From 44cb9ad2630c335b38ef76d1f57d4d13dc8dfc7c Mon Sep 17 00:00:00 2001
From: "(David) Siu-Kei Muk" <muksiukei@gmail.com>
Date: Tue, 26 Dec 2017 18:35:34 +0800
Subject: [PATCH 0001/1357] adding ps_strategy to run_config to enable
 different placement strategy in estimator

---
 tensorflow/python/estimator/estimator.py      |  3 ++-
 tensorflow/python/estimator/run_config.py     | 23 +++++++++++++++---
 .../python/estimator/run_config_test.py       | 24 +++++++++++++++----
 3 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 63103ef4c1..196c9e7d56 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -938,7 +938,8 @@ def _get_replica_device_setter(config):
         worker_device=worker_device,
         merge_devices=True,
         ps_ops=ps_ops,
-        cluster=config.cluster_spec)
+        cluster=config.cluster_spec,
+        ps_strategy=config.ps_strategy)
   else:
     return None
 
diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index 294a1caff3..9aba7beeee 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -27,9 +27,11 @@ import six
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
+from tensorflow.python.estimator import util
 
 
 _USE_DEFAULT = object()
+_VALID_PS_STRATEGY_ARGS = set(['op'])
 
 # A list of the property names in RunConfig that the user is allowed to change.
 _DEFAULT_REPLACEABLE_LIST = [
@@ -41,7 +43,8 @@ _DEFAULT_REPLACEABLE_LIST = [
     'session_config',
     'keep_checkpoint_max',
     'keep_checkpoint_every_n_hours',
-    'log_step_count_steps'
+    'log_step_count_steps',
+    'ps_strategy'
 ]
 
 _SAVE_CKPT_ERR = (
@@ -248,6 +251,10 @@ def _validate_properties(run_config):
   _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types),
             message='tf_random_seed must be integer.')
 
+  _validate('ps_strategy', lambda ps_strategy: six.callable(ps_strategy) and
+                                               set(util.fn_args(ps_strategy)) == set(['op']),
+            message='ps_strategy must be callable with exactly one argument "op".')
+
 
 class TaskType(object):
   MASTER = 'master'
@@ -269,7 +276,8 @@ class RunConfig(object):
                session_config=None,
                keep_checkpoint_max=5,
                keep_checkpoint_every_n_hours=10000,
-               log_step_count_steps=100):
+               log_step_count_steps=100,
+               ps_strategy=None):
     """Constructs a RunConfig.
 
     All distributed training related properties `cluster_spec`, `is_chief`,
@@ -392,6 +400,10 @@ class RunConfig(object):
         the feature.
       log_step_count_steps: The frequency, in number of global steps, that the
         global step/sec will be logged during training.
+      ps_strategy: A callable invoked for every ps `Operation` (i.e. matched by
+        `ps_ops`), that takes the `Operation` and returns the ps task index to
+        use.  If `None`, defaults to a round-robin strategy across all `ps`
+        devices.
 
 
     Raises:
@@ -427,7 +439,8 @@ class RunConfig(object):
         session_config=session_config,
         keep_checkpoint_max=keep_checkpoint_max,
         keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
-        log_step_count_steps=log_step_count_steps)
+        log_step_count_steps=log_step_count_steps,
+        ps_strategy=ps_strategy)
 
     self._init_distributed_setting_from_environment_var(tf_config)
 
@@ -536,6 +549,10 @@ class RunConfig(object):
   def num_worker_replicas(self):
     return self._num_worker_replicas
 
+  @property
+  def ps_strategy(self):
+    return self._ps_strategy
+
   @property
   def task_id(self):
     return self._task_id
diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py
index 9b7af60ff2..7277e5f1ac 100644
--- a/tensorflow/python/estimator/run_config_test.py
+++ b/tensorflow/python/estimator/run_config_test.py
@@ -42,6 +42,7 @@ _SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto'
 _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0'
 _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0'
 _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer'
+_PS_STRATEGY_ERR = 'ps_strategy must be callable with exactly one argument "op"'
 _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.'
 _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.'
 _INVALID_TASK_TYPE_FOR_EVAL_MASTER = (
@@ -83,6 +84,7 @@ class RunConfigTest(test.TestCase):
     self.assertEqual(5, config.keep_checkpoint_max)
     self.assertEqual(10000, config.keep_checkpoint_every_n_hours)
     self.assertIsNone(config.service)
+    self.assertIsNone(config.ps_strategy)
 
   def test_model_dir(self):
     empty_config = run_config_lib.RunConfig()
@@ -93,6 +95,7 @@ class RunConfigTest(test.TestCase):
 
   def test_replace_with_allowed_properties(self):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+    ps_strategy = lambda op: 0
 
     config = run_config_lib.RunConfig().replace(
         tf_random_seed=11,
@@ -100,13 +103,15 @@ class RunConfigTest(test.TestCase):
         save_checkpoints_secs=14,
         session_config=session_config,
         keep_checkpoint_max=16,
-        keep_checkpoint_every_n_hours=17)
+        keep_checkpoint_every_n_hours=17,
+        ps_strategy=ps_strategy)
     self.assertEqual(11, config.tf_random_seed)
     self.assertEqual(12, config.save_summary_steps)
     self.assertEqual(14, config.save_checkpoints_secs)
     self.assertEqual(session_config, config.session_config)
     self.assertEqual(16, config.keep_checkpoint_max)
     self.assertEqual(17, config.keep_checkpoint_every_n_hours)
+    self.assertEqual(ps_strategy, config.ps_strategy)
 
   def test_replace_none_value(self):
     config = run_config_lib.RunConfig().replace(
@@ -117,7 +122,8 @@ class RunConfigTest(test.TestCase):
         save_checkpoints_steps=None,
         session_config=None,
         keep_checkpoint_max=None,
-        keep_checkpoint_every_n_hours=None)
+        keep_checkpoint_every_n_hours=None,
+        ps_strategy=None)
     self.assertIsNone(config.tf_random_seed)
     self.assertIsNone(config.model_dir)
     self.assertIsNone(config.save_summary_steps)
@@ -126,6 +132,7 @@ class RunConfigTest(test.TestCase):
     self.assertIsNone(config.session_config)
     self.assertIsNone(config.keep_checkpoint_max)
     self.assertIsNone(config.keep_checkpoint_every_n_hours)
+    self.assertIsNone(config.ps_strategy)
 
   def test_replace_with_disallowallowed_properties(self):
     config = run_config_lib.RunConfig()
@@ -166,9 +173,12 @@ class RunConfigTest(test.TestCase):
       config.replace(keep_checkpoint_every_n_hours=0)
     with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
       config.replace(tf_random_seed=1.0)
+    with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR):
+      config.replace(ps_strategy=lambda x: 0)
 
   def test_init_with_allowed_properties(self):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+    ps_strategy = lambda op: 0
 
     config = run_config_lib.RunConfig(
         tf_random_seed=11,
@@ -176,13 +186,15 @@ class RunConfigTest(test.TestCase):
         save_checkpoints_secs=14,
         session_config=session_config,
         keep_checkpoint_max=16,
-        keep_checkpoint_every_n_hours=17)
+        keep_checkpoint_every_n_hours=17,
+        ps_strategy=ps_strategy)
     self.assertEqual(11, config.tf_random_seed)
     self.assertEqual(12, config.save_summary_steps)
     self.assertEqual(14, config.save_checkpoints_secs)
     self.assertEqual(session_config, config.session_config)
     self.assertEqual(16, config.keep_checkpoint_max)
     self.assertEqual(17, config.keep_checkpoint_every_n_hours)
+    self.assertEqual(ps_strategy, config.ps_strategy)
 
   def test_init_none_value(self):
     config = run_config_lib.RunConfig(
@@ -193,7 +205,8 @@ class RunConfigTest(test.TestCase):
         save_checkpoints_steps=None,
         session_config=None,
         keep_checkpoint_max=None,
-        keep_checkpoint_every_n_hours=None)
+        keep_checkpoint_every_n_hours=None,
+        ps_strategy=None)
     self.assertIsNone(config.tf_random_seed)
     self.assertIsNone(config.model_dir)
     self.assertIsNone(config.save_summary_steps)
@@ -202,6 +215,7 @@ class RunConfigTest(test.TestCase):
     self.assertIsNone(config.session_config)
     self.assertIsNone(config.keep_checkpoint_max)
     self.assertIsNone(config.keep_checkpoint_every_n_hours)
+    self.assertIsNone(config.ps_strategy)
 
   def test_init_invalid_values(self):
     with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
@@ -220,6 +234,8 @@ class RunConfigTest(test.TestCase):
       run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0)
     with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
       run_config_lib.RunConfig(tf_random_seed=1.0)
+    with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR):
+      run_config_lib.RunConfig(ps_strategy=lambda x: 0)
 
 
 class RunConfigDistributedSettingTest(test.TestCase):
-- 
GitLab


From 69ac707731c32b0cb856bf4682aeee73c4391e9e Mon Sep 17 00:00:00 2001
From: "(David) Siu-Kei Muk" <muksiukei@gmail.com>
Date: Mon, 19 Feb 2018 12:32:18 +0800
Subject: [PATCH 0002/1357] 1. Moved estimator._device_fn to RunConfig as
 @property 2. Made RunConfig.device_fn to return custom device function if one
 is specified, otherwise the result from `tf.train.replica_device_setter` call
 is used 3. Added some basic unit tests, may need further tests.

---
 tensorflow/python/estimator/estimator.py      |  4 +-
 tensorflow/python/estimator/run_config.py     | 66 +++++++++++++++++++
 .../python/estimator/run_config_test.py       | 16 +++++
 3 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 5553c58f55..23fc75adeb 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -207,7 +207,7 @@ class Estimator(object):
     else:
       self._session_config = self._config.session_config
 
-    self._device_fn = _get_replica_device_setter(self._config)
+    # self._device_fn = _get_replica_device_setter(self._config)
 
     if model_fn is None:
       raise ValueError('model_fn must be provided to Estimator.')
@@ -811,7 +811,7 @@ class Estimator(object):
 
   def _train_model(self, input_fn, hooks, saving_listeners):
     worker_hooks = []
-    with ops.Graph().as_default() as g, g.device(self._device_fn):
+    with ops.Graph().as_default() as g, g.device(self._config.device_fn):  # g.device(self._device_fn):
       random_seed.set_random_seed(self._config.tf_random_seed)
       global_step_tensor = self._create_and_assert_global_step(g)
       training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index 646276abbe..14b4446601 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -28,12 +28,14 @@ from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
 from tensorflow.python.estimator import util
+from tensorflow.python.training import training
 from tensorflow.python.util import compat_internal
 from tensorflow.python.util.tf_export import tf_export
 
 
 _USE_DEFAULT = object()
 _VALID_PS_STRATEGY_ARGS = set(['op'])
+_VALID_DEVICE_FN_ARGS = set(['op'])
 
 # A list of the property names in RunConfig that the user is allowed to change.
 _DEFAULT_REPLACEABLE_LIST = [
@@ -46,6 +48,7 @@ _DEFAULT_REPLACEABLE_LIST = [
     'keep_checkpoint_max',
     'keep_checkpoint_every_n_hours',
     'log_step_count_steps',
+    'device_fn',
     'ps_strategy'
 ]
 
@@ -281,6 +284,9 @@ def _validate_properties(run_config):
   _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types),
             message='tf_random_seed must be integer.')
 
+  _validate('device_fn', lambda device_fn: six.callable(device_fn) and
+                                           set(util.fn_args(device_fn)) == set(['op']),
+            message='device_fn must be callable with exactly one argument "op".')
   _validate('ps_strategy', lambda ps_strategy: six.callable(ps_strategy) and
                                                set(util.fn_args(ps_strategy)) == set(['op']),
             message='ps_strategy must be callable with exactly one argument "op".')
@@ -308,6 +314,7 @@ class RunConfig(object):
                keep_checkpoint_max=5,
                keep_checkpoint_every_n_hours=10000,
                log_step_count_steps=100,
+               device_fn=None,
                ps_strategy=None):
     """Constructs a RunConfig.
 
@@ -432,6 +439,9 @@ class RunConfig(object):
         the feature.
       log_step_count_steps: The frequency, in number of global steps, that the
         global step/sec will be logged during training.
+      device_fn: A callable invoked for every `Operation` that takes the
+        `Operation` and returns the device string. If `None`, defaults to
+        device function returned by `tf.train.replica_device_setter`.
       ps_strategy: A callable invoked for every ps `Operation` (i.e. matched by
         `ps_ops`), that takes the `Operation` and returns the ps task index to
         use.  If `None`, defaults to a round-robin strategy across all `ps`
@@ -473,6 +483,7 @@ class RunConfig(object):
         keep_checkpoint_max=keep_checkpoint_max,
         keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
         log_step_count_steps=log_step_count_steps,
+        device_fn=device_fn,
         ps_strategy=ps_strategy)
 
     self._init_distributed_setting_from_environment_var(tf_config)
@@ -575,6 +586,22 @@ class RunConfig(object):
   def cluster_spec(self):
     return self._cluster_spec
 
+  @property
+  def device_fn(self):
+    """Returns the device_fn.
+
+    If the device_fn is None, the device function returned by
+    `training.replica_device_setter` is used.
+    If the device_fn is not None, it is returned directly.
+
+    Returns:
+      None for non-distributed setting, device_fn otherwise.
+    """
+    if self._device_fn is None:
+      return _get_replica_device_setter(self)
+
+    return self._device_fn
+
   @property
   def evaluation_master(self):
     return self._evaluation_master
@@ -702,6 +729,8 @@ class RunConfig(object):
       - `keep_checkpoint_max`,
       - `keep_checkpoint_every_n_hours`,
       - `log_step_count_steps`,
+      - `device_fn`,
+      - `ps_strategy`
 
     In addition, either `save_checkpoints_steps` or `save_checkpoints_secs`
     can be set (should not be both).
@@ -785,3 +814,40 @@ def _get_model_dir(tf_config, model_dir):
     logging.info('Using model_dir in TF_CONFIG: %s', model_dir_in_tf_config)
 
   return model_dir or model_dir_in_tf_config
+
+
+def _get_replica_device_setter(config):
+  """Creates a replica device setter if required as a default device_fn.
+
+  `Estimator` uses ReplicaDeviceSetter as a default device placer. It sets the
+  distributed related arguments such as number of ps_replicas based on given
+  config.
+
+  Args:
+    config: A `RunConfig` instance.
+
+  Returns:
+    A replica device setter, or None.
+  """
+  ps_ops = [
+      'Variable', 'VariableV2', 'AutoReloadVariable', 'MutableHashTable',
+      'MutableHashTableV2', 'MutableHashTableOfTensors',
+      'MutableHashTableOfTensorsV2', 'MutableDenseHashTable',
+      'MutableDenseHashTableV2', 'VarHandleOp'
+  ]
+
+  if config.task_type:
+    worker_device = '/job:%s/task:%d' % (config.task_type, config.task_id)
+  else:
+    worker_device = '/job:worker'
+
+  if config.num_ps_replicas > 0:
+    return training.replica_device_setter(
+        ps_tasks=config.num_ps_replicas,
+        worker_device=worker_device,
+        merge_devices=True,
+        ps_ops=ps_ops,
+        cluster=config.cluster_spec,
+        ps_strategy=config.ps_strategy)
+  else:
+    return None
\ No newline at end of file
diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py
index 59652ef82d..12923c4373 100644
--- a/tensorflow/python/estimator/run_config_test.py
+++ b/tensorflow/python/estimator/run_config_test.py
@@ -42,6 +42,7 @@ _SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto'
 _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0'
 _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0'
 _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer'
+_DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument "op".'
 _PS_STRATEGY_ERR = 'ps_strategy must be callable with exactly one argument "op"'
 _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.'
 _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.'
@@ -84,6 +85,7 @@ class RunConfigTest(test.TestCase):
     self.assertEqual(5, config.keep_checkpoint_max)
     self.assertEqual(10000, config.keep_checkpoint_every_n_hours)
     self.assertIsNone(config.service)
+    self.assertIsNone(config.device_fn)
     self.assertIsNone(config.ps_strategy)
 
   def test_model_dir(self):
@@ -96,6 +98,7 @@ class RunConfigTest(test.TestCase):
   def test_replace_with_allowed_properties(self):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
     ps_strategy = lambda op: 0
+    device_fn = lambda op: "/cpu:0"
 
     config = run_config_lib.RunConfig().replace(
         tf_random_seed=11,
@@ -104,6 +107,7 @@ class RunConfigTest(test.TestCase):
         session_config=session_config,
         keep_checkpoint_max=16,
         keep_checkpoint_every_n_hours=17,
+        device_fn=device_fn,
         ps_strategy=ps_strategy)
     self.assertEqual(11, config.tf_random_seed)
     self.assertEqual(12, config.save_summary_steps)
@@ -111,6 +115,7 @@ class RunConfigTest(test.TestCase):
     self.assertEqual(session_config, config.session_config)
     self.assertEqual(16, config.keep_checkpoint_max)
     self.assertEqual(17, config.keep_checkpoint_every_n_hours)
+    self.assertEqual(device_fn, config.device_fn)
     self.assertEqual(ps_strategy, config.ps_strategy)
 
   def test_replace_none_value(self):
@@ -123,6 +128,7 @@ class RunConfigTest(test.TestCase):
         session_config=None,
         keep_checkpoint_max=None,
         keep_checkpoint_every_n_hours=None,
+        device_fn=None,
         ps_strategy=None)
     self.assertIsNone(config.tf_random_seed)
     self.assertIsNone(config.model_dir)
@@ -132,6 +138,7 @@ class RunConfigTest(test.TestCase):
     self.assertIsNone(config.session_config)
     self.assertIsNone(config.keep_checkpoint_max)
     self.assertIsNone(config.keep_checkpoint_every_n_hours)
+    self.assertIsNone(config.device_fn)
     self.assertIsNone(config.ps_strategy)
 
   def test_replace_with_disallowallowed_properties(self):
@@ -173,11 +180,14 @@ class RunConfigTest(test.TestCase):
       config.replace(keep_checkpoint_every_n_hours=0)
     with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
       config.replace(tf_random_seed=1.0)
+    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
+      config.replace(device_fn=lambda x, y: 0)
     with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR):
       config.replace(ps_strategy=lambda x: 0)
 
   def test_init_with_allowed_properties(self):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+    device_fn = lambda op: "/cpu:0"
     ps_strategy = lambda op: 0
 
     config = run_config_lib.RunConfig(
@@ -187,6 +197,7 @@ class RunConfigTest(test.TestCase):
         session_config=session_config,
         keep_checkpoint_max=16,
         keep_checkpoint_every_n_hours=17,
+        device_fn=device_fn,
         ps_strategy=ps_strategy)
     self.assertEqual(11, config.tf_random_seed)
     self.assertEqual(12, config.save_summary_steps)
@@ -194,6 +205,7 @@ class RunConfigTest(test.TestCase):
     self.assertEqual(session_config, config.session_config)
     self.assertEqual(16, config.keep_checkpoint_max)
     self.assertEqual(17, config.keep_checkpoint_every_n_hours)
+    self.assertEqual(device_fn, config.device_fn)
     self.assertEqual(ps_strategy, config.ps_strategy)
 
   def test_init_none_value(self):
@@ -206,6 +218,7 @@ class RunConfigTest(test.TestCase):
         session_config=None,
         keep_checkpoint_max=None,
         keep_checkpoint_every_n_hours=None,
+        device_fn=None,
         ps_strategy=None)
     self.assertIsNone(config.tf_random_seed)
     self.assertIsNone(config.model_dir)
@@ -215,6 +228,7 @@ class RunConfigTest(test.TestCase):
     self.assertIsNone(config.session_config)
     self.assertIsNone(config.keep_checkpoint_max)
     self.assertIsNone(config.keep_checkpoint_every_n_hours)
+    self.assertIsNone(config.device_fn)
     self.assertIsNone(config.ps_strategy)
 
   def test_init_invalid_values(self):
@@ -234,6 +248,8 @@ class RunConfigTest(test.TestCase):
       run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0)
     with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
       run_config_lib.RunConfig(tf_random_seed=1.0)
+    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
+      run_config_lib.RunConfig(device_fn=lambda x: "/cpu:0")
     with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR):
       run_config_lib.RunConfig(ps_strategy=lambda x: 0)
 
-- 
GitLab


From 51115ee74ed5b64cc03f18d523d8d48f36ef27ba Mon Sep 17 00:00:00 2001
From: "(David) Siu-Kei Muk" <muksiukei@gmail.com>
Date: Sat, 24 Feb 2018 14:32:36 +0800
Subject: [PATCH 0003/1357] 1. Removing ps_strategy. 2. Modified estimator to
 take overriden device_fn from  if set. 3. Removed ps_strategy related unit
 tests.

---
 tensorflow/python/estimator/estimator.py      |  7 +-
 tensorflow/python/estimator/run_config.py     | 78 +++----------------
 .../python/estimator/run_config_test.py       | 24 +-----
 3 files changed, 17 insertions(+), 92 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 23fc75adeb..821cbc10d2 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -207,7 +207,7 @@ class Estimator(object):
     else:
       self._session_config = self._config.session_config
 
-    # self._device_fn = _get_replica_device_setter(self._config)
+    self._device_fn = self._config.device_fn or _get_replica_device_setter(self._config)
 
     if model_fn is None:
       raise ValueError('model_fn must be provided to Estimator.')
@@ -811,7 +811,7 @@ class Estimator(object):
 
   def _train_model(self, input_fn, hooks, saving_listeners):
     worker_hooks = []
-    with ops.Graph().as_default() as g, g.device(self._config.device_fn):  # g.device(self._device_fn):
+    with ops.Graph().as_default() as g, g.device(self._device_fn):
       random_seed.set_random_seed(self._config.tf_random_seed)
       global_step_tensor = self._create_and_assert_global_step(g)
       training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
@@ -1025,8 +1025,7 @@ def _get_replica_device_setter(config):
         worker_device=worker_device,
         merge_devices=True,
         ps_ops=ps_ops,
-        cluster=config.cluster_spec,
-        ps_strategy=config.ps_strategy)
+        cluster=config.cluster_spec)
   else:
     return None
 
diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index 14b4446601..b06f212ac0 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -34,7 +34,6 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 _USE_DEFAULT = object()
-_VALID_PS_STRATEGY_ARGS = set(['op'])
 _VALID_DEVICE_FN_ARGS = set(['op'])
 
 # A list of the property names in RunConfig that the user is allowed to change.
@@ -48,8 +47,7 @@ _DEFAULT_REPLACEABLE_LIST = [
     'keep_checkpoint_max',
     'keep_checkpoint_every_n_hours',
     'log_step_count_steps',
-    'device_fn',
-    'ps_strategy'
+    'device_fn'
 ]
 
 _SAVE_CKPT_ERR = (
@@ -285,11 +283,8 @@ def _validate_properties(run_config):
             message='tf_random_seed must be integer.')
 
   _validate('device_fn', lambda device_fn: six.callable(device_fn) and
-                                           set(util.fn_args(device_fn)) == set(['op']),
+                                           set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS,
             message='device_fn must be callable with exactly one argument "op".')
-  _validate('ps_strategy', lambda ps_strategy: six.callable(ps_strategy) and
-                                               set(util.fn_args(ps_strategy)) == set(['op']),
-            message='ps_strategy must be callable with exactly one argument "op".')
 
 
 class TaskType(object):
@@ -314,8 +309,7 @@ class RunConfig(object):
                keep_checkpoint_max=5,
                keep_checkpoint_every_n_hours=10000,
                log_step_count_steps=100,
-               device_fn=None,
-               ps_strategy=None):
+               device_fn=None):
     """Constructs a RunConfig.
 
     All distributed training related properties `cluster_spec`, `is_chief`,
@@ -441,11 +435,8 @@ class RunConfig(object):
         global step/sec will be logged during training.
       device_fn: A callable invoked for every `Operation` that takes the
         `Operation` and returns the device string. If `None`, defaults to
-        device function returned by `tf.train.replica_device_setter`.
-      ps_strategy: A callable invoked for every ps `Operation` (i.e. matched by
-        `ps_ops`), that takes the `Operation` and returns the ps task index to
-        use.  If `None`, defaults to a round-robin strategy across all `ps`
-        devices.
+        the device function returned by `tf.train.replica_device_setter`
+        with round-robin strategy.
 
 
     Raises:
@@ -483,8 +474,7 @@ class RunConfig(object):
         keep_checkpoint_max=keep_checkpoint_max,
         keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
         log_step_count_steps=log_step_count_steps,
-        device_fn=device_fn,
-        ps_strategy=ps_strategy)
+        device_fn=device_fn)
 
     self._init_distributed_setting_from_environment_var(tf_config)
 
@@ -590,16 +580,10 @@ class RunConfig(object):
   def device_fn(self):
     """Returns the device_fn.
 
-    If the device_fn is None, the device function returned by
-    `training.replica_device_setter` is used.
-    If the device_fn is not None, it is returned directly.
-
-    Returns:
-      None for non-distributed setting, device_fn otherwise.
+    If device_fn is not `None`, it overrides the default
+    device function used in `Estimator`.
+    Otherwise the default one is used.
     """
-    if self._device_fn is None:
-      return _get_replica_device_setter(self)
-
     return self._device_fn
 
   @property
@@ -622,10 +606,6 @@ class RunConfig(object):
   def num_worker_replicas(self):
     return self._num_worker_replicas
 
-  @property
-  def ps_strategy(self):
-    return self._ps_strategy
-
   @property
   def task_id(self):
     return self._task_id
@@ -729,8 +709,7 @@ class RunConfig(object):
       - `keep_checkpoint_max`,
       - `keep_checkpoint_every_n_hours`,
       - `log_step_count_steps`,
-      - `device_fn`,
-      - `ps_strategy`
+      - `device_fn`
 
     In addition, either `save_checkpoints_steps` or `save_checkpoints_secs`
     can be set (should not be both).
@@ -814,40 +793,3 @@ def _get_model_dir(tf_config, model_dir):
     logging.info('Using model_dir in TF_CONFIG: %s', model_dir_in_tf_config)
 
   return model_dir or model_dir_in_tf_config
-
-
-def _get_replica_device_setter(config):
-  """Creates a replica device setter if required as a default device_fn.
-
-  `Estimator` uses ReplicaDeviceSetter as a default device placer. It sets the
-  distributed related arguments such as number of ps_replicas based on given
-  config.
-
-  Args:
-    config: A `RunConfig` instance.
-
-  Returns:
-    A replica device setter, or None.
-  """
-  ps_ops = [
-      'Variable', 'VariableV2', 'AutoReloadVariable', 'MutableHashTable',
-      'MutableHashTableV2', 'MutableHashTableOfTensors',
-      'MutableHashTableOfTensorsV2', 'MutableDenseHashTable',
-      'MutableDenseHashTableV2', 'VarHandleOp'
-  ]
-
-  if config.task_type:
-    worker_device = '/job:%s/task:%d' % (config.task_type, config.task_id)
-  else:
-    worker_device = '/job:worker'
-
-  if config.num_ps_replicas > 0:
-    return training.replica_device_setter(
-        ps_tasks=config.num_ps_replicas,
-        worker_device=worker_device,
-        merge_devices=True,
-        ps_ops=ps_ops,
-        cluster=config.cluster_spec,
-        ps_strategy=config.ps_strategy)
-  else:
-    return None
\ No newline at end of file
diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py
index 12923c4373..c8b12605e1 100644
--- a/tensorflow/python/estimator/run_config_test.py
+++ b/tensorflow/python/estimator/run_config_test.py
@@ -43,7 +43,6 @@ _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0'
 _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0'
 _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer'
 _DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument "op".'
-_PS_STRATEGY_ERR = 'ps_strategy must be callable with exactly one argument "op"'
 _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.'
 _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.'
 _INVALID_TASK_TYPE_FOR_EVAL_MASTER = (
@@ -86,7 +85,6 @@ class RunConfigTest(test.TestCase):
     self.assertEqual(10000, config.keep_checkpoint_every_n_hours)
     self.assertIsNone(config.service)
     self.assertIsNone(config.device_fn)
-    self.assertIsNone(config.ps_strategy)
 
   def test_model_dir(self):
     empty_config = run_config_lib.RunConfig()
@@ -97,7 +95,6 @@ class RunConfigTest(test.TestCase):
 
   def test_replace_with_allowed_properties(self):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
-    ps_strategy = lambda op: 0
     device_fn = lambda op: "/cpu:0"
 
     config = run_config_lib.RunConfig().replace(
@@ -107,8 +104,7 @@ class RunConfigTest(test.TestCase):
         session_config=session_config,
         keep_checkpoint_max=16,
         keep_checkpoint_every_n_hours=17,
-        device_fn=device_fn,
-        ps_strategy=ps_strategy)
+        device_fn=device_fn)
     self.assertEqual(11, config.tf_random_seed)
     self.assertEqual(12, config.save_summary_steps)
     self.assertEqual(14, config.save_checkpoints_secs)
@@ -116,7 +112,6 @@ class RunConfigTest(test.TestCase):
     self.assertEqual(16, config.keep_checkpoint_max)
     self.assertEqual(17, config.keep_checkpoint_every_n_hours)
     self.assertEqual(device_fn, config.device_fn)
-    self.assertEqual(ps_strategy, config.ps_strategy)
 
   def test_replace_none_value(self):
     config = run_config_lib.RunConfig().replace(
@@ -128,8 +123,7 @@ class RunConfigTest(test.TestCase):
         session_config=None,
         keep_checkpoint_max=None,
         keep_checkpoint_every_n_hours=None,
-        device_fn=None,
-        ps_strategy=None)
+        device_fn=None)
     self.assertIsNone(config.tf_random_seed)
     self.assertIsNone(config.model_dir)
     self.assertIsNone(config.save_summary_steps)
@@ -139,7 +133,6 @@ class RunConfigTest(test.TestCase):
     self.assertIsNone(config.keep_checkpoint_max)
     self.assertIsNone(config.keep_checkpoint_every_n_hours)
     self.assertIsNone(config.device_fn)
-    self.assertIsNone(config.ps_strategy)
 
   def test_replace_with_disallowallowed_properties(self):
     config = run_config_lib.RunConfig()
@@ -182,13 +175,10 @@ class RunConfigTest(test.TestCase):
       config.replace(tf_random_seed=1.0)
     with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
       config.replace(device_fn=lambda x, y: 0)
-    with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR):
-      config.replace(ps_strategy=lambda x: 0)
 
   def test_init_with_allowed_properties(self):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
     device_fn = lambda op: "/cpu:0"
-    ps_strategy = lambda op: 0
 
     config = run_config_lib.RunConfig(
         tf_random_seed=11,
@@ -197,8 +187,7 @@ class RunConfigTest(test.TestCase):
         session_config=session_config,
         keep_checkpoint_max=16,
         keep_checkpoint_every_n_hours=17,
-        device_fn=device_fn,
-        ps_strategy=ps_strategy)
+        device_fn=device_fn)
     self.assertEqual(11, config.tf_random_seed)
     self.assertEqual(12, config.save_summary_steps)
     self.assertEqual(14, config.save_checkpoints_secs)
@@ -206,7 +195,6 @@ class RunConfigTest(test.TestCase):
     self.assertEqual(16, config.keep_checkpoint_max)
     self.assertEqual(17, config.keep_checkpoint_every_n_hours)
     self.assertEqual(device_fn, config.device_fn)
-    self.assertEqual(ps_strategy, config.ps_strategy)
 
   def test_init_none_value(self):
     config = run_config_lib.RunConfig(
@@ -218,8 +206,7 @@ class RunConfigTest(test.TestCase):
         session_config=None,
         keep_checkpoint_max=None,
         keep_checkpoint_every_n_hours=None,
-        device_fn=None,
-        ps_strategy=None)
+        device_fn=None)
     self.assertIsNone(config.tf_random_seed)
     self.assertIsNone(config.model_dir)
     self.assertIsNone(config.save_summary_steps)
@@ -229,7 +216,6 @@ class RunConfigTest(test.TestCase):
     self.assertIsNone(config.keep_checkpoint_max)
     self.assertIsNone(config.keep_checkpoint_every_n_hours)
     self.assertIsNone(config.device_fn)
-    self.assertIsNone(config.ps_strategy)
 
   def test_init_invalid_values(self):
     with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
@@ -250,8 +236,6 @@ class RunConfigTest(test.TestCase):
       run_config_lib.RunConfig(tf_random_seed=1.0)
     with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
       run_config_lib.RunConfig(device_fn=lambda x: "/cpu:0")
-    with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR):
-      run_config_lib.RunConfig(ps_strategy=lambda x: 0)
 
 
 class RunConfigDistributedSettingTest(test.TestCase):
-- 
GitLab


From 584e3495b50db8fd0a894de8b6d85fcf4268a855 Mon Sep 17 00:00:00 2001
From: Sunitha Kambhampati <skambha@us.ibm.com>
Date: Tue, 13 Mar 2018 11:43:01 -0700
Subject: [PATCH 0004/1357] Fix floating point exception with bps calculation 
 modified:   tensorflow/contrib/tensorboard/db/loader.cc

---
 tensorflow/contrib/tensorboard/db/loader.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc
index 4d7337a53d..9134296c74 100644
--- a/tensorflow/contrib/tensorboard/db/loader.cc
+++ b/tensorflow/contrib/tensorboard/db/loader.cc
@@ -112,8 +112,10 @@ int main(int argc, char* argv[]) {
   }
   uint64 elapsed = env->NowMicros() - start;
   LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with "
-            << AddCommas(records) << " records at "
-            << AddCommas(offset / (elapsed / 1000000)) << " bps";
+            << AddCommas(records) << " records";
+  if (elapsed > 0) {
+    LOG(INFO) << "bps=" << (uint64)(offset / (elapsed / 1000000.0));
+  }
 
   return 0;
 }
-- 
GitLab


From 548415b9be78839a23a3909044329c3f221fa4b3 Mon Sep 17 00:00:00 2001
From: Sunitha Kambhampati <skambha@us.ibm.com>
Date: Wed, 28 Mar 2018 21:25:23 -0700
Subject: [PATCH 0005/1357] Use the same log line for bps and also report bps
 when elapsed is 0

---
 tensorflow/contrib/tensorboard/db/loader.cc | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc
index 9134296c74..97b9daa361 100644
--- a/tensorflow/contrib/tensorboard/db/loader.cc
+++ b/tensorflow/contrib/tensorboard/db/loader.cc
@@ -112,11 +112,10 @@ int main(int argc, char* argv[]) {
   }
   uint64 elapsed = env->NowMicros() - start;
   LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with "
-            << AddCommas(records) << " records";
-  if (elapsed > 0) {
-    LOG(INFO) << "bps=" << (uint64)(offset / (elapsed / 1000000.0));
-  }
-
+            << AddCommas(records) << " records at "
+            << (elapsed == 0 ? offset : static_cast<uint64>(
+                                            offset / (elapsed / 1000000.0)))
+            << " bps";
   return 0;
 }
 
-- 
GitLab


From b621ac047e43540992b3ac0e9055b9e7225e74da Mon Sep 17 00:00:00 2001
From: Sunitha Kambhampati <skambha@us.ibm.com>
Date: Thu, 29 Mar 2018 11:51:02 -0700
Subject: [PATCH 0006/1357] Add the commas back

---
 tensorflow/contrib/tensorboard/db/loader.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc
index 97b9daa361..6439328022 100644
--- a/tensorflow/contrib/tensorboard/db/loader.cc
+++ b/tensorflow/contrib/tensorboard/db/loader.cc
@@ -111,11 +111,10 @@ int main(int argc, char* argv[]) {
     ++records;
   }
   uint64 elapsed = env->NowMicros() - start;
+  uint64 bps = (elapsed == 0 ? offset : static_cast<uint64>(
+                                            offset / (elapsed / 1000000.0)));
   LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with "
-            << AddCommas(records) << " records at "
-            << (elapsed == 0 ? offset : static_cast<uint64>(
-                                            offset / (elapsed / 1000000.0)))
-            << " bps";
+            << AddCommas(records) << " records at " << AddCommas(bps) << " bps";
   return 0;
 }
 
-- 
GitLab


From 203972b68a416725cd00fc3462345c9e7c0ebfa8 Mon Sep 17 00:00:00 2001
From: "(David) Siu-Kei Muk" <muksiukei@gmail.com>
Date: Tue, 3 Apr 2018 19:15:55 +0800
Subject: [PATCH 0007/1357] Adding manual initialization of _device_fn in
 legacy RunConfig class

---
 tensorflow/contrib/learn/python/learn/estimators/run_config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py
index f3500bf56f..6d0f0b8da9 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py
@@ -299,6 +299,7 @@ class RunConfig(ClusterConfig, core_run_config.RunConfig):
     # so instead of breaking compatibility with that assumption, we
     # just manually initialize this field:
     self._distribute = None
+    self._device_fn = None
 
     gpu_options = config_pb2.GPUOptions(
         per_process_gpu_memory_fraction=gpu_memory_fraction)
-- 
GitLab


From cb54e6c766a152657c78cc4f91ebe81fc15b9b9c Mon Sep 17 00:00:00 2001
From: "(David) Siu-Kei Muk" <muksiukei@gmail.com>
Date: Thu, 5 Apr 2018 23:46:26 +0800
Subject: [PATCH 0008/1357] Updated estimator golden API through 1. bazel build
 //tensorflow/tools/api/tests:api_compatibility_test 2.
 bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens
 True

---
 .../tools/api/golden/tensorflow.estimator.-run-config.pbtxt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt
index 759ff752b0..6188840d90 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt
@@ -6,6 +6,10 @@ tf_class {
     name: "cluster_spec"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "device_fn"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "distribute"
     mtype: "<type \'property\'>"
@@ -84,7 +88,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'distribute\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'<object object instance>\', \'<object object instance>\', \'None\', \'5\', \'10000\', \'100\', \'None\'], "
+    argspec: "args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'distribute\', \'device_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'<object object instance>\', \'<object object instance>\', \'None\', \'5\', \'10000\', \'100\', \'None\', \'None\'], "
   }
   member_method {
     name: "replace"
-- 
GitLab


From e5ff57da82742660262b7e77c2906d9621d0aaa3 Mon Sep 17 00:00:00 2001
From: "(David) Siu-Kei Muk" <muksiukei@gmail.com>
Date: Thu, 12 Apr 2018 22:59:05 +0800
Subject: [PATCH 0009/1357] fixing code styles

---
 tensorflow/python/estimator/estimator.py  | 3 ++-
 tensorflow/python/estimator/run_config.py | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 022dbde30f..c3f7c8de3f 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -216,7 +216,8 @@ class Estimator(object):
     else:
       self._session_config = self._config.session_config
 
-    self._device_fn = self._config.device_fn or _get_replica_device_setter(self._config)
+    self._device_fn = self._config.device_fn or \
+                      _get_replica_device_setter(self._config)
 
     if model_fn is None:
       raise ValueError('model_fn must be provided to Estimator.')
diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index bb0d900be4..40f114b274 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -28,7 +28,6 @@ from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
 from tensorflow.python.estimator import util
-from tensorflow.python.training import training
 from tensorflow.python.util import compat_internal
 from tensorflow.python.util.tf_export import tf_export
 
@@ -284,8 +283,9 @@ def _validate_properties(run_config):
             message='tf_random_seed must be integer.')
 
   _validate('device_fn', lambda device_fn: six.callable(device_fn) and
-                                           set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS,
-            message='device_fn must be callable with exactly one argument "op".')
+            set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS,
+            message='device_fn must be callable with exactly'
+                    ' one argument "op".')
 
 
 class TaskType(object):
-- 
GitLab


From c22d996c3d6a16db292bd3464b2ef7b91adae676 Mon Sep 17 00:00:00 2001
From: imsheridan <xiaoyudong0512@gmail.com>
Date: Tue, 17 Apr 2018 01:00:44 +0800
Subject: [PATCH 0010/1357] Fix expand_dims of dims argument has been
 deprecated with axis

---
 .../contrib/layers/python/layers/target_column.py      |  4 ++--
 .../contrib/learn/python/learn/estimators/head.py      | 10 +++++-----
 .../timeseries/state_space_models/state_space_model.py |  2 +-
 .../tools/compatibility/testdata/test_file_v0_11.py    |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/target_column.py b/tensorflow/contrib/layers/python/layers/target_column.py
index 3e639a180e..f3377f2a05 100644
--- a/tensorflow/contrib/layers/python/layers/target_column.py
+++ b/tensorflow/contrib/layers/python/layers/target_column.py
@@ -396,7 +396,7 @@ class _BinarySvmTargetColumn(_MultiClassTargetColumn):
 def _mean_squared_loss(logits, target):
   # To prevent broadcasting inside "-".
   if len(target.get_shape()) == 1:
-    target = array_ops.expand_dims(target, dim=[1])
+    target = array_ops.expand_dims(target, axis=1)
 
   logits.get_shape().assert_is_compatible_with(target.get_shape())
   return math_ops.square(logits - math_ops.to_float(target))
@@ -405,7 +405,7 @@ def _mean_squared_loss(logits, target):
 def _log_loss_with_two_classes(logits, target):
   # sigmoid_cross_entropy_with_logits requires [batch_size, 1] target.
   if len(target.get_shape()) == 1:
-    target = array_ops.expand_dims(target, dim=[1])
+    target = array_ops.expand_dims(target, axis=1)
   loss_vec = nn.sigmoid_cross_entropy_with_logits(
       labels=math_ops.to_float(target), logits=logits)
   return loss_vec
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index 2b4b6eff39..06f4173170 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -563,10 +563,10 @@ def _mean_squared_loss(labels, logits, weights=None):
     labels = ops.convert_to_tensor(labels)
     # To prevent broadcasting inside "-".
     if len(labels.get_shape()) == 1:
-      labels = array_ops.expand_dims(labels, dim=(1,))
+      labels = array_ops.expand_dims(labels, axis=1)
     # TODO(zakaria): make sure it does not recreate the broadcast bug.
     if len(logits.get_shape()) == 1:
-      logits = array_ops.expand_dims(logits, dim=(1,))
+      logits = array_ops.expand_dims(logits, axis=1)
     logits.get_shape().assert_is_compatible_with(labels.get_shape())
     loss = math_ops.square(logits - math_ops.to_float(labels), name=name)
     return _compute_weighted_loss(loss, weights)
@@ -579,10 +579,10 @@ def _poisson_loss(labels, logits, weights=None):
     labels = ops.convert_to_tensor(labels)
     # To prevent broadcasting inside "-".
     if len(labels.get_shape()) == 1:
-      labels = array_ops.expand_dims(labels, dim=(1,))
+      labels = array_ops.expand_dims(labels, axis=1)
     # TODO(zakaria): make sure it does not recreate the broadcast bug.
     if len(logits.get_shape()) == 1:
-      logits = array_ops.expand_dims(logits, dim=(1,))
+      logits = array_ops.expand_dims(logits, axis=1)
     logits.get_shape().assert_is_compatible_with(labels.get_shape())
     loss = nn.log_poisson_loss(labels, logits, compute_full_loss=True,
                                name=name)
@@ -797,7 +797,7 @@ def _log_loss_with_two_classes(labels, logits, weights=None):
     # TODO(ptucker): This will break for dynamic shapes.
     # sigmoid_cross_entropy_with_logits requires [batch_size, 1] labels.
     if len(labels.get_shape()) == 1:
-      labels = array_ops.expand_dims(labels, dim=(1,))
+      labels = array_ops.expand_dims(labels, axis=1)
     loss = nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits,
                                                 name=name)
     return _compute_weighted_loss(loss, weights)
diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py
index 951c6546d5..d04c721007 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py
@@ -909,7 +909,7 @@ class StateSpaceModel(model.SequentialTimeSeriesModel):
     elif unbroadcasted_shape.ndims == 2:
       # Unbroadcasted shape [num features x state dimension]
       broadcasted_model = array_ops.tile(
-          array_ops.expand_dims(unbroadcasted_model, dim=0),
+          array_ops.expand_dims(unbroadcasted_model, axis=0),
           [array_ops.shape(times)[0], 1, 1])
     elif unbroadcasted_shape.ndims == 3:
       broadcasted_model = unbroadcasted_model
diff --git a/tensorflow/tools/compatibility/testdata/test_file_v0_11.py b/tensorflow/tools/compatibility/testdata/test_file_v0_11.py
index 01f37d8768..40526d930c 100644
--- a/tensorflow/tools/compatibility/testdata/test_file_v0_11.py
+++ b/tensorflow/tools/compatibility/testdata/test_file_v0_11.py
@@ -94,7 +94,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
       self.assertAllClose(
           tf.reduce_logsumexp(a, [0, 1]).eval(), 6.45619344711)
       self.assertAllEqual(
-          tf.expand_dims([[1, 2], [3, 4]], dim=1).eval(),
+          tf.expand_dims([[1, 2], [3, 4]], axis=1).eval(),
           [[[1, 2]], [[3, 4]]])
 
   def testArgMinMax(self):
-- 
GitLab


From f35dc0a522ae630902baa5be16d2a53b59266770 Mon Sep 17 00:00:00 2001
From: Bruno Goncalves <882745+brunomorishita@users.noreply.github.com>
Date: Sat, 28 Apr 2018 19:24:22 -0300
Subject: [PATCH 0011/1357] Fix cmake library path for libpng16.a

---
 tensorflow/contrib/cmake/external/png.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake
index ad2af01bc0..1a147e9c8e 100644
--- a/tensorflow/contrib/cmake/external/png.cmake
+++ b/tensorflow/contrib/cmake/external/png.cmake
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 include (ExternalProject)
+include (GNUInstallDirs)
 
 set(png_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/png_archive)
 set(png_URL https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.6.34.tar.gz)
@@ -35,7 +36,7 @@ if(WIN32)
     endif()
   endif()
 else()
-  set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng16.a)
+  set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/${CMAKE_INSTALL_LIBDIR}/libpng16.a)
 endif()
 
 set(png_HEADERS
-- 
GitLab


From 071e6175dcc130b4c623e849a380d6434289eb66 Mon Sep 17 00:00:00 2001
From: Erik Smistad <ersmistad@gmail.com>
Date: Thu, 24 May 2018 15:47:00 +0200
Subject: [PATCH 0012/1357] Added the -Thost=x64 flag to cmake build
 instructions

---
 tensorflow/contrib/cmake/README.md | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 0b79f718d4..5c203b777c 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -106,17 +106,6 @@ Step-by-step Windows build
 
 1. Install the prerequisites detailed above, and set up your environment.
 
-   * The following commands assume that you are using the Windows Command
-     Prompt (`cmd.exe`). You will need to set up your environment to use the
-     appropriate toolchain, i.e. the 64-bit tools. (Some of the binary targets
-     we will build are too large for the 32-bit tools, and they will fail with
-     out-of-memory errors.) The typical command to do set up your
-     environment is:
-
-     ```
-     D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat"
-     ```
-
    * When building with GPU support after installing the CUDNN zip file from NVidia, append its
      bin directory to your PATH environment variable.
      In case TensorFlow fails to find the CUDA dll's during initialization, check your PATH environment variable.
@@ -168,7 +157,7 @@ Step-by-step Windows build
    and must be the last character on each line.
 
    ```
-   D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^
+   D:\...\build> cmake .. -A x64 -Thost=x64 -DCMAKE_BUILD_TYPE=Release ^
    More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^
    More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^
    More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib
@@ -197,6 +186,10 @@ Step-by-step Windows build
    not currently supported, because it relies on a `Debug` library for
    Python (`python35d.lib`) that is not distributed by default.
 
+   The `-Thost=x64` flag will ensure that the 64 bit compiler and linker
+   is used when building. Without this flag, MSBuild will use the 32 bit
+   toolchain which is prone to compile errors such as "compiler out of heap space".
+
    There are various options that can be specified when generating the
    solution and project files:
 
@@ -263,6 +256,11 @@ Step-by-step Windows build
 
 4. Invoke MSBuild to build TensorFlow.
 
+   Set up the path to find MSbuild:
+   ```
+   D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat"
+   ```
+
    To build the C++ example program, which will be created as a `.exe`
    executable in the subdirectory `.\Release`:
 
-- 
GitLab


From 6890731b2693f6b71dedaca6b2eaf8b488226836 Mon Sep 17 00:00:00 2001
From: Erik Smistad <ersmistad@gmail.com>
Date: Thu, 24 May 2018 15:47:22 +0200
Subject: [PATCH 0013/1357] increase minimum cmake version required to 3.8

---
 tensorflow/contrib/cmake/CMakeLists.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 0708d6b7b9..225c5e6227 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -1,5 +1,9 @@
 # Minimum CMake required
-cmake_minimum_required(VERSION 3.5)
+if(WIN32)
+  cmake_minimum_required(VERSION 3.8)
+else()
+  cmake_minimum_required(VERSION 3.5)
+endif()
 
 # Project
 project(tensorflow C CXX)
-- 
GitLab


From f78fd433118830482dddbf6055751898a19265de Mon Sep 17 00:00:00 2001
From: jiefangxuanyan <505745416@qq.com>
Date: Wed, 13 Jun 2018 17:28:23 +0800
Subject: [PATCH 0014/1357] Specify endianness in expected_result array to fix
 #15767.

---
 tensorflow/python/kernel_tests/decode_raw_op_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py
index 122a9ed469..0bd8bc3c7b 100644
--- a/tensorflow/python/kernel_tests/decode_raw_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py
@@ -79,7 +79,7 @@ class DecodeRawOpTest(test.TestCase):
       decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.float16)
       self.assertEqual([None, None], decode.get_shape().as_list())
 
-      expected_result = np.matrix([[1, -2, -3, 4]], dtype=np.float16)
+      expected_result = np.matrix([[1, -2, -3, 4]], dtype="<f2")
       result = decode.eval(feed_dict={in_bytes: [expected_result.tostring()]})
 
       self.assertAllEqual(expected_result, result)
-- 
GitLab


From 553801900621c03fb463b3ddd2db65059bf894b8 Mon Sep 17 00:00:00 2001
From: naurril <naurril@gmail.com>
Date: Sun, 1 Jul 2018 01:13:06 +0800
Subject: [PATCH 0015/1357] Removed unused lambda capture

---
 tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
index f9f36443a8..6824e0f89f 100644
--- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
+++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
@@ -50,7 +50,7 @@ class ParallelConcatRemovePass : public GraphOptimizationPass {
     }
     for (Node* n : matches) {
       AttrSlice n_attrs = n->attrs();
-      auto base_make_node = [n, g, &n_attrs](const string& op,
+      auto base_make_node = [n, &n_attrs](const string& op,
                                              const string& name) {
         NodeBuilder node_builder(name, op);
         node_builder.Device(n->requested_device());
-- 
GitLab


From f7a00dbf1799f3fb3900b0788047e460a9abfd31 Mon Sep 17 00:00:00 2001
From: naurril <naurril@gmail.com>
Date: Sun, 1 Jul 2018 01:47:25 +0800
Subject: [PATCH 0016/1357] Removed unused lambda capture

---
 tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
index 6824e0f89f..0f853ae52a 100644
--- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
+++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
@@ -60,7 +60,7 @@ class ParallelConcatRemovePass : public GraphOptimizationPass {
         }
         return node_builder;
       };
-      auto make_node = [n, g, &n_attrs, &base_make_node](string op) {
+      auto make_node = [n, g, &base_make_node](string op) {
         return base_make_node(
             op, g->NewName(strings::StrCat(n->name(), "/Internal")));
       };
-- 
GitLab


From e5a7c13a8f15b0f98df849fbe3196f2ecedec04e Mon Sep 17 00:00:00 2001
From: naurril <naurril@gmail.com>
Date: Tue, 3 Jul 2018 00:21:25 +0800
Subject: [PATCH 0017/1357] cleanup CondContext at execption

---
 tensorflow/python/ops/control_flow_ops.py | 30 +++++++++++++----------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index fc37805c79..386305ba30 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -2044,22 +2044,26 @@ def cond(pred,
 
     # Build the graph for the true branch in a new context.
     context_t = CondContext(pred, pivot_1, branch=1)
-    context_t.Enter()
-    orig_res_t, res_t = context_t.BuildCondBranch(true_fn)
-    if orig_res_t is None:
-      raise ValueError("true_fn must have a return value.")
-    context_t.ExitResult(res_t)
-    context_t.Exit()
+    try:
+      context_t.Enter()
+      orig_res_t, res_t = context_t.BuildCondBranch(true_fn)
+      if orig_res_t is None:
+        raise ValueError("true_fn must have a return value.")
+      context_t.ExitResult(res_t)
+    finally:
+      context_t.Exit()
 
     # Build the graph for the false branch in a new context.
     context_f = CondContext(pred, pivot_2, branch=0)
-    context_f.Enter()
-    orig_res_f, res_f = context_f.BuildCondBranch(false_fn)
-    if orig_res_f is None:
-      raise ValueError("false_fn must have a return value.")
-    context_f.ExitResult(res_f)
-    context_f.Exit()
-
+    try:
+      context_f.Enter()
+      orig_res_f, res_f = context_f.BuildCondBranch(false_fn)
+      if orig_res_f is None:
+        raise ValueError("false_fn must have a return value.")
+      context_f.ExitResult(res_f)
+    finally:
+      context_f.Exit()
+        
     if not strict:
       orig_res_t = _UnpackIfSingleton(orig_res_t)
       orig_res_f = _UnpackIfSingleton(orig_res_f)
-- 
GitLab


From 9bab0c89c4ffeeb780e7a3dc415ab888164b9b00 Mon Sep 17 00:00:00 2001
From: "candy.dc" <dingchen.mail@gmail.com>
Date: Thu, 26 Jul 2018 11:36:30 +0800
Subject: [PATCH 0018/1357] fix: No need to convert to tensor when using
 ResourceVariable in embedding_lookup, because ResourceVariable support
 ResourceGather OP.

---
 tensorflow/contrib/layers/python/layers/embedding_ops.py | 7 ++++---
 tensorflow/python/feature_column/feature_column_v2.py    | 7 ++++---
 tensorflow/python/ops/embedding_ops.py                   | 7 ++++---
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py
index 60e1d85ea9..897aed527d 100644
--- a/tensorflow/contrib/layers/python/layers/embedding_ops.py
+++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py
@@ -112,9 +112,10 @@ def safe_embedding_lookup_sparse(embedding_weights,
   dtype = sparse_weights.dtype if sparse_weights is not None else None
   if isinstance(embedding_weights, variables.PartitionedVariable):
     embedding_weights = list(embedding_weights)
-  embedding_weights = [
-      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
-  ]
+  if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable):
+    embedding_weights = [
+        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
+    ]
 
   contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                               [sparse_weights])
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index b4dd23f58d..220a4f7ed6 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -3283,9 +3283,10 @@ def _safe_embedding_lookup_sparse(embedding_weights,
     raise ValueError('Missing embedding_weights %s.' % embedding_weights)
 
   dtype = sparse_weights.dtype if sparse_weights is not None else None
-  embedding_weights = [
-      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
-  ]
+  if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable):
+    embedding_weights = [
+        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
+    ]
 
   with ops.name_scope(name, 'embedding_lookup',
                       embedding_weights + [sparse_ids,
diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py
index 27c2fa7017..fe422f5095 100644
--- a/tensorflow/python/ops/embedding_ops.py
+++ b/tensorflow/python/ops/embedding_ops.py
@@ -545,9 +545,10 @@ def safe_embedding_lookup_sparse(embedding_weights,
     raise ValueError('Missing embedding_weights %s.' % embedding_weights)
 
   dtype = sparse_weights.dtype if sparse_weights is not None else None
-  embedding_weights = [
-      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
-  ]
+  if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable):
+    embedding_weights = [
+        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
+    ]
 
   with ops.name_scope(name, 'embedding_lookup',
                       embedding_weights + [sparse_ids,
-- 
GitLab


From aba7fcaf87f8d4099212db2e3bffad1dbab168a2 Mon Sep 17 00:00:00 2001
From: shaohua <shaohua.zhang@intel.com>
Date: Thu, 26 Jul 2018 15:00:53 +0800
Subject: [PATCH 0019/1357] Fix gcc6.3 build link issue

Signed-off-by: shaohua <shaohua.zhang@intel.com>
---
 tensorflow/tensorflow.bzl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 340d3f393c..054d68d42c 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -404,7 +404,7 @@ def tf_gen_op_wrapper_cc(name,
   tf_cc_binary(
       name=tool,
       copts=tf_copts(),
-      linkopts=if_not_windows(["-lm"]),
+      linkopts=if_not_windows(["-lm","-Wl,-ldl"]),
       linkstatic=1,  # Faster to link this one-time-use binary dynamically
       deps=[op_gen] + deps)
 
@@ -573,7 +573,7 @@ def tf_gen_op_wrapper_py(name,
     deps = [str(Label("//tensorflow/core:" + name + "_op_lib"))]
   tf_cc_binary(
       name=tool_name,
-      linkopts=if_not_windows(["-lm"]) + cc_linkopts,
+      linkopts=if_not_windows(["-lm","-Wl,-ldl"]) + cc_linkopts,
       copts=tf_copts(),
       linkstatic=1,  # Faster to link this one-time-use binary dynamically
       deps=([
-- 
GitLab


From 27de8e717c1bec91398f5a6be6c7287b657fc960 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 25 Jul 2018 02:29:43 +0000
Subject: [PATCH 0020/1357] Improve shape function for CudnnRNNParamsSize

In cudnn_rnn_ops.cc, the CudnnRNNParamsSize does not
have restrictions on num_layers, num_units, and input_size,
though they all should be scalars.

This fix adds the shape check of num_layers, num_units, and input_size
for CudnnRNNParamsSize.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/cudnn_rnn_ops.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/core/ops/cudnn_rnn_ops.cc b/tensorflow/core/ops/cudnn_rnn_ops.cc
index f78f7a897a..7eb141aa8c 100644
--- a/tensorflow/core/ops/cudnn_rnn_ops.cc
+++ b/tensorflow/core/ops/cudnn_rnn_ops.cc
@@ -52,6 +52,12 @@ REGISTER_OP("CudnnRNNParamsSize")
     .Attr("seed2: int = 0")
     .Output("params_size: S")
     .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      // num_layers, num_units, and input_size should be scalars.
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+
       c->set_output(0, c->Vector(1));
       return Status::OK();
     });
-- 
GitLab


From 01387ccddcf5c23d48c5745f4a6a49a670f528aa Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 25 Jul 2018 04:28:08 +0000
Subject: [PATCH 0021/1357] Add test cases for shape function of
 CudnnRNNParamsSize

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../python/kernel_tests/cudnn_rnn_ops_test.py | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py
index 5a667485be..675b7ce185 100644
--- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py
+++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py
@@ -413,6 +413,28 @@ class CudnnRNNTestParamsSize(TensorFlowTestCase):
         self._testOneLSTMParamsSize(num_layers, num_units, input_size,
                                     direction)
 
+  @unittest.skipUnless(test.is_built_with_cuda(),
+                       "Test only applicable when running on GPUs")
+  def testLSTMParamsSizeShape(self):
+      with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"):
+        model = _CreateModel(
+            cudnn_rnn_ops.CUDNN_LSTM,
+            constant_op.constant([4]), 200, 200,
+            direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
+        params_size = model.params_size()
+      with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"):
+        model = _CreateModel(
+            cudnn_rnn_ops.CUDNN_LSTM,
+            4, constant_op.constant([200]), 200,
+            direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
+        params_size = model.params_size()
+      with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"):
+        model = _CreateModel(
+            cudnn_rnn_ops.CUDNN_LSTM,
+            4, 200, constant_op.constant([200]),
+            direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
+        params_size = model.params_size()
+
 
 class CudnnRNNTestInference(TensorFlowTestCase):
 
-- 
GitLab


From d27b5a3e5458c82ce1ca3cda1a9879149c779959 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 25 Jul 2018 14:52:52 +0000
Subject: [PATCH 0022/1357] Pylint fix

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../python/kernel_tests/cudnn_rnn_ops_test.py | 39 ++++++++++---------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py
index 675b7ce185..c59d3682d4 100644
--- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py
+++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py
@@ -416,24 +416,27 @@ class CudnnRNNTestParamsSize(TensorFlowTestCase):
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
   def testLSTMParamsSizeShape(self):
-      with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"):
-        model = _CreateModel(
-            cudnn_rnn_ops.CUDNN_LSTM,
-            constant_op.constant([4]), 200, 200,
-            direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
-        params_size = model.params_size()
-      with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"):
-        model = _CreateModel(
-            cudnn_rnn_ops.CUDNN_LSTM,
-            4, constant_op.constant([200]), 200,
-            direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
-        params_size = model.params_size()
-      with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"):
-        model = _CreateModel(
-            cudnn_rnn_ops.CUDNN_LSTM,
-            4, 200, constant_op.constant([200]),
-            direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
-        params_size = model.params_size()
+    with self.assertRaisesRegexp(
+        ValueError, "Shape must be rank 0 but is rank 1"):
+      model = _CreateModel(
+          cudnn_rnn_ops.CUDNN_LSTM,
+          constant_op.constant([4]), 200, 200,
+          direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
+      params_size = model.params_size()
+    with self.assertRaisesRegexp(
+        ValueError, "Shape must be rank 0 but is rank 1"):
+      model = _CreateModel(
+          cudnn_rnn_ops.CUDNN_LSTM,
+          4, constant_op.constant([200]), 200,
+          direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
+      params_size = model.params_size()
+    with self.assertRaisesRegexp(
+        ValueError, "Shape must be rank 0 but is rank 1"):
+      model = _CreateModel(
+          cudnn_rnn_ops.CUDNN_LSTM,
+          4, 200, constant_op.constant([200]),
+          direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
+      params_size = model.params_size()
 
 
 class CudnnRNNTestInference(TensorFlowTestCase):
-- 
GitLab


From c86327921c6e5e918250652558e4075abd88c6f4 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 25 Jul 2018 14:53:02 +0000
Subject: [PATCH 0023/1357] Add additional unit test in c++ for cudnn_rnn_ops

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/cudnn_rnn_ops_test.cc | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/cudnn_rnn_ops_test.cc b/tensorflow/core/ops/cudnn_rnn_ops_test.cc
index 2dd867561b..095ee1fc95 100644
--- a/tensorflow/core/ops/cudnn_rnn_ops_test.cc
+++ b/tensorflow/core/ops/cudnn_rnn_ops_test.cc
@@ -26,7 +26,19 @@ namespace tensorflow {
 
 TEST(CudnnRNNOpsTest, ParamsSize_ShapeFn) {
   ShapeInferenceTestOp op("CudnnRNNParamsSize");
-  INFER_OK(op, "[1];[1];[1]", "[1]");
+  INFER_OK(op, "[];[];[]", "[1]");
+  INFER_OK(op, "?;[];[]", "[1]");
+  INFER_OK(op, "[];?;[]", "[1]");
+  INFER_OK(op, "[];[];?", "[1]");
+  INFER_OK(op, "[];?;?", "[1]");
+  INFER_OK(op, "?;?;?", "[1]");
+
+  INFER_ERROR("Shape must be rank 0 ", op,
+              "[1,2];?;[]");
+  INFER_ERROR("Shape must be rank 0 ", op,
+              "?;[2];[]");
+  INFER_ERROR("Shape must be rank 0 ", op,
+              "?;?;[1]");
 }
 
 TEST(CudnnRNNOpsTest, ForwardLstm_ShapeFn) {
-- 
GitLab


From 2e436951bb63a0294848b6f6d3746e449a305ad1 Mon Sep 17 00:00:00 2001
From: Stefan Dyulgerov <stefan.dyulgerov@gmail.com>
Date: Tue, 17 Jul 2018 22:37:19 +0300
Subject: [PATCH 0024/1357] version_info.cc generated only once

version_info.cc in the cmake files is generated every time when we build tensorflow and this forces rebuild of the whole project, since it is in the core library.
added make.bat for windows, which does the same as make.sh to be executed easily from a build machine. the default now is visual studio 17
---
 tensorflow/contrib/cmake/make.bat             | 38 +++++++++++++++++++
 .../contrib/cmake/tf_core_framework.cmake     | 23 +++++++----
 2 files changed, 53 insertions(+), 8 deletions(-)
 create mode 100644 tensorflow/contrib/cmake/make.bat

diff --git a/tensorflow/contrib/cmake/make.bat b/tensorflow/contrib/cmake/make.bat
new file mode 100644
index 0000000000..d52b24e01d
--- /dev/null
+++ b/tensorflow/contrib/cmake/make.bat
@@ -0,0 +1,38 @@
+%echo off
+
+cd /d %~dp0
+
+if exist _build rd /s /q _build
+
+mkdir _build
+chdir _build
+
+
+rem cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install
+
+CALL :NORMALIZEPATH "..\..\..\.."
+SET SOURCE_DIR=%RETVAL%
+
+echo %SOURCE_DIR%
+
+SET SOURCE_DIR=F:\frameworks\tensorflow\
+
+CALL :NORMALIZEPATH "../../../tools/git/gen_git_source.py"
+SET SOURCE_PYTHON_SCRIPT=%RETVAL%
+
+CALL :NORMALIZEPATH "../../../core/util/version_info.cc"
+SET SOURCE_VERSION_CC=%RETVAL%
+
+python %SOURCE_PYTHON_SCRIPT% --raw_generate %SOURCE_VERSION_CC% --source_dir %SOURCE_DIR% --git_tag_override=
+
+cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install
+
+EXIT /B
+
+:NORMALIZEPATH
+  SET RETVAL=%~dpfn1
+  EXIT /B
+
+
+
+                                                                              
\ No newline at end of file
diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index 067c299a71..7e806685b8 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -258,14 +258,21 @@ add_dependencies(tf_core_lib ${tensorflow_EXTERNAL_DEPENDENCIES} tf_protos_cc)
 # force_rebuild always runs forcing ${VERSION_INFO_CC} target to run
 # ${VERSION_INFO_CC} would cache, but it depends on a phony never produced
 # target.
-set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc)
-add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC})
-add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo)
-add_custom_command(OUTPUT
-    ${VERSION_INFO_CC}
-    COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py
-    ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE}
-    DEPENDS __force_rebuild)
+# This code forces rebuild every time, not needed as version from git is fetched only once
+# move to make.bat which mimicks make.sh
+
+if (NOT WIN32)
+
+  set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc)
+  add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC})
+  add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo)
+  add_custom_command(OUTPUT
+      ${VERSION_INFO_CC}
+      COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py
+      ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE}
+      DEPENDS __force_rebuild)
+endif()
+
 set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc)
 
 ########################################################
-- 
GitLab


From 0d7b11f4d63f9bae0d0e4001dd96ce840810210b Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 5 Aug 2018 17:23:47 +0000
Subject: [PATCH 0025/1357] Fix op_scope warning in adjust_gamma

While running the following op_scope causes the warning:
```
Python 3.5.2 (default, Nov 23 2017, 16:37:01)
[GCC 5.4.0 20160609] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import tensorflow as tf
i>>> import numpy as np
>>> tf.image.adjust_gamma(np.random.uniform(0.0, 255.0, (8, 8)), gamma=1)
WARNING:tensorflow:tf.op_scope(values, name, default_name) is deprecated, use tf.name_scope(name, default_name, values)
<tf.Tensor 'adjust_gamma/mul_1:0' shape=(8, 8) dtype=float32>
>>>
```

This fix fixes the warning by switching op_scope to name_scope.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/ops/image_ops_impl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 855a4d0c33..1b11b8b074 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1377,7 +1377,7 @@ def adjust_gamma(image, gamma=1, gain=1):
     [1] http://en.wikipedia.org/wiki/Gamma_correction
   """
 
-  with ops.op_scope([image, gamma, gain], None, 'adjust_gamma'):
+  with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name:
     # Convert pixel value to DT_FLOAT for computing adjusted image.
     img = ops.convert_to_tensor(image, name='img', dtype=dtypes.float32)
     # Keep image dtype for computing the scale of corresponding dtype.
-- 
GitLab


From b81f4bb5468b0fdf9e36591d3a7d56740bedb7dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Thu, 9 Aug 2018 14:34:52 +0800
Subject: [PATCH 0026/1357] ENH: implement feature importances

---
 .../python/estimator/canned/boosted_trees.py  | 105 +++++++++++++++++-
 1 file changed, 101 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 8b423f76de..060f5cb3fa 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -21,6 +21,11 @@ import abc
 import collections
 import functools
 
+import numpy as np
+
+from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
+from tensorflow.python.client import session as tf_session
+from tensorflow.python.eager import context
 from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import model_fn
 from tensorflow.python.estimator.canned import head as head_lib
@@ -38,7 +43,9 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops.losses import losses
 from tensorflow.python.summary import summary
+from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import distribute as distribute_lib
+from tensorflow.python.training import saver
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.training import training_util
 from tensorflow.python.util.tf_export import estimator_export
@@ -54,6 +61,8 @@ _HOLD_FOR_MULTI_DIM_SUPPORT = object()
 _DUMMY_NUM_BUCKETS = -1
 _DUMMY_NODE_ID = -1
 
+_BOOSTED_TREES_SERIALIZED_PROTO = '_BOOSTED_TREES_SERIALIZED_PROTO'
+
 
 def _get_transformed_features(features, sorted_feature_columns):
   """Gets the transformed features from features/feature_columns pair.
@@ -736,6 +745,8 @@ def _bt_model_fn(
           bucketized_features=input_feature_list,
           logits_dimension=head.logits_dimension)
     else:
+      _, serialized_proto = tree_ensemble.serialize()
+      ops.add_to_collection(_BOOSTED_TREES_SERIALIZED_PROTO, serialized_proto)
       if is_single_machine:
         local_tree_ensemble = tree_ensemble
         ensemble_reload = control_flow_ops.no_op()
@@ -910,8 +921,92 @@ def _create_regression_head(label_dimension, weight_column=None):
   # pylint: enable=protected-access
 
 
+def _compute_feature_importance_for_tree(tree, num_features, normalize):
+  importances = np.zeros(num_features)
+
+  for node in tree.nodes:
+    node_type = node.WhichOneof('node')
+    if node_type == 'bucketized_split':
+      feature_id = node.bucketized_split.feature_id
+      importances[feature_id] += node.metadata.gain
+    elif node_type == 'leaf':
+      assert node.metadata.gain == 0
+    else:
+      raise ValueError('Unexpected split type %s', node_type)
+
+  if normalize:
+    normalizer = np.sum(importances)
+    if normalizer > 0.0:
+      # Avoid dividing by zero (e.g., when root is pure)
+      importances /= normalizer
+
+  return importances
+
+
+def compute_feature_importances(tree_ensemble, num_features, normalize=True):
+  tree_importances = [_compute_feature_importance_for_tree(tree,
+                                                           num_features,
+                                                           normalize)
+                      for tree in tree_ensemble.trees]
+  tree_importances = np.array(tree_importances)
+  tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1)
+  feature_importances = np.sum(tree_importances * tree_weights,
+                               axis=0) / np.sum(tree_weights)
+  if normalize:
+    normalizer = np.sum(feature_importances)
+    if normalizer > 0.0:
+      feature_importances /= normalizer
+
+  sorted_feature = np.argsort(feature_importances)[::-1]
+  return sorted_feature, feature_importances[sorted_feature]
+
+
+class _BoostedTrees(estimator.Estimator):
+
+  def __init__(self, model_fn, model_dir, config, feature_columns):
+    super(_BoostedTrees, self).__init__(
+        model_fn=model_fn, model_dir=model_dir, config=config)
+
+    sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
+    self._num_features = _calculate_num_features(sorted_feature_columns)
+
+  def compute_feature_importances(self, normalize=True):
+    tree_ensemble = self._read_tree_ensemble_from_checkpoint()
+    if tree_ensemble:
+      return compute_feature_importances(tree_ensemble,
+                                         self._num_features,
+                                         normalize)
+    else:
+      return [], []
+
+  def _read_tree_ensemble_from_checkpoint(self):
+    with context.graph_mode():
+      checkpoint_path = checkpoint_management.latest_checkpoint(
+          self._model_dir)
+      if not checkpoint_path:
+        raise ValueError("Couldn't find trained model at %s." % self._model_dir)
+
+      with ops.Graph().as_default() as g:
+        with tf_session.Session(config=self._session_config) as session:
+          meta_file = checkpoint_path + '.meta'
+          graph_saver = saver.import_meta_graph(meta_file)
+          graph_saver.restore(session, checkpoint_path)
+
+          serialized_proto = ops.get_collection(_BOOSTED_TREES_SERIALIZED_PROTO)
+          assert len(serialized_proto) == 1
+          serialized_proto_string = session.run(serialized_proto[0])
+
+          if serialized_proto_string:
+            tree_ensemble = boosted_trees_pb2.TreeEnsemble()
+            tree_ensemble.ParseFromString(serialized_proto_string)
+            return tree_ensemble
+          else:
+            # serialized_proto_string is empty string before training.
+            return None
+
+
 @estimator_export('estimator.BoostedTreesClassifier')
-class BoostedTreesClassifier(estimator.Estimator):
+class BoostedTreesClassifier(_BoostedTrees):
   """A Classifier for Tensorflow Boosted Trees models.
 
   @compatibility(eager)
@@ -1046,11 +1141,12 @@ class BoostedTreesClassifier(estimator.Estimator):
           closed_form_grad_and_hess_fn=closed_form)
 
     super(BoostedTreesClassifier, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        feature_columns=feature_columns)
 
 
 @estimator_export('estimator.BoostedTreesRegressor')
-class BoostedTreesRegressor(estimator.Estimator):
+class BoostedTreesRegressor(_BoostedTrees):
   """A Regressor for Tensorflow Boosted Trees models.
 
   @compatibility(eager)
@@ -1169,4 +1265,5 @@ class BoostedTreesRegressor(estimator.Estimator):
           n_batches_per_layer, config)
 
     super(BoostedTreesRegressor, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        feature_columns=feature_columns)
-- 
GitLab


From 54fbe83c1bc50510a7712ab78aaf369ba562538e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Thu, 9 Aug 2018 14:35:19 +0800
Subject: [PATCH 0027/1357] TST: add test case

---
 .../estimator/canned/boosted_trees_test.py    | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index ec597e4686..054d820527 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -91,6 +91,17 @@ def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None):
   return _input_fn
 
 
+def _compute_feature_importances_np(feature_gains, normalize):
+  if normalize:
+    feature_gains /= np.sum(feature_gains, axis=1, keepdims=True)
+    feature_gains = np.nan_to_num(feature_gains)
+    feature_importances = np.sum(feature_gains, axis=0) / len(feature_gains)
+    feature_importances /= np.sum(feature_importances)
+    return np.nan_to_num(feature_importances)
+  else:
+    return np.sum(feature_gains, axis=0) / len(feature_gains)
+
+
 class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -154,6 +165,10 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     predictions = list(est.predict(input_fn=predict_input_fn))
     self.assertAllClose([[0], [0], [0], [0], [0]],
                         [pred['class_ids'] for pred in predictions])
+    self.assertEqual(3, est._num_features)  # pylint:disable=protected-access
+    sorted_features, importances = est.compute_feature_importances()
+    self.assertAllEqual([], sorted_features)
+    self.assertAllEqual([], importances)
 
   def testTrainAndEvaluateBinaryClassifier(self):
     input_fn = _make_train_input_fn(is_classification=True)
@@ -544,6 +559,85 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
     self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
 
+  def testCalculateFeatureImportances(self):
+    input_fn = _make_train_input_fn(is_classification=True)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    self.assertEqual(3, est._num_features)  # pylint:disable=protected-access
+    # It will stop after 5 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(input_fn, steps=num_steps)
+
+    # TreeEnsemble Proto:
+    # tree_ensemble: trees {
+    #   nodes {
+    #     bucketized_split {
+    #       feature_id: 2
+    #       threshold: 2
+    #       left_id: 1
+    #       right_id: 2
+    #     }
+    #     metadata {
+    #       gain: 0.426666676998
+    #     }
+    #   }
+    #   ......
+    #   nodes {
+    #     bucketized_split {
+    #       threshold: 1
+    #       left_id: 5
+    #       right_id: 6
+    #     }
+    #     metadata {
+    #       gain: 0.133481562138
+    #       original_leaf {
+    #         scalar: 0.066666662693
+    #       }
+    #     }
+    #   }
+    #   ......
+    #   nodes {
+    #     bucketized_split {
+    #       left_id: 11
+    #       right_id: 12
+    #     }
+    #     metadata {
+    #       gain: 0.400360047817
+    #       original_leaf {
+    #         scalar: 0.0599950700998
+    #       }
+    #     }
+    #   }
+    # }
+    # trees {
+    #   nodes {
+    #     leaf {
+    #     }
+    #   }
+    # }
+    # tree_weights: 1.0
+    # tree_weights: 1.0
+    # ......
+    sorted_features_expected = [0, 2, 1]
+    feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0],  # 1st tree.
+                     [0.0, 0.0, 0.0]]                                         # 2nd tree.
+
+    sorted_features, importances = est.compute_feature_importances(normalize=False)
+    self.assertAllEqual(sorted_features_expected, sorted_features)
+    self.assertAllClose(_compute_feature_importances_np(feature_gains, False),
+                        importances)
+
+    sorted_features1, importances1 = est.compute_feature_importances(normalize=True)
+    self.assertAllEqual(sorted_features_expected, sorted_features1)
+    self.assertAllClose(_compute_feature_importances_np(feature_gains, True),
+                        importances1)
+
 
 class ModelFnTests(test_util.TensorFlowTestCase):
   """Tests bt_model_fn including unexposed internal functionalities."""
-- 
GitLab


From b127c201cda558db21ce5f48f5899593d73da46b Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 10 Aug 2018 20:37:32 +0000
Subject: [PATCH 0028/1357] Fix clang-format issue in `Experimental
 clang-format Check`

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/cudnn_rnn_ops.cc      | 3 ---
 tensorflow/core/ops/cudnn_rnn_ops_test.cc | 9 +++------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/ops/cudnn_rnn_ops.cc b/tensorflow/core/ops/cudnn_rnn_ops.cc
index 7eb141aa8c..f84142c992 100644
--- a/tensorflow/core/ops/cudnn_rnn_ops.cc
+++ b/tensorflow/core/ops/cudnn_rnn_ops.cc
@@ -37,7 +37,6 @@ using shape_inference::DimensionHandle;
 using shape_inference::InferenceContext;
 using shape_inference::ShapeHandle;
 
-
 REGISTER_OP("CudnnRNNParamsSize")
     .Input("num_layers: int32")
     .Input("num_units: int32")
@@ -62,7 +61,6 @@ REGISTER_OP("CudnnRNNParamsSize")
       return Status::OK();
     });
 
-
 REGISTER_OP("CudnnRNN")
     .Input("input: T")
     .Input("input_h: T")
@@ -254,7 +252,6 @@ REGISTER_OP("CudnnRNNParamsToCanonical")
       return Status::OK();
     });
 
-
 REGISTER_OP("CudnnRNNCanonicalToParams")
     .Input("num_layers: int32")
     .Input("num_units: int32")
diff --git a/tensorflow/core/ops/cudnn_rnn_ops_test.cc b/tensorflow/core/ops/cudnn_rnn_ops_test.cc
index 095ee1fc95..13c3b933f4 100644
--- a/tensorflow/core/ops/cudnn_rnn_ops_test.cc
+++ b/tensorflow/core/ops/cudnn_rnn_ops_test.cc
@@ -33,12 +33,9 @@ TEST(CudnnRNNOpsTest, ParamsSize_ShapeFn) {
   INFER_OK(op, "[];?;?", "[1]");
   INFER_OK(op, "?;?;?", "[1]");
 
-  INFER_ERROR("Shape must be rank 0 ", op,
-              "[1,2];?;[]");
-  INFER_ERROR("Shape must be rank 0 ", op,
-              "?;[2];[]");
-  INFER_ERROR("Shape must be rank 0 ", op,
-              "?;?;[1]");
+  INFER_ERROR("Shape must be rank 0 ", op, "[1,2];?;[]");
+  INFER_ERROR("Shape must be rank 0 ", op, "?;[2];[]");
+  INFER_ERROR("Shape must be rank 0 ", op, "?;?;[1]");
 }
 
 TEST(CudnnRNNOpsTest, ForwardLstm_ShapeFn) {
-- 
GitLab


From 29f596cf21f0332c1e2ece8798fdd9fefd2ba947 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Jun 2018 14:04:59 +0000
Subject: [PATCH 0029/1357] Improve the shape function of Bincount

There was not a lot of restriction in shape function
of Bincount and the output shape was unknown.
It is actually possible to get a better shape output
if `size` input is known.
This fix adds enhancement to the shape function of
Bincount.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/math_ops.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 1667c398f4..7d0f29368b 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1416,6 +1416,10 @@ REGISTER_OP("Bincount")
     .Attr("T: {int32, int64, float32, float64}")
     .Output("bins: T")
     .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      // The input `size` must be a scalar.
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+
       c->set_output(0, c->UnknownShapeOfRank(1));
       return Status::OK();
     });
-- 
GitLab


From 740c58b6fa5b6e1c85f688fbda322da0231aa169 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Jun 2018 14:44:44 +0000
Subject: [PATCH 0030/1357] Return `[size]` shape if size is known for
 Bincount.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/math_ops.cc | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 7d0f29368b..b57385f63b 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1420,7 +1420,19 @@ REGISTER_OP("Bincount")
       // The input `size` must be a scalar.
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
 
-      c->set_output(0, c->UnknownShapeOfRank(1));
+      const Tensor* size_tensor = c->input_tensor(1);
+      if (size_tensor == nullptr) {
+        // Return unknown shape if size is not known.
+        c->set_output(0, c->UnknownShapeOfRank(1));
+        return Status::OK();
+      }
+
+      // Return `[size]` shape if size is known.
+      int32 size_val = size_tensor->scalar<int32>()();
+      if (size_val < 0) {
+        return errors::InvalidArgument("size (", size_val, ") must be non-negative");
+      }
+      c->set_output(0, c->MakeShape({size_val}));
       return Status::OK();
     });
 
-- 
GitLab


From e6981fc2225a529427391e98f492eee7bb865988 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 11 Aug 2018 18:39:13 +0000
Subject: [PATCH 0031/1357] Add additional test cases for Bincount Shape
 function, and fix clang-format issue

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/math_ops.cc               |  3 ++-
 tensorflow/core/ops/math_ops_test.cc          | 12 ++++++++++++
 .../python/kernel_tests/bincount_op_test.py   | 19 +++++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index b57385f63b..0ba4a9a005 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1430,7 +1430,8 @@ REGISTER_OP("Bincount")
       // Return `[size]` shape if size is known.
       int32 size_val = size_tensor->scalar<int32>()();
       if (size_val < 0) {
-        return errors::InvalidArgument("size (", size_val, ") must be non-negative");
+        return errors::InvalidArgument("size (", size_val,
+                                       ") must be non-negative");
       }
       c->set_output(0, c->MakeShape({size_val}));
       return Status::OK();
diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc
index 23f1538912..7bf7c476f4 100644
--- a/tensorflow/core/ops/math_ops_test.cc
+++ b/tensorflow/core/ops/math_ops_test.cc
@@ -558,4 +558,16 @@ TEST(MathOpsTest, QuantizedAdd_ShapeFn) {
   INFER_ERROR("must be rank 0", op, "?;?;?;?;[3];?");
   INFER_ERROR("must be rank 0", op, "?;?;?;?;?;[4]");
 }
+
+TEST(MathOpsTest, Bincount_ShapeFn) {
+  ShapeInferenceTestOp op("Bincount");
+
+  // size should be scalar.
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;[1];?");
+
+  INFER_OK(op, "?;?;?", "[?]");
+  INFER_OK(op, "?;[];?", "[?]");
+  INFER_OK(op, "[?];[];?", "[?]");
+  INFER_OK(op, "[?];[];[?]", "[?]");
+}
 }  // end namespace tensorflow
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 2767df127e..15d9de56db 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -22,6 +22,8 @@ import numpy as np
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import googletest
 
@@ -97,6 +99,23 @@ class BincountTest(test_util.TensorFlowTestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         math_ops.bincount([1, 2, 3, -1, 6, 8]).eval()
 
+  def test_shape_function(self):
+    # size must be scalar.
+    with self.assertRaisesRegexp(
+        ValueError, "Shape must be rank 0 but is rank 1 for 'Bincount'"):
+      gen_math_ops.bincount([1, 2, 3, -1, 6, 8], [1], [])
+    # size must be positive.
+    with self.assertRaisesRegexp(
+        ValueError, "must be non-negative"):
+      gen_math_ops.bincount([1, 2, 3, -1, 6, 8], -5, [])
+    # if size is a constant then the shape is known.
+    v1 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], 5, [])
+    self.assertAllEqual(v1.get_shape().as_list(), [5])
+    # if size is a placeholder then the shape is unknown.
+    s = array_ops.placeholder(dtype=dtypes.int32)
+    v2 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], s, [])
+    self.assertAllEqual(v2.get_shape().as_list(), [None])
+
 
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From 7ad604778ed69303458145376f2b6ec403fc5345 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Mon, 13 Aug 2018 15:57:54 +0800
Subject: [PATCH 0032/1357] ENH: mapping idx to feature_name

---
 .../python/estimator/canned/boosted_trees.py  | 38 ++++++--
 .../estimator/canned/boosted_trees_test.py    | 88 +++++++++++++++++--
 2 files changed, 113 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 060f5cb3fa..ba90b361b3 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -201,6 +201,23 @@ def _calculate_num_features(sorted_feature_columns):
   return num_features
 
 
+def _generate_feature_name_for_index(sorted_feature_columns):
+  names = []
+  for column in sorted_feature_columns:
+    if isinstance(column, feature_column_lib._IndicatorColumn):  # pylint:disable=protected-access
+      categorical_column = column.categorical_column
+      if isinstance(categorical_column,
+                    feature_column_lib._VocabularyListCategoricalColumn):  # pylint:disable=protected-access
+        for voc in categorical_column.vocabulary_list:
+          names.append('{}:{}'.format(column.name, voc))
+      else:
+        for num in categorical_column._num_buckets:  # pylint:disable=protected-access
+          names.append('{}:{}'.format(column.name, num))
+    else:
+      names.append(column.name)
+  return names
+
+
 def _cache_transformed_features(features, sorted_feature_columns, batch_size):
   """Transform features and cache, then returns (cached_features, cache_op)."""
   num_features = _calculate_num_features(sorted_feature_columns)
@@ -943,7 +960,9 @@ def _compute_feature_importance_for_tree(tree, num_features, normalize):
   return importances
 
 
-def compute_feature_importances(tree_ensemble, num_features, normalize=True):
+def compute_feature_importances(tree_ensemble,
+                                num_features,
+                                normalize=True):
   tree_importances = [_compute_feature_importance_for_tree(tree,
                                                            num_features,
                                                            normalize)
@@ -957,8 +976,8 @@ def compute_feature_importances(tree_ensemble, num_features, normalize=True):
     if normalizer > 0.0:
       feature_importances /= normalizer
 
-  sorted_feature = np.argsort(feature_importances)[::-1]
-  return sorted_feature, feature_importances[sorted_feature]
+  sorted_feature_idx = np.argsort(feature_importances)[::-1]
+  return sorted_feature_idx, feature_importances[sorted_feature_idx]
 
 
 class _BoostedTrees(estimator.Estimator):
@@ -967,15 +986,18 @@ class _BoostedTrees(estimator.Estimator):
     super(_BoostedTrees, self).__init__(
         model_fn=model_fn, model_dir=model_dir, config=config)
 
-    sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
-    self._num_features = _calculate_num_features(sorted_feature_columns)
+    self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
 
   def compute_feature_importances(self, normalize=True):
     tree_ensemble = self._read_tree_ensemble_from_checkpoint()
     if tree_ensemble:
-      return compute_feature_importances(tree_ensemble,
-                                         self._num_features,
-                                         normalize)
+      num_features = _calculate_num_features(self._sorted_feature_columns)
+      names_for_idx = np.array(
+          _generate_feature_name_for_index(self._sorted_feature_columns))
+      idx, importances = compute_feature_importances(tree_ensemble,
+                                                     num_features,
+                                                     normalize)
+      return names_for_idx[idx], importances
     else:
       return [], []
 
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 054d820527..880f0f10ba 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -165,7 +165,6 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     predictions = list(est.predict(input_fn=predict_input_fn))
     self.assertAllClose([[0], [0], [0], [0], [0]],
                         [pred['class_ids'] for pred in predictions])
-    self.assertEqual(3, est._num_features)  # pylint:disable=protected-access
     sorted_features, importances = est.compute_feature_importances()
     self.assertAllEqual([], sorted_features)
     self.assertAllEqual([], importances)
@@ -568,7 +567,6 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
         n_trees=1,
         max_depth=5)
 
-    self.assertEqual(3, est._num_features)  # pylint:disable=protected-access
     # It will stop after 5 steps because of the max depth and num trees.
     num_steps = 100
     # Train for a few steps, and validate final checkpoint.
@@ -624,17 +622,97 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     # tree_weights: 1.0
     # tree_weights: 1.0
     # ......
-    sorted_features_expected = [0, 2, 1]
+    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
     feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0],  # 1st tree.
                      [0.0, 0.0, 0.0]]                                         # 2nd tree.
 
     sorted_features, importances = est.compute_feature_importances(normalize=False)
-    self.assertAllEqual(sorted_features_expected, sorted_features)
+    self.assertAllEqual(feature_names_expected, sorted_features)
     self.assertAllClose(_compute_feature_importances_np(feature_gains, False),
                         importances)
 
     sorted_features1, importances1 = est.compute_feature_importances(normalize=True)
-    self.assertAllEqual(sorted_features_expected, sorted_features1)
+    self.assertAllEqual(feature_names_expected, sorted_features1)
+    self.assertAllClose(_compute_feature_importances_np(feature_gains, True),
+                        importances1)
+
+  def testCalculateFeatureImportancesWithIndicatorColumn(self):
+    categorical = feature_column.categorical_column_with_vocabulary_list(
+        key='categorical', vocabulary_list=('bad', 'good', 'ok'))
+    feature_indicator = feature_column.indicator_column(categorical)
+    bucketized_col = feature_column.bucketized_column(
+        feature_column.numeric_column(
+            'an_uninformative_feature', dtype=dtypes.float32),
+        BUCKET_BOUNDARIES)
+
+    labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
+    # Our categorical feature defines the labels perfectly
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+          'an_uninformative_feature': np.array([1, 1, 1, 1, 1]),
+          'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
+        },
+        y=labels,
+        batch_size=5,
+        shuffle=False)
+
+    # Train depth 1 tree.
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=[bucketized_col, feature_indicator],
+        n_batches_per_layer=1,
+        n_trees=1,
+        learning_rate=1.0,
+        max_depth=1)
+
+    num_steps = 1
+    est.train(input_fn, steps=num_steps)
+
+    # TreeEnsemble Proto:
+    # trees {
+    #   nodes {
+    #     bucketized_split {
+    #       feature_id: 2
+    #       left_id: 1
+    #       right_id: 2
+    #     }
+    #     metadata {
+    #       gain: 15.5952005386
+    #     }
+    #   }
+    #   nodes {
+    #     leaf {
+    #     }
+    #   }
+    #   nodes {
+    #     leaf {
+    #       scalar: 5.7000002861
+    #     }
+    #   }
+    # }
+    # trees {
+    #   nodes {
+    #     leaf {
+    #     }
+    #   }
+    # }
+    # tree_weights: 1.0
+    # tree_weights: 1.0
+    feature_names_expected = ['categorical_indicator:good',
+                              # Reverse order because feature importances
+                              # are sorted by np.argsort(f)[::-1]
+                              'categorical_indicator:ok',
+                              'categorical_indicator:bad',
+                              'an_uninformative_feature_bucketized']
+    feature_gains = [[15.5952005386, 0.0, 0.0, 0.0],  # 1st tree.
+                     [0.0, 0.0, 0.0, 0.0]]            # 2nd tree.
+
+    sorted_features, importances = est.compute_feature_importances(normalize=False)
+    self.assertAllEqual(feature_names_expected, sorted_features)
+    self.assertAllClose(_compute_feature_importances_np(feature_gains, False),
+                        importances)
+
+    sorted_features1, importances1 = est.compute_feature_importances(normalize=True)
+    self.assertAllEqual(feature_names_expected, sorted_features1)
     self.assertAllClose(_compute_feature_importances_np(feature_gains, True),
                         importances1)
 
-- 
GitLab


From aa25cc078c9b55e5ca3e0f59df43e169bfee8f3c Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Thu, 16 Aug 2018 19:04:37 +0800
Subject: [PATCH 0033/1357] Add LeakyRelu C++ Op and its gradient
 implementation.

LeakyRelu, defined as 'y = { x (x>=0) or alpha*x (x<0) }', was computed
by combined Ops 'max(x, alpha*x)' in current codes. Hence its gradient
calculation for back propagation would contain a serial of element-wise
Ops. This looks really unnecessary for such a simple op and it could be
done within just one Op with less memory accesses.
---
 tensorflow/cc/gradients/nn_grad.cc            |  13 ++
 tensorflow/cc/gradients/nn_grad_test.cc       |  13 ++
 tensorflow/core/kernels/relu_op.cc            | 153 +++++++++++-------
 tensorflow/core/kernels/relu_op.h             |  59 +++++++
 tensorflow/core/kernels/relu_op_functor.h     |  31 ++++
 tensorflow/core/kernels/relu_op_gpu.cu.cc     |  18 ++-
 tensorflow/core/ops/nn_ops.cc                 |  15 ++
 tensorflow/core/ops/ops.pbtxt                 |  68 ++++++++
 tensorflow/python/eager/pywrap_tfe_src.cc     |   2 +
 .../python/kernel_tests/relu_op_test.py       | 113 +++++++++++++
 tensorflow/python/ops/nn_grad.py              |  15 ++
 tensorflow/python/ops/nn_ops.py               |   3 +-
 12 files changed, 432 insertions(+), 71 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc
index 588e96cb19..0fc23d0bf7 100644
--- a/tensorflow/cc/gradients/nn_grad.cc
+++ b/tensorflow/cc/gradients/nn_grad.cc
@@ -143,6 +143,19 @@ Status Relu6GradHelper(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("Relu6", Relu6GradHelper);
 
+Status LeakyReluGradHelper(const Scope& scope, const Operation& op,
+                           const std::vector<Output>& grad_inputs,
+                           std::vector<Output>* grad_outputs) {
+  float alpha;
+  TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha));
+  internal::LeakyReluGrad::Attrs attrs;
+  attrs.Alpha(alpha);
+  auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs);
+  grad_outputs->push_back(dx);
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper);
+
 Status EluGradHelper(const Scope& scope, const Operation& op,
                      const std::vector<Output>& grad_inputs,
                      std::vector<Output>* grad_outputs) {
diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index aa72cf7ba2..5ebece7b6e 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -41,6 +41,7 @@ using ops::MaxPoolV2;
 using ops::Placeholder;
 using ops::Relu;
 using ops::Relu6;
+using ops::LeakyRelu;
 using ops::Selu;
 using ops::Softmax;
 using ops::Softplus;
@@ -160,6 +161,18 @@ TEST_F(NNGradTest, Relu6Grad) {
   RunTest(x, x_init_value, y, shape);
 }
 
+TEST_F(NNGradTest, LeakyReluGrad) {
+  TensorShape shape({5, 2});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+  auto y = LeakyRelu(scope_, x);
+  // Avoid input values where Leaky ReLU gradient is not well defined (around
+  // zero).
+  Tensor x_init_value = test::AsTensor<float>(
+      {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f},
+      {5, 2});
+  RunTest(x, x_init_value, y, shape);
+}
+
 TEST_F(NNGradTest, EluGrad) {
   TensorShape shape({5, 2});
   auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc
index d52358737f..c4f2ef5632 100644
--- a/tensorflow/core/kernels/relu_op.cc
+++ b/tensorflow/core/kernels/relu_op.cc
@@ -33,19 +33,25 @@ typedef Eigen::GpuDevice GPUDevice;
 typedef Eigen::SyclDevice SYCLDevice;
 #endif  // TENSORFLOW_USE_SYCL
 
-#define REGISTER_RELU_KERNELS(type)                                   \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"),      \
-      ReluOp<CPUDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"),  \
-      ReluGradOp<CPUDevice, type>);                                   \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
-      Relu6Op<CPUDevice, type>);                                      \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
-      Relu6GradOp<CPUDevice, type>)
+#define REGISTER_RELU_KERNELS(type)                                       \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"),          \
+      ReluOp<CPUDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"),      \
+      ReluGradOp<CPUDevice, type>);                                       \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"),         \
+      Relu6Op<CPUDevice, type>);                                          \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
+      Relu6GradOp<CPUDevice, type>)                                       \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
+      LeakyReluOp<CPUDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
+      LeakyReluGradOp<CPUDevice, type>);
 
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS);
 #undef REGISTER_RELU_KERNELS
@@ -99,6 +105,19 @@ namespace functor {
   extern template struct Relu6Grad<GPUDevice, T>;                              \
                                                                                \
   template <>                                                                  \
+  void LeakyRelu<GPUDevice, T>::operator()(                                    \
+      const GPUDevice& d, typename TTypes<T>::ConstTensor features,            \
+      T alpha, typename TTypes<T>::Tensor activations);                        \
+  extern template struct LeakyRelu<GPUDevice, T>;                              \
+                                                                               \
+  template <>                                                                  \
+  void LeakyReluGrad<GPUDevice, T>::operator()(                                \
+      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,           \
+      typename TTypes<T>::ConstTensor features,                                \
+      T alpha, typename TTypes<T>::Tensor backprops);                          \
+  extern template struct LeakyReluGrad<GPUDevice, T>;                          \
+                                                                               \
+  template <>                                                                  \
   void Elu<GPUDevice, T>::operator()(const GPUDevice& d,                       \
                                      typename TTypes<T>::ConstTensor features, \
                                      typename TTypes<T>::Tensor activations);  \
@@ -128,30 +147,36 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
 }  // namespace functor
 
 // Registration of the GPU implementations.
-#define REGISTER_GPU_KERNELS(type)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
-      ReluOp<GPUDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),  \
-      ReluGradOp<GPUDevice, type>);                                   \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
-      Relu6Op<GPUDevice, type>);                                      \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
-      Relu6GradOp<GPUDevice, type>);                                  \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"),       \
-      EluOp<GPUDevice, type>);                                        \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),   \
-      EluGradOp<GPUDevice, type>);                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
-      SeluOp<GPUDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),  \
+#define REGISTER_GPU_KERNELS(type)                                        \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"),          \
+      ReluOp<GPUDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
+      ReluGradOp<GPUDevice, type>);                                       \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"),         \
+      Relu6Op<GPUDevice, type>);                                          \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
+      Relu6GradOp<GPUDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
+      LeakyReluOp<GPUDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
+      LeakyReluGradOp<GPUDevice, type>);                                  \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"),           \
+      EluOp<GPUDevice, type>);                                            \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),       \
+      EluGradOp<GPUDevice, type>);                                        \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"),          \
+      SeluOp<GPUDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
       SeluGradOp<GPUDevice, type>)
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
@@ -161,30 +186,36 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
 
 #ifdef TENSORFLOW_USE_SYCL
 // Registration of the GPU implementations.
-#define REGISTER_SYCL_KERNELS(type)                                    \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Relu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
-      ReluOp<SYCLDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),  \
-      ReluGradOp<SYCLDevice, type>);                                   \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Relu6").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
-      Relu6Op<SYCLDevice, type>);                                      \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      Relu6GradOp<SYCLDevice, type>);                                  \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Elu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),       \
-      EluOp<SYCLDevice, type>);                                        \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),   \
-      EluGradOp<SYCLDevice, type>);                                    \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Selu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
-      SeluOp<SYCLDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),  \
+#define REGISTER_SYCL_KERNELS(type)                                        \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Relu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),          \
+      ReluOp<SYCLDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
+      ReluGradOp<SYCLDevice, type>);                                       \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Relu6").Device(DEVICE_SYCL).TypeConstraint<type>("T"),         \
+      Relu6Op<SYCLDevice, type>);                                          \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
+      Relu6GradOp<SYCLDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("LeakyRelu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
+      LeakyReluOp<SYCLDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("LeakyReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
+      LeakyReluGradOp<SYCLDevice, type>);                                  \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Elu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),           \
+      EluOp<SYCLDevice, type>);                                            \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),       \
+      EluGradOp<SYCLDevice, type>);                                        \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Selu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),          \
+      SeluOp<SYCLDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
       SeluGradOp<SYCLDevice, type>)
 
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS);
diff --git a/tensorflow/core/kernels/relu_op.h b/tensorflow/core/kernels/relu_op.h
index e712b02bd7..c55190065c 100644
--- a/tensorflow/core/kernels/relu_op.h
+++ b/tensorflow/core/kernels/relu_op.h
@@ -131,6 +131,65 @@ void Relu6GradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
           output->flat<T>());
 }
 
+template <typename Device, typename T>
+class LeakyReluOp : public UnaryElementWiseOp<T, LeakyReluOp<Device, T>> {
+ public:
+  explicit LeakyReluOp(OpKernelConstruction* context)
+      : UnaryElementWiseOp<T, LeakyReluOp<Device, T>>(context) {
+    float alpha_tmp;
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp));
+    alpha_ = T(alpha_tmp);
+  }
+
+  void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
+    functor::LeakyRelu<Device, T> functor;
+    functor(context->eigen_device<Device>(), input.flat<T>(),
+            alpha_, output->flat<T>());
+  }
+
+ private:
+  T alpha_;
+};
+
+template <typename Device, typename T>
+class LeakyReluGradOp
+    : public BinaryElementWiseOp<T, LeakyReluGradOp<Device, T>> {
+ public:
+  explicit LeakyReluGradOp(OpKernelConstruction* context)
+      : BinaryElementWiseOp<T, LeakyReluGradOp<Device, T>>(context) {
+    float alpha_tmp;
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp));
+    alpha_ = T(alpha_tmp);
+  }
+
+  void OperateNoTemplate(OpKernelContext* context, const Tensor& g,
+                         const Tensor& a, T alpha, Tensor* output);
+
+  // INPUTS:
+  //   g (gradients): backpropagated gradients
+  //   a (inputs): either the inputs that were passed to LeakyReluOp(), or its
+  //               outputs (using either one yields the same result here).
+  // OUTPUT:
+  //   gradients to backprop
+  template <int NDIMS>
+  void Operate(OpKernelContext* context, const Tensor& g, const Tensor& a,
+               Tensor* output) {
+    OperateNoTemplate(context, g, a, alpha_, output);
+  }
+
+ private:
+  T alpha_;
+};
+
+template <typename Device, typename T>
+void LeakyReluGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
+    const Tensor& g, const Tensor& a, T alpha, Tensor* output) {
+  if (!ReluHelpers::ValidateSameSize(context, g, a)) return;
+  functor::LeakyReluGrad<Device, T> functor;
+  functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(), alpha,
+          output->flat<T>());
+};
+
 template <typename Device, typename T>
 class EluOp : public UnaryElementWiseOp<T, EluOp<Device, T>> {
  public:
diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h
index 3bc5ba8a50..7f0951451d 100644
--- a/tensorflow/core/kernels/relu_op_functor.h
+++ b/tensorflow/core/kernels/relu_op_functor.h
@@ -91,6 +91,37 @@ struct Relu6Grad {
   }
 };
 
+
+// Functor used by LeakyReluOp to do the computations.
+template <typename Device, typename T>
+struct LeakyRelu {
+  // Computes LeakyRelu activation.
+  //
+  // features: any shape.
+  // activations: same shape as "features".
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor features,
+                  T alpha, typename TTypes<T>::Tensor activations) {
+    activations.device(d) = features.cwiseMax(features * alpha);
+  }
+};
+
+// Functor used by LeakyReluGradOp to do the computations.
+template <typename Device, typename T>
+struct LeakyReluGrad {
+  // Computes LeakyReluGrad backprops.
+  //
+  // gradients: gradients backpropagated to the LeakyRelu op.
+  // features: either the inputs that were passed to the LeakyRelu or, or its
+  //           outputs (using either one yields the same result here).
+  // backprops: gradients to backpropagate to the LeakyRelu inputs.
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
+                  typename TTypes<T>::ConstTensor features, T alpha,
+                  typename TTypes<T>::Tensor backprops) {
+    backprops.device(d) =
+        (features > static_cast<T>(0)).select(gradients, gradients * alpha);
+  }
+};
+
 // Functor used by EluOp to do the computations.
 template <typename Device, typename T>
 struct Elu {
diff --git a/tensorflow/core/kernels/relu_op_gpu.cu.cc b/tensorflow/core/kernels/relu_op_gpu.cu.cc
index 089ca8ed27..4452f4dcc9 100644
--- a/tensorflow/core/kernels/relu_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/relu_op_gpu.cu.cc
@@ -114,14 +114,16 @@ struct ReluGrad<Device, Eigen::half> {
 }  // namespace functor
 
 // Definition of the GPU implementations declared in relu_op.cc.
-#define DEFINE_GPU_KERNELS(T)                       \
-  template struct functor::Relu<GPUDevice, T>;      \
-  template struct functor::ReluGrad<GPUDevice, T>;  \
-  template struct functor::Relu6<GPUDevice, T>;     \
-  template struct functor::Relu6Grad<GPUDevice, T>; \
-  template struct functor::Elu<GPUDevice, T>;       \
-  template struct functor::EluGrad<GPUDevice, T>;   \
-  template struct functor::Selu<GPUDevice, T>;      \
+#define DEFINE_GPU_KERNELS(T)                           \
+  template struct functor::Relu<GPUDevice, T>;          \
+  template struct functor::ReluGrad<GPUDevice, T>;      \
+  template struct functor::Relu6<GPUDevice, T>;         \
+  template struct functor::Relu6Grad<GPUDevice, T>;     \
+  template struct functor::LeakyRelu<GPUDevice, T>;     \
+  template struct functor::LeakyReluGrad<GPUDevice, T>; \
+  template struct functor::Elu<GPUDevice, T>;           \
+  template struct functor::EluGrad<GPUDevice, T>;       \
+  template struct functor::Selu<GPUDevice, T>;          \
   template struct functor::SeluGrad<GPUDevice, T>;
 
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index e0f25fb4ef..023f988f80 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -983,6 +983,21 @@ REGISTER_OP("Relu6Grad")
     .Attr("T: realnumbertype")
     .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
+REGISTER_OP("LeakyRelu")
+    .Input("features: T")
+    .Output("activations: T")
+    .Attr("alpha: float = 0.2")
+    .Attr("T: {half, float, double} = DT_FLOAT")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("LeakyReluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Output("backprops: T")
+    .Attr("alpha: float = 0.2")
+    .Attr("T: {half, float, double} = DT_FLOAT")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
+
 REGISTER_OP("Elu")
     .Input("features: T")
     .Output("activations: T")
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index f2595279e0..837e91bc23 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -13604,6 +13604,74 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "LeakyRelu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 0.2
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LeakykReluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 0.2
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
 op {
   name: "LearnedUnigramCandidateSampler"
   input_arg {
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 2d54555cd3..9b3b5fd7aa 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) {
           "SoftplusGrad",
           "Softsign",
           "ReluGrad",
+          "LeakyReluGrad",
           "Conv2D",
           "DepthwiseConv2dNative",
           "Dilation2D",
@@ -1799,6 +1800,7 @@ bool OpDoesntRequireInput(const string& op_name) {
           "BiasAdd",
           "Relu",
           "Relu6",
+          "LeakyRelu",
           "Elu",
           "Selu",
           "SparseSoftmaxCrossEntropyWithLogits",
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 25e947f09e..ccb3a231bb 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -252,6 +252,119 @@ class Relu6Test(test.TestCase):
     self.assertLess(err, 1e-10)
 
 
+class LeakyReluTest(test.TestCase):
+
+  def _npLeakyRelu(self, np_features, alpha=0.1):
+    return np.maximum(np_features, alpha * np_features)
+
+  def testNpLeakyRelu(self):
+    self.assertAllClose(
+        np.array([[-0.09, 0.7, -0.05, 0.3, -0.01],
+                  [0.1, -0.03, 0.5, -0.07, 0.9]]),
+        self._npLeakyRelu(
+            np.array([[-0.9, 0.7, -0.5, 0.3, -0.1], [0.1, -0.3, 0.5, -0.7, 0.9]
+                     ]), alpha=0.1))
+
+  def _testLeakyRelu(self, np_features, alpha, use_gpu=False):
+    np_leaky_relu = self._npLeakyRelu(np_features, alpha)
+    with self.test_session(use_gpu=use_gpu):
+      leaky_relu = nn_ops.leaky_relu(np_features, alpha)
+      tf_leaky_relu = leaky_relu.eval()
+    self.assertAllClose(np_leaky_relu, tf_leaky_relu)
+    self.assertShapeEqual(np_leaky_relu, leaky_relu)
+
+  def testNumbers(self):
+    for t in [np.int32, np.int64, np.float16, np.float32, np.float64]:
+      self._testLeakyRelu(
+          np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
+          alpha=0.2, use_gpu=False)
+      if t in [np.float16, np.float32, np.float64]:
+        self._testLeakyRelu(
+            np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
+            alpha=0.1, use_gpu=True)
+
+  # The gradient test for ReLU is a bit tricky as the derivative is not well
+  # defined at around zero and we want to avoid that in terms of input values.
+  def testGradientFloat32(self):
+    with self.test_session():
+      x = constant_op.constant(
+          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+          shape=[2, 5],
+          name="x")
+      y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
+      x_init = np.asarray(
+          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+          dtype=np.float32,
+          order="F")
+      err = gradient_checker.compute_gradient_error(
+          x, [2, 5], y, [2, 5], x_init_value=x_init)
+    print("leaky_relu (float32) gradient err = ", err)
+    self.assertLess(err, 1e-4)
+
+  def testGradientFloat64(self):
+    with self.test_session():
+      x = constant_op.constant(
+          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+          shape=[2, 5],
+          dtype=dtypes.float64,
+          name="x")
+      y = nn_ops.leaky_relu(x, alpha=0.2, name="leaky_relu")
+      x_init = np.asarray(
+          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+          dtype=np.float64,
+          order="F")
+      err = gradient_checker.compute_gradient_error(
+          x, [2, 5], y, [2, 5], x_init_value=x_init)
+    print("leaky_relu (float64) gradient err = ", err)
+    self.assertLess(err, 1e-10)
+
+  def testGradGradFloat32(self):
+    with self.test_session():
+      x = constant_op.constant(
+          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+          shape=[2, 5],
+          name="x")
+      y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
+      z = gradients_impl.gradients(y, x)
+      x_init = np.asarray(
+          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+          dtype=np.float32,
+          order="F")
+      err = gradient_checker.compute_gradient_error(
+          x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+    print("leaky_relu (float32) gradient of gradient err = ", err)
+    self.assertLess(err, 1e-4)
+
+  def testGradGradFloat64(self):
+    with self.test_session():
+      x = constant_op.constant(
+          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+          shape=[2, 5],
+          dtype=dtypes.float64,
+          name="x")
+      y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
+      z = gradients_impl.gradients(y, x)
+      x_init = np.asarray(
+          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+          dtype=np.float64,
+          order="F")
+      err = gradient_checker.compute_gradient_error(
+          x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+    print("leaky_relu (float64) gradient of gradient err = ", err)
+    self.assertLess(err, 1e-10)
+
+  def testGradientScalar(self):
+    with self.test_session() as sess:
+      x = variables.Variable(-100.)
+      y = nn_ops.leaky_relu(x, 0.05)
+      loss = y**2
+      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.2)
+      train_op = optimizer.minimize(loss)
+      sess.run(variables.global_variables_initializer())
+      sess.run(train_op)
+      self.assertAllClose(x.eval(), -99.9)
+
+
 class EluTest(test.TestCase):
 
   def _npElu(self, np_features):
diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py
index df23ac55ce..c2dd58bdf0 100644
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@@ -390,6 +390,21 @@ def _Relu6GradGrad(op, grad):
           array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype))
 
 
+@ops.RegisterGradient("LeakyRelu")
+def _LeakyReluGrad(op, grad):
+  x = op.inputs[0]
+  alpha = op.get_attr("alpha")
+  return gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha)
+
+
+@ops.RegisterGradient("LeakyReluGrad")
+def _LeakyReluGradGrad(op, grad):
+  x = op.inputs[1]
+  alpha = op.get_attr("alpha")
+  return (gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha),
+          array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype))
+
+
 @ops.RegisterGradient("Elu")
 def _EluGrad(op, grad):
   return gen_nn_ops.elu_grad(grad, op.outputs[0])
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 6fd1273687..31b8f3945d 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1601,8 +1601,7 @@ def leaky_relu(features, alpha=0.2, name=None):
     features = ops.convert_to_tensor(features, name="features")
     if features.dtype.is_integer:
       features = math_ops.to_float(features)
-    alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha")
-    return math_ops.maximum(alpha * features, features, name=name)
+    return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name)
 
 
 def _flatten_outer_dims(logits):
-- 
GitLab


From 0845a01256fd3797804f247f76a1655a56c119a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Fri, 17 Aug 2018 11:24:21 +0800
Subject: [PATCH 0034/1357] CLN: revise code according to comments

---
 .../python/estimator/canned/boosted_trees.py  | 81 +++++++++++++------
 .../estimator/canned/boosted_trees_test.py    | 10 +--
 2 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index ba90b361b3..848698311c 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -201,15 +201,23 @@ def _calculate_num_features(sorted_feature_columns):
   return num_features
 
 
-def _generate_feature_name_for_index(sorted_feature_columns):
+def _generate_feature_name_mapping(sorted_feature_columns):
+  """Return a list of feature name for feature ids.
+
+  Args:
+    sorted_feature_columns: a list/set of tf.feature_column sorted by name.
+
+  Returns:
+    feature_name_mapping: a list of feature name.
+  """
   names = []
   for column in sorted_feature_columns:
     if isinstance(column, feature_column_lib._IndicatorColumn):  # pylint:disable=protected-access
       categorical_column = column.categorical_column
       if isinstance(categorical_column,
                     feature_column_lib._VocabularyListCategoricalColumn):  # pylint:disable=protected-access
-        for voc in categorical_column.vocabulary_list:
-          names.append('{}:{}'.format(column.name, voc))
+        for value in categorical_column.vocabulary_list:
+          names.append('{}:{}'.format(column.name, value))
       else:
         for num in categorical_column._num_buckets:  # pylint:disable=protected-access
           names.append('{}:{}'.format(column.name, num))
@@ -938,7 +946,8 @@ def _create_regression_head(label_dimension, weight_column=None):
   # pylint: enable=protected-access
 
 
-def _compute_feature_importance_for_tree(tree, num_features, normalize):
+def _compute_feature_importances_per_tree(tree, num_features):
+  """Computes the importance of each feature in the tree."""
   importances = np.zeros(num_features)
 
   for node in tree.nodes:
@@ -951,21 +960,29 @@ def _compute_feature_importance_for_tree(tree, num_features, normalize):
     else:
       raise ValueError('Unexpected split type %s', node_type)
 
-  if normalize:
-    normalizer = np.sum(importances)
-    if normalizer > 0.0:
-      # Avoid dividing by zero (e.g., when root is pure)
-      importances /= normalizer
-
   return importances
 
 
-def compute_feature_importances(tree_ensemble,
-                                num_features,
-                                normalize=True):
-  tree_importances = [_compute_feature_importance_for_tree(tree,
-                                                           num_features,
-                                                           normalize)
+def _compute_feature_importances(tree_ensemble,
+                                 num_features,
+                                 normalize=True):
+  """Compute the feature importances.
+
+  The higher the value, the more important the feature.
+
+  Args:
+    tree_ensemble: TreeEnsemble.
+    num_features: The total number of feature ids.
+    normalize: If True, normalize the feature importances.
+
+  Returns:
+    sorted_feature_idx: A list of feature_id which is sorted
+      by its feature importance.
+    feature_importances: A list of corresponding feature importance.
+  """
+  tree_importances = [_compute_feature_importances_per_tree(tree,
+                                                            num_features,
+                                                            normalize)
                       for tree in tree_ensemble.trees]
   tree_importances = np.array(tree_importances)
   tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1)
@@ -973,8 +990,8 @@ def compute_feature_importances(tree_ensemble,
                                axis=0) / np.sum(tree_weights)
   if normalize:
     normalizer = np.sum(feature_importances)
-    if normalizer > 0.0:
-      feature_importances /= normalizer
+    assert normalizer > 0, 'Trees are all empty or root node only.'
+    feature_importances /= normalizer
 
   sorted_feature_idx = np.argsort(feature_importances)[::-1]
   return sorted_feature_idx, feature_importances[sorted_feature_idx]
@@ -988,18 +1005,34 @@ class _BoostedTrees(estimator.Estimator):
 
     self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
 
-  def compute_feature_importances(self, normalize=True):
+  def experimental_feature_importances(self, normalize=True):
+    """Compute the feature importances.
+
+    The higher the value, the more important the corresponding feature.
+
+    Args:
+      normalize: If True, normalize the feature importances.
+
+    Returns:
+      sorted_feature_names: A list of feature name which is sorted
+        by its feature importance.
+      feature_importances: A list of corresponding feature importance.
+
+    Raises:
+      ValueError: Empty ensemble.
+    """
     tree_ensemble = self._read_tree_ensemble_from_checkpoint()
     if tree_ensemble:
       num_features = _calculate_num_features(self._sorted_feature_columns)
       names_for_idx = np.array(
-          _generate_feature_name_for_index(self._sorted_feature_columns))
-      idx, importances = compute_feature_importances(tree_ensemble,
-                                                     num_features,
-                                                     normalize)
+          _generate_feature_name_mapping(self._sorted_feature_columns))
+      idx, importances = _compute_feature_importances(tree_ensemble,
+                                                      num_features,
+                                                      normalize)
       return names_for_idx[idx], importances
     else:
-      return [], []
+      raise ValueError('Found empty serialized string for TreeEnsemble.'
+                       'You should only call the method after training.')
 
   def _read_tree_ensemble_from_checkpoint(self):
     with context.graph_mode():
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 880f0f10ba..8625c7d968 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -165,7 +165,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     predictions = list(est.predict(input_fn=predict_input_fn))
     self.assertAllClose([[0], [0], [0], [0], [0]],
                         [pred['class_ids'] for pred in predictions])
-    sorted_features, importances = est.compute_feature_importances()
+    sorted_features, importances = est.experimental_feature_importances()
     self.assertAllEqual([], sorted_features)
     self.assertAllEqual([], importances)
 
@@ -626,12 +626,12 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0],  # 1st tree.
                      [0.0, 0.0, 0.0]]                                         # 2nd tree.
 
-    sorted_features, importances = est.compute_feature_importances(normalize=False)
+    sorted_features, importances = est.experimental_feature_importances(normalize=False)
     self.assertAllEqual(feature_names_expected, sorted_features)
     self.assertAllClose(_compute_feature_importances_np(feature_gains, False),
                         importances)
 
-    sorted_features1, importances1 = est.compute_feature_importances(normalize=True)
+    sorted_features1, importances1 = est.experimental_feature_importances(normalize=True)
     self.assertAllEqual(feature_names_expected, sorted_features1)
     self.assertAllClose(_compute_feature_importances_np(feature_gains, True),
                         importances1)
@@ -706,12 +706,12 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     feature_gains = [[15.5952005386, 0.0, 0.0, 0.0],  # 1st tree.
                      [0.0, 0.0, 0.0, 0.0]]            # 2nd tree.
 
-    sorted_features, importances = est.compute_feature_importances(normalize=False)
+    sorted_features, importances = est.experimental_feature_importances(normalize=False)
     self.assertAllEqual(feature_names_expected, sorted_features)
     self.assertAllClose(_compute_feature_importances_np(feature_gains, False),
                         importances)
 
-    sorted_features1, importances1 = est.compute_feature_importances(normalize=True)
+    sorted_features1, importances1 = est.experimental_feature_importances(normalize=True)
     self.assertAllEqual(feature_names_expected, sorted_features1)
     self.assertAllClose(_compute_feature_importances_np(feature_gains, True),
                         importances1)
-- 
GitLab


From 196f5478d780b6e069290366fd4b85bb09d8141d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Fri, 17 Aug 2018 12:22:13 +0800
Subject: [PATCH 0035/1357] CLN: use CheckpointReader to load TreeEnsemble
 proto

---
 .../python/estimator/canned/boosted_trees.py  | 60 +++++--------------
 1 file changed, 14 insertions(+), 46 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 848698311c..62757ef588 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -24,8 +24,6 @@ import functools
 import numpy as np
 
 from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.eager import context
 from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import model_fn
 from tensorflow.python.estimator.canned import head as head_lib
@@ -43,9 +41,8 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops.losses import losses
 from tensorflow.python.summary import summary
-from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import checkpoint_utils
 from tensorflow.python.training import distribute as distribute_lib
-from tensorflow.python.training import saver
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.training import training_util
 from tensorflow.python.util.tf_export import estimator_export
@@ -61,8 +58,6 @@ _HOLD_FOR_MULTI_DIM_SUPPORT = object()
 _DUMMY_NUM_BUCKETS = -1
 _DUMMY_NODE_ID = -1
 
-_BOOSTED_TREES_SERIALIZED_PROTO = '_BOOSTED_TREES_SERIALIZED_PROTO'
-
 
 def _get_transformed_features(features, sorted_feature_columns):
   """Gets the transformed features from features/feature_columns pair.
@@ -770,8 +765,6 @@ def _bt_model_fn(
           bucketized_features=input_feature_list,
           logits_dimension=head.logits_dimension)
     else:
-      _, serialized_proto = tree_ensemble.serialize()
-      ops.add_to_collection(_BOOSTED_TREES_SERIALIZED_PROTO, serialized_proto)
       if is_single_machine:
         local_tree_ensemble = tree_ensemble
         ensemble_reload = control_flow_ops.no_op()
@@ -980,9 +973,7 @@ def _compute_feature_importances(tree_ensemble,
       by its feature importance.
     feature_importances: A list of corresponding feature importance.
   """
-  tree_importances = [_compute_feature_importances_per_tree(tree,
-                                                            num_features,
-                                                            normalize)
+  tree_importances = [_compute_feature_importances_per_tree(tree, num_features)
                       for tree in tree_ensemble.trees]
   tree_importances = np.array(tree_importances)
   tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1)
@@ -1021,43 +1012,20 @@ class _BoostedTrees(estimator.Estimator):
     Raises:
       ValueError: Empty ensemble.
     """
-    tree_ensemble = self._read_tree_ensemble_from_checkpoint()
-    if tree_ensemble:
-      num_features = _calculate_num_features(self._sorted_feature_columns)
-      names_for_idx = np.array(
-          _generate_feature_name_mapping(self._sorted_feature_columns))
-      idx, importances = _compute_feature_importances(tree_ensemble,
-                                                      num_features,
-                                                      normalize)
-      return names_for_idx[idx], importances
-    else:
+    reader = checkpoint_utils.load_checkpoint(self._model_dir)
+    serialized = reader.get_tensor('boosted_trees:0_serialized')
+    if not serialized:
       raise ValueError('Found empty serialized string for TreeEnsemble.'
                        'You should only call the method after training.')
-
-  def _read_tree_ensemble_from_checkpoint(self):
-    with context.graph_mode():
-      checkpoint_path = checkpoint_management.latest_checkpoint(
-          self._model_dir)
-      if not checkpoint_path:
-        raise ValueError("Couldn't find trained model at %s." % self._model_dir)
-
-      with ops.Graph().as_default() as g:
-        with tf_session.Session(config=self._session_config) as session:
-          meta_file = checkpoint_path + '.meta'
-          graph_saver = saver.import_meta_graph(meta_file)
-          graph_saver.restore(session, checkpoint_path)
-
-          serialized_proto = ops.get_collection(_BOOSTED_TREES_SERIALIZED_PROTO)
-          assert len(serialized_proto) == 1
-          serialized_proto_string = session.run(serialized_proto[0])
-
-          if serialized_proto_string:
-            tree_ensemble = boosted_trees_pb2.TreeEnsemble()
-            tree_ensemble.ParseFromString(serialized_proto_string)
-            return tree_ensemble
-          else:
-            # serialized_proto_string is empty string before training.
-            return None
+    ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+    ensemble_proto.ParseFromString(serialized)
+
+    num_features = _calculate_num_features(self._sorted_feature_columns)
+    names_for_feature_id = np.array(
+        _generate_feature_name_mapping(self._sorted_feature_columns))
+    sorted_feature_id, importances = _compute_feature_importances(
+        ensemble_proto, num_features, normalize)
+    return names_for_feature_id[sorted_feature_id], importances
 
 
 @estimator_export('estimator.BoostedTreesClassifier')
-- 
GitLab


From 7ed06809ba3aabf1d93cf726a0b9b6416d80ef85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Fri, 17 Aug 2018 14:11:50 +0800
Subject: [PATCH 0036/1357] TST: revise test case

---
 .../estimator/canned/boosted_trees_test.py    | 547 +++++++++++++-----
 1 file changed, 410 insertions(+), 137 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 8625c7d968..80d9ac7552 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -17,9 +17,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
+from google.protobuf import text_format
 import numpy as np
 
 from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
+from tensorflow.python.client import session
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.estimator import model_fn
 from tensorflow.python.estimator import run_config
@@ -31,10 +35,12 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import gen_boosted_trees_ops
+from tensorflow.python.ops import boosted_trees_ops
 from tensorflow.python.ops import resources
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import saver as saver_lib
 from tensorflow.python.training import session_run_hook
 
 NUM_FEATURES = 3
@@ -91,17 +97,6 @@ def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None):
   return _input_fn
 
 
-def _compute_feature_importances_np(feature_gains, normalize):
-  if normalize:
-    feature_gains /= np.sum(feature_gains, axis=1, keepdims=True)
-    feature_gains = np.nan_to_num(feature_gains)
-    feature_importances = np.sum(feature_gains, axis=0) / len(feature_gains)
-    feature_importances /= np.sum(feature_importances)
-    return np.nan_to_num(feature_importances)
-  else:
-    return np.sum(feature_gains, axis=0) / len(feature_gains)
-
-
 class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -165,9 +160,12 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     predictions = list(est.predict(input_fn=predict_input_fn))
     self.assertAllClose([[0], [0], [0], [0], [0]],
                         [pred['class_ids'] for pred in predictions])
-    sorted_features, importances = est.experimental_feature_importances()
-    self.assertAllEqual([], sorted_features)
-    self.assertAllEqual([], importances)
+
+    with self.assertRaisesRegexp(ValueError, 'empty'):
+      est.experimental_feature_importances(normalize=False)
+
+    with self.assertRaisesRegexp(ValueError, 'empty'):
+      est.experimental_feature_importances(normalize=True)
 
   def testTrainAndEvaluateBinaryClassifier(self):
     input_fn = _make_train_input_fn(is_classification=True)
@@ -558,7 +556,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
     self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
 
-  def testCalculateFeatureImportances(self):
+  def testExperimentalFeatureImportancesWithTraining(self):
     input_fn = _make_train_input_fn(is_classification=True)
 
     est = boosted_trees.BoostedTreesClassifier(
@@ -572,71 +570,358 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     # Train for a few steps, and validate final checkpoint.
     est.train(input_fn, steps=num_steps)
 
-    # TreeEnsemble Proto:
-    # tree_ensemble: trees {
-    #   nodes {
-    #     bucketized_split {
-    #       feature_id: 2
-    #       threshold: 2
-    #       left_id: 1
-    #       right_id: 2
-    #     }
-    #     metadata {
-    #       gain: 0.426666676998
-    #     }
-    #   }
-    #   ......
-    #   nodes {
-    #     bucketized_split {
-    #       threshold: 1
-    #       left_id: 5
-    #       right_id: 6
-    #     }
-    #     metadata {
-    #       gain: 0.133481562138
-    #       original_leaf {
-    #         scalar: 0.066666662693
-    #       }
-    #     }
-    #   }
-    #   ......
-    #   nodes {
-    #     bucketized_split {
-    #       left_id: 11
-    #       right_id: 12
-    #     }
-    #     metadata {
-    #       gain: 0.400360047817
-    #       original_leaf {
-    #         scalar: 0.0599950700998
-    #       }
-    #     }
-    #   }
-    # }
-    # trees {
-    #   nodes {
-    #     leaf {
-    #     }
-    #   }
-    # }
-    # tree_weights: 1.0
-    # tree_weights: 1.0
-    # ......
     feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
-    feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0],  # 1st tree.
-                     [0.0, 0.0, 0.0]]                                         # 2nd tree.
 
-    sorted_features, importances = est.experimental_feature_importances(normalize=False)
-    self.assertAllEqual(feature_names_expected, sorted_features)
-    self.assertAllClose(_compute_feature_importances_np(feature_gains, False),
-                        importances)
+    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.2669208, 0.21333334, 0.0], importances)
+
+    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.55579074, 0.44420926, 0.0], importances)
+
+  def _create_fake_checkpoint_with_tree_ensemble_proto(self, est, tree_ensemble_text):
+    with ops.Graph().as_default():
+      with ops.name_scope('boosted_trees') as name:
+        tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
+        tree_ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+        text_format.Merge(tree_ensemble_text, tree_ensemble_proto)
+        stamp_token, _ = tree_ensemble.serialize()
+        restore_op = tree_ensemble.deserialize(
+            stamp_token, tree_ensemble_proto.SerializeToString())
+
+        with session.Session() as sess:
+          resources.initialize_resources(resources.shared_resources()).run()
+          restore_op.run()
+          saver = saver_lib.Saver()
+          save_path = os.path.join(est.model_dir, 'model.ckpt')
+          saver.save(sess, save_path)
+
+  def testExperimentalCalculateFeatureImportances(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 3.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 7
+              right_id: 8
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
+
+    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([2.5, 1.5, 1.0], importances)
+
+    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.5, 0.3, 0.2], importances)
+
+  def testExperimentalCalculateFeatureImportancesWithTreeWeights(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 12.5
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 5.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 5.0
+            }
+          }
+        }
+        tree_weights: 0.4
+        tree_weights: 0.6
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
+
+    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([5.0, 3.0, 2.0], importances)
+
+    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.5, 0.3, 0.2], importances)
+
+  def testExperimentalCalculateFeatureImportancesWithEmptyTree(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 3.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
+
+    feature_names_expected = ['f_2_bucketized', 'f_0_bucketized', 'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([1.5, 0.5, 0.0], importances)
 
-    sorted_features1, importances1 = est.experimental_feature_importances(normalize=True)
-    self.assertAllEqual(feature_names_expected, sorted_features1)
-    self.assertAllClose(_compute_feature_importances_np(feature_gains, True),
-                        importances1)
+    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.75, 0.25, 0.0], importances)
 
-  def testCalculateFeatureImportancesWithIndicatorColumn(self):
+  def testExperimentalCalculateFeatureImportancesWithAllEmptyTree(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
+
+    # Reverse order because feature importances are sorted by np.argsort(f)[::-1]
+    feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized']
+    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.0, 0.0, 0.0], importances)
+
+    with self.assertRaisesRegexp(AssertionError, 'empty or root node'):
+      est.experimental_feature_importances(normalize=True)
+
+  def testExperimentalCalculateFeatureImportancesWithMoreTrees(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=5,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 4.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 3.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 8.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
+
+    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([2, 1.2, 0.8], importances)
+
+    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.5, 0.3, 0.2], importances)
+
+  def testExperimentalFeatureImportancesWithIndicatorColumn(self):
     categorical = feature_column.categorical_column_with_vocabulary_list(
         key='categorical', vocabulary_list=('bad', 'good', 'ok'))
     feature_indicator = feature_column.indicator_column(categorical)
@@ -645,76 +930,64 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
             'an_uninformative_feature', dtype=dtypes.float32),
         BUCKET_BOUNDARIES)
 
-    labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
-    # Our categorical feature defines the labels perfectly
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-          'an_uninformative_feature': np.array([1, 1, 1, 1, 1]),
-          'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
-        },
-        y=labels,
-        batch_size=5,
-        shuffle=False)
-
-    # Train depth 1 tree.
     est = boosted_trees.BoostedTreesRegressor(
         feature_columns=[bucketized_col, feature_indicator],
         n_batches_per_layer=1,
-        n_trees=1,
+        n_trees=2,
         learning_rate=1.0,
         max_depth=1)
 
-    num_steps = 1
-    est.train(input_fn, steps=num_steps)
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 5.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 3
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 3.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
 
-    # TreeEnsemble Proto:
-    # trees {
-    #   nodes {
-    #     bucketized_split {
-    #       feature_id: 2
-    #       left_id: 1
-    #       right_id: 2
-    #     }
-    #     metadata {
-    #       gain: 15.5952005386
-    #     }
-    #   }
-    #   nodes {
-    #     leaf {
-    #     }
-    #   }
-    #   nodes {
-    #     leaf {
-    #       scalar: 5.7000002861
-    #     }
-    #   }
-    # }
-    # trees {
-    #   nodes {
-    #     leaf {
-    #     }
-    #   }
-    # }
-    # tree_weights: 1.0
-    # tree_weights: 1.0
     feature_names_expected = ['categorical_indicator:good',
-                              # Reverse order because feature importances
-                              # are sorted by np.argsort(f)[::-1]
+                              'an_uninformative_feature_bucketized',
                               'categorical_indicator:ok',
-                              'categorical_indicator:bad',
-                              'an_uninformative_feature_bucketized']
-    feature_gains = [[15.5952005386, 0.0, 0.0, 0.0],  # 1st tree.
-                     [0.0, 0.0, 0.0, 0.0]]            # 2nd tree.
-
-    sorted_features, importances = est.experimental_feature_importances(normalize=False)
-    self.assertAllEqual(feature_names_expected, sorted_features)
-    self.assertAllClose(_compute_feature_importances_np(feature_gains, False),
-                        importances)
-
-    sorted_features1, importances1 = est.experimental_feature_importances(normalize=True)
-    self.assertAllEqual(feature_names_expected, sorted_features1)
-    self.assertAllClose(_compute_feature_importances_np(feature_gains, True),
-                        importances1)
+                              'categorical_indicator:bad']
+    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([2.5, 1.5, 1.0, 0.0], importances)
+
+    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.5, 0.3, 0.2, 0.0], importances)
 
 
 class ModelFnTests(test_util.TensorFlowTestCase):
-- 
GitLab


From 52d637e604dacd3bff836a27bd991f95966226e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Sun, 19 Aug 2018 17:28:12 +0800
Subject: [PATCH 0037/1357] CLN: normalize is False by default

---
 tensorflow/python/estimator/canned/boosted_trees.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 62757ef588..c59b59b653 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -956,9 +956,7 @@ def _compute_feature_importances_per_tree(tree, num_features):
   return importances
 
 
-def _compute_feature_importances(tree_ensemble,
-                                 num_features,
-                                 normalize=True):
+def _compute_feature_importances(tree_ensemble, num_features, normalize):
   """Compute the feature importances.
 
   The higher the value, the more important the feature.
@@ -972,6 +970,9 @@ def _compute_feature_importances(tree_ensemble,
     sorted_feature_idx: A list of feature_id which is sorted
       by its feature importance.
     feature_importances: A list of corresponding feature importance.
+
+  Raises:
+    AssertionError: Trees are all empty or root node only when normalizing.
   """
   tree_importances = [_compute_feature_importances_per_tree(tree, num_features)
                       for tree in tree_ensemble.trees]
@@ -996,7 +997,7 @@ class _BoostedTrees(estimator.Estimator):
 
     self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
 
-  def experimental_feature_importances(self, normalize=True):
+  def experimental_feature_importances(self, normalize=False):
     """Compute the feature importances.
 
     The higher the value, the more important the corresponding feature.
@@ -1005,9 +1006,9 @@ class _BoostedTrees(estimator.Estimator):
       normalize: If True, normalize the feature importances.
 
     Returns:
-      sorted_feature_names: A list of feature name which is sorted
+      sorted_feature_names: 1-D array of feature name which is sorted
         by its feature importance.
-      feature_importances: A list of corresponding feature importance.
+      feature_importances: 1-D array of the corresponding feature importance.
 
     Raises:
       ValueError: Empty ensemble.
-- 
GitLab


From ad18b2dd923329ef598ee12b9bafd7fc63d7013d Mon Sep 17 00:00:00 2001
From: Hoeseong Kim <hsgkim@snu.ac.kr>
Date: Mon, 20 Aug 2018 00:41:57 +0900
Subject: [PATCH 0038/1357] Implement extract_volume_patches

---
 .../api_def_ExtractVolumePatches.pbtxt        |  49 +++++
 tensorflow/core/kernels/BUILD                 |  14 ++
 .../core/kernels/extract_volume_patches_op.cc | 189 ++++++++++++++++++
 .../core/kernels/extract_volume_patches_op.h  |  58 ++++++
 .../extract_volume_patches_op_gpu.cu.cc       |  38 ++++
 tensorflow/core/ops/array_ops.cc              | 103 ++++++++++
 tensorflow/python/kernel_tests/BUILD          |  12 ++
 .../extract_volume_patches_op_test.py         | 130 ++++++++++++
 8 files changed, 593 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt
 create mode 100644 tensorflow/core/kernels/extract_volume_patches_op.cc
 create mode 100644 tensorflow/core/kernels/extract_volume_patches_op.h
 create mode 100644 tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc
 create mode 100644 tensorflow/python/kernel_tests/extract_volume_patches_op_test.py

diff --git a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt
new file mode 100644
index 0000000000..3499ade368
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt
@@ -0,0 +1,49 @@
+op {
+  graph_op_name: "ExtractVolumePatches"
+  in_arg {
+    name: "images"
+    description: <<END
+5-D Tensor with shape `[batch, in_rows, in_cols, depth]`.
+END
+  }
+  out_arg {
+    name: "patches"
+    description: <<END
+5-D Tensor with shape `[batch, out_planes, out_rows, out_cols, 
+ksize_planes * ksize_rows * ksize_cols * depth]` containing image 
+patches with size `ksize_patches x ksize_rows x ksize_cols x depth` 
+vectorized in the "depth" dimension. Note `out_planes`, `out_rows` and 
+`out_cols` are the dimensions of the output patches.
+END
+  }
+  attr {
+    name: "ksizes"
+    description: <<END
+The size of the sliding window for each dimension of `images`.
+END
+  }
+  attr {
+    name: "strides"
+    description: <<END
+1-D of length 5. How far the centers of two consecutive patches are in
+the images. Must be: `[1, stride_planes, stride_rows, stride_cols, 1]`.
+END
+  }
+  attr {
+    name: "padding"
+    description: <<END
+The type of padding algorithm to use.
+
+We specify the size-related attributes as:
+
+```python
+      ksizes = [1, ksize_planes, ksize_rows, ksize_cols, 1]
+      strides = [1, stride_planes, strides_rows, strides_cols, 1]
+```
+END
+  }
+  summary: <<END
+Extract `patches` from `images` and put them in the \"depth\" output 
+dimension. 3D extension of `extract_image_patches`.
+END
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index a30916d8b9..a47c59792c 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -210,6 +210,19 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "extract_volume_patches_op",
+    prefix = "extract_volume_patches_op",
+    deps = [
+        ":bounds_check",
+        ":eigen_helpers",
+        ":ops_util",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//third_party/eigen3",
+    ],
+)
+
 cc_library(
     name = "conv_3d",
     hdrs = ["conv_3d.h"],
@@ -625,6 +638,7 @@ cc_library(
         ":diag_op",
         ":edit_distance_op",
         ":extract_image_patches_op",
+        ":extract_volume_patches_op",
         ":gather_nd_op",
         ":gather_op",
         ":guarantee_const_op",
diff --git a/tensorflow/core/kernels/extract_volume_patches_op.cc b/tensorflow/core/kernels/extract_volume_patches_op.cc
new file mode 100644
index 0000000000..80405c66dc
--- /dev/null
+++ b/tensorflow/core/kernels/extract_volume_patches_op.cc
@@ -0,0 +1,189 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+/* 
+See extract_image_patches_op* files and docs for extract_image_patches in 
+../ops/image_ops.cc.
+
+Rates are not supported as of now, but the comments hint how to edit the code
+when rates are to be added.
+*/
+
+#define USE_EIGEN_TENSOR
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/extract_volume_patches_op.h"
+#include <vector>
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+static inline void ParseAttributeVec5(OpKernelConstruction* context,
+                                      const string& attr_name,
+                                      std::vector<int32>* attr) {
+  OP_REQUIRES_OK(context, context->GetAttr(attr_name, attr));
+  OP_REQUIRES(
+      context, (*attr)[0] == 1 && (*attr)[4] == 1,
+      errors::Unimplemented("Only support ", attr_name, " across space."));
+  OP_REQUIRES(context, (*attr)[1] >= 1 && (*attr)[2] >= 1 && (*attr)[3] >= 1,
+              errors::OutOfRange(attr_name, " is out of range."));
+}
+
+template <typename Device, typename T>
+class ExtractVolumePatchesOp : public UnaryOp<T> {
+ public:
+  explicit ExtractVolumePatchesOp(OpKernelConstruction* context)
+      : UnaryOp<T>(context) {
+    ParseAttributeVec5(context, "ksizes", &ksizes_);
+    ParseAttributeVec5(context, "strides", &strides_);
+    //ParseAttributeVec5(context, "rates", &rates_);
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // Input tensor is of the following dimensions:
+    // [ batch, in_planes, in_rows, in_cols, channels ]
+    const Tensor& input = context->input(0);
+    OP_REQUIRES(context, input.dims() == 5,
+                errors::InvalidArgument("input must be 5-dimensional",
+                                        input.shape().DebugString()));
+
+    const int batch = input.dim_size(0);
+    const int in_planes = input.dim_size(1);
+    const int in_rows = input.dim_size(2);
+    const int in_cols = input.dim_size(3);
+    const int depth = input.dim_size(4);
+
+    const int ksize_planes = ksizes_[1];
+    const int ksize_rows = ksizes_[2];
+    const int ksize_cols = ksizes_[3];
+
+    const int stride_planes = strides_[1];
+    const int stride_rows = strides_[2];
+    const int stride_cols = strides_[3];
+
+    /*
+    // In order to enable rates, uncomment the following lines and use
+    // ksize_*_eff instead of ksize_* for the second argument of GetWindowedOutputSize
+    // calls.
+
+    const int rate_planes = rates_[1];
+    const int rate_rows = rates_[2];
+    const int rate_cols = rates_[3];
+
+    const int ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1);
+    const int ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1);
+    const int ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1);
+    */
+
+    int64 out_planes = 0, out_rows = 0, out_cols = 0;
+    int64 pad_planes = 0, pad_rows = 0, pad_cols = 0;
+    OP_REQUIRES_OK(context,
+                   GetWindowedOutputSize(in_planes, ksize_planes, stride_planes,
+                                         padding_, &out_planes, &pad_planes));
+    OP_REQUIRES_OK(context,
+                   GetWindowedOutputSize(in_rows, ksize_rows, stride_rows,
+                                         padding_, &out_rows, &pad_rows));
+    OP_REQUIRES_OK(context,
+                   GetWindowedOutputSize(in_cols, ksize_cols, stride_cols,
+                                         padding_, &out_cols, &pad_cols));
+
+    const std::vector<int64> out_sizes = {batch, out_planes, out_rows, out_cols,
+                                          ksize_planes * ksize_rows * ksize_cols * depth};
+    TensorShape out_shape(out_sizes);
+
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+
+    // If there is nothing to compute, return.
+    if (out_shape.num_elements() == 0) {
+      return;
+    }
+
+    functor::ExtractVolumePatchesForward<Device, T>()(
+        context->eigen_device<Device>(), input.tensor<T, 5>(), 
+        ksize_planes, ksize_rows, ksize_cols, 
+        stride_planes, stride_rows, stride_cols, 
+        /* rate_planes, rate_rows, rate_cols, */
+        BrainPadding2EigenPadding(padding_), output->tensor<T, 5>());
+  }
+
+ private:
+  std::vector<int32> ksizes_;
+  std::vector<int32> strides_;
+  // std::vector<int32> rates_;
+
+  Padding padding_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(ExtractVolumePatchesOp);
+};
+
+// Registration of the CPU implementations.
+#define REGISTER(T)                                                           \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("ExtractVolumePatches").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+      ExtractVolumePatchesOp<CPUDevice, T>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER);
+
+#undef REGISTER
+
+#if GOOGLE_CUDA
+
+// Forward declarations of the functor specializations for GPU.
+namespace functor {
+
+#define DECLARE_GPU_SPEC(T)                                             \
+  template <>                                                           \
+  void ExtractVolumePatchesForward<GPUDevice, T>::operator()(           \
+      const GPUDevice& d, typename TTypes<T, 5>::ConstTensor input,     \
+      int patch_planes, int patch_rows, int patch_cols,                 \
+      int stride_planes, int stride_rows, int stride_cols,              \
+      /* int rate_planes, int rate_rows, int rate_cols, */              \
+      const Eigen::PaddingType& padding,                                \
+      typename TTypes<T, 5>::Tensor output);                            \
+  extern template struct ExtractVolumePatchesForward<GPUDevice, T>;
+
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
+
+#undef DECLARE_GPU_SPEC
+
+}  // namespace functor
+
+// Registration of the GPU implementations.
+#define REGISTER(T)                                                           \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("ExtractVolumePatches").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      ExtractVolumePatchesOp<GPUDevice, T>);
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER);
+
+#undef REGISTER
+
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/extract_volume_patches_op.h b/tensorflow/core/kernels/extract_volume_patches_op.h
new file mode 100644
index 0000000000..e2418334ac
--- /dev/null
+++ b/tensorflow/core/kernels/extract_volume_patches_op.h
@@ -0,0 +1,58 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_
+#define TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/kernels/eigen_volume_patch.h"
+
+namespace tensorflow {
+namespace functor {
+
+template <typename Device, typename T>
+struct ExtractVolumePatchesForward {
+  void operator()(const Device& d, typename TTypes<T, 5>::ConstTensor input,
+                  int patch_planes, int patch_rows, int patch_cols, 
+                  int stride_planes, int stride_rows, int stride_cols,
+                  /* int rate_planes, int rate_rows, int rate_cols, */
+                  const Eigen::PaddingType& padding,
+                  typename TTypes<T, 5>::Tensor output) {
+    const int64 N = std::max(input.size(), output.size());
+    if (N <= std::numeric_limits<Index32>::max()) {
+      auto output_32bit = To32Bit(output);
+      output_32bit.device(d) =
+          To32Bit(input)
+              .extract_volume_patches(patch_cols, patch_rows, patch_planes,
+                                     stride_cols, stride_rows, stride_planes,
+                                     padding)
+              .reshape(output_32bit.dimensions());
+    } else {
+      output.device(d) =
+          input
+              .extract_volume_patches(patch_cols, patch_rows, patch_planes,
+                                     stride_cols, stride_rows, stride_planes,
+                                     padding)
+              .reshape(output.dimensions());
+    }
+  }
+};
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_
diff --git a/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc
new file mode 100644
index 0000000000..08b3386c13
--- /dev/null
+++ b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc
@@ -0,0 +1,38 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/kernels/extract_volume_patches_op.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+
+#define REGISTER(T) template struct ExtractVolumePatchesForward<GPUDevice, T>;
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER);
+
+#undef REGISTER
+
+}  // end namespace functor
+}  // end namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index ef8ad7972c..48d8327a9e 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -2549,6 +2549,109 @@ REGISTER_OP("ExtractImagePatches")
 
 // --------------------------------------------------------------------------
 
+// To enable rates, uncomment all lines commented below and use ksize_*_eff
+// as the second parameter of all GetWindowedOutputSizeVerbose calls instead
+// of ksize_*.
+REGISTER_OP("ExtractVolumePatches")
+    .Input("images: T")
+    .Output("patches: T")
+    .Attr("ksizes: list(int) >= 5")
+    .Attr("strides: list(int) >= 5")
+    /* .Attr("rates: list(int) >= 5") */
+    .Attr("T: realnumbertype")
+    .Attr(GetPaddingAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input_shape;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 5, &input_shape));
+
+      std::vector<int32> ksizes;
+      TF_RETURN_IF_ERROR(c->GetAttr("ksizes", &ksizes));
+      if (ksizes.size() != 5) {
+        return errors::InvalidArgument(
+            "ExtractVolumePatches requires the ksizes attribute to contain 5 "
+            "values, but got: ",
+            ksizes.size());
+      }
+
+      std::vector<int32> strides;
+      TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides));
+      if (strides.size() != 5) {
+        return errors::InvalidArgument(
+            "ExtractVolumePatches requires the stride attribute to contain 5 "
+            "values, but got: ",
+            strides.size());
+      }
+
+      /*
+      std::vector<int32> rates;
+      TF_RETURN_IF_ERROR(c->GetAttr("rates", &rates));
+      if (rates.size() != 5) {
+        return errors::InvalidArgument(
+            "ExtractVolumePatches requires the rates attribute to contain 5 "
+            "values, but got: ",
+            rates.size());
+      }
+      */
+
+      int32 ksize_planes = ksizes[1];
+      int32 ksize_rows = ksizes[2];
+      int32 ksize_cols = ksizes[3];
+
+      int32 stride_planes = strides[1];
+      int32 stride_rows = strides[2];
+      int32 stride_cols = strides[3];
+
+      /*
+      int32 rate_planes = rates[1];
+      int32 rate_rows = rates[2];
+      int32 rate_cols = rates[3];
+
+      int32 ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1);
+      int32 ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1);
+      int32 ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1);
+      */
+
+      DimensionHandle batch_size_dim = c->Dim(input_shape, 0);
+      DimensionHandle in_planes_dim = c->Dim(input_shape, 1);
+      DimensionHandle in_rows_dim = c->Dim(input_shape, 2);
+      DimensionHandle in_cols_dim = c->Dim(input_shape, 3);
+      DimensionHandle output_depth_dim;
+      TF_RETURN_IF_ERROR(c->Multiply(
+          c->Dim(input_shape, 4), ksize_planes * ksize_rows * ksize_cols, &output_depth_dim));
+
+      if (!c->ValueKnown(in_planes_dim) || !c->ValueKnown(in_rows_dim) || !c->ValueKnown(in_cols_dim)) {
+        ShapeHandle output_shape =
+            c->MakeShape({batch_size_dim, InferenceContext::kUnknownDim,
+                          InferenceContext::kUnknownDim, output_depth_dim});
+        c->set_output(0, output_shape);
+        return Status::OK();
+      }
+      auto in_planes = c->Value(in_planes_dim);
+      auto in_rows = c->Value(in_rows_dim);
+      auto in_cols = c->Value(in_cols_dim);
+
+      Padding padding;
+      TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding));
+
+      int64 output_planes, output_rows, output_cols;
+      int64 padding_before, padding_after;
+      TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
+          in_planes, ksize_planes, stride_planes, padding, &output_planes,
+          &padding_before, &padding_after));
+      TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
+          in_rows, ksize_rows, stride_rows, padding, &output_rows,
+          &padding_before, &padding_after));
+      TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
+          in_cols, ksize_cols, stride_cols, padding, &output_cols,
+          &padding_before, &padding_after));
+      ShapeHandle output_shape = c->MakeShape(
+          {batch_size_dim, output_planes, output_rows, output_cols, output_depth_dim});
+      c->set_output(0, output_shape);
+      return Status::OK();
+    });
+
+// --------------------------------------------------------------------------
+
 REGISTER_OP("Bitcast")
     .Input("input: T")
     .Output("output: type")
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 2451dc7257..bb896085f2 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1582,6 +1582,18 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "extract_volume_patches_op_test",
+    size = "small",
+    srcs = ["extract_volume_patches_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+    ],
+)
+
 cuda_py_test(
     name = "functional_ops_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py
new file mode 100644
index 0000000000..215474f6db
--- /dev/null
+++ b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py
@@ -0,0 +1,130 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for ExtractVolumePatches op."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+class ExtractVolumePatches(test.TestCase):
+  """Functional tests for ExtractVolumePatches op."""
+
+  def _VerifyValues(self, image, ksizes, strides, padding, patches):
+    """Tests input-output pairs for the ExtractVolumePatches op.
+
+    Args:
+      image: Input tensor with shape:
+             [batch, in_planes, in_rows, in_cols, depth].
+      ksizes: Patch size specified as: [ksize_planes, ksize_rows, ksize_cols].
+      strides: Output strides, specified as:
+               [stride_planes, stride_rows, stride_cols].
+      padding: Padding type.
+      patches: Expected output.
+
+    Note:
+      rates are not supported as of now.
+    """
+    ksizes = [1] + ksizes + [1]
+    strides = [1] + strides + [1]
+
+    with self.test_session(use_gpu=True):
+      out_tensor = array_ops.extract_volume_patches(
+          constant_op.constant(image),
+          ksizes=ksizes,
+          strides=strides,
+          padding=padding,
+          name="im2col_3d")
+      self.assertAllClose(patches, out_tensor.eval())
+
+  def testKsize1x1x1Stride1x1x1(self):
+    """Verifies that for 1x1x1 kernel the output equals the input."""
+    image = np.arange(2 * 3 * 4 * 5 * 6).reshape([2, 3, 4, 5, 6]) + 1
+    patches = image
+    for padding in ["VALID", "SAME"]:
+      self._VerifyValues(
+          image,
+          ksizes=[1, 1, 1],
+          strides=[1, 1, 1],
+          padding=padding,
+          patches=patches)
+
+  def testKsize1x1x1Stride2x3x4(self):
+    """Test for 1x1x1 kernel and strides."""
+    image = np.arange(6 * 2 * 4 * 5 * 3).reshape([6, 2, 4, 5, 3]) + 1
+    patches = image[:, ::2, ::3, ::4, :]
+    for padding in ["VALID", "SAME"]:
+      self._VerifyValues(
+          image,
+          ksizes=[1, 1, 1],
+          strides=[2, 3, 4],
+          padding=padding,
+          patches=patches)
+
+  def testKsize1x1x2Stride2x2x3(self):
+    """Test for 1x1x2 kernel and strides."""
+    image = np.arange(45).reshape([1, 3, 3, 5, 1]) + 1
+    patches = np.array([[[[[ 1,  2],
+                           [ 4,  5]],
+                          [[11, 12],
+                           [14, 15]]],
+                         [[[31, 32],
+                           [34, 35]],
+                          [[41, 42],
+                           [44, 45]]]]])
+    for padding in ["VALID", "SAME"]:
+      self._VerifyValues(
+          image,
+          ksizes=[1, 1, 2],
+          strides=[2, 2, 3],
+          padding=padding,
+          patches=patches)
+
+  def testKsize2x2x2Stride1x1x1Valid(self):
+    """Test for 2x2x2 kernel with VALID padding."""
+    image = np.arange(8).reshape([1, 2, 2, 2, 1]) + 1
+    patches = np.array([[[[[1, 2, 3, 4, 5, 6, 7, 8]]]]])
+    self._VerifyValues(
+        image,
+        ksizes=[2, 2, 2],
+        strides=[1, 1, 1],
+        padding="VALID",
+        patches=patches)
+
+  def testKsize2x2x2Stride1x1x1Same(self):
+    """Test for 2x2x2 kernel with SAME padding."""
+    image = np.arange(8).reshape([1, 2, 2, 2, 1]) + 1
+    patches = np.array([[[[[1, 2, 3, 4, 5, 6, 7, 8],
+                           [2, 0, 4, 0, 6, 0, 8, 0]],
+                          [[3, 4, 0, 0, 7, 8, 0, 0],
+                           [4, 0, 0, 0, 8, 0, 0, 0]]],
+                         [[[5, 6, 7, 8, 0, 0, 0, 0],
+                           [6, 0, 8, 0, 0, 0, 0, 0]],
+                          [[7, 8, 0, 0, 0, 0, 0, 0],
+                           [8, 0, 0, 0, 0, 0, 0, 0]]]]])
+    self._VerifyValues(
+        image,
+        ksizes=[2, 2, 2],
+        strides=[1, 1, 1],
+        padding="SAME",
+        patches=patches)
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 5630efcca924563b549a788b4b5ec93fea91e559 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 21 Aug 2018 13:06:02 +0800
Subject: [PATCH 0039/1357] CLN: revise according to comments

---
 .../python/estimator/boosted_trees.py         |  5 ++-
 .../python/estimator/canned/boosted_trees.py  | 19 ++++++----
 .../estimator/canned/boosted_trees_test.py    | 37 ++++++++++++++-----
 3 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
index 7ed77bcce6..e6bdc97fe5 100644
--- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
+++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
@@ -33,7 +33,7 @@ def _validate_input_fn_and_repeat_dataset(train_input_fn):
   return _input_fn
 
 
-class _BoostedTreesEstimator(estimator.Estimator):
+class _BoostedTreesEstimator(canned_boosted_trees._BoostedTrees):  # pylint: disable=protected-access
   """An Estimator for Tensorflow Boosted Trees models."""
 
   def __init__(self,
@@ -115,7 +115,8 @@ class _BoostedTreesEstimator(estimator.Estimator):
           config=config)
 
     super(_BoostedTreesEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        feature_columns=feature_columns)
     # pylint:enable=protected-access
 
 
diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index c59b59b653..d051399b52 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -203,7 +203,7 @@ def _generate_feature_name_mapping(sorted_feature_columns):
     sorted_feature_columns: a list/set of tf.feature_column sorted by name.
 
   Returns:
-    feature_name_mapping: a list of feature name.
+    feature_name_mapping: a list of feature names indexed by the feature ids.
   """
   names = []
   for column in sorted_feature_columns:
@@ -962,17 +962,19 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
   The higher the value, the more important the feature.
 
   Args:
-    tree_ensemble: TreeEnsemble.
+    tree_ensemble: a trained tree ensemble, instance of proto
+      boosted_trees.TreeEnsemble.
     num_features: The total number of feature ids.
     normalize: If True, normalize the feature importances.
 
   Returns:
     sorted_feature_idx: A list of feature_id which is sorted
       by its feature importance.
-    feature_importances: A list of corresponding feature importance.
+    feature_importances: A list of corresponding feature importances.
 
   Raises:
-    AssertionError: Trees are all empty or root node only when normalizing.
+    AssertionError: If normalize = True and normalization is not possible
+      (e.g. ensemble is empty or trees contain only a root node).
   """
   tree_importances = [_compute_feature_importances_per_tree(tree, num_features)
                       for tree in tree_ensemble.trees]
@@ -982,7 +984,7 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
                                axis=0) / np.sum(tree_weights)
   if normalize:
     normalizer = np.sum(feature_importances)
-    assert normalizer > 0, 'Trees are all empty or root node only.'
+    assert normalizer > 0, 'Trees are all empty or contains only a root node.'
     feature_importances /= normalizer
 
   sorted_feature_idx = np.argsort(feature_importances)[::-1]
@@ -990,15 +992,17 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
 
 
 class _BoostedTrees(estimator.Estimator):
+  """Base class for boosted trees estimators."""
 
   def __init__(self, model_fn, model_dir, config, feature_columns):
     super(_BoostedTrees, self).__init__(
         model_fn=model_fn, model_dir=model_dir, config=config)
 
     self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
+    self._num_features = _calculate_num_features(self._sorted_feature_columns)
 
   def experimental_feature_importances(self, normalize=False):
-    """Compute the feature importances.
+    """Computes gain-based feature importances.
 
     The higher the value, the more important the corresponding feature.
 
@@ -1021,11 +1025,10 @@ class _BoostedTrees(estimator.Estimator):
     ensemble_proto = boosted_trees_pb2.TreeEnsemble()
     ensemble_proto.ParseFromString(serialized)
 
-    num_features = _calculate_num_features(self._sorted_feature_columns)
     names_for_feature_id = np.array(
         _generate_feature_name_mapping(self._sorted_feature_columns))
     sorted_feature_id, importances = _compute_feature_importances(
-        ensemble_proto, num_features, normalize)
+        ensemble_proto, self._num_features, normalize)
     return names_for_feature_id[sorted_feature_id], importances
 
 
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 80d9ac7552..c764831279 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -161,12 +161,6 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllClose([[0], [0], [0], [0], [0]],
                         [pred['class_ids'] for pred in predictions])
 
-    with self.assertRaisesRegexp(ValueError, 'empty'):
-      est.experimental_feature_importances(normalize=False)
-
-    with self.assertRaisesRegexp(ValueError, 'empty'):
-      est.experimental_feature_importances(normalize=True)
-
   def testTrainAndEvaluateBinaryClassifier(self):
     input_fn = _make_train_input_fn(is_classification=True)
 
@@ -556,7 +550,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
     self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
 
-  def testExperimentalFeatureImportancesWithTraining(self):
+  def testExperimentalFeatureImportancesWithTrainedEnsemble(self):
     input_fn = _make_train_input_fn(is_classification=True)
 
     est = boosted_trees.BoostedTreesClassifier(
@@ -580,6 +574,31 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.55579074, 0.44420926, 0.0], importances)
 
+  def testFeatureImportancesOnEmtpyEnsemble(self):
+    input_fn = _make_train_input_fn(is_classification=True)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    class BailOutWithoutTraining(session_run_hook.SessionRunHook):
+
+      def before_run(self, run_context):
+        raise StopIteration('to bail out.')
+
+    # The step-0 checkpoint will have only an empty ensemble.
+    est.train(input_fn,
+              steps=100,  # must stop at 0 anyway.
+              hooks=[BailOutWithoutTraining()])
+
+    with self.assertRaisesRegexp(ValueError, 'empty serialized string'):
+      est.experimental_feature_importances(normalize=False)
+
+    with self.assertRaisesRegexp(ValueError, 'empty serialized string'):
+      est.experimental_feature_importances(normalize=True)
+
   def _create_fake_checkpoint_with_tree_ensemble_proto(self, est, tree_ensemble_text):
     with ops.Graph().as_default():
       with ops.name_scope('boosted_trees') as name:
@@ -823,7 +842,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.0, 0.0, 0.0], importances)
 
-    with self.assertRaisesRegexp(AssertionError, 'empty or root node'):
+    with self.assertRaisesRegexp(AssertionError, 'empty or contains'):
       est.experimental_feature_importances(normalize=True)
 
   def testExperimentalCalculateFeatureImportancesWithMoreTrees(self):
@@ -921,7 +940,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.5, 0.3, 0.2], importances)
 
-  def testExperimentalFeatureImportancesWithIndicatorColumn(self):
+  def TestFeatureImportancesNamesForCategoricalColumn(self):
     categorical = feature_column.categorical_column_with_vocabulary_list(
         key='categorical', vocabulary_list=('bad', 'good', 'ok'))
     feature_indicator = feature_column.indicator_column(categorical)
-- 
GitLab


From e39bbe4947801c10c41e96fe4cbbb77817136e1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 21 Aug 2018 13:52:38 +0800
Subject: [PATCH 0040/1357] TST: add test case for negative feature importances

---
 .../python/estimator/canned/boosted_trees.py  |  5 +-
 .../estimator/canned/boosted_trees_test.py    | 52 ++++++++++++++++---
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index d051399b52..85bc934a0e 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -973,7 +973,8 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
     feature_importances: A list of corresponding feature importances.
 
   Raises:
-    AssertionError: If normalize = True and normalization is not possible
+    AssertionError: If feature importances contain negative value.
+      Or if normalize = True and normalization is not possible
       (e.g. ensemble is empty or trees contain only a root node).
   """
   tree_importances = [_compute_feature_importances_per_tree(tree, num_features)
@@ -982,6 +983,8 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
   tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1)
   feature_importances = np.sum(tree_importances * tree_weights,
                                axis=0) / np.sum(tree_weights)
+  assert np.all(feature_importances >= 0), ('feature_importances '
+                                            'must be non-negative.')
   if normalize:
     normalizer = np.sum(feature_importances)
     assert normalizer > 0, 'Trees are all empty or contains only a root node.'
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index c764831279..9362b927e2 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -550,7 +550,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
     self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
 
-  def testExperimentalFeatureImportancesWithTrainedEnsemble(self):
+  def testFeatureImportancesWithTrainedEnsemble(self):
     input_fn = _make_train_input_fn(is_classification=True)
 
     est = boosted_trees.BoostedTreesClassifier(
@@ -616,7 +616,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
           save_path = os.path.join(est.model_dir, 'model.ckpt')
           saver.save(sess, save_path)
 
-  def testExperimentalCalculateFeatureImportances(self):
+  def testFeatureImportances(self):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
         n_batches_per_layer=1,
@@ -702,7 +702,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.5, 0.3, 0.2], importances)
 
-  def testExperimentalCalculateFeatureImportancesWithTreeWeights(self):
+  def testFeatureImportancesWithTreeWeights(self):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
         n_batches_per_layer=1,
@@ -758,7 +758,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.5, 0.3, 0.2], importances)
 
-  def testExperimentalCalculateFeatureImportancesWithEmptyTree(self):
+  def testFeatureImportancesWithEmptyTree(self):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
         n_batches_per_layer=1,
@@ -809,7 +809,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.75, 0.25, 0.0], importances)
 
-  def testExperimentalCalculateFeatureImportancesWithAllEmptyTree(self):
+  def testFeatureImportancesWithAllEmptyTree(self):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
         n_batches_per_layer=1,
@@ -845,7 +845,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(AssertionError, 'empty or contains'):
       est.experimental_feature_importances(normalize=True)
 
-  def testExperimentalCalculateFeatureImportancesWithMoreTrees(self):
+  def testFeatureImportancesWithMoreTrees(self):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
         n_batches_per_layer=1,
@@ -1008,6 +1008,46 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.5, 0.3, 0.2, 0.0], importances)
 
+  def testNegativeFeatureImportances(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: -5.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
+
+    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
+      est.experimental_feature_importances(normalize=False)
+
+    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
+      est.experimental_feature_importances(normalize=True)
+
 
 class ModelFnTests(test_util.TensorFlowTestCase):
   """Tests bt_model_fn including unexposed internal functionalities."""
-- 
GitLab


From 88d722c13418fd177c3e03e954307fdfa86a474b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 21 Aug 2018 14:07:55 +0800
Subject: [PATCH 0041/1357] ENH: don't divide by the sum of tree weights

---
 .../python/estimator/canned/boosted_trees.py       |  3 +--
 .../python/estimator/canned/boosted_trees_test.py  | 14 +++++++-------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 85bc934a0e..2f5e46b559 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -981,8 +981,7 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
                       for tree in tree_ensemble.trees]
   tree_importances = np.array(tree_importances)
   tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1)
-  feature_importances = np.sum(tree_importances * tree_weights,
-                               axis=0) / np.sum(tree_weights)
+  feature_importances = np.sum(tree_importances * tree_weights, axis=0)
   assert np.all(feature_importances >= 0), ('feature_importances '
                                             'must be non-negative.')
   if normalize:
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 9362b927e2..54ad052915 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -556,7 +556,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
         n_batches_per_layer=1,
-        n_trees=1,
+        n_trees=2,
         max_depth=5)
 
     # It will stop after 5 steps because of the max depth and num trees.
@@ -568,11 +568,11 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
 
     feature_names, importances = est.experimental_feature_importances(normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.2669208, 0.21333334, 0.0], importances)
+    self.assertAllClose([0.833933, 0.606342, 0.0], importances)
 
     feature_names, importances = est.experimental_feature_importances(normalize=True)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.55579074, 0.44420926, 0.0], importances)
+    self.assertAllClose([0.579010, 0.420990, 0.0], importances)
 
   def testFeatureImportancesOnEmtpyEnsemble(self):
     input_fn = _make_train_input_fn(is_classification=True)
@@ -696,7 +696,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
     feature_names, importances = est.experimental_feature_importances(normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([2.5, 1.5, 1.0], importances)
+    self.assertAllClose([5.0, 3.0, 2.0], importances)
 
     feature_names, importances = est.experimental_feature_importances(normalize=True)
     self.assertAllEqual(feature_names_expected, feature_names)
@@ -803,7 +803,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     feature_names_expected = ['f_2_bucketized', 'f_0_bucketized', 'f_1_bucketized']
     feature_names, importances = est.experimental_feature_importances(normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([1.5, 0.5, 0.0], importances)
+    self.assertAllClose([3.0, 1.0, 0.0], importances)
 
     feature_names, importances = est.experimental_feature_importances(normalize=True)
     self.assertAllEqual(feature_names_expected, feature_names)
@@ -934,7 +934,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
     feature_names, importances = est.experimental_feature_importances(normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([2, 1.2, 0.8], importances)
+    self.assertAllClose([10, 6.0, 4.0], importances)
 
     feature_names, importances = est.experimental_feature_importances(normalize=True)
     self.assertAllEqual(feature_names_expected, feature_names)
@@ -1002,7 +1002,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
                               'categorical_indicator:bad']
     feature_names, importances = est.experimental_feature_importances(normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([2.5, 1.5, 1.0, 0.0], importances)
+    self.assertAllClose([5.0, 3.0, 2.0, 0.0], importances)
 
     feature_names, importances = est.experimental_feature_importances(normalize=True)
     self.assertAllEqual(feature_names_expected, feature_names)
-- 
GitLab


From 73c8cbb413029cf3e540e99b883ae89f4b08fc11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 21 Aug 2018 14:18:27 +0800
Subject: [PATCH 0042/1357] TST: add test case for full tree with leaves

---
 .../estimator/canned/boosted_trees_test.py    | 111 ++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 54ad052915..13e1d224bc 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -845,6 +845,117 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(AssertionError, 'empty or contains'):
       est.experimental_feature_importances(normalize=True)
 
+  def testFeatureImportancesWithFullTrees(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 3.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 3.34
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.88
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.88
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -2.88
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
+
+    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([5.0, 3.0, 2.0], importances)
+
+    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.5, 0.3, 0.2], importances)
+
   def testFeatureImportancesWithMoreTrees(self):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
-- 
GitLab


From 4979d7314dd1f1788751781b2dfbfb9e47c8e20e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Wed, 22 Aug 2018 11:34:50 +0800
Subject: [PATCH 0043/1357] CLN: revise codes

---
 .../python/estimator/canned/boosted_trees.py  |  18 +-
 .../estimator/canned/boosted_trees_test.py    | 338 +++++-------------
 2 files changed, 101 insertions(+), 255 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 2f5e46b559..b1d5d60fb0 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -957,7 +957,7 @@ def _compute_feature_importances_per_tree(tree, num_features):
 
 
 def _compute_feature_importances(tree_ensemble, num_features, normalize):
-  """Compute the feature importances.
+  """Computes gain-based feature importances.
 
   The higher the value, the more important the feature.
 
@@ -986,7 +986,7 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
                                             'must be non-negative.')
   if normalize:
     normalizer = np.sum(feature_importances)
-    assert normalizer > 0, 'Trees are all empty or contains only a root node.'
+    assert normalizer > 0, 'Trees are all empty or contain only a root node.'
     feature_importances /= normalizer
 
   sorted_feature_idx = np.argsort(feature_importances)[::-1]
@@ -1000,8 +1000,11 @@ class _BoostedTrees(estimator.Estimator):
     super(_BoostedTrees, self).__init__(
         model_fn=model_fn, model_dir=model_dir, config=config)
 
-    self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
+    self._sorted_feature_columns = sorted(feature_columns,
+                                          key=lambda tc: tc.name)
     self._num_features = _calculate_num_features(self._sorted_feature_columns)
+    self._names_for_feature_id = np.array(
+        _generate_feature_name_mapping(self._sorted_feature_columns))
 
   def experimental_feature_importances(self, normalize=False):
     """Computes gain-based feature importances.
@@ -1017,21 +1020,20 @@ class _BoostedTrees(estimator.Estimator):
       feature_importances: 1-D array of the corresponding feature importance.
 
     Raises:
-      ValueError: Empty ensemble.
+      ValueError: When attempting to normalize on an empty ensemble
+        or an ensemble of trees which have no splits.
     """
     reader = checkpoint_utils.load_checkpoint(self._model_dir)
     serialized = reader.get_tensor('boosted_trees:0_serialized')
     if not serialized:
       raise ValueError('Found empty serialized string for TreeEnsemble.'
-                       'You should only call the method after training.')
+                       'You should only call this method after training.')
     ensemble_proto = boosted_trees_pb2.TreeEnsemble()
     ensemble_proto.ParseFromString(serialized)
 
-    names_for_feature_id = np.array(
-        _generate_feature_name_mapping(self._sorted_feature_columns))
     sorted_feature_id, importances = _compute_feature_importances(
         ensemble_proto, self._num_features, normalize)
-    return names_for_feature_id[sorted_feature_id], importances
+    return self._names_for_feature_id[sorted_feature_id], importances
 
 
 @estimator_export('estimator.BoostedTreesClassifier')
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 13e1d224bc..24d3a3501e 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -574,7 +574,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.579010, 0.420990, 0.0], importances)
 
-  def testFeatureImportancesOnEmtpyEnsemble(self):
+  def testFeatureImportancesOnEmptyEnsemble(self):
     input_fn = _make_train_input_fn(is_classification=True)
 
     est = boosted_trees.BoostedTreesClassifier(
@@ -616,7 +616,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
           save_path = os.path.join(est.model_dir, 'model.ckpt')
           saver.save(sess, save_path)
 
-  def testFeatureImportances(self):
+  def testFeatureImportancesOnNonEmptyEnsemble(self):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
         n_batches_per_layer=1,
@@ -656,130 +656,60 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
             }
           }
           nodes {
-            bucketized_split {
-              feature_id: 0
-              left_id: 7
-              right_id: 8
-            }
-            metadata {
-              gain: 1.0
+            leaf {
+              scalar: -0.34
             }
           }
-        }
-        trees {
           nodes {
-            bucketized_split {
-              feature_id: 0
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.0
+            leaf {
+              scalar: 1.34
             }
           }
           nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 1.0
+            leaf {
+              scalar: 0.0
             }
           }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
-
-    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
-    feature_names, importances = est.experimental_feature_importances(normalize=False)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([5.0, 3.0, 2.0], importances)
-
-    feature_names, importances = est.experimental_feature_importances(normalize=True)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.5, 0.3, 0.2], importances)
-
-  def testFeatureImportancesWithTreeWeights(self):
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        max_depth=5)
-
-    tree_ensemble_text = """
-        trees {
           nodes {
             bucketized_split {
               feature_id: 0
-              left_id: 1
-              right_id: 2
+              left_id: 7
+              right_id: 8
             }
             metadata {
-              gain: 12.5
+              gain: 1.0
             }
           }
           nodes {
-            bucketized_split {
-              feature_id: 1
-              left_id: 3
-              right_id: 4
+            leaf {
+              scalar: 3.34
             }
-            metadata {
-              gain: 5.0
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
             }
           }
         }
         trees {
           nodes {
             bucketized_split {
-              feature_id: 2
+              feature_id: 0
               left_id: 1
               right_id: 2
             }
             metadata {
-              gain: 5.0
+              gain: 1.0
             }
           }
-        }
-        tree_weights: 0.4
-        tree_weights: 0.6
-        """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
-
-    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
-    feature_names, importances = est.experimental_feature_importances(normalize=False)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([5.0, 3.0, 2.0], importances)
-
-    feature_names, importances = est.experimental_feature_importances(normalize=True)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.5, 0.3, 0.2], importances)
-
-  def testFeatureImportancesWithEmptyTree(self):
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        max_depth=5)
-
-    tree_ensemble_text = """
-        trees {
           nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 3.0
+            leaf {
+              scalar: 3.34
             }
           }
           nodes {
             bucketized_split {
-              feature_id: 0
+              feature_id: 2
               left_id: 3
               right_id: 4
             }
@@ -787,47 +717,14 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
               gain: 1.0
             }
           }
-        }
-        trees {
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
-
-    feature_names_expected = ['f_2_bucketized', 'f_0_bucketized', 'f_1_bucketized']
-    feature_names, importances = est.experimental_feature_importances(normalize=False)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([3.0, 1.0, 0.0], importances)
-
-    feature_names, importances = est.experimental_feature_importances(normalize=True)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.75, 0.25, 0.0], importances)
-
-  def testFeatureImportancesWithAllEmptyTree(self):
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        max_depth=5)
-
-    tree_ensemble_text = """
-        trees {
           nodes {
             leaf {
-              scalar: 0.0
+              scalar: 3.34
             }
           }
-        }
-        trees {
           nodes {
             leaf {
-              scalar: 0.0
+              scalar: 1.34
             }
           }
         }
@@ -836,52 +733,42 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
         """
     self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
 
-    # Reverse order because feature importances are sorted by np.argsort(f)[::-1]
-    feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized']
+    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
     feature_names, importances = est.experimental_feature_importances(normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.0, 0.0, 0.0], importances)
+    self.assertAllClose([5.0, 3.0, 2.0], importances)
 
-    with self.assertRaisesRegexp(AssertionError, 'empty or contains'):
-      est.experimental_feature_importances(normalize=True)
+    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.5, 0.3, 0.2], importances)
 
-  def testFeatureImportancesWithFullTrees(self):
+  def testFeatureImportancesWithTreeWeights(self):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
         n_batches_per_layer=1,
-        n_trees=2,
+        n_trees=3,
         max_depth=5)
 
     tree_ensemble_text = """
         trees {
           nodes {
             bucketized_split {
-              feature_id: 2
+              feature_id: 0
               left_id: 1
               right_id: 2
             }
             metadata {
-              gain: 2.0
+              gain: 12.5
             }
           }
           nodes {
             bucketized_split {
-              feature_id: 0
+              feature_id: 1
               left_id: 3
               right_id: 4
             }
             metadata {
-              gain: 3.0
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 2.0
+              gain: 5.0
             }
           }
           nodes {
@@ -899,50 +786,38 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
               scalar: 0.0
             }
           }
-          nodes {
-            leaf {
-              scalar: 3.34
-            }
-          }
         }
         trees {
           nodes {
             bucketized_split {
-              feature_id: 0
+              feature_id: 2
               left_id: 1
               right_id: 2
             }
             metadata {
-              gain: 2.0
+              gain: 5.0
             }
           }
           nodes {
             leaf {
-              scalar: -0.88
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 1.0
+              scalar: -0.34
             }
           }
           nodes {
             leaf {
-              scalar: 1.88
+              scalar: 1.34
             }
           }
+        }
+        trees {
           nodes {
             leaf {
-              scalar: -2.88
+              scalar: 0.0
             }
           }
         }
-        tree_weights: 1.0
+        tree_weights: 0.4
+        tree_weights: 0.6
         tree_weights: 1.0
         """
     self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
@@ -956,100 +831,42 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.5, 0.3, 0.2], importances)
 
-  def testFeatureImportancesWithMoreTrees(self):
+  def testFeatureImportancesWithAllEmptyTree(self):
     est = boosted_trees.BoostedTreesClassifier(
         feature_columns=self._feature_columns,
         n_batches_per_layer=1,
-        n_trees=5,
+        n_trees=2,
         max_depth=5)
 
     tree_ensemble_text = """
         trees {
           nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 4.0
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 3.0
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 2.0
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.0
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 8.0
+            leaf {
+              scalar: 0.0
             }
           }
         }
         trees {
           nodes {
-            bucketized_split {
-              feature_id: 0
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 2.0
+            leaf {
+              scalar: 0.0
             }
           }
         }
         tree_weights: 1.0
         tree_weights: 1.0
-        tree_weights: 1.0
-        tree_weights: 1.0
-        tree_weights: 1.0
         """
     self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
 
-    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
+    # Reverse order because feature importances are sorted by np.argsort(f)[::-1]
+    feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized']
     feature_names, importances = est.experimental_feature_importances(normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([10, 6.0, 4.0], importances)
+    self.assertAllClose([0.0, 0.0, 0.0], importances)
 
-    feature_names, importances = est.experimental_feature_importances(normalize=True)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.5, 0.3, 0.2], importances)
+    with self.assertRaisesRegexp(AssertionError,
+                                 'all empty or contain only a root node'):
+      est.experimental_feature_importances(normalize=True)
 
   def TestFeatureImportancesNamesForCategoricalColumn(self):
     categorical = feature_column.categorical_column_with_vocabulary_list(
@@ -1089,6 +906,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
               gain: 2.0
             }
           }
+          nodes {
+            leaf {
+              scalar: -0.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
         }
         trees {
           nodes {
@@ -1101,6 +933,16 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
               gain: 3.0
             }
           }
+          nodes {
+            leaf {
+              scalar: -0.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
         }
         tree_weights: 1.0
         tree_weights: 1.0
@@ -1126,6 +968,8 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
         n_trees=1,
         max_depth=5)
 
+    # In order to generate a negative feature importances,
+    # We assign an invalid value -1 to tree_weights here.
     tree_ensemble_text = """
         trees {
           nodes {
@@ -1135,21 +979,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
               right_id: 2
             }
             metadata {
-              gain: -5.0
+              gain: 5.0
             }
           }
           nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 3
-              right_id: 4
+            leaf {
+              scalar: -0.34
             }
-            metadata {
-              gain: 2.0
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
             }
           }
         }
-        tree_weights: 1.0
+        tree_weights: -1.0
         """
     self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
 
-- 
GitLab


From 56ea7fc45559f372315b2aedd0a2df15113f5f93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Wed, 22 Aug 2018 17:51:17 +0800
Subject: [PATCH 0044/1357] ENH: div_no_nan supports to treate negative as zero

---
 tensorflow/python/ops/math_ops.py      |  5 ++++-
 tensorflow/python/ops/math_ops_test.py | 13 +++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 67ea534639..a693b1ebac 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -1039,13 +1039,14 @@ def div(x, y, name=None):
 
 
 @tf_export("div_no_nan")
-def div_no_nan(x, y, name=None):
+def div_no_nan(x, y, name=None, negative_to_zero=False):
   """Computes an unsafe divide which returns 0 if the y is zero.
 
   Args:
     x: A `Tensor`. Must be one of the following types: `float32`, `float64`.
     y: A `Tensor` whose dtype is compatible with `x`.
     name: A name for the operation (optional).
+    negative_to_zero: If `True`, negative is treated as zero in denominator.
   Returns:
     The element-wise value of the x divided by y.
   """
@@ -1058,6 +1059,8 @@ def div_no_nan(x, y, name=None):
     if x_dtype != y_dtype:
       raise TypeError("x and y must have the same dtype, got %r != %r" %
                       (x_dtype, y_dtype))
+    if negative_to_zero:
+      y = gen_math_ops.maximum(y, 0, name='negative_to_zero')
     return gen_math_ops.div_no_nan(x, y, name=name)
 
 
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index 6bd41020c5..6e1e5f37c8 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -487,6 +487,19 @@ class DivNoNanTest(test_util.TensorFlowTestCase):
         tf_result = math_ops.div_no_nan(nums, divs).eval()
         self.assertAllEqual(tf_result, np_result)
 
+  def testNegativeToZero(self):
+    for dtype in [np.float32, np.float64]:
+      nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1)
+      divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24)
+
+      np_result = np.true_divide(nums, divs)
+      np_result[:, divs[0] <= 0] = 0
+
+      with self.cached_session():
+        tf_result = math_ops.div_no_nan(nums, divs,
+                                        negative_to_zero=True).eval()
+        self.assertAllEqual(tf_result, np_result)
+
 
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From c05bb4efcaf53d4cbc315ef6d12de822f2557a13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Wed, 22 Aug 2018 18:13:37 +0800
Subject: [PATCH 0045/1357] CLN: replace safe_div method by div_no_nan

---
 .../contrib/losses/python/losses/loss_ops.py  | 40 ++++---------
 .../contrib/metrics/python/ops/metric_ops.py  | 46 ++++++---------
 tensorflow/contrib/rate/rate.py               | 11 +---
 .../python/keras/engine/training_utils.py     |  3 +-
 tensorflow/python/keras/metrics.py            | 19 +------
 tensorflow/python/kernel_tests/losses_test.py | 14 -----
 tensorflow/python/ops/losses/losses_impl.py   | 40 ++++---------
 tensorflow/python/ops/metrics_impl.py         | 57 ++++++++-----------
 8 files changed, 67 insertions(+), 163 deletions(-)

diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py
index 651de4e2f4..29f7953c3b 100644
--- a/tensorflow/contrib/losses/python/losses/loss_ops.py
+++ b/tensorflow/contrib/losses/python/losses/loss_ops.py
@@ -66,32 +66,6 @@ def _scale_losses(losses, weights):
   return math_ops.reduce_sum(reduced_losses)
 
 
-def _safe_div(numerator, denominator, name="value"):
-  """Computes a safe divide which returns 0 if the denominator is zero.
-
-  Note that the function contains an additional conditional check that is
-  necessary for avoiding situations where the loss is zero causing NaNs to
-  creep into the gradient computation.
-
-  Args:
-    numerator: An arbitrary `Tensor`.
-    denominator: A `Tensor` whose shape matches `numerator` and whose values are
-      assumed to be non-negative.
-    name: An optional name for the returned op.
-
-  Returns:
-    The element-wise value of the numerator divided by the denominator.
-  """
-  return array_ops.where(
-      math_ops.greater(denominator, 0),
-      math_ops.div(numerator,
-                   array_ops.where(
-                       math_ops.equal(denominator, 0),
-                       array_ops.ones_like(denominator), denominator)),
-      array_ops.zeros_like(numerator),
-      name=name)
-
-
 def _safe_mean(losses, num_present):
   """Computes a safe mean of the losses.
 
@@ -104,7 +78,8 @@ def _safe_mean(losses, num_present):
       then zero is returned.
   """
   total_loss = math_ops.reduce_sum(losses)
-  return _safe_div(total_loss, num_present)
+  return math_ops.div_no_nan(total_loss, num_present,
+                             negative_to_zero=True, name="value")
 
 
 @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.")
@@ -609,11 +584,16 @@ def mean_pairwise_squared_error(predictions,
         math_ops.square(diffs), reduction_indices=reduction_indices)
     num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
-    term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, num_present_per_batch)
+    term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch,
+                                      num_present_per_batch,
+                                      negative_to_zero=True,
+                                      name="value")
 
     sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices)
-    term2 = 2.0 * _safe_div(
-        math_ops.square(sum_diff), math_ops.square(num_present_per_batch))
+    term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff),
+                                      math_ops.square(num_present_per_batch),
+                                      negative_to_zero=True,
+                                      name="value")
 
     loss = _scale_losses(term1 - term2, weights)
 
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index a328670526..d972e7da53 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -45,24 +45,6 @@ from tensorflow.python.util.deprecation import deprecated
 _EPSILON = 1e-7
 
 
-def _safe_div(numerator, denominator, name):
-  """Divides two values, returning 0 if the denominator is <= 0.
-
-  Args:
-    numerator: A real `Tensor`.
-    denominator: A real `Tensor`, with dtype matching `numerator`.
-    name: Name for the returned op.
-
-  Returns:
-    0 if `denominator` <= 0, else `numerator` / `denominator`
-  """
-  return array_ops.where(
-      math_ops.greater(denominator, 0),
-      math_ops.truediv(numerator, denominator),
-      0,
-      name=name)
-
-
 @deprecated(None, 'Please switch to tf.metrics.true_positives. Note that the '
             'order of the labels and predictions arguments has been switched.')
 def streaming_true_positives(predictions,
@@ -3205,22 +3187,28 @@ def streaming_covariance(predictions,
 
     # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount)
     # batch_mean_prediction is E[x_B] in the update equation
-    batch_mean_prediction = _safe_div(
+    batch_mean_prediction = math_ops.div_no_nan(
         math_ops.reduce_sum(weighted_predictions), batch_count,
-        'batch_mean_prediction')
-    delta_mean_prediction = _safe_div(
+        negative_to_zero=True,
+        name='batch_mean_prediction')
+    delta_mean_prediction = math_ops.div_no_nan(
         (batch_mean_prediction - mean_prediction) * batch_count, update_count,
-        'delta_mean_prediction')
+        negative_to_zero=True,
+        name='delta_mean_prediction')
     update_mean_prediction = state_ops.assign_add(mean_prediction,
                                                   delta_mean_prediction)
     # prev_mean_prediction is E[x_A] in the update equation
     prev_mean_prediction = update_mean_prediction - delta_mean_prediction
 
     # batch_mean_label is E[y_B] in the update equation
-    batch_mean_label = _safe_div(
-        math_ops.reduce_sum(weighted_labels), batch_count, 'batch_mean_label')
-    delta_mean_label = _safe_div((batch_mean_label - mean_label) * batch_count,
-                                 update_count, 'delta_mean_label')
+    batch_mean_label = math_ops.div_no_nan(
+        math_ops.reduce_sum(weighted_labels), batch_count,
+        negative_to_zero=True,
+        name='batch_mean_label')
+    delta_mean_label = math_ops.div_no_nan(
+        (batch_mean_label - mean_label) * batch_count, update_count,
+        negative_to_zero=True,
+        name='delta_mean_label')
     update_mean_label = state_ops.assign_add(mean_label, delta_mean_label)
     # prev_mean_label is E[y_A] in the update equation
     prev_mean_label = update_mean_label - delta_mean_label
@@ -3882,8 +3870,10 @@ def cohen_kappa(labels,
       po_sum = math_ops.reduce_sum(po)
       total = math_ops.reduce_sum(pe_row)
       pe_sum = math_ops.reduce_sum(
-          metrics_impl._safe_div(  # pylint: disable=protected-access
-              pe_row * pe_col, total, None))
+          math_ops.div_no_nan(
+              pe_row * pe_col, total,
+              negative_to_zero=True,
+              name=None))
       po_sum, pe_sum, total = (math_ops.to_double(po_sum),
                                math_ops.to_double(pe_sum),
                                math_ops.to_double(total))
diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py
index 24d586479a..68f5a6e58a 100644
--- a/tensorflow/contrib/rate/rate.py
+++ b/tensorflow/contrib/rate/rate.py
@@ -108,13 +108,6 @@ class Rate(object):
   def variables(self):
     return self._vars
 
-  def _safe_div(self, numerator, denominator, name):
-    t = math_ops.truediv(numerator, denominator)
-    zero = array_ops.zeros_like(t, dtype=denominator.dtype)
-    condition = math_ops.greater(denominator, zero)
-    zero = math_ops.cast(zero, t.dtype)
-    return array_ops.where(condition, t, zero, name=name)
-
   def _add_variable(self, name, shape=None, dtype=None):
     """Private method for adding variables to the graph."""
     if self._built:
@@ -148,4 +141,6 @@ class Rate(object):
     state_ops.assign(self.prev_values, values)
     state_ops.assign(self.prev_denominator, denominator)
 
-    return self._safe_div(self.numer, self.denom, name="safe_rate")
+    return math_ops.div_no_nan(self.numer, self.denom,
+                               negative_to_zero=True,
+                               name="safe_rate")
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index f94697c913..12ea75c5ea 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -607,7 +607,8 @@ def weighted_masked_objective(fn):
       score_array = math_ops.multiply(score_array, weights)
       score_array = math_ops.reduce_sum(score_array)
       weights = math_ops.reduce_sum(weights)
-      score_array = metrics_module.safe_div(score_array, weights)
+      score_array = math_ops.div_no_nan(score_array, weights,
+                                        negative_to_zero=True)
     return K.mean(score_array)
 
   return weighted
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 0983d62c59..6f4353f96a 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -136,23 +136,6 @@ def result_wrapper(result_fn):
   return tf_decorator.make_decorator(result_fn, decorated)
 
 
-def safe_div(numerator, denominator):
-  """Divides two tensors element-wise, returning 0 if the denominator is <= 0.
-
-  Args:
-    numerator: A `Tensor`.
-    denominator: A `Tensor`, with dtype matching `numerator`.
-
-  Returns:
-    0 if `denominator` <= 0, else `numerator` / `denominator`
-  """
-  t = math_ops.truediv(numerator, denominator)
-  zero = array_ops.zeros_like(t, dtype=denominator.dtype)
-  condition = math_ops.greater(denominator, zero)
-  zero = math_ops.cast(zero, t.dtype)
-  return array_ops.where(condition, t, zero)
-
-
 def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
   """Squeeze or expand last dimension if needed.
 
@@ -472,7 +455,7 @@ class Mean(Metric):
     state_ops.assign_add(self.count, num_values)
 
   def result(self):
-    return safe_div(self.total, self.count)
+    return math_ops.div_no_nan(self.total, self.count, negative_to_zero=True)
 
 
 class MeanMetricWrapper(Mean):
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index 87fc715783..c45b5035de 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -34,25 +34,11 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.losses import losses
-from tensorflow.python.ops.losses import losses_impl
 from tensorflow.python.ops.losses import util
 from tensorflow.python.platform import test
 from tensorflow.python.training import momentum as momentum_lib
 
 
-safe_div = losses_impl._safe_div  # pylint: disable=protected-access
-
-
-class SafeDivTest(test.TestCase):
-
-  def testEager(self):
-    with context.eager_mode():
-      self.assertAllEqual(safe_div(constant_op.constant(1.0),
-                                   constant_op.constant(0.0)), 0.0)
-      self.assertAllEqual(safe_div(constant_op.constant(1.0),
-                                   0.0), 0.0)
-
-
 class AbsoluteDifferenceLossTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 806539747e..1e65aac115 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -74,31 +74,6 @@ class Reduction(object):
       raise ValueError("Invalid ReductionKey %s." % key)
 
 
-def _safe_div(numerator, denominator, name="value"):
-  """Computes a safe divide which returns 0 if the denominator is zero.
-
-  Note that the function contains an additional conditional check that is
-  necessary for avoiding situations where the loss is zero causing NaNs to
-  creep into the gradient computation.
-
-  Args:
-    numerator: An arbitrary `Tensor`.
-    denominator: `Tensor` whose shape matches `numerator` and whose values are
-      assumed to be non-negative.
-    name: An optional name for the returned op.
-
-  Returns:
-    The element-wise value of the numerator divided by the denominator.
-  """
-  return array_ops.where(
-      math_ops.greater(denominator, 0),
-      math_ops.div(numerator, array_ops.where(
-          math_ops.equal(denominator, 0),
-          array_ops.ones_like(denominator), denominator)),
-      array_ops.zeros_like(numerator),
-      name=name)
-
-
 def _safe_mean(losses, num_present):
   """Computes a safe mean of the losses.
 
@@ -111,7 +86,8 @@ def _safe_mean(losses, num_present):
       then zero is returned.
   """
   total_loss = math_ops.reduce_sum(losses)
-  return _safe_div(total_loss, num_present)
+  return math_ops.div_no_nan(total_loss, num_present,
+                             negative_to_zero=True, name="value")
 
 
 def _num_present(losses, weights, per_batch=False):
@@ -599,14 +575,18 @@ def mean_pairwise_squared_error(
           keepdims=True)
       num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
-      term1 = 2.0 * _safe_div(sum_squares_diff_per_batch,
-                              num_present_per_batch - 1)
+      term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch,
+                                        num_present_per_batch - 1,
+                                        negative_to_zero=True,
+                                        name="value")
 
       sum_diff = math_ops.reduce_sum(
           diffs, reduction_indices=reduction_indices, keepdims=True)
-      term2 = 2.0 * _safe_div(
+      term2 = 2.0 * math_ops.div_no_nan(
           math_ops.square(sum_diff),
-          math_ops.multiply(num_present_per_batch, num_present_per_batch - 1))
+          math_ops.multiply(num_present_per_batch, num_present_per_batch - 1),
+          negative_to_zero=True,
+          name="value")
 
       weighted_losses = math_ops.multiply(term1 - term2, weights)
       loss = math_ops.reduce_sum(weighted_losses)
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index 763877c2d2..32f8fd3ed7 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -213,24 +213,6 @@ def _maybe_expand_labels(labels, predictions):
         lambda: array_ops.expand_dims(labels, -1, name=scope), lambda: labels)
 
 
-def _safe_div(numerator, denominator, name):
-  """Divides two tensors element-wise, returning 0 if the denominator is <= 0.
-
-  Args:
-    numerator: A real `Tensor`.
-    denominator: A real `Tensor`, with dtype matching `numerator`.
-    name: Name for the returned op.
-
-  Returns:
-    0 if `denominator` <= 0, else `numerator` / `denominator`
-  """
-  t = math_ops.truediv(numerator, denominator)
-  zero = array_ops.zeros_like(t, dtype=denominator.dtype)
-  condition = math_ops.greater(denominator, zero)
-  zero = math_ops.cast(zero, t.dtype)
-  return array_ops.where(condition, t, zero, name=name)
-
-
 def _safe_scalar_div(numerator, denominator, name):
   """Divides two values, returning 0 if the denominator is 0.
 
@@ -244,13 +226,7 @@ def _safe_scalar_div(numerator, denominator, name):
   """
   numerator.get_shape().with_rank_at_most(1)
   denominator.get_shape().with_rank_at_most(1)
-  return control_flow_ops.cond(
-      math_ops.equal(
-          array_ops.constant(0.0, dtype=dtypes.float64), denominator),
-      lambda: array_ops.constant(0.0, dtype=dtypes.float64),
-      lambda: math_ops.div(numerator, denominator),
-      name=name)
-
+  return math_ops.div_no_nan(numerator, denominator, name=name)
 
 def _streaming_confusion_matrix(labels, predictions, num_classes, weights=None):
   """Calculate a streaming confusion matrix.
@@ -402,11 +378,13 @@ def mean(values,
     with ops.control_dependencies([values]):
       update_count_op = state_ops.assign_add(count, num_values)
 
-    compute_mean = lambda _, t, c: _safe_div(t, c, 'value')
+    compute_mean = lambda _, t, c: math_ops.div_no_nan(
+        t, c, negative_to_zero=True, name='value')
 
     mean_t = _aggregate_across_towers(
         metrics_collections, compute_mean, total, count)
-    update_op = _safe_div(update_total_op, update_count_op, 'update_op')
+    update_op = math_ops.div_no_nan(update_total_op, update_count_op,
+                                    negative_to_zero=True, name='update_op')
 
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
@@ -778,16 +756,21 @@ def auc(labels,
       """
       dtp = tp[:num_thresholds - 1] - tp[1:]
       p = tp + fp
-      prec_slope = _safe_div(dtp, p[:num_thresholds - 1] - p[1:], 'prec_slope')
+      prec_slope = math_ops.div_no_nan(dtp, p[:num_thresholds - 1] - p[1:],
+                                       negative_to_zero=True,
+                                       name='prec_slope')
       intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:])
       safe_p_ratio = array_ops.where(
           math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0),
-          _safe_div(p[:num_thresholds - 1], p[1:], 'recall_relative_ratio'),
+          math_ops.div_no_nan(p[:num_thresholds - 1], p[1:],
+                              negative_to_zero=True,
+                              name='recall_relative_ratio'),
           array_ops.ones_like(p[1:]))
       return math_ops.reduce_sum(
-          _safe_div(
+          math_ops.div_no_nan(
               prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)),
               tp[1:] + fn[1:],
+              negative_to_zero=True,
               name='pr_auc_increment'),
           name='interpolate_pr_auc')
 
@@ -1068,7 +1051,8 @@ def mean_per_class_accuracy(labels,
     update_count_op = state_ops.scatter_add(count, labels, is_correct)
 
     def compute_mean_accuracy(_, count, total):
-      per_class_accuracy = _safe_div(count, total, None)
+      per_class_accuracy = math_ops.div_no_nan(
+          count, total, negative_to_zero=True, name=None)
       mean_accuracy_v = math_ops.reduce_mean(
           per_class_accuracy, name='mean_accuracy')
       return mean_accuracy_v
@@ -1076,7 +1060,9 @@ def mean_per_class_accuracy(labels,
     mean_accuracy_v = _aggregate_across_towers(
         metrics_collections, compute_mean_accuracy, count, total)
 
-    update_op = _safe_div(update_count_op, update_total_op, name='update_op')
+    update_op = math_ops.div_no_nan(update_count_op, update_total_op,
+                                    negative_to_zero=True,
+                                    name='update_op')
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
 
@@ -1385,12 +1371,15 @@ def mean_tensor(values,
     with ops.control_dependencies([values]):
       update_count_op = state_ops.assign_add(count, num_values)
 
-    compute_mean = lambda _, t, c: _safe_div(t, c, 'value')
+    compute_mean = lambda _, t, c: math_ops.div_no_nan(
+        t, c, negative_to_zero=True, name='value')
 
     mean_t = _aggregate_across_towers(
         metrics_collections, compute_mean, total, count)
 
-    update_op = _safe_div(update_total_op, update_count_op, 'update_op')
+    update_op = math_ops.div_no_nan(update_total_op, update_count_op,
+                                    negative_to_zero=True,
+                                    name='update_op')
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
 
-- 
GitLab


From a6b016dc0a33f50f20fd1e8e3b9716ddbec75e57 Mon Sep 17 00:00:00 2001
From: Hoeseong Kim <hsgkim@snu.ac.kr>
Date: Thu, 23 Aug 2018 11:14:25 +0900
Subject: [PATCH 0046/1357] comments regarding why rates are disabled

---
 tensorflow/core/kernels/extract_volume_patches_op.cc | 4 ++++
 tensorflow/core/ops/array_ops.cc                     | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/tensorflow/core/kernels/extract_volume_patches_op.cc b/tensorflow/core/kernels/extract_volume_patches_op.cc
index 80405c66dc..0f1d566c75 100644
--- a/tensorflow/core/kernels/extract_volume_patches_op.cc
+++ b/tensorflow/core/kernels/extract_volume_patches_op.cc
@@ -87,6 +87,10 @@ class ExtractVolumePatchesOp : public UnaryOp<T> {
     const int stride_cols = strides_[3];
 
     /*
+    // TODO(hsgkim): enable rates
+    // Rates are disabled as of now due to Eigen's definitions of extract_volume_patch
+    // functions; none of them accept rates as its argument and rates are fixed to
+    // (1, 1, 1, 1, 1). A workaround has to be found for this.
     // In order to enable rates, uncomment the following lines and use
     // ksize_*_eff instead of ksize_* for the second argument of GetWindowedOutputSize
     // calls.
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 48d8327a9e..6c8369200a 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -2583,6 +2583,9 @@ REGISTER_OP("ExtractVolumePatches")
       }
 
       /*
+      // TODO(hsgkim): Enable rates.
+      // See extract_volume_patches_op.cc for why rates are disabled now.
+
       std::vector<int32> rates;
       TF_RETURN_IF_ERROR(c->GetAttr("rates", &rates));
       if (rates.size() != 5) {
-- 
GitLab


From 52d3e5a3a7bece06da072dcfb3f4ac53e83f8470 Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Wed, 22 Aug 2018 23:34:34 -0700
Subject: [PATCH 0047/1357] Added the BUILD files for tbb and updated the
 ngraph.BUILD with CPU library (DEX).

---
 WORKSPACE                          |   7 ++
 tensorflow/workspace.bzl           |  29 +++++---
 third_party/ngraph/ngraph.BUILD    | 109 ++++++++++++++++++++++++++++-
 third_party/ngraph/ngraph_tf.BUILD |  11 +--
 third_party/ngraph/tbb.BUILD       |  52 ++++++++++++++
 5 files changed, 188 insertions(+), 20 deletions(-)
 create mode 100644 third_party/ngraph/tbb.BUILD

diff --git a/WORKSPACE b/WORKSPACE
index 17961829a6..4af1a1e75f 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -79,3 +79,10 @@ new_http_archive(
         "http://download.tensorflow.org/models/speech_commands_v0.01.zip",
     ],
 )
+
+new_local_repository(
+    name = "ngraph",
+    path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph",
+    build_file = "//third_party/ngraph:ngraph.BUILD",
+)
+
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 5d90d0fe64..951cb8a89d 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -833,15 +833,26 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     )
 
     tf_http_archive(
-        name = "ngraph",
-        urls = [
-            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz",
-            "https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz",
-        ],
-        sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c",
-        strip_prefix = "ngraph-0.5.0",
-        build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
-    )
+        name = "tbb",
+        urls = [
+            "https://mirror.bazel.build/github.com/01org/tbb/archive/tbb_2018.zip",
+            "https://github.com/01org/tbb/archive/tbb_2018.zip",
+        ],
+        sha256 = "724686f90bcda78f13b76f297d964008737ccd6399328143c1c0093e73ae6a13",
+        strip_prefix = "tbb-tbb_2018",
+        build_file = clean_dep("//third_party/ngraph:tbb.BUILD"),
+    )
+
+    # tf_http_archive(
+    #     name = "ngraph",
+    #     urls = [
+    #         "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz",
+    #         "https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz",
+    #     ],
+    #     sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c",
+    #     strip_prefix = "ngraph-0.5.0",
+    #     build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
+    # )
 
     tf_http_archive(
         name = "nlohmann_json_lib",
diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD
index 31aa3cee51..f1cf8acbf6 100644
--- a/third_party/ngraph/ngraph.BUILD
+++ b/third_party/ngraph/ngraph.BUILD
@@ -2,6 +2,112 @@ licenses(["notice"])  # 3-Clause BSD
 
 exports_files(["LICENSE"])
 
+cc_library(
+    name = "ngraph_headers",
+    hdrs = glob(["src/ngraph/**/*.hpp"]) ,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "ngraph_cpu_backend",
+    srcs = [
+        "src/ngraph/runtime/cpu/cpu_backend.cpp",
+        "src/ngraph/runtime/cpu/cpu_builder.cpp",
+        "src/ngraph/runtime/cpu/cpu_call_frame.cpp",
+        "src/ngraph/runtime/cpu/cpu_external_function.cpp",
+        "src/ngraph/runtime/cpu/cpu_kernels.cpp",
+        "src/ngraph/runtime/cpu/cpu_layout_descriptor.cpp",
+        "src/ngraph/runtime/cpu/cpu_tensor_view_wrapper.cpp",
+        "src/ngraph/runtime/cpu/cpu_tensor_view.cpp",
+        "src/ngraph/runtime/cpu/cpu_tracing.cpp",
+        "src/ngraph/runtime/cpu/builder/add.cpp",
+        "src/ngraph/runtime/cpu/builder/allreduce.cpp",
+        "src/ngraph/runtime/cpu/builder/avg_pool.cpp",
+        "src/ngraph/runtime/cpu/builder/argmin.cpp",
+        "src/ngraph/runtime/cpu/builder/argmax.cpp",
+        "src/ngraph/runtime/cpu/builder/batch_norm.cpp",
+        "src/ngraph/runtime/cpu/builder/broadcast.cpp",
+        "src/ngraph/runtime/cpu/builder/bounded_relu.cpp",
+        "src/ngraph/runtime/cpu/builder/concat.cpp",
+        "src/ngraph/runtime/cpu/builder/convert.cpp",
+        "src/ngraph/runtime/cpu/builder/convert_layout.cpp",
+        "src/ngraph/runtime/cpu/builder/convolution.cpp",
+        "src/ngraph/runtime/cpu/builder/dot.cpp",
+        "src/ngraph/runtime/cpu/builder/function_call.cpp",
+        "src/ngraph/runtime/cpu/builder/lstm.cpp",
+        "src/ngraph/runtime/cpu/builder/lrn.cpp",
+        "src/ngraph/runtime/cpu/builder/matmul_bias.cpp",
+        "src/ngraph/runtime/cpu/builder/max.cpp",
+        "src/ngraph/runtime/cpu/builder/max_pool.cpp",
+        "src/ngraph/runtime/cpu/builder/min.cpp",
+        "src/ngraph/runtime/cpu/builder/one_hot.cpp",
+        "src/ngraph/runtime/cpu/builder/relu.cpp",
+        "src/ngraph/runtime/cpu/builder/pad.cpp",
+        "src/ngraph/runtime/cpu/builder/product.cpp",
+        "src/ngraph/runtime/cpu/builder/reduce_function.cpp",
+        "src/ngraph/runtime/cpu/builder/reduce_function_window.cpp",
+        "src/ngraph/runtime/cpu/builder/replace_slice.cpp",
+        "src/ngraph/runtime/cpu/builder/reshape.cpp",
+        "src/ngraph/runtime/cpu/builder/reverse.cpp",
+        "src/ngraph/runtime/cpu/builder/reverse_sequence.cpp",
+        "src/ngraph/runtime/cpu/builder/rnn.cpp",
+        "src/ngraph/runtime/cpu/builder/select.cpp",
+        "src/ngraph/runtime/cpu/builder/select_and_scatter.cpp",
+        "src/ngraph/runtime/cpu/builder/sigmoid.cpp",
+        "src/ngraph/runtime/cpu/builder/slice.cpp",
+        "src/ngraph/runtime/cpu/builder/softmax.cpp",
+        "src/ngraph/runtime/cpu/builder/sum.cpp",
+        "src/ngraph/runtime/cpu/kernel/eigen_thread_pool.cpp",
+        "src/ngraph/runtime/cpu/kernel/pad.cpp",
+        "src/ngraph/runtime/cpu/kernel/reduce_max.cpp",
+        "src/ngraph/runtime/cpu/kernel/reduce_sum.cpp",
+        "src/ngraph/runtime/cpu/kernel/reshape.cpp",
+        "src/ngraph/runtime/cpu/mkldnn_emitter.cpp",
+        "src/ngraph/runtime/cpu/mkldnn_invoke.cpp",
+        "src/ngraph/runtime/cpu/mkldnn_utils.cpp",
+        "src/ngraph/runtime/cpu/op/batch_dot.cpp",
+        "src/ngraph/runtime/cpu/op/batch_norm_relu.cpp",
+        "src/ngraph/runtime/cpu/op/bounded_relu.cpp",
+        "src/ngraph/runtime/cpu/op/group_conv.cpp",
+        "src/ngraph/runtime/cpu/op/conv_bias.cpp",
+        "src/ngraph/runtime/cpu/op/conv_relu.cpp",
+        "src/ngraph/runtime/cpu/op/convert_layout.cpp",
+        "src/ngraph/runtime/cpu/op/loop_kernel.cpp",
+        "src/ngraph/runtime/cpu/op/lstm.cpp",
+        "src/ngraph/runtime/cpu/op/matmul_bias.cpp",
+        "src/ngraph/runtime/cpu/op/max_pool_with_indices.cpp",
+        "src/ngraph/runtime/cpu/op/rnn.cpp",
+        "src/ngraph/runtime/cpu/op/sigmoid_mul.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_assignment.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_collapse_dims.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_concat_inputs.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_layout.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_loop_kernel_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_mat_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_rnn_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_workspace_insertion.cpp",
+    ],
+    hdrs = glob(["src/ngraph/runtime/cpu/**/*.hpp"]) + glob([]),
+    deps = [
+        ":ngraph_headers",
+        "@eigen_archive//:eigen",
+        "@nlohmann_json_lib",
+        "@tbb",
+        "@mkl_dnn//:mkl_dnn",
+    ],
+    copts = [
+        "-I external/ngraph/src",
+        "-I external/nlohmann_json_lib/include/",
+        '-D SHARED_LIB_EXT=\\".so\\"',
+        '-D NGRAPH_VERSION=\\"0.5.0\\"',
+        '-D NGRAPH_DEX_ONLY',
+    ],
+    visibility = ["//visibility:public"],
+    alwayslink = 1,
+)
+
 cc_library(
     name = "ngraph_core",
     srcs = glob([
@@ -21,8 +127,9 @@ cc_library(
         "src/ngraph/runtime/interpreter/*.cpp",
         "src/ngraph/runtime/interpreter/*.hpp",
     ]),
-    hdrs = glob(["src/ngraph/**/*.hpp"]),
     deps = [
+        ":ngraph_headers",
+        ":ngraph_cpu_backend",
         "@eigen_archive//:eigen",
         "@nlohmann_json_lib",
     ],
diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index 4d96ccf2f2..0647d9926a 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -7,15 +7,6 @@ load(
     "tf_cc_test",
 )
 
-cc_library(
-    name = "ngraph_libs_linux",
-    srcs = [
-        "lib/libiomp5.so",
-        "lib/libmklml_intel.so",
-    ],
-    visibility = ["//visibility:public"],
-)
-
 cc_library(
     name = "ngraph_tf",
     srcs = [
@@ -58,7 +49,7 @@ cc_library(
         "-I external/ngraph_tf/src",
         "-I external/ngraph_tf/logging",
         "-I external/ngraph/src",
-        "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1",
+        #"-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1",
     ],
     alwayslink = 1,
     visibility = ["//visibility:public"],
diff --git a/third_party/ngraph/tbb.BUILD b/third_party/ngraph/tbb.BUILD
new file mode 100644
index 0000000000..c3e7f7fd35
--- /dev/null
+++ b/third_party/ngraph/tbb.BUILD
@@ -0,0 +1,52 @@
+licenses(["notice"])  # 3-Clause BSD
+
+exports_files(["LICENSE"])
+
+genrule(
+  name = "build_tbb",
+  srcs = glob(["**"]) + [
+    "@local_config_cc//:toolchain",
+  ],
+  cmd = """
+         set -e
+         WORK_DIR=$$PWD
+         DEST_DIR=$$PWD/$(@D)
+         export PATH=$$(dirname $(AR)):$$PATH
+         export CXXFLAGS=$(CC_FLAGS)
+         export NM=$(NM)
+         export AR=$(AR)
+         cd $$(dirname $(location :Makefile))
+
+         #TBB's build needs some help to figure out what compiler it's using
+         if $$CXX --version | grep clang &> /dev/null; then 
+           COMPILER_OPT="compiler=clang"
+         else
+           COMPILER_OPT="compiler=gcc"
+         fi 
+
+         # uses extra_inc=big_iron.inc to specify that static libraries are
+         # built. See https://software.intel.com/en-us/forums/intel-threading-building-blocks/topic/297792
+         make tbb_build_prefix="build" \
+              extra_inc=big_iron.inc \
+              $$COMPILER_OPT; \
+
+         echo cp build/build_{release,debug}/*.a $$DEST_DIR
+         cp build/build_{release,debug}/*.a $$DEST_DIR
+         cd $$WORK_DIR
+  """,
+  outs = [
+    "libtbb.a",
+    "libtbbmalloc.a",
+  ] 
+)
+
+cc_library(
+    name = "tbb",
+    hdrs = glob([
+        "include/serial/**",
+        "include/tbb/**/**",
+        ]),
+    srcs = ["libtbb.a"],
+    includes = ["include"],
+    visibility = ["//visibility:public"],
+)
\ No newline at end of file
-- 
GitLab


From 38f811077dd52820eaa3d5c684f41142de01c7eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Thu, 23 Aug 2018 16:23:03 +0800
Subject: [PATCH 0048/1357] CLN: remove negative_to_zero argument

---
 .../contrib/losses/python/losses/loss_ops.py  |  9 +++--
 .../contrib/metrics/python/ops/metric_ops.py  | 20 +++++------
 tensorflow/contrib/rate/rate.py               |  4 +--
 .../python/keras/engine/training_utils.py     |  4 +--
 tensorflow/python/keras/metrics.py            |  2 +-
 tensorflow/python/ops/losses/losses_impl.py   | 18 +++++-----
 tensorflow/python/ops/math_ops.py             |  5 +--
 tensorflow/python/ops/math_ops_test.py        | 13 --------
 tensorflow/python/ops/metrics_impl.py         | 33 ++++++++++---------
 9 files changed, 47 insertions(+), 61 deletions(-)

diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py
index 29f7953c3b..8a0932c376 100644
--- a/tensorflow/contrib/losses/python/losses/loss_ops.py
+++ b/tensorflow/contrib/losses/python/losses/loss_ops.py
@@ -78,8 +78,9 @@ def _safe_mean(losses, num_present):
       then zero is returned.
   """
   total_loss = math_ops.reduce_sum(losses)
-  return math_ops.div_no_nan(total_loss, num_present,
-                             negative_to_zero=True, name="value")
+  return math_ops.div_no_nan(total_loss,
+                             math_ops.maximum(num_present, 0),
+                             name="value")
 
 
 @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.")
@@ -585,14 +586,12 @@ def mean_pairwise_squared_error(predictions,
     num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
     term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch,
-                                      num_present_per_batch,
-                                      negative_to_zero=True,
+                                      math_ops.maximum(num_present_per_batch),
                                       name="value")
 
     sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices)
     term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff),
                                       math_ops.square(num_present_per_batch),
-                                      negative_to_zero=True,
                                       name="value")
 
     loss = _scale_losses(term1 - term2, weights)
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index d972e7da53..bfef0816aa 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -3188,12 +3188,12 @@ def streaming_covariance(predictions,
     # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount)
     # batch_mean_prediction is E[x_B] in the update equation
     batch_mean_prediction = math_ops.div_no_nan(
-        math_ops.reduce_sum(weighted_predictions), batch_count,
-        negative_to_zero=True,
+        math_ops.reduce_sum(weighted_predictions),
+        math_ops.maximum(batch_count, 0),
         name='batch_mean_prediction')
     delta_mean_prediction = math_ops.div_no_nan(
-        (batch_mean_prediction - mean_prediction) * batch_count, update_count,
-        negative_to_zero=True,
+        (batch_mean_prediction - mean_prediction) * batch_count,
+        math_ops.maximum(update_count, 0),
         name='delta_mean_prediction')
     update_mean_prediction = state_ops.assign_add(mean_prediction,
                                                   delta_mean_prediction)
@@ -3202,12 +3202,12 @@ def streaming_covariance(predictions,
 
     # batch_mean_label is E[y_B] in the update equation
     batch_mean_label = math_ops.div_no_nan(
-        math_ops.reduce_sum(weighted_labels), batch_count,
-        negative_to_zero=True,
+        math_ops.reduce_sum(weighted_labels),
+        math_ops.maximum(batch_count, 0),
         name='batch_mean_label')
     delta_mean_label = math_ops.div_no_nan(
-        (batch_mean_label - mean_label) * batch_count, update_count,
-        negative_to_zero=True,
+        (batch_mean_label - mean_label) * batch_count,
+        math_ops.maximum(update_count, 0),
         name='delta_mean_label')
     update_mean_label = state_ops.assign_add(mean_label, delta_mean_label)
     # prev_mean_label is E[y_A] in the update equation
@@ -3871,8 +3871,8 @@ def cohen_kappa(labels,
       total = math_ops.reduce_sum(pe_row)
       pe_sum = math_ops.reduce_sum(
           math_ops.div_no_nan(
-              pe_row * pe_col, total,
-              negative_to_zero=True,
+              pe_row * pe_col,
+              math_ops.maximum(total, 0),
               name=None))
       po_sum, pe_sum, total = (math_ops.to_double(po_sum),
                                math_ops.to_double(pe_sum),
diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py
index 68f5a6e58a..489d5cce78 100644
--- a/tensorflow/contrib/rate/rate.py
+++ b/tensorflow/contrib/rate/rate.py
@@ -141,6 +141,6 @@ class Rate(object):
     state_ops.assign(self.prev_values, values)
     state_ops.assign(self.prev_denominator, denominator)
 
-    return math_ops.div_no_nan(self.numer, self.denom,
-                               negative_to_zero=True,
+    return math_ops.div_no_nan(self.numer,
+                               math_op.maximum(self.denom, 0),
                                name="safe_rate")
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 12ea75c5ea..eeca60dc57 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -607,8 +607,8 @@ def weighted_masked_objective(fn):
       score_array = math_ops.multiply(score_array, weights)
       score_array = math_ops.reduce_sum(score_array)
       weights = math_ops.reduce_sum(weights)
-      score_array = math_ops.div_no_nan(score_array, weights,
-                                        negative_to_zero=True)
+      score_array = math_ops.div_no_nan(score_array,
+                                        math_ops.maximum(weights, 0))
     return K.mean(score_array)
 
   return weighted
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 6f4353f96a..b5d3138da2 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -455,7 +455,7 @@ class Mean(Metric):
     state_ops.assign_add(self.count, num_values)
 
   def result(self):
-    return math_ops.div_no_nan(self.total, self.count, negative_to_zero=True)
+    return math_ops.div_no_nan(self.total, math_ops.maximum(self.count, 0))
 
 
 class MeanMetricWrapper(Mean):
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 1e65aac115..a980a43f62 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -86,8 +86,9 @@ def _safe_mean(losses, num_present):
       then zero is returned.
   """
   total_loss = math_ops.reduce_sum(losses)
-  return math_ops.div_no_nan(total_loss, num_present,
-                             negative_to_zero=True, name="value")
+  return math_ops.div_no_nan(total_loss,
+                             math_ops.maximum(num_present, 0),
+                             name="value")
 
 
 def _num_present(losses, weights, per_batch=False):
@@ -575,17 +576,18 @@ def mean_pairwise_squared_error(
           keepdims=True)
       num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
-      term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch,
-                                        num_present_per_batch - 1,
-                                        negative_to_zero=True,
-                                        name="value")
+      term1 = 2.0 * math_ops.div_no_nan(
+          sum_squares_diff_per_batch,
+          math_ops.maximum(num_present_per_batch - 1, 0),
+          name="value")
 
       sum_diff = math_ops.reduce_sum(
           diffs, reduction_indices=reduction_indices, keepdims=True)
       term2 = 2.0 * math_ops.div_no_nan(
           math_ops.square(sum_diff),
-          math_ops.multiply(num_present_per_batch, num_present_per_batch - 1),
-          negative_to_zero=True,
+          math_ops.maximum(
+              math_ops.multiply(num_present_per_batch, num_present_per_batch - 1),
+              0),
           name="value")
 
       weighted_losses = math_ops.multiply(term1 - term2, weights)
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index a693b1ebac..67ea534639 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -1039,14 +1039,13 @@ def div(x, y, name=None):
 
 
 @tf_export("div_no_nan")
-def div_no_nan(x, y, name=None, negative_to_zero=False):
+def div_no_nan(x, y, name=None):
   """Computes an unsafe divide which returns 0 if the y is zero.
 
   Args:
     x: A `Tensor`. Must be one of the following types: `float32`, `float64`.
     y: A `Tensor` whose dtype is compatible with `x`.
     name: A name for the operation (optional).
-    negative_to_zero: If `True`, negative is treated as zero in denominator.
   Returns:
     The element-wise value of the x divided by y.
   """
@@ -1059,8 +1058,6 @@ def div_no_nan(x, y, name=None, negative_to_zero=False):
     if x_dtype != y_dtype:
       raise TypeError("x and y must have the same dtype, got %r != %r" %
                       (x_dtype, y_dtype))
-    if negative_to_zero:
-      y = gen_math_ops.maximum(y, 0, name='negative_to_zero')
     return gen_math_ops.div_no_nan(x, y, name=name)
 
 
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index 6e1e5f37c8..6bd41020c5 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -487,19 +487,6 @@ class DivNoNanTest(test_util.TensorFlowTestCase):
         tf_result = math_ops.div_no_nan(nums, divs).eval()
         self.assertAllEqual(tf_result, np_result)
 
-  def testNegativeToZero(self):
-    for dtype in [np.float32, np.float64]:
-      nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1)
-      divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24)
-
-      np_result = np.true_divide(nums, divs)
-      np_result[:, divs[0] <= 0] = 0
-
-      with self.cached_session():
-        tf_result = math_ops.div_no_nan(nums, divs,
-                                        negative_to_zero=True).eval()
-        self.assertAllEqual(tf_result, np_result)
-
 
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index 32f8fd3ed7..e449318020 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -379,12 +379,13 @@ def mean(values,
       update_count_op = state_ops.assign_add(count, num_values)
 
     compute_mean = lambda _, t, c: math_ops.div_no_nan(
-        t, c, negative_to_zero=True, name='value')
+        t, math_ops.maximum(c, 0), name='value')
 
     mean_t = _aggregate_across_towers(
         metrics_collections, compute_mean, total, count)
-    update_op = math_ops.div_no_nan(update_total_op, update_count_op,
-                                    negative_to_zero=True, name='update_op')
+    update_op = math_ops.div_no_nan(update_total_op,
+                                    math_ops.maximum(update_count_op, 0),
+                                    name='update_op')
 
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
@@ -756,21 +757,21 @@ def auc(labels,
       """
       dtp = tp[:num_thresholds - 1] - tp[1:]
       p = tp + fp
-      prec_slope = math_ops.div_no_nan(dtp, p[:num_thresholds - 1] - p[1:],
-                                       negative_to_zero=True,
-                                       name='prec_slope')
+      prec_slope = math_ops.div_no_nan(
+          dtp,
+          math_ops.maximum(p[:num_thresholds - 1] - p[1:], 0),
+          name='prec_slope')
       intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:])
       safe_p_ratio = array_ops.where(
           math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0),
-          math_ops.div_no_nan(p[:num_thresholds - 1], p[1:],
-                              negative_to_zero=True,
+          math_ops.div_no_nan(p[:num_thresholds - 1],
+                              math_ops.maximum(p[1:], 0),
                               name='recall_relative_ratio'),
           array_ops.ones_like(p[1:]))
       return math_ops.reduce_sum(
           math_ops.div_no_nan(
               prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)),
-              tp[1:] + fn[1:],
-              negative_to_zero=True,
+              math_ops.maximum(tp[1:] + fn[1:], 0),
               name='pr_auc_increment'),
           name='interpolate_pr_auc')
 
@@ -1052,7 +1053,7 @@ def mean_per_class_accuracy(labels,
 
     def compute_mean_accuracy(_, count, total):
       per_class_accuracy = math_ops.div_no_nan(
-          count, total, negative_to_zero=True, name=None)
+          count, math_ops.maximum(total, 0), name=None)
       mean_accuracy_v = math_ops.reduce_mean(
           per_class_accuracy, name='mean_accuracy')
       return mean_accuracy_v
@@ -1060,8 +1061,8 @@ def mean_per_class_accuracy(labels,
     mean_accuracy_v = _aggregate_across_towers(
         metrics_collections, compute_mean_accuracy, count, total)
 
-    update_op = math_ops.div_no_nan(update_count_op, update_total_op,
-                                    negative_to_zero=True,
+    update_op = math_ops.div_no_nan(update_count_op,
+                                    math_ops.maximum(update_total_op, 0),
                                     name='update_op')
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
@@ -1372,13 +1373,13 @@ def mean_tensor(values,
       update_count_op = state_ops.assign_add(count, num_values)
 
     compute_mean = lambda _, t, c: math_ops.div_no_nan(
-        t, c, negative_to_zero=True, name='value')
+        t, math_ops.maximum(c, 0), name='value')
 
     mean_t = _aggregate_across_towers(
         metrics_collections, compute_mean, total, count)
 
-    update_op = math_ops.div_no_nan(update_total_op, update_count_op,
-                                    negative_to_zero=True,
+    update_op = math_ops.div_no_nan(update_total_op,
+                                    math_ops.maximum(update_count_op, 0),
                                     name='update_op')
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
-- 
GitLab


From 407a64b773f15bfe67a2b5b1979134368464b6ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Thu, 23 Aug 2018 16:52:00 +0800
Subject: [PATCH 0049/1357] TST: revise test case and too long line

---
 .../python/estimator/canned/boosted_trees.py  |   7 +-
 .../estimator/canned/boosted_trees_test.py    | 125 +++++++++++++-----
 2 files changed, 96 insertions(+), 36 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index b1d5d60fb0..f2a5b9178b 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -213,8 +213,13 @@ def _generate_feature_name_mapping(sorted_feature_columns):
                     feature_column_lib._VocabularyListCategoricalColumn):  # pylint:disable=protected-access
         for value in categorical_column.vocabulary_list:
           names.append('{}:{}'.format(column.name, value))
+      elif isinstance(categorical_column,
+                      feature_column_lib._BucketizedColumn):  # pylint:disable=protected-access
+        boundaries = [-np.inf] + list(categorical_column.boundaries) + [np.inf]
+        for pair in zip(boundaries[:-1], boundaries[1:]):
+          names.append('{}:{}'.format(column.name, pair))
       else:
-        for num in categorical_column._num_buckets:  # pylint:disable=protected-access
+        for num in range(categorical_column._num_buckets):  # pylint:disable=protected-access
           names.append('{}:{}'.format(column.name, num))
     else:
       names.append(column.name)
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 24d3a3501e..7620f73425 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -564,13 +564,17 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     # Train for a few steps, and validate final checkpoint.
     est.train(input_fn, steps=num_steps)
 
-    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
+    feature_names_expected = ['f_0_bucketized',
+                              'f_2_bucketized',
+                              'f_1_bucketized']
 
-    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.833933, 0.606342, 0.0], importances)
 
-    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=True)
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.579010, 0.420990, 0.0], importances)
 
@@ -599,7 +603,9 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(ValueError, 'empty serialized string'):
       est.experimental_feature_importances(normalize=True)
 
-  def _create_fake_checkpoint_with_tree_ensemble_proto(self, est, tree_ensemble_text):
+  def _create_fake_checkpoint_with_tree_ensemble_proto(self,
+                                                       est,
+                                                       tree_ensemble_text):
     with ops.Graph().as_default():
       with ops.name_scope('boosted_trees') as name:
         tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
@@ -731,14 +737,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
         tree_weights: 1.0
         tree_weights: 1.0
         """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
-
-    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
-    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
+
+    feature_names_expected = ['f_0_bucketized',
+                              'f_2_bucketized',
+                              'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
+    # Gain sum for each features:
+    # = 1.0 * [3 + 1, 2, 2] + 1.0 * [1, 1, 0]
     self.assertAllClose([5.0, 3.0, 2.0], importances)
 
-    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=True)
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.5, 0.3, 0.2], importances)
 
@@ -820,14 +833,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
         tree_weights: 0.6
         tree_weights: 1.0
         """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
-
-    feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized']
-    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
+
+    feature_names_expected = ['f_0_bucketized',
+                              'f_2_bucketized',
+                              'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
+    # Gain sum for each features:
+    # = 0.4 * [12.5, 0, 5] + 0.6 * [0, 5, 0] + 1.0 * [0, 0, 0]
     self.assertAllClose([5.0, 3.0, 2.0], importances)
 
-    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=True)
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.5, 0.3, 0.2], importances)
 
@@ -856,11 +876,15 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
         tree_weights: 1.0
         tree_weights: 1.0
         """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
 
     # Reverse order because feature importances are sorted by np.argsort(f)[::-1]
-    feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized']
-    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    feature_names_expected = ['f_2_bucketized',
+                              'f_1_bucketized',
+                              'f_0_bucketized']
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.0, 0.0, 0.0], importances)
 
@@ -868,17 +892,20 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
                                  'all empty or contain only a root node'):
       est.experimental_feature_importances(normalize=True)
 
-  def TestFeatureImportancesNamesForCategoricalColumn(self):
+  def testFeatureImportancesNamesForCategoricalColumn(self):
     categorical = feature_column.categorical_column_with_vocabulary_list(
         key='categorical', vocabulary_list=('bad', 'good', 'ok'))
     feature_indicator = feature_column.indicator_column(categorical)
     bucketized_col = feature_column.bucketized_column(
         feature_column.numeric_column(
-            'an_uninformative_feature', dtype=dtypes.float32),
+            'continuous', dtype=dtypes.float32),
         BUCKET_BOUNDARIES)
+    bucketized_indicator = feature_column.indicator_column(bucketized_col)
 
     est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=[bucketized_col, feature_indicator],
+        feature_columns=[feature_indicator,
+                         bucketized_col,
+                         bucketized_indicator],
         n_batches_per_layer=1,
         n_trees=2,
         learning_rate=1.0,
@@ -898,7 +925,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
           }
           nodes {
             bucketized_split {
-              feature_id: 3
+              feature_id: 4
               left_id: 3
               right_id: 4
             }
@@ -930,36 +957,63 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
               right_id: 2
             }
             metadata {
-              gain: 3.0
+              gain: 1.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 5
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 2.0
             }
           }
           nodes {
             leaf {
-              scalar: -0.34
+              scalar: -2.34
             }
           }
           nodes {
             leaf {
-              scalar: 1.34
+              scalar: 3.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 4.34
             }
           }
         }
         tree_weights: 1.0
         tree_weights: 1.0
         """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
-
-    feature_names_expected = ['categorical_indicator:good',
-                              'an_uninformative_feature_bucketized',
-                              'categorical_indicator:ok',
-                              'categorical_indicator:bad']
-    feature_names, importances = est.experimental_feature_importances(normalize=False)
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
+
+    feature_names_expected = ['categorical_indicator:ok',
+                              'continuous_bucketized_indicator:(-2.0, 0.5)',
+                              'continuous_bucketized_indicator:(-inf, -2.0)',
+                              'categorical_indicator:bad',
+                              # Reverse order because feature importances
+                              # are sorted by np.argsort(f)[::-1]
+                              'continuous_bucketized_indicator:(12.0, inf)',
+                              'continuous_bucketized_indicator:(0.5, 12.0)',
+                              'continuous_bucketized',
+                              'categorical_indicator:good']
+
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([5.0, 3.0, 2.0, 0.0], importances)
+    # Gain sum for each features:
+    # = 1.0 * [5, 0, 2, 0, 0, 0, 0, 0] + 1.0 * [0, 2, 0, 1, 0, 0, 0, 0]
+    self.assertAllClose([5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0], importances)
 
-    feature_names, importances = est.experimental_feature_importances(normalize=True)
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=True)
     self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.5, 0.3, 0.2, 0.0], importances)
+    self.assertAllClose([0.5, 0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], importances)
 
   def testNegativeFeatureImportances(self):
     est = boosted_trees.BoostedTreesClassifier(
@@ -995,7 +1049,8 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
         }
         tree_weights: -1.0
         """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text)
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
 
     with self.assertRaisesRegexp(AssertionError, 'non-negative'):
       est.experimental_feature_importances(normalize=False)
-- 
GitLab


From cb5c61a3e11a37fb39a246aaf8ed6d02dd9ae9ab Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Fri, 24 Aug 2018 11:51:34 +0800
Subject: [PATCH 0050/1357] Refine LeakyRelu codes and update APIs.

---
 .../api_def/base_api/api_def_LeakyRelu.pbtxt  |  4 ++++
 .../base_api/api_def_LeakyReluGrad.pbtxt      | 24 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  2 +-
 tensorflow/python/eager/pywrap_tfe_src.cc     |  2 +-
 4 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt

diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
new file mode 100644
index 0000000000..4a61889f54
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "LeakyRelu"
+  summary: "Computes rectified linear: `max(features, features * alpha)`."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt
new file mode 100644
index 0000000000..e427526602
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt
@@ -0,0 +1,24 @@
+op {
+  graph_op_name: "LeakyReluGrad"
+  visibility: HIDDEN
+  in_arg {
+    name: "gradients"
+    description: <<END
+The backpropagated gradients to the corresponding LeakyRelu operation.
+END
+  }
+  in_arg {
+    name: "features"
+    description: <<END
+The features passed as input to the corresponding LeakyRelu operation,
+OR the outputs of that operation (both work equivalently).
+END
+  }
+  out_arg {
+    name: "backprops"
+    description: <<END
+`gradients * (features > 0) + alpha * gradients * (featurs <= 0)`.
+END
+  }
+  summary: "Computes rectified linear gradients for a LeakyRelu operation."
+}
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 837e91bc23..7693c2d485 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -13637,7 +13637,7 @@ op {
   }
 }
 op {
-  name: "LeakykReluGrad"
+  name: "LeakyReluGrad"
   input_arg {
     name: "gradients"
     type_attr: "T"
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 9b3b5fd7aa..18fafd0de1 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) {
           "SoftplusGrad",
           "Softsign",
           "ReluGrad",
+          "LeakyRelu",
           "LeakyReluGrad",
           "Conv2D",
           "DepthwiseConv2dNative",
@@ -1800,7 +1801,6 @@ bool OpDoesntRequireInput(const string& op_name) {
           "BiasAdd",
           "Relu",
           "Relu6",
-          "LeakyRelu",
           "Elu",
           "Selu",
           "SparseSoftmaxCrossEntropyWithLogits",
-- 
GitLab


From aa02f7f3622dca8c7b03e745cdb8a10797f32f61 Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Thu, 23 Aug 2018 22:56:22 -0700
Subject: [PATCH 0051/1357] Updated build files

---
 third_party/ngraph/ngraph.BUILD    |  8 ++--
 third_party/ngraph/ngraph_tf.BUILD |  2 +-
 third_party/ngraph/tbb.BUILD       | 72 +++++++++++++++++-------------
 3 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD
index f1cf8acbf6..3d9c3ac044 100644
--- a/third_party/ngraph/ngraph.BUILD
+++ b/third_party/ngraph/ngraph.BUILD
@@ -4,7 +4,7 @@ exports_files(["LICENSE"])
 
 cc_library(
     name = "ngraph_headers",
-    hdrs = glob(["src/ngraph/**/*.hpp"]) ,
+    hdrs = glob(["src/ngraph/**/*.hpp"]),
     visibility = ["//visibility:public"],
 )
 
@@ -102,7 +102,7 @@ cc_library(
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
         '-D NGRAPH_VERSION=\\"0.5.0\\"',
-        '-D NGRAPH_DEX_ONLY',
+        "-D NGRAPH_DEX_ONLY",
     ],
     visibility = ["//visibility:public"],
     alwayslink = 1,
@@ -124,8 +124,8 @@ cc_library(
         "src/ngraph/pass/*.hpp",
         "src/ngraph/runtime/*.cpp",
         "src/ngraph/type/*.cpp",
-        "src/ngraph/runtime/interpreter/*.cpp",
-        "src/ngraph/runtime/interpreter/*.hpp",
+        #"src/ngraph/runtime/interpreter/*.cpp",
+        #"src/ngraph/runtime/interpreter/*.hpp",
     ]),
     deps = [
         ":ngraph_headers",
diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index 0647d9926a..d0231e468e 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -71,7 +71,7 @@ tf_cc_test(
     ],
     extra_copts = [
         "-fexceptions ",
-        "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1",
+        #"-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1",
         "-I external/ngraph_tf/src",
         "-I external/ngraph_tf/logging",
         "-I external/ngraph/src",
diff --git a/third_party/ngraph/tbb.BUILD b/third_party/ngraph/tbb.BUILD
index c3e7f7fd35..7c760cb3b3 100644
--- a/third_party/ngraph/tbb.BUILD
+++ b/third_party/ngraph/tbb.BUILD
@@ -2,42 +2,50 @@ licenses(["notice"])  # 3-Clause BSD
 
 exports_files(["LICENSE"])
 
+# Taken from: https://github.com/rnburn/satyr/blob/master/bazel/tbb.BUILD
+# License: MIT
+# See: https://github.com/rnburn/satyr/blob/master/LICENSE
+
 genrule(
-  name = "build_tbb",
-  srcs = glob(["**"]) + [
-    "@local_config_cc//:toolchain",
-  ],
-  cmd = """
-         set -e
-         WORK_DIR=$$PWD
-         DEST_DIR=$$PWD/$(@D)
-         export PATH=$$(dirname $(AR)):$$PATH
-         export CXXFLAGS=$(CC_FLAGS)
-         export NM=$(NM)
-         export AR=$(AR)
-         cd $$(dirname $(location :Makefile))
-
-         #TBB's build needs some help to figure out what compiler it's using
-         if $$CXX --version | grep clang &> /dev/null; then 
+    name = "build_tbb",
+    srcs = glob(["**"]) + [
+        "@local_config_cc//:toolchain",
+    ],
+    cmd = """
+	    set -e
+	    WORK_DIR=$$PWD
+		DEST_DIR=$$PWD/$(@D)
+        export PATH=$$(dirname $(AR)):$$PATH
+		export CXXFLAGS=$(CC_FLAGS)
+		export NM=$(NM)
+		export AR=$(AR)
+		cd $$(dirname $(location :Makefile))
+
+        #TBB's build needs some help to figure out what compiler it's using
+        if $$CXX --version | grep clang &> /dev/null; then 
            COMPILER_OPT="compiler=clang"
-         else
-           COMPILER_OPT="compiler=gcc"
-         fi 
+        else
+			COMPILER_OPT="compiler=gcc"
 
-         # uses extra_inc=big_iron.inc to specify that static libraries are
-         # built. See https://software.intel.com/en-us/forums/intel-threading-building-blocks/topic/297792
-         make tbb_build_prefix="build" \
+          #  # Workaround for TBB bug
+          #  # See https://github.com/01org/tbb/issues/59
+          #  CXXFLAGS="$$CXXFLAGS -flifetime-dse=1"
+        fi 
+
+        # uses extra_inc=big_iron.inc to specify that static libraries are
+        # built. See https://software.intel.com/en-us/forums/intel-threading-building-blocks/topic/297792
+        make tbb_build_prefix="build" \
               extra_inc=big_iron.inc \
               $$COMPILER_OPT; \
 
-         echo cp build/build_{release,debug}/*.a $$DEST_DIR
-         cp build/build_{release,debug}/*.a $$DEST_DIR
-         cd $$WORK_DIR
-  """,
-  outs = [
-    "libtbb.a",
-    "libtbbmalloc.a",
-  ] 
+        echo cp build/build_{release,debug}/*.a $$DEST_DIR
+        cp build/build_{release,debug}/*.a $$DEST_DIR
+		cd $$WORK_DIR
+	""",
+    outs = [
+        "libtbb.a",
+        "libtbbmalloc.a",
+    ],
 )
 
 cc_library(
@@ -45,8 +53,8 @@ cc_library(
     hdrs = glob([
         "include/serial/**",
         "include/tbb/**/**",
-        ]),
+    ]),
     srcs = ["libtbb.a"],
     includes = ["include"],
     visibility = ["//visibility:public"],
-)
\ No newline at end of file
+)
-- 
GitLab


From c7c152981cdf9494dce9efdeed04a9c3ae7a8e3d Mon Sep 17 00:00:00 2001
From: weidankong <kongweidan84@gmail.com>
Date: Fri, 24 Aug 2018 11:23:26 -0700
Subject: [PATCH 0052/1357] Accumulated Gradient Normalization Optimizer

---
 tensorflow/contrib/opt/BUILD                  |  19 ++
 tensorflow/contrib/opt/__init__.py            |   3 +
 .../opt/python/training/agn_optimizer.py      | 309 ++++++++++++++++++
 .../opt/python/training/agn_optimizer_test.py | 279 ++++++++++++++++
 4 files changed, 610 insertions(+)
 create mode 100644 tensorflow/contrib/opt/python/training/agn_optimizer.py
 create mode 100644 tensorflow/contrib/opt/python/training/agn_optimizer_test.py

diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index 5319a8b655..642cda7845 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -16,6 +16,7 @@ py_library(
         "__init__.py",
         "python/training/adamax.py",
         "python/training/addsign.py",
+        "python/training/agn_optimizer.py",
         "python/training/drop_stale_gradient_optimizer.py",
         "python/training/elastic_average_optimizer.py",
         "python/training/external_optimizer.py",
@@ -242,6 +243,24 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "agn_optimizer_test",
+    srcs = ["python/training/agn_optimizer_test.py"],
+    additional_deps = [
+        ":opt_py",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:variables",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:training",
+        "//tensorflow/python:ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//third_party/py/numpy",
+    ],
+)
+
 tf_py_test(
     name = "elastic_average_optimizer_test",
     srcs = ["python/training/elastic_average_optimizer_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 781621dba0..b814a57680 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 # pylint: disable=wildcard-import
 from tensorflow.contrib.opt.python.training.adamax import *
 from tensorflow.contrib.opt.python.training.addsign import *
+from tensorflow.contrib.opt.python.training.agn_optimizer import *
 from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import *
 from tensorflow.contrib.opt.python.training.elastic_average_optimizer import *
 from tensorflow.contrib.opt.python.training.external_optimizer import *
@@ -59,6 +60,8 @@ _allowed_symbols = [
     'VariableClippingOptimizer',
     'MultitaskOptimizerWrapper',
     'clip_gradients_by_global_norm',
+    'AGNOptimizer',
+    'AGNCustomGetter',
     'ElasticAverageOptimizer',
     'ElasticAverageCustomGetter',
     'ModelAverageOptimizer',
diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py
new file mode 100644
index 0000000000..dc1f8d6347
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py
@@ -0,0 +1,309 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import gen_nn_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import logging_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import gradient_descent
+
+
+GLOBAL_VARIABLE_NAME = 'global_center_variable'
+GRAD_VARIABLE_NAME = 'grad_variable'
+
+class AGNCustomGetter(object):
+  """Custom_getter class is used to do:
+  1. Change trainable variables to local collection and place them at worker
+    device
+  2. Generate global variables(global center variables)
+  3. Generate grad variables(gradients) which record the gradients sum
+    and place them at worker device
+    Notice that the class should be used with tf.replica_device_setter,
+    so that the global center variables and global step variable can be placed
+    at ps device.
+  """
+  def __init__(self, worker_device):
+    """
+      Args:
+        worker_device: put the grad_variables on worker device
+    """
+    self._worker_device = worker_device
+    self._global_map = {}
+    self._grad_map = {}
+
+  def __call__(self, getter, name, trainable, collections, *args, **kwargs):
+    if trainable:
+      with ops.device(self._worker_device):
+        local_var = getter(
+            name,
+            trainable=True,
+            collections=[ops.GraphKeys.LOCAL_VARIABLES],
+            *args,
+            **kwargs)
+      if kwargs['reuse'] == True:
+        return local_var
+      global_center_variable = getter(
+          name='%s/%s' % (GLOBAL_VARIABLE_NAME, name),
+          trainable=False,
+          collections=[ops.GraphKeys.GLOBAL_VARIABLES],
+          *args,
+          **kwargs)
+
+      with ops.device(self._worker_device):
+        grad_variable = getter(
+            name='%s/%s' % (GRAD_VARIABLE_NAME, name),
+            trainable=False,
+            collections=[ops.GraphKeys.LOCAL_VARIABLES],
+            *args,
+            **kwargs)
+      if kwargs['partitioner'] is None:
+        self._grad_map[local_var] = grad_variable
+        self._global_map[local_var] = global_center_variable
+      else:
+        v_list = list(local_var)
+        for i in range(len(v_list)):
+          self._grad_map[v_list[i]] = list(grad_variable)[i]
+          self._global_map[v_list[i]] = list(global_center_variable)[i]
+      return local_var
+    else:
+      return getter(name,
+                    trainable=trainable,
+                    collections=collections,
+                    *args,
+                    **kwargs)
+
+class AGNOptimizer(optimizer.Optimizer):
+  """Wrapper that implements the Accumulated GradientNormalization algorithm.
+  Reference:
+    Accumulated Gradient Normalization: Joeri Hermans ACML2017
+    https://arxiv.org/abs/1710.02368
+  """
+
+  def __init__(self,
+               optimizer,
+               num_worker,
+               custom_getter,
+               communication_period=10,
+               use_locking=True,
+               name='AGNOptimizer'):
+    """Construct a new AGN optimizer.
+
+    Args:
+      optimizer: input optimizer, can be sgd/momentum/adam etc.
+      num_worker: The number of workers
+      custom_getter: The AGNCustomGetter
+      communication_period: An int point value to controls the frequency
+        of the communication between every worker and the ps.
+      use_locking: If True use locks for update operations.
+      name: Optional name prefix for the operations created when applying
+        gradients. Defaults to "AGNOptimizer".
+
+    """
+    super(AGNOptimizer, self).__init__(use_locking, name)
+    self._opt = optimizer
+    self._num_worker = num_worker
+    self._period = communication_period
+    self._global_map = custom_getter._global_map
+    self._grad_map = custom_getter._grad_map
+    self._local_step = variable_scope.get_variable(
+        initializer=0,
+        trainable=False,
+        collections=[ops.GraphKeys.LOCAL_VARIABLES],
+        name='local_step')
+    self._opt._prepare()
+
+  def compute_gradients(self,
+                        loss,
+                        var_list=None,
+                        gate_gradients=optimizer.Optimizer.GATE_OP,
+                        aggregation_method=None,
+                        colocate_gradients_with_ops=False,
+                        grad_loss=None):
+    """Compute gradients of `loss` for the variables in `var_list`.
+    Args:
+      loss: A Tensor containing the value to minimize.
+      var_list: Optional list or tuple of `tf.Variable` to update to minimize
+        `loss`.  Defaults to the list of variables collected in the graph
+        under the key `GraphKey.TRAINABLE_VARIABLES`.
+      gate_gradients: How to gate the computation of gradients.  Can be
+        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+      aggregation_method: Specifies the method used to combine gradient terms.
+        Valid values are defined in the class `AggregationMethod`.
+      colocate_gradients_with_ops: If True, try colocating gradients with
+        the corresponding op.
+      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`
+
+    Returns:
+      A list of (gradient, variable) pairs. Variable is always present, but
+      gradient can be `None`.
+    """
+    if not var_list:
+      var_list = variables.trainable_variables()
+    return self._opt.compute_gradients(loss,
+                                       var_list,
+                                       gate_gradients,
+                                       aggregation_method,
+                                       colocate_gradients_with_ops,
+                                       grad_loss)
+
+  def _adjust_optimizer_variable_collection(self, opt_vars):
+    """ Move optimizer created variables to local collection
+    """
+    g = ops.get_default_graph()
+    idx = 0
+    for _ in range(len(g._collections[ops.GraphKeys.GLOBAL_VARIABLES])):
+      var = g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx]
+      name = var.op.name
+      if name in opt_vars:
+        ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var)
+        del g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx]
+      else:
+        idx += 1
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    """Apply gradients to global variables.
+
+    This is the second part of `minimize()`. It returns an `Operation` that
+    applies gradients.
+
+    Args:
+      grads_and_vars: List of (gradient, variable) pairs as returned by
+        `compute_gradients()`.
+      global_step: Optional `Variable` to increment by one after the
+        variables have been updated.
+      name: Optional name for the returned operation.  Default to the
+        name passed to the `Optimizer` constructor.
+
+    Returns:
+      An `Operation` that applies the specified gradients. If `global_step`
+      was not None, that operation also increments `global_step`.
+    """
+    local_vars = [v for g, v in grads_and_vars if g is not None]
+    grads = [g for g, v in grads_and_vars if g is not None]
+    # theta = theta - lr * grad
+    global_old = set(n.op.name for n in variables.global_variables())
+    local_update_op = self._opt.apply_gradients(grads_and_vars)
+    global_new = set(n.op.name for n in variables.global_variables())
+
+    self._adjust_optimizer_variable_collection(global_new - global_old)
+
+    # a = a + grad
+    update_ops = []
+    update_ops.append(local_update_op)
+    grad_vars = [self._grad_map[var] for var in local_vars]
+    for g, grad_var in zip (grads, grad_vars):
+      update_ops.append(state_ops.assign_add(grad_var, g))
+
+    global_center_vars = [self._global_map[var] for var in local_vars]
+
+    # update global variables.
+    def _Update_global_variables():
+      global_norm = []
+      # a = a / t
+      for g in grad_vars:
+        global_norm.append(state_ops.assign(g, g / self._period))
+      # apply
+      with ops.control_dependencies(global_norm):
+        apply_global_op = self._opt.apply_gradients(zip(grad_vars,
+                                                        global_center_vars))
+
+      # pull
+      with ops.control_dependencies([apply_global_op]):
+        update_ops = []
+        if global_step:
+          with ops.colocate_with(global_step):
+            update_ops.append(state_ops.assign_add(global_step, 1))
+
+        for lvar in local_vars:
+          g_val = self._global_map[lvar].read_value()
+          update_ops.append(state_ops.assign(lvar, g_val))
+        for grad_var in grad_vars:
+          update_ops.append(state_ops.assign(grad_var,
+                                             array_ops.zeros_like(grad_var)))
+        variable_update = control_flow_ops.group(*(update_ops))
+      return variable_update
+
+    local_update = state_ops.assign_add(
+      self._local_step, 1, name='local_step_update').op
+
+    with ops.control_dependencies([local_update]):
+      condition = math_ops.equal(
+          math_ops.mod(self._local_step, self._period), 0)
+    with ops.control_dependencies(update_ops):
+      conditional_update = control_flow_ops.cond(
+          condition, _Update_global_variables, control_flow_ops.no_op)
+    return conditional_update
+
+  def get_init_op(self, task_index):
+    """Returns the op to let all the local variables and local center
+    variables equal to the global center variables before the training begins
+    """
+    init_ops = []
+    local_vars = variables.trainable_variables()
+    global_center_vars = [self._global_map[var] for var in local_vars]
+    grad_vars = [self._grad_map[var] for var in local_vars]
+    if not (local_vars and global_center_vars and grad_vars):
+      raise ValueError('The lists of local_variables, global_center_variables,'
+                       'grad_center_variables should not be empty')
+    for lvar, gc_var in zip(local_vars, global_center_vars):
+      init_ops.append(state_ops.assign(gc_var, lvar))
+    for g in grad_vars:
+      init_ops.append(state_ops.assign(g, array_ops.zeros_like(g)))
+    init_op = control_flow_ops.group(*(init_ops))
+    return init_op
+
+  def make_session_run_hook(self, is_chief, task_index):
+    """Creates a hook to handle AGNOptimizerHook ops such as initialization."""
+    return _AGNOptimizerHook(self, is_chief, task_index)
+
+
+class _AGNOptimizerHook(session_run_hook.SessionRunHook):
+
+  def __init__(self, agn_optimizer, is_chief, task_index):
+    """Creates hook to handle AGNOptimizer initialization ops.
+
+    Args:
+      agn_optimizer: `AGNOptimizer` which this hook will initialize.
+      is_chief: `Bool`, whether is this a chief replica or not.
+      task_index: int, task_index of worker
+    """
+    self._agn_optimizer = agn_optimizer
+    self._is_chief = is_chief
+    self._task_index = task_index
+
+  def begin(self):
+    self._local_init_op = variables.local_variables_initializer()
+    self._global_init_op = None
+    if self._is_chief:
+      self._global_init_op = variables.global_variables_initializer()
+    self._variable_init_op = self._agn_optimizer.get_init_op(self._task_index)
+
+  def after_create_session(self, session, coord):
+    """Run initialization ops"""
+    session.run(self._variable_init_op)
diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
new file mode 100644
index 0000000000..091943de02
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
@@ -0,0 +1,279 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Tests for EAOptimizer."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import portpicker
+
+from tensorflow.python.client import session
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.training import device_setter
+from tensorflow.python.training import momentum
+from tensorflow.python.training import server_lib
+from tensorflow.python.training import training
+from tensorflow.python.training import training_util
+
+from tensorflow.contrib.opt.python.training.agn_optimizer import \
+  AGNOptimizer, AGNCustomGetter, GLOBAL_VARIABLE_NAME
+
+
+def create_local_cluster(num_workers, num_ps, protocol="grpc"):
+  """Create local GRPC servers and return them."""
+  worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
+  ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
+  cluster_dict = {
+      "worker": ["localhost:%s" % port for port in worker_ports],
+      "ps": ["localhost:%s" % port for port in ps_ports]
+  }
+  cs = server_lib.ClusterSpec(cluster_dict)
+
+  workers = [
+      server_lib.Server(
+          cs, job_name="worker", protocol=protocol, task_index=ix, start=True)
+      for ix in range(num_workers)
+  ]
+  ps_servers = [
+      server_lib.Server(
+          cs, job_name="ps", protocol=protocol, task_index=ix, start=True)
+      for ix in range(num_ps)
+  ]
+
+  return cluster_dict, workers, ps_servers
+
+
+# Creates the workers and return their sessions, graphs, train_ops.
+# Cheif worker will update at last
+def _get_workers(num_workers, period, workers, num_ps=1):
+  sessions = []
+  graphs = []
+  train_ops = []
+  for worker_id in range(num_workers):
+    graph = ops.Graph()
+    is_chief = (worker_id == 0)
+    with graph.as_default():
+      worker_device = "/job:worker/task:%d/cpu:0" % (worker_id)
+      ps_device = device_setter.replica_device_setter(
+                  worker_device=worker_device,
+                  ps_device="/job:ps/task:0/cpu:0",
+                  ps_tasks=1)
+      agn_getter = AGNCustomGetter(worker_device=worker_device)
+      with variable_scope.variable_scope(
+          "", custom_getter=agn_getter), ops.device(ps_device):
+        global_step = training_util.get_or_create_global_step()
+        var_0 = variable_scope.get_variable(initializer=0.0, name="v0")
+        var_1 = variable_scope.get_variable(initializer=0.5, name="v1")
+      if num_ps > 1:
+        with variable_scope.variable_scope("",
+            partitioner=partitioned_variables.fixed_size_partitioner(
+                num_ps, axis=0),
+            custom_getter=agn_getter), ops.device(ps_device):
+
+          partition_var = variable_scope.get_variable(
+              'partition_var',
+              shape=[2, 4],
+              initializer=init_ops.zeros_initializer)
+          part_0 = list(partition_var)[0]
+          part_1 = list(partition_var)[1]
+
+      with ops.device("/job:worker/task:" + str(worker_id)):
+        grads_0 = constant_op.constant(-1.0)
+        grads_1 = constant_op.constant(-1.0)
+        grads_part_0 = constant_op.constant([[-1., -1., -1., -1.]])
+        grads_part_1 = constant_op.constant([[-1., -1., -1., -1.]])
+
+        optimizer = \
+            momentum.MomentumOptimizer(learning_rate=0.1, momentum=0.0)
+        opt = AGNOptimizer(
+            optimizer,
+            num_worker=num_workers,
+            communication_period=period,
+            custom_getter=agn_getter)
+        if num_ps == 1:
+          train_op = [
+            opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]),
+                                global_step)
+          ]
+        else:
+          train_op = [
+            opt.apply_gradients(([grads_0, var_0],
+                                 [grads_1, var_1],
+                                 [grads_part_0, part_0],
+                                 [grads_part_1, part_1]),
+                                global_step)
+          ]
+        hook = opt.make_session_run_hook(is_chief, worker_id)
+      # Creates MonitoredSession
+      sess = training.MonitoredTrainingSession(
+          workers[worker_id].target, hooks=[hook])
+
+    sessions.append(sess)
+    graphs.append(graph)
+    train_ops.append(train_op)
+
+  return sessions, graphs, train_ops
+
+
+class AGNOptimizerTest(test.TestCase):
+
+  def _run(self, train_op, sess):
+    sess.run(train_op)
+
+  def test1Workers2Period(self):
+    num_workers = 1
+    communication_period = 4
+    num_ps = 1
+    _, workers, _ = create_local_cluster(
+        num_workers=num_workers, num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(
+        num_workers, communication_period, workers)
+
+    var_0 = graphs[0].get_tensor_by_name("v0:0")
+    var_1 = graphs[0].get_tensor_by_name("v1:0")
+    global_step = training_util.get_global_step(graphs[0])
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(0.5, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(0.5, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+    # step 0
+    sessions[0].run(train_ops[0])
+    self.assertNear(0.1, sessions[0].run(var_0), 1e-6)
+    self.assertNear(0.6, sessions[0].run(var_1), 1e-6)
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(0.5, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+
+    # 2 & 3
+    sessions[0].run(train_ops[0])
+    sessions[0].run(train_ops[0])
+    self.assertNear(0.3, sessions[0].run(var_0), 1e-6)
+    self.assertNear(0.8, sessions[0].run(var_1), 1e-6)
+
+    # 4
+    sessions[0].run(train_ops[0])
+    # pull
+    self.assertAllEqual(sessions[0].run(var_0), sessions[0].run(var_0_g))
+    self.assertAllEqual(sessions[0].run(var_1), sessions[0].run(var_1_g))
+    self.assertNear(0.1, sessions[0].run(var_0), 1e-6)
+    self.assertNear(0.6, sessions[0].run(var_1), 1e-6)
+
+    sessions[0].run(train_ops[0])
+    sessions[0].run(train_ops[0])
+    sessions[0].run(train_ops[0])
+    sessions[0].run(train_ops[0])
+    self.assertAllEqual(sessions[0].run(var_0), sessions[0].run(var_0_g))
+    self.assertAllEqual(sessions[0].run(var_1), sessions[0].run(var_1_g))
+    self.assertNear(0.2, sessions[0].run(var_0), 1e-6)
+    self.assertNear(0.7, sessions[0].run(var_1), 1e-6)
+
+  def test2Worker1Period(self):
+    num_workers = 2
+    communication_period = 1
+    num_ps = 2
+    _, workers, _ = create_local_cluster(
+        num_workers=num_workers, num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(
+        num_workers, communication_period, workers, num_ps=2)
+
+    var_0 = graphs[0].get_tensor_by_name("v0:0")
+    var_1 = graphs[0].get_tensor_by_name("v1:0")
+
+    var_0_1 = graphs[1].get_tensor_by_name("v0:0")
+    var_1_1 = graphs[1].get_tensor_by_name("v1:0")
+
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+    part_0_g = graphs[0].get_tensor_by_name(
+        GLOBAL_VARIABLE_NAME + "/partition_var/part_0:0")
+    part_1_g = graphs[0].get_tensor_by_name(
+        GLOBAL_VARIABLE_NAME + "/partition_var/part_1:0")
+
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(0.5, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[1].run(var_0_1))
+    self.assertAllEqual(0.5, sessions[1].run(var_1_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(0.5, sessions[0].run(var_1_g))
+
+    # verify each step
+    sessions[0].run(train_ops[0])
+    self.assertNear(0.1, sessions[0].run(var_0_g), 1e-6)
+    self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1],
+                            sessions[0].run(part_0_g),
+                            1e-6)
+    self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1],
+                            sessions[0].run(part_1_g),
+                            1e-6)
+
+    sessions[1].run(train_ops[1])
+    self.assertNear(0.2, sessions[0].run(var_0_g), 1e-6)
+    self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2],
+                            sessions[0].run(part_0_g),
+                            1e-6)
+    self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2],
+                            sessions[0].run(part_1_g),
+                            1e-6)
+
+    sessions[0].run(train_ops[0])
+    sessions[1].run(train_ops[1])
+
+    sessions[0].run(train_ops[0])
+    sessions[1].run(train_ops[1])
+    self.assertNear(0.6, sessions[0].run(var_0_g), 1e-6)
+    self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6],
+                            sessions[0].run(part_0_g),
+                            1e-6)
+    self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6],
+                            sessions[0].run(part_1_g),
+                            1e-6)
+
+  def testAGNCustomGetter(self):
+    cluster_spec = server_lib.ClusterSpec({
+        "ps": ["ps0:2222", "ps1:2222"],
+        "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]
+    })
+    agn_getter = AGNCustomGetter(worker_device="/job:worker/task:0")
+    from tensorflow.python.training import device_setter
+    with ops.device(
+        device_setter.replica_device_setter(cluster=cluster_spec,
+                                            worker_device="/job:worker/task:0",
+                                            ps_device="/job:ps")), \
+        variable_scope.variable_scope("", custom_getter=agn_getter):
+      v = variable_scope.get_variable(initializer=[1, 2], name="v")
+      w = variable_scope.get_variable(initializer=[2, 1], name="w")
+      v_g, w_g = agn_getter._global_map[v], agn_getter._global_map[w]
+      self.assertDeviceEqual("/job:worker/task:0", v.device)
+      self.assertDeviceEqual("job:ps/task:0", v_g.device)
+      self.assertDeviceEqual("/job:worker/task:0", w.device)
+      self.assertDeviceEqual("job:ps/task:1", w_g.device)
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 44dc83c18dfb8fff5525422e6c08a468aca4fb65 Mon Sep 17 00:00:00 2001
From: weidankong <kongweidan84@gmail.com>
Date: Fri, 24 Aug 2018 11:52:18 -0700
Subject: [PATCH 0053/1357] AGN: clear unused imports

---
 tensorflow/contrib/opt/python/training/agn_optimizer.py      | 4 ----
 tensorflow/contrib/opt/python/training/agn_optimizer_test.py | 2 --
 2 files changed, 6 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py
index dc1f8d6347..dd058bc26e 100644
--- a/tensorflow/contrib/opt/python/training/agn_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py
@@ -16,12 +16,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import logging_ops
@@ -31,7 +28,6 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.training import optimizer
 from tensorflow.python.training import session_run_hook
-from tensorflow.python.training import gradient_descent
 
 
 GLOBAL_VARIABLE_NAME = 'global_center_variable'
diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
index 091943de02..4e2200fa1a 100644
--- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
@@ -19,13 +19,11 @@ from __future__ import print_function
 
 import portpicker
 
-from tensorflow.python.client import session
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.training import device_setter
 from tensorflow.python.training import momentum
-- 
GitLab


From f8ee9799e6a72d4fe24f9fad76d6e6b1b3a01af1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Sat, 25 Aug 2018 07:03:07 +0800
Subject: [PATCH 0054/1357] ENH: raise exception if unsupported
 features/columns is given

---
 .../python/estimator/canned/boosted_trees.py  |  9 +-
 .../estimator/canned/boosted_trees_test.py    | 97 +++++++++++--------
 2 files changed, 63 insertions(+), 43 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index f2a5b9178b..66784fad0c 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -204,6 +204,9 @@ def _generate_feature_name_mapping(sorted_feature_columns):
 
   Returns:
     feature_name_mapping: a list of feature names indexed by the feature ids.
+
+  Raises:
+    ValueError: when unsupported features/columns are tried.
   """
   names = []
   for column in sorted_feature_columns:
@@ -221,8 +224,12 @@ def _generate_feature_name_mapping(sorted_feature_columns):
       else:
         for num in range(categorical_column._num_buckets):  # pylint:disable=protected-access
           names.append('{}:{}'.format(column.name, num))
-    else:
+    elif isinstance(column, feature_column_lib._BucketizedColumn):
       names.append(column.name)
+    else:
+      raise ValueError(
+          'For now, only bucketized_column and indicator_column is supported '
+          'but got: {}'.format(column))
   return names
 
 
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 7620f73425..14c05e024d 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -892,6 +892,49 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
                                  'all empty or contain only a root node'):
       est.experimental_feature_importances(normalize=True)
 
+  def testNegativeFeatureImportances(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    # In order to generate a negative feature importances,
+    # We assign an invalid value -1 to tree_weights here.
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 5.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+        }
+        tree_weights: -1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
+
+    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
+      est.experimental_feature_importances(normalize=False)
+
+    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
+      est.experimental_feature_importances(normalize=True)
+
   def testFeatureImportancesNamesForCategoricalColumn(self):
     categorical = feature_column.categorical_column_with_vocabulary_list(
         key='categorical', vocabulary_list=('bad', 'good', 'ok'))
@@ -1015,48 +1058,18 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(feature_names_expected, feature_names)
     self.assertAllClose([0.5, 0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], importances)
 
-  def testNegativeFeatureImportances(self):
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-
-    # In order to generate a negative feature importances,
-    # We assign an invalid value -1 to tree_weights here.
-    tree_ensemble_text = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 5.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.34
-            }
-          }
-        }
-        tree_weights: -1.0
-        """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(
-        est, tree_ensemble_text)
-
-    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
-      est.experimental_feature_importances(normalize=False)
-
-    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
-      est.experimental_feature_importances(normalize=True)
+  def testFeatureImportancesNamesForUnsupportedColumn(self):
+    numeric_col = feature_column.numeric_column(
+        'continuous', dtype=dtypes.float32)
+
+    with self.assertRaisesRegexp(ValueError,
+        'only bucketized_column and indicator_column'):
+      _ = boosted_trees.BoostedTreesRegressor(
+          feature_columns=[numeric_col],
+          n_batches_per_layer=1,
+          n_trees=2,
+          learning_rate=1.0,
+          max_depth=1)
 
 
 class ModelFnTests(test_util.TensorFlowTestCase):
-- 
GitLab


From 7e91ec68c7df088c306cc56cce621aee7ff53c94 Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Fri, 24 Aug 2018 22:13:21 -0700
Subject: [PATCH 0055/1357] Added more unit tests and upgraded to the
 device-less bridge.

---
 WORKSPACE                          |  6 ++++++
 tensorflow/workspace.bzl           | 20 +++++++++---------
 third_party/ngraph/ngraph_tf.BUILD | 34 ++++++++++++++++++++----------
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index 4af1a1e75f..15aa24f3c1 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -86,3 +86,9 @@ new_local_repository(
     build_file = "//third_party/ngraph:ngraph.BUILD",
 )
 
+new_local_repository(
+    name = "ngraph_tf",
+    path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph-tf",
+    build_file = "//third_party/ngraph:ngraph_tf.BUILD",
+)
+
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 951cb8a89d..a5dc95d609 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -865,16 +865,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"),
     )
 
-    tf_http_archive(
-        name = "ngraph_tf",
-        urls = [
-            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz",
-            "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz",
-        ],
-        sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072",
-        strip_prefix = "ngraph-tf-0.3.0-rc1",
-        build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"),
-    )
+    # tf_http_archive(
+    #     name = "ngraph_tf",
+    #     urls = [
+    #         "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz",
+    #         "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz",
+    #     ],
+    #     sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072",
+    #     strip_prefix = "ngraph-tf-0.3.0-rc1",
+    #     build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"),
+    # )
 
     ##############################################################################
     # BIND DEFINITIONS
diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index d0231e468e..f40d2057e8 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -10,26 +10,35 @@ load(
 cc_library(
     name = "ngraph_tf",
     srcs = [
+        "src/ngraph_assign_clusters.h",
+        "src/ngraph_assign_clusters.cc",
         "src/ngraph_builder.h",
         "src/ngraph_builder.cc",
-        "src/ngraph_cluster.h",
-        "src/ngraph_cluster.cc",
+        "src/ngraph_capture_variables.h",
+        "src/ngraph_capture_variables.cc",
+        "src/ngraph_conversions.h",
         "src/ngraph_cluster_manager.h",
         "src/ngraph_cluster_manager.cc",
-        "src/ngraph_confirm_pass.cc",
-        "src/ngraph_device.cc",
+        "src/ngraph_deassign_clusters.h",
+        "src/ngraph_deassign_clusters.cc",
         "src/ngraph_encapsulate_op.cc",
-        "src/ngraph_encapsulate_pass.cc",
+        "src/ngraph_encapsulate_clusters.h",
+        "src/ngraph_encapsulate_clusters.cc",
         "src/ngraph_freshness_tracker.h",
         "src/ngraph_freshness_tracker.cc",
-        "src/ngraph_graph_rewrite_passes.cc",
-        "src/ngraph_liberate_pass.cc",
-        "src/ngraph_op_kernels.cc",
-        "src/ngraph_stub_ops.cc",
+        # "src/ngraph_liberate_pass.cc",
+        # "src/ngraph_op_kernels.cc",
+        # "src/ngraph_stub_ops.cc",
+        "src/ngraph_mark_for_clustering.h",
+        "src/ngraph_mark_for_clustering.cc",
+        "src/ngraph_rewrite_pass.cc",
+        "src/ngraph_rewrite_for_tracking.h",
+        "src/ngraph_rewrite_for_tracking.cc",
+        "src/ngraph_tracked_variable.cc",
         "src/ngraph_utils.h",
         "src/ngraph_utils.cc",
-        "src/ngraph_send_recv_ops.cc",
-        "src/ngraph_variable_ops.cc",
+        # "src/ngraph_send_recv_ops.cc",
+        # "src/ngraph_variable_ops.cc",
         "src/tf_graphcycles.cc",
         "logging/ngraph_log.h",
         "logging/ngraph_log.cc",
@@ -60,6 +69,9 @@ tf_cc_test(
     size = "small",
     srcs = [
         "test/tf_exec.cpp",
+        "test/conversions.cpp",
+        "test/padding.cpp",
+        "test/graph_rewrites/assign_clusters.cc",
         "test/main.cpp",
     ],
     deps = [
-- 
GitLab


From 7a54c15804f7bb0d0c40fea5c84b1f4acee58bac Mon Sep 17 00:00:00 2001
From: Stefan Dyulgerov <stefan.dyulgerov@gmail.com>
Date: Sat, 25 Aug 2018 13:18:11 +0300
Subject: [PATCH 0056/1357] upgraded protobuf to v.3.6.1

---
 tensorflow/contrib/cmake/external/protobuf.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index f56fb35a0f..56a57a2340 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -16,7 +16,7 @@ include (ExternalProject)
 
 set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
 set(PROTOBUF_URL https://github.com/google/protobuf.git)
-set(PROTOBUF_TAG v3.6.0)
+set(PROTOBUF_TAG v3.6.1)
 
 if(WIN32)
   if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
-- 
GitLab


From 607004e583ecbd9fb788aaf9b360a8d85cf167ac Mon Sep 17 00:00:00 2001
From: weidankong <kongweidan84@gmail.com>
Date: Mon, 27 Aug 2018 13:12:23 -0700
Subject: [PATCH 0057/1357] AGN: remove compute_gradient

---
 .../opt/python/training/agn_optimizer.py      | 38 +------------------
 1 file changed, 1 insertion(+), 37 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py
index dd058bc26e..f47ef5acc5 100644
--- a/tensorflow/contrib/opt/python/training/agn_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py
@@ -19,9 +19,7 @@ from __future__ import print_function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import logging_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
@@ -134,40 +132,6 @@ class AGNOptimizer(optimizer.Optimizer):
         name='local_step')
     self._opt._prepare()
 
-  def compute_gradients(self,
-                        loss,
-                        var_list=None,
-                        gate_gradients=optimizer.Optimizer.GATE_OP,
-                        aggregation_method=None,
-                        colocate_gradients_with_ops=False,
-                        grad_loss=None):
-    """Compute gradients of `loss` for the variables in `var_list`.
-    Args:
-      loss: A Tensor containing the value to minimize.
-      var_list: Optional list or tuple of `tf.Variable` to update to minimize
-        `loss`.  Defaults to the list of variables collected in the graph
-        under the key `GraphKey.TRAINABLE_VARIABLES`.
-      gate_gradients: How to gate the computation of gradients.  Can be
-        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
-      aggregation_method: Specifies the method used to combine gradient terms.
-        Valid values are defined in the class `AggregationMethod`.
-      colocate_gradients_with_ops: If True, try colocating gradients with
-        the corresponding op.
-      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`
-
-    Returns:
-      A list of (gradient, variable) pairs. Variable is always present, but
-      gradient can be `None`.
-    """
-    if not var_list:
-      var_list = variables.trainable_variables()
-    return self._opt.compute_gradients(loss,
-                                       var_list,
-                                       gate_gradients,
-                                       aggregation_method,
-                                       colocate_gradients_with_ops,
-                                       grad_loss)
-
   def _adjust_optimizer_variable_collection(self, opt_vars):
     """ Move optimizer created variables to local collection
     """
@@ -268,7 +232,7 @@ class AGNOptimizer(optimizer.Optimizer):
       raise ValueError('The lists of local_variables, global_center_variables,'
                        'grad_center_variables should not be empty')
     for lvar, gc_var in zip(local_vars, global_center_vars):
-      init_ops.append(state_ops.assign(gc_var, lvar))
+      init_ops.append(state_ops.assign(lvar, gc_var))
     for g in grad_vars:
       init_ops.append(state_ops.assign(g, array_ops.zeros_like(g)))
     init_op = control_flow_ops.group(*(init_ops))
-- 
GitLab


From 8d226fe074d18aadf98a869755e7d432341ba882 Mon Sep 17 00:00:00 2001
From: weidankong <kongweidan84@gmail.com>
Date: Mon, 27 Aug 2018 15:59:54 -0700
Subject: [PATCH 0058/1357] AGN: use variable_creator_scope to move variables
 from GLOBAL_VARIABLES to LOCAL VARIABLES

---
 .../contrib/opt/python/training/agn_optimizer.py  | 15 ++++++++++-----
 .../opt/python/training/agn_optimizer_test.py     | 12 ++++++++++--
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py
index f47ef5acc5..8f415c75b9 100644
--- a/tensorflow/contrib/opt/python/training/agn_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py
@@ -166,12 +166,17 @@ class AGNOptimizer(optimizer.Optimizer):
     """
     local_vars = [v for g, v in grads_and_vars if g is not None]
     grads = [g for g, v in grads_and_vars if g is not None]
+    def _variable_creator(next_creator, collections, **kwargs):
+      if not collections:
+        collections = [ops.GraphKeys.LOCAL_VARIABLES]
+      elif ops.GraphKeys.GLOBAL_VARIABLES in collections:
+        collections = list(collections)
+        collections.append(ops.GraphKeys.LOCAL_VARIABLES)
+        collections.remove(ops.GraphKeys.GLOBAL_VARIABLES)
+      return next_creator(collections=collections, **kwargs)
     # theta = theta - lr * grad
-    global_old = set(n.op.name for n in variables.global_variables())
-    local_update_op = self._opt.apply_gradients(grads_and_vars)
-    global_new = set(n.op.name for n in variables.global_variables())
-
-    self._adjust_optimizer_variable_collection(global_new - global_old)
+    with variable_scope.variable_creator_scope(_variable_creator):
+      local_update_op = self._opt.apply_gradients(grads_and_vars)
 
     # a = a + grad
     update_ops = []
diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
index 4e2200fa1a..a2302d2f11 100644
--- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
@@ -23,10 +23,11 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import variables
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import test
 from tensorflow.python.training import device_setter
-from tensorflow.python.training import momentum
+from tensorflow.python.training import adam
 from tensorflow.python.training import server_lib
 from tensorflow.python.training import training
 from tensorflow.python.training import training_util
@@ -100,7 +101,7 @@ def _get_workers(num_workers, period, workers, num_ps=1):
         grads_part_1 = constant_op.constant([[-1., -1., -1., -1.]])
 
         optimizer = \
-            momentum.MomentumOptimizer(learning_rate=0.1, momentum=0.0)
+            adam.AdamOptimizer(learning_rate=0.1, beta1=0.0, beta2=0.0)
         opt = AGNOptimizer(
             optimizer,
             num_worker=num_workers,
@@ -152,6 +153,13 @@ class AGNOptimizerTest(test.TestCase):
     var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
     var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
 
+    # verify adam/beta variables not in global collection
+    with graphs[0].as_default():
+      for ele in variables.global_variables():
+        self.assertTrue(ele.op.name.find('beta') < 0)
+        if ele.op.name.find('global_center_variable') < 0:
+          self.assertTrue(ele.op.name.find('Adam') < 0)
+
     # Verify the initialized value.
     self.assertAllEqual(0.0, sessions[0].run(var_0))
     self.assertAllEqual(0.5, sessions[0].run(var_1))
-- 
GitLab


From 540ca4a8755a3670920b49647860d085df834a00 Mon Sep 17 00:00:00 2001
From: weidankong <kongweidan84@gmail.com>
Date: Mon, 27 Aug 2018 17:03:47 -0700
Subject: [PATCH 0059/1357] AGN: fix Sanity test

---
 .../opt/python/training/agn_optimizer.py      | 19 +---------
 .../opt/python/training/agn_optimizer_test.py | 37 ++++++++++---------
 2 files changed, 21 insertions(+), 35 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py
index 8f415c75b9..9fb5be56e6 100644
--- a/tensorflow/contrib/opt/python/training/agn_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py
@@ -19,7 +19,6 @@ from __future__ import print_function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
@@ -132,20 +131,6 @@ class AGNOptimizer(optimizer.Optimizer):
         name='local_step')
     self._opt._prepare()
 
-  def _adjust_optimizer_variable_collection(self, opt_vars):
-    """ Move optimizer created variables to local collection
-    """
-    g = ops.get_default_graph()
-    idx = 0
-    for _ in range(len(g._collections[ops.GraphKeys.GLOBAL_VARIABLES])):
-      var = g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx]
-      name = var.op.name
-      if name in opt_vars:
-        ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var)
-        del g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx]
-      else:
-        idx += 1
-
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     """Apply gradients to global variables.
 
@@ -182,7 +167,7 @@ class AGNOptimizer(optimizer.Optimizer):
     update_ops = []
     update_ops.append(local_update_op)
     grad_vars = [self._grad_map[var] for var in local_vars]
-    for g, grad_var in zip (grads, grad_vars):
+    for g, grad_var in zip(grads, grad_vars):
       update_ops.append(state_ops.assign_add(grad_var, g))
 
     global_center_vars = [self._global_map[var] for var in local_vars]
@@ -215,7 +200,7 @@ class AGNOptimizer(optimizer.Optimizer):
       return variable_update
 
     local_update = state_ops.assign_add(
-      self._local_step, 1, name='local_step_update').op
+        self._local_step, 1, name='local_step_update').op
 
     with ops.control_dependencies([local_update]):
       condition = math_ops.equal(
diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
index a2302d2f11..28732c2a1d 100644
--- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
@@ -72,9 +72,9 @@ def _get_workers(num_workers, period, workers, num_ps=1):
     with graph.as_default():
       worker_device = "/job:worker/task:%d/cpu:0" % (worker_id)
       ps_device = device_setter.replica_device_setter(
-                  worker_device=worker_device,
-                  ps_device="/job:ps/task:0/cpu:0",
-                  ps_tasks=1)
+          worker_device=worker_device,
+          ps_device="/job:ps/task:0/cpu:0",
+          ps_tasks=1)
       agn_getter = AGNCustomGetter(worker_device=worker_device)
       with variable_scope.variable_scope(
           "", custom_getter=agn_getter), ops.device(ps_device):
@@ -82,7 +82,8 @@ def _get_workers(num_workers, period, workers, num_ps=1):
         var_0 = variable_scope.get_variable(initializer=0.0, name="v0")
         var_1 = variable_scope.get_variable(initializer=0.5, name="v1")
       if num_ps > 1:
-        with variable_scope.variable_scope("",
+        with variable_scope.variable_scope(
+            "",
             partitioner=partitioned_variables.fixed_size_partitioner(
                 num_ps, axis=0),
             custom_getter=agn_getter), ops.device(ps_device):
@@ -109,12 +110,12 @@ def _get_workers(num_workers, period, workers, num_ps=1):
             custom_getter=agn_getter)
         if num_ps == 1:
           train_op = [
-            opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]),
+              opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]),
                                 global_step)
           ]
         else:
           train_op = [
-            opt.apply_gradients(([grads_0, var_0],
+              opt.apply_gradients(([grads_0, var_0],
                                  [grads_1, var_1],
                                  [grads_part_0, part_0],
                                  [grads_part_1, part_1]),
@@ -232,20 +233,20 @@ class AGNOptimizerTest(test.TestCase):
     sessions[0].run(train_ops[0])
     self.assertNear(0.1, sessions[0].run(var_0_g), 1e-6)
     self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1],
-                            sessions[0].run(part_0_g),
-                            1e-6)
+                           sessions[0].run(part_0_g),
+                           1e-6)
     self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1],
-                            sessions[0].run(part_1_g),
-                            1e-6)
+                           sessions[0].run(part_1_g),
+                           1e-6)
 
     sessions[1].run(train_ops[1])
     self.assertNear(0.2, sessions[0].run(var_0_g), 1e-6)
     self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2],
-                            sessions[0].run(part_0_g),
-                            1e-6)
+                           sessions[0].run(part_0_g),
+                           1e-6)
     self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2],
-                            sessions[0].run(part_1_g),
-                            1e-6)
+                           sessions[0].run(part_1_g),
+                           1e-6)
 
     sessions[0].run(train_ops[0])
     sessions[1].run(train_ops[1])
@@ -254,11 +255,11 @@ class AGNOptimizerTest(test.TestCase):
     sessions[1].run(train_ops[1])
     self.assertNear(0.6, sessions[0].run(var_0_g), 1e-6)
     self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6],
-                            sessions[0].run(part_0_g),
-                            1e-6)
+                           sessions[0].run(part_0_g),
+                           1e-6)
     self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6],
-                            sessions[0].run(part_1_g),
-                            1e-6)
+                           sessions[0].run(part_1_g),
+                           1e-6)
 
   def testAGNCustomGetter(self):
     cluster_spec = server_lib.ClusterSpec({
-- 
GitLab


From 6b25c37daaa6a063b6b687252343db5453a84b8b Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Mon, 27 Aug 2018 19:15:36 -0700
Subject: [PATCH 0060/1357] Added new version of the bridge that supports
 deviceless operation.

---
 third_party/ngraph/ngraph_tf.BUILD | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index f40d2057e8..c1221cc385 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -26,9 +26,6 @@ cc_library(
         "src/ngraph_encapsulate_clusters.cc",
         "src/ngraph_freshness_tracker.h",
         "src/ngraph_freshness_tracker.cc",
-        # "src/ngraph_liberate_pass.cc",
-        # "src/ngraph_op_kernels.cc",
-        # "src/ngraph_stub_ops.cc",
         "src/ngraph_mark_for_clustering.h",
         "src/ngraph_mark_for_clustering.cc",
         "src/ngraph_rewrite_pass.cc",
@@ -37,8 +34,6 @@ cc_library(
         "src/ngraph_tracked_variable.cc",
         "src/ngraph_utils.h",
         "src/ngraph_utils.cc",
-        # "src/ngraph_send_recv_ops.cc",
-        # "src/ngraph_variable_ops.cc",
         "src/tf_graphcycles.cc",
         "logging/ngraph_log.h",
         "logging/ngraph_log.cc",
@@ -58,7 +53,6 @@ cc_library(
         "-I external/ngraph_tf/src",
         "-I external/ngraph_tf/logging",
         "-I external/ngraph/src",
-        #"-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1",
     ],
     alwayslink = 1,
     visibility = ["//visibility:public"],
@@ -83,7 +77,6 @@ tf_cc_test(
     ],
     extra_copts = [
         "-fexceptions ",
-        #"-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1",
         "-I external/ngraph_tf/src",
         "-I external/ngraph_tf/logging",
         "-I external/ngraph/src",
-- 
GitLab


From ccb1af57af2532dfee1af73899d1970ac7a263e4 Mon Sep 17 00:00:00 2001
From: Hoeseong Kim <hsgkim@snu.ac.kr>
Date: Tue, 28 Aug 2018 12:33:41 +0900
Subject: [PATCH 0061/1357] update golden & pylint

---
 .../python/kernel_tests/extract_volume_patches_op_test.py     | 1 +
 tensorflow/tools/api/golden/v1/tensorflow.pbtxt               | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py
index 215474f6db..64757a3e07 100644
--- a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py
+++ b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py
@@ -54,6 +54,7 @@ class ExtractVolumePatches(test.TestCase):
           name="im2col_3d")
       self.assertAllClose(patches, out_tensor.eval())
 
+  # pylint: disable=bad-whitespace
   def testKsize1x1x1Stride1x1x1(self):
     """Verifies that for 1x1x1 kernel the output equals the input."""
     image = np.arange(2 * 3 * 4 * 5 * 6).reshape([2, 3, 4, 5, 6]) + 1
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 4f19627691..ba928eba9e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1060,6 +1060,10 @@ tf_module {
     name: "extract_image_patches"
     argspec: "args=[\'images\', \'ksizes\', \'strides\', \'rates\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "extract_volume_patches"
+    argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
-- 
GitLab


From eafc3914b0356e013b888fb103d20a76faf5ee5c Mon Sep 17 00:00:00 2001
From: Hoeseong Kim <hsgkim@snu.ac.kr>
Date: Tue, 28 Aug 2018 20:49:09 +0900
Subject: [PATCH 0062/1357] change golden/v2/tensorflow.pbtxt

Running the API compatibility test only checks for pbtxt files under directory
v1. Manually added extract_volume_patches under v2 as extract_image_patches is
registered under v2 as well.
---
 tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 5eb42b4db3..f7e63978da 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -1060,6 +1060,10 @@ tf_module {
     name: "extract_image_patches"
     argspec: "args=[\'images\', \'ksizes\', \'strides\', \'rates\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "extract_volume_patches"
+    argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
-- 
GitLab


From 40aee739c3d5c7aee63020f36b83aded09044efb Mon Sep 17 00:00:00 2001
From: weidankong <kongweidan84@gmail.com>
Date: Tue, 28 Aug 2018 10:09:13 -0700
Subject: [PATCH 0063/1357] AGN: fix sanity failure

---
 .../contrib/opt/python/training/agn_optimizer_test.py  | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
index 28732c2a1d..fc291f829f 100644
--- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py
@@ -111,15 +111,15 @@ def _get_workers(num_workers, period, workers, num_ps=1):
         if num_ps == 1:
           train_op = [
               opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]),
-                                global_step)
+                                  global_step)
           ]
         else:
           train_op = [
               opt.apply_gradients(([grads_0, var_0],
-                                 [grads_1, var_1],
-                                 [grads_part_0, part_0],
-                                 [grads_part_1, part_1]),
-                                global_step)
+                                   [grads_1, var_1],
+                                   [grads_part_0, part_0],
+                                   [grads_part_1, part_1]),
+                                  global_step)
           ]
         hook = opt.make_session_run_hook(is_chief, worker_id)
       # Creates MonitoredSession
-- 
GitLab


From 66b27b0f4c3541268007b251885f8db424147e66 Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Tue, 28 Aug 2018 18:46:45 -0700
Subject: [PATCH 0064/1357] Added comments.

---
 third_party/ngraph/ngraph.BUILD | 2 --
 third_party/ngraph/tbb.BUILD    | 5 ++++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD
index 3d9c3ac044..426d49c542 100644
--- a/third_party/ngraph/ngraph.BUILD
+++ b/third_party/ngraph/ngraph.BUILD
@@ -124,8 +124,6 @@ cc_library(
         "src/ngraph/pass/*.hpp",
         "src/ngraph/runtime/*.cpp",
         "src/ngraph/type/*.cpp",
-        #"src/ngraph/runtime/interpreter/*.cpp",
-        #"src/ngraph/runtime/interpreter/*.hpp",
     ]),
     deps = [
         ":ngraph_headers",
diff --git a/third_party/ngraph/tbb.BUILD b/third_party/ngraph/tbb.BUILD
index 7c760cb3b3..e2096e48af 100644
--- a/third_party/ngraph/tbb.BUILD
+++ b/third_party/ngraph/tbb.BUILD
@@ -3,8 +3,11 @@ licenses(["notice"])  # 3-Clause BSD
 exports_files(["LICENSE"])
 
 # Taken from: https://github.com/rnburn/satyr/blob/master/bazel/tbb.BUILD
-# License: MIT
+# License for this BUILD file: MIT
 # See: https://github.com/rnburn/satyr/blob/master/LICENSE
+#
+# License for TBB: Apache 2.0 
+# See: https://github.com/01org/tbb/blob/tbb_2018/LICENSE
 
 genrule(
     name = "build_tbb",
-- 
GitLab


From e93a9f9ccfd9c7a2419bf3fc1d7866765bbcfce3 Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Tue, 28 Aug 2018 18:55:51 -0700
Subject: [PATCH 0065/1357] Update GPU occupancy checking to utilize CUDA's
 occupancy calculator functions

-Replace references to the UnqueryableDeviceParams struct with calls to CUDA's built-in occupancy calculation functions
-Update calls to the occupancy checking functions with the new changes
-Changes should provide more long-term reliability and will remove the need to manually update hardcoded data values for new GPU architectures
---
 .../xla/service/gpu/partition_assignment.cc   |   9 +-
 .../stream_executor/cuda/cuda_gpu_executor.cc | 192 ++----------------
 .../stream_executor/device_description.cc     |  98 +++------
 .../stream_executor/device_description.h      |  73 ++-----
 4 files changed, 61 insertions(+), 311 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
index cf9f102d31..375f68a159 100644
--- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
@@ -62,13 +62,8 @@ LaunchDimensions CalculateLaunchDimensions(
   //
   //   <num threads per block> * <max blocks per core> = <max threads per core>
 
-  auto threads_per_core = device_desc.threads_per_core_limit();
-  auto blocks_per_core = device_desc.blocks_per_core_limit();
-  int64 threads_per_block;
-  if (threads_per_core != 0 && blocks_per_core != 0) {
-    threads_per_block = device_desc.threads_per_core_limit() /
-                        device_desc.blocks_per_core_limit();
-  } else {
+  int64 threads_per_block = device_desc.threads_per_block_limit();
+  if (threads_per_block == 0) {
     static std::atomic<int64> log_count{0};
     if (log_count.fetch_add(1) < 8) {
       LOG(WARNING) << "Attempting to calculate launch dimensions for GPU "
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index e30f50ea2a..39b0696c93 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -467,33 +467,26 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
     return;
   }
 
+  int block_size = thread_dims.x * thread_dims.y * thread_dims.z;
+
   const DeviceDescription &device_description =
       kernel.parent()->GetDeviceDescription();
 
-  uint64 blocks_per_sm = CalculateOccupancy(
-      device_description, regs_per_thread, smem_per_block, thread_dims);
-  VLOG(2) << "Resident blocks per SM is " << blocks_per_sm;
+  const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel);
+  CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue();
 
-  // To increase occupancy, there must be a sufficient number of blocks
-  // available to spread across the sm's at this new improved occupancy level.
-  int multiprocessor_count = device_description.core_count();
-  int block_count = block_dims.x * block_dims.y * block_dims.z;
-  int available_blocks_per_sm =
-      port::MathUtil::CeilOfRatio(block_count, multiprocessor_count);
-  if (available_blocks_per_sm <= static_cast<int64>(blocks_per_sm)) {
-    VLOG(2) << "Occupancy is limited by number of blocks available per sm.";
-    return;
-  }
+  int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread,
+                                         smem_per_block, thread_dims, cufunc);
+  VLOG(2) << "Resident blocks per SM is " << blocks_per_sm;
 
-  uint64 improved_regs_per_thread = CalculateRegisterLimitForTargetOccupancy(
-      device_description, smem_per_block, thread_dims, blocks_per_sm + 1);
-  if (improved_regs_per_thread != 0) {
-    VLOG(2) << "Reducing register usage from " << regs_per_thread
-            << " to " << improved_regs_per_thread
-            << " could increase resident blocks per SM by one.";
-  } else {
-    VLOG(2) << "Resident blocks per SM cannot be increased by reducing "
-        "register usage.";
+  int suggested_threads =
+      CompareOccupancy(&blocks_per_sm, device_description, regs_per_thread,
+                       smem_per_block, thread_dims, cufunc);
+  if (suggested_threads != 0) {
+    VLOG(2) << "The cuda occupancy calculator reccommends using "
+            << suggested_threads
+            << " threads per block to acheive an occupancy of " << blocks_per_sm
+            << " blocks per SM.";
   }
 }
 
@@ -980,144 +973,6 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
 #endif
 }
 
-// Set of compute capability specific device parameters that cannot be
-// queried from the driver API.  These values instead are baked into a
-// lookup table indexed by compute capability version.
-struct UnqueryableDeviceParams {
-  int cc_major;
-  int cc_minor;
-  uint64 blocks_per_core_limit;
-  uint64 registers_per_core_limit;
-  uint64 registers_per_thread_limit;
-  uint64 warp_alloc_granularity;
-  uint64 register_alloc_granularity;
-  uint64 shared_memory_alloc_granularity;
-};
-
-// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
-// https://developer.download.nvidia.com/compute/cuda/CUDA_Occupancy_calculator.xls
-static const UnqueryableDeviceParams kAllUnqueryableDeviceParams[] = {
-    {
-        2, 0,       // compute capability (2.0)
-        8,          // blocks_per_core_limit
-        32 * 1024,  // registers_per_core_limit
-        63,         // registers_per_thread_limit
-        2,          // warp_alloc_granularity
-        64,         // register_alloc_granularity
-        128,        // shared_memory_alloc_granularity
-    },
-    {
-        2, 1,       // compute capability (2.1)
-        8,          // blocks_per_core_limit
-        32 * 1024,  // registers_per_core_limit
-        63,         // registers_per_thread_limit
-        2,          // warp_alloc_granularity
-        64,         // register_alloc_granularity
-        128,        // shared_memory_alloc_granularity
-    },
-    {
-        3, 0,       // compute capability (3.0)
-        16,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        63,         // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        3, 2,       // compute capability (3.2)
-        16,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        3, 5,       // compute capability (3.5)
-        16,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        3, 7,        // compute capability (3.7)
-        16,          // blocks_per_core_limit
-        128 * 1024,  // registers_per_core_limit
-        255,         // registers_per_thread_limit
-        4,           // warp_alloc_granularity
-        256,         // register_alloc_granularity
-        256,         // shared_memory_alloc_granularity
-    },
-    {
-        5, 0,       // compute capability (5.0)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        5, 2,       // compute capability (5.2)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        5, 3,       // compute capability (5.3)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        6, 0,       // compute capability (6.0)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        2,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        6, 1,       // compute capability (6.1)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        6, 2,       // compute capability (6.2)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    // TODO(jlebar): Confirm the alloc granularity values for sm_70.  These are
-    // not published in the spreadsheet linked above.  Currently we guess that
-    // they're the same as sm_60.
-    {
-        7, 0,       // compute capability (7.0)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        2,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-};
 
 DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
   internal::DeviceDescriptionBuilder builder;
@@ -1193,19 +1048,6 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
     builder.set_name(device_name);
   }
 
-  for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) {
-    const auto &params = kAllUnqueryableDeviceParams[i];
-    if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) {
-      builder.set_blocks_per_core_limit(params.blocks_per_core_limit);
-      builder.set_registers_per_core_limit(params.registers_per_core_limit);
-      builder.set_registers_per_thread_limit(params.registers_per_thread_limit);
-      builder.set_warp_alloc_granularity(params.warp_alloc_granularity);
-      builder.set_register_alloc_granularity(params.register_alloc_granularity);
-      builder.set_shared_memory_alloc_granularity(
-          params.shared_memory_alloc_granularity);
-    }
-  }
-
   builder.set_platform_version(
       port::StrCat("Compute Capability ", cc_major_, ".", cc_minor_));
 
@@ -1227,6 +1069,10 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
       CUDADriver::GetMaxRegistersPerBlock(device_).ValueOrDie());
   builder.set_threads_per_warp(
       CUDADriver::GetThreadsPerWarp(device_).ValueOrDie());
+  builder.set_registers_per_core_limit(
+      CUDADriver::GetDeviceAttribute(
+          CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, device_)
+          .ValueOrDie());
 
   auto built = builder.Build();
   return built.release();
diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc
index 8ca0677f8a..df52ce6cce 100644
--- a/tensorflow/stream_executor/device_description.cc
+++ b/tensorflow/stream_executor/device_description.cc
@@ -37,16 +37,11 @@ DeviceDescription::DeviceDescription()
                         kUninitializedUint64),
       block_dim_limit_(kUninitializedUint64, kUninitializedUint64,
                        kUninitializedUint64),
-      blocks_per_core_limit_(kUninitializedUint64),
       threads_per_core_limit_(kUninitializedUint64),
       threads_per_block_limit_(kUninitializedUint64),
       threads_per_warp_(kUninitializedUint64),
       registers_per_core_limit_(kUninitializedUint64),
       registers_per_block_limit_(kUninitializedUint64),
-      registers_per_thread_limit_(kUninitializedUint64),
-      warp_alloc_granularity_(1),
-      register_alloc_granularity_(1),
-      shared_memory_alloc_granularity_(1),
       device_address_bits_(kUninitializedUint64),
       device_memory_size_(kUninitializedUint64),
       memory_bandwidth_(kUninitializedUint64),
@@ -162,75 +157,36 @@ static uint64 RoundDown(uint64 value, uint64 n) {
   return port::MathUtil::FloorOfRatio(value, n) * n;
 }
 
-uint64 CalculateOccupancy(const DeviceDescription &device_description,
-                          uint64 registers_per_thread,
-                          uint64 shared_memory_per_block,
-                          const ThreadDim &thread_dims) {
-  // Don't try to compute occupancy if necessary values are not initialized.
-  uint64 required_fields[] =  { device_description.registers_per_thread_limit(),
-                                device_description.threads_per_warp(),
-                                device_description.warp_alloc_granularity(),
-                                device_description.register_alloc_granularity(),
-                                device_description.registers_per_block_limit(),
-                                device_description.shared_memory_per_core(),
-                                device_description.blocks_per_core_limit() };
-  for (auto value : required_fields) {
-    if (value == kUninitializedUint64) {
-      return 0;
-    }
-  }
-
-  if (registers_per_thread > device_description.registers_per_thread_limit()) {
-    return 0;
-  }
-
-  uint64 warps_per_block =
-      port::MathUtil::CeilOfRatio(thread_dims.x * thread_dims.y * thread_dims.z,
-                                  device_description.threads_per_warp());
-
-  // Warp resources are allocated at a particular granularity.  This value is
-  // the effective number of warps for resource allocation purposes.
-  uint64 alloc_warps_per_block =
-      RoundUp(warps_per_block, device_description.warp_alloc_granularity());
-
-  uint64 alloc_regs_per_warp =
-      RoundUp(device_description.threads_per_warp() * registers_per_thread,
-              device_description.register_alloc_granularity());
-  uint64 regs_per_block = alloc_warps_per_block * alloc_regs_per_warp;
-  uint64 reg_limit =
-      device_description.registers_per_block_limit() / regs_per_block;
-
-  uint64 alloc_smem_per_block = RoundUp(
-      shared_memory_per_block,
-      device_description.shared_memory_alloc_granularity());
-  uint64 smem_limit = alloc_smem_per_block > 0 ?
-      device_description.shared_memory_per_core() / alloc_smem_per_block :
-      device_description.blocks_per_core_limit();
-
-  uint64 thread_limit = device_description.threads_per_core_limit()
-      / (warps_per_block  * device_description.threads_per_warp());
-
-  return std::min({ device_description.blocks_per_core_limit(),
-          reg_limit, smem_limit, thread_limit });
+int CalculateOccupancy(const DeviceDescription& device_description,
+                       uint64 registers_per_thread,
+                       uint64 shared_memory_per_block,
+                       const ThreadDim& thread_dims, CUfunction func) {
+  int suggested_blocks = 0;
+  int suggested_threads = 0;
+  CUresult err =
+      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
+                                       func, NULL, shared_memory_per_block, 0);
+  CHECK_EQ(err, CUDA_SUCCESS);
+  return suggested_blocks;
 }
 
-uint64 CalculateRegisterLimitForTargetOccupancy(
-    const DeviceDescription &device_description, uint64 shared_memory_per_block,
-    const ThreadDim &thread_dims, uint64 target_blocks_per_core) {
-  // Linear search from maximum number of registers down until the target
-  // blocks per SM is found.
-  // TODO(meheff): Compute this using a closed form solution.
-  int reg_step = device_description.register_alloc_granularity() /
-      device_description.threads_per_warp();
-  for (int r = device_description.registers_per_thread_limit(); r > 0;
-       r = RoundDown(r - 1, reg_step)) {
-    uint64 occupancy = CalculateOccupancy(
-        device_description, r, shared_memory_per_block, thread_dims);
-    if (occupancy >= target_blocks_per_core) {
-      return r;
-    }
+int CompareOccupancy(int* initial_blocks,
+                     const DeviceDescription& device_description,
+                     uint64 registers_per_thread,
+                     uint64 shared_memory_per_block,
+                     const ThreadDim& thread_dims, CUfunction func) {
+  int suggested_blocks = 0;
+  int suggested_threads = 0;
+  CUresult err =
+      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
+                                       func, NULL, shared_memory_per_block, 0);
+  CHECK_EQ(err, CUDA_SUCCESS);
+  if (suggested_blocks > *initial_blocks) {
+    *initial_blocks = suggested_blocks;
+    return suggested_threads;
+  } else {
+    return 0;
   }
-  return 0;
 }
 
 }  // namespace stream_executor
diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h
index 7f99d81ef3..d335b9b875 100644
--- a/tensorflow/stream_executor/device_description.h
+++ b/tensorflow/stream_executor/device_description.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include <memory>
 #include "tensorflow/stream_executor/platform/port.h"
 
+#include "tensorflow/stream_executor/cuda/cuda_driver.h"
 #include "tensorflow/stream_executor/launch_dim.h"
 #include "tensorflow/stream_executor/platform/port.h"
 
@@ -79,10 +80,6 @@ class DeviceDescription {
   // legitimate kernel launch request.
   const BlockDim &block_dim_limit() const { return block_dim_limit_; }
 
-  // Returns the limit on the number of simultaneously resident blocks
-  // on a multiprocessor.
-  uint64 blocks_per_core_limit() const { return blocks_per_core_limit_; }
-
   // Returns the limit on the total number of threads that can be launched in a
   // single block; i.e. the limit on x * y * z dimensions of a ThreadDim.
   // This limit affects what constitutes a legitimate kernel launch request.
@@ -110,27 +107,6 @@ class DeviceDescription {
     return registers_per_block_limit_;
   }
 
-  // Returns the limit on the total number of registers that can be
-  // allocated to a thread.
-  const uint64 &registers_per_thread_limit() const {
-    return registers_per_thread_limit_;
-  }
-
-  // Returns the granularity at which warps are allocated resources.
-  const uint64 &warp_alloc_granularity() const {
-    return warp_alloc_granularity_;
-  }
-
-  // Returns the granularity at which registers are allocated to warps.
-  const uint64 &register_alloc_granularity() const {
-    return register_alloc_granularity_;
-  }
-
-  // Returns the granularity at which shared memory is allocated to warps.
-  const uint64 &shared_memory_alloc_granularity() const {
-    return shared_memory_alloc_granularity_;
-  }
-
   // Returns the number of address bits available to kernel code running on the
   // platform. This affects things like the maximum allocation size and perhaps
   // types used in kernel code such as size_t.
@@ -200,19 +176,12 @@ class DeviceDescription {
   ThreadDim thread_dim_limit_;
   BlockDim block_dim_limit_;
 
-  uint64 blocks_per_core_limit_;
-
   uint64 threads_per_core_limit_;
   uint64 threads_per_block_limit_;
   uint64 threads_per_warp_;
 
   uint64 registers_per_core_limit_;
   uint64 registers_per_block_limit_;
-  uint64 registers_per_thread_limit_;
-
-  uint64 warp_alloc_granularity_;
-  uint64 register_alloc_granularity_;
-  uint64 shared_memory_alloc_granularity_;
 
   uint64 device_address_bits_;
   uint64 device_memory_size_;
@@ -270,10 +239,6 @@ class DeviceDescriptionBuilder {
     device_description_->block_dim_limit_ = value;
   }
 
-  void set_blocks_per_core_limit(uint64 value) {
-    device_description_->blocks_per_core_limit_ = value;
-  }
-
   void set_threads_per_core_limit(uint64 value) {
     device_description_->threads_per_core_limit_ = value;
   }
@@ -290,19 +255,6 @@ class DeviceDescriptionBuilder {
   void set_registers_per_block_limit(uint64 value) {
     device_description_->registers_per_block_limit_ = value;
   }
-  void set_registers_per_thread_limit(uint64 value) {
-    device_description_->registers_per_thread_limit_ = value;
-  }
-
-  void set_warp_alloc_granularity(uint64 value) {
-    device_description_->warp_alloc_granularity_ = value;
-  }
-  void set_register_alloc_granularity(uint64 value) {
-    device_description_->register_alloc_granularity_ = value;
-  }
-  void set_shared_memory_alloc_granularity(uint64 value) {
-    device_description_->shared_memory_alloc_granularity_ = value;
-  }
 
   void set_device_address_bits(uint64 value) {
     device_description_->device_address_bits_ = value;
@@ -375,17 +327,18 @@ void CalculateDimensionality(const DeviceDescription &device_description,
 // Compute and return maximum blocks per core (occupancy) based on the
 // device description, some kernel characteristics and the number of threads per
 // block.  If unable to compute occupancy, zero is returned.
-uint64 CalculateOccupancy(const DeviceDescription &device_description,
-                          uint64 registers_per_thread,
-                          uint64 shared_memory_per_block,
-                          const ThreadDim &thread_dims);
-
-// Compute and return the maximum number of registers per thread which
-// achieves the target occupancy.  If the target is not possible then
-// zero is returned.
-uint64 CalculateRegisterLimitForTargetOccupancy(
-    const DeviceDescription &device_description, uint64 shared_memory_per_block,
-    const ThreadDim &thread_dims, uint64 target_blocks_per_core);
+int CalculateOccupancy(const DeviceDescription& device_description,
+                       uint64 registers_per_thread,
+                       uint64 shared_memory_per_block,
+                       const ThreadDim& thread_dims, CUfunction func);
+
+// Compute and return the suggested thread count to acheive ideal occupancy.
+// If the provided thread dimensions match this number, zero is returned.
+int CompareOccupancy(int* initial_blocks,
+                     const DeviceDescription& device_description,
+                     uint64 registers_per_thread,
+                     uint64 shared_memory_per_block,
+                     const ThreadDim& thread_dims, CUfunction func);
 
 }  // namespace stream_executor
 
-- 
GitLab


From 4e72dd865a3fc83baa69f6b7c08720a1b546a464 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Wed, 29 Aug 2018 17:05:43 +0800
Subject: [PATCH 0066/1357] Refine LeakyRelu codes.

1. Add C++ gradient of gradient definition of LeakyReLu and revalant UT.
2. Using forward compatibility layer for python code changes.
---
 tensorflow/cc/gradients/nn_grad.cc            | 18 ++++-
 tensorflow/cc/gradients/nn_grad_test.cc       | 16 +++++
 .../python/kernel_tests/relu_op_test.py       | 70 ++++++++++---------
 tensorflow/python/ops/nn_ops.py               |  5 +-
 4 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc
index 0fc23d0bf7..2a32a2ed6f 100644
--- a/tensorflow/cc/gradients/nn_grad.cc
+++ b/tensorflow/cc/gradients/nn_grad.cc
@@ -149,13 +149,27 @@ Status LeakyReluGradHelper(const Scope& scope, const Operation& op,
   float alpha;
   TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha));
   internal::LeakyReluGrad::Attrs attrs;
-  attrs.Alpha(alpha);
-  auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs);
+  auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0),
+                                    attrs.Alpha(alpha));
   grad_outputs->push_back(dx);
   return scope.status();
 }
 REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper);
 
+Status LeakyReluGradGradHelper(const Scope& scope, const Operation& op,
+                               const std::vector<Output>& grad_inputs,
+                               std::vector<Output>* grad_outputs) {
+  float alpha;
+  TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha));
+  internal::LeakyReluGrad::Attrs attrs;
+  auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(1),
+                                    attrs.Alpha(alpha));
+  grad_outputs->push_back(dx);
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("LeakyReluGrad", LeakyReluGradGradHelper);
+
 Status EluGradHelper(const Scope& scope, const Operation& op,
                      const std::vector<Output>& grad_inputs,
                      std::vector<Output>* grad_outputs) {
diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index 5ebece7b6e..bf0db1f59d 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include "tensorflow/cc/framework/gradient_checker.h"
 #include "tensorflow/cc/framework/testutil.h"
 #include "tensorflow/cc/gradients/grad_testutil.h"
+#include "tensorflow/cc/ops/nn_ops_internal.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -173,6 +174,21 @@ TEST_F(NNGradTest, LeakyReluGrad) {
   RunTest(x, x_init_value, y, shape);
 }
 
+TEST_F(NNGradTest, LeakyReluGradGrad) {
+  TensorShape shape({5, 2});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+  // Avoid input values where Leaky ReLU gradient is not well defined (around
+  // zero).
+  Tensor x_init_value = test::AsTensor<float>(
+      {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f},
+      {5, 2});
+  Tensor features = test::AsTensor<float>(
+      {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f},
+      {5, 2});
+  auto y = ops::internal::LeakyReluGrad(scope_, x, features);
+  RunTest(x, x_init_value, y, shape);
+}
+
 TEST_F(NNGradTest, EluGrad) {
   TensorShape shape({5, 2});
   auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index ccb3a231bb..7066f28883 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.python.compat import compat
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
@@ -283,8 +284,9 @@ class LeakyReluTest(test.TestCase):
             np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
             alpha=0.1, use_gpu=True)
 
-  # The gradient test for ReLU is a bit tricky as the derivative is not well
-  # defined at around zero and we want to avoid that in terms of input values.
+  # The gradient test for Leaky ReLU is a bit tricky as the derivative is not
+  # well defined at around zero and we want to avoid that in terms of input
+  # values.
   def testGradientFloat32(self):
     with self.test_session():
       x = constant_op.constant(
@@ -319,39 +321,41 @@ class LeakyReluTest(test.TestCase):
     self.assertLess(err, 1e-10)
 
   def testGradGradFloat32(self):
-    with self.test_session():
-      x = constant_op.constant(
-          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-          shape=[2, 5],
-          name="x")
-      y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
-      z = gradients_impl.gradients(y, x)
-      x_init = np.asarray(
-          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
-          dtype=np.float32,
-          order="F")
-      err = gradient_checker.compute_gradient_error(
-          x, [2, 5], z[0], [2, 5], x_init_value=x_init)
-    print("leaky_relu (float32) gradient of gradient err = ", err)
-    self.assertLess(err, 1e-4)
+    with compat.forward_compatibility_horizon(2018, 10, 2):
+      with self.test_session():
+	x = constant_op.constant(
+	    [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+	    shape=[2, 5],
+	    name="x")
+	y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
+	z = gradients_impl.gradients(y, x)
+	x_init = np.asarray(
+	    [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+	    dtype=np.float32,
+	    order="F")
+	err = gradient_checker.compute_gradient_error(
+	    x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+      print("leaky_relu (float32) gradient of gradient err = ", err)
+      self.assertLess(err, 1e-4)
 
   def testGradGradFloat64(self):
-    with self.test_session():
-      x = constant_op.constant(
-          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-          shape=[2, 5],
-          dtype=dtypes.float64,
-          name="x")
-      y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
-      z = gradients_impl.gradients(y, x)
-      x_init = np.asarray(
-          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
-          dtype=np.float64,
-          order="F")
-      err = gradient_checker.compute_gradient_error(
-          x, [2, 5], z[0], [2, 5], x_init_value=x_init)
-    print("leaky_relu (float64) gradient of gradient err = ", err)
-    self.assertLess(err, 1e-10)
+    with compat.forward_compatibility_horizon(2018, 10, 2):
+      with self.test_session():
+	x = constant_op.constant(
+	    [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+	    shape=[2, 5],
+	    dtype=dtypes.float64,
+	    name="x")
+	y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
+	z = gradients_impl.gradients(y, x)
+	x_init = np.asarray(
+	    [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+	    dtype=np.float64,
+	    order="F")
+	err = gradient_checker.compute_gradient_error(
+	    x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+      print("leaky_relu (float64) gradient of gradient err = ", err)
+      self.assertLess(err, 1e-10)
 
   def testGradientScalar(self):
     with self.test_session() as sess:
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 31b8f3945d..52ea202636 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1601,7 +1601,10 @@ def leaky_relu(features, alpha=0.2, name=None):
     features = ops.convert_to_tensor(features, name="features")
     if features.dtype.is_integer:
       features = math_ops.to_float(features)
-    return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name)
+    if compat.forward_compatible(2018, 10, 1):
+      return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name)
+    alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha")
+    return math_ops.maximum(alpha * features, features, name=name)
 
 
 def _flatten_outer_dims(logits):
-- 
GitLab


From bb45e28b207f9a0d56f1b4a0d372b267e216ad04 Mon Sep 17 00:00:00 2001
From: Naurril <naurril@gmail.com>
Date: Wed, 29 Aug 2018 22:45:38 +0800
Subject: [PATCH 0067/1357] Code formatted

---
 tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
index 0f853ae52a..6af4ca4d96 100644
--- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
+++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
@@ -51,7 +51,7 @@ class ParallelConcatRemovePass : public GraphOptimizationPass {
     for (Node* n : matches) {
       AttrSlice n_attrs = n->attrs();
       auto base_make_node = [n, &n_attrs](const string& op,
-                                             const string& name) {
+                                          const string& name) {
         NodeBuilder node_builder(name, op);
         node_builder.Device(n->requested_device());
         string colo;
-- 
GitLab


From 1b166c7e6f30bf7179f31764b3615e63025a7472 Mon Sep 17 00:00:00 2001
From: "Wen-Heng (Jack) Chung" <whchung@gmail.com>
Date: Fri, 20 Jul 2018 19:03:55 +0000
Subject: [PATCH 0068/1357] Rename CUDA GPU ID to platform GPU ID

Rename CUDA GPU ID to platform GPU ID so the notion is applicable on both CUDA
and ROCm platform.
---
 .../contrib/tensorrt/convert/convert_graph.cc |   8 +-
 .../contrib/tensorrt/kernels/trt_engine_op.cc |  13 +-
 .../common_runtime/gpu/gpu_bfc_allocator.cc   |  11 +-
 .../common_runtime/gpu/gpu_bfc_allocator.h    |   6 +-
 .../gpu/gpu_bfc_allocator_test.cc             |  30 +--
 .../gpu/gpu_cudamalloc_allocator.cc           |   5 +-
 .../gpu/gpu_cudamalloc_allocator.h            |   2 +-
 .../common_runtime/gpu/gpu_debug_allocator.cc |  10 +-
 .../common_runtime/gpu/gpu_debug_allocator.h  |   4 +-
 .../gpu/gpu_debug_allocator_test.cc           |  59 ++---
 .../core/common_runtime/gpu/gpu_device.cc     | 224 ++++++++++--------
 .../core/common_runtime/gpu/gpu_device.h      |  22 +-
 .../common_runtime/gpu/gpu_device_test.cc     |  19 +-
 tensorflow/core/common_runtime/gpu/gpu_id.h   |  32 +--
 .../core/common_runtime/gpu/gpu_id_manager.cc |  38 +--
 .../core/common_runtime/gpu/gpu_id_manager.h  |  12 +-
 .../common_runtime/gpu/gpu_id_manager_test.cc |  32 +--
 .../core/common_runtime/gpu/gpu_id_utils.h    |  37 +--
 .../common_runtime/gpu/gpu_process_state.cc   |  15 +-
 .../core/grappler/clusters/single_machine.cc  |   6 +-
 tensorflow/core/grappler/clusters/utils.cc    |  13 +-
 tensorflow/core/grappler/clusters/utils.h     |   2 +-
 .../core/grappler/clusters/utils_test.cc      |  22 +-
 tensorflow/core/grappler/costs/utils.cc       |   8 +-
 tensorflow/core/protobuf/config.proto         |   2 +-
 25 files changed, 333 insertions(+), 299 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index b019c99882..f29f4d6deb 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -780,12 +780,12 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
     // If device is not set, use the first found GPU device for the conversion.
     for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
       TfGpuId tf_gpu_id(tf_gpu_id_value);
-      CudaGpuId cuda_gpu_id;
-      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+      PlatformGpuId platform_gpu_id;
+      Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
       if (s.ok()) {
         VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
-                << cuda_gpu_id.value();
-        cuda_device_id = cuda_gpu_id.value();
+                << platform_gpu_id.value();
+        cuda_device_id = platform_gpu_id.value();
         GPUOptions gpu_options;
         // If the TF to Cuda gpu id mapping exist, the device and corresponding
         // allocator must have been initialized already, so the
diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
index 2b42d81f47..88cf8d5980 100644
--- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
+++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
@@ -565,21 +565,22 @@ tensorflow::Status TRTEngineOp::AllocateCalibrationResources(
       new TRTInt8Calibrator(device_buffers_, batch_size, name()));
   const string label(name());
   auto segment_graph = &segment_graph_;
-  const int cuda_gpu_id = ctx->device()->tensorflow_gpu_device_info()->gpu_id;
-  if (cuda_gpu_id < 0) {
+  const int platform_gpu_id =
+      ctx->device()->tensorflow_gpu_device_info()->gpu_id;
+  if (platform_gpu_id < 0) {
     LOG(ERROR) << "Can't get gpu_device_info from context->device()";
     return tensorflow::errors::InvalidArgument(
         "Context->device doesn't contain device info!");
   }
   const int64 workspace_size_bytes = workspace_size_;
   cres->thr_.reset(new std::thread([cres, label, segment_graph, shapes,
-                                    cuda_gpu_id, workspace_size_bytes]() {
-    VLOG(0) << "Starting calibration thread on device " << cuda_gpu_id
+                                    platform_gpu_id, workspace_size_bytes]() {
+    VLOG(0) << "Starting calibration thread on device " << platform_gpu_id
             << ", Calibration Resource @ " << cres;
-    auto err = cudaSetDevice(cuda_gpu_id);
+    auto err = cudaSetDevice(platform_gpu_id);
     if (err != cudaSuccess) {
       // TODO(aaroey): should return error here.
-      LOG(ERROR) << "Couldn't set cuda device to " << cuda_gpu_id
+      LOG(ERROR) << "Couldn't set cuda device to " << platform_gpu_id
                  << " in calibration thread";
     }
     // ConvertGraphDefToEngine() will try to build the engine. This thread
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index 2d4c8d0201..c8db384b64 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -22,16 +22,17 @@ limitations under the License.
 
 namespace tensorflow {
 
-GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
-                                 const string& name)
-    : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {}
+GPUBFCAllocator::GPUBFCAllocator(PlatformGpuId platform_gpu_id,
+                                 size_t total_memory, const string& name)
+    : GPUBFCAllocator(platform_gpu_id, total_memory, GPUOptions(), name) {}
 
-GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+GPUBFCAllocator::GPUBFCAllocator(PlatformGpuId platform_gpu_id,
+                                 size_t total_memory,
                                  const GPUOptions& gpu_options,
                                  const string& name)
     : BFCAllocator(
           new GPUMemAllocator(
-              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
+              GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
               gpu_options.per_process_gpu_memory_fraction() > 1.0 ||
                   gpu_options.experimental().use_unified_memory()),
           total_memory, gpu_options.allow_growth(), name) {}
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index f1cc2eace1..435ffb4959 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -35,11 +35,11 @@ namespace tensorflow {
 // algorithm.
 class GPUBFCAllocator : public BFCAllocator {
  public:
-  // 'cuda_gpu_id' refers to the ID of the GPU device within
+  // 'platform_gpu_id' refers to the ID of the GPU device within
   // the process and must reference a valid ID in the process.
-  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+  GPUBFCAllocator(PlatformGpuId platform_gpu_id, size_t total_memory,
                   const string& name);
-  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+  GPUBFCAllocator(PlatformGpuId platform_gpu_id, size_t total_memory,
                   const GPUOptions& gpu_options, const string& name);
   virtual ~GPUBFCAllocator() {}
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
index 67caeb3495..518ccba580 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
@@ -46,7 +46,7 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use,
 }
 
 TEST(GPUBFCAllocatorTest, NoDups) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   // Allocate a lot of raw pointers
@@ -75,7 +75,7 @@ TEST(GPUBFCAllocatorTest, NoDups) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
   // Allocate 256 raw pointers of sizes between 100 bytes and about
   // a meg
   random::PhiloxRandom philox(123, 17);
@@ -133,7 +133,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
 }
 
 TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   float* first_ptr = a.Allocate<float>(1024);
@@ -168,18 +168,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
   float* ptr = a.Allocate<float>(0);
   EXPECT_EQ(nullptr, ptr);
 }
 
 TEST(GPUBFCAllocatorTest, TracksSizes) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
   float* t1 = a.Allocate<float>(1);
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
@@ -188,7 +188,7 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
 
 TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
   // Configure a 1MiB byte limit
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1 << 20, "GPU_0_bfc");
 
   float* first_ptr = a.Allocate<float>(1 << 6);
   float* second_ptr = a.Allocate<float>(1 << 20);
@@ -203,7 +203,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
   options.set_allow_growth(true);
 
   // Max of 2GiB, but starts out small.
-  GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1LL << 31, options, "GPU_0_bfc");
 
   // Allocate 10 raw pointers of sizes between 100 bytes and about
   // 64 megs.
@@ -264,8 +264,8 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
 }
 
 TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
-  GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
-  GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1UL << 60, "GPU_0_bfc");
+  GPUBFCAllocator b(PlatformGpuId(0), 1UL << 60, "GPU_0_bfc");
   void* amem = a.AllocateRaw(1, 1);
   void* bmem = b.AllocateRaw(1, 1 << 30);
   a.DeallocateRaw(amem);
@@ -273,7 +273,7 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
 }
 
 static void BM_Allocation(int iters) {
-  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1uLL << 33, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<size_t> sizes = {256,        4096,      16384,    524288,
                                512,        1048576,   10485760, 104857600,
@@ -289,7 +289,7 @@ static void BM_Allocation(int iters) {
 BENCHMARK(BM_Allocation);
 
 static void BM_AllocationThreaded(int iters, int num_threads) {
-  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1uLL << 33, "GPU_0_bfc");
   thread::ThreadPool pool(Env::Default(), "test", num_threads);
   std::atomic_int_fast32_t count(iters);
   mutex done_lock;
@@ -325,7 +325,7 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16);
 // A more complex benchmark that defers deallocation of an object for
 // "delay" allocations.
 static void BM_AllocationDelayed(int iters, int delay) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<int> sizes = {256, 4096, 16384, 4096, 512, 1024, 1024};
   int size_index = 0;
@@ -363,7 +363,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   // only methods inside this class can access private members of BFCAllocator.
 
   void TestBinDebugInfo() {
-    GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+    GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
 
     std::vector<void*> initial_ptrs;
     std::vector<size_t> initial_ptrs_allocated_sizes;
@@ -441,7 +441,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   }
 
   void TestLog2FloorNonZeroSlow() {
-    GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc");
+    GPUBFCAllocator a(PlatformGpuId(0), 1 /* total_memory */, "GPU_0_bfc");
     EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0));
     EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1));
     EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
index 934a57a5fb..553a5628ad 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
@@ -28,9 +28,10 @@ limitations under the License.
 namespace tensorflow {
 
 GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator,
-                                               CudaGpuId cuda_gpu_id)
+                                               PlatformGpuId platform_gpu_id)
     : base_allocator_(allocator) {
-  stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+  stream_exec_ =
+      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
 }
 
 GPUcudaMallocAllocator::~GPUcudaMallocAllocator() { delete base_allocator_; }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
index 856fdc34b4..8f38cc5a18 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
@@ -32,7 +32,7 @@ namespace tensorflow {
 class GPUcudaMallocAllocator : public VisitableAllocator {
  public:
   explicit GPUcudaMallocAllocator(VisitableAllocator* allocator,
-                                  CudaGpuId cuda_gpu_id);
+                                  PlatformGpuId platform_gpu_id);
   ~GPUcudaMallocAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
index e4c834b30d..badb021aa5 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
@@ -74,9 +74,10 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) {
 // GPUDebugAllocator
 // -----------------------------------------------------------------------------
 GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator,
-                                     CudaGpuId cuda_gpu_id)
+                                     PlatformGpuId platform_gpu_id)
     : base_allocator_(allocator) {
-  stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+  stream_exec_ =
+      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
 }
 
 GPUDebugAllocator::~GPUDebugAllocator() { delete base_allocator_; }
@@ -159,9 +160,10 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) {
 // GPUNanResetAllocator
 // -----------------------------------------------------------------------------
 GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator,
-                                           CudaGpuId cuda_gpu_id)
+                                           PlatformGpuId platform_gpu_id)
     : base_allocator_(allocator) {
-  stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+  stream_exec_ =
+      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
 }
 
 GPUNanResetAllocator::~GPUNanResetAllocator() { delete base_allocator_; }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
index 0f9b72040c..9e007ed8c1 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
@@ -34,7 +34,7 @@ namespace tensorflow {
 class GPUDebugAllocator : public VisitableAllocator {
  public:
   explicit GPUDebugAllocator(VisitableAllocator* allocator,
-                             CudaGpuId cuda_gpu_id);
+                             PlatformGpuId platform_gpu_id);
   ~GPUDebugAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
@@ -66,7 +66,7 @@ class GPUDebugAllocator : public VisitableAllocator {
 class GPUNanResetAllocator : public VisitableAllocator {
  public:
   explicit GPUNanResetAllocator(VisitableAllocator* allocator,
-                                CudaGpuId cuda_gpu_id);
+                                PlatformGpuId platform_gpu_id);
   ~GPUNanResetAllocator() override;
   string Name() override { return "gpu_nan_reset"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
index 236a0afa0b..bc3e3a8c35 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
@@ -34,10 +34,11 @@ namespace tensorflow {
 namespace {
 
 TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
-  const CudaGpuId cuda_gpu_id(0);
-  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
-                      cuda_gpu_id);
-  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+  const PlatformGpuId platform_gpu_id(0);
+  GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+                      platform_gpu_id);
+  auto stream_exec =
+      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
 
   for (int s : {8}) {
     std::vector<int64> cpu_array(s);
@@ -58,11 +59,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
   for (int s : {8, 211}) {
     EXPECT_DEATH(
         {
-          const CudaGpuId cuda_gpu_id(0);
-          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
-                              cuda_gpu_id);
+          const PlatformGpuId platform_gpu_id(0);
+          GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+                              platform_gpu_id);
           auto stream_exec =
-              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+              GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
 
           std::vector<int64> cpu_array(s);
           memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -91,11 +92,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
   for (int s : {8, 22}) {
     EXPECT_DEATH(
         {
-          const CudaGpuId cuda_gpu_id(0);
-          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
-                              cuda_gpu_id);
+          const PlatformGpuId platform_gpu_id(0);
+          GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+                              platform_gpu_id);
           auto stream_exec =
-              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+              GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
 
           std::vector<int64> cpu_array(s);
           memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -121,10 +122,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
 }
 
 TEST(GPUDebugAllocatorTest, ResetToNan) {
-  const CudaGpuId cuda_gpu_id(0);
-  GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
-                         cuda_gpu_id);
-  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+  const PlatformGpuId platform_gpu_id(0);
+  GPUNanResetAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+                         platform_gpu_id);
+  auto stream_exec =
+      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
 
   std::vector<float> cpu_array(1024);
   std::vector<float> cpu_array_result(1024);
@@ -161,13 +163,14 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
 }
 
 TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
-  const CudaGpuId cuda_gpu_id(0);
+  const PlatformGpuId platform_gpu_id(0);
   // NaN reset must be the outer-most allocator.
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
-                            cuda_gpu_id),
-      cuda_gpu_id);
-  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+      new GPUDebugAllocator(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+                            platform_gpu_id),
+      platform_gpu_id);
+  auto stream_exec =
+      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
 
   std::vector<float> cpu_array(1024);
   std::vector<float> cpu_array_result(1024);
@@ -204,18 +207,18 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
 }
 
 TEST(GPUDebugAllocatorTest, TracksSizes) {
-  const CudaGpuId cuda_gpu_id(0);
-  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
-                      cuda_gpu_id);
+  const PlatformGpuId platform_gpu_id(0);
+  GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+                      platform_gpu_id);
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
-  const CudaGpuId cuda_gpu_id(0);
+  const PlatformGpuId platform_gpu_id(0);
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
-                            cuda_gpu_id),
-      cuda_gpu_id);
+      new GPUDebugAllocator(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+                            platform_gpu_id),
+      platform_gpu_id);
   float* t1 = a.Allocate<float>(1);
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 2763ac0d4a..4bf23bc017 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -105,9 +105,9 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
         reinterpret_cast<unsigned int*>(scratch + Eigen::kCudaScratchSize);
     stream_ = cuda_stream;
     allocator_ = alloc;
-    CudaGpuId cuda_gpu_id;
-    TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
-    device_prop_ = &Eigen::m_deviceProperties[cuda_gpu_id.value()];
+    PlatformGpuId platform_gpu_id;
+    TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
+    device_prop_ = &Eigen::m_deviceProperties[platform_gpu_id.value()];
   }
 
   const cudaStream_t& stream() const override { return *stream_; }
@@ -332,9 +332,10 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   gpu_device_info_->stream = streams_[0]->compute;
   gpu_device_info_->default_context = device_contexts_[0];
   gpu_device_info_->event_mgr = em_.get();
-  CudaGpuId cuda_gpu_id;
-  TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id_, &cuda_gpu_id));
-  gpu_device_info_->gpu_id = cuda_gpu_id.value();
+  PlatformGpuId platform_gpu_id;
+  TF_RETURN_IF_ERROR(
+      GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id));
+  gpu_device_info_->gpu_id = platform_gpu_id.value();
   set_tensorflow_gpu_device_info(gpu_device_info_);
 
   // Whether and how the GPU device uses its own threadpool.
@@ -690,9 +691,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
   Eigen::GpuDevice device_;
 };
 
-// Parse 'visible_device_list' into a list of CUDA GPU ids.
+// Parse 'visible_device_list' into a list of platform GPU ids.
 Status ParseVisibleDeviceList(const string& visible_device_list,
-                              std::vector<CudaGpuId>* visible_gpu_order) {
+                              std::vector<PlatformGpuId>* visible_gpu_order) {
   visible_gpu_order->clear();
   se::Platform* gpu_manager = GPUMachineManager();
 
@@ -707,26 +708,28 @@ Status ParseVisibleDeviceList(const string& visible_device_list,
   } else {
     const std::vector<string> order_str =
         str_util::Split(visible_device_list, ',');
-    for (const string& cuda_gpu_id_str : order_str) {
-      int32 cuda_gpu_id;
-      if (!strings::safe_strto32(cuda_gpu_id_str, &cuda_gpu_id)) {
+    for (const string& platform_gpu_id_str : order_str) {
+      int32 platform_gpu_id;
+      if (!strings::safe_strto32(platform_gpu_id_str, &platform_gpu_id)) {
         return errors::InvalidArgument(
             "Could not parse entry in 'visible_device_list': '",
-            cuda_gpu_id_str, "'. visible_device_list = ", visible_device_list);
+            platform_gpu_id_str, "'. visible_device_list = ",
+            visible_device_list);
       }
-      if (cuda_gpu_id < 0 || cuda_gpu_id >= gpu_manager->VisibleDeviceCount()) {
+      if (platform_gpu_id < 0 ||
+          platform_gpu_id >= gpu_manager->VisibleDeviceCount()) {
         return errors::InvalidArgument(
-            "'visible_device_list' listed an invalid GPU id '", cuda_gpu_id,
+            "'visible_device_list' listed an invalid GPU id '", platform_gpu_id,
             "' but visible device count is ",
             gpu_manager->VisibleDeviceCount());
       }
-      visible_gpu_order->push_back(CudaGpuId(cuda_gpu_id));
+      visible_gpu_order->push_back(PlatformGpuId(platform_gpu_id));
     }
   }
 
   // Validate no repeats.
-  std::set<CudaGpuId> visible_device_set(visible_gpu_order->begin(),
-                                         visible_gpu_order->end());
+  std::set<PlatformGpuId> visible_device_set(visible_gpu_order->begin(),
+                                             visible_gpu_order->end());
   if (visible_device_set.size() != visible_gpu_order->size()) {
     return errors::InvalidArgument(
         "visible_device_list contained a duplicate entry: ",
@@ -737,8 +740,8 @@ Status ParseVisibleDeviceList(const string& visible_device_list,
 
 Status VerifyVirtualDeviceSettings(
     const size_t num_gpus_to_use, const GPUOptions& gpu_options,
-    const std::vector<CudaGpuId>& visible_gpu_order,
-    const std::vector<CudaGpuId>& valid_cuda_gpu_ids) {
+    const std::vector<PlatformGpuId>& visible_gpu_order,
+    const std::vector<PlatformGpuId>& valid_platform_gpu_ids) {
   const auto& virtual_devices = gpu_options.experimental().virtual_devices();
   CHECK(!virtual_devices.empty());
   if (gpu_options.per_process_gpu_memory_fraction() > 0) {
@@ -760,11 +763,11 @@ Status VerifyVirtualDeviceSettings(
         " #GPUs in visible_device_list: ", visible_gpu_order.size(),
         " virtual_devices.size(): ", virtual_devices.size());
   }
-  if (valid_cuda_gpu_ids.size() != virtual_devices.size()) {
+  if (valid_platform_gpu_ids.size() != virtual_devices.size()) {
     return errors::Unknown(
         "The number of valid GPUs doesn't match the number of elements in "
         "the virtual_devices list.",
-        " #valid GPUs: ", valid_cuda_gpu_ids.size(),
+        " #valid GPUs: ", valid_platform_gpu_ids.size(),
         " virtual_devices.size(): ", virtual_devices.size());
   }
   return Status::OK();
@@ -806,18 +809,18 @@ int64 MinSystemMemory(int64 available_memory) {
 }
 
 // Get the memory limit for the virtual device being created on GPU with
-// 'cuda_gpu_id', when that virtual device is the only virtual device being
+// 'platform_gpu_id', when that virtual device is the only virtual device being
 // created on that GPU.
 Status SingleVirtualDeviceMemoryLimit(const GPUOptions& gpu_options,
-                                      CudaGpuId cuda_gpu_id,
+                                      PlatformGpuId platform_gpu_id,
                                       int64* memory_limit) {
   int64 total_memory = 0;
   int64 available_memory = 0;
   se::StreamExecutor* se =
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
   if (!se->DeviceMemoryUsage(&available_memory, &total_memory)) {
     return errors::Unknown("Failed to query available memory for GPU ",
-                           cuda_gpu_id.value());
+                           platform_gpu_id.value());
   }
 
   int64 allocated_memory = 0;
@@ -916,8 +919,8 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
     num_gpus_to_use = iter->second;
   }
   const auto& gpu_options = options.config.gpu_options();
-  std::vector<CudaGpuId> visible_gpu_order;
-  std::vector<CudaGpuId> valid_cuda_gpu_ids;
+  std::vector<PlatformGpuId> visible_gpu_order;
+  std::vector<PlatformGpuId> valid_platform_gpu_ids;
   // If we aren't going to use any GPUs, don't initialize them.
   // We don't want to call ParseVisibleDeviceList if num_gpus_to_use is 0,
   // because it treats an empty gpu_options.visible_device_list as 'all GPUs are
@@ -926,12 +929,12 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
     TF_RETURN_IF_ERROR(ParseVisibleDeviceList(gpu_options.visible_device_list(),
                                               &visible_gpu_order));
     TF_RETURN_IF_ERROR(
-        GetValidDeviceIds(visible_gpu_order, &valid_cuda_gpu_ids));
+        GetValidDeviceIds(visible_gpu_order, &valid_platform_gpu_ids));
   }
-  if (num_gpus_to_use > valid_cuda_gpu_ids.size()) {
-    num_gpus_to_use = valid_cuda_gpu_ids.size();
+  if (num_gpus_to_use > valid_platform_gpu_ids.size()) {
+    num_gpus_to_use = valid_platform_gpu_ids.size();
   }
-  if (!valid_cuda_gpu_ids.empty()) {
+  if (!valid_platform_gpu_ids.empty()) {
     // Save the original device.
     int original_device = 0;
     cudaError_t err = cudaGetDevice(&original_device);
@@ -941,17 +944,18 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
     }
     // Force to implicitly initialize CUDA runtime on each valid GPU before
     // CreateGPUDevice().
-    for (CudaGpuId cuda_gpu_id : valid_cuda_gpu_ids) {
-      err = cudaSetDevice(cuda_gpu_id.value());
+    for (PlatformGpuId platform_gpu_id : valid_platform_gpu_ids) {
+      err = cudaSetDevice(platform_gpu_id.value());
       if (err != cudaSuccess) {
-        return errors::Internal("cudaSetDevice() on GPU:", cuda_gpu_id.value(),
-                                " failed. Status: ", cudaGetErrorString(err));
+        return errors::Internal("cudaSetDevice() on GPU:",
+                                platform_gpu_id.value(), " failed. Status: ",
+                                cudaGetErrorString(err));
       }
       err = cudaFree(nullptr);
       if (err != cudaSuccess) {
-        return errors::Internal(
-            "CUDA runtime implicit initialization on GPU:", cuda_gpu_id.value(),
-            " failed. Status: ", cudaGetErrorString(err));
+        return errors::Internal("CUDA runtime implicit initialization on GPU:",
+                                platform_gpu_id.value(), " failed. Status: ",
+                                cudaGetErrorString(err));
       }
     }
     // Reset to the original device.
@@ -977,10 +981,10 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
     LOG(INFO) << line_buf;
     for (int i = 0; i < visible_gpu_order.size(); ++i) {
       line_buf = strings::StrCat(visible_gpu_order[i].value(), ":   ");
-      CudaGpuId cuda_id_i = visible_gpu_order[i];
+      PlatformGpuId gpu_id_i = visible_gpu_order[i];
       for (int j = 0; j < visible_gpu_order.size(); ++j) {
-        CudaGpuId cuda_id_j = visible_gpu_order[j];
-        if (im.directed_links.find({cuda_id_i, cuda_id_j}) !=
+        PlatformGpuId gpu_id_j = visible_gpu_order[j];
+        if (im.directed_links.find({gpu_id_i, gpu_id_j}) !=
             im.directed_links.end()) {
           line_buf.append("Y ");
         } else {
@@ -993,22 +997,23 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
 
   const auto& virtual_devices = gpu_options.experimental().virtual_devices();
   if (!virtual_devices.empty()) {
-    TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings(
-        num_gpus_to_use, gpu_options, visible_gpu_order, valid_cuda_gpu_ids));
+    TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings(num_gpus_to_use, gpu_options,
+                                                   visible_gpu_order,
+                                                   valid_platform_gpu_ids));
     // We've verified that num_gpus_to_use >= virtual_devices.size().
     num_gpus_to_use = virtual_devices.size();
     CHECK(gpu_options.visible_device_list().empty() ||
-          valid_cuda_gpu_ids == visible_gpu_order);
+          valid_platform_gpu_ids == visible_gpu_order);
   }
   int next_tf_gpu_id = 0;
   std::vector<int64> memory_limit_bytes;
   for (int i = 0; i < num_gpus_to_use; ++i) {
-    const CudaGpuId cuda_gpu_id = valid_cuda_gpu_ids[i];
+    const PlatformGpuId platform_gpu_id = valid_platform_gpu_ids[i];
     if (virtual_devices.empty() ||
         virtual_devices.Get(i).memory_limit_mb_size() == 0) {
       int64 single_virtual_device_memory_limit = 0;
       TF_RETURN_IF_ERROR(SingleVirtualDeviceMemoryLimit(
-          gpu_options, cuda_gpu_id, &single_virtual_device_memory_limit));
+          gpu_options, platform_gpu_id, &single_virtual_device_memory_limit));
       memory_limit_bytes.push_back(single_virtual_device_memory_limit);
     } else {
       const auto& memory_limit_mb = virtual_devices.Get(i).memory_limit_mb();
@@ -1021,7 +1026,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
       TfGpuId tf_gpu_id(next_tf_gpu_id);
       ++next_tf_gpu_id;
       TF_RETURN_IF_ERROR(
-          GpuIdManager::InsertTfCudaGpuIdPair(tf_gpu_id, cuda_gpu_id));
+          GpuIdManager::InsertTfPlatformGpuIdPair(tf_gpu_id, platform_gpu_id));
     }
   }
   const int num_tf_gpus = next_tf_gpu_id;
@@ -1046,7 +1051,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
   return Status::OK();
 }
 
-static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id,
+static string GetShortDeviceDescription(PlatformGpuId platform_gpu_id,
                                         const se::DeviceDescription& desc) {
   int cc_major;
   int cc_minor;
@@ -1055,9 +1060,8 @@ static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id,
     cc_minor = 0;
   }
   // LINT.IfChange
-  return strings::StrCat("device: ", cuda_gpu_id.value(),
-                         ", name: ", desc.name(),
-                         ", pci bus id: ", desc.pci_bus_id(),
+  return strings::StrCat("device: ", platform_gpu_id.value(), ", name: ",
+                         desc.name(), ", pci bus id: ", desc.pci_bus_id(),
                          ", compute capability: ", cc_major, ".", cc_minor);
   // LINT.ThenChange(//tensorflow/python/platform/test.py)
 }
@@ -1072,12 +1076,13 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
   const string device_name =
       strings::StrCat(name_prefix, "/device:GPU:", tf_gpu_id.value());
   GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
-  CudaGpuId cuda_gpu_id;
-  TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
+  PlatformGpuId platform_gpu_id;
+  TF_RETURN_IF_ERROR(
+      GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
   int numa_node = dev_locality.numa_node();
 
   se::StreamExecutor* se =
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
   const se::DeviceDescription& desc = se->GetDeviceDescription();
   GPUProcessState* process_state = GPUProcessState::singleton();
   Allocator* gpu_allocator = process_state->GetGPUAllocator(
@@ -1098,11 +1103,11 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
   // TODO(laigd): report error if memory_limit doesn't match stats.bytes_limit.
   BaseGPUDevice* gpu_device = CreateGPUDevice(
       options, device_name, static_cast<Bytes>(stats.bytes_limit), dev_locality,
-      tf_gpu_id, GetShortDeviceDescription(cuda_gpu_id, desc), gpu_allocator,
-      ProcessState::singleton()->GetCPUAllocator(numa_node));
+      tf_gpu_id, GetShortDeviceDescription(platform_gpu_id, desc),
+      gpu_allocator, ProcessState::singleton()->GetCPUAllocator(numa_node));
   LOG(INFO) << "Created TensorFlow device (" << device_name << " with "
             << (stats.bytes_limit >> 20) << " MB memory) -> physical GPU ("
-            << GetShortDeviceDescription(cuda_gpu_id, desc) << ")";
+            << GetShortDeviceDescription(platform_gpu_id, desc) << ")";
   TF_RETURN_IF_ERROR(gpu_device->Init(options));
   devices->push_back(gpu_device);
 
@@ -1110,18 +1115,21 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
 }
 
 namespace {
-std::unique_ptr<std::map<std::pair<CudaGpuId, CudaGpuId>, bool>>
+std::unique_ptr<std::map<std::pair<PlatformGpuId, PlatformGpuId>, bool>>
 GetPeerAccessMap(se::Platform* platform,
-                 const std::vector<CudaGpuId>& visible_gpu_order) {
-  std::unique_ptr<std::map<std::pair<CudaGpuId, CudaGpuId>, bool>> map(
-      new std::map<std::pair<CudaGpuId, CudaGpuId>, bool>);
-  for (CudaGpuId cuda_gpu_i : visible_gpu_order) {
-    for (CudaGpuId cuda_gpu_j : visible_gpu_order) {
+                 const std::vector<PlatformGpuId>& visible_gpu_order) {
+  std::unique_ptr<std::map<std::pair<PlatformGpuId, PlatformGpuId>, bool>> map(
+      new std::map<std::pair<PlatformGpuId, PlatformGpuId>, bool>);
+  for (PlatformGpuId platform_gpu_i : visible_gpu_order) {
+    for (PlatformGpuId platform_gpu_j : visible_gpu_order) {
       se::StreamExecutor* from =
-          GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_i).ValueOrDie();
+          GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_i)
+              .ValueOrDie();
       se::StreamExecutor* to =
-          GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_j).ValueOrDie();
-      (*map)[{cuda_gpu_i, cuda_gpu_j}] = from->CanEnablePeerAccessTo(to);
+          GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_j)
+              .ValueOrDie();
+      (*map)[{platform_gpu_i, platform_gpu_j}] =
+          from->CanEnablePeerAccessTo(to);
     }
   }
 
@@ -1131,19 +1139,19 @@ GetPeerAccessMap(se::Platform* platform,
 }  // namespace
 
 Status BaseGPUDeviceFactory::GetInterconnectMaps(
-    const std::vector<CudaGpuId>& visible_gpu_order, se::Platform* gpu_manager,
-    std::vector<InterconnectMap>* maps) {
+    const std::vector<PlatformGpuId>& visible_gpu_order,
+    se::Platform* gpu_manager, std::vector<InterconnectMap>* maps) {
   // The default interconnect map is obtained from the StreamExecutor.
   auto access_map = GetPeerAccessMap(gpu_manager, visible_gpu_order);
   maps->resize(1);
   InterconnectMap& imap = maps->at(0);
   imap.name = "StreamExecutor";
   imap.strength = InterconnectMap::kStreamExecutorStrength;
-  for (CudaGpuId cuda_id_i : visible_gpu_order) {
-    for (CudaGpuId cuda_id_j : visible_gpu_order) {
-      if (cuda_id_i == cuda_id_j) continue;
-      if ((*access_map)[{cuda_id_i, cuda_id_j}]) {
-        imap.directed_links.insert({cuda_id_i, cuda_id_j});
+  for (PlatformGpuId gpu_id_i : visible_gpu_order) {
+    for (PlatformGpuId gpu_id_j : visible_gpu_order) {
+      if (gpu_id_i == gpu_id_j) continue;
+      if ((*access_map)[{gpu_id_i, gpu_id_j}]) {
+        imap.directed_links.insert({gpu_id_i, gpu_id_j});
       }
     }
   }
@@ -1158,13 +1166,14 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
     all_tf_gpu_ids.push_back(TfGpuId(i));
   }
   for (TfGpuId tf_gpu_id : all_tf_gpu_ids) {
-    CudaGpuId cuda_gpu_id;
-    TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
+    PlatformGpuId platform_gpu_id;
+    TF_RETURN_IF_ERROR(
+        GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
     // Get GPU bus_id from its reported NUMA affinity.  Because GPUs are
     // virtualized in some environments, we can't just use the GPU id.
     // NUMA locales are indexed from 0, buses are indexed from 1.
     se::StreamExecutor* se =
-        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+        GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
     const se::DeviceDescription& desc = se->GetDeviceDescription();
     int numa_node = desc.numa_node();
     if (numa_node < 0) {
@@ -1174,7 +1183,8 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
       // may run into trouble later with data transfer operations.  The
       // trouble may manifest as slower than expected performance, or
       // outright failures.
-      LOG(INFO) << "Could not identify NUMA node of CUDA gpu id " << cuda_gpu_id
+      LOG(INFO) << "Could not identify NUMA node of platform GPU id "
+                << platform_gpu_id
                 << ", defaulting to 0.  Your kernel may not have been built "
                 << "with NUMA support.";
       numa_node = 0;
@@ -1187,10 +1197,10 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
     LocalLinks* links = dev_locality.mutable_links();
     for (const InterconnectMap& imap : interconnects) {
       for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) {
-        CudaGpuId cuda_gpu_dst;
+        PlatformGpuId platform_gpu_dst;
         TF_RETURN_IF_ERROR(
-            GpuIdManager::TfToCudaGpuId(tf_gpu_dst, &cuda_gpu_dst));
-        if (imap.directed_links.find({cuda_gpu_id, cuda_gpu_dst}) !=
+            GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst));
+        if (imap.directed_links.find({platform_gpu_id, platform_gpu_dst}) !=
             imap.directed_links.end()) {
           InterconnectLink* ilink = links->add_link();
           ilink->set_device_id(tf_gpu_dst.value());
@@ -1204,10 +1214,10 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
     // add high strength links to the others.
     for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) {
       if (tf_gpu_id == tf_gpu_dst) continue;
-      CudaGpuId cuda_gpu_dst;
+      PlatformGpuId platform_gpu_dst;
       TF_RETURN_IF_ERROR(
-          GpuIdManager::TfToCudaGpuId(tf_gpu_dst, &cuda_gpu_dst));
-      if (cuda_gpu_id == cuda_gpu_dst) {
+          GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst));
+      if (platform_gpu_id == platform_gpu_dst) {
         InterconnectLink* ilink = links->add_link();
         ilink->set_device_id(tf_gpu_dst.value());
         ilink->set_type("SAME_DEVICE");
@@ -1216,9 +1226,9 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
     }
 
     (*localities)[tf_gpu_id] = dev_locality;
-    VLOG(1) << "GPUDevice CudaGpuId " << cuda_gpu_id << " TfGpuId " << tf_gpu_id
-            << " on bus " << dev_locality.bus_id() << " numa: " << numa_node
-            << " pci: " << desc.pci_bus_id()
+    VLOG(1) << "GPUDevice PlatformGpuId " << platform_gpu_id << " TfGpuId "
+            << tf_gpu_id << " on bus " << dev_locality.bus_id()
+            << " numa: " << numa_node << " pci: " << desc.pci_bus_id()
             << " DeviceLocality: " << dev_locality.DebugString();
   }
   return Status::OK();
@@ -1226,14 +1236,14 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
 
 static int GetDefaultMinGPUMultiprocessorCount(
     se::Platform* gpu_manager,
-    const std::vector<CudaGpuId>& visible_gpu_order) {
+    const std::vector<PlatformGpuId>& visible_gpu_order) {
   static const int kDefaultMinGPUMultiprocessorCount = 8;
 
   // Find the highest multi-processor count across all visible GPUs.
   int max_count = -1;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
     auto exec_status =
-        GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_order[i]);
+        GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_order[i]);
     if (!exec_status.ok()) {
       continue;
     }
@@ -1252,7 +1262,7 @@ static int GetDefaultMinGPUMultiprocessorCount(
 
 static int GetMinGPUMultiprocessorCount(
     se::Platform* gpu_manager,
-    const std::vector<CudaGpuId>& visible_gpu_order) {
+    const std::vector<PlatformGpuId>& visible_gpu_order) {
   const char* tf_min_gpu_core_count = getenv("TF_MIN_GPU_MULTIPROCESSOR_COUNT");
 
   if (tf_min_gpu_core_count == nullptr ||
@@ -1330,18 +1340,20 @@ std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
 }
 
 Status EnablePeerAccess(se::Platform* platform,
-                        const std::vector<CudaGpuId>& visible_gpu_order) {
+                        const std::vector<PlatformGpuId>& visible_gpu_order) {
   int possible_peer_count = 0;
   int enabled_peer_count = 0;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const CudaGpuId cuda_gpu_i = visible_gpu_order[i];
+    const PlatformGpuId platform_gpu_i = visible_gpu_order[i];
     for (int j = 0; j < visible_gpu_order.size(); ++j) {
-      const CudaGpuId cuda_gpu_j = visible_gpu_order[j];
+      const PlatformGpuId platform_gpu_j = visible_gpu_order[j];
       // We have already validated that ExecutorForDevice() calls return OK.
       se::StreamExecutor* from =
-          GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_i).ValueOrDie();
+          GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_i)
+              .ValueOrDie();
       se::StreamExecutor* to =
-          GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_j).ValueOrDie();
+          GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_j)
+              .ValueOrDie();
 
       if (from->CanEnablePeerAccessTo(to)) {
         ++possible_peer_count;
@@ -1349,7 +1361,8 @@ Status EnablePeerAccess(se::Platform* platform,
         if (!status.ok()) {
           LOG(WARNING)
               << "Unable to enable peer access between device ordinals "
-              << cuda_gpu_i << " and " << cuda_gpu_j << ", status: " << status;
+              << platform_gpu_i << " and " << platform_gpu_j
+              << ", status: " << status;
         } else {
           ++enabled_peer_count;
         }
@@ -1372,22 +1385,23 @@ Status EnablePeerAccess(se::Platform* platform,
 }  // namespace
 
 Status BaseGPUDeviceFactory::GetValidDeviceIds(
-    const std::vector<CudaGpuId>& visible_gpu_order,
-    std::vector<CudaGpuId>* ids) {
+    const std::vector<PlatformGpuId>& visible_gpu_order,
+    std::vector<PlatformGpuId>* ids) {
   se::Platform* gpu_manager = GPUMachineManager();
   bool new_gpu_found = false;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const CudaGpuId cuda_gpu_id = visible_gpu_order[i];
+    const PlatformGpuId visible_gpu_id = visible_gpu_order[i];
 
-    // Only perform this once per visible cuda gpu id.
-    if (visible_gpu_initialized_[cuda_gpu_id.value()]) {
+    // Only perform this once per visible platform gpu id.
+    if (visible_gpu_initialized_[visible_gpu_id.value()]) {
       continue;
     }
 
-    visible_gpu_initialized_[cuda_gpu_id.value()] = true;
+    visible_gpu_initialized_[visible_gpu_id.value()] = true;
     new_gpu_found = true;
 
-    auto executor = GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, cuda_gpu_id);
+    auto executor =
+        GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_id);
     if (!executor.ok()) {
       return executor.status();
     }
@@ -1435,9 +1449,9 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
 
   // Filter out devices that don't have the right capability or power.
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const CudaGpuId visible_gpu_id = visible_gpu_order[i];
+    const PlatformGpuId visible_gpu_id = visible_gpu_order[i];
     auto exec_status =
-        GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_id);
+        GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_id);
     if (!exec_status.ok()) {
       LOG(INFO) << "Ignoring visible gpu device " << visible_gpu_id
                 << " whose executor is in invalid state: "
@@ -1486,7 +1500,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
   if (!ids->empty()) {
     std::vector<int> raw_ids(ids->size());
     std::transform(ids->begin(), ids->end(), raw_ids.begin(),
-                   [](CudaGpuId id) -> int { return id.value(); });
+                   [](PlatformGpuId id) -> int { return id.value(); });
     LOG(INFO) << "Adding visible gpu devices: "
               << str_util::Join(raw_ids, ", ");
   }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 56d03d7a8c..684cc0c1de 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -89,12 +89,12 @@ class BaseGPUDevice : public LocalDevice {
   void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
                              DeviceContext* dc, Allocator* allocator) override;
 
-  // Returns the CUDA GPU id of this device within the native driver system;
+  // Returns the platform GPU id of this device within the native driver system;
   // e.g., for CUDA this is the ordinal of the GPU within the system.
   int gpu_id() const {
-    CudaGpuId cuda_gpu_id;
-    TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id_, &cuda_gpu_id));
-    return cuda_gpu_id.value();
+    PlatformGpuId platform_gpu_id;
+    TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id));
+    return platform_gpu_id.value();
   }
 
   // The executor that provides control for the device; e.g., for CUDA this
@@ -168,14 +168,14 @@ class BaseGPUDeviceFactory : public DeviceFactory {
     int32 strength;
     static const int kSameDeviceStrength;
     static const int kStreamExecutorStrength;
-    std::set<std::pair<CudaGpuId, CudaGpuId>> directed_links;
+    std::set<std::pair<PlatformGpuId, PlatformGpuId>> directed_links;
   };
 
  protected:
   // Populates *maps with interconnect maps for all local direct access
   // pathways between GPUs.
   virtual Status GetInterconnectMaps(
-      const std::vector<CudaGpuId>& visible_gpu_order,
+      const std::vector<PlatformGpuId>& visible_gpu_order,
       se::Platform* gpu_manager, std::vector<InterconnectMap>* maps);
 
   struct TfGpuIdHash {
@@ -207,16 +207,16 @@ class BaseGPUDeviceFactory : public DeviceFactory {
                                          Allocator* gpu_allocator,
                                          Allocator* cpu_allocator) = 0;
 
-  // Returns into 'ids' the list of valid CUDA GPU ids, in the order that
+  // Returns into 'ids' the list of valid platform GPU ids, in the order that
   // they should map to TF GPU ids "/device:GPU:0", "/device:GPU:1", etc,
   // based upon 'visible_gpu_order' which was generated by parsing
   // GPUOptions::visible_device_list which is a comma-separated list of CUDA GPU
   // ids.
-  Status GetValidDeviceIds(const std::vector<CudaGpuId>& visible_gpu_order,
-                           std::vector<CudaGpuId>* ids);
+  Status GetValidDeviceIds(const std::vector<PlatformGpuId>& visible_gpu_order,
+                           std::vector<PlatformGpuId>* ids);
 
-  // visible_gpu_initialized_[cuda_gpu_id] is true if visible GPU cuda_gpu_id
-  // has been initialized by the process.
+  // visible_gpu_initialized_[platform_gpu_id] is true if visible GPU
+  // platform_gpu_id has been initialized by the process.
   std::unordered_map<int, bool> visible_gpu_initialized_;
 };
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
index daf59f0560..36294094e9 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -30,18 +30,21 @@ namespace tensorflow {
 namespace {
 const char* kDeviceNamePrefix = "/job:localhost/replica:0/task:0";
 
-int64 GetTotalGPUMemory(CudaGpuId gpu_id) {
+int64 GetTotalGPUMemory(PlatformGpuId gpu_id) {
   se::StreamExecutor* se =
-      GpuIdUtil::ExecutorForCudaGpuId(GPUMachineManager(), gpu_id).ValueOrDie();
+      GpuIdUtil::ExecutorForPlatformGpuId(GPUMachineManager(), gpu_id)
+          .ValueOrDie();
 
   int64 total_memory, available_memory;
   CHECK(se->DeviceMemoryUsage(&available_memory, &total_memory));
   return total_memory;
 }
 
-Status GetComputeCapability(CudaGpuId gpu_id, int* cc_major, int* cc_minor) {
+Status GetComputeCapability(PlatformGpuId gpu_id, int* cc_major,
+                            int* cc_minor) {
   se::StreamExecutor* se =
-      GpuIdUtil::ExecutorForCudaGpuId(GPUMachineManager(), gpu_id).ValueOrDie();
+      GpuIdUtil::ExecutorForPlatformGpuId(GPUMachineManager(), gpu_id)
+          .ValueOrDie();
   if (!se->GetDeviceDescription().cuda_compute_capability(cc_major, cc_minor)) {
     *cc_major = 0;
     *cc_minor = 0;
@@ -223,7 +226,7 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
 // error.
 TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) {
   int cc_major, cc_minor;
-  TF_ASSERT_OK(GetComputeCapability(CudaGpuId(0), &cc_major, &cc_minor));
+  TF_ASSERT_OK(GetComputeCapability(PlatformGpuId(0), &cc_major, &cc_minor));
   // Exit early while running on Pascal or later GPUs.
   if (cc_major >= 6) {
     return;
@@ -244,10 +247,10 @@ TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) {
 // more memory than what is available on the device.
 TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) {
   static constexpr double kGpuMemoryFraction = 1.2;
-  static constexpr CudaGpuId kCudaGpuId(0);
+  static constexpr PlatformGpuId kPlatformGpuId(0);
 
   int cc_major, cc_minor;
-  TF_ASSERT_OK(GetComputeCapability(kCudaGpuId, &cc_major, &cc_minor));
+  TF_ASSERT_OK(GetComputeCapability(kPlatformGpuId, &cc_major, &cc_minor));
   // Exit early if running on pre-Pascal GPUs.
   if (cc_major < 6) {
     LOG(INFO)
@@ -262,7 +265,7 @@ TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) {
   ASSERT_EQ(1, devices.size());
 
   int64 memory_limit = devices[0]->attributes().memory_limit();
-  ASSERT_EQ(memory_limit, static_cast<int64>(GetTotalGPUMemory(kCudaGpuId) *
+  ASSERT_EQ(memory_limit, static_cast<int64>(GetTotalGPUMemory(kPlatformGpuId) *
                                              kGpuMemoryFraction));
 
   AllocatorAttributes allocator_attributes = AllocatorAttributes();
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id.h b/tensorflow/core/common_runtime/gpu/gpu_id.h
index 2a6caea296..f0d9022821 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_id.h
@@ -25,10 +25,10 @@ namespace tensorflow {
 //   physical machine, it can be filtered by CUDA environment variable
 //   CUDA_VISIBLE_DEVICES. Note that this id is not visible to Tensorflow, but
 //   result after filtering by CUDA_VISIBLE_DEVICES is visible to TF and is
-//   called CUDA GPU id as below. See
+//   called platform GPU id as below. See
 //   http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
 //   for more details.
-// - CUDA GPU id (also called *visible* GPU id in
+// - *platform* GPU id (also called *visible* GPU id in
 //   third_party/tensorflow/core/protobuf/config.proto): this is the id that is
 //   visible to Tensorflow after filtering by CUDA_VISIBLE_DEVICES, and is
 //   generated by the CUDA GPU driver. It starts from 0 and is used for CUDA API
@@ -39,14 +39,14 @@ namespace tensorflow {
 //   field of the device name "/device:GPU:<id>", and is also the identifier of
 //   a BaseGPUDevice. Note that the configuration allows us to create multiple
 //   BaseGPUDevice per GPU hardware in order to use multi CUDA streams on the
-//   hardware, so the mapping between TF GPU id and CUDA GPU id is not a 1:1
+//   hardware, so the mapping between TF GPU id and platform GPU id is not a 1:1
 //   mapping, see the example below.
 //
 // For example, assuming that in the machine we have GPU device with index 0, 1,
 // 2 and 3 (physical GPU id). Setting "CUDA_VISIBLE_DEVICES=1,2,3" will create
-// the following mapping between CUDA GPU id and physical GPU id:
+// the following mapping between platform GPU id and physical GPU id:
 //
-//        CUDA GPU id ->  physical GPU id
+//        platform GPU id ->  physical GPU id
 //                 0  ->  1
 //                 1  ->  2
 //                 2  ->  3
@@ -56,32 +56,32 @@ namespace tensorflow {
 //
 // Assuming we configure the Session to create one BaseGPUDevice per GPU
 // hardware, then setting GPUOptions::visible_device_list to "2,0" will create
-// the following mappting between TF GPU id and CUDA GPU id:
+// the following mappting between TF GPU id and platform GPU id:
 //
-//                  TF GPU id  ->  CUDA GPU ID
+//                  TF GPU id  ->  platform GPU ID
 //      0 (i.e. /device:GPU:0) ->  2
 //      1 (i.e. /device:GPU:1) ->  0
 //
-// Note that CUDA GPU id 1 is filtered out by GPUOptions::visible_device_list,
-// so it won't be used by the TF process.
+// Note that platform GPU id 1 is filtered out by
+// GPUOptions::visible_device_list, so it won't be used by the TF process.
 //
 // On the other hand, if we configure it to create 2 BaseGPUDevice per GPU
 // hardware, then setting GPUOptions::visible_device_list to "2,0" will create
-// the following mappting between TF GPU id and CUDA GPU id:
+// the following mappting between TF GPU id and platform GPU id:
 //
-//                  TF GPU id  ->  CUDA GPU ID
+//                  TF GPU id  ->  platform GPU ID
 //      0 (i.e. /device:GPU:0) ->  2
 //      1 (i.e. /device:GPU:1) ->  2
 //      2 (i.e. /device:GPU:2) ->  0
 //      3 (i.e. /device:GPU:3) ->  0
 //
-// We create strong-typed integer classes for both TF GPU id and CUDA GPU id to
-// minimize programming errors and improve code readability. Except for the
+// We create strong-typed integer classes for both TF GPU id and platform GPU id
+// to minimize programming errors and improve code readability. Except for the
 // StreamExecutor interface (as we don't change its API), whenever we need a
-// TF GPU id (or CUDA GPU id) we should use TfGpuId (or CudaGpuId) instead of a
-// raw integer.
+// TF GPU id (or platform GPU id) we should use TfGpuId (or PlatformGpuId)
+// instead of a raw integer.
 TF_LIB_GTL_DEFINE_INT_TYPE(TfGpuId, int32);
-TF_LIB_GTL_DEFINE_INT_TYPE(CudaGpuId, int32);
+TF_LIB_GTL_DEFINE_INT_TYPE(PlatformGpuId, int32);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc
index b5099dc8ef..2b40730119 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc
@@ -26,26 +26,27 @@ limitations under the License.
 
 namespace tensorflow {
 namespace {
-// Manages the map between TfGpuId and CUDA GPU id.
-class TfToCudaGpuIdMap {
+// Manages the map between TfGpuId and platform GPU id.
+class TfToPlatformGpuIdMap {
  public:
-  static TfToCudaGpuIdMap* singleton() {
-    static auto* id_map = new TfToCudaGpuIdMap;
+  static TfToPlatformGpuIdMap* singleton() {
+    static auto* id_map = new TfToPlatformGpuIdMap;
     return id_map;
   }
 
-  Status Insert(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id) LOCKS_EXCLUDED(mu_) {
+  Status Insert(TfGpuId tf_gpu_id, PlatformGpuId platform_gpu_id)
+      LOCKS_EXCLUDED(mu_) {
     std::pair<IdMapType::iterator, bool> result;
     {
       mutex_lock lock(mu_);
-      result = id_map_.insert({tf_gpu_id.value(), cuda_gpu_id.value()});
+      result = id_map_.insert({tf_gpu_id.value(), platform_gpu_id.value()});
     }
-    if (!result.second && cuda_gpu_id.value() != result.first->second) {
+    if (!result.second && platform_gpu_id.value() != result.first->second) {
       return errors::AlreadyExists(
           "TensorFlow device (GPU:", tf_gpu_id.value(),
           ") is being mapped to "
           "multiple CUDA devices (",
-          cuda_gpu_id.value(), " now, and ", result.first->second,
+          platform_gpu_id.value(), " now, and ", result.first->second,
           " previously), which is not supported. "
           "This may be the result of providing different GPU configurations "
           "(ConfigProto.gpu_options, for example different visible_device_list)"
@@ -56,17 +57,17 @@ class TfToCudaGpuIdMap {
     return Status::OK();
   }
 
-  bool Find(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) const
+  bool Find(TfGpuId tf_gpu_id, PlatformGpuId* platform_gpu_id) const
       LOCKS_EXCLUDED(mu_) {
     mutex_lock lock(mu_);
     auto result = id_map_.find(tf_gpu_id.value());
     if (result == id_map_.end()) return false;
-    *cuda_gpu_id = result->second;
+    *platform_gpu_id = result->second;
     return true;
   }
 
  private:
-  TfToCudaGpuIdMap() = default;
+  TfToPlatformGpuIdMap() = default;
 
   void TestOnlyReset() LOCKS_EXCLUDED(mu_) {
     mutex_lock lock(mu_);
@@ -78,17 +79,18 @@ class TfToCudaGpuIdMap {
   IdMapType id_map_ GUARDED_BY(mu_);
 
   friend class ::tensorflow::GpuIdManager;
-  TF_DISALLOW_COPY_AND_ASSIGN(TfToCudaGpuIdMap);
+  TF_DISALLOW_COPY_AND_ASSIGN(TfToPlatformGpuIdMap);
 };
 }  // namespace
 
-Status GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id,
-                                           CudaGpuId cuda_gpu_id) {
-  return TfToCudaGpuIdMap::singleton()->Insert(tf_gpu_id, cuda_gpu_id);
+Status GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId tf_gpu_id,
+                                               PlatformGpuId platform_gpu_id) {
+  return TfToPlatformGpuIdMap::singleton()->Insert(tf_gpu_id, platform_gpu_id);
 }
 
-Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) {
-  if (TfToCudaGpuIdMap::singleton()->Find(tf_gpu_id, cuda_gpu_id)) {
+Status GpuIdManager::TfToPlatformGpuId(TfGpuId tf_gpu_id,
+                                       PlatformGpuId* platform_gpu_id) {
+  if (TfToPlatformGpuIdMap::singleton()->Find(tf_gpu_id, platform_gpu_id)) {
     return Status::OK();
   }
   return errors::NotFound("TensorFlow device GPU:", tf_gpu_id.value(),
@@ -96,7 +98,7 @@ Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) {
 }
 
 void GpuIdManager::TestOnlyReset() {
-  TfToCudaGpuIdMap::singleton()->TestOnlyReset();
+  TfToPlatformGpuIdMap::singleton()->TestOnlyReset();
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h
index 491d92ccdd..62df4310c4 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h
@@ -21,15 +21,17 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Class that maintains a map from TfGpuId to CudaGpuId, and manages the
+// Class that maintains a map from TfGpuId to PlatformGpuId, and manages the
 // translation between them.
 class GpuIdManager {
  public:
-  // Adds a mapping from tf_gpu_id to cuda_gpu_id.
-  static Status InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id);
+  // Adds a mapping from tf_gpu_id to platform_gpu_id.
+  static Status InsertTfPlatformGpuIdPair(TfGpuId tf_gpu_id,
+                                          PlatformGpuId platform_gpu_id);
 
-  // Gets the cuda_gpu_id associated with tf_gpu_id. Returns OK if found.
-  static Status TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id);
+  // Gets the platform_gpu_id associated with tf_gpu_id. Returns OK if found.
+  static Status TfToPlatformGpuId(TfGpuId tf_gpu_id,
+                                  PlatformGpuId* platform_gpu_id);
 
   // Clears the map. Used in unit tests only.
   static void TestOnlyReset();
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc
index a663ec7051..8bf3c6a308 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc
@@ -22,38 +22,38 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-CudaGpuId TfToCudaGpuId(TfGpuId tf) {
-  CudaGpuId cuda;
-  TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf, &cuda));
-  return cuda;
+PlatformGpuId TfToPlatformGpuId(TfGpuId tf) {
+  PlatformGpuId platform_gpu_id;
+  TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf, &platform_gpu_id));
+  return platform_gpu_id;
 }
 
 TEST(GpuIdManagerTest, Basics) {
   TfGpuId key_0(0);
-  CudaGpuId value_0(0);
-  TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0));
-  EXPECT_EQ(value_0, TfToCudaGpuId(key_0));
+  PlatformGpuId value_0(0);
+  TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_0, value_0));
+  EXPECT_EQ(value_0, TfToPlatformGpuId(key_0));
 
   // Multiple calls to map the same value is ok.
-  TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0));
-  EXPECT_EQ(value_0, TfToCudaGpuId(key_0));
+  TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_0, value_0));
+  EXPECT_EQ(value_0, TfToPlatformGpuId(key_0));
 
   // Map a different TfGpuId to a different value.
   TfGpuId key_1(3);
-  CudaGpuId value_1(2);
-  TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_1, value_1));
-  EXPECT_EQ(value_1, TfToCudaGpuId(key_1));
+  PlatformGpuId value_1(2);
+  TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_1, value_1));
+  EXPECT_EQ(value_1, TfToPlatformGpuId(key_1));
 
   // Mapping a different TfGpuId to the same value is ok.
   TfGpuId key_2(10);
-  TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_1));
-  EXPECT_EQ(value_1, TfToCudaGpuId(key_2));
+  TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_2, value_1));
+  EXPECT_EQ(value_1, TfToPlatformGpuId(key_2));
 
   // Mapping the same TfGpuId to a different value.
-  ASSERT_FALSE(GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_0).ok());
+  ASSERT_FALSE(GpuIdManager::InsertTfPlatformGpuIdPair(key_2, value_0).ok());
 
   // Getting a nonexistent mapping.
-  ASSERT_FALSE(GpuIdManager::TfToCudaGpuId(TfGpuId(100), &value_0).ok());
+  ASSERT_FALSE(GpuIdManager::TfToPlatformGpuId(TfGpuId(100), &value_0).ok());
 }
 
 }  // namespace
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
index b9c66b3328..b1f10fb1dc 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
@@ -24,34 +24,37 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Utility methods for translation between Tensorflow GPU ids and CUDA GPU ids.
+// Utility methods for translation between Tensorflow GPU ids and platform GPU
+// ids.
 class GpuIdUtil {
  public:
   // Convenient methods for getting the associated executor given a TfGpuId or
-  // CudaGpuId.
-  static se::port::StatusOr<se::StreamExecutor*> ExecutorForCudaGpuId(
-      se::Platform* gpu_manager, CudaGpuId cuda_gpu_id) {
-    return gpu_manager->ExecutorForDevice(cuda_gpu_id.value());
+  // PlatformGpuId.
+  static se::port::StatusOr<se::StreamExecutor*> ExecutorForPlatformGpuId(
+      se::Platform* gpu_manager, PlatformGpuId platform_gpu_id) {
+    return gpu_manager->ExecutorForDevice(platform_gpu_id.value());
   }
-  static se::port::StatusOr<se::StreamExecutor*> ExecutorForCudaGpuId(
-      CudaGpuId cuda_gpu_id) {
-    return ExecutorForCudaGpuId(GPUMachineManager(), cuda_gpu_id);
+  static se::port::StatusOr<se::StreamExecutor*> ExecutorForPlatformGpuId(
+      PlatformGpuId platform_gpu_id) {
+    return ExecutorForPlatformGpuId(GPUMachineManager(), platform_gpu_id);
   }
   static se::port::StatusOr<se::StreamExecutor*> ExecutorForTfGpuId(
       TfGpuId tf_gpu_id) {
-    CudaGpuId cuda_gpu_id;
-    TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
-    return ExecutorForCudaGpuId(cuda_gpu_id);
+    PlatformGpuId platform_gpu_id;
+    TF_RETURN_IF_ERROR(
+        GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
+    return ExecutorForPlatformGpuId(platform_gpu_id);
   }
 
-  // Verify that the cuda_gpu_id associated with a TfGpuId is legitimate.
+  // Verify that the platform_gpu_id associated with a TfGpuId is legitimate.
   static void CheckValidTfGpuId(TfGpuId tf_gpu_id) {
-    CudaGpuId cuda_gpu_id;
-    TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
+    PlatformGpuId platform_gpu_id;
+    TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
     const int visible_device_count = GPUMachineManager()->VisibleDeviceCount();
-    CHECK_LT(cuda_gpu_id.value(), visible_device_count)
-        << "cuda_gpu_id is outside discovered device range."
-        << " TF GPU id: " << tf_gpu_id << " CUDA GPU id: " << cuda_gpu_id
+    CHECK_LT(platform_gpu_id.value(), visible_device_count)
+        << "platform_gpu_id is outside discovered device range."
+        << " TF GPU id: " << tf_gpu_id
+        << " platform GPU id: " << platform_gpu_id
         << " visible device count: " << visible_device_count;
   }
 };
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
index b18688174d..a5b46382f1 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
@@ -106,22 +106,23 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
       return nullptr;
     }
 
-    CudaGpuId cuda_gpu_id;
-    TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
+    PlatformGpuId platform_gpu_id;
+    TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
     gpu_allocator =
-        new GPUBFCAllocator(cuda_gpu_id, total_bytes, options,
+        new GPUBFCAllocator(platform_gpu_id, total_bytes, options,
                             strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc"));
 
     // If true, checks for memory overwrites by writing
     // distinctive patterns on both ends of allocated memory.
     if (useCudaMemoryGuardAllocator()) {
-      gpu_allocator = new GPUDebugAllocator(gpu_allocator, cuda_gpu_id);
-      gpu_allocator = new GPUNanResetAllocator(gpu_allocator, cuda_gpu_id);
+      gpu_allocator = new GPUDebugAllocator(gpu_allocator, platform_gpu_id);
+      gpu_allocator = new GPUNanResetAllocator(gpu_allocator, platform_gpu_id);
     } else if (useCudaMallocAllocator()) {
       // If true, passes all allocation requests through to cudaMalloc
       // useful for doing memory debugging with tools like cuda-memcheck
       // **WARNING** probably will not work in a multi-gpu scenario
-      gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id);
+      gpu_allocator =
+          new GPUcudaMallocAllocator(gpu_allocator, platform_gpu_id);
     }
     gpu_allocators_[tf_gpu_id.value()] = gpu_allocator;
 
@@ -138,7 +139,7 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
       ProcessState::MemDesc md;
       md.loc = ProcessState::MemDesc::GPU;
-      md.dev_index = cuda_gpu_id.value();
+      md.dev_index = platform_gpu_id.value();
       md.gpu_registered = false;
       md.nic_registered = true;
       if (static_cast<int64>(gpu_al_.size()) <= tf_gpu_id.value()) {
diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc
index b97603c890..e4f6bf7c86 100644
--- a/tensorflow/core/grappler/clusters/single_machine.cc
+++ b/tensorflow/core/grappler/clusters/single_machine.cc
@@ -93,13 +93,13 @@ Status SingleMachine::Provision() {
             strings::StrCat("Not able to parse GPU device name: ", dev.name()));
       }
       TfGpuId tf_gpu_id(parsed.id);
-      CudaGpuId cuda_gpu_id;
-      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+      PlatformGpuId platform_gpu_id;
+      Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
       if (!s.ok()) {
         return errors::Unavailable("Unknown TF GPU device with id ",
                                    tf_gpu_id.value(), ": ", s.ToString());
       }
-      attr = GetLocalGPUInfo(cuda_gpu_id);
+      attr = GetLocalGPUInfo(platform_gpu_id);
     } else if (dev.device_type().find("XLA") == string::npos) {
       // Filter out the fake XLA devices to avoid double counting the actual
       // hardware resources that are available.
diff --git a/tensorflow/core/grappler/clusters/utils.cc b/tensorflow/core/grappler/clusters/utils.cc
index a7519725a5..567e7c075e 100644
--- a/tensorflow/core/grappler/clusters/utils.cc
+++ b/tensorflow/core/grappler/clusters/utils.cc
@@ -70,13 +70,14 @@ DeviceProperties GetLocalCPUInfo() {
   return device;
 }
 
-DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id) {
+DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id) {
   DeviceProperties device;
   device.set_type("GPU");
 
 #if GOOGLE_CUDA
   cudaDeviceProp properties;
-  cudaError_t error = cudaGetDeviceProperties(&properties, cuda_gpu_id.value());
+  cudaError_t error =
+      cudaGetDeviceProperties(&properties, platform_gpu_id.value());
   if (error != cudaSuccess) {
     device.set_type("UNKNOWN");
     LOG(ERROR) << "Failed to get device properties, error code: " << error;
@@ -122,15 +123,15 @@ DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) {
   } else if (device.type == "GPU") {
     if (device.has_id) {
       TfGpuId tf_gpu_id(device.id);
-      CudaGpuId cuda_gpu_id;
-      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+      PlatformGpuId platform_gpu_id;
+      Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
       if (!s.ok()) {
         LOG(ERROR) << s;
         return unknown;
       }
-      return GetLocalGPUInfo(cuda_gpu_id);
+      return GetLocalGPUInfo(platform_gpu_id);
     } else {
-      return GetLocalGPUInfo(CudaGpuId(0));
+      return GetLocalGPUInfo(PlatformGpuId(0));
     }
   }
   return unknown;
diff --git a/tensorflow/core/grappler/clusters/utils.h b/tensorflow/core/grappler/clusters/utils.h
index ca15c48006..f0a342b728 100644
--- a/tensorflow/core/grappler/clusters/utils.h
+++ b/tensorflow/core/grappler/clusters/utils.h
@@ -28,7 +28,7 @@ DeviceProperties GetLocalCPUInfo();
 
 // Returns the DeviceProperties for the specified GPU attached to the server on
 // which grappler is running.
-DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id);
+DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id);
 
 // Returns the DeviceProperties of the specified device
 DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device);
diff --git a/tensorflow/core/grappler/clusters/utils_test.cc b/tensorflow/core/grappler/clusters/utils_test.cc
index 74218adbac..3863d62980 100644
--- a/tensorflow/core/grappler/clusters/utils_test.cc
+++ b/tensorflow/core/grappler/clusters/utils_test.cc
@@ -31,22 +31,22 @@ TEST(UtilsTest, GetLocalGPUInfo) {
   LOG(INFO) << "CUDA is enabled.";
   DeviceProperties properties;
 
-  // Invalid CUDA GPU ID.
-  properties = GetLocalGPUInfo(CudaGpuId(100));
+  // Invalid platform GPU ID.
+  properties = GetLocalGPUInfo(PlatformGpuId(100));
   EXPECT_EQ("UNKNOWN", properties.type());
 
-  // Succeed when a valid CUDA GPU id was inserted.
-  properties = GetLocalGPUInfo(CudaGpuId(0));
+  // Succeed when a valid platform GPU id was inserted.
+  properties = GetLocalGPUInfo(PlatformGpuId(0));
   EXPECT_EQ("GPU", properties.type());
   EXPECT_EQ("NVIDIA", properties.vendor());
 #else
   LOG(INFO) << "CUDA is not enabled.";
   DeviceProperties properties;
 
-  properties = GetLocalGPUInfo(CudaGpuId(0));
+  properties = GetLocalGPUInfo(PlatformGpuId(0));
   EXPECT_EQ("GPU", properties.type());
 
-  properties = GetLocalGPUInfo(CudaGpuId(100));
+  properties = GetLocalGPUInfo(PlatformGpuId(100));
   EXPECT_EQ("GPU", properties.type());
 #endif
 }
@@ -74,20 +74,20 @@ TEST(UtilsTest, GetDeviceInfo) {
   EXPECT_EQ("NVIDIA", properties.vendor());
 #endif
 
-  // TF to CUDA GPU id mapping entry doesn't exist.
+  // TF to platform GPU id mapping entry doesn't exist.
   device.has_id = true;
   device.id = 0;
   properties = GetDeviceInfo(device);
   EXPECT_EQ("UNKNOWN", properties.type());
 
 #if GOOGLE_CUDA
-  // Invalid CUDA GPU id.
-  GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(0), CudaGpuId(100));
+  // Invalid platform GPU id.
+  GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(0), PlatformGpuId(100));
   properties = GetDeviceInfo(device);
   EXPECT_EQ("UNKNOWN", properties.type());
 
-  // Valid CUDA GPU id.
-  GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(1), CudaGpuId(0));
+  // Valid platform GPU id.
+  GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(1), PlatformGpuId(0));
   device.id = 1;
   properties = GetDeviceInfo(device);
   EXPECT_EQ("GPU", properties.type());
diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index aad00ce039..7691f25327 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -209,13 +209,13 @@ DeviceProperties GetDeviceInfo(const string& device_str) {
   if (DeviceNameUtils::ParseFullName(device_str, &parsed)) {
     if (parsed.type == "GPU") {
       TfGpuId tf_gpu_id(parsed.id);
-      CudaGpuId cuda_gpu_id;
-      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+      PlatformGpuId platform_gpu_id;
+      Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
       if (!s.ok()) {
         // We are probably running simulation without linking cuda libraries.
-        cuda_gpu_id = CudaGpuId(parsed.id);
+        platform_gpu_id = PlatformGpuId(parsed.id);
       }
-      return GetLocalGPUInfo(cuda_gpu_id);
+      return GetLocalGPUInfo(platform_gpu_id);
     } else if (parsed.type == "CPU") {
       return GetLocalCPUInfo();
     }
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index da3a99565e..c68504a272 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -68,7 +68,7 @@ message GPUOptions {
   //    after the process starts.  Users are required to use vendor
   //    specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the
   //    physical to visible device mapping prior to invoking TensorFlow.
-  // 2. In the code, the ids in this list are also called "CUDA GPU id"s,
+  // 2. In the code, the ids in this list are also called "platform GPU id"s,
   //    and the 'virtual' ids of GPU devices (i.e. the ids in the device
   //    name "/device:GPU:<id>") are also called "TF GPU id"s. Please
   //    refer to third_party/tensorflow/core/common_runtime/gpu/gpu_id.h
-- 
GitLab


From 204ef67242ce7fbba067b631c4d6c4bcd64288c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Sat, 1 Sep 2018 21:06:52 +0800
Subject: [PATCH 0069/1357] CLN: remove print method, and append error msg to
 exception

---
 tensorflow/python/framework/test_util.py      | 30 ++++++++++---------
 tensorflow/python/framework/test_util_test.py |  8 +++++
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index b5388ad0b2..6d03e956da 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1329,35 +1329,36 @@ class TensorFlowTestCase(googletest.TestCase):
     self.assertEqual(
         a.shape, b.shape,
         "Shape mismatch: expected %s, got %s." % (a.shape, b.shape))
+    msgs = [msg]
     if not np.allclose(a, b, rtol=rtol, atol=atol):
-      # Prints more details than np.testing.assert_allclose.
+      # Add more details than np.testing.assert_allclose.
       #
       # NOTE: numpy.allclose (and numpy.testing.assert_allclose)
       # checks whether two arrays are element-wise equal within a
       # tolerance. The relative difference (rtol * abs(b)) and the
       # absolute difference atol are added together to compare against
       # the absolute difference between a and b.  Here, we want to
-      # print out which elements violate such conditions.
+      # tell user which elements violate such conditions.
       cond = np.logical_or(
           np.abs(a - b) > atol + rtol * np.abs(b),
           np.isnan(a) != np.isnan(b))
       if a.ndim:
         x = a[np.where(cond)]
         y = b[np.where(cond)]
-        print("not close where = ", np.where(cond))
+        msgs.append("not close where = {}".format(np.where(cond)))
       else:
         # np.where is broken for scalars
         x, y = a, b
-      print("not close lhs = ", x)
-      print("not close rhs = ", y)
-      print("not close dif = ", np.abs(x - y))
-      print("not close tol = ", atol + rtol * np.abs(y))
-      print("dtype = %s, shape = %s" % (a.dtype, a.shape))
+      msgs.append("not close lhs = {}".format(x))
+      msgs.append("not close rhs = {}".format(y))
+      msgs.append("not close dif = {}".format(np.abs(x - y)))
+      msgs.append("not close tol = {}".format(atol + rtol * np.abs(y)))
+      msgs.append("dtype = {}, shape = {}".format(a.dtype, a.shape))
       # TODO(xpan): There seems to be a bug:
       # tensorflow/compiler/tests:binary_ops_test pass with float32
       # nan even though the equal_nan is False by default internally.
       np.testing.assert_allclose(
-          a, b, rtol=rtol, atol=atol, err_msg=msg, equal_nan=True)
+          a, b, rtol=rtol, atol=atol, err_msg="\n".join(msgs), equal_nan=True)
 
   def _assertAllCloseRecursive(self,
                                a,
@@ -1539,19 +1540,20 @@ class TensorFlowTestCase(googletest.TestCase):
         np.float16, np.float32, np.float64, dtypes.bfloat16.as_numpy_dtype
     ]):
       same = np.logical_or(same, np.logical_and(np.isnan(a), np.isnan(b)))
+    msgs = [msg]
     if not np.all(same):
-      # Prints more details than np.testing.assert_array_equal.
+      # Add more details than np.testing.assert_array_equal.
       diff = np.logical_not(same)
       if a.ndim:
         x = a[np.where(diff)]
         y = b[np.where(diff)]
-        print("not equal where = ", np.where(diff))
+        msgs.append("not equal where = {}".format(np.where(diff)))
       else:
         # np.where is broken for scalars
         x, y = a, b
-      print("not equal lhs = ", x)
-      print("not equal rhs = ", y)
-      np.testing.assert_array_equal(a, b, err_msg=msg)
+      msgs.append("not equal lhs = {}".format(x))
+      msgs.append("not equal rhs = {}".format(y))
+      np.testing.assert_array_equal(a, b, err_msg="\n".join(msgs))
 
   def assertAllGreater(self, a, comparison_target):
     """Assert element values are all greater than a target value.
diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py
index a0939f98b2..c9b5d46f98 100644
--- a/tensorflow/python/framework/test_util_test.py
+++ b/tensorflow/python/framework/test_util_test.py
@@ -270,6 +270,11 @@ class TestUtilTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(AssertionError, r"Not equal to tolerance"):
       self.assertAllClose(7, 7 + 1e-5)
 
+  @test_util.run_in_graph_and_eager_modes
+  def testAllCloseList(self):
+    with self.assertRaisesRegexp(AssertionError, r"not close dif"):
+      self.assertAllClose([0], [1])
+
   @test_util.run_in_graph_and_eager_modes
   def testAllCloseDictToNonDict(self):
     with self.assertRaisesRegexp(ValueError, r"Can't compare dict to non-dict"):
@@ -455,6 +460,9 @@ class TestUtilTest(test_util.TensorFlowTestCase):
     self.assertAllEqual([120] * 3, k)
     self.assertAllEqual([20] * 3, j)
 
+    with self.assertRaisesRegexp(AssertionError, r"not equal lhs"):
+      self.assertAllEqual([0] * 3, k)
+
   @test_util.run_in_graph_and_eager_modes
   def testAssertNotAllClose(self):
     # Test with arrays
-- 
GitLab


From 2586eb3bfeeef3af357e438ae5aff92d2bac12a5 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Mon, 3 Sep 2018 11:48:35 +0800
Subject: [PATCH 0070/1357] Code fix against ci_build error results.

---
 tensorflow/cc/gradients/nn_grad_test.cc       |  3 +-
 tensorflow/core/kernels/relu_op.cc            |  8 +--
 tensorflow/core/kernels/relu_op.h             |  8 +--
 tensorflow/core/kernels/relu_op_functor.h     |  1 -
 .../python/kernel_tests/relu_op_test.py       | 50 +++++++++----------
 .../tools/api/golden/v1/tensorflow.pbtxt      |  4 ++
 6 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index bf0db1f59d..d8c2a1a0fc 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -180,8 +180,7 @@ TEST_F(NNGradTest, LeakyReluGradGrad) {
   // Avoid input values where Leaky ReLU gradient is not well defined (around
   // zero).
   Tensor x_init_value = test::AsTensor<float>(
-      {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f},
-      {5, 2});
+      {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, {5, 2});
   Tensor features = test::AsTensor<float>(
       {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f},
       {5, 2});
diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc
index c4f2ef5632..cafa49cbb6 100644
--- a/tensorflow/core/kernels/relu_op.cc
+++ b/tensorflow/core/kernels/relu_op.cc
@@ -106,15 +106,15 @@ namespace functor {
                                                                                \
   template <>                                                                  \
   void LeakyRelu<GPUDevice, T>::operator()(                                    \
-      const GPUDevice& d, typename TTypes<T>::ConstTensor features,            \
-      T alpha, typename TTypes<T>::Tensor activations);                        \
+      const GPUDevice& d, typename TTypes<T>::ConstTensor features, T alpha,   \
+      typename TTypes<T>::Tensor activations);                                 \
   extern template struct LeakyRelu<GPUDevice, T>;                              \
                                                                                \
   template <>                                                                  \
   void LeakyReluGrad<GPUDevice, T>::operator()(                                \
       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,           \
-      typename TTypes<T>::ConstTensor features,                                \
-      T alpha, typename TTypes<T>::Tensor backprops);                          \
+      typename TTypes<T>::ConstTensor features, T alpha,                       \
+      typename TTypes<T>::Tensor backprops);                                   \
   extern template struct LeakyReluGrad<GPUDevice, T>;                          \
                                                                                \
   template <>                                                                  \
diff --git a/tensorflow/core/kernels/relu_op.h b/tensorflow/core/kernels/relu_op.h
index c55190065c..fa79ab03ae 100644
--- a/tensorflow/core/kernels/relu_op.h
+++ b/tensorflow/core/kernels/relu_op.h
@@ -143,8 +143,8 @@ class LeakyReluOp : public UnaryElementWiseOp<T, LeakyReluOp<Device, T>> {
 
   void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
     functor::LeakyRelu<Device, T> functor;
-    functor(context->eigen_device<Device>(), input.flat<T>(),
-            alpha_, output->flat<T>());
+    functor(context->eigen_device<Device>(), input.flat<T>(), alpha_,
+            output->flat<T>());
   }
 
  private:
@@ -183,7 +183,9 @@ class LeakyReluGradOp
 
 template <typename Device, typename T>
 void LeakyReluGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
-    const Tensor& g, const Tensor& a, T alpha, Tensor* output) {
+                                                   const Tensor& g,
+                                                   const Tensor& a, T alpha,
+                                                   Tensor* output) {
   if (!ReluHelpers::ValidateSameSize(context, g, a)) return;
   functor::LeakyReluGrad<Device, T> functor;
   functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(), alpha,
diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h
index 7f0951451d..548d5a277d 100644
--- a/tensorflow/core/kernels/relu_op_functor.h
+++ b/tensorflow/core/kernels/relu_op_functor.h
@@ -91,7 +91,6 @@ struct Relu6Grad {
   }
 };
 
-
 // Functor used by LeakyReluOp to do the computations.
 template <typename Device, typename T>
 struct LeakyRelu {
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 7066f28883..3e24b8a2c4 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -323,37 +323,37 @@ class LeakyReluTest(test.TestCase):
   def testGradGradFloat32(self):
     with compat.forward_compatibility_horizon(2018, 10, 2):
       with self.test_session():
-	x = constant_op.constant(
-	    [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-	    shape=[2, 5],
-	    name="x")
-	y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
-	z = gradients_impl.gradients(y, x)
-	x_init = np.asarray(
-	    [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
-	    dtype=np.float32,
-	    order="F")
-	err = gradient_checker.compute_gradient_error(
-	    x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+        x = constant_op.constant(
+            [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+            shape=[2, 5],
+            name="x")
+        y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
+        z = gradients_impl.gradients(y, x)
+        x_init = np.asarray(
+            [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+            dtype=np.float32,
+            order="F")
+        err = gradient_checker.compute_gradient_error(
+            x, [2, 5], z[0], [2, 5], x_init_value=x_init)
       print("leaky_relu (float32) gradient of gradient err = ", err)
       self.assertLess(err, 1e-4)
 
   def testGradGradFloat64(self):
     with compat.forward_compatibility_horizon(2018, 10, 2):
       with self.test_session():
-	x = constant_op.constant(
-	    [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-	    shape=[2, 5],
-	    dtype=dtypes.float64,
-	    name="x")
-	y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
-	z = gradients_impl.gradients(y, x)
-	x_init = np.asarray(
-	    [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
-	    dtype=np.float64,
-	    order="F")
-	err = gradient_checker.compute_gradient_error(
-	    x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+        x = constant_op.constant(
+            [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+            shape=[2, 5],
+            dtype=dtypes.float64,
+            name="x")
+        y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
+        z = gradients_impl.gradients(y, x)
+        x_init = np.asarray(
+            [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+            dtype=np.float64,
+            order="F")
+        err = gradient_checker.compute_gradient_error(
+            x, [2, 5], z[0], [2, 5], x_init_value=x_init)
       print("leaky_relu (float64) gradient of gradient err = ", err)
       self.assertLess(err, 1e-10)
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 4de662fe33..9e8d320f06 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1324,6 +1324,10 @@ tf_module {
     name: "lbeta"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "leaky_relu"
+    argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], "
+  }
   member_method {
     name: "less"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-- 
GitLab


From d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Mon, 3 Sep 2018 12:10:51 +0800
Subject: [PATCH 0071/1357] Add XLA support for LeakyReluOp.

Code contributed by: Meng Chen <mc119496@alibaba-inc.com>
---
 tensorflow/compiler/tests/binary_ops_test.py  |  7 ++++
 tensorflow/compiler/tests/unary_ops_test.py   |  5 +++
 tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 +++++++++++++++++++
 3 files changed, 54 insertions(+)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 0aafda7fb4..8941dd4e27 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -178,6 +178,13 @@ class BinaryOpsTest(xla_test.XLATestCase):
               [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype),
           expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype))
 
+      self._testBinary(
+          gen_nn_ops._leaky_relu_grad,
+          np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype),
+          np.array(
+              [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype),
+          expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype))
+
       self._testBinary(
           gen_nn_ops.softmax_cross_entropy_with_logits,
           np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype),
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index 73adb0d243..91f876fa23 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -361,6 +361,11 @@ class UnaryOpsTest(xla_test.XLATestCase):
           np.array([[-0.05, 6.05, 5]], dtype=dtype),
           expected=np.array([[0, 6, 5]], dtype=dtype))
 
+      self._assertOpOutputMatchesExpected(
+          nn_ops.leaky_relu,
+          np.array([[-1.0, 1.0]], dtype=dtype),
+          expected=np.array([[-0.2, 1.0]], dtype=dtype))
+
       self._assertOpOutputMatchesExpected(
           nn_ops.softmax,
           np.array([1, 2, 3, 4], dtype=dtype),
diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
index d35777ccb1..ec14735884 100644
--- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
@@ -50,6 +50,24 @@ class Relu6Op : public XlaOpKernel {
   }
 };
 
+
+class LeakyReluOp : public XlaOpKernel {
+ public:
+  explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
+  }
+  // Compute the max of the input x and alpha*x.
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* builder = ctx->builder();
+    auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0),
+                                          static_cast<double>(alpha_));
+    ctx->SetOutput(0,
+        xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0)));
+  }
+ private:
+  float alpha_;
+};
+
 class ReluGradOp : public XlaOpKernel {
  public:
   explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
@@ -84,10 +102,34 @@ class Relu6GradOp : public XlaOpKernel {
   }
 };
 
+class LeakyReluGradOp : public XlaOpKernel {
+ public:
+  explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
+  }
+  // Return the lhs (incoming gradient) if the rhs (input feature) > 0,
+  // otherwise return the alpha * lhs.
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* b = ctx->builder();
+    const TensorShape shape = ctx->InputShape(0);
+    const auto zero =
+        xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes());
+    const auto pred = xla::Gt(ctx->Input(1), zero);
+    auto alpha = XlaHelpers::FloatLiteral(b, input_type(0),
+                                          static_cast<double>(alpha_));
+    ctx->SetOutput(0,
+        xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0))));
+  }
+ private:
+  float alpha_;
+};
+
 REGISTER_XLA_OP(Name("Relu"), ReluOp);
 REGISTER_XLA_OP(Name("Relu6"), Relu6Op);
+REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp);
 REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp);
 REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp);
+REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp);
 
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From 6712df7f3c73bfabab51e7c7eed2130d7bcff6ec Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Tue, 4 Sep 2018 16:18:40 +0800
Subject: [PATCH 0072/1357] Add MklSlice op.

---
 tensorflow/core/BUILD                         |   2 +
 tensorflow/core/graph/mkl_layout_pass.cc      |  19 +
 tensorflow/core/graph/mkl_layout_pass_test.cc |  18 +
 tensorflow/core/kernels/BUILD                 |   9 +
 tensorflow/core/kernels/mkl_slice_op.cc       | 356 ++++++++++++++++++
 tensorflow/core/ops/array_ops.cc              |  54 +++
 6 files changed, 458 insertions(+)
 create mode 100644 tensorflow/core/kernels/mkl_slice_op.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 5c314f359c..47f16ac747 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1337,6 +1337,7 @@ cc_library(
         "//tensorflow/core/kernels:mkl_pooling_ops",
         "//tensorflow/core/kernels:mkl_relu_op",
         "//tensorflow/core/kernels:mkl_reshape_op",
+        "//tensorflow/core/kernels:mkl_slice_op",
         "//tensorflow/core/kernels:mkl_softmax_op",
         "//tensorflow/core/kernels:mkl_tfconv_op",
         "//tensorflow/core/kernels:mkl_aggregate_ops",
@@ -3758,6 +3759,7 @@ tf_cc_test_mkl(
         "//tensorflow/core/kernels:mkl_pooling_ops",
         "//tensorflow/core/kernels:mkl_relu_op",
         "//tensorflow/core/kernels:mkl_reshape_op",
+        "//tensorflow/core/kernels:mkl_slice_op",
         "//tensorflow/core/kernels:mkl_softmax_op",
         "//tensorflow/core/kernels:mkl_tfconv_op",
     ]),
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 2e644fe987..50fd6bae12 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -2447,6 +2447,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     csinfo_.tanh = "Tanh";
     csinfo_.tanh_grad = "TanhGrad";
     csinfo_.reshape = "Reshape";
+    csinfo_.slice = "Slice";
     csinfo_.softmax = "Softmax";
     csinfo_.split = "Split";
     // Element-wise ops. Ensure you also add any new ops to IsOpElementWise
@@ -2554,6 +2555,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     rinfo_.push_back({csinfo_.reshape,
                       mkl_op_registry::GetMklOpName(csinfo_.reshape),
                       CopyAttrsReshape, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.slice,
+                      mkl_op_registry::GetMklOpName(csinfo_.slice),
+                      CopyAttrsSlice, AlwaysRewrite});
     rinfo_.push_back({csinfo_.softmax,
                       mkl_op_registry::GetMklOpName(csinfo_.softmax),
                       CopyAttrsDataType, AlwaysRewrite});
@@ -2673,6 +2677,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     string tanh;
     string tanh_grad;
     string reshape;
+    string slice;
     string softmax;
     string split;
     string squared_difference;
@@ -3131,6 +3136,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsSlice(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
 
   // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
@@ -3734,6 +3740,19 @@ void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node,
   nb->Attr("Tshape", Tshape);
 }
 
+void MklLayoutRewritePass::CopyAttrsSlice(const Node* orig_node,
+                                          NodeBuilder* nb) {
+  DataType T;
+  DataType Index;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Index", &Index));
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("Index", Index);
+}
+
 void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node,
                                           NodeBuilder* nb) {
   DataType T;
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index e8bac847e5..cccef5a03a 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -3510,6 +3510,24 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
             "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1");
 }
 
+TEST_F(MklLayoutPassTest, NodeRewrite_Slice_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Int32Input'}"
+      "node { name: 'D' op: 'Slice'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'Index'        value { type: DT_INT32 } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Int32Input);"
+            "D(_MklSlice);DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A->E;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;A:control->DMT/_2:control;"
+            "B->D:1;C->D:2;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
 /////////////////////////////////////////////////////////////////////
 //         Post-rewrite fixup pass test
 
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 25063ac823..2582814d08 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -6328,6 +6328,15 @@ tf_mkl_kernel_library(
     deps = ARRAY_DEPS + mkl_deps(),
 )
 
+tf_mkl_kernel_library(
+    name = "mkl_slice_op",
+    prefix = "mkl_slice_op",
+    deps = ARRAY_DEPS + if_mkl([
+        "//third_party/mkl:intel_binary_blob",
+        "@mkl_dnn",
+    ]),
+)
+
 tf_mkl_kernel_library(
     name = "mkl_identity_op",
     prefix = "mkl_identity_op",
diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
new file mode 100644
index 0000000000..86fb572478
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -0,0 +1,356 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/array_ops.cc.
+
+#ifdef INTEL_MKL
+#ifndef INTEL_MKL_ML_ONLY
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/prefetch.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "mkldnn.hpp"
+#include "tensorflow/core/util/mkl_util.h"
+
+using mkldnn::stream;
+using mkldnn::view;
+
+namespace tensorflow {
+
+namespace {
+
+gtl::InlinedVector<int64, 4> IntTensorToInt64Vec(const Tensor& tensor) {
+  gtl::InlinedVector<int64, 4> out;
+  if (tensor.dtype() == DT_INT32) {
+    for (int64 i = 0; i < tensor.NumElements(); ++i) {
+      out.push_back(tensor.flat<int32>()(i));
+    }
+  } else if (tensor.dtype() == DT_INT64) {
+    for (int64 i = 0; i < tensor.NumElements(); ++i) {
+      out.push_back(tensor.flat<int64>()(i));
+    }
+  } else {
+    LOG(FATAL) << "begin must be either int32 or int64";
+  }
+  return out;
+}
+
+}  // namespace
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+// A version of SharedValidation (slice_op.h) written for input that is in
+// either Mkl layout or Tensorflow layout.
+static void ValidateMklInputs(OpKernelContext* context, bool* is_identity,
+                              gtl::InlinedVector<int64, 4>* begin,
+                              gtl::InlinedVector<int64, 4>* size) {
+  const int kInputTensorIndex = 0;
+  const int kInputBeginIndex = 1;
+  const int kInputSizeIndex = 2;
+  const Tensor& input = MklGetInput(context, kInputTensorIndex);
+  const Tensor& begin_tensor = MklGetInput(context, kInputBeginIndex);
+  const Tensor& size_tensor = MklGetInput(context, kInputSizeIndex);
+
+  MklDnnShape input_mkl_shape, begin_mkl_shape, size_mkl_shape;
+  GetMklShape(context, kInputTensorIndex, &input_mkl_shape);
+  GetMklShape(context, kInputBeginIndex, &begin_mkl_shape);
+  GetMklShape(context, kInputSizeIndex, &size_mkl_shape);
+
+  // Begin and size tensors cannot be in MklDnn layout.
+  CHECK_EQ(begin_mkl_shape.IsMklTensor(), false);
+  CHECK_EQ(size_mkl_shape.IsMklTensor(), false);
+
+  TensorShape input_tf_shape = input_mkl_shape.IsMklTensor()
+                                   ? input_mkl_shape.GetTfShape()
+                                   : input.shape();
+
+  OP_REQUIRES(
+      context, context->op_kernel().IsLegacyVector(begin_tensor.shape()) &&
+                   context->op_kernel().IsLegacyVector(size_tensor.shape()) &&
+                   begin_tensor.NumElements() == input_tf_shape.dims() &&
+                   size_tensor.NumElements() == input_tf_shape.dims(),
+      errors::InvalidArgument(
+          "Expected begin and size arguments to be 1-D tensors of size ",
+          input_tf_shape.dims(), ", but got shapes ",
+          begin_tensor.shape().DebugString(), " and ",
+          size_tensor.shape().DebugString(), " instead."));
+
+  const int input_dims = input_tf_shape.dims();
+  *begin = IntTensorToInt64Vec(begin_tensor);
+  *size = IntTensorToInt64Vec(size_tensor);
+  for (int i = 0; i < input_dims; ++i) {
+    if ((*size)[i] == -1) {
+      // A size[i] of -1 means "all elements from begin[i] to dim_size(i)".
+      (*size)[i] = input_tf_shape.dim_size(i) - (*begin)[i];
+    }
+  }
+
+  *is_identity = true;
+  for (int i = 0; i < input_dims; ++i) {
+    int64 b = (*begin)[i];
+    int64 s = (*size)[i];
+    if (input_tf_shape.dim_size(i) == 0) {
+      OP_REQUIRES(
+          context, b == 0 && s == 0,
+          errors::InvalidArgument("Expected begin[", i, "] == 0 (got ", b,
+                                  ") and size[", i, "] == 0 ", "(got ", s,
+                                  ") when ", "input.dim_size(", i, ") == 0"));
+    } else {
+      OP_REQUIRES(context, 0 <= b && b <= input_tf_shape.dim_size(i),
+                  errors::InvalidArgument("Expected begin[", i, "] in [0, ",
+                                          input_tf_shape.dim_size(i),
+                                          "], but got ", b));
+      OP_REQUIRES(context, 0 <= s && b + s <= input_tf_shape.dim_size(i),
+                  errors::InvalidArgument("Expected size[", i, "] in [0, ",
+                                          input_tf_shape.dim_size(i) - b,
+                                          "], but ", "got ", s));
+    }
+    const bool take_all = (b == 0) && (s == input_tf_shape.dim_size(i));
+    (*is_identity) &= take_all;
+  }
+}
+
+// A version of SharedSliceCommonCases function written for input tensor
+// that may be in MklDnn layout or in Tensorflow layout.
+template <typename T>
+static void CheckCommonCasesForMklInputs(OpKernelContext* context,
+                                         gtl::InlinedVector<int64, 4>* begin,
+                                         gtl::InlinedVector<int64, 4>* size,
+                                         bool* done) {
+  bool is_identity = true;
+  *done = false;
+
+  ValidateMklInputs(context, &is_identity, begin, size);
+  if (!context->status().ok()) return;
+
+  const Tensor& input = MklGetInput(context, 0);
+  MklDnnShape input_mkl_shape;
+  GetMklShape(context, 0, &input_mkl_shape);
+
+  if (is_identity) {
+    VLOG(1) << "Slice identity";
+    context->set_output(0, input);
+    // Mkl metadata tensor in this case can just be forwarded from input to
+    // output.
+    AllocateOutputSetMklShape(context, 0, input_mkl_shape);
+    *done = true;
+    return;
+  }
+}
+
+// MKL-DNN implementation of Slice
+template <typename Device, typename T>
+class MklDnnSliceOp : public OpKernel {
+ public:
+  explicit MklDnnSliceOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  ~MklDnnSliceOp() {}
+
+  void Compute(OpKernelContext* context) override {
+    gtl::InlinedVector<int64, 4> begin;
+    gtl::InlinedVector<int64, 4> size;
+    bool done = false;
+
+    CheckCommonCasesForMklInputs<T>(context, &begin, &size, &done);
+    if (!context->status().ok() || done == true) return;
+
+    // MKL-DNN does not have this limitation of supporting less than 8 dimension
+    // tensor. But we are mimicking functionality of Eigen Slice op for CPU.
+    if (begin.size() >= 8) {
+      OP_REQUIRES(
+          context, false,
+          errors::Unimplemented("MklDnnSliceOp : Unhandled input dimensions"));
+    }
+
+    ComputeMklDnnSlice(context, begin, size);
+    return;
+  }
+
+ private:
+  // Slice op implemented using MKL-DNN APIs.
+  void ComputeMklDnnSlice(OpKernelContext* context,
+                          const gtl::InlinedVector<int64, 4>& begin,
+                          const gtl::InlinedVector<int64, 4>& size) {
+    try {
+      // MKL-DNN API usage below is guided by description at:
+      //  https://github.com/01org/mkl-dnn/issues/69
+      //
+      // Relevant part of the description is copied below:
+      //
+      // Let's say you want to copy a part of memory into another buffer (and
+      // probably change the format). Then your steps are:
+      //
+      // 1. create memory primitive descriptor in_mem_pd and memory primitive
+      //    in_mem_p for the entire source data.
+      // 2. create view primitive descriptor in_submem_pd based on in_mem_pd,
+      //    initial offsets, and sub-sizes
+      // 3. create memory primitive descriptor out_mem_pd and memory primitive
+      //    out_mem_p for the output (the logical sizes should much sub-sizes
+      //    used in step 2, but the format might be arbitrary)
+      // 4. create reorder primitive descriptor reorder_pd based on in_submem_pd
+      //    and out_mem_pd
+      // 5. create reorder primitive itself based on reorder_pd, in_mem_p, and
+      //    out_mem_p.
+      //
+      // Please notice that there is no view primitive. There is only view
+      // primitive descriptor. And the reorder uses source memory as input but
+      // traverses it according to a view in_submem_pd.
+
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> output(&cpu_engine);
+
+      // Populate offsets and sizes in memory::dims format based on vector.
+      memory::dims begin_dims = {};
+      begin_dims.resize(begin.size());
+      for (size_t i = 0; i < begin.size(); ++i) begin_dims[i] = begin[i];
+      memory::dims size_dims = {};
+      bool empty = false;
+      size_dims.resize(size.size());
+      for (size_t i = 0; i < size.size(); ++i) {
+        size_dims[i] = size[i];
+        if (size_dims[i] == 0) empty = true;
+      }
+
+      Tensor* output_tensor = nullptr;
+      MklDnnShape output_mkl_shape;
+      if (empty) {  // for empty dims
+        auto shape_to = MklDnnDimsToTFShape(size_dims);
+        AllocateOutputSetMklShape(context, 0, &output_tensor, shape_to,
+                                  output_mkl_shape);
+        return;
+      }
+
+      // Step 1 (as per above description) - Create memory for user data.
+      // We use blocked format here to describe input tensor.
+      const Tensor& input_tensor = MklGetInput(context, 0);
+      MklDnnShape input_mkl_shape;
+      GetMklShape(context, 0, &input_mkl_shape);
+
+      if (input_mkl_shape.IsMklTensor()) {
+        auto input_mkl_format = input_mkl_shape.GetTfDataFormat();
+        auto input_tf_format = MklDnnDataFormatToTFDataFormat(input_mkl_format);
+        begin_dims = MklDnnDimsInNCHW(begin_dims, input_tf_format);
+        size_dims = MklDnnDimsInNCHW(size_dims, input_tf_format);
+      }
+
+      // Initialize input dimensions and strides to be used when input is not in
+      // MklDnn layout.
+      memory::dims input_dims, input_strides;
+      if (!input_mkl_shape.IsMklTensor()) {
+        input_dims = TFShapeToMklDnnDims(input_tensor.shape());
+        input_strides = CalculateTFStrides(input_dims);
+      }
+
+      // Create input memory descriptor.
+      auto input_md =
+          input_mkl_shape.IsMklTensor()
+              ? input_mkl_shape.GetMklLayout()
+              : MklDnnData<T>::CreateBlockedMemDesc(input_dims, input_strides);
+      src.SetUsrMem(input_md, &input_tensor);
+
+      // Step 2 - create view primitive descriptor
+      auto view_pd =
+          view::primitive_desc(src.GetUsrMemPrimDesc(), size_dims, begin_dims)
+              .dst_primitive_desc();
+      auto output_strides = CalculateTFStrides(size_dims);
+      auto output_md =
+          MklDnnData<T>::CreateBlockedMemDesc(size_dims, output_strides);
+      auto output_pd = memory::primitive_desc(output_md, cpu_engine);
+
+      // Step 3 - Create memory for output. If input is in MklDnn layout, then
+      // output is also in MklDnn layout. Otherwise, output is in Tensorflow
+      // layout.
+      AllocateOutputTensor(context, input_mkl_shape, &output_pd, size_dims,
+                           &output_tensor, &output_mkl_shape);
+      CHECK_NOTNULL(output_tensor);
+      CHECK_EQ(input_mkl_shape.IsMklTensor(), output_mkl_shape.IsMklTensor());
+      output.SetUsrMem(output_md, output_tensor);
+
+      std::vector<primitive> net;
+      // Step 4 - create reorder primitive desc between view_pd and output_pd.
+      auto reorder_pd =
+          reorder::primitive_desc(view_pd, output.GetUsrMemPrimDesc());
+      // Step 5 - create reorder primitive itself.
+      net.push_back(reorder(reorder_pd, *src.GetUsrMem(), *output.GetUsrMem()));
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error& e) {
+      string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
+                         string(e.message) + ", in file " + string(__FILE__) +
+                         ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
+    }
+  }
+
+ private:
+  void AllocateOutputTensor(OpKernelContext* context,
+                            const MklDnnShape& input_mkl_shape,
+                            memory::primitive_desc* output_pd,
+                            const memory::dims& output_dims,
+                            Tensor** output_tensor,
+                            MklDnnShape* output_mkl_shape) {
+    CHECK_NOTNULL(output_tensor);
+    CHECK_NOTNULL(output_mkl_shape);
+
+    TensorShape output_tf_shape;
+
+    if (input_mkl_shape.IsMklTensor()) {
+      // Since input tensor is in Mkl layout, output tensor will be in Mkl
+      // layout.
+
+      // Allocate shape of Mkl tensor.
+      output_mkl_shape->SetMklTensor(true);
+      output_mkl_shape->SetMklLayout(output_pd);
+      output_mkl_shape->SetElemType(MklDnnType<T>());
+      output_mkl_shape->SetTfLayout(input_mkl_shape.GetDimension(), output_dims,
+                                    input_mkl_shape.GetTfDataFormat());
+
+      output_tf_shape.AddDim((output_pd->get_size() / sizeof(T)) + 1);
+    } else {
+      // If input is not in Mkl layout, then output won't be in Mkl layout.
+      output_mkl_shape->SetMklTensor(false);
+      output_tf_shape = MklDnnDimsToTFShape(output_dims);
+    }
+
+    AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
+                              *output_mkl_shape);
+  }
+};
+
+// MKL-DNN Slice registration
+#define REGISTER_MKL_SLICE(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("_MklSlice")                         \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .HostMemory("begin")                  \
+                              .HostMemory("size")                   \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklDnnSliceOp<CPUDevice, type>);
+
+TF_CALL_float(REGISTER_MKL_SLICE);
+#undef REGISTER_MKL_SLICE
+
+}  // namespace tensorflow
+
+#endif  // INTEL_MKL_DNN
+#endif  // INTEL_MKL
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 7dbb18aa5d..18cc529a9b 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -1648,6 +1648,60 @@ REGISTER_OP("Slice")
       return Status::OK();
     });
 
+#ifdef INTEL_MKL
+REGISTER_OP("_MklSlice")
+    .Input("input: T")
+    .Input("begin: Index")
+    .Input("size: Index")
+    .Input("mkl_input: uint8")
+    .Input("mkl_begin: uint8")
+    .Input("mkl_size: uint8")
+    .Output("output: T")
+    .Output("mkl_output: uint8")
+    .Attr("T: type")
+    .Attr("Index: {int32,int64}")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input = c->input(0);
+      ShapeHandle begin_shape;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &begin_shape));
+      ShapeHandle sizes_shape;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &sizes_shape));
+
+      // Merge to check compatibility of begin and sizes tensors.
+      TF_RETURN_IF_ERROR(c->Merge(begin_shape, sizes_shape, &begin_shape));
+
+      DimensionHandle ndims = c->Dim(begin_shape, 0);
+      if (c->ValueKnown(ndims)) {
+        TF_RETURN_IF_ERROR(c->WithRank(input, c->Value(ndims), &input));
+      }
+
+      // NOTE(mrry): Use MakeShapeFromShapeTensor to handle partially-known
+      // values, even though the `begin` value does not represent a shape.
+      ShapeHandle begin_value;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &begin_value));
+
+      // NOTE(mrry): We can't use `MakeShapeFromShapeTensor` for `sizes` because
+      // it might contain -1, which can't be represented -1 in the ShapeHandle
+      // would meqan "unknown".
+      const Tensor* sizes_value = c->input_tensor(3);
+
+      if (sizes_value != nullptr) {
+        TF_RETURN_IF_ERROR(
+            c->WithRank(begin_value, sizes_value->NumElements(), &begin_value));
+        std::vector<DimensionHandle> dims;
+        // If the begin and sizes tensors are available, then
+        // we can be precise about the shape of the output.
+        if (sizes_value->dtype() == DT_INT64) {
+          TF_RETURN_IF_ERROR(
+              SliceHelper<int64>(c, begin_value, sizes_value, &dims));
+        } else {
+          TF_RETURN_IF_ERROR(
+              SliceHelper<int32>(c, begin_value, sizes_value, &dims));
+        }
+      }
+    });
+#endif
+
 REGISTER_OP("StridedSlice")
     .Input("input: T")
     .Input("begin: Index")
-- 
GitLab


From 4644d186e1cc5862d152a6be4df4443c46f76b4a Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Tue, 4 Sep 2018 07:28:44 -0700
Subject: [PATCH 0073/1357] Added the api for the bridge

---
 third_party/ngraph/ngraph_tf.BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index c1221cc385..a8d1fdc194 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -10,6 +10,8 @@ load(
 cc_library(
     name = "ngraph_tf",
     srcs = [
+        "src/ngraph_api.h",
+        "src/ngraph_api.cc",
         "src/ngraph_assign_clusters.h",
         "src/ngraph_assign_clusters.cc",
         "src/ngraph_builder.h",
-- 
GitLab


From a65c6c17d0705fe11be6f33f63a677106bf26ffb Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Tue, 4 Sep 2018 12:34:14 -0700
Subject: [PATCH 0074/1357] Updated the unit test files

---
 third_party/ngraph/ngraph_tf.BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index a8d1fdc194..7577a4014d 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -68,6 +68,8 @@ tf_cc_test(
         "test/conversions.cpp",
         "test/padding.cpp",
         "test/graph_rewrites/assign_clusters.cc",
+        "test/test_utilities.h",
+        "test/test_utilities.cpp",
         "test/main.cpp",
     ],
     deps = [
-- 
GitLab


From fa20b59b920233d35bb8da3fbc3c234c369a8291 Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Tue, 4 Sep 2018 14:20:40 -0700
Subject: [PATCH 0075/1357] Move CUDA-specific occupancy calculation into
 proper file

-Maintain functionality, just move CalculateOccupancy() and CompareOccupancy() methods from device_description to cuda_gpu_executor
-Remove CUDA requirement in general class device_description
---
 .../stream_executor/cuda/cuda_gpu_executor.cc | 37 +++++++++++++++++++
 .../stream_executor/cuda/cuda_gpu_executor.h  | 11 ++++++
 .../stream_executor/device_description.cc     | 32 ----------------
 .../stream_executor/device_description.h      | 17 ---------
 4 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 39b0696c93..458c0e3030 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -490,6 +490,43 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
   }
 }
 
+// Compute and return maximum blocks per core (occupancy) based on the
+// device description, some kernel characteristics and the number of threads per
+// block.  If unable to compute occupancy, zero is returned.
+int CalculateOccupancy(const DeviceDescription& device_description,
+                       uint64 registers_per_thread,
+                       uint64 shared_memory_per_block,
+                       const ThreadDim& thread_dims, CUfunction func) {
+  int suggested_blocks = 0;
+  int suggested_threads = 0;
+  CUresult err =
+      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
+                                       func, NULL, shared_memory_per_block, 0);
+  CHECK_EQ(err, CUDA_SUCCESS);
+  return suggested_blocks;
+}
+
+// Compute and return the suggested thread count to acheive ideal occupancy.
+// If the provided thread dimensions match this number, zero is returned.
+int CompareOccupancy(int* initial_blocks,
+                     const DeviceDescription& device_description,
+                     uint64 registers_per_thread,
+                     uint64 shared_memory_per_block,
+                     const ThreadDim& thread_dims, CUfunction func) {
+  int suggested_blocks = 0;
+  int suggested_threads = 0;
+  CUresult err =
+      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
+                                       func, NULL, shared_memory_per_block, 0);
+  CHECK_EQ(err, CUDA_SUCCESS);
+  if (suggested_blocks > *initial_blocks) {
+    *initial_blocks = suggested_blocks;
+    return suggested_threads;
+  } else {
+    return 0;
+  }
+}
+
 void *CUDAExecutor::Allocate(uint64 size) {
   return CUDADriver::DeviceAllocate(context_, size);
 }
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 8a954d5461..e8ebbc3220 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -70,6 +70,17 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
               const BlockDim &block_dims, const KernelBase &k,
               const KernelArgsArrayBase &args) override;
 
+  int CalculateOccupancy(const DeviceDescription& device_description,
+                       uint64 registers_per_thread,
+                       uint64 shared_memory_per_block,
+                       const ThreadDim& thread_dims, CUfunction func);
+
+  int CompareOccupancy(int* initial_blocks,
+                     const DeviceDescription& device_description,
+                     uint64 registers_per_thread,
+                     uint64 shared_memory_per_block,
+                     const ThreadDim& thread_dims, CUfunction func);
+
   void *Allocate(uint64 size) override;
 
   void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,
diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc
index df52ce6cce..726c4adf74 100644
--- a/tensorflow/stream_executor/device_description.cc
+++ b/tensorflow/stream_executor/device_description.cc
@@ -157,36 +157,4 @@ static uint64 RoundDown(uint64 value, uint64 n) {
   return port::MathUtil::FloorOfRatio(value, n) * n;
 }
 
-int CalculateOccupancy(const DeviceDescription& device_description,
-                       uint64 registers_per_thread,
-                       uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func) {
-  int suggested_blocks = 0;
-  int suggested_threads = 0;
-  CUresult err =
-      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
-                                       func, NULL, shared_memory_per_block, 0);
-  CHECK_EQ(err, CUDA_SUCCESS);
-  return suggested_blocks;
-}
-
-int CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func) {
-  int suggested_blocks = 0;
-  int suggested_threads = 0;
-  CUresult err =
-      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
-                                       func, NULL, shared_memory_per_block, 0);
-  CHECK_EQ(err, CUDA_SUCCESS);
-  if (suggested_blocks > *initial_blocks) {
-    *initial_blocks = suggested_blocks;
-    return suggested_threads;
-  } else {
-    return 0;
-  }
-}
-
 }  // namespace stream_executor
diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h
index d335b9b875..b15ce31216 100644
--- a/tensorflow/stream_executor/device_description.h
+++ b/tensorflow/stream_executor/device_description.h
@@ -24,7 +24,6 @@ limitations under the License.
 #include <memory>
 #include "tensorflow/stream_executor/platform/port.h"
 
-#include "tensorflow/stream_executor/cuda/cuda_driver.h"
 #include "tensorflow/stream_executor/launch_dim.h"
 #include "tensorflow/stream_executor/platform/port.h"
 
@@ -324,22 +323,6 @@ void CalculateDimensionality(const DeviceDescription &device_description,
                              uint64 element_count, uint64 *threads_per_block,
                              uint64 *block_count);
 
-// Compute and return maximum blocks per core (occupancy) based on the
-// device description, some kernel characteristics and the number of threads per
-// block.  If unable to compute occupancy, zero is returned.
-int CalculateOccupancy(const DeviceDescription& device_description,
-                       uint64 registers_per_thread,
-                       uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func);
-
-// Compute and return the suggested thread count to acheive ideal occupancy.
-// If the provided thread dimensions match this number, zero is returned.
-int CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func);
-
 }  // namespace stream_executor
 
 #endif  // TENSORFLOW_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_
-- 
GitLab


From cd6597b8fcd82b51ddb47a297972a1614c2a5d78 Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Tue, 4 Sep 2018 16:17:40 -0700
Subject: [PATCH 0076/1357] Fixed transition typo

---
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 458c0e3030..a961e9a6c4 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -493,7 +493,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
 // Compute and return maximum blocks per core (occupancy) based on the
 // device description, some kernel characteristics and the number of threads per
 // block.  If unable to compute occupancy, zero is returned.
-int CalculateOccupancy(const DeviceDescription& device_description,
+int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description,
                        uint64 registers_per_thread,
                        uint64 shared_memory_per_block,
                        const ThreadDim& thread_dims, CUfunction func) {
@@ -508,7 +508,7 @@ int CalculateOccupancy(const DeviceDescription& device_description,
 
 // Compute and return the suggested thread count to acheive ideal occupancy.
 // If the provided thread dimensions match this number, zero is returned.
-int CompareOccupancy(int* initial_blocks,
+int CUDAExecutor::CompareOccupancy(int* initial_blocks,
                      const DeviceDescription& device_description,
                      uint64 registers_per_thread,
                      uint64 shared_memory_per_block,
-- 
GitLab


From 475b7715f16ad0f94fa9986a0eefc1b2cf2044bd Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Tue, 4 Sep 2018 16:31:01 -0700
Subject: [PATCH 0077/1357] Recommended typo fix

---
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index a961e9a6c4..ce2f1ce3ae 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -483,7 +483,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
       CompareOccupancy(&blocks_per_sm, device_description, regs_per_thread,
                        smem_per_block, thread_dims, cufunc);
   if (suggested_threads != 0) {
-    VLOG(2) << "The cuda occupancy calculator reccommends using "
+    VLOG(2) << "The cuda occupancy calculator recommends using "
             << suggested_threads
             << " threads per block to acheive an occupancy of " << blocks_per_sm
             << " blocks per SM.";
-- 
GitLab


From a95281ce1b449d8f92a3799ff9c1dbf661b70bc4 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Wed, 5 Sep 2018 09:02:40 +0800
Subject: [PATCH 0078/1357] Avoid golden API file changing.

---
 tensorflow/cc/gradients/nn_grad_test.cc                  | 3 +--
 tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt | 1 +
 tensorflow/tools/api/golden/v1/tensorflow.pbtxt          | 4 ----
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index d8c2a1a0fc..f5a09e09dc 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -42,7 +42,6 @@ using ops::MaxPoolV2;
 using ops::Placeholder;
 using ops::Relu;
 using ops::Relu6;
-using ops::LeakyRelu;
 using ops::Selu;
 using ops::Softmax;
 using ops::Softplus;
@@ -165,7 +164,7 @@ TEST_F(NNGradTest, Relu6Grad) {
 TEST_F(NNGradTest, LeakyReluGrad) {
   TensorShape shape({5, 2});
   auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
-  auto y = LeakyRelu(scope_, x);
+  auto y = ops::internal::LeakyRelu(scope_, x);
   // Avoid input values where Leaky ReLU gradient is not well defined (around
   // zero).
   Tensor x_init_value = test::AsTensor<float>(
diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
index 4a61889f54..280148e032 100644
--- a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
@@ -1,4 +1,5 @@
 op {
   graph_op_name: "LeakyRelu"
+  visibility: HIDDEN
   summary: "Computes rectified linear: `max(features, features * alpha)`."
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 9e8d320f06..4de662fe33 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1324,10 +1324,6 @@ tf_module {
     name: "lbeta"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "leaky_relu"
-    argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], "
-  }
   member_method {
     name: "less"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-- 
GitLab


From 89979f42e827d9eb5c349259a5aa2ec32d38c86a Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 1 Sep 2018 16:07:46 +0000
Subject: [PATCH 0079/1357] Fix MPI build failure caused by StringPiece ->
 absl::string_view

This fix tries to fix the MPI build failure caused by
StringPiece -> absl::string_view.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc | 4 ++--
 tensorflow/contrib/mpi/mpi_rendezvous_mgr.h  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
index 6a7f5efecd..e195cca647 100644
--- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
+++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
@@ -136,7 +136,7 @@ void MPIRemoteRendezvous::RecvFromRemoteAsync(
 
   MPIRendezvousMgr* mgr =
       reinterpret_cast<MPIRendezvousMgr*>(this->rendezvous_mgr_);
-  mgr->QueueRequest(parsed.FullKey().ToString(), step_id_,
+  mgr->QueueRequest(string(parsed.FullKey()), step_id_,
                     std::move(request_call), rendezvous_call);
 }
 
@@ -258,7 +258,7 @@ void MPIRendezvousMgr::AddRequest(RecvTensorRequest request,
         std::function<MPISendTensorCall*()> res = std::bind(
             send_cb, status, send_args, recv_args, val, is_dead, mpi_send_call);
 
-        SendQueueEntry req(parsed.FullKey().ToString().c_str(), std::move(res));
+        SendQueueEntry req(string(parsed.FullKey()), std::move(res));
 
         this->QueueSendRequest(req);
 
diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h
index 5596601ddb..90140fcab3 100644
--- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h
+++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h
@@ -71,7 +71,7 @@ class MPISendTensorCall {
 
   void Init(const Rendezvous::ParsedKey& parsed, const int64 step_id,
             const bool is_dead) {
-    mRes_.set_key(parsed.FullKey().ToString());
+    mRes_.set_key(string(parsed.FullKey()));
     mRes_.set_step_id(step_id);
     mRes_.mutable_response()->set_is_dead(is_dead);
     mRes_.mutable_response()->set_send_start_micros(
-- 
GitLab


From 39e324505c380c9d449dc31d34629a9d470c765f Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Tue, 4 Sep 2018 15:01:22 +0800
Subject: [PATCH 0080/1357] Add //tensorflow:install_headers target

Used to prepare all the header files so they can easily be installed
into /usr/include when packaging TF.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tensorflow/BUILD         | 28 ++++++++++++++++++++++++++++
 tensorflow/cc/BUILD      | 28 ++++++++++++++++++++++++++--
 tensorflow/core/BUILD    | 19 ++++++++++++++++---
 third_party/eigen3/BUILD | 10 ++--------
 4 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 661cba5ff0..768d4107d8 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -617,3 +617,31 @@ py_library(
     visibility = ["//visibility:public"],
     deps = ["//tensorflow/python:no_contrib"],
 )
+
+genrule(
+    name = "install_headers",
+    srcs = [
+        "//tensorflow/c:headers",
+        "//tensorflow/c/eager:headers",
+        "//tensorflow/cc:headers",
+        "//tensorflow/core:headers",
+    ],
+    outs = ["include"],
+    cmd = """
+    mkdir $@
+    for f in $(SRCS); do
+      d="$${f%/*}"
+      d="$${d#bazel-out*genfiles/}"
+      d="$${d#*external/eigen_archive/}"
+
+      if [[ $${d} == *local_config_* ]]; then
+        continue
+      fi
+
+      mkdir -p "$@/$${d}"
+      cp "$${f}" "$@/$${d}/"
+    done
+    """,
+    tags = ["manual"],
+    visibility = ["//visibility:public"],
+)
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index f56521dac0..b587e63227 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -10,11 +10,12 @@ licenses(["notice"])  # Apache 2.0
 
 load(
     "//tensorflow:tensorflow.bzl",
-    "tf_cc_test",
+    "cc_library_with_android_deps",
     "tf_cc_binary",
+    "tf_cc_test",
     "tf_copts",
     "tf_gen_op_wrappers_cc",
-    "cc_library_with_android_deps",
+    "transitive_hdrs",
 )
 
 cc_library(
@@ -716,3 +717,26 @@ tf_cc_test(
         "//tensorflow/core:testlib",
     ],
 )
+
+transitive_hdrs(
+    name = "headers",
+    visibility = ["//tensorflow:__subpackages__"],
+    deps = [
+        ":cc_ops",
+        ":client_session",
+        ":coordinator",
+        ":gradient_checker",
+        ":gradients",
+        ":ops",
+        ":queue_runner",
+        ":remote_fused_graph_ops",
+        ":scope",
+        "//tensorflow/cc/profiler",
+        "//tensorflow/cc/saved_model:constants",
+        "//tensorflow/cc/saved_model:loader",
+        "//tensorflow/cc/saved_model:reader",
+        "//tensorflow/cc/saved_model:signature_constants",
+        "//tensorflow/cc/saved_model:tag_constants",
+        "//tensorflow/cc/tools:freeze_saved_model",
+    ],
+)
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 5c314f359c..d5d4aad541 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -85,11 +85,12 @@ load(
     "tf_cc_tests",
     "tf_copts",
     "tf_cuda_library",
+    "tf_features_nomodules_if_android",
     "tf_gen_op_libs",
     "tf_generate_proto_text_sources",
     "tf_genrule_cmd_append_to_srcs",
     "tf_opts_nortti_if_android",
-    "tf_features_nomodules_if_android",
+    "transitive_hdrs",
 )
 load("//tensorflow:tensorflow.bzl", "tf_cc_test_mkl")
 load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu")
@@ -120,16 +121,16 @@ load(
     "tf_additional_libdevice_srcs",
     "tf_additional_minimal_lib_srcs",
     "tf_additional_mpi_lib_defines",
-    "tf_additional_proto_hdrs",
     "tf_additional_proto_compiler_hdrs",
+    "tf_additional_proto_hdrs",
     "tf_additional_proto_srcs",
     "tf_additional_test_deps",
     "tf_additional_test_srcs",
     "tf_additional_verbs_lib_defines",
     "tf_jspb_proto_library",
     "tf_kernel_tests_linkstatic",
-    "tf_lib_proto_parsing_deps",
     "tf_lib_proto_compiler_deps",
+    "tf_lib_proto_parsing_deps",
     "tf_nano_proto_library",
     "tf_platform_hdrs",
     "tf_platform_srcs",
@@ -4691,6 +4692,18 @@ cc_library(
     ] + tf_additional_libdevice_deps(),
 )
 
+transitive_hdrs(
+    name = "headers",
+    visibility = ["//tensorflow:__subpackages__"],
+    deps = [
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:stream_executor",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 # Google-internal targets go here (must be at the end).
 
diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD
index 203991b50f..f072f2545a 100644
--- a/third_party/eigen3/BUILD
+++ b/third_party/eigen3/BUILD
@@ -66,19 +66,13 @@ genrule(
     outs = ["include"],
     cmd = """
     mkdir $@
-    for f in $(locations @eigen_archive//:eigen_header_files) ; do
+    for f in $(SRCS); do
       d="$${f%/*}"
       d="$${d#*external/eigen_archive/}"
 
       mkdir -p "$@/$${d}"
       cp "$${f}" "$@/$${d}/"
     done
-
-    for f in $(locations :eigen_third_party_header_files) ; do
-      d="$${f%/*}"
-
-      mkdir -p "$@/$${d}"
-      cp "$${f}" "$@/$${d}/"
-    done
     """,
+    tags = ["manual"],
 )
-- 
GitLab


From 69d3b8faf41791834301a74a05e288964940427d Mon Sep 17 00:00:00 2001
From: "Wen-Heng (Jack) Chung" <whchung@gmail.com>
Date: Fri, 22 Jun 2018 23:09:43 -0500
Subject: [PATCH 0081/1357] [ROCm] bazel build system and continuous
 integration logic

The commit contains following components to support TensorFlow on ROCm platform

- bazel build system
- continuous integration logic

Authors:

- Jack Chung: jack.chung@amd.com
- Jeffrey Poznanovic: Jeffrey.Poznanovic@amd.com
- Peng Sun: Peng.Sun@amd.com
---
 configure.py                                  |  20 +
 tensorflow/core/BUILD                         |   4 +-
 tensorflow/core/kernels/BUILD                 |   3 +-
 tensorflow/tensorflow.bzl                     |  67 +-
 tensorflow/tools/ci_build/Dockerfile.rocm     |  97 +++
 .../tools/ci_build/builds/docker_test.sh      |   9 +-
 tensorflow/tools/ci_build/builds/pip.sh       |   4 +-
 .../tools/ci_build/builds/with_the_same_user  |   6 +
 tensorflow/tools/ci_build/ci_build.sh         |  11 +-
 .../tools/ci_build/linux/cpu/run_cc_core.sh   |   1 +
 .../tools/ci_build/linux/cpu/run_py2_core.sh  |   1 +
 .../ci_build/linux/cpu/run_py3_contrib.sh     |   1 +
 .../tools/ci_build/linux/cpu/run_py3_core.sh  |   1 +
 .../tools/ci_build/linux/libtensorflow.sh     |   3 +
 .../tools/ci_build/linux/libtensorflow_cpu.sh |   1 +
 .../ci_build/linux/libtensorflow_docker.sh    |   6 +
 .../ci_build/linux/libtensorflow_rocm.sh      |  22 +
 .../tools/ci_build/linux/rocm/run_cc_core.sh  |  39 ++
 .../tools/ci_build/linux/rocm/run_py3_core.sh |  39 ++
 .../tools/ci_build/osx/cpu/run_py2_cc_core.sh |   1 +
 .../tools/ci_build/osx/libtensorflow_cpu.sh   |   1 +
 .../tools/ci_build/osx/libtensorflow_gpu.sh   |   1 +
 .../tools/ci_build/osx/libtensorflow_rocm.sh  |  36 +
 .../tools/ci_build/xla/linux/rocm/run_py3.sh  |  41 ++
 tensorflow/workspace.bzl                      |   2 +
 .../gpus/crosstool/CROSSTOOL_hipcc.tpl        | 158 +++++
 .../bin/crosstool_wrapper_driver_rocm.tpl     | 241 +++++++
 third_party/gpus/rocm/BUILD                   |   0
 third_party/gpus/rocm/BUILD.tpl               |  99 +++
 third_party/gpus/rocm/build_defs.bzl.tpl      |  32 +
 third_party/gpus/rocm/rocm_config.h.tpl       |  21 +
 third_party/gpus/rocm_configure.bzl           | 663 ++++++++++++++++++
 tools/bazel.rc                                |   3 +
 33 files changed, 1611 insertions(+), 23 deletions(-)
 create mode 100644 tensorflow/tools/ci_build/Dockerfile.rocm
 create mode 100755 tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
 create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
 create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
 create mode 100755 tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
 create mode 100755 tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
 create mode 100644 third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
 create mode 100755 third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
 create mode 100644 third_party/gpus/rocm/BUILD
 create mode 100644 third_party/gpus/rocm/BUILD.tpl
 create mode 100644 third_party/gpus/rocm/build_defs.bzl.tpl
 create mode 100644 third_party/gpus/rocm/rocm_config.h.tpl
 create mode 100644 third_party/gpus/rocm_configure.bzl

diff --git a/configure.py b/configure.py
index 361bd4764d..4f998511aa 100644
--- a/configure.py
+++ b/configure.py
@@ -1521,6 +1521,13 @@ def main():
     else:
       set_trisycl_include_dir(environ_cp)
 
+  set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False)
+  if (environ_cp.get('TF_NEED_ROCM') == '1' and
+      'LD_LIBRARY_PATH' in environ_cp and environ_cp.get(
+      'LD_LIBRARY_PATH') != '1'):
+      write_action_env_to_bazelrc('LD_LIBRARY_PATH',
+                                  environ_cp.get('LD_LIBRARY_PATH'))
+
   set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
   if (environ_cp.get('TF_NEED_CUDA') == '1' and
       'TF_CUDA_CONFIG_REPO' not in environ_cp):
@@ -1561,6 +1568,19 @@ def main():
       write_to_bazelrc('build --config=download_clang')
       write_to_bazelrc('test --config=download_clang')
 
+  # SYCL / ROCm / CUDA are mutually exclusive.
+  # At most 1 GPU platform can be configured.
+  gpu_platform_count = 0
+  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
+    gpu_platform_count += 1
+  if environ_cp.get('TF_NEED_ROCM') == '1':
+    gpu_platform_count += 1
+  if environ_cp.get('TF_NEED_CUDA') == '1':
+    gpu_platform_count += 1
+  if gpu_platform_count >= 2:
+    raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
+                         'At most 1 GPU platform can be configured.')
+
   set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
   if environ_cp.get('TF_NEED_MPI') == '1':
     set_mpi_home(environ_cp)
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index c06fea130f..d5dfb8c813 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -146,7 +146,7 @@ load(
     "if_static",
     "tf_cuda_tests_tags",
 )
-load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured")
 load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library")
 load(
     "//third_party/mkl:build_defs.bzl",
@@ -2941,7 +2941,7 @@ tf_cuda_library(
         "platform/device_tracer.h",
     ],
     copts = tf_copts(),
-    cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(),
+    cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()),
     visibility = ["//visibility:private"],
     deps = [
         ":core_cpu_internal",
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 25063ac823..68fa8fa481 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -55,7 +55,8 @@ load(
     "if_mkl_ml",
     "mkl_deps",
 )
-load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured")
+load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm", "if_rocm_is_configured")
 
 config_setting(
     # Add "--define tensorflow_xsmm=1" to your build command to use libxsmm for
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index adac895a17..f51a628ca3 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -17,8 +17,15 @@ load(
 )
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
-    "cuda_default_copts",
     "if_cuda",
+    "if_cuda_is_configured",
+    "cuda_default_copts",
+)
+load(
+    "@local_config_rocm//rocm:build_defs.bzl",
+    "if_rocm",
+    "if_rocm_is_configured",
+    "rocm_default_copts",
 )
 load(
     "//third_party/mkl:build_defs.bzl",
@@ -860,12 +867,14 @@ def tf_cuda_only_cc_test(
         srcs = srcs + tf_binary_additional_srcs(),
         size = size,
         args = args,
-        copts = _cuda_copts() + tf_copts(),
+        copts = _cuda_copts() + _rocm_copts() + tf_copts(),
         data = data + tf_binary_dynamic_kernel_dsos(kernels),
-        deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([
-            clean_dep("//tensorflow/core:cuda"),
-            clean_dep("//tensorflow/core:gpu_lib"),
-        ]),
+        deps = deps + tf_binary_dynamic_kernel_deps(kernels) +
+            if_cuda_is_configured([
+                clean_dep("//tensorflow/core:cuda"),
+                clean_dep("//tensorflow/core:gpu_lib")]) +
+            if_rocm_is_configured([
+                clean_dep("//tensorflow/core:gpu_lib")]),
         linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
         linkstatic = linkstatic or select({
             # cc_tests with ".so"s in srcs incorrectly link on Darwin
@@ -1000,7 +1009,7 @@ register_extension_info(
     label_regex_for_dep = "{extension_name}",
 )
 
-def _cuda_copts():
+def _cuda_copts(opts = []):
     """Gets the appropriate set of copts for (maybe) CUDA compilation.
 
       If we're doing CUDA compilation, returns copts for our particular CUDA
@@ -1016,13 +1025,31 @@ def _cuda_copts():
         "@local_config_cuda//cuda:using_clang": ([
             "-fcuda-flush-denormals-to-zero",
         ]),
-    })
+    }) + if_cuda_is_configured(opts)
+
+def _rocm_copts(opts = []):
+    """Gets the appropriate set of copts for (maybe) ROCm compilation.
+
+      If we're doing ROCm compilation, returns copts for our particular ROCm
+      compiler.  If we're not doing ROCm compilation, returns an empty list.
+
+      """
+    return rocm_default_copts() + select({
+        "//conditions:default": [],
+        "@local_config_rocm//rocm:using_hipcc": ([
+            "",
+        ])
+    }) + if_rocm_is_configured(opts)
 
 # Build defs for TensorFlow kernels
 
 # When this target is built using --config=cuda, a cc_library is built
 # that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
 # libraries needed by GPU kernels.
+#
+# When this target is built using --config=rocm, a cc_library is built
+# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional
+# libraries needed by GPU kernels.
 def tf_gpu_kernel_library(
         srcs,
         copts = [],
@@ -1030,16 +1057,18 @@ def tf_gpu_kernel_library(
         deps = [],
         hdrs = [],
         **kwargs):
-    copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts()
+    copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + _rocm_copts(opts = cuda_copts)
     kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]
 
     native.cc_library(
         srcs = srcs,
         hdrs = hdrs,
         copts = copts,
-        deps = deps + if_cuda([
+        deps = deps + if_cuda_is_configured([
             clean_dep("//tensorflow/core:cuda"),
             clean_dep("//tensorflow/core:gpu_lib"),
+        ]) + if_rocm_is_configured([
+            clean_dep("//tensorflow/core:gpu_lib"),
         ]),
         alwayslink = 1,
         **kwargs
@@ -1075,11 +1104,13 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs)
 
     kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]
     native.cc_library(
-        deps = deps + if_cuda(cuda_deps + [
+        deps = deps + if_cuda_is_configured(cuda_deps + [
             clean_dep("//tensorflow/core:cuda"),
-            "@local_config_cuda//cuda:cuda_headers",
+            "@local_config_cuda//cuda:cuda_headers"
+        ]) + if_rocm_is_configured(cuda_deps + [
+            "@local_config_rocm//rocm:rocm_headers"
         ]),
-        copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
+        copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
                  if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
                  if_tensorrt(["-DGOOGLE_TENSORRT=1"])),
         **kwargs
@@ -1459,6 +1490,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
         "@local_config_cuda//cuda:cuda_headers",
         "@local_config_cuda//cuda:cudart_static",
     ]
+    rocm_deps = [
+        clean_dep("//tensorflow/core:stream_executor_headers_lib"),
+    ]
     deps = deps + tf_custom_op_library_additional_deps()
     if gpu_srcs:
         basename = name.split(".")[0]
@@ -1467,13 +1501,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
             srcs = gpu_srcs,
             copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
             features = if_cuda(["-use_header_modules"]),
-            deps = deps + if_cuda(cuda_deps),
+            deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps)
         )
         cuda_deps.extend([":" + basename + "_gpu"])
+        rocm_deps.extend([":" + basename + "_gpu"])
 
     check_deps(
         name = name + "_check_deps",
-        deps = deps + if_cuda(cuda_deps),
+        deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
         disallowed_deps = [
             clean_dep("//tensorflow/core:framework"),
             clean_dep("//tensorflow/core:lib"),
@@ -1482,7 +1517,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
     tf_cc_shared_object(
         name = name,
         srcs = srcs,
-        deps = deps + if_cuda(cuda_deps),
+        deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
         data = if_static([name + "_check_deps"]),
         copts = tf_copts(is_external = True),
         features = ["windows_export_all_symbols"],
diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
new file mode 100644
index 0000000000..aadaa8bac1
--- /dev/null
+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
@@ -0,0 +1,97 @@
+# This Dockerfile provides a starting point for a ROCm installation of 
+# MIOpen and tensorflow.  
+FROM ubuntu:xenial
+MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
+
+ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/
+ARG ROCM_PATH=/opt/rocm
+
+ENV DEBIAN_FRONTEND noninteractive
+ENV TF_NEED_ROCM 1
+ENV HOME /root/
+RUN apt update && apt install -y wget software-properties-common 
+
+# Add rocm repository
+RUN apt-get clean all
+RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
+RUN sh -c  "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
+
+# Install misc pkgs
+RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+  build-essential \
+  clang-3.8 \
+  clang-format-3.8 \
+  clang-tidy-3.8 \
+  cmake \
+  cmake-qt-gui \
+  ssh \
+  curl \
+  apt-utils \
+  pkg-config \
+  g++-multilib \
+  git \
+  libunwind-dev \
+  libfftw3-dev \
+  libelf-dev \
+  libncurses5-dev \
+  libpthread-stubs0-dev \
+  vim \
+  gfortran \
+  libboost-program-options-dev \
+  libssl-dev \
+  libboost-dev \
+  libboost-system-dev \
+  libboost-filesystem-dev \
+  rpm \
+  libnuma-dev \
+  virtualenv \
+  python-pip \
+  python3-pip \
+  wget && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/*
+
+# Install rocm pkgs
+RUN apt-get update --allow-insecure-repositories && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
+    rocm-dev rocm-libs rocm-utils \
+    rocfft miopen-hip miopengemm rocblas hipblas rocrand \
+    rocm-profiler cxlactivitylogger && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git
+RUN cd ~/HIP && mkdir -p build && cd build && cmake .. && make package -j && dpkg -i *.deb
+
+ENV HCC_HOME=$ROCM_PATH/hcc
+ENV HIP_PATH=$ROCM_PATH/hip
+ENV OPENCL_ROOT=$ROCM_PATH/opencl
+ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}"
+ENV PATH="$ROCM_PATH/bin:${PATH}"
+ENV PATH="$OPENCL_ROOT/bin:${PATH}"
+
+# Add target file to help determine which device(s) to build for
+RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst
+
+# Setup environment variables, and add those environment variables at the end of ~/.bashrc 
+ARG HCC_HOME=/opt/rocm/hcc
+ARG HIP_PATH=/opt/rocm/hip
+ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH
+
+# Copy and run the install scripts.
+COPY install/*.sh /install/
+ARG DEBIAN_FRONTEND=noninteractive
+RUN /install/install_bootstrap_deb_packages.sh
+RUN add-apt-repository -y ppa:openjdk-r/ppa && \
+    add-apt-repository -y ppa:george-edison55/cmake-3.x
+RUN /install/install_deb_packages.sh
+RUN /install/install_pip_packages.sh
+RUN /install/install_bazel.sh
+RUN /install/install_golang.sh
+
+# Set up the master bazelrc configuration file.
+COPY install/.bazelrc /etc/bazel.bazelrc
+
+# Configure the build for our CUDA configuration.
+ENV TF_NEED_ROCM 1
+
diff --git a/tensorflow/tools/ci_build/builds/docker_test.sh b/tensorflow/tools/ci_build/builds/docker_test.sh
index e337ea4b05..38891b60e5 100755
--- a/tensorflow/tools/ci_build/builds/docker_test.sh
+++ b/tensorflow/tools/ci_build/builds/docker_test.sh
@@ -19,7 +19,7 @@
 #
 # Usage: docker_test.sh <IMAGE_TYPE> <TAG> <WHL_PATH>
 # Arguments:
-#   IMAGE_TYPE : Type of the image: (CPU|GPU)
+#   IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM)
 #   TAG        : Docker image tag
 #   WHL_PATH   : Path to the whl file to be installed inside the docker image
 #
@@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then
   DOCKERFILE="tensorflow/tools/docker/Dockerfile"
 elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then
   DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu"
+elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then
+  DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm"
 else
   die "Unrecognized image type: $1"
 fi
@@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then
   devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
   libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
   GPU_EXTRA_PARAMS="${devices} ${libs}"
+elif [ "${IMAGE_TYPE}" == "rocm" ]; then
+  ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
 else
   GPU_EXTRA_PARAMS=""
+  ROCM_EXTRA_PARAMS=""
 fi
 
 # Run docker image with source directory mapped
 docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \
-${GPU_EXTRA_PARAMS} \
+${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \
 "${DOCKER_IMG_TAG}" \
 /bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\
 "tensorflow/tools/ci_build/builds/test_tutorials.sh && "\
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index fef121ab5a..6543779022 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}"
 PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package"
 GPU_FLAG=""
 if [[ ${CONTAINER_TYPE} == "cpu" ]] || \
+   [[ ${CONTAINER_TYPE} == "rocm" ]] || \
    [[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then
   bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \
       die "Build failed."
@@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then
       die "ERROR: Cannot find repaired wheel."
     fi
   # Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so
-  elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then
+  elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \
+       [[ ${CONTAINER_TYPE} == "rocm" ]]; then
     WHL_PATH=${AUDITED_WHL_NAME}
     cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH}
     echo "Copied manylinx1 wheel file at ${WHL_PATH}"
diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user
index b216e3549f..1cc5aed15d 100755
--- a/tensorflow/tools/ci_build/builds/with_the_same_user
+++ b/tensorflow/tools/ci_build/builds/with_the_same_user
@@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \
 usermod -a -G sudo "${CI_BUILD_USER}"
 echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo
 
+if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then
+  # ROCm requires the video group in order to use the GPU for compute. If it
+  # exists on the host, add it to the container.
+  getent group video || addgroup video && adduser "${CI_BUILD_USER}" video
+fi
+
 if [ -e /root/.bazelrc ]; then
   cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc"
   chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc"
diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh
index 77265e0f50..eab0616513 100755
--- a/tensorflow/tools/ci_build/ci_build.sh
+++ b/tensorflow/tools/ci_build/ci_build.sh
@@ -18,7 +18,7 @@
 #                    <COMMAND>
 #
 # CONTAINER_TYPE: Type of the docker container used the run the build:
-#                 e.g., (cpu | gpu | android | tensorboard)
+#                 e.g., (cpu | gpu | rocm | android | tensorboard)
 #
 # DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build.
 #                  If this optional value is not supplied (via the
@@ -103,6 +103,14 @@ if [[ "${CONTAINER_TYPE}" != gpu* ]]; then
   GPU_EXTRA_PARAMS=""
 fi
 
+# Add extra params for rocm devices and libraries for ROCm container.
+if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then
+  ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
+else
+  ROCM_EXTRA_PARAMS=""
+fi
+
+
 # Determine the docker image name
 DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}"
 
@@ -159,6 +167,7 @@ ${DOCKER_BINARY} run --rm --pid=host \
     -v ${WORKSPACE}:/workspace \
     -w /workspace \
     ${GPU_EXTRA_PARAMS} \
+    ${ROCM_EXTRA_PARAMS} \
     ${CI_DOCKER_EXTRA_PARAMS[@]} \
     "${DOCKER_IMG_NAME}" \
     ${CI_COMMAND_PREFIX[@]} \
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
index 8eeddcdb82..3b5c92d148 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
@@ -26,6 +26,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 # Only running cc tests, python version does not matter.
 export PYTHON_BIN_PATH=`which python`
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
index 8eca1987f0..52eff6330f 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
@@ -26,6 +26,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=`which python2`
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
index f6fa9251d4..d12027599a 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
@@ -26,6 +26,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=`which python3`
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
index 51eb2cd7e6..7c531a4d68 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
@@ -26,6 +26,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=`which python3`
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow.sh b/tensorflow/tools/ci_build/linux/libtensorflow.sh
index beef8e063b..3b6e15feb9 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow.sh
@@ -27,5 +27,8 @@ SUFFIX="-cpu-linux-"
 if [ "${TF_NEED_CUDA}" == "1" ]; then
   SUFFIX="-gpu-linux-"
 fi
+if [ "${TF_NEED_ROCM}" == "1" ]; then
+  SUFFIX="-rocm-linux-"
+fi
 
 build_libtensorflow_tarball "${SUFFIX}$(uname -m)"
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
index 4bf34dd299..b76262b6e9 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
@@ -19,4 +19,5 @@
 set -ex
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 "${SCRIPT_DIR}/libtensorflow_docker.sh"
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
index 60c974c36b..467b8dc808 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
@@ -38,6 +38,11 @@ if [ "${TF_NEED_CUDA}" == "1" ]; then
   DOCKER_BINARY="nvidia-docker"
   DOCKER_FILE="Dockerfile.gpu"
 fi
+if [ "${TF_NEED_ROCM}" == "1" ]; then
+  DOCKER_IMAGE="tf-tensorflow-rocm"
+  DOCKER_BINARY="docker"
+  DOCKER_FILE="Dockerfile.rocm"
+fi
 
 docker build \
   -t "${DOCKER_IMAGE}" \
@@ -53,6 +58,7 @@ ${DOCKER_BINARY} run \
   -e "TF_NEED_HDFS=0" \
   -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \
   -e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \
+  -e "TF_NEED_ROCM=${TF_NEED_ROCM}" \
   -e "TF_NEED_OPENCL_SYCL=0" \
   "${DOCKER_IMAGE}" \
   "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
new file mode 100755
index 0000000000..c1ebbe3630
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to build a binary releases of libtensorflow with GPU support.
+
+set -ex
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+export TF_NEED_ROCM=1
+"${SCRIPT_DIR}/libtensorflow_docker.sh"
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
new file mode 100755
index 0000000000..200089f90e
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+export CC_OPT_FLAGS='-mavx'
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
+    --test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+    --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
+    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
new file mode 100755
index 0000000000..1d0b838c1b
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+export CC_OPT_FLAGS='-mavx'
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
+    --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+    --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
+    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
index c7cc16e669..adee0d3171 100755
--- a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
+++ b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
@@ -27,6 +27,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=$(which python2)
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
index 9ae5fc6bea..06798adc03 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
@@ -26,6 +26,7 @@ source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_HDFS=0
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
index d95fcdeb85..95f1992d7d 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
@@ -27,6 +27,7 @@ export TF_NEED_CUDA=1
 export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${LD_LIBRARY_PATH}"
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_HDFS=0
+export TF_NEED_ROCM=0
 export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
new file mode 100755
index 0000000000..aeabc0e39e
--- /dev/null
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to produce binary release of libtensorflow (C API, Java jars etc.).
+
+set -ex
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# See comments at the top of this file for details.
+source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
+
+# Configure script
+export TF_NEED_ROCM=1
+export PYTHON_BIN_PATH="/usr/bin/python"
+export TF_NEED_GCP=0
+export TF_NEED_HDFS=0
+export TF_NEED_CUDA=0
+export TF_NEED_OPENCL_SYCL=0
+export TF_NEED_MKL=0
+export COMPUTECPP_PATH="/usr/local"
+
+export PATH="/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
+build_libtensorflow_tarball "-gpu-darwin-$(uname -m)"
diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
new file mode 100755
index 0000000000..a0de128020
--- /dev/null
+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
+
+bazel clean
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \
+    --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+    --build_tests_only --test_output=errors --local_test_jobs=1 \
+    --config=xla -- \
+    //tensorflow/compiler/...
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 1e7c5d6790..87d1243563 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -1,6 +1,7 @@
 # TensorFlow external dependencies that can be loaded in WORKSPACE files.
 
 load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
+load("//third_party/gpus:rocm_configure.bzl", "rocm_configure")
 load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure")
 load("//third_party:nccl/nccl_configure.bzl", "nccl_configure")
 load("//third_party/mkl:build_defs.bzl", "mkl_repository")
@@ -43,6 +44,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     sycl_configure(name = "local_config_sycl")
     syslibs_configure(name = "local_config_syslibs")
     python_configure(name = "local_config_python")
+    rocm_configure(name="local_config_rocm")
 
     initialize_third_party()
 
diff --git a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
new file mode 100644
index 0000000000..0e175b3ef6
--- /dev/null
+++ b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
@@ -0,0 +1,158 @@
+major_version: "local"
+minor_version: ""
+default_target_cpu: "same_as_host"
+
+default_toolchain {
+  cpu: "k8"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "piii"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "arm"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "ppc"
+  toolchain_identifier: "local_linux"
+}
+
+toolchain {
+  abi_version: "local"
+  abi_libc_version: "local"
+  builtin_sysroot: ""
+  compiler: "compiler"
+  host_system_name: "local"
+  needsPic: true
+  supports_gold_linker: false
+  supports_incremental_linker: false
+  supports_fission: false
+  supports_interface_shared_objects: false
+  supports_normalizing_ar: false
+  supports_start_end_lib: false
+  supports_thin_archives: false
+  target_libc: "local"
+  target_cpu: "local"
+  target_system_name: "local"
+  toolchain_identifier: "local_linux"
+
+  tool_path { name: "ar" path: "/usr/bin/ar" }
+  tool_path { name: "compat-ld" path: "/usr/bin/ld" }
+  tool_path { name: "cpp" path: "/usr/bin/cpp" }
+  tool_path { name: "dwp" path: "/usr/bin/dwp" }
+  # As part of the TensorFlow release, we place some ROCm-related compilation
+  # files in @local_config_rocm//crosstool/clang/bin, and this relative
+  # path, combined with the rest of our Bazel configuration causes our
+  # compilation to use those files.
+  tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" }
+  # Use "-std=c++11" for hipcc. For consistency, force both the host compiler
+  # and the device compiler to use "-std=c++11".
+  cxx_flag: "-std=c++11"
+  linker_flag: "-Wl,-no-as-needed"
+  linker_flag: "-lstdc++"
+  #linker_flag: "-B/usr/bin/"
+  linker_flag: "-B/opt/rocm/hcc/compiler/bin"
+
+%{host_compiler_includes}
+  tool_path { name: "gcov" path: "/usr/bin/gcov" }
+
+  # C(++) compiles invoke the compiler (as that is the one knowing where
+  # to find libraries), but we provide LD so other rules can invoke the linker.
+  tool_path { name: "ld" path: "/usr/bin/ld" }
+
+  tool_path { name: "nm" path: "/usr/bin/nm" }
+  tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
+  objcopy_embed_flag: "-I"
+  objcopy_embed_flag: "binary"
+  tool_path { name: "objdump" path: "/usr/bin/objdump" }
+  tool_path { name: "strip" path: "/usr/bin/strip" }
+
+  # Anticipated future default.
+  unfiltered_cxx_flag: "-no-canonical-prefixes"
+
+  # Make C++ compilation deterministic. Use linkstamping instead of these
+  # compiler symbols.
+  unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
+  unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
+  unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__"
+  # The macro EIGEN_USE_HIP is used to tell Eigen to use the HIP platform headers
+  # It needs to be always set when compiling Eigen headers
+  # (irrespective of whether the source file is being compiled via HIPCC)
+  # so adding -DEIGEN_USE_HIP as a default CXX flag here
+  unfiltered_cxx_flag: "-DEIGEN_USE_HIP"
+
+    
+  # Security hardening on by default.
+  # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
+  # We need to undef it before redefining it as some distributions now have
+  # it enabled by default.
+  #compiler_flag: "-U_FORTIFY_SOURCE"
+  #compiler_flag: "-D_FORTIFY_SOURCE=1"
+  #compiler_flag: "-fstack-protector"
+  #compiler_flag: "-fPIE"
+  #linker_flag: "-pie"
+  #linker_flag: "-Wl,-z,relro,-z,now"
+
+  # Enable coloring even if there's no attached terminal. Bazel removes the
+  # escape sequences if --nocolor is specified. This isn't supported by gcc
+  # on Ubuntu 14.04.
+  # compiler_flag: "-fcolor-diagnostics"
+
+  # All warnings are enabled. Maybe enable -Werror as well?
+  compiler_flag: "-Wall"
+  # Enable a few more warnings that aren't part of -Wall.
+  compiler_flag: "-Wunused-but-set-parameter"
+  # But disable some that are problematic.
+  compiler_flag: "-Wno-free-nonheap-object" # has false positives
+
+  # Keep stack frames for debugging, even in opt mode.
+  compiler_flag: "-fno-omit-frame-pointer"
+
+  # Anticipated future default.
+  linker_flag: "-no-canonical-prefixes"
+  unfiltered_cxx_flag: "-fno-canonical-system-headers"
+  # Have gcc return the exit code from ld.
+  linker_flag: "-pass-exit-codes"
+  # Stamp the binary with a unique identifier.
+  linker_flag: "-Wl,--build-id=md5"
+  linker_flag: "-Wl,--hash-style=gnu"
+  # Gold linker only? Can we enable this by default?
+  # linker_flag: "-Wl,--warn-execstack"
+  # linker_flag: "-Wl,--detect-odr-violations"
+
+  # Include directory for ROCm headers.
+%{rocm_include_path}
+
+  compilation_mode_flags {
+    mode: DBG
+    # Enable debug symbols.
+    compiler_flag: "-g"
+  }
+  compilation_mode_flags {
+    mode: OPT
+
+    # No debug symbols.
+    # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
+    # even generally? However, that can't happen here, as it requires special
+    # handling in Bazel.
+    compiler_flag: "-g0"
+
+    # Conservative choice for -O
+    # -O3 can increase binary size and even slow down the resulting binaries.
+    # Profile first and / or use FDO if you need better performance than this.
+    compiler_flag: "-O2"
+
+    # Disable assertions
+    compiler_flag: "-DNDEBUG"
+
+    # Removal of unused code and data at link time (can this increase binary size in some cases?).
+    compiler_flag: "-ffunction-sections"
+    compiler_flag: "-fdata-sections"
+    linker_flag: "-Wl,--gc-sections"
+  }
+  linking_mode_flags { mode: DYNAMIC }
+}
diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
new file mode 100755
index 0000000000..824238022b
--- /dev/null
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
@@ -0,0 +1,241 @@
+#!/usr/bin/env python
+"""Crosstool wrapper for compiling ROCm programs.
+
+SYNOPSIS:
+  crosstool_wrapper_driver_rocm [options passed in by cc_library()
+                                or cc_binary() rule]
+
+DESCRIPTION:
+  This script is expected to be called by the cc_library() or cc_binary() bazel
+  rules. When the option "-x rocm" is present in the list of arguments passed
+  to this script, it invokes the hipcc compiler. Most arguments are passed
+  as is as a string to --compiler-options of hipcc. When "-x rocm" is not
+  present, this wrapper invokes gcc with the input arguments as is.
+"""
+
+from __future__ import print_function
+
+__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)'
+
+from argparse import ArgumentParser
+import os
+import subprocess
+import re
+import sys
+import pipes
+
+# Template values set by rocm_configure.bzl.
+CPU_COMPILER = ('%{cpu_compiler}')
+GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
+
+HIPCC_PATH = '%{hipcc_path}'
+PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
+
+def Log(s):
+  print('gpus/crosstool: {0}'.format(s))
+
+
+def GetOptionValue(argv, option):
+  """Extract the list of values for option from the argv list.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+    option: The option whose value to extract, without the leading '-'.
+
+  Returns:
+    A list of values, either directly following the option,
+    (eg., -opt val1 val2) or values collected from multiple occurrences of
+    the option (eg., -opt val1 -opt val2).
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-' + option, nargs='*', action='append')
+  args, _ = parser.parse_known_args(argv)
+  if not args or not vars(args)[option]:
+    return []
+  else:
+    return sum(vars(args)[option], [])
+
+
+def GetHostCompilerOptions(argv):
+  """Collect the -isystem, -iquote, and --sysroot option values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be used as the --compiler-options to hipcc.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-isystem', nargs='*', action='append')
+  parser.add_argument('-iquote', nargs='*', action='append')
+  parser.add_argument('--sysroot', nargs=1)
+  parser.add_argument('-g', nargs='*', action='append')
+  parser.add_argument('-fno-canonical-system-headers', action='store_true')
+
+  args, _ = parser.parse_known_args(argv)
+
+  opts = ''
+
+  if args.isystem:
+    opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, []))
+  if args.iquote:
+    opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, []))
+  if args.g:
+    opts += ' -g' + ' -g'.join(sum(args.g, []))
+  #if args.fno_canonical_system_headers:
+  #  opts += ' -fno-canonical-system-headers'
+  if args.sysroot:
+    opts += ' --sysroot ' + args.sysroot[0]
+
+  return opts
+
+def GetHipccOptions(argv):
+  """Collect the -hipcc_options values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be passed directly to hipcc.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-hipcc_options', nargs='*', action='append')
+
+  args, _ = parser.parse_known_args(argv)
+
+  if args.hipcc_options:
+    options = _update_options(sum(args.hipcc_options, []))
+    return ' '.join(['--'+a for a in options])
+  return ''
+
+
+def InvokeHipcc(argv, log=False):
+  """Call hipcc with arguments assembled from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+    log: True if logging is requested.
+
+  Returns:
+    The return value of calling os.system('hipcc ' + args)
+  """
+
+  host_compiler_options = GetHostCompilerOptions(argv)
+  hipcc_compiler_options = GetHipccOptions(argv)
+  opt_option = GetOptionValue(argv, 'O')
+  m_options = GetOptionValue(argv, 'm')
+  m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
+  include_options = GetOptionValue(argv, 'I')
+  out_file = GetOptionValue(argv, 'o')
+  depfiles = GetOptionValue(argv, 'MF')
+  defines = GetOptionValue(argv, 'D')
+  defines = ''.join([' -D' + define for define in defines])
+  undefines = GetOptionValue(argv, 'U')
+  undefines = ''.join([' -U' + define for define in undefines])
+  std_options = GetOptionValue(argv, 'std')
+  hipcc_allowed_std_options = ["c++11"]
+  std_options = ''.join([' -std=' + define
+      for define in std_options if define in hipcc_allowed_std_options])
+
+  # The list of source files get passed after the -c option. I don't know of
+  # any other reliable way to just get the list of source files to be compiled.
+  src_files = GetOptionValue(argv, 'c')
+
+  if len(src_files) == 0:
+    return 1
+  if len(out_file) != 1:
+    return 1
+
+  opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0)
+         else ' -g')
+
+  includes = (' -I ' + ' -I '.join(include_options)
+              if len(include_options) > 0
+              else '')
+
+  # Unfortunately, there are other options that have -c prefix too.
+  # So allowing only those look like C/C++ files.
+  src_files = [f for f in src_files if
+               re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
+  srcs = ' '.join(src_files)
+  out = ' -o ' + out_file[0]
+
+  hipccopts = ' '
+  hipccopts += ' ' + hipcc_compiler_options
+  hipccopts += undefines
+  hipccopts += defines
+  hipccopts += std_options
+  hipccopts += m_options
+
+  if depfiles:
+    # Generate the dependency file
+    depfile = depfiles[0]
+    cmd = (HIPCC_PATH + ' ' + hipccopts +
+           host_compiler_options +
+           ' ' + GCC_HOST_COMPILER_PATH +
+           ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile)
+    if log: Log(cmd)
+    exit_status = os.system(cmd)
+    if exit_status != 0:
+      return exit_status
+
+  cmd = (HIPCC_PATH + ' ' + hipccopts +
+         host_compiler_options + ' -fPIC' +
+         ' ' + GCC_HOST_COMPILER_PATH +
+         ' -I .' + opt + includes + ' -c ' + srcs + out)
+
+  # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
+  # Need to investigate and fix.
+  cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
+  if log: Log(cmd)
+  return os.system(cmd)
+
+
+def main():
+  # ignore PWD env var
+  os.environ['PWD']=''
+
+  parser = ArgumentParser()
+  parser.add_argument('-x', nargs=1)
+  parser.add_argument('--rocm_log', action='store_true')
+  parser.add_argument('-pass-exit-codes', action='store_true')
+  args, leftover = parser.parse_known_args(sys.argv[1:])
+
+  if args.x and args.x[0] == 'rocm':
+    if args.rocm_log: Log('-x rocm')
+    leftover = [pipes.quote(s) for s in leftover]
+    if args.rocm_log: Log('using hipcc')
+    return InvokeHipcc(leftover, log=args.rocm_log)
+
+  # XXX use hipcc to link
+  if args.pass_exit_codes:
+    gpu_compiler_flags = [flag for flag in sys.argv[1:]
+                               if not flag.startswith(('-pass-exit-codes'))]
+
+    # special handling for $ORIGIN
+    # - guard every argument with ''
+    modified_gpu_compiler_flags = []
+    for flag in gpu_compiler_flags:
+      modified_gpu_compiler_flags.append("'" + flag + "'")
+
+    if args.rocm_log: Log('Link with hipcc: %s' % (' '.join([HIPCC_PATH] + modified_gpu_compiler_flags)))
+    return subprocess.call([HIPCC_PATH] + modified_gpu_compiler_flags)
+
+  # Strip our flags before passing through to the CPU compiler for files which
+  # are not -x rocm. We can't just pass 'leftover' because it also strips -x.
+  # We not only want to pass -x to the CPU compiler, but also keep it in its
+  # relative location in the argv list (the compiler is actually sensitive to
+  # this).
+  cpu_compiler_flags = [flag for flag in sys.argv[1:]
+                             if not flag.startswith(('--rocm_log'))]
+
+  # XXX: SE codes need to be built with gcc, but need this macro defined
+  cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__")
+
+  return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/third_party/gpus/rocm/BUILD b/third_party/gpus/rocm/BUILD
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third_party/gpus/rocm/BUILD.tpl b/third_party/gpus/rocm/BUILD.tpl
new file mode 100644
index 0000000000..8258bb3589
--- /dev/null
+++ b/third_party/gpus/rocm/BUILD.tpl
@@ -0,0 +1,99 @@
+licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like
+
+package(default_visibility = ["//visibility:public"])
+
+config_setting(
+    name = "using_hipcc",
+    values = {
+        "define": "using_rocm_hipcc=true",
+    },
+)
+
+cc_library(
+    name = "rocm_headers",
+    hdrs = [
+        "rocm/rocm_config.h",
+        %{rocm_headers}
+    ],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "hip",
+    srcs = ["rocm/lib/%{hip_lib}"],
+    data = ["rocm/lib/%{hip_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocblas",
+    srcs = ["rocm/lib/%{rocblas_lib}"],
+    data = ["rocm/lib/%{rocblas_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocfft",
+    srcs = ["rocm/lib/%{rocfft_lib}"],
+    data = ["rocm/lib/%{rocfft_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "hiprand",
+    srcs = ["rocm/lib/%{hiprand_lib}"],
+    data = ["rocm/lib/%{hiprand_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+        "rocm/include/rocrand",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "miopen",
+    srcs = ["rocm/lib/%{miopen_lib}"],
+    data = ["rocm/lib/%{miopen_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocm",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":rocm_headers",
+        ":hip",
+        ":rocblas",
+        ":rocfft",
+        ":hiprand",
+        ":miopen",
+    ],
+)
+
+%{rocm_include_genrules}
diff --git a/third_party/gpus/rocm/build_defs.bzl.tpl b/third_party/gpus/rocm/build_defs.bzl.tpl
new file mode 100644
index 0000000000..306f57551f
--- /dev/null
+++ b/third_party/gpus/rocm/build_defs.bzl.tpl
@@ -0,0 +1,32 @@
+# Macros for building ROCm code.
+def if_rocm(if_true, if_false = []):
+    """Shorthand for select()'ing on whether we're building with ROCm.
+
+    Returns a select statement which evaluates to if_true if we're building
+    with ROCm enabled.  Otherwise, the select statement evaluates to if_false.
+
+    """
+    return select({
+        "@local_config_rocm//rocm:using_hipcc": if_true,
+        "//conditions:default": if_false
+    })
+
+
+def rocm_default_copts():
+    """Default options for all ROCm compilations."""
+    return if_rocm(["-x", "rocm"] + %{rocm_extra_copts})
+
+
+def rocm_is_configured():
+    """Returns true if ROCm was enabled during the configure process."""
+    return %{rocm_is_configured}
+
+def if_rocm_is_configured(x):
+    """Tests if the ROCm was enabled during the configure process.
+
+    Unlike if_rocm(), this does not require that we are building with
+    --config=rocm. Used to allow non-ROCm code to depend on ROCm libraries.
+    """
+    if rocm_is_configured():
+      return x
+    return []
diff --git a/third_party/gpus/rocm/rocm_config.h.tpl b/third_party/gpus/rocm/rocm_config.h.tpl
new file mode 100644
index 0000000000..c5f25a845c
--- /dev/null
+++ b/third_party/gpus/rocm/rocm_config.h.tpl
@@ -0,0 +1,21 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef ROCM_ROCM_CONFIG_H_
+#define ROCM_ROCM_CONFIG_H_
+
+#define TF_ROCM_TOOLKIT_PATH "/opt/rocm"
+
+#endif  // ROCM_ROCM_CONFIG_H_
diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl
new file mode 100644
index 0000000000..9371e33f97
--- /dev/null
+++ b/third_party/gpus/rocm_configure.bzl
@@ -0,0 +1,663 @@
+# -*- Python -*-
+"""Repository rule for ROCm autoconfiguration.
+
+`rocm_configure` depends on the following environment variables:
+
+  * `TF_NEED_ROCM`: Whether to enable building with ROCm.
+  * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
+  * `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is
+    `/opt/rocm`.
+  * `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then
+    use the system default.
+  * `TF_MIOPEN_VERSION`: The version of the MIOpen library.
+  * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is
+    `gfx803,gfx900`.
+"""
+
+_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
+_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH"
+_TF_ROCM_VERSION = "TF_ROCM_VERSION"
+_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION"
+_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS"
+_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO"
+
+_DEFAULT_ROCM_VERSION = ""
+_DEFAULT_MIOPEN_VERSION = ""
+_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm"
+_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"]
+
+def find_cc(repository_ctx):
+  """Find the C++ compiler."""
+  # Return a dummy value for GCC detection here to avoid error
+  target_cc_name = "gcc"
+  cc_path_envvar = _GCC_HOST_COMPILER_PATH
+  cc_name = target_cc_name
+
+  if cc_path_envvar in repository_ctx.os.environ:
+    cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+    if cc_name_from_env:
+      cc_name = cc_name_from_env
+  if cc_name.startswith("/"):
+    # Absolute path, maybe we should make this supported by our which function.
+    return cc_name
+  cc = repository_ctx.which(cc_name)
+  if cc == None:
+    fail(("Cannot find {}, either correct your path or set the {}" +
+          " environment variable").format(target_cc_name, cc_path_envvar))
+  return cc
+
+_INC_DIR_MARKER_BEGIN = "#include <...>"
+
+def _cxx_inc_convert(path):
+  """Convert path returned by cc -E xc++ in a complete path."""
+  path = path.strip()
+  return path
+
+def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
+  """Compute the list of default C or C++ include directories."""
+  if lang_is_cpp:
+    lang = "c++"
+  else:
+    lang = "c"
+  # TODO: We pass -no-canonical-prefixes here to match the compiler flags,
+  #       but in rocm_clang CROSSTOOL file that is a `feature` and we should
+  #       handle the case when it's disabled and no flag is passed
+  result = repository_ctx.execute([cc, "-no-canonical-prefixes",
+                                   "-E", "-x" + lang, "-", "-v"])
+  index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
+  if index1 == -1:
+    return []
+  index1 = result.stderr.find("\n", index1)
+  if index1 == -1:
+    return []
+  index2 = result.stderr.rfind("\n ")
+  if index2 == -1 or index2 < index1:
+    return []
+  index2 = result.stderr.find("\n", index2 + 1)
+  if index2 == -1:
+    inc_dirs = result.stderr[index1 + 1:]
+  else:
+    inc_dirs = result.stderr[index1 + 1:index2].strip()
+
+  return [str(repository_ctx.path(_cxx_inc_convert(p)))
+          for p in inc_dirs.split("\n")]
+
+def get_cxx_inc_directories(repository_ctx, cc):
+  """Compute the list of default C and C++ include directories."""
+  # For some reason `clang -xc` sometimes returns include paths that are
+  # different from the ones from `clang -xc++`. (Symlink and a dir)
+  # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
+  includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+  includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+  includes_cpp_set = depset(includes_cpp)
+  return includes_cpp + [inc for inc in includes_c
+                         if inc not in includes_cpp_set]
+
+def auto_configure_fail(msg):
+  """Output failure message when rocm configuration fails."""
+  red = "\033[0;31m"
+  no_color = "\033[0m"
+  fail("\n%sROCm Configuration Error:%s %s\n" % (red, no_color, msg))
+# END cc_configure common functions (see TODO above).
+
+def _host_compiler_includes(repository_ctx, cc):
+  """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
+
+  Args:
+    repository_ctx: The repository context.
+    cc: The path to the gcc host compiler.
+
+  Returns:
+    A string containing the cxx_builtin_include_directory for each of the gcc
+    host compiler include directories, which can be added to the CROSSTOOL
+    file.
+  """
+  inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
+
+  # Add numpy headers
+  inc_dirs.append("/usr/lib/python2.7/dist-packages/numpy/core/include")
+
+  entries = []
+  for inc_dir in inc_dirs:
+    entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+
+  # define TENSORFLOW_USE_ROCM
+  entries.append("  unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"")
+
+  return "\n".join(entries)
+
+def _rocm_include_path(repository_ctx, rocm_config):
+  """Generates the cxx_builtin_include_directory entries for rocm inc dirs.
+
+  Args:
+    repository_ctx: The repository context.
+    cc: The path to the gcc host compiler.
+
+  Returns:
+    A string containing the cxx_builtin_include_directory for each of the gcc
+    host compiler include directories, which can be added to the CROSSTOOL
+    file.
+  """
+  inc_dirs = []
+
+  # general ROCm include path
+  inc_dirs.append(rocm_config.rocm_toolkit_path + '/include')
+
+  # Add HSA headers
+  inc_dirs.append("/opt/rocm/hsa/include")
+
+  # Add HIP headers
+  inc_dirs.append("/opt/rocm/include/hip")
+  inc_dirs.append("/opt/rocm/include/hip/hcc_detail")
+
+  # Add rocrand and hiprand headers
+  inc_dirs.append("/opt/rocm/rocrand/include")
+  inc_dirs.append("/opt/rocm/hiprand/include")
+
+  # Add rocfft headers
+  inc_dirs.append("/opt/rocm/rocfft/include")
+
+  # Add rocBLAS headers
+  inc_dirs.append("/opt/rocm/rocblas/include")
+
+  # Add MIOpen headers
+  inc_dirs.append("/opt/rocm/miopen/include")
+
+  # Add hcc headers
+  inc_dirs.append("/opt/rocm/hcc/include")
+  inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/")
+  inc_dirs.append("/opt/rocm/hcc/lib/clang/7.0.0/include")
+  # Newer hcc builds use/are based off of clang 8.0.0.
+  inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/")
+  inc_dirs.append("/opt/rocm/hcc/lib/clang/8.0.0/include")
+
+  inc_entries = []
+  for inc_dir in inc_dirs:
+    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+  return "\n".join(inc_entries)
+
+def _enable_rocm(repository_ctx):
+  if "TF_NEED_ROCM" in repository_ctx.os.environ:
+    enable_rocm = repository_ctx.os.environ["TF_NEED_ROCM"].strip()
+    return enable_rocm == "1"
+  return False
+
+def _rocm_toolkit_path(repository_ctx):
+  """Finds the rocm toolkit directory.
+
+  Args:
+    repository_ctx: The repository context.
+
+  Returns:
+    A speculative real path of the rocm toolkit install directory.
+  """
+  rocm_toolkit_path = _DEFAULT_ROCM_TOOLKIT_PATH
+  if _ROCM_TOOLKIT_PATH in repository_ctx.os.environ:
+    rocm_toolkit_path = repository_ctx.os.environ[_ROCM_TOOLKIT_PATH].strip()
+  if not repository_ctx.path(rocm_toolkit_path).exists:
+    auto_configure_fail("Cannot find rocm toolkit path.")
+  return str(repository_ctx.path(rocm_toolkit_path).realpath)
+
+def _amdgpu_targets(repository_ctx):
+  """Returns a list of strings representing AMDGPU targets."""
+  if _TF_ROCM_AMDGPU_TARGETS not in repository_ctx.os.environ:
+    return _DEFAULT_ROCM_AMDGPU_TARGETS
+  amdgpu_targets_str = repository_ctx.os.environ[_TF_ROCM_AMDGPU_TARGETS]
+  amdgpu_targets = amdgpu_targets_str.split(",")
+  for amdgpu_target in amdgpu_targets:
+    if amdgpu_target[:3] != "gfx" or not amdgpu_target[3:].isdigit():
+      auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target)
+  return amdgpu_targets
+
+def _cpu_value(repository_ctx):
+  """Returns the name of the host operating system.
+
+  Args:
+    repository_ctx: The repository context.
+
+  Returns:
+    A string containing the name of the host operating system.
+  """
+  os_name = repository_ctx.os.name.lower()
+  if os_name.startswith("mac os"):
+    return "Darwin"
+  if os_name.find("windows") != -1:
+    return "Windows"
+  result = repository_ctx.execute(["uname", "-s"])
+  return result.stdout.strip()
+
+def _lib_name(lib, cpu_value, version="", static=False):
+  """Constructs the platform-specific name of a library.
+
+  Args:
+    lib: The name of the library, such as "hip"
+    cpu_value: The name of the host operating system.
+    version: The version of the library.
+    static: True the library is static or False if it is a shared object.
+
+  Returns:
+    The platform-specific name of the library.
+  """
+  if cpu_value in ("Linux"):
+    if static:
+      return "lib%s.a" % lib
+    else:
+      if version:
+        version = ".%s" % version
+      return "lib%s.so%s" % (lib, version)
+  elif cpu_value == "Windows":
+      return "%s.lib" % lib
+  elif cpu_value == "Darwin":
+      if static:
+          return "lib%s.a" % lib
+      elif version:
+          version = ".%s" % version
+      return "lib%s%s.dylib" % (lib, version)
+  else:
+    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
+def _find_rocm_lib(lib, repository_ctx, cpu_value, basedir, version="",
+                   static=False):
+  """Finds the given ROCm libraries on the system.
+
+  Args:
+    lib: The name of the library, such as "hip"
+    repository_ctx: The repository context.
+    cpu_value: The name of the host operating system.
+    basedir: The install directory of ROCm.
+    version: The version of the library.
+    static: True if static library, False if shared object.
+
+  Returns:
+    Returns a struct with the following fields:
+      file_name: The basename of the library found on the system.
+      path: The full path to the library.
+  """
+  file_name = _lib_name(lib, cpu_value, version, static)
+  if cpu_value == "Linux":
+    path = repository_ctx.path("%s/lib64/%s" % (basedir, file_name))
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+    path = repository_ctx.path("%s/lib64/stubs/%s" % (basedir, file_name))
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+    path = repository_ctx.path(
+        "%s/lib/x86_64-linux-gnu/%s" % (basedir, file_name))
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+
+  path = repository_ctx.path("%s/lib/%s" % (basedir, file_name))
+  if path.exists:
+    return struct(file_name=file_name, path=str(path.realpath))
+  path = repository_ctx.path("%s/%s" % (basedir, file_name))
+  if path.exists:
+    return struct(file_name=file_name, path=str(path.realpath))
+
+  auto_configure_fail("Cannot find rocm library %s" % file_name)
+
+def _find_libs(repository_ctx, rocm_config):
+  """Returns the ROCm libraries on the system.
+
+  Args:
+    repository_ctx: The repository context.
+    rocm_config: The ROCm config as returned by _get_rocm_config
+
+  Returns:
+    Map of library names to structs of filename and path as returned by
+    _find_rocm_lib.
+  """
+  cpu_value = rocm_config.cpu_value
+  return {
+      "hip": _find_rocm_lib(
+          "hip_hcc", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path),
+      "rocblas": _find_rocm_lib(
+          "rocblas", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocblas"),
+      "rocfft": _find_rocm_lib(
+          "rocfft", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocfft"),
+      "hiprand": _find_rocm_lib(
+          "hiprand", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/hiprand"),
+      "miopen": _find_rocm_lib(
+          "MIOpen", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/miopen"),
+  }
+
+def _get_rocm_config(repository_ctx):
+  """Detects and returns information about the ROCm installation on the system.
+
+  Args:
+    repository_ctx: The repository context.
+
+  Returns:
+    A struct containing the following fields:
+      rocm_toolkit_path: The ROCm toolkit installation directory.
+      amdgpu_targets: A list of the system's AMDGPU targets.
+      cpu_value: The name of the host operating system.
+  """
+  cpu_value = _cpu_value(repository_ctx)
+  rocm_toolkit_path = _rocm_toolkit_path(repository_ctx)
+  return struct(
+      rocm_toolkit_path = rocm_toolkit_path,
+      amdgpu_targets = _amdgpu_targets(repository_ctx),
+      cpu_value = cpu_value)
+
+def _tpl(repository_ctx, tpl, substitutions={}, out=None):
+  if not out:
+    out = tpl.replace(":", "/")
+  repository_ctx.template(
+      out,
+      Label("//third_party/gpus/%s.tpl" % tpl),
+      substitutions)
+
+
+def _file(repository_ctx, label):
+  repository_ctx.template(
+      label.replace(":", "/"),
+      Label("//third_party/gpus/%s.tpl" % label),
+      {})
+
+
+_DUMMY_CROSSTOOL_BZL_FILE = """
+def error_gpu_disabled():
+  fail("ERROR: Building with --config=rocm but TensorFlow is not configured " +
+       "to build with GPU support. Please re-run ./configure and enter 'Y' " +
+       "at the prompt to build with GPU support.")
+
+  native.genrule(
+      name = "error_gen_crosstool",
+      outs = ["CROSSTOOL"],
+      cmd = "echo 'Should not be run.' && exit 1",
+  )
+
+  native.filegroup(
+      name = "crosstool",
+      srcs = [":CROSSTOOL"],
+      output_licenses = ["unencumbered"],
+  )
+"""
+
+
+_DUMMY_CROSSTOOL_BUILD_FILE = """
+load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
+
+error_gpu_disabled()
+"""
+
+def _create_dummy_repository(repository_ctx):
+  cpu_value = _cpu_value(repository_ctx)
+
+  # Set up BUILD file for rocm/.
+  _tpl(repository_ctx, "rocm:build_defs.bzl",
+       {
+           "%{rocm_is_configured}": "False",
+           "%{rocm_extra_copts}": "[]"
+       })
+  _tpl(repository_ctx, "rocm:BUILD",
+       {
+           "%{hip_lib}": _lib_name("hip", cpu_value),
+           "%{rocblas_lib}": _lib_name("rocblas", cpu_value),
+           "%{miopen_lib}": _lib_name("miopen", cpu_value),
+           "%{rocfft_lib}": _lib_name("rocfft", cpu_value),
+           "%{hiprand_lib}": _lib_name("hiprand", cpu_value),
+           "%{rocm_include_genrules}": '',
+           "%{rocm_headers}": '',
+       })
+
+  # Create dummy files for the ROCm toolkit since they are still required by
+  # tensorflow/core/platform/default/build_config:rocm.
+  repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "")
+
+  # Set up rocm_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(repository_ctx, "rocm:rocm_config.h",
+       {
+           "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH,
+       }, "rocm/rocm/rocm_config.h")
+
+  # If rocm_configure is not configured to build with GPU support, and the user
+  # attempts to build with --config=rocm, add a dummy build rule to intercept
+  # this and fail with an actionable error message.
+  repository_ctx.file("crosstool/error_gpu_disabled.bzl",
+                      _DUMMY_CROSSTOOL_BZL_FILE)
+  repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
+
+def _execute(repository_ctx, cmdline, error_msg=None, error_details=None,
+             empty_stdout_fine=False):
+  """Executes an arbitrary shell command.
+
+  Args:
+    repository_ctx: the repository_ctx object
+    cmdline: list of strings, the command to execute
+    error_msg: string, a summary of the error if the command fails
+    error_details: string, details about the error or steps to fix it
+    empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
+      it's an error
+  Return:
+    the result of repository_ctx.execute(cmdline)
+  """
+  result = repository_ctx.execute(cmdline)
+  if result.stderr or not (empty_stdout_fine or result.stdout):
+    auto_configure_fail(
+        "\n".join([
+            error_msg.strip() if error_msg else "Repository command failed",
+            result.stderr.strip(),
+            error_details if error_details else ""]))
+  return result
+
+def _norm_path(path):
+  """Returns a path with '/' and remove the trailing slash."""
+  path = path.replace("\\", "/")
+  if path[-1] == "/":
+    path = path[:-1]
+  return path
+
+def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name,
+    src_files = [], dest_files = []):
+  """Returns a genrule to symlink(or copy if on Windows) a set of files.
+
+  If src_dir is passed, files will be read from the given directory; otherwise
+  we assume files are in src_files and dest_files
+  """
+  if src_dir != None:
+    src_dir = _norm_path(src_dir)
+    dest_dir = _norm_path(dest_dir)
+    files = _read_dir(repository_ctx, src_dir)
+    # Create a list with the src_dir stripped to use for outputs.
+    dest_files = files.replace(src_dir, '').splitlines()
+    src_files = files.splitlines()
+  command = []
+  # We clear folders that might have been generated previously to avoid
+  # undesired inclusions
+  command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
+  command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
+  outs = []
+  for i in range(len(dest_files)):
+    if dest_files[i] != "":
+      # If we have only one file to link we do not want to use the dest_dir, as
+      # $(@D) will include the full path to the file.
+      dest = '$(@D)/' + dest_dir + dest_files[i] if len(dest_files) != 1 else '$(@D)/' + dest_files[i]
+      # On Windows, symlink is not supported, so we just copy all the files.
+      cmd = 'ln -s'
+      command.append(cmd + ' "%s" "%s"' % (src_files[i] , dest))
+      outs.append('        "' + dest_dir + dest_files[i] + '",')
+  genrule = _genrule(src_dir, genrule_name, " && ".join(command),
+                     "\n".join(outs))
+  return genrule
+
+def _genrule(src_dir, genrule_name, command, outs):
+  """Returns a string with a genrule.
+
+  Genrule executes the given command and produces the given outputs.
+  """
+  return (
+      'genrule(\n' +
+      '    name = "' +
+      genrule_name + '",\n' +
+      '    outs = [\n' +
+      outs +
+      '\n    ],\n' +
+      '    cmd = """\n' +
+      command +
+      '\n   """,\n' +
+      ')\n'
+  )
+
+def _read_dir(repository_ctx, src_dir):
+  """Returns a string with all files in a directory.
+
+  Finds all files inside a directory, traversing subfolders and following
+  symlinks. The returned string contains the full path of all files
+  separated by line breaks.
+  """
+  find_result = _execute(
+      repository_ctx, ["find", src_dir, "-follow", "-type", "f"],
+      empty_stdout_fine=True)
+  result = find_result.stdout
+  return result
+
+def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets):
+  if False:
+    amdgpu_target_flags = ["--amdgpu-target=" +
+        amdgpu_target for amdgpu_target in amdgpu_targets]
+  else:
+    # AMDGPU targets are handled in the "crosstool_wrapper_driver_is_not_gcc"
+    amdgpu_target_flags = []
+  return str(amdgpu_target_flags)
+
+def _create_local_rocm_repository(repository_ctx):
+  """Creates the repository containing files set up to build with ROCm."""
+  rocm_config = _get_rocm_config(repository_ctx)
+
+  # Set up symbolic links for the rocm toolkit by creating genrules to do
+  # symlinking. We create one genrule for each directory we want to track under
+  # rocm_toolkit_path
+  rocm_toolkit_path = rocm_config.rocm_toolkit_path
+  rocm_include_path = rocm_toolkit_path + "/include"
+  genrules = [_symlink_genrule_for_dir(repository_ctx,
+      rocm_include_path, "rocm/include", "rocm-include")]
+  genrules.append(_symlink_genrule_for_dir(repository_ctx,
+      rocm_toolkit_path + "/rocfft/include", "rocm/include/rocfft", "rocfft-include"))
+  genrules.append(_symlink_genrule_for_dir(repository_ctx,
+      rocm_toolkit_path + "/rocblas/include", "rocm/include/rocblas", "rocblas-include"))
+  genrules.append(_symlink_genrule_for_dir(repository_ctx,
+      rocm_toolkit_path + "/miopen/include", "rocm/include/miopen", "miopen-include"))
+
+  rocm_libs = _find_libs(repository_ctx, rocm_config)
+  rocm_lib_src = []
+  rocm_lib_dest = []
+  for lib in rocm_libs.values():
+    rocm_lib_src.append(lib.path)
+    rocm_lib_dest.append("rocm/lib/" + lib.file_name)
+  genrules.append(_symlink_genrule_for_dir(repository_ctx, None, "", "rocm-lib",
+                                       rocm_lib_src, rocm_lib_dest))
+
+  included_files = _read_dir(repository_ctx, rocm_include_path).replace(
+      rocm_include_path, '').splitlines()
+
+  # Set up BUILD file for rocm/
+  _tpl(repository_ctx, "rocm:build_defs.bzl",
+       {
+           "%{rocm_is_configured}": "True",
+           "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+               repository_ctx, rocm_config.amdgpu_targets),
+
+       })
+  _tpl(repository_ctx, "rocm:BUILD",
+       {
+           "%{hip_lib}": rocm_libs["hip"].file_name,
+           "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
+           "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
+           "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
+           "%{miopen_lib}": rocm_libs["miopen"].file_name,
+           "%{rocm_include_genrules}": "\n".join(genrules),
+           "%{rocm_headers}": ('":rocm-include",\n' +
+                               '":rocfft-include",\n' +
+                               '":rocblas-include",\n' +
+                               '":miopen-include",'),
+       })
+  # Set up crosstool/
+  _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"})
+  cc = find_cc(repository_ctx)
+  host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
+  rocm_defines = {
+           "%{rocm_include_path}": _rocm_include_path(repository_ctx,
+                                                      rocm_config),
+           "%{host_compiler_includes}": host_compiler_includes,
+           "%{clang_path}": str(cc),
+       }
+
+  _tpl(repository_ctx, "crosstool:CROSSTOOL_hipcc", rocm_defines, out="crosstool/CROSSTOOL")
+
+  _tpl(repository_ctx,
+       "crosstool:clang/bin/crosstool_wrapper_driver_rocm",
+       {
+           "%{cpu_compiler}": str(cc),
+           "%{hipcc_path}": "/opt/rocm/bin/hipcc",
+           "%{gcc_host_compiler_path}": str(cc),
+           "%{rocm_amdgpu_targets}": ",".join(
+               ["\"%s\"" % c for c in rocm_config.amdgpu_targets]),
+       })
+
+  # Set up rocm_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(repository_ctx, "rocm:rocm_config.h",
+       {
+           "%{rocm_amdgpu_targets}": ",".join(
+               ["\"%s\"" % c for c in rocm_config.amdgpu_targets]),
+           "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path,
+       }, "rocm/rocm/rocm_config.h")
+
+
+def _create_remote_rocm_repository(repository_ctx, remote_config_repo):
+  """Creates pointers to a remotely configured repo set up to build with ROCm."""
+  _tpl(repository_ctx, "rocm:build_defs.bzl",
+       {
+           "%{rocm_is_configured}": "True",
+           "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+               repository_ctx, #_compute_capabilities(repository_ctx)
+            ),
+
+       })
+  _tpl(repository_ctx, "rocm:remote.BUILD",
+       {
+           "%{remote_rocm_repo}": remote_config_repo,
+       }, "rocm/BUILD")
+  _tpl(repository_ctx, "crosstool:remote.BUILD", {
+           "%{remote_rocm_repo}": remote_config_repo,
+       }, "crosstool/BUILD")
+
+def _rocm_autoconf_impl(repository_ctx):
+  """Implementation of the rocm_autoconf repository rule."""
+  if not _enable_rocm(repository_ctx):
+    _create_dummy_repository(repository_ctx)
+  else:
+    if _TF_ROCM_CONFIG_REPO in repository_ctx.os.environ:
+      _create_remote_rocm_repository(repository_ctx,
+          repository_ctx.os.environ[_TF_ROCM_CONFIG_REPO])
+    else:
+      _create_local_rocm_repository(repository_ctx)
+
+
+rocm_configure = repository_rule(
+    implementation = _rocm_autoconf_impl,
+    environ = [
+        _GCC_HOST_COMPILER_PATH,
+        "TF_NEED_ROCM",
+        _ROCM_TOOLKIT_PATH,
+        _TF_ROCM_VERSION,
+        _TF_MIOPEN_VERSION,
+        _TF_ROCM_AMDGPU_TARGETS,
+        _TF_ROCM_CONFIG_REPO,
+    ],
+)
+
+"""Detects and configures the local ROCm toolchain.
+
+Add the following to your WORKSPACE FILE:
+
+```python
+rocm_configure(name = "local_config_rocm")
+```
+
+Args:
+  name: A unique name for this workspace rule.
+"""
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 601e07ffdd..afc5cf56ab 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -42,6 +42,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld'
 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
 
+build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
+build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true
+
 build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true
 
-- 
GitLab


From 18b80bbd4b8db8bd35afad7264258c1c5c269226 Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Wed, 5 Sep 2018 22:56:20 -0700
Subject: [PATCH 0082/1357] Updated with more unit tests

---
 third_party/ngraph/ngraph.BUILD    | 4 ++--
 third_party/ngraph/ngraph_tf.BUILD | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD
index 426d49c542..1fd1b8e8e0 100644
--- a/third_party/ngraph/ngraph.BUILD
+++ b/third_party/ngraph/ngraph.BUILD
@@ -101,7 +101,7 @@ cc_library(
         "-I external/ngraph/src",
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
-        '-D NGRAPH_VERSION=\\"0.5.0\\"',
+        '-D NGRAPH_VERSION=\\"0.7.0\\"',
         "-D NGRAPH_DEX_ONLY",
     ],
     visibility = ["//visibility:public"],
@@ -135,7 +135,7 @@ cc_library(
         "-I external/ngraph/src",
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
-        '-D NGRAPH_VERSION=\\"0.5.0\\"',
+        '-D NGRAPH_VERSION=\\"0.7.0\\"',
     ],
     visibility = ["//visibility:public"],
     alwayslink = 1,
diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index 7577a4014d..979318d7c2 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -70,6 +70,10 @@ tf_cc_test(
         "test/graph_rewrites/assign_clusters.cc",
         "test/test_utilities.h",
         "test/test_utilities.cpp",
+        "test/test_math_ops.cpp",
+        "test/test_nn_ops.cpp",
+        "test/opexecuter.h",
+        "test/opexecuter.cpp",
         "test/main.cpp",
     ],
     deps = [
-- 
GitLab


From d0574f6b25ab01052e093ab92612520a7e4ada8d Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Thu, 6 Sep 2018 08:22:37 -0700
Subject: [PATCH 0083/1357] Fixed clang formatting

---
 .../stream_executor/cuda/cuda_gpu_executor.cc   | 17 +++++++++--------
 .../stream_executor/cuda/cuda_gpu_executor.h    | 12 ++++++------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index ce2f1ce3ae..ef84d01a94 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -493,10 +493,10 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
 // Compute and return maximum blocks per core (occupancy) based on the
 // device description, some kernel characteristics and the number of threads per
 // block.  If unable to compute occupancy, zero is returned.
-int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description,
-                       uint64 registers_per_thread,
-                       uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func) {
+int CUDAExecutor::CalculateOccupancy(
+    const DeviceDescription& device_description, uint64 registers_per_thread,
+    uint64 shared_memory_per_block, const ThreadDim& thread_dims,
+    CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
   CUresult err =
@@ -509,10 +509,11 @@ int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description
 // Compute and return the suggested thread count to acheive ideal occupancy.
 // If the provided thread dimensions match this number, zero is returned.
 int CUDAExecutor::CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func) {
+                                   const DeviceDescription& device_description,
+                                   uint64 registers_per_thread,
+                                   uint64 shared_memory_per_block,
+                                   const ThreadDim& thread_dims,
+                                   CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
   CUresult err =
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index e8ebbc3220..1481dcc19a 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -71,16 +71,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
               const KernelArgsArrayBase &args) override;
 
   int CalculateOccupancy(const DeviceDescription& device_description,
+                         uint64 registers_per_thread,
+                         uint64 shared_memory_per_block,
+                         const ThreadDim& thread_dims, CUfunction func);
+
+  int CompareOccupancy(int* initial_blocks,
+                       const DeviceDescription& device_description,
                        uint64 registers_per_thread,
                        uint64 shared_memory_per_block,
                        const ThreadDim& thread_dims, CUfunction func);
 
-  int CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func);
-
   void *Allocate(uint64 size) override;
 
   void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,
-- 
GitLab


From e3654a3cb4e26c26409aeeb9e127e3addcb14cee Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 19:20:11 +0000
Subject: [PATCH 0084/1357] Add float16 support on GPU for
 tf.contrib.image.transform

This fix tries to address the issue raised in 22115 where
there were no float16 support on GPU for tf.contrib.image.transform.

This fix fixes 22115.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/image/kernels/image_ops.cc        | 2 ++
 tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tensorflow/contrib/image/kernels/image_ops.cc b/tensorflow/contrib/image/kernels/image_ops.cc
index 370a8caf6a..788bf04b28 100644
--- a/tensorflow/contrib/image/kernels/image_ops.cc
+++ b/tensorflow/contrib/image/kernels/image_ops.cc
@@ -156,6 +156,7 @@ namespace functor {
 TF_CALL_uint8(DECLARE_FUNCTOR);
 TF_CALL_int32(DECLARE_FUNCTOR);
 TF_CALL_int64(DECLARE_FUNCTOR);
+TF_CALL_half(DECLARE_FUNCTOR);
 TF_CALL_float(DECLARE_FUNCTOR);
 TF_CALL_double(DECLARE_FUNCTOR);
 
@@ -175,6 +176,7 @@ TF_CALL_double(DECLARE_FUNCTOR);
 TF_CALL_uint8(REGISTER);
 TF_CALL_int32(REGISTER);
 TF_CALL_int64(REGISTER);
+TF_CALL_half(REGISTER);
 TF_CALL_float(REGISTER);
 TF_CALL_double(REGISTER);
 
diff --git a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc
index 8743a5ff72..36b9a236a6 100644
--- a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc
+++ b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc
@@ -32,6 +32,7 @@ typedef Eigen::GpuDevice GPUDevice;
 template class FillProjectiveTransform<GPUDevice, uint8>;
 template class FillProjectiveTransform<GPUDevice, int32>;
 template class FillProjectiveTransform<GPUDevice, int64>;
+template class FillProjectiveTransform<GPUDevice, Eigen::half>;
 template class FillProjectiveTransform<GPUDevice, float>;
 template class FillProjectiveTransform<GPUDevice, double>;
 
-- 
GitLab


From 7d7e8a725aeede4b724f7376d22df2c7f2ebdcf9 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 19:22:39 +0000
Subject: [PATCH 0085/1357] Add test case for float16 support on GPU for
 tf.contrib.image.transform

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../contrib/image/python/kernel_tests/image_ops_test.py    | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
index 376c0751ee..ef1f79bb94 100644
--- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
+++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
@@ -272,6 +272,13 @@ class ImageOpsTest(test_util.TensorFlowTestCase):
     with self.cached_session():
       self.assertAllEqual([[[[1], [0]], [[0], [1]]]], result.eval())
 
+  def test_transform_data_types(self):
+    for dtype in _DTYPES:
+      image = constant_op.constant([[1, 2], [3, 4]], dtype=dtype)
+      value = image_ops.transform(image, [1] * 8)
+      with self.test_session(use_gpu=True):
+        self.assertAllEqual(value.eval(), np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype()))
+
 
 class BipartiteMatchTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 04e20965487c36f43ba5c773b547b23e39478a5c Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 19:25:22 +0000
Subject: [PATCH 0086/1357] Pylint fix

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../contrib/image/python/kernel_tests/image_ops_test.py       | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
index ef1f79bb94..4997c31a7f 100644
--- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
+++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
@@ -277,7 +277,9 @@ class ImageOpsTest(test_util.TensorFlowTestCase):
       image = constant_op.constant([[1, 2], [3, 4]], dtype=dtype)
       value = image_ops.transform(image, [1] * 8)
       with self.test_session(use_gpu=True):
-        self.assertAllEqual(value.eval(), np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype()))
+        self.assertAllEqual(
+            value.eval(),
+            np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype()))
 
 
 class BipartiteMatchTest(test_util.TensorFlowTestCase):
-- 
GitLab


From 6a5090b086bc9d665eb9e65f05eb94cdb58baaa2 Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Thu, 6 Sep 2018 13:09:12 -0700
Subject: [PATCH 0087/1357] Fully fixed clang errors

---
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 12 ++++++------
 tensorflow/stream_executor/cuda/cuda_gpu_executor.h  | 10 +++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index ef84d01a94..9d5bcc7f77 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -472,7 +472,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
   const DeviceDescription &device_description =
       kernel.parent()->GetDeviceDescription();
 
-  const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel);
+  const CUDAKernel *cuda_kernel = AsCUDAKernel(&kernel);
   CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue();
 
   int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread,
@@ -494,8 +494,8 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
 // device description, some kernel characteristics and the number of threads per
 // block.  If unable to compute occupancy, zero is returned.
 int CUDAExecutor::CalculateOccupancy(
-    const DeviceDescription& device_description, uint64 registers_per_thread,
-    uint64 shared_memory_per_block, const ThreadDim& thread_dims,
+    const DeviceDescription &device_description, uint64 registers_per_thread,
+    uint64 shared_memory_per_block, const ThreadDim &thread_dims,
     CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
@@ -508,11 +508,11 @@ int CUDAExecutor::CalculateOccupancy(
 
 // Compute and return the suggested thread count to acheive ideal occupancy.
 // If the provided thread dimensions match this number, zero is returned.
-int CUDAExecutor::CompareOccupancy(int* initial_blocks,
-                                   const DeviceDescription& device_description,
+int CUDAExecutor::CompareOccupancy(int *initial_blocks,
+                                   const DeviceDescription &device_description,
                                    uint64 registers_per_thread,
                                    uint64 shared_memory_per_block,
-                                   const ThreadDim& thread_dims,
+                                   const ThreadDim &thread_dims,
                                    CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 1481dcc19a..53b2a29ae7 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -70,16 +70,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
               const BlockDim &block_dims, const KernelBase &k,
               const KernelArgsArrayBase &args) override;
 
-  int CalculateOccupancy(const DeviceDescription& device_description,
+  int CalculateOccupancy(const DeviceDescription &device_description,
                          uint64 registers_per_thread,
                          uint64 shared_memory_per_block,
-                         const ThreadDim& thread_dims, CUfunction func);
+                         const ThreadDim &thread_dims, CUfunction func);
 
-  int CompareOccupancy(int* initial_blocks,
-                       const DeviceDescription& device_description,
+  int CompareOccupancy(int *initial_blocks,
+                       const DeviceDescription &device_description,
                        uint64 registers_per_thread,
                        uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func);
+                       const ThreadDim &thread_dims, CUfunction func);
 
   void *Allocate(uint64 size) override;
 
-- 
GitLab


From a0da587dddb7ec2bd703e15882b68085cfd7933e Mon Sep 17 00:00:00 2001
From: Hoeseong Kim <hsgkim@snu.ac.kr>
Date: Fri, 7 Sep 2018 06:48:27 +0900
Subject: [PATCH 0088/1357] fix documentation errors

---
 .../api_def_ExtractVolumePatches.pbtxt        | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt
index 3499ade368..3c8a455983 100644
--- a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt
@@ -1,32 +1,32 @@
 op {
   graph_op_name: "ExtractVolumePatches"
   in_arg {
-    name: "images"
+    name: "input"
     description: <<END
-5-D Tensor with shape `[batch, in_rows, in_cols, depth]`.
+5-D Tensor with shape `[batch, in_planes, in_rows, in_cols, depth]`.
 END
   }
   out_arg {
     name: "patches"
     description: <<END
-5-D Tensor with shape `[batch, out_planes, out_rows, out_cols, 
-ksize_planes * ksize_rows * ksize_cols * depth]` containing image 
-patches with size `ksize_patches x ksize_rows x ksize_cols x depth` 
-vectorized in the "depth" dimension. Note `out_planes`, `out_rows` and 
-`out_cols` are the dimensions of the output patches.
+5-D Tensor with shape `[batch, out_planes, out_rows, out_cols,
+ksize_planes * ksize_rows * ksize_cols * depth]` containing patches
+with size `ksize_planes x ksize_rows x ksize_cols x depth` vectorized
+in the "depth" dimension. Note `out_planes`, `out_rows` and `out_cols`
+are the dimensions of the output patches.
 END
   }
   attr {
     name: "ksizes"
     description: <<END
-The size of the sliding window for each dimension of `images`.
+The size of the sliding window for each dimension of `input`.
 END
   }
   attr {
     name: "strides"
     description: <<END
 1-D of length 5. How far the centers of two consecutive patches are in
-the images. Must be: `[1, stride_planes, stride_rows, stride_cols, 1]`.
+`input`. Must be: `[1, stride_planes, stride_rows, stride_cols, 1]`.
 END
   }
   attr {
@@ -43,7 +43,7 @@ We specify the size-related attributes as:
 END
   }
   summary: <<END
-Extract `patches` from `images` and put them in the \"depth\" output 
+Extract `patches` from `input` and put them in the "depth" output
 dimension. 3D extension of `extract_image_patches`.
 END
 }
-- 
GitLab


From b3ec2caeeefecc95684176a6211622ed20f00f9b Mon Sep 17 00:00:00 2001
From: Hoeseong Kim <hsgkim@snu.ac.kr>
Date: Fri, 7 Sep 2018 08:16:48 +0900
Subject: [PATCH 0089/1357] fix argument name

---
 tensorflow/core/ops/array_ops.cc                | 2 +-
 tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 2 +-
 tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 6c8369200a..44908fe875 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -2553,7 +2553,7 @@ REGISTER_OP("ExtractImagePatches")
 // as the second parameter of all GetWindowedOutputSizeVerbose calls instead
 // of ksize_*.
 REGISTER_OP("ExtractVolumePatches")
-    .Input("images: T")
+    .Input("input: T")
     .Output("patches: T")
     .Attr("ksizes: list(int) >= 5")
     .Attr("strides: list(int) >= 5")
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index ba928eba9e..eafcc208cc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1062,7 +1062,7 @@ tf_module {
   }
   member_method {
     name: "extract_volume_patches"
-    argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "eye"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index f7e63978da..cd06ee5763 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -1062,7 +1062,7 @@ tf_module {
   }
   member_method {
     name: "extract_volume_patches"
-    argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "eye"
-- 
GitLab


From e25cf78285fef5234380ee26fef9090a939e91f5 Mon Sep 17 00:00:00 2001
From: Richard Yu <yohan.richard.yu@gmail.com>
Date: Thu, 6 Sep 2018 17:05:08 -0700
Subject: [PATCH 0090/1357] Ensure all ValueErrors are raised

---
 tensorflow/contrib/quantize/python/fold_batch_norms.py | 2 +-
 tensorflow/python/keras/layers/embeddings.py           | 8 ++++----
 tensorflow/python/ops/nn_ops.py                        | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index d9f179bee4..d882b79892 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -628,7 +628,7 @@ def _GetBatchNormParams(graph, context, has_scaling):
   bn_decay_var_tensor = _FindMatchingTensor(graph, op_suffix_bn_decay_var,
                                             context)
   if batch_mean_tensor is None and moving_mean_tensor is None:
-    ValueError('Error folding unfused batch norms')
+    raise ValueError('Error folding unfused batch norms')
   if has_scaling:
     gamma_tensor = _FindMatchingTensor(graph, op_suffix_gamma, context)
 
diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py
index 629a9ec9a1..a0b9393812 100644
--- a/tensorflow/python/keras/layers/embeddings.py
+++ b/tensorflow/python/keras/layers/embeddings.py
@@ -142,13 +142,13 @@ class Embedding(Layer):
       else:
         in_lens = [self.input_length]
       if len(in_lens) != len(input_shape) - 1:
-        ValueError('"input_length" is %s, but received input has shape %s' %
-                   (str(self.input_length), str(input_shape)))
+        raise ValueError('"input_length" is %s, but received input has shape %s' %
+                         (str(self.input_length), str(input_shape)))
       else:
         for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])):
           if s1 is not None and s2 is not None and s1 != s2:
-            ValueError('"input_length" is %s, but received input has shape %s' %
-                       (str(self.input_length), str(input_shape)))
+            raise ValueError('"input_length" is %s, but received input has shape %s' %
+                             (str(self.input_length), str(input_shape)))
           elif s1 is None:
             in_lens[i] = s2
       return (input_shape[0],) + tuple(in_lens) + (self.output_dim,)
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index ef9afd9e8e..17e10995f2 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -427,8 +427,8 @@ class _WithSpaceToBatch(object):
     try:
       input_shape.with_rank_at_least(expected_input_rank)
     except ValueError:
-      ValueError("input tensor must have rank %d at least" %
-                 (expected_input_rank))
+      raise ValueError("input tensor must have rank %d at least" %
+                       (expected_input_rank))
 
     const_rate = tensor_util.constant_value(dilation_rate)
     rate_or_const_rate = dilation_rate
@@ -818,12 +818,12 @@ class Convolution(object):
     try:
       input_shape.with_rank(num_spatial_dims + 2)
     except ValueError:
-      ValueError("input tensor must have rank %d" % (num_spatial_dims + 2))
+      raise ValueError("input tensor must have rank %d" % (num_spatial_dims + 2))
 
     try:
       filter_shape.with_rank(num_spatial_dims + 2)
     except ValueError:
-      ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2))
+      raise ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2))
 
     if data_format is None or not data_format.startswith("NC"):
       input_channels_dim = input_shape[num_spatial_dims + 1]
-- 
GitLab


From f5eb30c29d5d34145252e49ac3f9bda067abafe8 Mon Sep 17 00:00:00 2001
From: Smokrow <moritz.kroeger@tu-dortmund.de>
Date: Fri, 7 Sep 2018 09:26:44 +0200
Subject: [PATCH 0091/1357] edited flat_map description and removed typo

The examples in interleave are quite helpful. I just added a reference to this example
---
 tensorflow/python/data/ops/dataset_ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 2c1aa22116..8242c7309d 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1007,7 +1007,7 @@ class Dataset(object):
       return ParallelMapDataset(self, map_func, num_parallel_calls)
 
   def flat_map(self, map_func):
-    """Maps `map_func` across this dataset and flattens the result.
+    """Maps `map_func` across this dataset and flattens the result. Will produce identical results to 'tf.data.Dataset.interleave'
 
     Args:
       map_func: A function mapping a nested structure of tensors (having shapes
@@ -1043,7 +1043,7 @@ class Dataset(object):
     elements are produced. `cycle_length` controls the number of input elements
     that are processed concurrently. If you set `cycle_length` to 1, this
     transformation will handle one input element at a time, and will produce
-    identical results = to `tf.data.Dataset.flat_map`. In general,
+    identical results to `tf.data.Dataset.flat_map`. In general,
     this transformation will apply `map_func` to `cycle_length` input elements,
     open iterators on the returned `Dataset` objects, and cycle through them
     producing `block_length` consecutive elements from each iterator, and
-- 
GitLab


From a11cb4cb1500f35266667d9f72b0a0534f2d1581 Mon Sep 17 00:00:00 2001
From: BY Shen <byshen@gmail.com>
Date: Fri, 7 Sep 2018 22:20:37 +0800
Subject: [PATCH 0092/1357] Fix a bug in TF_LITE_ENSURE_OK.

---
 tensorflow/contrib/lite/context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h
index b23183b743..58977b5c47 100644
--- a/tensorflow/contrib/lite/context.h
+++ b/tensorflow/contrib/lite/context.h
@@ -148,7 +148,7 @@ void TfLiteIntArrayFree(TfLiteIntArray* v);
 #define TF_LITE_ENSURE_OK(context, status) \
   do {                                     \
     if ((status) != kTfLiteOk) {           \
-      return status;                       \
+      return kTfLiteError;                 \
     }                                      \
   } while (0)
 
-- 
GitLab


From 3445242ac138d4d5aa9b346e17cd47ebf23770a5 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 7 Sep 2018 23:39:53 +0000
Subject: [PATCH 0093/1357] Fix int64 failure on GPU for TensorArray

This fix tries to address the issue raised in 22054 where
int64 on GPU results in colocation errors. This fix enables
int64 on GPU with TensorArray.

This fix fixes 22054.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/tensor_array_ops.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc
index 2ec2651c04..82a7735c6d 100644
--- a/tensorflow/core/kernels/tensor_array_ops.cc
+++ b/tensorflow/core/kernels/tensor_array_ops.cc
@@ -259,6 +259,7 @@ REGISTER_KERNEL_BUILDER(Name("TensorArrayV3").Device(DEVICE_CPU),
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
@@ -487,6 +488,7 @@ TF_CALL_ALL_TYPES(REGISTER_WRITE);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
@@ -576,6 +578,7 @@ TF_CALL_ALL_TYPES(REGISTER_READ)
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
@@ -777,6 +780,7 @@ REGISTER_GATHER_AND_PACK(qint32);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
@@ -998,6 +1002,7 @@ REGISTER_CONCAT(qint32);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
@@ -1218,6 +1223,7 @@ TF_CALL_ALL_TYPES(REGISTER_SCATTER_AND_UNPACK);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 #undef REGISTER_GPU
 
 #endif  // GOOGLE_CUDA
@@ -1388,6 +1394,7 @@ TF_CALL_ALL_TYPES(REGISTER_SPLIT);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 #undef REGISTER_GPU
 
 #endif  // GOOGLE_CUDA
-- 
GitLab


From 81677d2f20664c7f76598c20f2a01d62465999b4 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 7 Sep 2018 23:42:20 +0000
Subject: [PATCH 0094/1357] Add needed specifications for Split on GPU.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/split_lib_gpu.cu.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc
index 393818730b..8623e47e41 100644
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc
@@ -54,6 +54,7 @@ void SplitCustom<Device, T>::operator()(
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
+TF_CALL_int64(DEFINE_GPU_KERNELS);
 TF_CALL_bfloat16(DEFINE_GPU_KERNELS);
 
 #undef DEFINE_GPU_KERNELS
@@ -245,6 +246,7 @@ struct SplitVOpGPULaunch {
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 TF_CALL_complex64(REGISTER_GPU_KERNEL);
 TF_CALL_complex128(REGISTER_GPU_KERNEL);
+TF_CALL_int64(REGISTER_GPU_KERNEL);
 TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 #define REGISTER_GPU_KERNEL(T)                 \
@@ -254,6 +256,7 @@ TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 TF_CALL_complex64(REGISTER_GPU_KERNEL);
 TF_CALL_complex128(REGISTER_GPU_KERNEL);
+TF_CALL_int64(REGISTER_GPU_KERNEL);
 TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
-- 
GitLab


From bd1fd82712706592b9a6d34a6bac1b1f438eb00f Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Fri, 7 Sep 2018 19:16:04 -0700
Subject: [PATCH 0095/1357] Updated the ngraph-tf and ngraph releases.

---
 WORKSPACE                | 12 ------------
 tensorflow/workspace.bzl | 40 ++++++++++++++++++++--------------------
 2 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index 15aa24f3c1..f1d0ed565d 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -80,15 +80,3 @@ new_http_archive(
     ],
 )
 
-new_local_repository(
-    name = "ngraph",
-    path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph",
-    build_file = "//third_party/ngraph:ngraph.BUILD",
-)
-
-new_local_repository(
-    name = "ngraph_tf",
-    path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph-tf",
-    build_file = "//third_party/ngraph:ngraph_tf.BUILD",
-)
-
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 0ff695d9f8..79b3df1e51 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -841,16 +841,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         build_file = clean_dep("//third_party/ngraph:tbb.BUILD"),
     )
 
-    # tf_http_archive(
-    #     name = "ngraph",
-    #     urls = [
-    #         "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz",
-    #         "https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz",
-    #     ],
-    #     sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c",
-    #     strip_prefix = "ngraph-0.5.0",
-    #     build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
-    # )
+    tf_http_archive(
+        name = "ngraph",
+        urls = [
+            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz",
+            "https://github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz",
+        ],
+        sha256 = "",
+        strip_prefix = "ngraph-0.7.0",
+        build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
+    )
 
     tf_http_archive(
         name = "nlohmann_json_lib",
@@ -863,16 +863,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"),
     )
 
-    # tf_http_archive(
-    #     name = "ngraph_tf",
-    #     urls = [
-    #         "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz",
-    #         "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz",
-    #     ],
-    #     sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072",
-    #     strip_prefix = "ngraph-tf-0.3.0-rc1",
-    #     build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"),
-    # )
+    tf_http_archive(
+        name = "ngraph_tf",
+        urls = [
+            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.5.0.tar.gz",
+            "https://github.com/NervanaSystems/ngraph-tf/archive/v0.5.0.tar.gz",
+        ],
+        sha256 = "23b4566d8e40d6f1f236b0ffe3905dd964ae42ca54bacff67f24abcefd443afb",
+        strip_prefix = "ngraph-tf-0.5.0",
+        build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"),
+    )
 
     ##############################################################################
     # BIND DEFINITIONS
-- 
GitLab


From 47df1ccb1837382a526439b38cd1259fca5d074b Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Fri, 7 Sep 2018 19:18:02 -0700
Subject: [PATCH 0096/1357] Removed empty newline

---
 WORKSPACE | 1 -
 1 file changed, 1 deletion(-)

diff --git a/WORKSPACE b/WORKSPACE
index f1d0ed565d..17961829a6 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -79,4 +79,3 @@ new_http_archive(
         "http://download.tensorflow.org/models/speech_commands_v0.01.zip",
     ],
 )
-
-- 
GitLab


From 2032512ba1de376baadfa9f3983e3edbc67a6731 Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Fri, 7 Sep 2018 19:21:19 -0700
Subject: [PATCH 0097/1357] Updated the sha256 for ngraph

---
 tensorflow/workspace.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 79b3df1e51..9a82c724b7 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -847,7 +847,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
             "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz",
             "https://github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz",
         ],
-        sha256 = "",
+        sha256 = "34434b6d5993ac5233538c84f498840db7ac91df82e225c379ee7c8f6de644a5",
         strip_prefix = "ngraph-0.7.0",
         build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
     )
-- 
GitLab


From 864e290d1776895d7877777b8368ca8bc6fc22a3 Mon Sep 17 00:00:00 2001
From: Edvard Fagerholm <edvard.fagerholm@gmail.com>
Date: Wed, 29 Aug 2018 11:56:35 +0300
Subject: [PATCH 0098/1357] Make tf.transpose emit simpler graph when possible

If not given an explicit 'perm' parameter, tf.transpose currently
emits a graph that dynamically calculates it from the rank of the
input tensor. This is completely unnecessary when the rank of the
input can be statically determined at graph construction time.

Modify tf.transpose to emit 'perm' as a single Const node whenever
possible.
---
 tensorflow/python/ops/array_ops.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 7bf3869ddf..9597839301 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1409,8 +1409,13 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
         gen_array_ops.conjugate_transpose
         if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose)
     if perm is None:
-      rank = gen_array_ops.rank(a)
-      perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
+      a = ops.convert_to_tensor(a, name="a")
+      if not a.get_shape().ndims:
+        rank = gen_array_ops.rank(a)
+        perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
+      else:
+        rank = a.get_shape().ndims
+        perm = (rank - 1) - np.arange(rank)
       ret = transpose_fn(a, perm, name=name)
       # NOTE(mrry): Setting the shape explicitly because
       #   reverse is not handled by the shape function.
-- 
GitLab


From ea0d499693c4609a8be55add3163971f93b8f2be Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 10 Sep 2018 01:41:54 +0000
Subject: [PATCH 0099/1357] Fix python 3 GPU test failures

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/split_lib_gpu.cu.cc | 2 --
 tensorflow/core/kernels/tensor_array_ops.cc | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc
index 8623e47e41..a4a59dbcbc 100644
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc
@@ -246,7 +246,6 @@ struct SplitVOpGPULaunch {
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 TF_CALL_complex64(REGISTER_GPU_KERNEL);
 TF_CALL_complex128(REGISTER_GPU_KERNEL);
-TF_CALL_int64(REGISTER_GPU_KERNEL);
 TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 #define REGISTER_GPU_KERNEL(T)                 \
@@ -256,7 +255,6 @@ TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 TF_CALL_complex64(REGISTER_GPU_KERNEL);
 TF_CALL_complex128(REGISTER_GPU_KERNEL);
-TF_CALL_int64(REGISTER_GPU_KERNEL);
 TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc
index 82a7735c6d..58f1a36a90 100644
--- a/tensorflow/core/kernels/tensor_array_ops.cc
+++ b/tensorflow/core/kernels/tensor_array_ops.cc
@@ -488,7 +488,6 @@ TF_CALL_ALL_TYPES(REGISTER_WRITE);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
-TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
@@ -780,7 +779,6 @@ REGISTER_GATHER_AND_PACK(qint32);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
-TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
@@ -1002,7 +1000,6 @@ REGISTER_CONCAT(qint32);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
-TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
@@ -1394,7 +1391,6 @@ TF_CALL_ALL_TYPES(REGISTER_SPLIT);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
-TF_CALL_int64(REGISTER_GPU);
 #undef REGISTER_GPU
 
 #endif  // GOOGLE_CUDA
-- 
GitLab


From 90cf7fb7786c8a9c135ef73482856b082e80f61a Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Tue, 11 Sep 2018 12:48:30 +0800
Subject: [PATCH 0100/1357] Fix lint errors and typos.

---
 tensorflow/compiler/tests/binary_ops_test.py  |  9 +++++----
 tensorflow/compiler/tf2xla/kernels/relu_op.cc | 14 +++++++-------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 8941dd4e27..069e83d083 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -179,11 +179,12 @@ class BinaryOpsTest(xla_test.XLATestCase):
           expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype))
 
       self._testBinary(
-          gen_nn_ops._leaky_relu_grad,
+          gen_nn_ops.leaky_relu_grad,
           np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype),
-          np.array(
-              [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype),
-          expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype))
+          np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+                   dtype=dtype),
+          expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10],
+                            dtype=dtype))
 
       self._testBinary(
           gen_nn_ops.softmax_cross_entropy_with_logits,
diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
index ec14735884..8d65e0339c 100644
--- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
@@ -50,7 +50,6 @@ class Relu6Op : public XlaOpKernel {
   }
 };
 
-
 class LeakyReluOp : public XlaOpKernel {
  public:
   explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
@@ -61,9 +60,9 @@ class LeakyReluOp : public XlaOpKernel {
     xla::XlaBuilder* builder = ctx->builder();
     auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0),
                                           static_cast<double>(alpha_));
-    ctx->SetOutput(0,
-        xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0)));
+    ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0)));
   }
+
  private:
   float alpha_;
 };
@@ -115,11 +114,12 @@ class LeakyReluGradOp : public XlaOpKernel {
     const auto zero =
         xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes());
     const auto pred = xla::Gt(ctx->Input(1), zero);
-    auto alpha = XlaHelpers::FloatLiteral(b, input_type(0),
-                                          static_cast<double>(alpha_));
-    ctx->SetOutput(0,
-        xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0))));
+    auto alpha =
+        XlaHelpers::FloatLiteral(b, input_type(0), static_cast<double>(alpha_));
+    ctx->SetOutput(
+        0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0))));
   }
+
  private:
   float alpha_;
 };
-- 
GitLab


From c807662d69dd1ca8bda7c34a642b812b38a4720b Mon Sep 17 00:00:00 2001
From: Smokrow <moritz.kroeger@tu-dortmund.de>
Date: Tue, 11 Sep 2018 10:35:27 +0200
Subject: [PATCH 0101/1357] added example for flat_map

---
 tensorflow/python/data/ops/dataset_ops.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 8242c7309d..14a1e3d803 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1007,8 +1007,20 @@ class Dataset(object):
       return ParallelMapDataset(self, map_func, num_parallel_calls)
 
   def flat_map(self, map_func):
-    """Maps `map_func` across this dataset and flattens the result. Will produce identical results to 'tf.data.Dataset.interleave'
+    """Maps `map_func` across this dataset and flattens the result. 
+    
+    Will produce similar results to `tf.data.Dataset.interleave(cycle_length=1)`. 
+    Use `flat_map` if you want to make sure, that the order of your dataset stays the same.
+    For example:
 
+    ```python
+    # NOTE: The following examples use `{ ... }` to represent the
+    # contents of a dataset. '[...]' represents a tensor.
+    a = {[1,2,3,4,5], [6,7,8,9], [10]}
+    
+    a.flat_map(lambda x: Dataset.from_tensors(x)) == 
+      {[1,2,3,4,5,6,7,8,9,10]}
+    ```
     Args:
       map_func: A function mapping a nested structure of tensors (having shapes
         and types defined by `self.output_shapes` and `self.output_types`) to a
-- 
GitLab


From 8530167f68673fa756565c0394bbe2dcdc39db05 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Fri, 24 Aug 2018 16:52:07 +0300
Subject: [PATCH 0102/1357] Add IgniteDataset that allows to work with Apache
 Ignite.

---
 configure.py                                  |   2 +
 tensorflow/BUILD                              |   6 +
 tensorflow/contrib/BUILD                      |  15 +
 tensorflow/contrib/cmake/python_modules.txt   |   2 +
 tensorflow/contrib/ignite/BUILD               | 136 ++++
 tensorflow/contrib/ignite/README.md           | 167 ++++
 tensorflow/contrib/ignite/__init__.py         |  42 +
 .../kernels/ignite_binary_object_parser.cc    | 304 +++++++
 .../kernels/ignite_binary_object_parser.h     |  54 ++
 .../contrib/ignite/kernels/ignite_client.cc   |  55 ++
 .../contrib/ignite/kernels/ignite_client.h    |  40 +
 .../contrib/ignite/kernels/ignite_dataset.cc  | 123 +++
 .../contrib/ignite/kernels/ignite_dataset.h   |  65 ++
 .../ignite/kernels/ignite_dataset_iterator.cc | 447 ++++++++++
 .../ignite/kernels/ignite_dataset_iterator.h  |  87 ++
 .../ignite/kernels/ignite_dataset_ops.cc      | 145 ++++
 .../ignite/kernels/ignite_plain_client.h      |  43 +
 .../kernels/ignite_plain_client_unix.cc       | 132 +++
 .../kernels/ignite_plain_client_windows.cc    | 143 ++++
 .../ignite/kernels/ignite_ssl_wrapper.cc      | 149 ++++
 .../ignite/kernels/ignite_ssl_wrapper.h       |  49 ++
 tensorflow/contrib/ignite/ops/dataset_ops.cc  |  64 ++
 .../ignite/python/ops/ignite_dataset_ops.py   | 763 ++++++++++++++++++
 .../ignite/python/ops/ignite_op_loader.py     |  25 +
 .../ignite/python/tests/bin/start-plain.sh    |  24 +
 .../ignite/python/tests/bin/start-ssl-auth.sh |  28 +
 .../ignite/python/tests/bin/start-ssl.sh      |  26 +
 .../tests/config/ignite-config-plain.xml      |  39 +
 .../tests/config/ignite-config-ssl-auth.xml   |  59 ++
 .../python/tests/config/ignite-config-ssl.xml |  59 ++
 .../python/tests/ignite_dataset_test.py       |  77 ++
 .../ignite/python/tests/keystore/client.jks   | Bin 0 -> 3232 bytes
 .../ignite/python/tests/keystore/client.pem   |  69 ++
 .../ignite/python/tests/keystore/server.jks   | Bin 0 -> 3230 bytes
 .../ignite/python/tests/keystore/trust.jks    | Bin 0 -> 2432 bytes
 .../contrib/ignite/python/tests/sql/init.sql  |  20 +
 .../ignite/python/tests/start_ignite.sh       |  30 +
 .../ignite/python/tests/stop_ignite.sh        |  19 +
 38 files changed, 3508 insertions(+)
 create mode 100644 tensorflow/contrib/ignite/BUILD
 create mode 100644 tensorflow/contrib/ignite/README.md
 create mode 100644 tensorflow/contrib/ignite/__init__.py
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
 create mode 100644 tensorflow/contrib/ignite/ops/dataset_ops.cc
 create mode 100644 tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
 create mode 100644 tensorflow/contrib/ignite/python/ops/ignite_op_loader.py
 create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-plain.sh
 create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh
 create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh
 create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml
 create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml
 create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml
 create mode 100644 tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
 create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/client.jks
 create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/client.pem
 create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/server.jks
 create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/trust.jks
 create mode 100644 tensorflow/contrib/ignite/python/tests/sql/init.sql
 create mode 100755 tensorflow/contrib/ignite/python/tests/start_ignite.sh
 create mode 100755 tensorflow/contrib/ignite/python/tests/stop_ignite.sh

diff --git a/configure.py b/configure.py
index 361bd4764d..8f1957e870 100644
--- a/configure.py
+++ b/configure.py
@@ -1502,6 +1502,8 @@ def main():
                 'with_aws_support', True, 'aws')
   set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform',
                 'with_kafka_support', True, 'kafka')
+  set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite',
+                'with_ignite_support', True, 'ignite')
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
                 False, 'xla')
   set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support',
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 386e0096ff..6c29c78793 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -248,6 +248,12 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "with_ignite_support",
+    define_values = {"with_ignite_support": "true"},
+    visibility = ["//visibility:public"],
+)
+
 # Crosses between platforms and file system libraries not supported on those
 # platforms due to limitations in nested select() statements.
 config_setting(
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 798f499870..f055e643d0 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -118,6 +118,11 @@ py_library(
             "//tensorflow/contrib/kafka",
         ],
         "//conditions:default": [],
+    }) + select({
+        "//tensorflow:with_ignite_support": [
+            "//tensorflow/contrib/ignite",
+        ],
+        "//conditions:default": [],
     }) + select({
         "//tensorflow:with_aws_support_windows_override": [],
         "//tensorflow:with_aws_support": [
@@ -160,6 +165,11 @@ cc_library(
             "//tensorflow/contrib/kafka:dataset_kernels",
         ],
         "//conditions:default": [],
+    }) + select({
+        "//tensorflow:with_ignite_support": [
+            "//tensorflow/contrib/ignite:dataset_kernels",
+        ],
+        "//conditions:default": [],
     }) + select({
         "//tensorflow:with_aws_support_windows_override": [],
         "//tensorflow:with_aws_support": [
@@ -197,6 +207,11 @@ cc_library(
             "//tensorflow/contrib/kafka:dataset_ops_op_lib",
         ],
         "//conditions:default": [],
+    }) + select({
+        "//tensorflow:with_ignite_support": [
+            "//tensorflow/contrib/ignite:dataset_ops_op_lib",
+        ],
+        "//conditions:default": [],
     }) + select({
         "//tensorflow:with_aws_support_windows_override": [],
         "//tensorflow:with_aws_support": [
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index fb871acae9..56755e817a 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -207,6 +207,8 @@ tensorflow/contrib/integrate/python
 tensorflow/contrib/integrate/python/ops
 tensorflow/contrib/kafka/python
 tensorflow/contrib/kafka/python/ops
+tensorflow/contrib/ignite/python
+tensorflow/contrib/ignite/python/ops
 tensorflow/contrib/keras
 tensorflow/contrib/keras/api
 tensorflow/contrib/keras/api/keras
diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
new file mode 100644
index 0000000000..9f6c666893
--- /dev/null
+++ b/tensorflow/contrib/ignite/BUILD
@@ -0,0 +1,136 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_gen_op_wrapper_py",
+    "tf_kernel_library",
+    "tf_custom_op_library",
+    "tf_custom_op_py_library",
+    "tf_gen_op_libs",
+    "tf_py_test",
+    "if_not_windows",
+    "if_windows",
+)
+
+py_library(
+    name = "ignite",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":dataset_ops",
+    ],
+)
+
+tf_custom_op_library(
+    name = "_dataset_ops.so",
+    srcs = ["ops/dataset_ops.cc"],
+    deps = [":dataset_kernels"],
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["dataset_ops"],
+)
+
+cc_library(
+    name = "dataset_kernels",
+    srcs = [
+        "kernels/ignite_dataset_ops.cc",
+        "kernels/ignite_client.h",
+        "kernels/ignite_client.cc",
+        "kernels/ignite_plain_client.h",
+        "kernels/ignite_ssl_wrapper.h",
+        "kernels/ignite_ssl_wrapper.cc",
+        "kernels/ignite_binary_object_parser.h",
+        "kernels/ignite_binary_object_parser.cc",
+        "kernels/ignite_dataset.h",
+        "kernels/ignite_dataset.cc",
+        "kernels/ignite_dataset_iterator.h",
+        "kernels/ignite_dataset_iterator.cc",
+    ] + if_not_windows([
+        "kernels/ignite_plain_client_unix.cc",
+    ]) + if_windows([
+        "kernels/ignite_plain_client_windows.cc",
+    ]),
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//third_party/eigen3",
+        "@boringssl//:ssl",
+        "@protobuf_archive//:protobuf_headers",
+    ],
+    alwayslink = 1,
+)
+
+py_library(
+    name = "dataset_ops",
+    srcs = [
+        "python/ops/ignite_dataset_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":ignite_op_loader",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
+
+tf_gen_op_wrapper_py(
+    name = "gen_dataset_ops",
+    out = "python/ops/gen_dataset_ops.py",
+    deps = ["//tensorflow/contrib/ignite:dataset_ops_op_lib"],
+)
+
+tf_kernel_library(
+    name = "dataset_ops_kernels",
+    deps = [
+        ":dataset_kernels",
+        "//tensorflow/core:framework",
+    ],
+    alwayslink = 1,
+)
+
+tf_custom_op_py_library(
+    name = "ignite_op_loader",
+    srcs = ["python/ops/ignite_op_loader.py"],
+    dso = ["//tensorflow/contrib/ignite:_dataset_ops.so"],
+    kernels = [
+        ":dataset_ops_kernels",
+        "//tensorflow/contrib/ignite:dataset_ops_op_lib",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":gen_dataset_ops",
+        "//tensorflow/contrib/util:util_py",
+        "//tensorflow/python:platform",
+    ],
+)
+
+# The Apache Ignite servers have to setup before the test and tear down
+# after the test manually. The docker engine has to be installed.
+#
+# To setup Apache Ignite servers:
+# $ bash ./python/tests/start_ignite.sh
+#
+# To tear down Apache Ignite servers:
+# $ bash ./python/tests/stop_ignite.sh
+tf_py_test(
+    name = "ignite_dataset_test",
+    srcs = ["python/tests/ignite_dataset_test.py"],
+    additional_deps = [
+        ":ignite",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform_test",
+    ],
+    tags = [
+        "manual",
+        "no_windows",
+        "notap",
+    ],
+)
diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md
new file mode 100644
index 0000000000..9054344e94
--- /dev/null
+++ b/tensorflow/contrib/ignite/README.md
@@ -0,0 +1,167 @@
+### Ignite Dataset
+# Ignite Dataset
+
+- [Overview](#overview)
+- [Features](#features)
+  * [Distributed In-Memory Datasource](#distributed-in-memory-datasource)
+  * [Structured Objects](#structured-objects)
+  * [Distributed Training](#distributed-training)
+  * [SSL Connection](#ssl-connection)
+  * [Windows Support](#windows-support)
+- [Try it out](#try-it-out)
+- [Limitations](#limitations)
+
+## Overview
+
+[Apache Ignite](https://ignite.apache.org/) is a memory-centric distributed database, caching, and processing platform for
+transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a datasource for neural network training, inference and all other computations supported by TensorFlow. 
+
+## Features
+
+Ignite Dataset provides a set of features that makes it possible to use it in a wide range of cases. The most important and interesting features are described below.
+
+### Distributed In-Memory Datasource
+[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that allows to avoid limitations of hard drive and provide high reading speed and ability to store and operate with as much data as you need in distributed cluster. Using of Ignite Dataset makes it possible to utilize all these advantages. 
+- If you have a **gigabyte** of data you can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations. At the same time, you can store your data in Apache Ignite on the same machine and use it as a datasource for TensorFlow and thus avoid these limitations.
+- If you have a **terabyte** of data you probably still can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations again. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow and thus avoid these limitations.
+- If you have a **petabyte** of data you can't keep it on a single machine. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow.
+
+It's  important that Apache Ignite is not just a step of ETL pipeline between database or data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. Choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, in the same time, an ability to use this data for neural network training and inference.
+
+```bash
+$ apache-ignite-fabric/bin/ignite.sh
+$ apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://localhost:10800/"
+
+jdbc:ignite:thin://localhost/> CREATE TABLE KITTEN_CACHE (ID LONG PRIMARY KEY, NAME VARCHAR);
+jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (1, 'WARM KITTY');
+jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (2, 'SOFT KITTY');
+jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL OF FUR');
+```
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset(cache_name="SQL_PUBLIC_KITTEN_CACHE")
+>>> iterator = dataset.make_one_shot_iterator()
+>>> next_obj = iterator.get_next()
+>>>
+>>> with tf.Session() as sess:
+>>>   for _ in range(3):
+>>>     print(sess.run(next_obj))
+
+{'key': 1, 'val': {'NAME': b'WARM KITTY'}}
+{'key': 2, 'val': {'NAME': b'SOFT KITTY'}}
+{'key': 3, 'val': {'NAME': b'LITTLE BALL OF FUR'}}
+```
+
+### Structured Objects
+[Apache Ignite](https://ignite.apache.org/) allows to store any objects you would like to store. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects.
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset(cache_name="IMAGES")
+>>> iterator = dataset.make_one_shot_iterator()
+>>> next_obj = iterator.get_next()
+>>>
+>>> with tf.Session() as sess:
+>>>   print(sess.run(next_obj))
+
+{
+    'key': 'kitten.png', 
+    'val': {
+        'metadata': {
+            'file_name': b'kitten.png',
+            'label': b'little ball of fur',
+            width: 800, 
+            height: 600
+        }, 
+        'pixels': [0, 0, 0, 0, ..., 0]
+    }
+}
+```
+ Neural network training and other computations require transformations that can be done as part of  [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset.
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset(cache_name="IMAGES").map(lambda obj: obj['val']['pixels'])
+>>> iterator = dataset.make_one_shot_iterator()
+>>> next_obj = iterator.get_next()
+>>>
+>>> with tf.Session() as sess:
+>>>   print(sess.run(next_obj))
+
+[0, 0, 0, 0, ..., 0]
+```
+
+### Distributed Training
+
+TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is an ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. 
+
+<a href="https://www.codecogs.com/eqnedit.php?latex=\nabla[\sum_1^n(y&space;-&space;\hat{y})^2]&space;=&space;\nabla[\sum_1^{n_1}(y&space;-&space;\hat{y})^2]&space;&plus;&space;\nabla[\sum_{n_1}^{n_2}(y&space;-&space;\hat{y})^2]&space;&plus;&space;...&space;&plus;&space;\nabla[\sum_{n_{k-1}}^n(y&space;-&space;\hat{y})^2]" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\nabla[\sum_1^n(y&space;-&space;\hat{y})^2]&space;=&space;\nabla[\sum_1^{n_1}(y&space;-&space;\hat{y})^2]&space;&plus;&space;\nabla[\sum_{n_1}^{n_2}(y&space;-&space;\hat{y})^2]&space;&plus;&space;...&space;&plus;&space;\nabla[\sum_{n_{k-1}}^n(y&space;-&space;\hat{y})^2]" title="\nabla[\sum_1^n(y - \hat{y})^2] = \nabla[\sum_1^{n_1}(y - \hat{y})^2] + \nabla[\sum_{n_1}^{n_2}(y - \hat{y})^2] + ... + \nabla[\sum_{n_{k-1}}^n(y - \hat{y})^2]" /></a>
+
+Utilizing this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottleneck.
+
+Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL) we can specify the number of partitions the data will be partitioned on. If, for example, Apache Ignite cluster consists of 10 machines and we creates cache with 10 partitions then every machine will maintain approximately one data partition.
+
+Ignite Dataset allows to utilize these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that might be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting correstondent environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach we are able to assign specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset.
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset("IMAGES")
+>>>
+>>> # Compute gradients locally on every worker node.
+>>> gradients = []    
+>>> for i in range(5):
+>>>     with tf.device("/job:WORKER/task:%d" % i):
+>>>         device_iterator = dataset.make_one_shot_iterator()
+>>>         device_next_obj = device_iterator.get_next()
+>>>         gradient = compute_gradient(device_next_obj)
+>>>         gradients.append(gradient)        
+>>>        
+>>> # Aggregate them on master node.
+>>> result_gradient = tf.reduce_sum(gradients)
+>>>
+>>> with tf.Session("grpc://localhost:10000") as sess:
+>>>     print(sess.run(result_gradient))
+```
+
+High-level TensorFlow API for [distributed training](https://www.tensorflow.org/api_docs/python/tf/contrib/distribute/DistributionStrategy) is supported as well. 
+
+### SSL Connection
+
+Your data should not be accessible without any control. Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information please see [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation.
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset(cache_name="IMAGES", certfile="client.pem", cert_password="password", username="ignite", password="ignite")
+>>> ...
+```
+
+### Windows Support
+
+Ignite Dataset is fully compatible with Windows, so you can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems.
+
+## Try it out
+
+The simplest way to try Ignite Dataset out is to run [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and then interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine:
+
+```
+docker run -it -p 10800:10800 dmitrievanthony/ignite-with-mnist
+```
+
+After that you will be able to work with it following way:
+
+![ignite-dataset-mnist](https://s3.amazonaws.com/helloworld23423423ew23/ignite-dataset-mnist.png "Ignite Dataset Mnist")
+
+## Limitations
+
+Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures.
\ No newline at end of file
diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py
new file mode 100644
index 0000000000..468920a557
--- /dev/null
+++ b/tensorflow/contrib/ignite/__init__.py
@@ -0,0 +1,42 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Apache Ignite is a memory-centric distributed database, caching, and
+   processing platform for transactional, analytical, and streaming workloads,
+   delivering in-memory speeds at petabyte scale. This contrib package
+   contains an integration between Apache Ignite and TensorFlow. The
+   integration is based on tf.data from TensorFlow side and Binary Client
+   Protocol from Apache Ignite side. It allows to use Apache Ignite as a
+   datasource for neural network training, inference and all other
+   computations supported by TensorFlow. Ignite Dataset is based on Apache
+   Ignite Binary Client Protocol:
+   https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
+
+@@IgniteDataset
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops \
+import IgniteDataset
+
+from tensorflow.python.util.all_util import remove_undocumented
+
+_allowed_symbols = [
+    "IgniteDataset",
+]
+
+remove_undocumented(__name__)
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
new file mode 100644
index 0000000000..bf0ef8766e
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
@@ -0,0 +1,304 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_binary_object_parser.h"
+
+namespace ignite {
+
+tensorflow::Status BinaryObjectParser::Parse(
+    uint8_t*& ptr, std::vector<tensorflow::Tensor>& out_tensors,
+    std::vector<int32_t>& types) {
+  uint8_t object_type_id = *ptr;
+  ptr += 1;
+
+  switch (object_type_id) {
+    case BYTE: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_UINT8, {});
+      tensor.scalar<tensorflow::uint8>()() = *((uint8_t*)ptr);
+      ptr += 1;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case SHORT: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT16, {});
+      tensor.scalar<tensorflow::int16>()() = *((int16_t*)ptr);
+      ptr += 2;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case INT: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT32, {});
+      tensor.scalar<tensorflow::int32>()() = *((int32_t*)ptr);
+      ptr += 4;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case LONG: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT64, {});
+      tensor.scalar<tensorflow::int64>()() = *((int64_t*)ptr);
+      ptr += 8;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case FLOAT: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_FLOAT, {});
+      tensor.scalar<float>()() = *((float*)ptr);
+      ptr += 4;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case DOUBLE: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_DOUBLE, {});
+      tensor.scalar<double>()() = *((double*)ptr);
+      ptr += 8;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case UCHAR: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_UINT16, {});
+      tensor.scalar<tensorflow::uint16>()() = *((uint16_t*)ptr);
+      ptr += 2;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case BOOL: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_BOOL, {});
+      tensor.scalar<bool>()() = *((bool*)ptr);
+      ptr += 1;
+      out_tensors.emplace_back(std::move(tensor));
+
+      break;
+    }
+    case STRING: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_STRING, {});
+      tensor.scalar<std::string>()() = std::string((char*)ptr, length);
+      ptr += length;
+      out_tensors.emplace_back(std::move(tensor));
+
+      break;
+    }
+    case DATE: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT64, {});
+      tensor.scalar<tensorflow::int64>()() = *((int64_t*)ptr);
+      ptr += 8;
+      out_tensors.emplace_back(std::move(tensor));
+
+      break;
+    }
+    case BYTE_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_UINT8,
+                                tensorflow::TensorShape({length}));
+
+      uint8_t* arr = (uint8_t*)ptr;
+      ptr += length;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::uint8>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case SHORT_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT16,
+                                tensorflow::TensorShape({length}));
+
+      int16_t* arr = (int16_t*)ptr;
+      ptr += length * 2;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::int16>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case INT_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT32,
+                                tensorflow::TensorShape({length}));
+
+      int32_t* arr = (int32_t*)ptr;
+      ptr += length * 4;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::int32>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case LONG_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT64,
+                                tensorflow::TensorShape({length}));
+
+      int64_t* arr = (int64_t*)ptr;
+      ptr += length * 8;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::int64>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case FLOAT_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_FLOAT,
+                                tensorflow::TensorShape({length}));
+
+      float* arr = (float*)ptr;
+      ptr += 4 * length;
+
+      std::copy_n(arr, length, tensor.flat<float>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case DOUBLE_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_DOUBLE,
+                                tensorflow::TensorShape({length}));
+
+      double* arr = (double*)ptr;
+      ptr += 8 * length;
+
+      std::copy_n(arr, length, tensor.flat<double>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case UCHAR_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_UINT16,
+                                tensorflow::TensorShape({length}));
+
+      uint16_t* arr = (uint16_t*)ptr;
+      ptr += length * 2;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::uint16>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case BOOL_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_BOOL,
+                                tensorflow::TensorShape({length}));
+
+      bool* arr = (bool*)ptr;
+      ptr += length;
+
+      std::copy_n(arr, length, tensor.flat<bool>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case STRING_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_STRING,
+                                tensorflow::TensorShape({length}));
+
+      for (int32_t i = 0; i < length; i++) {
+        int32_t str_length = *((int32_t*)ptr);
+        ptr += 4;
+        const int8_t* str = (const int8_t*)ptr;
+        ptr += str_length;
+        tensor.vec<std::string>()(i) = std::string((char*)str, str_length);
+      }
+
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case DATE_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT64,
+                                tensorflow::TensorShape({length}));
+      int64_t* arr = (int64_t*)ptr;
+      ptr += length * 8;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::int64>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case WRAPPED_OBJ: {
+      int32_t byte_arr_size = *((int32_t*)ptr);
+      ptr += 4;
+
+      tensorflow::Status status = Parse(ptr, out_tensors, types);
+      if (!status.ok()) return status;
+
+      int32_t offset = *((int32_t*)ptr);
+      ptr += 4;
+
+      break;
+    }
+    case COMPLEX_OBJ: {
+      uint8_t version = *ptr;
+      ptr += 1;
+      int16_t flags = *((int16_t*)ptr);  // USER_TYPE = 1, HAS_SCHEMA = 2
+      ptr += 2;
+      int32_t type_id = *((int32_t*)ptr);
+      ptr += 4;
+      int32_t hash_code = *((int32_t*)ptr);
+      ptr += 4;
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      int32_t schema_id = *((int32_t*)ptr);
+      ptr += 4;
+      int32_t schema_offset = *((int32_t*)ptr);
+      ptr += 4;
+
+      uint8_t* end = ptr + schema_offset - 24;
+      int32_t i = 0;
+      while (ptr < end) {
+        i++;
+        tensorflow::Status status = Parse(ptr, out_tensors, types);
+        if (!status.ok()) return status;
+      }
+
+      ptr += (length - schema_offset);
+
+      break;
+    }
+    default: {
+      return tensorflow::errors::Internal("Unknowd binary type (type id ",
+                                          (int)object_type_id, ")");
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
new file mode 100644
index 0000000000..1e845cbc56
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
@@ -0,0 +1,54 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <vector>
+#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace ignite {
+
+class BinaryObjectParser {
+ public:
+  tensorflow::Status Parse(uint8_t*& ptr,
+                           std::vector<tensorflow::Tensor>& out_tensors,
+                           std::vector<int32_t>& types);
+};
+
+enum ObjectType {
+  BYTE = 1,
+  SHORT = 2,
+  INT = 3,
+  LONG = 4,
+  FLOAT = 5,
+  DOUBLE = 6,
+  UCHAR = 7,
+  BOOL = 8,
+  STRING = 9,
+  DATE = 11,
+  BYTE_ARR = 12,
+  SHORT_ARR = 13,
+  INT_ARR = 14,
+  LONG_ARR = 15,
+  FLOAT_ARR = 16,
+  DOUBLE_ARR = 17,
+  UCHAR_ARR = 18,
+  BOOL_ARR = 19,
+  STRING_ARR = 20,
+  DATE_ARR = 22,
+  WRAPPED_OBJ = 27,
+  COMPLEX_OBJ = 103
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.cc b/tensorflow/contrib/ignite/kernels/ignite_client.cc
new file mode 100644
index 0000000000..5a8eddb944
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_client.cc
@@ -0,0 +1,55 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef IGNITE_CLIENT_H
+#define IGNITE_CLIENT_H
+#include "ignite_client.h"
+#endif
+
+namespace ignite {
+
+tensorflow::Status Client::ReadByte(uint8_t& data) {
+  return ReadData((uint8_t*)&data, 1);
+}
+
+tensorflow::Status Client::ReadShort(int16_t& data) {
+  return ReadData((uint8_t*)&data, 2);
+}
+
+tensorflow::Status Client::ReadInt(int32_t& data) {
+  return ReadData((uint8_t*)&data, 4);
+}
+
+tensorflow::Status Client::ReadLong(int64_t& data) {
+  return ReadData((uint8_t*)&data, 8);
+}
+
+tensorflow::Status Client::WriteByte(uint8_t data) {
+  return WriteData((uint8_t*)&data, 1);
+}
+
+tensorflow::Status Client::WriteShort(int16_t data) {
+  return WriteData((uint8_t*)&data, 2);
+}
+
+tensorflow::Status Client::WriteInt(int32_t data) {
+  return WriteData((uint8_t*)&data, 4);
+}
+
+tensorflow::Status Client::WriteLong(int64_t data) {
+  return WriteData((uint8_t*)&data, 8);
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h
new file mode 100644
index 0000000000..64e28d75f0
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_client.h
@@ -0,0 +1,40 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/lib/core/status.h"
+
+namespace ignite {
+
+class Client {
+ public:
+  virtual tensorflow::Status Connect() = 0;
+  virtual tensorflow::Status Disconnect() = 0;
+  virtual bool IsConnected() = 0;
+  virtual int GetSocketDescriptor() = 0;
+
+  virtual tensorflow::Status ReadByte(uint8_t& data);
+  virtual tensorflow::Status ReadShort(int16_t& data);
+  virtual tensorflow::Status ReadInt(int32_t& data);
+  virtual tensorflow::Status ReadLong(int64_t& data);
+  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length) = 0;
+
+  virtual tensorflow::Status WriteByte(uint8_t data);
+  virtual tensorflow::Status WriteShort(int16_t data);
+  virtual tensorflow::Status WriteInt(int32_t data);
+  virtual tensorflow::Status WriteLong(int64_t data);
+  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length) = 0;
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
new file mode 100644
index 0000000000..a9bf26955b
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
@@ -0,0 +1,123 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_dataset_iterator.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace ignite {
+
+IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx,
+                             std::string cache_name, std::string host,
+                             tensorflow::int32 port, bool local,
+                             tensorflow::int32 part,
+                             tensorflow::int32 page_size, std::string username,
+                             std::string password, std::string certfile,
+                             std::string keyfile, std::string cert_password,
+                             std::vector<tensorflow::int32> schema,
+                             std::vector<tensorflow::int32> permutation)
+    : DatasetBase(tensorflow::DatasetContext(ctx)),
+      cache_name(cache_name),
+      host(host),
+      port(port),
+      local(local),
+      part(part),
+      page_size(page_size),
+      username(username),
+      password(password),
+      certfile(certfile),
+      keyfile(keyfile),
+      cert_password(cert_password),
+      schema(schema),
+      permutation(permutation) {
+  SchemaToTypes();
+  SchemaToShapes();
+
+  LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name
+            << "', host='" << host << "', port=" << port << ", local=" << local
+            << ", part=" << part << ", page_size=" << page_size
+            << ", username='" << username << "', certfile='" << certfile
+            << "', keyfile='" << keyfile + "']";
+}
+
+IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; }
+
+std::unique_ptr<tensorflow::IteratorBase> IgniteDataset::MakeIteratorInternal(
+    const tensorflow::string& prefix) const {
+  return std::unique_ptr<tensorflow::IteratorBase>(new IgniteDatasetIterator(
+      {this, tensorflow::strings::StrCat(prefix, "::Ignite")}, this->host,
+      this->port, this->cache_name, this->local, this->part, this->page_size,
+      this->username, this->password, this->certfile, this->keyfile,
+      this->cert_password, this->schema, this->permutation));
+}
+
+const tensorflow::DataTypeVector& IgniteDataset::output_dtypes() const {
+  return dtypes;
+}
+
+const std::vector<tensorflow::PartialTensorShape>&
+IgniteDataset::output_shapes() const {
+  return shapes;
+}
+
+tensorflow::string IgniteDataset::DebugString() const {
+  return "IgniteDatasetOp::Dataset";
+}
+
+tensorflow::Status IgniteDataset::AsGraphDefInternal(
+    tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b,
+    tensorflow::Node** output) const {
+  return tensorflow::errors::Unimplemented(
+      "IgniteDataset does not support 'AsGraphDefInternal'");
+}
+
+void IgniteDataset::SchemaToTypes() {
+  for (auto e : schema) {
+    if (e == BYTE || e == BYTE_ARR) {
+      dtypes.push_back(tensorflow::DT_UINT8);
+    } else if (e == SHORT || e == SHORT_ARR) {
+      dtypes.push_back(tensorflow::DT_INT16);
+    } else if (e == INT || e == INT_ARR) {
+      dtypes.push_back(tensorflow::DT_INT32);
+    } else if (e == LONG || e == LONG_ARR) {
+      dtypes.push_back(tensorflow::DT_INT64);
+    } else if (e == FLOAT || e == FLOAT_ARR) {
+      dtypes.push_back(tensorflow::DT_FLOAT);
+    } else if (e == DOUBLE || e == DOUBLE_ARR) {
+      dtypes.push_back(tensorflow::DT_DOUBLE);
+    } else if (e == UCHAR || e == UCHAR_ARR) {
+      dtypes.push_back(tensorflow::DT_UINT8);
+    } else if (e == BOOL || e == BOOL_ARR) {
+      dtypes.push_back(tensorflow::DT_BOOL);
+    } else if (e == STRING || e == STRING_ARR) {
+      dtypes.push_back(tensorflow::DT_STRING);
+    } else {
+      LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
+    }
+  }
+}
+
+void IgniteDataset::SchemaToShapes() {
+  for (auto e : schema) {
+    if (e >= 1 && e < 10) {
+      shapes.push_back(tensorflow::PartialTensorShape({}));
+    } else if (e >= 12 && e < 21) {
+      shapes.push_back(tensorflow::PartialTensorShape({-1}));
+    } else {
+      LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
+    }
+  }
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
new file mode 100644
index 0000000000..2120dfd342
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
@@ -0,0 +1,65 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/dataset.h"
+
+namespace ignite {
+
+class IgniteDataset : public tensorflow::DatasetBase {
+ public:
+  IgniteDataset(tensorflow::OpKernelContext* ctx, std::string cache_name,
+                std::string host, tensorflow::int32 port, bool local,
+                tensorflow::int32 part, tensorflow::int32 page_size,
+                std::string username, std::string password,
+                std::string certfile, std::string keyfile,
+                std::string cert_password,
+                std::vector<tensorflow::int32> schema,
+                std::vector<tensorflow::int32> permutation);
+  ~IgniteDataset();
+  std::unique_ptr<tensorflow::IteratorBase> MakeIteratorInternal(
+      const tensorflow::string& prefix) const override;
+  const tensorflow::DataTypeVector& output_dtypes() const override;
+  const std::vector<tensorflow::PartialTensorShape>& output_shapes()
+      const override;
+  tensorflow::string DebugString() const override;
+
+ protected:
+  tensorflow::Status AsGraphDefInternal(
+      tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b,
+      tensorflow::Node** output) const override;
+
+ private:
+  const std::string cache_name;
+  const std::string host;
+  const tensorflow::int32 port;
+  const bool local;
+  const tensorflow::int32 part;
+  const tensorflow::int32 page_size;
+  const std::string username;
+  const std::string password;
+  const std::string certfile;
+  const std::string keyfile;
+  const std::string cert_password;
+  const std::vector<tensorflow::int32> schema;
+  const std::vector<tensorflow::int32> permutation;
+
+  tensorflow::DataTypeVector dtypes;
+  std::vector<tensorflow::PartialTensorShape> shapes;
+
+  void SchemaToTypes();
+  void SchemaToShapes();
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
new file mode 100644
index 0000000000..03cc3c1291
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
@@ -0,0 +1,447 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_dataset_iterator.h"
+
+#include "ignite_plain_client.h"
+#include "ignite_ssl_wrapper.h"
+#include "tensorflow/core/platform/logging.h"
+
+#include <time.h>
+#include <chrono>
+
+namespace ignite {
+
+#define CHECK_STATUS(status) \
+  if (!status.ok()) return status;
+
+IgniteDatasetIterator::IgniteDatasetIterator(
+    const Params& params, std::string host, tensorflow::int32 port,
+    std::string cache_name, bool local, tensorflow::int32 part,
+    tensorflow::int32 page_size, std::string username, std::string password,
+    std::string certfile, std::string keyfile, std::string cert_password,
+    std::vector<tensorflow::int32> schema,
+    std::vector<tensorflow::int32> permutation)
+    : tensorflow::DatasetIterator<IgniteDataset>(params),
+      cache_name(cache_name),
+      local(local),
+      part(part),
+      page_size(page_size),
+      username(username),
+      password(password),
+      schema(schema),
+      permutation(permutation),
+      remainder(-1),
+      cursor_id(-1),
+      last_page(false) {
+  Client* p_client = new PlainClient(host, port);
+
+  if (certfile.empty())
+    client = std::unique_ptr<Client>(p_client);
+  else
+    client = std::unique_ptr<Client>(new SslWrapper(
+        std::unique_ptr<Client>(p_client), certfile, keyfile, cert_password));
+
+  LOG(INFO) << "Ignite Dataset Iterator created";
+}
+
+IgniteDatasetIterator::~IgniteDatasetIterator() {
+  tensorflow::Status status = CloseConnection();
+  if (!status.ok()) LOG(ERROR) << status.ToString();
+
+  LOG(INFO) << "Ignite Dataset Iterator destroyed";
+}
+
+tensorflow::Status IgniteDatasetIterator::EstablishConnection() {
+  if (!client->IsConnected()) {
+    tensorflow::Status status = client->Connect();
+    if (!status.ok()) return status;
+
+    status = Handshake();
+    if (!status.ok()) {
+      tensorflow::Status disconnect_status = client->Disconnect();
+      if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString();
+
+      return status;
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::CloseConnection() {
+  if (cursor_id != -1 && !last_page) {
+    tensorflow::Status conn_status = EstablishConnection();
+    if (!conn_status.ok()) return conn_status;
+
+    CHECK_STATUS(client->WriteInt(18));  // Message length
+    CHECK_STATUS(
+        client->WriteShort(close_connection_opcode));  // Operation code
+    CHECK_STATUS(client->WriteLong(0));                // Request ID
+    CHECK_STATUS(client->WriteLong(cursor_id));        // Resource ID
+
+    int32_t res_len;
+    CHECK_STATUS(client->ReadInt(res_len));
+    if (res_len < 12)
+      return tensorflow::errors::Internal(
+          "Close Resource Response is corrupted");
+
+    int64_t req_id;
+    CHECK_STATUS(client->ReadLong(req_id));
+    int32_t status;
+    CHECK_STATUS(client->ReadInt(status));
+    if (status != 0) {
+      uint8_t err_msg_header;
+      CHECK_STATUS(client->ReadByte(err_msg_header));
+      if (err_msg_header == string_val) {
+        int32_t err_msg_length;
+        CHECK_STATUS(client->ReadInt(err_msg_length));
+        uint8_t* err_msg_c = new uint8_t[err_msg_length];
+        CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+        std::string err_msg((char*)err_msg_c, err_msg_length);
+        delete[] err_msg_c;
+
+        return tensorflow::errors::Internal("Close Resource Error [status=",
+                                            status, ", message=", err_msg, "]");
+      }
+      return tensorflow::errors::Internal("Close Resource Error [status=",
+                                          status, "]");
+    }
+
+    LOG(INFO) << "Query Cursor " << cursor_id << " is closed";
+
+    cursor_id = -1;
+
+    return client->Disconnect();
+  } else {
+    LOG(INFO) << "Query Cursor " << cursor_id << " is already closed";
+  }
+
+  return client->IsConnected() ? client->Disconnect()
+                               : tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::GetNextInternal(
+    tensorflow::IteratorContext* ctx,
+    std::vector<tensorflow::Tensor>* out_tensors, bool* end_of_sequence) {
+  if (remainder == 0 && last_page) {
+    LOG(INFO) << "Query Cursor " << cursor_id << " is closed";
+
+    cursor_id = -1;
+    *end_of_sequence = true;
+    return tensorflow::Status::OK();
+  } else {
+    tensorflow::Status status = EstablishConnection();
+    if (!status.ok()) return status;
+
+    if (remainder == -1 || remainder == 0) {
+      tensorflow::Status status =
+          remainder == -1 ? ScanQuery() : LoadNextPage();
+      if (!status.ok()) return status;
+    }
+
+    uint8_t* initial_ptr = ptr;
+    std::vector<int32_t> types;
+    std::vector<tensorflow::Tensor> tensors;
+
+    status = parser.Parse(ptr, tensors, types);  // Parse key
+    if (!status.ok()) return status;
+
+    status = parser.Parse(ptr, tensors, types);  // Parse val
+    if (!status.ok()) return status;
+
+    remainder -= (ptr - initial_ptr);
+
+    out_tensors->resize(tensors.size());
+    for (int32_t i = 0; i < tensors.size(); i++)
+      (*out_tensors)[permutation[i]] = std::move(tensors[i]);
+
+    *end_of_sequence = false;
+    return tensorflow::Status::OK();
+  }
+
+  *end_of_sequence = true;
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::SaveInternal(
+    tensorflow::IteratorStateWriter* writer) {
+  return tensorflow::errors::Unimplemented(
+      "Iterator for IgniteDataset does not support 'SaveInternal'");
+}
+
+tensorflow::Status IgniteDatasetIterator::RestoreInternal(
+    tensorflow::IteratorContext* ctx, tensorflow::IteratorStateReader* reader) {
+  return tensorflow::errors::Unimplemented(
+      "Iterator for IgniteDataset does not support 'RestoreInternal')");
+}
+
+tensorflow::Status IgniteDatasetIterator::Handshake() {
+  int32_t msg_len = 8;
+
+  if (username.empty())
+    msg_len += 1;
+  else
+    msg_len += 5 + username.length();
+
+  if (password.empty())
+    msg_len += 1;
+  else
+    msg_len += 5 + password.length();
+
+  CHECK_STATUS(client->WriteInt(msg_len));
+  CHECK_STATUS(client->WriteByte(1));
+  CHECK_STATUS(client->WriteShort(protocol_major_version));
+  CHECK_STATUS(client->WriteShort(protocol_minor_version));
+  CHECK_STATUS(client->WriteShort(protocol_patch_version));
+  CHECK_STATUS(client->WriteByte(2));
+  if (username.empty()) {
+    CHECK_STATUS(client->WriteByte(null_val));
+  } else {
+    CHECK_STATUS(client->WriteByte(string_val));
+    CHECK_STATUS(client->WriteInt(username.length()));
+    CHECK_STATUS(
+        client->WriteData((uint8_t*)username.c_str(), username.length()));
+  }
+
+  if (password.empty()) {
+    CHECK_STATUS(client->WriteByte(null_val));
+  } else {
+    CHECK_STATUS(client->WriteByte(string_val));
+    CHECK_STATUS(client->WriteInt(password.length()));
+    CHECK_STATUS(
+        client->WriteData((uint8_t*)password.c_str(), password.length()));
+  }
+
+  int32_t handshake_res_len;
+  CHECK_STATUS(client->ReadInt(handshake_res_len));
+  uint8_t handshake_res;
+  CHECK_STATUS(client->ReadByte(handshake_res));
+
+  LOG(INFO) << "Handshake length " << handshake_res_len << ", res "
+            << (int16_t)handshake_res;
+
+  if (handshake_res != 1) {
+    int16_t serv_ver_major;
+    CHECK_STATUS(client->ReadShort(serv_ver_major));
+    int16_t serv_ver_minor;
+    CHECK_STATUS(client->ReadShort(serv_ver_minor));
+    int16_t serv_ver_patch;
+    CHECK_STATUS(client->ReadShort(serv_ver_patch));
+    uint8_t header;
+    CHECK_STATUS(client->ReadByte(header));
+
+    if (header == string_val) {
+      int32_t length;
+      CHECK_STATUS(client->ReadInt(length));
+      uint8_t* err_msg_c = new uint8_t[length];
+      CHECK_STATUS(client->ReadData(err_msg_c, length));
+      std::string err_msg((char*)err_msg_c, length);
+      delete[] err_msg_c;
+
+      return tensorflow::errors::Internal(
+          "Handshake Error [result=", handshake_res, ", version=",
+          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch,
+          ", message='", err_msg, "']");
+    } else if (header == null_val) {
+      return tensorflow::errors::Internal(
+          "Handshake Error [result=", handshake_res, ", version=",
+          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]");
+    } else {
+      return tensorflow::errors::Internal(
+          "Handshake Error [result=", handshake_res, ", version=",
+          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]");
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::ScanQuery() {
+  CHECK_STATUS(client->WriteInt(25));                        // Message length
+  CHECK_STATUS(client->WriteShort(scan_query_opcode));       // Operation code
+  CHECK_STATUS(client->WriteLong(0));                        // Request ID
+  CHECK_STATUS(client->WriteInt(JavaHashCode(cache_name)));  // Cache name
+  CHECK_STATUS(client->WriteByte(0));                        // Flags
+  CHECK_STATUS(client->WriteByte(null_val));                 // Filter object
+  CHECK_STATUS(client->WriteInt(page_size));                 // Cursor page size
+  CHECK_STATUS(client->WriteInt(part));    // Partition to query
+  CHECK_STATUS(client->WriteByte(local));  // Local flag
+
+  int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
+                           std::chrono::system_clock::now().time_since_epoch())
+                           .count();
+
+  int32_t res_len;
+  CHECK_STATUS(client->ReadInt(res_len));
+
+  int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
+                          std::chrono::system_clock::now().time_since_epoch())
+                          .count();
+
+  LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms";
+
+  if (res_len < 12)
+    return tensorflow::errors::Internal("Scan Query Response is corrupted");
+
+  int64_t req_id;
+  CHECK_STATUS(client->ReadLong(req_id));
+
+  int32_t status;
+  CHECK_STATUS(client->ReadInt(status));
+
+  if (status != 0) {
+    uint8_t err_msg_header;
+    CHECK_STATUS(client->ReadByte(err_msg_header));
+
+    if (err_msg_header == string_val) {
+      int32_t err_msg_length;
+      CHECK_STATUS(client->ReadInt(err_msg_length));
+
+      uint8_t* err_msg_c = new uint8_t[err_msg_length];
+      CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+      std::string err_msg((char*)err_msg_c, err_msg_length);
+      delete[] err_msg_c;
+
+      return tensorflow::errors::Internal("Scan Query Error [status=", status,
+                                          ", message=", err_msg, "]");
+    }
+    return tensorflow::errors::Internal("Scan Query Error [status=", status,
+                                        "]");
+  }
+
+  CHECK_STATUS(client->ReadLong(cursor_id));
+
+  LOG(INFO) << "Query Cursor " << cursor_id << " is opened";
+
+  int32_t row_cnt;
+  CHECK_STATUS(client->ReadInt(row_cnt));
+
+  remainder = res_len - 25;
+  page = std::unique_ptr<uint8_t>(new uint8_t[remainder]);
+  ptr = page.get();
+
+  int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
+                      std::chrono::system_clock::now().time_since_epoch())
+                      .count();
+
+  CHECK_STATUS(client->ReadData(ptr, remainder));
+
+  int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
+                     std::chrono::system_clock::now().time_since_epoch())
+                     .count();
+  ;
+
+  double size_in_mb = 1.0 * remainder / 1024 / 1024;
+  double time_in_s = 1.0 * (stop - start) / 1000;
+  LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
+            << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
+
+  uint8_t last_page_b;
+  CHECK_STATUS(client->ReadByte(last_page_b));
+
+  last_page = !last_page_b;
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::LoadNextPage() {
+  CHECK_STATUS(client->WriteInt(18));                       // Message length
+  CHECK_STATUS(client->WriteShort(load_next_page_opcode));  // Operation code
+  CHECK_STATUS(client->WriteLong(0));                       // Request ID
+  CHECK_STATUS(client->WriteLong(cursor_id));               // Cursor ID
+
+  int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
+                           std::chrono::system_clock::now().time_since_epoch())
+                           .count();
+
+  int32_t res_len;
+  CHECK_STATUS(client->ReadInt(res_len));
+
+  int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
+                          std::chrono::system_clock::now().time_since_epoch())
+                          .count();
+
+  LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms";
+
+  if (res_len < 12)
+    return tensorflow::errors::Internal("Load Next Page Response is corrupted");
+
+  int64_t req_id;
+  CHECK_STATUS(client->ReadLong(req_id));
+
+  int32_t status;
+  CHECK_STATUS(client->ReadInt(status));
+
+  if (status != 0) {
+    uint8_t err_msg_header;
+    CHECK_STATUS(client->ReadByte(err_msg_header));
+
+    if (err_msg_header == string_val) {
+      int32_t err_msg_length;
+      CHECK_STATUS(client->ReadInt(err_msg_length));
+
+      uint8_t* err_msg_c = new uint8_t[err_msg_length];
+      CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+      std::string err_msg((char*)err_msg_c, err_msg_length);
+      delete[] err_msg_c;
+
+      return tensorflow::errors::Internal("Load Next Page Error [status=",
+                                          status, ", message=", err_msg, "]");
+    }
+    return tensorflow::errors::Internal("Load Next Page Error [status=", status,
+                                        "]");
+  }
+
+  int32_t row_cnt;
+  CHECK_STATUS(client->ReadInt(row_cnt));
+
+  remainder = res_len - 17;
+  page = std::unique_ptr<uint8_t>(new uint8_t[remainder]);
+  ptr = page.get();
+
+  int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
+                      std::chrono::system_clock::now().time_since_epoch())
+                      .count();
+
+  CHECK_STATUS(client->ReadData(ptr, remainder));
+
+  int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
+                     std::chrono::system_clock::now().time_since_epoch())
+                     .count();
+  ;
+
+  double size_in_mb = 1.0 * remainder / 1024 / 1024;
+  double time_in_s = 1.0 * (stop - start) / 1000;
+  LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
+            << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
+
+  uint8_t last_page_b;
+  CHECK_STATUS(client->ReadByte(last_page_b));
+
+  last_page = !last_page_b;
+
+  return tensorflow::Status::OK();
+}
+
+int32_t IgniteDatasetIterator::JavaHashCode(std::string str) {
+  int32_t h = 0;
+  for (char& c : str) {
+    h = 31 * h + c;
+  }
+  return h;
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
new file mode 100644
index 0000000000..d1df4527f9
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
@@ -0,0 +1,87 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_binary_object_parser.h"
+#include "ignite_dataset.h"
+
+#ifndef IGNITE_CLIENT_H
+#define IGNITE_CLIENT_H
+#include "ignite_client.h"
+#endif
+
+namespace ignite {
+
+class IgniteDatasetIterator
+    : public tensorflow::DatasetIterator<IgniteDataset> {
+ public:
+  IgniteDatasetIterator(const Params& params, std::string host,
+                        tensorflow::int32 port, std::string cache_name,
+                        bool local, tensorflow::int32 part,
+                        tensorflow::int32 page_size, std::string username,
+                        std::string password, std::string certfile,
+                        std::string keyfile, std::string cert_password,
+                        std::vector<tensorflow::int32> schema,
+                        std::vector<tensorflow::int32> permutation);
+  ~IgniteDatasetIterator();
+  tensorflow::Status GetNextInternal(
+      tensorflow::IteratorContext* ctx,
+      std::vector<tensorflow::Tensor>* out_tensors,
+      bool* end_of_sequence) override;
+
+ protected:
+  tensorflow::Status SaveInternal(
+      tensorflow::IteratorStateWriter* writer) override;
+  tensorflow::Status RestoreInternal(
+      tensorflow::IteratorContext* ctx,
+      tensorflow::IteratorStateReader* reader) override;
+
+ private:
+  std::unique_ptr<Client> client;
+  BinaryObjectParser parser;
+
+  const std::string cache_name;
+  const bool local;
+  const tensorflow::int32 part;
+  const tensorflow::int32 page_size;
+  const std::string username;
+  const std::string password;
+  const std::vector<tensorflow::int32> schema;
+  const std::vector<tensorflow::int32> permutation;
+
+  int32_t remainder;
+  int64_t cursor_id;
+  bool last_page;
+
+  std::unique_ptr<uint8_t> page;
+  uint8_t* ptr;
+
+  tensorflow::Status EstablishConnection();
+  tensorflow::Status CloseConnection();
+  tensorflow::Status Handshake();
+  tensorflow::Status ScanQuery();
+  tensorflow::Status LoadNextPage();
+  int32_t JavaHashCode(std::string str);
+};
+
+constexpr uint8_t null_val = 101;
+constexpr uint8_t string_val = 9;
+constexpr uint8_t protocol_major_version = 1;
+constexpr uint8_t protocol_minor_version = 1;
+constexpr uint8_t protocol_patch_version = 0;
+constexpr int16_t scan_query_opcode = 2000;
+constexpr int16_t load_next_page_opcode = 2001;
+constexpr int16_t close_connection_opcode = 0;
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
new file mode 100644
index 0000000000..543b5e4afc
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -0,0 +1,145 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_dataset.h"
+#include <stdlib.h>
+#include "tensorflow/core/framework/dataset.h"
+
+namespace tensorflow {
+
+class IgniteDatasetOp : public DatasetOpKernel {
+ public:
+  using DatasetOpKernel::DatasetOpKernel;
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override {
+    std::string cache_name = "";
+    std::string host = "";
+    int32 port = -1;
+    bool local = false;
+    int32 part = -1;
+    int32 page_size = -1;
+    std::string username = "";
+    std::string password = "";
+    std::string certfile = "";
+    std::string keyfile = "";
+    std::string cert_password = "";
+
+    const char* env_cache_name = std::getenv("IGNITE_DATASET_CACHE_NAME");
+    const char* env_host = std::getenv("IGNITE_DATASET_HOST");
+    const char* env_port = std::getenv("IGNITE_DATASET_PORT");
+    const char* env_local = std::getenv("IGNITE_DATASET_LOCAL");
+    const char* env_part = std::getenv("IGNITE_DATASET_PART");
+    const char* env_page_size = std::getenv("IGNITE_DATASET_PAGE_SIZE");
+    const char* env_username = std::getenv("IGNITE_DATASET_USERNAME");
+    const char* env_password = std::getenv("IGNITE_DATASET_PASSWORD");
+    const char* env_certfile = std::getenv("IGNITE_DATASET_CERTFILE");
+    const char* env_keyfile = std::getenv("IGNITE_DATASET_KEYFILE");
+    const char* env_cert_password = std::getenv("IGNITE_DATASET_CERT_PASSWORD");
+
+    if (env_cache_name)
+      cache_name = std::string(env_cache_name);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "cache_name",
+                                                           &cache_name));
+
+    if (env_host)
+      host = std::string(env_host);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "host", &host));
+
+    if (env_port)
+      port = atoi(env_port);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<int32>(ctx, "port", &port));
+
+    if (env_local)
+      local = true;
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<bool>(ctx, "local", &local));
+
+    if (env_part)
+      part = atoi(env_part);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<int32>(ctx, "part", &part));
+
+    if (env_page_size)
+      page_size = atoi(env_page_size);
+    else
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<int32>(ctx, "page_size", &page_size));
+
+    if (env_username)
+      username = std::string(env_username);
+    else
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<std::string>(ctx, "username", &username));
+
+    if (env_password)
+      password = std::string(env_password);
+    else
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<std::string>(ctx, "password", &password));
+
+    if (env_certfile)
+      certfile = std::string(env_certfile);
+    else
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<std::string>(ctx, "certfile", &certfile));
+
+    if (env_keyfile)
+      keyfile = std::string(env_keyfile);
+    else
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<std::string>(ctx, "keyfile", &keyfile));
+
+    if (env_cert_password)
+      cert_password = std::string(env_cert_password);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "cert_password",
+                                                           &cert_password));
+
+    const Tensor* schema_tensor;
+    OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor));
+    OP_REQUIRES(ctx, schema_tensor->dims() == 1,
+                errors::InvalidArgument("`schema` must be a vector."));
+
+    std::vector<int32> schema;
+    schema.reserve(schema_tensor->NumElements());
+    for (int i = 0; i < schema_tensor->NumElements(); i++) {
+      schema.push_back(schema_tensor->flat<int32>()(i));
+    }
+
+    const Tensor* permutation_tensor;
+    OP_REQUIRES_OK(ctx, ctx->input("permutation", &permutation_tensor));
+    OP_REQUIRES(ctx, schema_tensor->dims() == 1,
+                errors::InvalidArgument("`permutation` must be a vector."));
+
+    std::vector<int32> permutation;
+    permutation.reserve(permutation_tensor->NumElements());
+    for (int i = 0; i < permutation_tensor->NumElements(); i++) {
+      permutation.push_back(permutation_tensor->flat<int32>()(i));
+    }
+
+    *output = new ignite::IgniteDataset(
+        ctx, cache_name, host, port, local, part, page_size, username, password,
+        certfile, keyfile, cert_password, std::move(schema),
+        std::move(permutation));
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("IgniteDataset").Device(DEVICE_CPU),
+                        IgniteDatasetOp);
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
new file mode 100644
index 0000000000..5491af68d6
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef IGNITE_CLIENT_H
+#define IGNITE_CLIENT_H
+#include "ignite_client.h"
+#endif
+
+#include <string>
+
+namespace ignite {
+
+class PlainClient : public Client {
+ public:
+  PlainClient(std::string host, int port);
+  ~PlainClient();
+
+  virtual tensorflow::Status Connect();
+  virtual tensorflow::Status Disconnect();
+  virtual bool IsConnected();
+  virtual int GetSocketDescriptor();
+  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length);
+  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length);
+
+ private:
+  std::string host;
+  int port;
+  int sock;
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
new file mode 100644
index 0000000000..dbfa4f8786
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
@@ -0,0 +1,132 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_plain_client.h"
+
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/socket.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <map>
+
+#include <iostream>
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace ignite {
+
+PlainClient::PlainClient(std::string host, int port)
+    : host(host), port(port), sock(-1) {}
+
+PlainClient::~PlainClient() {
+  if (IsConnected()) {
+    tensorflow::Status status = Disconnect();
+    if (!status.ok()) LOG(WARNING) << status.ToString();
+  }
+}
+
+tensorflow::Status PlainClient::Connect() {
+  if (sock == -1) {
+    sock = socket(AF_INET, SOCK_STREAM, 0);
+    if (sock == -1)
+      return tensorflow::errors::Internal("Failed to create socket");
+  }
+
+  sockaddr_in server;
+
+  server.sin_addr.s_addr = inet_addr(host.c_str());
+  if (server.sin_addr.s_addr == -1) {
+    hostent* he;
+    in_addr** addr_list;
+
+    if ((he = gethostbyname(host.c_str())) == NULL)
+      return tensorflow::errors::Internal("Failed to resolve hostname \"", host,
+                                          "\"");
+
+    addr_list = (in_addr**)he->h_addr_list;
+    if (addr_list[0] != NULL) server.sin_addr = *addr_list[0];
+  }
+
+  server.sin_family = AF_INET;
+  server.sin_port = htons(port);
+
+  if (connect(sock, (sockaddr*)&server, sizeof(server)) < 0)
+    return tensorflow::errors::Internal("Failed to connect to \"", host, ":",
+                                        port, "\"");
+
+  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established";
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status PlainClient::Disconnect() {
+  int close_res = close(sock);
+  sock = -1;
+
+  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" is closed";
+
+  return close_res == 0 ? tensorflow::Status::OK()
+                        : tensorflow::errors::Internal(
+                              "Failed to correctly close connection");
+}
+
+bool PlainClient::IsConnected() { return sock != -1; }
+
+int PlainClient::GetSocketDescriptor() { return sock; }
+
+tensorflow::Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
+  int recieved = 0;
+
+  while (recieved < length) {
+    int res = recv(sock, buf, length - recieved, 0);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while reading from socket: ", res, ", ",
+          std::string(strerror(errno)));
+
+    if (res == 0)
+      return tensorflow::errors::Internal("Server closed connection");
+
+    recieved += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
+  int sent = 0;
+
+  while (sent < length) {
+    int res = send(sock, buf, length - sent, 0);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while writing into socket: ", res, ", ",
+          std::string(strerror(errno)));
+
+    sent += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
new file mode 100644
index 0000000000..f78c9b3627
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -0,0 +1,143 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_plain_client.h"
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <winsock2.h>
+#include <ws2tcpip.h>
+
+#pragma comment(lib, "Ws2_32.lib")
+#pragma comment(lib, "Mswsock.lib")
+#pragma comment(lib, "AdvApi32.lib")
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace ignite {
+
+PlainClient::PlainClient(std::string host, int port)
+    : host(host), port(port), sock(INVALID_SOCKET) {}
+
+PlainClient::~PlainClient() {
+  if (IsConnected()) {
+    tensorflow::Status status = Disconnect();
+    if (!status.ok()) LOG(WARNING) << status.ToString();
+  }
+}
+
+tensorflow::Status PlainClient::Connect() {
+  WSADATA wsaData;
+  addrinfo *result = NULL, *ptr = NULL, hints;
+
+  int res = WSAStartup(MAKEWORD(2, 2), &wsaData);
+  if (res != 0)
+    return tensorflow::errors::Internal("WSAStartup failed with error: ", res);
+
+  ZeroMemory(&hints, sizeof(hints));
+  hints.ai_family = AF_UNSPEC;
+  hints.ai_socktype = SOCK_STREAM;
+  hints.ai_protocol = IPPROTO_TCP;
+
+  res =
+      getaddrinfo(host.c_str(), std::to_string(port).c_str(), &hints, &result);
+  if (res != 0)
+    return tensorflow::errors::Internal("Getaddrinfo failed with error: ", res);
+
+  for (ptr = result; ptr != NULL; ptr = ptr->ai_next) {
+    sock = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
+    if (sock == INVALID_SOCKET) {
+      WSACleanup();
+      return tensorflow::errors::Internal("Socket failed with error: ",
+                                          WSAGetLastError());
+    }
+
+    res = connect(sock, ptr->ai_addr, (int)ptr->ai_addrlen);
+    if (res == SOCKET_ERROR) {
+      closesocket(sock);
+      sock = INVALID_SOCKET;
+      continue;
+    }
+
+    break;
+  }
+
+  freeaddrinfo(result);
+
+  if (sock == INVALID_SOCKET) {
+    WSACleanup();
+    return tensorflow::errors::Internal("Unable to connect to server");
+  }
+
+  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established";
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status PlainClient::Disconnect() {
+  int res = shutdown(sock, SD_SEND);
+  closesocket(sock);
+  WSACleanup();
+
+  if (res == SOCKET_ERROR)
+    return tensorflow::errors::Internal("Shutdown failed with error: ",
+                                        WSAGetLastError());
+  else
+    return tensorflow::Status::OK();
+}
+
+bool PlainClient::IsConnected() { return sock != INVALID_SOCKET; }
+
+int PlainClient::GetSocketDescriptor() { return sock; }
+
+tensorflow::Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
+  int recieved = 0;
+
+  while (recieved < length) {
+    int res = recv(sock, buf, length - recieved, 0);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while reading from socket: ", res);
+
+    if (res == 0)
+      return tensorflow::errors::Internal("Server closed connection");
+
+    recieved += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
+  int sent = 0;
+
+  while (sent < length) {
+    int res = send(sock, buf, length - sent, 0);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while writing into socket: ", res);
+
+    sent += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
new file mode 100644
index 0000000000..a1101b91f3
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
@@ -0,0 +1,149 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_ssl_wrapper.h"
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/logging.h"
+
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+
+namespace ignite {
+
+static int PasswordCb(char *buf, int size, int rwflag, void *password) {
+  strncpy(buf, (char *)(password), size);
+  buf[size - 1] = '\0';
+  return (strlen(buf));
+}
+
+SslWrapper::SslWrapper(std::shared_ptr<Client> client, std::string certfile,
+                       std::string keyfile, std::string cert_password)
+    : client(client),
+      certfile(certfile),
+      keyfile(keyfile),
+      cert_password(cert_password),
+      ctx(NULL) {}
+
+SslWrapper::~SslWrapper() {
+  if (IsConnected()) {
+    tensorflow::Status status = Disconnect();
+    if (!status.ok()) LOG(WARNING) << status.ToString();
+  }
+
+  if (ctx != NULL) {
+    SSL_CTX_free(ctx);
+    ctx = NULL;
+  }
+}
+
+tensorflow::Status SslWrapper::InitSslContext() {
+  OpenSSL_add_all_algorithms();
+  SSL_load_error_strings();
+
+  ctx = SSL_CTX_new(SSLv23_method());
+  if (ctx == NULL)
+    return tensorflow::errors::Internal("Couldn't create SSL context");
+
+  SSL_CTX_set_default_passwd_cb(ctx, PasswordCb);
+  SSL_CTX_set_default_passwd_cb_userdata(ctx, (void *)cert_password.c_str());
+
+  if (SSL_CTX_use_certificate_chain_file(ctx, certfile.c_str()) != 1)
+    return tensorflow::errors::Internal(
+        "Couldn't load cetificate chain (file '", certfile, "')");
+
+  std::string private_key_file = keyfile.empty() ? certfile : keyfile;
+  if (SSL_CTX_use_PrivateKey_file(ctx, private_key_file.c_str(),
+                                  SSL_FILETYPE_PEM) != 1)
+    return tensorflow::errors::Internal("Couldn't load private key (file '",
+                                        private_key_file, "')");
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status SslWrapper::Connect() {
+  tensorflow::Status status;
+
+  if (ctx == NULL) {
+    status = InitSslContext();
+    if (!status.ok()) return status;
+  }
+
+  ssl = SSL_new(ctx);
+  if (ssl == NULL)
+    return tensorflow::errors::Internal("Failed to establish SSL connection");
+
+  status = client->Connect();
+  if (!status.ok()) return status;
+
+  SSL_set_fd(ssl, client->GetSocketDescriptor());
+  if (SSL_connect(ssl) != 1)
+    return tensorflow::errors::Internal("Failed to establish SSL connection");
+
+  LOG(INFO) << "SSL connection established";
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status SslWrapper::Disconnect() {
+  SSL_free(ssl);
+
+  LOG(INFO) << "SSL connection closed";
+
+  return client->Disconnect();
+}
+
+bool SslWrapper::IsConnected() { return client->IsConnected(); }
+
+int SslWrapper::GetSocketDescriptor() { return client->GetSocketDescriptor(); }
+
+tensorflow::Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
+  int recieved = 0;
+
+  while (recieved < length) {
+    int res = SSL_read(ssl, buf, length - recieved);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while reading from SSL socket: ", res);
+
+    if (res == 0)
+      return tensorflow::errors::Internal("Server closed SSL connection");
+
+    recieved += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status SslWrapper::WriteData(uint8_t *buf, int32_t length) {
+  int sent = 0;
+
+  while (sent < length) {
+    int res = SSL_write(ssl, buf, length - sent);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while writing into socket: ", res);
+
+    sent += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
new file mode 100644
index 0000000000..e0c2a242dc
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef IGNITE_CLIENT_H
+#define IGNITE_CLIENT_H
+#include "ignite_client.h"
+#endif
+
+#include <openssl/ssl.h>
+#include <string>
+
+namespace ignite {
+
+class SslWrapper : public Client {
+ public:
+  SslWrapper(std::shared_ptr<Client> client, std::string certfile,
+             std::string keyfile, std::string cert_password);
+  ~SslWrapper();
+
+  virtual tensorflow::Status Connect();
+  virtual tensorflow::Status Disconnect();
+  virtual bool IsConnected();
+  virtual int GetSocketDescriptor();
+  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length);
+  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length);
+
+ private:
+  std::shared_ptr<Client> client;
+  std::string certfile;
+  std::string keyfile;
+  std::string cert_password;
+  SSL_CTX* ctx;
+  SSL* ssl;
+  tensorflow::Status InitSslContext();
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc
new file mode 100644
index 0000000000..17494d1cfd
--- /dev/null
+++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc
@@ -0,0 +1,64 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+REGISTER_OP("IgniteDataset")
+  .Input("cache_name: string")
+  .Input("host: string")
+  .Input("port: int32")
+  .Input("local: bool")
+  .Input("part: int32")
+  .Input("page_size: int32")
+  .Input("username: string")
+  .Input("password: string")
+  .Input("certfile: string")
+  .Input("keyfile: string")
+  .Input("cert_password: string")
+  .Input("schema: int32")
+  .Input("permutation: int32")
+  .Output("handle: variant")
+  .SetIsStateful()
+  .SetShapeFn(shape_inference::ScalarShape)
+  .Doc(R"doc(
+Apache Ignite is a memory-centric distributed database, caching, and processing
+platform for transactional, analytical, and streaming workloads, delivering 
+in-memory speeds at petabyte scale. This contrib package contains an 
+integration between Apache Ignite and TensorFlow. The integration is based on 
+tf.data from TensorFlow side and Binary Client Protocol from Apache Ignite side. 
+It allows to use Apache Ignite as a datasource for neural network training, 
+inference and all other computations supported by TensorFlow. Ignite Dataset
+is based on Apache Ignite Binary Client Protocol.
+
+cache_name: Ignite Cache Name.
+host: Ignite Thin Client Host.
+port: Ignite Thin Client Port.
+local: Local flag that defines that data should be fetched from local host only.
+part: Partition data should be fetched from.
+page_size: Page size for Ignite Thin Client.
+username: Username to authenticate via Ignite Thin Client.
+password: Password to authenticate via Ignite Thin Client.
+certfile: SSL certificate to establish SSL connection.
+keyfile: Private key file to establish SSL connection.
+cert_password: SSL certificate password to establish SSL connection.
+schema: Internal structure that defines schema of cache objects.
+permutation: Internal structure that defines permutation of cache objects.
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
new file mode 100644
index 0000000000..6fa073957a
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -0,0 +1,763 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Ignite Dataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import socket
+import struct
+import ssl
+import abc
+
+from tensorflow.contrib.ignite.python.ops import ignite_op_loader  # pylint: disable=unused-import
+from tensorflow.contrib.ignite.python.ops import gen_dataset_ops
+from tensorflow.python.data.ops.dataset_ops import Dataset
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+
+class Readable():
+  """Readable abstract class that exposes methods to do reading-related
+     operations.
+  """
+
+  @abc.abstractmethod
+  def __init__(self):
+    pass
+
+  def read_byte(self):
+    """Reads and returnes byte."""
+    return self.__read("b", 1)
+
+  def read_short(self):
+    """Reads and returns short (2 bytes, little-endian)."""
+    return self.__read("h", 2)
+
+  def read_int(self):
+    """Reads and returns int (4 bytes, little-endian)."""
+    return self.__read("i", 4)
+
+  def read_long(self):
+    """Reads and returns long (8 bytes, little-endian)."""
+    return self.__read("q", 8)
+
+  def skip(self, length):
+    """Skips the specified number of bytes."""
+    self.read_data(length)
+
+  @abc.abstractmethod
+  def read_data(self, length):
+    """Reads the specified number of bytes and returns them as a buffer."""
+    return None
+
+  def __read(self, data_type, length):
+    """Reads, unpacks and returns specified type (little-endian)."""
+    buffer = self.read_data(length)
+    return struct.unpack("<" + data_type, buffer)[0]
+
+class DataBuffer(Readable):
+  """DataBuffer class that exposes methods to read data from a byte buffer."""
+
+  def __init__(self, buffer):
+    """Constructs a new instance of DataBuffer based on the specified byte
+       buffer.
+
+    Args:
+      buffer: Buffer to be read.
+    """
+    Readable.__init__(self)
+    self.buffer = buffer
+    self.ptr = 0
+
+  def read_data(self, length):
+    """Reads the specified number of bytes and returns them as a buffer."""
+    data_buffer = self.buffer[self.ptr:][:length]
+    self.ptr += length
+    return data_buffer
+
+class TcpClient(Readable):
+  """TcpClient class that exposes methods to read data from a socket."""
+
+  def __init__(self, host, port, certfile=None, keyfile=None, password=None):
+    """Constructs a new instance of TcpClient based on the specified host
+       and port.
+
+    Args:
+      host: Host to be connected.
+      port: Port to be connected.
+      certfile: File in PEM format containing the certificate as well as any
+        number of CA certificates needed to establish the certificate’s
+        authenticity.
+      keyfile: File containing the private key (otherwise the private key
+        will be taken from certfile as well).
+      password: Password to be used if the private key is encrypted and a
+        password is necessary.
+    """
+    Readable.__init__(self)
+    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+
+    if certfile is not None:
+      context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+      context.load_cert_chain(certfile, keyfile, password)
+      self.sock = context.wrap_socket(self.sock)
+    else:
+      if keyfile is not None:
+        raise Exception("SSL is disabled, keyfile must not be specified \
+          (to enable SSL specify certfile)")
+      if password is not None:
+        raise Exception("SSL is disabled, password must not be specified \
+          (to enable SSL specify certfile)")
+
+    self.host = host
+    self.port = port
+
+  def __enter__(self):
+    """Connects to host and port specified in the constructor."""
+    self.sock.connect((self.host, self.port))
+    return self
+
+  def __exit__(self, t, v, traceback):
+    """Disconnects the socket."""
+    self.sock.close()
+
+  def write_byte(self, v):
+    """Writes the specified byte."""
+    self.__write(v, "b")
+
+  def write_short(self, v):
+    """Writes the specified short (2 bytes, little-endian)."""
+    self.__write(v, "h")
+
+  def write_int(self, v):
+    """Writes the specified short (4 bytes, little-endian)."""
+    self.__write(v, "i")
+
+  def write_long(self, v):
+    """Writes the specified int (8 bytes, little-endian)."""
+    self.__write(v, "q")
+
+  def write_string(self, v):
+    """Writes the specified string."""
+    self.sock.sendall(v.encode("UTF-8"))
+
+  def read_data(self, length):
+    """Reads the specified number of bytes and returns them as a buffer."""
+    data_buffer = None
+    rem = length
+    while rem > 0:
+      buf = self.sock.recv(rem)
+      rem = rem - len(buf)
+      if data_buffer is None:
+        data_buffer = buf
+      else:
+        data_buffer += buf
+    return data_buffer
+
+  def __write(self, value, data_type):
+    """Packs and writes data using the specified type (little-endian)."""
+    data_buffer = struct.pack("<" + data_type, value)
+    self.sock.sendall(data_buffer)
+
+class BinaryType():
+  """BinaryType class that encapsulated type id, type name and fields."""
+
+  def __init__(self, type_id, type_name, fields):
+    """Constructs a new instance of BinaryType."""
+    self.type_id = type_id
+    self.type_name = type_name
+    self.fields = fields
+
+class BinaryField():
+  """BinaryField class that encapsulated field name, type id and field id."""
+
+  def __init__(self, field_name, type_id, field_id):
+    """Constructs a new instance of BinaryField."""
+    self.field_name = field_name
+    self.type_id = type_id
+    self.field_id = field_id
+
+# Binary types defined in Apache Ignite Thin client and supported by
+# TensorFlow on Apache Ignite, see
+# https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
+types = {
+    1: (dtypes.uint8, False),
+    2: (dtypes.int16, False),
+    3: (dtypes.int32, False),
+    4: (dtypes.int64, False),
+    5: (dtypes.float32, False),
+    6: (dtypes.float64, False),
+    7: (dtypes.uint16, False),
+    8: (dtypes.bool, False),
+    9: (dtypes.string, False),
+    12: (dtypes.uint8, True),
+    13: (dtypes.int16, True),
+    14: (dtypes.int32, True),
+    15: (dtypes.int64, True),
+    16: (dtypes.float32, True),
+    17: (dtypes.float64, True),
+    18: (dtypes.uint16, True),
+    19: (dtypes.bool, True),
+    20: (dtypes.string, True)
+}
+
+class TypeTreeNode():
+  """TypeTreeNode class exposes methods to format object tree structure
+     data.
+  """
+  def __init__(self, name, type_id, fields=None, permutation=None):
+    """Constructs a new instance of TypeTreeNode.
+
+    Args:
+      name: Name of the object tree node.
+      type_id: Type id of the object tree node.
+      fields: List of fields (children of the object tree node).
+      permutation: Permutation that should be applied to order object children.
+    """
+    self.name = name
+    self.type_id = type_id
+    self.fields = fields
+    self.permutation = permutation
+
+  def to_output_classes(self):
+    """Formats the tree object the way required in 'output_classes' property of
+       dataset.
+    """
+    if self.fields is None:
+      return ops.Tensor
+    output_classes = {}
+    for field in self.fields:
+      output_classes[field.name] = field.to_output_classes()
+    return output_classes
+
+  def to_output_shapes(self):
+    """Formats the tree object the way required in 'output_shapes' property of
+       dataset.
+    """
+    if self.fields is None:
+      object_type = types[self.type_id]
+      if object_type is not None:
+        is_array = object_type[1]
+        if is_array:
+          return tensor_shape.TensorShape([None])
+        return tensor_shape.TensorShape([])
+      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
+    output_shapes = {}
+    for field in self.fields:
+      output_shapes[field.name] = field.to_output_shapes()
+    return output_shapes
+
+  def to_output_types(self):
+    """Formats the tree object the way required in 'output_types' property of
+       dataset.
+    """
+    if self.fields is None:
+      object_type = types[self.type_id]
+      if object_type is not None:
+        return object_type[0]
+      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
+    else:
+      output_types = {}
+      for field in self.fields:
+        output_types[field.name] = field.to_output_types()
+      return output_types
+
+  def to_flat(self):
+    """Returns a list of leaf node types."""
+    return self.to_flat_rec([])
+
+  def to_permutation(self):
+    """Returns a permutation that should be applied to order object leafs."""
+    correct_order_dict = {}
+    self.traversal_rec(correct_order_dict, 0)
+    object_order = []
+    self.traversal_permutation_rec(object_order)
+    return [correct_order_dict[o] for o in object_order]
+
+  def to_flat_rec(self, flat):
+    """Formats a list of leaf node types."""
+    flat.append(self.type_id)
+    if self.fields is not None:
+      for field in self.fields:
+        field.to_flat_rec(flat)
+    return flat
+
+  def traversal_permutation_rec(self, permutation):
+    """Collects nodes in accordance with permutation."""
+    if self.fields is None:
+      permutation.append(self)
+    else:
+      for idx in self.permutation:
+        field = self.fields[idx]
+        field.traversal_permutation_rec(permutation)
+
+  def traversal_rec(self, d, i):
+    """Collects nodes in pre-order traversal."""
+    if self.fields is None:
+      d[self] = i
+      i += 1
+    else:
+      for field in self.fields:
+        i = field.traversal_rec(d, i)
+    return i
+
+class IgniteClient(TcpClient):
+  """IgniteClient class exposes methods to work with Apache Ignite using Thin
+     client. This client works with assumption that all object in the cache
+     have the same structure (homogeneous objects) and the cache contains at
+     least one object.
+  """
+  def __init__(self, host, port, username=None, password=None, certfile=None,\
+    keyfile=None, cert_password=None):
+    """Constructs a new instance of IgniteClient.
+
+    Args:
+      host: Apache Ignite Thin client host to be connected.
+      port: Apache Ignite Thin client port to be connected.
+      username: Apache Ignite Thin Client authentication username.
+      password: Apache Ignite Thin Client authentication password.
+      certfile: File in PEM format containing the certificate as well as
+        any number of CA certificates needed to establish the certificate’s
+        authenticity.
+      keyfile: File containing the private key (otherwise the private key
+        will be taken from certfile as well).
+      cert_password: Password to be used if the private key is encrypted and a
+        password is necessary.
+    """
+    TcpClient.__init__(self, host, port, certfile, keyfile, cert_password)
+    self.username = username
+    self.password = password
+
+  def handshake(self):
+    """Makes a handshake required to be made after connect before any other
+       calls.
+    """
+    msg_len = 8
+
+    if self.username is None:
+      msg_len += 1
+    else:
+      msg_len += 5 + len(self.username)
+
+    if self.password is None:
+      msg_len += 1
+    else:
+      msg_len += 5 + len(self.password)
+
+    self.write_int(msg_len)   # Message length
+    self.write_byte(1)        # Handshake operation
+    self.write_short(1)       # Version (1.1.0)
+    self.write_short(1)
+    self.write_short(0)
+    self.write_byte(2)        # Thin client
+
+    if self.username is None: # Username
+      self.write_byte(101)
+    else:
+      self.write_byte(9)
+      self.write_int(len(self.username))
+      self.write_string(self.username)
+
+    if self.password is None: # Password
+      self.write_byte(101)
+    else:
+      self.write_byte(9)
+      self.write_int(len(self.password))
+      self.write_string(self.password)
+
+    self.read_int()           # Result length
+    res = self.read_byte()
+
+    if res != 1:
+      serv_ver_major = self.read_short()
+      serv_ver_minor = self.read_short()
+      serv_ver_patch = self.read_short()
+      err_msg = self.__parse_string()
+      if err_msg is None:
+        raise Exception("Handshake Error [result=%d, version=%d.%d.%d]" \
+            % (res, serv_ver_major, serv_ver_minor, serv_ver_patch))
+      else:
+        raise Exception("Handshake Error [result=%d, version=%d.%d.%d, \
+            message='%s']" % (
+                res,
+                serv_ver_major,
+                serv_ver_minor,
+                serv_ver_patch,
+                err_msg
+            ))
+
+  def get_cache_type(self, cache_name):
+    """Collects type information about objects stored in the specified
+       cache.
+    """
+    cache_name_hash = self.__java_hash_code(cache_name)
+    self.write_int(25)        # Message length
+    self.write_short(2000)      # Operation code
+    self.write_long(0)        # Request ID
+    self.write_int(cache_name_hash) # Cache name
+    self.write_byte(0)        # Flags
+    self.write_byte(101)      # Filter (NULL)
+    self.write_int(1)         # Cursor page size
+    self.write_int(-1)        # Partition to query
+    self.write_byte(0)        # Local flag
+
+    result_length = self.read_int()
+    self.read_long()          # Request id
+    status = self.read_int()
+
+    if status != 0:
+      err_msg = self.__parse_string()
+      if err_msg is None:
+        raise Exception("Scan Query Error [status=%s]" % status)
+      else:
+        raise Exception("Scan Query Error [status=%s, message='%s']" \
+            % (status, err_msg))
+
+    self.read_long()          # Cursor id
+    row_count = self.read_int()
+
+    if row_count == 0:
+      raise Exception("Scan Query returned empty result, so it's \
+        impossible to derive the cache type")
+
+    payload = DataBuffer(self.read_data(result_length - 25))
+
+    self.read_byte()          # Next page
+
+    res = TypeTreeNode("root", 0, [
+        self.__collect_types("key", payload),
+        self.__collect_types("val", payload)
+    ], [0, 1])
+
+    return res
+
+  def __java_hash_code(self, s):
+    """Computes hash code of the specified string using Java code."""
+    h = 0
+    for c in s:
+      h = (31 * h + ord(c)) & 0xFFFFFFFF
+    return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000
+
+  def __collect_types(self, field_name, data):
+    """Extracts type information from the specified object."""
+    type_id = data.read_byte()
+
+    # Byte scalar.
+    if type_id == 1:
+      data.skip(1)
+      return TypeTreeNode(field_name, type_id)
+
+    # Short scalar.
+    if type_id == 2:
+      data.skip(2)
+      return TypeTreeNode(field_name, type_id)
+
+    # Integer scalar.
+    if type_id == 3:
+      data.skip(4)
+      return TypeTreeNode(field_name, type_id)
+
+    # Long scalar.
+    if type_id == 4:
+      data.skip(8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Float scalar.
+    if type_id == 5:
+      data.skip(4)
+      return TypeTreeNode(field_name, type_id)
+
+    # Double scalar.
+    if type_id == 6:
+      data.skip(8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Char scalar.
+    if type_id == 7:
+      data.skip(2)
+      return TypeTreeNode(field_name, type_id)
+
+    # Bool scalar.
+    if type_id == 8:
+      data.skip(1)
+      return TypeTreeNode(field_name, type_id)
+
+    # String scalar.
+    if type_id == 9:
+      length = data.read_int()
+      data.skip(length)
+      return TypeTreeNode(field_name, type_id)
+
+    # UUID scalar.
+    if type_id == 10:
+      data.skip(16)
+      return TypeTreeNode(field_name, type_id)
+
+    # Date scalar.
+    if type_id == 11:
+      data.skip(8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Byte array.
+    if type_id == 12:
+      length = data.read_int()
+      data.skip(length)
+      return TypeTreeNode(field_name, type_id)
+
+    # Short array.
+    if type_id == 13:
+      length = data.read_int()
+      data.skip(length * 2)
+      return TypeTreeNode(field_name, type_id)
+
+    # Integer array.
+    if type_id == 14:
+      length = data.read_int()
+      data.skip(length * 4)
+      return TypeTreeNode(field_name, type_id)
+
+    # Long array.
+    if type_id == 15:
+      length = data.read_int()
+      data.skip(length * 8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Float array.
+    if type_id == 16:
+      length = data.read_int()
+      data.skip(length * 4)
+      return TypeTreeNode(field_name, type_id)
+
+    # Double array.
+    if type_id == 17:
+      length = data.read_int()
+      data.skip(length * 8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Char array.
+    if type_id == 18:
+      length = data.read_int()
+      data.skip(length * 2)
+      return TypeTreeNode(field_name, type_id)
+
+    # Bool array.
+    if type_id == 19:
+      length = data.read_int()
+      data.skip(length)
+      return TypeTreeNode(field_name, type_id)
+
+    # String array.
+    if type_id == 20:
+      length = data.read_int()
+      for _ in range(length):
+        header = data.read_byte()
+        if header == 9:
+          str_length = data.read_int()
+          data.skip(str_length)
+        elif header == 101:
+          pass
+        else:
+          raise Exception("Unknown binary type when expected string \
+            [type_id=%d]" % header)
+      return TypeTreeNode(field_name, type_id)
+
+    # UUID array.
+    if type_id == 21:
+      length = data.read_int()
+      data.skip(length * 16) # TODO: support NULL values.
+      return TypeTreeNode(field_name, type_id)
+
+    # Date array.
+    if type_id == 22:
+      length = data.read_int()
+      data.skip(length * 8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Wrapped Binary Object.
+    if type_id == 27:
+      length = data.read_int()
+      inner_data = data.read_data(length)
+      data.read_int()   # Offset
+      return self.__collect_types(field_name, DataBuffer(inner_data))
+
+    # Complex Object.
+    if type_id == 103:
+      data.read_byte()  # Object version
+      data.read_short() # Object flags
+      obj_type_id = data.read_int()
+      data.read_int()   # Object hash code
+      obj_length = data.read_int()
+      data.read_int()   # Object schema id
+      obj_schema_offset = data.read_int()
+
+      obj_type = self.__get_type(obj_type_id)
+      children = []
+
+      for obj_field in obj_type.fields:
+        child = self.__collect_types(obj_field.field_name, data)
+        children.append(child)
+
+      children_sorted = sorted(children, key=lambda child: child.name)
+      permutation = [children_sorted.index(child) for child in children]
+      children = children_sorted
+
+      data.skip(obj_length - obj_schema_offset)
+
+      return TypeTreeNode(field_name, type_id, children, permutation)
+
+    raise Exception("Unknown binary type [type_id=%d]" % type_id)
+
+  def __get_type(self, type_id):
+    """Queries Apache Ignite information about type by type id."""
+    self.write_int(14)      # Message length
+    self.write_short(3002)  # Operation code
+    self.write_long(0)      # Request ID
+    self.write_int(type_id) # Type ID
+
+    self.read_int()         # Result length
+    self.read_long()        # Request id
+    status = self.read_int()
+
+    if status != 0:
+      err_msg = self.__parse_string()
+      if err_msg is None:
+        raise Exception("Get Binary Type Error [status=%d, message='%s']" \
+            % (status, err_msg))
+      else:
+        raise Exception("Get Binary Type Error [status=%d]" % status)
+
+    binary_type_exists = self.read_byte()
+
+    if binary_type_exists == 0:
+      raise Exception("Binary type not found [type_id=%d] " % type_id)
+
+    binary_type_id = self.read_int()
+    binary_type_name = self.__parse_string()
+    self.__parse_string()   # Affinity field name
+
+    fields = []
+    for _ in range(self.read_int()):
+      field_name = self.__parse_string()
+      field_type_id = self.read_int()
+      field_id = self.read_int()
+
+      field = BinaryField(field_name, field_type_id, field_id)
+      fields.append(field)
+
+    is_enum = self.read_byte()
+    if is_enum == 1:
+      raise Exception("Enum fields are not supported yet")
+
+    schema_cnt = self.read_int()
+    for _ in range(schema_cnt):
+      self.read_int()       # Schema id
+      field_cnt = self.read_int()
+      self.skip(field_cnt * 4)
+
+    return BinaryType(binary_type_id, binary_type_name, fields)
+
+  def __parse_string(self):
+    """Parses string."""
+    header = self.read_byte()
+    if header == 9:
+      length = self.read_int()
+      return self.read_data(length).decode("utf-8")
+    if header == 101:
+      return None
+    raise Exception("Unknown binary type when expected string [type_id=%d]" \
+        % header)
+
+class IgniteDataset(Dataset):
+  """Apache Ignite is a memory-centric distributed database, caching, and
+     processing platform for transactional, analytical, and streaming workloads,
+     delivering in-memory speeds at petabyte scale. This contrib package
+     contains an integration between Apache Ignite and TensorFlow. The
+     integration is based on tf.data from TensorFlow side and Binary Client
+     Protocol from Apache Ignite side. It allows to use Apache Ignite as a
+     datasource for neural network training, inference and all other
+     computations supported by TensorFlow. Ignite Dataset is based on Apache
+     Ignite Binary Client Protocol.
+  """
+
+  def __init__(self, cache_name, host="localhost", port=10800, local=False,\
+    part=-1, page_size=100, username=None, password=None, certfile=None,\
+    keyfile=None, cert_password=None):
+    """Create a IgniteDataset.
+
+    Args:
+      cache_name: Cache name to be used as datasource.
+      host: Apache Ignite Thin Client host to be connected.
+      port: Apache Ignite Thin Client port to be connected.
+      local: Local flag that defines to query only local data.
+      part: Number of partitions to be queried.
+      page_size: Apache Ignite Thin Client page size.
+      username: Apache Ignite Thin Client authentication username.
+      password: Apache Ignite Thin Client authentication password.
+      certfile: File in PEM format containing the certificate as well as
+        any number of CA certificates needed to establish the certificate’s
+        authenticity.
+      keyfile: File containing the private key (otherwise the private key
+        will be taken from certfile as well).
+      cert_password: Password to be used if the private key is encrypted and a
+        password is necessary.
+    """
+    super(IgniteDataset, self).__init__()
+
+    with IgniteClient(host, port, username, password, certfile, keyfile,\
+        cert_password) as client:
+      client.handshake()
+      self.cache_type = client.get_cache_type(cache_name)
+
+    self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string,\
+        name="cache_name")
+    self.host = ops.convert_to_tensor(host, dtype=dtypes.string, name="host")
+    self.port = ops.convert_to_tensor(port, dtype=dtypes.int32, name="port")
+    self.local = ops.convert_to_tensor(local, dtype=dtypes.bool, name="local")
+    self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part")
+    self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,\
+        name="page_size")
+    self.username = ops.convert_to_tensor("" if username is None else username,\
+        dtype=dtypes.string, name="username")
+    self.password = ops.convert_to_tensor("" if password is None else password,\
+        dtype=dtypes.string, name="password")
+    self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,\
+        dtype=dtypes.string, name="certfile")
+    self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,\
+        dtype=dtypes.string, name="keyfile")
+    self.cert_password = ops.convert_to_tensor("" if cert_password is None\
+        else cert_password, dtype=dtypes.string, name="cert_password")
+    self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),\
+        dtype=dtypes.int32, name="schema")
+    self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),\
+        dtype=dtypes.int32, name="permutation")
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,\
+        self.port, self.local, self.part, self.page_size, self.username,\
+        self.password, self.certfile, self.keyfile, self.cert_password,\
+        self.schema, self.permutation)
+
+  @property
+  def output_classes(self):
+    return self.cache_type.to_output_classes()
+
+  @property
+  def output_shapes(self):
+    return self.cache_type.to_output_shapes()
+
+  @property
+  def output_types(self):
+    return self.cache_type.to_output_types()
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py b/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py
new file mode 100644
index 0000000000..8115bda85b
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py
@@ -0,0 +1,25 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Python helper for loading Ignite ops and kernels."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.util import loader
+from tensorflow.python.platform import resource_loader
+
+_dataset_ops = loader.load_op_library(
+    resource_loader.get_path_to_datafile("../../_dataset_ops.so"))
diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh b/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh
new file mode 100755
index 0000000000..f4607ce8ad
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-plain.xml & 
+sleep 5 # Wait Apache Ignite to be started
+
+./apache-ignite-fabric/bin/sqlline.sh \
+-u "jdbc:ignite:thin://127.0.0.1/" \
+--run=/data/sql/init.sql
+
+tail -f nohup.out
diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh b/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh
new file mode 100755
index 0000000000..dde1162816
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-ssl-auth.xml & 
+sleep 5 # Wait Apache Ignite to be started
+
+./apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://127.0.0.1/?\
+sslMode=require&\
+sslClientCertificateKeyStoreUrl=/data/keystore/client.jks&\
+sslClientCertificateKeyStorePassword=123456&\
+sslTrustAll=true&\
+username=ignite&\
+password=ignite" --run=/data/sql/init.sql
+
+tail -f nohup.out
diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh b/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh
new file mode 100755
index 0000000000..58b40b2738
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-ssl.xml & 
+sleep 5 # Wait Apache Ignite to be started
+
+./apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://127.0.0.1/?\
+sslMode=require&\
+sslClientCertificateKeyStoreUrl=/data/keystore/client.jks&\
+sslClientCertificateKeyStorePassword=123456&\
+sslTrustAll=true" --run=/data/sql/init.sql --verbose=true
+
+tail -f nohup.out
diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml
new file mode 100644
index 0000000000..d900174a8a
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans
+       http://www.springframework.org/schema/beans/spring-beans.xsd
+       http://www.springframework.org/schema/util
+       http://www.springframework.org/schema/util/spring-util.xsd">  
+
+  <bean class="org.apache.ignite.configuration.IgniteConfiguration">
+    <property name="discoverySpi">
+      <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+        <property name="ipFinder">
+          <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
+            <property name="addresses">
+              <list>
+                <value>127.0.0.1</value>
+              </list>
+            </property>
+          </bean>
+        </property>
+      </bean>
+    </property>
+  </bean>
+
+</beans>
diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml
new file mode 100644
index 0000000000..8e001b28ab
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans
+       http://www.springframework.org/schema/beans/spring-beans.xsd
+       http://www.springframework.org/schema/util
+       http://www.springframework.org/schema/util/spring-util.xsd">  
+
+  <bean id="client-connector-configuration" 
+        class="org.apache.ignite.configuration.ClientConnectorConfiguration">
+    <property name="sslClientAuth" value="true" />
+    <property name="sslEnabled" value="true" />
+    <property name="useIgniteSslContextFactory" value="true" />
+  </bean>
+
+  <bean id="ssl-context-factory" 
+        class="org.apache.ignite.ssl.SslContextFactory">
+    <property name="keyStoreFilePath" value="/data/keystore/server.jks"/>
+    <property name="keyStorePassword" value="123456"/>
+    <property name="trustStoreFilePath" value="/data/keystore/trust.jks"/>
+    <property name="trustStorePassword" value="123456"/>
+  </bean>
+
+  <bean id="ignite-configuration" 
+        class="org.apache.ignite.configuration.IgniteConfiguration">
+    <property name="clientConnectorConfiguration" 
+              ref="client-connector-configuration" />
+    <property name="sslContextFactory" ref="ssl-context-factory" />
+    <property name="discoverySpi">
+      <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+        <property name="ipFinder">
+          <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
+            <property name="addresses">
+              <list>
+                <value>127.0.0.1</value>
+              </list>
+            </property>
+          </bean>
+        </property>
+      </bean>
+    </property>
+  </bean>
+
+</beans>
diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml
new file mode 100644
index 0000000000..42d480c114
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans
+       http://www.springframework.org/schema/beans/spring-beans.xsd
+       http://www.springframework.org/schema/util
+       http://www.springframework.org/schema/util/spring-util.xsd">  
+
+  <bean id="client-connector-configuration" 
+        class="org.apache.ignite.configuration.ClientConnectorConfiguration">
+    <property name="sslClientAuth" value="false" />
+    <property name="sslEnabled" value="true" />
+    <property name="useIgniteSslContextFactory" value="true" />
+  </bean>
+
+  <bean id="ssl-context-factory" 
+        class="org.apache.ignite.ssl.SslContextFactory">
+    <property name="keyStoreFilePath" value="/data/keystore/server.jks"/>
+    <property name="keyStorePassword" value="123456"/>
+    <property name="trustStoreFilePath" value="/data/keystore/trust.jks"/>
+    <property name="trustStorePassword" value="123456"/>
+  </bean>
+
+  <bean id="ignite-configuration" 
+        class="org.apache.ignite.configuration.IgniteConfiguration">
+    <property name="clientConnectorConfiguration" 
+              ref="client-connector-configuration" />
+    <property name="sslContextFactory" ref="ssl-context-factory" />
+    <property name="discoverySpi">
+      <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+        <property name="ipFinder">
+          <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
+            <property name="addresses">
+              <list>
+                <value>127.0.0.1</value>
+              </list>
+            </property>
+          </bean>
+        </property>
+      </bean>
+    </property>
+  </bean>
+
+</beans>
diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
new file mode 100644
index 0000000000..933e62b804
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
@@ -0,0 +1,77 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License.  You may obtain a copy of
+# the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+# License for the specific language governing permissions and limitations under
+# the License.
+# ==============================================================================
+"""Tests for IgniteDataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tensorflow as tf
+from tensorflow.contrib.ignite import IgniteDataset
+from tensorflow.python.framework import errors
+from tensorflow.python.platform import test
+
+class IgniteDatasetTest(test.TestCase):
+  """The Apache Ignite servers have to setup before the test and tear down
+     after the test manually. The docker engine has to be installed.
+
+     To setup Apache Ignite servers:
+     $ bash start_ignite.sh
+
+     To tear down Apache Ignite servers:
+     $ bash stop_ignite.sh
+  """
+
+  def test_ignite_dataset_with_plain_client(self):
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300)
+    self.__check_dataset(ds)
+
+  def test_ignite_dataset_with_ssl_client(self):
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,\
+      certfile=os.path.dirname(os.path.realpath(__file__)) +\
+      "/keystore/client.pem", cert_password="123456")
+    self.__check_dataset(ds)
+
+  def test_ignite_dataset_with_ssl_client_and_auth(self):
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,\
+      certfile=os.path.dirname(os.path.realpath(__file__)) +\
+      "/keystore/client.pem", cert_password="123456",\
+      username="ignite", password="ignite")
+    self.__check_dataset(ds)
+
+  def __check_dataset(self, dataset):
+    """Checks that dataset provids correct data.
+    """
+    self.assertEquals(tf.int64, dataset.output_types['key'])
+    self.assertEquals(tf.string, dataset.output_types['val']['NAME'])
+    self.assertEquals(tf.int64, dataset.output_types['val']['VAL'])
+
+    it = dataset.make_one_shot_iterator()
+    ne = it.get_next()
+
+    with tf.Session() as sess:
+      rows = [sess.run(ne), sess.run(ne), sess.run(ne)]
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(ne)
+
+    self.assertEquals({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\
+      rows[0])
+    self.assertEquals({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\
+      rows[1])
+    self.assertEquals({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\
+      rows[2])
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/ignite/python/tests/keystore/client.jks b/tensorflow/contrib/ignite/python/tests/keystore/client.jks
new file mode 100644
index 0000000000000000000000000000000000000000..1875c71b605253603eb63e446da8f07cd84f64a0
GIT binary patch
literal 3232
zcmezO_TO6u1_mZ5W@KPX&dE&8D`8+@G{5kwMTdcbX^%k@(+&eZHZE;8MixdbCP79<
zRtA<PrX_ZfJ6RuW=xI&a{Juu@TNk6n?t`_bmfw-w$HcMcfVITRQcFv&@Iarl*<JD%
zPl<oY&|+<wSv`NR+56Xv&-#Qm88yu?ROmRuI&VG0@+&M4a;zqusMk3raOz(P!;3eo
zjH?daoM820?wU85i#gkRrpXoO2P+juJhy9}c<_JjPF|^4j=Os5H<N^OHP<^b+<TrO
zwRUkf_l|#jkAE#)_;yFzeCeZp%l2N`X`66{`N+%rn`75l1h4%R^ZE7*{$BfMZQ%v8
zyNaet-|9Gb!SpMGnb?B`Q$l@r#{Otm{<9)K{_6KG1H%B{XQq$VDA*iN%sN~8z^6b{
zAiw6ZiC<!HLc;k~>Yq8p|C_4X`71ir_3D+J-^_h0{`v;Jq^I7?=cj)By2Gv~;-!V;
zrk^*tG^Q9QMw#R_-+6sBwsZISYYa0FeehS&sNA)_h$$$<F3ePXLu>xItuMbc#jm^U
z)EsGM7v48t=yRQpg+|uW%p2uNTMy;l<&ooQ-L|Fm+-LC}Ql7ti^n{IXo#DN-aQ4pg
z#|pK-#~2r{+<*SvmVdL<Pdc=QA8HWRh;?&4;!`}&bP0RxPZxE@ACiydU3=CV{}b+F
zQSjFCsonj(D*sag+pkG%Q^St4+~Ys~_r&F`%kEbgNZe0)x_tBTZ<Tkd=BGW+<xx`D
zQ+4RMy8B(7^Y1o17kFZ}ynr!GU15&qw~IeF?-aXh8@0$~(kqjdZ_fnWd~;!zI3HK~
zJhj5@EP@WVop(1z{{Jee!dP%g{giCLCclr5A9;Q8iT-TzCeW~~?a`^zZFL+c4kWzq
zU;8-hcCPNpzJnh=uKwCK)1#2*?3%?z&1ZuJkEiy^#N2M-@L}($nk@S&;YnWm8MeEN
zm#1t$Gc#bqrK9GT(=@hJ*0;~N{f7DL(oXj^A-|uT&RRC@VTAngkk*Ud;B?Izp=WAf
z$-uyL*Pw~%ra=>v(*kBDMkXdk6$4&2POUbNw(q=*jI68-2IYp_2Apinp)72|OhKWB
zJO*4K4u>#va6l4_#}45M8t{YUxP&>}i!xK(6EpK*3b=)tT~f<nCNT>$B_|rn8OXqG
z;$#%#Pfm15FUm|wPt45IOU}<VkQ3)MG%_$VurxF@FflZb66ZBW<{A{jjKk(If==N#
z;03vrSs0qT4V4TOkewq8$@HjBYMhT8I*hCg%#FPa292Fejg1VK*kz9?F+b=R5I$u(
zscP2kvk&h_^!FZo{-b#Fms5A&i7O?&F%rEc6sEhjMEtM9skgI#p1tvl$*JARsg~8A
z^Y9*hOP>1uEr#pj7x=Nuzv6iE!us=ylaaxU{}zcI(6gN!b!PABw|X+mmI_84U%E|>
z{mQW=_S;Qr`2HyF@K#y!w@;mknUR5UakW7uIJRVkS(pqM40PBy6WTl&+kQAP^0LT_
z$@&+h<^>1)D7dHQr4}WYq^2l1rxulDre!84mZTcUvN4Cs^0A1qh&cZ>+QiDUB3!Cv
z*QCz@@``+0T3;B*gQS&NBn-qFM7I9ms7T4H+0xmqwdiX68%NFUt%1m4!wL$UMuzTN
zKavj~N?{J}lzRGR$J_m%x8=zk+ue9hbM_;R=>eq;wo5iLG6bg=|6T5UzizMm4#|@0
zC3nU6D?6K?{L1|Lkcsc;orbHcP1{x#s@#Ya5UGy8w0-x%FE!^&Uhc18xbL`FJu7bh
zEc2IVEfii)u)od6#&v3~%l?{W-yQ1i>pxA|_=W+N&Y67;nwUKenwU00(m5x?%60vB
zcixWkLv!^YNbd$_rly8Qu=H*M<qn*~@TK~#cdK}mXP-E?-DF{X(U%zE_cH6lWA2`N
z=2ddUd2LwT;*&SI);KU!RlloSeL3^0TIZ>S!M-c39-n=bx7Ci%v0sW&eC`$Zt&A6J
zx2%mi{^N~-V|3b??)9@L{4(U4*Vx*;m6w(6+2y~S{15JDb~|jS)~V&%s_R*>%7H6j
zac`Q_$Ni90-+0HM@g`F8#hN-BS3=Tn<3fYRdDv6%!s;cJ0~XFrNNJJ><Wyl6Rs&{6
z#{Xz3l)15yp>5CIRj);LinkxSv+h!V3q#BbzJF1B6oSQ$a90?dH&#Df;JoYLpG4o4
z_vH`lZk%qiR`UI=vL~V2U#z=$!t$%x!4s7Yn#sG}#XXJ0Jr*}aeBxPNc>0L`(;ur`
z7niRTx7FTo)X_L*nZ@%!-U~AR;=(>@))7-*?Qk|v-SSgPPEf}ZT!%0vCqmjp&4Hu&
pj0zRrXg*^OWubk;VkqUaBW_g*^E|kHw{Q7<!r*Cx;ya&dW&r&#dOrXF

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/ignite/python/tests/keystore/client.pem b/tensorflow/contrib/ignite/python/tests/keystore/client.pem
new file mode 100644
index 0000000000..a71a87e0bb
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/keystore/client.pem
@@ -0,0 +1,69 @@
+Bag Attributes
+    friendlyName: client
+    localKeyID: 54 69 6D 65 20 31 33 33 39 32 33 39 38 35 39 34 34 36 
+Key Attributes: <No Attributes>
+-----BEGIN RSA PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: DES-EDE3-CBC,CE61EDD98349D0C7
+
+Kzl16sj8R7YUXPCEZCqCrY4LSAjiKCRFNOagEehvN9Jpswcz4JbatoFmvVvOCgBF
+7kkeCaALhfM5a+46uynZ1sOOFUOn8fUFgguN3lLInWfm6vTuXDPslg0/tRNI0YqW
+ujfxyzrm1/k4RX0oLzRE1jZr69VZsBmZndkz9nkz3anWKLE7X/VIFV6U/N6YNPch
+BG1Fxpt/HtM9p3B5wNDSjCVaeNP1ROKe3APLRY6k+SppTuntHV5q9Ni82r1l3ahU
+zf2QvocSy9MLh+bGusJGHyJJAGuwPHm6ytPwbXGHn5xe4HPIno28j9kN7EL1ZoUs
+q0PhipAkFrGIM4zg6nAwVdzY5iGySDQ3fWpz2MkrKMDRftBwA3o/M321NBUW9/2X
+l+XmjXcJd0dEOslGxveb6UXLL2YvYszjQXRR4dCV/40bMJL3umRhVSay0NteoXfY
+82rQchm2NHKOiDfB4RpD8JJtVQeDSMXc9TH5y2Ua7FZND60JXtFpdnfCVfVZuBJm
+yBafyIsXR7EQzLG4z28Dvp4fs42A3JkF+e9Aq6Y6MmYA1wsvIKKT9HKEifqKmbgG
+4E9WOZn5IWi4ZJ44VAwN/uBGrLm//3OjByeB9y8vszNbyY8dQ8x5XqnF/IzIvgqc
+uKA8xuLAkTFmgRGQ/lmMDR+iMhet5dCtg9Orb9tYVL55JAb/OfsCX0LTJ3Y2RmIx
+CaFpkUP7KKYD+69ajnFCxvfGnGxyBkf+JeuDYIZVFklVT9SUtL9RJh26jUdvHt2A
+LQerBl8UCkVbPxsxYjdawvxuBNTD6tSRykM8zwtWcvIubp+gxE7png==
+-----END RSA PRIVATE KEY-----
+Bag Attributes
+    friendlyName: 1.2.840.113549.1.9.1=#1613636c69656e7440677269646761696e2e636f6d,CN=client,OU=Dev,O=GridGain,ST=SPb,C=RU
+    localKeyID: 54 69 6D 65 20 31 33 33 39 32 33 39 38 35 39 34 34 36 
+subject=/C=RU/ST=SPb/O=GridGain/OU=Dev/CN=client/emailAddress=client@gridgain.com
+issuer=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com
+-----BEGIN CERTIFICATE-----
+MIIC2TCCAkKgAwIBAgIBJDANBgkqhkiG9w0BAQUFADB3MQswCQYDVQQGEwJSVTEM
+MAoGA1UECBMDU1BiMQwwCgYDVQQHEwNTUGIxETAPBgNVBAoTCEdyaWRHYWluMQww
+CgYDVQQLEwNEZXYxCzAJBgNVBAMTAmNhMR4wHAYJKoZIhvcNAQkBFg9jYUBncmlk
+Z2Fpbi5jb20wHhcNMTIwNjA5MTEwNDE3WhcNMzIwNjA5MTEwNDE3WjBxMQswCQYD
+VQQGEwJSVTEMMAoGA1UECBMDU1BiMREwDwYDVQQKEwhHcmlkR2FpbjEMMAoGA1UE
+CxMDRGV2MQ8wDQYDVQQDEwZjbGllbnQxIjAgBgkqhkiG9w0BCQEWE2NsaWVudEBn
+cmlkZ2Fpbi5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANIHHcYiA+CP
+EBPKNZJ6mtvN4d9Yj43B5/hzs/TK3e4XImLsMhXaElYtrXQX/SDK7Zv5zdj6AkKH
+QkJ9BT8Jw7wvOQx/v4Qxrl+gTgcf6gjk6DvzqMlZUwH+ohbALj2TWsy9y+0uHKal
+EVrHpbYeB9TGpD+3NHwO/CG4SySk/Y4nAgMBAAGjezB5MAkGA1UdEwQCMAAwLAYJ
+YIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1Ud
+DgQWBBRD/TKyBQyoVxqEupLzUB8hDrSF6DAfBgNVHSMEGDAWgBS1+Ah4ZG58tImL
+KqLVX+xBKbeFUTANBgkqhkiG9w0BAQUFAAOBgQCL2vhjwcJkA1OJGuXsuO2/87Zu
+HMa7gc4pm+Iol1B1gD2ksQEAU2dz/adD3369H7gZdHuk3RYPeYmD5Ppp9eECDsXc
+gNWrNYaqcSTYWRAUe1/St7vB9HzPdOm/eADfQaMnal6fmjfpzTgg65A/2w4GCsqt
+RL98pvdAft8v5WSx7A==
+-----END CERTIFICATE-----
+Bag Attributes
+    friendlyName: 1.2.840.113549.1.9.1=#160f636140677269646761696e2e636f6d,CN=ca,OU=Dev,O=GridGain,L=SPb,ST=SPb,C=RU
+subject=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com
+issuer=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com
+-----BEGIN CERTIFICATE-----
+MIIDSTCCArKgAwIBAgIJAKmuj925215OMA0GCSqGSIb3DQEBBQUAMHcxCzAJBgNV
+BAYTAlJVMQwwCgYDVQQIEwNTUGIxDDAKBgNVBAcTA1NQYjERMA8GA1UEChMIR3Jp
+ZEdhaW4xDDAKBgNVBAsTA0RldjELMAkGA1UEAxMCY2ExHjAcBgkqhkiG9w0BCQEW
+D2NhQGdyaWRnYWluLmNvbTAeFw0xMjA2MDkwNjU1MTJaFw0zMjA2MDQwNjU1MTJa
+MHcxCzAJBgNVBAYTAlJVMQwwCgYDVQQIEwNTUGIxDDAKBgNVBAcTA1NQYjERMA8G
+A1UEChMIR3JpZEdhaW4xDDAKBgNVBAsTA0RldjELMAkGA1UEAxMCY2ExHjAcBgkq
+hkiG9w0BCQEWD2NhQGdyaWRnYWluLmNvbTCBnzANBgkqhkiG9w0BAQEFAAOBjQAw
+gYkCgYEAtd16DCObyM63NKF/cvRcE+8cr1dc3c7mSnTEQ61WfqPJ2QqsQAB6e+5+
+q9Np1SaJyqFTTag6483ibrU+DkGPGgEXndRHtQHQPbStWsf47DBBW2bMi6+bkPox
+Cp6BhYO1DQUG5tP9CQ/g32mLQLB7LH0KtS1JcKpAClCjjWZC8b8CAwEAAaOB3DCB
+2TAdBgNVHQ4EFgQUtfgIeGRufLSJiyqi1V/sQSm3hVEwgakGA1UdIwSBoTCBnoAU
+tfgIeGRufLSJiyqi1V/sQSm3hVGhe6R5MHcxCzAJBgNVBAYTAlJVMQwwCgYDVQQI
+EwNTUGIxDDAKBgNVBAcTA1NQYjERMA8GA1UEChMIR3JpZEdhaW4xDDAKBgNVBAsT
+A0RldjELMAkGA1UEAxMCY2ExHjAcBgkqhkiG9w0BCQEWD2NhQGdyaWRnYWluLmNv
+bYIJAKmuj925215OMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEAhrzd
+qusVLHO3wtyu0o+EAFyoDv5avCBTFsQLeDDPMyfDcEO6wfxhTanfH8C7gZc0rRnv
+2nbkVbfortHIOfU2wch5gClju0cXSTIXSKOAWPIMp3HLxC/l+KpFo3epFz0rsMVB
+M1ymOOdRDdAcTxcTTGY7WJXquEM3ZbT5Gh4RLDk=
+-----END CERTIFICATE-----
diff --git a/tensorflow/contrib/ignite/python/tests/keystore/server.jks b/tensorflow/contrib/ignite/python/tests/keystore/server.jks
new file mode 100644
index 0000000000000000000000000000000000000000..006ececc31118aa18ddb6e4ec27d002e5e11646c
GIT binary patch
literal 3230
zcmezO_TO6u1_mZLW=c+EU|=-A@M=~y18anysevT}1GBF|6SJp56Vs*z%uI|-Oq>iW
z*Y)4sc{|R}fR~L^tIebBJ1-+6D=ULRxgoa!CmVAp3!5-gP^ck~0T+nFA<P^ckObqg
zLwJG){2)0lVGj4A%oO*;%siL^ZeeDZ)H0Y!%)(5`iH33pGH{zX8O8XM6CKivGE>qM
zGxPM4^K%X4#CZ*k49pBH4a`hU4UMA2d5u9_6DW7!9M(7=IgA)t8JHV;84MabnHn1z
zw%)DcQJ#I`+;)?N^+jJ|gx|}o50ANf?wMD~5$CmGb&F5l<XYpvP*we|ZuRBNt7@I6
z76$vSuzGy<QQlTNKF5A3M)A2<+_y4bu-&pY>iCa029D8bXS&zVp76_%YhGh(^HyF~
zwr7|Ba`Hd8pV{rOp<1VwYpbqj!72x?fW^IOP9OI(F*7nSE^fSI(0J28mW?@7mXAe@
zMP%y_j*67Lnk}8(T8pm6zj4&u-Wq7oxDq6<%+k2fpmAOURy7N&msAc|I5#1sNgj|>
zg;`h)m>C)WBZodKDD)c{+V<RC^;%S?c>AF{>n`=TFvP6j`xmuGAz17PcZI=uWA(!Y
z&bto&N%UQLU;e=E#_1+&CEwpFdlI_+#kz|pEWer^JW<)8nY`Ow+|x+hV{t>oC!Xbn
zr;q4A{jth*arsJdTkQ=;9gSm_Sv(Kqy&&T+F6@(L9WnLQ4rlY!EkC8?1a&MK7#J8C
z*osq&%2JCUd2Mzj0|V18gC?eJ27GK>+H8z0j9N^BjEt-dEKN*{JR;+brdQtCd04*1
z$YblrpVOXB<ym^M`G-LLJTCnxao6PMarN9H70d42Tl!<QJCo;uPp413*vKWxv@}ug
zogeFuTkqM-(ghhc6e`~BpT5^PYKigDv`*d2hEaDO^v=vQ%@*VoD?4J!;h4Io;ZSk#
z`P6w6S$vxB%J+)t=jxQac%*&o!GpcM-!tDgl<AfmaL6cRa+m+|&6~Q>Dssj0qL-e!
z_3tE^3-va%w|`$L;}Nj<>_c{`U$cx$gv{s4>?kxoKIdBIo;jx!9jbh5SLQifJJrzs
zKg>i-)9GBrhpt-!C&V3HuCAQCd+ibRlH+zhVzLkahd3JDTd_wa_k`B9sMrWek?13{
z*dBacz4IgAeb)@<DTjZ#B{%+0+ITE!Q-Ppme*`C2&?~-`MzL)V0<4=(raczZSdsln
zVe`sX^Gzq*^Y&Z6c3UUDGrQ~lJ4sH%6H8t%)lPXj^-QN<z#3!cz5fo}bt$%=wIrqP
ze~swA+9M3_Ss%@r>M~3AX3(WI_H1kwhYD0)`sg3qynpK@nTnN(CM+8|HvZ!6J^p;z
z70HDklrAs3a-g*4a!r-uh2Qu8U1U*M%*r<}VcnA4fROmk&r_Y<Ot0}u+~R6_-0f;I
zQT@#Vt>szlR%axuOEMR(yjB<ff1Pkedcd4l7Gibz?_1wxE_$2N!|6L=s_4G3D;H%r
zeYhV^m$!Q!H8&}A-lhwm4)Sjf?o(RDp0@s1<Z*)~f!CPMJvuqH#BR&WkDiOYH*Z^O
z`m6X<%&SKA!c2!%f&2?Z-p~J%(AH8Z@#mJ@mcFWAXTELN^S3`$>1N0!)447OT5hin
z`e@h^I(x}V?cc3Y48HPiKl(P^C`fr6_w|pjbFf;;&fi9lD<_^(-g^I5=a%BLPyRf3
zdU0>w&2)9mhk?Jp?O3_5|F@rk_~K+{aP0xBEtu{aG%?*YXkv1L)E11YXnAnJ3JF9l
zVQ6SzVg#=xETLS3LRh7Pt@<KZVeuRAf@(BoVQAhqR5DONuC#<9Ssu0G!dE|B`@3^R
z#`A<V;j`wxcVfPOP*qy#_}k^?ua`X%*DCzZp0eu3ybtwSW|sR@z1*D&Ueyb_v>$#x
zU$5)brMsWC-|mr^9a-qdEyI5}#;Jbot6!hlq^dr@w9ngL^i9Q-xAje)zPeoZQTZ3^
zI{SX>N-Unl-@TrBs?fRG_WGF4H4fa*%CAD|hiZdLaGfqI%)(^AV4%asnb79J*!IJT
zk(WhYOxC|3H7_{WN5MTcFSRJKBsE3BIkl)HGc7YYu_P6#T4`B)<&VueNfCvrDJrI6
z;+q!D*D*4X2Nl)IED{D{*y|Lul*8QE$guv)rbS0jOy~Khd5i0YgcQr&WH!FHLBEbS
zt>f>NJd)IQh2=oPhQ#@NHyz)uGMdNwXx?Y07a!}Fce*DBynQM9JI6`FqT^^1yQE9p
z&0|vfQ<(D9Zo~)i9e#GAtb#Qsng8dHFBKsVC+4kv*LYIX@L62#V&#`f_RBX&U0c1a
z>(PbZn?_qjRA{c@OZB6zBIZyQ>NJaoQmbfAcG9xQoh!B}_ieqmB!8{QbKdug03J1V
AGXMYp

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/ignite/python/tests/keystore/trust.jks b/tensorflow/contrib/ignite/python/tests/keystore/trust.jks
new file mode 100644
index 0000000000000000000000000000000000000000..a00f1251af72982ddcd42c0274fc7b16e35dbc4c
GIT binary patch
literal 2432
zcmezO_TO6u1_mYu1_ov@&6J$Tz`$sJ;nlo346G4)rUsS_49va;P0XGKO-!2>Ff%bS
zF>x}iT-Seh=j}K@170>xtu~Lg@4SqRtgH+M<%Zk_oNUaYENsF|L7|2`23#NxhcI(+
zKoX3{4&ezJ@Pp*IggM-cGE>|WGxJ~yxP_TrQp;c_F$*&#CmPBb$iQvlWEA61PIO2w
z%1lX5%*@kE&d)WF6X!KFGB7i+G%zzYH8hG6=QRd#O`zO?b6Dei<S=4nWngaXWiV*$
zWNK_=*m}2$M|t*%bK6Z8))#$=5q>YTK0M~`xo2J_N1WG&)h#}GlWUCwLsj*=y49C6
zuc~#PS{UrR!s_waM|oTA_#FGC7{%vaao@^#!FJ2qsN+B07&u0!o#|dbd%`b6u6d2E
z&0Bd{*`8hg%gO)XerC7BhH9N!uC2PB1*;sm0v7kCIepyE#LURRxVZ6-LE}vWSvKZS
zSw0pq7Llz#I4V-|YPNKCYc0AO|He^sduyOU<4TacGE3t^gT{FcSk)}7UQ#(=;oO9j
zCV4<k6=q>IU}j|ej~x1}pwMq*Xxnpl)oW3m;_Zj-th?0T!Vt59?_bm&g<!EG+!Y4r
zjnxkqIPW_6C((E1efa~s8>gGBm3)7z>`CbM7waybu>5Lv@I+;UX7X-#aZe+0kHrlU
zpLmuRo<5@g^v5dK#pNr-ZM8QXbu^AyX7N0b_kxVSxUf%}b;Q(HJDkl^xBQfn6V$N;
z=QFnCoXpg`5=dT~Ujff+Om_{Mm~I+0F*!l<8lwuD4+boV5qZqe(7?pd9G1r{p<IJP
zSeC_>`3Yusegj@mdB7|T4H-iv0|n$PFANDV)NGG0cU@waJ*LF`pkF}vl<B0ZS+~zV
zydTlud+_;>;>}-9-F+vnl=Q|(^p;SV?%ERZzY3?`&i;A!#xEwPb|<G=R(sCFd-N@N
z>i4%8u8Uvb$1eYh<H-x_&nr$w1~dL!Bz8d0c5>92y{F&m$t+tc7<GK<HaYey$ClV{
zH>u(KqqxIcWy#+@bx7{2HmC%bh_b>gOa=@FI&7Q?Z61tmKb#nOS>(lJ{R>j_f`fe&
z+*9*XixNvxQxu$2i%K%nGLsWaQjs#H^IxM)tUN2irCN4P`Wzsy$hW2Sg@HUMp)0dU
z7>Hrdk!UH0xv`O<`__--gNIU>gFB_3zS;41|L1LaGRJl|p3|KDNMm|HX@l*Ojf@Pz
z>BWDSJKwL{E5Adsq<YC+G5*TV<|n^0zdmH*J9?+#>T1)rRfQ@yA_YXM<1cOBeeg@o
z`I49WD;VxOE>_Qqn?K9^<yi}b*Awh-^RaQATI;gEX4!X#y8HT1Q#QVVr2XR5qO#N?
zNZMcEg*)x5qWNHuq<tfJ{Q^$=Mn+LoOZ$+J!AScMVdB&NwZA)8WIRt;6FzJ1dne}m
z2UVq&j=x=Q{(9LXajnAd>?x~m%==KUWoEfg)yv(f;8ne#OZ(yH^Yyw;UAp^O`|Tcy
z*^z~Q+%o)!W1Q;OzWVi<O{(hiOZ&Y2Mc-6Rd0XG)>8s0iAC-TxuCwpAuEgR={N3xB
zrwW~`ZLg2%T;ss~to$lf(|*h1D}QX(Ns1^`O;Iro6W_FGzK#(&X@C8fO^c46n9lQ0
z^A^_)2`QGl$!vUYgMJ-vTF2ijc_gXr3d@0n4T<ylZaThQWi*fV(Y()0FFw{U?{rTN
zc>7ZHcaD>UMaR)3c1f4Go5!T|r!eKI-G~q3JN)cKSp{oOGXKvXUn)W#PRv{TuJNR%
v;j_5f#mX;}?3Ztly0&^-*P{!+H%&8h)e~82@&Dp^^P<ERetXZ{Q569I9;OV&

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/ignite/python/tests/sql/init.sql b/tensorflow/contrib/ignite/python/tests/sql/init.sql
new file mode 100644
index 0000000000..5a192aef17
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/sql/init.sql
@@ -0,0 +1,20 @@
+-- Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS, 
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- ==============================================================================
+
+CREATE TABLE TEST_CACHE (ID LONG PRIMARY KEY, NAME VARCHAR, VAL LONG);
+
+INSERT INTO TEST_CACHE VALUES (1, 'TEST1', 42);
+INSERT INTO TEST_CACHE VALUES (2, 'TEST2', 43);
+INSERT INTO TEST_CACHE VALUES (3, 'TEST3', 44);
diff --git a/tensorflow/contrib/ignite/python/tests/start_ignite.sh b/tensorflow/contrib/ignite/python/tests/start_ignite.sh
new file mode 100755
index 0000000000..fbcf656afd
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/start_ignite.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+IGNITE_VERSION=2.6.0
+SCRIPT_PATH="$( cd "$(dirname "$0")" ; pwd -P )"
+
+# Start Apache Ignite with plain client listener.
+docker run -itd --name ignite-plain -p 42300:10800 \
+-v ${SCRIPT_PATH}:/data apacheignite/ignite:${IGNITE_VERSION} /data/bin/start-plain.sh
+
+# Start Apache Ignite with SSL client listener.
+docker run -itd --name ignite-ssl -p 42301:10800 \
+-v ${SCRIPT_PATH}:/data apacheignite/ignite:${IGNITE_VERSION} /data/bin/start-ssl.sh
+
+# Start Apache Ignite with SSL client listener with auth.
+docker run -itd --name ignite-ssl-auth -p 42302:10800 \
+-v ${SCRIPT_PATH}:/data apacheignite/ignite:${IGNITE_VERSION} /data/bin/start-ssl-auth.sh
diff --git a/tensorflow/contrib/ignite/python/tests/stop_ignite.sh b/tensorflow/contrib/ignite/python/tests/stop_ignite.sh
new file mode 100755
index 0000000000..8f03dbd1ed
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/stop_ignite.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+docker rm -f ignite-plain
+docker rm -f ignite-ssl
+docker rm -f ignite-ssl-auth
-- 
GitLab


From 28b0608a8536c287b4084449e36fd42b6f4aed5b Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Fri, 24 Aug 2018 18:15:57 +0300
Subject: [PATCH 0103/1357] Remove duplicated header from README.md.

---
 tensorflow/contrib/ignite/README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md
index 9054344e94..f2596fc572 100644
--- a/tensorflow/contrib/ignite/README.md
+++ b/tensorflow/contrib/ignite/README.md
@@ -1,4 +1,3 @@
-### Ignite Dataset
 # Ignite Dataset
 
 - [Overview](#overview)
@@ -164,4 +163,4 @@ After that you will be able to work with it following way:
 
 ## Limitations
 
-Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures.
\ No newline at end of file
+Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures.
-- 
GitLab


From 241c1740ee26b57b7a5fe8f72b9d34f4515af760 Mon Sep 17 00:00:00 2001
From: dmitrievanthony <dmitrievanthony@gmail.com>
Date: Sun, 26 Aug 2018 16:03:04 +0000
Subject: [PATCH 0104/1357] Update after review: change 'ignite' namespace to
 'tensorflow', rename variables to satisty code style, use pointers instead of
 references.

---
 tensorflow/contrib/ignite/BUILD               |   1 -
 tensorflow/contrib/ignite/__init__.py         |   4 +-
 .../kernels/ignite_binary_object_parser.cc    | 322 +++++++---------
 .../kernels/ignite_binary_object_parser.h     |   9 +-
 .../contrib/ignite/kernels/ignite_client.cc   |  55 ---
 .../contrib/ignite/kernels/ignite_client.h    |  45 ++-
 .../contrib/ignite/kernels/ignite_dataset.cc  | 105 +++--
 .../contrib/ignite/kernels/ignite_dataset.h   |  65 ++--
 .../ignite/kernels/ignite_dataset_iterator.cc | 358 +++++++++---------
 .../ignite/kernels/ignite_dataset_iterator.h  |  80 ++--
 .../ignite/kernels/ignite_dataset_ops.cc      |  10 +-
 .../ignite/kernels/ignite_plain_client.h      |  21 +-
 .../kernels/ignite_plain_client_unix.cc       |  78 ++--
 .../kernels/ignite_plain_client_windows.cc    |  77 ++--
 .../ignite/kernels/ignite_ssl_wrapper.cc      | 107 +++---
 .../ignite/kernels/ignite_ssl_wrapper.h       |  30 +-
 16 files changed, 619 insertions(+), 748 deletions(-)
 delete mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.cc

diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
index 9f6c666893..b7d40a99f7 100644
--- a/tensorflow/contrib/ignite/BUILD
+++ b/tensorflow/contrib/ignite/BUILD
@@ -40,7 +40,6 @@ cc_library(
     srcs = [
         "kernels/ignite_dataset_ops.cc",
         "kernels/ignite_client.h",
-        "kernels/ignite_client.cc",
         "kernels/ignite_plain_client.h",
         "kernels/ignite_ssl_wrapper.h",
         "kernels/ignite_ssl_wrapper.cc",
diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py
index 468920a557..b78829d0f4 100644
--- a/tensorflow/contrib/ignite/__init__.py
+++ b/tensorflow/contrib/ignite/__init__.py
@@ -30,9 +30,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops \
-import IgniteDataset
-
+from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops import IgniteDataset
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
index bf0ef8766e..9bf4480d2d 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
@@ -15,290 +15,258 @@ limitations under the License.
 
 #include "ignite_binary_object_parser.h"
 
-namespace ignite {
+namespace tensorflow {
 
-tensorflow::Status BinaryObjectParser::Parse(
-    uint8_t*& ptr, std::vector<tensorflow::Tensor>& out_tensors,
-    std::vector<int32_t>& types) {
-  uint8_t object_type_id = *ptr;
-  ptr += 1;
+Status BinaryObjectParser::Parse(uint8_t** ptr,
+                                 std::vector<Tensor>* out_tensors,
+                                 std::vector<int32_t>* types) {
+  uint8_t object_type_id = **ptr;
+  *ptr += 1;
 
   switch (object_type_id) {
     case BYTE: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_UINT8, {});
-      tensor.scalar<tensorflow::uint8>()() = *((uint8_t*)ptr);
-      ptr += 1;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_UINT8, {});
+      tensor.scalar<uint8>()() = *((uint8_t*)*ptr);
+      *ptr += 1;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case SHORT: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT16, {});
-      tensor.scalar<tensorflow::int16>()() = *((int16_t*)ptr);
-      ptr += 2;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_INT16, {});
+      tensor.scalar<int16>()() = *((int16_t*)*ptr);
+      *ptr += 2;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case INT: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT32, {});
-      tensor.scalar<tensorflow::int32>()() = *((int32_t*)ptr);
-      ptr += 4;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_INT32, {});
+      tensor.scalar<int32>()() = *((int32_t*)*ptr);
+      *ptr += 4;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case LONG: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT64, {});
-      tensor.scalar<tensorflow::int64>()() = *((int64_t*)ptr);
-      ptr += 8;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_INT64, {});
+      tensor.scalar<int64>()() = *((int64_t*)*ptr);
+      *ptr += 8;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case FLOAT: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_FLOAT, {});
-      tensor.scalar<float>()() = *((float*)ptr);
-      ptr += 4;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_FLOAT, {});
+      tensor.scalar<float>()() = *((float*)*ptr);
+      *ptr += 4;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case DOUBLE: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_DOUBLE, {});
-      tensor.scalar<double>()() = *((double*)ptr);
-      ptr += 8;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_DOUBLE, {});
+      tensor.scalar<double>()() = *((double*)*ptr);
+      *ptr += 8;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case UCHAR: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_UINT16, {});
-      tensor.scalar<tensorflow::uint16>()() = *((uint16_t*)ptr);
-      ptr += 2;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_UINT16, {});
+      tensor.scalar<uint16>()() = *((uint16_t*)*ptr);
+      *ptr += 2;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case BOOL: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_BOOL, {});
-      tensor.scalar<bool>()() = *((bool*)ptr);
-      ptr += 1;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_BOOL, {});
+      tensor.scalar<bool>()() = *((bool*)*ptr);
+      *ptr += 1;
+      out_tensors->push_back(std::move(tensor));
 
       break;
     }
     case STRING: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_STRING, {});
-      tensor.scalar<std::string>()() = std::string((char*)ptr, length);
-      ptr += length;
-      out_tensors.emplace_back(std::move(tensor));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_STRING, {});
+      tensor.scalar<std::string>()() = std::string((char*)*ptr, length);
+      *ptr += length;
+      out_tensors->push_back(std::move(tensor));
 
       break;
     }
     case DATE: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT64, {});
-      tensor.scalar<tensorflow::int64>()() = *((int64_t*)ptr);
-      ptr += 8;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_INT64, {});
+      tensor.scalar<int64>()() = *((int64_t*)*ptr);
+      *ptr += 8;
+      out_tensors->push_back(std::move(tensor));
 
       break;
     }
     case BYTE_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_UINT8,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_UINT8, TensorShape({length}));
 
-      uint8_t* arr = (uint8_t*)ptr;
-      ptr += length;
+      uint8_t* arr = (uint8_t*)*ptr;
+      *ptr += length;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::uint8>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<uint8>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case SHORT_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT16,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_INT16, TensorShape({length}));
 
-      int16_t* arr = (int16_t*)ptr;
-      ptr += length * 2;
+      int16_t* arr = (int16_t*)*ptr;
+      *ptr += length * 2;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::int16>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<int16>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case INT_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT32,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_INT32, TensorShape({length}));
 
-      int32_t* arr = (int32_t*)ptr;
-      ptr += length * 4;
+      int32_t* arr = (int32_t*)*ptr;
+      *ptr += length * 4;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::int32>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<int32>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case LONG_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT64,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length}));
 
-      int64_t* arr = (int64_t*)ptr;
-      ptr += length * 8;
+      int64_t* arr = (int64_t*)*ptr;
+      *ptr += length * 8;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::int64>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<int64>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case FLOAT_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_FLOAT,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_FLOAT, TensorShape({length}));
 
-      float* arr = (float*)ptr;
-      ptr += 4 * length;
+      float* arr = (float*)*ptr;
+      *ptr += 4 * length;
 
       std::copy_n(arr, length, tensor.flat<float>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case DOUBLE_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_DOUBLE,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_DOUBLE, TensorShape({length}));
 
-      double* arr = (double*)ptr;
-      ptr += 8 * length;
+      double* arr = (double*)*ptr;
+      *ptr += 8 * length;
 
       std::copy_n(arr, length, tensor.flat<double>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case UCHAR_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_UINT16,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_UINT16, TensorShape({length}));
 
-      uint16_t* arr = (uint16_t*)ptr;
-      ptr += length * 2;
+      uint16_t* arr = (uint16_t*)*ptr;
+      *ptr += length * 2;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::uint16>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<uint16>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case BOOL_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_BOOL,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_BOOL, TensorShape({length}));
 
-      bool* arr = (bool*)ptr;
-      ptr += length;
+      bool* arr = (bool*)*ptr;
+      *ptr += length;
 
       std::copy_n(arr, length, tensor.flat<bool>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case STRING_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_STRING,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_STRING, TensorShape({length}));
 
       for (int32_t i = 0; i < length; i++) {
-        int32_t str_length = *((int32_t*)ptr);
-        ptr += 4;
-        const int8_t* str = (const int8_t*)ptr;
-        ptr += str_length;
+        int32_t str_length = *((int32_t*)*ptr);
+        *ptr += 4;
+        const int8_t* str = (const int8_t*)*ptr;
+        *ptr += str_length;
         tensor.vec<std::string>()(i) = std::string((char*)str, str_length);
       }
 
-      out_tensors.emplace_back(std::move(tensor));
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case DATE_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT64,
-                                tensorflow::TensorShape({length}));
-      int64_t* arr = (int64_t*)ptr;
-      ptr += length * 8;
-
-      std::copy_n(arr, length, tensor.flat<tensorflow::int64>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length}));
+      int64_t* arr = (int64_t*)*ptr;
+      *ptr += length * 8;
+
+      std::copy_n(arr, length, tensor.flat<int64>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case WRAPPED_OBJ: {
-      int32_t byte_arr_size = *((int32_t*)ptr);
-      ptr += 4;
+      int32_t byte_arr_size = *((int32_t*)*ptr);
+      *ptr += 4;
 
-      tensorflow::Status status = Parse(ptr, out_tensors, types);
-      if (!status.ok()) return status;
+      TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types));
 
-      int32_t offset = *((int32_t*)ptr);
-      ptr += 4;
+      int32_t offset = *((int32_t*)*ptr);
+      *ptr += 4;
 
       break;
     }
     case COMPLEX_OBJ: {
-      uint8_t version = *ptr;
-      ptr += 1;
-      int16_t flags = *((int16_t*)ptr);  // USER_TYPE = 1, HAS_SCHEMA = 2
-      ptr += 2;
-      int32_t type_id = *((int32_t*)ptr);
-      ptr += 4;
-      int32_t hash_code = *((int32_t*)ptr);
-      ptr += 4;
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      int32_t schema_id = *((int32_t*)ptr);
-      ptr += 4;
-      int32_t schema_offset = *((int32_t*)ptr);
-      ptr += 4;
-
-      uint8_t* end = ptr + schema_offset - 24;
+      uint8_t version = **ptr;
+      *ptr += 1;
+      int16_t flags = *((int16_t*)*ptr);  // USER_TYPE = 1, HAS_SCHEMA = 2
+      *ptr += 2;
+      int32_t type_id = *((int32_t*)*ptr);
+      *ptr += 4;
+      int32_t hash_code = *((int32_t*)*ptr);
+      *ptr += 4;
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      int32_t schema_id = *((int32_t*)*ptr);
+      *ptr += 4;
+      int32_t schema_offset = *((int32_t*)*ptr);
+      *ptr += 4;
+
+      uint8_t* end = *ptr + schema_offset - 24;
       int32_t i = 0;
-      while (ptr < end) {
+      while (*ptr < end) {
         i++;
-        tensorflow::Status status = Parse(ptr, out_tensors, types);
-        if (!status.ok()) return status;
+        TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types));
       }
 
-      ptr += (length - schema_offset);
+      *ptr += (length - schema_offset);
 
       break;
     }
     default: {
-      return tensorflow::errors::Internal("Unknowd binary type (type id ",
-                                          (int)object_type_id, ")");
+      return errors::Internal("Unknowd binary type (type id ",
+                              (int)object_type_id, ")");
     }
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
index 1e845cbc56..9accbd796f 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
@@ -17,13 +17,12 @@ limitations under the License.
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/lib/core/status.h"
 
-namespace ignite {
+namespace tensorflow {
 
 class BinaryObjectParser {
  public:
-  tensorflow::Status Parse(uint8_t*& ptr,
-                           std::vector<tensorflow::Tensor>& out_tensors,
-                           std::vector<int32_t>& types);
+  Status Parse(uint8_t** ptr, std::vector<Tensor>* out_tensors,
+               std::vector<int32_t>* types);
 };
 
 enum ObjectType {
@@ -51,4 +50,4 @@ enum ObjectType {
   COMPLEX_OBJ = 103
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.cc b/tensorflow/contrib/ignite/kernels/ignite_client.cc
deleted file mode 100644
index 5a8eddb944..0000000000
--- a/tensorflow/contrib/ignite/kernels/ignite_client.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef IGNITE_CLIENT_H
-#define IGNITE_CLIENT_H
-#include "ignite_client.h"
-#endif
-
-namespace ignite {
-
-tensorflow::Status Client::ReadByte(uint8_t& data) {
-  return ReadData((uint8_t*)&data, 1);
-}
-
-tensorflow::Status Client::ReadShort(int16_t& data) {
-  return ReadData((uint8_t*)&data, 2);
-}
-
-tensorflow::Status Client::ReadInt(int32_t& data) {
-  return ReadData((uint8_t*)&data, 4);
-}
-
-tensorflow::Status Client::ReadLong(int64_t& data) {
-  return ReadData((uint8_t*)&data, 8);
-}
-
-tensorflow::Status Client::WriteByte(uint8_t data) {
-  return WriteData((uint8_t*)&data, 1);
-}
-
-tensorflow::Status Client::WriteShort(int16_t data) {
-  return WriteData((uint8_t*)&data, 2);
-}
-
-tensorflow::Status Client::WriteInt(int32_t data) {
-  return WriteData((uint8_t*)&data, 4);
-}
-
-tensorflow::Status Client::WriteLong(int64_t data) {
-  return WriteData((uint8_t*)&data, 8);
-}
-
-}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h
index 64e28d75f0..944b3fe184 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_client.h
@@ -13,28 +13,43 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
+
 #include "tensorflow/core/lib/core/status.h"
 
-namespace ignite {
+namespace tensorflow {
 
 class Client {
  public:
-  virtual tensorflow::Status Connect() = 0;
-  virtual tensorflow::Status Disconnect() = 0;
+  virtual Status Connect() = 0;
+  virtual Status Disconnect() = 0;
   virtual bool IsConnected() = 0;
   virtual int GetSocketDescriptor() = 0;
+  virtual Status ReadData(uint8_t* buf, int32_t length) = 0;
+  virtual Status WriteData(uint8_t* buf, int32_t length) = 0;
+
+  inline Status ReadByte(uint8_t* data) { return ReadData(data, 1); }
+
+  inline Status ReadShort(int16_t* data) { return ReadData((uint8_t*)data, 2); }
+
+  inline Status ReadInt(int32_t* data) { return ReadData((uint8_t*)data, 4); }
+
+  inline Status ReadLong(int64_t* data) { return ReadData((uint8_t*)data, 8); }
 
-  virtual tensorflow::Status ReadByte(uint8_t& data);
-  virtual tensorflow::Status ReadShort(int16_t& data);
-  virtual tensorflow::Status ReadInt(int32_t& data);
-  virtual tensorflow::Status ReadLong(int64_t& data);
-  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length) = 0;
-
-  virtual tensorflow::Status WriteByte(uint8_t data);
-  virtual tensorflow::Status WriteShort(int16_t data);
-  virtual tensorflow::Status WriteInt(int32_t data);
-  virtual tensorflow::Status WriteLong(int64_t data);
-  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length) = 0;
+  inline Status WriteByte(uint8_t data) { return WriteData(&data, 1); }
+
+  inline Status WriteShort(int16_t data) {
+    return WriteData((uint8_t*)&data, 2);
+  }
+
+  inline Status WriteInt(int32_t data) { return WriteData((uint8_t*)&data, 4); }
+
+  inline Status WriteLong(int64_t data) {
+    return WriteData((uint8_t*)&data, 8);
+  }
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
+
+#endif
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
index a9bf26955b..f25f8a5b18 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
@@ -16,31 +16,29 @@ limitations under the License.
 #include "ignite_dataset_iterator.h"
 #include "tensorflow/core/platform/logging.h"
 
-namespace ignite {
+namespace tensorflow {
 
-IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx,
-                             std::string cache_name, std::string host,
-                             tensorflow::int32 port, bool local,
-                             tensorflow::int32 part,
-                             tensorflow::int32 page_size, std::string username,
+IgniteDataset::IgniteDataset(OpKernelContext* ctx, std::string cache_name,
+                             std::string host, int32 port, bool local,
+                             int32 part, int32 page_size, std::string username,
                              std::string password, std::string certfile,
                              std::string keyfile, std::string cert_password,
-                             std::vector<tensorflow::int32> schema,
-                             std::vector<tensorflow::int32> permutation)
-    : DatasetBase(tensorflow::DatasetContext(ctx)),
-      cache_name(cache_name),
-      host(host),
-      port(port),
-      local(local),
-      part(part),
-      page_size(page_size),
-      username(username),
-      password(password),
-      certfile(certfile),
-      keyfile(keyfile),
-      cert_password(cert_password),
-      schema(schema),
-      permutation(permutation) {
+                             std::vector<int32> schema,
+                             std::vector<int32> permutation)
+    : DatasetBase(DatasetContext(ctx)),
+      cache_name_(cache_name),
+      host_(host),
+      port_(port),
+      local_(local),
+      part_(part),
+      page_size_(page_size),
+      username_(username),
+      password_(password),
+      certfile_(certfile),
+      keyfile_(keyfile),
+      cert_password_(cert_password),
+      schema_(schema),
+      permutation_(permutation) {
   SchemaToTypes();
   SchemaToShapes();
 
@@ -53,55 +51,50 @@ IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx,
 
 IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; }
 
-std::unique_ptr<tensorflow::IteratorBase> IgniteDataset::MakeIteratorInternal(
-    const tensorflow::string& prefix) const {
-  return std::unique_ptr<tensorflow::IteratorBase>(new IgniteDatasetIterator(
-      {this, tensorflow::strings::StrCat(prefix, "::Ignite")}, this->host,
-      this->port, this->cache_name, this->local, this->part, this->page_size,
-      this->username, this->password, this->certfile, this->keyfile,
-      this->cert_password, this->schema, this->permutation));
+std::unique_ptr<IteratorBase> IgniteDataset::MakeIteratorInternal(
+    const string& prefix) const {
+  return std::unique_ptr<IteratorBase>(new IgniteDatasetIterator(
+      {this, strings::StrCat(prefix, "::Ignite")}, this->host_, this->port_,
+      this->cache_name_, this->local_, this->part_, this->page_size_,
+      this->username_, this->password_, this->certfile_, this->keyfile_,
+      this->cert_password_, this->schema_, this->permutation_));
 }
 
-const tensorflow::DataTypeVector& IgniteDataset::output_dtypes() const {
-  return dtypes;
-}
+const DataTypeVector& IgniteDataset::output_dtypes() const { return dtypes_; }
 
-const std::vector<tensorflow::PartialTensorShape>&
-IgniteDataset::output_shapes() const {
-  return shapes;
+const std::vector<PartialTensorShape>& IgniteDataset::output_shapes() const {
+  return shapes_;
 }
 
-tensorflow::string IgniteDataset::DebugString() const {
-  return "IgniteDatasetOp::Dataset";
-}
+string IgniteDataset::DebugString() const { return "IgniteDatasetOp::Dataset"; }
 
-tensorflow::Status IgniteDataset::AsGraphDefInternal(
-    tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b,
-    tensorflow::Node** output) const {
-  return tensorflow::errors::Unimplemented(
+Status IgniteDataset::AsGraphDefInternal(SerializationContext* ctx,
+                                         DatasetGraphDefBuilder* b,
+                                         Node** output) const {
+  return errors::Unimplemented(
       "IgniteDataset does not support 'AsGraphDefInternal'");
 }
 
 void IgniteDataset::SchemaToTypes() {
-  for (auto e : schema) {
+  for (auto e : schema_) {
     if (e == BYTE || e == BYTE_ARR) {
-      dtypes.push_back(tensorflow::DT_UINT8);
+      dtypes_.push_back(DT_UINT8);
     } else if (e == SHORT || e == SHORT_ARR) {
-      dtypes.push_back(tensorflow::DT_INT16);
+      dtypes_.push_back(DT_INT16);
     } else if (e == INT || e == INT_ARR) {
-      dtypes.push_back(tensorflow::DT_INT32);
+      dtypes_.push_back(DT_INT32);
     } else if (e == LONG || e == LONG_ARR) {
-      dtypes.push_back(tensorflow::DT_INT64);
+      dtypes_.push_back(DT_INT64);
     } else if (e == FLOAT || e == FLOAT_ARR) {
-      dtypes.push_back(tensorflow::DT_FLOAT);
+      dtypes_.push_back(DT_FLOAT);
     } else if (e == DOUBLE || e == DOUBLE_ARR) {
-      dtypes.push_back(tensorflow::DT_DOUBLE);
+      dtypes_.push_back(DT_DOUBLE);
     } else if (e == UCHAR || e == UCHAR_ARR) {
-      dtypes.push_back(tensorflow::DT_UINT8);
+      dtypes_.push_back(DT_UINT8);
     } else if (e == BOOL || e == BOOL_ARR) {
-      dtypes.push_back(tensorflow::DT_BOOL);
+      dtypes_.push_back(DT_BOOL);
     } else if (e == STRING || e == STRING_ARR) {
-      dtypes.push_back(tensorflow::DT_STRING);
+      dtypes_.push_back(DT_STRING);
     } else {
       LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
     }
@@ -109,15 +102,15 @@ void IgniteDataset::SchemaToTypes() {
 }
 
 void IgniteDataset::SchemaToShapes() {
-  for (auto e : schema) {
+  for (auto e : schema_) {
     if (e >= 1 && e < 10) {
-      shapes.push_back(tensorflow::PartialTensorShape({}));
+      shapes_.push_back(PartialTensorShape({}));
     } else if (e >= 12 && e < 21) {
-      shapes.push_back(tensorflow::PartialTensorShape({-1}));
+      shapes_.push_back(PartialTensorShape({-1}));
     } else {
       LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
     }
   }
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
index 2120dfd342..d3fec5910b 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
@@ -15,51 +15,48 @@ limitations under the License.
 
 #include "tensorflow/core/framework/dataset.h"
 
-namespace ignite {
+namespace tensorflow {
 
-class IgniteDataset : public tensorflow::DatasetBase {
+class IgniteDataset : public DatasetBase {
  public:
-  IgniteDataset(tensorflow::OpKernelContext* ctx, std::string cache_name,
-                std::string host, tensorflow::int32 port, bool local,
-                tensorflow::int32 part, tensorflow::int32 page_size,
+  IgniteDataset(OpKernelContext* ctx, std::string cache_name, std::string host,
+                int32 port, bool local, int32 part, int32 page_size,
                 std::string username, std::string password,
                 std::string certfile, std::string keyfile,
-                std::string cert_password,
-                std::vector<tensorflow::int32> schema,
-                std::vector<tensorflow::int32> permutation);
+                std::string cert_password, std::vector<int32> schema,
+                std::vector<int32> permutation);
   ~IgniteDataset();
-  std::unique_ptr<tensorflow::IteratorBase> MakeIteratorInternal(
-      const tensorflow::string& prefix) const override;
-  const tensorflow::DataTypeVector& output_dtypes() const override;
-  const std::vector<tensorflow::PartialTensorShape>& output_shapes()
-      const override;
-  tensorflow::string DebugString() const override;
+  std::unique_ptr<IteratorBase> MakeIteratorInternal(
+      const string& prefix) const override;
+  const DataTypeVector& output_dtypes() const override;
+  const std::vector<PartialTensorShape>& output_shapes() const override;
+  string DebugString() const override;
 
  protected:
-  tensorflow::Status AsGraphDefInternal(
-      tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b,
-      tensorflow::Node** output) const override;
+  Status AsGraphDefInternal(SerializationContext* ctx,
+                            DatasetGraphDefBuilder* b,
+                            Node** output) const override;
 
  private:
-  const std::string cache_name;
-  const std::string host;
-  const tensorflow::int32 port;
-  const bool local;
-  const tensorflow::int32 part;
-  const tensorflow::int32 page_size;
-  const std::string username;
-  const std::string password;
-  const std::string certfile;
-  const std::string keyfile;
-  const std::string cert_password;
-  const std::vector<tensorflow::int32> schema;
-  const std::vector<tensorflow::int32> permutation;
-
-  tensorflow::DataTypeVector dtypes;
-  std::vector<tensorflow::PartialTensorShape> shapes;
+  const std::string cache_name_;
+  const std::string host_;
+  const int32 port_;
+  const bool local_;
+  const int32 part_;
+  const int32 page_size_;
+  const std::string username_;
+  const std::string password_;
+  const std::string certfile_;
+  const std::string keyfile_;
+  const std::string cert_password_;
+  const std::vector<int32> schema_;
+  const std::vector<int32> permutation_;
+
+  DataTypeVector dtypes_;
+  std::vector<PartialTensorShape> shapes_;
 
   void SchemaToTypes();
   void SchemaToShapes();
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
index 03cc3c1291..1774585ecd 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
@@ -22,270 +22,262 @@ limitations under the License.
 #include <time.h>
 #include <chrono>
 
-namespace ignite {
-
-#define CHECK_STATUS(status) \
-  if (!status.ok()) return status;
+namespace tensorflow {
 
 IgniteDatasetIterator::IgniteDatasetIterator(
-    const Params& params, std::string host, tensorflow::int32 port,
-    std::string cache_name, bool local, tensorflow::int32 part,
-    tensorflow::int32 page_size, std::string username, std::string password,
-    std::string certfile, std::string keyfile, std::string cert_password,
-    std::vector<tensorflow::int32> schema,
-    std::vector<tensorflow::int32> permutation)
-    : tensorflow::DatasetIterator<IgniteDataset>(params),
-      cache_name(cache_name),
-      local(local),
-      part(part),
-      page_size(page_size),
-      username(username),
-      password(password),
-      schema(schema),
-      permutation(permutation),
-      remainder(-1),
-      cursor_id(-1),
-      last_page(false) {
+    const Params& params, std::string host, int32 port, std::string cache_name,
+    bool local, int32 part, int32 page_size, std::string username,
+    std::string password, std::string certfile, std::string keyfile,
+    std::string cert_password, std::vector<int32> schema,
+    std::vector<int32> permutation)
+    : DatasetIterator<IgniteDataset>(params),
+      cache_name_(cache_name),
+      local_(local),
+      part_(part),
+      page_size_(page_size),
+      username_(username),
+      password_(password),
+      schema_(schema),
+      permutation_(permutation),
+      remainder_(-1),
+      cursor_id_(-1),
+      last_page_(false) {
   Client* p_client = new PlainClient(host, port);
 
   if (certfile.empty())
-    client = std::unique_ptr<Client>(p_client);
+    client_ = std::unique_ptr<Client>(p_client);
   else
-    client = std::unique_ptr<Client>(new SslWrapper(
+    client_ = std::unique_ptr<Client>(new SslWrapper(
         std::unique_ptr<Client>(p_client), certfile, keyfile, cert_password));
 
   LOG(INFO) << "Ignite Dataset Iterator created";
 }
 
 IgniteDatasetIterator::~IgniteDatasetIterator() {
-  tensorflow::Status status = CloseConnection();
+  Status status = CloseConnection();
   if (!status.ok()) LOG(ERROR) << status.ToString();
 
   LOG(INFO) << "Ignite Dataset Iterator destroyed";
 }
 
-tensorflow::Status IgniteDatasetIterator::EstablishConnection() {
-  if (!client->IsConnected()) {
-    tensorflow::Status status = client->Connect();
+Status IgniteDatasetIterator::EstablishConnection() {
+  if (!client_->IsConnected()) {
+    Status status = client_->Connect();
     if (!status.ok()) return status;
 
     status = Handshake();
     if (!status.ok()) {
-      tensorflow::Status disconnect_status = client->Disconnect();
+      Status disconnect_status = client_->Disconnect();
       if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString();
 
       return status;
     }
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::CloseConnection() {
-  if (cursor_id != -1 && !last_page) {
-    tensorflow::Status conn_status = EstablishConnection();
+Status IgniteDatasetIterator::CloseConnection() {
+  if (cursor_id_ != -1 && !last_page_) {
+    Status conn_status = EstablishConnection();
     if (!conn_status.ok()) return conn_status;
 
-    CHECK_STATUS(client->WriteInt(18));  // Message length
-    CHECK_STATUS(
-        client->WriteShort(close_connection_opcode));  // Operation code
-    CHECK_STATUS(client->WriteLong(0));                // Request ID
-    CHECK_STATUS(client->WriteLong(cursor_id));        // Resource ID
+    TF_RETURN_IF_ERROR(client_->WriteInt(18));  // Message length
+    TF_RETURN_IF_ERROR(
+        client_->WriteShort(close_connection_opcode));   // Operation code
+    TF_RETURN_IF_ERROR(client_->WriteLong(0));           // Request ID
+    TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_));  // Resource ID
 
     int32_t res_len;
-    CHECK_STATUS(client->ReadInt(res_len));
+    TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
     if (res_len < 12)
-      return tensorflow::errors::Internal(
-          "Close Resource Response is corrupted");
+      return errors::Internal("Close Resource Response is corrupted");
 
     int64_t req_id;
-    CHECK_STATUS(client->ReadLong(req_id));
+    TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
     int32_t status;
-    CHECK_STATUS(client->ReadInt(status));
+    TF_RETURN_IF_ERROR(client_->ReadInt(&status));
     if (status != 0) {
       uint8_t err_msg_header;
-      CHECK_STATUS(client->ReadByte(err_msg_header));
+      TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
       if (err_msg_header == string_val) {
         int32_t err_msg_length;
-        CHECK_STATUS(client->ReadInt(err_msg_length));
+        TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
         uint8_t* err_msg_c = new uint8_t[err_msg_length];
-        CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+        TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
         std::string err_msg((char*)err_msg_c, err_msg_length);
         delete[] err_msg_c;
 
-        return tensorflow::errors::Internal("Close Resource Error [status=",
-                                            status, ", message=", err_msg, "]");
+        return errors::Internal("Close Resource Error [status=", status,
+                                ", message=", err_msg, "]");
       }
-      return tensorflow::errors::Internal("Close Resource Error [status=",
-                                          status, "]");
+      return errors::Internal("Close Resource Error [status=", status, "]");
     }
 
-    LOG(INFO) << "Query Cursor " << cursor_id << " is closed";
+    LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed";
 
-    cursor_id = -1;
+    cursor_id_ = -1;
 
-    return client->Disconnect();
+    return client_->Disconnect();
   } else {
-    LOG(INFO) << "Query Cursor " << cursor_id << " is already closed";
+    LOG(INFO) << "Query Cursor " << cursor_id_ << " is already closed";
   }
 
-  return client->IsConnected() ? client->Disconnect()
-                               : tensorflow::Status::OK();
+  return client_->IsConnected() ? client_->Disconnect() : Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::GetNextInternal(
-    tensorflow::IteratorContext* ctx,
-    std::vector<tensorflow::Tensor>* out_tensors, bool* end_of_sequence) {
-  if (remainder == 0 && last_page) {
-    LOG(INFO) << "Query Cursor " << cursor_id << " is closed";
+Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx,
+                                              std::vector<Tensor>* out_tensors,
+                                              bool* end_of_sequence) {
+  if (remainder_ == 0 && last_page_) {
+    LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed";
 
-    cursor_id = -1;
+    cursor_id_ = -1;
     *end_of_sequence = true;
-    return tensorflow::Status::OK();
+    return Status::OK();
   } else {
-    tensorflow::Status status = EstablishConnection();
+    Status status = EstablishConnection();
     if (!status.ok()) return status;
 
-    if (remainder == -1 || remainder == 0) {
-      tensorflow::Status status =
-          remainder == -1 ? ScanQuery() : LoadNextPage();
+    if (remainder_ == -1 || remainder_ == 0) {
+      Status status = remainder_ == -1 ? ScanQuery() : LoadNextPage();
       if (!status.ok()) return status;
     }
 
-    uint8_t* initial_ptr = ptr;
+    uint8_t* initial_ptr = ptr_;
     std::vector<int32_t> types;
-    std::vector<tensorflow::Tensor> tensors;
+    std::vector<Tensor> tensors;
 
-    status = parser.Parse(ptr, tensors, types);  // Parse key
+    status = parser_.Parse(&ptr_, &tensors, &types);  // Parse key
     if (!status.ok()) return status;
 
-    status = parser.Parse(ptr, tensors, types);  // Parse val
+    status = parser_.Parse(&ptr_, &tensors, &types);  // Parse val
     if (!status.ok()) return status;
 
-    remainder -= (ptr - initial_ptr);
+    remainder_ -= (ptr_ - initial_ptr);
 
     out_tensors->resize(tensors.size());
     for (int32_t i = 0; i < tensors.size(); i++)
-      (*out_tensors)[permutation[i]] = std::move(tensors[i]);
+      (*out_tensors)[permutation_[i]] = std::move(tensors[i]);
 
     *end_of_sequence = false;
-    return tensorflow::Status::OK();
+    return Status::OK();
   }
 
   *end_of_sequence = true;
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::SaveInternal(
-    tensorflow::IteratorStateWriter* writer) {
-  return tensorflow::errors::Unimplemented(
+Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) {
+  return errors::Unimplemented(
       "Iterator for IgniteDataset does not support 'SaveInternal'");
 }
 
-tensorflow::Status IgniteDatasetIterator::RestoreInternal(
-    tensorflow::IteratorContext* ctx, tensorflow::IteratorStateReader* reader) {
-  return tensorflow::errors::Unimplemented(
+Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx,
+                                              IteratorStateReader* reader) {
+  return errors::Unimplemented(
       "Iterator for IgniteDataset does not support 'RestoreInternal')");
 }
 
-tensorflow::Status IgniteDatasetIterator::Handshake() {
+Status IgniteDatasetIterator::Handshake() {
   int32_t msg_len = 8;
 
-  if (username.empty())
+  if (username_.empty())
     msg_len += 1;
   else
-    msg_len += 5 + username.length();
+    msg_len += 5 + username_.length();
 
-  if (password.empty())
+  if (password_.empty())
     msg_len += 1;
   else
-    msg_len += 5 + password.length();
-
-  CHECK_STATUS(client->WriteInt(msg_len));
-  CHECK_STATUS(client->WriteByte(1));
-  CHECK_STATUS(client->WriteShort(protocol_major_version));
-  CHECK_STATUS(client->WriteShort(protocol_minor_version));
-  CHECK_STATUS(client->WriteShort(protocol_patch_version));
-  CHECK_STATUS(client->WriteByte(2));
-  if (username.empty()) {
-    CHECK_STATUS(client->WriteByte(null_val));
+    msg_len += 5 + password_.length();
+
+  TF_RETURN_IF_ERROR(client_->WriteInt(msg_len));
+  TF_RETURN_IF_ERROR(client_->WriteByte(1));
+  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_major_version));
+  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_minor_version));
+  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_patch_version));
+  TF_RETURN_IF_ERROR(client_->WriteByte(2));
+  if (username_.empty()) {
+    TF_RETURN_IF_ERROR(client_->WriteByte(null_val));
   } else {
-    CHECK_STATUS(client->WriteByte(string_val));
-    CHECK_STATUS(client->WriteInt(username.length()));
-    CHECK_STATUS(
-        client->WriteData((uint8_t*)username.c_str(), username.length()));
+    TF_RETURN_IF_ERROR(client_->WriteByte(string_val));
+    TF_RETURN_IF_ERROR(client_->WriteInt(username_.length()));
+    TF_RETURN_IF_ERROR(
+        client_->WriteData((uint8_t*)username_.c_str(), username_.length()));
   }
 
-  if (password.empty()) {
-    CHECK_STATUS(client->WriteByte(null_val));
+  if (password_.empty()) {
+    TF_RETURN_IF_ERROR(client_->WriteByte(null_val));
   } else {
-    CHECK_STATUS(client->WriteByte(string_val));
-    CHECK_STATUS(client->WriteInt(password.length()));
-    CHECK_STATUS(
-        client->WriteData((uint8_t*)password.c_str(), password.length()));
+    TF_RETURN_IF_ERROR(client_->WriteByte(string_val));
+    TF_RETURN_IF_ERROR(client_->WriteInt(password_.length()));
+    TF_RETURN_IF_ERROR(
+        client_->WriteData((uint8_t*)password_.c_str(), password_.length()));
   }
 
   int32_t handshake_res_len;
-  CHECK_STATUS(client->ReadInt(handshake_res_len));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&handshake_res_len));
   uint8_t handshake_res;
-  CHECK_STATUS(client->ReadByte(handshake_res));
+  TF_RETURN_IF_ERROR(client_->ReadByte(&handshake_res));
 
   LOG(INFO) << "Handshake length " << handshake_res_len << ", res "
             << (int16_t)handshake_res;
 
   if (handshake_res != 1) {
     int16_t serv_ver_major;
-    CHECK_STATUS(client->ReadShort(serv_ver_major));
+    TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_major));
     int16_t serv_ver_minor;
-    CHECK_STATUS(client->ReadShort(serv_ver_minor));
+    TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_minor));
     int16_t serv_ver_patch;
-    CHECK_STATUS(client->ReadShort(serv_ver_patch));
+    TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_patch));
     uint8_t header;
-    CHECK_STATUS(client->ReadByte(header));
+    TF_RETURN_IF_ERROR(client_->ReadByte(&header));
 
     if (header == string_val) {
       int32_t length;
-      CHECK_STATUS(client->ReadInt(length));
+      TF_RETURN_IF_ERROR(client_->ReadInt(&length));
       uint8_t* err_msg_c = new uint8_t[length];
-      CHECK_STATUS(client->ReadData(err_msg_c, length));
+      TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, length));
       std::string err_msg((char*)err_msg_c, length);
       delete[] err_msg_c;
 
-      return tensorflow::errors::Internal(
-          "Handshake Error [result=", handshake_res, ", version=",
-          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch,
-          ", message='", err_msg, "']");
+      return errors::Internal("Handshake Error [result=", handshake_res,
+                              ", version=", serv_ver_major, ".", serv_ver_minor,
+                              ".", serv_ver_patch, ", message='", err_msg,
+                              "']");
     } else if (header == null_val) {
-      return tensorflow::errors::Internal(
-          "Handshake Error [result=", handshake_res, ", version=",
-          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]");
+      return errors::Internal("Handshake Error [result=", handshake_res,
+                              ", version=", serv_ver_major, ".", serv_ver_minor,
+                              ".", serv_ver_patch, "]");
     } else {
-      return tensorflow::errors::Internal(
-          "Handshake Error [result=", handshake_res, ", version=",
-          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]");
+      return errors::Internal("Handshake Error [result=", handshake_res,
+                              ", version=", serv_ver_major, ".", serv_ver_minor,
+                              ".", serv_ver_patch, "]");
     }
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::ScanQuery() {
-  CHECK_STATUS(client->WriteInt(25));                        // Message length
-  CHECK_STATUS(client->WriteShort(scan_query_opcode));       // Operation code
-  CHECK_STATUS(client->WriteLong(0));                        // Request ID
-  CHECK_STATUS(client->WriteInt(JavaHashCode(cache_name)));  // Cache name
-  CHECK_STATUS(client->WriteByte(0));                        // Flags
-  CHECK_STATUS(client->WriteByte(null_val));                 // Filter object
-  CHECK_STATUS(client->WriteInt(page_size));                 // Cursor page size
-  CHECK_STATUS(client->WriteInt(part));    // Partition to query
-  CHECK_STATUS(client->WriteByte(local));  // Local flag
+Status IgniteDatasetIterator::ScanQuery() {
+  TF_RETURN_IF_ERROR(client_->WriteInt(25));                   // Message length
+  TF_RETURN_IF_ERROR(client_->WriteShort(scan_query_opcode));  // Operation code
+  TF_RETURN_IF_ERROR(client_->WriteLong(0));                   // Request ID
+  TF_RETURN_IF_ERROR(
+      client_->WriteInt(JavaHashCode(cache_name_)));  // Cache name
+  TF_RETURN_IF_ERROR(client_->WriteByte(0));          // Flags
+  TF_RETURN_IF_ERROR(client_->WriteByte(null_val));   // Filter object
+  TF_RETURN_IF_ERROR(client_->WriteInt(page_size_));  // Cursor page size
+  TF_RETURN_IF_ERROR(client_->WriteInt(part_));       // part_ition to query
+  TF_RETURN_IF_ERROR(client_->WriteByte(local_));     // local_ flag
 
   int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
                            std::chrono::system_clock::now().time_since_epoch())
                            .count();
 
   int32_t res_len;
-  CHECK_STATUS(client->ReadInt(res_len));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
 
   int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
                           std::chrono::system_clock::now().time_since_epoch())
@@ -293,82 +285,81 @@ tensorflow::Status IgniteDatasetIterator::ScanQuery() {
 
   LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms";
 
-  if (res_len < 12)
-    return tensorflow::errors::Internal("Scan Query Response is corrupted");
+  if (res_len < 12) return errors::Internal("Scan Query Response is corrupted");
 
   int64_t req_id;
-  CHECK_STATUS(client->ReadLong(req_id));
+  TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
 
   int32_t status;
-  CHECK_STATUS(client->ReadInt(status));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&status));
 
   if (status != 0) {
     uint8_t err_msg_header;
-    CHECK_STATUS(client->ReadByte(err_msg_header));
+    TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
 
     if (err_msg_header == string_val) {
       int32_t err_msg_length;
-      CHECK_STATUS(client->ReadInt(err_msg_length));
+      TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
 
       uint8_t* err_msg_c = new uint8_t[err_msg_length];
-      CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+      TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
       std::string err_msg((char*)err_msg_c, err_msg_length);
       delete[] err_msg_c;
 
-      return tensorflow::errors::Internal("Scan Query Error [status=", status,
-                                          ", message=", err_msg, "]");
+      return errors::Internal("Scan Query Error [status=", status, ", message=",
+                              err_msg, "]");
     }
-    return tensorflow::errors::Internal("Scan Query Error [status=", status,
-                                        "]");
+    return errors::Internal("Scan Query Error [status=", status, "]");
   }
 
-  CHECK_STATUS(client->ReadLong(cursor_id));
+  TF_RETURN_IF_ERROR(client_->ReadLong(&cursor_id_));
 
-  LOG(INFO) << "Query Cursor " << cursor_id << " is opened";
+  LOG(INFO) << "Query Cursor " << cursor_id_ << " is opened";
 
   int32_t row_cnt;
-  CHECK_STATUS(client->ReadInt(row_cnt));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt));
 
-  remainder = res_len - 25;
-  page = std::unique_ptr<uint8_t>(new uint8_t[remainder]);
-  ptr = page.get();
+  remainder_ = res_len - 25;
+  page_ = std::unique_ptr<uint8_t>(new uint8_t[remainder_]);
+  ptr_ = page_.get();
 
   int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
                       std::chrono::system_clock::now().time_since_epoch())
                       .count();
 
-  CHECK_STATUS(client->ReadData(ptr, remainder));
+  TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_));
 
   int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
                      std::chrono::system_clock::now().time_since_epoch())
                      .count();
   ;
 
-  double size_in_mb = 1.0 * remainder / 1024 / 1024;
+  double size_in_mb = 1.0 * remainder_ / 1024 / 1024;
   double time_in_s = 1.0 * (stop - start) / 1000;
   LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
             << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
 
   uint8_t last_page_b;
-  CHECK_STATUS(client->ReadByte(last_page_b));
+  TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b));
 
-  last_page = !last_page_b;
+  last_page_ = !last_page_b;
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::LoadNextPage() {
-  CHECK_STATUS(client->WriteInt(18));                       // Message length
-  CHECK_STATUS(client->WriteShort(load_next_page_opcode));  // Operation code
-  CHECK_STATUS(client->WriteLong(0));                       // Request ID
-  CHECK_STATUS(client->WriteLong(cursor_id));               // Cursor ID
+Status IgniteDatasetIterator::LoadNextPage() {
+  TF_RETURN_IF_ERROR(client_->WriteInt(18));  // Message length
+  TF_RETURN_IF_ERROR(
+      client_->WriteShort(load_next_page_opcode));     // Operation code
+  TF_RETURN_IF_ERROR(client_->WriteLong(0));           // Request ID
+  TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_));  // Cursor ID
 
   int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
                            std::chrono::system_clock::now().time_since_epoch())
                            .count();
 
   int32_t res_len;
-  CHECK_STATUS(client->ReadInt(res_len));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
 
   int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
                           std::chrono::system_clock::now().time_since_epoch())
@@ -377,66 +368,65 @@ tensorflow::Status IgniteDatasetIterator::LoadNextPage() {
   LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms";
 
   if (res_len < 12)
-    return tensorflow::errors::Internal("Load Next Page Response is corrupted");
+    return errors::Internal("Load Next Page Response is corrupted");
 
   int64_t req_id;
-  CHECK_STATUS(client->ReadLong(req_id));
+  TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
 
   int32_t status;
-  CHECK_STATUS(client->ReadInt(status));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&status));
 
   if (status != 0) {
     uint8_t err_msg_header;
-    CHECK_STATUS(client->ReadByte(err_msg_header));
+    TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
 
     if (err_msg_header == string_val) {
       int32_t err_msg_length;
-      CHECK_STATUS(client->ReadInt(err_msg_length));
+      TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
 
       uint8_t* err_msg_c = new uint8_t[err_msg_length];
-      CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+      TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
       std::string err_msg((char*)err_msg_c, err_msg_length);
       delete[] err_msg_c;
 
-      return tensorflow::errors::Internal("Load Next Page Error [status=",
-                                          status, ", message=", err_msg, "]");
+      return errors::Internal("Load Next Page Error [status=", status,
+                              ", message=", err_msg, "]");
     }
-    return tensorflow::errors::Internal("Load Next Page Error [status=", status,
-                                        "]");
+    return errors::Internal("Load Next Page Error [status=", status, "]");
   }
 
   int32_t row_cnt;
-  CHECK_STATUS(client->ReadInt(row_cnt));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt));
 
-  remainder = res_len - 17;
-  page = std::unique_ptr<uint8_t>(new uint8_t[remainder]);
-  ptr = page.get();
+  remainder_ = res_len - 17;
+  page_ = std::unique_ptr<uint8_t>(new uint8_t[remainder_]);
+  ptr_ = page_.get();
 
   int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
                       std::chrono::system_clock::now().time_since_epoch())
                       .count();
 
-  CHECK_STATUS(client->ReadData(ptr, remainder));
+  TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_));
 
   int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
                      std::chrono::system_clock::now().time_since_epoch())
                      .count();
   ;
 
-  double size_in_mb = 1.0 * remainder / 1024 / 1024;
+  double size_in_mb = 1.0 * remainder_ / 1024 / 1024;
   double time_in_s = 1.0 * (stop - start) / 1000;
   LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
             << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
 
   uint8_t last_page_b;
-  CHECK_STATUS(client->ReadByte(last_page_b));
+  TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b));
 
-  last_page = !last_page_b;
+  last_page_ = !last_page_b;
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-int32_t IgniteDatasetIterator::JavaHashCode(std::string str) {
+int32_t IgniteDatasetIterator::JavaHashCode(std::string str) const {
   int32_t h = 0;
   for (char& c : str) {
     h = 31 * h + c;
@@ -444,4 +434,4 @@ int32_t IgniteDatasetIterator::JavaHashCode(std::string str) {
   return h;
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
index d1df4527f9..5858dbfcb9 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
@@ -14,65 +14,55 @@ limitations under the License.
 ==============================================================================*/
 
 #include "ignite_binary_object_parser.h"
-#include "ignite_dataset.h"
-
-#ifndef IGNITE_CLIENT_H
-#define IGNITE_CLIENT_H
 #include "ignite_client.h"
-#endif
+#include "ignite_dataset.h"
 
-namespace ignite {
+namespace tensorflow {
 
-class IgniteDatasetIterator
-    : public tensorflow::DatasetIterator<IgniteDataset> {
+class IgniteDatasetIterator : public DatasetIterator<IgniteDataset> {
  public:
-  IgniteDatasetIterator(const Params& params, std::string host,
-                        tensorflow::int32 port, std::string cache_name,
-                        bool local, tensorflow::int32 part,
-                        tensorflow::int32 page_size, std::string username,
+  IgniteDatasetIterator(const Params& params, std::string host, int32 port,
+                        std::string cache_name, bool local, int32 part,
+                        int32 page_size, std::string username,
                         std::string password, std::string certfile,
                         std::string keyfile, std::string cert_password,
-                        std::vector<tensorflow::int32> schema,
-                        std::vector<tensorflow::int32> permutation);
+                        std::vector<int32> schema,
+                        std::vector<int32> permutation);
   ~IgniteDatasetIterator();
-  tensorflow::Status GetNextInternal(
-      tensorflow::IteratorContext* ctx,
-      std::vector<tensorflow::Tensor>* out_tensors,
-      bool* end_of_sequence) override;
+  Status GetNextInternal(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
+                         bool* end_of_sequence) override;
 
  protected:
-  tensorflow::Status SaveInternal(
-      tensorflow::IteratorStateWriter* writer) override;
-  tensorflow::Status RestoreInternal(
-      tensorflow::IteratorContext* ctx,
-      tensorflow::IteratorStateReader* reader) override;
+  Status SaveInternal(IteratorStateWriter* writer) override;
+  Status RestoreInternal(IteratorContext* ctx,
+                         IteratorStateReader* reader) override;
 
  private:
-  std::unique_ptr<Client> client;
-  BinaryObjectParser parser;
+  std::unique_ptr<Client> client_;
+  BinaryObjectParser parser_;
 
-  const std::string cache_name;
-  const bool local;
-  const tensorflow::int32 part;
-  const tensorflow::int32 page_size;
-  const std::string username;
-  const std::string password;
-  const std::vector<tensorflow::int32> schema;
-  const std::vector<tensorflow::int32> permutation;
+  const std::string cache_name_;
+  const bool local_;
+  const int32 part_;
+  const int32 page_size_;
+  const std::string username_;
+  const std::string password_;
+  const std::vector<int32> schema_;
+  const std::vector<int32> permutation_;
 
-  int32_t remainder;
-  int64_t cursor_id;
-  bool last_page;
+  int32_t remainder_;
+  int64_t cursor_id_;
+  bool last_page_;
 
-  std::unique_ptr<uint8_t> page;
-  uint8_t* ptr;
+  std::unique_ptr<uint8_t> page_;
+  uint8_t* ptr_;
 
-  tensorflow::Status EstablishConnection();
-  tensorflow::Status CloseConnection();
-  tensorflow::Status Handshake();
-  tensorflow::Status ScanQuery();
-  tensorflow::Status LoadNextPage();
-  int32_t JavaHashCode(std::string str);
+  Status EstablishConnection();
+  Status CloseConnection();
+  Status Handshake();
+  Status ScanQuery();
+  Status LoadNextPage();
+  int32_t JavaHashCode(std::string str) const;
 };
 
 constexpr uint8_t null_val = 101;
@@ -84,4 +74,4 @@ constexpr int16_t scan_query_opcode = 2000;
 constexpr int16_t load_next_page_opcode = 2001;
 constexpr int16_t close_connection_opcode = 0;
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index 543b5e4afc..89eecf9c14 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
+namespace {
 
 class IgniteDatasetOp : public DatasetOpKernel {
  public:
@@ -132,14 +133,15 @@ class IgniteDatasetOp : public DatasetOpKernel {
       permutation.push_back(permutation_tensor->flat<int32>()(i));
     }
 
-    *output = new ignite::IgniteDataset(
-        ctx, cache_name, host, port, local, part, page_size, username, password,
-        certfile, keyfile, cert_password, std::move(schema),
-        std::move(permutation));
+    *output =
+        new IgniteDataset(ctx, cache_name, host, port, local, part, page_size,
+                          username, password, certfile, keyfile, cert_password,
+                          std::move(schema), std::move(permutation));
   }
 };
 
 REGISTER_KERNEL_BUILDER(Name("IgniteDataset").Device(DEVICE_CPU),
                         IgniteDatasetOp);
 
+}  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
index 5491af68d6..6f417a3cb5 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
@@ -13,31 +13,28 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef IGNITE_CLIENT_H
-#define IGNITE_CLIENT_H
 #include "ignite_client.h"
-#endif
 
 #include <string>
 
-namespace ignite {
+namespace tensorflow {
 
 class PlainClient : public Client {
  public:
   PlainClient(std::string host, int port);
   ~PlainClient();
 
-  virtual tensorflow::Status Connect();
-  virtual tensorflow::Status Disconnect();
+  virtual Status Connect();
+  virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length);
-  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length);
+  virtual Status ReadData(uint8_t* buf, int32_t length);
+  virtual Status WriteData(uint8_t* buf, int32_t length);
 
  private:
-  std::string host;
-  int port;
-  int sock;
+  const std::string host_;
+  const int port_;
+  int sock_;
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
index dbfa4f8786..a4c58a9563 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
@@ -29,104 +29,98 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 
-namespace ignite {
+namespace tensorflow {
 
 PlainClient::PlainClient(std::string host, int port)
-    : host(host), port(port), sock(-1) {}
+    : host_(host), port_(port), sock_(-1) {}
 
 PlainClient::~PlainClient() {
   if (IsConnected()) {
-    tensorflow::Status status = Disconnect();
+    Status status = Disconnect();
     if (!status.ok()) LOG(WARNING) << status.ToString();
   }
 }
 
-tensorflow::Status PlainClient::Connect() {
-  if (sock == -1) {
-    sock = socket(AF_INET, SOCK_STREAM, 0);
-    if (sock == -1)
-      return tensorflow::errors::Internal("Failed to create socket");
+Status PlainClient::Connect() {
+  if (sock_ == -1) {
+    sock_ = socket(AF_INET, SOCK_STREAM, 0);
+    if (sock_ == -1) return errors::Internal("Failed to create socket");
   }
 
   sockaddr_in server;
 
-  server.sin_addr.s_addr = inet_addr(host.c_str());
+  server.sin_addr.s_addr = inet_addr(host_.c_str());
   if (server.sin_addr.s_addr == -1) {
     hostent* he;
     in_addr** addr_list;
 
-    if ((he = gethostbyname(host.c_str())) == NULL)
-      return tensorflow::errors::Internal("Failed to resolve hostname \"", host,
-                                          "\"");
+    if ((he = gethostbyname(host_.c_str())) == NULL)
+      return errors::Internal("Failed to resolve hostname \"", host_, "\"");
 
     addr_list = (in_addr**)he->h_addr_list;
     if (addr_list[0] != NULL) server.sin_addr = *addr_list[0];
   }
 
   server.sin_family = AF_INET;
-  server.sin_port = htons(port);
+  server.sin_port = htons(port_);
 
-  if (connect(sock, (sockaddr*)&server, sizeof(server)) < 0)
-    return tensorflow::errors::Internal("Failed to connect to \"", host, ":",
-                                        port, "\"");
+  if (connect(sock_, (sockaddr*)&server, sizeof(server)) < 0)
+    return errors::Internal("Failed to connect to \"", host_, ":", port_, "\"");
 
-  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established";
+  LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established";
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status PlainClient::Disconnect() {
-  int close_res = close(sock);
-  sock = -1;
+Status PlainClient::Disconnect() {
+  int close_res = close(sock_);
+  sock_ = -1;
 
-  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" is closed";
+  LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" is closed";
 
-  return close_res == 0 ? tensorflow::Status::OK()
-                        : tensorflow::errors::Internal(
-                              "Failed to correctly close connection");
+  return close_res == 0
+             ? Status::OK()
+             : errors::Internal("Failed to correctly close connection");
 }
 
-bool PlainClient::IsConnected() { return sock != -1; }
+bool PlainClient::IsConnected() { return sock_ != -1; }
 
-int PlainClient::GetSocketDescriptor() { return sock; }
+int PlainClient::GetSocketDescriptor() { return sock_; }
 
-tensorflow::Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
+Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = recv(sock, buf, length - recieved, 0);
+    int res = recv(sock_, buf, length - recieved, 0);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while reading from socket: ", res, ", ",
-          std::string(strerror(errno)));
+      return errors::Internal("Error occured while reading from socket: ", res,
+                              ", ", std::string(strerror(errno)));
 
-    if (res == 0)
-      return tensorflow::errors::Internal("Server closed connection");
+    if (res == 0) return errors::Internal("Server closed connection");
 
     recieved += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
+Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = send(sock, buf, length - sent, 0);
+    int res = send(sock_, buf, length - sent, 0);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while writing into socket: ", res, ", ",
-          std::string(strerror(errno)));
+      return errors::Internal("Error occured while writing into socket: ", res,
+                              ", ", std::string(strerror(errno)));
 
     sent += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index f78c9b3627..7ba037f2d2 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -27,48 +27,45 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 
-namespace ignite {
+namespace tensorflow {
 
 PlainClient::PlainClient(std::string host, int port)
-    : host(host), port(port), sock(INVALID_SOCKET) {}
+    : host_(host), port_(port), sock_(INVALID_SOCKET) {}
 
 PlainClient::~PlainClient() {
   if (IsConnected()) {
-    tensorflow::Status status = Disconnect();
+    Status status = Disconnect();
     if (!status.ok()) LOG(WARNING) << status.ToString();
   }
 }
 
-tensorflow::Status PlainClient::Connect() {
+Status PlainClient::Connect() {
   WSADATA wsaData;
   addrinfo *result = NULL, *ptr = NULL, hints;
 
   int res = WSAStartup(MAKEWORD(2, 2), &wsaData);
-  if (res != 0)
-    return tensorflow::errors::Internal("WSAStartup failed with error: ", res);
+  if (res != 0) return errors::Internal("WSAStartup failed with error: ", res);
 
   ZeroMemory(&hints, sizeof(hints));
   hints.ai_family = AF_UNSPEC;
   hints.ai_socktype = SOCK_STREAM;
   hints.ai_protocol = IPPROTO_TCP;
 
-  res =
-      getaddrinfo(host.c_str(), std::to_string(port).c_str(), &hints, &result);
-  if (res != 0)
-    return tensorflow::errors::Internal("Getaddrinfo failed with error: ", res);
+  res = getaddrinfo(host_.c_str(), std::to_string(port_).c_str(), &hints,
+                    &result);
+  if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res);
 
   for (ptr = result; ptr != NULL; ptr = ptr->ai_next) {
-    sock = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
-    if (sock == INVALID_SOCKET) {
+    sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
+    if (sock_ == INVALID_SOCKET) {
       WSACleanup();
-      return tensorflow::errors::Internal("Socket failed with error: ",
-                                          WSAGetLastError());
+      return errors::Internal("Socket failed with error: ", WSAGetLastError());
     }
 
-    res = connect(sock, ptr->ai_addr, (int)ptr->ai_addrlen);
+    res = connect(sock_, ptr->ai_addr, (int)ptr->ai_addrlen);
     if (res == SOCKET_ERROR) {
-      closesocket(sock);
-      sock = INVALID_SOCKET;
+      closesocket(sock_);
+      sock_ = INVALID_SOCKET;
       continue;
     }
 
@@ -77,67 +74,63 @@ tensorflow::Status PlainClient::Connect() {
 
   freeaddrinfo(result);
 
-  if (sock == INVALID_SOCKET) {
+  if (sock_ == INVALID_SOCKET) {
     WSACleanup();
-    return tensorflow::errors::Internal("Unable to connect to server");
+    return errors::Internal("Unable to connect to server");
   }
 
-  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established";
+  LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established";
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status PlainClient::Disconnect() {
-  int res = shutdown(sock, SD_SEND);
-  closesocket(sock);
+Status PlainClient::Disconnect() {
+  int res = shutdown(sock_, SD_SEND);
+  closesocket(sock_);
   WSACleanup();
 
   if (res == SOCKET_ERROR)
-    return tensorflow::errors::Internal("Shutdown failed with error: ",
-                                        WSAGetLastError());
+    return errors::Internal("Shutdown failed with error: ", WSAGetLastError());
   else
-    return tensorflow::Status::OK();
+    return Status::OK();
 }
 
-bool PlainClient::IsConnected() { return sock != INVALID_SOCKET; }
+bool PlainClient::IsConnected() { return sock_ != INVALID_SOCKET; }
 
-int PlainClient::GetSocketDescriptor() { return sock; }
+int PlainClient::GetSocketDescriptor() { return sock_; }
 
-tensorflow::Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
+Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = recv(sock, buf, length - recieved, 0);
+    int res = recv(sock_, buf, length - recieved, 0);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while reading from socket: ", res);
+      return errors::Internal("Error occured while reading from socket: ", res);
 
-    if (res == 0)
-      return tensorflow::errors::Internal("Server closed connection");
+    if (res == 0) return errors::Internal("Server closed connection");
 
     recieved += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
+Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = send(sock, buf, length - sent, 0);
+    int res = send(sock_, buf, length - sent, 0);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while writing into socket: ", res);
+      return errors::Internal("Error occured while writing into socket: ", res);
 
     sent += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
index a1101b91f3..a2bc6b9609 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
@@ -21,7 +21,7 @@ limitations under the License.
 #include <openssl/err.h>
 #include <openssl/ssl.h>
 
-namespace ignite {
+namespace tensorflow {
 
 static int PasswordCb(char *buf, int size, int rwflag, void *password) {
   strncpy(buf, (char *)(password), size);
@@ -31,119 +31,112 @@ static int PasswordCb(char *buf, int size, int rwflag, void *password) {
 
 SslWrapper::SslWrapper(std::shared_ptr<Client> client, std::string certfile,
                        std::string keyfile, std::string cert_password)
-    : client(client),
-      certfile(certfile),
-      keyfile(keyfile),
-      cert_password(cert_password),
-      ctx(NULL) {}
+    : client_(client),
+      certfile_(certfile),
+      keyfile_(keyfile),
+      cert_password_(cert_password),
+      ctx_(NULL) {}
 
 SslWrapper::~SslWrapper() {
   if (IsConnected()) {
-    tensorflow::Status status = Disconnect();
+    Status status = Disconnect();
     if (!status.ok()) LOG(WARNING) << status.ToString();
   }
 
-  if (ctx != NULL) {
-    SSL_CTX_free(ctx);
-    ctx = NULL;
+  if (ctx_ != NULL) {
+    SSL_CTX_free(ctx_);
+    ctx_ = NULL;
   }
 }
 
-tensorflow::Status SslWrapper::InitSslContext() {
+Status SslWrapper::InitSslContext() {
   OpenSSL_add_all_algorithms();
   SSL_load_error_strings();
 
-  ctx = SSL_CTX_new(SSLv23_method());
-  if (ctx == NULL)
-    return tensorflow::errors::Internal("Couldn't create SSL context");
+  ctx_ = SSL_CTX_new(SSLv23_method());
+  if (ctx_ == NULL) return errors::Internal("Couldn't create SSL context");
 
-  SSL_CTX_set_default_passwd_cb(ctx, PasswordCb);
-  SSL_CTX_set_default_passwd_cb_userdata(ctx, (void *)cert_password.c_str());
+  SSL_CTX_set_default_passwd_cb(ctx_, PasswordCb);
+  SSL_CTX_set_default_passwd_cb_userdata(ctx_, (void *)cert_password_.c_str());
 
-  if (SSL_CTX_use_certificate_chain_file(ctx, certfile.c_str()) != 1)
-    return tensorflow::errors::Internal(
-        "Couldn't load cetificate chain (file '", certfile, "')");
+  if (SSL_CTX_use_certificate_chain_file(ctx_, certfile_.c_str()) != 1)
+    return errors::Internal("Couldn't load cetificate chain (file '", certfile_,
+                            "')");
 
-  std::string private_key_file = keyfile.empty() ? certfile : keyfile;
-  if (SSL_CTX_use_PrivateKey_file(ctx, private_key_file.c_str(),
+  std::string private_key_file = keyfile_.empty() ? certfile_ : keyfile_;
+  if (SSL_CTX_use_PrivateKey_file(ctx_, private_key_file.c_str(),
                                   SSL_FILETYPE_PEM) != 1)
-    return tensorflow::errors::Internal("Couldn't load private key (file '",
-                                        private_key_file, "')");
+    return errors::Internal("Couldn't load private key (file '",
+                            private_key_file, "')");
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status SslWrapper::Connect() {
-  tensorflow::Status status;
-
-  if (ctx == NULL) {
-    status = InitSslContext();
-    if (!status.ok()) return status;
+Status SslWrapper::Connect() {
+  if (ctx_ == NULL) {
+    TF_RETURN_IF_ERROR(InitSslContext());
   }
 
-  ssl = SSL_new(ctx);
-  if (ssl == NULL)
-    return tensorflow::errors::Internal("Failed to establish SSL connection");
+  ssl_ = SSL_new(ctx_);
+  if (ssl_ == NULL)
+    return errors::Internal("Failed to establish SSL connection");
 
-  status = client->Connect();
-  if (!status.ok()) return status;
+  TF_RETURN_IF_ERROR(client_->Connect());
 
-  SSL_set_fd(ssl, client->GetSocketDescriptor());
-  if (SSL_connect(ssl) != 1)
-    return tensorflow::errors::Internal("Failed to establish SSL connection");
+  SSL_set_fd(ssl_, client_->GetSocketDescriptor());
+  if (SSL_connect(ssl_) != 1)
+    return errors::Internal("Failed to establish SSL connection");
 
   LOG(INFO) << "SSL connection established";
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status SslWrapper::Disconnect() {
-  SSL_free(ssl);
+Status SslWrapper::Disconnect() {
+  SSL_free(ssl_);
 
   LOG(INFO) << "SSL connection closed";
 
-  return client->Disconnect();
+  return client_->Disconnect();
 }
 
-bool SslWrapper::IsConnected() { return client->IsConnected(); }
+bool SslWrapper::IsConnected() { return client_->IsConnected(); }
 
-int SslWrapper::GetSocketDescriptor() { return client->GetSocketDescriptor(); }
+int SslWrapper::GetSocketDescriptor() { return client_->GetSocketDescriptor(); }
 
-tensorflow::Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
+Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = SSL_read(ssl, buf, length - recieved);
+    int res = SSL_read(ssl_, buf, length - recieved);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while reading from SSL socket: ", res);
+      return errors::Internal("Error occured while reading from SSL socket: ",
+                              res);
 
-    if (res == 0)
-      return tensorflow::errors::Internal("Server closed SSL connection");
+    if (res == 0) return errors::Internal("Server closed SSL connection");
 
     recieved += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status SslWrapper::WriteData(uint8_t *buf, int32_t length) {
+Status SslWrapper::WriteData(uint8_t *buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = SSL_write(ssl, buf, length - sent);
+    int res = SSL_write(ssl_, buf, length - sent);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while writing into socket: ", res);
+      return errors::Internal("Error occured while writing into socket: ", res);
 
     sent += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
index e0c2a242dc..bbba6cc181 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
@@ -13,15 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef IGNITE_CLIENT_H
-#define IGNITE_CLIENT_H
 #include "ignite_client.h"
-#endif
 
 #include <openssl/ssl.h>
 #include <string>
 
-namespace ignite {
+namespace tensorflow {
 
 class SslWrapper : public Client {
  public:
@@ -29,21 +26,22 @@ class SslWrapper : public Client {
              std::string keyfile, std::string cert_password);
   ~SslWrapper();
 
-  virtual tensorflow::Status Connect();
-  virtual tensorflow::Status Disconnect();
+  virtual Status Connect();
+  virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length);
-  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length);
+  virtual Status ReadData(uint8_t* buf, int32_t length);
+  virtual Status WriteData(uint8_t* buf, int32_t length);
 
  private:
-  std::shared_ptr<Client> client;
-  std::string certfile;
-  std::string keyfile;
-  std::string cert_password;
-  SSL_CTX* ctx;
-  SSL* ssl;
-  tensorflow::Status InitSslContext();
+  std::shared_ptr<Client> client_;
+  std::string certfile_;
+  std::string keyfile_;
+  std::string cert_password_;
+  SSL_CTX* ctx_;
+  SSL* ssl_;
+
+  Status InitSslContext();
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
-- 
GitLab


From 1408a1563e73e69f68c1eb6f34a0976c7c950ad9 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 28 Aug 2018 11:32:57 +0300
Subject: [PATCH 0105/1357] Update README.md.

---
 tensorflow/contrib/ignite/README.md | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md
index f2596fc572..8fec4066c4 100644
--- a/tensorflow/contrib/ignite/README.md
+++ b/tensorflow/contrib/ignite/README.md
@@ -13,19 +13,20 @@
 ## Overview
 
 [Apache Ignite](https://ignite.apache.org/) is a memory-centric distributed database, caching, and processing platform for
-transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a datasource for neural network training, inference and all other computations supported by TensorFlow. 
+transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a data source for neural network training, inference and all other computations supported by TensorFlow. 
 
 ## Features
 
-Ignite Dataset provides a set of features that makes it possible to use it in a wide range of cases. The most important and interesting features are described below.
+Ignite Dataset provides features that that you can use in a wide range of cases. The most important and interesting features are described below.
 
 ### Distributed In-Memory Datasource
-[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that allows to avoid limitations of hard drive and provide high reading speed and ability to store and operate with as much data as you need in distributed cluster. Using of Ignite Dataset makes it possible to utilize all these advantages. 
+[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that provides fast data access. It allows you to avoid limitations of hard drive and and store and operate with as much data as you need in distributed cluster. You can utilize
+these benefits of Apache Ignite by using Ignite Dataset. Moreover, Ignite Dataset can be used for the following use-cases:
 - If you have a **gigabyte** of data you can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations. At the same time, you can store your data in Apache Ignite on the same machine and use it as a datasource for TensorFlow and thus avoid these limitations.
 - If you have a **terabyte** of data you probably still can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations again. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow and thus avoid these limitations.
 - If you have a **petabyte** of data you can't keep it on a single machine. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow.
 
-It's  important that Apache Ignite is not just a step of ETL pipeline between database or data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. Choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, in the same time, an ability to use this data for neural network training and inference.
+Note that Apache Ignite is not just a step of ETL pipeline between a database or a data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. By choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, at the same time, an ability to use this data for neural network training and inference.
 
 ```bash
 $ apache-ignite-fabric/bin/ignite.sh
@@ -55,7 +56,7 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL
 ```
 
 ### Structured Objects
-[Apache Ignite](https://ignite.apache.org/) allows to store any objects you would like to store. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects.
+[Apache Ignite](https://ignite.apache.org/) allows to store any type of objects. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects.
 
 ```python
 >>> import tensorflow as tf
@@ -81,7 +82,7 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL
     }
 }
 ```
- Neural network training and other computations require transformations that can be done as part of  [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset.
+ Neural network training and other computations require transformations that can be done as part of [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset.
 
 ```python
 >>> import tensorflow as tf
@@ -99,15 +100,15 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL
 
 ### Distributed Training
 
-TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is an ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. 
+TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is the ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. 
 
 <a href="https://www.codecogs.com/eqnedit.php?latex=\nabla[\sum_1^n(y&space;-&space;\hat{y})^2]&space;=&space;\nabla[\sum_1^{n_1}(y&space;-&space;\hat{y})^2]&space;&plus;&space;\nabla[\sum_{n_1}^{n_2}(y&space;-&space;\hat{y})^2]&space;&plus;&space;...&space;&plus;&space;\nabla[\sum_{n_{k-1}}^n(y&space;-&space;\hat{y})^2]" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\nabla[\sum_1^n(y&space;-&space;\hat{y})^2]&space;=&space;\nabla[\sum_1^{n_1}(y&space;-&space;\hat{y})^2]&space;&plus;&space;\nabla[\sum_{n_1}^{n_2}(y&space;-&space;\hat{y})^2]&space;&plus;&space;...&space;&plus;&space;\nabla[\sum_{n_{k-1}}^n(y&space;-&space;\hat{y})^2]" title="\nabla[\sum_1^n(y - \hat{y})^2] = \nabla[\sum_1^{n_1}(y - \hat{y})^2] + \nabla[\sum_{n_1}^{n_2}(y - \hat{y})^2] + ... + \nabla[\sum_{n_{k-1}}^n(y - \hat{y})^2]" /></a>
 
-Utilizing this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottleneck.
+Using this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottlenecks.
 
-Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL) we can specify the number of partitions the data will be partitioned on. If, for example, Apache Ignite cluster consists of 10 machines and we creates cache with 10 partitions then every machine will maintain approximately one data partition.
+Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL), we can specify the number of partitions the data will be partitioned on. For example, if an Apache Ignite cluster consists of 10 machines and we create cache with 10 partitions, then every machine will maintain approximately one data partition.
 
-Ignite Dataset allows to utilize these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that might be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting correstondent environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach we are able to assign specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset.
+Ignite Dataset allows using these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that can be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting correstondent environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach, we can assign a specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset.
 
 ```python
 >>> import tensorflow as tf
@@ -135,7 +136,7 @@ High-level TensorFlow API for [distributed training](https://www.tensorflow.org/
 
 ### SSL Connection
 
-Your data should not be accessible without any control. Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information please see [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation.
+Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information, please refer to the [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation.
 
 ```python
 >>> import tensorflow as tf
@@ -147,11 +148,11 @@ Your data should not be accessible without any control. Apache Ignite allows to
 
 ### Windows Support
 
-Ignite Dataset is fully compatible with Windows, so you can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems.
+Ignite Dataset is fully compatible with Windows. You can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems.
 
 ## Try it out
 
-The simplest way to try Ignite Dataset out is to run [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and then interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine:
+The simplest way to try Ignite Dataset is to run a [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and after start interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine:
 
 ```
 docker run -it -p 10800:10800 dmitrievanthony/ignite-with-mnist
@@ -163,4 +164,4 @@ After that you will be able to work with it following way:
 
 ## Limitations
 
-Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures.
+Presently, Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of an object structure.
-- 
GitLab


From 92019765d7b7db99d0235268d00f349b7a53d1a9 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Wed, 5 Sep 2018 14:47:20 +0000
Subject: [PATCH 0106/1357] Fix pylint checks, fix VS compilation issue.

---
 .../contrib/ignite/kernels/ignite_plain_client_windows.cc | 4 ++--
 .../contrib/ignite/python/ops/ignite_dataset_ops.py       | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index 7ba037f2d2..e1e2ee3b20 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -103,7 +103,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = recv(sock_, buf, length - recieved, 0);
+    int res = recv(sock_, (char*)buf, length - recieved, 0);
 
     if (res < 0)
       return errors::Internal("Error occured while reading from socket: ", res);
@@ -121,7 +121,7 @@ Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = send(sock_, buf, length - sent, 0);
+    int res = send(sock_, (char*)buf, length - sent, 0);
 
     if (res < 0)
       return errors::Internal("Error occured while writing into socket: ", res);
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
index 6fa073957a..60003ca3b7 100644
--- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -66,13 +66,13 @@ class Readable():
 
   def __read(self, data_type, length):
     """Reads, unpacks and returns specified type (little-endian)."""
-    buffer = self.read_data(length)
-    return struct.unpack("<" + data_type, buffer)[0]
+    data_buffer = self.read_data(length)
+    return struct.unpack("<" + data_type, data_buffer)[0]
 
 class DataBuffer(Readable):
   """DataBuffer class that exposes methods to read data from a byte buffer."""
 
-  def __init__(self, buffer):
+  def __init__(self, data_buffer):
     """Constructs a new instance of DataBuffer based on the specified byte
        buffer.
 
@@ -80,7 +80,7 @@ class DataBuffer(Readable):
       buffer: Buffer to be read.
     """
     Readable.__init__(self)
-    self.buffer = buffer
+    self.buffer = data_buffer
     self.ptr = 0
 
   def read_data(self, length):
-- 
GitLab


From 0b6654bc223f4f3807209043dc34ccb07b55474e Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 11 Sep 2018 09:50:47 +0000
Subject: [PATCH 0107/1357] Fix code style.

---
 .../ignite/kernels/ignite_dataset_ops.cc      |  2 +-
 .../kernels/ignite_plain_client_windows.cc    |  4 +--
 tensorflow/contrib/ignite/ops/dataset_ops.cc  | 34 +++++++++----------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index 89eecf9c14..d03404a460 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_dataset.h"
 #include <stdlib.h>
+#include "ignite_dataset.h"
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index e1e2ee3b20..8182fde6d9 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -103,7 +103,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = recv(sock_, (char*)buf, length - recieved, 0);
+    int res = recv(sock_, (char *)buf, length - recieved, 0);
 
     if (res < 0)
       return errors::Internal("Error occured while reading from socket: ", res);
@@ -121,7 +121,7 @@ Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = send(sock_, (char*)buf, length - sent, 0);
+    int res = send(sock_, (char *)buf, length - sent, 0);
 
     if (res < 0)
       return errors::Internal("Error occured while writing into socket: ", res);
diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc
index 17494d1cfd..fb16b290b1 100644
--- a/tensorflow/contrib/ignite/ops/dataset_ops.cc
+++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc
@@ -20,23 +20,23 @@ limitations under the License.
 namespace tensorflow {
 
 REGISTER_OP("IgniteDataset")
-  .Input("cache_name: string")
-  .Input("host: string")
-  .Input("port: int32")
-  .Input("local: bool")
-  .Input("part: int32")
-  .Input("page_size: int32")
-  .Input("username: string")
-  .Input("password: string")
-  .Input("certfile: string")
-  .Input("keyfile: string")
-  .Input("cert_password: string")
-  .Input("schema: int32")
-  .Input("permutation: int32")
-  .Output("handle: variant")
-  .SetIsStateful()
-  .SetShapeFn(shape_inference::ScalarShape)
-  .Doc(R"doc(
+    .Input("cache_name: string")
+    .Input("host: string")
+    .Input("port: int32")
+    .Input("local: bool")
+    .Input("part: int32")
+    .Input("page_size: int32")
+    .Input("username: string")
+    .Input("password: string")
+    .Input("certfile: string")
+    .Input("keyfile: string")
+    .Input("cert_password: string")
+    .Input("schema: int32")
+    .Input("permutation: int32")
+    .Output("handle: variant")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
 Apache Ignite is a memory-centric distributed database, caching, and processing
 platform for transactional, analytical, and streaming workloads, delivering 
 in-memory speeds at petabyte scale. This contrib package contains an 
-- 
GitLab


From 9ac00398d1c0e5f3f2e76dec15fa6646f5027633 Mon Sep 17 00:00:00 2001
From: Smokrow <moritz.kroeger@tu-dortmund.de>
Date: Tue, 11 Sep 2018 17:26:16 +0200
Subject: [PATCH 0108/1357] Update of flat_map

Rework based on Marks review
---
 tensorflow/python/data/ops/dataset_ops.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 14a1e3d803..2fc41a3b98 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1009,16 +1009,18 @@ class Dataset(object):
   def flat_map(self, map_func):
     """Maps `map_func` across this dataset and flattens the result. 
     
-    Will produce similar results to `tf.data.Dataset.interleave(cycle_length=1)`. 
+    `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since 
+    `flat_map` produces a similar outputs as `tf.data.Dataset.interleave(cycle_length=1)`
+    
     Use `flat_map` if you want to make sure, that the order of your dataset stays the same.
-    For example:
+    For example, to implement unbatch:
 
     ```python
     # NOTE: The following examples use `{ ... }` to represent the
     # contents of a dataset. '[...]' represents a tensor.
     a = {[1,2,3,4,5], [6,7,8,9], [10]}
     
-    a.flat_map(lambda x: Dataset.from_tensors(x)) == 
+    a.flat_map(lambda x: Dataset.from_tensor_slices(x)) == 
       {[1,2,3,4,5,6,7,8,9,10]}
     ```
     Args:
-- 
GitLab


From 82d082a2d775843a858919f4de84b3f6dfe0d62d Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 12 Sep 2018 02:11:42 +0000
Subject: [PATCH 0109/1357] Add unit test for TensorArray with int64 in GPU

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../python/kernel_tests/tensor_array_ops_test.py    | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index 6de6fbe767..b47e750f4b 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -1504,6 +1504,19 @@ class TensorArrayTest(test.TestCase):
         vdx, vdy = sess.run([dx, dy])
       self.assertAllClose(vdx, vdy)
 
+  def testTensorArrayInt64GPU(self):
+    if not test.is_gpu_available():
+       return
+    with self.test_session(use_gpu=True, force_gpu=True) as sess:
+      value = array_ops.placeholder(dtypes.int64)
+      ta = tensor_array_ops.TensorArray(dtype=dtypes.int64, size=2)
+      ta = ta.scatter([0, 1], value)
+      r0 = ta.read(0)
+      r1 = ta.read(1)
+      v0, v1 = sess.run([r0, r1], feed_dict={value: [-3, 100]})
+      self.assertAllEqual(v0, -3)
+      self.assertAllEqual(v1, 100)
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 5e9a9547f907599f6954fc5e28b7a78acf3b54eb Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Wed, 12 Sep 2018 11:02:12 +0800
Subject: [PATCH 0110/1357] Revert "Add XLA support for LeakyReluOp."

This reverts commit d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74.

Since bfloat16 was not supported by LeakyRelu, but it should be
supported in XLA Ops.
---
 tensorflow/compiler/tests/binary_ops_test.py  |  8 ----
 tensorflow/compiler/tests/unary_ops_test.py   |  5 ---
 tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 -------------------
 3 files changed, 55 deletions(-)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index c478ff4eea..17280e445b 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -178,14 +178,6 @@ class BinaryOpsTest(xla_test.XLATestCase):
               [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype),
           expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype))
 
-      self._testBinary(
-          gen_nn_ops.leaky_relu_grad,
-          np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype),
-          np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-                   dtype=dtype),
-          expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10],
-                            dtype=dtype))
-
       self._testBinary(
           gen_nn_ops.softmax_cross_entropy_with_logits,
           np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype),
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index dd29ef34ce..5b0e57f83f 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -361,11 +361,6 @@ class UnaryOpsTest(xla_test.XLATestCase):
           np.array([[-0.05, 6.05, 5]], dtype=dtype),
           expected=np.array([[0, 6, 5]], dtype=dtype))
 
-      self._assertOpOutputMatchesExpected(
-          nn_ops.leaky_relu,
-          np.array([[-1.0, 1.0]], dtype=dtype),
-          expected=np.array([[-0.2, 1.0]], dtype=dtype))
-
       self._assertOpOutputMatchesExpected(
           nn_ops.softmax,
           np.array([1, 2, 3, 4], dtype=dtype),
diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
index 8d65e0339c..d35777ccb1 100644
--- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
@@ -50,23 +50,6 @@ class Relu6Op : public XlaOpKernel {
   }
 };
 
-class LeakyReluOp : public XlaOpKernel {
- public:
-  explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
-  }
-  // Compute the max of the input x and alpha*x.
-  void Compile(XlaOpKernelContext* ctx) override {
-    xla::XlaBuilder* builder = ctx->builder();
-    auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0),
-                                          static_cast<double>(alpha_));
-    ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0)));
-  }
-
- private:
-  float alpha_;
-};
-
 class ReluGradOp : public XlaOpKernel {
  public:
   explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
@@ -101,35 +84,10 @@ class Relu6GradOp : public XlaOpKernel {
   }
 };
 
-class LeakyReluGradOp : public XlaOpKernel {
- public:
-  explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
-  }
-  // Return the lhs (incoming gradient) if the rhs (input feature) > 0,
-  // otherwise return the alpha * lhs.
-  void Compile(XlaOpKernelContext* ctx) override {
-    xla::XlaBuilder* b = ctx->builder();
-    const TensorShape shape = ctx->InputShape(0);
-    const auto zero =
-        xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes());
-    const auto pred = xla::Gt(ctx->Input(1), zero);
-    auto alpha =
-        XlaHelpers::FloatLiteral(b, input_type(0), static_cast<double>(alpha_));
-    ctx->SetOutput(
-        0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0))));
-  }
-
- private:
-  float alpha_;
-};
-
 REGISTER_XLA_OP(Name("Relu"), ReluOp);
 REGISTER_XLA_OP(Name("Relu6"), Relu6Op);
-REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp);
 REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp);
 REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp);
-REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp);
 
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From 8c51bbcd1b8d7d32a634df6eadde084e87ede1bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Wed, 12 Sep 2018 14:01:09 +0800
Subject: [PATCH 0111/1357] BLD: update golden file

---
 .../v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt  | 5 +++++
 .../v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt   | 5 +++++
 .../v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt  | 5 +++++
 .../v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt   | 5 +++++
 4 files changed, 20 insertions(+)

diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
index 7027e78df4..150dd21dbc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
@@ -1,6 +1,7 @@
 path: "tensorflow.estimator.BoostedTreesClassifier"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTrees\'>"
   is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
@@ -31,6 +32,10 @@ tf_class {
     name: "evaluate"
     argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "experimental_feature_importances"
+    argspec: "args=[\'self\', \'normalize\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
   member_method {
     name: "export_saved_model"
     argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
index d8167ea7cb..6e7b5a3d47 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
@@ -1,6 +1,7 @@
 path: "tensorflow.estimator.BoostedTreesRegressor"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTrees\'>"
   is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
@@ -31,6 +32,10 @@ tf_class {
     name: "evaluate"
     argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "experimental_feature_importances"
+    argspec: "args=[\'self\', \'normalize\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
   member_method {
     name: "export_saved_model"
     argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
index 7027e78df4..150dd21dbc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
@@ -1,6 +1,7 @@
 path: "tensorflow.estimator.BoostedTreesClassifier"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTrees\'>"
   is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
@@ -31,6 +32,10 @@ tf_class {
     name: "evaluate"
     argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "experimental_feature_importances"
+    argspec: "args=[\'self\', \'normalize\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
   member_method {
     name: "export_saved_model"
     argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
index d8167ea7cb..6e7b5a3d47 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
@@ -1,6 +1,7 @@
 path: "tensorflow.estimator.BoostedTreesRegressor"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTrees\'>"
   is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
@@ -31,6 +32,10 @@ tf_class {
     name: "evaluate"
     argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "experimental_feature_importances"
+    argspec: "args=[\'self\', \'normalize\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
   member_method {
     name: "export_saved_model"
     argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
-- 
GitLab


From 2dd5fb6cfb16ccc612b6e278d6282ef90581c0bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 11 Sep 2018 21:35:22 +0800
Subject: [PATCH 0112/1357] CLN: fix merge error

---
 tensorflow/contrib/losses/python/losses/loss_ops.py | 7 ++++---
 tensorflow/contrib/metrics/python/ops/metric_ops.py | 4 ++--
 tensorflow/contrib/rate/rate.py                     | 2 +-
 tensorflow/python/kernel_tests/losses_test.py       | 1 -
 tensorflow/python/ops/losses/losses_impl.py         | 3 ++-
 5 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py
index 8a0932c376..66322140cb 100644
--- a/tensorflow/contrib/losses/python/losses/loss_ops.py
+++ b/tensorflow/contrib/losses/python/losses/loss_ops.py
@@ -585,9 +585,10 @@ def mean_pairwise_squared_error(predictions,
         math_ops.square(diffs), reduction_indices=reduction_indices)
     num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
-    term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch,
-                                      math_ops.maximum(num_present_per_batch),
-                                      name="value")
+    term1 = 2.0 * math_ops.div_no_nan(
+        sum_squares_diff_per_batch,
+        math_ops.maximum(num_present_per_batch, 0),
+        name="value")
 
     sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices)
     term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff),
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index 1ddd7e521b..d7c73c8f99 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -3904,8 +3904,8 @@ def cohen_kappa(labels,
       total = math_ops.reduce_sum(pe_row)
       pe_sum = math_ops.reduce_sum(
           math_ops.div_no_nan(
-              pe_row * pe_col,
-              math_ops.maximum(total, 0),
+              math_ops.to_double(pe_row * pe_col),
+              math_ops.to_double(total),
               name=None))
       po_sum, pe_sum, total = (math_ops.to_double(po_sum),
                                math_ops.to_double(pe_sum),
diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py
index 489d5cce78..d948066b36 100644
--- a/tensorflow/contrib/rate/rate.py
+++ b/tensorflow/contrib/rate/rate.py
@@ -142,5 +142,5 @@ class Rate(object):
     state_ops.assign(self.prev_denominator, denominator)
 
     return math_ops.div_no_nan(self.numer,
-                               math_op.maximum(self.denom, 0),
+                               math_ops.maximum(self.denom, 0),
                                name="safe_rate")
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index c45b5035de..273a916fe5 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index a980a43f62..2035aaf9fe 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -586,7 +586,8 @@ def mean_pairwise_squared_error(
       term2 = 2.0 * math_ops.div_no_nan(
           math_ops.square(sum_diff),
           math_ops.maximum(
-              math_ops.multiply(num_present_per_batch, num_present_per_batch - 1),
+              math_ops.multiply(num_present_per_batch,
+                                num_present_per_batch - 1),
               0),
           name="value")
 
-- 
GitLab


From e3c334e57fba9afc0b0a3aa5f7787ee35e17ddf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Wed, 12 Sep 2018 14:59:44 +0800
Subject: [PATCH 0113/1357] CLN: remove unnecessary math_ops.maximum

---
 tensorflow/contrib/losses/python/losses/loss_ops.py | 11 ++++-------
 tensorflow/contrib/metrics/python/ops/metric_ops.py |  8 ++++----
 tensorflow/python/keras/engine/training_utils.py    |  3 +--
 tensorflow/python/keras/metrics.py                  |  2 +-
 tensorflow/python/ops/losses/losses_impl.py         |  4 +---
 5 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py
index 66322140cb..7e5ab05987 100644
--- a/tensorflow/contrib/losses/python/losses/loss_ops.py
+++ b/tensorflow/contrib/losses/python/losses/loss_ops.py
@@ -78,9 +78,7 @@ def _safe_mean(losses, num_present):
       then zero is returned.
   """
   total_loss = math_ops.reduce_sum(losses)
-  return math_ops.div_no_nan(total_loss,
-                             math_ops.maximum(num_present, 0),
-                             name="value")
+  return math_ops.div_no_nan(total_loss, num_present, name="value")
 
 
 @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.")
@@ -585,10 +583,9 @@ def mean_pairwise_squared_error(predictions,
         math_ops.square(diffs), reduction_indices=reduction_indices)
     num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
-    term1 = 2.0 * math_ops.div_no_nan(
-        sum_squares_diff_per_batch,
-        math_ops.maximum(num_present_per_batch, 0),
-        name="value")
+    term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch,
+                                      num_present_per_batch,
+                                      name="value")
 
     sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices)
     term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff),
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index d7c73c8f99..91939b5bf2 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -3222,11 +3222,11 @@ def streaming_covariance(predictions,
     # batch_mean_prediction is E[x_B] in the update equation
     batch_mean_prediction = math_ops.div_no_nan(
         math_ops.reduce_sum(weighted_predictions),
-        math_ops.maximum(batch_count, 0),
+        batch_count,
         name='batch_mean_prediction')
     delta_mean_prediction = math_ops.div_no_nan(
         (batch_mean_prediction - mean_prediction) * batch_count,
-        math_ops.maximum(update_count, 0),
+        update_count,
         name='delta_mean_prediction')
     update_mean_prediction = state_ops.assign_add(mean_prediction,
                                                   delta_mean_prediction)
@@ -3236,11 +3236,11 @@ def streaming_covariance(predictions,
     # batch_mean_label is E[y_B] in the update equation
     batch_mean_label = math_ops.div_no_nan(
         math_ops.reduce_sum(weighted_labels),
-        math_ops.maximum(batch_count, 0),
+        batch_count,
         name='batch_mean_label')
     delta_mean_label = math_ops.div_no_nan(
         (batch_mean_label - mean_label) * batch_count,
-        math_ops.maximum(update_count, 0),
+        update_count,
         name='delta_mean_label')
     update_mean_label = state_ops.assign_add(mean_label, delta_mean_label)
     # prev_mean_label is E[y_A] in the update equation
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 9082b9f0fa..c23168ccef 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -613,8 +613,7 @@ def weighted_masked_objective(fn):
       score_array = math_ops.multiply(score_array, weights)
       score_array = math_ops.reduce_sum(score_array)
       weights = math_ops.reduce_sum(weights)
-      score_array = math_ops.div_no_nan(score_array,
-                                        math_ops.maximum(weights, 0))
+      score_array = math_ops.div_no_nan(score_array, weights)
     return K.mean(score_array)
 
   return weighted
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 4050eb95a4..f85b6554bd 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -488,7 +488,7 @@ class Mean(Metric):
     state_ops.assign_add(self.count, num_values)
 
   def result(self):
-    return math_ops.div_no_nan(self.total, math_ops.maximum(self.count, 0))
+    return math_ops.div_no_nan(self.total, self.count)
 
 
 class MeanMetricWrapper(Mean):
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 2035aaf9fe..fe4950a475 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -86,9 +86,7 @@ def _safe_mean(losses, num_present):
       then zero is returned.
   """
   total_loss = math_ops.reduce_sum(losses)
-  return math_ops.div_no_nan(total_loss,
-                             math_ops.maximum(num_present, 0),
-                             name="value")
+  return math_ops.div_no_nan(total_loss, num_present, name="value")
 
 
 def _num_present(losses, weights, per_batch=False):
-- 
GitLab


From fd41d2c959372d7a068cb4474391362ef6a92fca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Wed, 12 Sep 2018 15:04:28 +0800
Subject: [PATCH 0114/1357] CLN: fix code style

---
 tensorflow/python/estimator/canned/boosted_trees_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index a176b4941f..c1309fb809 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -1083,7 +1083,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
         'continuous', dtype=dtypes.float32)
 
     with self.assertRaisesRegexp(ValueError,
-        'only bucketized_column and indicator_column'):
+                                 'only bucketized_column and indicator_column'):
       _ = boosted_trees.BoostedTreesRegressor(
           feature_columns=[numeric_col],
           n_batches_per_layer=1,
-- 
GitLab


From 5f69ba51752561f6294705b5d66705bdf322831d Mon Sep 17 00:00:00 2001
From: Johannes Bannhofer <4116408+joba01@users.noreply.github.com>
Date: Wed, 12 Sep 2018 09:23:02 +0200
Subject: [PATCH 0115/1357] Fixed wrong variable name in example

The Keras model used a wrong variable name in the MirroredStrategy example
---
 tensorflow/contrib/distribute/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distribute/README.md b/tensorflow/contrib/distribute/README.md
index 30e1992c01..91a27f97b7 100644
--- a/tensorflow/contrib/distribute/README.md
+++ b/tensorflow/contrib/distribute/README.md
@@ -76,7 +76,7 @@ We then compile the Keras model and pass the `MirroredStrategy` object in the
 ```python
 model.compile(loss='mean_squared_error',
               optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.2),
-              distribute=strategy)
+              distribute=distribution)
 ```
 
 To train the model we call Keras `fit` API using the input dataset that we
-- 
GitLab


From 9a13fc35951cef95d4dc71dabce4c270eb73d62a Mon Sep 17 00:00:00 2001
From: hellcom <vitalii.stoianov.ua@gmail.com>
Date: Wed, 12 Sep 2018 10:58:24 +0300
Subject: [PATCH 0116/1357] Fix missprint - unknown variable name.

Signed-off-by: hellcom <vitalii.stoianov.ua@gmail.com>
---
 configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 361bd4764d..52a513779e 100644
--- a/configure.py
+++ b/configure.py
@@ -852,7 +852,7 @@ def set_tf_cuda_version(environ_cp):
 
     # Reset and retry
     print('Invalid path to CUDA %s toolkit. %s cannot be found' %
-          (tf_cuda_version, cuda_toolkit_path_full))
+          (tf_cuda_version, cuda_toolkit_paths_full))
     environ_cp['TF_CUDA_VERSION'] = ''
     environ_cp['CUDA_TOOLKIT_PATH'] = ''
 
-- 
GitLab


From 9ec9c8b24cca5f1e746fef8cd351b3cae6d5a740 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Wed, 12 Sep 2018 20:42:01 +0300
Subject: [PATCH 0117/1357] Fixes after second review.

---
 tensorflow/contrib/ignite/BUILD               |   1 +
 tensorflow/contrib/ignite/__init__.py         |  22 +-
 .../kernels/ignite_binary_object_parser.cc    | 404 ++++++++++--------
 .../kernels/ignite_binary_object_parser.h     |  36 +-
 .../contrib/ignite/kernels/ignite_client.h    |  55 ++-
 .../contrib/ignite/kernels/ignite_dataset.cc  |  99 ++---
 .../contrib/ignite/kernels/ignite_dataset.h   |  37 +-
 .../ignite/kernels/ignite_dataset_iterator.cc | 383 ++++++++---------
 .../ignite/kernels/ignite_dataset_iterator.h  |  74 ++--
 .../ignite/kernels/ignite_dataset_ops.cc      | 123 ++++--
 .../ignite/kernels/ignite_plain_client.h      |  15 +-
 .../kernels/ignite_plain_client_unix.cc       |  14 +-
 .../kernels/ignite_plain_client_windows.cc    |  17 +-
 .../ignite/kernels/ignite_ssl_wrapper.cc      |  34 +-
 .../ignite/kernels/ignite_ssl_wrapper.h       |  26 +-
 tensorflow/contrib/ignite/ops/dataset_ops.cc  |   2 +
 .../ignite/python/ops/ignite_dataset_ops.py   | 176 ++++----
 17 files changed, 848 insertions(+), 670 deletions(-)

diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
index b7d40a99f7..2f598b4aed 100644
--- a/tensorflow/contrib/ignite/BUILD
+++ b/tensorflow/contrib/ignite/BUILD
@@ -40,6 +40,7 @@ cc_library(
     srcs = [
         "kernels/ignite_dataset_ops.cc",
         "kernels/ignite_client.h",
+        "kernels/ignite_byte_swapper.h",
         "kernels/ignite_plain_client.h",
         "kernels/ignite_ssl_wrapper.h",
         "kernels/ignite_ssl_wrapper.cc",
diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py
index b78829d0f4..f42947696f 100644
--- a/tensorflow/contrib/ignite/__init__.py
+++ b/tensorflow/contrib/ignite/__init__.py
@@ -12,16 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Apache Ignite is a memory-centric distributed database, caching, and
-   processing platform for transactional, analytical, and streaming workloads,
-   delivering in-memory speeds at petabyte scale. This contrib package
-   contains an integration between Apache Ignite and TensorFlow. The
-   integration is based on tf.data from TensorFlow side and Binary Client
-   Protocol from Apache Ignite side. It allows to use Apache Ignite as a
-   datasource for neural network training, inference and all other
-   computations supported by TensorFlow. Ignite Dataset is based on Apache
-   Ignite Binary Client Protocol:
-   https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
+"""IgniteDataset that allows to get data from Apache Ignite.
+
+Apache Ignite is a memory-centric distributed database, caching, and
+processing platform for transactional, analytical, and streaming workloads,
+delivering in-memory speeds at petabyte scale. This contrib package
+contains an integration between Apache Ignite and TensorFlow. The
+integration is based on tf.data from TensorFlow side and Binary Client
+Protocol from Apache Ignite side. It allows to use Apache Ignite as a
+datasource for neural network training, inference and all other
+computations supported by TensorFlow. Ignite Dataset is based on Apache
+Ignite Binary Client Protocol:
+https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
 
 @@IgniteDataset
 """
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
index 9bf4480d2d..2c8a7d44b0 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
@@ -13,242 +13,171 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_binary_object_parser.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
 
+BinaryObjectParser::BinaryObjectParser() : byte_swapper_(ByteSwapper(false)) {}
+
 Status BinaryObjectParser::Parse(uint8_t** ptr,
                                  std::vector<Tensor>* out_tensors,
-                                 std::vector<int32_t>* types) {
-  uint8_t object_type_id = **ptr;
-  *ptr += 1;
+                                 std::vector<int32_t>* types) const {
+  uint8_t object_type_id = ParseByte(ptr);
+
+  // Skip non-leaf nodes.
+  if (object_type_id != WRAPPED_OBJ && object_type_id != COMPLEX_OBJ)
+    types->push_back(object_type_id);
 
   switch (object_type_id) {
     case BYTE: {
-      Tensor tensor(cpu_allocator(), DT_UINT8, {});
-      tensor.scalar<uint8>()() = *((uint8_t*)*ptr);
-      *ptr += 1;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_UINT8, TensorShape({}));
+      out_tensors->back().scalar<uint8>()() = ParseByte(ptr);
       break;
     }
     case SHORT: {
-      Tensor tensor(cpu_allocator(), DT_INT16, {});
-      tensor.scalar<int16>()() = *((int16_t*)*ptr);
-      *ptr += 2;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_INT16, TensorShape({}));
+      out_tensors->back().scalar<int16>()() = ParseShort(ptr);
+      break;
+    }
+    case USHORT: {
+      out_tensors->emplace_back(cpu_allocator(), DT_UINT16, TensorShape({}));
+      out_tensors->back().scalar<uint16>()() = ParseUnsignedShort(ptr);
       break;
     }
     case INT: {
-      Tensor tensor(cpu_allocator(), DT_INT32, {});
-      tensor.scalar<int32>()() = *((int32_t*)*ptr);
-      *ptr += 4;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_INT32, TensorShape({}));
+      out_tensors->back().scalar<int32>()() = ParseInt(ptr);
       break;
     }
     case LONG: {
-      Tensor tensor(cpu_allocator(), DT_INT64, {});
-      tensor.scalar<int64>()() = *((int64_t*)*ptr);
-      *ptr += 8;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({}));
+      out_tensors->back().scalar<int64>()() = ParseLong(ptr);
       break;
     }
     case FLOAT: {
-      Tensor tensor(cpu_allocator(), DT_FLOAT, {});
-      tensor.scalar<float>()() = *((float*)*ptr);
-      *ptr += 4;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_FLOAT, TensorShape({}));
+      out_tensors->back().scalar<float>()() = ParseFloat(ptr);
       break;
     }
     case DOUBLE: {
-      Tensor tensor(cpu_allocator(), DT_DOUBLE, {});
-      tensor.scalar<double>()() = *((double*)*ptr);
-      *ptr += 8;
-      out_tensors->push_back(std::move(tensor));
-      break;
-    }
-    case UCHAR: {
-      Tensor tensor(cpu_allocator(), DT_UINT16, {});
-      tensor.scalar<uint16>()() = *((uint16_t*)*ptr);
-      *ptr += 2;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE, TensorShape({}));
+      out_tensors->back().scalar<double>()() = ParseDouble(ptr);
       break;
     }
     case BOOL: {
-      Tensor tensor(cpu_allocator(), DT_BOOL, {});
-      tensor.scalar<bool>()() = *((bool*)*ptr);
-      *ptr += 1;
-      out_tensors->push_back(std::move(tensor));
-
+      out_tensors->emplace_back(cpu_allocator(), DT_BOOL, TensorShape({}));
+      out_tensors->back().scalar<bool>()() = ParseBool(ptr);
       break;
     }
     case STRING: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_STRING, {});
-      tensor.scalar<std::string>()() = std::string((char*)*ptr, length);
-      *ptr += length;
-      out_tensors->push_back(std::move(tensor));
-
+      out_tensors->emplace_back(cpu_allocator(), DT_STRING, TensorShape({}));
+      out_tensors->back().scalar<string>()() = ParseString(ptr);
       break;
     }
     case DATE: {
-      Tensor tensor(cpu_allocator(), DT_INT64, {});
-      tensor.scalar<int64>()() = *((int64_t*)*ptr);
-      *ptr += 8;
-      out_tensors->push_back(std::move(tensor));
-
+      out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({}));
+      out_tensors->back().scalar<int64>()() = ParseLong(ptr);
       break;
     }
     case BYTE_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_UINT8, TensorShape({length}));
-
-      uint8_t* arr = (uint8_t*)*ptr;
-      *ptr += length;
-
-      std::copy_n(arr, length, tensor.flat<uint8>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      uint8_t* arr = ParseByteArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_UINT8,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<uint8>().data());
       break;
     }
     case SHORT_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_INT16, TensorShape({length}));
-
-      int16_t* arr = (int16_t*)*ptr;
-      *ptr += length * 2;
-
-      std::copy_n(arr, length, tensor.flat<int16>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      int16_t* arr = ParseShortArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_INT16,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<int16>().data());
+      break;
+    }
+    case USHORT_ARR: {
+      int32_t length = ParseInt(ptr);
+      uint16_t* arr = ParseUnsignedShortArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_UINT16,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<uint16>().data());
       break;
     }
     case INT_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_INT32, TensorShape({length}));
-
-      int32_t* arr = (int32_t*)*ptr;
-      *ptr += length * 4;
-
-      std::copy_n(arr, length, tensor.flat<int32>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      int32_t* arr = ParseIntArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_INT32,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<int32>().data());
       break;
     }
     case LONG_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length}));
-
-      int64_t* arr = (int64_t*)*ptr;
-      *ptr += length * 8;
-
-      std::copy_n(arr, length, tensor.flat<int64>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      int64_t* arr = ParseLongArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_INT64,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<int64>().data());
       break;
     }
     case FLOAT_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_FLOAT, TensorShape({length}));
-
-      float* arr = (float*)*ptr;
-      *ptr += 4 * length;
-
-      std::copy_n(arr, length, tensor.flat<float>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      float* arr = ParseFloatArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_FLOAT,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<float>().data());
       break;
     }
     case DOUBLE_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_DOUBLE, TensorShape({length}));
-
-      double* arr = (double*)*ptr;
-      *ptr += 8 * length;
-
-      std::copy_n(arr, length, tensor.flat<double>().data());
-      out_tensors->push_back(std::move(tensor));
-      break;
-    }
-    case UCHAR_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_UINT16, TensorShape({length}));
-
-      uint16_t* arr = (uint16_t*)*ptr;
-      *ptr += length * 2;
-
-      std::copy_n(arr, length, tensor.flat<uint16>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      double* arr = ParseDoubleArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<double>().data());
       break;
     }
     case BOOL_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_BOOL, TensorShape({length}));
-
-      bool* arr = (bool*)*ptr;
-      *ptr += length;
-
-      std::copy_n(arr, length, tensor.flat<bool>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      bool* arr = ParseBoolArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_BOOL,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<bool>().data());
       break;
     }
     case STRING_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_STRING, TensorShape({length}));
-
-      for (int32_t i = 0; i < length; i++) {
-        int32_t str_length = *((int32_t*)*ptr);
-        *ptr += 4;
-        const int8_t* str = (const int8_t*)*ptr;
-        *ptr += str_length;
-        tensor.vec<std::string>()(i) = std::string((char*)str, str_length);
-      }
-
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      out_tensors->emplace_back(cpu_allocator(), DT_STRING,
+                                TensorShape({length}));
+      for (int32_t i = 0; i < length; i++)
+        out_tensors->back().vec<string>()(i) = ParseString(ptr);
       break;
     }
     case DATE_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length}));
-      int64_t* arr = (int64_t*)*ptr;
-      *ptr += length * 8;
-
-      std::copy_n(arr, length, tensor.flat<int64>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      int64_t* arr = ParseLongArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_INT64,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<int64>().data());
       break;
     }
     case WRAPPED_OBJ: {
-      int32_t byte_arr_size = *((int32_t*)*ptr);
-      *ptr += 4;
-
+      int32_t byte_arr_size = ParseInt(ptr);
       TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types));
-
-      int32_t offset = *((int32_t*)*ptr);
-      *ptr += 4;
+      int32_t offset = ParseInt(ptr);
 
       break;
     }
     case COMPLEX_OBJ: {
-      uint8_t version = **ptr;
-      *ptr += 1;
-      int16_t flags = *((int16_t*)*ptr);  // USER_TYPE = 1, HAS_SCHEMA = 2
-      *ptr += 2;
-      int32_t type_id = *((int32_t*)*ptr);
-      *ptr += 4;
-      int32_t hash_code = *((int32_t*)*ptr);
-      *ptr += 4;
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      int32_t schema_id = *((int32_t*)*ptr);
-      *ptr += 4;
-      int32_t schema_offset = *((int32_t*)*ptr);
-      *ptr += 4;
-
+      uint8_t version = ParseByte(ptr);
+      int16_t flags = ParseShort(ptr);
+      int32_t type_id = ParseInt(ptr);
+      int32_t hash_code = ParseInt(ptr);
+      int32_t length = ParseInt(ptr);
+      int32_t schema_id = ParseInt(ptr);
+      int32_t schema_offset = ParseInt(ptr);
+
+      // 24 is size of header just read.
       uint8_t* end = *ptr + schema_offset - 24;
       int32_t i = 0;
       while (*ptr < end) {
@@ -261,12 +190,145 @@ Status BinaryObjectParser::Parse(uint8_t** ptr,
       break;
     }
     default: {
-      return errors::Internal("Unknowd binary type (type id ",
-                              (int)object_type_id, ")");
+      return errors::Unknown("Unknowd binary type (type id ",
+                             (int)object_type_id, ")");
     }
   }
 
   return Status::OK();
 }
 
+uint8_t BinaryObjectParser::ParseByte(uint8_t** ptr) const {
+  uint8_t res = **ptr;
+  *ptr += 1;
+
+  return res;
+}
+
+int16_t BinaryObjectParser::ParseShort(uint8_t** ptr) const {
+  int16_t* res = *reinterpret_cast<int16_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt16(res);
+  *ptr += 2;
+
+  return *res;
+}
+
+uint16_t BinaryObjectParser::ParseUnsignedShort(uint8_t** ptr) const {
+  uint16_t* res = *reinterpret_cast<uint16_t**>(ptr);
+  byte_swapper_.SwapIfRequiredUnsignedInt16(res);
+  *ptr += 2;
+
+  return *res;
+}
+
+int32_t BinaryObjectParser::ParseInt(uint8_t** ptr) const {
+  int32_t* res = *reinterpret_cast<int32_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt32(res);
+  *ptr += 4;
+
+  return *res;
+}
+
+int64_t BinaryObjectParser::ParseLong(uint8_t** ptr) const {
+  int64_t* res = *reinterpret_cast<int64_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt64(res);
+  *ptr += 8;
+
+  return *res;
+}
+
+float BinaryObjectParser::ParseFloat(uint8_t** ptr) const {
+  float* res = *reinterpret_cast<float**>(ptr);
+  byte_swapper_.SwapIfRequiredFloat(res);
+  *ptr += 4;
+
+  return *res;
+}
+
+double BinaryObjectParser::ParseDouble(uint8_t** ptr) const {
+  double* res = *reinterpret_cast<double**>(ptr);
+  byte_swapper_.SwapIfRequiredDouble(res);
+  *ptr += 8;
+
+  return *res;
+}
+
+bool BinaryObjectParser::ParseBool(uint8_t** ptr) const {
+  bool res = **reinterpret_cast<bool**>(ptr);
+  *ptr += 1;
+
+  return res;
+}
+
+string BinaryObjectParser::ParseString(uint8_t** ptr) const {
+  int32_t length = ParseInt(ptr);
+  string res(*reinterpret_cast<char**>(ptr), length);
+  *ptr += length;
+
+  return res;
+}
+
+uint8_t* BinaryObjectParser::ParseByteArr(uint8_t** ptr, int length) const {
+  uint8_t* res = *reinterpret_cast<uint8_t**>(ptr);
+  *ptr += length;
+
+  return res;
+}
+
+int16_t* BinaryObjectParser::ParseShortArr(uint8_t** ptr, int length) const {
+  int16_t* res = *reinterpret_cast<int16_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt16Arr(res, length);
+  *ptr += length * 2;
+
+  return res;
+}
+
+uint16_t* BinaryObjectParser::ParseUnsignedShortArr(uint8_t** ptr,
+                                                    int length) const {
+  uint16_t* res = *reinterpret_cast<uint16_t**>(ptr);
+  byte_swapper_.SwapIfRequiredUnsignedInt16Arr(res, length);
+  *ptr += length * 2;
+
+  return res;
+}
+
+int32_t* BinaryObjectParser::ParseIntArr(uint8_t** ptr, int length) const {
+  int32_t* res = *reinterpret_cast<int32_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt32Arr(res, length);
+  *ptr += length * 4;
+
+  return res;
+}
+
+int64_t* BinaryObjectParser::ParseLongArr(uint8_t** ptr, int length) const {
+  int64_t* res = *reinterpret_cast<int64_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt64Arr(res, length);
+  *ptr += length * 8;
+
+  return res;
+}
+
+float* BinaryObjectParser::ParseFloatArr(uint8_t** ptr, int length) const {
+  float* res = *reinterpret_cast<float**>(ptr);
+  byte_swapper_.SwapIfRequiredFloatArr(res, length);
+  *ptr += length * 4;
+
+  return res;
+}
+
+double* BinaryObjectParser::ParseDoubleArr(uint8_t** ptr, int length) const {
+  double* res = *reinterpret_cast<double**>(ptr);
+  byte_swapper_.SwapIfRequiredDoubleArr(res, length);
+  *ptr += length * 8;
+
+  return res;
+}
+
+bool* BinaryObjectParser::ParseBoolArr(uint8_t** ptr, int length) const {
+  bool* res = *reinterpret_cast<bool**>(ptr);
+  *ptr += length;
+
+  return res;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
index 9accbd796f..eb1f856643 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
@@ -13,16 +13,42 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_
+
 #include <vector>
-#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h"
+#include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 
 class BinaryObjectParser {
  public:
+  BinaryObjectParser();
   Status Parse(uint8_t** ptr, std::vector<Tensor>* out_tensors,
-               std::vector<int32_t>* types);
+               std::vector<int32_t>* types) const;
+
+ private:
+  uint8_t ParseByte(uint8_t** ptr) const;
+  int16_t ParseShort(uint8_t** ptr) const;
+  uint16_t ParseUnsignedShort(uint8_t** ptr) const;
+  int32_t ParseInt(uint8_t** ptr) const;
+  int64_t ParseLong(uint8_t** ptr) const;
+  float ParseFloat(uint8_t** ptr) const;
+  double ParseDouble(uint8_t** ptr) const;
+  bool ParseBool(uint8_t** ptr) const;
+  string ParseString(uint8_t** ptr) const;
+  uint8_t* ParseByteArr(uint8_t** ptr, int length) const;
+  int16_t* ParseShortArr(uint8_t** ptr, int length) const;
+  uint16_t* ParseUnsignedShortArr(uint8_t** ptr, int length) const;
+  int32_t* ParseIntArr(uint8_t** ptr, int length) const;
+  int64_t* ParseLongArr(uint8_t** ptr, int length) const;
+  float* ParseFloatArr(uint8_t** ptr, int length) const;
+  double* ParseDoubleArr(uint8_t** ptr, int length) const;
+  bool* ParseBoolArr(uint8_t** ptr, int length) const;
+
+  const ByteSwapper byte_swapper_;
 };
 
 enum ObjectType {
@@ -32,7 +58,7 @@ enum ObjectType {
   LONG = 4,
   FLOAT = 5,
   DOUBLE = 6,
-  UCHAR = 7,
+  USHORT = 7,
   BOOL = 8,
   STRING = 9,
   DATE = 11,
@@ -42,7 +68,7 @@ enum ObjectType {
   LONG_ARR = 15,
   FLOAT_ARR = 16,
   DOUBLE_ARR = 17,
-  UCHAR_ARR = 18,
+  USHORT_ARR = 18,
   BOOL_ARR = 19,
   STRING_ARR = 20,
   DATE_ARR = 22,
@@ -51,3 +77,5 @@ enum ObjectType {
 };
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h
index 944b3fe184..508b6e4a60 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_client.h
@@ -16,40 +16,69 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
 #define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
 
+#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 
 class Client {
  public:
+  Client(bool big_endian) : byte_swapper_(ByteSwapper(big_endian)){};
   virtual Status Connect() = 0;
   virtual Status Disconnect() = 0;
   virtual bool IsConnected() = 0;
   virtual int GetSocketDescriptor() = 0;
-  virtual Status ReadData(uint8_t* buf, int32_t length) = 0;
-  virtual Status WriteData(uint8_t* buf, int32_t length) = 0;
+  virtual Status ReadData(uint8_t *buf, const int32_t length) = 0;
+  virtual Status WriteData(const uint8_t *buf, const int32_t length) = 0;
 
-  inline Status ReadByte(uint8_t* data) { return ReadData(data, 1); }
+  inline Status ReadByte(uint8_t *data) { return ReadData(data, 1); }
 
-  inline Status ReadShort(int16_t* data) { return ReadData((uint8_t*)data, 2); }
+  inline Status ReadShort(int16_t *data) {
+    TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 2));
+    byte_swapper_.SwapIfRequiredInt16(data);
 
-  inline Status ReadInt(int32_t* data) { return ReadData((uint8_t*)data, 4); }
+    return Status::OK();
+  }
+
+  inline Status ReadInt(int32_t *data) {
+    TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 4));
+    byte_swapper_.SwapIfRequiredInt32(data);
+
+    return Status::OK();
+  }
 
-  inline Status ReadLong(int64_t* data) { return ReadData((uint8_t*)data, 8); }
+  inline Status ReadLong(int64_t *data) {
+    TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 8));
+    byte_swapper_.SwapIfRequiredInt64(data);
 
-  inline Status WriteByte(uint8_t data) { return WriteData(&data, 1); }
+    return Status::OK();
+  }
+
+  inline Status WriteByte(const uint8_t data) { return WriteData(&data, 1); }
 
-  inline Status WriteShort(int16_t data) {
-    return WriteData((uint8_t*)&data, 2);
+  inline Status WriteShort(const int16_t data) {
+    int16_t tmp = data;
+    byte_swapper_.SwapIfRequiredInt16(&tmp);
+    return WriteData((uint8_t *)&tmp, 2);
   }
 
-  inline Status WriteInt(int32_t data) { return WriteData((uint8_t*)&data, 4); }
+  inline Status WriteInt(const int32_t data) {
+    int32_t tmp = data;
+    byte_swapper_.SwapIfRequiredInt32(&tmp);
+    return WriteData((uint8_t *)&tmp, 4);
+  }
 
-  inline Status WriteLong(int64_t data) {
-    return WriteData((uint8_t*)&data, 8);
+  inline Status WriteLong(const int64_t data) {
+    int64_t tmp = data;
+    byte_swapper_.SwapIfRequiredInt64(&tmp);
+    return WriteData((uint8_t *)&tmp, 8);
   }
+
+ private:
+  const ByteSwapper byte_swapper_;
 };
 
 }  // namespace tensorflow
 
-#endif
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
index f25f8a5b18..c4a7d3c513 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
@@ -13,40 +13,41 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_dataset_iterator.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
 
-IgniteDataset::IgniteDataset(OpKernelContext* ctx, std::string cache_name,
-                             std::string host, int32 port, bool local,
-                             int32 part, int32 page_size, std::string username,
-                             std::string password, std::string certfile,
-                             std::string keyfile, std::string cert_password,
-                             std::vector<int32> schema,
-                             std::vector<int32> permutation)
+IgniteDataset::IgniteDataset(OpKernelContext* ctx, string cache_name,
+                             string host, int32 port, bool local, int32 part,
+                             int32 page_size, string username, string password,
+                             string certfile, string keyfile,
+                             string cert_password, std::vector<int32> schema,
+                             std::vector<int32> permutation,
+                             DataTypeVector dtypes,
+                             std::vector<PartialTensorShape> shapes)
     : DatasetBase(DatasetContext(ctx)),
-      cache_name_(cache_name),
-      host_(host),
+      cache_name_(std::move(cache_name)),
+      host_(std::move(host)),
       port_(port),
       local_(local),
       part_(part),
       page_size_(page_size),
-      username_(username),
-      password_(password),
-      certfile_(certfile),
-      keyfile_(keyfile),
-      cert_password_(cert_password),
-      schema_(schema),
-      permutation_(permutation) {
-  SchemaToTypes();
-  SchemaToShapes();
-
-  LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name
-            << "', host='" << host << "', port=" << port << ", local=" << local
-            << ", part=" << part << ", page_size=" << page_size
-            << ", username='" << username << "', certfile='" << certfile
-            << "', keyfile='" << keyfile + "']";
+      username_(std::move(username)),
+      password_(std::move(password)),
+      certfile_(std::move(certfile)),
+      keyfile_(std::move(keyfile)),
+      cert_password_(std::move(cert_password)),
+      schema_(std::move(schema)),
+      permutation_(std::move(permutation)),
+      dtypes_(dtypes),
+      shapes_(shapes) {
+  LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name_
+            << "', host='" << host_ << "', port=" << port_
+            << ", local=" << local_ << ", part=" << part_
+            << ", page_size=" << page_size_ << ", username='" << username_
+            << "', certfile='" << certfile_ << "', keyfile='"
+            << keyfile_ + "']";
 }
 
 IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; }
@@ -54,10 +55,12 @@ IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; }
 std::unique_ptr<IteratorBase> IgniteDataset::MakeIteratorInternal(
     const string& prefix) const {
   return std::unique_ptr<IteratorBase>(new IgniteDatasetIterator(
-      {this, strings::StrCat(prefix, "::Ignite")}, this->host_, this->port_,
-      this->cache_name_, this->local_, this->part_, this->page_size_,
-      this->username_, this->password_, this->certfile_, this->keyfile_,
-      this->cert_password_, this->schema_, this->permutation_));
+      {this, strings::StrCat(prefix, "::Ignite")}, std::move(this->host_),
+      this->port_, std::move(this->cache_name_), this->local_, this->part_,
+      this->page_size_, std::move(this->username_), std::move(this->password_),
+      std::move(this->certfile_), std::move(this->keyfile_),
+      std::move(this->cert_password_), std::move(this->schema_),
+      std::move(this->permutation_)));
 }
 
 const DataTypeVector& IgniteDataset::output_dtypes() const { return dtypes_; }
@@ -75,42 +78,4 @@ Status IgniteDataset::AsGraphDefInternal(SerializationContext* ctx,
       "IgniteDataset does not support 'AsGraphDefInternal'");
 }
 
-void IgniteDataset::SchemaToTypes() {
-  for (auto e : schema_) {
-    if (e == BYTE || e == BYTE_ARR) {
-      dtypes_.push_back(DT_UINT8);
-    } else if (e == SHORT || e == SHORT_ARR) {
-      dtypes_.push_back(DT_INT16);
-    } else if (e == INT || e == INT_ARR) {
-      dtypes_.push_back(DT_INT32);
-    } else if (e == LONG || e == LONG_ARR) {
-      dtypes_.push_back(DT_INT64);
-    } else if (e == FLOAT || e == FLOAT_ARR) {
-      dtypes_.push_back(DT_FLOAT);
-    } else if (e == DOUBLE || e == DOUBLE_ARR) {
-      dtypes_.push_back(DT_DOUBLE);
-    } else if (e == UCHAR || e == UCHAR_ARR) {
-      dtypes_.push_back(DT_UINT8);
-    } else if (e == BOOL || e == BOOL_ARR) {
-      dtypes_.push_back(DT_BOOL);
-    } else if (e == STRING || e == STRING_ARR) {
-      dtypes_.push_back(DT_STRING);
-    } else {
-      LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
-    }
-  }
-}
-
-void IgniteDataset::SchemaToShapes() {
-  for (auto e : schema_) {
-    if (e >= 1 && e < 10) {
-      shapes_.push_back(PartialTensorShape({}));
-    } else if (e >= 12 && e < 21) {
-      shapes_.push_back(PartialTensorShape({-1}));
-    } else {
-      LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
-    }
-  }
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
index d3fec5910b..66bfdf2e2a 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
@@ -13,18 +13,21 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_
+
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
 
 class IgniteDataset : public DatasetBase {
  public:
-  IgniteDataset(OpKernelContext* ctx, std::string cache_name, std::string host,
+  IgniteDataset(OpKernelContext* ctx, string cache_name, string host,
                 int32 port, bool local, int32 part, int32 page_size,
-                std::string username, std::string password,
-                std::string certfile, std::string keyfile,
-                std::string cert_password, std::vector<int32> schema,
-                std::vector<int32> permutation);
+                string username, string password, string certfile,
+                string keyfile, string cert_password, std::vector<int32> schema,
+                std::vector<int32> permutation, DataTypeVector dtypes,
+                std::vector<PartialTensorShape> shapes);
   ~IgniteDataset();
   std::unique_ptr<IteratorBase> MakeIteratorInternal(
       const string& prefix) const override;
@@ -38,25 +41,23 @@ class IgniteDataset : public DatasetBase {
                             Node** output) const override;
 
  private:
-  const std::string cache_name_;
-  const std::string host_;
+  const string cache_name_;
+  const string host_;
   const int32 port_;
   const bool local_;
   const int32 part_;
   const int32 page_size_;
-  const std::string username_;
-  const std::string password_;
-  const std::string certfile_;
-  const std::string keyfile_;
-  const std::string cert_password_;
+  const string username_;
+  const string password_;
+  const string certfile_;
+  const string keyfile_;
+  const string cert_password_;
   const std::vector<int32> schema_;
   const std::vector<int32> permutation_;
-
-  DataTypeVector dtypes_;
-  std::vector<PartialTensorShape> shapes_;
-
-  void SchemaToTypes();
-  void SchemaToShapes();
+  const DataTypeVector dtypes_;
+  const std::vector<PartialTensorShape> shapes_;
 };
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
index 1774585ecd..f68ded5a3a 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
@@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_dataset_iterator.h"
-
-#include "ignite_plain_client.h"
-#include "ignite_ssl_wrapper.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/logging.h"
 
 #include <time.h>
@@ -25,30 +25,31 @@ limitations under the License.
 namespace tensorflow {
 
 IgniteDatasetIterator::IgniteDatasetIterator(
-    const Params& params, std::string host, int32 port, std::string cache_name,
-    bool local, int32 part, int32 page_size, std::string username,
-    std::string password, std::string certfile, std::string keyfile,
-    std::string cert_password, std::vector<int32> schema,
-    std::vector<int32> permutation)
+    const Params& params, string host, int32 port, string cache_name,
+    bool local, int32 part, int32 page_size, string username, string password,
+    string certfile, string keyfile, string cert_password,
+    std::vector<int32> schema, std::vector<int32> permutation)
     : DatasetIterator<IgniteDataset>(params),
-      cache_name_(cache_name),
+      cache_name_(std::move(cache_name)),
       local_(local),
       part_(part),
       page_size_(page_size),
-      username_(username),
-      password_(password),
-      schema_(schema),
-      permutation_(permutation),
+      username_(std::move(username)),
+      password_(std::move(password)),
+      schema_(std::move(schema)),
+      permutation_(std::move(permutation)),
       remainder_(-1),
       cursor_id_(-1),
-      last_page_(false) {
-  Client* p_client = new PlainClient(host, port);
+      last_page_(false),
+      valid_state_(true) {
+  Client* p_client = new PlainClient(std::move(host), port, false);
 
   if (certfile.empty())
     client_ = std::unique_ptr<Client>(p_client);
   else
-    client_ = std::unique_ptr<Client>(new SslWrapper(
-        std::unique_ptr<Client>(p_client), certfile, keyfile, cert_password));
+    client_ = std::unique_ptr<Client>(
+        new SslWrapper(std::unique_ptr<Client>(p_client), std::move(certfile),
+                       std::move(keyfile), std::move(cert_password), false));
 
   LOG(INFO) << "Ignite Dataset Iterator created";
 }
@@ -60,12 +61,80 @@ IgniteDatasetIterator::~IgniteDatasetIterator() {
   LOG(INFO) << "Ignite Dataset Iterator destroyed";
 }
 
+Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx,
+                                              std::vector<Tensor>* out_tensors,
+                                              bool* end_of_sequence) {
+  mutex_lock l(mutex_);
+
+  if (valid_state_) {
+    Status status =
+        GetNextInternalWithValidState(ctx, out_tensors, end_of_sequence);
+
+    if (!status.ok()) valid_state_ = false;
+
+    return status;
+  }
+
+  return errors::Unknown("Iterator is invalid");
+}
+
+Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) {
+  return errors::Unimplemented(
+      "Iterator for IgniteDataset does not support 'SaveInternal'");
+}
+
+Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx,
+                                              IteratorStateReader* reader) {
+  return errors::Unimplemented(
+      "Iterator for IgniteDataset does not support 'RestoreInternal')");
+}
+
+Status IgniteDatasetIterator::GetNextInternalWithValidState(
+    IteratorContext* ctx, std::vector<Tensor>* out_tensors,
+    bool* end_of_sequence) {
+  if (remainder_ == 0 && last_page_) {
+    cursor_id_ = -1;
+    *end_of_sequence = true;
+
+    return Status::OK();
+  } else {
+    TF_RETURN_IF_ERROR(EstablishConnection());
+
+    if (remainder_ == -1) {
+      TF_RETURN_IF_ERROR(ScanQuery());
+    } else if (remainder_ == 0) {
+      TF_RETURN_IF_ERROR(LoadNextPage());
+    }
+
+    uint8_t* initial_ptr = ptr_;
+    std::vector<Tensor> tensors;
+    std::vector<int32_t> types;
+
+    TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types));  // Parse key
+    TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types));  // Parse val
+
+    remainder_ -= (ptr_ - initial_ptr);
+
+    TF_RETURN_IF_ERROR(CheckTypes(types));
+
+    for (size_t i = 0; i < tensors.size(); i++)
+      out_tensors->push_back(tensors[permutation_[i]]);
+
+    *end_of_sequence = false;
+
+    return Status::OK();
+  }
+
+  *end_of_sequence = true;
+
+  return Status::OK();
+}
+
 Status IgniteDatasetIterator::EstablishConnection() {
   if (!client_->IsConnected()) {
-    Status status = client_->Connect();
-    if (!status.ok()) return status;
+    TF_RETURN_IF_ERROR(client_->Connect());
 
-    status = Handshake();
+    Status status = Handshake();
     if (!status.ok()) {
       Status disconnect_status = client_->Disconnect();
       if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString();
@@ -79,19 +148,17 @@ Status IgniteDatasetIterator::EstablishConnection() {
 
 Status IgniteDatasetIterator::CloseConnection() {
   if (cursor_id_ != -1 && !last_page_) {
-    Status conn_status = EstablishConnection();
-    if (!conn_status.ok()) return conn_status;
+    TF_RETURN_IF_ERROR(EstablishConnection());
 
-    TF_RETURN_IF_ERROR(client_->WriteInt(18));  // Message length
-    TF_RETURN_IF_ERROR(
-        client_->WriteShort(close_connection_opcode));   // Operation code
+    TF_RETURN_IF_ERROR(client_->WriteInt(kCloseConnectionReqLength));
+    TF_RETURN_IF_ERROR(client_->WriteShort(kCloseConnectionOpcode));
     TF_RETURN_IF_ERROR(client_->WriteLong(0));           // Request ID
     TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_));  // Resource ID
 
     int32_t res_len;
     TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
-    if (res_len < 12)
-      return errors::Internal("Close Resource Response is corrupted");
+    if (res_len < kMinResLength)
+      return errors::Unknown("Close Resource Response is corrupted");
 
     int64_t req_id;
     TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
@@ -100,22 +167,21 @@ Status IgniteDatasetIterator::CloseConnection() {
     if (status != 0) {
       uint8_t err_msg_header;
       TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
-      if (err_msg_header == string_val) {
+      if (err_msg_header == kStringVal) {
         int32_t err_msg_length;
         TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
+
         uint8_t* err_msg_c = new uint8_t[err_msg_length];
+        auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; });
         TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
-        std::string err_msg((char*)err_msg_c, err_msg_length);
-        delete[] err_msg_c;
+        string err_msg(reinterpret_cast<char*>(err_msg_c), err_msg_length);
 
-        return errors::Internal("Close Resource Error [status=", status,
-                                ", message=", err_msg, "]");
+        return errors::Unknown("Close Resource Error [status=", status,
+                               ", message=", err_msg, "]");
       }
-      return errors::Internal("Close Resource Error [status=", status, "]");
+      return errors::Unknown("Close Resource Error [status=", status, "]");
     }
 
-    LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed";
-
     cursor_id_ = -1;
 
     return client_->Disconnect();
@@ -126,94 +192,43 @@ Status IgniteDatasetIterator::CloseConnection() {
   return client_->IsConnected() ? client_->Disconnect() : Status::OK();
 }
 
-Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx,
-                                              std::vector<Tensor>* out_tensors,
-                                              bool* end_of_sequence) {
-  if (remainder_ == 0 && last_page_) {
-    LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed";
-
-    cursor_id_ = -1;
-    *end_of_sequence = true;
-    return Status::OK();
-  } else {
-    Status status = EstablishConnection();
-    if (!status.ok()) return status;
-
-    if (remainder_ == -1 || remainder_ == 0) {
-      Status status = remainder_ == -1 ? ScanQuery() : LoadNextPage();
-      if (!status.ok()) return status;
-    }
-
-    uint8_t* initial_ptr = ptr_;
-    std::vector<int32_t> types;
-    std::vector<Tensor> tensors;
-
-    status = parser_.Parse(&ptr_, &tensors, &types);  // Parse key
-    if (!status.ok()) return status;
-
-    status = parser_.Parse(&ptr_, &tensors, &types);  // Parse val
-    if (!status.ok()) return status;
-
-    remainder_ -= (ptr_ - initial_ptr);
-
-    out_tensors->resize(tensors.size());
-    for (int32_t i = 0; i < tensors.size(); i++)
-      (*out_tensors)[permutation_[i]] = std::move(tensors[i]);
-
-    *end_of_sequence = false;
-    return Status::OK();
-  }
-
-  *end_of_sequence = true;
-  return Status::OK();
-}
-
-Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) {
-  return errors::Unimplemented(
-      "Iterator for IgniteDataset does not support 'SaveInternal'");
-}
-
-Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx,
-                                              IteratorStateReader* reader) {
-  return errors::Unimplemented(
-      "Iterator for IgniteDataset does not support 'RestoreInternal')");
-}
-
 Status IgniteDatasetIterator::Handshake() {
-  int32_t msg_len = 8;
+  int32_t msg_len = kHandshakeReqDefaultLength;
 
   if (username_.empty())
     msg_len += 1;
   else
-    msg_len += 5 + username_.length();
+    msg_len += 5 + username_.length();  // 1 byte header, 4 bytes length.
 
   if (password_.empty())
     msg_len += 1;
   else
-    msg_len += 5 + password_.length();
+    msg_len += 5 + password_.length();  // 1 byte header, 4 bytes length.
 
   TF_RETURN_IF_ERROR(client_->WriteInt(msg_len));
   TF_RETURN_IF_ERROR(client_->WriteByte(1));
-  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_major_version));
-  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_minor_version));
-  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_patch_version));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMajorVersion));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMinorVersion));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolPatchVersion));
   TF_RETURN_IF_ERROR(client_->WriteByte(2));
   if (username_.empty()) {
-    TF_RETURN_IF_ERROR(client_->WriteByte(null_val));
+    TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal));
   } else {
-    TF_RETURN_IF_ERROR(client_->WriteByte(string_val));
+    TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal));
     TF_RETURN_IF_ERROR(client_->WriteInt(username_.length()));
     TF_RETURN_IF_ERROR(
-        client_->WriteData((uint8_t*)username_.c_str(), username_.length()));
+        client_->WriteData(reinterpret_cast<const uint8_t*>(username_.c_str()),
+                           username_.length()));
   }
 
   if (password_.empty()) {
-    TF_RETURN_IF_ERROR(client_->WriteByte(null_val));
+    TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal));
   } else {
-    TF_RETURN_IF_ERROR(client_->WriteByte(string_val));
+    TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal));
     TF_RETURN_IF_ERROR(client_->WriteInt(password_.length()));
     TF_RETURN_IF_ERROR(
-        client_->WriteData((uint8_t*)password_.c_str(), password_.length()));
+        client_->WriteData(reinterpret_cast<const uint8_t*>(password_.c_str()),
+                           password_.length()));
   }
 
   int32_t handshake_res_len;
@@ -221,9 +236,6 @@ Status IgniteDatasetIterator::Handshake() {
   uint8_t handshake_res;
   TF_RETURN_IF_ERROR(client_->ReadByte(&handshake_res));
 
-  LOG(INFO) << "Handshake length " << handshake_res_len << ", res "
-            << (int16_t)handshake_res;
-
   if (handshake_res != 1) {
     int16_t serv_ver_major;
     TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_major));
@@ -234,26 +246,26 @@ Status IgniteDatasetIterator::Handshake() {
     uint8_t header;
     TF_RETURN_IF_ERROR(client_->ReadByte(&header));
 
-    if (header == string_val) {
+    if (header == kStringVal) {
       int32_t length;
       TF_RETURN_IF_ERROR(client_->ReadInt(&length));
+
       uint8_t* err_msg_c = new uint8_t[length];
+      auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; });
       TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, length));
-      std::string err_msg((char*)err_msg_c, length);
-      delete[] err_msg_c;
-
-      return errors::Internal("Handshake Error [result=", handshake_res,
-                              ", version=", serv_ver_major, ".", serv_ver_minor,
-                              ".", serv_ver_patch, ", message='", err_msg,
-                              "']");
-    } else if (header == null_val) {
-      return errors::Internal("Handshake Error [result=", handshake_res,
-                              ", version=", serv_ver_major, ".", serv_ver_minor,
-                              ".", serv_ver_patch, "]");
+      string err_msg(reinterpret_cast<char*>(err_msg_c), length);
+
+      return errors::Unknown("Handshake Error [result=", handshake_res,
+                             ", version=", serv_ver_major, ".", serv_ver_minor,
+                             ".", serv_ver_patch, ", message='", err_msg, "']");
+    } else if (header == kNullVal) {
+      return errors::Unknown("Handshake Error [result=", handshake_res,
+                             ", version=", serv_ver_major, ".", serv_ver_minor,
+                             ".", serv_ver_patch, "]");
     } else {
-      return errors::Internal("Handshake Error [result=", handshake_res,
-                              ", version=", serv_ver_major, ".", serv_ver_minor,
-                              ".", serv_ver_patch, "]");
+      return errors::Unknown("Handshake Error [result=", handshake_res,
+                             ", version=", serv_ver_major, ".", serv_ver_minor,
+                             ".", serv_ver_patch, "]");
     }
   }
 
@@ -261,31 +273,26 @@ Status IgniteDatasetIterator::Handshake() {
 }
 
 Status IgniteDatasetIterator::ScanQuery() {
-  TF_RETURN_IF_ERROR(client_->WriteInt(25));                   // Message length
-  TF_RETURN_IF_ERROR(client_->WriteShort(scan_query_opcode));  // Operation code
-  TF_RETURN_IF_ERROR(client_->WriteLong(0));                   // Request ID
+  TF_RETURN_IF_ERROR(client_->WriteInt(kScanQueryReqLength));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kScanQueryOpcode));
+  TF_RETURN_IF_ERROR(client_->WriteLong(0));  // Request ID
   TF_RETURN_IF_ERROR(
       client_->WriteInt(JavaHashCode(cache_name_)));  // Cache name
   TF_RETURN_IF_ERROR(client_->WriteByte(0));          // Flags
-  TF_RETURN_IF_ERROR(client_->WriteByte(null_val));   // Filter object
+  TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal));   // Filter object
   TF_RETURN_IF_ERROR(client_->WriteInt(page_size_));  // Cursor page size
   TF_RETURN_IF_ERROR(client_->WriteInt(part_));       // part_ition to query
   TF_RETURN_IF_ERROR(client_->WriteByte(local_));     // local_ flag
 
-  int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
-                           std::chrono::system_clock::now().time_since_epoch())
-                           .count();
-
+  uint64 wait_start = Env::Default()->NowMicros();
   int32_t res_len;
   TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
+  int64_t wait_stop = Env::Default()->NowMicros();
 
-  int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
-                          std::chrono::system_clock::now().time_since_epoch())
-                          .count();
+  LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) / 1000 << " ms";
 
-  LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms";
-
-  if (res_len < 12) return errors::Internal("Scan Query Response is corrupted");
+  if (res_len < kMinResLength)
+    return errors::Unknown("Scan Query Response is corrupted");
 
   int64_t req_id;
   TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
@@ -297,78 +304,47 @@ Status IgniteDatasetIterator::ScanQuery() {
     uint8_t err_msg_header;
     TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
 
-    if (err_msg_header == string_val) {
+    if (err_msg_header == kStringVal) {
       int32_t err_msg_length;
       TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
 
       uint8_t* err_msg_c = new uint8_t[err_msg_length];
+      auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; });
       TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
-      std::string err_msg((char*)err_msg_c, err_msg_length);
-      delete[] err_msg_c;
+      string err_msg(reinterpret_cast<char*>(err_msg_c), err_msg_length);
 
-      return errors::Internal("Scan Query Error [status=", status, ", message=",
-                              err_msg, "]");
+      return errors::Unknown("Scan Query Error [status=", status, ", message=",
+                             err_msg, "]");
     }
-    return errors::Internal("Scan Query Error [status=", status, "]");
+    return errors::Unknown("Scan Query Error [status=", status, "]");
   }
 
   TF_RETURN_IF_ERROR(client_->ReadLong(&cursor_id_));
 
-  LOG(INFO) << "Query Cursor " << cursor_id_ << " is opened";
-
   int32_t row_cnt;
   TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt));
 
-  remainder_ = res_len - 25;
-  page_ = std::unique_ptr<uint8_t>(new uint8_t[remainder_]);
-  ptr_ = page_.get();
-
-  int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
-                      std::chrono::system_clock::now().time_since_epoch())
-                      .count();
-
-  TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_));
-
-  int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
-                     std::chrono::system_clock::now().time_since_epoch())
-                     .count();
-  ;
-
-  double size_in_mb = 1.0 * remainder_ / 1024 / 1024;
-  double time_in_s = 1.0 * (stop - start) / 1000;
-  LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
-            << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
-
-  uint8_t last_page_b;
-  TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b));
-
-  last_page_ = !last_page_b;
+  int32_t page_size = res_len - kScanQueryResHeaderLength;
 
-  return Status::OK();
+  return ReceivePage(page_size);
 }
 
 Status IgniteDatasetIterator::LoadNextPage() {
-  TF_RETURN_IF_ERROR(client_->WriteInt(18));  // Message length
-  TF_RETURN_IF_ERROR(
-      client_->WriteShort(load_next_page_opcode));     // Operation code
+  TF_RETURN_IF_ERROR(client_->WriteInt(kLoadNextPageReqLength));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kLoadNextPageOpcode));
   TF_RETURN_IF_ERROR(client_->WriteLong(0));           // Request ID
   TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_));  // Cursor ID
 
-  int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
-                           std::chrono::system_clock::now().time_since_epoch())
-                           .count();
-
+  uint64 wait_start = Env::Default()->NowMicros();
   int32_t res_len;
   TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
+  uint64 wait_stop = Env::Default()->NowMicros();
 
-  int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
-                          std::chrono::system_clock::now().time_since_epoch())
-                          .count();
+  LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) / 1000
+            << " ms";
 
-  LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms";
-
-  if (res_len < 12)
-    return errors::Internal("Load Next Page Response is corrupted");
+  if (res_len < kMinResLength)
+    return errors::Unknown("Load Next Page Response is corrupted");
 
   int64_t req_id;
   TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
@@ -380,41 +356,40 @@ Status IgniteDatasetIterator::LoadNextPage() {
     uint8_t err_msg_header;
     TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
 
-    if (err_msg_header == string_val) {
+    if (err_msg_header == kStringVal) {
       int32_t err_msg_length;
       TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
 
       uint8_t* err_msg_c = new uint8_t[err_msg_length];
+      auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; });
       TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
-      std::string err_msg((char*)err_msg_c, err_msg_length);
-      delete[] err_msg_c;
+      string err_msg(reinterpret_cast<char*>(err_msg_c), err_msg_length);
 
-      return errors::Internal("Load Next Page Error [status=", status,
-                              ", message=", err_msg, "]");
+      return errors::Unknown("Load Next Page Error [status=", status,
+                             ", message=", err_msg, "]");
     }
-    return errors::Internal("Load Next Page Error [status=", status, "]");
+    return errors::Unknown("Load Next Page Error [status=", status, "]");
   }
 
   int32_t row_cnt;
   TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt));
 
-  remainder_ = res_len - 17;
+  int32_t page_size = res_len - kLoadNextPageResHeaderLength;
+
+  return ReceivePage(page_size);
+}
+
+Status IgniteDatasetIterator::ReceivePage(int32_t page_size) {
+  remainder_ = page_size;
   page_ = std::unique_ptr<uint8_t>(new uint8_t[remainder_]);
   ptr_ = page_.get();
 
-  int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
-                      std::chrono::system_clock::now().time_since_epoch())
-                      .count();
-
+  uint64 start = Env::Default()->NowMicros();
   TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_));
-
-  int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
-                     std::chrono::system_clock::now().time_since_epoch())
-                     .count();
-  ;
+  uint64 stop = Env::Default()->NowMicros();
 
   double size_in_mb = 1.0 * remainder_ / 1024 / 1024;
-  double time_in_s = 1.0 * (stop - start) / 1000;
+  double time_in_s = 1.0 * (stop - start) / 1000 / 1000;
   LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
             << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
 
@@ -426,7 +401,19 @@ Status IgniteDatasetIterator::LoadNextPage() {
   return Status::OK();
 }
 
-int32_t IgniteDatasetIterator::JavaHashCode(std::string str) const {
+Status IgniteDatasetIterator::CheckTypes(const std::vector<int32_t>& types) {
+  if (schema_.size() != types.size())
+    return errors::Unknown("Object has unexpected schema");
+
+  for (size_t i = 0; i < schema_.size(); i++) {
+    if (schema_[i] != types[permutation_[i]])
+      return errors::Unknown("Object has unexpected schema");
+  }
+
+  return Status::OK();
+}
+
+int32_t IgniteDatasetIterator::JavaHashCode(string str) const {
   int32_t h = 0;
   for (char& c : str) {
     h = 31 * h + c;
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
index 5858dbfcb9..c499e2c9cc 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
@@ -13,19 +13,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_binary_object_parser.h"
-#include "ignite_client.h"
-#include "ignite_dataset.h"
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_
+
+#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_client.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h"
+#include "tensorflow/core/platform/mutex.h"
 
 namespace tensorflow {
 
 class IgniteDatasetIterator : public DatasetIterator<IgniteDataset> {
  public:
-  IgniteDatasetIterator(const Params& params, std::string host, int32 port,
-                        std::string cache_name, bool local, int32 part,
-                        int32 page_size, std::string username,
-                        std::string password, std::string certfile,
-                        std::string keyfile, std::string cert_password,
+  IgniteDatasetIterator(const Params& params, string host, int32 port,
+                        string cache_name, bool local, int32 part,
+                        int32 page_size, string username, string password,
+                        string certfile, string keyfile, string cert_password,
                         std::vector<int32> schema,
                         std::vector<int32> permutation);
   ~IgniteDatasetIterator();
@@ -38,15 +41,28 @@ class IgniteDatasetIterator : public DatasetIterator<IgniteDataset> {
                          IteratorStateReader* reader) override;
 
  private:
+  Status GetNextInternalWithValidState(IteratorContext* ctx,
+                                       std::vector<Tensor>* out_tensors,
+                                       bool* end_of_sequence);
+
+  Status EstablishConnection();
+  Status CloseConnection();
+  Status Handshake();
+  Status ScanQuery();
+  Status LoadNextPage();
+  Status ReceivePage(int32_t page_size);
+  Status CheckTypes(const std::vector<int32_t>& types);
+  int32_t JavaHashCode(string str) const;
+
   std::unique_ptr<Client> client_;
   BinaryObjectParser parser_;
 
-  const std::string cache_name_;
+  const string cache_name_;
   const bool local_;
   const int32 part_;
   const int32 page_size_;
-  const std::string username_;
-  const std::string password_;
+  const string username_;
+  const string password_;
   const std::vector<int32> schema_;
   const std::vector<int32> permutation_;
 
@@ -54,24 +70,30 @@ class IgniteDatasetIterator : public DatasetIterator<IgniteDataset> {
   int64_t cursor_id_;
   bool last_page_;
 
+  bool valid_state_;
+
+  mutex mutex_;
+
   std::unique_ptr<uint8_t> page_;
   uint8_t* ptr_;
-
-  Status EstablishConnection();
-  Status CloseConnection();
-  Status Handshake();
-  Status ScanQuery();
-  Status LoadNextPage();
-  int32_t JavaHashCode(std::string str) const;
 };
 
-constexpr uint8_t null_val = 101;
-constexpr uint8_t string_val = 9;
-constexpr uint8_t protocol_major_version = 1;
-constexpr uint8_t protocol_minor_version = 1;
-constexpr uint8_t protocol_patch_version = 0;
-constexpr int16_t scan_query_opcode = 2000;
-constexpr int16_t load_next_page_opcode = 2001;
-constexpr int16_t close_connection_opcode = 0;
+constexpr uint8_t kNullVal = 101;
+constexpr uint8_t kStringVal = 9;
+constexpr uint8_t kProtocolMajorVersion = 1;
+constexpr uint8_t kProtocolMinorVersion = 1;
+constexpr uint8_t kProtocolPatchVersion = 0;
+constexpr int16_t kScanQueryOpcode = 2000;
+constexpr int16_t kLoadNextPageOpcode = 2001;
+constexpr int16_t kCloseConnectionOpcode = 0;
+constexpr int32_t kScanQueryReqLength = 25;
+constexpr int32_t kScanQueryResHeaderLength = 25;
+constexpr int32_t kLoadNextPageReqLength = 18;
+constexpr int32_t kLoadNextPageResHeaderLength = 17;
+constexpr int32_t kCloseConnectionReqLength = 18;
+constexpr int32_t kHandshakeReqDefaultLength = 8;
+constexpr int32_t kMinResLength = 12;
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index d03404a460..eeb29ef30b 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -13,29 +13,73 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h"
 #include <stdlib.h>
-#include "ignite_dataset.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h"
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
 namespace {
 
+Status SchemaToTypes(const std::vector<int32>& schema, DataTypeVector* dtypes) {
+  for (auto e : schema) {
+    if (e == BYTE || e == BYTE_ARR) {
+      dtypes->push_back(DT_UINT8);
+    } else if (e == SHORT || e == SHORT_ARR) {
+      dtypes->push_back(DT_INT16);
+    } else if (e == INT || e == INT_ARR) {
+      dtypes->push_back(DT_INT32);
+    } else if (e == LONG || e == LONG_ARR) {
+      dtypes->push_back(DT_INT64);
+    } else if (e == FLOAT || e == FLOAT_ARR) {
+      dtypes->push_back(DT_FLOAT);
+    } else if (e == DOUBLE || e == DOUBLE_ARR) {
+      dtypes->push_back(DT_DOUBLE);
+    } else if (e == USHORT || e == USHORT_ARR) {
+      dtypes->push_back(DT_UINT8);
+    } else if (e == BOOL || e == BOOL_ARR) {
+      dtypes->push_back(DT_BOOL);
+    } else if (e == STRING || e == STRING_ARR) {
+      dtypes->push_back(DT_STRING);
+    } else {
+      return errors::Unknown("Unexpected type in schema [type_id=", e, "]");
+    }
+  }
+
+  return Status::OK();
+}
+
+Status SchemaToShapes(const std::vector<int32>& schema,
+                      std::vector<PartialTensorShape>* shapes) {
+  for (auto e : schema) {
+    if (e >= 1 && e < 10) {
+      shapes->push_back(PartialTensorShape({}));
+    } else if (e >= 12 && e < 21) {
+      shapes->push_back(PartialTensorShape({-1}));
+    } else {
+      return errors::Unknown("Unexpected type in schema [type_id=", e, "]");
+    }
+  }
+
+  return Status::OK();
+}
+
 class IgniteDatasetOp : public DatasetOpKernel {
  public:
   using DatasetOpKernel::DatasetOpKernel;
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override {
-    std::string cache_name = "";
-    std::string host = "";
+    string cache_name = "";
+    string host = "";
     int32 port = -1;
     bool local = false;
     int32 part = -1;
     int32 page_size = -1;
-    std::string username = "";
-    std::string password = "";
-    std::string certfile = "";
-    std::string keyfile = "";
-    std::string cert_password = "";
+    string username = "";
+    string password = "";
+    string certfile = "";
+    string keyfile = "";
+    string cert_password = "";
 
     const char* env_cache_name = std::getenv("IGNITE_DATASET_CACHE_NAME");
     const char* env_host = std::getenv("IGNITE_DATASET_HOST");
@@ -50,15 +94,15 @@ class IgniteDatasetOp : public DatasetOpKernel {
     const char* env_cert_password = std::getenv("IGNITE_DATASET_CERT_PASSWORD");
 
     if (env_cache_name)
-      cache_name = std::string(env_cache_name);
+      cache_name = string(env_cache_name);
     else
-      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "cache_name",
-                                                           &cache_name));
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<string>(ctx, "cache_name", &cache_name));
 
     if (env_host)
-      host = std::string(env_host);
+      host = string(env_host);
     else
-      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "host", &host));
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<string>(ctx, "host", &host));
 
     if (env_port)
       port = atoi(env_port);
@@ -82,34 +126,34 @@ class IgniteDatasetOp : public DatasetOpKernel {
                      ParseScalarArgument<int32>(ctx, "page_size", &page_size));
 
     if (env_username)
-      username = std::string(env_username);
+      username = string(env_username);
     else
-      OP_REQUIRES_OK(
-          ctx, ParseScalarArgument<std::string>(ctx, "username", &username));
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<string>(ctx, "username", &username));
 
     if (env_password)
-      password = std::string(env_password);
+      password = string(env_password);
     else
-      OP_REQUIRES_OK(
-          ctx, ParseScalarArgument<std::string>(ctx, "password", &password));
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<string>(ctx, "password", &password));
 
     if (env_certfile)
-      certfile = std::string(env_certfile);
+      certfile = string(env_certfile);
     else
-      OP_REQUIRES_OK(
-          ctx, ParseScalarArgument<std::string>(ctx, "certfile", &certfile));
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<string>(ctx, "certfile", &certfile));
 
     if (env_keyfile)
-      keyfile = std::string(env_keyfile);
+      keyfile = string(env_keyfile);
     else
-      OP_REQUIRES_OK(
-          ctx, ParseScalarArgument<std::string>(ctx, "keyfile", &keyfile));
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<string>(ctx, "keyfile", &keyfile));
 
     if (env_cert_password)
-      cert_password = std::string(env_cert_password);
+      cert_password = string(env_cert_password);
     else
-      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "cert_password",
-                                                           &cert_password));
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<string>(ctx, "cert_password",
+                                                      &cert_password));
 
     const Tensor* schema_tensor;
     OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor));
@@ -124,19 +168,28 @@ class IgniteDatasetOp : public DatasetOpKernel {
 
     const Tensor* permutation_tensor;
     OP_REQUIRES_OK(ctx, ctx->input("permutation", &permutation_tensor));
-    OP_REQUIRES(ctx, schema_tensor->dims() == 1,
+    OP_REQUIRES(ctx, permutation_tensor->dims() == 1,
                 errors::InvalidArgument("`permutation` must be a vector."));
 
     std::vector<int32> permutation;
-    permutation.reserve(permutation_tensor->NumElements());
+    permutation.resize(permutation_tensor->NumElements());
     for (int i = 0; i < permutation_tensor->NumElements(); i++) {
-      permutation.push_back(permutation_tensor->flat<int32>()(i));
+      // Inversed permutation.
+      permutation[permutation_tensor->flat<int32>()(i)] = i;
     }
 
-    *output =
-        new IgniteDataset(ctx, cache_name, host, port, local, part, page_size,
-                          username, password, certfile, keyfile, cert_password,
-                          std::move(schema), std::move(permutation));
+    DataTypeVector dtypes;
+    std::vector<PartialTensorShape> shapes;
+
+    OP_REQUIRES_OK(ctx, SchemaToTypes(schema, &dtypes));
+    OP_REQUIRES_OK(ctx, SchemaToShapes(schema, &shapes));
+
+    *output = new IgniteDataset(
+        ctx, std::move(cache_name), std::move(host), port, local, part,
+        page_size, std::move(username), std::move(password),
+        std::move(certfile), std::move(keyfile), std::move(cert_password),
+        std::move(schema), std::move(permutation), std::move(dtypes),
+        std::move(shapes));
   }
 };
 
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
index 6f417a3cb5..750ebe605a 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
@@ -13,28 +13,31 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_client.h"
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_
 
-#include <string>
+#include "tensorflow/contrib/ignite/kernels/ignite_client.h"
 
 namespace tensorflow {
 
 class PlainClient : public Client {
  public:
-  PlainClient(std::string host, int port);
+  PlainClient(string host, int port, bool big_endian);
   ~PlainClient();
 
   virtual Status Connect();
   virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual Status ReadData(uint8_t* buf, int32_t length);
-  virtual Status WriteData(uint8_t* buf, int32_t length);
+  virtual Status ReadData(uint8_t* buf, const int32_t length);
+  virtual Status WriteData(const uint8_t* buf, const int32_t length);
 
  private:
-  const std::string host_;
+  const string host_;
   const int port_;
   int sock_;
 };
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
index a4c58a9563..e16c92307d 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_plain_client.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h"
 
 #include <arpa/inet.h>
 #include <netdb.h>
@@ -31,8 +31,8 @@ limitations under the License.
 
 namespace tensorflow {
 
-PlainClient::PlainClient(std::string host, int port)
-    : host_(host), port_(port), sock_(-1) {}
+PlainClient::PlainClient(string host, int port, bool big_endian)
+    : Client(big_endian), host_(std::move(host)), port_(port), sock_(-1) {}
 
 PlainClient::~PlainClient() {
   if (IsConnected()) {
@@ -87,7 +87,7 @@ bool PlainClient::IsConnected() { return sock_ != -1; }
 
 int PlainClient::GetSocketDescriptor() { return sock_; }
 
-Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
+Status PlainClient::ReadData(uint8_t* buf, const int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
@@ -95,7 +95,7 @@ Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
 
     if (res < 0)
       return errors::Internal("Error occured while reading from socket: ", res,
-                              ", ", std::string(strerror(errno)));
+                              ", ", string(strerror(errno)));
 
     if (res == 0) return errors::Internal("Server closed connection");
 
@@ -106,7 +106,7 @@ Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
   return Status::OK();
 }
 
-Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
+Status PlainClient::WriteData(const uint8_t* buf, const int32_t length) {
   int sent = 0;
 
   while (sent < length) {
@@ -114,7 +114,7 @@ Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
 
     if (res < 0)
       return errors::Internal("Error occured while writing into socket: ", res,
-                              ", ", std::string(strerror(errno)));
+                              ", ", string(strerror(errno)));
 
     sent += res;
     buf += res;
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index 8182fde6d9..9cd08a7779 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_plain_client.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h"
 
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
@@ -29,8 +29,11 @@ limitations under the License.
 
 namespace tensorflow {
 
-PlainClient::PlainClient(std::string host, int port)
-    : host_(host), port_(port), sock_(INVALID_SOCKET) {}
+PlainClient::PlainClient(string host, int port, bool big_endian)
+    : Client(big_endian),
+      host_(std::move(host)),
+      port_(port),
+      sock_(INVALID_SOCKET) {}
 
 PlainClient::~PlainClient() {
   if (IsConnected()) {
@@ -55,6 +58,8 @@ Status PlainClient::Connect() {
                     &result);
   if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res);
 
+  auto clean = gtl::MakeCleanup([result] { reeaddrinfo(result); });
+
   for (ptr = result; ptr != NULL; ptr = ptr->ai_next) {
     sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
     if (sock_ == INVALID_SOCKET) {
@@ -72,8 +77,6 @@ Status PlainClient::Connect() {
     break;
   }
 
-  freeaddrinfo(result);
-
   if (sock_ == INVALID_SOCKET) {
     WSACleanup();
     return errors::Internal("Unable to connect to server");
@@ -99,7 +102,7 @@ bool PlainClient::IsConnected() { return sock_ != INVALID_SOCKET; }
 
 int PlainClient::GetSocketDescriptor() { return sock_; }
 
-Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
+Status PlainClient::ReadData(uint8_t *buf, const int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
@@ -117,7 +120,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
   return Status::OK();
 }
 
-Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
+Status PlainClient::WriteData(const uint8_t *buf, const int32_t length) {
   int sent = 0;
 
   while (sent < length) {
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
index a2bc6b9609..28db509eaa 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_ssl_wrapper.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h"
 
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
@@ -29,13 +29,15 @@ static int PasswordCb(char *buf, int size, int rwflag, void *password) {
   return (strlen(buf));
 }
 
-SslWrapper::SslWrapper(std::shared_ptr<Client> client, std::string certfile,
-                       std::string keyfile, std::string cert_password)
-    : client_(client),
-      certfile_(certfile),
-      keyfile_(keyfile),
-      cert_password_(cert_password),
-      ctx_(NULL) {}
+SslWrapper::SslWrapper(std::shared_ptr<Client> client, string certfile,
+                       string keyfile, string cert_password, bool big_endian)
+    : Client(big_endian),
+      client_(client),
+      certfile_(std::move(certfile)),
+      keyfile_(std::move(keyfile)),
+      cert_password_(std::move(cert_password)),
+      ctx_(nullptr),
+      ssl_(nullptr) {}
 
 SslWrapper::~SslWrapper() {
   if (IsConnected()) {
@@ -43,9 +45,14 @@ SslWrapper::~SslWrapper() {
     if (!status.ok()) LOG(WARNING) << status.ToString();
   }
 
-  if (ctx_ != NULL) {
+  if (ctx_ != nullptr) {
     SSL_CTX_free(ctx_);
-    ctx_ = NULL;
+    ctx_ = nullptr;
+  }
+
+  if (ssl_ != nullptr) {
+    SSL_free(ssl_);
+    ssl_ = nullptr;
   }
 }
 
@@ -63,7 +70,7 @@ Status SslWrapper::InitSslContext() {
     return errors::Internal("Couldn't load cetificate chain (file '", certfile_,
                             "')");
 
-  std::string private_key_file = keyfile_.empty() ? certfile_ : keyfile_;
+  string private_key_file = keyfile_.empty() ? certfile_ : keyfile_;
   if (SSL_CTX_use_PrivateKey_file(ctx_, private_key_file.c_str(),
                                   SSL_FILETYPE_PEM) != 1)
     return errors::Internal("Couldn't load private key (file '",
@@ -94,6 +101,7 @@ Status SslWrapper::Connect() {
 
 Status SslWrapper::Disconnect() {
   SSL_free(ssl_);
+  ssl_ = nullptr;
 
   LOG(INFO) << "SSL connection closed";
 
@@ -104,7 +112,7 @@ bool SslWrapper::IsConnected() { return client_->IsConnected(); }
 
 int SslWrapper::GetSocketDescriptor() { return client_->GetSocketDescriptor(); }
 
-Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
+Status SslWrapper::ReadData(uint8_t *buf, const int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
@@ -123,7 +131,7 @@ Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
   return Status::OK();
 }
 
-Status SslWrapper::WriteData(uint8_t *buf, int32_t length) {
+Status SslWrapper::WriteData(const uint8_t *buf, const int32_t length) {
   int sent = 0;
 
   while (sent < length) {
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
index bbba6cc181..d59ce91aba 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
@@ -13,35 +13,39 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_client.h"
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_
+
+#include "tensorflow/contrib/ignite/kernels/ignite_client.h"
 
 #include <openssl/ssl.h>
-#include <string>
 
 namespace tensorflow {
 
 class SslWrapper : public Client {
  public:
-  SslWrapper(std::shared_ptr<Client> client, std::string certfile,
-             std::string keyfile, std::string cert_password);
+  SslWrapper(std::shared_ptr<Client> client, string certfile, string keyfile,
+             string cert_password, bool big_endian);
   ~SslWrapper();
 
   virtual Status Connect();
   virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual Status ReadData(uint8_t* buf, int32_t length);
-  virtual Status WriteData(uint8_t* buf, int32_t length);
+  virtual Status ReadData(uint8_t* buf, const int32_t length);
+  virtual Status WriteData(const uint8_t* buf, const int32_t length);
 
  private:
+  Status InitSslContext();
+
   std::shared_ptr<Client> client_;
-  std::string certfile_;
-  std::string keyfile_;
-  std::string cert_password_;
+  string certfile_;
+  string keyfile_;
+  string cert_password_;
   SSL_CTX* ctx_;
   SSL* ssl_;
-
-  Status InitSslContext();
 };
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_
\ No newline at end of file
diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc
index fb16b290b1..7d18df11aa 100644
--- a/tensorflow/contrib/ignite/ops/dataset_ops.cc
+++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc
@@ -37,6 +37,8 @@ REGISTER_OP("IgniteDataset")
     .SetIsStateful()
     .SetShapeFn(shape_inference::ScalarShape)
     .Doc(R"doc(
+IgniteDataset that allows to get data from Apache Ignite.
+
 Apache Ignite is a memory-centric distributed database, caching, and processing
 platform for transactional, analytical, and streaming workloads, delivering 
 in-memory speeds at petabyte scale. This contrib package contains an 
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
index 60003ca3b7..c0e24b1c69 100644
--- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -41,19 +41,19 @@ class Readable():
 
   def read_byte(self):
     """Reads and returnes byte."""
-    return self.__read("b", 1)
+    return self._read("b", 1)
 
   def read_short(self):
     """Reads and returns short (2 bytes, little-endian)."""
-    return self.__read("h", 2)
+    return self._read("h", 2)
 
   def read_int(self):
     """Reads and returns int (4 bytes, little-endian)."""
-    return self.__read("i", 4)
+    return self._read("i", 4)
 
   def read_long(self):
     """Reads and returns long (8 bytes, little-endian)."""
-    return self.__read("q", 8)
+    return self._read("q", 8)
 
   def skip(self, length):
     """Skips the specified number of bytes."""
@@ -64,7 +64,7 @@ class Readable():
     """Reads the specified number of bytes and returns them as a buffer."""
     return None
 
-  def __read(self, data_type, length):
+  def _read(self, data_type, length):
     """Reads, unpacks and returns specified type (little-endian)."""
     data_buffer = self.read_data(length)
     return struct.unpack("<" + data_type, data_buffer)[0]
@@ -116,10 +116,10 @@ class TcpClient(Readable):
       self.sock = context.wrap_socket(self.sock)
     else:
       if keyfile is not None:
-        raise Exception("SSL is disabled, keyfile must not be specified \
+        raise RuntimeError("SSL is disabled, keyfile must not be specified \
           (to enable SSL specify certfile)")
       if password is not None:
-        raise Exception("SSL is disabled, password must not be specified \
+        raise RuntimeError("SSL is disabled, password must not be specified \
           (to enable SSL specify certfile)")
 
     self.host = host
@@ -136,19 +136,19 @@ class TcpClient(Readable):
 
   def write_byte(self, v):
     """Writes the specified byte."""
-    self.__write(v, "b")
+    self._write(v, "b")
 
   def write_short(self, v):
     """Writes the specified short (2 bytes, little-endian)."""
-    self.__write(v, "h")
+    self._write(v, "h")
 
   def write_int(self, v):
     """Writes the specified short (4 bytes, little-endian)."""
-    self.__write(v, "i")
+    self._write(v, "i")
 
   def write_long(self, v):
     """Writes the specified int (8 bytes, little-endian)."""
-    self.__write(v, "q")
+    self._write(v, "q")
 
   def write_string(self, v):
     """Writes the specified string."""
@@ -167,7 +167,7 @@ class TcpClient(Readable):
         data_buffer += buf
     return data_buffer
 
-  def __write(self, value, data_type):
+  def _write(self, value, data_type):
     """Packs and writes data using the specified type (little-endian)."""
     data_buffer = struct.pack("<" + data_type, value)
     self.sock.sendall(data_buffer)
@@ -193,6 +193,7 @@ class BinaryField():
 # Binary types defined in Apache Ignite Thin client and supported by
 # TensorFlow on Apache Ignite, see
 # https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
+# True means that type is a vector, False means type is scalar.
 types = {
     1: (dtypes.uint8, False),
     2: (dtypes.int16, False),
@@ -248,13 +249,13 @@ class TypeTreeNode():
        dataset.
     """
     if self.fields is None:
-      object_type = types[self.type_id]
-      if object_type is not None:
+      if self.type_id in types:
+        object_type = types[self.type_id]
         is_array = object_type[1]
         if is_array:
           return tensor_shape.TensorShape([None])
         return tensor_shape.TensorShape([])
-      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
+      raise ValueError("Unsupported type [type_id=%d]" % self.type_id)
     output_shapes = {}
     for field in self.fields:
       output_shapes[field.name] = field.to_output_shapes()
@@ -265,10 +266,10 @@ class TypeTreeNode():
        dataset.
     """
     if self.fields is None:
-      object_type = types[self.type_id]
-      if object_type is not None:
+      if self.type_id in types:
+        object_type = types[self.type_id]
         return object_type[0]
-      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
+      raise ValueError("Unsupported type [type_id=%d]" % self.type_id)
     else:
       output_types = {}
       for field in self.fields:
@@ -276,11 +277,11 @@ class TypeTreeNode():
       return output_types
 
   def to_flat(self):
-    """Returns a list of leaf node types."""
+    """Returns a list of node types."""
     return self.to_flat_rec([])
 
   def to_permutation(self):
-    """Returns a permutation that should be applied to order object leafs."""
+    """Returns a permutation that should be applied to order object leaves."""
     correct_order_dict = {}
     self.traversal_rec(correct_order_dict, 0)
     object_order = []
@@ -288,9 +289,10 @@ class TypeTreeNode():
     return [correct_order_dict[o] for o in object_order]
 
   def to_flat_rec(self, flat):
-    """Formats a list of leaf node types."""
-    flat.append(self.type_id)
-    if self.fields is not None:
+    """Formats a list of leaf node types in pre-order."""
+    if self.fields is None:
+      flat.append(self.type_id)
+    else:
       for field in self.fields:
         field.to_flat_rec(flat)
     return flat
@@ -320,8 +322,8 @@ class IgniteClient(TcpClient):
      have the same structure (homogeneous objects) and the cache contains at
      least one object.
   """
-  def __init__(self, host, port, username=None, password=None, certfile=None,\
-    keyfile=None, cert_password=None):
+  def __init__(self, host, port, username=None, password=None, certfile=None,
+               keyfile=None, cert_password=None):
     """Constructs a new instance of IgniteClient.
 
     Args:
@@ -385,12 +387,13 @@ class IgniteClient(TcpClient):
       serv_ver_major = self.read_short()
       serv_ver_minor = self.read_short()
       serv_ver_patch = self.read_short()
-      err_msg = self.__parse_string()
+      err_msg = self._parse_string()
       if err_msg is None:
-        raise Exception("Handshake Error [result=%d, version=%d.%d.%d]" \
-            % (res, serv_ver_major, serv_ver_minor, serv_ver_patch))
+        raise RuntimeError("Handshake Error [result=%d, version=%d.%d.%d]"
+                           % (res, serv_ver_major, serv_ver_minor,
+                              serv_ver_patch))
       else:
-        raise Exception("Handshake Error [result=%d, version=%d.%d.%d, \
+        raise RuntimeError("Handshake Error [result=%d, version=%d.%d.%d, \
             message='%s']" % (
                 res,
                 serv_ver_major,
@@ -403,7 +406,7 @@ class IgniteClient(TcpClient):
     """Collects type information about objects stored in the specified
        cache.
     """
-    cache_name_hash = self.__java_hash_code(cache_name)
+    cache_name_hash = self._java_hash_code(cache_name)
     self.write_int(25)        # Message length
     self.write_short(2000)      # Operation code
     self.write_long(0)        # Request ID
@@ -419,18 +422,18 @@ class IgniteClient(TcpClient):
     status = self.read_int()
 
     if status != 0:
-      err_msg = self.__parse_string()
+      err_msg = self._parse_string()
       if err_msg is None:
-        raise Exception("Scan Query Error [status=%s]" % status)
+        raise RuntimeError("Scan Query Error [status=%s]" % status)
       else:
-        raise Exception("Scan Query Error [status=%s, message='%s']" \
-            % (status, err_msg))
+        raise RuntimeError("Scan Query Error [status=%s, message='%s']"
+                           % (status, err_msg))
 
     self.read_long()          # Cursor id
     row_count = self.read_int()
 
     if row_count == 0:
-      raise Exception("Scan Query returned empty result, so it's \
+      raise RuntimeError("Scan Query returned empty result, so it's \
         impossible to derive the cache type")
 
     payload = DataBuffer(self.read_data(result_length - 25))
@@ -438,20 +441,20 @@ class IgniteClient(TcpClient):
     self.read_byte()          # Next page
 
     res = TypeTreeNode("root", 0, [
-        self.__collect_types("key", payload),
-        self.__collect_types("val", payload)
+        self._collect_types("key", payload),
+        self._collect_types("val", payload)
     ], [0, 1])
 
     return res
 
-  def __java_hash_code(self, s):
+  def _java_hash_code(self, s):
     """Computes hash code of the specified string using Java code."""
     h = 0
     for c in s:
       h = (31 * h + ord(c)) & 0xFFFFFFFF
     return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000
 
-  def __collect_types(self, field_name, data):
+  def _collect_types(self, field_name, data):
     """Extracts type information from the specified object."""
     type_id = data.read_byte()
 
@@ -570,7 +573,7 @@ class IgniteClient(TcpClient):
         elif header == 101:
           pass
         else:
-          raise Exception("Unknown binary type when expected string \
+          raise RuntimeError("Unknown binary type when expected string \
             [type_id=%d]" % header)
       return TypeTreeNode(field_name, type_id)
 
@@ -591,7 +594,7 @@ class IgniteClient(TcpClient):
       length = data.read_int()
       inner_data = data.read_data(length)
       data.read_int()   # Offset
-      return self.__collect_types(field_name, DataBuffer(inner_data))
+      return self._collect_types(field_name, DataBuffer(inner_data))
 
     # Complex Object.
     if type_id == 103:
@@ -603,11 +606,11 @@ class IgniteClient(TcpClient):
       data.read_int()   # Object schema id
       obj_schema_offset = data.read_int()
 
-      obj_type = self.__get_type(obj_type_id)
+      obj_type = self._get_type(obj_type_id)
       children = []
 
       for obj_field in obj_type.fields:
-        child = self.__collect_types(obj_field.field_name, data)
+        child = self._collect_types(obj_field.field_name, data)
         children.append(child)
 
       children_sorted = sorted(children, key=lambda child: child.name)
@@ -618,9 +621,9 @@ class IgniteClient(TcpClient):
 
       return TypeTreeNode(field_name, type_id, children, permutation)
 
-    raise Exception("Unknown binary type [type_id=%d]" % type_id)
+    raise RuntimeError("Unknown binary type [type_id=%d]" % type_id)
 
-  def __get_type(self, type_id):
+  def _get_type(self, type_id):
     """Queries Apache Ignite information about type by type id."""
     self.write_int(14)      # Message length
     self.write_short(3002)  # Operation code
@@ -632,25 +635,25 @@ class IgniteClient(TcpClient):
     status = self.read_int()
 
     if status != 0:
-      err_msg = self.__parse_string()
+      err_msg = self._parse_string()
       if err_msg is None:
-        raise Exception("Get Binary Type Error [status=%d, message='%s']" \
-            % (status, err_msg))
+        raise RuntimeError("Get Binary Type Error [status=%d, message='%s']"
+                           % (status, err_msg))
       else:
-        raise Exception("Get Binary Type Error [status=%d]" % status)
+        raise RuntimeError("Get Binary Type Error [status=%d]" % status)
 
     binary_type_exists = self.read_byte()
 
     if binary_type_exists == 0:
-      raise Exception("Binary type not found [type_id=%d] " % type_id)
+      raise RuntimeError("Binary type not found [type_id=%d] " % type_id)
 
     binary_type_id = self.read_int()
-    binary_type_name = self.__parse_string()
-    self.__parse_string()   # Affinity field name
+    binary_type_name = self._parse_string()
+    self._parse_string()   # Affinity field name
 
     fields = []
     for _ in range(self.read_int()):
-      field_name = self.__parse_string()
+      field_name = self._parse_string()
       field_type_id = self.read_int()
       field_id = self.read_int()
 
@@ -659,7 +662,7 @@ class IgniteClient(TcpClient):
 
     is_enum = self.read_byte()
     if is_enum == 1:
-      raise Exception("Enum fields are not supported yet")
+      raise RuntimeError("Enum fields are not supported yet")
 
     schema_cnt = self.read_int()
     for _ in range(schema_cnt):
@@ -669,7 +672,7 @@ class IgniteClient(TcpClient):
 
     return BinaryType(binary_type_id, binary_type_name, fields)
 
-  def __parse_string(self):
+  def _parse_string(self):
     """Parses string."""
     header = self.read_byte()
     if header == 9:
@@ -677,8 +680,8 @@ class IgniteClient(TcpClient):
       return self.read_data(length).decode("utf-8")
     if header == 101:
       return None
-    raise Exception("Unknown binary type when expected string [type_id=%d]" \
-        % header)
+    raise RuntimeError("Unknown binary type when expected string [type_id=%d]"
+                       % header)
 
 class IgniteDataset(Dataset):
   """Apache Ignite is a memory-centric distributed database, caching, and
@@ -692,9 +695,9 @@ class IgniteDataset(Dataset):
      Ignite Binary Client Protocol.
   """
 
-  def __init__(self, cache_name, host="localhost", port=10800, local=False,\
-    part=-1, page_size=100, username=None, password=None, certfile=None,\
-    keyfile=None, cert_password=None):
+  def __init__(self, cache_name, host="localhost", port=10800, local=False,
+               part=-1, page_size=100, username=None, password=None,
+               certfile=None, keyfile=None, cert_password=None):
     """Create a IgniteDataset.
 
     Args:
@@ -716,39 +719,44 @@ class IgniteDataset(Dataset):
     """
     super(IgniteDataset, self).__init__()
 
-    with IgniteClient(host, port, username, password, certfile, keyfile,\
-        cert_password) as client:
+    with IgniteClient(host, port, username, password, certfile, keyfile,
+                      cert_password) as client:
       client.handshake()
       self.cache_type = client.get_cache_type(cache_name)
 
-    self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string,\
-        name="cache_name")
+    self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string,
+                                            name="cache_name")
     self.host = ops.convert_to_tensor(host, dtype=dtypes.string, name="host")
     self.port = ops.convert_to_tensor(port, dtype=dtypes.int32, name="port")
     self.local = ops.convert_to_tensor(local, dtype=dtypes.bool, name="local")
     self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part")
-    self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,\
-        name="page_size")
-    self.username = ops.convert_to_tensor("" if username is None else username,\
-        dtype=dtypes.string, name="username")
-    self.password = ops.convert_to_tensor("" if password is None else password,\
-        dtype=dtypes.string, name="password")
-    self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,\
-        dtype=dtypes.string, name="certfile")
-    self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,\
-        dtype=dtypes.string, name="keyfile")
-    self.cert_password = ops.convert_to_tensor("" if cert_password is None\
-        else cert_password, dtype=dtypes.string, name="cert_password")
-    self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),\
-        dtype=dtypes.int32, name="schema")
-    self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),\
-        dtype=dtypes.int32, name="permutation")
+    self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,
+                                           name="page_size")
+    self.username = ops.convert_to_tensor("" if username is None else username,
+                                          dtype=dtypes.string, name="username")
+    self.password = ops.convert_to_tensor("" if password is None else password,
+                                          dtype=dtypes.string, name="password")
+    self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,
+                                          dtype=dtypes.string, name="certfile")
+    self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,
+                                         dtype=dtypes.string, name="keyfile")
+    self.cert_password = ops.convert_to_tensor("" if cert_password is None
+                                               else cert_password,
+                                               dtype=dtypes.string,
+                                               name="cert_password")
+    self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),
+                                        dtype=dtypes.int32, name="schema")
+    self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),
+                                             dtype=dtypes.int32,
+                                             name="permutation")
 
   def _as_variant_tensor(self):
-    return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,\
-        self.port, self.local, self.part, self.page_size, self.username,\
-        self.password, self.certfile, self.keyfile, self.cert_password,\
-        self.schema, self.permutation)
+    return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,
+                                          self.port, self.local, self.part,
+                                          self.page_size, self.username,
+                                          self.password, self.certfile,
+                                          self.keyfile, self.cert_password,
+                                          self.schema, self.permutation)
 
   @property
   def output_classes(self):
-- 
GitLab


From 49410e6bbed9020d5705303a533d43312c46f886 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 12 Sep 2018 17:42:46 +0000
Subject: [PATCH 0118/1357] Fix pylint error

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/tensor_array_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index b47e750f4b..0ad2063558 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -1506,7 +1506,7 @@ class TensorArrayTest(test.TestCase):
 
   def testTensorArrayInt64GPU(self):
     if not test.is_gpu_available():
-       return
+      return
     with self.test_session(use_gpu=True, force_gpu=True) as sess:
       value = array_ops.placeholder(dtypes.int64)
       ta = tensor_array_ops.TensorArray(dtype=dtypes.int64, size=2)
-- 
GitLab


From f832a9b3743fbb160eff5e9775457b4769ea2e81 Mon Sep 17 00:00:00 2001
From: Amit Patankar <amitpatankar@google.com>
Date: Wed, 12 Sep 2018 12:49:41 -0700
Subject: [PATCH 0119/1357] Update RELEASE.md

---
 RELEASE.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/RELEASE.md b/RELEASE.md
index 763ef3b279..bdc23795e5 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,9 @@
+# Release 1.10.1
+## Bug Fixes and Other Changes
+
+* `tf.keras`:
+  * Fixing keras on Cloud TPUs. No new binaries will be built for Windows.
+
 # Release 1.10.0
 
 ## Major Features And Improvements
-- 
GitLab


From 626bc997c28e1dfeaa85041e6c5a057fec7e0a02 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 00:05:23 -0700
Subject: [PATCH 0120/1357] Move from deprecated self.test_session() to
 self.cached_session().

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about:
* the fact that the session may be reused.
* the session is not closed even when doing a "with self.test_session()" statement.

PiperOrigin-RevId: 212766976
---
 .../python/kernel_tests/accumulate_n_test.py  |  12 +-
 .../python/kernel_tests/ackermann_test.py     |   2 +-
 .../python/kernel_tests/argmax_op_test.py     |   6 +-
 .../python/kernel_tests/array_ops_test.py     |  56 ++--
 .../python/kernel_tests/as_string_op_test.py  |  12 +-
 .../kernel_tests/atrous_convolution_test.py   |   2 +-
 .../python/kernel_tests/attention_ops_test.py |   4 +-
 .../python/kernel_tests/barrier_ops_test.py   |  32 +--
 .../python/kernel_tests/base64_ops_test.py    |   6 +-
 .../python/kernel_tests/basic_gpu_test.py     |   4 +-
 .../kernel_tests/batch_gather_op_test.py      |   2 +-
 .../kernel_tests/batchtospace_op_test.py      |   6 +-
 .../python/kernel_tests/bcast_ops_test.py     |   4 +-
 .../python/kernel_tests/betainc_op_test.py    |  12 +-
 .../python/kernel_tests/bincount_op_test.py   |   2 +-
 .../candidate_sampler_ops_test.py             |  12 +-
 .../python/kernel_tests/cast_op_test.py       |  10 +-
 .../kernel_tests/checkpoint_ops_test.py       |  32 +--
 .../python/kernel_tests/clip_ops_test.py      |   4 +-
 .../python/kernel_tests/concat_op_test.py     |  28 +-
 .../python/kernel_tests/cond_v2_test.py       |   4 +-
 .../conditional_accumulator_test.py           |  38 +--
 .../kernel_tests/confusion_matrix_test.py     |  28 +-
 .../python/kernel_tests/constant_op_test.py   |  52 ++--
 .../kernel_tests/control_flow_ops_py_test.py  | 248 ++++++++---------
 tensorflow/python/kernel_tests/conv1d_test.py |   2 +-
 .../conv2d_backprop_filter_grad_test.py       |   2 +-
 .../kernel_tests/conv2d_transpose_test.py     |   8 +-
 .../conv3d_backprop_filter_v2_grad_test.py    |   2 +-
 .../kernel_tests/conv3d_transpose_test.py     |  10 +-
 .../python/kernel_tests/conv_ops_3d_test.py   |   4 +-
 .../python/kernel_tests/conv_ops_test.py      |   4 +-
 .../python/kernel_tests/cross_grad_test.py    |   2 +-
 .../python/kernel_tests/cwise_ops_test.py     |  56 ++--
 .../python/kernel_tests/decode_bmp_op_test.py |   4 +-
 .../kernel_tests/decode_compressed_op_test.py |   4 +-
 .../python/kernel_tests/decode_csv_op_test.py |   2 +-
 .../kernel_tests/decode_image_op_test.py      |   2 +-
 .../python/kernel_tests/decode_png_op_test.py |   2 +-
 .../python/kernel_tests/decode_raw_op_test.py |  12 +-
 .../dense_update_ops_no_tsan_test.py          |   8 +-
 .../kernel_tests/dense_update_ops_test.py     |   6 +-
 .../kernel_tests/division_future_test.py      |   2 +-
 .../python/kernel_tests/division_past_test.py |   2 +-
 .../python/kernel_tests/duplicate_op_test.py  |   2 +-
 .../kernel_tests/dynamic_partition_op_test.py |   8 +-
 .../kernel_tests/dynamic_stitch_op_test.py    |   4 +-
 .../python/kernel_tests/embedding_ops_test.py |  60 ++--
 .../extract_image_patches_grad_test.py        |   2 +-
 .../python/kernel_tests/fft_ops_test.py       |   4 +-
 .../python/kernel_tests/fifo_queue_test.py    | 128 ++++-----
 .../fractional_avg_pool_op_test.py            |  18 +-
 .../fractional_max_pool_op_test.py            |  18 +-
 .../python/kernel_tests/gather_op_test.py     |   4 +-
 .../kernel_tests/gradient_correctness_test.py |   8 +-
 .../kernel_tests/identity_n_op_py_test.py     |   8 +-
 .../kernel_tests/identity_op_py_test.py       |  10 +-
 .../python/kernel_tests/in_topk_op_test.py    |   6 +-
 .../python/kernel_tests/init_ops_test.py      |   2 +-
 .../python/kernel_tests/inplace_ops_test.py   |   2 +-
 tensorflow/python/kernel_tests/io_ops_test.py |   8 +-
 .../python/kernel_tests/linalg_grad_test.py   |   2 +-
 .../python/kernel_tests/linalg_ops_test.py    |   2 +-
 .../python/kernel_tests/listdiff_op_test.py   |   2 +-
 .../python/kernel_tests/logging_ops_test.py   |   4 +-
 .../python/kernel_tests/lookup_ops_test.py    | 156 +++++------
 tensorflow/python/kernel_tests/losses_test.py | 216 +++++++--------
 .../python/kernel_tests/manip_ops_test.py     |  16 +-
 .../python/kernel_tests/matmul_op_test.py     |   2 +-
 .../kernel_tests/matrix_inverse_op_test.py    |   2 +-
 .../matrix_triangular_solve_op_test.py        |   6 +-
 .../python/kernel_tests/metrics_test.py       | 258 +++++++++---------
 tensorflow/python/kernel_tests/pad_op_test.py |   2 +-
 .../kernel_tests/padding_fifo_queue_test.py   | 124 ++++-----
 .../parse_single_example_op_test.py           |   4 +-
 .../python/kernel_tests/parsing_ops_test.py   |  18 +-
 .../partitioned_variables_test.py             |  40 +--
 .../kernel_tests/priority_queue_test.py       |  20 +-
 .../python/kernel_tests/reader_ops_test.py    |  36 +--
 .../python/kernel_tests/record_input_test.py  |  14 +-
 .../kernel_tests/reduce_join_op_test.py       |  16 +-
 .../python/kernel_tests/reduction_ops_test.py |  30 +-
 .../kernel_tests/regex_full_match_op_test.py  |   6 +-
 .../python/kernel_tests/relu_op_test.py       |  36 +--
 .../python/kernel_tests/reshape_op_test.py    |   2 +-
 .../kernel_tests/reverse_sequence_op_test.py  |   4 +-
 .../kernel_tests/scatter_nd_ops_test.py       |  32 +--
 .../segment_reduction_ops_test.py             |  12 +-
 .../python/kernel_tests/session_ops_test.py   |  32 +--
 tensorflow/python/kernel_tests/sets_test.py   |  10 +-
 .../python/kernel_tests/shape_ops_test.py     |  34 +--
 .../python/kernel_tests/slice_op_test.py      |   4 +-
 .../python/kernel_tests/softmax_op_test.py    |   4 +-
 .../python/kernel_tests/softplus_op_test.py   |   8 +-
 .../python/kernel_tests/softsign_op_test.py   |   4 +-
 .../kernel_tests/spacetobatch_op_test.py      |   4 +-
 .../sparse_conditional_accumulator_test.py    |  40 +--
 .../kernel_tests/sparse_cross_op_test.py      |  34 +--
 .../kernel_tests/sparse_matmul_op_test.py     |   2 +-
 .../python/kernel_tests/sparse_ops_test.py    |   2 +-
 .../sparse_to_dense_op_py_test.py             |  16 +-
 .../python/kernel_tests/sparsemask_op_test.py |   2 +-
 .../kernel_tests/string_join_op_test.py       |   2 +-
 .../kernel_tests/string_length_op_test.py     |   2 +-
 .../kernel_tests/string_split_op_test.py      |  30 +-
 .../kernel_tests/string_strip_op_test.py      |   6 +-
 .../string_to_hash_bucket_op_test.py          |  14 +-
 .../kernel_tests/string_to_number_op_test.py  |   2 +-
 .../python/kernel_tests/substr_op_test.py     |  28 +-
 .../python/kernel_tests/summary_ops_test.py   |   6 +-
 .../kernel_tests/summary_tensor_op_test.py    |  14 +-
 .../python/kernel_tests/tensordot_op_test.py  |   6 +-
 .../python/kernel_tests/transpose_op_test.py  |   4 +-
 .../python/kernel_tests/unique_op_test.py     |  20 +-
 .../python/kernel_tests/unstack_op_test.py    |   8 +-
 .../python/kernel_tests/variable_ops_test.py  |   4 +-
 .../kernel_tests/variable_scope_test.py       |  60 ++--
 .../python/kernel_tests/variables_test.py     |  58 ++--
 .../kernel_tests/weights_broadcast_test.py    |   8 +-
 .../python/kernel_tests/xent_op_test.py       |  10 +-
 120 files changed, 1292 insertions(+), 1292 deletions(-)

diff --git a/tensorflow/python/kernel_tests/accumulate_n_test.py b/tensorflow/python/kernel_tests/accumulate_n_test.py
index b793906fac..0bc5268f38 100644
--- a/tensorflow/python/kernel_tests/accumulate_n_test.py
+++ b/tensorflow/python/kernel_tests/accumulate_n_test.py
@@ -76,7 +76,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase):
   # Putting them here so that everything that exercises AccumulateNV2 is in
   # one place and the default build runs all unit tests.
   def testSimple(self):
-    with self.test_session():
+    with self.cached_session():
       random_arrays = [
           np.random.rand(16, 16, 16, 16).astype(np.float32) for _ in range(20)
       ]
@@ -91,27 +91,27 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase):
       self.assertAllClose(np_val, tf_val.eval())
 
   def testZeroArgs(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         tf_val = math_ops.accumulate_n([])
         tf_val.eval()
 
   def testWrongShape(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         a = variables.Variable(0.2)
         b = variables.Variable(0.1)
         math_ops.accumulate_n([a, b], shape=[2, 2])  # Should be shape=[]
 
   def testIncompatibleShapes(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         a = variables.Variable(np.array([0.1, 0.2]))
         b = variables.Variable(np.array([[0.3], [0.4]]))
         math_ops.accumulate_n([a, b])
 
   def testWrongType(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(TypeError):
         a = variables.Variable(0.2, dtype=np.float32)
         b = variables.Variable(0.1, dtype=np.float32)
@@ -119,7 +119,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase):
 
   def testWrongTypeOneInput(self):
     # Scenario that used to trigger a bug, even when testWrongType() worked
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(TypeError):
         a = variables.Variable(0.2, dtype=np.float32)
         math_ops.accumulate_n([a], tensor_dtype=np.int32)
diff --git a/tensorflow/python/kernel_tests/ackermann_test.py b/tensorflow/python/kernel_tests/ackermann_test.py
index 5e0d87c783..d267e49752 100644
--- a/tensorflow/python/kernel_tests/ackermann_test.py
+++ b/tensorflow/python/kernel_tests/ackermann_test.py
@@ -34,7 +34,7 @@ class AckermannTest(test.TestCase):
     self.assertEqual(len(ackermann.OP_LIST.op), 1)
     self.assertEqual(ackermann.OP_LIST.op[0].name, 'Ackermann')
 
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(ackermann.ackermann().eval(), b'A(m, 0) == A(m-1, 1)')
 
 
diff --git a/tensorflow/python/kernel_tests/argmax_op_test.py b/tensorflow/python/kernel_tests/argmax_op_test.py
index 1202c463e8..127d14c250 100644
--- a/tensorflow/python/kernel_tests/argmax_op_test.py
+++ b/tensorflow/python/kernel_tests/argmax_op_test.py
@@ -104,20 +104,20 @@ class ArgMaxTest(test.TestCase):
     self._testDim(np.int64)
 
   def testEmpty(self):
-    with self.test_session():
+    with self.cached_session():
       for op in math_ops.argmin, math_ops.argmax:
         with self.assertRaisesOpError(
             r"Reduction axis 0 is empty in shape \[0\]"):
           op([], 0).eval()
 
   def testDefaultAxis(self):
-    with self.test_session():
+    with self.cached_session():
       for op in math_ops.argmin, math_ops.argmax:
         ans = op([1]).eval()
         self.assertAllEqual(ans, 0)
 
   def testOutputEmpty(self):
-    with self.test_session():
+    with self.cached_session():
       for op in math_ops.argmin, math_ops.argmax:
         ret = op(array_ops.zeros(shape=[1, 0, 2]), axis=-1).eval()
         self.assertEqual(ret.shape, (1, 0))
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index a164682227..573bb8614f 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -50,7 +50,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase):
   def testNonBatchMatrix(self):
     matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
     expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
-    with self.test_session():
+    with self.cached_session():
       transposed = array_ops.matrix_transpose(matrix)
       self.assertEqual((3, 2), transposed.get_shape())
       self.assertAllEqual(expected_transposed, transposed.eval())
@@ -58,7 +58,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase):
   def testConjugate(self):
     m = [[1 + 1j, 2 + 2j, 3 + 3j], [4 + 4j, 5 + 5j, 6 + 6j]]
     expected_transposed = [[1 - 1j, 4 - 4j], [2 - 2j, 5 - 5j], [3 - 3j, 6 - 6j]]
-    with self.test_session():
+    with self.cached_session():
       matrix = ops.convert_to_tensor(m)
       transposed = array_ops.matrix_transpose(matrix, conjugate=True)
       self.assertEqual((3, 2), transposed.get_shape())
@@ -71,7 +71,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase):
     matrix_1_t = [[11, 44], [22, 55], [33, 66]]
     batch_matrix = [matrix_0, matrix_1]  # Shape (2, 2, 3)
     expected_transposed = [matrix_0_t, matrix_1_t]  # Shape (2, 3, 2)
-    with self.test_session():
+    with self.cached_session():
       transposed = array_ops.matrix_transpose(batch_matrix)
       self.assertEqual((2, 3, 2), transposed.get_shape())
       self.assertAllEqual(expected_transposed, transposed.eval())
@@ -79,7 +79,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase):
   def testNonBatchMatrixDynamicallyDefined(self):
     matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
     expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
-    with self.test_session():
+    with self.cached_session():
       matrix_ph = array_ops.placeholder(dtypes.int32)
       transposed = array_ops.matrix_transpose(matrix_ph)
       self.assertAllEqual(
@@ -94,7 +94,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase):
     matrix_1_t = [[11, 44], [22, 55], [33, 66]]
     batch_matrix = [matrix_0, matrix_1]  # Shape (2, 2, 3)
     expected_transposed = [matrix_0_t, matrix_1_t]  # Shape (2, 3, 2)
-    with self.test_session():
+    with self.cached_session():
       batch_matrix_ph = array_ops.placeholder(dtypes.int32)
       transposed = array_ops.matrix_transpose(batch_matrix_ph)
       self.assertAllEqual(
@@ -105,7 +105,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase):
 
   def testTensorWithStaticRankLessThanTwoRaisesBecauseNotAMatrix(self):
     vector = [1, 2, 3]
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(ValueError, "should be a "):
         array_ops.matrix_transpose(vector)
 
@@ -129,7 +129,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
       masked_arr = arr[:, mask]
     elif axis == 2:
       masked_arr = arr[:, :, mask]
-    with self.test_session():
+    with self.cached_session():
       masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
 
       # Leading dimension size of masked_tensor is always unknown until runtime
@@ -176,7 +176,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
     numpy_result = arr[mask]
     tf_result = array_ops.boolean_mask(arr, mask)
     self.assertAllEqual(numpy_result.shape[1:], tf_result.get_shape()[1:])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(numpy_result, tf_result.eval())
 
   def testEmptyInput1D(self):
@@ -185,7 +185,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
     numpy_result = arr[mask]
     tf_result = array_ops.boolean_mask(arr, mask)
     self.assertAllEqual(numpy_result.shape[1:], tf_result.get_shape()[1:])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(numpy_result, tf_result.eval())
 
   def testEmptyOutput(self):
@@ -199,7 +199,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
   def testWorksWithDimensionsEqualToNoneDuringGraphBuild(self):
     # The rank of the mask tensor must be specified. This is explained
     # in the docstring as well.
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ph_tensor = array_ops.placeholder(dtypes.int32, shape=None)
       ph_mask = array_ops.placeholder(dtypes.bool, shape=[None])
 
@@ -217,7 +217,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
   def testMaskDimensionsSetToNoneRaises(self):
     # The rank of the mask tensor must be specified. This is explained
     # in the docstring as well.
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.placeholder(dtypes.int32, shape=[None, 2])
       mask = array_ops.placeholder(dtypes.bool, shape=None)
       with self.assertRaisesRegexp(ValueError, "dimensions must be specified"):
@@ -226,21 +226,21 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
   def testMaskHasMoreDimsThanTensorRaises(self):
     mask = [[True, True], [False, False]]
     tensor = [1, 2, 3, 4]
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(ValueError, "incompatible"):
         array_ops.boolean_mask(tensor, mask).eval()
 
   def testMaskIsScalarRaises(self):
     mask = True
     tensor = 1
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(ValueError, "mask.*scalar"):
         array_ops.boolean_mask(tensor, mask).eval()
 
   def testMaskShapeDifferentThanFirstPartOfTensorShapeRaises(self):
     mask = [True, True, True]
     tensor = [[1, 2], [3, 4]]
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(ValueError, "incompatible"):
         array_ops.boolean_mask(tensor, mask).eval()
 
@@ -345,7 +345,7 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
   def testInvalid(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
     axis = array_ops.placeholder(dtypes.int32)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "is out of valid range"):
         array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [-30]})
@@ -954,7 +954,7 @@ class StridedSliceAssignChecker(object):
 class SliceAssignTest(test_util.TensorFlowTestCase):
 
   def testInvalidSlice(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       foo = constant_op.constant([1, 2, 3])
       with self.assertRaisesRegexp(ValueError, "Sliced assignment"
                                    " is only supported for variables"):
@@ -1000,7 +1000,7 @@ class SliceAssignTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(
         errors.FailedPreconditionError,
         "Attempting to use uninitialized value Variable"):
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         v = variables.Variable([1, 2])
         sess.run(v[:].assign([1, 2]))
 
@@ -1019,7 +1019,7 @@ class SliceAssignTest(test_util.TensorFlowTestCase):
     too_small_val = constant_op.constant([3, 4], dtype=dtypes.int8)
     too_large_val = constant_op.constant([3, 4], dtype=dtypes.int64)
     v = resource_variable_ops.ResourceVariable(init_val)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(v.initializer)
       with self.assertRaises(ValueError):
         sess.run(v[:].assign(too_large_val))
@@ -1066,12 +1066,12 @@ class ShapeSizeRankTest(test_util.TensorFlowTestCase):
 class SequenceMaskTest(test_util.TensorFlowTestCase):
 
   def testExceptions(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(ValueError, "maxlen must be scalar"):
         array_ops.sequence_mask([10, 20], [10, 20])
 
   def testOneDimensionalWithMaxlen(self):
-    with self.test_session():
+    with self.cached_session():
       res = array_ops.sequence_mask(constant_op.constant([1, 3, 2]), 5)
       self.assertAllEqual(res.get_shape(), [3, 5])
       self.assertAllEqual(
@@ -1081,7 +1081,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
 
   @test_util.enable_c_shapes
   def testOneDimensionalDtypeWithoutMaxlen(self):
-    with self.test_session():
+    with self.cached_session():
       # test dtype and default maxlen:
       res = array_ops.sequence_mask(constant_op.constant([0, 1, 4]),
                                     dtype=dtypes.float32)
@@ -1092,7 +1092,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
 
   @test_util.enable_c_shapes
   def testOneDimensionalWithoutMaxlen(self):
-    with self.test_session():
+    with self.cached_session():
       res = array_ops.sequence_mask(
           constant_op.constant([0, 1, 4]))
       self.assertAllEqual(res.get_shape().as_list(), [3, 4])
@@ -1104,7 +1104,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
 
   @test_util.enable_c_shapes
   def testTwoDimensional(self):
-    with self.test_session():
+    with self.cached_session():
       res = array_ops.sequence_mask(constant_op.constant([[1, 3, 2]]), 5)
       self.assertAllEqual(res.get_shape(), [1, 3, 5])
       self.assertAllEqual(res.eval(), [[[True, False, False, False, False], [
@@ -1137,7 +1137,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
           [[True, False, False, False, False], [True, True, True, False, False],
            [True, True, False, False, False]])
 
-    with self.test_session():
+    with self.cached_session():
       check_dtypes(dtypes.int32, dtypes.int32)
       check_dtypes(dtypes.int32, dtypes.int64)
       check_dtypes(dtypes.int64, dtypes.int32)
@@ -1216,7 +1216,7 @@ class UnravelIndexTest(test_util.TensorFlowTestCase):
   # TODO(b/73086570): Reenable test.
   @unittest.skip("Test does not pass internally.")
   def testUnravelIndex(self):
-    with self.test_session():
+    with self.cached_session():
       for dtype in [dtypes.int32, dtypes.int64]:
         indices_1 = constant_op.constant(1621, dtype=dtype)
         dims_1 = constant_op.constant([6, 7, 8, 9], dtype=dtype)
@@ -1237,13 +1237,13 @@ class UnravelIndexTest(test_util.TensorFlowTestCase):
 class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
 
   def testSimple(self):
-    with self.test_session():
+    with self.cached_session():
       a = array_ops.constant(10)
       guarantee_a = array_ops.guarantee_const(a)
       self.assertEqual(10, guarantee_a.eval())
 
   def testVariables(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for use_resource in [False, True]:
         a = variable_scope.get_variable(
             "var_{}".format(use_resource), [],
@@ -1254,7 +1254,7 @@ class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
         self.assertEqual(10.0, guarantee_a.eval())
 
   def testResourceRejection(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       a = variable_scope.get_variable(
           "resource_var", [],
           initializer=init_ops.constant_initializer(10.0),
diff --git a/tensorflow/python/kernel_tests/as_string_op_test.py b/tensorflow/python/kernel_tests/as_string_op_test.py
index 51aa17babe..dd4a90e5f6 100644
--- a/tensorflow/python/kernel_tests/as_string_op_test.py
+++ b/tensorflow/python/kernel_tests/as_string_op_test.py
@@ -32,7 +32,7 @@ class AsStringOpTest(test.TestCase):
         0, 1, -1, 0.5, 0.25, 0.125, float("INF"), float("NAN"), float("-INF")
     ]
 
-    with self.test_session():
+    with self.cached_session():
       for dtype in (dtypes.float32, dtypes.float64):
         input_ = array_ops.placeholder(dtype)
 
@@ -84,7 +84,7 @@ class AsStringOpTest(test.TestCase):
     int_inputs_ = [0, -1, 1, -128, 127, -101, 101, -0]
     s = lambda strs: [x.decode("ascii") for x in strs]
 
-    with self.test_session():
+    with self.cached_session():
       for dtype in (dtypes.int32, dtypes.int64, dtypes.int8):
         input_ = array_ops.placeholder(dtype)
 
@@ -117,7 +117,7 @@ class AsStringOpTest(test.TestCase):
     # testing int8
     s = lambda strs: [x.decode("ascii") for x in strs]
 
-    with self.test_session():
+    with self.cached_session():
       input_ = array_ops.placeholder(dtypes.int32)
       int_inputs_ = [np.iinfo(np.int32).min, np.iinfo(np.int32).max]
       output = string_ops.as_string(input_)
@@ -133,7 +133,7 @@ class AsStringOpTest(test.TestCase):
   def testHalfInt(self):
     s = lambda strs: [x.decode("ascii") for x in strs]
 
-    with self.test_session():
+    with self.cached_session():
       input_ = array_ops.placeholder(dtypes.int16)
       int_inputs_ = [np.iinfo(np.int16).min, np.iinfo(np.int16).max]
       output = string_ops.as_string(input_)
@@ -144,7 +144,7 @@ class AsStringOpTest(test.TestCase):
     bool_inputs_ = [False, True]
     s = lambda strs: [x.decode("ascii") for x in strs]
 
-    with self.test_session():
+    with self.cached_session():
       for dtype in (dtypes.bool,):
         input_ = array_ops.placeholder(dtype)
 
@@ -159,7 +159,7 @@ class AsStringOpTest(test.TestCase):
     ]
     complex_inputs_ = [(x + (x + 1) * 1j) for x in float_inputs_]
 
-    with self.test_session():
+    with self.cached_session():
       for dtype in (dtypes.complex64, dtypes.complex128):
         input_ = array_ops.placeholder(dtype)
 
diff --git a/tensorflow/python/kernel_tests/atrous_convolution_test.py b/tensorflow/python/kernel_tests/atrous_convolution_test.py
index b98e5fd386..6b16fca29d 100644
--- a/tensorflow/python/kernel_tests/atrous_convolution_test.py
+++ b/tensorflow/python/kernel_tests/atrous_convolution_test.py
@@ -263,7 +263,7 @@ class AtrousConvolutionTest(test.TestCase):
     self.assertLess(err, err_tolerance)
 
   def testGradient(self):
-    with self.test_session():
+    with self.cached_session():
       for padding in ["SAME", "VALID"]:
         for rate_width in range(1, 3):
           for rate_height in range(1, 3):
diff --git a/tensorflow/python/kernel_tests/attention_ops_test.py b/tensorflow/python/kernel_tests/attention_ops_test.py
index fb74698660..1e09ba5b65 100644
--- a/tensorflow/python/kernel_tests/attention_ops_test.py
+++ b/tensorflow/python/kernel_tests/attention_ops_test.py
@@ -84,7 +84,7 @@ class ExtractGlimpseTest(test.TestCase):
         image_ops.extract_glimpse(t_cols_4d, t1, t2), [0, 2, 1, 3]))
 
     # Evaluate the TensorFlow Graph.
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       value_rows, value_cols = sess.run([glimpse_rows, glimpse_cols])
 
     # Check dimensions of returned glimpse.
@@ -118,7 +118,7 @@ class ExtractGlimpseTest(test.TestCase):
   def testEmptyTensor(self):
     empty_image = np.zeros((0, 4, 3, 0))
     offsets = np.zeros((0, 2))
-    with self.test_session():
+    with self.cached_session():
       result = image_ops.extract_glimpse(empty_image, [1, 1], offsets)
       self.assertAllEqual(
           np.zeros(
diff --git a/tensorflow/python/kernel_tests/barrier_ops_test.py b/tensorflow/python/kernel_tests/barrier_ops_test.py
index 7f49c63957..4d36b3a465 100644
--- a/tensorflow/python/kernel_tests/barrier_ops_test.py
+++ b/tensorflow/python/kernel_tests/barrier_ops_test.py
@@ -67,7 +67,7 @@ class BarrierTest(test.TestCase):
       """, b.barrier_ref.op.node_def)
 
   def testInsertMany(self):
-    with self.test_session():
+    with self.cached_session():
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.float32), shapes=((), ()), name="B")
       size_t = b.ready_size()
@@ -83,7 +83,7 @@ class BarrierTest(test.TestCase):
       self.assertEquals(size_t.eval(), [3])
 
   def testInsertManyEmptyTensor(self):
-    with self.test_session():
+    with self.cached_session():
       error_message = ("Empty tensors are not supported, but received shape "
                        r"\'\(0,\)\' at index 1")
       with self.assertRaisesRegexp(ValueError, error_message):
@@ -91,7 +91,7 @@ class BarrierTest(test.TestCase):
             (dtypes.float32, dtypes.float32), shapes=((1,), (0,)), name="B")
 
   def testInsertManyEmptyTensorUnknown(self):
-    with self.test_session():
+    with self.cached_session():
       b = data_flow_ops.Barrier((dtypes.float32, dtypes.float32), name="B")
       size_t = b.ready_size()
       self.assertEqual([], size_t.get_shape())
@@ -103,7 +103,7 @@ class BarrierTest(test.TestCase):
         insert_0_op.run()
 
   def testTakeMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.float32), shapes=((), ()), name="B")
       size_t = b.ready_size()
@@ -128,7 +128,7 @@ class BarrierTest(test.TestCase):
       self.assertEqual(values_1_val[idx], v1)
 
   def testTakeManySmallBatch(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.float32), shapes=((), ()), name="B")
       size_t = b.ready_size()
@@ -192,7 +192,7 @@ class BarrierTest(test.TestCase):
         insert_1_3_op.run()
 
   def testUseBarrierWithShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.float32), shapes=((2, 2), (8,)), name="B")
       size_t = b.ready_size()
@@ -221,7 +221,7 @@ class BarrierTest(test.TestCase):
       self.assertAllEqual(values_1_val[idx], v1)
 
   def testParallelInsertMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(dtypes.float32, shapes=())
       size_t = b.ready_size()
       keys = [str(x).encode("ascii") for x in range(10)]
@@ -241,7 +241,7 @@ class BarrierTest(test.TestCase):
       self.assertEqual(values_val[idx], v)
 
   def testParallelTakeMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(dtypes.float32, shapes=())
       size_t = b.ready_size()
       keys = [str(x).encode("ascii") for x in range(10)]
@@ -275,7 +275,7 @@ class BarrierTest(test.TestCase):
         zip(keys, values), [(k[0], v[0]) for k, v in zip(key_vals, value_vals)])
 
   def testBlockingTakeMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(dtypes.float32, shapes=())
       keys = [str(x).encode("ascii") for x in range(10)]
       values = [float(x) for x in range(10)]
@@ -297,7 +297,7 @@ class BarrierTest(test.TestCase):
       t.join()
 
   def testParallelInsertManyTakeMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.int64), shapes=((), (2,)))
       num_iterations = 100
@@ -376,7 +376,7 @@ class BarrierTest(test.TestCase):
         self.assertAllEqual(taken_i["values_1"], expected_values_1)
 
   def testClose(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.float32), shapes=((), ()), name="B")
       size_t = b.ready_size()
@@ -434,7 +434,7 @@ class BarrierTest(test.TestCase):
         sess.run(take_t[0])
 
   def testCancel(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.float32), shapes=((), ()), name="B")
       size_t = b.ready_size()
@@ -487,7 +487,7 @@ class BarrierTest(test.TestCase):
         sess.run(take_t[0])
 
   def _testClosedEmptyBarrierTakeManyAllowSmallBatchRaises(self, cancel):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.float32), shapes=((), ()), name="B")
       take_t = b.take_many(1, allow_small_batch=True)
@@ -500,7 +500,7 @@ class BarrierTest(test.TestCase):
     self._testClosedEmptyBarrierTakeManyAllowSmallBatchRaises(cancel=True)
 
   def _testParallelInsertManyTakeManyCloseHalfwayThrough(self, cancel):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.int64), shapes=((), (2,)))
       num_iterations = 50
@@ -576,7 +576,7 @@ class BarrierTest(test.TestCase):
     self._testParallelInsertManyTakeManyCloseHalfwayThrough(cancel=True)
 
   def _testParallelPartialInsertManyTakeManyCloseHalfwayThrough(self, cancel):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.int64), shapes=((), (2,)))
       num_iterations = 100
@@ -676,7 +676,7 @@ class BarrierTest(test.TestCase):
     self._testParallelPartialInsertManyTakeManyCloseHalfwayThrough(cancel=True)
 
   def testIncompatibleSharedBarrierErrors(self):
-    with self.test_session():
+    with self.cached_session():
       # Do component types and shapes.
       b_a_1 = data_flow_ops.Barrier(
           (dtypes.float32,), shapes=(()), shared_name="b_a")
diff --git a/tensorflow/python/kernel_tests/base64_ops_test.py b/tensorflow/python/kernel_tests/base64_ops_test.py
index be96f45497..1b399942ef 100644
--- a/tensorflow/python/kernel_tests/base64_ops_test.py
+++ b/tensorflow/python/kernel_tests/base64_ops_test.py
@@ -48,7 +48,7 @@ class Base64OpsTest(test_util.TensorFlowTestCase):
     return base64_msg
 
   def _RunTest(self, msg, pad):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       if pad:
         encoded, decoded = sess.run([self._encoded_t, self._decoded_t],
                                     feed_dict={self._msg: msg})
@@ -92,7 +92,7 @@ class Base64OpsTest(test_util.TensorFlowTestCase):
         encoded = string_ops.encode_base64(msg, pad=pad)
         decoded = string_ops.decode_base64(encoded)
 
-        with self.test_session() as sess:
+        with self.cached_session() as sess:
           encoded_value, decoded_value = sess.run([encoded, decoded])
 
         self.assertEqual(encoded_value.shape, msg.shape)
@@ -102,7 +102,7 @@ class Base64OpsTest(test_util.TensorFlowTestCase):
     def try_decode(enc):
       self._decoded_f.eval(feed_dict={self._encoded_f: enc})
 
-    with self.test_session():
+    with self.cached_session():
       # Invalid length.
       msg = np.random.bytes(99)
       enc = base64.urlsafe_b64encode(msg)
diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py
index 987a6ffcd4..e651fa0070 100644
--- a/tensorflow/python/kernel_tests/basic_gpu_test.py
+++ b/tensorflow/python/kernel_tests/basic_gpu_test.py
@@ -174,7 +174,7 @@ class BroadcastSimpleTest(test.TestCase):
                         numeric_gradient_type=None):
     z = np_func(x, y)
     zs = list(z.shape)
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       if x.dtype in (np.float32, np.float64):
@@ -195,7 +195,7 @@ class BroadcastSimpleTest(test.TestCase):
                         numeric_gradient_type=None):
     z = np_func(x, y)
     zs = list(z.shape)
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       if x.dtype in (np.float32, np.float64):
diff --git a/tensorflow/python/kernel_tests/batch_gather_op_test.py b/tensorflow/python/kernel_tests/batch_gather_op_test.py
index 8e7ae89f9d..7dd347989a 100644
--- a/tensorflow/python/kernel_tests/batch_gather_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_gather_op_test.py
@@ -86,7 +86,7 @@ class GatherTest(test.TestCase):
 
   def testString(self):
     params = np.array([[b"asdf", b"zxcv"], [b"qwer", b"uiop"]])
-    with self.test_session():
+    with self.cached_session():
       indices_tf = constant_op.constant([1])
       self.assertAllEqual([[b"qwer", b"uiop"]],
                           array_ops.batch_gather(params, indices_tf).eval())
diff --git a/tensorflow/python/kernel_tests/batchtospace_op_test.py b/tensorflow/python/kernel_tests/batchtospace_op_test.py
index 6143cd3baa..03f3f64353 100644
--- a/tensorflow/python/kernel_tests/batchtospace_op_test.py
+++ b/tensorflow/python/kernel_tests/batchtospace_op_test.py
@@ -60,7 +60,7 @@ class BatchToSpaceDepthToSpace(test.TestCase, PythonOpImpl):
           array_ops.depth_to_space(
               array_ops.transpose(x, [3, 1, 2, 0]), block_size=block_size),
           [3, 1, 2, 0])
-      with self.test_session():
+      with self.cached_session():
         self.assertAllEqual(y1.eval(), y2.eval())
 
 
@@ -235,7 +235,7 @@ class BatchToSpaceGradientTest(test.TestCase, PythonOpImpl):
   # Check the gradients.
   def _checkGrad(self, x, crops, block_size):
     assert 4 == x.ndim
-    with self.test_session():
+    with self.cached_session():
       tf_x = ops.convert_to_tensor(x)
       tf_y = self.batch_to_space(tf_x, crops, block_size)
       epsilon = 1e-5
@@ -293,7 +293,7 @@ class BatchToSpaceNDGradientTest(test.TestCase):
     block_shape = np.array(block_shape)
     crops = constant_op.constant(
         np.array(crops).reshape((len(block_shape), 2)), crops_dtype)
-    with self.test_session():
+    with self.cached_session():
       tf_x = ops.convert_to_tensor(x)
       tf_y = array_ops.batch_to_space_nd(tf_x, block_shape, crops)
       epsilon = 1e-5
diff --git a/tensorflow/python/kernel_tests/bcast_ops_test.py b/tensorflow/python/kernel_tests/bcast_ops_test.py
index 3305e55c05..3ec820aead 100644
--- a/tensorflow/python/kernel_tests/bcast_ops_test.py
+++ b/tensorflow/python/kernel_tests/bcast_ops_test.py
@@ -28,11 +28,11 @@ from tensorflow.python.platform import test
 class BcastOpsTest(test.TestCase):
 
   def _GetBroadcastShape(self, xs, ys):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       return sess.run(broadcast_args(xs, ys))
 
   def _GetGradientArgs(self, xs, ys):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       return sess.run(broadcast_gradient_args(xs, ys))
 
   def testBasic(self):
diff --git a/tensorflow/python/kernel_tests/betainc_op_test.py b/tensorflow/python/kernel_tests/betainc_op_test.py
index 16fdedac41..92d21462d5 100644
--- a/tensorflow/python/kernel_tests/betainc_op_test.py
+++ b/tensorflow/python/kernel_tests/betainc_op_test.py
@@ -47,7 +47,7 @@ class BetaincTest(test.TestCase):
       tf_b_s = constant_op.constant(b_s, dtype=dtype)
       tf_x_s = constant_op.constant(x_s, dtype=dtype)
       tf_out_t = math_ops.betainc(tf_a_s, tf_b_s, tf_x_s)
-      with self.test_session():
+      with self.cached_session():
         tf_out = tf_out_t.eval()
       scipy_out = special.betainc(a_s, b_s, x_s).astype(np_dt)
 
@@ -60,13 +60,13 @@ class BetaincTest(test.TestCase):
       # Test out-of-range values (most should return nan output)
       combinations = list(itertools.product([-1, 0, 0.5, 1.0, 1.5], repeat=3))
       a_comb, b_comb, x_comb = np.asarray(list(zip(*combinations)), dtype=np_dt)
-      with self.test_session():
+      with self.cached_session():
         tf_comb = math_ops.betainc(a_comb, b_comb, x_comb).eval()
       scipy_comb = special.betainc(a_comb, b_comb, x_comb).astype(np_dt)
       self.assertAllCloseAccordingToType(scipy_comb, tf_comb)
 
       # Test broadcasting between scalars and other shapes
-      with self.test_session():
+      with self.cached_session():
         self.assertAllCloseAccordingToType(
             special.betainc(0.1, b_s, x_s).astype(np_dt),
             math_ops.betainc(0.1, b_s, x_s).eval(),
@@ -96,7 +96,7 @@ class BetaincTest(test.TestCase):
       with self.assertRaisesRegexp(ValueError, "must be equal"):
         math_ops.betainc(0.5, [0.5], [[0.5]])
 
-      with self.test_session():
+      with self.cached_session():
         with self.assertRaisesOpError("Shapes of .* are inconsistent"):
           a_p = array_ops.placeholder(dtype)
           b_p = array_ops.placeholder(dtype)
@@ -140,7 +140,7 @@ class BetaincTest(test.TestCase):
     self._testBetaInc(a_s, b_s, x_s, dtypes.float32)
 
   def testBetaIncFpropAndBpropAreNeverNAN(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       space = np.logspace(-8, 5).tolist()
       space_x = np.linspace(1e-16, 1 - 1e-16).tolist()
       ga_s, gb_s, gx_s = zip(*list(itertools.product(space, space, space_x)))
@@ -161,7 +161,7 @@ class BetaincTest(test.TestCase):
 
   def testBetaIncGrads(self):
     err_tolerance = 1e-3
-    with self.test_session():
+    with self.cached_session():
       # Test gradient
       ga_s = np.abs(np.random.randn(2, 2) * 30)  # in (0, infty)
       gb_s = np.abs(np.random.randn(2, 2) * 30)  # in (0, infty)
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 2767df127e..8a58b3f97e 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -93,7 +93,7 @@ class BincountTest(test_util.TensorFlowTestCase):
 
   def test_negative(self):
     # unsorted_segment_sum will only report InvalidArgumentError on CPU
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors.InvalidArgumentError):
         math_ops.bincount([1, 2, 3, -1, 6, 8]).eval()
 
diff --git a/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py b/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py
index 28b3dc45e9..b19077db56 100644
--- a/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py
+++ b/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py
@@ -38,7 +38,7 @@ class RangeSamplerOpsTest(test.TestCase):
   TRUE_LABELS = [[1, 2], [0, 4], [3, 3]]
 
   def testTrueCandidates(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       indices = constant_op.constant([0, 0, 1, 1, 2, 2])
       true_candidates_vec = constant_op.constant([1, 2, 0, 4, 3, 3])
       true_candidates_matrix = array_ops.reshape(
@@ -50,7 +50,7 @@ class RangeSamplerOpsTest(test.TestCase):
     self.assertAllEqual(true_candidates_val, self.TRUE_LABELS)
 
   def testSampledCandidates(self):
-    with self.test_session():
+    with self.cached_session():
       true_classes = constant_op.constant(
           [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64)
       sampled_candidates, _, _ = candidate_sampling_ops.all_candidate_sampler(
@@ -62,7 +62,7 @@ class RangeSamplerOpsTest(test.TestCase):
     self.assertEqual(sampled_candidates.get_shape(), [self.NUM_SAMPLED])
 
   def testTrueLogExpectedCount(self):
-    with self.test_session():
+    with self.cached_session():
       true_classes = constant_op.constant(
           [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64)
       _, true_expected_count, _ = candidate_sampling_ops.all_candidate_sampler(
@@ -77,7 +77,7 @@ class RangeSamplerOpsTest(test.TestCase):
                      [self.BATCH_SIZE, self.NUM_TRUE])
 
   def testSampledLogExpectedCount(self):
-    with self.test_session():
+    with self.cached_session():
       true_classes = constant_op.constant(
           [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64)
       _, _, sampled_expected_count = candidate_sampling_ops.all_candidate_sampler(  # pylint: disable=line-too-long
@@ -90,7 +90,7 @@ class RangeSamplerOpsTest(test.TestCase):
     self.assertEqual(sampled_log_expected_count.get_shape(), [self.NUM_SAMPLED])
 
   def testAccidentalHits(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       true_classes = constant_op.constant(
           [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64)
       sampled_candidates, _, _ = candidate_sampling_ops.all_candidate_sampler(
@@ -109,7 +109,7 @@ class RangeSamplerOpsTest(test.TestCase):
   def testSeed(self):
 
     def draw(seed):
-      with self.test_session():
+      with self.cached_session():
         true_classes = constant_op.constant(
             [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64)
         sampled, _, _ = candidate_sampling_ops.log_uniform_candidate_sampler(
diff --git a/tensorflow/python/kernel_tests/cast_op_test.py b/tensorflow/python/kernel_tests/cast_op_test.py
index 214d5cb3c0..c90520e46d 100644
--- a/tensorflow/python/kernel_tests/cast_op_test.py
+++ b/tensorflow/python/kernel_tests/cast_op_test.py
@@ -174,7 +174,7 @@ class CastOpTest(test.TestCase):
     self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, True)), True)
 
   def _OpError(self, x, dtype, err):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesOpError(err):
         math_ops.cast(x, dtype).eval()
 
@@ -182,7 +182,7 @@ class CastOpTest(test.TestCase):
     self._OpError(np.arange(0, 10), dtypes.string, "Cast.*int64.*string.*")
 
   def testCastToTypeOfVariable(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = variables.Variable(5, dtype=dtypes.float32)
       y = variables.Variable(True, dtype=dtypes.bool)
       cast = math_ops.cast(y, x.dtype)
@@ -193,7 +193,7 @@ class CastOpTest(test.TestCase):
     t = [dtypes.float32, dtypes.float64, dtypes.complex64, dtypes.complex128]
     for src_t in t:
       for dst_t in t:
-        with self.test_session():
+        with self.cached_session():
           x = constant_op.constant(1.0, src_t)
           z = array_ops.identity(x)
           y = math_ops.cast(z, dst_t)
@@ -209,7 +209,7 @@ class SparseTensorCastTest(test.TestCase):
     shape = constant_op.constant([3], dtypes.int64)
     st = sparse_tensor.SparseTensor(indices, values, shape)
     st_cast = math_ops.cast(st, dtypes.float32)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(st_cast.indices.eval(), [[0], [1], [2]])
       self.assertAllEqual(st_cast.values.eval(),
                           np.array([1, 2, 3], np.float32))
@@ -221,7 +221,7 @@ class SaturateCastTest(test.TestCase):
   def testSaturate(self):
     in_types = dtypes.float32,
     out_types = dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.float32
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for in_type in in_types:
         for out_type in out_types:
           lo, hi = in_type.min, in_type.max
diff --git a/tensorflow/python/kernel_tests/checkpoint_ops_test.py b/tensorflow/python/kernel_tests/checkpoint_ops_test.py
index 7f147ba53a..51611b75af 100644
--- a/tensorflow/python/kernel_tests/checkpoint_ops_test.py
+++ b/tensorflow/python/kernel_tests/checkpoint_ops_test.py
@@ -57,7 +57,7 @@ class GenerateVocabRemappingTest(test.TestCase):
         new_vocab_offset=0)
     expected_remapping = range(0, 3)
     expected_num_present = 3
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_remapping, remapping.eval())
       self.assertAllEqual(expected_num_present, num_present.eval())
 
@@ -70,7 +70,7 @@ class GenerateVocabRemappingTest(test.TestCase):
         new_vocab_offset=0)
     expected_remapping = [2, 0, 1]
     expected_num_present = 3
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_remapping, remapping.eval())
       self.assertAllEqual(expected_num_present, num_present.eval())
 
@@ -83,7 +83,7 @@ class GenerateVocabRemappingTest(test.TestCase):
         new_vocab_offset=1)
     expected_remapping = [0]
     expected_num_present = 1
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_remapping, remapping.eval())
       self.assertAllEqual(expected_num_present, num_present.eval())
 
@@ -98,7 +98,7 @@ class GenerateVocabRemappingTest(test.TestCase):
         old_vocab_size=2)
     expected_remapping = [-1, 0, 1]
     expected_num_present = 2
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_remapping, remapping.eval())
       self.assertAllEqual(expected_num_present, num_present.eval())
 
@@ -122,7 +122,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
       self.old_tensor_name = 'some_scope/matrix'
 
     save = saver.Saver([matrix])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variables.global_variables_initializer().run()
       self.bundle_file = os.path.join(test.get_temp_dir(), 'bundle_checkpoint')
       save.save(sess, self.bundle_file)
@@ -140,7 +140,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
         initializing_values=[],
         num_rows=2,
         num_cols=self.old_num_cols)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(self.matrix_value[row_remapping],
                           remapped_matrix.eval())
 
@@ -155,7 +155,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
         initializing_values=[],
         num_rows=len(row_remapping),
         num_cols=len(col_remapping))
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(self.matrix_value[row_remapping][:, col_remapping],
                           remapped_matrix.eval())
 
@@ -170,7 +170,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
         initializing_values=[],
         num_rows=len(row_remapping),
         num_cols=len(col_remapping))
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(self.matrix_value[row_remapping][:, col_remapping],
                           remapped_matrix.eval())
 
@@ -189,7 +189,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
     expected_remapped_matrix = np.reshape(
         [33, init_val, init_val, init_val, 1, init_val], [3, 2])
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(expected_remapped_matrix, remapped_matrix.eval())
 
   def test_load_and_remap_all_missing_rows(self):
@@ -204,7 +204,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
         initializing_values=initializing_values,
         num_rows=num_rows,
         num_cols=self.old_num_cols)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(
           np.reshape(initializing_values, (num_rows, self.old_num_cols)),
           remapped_matrix.eval())
@@ -222,7 +222,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
         initializing_values=initializing_values,
         num_rows=num_rows,
         num_cols=num_cols)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(
           np.reshape(initializing_values, (num_rows, num_cols)),
           remapped_matrix.eval())
@@ -243,7 +243,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
         initializing_values=[],
         num_rows=len(invalid_remapping),
         num_cols=self.old_num_cols)
-    with self.test_session(), self.assertRaises(errors.UnimplementedError):
+    with self.cached_session(), self.assertRaises(errors.UnimplementedError):
       remapped_matrix.eval()
 
     # Invalid column remapping.
@@ -255,7 +255,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
         initializing_values=[],
         num_rows=self.old_num_rows,
         num_cols=len(invalid_remapping))
-    with self.test_session(), self.assertRaises(errors.UnimplementedError):
+    with self.cached_session(), self.assertRaises(errors.UnimplementedError):
       remapped_matrix.eval()
 
   def test_load_and_remap_incorrect_initializing_values(self):
@@ -272,7 +272,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
         initializing_values=[],
         num_rows=3,
         num_cols=2)
-    with self.test_session(), self.assertRaises(errors.InvalidArgumentError):
+    with self.cached_session(), self.assertRaises(errors.InvalidArgumentError):
       remapped_matrix.eval()
 
     remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix(
@@ -284,7 +284,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
         initializing_values=[0] * 5,
         num_rows=3,
         num_cols=2)
-    with self.test_session(), self.assertRaises(errors.InvalidArgumentError):
+    with self.cached_session(), self.assertRaises(errors.InvalidArgumentError):
       remapped_matrix.eval()
 
 
@@ -306,7 +306,7 @@ class LoadAndRemapMatrixWithMaxRowsTest(test.TestCase):
         initializer=constant_op.constant(np_value, dtype=dtypes.float32),
         partitioner=partitioner)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ckpt_path = os.path.join(test.get_temp_dir(), 'temp_ckpt')
       save = saver.Saver([matrix])
       variables.global_variables_initializer().run()
diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py
index de52a70cc0..bb7b645da2 100644
--- a/tensorflow/python/kernel_tests/clip_ops_test.py
+++ b/tensorflow/python/kernel_tests/clip_ops_test.py
@@ -39,7 +39,7 @@ class ClipTest(test.TestCase):
     min_val = constant_op.constant([0.5, 0.5, 0.5, 0.5], dtype=dtypes.float32)
     max_val = constant_op.constant([3.5, 3.5, 3.5, 3.5], dtype=dtypes.float32)
     outputs_2 = clip_ops.clip_by_value(inputs, min_val, max_val)
-    with self.test_session():
+    with self.cached_session():
       error_1 = gradient_checker.compute_gradient_error(inputs, [4], outputs_1,
                                                         [4])
       self.assertLess(error_1, 1e-4)
@@ -139,7 +139,7 @@ class ClipTest(test.TestCase):
 
   def testClipByValueNonFinite(self):
     # TODO(b/78016351): Enable test on GPU once the bug is fixed.
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant([float('NaN'), float('Inf'), -float('Inf')])
       np_ans = [float('NaN'), 4.0, -4.0]
       clip_value = 4.0
diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py
index c22934ce47..0e59ce6972 100644
--- a/tensorflow/python/kernel_tests/concat_op_test.py
+++ b/tensorflow/python/kernel_tests/concat_op_test.py
@@ -383,7 +383,7 @@ class ConcatOpTest(test.TestCase):
         np.random.random_sample(x_shape).astype(np.float64)
         for x_shape in x_shapes
     ]
-    with self.test_session():
+    with self.cached_session():
       xs = [constant_op.constant(x_val) for x_val in x_vals]
       output = array_ops.concat(xs, 0)
       err = gradient_checker.compute_gradient_error(xs, x_shapes, output,
@@ -397,7 +397,7 @@ class ConcatOpTest(test.TestCase):
         np.random.random_sample(x_shape).astype(np.float64)
         for x_shape in x_shapes
     ]
-    with self.test_session():
+    with self.cached_session():
       xs = [constant_op.constant(x_val) for x_val in x_vals]
       output = array_ops.concat(xs, 1)
       err = gradient_checker.compute_gradient_error(xs, x_shapes, output,
@@ -411,7 +411,7 @@ class ConcatOpTest(test.TestCase):
         np.random.random_sample(x_shape).astype(np.float64)
         for x_shape in x_shapes
     ]
-    with self.test_session():
+    with self.cached_session():
       xs = [constant_op.constant(x_val) for x_val in x_vals]
       x_concat = array_ops.concat(xs, 0)
       output = array_ops.gather(x_concat, [1, 2, 0, 5])
@@ -426,7 +426,7 @@ class ConcatOpTest(test.TestCase):
         np.random.random_sample(x_shape).astype(np.float64)
         for x_shape in x_shapes
     ]
-    with self.test_session():
+    with self.cached_session():
       xs = [constant_op.constant(x_val) for x_val in x_vals]
       x_concat = array_ops.concat(xs, 1)
       output = array_ops.gather(x_concat, [1, 2, 0, 5])
@@ -441,7 +441,7 @@ class ConcatOpTest(test.TestCase):
         np.random.random_sample(x_shape).astype(np.float64)
         for x_shape in x_shapes
     ]
-    with self.test_session():
+    with self.cached_session():
       xs = [constant_op.constant(x_val) for x_val in x_vals]
       x_concat = array_ops.concat(xs, 2)
       output = array_ops.gather(x_concat, [1, 2, 0, 5])
@@ -452,7 +452,7 @@ class ConcatOpTest(test.TestCase):
   def testIndexedSlicesConcatDim1Grad_UnknownInputDim(self):
     x_shapes = [[20, 7, 3], [20, 3, 3], [20, 1, 3]]
     output_shape = [4, 11, 3]
-    with self.test_session():
+    with self.cached_session():
       x_1 = array_ops.placeholder(dtypes.float64)
       x_2 = array_ops.placeholder(dtypes.float64)
       x_3 = array_ops.placeholder(dtypes.float64)
@@ -473,13 +473,13 @@ class ConcatOpTest(test.TestCase):
   def testConcatTuple(self):
     c1 = np.random.rand(4, 4)
     c2 = np.random.rand(4, 4)
-    with self.test_session():
+    with self.cached_session():
       concat_list_t = array_ops.concat([c1, c2], 0)
       concat_tuple_t = array_ops.concat((c1, c2), 0)
       self.assertAllEqual(concat_list_t.eval(), concat_tuple_t.eval())
 
   def testConcatNoScalars(self):
-    with self.test_session():
+    with self.cached_session():
       scalar = constant_op.constant(7)
       dim = array_ops.placeholder(dtypes.int32)
       with self.assertRaisesRegexp(
@@ -554,7 +554,7 @@ class ConcatOpTest(test.TestCase):
 
   def _testGradientsForAxis(
       self, inp_tensors, axis, output_shape, feed_dict=None):
-    with self.test_session():
+    with self.cached_session():
       c = array_ops.concat(inp_tensors, axis)
       grad_inp = np.random.rand(*output_shape).astype("f")
       grad_tensor = constant_op.constant(
@@ -566,7 +566,7 @@ class ConcatOpTest(test.TestCase):
 
   def _testIndexedSlicesGradientsForAxis(
       self, inp_tensors, axis, output_shape, gather_indexes, feed_dict=None):
-    with self.test_session():
+    with self.cached_session():
       c = array_ops.gather(
           array_ops.concat(inp_tensors, axis), gather_indexes)
       grad_inp = np.random.rand(*output_shape).astype("f")
@@ -631,7 +631,7 @@ class ConcatOffsetTest(test.TestCase):
       self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]])
 
   def testNotVector(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       cdim = constant_op.constant(1, dtypes.int32)
       s0 = constant_op.constant([[2, 3, 5]], dtypes.int32)
       s1 = constant_op.constant([[2, 7, 5]], dtypes.int32)
@@ -641,7 +641,7 @@ class ConcatOffsetTest(test.TestCase):
         sess.run(off)
 
   def testConcatDimOutOfRange(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       cdim = constant_op.constant(4, dtypes.int32)
       s0 = constant_op.constant([2, 3, 5], dtypes.int32)
       s1 = constant_op.constant([2, 7, 5], dtypes.int32)
@@ -651,7 +651,7 @@ class ConcatOffsetTest(test.TestCase):
         sess.run(off)
 
   def testDimMismatch(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       cdim = constant_op.constant(1, dtypes.int32)
       s0 = constant_op.constant([2, 3, 5], dtypes.int32)
       s1 = constant_op.constant([2, 7, 5, 10], dtypes.int32)
@@ -661,7 +661,7 @@ class ConcatOffsetTest(test.TestCase):
         sess.run(off)
 
   def testSizeMismatch(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       cdim = constant_op.constant(1, dtypes.int32)
       s0 = constant_op.constant([2, 3, 5], dtypes.int32)
       s1 = constant_op.constant([2, 7, 10], dtypes.int32)
diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 1fac7f8270..18a1b230a0 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -107,7 +107,7 @@ class CondV2Test(test.TestCase):
     self._testCond(true_fn, false_fn, [y])
 
   def testNoInputs(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       pred = array_ops.placeholder(dtypes.bool, name="pred")
 
       def true_fn():
@@ -527,7 +527,7 @@ class CondV2Test(test.TestCase):
             }), [5., 0.])
 
   def testSecondDerivative(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       pred = array_ops.placeholder(dtypes.bool, name="pred")
       x = constant_op.constant(3.0, name="x")
 
diff --git a/tensorflow/python/kernel_tests/conditional_accumulator_test.py b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
index 86802664d1..262352a9af 100644
--- a/tensorflow/python/kernel_tests/conditional_accumulator_test.py
+++ b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
@@ -80,26 +80,26 @@ class ConditionalAccumulatorTest(test.TestCase):
       """, q.accumulator_ref.op.node_def)
 
   def testAccumulatorSizeEmpty(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(dtypes_lib.float32, name="Q")
       self.assertEqual(q.num_accumulated().eval(), 0)
 
   def testAccumulatorSetGlobalStep(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       set_global_step_op = q.set_global_step(1)
       set_global_step_op.run()
 
   def testAccumulatorApplyGradFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       accum_op = q.apply_grad((10.0,))
       accum_op.run()
 
   def testDtypes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64]
 
       for i in range(len(dtypes)):
@@ -116,7 +116,7 @@ class ConditionalAccumulatorTest(test.TestCase):
         self.assertEqual(sum(elems) / len(elems), result)
 
   def testAccumulatorMultipleAccumulators(self):
-    with self.test_session():
+    with self.cached_session():
       q_f32_0 = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       q_f32_1 = data_flow_ops.ConditionalAccumulator(
@@ -135,7 +135,7 @@ class ConditionalAccumulatorTest(test.TestCase):
         self.assertEqual(result, i + 10.0)
 
   def testAccumulatorApplyAndTakeGradWithShape(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=(3, 2))
       elems = [[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
@@ -166,7 +166,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       q.apply_grad([[1.0], [2.0], [3.0]])
 
   def testAccumulatorDynamicShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=None)
 
@@ -191,7 +191,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       self.assertTrue(is_all_equal)
 
   def testAccumulatorWrongDynamicShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=None)
 
@@ -209,7 +209,7 @@ class ConditionalAccumulatorTest(test.TestCase):
         sess.run(accum_op, feed_dict={x: [[1.0], [2.0], [3.0]]})
 
   def testAccumulatorSizeAfterApplyGrad(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       accum_op = q.apply_grad((10.0,))
@@ -220,7 +220,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       self.assertEqual(q.num_accumulated().eval(), 2)
 
   def testAccumulatorSizeAfterApplyGradAndTakeGrad(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       accum_op = q.apply_grad((10.0,))
@@ -248,7 +248,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       self.assertEqual(q.num_accumulated().eval(), 0)
 
   def testAccumulatorTakeGradMean(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       elems = [10.0, 20.0]
@@ -307,7 +307,7 @@ class ConditionalAccumulatorTest(test.TestCase):
           reduction_type="Invalid")
 
   def testAccumulatorInvalidTakeGrad(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       elems = [10.0, 20.0]
@@ -322,7 +322,7 @@ class ConditionalAccumulatorTest(test.TestCase):
         takeg_t.eval()
 
   def testAccumulatorRepeatedTakeGradMean(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
 
@@ -379,7 +379,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       self.assertEqual(elems_sum, val)
 
   def testAccumulatorIncrementGlobalStep(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
 
@@ -395,7 +395,7 @@ class ConditionalAccumulatorTest(test.TestCase):
         inc_global_step.eval()
 
   def testAccumulatorSetGlobalStepPreventsAccumulation(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
 
@@ -416,7 +416,7 @@ class ConditionalAccumulatorTest(test.TestCase):
                                                      if x >= ls), val)
 
   def testParallelApplyGrad(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
@@ -441,7 +441,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       self.assertEqual(val, sum(elems) / len(elems))
 
   def testParallelTakeGrad(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       elems = [e for e in range(10)]
@@ -473,7 +473,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       self.assertItemsEqual(elems, results)
 
   def testAccumulatorApplyAndBlockingTake(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
 
@@ -506,7 +506,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       sess.run(takeg_op)
 
   def testAccumulatorCancel(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       takeg_t = q.take_grad(1)
diff --git a/tensorflow/python/kernel_tests/confusion_matrix_test.py b/tensorflow/python/kernel_tests/confusion_matrix_test.py
index 93f5323c41..bc24345261 100644
--- a/tensorflow/python/kernel_tests/confusion_matrix_test.py
+++ b/tensorflow/python/kernel_tests/confusion_matrix_test.py
@@ -37,7 +37,7 @@ class ConfusionMatrixTest(test.TestCase):
   @test_util.run_in_graph_and_eager_modes
   def testExample(self):
     """This is a test of the example provided in pydoc."""
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual([
           [0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0],
@@ -49,7 +49,7 @@ class ConfusionMatrixTest(test.TestCase):
 
   def _testConfMatrix(self, labels, predictions, truth, weights=None,
                       num_classes=None):
-    with self.test_session():
+    with self.cached_session():
       dtype = predictions.dtype
       ans = confusion_matrix.confusion_matrix(
           labels, predictions, dtype=dtype, weights=weights,
@@ -78,7 +78,7 @@ class ConfusionMatrixTest(test.TestCase):
     self._testBasic(dtype=np.int64)
 
   def _testConfMatrixOnTensors(self, tf_dtype, np_dtype):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       m_neg = array_ops.placeholder(dtype=dtypes.float32)
       m_pos = array_ops.placeholder(dtype=dtypes.float32)
       s = array_ops.placeholder(dtype=dtypes.float32)
@@ -229,7 +229,7 @@ class ConfusionMatrixTest(test.TestCase):
   def testOutputIsInt32(self):
     labels = np.arange(2)
     predictions = np.arange(2)
-    with self.test_session():
+    with self.cached_session():
       cm = confusion_matrix.confusion_matrix(
           labels, predictions, dtype=dtypes.int32)
       tf_cm = cm.eval()
@@ -238,7 +238,7 @@ class ConfusionMatrixTest(test.TestCase):
   def testOutputIsInt64(self):
     labels = np.arange(2)
     predictions = np.arange(2)
-    with self.test_session():
+    with self.cached_session():
       cm = confusion_matrix.confusion_matrix(
           labels, predictions, dtype=dtypes.int64)
       tf_cm = cm.eval()
@@ -260,7 +260,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase):
         confusion_matrix.remove_squeezable_dimensions(
             labels_placeholder, predictions_placeholder))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(label_values, static_labels.eval())
       self.assertAllEqual(prediction_values, static_predictions.eval())
       feed_dict = {
@@ -285,7 +285,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase):
         confusion_matrix.remove_squeezable_dimensions(
             labels_placeholder, predictions_placeholder))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(label_values, static_labels.eval())
       self.assertAllEqual(prediction_values, static_predictions.eval())
       feed_dict = {
@@ -310,7 +310,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase):
         confusion_matrix.remove_squeezable_dimensions(
             labels_placeholder, predictions_placeholder, expected_rank_diff=0))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(label_values, static_labels.eval())
       self.assertAllEqual(prediction_values, static_predictions.eval())
       feed_dict = {
@@ -336,7 +336,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase):
             labels_placeholder, predictions_placeholder))
 
     expected_label_values = np.reshape(label_values, newshape=(2, 3))
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_label_values, static_labels.eval())
       self.assertAllEqual(prediction_values, static_predictions.eval())
       feed_dict = {
@@ -362,7 +362,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase):
             labels_placeholder, predictions_placeholder, expected_rank_diff=1))
 
     expected_label_values = np.reshape(label_values, newshape=(2, 3))
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_label_values, static_labels.eval())
       self.assertAllEqual(prediction_values, static_predictions.eval())
       feed_dict = {
@@ -388,7 +388,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase):
             labels_placeholder, predictions_placeholder))
 
     expected_prediction_values = np.reshape(prediction_values, newshape=(2, 3))
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(label_values, static_labels.eval())
       self.assertAllEqual(expected_prediction_values, static_predictions.eval())
       feed_dict = {
@@ -415,7 +415,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase):
             labels_placeholder, predictions_placeholder, expected_rank_diff=-1))
 
     expected_prediction_values = np.reshape(prediction_values, newshape=(2, 3))
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(label_values, static_labels.eval())
       self.assertAllEqual(expected_prediction_values, static_predictions.eval())
       feed_dict = {
@@ -441,7 +441,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase):
         confusion_matrix.remove_squeezable_dimensions(
             labels_placeholder, predictions_placeholder))
 
-    with self.test_session():
+    with self.cached_session():
       feed_dict = {
           labels_placeholder: label_values,
           predictions_placeholder: prediction_values
@@ -466,7 +466,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase):
         confusion_matrix.remove_squeezable_dimensions(
             labels_placeholder, predictions_placeholder))
 
-    with self.test_session():
+    with self.cached_session():
       feed_dict = {
           labels_placeholder: label_values,
           predictions_placeholder: prediction_values
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 107ee37fab..d1e4e5477f 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -162,18 +162,18 @@ class ConstantTest(test.TestCase):
       logging_const_op.run()
 
   def testStringWithNulls(self):
-    with self.test_session():
+    with self.cached_session():
       val = ops.convert_to_tensor(b"\0\0\0\0").eval()
     self.assertEqual(len(val), 4)
     self.assertEqual(val, b"\0\0\0\0")
 
-    with self.test_session():
+    with self.cached_session():
       val = ops.convert_to_tensor(b"xx\0xx").eval()
     self.assertEqual(len(val), 5)
     self.assertAllEqual(val, b"xx\0xx")
     nested = [[b"\0\0\0\0", b"xx\0xx"], [b"\0_\0_\0_\0", b"\0"]]
 
-    with self.test_session():
+    with self.cached_session():
       val = ops.convert_to_tensor(nested).eval()
     # NOTE(mrry): Do not use assertAllEqual, because it converts nested to a
     #   numpy array, which loses the null terminators.
@@ -279,7 +279,7 @@ class AsTensorTest(test.TestCase):
     self.assertTrue(isinstance(x, ops.Tensor))
 
   def testAsTensorForShapeInput(self):
-    with self.test_session():
+    with self.cached_session():
       x = ops.convert_to_tensor(tensor_shape.TensorShape([]))
       self.assertEqual(dtypes_lib.int32, x.dtype)
       self.assertAllEqual([], x.eval())
@@ -331,7 +331,7 @@ class AsTensorTest(test.TestCase):
           tensor_shape.TensorShape([1, 2, 3]), dtype=dtypes_lib.float32)
 
   def testAsTensorForDimensionInput(self):
-    with self.test_session():
+    with self.cached_session():
       x = ops.convert_to_tensor(tensor_shape.TensorShape([1, 2, 3])[1])
       self.assertEqual(dtypes_lib.int32, x.dtype)
       self.assertAllEqual(2, x.eval())
@@ -367,7 +367,7 @@ class IdentityOpTest(test.TestCase):
 class ZerosTest(test.TestCase):
 
   def _Zeros(self, shape):
-    with self.test_session():
+    with self.cached_session():
       ret = array_ops.zeros(shape)
       self.assertEqual(shape, ret.get_shape())
       return ret.eval()
@@ -379,13 +379,13 @@ class ZerosTest(test.TestCase):
   def testScalar(self):
     self.assertEqual(0, self._Zeros([]))
     self.assertEqual(0, self._Zeros(()))
-    with self.test_session():
+    with self.cached_session():
       scalar = array_ops.zeros(constant_op.constant([], dtype=dtypes_lib.int32))
       self.assertEqual(0, scalar.eval())
 
   def testDynamicSizes(self):
     np_ans = np.array([[0] * 3] * 2)
-    with self.test_session():
+    with self.cached_session():
       # Creates a tensor of 2 x 3.
       d = array_ops.fill([2, 3], 12., name="fill")
       # Constructs a tensor of zeros of the same dimensions as "d".
@@ -396,7 +396,7 @@ class ZerosTest(test.TestCase):
     self.assertShapeEqual(np_ans, z)
 
   def testDtype(self):
-    with self.test_session():
+    with self.cached_session():
       d = array_ops.fill([2, 3], 12., name="fill")
       self.assertEqual(d.get_shape(), [2, 3])
       # Test default type for both constant size and dynamic size
@@ -489,7 +489,7 @@ class ZerosLikeTest(test.TestCase):
 
   def testZerosLikeDtype(self):
     # Make sure zeros_like works even for dtypes that cannot be cast between
-    with self.test_session():
+    with self.cached_session():
       shape = (3, 5)
       dtypes = np.float32, np.complex64
       for in_type in dtypes:
@@ -533,7 +533,7 @@ class ZerosLikeTest(test.TestCase):
 class OnesTest(test.TestCase):
 
   def _Ones(self, shape):
-    with self.test_session():
+    with self.cached_session():
       ret = array_ops.ones(shape)
       self.assertEqual(shape, ret.get_shape())
       return ret.eval()
@@ -544,13 +544,13 @@ class OnesTest(test.TestCase):
   def testScalar(self):
     self.assertEqual(1, self._Ones([]))
     self.assertEqual(1, self._Ones(()))
-    with self.test_session():
+    with self.cached_session():
       scalar = array_ops.ones(constant_op.constant([], dtype=dtypes_lib.int32))
       self.assertEqual(1, scalar.eval())
 
   def testDynamicSizes(self):
     np_ans = np.array([[1] * 3] * 2)
-    with self.test_session():
+    with self.cached_session():
       # Creates a tensor of 2 x 3.
       d = array_ops.fill([2, 3], 12., name="fill")
       # Constructs a tensor of ones of the same dimensions as "d".
@@ -561,7 +561,7 @@ class OnesTest(test.TestCase):
     self.assertShapeEqual(np_ans, z)
 
   def testAutoPack(self):
-    with self.test_session():
+    with self.cached_session():
       h = array_ops.placeholder(dtypes_lib.int32, shape=[])
       w = array_ops.placeholder(dtypes_lib.int32, shape=[])
       z = array_ops.ones([h, w])
@@ -569,7 +569,7 @@ class OnesTest(test.TestCase):
     self.assertAllEqual(out, np.array([[1] * 16] * 4))
 
   def testDtype(self):
-    with self.test_session():
+    with self.cached_session():
       d = array_ops.fill([2, 3], 12., name="fill")
       self.assertEqual(d.get_shape(), [2, 3])
       # Test default type for both constant size and dynamic size
@@ -606,7 +606,7 @@ class OnesLikeTest(test.TestCase):
         dtypes_lib.complex128
     ]:
       numpy_dtype = dtype.as_numpy_dtype
-      with self.test_session():
+      with self.cached_session():
         # Creates a tensor of non-zero values with shape 2 x 3.
         d = constant_op.constant(
             np.ones(
@@ -672,7 +672,7 @@ class FillTest(test.TestCase):
     self.assertAllEqual(np_ans, tf_ans)
 
   def testFillNegative(self):
-    with self.test_session():
+    with self.cached_session():
       for shape in (-1,), (2, -1), (-1, 2), (-2), (-3):
         with self.assertRaises(ValueError):
           array_ops.fill(shape, 7)
@@ -703,7 +703,7 @@ class FillTest(test.TestCase):
     self.assertEqual([None, 17], f.get_shape().as_list())
 
   def testGradient(self):
-    with self.test_session():
+    with self.cached_session():
       in_v = constant_op.constant(5.0)
       out_shape = [3, 2]
       out_filled = array_ops.fill(out_shape, in_v)
@@ -715,7 +715,7 @@ class FillTest(test.TestCase):
 class PlaceholderTest(test.TestCase):
 
   def testDtype(self):
-    with self.test_session():
+    with self.cached_session():
       p = array_ops.placeholder(dtypes_lib.float32, shape=(10, 10), name="p")
       p_identity = array_ops.identity(p)
       feed_array = np.random.rand(10, 10)
@@ -727,7 +727,7 @@ class PlaceholderTest(test.TestCase):
         p_identity.eval()
 
   def testShape(self):
-    with self.test_session():
+    with self.cached_session():
       p = array_ops.placeholder(dtypes_lib.float32, shape=(10, 10), name="p")
       p_identity = array_ops.identity(p)
       feed_array = np.random.rand(10, 10)
@@ -744,7 +744,7 @@ class PlaceholderTest(test.TestCase):
         p_identity.eval(feed_dict={p: feed_array[:5, :5]})
 
   def testUnknownShape(self):
-    with self.test_session():
+    with self.cached_session():
       p = array_ops.placeholder(dtypes_lib.float32, shape=None, name="p")
       p_identity = array_ops.identity(p)
       # can feed anything
@@ -756,13 +756,13 @@ class PlaceholderTest(test.TestCase):
           p_identity.eval(feed_dict={p: feed_array}), feed_array)
 
   def testScalarShape(self):
-    with self.test_session():
+    with self.cached_session():
       p = array_ops.placeholder(dtypes_lib.float32, shape=[], name="p")
       p_identity = array_ops.identity(p)
       self.assertAllClose(p_identity.eval(feed_dict={p: 5}), 5)
 
   def testPartialShape(self):
-    with self.test_session():
+    with self.cached_session():
       p = array_ops.placeholder(dtypes_lib.float32, shape=[None, 3], name="p")
       p_identity = array_ops.identity(p)
       feed_array = np.random.rand(10, 3)
@@ -774,7 +774,7 @@ class PlaceholderTest(test.TestCase):
         p_identity.eval(feed_dict={p: feed_array[:5, :2]})
 
   def testPartialShapeWhenNotFed(self):
-    with self.test_session():
+    with self.cached_session():
       p = array_ops.placeholder(dtypes_lib.float32, shape=[None, 3], name="p")
       p_identity = array_ops.identity(p)
 
@@ -784,7 +784,7 @@ class PlaceholderTest(test.TestCase):
         p_identity.eval()
 
   def testControlDependency(self):
-    with self.test_session():
+    with self.cached_session():
       p = array_ops.placeholder(dtypes_lib.int32, shape=[], name="p")
       with ops.control_dependencies([p]):
         c = constant_op.constant(5, dtypes_lib.int32)
@@ -872,7 +872,7 @@ versions {
 """
     gdef = graph_pb2.GraphDef()
     text_format.Merge(graph, gdef)
-    with self.test_session():
+    with self.cached_session():
       p, ret = importer.import_graph_def(
           gdef, return_elements=["Placeholder:0", "add:0"])
 
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 374faad7a7..ebeabcfe1a 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -129,7 +129,7 @@ def isum(s, maximum_iterations=None):
 class ControlFlowTest(test.TestCase):
 
   def testRefIdentity(self):
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable(7)
 
       v = control_flow_ops._Identity(v)
@@ -141,7 +141,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(9, v2.eval())
 
   def testRefEnter(self):
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable(7)
 
       enter_v = control_flow_ops._Enter(v, "foo_1", is_constant=True)
@@ -154,7 +154,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(9, v3.eval())
 
   def testRefSwitch(self):
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable(7)
 
       p = constant_op.constant(True)
@@ -164,7 +164,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(9, v2.eval())
 
   def testEnterMulExit(self):
-    with self.test_session():
+    with self.cached_session():
       data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
       enter_data = gen_control_flow_ops.enter(data, "foo_1", False)
       five = constant_op.constant(5)
@@ -176,7 +176,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(np.array([x * 5 for x in [1, 2, 3, 4, 5, 6]]), result)
 
   def testEnterShapePropagation(self):
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable([0.0, 0.0], dtype=dtypes.float32)
 
       # If is_constant=True, the shape information should be propagated.
@@ -190,7 +190,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(enter_v_non_constant.shape, None)
 
   def testSwitchMergeIndexedSlices(self):
-    with self.test_session():
+    with self.cached_session():
       values = constant_op.constant([1, 2, 3, 4, 5, 6])
       indices = constant_op.constant([0, 2, 4, 6, 8, 10])
       data = ops.IndexedSlices(values, indices)
@@ -204,7 +204,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(np.arange(0, 12, 2), ind)
 
   def testSwitchDeadBranch(self):
-    with self.test_session():
+    with self.cached_session():
       data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
       ports = ops.convert_to_tensor(True, name="ports")
       switch_op = control_flow_ops.switch(data, ports)
@@ -216,7 +216,7 @@ class ControlFlowTest(test.TestCase):
         dead_branch.eval()
 
   def testSwitchMergeLess(self):
-    with self.test_session():
+    with self.cached_session():
       data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
       zero = ops.convert_to_tensor(0)
       one = ops.convert_to_tensor(1)
@@ -228,7 +228,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(np.arange(1, 7), result)
 
   def testSwitchMergeAddIdentity(self):
-    with self.test_session():
+    with self.cached_session():
       data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
       ports = ops.convert_to_tensor(False, name="ports")
       switch_op = control_flow_ops.switch(data, ports)
@@ -241,7 +241,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(np.array([x + 1 for x in [1, 2, 3, 4, 5, 6]]), result)
 
   def testSwitchMergeAddMul(self):
-    with self.test_session():
+    with self.cached_session():
       data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
       ports = ops.convert_to_tensor(True, name="ports")
       switch_op = control_flow_ops.switch(data, ports)
@@ -255,7 +255,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(np.array([x * 5 for x in [1, 2, 3, 4, 5, 6]]), result)
 
   def testLoop_false(self):
-    with self.test_session():
+    with self.cached_session():
       false = ops.convert_to_tensor(False)
       n = constant_op.constant(10)
 
@@ -272,7 +272,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(10, result)
 
   def testLoop_1(self):
-    with self.test_session():
+    with self.cached_session():
       zero = constant_op.constant(0)
       one = constant_op.constant(1)
       n = constant_op.constant(10)
@@ -298,7 +298,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(10, result)
 
   def testLoop_2(self):
-    with self.test_session():
+    with self.cached_session():
       zero = constant_op.constant(0)
       one = constant_op.constant(1)
       n = constant_op.constant(10)
@@ -324,7 +324,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(10, result)
 
   def testDifferentFrame(self):
-    with self.test_session():
+    with self.cached_session():
       data = array_ops.placeholder(dtypes.float32, shape=[])
       enter_1 = gen_control_flow_ops.enter(data, "foo_1", False)
       enter_2 = gen_control_flow_ops.enter(data, "foo_2", False)
@@ -352,7 +352,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual([None], grad)
 
   def testFetchable(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = array_ops.placeholder(dtypes.float32)
       control_flow_ops.cond(
           constant_op.constant(True), lambda: x + 2, lambda: x + 0)
@@ -367,7 +367,7 @@ class ControlFlowTest(test.TestCase):
               sess.run(t, feed_dict={x: 3})
 
   def testFeedable(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       c = constant_op.constant(2)
       i0 = constant_op.constant(0)
       r = control_flow_ops.while_loop(lambda i: i < 1000,
@@ -387,7 +387,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113296180")
 
-    with self.test_session():
+    with self.cached_session():
       values = constant_op.constant(10)
       indices = constant_op.constant(0)
       x = ops.IndexedSlices(values, indices)
@@ -405,7 +405,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113296161 (SparseTensors)")
 
-    with self.test_session():
+    with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
       indices = constant_op.constant(
           [[0], [3]], dtype=dtypes.int64, name="indices")
@@ -425,7 +425,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/111124878 (don't return tuple)")
 
-    with self.test_session():
+    with self.cached_session():
       rv = resource_variable_ops.ResourceVariable(True)
       variables.global_variables_initializer().run()
       t = ops.convert_to_tensor(1.0)
@@ -441,7 +441,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113293074")
 
-    with self.test_session():
+    with self.cached_session():
       values = constant_op.constant(10)
       i_32 = ops.convert_to_tensor(0, name="one", dtype=dtypes.int32)
       i_64 = ops.convert_to_tensor(0, name="one", dtype=dtypes.int64)
@@ -494,7 +494,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/111124878 (don't return tuple)")
 
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(10)
       r = control_flow_ops.cond(
           math_ops.less(1, 0), lambda: math_ops.add(x, 1),
@@ -506,7 +506,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/111124878 (don't return tuple)")
 
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(10)
       pred = math_ops.less(1, 2)
       fn1 = lambda: math_ops.add(x, 1)
@@ -521,7 +521,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113324949 (ref vars)")
 
-    with self.test_session():
+    with self.cached_session():
       v1 = variables.Variable(7)
       v2 = variables.Variable(7)
       v3 = variables.Variable(7)
@@ -542,7 +542,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(7, v3.eval())
 
   def testCond_5(self):
-    with self.test_session():
+    with self.cached_session():
       alive = constant_op.constant(True, name="alive")
       count = constant_op.constant(0, name="count")
 
@@ -559,7 +559,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/111124878 (don't return tuple)")
 
-    with self.test_session():
+    with self.cached_session():
       v1 = variables.Variable([7])
 
       age = constant_op.constant(3)
@@ -573,7 +573,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(np.array([7]), result)
 
   def testCond_7(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = constant_op.constant(10)
       y = constant_op.constant(200)
       pred = math_ops.less(1, 2)
@@ -586,7 +586,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/111124878 (don't return tuple)")
 
-    with self.test_session():
+    with self.cached_session():
       x = gen_state_ops.variable(
           shape=[1],
           dtype=dtypes.float32,
@@ -602,7 +602,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/79881896")
 
-    with self.test_session() as sess:
+    with self.cached_session():
       control_holder = array_ops.placeholder(dtypes.float32, shape=())
       a = constant_op.constant(3)
 
@@ -617,7 +617,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(5, r.eval())
 
   def testUninitializedRefIdentity(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v = gen_state_ops.variable(
           shape=[1],
           dtype=dtypes.float32,
@@ -689,11 +689,11 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
 
       grad = gradients_impl.gradients(r, [x])[0]
-      with self.test_session():
+      with self.cached_session():
         self.assertAllEqual(1.0, grad.eval())
 
   def testCondGrad_2(self):
-    with self.test_session():
+    with self.cached_session():
       c = array_ops.placeholder(dtypes.int32, shape=[])
       x = constant_op.constant(10.0)
       pred = math_ops.less(c, 2)
@@ -709,7 +709,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/110550782 (gradient w.r.t external variable)")
 
-    with self.test_session():
+    with self.cached_session():
       c = array_ops.placeholder(dtypes.int32, shape=[])
       ox = constant_op.constant(10.0)
       pred = math_ops.less(c, 2)
@@ -726,7 +726,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(30.0, r.eval(feed_dict={c: 3}))
 
   def testNestedCond_Simple(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(0., name="X")
       y = control_flow_ops.cond(
           constant_op.constant(True), lambda: x,
@@ -744,7 +744,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113327884")
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v1 = variables.Variable([1.0, 42.0])
       c = array_ops.placeholder(dtypes.int32, shape=[])
       pred = math_ops.less(c, 2)
@@ -768,7 +768,7 @@ class ControlFlowTest(test.TestCase):
 
   # Microbenchmark: 256,000 iterations/s.
   def testWhile_1(self):
-    with self.test_session():
+    with self.cached_session():
       n = constant_op.constant(0)
       c = lambda x: math_ops.less(x, 10000)
       b = lambda x: math_ops.add(x, 1)
@@ -776,7 +776,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10000, r.eval())
 
   def testWhileExternalControlDependencies(self):
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable(0.0)
       v.initializer.run()
       increment = v.assign_add(1.0)
@@ -791,7 +791,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(v.eval(), 1.0)
 
   def testWhileExternalControlDependenciesNoInput(self):
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable(0.0)
       v.initializer.run()
       increment = v.assign_add(1.0)
@@ -806,7 +806,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(v.eval(), 1.0)
 
   def testWhileWithRefs_1(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = variables.Variable(0)._ref()  # pylint: disable=protected-access
       i = constant_op.constant(0)
       c = lambda i, x: math_ops.less(i, 100)
@@ -830,19 +830,19 @@ class ControlFlowTest(test.TestCase):
     self.assertEqual(0, value_x)
 
   def testWhile_2(self):
-    with self.test_session():
+    with self.cached_session():
       s = constant_op.constant(0)
       r = isum(s)
       self.assertAllEqual(45, r.eval())
 
   def testWhileWithMaximumIterations(self):
-    with self.test_session():
+    with self.cached_session():
       s = constant_op.constant([1, 2, 3, 4, 5])
       r = isum(s, maximum_iterations=3)
       self.assertAllEqual([1 + 3, 2 + 3, 3 + 3, 4 + 3, 5 + 3], r.eval())
 
   def testWhileWithMaximumIterationsAndSingleArgument(self):
-    with self.test_session():
+    with self.cached_session():
       r = control_flow_ops.while_loop(
           lambda i: i < 3, lambda i: i + 1, [0], maximum_iterations=1)
       self.assertEqual(1, r.eval())
@@ -1019,7 +1019,7 @@ class ControlFlowTest(test.TestCase):
   # Have more than 10 parallel iterations and hence exercise k-bound
   # most of the time.
   def testWhile_3(self):
-    with self.test_session():
+    with self.cached_session():
 
       def compute(i, m, c, o):
         m, c = [math_ops.add(m, 1), math_ops.add(c, 1)]
@@ -1039,7 +1039,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(10100, result)
 
   def testWhile_4(self):
-    with self.test_session():
+    with self.cached_session():
 
       def compute(i, m, c, o):
         m, c = [array_ops.gather(x, i), array_ops.gather(x, i)]
@@ -1060,7 +1060,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(42, result)
 
   def testWhile_5(self):
-    with self.test_session():
+    with self.cached_session():
 
       def compute(i, c, o):
         c = array_ops.strided_slice(x, array_ops.expand_dims(i, 0),
@@ -1088,7 +1088,7 @@ class ControlFlowTest(test.TestCase):
         trace_level=config_pb2.RunOptions.FULL_TRACE)
     run_metadata = config_pb2.RunMetadata()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with ops.device("/cpu:0"):
         c = constant_op.constant(2)
         i0 = constant_op.constant(0)
@@ -1134,7 +1134,7 @@ class ControlFlowTest(test.TestCase):
     self._testWhile_Gpu_1(use_gpu=True)
 
   def testWhileShape(self):
-    with self.test_session():
+    with self.cached_session():
       i = constant_op.constant(0)
       m = array_ops.ones([2, 2])
       c = lambda i, j: math_ops.less(i, 2)
@@ -1151,7 +1151,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(np.ones((8, 8)), r.eval())
 
   def testWhileWithNonTensorInput_Scalar(self):
-    with self.test_session():
+    with self.cached_session():
       n = 0
       c = lambda x: x < 10000
       b = lambda x: x + 1
@@ -1159,7 +1159,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10000, r.eval())
 
   def testWhileWithNonTensorInput_Vector(self):
-    with self.test_session():
+    with self.cached_session():
       n = np.array([0])  # Note, [0] would not work here; that is a list
       c = lambda x: x[0] < 10000
       b = lambda x: array_ops.stack([x[0] + 1])
@@ -1167,7 +1167,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual([10000], r.eval())
 
   def testWhileShapeInference(self):
-    with self.test_session():
+    with self.cached_session():
       i = constant_op.constant(0)
       m = array_ops.ones([2, 2])
       c = lambda i, j: math_ops.less(i, 2)
@@ -1192,7 +1192,7 @@ class ControlFlowTest(test.TestCase):
         r = control_flow_ops.while_loop(c, b, [i, m])
 
   def testWhileShapeInferenceSparseTensor(self):
-    with self.test_session():
+    with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
       indices = constant_op.constant(
           [[0], [3]], dtype=dtypes.int64, name="indices")
@@ -1223,7 +1223,7 @@ class ControlFlowTest(test.TestCase):
             [i.get_shape(), tensor_shape.TensorShape([5])])
 
   def testWhileShapeInferenceIndexedSlices(self):
-    with self.test_session():
+    with self.cached_session():
       values = constant_op.constant([[2.0, 4.0], [3.0, 5.0]], name="values")
       indices = constant_op.constant([0, 3], name="indices")
       shape = constant_op.constant([10, 2], name="dense_shape")
@@ -1313,7 +1313,7 @@ class ControlFlowTest(test.TestCase):
     self._testNestedWhile_2(use_gpu=True)
 
   def testWhileWithControl_1(self):
-    with self.test_session():
+    with self.cached_session():
       n = constant_op.constant(0)
       r = constant_op.constant(0)
       condition = lambda n_, r_: math_ops.less(n_, 10)
@@ -1329,7 +1329,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(12, res[1].eval())
 
   def testWhileWithControl_2(self):
-    with self.test_session():
+    with self.cached_session():
       r = constant_op.constant(0)
       condition = lambda r_: math_ops.less(r_, 10)
 
@@ -1343,7 +1343,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(12, res.eval())
 
   def testWhileWithControl_3(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = array_ops.placeholder(dtypes.bool)
       c = constant_op.constant(1)
       x0 = constant_op.constant(0)
@@ -1352,7 +1352,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10, sess.run(r, {b: True}))
 
   def testWhileWithControl_4(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = array_ops.placeholder(dtypes.bool)
       c = constant_op.constant(1)
       x0 = constant_op.constant(0)
@@ -1362,7 +1362,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10, sess.run(r, {b: True}))
 
   def testWhileWithControl_5(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       b = array_ops.placeholder(dtypes.bool)
       c = constant_op.constant(1)
       x0 = constant_op.constant(0)
@@ -1380,7 +1380,7 @@ class ControlFlowTest(test.TestCase):
 
     # Ensure that no control edges by an outer control dependency context are
     # added to nodes inside cond/while contexts.
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       const_true = lambda: constant_op.constant(True)
       const_false = lambda: constant_op.constant(False)
       cond = lambda i: control_flow_ops.cond(i > 0, const_true, const_false)
@@ -1395,7 +1395,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113324949 (ref vars)")
 
-    with self.test_session():
+    with self.cached_session():
       v = variable_scope.get_variable(
           "v", [], initializer=init_ops.constant_initializer(2))
       i0 = constant_op.constant(0)
@@ -1420,7 +1420,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113294340 (enable while_v2)")
 
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable(1)
 
       def false_branch():
@@ -1446,7 +1446,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/111124878 (don't return tuple)")
 
-    with self.test_session():
+    with self.cached_session():
       n = ops.convert_to_tensor(0, name="n")
       c = lambda x: math_ops.less(x, 10)
       b = lambda x: math_ops.add(x, 1)
@@ -1459,7 +1459,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/111124878 (don't return tuple)")
 
-    with self.test_session():
+    with self.cached_session():
       n = ops.convert_to_tensor(0)
       c = lambda x: math_ops.less(x, 10)
       b = lambda x: math_ops.add(x, 1)
@@ -1501,7 +1501,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113294377 (unknown shape)")
 
-    with self.test_session():
+    with self.cached_session():
       i = ops.convert_to_tensor(0, name="i")
       n = ops.convert_to_tensor(10, name="n")
       one = ops.convert_to_tensor(1, name="one")
@@ -1519,7 +1519,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113294377 (unknown shape)")
 
-    with self.test_session():
+    with self.cached_session():
       n = ops.convert_to_tensor(0, name="n")
       c = lambda x: math_ops.less(x, 10)
       b = lambda x: control_flow_ops.cond(constant_op.constant(True), lambda: math_ops.add(x, 1), lambda: n)
@@ -1530,7 +1530,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113294377 (unknown shape)")
 
-    with self.test_session():
+    with self.cached_session():
       n = ops.convert_to_tensor(0)
       c = lambda x: math_ops.less(x, 10)
       # pylint: disable=undefined-variable
@@ -1544,7 +1544,7 @@ class ControlFlowTest(test.TestCase):
 
   # NOTE: It is ok to have parallel_iterations > 1
   def testWhileUpdateVariable_1(self):
-    with self.test_session():
+    with self.cached_session():
       select = variables.Variable([3.0, 4.0, 5.0])
       n = constant_op.constant(0)
 
@@ -1566,7 +1566,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(np.array([10.0, 10.0, 10.0]), result)
 
   def testWhileUpdateVariable_2(self):
-    with self.test_session():
+    with self.cached_session():
       select1 = variables.Variable([3.0, 4.0, 5.0])
       select2 = variables.Variable([3.0, 4.0, 5.0])
       n = constant_op.constant(0)
@@ -1592,7 +1592,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(np.array([10.0, 10.0, 10.0]), result2)
 
   def testWhileUpdateVariable_3(self):
-    with self.test_session():
+    with self.cached_session():
       select = variables.Variable([3.0, 4.0, 5.0])
       n = constant_op.constant(0)
 
@@ -1614,7 +1614,7 @@ class ControlFlowTest(test.TestCase):
 
   # b/24814703
   def testWhileUpdateVariable_4(self):
-    with self.test_session():
+    with self.cached_session():
       var_a = variables.Variable(0, name="a")
       var_b = variables.Variable(0, name="b")
       variables.global_variables_initializer().run()
@@ -1642,7 +1642,7 @@ class ControlFlowTest(test.TestCase):
 
   # b/24736492
   def testWhileUpdateVariable_5(self):
-    with self.test_session():
+    with self.cached_session():
       # Create some variables.
       var_a = variables.Variable(0, name="a")
       var_b = variables.Variable(0, name="b")
@@ -1672,7 +1672,7 @@ class ControlFlowTest(test.TestCase):
 
   # b/24814668
   def testWhileUpdateVariable_6(self):
-    with self.test_session():
+    with self.cached_session():
       # Create some variables.
       var_a = variables.Variable(0, name="a")
       var_b = variables.Variable(0, name="b")
@@ -1701,7 +1701,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10, var_a.eval())
 
   def testWhileQueue_1(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(-1, dtypes.int32)
       i = constant_op.constant(0)
 
@@ -1719,7 +1719,7 @@ class ControlFlowTest(test.TestCase):
         self.assertEqual([i], q.dequeue().eval())
 
   def testWhileStack_1(self):
-    with self.test_session():
+    with self.cached_session():
       s = gen_data_flow_ops.stack_v2(-1, dtypes.int32, stack_name="foo")
       i = constant_op.constant(0)
 
@@ -1791,7 +1791,7 @@ class ControlFlowTest(test.TestCase):
     self._testWhileGrad_ColocateGradients(colocate=True)
 
   def testWhileGrad_Square(self):
-    with self.test_session():
+    with self.cached_session():
       v = constant_op.constant(2.0, name="v")
       c = lambda v: math_ops.less(v, 100.0)
       b = math_ops.square
@@ -1802,7 +1802,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(1024.0, r.eval())
 
   def testWhileGrad_Shape(self):
-    with self.test_session():
+    with self.cached_session():
       x = array_ops.placeholder(dtypes.float32, shape=[None])
       v = constant_op.constant([2.0], name="v")
       n = constant_op.constant(0, name="n")
@@ -1819,7 +1819,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose([810.0, 2560.0], r.eval(feed_dict={x: [3.0, 4.0]}))
 
   def testWhileGrad_BaseShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = array_ops.placeholder(dtypes.float32, [None])
       v0 = constant_op.constant([2.0, 2.0], name="v")
       c = lambda v: constant_op.constant(False)
@@ -1831,7 +1831,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose([2.0, 4.0], sess.run(r, feed_dict={x: [1.0, 2.0]}))
 
   def testWhileGrad_MultipleUses(self):
-    with self.test_session():
+    with self.cached_session():
       v = constant_op.constant(2.0, name="v")
       c = lambda v: math_ops.less(v, 100.0)
       b = math_ops.square
@@ -1842,7 +1842,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(524288.0, r.eval())
 
   def testWhileGrad_LoopAdd(self):
-    with self.test_session():
+    with self.cached_session():
       v = constant_op.constant(2.0, name="v")
       c = lambda v: math_ops.less(v, 100.0)
       b = math_ops.square
@@ -1901,7 +1901,7 @@ class ControlFlowTest(test.TestCase):
     self._testNestedWhileCondWhileGrad(use_gpu=True)
 
   def testWhileGrad_Variable(self):
-    with self.test_session():
+    with self.cached_session():
       a = variables.Variable(3.0)
       v = constant_op.constant(2.0, name="v")
       c = lambda v: math_ops.less(v, 100.0)
@@ -1916,7 +1916,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/110550782 (gradient w.r.t external variable)")
 
-    with self.test_session():
+    with self.cached_session():
       n = ops.convert_to_tensor(1.0, name="n")
       x = array_ops.placeholder(dtypes.float32, shape=None)
       c = lambda n: math_ops.less(n, 10.0)
@@ -1931,7 +1931,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(9.0, r.eval(feed_dict={x: 1.0}))
 
   def testGradInWhileWrtInitialLoopVal(self):
-    with self.test_session():
+    with self.cached_session():
       x = array_ops.placeholder(dtypes.float32, shape=(), name="x")
       y = x + 1
 
@@ -1948,7 +1948,7 @@ class ControlFlowTest(test.TestCase):
         control_flow_ops.while_loop(lambda i, x: i < 3, body, [0, y])
 
   def testWhileGradInWhile(self):
-    with self.test_session():
+    with self.cached_session():
       n = ops.convert_to_tensor(1.0, name="n")
       x = array_ops.placeholder(dtypes.float32, shape=None)
       c = lambda n: math_ops.less(n, 10.0)
@@ -1978,13 +1978,13 @@ class ControlFlowTest(test.TestCase):
 
     i, x = control_flow_ops.while_loop(lambda i, x: i < 3, outer_body, [0, 0.0])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       i_val, x_val = sess.run([i, x])
       self.assertEqual(i_val, 3)
       self.assertAllClose(x_val, 1.0)
 
   def testWhile_NestedInput(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       named = collections.namedtuple("named", ("a", "b"))
       loop_vars = [
           named(a=constant_op.constant(0.0), b=constant_op.constant(1.0)),
@@ -2011,7 +2011,7 @@ class ControlFlowTest(test.TestCase):
                        sess.run(r_flattened))
 
   def testWhile_NestedBadArityFails(self):
-    with self.test_session():
+    with self.cached_session():
       named = collections.namedtuple("named", ("a", "b"))
       loop_vars = [
           named(a=constant_op.constant(0.0), b=constant_op.constant(1.0)),
@@ -2027,7 +2027,7 @@ class ControlFlowTest(test.TestCase):
         control_flow_ops.while_loop(c, b, loop_vars)
 
   def testWhileGrad_ys_xs(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(3.0, name="x")
       y = constant_op.constant(2.0, name="y")
 
@@ -2050,7 +2050,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(120.0, r[0].eval())
 
   def testWhileGrad_Dependency(self):
-    with self.test_session():
+    with self.cached_session():
       i = constant_op.constant(0, name="i")
       x = constant_op.constant(2.0, name="x")
 
@@ -2069,7 +2069,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(1024.0, r[0].eval())
 
   def testWhileGrad_NoGradient(self):
-    with self.test_session():
+    with self.cached_session():
       v = constant_op.constant(2.0, name="v")
       c = lambda v: math_ops.less(v, 100.0)
       b = math_ops.square
@@ -2079,7 +2079,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(1.0, r[0].eval())
 
   def testWhileGrad_NoDependency(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variable = variables.Variable(array_ops.ones([2, 3]))
       duration = array_ops.zeros([], dtype=dtypes.int32)
 
@@ -2099,7 +2099,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(np.ones([2, 3]), sess.run(grad[0]))
 
   def testWhileGrad_Const(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       c0 = constant_op.constant(0.0, name="c0")
       c1 = constant_op.constant(1.0, name="c1")
       duration = constant_op.constant(0, name="t")
@@ -2118,7 +2118,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(0.0, sess.run(grad[0]))
 
   def testWhileGrad_SerialTwoLoops(self):
-    with self.test_session():
+    with self.cached_session():
       i = constant_op.constant(0, name="i")
       x = constant_op.constant(2.0, name="x")
 
@@ -2136,7 +2136,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(1024.0, r[0].eval())
 
   def testWhileGrad_ParallelTwoLoops(self):
-    with self.test_session():
+    with self.cached_session():
       i = constant_op.constant(0, name="i")
       x = constant_op.constant(2.0, name="x")
 
@@ -2155,7 +2155,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(64.0, r[0].eval())
 
   def testWhileGrad_OneOutputWithControlDependencyOnSecond(self):
-    with self.test_session():
+    with self.cached_session():
       i = constant_op.constant(0, name="i")
       x = constant_op.constant(1.0, name="x")
       y = constant_op.constant(1.0, name="y")
@@ -2196,7 +2196,7 @@ class ControlFlowTest(test.TestCase):
     self._testNestedWhileGrad_Simple(use_gpu=True)
 
   def testNestedWhileGrad_SerialInner(self):
-    with self.test_session():
+    with self.cached_session():
       v = constant_op.constant(1.0)
 
       def inner_loop1(s):
@@ -2219,7 +2219,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(256.0, r.eval())
 
   def testNestedWhileGrad_ParallelInner(self):
-    with self.test_session():
+    with self.cached_session():
       v = constant_op.constant(1.0)
 
       def inner_loop1(s):
@@ -2244,7 +2244,7 @@ class ControlFlowTest(test.TestCase):
   def testNestedWhileGrad_ParallelIterations(self):
     # Make sure the stack pushes and pops of an inner loop are executed in
     # the sequential order of the iterations of its outer loop.
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
 
       def inner_loop(t):
         fn = lambda n: n + math_ops.square(var)
@@ -2287,7 +2287,7 @@ class ControlFlowTest(test.TestCase):
     self._testWhileCondGrad_Simple(use_gpu=True)
 
   def testWhileCondGrad_UnknownShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v = array_ops.placeholder(dtypes.float32)
       n = ops.convert_to_tensor(100.0, name="n")
       one = ops.convert_to_tensor(1.0, name="one")
@@ -2304,7 +2304,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(1024.0, r)
 
   def testWhileGrad_Concat(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = variable_scope.get_variable("x", initializer=[[1., 2.]])
       i0 = constant_op.constant(0)
       h0 = array_ops.zeros([0, 2])
@@ -2327,7 +2327,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose([[0.98000002, 1.98000002]], sess.run(x))
 
   def testWhileWithRefsWithGradients_1(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = variables.Variable(0.)._ref()  # pylint: disable=protected-access
       i = constant_op.constant(0)
       c = lambda i, x: math_ops.less(i, 10)
@@ -2355,7 +2355,7 @@ class ControlFlowTest(test.TestCase):
     self.assertEqual(73, value_x_grad)
 
   def testWhileGrad_IndexedSlices(self):
-    with self.test_session():
+    with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
       indices = constant_op.constant([0, 3], name="indices")
       shape = constant_op.constant([10], name="dense_shape")
@@ -2376,7 +2376,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(np.array([1024.0, 1024.0]), r.eval())
 
   def testWhileGrad_SparseTensor(self):
-    with self.test_session():
+    with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
       indices = constant_op.constant(
           [[0], [3]], dtype=dtypes.int64, name="indices")
@@ -2398,7 +2398,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(np.array([1024.0, 1024.0]), r.eval())
 
   def testCallGradInLoop(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       i0 = constant_op.constant(0)
       params = constant_op.constant(5.0)
       params_1 = math_ops.square(params)
@@ -2417,7 +2417,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(600.0, sess.run(output_grad)[1])
 
   def testWhileAndTensorArray(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       param = constant_op.constant(2.0)
       n0 = constant_op.constant(0)
       y0 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="elems")
@@ -2436,7 +2436,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(107520.0, sess.run(r))
 
   def testWhileGrad_StopGrad(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(3.0, name="x")
       y = constant_op.constant(2.0, name="y")
 
@@ -2479,7 +2479,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(32.0, r.eval())
 
   def testWhileGrad_StopGradInside(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(3.0, name="x")
       y = constant_op.constant(2.0, name="y")
 
@@ -2498,7 +2498,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(156.0, r.eval())
 
   def testWhileGrad_StopGradInsideNoShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = array_ops.placeholder(dtypes.float32)
       y = array_ops.placeholder(dtypes.float32)
 
@@ -2534,7 +2534,7 @@ class ControlFlowTest(test.TestCase):
     gradients_impl.gradients(grad_theta_stopped, theta)
 
   def testStopGradOnWhileGrad(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(2.0, name="x")
       y = constant_op.constant(2.0, name="y")
 
@@ -2562,7 +2562,7 @@ class ControlFlowTest(test.TestCase):
     _, y = control_flow_ops.while_loop(cond, body, (math_ops.argmin(q), 0.))
     dy_dq, = gradients_impl.gradients(y, q)
     self.assertIsNotNone(dy_dq)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(q.initializer)
       self.assertAllClose([0., 0.], sess.run(dy_dq))
 
@@ -2579,7 +2579,7 @@ class ControlFlowTest(test.TestCase):
     _, y = control_flow_ops.while_loop(cond, body, (math_ops.argmin(q), 0.))
     dy_dq, = gradients_impl.gradients(y, q)
     self.assertIsNotNone(dy_dq)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(q.initializer)
       self.assertAllClose([1., 1.], sess.run(dy_dq))
 
@@ -2607,7 +2607,7 @@ class ControlFlowTest(test.TestCase):
     self.assertIsNotNone(grad)
 
   def testStopGradMultiFlows(self):
-    with self.test_session():
+    with self.cached_session():
 
       def body(i, y, r):
         x = variable_scope.get_variable(
@@ -2636,7 +2636,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/111124878 (don't return tuple)")
 
-    with self.test_session():
+    with self.cached_session():
       c = array_ops.placeholder(dtypes.int32, shape=[])
       one = ops.convert_to_tensor(1, name="one")
       two = ops.convert_to_tensor(2, name="two")
@@ -2654,7 +2654,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/111124878 (don't return tuple)")
 
-    with self.test_session():
+    with self.cached_session():
       x = ops.convert_to_tensor([-2.0, 2.0], name="x")
       d = array_ops.placeholder(dtypes.int32, shape=[])
 
@@ -2672,7 +2672,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/112477618 (Operation returned from cond)")
 
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(1)
       y = constant_op.constant(2)
       z = constant_op.constant(3)
@@ -2727,7 +2727,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/112477618 (Operation returned from cond)")
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v0 = variables.Variable(-1)
       v1 = variables.Variable(-1)
       v2 = variables.Variable(-1)
@@ -2765,7 +2765,7 @@ class ControlFlowTest(test.TestCase):
     if control_flow_ops.ENABLE_COND_V2:
       return unittest.skip("b/113324949 (ref vars)")
 
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable(0)
       c = ops.convert_to_tensor(0)
       one = ops.convert_to_tensor(1)
@@ -2793,7 +2793,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(2, v.eval())
 
   def testWithOpsDependencies(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v = variables.Variable(0.0)
       c = constant_op.constant(10)
 
@@ -2816,7 +2816,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllClose(0.0, real_v_val)
 
   def testWithTensorDependencies(self):
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable(0.0)
       c1 = constant_op.constant(10)
       c2 = constant_op.constant(20)
@@ -2842,7 +2842,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(0.0, v.eval())
 
   def testWithIndexedSlicesDependencies(self):
-    with self.test_session():
+    with self.cached_session():
       v = variables.Variable(
           np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype(np.float32))
       v_at_1 = ops.IndexedSlices(v, constant_op.constant([1]))
@@ -2886,7 +2886,7 @@ class ControlFlowTest(test.TestCase):
         self.assertEqual([b"loc:@vdef"], with_vdef_dep.op.colocation_groups())
 
   def testGroup(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v1 = variables.Variable([0.0])
       v2 = variables.Variable([1.0])
 
@@ -2997,7 +2997,7 @@ class ControlFlowTest(test.TestCase):
     self.assertEqual(None, s.get_shape())
 
   def testRunLoopTensor(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tensor_list = []
 
       def condition(t):
@@ -3021,7 +3021,7 @@ class ControlFlowTest(test.TestCase):
     def func(x):
       return np.square(x)
 
-    with self.test_session():
+    with self.cached_session():
       r = control_flow_ops.while_loop(
           lambda i, v: i < 4,
           lambda i, v: [i + 1, script_ops.py_func(func, [v], [dtypes.float32])[0]],
@@ -3035,7 +3035,7 @@ class ControlFlowTest(test.TestCase):
     def func(x):
       return math_ops.square(math_ops.square(x))
 
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(2.0, dtypes.float32)
       r = control_flow_ops.while_loop(
           lambda i, v: i < 2, lambda i, v: [i + 1, func(v)],
@@ -3174,7 +3174,7 @@ class TupleTest(test.TestCase):
 
   def testTensors(self):
     for v1_first in [True, False]:
-      with self.test_session():
+      with self.cached_session():
         v1 = variables.Variable([1.0])
         add1 = math_ops.add(
             control_flow_ops.with_dependencies([v1.initializer], v1._ref()),  # pylint: disable=protected-access
@@ -3204,7 +3204,7 @@ class TupleTest(test.TestCase):
 
   def testIndexedSlices(self):
     for v1_first in [True, False]:
-      with self.test_session():
+      with self.cached_session():
         v1 = variables.Variable(
             np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype(
                 np.float32))
@@ -3243,7 +3243,7 @@ class TupleTest(test.TestCase):
                               v1.eval())
 
   def testAcceptTensorsAsControlInputs(self):
-    with self.test_session():
+    with self.cached_session():
       var = variables.Variable(0)
       assign = state_ops.assign(var, 1)
       t, = control_flow_ops.tuple(
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index fcba456004..2d6d8a8051 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -53,7 +53,7 @@ class Conv1DTest(test.TestCase):
             self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4])
 
   def testConv1DTranspose(self):
-    with self.test_session():
+    with self.cached_session():
       stride = 2
 
       # Input, output: [batch, width, depth]
diff --git a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
index be299beee4..644a151710 100644
--- a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
+++ b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
@@ -32,7 +32,7 @@ from tensorflow.python.platform import test
 class Conv2DBackpropFilterGradTest(test.TestCase):
 
   def testGradient(self):
-    with self.test_session():
+    with self.cached_session():
       for padding in ["SAME", "VALID"]:
         for stride in [1, 2]:
           np.random.seed(1)
diff --git a/tensorflow/python/kernel_tests/conv2d_transpose_test.py b/tensorflow/python/kernel_tests/conv2d_transpose_test.py
index 27804be65c..cbdd2c5991 100644
--- a/tensorflow/python/kernel_tests/conv2d_transpose_test.py
+++ b/tensorflow/python/kernel_tests/conv2d_transpose_test.py
@@ -37,7 +37,7 @@ from tensorflow.python.platform import test
 class Conv2DTransposeTest(test.TestCase):
 
   def testConv2DTransposeSingleStride(self):
-    with self.test_session():
+    with self.cached_session():
       strides = [1, 1, 1, 1]
 
       # Input, output: [batch, height, width, depth]
@@ -75,7 +75,7 @@ class Conv2DTransposeTest(test.TestCase):
               self.assertAllClose(target, value[n, h, w, k])
 
   def testConv2DTransposeSame(self):
-    with self.test_session():
+    with self.cached_session():
       strides = [1, 2, 2, 1]
 
       # Input, output: [batch, height, width, depth]
@@ -108,7 +108,7 @@ class Conv2DTransposeTest(test.TestCase):
               self.assertAllClose(target, value[n, h, w, k])
 
   def testConv2DTransposeValid(self):
-    with self.test_session():
+    with self.cached_session():
       strides = [1, 2, 2, 1]
 
       # Input, output: [batch, height, width, depth]
@@ -163,7 +163,7 @@ class Conv2DTransposeTest(test.TestCase):
     np.random.seed(1)  # Make it reproducible.
     x_val = np.random.random_sample(x_shape).astype(np.float64)
     f_val = np.random.random_sample(f_shape).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(x_val, name="x", dtype=dtypes.float32)
       f = constant_op.constant(f_val, name="f", dtype=dtypes.float32)
       output = nn_ops.conv2d_transpose(
diff --git a/tensorflow/python/kernel_tests/conv3d_backprop_filter_v2_grad_test.py b/tensorflow/python/kernel_tests/conv3d_backprop_filter_v2_grad_test.py
index 85264ef876..89b64068ac 100644
--- a/tensorflow/python/kernel_tests/conv3d_backprop_filter_v2_grad_test.py
+++ b/tensorflow/python/kernel_tests/conv3d_backprop_filter_v2_grad_test.py
@@ -32,7 +32,7 @@ from tensorflow.python.platform import test
 class Conv3DBackpropFilterV2GradTest(test.TestCase):
 
   def testGradient(self):
-    with self.test_session():
+    with self.cached_session():
       for padding in ["SAME", "VALID"]:
         for stride in [1, 2]:
           np.random.seed(1)
diff --git a/tensorflow/python/kernel_tests/conv3d_transpose_test.py b/tensorflow/python/kernel_tests/conv3d_transpose_test.py
index 289ae29fce..2527b83769 100644
--- a/tensorflow/python/kernel_tests/conv3d_transpose_test.py
+++ b/tensorflow/python/kernel_tests/conv3d_transpose_test.py
@@ -32,7 +32,7 @@ from tensorflow.python.platform import test
 class Conv3DTransposeTest(test.TestCase):
 
   def testConv3DTransposeSingleStride(self):
-    with self.test_session():
+    with self.cached_session():
       strides = [1, 1, 1, 1, 1]
 
       # Input, output: [batch, depth, height, width, channel]
@@ -82,7 +82,7 @@ class Conv3DTransposeTest(test.TestCase):
                 self.assertAllClose(target, value[n, d, h, w, k])
 
   def testConv3DTransposeSame(self):
-    with self.test_session():
+    with self.cached_session():
       strides = [1, 2, 2, 2, 1]
 
       # Input, output: [batch, depth, height, width, depth]
@@ -134,7 +134,7 @@ class Conv3DTransposeTest(test.TestCase):
   def testConv3DTransposeOutputShapeType(self):
     # Test case for GitHub issue 18887
     for dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session():
+      with self.cached_session():
         x_shape = [2, 5, 6, 4, 3]
         y_shape = [2, 5, 6, 4, 2]
         f_shape = [3, 3, 3, 2, 3]
@@ -149,7 +149,7 @@ class Conv3DTransposeTest(test.TestCase):
         output.eval()
 
   def testConv3DTransposeValid(self):
-    with self.test_session():
+    with self.cached_session():
       strides = [1, 2, 2, 2, 1]
 
       # Input, output: [batch, depth, height, width, depth]
@@ -209,7 +209,7 @@ class Conv3DTransposeTest(test.TestCase):
     np.random.seed(1)  # Make it reproducible.
     x_val = np.random.random_sample(x_shape).astype(np.float64)
     f_val = np.random.random_sample(f_shape).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(x_val, name="x", dtype=dtypes.float32)
       f = constant_op.constant(f_val, name="f", dtype=dtypes.float32)
       output = nn_ops.conv3d_transpose(
diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
index 0b531125f3..6794464e3a 100644
--- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
@@ -108,7 +108,7 @@ class Conv3DTest(test.TestCase):
             use_gpu=use_gpu)
         results.append(result)
 
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         values = sess.run(results)
         for value in values:
           print("expected = ", expected)
@@ -183,7 +183,7 @@ class Conv3DTest(test.TestCase):
         expected_results.append(expected)
         computed_results.append(computed)
         tolerance = 1e-2 if use_gpu else 1e-5
-        with self.test_session() as sess:
+        with self.cached_session() as sess:
           expected_values = sess.run(expected_results)
           computed_values = sess.run(computed_results)
           for e_value, c_value in zip(expected_values, computed_values):
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index 00de94f004..ea611497d9 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -1474,7 +1474,7 @@ class Conv2DTest(test.TestCase):
           padding="SAME")
 
   def testOpEdgeCases(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Illegal strides.
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "strides in the batch and depth"):
@@ -1539,7 +1539,7 @@ class DepthwiseConv2DTest(test.TestCase):
     # numbers from 1.
     x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       t1 = constant_op.constant(x1, shape=tensor_in_sizes)
       t1.set_shape(tensor_in_sizes)
       t2 = constant_op.constant(x2, shape=filter_in_sizes)
diff --git a/tensorflow/python/kernel_tests/cross_grad_test.py b/tensorflow/python/kernel_tests/cross_grad_test.py
index f040ac6055..0bd4006d6a 100644
--- a/tensorflow/python/kernel_tests/cross_grad_test.py
+++ b/tensorflow/python/kernel_tests/cross_grad_test.py
@@ -27,7 +27,7 @@ from tensorflow.python.platform import test
 class CrossOpTest(test.TestCase):
 
   def testGradientRandomValues(self):
-    with self.test_session():
+    with self.cached_session():
       us = [2, 3]
       u = array_ops.reshape(
           [0.854, -0.616, 0.767, 0.725, -0.927, 0.159], shape=us)
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index b61232cded..00d7f956c2 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -541,7 +541,7 @@ class UnaryOpTest(test.TestCase):
       return x
 
     for op, real_range in op_range:
-      with self.test_session():
+      with self.cached_session():
         for dtype, tol in dtype_tols:
           x = constant_op.constant(rand(dtype))
           y = constant_op.constant(rand(dtype))
@@ -604,7 +604,7 @@ class BinaryOpTest(test.TestCase):
                         numeric_gradient_type=None):
     z = np_func(x, y)
     zs = list(z.shape)
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       if x.dtype in (np.float32, np.float64):
@@ -634,7 +634,7 @@ class BinaryOpTest(test.TestCase):
                         numeric_gradient_type=None):
     z = np_func(x, y)
     zs = list(z.shape)
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       if x.dtype in (np.float32, np.float64):
@@ -720,7 +720,7 @@ class BinaryOpTest(test.TestCase):
   def testFloatDifferentShapes(self):
     x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.float32)
     y = np.array([1, 2]).reshape(2, 1).astype(np.float32)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       s = math_ops.reduce_sum(inx * iny)
@@ -736,7 +736,7 @@ class BinaryOpTest(test.TestCase):
     y = np.array([1, 2]).reshape(2, 1).astype(np.int32)
     var_x = variables.Variable(x)
     var_y = variables.Variable(y)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run([var_x.initializer, var_y.initializer])
       left_result = (var_x * y).eval()
       right_result = (x * var_y).eval()
@@ -1168,7 +1168,7 @@ class BinaryOpTest(test.TestCase):
             ops.convert_to_tensor([[40.0, 50.0], [60.0, 70.0]]))
 
   def testZeroPowGrad(self):
-    with self.test_session():
+    with self.cached_session():
       for dtype in (np.float16, np.float32, np.float64, np.complex64,
                     np.complex128):
         x = constant_op.constant(0.0, dtype=dtype)
@@ -1178,7 +1178,7 @@ class BinaryOpTest(test.TestCase):
         self.assertEqual(error, 0)
 
   def testComplexPowGrad(self):
-    with self.test_session():
+    with self.cached_session():
       for dtype in np.complex64, np.complex128:
         for base in 2.0, -2.0:
           x = constant_op.constant(base, dtype=dtype)
@@ -1470,7 +1470,7 @@ class SelectOpTest(test.TestCase):
     self.assertShapeEqual(np_ans, out)
 
   def _compareGradientX(self, c, x, y, numeric_gradient_type=None):
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = array_ops.where(c, inx, iny)
@@ -1494,7 +1494,7 @@ class SelectOpTest(test.TestCase):
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
 
   def _compareGradientY(self, c, x, y, numeric_gradient_type=None):
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = array_ops.where(c, inx, iny)
@@ -1582,7 +1582,7 @@ class SelectOpTest(test.TestCase):
     x = np.random.rand(1, 3, 0) * 100
     y = np.random.rand(1, 3, 0) * 100
     z_expected = np.zeros((1, 3, 0), dtype=np.float32)
-    with self.test_session():
+    with self.cached_session():
       xt = x.astype(np.float32)
       yt = y.astype(np.float32)
       z = array_ops.where(c, xt, yt).eval()
@@ -1590,7 +1590,7 @@ class SelectOpTest(test.TestCase):
 
   def testNan(self):
     """Verify that nans don't propagate where they shouldn't."""
-    with self.test_session():
+    with self.cached_session():
       for c in False, True:
         for a in 7.0, np.nan:
           for b in 5.0, np.nan:
@@ -1614,7 +1614,7 @@ class BatchSelectOpTest(test.TestCase):
     self.assertShapeEqual(np_ans, out)
 
   def _compareGradientX(self, c, x, y, numeric_gradient_type=None):
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = array_ops.where(c, inx, iny)
@@ -1638,7 +1638,7 @@ class BatchSelectOpTest(test.TestCase):
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
 
   def _compareGradientY(self, c, x, y, numeric_gradient_type=None):
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = array_ops.where(c, inx, iny)
@@ -1745,7 +1745,7 @@ class MinMaxOpTest(test.TestCase):
       self._compare(x.astype(t), t(y), use_gpu=True)
 
   def _compareGradientX(self, func, x, y):
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = func(inx, iny)
@@ -1760,7 +1760,7 @@ class MinMaxOpTest(test.TestCase):
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
 
   def _compareGradientY(self, func, x, y):
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = func(inx, iny)
@@ -1932,7 +1932,7 @@ class RoundingTest(test.TestCase):
 
   def _compare_values(self, x, y=None):
     y = np.rint(x) if y is None else np.asarray(y)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tf_rint = math_ops.rint(x)
       np_rint = sess.run(tf_rint)
     self.assertAllEqual(y, np_rint)
@@ -1940,7 +1940,7 @@ class RoundingTest(test.TestCase):
 
   def _compare(self, x):
     np_floor, np_ceil = np.floor(x), np.ceil(x)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inx = ops.convert_to_tensor(x)
       ofloor, oceil = math_ops.floor(inx), math_ops.ceil(inx)
       tf_floor, tf_ceil = sess.run([ofloor, oceil])
@@ -2099,7 +2099,7 @@ class ComplexMakeRealImagTest(test.TestCase):
     # computes the squared sum. This is obviously the same as sum(real
     # * real) + sum(imag * imag). We just want to make sure the
     # gradient function is checked.
-    with self.test_session():
+    with self.cached_session():
       inx = ops.convert_to_tensor(x)
       real, imag = array_ops.split(value=inx, num_or_size_splits=2, axis=1)
       real, imag = array_ops.reshape(real, [-1]), array_ops.reshape(imag, [-1])
@@ -2116,7 +2116,7 @@ class ComplexMakeRealImagTest(test.TestCase):
   def _compareBroadcastGradient(self, x):
     x_ = ops.convert_to_tensor(x)
     epsilon = 1e-3
-    with self.test_session():
+    with self.cached_session():
       for args in [(x_, 0.), (0., x_)]:
         z = math_ops.reduce_sum(math_ops.abs(math_ops.complex(*args)))
         jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -2136,7 +2136,7 @@ class ComplexMakeRealImagTest(test.TestCase):
     # data is a float matrix of shape [n, 4].  data[:, 0], data[:, 1],
     # data[:, 2], data[:, 3] are real parts of x, imaginary parts of
     # x, real parts of y and imaginary parts of y.
-    with self.test_session():
+    with self.cached_session():
       inp = ops.convert_to_tensor(data)
       xr, xi, yr, yi = array_ops.split(value=inp, num_or_size_splits=4, axis=1)
 
@@ -2166,7 +2166,7 @@ class ComplexMakeRealImagTest(test.TestCase):
 class AccumulateTest(test.TestCase):
 
   def testSimple(self):
-    with self.test_session():
+    with self.cached_session():
       random_arrays = [
           np.random.rand(16, 16, 16, 16).astype(np.float32) for _ in range(20)
       ]
@@ -2181,20 +2181,20 @@ class AccumulateTest(test.TestCase):
       self.assertAllClose(np_val, tf_val.eval())
 
   def testZeroArgs(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         tf_val = math_ops.accumulate_n([])
         tf_val.eval()
 
   def testWrongShape(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         a = variables.Variable(0.2)
         b = variables.Variable(0.1)
         math_ops.accumulate_n([a, b], shape=[2, 2])  # Should be shape=[]
 
   def testWrongType(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(TypeError):
         a = variables.Variable(0.2, dtype=np.float32)
         b = variables.Variable(0.1, dtype=np.float32)
@@ -2202,7 +2202,7 @@ class AccumulateTest(test.TestCase):
 
   def testWrongTypeOneInput(self):
     # Scenario that used to trigger a bug, even when testWrongType() worked
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(TypeError):
         a = variables.Variable(0.2, dtype=np.float32)
         math_ops.accumulate_n([a], tensor_dtype=np.int32)
@@ -2214,7 +2214,7 @@ class PolyvalTest(test.TestCase):
     x = np.random.rand(2, 2).astype(dtype)
     coeffs = [np.random.rand(2, 2).astype(dtype) for _ in range(degree + 1)]
     np_val = np.polyval(coeffs, x)
-    with self.test_session():
+    with self.cached_session():
       tf_val = math_ops.polyval(coeffs, x)
       self.assertAllClose(np_val, tf_val.eval())
 
@@ -2237,7 +2237,7 @@ class PolyvalTest(test.TestCase):
             for _ in range(degree + 1)
         ]
         np_val = np.polyval(coeffs, x)
-        with self.test_session():
+        with self.cached_session():
           tf_val = math_ops.polyval(coeffs, x)
           self.assertAllClose(np_val, tf_val.eval())
 
@@ -2245,7 +2245,7 @@ class PolyvalTest(test.TestCase):
     x = np.random.rand(2, 2).astype(np.float32)
     coeffs = []
     np_val = np.polyval(coeffs, x)
-    with self.test_session():
+    with self.cached_session():
       tf_val = math_ops.polyval(coeffs, x)
       self.assertAllClose(np_val, tf_val.eval())
 
diff --git a/tensorflow/python/kernel_tests/decode_bmp_op_test.py b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
index 35f8f76991..eebaffbe13 100644
--- a/tensorflow/python/kernel_tests/decode_bmp_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
@@ -60,7 +60,7 @@ class DecodeBmpOpTest(test.TestCase):
     img_in = constant_op.constant(byte_string, dtype=dtypes.string)
     decode = array_ops.squeeze(image_ops.decode_bmp(img_in))
 
-    with self.test_session():
+    with self.cached_session():
       decoded = decode.eval()
       self.assertAllEqual(decoded, img_bytes)
 
@@ -135,7 +135,7 @@ class DecodeBmpOpTest(test.TestCase):
     img_in = constant_op.constant(byte_string, dtype=dtypes.string)
     decode = image_ops.decode_bmp(img_in)
 
-    with self.test_session():
+    with self.cached_session():
       decoded = decode.eval()
       self.assertAllEqual(decoded, img_bytes)
 
diff --git a/tensorflow/python/kernel_tests/decode_compressed_op_test.py b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
index c9bda58ca7..1cc1c7da30 100644
--- a/tensorflow/python/kernel_tests/decode_compressed_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
@@ -44,7 +44,7 @@ class DecodeCompressedOpTest(test.TestCase):
 
   def testDecompress(self):
     for compression_type in ["ZLIB", "GZIP", ""]:
-      with self.test_session():
+      with self.cached_session():
         in_bytes = array_ops.placeholder(dtypes.string, shape=[2])
         decompressed = parsing_ops.decode_compressed(
             in_bytes, compression_type=compression_type)
@@ -57,7 +57,7 @@ class DecodeCompressedOpTest(test.TestCase):
 
   def testDecompressWithRaw(self):
     for compression_type in ["ZLIB", "GZIP", ""]:
-      with self.test_session():
+      with self.cached_session():
         in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
         decompressed = parsing_ops.decode_compressed(
             in_bytes, compression_type=compression_type)
diff --git a/tensorflow/python/kernel_tests/decode_csv_op_test.py b/tensorflow/python/kernel_tests/decode_csv_op_test.py
index 4f49d72676..40b17a11f8 100644
--- a/tensorflow/python/kernel_tests/decode_csv_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_csv_op_test.py
@@ -27,7 +27,7 @@ from tensorflow.python.platform import test
 class DecodeCSVOpTest(test.TestCase):
 
   def _test(self, args, expected_out=None, expected_err_re=None):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       decode = parsing_ops.decode_csv(**args)
 
       if expected_err_re is None:
diff --git a/tensorflow/python/kernel_tests/decode_image_op_test.py b/tensorflow/python/kernel_tests/decode_image_op_test.py
index 58280432d6..7f73fbaa84 100644
--- a/tensorflow/python/kernel_tests/decode_image_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_image_op_test.py
@@ -111,7 +111,7 @@ class DecodeImageOpTest(test.TestCase):
   def testInvalidBytes(self):
     image_bytes = b"ThisIsNotAnImage!"
     decode = image_ops.decode_image(image_bytes)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
         decode.eval()
 
diff --git a/tensorflow/python/kernel_tests/decode_png_op_test.py b/tensorflow/python/kernel_tests/decode_png_op_test.py
index d2e03938ee..8f36343667 100644
--- a/tensorflow/python/kernel_tests/decode_png_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_png_op_test.py
@@ -46,7 +46,7 @@ class DecodePngOpTest(test.TestCase):
         image_ops.decode_png(
             img_in, dtype=dtypes.uint16))
 
-    with self.test_session():
+    with self.cached_session():
       decoded = decode.eval()
       self.assertAllEqual(decoded, img_bytes)
 
diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py
index 122a9ed469..dc01f4196a 100644
--- a/tensorflow/python/kernel_tests/decode_raw_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py
@@ -29,7 +29,7 @@ from tensorflow.python.platform import test
 class DecodeRawOpTest(test.TestCase):
 
   def testToUint8(self):
-    with self.test_session():
+    with self.cached_session():
       in_bytes = array_ops.placeholder(dtypes.string, shape=[2])
       decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.uint8)
       self.assertEqual([2, None], decode.get_shape().as_list())
@@ -47,7 +47,7 @@ class DecodeRawOpTest(test.TestCase):
         decode.eval(feed_dict={in_bytes: ["short", "longer"]})
 
   def testToInt16(self):
-    with self.test_session():
+    with self.cached_session():
       in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
       decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.int16)
       self.assertEqual([None, None], decode.get_shape().as_list())
@@ -62,7 +62,7 @@ class DecodeRawOpTest(test.TestCase):
         decode.eval(feed_dict={in_bytes: ["123", "456"]})
 
   def testEndianness(self):
-    with self.test_session():
+    with self.cached_session():
       in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
       decode_le = parsing_ops.decode_raw(
           in_bytes, out_type=dtypes.int32, little_endian=True)
@@ -74,7 +74,7 @@ class DecodeRawOpTest(test.TestCase):
       self.assertAllEqual([[0x01020304]], result)
 
   def testToFloat16(self):
-    with self.test_session():
+    with self.cached_session():
       in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
       decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.float16)
       self.assertEqual([None, None], decode.get_shape().as_list())
@@ -85,7 +85,7 @@ class DecodeRawOpTest(test.TestCase):
       self.assertAllEqual(expected_result, result)
 
   def testEmptyStringInput(self):
-    with self.test_session():
+    with self.cached_session():
       in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
       decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.float16)
 
@@ -94,7 +94,7 @@ class DecodeRawOpTest(test.TestCase):
         self.assertEqual((num_inputs, 0), result.shape)
 
   def testToUInt16(self):
-    with self.test_session():
+    with self.cached_session():
       in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
       decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.uint16)
       self.assertEqual([None, None], decode.get_shape().as_list())
diff --git a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
index d33bf1ba12..affbaf159d 100644
--- a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
+++ b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
@@ -33,7 +33,7 @@ class AssignOpTest(test.TestCase):
   #   contain benign and deliberate data races when multiple threads update
   #   the same parameters without a lock.
   def testParallelUpdateWithoutLocking(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ones_t = array_ops.fill([1024, 1024], 1.0)
       p = variables.Variable(array_ops.zeros([1024, 1024]))
       adds = [
@@ -60,7 +60,7 @@ class AssignOpTest(test.TestCase):
       self.assertTrue((vals <= ones * 20).all())
 
   def testParallelAssignWithoutLocking(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ones_t = array_ops.fill([1024, 1024], float(1))
       p = variables.Variable(array_ops.zeros([1024, 1024]))
       assigns = [
@@ -92,7 +92,7 @@ class AssignOpTest(test.TestCase):
   # returning the output tensors. This issue will be resolved with the new
   # resource variables.
   def testParallelUpdateWithLocking(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       zeros_t = array_ops.fill([1024, 1024], 0.0)
       ones_t = array_ops.fill([1024, 1024], 1.0)
       p = variables.Variable(zeros_t)
@@ -119,7 +119,7 @@ class AssignOpTest(test.TestCase):
       self.assertAllEqual(vals, ones * 20)
 
   def testParallelAssignWithLocking(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       zeros_t = array_ops.fill([1024, 1024], 0.0)
       ones_t = array_ops.fill([1024, 1024], 1.0)
       p = variables.Variable(zeros_t)
diff --git a/tensorflow/python/kernel_tests/dense_update_ops_test.py b/tensorflow/python/kernel_tests/dense_update_ops_test.py
index 4dda9f093b..06c3271850 100644
--- a/tensorflow/python/kernel_tests/dense_update_ops_test.py
+++ b/tensorflow/python/kernel_tests/dense_update_ops_test.py
@@ -85,7 +85,7 @@ class AssignOpTest(test.TestCase):
     self._testTypes(np.arange(0, 20).reshape([4, 5]))
 
   def testAssignNonStrictShapeChecking(self):
-    with self.test_session():
+    with self.cached_session():
       data = array_ops.fill([1024, 1024], 0)
       p = variables.Variable([1])
       a = state_ops.assign(p, data, validate_shape=False)
@@ -99,14 +99,14 @@ class AssignOpTest(test.TestCase):
       self.assertAllEqual(p.eval(), data2.eval())
 
   def testInitRequiredAssignAdd(self):
-    with self.test_session():
+    with self.cached_session():
       p = variables.Variable(array_ops.fill([1024, 1024], 1), dtypes.int32)
       a = state_ops.assign_add(p, array_ops.fill([1024, 1024], 0))
       with self.assertRaisesOpError("use uninitialized"):
         a.op.run()
 
   def testInitRequiredAssignSub(self):
-    with self.test_session():
+    with self.cached_session():
       p = variables.Variable(array_ops.fill([1024, 1024], 1), dtypes.int32)
       a = state_ops.assign_sub(p, array_ops.fill([1024, 1024], 0))
       with self.assertRaisesOpError("use uninitialized"):
diff --git a/tensorflow/python/kernel_tests/division_future_test.py b/tensorflow/python/kernel_tests/division_future_test.py
index e681b32856..e477bdc73b 100644
--- a/tensorflow/python/kernel_tests/division_future_test.py
+++ b/tensorflow/python/kernel_tests/division_future_test.py
@@ -50,7 +50,7 @@ class DivisionTestCase(test.TestCase):
         self.assertEqual(x, y)
       checks.append(f)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for dtype in dtypes:
         for x in map(dtype, values):
           for y in map(dtype, values):
diff --git a/tensorflow/python/kernel_tests/division_past_test.py b/tensorflow/python/kernel_tests/division_past_test.py
index 9ddd62e63c..63951b5b38 100644
--- a/tensorflow/python/kernel_tests/division_past_test.py
+++ b/tensorflow/python/kernel_tests/division_past_test.py
@@ -49,7 +49,7 @@ class DivisionTestCase(test.TestCase):
         self.assertEqual(x, y)
       checks.append(f)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for dtype in dtypes:
         for x in map(dtype, values):
           for y in map(dtype, values):
diff --git a/tensorflow/python/kernel_tests/duplicate_op_test.py b/tensorflow/python/kernel_tests/duplicate_op_test.py
index 529d3dd0b3..654267a582 100644
--- a/tensorflow/python/kernel_tests/duplicate_op_test.py
+++ b/tensorflow/python/kernel_tests/duplicate_op_test.py
@@ -34,7 +34,7 @@ class DuplicateOpTest(test.TestCase):
 
     self.assertEqual(len(duplicate.OP_LIST.op), 0)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(math_ops.add(1, 41).eval(), 42)
 
 
diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
index 5e8937ad2c..9557e30993 100644
--- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
@@ -288,7 +288,7 @@ class DynamicPartitionTest(test.TestCase):
       self.assertAllEqual([], partition_vals[i])
 
   def testErrorIndexOutOfRange(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11],
                                    [12, 13, 14]])
       indices = constant_op.constant([0, 2, 99, 2, 2])
@@ -298,7 +298,7 @@ class DynamicPartitionTest(test.TestCase):
         sess.run(partitions)
 
   def testScalarIndexOutOfRange(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       bad = 17
       data = np.zeros(5)
       partitions = data_flow_ops.dynamic_partition(data, bad, num_partitions=7)
@@ -306,7 +306,7 @@ class DynamicPartitionTest(test.TestCase):
         sess.run(partitions)
 
   def testHigherRankIndexOutOfRange(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       shape = (2, 3)
       indices = array_ops.placeholder(shape=shape, dtype=np.int32)
       data = np.zeros(shape + (5,))
@@ -334,7 +334,7 @@ class DynamicPartitionTest(test.TestCase):
     inds += [13]*194 + [14]*194 + [15]*192
     self.assertEqual(len(inds), x.shape[0])
     partitioned = data_flow_ops.dynamic_partition(x, inds, 16)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       res = sess.run(partitioned)
     self.assertEqual(res[-1].shape[0], 192)
 
diff --git a/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py b/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py
index 49b9569e2b..3a1036e52a 100644
--- a/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py
@@ -252,7 +252,7 @@ class ParallelDynamicStitchTest(DynamicStitchTestBase, test.TestCase):
 
   # GPU version unit tests
   def testScalarGPU(self):
-    with self.test_session():
+    with self.cached_session():
       indices = [constant_op.constant(0), constant_op.constant(1)]
       data = [constant_op.constant(40.0), constant_op.constant(60.0)]
       for step in -1, 1:
@@ -263,7 +263,7 @@ class ParallelDynamicStitchTest(DynamicStitchTestBase, test.TestCase):
         self.assertEqual([2], stitched_t.get_shape().as_list())
 
   def testHigherRankGPU(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       indices = [
           constant_op.constant(6),
           constant_op.constant([4, 1]),
diff --git a/tensorflow/python/kernel_tests/embedding_ops_test.py b/tensorflow/python/kernel_tests/embedding_ops_test.py
index dcd435e1ff..40b8548cea 100644
--- a/tensorflow/python/kernel_tests/embedding_ops_test.py
+++ b/tensorflow/python/kernel_tests/embedding_ops_test.py
@@ -242,7 +242,7 @@ class EmbeddingLookupTest(test.TestCase):
   # vector is going to be empty. The subsequent DivOp fails because of that.
   # TODO(keveman): Disabling the test until the underlying problem is fixed.
   def testSimpleSharded(self):
-    with self.test_session():
+    with self.cached_session():
       num_shards = 2
       vocab_size = 4
       p, params, feed_dict = _EmbeddingParams(num_shards, vocab_size)
@@ -258,7 +258,7 @@ class EmbeddingLookupTest(test.TestCase):
     self.assertShapeEqual(np_result, embedding)
 
   def testMaxNorm(self):
-    with self.test_session():
+    with self.cached_session():
       embeddings = constant_op.constant([[2.0]])
 
       ids = constant_op.constant([0], dtype=dtypes.int32)
@@ -268,7 +268,7 @@ class EmbeddingLookupTest(test.TestCase):
       self.assertAllEqual(embedding.eval(), [[1.0]])
 
   def testMaxNormNontrivial(self):
-    with self.test_session():
+    with self.cached_session():
       embeddings = constant_op.constant([[2.0, 4.0], [3.0, 1.0]])
 
       ids = constant_op.constant([0, 1], dtype=dtypes.int32)
@@ -281,7 +281,7 @@ class EmbeddingLookupTest(test.TestCase):
       self.assertAllEqual(embedding.eval(), 2 * normalized.eval())
 
   def testSimpleShardedPartitionedVariable(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       num_shards = 2
       vocab_size = 4
       p, p_variable, params, feed_dict = _EmbeddingParamsAsPartitionedVariable(
@@ -303,7 +303,7 @@ class EmbeddingLookupTest(test.TestCase):
     self.assertShapeEqual(np_result, embedding)
 
   def testSimpleShardedPartitionedResourceVariable(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       num_shards = 2
       vocab_size = 4
       p, p_variable, params, _ = _EmbeddingParamsAsPartitionedVariable(
@@ -326,7 +326,7 @@ class EmbeddingLookupTest(test.TestCase):
     self.assertShapeEqual(np_result, embedding)
 
   def testShardedModPartitioningInt32Ids(self):
-    with self.test_session():
+    with self.cached_session():
       num_shards = 5
       vocab_size = 13
       # Embedding dimensions is 10. The vocab_size x 10 embedding
@@ -348,7 +348,7 @@ class EmbeddingLookupTest(test.TestCase):
     self.assertShapeEqual(np_result, embedding)
 
   def testShardedModPartitioningInt64Ids(self):
-    with self.test_session():
+    with self.cached_session():
       num_shards = 5
       vocab_size = 13
       # Embedding dimensions is 10. The vocab_size x 10 embedding
@@ -370,7 +370,7 @@ class EmbeddingLookupTest(test.TestCase):
     self.assertShapeEqual(np_result, embedding)
 
   def testShardedDivPartitioningInt32Ids(self):
-    with self.test_session():
+    with self.cached_session():
       num_shards = 5
       vocab_size = 13
       # Embedding dimensions is 10. The vocab_size x 10 embedding
@@ -394,7 +394,7 @@ class EmbeddingLookupTest(test.TestCase):
     self.assertShapeEqual(np_result, embedding)
 
   def testShardedDivPartitioningInt32IdsPartitionedVariable(self):
-    with self.test_session():
+    with self.cached_session():
       num_shards = 5
       vocab_size = 13
       # Embedding dimensions is 10. The vocab_size x 10 embedding
@@ -419,7 +419,7 @@ class EmbeddingLookupTest(test.TestCase):
     self.assertShapeEqual(np_result, embedding)
 
   def testShardedDivPartitioningInt64Ids(self):
-    with self.test_session():
+    with self.cached_session():
       num_shards = 5
       vocab_size = 13
       # Embedding dimensions is 10. The vocab_size x 10 embedding
@@ -443,7 +443,7 @@ class EmbeddingLookupTest(test.TestCase):
     self.assertShapeEqual(np_result, embedding)
 
   def testShardedDivPartitioningUnknownParamShape(self):
-    with self.test_session():
+    with self.cached_session():
       num_shards = 5
       vocab_size = 13
       # Embedding dimensions is 10. The vocab_size x 10 embedding
@@ -475,7 +475,7 @@ class EmbeddingLookupTest(test.TestCase):
     tf_logging.vlog(1, id_vals)
     for ids_shape in [(10,), (2, 5)]:
       for num_shards in [1, 3]:
-        with self.test_session():
+        with self.cached_session():
           ids = constant_op.constant(
               id_vals, shape=ids_shape, dtype=dtypes.int32)
           x, params, _ = _EmbeddingParams(num_shards, vocab_size, shape=[2])
@@ -494,7 +494,7 @@ class EmbeddingLookupTest(test.TestCase):
     id_vals = list(np.random.randint(vocab_size, size=num_ids))
     tf_logging.vlog(1, id_vals)
     for num_shards in [1, 3]:
-      with self.test_session():
+      with self.cached_session():
         ids = constant_op.constant(id_vals, dtype=dtypes.int32)
         x, params, _ = _EmbeddingParams(num_shards, vocab_size, shape=[2])
         # This will force a conversion from IndexedSlices to Tensor.
@@ -528,7 +528,7 @@ class EmbeddingLookupTest(test.TestCase):
 
   def testHigherRank(self):
     np.random.seed(8)
-    with self.test_session():
+    with self.cached_session():
       for params_shape in (12,), (6, 3):
         params = np.random.randn(*params_shape)
         for ids_shape in (3, 2), (4, 3):
@@ -548,7 +548,7 @@ class EmbeddingLookupTest(test.TestCase):
 
   def testHigherRankMaxNorm(self):
     np.random.seed(8)
-    with self.test_session():
+    with self.cached_session():
       for params_shape in (12,), (6, 3), (6, 2, 3):
         # Test embedding rank 0, 1, 2.
         # Note: the first dimension must be a common multiple of procs below.
@@ -581,7 +581,7 @@ class EmbeddingLookupTest(test.TestCase):
     # It always applies max_norm.
     np.random.seed(8)
     l2_norm = 2.
-    with self.test_session():
+    with self.cached_session():
       # Param values are in [l2_norm, l2_norm+1) so it will always clip.
       params = np.random.rand(6, 3) + l2_norm
       params_norm = l2_norm * params / np.sqrt(
@@ -667,7 +667,7 @@ class EmbeddingLookupSparseTest(test.TestCase):
         [dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64],
         [True, False]):
 
-      with self.test_session():
+      with self.cached_session():
         p, params, feed_dict = _EmbeddingParams(
             num_shards, vocab_size, shape=param_shape, dtype=dtype)
         embedding_sum = embedding_ops.embedding_lookup_sparse(
@@ -716,7 +716,7 @@ class EmbeddingLookupSparseTest(test.TestCase):
     for num_shards, combiner, dtype, ignore_weights in itertools.product(
         [1, 3], ["sum", "mean", "sqrtn"], [dtypes.float32,
                                            dtypes.float64], [True, False]):
-      with self.test_session():
+      with self.cached_session():
         x, params, _ = _EmbeddingParams(
             num_shards, vocab_size, shape=param_shape, dtype=dtype)
 
@@ -734,7 +734,7 @@ class EmbeddingLookupSparseTest(test.TestCase):
       self.assertLess(err, 1e-5 if dtype == dtypes.float64 else 2e-3)
 
   def testIncompatibleShapes(self):
-    with self.test_session():
+    with self.cached_session():
       x, _, _ = _EmbeddingParams(1, 10, dtype=dtypes.float32)
       sp_ids = sparse_tensor.SparseTensor(
           constant_op.constant([[0, 0], [0, 1], [1, 0]], dtypes.int64),
@@ -819,7 +819,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
     return sparse_ids, sparse_weights
 
   def test_safe_embedding_lookup_sparse_return_zero_vector(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, sparse_weights = self._ids_and_weights_2d()
 
@@ -832,7 +832,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
            3.0, [0] * 4, [0] * 4, embedding_weights[0][2], [0] * 4])
 
   def test_safe_embedding_lookup_sparse_return_special_vector(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, sparse_weights = self._ids_and_weights_2d()
 
@@ -846,7 +846,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
            embedding_weights[0][2], embedding_weights[0][3]])
 
   def test_safe_embedding_lookup_sparse_no_weights(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, _ = self._ids_and_weights_2d()
 
@@ -860,7 +860,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
                embedding_weights[0][0] + embedding_weights[0][1]) / 2.0])
 
   def test_safe_embedding_lookup_sparse_partitioned(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=3)
       sparse_ids, _ = self._ids_and_weights_2d()
 
@@ -874,7 +874,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
                            (embedding_weights[0] + embedding_weights[1]) / 2.0])
 
   def test_safe_embedding_lookup_sparse_partitioned_inconsistent_weights(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=3)
       sparse_ids, sparse_weights = self._ids_and_weights_2d()
 
@@ -889,7 +889,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
                         embedding_weights, sparse_ids, sparse_weights)
 
   def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, sparse_weights = self._ids_and_weights_3d()
 
@@ -902,7 +902,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
       ], [embedding_weights[0][2], [0] * 4, [0] * 4]])
 
   def test_safe_embedding_lookup_sparse_3d_return_special_vector(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, sparse_weights = self._ids_and_weights_3d()
 
@@ -918,7 +918,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
             ]])
 
   def test_safe_embedding_lookup_sparse_3d_no_weights(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, _ = self._ids_and_weights_3d()
 
@@ -934,7 +934,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
           ]])
 
   def test_safe_embedding_lookup_sparse_3d_partitioned(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=3)
       sparse_ids, _ = self._ids_and_weights_3d()
 
@@ -951,7 +951,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
 
   def test_safe_embedding_lookup_sparse_3d_partitioned_inconsistent_weights(
       self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=3)
       sparse_ids, sparse_weights = self._ids_and_weights_3d()
 
@@ -1035,7 +1035,7 @@ class DynamicStitchOpTest(test.TestCase):
 
   # We expect that the values are merged in order.
   def testStitchOrder(self):
-    with self.test_session():
+    with self.cached_session():
       indices = []
       np_values = []
       values = []
diff --git a/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py b/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py
index e1f5a6b620..7d9d4e5175 100644
--- a/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py
+++ b/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py
@@ -83,7 +83,7 @@ class ExtractImagePatchesGradTest(test.TestCase):
     random_seed = 42
     random_seed_lib.set_random_seed(random_seed)
 
-    with self.test_session():
+    with self.cached_session():
       for test_case in self._TEST_CASES:
         np.random.seed(random_seed)
         in_shape = test_case['in_shape']
diff --git a/tensorflow/python/kernel_tests/fft_ops_test.py b/tensorflow/python/kernel_tests/fft_ops_test.py
index 629acedda5..f117934e4b 100644
--- a/tensorflow/python/kernel_tests/fft_ops_test.py
+++ b/tensorflow/python/kernel_tests/fft_ops_test.py
@@ -496,7 +496,7 @@ class RFFTOpsTest(BaseFFTOpsTest):
             "Input dimension .* must have length of at least 6 but got: 5"):
           x = np.zeros((5,) * rank).astype(np.float32)
           fft_length = [6] * rank
-          with self.test_session():
+          with self.cached_session():
             rfft_fn(x, fft_length).eval()
 
         with self.assertRaisesWithPredicateMatch(
@@ -504,7 +504,7 @@ class RFFTOpsTest(BaseFFTOpsTest):
             "Input dimension .* must have length of at least .* but got: 3"):
           x = np.zeros((3,) * rank).astype(np.complex64)
           fft_length = [6] * rank
-          with self.test_session():
+          with self.cached_session():
             irfft_fn(x, fft_length).eval()
 
   def testGrad_Simple(self):
diff --git a/tensorflow/python/kernel_tests/fifo_queue_test.py b/tensorflow/python/kernel_tests/fifo_queue_test.py
index 9e7b528338..a5f8f64e0c 100644
--- a/tensorflow/python/kernel_tests/fifo_queue_test.py
+++ b/tensorflow/python/kernel_tests/fifo_queue_test.py
@@ -99,19 +99,19 @@ class FIFOQueueTest(test.TestCase):
       """, q.queue_ref.op.node_def)
 
   def testEnqueue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       enqueue_op = q.enqueue((10.0,))
       enqueue_op.run()
 
   def testEnqueueHalf(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float16)
       enqueue_op = q.enqueue((10.0,))
       enqueue_op.run()
 
   def testEnqueueWithShape(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, shapes=(3, 2))
       enqueue_correct_op = q.enqueue(([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],))
       enqueue_correct_op.run()
@@ -120,7 +120,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual(1, q.size().eval())
 
   def testEnqueueManyWithShape(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(
           10, [dtypes_lib.int32, dtypes_lib.int32], shapes=[(), (2,)])
       q.enqueue_many([[1, 2, 3, 4], [[1, 1], [2, 2], [3, 3], [4, 4]]]).run()
@@ -143,7 +143,7 @@ class FIFOQueueTest(test.TestCase):
     self.assertAllEqual(self.evaluate(q.dequeue()), 1)
 
   def testEnqueueDictWithoutNames(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       with self.assertRaisesRegexp(ValueError, "must have names"):
         q.enqueue({"a": 12.0})
@@ -151,7 +151,7 @@ class FIFOQueueTest(test.TestCase):
         q.enqueue_many({"a": [12.0, 13.0]})
 
   def testParallelEnqueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
       enqueue_ops = [q.enqueue((x,)) for x in elems]
@@ -177,7 +177,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertItemsEqual(elems, results)
 
   def testParallelDequeue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
       enqueue_ops = [q.enqueue((x,)) for x in elems]
@@ -201,7 +201,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertItemsEqual(elems, results)
 
   def testDequeue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0]
       enqueue_ops = [q.enqueue((x,)) for x in elems]
@@ -215,7 +215,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertEqual([elems[i]], vals)
 
   def testDequeueHalf(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float16)
       elems = [10.0, 20.0, 30.0]
       enqueue_ops = [q.enqueue((x,)) for x in elems]
@@ -229,7 +229,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertEqual([elems[i]], vals)
 
   def testEnqueueAndBlockingDequeue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(3, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0]
       enqueue_ops = [q.enqueue((x,)) for x in elems]
@@ -259,7 +259,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertEqual([elem], result)
 
   def testMultiEnqueueAndDequeue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32))
       elems = [(5, 10.0), (10, 20.0), (15, 30.0)]
       enqueue_ops = [q.enqueue((x, y)) for x, y in elems]
@@ -275,12 +275,12 @@ class FIFOQueueTest(test.TestCase):
         self.assertEqual([y], y_val)
 
   def testQueueSizeEmpty(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       self.assertEqual([0], q.size().eval())
 
   def testQueueSizeAfterEnqueueAndDequeue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       enqueue_op = q.enqueue((10.0,))
       dequeued_t = q.dequeue()
@@ -293,7 +293,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual(0, size.eval())
 
   def testEnqueueMany(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -306,7 +306,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertEqual([elems[i % 4]], vals)
 
   def testEmptyEnqueueMany(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       empty_t = constant_op.constant(
           [], dtype=dtypes_lib.float32, shape=[0, 2, 3])
@@ -318,7 +318,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual([0], size_t.eval())
 
   def testEmptyDequeueMany(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, shapes=())
       enqueue_op = q.enqueue((10.0,))
       dequeued_t = q.dequeue_many(0)
@@ -328,7 +328,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual([], dequeued_t.eval().tolist())
 
   def testEmptyDequeueUpTo(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, shapes=())
       enqueue_op = q.enqueue((10.0,))
       dequeued_t = q.dequeue_up_to(0)
@@ -338,14 +338,14 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual([], dequeued_t.eval().tolist())
 
   def testEmptyDequeueManyWithNoShape(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       # Expect the operation to fail due to the shape not being constrained.
       with self.assertRaisesOpError("specified shapes"):
         q.dequeue_many(0).eval()
 
   def testMultiEnqueueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, (dtypes_lib.float32, dtypes_lib.int32))
       float_elems = [10.0, 20.0, 30.0, 40.0]
       int_elems = [[1, 2], [3, 4], [5, 6], [7, 8]]
@@ -361,7 +361,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertAllEqual(int_elems[i % 4], int_val)
 
   def testDequeueMany(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ())
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -373,7 +373,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertAllEqual(elems[4:8], dequeued_t.eval())
 
   def testDequeueUpToNoBlocking(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ())
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -385,7 +385,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertAllEqual(elems[4:8], dequeued_t.eval())
 
   def testMultiDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(
           10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (2,)))
       float_elems = [
@@ -416,7 +416,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual(int_val.shape, dequeued_single_t[1].get_shape())
 
   def testMultiDequeueUpToNoBlocking(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(
           10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (2,)))
       float_elems = [
@@ -440,7 +440,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertAllEqual(int_elems[4:8], int_val)
 
   def testHighDimension(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.int32, (4, 4, 4, 4))
       elems = np.array([[[[[x] * 4] * 4] * 4] * 4 for x in range(10)], np.int32)
       enqueue_op = q.enqueue_many((elems,))
@@ -494,7 +494,7 @@ class FIFOQueueTest(test.TestCase):
                       array_ops.placeholder(dtypes_lib.int32)))
 
   def testEnqueueWrongShapeAtRuntime(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.int32), (
           (2, 2), (3, 3)))
       elems_ok = np.array([1] * 4).reshape((2, 2)).astype(np.int32)
@@ -506,7 +506,7 @@ class FIFOQueueTest(test.TestCase):
                  feed_dict={elems_bad: np.array([1] * 12).reshape((3, 4))})
 
   def testEnqueueDequeueManyWrongShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.int32), (
           (2, 2), (3, 3)))
       elems_ok = np.array([1] * 8).reshape((2, 2, 2)).astype(np.int32)
@@ -521,7 +521,7 @@ class FIFOQueueTest(test.TestCase):
         dequeued_t.eval()
 
   def testParallelEnqueueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(1000, dtypes_lib.float32, shapes=())
       elems = [10.0 * x for x in range(100)]
       enqueue_op = q.enqueue_many((elems,))
@@ -540,7 +540,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertItemsEqual(dequeued_t.eval(), elems * 10)
 
   def testParallelDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(1000, dtypes_lib.float32, shapes=())
       elems = [10.0 * x for x in range(1000)]
       enqueue_op = q.enqueue_many((elems,))
@@ -562,7 +562,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertItemsEqual(elems, dequeued_elems)
 
   def testParallelDequeueUpTo(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(1000, dtypes_lib.float32, shapes=())
       elems = [10.0 * x for x in range(1000)]
       enqueue_op = q.enqueue_many((elems,))
@@ -586,7 +586,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertItemsEqual(elems, dequeued_elems)
 
   def testParallelEnqueueAndDequeue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(50, dtypes_lib.float32, shapes=())
       initial_elements = [10.0] * 49
       q.enqueue_many((initial_elements,)).run()
@@ -619,7 +619,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertTrue(elem in (10.0, 20.0))
 
   def testMixtureOfEnqueueAndEnqueueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.int32, shapes=())
       enqueue_placeholder = array_ops.placeholder(dtypes_lib.int32, shape=())
       enqueue_op = q.enqueue((enqueue_placeholder,))
@@ -655,7 +655,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual(0, q.size().eval())
 
   def testMixtureOfDequeueAndDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.int32, shapes=())
       enqueue_op = q.enqueue_many((np.arange(250, dtype=np.int32),))
       dequeued_t = q.dequeue()
@@ -689,7 +689,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual(0, q.size().eval())
 
   def testBlockingDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ())
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -716,7 +716,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertAllEqual(elems, dequeued_elems)
 
   def testBlockingDequeueUpTo(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ())
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -743,7 +743,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertAllEqual(elems, dequeued_elems)
 
   def testDequeueManyWithTensorParameter(self):
-    with self.test_session():
+    with self.cached_session():
       # Define a first queue that contains integer counts.
       dequeue_counts = [random.randint(1, 10) for _ in range(100)]
       count_q = data_flow_ops.FIFOQueue(100, dtypes_lib.int32, ())
@@ -768,7 +768,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual(elems, dequeued_elems)
 
   def testDequeueFromClosedQueue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -786,7 +786,7 @@ class FIFOQueueTest(test.TestCase):
         dequeued_t.eval()
 
   def testBlockingDequeueFromClosedQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -812,7 +812,7 @@ class FIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testBlockingDequeueFromClosedEmptyQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       close_op = q.close()
       dequeued_t = q.dequeue()
@@ -832,7 +832,7 @@ class FIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testBlockingDequeueManyFromClosedQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ())
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -857,7 +857,7 @@ class FIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testBlockingDequeueManyButNotAllFromClosedQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ())
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -882,7 +882,7 @@ class FIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testDequeueUpToFromClosedQueueReturnsRemainder(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ())
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -904,7 +904,7 @@ class FIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testEnqueueManyLargerThanCapacityWithConcurrentDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32, ())
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -941,7 +941,7 @@ class FIFOQueueTest(test.TestCase):
       close_thread.join()
 
   def testClosedBlockingDequeueManyRestoresPartialBatch(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(4, (dtypes_lib.float32, dtypes_lib.float32), (
           (), ()))
       elems_a = [1.0, 2.0, 3.0]
@@ -974,7 +974,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual(0, q.size().eval())
 
   def testBlockingDequeueManyFromClosedEmptyQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ())
       close_op = q.close()
       dequeued_t = q.dequeue_many(4)
@@ -994,7 +994,7 @@ class FIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testBlockingDequeueUpToFromClosedEmptyQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ())
       close_op = q.close()
       dequeued_t = q.dequeue_up_to(4)
@@ -1014,7 +1014,7 @@ class FIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testEnqueueToClosedQueue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       enqueue_op = q.enqueue((10.0,))
       close_op = q.close()
@@ -1027,7 +1027,7 @@ class FIFOQueueTest(test.TestCase):
         enqueue_op.run()
 
   def testEnqueueManyToClosedQueue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1041,7 +1041,7 @@ class FIFOQueueTest(test.TestCase):
         enqueue_op.run()
 
   def testBlockingEnqueueToFullQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1064,7 +1064,7 @@ class FIFOQueueTest(test.TestCase):
       thread.join()
 
   def testBlockingEnqueueManyToFullQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1091,7 +1091,7 @@ class FIFOQueueTest(test.TestCase):
       thread.join()
 
   def testBlockingEnqueueBeforeClose(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1128,7 +1128,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual(0, q.size().eval())
 
   def testBlockingEnqueueManyBeforeClose(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32)
       elems = [10.0, 20.0, 30.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1161,7 +1161,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertEqual(elem, dequeued_t.eval())
 
   def testDoesNotLoseValue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.FIFOQueue(1, dtypes_lib.float32)
       enqueue_op = q.enqueue((10.0,))
       size_t = q.size()
@@ -1171,7 +1171,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertEqual(size_t.eval(), [1])
 
   def testSharedQueueSameSession(self):
-    with self.test_session():
+    with self.cached_session():
       q1 = data_flow_ops.FIFOQueue(
           1, dtypes_lib.float32, shared_name="shared_queue")
       q1.enqueue((10.0,)).run()
@@ -1201,7 +1201,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertEqual(q2_size_t.eval(), [0])
 
   def testIncompatibleSharedQueueErrors(self):
-    with self.test_session():
+    with self.cached_session():
       q_a_1 = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, shared_name="q_a")
       q_a_2 = data_flow_ops.FIFOQueue(15, dtypes_lib.float32, shared_name="q_a")
       q_a_1.queue_ref.op.run()
@@ -1244,7 +1244,7 @@ class FIFOQueueTest(test.TestCase):
         q_f_2.queue_ref.op.run()
 
   def testSelectQueue(self):
-    with self.test_session():
+    with self.cached_session():
       num_queues = 10
       qlist = list()
       for _ in xrange(num_queues):
@@ -1257,7 +1257,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertEqual(q.dequeue().eval(), 10.0)
 
   def testSelectQueueOutOfRange(self):
-    with self.test_session():
+    with self.cached_session():
       q1 = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       q2 = data_flow_ops.FIFOQueue(15, dtypes_lib.float32)
       enq_q = data_flow_ops.FIFOQueue.from_list(3, [q1, q2])
@@ -1281,7 +1281,7 @@ class FIFOQueueTest(test.TestCase):
       sess.run(enqueue_many_op)
 
   def testResetOfBlockingOperation(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q_empty = data_flow_ops.FIFOQueue(5, dtypes_lib.float32, ())
       dequeue_op = q_empty.dequeue()
       dequeue_many_op = q_empty.dequeue_many(1)
@@ -1309,7 +1309,7 @@ class FIFOQueueTest(test.TestCase):
         t.join()
 
   def testBigEnqueueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(5, dtypes_lib.int32, ((),))
       elem = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
       enq = q.enqueue_many((elem,))
@@ -1354,7 +1354,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertAllEqual(elem, results)
 
   def testBigDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(2, dtypes_lib.int32, ((),))
       elem = np.arange(4, dtype=np.int32)
       enq_list = [q.enqueue((e,)) for e in elem]
@@ -1380,7 +1380,7 @@ class FIFOQueueTest(test.TestCase):
       self.assertAllEqual(elem, results)
 
   def testDtypes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       dtypes = [
           dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32,
           dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, dtypes_lib.int64,
@@ -1411,7 +1411,7 @@ class FIFOQueueTest(test.TestCase):
         self.assertAllEqual(input_elem, output_elem)
 
   def testDequeueEnqueueFail(self):
-    with self.test_session() as session:
+    with self.cached_session() as session:
       q = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()])
       a = q.dequeue()
       b = control_flow_ops.Assert(False, ["Before enqueue"])
@@ -1474,7 +1474,7 @@ class FIFOQueueDictTest(test.TestCase):
     self.assertEqual(["i", "f"], q.names)
 
   def testEnqueueDequeueOneComponent(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(
           10, dtypes_lib.float32, shapes=((),), names="f")
       # Verify that enqueue() checks that when using names we must enqueue a
@@ -1519,7 +1519,7 @@ class FIFOQueueDictTest(test.TestCase):
       self.assertEqual([40.0, 50.0], list(f))
 
   def testEnqueueDequeueMultipleComponent(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(
           10, (dtypes_lib.float32, dtypes_lib.int32, dtypes_lib.string),
           shapes=((), (), ()),
@@ -1600,7 +1600,7 @@ class FIFOQueueWithTimeoutTest(test.TestCase):
         sess.run(dequeued_t)
 
   def testReusableAfterTimeout(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       dequeued_t = q.dequeue()
       enqueue_op = q.enqueue(37)
diff --git a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
index faac7d8365..f89d2062f1 100644
--- a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
+++ b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
@@ -127,7 +127,7 @@ class FractionalAvgTest(test.TestCase):
     Returns:
       None
     """
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       p, r, c = nn_ops.fractional_avg_pool(
           input_tensor,
           pooling_ratio,
@@ -160,7 +160,7 @@ class FractionalAvgTest(test.TestCase):
           overlapping))
       rand_mat = self._PRNG.randint(10, size=tensor_shape)
       pooling_ratio = [1, math.sqrt(2), math.sqrt(2), 1]
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         p, r, c = nn_ops.fractional_avg_pool(
             rand_mat.astype(np.float32),
             pooling_ratio,
@@ -234,7 +234,7 @@ class FractionalAvgTest(test.TestCase):
         [4, 4, 5, 9, 7, 2]
     ])
     # pyformat: enable
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Since deterministic = True, seed and seed2 are fixed. Therefore r, and c
       # are the same each time. We can have an expected result precomputed.
       # r = [0, 2, 4, 6]
@@ -314,7 +314,7 @@ class FractionalAvgTest(test.TestCase):
 
   def testDifferentInputTensorShape(self):
     """Runs the operation in one session with different input tensor shapes."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       input_holder = array_ops.placeholder(dtypes.float32,
                                            [None, None, None, 3])
       pooling_ratio = [1, 1.5, 1.5, 1]
@@ -389,7 +389,7 @@ class FractionalAvgPoolGradTest(test.TestCase):
           num_cols = col_window_size * 7
           for num_channels in [1, 2]:
             input_shape = (num_batches, num_rows, num_cols, num_channels)
-            with self.test_session() as _:
+            with self.cached_session() as _:
               input_tensor = constant_op.constant(
                   self._GenerateRandomInputTensor(input_shape).astype(
                       np.float32))
@@ -428,7 +428,7 @@ class FractionalAvgPoolGradTest(test.TestCase):
           num_cols = (col_window_size - 1) * 7 + 1
           for num_channels in [1, 2]:
             input_shape = (num_batches, num_rows, num_cols, num_channels)
-            with self.test_session() as _:
+            with self.cached_session() as _:
               input_tensor = constant_op.constant(
                   self._GenerateRandomInputTensor(input_shape).astype(
                       np.float32))
@@ -468,7 +468,7 @@ class FractionalAvgPoolGradTest(test.TestCase):
 
     for pseudo_random in True, False:
       for overlapping in True, False:
-        with self.test_session() as _:
+        with self.cached_session() as _:
           input_tensor = constant_op.constant(input_data, shape=input_shape)
           output_tensor, unused_a, unused_b = nn_ops.fractional_avg_pool(
               input_tensor,
@@ -501,7 +501,7 @@ class FractionalAvgPoolGradTest(test.TestCase):
           for num_channels in [1, 3]:
             input_shape = (num_batches, num_rows, num_cols, num_channels)
             input_data = self._GenerateRandomInputTensor(input_shape)
-            with self.test_session() as _:
+            with self.cached_session() as _:
               input_tensor = constant_op.constant(input_data, shape=input_shape)
               output_tensor, unused_a, unused_b = nn_ops.fractional_avg_pool(
                   input_tensor,
@@ -532,7 +532,7 @@ class FractionalAvgPoolGradTest(test.TestCase):
     overlapping = True
     pseudo_random = False
 
-    with self.test_session() as _:
+    with self.cached_session() as _:
       input_tensor = constant_op.constant(input_data, shape=input_shape)
       output_tensor, unused_a, unused_b = nn_ops.fractional_avg_pool(
           input_tensor,
diff --git a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
index 6477c9ebc4..9b94ca8554 100644
--- a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
+++ b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
@@ -127,7 +127,7 @@ class FractionalMaxPoolTest(test.TestCase):
     Returns:
       None
     """
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       p, r, c = nn_ops.fractional_max_pool(
           input_tensor,
           pooling_ratio,
@@ -160,7 +160,7 @@ class FractionalMaxPoolTest(test.TestCase):
           overlapping))
       rand_mat = self._PRNG.randint(10, size=tensor_shape)
       pooling_ratio = [1, math.sqrt(2), math.sqrt(2), 1]
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         p, r, c = nn_ops.fractional_max_pool(
             rand_mat,
             pooling_ratio,
@@ -285,7 +285,7 @@ class FractionalMaxPoolTest(test.TestCase):
 
   def testDifferentInputTensorShape(self):
     """Runs the operation in one session with different input tensor shapes."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       input_holder = array_ops.placeholder(dtypes.float32,
                                            [None, None, None, 3])
       pooling_ratio = [1, 1.5, 1.5, 1]
@@ -374,7 +374,7 @@ class FractionalMaxPoolGradTest(test.TestCase):
           num_cols = col_window_size * 7
           for num_channels in [1, 2]:
             input_shape = (num_batches, num_rows, num_cols, num_channels)
-            with self.test_session() as _:
+            with self.cached_session() as _:
               input_tensor = constant_op.constant(
                   self._GenerateUniqueRandomInputTensor(input_shape))
               window_size = [1, row_window_size, col_window_size, 1]
@@ -409,7 +409,7 @@ class FractionalMaxPoolGradTest(test.TestCase):
           num_cols = (col_window_size - 1) * 7 + 1
           for num_channels in [1, 2]:
             input_shape = (num_batches, num_rows, num_cols, num_channels)
-            with self.test_session() as _:
+            with self.cached_session() as _:
               input_tensor = constant_op.constant(
                   self._GenerateUniqueRandomInputTensor(input_shape))
               window_size = [1, row_window_size, col_window_size, 1]
@@ -447,7 +447,7 @@ class FractionalMaxPoolGradTest(test.TestCase):
 
     for pseudo_random in True, False:
       for overlapping in True, False:
-        with self.test_session() as _:
+        with self.cached_session() as _:
           input_tensor = constant_op.constant(input_data, shape=input_shape)
           output_tensor, unused_a, unused_b = nn_ops.fractional_max_pool(
               input_tensor,
@@ -482,7 +482,7 @@ class FractionalMaxPoolGradTest(test.TestCase):
             input_data = self._GenerateUniqueRandomInputTensor(input_shape)
             # Add some randomness to make input_data not so 'integer'
             input_data += self._PRNG.random_sample(input_shape)
-            with self.test_session() as _:
+            with self.cached_session() as _:
               input_tensor = constant_op.constant(input_data, shape=input_shape)
               output_tensor, unused_a, unused_b = nn_ops.fractional_max_pool(
                   input_tensor,
@@ -515,7 +515,7 @@ class FractionalMaxPoolGradTest(test.TestCase):
     overlapping = True
     pseudo_random = False
 
-    with self.test_session() as _:
+    with self.cached_session() as _:
       input_tensor = constant_op.constant(input_data, shape=input_shape)
       output_tensor, unused_a, unused_b = nn_ops.fractional_max_pool(
           input_tensor,
@@ -579,7 +579,7 @@ class FractionalMaxPoolGradTest(test.TestCase):
          0.0, 0.0, 0.0, 0.0,
          6.0, 0.0, 21.0, 0.0],
         input_size)  # pyformat: disable
-    with self.test_session() as _:
+    with self.cached_session() as _:
       # Test when overlapping is False
       input_tensor = constant_op.constant(input_data, shape=input_size)
       output_tensor = constant_op.constant(
diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py
index 033fa95935..85bf969068 100644
--- a/tensorflow/python/kernel_tests/gather_op_test.py
+++ b/tensorflow/python/kernel_tests/gather_op_test.py
@@ -147,7 +147,7 @@ class GatherTest(test.TestCase):
 
   def testString(self):
     params = np.array([[b"asdf", b"zxcv"], [b"qwer", b"uiop"]])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual([b"qwer", b"uiop"],
                           array_ops.gather(params, 1, axis=0).eval())
       self.assertAllEqual([b"asdf", b"qwer"],
@@ -157,7 +157,7 @@ class GatherTest(test.TestCase):
     for unsigned_type in (dtypes.uint32, dtypes.uint64):
       params = self._buildParams(
           np.array([[1, 2, 3], [7, 8, 9]]), unsigned_type)
-      with self.test_session():
+      with self.cached_session():
         self.assertAllEqual([7, 8, 9],
                             array_ops.gather(params, 1, axis=0).eval())
         self.assertAllEqual([1, 7], array_ops.gather(params, 0, axis=1).eval())
diff --git a/tensorflow/python/kernel_tests/gradient_correctness_test.py b/tensorflow/python/kernel_tests/gradient_correctness_test.py
index e93c6235f7..291a69ebac 100644
--- a/tensorflow/python/kernel_tests/gradient_correctness_test.py
+++ b/tensorflow/python/kernel_tests/gradient_correctness_test.py
@@ -30,7 +30,7 @@ from tensorflow.python.platform import test
 class GradientCorrectnessTest(test.TestCase):
 
   def testMultipleOutputChainedGradients(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = constant_op.constant(1.0, dtype=dtypes.float32)
       yexp = math_ops.exp(x)
       yexplog = math_ops.log(yexp)
@@ -43,13 +43,13 @@ class GradientCorrectnessTest(test.TestCase):
   def testIdentityGradient(self):
     x = constant_op.constant(3.)
     dx_dx, = gradients_impl.gradients(x, x)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertAllClose(1., sess.run(dx_dx))
 
   def testIntegerIdentityGradient(self):
     x = constant_op.constant(3)
     dx_dx, = gradients_impl.gradients(x, x)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertAllClose(1, sess.run(dx_dx))
 
   def testGradientWithIntegerPath(self):
@@ -57,7 +57,7 @@ class GradientCorrectnessTest(test.TestCase):
     k = math_ops.to_float(math_ops.to_int32(x))
     y = x * k
     dy_dx, = gradients_impl.gradients(y, x)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertAllClose([3., 4.], sess.run(dy_dx))
 
   def testNoIntegerGradient1(self):
diff --git a/tensorflow/python/kernel_tests/identity_n_op_py_test.py b/tensorflow/python/kernel_tests/identity_n_op_py_test.py
index 408b173981..518733cd8e 100644
--- a/tensorflow/python/kernel_tests/identity_n_op_py_test.py
+++ b/tensorflow/python/kernel_tests/identity_n_op_py_test.py
@@ -28,7 +28,7 @@ from tensorflow.python.platform import test
 class IdentityNOpTest(test.TestCase):
 
   def testInt32String_6(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       [value0, value1] = sess.run(
           array_ops.identity_n([[1, 2, 3, 4, 5, 6],
                                 [b"a", b"b", b"C", b"d", b"E", b"f", b"g"]]))
@@ -37,7 +37,7 @@ class IdentityNOpTest(test.TestCase):
         np.array([b"a", b"b", b"C", b"d", b"E", b"f", b"g"]), value1)
 
   def testInt32_shapes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inp0 = constant_op.constant([10, 20, 30, 40, 50, 60], shape=[2, 3])
       inp1 = constant_op.constant([11, 21, 31, 41, 51, 61], shape=[3, 2])
       inp2 = constant_op.constant(
@@ -52,12 +52,12 @@ class IdentityNOpTest(test.TestCase):
 
   def testString(self):
     source = [b"A", b"b", b"C", b"d", b"E", b"f"]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       [value] = sess.run(array_ops.identity_n([source]))
     self.assertAllEqual(source, value)
 
   def testIdentityShape(self):
-    with self.test_session():
+    with self.cached_session():
       shape = [2, 3]
       array_2x3 = [[1, 2, 3], [6, 5, 4]]
       tensor = constant_op.constant(array_2x3)
diff --git a/tensorflow/python/kernel_tests/identity_op_py_test.py b/tensorflow/python/kernel_tests/identity_op_py_test.py
index 49fb76d5b4..37f9f716f8 100644
--- a/tensorflow/python/kernel_tests/identity_op_py_test.py
+++ b/tensorflow/python/kernel_tests/identity_op_py_test.py
@@ -31,24 +31,24 @@ from tensorflow.python.platform import test
 class IdentityOpTest(test.TestCase):
 
   def testInt32_6(self):
-    with self.test_session():
+    with self.cached_session():
       value = array_ops.identity([1, 2, 3, 4, 5, 6]).eval()
     self.assertAllEqual(np.array([1, 2, 3, 4, 5, 6]), value)
 
   def testInt32_2_3(self):
-    with self.test_session():
+    with self.cached_session():
       inp = constant_op.constant([10, 20, 30, 40, 50, 60], shape=[2, 3])
       value = array_ops.identity(inp).eval()
     self.assertAllEqual(np.array([[10, 20, 30], [40, 50, 60]]), value)
 
   def testString(self):
     source = [b"A", b"b", b"C", b"d", b"E", b"f"]
-    with self.test_session():
+    with self.cached_session():
       value = array_ops.identity(source).eval()
     self.assertAllEqual(source, value)
 
   def testIdentityShape(self):
-    with self.test_session():
+    with self.cached_session():
       shape = [2, 3]
       array_2x3 = [[1, 2, 3], [6, 5, 4]]
       tensor = constant_op.constant(array_2x3)
@@ -59,7 +59,7 @@ class IdentityOpTest(test.TestCase):
                         array_ops.identity(np.array(array_2x3)).get_shape())
 
   def testRefIdentityShape(self):
-    with self.test_session():
+    with self.cached_session():
       shape = [2, 3]
       tensor = variables.Variable(
           constant_op.constant(
diff --git a/tensorflow/python/kernel_tests/in_topk_op_test.py b/tensorflow/python/kernel_tests/in_topk_op_test.py
index fafeea8ec0..6fdb497bc6 100644
--- a/tensorflow/python/kernel_tests/in_topk_op_test.py
+++ b/tensorflow/python/kernel_tests/in_topk_op_test.py
@@ -30,7 +30,7 @@ class InTopKTest(test.TestCase):
 
   def _validateInTopK(self, predictions, target, k, expected):
     np_ans = np.array(expected)
-    with self.test_session():
+    with self.cached_session():
       precision = nn_ops.in_top_k(predictions, target, k)
       out = precision.eval()
       self.assertAllClose(np_ans, out)
@@ -65,7 +65,7 @@ class InTopKTest(test.TestCase):
   def testBadTarget(self):
     predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]]
     target = [0, 80000]
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "target.*out of range"):
         nn_ops.in_top_k(predictions, target, 2).eval()
@@ -75,7 +75,7 @@ class InTopKTest(test.TestCase):
     target = [0, 2]
     k = constant_op.constant(3)
     np_ans = np.array([False, True])
-    with self.test_session():
+    with self.cached_session():
       precision = nn_ops.in_top_k(predictions, target, k)
       out = precision.eval()
       self.assertAllClose(np_ans, out)
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index f6097ad489..79ce965242 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -343,7 +343,7 @@ class UniformUnitScalingInitializationTest(test.TestCase):
 
   def testZeroSize(self):
     shape = [0, 2]
-    with self.test_session():
+    with self.cached_session():
       x = variable_scope.get_variable(
           "x",
           shape=shape,
diff --git a/tensorflow/python/kernel_tests/inplace_ops_test.py b/tensorflow/python/kernel_tests/inplace_ops_test.py
index 6e894365af..90759c23ae 100644
--- a/tensorflow/python/kernel_tests/inplace_ops_test.py
+++ b/tensorflow/python/kernel_tests/inplace_ops_test.py
@@ -153,7 +153,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
       self.assertAllClose(vy, vz)
 
   def testError(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    "must be a vector"):
         _ = inplace_ops.inplace_update([[1.]], [[0]], [[10]]).eval()
diff --git a/tensorflow/python/kernel_tests/io_ops_test.py b/tensorflow/python/kernel_tests/io_ops_test.py
index 61944f7e31..afa24195cb 100644
--- a/tensorflow/python/kernel_tests/io_ops_test.py
+++ b/tensorflow/python/kernel_tests/io_ops_test.py
@@ -37,7 +37,7 @@ class IoOpsTest(test.TestCase):
       with tempfile.NamedTemporaryFile(
           prefix='ReadFileTest', dir=self.get_temp_dir(), delete=False) as temp:
         temp.write(contents)
-      with self.test_session():
+      with self.cached_session():
         read = io_ops.read_file(temp.name)
         self.assertEqual([], read.get_shape())
         self.assertEqual(read.eval(), contents)
@@ -51,7 +51,7 @@ class IoOpsTest(test.TestCase):
           prefix='WriteFileTest', dir=self.get_temp_dir(),
           delete=False) as temp:
         pass
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         w = io_ops.write_file(temp.name, contents)
         sess.run(w)
         with open(temp.name, 'rb') as f:
@@ -65,7 +65,7 @@ class IoOpsTest(test.TestCase):
       contents = compat.as_bytes(contents)
       subdir = os.path.join(self.get_temp_dir(), 'subdir1')
       filepath = os.path.join(subdir, 'subdir2', 'filename')
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         w = io_ops.write_file(filepath, contents)
         sess.run(w)
         with open(filepath, 'rb') as f:
@@ -88,7 +88,7 @@ class IoOpsTest(test.TestCase):
             prefix=c, dir=self.get_temp_dir(), delete=True) for c in cases
     ]
 
-    with self.test_session():
+    with self.cached_session():
       # Test exact match without wildcards.
       for f in files:
         self.assertEqual(
diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py
index 0e4e58409e..cd6a34d657 100644
--- a/tensorflow/python/kernel_tests/linalg_grad_test.py
+++ b/tensorflow/python/kernel_tests/linalg_grad_test.py
@@ -40,7 +40,7 @@ def _AddTest(test, op_name, testcase_name, fn):
 class ShapeTest(test_lib.TestCase):
 
   def testBatchGradientUnknownSize(self):
-    with self.test_session():
+    with self.cached_session():
       batch_size = constant_op.constant(3)
       matrix_size = constant_op.constant(4)
       batch_identity = array_ops.tile(
diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py
index 2f28d37eff..aa17f727d0 100644
--- a/tensorflow/python/kernel_tests/linalg_ops_test.py
+++ b/tensorflow/python/kernel_tests/linalg_ops_test.py
@@ -128,7 +128,7 @@ class AdjointTest(test.TestCase):
       matrix_np = np.array([[1 + 1j, 2 + 2j, 3 + 3j], [4 + 4j, 5 + 5j,
                                                        6 + 6j]]).astype(dtype)
       expected_transposed = np.conj(matrix_np.T)
-      with self.test_session():
+      with self.cached_session():
         matrix = ops.convert_to_tensor(matrix_np)
         transposed = linalg.adjoint(matrix)
         self.assertEqual((3, 2), transposed.get_shape())
diff --git a/tensorflow/python/kernel_tests/listdiff_op_test.py b/tensorflow/python/kernel_tests/listdiff_op_test.py
index ee86cf0b24..baeb40dd63 100644
--- a/tensorflow/python/kernel_tests/listdiff_op_test.py
+++ b/tensorflow/python/kernel_tests/listdiff_op_test.py
@@ -42,7 +42,7 @@ class ListDiffTest(test.TestCase):
         out = [compat.as_bytes(str(a)) for a in out]
       for diff_func in [array_ops.setdiff1d]:
         for index_dtype in [dtypes.int32, dtypes.int64]:
-          with self.test_session() as sess:
+          with self.cached_session() as sess:
             x_tensor = ops.convert_to_tensor(x, dtype=dtype)
             y_tensor = ops.convert_to_tensor(y, dtype=dtype)
             out_tensor, idx_tensor = diff_func(x_tensor, y_tensor,
diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py
index e635a71c78..82729b9e27 100644
--- a/tensorflow/python/kernel_tests/logging_ops_test.py
+++ b/tensorflow/python/kernel_tests/logging_ops_test.py
@@ -31,7 +31,7 @@ from tensorflow.python.platform import test
 class LoggingOpsTest(test.TestCase):
 
   def testAssertDivideByZero(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       epsilon = ops.convert_to_tensor(1e-20)
       x = ops.convert_to_tensor(0.0)
       y = ops.convert_to_tensor(1.0)
@@ -66,7 +66,7 @@ class PrintGradientTest(test.TestCase):
     self.assertEqual(inp.get_shape(), inp_printed.get_shape())
 
   def testPrintGradient(self):
-    with self.test_session():
+    with self.cached_session():
       inp = constant_op.constant(2.0, shape=[100, 32], name="in")
       w = constant_op.constant(4.0, shape=[10, 100], name="w")
       wx = math_ops.matmul(w, inp, name="wx")
diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py
index 5f08339fe5..38b14e34cc 100644
--- a/tensorflow/python/kernel_tests/lookup_ops_test.py
+++ b/tensorflow/python/kernel_tests/lookup_ops_test.py
@@ -36,7 +36,7 @@ from tensorflow.python.training import server_lib
 class HashTableOpTest(test.TestCase):
 
   def testHashTable(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = -1
       keys = constant_op.constant(["brain", "salad", "surgery"])
       values = constant_op.constant([0, 1, 2], dtypes.int64)
@@ -54,7 +54,7 @@ class HashTableOpTest(test.TestCase):
       self.assertAllEqual([0, 1, -1], result)
 
   def testHashTableFindHighRank(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = -1
       keys = constant_op.constant(["brain", "salad", "surgery"])
       values = constant_op.constant([0, 1, 2], dtypes.int64)
@@ -72,7 +72,7 @@ class HashTableOpTest(test.TestCase):
       self.assertAllEqual([[0, 1], [-1, -1]], result)
 
   def testHashTableInitWithPythonArrays(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = -1
       keys = ["brain", "salad", "surgery"]
       values = [0, 1, 2]
@@ -90,7 +90,7 @@ class HashTableOpTest(test.TestCase):
       self.assertAllEqual([0, 1, -1], result)
 
   def testHashTableInitWithNumPyArrays(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = -1
       keys = np.array(["brain", "salad", "surgery"], dtype=np.str)
       values = np.array([0, 1, 2], dtype=np.int64)
@@ -107,7 +107,7 @@ class HashTableOpTest(test.TestCase):
       self.assertAllEqual([0, 1, -1], result)
 
   def testMultipleHashTables(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       default_val = -1
       keys = constant_op.constant(["brain", "salad", "surgery"])
       values = constant_op.constant([0, 1, 2], dtypes.int64)
@@ -135,7 +135,7 @@ class HashTableOpTest(test.TestCase):
       self.assertAllEqual([0, 1, -1], out3)
 
   def testHashTableWithTensorDefault(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = constant_op.constant(-1, dtypes.int64)
       keys = constant_op.constant(["brain", "salad", "surgery"])
       values = constant_op.constant([0, 1, 2], dtypes.int64)
@@ -150,7 +150,7 @@ class HashTableOpTest(test.TestCase):
       self.assertAllEqual([0, 1, -1], result)
 
   def testHashTableWithSparseTensorInput(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       default_val = constant_op.constant(-1, dtypes.int64)
       keys = constant_op.constant(["brain", "salad", "surgery"])
       values = constant_op.constant([0, 1, 2], dtypes.int64)
@@ -173,7 +173,7 @@ class HashTableOpTest(test.TestCase):
       self.assertAllEqual(sp_shape, out_shape)
 
   def testSignatureMismatch(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = -1
       keys = constant_op.constant(["brain", "salad", "surgery"])
       values = constant_op.constant([0, 1, 2], dtypes.int64)
@@ -190,7 +190,7 @@ class HashTableOpTest(test.TestCase):
             lookup_ops.KeyValueTensorInitializer(keys, values), "UNK")
 
   def testDTypes(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = -1
       with self.assertRaises(TypeError):
         lookup_ops.HashTable(
@@ -198,7 +198,7 @@ class HashTableOpTest(test.TestCase):
                                                  dtypes.int64), default_val)
 
   def testNotInitialized(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = -1
       table = lookup_ops.HashTable(
           lookup_ops.KeyValueTensorInitializer(
@@ -211,7 +211,7 @@ class HashTableOpTest(test.TestCase):
         output.eval()
 
   def testInitializeTwice(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = -1
       keys = constant_op.constant(["brain", "salad", "surgery"])
       values = constant_op.constant([0, 1, 2], dtypes.int64)
@@ -223,7 +223,7 @@ class HashTableOpTest(test.TestCase):
         table.init.run()
 
   def testInitializationWithInvalidDimensions(self):
-    with self.test_session():
+    with self.cached_session():
       default_val = -1
       keys = constant_op.constant(["brain", "salad", "surgery"])
       values = constant_op.constant([0, 1, 2, 3, 4], dtypes.int64)
@@ -272,7 +272,7 @@ class IndexTableFromFile(test.TestCase):
 
   def test_string_index_table_from_file(self):
     vocabulary_file = self._createVocabFile("f2i_vocab1.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file, num_oov_buckets=1)
       ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"]))
@@ -284,7 +284,7 @@ class IndexTableFromFile(test.TestCase):
   def test_string_index_table_from_multicolumn_file(self):
     vocabulary_file = self._createVocabFile(
         "f2i_vocab1.txt", values=("brain\t300", "salad\t20", "surgery\t1"))
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file,
           num_oov_buckets=1,
@@ -299,7 +299,7 @@ class IndexTableFromFile(test.TestCase):
   def test_string_index_table_from_multicolumn_file_custom_delimiter(self):
     vocabulary_file = self._createVocabFile(
         "f2i_vocab1.txt", values=("brain 300", "salad 20", "surgery 1"))
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file,
           num_oov_buckets=1,
@@ -314,7 +314,7 @@ class IndexTableFromFile(test.TestCase):
 
   def test_string_index_table_from_file_tensor_filename(self):
     vocabulary_file = self._createVocabFile("f2i_vocab1.txt")
-    with self.test_session():
+    with self.cached_session():
       vocabulary_file = constant_op.constant(vocabulary_file)
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file, num_oov_buckets=1)
@@ -328,7 +328,7 @@ class IndexTableFromFile(test.TestCase):
 
   def test_string_index_table_from_file_placeholder_filename(self):
     vocabulary_file = self._createVocabFile("f2i_vocab1.txt")
-    with self.test_session():
+    with self.cached_session():
       vocabulary_placeholder = array_ops.placeholder(dtypes.string, [])
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_placeholder, num_oov_buckets=1)
@@ -344,7 +344,7 @@ class IndexTableFromFile(test.TestCase):
   def test_int32_index_table_from_file(self):
     vocabulary_file = self._createVocabFile(
         "f2i_vocab2.txt", values=("42", "1", "-1000"))
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file,
           num_oov_buckets=1,
@@ -359,7 +359,7 @@ class IndexTableFromFile(test.TestCase):
   def test_int64_index_table_from_file(self):
     vocabulary_file = self._createVocabFile(
         "f2i_vocab3.txt", values=("42", "1", "-1000"))
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file,
           num_oov_buckets=1,
@@ -374,7 +374,7 @@ class IndexTableFromFile(test.TestCase):
   def test_index_table_from_file_with_default_value(self):
     default_value = -42
     vocabulary_file = self._createVocabFile("f2i_vocab4.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file, default_value=default_value)
       ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"]))
@@ -385,7 +385,7 @@ class IndexTableFromFile(test.TestCase):
 
   def test_index_table_from_file_with_oov_buckets(self):
     vocabulary_file = self._createVocabFile("f2i_vocab5.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file, num_oov_buckets=1000)
       ids = table.lookup(
@@ -432,7 +432,7 @@ class IndexTableFromFile(test.TestCase):
 
   def test_index_table_from_file_with_vocab_size_too_small(self):
     vocabulary_file = self._createVocabFile("f2i_vocab6.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file, vocab_size=2)
       ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"]))
@@ -444,7 +444,7 @@ class IndexTableFromFile(test.TestCase):
 
   def test_index_table_from_file_with_vocab_size_too_large(self):
     vocabulary_file = self._createVocabFile("f2i_vocab7.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file, vocab_size=4)
       self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
@@ -459,7 +459,7 @@ class IndexTableFromFile(test.TestCase):
         vocabulary_file=vocabulary_file,
         vocab_size=0)
 
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file, vocab_size=3)
       ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"]))
@@ -471,7 +471,7 @@ class IndexTableFromFile(test.TestCase):
 
   def test_index_table_from_file_with_invalid_hashers(self):
     vocabulary_file = self._createVocabFile("invalid_hasher.txt")
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(TypeError):
         lookup_ops.index_table_from_file(
             vocabulary_file=vocabulary_file,
@@ -490,14 +490,14 @@ class IndexTableFromFile(test.TestCase):
 
   def test_index_table_from_file_table_ref_with_oov_buckets(self):
     vocabulary_file = self._createVocabFile("f2i_vocab9.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file, num_oov_buckets=1)
       self.assertIsNotNone(table.table_ref)
 
   def test_index_table_from_file_table_ref_without_oov_buckets(self):
     vocabulary_file = self._createVocabFile("f2i_vocab10.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_file(
           vocabulary_file=vocabulary_file, num_oov_buckets=0)
       self.assertIsNotNone(table.table_ref)
@@ -506,21 +506,21 @@ class IndexTableFromFile(test.TestCase):
 class KeyValueTensorInitializerTest(test.TestCase):
 
   def test_string(self):
-    with ops.Graph().as_default(), self.test_session():
+    with ops.Graph().as_default(), self.cached_session():
       init = lookup_ops.KeyValueTensorInitializer(
           ("brain", "salad", "surgery"), (0, 1, 2), dtypes.string, dtypes.int64)
       table = lookup_ops.HashTable(init, default_value=-1)
       table.init.run()
 
   def test_int64(self):
-    with ops.Graph().as_default(), self.test_session():
+    with ops.Graph().as_default(), self.cached_session():
       init = lookup_ops.KeyValueTensorInitializer((42, 1, -1000), (0, 1, 2),
                                                   dtypes.int64, dtypes.int64)
       table = lookup_ops.HashTable(init, default_value=-1)
       table.init.run()
 
   def test_int32(self):
-    with ops.Graph().as_default(), self.test_session():
+    with ops.Graph().as_default(), self.cached_session():
       init = lookup_ops.KeyValueTensorInitializer((42, 1, -1000), (0, 1, 2),
                                                   dtypes.int32, dtypes.int64)
       table = lookup_ops.HashTable(init, default_value=-1)
@@ -532,7 +532,7 @@ class KeyValueTensorInitializerTest(test.TestCase):
 class IndexTableFromTensor(test.TestCase):
 
   def test_index_table_from_tensor_with_tensor_init(self):
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_tensor(
           vocabulary_list=("brain", "salad", "surgery"), num_oov_buckets=1)
       ids = table.lookup(constant_op.constant(("salad", "surgery", "tarkus")))
@@ -542,7 +542,7 @@ class IndexTableFromTensor(test.TestCase):
       self.assertAllEqual((1, 2, 3), ids.eval())
 
   def test_int32_index_table_from_tensor_with_tensor_init(self):
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_tensor(
           vocabulary_list=(42, 1, -1000), num_oov_buckets=1, dtype=dtypes.int32)
       ids = table.lookup(
@@ -553,7 +553,7 @@ class IndexTableFromTensor(test.TestCase):
       self.assertAllEqual((1, 2, 3), ids.eval())
 
   def test_int64_index_table_from_tensor_with_tensor_init(self):
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_tensor(
           vocabulary_list=(42, 1, -1000), num_oov_buckets=1, dtype=dtypes.int64)
       ids = table.lookup(
@@ -565,7 +565,7 @@ class IndexTableFromTensor(test.TestCase):
 
   def test_index_table_from_tensor_with_default_value(self):
     default_value = -42
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_tensor(
           vocabulary_list=["brain", "salad", "surgery"],
           default_value=default_value)
@@ -576,14 +576,14 @@ class IndexTableFromTensor(test.TestCase):
       self.assertAllEqual((1, 2, default_value), ids.eval())
 
   def test_index_table_from_tensor_missing_vocabulary_list(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(ValueError,
                                    "vocabulary_list must be specified"):
         lookup_ops.index_table_from_tensor(
             vocabulary_list=None, num_oov_buckets=1)
 
   def test_index_table_from_tensor_empty_vocabulary_list(self):
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_table_from_tensor(
           vocabulary_list=np.array([], dtype=np.str_), num_oov_buckets=1)
       ids = table.lookup(constant_op.constant(["salad", "surgery", "brain"]))
@@ -593,7 +593,7 @@ class IndexTableFromTensor(test.TestCase):
         lookup_ops.tables_initializer().run()
 
   def test_index_table_from_tensor_with_invalid_hashers(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(TypeError):
         lookup_ops.index_table_from_tensor(
             vocabulary_list=["brain", "salad", "surgery"],
@@ -623,7 +623,7 @@ class IndexToStringTableFromFileTest(test.TestCase):
     type_funcs = [str, constant_op.constant]
     for type_func in type_funcs:
       vocabulary_file = type_func(vocabulary_path)
-      with self.test_session():
+      with self.cached_session():
         table = lookup_ops.index_to_string_table_from_file(
             vocabulary_file=vocabulary_file)
         features = table.lookup(
@@ -636,7 +636,7 @@ class IndexToStringTableFromFileTest(test.TestCase):
   def test_index_to_string_table_from_multicolumn_file(self):
     vocabulary_file = self._createVocabFile(
         "f2i_vocab1.txt", values=("brain\t300", "salad\t20", "surgery\t1"))
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_to_string_table_from_file(
           vocabulary_file=vocabulary_file,
           key_column_index=lookup_ops.TextFileIndex.LINE_NUMBER,
@@ -650,7 +650,7 @@ class IndexToStringTableFromFileTest(test.TestCase):
   def test_index_to_string_table_from_multicolumn_file_custom_delimiter(self):
     vocabulary_file = self._createVocabFile(
         "f2i_vocab1.txt", values=("brain 300", "salad 20", "surgery 1"))
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_to_string_table_from_file(
           vocabulary_file=vocabulary_file,
           key_column_index=lookup_ops.TextFileIndex.LINE_NUMBER,
@@ -665,7 +665,7 @@ class IndexToStringTableFromFileTest(test.TestCase):
   def test_index_to_string_table_with_default_value(self):
     default_value = b"NONE"
     vocabulary_file = self._createVocabFile("f2i_vocab2.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_to_string_table_from_file(
           vocabulary_file=vocabulary_file, default_value=default_value)
       features = table.lookup(constant_op.constant([1, 2, 4], dtypes.int64))
@@ -677,7 +677,7 @@ class IndexToStringTableFromFileTest(test.TestCase):
   def test_index_to_string_table_with_vocab_size_too_small(self):
     default_value = b"NONE"
     vocabulary_file = self._createVocabFile("f2i_vocab2.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_to_string_table_from_file(
           vocabulary_file=vocabulary_file,
           vocab_size=2,
@@ -690,7 +690,7 @@ class IndexToStringTableFromFileTest(test.TestCase):
 
   def test_index_to_string_table_with_vocab_size_too_large(self):
     vocabulary_file = self._createVocabFile("f2i_vocab6.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_to_string_table_from_file(
           vocabulary_file=vocabulary_file, vocab_size=4)
       features = table.lookup(constant_op.constant([1, 2, 4], dtypes.int64))
@@ -702,7 +702,7 @@ class IndexToStringTableFromFileTest(test.TestCase):
 
   def test_index_to_string_table_with_vocab_size(self):
     vocabulary_file = self._createVocabFile("f2i_vocab7.txt")
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.index_to_string_table_from_file(
           vocabulary_file=vocabulary_file, vocab_size=3)
       features = table.lookup(constant_op.constant([1, 2, 4], dtypes.int64))
@@ -715,7 +715,7 @@ class IndexToStringTableFromFileTest(test.TestCase):
 class IndexToStringTableFromTensorTest(test.TestCase):
 
   def test_index_to_string_table_from_tensor(self):
-    with self.test_session():
+    with self.cached_session():
       vocabulary_list = constant_op.constant(["brain", "salad", "surgery"])
       table = lookup_ops.index_to_string_table_from_tensor(
           vocabulary_list=vocabulary_list)
@@ -729,7 +729,7 @@ class IndexToStringTableFromTensorTest(test.TestCase):
                           features.eval())
 
   def test_duplicate_entries(self):
-    with self.test_session():
+    with self.cached_session():
       vocabulary_list = constant_op.constant(["hello", "hello"])
       table = lookup_ops.index_to_string_table_from_tensor(
           vocabulary_list=vocabulary_list)
@@ -740,7 +740,7 @@ class IndexToStringTableFromTensorTest(test.TestCase):
 
   def test_index_to_string_with_default_value(self):
     default_value = b"NONE"
-    with self.test_session():
+    with self.cached_session():
       vocabulary_list = constant_op.constant(["brain", "salad", "surgery"])
       table = lookup_ops.index_to_string_table_from_tensor(
           vocabulary_list=vocabulary_list, default_value=default_value)
@@ -764,7 +764,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
   def testInitializeStringTable(self):
     vocabulary_file = self._createVocabFile("one_column_1.txt")
 
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       table = lookup_ops.HashTable(
           lookup_ops.TextFileInitializer(
@@ -782,7 +782,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
     vocabulary_file = self._createVocabFile(
         "one_column_int64.txt", values=("42", "1", "-1000"))
 
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       table = lookup_ops.HashTable(
           lookup_ops.TextFileInitializer(
@@ -800,7 +800,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
   def testInitializeIndexTable(self):
     vocabulary_file = self._createVocabFile("one_column_2.txt")
 
-    with self.test_session():
+    with self.cached_session():
       default_value = "UNK"
       key_index = lookup_ops.TextFileIndex.LINE_NUMBER
       value_index = lookup_ops.TextFileIndex.WHOLE_LINE
@@ -821,7 +821,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
     with open(vocabulary_file, "w") as f:
       f.write("\n".join(["0\tbrain\t1", "1\tsalad\t5", "2\tsurgery\t6"]) + "\n")
 
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       key_index = 1
       value_index = 2
@@ -843,7 +843,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
     with open(vocabulary_file, "w") as f:
       f.write("\n".join(["0\tbrain\t1", "1\tsalad\t5", "2\tsurgery\t6"]) + "\n")
 
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       key_index = 2
       value_index = 1
@@ -857,7 +857,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
   def testInvalidDataType(self):
     vocabulary_file = self._createVocabFile("one_column_3.txt")
 
-    with self.test_session():
+    with self.cached_session():
       default_value = "UNK"
       key_index = lookup_ops.TextFileIndex.WHOLE_LINE
       value_index = lookup_ops.TextFileIndex.LINE_NUMBER
@@ -870,7 +870,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
 
   def testInvalidIndex(self):
     vocabulary_file = self._createVocabFile("one_column_4.txt")
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       key_index = 1  # second column of the line
       value_index = lookup_ops.TextFileIndex.LINE_NUMBER
@@ -885,7 +885,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
   def testInitializeSameTableWithMultipleNodes(self):
     vocabulary_file = self._createVocabFile("one_column_5.txt")
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       shared_name = "shared-one-columm"
       default_value = -1
       table1 = lookup_ops.HashTable(
@@ -924,7 +924,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
       self.assertAllEqual([0, 1, -1], out3)
 
   def testInitializeTableWithNoFilename(self):
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       with self.assertRaises(ValueError):
         lookup_ops.HashTable(
@@ -934,7 +934,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
             default_value)
 
   def testInitializeWithVocabSize(self):
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       vocab_size = 3
       vocabulary_file1 = self._createVocabFile("one_column6.txt")
@@ -982,7 +982,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
   def testFeedVocabularyName(self):
     vocabulary_file = self._createVocabFile("feed_vocabulary.txt")
 
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       table = lookup_ops.HashTable(
           lookup_ops.TextFileInitializer(
@@ -1008,7 +1008,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
   def testInvalidFilenames(self):
     vocabulary_file = self._createVocabFile("filename_shape.txt")
 
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
 
       # Invalid data type
@@ -1031,7 +1031,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
 
   def testIdToStringTable(self):
     vocab_file = self._createVocabFile("feat_to_id_1.txt")
-    with self.test_session():
+    with self.cached_session():
       default_value = "UNK"
       vocab_size = 3
       table = lookup_ops.HashTable(
@@ -1048,7 +1048,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
 
   def testStringToIdTable(self):
     vocab_file = self._createVocabFile("feat_to_id_2.txt")
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       vocab_size = 3
       table = lookup_ops.HashTable(
@@ -1065,7 +1065,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
   def testInt64ToIdTable(self):
     vocab_file = self._createVocabFile(
         "feat_to_id_3.txt", values=("42", "1", "-1000"))
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       vocab_size = 3
       table = lookup_ops.HashTable(
@@ -1090,7 +1090,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
 
   def testStringIdTableWithHashBuckets(self):
     vocab_file = self._createVocabFile("feat_to_id_1.txt")
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       vocab_size = 3
       oov_buckets = 1
@@ -1110,7 +1110,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
 
   def testInt32IdTableWithHashBuckets(self):
     vocab_file = self._createVocabFile("feat_to_id_2.txt", ("42", "1", "-1000"))
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       vocab_size = 3
       oov_buckets = 1
@@ -1132,7 +1132,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
 
   def testInt64IdTableWithHashBuckets(self):
     vocab_file = self._createVocabFile("feat_to_id_3.txt", ("42", "1", "-1000"))
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       vocab_size = 3
       oov_buckets = 1
@@ -1151,7 +1151,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
       self.assertEquals(vocab_size + oov_buckets, table.size().eval())
 
   def testStringIdTableWithOnlyHashBucket(self):
-    with self.test_session():
+    with self.cached_session():
       oov_buckets = 5
 
       # Set a table that only uses hash buckets, for each input value returns
@@ -1172,7 +1172,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
       self.assertEquals(oov_buckets, table.size().eval())
 
   def testInt32IdTableWithOnlyHashBucket(self):
-    with self.test_session():
+    with self.cached_session():
       oov_buckets = 5
 
       # Set a table that only uses hash buckets, for each input value returns
@@ -1194,20 +1194,20 @@ class IdTableWithHashBucketsTest(test.TestCase):
       self.assertEquals(oov_buckets, table.size().eval())
 
   def testFloat64IdTableWithOnlyHashBucket(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(TypeError, "Invalid key_dtype"):
         lookup_ops.IdTableWithHashBuckets(
             None, num_oov_buckets=5, key_dtype=dtypes.float64)
 
   def testBoolIdTableWithOnlyHashBucket(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(TypeError, "Invalid key_dtype"):
         lookup_ops.IdTableWithHashBuckets(
             None, num_oov_buckets=5, key_dtype=dtypes.bool)
 
   def testIdTableWithHashBucketsWithMultipleInitializers(self):
     vocab_file = self._createVocabFile("feat_to_id_4.txt")
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       default_value = -1
       vocab_size = 3
       oov_buckets = 3
@@ -1248,7 +1248,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
   def testIdTableWithHashBucketsInitializationAcrossSessions(self):
     vocab_file = self._createVocabFile("feat_to_id_5.txt")
     shared_name = "across-sessions"
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       vocab_size = 3
       oov_buckets = 1
@@ -1269,7 +1269,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
       self.assertAllEqual([0, 1, 2, 3], out1.eval())
       self.assertEquals(vocab_size + oov_buckets, table1.size().eval())
 
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       vocab_size = 3
       oov_buckets = 1
@@ -1292,7 +1292,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
 
   def testIdTableWithHashBucketsWithMultipleInitializersDifferentDefault(self):
     vocab_file = self._createVocabFile("feat_to_id_6.txt")
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       default_value1 = -1
       vocab_size = 3
       oov_buckets = 0
@@ -1328,7 +1328,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
     vocab_file = self._createVocabFile("feat_to_id_7.txt")
     input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]]
     input_shape = [4, 4]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sp_features = sparse_tensor.SparseTensor(
           constant_op.constant(input_indices, dtypes.int64),
           constant_op.constant(["brain", "salad", "brain", "surgery", "tarkus"],
@@ -1355,7 +1355,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
   def testInt32SparseTensor(self):
     input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]]
     input_shape = [4, 4]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sp_features = sparse_tensor.SparseTensor(
           constant_op.constant(input_indices, dtypes.int64),
           constant_op.constant([42, 1, 42, -1000, 11], dtypes.int32),
@@ -1383,7 +1383,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
   def testInt64SparseTensor(self):
     input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]]
     input_shape = [4, 4]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sp_features = sparse_tensor.SparseTensor(
           constant_op.constant(input_indices, dtypes.int64),
           constant_op.constant([42, 1, 42, -1000, 11], dtypes.int64),
@@ -1410,7 +1410,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
 
   def testIdTableWithHashBucketsWithInvalidHashers(self):
     vocab_file = self._createVocabFile("feat_to_id_4.txt")
-    with self.test_session():
+    with self.cached_session():
       default_value = -1
       vocab_size = 3
       oov_buckets = 1
@@ -1451,7 +1451,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
             hasher_spec=lookup_ops.StrongHashSpec([None, 2]))
 
   def testIdTableWithHashBucketsNoInnerTable(self):
-    with self.test_session():
+    with self.cached_session():
       table = lookup_ops.IdTableWithHashBuckets(None, num_oov_buckets=1)
       self.assertIsNone(table.table_ref)
 
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index 87fc715783..3ce0b74263 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -61,62 +61,62 @@ class AbsoluteDifferenceLossTest(test.TestCase):
     self._labels = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3))
 
   def testValueErrorThrownWhenWeightIsNone(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         losses.absolute_difference(
             self._predictions, self._predictions, weights=None)
 
   def testAllCorrectNoLossWeight(self):
     loss = losses.absolute_difference(self._predictions, self._predictions)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(0.0, loss.eval(), 3)
 
   def testNonZeroLoss(self):
     loss = losses.absolute_difference(self._labels, self._predictions)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(5.5, loss.eval(), 3)
 
   def testNonZeroLossWithPythonScalarWeight(self):
     weights = 2.3
     loss = losses.absolute_difference(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(5.5 * weights, loss.eval(), 3)
 
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.absolute_difference(self._labels, self._predictions,
                                       constant_op.constant(weights))
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(5.5 * weights, loss.eval(), 3)
 
   def testNonZeroLossWithOneDimBatchSpecificWeights(self):
     weights = constant_op.constant((1.2, 0.0), shape=(2, 1))
     loss = losses.absolute_difference(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(5.6, loss.eval(), 3)
 
   def testNonZeroLossWithTwoDimBatchSpecificWeights(self):
     weights = constant_op.constant([1.2, 0.0], shape=[2, 1])
     loss = losses.absolute_difference(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(5.6, loss.eval(), 3)
 
   def testNonZeroLossWithSampleSpecificWeights(self):
     weights = constant_op.constant([3, 6, 5, 0, 4, 2], shape=[2, 3])
     loss = losses.absolute_difference(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(16.6, loss.eval(), 3)
 
   def testNonZeroLossWithSampleSpecificWeightsMostZero(self):
     weights = constant_op.constant([0, 0, 0, 0, 0, 2], shape=[2, 3])
     loss = losses.absolute_difference(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(6.0, loss.eval(), 3)
 
   def testLossWithSampleSpecificWeightsAllZero(self):
     weights = array_ops.zeros((2, 3))
     loss = losses.absolute_difference(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(0.0, loss.eval(), 3)
 
   @test_util.assert_no_new_pyobjects_executing_eagerly
@@ -134,12 +134,12 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         losses.softmax_cross_entropy(labels, logits, weights=None)
 
   def testAllCorrect(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                      [0.0, 0.0, 10.0]])
       labels = constant_op.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
@@ -152,7 +152,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
 
-    with self.test_session():
+    with self.cached_session():
       loss = losses.softmax_cross_entropy(labels, logits)
       self.assertEquals(loss.op.name, 'softmax_cross_entropy_loss/value')
       self.assertAlmostEqual(loss.eval(), 10.0, 3)
@@ -162,7 +162,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
     weights = 2.3
-    with self.test_session():
+    with self.cached_session():
       loss = losses.softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(weights * 10.0, loss.eval(), 3)
 
@@ -171,7 +171,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
     weights = 2.3
-    with self.test_session():
+    with self.cached_session():
       loss = losses.softmax_cross_entropy(labels, logits,
                                           constant_op.constant(weights))
       self.assertAlmostEqual(weights * 10.0, loss.eval(), 3)
@@ -181,7 +181,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
     weights = constant_op.constant((1.2, 3.4, 5.6))
-    with self.test_session():
+    with self.cached_session():
       loss = losses.softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3)
 
@@ -190,7 +190,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
     weights = constant_op.constant([0, 0, 0], shape=[3])
-    with self.test_session():
+    with self.cached_session():
       loss = losses.softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(0.0, loss.eval(), 3)
 
@@ -199,12 +199,12 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
     weights = constant_op.constant([1.2, 0, 0], shape=[3])
-    with self.test_session():
+    with self.cached_session():
       loss = losses.softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(12.0, loss.eval(), 3)
 
   def testSoftmaxWithMeasurementSpecificWeightsRaisesException(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0],
                                      [-100.0, 100.0, -100.0],
                                      [-100.0, -100.0, 100.0]])
@@ -215,7 +215,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
         losses.softmax_cross_entropy(labels, logits, weights=weights).eval()
 
   def testSoftmaxLabelSmoothing(self):
-    with self.test_session():
+    with self.cached_session():
       # Softmax Cross Entropy Loss is:
       #   -\sum_i p_i \log q_i
       # where for a softmax activation
@@ -242,12 +242,12 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[0], [1], [2]])
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         losses.sparse_softmax_cross_entropy(labels, logits, weights=None)
 
   def testAllCorrectInt32Labels(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                      [0.0, 0.0, 10.0]])
       labels = constant_op.constant([[0], [1], [2]], dtype=dtypes.int32)
@@ -263,7 +263,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
     losses.sparse_softmax_cross_entropy(labels, logits)
 
   def testAllCorrectInt64Labels(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                      [0.0, 0.0, 10.0]])
       labels = constant_op.constant([[0], [1], [2]], dtype=dtypes.int64)
@@ -272,7 +272,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
       self.assertAlmostEqual(loss.eval(), 0.0, 3)
 
   def testAllCorrectNonColumnLabels(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                      [0.0, 0.0, 10.0]])
       labels = constant_op.constant([0, 1, 2])
@@ -285,7 +285,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]], dtype=dtypes.int32)
 
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(labels, logits)
       self.assertEquals(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
       self.assertAlmostEqual(loss.eval(), 10.0, 3)
@@ -295,7 +295,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]], dtype=dtypes.int64)
 
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(labels, logits)
       self.assertEquals(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
       self.assertAlmostEqual(loss.eval(), 10.0, 3)
@@ -305,7 +305,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([2, 0, 1])
 
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(labels, logits)
       self.assertEquals(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
       self.assertAlmostEqual(loss.eval(), 10.0, 3)
@@ -315,7 +315,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]])
     weights = 2.3
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(weights * 10.0, loss.eval(), 3)
 
@@ -324,7 +324,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]])
     weights = 2.3
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(labels, logits,
                                                  constant_op.constant(weights))
       self.assertAlmostEqual(weights * 10.0, loss.eval(), 3)
@@ -334,7 +334,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]])
     weights = 2.3
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(
           labels, logits, constant_op.constant((weights,)))
       self.assertAlmostEqual(weights * 10.0, loss.eval(), 3)
@@ -345,7 +345,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]])
     weights = array_ops.placeholder(dtypes.float32)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       loss_val = sess.run(loss,
                           feed_dict={weights: ((1.2,), (3.4,), (5.6,))})
@@ -355,7 +355,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
     logits = array_ops.placeholder(dtypes.float32)
     labels = array_ops.placeholder(dtypes.int32)
     weights = 1.0
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       loss_val = sess.run(loss,
                           feed_dict={
@@ -370,7 +370,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
     logits = array_ops.placeholder(dtypes.float32, shape=(None, 3))
     labels = array_ops.placeholder(dtypes.int32, shape=(None, 1))
     weights = array_ops.placeholder(dtypes.float32)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       loss_val = sess.run(loss,
                           feed_dict={
@@ -387,7 +387,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]])
     weights = constant_op.constant([1.2, 3.4, 5.6], shape=(3, 1))
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3)
 
@@ -396,7 +396,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]])
     weights = constant_op.constant([[1.2], [3.4], [5.6]])
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3)
 
@@ -405,7 +405,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]])
     weights = constant_op.constant([0, 0, 0], shape=(3, 1))
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(0.0, loss.eval(), 3)
 
@@ -414,12 +414,12 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
                                    [0.0, 0.0, 10.0]])
     labels = constant_op.constant([[2], [0], [1]])
     weights = constant_op.constant([1.2, 0, 0], shape=(3, 1))
-    with self.test_session():
+    with self.cached_session():
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(12.0, loss.eval(), 3)
 
   def testMeasurementSpecificWeightsRaisesException(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0],
                                      [-100.0, 100.0, -100.0],
                                      [-100.0, -100.0, 100.0]])
@@ -432,7 +432,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
 
   def testInconsistentWeightSizeRaisesException(self):
     """The weight tensor has incorrect number of elements."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0],
                                      [-100.0, 100.0, -100.0],
                                      [-100.0, -100.0, 100.0]])
@@ -445,7 +445,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
 
   def testInconsistentLabelSizeRaisesException(self):
     """The label tensor has incorrect number of elements."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0],
                                      [-100.0, 100.0, -100.0],
                                      [-100.0, -100.0, 100.0]])
@@ -458,7 +458,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
 
   def testInconsistentWeightShapeRaisesException(self):
     """The weight tensor has incorrect shape."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0, -100.0],
                                      [-100.0, 100.0, -100.0, -100.0],
                                      [-100.0, -100.0, 100.0, -100.0],
@@ -472,7 +472,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
 
   def testInconsistentLabelShapeRaisesException(self):
     """The label tensor has incorrect shape."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0, -100.0],
                                      [-100.0, 100.0, -100.0, -100.0],
                                      [-100.0, -100.0, 100.0, -100.0],
@@ -488,7 +488,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
 class SigmoidCrossEntropyLossTest(test.TestCase):
 
   def testAllCorrectSigmoid(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0],
                                      [-100.0, 100.0, -100.0],
                                      [-100.0, -100.0, 100.0]])
@@ -506,7 +506,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
     loss = losses.sigmoid_cross_entropy(labels, logits, weights)
     self.assertEquals(logits.dtype, loss.dtype)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = sess.run(loss,
                       feed_dict={
                           logits: np.ones((32, 1)),
@@ -522,7 +522,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
     loss = losses.sigmoid_cross_entropy(labels, logits, weights)
     self.assertEquals(logits.dtype, loss.dtype)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = sess.run(loss,
                       feed_dict={
                           logits: np.ones((32, 2)),
@@ -531,7 +531,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
       self.assertAlmostEqual(0.313, loss, 3)
 
   def testAllWrongSigmoid(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0],
                                      [-100.0, 100.0, -100.0],
                                      [-100.0, -100.0, 100.0]])
@@ -542,7 +542,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
       self.assertAlmostEqual(loss.eval(), 600.0 / 9.0, 3)
 
   def testAllWrongSigmoidWithMeasurementSpecificWeights(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0],
                                      [-100.0, 100.0, -100.0],
                                      [-100.0, -100.0, 100.0]])
@@ -562,7 +562,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
     self.assertEquals(logits.dtype, loss.dtype)
     self.assertEquals('sigmoid_cross_entropy_loss/value', loss.op.name)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(0.0, loss.eval(), 3)
 
   def testSigmoidFloat64(self):
@@ -577,7 +577,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
     loss = losses.sigmoid_cross_entropy(labels, logits)
     self.assertEquals(logits.dtype, loss.dtype)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(44.444, loss.eval(), 3)
 
   def testSigmoidNoReduction(self):
@@ -590,7 +590,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
         labels, logits, reduction=losses.Reduction.NONE)
     self.assertEquals(logits.dtype, loss.dtype)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose((
           (0., 0., 0.),
           (0., 100., 100.),
@@ -598,7 +598,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
       ), loss.eval(), 3)
 
   def testSigmoidLabelSmoothingCorrect(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[100.0, -100.0, -100.0]])
       labels = constant_op.constant([[1, 0, 1]])
       # Sigmoid cross entropy loss is:
@@ -621,7 +621,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
       self.assertAlmostEqual(loss.eval(), expected_value, 3)
 
   def testSigmoidLabelSmoothingEqualsSoftmaxTwoLabel(self):
-    with self.test_session():
+    with self.cached_session():
       label_smoothing = 0.1
       sigmoid_logits = constant_op.constant([[100.0, -100.0, -100.0]])
       sigmoid_labels = constant_op.constant([[1, 0, 1]])
@@ -656,33 +656,33 @@ class LogLossTest(test.TestCase):
     self._labels = constant_op.constant(labels)
 
   def testValueErrorThrownWhenWeightIsNone(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         losses.log_loss(self._labels, self._labels, weights=None)
 
   def testAllCorrectNoLossWeight(self):
     loss = losses.log_loss(self._labels, self._labels)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(0.0, loss.eval(), 3)
 
   def testAllCorrectNoLossWeightWithPlaceholder(self):
     tf_predictions = array_ops.placeholder(
         dtypes.float32, shape=self._np_labels.shape)
     loss = losses.log_loss(self._labels, tf_predictions)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(
           0.0, loss.eval(feed_dict={tf_predictions: self._np_labels}), 3)
 
   def testNonZeroLoss(self):
     loss = losses.log_loss(self._labels, self._predictions)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(-np.sum(self._expected_losses) / 6.0,
                              loss.eval(), 3)
 
   def testNonZeroLossWithPythonScalarWeight(self):
     weights = 2.3
     loss = losses.log_loss(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              loss.eval(), 3)
 
@@ -690,7 +690,7 @@ class LogLossTest(test.TestCase):
     weights = 2.3
     loss = losses.log_loss(self._labels, self._predictions,
                            constant_op.constant(weights))
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              loss.eval(), 3)
 
@@ -700,7 +700,7 @@ class LogLossTest(test.TestCase):
     weights = 2.3
     loss = losses.log_loss(self._labels, tf_predictions,
                            constant_op.constant(weights))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = sess.run(loss, feed_dict={tf_predictions: self._np_predictions})
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              loss, 3)
@@ -710,7 +710,7 @@ class LogLossTest(test.TestCase):
     weights = 2.3
     loss = losses.log_loss(self._labels, tf_predictions,
                            constant_op.constant(weights))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = sess.run(loss, feed_dict={tf_predictions: self._np_predictions})
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              loss, 3)
@@ -721,7 +721,7 @@ class LogLossTest(test.TestCase):
         self._expected_losses,
         np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3)))
     loss = losses.log_loss(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(-np.sum(expected_losses) / 6.0, loss.eval(), 3)
 
   def testNonZeroLossWithOneDimBatchSpecificWeightsSomeZero(self):
@@ -730,7 +730,7 @@ class LogLossTest(test.TestCase):
                                   np.asarray([1.2, 1.2, 1.2, 0, 0, 0]).reshape(
                                       (2, 3)))
     loss = losses.log_loss(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(-np.sum(expected_losses) / 3.0, loss.eval(), 3)
 
   def testNonZeroLossWithTwoDimBatchSpecificWeightsSomeZero(self):
@@ -739,12 +739,12 @@ class LogLossTest(test.TestCase):
                                   np.asarray([1.2, 1.2, 1.2, 0, 0, 0]).reshape(
                                       (2, 3)))
     loss = losses.log_loss(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(-np.sum(expected_losses) / 3.0, loss.eval(), 3)
 
   def testWeightsWithSameNumDimsButWrongShapeThrowsException(self):
     weights = constant_op.constant(np.random.normal(size=(2, 4)), shape=[2, 4])
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         losses.log_loss(self._labels, self._predictions, weights)
 
@@ -757,7 +757,7 @@ class LogLossTest(test.TestCase):
         self._predictions,
         constant_op.constant(
             weights, shape=(2, 3)))
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(-np.sum(expected_losses) / 5.0, loss.eval(), 3)
 
   def testNonZeroLossWithMeasurementSpecificWeightsWithPlaceholder(self):
@@ -771,7 +771,7 @@ class LogLossTest(test.TestCase):
         constant_op.constant(
             weights, shape=(2, 3)))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = sess.run(loss, feed_dict={tf_predictions: self._np_predictions})
       self.assertAlmostEqual(-np.sum(expected_losses) / 5.0, loss, 3)
 
@@ -784,7 +784,7 @@ class LogLossTest(test.TestCase):
         self._predictions,
         constant_op.constant(
             weights, shape=(2, 3)))
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(-np.sum(expected_losses), loss.eval(), 3)
 
   def testNonZeroLossWithSampleSpecificWeightsMostZeroWithPlaceholder(self):
@@ -795,35 +795,35 @@ class LogLossTest(test.TestCase):
     tf_weights = constant_op.constant(weights, shape=(2, 3))
     loss = losses.log_loss(self._labels, tf_predictions, tf_weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = sess.run(loss, feed_dict={tf_predictions: self._np_predictions})
       self.assertAlmostEqual(-np.sum(expected_losses), loss, 3)
 
   def testLossWithSampleSpecificWeightsAllZero(self):
     tf_weights = array_ops.zeros(shape=(2, 3))
     loss = losses.log_loss(self._labels, self._predictions, tf_weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(0.0, loss.eval(), 3)
 
 
 class HingeLossTest(test.TestCase):
 
   def testIncompatibleShapes(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[-1.0], [2.1]])
       labels = constant_op.constant([0.0, 1.0])
       with self.assertRaises(ValueError):
         _ = losses.hinge_loss(labels, logits).eval()
 
   def testAllOutsideMargin(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([1.2, -1.4, -1.0, 2.1])
       labels = constant_op.constant([1.0, 0.0, 0.0, 1.0])
       loss = losses.hinge_loss(labels, logits)
       self.assertAllClose(loss.eval(), 0.0, atol=1e-3)
 
   def testSomeInsideMargin(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[-0.7], [-1.4], [1.4], [0.6]])
       labels = constant_op.constant([[0.0], [0.0], [1.0], [1.0]])
       loss = losses.hinge_loss(labels, logits)
@@ -832,7 +832,7 @@ class HingeLossTest(test.TestCase):
       self.assertAllClose(loss.eval(), 0.175, atol=1e-3)
 
   def testSomeMisclassified(self):
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[[1.2], [0.4], [-1.0], [-1.1]]])
       labels = constant_op.constant([[[1.0], [0.0], [0.0], [1.0]]])
       loss = losses.hinge_loss(labels, logits)
@@ -844,14 +844,14 @@ class HingeLossTest(test.TestCase):
 class HuberLossTest(test.TestCase):
 
   def testIncompatibleShapes(self):
-    with self.test_session():
+    with self.cached_session():
       predictions = constant_op.constant([[-1.0], [2.1]])
       labels = constant_op.constant([0.0, 1.0])
       with self.assertRaises(ValueError):
         _ = losses.huber_loss(labels, predictions).eval()
 
   def testAllQuadratic(self):
-    with self.test_session():
+    with self.cached_session():
       predictions = constant_op.constant([1.5, -1.4, -1.0, 0.0])
       labels = constant_op.constant([1.0, -1.0, 0.0, 0.5])
       loss = losses.huber_loss(labels, predictions)
@@ -859,7 +859,7 @@ class HuberLossTest(test.TestCase):
                           0.5 * (0.25 + 0.16 + 1.0 + 0.25) / 4., atol=1e-5)
 
   def testAllLinear(self):
-    with self.test_session():
+    with self.cached_session():
       predictions = constant_op.constant([1.5, -1.4, -1.0, 0.0])
       labels = constant_op.constant([0.0, 1.0, 0.0, 1.5])
       loss = losses.huber_loss(labels, predictions)
@@ -867,7 +867,7 @@ class HuberLossTest(test.TestCase):
                           (1.5 + 2.4 + 1.0 + 1.5) / 4. - 0.5, atol=1e-5)
 
   def testMixedQuadraticLinear(self):
-    with self.test_session():
+    with self.cached_session():
       predictions = constant_op.constant([[1.5, -1.4, -1.0, 0.0],
                                           [1.5, -1.4, -1.0, 0.0]])
       labels = constant_op.constant([[1.0, -1.0, 0.0, 0.5],
@@ -879,7 +879,7 @@ class HuberLossTest(test.TestCase):
       self.assertAllClose(loss.eval(), expected_loss, atol=1e-5)
 
   def testAllQuadraticDelta(self):
-    with self.test_session():
+    with self.cached_session():
       delta = 0.5
       predictions = constant_op.constant([1.5, -1.4, -0.5, 0.0])
       labels = constant_op.constant([1.0, -1.0, 0.0, 0.5])
@@ -894,7 +894,7 @@ class HuberLossTest(test.TestCase):
     expected = delta * np.array([1.5, 2.4, 1.0, 1.5]).mean()
     expected -= 0.5 * delta**2
     loss = losses.huber_loss(labels, predictions, delta=delta)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(expected, loss.eval(), atol=1e-5)
 
 
@@ -906,13 +906,13 @@ class MeanSquaredErrorTest(test.TestCase):
     self._labels = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3))
 
   def testValueErrorThrownWhenWeightIsNone(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         losses.mean_squared_error(
             self._predictions, self._predictions, weights=None)
 
   def testScalar(self):
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(
           0.0,
           losses.mean_squared_error(predictions=constant_op.constant(0),
@@ -920,55 +920,55 @@ class MeanSquaredErrorTest(test.TestCase):
 
   def testAllCorrectNoLossWeight(self):
     loss = losses.mean_squared_error(self._predictions, self._predictions)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(0.0, loss.eval(), 3)
 
   def testNonZeroLoss(self):
     loss = losses.mean_squared_error(self._labels, self._predictions)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(49.5, loss.eval(), 3)
 
   def testNonZeroLossWithPythonScalarWeight(self):
     weights = 2.3
     loss = losses.mean_squared_error(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(49.5 * weights, loss.eval(), 3)
 
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.mean_squared_error(self._labels, self._predictions,
                                      constant_op.constant(weights))
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(49.5 * weights, loss.eval(), 3)
 
   def testNonZeroLossWithOneDimBatchSpecificWeights(self):
     weights = constant_op.constant([1.2, 3.4], shape=(2, 1))
     loss = losses.mean_squared_error(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(767.8 / 6.0, loss.eval(), 3)
 
   def testNonZeroLossWithTwoDimBatchSpecificWeights(self):
     weights = constant_op.constant([1.2, 3.4], shape=[2, 1])
     loss = losses.mean_squared_error(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(767.8 / 6.0, loss.eval(), 3)
 
   def testNonZeroLossWithSampleSpecificWeights(self):
     weights = constant_op.constant([3, 6, 5, 0, 4, 2], shape=[2, 3])
     loss = losses.mean_squared_error(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(587 / 5.0, loss.eval(), 3)
 
   def testNonZeroLossWithSampleSpecificWeightsMostZero(self):
     weights = constant_op.constant([0, 0, 0, 0, 0, 2], shape=[2, 3])
     loss = losses.mean_squared_error(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(18.0, loss.eval(), 3)
 
   def testLossWithSampleSpecificWeightsAllZero(self):
     weights = array_ops.zeros((2, 3))
     loss = losses.mean_squared_error(self._labels, self._predictions, weights)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(0.0, loss.eval(), 3)
 
 
@@ -994,7 +994,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
     self._expected_losses = np.divide(total, 3.0)
 
   def testValueErrorThrownWhenWeightIsNone(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         losses.mean_pairwise_squared_error(
             predictions=constant_op.constant(self._labels),
@@ -1003,7 +1003,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
 
   def _test_valid_weights(
       self, labels, predictions, expected_loss, weights=1.0):
-    with self.test_session():
+    with self.cached_session():
       static_inputs_op = losses.mean_pairwise_squared_error(
           predictions=predictions, labels=labels, weights=weights)
       self.assertAlmostEqual(expected_loss, static_inputs_op.eval(), places=3)
@@ -1054,7 +1054,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
 
       init_op = variables.global_variables_initializer()
 
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         sess.run(init_op)
         for grad, _ in gradients_to_variables:
           np_grad = sess.run(grad)
@@ -1073,7 +1073,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
         predictions=constant_op.constant(self._predictions),
         labels=constant_op.constant(self._labels),
         weights=constant_op.constant(weights))
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(weights * np.sum(self._expected_losses),
                              loss.eval(), 3)
 
@@ -1122,7 +1122,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
         predictions=predictions_placeholder,
         labels=labels_placeholder,
         weights=weights_placeholder)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg):
         dynamic_inputs_op.eval(feed_dict={
             predictions_placeholder: predictions,
@@ -1191,7 +1191,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
           labels=array_ops.concat([labels0, labels1], 0),
           predictions=array_ops.concat([predictions0, predictions1], 0))
 
-      with self.test_session() as session:
+      with self.cached_session() as session:
         loss0, loss1, loss0_1 = session.run([loss0, loss1, loss0_1])
 
         self.assertTrue(loss0 > 0)
@@ -1216,7 +1216,7 @@ class CosineDistanceLossTest(test.TestCase):
                                [0, 0, 1], [0, 1, 0]]).reshape((3, 2, 3))
 
   def testValueErrorThrownWhenWeightIsNone(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         losses.cosine_distance(
             predictions=constant_op.constant(self._labels),
@@ -1229,7 +1229,7 @@ class CosineDistanceLossTest(test.TestCase):
         predictions=constant_op.constant(self._labels),
         labels=constant_op.constant(self._labels),
         dim=2)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(0, loss.eval(), 5)
 
   def testPartiallyCorrectWithIntegerValues(self):
@@ -1237,7 +1237,7 @@ class CosineDistanceLossTest(test.TestCase):
         predictions=constant_op.constant(self._predictions),
         labels=constant_op.constant(self._labels),
         dim=2)
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(1, loss.eval(), 5)
 
   def testPartiallyCorrectFloatingPointValues(self):
@@ -1255,7 +1255,7 @@ class CosineDistanceLossTest(test.TestCase):
         labels, shape=(3, 1, 3), dtype=dtypes.float32)
     loss = losses.cosine_distance(tf_labels, tf_preds, dim=2)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAlmostEqual(1.0, loss.eval(), 5)
 
   def testSampleSpecificWeights(self):
@@ -1264,7 +1264,7 @@ class CosineDistanceLossTest(test.TestCase):
         labels=constant_op.constant(self._labels),
         dim=2,
         weights=np.asarray((1, 0, 0)).reshape((3, 1, 1)))
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(1.0, loss.eval())
 
   def testMeasurementSpecificWeights(self):
@@ -1274,7 +1274,7 @@ class CosineDistanceLossTest(test.TestCase):
         dim=2,
         weights=constant_op.constant(
             [1, 0, 0, 1, 1, 1], shape=(3, 2, 1)))
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(3.0 / 4.0, loss.eval())
 
   def testMeasurementSpecificWeightsWithPlaceholderWithShape(self):
@@ -1286,7 +1286,7 @@ class CosineDistanceLossTest(test.TestCase):
         dim=2,
         weights=constant_op.constant(
             [1, 0, 0, 1, 1, 1], shape=(3, 2, 1)))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       loss = sess.run(loss, feed_dict={tf_predictions: self._predictions})
       self.assertEqual(3.0 / 4.0, loss)
 
@@ -1296,7 +1296,7 @@ class CosineDistanceLossTest(test.TestCase):
         labels=constant_op.constant(self._labels),
         dim=2,
         weights=array_ops.zeros((3, 1, 1)))
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(0, loss.eval())
 
   def testZeroLossWhenAllMeasurementSpecificWeightsAreZero(self):
@@ -1305,7 +1305,7 @@ class CosineDistanceLossTest(test.TestCase):
         labels=constant_op.constant(self._labels),
         dim=2,
         weights=array_ops.zeros((3, 2, 1)))
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(0, loss.eval())
 
 
@@ -1411,7 +1411,7 @@ class ComputeWeightedLossTest(test.TestCase):
       weighted_loss = losses.compute_weighted_loss(
           self._raw_losses, weights=weight)
       self.assertEqual(1, len(util.get_losses()))
-      with self.test_session():
+      with self.cached_session():
         self.assertAllClose(
             np.mean(weight * self._raw_losses), weighted_loss.eval())
 
@@ -1429,7 +1429,7 @@ class ComputeWeightedLossTest(test.TestCase):
       weighted_loss = losses.compute_weighted_loss(
           self._raw_losses, weights=weights_placeholder)
       self.assertEqual(1, len(util.get_losses()))
-      with self.test_session():
+      with self.cached_session():
         with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg):
           weighted_loss.eval(feed_dict={weights_placeholder: weights})
 
@@ -1452,7 +1452,7 @@ class ComputeWeightedLossTest(test.TestCase):
       weighted_loss = losses.compute_weighted_loss(
           raw_losses, weights=weights_placeholder)
       self.assertEqual(1, len(util.get_losses()))
-      with self.test_session():
+      with self.cached_session():
         with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg):
           weighted_loss.eval(feed_dict={weights_placeholder: weights})
 
diff --git a/tensorflow/python/kernel_tests/manip_ops_test.py b/tensorflow/python/kernel_tests/manip_ops_test.py
index dc3ea38671..f71857a3cb 100644
--- a/tensorflow/python/kernel_tests/manip_ops_test.py
+++ b/tensorflow/python/kernel_tests/manip_ops_test.py
@@ -42,12 +42,12 @@ class RollTest(test_util.TensorFlowTestCase):
 
   def _testRoll(self, np_input, shift, axis):
     expected_roll = np.roll(np_input, shift, axis)
-    with self.test_session():
+    with self.cached_session():
       roll = manip_ops.roll(np_input, shift, axis)
       self.assertAllEqual(roll.eval(), expected_roll)
 
   def _testGradient(self, np_input, shift, axis):
-    with self.test_session():
+    with self.cached_session():
       inx = constant_op.constant(np_input.tolist())
       xs = list(np_input.shape)
       y = manip_ops.roll(inx, shift, axis)
@@ -94,7 +94,7 @@ class RollTest(test_util.TensorFlowTestCase):
     self._testAll(np.random.randint(-100, 100, (5)).astype(np.int32), 3, -1)
     self._testAll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), 3, -2)
     # Make sure negative axis should be 0 <= axis + dims < dims
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "is out of range"):
         manip_ops.roll(np.random.randint(-100, 100, (4, 4)).astype(np.int32),
@@ -111,7 +111,7 @@ class RollTest(test_util.TensorFlowTestCase):
     tensor = array_ops.placeholder(dtype=dtypes.int32)
     shift = 1
     axis = 0
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "input must be 1-D or higher"):
         manip_ops.roll(tensor, shift, axis).eval(feed_dict={tensor: 7})
@@ -127,7 +127,7 @@ class RollTest(test_util.TensorFlowTestCase):
     tensor = [[1, 2], [3, 4]]
     shift = 1
     axis = array_ops.placeholder(dtype=dtypes.int32)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "axis must be a scalar or a 1-D vector"):
         manip_ops.roll(tensor, shift, axis).eval(feed_dict={axis: [[0, 1]]})
@@ -143,7 +143,7 @@ class RollTest(test_util.TensorFlowTestCase):
     tensor = [[1, 2], [3, 4]]
     shift = array_ops.placeholder(dtype=dtypes.int32)
     axis = 1
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "shift must be a scalar or a 1-D vector"):
         manip_ops.roll(tensor, shift, axis).eval(feed_dict={shift: [[0, 1]]})
@@ -158,7 +158,7 @@ class RollTest(test_util.TensorFlowTestCase):
     tensor = [[1, 2], [3, 4]]
     shift = array_ops.placeholder(dtype=dtypes.int32)
     axis = [0, 1]
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "shift and axis must have the same size"):
         manip_ops.roll(tensor, shift, axis).eval(feed_dict={shift: [1]})
@@ -167,7 +167,7 @@ class RollTest(test_util.TensorFlowTestCase):
     tensor = [1, 2]
     shift = 1
     axis = 1
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "is out of range"):
         manip_ops.roll(tensor, shift, axis).eval()
diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py
index b167278984..309da8f184 100644
--- a/tensorflow/python/kernel_tests/matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/matmul_op_test.py
@@ -206,7 +206,7 @@ class MatMulInfixOperatorTest(test_lib.TestCase):
     b = ops.convert_to_tensor([[40.0, 50.0], [60.0, 70.0], [80.0, 90.0]])
     c = infix_matmul(a, b)
     d = math_ops.matmul(a, b)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(c.eval(), d.eval())
 
 
diff --git a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
index f41967ff98..720ba806e9 100644
--- a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
@@ -114,7 +114,7 @@ class InverseOpTest(test.TestCase):
 
   def testNotInvertible(self):
     # The input should be invertible.
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesOpError("Input is not invertible."):
         # All rows of the matrix below add to zero.
         tensor3 = constant_op.constant([[1., 0., -1.], [-1., 1., 0.],
diff --git a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py
index 33288392c0..dd01ba11af 100644
--- a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py
@@ -143,7 +143,7 @@ class MatrixTriangularSolveOpTest(test.TestCase):
   def testNonSquareMatrix(self):
     # A non-square matrix should cause an error.
     matrix = np.array([[1., 2., 3.], [3., 4., 5.]])
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         self._verifySolve(matrix, matrix)
       with self.assertRaises(ValueError):
@@ -154,7 +154,7 @@ class MatrixTriangularSolveOpTest(test.TestCase):
     # right-hand sides.
     matrix = np.array([[1., 0.], [0., 1.]])
     rhs = np.array([[1., 0.]])
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         self._verifySolve(matrix, rhs)
       with self.assertRaises(ValueError):
@@ -164,7 +164,7 @@ class MatrixTriangularSolveOpTest(test.TestCase):
     # The input should be invertible.
     # The matrix is singular because it has a zero on the diagonal.
     singular_matrix = np.array([[1., 0., -1.], [-1., 0., 1.], [0., -1., 1.]])
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesOpError("Input matrix is not invertible."):
         self._verifySolve(singular_matrix, singular_matrix)
       with self.assertRaisesOpError("Input matrix is not invertible."):
diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py
index 55653489af..5dcdb9e420 100644
--- a/tensorflow/python/kernel_tests/metrics_test.py
+++ b/tensorflow/python/kernel_tests/metrics_test.py
@@ -192,7 +192,7 @@ class MeanTest(test.TestCase):
     self.assertListEqual(ops.get_collection(my_collection_name), [update_op])
 
   def testBasic(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       values_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 2))
       _enqueue_vector(sess, values_queue, [0, 1])
@@ -209,7 +209,7 @@ class MeanTest(test.TestCase):
       self.assertAlmostEqual(1.65, sess.run(mean), 5)
 
   def testUpdateOpsReturnsCurrentValue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       values_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 2))
       _enqueue_vector(sess, values_queue, [0, 1])
@@ -253,7 +253,7 @@ class MeanTest(test.TestCase):
         metrics.mean(values, weights=np.ones((3, 2, 4, 1))),
         metrics.mean(values, weights=np.ones((3, 2, 4, 1, 1))),)
     expected = np.mean(values)
-    with self.test_session():
+    with self.cached_session():
       variables.local_variables_initializer().run()
       for mean_result in mean_results:
         mean, update_op = mean_result
@@ -266,7 +266,7 @@ class MeanTest(test.TestCase):
         np.sum(np.multiply(weights, np.ones_like(values)))
     )
     mean, update_op = metrics.mean(values, weights=weights)
-    with self.test_session():
+    with self.cached_session():
       variables.local_variables_initializer().run()
       self.assertAlmostEqual(expected, update_op.eval(), places=5)
       self.assertAlmostEqual(expected, mean.eval(), places=5)
@@ -330,7 +330,7 @@ class MeanTest(test.TestCase):
 
       # Dynamic shapes.
       with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg):
-        with self.test_session():
+        with self.cached_session():
           _, update_op = metrics.mean(values_placeholder, invalid_weight)
           variables.local_variables_initializer().run()
           update_op.eval(feed_dict={values_placeholder: values})
@@ -359,7 +359,7 @@ class MeanTensorTest(test.TestCase):
     self.assertListEqual(ops.get_collection(my_collection_name), [update_op])
 
   def testBasic(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       values_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 2))
       _enqueue_vector(sess, values_queue, [0, 1])
@@ -376,7 +376,7 @@ class MeanTensorTest(test.TestCase):
       self.assertAllClose([[-0.9 / 4., 3.525]], sess.run(mean))
 
   def testMultiDimensional(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       values_queue = data_flow_ops.FIFOQueue(
           2, dtypes=dtypes_lib.float32, shapes=(2, 2, 2))
       _enqueue_vector(
@@ -397,7 +397,7 @@ class MeanTensorTest(test.TestCase):
       self.assertAllClose([[[1, 2], [1, 2]], [[2, 3], [5, 6]]], sess.run(mean))
 
   def testUpdateOpsReturnsCurrentValue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       values_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 2))
       _enqueue_vector(sess, values_queue, [0, 1])
@@ -418,7 +418,7 @@ class MeanTensorTest(test.TestCase):
       self.assertAllClose([[-0.9 / 4., 3.525]], sess.run(mean), 5)
 
   def testBinaryWeighted1d(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the values.
       values_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 2))
@@ -445,7 +445,7 @@ class MeanTensorTest(test.TestCase):
       self.assertAllClose([[3.25, 0.5]], sess.run(mean), 5)
 
   def testWeighted1d(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the values.
       values_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 2))
@@ -472,7 +472,7 @@ class MeanTensorTest(test.TestCase):
       self.assertAllClose([[0.8, 3.52]], sess.run(mean), 5)
 
   def testWeighted2d_1(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the values.
       values_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 2))
@@ -499,7 +499,7 @@ class MeanTensorTest(test.TestCase):
       self.assertAllClose([[-2.1, 0.5]], sess.run(mean), 5)
 
   def testWeighted2d_2(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the values.
       values_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 2))
@@ -575,7 +575,7 @@ class AccuracyTest(test.TestCase):
         (10, 3), maxval=3, dtype=dtypes_lib.int64, seed=1)
     accuracy, update_op = metrics.accuracy(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -588,7 +588,7 @@ class AccuracyTest(test.TestCase):
         self.assertEqual(initial_accuracy, accuracy.eval())
 
   def testMultipleUpdates(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       preds_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 1))
@@ -618,7 +618,7 @@ class AccuracyTest(test.TestCase):
   def testEffectivelyEquivalentSizes(self):
     predictions = array_ops.ones((40, 1))
     labels = array_ops.ones((40,))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       accuracy, update_op = metrics.accuracy(labels, predictions)
 
       sess.run(variables.local_variables_initializer())
@@ -628,7 +628,7 @@ class AccuracyTest(test.TestCase):
   def testEffectivelyEquivalentSizesWithScalarWeight(self):
     predictions = array_ops.ones((40, 1))
     labels = array_ops.ones((40,))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       accuracy, update_op = metrics.accuracy(labels, predictions, weights=2.0)
 
       sess.run(variables.local_variables_initializer())
@@ -642,7 +642,7 @@ class AccuracyTest(test.TestCase):
     weights = array_ops.expand_dims(ops.convert_to_tensor([100, 1, 1]),
                                     1)  # shape 3, 1
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       accuracy, update_op = metrics.accuracy(labels, predictions, weights)
 
       sess.run(variables.local_variables_initializer())
@@ -662,7 +662,7 @@ class AccuracyTest(test.TestCase):
         dtype=dtypes_lib.int32, name='weights')
     feed_dict = {weights_placeholder: weights}
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       accuracy, update_op = metrics.accuracy(labels, predictions,
                                              weights_placeholder)
 
@@ -674,7 +674,7 @@ class AccuracyTest(test.TestCase):
       self.assertGreater(accuracy.eval(feed_dict=feed_dict), .95)
 
   def testMultipleUpdatesWithWeightedValues(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       preds_queue = data_flow_ops.FIFOQueue(
           4, dtypes=dtypes_lib.float32, shapes=(1, 1))
@@ -746,7 +746,7 @@ class PrecisionTest(test.TestCase):
         (10, 3), maxval=1, dtype=dtypes_lib.int64, seed=1)
     precision, update_op = metrics.precision(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -765,7 +765,7 @@ class PrecisionTest(test.TestCase):
     labels = constant_op.constant(inputs)
     precision, update_op = metrics.precision(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAlmostEqual(1, sess.run(update_op))
       self.assertAlmostEqual(1, precision.eval())
@@ -778,7 +778,7 @@ class PrecisionTest(test.TestCase):
           constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=dtype)
       precision, update_op = metrics.precision(labels, predictions)
 
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         sess.run(variables.local_variables_initializer())
         self.assertAlmostEqual(0.5, update_op.eval())
         self.assertAlmostEqual(0.5, precision.eval())
@@ -789,7 +789,7 @@ class PrecisionTest(test.TestCase):
     precision, update_op = metrics.precision(
         labels, predictions, weights=constant_op.constant([[2], [5]]))
 
-    with self.test_session():
+    with self.cached_session():
       variables.local_variables_initializer().run()
       weighted_tp = 2.0 + 5.0
       weighted_positives = (2.0 + 2.0) + (5.0 + 5.0)
@@ -806,7 +806,7 @@ class PrecisionTest(test.TestCase):
     }
     precision, update_op = metrics.precision(labels, predictions, weights=2)
 
-    with self.test_session():
+    with self.cached_session():
       variables.local_variables_initializer().run()
       weighted_tp = 2.0 + 2.0
       weighted_positives = (2.0 + 2.0) + (2.0 + 2.0)
@@ -826,7 +826,7 @@ class PrecisionTest(test.TestCase):
     precision, update_op = metrics.precision(
         labels, predictions, weights=constant_op.constant([[2], [5]]))
 
-    with self.test_session():
+    with self.cached_session():
       variables.local_variables_initializer().run()
       weighted_tp = 2.0 + 5.0
       weighted_positives = (2.0 + 2.0) + (5.0 + 5.0)
@@ -844,7 +844,7 @@ class PrecisionTest(test.TestCase):
         predictions,
         weights=constant_op.constant([[1, 2, 3, 4], [4, 3, 2, 1]]))
 
-    with self.test_session():
+    with self.cached_session():
       variables.local_variables_initializer().run()
       weighted_tp = 3.0 + 4.0
       weighted_positives = (1.0 + 3.0) + (4.0 + 2.0)
@@ -864,7 +864,7 @@ class PrecisionTest(test.TestCase):
         predictions,
         weights=constant_op.constant([[1, 2, 3, 4], [4, 3, 2, 1]]))
 
-    with self.test_session():
+    with self.cached_session():
       variables.local_variables_initializer().run()
       weighted_tp = 3.0 + 4.0
       weighted_positives = (1.0 + 3.0) + (4.0 + 2.0)
@@ -881,7 +881,7 @@ class PrecisionTest(test.TestCase):
     labels = constant_op.constant(1 - inputs)
     precision, update_op = metrics.precision(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       sess.run(update_op)
       self.assertAlmostEqual(0, precision.eval())
@@ -891,7 +891,7 @@ class PrecisionTest(test.TestCase):
     labels = constant_op.constant([0, 0, 0, 0])
     precision, update_op = metrics.precision(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       sess.run(update_op)
       self.assertEqual(0.0, precision.eval())
@@ -933,7 +933,7 @@ class RecallTest(test.TestCase):
         (10, 3), maxval=1, dtype=dtypes_lib.int64, seed=1)
     recall, update_op = metrics.recall(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -952,7 +952,7 @@ class RecallTest(test.TestCase):
     labels = constant_op.constant(np_inputs)
     recall, update_op = metrics.recall(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       sess.run(update_op)
       self.assertEqual(1, recall.eval())
@@ -965,7 +965,7 @@ class RecallTest(test.TestCase):
           constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=dtype)
       recall, update_op = metrics.recall(labels, predictions)
 
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         sess.run(variables.local_variables_initializer())
         self.assertAlmostEqual(0.5, update_op.eval())
         self.assertAlmostEqual(0.5, recall.eval())
@@ -976,7 +976,7 @@ class RecallTest(test.TestCase):
     weights = constant_op.constant([[2], [5]])
     recall, update_op = metrics.recall(labels, predictions, weights=weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       weighted_tp = 2.0 + 5.0
       weighted_t = (2.0 + 2.0) + (5.0 + 5.0)
@@ -990,7 +990,7 @@ class RecallTest(test.TestCase):
     weights = constant_op.constant([[1, 2, 3, 4], [4, 3, 2, 1]])
     recall, update_op = metrics.recall(labels, predictions, weights=weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       weighted_tp = 3.0 + 1.0
       weighted_t = (2.0 + 3.0) + (4.0 + 1.0)
@@ -1005,7 +1005,7 @@ class RecallTest(test.TestCase):
     labels = constant_op.constant(1 - np_inputs)
     recall, update_op = metrics.recall(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       sess.run(update_op)
       self.assertEqual(0, recall.eval())
@@ -1015,7 +1015,7 @@ class RecallTest(test.TestCase):
     labels = array_ops.zeros((1, 4))
     recall, update_op = metrics.recall(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       sess.run(update_op)
       self.assertEqual(0, recall.eval())
@@ -1055,7 +1055,7 @@ class AUCTest(test.TestCase):
         (10, 3), maxval=1, dtype=dtypes_lib.int64, seed=1)
     auc, update_op = metrics.auc(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -1073,7 +1073,7 @@ class AUCTest(test.TestCase):
   def allCorrectAsExpected(self, curve):
     inputs = np.random.randint(0, 2, size=(100, 1))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32)
       labels = constant_op.constant(inputs)
       auc, update_op = metrics.auc(labels, predictions, curve=curve)
@@ -1084,7 +1084,7 @@ class AUCTest(test.TestCase):
       self.assertEqual(1, auc.eval())
 
   def testSomeCorrect_multipleLabelDtypes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for label_dtype in (
           dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
         predictions = constant_op.constant(
@@ -1099,7 +1099,7 @@ class AUCTest(test.TestCase):
         self.assertAlmostEqual(0.5, auc.eval())
 
   def testWeighted1d(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32)
       labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4))
@@ -1112,7 +1112,7 @@ class AUCTest(test.TestCase):
       self.assertAlmostEqual(0.5, auc.eval(), 5)
 
   def testWeighted2d(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32)
       labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4))
@@ -1127,7 +1127,7 @@ class AUCTest(test.TestCase):
   # Regarding the AUC-PR tests: note that the preferred method when
   # calculating AUC-PR is summation_method='careful_interpolation'.
   def testCorrectAUCPRSpecialCase(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [0.1, 0.4, 0.35, 0.8], shape=(1, 4), dtype=dtypes_lib.float32)
       labels = constant_op.constant([0, 0, 1, 1], shape=(1, 4))
@@ -1141,7 +1141,7 @@ class AUCTest(test.TestCase):
       self.assertAlmostEqual(expected, auc.eval(), delta=1e-3)
 
   def testCorrectAnotherAUCPRSpecialCase(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81],
           shape=(1, 7),
@@ -1157,7 +1157,7 @@ class AUCTest(test.TestCase):
       self.assertAlmostEqual(expected, auc.eval(), delta=1e-3)
 
   def testThirdCorrectAUCPRSpecialCase(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5],
           shape=(1, 7),
@@ -1173,7 +1173,7 @@ class AUCTest(test.TestCase):
       self.assertAlmostEqual(expected, auc.eval(), delta=1e-3)
 
   def testIncorrectAUCPRSpecialCase(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [0.1, 0.4, 0.35, 0.8], shape=(1, 4), dtype=dtypes_lib.float32)
       labels = constant_op.constant([0, 0, 1, 1], shape=(1, 4))
@@ -1186,7 +1186,7 @@ class AUCTest(test.TestCase):
       self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-3)
 
   def testAnotherIncorrectAUCPRSpecialCase(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81],
           shape=(1, 7),
@@ -1201,7 +1201,7 @@ class AUCTest(test.TestCase):
       self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-3)
 
   def testThirdIncorrectAUCPRSpecialCase(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5],
           shape=(1, 7),
@@ -1218,7 +1218,7 @@ class AUCTest(test.TestCase):
   def testAllIncorrect(self):
     inputs = np.random.randint(0, 2, size=(100, 1))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32)
       labels = constant_op.constant(1 - inputs, dtype=dtypes_lib.float32)
       auc, update_op = metrics.auc(labels, predictions)
@@ -1229,7 +1229,7 @@ class AUCTest(test.TestCase):
       self.assertAlmostEqual(0, auc.eval())
 
   def testZeroTruePositivesAndFalseNegativesGivesOneAUC(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = array_ops.zeros([4], dtype=dtypes_lib.float32)
       labels = array_ops.zeros([4])
       auc, update_op = metrics.auc(labels, predictions)
@@ -1240,7 +1240,7 @@ class AUCTest(test.TestCase):
       self.assertAlmostEqual(1, auc.eval(), 6)
 
   def testRecallOneAndPrecisionOneGivesOnePRAUC(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = array_ops.ones([4], dtype=dtypes_lib.float32)
       labels = array_ops.ones([4])
       auc, update_op = metrics.auc(labels, predictions, curve='PR')
@@ -1301,7 +1301,7 @@ class AUCTest(test.TestCase):
         scale=1.0, size=num_samples)):
       expected_auc = self.np_auc(predictions, labels, weights)
 
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         enqueue_ops = [[] for i in range(num_batches)]
         tf_predictions = _enqueue_as_batches(predictions, enqueue_ops)
         tf_labels = _enqueue_as_batches(labels, enqueue_ops)
@@ -1370,7 +1370,7 @@ class SpecificityAtSensitivityTest(test.TestCase):
     specificity, update_op = metrics.specificity_at_sensitivity(
         labels, predictions, sensitivity=0.7)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -1390,7 +1390,7 @@ class SpecificityAtSensitivityTest(test.TestCase):
     specificity, update_op = metrics.specificity_at_sensitivity(
         labels, predictions, sensitivity=0.7)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(1, sess.run(update_op))
       self.assertEqual(1, specificity.eval())
@@ -1405,7 +1405,7 @@ class SpecificityAtSensitivityTest(test.TestCase):
     specificity, update_op = metrics.specificity_at_sensitivity(
         labels, predictions, sensitivity=0.8)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAlmostEqual(1.0, sess.run(update_op))
       self.assertAlmostEqual(1.0, specificity.eval())
@@ -1420,7 +1420,7 @@ class SpecificityAtSensitivityTest(test.TestCase):
     specificity, update_op = metrics.specificity_at_sensitivity(
         labels, predictions, sensitivity=0.4)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       self.assertAlmostEqual(0.6, sess.run(update_op))
@@ -1439,7 +1439,7 @@ class SpecificityAtSensitivityTest(test.TestCase):
       specificity, update_op = metrics.specificity_at_sensitivity(
           labels, predictions, weights=weights, sensitivity=0.4)
 
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         sess.run(variables.local_variables_initializer())
 
         self.assertAlmostEqual(0.6, sess.run(update_op))
@@ -1457,7 +1457,7 @@ class SpecificityAtSensitivityTest(test.TestCase):
     specificity, update_op = metrics.specificity_at_sensitivity(
         labels, predictions, weights=weights, sensitivity=0.4)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       self.assertAlmostEqual(8.0 / 15.0, sess.run(update_op))
@@ -1507,7 +1507,7 @@ class SensitivityAtSpecificityTest(test.TestCase):
     sensitivity, update_op = metrics.sensitivity_at_specificity(
         labels, predictions, specificity=0.7)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -1527,7 +1527,7 @@ class SensitivityAtSpecificityTest(test.TestCase):
     specificity, update_op = metrics.sensitivity_at_specificity(
         labels, predictions, specificity=0.7)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(1, sess.run(update_op))
       self.assertEqual(1, specificity.eval())
@@ -1542,7 +1542,7 @@ class SensitivityAtSpecificityTest(test.TestCase):
     specificity, update_op = metrics.sensitivity_at_specificity(
         labels, predictions, specificity=0.8)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAlmostEqual(0.8, sess.run(update_op))
       self.assertAlmostEqual(0.8, specificity.eval())
@@ -1557,7 +1557,7 @@ class SensitivityAtSpecificityTest(test.TestCase):
     specificity, update_op = metrics.sensitivity_at_specificity(
         labels, predictions, specificity=0.4)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAlmostEqual(0.6, sess.run(update_op))
       self.assertAlmostEqual(0.6, specificity.eval())
@@ -1576,7 +1576,7 @@ class SensitivityAtSpecificityTest(test.TestCase):
       specificity, update_op = metrics.sensitivity_at_specificity(
           labels, predictions, weights=weights, specificity=0.4)
 
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         sess.run(variables.local_variables_initializer())
         self.assertAlmostEqual(0.675, sess.run(update_op))
         self.assertAlmostEqual(0.675, specificity.eval())
@@ -1638,7 +1638,7 @@ class PrecisionRecallThresholdsTest(test.TestCase):
                                                     thresholds)
     rec, rec_op = metrics.recall_at_thresholds(labels, predictions, thresholds)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates, then verify idempotency.
@@ -1654,7 +1654,7 @@ class PrecisionRecallThresholdsTest(test.TestCase):
   def testAllCorrect(self):
     inputs = np.random.randint(0, 2, size=(100, 1))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32)
       labels = constant_op.constant(inputs)
       thresholds = [0.5]
@@ -1670,7 +1670,7 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       self.assertEqual(1, rec.eval())
 
   def testSomeCorrect_multipleLabelDtypes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for label_dtype in (
           dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
         predictions = constant_op.constant(
@@ -1692,7 +1692,7 @@ class PrecisionRecallThresholdsTest(test.TestCase):
   def testAllIncorrect(self):
     inputs = np.random.randint(0, 2, size=(100, 1))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32)
       labels = constant_op.constant(1 - inputs, dtype=dtypes_lib.float32)
       thresholds = [0.5]
@@ -1708,7 +1708,7 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       self.assertAlmostEqual(0, rec.eval())
 
   def testWeights1d(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [[1, 0], [1, 0]], shape=(2, 2), dtype=dtypes_lib.float32)
       labels = constant_op.constant([[0, 1], [1, 0]], shape=(2, 2))
@@ -1738,7 +1738,7 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       self.assertAlmostEqual(0.0, rec_high.eval(), places=5)
 
   def testWeights2d(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [[1, 0], [1, 0]], shape=(2, 2), dtype=dtypes_lib.float32)
       labels = constant_op.constant([[0, 1], [1, 0]], shape=(2, 2))
@@ -1768,7 +1768,7 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       self.assertAlmostEqual(0.0, rec_high.eval(), places=5)
 
   def testExtremeThresholds(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32)
       labels = constant_op.constant([0, 1, 1, 1], shape=(1, 4))
@@ -1792,7 +1792,7 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       self.assertAlmostEqual(0.0, rec_high.eval())
 
   def testZeroLabelsPredictions(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = array_ops.zeros([4], dtype=dtypes_lib.float32)
       labels = array_ops.zeros([4])
       thresholds = [0.5]
@@ -1842,7 +1842,7 @@ class PrecisionRecallThresholdsTest(test.TestCase):
     labels = labels.astype(np.float32)
     predictions = predictions.astype(np.float32)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Reshape the data so its easy to queue up:
       predictions_batches = predictions.reshape((batch_size, num_batches))
       labels_batches = labels.reshape((batch_size, num_batches))
@@ -2801,7 +2801,7 @@ class MeanAbsoluteErrorTest(test.TestCase):
     labels = random_ops.random_normal((10, 3), seed=2)
     error, update_op = metrics.mean_absolute_error(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -2822,7 +2822,7 @@ class MeanAbsoluteErrorTest(test.TestCase):
 
     error, update_op = metrics.mean_absolute_error(labels, predictions, weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(3, sess.run(update_op))
       self.assertEqual(3, error.eval())
@@ -2866,7 +2866,7 @@ class MeanRelativeErrorTest(test.TestCase):
     error, update_op = metrics.mean_relative_error(labels, predictions,
                                                    normalizer)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -2891,7 +2891,7 @@ class MeanRelativeErrorTest(test.TestCase):
     error, update_op = metrics.mean_relative_error(
         labels, predictions, normalizer=labels)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(expected_error, sess.run(update_op))
       self.assertEqual(expected_error, error.eval())
@@ -2907,7 +2907,7 @@ class MeanRelativeErrorTest(test.TestCase):
     error, update_op = metrics.mean_relative_error(
         labels, predictions, normalizer=array_ops.zeros_like(labels))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(0.0, sess.run(update_op))
       self.assertEqual(0.0, error.eval())
@@ -2945,7 +2945,7 @@ class MeanSquaredErrorTest(test.TestCase):
     labels = random_ops.random_normal((10, 3), seed=2)
     error, update_op = metrics.mean_squared_error(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -2963,7 +2963,7 @@ class MeanSquaredErrorTest(test.TestCase):
 
     error, update_op = metrics.mean_squared_error(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(0, sess.run(update_op))
       self.assertEqual(0, error.eval())
@@ -2976,7 +2976,7 @@ class MeanSquaredErrorTest(test.TestCase):
 
     error, update_op = metrics.mean_squared_error(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(6, sess.run(update_op))
       self.assertEqual(6, error.eval())
@@ -2990,13 +2990,13 @@ class MeanSquaredErrorTest(test.TestCase):
 
     error, update_op = metrics.mean_squared_error(labels, predictions, weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(13, sess.run(update_op))
       self.assertEqual(13, error.eval())
 
   def testMultipleBatchesOfSizeOne(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       preds_queue = data_flow_ops.FIFOQueue(
           2, dtypes=dtypes_lib.float32, shapes=(1, 3))
@@ -3020,7 +3020,7 @@ class MeanSquaredErrorTest(test.TestCase):
       self.assertAlmostEqual(208.0 / 6, error.eval(), 5)
 
   def testMetricsComputedConcurrently(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates one set of predictions.
       preds_queue0 = data_flow_ops.FIFOQueue(
           2, dtypes=dtypes_lib.float32, shapes=(1, 3))
@@ -3063,7 +3063,7 @@ class MeanSquaredErrorTest(test.TestCase):
       self.assertAlmostEqual(79.0 / 6, mse1, 5)
 
   def testMultipleMetricsOnMultipleBatchesOfSizeOne(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       preds_queue = data_flow_ops.FIFOQueue(
           2, dtypes=dtypes_lib.float32, shapes=(1, 3))
@@ -3122,7 +3122,7 @@ class RootMeanSquaredErrorTest(test.TestCase):
     labels = random_ops.random_normal((10, 3), seed=2)
     error, update_op = metrics.root_mean_squared_error(labels, predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -3135,7 +3135,7 @@ class RootMeanSquaredErrorTest(test.TestCase):
         self.assertEqual(initial_error, error.eval())
 
   def testSingleUpdateZeroError(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           0.0, shape=(1, 3), dtype=dtypes_lib.float32)
       labels = constant_op.constant(0.0, shape=(1, 3), dtype=dtypes_lib.float32)
@@ -3148,7 +3148,7 @@ class RootMeanSquaredErrorTest(test.TestCase):
       self.assertEqual(0, rmse.eval())
 
   def testSingleUpdateWithError(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [2, 4, 6], shape=(1, 3), dtype=dtypes_lib.float32)
       labels = constant_op.constant(
@@ -3161,7 +3161,7 @@ class RootMeanSquaredErrorTest(test.TestCase):
       self.assertAlmostEqual(math.sqrt(6), rmse.eval(), 5)
 
   def testSingleUpdateWithErrorAndWeights(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [2, 4, 6, 8], shape=(1, 4), dtype=dtypes_lib.float32)
       labels = constant_op.constant(
@@ -3220,7 +3220,7 @@ class MeanCosineDistanceTest(test.TestCase):
     labels = random_ops.random_normal((10, 3), seed=2)
     error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=1)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -3242,7 +3242,7 @@ class MeanCosineDistanceTest(test.TestCase):
 
     error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=2)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(0, sess.run(update_op))
       self.assertEqual(0, error.eval())
@@ -3258,7 +3258,7 @@ class MeanCosineDistanceTest(test.TestCase):
 
     error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=2)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAlmostEqual(1, sess.run(update_op), 5)
       self.assertAlmostEqual(1, error.eval(), 5)
@@ -3279,7 +3279,7 @@ class MeanCosineDistanceTest(test.TestCase):
         np_labels, shape=(3, 1, 3), dtype=dtypes_lib.float32)
     error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=2)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAlmostEqual(1.0, sess.run(update_op), 5)
       self.assertAlmostEqual(1.0, error.eval(), 5)
@@ -3298,7 +3298,7 @@ class MeanCosineDistanceTest(test.TestCase):
     error, update_op = metrics.mean_cosine_distance(
         labels, predictions, dim=2, weights=weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(0, sess.run(update_op))
       self.assertEqual(0, error.eval())
@@ -3317,7 +3317,7 @@ class MeanCosineDistanceTest(test.TestCase):
     error, update_op = metrics.mean_cosine_distance(
         labels, predictions, dim=2, weights=weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertEqual(1.5, update_op.eval())
       self.assertEqual(1.5, error.eval())
@@ -3352,7 +3352,7 @@ class PcntBelowThreshTest(test.TestCase):
     self.assertListEqual(ops.get_collection(my_collection_name), [update_op])
 
   def testOneUpdate(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       values = constant_op.constant(
           [2, 4, 6, 8], shape=(1, 4), dtype=dtypes_lib.float32)
 
@@ -3369,7 +3369,7 @@ class PcntBelowThreshTest(test.TestCase):
       self.assertAlmostEqual(0.0, pcnt2, 5)
 
   def testSomePresentOneUpdate(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       values = constant_op.constant(
           [2, 4, 6, 8], shape=(1, 4), dtype=dtypes_lib.float32)
       weights = constant_op.constant(
@@ -3445,7 +3445,7 @@ class MeanIOUTest(test.TestCase):
     mean_iou, update_op = metrics.mean_iou(
         labels, predictions, num_classes=num_classes)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -3459,7 +3459,7 @@ class MeanIOUTest(test.TestCase):
 
   def testMultipleUpdates(self):
     num_classes = 3
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       preds_queue = data_flow_ops.FIFOQueue(
           5, dtypes=dtypes_lib.int32, shapes=(1, 1))
@@ -3490,7 +3490,7 @@ class MeanIOUTest(test.TestCase):
 
   def testMultipleUpdatesWithWeights(self):
     num_classes = 2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       preds_queue = data_flow_ops.FIFOQueue(
           6, dtypes=dtypes_lib.int32, shapes=(1, 1))
@@ -3538,7 +3538,7 @@ class MeanIOUTest(test.TestCase):
     # one class, and thus there is one row and one column with
     # zero entries in the confusion matrix.
     num_classes = 3
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       # There is no prediction for class 2.
       preds_queue = data_flow_ops.FIFOQueue(
@@ -3585,7 +3585,7 @@ class MeanIOUTest(test.TestCase):
         ],
         0)
     num_classes = 2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
       sess.run(variables.local_variables_initializer())
       confusion_matrix = update_op.eval()
@@ -3597,7 +3597,7 @@ class MeanIOUTest(test.TestCase):
     predictions = array_ops.zeros([40])
     labels = array_ops.zeros([40])
     num_classes = 1
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
       sess.run(variables.local_variables_initializer())
       self.assertEqual(40, update_op.eval()[0])
@@ -3607,7 +3607,7 @@ class MeanIOUTest(test.TestCase):
     predictions = array_ops.zeros([40])
     labels = array_ops.ones([40])
     num_classes = 2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual([[0, 0], [40, 0]], update_op.eval())
@@ -3637,7 +3637,7 @@ class MeanIOUTest(test.TestCase):
                         0, shape=[1])
         ],
         0)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(
           labels, predictions, num_classes, weights=weights)
       sess.run(variables.local_variables_initializer())
@@ -3657,7 +3657,7 @@ class MeanIOUTest(test.TestCase):
         [[0, 0, 2, 1, 1, 1],
          [1, 1, 2, 0, 0, 0]]])
     num_classes = 3
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual([[7, 4, 3], [3, 5, 2], [0, 0, 0]], update_op.eval())
@@ -3669,7 +3669,7 @@ class MeanIOUTest(test.TestCase):
     labels = constant_op.constant([0])
     predictions = constant_op.constant([0])
     num_classes = 2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual([[1, 0], [0, 0]], update_op.eval())
@@ -3687,7 +3687,7 @@ class MeanIOUTest(test.TestCase):
         [[0, 0, 0, 1, 1, 1],
          [1, 1, 1, 0, 0, 0]]])
     num_classes = 3
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual([[9, 5, 0], [3, 7, 0], [0, 0, 0]], update_op.eval())
@@ -3751,7 +3751,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
     mean_accuracy, update_op = metrics.mean_per_class_accuracy(
         labels, predictions, num_classes=num_classes)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -3764,7 +3764,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
         self.assertEqual(initial_mean_accuracy, mean_accuracy.eval())
 
     num_classes = 3
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       preds_queue = data_flow_ops.FIFOQueue(
           5, dtypes=dtypes_lib.int32, shapes=(1, 1))
@@ -3796,7 +3796,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
 
   def testMultipleUpdatesWithWeights(self):
     num_classes = 2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       preds_queue = data_flow_ops.FIFOQueue(
           6, dtypes=dtypes_lib.int32, shapes=(1, 1))
@@ -3844,7 +3844,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
     # one class, and thus there is one row and one column with
     # zero entries in the confusion matrix.
     num_classes = 3
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create the queue that populates the predictions.
       # There is no prediction for class 2.
       preds_queue = data_flow_ops.FIFOQueue(
@@ -3880,7 +3880,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
     predictions = array_ops.zeros([40])
     labels = array_ops.zeros([40])
     num_classes = 1
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       mean_accuracy, update_op = metrics.mean_per_class_accuracy(
           labels, predictions, num_classes)
       sess.run(variables.local_variables_initializer())
@@ -3891,7 +3891,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
     predictions = array_ops.zeros([40])
     labels = array_ops.ones([40])
     num_classes = 2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       mean_accuracy, update_op = metrics.mean_per_class_accuracy(
           labels, predictions, num_classes)
       sess.run(variables.local_variables_initializer())
@@ -3910,7 +3910,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
         constant_op.constant(0, shape=[1]), constant_op.constant(1, shape=[8]),
         constant_op.constant(0, shape=[1])
     ], 0)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       mean_accuracy, update_op = metrics.mean_per_class_accuracy(
           labels, predictions, num_classes, weights=weights)
       sess.run(variables.local_variables_initializer())
@@ -3944,7 +3944,7 @@ class FalseNegativesTest(test.TestCase):
     tn, tn_update_op = metrics.false_negatives(
         labels=labels, predictions=predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(3., tn_update_op.eval())
@@ -3963,7 +3963,7 @@ class FalseNegativesTest(test.TestCase):
     tn, tn_update_op = metrics.false_negatives(
         labels=labels, predictions=predictions, weights=weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(5., tn_update_op.eval())
@@ -3993,7 +3993,7 @@ class FalseNegativesAtThresholdsTest(test.TestCase):
     fn, fn_update_op = metrics.false_negatives_at_thresholds(
         predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual((0, 0, 0), fn.eval())
       self.assertAllEqual((0, 2, 3), fn_update_op.eval())
@@ -4012,7 +4012,7 @@ class FalseNegativesAtThresholdsTest(test.TestCase):
         weights=((3.0,), (5.0,), (7.0,)),
         thresholds=[0.15, 0.5, 0.85])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual((0.0, 0.0, 0.0), fn.eval())
       self.assertAllEqual((0.0, 8.0, 11.0), fn_update_op.eval())
@@ -4043,7 +4043,7 @@ class FalsePositivesTest(test.TestCase):
     tn, tn_update_op = metrics.false_positives(
         labels=labels, predictions=predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(7., tn_update_op.eval())
@@ -4062,7 +4062,7 @@ class FalsePositivesTest(test.TestCase):
     tn, tn_update_op = metrics.false_positives(
         labels=labels, predictions=predictions, weights=weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(14., tn_update_op.eval())
@@ -4092,7 +4092,7 @@ class FalsePositivesAtThresholdsTest(test.TestCase):
     fp, fp_update_op = metrics.false_positives_at_thresholds(
         predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual((0, 0, 0), fp.eval())
       self.assertAllEqual((7, 4, 2), fp_update_op.eval())
@@ -4113,7 +4113,7 @@ class FalsePositivesAtThresholdsTest(test.TestCase):
                  (19.0, 23.0, 29.0, 31.0)),
         thresholds=[0.15, 0.5, 0.85])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual((0.0, 0.0, 0.0), fp.eval())
       self.assertAllEqual((125.0, 42.0, 12.0), fp_update_op.eval())
@@ -4144,7 +4144,7 @@ class TrueNegativesTest(test.TestCase):
     tn, tn_update_op = metrics.true_negatives(
         labels=labels, predictions=predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(3., tn_update_op.eval())
@@ -4163,7 +4163,7 @@ class TrueNegativesTest(test.TestCase):
     tn, tn_update_op = metrics.true_negatives(
         labels=labels, predictions=predictions, weights=weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(4., tn_update_op.eval())
@@ -4193,7 +4193,7 @@ class TrueNegativesAtThresholdsTest(test.TestCase):
     tn, tn_update_op = metrics.true_negatives_at_thresholds(
         predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual((0, 0, 0), tn.eval())
       self.assertAllEqual((2, 5, 7), tn_update_op.eval())
@@ -4212,7 +4212,7 @@ class TrueNegativesAtThresholdsTest(test.TestCase):
         weights=((0.0, 2.0, 3.0, 5.0),),
         thresholds=[0.15, 0.5, 0.85])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual((0.0, 0.0, 0.0), tn.eval())
       self.assertAllEqual((5.0, 15.0, 23.0), tn_update_op.eval())
@@ -4243,7 +4243,7 @@ class TruePositivesTest(test.TestCase):
     tn, tn_update_op = metrics.true_positives(
         labels=labels, predictions=predictions)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(7., tn_update_op.eval())
@@ -4262,7 +4262,7 @@ class TruePositivesTest(test.TestCase):
     tn, tn_update_op = metrics.true_positives(
         labels=labels, predictions=predictions, weights=weights)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(12., tn_update_op.eval())
@@ -4292,7 +4292,7 @@ class TruePositivesAtThresholdsTest(test.TestCase):
     tp, tp_update_op = metrics.true_positives_at_thresholds(
         predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual((0, 0, 0), tp.eval())
       self.assertAllEqual((3, 1, 0), tp_update_op.eval())
@@ -4309,7 +4309,7 @@ class TruePositivesAtThresholdsTest(test.TestCase):
         predictions=predictions, labels=labels, weights=37.0,
         thresholds=[0.15, 0.5, 0.85])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       self.assertAllEqual((0.0, 0.0, 0.0), tp.eval())
       self.assertAllEqual((111.0, 37.0, 0.0), tp_update_op.eval())
diff --git a/tensorflow/python/kernel_tests/pad_op_test.py b/tensorflow/python/kernel_tests/pad_op_test.py
index 944de217a1..e415d7879e 100644
--- a/tensorflow/python/kernel_tests/pad_op_test.py
+++ b/tensorflow/python/kernel_tests/pad_op_test.py
@@ -188,7 +188,7 @@ class PadOpTest(test.TestCase):
                       mode="SYMMETRIC").eval()
 
   def testInvalid(self):
-    with self.test_session():
+    with self.cached_session():
       x = [[1, 2, 3], [4, 5, 6]]
       with self.assertRaisesRegexp(ValueError, "Unknown padding mode"):
         array_ops.pad(x, [[1, 0], [2, 1]], mode="weird").eval()
diff --git a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
index d8c3f9823c..95f3dcceea 100644
--- a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
+++ b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
@@ -95,13 +95,13 @@ class PaddingFIFOQueueTest(test.TestCase):
       """, q.queue_ref.op.node_def)
 
   def testEnqueue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       enqueue_op = q.enqueue((10.0,))
       enqueue_op.run()
 
   def testEnqueueWithShape(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(
           10, dtypes_lib.float32, shapes=((3, 2),))
       enqueue_correct_op = q.enqueue(([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],))
@@ -111,14 +111,14 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual(1, q.size().eval())
 
   def testEnqueueManyWithShape(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(
           10, [dtypes_lib.int32, dtypes_lib.int32], shapes=[(), (2,)])
       q.enqueue_many([[1, 2, 3, 4], [[1, 1], [2, 2], [3, 3], [4, 4]]]).run()
       self.assertEqual(4, q.size().eval())
 
   def testParallelEnqueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
       enqueue_ops = [q.enqueue((x,)) for x in elems]
@@ -144,7 +144,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertItemsEqual(elems, results)
 
   def testParallelDequeue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
       enqueue_ops = [q.enqueue((x,)) for x in elems]
@@ -168,7 +168,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertItemsEqual(elems, results)
 
   def testDequeue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0]
       enqueue_ops = [q.enqueue((x,)) for x in elems]
@@ -182,7 +182,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertEqual([elems[i]], vals)
 
   def testEnqueueAndBlockingDequeue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(3, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0]
       enqueue_ops = [q.enqueue((x,)) for x in elems]
@@ -212,7 +212,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertEqual([elem], result)
 
   def testMultiEnqueueAndDequeue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10,
                                          (dtypes_lib.int32, dtypes_lib.float32),
                                          ((), ()))
@@ -230,12 +230,12 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertEqual([y], y_val)
 
   def testQueueSizeEmpty(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       self.assertEqual([0], q.size().eval())
 
   def testQueueSizeAfterEnqueueAndDequeue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       enqueue_op = q.enqueue((10.0,))
       dequeued_t = q.dequeue()
@@ -248,7 +248,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual(0, size.eval())
 
   def testEnqueueMany(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -261,7 +261,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertEqual([elems[i % 4]], vals)
 
   def testEmptyEnqueueMany(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, (
           (None, None),))
       empty_t = constant_op.constant(
@@ -274,7 +274,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual([0], size_t.eval())
 
   def testEmptyDequeueMany(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, shapes=((),))
       enqueue_op = q.enqueue((10.0,))
       dequeued_t = q.dequeue_many(0)
@@ -284,7 +284,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual([], dequeued_t.eval().tolist())
 
   def testEmptyDequeueManyWithDynamicShape(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(
           10, dtypes_lib.float32, shapes=((None,),))
       enqueue_op = q.enqueue(([10.0],))
@@ -295,7 +295,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual([], dequeued_t.eval().tolist())
 
   def testEmptyDequeueUpToWithDynamicShape(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(
           10, dtypes_lib.float32, shapes=((None,),))
       enqueue_op = q.enqueue(([10.0],))
@@ -306,7 +306,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual([], dequeued_t.eval().tolist())
 
   def testConstructPaddingFIFOQueueWithNoShape(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
           ValueError,
           r"When providing partial shapes, a list of shapes must be provided."):
@@ -314,7 +314,7 @@ class PaddingFIFOQueueTest(test.TestCase):
                                        None).queue_ref.eval()
 
   def testMultiEnqueueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10,
                                          (dtypes_lib.float32, dtypes_lib.int32),
                                          ((), (2,)))
@@ -332,7 +332,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertAllEqual(int_elems[i % 4], int_val)
 
   def testMultiEnqueueManyWithPartiallyKnownShapes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(
           10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (None,)))
       float_elems = [10.0, 20.0, 30.0, 40.0]
@@ -349,7 +349,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertAllEqual(int_elems[i % 4], int_val)
 
   def testDequeueMany(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -361,7 +361,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertAllEqual(elems[4:8], dequeued_t.eval())
 
   def testDequeueUpToNoBlocking(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -373,7 +373,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertAllEqual(elems[4:8], dequeued_t.eval())
 
   def testMultiDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(
           10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (2,)))
       float_elems = [
@@ -404,7 +404,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual(int_val.shape, dequeued_single_t[1].get_shape())
 
   def testMultiDequeueManyWithPartiallyKnownShapes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(
           10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (None,)))
       float_elems = [
@@ -443,7 +443,7 @@ class PaddingFIFOQueueTest(test.TestCase):
               dequeued_single_t[1].get_shape()))
 
   def testMultiDequeueManyWithPartiallyKnownShapesAndVariableSizeInput(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(
           10, (dtypes_lib.string, dtypes_lib.int32),
           shapes=((None,), (1, None)))
@@ -484,7 +484,7 @@ class PaddingFIFOQueueTest(test.TestCase):
               dequeued_single_t[1].get_shape()))
 
   def testMultiDequeueUpToPartiallyKnownShapesAndVariableInputNoBlocking(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(
           10, (dtypes_lib.string, dtypes_lib.int32),
           shapes=((None,), (1, None)))
@@ -525,7 +525,7 @@ class PaddingFIFOQueueTest(test.TestCase):
               dequeued_single_t[1].get_shape()))
 
   def testHighDimension(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.int32, ((4, 4, 4, 4),))
       elems = np.array([[[[[x] * 4] * 4] * 4] * 4 for x in range(10)], np.int32)
       enqueue_op = q.enqueue_many((elems,))
@@ -535,7 +535,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertAllEqual(dequeued_t.eval(), elems)
 
   def testPartiallyKnownHighDimension(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.int32, (
           (4, None, 4, None),))
       elems = np.array([[[[[x] * 4] * 4] * 4] * 4 for x in range(10)], np.int32)
@@ -592,7 +592,7 @@ class PaddingFIFOQueueTest(test.TestCase):
                       array_ops.placeholder(dtypes_lib.int32)))
 
   def testEnqueueWrongPartiallyKnownShapeAtRuntime(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # First dimension of second component is unknown, second
       # dimension must be 3.
       q = data_flow_ops.PaddingFIFOQueue(10,
@@ -607,7 +607,7 @@ class PaddingFIFOQueueTest(test.TestCase):
                  feed_dict={elems_bad: np.array([1] * 12).reshape((3, 4))})
 
   def testEnqueueDequeueManyWrongPartiallyKnownShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # First dimension of second component is unknown, second
       # dimension must be 3.
       q = data_flow_ops.PaddingFIFOQueue(10,
@@ -625,7 +625,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         dequeued_t.eval()
 
   def testParallelEnqueueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(1000, dtypes_lib.float32, shapes=((),))
       elems = [10.0 * x for x in range(100)]
       enqueue_op = q.enqueue_many((elems,))
@@ -644,7 +644,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertItemsEqual(dequeued_t.eval(), elems * 10)
 
   def testParallelDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(1000, dtypes_lib.float32, shapes=((),))
       elems = [10.0 * x for x in range(1000)]
       enqueue_op = q.enqueue_many((elems,))
@@ -666,7 +666,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertItemsEqual(elems, dequeued_elems)
 
   def testParallelDequeueUpTo(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(1000, dtypes_lib.float32, shapes=((),))
       elems = [10.0 * x for x in range(1000)]
       enqueue_op = q.enqueue_many((elems,))
@@ -690,7 +690,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertItemsEqual(elems, dequeued_elems)
 
   def testParallelEnqueueAndDequeue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(50, dtypes_lib.float32, shapes=((),))
       initial_elements = [10.0] * 49
       q.enqueue_many((initial_elements,)).run()
@@ -723,7 +723,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertTrue(elem in (10.0, 20.0))
 
   def testMixtureOfEnqueueAndEnqueueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.int32, shapes=((),))
       enqueue_placeholder = array_ops.placeholder(dtypes_lib.int32, shape=())
       enqueue_op = q.enqueue((enqueue_placeholder,))
@@ -759,7 +759,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual(0, q.size().eval())
 
   def testMixtureOfDequeueAndDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.int32, shapes=((),))
       enqueue_op = q.enqueue_many((np.arange(250, dtype=np.int32),))
       dequeued_t = q.dequeue()
@@ -793,7 +793,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual(0, q.size().eval())
 
   def testBlockingDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -820,7 +820,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertAllEqual(elems, dequeued_elems)
 
   def testBlockingDequeueUpTo(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -847,7 +847,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertAllEqual(elems, dequeued_elems)
 
   def testDequeueManyWithTensorParameter(self):
-    with self.test_session():
+    with self.cached_session():
       # Define a first queue that contains integer counts.
       dequeue_counts = [random.randint(1, 10) for _ in range(100)]
       count_q = data_flow_ops.PaddingFIFOQueue(100, dtypes_lib.int32, ((),))
@@ -872,7 +872,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual(elems, dequeued_elems)
 
   def testDequeueFromClosedQueue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -890,7 +890,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         dequeued_t.eval()
 
   def testBlockingDequeueFromClosedQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -916,7 +916,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testDequeueUpToFromClosedQueueReturnsRemainder(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -938,7 +938,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testBlockingDequeueFromClosedEmptyQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       close_op = q.close()
       dequeued_t = q.dequeue()
@@ -958,7 +958,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testBlockingDequeueManyFromClosedQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -983,7 +983,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testBlockingDequeueManyButNotAllFromClosedQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1008,7 +1008,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testEnqueueManyLargerThanCapacityWithConcurrentDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1045,7 +1045,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       close_thread.join()
 
   def testClosedBlockingDequeueManyRestoresPartialBatch(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(4, (dtypes_lib.float32,
                                              dtypes_lib.float32), ((), ()))
       elems_a = [1.0, 2.0, 3.0]
@@ -1078,7 +1078,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual(0, q.size().eval())
 
   def testBlockingDequeueManyFromClosedEmptyQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       close_op = q.close()
       dequeued_t = q.dequeue_many(4)
@@ -1098,7 +1098,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testBlockingDequeueUpToFromClosedEmptyQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       close_op = q.close()
       dequeued_t = q.dequeue_up_to(4)
@@ -1118,7 +1118,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeue_thread.join()
 
   def testEnqueueToClosedQueue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       enqueue_op = q.enqueue((10.0,))
       close_op = q.close()
@@ -1131,7 +1131,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         enqueue_op.run()
 
   def testEnqueueManyToClosedQueue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1145,7 +1145,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         enqueue_op.run()
 
   def testBlockingEnqueueToFullQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1168,7 +1168,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       thread.join()
 
   def testBlockingEnqueueManyToFullQueue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1195,7 +1195,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       thread.join()
 
   def testBlockingEnqueueBeforeClose(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0, 40.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1232,7 +1232,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual(0, q.size().eval())
 
   def testBlockingEnqueueManyBeforeClose(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),))
       elems = [10.0, 20.0, 30.0]
       enqueue_op = q.enqueue_many((elems,))
@@ -1265,7 +1265,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertEqual(elem, dequeued_t.eval())
 
   def testDoesNotLoseValue(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PaddingFIFOQueue(1, dtypes_lib.float32, ((),))
       enqueue_op = q.enqueue((10.0,))
       size_t = q.size()
@@ -1275,7 +1275,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertEqual(size_t.eval(), [1])
 
   def testSharedQueueSameSession(self):
-    with self.test_session():
+    with self.cached_session():
       q1 = data_flow_ops.PaddingFIFOQueue(
           1, dtypes_lib.float32, ((),), shared_name="shared_queue")
       q1.enqueue((10.0,)).run()
@@ -1305,7 +1305,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertEqual(q2_size_t.eval(), [0])
 
   def testIncompatibleSharedQueueErrors(self):
-    with self.test_session():
+    with self.cached_session():
       q_a_1 = data_flow_ops.PaddingFIFOQueue(
           10, dtypes_lib.float32, ((),), shared_name="q_a")
       q_a_2 = data_flow_ops.PaddingFIFOQueue(
@@ -1356,7 +1356,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         q_f_2.queue_ref.op.run()
 
   def testSelectQueue(self):
-    with self.test_session():
+    with self.cached_session():
       num_queues = 10
       qlist = list()
       for _ in xrange(num_queues):
@@ -1370,7 +1370,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         self.assertEqual(q.dequeue().eval(), 10.0)
 
   def testSelectQueueOutOfRange(self):
-    with self.test_session():
+    with self.cached_session():
       q1 = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),))
       q2 = data_flow_ops.PaddingFIFOQueue(15, dtypes_lib.float32, ((),))
       enq_q = data_flow_ops.PaddingFIFOQueue.from_list(3, [q1, q2])
@@ -1394,7 +1394,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       sess.run(enqueue_many_op)
 
   def testResetOfBlockingOperation(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q_empty = data_flow_ops.PaddingFIFOQueue(5, dtypes_lib.float32, ((),))
       dequeue_op = q_empty.dequeue()
       dequeue_many_op = q_empty.dequeue_many(1)
@@ -1422,7 +1422,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         t.join()
 
   def testBigEnqueueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(5, dtypes_lib.int32, ((),))
       elem = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
       enq = q.enqueue_many((elem,))
@@ -1467,7 +1467,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertAllEqual(elem, results)
 
   def testBigDequeueMany(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PaddingFIFOQueue(2, dtypes_lib.int32, ((),))
       elem = np.arange(4, dtype=np.int32)
       enq_list = [q.enqueue((e,)) for e in elem]
@@ -1493,7 +1493,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       self.assertAllEqual(elem, results)
 
   def testDtypes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       dtypes = [
           dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32,
           dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, dtypes_lib.int64,
diff --git a/tensorflow/python/kernel_tests/parse_single_example_op_test.py b/tensorflow/python/kernel_tests/parse_single_example_op_test.py
index bf4c89b368..a84895a287 100644
--- a/tensorflow/python/kernel_tests/parse_single_example_op_test.py
+++ b/tensorflow/python/kernel_tests/parse_single_example_op_test.py
@@ -89,7 +89,7 @@ def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
 class ParseExampleTest(test.TestCase):
 
   def _test(self, kwargs, expected_values=None, expected_err=None):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       if expected_err:
         with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                  expected_err[1]):
@@ -844,7 +844,7 @@ class ParseExampleTest(test.TestCase):
 class ParseSingleExampleTest(test.TestCase):
 
   def _test(self, kwargs, expected_values=None, expected_err=None):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       if expected_err:
         with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                  expected_err[1]):
diff --git a/tensorflow/python/kernel_tests/parsing_ops_test.py b/tensorflow/python/kernel_tests/parsing_ops_test.py
index 7dff4501cc..71d8b60d3c 100644
--- a/tensorflow/python/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/python/kernel_tests/parsing_ops_test.py
@@ -89,7 +89,7 @@ def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
 class ParseExampleTest(test.TestCase):
 
   def _test(self, kwargs, expected_values=None, expected_err=None):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       if expected_err:
         with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                  expected_err[1]):
@@ -937,7 +937,7 @@ class ParseExampleTest(test.TestCase):
 class ParseSingleExampleTest(test.TestCase):
 
   def _test(self, kwargs, expected_values=None, expected_err=None):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       if expected_err:
         with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                  expected_err[1]):
@@ -1054,7 +1054,7 @@ class ParseSequenceExampleTest(test.TestCase):
     expected_feat_list_values = expected_feat_list_values or {}
     expected_length_values = expected_length_values or {}
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       if expected_err:
         with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                  expected_err[1]):
@@ -1606,7 +1606,7 @@ class ParseSequenceExampleTest(test.TestCase):
 class DecodeJSONExampleTest(test.TestCase):
 
   def _testRoundTrip(self, examples):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       examples = np.array(examples, dtype=np.object)
 
       json_tensor = constant_op.constant(
@@ -1696,7 +1696,7 @@ class DecodeJSONExampleTest(test.TestCase):
     ])
 
   def testInvalidSyntax(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       json_tensor = constant_op.constant(["{]"])
       binary_tensor = parsing_ops.decode_json_example(json_tensor)
       with self.assertRaisesOpError("Error while parsing JSON"):
@@ -1706,7 +1706,7 @@ class DecodeJSONExampleTest(test.TestCase):
 class ParseTensorOpTest(test.TestCase):
 
   def testToFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       expected = np.random.rand(3, 4, 5).astype(np.float32)
       tensor_proto = tensor_util.make_tensor_proto(expected)
 
@@ -1719,7 +1719,7 @@ class ParseTensorOpTest(test.TestCase):
       self.assertAllEqual(expected, result)
 
   def testToUint8(self):
-    with self.test_session():
+    with self.cached_session():
       expected = np.random.rand(3, 4, 5).astype(np.uint8)
       tensor_proto = tensor_util.make_tensor_proto(expected)
 
@@ -1732,7 +1732,7 @@ class ParseTensorOpTest(test.TestCase):
       self.assertAllEqual(expected, result)
 
   def testTypeMismatch(self):
-    with self.test_session():
+    with self.cached_session():
       expected = np.random.rand(3, 4, 5).astype(np.uint8)
       tensor_proto = tensor_util.make_tensor_proto(expected)
 
@@ -1745,7 +1745,7 @@ class ParseTensorOpTest(test.TestCase):
         tensor.eval(feed_dict={serialized: tensor_proto.SerializeToString()})
 
   def testInvalidInput(self):
-    with self.test_session():
+    with self.cached_session():
       serialized = array_ops.placeholder(dtypes.string)
       tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16)
 
diff --git a/tensorflow/python/kernel_tests/partitioned_variables_test.py b/tensorflow/python/kernel_tests/partitioned_variables_test.py
index 15d5702252..b34d30f5c0 100644
--- a/tensorflow/python/kernel_tests/partitioned_variables_test.py
+++ b/tensorflow/python/kernel_tests/partitioned_variables_test.py
@@ -39,7 +39,7 @@ from tensorflow.python.training import saver as saver_lib
 class PartitionerCreatorsTest(test.TestCase):
 
   def testFixedSizePartitioner(self):
-    with self.test_session():
+    with self.cached_session():
       partitioner = partitioned_variables.fixed_size_partitioner(5, axis=0)
       with variable_scope.variable_scope("root", partitioner=partitioner):
         v0 = variable_scope.get_variable(
@@ -50,7 +50,7 @@ class PartitionerCreatorsTest(test.TestCase):
         self.assertAllEqual(v0_part, (5, 1))
 
   def testFixedSizePartitionerInt64(self):
-    with self.test_session():
+    with self.cached_session():
       partitioner = partitioned_variables.fixed_size_partitioner(4, axis=0)
       with variable_scope.variable_scope("root", partitioner=partitioner):
         v0 = variable_scope.get_variable("v0", dtype=dtypes.int64, shape=[20])
@@ -58,7 +58,7 @@ class PartitionerCreatorsTest(test.TestCase):
         self.assertEqual(len(v0_list), 4)
 
   def testResourceFixedSizePartitioner(self):
-    with self.test_session():
+    with self.cached_session():
       partitioner = partitioned_variables.fixed_size_partitioner(5, axis=0)
       with variable_scope.variable_scope(
           "root", partitioner=partitioner, use_resource=True):
@@ -88,7 +88,7 @@ class PartitionerCreatorsTest(test.TestCase):
       self.assertAllEqual(v0_part, expected_partitions)
 
   def testVariableAxisSizePartitioner(self):
-    with self.test_session():
+    with self.cached_session():
       # Create a partitioned variable of shape (4, 8, 16, 32) type float32
       # Bytes per slice along the given axes:
 
@@ -210,7 +210,7 @@ class PartitionerCreatorsTest(test.TestCase):
       self.assertAllEqual(v0_part, expected_partitions)
 
   def testMinMaxVariablePartitioner(self):
-    with self.test_session():
+    with self.cached_session():
       # Partitioning a variable of shape=[2048] with a minimum of 2K per slice.
       self._testMinMaxVariablePartitioner(
           max_partitions=100,
@@ -323,7 +323,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       self.assertEquals(expected_specs[i], slices[i]._save_slice_info.spec)
 
   def testVecConstantInit(self):
-    with self.test_session():
+    with self.cached_session():
       rnd_par = constant_op.constant([1, 2, 3, 4])
       vs = partitioned_variables.create_partitioned_variables([4], [4], rnd_par)
       variables.global_variables_initializer().run()
@@ -334,7 +334,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       self._TestSaveSpec(vs, ["4 0,1", "4 1,1", "4 2,1", "4 3,1"])
 
   def testConstantInit(self):
-    with self.test_session():
+    with self.cached_session():
       rnd_par = constant_op.constant([[1, 2, 3, 4], [5, 6, 7, 8]])
       vs = partitioned_variables.create_partitioned_variables([2, 4], [1, 2],
                                                               rnd_par)
@@ -346,7 +346,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       self._TestSaveSpec(vs, ["2 4 0,2:0,2", "2 4 0,2:2,2"])
 
   def _testNameHelper(self, use_resource=False):
-    with self.test_session():
+    with self.cached_session():
       rnd_par = constant_op.constant([[1, 2, 3, 4], [5, 6, 7, 8]])
       with variable_scope.variable_scope("hi", use_resource=use_resource):
         vs1 = partitioned_variables.create_partitioned_variables([2, 4], [1, 2],
@@ -363,7 +363,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       self.assertEqual(var2_name + "/part_0:0", vs2[0].name)
       self.assertEqual(var2_name + "/part_1:0", vs2[1].name)
     # Test same variable.
-    with self.test_session():
+    with self.cached_session():
       rnd_par = constant_op.constant([[1, 2, 3, 4], [5, 6, 7, 8]])
       with variable_scope.variable_scope(
           "hola", use_resource=use_resource) as vs:
@@ -383,7 +383,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       self.assertEqual(var2_name + "/part_0:0", vs2[0].name)
       self.assertEqual(var2_name + "/part_1:0", vs2[1].name)
     # Test name_scope
-    with self.test_session():
+    with self.cached_session():
       rnd_par = constant_op.constant([[1, 2, 3, 4], [5, 6, 7, 8]])
       with ops.name_scope("ola"):
         vs1 = partitioned_variables.create_partitioned_variables([2, 4], [1, 2],
@@ -408,7 +408,7 @@ class PartitionedVariablesTestCase(test.TestCase):
     self._testNameHelper(use_resource=True)
 
   def testRandomInitValue(self):
-    with self.test_session():
+    with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([200, 40]))
       vs = partitioned_variables.create_partitioned_variables(
           rnd.get_shape(), [1, 10], rnd.initialized_value())
@@ -425,7 +425,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       ])
 
   def testRandomInitUnevenPartitions(self):
-    with self.test_session():
+    with self.cached_session():
       rnd = variables.Variable(
           random_ops.random_uniform([20, 43], dtype=dtypes.float64))
       var_lists = [
@@ -463,7 +463,7 @@ class PartitionedVariablesTestCase(test.TestCase):
           self._TestSaveSpec(vs, save_specs[i])
 
   def testDegenerate(self):
-    with self.test_session():
+    with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([10, 43]))
       vs = partitioned_variables.create_partitioned_variables(
           rnd.get_shape(), [1, 1], rnd.initialized_value())
@@ -474,7 +474,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       self._TestSaveSpec(vs, ["10 43 0,10:0,43"])
 
   def testSliceSizeOne(self):
-    with self.test_session():
+    with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([10, 43]))
       vs = partitioned_variables.create_partitioned_variables(
           rnd.get_shape(), [10, 1], rnd.initialized_value())
@@ -492,7 +492,7 @@ class PartitionedVariablesTestCase(test.TestCase):
     self.assertAllClose([0., 1., 2., 3.], _IotaInitializer([4]))
     self.assertAllClose([[0., 1.], [0., 10.], [0., 100.], [0., 1000.]],
                         _IotaInitializer([4, 2]))
-    with self.test_session():
+    with self.cached_session():
       vs = partitioned_variables.create_partitioned_variables([13, 5], [3, 1],
                                                               _IotaInitializer)
       variables.global_variables_initializer().run()
@@ -506,7 +506,7 @@ class PartitionedVariablesTestCase(test.TestCase):
   def testRandomInitializer(self):
     # Sanity check that the slices uses a different seed when using a random
     # initializer function.
-    with self.test_session():
+    with self.cached_session():
       var0, var1 = partitioned_variables.create_partitioned_variables(
           [20, 12], [1, 2], init_ops.random_uniform_initializer())
       variables.global_variables_initializer().run()
@@ -514,7 +514,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       self.assertTrue(np.linalg.norm(val0 - val1) > 1e-6)
     # Negative test that proves that slices have the same values if
     # the random initializer uses a seed.
-    with self.test_session():
+    with self.cached_session():
       var0, var1 = partitioned_variables.create_partitioned_variables(
           [20, 12], [1, 2], init_ops.random_uniform_initializer(seed=201))
       variables.global_variables_initializer().run()
@@ -522,7 +522,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       self.assertAllClose(val0, val1)
 
   def testSomeErrors(self):
-    with self.test_session():
+    with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([10, 43]))
       with self.assertRaises(ValueError):
         partitioned_variables.create_partitioned_variables(
@@ -547,7 +547,7 @@ class PartitionedVariablesTestCase(test.TestCase):
             [10, 43], [1, 50], rnd.initialized_value())
 
   def testControlDepsNone(self):
-    with self.test_session() as session:
+    with self.cached_session() as session:
       c = constant_op.constant(1.0)
       with ops.control_dependencies([c]):
         # d get the control dependency.
@@ -573,7 +573,7 @@ class PartitionedVariablesTestCase(test.TestCase):
         self.assertEqual([], op.control_inputs)
 
   def testConcat(self):
-    with self.test_session() as session:
+    with self.cached_session() as session:
       var_x = variable_scope.get_variable(
           "x",
           initializer=constant_op.constant([1., 2.]),
diff --git a/tensorflow/python/kernel_tests/priority_queue_test.py b/tensorflow/python/kernel_tests/priority_queue_test.py
index 3fb9c9c468..73a9c81638 100644
--- a/tensorflow/python/kernel_tests/priority_queue_test.py
+++ b/tensorflow/python/kernel_tests/priority_queue_test.py
@@ -36,7 +36,7 @@ from tensorflow.python.platform import test
 class PriorityQueueTest(test.TestCase):
 
   def testRoundTripInsertReadOnceSorts(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), (
           (), ()))
       elem = np.random.randint(-5, 5, size=100).astype(np.int64)
@@ -67,7 +67,7 @@ class PriorityQueueTest(test.TestCase):
       self.assertEqual(missed, set())
 
   def testRoundTripInsertMultiThreadedReadOnceSorts(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), (
           (), ()))
       elem = np.random.randint(-5, 5, size=100).astype(np.int64)
@@ -113,7 +113,7 @@ class PriorityQueueTest(test.TestCase):
       self.assertEqual(missed, set())
 
   def testRoundTripFillsCapacityMultiThreadedEnqueueAndDequeue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(10, (dtypes.int64), (()))
 
       num_threads = 40
@@ -163,7 +163,7 @@ class PriorityQueueTest(test.TestCase):
       self.assertAllEqual(sorted(dequeued), sorted(all_enqueued_values))
 
   def testRoundTripInsertManyMultiThreadedReadManyMultithreadedSorts(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (()))
 
       num_threads = 40
@@ -219,7 +219,7 @@ class PriorityQueueTest(test.TestCase):
       self.assertAllEqual(set(dequeued), set(all_enqueued_values))
 
   def testRoundTripInsertManyMultiThreadedReadOnceSorts(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), (
           (), ()))
       elem = np.random.randint(-5, 5, size=100).astype(np.int64)
@@ -268,7 +268,7 @@ class PriorityQueueTest(test.TestCase):
       self.assertEqual(missed, set())
 
   def testRoundTripInsertOnceReadOnceSorts(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), (
           (), ()))
       elem = np.random.randint(-100, 100, size=1000).astype(np.int64)
@@ -289,7 +289,7 @@ class PriorityQueueTest(test.TestCase):
         self.assertTrue((dv0, dv1) in allowed[e])
 
   def testRoundTripInsertOnceReadManySorts(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (()))
       elem = np.random.randint(-100, 100, size=1000).astype(np.int64)
       q.enqueue_many((elem, elem)).run()
@@ -297,7 +297,7 @@ class PriorityQueueTest(test.TestCase):
       self.assertAllEqual(deq_values, sorted(elem))
 
   def testRoundTripInsertOnceReadOnceLotsSorts(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (()))
       elem = np.random.randint(-100, 100, size=1000).astype(np.int64)
       q.enqueue_many((elem, elem)).run()
@@ -306,13 +306,13 @@ class PriorityQueueTest(test.TestCase):
       self.assertAllEqual(deq_values, sorted(elem))
 
   def testInsertingNonInt64Fails(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string), (()))
       with self.assertRaises(TypeError):
         q.enqueue_many((["a", "b", "c"], ["a", "b", "c"])).run()
 
   def testInsertingNonScalarFails(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       input_priority = array_ops.placeholder(dtypes.int64)
       input_other = array_ops.placeholder(dtypes.string)
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string,), (()))
diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py
index 8e06e1abfb..8c84b2a49f 100644
--- a/tensorflow/python/kernel_tests/reader_ops_test.py
+++ b/tensorflow/python/kernel_tests/reader_ops_test.py
@@ -146,7 +146,7 @@ class IdentityReaderTest(test.TestCase):
     self.assertAllEqual(expected, v)
 
   def testOneEpoch(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.IdentityReader("test_reader")
       work_completed = reader.num_work_units_completed()
       produced = reader.num_records_produced()
@@ -180,7 +180,7 @@ class IdentityReaderTest(test.TestCase):
       self.assertAllEqual(0, queued_length.eval())
 
   def testMultipleEpochs(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.IdentityReader("test_reader")
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       enqueue = queue.enqueue_many([["DD", "EE"]])
@@ -201,7 +201,7 @@ class IdentityReaderTest(test.TestCase):
         sess.run([key, value])
 
   def testSerializeRestore(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.IdentityReader("test_reader")
       produced = reader.num_records_produced()
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
@@ -256,7 +256,7 @@ class IdentityReaderTest(test.TestCase):
         reader.restore_state(b"BOGUS" + state[5:]).run()
 
   def testReset(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.IdentityReader("test_reader")
       work_completed = reader.num_work_units_completed()
       produced = reader.num_records_produced()
@@ -307,7 +307,7 @@ class WholeFileReaderTest(test.TestCase):
     self.assertAllEqual(self._content[index], v)
 
   def testOneEpoch(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.WholeFileReader("test_reader")
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       queue.enqueue_many([self._filenames]).run()
@@ -323,7 +323,7 @@ class WholeFileReaderTest(test.TestCase):
         sess.run([key, value])
 
   def testInfiniteEpochs(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.WholeFileReader("test_reader")
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       enqueue = queue.enqueue_many([self._filenames])
@@ -366,7 +366,7 @@ class TextLineReaderTest(test.TestCase):
     return filenames
 
   def _testOneEpoch(self, files):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.TextLineReader(name="test_reader")
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       key, value = reader.read(queue)
@@ -391,7 +391,7 @@ class TextLineReaderTest(test.TestCase):
 
   def testSkipHeaderLines(self):
     files = self._CreateFiles()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.TextLineReader(skip_header_lines=1, name="test_reader")
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       key, value = reader.read(queue)
@@ -522,7 +522,7 @@ class FixedLengthRecordReaderTest(TFCompressionTestCase):
   # gap_bytes=hop_bytes-record_bytes
   def _TestOneEpoch(self, files, num_records, gap_bytes, encoding=None):
     hop_bytes = 0 if gap_bytes == 0 else self._record_bytes + gap_bytes
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.FixedLengthRecordReader(
           header_bytes=self._header_bytes,
           record_bytes=self._record_bytes,
@@ -549,7 +549,7 @@ class FixedLengthRecordReaderTest(TFCompressionTestCase):
                                 files,
                                 num_overlapped_records,
                                 encoding=None):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.FixedLengthRecordReader(
           header_bytes=self._header_bytes,
           record_bytes=self._record_bytes,
@@ -621,7 +621,7 @@ class TFRecordReaderTest(TFCompressionTestCase):
 
   def testOneEpoch(self):
     files = self._CreateFiles()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.TFRecordReader(name="test_reader")
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       key, value = reader.read(queue)
@@ -640,7 +640,7 @@ class TFRecordReaderTest(TFCompressionTestCase):
 
   def testReadUpTo(self):
     files = self._CreateFiles()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.TFRecordReader(name="test_reader")
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       batch_size = 3
@@ -670,7 +670,7 @@ class TFRecordReaderTest(TFCompressionTestCase):
     options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
     files = self._CreateFiles(options)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.TFRecordReader(name="test_reader", options=options)
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       key, value = reader.read(queue)
@@ -687,7 +687,7 @@ class TFRecordReaderTest(TFCompressionTestCase):
     options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP)
     files = self._CreateFiles(options)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.TFRecordReader(name="test_reader", options=options)
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       key, value = reader.read(queue)
@@ -752,7 +752,7 @@ class LMDBReaderTest(test.TestCase):
     shutil.copy(path, self.db_path)
 
   def testReadFromFile(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.LMDBReader(name="test_read_from_file")
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       key, value = reader.read(queue)
@@ -770,7 +770,7 @@ class LMDBReaderTest(test.TestCase):
         k, v = sess.run([key, value])
 
   def testReadFromSameFile(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader1 = io_ops.LMDBReader(name="test_read_from_same_file1")
       reader2 = io_ops.LMDBReader(name="test_read_from_same_file2")
       filename_queue = input_lib.string_input_producer(
@@ -789,7 +789,7 @@ class LMDBReaderTest(test.TestCase):
       coord.join(threads)
 
   def testReadFromFolder(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.LMDBReader(name="test_read_from_folder")
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       key, value = reader.read(queue)
@@ -807,7 +807,7 @@ class LMDBReaderTest(test.TestCase):
         k, v = sess.run([key, value])
 
   def testReadFromFileRepeatedly(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       reader = io_ops.LMDBReader(name="test_read_from_file_repeated")
       filename_queue = input_lib.string_input_producer(
           [self.db_path], num_epochs=None)
diff --git a/tensorflow/python/kernel_tests/record_input_test.py b/tensorflow/python/kernel_tests/record_input_test.py
index 068860d5d4..ebb9872f22 100644
--- a/tensorflow/python/kernel_tests/record_input_test.py
+++ b/tensorflow/python/kernel_tests/record_input_test.py
@@ -44,7 +44,7 @@ class RecordInputOpTest(test.TestCase):
     w.close()
 
   def testRecordInputSimple(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.generateTestData("basic", 1, 1)
 
       yield_op = data_flow_ops.RecordInput(
@@ -57,7 +57,7 @@ class RecordInputOpTest(test.TestCase):
       self.assertEqual(sess.run(yield_op), b"0000000000")
 
   def testRecordInputSimpleGzip(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.generateTestData(
           "basic",
           1,
@@ -76,7 +76,7 @@ class RecordInputOpTest(test.TestCase):
       self.assertEqual(sess.run(yield_op), b"0000000000")
 
   def testRecordInputSimpleZlib(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.generateTestData(
           "basic",
           1,
@@ -98,7 +98,7 @@ class RecordInputOpTest(test.TestCase):
     files = 100
     records_per_file = 100
     batches = 2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.generateTestData("basic", files, records_per_file)
 
       records = data_flow_ops.RecordInput(
@@ -126,7 +126,7 @@ class RecordInputOpTest(test.TestCase):
   def testDoesNotDeadlock(self):
     # Iterate multiple times to cause deadlock if there is a chance it can occur
     for _ in range(30):
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         self.generateTestData("basic", 1, 1)
 
         records = data_flow_ops.RecordInput(
@@ -141,7 +141,7 @@ class RecordInputOpTest(test.TestCase):
           sess.run(yield_op)
 
   def testEmptyGlob(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       record_input = data_flow_ops.RecordInput(file_pattern="foo")
       yield_op = record_input.get_yield_op()
       sess.run(variables.global_variables_initializer())
@@ -152,7 +152,7 @@ class RecordInputOpTest(test.TestCase):
     files = 10
     records_per_file = 10
     batches = 2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.generateTestData("basic", files, records_per_file)
 
       records = data_flow_ops.RecordInput(
diff --git a/tensorflow/python/kernel_tests/reduce_join_op_test.py b/tensorflow/python/kernel_tests/reduce_join_op_test.py
index 663561ced7..3bb4986313 100644
--- a/tensorflow/python/kernel_tests/reduce_join_op_test.py
+++ b/tensorflow/python/kernel_tests/reduce_join_op_test.py
@@ -113,7 +113,7 @@ class ReduceJoinTest(UnicodeTestCase):
       keep_dims: Whether or not to retain reduced dimensions.
       separator: The separator to use for joining.
     """
-    with self.test_session():
+    with self.cached_session():
       output = string_ops.reduce_join(
           inputs=input_array,
           axis=axis,
@@ -136,7 +136,7 @@ class ReduceJoinTest(UnicodeTestCase):
       axis: The indices to reduce.
       separator: The separator to use when joining.
     """
-    with self.test_session():
+    with self.cached_session():
       output = string_ops.reduce_join(
           inputs=input_array, axis=axis, keep_dims=False, separator=separator)
       output_keep_dims = string_ops.reduce_join(
@@ -234,7 +234,7 @@ class ReduceJoinTest(UnicodeTestCase):
     input_array = [["a"], ["b"]]
     truth = ["ab"]
     truth_shape = None
-    with self.test_session():
+    with self.cached_session():
       placeholder = array_ops.placeholder(dtypes.string, name="placeholder")
       reduced = string_ops.reduce_join(placeholder, axis=0)
       output_array = reduced.eval(feed_dict={placeholder.name: input_array})
@@ -247,7 +247,7 @@ class ReduceJoinTest(UnicodeTestCase):
     truth_dim_zero = ["thisplease", "isdo", "anot", "testpanic"]
     truth_dim_one = ["thisisatest", "pleasedonotpanic"]
     truth_shape = None
-    with self.test_session():
+    with self.cached_session():
       placeholder = array_ops.placeholder(dtypes.int32, name="placeholder")
       reduced = string_ops.reduce_join(input_array, axis=placeholder)
       output_array_dim_zero = reduced.eval(feed_dict={placeholder.name: [0]})
@@ -298,7 +298,7 @@ class ReduceJoinTest(UnicodeTestCase):
         self._testMultipleReduceJoin(input_array, axis=permutation)
 
   def testInvalidReductionIndices(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(ValueError, "Invalid reduction dim"):
         string_ops.reduce_join(inputs="", axis=0)
       with self.assertRaisesRegexp(ValueError,
@@ -313,7 +313,7 @@ class ReduceJoinTest(UnicodeTestCase):
         string_ops.reduce_join(inputs=[[""]], axis=[0, 2])
 
   def testZeroDims(self):
-    with self.test_session():
+    with self.cached_session():
       inputs = np.zeros([0, 1], dtype=str)
 
       # Reduction that drops the dim of size 0.
@@ -326,7 +326,7 @@ class ReduceJoinTest(UnicodeTestCase):
       self.assertAllEqual([0], output_shape)
 
   def testInvalidArgsUnknownShape(self):
-    with self.test_session():
+    with self.cached_session():
       placeholder = array_ops.placeholder(dtypes.string, name="placeholder")
       index_too_high = string_ops.reduce_join(placeholder, axis=1)
       duplicate_index = string_ops.reduce_join(placeholder, axis=[-1, 1])
@@ -336,7 +336,7 @@ class ReduceJoinTest(UnicodeTestCase):
         duplicate_index.eval(feed_dict={placeholder.name: [[""]]})
 
   def testInvalidArgsUnknownIndices(self):
-    with self.test_session():
+    with self.cached_session():
       placeholder = array_ops.placeholder(dtypes.int32, name="placeholder")
       reduced = string_ops.reduce_join(["test", "test2"], axis=placeholder)
 
diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py
index ea78b58d88..496a452a03 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test.py
@@ -61,7 +61,7 @@ class ReducedShapeTest(test.TestCase):
     self.assertAllEqual(output.eval(), result)
 
   def testSimple(self):
-    with self.test_session():
+    with self.cached_session():
       self._check([3], [], [3])
       self._check([3], [0], [1])
       self._check([5, 3], [], [5, 3])
@@ -71,7 +71,7 @@ class ReducedShapeTest(test.TestCase):
 
   def testZeros(self):
     """Check that reduced_shape does the right thing with zero dimensions."""
-    with self.test_session():
+    with self.cached_session():
       self._check([0], [], [0])
       self._check([0], [0], [1])
       self._check([0, 3], [], [0, 3])
@@ -84,7 +84,7 @@ class ReducedShapeTest(test.TestCase):
       self._check([3, 0], [0, 1], [1, 1])
 
   def testNegAxes(self):
-    with self.test_session():
+    with self.cached_session():
       self._check([10, 10, 10], [-1], [10, 10, 1])
       self._check([10, 10, 10], [-1, 2], [10, 10, 1])
       self._check([10, 10, 10], [-1, -1], [10, 10, 1])
@@ -95,7 +95,7 @@ class ReducedShapeTest(test.TestCase):
 class ReductionUnknownShape(test.TestCase):
 
   def testBasic(self):
-    with self.test_session():
+    with self.cached_session():
       for dtype, reductions in [(dtypes.float32,
                                  (math_ops.reduce_sum, math_ops.reduce_mean,
                                   math_ops.reduce_prod, math_ops.reduce_max,
@@ -617,7 +617,7 @@ class MinReductionTest(test.TestCase):
   def testGradient(self):
     s = [2, 3, 4, 2]
     x = np.arange(1.0, 49.0).reshape(s).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       t = ops.convert_to_tensor(x)
       su = math_ops.reduce_min(t, [1, 2])
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -627,7 +627,7 @@ class MinReductionTest(test.TestCase):
   def testGradient2(self):
     s = [2, 3, 4, 2]
     x = np.arange(1.0, 49.0).reshape(s).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       t = ops.convert_to_tensor(x)
       su = math_ops.reduce_min(t, [1])
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -637,7 +637,7 @@ class MinReductionTest(test.TestCase):
   def testGradient3(self):
     s = [2, 3, 4, 2]
     x = np.arange(1.0, 49.0).reshape(s).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       t = ops.convert_to_tensor(x)
       su = math_ops.reduce_min(t, [2])
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -647,7 +647,7 @@ class MinReductionTest(test.TestCase):
   def testGradient4(self):
     s = [2, 3, 4, 2]
     x = np.arange(1.0, 49.0).reshape(s).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       t = ops.convert_to_tensor(x)
       su = math_ops.reduce_min(t)
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -655,7 +655,7 @@ class MinReductionTest(test.TestCase):
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testEmptyGradients(self):
-    with self.test_session():
+    with self.cached_session():
       x = array_ops.zeros([0, 3])
       y = math_ops.reduce_min(x, [1])
       error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0])
@@ -744,7 +744,7 @@ class MaxReductionTest(test.TestCase):
   def testGradient(self):
     s = [2, 3, 4, 2]
     x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       t = ops.convert_to_tensor(x)
       su = math_ops.reduce_max(t, [1, 2])
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -754,7 +754,7 @@ class MaxReductionTest(test.TestCase):
   def testGradient2(self):
     s = [2, 3, 4, 2]
     x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       t = ops.convert_to_tensor(x)
       su = math_ops.reduce_max(t, [1])
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -764,7 +764,7 @@ class MaxReductionTest(test.TestCase):
   def testGradient3(self):
     s = [2, 3, 4, 2]
     x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       t = ops.convert_to_tensor(x)
       su = math_ops.reduce_max(t, [2])
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -774,7 +774,7 @@ class MaxReductionTest(test.TestCase):
   def testGradient4(self):
     s = [2, 3, 4, 2]
     x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64)
-    with self.test_session():
+    with self.cached_session():
       t = ops.convert_to_tensor(x)
       su = math_ops.reduce_max(t)
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -782,7 +782,7 @@ class MaxReductionTest(test.TestCase):
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testEmptyGradients(self):
-    with self.test_session():
+    with self.cached_session():
       x = array_ops.zeros([0, 3])
       y = math_ops.reduce_max(x, [1])
       error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0])
@@ -960,7 +960,7 @@ class CountNonzeroReductionTest(test.TestCase):
 
   def testStringReduce(self):
     # Test case for GitHub issue 18712
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v = math_ops.count_nonzero(constant_op.constant(["test"]))
       self.assertAllClose(sess.run(v), 1)
 
diff --git a/tensorflow/python/kernel_tests/regex_full_match_op_test.py b/tensorflow/python/kernel_tests/regex_full_match_op_test.py
index 7bd8c3ca27..e81f562a2a 100644
--- a/tensorflow/python/kernel_tests/regex_full_match_op_test.py
+++ b/tensorflow/python/kernel_tests/regex_full_match_op_test.py
@@ -35,7 +35,7 @@ class RegexFullMatchOpVariantsTest(test.TestCase, parameterized.TestCase):
 
   def testRegexFullMatch(self, op):
     values = ["abaaba", "abcdabcde"]
-    with self.test_session():
+    with self.cached_session():
       input_tensor = constant_op.constant(values, dtypes.string)
       matched = op(input_tensor, "a.*a").eval()
       self.assertAllEqual([True, False], matched)
@@ -49,14 +49,14 @@ class RegexFullMatchOpVariantsTest(test.TestCase, parameterized.TestCase):
 
   def testEmptyMatch(self, op):
     values = ["abc", "1"]
-    with self.test_session():
+    with self.cached_session():
       input_tensor = constant_op.constant(values, dtypes.string)
       matched = op(input_tensor, "").eval()
       self.assertAllEqual([False, False], matched)
 
   def testInvalidPattern(self, op):
     values = ["abc", "1"]
-    with self.test_session():
+    with self.cached_session():
       input_tensor = constant_op.constant(values, dtypes.string)
       invalid_pattern = "A["
       matched = op(input_tensor, invalid_pattern)
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 657d92fa23..a45a325b47 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -104,7 +104,7 @@ class ReluTest(test.TestCase):
   # The gradient test for ReLU is a bit tricky as the derivative is not well
   # defined at around zero and we want to avoid that in terms of input values.
   def testGradientFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -149,7 +149,7 @@ class ReluTest(test.TestCase):
         self.assertAllClose(dx_f32_v, dx_f16_v, atol=3e-4)
 
   def testGradientFloat64(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -166,7 +166,7 @@ class ReluTest(test.TestCase):
     self.assertLess(err, 1e-10)
 
   def testGradGradFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -183,7 +183,7 @@ class ReluTest(test.TestCase):
     self.assertLess(err, 1e-4)
 
   def testGradGradFloat64(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -201,7 +201,7 @@ class ReluTest(test.TestCase):
     self.assertLess(err, 1e-10)
 
   def testGradientScalar(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = variables.Variable(100.)
       y = nn_ops.relu(x)
       loss = y**2
@@ -249,7 +249,7 @@ class Relu6Test(test.TestCase):
   # not well defined at around zero and six and we want to avoid that
   # in terms of input values.
   def testGradientFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 6.1, 6.3, 6.5, 6.7, 6.9],
           shape=[2, 5],
@@ -265,7 +265,7 @@ class Relu6Test(test.TestCase):
     self.assertLess(err, 1e-4)
 
   def testGradientFloat64(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 6.1, 6.3, 6.5, 6.7, 6.9],
           shape=[2, 5],
@@ -313,7 +313,7 @@ class EluTest(test.TestCase):
           use_gpu=True)
 
   def testGradientFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
       x = constant_op.constant(x_val, name="x")
       y = nn_ops.elu(x, name="elu")
@@ -324,7 +324,7 @@ class EluTest(test.TestCase):
     self.assertLess(err, 1e-4)
 
   def testGradientFloat64(self):
-    with self.test_session():
+    with self.cached_session():
       x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
       x = constant_op.constant(x_val, dtype=dtypes.float64, name="x")
       y = nn_ops.elu(x, name="elu")
@@ -335,7 +335,7 @@ class EluTest(test.TestCase):
     self.assertLess(err, 1e-6)
 
   def testGradGrad(self):
-    with self.test_session():
+    with self.cached_session():
       x = array_ops.placeholder(dtype=dtypes.float32)
       elu = nn_ops.elu(x)
       g, = gradients_impl.gradients(elu, x)
@@ -346,7 +346,7 @@ class EluTest(test.TestCase):
         self.assertLess(err, 1e-4)
 
   def testGradGradFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -363,7 +363,7 @@ class EluTest(test.TestCase):
     self.assertLess(err, 1e-4)
 
   def testGradGradFloat64(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -415,7 +415,7 @@ class SeluTest(test.TestCase):
           use_gpu=True)
 
   def testGradientFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
       x = constant_op.constant(x_val, name="x")
       y = nn_ops.selu(x, name="selu")
@@ -426,7 +426,7 @@ class SeluTest(test.TestCase):
     self.assertLess(err, 1e-4)
 
   def testGradientFloat64(self):
-    with self.test_session():
+    with self.cached_session():
       x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
       x = constant_op.constant(x_val, dtype=dtypes.float64, name="x")
       y = nn_ops.selu(x, name="selu")
@@ -437,7 +437,7 @@ class SeluTest(test.TestCase):
     self.assertLess(err, 1e-6)
 
   def testGradGradFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -454,7 +454,7 @@ class SeluTest(test.TestCase):
     self.assertLess(err, 1e-4)
 
   def testGradGradFloat64(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -503,7 +503,7 @@ class CreluTest(test.TestCase):
             use_gpu=True)
 
   def testNumbersWithAxis0(self):
-    with self.test_session():
+    with self.cached_session():
       crelu = nn_ops.crelu(
           np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]), axis=0)
       tf_relu = crelu.eval()
@@ -512,7 +512,7 @@ class CreluTest(test.TestCase):
       self.assertAllEqual(np_crelu, tf_relu)
 
   def testNumbersWithAxis1(self):
-    with self.test_session():
+    with self.cached_session():
       crelu = nn_ops.crelu(
           np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]), axis=1)
       tf_relu = crelu.eval()
diff --git a/tensorflow/python/kernel_tests/reshape_op_test.py b/tensorflow/python/kernel_tests/reshape_op_test.py
index ef9b439230..ca3ff1d1df 100644
--- a/tensorflow/python/kernel_tests/reshape_op_test.py
+++ b/tensorflow/python/kernel_tests/reshape_op_test.py
@@ -94,7 +94,7 @@ class ReshapeTest(test.TestCase):
   def testFloatReshapeGradThreeDimensions(self):
     x = np.arange(1., 25.).reshape([2, 3, 4]).astype(np.float32)
     s = list(np.shape(x))
-    with self.test_session():
+    with self.cached_session():
       input_tensor = constant_op.constant(x)
       reshape_out = array_ops.reshape(input_tensor, [1, 8, 3])
       err = gradient_checker.compute_gradient_error(
diff --git a/tensorflow/python/kernel_tests/reverse_sequence_op_test.py b/tensorflow/python/kernel_tests/reverse_sequence_op_test.py
index 9beb615b2c..8fc71e0c57 100644
--- a/tensorflow/python/kernel_tests/reverse_sequence_op_test.py
+++ b/tensorflow/python/kernel_tests/reverse_sequence_op_test.py
@@ -120,7 +120,7 @@ class ReverseSequenceTest(test.TestCase):
     batch_axis = 2
     seq_lengths = np.asarray([3, 0, 4], dtype=np.int64)
 
-    with self.test_session():
+    with self.cached_session():
       input_t = constant_op.constant(x, shape=x.shape)
       seq_lengths_t = constant_op.constant(seq_lengths, shape=seq_lengths.shape)
       reverse_sequence_out = array_ops.reverse_sequence(
@@ -171,7 +171,7 @@ class ReverseSequenceTest(test.TestCase):
           seq_axis=0,
           batch_axis=3)
 
-    with self.test_session():
+    with self.cached_session():
       inputs = array_ops.placeholder(dtypes.float32, shape=(32, 2, 3))
       seq_lengths = array_ops.placeholder(dtypes.int64, shape=(32,))
       output = array_ops.reverse_sequence(
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index f2f3023469..86e063cb36 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -294,7 +294,7 @@ class StatefulScatterNdTest(test.TestCase):
     self.assertAllEqual(scatter_update.get_shape().as_list(), shape)
 
     expected_result = np.zeros([2, 2], dtype=np.int32)
-    with self.test_session():
+    with self.cached_session():
       ref.initializer.run()
       self.assertAllEqual(expected_result, scatter_update.eval())
 
@@ -409,7 +409,7 @@ class ScatterNdTest(test.TestCase):
     expected = np.array([b"", b"one", b"", b"three", b"four",
                          b"", b"", b"seven"])
     scatter = self.scatter_nd(indices, updates, shape=(8,))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       result = sess.run(scatter)
       self.assertAllEqual(expected, result)
 
@@ -420,7 +420,7 @@ class ScatterNdTest(test.TestCase):
                                    dtype=dtypes.string)
     expected = np.array([b"", b"", b"", b"bb", b"a", b"", b"", b"c"])
     scatter = self.scatter_nd(indices, updates, shape=(8,))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       result = sess.run(scatter)
       self.assertAllEqual(expected, result)
 
@@ -432,7 +432,7 @@ class ScatterNdTest(test.TestCase):
     expected = [np.array([b"", b"", b"", b"bc", b"a", b"", b"", b"d"]),
                 np.array([b"", b"", b"", b"cb", b"a", b"", b"", b"d"])]
     scatter = self.scatter_nd(indices, updates, shape=(8,))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       result = sess.run(scatter)
       self.assertTrue(np.array_equal(result, expected[0]) or
                       np.array_equal(result, expected[1]))
@@ -451,7 +451,7 @@ class ScatterNdTest(test.TestCase):
     scatter = self.scatter_nd(indices, updates, shape)
     self.assertAllEqual(scatter.get_shape().as_list(), shape)
     expected_result = np.zeros([2, 2], dtype=np.int32)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_result, scatter.eval())
 
   def testUndefinedIndicesShape(self):
@@ -486,7 +486,7 @@ class ScatterNdTest(test.TestCase):
     updates = array_ops.placeholder(dtypes.int32, shape=None)
     shape = constant_op.constant([0, 3, 2], dtypes.int32)
 
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesOpError(
           "Indices and updates specified for empty output"):
         self.scatter_nd(indices, updates, shape).eval(feed_dict={
@@ -500,7 +500,7 @@ class ScatterNdTest(test.TestCase):
     shape = constant_op.constant([0], dtypes.int32)
     scatter = self.scatter_nd(indices, updates, shape)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(scatter.eval().size, 0)
 
   def testRank3InvalidShape1(self):
@@ -531,7 +531,7 @@ class ScatterNdTest(test.TestCase):
         [outputs], [updates, input_], [grad_vals])
     expected_updates_grad = np.array([1, 4], dtype=np.float64)
     expected_input_grad = np.array([[1, 2], [3, 4]], dtype=np.float64)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_updates_grad, updates_grad.eval())
       if self.non_aliasing_add_test:
         self.assertAllEqual(expected_input_grad, input_grad.eval())
@@ -548,7 +548,7 @@ class ScatterNdTest(test.TestCase):
         [outputs], [updates, input_], [grad_vals])
     expected_updates_grad = np.array([[1, 2], [3, 4]], dtype=np.float64)
     expected_input_grad = np.array([[3, 4], [1, 2]], dtype=np.float64)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_updates_grad, updates_grad.eval())
       if self.non_aliasing_add_test:
         self.assertAllEqual(expected_input_grad, input_grad.eval())
@@ -570,7 +570,7 @@ class ScatterNdTest(test.TestCase):
         [[[3, 4], [5, 6]], [[1, 2], [7, 8]]], dtype=np.float64)
     expected_input_grad = np.array(
         [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=np.float64)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_updates_grad, updates_grad.eval())
       if self.non_aliasing_add_test:
         self.assertAllEqual(expected_input_grad, input_grad.eval())
@@ -607,7 +607,7 @@ class ScatterNdTest(test.TestCase):
             [[[[1, 2], [3, 4]]]],
             [[[[5, 6], [7, 8]]]]
         ]]], dtype=np.float64)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_updates_grad, updates_grad.eval())
       if self.non_aliasing_add_test:
         self.assertAllEqual(expected_input_grad, input_grad.eval())
@@ -616,33 +616,33 @@ class ScatterNdTest(test.TestCase):
     indices = array_ops.zeros([100000, 1], dtypes.int32)
     values = np.random.randn(100000)
     shape = [1]
-    with self.test_session():
+    with self.cached_session():
       val = self.scatter_nd(indices, values, shape).eval()
     self.assertAllClose([np.sum(values)], val)
 
   def testSmokeScatterNdBatch2DSliceDim2(self):
-    with self.test_session():
+    with self.cached_session():
       indices = array_ops.zeros([3, 5, 2], dtype=dtypes.int32)
       values = array_ops.zeros([3, 5, 7])
       shape = [4, 6, 7]
       self.scatter_nd(indices, values, shape).eval()
 
   def testSmokeScatterNdBatch1DSliceDim2(self):
-    with self.test_session():
+    with self.cached_session():
       indices = array_ops.zeros([0, 2], dtype=dtypes.int32)
       values = array_ops.zeros([0, 7])
       shape = [4, 6, 7]
       self.scatter_nd(indices, values, shape).eval()
 
   def testSmokeScatterNdBatch1DSliceDim3ShapeRank7(self):
-    with self.test_session():
+    with self.cached_session():
       indices = array_ops.zeros([1, 3], dtype=dtypes.int32)
       values = array_ops.zeros([1, 6, 7, 8, 9])
       shape = [3, 4, 5, 6, 7, 8, 9]
       self.scatter_nd(indices, values, shape).eval()
 
   def testSmokeScatterNdBatch2DSliceDim3ShapeRank7(self):
-    with self.test_session():
+    with self.cached_session():
       indices = array_ops.zeros([1, 2, 3], dtype=dtypes.int32)
       values = array_ops.zeros([1, 2, 6, 7, 8, 9])
       shape = [3, 4, 5, 6, 7, 8, 9]
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index a82855dfeb..ce507e4ad7 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -177,7 +177,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
 
   def testSegmentIdsInvalid1(self):
     shape = [4, 4]
-    with self.test_session():
+    with self.cached_session():
       tf_x, _ = self._input(shape)
       indices = [-1, -1, 0, 0]
       s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
@@ -188,7 +188,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
 
   def testSegmentIdsInvalid2(self):
     shape = [4, 4]
-    with self.test_session():
+    with self.cached_session():
       tf_x, _ = self._input(shape)
       indices = [0, 1, 0, 1]
       s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
@@ -197,7 +197,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
 
   def testSegmentIdsInvalid3(self):
     shape = [4, 4]
-    with self.test_session():
+    with self.cached_session():
       tf_x, _ = self._input(shape)
       indices = [0, 1, 2, 0]
       s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
@@ -233,7 +233,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
         math_ops.segment_sum, math_ops.segment_mean, math_ops.segment_min,
         math_ops.segment_max
     ]:
-      with self.test_session():
+      with self.cached_session():
         tf_x, np_x = self._input(shape, dtype=dtypes_lib.float64)
         s = tf_op(data=tf_x, segment_ids=indices)
         jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -736,7 +736,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     segment_indices = [0, 1, 2, 2]
     num_indices = len(segment_indices)
     for tf_op in [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]:
-      with self.test_session():
+      with self.cached_session():
         tf_indices, _, tf_x, np_x = self._sparse_input(
             shape, num_indices, dtype=dtypes_lib.float64)
         s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices)
@@ -758,7 +758,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
         math_ops.sparse_segment_sum_with_num_segments,
         math_ops.sparse_segment_mean_with_num_segments,
     ]:
-      with self.test_session():
+      with self.cached_session():
         tf_indices, _, tf_x, np_x = self._sparse_input(
             shape, num_indices, dtype=dtypes_lib.float64)
         s = tf_op(
diff --git a/tensorflow/python/kernel_tests/session_ops_test.py b/tensorflow/python/kernel_tests/session_ops_test.py
index 678016b13d..03e1ae852f 100644
--- a/tensorflow/python/kernel_tests/session_ops_test.py
+++ b/tensorflow/python/kernel_tests/session_ops_test.py
@@ -31,7 +31,7 @@ from tensorflow.python.platform import test
 class SessionOpsTest(test.TestCase):
 
   def testHandleBasic(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Return a handle.
       a = constant_op.constant(10)
       b = constant_op.constant(5)
@@ -45,7 +45,7 @@ class SessionOpsTest(test.TestCase):
       self.assertEqual(500, sess.run(y, feed_dict={f: h.handle}))
 
   def testHandleEval(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Return a handle.
       a = constant_op.constant(10)
       b = constant_op.constant(5)
@@ -57,7 +57,7 @@ class SessionOpsTest(test.TestCase):
       self.assertEqual(50, h.eval())
 
   def testHandleAndValue(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Return a handle and a value.
       a = constant_op.constant(10)
       b = constant_op.constant(5)
@@ -70,7 +70,7 @@ class SessionOpsTest(test.TestCase):
       self.assertEqual(500, v)
 
   def testHandleCond(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Return a handle and a value
       a = constant_op.constant(10)
       b = constant_op.constant(5)
@@ -90,7 +90,7 @@ class SessionOpsTest(test.TestCase):
       self.assertEqual(5000, result)
 
   def testHandleForLoop(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Initialize a handle.
       a = constant_op.constant(0)
       h = session_ops.get_session_handle(a)
@@ -107,7 +107,7 @@ class SessionOpsTest(test.TestCase):
       self.assertEqual(100, h.eval())
 
   def testHandleWhileLoop(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Initialize a handle.
       a = constant_op.constant(0)
       h = session_ops.get_session_handle(a)
@@ -127,7 +127,7 @@ class SessionOpsTest(test.TestCase):
       self.assertEqual(101, h.eval())
 
   def testHandleMover(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Return a handle.
       a = constant_op.constant(10)
       b = constant_op.constant(5)
@@ -148,7 +148,7 @@ class SessionOpsTest(test.TestCase):
         self.assertEqual(100, sess.run(y, feed_dict={f: h.handle}))
 
   def testHandleDelete(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Return a handle.
       a = constant_op.constant(10)
       b = constant_op.constant(5)
@@ -157,7 +157,7 @@ class SessionOpsTest(test.TestCase):
       sess.run(h).delete()
 
   def testHandleDeleteRaw(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Return a handle.
       a = constant_op.constant(10)
       b = constant_op.constant(5)
@@ -171,7 +171,7 @@ class SessionOpsTest(test.TestCase):
       sess.run(x, feed_dict={f: raw_h})
 
   def testMultiDevices(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with ops.device(test.gpu_device_name()):
         a = constant_op.constant(1.0)
         a_handle = sess.run(session_ops.get_session_handle(a))
@@ -189,7 +189,7 @@ class SessionOpsTest(test.TestCase):
       self.assertEqual(3.0, c_handle.eval())
 
   def testHandleGC(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # initial values live on CPU
       with ops.device("/cpu:0"):
         one = constant_op.constant(1, dtype=dtypes.float32)
@@ -213,7 +213,7 @@ class SessionOpsTest(test.TestCase):
                        add_h2: x_handle.handle})
 
   def testHandlePlacement(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       a = constant_op.constant(1.0)
       a_handle_op = session_ops.get_session_handle(a)
       b = constant_op.constant(2.0)
@@ -233,7 +233,7 @@ class SessionOpsTest(test.TestCase):
       self.assertEqual(3.0, c_handle.eval())
 
   def testFeedOneHandleDirectly(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       a = constant_op.constant(10.0)
       b = constant_op.constant(5.0)
       c = math_ops.multiply(a, b)
@@ -244,7 +244,7 @@ class SessionOpsTest(test.TestCase):
       self.assertAllClose(2500.0, sess.run(d, feed_dict={c: h_c}))
 
   def testDirectHandleFeedOverlappingWithFetches(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       a = constant_op.constant(10.0)
       b = constant_op.constant(5.0)
       c = math_ops.multiply(a, b)
@@ -270,7 +270,7 @@ class SessionOpsTest(test.TestCase):
       self.assertAllClose(50.0, d_val)
 
   def testFeedTwoHandlesDirectly(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       a = constant_op.constant(10.0)
       b = constant_op.constant(5.0)
       c = math_ops.multiply(a, b)
@@ -284,7 +284,7 @@ class SessionOpsTest(test.TestCase):
       self.assertAllClose(-48.0, sess.run(e, feed_dict={c: h_d, d: h_c}))
 
   def testFeedHandleToVariableDirectly(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       a = variables.Variable(12.0)
       inc_a = state_ops.assign_add(a, 2.0)
       b = math_ops.add(a, 5.0)
diff --git a/tensorflow/python/kernel_tests/sets_test.py b/tensorflow/python/kernel_tests/sets_test.py
index 52b723802f..8335e9c139 100644
--- a/tensorflow/python/kernel_tests/sets_test.py
+++ b/tensorflow/python/kernel_tests/sets_test.py
@@ -158,7 +158,7 @@ class SetOpsTest(test_util.TensorFlowTestCase):
     for op in ops:
       self.assertEqual(None, op.get_shape().dims)
       self.assertEqual(dtypes.int32, op.dtype)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       results = sess.run(ops)
     self.assertAllEqual(results[0], results[1])
     return results[0]
@@ -477,7 +477,7 @@ class SetOpsTest(test_util.TensorFlowTestCase):
     dynamic_values_shape_ops = []
     static_indices_shape = None
     static_values_shape = None
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for op in ops:
         if static_indices_shape is None:
           static_indices_shape = op.indices.get_shape()
@@ -533,7 +533,7 @@ class SetOpsTest(test_util.TensorFlowTestCase):
 
   def _set_intersection_count(self, a, b):
     op = sets.set_size(sets.set_intersection(a, b))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       return sess.run(op)
 
   def test_set_difference_multirow_2d(self):
@@ -971,7 +971,7 @@ class SetOpsTest(test_util.TensorFlowTestCase):
 
   def _set_difference_count(self, a, b, aminusb=True):
     op = sets.set_size(sets.set_difference(a, b, aminusb))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       return sess.run(op)
 
   def test_set_union_multirow_2d(self):
@@ -1220,7 +1220,7 @@ class SetOpsTest(test_util.TensorFlowTestCase):
 
   def _set_union_count(self, a, b):
     op = sets.set_size(sets.set_union(a, b))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       return sess.run(op)
 
   def _assert_set_operation(self, expected_indices, expected_values,
diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py
index 34e34d9d1b..0304dc3875 100644
--- a/tensorflow/python/kernel_tests/shape_ops_test.py
+++ b/tensorflow/python/kernel_tests/shape_ops_test.py
@@ -158,7 +158,7 @@ class ShapeOpsTest(test.TestCase):
   # Disabled because it takes too long to run, but manually verified
   # as passing at time of writing.
   def _test64BitOutput(self):
-    with self.test_session():
+    with self.cached_session():
       inp = array_ops.zeros([2**31])
       num_elements = array_ops.size_internal(
           inp, optimize=False, out_type=dtypes.int64)
@@ -166,7 +166,7 @@ class ShapeOpsTest(test.TestCase):
 
     # Too large for tf.int32 output.
     with self.assertRaises(errors_impl.InvalidArgumentError):
-      with self.test_session():
+      with self.cached_session():
         inp = array_ops.zeros([2**31])
         num_elements = array_ops.size_internal(
             inp, optimize=False, out_type=dtypes.int32)
@@ -228,7 +228,7 @@ class ShapeOpsTest(test.TestCase):
     self._compareExpandDimsAll(choice([2, 3, 5]), -4)
 
   def testExpandDimsErrors(self):
-    with self.test_session():
+    with self.cached_session():
       self.assertRaises(ValueError, array_ops.expand_dims,
                         np.zeros([2, 3, 5]), -5)
       self.assertRaises(ValueError, array_ops.expand_dims,
@@ -239,7 +239,7 @@ class ShapeOpsTest(test.TestCase):
                         [False, True, True], 4)
 
   def testExpandDimsGradient(self):
-    with self.test_session():
+    with self.cached_session():
       inp = constant_op.constant(
           np.random.rand(4, 2).astype("f"), dtype=dtypes.float32)
       squeezed = array_ops.expand_dims(inp, 1)
@@ -249,7 +249,7 @@ class ShapeOpsTest(test.TestCase):
     self.assertLess(err, 1e-3)
 
   def testExpandDimsScalar(self):
-    with self.test_session():
+    with self.cached_session():
       inp = constant_op.constant(7)
       self.assertAllEqual([7], array_ops.expand_dims(inp, 0).eval())
       self.assertAllEqual([7], array_ops.expand_dims(inp, -1).eval())
@@ -375,7 +375,7 @@ class ShapeOpsTest(test.TestCase):
                           np.zeros([1, 2, 1]), [2, 3])
 
   def testSqueezeGradient(self):
-    with self.test_session():
+    with self.cached_session():
       inp = np.random.rand(4, 2).astype("f")
       a = array_ops.reshape(inp, [4, 1, 2])
       squeezed = array_ops.squeeze(a, [])
@@ -385,7 +385,7 @@ class ShapeOpsTest(test.TestCase):
     self.assertLess(err, 1e-3)
 
   def testSqueezeGradientWithSqueezeDims(self):
-    with self.test_session():
+    with self.cached_session():
       inp = np.random.rand(4, 2).astype("f")
       a = array_ops.reshape(inp, [4, 1, 2, 1])
       squeezed = array_ops.squeeze(a, [1])
@@ -395,7 +395,7 @@ class ShapeOpsTest(test.TestCase):
     self.assertLess(err, 1e-3)
 
   def testSqueezeWithUnknownShape(self):
-    with self.test_session():
+    with self.cached_session():
       a = array_ops.placeholder(dtypes.float32, shape=[2, None])
 
       squeezed = array_ops.squeeze(a, [1])
@@ -433,7 +433,7 @@ class TileTest(test.TestCase):
       self.assertTrue((result == np.tile(inp, (1, 4))).all())
 
   def testIdentityTileAndGrad(self):
-    with self.test_session():
+    with self.cached_session():
       inp = np.random.rand(4, 1).astype(np.float32)
       a = constant_op.constant(inp)
       tiled = array_ops.tile(a, [1, 1])
@@ -443,7 +443,7 @@ class TileTest(test.TestCase):
     self.assertTrue((result == np.tile(inp, (1, 1))).all())
 
   def testEmpty(self):
-    with self.test_session():
+    with self.cached_session():
       inp = np.random.rand(2, 3).astype(np.float32)
       a = constant_op.constant(inp)
       tiled = array_ops.tile(a, [5, 0])
@@ -453,7 +453,7 @@ class TileTest(test.TestCase):
 
   def testUnknownInputShape(self):
     """Importing can call _TileShape without shape of <multiples> known."""
-    with self.test_session():
+    with self.cached_session():
       inp = array_ops.placeholder(dtypes.float32)  # unknown shape
       multiples = constant_op.constant([1, 2, 3, 4], dtype=np.int32)
       tiled = array_ops.tile(inp, multiples)
@@ -503,7 +503,7 @@ class TileTest(test.TestCase):
       self.assertAllEqual(result, np.tile(inp, (1, 4)))
 
   def testInvalidDim(self):
-    with self.test_session():
+    with self.cached_session():
       inp = np.random.rand(4, 1).astype("f")
       a = constant_op.constant(
           [float(x) for x in inp.ravel(order="C")],
@@ -546,7 +546,7 @@ class TileTest(test.TestCase):
       self._RunAndVerifyResult(10, use_gpu=True)
 
   def testGradientSimpleReduction(self):
-    with self.test_session():
+    with self.cached_session():
       inp = np.random.rand(4, 1).astype("f")
       a = constant_op.constant(
           [float(x) for x in inp.flatten()], shape=[4, 1], dtype=dtypes.float32)
@@ -561,7 +561,7 @@ class TileTest(test.TestCase):
     self.assertAllClose(np.sum(grad_inp, axis=1).reshape(4, 1), result, 1e-3)
 
   def testGradientStridedReduction(self):
-    with self.test_session():
+    with self.cached_session():
       inp = np.random.rand(4, 2).astype("f")
       a = constant_op.constant(
           [float(x) for x in inp.flatten()], shape=[4, 2], dtype=dtypes.float32)
@@ -634,7 +634,7 @@ class TileTest(test.TestCase):
     self._RunAndVerifyGradientResult([2, 1, 3, 3, 2], [1, 3, 3, 1, 2])
 
   def testGradientStridedReductionGC(self):
-    with self.test_session():
+    with self.cached_session():
       inp = np.random.rand(4, 2).astype("f")
       a = constant_op.constant(
           [float(x) for x in inp.flatten()], shape=[4, 2], dtype=dtypes.float32)
@@ -647,7 +647,7 @@ class TileTest(test.TestCase):
                                   dtype=dtypes.float32)
     outputs = array_ops.gather(array_ops.tile(inputs, [3]),
                                [1, 5, 9, 3, 7, 2, 2, 2])
-    with self.test_session():
+    with self.cached_session():
       error = gradient_checker.compute_gradient_error(
           inputs, inputs.get_shape().as_list(),
           outputs, outputs.get_shape().as_list())
@@ -659,7 +659,7 @@ class TileTest(test.TestCase):
     inputs = array_ops.reshape(inputs, [-1, 1, 1])
     outputs = array_ops.gather(array_ops.tile(inputs, [3, 4, 2]),
                                [1, 5, 9, 3, 7, 2, 2, 2])
-    with self.test_session():
+    with self.cached_session():
       error = gradient_checker.compute_gradient_error(
           inputs, inputs.get_shape().as_list(),
           outputs, outputs.get_shape().as_list())
diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py
index 40d384c623..c08d3222b3 100644
--- a/tensorflow/python/kernel_tests/slice_op_test.py
+++ b/tensorflow/python/kernel_tests/slice_op_test.py
@@ -107,7 +107,7 @@ class SliceTest(test.TestCase):
 
   def testScalarInput(self):
     input_val = 0
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Test with constant input; shape inference fails.
       with self.assertRaisesWithPredicateMatch(ValueError, "out of range"):
         constant_op.constant(input_val)[:].get_shape()
@@ -121,7 +121,7 @@ class SliceTest(test.TestCase):
 
   def testInvalidIndex(self):
     input_val = [1, 2]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Test with constant input; shape inference fails.
       with self.assertRaisesWithPredicateMatch(ValueError, "out of range"):
         constant_op.constant(input_val)[1:, 1:].get_shape()
diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py
index fbf1adba9b..e53347c4bc 100644
--- a/tensorflow/python/kernel_tests/softmax_op_test.py
+++ b/tensorflow/python/kernel_tests/softmax_op_test.py
@@ -210,7 +210,7 @@ class SoftmaxTest(test.TestCase):
     self.assertEqual([3, 2, 4], op.get_shape())
 
   def testEmptyInput(self):
-    with self.test_session():
+    with self.cached_session():
       x = array_ops.placeholder(dtypes.float32, shape=[0, 3])
       self.assertEqual(0, array_ops.size(x).eval())
       # reshape would raise if logits is empty
@@ -218,7 +218,7 @@ class SoftmaxTest(test.TestCase):
         nn_ops.softmax(x, axis=0).eval()
 
   def testDimTooLarge(self):
-    with self.test_session():
+    with self.cached_session():
       # Use placeholder to make sure we get runtime error instead of shape
       # inference error.
       dim = array_ops.placeholder_with_default(100, shape=[])
diff --git a/tensorflow/python/kernel_tests/softplus_op_test.py b/tensorflow/python/kernel_tests/softplus_op_test.py
index c0269db9ae..afe3df6178 100644
--- a/tensorflow/python/kernel_tests/softplus_op_test.py
+++ b/tensorflow/python/kernel_tests/softplus_op_test.py
@@ -72,7 +72,7 @@ class SoftplusTest(test.TestCase):
           use_gpu=True)
 
   def testGradient(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -88,7 +88,7 @@ class SoftplusTest(test.TestCase):
     self.assertLess(err, 1e-4)
 
   def testGradGrad(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -105,7 +105,7 @@ class SoftplusTest(test.TestCase):
     self.assertLess(err, 5e-5)
 
   def testGradGradGrad(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -123,7 +123,7 @@ class SoftplusTest(test.TestCase):
     self.assertLess(err, 5e-5)
 
   def testNoInts(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           "No OpKernel was registered to support Op 'Softplus'"):
diff --git a/tensorflow/python/kernel_tests/softsign_op_test.py b/tensorflow/python/kernel_tests/softsign_op_test.py
index a5247ce08d..05a7c53dee 100644
--- a/tensorflow/python/kernel_tests/softsign_op_test.py
+++ b/tensorflow/python/kernel_tests/softsign_op_test.py
@@ -51,7 +51,7 @@ class SoftsignTest(test.TestCase):
           use_gpu=True)
 
   def testGradient(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant(
           [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
           shape=[2, 5],
@@ -67,7 +67,7 @@ class SoftsignTest(test.TestCase):
     self.assertLess(err, 1e-4)
 
   def testNoInts(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           "No OpKernel was registered to support Op 'Softsign'"):
diff --git a/tensorflow/python/kernel_tests/spacetobatch_op_test.py b/tensorflow/python/kernel_tests/spacetobatch_op_test.py
index 2a9232b6ae..e267c05915 100644
--- a/tensorflow/python/kernel_tests/spacetobatch_op_test.py
+++ b/tensorflow/python/kernel_tests/spacetobatch_op_test.py
@@ -551,7 +551,7 @@ class SpaceToBatchNDGradientTest(test.TestCase):
   def _checkGrad(self, x, block_shape, paddings):
     block_shape = np.array(block_shape)
     paddings = np.array(paddings).reshape((len(block_shape), 2))
-    with self.test_session():
+    with self.cached_session():
       tf_x = ops.convert_to_tensor(x)
       tf_y = array_ops.space_to_batch_nd(tf_x, block_shape, paddings)
       epsilon = 1e-5
@@ -638,7 +638,7 @@ class RequiredSpaceToBatchPaddingsTest(test.TestCase):
     t_paddings, t_crops = array_ops.required_space_to_batch_paddings(
         input_shape_placeholder, block_shape_placeholder,
         base_paddings_placeholder)
-    with self.test_session():
+    with self.cached_session():
       paddings_result = t_paddings.eval(assignments)
       crops_result = t_crops.eval(assignments)
     self.assertAllEqual(paddings_result, paddings_const)
diff --git a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
index 3bb5e899fe..477720302d 100644
--- a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
+++ b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
@@ -99,20 +99,20 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       """, q.accumulator_ref.op.node_def)
 
   def testAccumulatorSizeEmpty(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q")
       self.assertEqual(q.num_accumulated().eval(), 0)
 
   def testAccumulatorSetGlobalStep(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1]))
       set_global_step_op = q.set_global_step(1)
       set_global_step_op.run()
 
   def testAccumulatorApplyGradFloat32(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
       accum_op = q.apply_indexed_slices_grad(
@@ -123,7 +123,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       self.assertEqual(q.num_accumulated().eval(), 1)
 
   def testDtypes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64]
 
       for i in range(len(dtypes)):
@@ -145,7 +145,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
         self._assertEqual_nparray(sum_elems / len(elems), result, sess)
 
   def testAccumulatorMultipleAccumulators(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q_f32_0 = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2]))
       q_f32_1 = data_flow_ops.SparseConditionalAccumulator(
@@ -175,7 +175,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
         self._assertEqual_indexedslices(expected_tensors[i], result)
 
   def testAccumulatorTakeGradMean(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=())
 
@@ -220,7 +220,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
           dtypes_lib.float32, name="Q", shape=(), reduction_type="Invalid")
 
   def testAccumulatorRepeatedTakeGrad(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=())
 
@@ -258,7 +258,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       self.assertAllEqual(val.dense_shape, [-1, 2])
 
   def testParallelApplyGradMean(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2]))
       elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
@@ -323,7 +323,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
           val, sess)
 
   def testParallelTakeGrad(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2]))
       elems = [e + 1 for e in range(10)]
@@ -362,7 +362,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
             np.array([[0, 0], [elems[i], 0]]), results[i], sess)
 
   def testAccumulatorApplyAndBlockingTake(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2]))
 
@@ -397,7 +397,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       sess.run(takeg_op)
 
   def testAccumulatorCancel(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32,
           name="Q",
@@ -416,7 +416,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       takeg_thread.join()
 
   def testNonVectorIndices(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
 
@@ -428,7 +428,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
             grad_values=np.array([1, 2]).astype(np.float32)).run()
 
   def testZeroDimensionValues(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
 
@@ -438,7 +438,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
             grad_indices=[0], grad_values=np.array(1).astype(np.float32)).run()
 
   def testWrongNonEmptyInputValues(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
 
@@ -449,7 +449,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
             grad_values=np.array([[0, 1, 1]]).astype(np.float32)).run()
 
   def testDynamicNonVectorIndices(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
 
@@ -468,7 +468,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
                  })
 
   def testDynamicWrongNonEmptyInputValues(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
 
@@ -486,7 +486,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
                  })
 
   def testEmptyShapeApply(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([]))
 
@@ -511,7 +511,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       q.apply_grad(grad_indices=[0], grad_values=[1.0]).run()
 
   def testValidateShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=[2, 2, None])
 
@@ -606,7 +606,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
             local_step=1).run()
 
   def testReturnShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=[2, None])
 
@@ -631,7 +631,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       self.assertAllEqual(val.dense_shape, [-1, 2, 2, 3])
 
   def testApplyGradtInt32IndicesAndShape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
       accum_op = q.apply_grad(
diff --git a/tensorflow/python/kernel_tests/sparse_cross_op_test.py b/tensorflow/python/kernel_tests/sparse_cross_op_test.py
index ca7898d466..6e0714da70 100644
--- a/tensorflow/python/kernel_tests/sparse_cross_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_cross_op_test.py
@@ -42,7 +42,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_dense(self):
@@ -62,7 +62,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_integer_mixed_string_sparse(self):
@@ -76,7 +76,7 @@ class SparseCrossOpTest(test.TestCase):
         '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '55555_X_batch2-FC2-F1',
         '55555_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_integer_mixed_string_dense(self):
@@ -94,7 +94,7 @@ class SparseCrossOpTest(test.TestCase):
         '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2',
         '999999_X_batch2-FC2-F1', '999999_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_sparse_cross_dense(self):
@@ -111,7 +111,7 @@ class SparseCrossOpTest(test.TestCase):
             'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
             'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
         ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_integer_sparse_input(self):
@@ -127,7 +127,7 @@ class SparseCrossOpTest(test.TestCase):
             '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2',
             '5555_X_batch2-FC2-F1', '5555_X_batch2-FC2-F2'
         ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_permutation_3x3x3(self):
@@ -169,7 +169,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F2',
         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F3'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_permutation_3x1x2(self):
@@ -188,7 +188,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1',
         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_large_batch(self):
@@ -221,7 +221,7 @@ class SparseCrossOpTest(test.TestCase):
       ])
 
     expected_out = self._sparse_tensor(col_out)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_one_column_empty(self):
@@ -234,7 +234,7 @@ class SparseCrossOpTest(test.TestCase):
         self._sparse_tensor([], 1),
         self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
     ])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_empty(sess.run(op))
 
   def test_some_columns_empty(self):
@@ -253,7 +253,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2'
     ]], 2)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_all_columns_empty(self):
@@ -266,7 +266,7 @@ class SparseCrossOpTest(test.TestCase):
         self._sparse_tensor([]),
         self._sparse_tensor([])
     ])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_empty(sess.run(op))
 
   def test_hashed_zero_bucket_no_hash_key(self):
@@ -277,7 +277,7 @@ class SparseCrossOpTest(test.TestCase):
     ])
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[1971693436396284976]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_hashed_zero_bucket(self):
@@ -290,7 +290,7 @@ class SparseCrossOpTest(test.TestCase):
         hash_key=sparse_ops._DEFAULT_HASH_KEY + 1)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[4847552627144134031]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed.
@@ -304,7 +304,7 @@ class SparseCrossOpTest(test.TestCase):
         num_buckets=100)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[83]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_hashed_output(self):
@@ -318,7 +318,7 @@ class SparseCrossOpTest(test.TestCase):
         hash_key=sparse_ops._DEFAULT_HASH_KEY + 1)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[31]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_hashed__has_no_collision(self):
@@ -344,7 +344,7 @@ class SparseCrossOpTest(test.TestCase):
             self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
         ],
         num_buckets=1000)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       out = sess.run(op)
       self.assertEqual(6, len(out.values))
       self.assertAllEqual([[0, i] for i in range(6)], out.indices)
diff --git a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
index f50e39d6d5..90009fc33e 100644
--- a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
@@ -130,7 +130,7 @@ class MatMulGradientTest(test.TestCase):
 
   def _testGradients(self, tr_a, tr_b, sp_a, sp_b, a_dtype, b_dtype, delta,
                      name):
-    with self.test_session():
+    with self.cached_session():
       a = constant_op.constant(
           RandMatrix(
               3, 2, tr_a, round_bfloat=True), dtype=dtypes.float32)
diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py
index fc39de150e..79efee3f5b 100644
--- a/tensorflow/python/kernel_tests/sparse_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_ops_test.py
@@ -628,7 +628,7 @@ class SparseReduceTest(test_util.TensorFlowTestCase):
         else:
           np_ans = np.max(np_ans, axis=ra, keepdims=keep_dims)
 
-    with self.test_session():
+    with self.cached_session():
       if do_sum:
         tf_dense_ans = sparse_ops.sparse_reduce_sum(sp_t, reduction_axes,
                                                     keep_dims)
diff --git a/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py b/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py
index 87a4eb9c7b..c71746cc99 100644
--- a/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py
+++ b/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py
@@ -81,7 +81,7 @@ class SparseToDenseTest(test.TestCase):
     self.assertAllClose(np_ans, tf_ans)
 
   def testZeroDefault(self):
-    with self.test_session():
+    with self.cached_session():
       x = sparse_ops.sparse_to_dense(2, [4], 7).eval()
       self.assertAllEqual(x, [0, 0, 7, 0])
 
@@ -94,12 +94,12 @@ class SparseToDenseTest(test.TestCase):
     self.assertAllClose(np_ans, tf_ans)
 
   def testBadShape(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesWithPredicateMatch(ValueError, "must be rank 1"):
         _SparseToDense([1, 3], [[5], [3]], 1, -1)
 
   def testBadValue(self):
-    with self.test_session():
+    with self.cached_session():
       dense = _SparseToDense([1, 3], [5], [[5], [3]], -1)
       with self.assertRaisesOpError(
           r"sparse_values has incorrect shape \[2,1\], "
@@ -107,20 +107,20 @@ class SparseToDenseTest(test.TestCase):
         dense.eval()
 
   def testBadNumValues(self):
-    with self.test_session():
+    with self.cached_session():
       dense = _SparseToDense([1, 3], [5], [1, 2, 3], -1)
       with self.assertRaisesOpError(
           r"sparse_values has incorrect shape \[3\], should be \[\] or \[2\]"):
         dense.eval()
 
   def testBadDefault(self):
-    with self.test_session():
+    with self.cached_session():
       dense = _SparseToDense([1, 3], [5], [1, 2], [0])
       with self.assertRaisesOpError("default_value should be a scalar"):
         dense.eval()
 
   def testOutOfBoundsIndicesWithWithoutValidation(self):
-    with self.test_session():
+    with self.cached_session():
       dense = _SparseToDense(
           sparse_indices=[[1], [10]],
           output_size=[5],
@@ -140,7 +140,7 @@ class SparseToDenseTest(test.TestCase):
         dense_without_validation.eval()
 
   def testRepeatingIndicesWithWithoutValidation(self):
-    with self.test_session():
+    with self.cached_session():
       dense = _SparseToDense(
           sparse_indices=[[1], [1]],
           output_size=[5],
@@ -158,7 +158,7 @@ class SparseToDenseTest(test.TestCase):
       dense_without_validation.eval()
 
   def testUnsortedIndicesWithWithoutValidation(self):
-    with self.test_session():
+    with self.cached_session():
       dense = _SparseToDense(
           sparse_indices=[[2], [1]],
           output_size=[5],
diff --git a/tensorflow/python/kernel_tests/sparsemask_op_test.py b/tensorflow/python/kernel_tests/sparsemask_op_test.py
index cf6c9494ae..6f5dd45b61 100644
--- a/tensorflow/python/kernel_tests/sparsemask_op_test.py
+++ b/tensorflow/python/kernel_tests/sparsemask_op_test.py
@@ -34,7 +34,7 @@ class SparseMaskTest(test.TestCase):
     out_values = values[1:, :]
     out_indices = np.array([2, 3, 4], dtype=np.int32)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       values_tensor = ops.convert_to_tensor(values)
       indices_tensor = ops.convert_to_tensor(indices)
       mask_indices_tensor = ops.convert_to_tensor(mask_indices)
diff --git a/tensorflow/python/kernel_tests/string_join_op_test.py b/tensorflow/python/kernel_tests/string_join_op_test.py
index ce19333654..e4371ab5b9 100644
--- a/tensorflow/python/kernel_tests/string_join_op_test.py
+++ b/tensorflow/python/kernel_tests/string_join_op_test.py
@@ -28,7 +28,7 @@ class StringJoinOpTest(test.TestCase):
     input1 = "a"
     input2 = [["b"], ["c"]]
 
-    with self.test_session():
+    with self.cached_session():
       output = string_ops.string_join([input0, input1])
       self.assertAllEqual(output.eval(), [b"aa", b"ba"])
 
diff --git a/tensorflow/python/kernel_tests/string_length_op_test.py b/tensorflow/python/kernel_tests/string_length_op_test.py
index 075a3204ad..9f013c2c7e 100644
--- a/tensorflow/python/kernel_tests/string_length_op_test.py
+++ b/tensorflow/python/kernel_tests/string_length_op_test.py
@@ -27,7 +27,7 @@ class StringLengthOpTest(test.TestCase):
   def testStringLength(self):
     strings = [[["1", "12"], ["123", "1234"], ["12345", "123456"]]]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       lengths = string_ops.string_length(strings)
       values = sess.run(lengths)
       self.assertAllEqual(values, [[[1, 2], [3, 4], [5, 6]]])
diff --git a/tensorflow/python/kernel_tests/string_split_op_test.py b/tensorflow/python/kernel_tests/string_split_op_test.py
index b6a0f45adc..b968e885ed 100644
--- a/tensorflow/python/kernel_tests/string_split_op_test.py
+++ b/tensorflow/python/kernel_tests/string_split_op_test.py
@@ -32,7 +32,7 @@ class StringSplitOpTest(test.TestCase):
   def testStringSplit(self):
     strings = ["pigs on the wing", "animals"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split(strings)
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]])
@@ -42,7 +42,7 @@ class StringSplitOpTest(test.TestCase):
   def testStringSplitEmptyDelimiter(self):
     strings = ["hello", "hola", b"\xF0\x9F\x98\x8E"]  # Last string is U+1F60E
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split(strings, delimiter="")
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4],
@@ -60,7 +60,7 @@ class StringSplitOpTest(test.TestCase):
   def testStringSplitEmptyToken(self):
     strings = ["", " a", "b ", " c", " ", " d ", "  e", "f  ", "  g  ", "  "]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split(strings)
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(
@@ -72,7 +72,7 @@ class StringSplitOpTest(test.TestCase):
   def testStringSplitOnSetEmptyToken(self):
     strings = ["", " a", "b ", " c", " ", " d ", ". e", "f .", " .g. ", " ."]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split(strings, delimiter=" .")
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(
@@ -84,7 +84,7 @@ class StringSplitOpTest(test.TestCase):
   def testStringSplitWithDelimiter(self):
     strings = ["hello|world", "hello world"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertRaises(
           ValueError, string_ops.string_split, strings, delimiter=["|", ""])
 
@@ -106,7 +106,7 @@ class StringSplitOpTest(test.TestCase):
   def testStringSplitWithDelimiterTensor(self):
     strings = ["hello|world", "hello world"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       delimiter = array_ops.placeholder(dtypes.string)
 
       tokens = string_ops.string_split(strings, delimiter=delimiter)
@@ -124,7 +124,7 @@ class StringSplitOpTest(test.TestCase):
   def testStringSplitWithDelimitersTensor(self):
     strings = ["hello.cruel,world", "hello cruel world"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       delimiter = array_ops.placeholder(dtypes.string)
 
       tokens = string_ops.string_split(strings, delimiter=delimiter)
@@ -143,7 +143,7 @@ class StringSplitOpTest(test.TestCase):
   def testStringSplitWithNoSkipEmpty(self):
     strings = ["#a", "b#", "#c#"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split(strings, "#", skip_empty=False)
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1],
@@ -152,7 +152,7 @@ class StringSplitOpTest(test.TestCase):
       self.assertAllEqual(values, [b"", b"a", b"b", b"", b"", b"c", b""])
       self.assertAllEqual(shape, [3, 3])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split(strings, "#")
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(values, [b"a", b"b", b"c"])
@@ -165,7 +165,7 @@ class StringSplitV2OpTest(test.TestCase):
   def testSplitV2(self):
     strings = ["pigs on the wing", "animals"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings)
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]])
@@ -180,7 +180,7 @@ class StringSplitV2OpTest(test.TestCase):
     # ['', '', '4', '5', '', '6', '']
     strings = ["1<>2<>3", "<><>4<>5<><>6<>"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings, sep="<>")
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(
@@ -198,7 +198,7 @@ class StringSplitV2OpTest(test.TestCase):
     # ['1', '2', '', '3', '']
     strings = ["1,2,3", "4,5,,6,"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings, sep=',')
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2],
@@ -215,7 +215,7 @@ class StringSplitV2OpTest(test.TestCase):
     #['1', '2', '3']
     strings = ["1 2 3", "  4  5    6  "]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings)
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2],
@@ -231,7 +231,7 @@ class StringSplitV2OpTest(test.TestCase):
     # ['4', '5,,6,']
     strings = ["1,2,3", "4,5,,6,"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings, sep=',', maxsplit=1)
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1],
@@ -247,7 +247,7 @@ class StringSplitV2OpTest(test.TestCase):
     # ['4', '5    6  ']
     strings = ["1 2 3", "  4  5    6  "]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings, maxsplit=1)
       indices, values, shape = sess.run(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1],
diff --git a/tensorflow/python/kernel_tests/string_strip_op_test.py b/tensorflow/python/kernel_tests/string_strip_op_test.py
index 30fd477ff4..a96b71490e 100644
--- a/tensorflow/python/kernel_tests/string_strip_op_test.py
+++ b/tensorflow/python/kernel_tests/string_strip_op_test.py
@@ -28,7 +28,7 @@ class StringStripOpTest(test.TestCase):
   def test_string_strip(self):
     strings = ["pigs on the wing", "animals"]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       output = string_ops.string_strip(strings)
       output = sess.run(output)
       self.assertAllEqual(output, [b"pigs on the wing", b"animals"])
@@ -37,7 +37,7 @@ class StringStripOpTest(test.TestCase):
     strings = [["pigs on the wing", "animals"],
                [" hello ", "\n\tworld \r \n"]]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       output = string_ops.string_strip(strings)
       output = sess.run(output)
       self.assertAllEqual(output, [[b"pigs on the wing", b"animals"],
@@ -46,7 +46,7 @@ class StringStripOpTest(test.TestCase):
   def test_string_strip_with_empty_strings(self):
     strings = [" hello ", "", "world ", " \t \r \n "]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       output = string_ops.string_strip(strings)
       output = sess.run(output)
       self.assertAllEqual(output, [b"hello", b"", b"world", b""])
diff --git a/tensorflow/python/kernel_tests/string_to_hash_bucket_op_test.py b/tensorflow/python/kernel_tests/string_to_hash_bucket_op_test.py
index 2c6064e64b..9cb0c9d18f 100644
--- a/tensorflow/python/kernel_tests/string_to_hash_bucket_op_test.py
+++ b/tensorflow/python/kernel_tests/string_to_hash_bucket_op_test.py
@@ -27,7 +27,7 @@ from tensorflow.python.platform import test
 class StringToHashBucketOpTest(test.TestCase):
 
   def testStringToOneHashBucketFast(self):
-    with self.test_session():
+    with self.cached_session():
       input_string = array_ops.placeholder(dtypes.string)
       output = string_ops.string_to_hash_bucket_fast(input_string, 1)
       result = output.eval(feed_dict={input_string: ['a', 'b', 'c']})
@@ -35,7 +35,7 @@ class StringToHashBucketOpTest(test.TestCase):
       self.assertAllEqual([0, 0, 0], result)
 
   def testStringToHashBucketsFast(self):
-    with self.test_session():
+    with self.cached_session():
       input_string = array_ops.placeholder(dtypes.string)
       output = string_ops.string_to_hash_bucket_fast(input_string, 10)
       result = output.eval(feed_dict={input_string: ['a', 'b', 'c', 'd']})
@@ -47,7 +47,7 @@ class StringToHashBucketOpTest(test.TestCase):
       self.assertAllEqual([9, 2, 2, 5], result)
 
   def testStringToOneHashBucketLegacyHash(self):
-    with self.test_session():
+    with self.cached_session():
       input_string = array_ops.placeholder(dtypes.string)
       output = string_ops.string_to_hash_bucket(input_string, 1)
       result = output.eval(feed_dict={input_string: ['a', 'b', 'c']})
@@ -55,7 +55,7 @@ class StringToHashBucketOpTest(test.TestCase):
       self.assertAllEqual([0, 0, 0], result)
 
   def testStringToHashBucketsLegacyHash(self):
-    with self.test_session():
+    with self.cached_session():
       input_string = array_ops.placeholder(dtypes.string)
       output = string_ops.string_to_hash_bucket(input_string, 10)
       result = output.eval(feed_dict={input_string: ['a', 'b', 'c']})
@@ -66,14 +66,14 @@ class StringToHashBucketOpTest(test.TestCase):
       self.assertAllEqual([8, 0, 7], result)
 
   def testStringToOneHashBucketStrongOneHashBucket(self):
-    with self.test_session():
+    with self.cached_session():
       input_string = constant_op.constant(['a', 'b', 'c'])
       output = string_ops.string_to_hash_bucket_strong(
           input_string, 1, key=[123, 345])
       self.assertAllEqual([0, 0, 0], output.eval())
 
   def testStringToHashBucketsStrong(self):
-    with self.test_session():
+    with self.cached_session():
       input_string = constant_op.constant(['a', 'b', 'c'])
       output = string_ops.string_to_hash_bucket_strong(
           input_string, 10, key=[98765, 132])
@@ -84,7 +84,7 @@ class StringToHashBucketOpTest(test.TestCase):
       self.assertAllEqual([4, 2, 8], output.eval())
 
   def testStringToHashBucketsStrongInvalidKey(self):
-    with self.test_session():
+    with self.cached_session():
       input_string = constant_op.constant(['a', 'b', 'c'])
       with self.assertRaisesOpError('Key must have 2 elements'):
         string_ops.string_to_hash_bucket_strong(
diff --git a/tensorflow/python/kernel_tests/string_to_number_op_test.py b/tensorflow/python/kernel_tests/string_to_number_op_test.py
index cc4c21b66c..99ee25e125 100644
--- a/tensorflow/python/kernel_tests/string_to_number_op_test.py
+++ b/tensorflow/python/kernel_tests/string_to_number_op_test.py
@@ -29,7 +29,7 @@ _ERROR_MESSAGE = "StringToNumberOp could not correctly convert string: "
 class StringToNumberOpTest(test.TestCase):
 
   def _test(self, tf_type, good_pairs, bad_pairs):
-    with self.test_session():
+    with self.cached_session():
       # Build a small testing graph.
       input_string = array_ops.placeholder(dtypes.string)
       output = parsing_ops.string_to_number(
diff --git a/tensorflow/python/kernel_tests/substr_op_test.py b/tensorflow/python/kernel_tests/substr_op_test.py
index 753eac9c62..4d163a0f6f 100644
--- a/tensorflow/python/kernel_tests/substr_op_test.py
+++ b/tensorflow/python/kernel_tests/substr_op_test.py
@@ -35,7 +35,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     expected_value = b"ell"
 
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -68,7 +68,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     expected_value = b"y"
 
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -90,7 +90,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     expected_value = [b"ell", b"orl"]
 
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -118,7 +118,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
                       [b"en", b"en", b"en"]]
 
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -132,7 +132,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
                       [b"xteen", b"vente", b"hteen"]]
 
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -147,7 +147,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     expected_value = [[b"e", b"ev", b"lve"], [b"h", b"te", b"tee"],
                       [b"i", b"te", b"hte"], [b"i", b"en", b"nty"]]
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -158,7 +158,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     expected_value = [[b"hir", b"en", b"t"], [b"e", b"ur", b"ift"],
                       [b"ee", b"ee", b"ft"]]
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -168,7 +168,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     length = np.array([3, 2, 1], dtype)
     expected_value = [b"hir", b"rt", b"n"]
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -187,7 +187,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     position = np.array(7, dtype)
     length = np.array(3, dtype)
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
         substr = substr_op.eval()
 
@@ -205,7 +205,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     position = np.array(4, dtype)
     length = np.array(1, dtype)
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
         substr = substr_op.eval()
 
@@ -214,7 +214,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     position = np.array(-4, dtype)
     length = np.array(1, dtype)
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
         substr = substr_op.eval()
 
@@ -224,7 +224,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     position = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 3]], dtype)
     length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype)
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
         substr = substr_op.eval()
 
@@ -243,7 +243,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     position = np.array([1, 2, 4], dtype)
     length = np.array([1, 2, 3], dtype)
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
         substr = substr_op.eval()
 
@@ -294,7 +294,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     self._testMismatchPosLenShapes(dtype)
 
   def testWrongDtype(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(TypeError):
         string_ops.substr(b"test", 3.0, 1)
       with self.assertRaises(TypeError):
diff --git a/tensorflow/python/kernel_tests/summary_ops_test.py b/tensorflow/python/kernel_tests/summary_ops_test.py
index 2da7107f61..0c500120b0 100644
--- a/tensorflow/python/kernel_tests/summary_ops_test.py
+++ b/tensorflow/python/kernel_tests/summary_ops_test.py
@@ -34,7 +34,7 @@ class SummaryOpsTest(test.TestCase):
     return summ
 
   def testScalarSummary(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       const = constant_op.constant([10.0, 20.0])
       summ = logging_ops.scalar_summary(["c1", "c2"], const, name="mysumm")
       value = sess.run(summ)
@@ -45,7 +45,7 @@ class SummaryOpsTest(test.TestCase):
       """, self._AsSummary(value))
 
   def testScalarSummaryDefaultName(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       const = constant_op.constant([10.0, 20.0])
       summ = logging_ops.scalar_summary(["c1", "c2"], const)
       value = sess.run(summ)
@@ -56,7 +56,7 @@ class SummaryOpsTest(test.TestCase):
       """, self._AsSummary(value))
 
   def testMergeSummary(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       const = constant_op.constant(10.0)
       summ1 = summary.histogram("h", const)
       summ2 = logging_ops.scalar_summary("c", const)
diff --git a/tensorflow/python/kernel_tests/summary_tensor_op_test.py b/tensorflow/python/kernel_tests/summary_tensor_op_test.py
index d534aadb79..0f4643393a 100644
--- a/tensorflow/python/kernel_tests/summary_tensor_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_tensor_op_test.py
@@ -42,7 +42,7 @@ class SummaryOpsTest(test.TestCase):
     self.assertTrue(np.array_equal(actual, expected))
 
   def testTags(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       c = constant_op.constant(1)
       s1 = summary_ops.tensor_summary("s1", c)
       with ops.name_scope("foo"):
@@ -65,7 +65,7 @@ class SummaryOpsTest(test.TestCase):
     self.assertEqual(v4.tag, "foo/zod/TensorSummary")
 
   def testScalarSummary(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       const = constant_op.constant(10.0)
       summ = summary_ops.tensor_summary("foo", const)
       result = sess.run(summ)
@@ -76,7 +76,7 @@ class SummaryOpsTest(test.TestCase):
 
   def testStringSummary(self):
     s = six.b("foobar")
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       const = constant_op.constant(s)
       summ = summary_ops.tensor_summary("foo", const)
       result = sess.run(summ)
@@ -86,7 +86,7 @@ class SummaryOpsTest(test.TestCase):
     self._AssertNumpyEq(n, s)
 
   def testManyScalarSummary(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       const = array_ops.ones([5, 5, 5])
       summ = summary_ops.tensor_summary("foo", const)
       result = sess.run(summ)
@@ -96,7 +96,7 @@ class SummaryOpsTest(test.TestCase):
 
   def testManyStringSummary(self):
     strings = [[six.b("foo bar"), six.b("baz")], [six.b("zoink"), six.b("zod")]]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       const = constant_op.constant(strings)
       summ = summary_ops.tensor_summary("foo", const)
       result = sess.run(summ)
@@ -106,7 +106,7 @@ class SummaryOpsTest(test.TestCase):
 
   def testManyBools(self):
     bools = [True, True, True, False, False, False]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       const = constant_op.constant(bools)
       summ = summary_ops.tensor_summary("foo", const)
       result = sess.run(summ)
@@ -116,7 +116,7 @@ class SummaryOpsTest(test.TestCase):
     self._AssertNumpyEq(n, bools)
 
   def testSummaryDescriptionAndDisplayName(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
 
       def get_description(summary_op):
         summ_str = sess.run(summary_op)
diff --git a/tensorflow/python/kernel_tests/tensordot_op_test.py b/tensorflow/python/kernel_tests/tensordot_op_test.py
index 8ad29afd0a..d8d76440f1 100644
--- a/tensorflow/python/kernel_tests/tensordot_op_test.py
+++ b/tensorflow/python/kernel_tests/tensordot_op_test.py
@@ -48,7 +48,7 @@ class TensordotTest(test_lib.TestCase):
     with self.assertRaises(ValueError):
       math_ops.tensordot(a, b, (a_axes, b_axes))
     # Invalid dynamic shapes.
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "Matrix size-incompatible"):
         a_ph = array_ops.placeholder(dtypes.float32)
@@ -80,7 +80,7 @@ class TensordotTest(test_lib.TestCase):
     output = math_ops.tensordot(a_ph, b_ph, axes_ph)
     # Note: We don't support scalar Tensor values for axes.
     for axes_value in 1, [1], [0, 1], [[1]], [[0, 1]], [[0], [7]]:
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         with self.assertRaises(errors_impl.InvalidArgumentError):
           _ = sess.run(
               [output], feed_dict={
@@ -92,7 +92,7 @@ class TensordotTest(test_lib.TestCase):
   # Test case for 11950
   def test_valid_axis(self):
     for axes_value in [1, 2], [[1], [2]], [[], []], 0:
-      with self.test_session() as sess:
+      with self.cached_session():
         np_a = np.ones((3, 3))
         np_b = np.array([2, 3, 1])[None, None]
         np_ans = np.tensordot(np_a, np_b, axes_value)
diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py
index 290200ce45..f42800226e 100644
--- a/tensorflow/python/kernel_tests/transpose_op_test.py
+++ b/tensorflow/python/kernel_tests/transpose_op_test.py
@@ -451,13 +451,13 @@ class TransposeTest(test.TestCase):
         array_ops.transpose(array_ops.placeholder(dtypes.int32)).get_shape())
 
   def testNullTensor(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant([], dtype=dtypes.float32, shape=[1, 4, 0])
       xt = array_ops.transpose(x, [0, 2, 1]).eval()
       self.assertAllEqual(xt.shape, (1, 0, 4))
 
   def _testError(self, x, p, err):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesOpError(err):
         array_ops.transpose(x, p).eval()
 
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index bbc040dc13..316570e13e 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -30,7 +30,7 @@ class UniqueTest(test.TestCase):
 
   def testInt32(self):
     x = np.random.randint(2, high=10, size=7000)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       y, idx = array_ops.unique(x)
       tf_y, tf_idx = sess.run([y, idx])
 
@@ -41,7 +41,7 @@ class UniqueTest(test.TestCase):
 
   def testInt32OutIdxInt64(self):
     x = np.random.randint(2, high=10, size=7000)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       y, idx = array_ops.unique(x, out_idx=dtypes.int64)
       tf_y, tf_idx = sess.run([y, idx])
 
@@ -53,7 +53,7 @@ class UniqueTest(test.TestCase):
   def testString(self):
     indx = np.random.randint(65, high=122, size=7000)
     x = [chr(i) for i in indx]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       y, idx = array_ops.unique(x)
       tf_y, tf_idx = sess.run([y, idx])
 
@@ -65,7 +65,7 @@ class UniqueTest(test.TestCase):
   def testInt32Axis(self):
     for dtype in [np.int32, np.int64]:
       x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]])
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         y0, idx0 = gen_array_ops.unique_v2(x, axis=np.array([0], dtype))
         tf_y0, tf_idx0 = sess.run([y0, idx0])
         y1, idx1 = gen_array_ops.unique_v2(x, axis=np.array([1], dtype))
@@ -79,7 +79,7 @@ class UniqueTest(test.TestCase):
     # This test is only temporary, once V2 is used
     # by default, the axis will be wrapped to allow `axis=None`.
     x = np.random.randint(2, high=10, size=7000)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       y, idx = gen_array_ops.unique_v2(x, axis=np.array([], np.int32))
       tf_y, tf_idx = sess.run([y, idx])
 
@@ -93,7 +93,7 @@ class UniqueWithCountsTest(test.TestCase):
 
   def testInt32(self):
     x = np.random.randint(2, high=10, size=7000)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       y, idx, count = array_ops.unique_with_counts(x)
       tf_y, tf_idx, tf_count = sess.run([y, idx, count])
 
@@ -106,7 +106,7 @@ class UniqueWithCountsTest(test.TestCase):
 
   def testInt32OutIdxInt64(self):
     x = np.random.randint(2, high=10, size=7000)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       y, idx, count = array_ops.unique_with_counts(x, out_idx=dtypes.int64)
       tf_y, tf_idx, tf_count = sess.run([y, idx, count])
 
@@ -121,7 +121,7 @@ class UniqueWithCountsTest(test.TestCase):
     indx = np.random.randint(65, high=122, size=7000)
     x = [chr(i) for i in indx]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       y, idx, count = array_ops.unique_with_counts(x)
       tf_y, tf_idx, tf_count = sess.run([y, idx, count])
 
@@ -136,7 +136,7 @@ class UniqueWithCountsTest(test.TestCase):
   def testInt32Axis(self):
     for dtype in [np.int32, np.int64]:
       x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]])
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         y0, idx0, count0 = gen_array_ops.unique_with_counts_v2(
             x, axis=np.array([0], dtype))
         tf_y0, tf_idx0, tf_count0 = sess.run([y0, idx0, count0])
@@ -154,7 +154,7 @@ class UniqueWithCountsTest(test.TestCase):
     # This test is only temporary, once V2 is used
     # by default, the axis will be wrapped to allow `axis=None`.
     x = np.random.randint(2, high=10, size=7000)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       y, idx, count = gen_array_ops.unique_with_counts_v2(
           x, axis=np.array([], np.int32))
       tf_y, tf_idx, tf_count = sess.run([y, idx, count])
diff --git a/tensorflow/python/kernel_tests/unstack_op_test.py b/tensorflow/python/kernel_tests/unstack_op_test.py
index 1ee6e0866a..b373c419b6 100644
--- a/tensorflow/python/kernel_tests/unstack_op_test.py
+++ b/tensorflow/python/kernel_tests/unstack_op_test.py
@@ -99,7 +99,7 @@ class UnstackOpTest(test.TestCase):
           self.assertLess(err, 1e-6)
 
   def testInferNum(self):
-    with self.test_session():
+    with self.cached_session():
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
         x = array_ops.placeholder(np.float32, shape=shape)
         cs = array_ops.unstack(x)
@@ -131,13 +131,13 @@ class UnstackOpTest(test.TestCase):
       for j in range(-i, i):
         expected = np_split_squeeze(a, j)
 
-        with self.test_session() as sess:
+        with self.cached_session() as sess:
           actual_unstack = sess.run(array_ops.unstack(a, axis=j))
 
         self.assertAllEqual(expected, actual_unstack)
 
   def testAxis0Default(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       a = constant_op.constant([[1, 2, 3], [4, 5, 6]], name='a')
       unstacked = sess.run(array_ops.unstack(a))
 
@@ -156,7 +156,7 @@ class UnstackOpTest(test.TestCase):
       array_ops.unstack(a, axis=-3)
 
   def testZeroLengthDim(self):
-    with self.test_session():
+    with self.cached_session():
       x = array_ops.zeros(shape=(0, 1, 2))
       y = array_ops.unstack(x, axis=1)[0].eval()
       self.assertEqual(y.shape, (0, 2))
diff --git a/tensorflow/python/kernel_tests/variable_ops_test.py b/tensorflow/python/kernel_tests/variable_ops_test.py
index cf369c0718..3d2f8b6155 100644
--- a/tensorflow/python/kernel_tests/variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/variable_ops_test.py
@@ -118,7 +118,7 @@ class VariableOpTest(test.TestCase):
     self.assertEqual(tensor_shape.unknown_shape(), assigned.get_shape())
 
   def testAssignNoShape(self):
-    with self.test_session():
+    with self.cached_session():
       value = self._NewShapelessTensor()
       var = state_ops.variable_op([1, 2], dtypes.float32, set_shape=False)
       self.assertEqual(tensor_shape.unknown_shape(), var.get_shape())
@@ -126,7 +126,7 @@ class VariableOpTest(test.TestCase):
                        state_ops.assign(var, value).get_shape())
 
   def testAssignNoShapeNoValidateShape(self):
-    with self.test_session():
+    with self.cached_session():
       value = self._NewShapelessTensor()
       var = state_ops.variable_op([1, 2], dtypes.float32, set_shape=False)
       self.assertEqual(tensor_shape.unknown_shape(), var.get_shape())
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index d57b79cb90..401e1ae102 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -113,7 +113,7 @@ class VariableScopeTest(test.TestCase):
         self.assertEqual(w.constraint, constraint)
 
   def testStringDefaultInitializer(self):
-    with self.test_session():
+    with self.cached_session():
       v = variable_scope.get_variable("string", shape=[], dtype=dtypes.string)
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual(compat.as_bytes(v.eval()), b"")
@@ -263,7 +263,7 @@ class VariableScopeTest(test.TestCase):
 
   # TODO(alive): support variable partitioning/caching in eager mode.
   def testVarScopeCachingDevice(self):
-    with self.test_session():
+    with self.cached_session():
       caching_device = "/job:moo"
       with variable_scope.variable_scope("tower"):
         with variable_scope.variable_scope(
@@ -367,7 +367,7 @@ class VariableScopeTest(test.TestCase):
       variable_scope.get_variable("s", initializer=init, dtype=dtypes.float64)
 
   def testControlDeps(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v0 = variable_scope.get_variable(
           "v0", [1], initializer=init_ops.constant_initializer(0))
       with ops.control_dependencies([v0.value()]):
@@ -403,7 +403,7 @@ class VariableScopeTest(test.TestCase):
       variable_scope._DEFAULT_USE_RESOURCE = old
 
   def testControlFlow(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v0 = variable_scope.get_variable(
           "v0", [], initializer=init_ops.constant_initializer(0))
       var_dict = {}
@@ -513,7 +513,7 @@ class VariableScopeTest(test.TestCase):
           self.assertEqual(sc2, "testVarScopeNameScope3/scope2/")
 
   def testVarScopeOriginalNameScope(self):
-    with self.test_session():
+    with self.cached_session():
       with ops.name_scope("scope1"):
         with variable_scope.variable_scope("tower") as tower:
           self.assertEqual(tower.original_name_scope, "scope1/tower/")
@@ -536,7 +536,7 @@ class VariableScopeTest(test.TestCase):
               self.assertEqual(sc3, "scope1/tower/bar_1/")
 
   def testVarScopeObjectReuse(self):
-    with self.test_session():
+    with self.cached_session():
       vs = None
       with variable_scope.variable_scope("jump", reuse=True) as scope:
         vs = scope
@@ -563,7 +563,7 @@ class VariableScopeTest(test.TestCase):
         self.assertFalse(jump_no_reuse.reuse)
 
   def testVarScopeGetOrCreateReuse(self):
-    with self.test_session():
+    with self.cached_session():
 
       def test_value(value):
         x = constant_op.constant(value)
@@ -582,7 +582,7 @@ class VariableScopeTest(test.TestCase):
       test_value(17.)
 
   def testVarOpScope(self):
-    with self.test_session():
+    with self.cached_session():
       with ops.name_scope("testVarOpScope1"):
         with variable_scope.variable_scope("tower", "default", []):
           self.assertEqual(
@@ -608,7 +608,7 @@ class VariableScopeTest(test.TestCase):
             self.assertEqual(sc2, "testVarOpScope2/default_1/testVarOpScope2/")
 
   def testVarOpScopeUniqueNamesInterleavedSubstringScopes(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope(None, "defaultScope1"):
         with variable_scope.variable_scope(None, "layer"):
           self.assertEqual(
@@ -631,7 +631,7 @@ class VariableScopeTest(test.TestCase):
               "defaultScope1_2/layer/w:0")
 
   def testVarOpScopeUniqueNamesWithJump(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope("default") as default:
         with variable_scope.variable_scope(None, "layer"):
           self.assertEqual(
@@ -647,7 +647,7 @@ class VariableScopeTest(test.TestCase):
               variable_scope.get_variable("w", []).name, "default/layer_2/w:0")
 
   def testVarOpScopeReuse(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope("outer") as outer:
         with variable_scope.variable_scope("tower", "default", []):
           self.assertEqual(
@@ -673,7 +673,7 @@ class VariableScopeTest(test.TestCase):
             self.assertEqual(sc2, "outer_1/default/scope2/")
 
   def testVarScopeGetVar(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope("root"):
         with variable_scope.variable_scope("towerA") as tower_a:
           va = variable_scope.get_variable("v", [1])
@@ -719,7 +719,7 @@ class VariableScopeTest(test.TestCase):
         self.assertEqual("dtype" in str(exc.exception), True)
 
   def testVarScopeOuterScope(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope("outer") as outer:
         pass
       with variable_scope.variable_scope(outer):
@@ -743,7 +743,7 @@ class VariableScopeTest(test.TestCase):
             self.assertEqual(sc2, "outer_2/default/scope2/")
 
   def testVarScopeNestedOuterScope(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope("outer") as outer:
         with variable_scope.variable_scope(outer):
           self.assertEqual(
@@ -768,7 +768,7 @@ class VariableScopeTest(test.TestCase):
             self.assertEqual(sc2, "outer/default_1/scope2/")
 
   def testVarOpScopeReuseParam(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope("outer") as outer:
         with variable_scope.variable_scope("tower", "default", []):
           self.assertEqual(
@@ -795,14 +795,14 @@ class VariableScopeTest(test.TestCase):
             self.assertEqual(sc2, "outer_1/default/scope2/")
 
   def testVarOpScopeReuseError(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         with variable_scope.variable_scope(None, "default", reuse=True):
           self.assertEqual(
               variable_scope.get_variable("w", []).name, "outer/tower/w:0")
 
   def testVarOpScopeOuterScope(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope("outer") as outer:
         pass
       with variable_scope.variable_scope(outer, "default", []):
@@ -827,7 +827,7 @@ class VariableScopeTest(test.TestCase):
             self.assertEqual(sc2, "outer_2/default/scope2/")
 
   def testVarOpScopeNestedOuterScope(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope("outer") as outer:
         with variable_scope.variable_scope(outer, "default", []):
           self.assertEqual(
@@ -851,7 +851,7 @@ class VariableScopeTest(test.TestCase):
             self.assertEqual(sc2, "outer_1/default/scope2/")
 
   def testBasicWhenAuxiliaryNameScopeIsFalse(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope(
           "scope", auxiliary_name_scope=False) as scope:
         self.assertEqual(scope.original_name_scope, "")
@@ -886,7 +886,7 @@ class VariableScopeTest(test.TestCase):
               constant_op.constant([], name="c").name, "outer/inner/c:0")
 
   def testCreatedByDefaultNameWhenAuxiliaryNameScopeIsFalse(self):
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope(
           None, default_name="default", auxiliary_name_scope=False) as scope:
         self.assertEqual(scope.original_name_scope, "")
@@ -910,7 +910,7 @@ class VariableScopeTest(test.TestCase):
               constant_op.constant([], name="c").name, "outer/default/c:0")
 
   def testReenterRootScopeWhenAuxiliaryNameScopeIsFalse(self):
-    with self.test_session():
+    with self.cached_session():
       root_scope = variable_scope.get_variable_scope()
       with variable_scope.variable_scope(
           root_scope, auxiliary_name_scope=False) as scope:
@@ -927,7 +927,7 @@ class VariableScopeTest(test.TestCase):
               constant_op.constant([], name="c1").name, "outer/c1:0")
 
   def testAuxiliaryNameScopeIsInvalid(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
         with variable_scope.variable_scope(
             None, default_name="scope", auxiliary_name_scope="invalid"):
@@ -947,7 +947,7 @@ class VariableScopeTest(test.TestCase):
 
   def testReuseScopeWithoutNameScopeCollision(self):
     # Github issue: #13429
-    with self.test_session():
+    with self.cached_session():
       with variable_scope.variable_scope("outer"):
         with variable_scope.variable_scope("inner") as inner:
           pass
@@ -1021,7 +1021,7 @@ class VariableScopeTest(test.TestCase):
     self.assertEqual(varname_type[1], ("y", dtypes.int64))
 
   def testGetCollection(self):
-    with self.test_session():
+    with self.cached_session():
       _ = variable_scope.get_variable("testGetCollection_a", [])
       _ = variable_scope.get_variable(
           "testGetCollection_b", [], trainable=False)
@@ -1075,7 +1075,7 @@ class VariableScopeTest(test.TestCase):
       ])
 
   def testGetTrainableVariablesWithGetVariable(self):
-    with self.test_session():
+    with self.cached_session():
       _ = variable_scope.get_variable("testGetTrainableVariables_a", [])
       with variable_scope.variable_scope(
           "testGetTrainableVariables_foo") as scope:
@@ -1111,7 +1111,7 @@ class VariableScopeTest(test.TestCase):
             trainable=True)
 
   def testGetTrainableVariablesWithVariable(self):
-    with self.test_session():
+    with self.cached_session():
       _ = variable_scope.variable(1.0, name="testGetTrainableVariables_a")
       with variable_scope.variable_scope(
           "testGetTrainableVariables_foo") as scope:
@@ -1150,7 +1150,7 @@ class VariableScopeTest(test.TestCase):
             trainable=True)
 
   def testGetGlobalVariables(self):
-    with self.test_session():
+    with self.cached_session():
       _ = variable_scope.get_variable("testGetGlobalVariables_a", [])
       with variable_scope.variable_scope("testGetGlobalVariables_foo") as scope:
         _ = variable_scope.get_variable("testGetGlobalVariables_b", [])
@@ -1160,7 +1160,7 @@ class VariableScopeTest(test.TestCase):
              "testGetGlobalVariables_b:0"])
 
   def testGetLocalVariables(self):
-    with self.test_session():
+    with self.cached_session():
       _ = variable_scope.get_variable(
           "a", [], collections=[ops.GraphKeys.LOCAL_VARIABLES])
       with variable_scope.variable_scope("foo") as scope:
@@ -1396,7 +1396,7 @@ class VariableScopeWithCustomGetterTest(test.TestCase):
     self.assertEqual("scope/v/0:0", true_vars[0].name)
     self.assertEqual("scope/v/1:0", true_vars[1].name)
     self.assertEqual("custom_getter/add:0", v.name)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variables_lib.global_variables_initializer().run()
       np_vars, np_v = sess.run([true_vars, v])
       self.assertAllClose(np_v, sum(np_vars))
@@ -1436,7 +1436,7 @@ class VariableScopeWithCustomGetterTest(test.TestCase):
     self.assertEqual(template % (1, 1, 0), true_vars[6].name)
     self.assertEqual(template % (1, 1, 1), true_vars[7].name)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variables_lib.global_variables_initializer().run()
       np_vars, np_v = sess.run([true_vars, v])
       # take products of sums of products
diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 2b9c62ad6f..2e7975667c 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -42,7 +42,7 @@ from tensorflow.python.util import compat
 class VariablesTestCase(test.TestCase):
 
   def testInitialization(self):
-    with self.test_session():
+    with self.cached_session():
       var0 = variables.Variable(0.0)
       self.assertEqual("Variable:0", var0.name)
       self.assertEqual("Variable", var0._shared_name)
@@ -69,7 +69,7 @@ class VariablesTestCase(test.TestCase):
       self.assertAllClose(1.1, var1.eval())
 
   def testInitializationOrder(self):
-    with self.test_session():
+    with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([3, 6]), name="rnd")
       self.assertEqual("rnd:0", rnd.name)
       self.assertEqual([3, 6], rnd.get_shape())
@@ -106,7 +106,7 @@ class VariablesTestCase(test.TestCase):
         pass
 
   def testAssignments(self):
-    with self.test_session():
+    with self.cached_session():
       var = variables.Variable(0.0)
       plus_one = var.assign_add(1.0)
       minus_one = var.assign_sub(2.0)
@@ -142,7 +142,7 @@ class VariablesTestCase(test.TestCase):
       self.assertAllClose(4.0, var.eval())
 
   def testZeroSizeStringAssign(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       array = variables.Variable(
           initial_value=array_ops.zeros((0,), dtype=dtypes.string),
           name="foo",
@@ -154,7 +154,7 @@ class VariablesTestCase(test.TestCase):
       self.assertEqual([], list(sess.run(copy_op)))
 
   def _countUpToTest(self, dtype):
-    with self.test_session():
+    with self.cached_session():
       zero = constant_op.constant(0, dtype=dtype)
       var = variables.Variable(zero)
       count_up_to = var.count_up_to(3)
@@ -186,7 +186,7 @@ class VariablesTestCase(test.TestCase):
     self._countUpToTest(dtypes.int64)
 
   def testControlDepsNone(self):
-    with self.test_session():
+    with self.cached_session():
       c = constant_op.constant(1.0)
       with ops.control_dependencies([c]):
         # d get the control dep.
@@ -199,7 +199,7 @@ class VariablesTestCase(test.TestCase):
       self.assertEqual([], var_x._ref().op.control_inputs)  # pylint: disable=protected-access
 
   def testControlFlow(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       v0 = variables.Variable(0, name="v0")
       var_dict = {}
 
@@ -248,7 +248,7 @@ class VariablesTestCase(test.TestCase):
       control_flow_ops.while_loop(cond, body, [0, 0])
 
   def testUseVariableAsTensor(self):
-    with self.test_session():
+    with self.cached_session():
       var_x = variables.Variable(2.0)
       var_y = variables.Variable(3.0)
       variables.global_variables_initializer().run()
@@ -257,7 +257,7 @@ class VariablesTestCase(test.TestCase):
       self.assertAllClose(5.0, math_ops.add(var_x, var_y).eval())
 
   def testZeroSizeVarSameAsConst(self):
-    with self.test_session():
+    with self.cached_session():
       zero_size_var = variables.Variable(array_ops.zeros([0, 2]))
       zero_size_const = array_ops.ones([2, 0])
       variable_mul = math_ops.matmul(zero_size_const, zero_size_var)
@@ -269,7 +269,7 @@ class VariablesTestCase(test.TestCase):
       self.assertAllClose([[0., 0.], [0., 0.]], variable_output)
 
   def testCachingDevice(self):
-    with self.test_session():
+    with self.cached_session():
       var = variables.Variable(2.0)
       self.assertEqual(var.device, var.value().device)
       self.assertEqual(var.device, var.initialized_value().device)
@@ -279,7 +279,7 @@ class VariablesTestCase(test.TestCase):
       self.assertTrue(var_cached.value().device.startswith("/job:foo"))
 
   def testCollections(self):
-    with self.test_session():
+    with self.cached_session():
       var_x = variables.Variable(2.0)
       var_y = variables.Variable(2.0, trainable=False)
       var_z = variables.Variable(2.0, trainable=True)
@@ -294,7 +294,7 @@ class VariablesTestCase(test.TestCase):
       self.assertEqual([var_x, var_z, var_t], variables.trainable_variables())
 
   def testCollectionsWithScope(self):
-    with self.test_session():
+    with self.cached_session():
       with ops.name_scope("scope_1"):
         var_x = variables.Variable(2.0)
       with ops.name_scope("scope_2"):
@@ -309,7 +309,7 @@ class VariablesTestCase(test.TestCase):
       self.assertEqual([var_y], variables.trainable_variables("scope_2"))
 
   def testOperators(self):
-    with self.test_session():
+    with self.cached_session():
       var_f = variables.Variable([2.0])
       add = var_f + 0.0
       radd = 1.0 + var_f
@@ -382,13 +382,13 @@ class VariablesTestCase(test.TestCase):
       self.assertAllClose([[20.0, 30.0], [40.0, 60.0]], rmatmul.eval())
 
   def testSession(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       var = variables.Variable([1, 12])
       variables.global_variables_initializer().run()
       self.assertAllClose([1, 12], sess.run(var))
 
   def testDevicePlacement(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with ops.device("/cpu:0"):
         var = variables.Variable([1, 12])
       init_value = var.initialized_value()
@@ -408,7 +408,7 @@ class VariablesTestCase(test.TestCase):
   def testInitializerFunction(self):
     value = [[-42], [133.7]]
     shape = [2, 1]
-    with self.test_session():
+    with self.cached_session():
       initializer = lambda: constant_op.constant(value)
 
       v1 = variables.Variable(initializer, dtype=dtypes.float32)
@@ -443,7 +443,7 @@ class VariablesTestCase(test.TestCase):
           constraint=constraint)
 
   def testNoRefDataRace(self):
-    with self.test_session():
+    with self.cached_session():
       a = variables.Variable([1, 2, 3], dtype=dtypes.float32)
       b = variables.Variable(a.initialized_value() + 2)
       c = variables.Variable(b.initialized_value() + 2)
@@ -453,7 +453,7 @@ class VariablesTestCase(test.TestCase):
       self.assertAllEqual(c.eval(), [5, 6, 7])
 
   def testInitializerFunctionDevicePlacement(self):
-    with self.test_session():
+    with self.cached_session():
       initializer = lambda: constant_op.constant(42.0)
       with ops.device("/cpu:100"):
         v1 = variables.Variable(initializer, dtype=dtypes.float32, name="v1")
@@ -471,11 +471,11 @@ class VariablesTestCase(test.TestCase):
         self.assertEqual(expected_group_v2, i.op.colocation_groups())
 
   def testVariableDefInitializedInstances(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       v_def = variables.Variable(
           initial_value=constant_op.constant(3.0)).to_proto()
 
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       # v describes a VariableDef-based variable without an initial value.
       v = variables.Variable(variable_def=v_def)
       self.assertEqual(3.0, sess.run(v.initialized_value()))
@@ -486,7 +486,7 @@ class VariablesTestCase(test.TestCase):
       self.assertEqual(1.0, v.initialized_value().eval())
 
     v_def.ClearField("initial_value_name")
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       # Restoring a legacy VariableDef proto that does not have
       # initial_value_name set should still work.
       v = variables.Variable(variable_def=v_def)
@@ -514,7 +514,7 @@ class VariablesTestCase(test.TestCase):
           .trainable)
 
   def testLoad(self):
-    with self.test_session():
+    with self.cached_session():
       var = variables.Variable(np.zeros((5, 5), np.float32))
       variables.global_variables_initializer().run()
       var.load(np.ones((5, 5), np.float32))
@@ -540,12 +540,12 @@ class VariablesTestCase(test.TestCase):
 class IsInitializedTest(test.TestCase):
 
   def testNoVars(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       uninited = variables.report_uninitialized_variables()
       self.assertEqual(0, sess.run(uninited).size)
 
   def testAssertVariablesInitialized(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.Variable([1, 2], name="v")
       w = variables.Variable([3, 4], name="w")
       _ = v, w
@@ -555,7 +555,7 @@ class IsInitializedTest(test.TestCase):
       self.assertEqual(0, sess.run(uninited).size)
 
   def testVariableList(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.Variable([1, 2], name="v")
       w = variables.Variable([3, 4], name="w")
       uninited = variables.report_uninitialized_variables()
@@ -566,14 +566,14 @@ class IsInitializedTest(test.TestCase):
       self.assertEqual(0, sess.run(uninited).size)
 
   def testZeroSizeVarInitialized(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.Variable(array_ops.zeros([0, 2]), name="v")
       uninited = variables.report_uninitialized_variables()
       v.initializer.run()  # not strictly necessary
       self.assertEqual(0, sess.run(uninited).size)
 
   def testTrainingWithZeroSizeVar(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       a = variables.Variable(array_ops.zeros([0, 2]))
       b = variables.Variable(array_ops.ones([2, 2]))
       objective = math_ops.reduce_sum(b + math_ops.matmul(
@@ -592,7 +592,7 @@ class ObsoleteIsInitializedTest(test.TestCase):
       self.assertEqual(None, variables.assert_variables_initialized())
 
   def testVariables(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.Variable([1, 2])
       w = variables.Variable([3, 4])
       _ = v, w
@@ -603,7 +603,7 @@ class ObsoleteIsInitializedTest(test.TestCase):
       sess.run(inited)
 
   def testVariableList(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.Variable([1, 2])
       w = variables.Variable([3, 4])
       inited = variables.assert_variables_initialized([v])
diff --git a/tensorflow/python/kernel_tests/weights_broadcast_test.py b/tensorflow/python/kernel_tests/weights_broadcast_test.py
index eda2856e0b..85f9abc69f 100644
--- a/tensorflow/python/kernel_tests/weights_broadcast_test.py
+++ b/tensorflow/python/kernel_tests/weights_broadcast_test.py
@@ -44,7 +44,7 @@ class AssertBroadcastableTest(test.TestCase):
     values_placeholder = array_ops.placeholder(dtypes_lib.float32)
     dynamic_op = weights_broadcast_ops.assert_broadcastable(
         weights=weights_placeholder, values=values_placeholder)
-    with self.test_session():
+    with self.cached_session():
       static_op.run()
       dynamic_op.run(feed_dict={
           weights_placeholder: weights,
@@ -100,7 +100,7 @@ class AssertBroadcastableTest(test.TestCase):
     values_placeholder = array_ops.placeholder(dtypes_lib.float32)
     dynamic_op = weights_broadcast_ops.assert_broadcastable(
         weights=weights_placeholder, values=values_placeholder)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.OpError, error_msg):
         dynamic_op.run(feed_dict={
             weights_placeholder: weights,
@@ -157,7 +157,7 @@ class BroadcastWeightsTest(test.TestCase):
     values_placeholder = array_ops.placeholder(dtypes_lib.float32)
     dynamic_op = weights_broadcast_ops.broadcast_weights(
         weights=weights_placeholder, values=values_placeholder)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected, static_op.eval())
       self.assertAllEqual(expected, dynamic_op.eval(feed_dict={
           weights_placeholder: weights,
@@ -227,7 +227,7 @@ class BroadcastWeightsTest(test.TestCase):
     values_placeholder = array_ops.placeholder(dtypes_lib.float32)
     dynamic_op = weights_broadcast_ops.broadcast_weights(
         weights=weights_placeholder, values=values_placeholder)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(errors_impl.OpError, error_msg):
         dynamic_op.eval(feed_dict={
             weights_placeholder: weights,
diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py
index 60c726d54c..729885169e 100644
--- a/tensorflow/python/kernel_tests/xent_op_test.py
+++ b/tensorflow/python/kernel_tests/xent_op_test.py
@@ -153,13 +153,13 @@ class XentTest(test.TestCase):
       self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
 
   def testShapeMismatch(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         gen_nn_ops.softmax_cross_entropy_with_logits(
             [[0., 1.], [2., 3.]], [[0., 1., 0.], [1., 0., 0.]])
 
   def testNotMatrix(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(ValueError):
         gen_nn_ops.softmax_cross_entropy_with_logits([0., 1., 2., 3.],
                                                      [0., 1., 0., 1.])
@@ -180,7 +180,7 @@ class XentTest(test.TestCase):
         np.array([[0., 0., 0., 1.], [0., .5, .5, 0.]]).astype(np.float64))
 
   def testGradient(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       l = constant_op.constant(
           [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5],
           shape=[3, 4],
@@ -207,7 +207,7 @@ class XentTest(test.TestCase):
     self.assertLess(err, 5e-8)
 
   def testGradientLabelWithV2(self):
-    with self.test_session():
+    with self.cached_session():
       l = constant_op.constant(
           [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5],
           shape=[3, 4],
@@ -225,7 +225,7 @@ class XentTest(test.TestCase):
     self.assertLess(err, 5e-8)
 
   def testSecondGradient(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       l = constant_op.constant(
           [
               0.0, 0.0, 1.0 / 3, 0.0, 1.0 / 3, 0.0, 0.0, 0.0, 0.0, 0.5 / 3, 0.0,
-- 
GitLab


From 7f3938deb393f7688cd364b630afdd9338460299 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 00:33:03 -0700
Subject: [PATCH 0121/1357] [TF] Update strings to run on device:CPU

/cpu is an old style and can be misleading for new people trying to specify other devices.
Also correct comparison in tensorflow/python/client/timeline_test.py

PiperOrigin-RevId: 212769480
---
 tensorflow/python/client/timeline_test.py |  2 +-
 tensorflow/python/framework/test_util.py  | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py
index c046e9cfd4..03effde098 100644
--- a/tensorflow/python/client/timeline_test.py
+++ b/tensorflow/python/client/timeline_test.py
@@ -161,7 +161,7 @@ class TimelineTest(test.TestCase):
     cpu_max = maximums[
         'cuda_host_bfc'] if 'cuda_host_bfc' in maximums else maximums[cpuname]
     # At least num1 + num2, both float32s (4 bytes each)
-    self.assertGreater(cpu_max.num_bytes, 8)
+    self.assertGreaterEqual(cpu_max.num_bytes, 8)
     self.assertGreater(cpu_max.timestamp, 0)
     self.assertTrue('num1' in cpu_max.tensors or 'num1/read' in cpu_max.tensors)
     self.assertTrue('num2' in cpu_max.tensors or 'num2/read' in cpu_max.tensors)
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 1cc3bb4628..b7398238f5 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -779,7 +779,7 @@ def run_in_graph_and_eager_modes(func=None,
 
       def run_eagerly(self, **kwargs):
         if not use_gpu:
-          with ops.device("/cpu:0"):
+          with ops.device("/device:CPU:0"):
             f(self, **kwargs)
         else:
           f(self, **kwargs)
@@ -1839,7 +1839,7 @@ class TensorFlowTestCase(googletest.TestCase):
         elif use_gpu:
           yield sess
         else:
-          with sess.graph.device("/cpu:0"):
+          with sess.graph.device("/device:CPU:0"):
             yield sess
 
   def _create_session(self, graph, config, force_gpu):
@@ -1854,12 +1854,18 @@ class TensorFlowTestCase(googletest.TestCase):
       Returns:
         A config_pb2.ConfigProto object.
       """
+      # TODO(b/114333779): Enforce allow_soft_placement=False when
+      # use_gpu=False. Currently many tests rely on the fact that any device
+      # will be used even when a specific device is supposed to be used.
+      allow_soft_placement = not force_gpu
       if config is None:
         config = config_pb2.ConfigProto()
-        config.allow_soft_placement = not force_gpu
+        config.allow_soft_placement = allow_soft_placement
         config.gpu_options.per_process_gpu_memory_fraction = 0.3
-      elif force_gpu and config.allow_soft_placement:
-        config = config_pb2.ConfigProto().CopyFrom(config)
+      elif not allow_soft_placement and config.allow_soft_placement:
+        config_copy = config_pb2.ConfigProto()
+        config_copy.CopyFrom(config)
+        config = config_copy
         config.allow_soft_placement = False
       # Don't perform optimizations for tests so we don't inadvertently run
       # gpu ops on cpu
-- 
GitLab


From ce9b23070638094022036656e5d1fbf3e23b74c6 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 11:24:37 +0300
Subject: [PATCH 0122/1357] Add forgotten ignite_byte_swapper.h

---
 .../ignite/kernels/ignite_byte_swapper.h      | 129 ++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
new file mode 100644
index 0000000000..986bedcf69
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -0,0 +1,129 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_
+
+#include <stdint.h>
+
+namespace tensorflow {
+
+class ByteSwapper {
+ public:
+  ByteSwapper(bool big_endian) {
+    int x = 1;
+    bool is_little_endian = (*(char *)&x == 1);
+    swap_ = big_endian == is_little_endian;
+  }
+
+  inline void SwapIfRequiredInt16(int16_t *x) const {
+    if (swap_) {
+      Swap16(x);
+    }
+  }
+
+  inline void SwapIfRequiredUnsignedInt16(uint16_t *x) const {
+    if (swap_) {
+      Swap16(reinterpret_cast<int16_t*>(x));
+    }
+  }
+
+  inline void SwapIfRequiredInt32(int32_t *x) const {
+    if (swap_) {
+      Swap32(x);
+    }
+  }
+
+  inline void SwapIfRequiredFloat(float *x) const {
+    if (swap_) {
+      Swap32(reinterpret_cast<int32_t*>(x));
+    }
+  }
+
+  inline void SwapIfRequiredInt64(int64_t *x) const {
+    if (swap_) {
+      Swap64(x);
+    }
+  }
+
+  inline void SwapIfRequiredDouble(double *x) const {
+    if (swap_) {
+      Swap64(reinterpret_cast<int64_t*>(x));
+    }
+  }
+
+  inline void SwapIfRequiredInt16Arr(int16_t *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) Swap16(&x[i]);
+    }
+  }
+
+  inline void SwapIfRequiredUnsignedInt16Arr(uint16_t *x,
+                                             int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) 
+        Swap16(reinterpret_cast<int16_t*>(&x[i]));
+    }
+  }
+
+  inline void SwapIfRequiredInt32Arr(int32_t *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) Swap32(&x[i]);
+    }
+  }
+
+  inline void SwapIfRequiredFloatArr(float *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) 
+        Swap32(reinterpret_cast<int32_t*>(&x[i]));
+    }
+  }
+
+  inline void SwapIfRequiredInt64Arr(int64_t *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) Swap64(&x[i]);
+    }
+  }
+
+  inline void SwapIfRequiredDoubleArr(double *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) 
+        Swap64(reinterpret_cast<int64_t*>(&x[i]));
+    }
+  }
+
+ private:
+  inline void Swap16(int16_t *x) const {
+    *x = ((*x & 0xFF) << 8) | ((*x >> 8) & 0xFF);
+  }
+
+  inline void Swap32(int32_t *x) const {
+    *x = ((*x & 0xFF) << 24) | (((*x >> 8) & 0xFF) << 16) |
+         (((*x >> 16) & 0xFF) << 8) | ((*x >> 24) & 0xFF);
+  }
+
+  inline void Swap64(int64_t *x) const {
+    *x = ((*x & 0xFF) << 56) | (((*x >> 8) & 0xFF) << 48) |
+         (((*x >> 16) & 0xFF) << 40) | (((*x >> 24) & 0xFF) << 32) |
+         (((*x >> 32) & 0xFF) << 24) | (((*x >> 40) & 0xFF) << 16) |
+         (((*x >> 48) & 0xFF) << 8) | ((*x >> 56) & 0xFF);
+  }
+
+  bool swap_;
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_
-- 
GitLab


From 567de999ae29a2cfb30132f82178006fe5688d6b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 01:35:35 -0700
Subject: [PATCH 0123/1357] Change test to use 2 CPU devices instead of GPU.

General cleanup: testDeviceInAndOutOfCond uses a GPU in a CPU only test build resulting in all operations run on the same device even though the graph is for multiple devices.

PiperOrigin-RevId: 212775360
---
 tensorflow/python/kernel_tests/cond_v2_test.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 18a1b230a0..a1efecf28a 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -892,11 +892,13 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
   def testDeviceInAndOutOfCond(self):
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g):
+      with self.test_session(
+          graph=g, config=config_pb2.ConfigProto(device_count={"CPU": 2})):
+
         def fn2():
-          with ops.device("/device:GPU:0"):
+          with ops.device("/device:CPU:1"):
             c = constant_op.constant(3.0)
-            self.assertEqual("/device:GPU:0", c.op.device)
+            self.assertEqual("/device:CPU:1", c.op.device)
             return c
 
         with ops.device("/device:CPU:0"):
-- 
GitLab


From c1de96776067f96da55f8d4709fe5a3c50cccd4b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 01:55:22 -0700
Subject: [PATCH 0124/1357] Use remote builds for the XLA GPU presubmit with
 gcc/nvcc.

PiperOrigin-RevId: 212776966
---
 third_party/toolchains/BUILD | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD
index ec1006fe23..4303751452 100644
--- a/third_party/toolchains/BUILD
+++ b/third_party/toolchains/BUILD
@@ -20,3 +20,18 @@ platform(
             value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:495a025ed5e273cfa5d53357ef93ac20500c008994e0be106c509f51555fb93c"
         }""",
 )
+
+platform(
+    name = "rbe_cuda9.0-cudnn7-ubuntu14.04",
+    constraint_values = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//tools/cpp:clang",
+        "@bazel_toolchains//constraints:xenial",
+    ],
+    remote_execution_properties = """
+        properties: {
+            name: "container-image"
+            value:"docker://gcr.io/asci-toolchain/nosla-cuda9.0-cudnn7-ubuntu14.04@sha256:ae58329b961e7c17d89725bf8fd72dfbd5850f4f3313de58e0cafbf5b0343735"
+        }""",
+)
-- 
GitLab


From da02a441f4a96ddb47579a52fbbf50d501d72b53 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 02:02:50 -0700
Subject: [PATCH 0125/1357] compat: Update forward compatibility horizon to
 2018-09-13

PiperOrigin-RevId: 212777606
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 550017653a..1a1ed04e0d 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 12)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 13)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From d797e99a043e01609583a37c04e1e509d126e1a0 Mon Sep 17 00:00:00 2001
From: dmitrievanthony <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 09:42:16 +0000
Subject: [PATCH 0126/1357] Fix windows build.

---
 .../contrib/ignite/kernels/ignite_plain_client_windows.cc      | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index 9cd08a7779..17f2bf45d1 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #pragma comment(lib, "Mswsock.lib")
 #pragma comment(lib, "AdvApi32.lib")
 
+#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -58,7 +59,7 @@ Status PlainClient::Connect() {
                     &result);
   if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res);
 
-  auto clean = gtl::MakeCleanup([result] { reeaddrinfo(result); });
+  auto clean = gtl::MakeCleanup([result] { freeaddrinfo(result); });
 
   for (ptr = result; ptr != NULL; ptr = ptr->ai_next) {
     sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
-- 
GitLab


From c8b60b894b91cfdb4176176d7dcf328d2b40b41f Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 16:34:59 +0300
Subject: [PATCH 0127/1357] Fix code style.

---
 .../ignite/kernels/ignite_byte_swapper.h       | 18 +++++++++---------
 .../ignite/kernels/ignite_dataset_ops.cc       |  2 +-
 .../kernels/ignite_plain_client_windows.cc     |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
index 986bedcf69..5b42de4c5a 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -36,7 +36,7 @@ class ByteSwapper {
 
   inline void SwapIfRequiredUnsignedInt16(uint16_t *x) const {
     if (swap_) {
-      Swap16(reinterpret_cast<int16_t*>(x));
+      Swap16(reinterpret_cast<int16_t *>(x));
     }
   }
 
@@ -48,7 +48,7 @@ class ByteSwapper {
 
   inline void SwapIfRequiredFloat(float *x) const {
     if (swap_) {
-      Swap32(reinterpret_cast<int32_t*>(x));
+      Swap32(reinterpret_cast<int32_t *>(x));
     }
   }
 
@@ -60,7 +60,7 @@ class ByteSwapper {
 
   inline void SwapIfRequiredDouble(double *x) const {
     if (swap_) {
-      Swap64(reinterpret_cast<int64_t*>(x));
+      Swap64(reinterpret_cast<int64_t *>(x));
     }
   }
 
@@ -73,8 +73,8 @@ class ByteSwapper {
   inline void SwapIfRequiredUnsignedInt16Arr(uint16_t *x,
                                              int32_t length) const {
     if (swap_) {
-      for (int32_t i = 0; i < length; i++) 
-        Swap16(reinterpret_cast<int16_t*>(&x[i]));
+      for (int32_t i = 0; i < length; i++)
+        Swap16(reinterpret_cast<int16_t *>(&x[i]));
     }
   }
 
@@ -86,8 +86,8 @@ class ByteSwapper {
 
   inline void SwapIfRequiredFloatArr(float *x, int32_t length) const {
     if (swap_) {
-      for (int32_t i = 0; i < length; i++) 
-        Swap32(reinterpret_cast<int32_t*>(&x[i]));
+      for (int32_t i = 0; i < length; i++)
+        Swap32(reinterpret_cast<int32_t *>(&x[i]));
     }
   }
 
@@ -99,8 +99,8 @@ class ByteSwapper {
 
   inline void SwapIfRequiredDoubleArr(double *x, int32_t length) const {
     if (swap_) {
-      for (int32_t i = 0; i < length; i++) 
-        Swap64(reinterpret_cast<int64_t*>(&x[i]));
+      for (int32_t i = 0; i < length; i++)
+        Swap64(reinterpret_cast<int64_t *>(&x[i]));
     }
   }
 
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index eeb29ef30b..e48fce4ed2 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h"
 #include <stdlib.h>
 #include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h"
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index 17f2bf45d1..43d6108c34 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -24,8 +24,8 @@ limitations under the License.
 #pragma comment(lib, "Mswsock.lib")
 #pragma comment(lib, "AdvApi32.lib")
 
-#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
-- 
GitLab


From 5f28bab20d303e9f815bbe8611c24b7f751e6f9e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 06:44:19 -0700
Subject: [PATCH 0128/1357] Avoid excessive cpu<->gpu memory swaps, compute
 shape ops on the CPU. This results in +10% perf improvement for tensor2tensor
 Transformer model training step times, and +37% perf improvement for
 tensor2tensor Transformer model decoding.

PiperOrigin-RevId: 212804933
---
 tensorflow/python/ops/math_ops.py | 34 ++++++++++++++++---------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index acd5a32e82..7c59232e40 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -2903,22 +2903,24 @@ def tensordot(a, b, axes, name=None):
         free_dims_static = None
       shape_a = array_ops.shape(a)
       rank_a = array_ops.rank(a)
-      axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes")
-      axes = cast(axes >= 0, dtypes.int32) * axes + cast(
-          axes < 0, dtypes.int32) * (
-              axes + rank_a)
-      free, _ = array_ops.setdiff1d(range(rank_a), axes)
-      free_dims = array_ops.gather(shape_a, free)
-      axes_dims = array_ops.gather(shape_a, axes)
-      prod_free_dims = reduce_prod(free_dims)
-      prod_axes_dims = reduce_prod(axes_dims)
-      perm = array_ops.concat([axes_dims, free_dims], 0)
-      if flipped:
-        perm = array_ops.concat([axes, free], 0)
-        new_shape = array_ops.stack([prod_axes_dims, prod_free_dims])
-      else:
-        perm = array_ops.concat([free, axes], 0)
-        new_shape = array_ops.stack([prod_free_dims, prod_axes_dims])
+      # TODO(b/115583659): Automate this.
+      with ops.device("/cpu:0"):
+        axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes")
+        axes = cast(axes >= 0, dtypes.int32) * axes + cast(
+            axes < 0, dtypes.int32) * (
+                axes + rank_a)
+        free, _ = array_ops.setdiff1d(range(rank_a), axes)
+        free_dims = array_ops.gather(shape_a, free)
+        axes_dims = array_ops.gather(shape_a, axes)
+        prod_free_dims = reduce_prod(free_dims)
+        prod_axes_dims = reduce_prod(axes_dims)
+        perm = array_ops.concat([axes_dims, free_dims], 0)
+        if flipped:
+          perm = array_ops.concat([axes, free], 0)
+          new_shape = array_ops.stack([prod_axes_dims, prod_free_dims])
+        else:
+          perm = array_ops.concat([free, axes], 0)
+          new_shape = array_ops.stack([prod_free_dims, prod_axes_dims])
       reshaped_a = array_ops.reshape(array_ops.transpose(a, perm), new_shape)
       return reshaped_a, free_dims, free_dims_static
 
-- 
GitLab


From 46aa7cf45c62d193f56f55d7d2ffc5baf7af3b65 Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Thu, 13 Sep 2018 06:52:12 -0700
Subject: [PATCH 0129/1357] Replace iter->second with partition_graph in
 DirectSession::Run

This loop uses an iterator. It takes references to iter->first and iter->second right at the top of the loop and uses these references throughout, except for this line, which I've fixed.

PiperOrigin-RevId: 212805731
---
 tensorflow/core/common_runtime/direct_session.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index eb388202fa..b4d8e285bd 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -1228,7 +1228,7 @@ Status DirectSession::CreateExecutors(
       }
     };
 
-    optimizer.Optimize(lib, options_.env, device, &iter->second,
+    optimizer.Optimize(lib, options_.env, device, &partition_graph,
                        /*shape_map=*/nullptr);
 
     // TensorFlow Debugger (tfdbg) inserts debug nodes in the graph.
-- 
GitLab


From 226cc7c47e2df8682b384aef5c54836948caecb3 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 13 Sep 2018 07:26:18 -0700
Subject: [PATCH 0130/1357] Allow unsupported comparison operators to be passed
 through and scale back the coverage of overloads. It's up for discussion
 whether we allow overloading everything or let the users rely on the existing
 operator overloading mechanisms instead. The one case that we do want to
 support is the equality operator.

PiperOrigin-RevId: 212809447
---
 .../converters/logical_expressions.py         | 21 ++++++++++----
 .../converters/logical_expressions_test.py    | 10 +++----
 tensorflow/python/autograph/utils/__init__.py |  2 --
 .../autograph/utils/multiple_dispatch.py      | 10 -------
 .../autograph/utils/multiple_dispatch_test.py | 29 -------------------
 5 files changed, 19 insertions(+), 53 deletions(-)

diff --git a/tensorflow/python/autograph/converters/logical_expressions.py b/tensorflow/python/autograph/converters/logical_expressions.py
index ac42ee2c33..8c4d53f9a8 100644
--- a/tensorflow/python/autograph/converters/logical_expressions.py
+++ b/tensorflow/python/autograph/converters/logical_expressions.py
@@ -57,8 +57,6 @@ class LogicalExpressionTransformer(converter.Base):
         gast.NotEq: 'tf.not_equal',
         gast.Or: 'tf.logical_or',
         gast.USub: 'tf.negative',
-        gast.Is: 'ag__.utils.dynamic_is',
-        gast.IsNot: 'ag__.utils.dynamic_is_not'
     }
 
   def _expect_simple_symbol(self, operand):
@@ -72,12 +70,13 @@ class LogicalExpressionTransformer(converter.Base):
         '"a.x or b"; for a workaround, assign the expression to a local '
         'variable and use that instead, for example "tmp = a.x", "tmp or b"')
 
+  def _has_matching_func(self, operator):
+    op_type = type(operator)
+    return op_type in self.op_mapping
+
   def _matching_func(self, operator):
     op_type = type(operator)
-    mapped_op = self.op_mapping.get(op_type)
-    if not mapped_op:
-      raise NotImplementedError('operator %s is not yet supported' % op_type)
-    return mapped_op
+    return self.op_mapping[op_type]
 
   def _as_function(self, func_name, args):
     template = """
@@ -90,6 +89,16 @@ class LogicalExpressionTransformer(converter.Base):
 
   def visit_Compare(self, node):
     node = self.generic_visit(node)
+
+    if not all(self._has_matching_func(op) for op in node.ops):
+      if len(node.ops) == 1:
+        # Basic expressions are safe to leave as they are.
+        return node
+      else:
+        raise NotImplementedError(
+            'compound expression with at least one unsupported '
+            'operator: {}'.format(node.ops))
+
     ops_and_comps = list(zip(node.ops, node.comparators))
     left = node.left
     op_tree = None
diff --git a/tensorflow/python/autograph/converters/logical_expressions_test.py b/tensorflow/python/autograph/converters/logical_expressions_test.py
index 5fb3fb992f..b78b4d3a6a 100644
--- a/tensorflow/python/autograph/converters/logical_expressions_test.py
+++ b/tensorflow/python/autograph/converters/logical_expressions_test.py
@@ -47,14 +47,12 @@ class GradientsFunctionTest(converter_testing.TestCase):
       with self.cached_session() as sess:
         self.assertTrue(sess.run(result.test_fn(True, False, True)))
 
-  def test_ag_utils_lookup(self):
+  def test_unsupported_ops(self):
     def test_fn(a, b):
-      return a is b or a is not b
+      return a in b
 
-    with self.converted(test_fn, logical_expressions, {}, math_ops.logical_or
-                       ) as result:
-      with self.cached_session() as sess:
-        self.assertTrue(sess.run(result.test_fn(True, False)))
+    with self.converted(test_fn, logical_expressions, {}) as result:
+      self.assertTrue(result.test_fn('a', ('a',)))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/utils/__init__.py b/tensorflow/python/autograph/utils/__init__.py
index e38c82a079..c781958481 100644
--- a/tensorflow/python/autograph/utils/__init__.py
+++ b/tensorflow/python/autograph/utils/__init__.py
@@ -20,8 +20,6 @@ from __future__ import print_function
 
 from tensorflow.python.autograph.utils.context_managers import control_dependency_on_returns
 from tensorflow.python.autograph.utils.misc import alias_tensors
-from tensorflow.python.autograph.utils.multiple_dispatch import dynamic_is
-from tensorflow.python.autograph.utils.multiple_dispatch import dynamic_is_not
 from tensorflow.python.autograph.utils.multiple_dispatch import run_cond
 from tensorflow.python.autograph.utils.py_func import wrap_py_func
 from tensorflow.python.autograph.utils.tensor_list import dynamic_list_append
diff --git a/tensorflow/python/autograph/utils/multiple_dispatch.py b/tensorflow/python/autograph/utils/multiple_dispatch.py
index 33f521db2c..107c8f7a68 100644
--- a/tensorflow/python/autograph/utils/multiple_dispatch.py
+++ b/tensorflow/python/autograph/utils/multiple_dispatch.py
@@ -22,16 +22,6 @@ from tensorflow.python.autograph.utils.type_check import is_tensor
 from tensorflow.python.ops import control_flow_ops
 
 
-def dynamic_is(left, right):
-  # TODO(alexbw) if we're sure we should leave 'is' in place,
-  # then change the semantics in converters/logical_expressions.py
-  return left is right
-
-
-def dynamic_is_not(left, right):
-  return left is not right
-
-
 def run_cond(condition, true_fn, false_fn):
   """Type-dependent functional conditional.
 
diff --git a/tensorflow/python/autograph/utils/multiple_dispatch_test.py b/tensorflow/python/autograph/utils/multiple_dispatch_test.py
index ed20822529..2a77c895ce 100644
--- a/tensorflow/python/autograph/utils/multiple_dispatch_test.py
+++ b/tensorflow/python/autograph/utils/multiple_dispatch_test.py
@@ -18,8 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
 from tensorflow.python.autograph.utils import multiple_dispatch
 from tensorflow.python.client.session import Session
 from tensorflow.python.framework.constant_op import constant
@@ -28,33 +26,6 @@ from tensorflow.python.platform import test
 
 class MultipleDispatchTest(test.TestCase):
 
-  def test_dynamic_is_python(self):
-    a = np.eye(3)
-    also_a = a
-    not_actually_a = np.eye(3)
-    should_be_true1 = multiple_dispatch.dynamic_is(a, also_a)
-    should_be_false1 = multiple_dispatch.dynamic_is_not(a, also_a)
-    should_be_true2 = multiple_dispatch.dynamic_is_not(a, not_actually_a)
-    should_be_false2 = multiple_dispatch.dynamic_is(a, not_actually_a)
-    self.assertTrue(should_be_true1)
-    self.assertTrue(should_be_true2)
-    self.assertFalse(should_be_false1)
-    self.assertFalse(should_be_false2)
-
-  def test_dynamic_is_tf(self):
-    with Session().as_default():
-      a = constant([2.0])
-      also_a = a
-      not_actually_a = constant([2.0])
-      should_be_true1 = multiple_dispatch.dynamic_is(a, also_a)
-      should_be_false1 = multiple_dispatch.dynamic_is_not(a, also_a)
-      should_be_true2 = multiple_dispatch.dynamic_is_not(a, not_actually_a)
-      should_be_false2 = multiple_dispatch.dynamic_is(a, not_actually_a)
-      self.assertTrue(should_be_true1)
-      self.assertTrue(should_be_true2)
-      self.assertFalse(should_be_false1)
-      self.assertFalse(should_be_false2)
-
   def test_run_cond_python(self):
     true_fn = lambda: (2,)
     false_fn = lambda: (3,)
-- 
GitLab


From cd06ad2516cba760d875f77f43f20021e3560036 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 13 Sep 2018 14:33:04 +0000
Subject: [PATCH 0131/1357] Update code owner for S3 file system

Add myself so that I could be assigned for issues and PRs in S3 file
systems.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 CODEOWNERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CODEOWNERS b/CODEOWNERS
index 78f80c8d71..0d208eca77 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -2,6 +2,7 @@
 
 /tenosrflow/core/debug @caisq
 /tensorflow/core/platform/windows/ @mrry
+/tensorflow/core/platform/s3 @yongtang
 /tensorflow/go @asimshankar
 /tensorflow/java/ @asimshankar
 /tensorflow/python/debug @caisq
-- 
GitLab


From f57ea2399e96131d26dedadd901fa852685e23a1 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 13 Sep 2018 14:35:21 +0000
Subject: [PATCH 0132/1357] Update code owner for contrib/{kafka,kinesis}

Add myself so that issues or PRs could be assigned to me.

Note contrib/{kafka,kinesis} might be moved:
https://github.com/tensorflow/community/pull/18

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 CODEOWNERS | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CODEOWNERS b/CODEOWNERS
index 0d208eca77..b612bccffb 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -31,9 +31,12 @@
 /tensorflow/contrib/gan/ @joel-shor
 /tensorflow/contrib/graph_editor/ @purpledog
 # NEED OWNER: /tensorflow/contrib/grid_rnn/
+/tensorflow/contrib/hadoop @yongtang
 /tensorflow/contrib/hvx/ @satok16
 /tensorflow/contrib/integrate/ @shoyer
+/tensorflow/contrib/kafka @yongtang
 /tensorflow/contrib/kernel_methods/ @petrosmol
+/tensorflow/contrib/kinesis @yongtang
 /tensorflow/contrib/ios_examples/ @petewarden
 /tensorflow/contrib/labeled_tensor/ @shoyer
 /tensorflow/contrib/layers/ @fchollet @martinwicke
-- 
GitLab


From c513c04aed8790c78c46b78f90ec848555498ce4 Mon Sep 17 00:00:00 2001
From: dmitrievanthony <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 15:13:54 +0000
Subject: [PATCH 0133/1357] Add -DWIN32_LEAN_AND_MEAN option into BUILD.

---
 tensorflow/contrib/ignite/BUILD | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
index 2f598b4aed..1adc6c6ccc 100644
--- a/tensorflow/contrib/ignite/BUILD
+++ b/tensorflow/contrib/ignite/BUILD
@@ -61,6 +61,9 @@ cc_library(
         "@boringssl//:ssl",
         "@protobuf_archive//:protobuf_headers",
     ],
+    copts = if_windows([
+        "-DWIN32_LEAN_AND_MEAN",
+    ]),
     alwayslink = 1,
 )
 
-- 
GitLab


From 7453b0b1cee3d251106684876bc9d639235f5c4a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 08:09:39 -0700
Subject: [PATCH 0134/1357] Updates TensorFlow landing pages to make
 description and code block widths consistent at all breakpoints.

PiperOrigin-RevId: 212814483
---
 tensorflow/contrib/lite/g3doc/_index.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/g3doc/_index.yaml b/tensorflow/contrib/lite/g3doc/_index.yaml
index 9119e49117..b3f21e21ac 100644
--- a/tensorflow/contrib/lite/g3doc/_index.yaml
+++ b/tensorflow/contrib/lite/g3doc/_index.yaml
@@ -5,7 +5,8 @@ landing_page:
   rows:
   - heading: TensorFlow Lite is a lightweight solution for mobile and embedded devices.
     items:
-    - description: >
+    - classname: devsite-landing-row-50
+      description: >
         TensorFlow Lite is TensorFlow’s lightweight solution for mobile and
         embedded devices. It enables on-device machine learning inference with
         low latency and a small binary size. TensorFlow Lite also supports
@@ -33,7 +34,7 @@ landing_page:
           icon_name: chevron_right
           foreground: theme
           background: grey
-    - code_block: |
+      code_block: |
         <pre class = "prettyprint">
         $ toco --input_file=$(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \
                --input_format=TENSORFLOW_GRAPHDEF \
-- 
GitLab


From 8a6c83656a2197309dacba124944c665530dd218 Mon Sep 17 00:00:00 2001
From: "William D. Irons" <wdirons@us.ibm.com>
Date: Thu, 13 Sep 2018 11:19:41 -0400
Subject: [PATCH 0135/1357] Move ppc64le build/test to cuda 9.2

CUDA 9.2 is the first version of cuda that nvidia will support on
Power9 hardware.
The dockerfile is used in the jenkins build of the pp64le whl file
and in the CI/CD test.
---
 tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le
index e026edb6bb..0a55b84ac4 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le
@@ -1,4 +1,4 @@
-FROM nvidia/cuda-ppc64le:9.0-cudnn7-devel-ubuntu16.04
+FROM nvidia/cuda-ppc64le:9.2-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="William Irons <wdirons@us.ibm.com>"
 
@@ -26,6 +26,8 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 # Configure the build for our CUDA configuration.
 ENV TF_NEED_CUDA 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES 3.0
+ENV TF_CUDA_VERSION 9.2
+ENV CUDA_TOOLKIT_PATH /usr/local/cuda-9.2
 
 # TODO get NCCL 2 in the docker image
 ENV TF_NCCL_VERSION 1
-- 
GitLab


From a4bf3d0935570762e9d60eb917d8f42be7e398b4 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 13 Sep 2018 09:01:27 -0700
Subject: [PATCH 0136/1357] Add HloModuleGroup abstraction. This CL adds
 HloModuleGroup which is a simple container of HLO modules. The module group
 gathers together HLO modules which are built to run concurrently across
 multiple devices. This cl just adds the container class. Later CLs will tie
 this into other parts of XLA including adding HloModuleGroup HLO passes which
 operate on an entire module group.

PiperOrigin-RevId: 212821390
---
 tensorflow/compiler/xla/service/BUILD         |  31 ++++
 tensorflow/compiler/xla/service/hlo.proto     |   7 +
 .../compiler/xla/service/hlo_module_group.cc  |  91 +++++++++++
 .../compiler/xla/service/hlo_module_group.h   |  81 ++++++++++
 .../xla/service/hlo_module_group_test.cc      | 142 ++++++++++++++++++
 .../compiler/xla/service/hlo_module_test.cc   |   1 -
 6 files changed, 352 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_module_group.cc
 create mode 100644 tensorflow/compiler/xla/service/hlo_module_group.h
 create mode 100644 tensorflow/compiler/xla/service/hlo_module_group_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 17a557ccc3..fb80c78f68 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1146,6 +1146,37 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "hlo_module_group",
+    srcs = ["hlo_module_group.cc"],
+    hdrs = ["hlo_module_group.h"],
+    deps = [
+        ":hlo",
+        ":hlo_proto",
+        "//tensorflow/compiler/xla:statusor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
+tf_cc_test(
+    name = "hlo_module_group_test",
+    srcs = ["hlo_module_group_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_matchers",
+        ":hlo_module_group",
+        ":hlo_parser",
+        ":hlo_proto",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+    ],
+)
+
 cc_library(
     name = "hlo_module_group_metadata",
     srcs = ["hlo_module_group_metadata.cc"],
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 93ec2c9438..b19ec12638 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -309,6 +309,13 @@ message HeapSimulatorTrace {
   bool whole_module_simulation = 2;
 }
 
+// An abstraction representing a set of HLO module built to run concurrently
+// across different devices.
+message HloModuleGroupProto {
+  string name = 1;
+  repeated HloModuleProto hlo_modules = 2;
+}
+
 // Serialization of BufferAssignment.
 message BufferAssignmentProto {
   // Alias represents a source LogicalBuffer, and the buffer location that
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.cc b/tensorflow/compiler/xla/service/hlo_module_group.cc
new file mode 100644
index 0000000000..f9b56ef464
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_module_group.cc
@@ -0,0 +1,91 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_module_group.h"
+
+namespace xla {
+
+HloModuleGroup::HloModuleGroup(absl::string_view name,
+                               std::unique_ptr<HloModule> module)
+    : name_(name) {
+  push_back(std::move(module));
+}
+
+HloModuleGroup::HloModuleGroup(absl::string_view name,
+                               absl::Span<std::unique_ptr<HloModule>> modules)
+    : name_(name) {
+  for (auto& module : modules) {
+    push_back(std::move(module));
+  }
+}
+
+std::vector<std::unique_ptr<HloModule>> HloModuleGroup::ConsumeModules() {
+  std::vector<std::unique_ptr<HloModule>> ret_modules = std::move(modules_);
+
+  // Clear everything so the object state is in a known (empty) state.
+  modules_.clear();
+  module_ptrs_.clear();
+  return ret_modules;
+}
+
+string HloModuleGroup::ToString() const {
+  std::ostringstream s;
+  s << "HloModuleGroup " << name() << "\n\n";
+  for (const HloModule* module : modules()) {
+    s << module->ToString() << "\n";
+  }
+  return s.str();
+}
+
+HloModuleGroupProto HloModuleGroup::ToProto() const {
+  HloModuleGroupProto proto;
+  proto.set_name(name());
+  for (const HloModule* module : modules()) {
+    *proto.add_hlo_modules() = module->ToProto();
+  }
+  return proto;
+}
+
+/* static */ StatusOr<HloModuleGroup> HloModuleGroup::CreateFromProto(
+    const HloModuleGroupProto& proto,
+    absl::Span<const HloModuleConfig> module_configs) {
+  TF_RET_CHECK(!proto.name().empty()) << "Module group name cannot be empty";
+  TF_RET_CHECK(proto.hlo_modules_size() > 0)
+      << "Module group must have at least one HLO module";
+  TF_RET_CHECK(proto.hlo_modules_size() == module_configs.size());
+
+  std::vector<std::unique_ptr<HloModule>> modules;
+  for (int i = 0; i < proto.hlo_modules_size(); ++i) {
+    const HloModuleProto& module_proto = proto.hlo_modules(i);
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr<HloModule> module,
+        HloModule::CreateFromProto(module_proto, module_configs[i]));
+    modules.push_back(std::move(module));
+  }
+
+  return HloModuleGroup(proto.name(), absl::MakeSpan(modules));
+}
+
+void HloModuleGroup::push_back(std::unique_ptr<HloModule> module) {
+  modules_.push_back(std::move(module));
+  module_ptrs_.push_back(modules_.back().get());
+}
+
+std::ostream& operator<<(std::ostream& out, const HloModuleGroup& group) {
+  out << group.ToString();
+  return out;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.h b/tensorflow/compiler/xla/service/hlo_module_group.h
new file mode 100644
index 0000000000..7338be8b9c
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_module_group.h
@@ -0,0 +1,81 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_H_
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+
+namespace xla {
+
+// An abstraction representing a ordered set of HLO module built to run
+// concurrently across different devices.
+class HloModuleGroup {
+ public:
+  // Construct an empty module group.
+  explicit HloModuleGroup(absl::string_view name) : name_(name) {}
+
+  // Construct a module group containing a single module.
+  HloModuleGroup(absl::string_view name, std::unique_ptr<HloModule> module);
+
+  // Construct a module group containing any number of modules.
+  HloModuleGroup(absl::string_view name,
+                 absl::Span<std::unique_ptr<HloModule>> modules);
+
+  // Returns the modules contained in the group.
+  const std::vector<HloModule*>& modules() const { return module_ptrs_; }
+
+  // Returns a module at a particular index.
+  HloModule& module(int index) const { return *module_ptrs_.at(index); }
+
+  // Add a module to the back of vector of modules in the group.
+  void push_back(std::unique_ptr<HloModule> module);
+
+  // Moves all modules from the group into the returned vector. After this
+  // method runs, the module group will be empty.
+  std::vector<std::unique_ptr<HloModule>> ConsumeModules();
+
+  string name() const { return name_; }
+  string ToString() const;
+
+  // Serialize the module group to/from a proto.
+  HloModuleGroupProto ToProto() const;
+  static StatusOr<HloModuleGroup> CreateFromProto(
+      const HloModuleGroupProto& proto,
+      absl::Span<const HloModuleConfig> module_configs);
+
+ private:
+  string name_;
+
+  // Vector of modules as std::unique_ptrs.
+  std::vector<std::unique_ptr<HloModule>> modules_;
+
+  // Vector of modules as normal pointers. This vector is kept in sync with
+  // modules_ as modules are added to the group with push_back.
+  std::vector<HloModule*> module_ptrs_;
+};
+
+std::ostream& operator<<(std::ostream& out, const HloModuleGroup& group);
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_H_
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_test.cc b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
new file mode 100644
index 0000000000..ebf790ba6f
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
@@ -0,0 +1,142 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_module_group.h"
+
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+
+namespace {
+
+namespace op = ::xla::testing::opcode_matchers;
+
+class HloModuleGroupTest : public HloTestBase {
+ protected:
+  HloModuleGroupTest() = default;
+};
+
+TEST_F(HloModuleGroupTest, SingleModule) {
+  const string text = R"(
+HloModule simple_module
+
+ENTRY %entry (x: f32[], y: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %add = f32[] add(%x, %y)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(text));
+  HloModuleGroup group(TestName(), std::move(module));
+
+  EXPECT_EQ(group.modules().size(), 1);
+  EXPECT_THAT(
+      group.module(0).entry_computation()->instructions(),
+      ::testing::ElementsAre(op::Parameter(), op::Parameter(), op::Add()));
+
+  TF_ASSERT_OK_AND_ASSIGN(HloModuleGroup group_copy,
+                          HloModuleGroup::CreateFromProto(
+                              group.ToProto(), {group.module(0).config()}));
+  EXPECT_EQ(group_copy.modules().size(), 1);
+  EXPECT_THAT(
+      group_copy.module(0).entry_computation()->instructions(),
+      ::testing::ElementsAre(op::Parameter(), op::Parameter(), op::Add()));
+
+  std::vector<std::unique_ptr<HloModule>> modules = group.ConsumeModules();
+  EXPECT_EQ(modules.size(), 1);
+  EXPECT_EQ(group.modules().size(), 0);
+}
+
+TEST_F(HloModuleGroupTest, MultipleModules) {
+  const string text_0 = R"(
+HloModule module0
+
+ENTRY %entry (x: f32[], y: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %add = f32[] add(%x, %y)
+}
+)";
+  const string text_1 = R"(
+HloModule module1
+
+ENTRY %entry (a: f32[]) -> f32[] {
+  ROOT %a = f32[] parameter(0)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module_0,
+                          ParseHloString(text_0));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module_1,
+                          ParseHloString(text_1));
+  std::vector<std::unique_ptr<HloModule>> modules;
+  modules.push_back(std::move(module_0));
+  modules.push_back(std::move(module_1));
+  HloModuleGroup group(TestName(), absl::MakeSpan(modules));
+  EXPECT_EQ(group.modules().size(), 2);
+  EXPECT_THAT(
+      group.module(0).entry_computation()->instructions(),
+      ::testing::ElementsAre(op::Parameter(), op::Parameter(), op::Add()));
+  EXPECT_THAT(group.module(1).entry_computation()->instructions(),
+              ::testing::ElementsAre(op::Parameter()));
+
+  TF_ASSERT_OK_AND_ASSIGN(HloModuleGroup group_copy,
+                          HloModuleGroup::CreateFromProto(
+                              group.ToProto(), {group.module(0).config(),
+                                                group.module(1).config()}));
+  EXPECT_EQ(group_copy.modules().size(), 2);
+}
+
+TEST_F(HloModuleGroupTest, BuildModuleGroupByPushBack) {
+  const string text_0 = R"(
+HloModule module0
+
+ENTRY %entry (x: f32[], y: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %add = f32[] add(%x, %y)
+}
+)";
+  const string text_1 = R"(
+HloModule module1
+
+ENTRY %entry (a: f32[]) -> f32[] {
+  ROOT %a = f32[] parameter(0)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module_0,
+                          ParseHloString(text_0));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module_1,
+                          ParseHloString(text_1));
+  HloModuleGroup group(TestName());
+  group.push_back(std::move(module_0));
+  group.push_back(std::move(module_1));
+
+  EXPECT_EQ(group.modules().size(), 2);
+  EXPECT_THAT(
+      group.module(0).entry_computation()->instructions(),
+      ::testing::ElementsAre(op::Parameter(), op::Parameter(), op::Add()));
+  EXPECT_THAT(group.module(1).entry_computation()->instructions(),
+              ::testing::ElementsAre(op::Parameter()));
+}
+
+}  // namespace
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module_test.cc b/tensorflow/compiler/xla/service/hlo_module_test.cc
index 6243943420..39f38b417a 100644
--- a/tensorflow/compiler/xla/service/hlo_module_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_test.cc
@@ -26,7 +26,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/test.h"
 
-- 
GitLab


From 88a7c5b98fc1ccb56134003ba3dc88a09385c0a7 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Thu, 13 Sep 2018 09:33:24 -0700
Subject: [PATCH 0137/1357] [TF:XLA] Make DataTypeToPrimitiveType work with all
 quantized types supported by TF

PiperOrigin-RevId: 212826065
---
 .../compiler/tf2xla/literal_util_test.cc      | 85 +++++++++++--------
 tensorflow/compiler/tf2xla/type_util.cc       | 11 ++-
 2 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/literal_util_test.cc b/tensorflow/compiler/tf2xla/literal_util_test.cc
index ed452bceeb..15f4c38da2 100644
--- a/tensorflow/compiler/tf2xla/literal_util_test.cc
+++ b/tensorflow/compiler/tf2xla/literal_util_test.cc
@@ -22,48 +22,61 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
+namespace {
 
 TEST(LiteralUtil, LiteralToHostTensor) {
   // int64 literal can only be converted to an int64 host tensor.
-  {
-    std::vector<int64> int64_values = {1, 2, 3};
-    xla::Literal int64_values_literal =
-        xla::LiteralUtil::CreateR1(absl::Span<const int64>(int64_values));
-    Tensor host_tensor;
-    EXPECT_EQ("Cannot convert literal of type S64 to tensor of type int32",
-              LiteralToHostTensor(int64_values_literal, DT_INT32, &host_tensor)
-                  .error_message());
-    EXPECT_EQ("Cannot convert literal of type S64 to tensor of type qint32",
-              LiteralToHostTensor(int64_values_literal, DT_QINT32, &host_tensor)
-                  .error_message());
-    EXPECT_TRUE(
-        LiteralToHostTensor(int64_values_literal, DT_INT64, &host_tensor).ok());
-    test::ExpectTensorEqual<int64>(host_tensor,
-                                   test::AsTensor<int64>(int64_values));
-  }
+  std::vector<int64> int64_values = {1, 2, 3};
+  xla::Literal int64_values_literal =
+      xla::LiteralUtil::CreateR1(absl::Span<const int64>(int64_values));
+  Tensor host_tensor;
+  EXPECT_EQ("Cannot convert literal of type S64 to tensor of type int32",
+            LiteralToHostTensor(int64_values_literal, DT_INT32, &host_tensor)
+                .error_message());
+  EXPECT_EQ("Cannot convert literal of type S64 to tensor of type qint32",
+            LiteralToHostTensor(int64_values_literal, DT_QINT32, &host_tensor)
+                .error_message());
+  EXPECT_TRUE(
+      LiteralToHostTensor(int64_values_literal, DT_INT64, &host_tensor).ok());
+  test::ExpectTensorEqual<int64>(host_tensor,
+                                 test::AsTensor<int64>(int64_values));
+}
+
+template <class T>
+using LiteralUtilTest = ::testing::Test;
+using Types =
+    ::testing::Types<std::pair<int8, qint8>, std::pair<uint8, quint8>,
+                     std::pair<int16, qint16>, std::pair<uint16, quint16>,
+                     std::pair<int32, qint32>>;
+
+TYPED_TEST_CASE(LiteralUtilTest, Types);
+
+TYPED_TEST(LiteralUtilTest, LiteralToQuantizedHostTensor) {
+  using int_type = typename TypeParam::first_type;
+  using qint_type = typename TypeParam::second_type;
 
-  {
-    // Repeat tests with int32.
-    Tensor host_tensor;
-    std::vector<int32> int32_values = {10, 11};
-    xla::Literal int32_values_literal =
-        xla::LiteralUtil::CreateR1(absl::Span<const int32>(int32_values));
-    EXPECT_TRUE(
-        LiteralToHostTensor(int32_values_literal, DT_INT32, &host_tensor).ok());
-    test::ExpectTensorEqual<int32>(host_tensor,
-                                   test::AsTensor<int32>(int32_values));
+  Tensor host_tensor;
+  std::vector<int_type> int_values = {10, 11};
+  xla::Literal int_values_literal =
+      xla::LiteralUtil::CreateR1(absl::Span<const int_type>(int_values));
+  EXPECT_TRUE(LiteralToHostTensor(int_values_literal,
+                                  DataTypeToEnum<int_type>::value, &host_tensor)
+                  .ok());
+  test::ExpectTensorEqual<int_type>(host_tensor,
+                                    test::AsTensor<int_type>(int_values));
 
-    EXPECT_TRUE(
-        LiteralToHostTensor(int32_values_literal, DT_QINT32, &host_tensor)
-            .ok());
-    std::vector<qint32> qint32_values = {10, 11};
-    test::ExpectTensorEqual<qint32>(host_tensor,
-                                    test::AsTensor<qint32>(qint32_values));
+  EXPECT_TRUE(LiteralToHostTensor(int_values_literal,
+                                  DataTypeToEnum<qint_type>::value,
+                                  &host_tensor)
+                  .ok());
+  std::vector<qint_type> qint_values = {10, 11};
+  test::ExpectTensorEqual<qint_type>(host_tensor,
+                                     test::AsTensor<qint_type>(qint_values));
 
-    EXPECT_EQ("Cannot convert literal of type S32 to tensor of type int64",
-              LiteralToHostTensor(int32_values_literal, DT_INT64, &host_tensor)
-                  .error_message());
-  }
+  EXPECT_EQ(
+      error::INVALID_ARGUMENT,
+      LiteralToHostTensor(int_values_literal, DT_INT64, &host_tensor).code());
 }
 
+}  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/type_util.cc b/tensorflow/compiler/tf2xla/type_util.cc
index c969212a1b..d00b137662 100644
--- a/tensorflow/compiler/tf2xla/type_util.cc
+++ b/tensorflow/compiler/tf2xla/type_util.cc
@@ -26,21 +26,26 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) {
       *type = xla::PRED;
       return Status::OK();
     case tensorflow::DT_INT8:
+    case tensorflow::DT_QINT8:
       *type = xla::S8;
       return Status::OK();
     case tensorflow::DT_INT16:
+    case tensorflow::DT_QINT16:
       *type = xla::S16;
       return Status::OK();
     case tensorflow::DT_INT32:
+    case tensorflow::DT_QINT32:
       *type = xla::S32;
       return Status::OK();
     case tensorflow::DT_INT64:
       *type = xla::S64;
       return Status::OK();
     case tensorflow::DT_UINT8:
+    case tensorflow::DT_QUINT8:
       *type = xla::U8;
       return Status::OK();
     case tensorflow::DT_UINT16:
+    case tensorflow::DT_QUINT16:
       *type = xla::U16;
       return Status::OK();
     case tensorflow::DT_UINT32:
@@ -64,12 +69,6 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) {
     case tensorflow::DT_COMPLEX64:
       *type = xla::C64;
       return Status::OK();
-    case tensorflow::DT_QUINT8:
-      *type = xla::U8;
-      return Status::OK();
-    case tensorflow::DT_QINT32:
-      *type = xla::S32;
-      return Status::OK();
     default:
       return errors::InvalidArgument(
           "Unsupported type in DataTypeToPrimitiveType ",
-- 
GitLab


From 5ae1c93473ae690d4a7b9389b1219179cb2504a3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 09:35:01 -0700
Subject: [PATCH 0138/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 212826308
---
 .../internal/optimized/optimized_ops.h        | 688 ++++++++++++------
 .../contrib/lite/kernels/internal/types.h     |  42 +-
 2 files changed, 473 insertions(+), 257 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 2c8e8f90e3..baed8f4993 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -260,16 +260,16 @@ inline bool AreSameDims(const Dims<4>& dims1, const Dims<4>& dims2) {
   return true;
 }
 
-inline void AddBiasAndEvalActivationFunction(const float* bias_data,
-                                             const Dims<4>& bias_dims,
-                                             float* array_data,
-                                             const Dims<4>& array_dims,
-                                             float output_activation_min,
-                                             float output_activation_max) {
+inline void AddBiasAndEvalActivationFunction(float output_activation_min,
+                                             float output_activation_max,
+                                             const RuntimeShape& bias_shape,
+                                             const float* bias_data,
+                                             const RuntimeShape& array_shape,
+                                             float* array_data) {
 #ifdef USE_NEON
   gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction");
-  const int bias_size = FlatSize(bias_dims);
-  const int array_size = FlatSize(array_dims);
+  const int bias_size = bias_shape.FlatSize();
+  const int array_size = array_shape.FlatSize();
   TFLITE_DCHECK_EQ((array_size % bias_size), 0);
   float* array_ptr = array_data;
   float* array_end_ptr = array_ptr + array_size;
@@ -319,8 +319,8 @@ inline void AddBiasAndEvalActivationFunction(const float* bias_data,
   }
 #else  // not NEON
   gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction");
-  const int bias_size = FlatSize(bias_dims);
-  const int array_size = FlatSize(array_dims);
+  const int bias_size = bias_shape.FlatSize();
+  const int array_size = array_shape.FlatSize();
   TFLITE_DCHECK_EQ((array_size % bias_size), 0);
   for (int array_offset = 0; array_offset < array_size;
        array_offset += bias_size) {
@@ -333,6 +333,19 @@ inline void AddBiasAndEvalActivationFunction(const float* bias_data,
 #endif
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void AddBiasAndEvalActivationFunction(const float* bias_data,
+                                             const Dims<4>& bias_dims,
+                                             float* array_data,
+                                             const Dims<4>& array_dims,
+                                             float output_activation_min,
+                                             float output_activation_max) {
+  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+                                   DimsToShape(bias_dims), bias_data,
+                                   DimsToShape(array_dims), array_data);
+}
+
 // Note: This to be converted to RuntimeShapes along with Conv.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
@@ -1672,12 +1685,16 @@ inline void ShuffledFullyConnected(
 }
 
 template <typename T>
-inline void ExtractPatchIntoBufferColumn(
-    const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth,
-    int stride_width, int stride_height, int pad_width, int pad_height,
-    int in_width, int in_height, int in_depth, int single_buffer_length,
-    int buffer_id, const T* in_data, T* conv_buffer_data, uint8 byte_zero) {
+inline void ExtractPatchIntoBufferColumn(const RuntimeShape& input_shape, int w,
+                                         int h, int b, int kheight, int kwidth,
+                                         int stride_width, int stride_height,
+                                         int pad_width, int pad_height,
+                                         int in_width, int in_height,
+                                         int in_depth, int single_buffer_length,
+                                         int buffer_id, const T* in_data,
+                                         T* conv_buffer_data, uint8 zero_byte) {
   gemmlowp::ScopedProfilingLabel label("ExtractPatchIntoBufferColumn");
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
   // This chunk of code reshapes all the inputs corresponding to
   // output (b, h, w) to a column vector in conv_buffer(:, buffer_id).
   const int kwidth_times_indepth = kwidth * in_depth;
@@ -1699,7 +1716,7 @@ inline void ExtractPatchIntoBufferColumn(
   const int output_row_offset = (buffer_id * single_buffer_length);
   int out_offset =
       output_row_offset + (h_offset * kwidth + w_offset) * in_depth;
-  int in_offset = Offset(input_dims, 0, iw_start, ih_start, b);
+  int in_offset = Offset(input_shape, b, ih_start, iw_start, 0);
 
   // Express all of the calculations as padding around the input patch.
   const int top_padding = h_offset;
@@ -1713,7 +1730,7 @@ inline void ExtractPatchIntoBufferColumn(
   // patch that are off the edge of the input image.
   if (top_padding > 0) {
     const int top_row_elements = (top_padding * kwidth * in_depth);
-    memset(conv_buffer_data + output_row_offset, byte_zero,
+    memset(conv_buffer_data + output_row_offset, zero_byte,
            (top_row_elements * sizeof(T)));
   }
 
@@ -1730,14 +1747,14 @@ inline void ExtractPatchIntoBufferColumn(
     for (int ih = ih_start; ih < ih_end; ++ih) {
       if (left_padding > 0) {
         const int left_start = (out_offset - (left_padding * in_depth));
-        memset(conv_buffer_data + left_start, byte_zero,
+        memset(conv_buffer_data + left_start, zero_byte,
                (left_padding * in_depth * sizeof(T)));
       }
       memcpy(conv_buffer_data + out_offset, in_data + in_offset,
              single_row_num * sizeof(T));
       if (right_padding > 0) {
         const int right_start = (out_offset + single_row_num);
-        memset(conv_buffer_data + right_start, byte_zero,
+        memset(conv_buffer_data + right_start, zero_byte,
                (right_padding * in_depth * sizeof(T)));
       }
       out_offset += kwidth_times_indepth;
@@ -1752,61 +1769,64 @@ inline void ExtractPatchIntoBufferColumn(
     const int bottom_start =
         output_row_offset +
         ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
-    memset(conv_buffer_data + bottom_start, byte_zero,
+    memset(conv_buffer_data + bottom_start, zero_byte,
            (bottom_row_elements * sizeof(T)));
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 template <typename T>
-void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
-                   const Dims<4>& filter_dims, int stride_width,
-                   int stride_height, int dilation_width_factor,
-                   int dilation_height_factor, int pad_width, int pad_height,
-                   const Dims<4>& output_dims, uint8 byte_zero,
-                   T* im2col_data) {
+inline void ExtractPatchIntoBufferColumn(
+    const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth,
+    int stride_width, int stride_height, int pad_width, int pad_height,
+    int in_width, int in_height, int in_depth, int single_buffer_length,
+    int buffer_id, const T* in_data, T* conv_buffer_data, uint8 zero_byte) {
+  ExtractPatchIntoBufferColumn(
+      DimsToShape(input_dims), w, h, b, kheight, kwidth, stride_width,
+      stride_height, pad_width, pad_height, in_width, in_height, in_depth,
+      single_buffer_length, buffer_id, in_data, conv_buffer_data, zero_byte);
+}
+
+template <typename T>
+void DilatedIm2col(const ConvParams& params, uint8 zero_byte,
+                   const RuntimeShape& input_shape, const T* input_data,
+                   const RuntimeShape& filter_shape,
+                   const RuntimeShape& output_shape, T* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
   // For dilated convolution, the input pixels are not contiguous therefore we
   // can't use the same opitimizations as Im2Col(). Though note this code would
   // work fine for the non-dilated case too (though likely a bit slower).
   gemmlowp::ScopedProfilingLabel label("DilatedIm2col");
   TFLITE_DCHECK(dilation_width_factor != 1 || dilation_height_factor != 1);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
   TFLITE_DCHECK(im2col_data);
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
-  MatchingArraySize(output_dims, 0, filter_dims, 3);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  MatchingDim(output_shape, 3, filter_shape, 0);
 
   // Construct the MxN sized im2col matrix.
   // The rows M, are sub-ordered B x H x W
-  Dims<4> row_dims;
-  row_dims.sizes[0] = output_width;
-  row_dims.sizes[1] = output_height;
-  row_dims.sizes[2] = batches;
-  row_dims.sizes[3] = 1;
-  ComputeStrides(&row_dims);
-
+  const RuntimeShape row_shape({1, batches, output_height, output_width});
   // The columns, N, are sub-ordered Kh x Kw x Din
-  Dims<4> col_dims;
-  col_dims.sizes[0] = input_depth;
-  col_dims.sizes[1] = filter_width;
-  col_dims.sizes[2] = filter_height;
-  col_dims.sizes[3] = 1;
-  ComputeStrides(&col_dims);
-
+  const RuntimeShape col_shape({1, filter_height, filter_width, input_depth});
   // Use dimensions M and N to construct dims for indexing directly into im2col
-  Dims<4> im2col_dims;
-  im2col_dims.sizes[0] = FlatSize(col_dims);
-  im2col_dims.sizes[1] = FlatSize(row_dims);
-  im2col_dims.sizes[2] = 1;
-  im2col_dims.sizes[3] = 1;
-  ComputeStrides(&im2col_dims);
+  const RuntimeShape im2col_shape(
+      {1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
 
   // Loop through the output rows (B x H x W)
   for (int batch = 0; batch < batches; ++batch) {
@@ -1814,7 +1834,7 @@ void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
       for (int out_x = 0; out_x < output_width; ++out_x) {
         // Each im2col row is an output pixel. Arrange the input data in this
         // row in an order we can conveniently multiply with the filter data.
-        int row_offset = Offset(row_dims, out_x, out_y, batch, 0);
+        int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
         const int in_x_origin = (out_x * stride_width) - pad_width;
         const int in_y_origin = (out_y * stride_height) - pad_height;
         // Loop through all the pixels of the filter (Kh x Kw)
@@ -1825,25 +1845,25 @@ void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
             // Loop through all the filter pixels in this row.
             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
               const int in_x = in_x_origin + dilation_width_factor * filter_x;
-              int col_offset = Offset(col_dims, 0, filter_x, filter_y, 0);
+              int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
               T* dst = im2col_data +
-                       Offset(im2col_dims, col_offset, row_offset, 0, 0);
+                       Offset(im2col_shape, 0, 0, row_offset, col_offset);
               if ((in_x >= 0) && (in_x < input_width)) {
                 // Filter pixel is within the input, copy the input data.
                 T const* src =
-                    input_data + Offset(input_dims, 0, in_x, in_y, batch);
+                    input_data + Offset(input_shape, batch, in_y, in_x, 0);
                 memcpy(dst, src, input_depth * sizeof(T));
               } else {
                 // Filter pixel is outside the input, zero it out.
-                memset(dst, byte_zero, input_depth * sizeof(T));
+                memset(dst, zero_byte, input_depth * sizeof(T));
               }
             }
           } else {
             // Filter row is outside the input, zero out the entire filter row.
-            int col_offset = Offset(col_dims, 0, 0, filter_y, 0);
-            T* dst =
-                im2col_data + Offset(im2col_dims, col_offset, row_offset, 0, 0);
-            memset(dst, byte_zero, filter_width * input_depth * sizeof(T));
+            int col_offset = Offset(col_shape, 0, filter_y, 0, 0);
+            T* dst = im2col_data +
+                     Offset(im2col_shape, 0, 0, row_offset, col_offset);
+            memset(dst, zero_byte, filter_width * input_depth * sizeof(T));
           }
         }
       }
@@ -1851,21 +1871,49 @@ void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 template <typename T>
-void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
-            int stride_height, int pad_width, int pad_height, int kheight,
-            int kwidth, uint8 byte_zero, T* output_data,
-            const Dims<4>& output_dims) {
+void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
+                   const Dims<4>& filter_dims, int stride_width,
+                   int stride_height, int dilation_width_factor,
+                   int dilation_height_factor, int pad_width, int pad_height,
+                   const Dims<4>& output_dims, uint8 zero_byte,
+                   T* im2col_data) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+
+  DilatedIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), DimsToShape(output_dims),
+                im2col_data);
+}
+
+template <typename T>
+void Im2col(const ConvParams& params, int kheight, int kwidth, uint8 zero_byte,
+            const RuntimeShape& input_shape, const T* input_data,
+            const RuntimeShape& output_shape, T* output_data) {
   gemmlowp::ScopedProfilingLabel label("Im2col");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_height = ArraySize(input_dims, 2);
-  const int output_depth = ArraySize(output_dims, 0);
-  const int output_width = ArraySize(output_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = input_shape.Dims(3);
+  const int input_width = input_shape.Dims(2);
+  const int input_height = input_shape.Dims(1);
+  const int output_depth = output_shape.Dims(3);
+  const int output_width = output_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
 
   int buffer_id = 0;
   // Loop over the output nodes.
@@ -1873,93 +1921,155 @@ void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
     for (int h = 0; h < output_height; ++h) {
       for (int w = 0; w < output_width; ++w) {
         ExtractPatchIntoBufferColumn(
-            input_dims, w, h, b, kheight, kwidth, stride_width, stride_height,
+            input_shape, w, h, b, kheight, kwidth, stride_width, stride_height,
             pad_width, pad_height, input_width, input_height, input_depth,
-            output_depth, buffer_id, input_data, output_data, byte_zero);
+            output_depth, buffer_id, input_data, output_data, zero_byte);
         ++buffer_id;
       }
     }
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+template <typename T>
+void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
+            int stride_height, int pad_width, int pad_height, int kheight,
+            int kwidth, uint8 zero_byte, T* output_data,
+            const Dims<4>& output_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = 1;
+  op_params.dilation_height_factor = 1;
+
+  Im2col(op_params, kheight, kwidth, zero_byte, DimsToShape(input_dims),
+         input_data, DimsToShape(output_dims), output_data);
+}
+
 // legacy, for compatibility with old checked-in code
 template <typename T>
 void Im2col(const T* input_data, const Dims<4>& input_dims, int stride,
             int pad_width, int pad_height, int kheight, int kwidth,
-            uint8 byte_zero, T* output_data, const Dims<4>& output_dims) {
+            uint8 zero_byte, T* output_data, const Dims<4>& output_dims) {
   Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight,
-         kwidth, byte_zero, output_data, output_dims);
+         kwidth, zero_byte, output_data, output_dims);
 }
 
-inline void Conv(const float* input_data, const Dims<4>& input_dims,
-                 const float* filter_data, const Dims<4>& filter_dims,
-                 const float* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 float output_activation_min, float output_activation_max,
-                 float* output_data, const Dims<4>& output_dims,
-                 float* im2col_data, const Dims<4>& im2col_dims) {
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& filter_shape,
+                 const float* filter_data, const RuntimeShape& bias_shape,
+                 const float* bias_data, const RuntimeShape& output_shape,
+                 float* output_data, const RuntimeShape& im2col_shape,
+                 float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
   (void)im2col_data;
-  (void)im2col_dims;
+  (void)im2col_shape;
   gemmlowp::ScopedProfilingLabel label("Conv");
 
   // NB: static_cast<float>(0x00000000h) == 0.0f
   const uint8 float_zero_byte = 0x00;
   const float* gemm_input_data = nullptr;
-  const Dims<4>* gemm_input_dims = nullptr;
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
+  const RuntimeShape* gemm_input_shape = nullptr;
+  const int filter_width = filter_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
   const bool need_dilated_im2col =
       dilation_width_factor != 1 || dilation_height_factor != 1;
   const bool need_im2col = stride_width != 1 || stride_height != 1 ||
                            filter_width != 1 || filter_height != 1;
   if (need_dilated_im2col) {
-    DilatedIm2col(input_data, input_dims, filter_dims, stride_width,
-                  stride_height, dilation_width_factor, dilation_height_factor,
-                  pad_width, pad_height, output_dims, float_zero_byte,
-                  im2col_data);
+    DilatedIm2col(params, float_zero_byte, input_shape, input_data,
+                  filter_shape, output_shape, im2col_data);
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else if (need_im2col) {
     TFLITE_DCHECK(im2col_data);
-    Im2col(input_data, input_dims, stride_width, stride_height, pad_width,
-           pad_height, filter_height, filter_width, float_zero_byte,
-           im2col_data, im2col_dims);
+    Im2col(params, filter_height, filter_width, float_zero_byte, input_shape,
+           input_data, im2col_shape, im2col_data);
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else {
     // TODO(aselle): We need to make sure to not send im2col if it is not
     // needed.
     TFLITE_DCHECK(!im2col_data);
     gemm_input_data = input_data;
-    gemm_input_dims = &input_dims;
+    gemm_input_shape = &input_shape;
   }
 
   const auto im2col_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(gemm_input_data, *gemm_input_dims);
+      MapAsMatrixWithLastDimAsRows(gemm_input_data, *gemm_input_shape);
   const auto filter_matrix_map =
-      MapAsMatrixWithLastDimAsCols(filter_data, filter_dims);
+      MapAsMatrixWithFirstDimAsCols(filter_data, filter_shape);
   auto output_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+      MapAsMatrixWithLastDimAsRows(output_data, output_shape);
 
   Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map);
 
-  AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data,
-                                   output_dims, output_activation_min,
-                                   output_activation_max);
+  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+                                   bias_shape, bias_data, output_shape,
+                                   output_data);
 }
 
-inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
-                       const int8_t* filter_data, const Dims<4>& filter_dims,
-                       const float* bias_data, const Dims<4>& bias_dims,
-                       int stride_width, int stride_height, int pad_width,
-                       int pad_height, float* scaling_factors_ptr,
-                       float output_activation_min, float output_activation_max,
-                       float* output_data, const Dims<4>& output_dims,
-                       int8_t* im2col_data, const Dims<4>& im2col_dims) {
-  const int batch_size = input_dims.sizes[3];
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Conv(const float* input_data, const Dims<4>& input_dims,
+                 const float* filter_data, const Dims<4>& filter_dims,
+                 const float* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 float output_activation_min, float output_activation_max,
+                 float* output_data, const Dims<4>& output_dims,
+                 float* im2col_data, const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr,
+                       const RuntimeShape& input_shape,
+                       const int8_t* input_data,
+                       const RuntimeShape& filter_shape,
+                       const int8_t* filter_data,
+                       const RuntimeShape& bias_shape, const float* bias_data,
+                       const RuntimeShape& output_shape, float* output_data,
+                       const RuntimeShape& im2col_shape, int8_t* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(im2col_shape.DimensionsCount(), 4);
+
+  const int batch_size = input_shape.Dims(0);
+  const int filter_width = filter_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
 
   const int8_t* gemm_input_data = nullptr;
   int num_input;
@@ -1970,25 +2080,22 @@ inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
     TFLITE_DCHECK(im2col_data);
     // symmetric quantization assumes zero point of 0.
     const int input_zero_point = 0;
-    Im2col(input_data, input_dims, stride_width, stride_height, pad_width,
-           pad_height, filter_height, filter_width, input_zero_point,
-           im2col_data, im2col_dims);
+
+    Im2col(params, filter_height, filter_width, input_zero_point, input_shape,
+           input_data, im2col_shape, im2col_data);
     gemm_input_data = im2col_data;
-    num_input = im2col_dims.sizes[0] * im2col_dims.sizes[1] *
-                im2col_dims.sizes[2] * im2col_dims.sizes[3];
+    num_input = im2col_shape.FlatSize();
   } else {
     TFLITE_DCHECK(!im2col_data);
     gemm_input_data = input_data;
-    num_input = input_dims.sizes[0] * input_dims.sizes[1] *
-                input_dims.sizes[2] * input_dims.sizes[3];
+    num_input = input_shape.FlatSize();
   }
 
   // Flatten 4D matrices into 2D matrices for matrix multiplication.
 
   // Flatten so that each filter has its own row.
-  const int filter_rows = filter_dims.sizes[3];
-  const int filter_cols =
-      filter_dims.sizes[0] * filter_dims.sizes[1] * filter_dims.sizes[2];
+  const int filter_rows = filter_shape.Dims(0);
+  const int filter_cols = FlatSizeSkipDim(filter_shape, 0);
 
   // In MatrixBatchVectorMultiplyAccumulate, each output value is the
   // dot product of one row of the first matrix with one row of the second
@@ -1998,15 +2105,14 @@ inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
   const int gemm_input_cols = filter_cols;
   const int gemm_input_rows = num_input / gemm_input_cols;
 
-  const int output_cols = output_dims.sizes[0];
-  const int output_rows =
-      output_dims.sizes[1] * output_dims.sizes[2] * output_dims.sizes[3];
+  const int output_cols = output_shape.Dims(3);
+  const int output_rows = FlatSizeSkipDim(output_shape, 3);
   TFLITE_DCHECK_EQ(output_cols, filter_rows);
   TFLITE_DCHECK_EQ(output_rows, gemm_input_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_cols);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1);
+  TFLITE_DCHECK_EQ(bias_shape.Dims(3), output_cols);
+  TFLITE_DCHECK_EQ(bias_shape.Dims(2), 1);
+  TFLITE_DCHECK_EQ(bias_shape.Dims(1), 1);
+  TFLITE_DCHECK_EQ(bias_shape.Dims(0), 1);
 
   // MatrixBatchVectorMultiplyAccumulate assumes that each row of the second
   // input matrix has its own scale factor. This code duplicates the scale
@@ -2023,11 +2129,39 @@ inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
       scaling_factors_ptr, /*n_batch=*/gemm_input_rows, output_data,
       /*result_stride=*/1);
 
-  AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data,
-                                   output_dims, output_activation_min,
-                                   output_activation_max);
+  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+                                   bias_shape, bias_data, output_shape,
+                                   output_data);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
+                       const int8_t* filter_data, const Dims<4>& filter_dims,
+                       const float* bias_data, const Dims<4>& bias_dims,
+                       int stride_width, int stride_height, int pad_width,
+                       int pad_height, float* scaling_factors_ptr,
+                       float output_activation_min, float output_activation_max,
+                       float* output_data, const Dims<4>& output_dims,
+                       int8_t* im2col_data, const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  HybridConv(op_params, scaling_factors_ptr, DimsToShape(input_dims),
+             input_data, DimsToShape(filter_dims), filter_data,
+             DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+             output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 template <FusedActivationFunctionType Ac>
 void Conv(const float* input_data, const Dims<4>& input_dims,
           const float* filter_data, const Dims<4>& filter_dims,
@@ -2045,6 +2179,7 @@ void Conv(const float* input_data, const Dims<4>& input_dims,
        im2col_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void Conv(const float* input_data, const Dims<4>& input_dims,
@@ -2061,6 +2196,7 @@ void Conv(const float* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void Conv(const float* input_data, const Dims<4>& input_dims,
@@ -2074,27 +2210,33 @@ void Conv(const float* input_data, const Dims<4>& input_dims,
            output_dims, im2col_data, im2col_dims);
 }
 
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
-                 int32 input_offset, const uint8* filter_data,
-                 const Dims<4>& filter_dims, int32 filter_offset,
-                 const int32* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 int32 output_offset, int32 output_multiplier, int output_shift,
-                 int32 output_activation_min, int32 output_activation_max,
-                 uint8* output_data, const Dims<4>& output_dims,
-                 uint8* im2col_data, const Dims<4>& im2col_dims,
-                 gemmlowp::GemmContext* gemm_context) {
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const uint8* input_data, const RuntimeShape& filter_shape,
+                 const uint8* filter_data, const RuntimeShape& bias_shape,
+                 const int32* bias_data, const RuntimeShape& output_shape,
+                 uint8* output_data, const RuntimeShape& im2col_shape,
+                 uint8* im2col_data, gemmlowp::GemmContext* gemm_context) {
   gemmlowp::ScopedProfilingLabel label("Conv/8bit");
-
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(im2col_shape.DimensionsCount(), 4);
 
   const uint8* gemm_input_data = nullptr;
-  const Dims<4>* gemm_input_dims = nullptr;
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
+  const RuntimeShape* gemm_input_shape = nullptr;
+  const int filter_width = filter_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
   const bool need_dilated_im2col =
       dilation_width_factor != 1 || dilation_height_factor != 1;
   const bool need_im2col = stride_width != 1 || stride_height != 1 ||
@@ -2104,53 +2246,47 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
     const int input_zero_point = -input_offset;
     TFLITE_DCHECK_GE(input_zero_point, 0);
     TFLITE_DCHECK_LE(input_zero_point, 255);
-    DilatedIm2col(input_data, input_dims, filter_dims, stride_width,
-                  stride_height, dilation_width_factor, dilation_height_factor,
-                  pad_width, pad_height, output_dims, input_zero_point,
-                  im2col_data);
+    DilatedIm2col(params, input_zero_point, input_shape, input_data,
+                  filter_shape, output_shape, im2col_data);
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else if (need_im2col) {
     TFLITE_DCHECK(im2col_data);
     const int input_zero_point = -input_offset;
     TFLITE_DCHECK_GE(input_zero_point, 0);
     TFLITE_DCHECK_LE(input_zero_point, 255);
-    Im2col(input_data, input_dims, stride_width, stride_height, pad_width,
-           pad_height, filter_height, filter_width, input_zero_point,
-           im2col_data, im2col_dims);
+    Im2col(params, filter_height, filter_width, input_zero_point, input_shape,
+           input_data, im2col_shape, im2col_data);
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else {
     TFLITE_DCHECK(!im2col_data);
     gemm_input_data = input_data;
-    gemm_input_dims = &input_dims;
+    gemm_input_shape = &input_shape;
   }
 
-  const int gemm_input_rows = gemm_input_dims->sizes[0];
+  const int gemm_input_rows = gemm_input_shape->Dims(3);
   // Using FlatSizeSkipDim causes segfault in some contexts (see b/79927784).
   // The root cause has not yet been identified though. Same applies below for
   // the other calls commented out. This is a partial rollback of cl/196819423.
-  // const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_dims, 0);
-  const int gemm_input_cols = gemm_input_dims->sizes[1] *
-                              gemm_input_dims->sizes[2] *
-                              gemm_input_dims->sizes[3];
-  const int filter_rows = filter_dims.sizes[3];
+  // const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_shape, 3);
+  const int gemm_input_cols = gemm_input_shape->Dims(0) *
+                              gemm_input_shape->Dims(1) *
+                              gemm_input_shape->Dims(2);
+  const int filter_rows = filter_shape.Dims(0);
   // See b/79927784.
-  // const int filter_cols = FlatSizeSkipDim(filter_dims, 3);
+  // const int filter_cols = FlatSizeSkipDim(filter_shape, 0);
   const int filter_cols =
-      filter_dims.sizes[0] * filter_dims.sizes[1] * filter_dims.sizes[2];
-  const int output_rows = output_dims.sizes[0];
+      filter_shape.Dims(1) * filter_shape.Dims(2) * filter_shape.Dims(3);
+  const int output_rows = output_shape.Dims(3);
   // See b/79927784.
-  // const int output_cols = FlatSizeSkipDim(output_dims, 0);
+  // const int output_cols = FlatSizeSkipDim(output_shape, 3);
   const int output_cols =
-      output_dims.sizes[1] * output_dims.sizes[2] * output_dims.sizes[3];
+      output_shape.Dims(0) * output_shape.Dims(1) * output_shape.Dims(2);
   TFLITE_DCHECK_EQ(output_rows, filter_rows);
   TFLITE_DCHECK_EQ(output_cols, gemm_input_cols);
   TFLITE_DCHECK_EQ(filter_cols, gemm_input_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_rows);
   gemmlowp::MatrixMap<const uint8, gemmlowp::MapOrder::RowMajor> filter_matrix(
       filter_data, filter_rows, filter_cols);
   gemmlowp::MatrixMap<const uint8, gemmlowp::MapOrder::ColMajor> input_matrix(
@@ -2166,6 +2302,43 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
       input_offset, output_pipeline);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
+                 int32 input_offset, const uint8* filter_data,
+                 const Dims<4>& filter_dims, int32 filter_offset,
+                 const int32* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 int32 output_offset, int32 output_multiplier, int output_shift,
+                 int32 output_activation_min, int32 output_activation_max,
+                 uint8* output_data, const Dims<4>& output_dims,
+                 uint8* im2col_data, const Dims<4>& im2col_dims,
+                 gemmlowp::GemmContext* gemm_context) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data, gemm_context);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
                  int32 input_offset, const uint8* filter_data,
                  const Dims<4>& filter_dims, int32 filter_offset,
@@ -2184,6 +2357,7 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims, gemm_context);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
@@ -2213,6 +2387,7 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims, gemm_context);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void Conv(const uint8* input_data, const Dims<4>& input_dims,
@@ -2236,13 +2411,14 @@ void Conv(const uint8* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims, gemm_context);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac, typename T>
 void Im2col(const T* input_data, const Dims<4>& input_dims, int stride,
             int pad_width, int pad_height, int kheight, int kwidth,
-            uint8 byte_zero, T* output_data, const Dims<4>& output_dims) {
+            uint8 zero_byte, T* output_data, const Dims<4>& output_dims) {
   Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight,
-         kwidth, byte_zero, output_data, output_dims);
+         kwidth, zero_byte, output_data, output_dims);
 }
 
 // legacy, for compatibility with old checked-in code
@@ -2266,6 +2442,7 @@ void ConvAsGemm(const float* input_data, const Dims<4>& input_dims,
                                        output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims,
@@ -5832,58 +6009,45 @@ void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
 }
 
 template <typename T>
-void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
-                     const Dims<4>& filter_dims, int stride_width,
-                     int stride_height, int pad_width, int pad_height,
-                     const Dims<4>& output_dims, uint8 zero_byte,
-                     T* im2col_data) {
+void TransposeIm2col(const ConvParams& params, uint8 zero_byte,
+                     const RuntimeShape& input_shape, const T* input_data,
+                     const RuntimeShape& filter_shape,
+                     const RuntimeShape& output_shape, T* im2col_data) {
   gemmlowp::ScopedProfilingLabel label("TransposeIm2col");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
   TFLITE_DCHECK(im2col_data);
 
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 3);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
-  MatchingArraySize(output_dims, 0, filter_dims, 0);  // output_depth
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 0);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  MatchingDim(output_shape, 3, filter_shape, 3);  // output_depth
 
   // Construct the MxN sized im2col matrix.
   // The rows M, are sub-ordered B x H x W
-  Dims<4> row_dims;
-  row_dims.sizes[0] = output_width;
-  row_dims.sizes[1] = output_height;
-  row_dims.sizes[2] = batches;
-  row_dims.sizes[3] = 1;
-  ComputeStrides(&row_dims);
-
+  const RuntimeShape row_shape({1, batches, output_height, output_width});
   // The columns, N, are sub-ordered Kh x Kw x Din
-  Dims<4> col_dims;
-  col_dims.sizes[0] = input_depth;
-  col_dims.sizes[1] = filter_width;
-  col_dims.sizes[2] = filter_height;
-  col_dims.sizes[3] = 1;
-  ComputeStrides(&col_dims);
-
+  const RuntimeShape col_shape({1, filter_height, filter_width, input_depth});
   // Use dimensions M and N to construct dims for indexing directly into im2col
-  Dims<4> im2col_dims;
-  im2col_dims.sizes[0] = FlatSize(col_dims);
-  im2col_dims.sizes[1] = FlatSize(row_dims);
-  im2col_dims.sizes[2] = 1;
-  im2col_dims.sizes[3] = 1;
-  ComputeStrides(&im2col_dims);
+  const RuntimeShape im2col_shape(
+      {1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
 
   // Build the im2col matrix by looping through all the input pixels,
   // computing their influence on the output, rather than looping through all
   // the output pixels. We therefore must initialize the im2col array to zero.
   // This is potentially inefficient because we subsequently overwrite bytes
   // set here. However, in practice memset is very fast and costs negligible.
-  memset(im2col_data, zero_byte, FlatSize(im2col_dims) * sizeof(T));
+  memset(im2col_data, zero_byte, im2col_shape.FlatSize() * sizeof(T));
 
   // Loop through the output batches
   for (int batch = 0; batch < batches; ++batch) {
@@ -5903,11 +6067,11 @@ void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
               if ((out_x >= 0) && (out_x < output_width)) {
                 // Copy the input elements of this pixel
                 T const* src =
-                    input_data + Offset(input_dims, 0, in_x, in_y, batch);
+                    input_data + Offset(input_shape, batch, in_y, in_x, 0);
+                int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
+                int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
                 T* dst = im2col_data +
-                         Offset(im2col_dims,
-                                Offset(col_dims, 0, filter_x, filter_y, 0),
-                                Offset(row_dims, out_x, out_y, batch, 0), 0, 0);
+                         Offset(im2col_shape, 0, 0, row_offset, col_offset);
                 memcpy(dst, src, input_depth * sizeof(T));
               }
             }
@@ -5918,31 +6082,71 @@ void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
   }
 }
 
-inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, float* output_data,
-                          const Dims<4>& output_dims, float* im2col_data,
-                          const Dims<4>& im2col_dims) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+template <typename T>
+void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
+                     const Dims<4>& filter_dims, int stride_width,
+                     int stride_height, int pad_width, int pad_height,
+                     const Dims<4>& output_dims, uint8 zero_byte,
+                     T* im2col_data) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+
+  TransposeIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data,
+                  DimsToShape(filter_dims), DimsToShape(output_dims),
+                  im2col_data);
+}
+
+inline void TransposeConv(
+    const ConvParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& filter_shape,
+    const float* filter_data, const RuntimeShape& output_shape,
+    float* output_data, const RuntimeShape& im2col_shape, float* im2col_data) {
   gemmlowp::ScopedProfilingLabel label("TransposeConv");
 
   // Note we could use transposed weights with forward conv for unstrided
   // cases. But we are already getting good performance with this code as-is.
   TFLITE_DCHECK(im2col_data);
-  TransposeIm2col(input_data, input_dims, filter_dims, stride_width,
-                  stride_height, pad_width, pad_height, output_dims, 0,
-                  im2col_data);
+  TransposeIm2col(params, 0, input_shape, input_data, filter_shape,
+                  output_shape, im2col_data);
 
   const auto im2col_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(im2col_data, im2col_dims);
+      MapAsMatrixWithLastDimAsRows(im2col_data, im2col_shape);
   const auto filter_matrix_map =
-      MapAsMatrixWithLastDimAsCols(filter_data, filter_dims);
+      MapAsMatrixWithFirstDimAsCols(filter_data, filter_shape);
   auto output_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+      MapAsMatrixWithLastDimAsRows(output_data, output_shape);
 
   Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, float* output_data,
+                          const Dims<4>& output_dims, float* im2col_data,
+                          const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+
+  TransposeConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(output_dims),
+                output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
 }  // namespace optimized_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index c4c7cf3842..023707d466 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -26,8 +26,8 @@ enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu };
 enum class PaddingType : uint8 { kNone, kSame, kValid };
 
 struct PaddingValues {
-  int8 width;
-  int8 height;
+  int16 width;
+  int16 height;
 };
 
 // This enumeration allows for non-default formats for the weights array
@@ -734,10 +734,10 @@ struct ConvParams {
   PaddingType padding_type;
   PaddingValues padding_values;
   // TODO(starka): This was just "stride", so check that width+height is OK.
-  int8 stride_width;
-  int8 stride_height;
-  int8 dilation_width_factor;
-  int8 dilation_height_factor;
+  int16 stride_width;
+  int16 stride_height;
+  int16 dilation_width_factor;
+  int16 dilation_height_factor;
   // uint8 inference params.
   // TODO(b/65838351): Use smaller types if appropriate.
   int32 input_offset;
@@ -745,8 +745,12 @@ struct ConvParams {
   int32 output_offset;
   int32 output_multiplier;
   int output_shift;
-  int32 output_activation_min;
-  int32 output_activation_max;
+  // uint8, etc, activation params.
+  int32 quantized_activation_min;
+  int32 quantized_activation_max;
+  // float activation params.
+  float float_activation_min;
+  float float_activation_max;
 };
 
 struct DepthToSpaceParams {
@@ -756,8 +760,8 @@ struct DepthToSpaceParams {
 struct DepthwiseParams {
   PaddingType padding_type;
   PaddingValues padding_values;
-  int8 stride;
-  int8 depth_multiplier;
+  int16 stride;
+  int16 depth_multiplier;
   // uint8 inference params.
   // TODO(b/65838351): Use smaller types if appropriate.
   int32 input_offset;
@@ -765,8 +769,12 @@ struct DepthwiseParams {
   int32 output_offset;
   int32 output_multiplier;
   int output_shift;
-  int32 output_activation_min;
-  int32 output_activation_max;
+  // uint8, etc, activation params.
+  int32 quantized_activation_min;
+  int32 quantized_activation_max;
+  // float activation params.
+  float float_activation_min;
+  float float_activation_max;
 };
 
 struct DequantizationParams {
@@ -787,13 +795,17 @@ struct FullyConnectedParams {
   int32 output_offset;
   int32 output_multiplier;
   int output_shift;
-  int32 output_activation_min;
-  int32 output_activation_max;
+  // uint8, etc, activation params.
+  int32 quantized_activation_min;
+  int32 quantized_activation_max;
+  // float activation params.
+  float float_activation_min;
+  float float_activation_max;
   FullyConnectedWeightsFormat weights_format;
 };
 
 struct GatherParams {
-  int8 input_rank;
+  int16 input_rank;
   int16 axis;
 };
 
-- 
GitLab


From 56d4fc8ff67f48294ae5cb0a7f9ff3d954463aa3 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Thu, 13 Sep 2018 09:47:30 -0700
Subject: [PATCH 0139/1357] Add a `namedtuple` factory that accepts
 doc-strings.

PiperOrigin-RevId: 212828094
---
 tensorflow/python/estimator/model_fn.py       | 93 ++++++++++++++-----
 tensorflow/python/util/collections.py         | 51 ++++++++++
 ...tensorflow.estimator.-estimator-spec.pbtxt |  2 +-
 ...tensorflow.estimator.-estimator-spec.pbtxt |  2 +-
 4 files changed, 125 insertions(+), 23 deletions(-)
 create mode 100644 tensorflow/python/util/collections.py

diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 439cc2e3a4..728de65559 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -33,6 +33,7 @@ from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.util import nest
+from tensorflow.python.util.collections import tf_namedtuple
 from tensorflow.python.util.tf_export import estimator_export
 
 
@@ -62,14 +63,65 @@ EXPORT_TAG_MAP = {
     ModeKeys.EVAL: [tag_constants.EVAL],
 }
 
+# pylint: disable=line-too-long
+
+_EstimatorSpecNamedTuple = tf_namedtuple('EstimatorSpec', [   # pylint: disable=invalid-name
+    ('mode',
+     'A `ModeKeys`. Specifies if this is training, evaluation or prediction.'
+    ),
+    ('predictions', 'Predictions `Tensor` or dict of `Tensor`.'),
+    ('loss',
+     'Training loss `Tensor`. Must be either scalar, or with shape `[1]`.'),
+    ('train_op', 'Op to run one training step.'),
+    ('eval_metric_ops',
+     """Dict of metric results keyed by name.
+
+     The values of the dict are the results of calling a metric function,
+     namely a `(metric_tensor, update_op)` tuple.
+
+     `metric_tensor` should be evaluated without any impact on state
+     (typically is a pure computation results based on variables.).
+     For example, it should not trigger the `update_op` or requires any
+     input fetching."""
+    ),
+    ('export_outputs',
+     """Describes the output signatures to be exported to `SavedModel`.
+
+     A dict `{name: output}` where:
+
+       * `name` is An arbitrary name for this output.
+       * `output` is an `ExportOutput` object such as `ClassificationOutput`,
+         `RegressionOutput`, or `PredictOutput`.
+
+     Single-headed models only need to specify one entry in this dictionary.
+     Multi-headed models should specify one entry for each head, one of
+     which must be named using
+     `signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`. If no entry is
+     provided, a default `PredictOutput` mapping to `predictions` will be
+     created."""
+    ),
+    ('training_chief_hooks',
+     'Iterable of `tf.train.SessionRunHook` objects to run on the chief worker during training.'
+    ),
+    ('training_hooks',
+     'Iterable of `tf.train.SessionRunHook` objects to run on all workers during training.'
+    ),
+    ('scaffold',
+     'A `tf.train.Scaffold` object that can be used to set initialization, saver, and more to be used in training.'
+    ),
+    ('evaluation_hooks',
+     'Iterable of `tf.train.SessionRunHook` objects to run during evaluation.'
+    ),
+    ('prediction_hooks',
+     'Iterable of `tf.train.SessionRunHook` objects to run during predictions.'
+    ),
+])
+
+# pylint: enable=line-too-long
+
 
 @estimator_export('estimator.EstimatorSpec')
-class EstimatorSpec(
-    collections.namedtuple('EstimatorSpec', [
-        'mode', 'predictions', 'loss', 'train_op', 'eval_metric_ops',
-        'export_outputs', 'training_chief_hooks', 'training_hooks', 'scaffold',
-        'evaluation_hooks', 'prediction_hooks'
-    ])):
+class EstimatorSpec(_EstimatorSpecNamedTuple):
   """Ops and objects returned from a `model_fn` and passed to an `Estimator`.
 
   `EstimatorSpec` fully defines the model to be run by an `Estimator`.
@@ -156,23 +208,22 @@ class EstimatorSpec(
         A dict `{name: output}` where:
         * name: An arbitrary name for this output.
         * output: an `ExportOutput` object such as `ClassificationOutput`,
-            `RegressionOutput`, or `PredictOutput`.
-        Single-headed models only need to specify one entry in this dictionary.
-        Multi-headed models should specify one entry for each head, one of
-        which must be named using
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.
-        If no entry is provided, a default `PredictOutput` mapping to
-        `predictions` will be created.
-      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
-        run on the chief worker during training.
-      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        on all workers during training.
+          `RegressionOutput`, or `PredictOutput`. Single-headed models only need
+          to specify one entry in this dictionary. Multi-headed models should
+          specify one entry for each head, one of which must be named using
+          `signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`. If no entry
+          is provided, a default `PredictOutput` mapping to `predictions` will
+          be created.
+      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        on the chief worker during training.
+      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run on
+        all workers during training.
       scaffold: A `tf.train.Scaffold` object that can be used to set
         initialization, saver, and more to be used in training.
-      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to
-        run during evaluation.
-      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to
-        run during predictions.
+      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        during evaluation.
+      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        during predictions.
 
     Returns:
       A validated `EstimatorSpec` object.
diff --git a/tensorflow/python/util/collections.py b/tensorflow/python/util/collections.py
new file mode 100644
index 0000000000..ef5290ee8b
--- /dev/null
+++ b/tensorflow/python/util/collections.py
@@ -0,0 +1,51 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Collections utilities."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+
+def tf_namedtuple(name, fieldnames_and_docs):
+  """A `namedtuple` class factory that supports field-docstrings.
+
+  ```
+  cls = tf_namedtuple("MyNamedTuple",[("a", "Docs for a"),
+                                      ("b", "Docs for b")])
+  cls.a.__doc__  # ==> "Docs for a"
+  ```
+
+  Args:
+    name: The name of the new class.
+    fieldnames_and_docs: A sequence of `(fieldname, docstring)` pairs. The
+      fieldnames are passed to `collections.namedtuple`.
+
+  Returns:
+    A namedtuple class.
+  """
+  fieldnames_and_docs = list(fieldnames_and_docs)
+  fieldnames = [fieldname for fieldname, doc in fieldnames_and_docs]
+  cls = collections.namedtuple(name, fieldnames)
+
+  for fieldname, doc in fieldnames_and_docs:
+    old_prop = getattr(cls, fieldname)
+    new_prop = property(fget=old_prop.fget, fset=old_prop.fset,
+                        fdel=old_prop.fdel, doc=doc)
+    setattr(cls, fieldname, new_prop)
+
+  return cls
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
index aa6ac46613..37695572c8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
-  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow.python.util.collections.EstimatorSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "eval_metric_ops"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
index aa6ac46613..37695572c8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
-  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow.python.util.collections.EstimatorSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "eval_metric_ops"
-- 
GitLab


From a9a5929d06e5eb4dd38bef63d56c4e338bbd38a2 Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Thu, 13 Sep 2018 09:50:09 -0700
Subject: [PATCH 0140/1357] Register a new Sum op for T:int64 and Tidx:int32

PiperOrigin-RevId: 212828463
---
 tensorflow/core/kernels/reduction_ops_sum.cc | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc
index 5318d8c133..e4ca89eca3 100644
--- a/tensorflow/core/kernels/reduction_ops_sum.cc
+++ b/tensorflow/core/kernels/reduction_ops_sum.cc
@@ -76,7 +76,15 @@ REGISTER_KERNEL_BUILDER(
         .HostMemory("output")
         .HostMemory("reduction_indices"),
     ReductionOp<CPUDevice, int32, int64, Eigen::internal::SumReducer<int32>>);
-
+REGISTER_KERNEL_BUILDER(
+    Name("Sum")
+        .Device(DEVICE_GPU)
+        .TypeConstraint<int64>("T")
+        .TypeConstraint<int32>("Tidx")
+        .HostMemory("input")
+        .HostMemory("output")
+        .HostMemory("reduction_indices"),
+    ReductionOp<CPUDevice, int64, int32, Eigen::internal::SumReducer<int64>>);
 #endif
 
 #ifdef TENSORFLOW_USE_SYCL
-- 
GitLab


From c6c6aad47dfb24cf4b5db565f49b59c2d224362b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 09:57:24 -0700
Subject: [PATCH 0141/1357] Removed `contrib.layers` dependency
 `bucket_by_sequence_length` tests.

PiperOrigin-RevId: 212829466
---
 .../contrib/data/python/kernel_tests/BUILD    |   1 -
 .../python/kernel_tests/bucketing_test.py     | 104 ++++++++++++------
 2 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 1f947e97f9..b3c90ded39 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -44,7 +44,6 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/contrib/data/python/ops:grouping",
-        "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
index 94718bb477..48971f2ccc 100644
--- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
@@ -21,7 +21,6 @@ import random
 
 import numpy as np
 
-from tensorflow.contrib import layers
 from tensorflow.contrib.data.python.ops import grouping
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -537,6 +536,40 @@ def _element_length_fn(x, y=None):
   return array_ops.shape(x)[0]
 
 
+def _to_sparse_tensor(record):
+  return sparse_tensor.SparseTensor(**record)
+
+
+def _format_record(array, sparse):
+  if sparse:
+    return {
+        "values": array,
+        "indices": [[i] for i in range(len(array))],
+        "dense_shape": (len(array),)
+    }
+  return array
+
+
+def _get_record_type(sparse):
+  if sparse:
+    return {
+        "values": dtypes.int64,
+        "indices": dtypes.int64,
+        "dense_shape": dtypes.int64
+    }
+  return dtypes.int32
+
+
+def _get_record_shape(sparse):
+  if sparse:
+    return {
+        "values": tensor_shape.TensorShape([None,]),
+        "indices": tensor_shape.TensorShape([None, 1]),
+        "dense_shape": tensor_shape.TensorShape([1,])
+    }
+  return tensor_shape.TensorShape([None])
+
+
 class BucketBySequenceLength(test.TestCase):
 
   def testBucket(self):
@@ -545,23 +578,28 @@ class BucketBySequenceLength(test.TestCase):
     batch_sizes = [10, 8, 4, 2]
     lengths = [8, 13, 25, 35]
 
-    def element_gen():
-      # Produce 1 batch for each bucket
-      elements = []
-      for batch_size, length in zip(batch_sizes, lengths):
-        record_len = length - 1
-        for _ in range(batch_size):
-          elements.append([1] * record_len)
-          record_len = length
-      random.shuffle(elements)
-      for el in elements:
-        yield (el,)
+    def build_dataset(sparse):
+      def _generator():
+        # Produce 1 batch for each bucket
+        elements = []
+        for batch_size, length in zip(batch_sizes, lengths):
+          record_len = length - 1
+          for _ in range(batch_size):
+            elements.append([1] * record_len)
+            record_len = length
+        random.shuffle(elements)
+        for el in elements:
+          yield (_format_record(el, sparse),)
+      dataset = dataset_ops.Dataset.from_generator(
+          _generator,
+          (_get_record_type(sparse),),
+          (_get_record_shape(sparse),))
+      if sparse:
+        dataset = dataset.map(lambda x: (_to_sparse_tensor(x),))
+      return dataset
 
     def _test_bucket_by_padding(no_padding):
-      dataset = dataset_ops.Dataset.from_generator(
-          element_gen, (dtypes.int64,), ([None],))
-      if no_padding:
-        dataset = dataset.map(lambda x: (layers.dense_to_sparse(x),))
+      dataset = build_dataset(sparse=no_padding)
       dataset = dataset.apply(
           grouping.bucket_by_sequence_length(
               _element_length_fn,
@@ -677,20 +715,23 @@ class BucketBySequenceLength(test.TestCase):
 
   def testTupleElements(self):
 
-    def elements_gen():
-      text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]]
-      label = [1, 2, 1, 2]
-      for x, y in zip(text, label):
-        yield (x, y)
+    def build_dataset(sparse):
+      def _generator():
+        text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]]
+        label = [1, 2, 1, 2]
+        for x, y in zip(text, label):
+          yield (_format_record(x, sparse), y)
+      dataset = dataset_ops.Dataset.from_generator(
+          generator=_generator,
+          output_types=(_get_record_type(sparse), dtypes.int32),
+          output_shapes=(_get_record_shape(sparse),
+                         tensor_shape.TensorShape([])))
+      if sparse:
+        dataset = dataset.map(lambda x, y: (_to_sparse_tensor(x), y))
+      return dataset
 
     def _test_tuple_elements_by_padding(no_padding):
-      dataset = dataset_ops.Dataset.from_generator(
-          generator=elements_gen,
-          output_shapes=(tensor_shape.TensorShape([None]),
-                         tensor_shape.TensorShape([])),
-          output_types=(dtypes.int32, dtypes.int32))
-      if no_padding:
-        dataset = dataset.map(lambda x, y: (layers.dense_to_sparse(x), y))
+      dataset = build_dataset(sparse=no_padding)
       dataset = dataset.apply(grouping.bucket_by_sequence_length(
           element_length_func=_element_length_fn,
           bucket_batch_sizes=[2, 2, 2],
@@ -727,12 +768,11 @@ class BucketBySequenceLength(test.TestCase):
       input_data = [range(i+1) for i in range(min_len, max_len)]
       def generator_fn():
         for record in input_data:
-          yield record
+          yield _format_record(record, sparse=True)
       dataset = dataset_ops.Dataset.from_generator(
           generator=generator_fn,
-          output_shapes=(tensor_shape.TensorShape([None])),
-          output_types=(dtypes.int64))
-      dataset = dataset.map(lambda x: layers.dense_to_sparse(x, eos_token=-1))
+          output_types=_get_record_type(sparse=True))
+      dataset = dataset.map(_to_sparse_tensor)
       return dataset
 
     def _compute_expected_batches():
-- 
GitLab


From 609a84774dfdbf6b54d91f70bed07f8d01f87a66 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Thu, 13 Sep 2018 10:01:41 -0700
Subject: [PATCH 0142/1357] Gracefully handle invalid inputs in Split and
 ReverseSequence.

PiperOrigin-RevId: 212830139
---
 tensorflow/core/kernels/reverse_sequence_op.cc | 5 +++--
 tensorflow/core/kernels/split_op.cc            | 7 ++++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/reverse_sequence_op.cc b/tensorflow/core/kernels/reverse_sequence_op.cc
index 15a707a9c6..cded417986 100644
--- a/tensorflow/core/kernels/reverse_sequence_op.cc
+++ b/tensorflow/core/kernels/reverse_sequence_op.cc
@@ -64,7 +64,7 @@ void CheckErrors(OpKernelContext* context, int batch_dim, int seq_dim) {
   OP_REQUIRES(context, seq_lens.NumElements() == input.dim_size(batch_dim),
               errors::InvalidArgument("len(seq_lens) != input.dims(", batch_dim,
                                       "), ", "(", seq_lens.NumElements(),
-                                      " vs. ", input.dim_size(batch_dim)));
+                                      " vs. ", input.dim_size(batch_dim), ")"));
 
   for (size_t d = 0; d < seq_lens_vec.size(); ++d) {
     OP_REQUIRES(context, seq_lens_vec[d] >= 0,
@@ -91,7 +91,7 @@ void CheckErrorsGPU(OpKernelContext* context, int batch_dim, int seq_dim) {
   OP_REQUIRES(context, seq_lens.NumElements() == input.dim_size(batch_dim),
               errors::InvalidArgument("len(seq_lens) != input.dims(", batch_dim,
                                       "), ", "(", seq_lens.NumElements(),
-                                      " vs. ", input.dim_size(batch_dim)));
+                                      " vs. ", input.dim_size(batch_dim), ")"));
 }
 
 template <>
@@ -127,6 +127,7 @@ class ReverseSequenceOp : public OpKernel {
     auto seq_lens_t = seq_lens.vec<Tlen>();
 
     CheckErrors<Device, Tlen>(context, batch_dim_, seq_dim_);
+    if (!context->status().ok()) return;
 
     const int input_dims = input.dims();
 
diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc
index 7cc3c532c9..11db72bfa3 100644
--- a/tensorflow/core/kernels/split_op.cc
+++ b/tensorflow/core/kernels/split_op.cc
@@ -49,7 +49,12 @@ class SplitOpBase : public OpKernel {
   void ComputeEasyCases(OpKernelContext* context, bool* done) {
     const Tensor& input = context->input(1);
     const TensorShape& input_shape = input.shape();
-    const int32 split_dim_orig = context->input(0).flat<int32>()(0);
+    const Tensor& split_dim_tensor = context->input(0);
+    OP_REQUIRES(
+        context, split_dim_tensor.shape().dims() == 0,
+        errors::InvalidArgument("split_dim must be a scalar but has rank ",
+                                split_dim_tensor.shape().dims()));
+    const int32 split_dim_orig = split_dim_tensor.flat<int32>()(0);
     const int32 split_dim =
         split_dim_orig < 0 ? split_dim_orig + input.dims() : split_dim_orig;
     const int32 num_split = num_outputs();
-- 
GitLab


From 1050e5dc93cd579607495df6086f3cec2d9aa1f4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 10:24:23 -0700
Subject: [PATCH 0143/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 212834379
---
 .../internal/optimized/optimized_ops.h        | 359 ++++++++++++------
 1 file changed, 250 insertions(+), 109 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index baed8f4993..370ca03c92 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -200,6 +200,8 @@ struct TTypes {
       UnalignedConstMatrix;
 };
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 // TODO(b/62193649): this function is only needed as long
 // as we have the --variable_batch hack.
 template <typename Scalar, int N>
@@ -212,6 +214,18 @@ MatrixMap<Scalar> MapAsMatrixWithGivenNumberOfRows(Scalar* data,
   return MatrixMap<Scalar>(data, rows, cols);
 }
 
+// TODO(b/62193649): this function is only needed as long
+// as we have the --variable_batch hack.
+template <typename Scalar>
+MatrixMap<Scalar> MapAsMatrixWithGivenNumberOfRows(Scalar* data,
+                                                   const RuntimeShape& shape,
+                                                   int rows) {
+  const int flatsize = shape.FlatSize();
+  TFLITE_DCHECK_EQ(flatsize % rows, 0);
+  const int cols = flatsize / rows;
+  return MatrixMap<Scalar>(data, rows, cols);
+}
+
 // This is like the template-parameter version, except that the power-of-two is
 // passed as a function parameter. The template version is to be preferred,
 // since some target hardware optimizations depend on the range of the exponent.
@@ -393,21 +407,24 @@ inline void optimized_ops_preload_l1_keep(const uint8* ptr) {
 // to a matrix*vector product. LSTM cells contain a fully-connected node;
 // when quantized, this becomes a special type of GEMV operation where
 // the output is 16bit-quantized, thus needs its own special path.
-inline void GEMVForLstmCell(const uint8* input_data, const Dims<4>& input_dims,
-                            const uint8* weights_data,
-                            const Dims<4>& weights_dims,
-                            uint8 weights_zero_point, const int32* bias_data,
-                            const Dims<4>& bias_dims, int32 accum_multiplier,
-                            int accum_shift, int16* output_data,
-                            const Dims<4>& output_dims) {
+inline void GEMVForLstmCell(const RuntimeShape& input_shape,
+                            const uint8* input_data,
+                            const RuntimeShape& weights_shape,
+                            const uint8* weights_data, uint8 weights_zero_point,
+                            const RuntimeShape& bias_shape,
+                            const int32* bias_data, int32 accum_multiplier,
+                            int accum_shift, const RuntimeShape& output_shape,
+                            int16* output_data) {
   gemmlowp::ScopedProfilingLabel label("GEMVForLstmCell");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(bias_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_dims, 0), 1);
-  const int input_size = FlatSizeSkipDim(input_dims, 3);
-  const int output_size = MatchingArraySize(weights_dims, 1, output_dims, 0);
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_shape, output_dim_count - 1), 1);
+  const int input_size = FlatSizeSkipDim(input_shape, 0);
+  const int output_size = MatchingDim(weights_shape, weights_dim_count - 2,
+                                      output_shape, output_dim_count - 1);
   // This special fast path for quantized LSTM cells does not try to support
   // odd sizes that we haven't encountered in any LSTM cell, that would
   // require special code (that would go untested until any LSTM cell
@@ -580,18 +597,21 @@ inline void GEMVForLstmCell(const uint8* input_data, const Dims<4>& input_dims,
 
 #ifdef GEMMLOWP_NEON
 inline void GEMVForLstmCellWithSymmetricRange(
-    const uint8* input_data, const Dims<4>& input_dims,
-    const uint8* weights_data, const Dims<4>& weights_dims,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 accum_multiplier,
-    int accum_shift, int16* output_data, const Dims<4>& output_dims) {
+    const RuntimeShape& input_shape, const uint8* input_data,
+    const RuntimeShape& weights_shape, const uint8* weights_data,
+    const RuntimeShape& bias_shape, const int32* bias_data,
+    int32 accum_multiplier, int accum_shift, const RuntimeShape& output_shape,
+    int16* output_data) {
   gemmlowp::ScopedProfilingLabel label("GEMVForLstmCellWithSymmetricRange");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(bias_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_dims, 0), 1);
-  const int input_size = FlatSizeSkipDim(input_dims, 3);
-  const int output_size = MatchingArraySize(weights_dims, 1, output_dims, 0);
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_shape, output_dim_count - 1), 1);
+  const int input_size = FlatSizeSkipDim(input_shape, 0);
+  const int output_size = MatchingDim(weights_shape, weights_dim_count - 2,
+                                      output_shape, output_dim_count - 1);
   // This special fast path for quantized LSTM cells does not try to support
   // odd sizes that we haven't encountered in any LSTM cell, that would
   // require special code (that would go untested until any LSTM cell
@@ -867,14 +887,16 @@ inline void GEMVForLstmCellWithSymmetricRange(
 }
 #endif
 
-inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
-                           const float* weights_data,
-                           const Dims<4>& weights_dims, const float* bias_data,
-                           const Dims<4>& bias_dims,
-                           float output_activation_min,
-                           float output_activation_max, float* output_data,
-                           const Dims<4>& output_dims) {
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& weights_shape,
+    const float* weights_data, const RuntimeShape& bias_shape,
+    const float* bias_data, const RuntimeShape& output_shape,
+    float* output_data) {
   gemmlowp::ScopedProfilingLabel label("FullyConnected");
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+
   // TODO(b/62193649): this convoluted shape computation (determining
   // input_rows from the weights_dims, then MapAsMatrixWithGivenNumberOfRows)
   // is because the current --variable_batch hack consists in overwriting the
@@ -883,18 +905,38 @@ inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
   // When that is fixed, this should become:
   // const auto input_matrix_map =
   //     MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
-  const int input_rows = ArraySize(weights_dims, 0);
+  const int dims_count = weights_shape.DimensionsCount();
+  const int input_rows = weights_shape.Dims(dims_count - 1);
   const auto input_matrix_map =
-      MapAsMatrixWithGivenNumberOfRows(input_data, input_dims, input_rows);
+      MapAsMatrixWithGivenNumberOfRows(input_data, input_shape, input_rows);
   const auto filter_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(weights_data, weights_dims);
+      MapAsMatrixWithLastDimAsRows(weights_data, weights_shape);
   auto output_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+      MapAsMatrixWithLastDimAsRows(output_data, output_shape);
 
   Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map);
-  AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data,
-                                   output_dims, output_activation_min,
-                                   output_activation_max);
+  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+                                   bias_shape, bias_data, output_shape,
+                                   output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+                           const float* weights_data,
+                           const Dims<4>& weights_dims, const float* bias_data,
+                           const Dims<4>& bias_dims,
+                           float output_activation_min,
+                           float output_activation_max, float* output_data,
+                           const Dims<4>& output_dims) {
+  tflite::FullyConnectedParams op_params;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(weights_dims), weights_data,
+                 DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+                 output_data);
 }
 
 // legacy, for compatibility with old checked-in code
@@ -912,20 +954,23 @@ void FullyConnected(const float* input_data, const Dims<4>& input_dims,
 
 #ifdef USE_NEON
 inline void FullyConnectedAsGEMV(
-    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
-    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 output_offset,
+    const RuntimeShape& input_shape, const uint8* input_data,
+    int32 input_offset, const RuntimeShape& filter_shape,
+    const uint8* filter_data, int32 filter_offset,
+    const RuntimeShape& bias_shape, const int32* bias_data, int32 output_offset,
     int32 output_multiplier, int output_shift, int32 output_activation_min,
-    int32 output_activation_max, uint8* output_data,
-    const Dims<4>& output_dims) {
+    int32 output_activation_max, const RuntimeShape& output_shape,
+    uint8* output_data) {
   gemmlowp::ScopedProfilingLabel label("FullyConnectedAsGEMV/8bit");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(bias_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_dims, 0), 1);
-  const int input_size = FlatSizeSkipDim(input_dims, 3);
-  const int output_size = MatchingArraySize(filter_dims, 1, output_dims, 0);
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_shape, output_dim_count - 1), 1);
+  const int input_size = FlatSizeSkipDim(input_shape, 0);
+  const int output_size = MatchingDim(filter_shape, filter_dim_count - 2,
+                                      output_shape, output_dim_count - 1);
   static constexpr int kPeel = 4;
   const bool shift_left = (output_shift <= 0);
   for (int k = 0; k < input_size; k += 64) {
@@ -1096,42 +1141,47 @@ struct GemmlowpOutputPipeline {
   }
 };
 
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                           int32 input_offset, const uint8* filter_data,
-                           const Dims<4>& filter_dims, int32 filter_offset,
-                           const int32* bias_data, const Dims<4>& bias_dims,
-                           int32 output_offset, int32 output_multiplier,
-                           int output_shift, int32 output_activation_min,
-                           int32 output_activation_max, uint8* output_data,
-                           const Dims<4>& output_dims,
-                           gemmlowp::GemmContext* gemm_context) {
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    uint8* output_data, gemmlowp::GemmContext* gemm_context) {
   gemmlowp::ScopedProfilingLabel label("FullyConnected/8bit");
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
   // TODO(benoitjacob): This really should be:
   //     const int batches = ArraySize(output_dims, 1);
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = FlatSizeSkipDim(output_dims, 0);
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
 #ifdef USE_NEON
-  const int output_size = MatchingArraySize(filter_dims, 1, output_dims, 0);
+  const int output_size = MatchingDim(filter_shape, filter_dim_count - 2,
+                                      output_shape, output_dim_count - 1);
   if (batches == 1 && !(output_size % 4)) {
     return FullyConnectedAsGEMV(
-        input_data, input_dims, input_offset, filter_data, filter_dims,
-        filter_offset, bias_data, bias_dims, output_offset, output_multiplier,
-        output_shift, output_activation_min, output_activation_max, output_data,
-        output_dims);
+        input_shape, input_data, input_offset, filter_shape, filter_data,
+        filter_offset, bias_shape, bias_data, output_offset, output_multiplier,
+        output_shift, output_activation_min, output_activation_max,
+        output_shape, output_data);
   }
 #endif  // USE_NEON
-  const int filter_rows = filter_dims.sizes[1];
-  const int filter_cols = filter_dims.sizes[0];
-  TFLITE_DCHECK_EQ(filter_dims.sizes[2], 1);
-  TFLITE_DCHECK_EQ(filter_dims.sizes[3], 1);
-  const int output_rows = output_dims.sizes[0];
+  const int filter_rows = filter_shape.Dims(filter_dim_count - 2);
+  const int filter_cols = filter_shape.Dims(filter_dim_count - 1);
+  TFLITE_DCHECK_EQ(filter_shape.FlatSize(), filter_rows * filter_cols);
+  const int output_rows = output_shape.Dims(output_dim_count - 1);
   TFLITE_DCHECK_EQ(output_rows, filter_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_rows);
 
   gemmlowp::MatrixMap<const uint8, gemmlowp::MapOrder::RowMajor> filter_matrix(
       filter_data, output_rows, filter_cols, filter_cols);
@@ -1148,30 +1198,65 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
       input_offset, output_pipeline);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                           int32 input_offset, const uint8* filter_data,
+                           const Dims<4>& filter_dims, int32 filter_offset,
+                           const int32* bias_data, const Dims<4>& bias_dims,
+                           int32 output_offset, int32 output_multiplier,
+                           int output_shift, int32 output_activation_min,
+                           int32 output_activation_max, uint8* output_data,
+                           const Dims<4>& output_dims,
+                           gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
 inline void FullyConnected(
-    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
-    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
-    const int32* bias_data_int32, const Dims<4>& bias_dims, int32 output_offset,
-    int32 output_multiplier, int output_shift, int32 output_activation_min,
-    int32 output_activation_max, int16* output_data, const Dims<4>& output_dims,
-    gemmlowp::GemmContext* gemm_context) {
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data_int32, const RuntimeShape& output_shape,
+    int16* output_data, gemmlowp::GemmContext* gemm_context) {
   gemmlowp::ScopedProfilingLabel label("FullyConnected/Uint8Int16");
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
   // This is a copy of the reference implementation. We do not currently have a
   // properly optimized version.
   (void)gemm_context;  // only used in properly optimized code.
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
   TFLITE_DCHECK_EQ(output_offset, 0);
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
 
   // TODO(benoitjacob): This really should be:
   //     const int batches = ArraySize(output_dims, 1);
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = FlatSizeSkipDim(output_dims, 0);
-  const int output_depth = MatchingArraySize(filter_dims, 1, output_dims, 0);
-  const int accum_depth = ArraySize(filter_dims, 0);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
 
   // Implementation of the fully connected node suited to the inside of an LSTM
   // cell. The operands are 8-bit integers, the accumulators are internally
@@ -1182,17 +1267,17 @@ inline void FullyConnected(
   if (batches == 1 && input_offset == -128 && output_activation_min == -32768 &&
       output_activation_max == 32767) {
     if (filter_offset == -128 && !(output_depth % 4) && !(accum_depth % 64)) {
-      GEMVForLstmCellWithSymmetricRange(input_data, input_dims, filter_data,
-                                        filter_dims, bias_data_int32, bias_dims,
-                                        output_multiplier, -output_shift,
-                                        output_data, output_dims);
+      GEMVForLstmCellWithSymmetricRange(
+          input_shape, input_data, filter_shape, filter_data, bias_shape,
+          bias_data_int32, output_multiplier, -output_shift, output_shape,
+          output_data);
       return;
     }
     if (!(output_depth % 4) && !(accum_depth % 8)) {
-      GEMVForLstmCell(input_data, input_dims, filter_data, filter_dims,
-                      filter_offset, bias_data_int32, bias_dims,
-                      output_multiplier, -output_shift, output_data,
-                      output_dims);
+      GEMVForLstmCell(input_shape, input_data, filter_shape, filter_data,
+                      filter_offset, bias_shape, bias_data_int32,
+                      output_multiplier, -output_shift, output_shape,
+                      output_data);
       return;
     }
   }
@@ -1226,6 +1311,31 @@ inline void FullyConnected(
       input_offset, output_pipeline);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(
+    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
+    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
+    const int32* bias_data_int32, const Dims<4>& bias_dims, int32 output_offset,
+    int32 output_multiplier, int output_shift, int32 output_activation_min,
+    int32 output_activation_max, int16* output_data, const Dims<4>& output_dims,
+    gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data_int32, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
@@ -1568,26 +1678,34 @@ struct ShuffledFullyConnectedWorkerTask : gemmlowp::Task {
 };
 
 inline void ShuffledFullyConnected(
-    const uint8* input_data, const Dims<4>& input_dims,
-    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
-    int output_shift, int32 output_activation_min, int32 output_activation_max,
-    int16* output_data, const Dims<4>& output_dims,
-    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& weights_shape,
+    const uint8* shuffled_weights_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    int16* output_data, uint8* shuffled_input_workspace_data,
+    gemmlowp::GemmContext* gemm_context) {
   gemmlowp::ScopedProfilingLabel label("ShuffledFullyConnected/8bit");
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
   (void)gemm_context;  // only used in optimized code.
   TFLITE_DCHECK_EQ(output_activation_min, -32768);
   TFLITE_DCHECK_EQ(output_activation_max, 32767);
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
   // TODO(benoitjacob): This really should be:
   //     const int batches = ArraySize(output_dims, 1);
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = FlatSizeSkipDim(output_dims, 0);
-  const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0);
-  const int accum_depth = ArraySize(weights_dims, 0);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims));
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
   TFLITE_DCHECK((accum_depth % 16) == 0);
   TFLITE_DCHECK((output_depth % 4) == 0);
   // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
@@ -1684,6 +1802,28 @@ inline void ShuffledFullyConnected(
   gemm_context->workers_pool()->Execute(tasks);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void ShuffledFullyConnected(
+    const uint8* input_data, const Dims<4>& input_dims,
+    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
+    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
+    int output_shift, int32 output_activation_min, int32 output_activation_max,
+    int16* output_data, const Dims<4>& output_dims,
+    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data,
+                         DimsToShape(weights_dims), shuffled_weights_data,
+                         DimsToShape(bias_dims), bias_data,
+                         DimsToShape(output_dims), output_data,
+                         shuffled_input_workspace_data, gemm_context);
+}
+
 template <typename T>
 inline void ExtractPatchIntoBufferColumn(const RuntimeShape& input_shape, int w,
                                          int h, int b, int kheight, int kwidth,
@@ -3635,10 +3775,11 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
   bool gemm_already_performed = false;
 #ifdef GEMMLOWP_NEON
   if (fc_batches == 1 && !(fc_output_depth % 4) && !(fc_accum_depth % 8)) {
-    GEMVForLstmCell(concat_temp_data_uint8, concat_temp_dims,
-                    weights_data_uint8, weights_dims, weights_zero_point,
-                    bias_data_int32, bias_dims, accum_multiplier, accum_shift,
-                    activ_temp_data_int16, activ_temp_dims);
+    GEMVForLstmCell(DimsToShape(concat_temp_dims), concat_temp_data_uint8,
+                    DimsToShape(weights_dims), weights_data_uint8,
+                    weights_zero_point, DimsToShape(bias_dims), bias_data_int32,
+                    accum_multiplier, accum_shift, DimsToShape(activ_temp_dims),
+                    activ_temp_data_int16);
     gemm_already_performed = true;
   }
 #endif
-- 
GitLab


From 685f2832daa7084cd1bf484e8a7bb4333e246428 Mon Sep 17 00:00:00 2001
From: Guangda Lai <laigd@google.com>
Date: Thu, 13 Sep 2018 10:44:21 -0700
Subject: [PATCH 0144/1357] Add TF-TRT kernels/ops to contrib_kernels and
 contrib_ops_op_lib, so TF serving can use them.

PiperOrigin-RevId: 212838380
---
 tensorflow/contrib/BUILD | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 798f499870..d98a24994c 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -166,7 +166,9 @@ cc_library(
             "//tensorflow/contrib/kinesis:dataset_kernels",
         ],
         "//conditions:default": [],
-    }),
+    }) + if_not_windows([
+        "//tensorflow/contrib/tensorrt:trt_engine_op_kernel",
+    ]),
 )
 
 cc_library(
@@ -203,5 +205,7 @@ cc_library(
             "//tensorflow/contrib/kinesis:dataset_ops_op_lib",
         ],
         "//conditions:default": [],
-    }),
+    }) + if_not_windows([
+        "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib",
+    ]),
 )
-- 
GitLab


From f54856b1448bed24534189e4aa2ebb9d0b4f5b9a Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 18:13:47 +0000
Subject: [PATCH 0145/1357] Apply buildifier changes.

---
 tensorflow/contrib/ignite/BUILD | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
index 1adc6c6ccc..9393b702d1 100644
--- a/tensorflow/contrib/ignite/BUILD
+++ b/tensorflow/contrib/ignite/BUILD
@@ -6,14 +6,14 @@ exports_files(["LICENSE"])
 
 load(
     "//tensorflow:tensorflow.bzl",
-    "tf_gen_op_wrapper_py",
-    "tf_kernel_library",
+    "if_not_windows",
+    "if_windows",
     "tf_custom_op_library",
     "tf_custom_op_py_library",
     "tf_gen_op_libs",
+    "tf_gen_op_wrapper_py",
+    "tf_kernel_library",
     "tf_py_test",
-    "if_not_windows",
-    "if_windows",
 )
 
 py_library(
@@ -55,15 +55,15 @@ cc_library(
     ]) + if_windows([
         "kernels/ignite_plain_client_windows.cc",
     ]),
+    copts = if_windows([
+        "-DWIN32_LEAN_AND_MEAN",
+    ]),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
         "@boringssl//:ssl",
         "@protobuf_archive//:protobuf_headers",
     ],
-    copts = if_windows([
-        "-DWIN32_LEAN_AND_MEAN",
-    ]),
     alwayslink = 1,
 )
 
-- 
GitLab


From ee72b6a204232532e64221f1b9db7843ee13c312 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 11:30:45 -0700
Subject: [PATCH 0146/1357] Automated rollback of commit
 56d4fc8ff67f48294ae5cb0a7f9ff3d954463aa3

PiperOrigin-RevId: 212847619
---
 tensorflow/python/estimator/model_fn.py       | 93 +++++--------------
 tensorflow/python/util/collections.py         | 51 ----------
 ...tensorflow.estimator.-estimator-spec.pbtxt |  2 +-
 ...tensorflow.estimator.-estimator-spec.pbtxt |  2 +-
 4 files changed, 23 insertions(+), 125 deletions(-)
 delete mode 100644 tensorflow/python/util/collections.py

diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 728de65559..439cc2e3a4 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -33,7 +33,6 @@ from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.util import nest
-from tensorflow.python.util.collections import tf_namedtuple
 from tensorflow.python.util.tf_export import estimator_export
 
 
@@ -63,65 +62,14 @@ EXPORT_TAG_MAP = {
     ModeKeys.EVAL: [tag_constants.EVAL],
 }
 
-# pylint: disable=line-too-long
-
-_EstimatorSpecNamedTuple = tf_namedtuple('EstimatorSpec', [   # pylint: disable=invalid-name
-    ('mode',
-     'A `ModeKeys`. Specifies if this is training, evaluation or prediction.'
-    ),
-    ('predictions', 'Predictions `Tensor` or dict of `Tensor`.'),
-    ('loss',
-     'Training loss `Tensor`. Must be either scalar, or with shape `[1]`.'),
-    ('train_op', 'Op to run one training step.'),
-    ('eval_metric_ops',
-     """Dict of metric results keyed by name.
-
-     The values of the dict are the results of calling a metric function,
-     namely a `(metric_tensor, update_op)` tuple.
-
-     `metric_tensor` should be evaluated without any impact on state
-     (typically is a pure computation results based on variables.).
-     For example, it should not trigger the `update_op` or requires any
-     input fetching."""
-    ),
-    ('export_outputs',
-     """Describes the output signatures to be exported to `SavedModel`.
-
-     A dict `{name: output}` where:
-
-       * `name` is An arbitrary name for this output.
-       * `output` is an `ExportOutput` object such as `ClassificationOutput`,
-         `RegressionOutput`, or `PredictOutput`.
-
-     Single-headed models only need to specify one entry in this dictionary.
-     Multi-headed models should specify one entry for each head, one of
-     which must be named using
-     `signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`. If no entry is
-     provided, a default `PredictOutput` mapping to `predictions` will be
-     created."""
-    ),
-    ('training_chief_hooks',
-     'Iterable of `tf.train.SessionRunHook` objects to run on the chief worker during training.'
-    ),
-    ('training_hooks',
-     'Iterable of `tf.train.SessionRunHook` objects to run on all workers during training.'
-    ),
-    ('scaffold',
-     'A `tf.train.Scaffold` object that can be used to set initialization, saver, and more to be used in training.'
-    ),
-    ('evaluation_hooks',
-     'Iterable of `tf.train.SessionRunHook` objects to run during evaluation.'
-    ),
-    ('prediction_hooks',
-     'Iterable of `tf.train.SessionRunHook` objects to run during predictions.'
-    ),
-])
-
-# pylint: enable=line-too-long
-
 
 @estimator_export('estimator.EstimatorSpec')
-class EstimatorSpec(_EstimatorSpecNamedTuple):
+class EstimatorSpec(
+    collections.namedtuple('EstimatorSpec', [
+        'mode', 'predictions', 'loss', 'train_op', 'eval_metric_ops',
+        'export_outputs', 'training_chief_hooks', 'training_hooks', 'scaffold',
+        'evaluation_hooks', 'prediction_hooks'
+    ])):
   """Ops and objects returned from a `model_fn` and passed to an `Estimator`.
 
   `EstimatorSpec` fully defines the model to be run by an `Estimator`.
@@ -208,22 +156,23 @@ class EstimatorSpec(_EstimatorSpecNamedTuple):
         A dict `{name: output}` where:
         * name: An arbitrary name for this output.
         * output: an `ExportOutput` object such as `ClassificationOutput`,
-          `RegressionOutput`, or `PredictOutput`. Single-headed models only need
-          to specify one entry in this dictionary. Multi-headed models should
-          specify one entry for each head, one of which must be named using
-          `signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`. If no entry
-          is provided, a default `PredictOutput` mapping to `predictions` will
-          be created.
-      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        on the chief worker during training.
-      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run on
-        all workers during training.
+            `RegressionOutput`, or `PredictOutput`.
+        Single-headed models only need to specify one entry in this dictionary.
+        Multi-headed models should specify one entry for each head, one of
+        which must be named using
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.
+        If no entry is provided, a default `PredictOutput` mapping to
+        `predictions` will be created.
+      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
+      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        on all workers during training.
       scaffold: A `tf.train.Scaffold` object that can be used to set
         initialization, saver, and more to be used in training.
-      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        during evaluation.
-      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        during predictions.
+      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run during evaluation.
+      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run during predictions.
 
     Returns:
       A validated `EstimatorSpec` object.
diff --git a/tensorflow/python/util/collections.py b/tensorflow/python/util/collections.py
deleted file mode 100644
index ef5290ee8b..0000000000
--- a/tensorflow/python/util/collections.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Collections utilities."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-
-def tf_namedtuple(name, fieldnames_and_docs):
-  """A `namedtuple` class factory that supports field-docstrings.
-
-  ```
-  cls = tf_namedtuple("MyNamedTuple",[("a", "Docs for a"),
-                                      ("b", "Docs for b")])
-  cls.a.__doc__  # ==> "Docs for a"
-  ```
-
-  Args:
-    name: The name of the new class.
-    fieldnames_and_docs: A sequence of `(fieldname, docstring)` pairs. The
-      fieldnames are passed to `collections.namedtuple`.
-
-  Returns:
-    A namedtuple class.
-  """
-  fieldnames_and_docs = list(fieldnames_and_docs)
-  fieldnames = [fieldname for fieldname, doc in fieldnames_and_docs]
-  cls = collections.namedtuple(name, fieldnames)
-
-  for fieldname, doc in fieldnames_and_docs:
-    old_prop = getattr(cls, fieldname)
-    new_prop = property(fget=old_prop.fget, fset=old_prop.fset,
-                        fdel=old_prop.fdel, doc=doc)
-    setattr(cls, fieldname, new_prop)
-
-  return cls
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
index 37695572c8..aa6ac46613 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
-  is_instance: "<class \'tensorflow.python.util.collections.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "eval_metric_ops"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
index 37695572c8..aa6ac46613 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
-  is_instance: "<class \'tensorflow.python.util.collections.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "eval_metric_ops"
-- 
GitLab


From edd2ee1f5e06d3c755aa402e2617f82fc49330aa Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 11:31:42 -0700
Subject: [PATCH 0147/1357] Fix the outfeed test and add a test for empty while
 loop body.

PiperOrigin-RevId: 212847779
---
 .../xla/service/hlo_module_dce_test.cc        | 48 +++++++++++++++++--
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc
index d025edbb9c..bf66cc6bc3 100644
--- a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc
@@ -372,26 +372,64 @@ TEST_F(HloModuleDceTest, WhileWithOutfeed) {
   auto module = ParseHloString(R"(
   HloModule OutfeedLoop
   WhileBody {
-    loop_var.1 = (s32[]) parameter(0)
+    body_param = (s32[]) parameter(0)
     token = token[] after-all()
     constant.2 = s32[] constant(2)
     outfeed_tuple = (s32[]) outfeed(constant.2, token)
-    get-tuple-element.1 = s32[] get-tuple-element(loop_var.1), index=0
+    get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0
     constant.1 = s32[] constant(1)
     add = s32[] add(get-tuple-element.1, constant.1)
     ROOT tuple = (s32[]) tuple(add)
   }
   WhileCondition {
-    loop_var.2 = (s32[]) parameter(0)
-    get-tuple-element.3 = s32[] get-tuple-element(loop_var.2), index=0
+    cond_param = (s32[]) parameter(0)
+    get-tuple-element.3 = s32[] get-tuple-element(cond_param), index=0
     constant.2 = s32[] constant(10)
     ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2)
   }
   ENTRY SimpleLoop {
     constant.3 = s32[] constant(0)
     tuple.1 = (s32[]) tuple(constant.3)
-    ROOT while = (s32[]) while(tuple.1), condition=WhileCondition,
+    while = (s32[]) while(tuple.1), condition=WhileCondition,
+      body=WhileBody
+    ROOT rtuple = () tuple()
+  })")
+                    .ValueOrDie();
+
+  HloModuleDCE dce;
+  EXPECT_FALSE(dce.Run(module.get()).ValueOrDie());
+  EXPECT_FALSE(WhileBodyHasPassThroughTupleElement(module->entry_computation(),
+                                                   "while", 0));
+}
+
+// Tests that if a loop variable is not referenced outside of a kWhile, the loop
+// variable changes are not elided within the loop body, if the condition
+// computation uses them.
+TEST_F(HloModuleDceTest, WhileWithOnlyLoopVariableBumping) {
+  auto module = ParseHloString(R"(
+  HloModule InfiniteLoop
+  WhileBody {
+    body_param = (s32[], s32[]) parameter(0)
+    get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0
+    get-tuple-element.2 = s32[] get-tuple-element(body_param), index=1
+    constant.1 = s32[] constant(1)
+    add = s32[] add(get-tuple-element.1, constant.1)
+    ROOT tuple = (s32[], s32[]) tuple(add, get-tuple-element.2)
+  }
+  WhileCondition {
+    cond_param = (s32[], s32[]) parameter(0)
+    get-tuple-element.3 = s32[] get-tuple-element(cond_param), index=0
+    constant.2 = s32[] constant(10)
+    ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2)
+  }
+  ENTRY SimpleLoop {
+    p0 = (s32[]) parameter(0)
+    get-tuple-element.5 = s32[] get-tuple-element(p0), index=0
+    constant.3 = s32[] constant(0)
+    tuple.1 = (s32[], s32[]) tuple(constant.3, get-tuple-element.5)
+    while = (s32[], s32[]) while(tuple.1), condition=WhileCondition,
       body=WhileBody
+    ROOT get-tuple-element.4 = s32[] get-tuple-element(while), index=1
   })")
                     .ValueOrDie();
 
-- 
GitLab


From e40c240642637695de8469441ccf8759c74fb63e Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Thu, 13 Sep 2018 11:40:22 -0700
Subject: [PATCH 0148/1357] Removing OutOfRangeError checks and testing going
 to the end of the dataset in PrefetchingOpsV2. There is a bit of non
 determinism with the FunctionBufferingResource that will get fixed with the
 MultiDeviceIterator and once we transition to that we can go back to enabling
 these checks.

PiperOrigin-RevId: 212849405
---
 .../distribute/python/prefetching_ops_v2_test.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
index bb10b546a1..16799104e8 100644
--- a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
+++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
@@ -55,14 +55,14 @@ class PrefetchingOpsV2Test(test.TestCase):
     next_element = iterator.get_next()
 
     output = []
+    # TODO(rohanj): Modify test to go till the end of the dataset when we
+    # switch to MultiDeviceIterator.
     with self.cached_session() as sess:
-      for _ in range(5):
+      for _ in range(4):
         result = sess.run(next_element)
         self.assertEqual(2, len(result))
         output.extend(result)
-      self.assertEquals(set(range(10)), set(output))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+      self.assertEquals(set(range(8)), set(output))
 
   def testPrefetchToTwoDevicesWithReinit(self):
     if not test_util.is_gpu_available():
@@ -75,14 +75,14 @@ class PrefetchingOpsV2Test(test.TestCase):
     iterator = device_dataset.make_initializable_iterator()
     next_element = iterator.get_next()
 
+    # TODO(rohanj): Modify test to go till the end of the dataset when we
+    # switch to MultiDeviceIterator.
     with self.cached_session() as sess:
       sess.run(iterator.initializer)
-      for _ in range(5):
-        sess.run(next_element)
-      with self.assertRaises(errors.OutOfRangeError):
+      for _ in range(4):
         sess.run(next_element)
       sess.run(iterator.initializer)
-      for _ in range(5):
+      for _ in range(4):
         sess.run(next_element)
 
 
-- 
GitLab


From 0fbeac58e098cf0ac8e131617ebb6780e10c9606 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Thu, 13 Sep 2018 11:51:06 -0700
Subject: [PATCH 0149/1357] Prevent an integral division by zero (undefined
 behavior).

PiperOrigin-RevId: 212851417
---
 tensorflow/core/lib/wav/wav_io.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/core/lib/wav/wav_io.cc b/tensorflow/core/lib/wav/wav_io.cc
index 36d939e061..c536b5688e 100644
--- a/tensorflow/core/lib/wav/wav_io.cc
+++ b/tensorflow/core/lib/wav/wav_io.cc
@@ -232,6 +232,11 @@ Status DecodeLin16WaveAsFloatVector(const string& wav_string,
         "Bad audio format for WAV: Expected 1 (PCM), but got", audio_format);
   }
   TF_RETURN_IF_ERROR(ReadValue<uint16>(wav_string, channel_count, &offset));
+  if (*channel_count < 1) {
+    return errors::InvalidArgument(
+        "Bad number of channels for WAV: Expected at least 1, but got ",
+        *channel_count);
+  }
   TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, sample_rate, &offset));
   uint32 bytes_per_second;
   TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &bytes_per_second, &offset));
-- 
GitLab


From 49581856c47c2d3d1e81c4b10d9896259f58bae6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 12:13:11 -0700
Subject: [PATCH 0150/1357] Add some debugging checks for categorical split
 handler. Also use MIN_INT64 for the bias feature accumulation since
 categorical_feature_with_xyz  use -1 for out of vocab features.

PiperOrigin-RevId: 212855656
---
 .../contrib/boosted_trees/kernels/split_handler_ops.cc   | 9 +++++++++
 .../lib/learner/batch/categorical_split_handler.py       | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
index 3b28ed77f3..51e0c2e431 100644
--- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
@@ -862,6 +862,15 @@ class BuildCategoricalEqualitySplitsOp : public OpKernel {
       auto* equality_split = split_info.mutable_split_node()
                                  ->mutable_categorical_id_binary_split();
       equality_split->set_feature_column(state->feature_column_group_id());
+      CHECK(feature_ids(best_feature_idx, 0) != bias_feature_id)
+          << "Unexpected feature ID selected. "
+          << "Start feature ID: [" << start_index << "] "
+          << feature_ids(start_index, 0) << ", " << feature_ids(start_index, 1)
+          << "\nBest feature ID: [" << best_feature_idx << "] "
+          << feature_ids(best_feature_idx, 0) << ", "
+          << feature_ids(best_feature_idx, 1)
+          << "\nPartition IDS: " << partition_ids(start_index) << "  "
+          << partition_ids(best_feature_idx);
       equality_split->set_feature_id(feature_ids(best_feature_idx, 0));
       auto* left_child = split_info.mutable_left_child();
       auto* right_child = split_info.mutable_right_child();
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py
index 35d727482b..4da25298cb 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py
+++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py
@@ -29,7 +29,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 
-_BIAS_FEATURE_ID = -1
+_BIAS_FEATURE_ID = int(dtypes.int64.min)
 
 
 class EqualitySplitHandler(base_split_handler.BaseSplitHandler):
-- 
GitLab


From 54cac449527a6668d5410b6403c1c54d71a9ba82 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 12:24:35 -0700
Subject: [PATCH 0151/1357] Add root of profile broken down by program to
 Profile proto.

PiperOrigin-RevId: 212857508
---
 tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc |  5 ++---
 tensorflow/contrib/tpu/profiler/op_profile.proto    | 10 ++++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
index 98cc31f18d..b4b06a40a2 100644
--- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
@@ -142,9 +142,8 @@ Status WriteTensorboardTPUProfile(const string& logdir, const string& run,
     TF_RETURN_IF_ERROR(DumpTraceToLogDirectory(profile_run_dir, host_prefix,
                                                response.encoded_trace(), os));
   }
-  if (response.has_op_profile() &&
-      (response.op_profile().has_by_program_structure() ||
-       response.op_profile().has_by_category())) {
+  if (response.has_op_profile() && (response.op_profile().has_by_program() ||
+                                    response.op_profile().has_by_category())) {
     TF_RETURN_IF_ERROR(DumpOpProfileToLogDirectory(profile_run_dir, host_prefix,
                                                    response.op_profile(), os));
   }
diff --git a/tensorflow/contrib/tpu/profiler/op_profile.proto b/tensorflow/contrib/tpu/profiler/op_profile.proto
index feb177a7da..68cf510e71 100644
--- a/tensorflow/contrib/tpu/profiler/op_profile.proto
+++ b/tensorflow/contrib/tpu/profiler/op_profile.proto
@@ -4,12 +4,14 @@ package tensorflow.tpu.op_profile;
 
 // Profile is the top-level data that summarizes a program.
 message Profile {
+  reserved 2;
+  reserved "by_program_structure";
+  reserved 3;
+  reserved "per_program";
   // Root of a profile broken down by instruction category.
   Node by_category = 1;
-  // Root of a profile broken down by program structure.
-  Node by_program_structure = 2;
-  // Per program profile, indexed by hlo module name of the program.
-  map<string, Node> per_program = 3;
+  // Root of a profile broken down by program.
+  Node by_program = 4;
 }
 
 // An entry in the profile tree. (An instruction, or set of instructions).
-- 
GitLab


From d860915b0198ddb96f93e9e97a789af156544dc6 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 13 Sep 2018 12:31:47 -0700
Subject: [PATCH 0152/1357] Move nccl_rewrite.cc back to tf_kernel_library.

PiperOrigin-RevId: 212858590
---
 tensorflow/contrib/nccl/BUILD | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD
index 225025e995..9a9d480260 100644
--- a/tensorflow/contrib/nccl/BUILD
+++ b/tensorflow/contrib/nccl/BUILD
@@ -25,7 +25,7 @@ tf_custom_op_library(
     name = "python/ops/_nccl_ops.so",
     srcs = [
         "ops/nccl_ops.cc",
-    ] + if_cuda(["kernels/nccl_rewrite.cc"]),
+    ],
     gpu_srcs = if_not_windows_cuda([
         "kernels/nccl_manager.cc",
         "kernels/nccl_manager.h",
@@ -74,6 +74,7 @@ tf_kernel_library(
         "kernels/nccl_manager.cc",
         "kernels/nccl_manager.h",
         "kernels/nccl_ops.cc",
+        "kernels/nccl_rewrite.cc",
     ]),
     deps = if_cuda([
         "@local_config_nccl//:nccl",
-- 
GitLab


From f2c23922fc4d977a4fbe4d2353f7b14231d63f6b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 12:49:49 -0700
Subject: [PATCH 0153/1357] Clean ups related to runtime shapes refactoring.

PiperOrigin-RevId: 212861571
---
 .../internal/optimized/optimized_ops.h        | 30 ++++----
 .../internal/reference/reference_ops.h        | 72 ++++++++++---------
 2 files changed, 53 insertions(+), 49 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 370ca03c92..659a65a8ea 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -2637,9 +2637,9 @@ inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
 
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int input_depth = input_shape.Dims(3);
@@ -2678,9 +2678,9 @@ inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,
 
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int output_depth = output_shape.Dims(3);
@@ -3508,7 +3508,7 @@ void BroadcastDiv4DSlow(const ArithmeticParams& params,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -5760,9 +5760,9 @@ inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
   gemmlowp::ScopedProfilingLabel label("ResizeBilinear");
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
@@ -5809,9 +5809,9 @@ inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
   gemmlowp::ScopedProfilingLabel label("ResizeBilinear");
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
@@ -5870,9 +5870,9 @@ inline void BatchToSpaceND(
 
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input1_shape =
+  const RuntimeShape input1_shape =
       RuntimeShape::ExtendedShape(4, unextended_input1_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int output_width = output_shape.Dims(2);
@@ -5956,8 +5956,10 @@ inline void PadImpl(const tflite::PadParams& op_params,
                     const P* pad_value_ptr, const RuntimeShape& output_shape,
                     T* output_data) {
   gemmlowp::ScopedProfilingLabel label("Pad");
-  RuntimeShape ext_input_shape = RuntimeShape::ExtendedShape(4, input_shape);
-  RuntimeShape ext_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+  const RuntimeShape ext_input_shape =
+      RuntimeShape::ExtendedShape(4, input_shape);
+  const RuntimeShape ext_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
   TFLITE_DCHECK_LE(op_params.left_padding_count, 4);
   TFLITE_DCHECK_LE(op_params.right_padding_count, 4);
 
@@ -6089,7 +6091,7 @@ inline void Slice(const tflite::SliceParams& op_params,
                   const RuntimeShape& input_shape, const T* input_data,
                   const RuntimeShape& output_shape, T* output_data) {
   gemmlowp::ScopedProfilingLabel label("Slice");
-  RuntimeShape ext_shape = RuntimeShape::ExtendedShape(4, input_shape);
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(4, input_shape);
   // TODO(dkalenichenko): This op only supports 4D tensors or smaller.
   TFLITE_DCHECK_LE(op_params.begin_count, 4);
   TFLITE_DCHECK_LE(op_params.size_count, 4);
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 977367026d..66f18ec195 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -419,9 +419,9 @@ inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
                          T* output_data) {
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int input_depth = input_shape.Dims(3);
@@ -472,9 +472,9 @@ inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,
                          T* output_data) {
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int input_depth = input_shape.Dims(3);
@@ -1117,7 +1117,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1158,7 +1158,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1200,7 +1200,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1350,7 +1350,7 @@ void BroadcastMul4DSlow(const ArithmeticParams& params,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -1483,7 +1483,7 @@ inline void BroadcastMul4DSlow(const ArithmeticParams& params,
   // The input shapes are extended as part of NdArrayDesc initialization.
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
@@ -1579,7 +1579,7 @@ void BroadcastDiv4DSlow(const ArithmeticParams& params,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -1713,7 +1713,7 @@ inline void BroadcastSub4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1754,7 +1754,7 @@ inline void BroadcastSub4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1818,7 +1818,7 @@ inline void BroadcastSub4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1858,7 +1858,7 @@ void BroadcastSub4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1897,7 +1897,7 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -3543,11 +3543,11 @@ inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_size_shape =
+  const RuntimeShape output_size_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
@@ -3606,9 +3606,9 @@ inline void SpaceToBatchND(
     const RuntimeShape& unextended_output_shape, T* output_data) {
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input1_shape =
+  const RuntimeShape input1_shape =
       RuntimeShape::ExtendedShape(4, unextended_input1_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int depth = input1_shape.Dims(3);
@@ -3663,9 +3663,9 @@ inline void BatchToSpaceND(
     const RuntimeShape& unextended_output_shape, T* output_data) {
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input1_shape =
+  const RuntimeShape input1_shape =
       RuntimeShape::ExtendedShape(4, unextended_input1_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int output_width = output_shape.Dims(2);
@@ -3719,8 +3719,10 @@ inline void PadImpl(const tflite::PadParams& op_params,
                     const RuntimeShape& input_shape, const T* input_data,
                     const P* pad_value_ptr, const RuntimeShape& output_shape,
                     T* output_data) {
-  RuntimeShape ext_input_shape = RuntimeShape::ExtendedShape(4, input_shape);
-  RuntimeShape ext_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+  const RuntimeShape ext_input_shape =
+      RuntimeShape::ExtendedShape(4, input_shape);
+  const RuntimeShape ext_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
   TFLITE_DCHECK_LE(op_params.left_padding_count, 4);
   TFLITE_DCHECK_LE(op_params.right_padding_count, 4);
 
@@ -3817,9 +3819,9 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params,
 
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   // Reverse and pad to 4 dimensions because that is what the runtime code
@@ -3915,7 +3917,7 @@ template <typename T>
 inline void Slice(const tflite::SliceParams& op_params,
                   const RuntimeShape& input_shape, const T* input_data,
                   const RuntimeShape& output_shape, T* output_data) {
-  RuntimeShape ext_shape = RuntimeShape::ExtendedShape(4, input_shape);
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(4, input_shape);
   // TODO(dkalenichenko): This op only supports 4D tensors or smaller.
   TFLITE_DCHECK_LE(op_params.begin_count, 4);
   TFLITE_DCHECK_LE(op_params.size_count, 4);
@@ -4141,9 +4143,9 @@ inline void Mean(const tflite::MeanParams& op_params,
 
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int output_batch = output_shape.Dims(0);
@@ -4290,7 +4292,7 @@ void MaximumMinimumBroadcast4DSlow(const RuntimeShape& unextended_input1_shape,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4577,7 +4579,7 @@ inline void BroadcastComparison4DSlowImpl(
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4636,7 +4638,7 @@ inline void BroadcastComparison4DSlowWithScaling(
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4886,7 +4888,7 @@ inline void BroadcastPow4DSlow(const RuntimeShape& unextended_input1_shape,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4929,7 +4931,7 @@ inline void BroadcastLogical4DSlow(
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4968,7 +4970,7 @@ inline void BroadcastBinaryFunction4DSlow(
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
-- 
GitLab


From 2646bf2d2bfb717c828db6391563b431f760a7d3 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Thu, 13 Sep 2018 13:08:26 -0700
Subject: [PATCH 0154/1357] Internal change.

PiperOrigin-RevId: 212864677
---
 tensorflow/contrib/lite/python/convert.py     | 43 ++++++++++++++++---
 tensorflow/contrib/lite/python/lite.py        | 11 +++++
 tensorflow/contrib/lite/python/lite_test.py   | 22 ++++++++++
 .../contrib/lite/python/tflite_convert.py     | 11 +++++
 4 files changed, 82 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 1c5516ae7c..1f48a826d4 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import enum  # pylint: disable=g-bad-import-order
+
 import os as _os
 import platform as _platform
 import subprocess as _subprocess
@@ -30,7 +32,6 @@ from tensorflow.python.platform import resource_loader as _resource_loader
 from tensorflow.python.util import deprecation
 from tensorflow.python.util.lazy_loader import LazyLoader
 
-
 # Lazy load since some of the performance benchmark skylark rules
 # break dependencies.
 _toco_python = LazyLoader(
@@ -52,6 +53,31 @@ if _toco_from_proto_bin and not _os.path.exists(_toco_from_proto_bin):
   _toco_from_proto_bin = "toco_from_protos"
 
 
+class ConverterMode(enum.Enum):
+  """Enum class defining the converters available to generate TFLite models.
+
+  WARNING: Experimental interface, subject to change.
+  """
+  # Convert model using TOCO such that all ops are TensorFlow Lite native ops.
+  #
+  # This is the only supported mode for any models that contain operations that
+  # cannot be resolved in TensorFlow.
+  DEFAULT = "DEFAULT"
+
+  # Convert model using TOCO such that only unsupported operations are
+  # represented as TensorFlow ops.
+  # WARNING: Experimental interface, subject to change.
+  TOCO_EXTENDED = "TOCO_EXTENDED"
+
+  # Convert model using TOCO such that all operations are represented as
+  # TensorFlow ops.
+  # WARNING: Experimental interface, subject to change.
+  TOCO_EXTENDED_ALL = "TOCO_EXTENDED_ALL"
+
+  def __str__(self):
+    return self.value
+
+
 def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str):
   """Convert `input_data_str` according to model and toco parameters.
 
@@ -128,7 +154,8 @@ def build_toco_convert_protos(input_tensors,
                               change_concat_input_ranges=False,
                               post_training_quantize=False,
                               dump_graphviz_dir=None,
-                              dump_graphviz_video=False):
+                              dump_graphviz_video=False,
+                              converter_mode=ConverterMode.DEFAULT):
   """Builds protocol buffers describing a conversion of a model using TOCO.
 
   Typically this is to convert from TensorFlow GraphDef to TFLite, in which
@@ -183,6 +210,8 @@ def build_toco_convert_protos(input_tensors,
       output file. (default None)
     dump_graphviz_video: Boolean indicating whether to dump the graph after
       every graph transformation. (default False)
+    converter_mode: Experimental flag, subject to change. ConverterMode
+      indicating which converter to use. (default ConverterMode.DEFAULT)
 
   Returns:
     model_flags, toco_flags: two protocol buffers describing the conversion
@@ -211,6 +240,11 @@ def build_toco_convert_protos(input_tensors,
   if dump_graphviz_dir:
     toco.dump_graphviz_dir = dump_graphviz_dir
   toco.dump_graphviz_include_video = dump_graphviz_video
+  if converter_mode == ConverterMode.TOCO_EXTENDED:
+    toco.allow_eager_ops = True
+  elif converter_mode == ConverterMode.TOCO_EXTENDED_ALL:
+    toco.allow_eager_ops = True
+    toco.force_eager_ops = True
 
   model = _model_flags_pb2.ModelFlags()
   model.change_concat_input_ranges = change_concat_input_ranges
@@ -301,9 +335,8 @@ def toco_convert_impl(input_data, input_tensors, output_tensors, *args,
   Raises:
     Defined in `build_toco_convert_protos`.
   """
-  model_flags, toco_flags = build_toco_convert_protos(input_tensors,
-                                                      output_tensors,
-                                                      *args, **kwargs)
+  model_flags, toco_flags = build_toco_convert_protos(
+      input_tensors, output_tensors, *args, **kwargs)
   data = toco_convert_protos(model_flags.SerializeToString(),
                              toco_flags.SerializeToString(),
                              input_data.SerializeToString())
diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py
index 44dfb97b84..2be24455d8 100644
--- a/tensorflow/contrib/lite/python/lite.py
+++ b/tensorflow/contrib/lite/python/lite.py
@@ -40,6 +40,7 @@ from google.protobuf import text_format as _text_format
 from google.protobuf.message import DecodeError
 from tensorflow.contrib.lite.python import lite_constants as constants
 from tensorflow.contrib.lite.python.convert import build_toco_convert_protos  # pylint: disable=unused-import
+from tensorflow.contrib.lite.python.convert import ConverterMode
 from tensorflow.contrib.lite.python.convert import tensor_name as _tensor_name
 from tensorflow.contrib.lite.python.convert import toco_convert  # pylint: disable=unused-import
 from tensorflow.contrib.lite.python.convert import toco_convert_graph_def as _toco_convert_graph_def
@@ -113,6 +114,8 @@ class TocoConverter(object):
       output file. (default None)
     dump_graphviz_video: Boolean indicating whether to dump the graph after
       every graph transformation. (default False)
+    converter_mode: Experimental flag, subject to change. ConverterMode
+      indicating which converter to use. (default ConverterMode.DEFAULT)
 
   Example usage:
 
@@ -179,6 +182,7 @@ class TocoConverter(object):
     self.post_training_quantize = False
     self.dump_graphviz_dir = None
     self.dump_graphviz_video = False
+    self.converter_mode = ConverterMode.DEFAULT
 
     # Attributes are used by models that cannot be loaded into TensorFlow.
     if not self._has_valid_tensors():
@@ -389,6 +393,7 @@ class TocoConverter(object):
       ValueError:
         Input shape is not specified.
         None value for dimension in input_tensor.
+        ConverterMode option is unsupported for the model.
     """
     # Checks dimensions in input tensor.
     if self._has_valid_tensors():
@@ -439,12 +444,18 @@ class TocoConverter(object):
 
     # Converts model.
     if self._has_valid_tensors():
+      converter_kwargs["converter_mode"] = self.converter_mode
       result = _toco_convert_impl(
           input_data=self._graph_def,
           input_tensors=self._input_tensors,
           output_tensors=self._output_tensors,
           **converter_kwargs)
     else:
+      # Graphs without valid tensors cannot be loaded into tf.Session since they
+      # contain TFLite operation(s) that cannot be resolved in TensorFlow.
+      if self.converter_mode != ConverterMode.DEFAULT:
+        raise ValueError("This model can only be converted with the default "
+                         "converter.")
       result = _toco_convert_graph_def(
           input_data=self._graph_def,
           input_arrays_with_shape=self._input_arrays_with_shape,
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index 3f8ea433ff..f112ed5cdd 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -402,6 +402,28 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     # Ensure that the quantized weights tflite model is smaller.
     self.assertTrue(len(quantized_tflite) < len(float_tflite))
 
+  def testExtendedMode(self):
+    in_tensor = array_ops.placeholder(
+        shape=[1, 16, 16, 3], dtype=dtypes.float32)
+    out_tensor = in_tensor + in_tensor
+    sess = session.Session()
+
+    # Convert model and ensure model is not None.
+    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter.converter_mode = lite.ConverterMode.TOCO_EXTENDED_ALL
+    tflite_model = converter.convert()
+    self.assertTrue(tflite_model)
+
+    # Ensures the model contains TensorFlow ops.
+    # TODO(nupurgarg): Check values once there is a Python delegate interface.
+    interpreter = Interpreter(model_content=tflite_model)
+    with self.assertRaises(RuntimeError) as error:
+      interpreter.allocate_tensors()
+    self.assertIn(
+        'Regular TensorFlow ops are not supported by this interpreter. Make '
+        'sure you invoke the Eager delegate before inference.',
+        str(error.exception))
+
 
 class FromFrozenGraphFile(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/contrib/lite/python/tflite_convert.py b/tensorflow/contrib/lite/python/tflite_convert.py
index cc08ed3fe9..c0ff7f37f9 100644
--- a/tensorflow/contrib/lite/python/tflite_convert.py
+++ b/tensorflow/contrib/lite/python/tflite_convert.py
@@ -140,8 +140,11 @@ def _convert_model(flags):
   if flags.change_concat_input_ranges:
     converter.change_concat_input_ranges = (
         flags.change_concat_input_ranges == "TRUE")
+
   if flags.allow_custom_ops:
     converter.allow_custom_ops = flags.allow_custom_ops
+  if flags.converter_mode:
+    converter.converter_mode = flags.converter_mode
 
   if flags.post_training_quantize:
     converter.post_training_quantize = flags.post_training_quantize
@@ -363,6 +366,8 @@ def run_main(_):
       help=("Boolean to change behavior of min/max ranges for inputs and "
             "outputs of the concat operator for quantized models. Changes the "
             "ranges of concat operator overlap when true. (default False)"))
+
+  # Permitted ops flags.
   parser.add_argument(
       "--allow_custom_ops",
       action="store_true",
@@ -371,6 +376,12 @@ def run_main(_):
             "created for any op that is unknown. The developer will need to "
             "provide these to the TensorFlow Lite runtime with a custom "
             "resolver. (default False)"))
+  parser.add_argument(
+      "--converter_mode",
+      type=lite.ConverterMode,
+      choices=list(lite.ConverterMode),
+      help=("Experimental flag, subject to change. ConverterMode indicating "
+            "which converter to use. (default ConverterMode.DEFAULT)"))
 
   # Logging flags.
   parser.add_argument(
-- 
GitLab


From df46916ab0f8aa9fbf45f6847c9216ecc90515a9 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Thu, 13 Sep 2018 13:54:44 -0700
Subject: [PATCH 0155/1357] Allow user to the pre register a defun function
 into graph without calling it.

PiperOrigin-RevId: 212872452
---
 tensorflow/python/eager/function.py      | 28 +++++++++
 tensorflow/python/eager/function_test.py | 78 ++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 348bf4650f..552ed29f65 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1204,6 +1204,34 @@ class PolymorphicFunction(object):
       return graph_function, (args, kwds)
 
 
+def register(func, *args, **kwargs):
+  """Register the defun function into the graph.
+
+  This won't actually call the function with the inputs, and only put the
+  function definition into graph. Register function with different input param
+  will result into multiple version of functions registered in graph.
+
+  Args:
+    func: the PolymorphicFunction instance that generated by a @defun
+    *args: input arguments for the Python function.
+    **kwargs: input keyword arguments for the Python function.
+
+  Returns:
+    a `Function` object specialized to inputs and execution context.
+
+  Raises:
+    ValueError: When the input function is not a defun wrapped python function.
+  """
+  if not isinstance(func, PolymorphicFunction):
+    raise ValueError("Only defun function is allowed to be registered. "
+                     "Got type: %s" % type(func))
+  concrete_func = func.get_concrete_function(*args, **kwargs)
+  graph = ops.get_default_graph()
+  concrete_func._inference_function.add_to_graph(graph)   # pylint: disable=protected-access
+  # TODO(scottzhu): support concrete_func._backward_graph_function in future.
+  return concrete_func
+
+
 def _validate_signature(signature):
   if any(not isinstance(arg, tensor_spec.TensorSpec)
          for arg in nest.flatten(signature)):
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index d2b1d9c8a7..a0abefe666 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1607,6 +1607,84 @@ class FunctionTest(test.TestCase):
           t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
           add(t, t)
 
+  def testRegisterFunction(self):
+    @function.defun
+    def add(x, y):
+      return math_ops.add(x, y)
+
+    def matmul(x, y):
+      return math_ops.matmul(x, y)
+    defun_matmul = function.defun(matmul)
+
+    with context.graph_mode(), self.cached_session():
+      with ops.get_default_graph().as_default():
+        t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+        function.register(defun_matmul, t, t)
+        function.register(add, t, t)
+
+        graph = ops.get_default_graph()
+        # pylint: disable=protected-access
+        self.assertEqual(len(graph._functions), 2)
+        functions = list(graph._functions.values())
+        pre_register_matmul_func_name = functions[0].definition.signature.name
+        self.assertRegexpMatches(pre_register_matmul_func_name, '.*matmul.*')
+        pre_register_add_func_name = functions[1].definition.signature.name
+        self.assertRegexpMatches(pre_register_add_func_name, '.*add.*')
+
+        sq = defun_matmul(t, t)
+        double = add(t, t)
+        self.assertAllEqual(sq.eval().reshape(-1), [7, 10, 15, 22])
+        self.assertAllEqual(double.eval().reshape(-1), [2, 4, 6, 8])
+        # Make sure the pre registered function is used, and no other function
+        # is added.
+        self.assertEqual(len(graph._functions), 2)
+        functions = list(graph._functions.values())
+        called_func_name = functions[0].definition.signature.name
+        self.assertEqual(pre_register_matmul_func_name, called_func_name)
+        called_func_name = functions[1].definition.signature.name
+        self.assertEqual(pre_register_add_func_name, called_func_name)
+
+  def testRegisterFunctionWithInputSignature(self):
+    def matmul(x, y):
+      return math_ops.matmul(x, y)
+    defun_matmul = function.defun(
+        matmul,
+        input_signature=[
+            tensor_spec.TensorSpec(shape=(2, 2), dtype=dtypes.float32),
+            tensor_spec.TensorSpec(shape=(2, 2), dtype=dtypes.float32)
+        ])
+    with context.graph_mode(), self.cached_session():
+      with ops.get_default_graph().as_default():
+        t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+        function.register(defun_matmul, t, t)
+
+        graph = ops.get_default_graph()
+        # pylint: disable=protected-access
+        self.assertEqual(len(graph._functions), 1)
+
+        # Test input param shape mismatch
+        t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        with self.assertRaisesRegexp(
+            ValueError, 'Python inputs incompatible with input_signature'):
+          function.register(defun_matmul, t2, t2)
+
+  def testRegisterFunctionWithCache(self):
+    def matmul(x, y):
+      return math_ops.matmul(x, y)
+    defun_matmul = function.defun(matmul)
+
+    with context.graph_mode(), self.cached_session():
+      with ops.get_default_graph().as_default():
+        t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+        t2 = constant_op.constant([[2.0, 3.0], [4.0, 5.0]])
+        function.register(defun_matmul, t, t)
+        function.register(defun_matmul, t2, t2)
+
+        graph = ops.get_default_graph()
+        # Only one function is registered since the input param are in same type
+        # pylint: disable=protected-access
+        self.assertEqual(len(graph._functions), 1)
+
 
 @test_util.with_c_shapes
 class AutomaticControlDependenciesTest(test.TestCase):
-- 
GitLab


From c4c80a3fe7f585748110056dade5748856b34f5c Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Thu, 13 Sep 2018 13:55:35 -0700
Subject: [PATCH 0156/1357] internal change

PiperOrigin-RevId: 212872625
---
 tensorflow/tools/docs/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py
index a6159fa692..83b4bf8128 100644
--- a/tensorflow/tools/docs/parser.py
+++ b/tensorflow/tools/docs/parser.py
@@ -1479,7 +1479,7 @@ class ParserConfig(object):
     self.base_dir = base_dir
     self.defined_in_prefix = 'tensorflow/'
     self.code_url_prefix = (
-        'https://www.tensorflow.org/code/tensorflow/')  # pylint: disable=line-too-long
+        '/code/stable/tensorflow/')  # pylint: disable=line-too-long
 
   def py_name_to_object(self, full_name):
     """Return the Python object for a Python symbol name."""
-- 
GitLab


From 490e46f29dba0254fa69385d4235ab26854868c8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 13:59:02 -0700
Subject: [PATCH 0157/1357] Increase test timeout for xla_ops_test to de-flake.

PiperOrigin-RevId: 212873250
---
 tensorflow/compiler/tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index e7623582f6..2176eaebe4 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -1198,7 +1198,7 @@ tf_xla_py_test(
 
 tf_xla_py_test(
     name = "xla_ops_test",
-    size = "small",
+    size = "medium",
     srcs = ["xla_ops_test.py"],
     disabled_backends = ["cpu_ondemand"],
     deps = [
-- 
GitLab


From 304faf0444260912b6996d39227417c09561c37e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 13:59:24 -0700
Subject: [PATCH 0158/1357] Remove tf.contrib.get_signature_def_by_key. This
 can be replaced by meta_graph_def.signature_def[signature_def_key]

PiperOrigin-RevId: 212873314
---
 .../predictor/saved_model_predictor.py        |  19 +-
 tensorflow/contrib/saved_model/BUILD          |  17 --
 tensorflow/contrib/saved_model/__init__.py    |   2 -
 .../python/saved_model/__init__.py            |   1 -
 .../python/saved_model/signature_def_utils.py |  42 ----
 .../saved_model/signature_def_utils_test.py   | 191 ------------------
 tensorflow/python/tools/saved_model_cli.py    |   7 +-
 7 files changed, 9 insertions(+), 270 deletions(-)
 delete mode 100644 tensorflow/contrib/saved_model/python/saved_model/signature_def_utils.py
 delete mode 100644 tensorflow/contrib/saved_model/python/saved_model/signature_def_utils_test.py

diff --git a/tensorflow/contrib/predictor/saved_model_predictor.py b/tensorflow/contrib/predictor/saved_model_predictor.py
index 95da6d04ed..03399396df 100644
--- a/tensorflow/contrib/predictor/saved_model_predictor.py
+++ b/tensorflow/contrib/predictor/saved_model_predictor.py
@@ -23,7 +23,6 @@ import logging
 
 from tensorflow.contrib.predictor import predictor
 from tensorflow.contrib.saved_model.python.saved_model import reader
-from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils
 from tensorflow.python.client import session
 from tensorflow.python.framework import ops
 from tensorflow.python.saved_model import loader
@@ -68,23 +67,19 @@ def _get_signature_def(signature_def_key, export_dir, tags):
   metagraph_def = get_meta_graph_def(export_dir, tags)
 
   try:
-    signature_def = signature_def_utils.get_signature_def_by_key(
-        metagraph_def,
+    signature_def = metagraph_def.signature_def[signature_def_key]
+  except KeyError as e:
+    formatted_key = _DEFAULT_INPUT_ALTERNATIVE_FORMAT.format(
         signature_def_key)
-  except ValueError as e:
     try:
-      formatted_key = _DEFAULT_INPUT_ALTERNATIVE_FORMAT.format(
-          signature_def_key)
-      signature_def = signature_def_utils.get_signature_def_by_key(
-          metagraph_def, formatted_key)
-
-      logging.warning('Could not find signature def "%s". '
-                      'Using "%s" instead', signature_def_key, formatted_key)
-    except ValueError:
+      signature_def = metagraph_def.signature_def[formatted_key]
+    except KeyError:
       raise ValueError(
           'Got signature_def_key "{}". Available signatures are {}. '
           'Original error:\n{}'.format(
               signature_def_key, list(metagraph_def.signature_def), e))
+    logging.warning('Could not find signature def "%s". '
+                    'Using "%s" instead', signature_def_key, formatted_key)
   return signature_def
 
 
diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD
index f687b56ea3..4ca5274b2e 100644
--- a/tensorflow/contrib/saved_model/BUILD
+++ b/tensorflow/contrib/saved_model/BUILD
@@ -78,23 +78,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "signature_def_utils_test",
-    size = "small",
-    srcs = ["python/saved_model/signature_def_utils_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":saved_model_py",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python/saved_model:signature_constants",
-        "//tensorflow/python/saved_model:signature_def_utils",
-        "//tensorflow/python/saved_model:utils",
-    ],
-)
-
 py_library(
     name = "keras_saved_model",
     srcs = ["python/saved_model/keras_saved_model.py"],
diff --git a/tensorflow/contrib/saved_model/__init__.py b/tensorflow/contrib/saved_model/__init__.py
index 074dc655ac..ac95e38011 100644
--- a/tensorflow/contrib/saved_model/__init__.py
+++ b/tensorflow/contrib/saved_model/__init__.py
@@ -25,13 +25,11 @@ from __future__ import print_function
 
 # pylint: disable=unused-import,wildcard-import,line-too-long
 from tensorflow.contrib.saved_model.python.saved_model.keras_saved_model import *
-from tensorflow.contrib.saved_model.python.saved_model.signature_def_utils import *
 # pylint: enable=unused-import,wildcard-import,line-too-long
 
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
-    "get_signature_def_by_key",
     "load_keras_model",
     "save_keras_model"]
 
diff --git a/tensorflow/contrib/saved_model/python/saved_model/__init__.py b/tensorflow/contrib/saved_model/python/saved_model/__init__.py
index e3b76bb6f3..fd3dc1d7aa 100644
--- a/tensorflow/contrib/saved_model/python/saved_model/__init__.py
+++ b/tensorflow/contrib/saved_model/python/saved_model/__init__.py
@@ -25,5 +25,4 @@ from __future__ import print_function
 
 # pylint: disable=wildcard-import
 from tensorflow.contrib.saved_model.python.saved_model import keras_saved_model
-from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils
 # pylint: enable=wildcard-import
diff --git a/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils.py b/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils.py
deleted file mode 100644
index f521647999..0000000000
--- a/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""SignatureDef utility functions implementation."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-def get_signature_def_by_key(meta_graph_def, signature_def_key):
-  """Utility function to get a SignatureDef protocol buffer by its key.
-
-  Args:
-    meta_graph_def: MetaGraphDef protocol buffer with the SignatureDefMap to
-      look up.
-    signature_def_key: Key of the SignatureDef protocol buffer to find in the
-      SignatureDefMap.
-
-  Returns:
-    A SignatureDef protocol buffer corresponding to the supplied key, if it
-    exists.
-
-  Raises:
-    ValueError: If no entry corresponding to the supplied key is found in the
-    SignatureDefMap of the MetaGraphDef.
-  """
-  if signature_def_key not in meta_graph_def.signature_def:
-    raise ValueError("No SignatureDef with key '%s' found in MetaGraphDef." %
-                     signature_def_key)
-  return meta_graph_def.signature_def[signature_def_key]
diff --git a/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils_test.py b/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils_test.py
deleted file mode 100644
index d2e14f73e4..0000000000
--- a/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils_test.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for SignatureDef utils."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils as signature_def_contrib_utils
-from tensorflow.core.protobuf import meta_graph_pb2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.saved_model import signature_def_utils
-from tensorflow.python.saved_model import utils
-
-
-class SignatureDefUtilsTest(test.TestCase):
-
-  def _add_to_signature_def_map(self, meta_graph_def, signature_def_map=None):
-    if signature_def_map is not None:
-      for key in signature_def_map:
-        meta_graph_def.signature_def[key].CopyFrom(signature_def_map[key])
-
-  def _check_tensor_info(self, tensor_info_map, map_key, expected_tensor_name):
-    actual_tensor_info = tensor_info_map[map_key]
-    self.assertEqual(expected_tensor_name, actual_tensor_info.name)
-
-  def testGetSignatureDefByKey(self):
-    x = array_ops.placeholder(dtypes.float32, 1, name="x")
-    x_tensor_info = utils.build_tensor_info(x)
-
-    y = array_ops.placeholder(dtypes.float32, name="y")
-    y_tensor_info = utils.build_tensor_info(y)
-
-    foo_signature_def = signature_def_utils.build_signature_def({
-        "foo-input": x_tensor_info
-    }, {"foo-output": y_tensor_info}, "foo-method-name")
-    bar_signature_def = signature_def_utils.build_signature_def({
-        "bar-input": x_tensor_info
-    }, {"bar-output": y_tensor_info}, "bar-method-name")
-    meta_graph_def = meta_graph_pb2.MetaGraphDef()
-    self._add_to_signature_def_map(
-        meta_graph_def, {"foo": foo_signature_def,
-                         "bar": bar_signature_def})
-
-    # Look up a key that does not exist in the SignatureDefMap.
-    missing_key = "missing-key"
-    with self.assertRaisesRegexp(
-        ValueError,
-        "No SignatureDef with key '%s' found in MetaGraphDef" % missing_key):
-      signature_def_contrib_utils.get_signature_def_by_key(
-          meta_graph_def, missing_key)
-
-    # Look up the key, `foo` which exists in the SignatureDefMap.
-    foo_signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "foo")
-    self.assertTrue("foo-method-name", foo_signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(1, len(foo_signature_def.inputs))
-    self._check_tensor_info(foo_signature_def.inputs, "foo-input", "x:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(1, len(foo_signature_def.outputs))
-    self._check_tensor_info(foo_signature_def.outputs, "foo-output", "y:0")
-
-    # Look up the key, `bar` which exists in the SignatureDefMap.
-    bar_signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "bar")
-    self.assertTrue("bar-method-name", bar_signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(1, len(bar_signature_def.inputs))
-    self._check_tensor_info(bar_signature_def.inputs, "bar-input", "x:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(1, len(bar_signature_def.outputs))
-    self._check_tensor_info(bar_signature_def.outputs, "bar-output", "y:0")
-
-  def testGetSignatureDefByKeyRegression(self):
-    input1 = constant_op.constant("a", name="input-1")
-    output1 = constant_op.constant(7.2, name="output-1")
-
-    meta_graph_def = meta_graph_pb2.MetaGraphDef()
-    self._add_to_signature_def_map(meta_graph_def, {
-        "my_regression":
-            signature_def_utils.regression_signature_def(input1, output1)
-    })
-
-    # Look up the regression signature with the key used while saving.
-    signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "my_regression")
-
-    # Check the method name to match the constants regression method name.
-    self.assertEqual(signature_constants.REGRESS_METHOD_NAME,
-                     signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(1, len(signature_def.inputs))
-    self._check_tensor_info(signature_def.inputs,
-                            signature_constants.REGRESS_INPUTS, "input-1:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(1, len(signature_def.outputs))
-    self._check_tensor_info(signature_def.outputs,
-                            signature_constants.REGRESS_OUTPUTS, "output-1:0")
-
-  def testGetSignatureDefByKeyClassification(self):
-    input1 = constant_op.constant("a", name="input-1")
-    output1 = constant_op.constant("b", name="output-1")
-    output2 = constant_op.constant(3.0, name="output-2")
-
-    meta_graph_def = meta_graph_pb2.MetaGraphDef()
-    self._add_to_signature_def_map(meta_graph_def, {
-        "my_classification":
-            signature_def_utils.classification_signature_def(
-                input1, output1, output2)
-    })
-
-    # Look up the classification signature def with the key used while saving.
-    signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "my_classification")
-
-    # Check the method name to match the constants classification method name.
-    self.assertEqual(signature_constants.CLASSIFY_METHOD_NAME,
-                     signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(1, len(signature_def.inputs))
-    self._check_tensor_info(signature_def.inputs,
-                            signature_constants.CLASSIFY_INPUTS, "input-1:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(2, len(signature_def.outputs))
-    self._check_tensor_info(signature_def.outputs,
-                            signature_constants.CLASSIFY_OUTPUT_CLASSES,
-                            "output-1:0")
-    self._check_tensor_info(signature_def.outputs,
-                            signature_constants.CLASSIFY_OUTPUT_SCORES,
-                            "output-2:0")
-
-  def testPredictionSignatureDef(self):
-    input1 = constant_op.constant("a", name="input-1")
-    input2 = constant_op.constant("b", name="input-2")
-    output1 = constant_op.constant("c", name="output-1")
-    output2 = constant_op.constant("d", name="output-2")
-
-    meta_graph_def = meta_graph_pb2.MetaGraphDef()
-    self._add_to_signature_def_map(meta_graph_def, {
-        "my_prediction":
-            signature_def_utils.predict_signature_def({
-                "input-1": input1,
-                "input-2": input2
-            }, {"output-1": output1,
-                "output-2": output2})
-    })
-
-    # Look up the prediction signature def with the key used while saving.
-    signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "my_prediction")
-    self.assertEqual(signature_constants.PREDICT_METHOD_NAME,
-                     signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(2, len(signature_def.inputs))
-    self._check_tensor_info(signature_def.inputs, "input-1", "input-1:0")
-    self._check_tensor_info(signature_def.inputs, "input-2", "input-2:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(2, len(signature_def.outputs))
-    self._check_tensor_info(signature_def.outputs, "output-1", "output-1:0")
-    self._check_tensor_info(signature_def.outputs, "output-2", "output-2:0")
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index c5289564fe..d8ba13d8d2 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -33,7 +33,6 @@ import numpy as np
 
 from six import integer_types
 from tensorflow.contrib.saved_model.python.saved_model import reader
-from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils
 from tensorflow.core.example import example_pb2
 from tensorflow.core.framework import types_pb2
 from tensorflow.python.client import session
@@ -97,8 +96,7 @@ def _get_inputs_tensor_info_from_meta_graph_def(meta_graph_def,
   Returns:
     A dictionary that maps input tensor keys to TensorInfos.
   """
-  return signature_def_utils.get_signature_def_by_key(meta_graph_def,
-                                                      signature_def_key).inputs
+  return meta_graph_def.signature_def[signature_def_key].inputs
 
 
 def _get_outputs_tensor_info_from_meta_graph_def(meta_graph_def,
@@ -116,8 +114,7 @@ def _get_outputs_tensor_info_from_meta_graph_def(meta_graph_def,
   Returns:
     A dictionary that maps output tensor keys to TensorInfos.
   """
-  return signature_def_utils.get_signature_def_by_key(meta_graph_def,
-                                                      signature_def_key).outputs
+  return meta_graph_def.signature_def[signature_def_key].outputs
 
 
 def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key, indent=0):
-- 
GitLab


From 4053f4d89ee9c8fdd8389c6604347449ced4fabf Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Mon, 10 Sep 2018 14:31:25 -0700
Subject: [PATCH 0159/1357] Add 1.11 release notes (#22067)

---
 RELEASE.md | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/RELEASE.md b/RELEASE.md
index bdc23795e5..2f26623373 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,9 +1,86 @@
+# Release 1.11.0
+
+## Major Features and Improvements
+
+* Nvidia GPU:
+  * Prebuilt binaries are now (as of TensorFlow 1.11) built against cuDNN 7.2 and TensorRT 4. See updated install guides: [Installing TensorFlow on Ubuntu](https://www.tensorflow.org/install/install_linux#tensorflow_gpu_support)
+* Google Cloud TPU:
+  * Experimental tf.data integration for Keras on Google Cloud TPUs.
+  * Experimental / preview support for eager execution on Google Cloud TPUs.
+* DistributionStrategy:
+  * Add multi-GPU DistributionStrategy support in tf.keras. Users can now use `fit`, `evaluate` and `predict` to distribute their model on multiple GPUs.
+  * Add multi-worker DistributionStrategy and standalone client support in Estimator. See [README] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/distribute) for more details.
+* Add C, C++, and Python functions for querying kernels
+
+## Breaking Changes
+
+* Keras:
+  * The default values for tf.keras `RandomUniform`, `RandomNormal`, and `TruncatedNormal` initializers have been changed to match those in external Keras.
+  * Breaking change: `model.get_config()` on a Sequential model now returns a config dictionary (consistent with other Model instances) instead of a list of configs for the underlying layers.
+
+## Bug Fixes and Other Changes
+
+* C++:
+  * Changed the signature of SessionFactory::NewSession so that it can return a meaningful error message on failure.
+* tf.data:
+  * Remove `num_parallel_parser_calls` argument from `tf.contrib.data.make_csv_dataset()`. [tf.data] Remove `num_parallel_parser_calls` argument from `tf.contrib.data.make_csv_dataset()`.
+  * `tf.data.Dataset.list_files()` raises an exception at initialization time if the argument matches no files.
+  * Renamed BigTable class to BigtableTable for clarity
+  * Document use of the Cloud Bigtable API
+  * Adding `tf.contrib.data.reduce_dataset` which can be used to reduce a dataset to a single element.
+  * Generalization of `tf.contrib.data.sliding_window_batch`.
+* INC:
+  * Runtime improvements to triangular solve.
+* `tf.contrib`:
+  * Add an `implementation` argument to `tf.keras.layers.LocallyConnected2D` and `tf.keras.layers.LocallyConnected1D`. The new mode (`implementation=2`) performs forward pass as a single dense matrix multiplication, allowing dramatic speedups in certain scenarios (but worse performance in others - see docstring). The option also allows to use `padding=same`.
+  * Add documentation clarifying the differences between tf.fill and tf.constant.
+  * Add experimental IndexedDatasets.
+  * Add selective registration target using the lite proto runtime.
+  * Add simple Tensor and DataType classes to TensorFlow Lite Java
+  * Add support for bitcasting to/from uint32 and uint64.
+  * Added a subclass of Estimator that can be created from a SavedModel (SavedModelEstimator).
+  * Adds leaf index modes as an argument.
+  * Allow a different output shape from the input in tf.contrib.image.transform.
+  * Change the state_size order of the StackedRNNCell to be natural order. To keep the existing behavior, user can add reverse_state_order=True when constructing the StackedRNNCells.
+  * Deprecate self.test_session() in favor of self.session() or self.cached_session().
+  * Directly import tensor.proto.h (the transitive import will be removed from tensor.h soon)
+  * Estimator.train() now supports tf.contrib.summary.\* summaries out of the box; each call to .train() will now create a separate tfevents file rather than re-using a shared one.
+  * Fix FTRL L2-shrinkage behavior: the gradient from the L2 shrinkage term should not end up in the accumulator.
+  * Fix toco compilation/execution on Windows
+  * GoogleZoneProvider class added to detect  which Google Cloud Engine zone tensorflow is running in.
+  * It is now safe to call any of the C API's TF_Delete\* functions on nullptr
+  * Log some errors on Android to logcat
+  * Match FakeQuant numerics in TFLite to improve accuracy of TFLite quantized inference models.
+  * Optional bucket location check for the GCS Filesystem.
+  * Performance enhancements for StringSplitOp & StringSplitV2Op.
+  * Performance improvements for regex replace operations.
+  * TFRecordWriter now raises an error if .write() fails.
+  * TPU: More helpful error messages in TPUClusterResolvers.
+  * The legacy_init_op argument to SavedModelBuilder methods for adding MetaGraphs has been deprecated. Please use the equivalent main_op argument instead. As part of this, we now explicitly check for a single main_op or legacy_init_op at the time of SavedModel building, whereas the check on main_op was previously only done at load time.
+  * The protocol used for Estimator training is now configurable in RunConfig.
+  * Triangular solve performance improvements.
+  * Unify RNN cell interface between TF and Keras. Add new get_initial_state() to Keras and TF RNN cell, which will use to replace the existing zero_state() method.
+  * Update initialization of variables in Keras.
+  * Updates to "constrained_optimization" in tensorflow/contrib.
+  * boosted trees: adding pruning mode
+  * tf.train.Checkpoint does not delete old checkpoints by default.
+  * tfdbg: Limit the total disk space occupied by dumped tensor data to 100 GBytes. Add environment variable `TFDBG_DISK_BYTES_LIMIT` to allow adjustment of this upper limit.
+
+## Thanks to our Contributors
+
+This release contains contributions from many people at Google, as well as:
+
+Aapeli, adoda, Ag Ramesh, Amogh Mannekote, Andrew Gibiansky, Andy Craze, Anirudh Koul, Aurelien Geron, Avijit, Avijit-Nervana, Ben, Benjamin H. Myara, bhack, Brett Koonce, Cao Zongyan, cbockman, cheerss, Chikanaga Tomoyuki, Clayne Robison, cosine0, Cui Wei, Dan J, David, David Norman, Dmitry Klimenkov, Eliel Hojman, Florian Courtial, fo40225, formath, Geoffrey Irving, gracehoney, Grzegorz Pawelczak, Guoliang Hua, Guozhong Zhuang, Herman Zvonimir DošIlović, HuiyangFei, Jacker, Jan HüNnemeyer, Jason Taylor, Jason Zaman, Jesse, Jiang,Zhoulong, Jiawei Zhang, Jie, Joe Yearsley, Johannes Schmitz, Jon Perl, Jon Triebenbach, Jonathan, Jonathan Hseu, Jongmin Park, Justin Shenk, karl@kubx.ca, Kate Hodesdon, Kb Sriram, Keishi Hattori, Kenneth Blomqvist, Koan-Sin Tan, Li Liangbin, Li, Yiqiang, Loo Rong Jie, Madiyar, Mahmoud Abuzaina, Mark Ryan, Matt Dodge, mbhuiyan, melvinljy96, Miguel Mota, Nafis Sadat, Nathan Luehr, naurril, Nehal J Wani, Niall Moran, Niranjan Hasabnis, Nishidha Panpaliya, npow, olicht, Pei Zhang, Peng Wang (Simpeng), Peng Yu, Philipp Jund, Pradeep Banavara, Pratik Kalshetti, qwertWZ, Rakesh Chada, Randy West, Ray Kim, Rholais Lii, Robin Richtsfeld, Rodrigo Silveira, Ruizhi, Santosh Kumar, Seb Bro, Sergei Lebedev, sfujiwara, Shaba Abhiram, Shashi, SneakyFish5, Soila Kavulya, Stefan Dyulgerov, Steven Winston, Sunitha Kambhampati, Surry Shome, Taehoon Lee, Thor Johnsen, Tristan Rice, TShapinsky, tucan, tucan9389, Vicente Reyes, Vilmar-Hillow, Vitaly Lavrukhin, wangershi, weidan.kong, weidankong, Wen-Heng (Jack) Chung, William D. Irons, Wim Glenn, XFeiF, Yan Facai (颜发才), Yanbo Liang, Yong Tang, Yoshihiro Yamazaki, Yuan (Terry) Tang, Yuan, Man, zhaoyongke, ÁRon
+Ricardo Perez-Lopez, 张天启, 张晓飞
+
+
 # Release 1.10.1
 ## Bug Fixes and Other Changes
 
 * `tf.keras`:
   * Fixing keras on Cloud TPUs. No new binaries will be built for Windows.
 
+
 # Release 1.10.0
 
 ## Major Features And Improvements
-- 
GitLab


From d46753f993def43f6c878120e52f5dba598ceae2 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Tue, 4 Sep 2018 15:48:50 -0700
Subject: [PATCH 0160/1357] Update TF version strings (#22070)

---
 tensorflow/core/public/version.h             | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel     | 2 +-
 tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +-
 tensorflow/tools/docker/Dockerfile.devel-mkl | 2 +-
 tensorflow/tools/pip_package/setup.py        | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 4129c93af5..1f71e24eeb 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -19,12 +19,12 @@ limitations under the License.
 // TensorFlow uses semantic versioning, see http://semver.org/.
 
 #define TF_MAJOR_VERSION 1
-#define TF_MINOR_VERSION 10
+#define TF_MINOR_VERSION 11
 #define TF_PATCH_VERSION 0
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX ""
+#define TF_VERSION_SUFFIX "-rc0"
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 39e7bc8b66..c741e8ad0c 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -78,7 +78,7 @@ RUN mkdir /bazel && \
 
 # Download and build TensorFlow.
 WORKDIR /tensorflow
-RUN git clone --branch=r1.10 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.11 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # TODO(craigcitro): Don't install the pip package, since it makes it
 # more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index e487779e7a..f544725af4 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -100,7 +100,7 @@ RUN mkdir /bazel && \
 
 # Download and build TensorFlow.
 WORKDIR /tensorflow
-RUN git clone --branch=r1.10 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.11 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
index 371451d2aa..db7c701289 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -3,7 +3,7 @@ FROM ubuntu:16.04
 LABEL maintainer="Clayne Robison <clayne.b.robison@intel.com>"
 
 # These parameters can be overridden by parameterized_docker_build.sh
-ARG TF_BUILD_VERSION=r1.10
+ARG TF_BUILD_VERSION=r1.11
 ARG PYTHON="python"
 ARG PYTHON3_DEV=""
 ARG WHL_DIR="/tmp/pip"
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 3102239a19..8442e58f20 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.10.0'
+_VERSION = '1.11.0-rc0'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From 885cd2942ae7b6239146a3f51ec3d6948ac2b89e Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 13 Sep 2018 14:17:30 -0700
Subject: [PATCH 0161/1357] No segfault in GradientTape with partially unknown
 shapes.

PiperOrigin-RevId: 212876876
---
 tensorflow/python/eager/pywrap_tfe_src.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 1a8f3577b2..9f2f4e06ad 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1403,9 +1403,13 @@ class PyVSpace
     PyObject* arglist =
         Py_BuildValue("(O)", reinterpret_cast<PyObject*>(tensor));
     PyObject* result = PyEval_CallObject(num_elements_, arglist);
+    Py_DECREF(arglist);
+    if (result == nullptr) {
+      // The caller detects whether a python exception has been raised.
+      return -1;
+    }
     tensorflow::int64 r = MakeInt(result);
     Py_DECREF(result);
-    Py_DECREF(arglist);
     return r;
   }
 
-- 
GitLab


From d3458112ad5a1612ec6c77f7de4a0e0ec801e882 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 13 Sep 2018 14:18:16 -0700
Subject: [PATCH 0162/1357] Consistency in record_default shapes for
 tf.contrib.data.CsvDataset & tf.decode_csv: - Modify shape assertions so that
 both graph and eager accept rank 0 (scalar) and rank 1 tensors as
 `record_defaults`, and raise an error on other shapes. - Make tests run in
 both graph and eager modes

Fixes #22030.

PiperOrigin-RevId: 212877058
---
 .../contrib/data/kernels/csv_dataset_op.cc    |   3 +
 tensorflow/contrib/data/ops/dataset_ops.cc    |   8 +-
 .../contrib/data/python/kernel_tests/BUILD    |   3 +-
 .../kernel_tests/csv_dataset_op_test.py       | 123 +++++++++++-------
 .../api_def/base_api/api_def_DecodeCSV.pbtxt  |   3 +-
 tensorflow/core/kernels/decode_csv_op.cc      |   3 +
 tensorflow/core/ops/parsing_ops.cc            |   7 +-
 tensorflow/core/ops/parsing_ops_test.cc       |   7 +-
 tensorflow/python/kernel_tests/BUILD          |   3 +
 .../python/kernel_tests/decode_csv_op_test.py |  55 ++++++--
 tensorflow/python/ops/parsing_ops.py          |   3 +-
 11 files changed, 145 insertions(+), 73 deletions(-)

diff --git a/tensorflow/contrib/data/kernels/csv_dataset_op.cc b/tensorflow/contrib/data/kernels/csv_dataset_op.cc
index 74107d5242..21ec50fb6b 100644
--- a/tensorflow/contrib/data/kernels/csv_dataset_op.cc
+++ b/tensorflow/contrib/data/kernels/csv_dataset_op.cc
@@ -49,6 +49,9 @@ class CSVDatasetOp : public DatasetOpKernel {
     OP_REQUIRES_OK(ctx,
                    ctx->input_list("record_defaults", &record_defaults_list));
     for (int i = 0; i < record_defaults_list.size(); ++i) {
+      OP_REQUIRES(ctx, record_defaults_list[i].dims() <= 1,
+                  errors::InvalidArgument(
+                      "Each record default should be at most rank 1"));
       OP_REQUIRES(ctx, record_defaults_list[i].NumElements() < 2,
                   errors::InvalidArgument(
                       "There should only be 1 default per field but field ", i,
diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc b/tensorflow/contrib/data/ops/dataset_ops.cc
index ae104d55bd..ad410e17fe 100644
--- a/tensorflow/contrib/data/ops/dataset_ops.cc
+++ b/tensorflow/contrib/data/ops/dataset_ops.cc
@@ -65,7 +65,13 @@ REGISTER_OP("CSVDataset")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 1, &unused));
       // `record_defaults` must be lists of scalars
       for (size_t i = 8; i < c->num_inputs(); ++i) {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 1, &unused));
+        shape_inference::ShapeHandle v;
+        TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(i), 1, &v));
+        if (c->Rank(c->input(i)) == 1 && c->Value(c->Dim(v, 0)) > 1) {
+          return errors::InvalidArgument(
+              "Shape of a default must be a length-0 or length-1 vector, or a "
+              "scalar.");
+        }
       }
       return shape_inference::ScalarShape(c);
     });
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index b3c90ded39..ba202839b2 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -72,12 +72,13 @@ py_test(
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:parsing_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:platform_test",
         "//tensorflow/python:session",
         "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
index 63bffd023f..f8e74e4583 100644
--- a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
@@ -31,38 +31,49 @@ from tensorflow.contrib.data.python.ops import error_ops
 from tensorflow.contrib.data.python.ops import readers
 from tensorflow.python.client import session
 from tensorflow.python.data.ops import readers as core_readers
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import googletest
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class CsvDatasetOpTest(test.TestCase):
 
-  def _assert_datasets_equal(self, g, ds1, ds2):
+  def _get_next(self, dataset):
+    # Returns a no argument function whose result is fed to self.evaluate to
+    # yield the next element
+    it = dataset.make_one_shot_iterator()
+    if context.executing_eagerly():
+      return it.get_next
+    else:
+      get_next = it.get_next()
+      return lambda: get_next
+
+  def _assert_datasets_equal(self, ds1, ds2):
     assert ds1.output_shapes == ds2.output_shapes, ('output_shapes differ: %s, '
                                                     '%s') % (ds1.output_shapes,
                                                              ds2.output_shapes)
     assert ds1.output_types == ds2.output_types
     assert ds1.output_classes == ds2.output_classes
-    next1 = ds1.make_one_shot_iterator().get_next()
-    next2 = ds2.make_one_shot_iterator().get_next()
-    with self.session(graph=g) as sess:
-      # Run through datasets and check that outputs match, or errors match.
-      while True:
-        try:
-          op1 = sess.run(next1)
-        except (errors.OutOfRangeError, ValueError) as e:
-          # If op1 throws an exception, check that op2 throws same exception.
-          with self.assertRaises(type(e)):
-            sess.run(next2)
-          break
-        op2 = sess.run(next2)
-        self.assertAllEqual(op1, op2)
+    next1 = self._get_next(ds1)
+    next2 = self._get_next(ds2)
+    # Run through datasets and check that outputs match, or errors match.
+    while True:
+      try:
+        op1 = self.evaluate(next1())
+      except (errors.OutOfRangeError, ValueError) as e:
+        # If op1 throws an exception, check that op2 throws same exception.
+        with self.assertRaises(type(e)):
+          self.evaluate(next2())
+        break
+      op2 = self.evaluate(next2())
+      self.assertAllEqual(op1, op2)
 
   def _setup_files(self, inputs, linebreak='\n', compression_type=None):
     filenames = []
@@ -95,33 +106,32 @@ class CsvDatasetOpTest(test.TestCase):
 
   def _test_by_comparison(self, inputs, **kwargs):
     """Checks that CsvDataset is equiv to TextLineDataset->map(decode_csv)."""
-    with ops.Graph().as_default() as g:
-      dataset_actual, dataset_expected = self._make_test_datasets(
-          inputs, **kwargs)
-      self._assert_datasets_equal(g, dataset_actual, dataset_expected)
+    dataset_actual, dataset_expected = self._make_test_datasets(
+        inputs, **kwargs)
+    self._assert_datasets_equal(dataset_actual, dataset_expected)
 
   def _verify_output_or_err(self,
-                            sess,
                             dataset,
                             expected_output=None,
                             expected_err_re=None):
-    nxt = dataset.make_one_shot_iterator().get_next()
     if expected_err_re is None:
       # Verify that output is expected, without errors
+      nxt = self._get_next(dataset)
       expected_output = [[
           v.encode('utf-8') if isinstance(v, str) else v for v in op
       ] for op in expected_output]
       for value in expected_output:
-        op = sess.run(nxt)
+        op = self.evaluate(nxt())
         self.assertAllEqual(op, value)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(nxt)
+        self.evaluate(nxt())
     else:
       # Verify that OpError is produced as expected
       with self.assertRaisesOpError(expected_err_re):
+        nxt = self._get_next(dataset)
         while True:
           try:
-            sess.run(nxt)
+            self.evaluate(nxt())
           except errors.OutOfRangeError:
             break
 
@@ -137,11 +147,8 @@ class CsvDatasetOpTest(test.TestCase):
     # Convert str type because py3 tf strings are bytestrings
     filenames = self._setup_files(inputs, linebreak, compression_type)
     kwargs['compression_type'] = compression_type
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        dataset = readers.CsvDataset(filenames, **kwargs)
-        self._verify_output_or_err(sess, dataset, expected_output,
-                                   expected_err_re)
+    dataset = readers.CsvDataset(filenames, **kwargs)
+    self._verify_output_or_err(dataset, expected_output, expected_err_re)
 
   def testCsvDataset_requiredFields(self):
     record_defaults = [[]] * 4
@@ -191,21 +198,17 @@ class CsvDatasetOpTest(test.TestCase):
     record_defaults = [['']] * 3
     inputs = [['1,"2"3",4', '1,"2"3",4",5,5', 'a,b,"c"d"', 'e,f,g']]
     filenames = self._setup_files(inputs)
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
-        dataset = dataset.apply(error_ops.ignore_errors())
-        self._verify_output_or_err(sess, dataset, [['e', 'f', 'g']])
+    dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
+    dataset = dataset.apply(error_ops.ignore_errors())
+    self._verify_output_or_err(dataset, [['e', 'f', 'g']])
 
   def testCsvDataset_ignoreErrWithUnquotedQuotes(self):
     record_defaults = [['']] * 3
     inputs = [['1,2"3,4', 'a,b,c"d', '9,8"7,6,5', 'e,f,g']]
     filenames = self._setup_files(inputs)
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
-        dataset = dataset.apply(error_ops.ignore_errors())
-        self._verify_output_or_err(sess, dataset, [['e', 'f', 'g']])
+    dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
+    dataset = dataset.apply(error_ops.ignore_errors())
+    self._verify_output_or_err(dataset, [['e', 'f', 'g']])
 
   def testCsvDataset_withNoQuoteDelimAndUnquotedQuotes(self):
     record_defaults = [['']] * 3
@@ -351,10 +354,9 @@ class CsvDatasetOpTest(test.TestCase):
     inputs = [['1,,3,4', '5,6,,8']]
     ds_actual, ds_expected = self._make_test_datasets(
         inputs, record_defaults=record_defaults)
-    with ops.Graph().as_default() as g:
-      self._assert_datasets_equal(g,
-                                  ds_actual.repeat(5).prefetch(1),
-                                  ds_expected.repeat(5).prefetch(1))
+    self._assert_datasets_equal(
+        ds_actual.repeat(5).prefetch(1),
+        ds_expected.repeat(5).prefetch(1))
 
   def testCsvDataset_withTypeDefaults(self):
     # Testing using dtypes as record_defaults for required fields
@@ -373,13 +375,11 @@ class CsvDatasetOpTest(test.TestCase):
     ]]
     file_path = self._setup_files(data)
 
-    with ops.Graph().as_default() as g:
-      ds = readers.make_csv_dataset(
-          file_path, batch_size=1, shuffle=False, num_epochs=1)
-      next_batch = ds.make_one_shot_iterator().get_next()
+    ds = readers.make_csv_dataset(
+        file_path, batch_size=1, shuffle=False, num_epochs=1)
+    nxt = self._get_next(ds)
 
-    with self.session(graph=g) as sess:
-      result = list(sess.run(next_batch).values())
+    result = list(self.evaluate(nxt()).values())
 
     self.assertEqual(result, sorted(result))
 
@@ -542,6 +542,29 @@ class CsvDatasetOpTest(test.TestCase):
         compression_type='ZLIB',
         record_defaults=record_defaults)
 
+  def testCsvDataset_withScalarDefaults(self):
+    record_defaults = [constant_op.constant(0, dtype=dtypes.int64)] * 4
+    inputs = [[',,,', '1,1,1,', ',2,2,2']]
+    self._test_dataset(
+        inputs, [[0, 0, 0, 0], [1, 1, 1, 0], [0, 2, 2, 2]],
+        record_defaults=record_defaults)
+
+  def testCsvDataset_with2DDefaults(self):
+    record_defaults = [constant_op.constant([[0]], dtype=dtypes.int64)] * 4
+    inputs = [[',,,', '1,1,1,', ',2,2,2']]
+
+    if context.executing_eagerly():
+      err_spec = errors.InvalidArgumentError, (
+          'Each record default should be at '
+          'most rank 1.')
+    else:
+      err_spec = ValueError, 'Shape must be at most rank 1 but is rank 2'
+
+    with self.assertRaisesWithPredicateMatch(*err_spec):
+      self._test_dataset(
+          inputs, [[0, 0, 0, 0], [1, 1, 1, 0], [0, 2, 2, 2]],
+          record_defaults=record_defaults)
+
 
 class CsvDatasetBenchmark(test.Benchmark):
   """Benchmarks for the various ways of creating a dataset from CSV files.
diff --git a/tensorflow/core/api_def/base_api/api_def_DecodeCSV.pbtxt b/tensorflow/core/api_def/base_api/api_def_DecodeCSV.pbtxt
index e39213cbc7..440800704e 100644
--- a/tensorflow/core/api_def/base_api/api_def_DecodeCSV.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DecodeCSV.pbtxt
@@ -11,7 +11,8 @@ END
     name: "record_defaults"
     description: <<END
 One tensor per column of the input record, with either a
-scalar default value for that column or empty if the column is required.
+scalar default value for that column or an empty vector if the column is
+required.
 END
   }
   out_arg {
diff --git a/tensorflow/core/kernels/decode_csv_op.cc b/tensorflow/core/kernels/decode_csv_op.cc
index 3eed847c16..6bfb5bd5bc 100644
--- a/tensorflow/core/kernels/decode_csv_op.cc
+++ b/tensorflow/core/kernels/decode_csv_op.cc
@@ -61,6 +61,9 @@ class DecodeCSVOp : public OpKernel {
     OP_REQUIRES_OK(ctx, ctx->input_list("record_defaults", &record_defaults));
 
     for (int i = 0; i < record_defaults.size(); ++i) {
+      OP_REQUIRES(ctx, record_defaults[i].dims() <= 1,
+                  errors::InvalidArgument(
+                      "Each record default should be at most rank 1"));
       OP_REQUIRES(ctx, record_defaults[i].NumElements() < 2,
                   errors::InvalidArgument(
                       "There should only be 1 default per field but field ", i,
diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc
index 79ca96d249..eff453241d 100644
--- a/tensorflow/core/ops/parsing_ops.cc
+++ b/tensorflow/core/ops/parsing_ops.cc
@@ -343,10 +343,11 @@ REGISTER_OP("DecodeCSV")
       // Validate the record_defaults inputs.
       for (int i = 1; i < c->num_inputs(); ++i) {
         ShapeHandle v;
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 1, &v));
-        if (c->Value(c->Dim(v, 0)) > 1) {
+        TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(i), 1, &v));
+        if (c->Rank(c->input(i)) == 1 && c->Value(c->Dim(v, 0)) > 1) {
           return errors::InvalidArgument(
-              "Shape of a default must be a length-0 or length-1 vector");
+              "Shape of a default must be a length-0 or length-1 vector, or a "
+              "scalar.");
         }
       }
 
diff --git a/tensorflow/core/ops/parsing_ops_test.cc b/tensorflow/core/ops/parsing_ops_test.cc
index c65e66d1a8..ba594e400c 100644
--- a/tensorflow/core/ops/parsing_ops_test.cc
+++ b/tensorflow/core/ops/parsing_ops_test.cc
@@ -52,9 +52,12 @@ TEST(ParsingOpsTest, DecodeCSV_ShapeFn) {
   INFER_OK(op, "[1,2,?,4];?;?", "in0;in0");
   INFER_OK(op, "[1,2,?,4];[?];[?]", "in0;in0");
 
+  // Scalar defaults are ok
+  INFER_OK(op, "?;?;[]", "in0;in0");
+
   // Check errors in the record_defaults inputs.
-  INFER_ERROR("must be rank 1", op, "?;?;[]");
-  INFER_ERROR("must be rank 1", op, "?;[];?");
+  INFER_ERROR("must be at most rank 1 but is rank 2", op, "?;?;[1,2]");
+  INFER_ERROR("must be at most rank 1 but is rank 2", op, "?;[3,4];?");
   INFER_ERROR("Shape of a default must be", op, "?;?;[2]");
   INFER_ERROR("Shape of a default must be", op, "?;[2];?");
 }
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index da21ee3043..6bba99b9e7 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -286,7 +286,10 @@ tf_py_test(
     srcs = ["decode_csv_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
+        "//tensorflow/python/eager:context",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:parsing_ops",
     ],
 )
diff --git a/tensorflow/python/kernel_tests/decode_csv_op_test.py b/tensorflow/python/kernel_tests/decode_csv_op_test.py
index 40b17a11f8..e9307a6b2f 100644
--- a/tensorflow/python/kernel_tests/decode_csv_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_csv_op_test.py
@@ -20,28 +20,30 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.eager import context
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class DecodeCSVOpTest(test.TestCase):
 
   def _test(self, args, expected_out=None, expected_err_re=None):
-    with self.cached_session() as sess:
+    if expected_err_re is None:
       decode = parsing_ops.decode_csv(**args)
-
-      if expected_err_re is None:
-        out = sess.run(decode)
-
-        for i, field in enumerate(out):
-          if field.dtype == np.float32 or field.dtype == np.float64:
-            self.assertAllClose(field, expected_out[i])
-          else:
-            self.assertAllEqual(field, expected_out[i])
-
-      else:
-        with self.assertRaisesOpError(expected_err_re):
-          sess.run(decode)
+      out = self.evaluate(decode)
+
+      for i, field in enumerate(out):
+        if field.dtype == np.float32 or field.dtype == np.float64:
+          self.assertAllClose(field, expected_out[i])
+        else:
+          self.assertAllEqual(field, expected_out[i])
+    else:
+      with self.assertRaisesOpError(expected_err_re):
+        decode = parsing_ops.decode_csv(**args)
+        self.evaluate(decode)
 
   def testSimple(self):
     args = {
@@ -53,6 +55,31 @@ class DecodeCSVOpTest(test.TestCase):
 
     self._test(args, expected_out)
 
+  def testSimpleWithScalarDefaults(self):
+    args = {
+        "records": ["1,4", "2,5", "3,6"],
+        "record_defaults": [1, 2],
+    }
+
+    expected_out = [[1, 2, 3], [4, 5, 6]]
+
+    self._test(args, expected_out)
+
+  def testSimpleWith2DDefaults(self):
+    args = {
+        "records": ["1", "2", "3"],
+        "record_defaults": [[[0]]],
+    }
+
+    if context.executing_eagerly():
+      err_spec = errors.InvalidArgumentError, (
+          "Each record default should be at "
+          "most rank 1.")
+    else:
+      err_spec = ValueError, "Shape must be at most rank 1 but is rank 2"
+    with self.assertRaisesWithPredicateMatch(*err_spec):
+      self._test(args)
+
   def testSimpleNoQuoteDelimiter(self):
     args = {
         "records": ["1", "2", '"3"'],
diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index 8224097ac4..bb8da3162a 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -1584,7 +1584,8 @@ def decode_csv(records,
     record_defaults: A list of `Tensor` objects with specific types.
       Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
       One tensor per column of the input record, with either a
-      scalar default value for that column or empty if the column is required.
+      scalar default value for that column or an empty vector if the column is
+      required.
     field_delim: An optional `string`. Defaults to `","`.
       char delimiter to separate fields in a record.
     use_quote_delim: An optional `bool`. Defaults to `True`.
-- 
GitLab


From 4999d856d2953aee56fa9759f995038edf3ff566 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 14:31:18 -0700
Subject: [PATCH 0163/1357] Expose tf.contrib.checkpoint.PythonStateWrapper.

This makes it possible to checkpoint arbitrary python state if it can be
serialized to a string.

Also updates NumpyState to accept np.int32, np.int64, np.float32, np.float64
types.

PiperOrigin-RevId: 212879609
---
 tensorflow/contrib/checkpoint/__init__.py     |  2 +
 .../contrib/checkpoint/python/python_state.py | 40 +++++++++++++------
 .../checkpoint/python/python_state_test.py    |  5 +++
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/checkpoint/__init__.py b/tensorflow/contrib/checkpoint/__init__.py
index 150d734db6..94b7f4f867 100644
--- a/tensorflow/contrib/checkpoint/__init__.py
+++ b/tensorflow/contrib/checkpoint/__init__.py
@@ -37,6 +37,7 @@ Checkpoint management:
 
 Saving and restoring Python state:
 @@NumpyState
+@@PythonStateWrapper
 """
 
 from __future__ import absolute_import
@@ -45,6 +46,7 @@ from __future__ import print_function
 
 from tensorflow.contrib.checkpoint.python.containers import UniqueNameTracker
 from tensorflow.contrib.checkpoint.python.python_state import NumpyState
+from tensorflow.contrib.checkpoint.python.python_state import PythonStateWrapper
 from tensorflow.contrib.checkpoint.python.split_dependency import split_dependency
 from tensorflow.contrib.checkpoint.python.visualize import dot_graph_from_checkpoint
 from tensorflow.core.protobuf.checkpointable_object_graph_pb2 import CheckpointableObjectGraph
diff --git a/tensorflow/contrib/checkpoint/python/python_state.py b/tensorflow/contrib/checkpoint/python/python_state.py
index 9b11035b6d..302d5cfb79 100644
--- a/tensorflow/contrib/checkpoint/python/python_state.py
+++ b/tensorflow/contrib/checkpoint/python/python_state.py
@@ -17,7 +17,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import abc
 import functools
+import six
 
 import numpy
 
@@ -101,7 +103,7 @@ class NumpyState(base.CheckpointableBase):
     # TODO(allenl): Consider supporting lists/tuples, either ad-hoc or by making
     # ndarrays checkpointable natively and using standard checkpointable list
     # tracking.
-    if isinstance(value, numpy.ndarray):
+    if isinstance(value, (numpy.ndarray, numpy.generic)):
       try:
         existing = super(NumpyState, self).__getattribute__(name)
         existing.array = value
@@ -127,7 +129,29 @@ class NumpyState(base.CheckpointableBase):
     super(NumpyState, self).__setattr__(name, value)
 
 
-class _NumpyWrapper(base.CheckpointableBase):
+@six.add_metaclass(abc.ABCMeta)
+class PythonStateWrapper(base.CheckpointableBase):
+  """Wraps a Python object for storage in an object-based checkpoint."""
+
+  @abc.abstractmethod
+  def _serialize(self):
+    """Callback for `PythonStringStateSaveable` to serialize the object."""
+
+  @abc.abstractmethod
+  def _deserialize(self, string_value):
+    """Callback for `PythonStringStateSaveable` to deserialize the object."""
+
+  def _gather_saveables_for_checkpoint(self):
+    """Specify callbacks for saving and restoring `array`."""
+    return {
+        "py_state": functools.partial(
+            base.PythonStringStateSaveable,
+            state_callback=self._serialize,
+            restore_callback=self._deserialize)
+        }
+
+
+class _NumpyWrapper(PythonStateWrapper):
   """Wraps a NumPy array for storage in an object-based checkpoint."""
 
   def __init__(self, array):
@@ -139,7 +163,7 @@ class _NumpyWrapper(base.CheckpointableBase):
     self.array = array
 
   def _serialize(self):
-    """Callback for `PythonStringStateSaveable` to serialize the array."""
+    """Callback to serialize the array."""
     string_file = BytesIO()
     try:
       numpy.save(string_file, self.array, allow_pickle=False)
@@ -149,18 +173,10 @@ class _NumpyWrapper(base.CheckpointableBase):
     return serialized
 
   def _deserialize(self, string_value):
-    """Callback for `PythonStringStateSaveable` to deserialize the array."""
+    """Callback to deserialize the array."""
     string_file = BytesIO(string_value)
     try:
       self.array = numpy.load(string_file, allow_pickle=False)
     finally:
       string_file.close()
 
-  def _gather_saveables_for_checkpoint(self):
-    """Specify callbacks for saving and restoring `array`."""
-    return {
-        "array": functools.partial(
-            base.PythonStringStateSaveable,
-            state_callback=self._serialize,
-            restore_callback=self._deserialize)
-        }
diff --git a/tensorflow/contrib/checkpoint/python/python_state_test.py b/tensorflow/contrib/checkpoint/python/python_state_test.py
index 0439a4755e..45494351ff 100644
--- a/tensorflow/contrib/checkpoint/python/python_state_test.py
+++ b/tensorflow/contrib/checkpoint/python/python_state_test.py
@@ -40,10 +40,13 @@ class NumpyStateTests(test.TestCase):
     save_state.a = numpy.ones([2, 2])
     save_state.b = numpy.ones([2, 2])
     save_state.b = numpy.zeros([2, 2])
+    save_state.c = numpy.int64(3)
     self.assertAllEqual(numpy.ones([2, 2]), save_state.a)
     self.assertAllEqual(numpy.zeros([2, 2]), save_state.b)
+    self.assertEqual(3, save_state.c)
     first_save_path = saver.save(prefix)
     save_state.a[1, 1] = 2.
+    save_state.c = numpy.int64(4)
     second_save_path = saver.save(prefix)
 
     load_state = python_state.NumpyState()
@@ -51,6 +54,7 @@ class NumpyStateTests(test.TestCase):
     loader.restore(first_save_path).initialize_or_restore()
     self.assertAllEqual(numpy.ones([2, 2]), load_state.a)
     self.assertAllEqual(numpy.zeros([2, 2]), load_state.b)
+    self.assertEqual(3, load_state.c)
     load_state.a[0, 0] = 42.
     self.assertAllEqual([[42., 1.], [1., 1.]], load_state.a)
     loader.restore(first_save_path).run_restore_ops()
@@ -58,6 +62,7 @@ class NumpyStateTests(test.TestCase):
     loader.restore(second_save_path).run_restore_ops()
     self.assertAllEqual([[1., 1.], [1., 2.]], load_state.a)
     self.assertAllEqual(numpy.zeros([2, 2]), load_state.b)
+    self.assertEqual(4, load_state.c)
 
   def testNoGraphPollution(self):
     graph = ops.Graph()
-- 
GitLab


From 25d8c732dcf7fa82d086c5da46408838fa0f04f1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 14:53:31 -0700
Subject: [PATCH 0164/1357] Add ability to skip serializing selected tensors in
 interpreter serializer.

PiperOrigin-RevId: 212883697
---
 .../contrib/lite/experimental/writer/writer_lib.cc | 14 ++++++++++----
 .../contrib/lite/experimental/writer/writer_lib.h  |  7 ++++++-
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/experimental/writer/writer_lib.cc b/tensorflow/contrib/lite/experimental/writer/writer_lib.cc
index 52b17faf82..555a9cc4b0 100644
--- a/tensorflow/contrib/lite/experimental/writer/writer_lib.cc
+++ b/tensorflow/contrib/lite/experimental/writer/writer_lib.cc
@@ -117,6 +117,8 @@ Offset<Vector<Offset<Operator>>> InterpreterWriter::ExportOperators(
 
 Offset<Vector<Offset<Tensor>>> InterpreterWriter::ExportTensors(
     FlatBufferBuilder* fbb) {
+  // Initialized to -1.
+  // A value of -1 means this tensor will not be exported.
   tensor_to_written_tensor_.resize(interpreter_->tensors_size(), -1);
 
   std::vector<Offset<Tensor>> tensors;
@@ -135,15 +137,17 @@ Offset<Vector<Offset<Tensor>>> InterpreterWriter::ExportTensors(
   int curr_output_index = 0;
   for (int tensor_index = 0; tensor_index < interpreter_->tensors_size();
        tensor_index++) {
-    if (!tensor_is_temporary[tensor_index]) {
+    // Temporary tensors and unused tensors will not be written.
+    if (!tensor_is_temporary[tensor_index] &&
+        unused_tensors_.find(tensor_index) == unused_tensors_.end()) {
       tensor_to_written_tensor_[tensor_index] = curr_output_index++;
     }
   }
 
   for (int tensor_index = 0; tensor_index < interpreter_->tensors_size();
        ++tensor_index) {
-    // Skip temporaries.
-    if (tensor_is_temporary[tensor_index]) continue;
+    // Tensor not exported.
+    if (tensor_to_written_tensor_[tensor_index] == -1) continue;
 
     if (TfLiteTensor* tensor = interpreter_->tensor(tensor_index)) {
       // We only need to convert non temporaries
@@ -215,7 +219,9 @@ std::vector<int> InterpreterWriter::RemapTensorIndicesToWritten(
   std::vector<int> output;
   output.reserve(input.size());
   for (int x : input) {
-    output.push_back(tensor_to_written_tensor_[x]);
+    if (tensor_to_written_tensor_[x] != -1) {
+      output.push_back(tensor_to_written_tensor_[x]);
+    }
   }
   return output;
 }
diff --git a/tensorflow/contrib/lite/experimental/writer/writer_lib.h b/tensorflow/contrib/lite/experimental/writer/writer_lib.h
index a98108b496..a5f14697cf 100644
--- a/tensorflow/contrib/lite/experimental/writer/writer_lib.h
+++ b/tensorflow/contrib/lite/experimental/writer/writer_lib.h
@@ -62,6 +62,10 @@ class InterpreterWriter {
   // caller to change the custom data.
   TfLiteStatus RegisterCustomWriter(const std::string& custom_name,
                                     CustomWriter custom_writer);
+  // Tensors that are unused and shouldn't be written.
+  void SetUnusedTensors(const std::set<int>& unused_tensors) {
+    unused_tensors_ = unused_tensors;
+  }
 
  private:
   template <class T>
@@ -111,8 +115,9 @@ class InterpreterWriter {
     int builtin;
     std::string custom;
   };
+  std::set<int> unused_tensors_;
   // For every tensor index in the interpreter, the index in the written.
-  // This is different due to temporary tensors not being written.
+  // This is different due to temporary and unused tensors not being written.
   std::vector<int> tensor_to_written_tensor_;
   // List of used opcodes
   std::vector<OpCode> opcodes_;
-- 
GitLab


From 51d72a7d7f74784b68916819edd04e890b36f957 Mon Sep 17 00:00:00 2001
From: "(David) Siu-Kei Muk" <muksiukei@gmail.com>
Date: Fri, 14 Sep 2018 05:59:05 +0800
Subject: [PATCH 0165/1357] Modified "_check_is_tensor_or_operation" to check
 if "x" is "tensor_like"

---
 tensorflow/python/estimator/model_fn.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 439cc2e3a4..331a9d1a05 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -26,6 +26,7 @@ import six
 from tensorflow.python.estimator.export import export_output as export_output_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras.metrics import Metric
 from tensorflow.python.ops import array_ops
 from tensorflow.python.saved_model import signature_constants
@@ -466,7 +467,7 @@ class _TPUEstimatorSpec(
 
 
 def _check_is_tensor_or_operation(x, name):
-  if not (isinstance(x, ops.Operation) or isinstance(x, ops.Tensor)):
+  if not (isinstance(x, ops.Operation) or tensor_util.is_tensor(x)):
     raise TypeError('{} must be Operation or Tensor, given: {}'.format(name, x))
 
 
-- 
GitLab


From e8af4e1bb9496c111530e88263fb1b8dac8bdde9 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Thu, 13 Sep 2018 14:59:51 -0700
Subject: [PATCH 0166/1357] Convert "post training quant" tutorial to a
 notebook.

PiperOrigin-RevId: 212884746
---
 .../lite/tutorials/post_training_quant.ipynb  | 702 ++++++++++++++++++
 1 file changed, 702 insertions(+)
 create mode 100644 tensorflow/contrib/lite/tutorials/post_training_quant.ipynb

diff --git a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
new file mode 100644
index 0000000000..a96e2c4e1b
--- /dev/null
+++ b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
@@ -0,0 +1,702 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "6Y8E0lw5eYWm"
+      },
+      "source": [
+        "# Post Training Quantization"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "CIGrZZPTZVeO"
+      },
+      "source": [
+        "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "\u003c/table\u003e"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "BTC1rDAuei_1"
+      },
+      "source": [
+        "## Overview\n",
+        "\n",
+        "[TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) now supports\n",
+        "converting weights to 8 bit precision as part of model conversion from\n",
+        "tensorflow graphdefs to TFLite's flat buffer format. Weight quantization\n",
+        "achieves a 4x reduction in the model size. In addition, TFLite supports on the\n",
+        "fly quantization and dequantization of activations to allow for:\n",
+        "\n",
+        "1.  Using quantized kernels for faster implementation when available.\n",
+        "\n",
+        "2.  Mixing of floating-point kernels with quantized kernels for different parts\n",
+        "    of the graph.\n",
+        "\n",
+        "Note that the activations are always stored in floating point. For ops that\n",
+        "support quantized kernels, the activations are quantized to 8 bits of precision\n",
+        "dynamically prior to processing and are de-quantized to float precision after\n",
+        "processing. Depending on the model being converted, this can give a speedup over\n",
+        "pure floating point computation.\n",
+        "\n",
+        "In contrast to\n",
+        "[quantization aware training](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/quantize)\n",
+        ", the weights are quantized post training and the activations are quantized dynamically \n",
+        "at inference in this method.\n",
+        "Therefore, the model weights are not retrained to compensate for quantization\n",
+        "induced errors. It is important to check the accuracy of the quantized model to\n",
+        "ensure that the degradation is acceptable.\n",
+        "\n",
+        "In this tutorial, we train an MNIST model from scratch, check its accuracy in\n",
+        "tensorflow and then convert the saved model into a Tensorflow Lite flatbuffer\n",
+        "with weight quantization. We finally check the\n",
+        "accuracy of the converted model and compare it to the original saved model. We\n",
+        "run the training script mnist.py from\n",
+        "[Tensorflow official mnist tutorial](https://github.com/tensorflow/models/tree/master/official/mnist).\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "2XsEP17Zelz9"
+      },
+      "source": [
+        "## Building an MNIST model"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "dDqqUIZjZjac"
+      },
+      "source": [
+        "### Setup"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "gyqAw1M9lyab"
+      },
+      "outputs": [],
+      "source": [
+        "! pip uninstall -y tensorflow\n",
+        "! pip install -U tf-nightly"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "WsN6s5L1ieNl"
+      },
+      "outputs": [],
+      "source": [
+        "import tensorflow as tf\n",
+        "tf.enable_eager_execution()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "00U0taBoe-w7"
+      },
+      "outputs": [],
+      "source": [
+        "! git clone --depth 1 https://github.com/tensorflow/models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "4XZPtSh-fUOc"
+      },
+      "outputs": [],
+      "source": [
+        "import sys\n",
+        "import os\n",
+        "\n",
+        "if sys.version_info.major \u003e= 3:\n",
+        "    import pathlib\n",
+        "else:\n",
+        "    import pathlib2 as pathlib\n",
+        "\n",
+        "# Add `models` to the python path.\n",
+        "models_path = os.path.join(os.getcwd(), \"models\")\n",
+        "sys.path.append(models_path)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "eQ6Q0qqKZogR"
+      },
+      "source": [
+        "### Train and export the model"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "eMsw_6HujaqM"
+      },
+      "outputs": [],
+      "source": [
+        "saved_models_root = \"/tmp/mnist_saved_model\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "hWSAjQWagIHl"
+      },
+      "outputs": [],
+      "source": [
+        "# The above path addition is not visible to subprocesses, add the path for the subprocess as well.\n",
+        "# Note: channels_last is required here or the conversion may fail. \n",
+        "!PYTHONPATH={models_path} python models/official/mnist/mnist.py --train_epochs=1 --export_dir {saved_models_root} --data_format=channels_last"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "5NMaNZQCkW9X"
+      },
+      "source": [
+        "For the example, we only trained the model for a single epoch, so it only trains to ~96% accuracy.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "xl8_fzVAZwOh"
+      },
+      "source": [
+        "### Convert to a TFLite model\n",
+        "\n",
+        "The `savedmodel` directory is named with a timestamp. Select the most recent one: "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "Xp5oClaZkbtn"
+      },
+      "outputs": [],
+      "source": [
+        "saved_model_dir = str(sorted(pathlib.Path(saved_models_root).glob(\"*\"))[-1])\n",
+        "saved_model_dir"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "AT8BgkKmljOy"
+      },
+      "source": [
+        "Using the python `TocoConverter`, the saved model can be converted into a TFLite model.\n",
+        "\n",
+        "First load the model using the `TocoConverter`:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "_i8B2nDZmAgQ"
+      },
+      "outputs": [],
+      "source": [
+        "import tensorflow as tf\n",
+        "tf.enable_eager_execution()\n",
+        "converter = tf.contrib.lite.TocoConverter.from_saved_model(saved_model_dir)\n",
+        "tflite_model = converter.convert()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "F2o2ZfF0aiCx"
+      },
+      "source": [
+        "Write it out to a tflite file:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "vptWZq2xnclo"
+      },
+      "outputs": [],
+      "source": [
+        "tflite_models_dir = pathlib.Path(\"/tmp/mnist_tflite_models/\")\n",
+        "tflite_models_dir.mkdir(exist_ok=True, parents=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "Ie9pQaQrn5ue"
+      },
+      "outputs": [],
+      "source": [
+        "tflite_model_file = tflite_models_dir/\"mnist_model.tflite\"\n",
+        "tflite_model_file.write_bytes(tflite_model)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "7BONhYtYocQY"
+      },
+      "source": [
+        "To quantize the model on export, set the `post_training_quantize` flag:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "g8PUvLWDlmmz"
+      },
+      "outputs": [],
+      "source": [
+        "# Note: If you don't have a recent tf-nightly installed, the\n",
+        "# \"post_training_quantize\" line will have no effect.\n",
+        "tf.logging.set_verbosity(tf.logging.INFO)\n",
+        "converter.post_training_quantize = True\n",
+        "tflite_quant_model = converter.convert()\n",
+        "tflite_model_quant_file = tflite_models_dir/\"mnist_model_quant.tflite\"\n",
+        "tflite_model_quant_file.write_bytes(tflite_quant_model)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "PhMmUTl4sbkz"
+      },
+      "source": [
+        "Note how the resulting file, with `post_training_quantize` set, is approximately `1/4` the size."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "JExfcfLDscu4"
+      },
+      "outputs": [],
+      "source": [
+        "!ls -lh {tflite_models_dir}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "L8lQHMp_asCq"
+      },
+      "source": [
+        "## Run the TFLite models"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "-5l6-ciItvX6"
+      },
+      "source": [
+        "We can run the TensorFlow Lite model using the python TensorFlow Lite\n",
+        "Interpreter. \n",
+        "\n",
+        "### load the test data\n",
+        "\n",
+        "First let's load the mnist test data to feed to it:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "eTIuU07NuKFL"
+      },
+      "outputs": [],
+      "source": [
+        "import numpy as np\n",
+        "mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()\n",
+        "images, labels = tf.to_float(mnist_test[0])/255.0, mnist_test[1]\n",
+        "\n",
+        "# Note: If you change the batch size, then use \n",
+        "# `tf.contrib.lite.Interpreter.resize_tensor_input` to also change it for\n",
+        "# the interpreter.\n",
+        "mnist_ds = tf.data.Dataset.from_tensor_slices((images, labels)).batch(1)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "Ap_jE7QRvhPf"
+      },
+      "source": [
+        "### Load the model into an interpreter"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "Jn16Rc23zTss"
+      },
+      "outputs": [],
+      "source": [
+        "interpreter = tf.contrib.lite.Interpreter(model_path=str(tflite_model_file))\n",
+        "interpreter.allocate_tensors()\n",
+        "input_index = interpreter.get_input_details()[0][\"index\"]\n",
+        "output_index = interpreter.get_output_details()[0][\"index\"]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "J8Pztk1mvNVL"
+      },
+      "outputs": [],
+      "source": [
+        "tf.logging.set_verbosity(tf.logging.DEBUG)\n",
+        "interpreter_quant = tf.contrib.lite.Interpreter(model_path=str(tflite_model_quant_file))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "Afl6yGvWyqAr"
+      },
+      "outputs": [],
+      "source": [
+        "interpreter_quant.allocate_tensors()\n",
+        "input_index = interpreter_quant.get_input_details()[0][\"index\"]\n",
+        "output_index = interpreter_quant.get_output_details()[0][\"index\"]\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "2opUt_JTdyEu"
+      },
+      "source": [
+        "### Test the model on one image"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "AKslvo2kwWac"
+      },
+      "outputs": [],
+      "source": [
+        "for img, label in mnist_ds.take(1):\n",
+        "  break\n",
+        "\n",
+        "interpreter.set_tensor(input_index, img)\n",
+        "interpreter.invoke()\n",
+        "predictions = interpreter.get_tensor(output_index)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "XZClM2vo3_bm"
+      },
+      "outputs": [],
+      "source": [
+        "import matplotlib.pylab as plt\n",
+        "\n",
+        "plt.imshow(img[0])\n",
+        "template = \"True:{true}, predicted:{predict}\"\n",
+        "_ = plt.title(template.format(true= str(label[0].numpy()),\n",
+        "                              predict=str(predictions[0,0])))\n",
+        "plt.grid(False)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "LwN7uIdCd8Gw"
+      },
+      "source": [
+        "### Evaluate the models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "05aeAuWjvjPx"
+      },
+      "outputs": [],
+      "source": [
+        "def eval_model(interpreter, mnist_ds):\n",
+        "  total_seen = 0\n",
+        "  num_correct = 0\n",
+        "\n",
+        "  for img, label in mnist_ds:\n",
+        "    total_seen += 1\n",
+        "    interpreter.set_tensor(input_index, img)\n",
+        "    interpreter.invoke()\n",
+        "    predictions = interpreter.get_tensor(output_index)\n",
+        "    if predictions == label.numpy():\n",
+        "      num_correct += 1\n",
+        "\n",
+        "    if total_seen % 500 == 0:\n",
+        "        print(\"Accuracy after %i images: %f\" %\n",
+        "              (total_seen, float(num_correct) / float(total_seen)))\n",
+        "\n",
+        "  return float(num_correct) / float(total_seen)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "DqXBnDfJ7qxL"
+      },
+      "outputs": [],
+      "source": [
+        "print(eval_model(interpreter_quant, mnist_ds))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "Km3cY9ry8ZlG"
+      },
+      "source": [
+        "We can repeat the evaluation on the weight quantized model to obtain:\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "-9cnwiPp6EGm"
+      },
+      "outputs": [],
+      "source": [
+        "print(eval_model(interpreter_quant, mnist_ds))\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "L7lfxkor8pgv"
+      },
+      "source": [
+        "\n",
+        "In this example, we have compressed model with no difference in the accuracy."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "M0o1FtmWeKZm"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "## Optimizing an existing model\n",
+        "\n",
+        "We now consider another example. Resnets with pre-activation layers (Resnet-v2) are widely used for vision applications.\n",
+        "  Pre-trained frozen graph for resnet-v2-101 is available at the\n",
+        "  [Tensorflow Lite model repository](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/models.md).\n",
+        "\n",
+        "We can convert the frozen graph to a TFLite flatbuffer with quantization by:\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "v5p5VcNPjILQ"
+      },
+      "outputs": [],
+      "source": [
+        "archive_path = tf.keras.utils.get_file(\"resnet_v2_101.tgz\", \"https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz\", extract=True)\n",
+        "archive_path = pathlib.Path(archive_path)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "-sxnXQuC4ThD"
+      },
+      "source": [
+        "The `info.txt` file lists the input and output names. You can also find them using TensorBoard to visually inspect the graph."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "g_Q_OMEJ4LIc"
+      },
+      "outputs": [],
+      "source": [
+        "! cat {archive_path}/resnet_v2_101_299_info.txt"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "ujCAFhqm-C6H"
+      },
+      "outputs": [],
+      "source": [
+        "graph_def_file = pathlib.Path(archive_path).parent/\"resnet_v2_101_299_frozen.pb\"\n",
+        "input_arrays = [\"input\"] \n",
+        "output_arrays = [\"output\"]\n",
+        "converter = tf.contrib.lite.TocoConverter.from_frozen_graph(\n",
+        "  str(graph_def_file), input_arrays, output_arrays, input_shapes={\"input\":[1,299,299,3]})\n",
+        "converter.post_training_quantize = True\n",
+        "resnet_tflite_file = graph_def_file.parent/\"resnet_v2_101_quantized.tflite\"\n",
+        "resnet_tflite_file.write_bytes(converter.convert())\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "vhOjeg1x9Knp"
+      },
+      "outputs": [],
+      "source": [
+        "archive_dir = str(archive_path.parent)\n",
+        "!ls -lh {archive_dir}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "qqHLaqFMCjRZ"
+      },
+      "source": [
+        "\n",
+        "The model size reduces from 171 MB to 43 MB.\n",
+        "The accuracy of this model on imagenet can be evaluated using the scripts provided for [TFLite accuracy measurement](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/accuracy/ilsvrc).\n",
+        "\n",
+        "The optimized model top-1 accuracy is 76.8, the same as the floating point model."
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "collapsed_sections": [],
+      "name": "post-training-quant.ipynb",
+      "private_outputs": true,
+      "provenance": [],
+      "toc_visible": true,
+      "version": "0.3.2"
+    },
+    "kernelspec": {
+      "display_name": "Python 2",
+      "name": "python2"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
-- 
GitLab


From fb50c8e9a3cb2ccfac9cf4a847d5841cba80b524 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Thu, 13 Sep 2018 15:01:08 -0700
Subject: [PATCH 0167/1357] Dilated Depthwise Conv reference implementations.

PiperOrigin-RevId: 212884951
---
 tensorflow/contrib/lite/c/builtin_op_data.h   |   7 ++
 .../lite/core/api/flatbuffer_conversions.cc   |   3 +
 .../contrib/lite/kernels/depthwise_conv.cc    |  61 ++++++---
 .../lite/kernels/depthwise_conv_test.cc       | 116 +++++++++++++++++-
 .../internal/optimized/depthwiseconv_float.h  |  20 +++
 .../internal/optimized/depthwiseconv_uint8.h  |  24 ++++
 .../internal/reference/depthwiseconv_float.h  |  24 +++-
 .../internal/reference/depthwiseconv_uint8.h  |  28 ++++-
 tensorflow/contrib/lite/schema/schema.fbs     |   4 +
 .../contrib/lite/schema/schema_generated.h    |  38 +++++-
 .../contrib/lite/testing/generate_examples.py |   2 +
 tensorflow/contrib/lite/toco/model.h          |   5 +
 .../contrib/lite/toco/tflite/operator.cc      |  14 ++-
 13 files changed, 314 insertions(+), 32 deletions(-)

diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h
index fa43e6a024..be9d551ee4 100644
--- a/tensorflow/contrib/lite/c/builtin_op_data.h
+++ b/tensorflow/contrib/lite/c/builtin_op_data.h
@@ -25,6 +25,9 @@ extern "C" {
 
 // TODO(aselle): Consider using "if this then that" for testing.
 
+// IMPORTANT: All new members of structs must be added at the end to ensure
+// backwards compatibility.
+
 // Possible padding types (for convolutions)
 typedef enum {
   kTfLitePaddingUnknown = 0,
@@ -71,11 +74,15 @@ typedef struct {
 } TfLitePoolParams;
 
 typedef struct {
+  // Parameters for DepthwiseConv version 1 or above.
   TfLitePadding padding;
   int stride_width;
   int stride_height;
   int depth_multiplier;
   TfLiteFusedActivation activation;
+  // Parameters for DepthwiseConv version 2 or above.
+  int dilation_width_factor;
+  int dilation_height_factor;
 } TfLiteDepthwiseConvParams;
 
 typedef struct {
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index eef4b6d831..f4d2839b1b 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -216,6 +216,9 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
         params->depth_multiplier = conv_params->depth_multiplier();
         params->activation =
             parse_activation(conv_params->fused_activation_function());
+
+        params->dilation_width_factor = conv_params->dilation_w_factor();
+        params->dilation_height_factor = conv_params->dilation_h_factor();
       }
       *builtin_data = reinterpret_cast<void*>(params);
       break;
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
index 347515f289..3e1ce60113 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
@@ -126,23 +126,28 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   // Matching GetWindowedOutputSize in TensorFlow.
   auto padding = params->padding;
-  auto compute_out_size = [padding](int imageSize, int filterSize,
-                                    int stride) -> int {
+  auto compute_out_size = [padding](int image_size, int filter_size, int stride,
+                                    int dilation_rate) -> int {
+    int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
     return padding == kTfLitePaddingSame
-               ? (imageSize + stride - 1) / stride
+               ? (image_size + stride - 1) / stride
                : padding == kTfLitePaddingValid
-                     ? (imageSize - filterSize + stride) / stride
+                     ? (image_size - effective_filter_size + stride) / stride
                      : 0;
   };
 
-  int out_width = compute_out_size(width, filter_width, params->stride_width);
+  int out_width = compute_out_size(width, filter_width, params->stride_width,
+                                   params->dilation_width_factor);
   int out_height =
-      compute_out_size(height, filter_height, params->stride_height);
+      compute_out_size(height, filter_height, params->stride_height,
+                       params->dilation_height_factor);
 
-  data->padding.height = ComputePadding(params->stride_height, 1, height,
-                                        filter_height, out_height);
+  data->padding.height =
+      ComputePadding(params->stride_height, params->dilation_height_factor,
+                     height, filter_height, out_height);
   data->padding.width =
-      ComputePadding(params->stride_width, 1, width, filter_width, out_width);
+      ComputePadding(params->stride_width, params->dilation_width_factor, width,
+                     filter_width, out_width);
 
   // Note that quantized inference requires that all tensors have their
   // parameters set. This is usually done during quantized training.
@@ -177,8 +182,19 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
 
   void (*depthwise_conv)(const float*, const Dims<4>&, const float*,
                          const Dims<4>&, const float*, const Dims<4>&, int, int,
-                         int, int, int, float, float, float*, const Dims<4>&);
-  if (kernel_type == kReference) {
+                         int, int, int, int, int, float, float, float*,
+                         const Dims<4>&);
+  KernelType effective_kernel_type;
+  // TODO(suharshs): Currently only the reference implementation supports
+  // dilations.
+  if ((params->dilation_width_factor != 1) ||
+      (params->dilation_height_factor != 1)) {
+    effective_kernel_type = kReference;
+  } else {
+    effective_kernel_type = kernel_type;
+  }
+
+  if (effective_kernel_type == kReference) {
     depthwise_conv = &reference_ops::DepthwiseConv;
   } else {
     depthwise_conv = &optimized_ops::DepthwiseConv;
@@ -188,7 +204,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
       GetTensorData<float>(input), GetTensorDims(input),
       GetTensorData<float>(filter), GetTensorDims(filter),
       GetTensorData<float>(bias), GetTensorDims(bias), params->stride_width,
-      params->stride_height, data->padding.width, data->padding.height,
+      params->stride_height, params->dilation_width_factor,
+      params->dilation_height_factor, data->padding.width, data->padding.height,
       params->depth_multiplier, output_activation_min, output_activation_max,
       GetTensorData<float>(output), GetTensorDims(output));
 }
@@ -204,9 +221,20 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 
   void (*depthwise_conv)(const uint8*, const Dims<4>&, int32, const uint8*,
                          const Dims<4>&, int32, const int32*, const Dims<4>&,
-                         int, int, int, int, int, int32, int32, int, int32,
-                         int32, uint8*, const Dims<4>&);
-  if (kernel_type == kReference) {
+                         int, int, int, int, int, int, int, int32, int32, int,
+                         int32, int32, uint8*, const Dims<4>&);
+
+  KernelType effective_kernel_type;
+  // TODO(suharshs): Currently only the reference implementation supports
+  // dilations.
+  if ((params->dilation_width_factor != 1) ||
+      (params->dilation_height_factor != 1)) {
+    effective_kernel_type = kReference;
+  } else {
+    effective_kernel_type = kernel_type;
+  }
+
+  if (effective_kernel_type == kReference) {
     depthwise_conv = &reference_ops::DepthwiseConv;
   } else {
     depthwise_conv = &optimized_ops::DepthwiseConv;
@@ -216,7 +244,8 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
       GetTensorData<uint8_t>(input), GetTensorDims(input), input_offset,
       GetTensorData<uint8_t>(filter), GetTensorDims(filter), filter_offset,
       GetTensorData<int32_t>(bias), GetTensorDims(bias), params->stride_width,
-      params->stride_height, data->padding.width, data->padding.height,
+      params->stride_height, params->dilation_width_factor,
+      params->dilation_height_factor, data->padding.width, data->padding.height,
       params->depth_multiplier, output_offset, data->output_multiplier,
       data->output_shift, data->output_activation_min,
       data->output_activation_max, GetTensorData<uint8_t>(output),
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
index c00cafb9fb..2af26ab80a 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
@@ -30,7 +30,8 @@ class BaseDepthwiseConvolutionOpModel : public SingleOpModel {
   // stride values.
   BaseDepthwiseConvolutionOpModel(const TensorData& input,
                                   const TensorData& filter,
-                                  const TensorData& output) {
+                                  const TensorData& output,
+                                  int dilation_factor = 1) {
     input_ = AddInput(input);
     filter_ = AddInput(filter);
 
@@ -56,7 +57,8 @@ class BaseDepthwiseConvolutionOpModel : public SingleOpModel {
         BuiltinOperator_DEPTHWISE_CONV_2D,
         BuiltinOptions_DepthwiseConv2DOptions,
         CreateDepthwiseConv2DOptions(builder_, Padding_VALID, 1, 1, depth_mul,
-                                     ActivationFunctionType_NONE)
+                                     ActivationFunctionType_NONE,
+                                     dilation_factor, dilation_factor)
             .Union());
 
     BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)});
@@ -110,6 +112,58 @@ TEST(DepthwiseConvolutionOpTest, SimpleTest) {
                              }));
 }
 
+TEST(DepthwiseConvolutionOpTest, SimpleDilatedTest) {
+  const int depth = 1;
+  const int image_width = 9;
+  const int image_height = 9;
+  const int image_batch_count = 1;
+  const int filter_size = 3;
+  const int filter_count = 1;
+  const int dilation_factor = 3;
+  DepthwiseConvolutionOpModel m(
+      {TensorType_FLOAT32,
+       {image_batch_count, image_height, image_width, depth}},
+      {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}},
+      {TensorType_FLOAT32, {}}, dilation_factor);
+
+  // The image matrix is:
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // clang-format off
+  m.SetInput({0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0});
+  // clang-format on
+  // The filter matrix is:
+  // | 1 | 2 | 3 |
+  // | 4 | 5 | 6 |
+  // | 7 | 8 | 9 |
+  m.SetFilter({1, 2, 3, 4, 5, 6, 7, 8, 9});
+  // No bias for this test.
+  m.SetBias({0});
+  m.Invoke();
+
+  // Since the dilation rate is 3 this will reduce the size of the output from
+  // 10x10 to 3x3 of all 5s. Specifically:
+  // | 5 | 5 | 5 |
+  // | 5 | 5 | 5 |
+  // | 5 | 5 | 5 |
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5}));
+}
+
 class QuantizedDepthwiseConvolutionOpModel
     : public BaseDepthwiseConvolutionOpModel {
  public:
@@ -207,6 +261,64 @@ TEST(QuantizedDepthwiseConvolutionOpTest,
               ElementsAreArray(ArrayFloatNear(float_op.GetOutput(), 1)));
 }
 
+TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) {
+  const int depth = 1;
+  const int image_width = 9;
+  const int image_height = 9;
+  const int image_batch_count = 1;
+  const int filter_size = 3;
+  const int filter_count = 1;
+  const int dilation_factor = 3;
+  QuantizedDepthwiseConvolutionOpModel m(
+      {TensorType_UINT8,
+       {image_batch_count, image_height, image_width, depth},
+       0,
+       255},
+      {TensorType_UINT8,
+       {depth, filter_size, filter_size, filter_count},
+       0,
+       255},
+      {TensorType_UINT8, {}, 0, 255}, dilation_factor);
+
+  // The image matrix is:
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // clang-format off
+  m.SetInput({0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0});
+  // clang-format on
+  // The filter matrix is:
+  // | 1 | 2 | 3 |
+  // | 4 | 5 | 6 |
+  // | 7 | 8 | 9 |
+  m.SetFilter({1, 2, 3, 4, 5, 6, 7, 8, 9});
+  // No bias for this test.
+  m.SetBias({0});
+  m.Invoke();
+
+  // Since the dilation rate is 3 this will reduce the size of the output from
+  // 10x10 to 3x3 of all 5s. Specifically:
+  // | 5 | 5 | 5 |
+  // | 5 | 5 | 5 |
+  // | 5 | 5 | 5 |
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5}));
+}
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index 7f6eea2d5d..70810ca784 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -1067,6 +1067,26 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
+  // TODO(suharshs): Optimized implementation of dilation depthwise conv need to
+  // be implemented.
+  TFLITE_DCHECK(dilation_width_factor == 1);
+  TFLITE_DCHECK(dilation_height_factor == 1);
+
+  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
+                bias_dims, stride_width, stride_height, pad_width, pad_height,
+                depth_multiplier, output_activation_min, output_activation_max,
+                output_data, output_dims);
+}
+
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index 3fd00c8930..f707279600 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1964,6 +1964,30 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
   }
 }
 
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
+  // TODO(suharshs): Optimized implementation of dilation depthwise is not
+  // supported yet.
+  TFLITE_DCHECK(dilation_width_factor == 1);
+  TFLITE_DCHECK(dilation_height_factor == 1);
+
+  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
+                filter_offset, bias_data, bias_dims, stride_width,
+                stride_height, pad_width, pad_height, depth_multiplier,
+                output_offset, output_multiplier, output_shift,
+                output_activation_min, output_activation_max, output_data,
+                output_dims);
+}
+
 // Legacy, for compatibility with old checked-in code.
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
index 9aabee5000..bb5d590775 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
@@ -25,8 +25,9 @@ namespace reference_ops {
 inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                           const float* filter_data, const Dims<4>& filter_dims,
                           const float* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
                           float output_activation_min,
                           float output_activation_max, float* output_data,
                           const Dims<4>& output_dims) {
@@ -52,8 +53,9 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
             float total = 0.f;
             for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
               for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + filter_x;
-                const int in_y = in_y_origin + filter_y;
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
                 // If the location is outside the bounds of the input image,
                 // use zero as a default value.
                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
@@ -81,6 +83,20 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
+  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
+                bias_dims, stride_width, stride_height, 1, 1, pad_width,
+                pad_height, depth_multiplier, output_activation_min,
+                output_activation_max, output_data, output_dims);
+}
+
 // Legacy, for compatibility with old checked-in code.
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
index d57739279f..5e3e8997fc 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
@@ -30,8 +30,9 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
                           int32 input_offset, const uint8* filter_data,
                           const Dims<4>& filter_dims, int32 filter_offset,
                           const int32* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
                           int32 output_offset, int32 output_multiplier,
                           int output_shift, int32 output_activation_min,
                           int32 output_activation_max, uint8* output_data,
@@ -58,8 +59,9 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
             int32 acc = 0;
             for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
               for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + filter_x;
-                const int in_y = in_y_origin + filter_y;
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
                 // If the location is outside the bounds of the input image,
                 // use zero as a default value.
                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
@@ -90,6 +92,24 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
   }
 }
 
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
+  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
+                filter_offset, bias_data, bias_dims, stride_width,
+                stride_height, 1, 1, pad_width, pad_height, depth_multiplier,
+                output_offset, output_multiplier, output_shift,
+                output_activation_min, output_activation_max, output_data,
+                output_dims);
+}
+
 // Legacy, for compatibility with old checked-in code.
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index d5da4fcccf..f0db22d581 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -276,11 +276,15 @@ table Pool2DOptions {
 }
 
 table DepthwiseConv2DOptions {
+  // Parameters for DepthwiseConv version 1 or above.
   padding:Padding;
   stride_w:int;
   stride_h:int;
   depth_multiplier:int;
   fused_activation_function:ActivationFunctionType;
+  // Parameters for DepthwiseConv version 2 or above.
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
 }
 
 table ConcatEmbeddingsOptions {
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index 0b9c57480e..8c086a5e67 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -2339,12 +2339,16 @@ struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable {
   int32_t stride_h;
   int32_t depth_multiplier;
   ActivationFunctionType fused_activation_function;
+  int32_t dilation_w_factor;
+  int32_t dilation_h_factor;
   DepthwiseConv2DOptionsT()
       : padding(Padding_SAME),
         stride_w(0),
         stride_h(0),
         depth_multiplier(0),
-        fused_activation_function(ActivationFunctionType_NONE) {
+        fused_activation_function(ActivationFunctionType_NONE),
+        dilation_w_factor(1),
+        dilation_h_factor(1) {
   }
 };
 
@@ -2355,7 +2359,9 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
     VT_STRIDE_W = 6,
     VT_STRIDE_H = 8,
     VT_DEPTH_MULTIPLIER = 10,
-    VT_FUSED_ACTIVATION_FUNCTION = 12
+    VT_FUSED_ACTIVATION_FUNCTION = 12,
+    VT_DILATION_W_FACTOR = 14,
+    VT_DILATION_H_FACTOR = 16
   };
   Padding padding() const {
     return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0));
@@ -2372,6 +2378,12 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
   ActivationFunctionType fused_activation_function() const {
     return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  int32_t dilation_w_factor() const {
+    return GetField<int32_t>(VT_DILATION_W_FACTOR, 1);
+  }
+  int32_t dilation_h_factor() const {
+    return GetField<int32_t>(VT_DILATION_H_FACTOR, 1);
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_PADDING) &&
@@ -2379,6 +2391,8 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
            VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
            VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) &&
            verifier.EndTable();
   }
   DepthwiseConv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -2404,6 +2418,12 @@ struct DepthwiseConv2DOptionsBuilder {
   void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
     fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
   }
+  void add_dilation_w_factor(int32_t dilation_w_factor) {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+  }
+  void add_dilation_h_factor(int32_t dilation_h_factor) {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+  }
   explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
         : fbb_(_fbb) {
     start_ = fbb_.StartTable();
@@ -2422,8 +2442,12 @@ inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
     int32_t stride_w = 0,
     int32_t stride_h = 0,
     int32_t depth_multiplier = 0,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    int32_t dilation_w_factor = 1,
+    int32_t dilation_h_factor = 1) {
   DepthwiseConv2DOptionsBuilder builder_(_fbb);
+  builder_.add_dilation_h_factor(dilation_h_factor);
+  builder_.add_dilation_w_factor(dilation_w_factor);
   builder_.add_depth_multiplier(depth_multiplier);
   builder_.add_stride_h(stride_h);
   builder_.add_stride_w(stride_w);
@@ -7064,6 +7088,8 @@ inline void DepthwiseConv2DOptions::UnPackTo(DepthwiseConv2DOptionsT *_o, const
   { auto _e = stride_h(); _o->stride_h = _e; };
   { auto _e = depth_multiplier(); _o->depth_multiplier = _e; };
   { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  { auto _e = dilation_w_factor(); _o->dilation_w_factor = _e; };
+  { auto _e = dilation_h_factor(); _o->dilation_h_factor = _e; };
 }
 
 inline flatbuffers::Offset<DepthwiseConv2DOptions> DepthwiseConv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
@@ -7079,13 +7105,17 @@ inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
   auto _stride_h = _o->stride_h;
   auto _depth_multiplier = _o->depth_multiplier;
   auto _fused_activation_function = _o->fused_activation_function;
+  auto _dilation_w_factor = _o->dilation_w_factor;
+  auto _dilation_h_factor = _o->dilation_h_factor;
   return tflite::CreateDepthwiseConv2DOptions(
       _fbb,
       _padding,
       _stride_w,
       _stride_h,
       _depth_multiplier,
-      _fused_activation_function);
+      _fused_activation_function,
+      _dilation_w_factor,
+      _dilation_h_factor);
 }
 
 inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 5d0895c72f..3754b58b23 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -1434,6 +1434,7 @@ def make_depthwiseconv_tests(zip_path):
           "input_shape": [[1, 3, 4, 3], [1, 10, 10, 3]],
           "filter_size": [[1, 1], [1, 2], [3, 3]],
           "strides": [[1, 1, 1, 1], [1, 3, 3, 1]],
+          "dilations": [[1, 1, 1, 1], [1, 3, 2, 1], [1, 2, 2, 1]],
           "channel_multiplier": [1, 2],
           "rate": [[1, 1]],
           "padding": ["SAME", "VALID"],
@@ -1444,6 +1445,7 @@ def make_depthwiseconv_tests(zip_path):
           "input_shape": [[1, 3, 4, 3]],
           "filter_size": [[1, 1]],
           "strides": [[1, 1, 2, 1]],  # TF needs [1, x, x, 1]
+          "dilations": [[1, 1, 1, 1], [1, 2, 2, 1]],
           "channel_multiplier": [2],
           "rate": [[2, 2]],  #  Only [1, 1] is supported
           "padding": ["SAME"],
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 2e100e37f6..164b70f2df 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -477,6 +477,11 @@ struct DepthwiseConvOperator : Operator {
   int stride_height = 0;
   int stride_width = 0;
   int depth_multiplier = 0;
+  // A dilation_rate of 0 is invalid and this field is an optional attribute.
+  // Thus initializing it to 1 to allow default conv behavior when the
+  // attribute is not present.
+  int dilation_width_factor = 1;
+  int dilation_height_factor = 1;
 };
 
 // Depth-to-space transform operator.
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 5486012176..1061e7c7c4 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -107,7 +107,8 @@ class DepthwiseConvolution
         ActivationFunction::Serialize(op.fused_activation_function);
     return ::tflite::CreateDepthwiseConv2DOptions(
         *builder, padding, op.stride_width, op.stride_height,
-        op.depth_multiplier, activation_function);
+        op.depth_multiplier, activation_function, op.dilation_width_factor,
+        op.dilation_height_factor);
   }
 
   void ReadOptions(const TfLiteOptions& options,
@@ -118,9 +119,18 @@ class DepthwiseConvolution
     op->depth_multiplier = options.depth_multiplier();
     op->fused_activation_function =
         ActivationFunction::Deserialize(options.fused_activation_function());
+    op->dilation_width_factor = options.dilation_w_factor();
+    op->dilation_height_factor = options.dilation_h_factor();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const Operator& op) const override {
+    const auto& conv_op = static_cast<const DepthwiseConvOperator&>(op);
+    if (conv_op.dilation_width_factor != 1 ||
+        conv_op.dilation_height_factor != 1) {
+      return 2;
+    }
+    return 1;
+  }
 };
 
 class Add : public BuiltinOperator<AddOperator, ::tflite::AddOptions,
-- 
GitLab


From eb7953970c8b2b8a054cddf8ed4b78e66fcd2d02 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 13 Sep 2018 15:15:23 -0700
Subject: [PATCH 0168/1357] Fix parallel_gpu_execute.sh script on windows.

PiperOrigin-RevId: 212887532
---
 .../gpu_build/parallel_gpu_execute.sh         | 26 +++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index 48b3989d86..03a2a07fb1 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -31,6 +31,28 @@ TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU:-4}
 # future and to use a rounder number, we set it to 1G.
 export TF_PER_DEVICE_MEMORY_LIMIT_MB=1024
 
+# *******************************************************************
+#         This section of the script is needed to
+#         make things work on windows under msys.
+# *******************************************************************
+RUNFILES_MANIFEST_FILE="${TEST_SRCDIR}/MANIFEST"
+function rlocation() {
+  if is_absolute "$1" ; then
+    # If the file path is already fully specified, simply return it.
+    echo "$1"
+  elif [[ -e "$TEST_SRCDIR/$1" ]]; then
+    # If the file exists in the $TEST_SRCDIR then just use it.
+    echo "$TEST_SRCDIR/$1"
+  elif [[ -e "$RUNFILES_MANIFEST_FILE" ]]; then
+    # If a runfiles manifest file exists then use it.
+    echo "$(grep "^$1 " "$RUNFILES_MANIFEST_FILE" | sed 's/[^ ]* //')"
+  fi
+}
+
+TEST_BINARY="$(rlocation $TEST_WORKSPACE/${1#./})"
+shift
+# *******************************************************************
+
 mkdir -p /var/lock
 # Try to acquire any of the TF_GPU_COUNT * TF_TESTS_PER_GPU
 # slots to run a test at.
@@ -46,8 +68,8 @@ for j in `seq 0 $((TF_TESTS_PER_GPU-1))`; do
         # This export only works within the brackets, so it is isolated to one
         # single command.
         export CUDA_VISIBLE_DEVICES=$i
-        echo "Running test $@ on GPU $CUDA_VISIBLE_DEVICES"
-        $@
+        echo "Running test $TEST_BINARY $* on GPU $CUDA_VISIBLE_DEVICES"
+        "$TEST_BINARY" $@
       )
       return_code=$?
       flock -u "$lock_fd"
-- 
GitLab


From ea52ecd836098e0b1d37325cf1b91133f908547e Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 13 Sep 2018 15:27:12 -0700
Subject: [PATCH 0169/1357] Fix bug in kSlice implementation in evaluator.
 Slice was producing a literal with a default layout rather than the layout of
 the slice HLO instruction. This resulted in errors when the produced literal
 was consumed by later operations.

PiperOrigin-RevId: 212889334
---
 .../compiler/xla/service/hlo_evaluator.cc     |  6 ++++++
 .../xla/service/hlo_evaluator_test.cc         | 19 +++++++++++++++++++
 .../xla/service/hlo_evaluator_typed_visitor.h | 16 +++-------------
 3 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 064b86493d..06b6d5b559 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -1339,6 +1339,12 @@ Status HloEvaluator::Preprocess(HloInstruction* hlo) {
 Status HloEvaluator::Postprocess(HloInstruction* hlo) {
   VLOG(2) << "Finished visiting " << hlo->ToString()
           << "; evaluated value is: " << GetEvaluatedLiteralFor(hlo).ToString();
+  // Out of convenience the literal may have been produced with a different
+  // layout. Relayout as indicated by the HLO instruction.
+  if (!LayoutUtil::LayoutsInShapesEqual(GetEvaluatedLiteralFor(hlo).shape(),
+                                        hlo->shape())) {
+    evaluated_.at(hlo) = evaluated_.at(hlo).Relayout(hlo->shape());
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index 16411eb078..01e88566a5 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -2570,6 +2570,25 @@ ENTRY main {
   EXPECT_TRUE(LiteralTestUtil::Equal(expected, Evaluate({&arg})));
 }
 
+TEST_P(HloEvaluatorTest, SliceWithDifferentLayout) {
+  // Regression test for b/114735354.
+  const string hlo_text = R"(
+HloModule SliceWithDifferentLayout
+
+ENTRY main {
+  arg = f32[2,2,2]{0,1,2} parameter(0)
+  ROOT %slice = f32[2,2,2]{1,0,2} slice(f32[2,2,2]{0,1,2} %arg), slice={[0:2], [0:2], [0:2]}
+}
+)";
+  ParseAndVerifyModule(hlo_text);
+
+  Literal arg = LiteralUtil::CreateR3WithLayout<float>(
+      {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}},
+      LayoutUtil::MakeLayout({0, 1, 2}));
+  Literal actual = Evaluate({&arg});
+  EXPECT_TRUE(LiteralTestUtil::Equal(arg, actual));
+}
+
 INSTANTIATE_TEST_CASE_P(HloEvaluatorTest_Instantiation, HloEvaluatorTest,
                         ::testing::ValuesIn(use_bf16_params));
 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index 7f090a52db..8fb17a0033 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -249,12 +249,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
     TF_ASSIGN_OR_RETURN(Literal result,
                         parent_->GetEvaluatedLiteralFor(operand).Convert(
                             convert->shape().element_type()));
-
-    if (LayoutUtil::LayoutsInShapesEqual(result.shape(), convert->shape())) {
-      parent_->evaluated_[convert] = std::move(result);
-    } else {
-      parent_->evaluated_[convert] = result.Relayout(convert->shape().layout());
-    }
+    parent_->evaluated_[convert] = std::move(result);
     return Status::OK();
   }
 
@@ -265,11 +260,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
                         parent_->GetEvaluatedLiteralFor(operand).BitcastConvert(
                             convert->shape().element_type()));
 
-    if (LayoutUtil::LayoutsInShapesEqual(result.shape(), convert->shape())) {
-      parent_->evaluated_[convert] = std::move(result);
-    } else {
-      parent_->evaluated_[convert] = result.Relayout(convert->shape().layout());
-    }
+    parent_->evaluated_[convert] = std::move(result);
     return Status::OK();
   }
 
@@ -2350,8 +2341,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       return operand_literal.Get<ReturnT>(operand_index);
     };
 
-    auto result = LiteralUtil::CreateFromDimensions(
-        shape.element_type(), AsInt64Slice(shape.dimensions()));
+    Literal result(shape);
     TF_RETURN_IF_ERROR(result.Populate<ReturnT>(func));
     parent_->evaluated_[slice] = std::move(result);
     return Status::OK();
-- 
GitLab


From e59ddcca727340a8b45694a28cd9f52352607e63 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Thu, 13 Sep 2018 15:34:43 -0700
Subject: [PATCH 0170/1357] Automated rollback of commit
 6b507a6de855a6f988100904229b7f46a5652b88

PiperOrigin-RevId: 212890622
---
 tensorflow/contrib/lite/toco/BUILD            |  1 -
 .../contrib/lite/toco/import_tensorflow.cc    | 18 -----
 .../lite/toco/import_tensorflow_test.cc       | 75 ++-----------------
 3 files changed, 5 insertions(+), 89 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD
index 72c71b2841..bea90f1ce8 100644
--- a/tensorflow/contrib/lite/toco/BUILD
+++ b/tensorflow/contrib/lite/toco/BUILD
@@ -331,7 +331,6 @@ cc_library(
         "//tensorflow/core:core_cpu_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:ops",
         "//tensorflow/core:protos_all_cc",
     ] + select({
         # Placeholder for internal darwin rule.
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index eb36b3411d..9bc23c4b3c 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -58,7 +58,6 @@ using tensorflow::DT_STRING;
 using tensorflow::DT_UINT8;
 using tensorflow::GraphDef;
 using tensorflow::NodeDef;
-using tensorflow::OpRegistry;
 using tensorflow::TensorProto;
 using tensorflow::TensorShapeProto;
 
@@ -1080,23 +1079,6 @@ tensorflow::Status ConvertUnsupportedOperator(
   } else if (HasAttr(node, "Tout")) {
     const auto& output_type = GetDataTypeAttr(node, "Tout");
     op->output_data_types.push_back(ConvertDataType(output_type));
-  } else {
-    const tensorflow::OpDef* op_def = nullptr;
-    if (OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
-      for (const auto& output_arg : op_def->output_arg()) {
-        if (HasAttr(node, output_arg.type_attr())) {
-          op->output_data_types.push_back(
-              ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr())));
-        } else {
-          LOG(INFO) << "Op node missing output type attribute: " << node.name();
-        }
-      }
-    }
-    if (op->output_data_types.empty()) {
-      // TODO(b/113613439): Figure out how to propagate types for custom ops
-      // that have no OpDef.
-      LOG(INFO) << "Unable to determine output type for op: " << node.op();
-    }
   }
   if (HasAttr(node, kAttrOutputShapes)) {
     const auto& output_shapes = GetListAttr(node, kAttrOutputShapes);
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
index da248826a7..a00e136dd6 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
@@ -49,17 +49,6 @@ Status ImportTensorFlowNode(const NodeDef&, const TensorFlowImportFlags&,
 
 namespace {
 
-Status ImportNode(const NodeDef& node, Model* model) {
-  const auto converter = internal::GetTensorFlowNodeConverterMap();
-  return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), model,
-                                        converter);
-}
-
-Status ImportNode(const NodeDef& node) {
-  Model model;
-  return ImportNode(node, &model);
-}
-
 class ShapeImportTest : public ::testing::TestWithParam<tensorflow::DataType> {
  protected:
   ShapeImportTest() {}
@@ -120,24 +109,12 @@ class ShapeImportTest : public ::testing::TestWithParam<tensorflow::DataType> {
     SetAttrValue(t, &value_attr);
     (*node->mutable_attr())["value"] = value_attr;
   }
-};
-
-class TypeImportTest : public ::testing::TestWithParam<
-                           std::pair<tensorflow::DataType, ArrayDataType>> {
- protected:
-  TypeImportTest() {}
-
-  void BuildUnaryNode(const std::string& op_name, tensorflow::DataType dtype,
-                      NodeDef* node) {
-    node->set_op(op_name);
-    node->set_name("Node1");
-
-    node->add_input();
-    node->set_input(0, "Node0");
 
-    AttrValue dtype_attr;
-    SetAttrValue(dtype, &dtype_attr);
-    (*node->mutable_attr())["T"] = dtype_attr;
+  Status ImportNode(const NodeDef& node) {
+    Model model;
+    const auto converter = internal::GetTensorFlowNodeConverterMap();
+    return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), &model,
+                                          converter);
   }
 };
 
@@ -190,47 +167,5 @@ TEST_P(ShapeImportTest, ValidShapeButZeroElements) {
 INSTANTIATE_TEST_CASE_P(ValidShapeButZeroElements, ShapeImportTest,
                         ::testing::ValuesIn(TestTypes()));
 
-std::vector<std::pair<tensorflow::DataType, ArrayDataType>> UnaryTestTypes() {
-  return {{DT_FLOAT, ArrayDataType::kFloat},
-          {DT_INT32, ArrayDataType::kInt32},
-          {DT_INT64, ArrayDataType::kInt64}};
-}
-
-TEST_P(TypeImportTest, BasicTypeInference) {
-  NodeDef node;
-  BuildUnaryNode("Atan", GetParam().first, &node);
-
-  Model model;
-  EXPECT_TRUE(ImportNode(node, &model).ok());
-
-  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
-  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
-  const TensorFlowUnsupportedOperator* op =
-      static_cast<const TensorFlowUnsupportedOperator*>(
-          model.operators[0].get());
-  ASSERT_THAT(op->output_data_types, ::testing::ElementsAre(GetParam().second));
-}
-INSTANTIATE_TEST_CASE_P(BasicTypeInference, TypeImportTest,
-                        ::testing::ValuesIn(UnaryTestTypes()));
-
-TEST(ImportTest, FailedTypeInference) {
-  // Create a unary op with no Type ("T") annotation.
-  NodeDef node;
-  node.set_op("Atan");
-  node.set_name("Node1");
-  node.add_input();
-  node.set_input(0, "Node0");
-
-  Model model;
-  EXPECT_TRUE(ImportNode(node, &model).ok());
-
-  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
-  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
-  const TensorFlowUnsupportedOperator* op =
-      static_cast<const TensorFlowUnsupportedOperator*>(
-          model.operators[0].get());
-  ASSERT_TRUE(op->output_data_types.empty());
-}
-
 }  // namespace
 }  // namespace toco
-- 
GitLab


From ec3f08e28f77309860fe7430a4567407bc26c5df Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Thu, 13 Sep 2018 15:47:36 -0700
Subject: [PATCH 0171/1357] Fixing error output in api_compatibility_test.py.
 Looks like it should be self.maxDiff instead of self.maxDiffs: "Diff is 2708
 characters long. Set self.maxDiff to None to see it."

PiperOrigin-RevId: 212892831
---
 tensorflow/tools/api/tests/api_compatibility_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index 99bed5714f..d06c7f2d49 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -174,7 +174,7 @@ class ApiCompatibilityTest(test.TestCase):
         verbose_diff_message = diff_message
       else:
         # Do not truncate diff
-        self.maxDiffs = None  # pylint: disable=invalid-name
+        self.maxDiff = None  # pylint: disable=invalid-name
         # Now we can run an actual proto diff.
         try:
           self.assertProtoEquals(expected_dict[key], actual_dict[key])
-- 
GitLab


From 133a9ef4cb05e4a1a2122bdb5176e2954139c3c3 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 13 Sep 2018 15:47:40 -0700
Subject: [PATCH 0172/1357] Put a deprecation notice in cmake readme.

PiperOrigin-RevId: 212892844
---
 tensorflow/contrib/cmake/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 0b79f718d4..789dab81ed 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -1,6 +1,10 @@
 TensorFlow CMake build
 ======================
 
+CMAKE build is deprecated for TensorFlow. Please use `bazel` to build TF for all
+platforms. For details, see the
+[TensorFlow install guide](https://www.tensorflow.org/install/).
+
 This directory contains CMake files for building TensorFlow on Microsoft
 Windows. [CMake](https://cmake.org) is a cross-platform tool that can
 generate build scripts for multiple build systems, including Microsoft
-- 
GitLab


From 4292b8107175b3c3223f65c75b3ca091bd0604ec Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 13 Sep 2018 15:48:52 -0700
Subject: [PATCH 0173/1357] [TF:XLA] Bump open source abseil revision to
 8ff1374008259719b54a8cb128ef951c02da164c

PiperOrigin-RevId: 212893036
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 65314a4a06..25698da1c9 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -106,11 +106,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/02451914b9ad5320f81f56a89f3eef1f8683227c.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/02451914b9ad5320f81f56a89f3eef1f8683227c.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/8ff1374008259719b54a8cb128ef951c02da164c.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/8ff1374008259719b54a8cb128ef951c02da164c.tar.gz",
         ],
-        sha256 = "345fa25136484a9e5d918880d66ee577a9cb24377f8978d4e5a6c543706a1011",
-        strip_prefix = "abseil-cpp-02451914b9ad5320f81f56a89f3eef1f8683227c",
+        sha256 = "006931f9705484041eed65189038f87931a87cff200bb296f94b3d42339c4cd9",
+        strip_prefix = "abseil-cpp-8ff1374008259719b54a8cb128ef951c02da164c",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
     )
 
-- 
GitLab


From 29b56bde1e28e558111b917fd44b973e2aea7fcf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 16:08:12 -0700
Subject: [PATCH 0174/1357] Automated rollback of commit
 ac60b46e2c5962fd8099a4406c1788d826ad3c0d

PiperOrigin-RevId: 212896336
---
 tensorflow/compiler/jit/BUILD                 |   6 +
 .../jit/encapsulate_subgraphs_pass.cc         |  17 +
 .../compiler/jit/encapsulate_subgraphs_pass.h |   6 +
 .../jit/encapsulate_xla_computations_pass.cc  | 360 ++++++++++++++++++
 .../jit/encapsulate_xla_computations_pass.h   |  60 +++
 .../encapsulate_xla_computations_pass_test.cc | 346 +++++++++++++++++
 .../jit/jit_compilation_pass_registration.cc  |   9 +-
 tensorflow/compiler/jit/ops/xla_ops.cc        |  19 +
 tensorflow/compiler/tf2xla/BUILD              |   1 +
 tensorflow/compiler/tf2xla/test_util.cc       |   8 +
 tensorflow/compiler/tf2xla/test_util.h        |  16 +
 .../common_runtime/graph_execution_state.cc   |   4 +
 .../grappler/optimizers/meta_optimizer.cc     |  23 ++
 13 files changed, 874 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
 create mode 100644 tensorflow/compiler/jit/encapsulate_xla_computations_pass.h
 create mode 100644 tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 7d5db713f6..f4e1bc5e83 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -363,6 +363,7 @@ cc_library(
         "deadness_analysis.cc",
         "deadness_analysis_internal.h",
         "encapsulate_subgraphs_pass.cc",
+        "encapsulate_xla_computations_pass.cc",
         "mark_for_compilation_pass.cc",
         "mark_for_compilation_pass_test_helper.cc",
         "partially_decluster_pass.cc",
@@ -371,6 +372,7 @@ cc_library(
         "build_xla_launch_ops_pass.h",
         "deadness_analysis.h",
         "encapsulate_subgraphs_pass.h",
+        "encapsulate_xla_computations_pass.h",
         "mark_for_compilation_pass.h",
         "mark_for_compilation_pass_test_helper.h",
         "partially_decluster_pass.h",
@@ -397,6 +399,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:bounds_check",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -475,6 +478,7 @@ tf_cc_test(
     size = "small",
     srcs = [
         "encapsulate_subgraphs_pass_test.cc",
+        "encapsulate_xla_computations_pass_test.cc",
         "mark_for_compilation_pass_test.cc",
         "partially_decluster_pass_test.cc",
     ],
@@ -490,7 +494,9 @@ tf_cc_test(
         "//tensorflow/cc:resource_variable_ops",
         "//tensorflow/cc:sendrecv_ops",
         "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/tf2xla:test_util",
         "//tensorflow/compiler/tf2xla:xla_compiler",
+        "//tensorflow/compiler/tf2xla/cc:xla_jit_ops",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index ae7a22f451..e0632ff7e4 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h"
 #include "tensorflow/compiler/jit/mark_for_compilation_pass.h"
@@ -58,6 +59,22 @@ const char* const kXlaNumResourceArgsAttr = "_XlaNumResourceArgs";
 const char* const kXlaHostTransferSequencerAttr =
     "_xla_host_transfer_sequencer";
 
+void SortControlInputs(GraphDef* gdef) {
+  int64 num_nodes = gdef->node_size();
+  for (int64 i = 0; i < num_nodes; ++i) {
+    NodeDef* node = gdef->mutable_node(i);
+    // Stable sort control inputs and leave the order of data inputs unchanged.
+    std::stable_sort(node->mutable_input()->begin(),
+                     node->mutable_input()->end(),
+                     [](const string& a, const string& b) {
+                       bool a_is_control = absl::StartsWith(a, "^");
+                       bool b_is_control = absl::StartsWith(b, "^");
+                       return (!a_is_control && b_is_control) ||
+                              (a_is_control && b_is_control && a < b);
+                     });
+  }
+}
+
 namespace {
 
 bool AreAllParentsGuaranteedConst(
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
index 926589546f..90354a801a 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
@@ -102,6 +102,12 @@ extern const char* const kXlaNumConstantArgsAttr;
 // Name of the attribute containing the number of resource variable arguments.
 extern const char* const kXlaNumResourceArgsAttr;
 
+// Sorts each node's control inputs by their names. This guarantees that for two
+// structually equivalent GraphDefs, we get the same traversal ordering on
+// node's control input fields.
+// TODO(hpucha): Move the utilities to a more appropriate place.
+void SortControlInputs(GraphDef* gdef);
+
 class EncapsulateSubgraphsPass : public GraphOptimizationPass {
  public:
   Status Run(const GraphOptimizationPassOptions& options) override;
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
new file mode 100644
index 0000000000..97ef8cd3cb
--- /dev/null
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -0,0 +1,360 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
+
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/tf2xla/dump_graph.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/lib/strings/proto_serialization.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/fingerprint.h"
+
+namespace tensorflow {
+
+const char* const EncapsulateXlaComputationsPass::kXlaClusterAttr =
+    "_xla_compile_id";
+
+namespace {
+
+const char* const kXlaClusterOutput = "XlaClusterOutput";
+
+// Checks if a graph node is marked to be a guaranteed constant.
+bool is_guaranteed_constant(const Node& n) {
+  bool guaranteed_constant = false;
+  if (!GetNodeAttr(n.attrs(), "_is_guaranteed_constant", &guaranteed_constant)
+           .ok()) {
+    return false;
+  }
+  return guaranteed_constant;
+}
+
+// Finds the `index` of an _Arg or _Retval node.
+Status GetIndexAttr(const Node& n, int num_args, int* index) {
+  TF_RETURN_IF_ERROR(GetNodeAttr(n.attrs(), "index", index));
+  if (*index < 0 || *index >= num_args) {
+    return errors::InvalidArgument("Invalid ", n.type_string(), " number ",
+                                   *index);
+  }
+  return Status::OK();
+}
+
+// Returns the data type of the destination of an edge.
+DataType EdgeType(const Edge* edge) {
+  return edge->dst()->input_type(edge->dst_input());
+}
+
+// Adds the control inputs of `node` to `*deps`.
+void AddControlInputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+  for (const Edge* edge : node.in_edges()) {
+    if (edge->IsControlEdge()) {
+      deps->insert(edge->src());
+    }
+  }
+}
+
+// Adds the control outputs of `node` to `*deps`.
+void AddControlOutputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+  for (const Edge* edge : node.out_edges()) {
+    if (edge->IsControlEdge()) {
+      deps->insert(edge->dst());
+    }
+  }
+}
+
+// Rewrite function to be passed to EncapsulateSubgraphsInFunctions that sorts
+// the arguments into the order expected by XlaLaunch computations:
+// 1) arguments
+// 2) resource variable arguments
+// See the documentation of EncapsulateSubgraphsInFunctions for the meaning
+// of the arguments.
+//
+// TODO(b/113166435): Ordering constraints on XlaLaunch op can be relaxed.
+Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
+                       std::unique_ptr<Graph>* graph_ptr,
+                       std::vector<int>* input_permutation,
+                       std::vector<int>* output_permutation,
+                       NodeDef* call_def) {
+  Graph* graph = graph_ptr->get();
+  const int num_args = input_permutation->size();
+  const int num_retvals = output_permutation->size();
+
+  std::vector<Node*> args;
+  std::vector<Node*> retvals;
+  args.reserve(num_args);
+  retvals.reserve(num_retvals);
+  for (Node* n : graph->nodes()) {
+    if (n->type_string() == "_Arg") {
+      // Check if this is a guaranteed constant.
+      if (is_guaranteed_constant(*n)) {
+        return errors::InvalidArgument(
+            "Guaranteed constants are not supported (", n->name(), ")");
+      }
+      args.push_back(n);
+    } else if (n->type_string() == "_Retval") {
+      retvals.push_back(n);
+    }
+  }
+
+  if (std::find(args.begin(), args.end(), nullptr) != args.end()) {
+    return errors::InvalidArgument("Missing or non-consecutive arguments");
+  }
+
+  // Reorders the arguments.
+  std::sort(args.begin(), args.end(), [&](Node* a, Node* b) {
+    // Non-resources appear before resources
+    bool a_is_resource = (a->output_type(0) == DT_RESOURCE);
+    bool b_is_resource = (b->output_type(0) == DT_RESOURCE);
+    // Uses the name as a tiebreaker so the output is deterministic.
+    StringPiece a_name(a->name());
+    StringPiece b_name(b->name());
+    return std::tie(a_is_resource, a_name) < std::tie(b_is_resource, b_name);
+  });
+
+  // Sorts the retvals by name so the order is deterministic.
+  std::sort(retvals.begin(), retvals.end(),
+            [](Node* a, Node* b) { return a->name() < b->name(); });
+
+  // Computes the permutation to produce the correct argument order, and update
+  // the argument indices.
+  int variable_start_index = num_args;
+  for (int i = 0; i < num_args; ++i) {
+    int index;
+    TF_RETURN_IF_ERROR(GetIndexAttr(*args[i], num_args, &index));
+    if (args[i]->output_type(0) == DT_RESOURCE &&
+        variable_start_index == num_args) {
+      variable_start_index = i;
+    }
+    (*input_permutation)[index] = i;
+    args[i]->AddAttr("index", i);
+  }
+  VLOG(4) << "variable_start_index: " << variable_start_index;
+
+  // Computes the permutation to produce the correct retval order, and update
+  // the argument indices.
+  for (int i = 0; i < num_retvals; ++i) {
+    int index;
+    TF_RETURN_IF_ERROR(GetIndexAttr(*retvals[i], num_retvals, &index));
+    (*output_permutation)[index] = i;
+    retvals[i]->AddAttr("index", i);
+  }
+
+  AddNodeAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, call_def->name(),
+              call_def);
+  AddNodeAttr("_variable_start_index", variable_start_index, call_def);
+
+  // Uniquify the function name.
+  GraphDef gdef;
+  graph->ToGraphDef(&gdef);
+
+  // Before serialization, sort each node's control inputs to achieve
+  // determinism. Sorting control inputs could help (but not necessarily) create
+  // a deterministic serialization and fingerprint. Other sources of
+  // nondeterminism include unstable node ordering.
+  SortControlInputs(&gdef);
+  // Fingerprint the function.
+  // Nondeterminism in serialization would not lead to incorrect results, but
+  // may cause spurious cache misses. DeterministicSerialization is a
+  // best-effort deterministic serialization.
+  string serialized;
+  TF_RET_CHECK(SerializeToStringDeterministic(gdef, &serialized));
+  uint64 fingerprint = Fingerprint64(serialized);
+  LOG(INFO) << "Subgraph fingerprint:" << fingerprint;
+  call_def->set_op(absl::StrCat(call_def->op(), "_", fingerprint));
+  return Status::OK();
+}
+
+}  // namespace
+
+/*static*/ Status EncapsulateXlaComputationsPass::Encapsulate(
+    std::unique_ptr<Graph>* graph, FunctionLibraryDefinition* flib_def) {
+  // Check for undeclared outputs before Encapsulation, so we can give a better
+  // error message.
+  // TODO(phawkins): merge this with the encapsulation code to avoid the extra
+  // O(n) pass over the edges.
+  for (const Edge* e : (*graph)->edges()) {
+    if (!e->IsControlEdge() &&
+        e->src()->attrs().Find(kXlaClusterAttr) != nullptr &&
+        e->dst()->attrs().Find(kXlaClusterAttr) == nullptr &&
+        e->dst()->type_string() != kXlaClusterOutput) {
+      return errors::InvalidArgument(
+          "Undeclared output of XLA computation. A common cause of this error "
+          "is variable initializers that depend on the XLA computation. Edge: ",
+          e->src()->name(), ":", e->src_output(), " -> ", e->dst()->name(), ":",
+          e->dst_input());
+    }
+  }
+
+  auto output = absl::make_unique<Graph>((*graph)->op_registry());
+  TF_RETURN_WITH_CONTEXT_IF_ERROR(
+      EncapsulateSubgraphsInFunctions(
+          kXlaClusterAttr, "", **graph, RewriteSubgraph,
+          /*reuse_existing_functions=*/true, &output, flib_def),
+      "EncapsulateXlaComputationsPass failed");
+  graph->swap(output);
+  return Status::OK();
+}
+
+/*static*/ Status EncapsulateXlaComputationsPass::BuildXlaLaunchOps(
+    Graph* graph) {
+  // Finds all of the XlaLaunch function calls, to avoid mutating the graph
+  // while iterating.
+  std::vector<Node*> launch_nodes;
+  for (Node* n : graph->nodes()) {
+    string name;
+    if (GetNodeAttr(n->attrs(), kXlaClusterAttr, &name).ok()) {
+      launch_nodes.push_back(n);
+    }
+  }
+
+  // Replaces each launch function call together with its neighboring
+  // XlaClusterOutput nodes with a XlaLaunch node.
+  for (Node* launch : launch_nodes) {
+    int variable_start_index;
+    TF_RETURN_IF_ERROR(GetNodeAttr(launch->attrs(), "_variable_start_index",
+                                   &variable_start_index));
+
+    std::vector<const Edge*> in_edges;
+    TF_RETURN_IF_ERROR(launch->input_edges(&in_edges));
+
+    const int num_inputs = in_edges.size();
+    const int num_variables = num_inputs - variable_start_index;
+    const int num_args = variable_start_index;
+
+    VLOG(4) << "Launch node '" << launch->name() << "'"
+            << " input edges: " << in_edges.size() << " num_args: " << num_args
+            << " num_variables: " << num_variables;
+
+    std::vector<Node*> nodes_to_remove = {launch};
+
+    // Data and control inputs to the new XlaLaunch node.
+    std::vector<std::pair<Node*, int>> data_inputs(num_inputs);
+    gtl::FlatSet<Node*> control_inputs;
+    DataTypeVector arg_types(num_args);
+
+    AddControlInputs(*launch, &control_inputs);
+
+    for (int i = 0; i < num_args; ++i) {
+      const Edge* edge = in_edges[i];
+      data_inputs[i] = {edge->src(), edge->src_output()};
+      arg_types[i] = EdgeType(edge);
+    }
+
+    // Appends the variable inputs.
+    for (int i = 0; i < num_variables; ++i) {
+      int pos = variable_start_index + i;
+      const Edge* edge = in_edges[pos];
+      data_inputs[pos] = {edge->src(), edge->src_output()};
+    }
+
+    // Outputs.
+    const int num_outputs = launch->output_types().size();
+    gtl::FlatSet<Node*> control_outputs;
+    std::vector<std::vector<std::pair<Node*, int>>> data_outputs(num_outputs);
+    DataTypeVector output_types(num_outputs);
+
+    for (const Edge* le : launch->out_edges()) {
+      if (le->IsControlEdge()) {
+        control_outputs.insert(le->dst());
+      } else {
+        TF_RET_CHECK(le->src_output() < num_outputs);
+        Node* output_node = le->dst();
+
+        TF_RET_CHECK(output_node->type_string() == kXlaClusterOutput)
+            << le->DebugString();
+        nodes_to_remove.push_back(output_node);
+
+        for (const Edge* oe : output_node->out_edges()) {
+          TF_RET_CHECK(!oe->IsControlEdge());
+          data_outputs[le->src_output()].push_back(
+              {oe->dst(), oe->dst_input()});
+        }
+        output_types[le->src_output()] = output_node->input_type(0);
+
+        AddControlOutputs(*output_node, &control_outputs);
+      }
+    }
+
+    NodeDef def;
+    def.set_name(launch->name());
+
+    // Target the XLA CPU/GPU backends.
+    VLOG(2) << "Replacing with XlaLaunch";
+    def.set_op("XlaLaunch");
+    AddNodeAttr("Tconstants", DataTypeVector{}, &def);
+    AddNodeAttr("Targs", arg_types, &def);
+    AddNodeAttr("Nresources", num_variables, &def);
+    AddNodeAttr("Tresults", output_types, &def);
+    NameAttrList function;
+    function.set_name(launch->type_string());
+    AddNodeAttr("function", function, &def);
+
+    for (Node* node : nodes_to_remove) {
+      VLOG(2) << "Deleting node " << node->DebugString();
+      // Ensure that we do not attempt to add control edges to nodes that are
+      // deleted.
+      control_inputs.erase(node);
+      control_outputs.erase(node);
+      graph->RemoveNode(node);
+    }
+
+    Status status;
+    Node* xla_launch = graph->AddNode(def, &status);
+    if (!status.ok()) {
+      return status;
+    }
+    for (int i = 0; i < data_inputs.size(); ++i) {
+      graph->AddEdge(data_inputs[i].first, data_inputs[i].second, xla_launch,
+                     i);
+    }
+    for (Node* n : control_inputs) {
+      graph->AddControlEdge(n, xla_launch);
+    }
+    for (int i = 0; i < data_outputs.size(); ++i) {
+      for (const auto& successor : data_outputs[i]) {
+        graph->AddEdge(xla_launch, i, successor.first, successor.second);
+      }
+    }
+    for (Node* n : control_outputs) {
+      graph->AddControlEdge(xla_launch, n);
+    }
+  }
+  return Status::OK();
+}
+
+Status EncapsulateXlaComputationsPass::Run(
+    const GraphOptimizationPassOptions& options) {
+  VLOG(1) << "EncapsulateXlaComputations(): "
+          << dump_graph::DumpGraphToFile("encapsulate_xla_computations_before",
+                                         **options.graph, options.flib_def);
+
+  TF_RETURN_IF_ERROR(Encapsulate(options.graph, options.flib_def));
+  VLOG(1) << "EncapsulateXlaComputations() half-way: "
+          << dump_graph::DumpGraphToFile("encapsulate_xla_computations_halfway",
+                                         **options.graph, options.flib_def);
+
+  TF_RETURN_IF_ERROR(BuildXlaLaunchOps(options.graph->get()));
+  VLOG(1) << "EncapsulateXlaComputations() finished: "
+          << dump_graph::DumpGraphToFile("encapsulate_xla_computations_after",
+                                         **options.graph, options.flib_def);
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h
new file mode 100644
index 0000000000..99e9dfd598
--- /dev/null
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h
@@ -0,0 +1,60 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ==============================================================================*/
+// Rewrites computations generated by the xla.compile() Python code into
+// XlaLaunch nodes.
+//
+// xla.compile() does two main things:
+// a) marks operators that make up an XLA computation with the attribute
+//    _xla_compile_id=XYZ, where XYZ is a unique key.
+// b) adds XlaClusterOutput nodes to represent outputs of the computation.
+//    These nodes are not marked with the _xla_compile_id attribute.
+
+#ifndef TENSORFLOW_COMPILER_JIT_ENCAPSULATE_XLA_COMPUTATIONS_PASS_H_
+#define TENSORFLOW_COMPILER_JIT_ENCAPSULATE_XLA_COMPUTATIONS_PASS_H_
+
+#include "tensorflow/core/common_runtime/optimization_registry.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/platform/env.h"
+
+    namespace tensorflow {
+
+// Encapsulates nodes marked with the _xla_compile_id attribute into
+// XlaLaunch operators.
+class EncapsulateXlaComputationsPass : public GraphOptimizationPass {
+ public:
+  static const char* const kXlaClusterAttr;  // _xla_compile_id
+
+  Status Run(const GraphOptimizationPassOptions& options) override;
+
+  // The following methods are public only for unit tests.
+
+  // This pass has two stages:
+  // a) first, we call EncapsulateSubgraphsPass to encapsulate all nodes
+  //    marked with the same _xla_compile_id attribute into functions. These
+  //    functions contain the computations to be passed to XlaLaunch. During
+  //    encapsulation, we sort the arguments into the order expected by
+  //    XlaLaunch.
+  static Status Encapsulate(std::unique_ptr<Graph>* graph,
+                            FunctionLibraryDefinition* flib_def);
+
+  // b) we rewrite the function calls generated in phase (a) into XlaLaunch
+  //    operators. We also convert the XlaClusterOutput output nodes of the
+  //    function call into the outputs of the XlaLaunch operator.
+  static Status BuildXlaLaunchOps(Graph* graph);
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_JIT_ENCAPSULATE_XLA_COMPUTATIONS_PASS_H_
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
new file mode 100644
index 0000000000..f643fb0cfe
--- /dev/null
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
@@ -0,0 +1,346 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
+
+#include "tensorflow/cc/ops/function_ops.h"
+#include "tensorflow/cc/ops/resource_variable_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_op.h"
+#include "tensorflow/compiler/tf2xla/test_util.h"
+#include "tensorflow/core/framework/graph_to_functiondef.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/lib/strings/proto_serialization.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/util/equal_graph_def.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+
+static std::unique_ptr<Graph> MakeOuterGraph(
+    const FunctionLibraryDefinition& flib_def, const string& function) {
+  Scope scope = Scope::NewRootScope().ExitOnError();
+  TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib_def.ToProto()));
+
+  auto a = ops::Placeholder(scope.WithOpName("A"), DT_INT32);
+  auto b = ops::Placeholder(scope.WithOpName("B"), DT_FLOAT);
+  auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32);
+  auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT);
+  auto u = ops::Placeholder(scope.WithOpName("U"), DT_RESOURCE);
+  auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE);
+  auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE);
+
+  NodeDef def;
+  TF_CHECK_OK(
+      NodeDefBuilder("launch0", function, &flib_def)
+          .Input(a.node()->name(), 0, DT_INT32)
+          .Input(b.node()->name(), 0, DT_FLOAT)
+          .Input(c.node()->name(), 0, DT_INT32)
+          .Input(d.node()->name(), 0, DT_FLOAT)
+          .Input(u.node()->name(), 0, DT_RESOURCE)
+          .Input(v.node()->name(), 0, DT_RESOURCE)
+          .Input(w.node()->name(), 0, DT_RESOURCE)
+          .Attr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0")
+          .Attr("_variable_start_index", 4)
+          .Finalize(&def));
+
+  Status status;
+  Node* launch = scope.graph()->AddNode(def, &status);
+  TF_CHECK_OK(status);
+  TF_CHECK_OK(scope.DoShapeInference(launch));
+  scope.graph()->AddEdge(a.node(), 0, launch, 0);
+  scope.graph()->AddEdge(b.node(), 0, launch, 1);
+  scope.graph()->AddEdge(c.node(), 0, launch, 2);
+  scope.graph()->AddEdge(d.node(), 0, launch, 3);
+  scope.graph()->AddEdge(u.node(), 0, launch, 4);
+  scope.graph()->AddEdge(v.node(), 0, launch, 5);
+  scope.graph()->AddEdge(w.node(), 0, launch, 6);
+
+  auto out0 =
+      ops::XlaClusterOutput(scope.WithOpName("Out0"), Output(launch, 0));
+  auto out1 =
+      ops::XlaClusterOutput(scope.WithOpName("Out1"), Output(launch, 1));
+  auto out2 =
+      ops::XlaClusterOutput(scope.WithOpName("Out2"), Output(launch, 2));
+  auto out3 =
+      ops::XlaClusterOutput(scope.WithOpName("Out3"), Output(launch, 3));
+
+  auto consumer0_a = ops::Identity(scope.WithOpName("consumer0_a"), out0);
+  auto consumer0_b = ops::Identity(scope.WithOpName("consumer0_b"), out0);
+  auto consumer0_c = ops::Identity(scope.WithOpName("consumer0_c"), out0);
+  auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1);
+  auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2);
+  auto consumer3 = ops::Identity(scope.WithOpName("consumer3"), out3);
+
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_CHECK_OK(scope.ToGraph(graph.get()));
+  return graph;
+}
+
+// Makes an encapsulate body graph for use in tests.
+static std::unique_ptr<Graph> MakeBodyGraph() {
+  Scope scope = Scope::NewRootScope().ExitOnError();
+
+  auto arg0 = ops::_Arg(scope.WithOpName("a_0_arg"), DT_INT32, 0);
+  auto arg1 = ops::_Arg(scope.WithOpName("b_0_arg"), DT_FLOAT, 1);
+  auto arg2 = ops::_Arg(scope.WithOpName("c_0_arg"), DT_INT32, 2);
+  auto arg3 = ops::_Arg(scope.WithOpName("d_0_arg"), DT_FLOAT, 3);
+
+  auto arg4 = ops::_Arg(scope.WithOpName("u_0_arg"), DT_RESOURCE, 4);
+  auto arg5 = ops::_Arg(scope.WithOpName("v_0_arg"), DT_RESOURCE, 5);
+  auto arg6 = ops::_Arg(scope.WithOpName("w_0_arg"), DT_RESOURCE, 6);
+
+  auto add_attrs = [](Node* node) {
+    node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0");
+  };
+
+  auto b_identity = ops::Identity(scope.WithOpName("B_identity"), arg1);
+
+  auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), arg4, DT_FLOAT);
+  add_attrs(read_u.node());
+  auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), arg5, DT_FLOAT);
+  add_attrs(read_v.node());
+  auto read_w = ops::ReadVariableOp(scope.WithOpName("ReadW"), arg6, DT_FLOAT);
+  add_attrs(read_w.node());
+
+  auto e = ops::Add(scope.WithOpName("E"), arg0, arg2);
+  add_attrs(e.node());
+  auto f = ops::Add(scope.WithOpName("F"), read_v, read_w);
+  add_attrs(f.node());
+  auto g = ops::Add(scope.WithOpName("G"), f, arg3);
+  add_attrs(g.node());
+
+  auto out0 = ops::_Retval(scope.WithOpName("b_identity_0_retval_RetVal"),
+                           b_identity, 0);
+  auto out1 = ops::_Retval(scope.WithOpName("e_0_retval_RetVal"), e, 1);
+  auto out2 = ops::_Retval(scope.WithOpName("g_0_retval_RetVal"), g, 2);
+  auto out3 =
+      ops::_Retval(scope.WithOpName("readu_0_retval_RetVal"), read_u, 3);
+
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_CHECK_OK(scope.ToGraph(graph.get()));
+  return graph;
+}
+
+TEST(EncapsulateXlaComputations, DeterministicEncapsulate) {
+  // Test that control edge insertion order doesn't affect the cache key
+  // (cluster name) generated by TPU encapsulate pass.
+  auto get_serialized_graph = [](bool control_input_reversed,
+                                 bool operand_reversed) -> string {
+    FunctionLibraryDefinition flib_def(OpRegistry::Global(), {});
+    std::unique_ptr<Graph> graph(new Graph(&flib_def));
+    {
+      Scope scope = Scope::NewRootScope().ExitOnError();
+      auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32);
+      auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32);
+
+      ops::Add e = operand_reversed ? ops::Add(scope.WithOpName("E"), a0, a1)
+                                    : ops::Add(scope.WithOpName("E"), a1, a0);
+
+      auto add_attrs = [](Node* node) {
+        node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr,
+                      "launch0");
+      };
+      add_attrs(e.node());
+
+      TF_CHECK_OK(scope.ToGraph(graph.get()));
+      auto get_node_in_graph = [&graph](Node* node) {
+        return graph->FindNodeId(node->id());
+      };
+      // Insert control edge in different order. The order should not affect
+      // the encapsulated or serialized graph.
+      if (!control_input_reversed) {
+        graph->AddControlEdge(get_node_in_graph(a0.node()),
+                              get_node_in_graph(e.node()), true);
+        graph->AddControlEdge(get_node_in_graph(a1.node()),
+                              get_node_in_graph(e.node()), true);
+      } else {
+        graph->AddControlEdge(get_node_in_graph(a1.node()),
+                              get_node_in_graph(e.node()), true);
+        graph->AddControlEdge(get_node_in_graph(a0.node()),
+                              get_node_in_graph(e.node()), true);
+      }
+    }
+    TF_CHECK_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def));
+    GraphDef gdef;
+    graph->ToGraphDef(&gdef);
+    // Before serialization, sort control inputs first to remove
+    // nondeterminism.
+    SortControlInputs(&gdef);
+    string serialized;
+    SerializeToStringDeterministic(gdef, &serialized);
+    return serialized;
+  };
+
+  // Changing the order of control input shouldn't affect the graph generated.
+  EXPECT_EQ(get_serialized_graph(/*control_input_reversed=*/true,
+                                 /*operand_reversed=*/false),
+            get_serialized_graph(/*control_input_reversed=*/false,
+                                 /*operand_reversed=*/false));
+
+  // Changing the order of data input should affect the graph generated.
+  EXPECT_NE(get_serialized_graph(/*control_input_reversed=*/false,
+                                 /*operand_reversed=*/true),
+            get_serialized_graph(/*control_input_reversed=*/false,
+                                 /*operand_reversed=*/false));
+}
+
+TEST(EncapsulateXlaComputations, Encapsulate) {
+  FunctionLibraryDefinition flib_def(OpRegistry::Global(), {});
+  std::unique_ptr<Graph> graph(new Graph(&flib_def));
+  {
+    Scope scope = Scope::NewRootScope().ExitOnError();
+    auto a = ops::Placeholder(scope.WithOpName("A"), DT_INT32);
+    auto b = ops::Placeholder(scope.WithOpName("B"), DT_FLOAT);
+    auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32);
+    auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT);
+    auto u = ops::Placeholder(scope.WithOpName("U"), DT_RESOURCE);
+    auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE);
+    auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE);
+
+    auto add_attrs = [](Node* node) {
+      node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0");
+    };
+
+    auto b_identity = ops::Identity(scope.WithOpName("B_identity"), b);
+    add_attrs(b_identity.node());
+
+    auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), u, DT_FLOAT);
+    add_attrs(read_u.node());
+    auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), v, DT_FLOAT);
+    add_attrs(read_v.node());
+    auto read_w = ops::ReadVariableOp(scope.WithOpName("ReadW"), w, DT_FLOAT);
+    add_attrs(read_w.node());
+
+    auto e = ops::Add(scope.WithOpName("E"), a, c);
+    add_attrs(e.node());
+    auto f = ops::Add(scope.WithOpName("F"), read_v, read_w);
+    add_attrs(f.node());
+    auto g = ops::Add(scope.WithOpName("G"), f, d);
+    add_attrs(g.node());
+
+    auto out0 = ops::XlaClusterOutput(scope.WithOpName("Out0"), b_identity);
+    auto out1 = ops::XlaClusterOutput(scope.WithOpName("Out1"), e);
+    auto out2 = ops::XlaClusterOutput(scope.WithOpName("Out2"), g);
+    auto out3 = ops::XlaClusterOutput(scope.WithOpName("Out3"), read_u);
+
+    auto consumer0_a = ops::Identity(scope.WithOpName("consumer0_a"), out0);
+    auto consumer0_b = ops::Identity(scope.WithOpName("consumer0_b"), out0);
+    auto consumer0_c = ops::Identity(scope.WithOpName("consumer0_c"), out0);
+    auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1);
+    auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2);
+    auto consumer3 = ops::Identity(scope.WithOpName("consumer3"), out3);
+    TF_ASSERT_OK(scope.ToGraph(graph.get()));
+  }
+
+  std::unique_ptr<Graph> graph_copy(new Graph(&flib_def));
+  CopyGraph(*graph, graph_copy.get());
+
+  TF_ASSERT_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def));
+
+  std::unordered_map<string, Node*> index = BuildNodeIndex(*graph);
+  string function = index.at("launch0")->type_string();
+
+  // Tests the outer graph is as expected.
+  {
+    std::unique_ptr<Graph> outer = MakeOuterGraph(flib_def, function);
+    GraphDef expected_def;
+    outer->ToGraphDef(&expected_def);
+
+    GraphDef actual_def;
+    graph->ToGraphDef(&actual_def);
+    TF_EXPECT_GRAPH_EQ_INTERNAL(expected_def, actual_def);
+  }
+
+  // Tests the encapsulated body graph is as expected.
+  {
+    std::unique_ptr<Graph> body = MakeBodyGraph();
+    GraphDef expected_body_def;
+    body->ToGraphDef(&expected_body_def);
+
+    InstantiationResultForTest result;
+    TF_EXPECT_OK(InstantiateFunctionForTest(function, flib_def, &result));
+
+    EXPECT_EQ((DataTypeVector{DT_INT32, DT_FLOAT, DT_INT32, DT_FLOAT,
+                              DT_RESOURCE, DT_RESOURCE, DT_RESOURCE}),
+              result.arg_types);
+    EXPECT_EQ((DataTypeVector{DT_FLOAT, DT_INT32, DT_FLOAT, DT_FLOAT}),
+              result.ret_types);
+    TF_EXPECT_GRAPH_EQ(expected_body_def, result.gdef);
+  }
+
+  // Encapsulates the same computation again, verifies we reuse the same
+  // function. Encapsulation should be deterministic to avoid recompilation.
+  TF_ASSERT_OK(
+      EncapsulateXlaComputationsPass::Encapsulate(&graph_copy, &flib_def));
+  std::unordered_map<string, Node*> index_copy = BuildNodeIndex(*graph_copy);
+  string function_copy = index_copy.at("launch0")->type_string();
+  EXPECT_EQ(function, function_copy);
+}
+
+TEST(EncapsulateXlaComputations, BuildXlaLaunchOp) {
+  std::unique_ptr<Graph> body_graph = MakeBodyGraph();
+  FunctionDefLibrary flib;
+  TF_ASSERT_OK(GraphToFunctionDef(*body_graph, "launch0", flib.add_function()));
+
+  FunctionLibraryDefinition flib_def(OpRegistry::Global(), flib);
+
+  std::unique_ptr<Graph> graph = MakeOuterGraph(flib_def, "launch0");
+  TF_ASSERT_OK(EncapsulateXlaComputationsPass::BuildXlaLaunchOps(graph.get()));
+
+  Scope scope = Scope::DisabledShapeInferenceScope().ExitOnError();
+  TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib));
+
+  auto a = ops::Placeholder(scope.WithOpName("A"), DT_INT32);
+  auto b = ops::Placeholder(scope.WithOpName("B"), DT_FLOAT);
+  auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32);
+  auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT);
+  auto u = ops::Placeholder(scope.WithOpName("U"), DT_RESOURCE);
+  auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE);
+  auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE);
+
+  NameAttrList function;
+  function.set_name("launch0");
+  auto launch = ops::XlaLaunch(
+      scope.WithOpName("launch0"), std::initializer_list<Input>{},
+      std::initializer_list<Input>{a, b, c, d},
+      std::initializer_list<Input>{u, v, w},
+      DataTypeVector{DT_FLOAT, DT_INT32, DT_FLOAT, DT_FLOAT}, function);
+
+  auto consumer0_a =
+      ops::Identity(scope.WithOpName("consumer0_a"), launch.results[0]);
+  auto consumer0_b =
+      ops::Identity(scope.WithOpName("consumer0_b"), launch.results[0]);
+  auto consumer0_c =
+      ops::Identity(scope.WithOpName("consumer0_c"), launch.results[0]);
+  auto consumer1 =
+      ops::Identity(scope.WithOpName("consumer1"), launch.results[1]);
+  auto consumer2 =
+      ops::Identity(scope.WithOpName("consumer2"), launch.results[2]);
+  auto consumer3 =
+      ops::Identity(scope.WithOpName("consumer3"), launch.results[3]);
+
+  GraphDef expected_def;
+  TF_ASSERT_OK(scope.ToGraphDef(&expected_def));
+
+  GraphDef actual_def;
+  graph->ToGraphDef(&actual_def);
+  TF_EXPECT_GRAPH_EQ(expected_def, actual_def);
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/jit_compilation_pass_registration.cc b/tensorflow/compiler/jit/jit_compilation_pass_registration.cc
index 5dcf754969..3770eea6d0 100644
--- a/tensorflow/compiler/jit/jit_compilation_pass_registration.cc
+++ b/tensorflow/compiler/jit/jit_compilation_pass_registration.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/build_xla_launch_ops_pass.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
 #include "tensorflow/compiler/jit/mark_for_compilation_pass.h"
 #include "tensorflow/compiler/jit/partially_decluster_pass.h"
 #include "tensorflow/core/common_runtime/optimization_registry.h"
@@ -23,6 +24,11 @@ namespace tensorflow {
 
 // PRE_PLACEMENT passes:
 
+// EncapsulateXlaComputationsPass rewrites computations generated by the
+// xla.compile() Python code into XlaLaunch nodes.
+REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 26,
+                      EncapsulateXlaComputationsPass);
+
 // from
 // third_party/tensorflow/compiler/tf2xla/functionalize_control_flow_pass_registration.cc
 // FunctionalizeControlFlowPass: 27
@@ -32,7 +38,8 @@ namespace tensorflow {
 // control flow structure (XlaIf/XlaWhile). Following passes must
 // handle those FunctionDef correctly.
 
-// POST_REWRITE_FOR_EXEC passes:
+// POST_REWRITE_FOR_EXEC passes that support auto-clustering to enable XLA:
+
 REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 10,
                       MarkForCompilationPass);
 
diff --git a/tensorflow/compiler/jit/ops/xla_ops.cc b/tensorflow/compiler/jit/ops/xla_ops.cc
index f2473d98ff..1a29c3caab 100644
--- a/tensorflow/compiler/jit/ops/xla_ops.cc
+++ b/tensorflow/compiler/jit/ops/xla_ops.cc
@@ -13,10 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
 
 namespace tensorflow {
 
+using shape_inference::InferenceContext;
+
 REGISTER_OP("XlaLaunch")
     .Input("constants: Tconstants")
     .Attr("Tconstants: list(type) >= 0")
@@ -32,4 +36,19 @@ REGISTER_OP("XlaLaunch")
     .SetIsStateful()
     .Doc("XLA Launch Op. For use by the XLA JIT only.");
 
+REGISTER_OP("XlaClusterOutput")
+    .Input("input: T")
+    // Note: when replication is supported, this op will have N outputs.
+    .Output("outputs: T")
+    .Attr("T: type")
+    .SetShapeFn([](InferenceContext* c) {
+      for (int i = 0; i < c->num_outputs(); ++i) {
+        c->set_output(i, c->input(0));
+      }
+      return Status::OK();
+    })
+    .Doc(
+        "Operator that connects the output of an XLA computation to other "
+        "consumer graph nodes.");
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index d549e7bb59..ba1e3b2b4f 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -611,6 +611,7 @@ cc_library(
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
     ],
diff --git a/tensorflow/compiler/tf2xla/test_util.cc b/tensorflow/compiler/tf2xla/test_util.cc
index 3c6c9a91b6..f31bfb45a2 100644
--- a/tensorflow/compiler/tf2xla/test_util.cc
+++ b/tensorflow/compiler/tf2xla/test_util.cc
@@ -40,4 +40,12 @@ Status InstantiateFunctionForTest(const string& name,
   return Status::OK();
 }
 
+std::unordered_map<string, Node*> BuildNodeIndex(const Graph& graph) {
+  std::unordered_map<string, Node*> index;
+  for (Node* node : graph.nodes()) {
+    index[node->name()] = node;
+  }
+  return index;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/test_util.h b/tensorflow/compiler/tf2xla/test_util.h
index e6e4ae92ed..350a868568 100644
--- a/tensorflow/compiler/tf2xla/test_util.h
+++ b/tensorflow/compiler/tf2xla/test_util.h
@@ -24,8 +24,10 @@ limitations under the License.
 
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/graph_def_util.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/util/equal_graph_def.h"
 
 namespace tensorflow {
 
@@ -42,6 +44,20 @@ Status InstantiateFunctionForTest(const string& name,
                                   const FunctionLibraryDefinition& library,
                                   InstantiationResultForTest* result);
 
+// Builds a map from node name to Node* for `graph`.
+std::unordered_map<string, Node*> BuildNodeIndex(const Graph& graph);
+
 }  // namespace tensorflow
 
+// Variant of TF_EXPECT_GRAPH_EQ that also compares internal attributes for
+// equality.
+#define TF_EXPECT_GRAPH_EQ_INTERNAL(expected, actual)               \
+  do {                                                              \
+    string diff;                                                    \
+    EqualGraphDefOptions eq_options;                                \
+    eq_options.ignore_internal_attrs = false;                       \
+    EXPECT_TRUE(EqualGraphDef(actual, expected, &diff, eq_options)) \
+        << diff << "\nActual: " << SummarizeGraphDef(actual);       \
+  } while (false)
+
 #endif  // TENSORFLOW_COMPILER_TF2XLA_TEST_UTIL_H_
diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc
index 7f260b3139..4475fa979e 100644
--- a/tensorflow/core/common_runtime/graph_execution_state.cc
+++ b/tensorflow/core/common_runtime/graph_execution_state.cc
@@ -561,6 +561,10 @@ Status GraphExecutionState::OptimizeGraph(
     grappler::GrapplerItem item;
     item.id = "tf_graph";
     graph_->ToGraphDef(&item.graph);
+    // TODO(b/114748242): Add a unit test to test this bug fix.
+    if (flib_def_) {
+      *item.graph.mutable_library() = flib_def_->ToProto();
+    }
 
     item.fetch.insert(item.fetch.end(),
                       options.callable_options.fetch().begin(),
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 8c99598748..7ed4a67333 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -72,6 +72,16 @@ bool IsRunOnceOptimizer(const string& name) {
          name == "loop_optimizer";
 }
 
+// Check if the graphdef contains nodes that indicate TPU execution.
+bool IsTPUGraphDef(const GraphDef& def) {
+  for (auto node : def.node()) {
+    if (node.op() == "TPUCompile" || node.op() == "TPUPartitionedCall") {
+      return true;
+    }
+  }
+  return false;
+}
+
 }  // namespace
 
 #define MK_OPT(NAME, VALUE) \
@@ -338,6 +348,19 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph));
   VLOG(1) << "Optimized main graph.";
 
+  // Skip optimizing functions if this is a TPU graph. Currently, Grappler
+  // passes do not handle TPU functions correctly in a variety of ways (Note
+  // that due to the pre-placement TPU graph rewriting passes, the TPU-related
+  // ops are encapsulated away into functions). For example, TPU graphs contain
+  // TPUReplicateMetadata node that carries relevant TPU metadata and Grappler
+  // passes could prune that away. Grappler passes could also cause issues
+  // around shape inference. Since the desired and existing behavior is to not
+  // optimize TPU functions with Grappler, this check preserves that.
+  if (IsTPUGraphDef(*optimized_graph)) {
+    VLOG(2) << "Skipping optimizing funcs for TPU graphs";
+    return Status::OK();
+  }
+
   // 2. Optimize function library
   FunctionLibraryDefinition flib(OpRegistry::Global(),
                                  optimized_graph->library());
-- 
GitLab


From 8f9413bf41ff89672a3415eef606ecaca7c70a2f Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Thu, 13 Sep 2018 16:15:32 -0700
Subject: [PATCH 0175/1357] Ensure that the input image of decode.bmp.op has
 valid dimensions.

This prevents an undefined behavior with signed integer overflow in
decode.bmp.op.

PiperOrigin-RevId: 212897289
---
 tensorflow/core/kernels/decode_bmp_op.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc
index 750efca592..ae451be7e2 100644
--- a/tensorflow/core/kernels/decode_bmp_op.cc
+++ b/tensorflow/core/kernels/decode_bmp_op.cc
@@ -91,8 +91,10 @@ class DecodeBmpOp : public OpKernel {
                 errors::InvalidArgument(
                     "Number of channels must be 1, 3 or 4, was ", channels_));
 
-    OP_REQUIRES(context, width > 0 && header_size >= 0,
+    OP_REQUIRES(context, width > 0,
                 errors::InvalidArgument("Width must be positive"));
+    OP_REQUIRES(context, height != 0,
+                errors::InvalidArgument("Height must be nonzero"));
     OP_REQUIRES(context, header_size >= 0,
                 errors::InvalidArgument("header size must be nonnegative"));
 
-- 
GitLab


From 5dd20118a25e8d29b7684cf5fb17951657a4a687 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 13 Sep 2018 16:18:18 -0700
Subject: [PATCH 0176/1357] Convert logdir paths to strings.

This supports pathlib and other non-string path types.

PiperOrigin-RevId: 212897666
---
 tensorflow/python/ops/summary_ops_v2.py               | 1 +
 tensorflow/python/summary/writer/event_file_writer.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py
index 94c7d88b5c..a404507627 100644
--- a/tensorflow/python/ops/summary_ops_v2.py
+++ b/tensorflow/python/ops/summary_ops_v2.py
@@ -234,6 +234,7 @@ def create_file_writer(logdir,
   """
   if logdir is None:
     return SummaryWriter(None, None)
+  logdir = str(logdir)
   with ops.device("cpu:0"):
     if max_queue is None:
       max_queue = constant_op.constant(10)
diff --git a/tensorflow/python/summary/writer/event_file_writer.py b/tensorflow/python/summary/writer/event_file_writer.py
index 2936a279bd..14dec982a6 100644
--- a/tensorflow/python/summary/writer/event_file_writer.py
+++ b/tensorflow/python/summary/writer/event_file_writer.py
@@ -62,7 +62,7 @@ class EventFileWriter(object):
       filename_suffix: A string. Every event file's name is suffixed with
         `filename_suffix`.
     """
-    self._logdir = logdir
+    self._logdir = str(logdir)
     if not gfile.IsDirectory(self._logdir):
       gfile.MakeDirs(self._logdir)
     self._event_queue = six.moves.queue.Queue(max_queue)
-- 
GitLab


From 3b438e4a24dd0f113f1d36d97196a027bd473fc4 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Thu, 13 Sep 2018 16:42:57 -0700
Subject: [PATCH 0177/1357] [tf.data] Changes `make_batched_features_dataset`
 and `make_tf_record_dataset` default `prefetch` buffer size to auto-tune
 (from 1).

PiperOrigin-RevId: 212900920
---
 tensorflow/contrib/data/__init__.py           |  9 +++--
 tensorflow/contrib/data/python/ops/BUILD      |  1 +
 .../contrib/data/python/ops/optimization.py   |  3 ++
 tensorflow/contrib/data/python/ops/readers.py | 39 +++++++++----------
 .../core/kernels/data/prefetch_autotuner.cc   | 13 ++++++-
 5 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index baec238c62..c378b1ce8d 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -62,6 +62,8 @@ See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
 @@sloppy_interleave
 @@unbatch
 @@unique
+
+@@AUTOTUNE
 """
 
 from __future__ import absolute_import
@@ -91,6 +93,10 @@ from tensorflow.contrib.data.python.ops.interleave_ops import sample_from_datase
 from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave
 from tensorflow.contrib.data.python.ops.iterator_ops import CheckpointInputPipelineHook
 from tensorflow.contrib.data.python.ops.iterator_ops import make_saveable_from_iterator
+
+# Optimization constant that can be used to enable auto-tuning.
+from tensorflow.contrib.data.python.ops.optimization import AUTOTUNE
+
 from tensorflow.contrib.data.python.ops.parsing_ops import parse_example_dataset
 from tensorflow.contrib.data.python.ops.prefetching_ops import copy_to_device
 from tensorflow.contrib.data.python.ops.prefetching_ops import prefetch_to_device
@@ -113,6 +119,3 @@ from tensorflow.python.data.ops.optional_ops import Optional
 
 from tensorflow.python.util.all_util import remove_undocumented
 remove_undocumented(__name__)
-
-# A constant that can be used to enable auto-tuning.
-AUTOTUNE = -1
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index 4b45cc7e36..a14781cd93 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -80,6 +80,7 @@ py_library(
         ":batching",
         ":gen_dataset_ops",
         ":interleave_ops",
+        ":optimization",
         ":parsing_ops",
         ":shuffle_ops",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/contrib/data/python/ops/optimization.py b/tensorflow/contrib/data/python/ops/optimization.py
index 4114b62e29..73840452df 100644
--- a/tensorflow/contrib/data/python/ops/optimization.py
+++ b/tensorflow/contrib/data/python/ops/optimization.py
@@ -24,6 +24,9 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_dataset_ops
 
+# A constant that can be used to enable auto-tuning.
+AUTOTUNE = -1
+
 
 # TODO(jsimsa): Support RE matching for both individual transformation (e.g. to
 # account for indexing) and transformation sequence.
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index 4c466781f7..785b395707 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -25,6 +25,7 @@ import numpy as np
 from tensorflow.contrib.data.python.ops import batching
 from tensorflow.contrib.data.python.ops import gen_dataset_ops as contrib_gen_dataset_ops
 from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.contrib.data.python.ops import parsing_ops
 from tensorflow.contrib.data.python.ops import shuffle_ops
 from tensorflow.python.data.ops import dataset_ops
@@ -214,18 +215,17 @@ def _maybe_shuffle_and_repeat(
   return dataset
 
 
-def make_tf_record_dataset(
-    file_pattern,
-    batch_size,
-    parser_fn=None,
-    num_epochs=None,
-    shuffle=True,
-    shuffle_buffer_size=None,
-    shuffle_seed=None,
-    prefetch_buffer_size=None,
-    num_parallel_reads=None,
-    num_parallel_parser_calls=None,
-    drop_final_batch=False):
+def make_tf_record_dataset(file_pattern,
+                           batch_size,
+                           parser_fn=None,
+                           num_epochs=None,
+                           shuffle=True,
+                           shuffle_buffer_size=None,
+                           shuffle_seed=None,
+                           prefetch_buffer_size=optimization.AUTOTUNE,
+                           num_parallel_reads=None,
+                           num_parallel_parser_calls=None,
+                           drop_final_batch=False):
   """Reads and optionally parses TFRecord files into a dataset.
 
   Provides common functionality such as batching, optional parsing, shuffling,
@@ -300,8 +300,6 @@ def make_tf_record_dataset(
         parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls,
         drop_remainder=drop_final_batch))
 
-  if prefetch_buffer_size is None:
-    prefetch_buffer_size = -1  # tf.config.data.AUTOTUNE
   if prefetch_buffer_size == 0:
     return dataset
   else:
@@ -323,7 +321,7 @@ def make_csv_dataset(
     shuffle=True,
     shuffle_buffer_size=10000,
     shuffle_seed=None,
-    prefetch_buffer_size=1,
+    prefetch_buffer_size=optimization.AUTOTUNE,
     num_parallel_reads=1,
     sloppy=False,
     num_rows_for_inference=100,
@@ -386,9 +384,10 @@ def make_csv_dataset(
     shuffle_buffer_size: Buffer size to use for shuffling. A large buffer size
       ensures better shuffling, but increases memory usage and startup time.
     shuffle_seed: Randomization seed to use for shuffling.
-    prefetch_buffer_size: An int specifying the number of feature batches to
-      prefetch for performance improvement. Recommended value is the number of
-      batches consumed per training step.
+    prefetch_buffer_size: An int specifying the number of feature
+      batches to prefetch for performance improvement. Recommended value is the
+      number of batches consumed per training step. Defaults to auto-tune.
+
     num_parallel_reads: Number of threads used to read CSV records from files.
       If >1, the results will be interleaved.
     sloppy: If `True`, reading performance will be improved at
@@ -666,7 +665,7 @@ def make_batched_features_dataset(file_pattern,
                                   shuffle=True,
                                   shuffle_buffer_size=10000,
                                   shuffle_seed=None,
-                                  prefetch_buffer_size=1,
+                                  prefetch_buffer_size=optimization.AUTOTUNE,
                                   reader_num_threads=1,
                                   parser_num_threads=2,
                                   sloppy_ordering=False,
@@ -739,7 +738,7 @@ def make_batched_features_dataset(file_pattern,
     shuffle_seed: Randomization seed to use for shuffling.
     prefetch_buffer_size: Number of feature batches to prefetch in order to
       improve performance. Recommended value is the number of batches consumed
-      per training step (default is 1).
+      per training step. Defaults to auto-tune.
     reader_num_threads: Number of threads used to read `Example` records. If >1,
       the results will be interleaved.
     parser_num_threads: Number of threads to use for parsing `Example` tensors
diff --git a/tensorflow/core/kernels/data/prefetch_autotuner.cc b/tensorflow/core/kernels/data/prefetch_autotuner.cc
index 533d0bd5d2..da357339c9 100644
--- a/tensorflow/core/kernels/data/prefetch_autotuner.cc
+++ b/tensorflow/core/kernels/data/prefetch_autotuner.cc
@@ -26,6 +26,13 @@ PrefetchAutotuner::PrefetchAutotuner(int64 initial_buffer_size)
   }
 }
 
+namespace {
+// Determines what strategy to use for increasing the buffer size limit. For
+// limits less than the threshold, an exponential increase is used, while for
+// limits greater than or equal to the threshold, a linear increase is used.
+size_t kBufferLimitThreshold = 2048;
+}  // namespace
+
 void PrefetchAutotuner::RecordConsumption(size_t current_buffer_size) {
   switch (mode_) {
     case Mode::kDisabled:
@@ -37,7 +44,11 @@ void PrefetchAutotuner::RecordConsumption(size_t current_buffer_size) {
       return;
     case Mode::kDownswing:
       if (current_buffer_size == 0) {
-        buffer_limit_ *= 2;  // Increase the buffer size.
+        if (buffer_limit_ >= kBufferLimitThreshold) {
+          buffer_limit_ += kBufferLimitThreshold;
+        } else {
+          buffer_limit_ *= 2;
+        }
         mode_ = Mode::kUpswing;
       }
       return;
-- 
GitLab


From 4137d84a3b41638d4048e45ab579662c18a06df5 Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Thu, 13 Sep 2018 16:45:11 -0700
Subject: [PATCH 0178/1357] Use `dataset.batch(.., drop_remainder=True)`
 instead of map_and_batch to achieve the same effect.

PiperOrigin-RevId: 212901207
---
 tensorflow/contrib/distribute/python/BUILD                  | 1 -
 tensorflow/contrib/distribute/python/single_loss_example.py | 6 ++----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 87f76eaa94..aaecbb0eb1 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -485,7 +485,6 @@ py_library(
     srcs = ["single_loss_example.py"],
     deps = [
         ":step_fn",
-        "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:layers",
diff --git a/tensorflow/contrib/distribute/python/single_loss_example.py b/tensorflow/contrib/distribute/python/single_loss_example.py
index 5aa19cf6a9..09b351ffa4 100644
--- a/tensorflow/contrib/distribute/python/single_loss_example.py
+++ b/tensorflow/contrib/distribute/python/single_loss_example.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import batching
 from tensorflow.contrib.distribute.python import step_fn
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -59,10 +58,9 @@ def minimize_loss_example(optimizer_fn,
 
   def dataset_fn():
     dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat()
-    # TODO(isaprykin): map_and_batch with drop_remainder causes shapes to be
+    # TODO(isaprykin): batch with drop_remainder causes shapes to be
     # fully defined for TPU.  Remove this when XLA supports dynamic shapes.
-    return dataset.apply(
-        batching.map_and_batch(lambda x: x, batch_size=1, drop_remainder=True))
+    return dataset.batch(1, drop_remainder=True)
 
   # An Optimizer instance is created either outside or inside model_fn.
   outer_optimizer = None
-- 
GitLab


From 4b42a284683416ab6159f32c903321af9dc9a591 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Thu, 13 Sep 2018 16:58:34 -0700
Subject: [PATCH 0179/1357] Reland "Add basic type propagation for unsupported
 ops in TFLite conversion"

The original CL was rolled back due to op registration conflicts in the pip.
Resolve the issue by only including core:ops in the toco binary itself, not in intermediate libraries.

PiperOrigin-RevId: 212902838
---
 tensorflow/contrib/lite/toco/BUILD            |  6 +-
 .../contrib/lite/toco/import_tensorflow.cc    | 20 +++++
 .../lite/toco/import_tensorflow_test.cc       | 75 +++++++++++++++++--
 3 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD
index bea90f1ce8..96b88b60fc 100644
--- a/tensorflow/contrib/lite/toco/BUILD
+++ b/tensorflow/contrib/lite/toco/BUILD
@@ -347,6 +347,7 @@ tf_cc_test(
         "//tensorflow/core:framework",
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
+        "//tensorflow/core:ops",
         "//tensorflow/core:protos_all_cc",
         "@com_google_googletest//:gtest_main",
     ],
@@ -407,8 +408,11 @@ tf_cc_binary(
         ":toco_port",
         ":toco_tooling",
         ":types_proto_cc",
-        "//tensorflow/core:lib",
         "@com_google_absl//absl/strings",
+        "//tensorflow/core:lib",
+        # We cannot embed the core:ops dependency directly into :toco_tooling as
+        # it can conflict with downstream deps when toco is used as a library.
+        "//tensorflow/core:ops",
     ],
 )
 
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 9bc23c4b3c..efc1007925 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -58,6 +58,7 @@ using tensorflow::DT_STRING;
 using tensorflow::DT_UINT8;
 using tensorflow::GraphDef;
 using tensorflow::NodeDef;
+using tensorflow::OpRegistry;
 using tensorflow::TensorProto;
 using tensorflow::TensorShapeProto;
 
@@ -1079,6 +1080,25 @@ tensorflow::Status ConvertUnsupportedOperator(
   } else if (HasAttr(node, "Tout")) {
     const auto& output_type = GetDataTypeAttr(node, "Tout");
     op->output_data_types.push_back(ConvertDataType(output_type));
+  } else {
+    const tensorflow::OpDef* op_def = nullptr;
+    if (OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
+      for (const auto& output_arg : op_def->output_arg()) {
+        if (HasAttr(node, output_arg.type_attr())) {
+          op->output_data_types.push_back(
+              ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr())));
+        } else {
+          LOG(INFO) << "Op node missing output type attribute: " << node.name();
+          op->output_data_types.clear();
+          break;
+        }
+      }
+    }
+    if (op->output_data_types.empty()) {
+      // TODO(b/113613439): Figure out how to propagate types for custom ops
+      // that have no OpDef.
+      LOG(INFO) << "Unable to determine output type for op: " << node.op();
+    }
   }
   if (HasAttr(node, kAttrOutputShapes)) {
     const auto& output_shapes = GetListAttr(node, kAttrOutputShapes);
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
index a00e136dd6..da248826a7 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
@@ -49,6 +49,17 @@ Status ImportTensorFlowNode(const NodeDef&, const TensorFlowImportFlags&,
 
 namespace {
 
+Status ImportNode(const NodeDef& node, Model* model) {
+  const auto converter = internal::GetTensorFlowNodeConverterMap();
+  return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), model,
+                                        converter);
+}
+
+Status ImportNode(const NodeDef& node) {
+  Model model;
+  return ImportNode(node, &model);
+}
+
 class ShapeImportTest : public ::testing::TestWithParam<tensorflow::DataType> {
  protected:
   ShapeImportTest() {}
@@ -109,12 +120,24 @@ class ShapeImportTest : public ::testing::TestWithParam<tensorflow::DataType> {
     SetAttrValue(t, &value_attr);
     (*node->mutable_attr())["value"] = value_attr;
   }
+};
+
+class TypeImportTest : public ::testing::TestWithParam<
+                           std::pair<tensorflow::DataType, ArrayDataType>> {
+ protected:
+  TypeImportTest() {}
+
+  void BuildUnaryNode(const std::string& op_name, tensorflow::DataType dtype,
+                      NodeDef* node) {
+    node->set_op(op_name);
+    node->set_name("Node1");
+
+    node->add_input();
+    node->set_input(0, "Node0");
 
-  Status ImportNode(const NodeDef& node) {
-    Model model;
-    const auto converter = internal::GetTensorFlowNodeConverterMap();
-    return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), &model,
-                                          converter);
+    AttrValue dtype_attr;
+    SetAttrValue(dtype, &dtype_attr);
+    (*node->mutable_attr())["T"] = dtype_attr;
   }
 };
 
@@ -167,5 +190,47 @@ TEST_P(ShapeImportTest, ValidShapeButZeroElements) {
 INSTANTIATE_TEST_CASE_P(ValidShapeButZeroElements, ShapeImportTest,
                         ::testing::ValuesIn(TestTypes()));
 
+std::vector<std::pair<tensorflow::DataType, ArrayDataType>> UnaryTestTypes() {
+  return {{DT_FLOAT, ArrayDataType::kFloat},
+          {DT_INT32, ArrayDataType::kInt32},
+          {DT_INT64, ArrayDataType::kInt64}};
+}
+
+TEST_P(TypeImportTest, BasicTypeInference) {
+  NodeDef node;
+  BuildUnaryNode("Atan", GetParam().first, &node);
+
+  Model model;
+  EXPECT_TRUE(ImportNode(node, &model).ok());
+
+  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
+  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
+  const TensorFlowUnsupportedOperator* op =
+      static_cast<const TensorFlowUnsupportedOperator*>(
+          model.operators[0].get());
+  ASSERT_THAT(op->output_data_types, ::testing::ElementsAre(GetParam().second));
+}
+INSTANTIATE_TEST_CASE_P(BasicTypeInference, TypeImportTest,
+                        ::testing::ValuesIn(UnaryTestTypes()));
+
+TEST(ImportTest, FailedTypeInference) {
+  // Create a unary op with no Type ("T") annotation.
+  NodeDef node;
+  node.set_op("Atan");
+  node.set_name("Node1");
+  node.add_input();
+  node.set_input(0, "Node0");
+
+  Model model;
+  EXPECT_TRUE(ImportNode(node, &model).ok());
+
+  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
+  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
+  const TensorFlowUnsupportedOperator* op =
+      static_cast<const TensorFlowUnsupportedOperator*>(
+          model.operators[0].get());
+  ASSERT_TRUE(op->output_data_types.empty());
+}
+
 }  // namespace
 }  // namespace toco
-- 
GitLab


From 97511100c88010d4e57a78685b476b4f8821059e Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Thu, 13 Sep 2018 17:17:30 -0700
Subject: [PATCH 0180/1357] Simplify the initialization function in algortihm
 picker. No functional change.

PiperOrigin-RevId: 212905536
---
 .../gpu/cudnn_convolution_algorithm_picker.cc | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index c607aea1a8..f528e62b17 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -221,25 +221,12 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
     allocator = &*se_allocator;
   }
 
-  // Allocate space for the input, filter, and output of the convolution.  We
-  // use a ScratchAllocator for this instead of calling allocator_ directly so
-  // that our allocations don't leak.
-  ScratchAllocator input_output_allocator(device_ordinal, allocator);
-  TF_ASSIGN_OR_RETURN(params.input_buf,
-                      input_output_allocator.AllocateBytes(
-                          &stream, ShapeUtil::ByteSizeOf(input_shape)));
-  TF_ASSIGN_OR_RETURN(params.filter_buf,
-                      input_output_allocator.AllocateBytes(
-                          &stream, ShapeUtil::ByteSizeOf(filter_shape)));
-  TF_ASSIGN_OR_RETURN(params.output_buf,
-                      input_output_allocator.AllocateBytes(
-                          &stream, ShapeUtil::ByteSizeOf(output_shape)));
-
-  if (cross_check_enabled) {
-    // Broadcast a constant to the buffer, instead of zeroing the buffer. A
-    // non-zero constant is useful for the cross checking, because zero-inputs
-    // may not always reveal the bugs.
-    const auto initialize_f16 = [&stream](DeviceMemoryBase buffer) {
+  const auto initialize_buffer = [&stream, cross_check_enabled](
+                                     DeviceMemoryBase buffer) {
+    if (cross_check_enabled) {
+      // Broadcast a constant to the buffer, instead of zeroing the buffer. A
+      // non-zero constant is useful for the cross checking, because zero-inputs
+      // may not always reveal the bugs.
       CHECK_EQ(0, (uintptr_t)buffer.opaque() % 4);
       size_t left_over_bytes = buffer.size() % 4;
       CHECK_EQ(0, left_over_bytes % 2);
@@ -257,19 +244,32 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
       DeviceMemoryBase left_over(
           static_cast<char*>(buffer.opaque()) + aligned_size, left_over_bytes);
       stream.ThenMemcpy(&left_over, halfs, left_over_bytes);
-    };
-    initialize_f16(params.input_buf);
-    initialize_f16(params.filter_buf);
-    initialize_f16(params.output_buf);
-  } else {
-    // Although we don't have evidence this matters, zero out the buffers before
-    // autotuning.  It's conceivable that using uninitialized memory as the
-    // inputs might affect performance if e.g. the inputs contain denormals, and
-    // this is easy enough.
-    stream.ThenMemZero(&params.input_buf, params.input_buf.size())
-        .ThenMemZero(&params.filter_buf, params.filter_buf.size())
-        .ThenMemZero(&params.output_buf, params.output_buf.size());
-  }
+    } else {
+      // Although we don't have evidence this matters, zero out the buffers
+      // before autotuning.  It's conceivable that using uninitialized memory as
+      // the inputs might affect performance if e.g. the inputs contain
+      // denormals, and this is easy enough.
+      stream.ThenMemZero(&buffer, buffer.size());
+    }
+  };
+
+  // Allocate space for the input, filter, and output of the convolution.  We
+  // use a ScratchAllocator for this instead of calling allocator_ directly so
+  // that our allocations don't leak.
+  ScratchAllocator input_output_allocator(device_ordinal, allocator);
+  TF_ASSIGN_OR_RETURN(params.input_buf,
+                      input_output_allocator.AllocateBytes(
+                          &stream, ShapeUtil::ByteSizeOf(input_shape)));
+  TF_ASSIGN_OR_RETURN(params.filter_buf,
+                      input_output_allocator.AllocateBytes(
+                          &stream, ShapeUtil::ByteSizeOf(filter_shape)));
+  TF_ASSIGN_OR_RETURN(params.output_buf,
+                      input_output_allocator.AllocateBytes(
+                          &stream, ShapeUtil::ByteSizeOf(output_shape)));
+
+  initialize_buffer(params.input_buf);
+  initialize_buffer(params.filter_buf);
+  initialize_buffer(params.output_buf);
 
   DeviceMemoryBase* result_buf = [&] {
     switch (params.kind) {
-- 
GitLab


From 2e11d827d656a671757d386881e925c97f0b3d9c Mon Sep 17 00:00:00 2001
From: Pavithra Vijay <psv@google.com>
Date: Thu, 13 Sep 2018 17:39:47 -0700
Subject: [PATCH 0181/1357] Fix performance issue when training with keras
 model in eager mode.

PiperOrigin-RevId: 212908218
---
 tensorflow/python/keras/engine/training.py    | 37 +++++++++++++------
 .../python/keras/engine/training_test.py      | 19 ++++++++++
 2 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index c6749468c8..fed07c4120 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -209,8 +209,27 @@ class Model(Network):
     for metric in metrics:
       metric_fn = training_utils.get_metric_function(
           metric, output_shape=output_shape, loss_fn=loss_fn)
-      metric_name = self._get_metric_name(
-          metric, output_index, weighted=weights is not None)
+
+      if (context.executing_eagerly() and y_true is not None and
+          y_pred is not None):
+        # In eager mode, when executing metric_fn during training, we do not
+        # need to generate unique metric name and add it to the model
+        # as we have done that during compile already.
+        prefix = 'weighted_' if weights is not None else ''
+        suffix = metric_fn.name if hasattr(metric_fn,
+                                           'name') else metric_fn.__name__
+        metric_name = prefix + suffix
+      else:
+        # Get metric name that is to be added to the model.
+        metric_name = self._get_metric_name(
+            metric, output_index, weighted=weights is not None)
+        # Keep track of metric name.
+        self.metrics_names.append(metric_name)
+
+        # Keep track of stateful metric attributes (name and metric function).
+        if isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful:
+          self.stateful_metric_names.append(metric_name)
+          self.stateful_metric_functions.append(metric_fn)
 
       with K.name_scope(metric_name):
         # If both outputs and targets are available, call the metric function.
@@ -250,16 +269,10 @@ class Model(Network):
             self.metrics_tensors.append(metric_result)
           metric_results.append(metric_result)
 
-      # Keep track of metric name.
-      self.metrics_names.append(metric_name)
-
-      # Keep track of stateful metric attributes (name and metric function).
-      if isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful:
-        self.stateful_metric_names.append(metric_name)
-        self.stateful_metric_functions.append(metric_fn)
-        if not context.executing_eagerly():
-          # Keep track of updates created by stateful metrics.
-          self.metrics_updates += metric_fn.updates
+      if (isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful and
+          not context.executing_eagerly()):
+        # Keep track of updates created by stateful metrics.
+        self.metrics_updates += metric_fn.updates
     return metric_results
 
   def _handle_metrics(self,
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 380130095b..30be4131a4 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -2256,7 +2256,26 @@ class TestTrainingWithMetrics(test.TestCase):
         'dense_binary_accuracy', 'dropout_mean_squared_error',
         'dropout_binary_accuracy'
     ]
+    reference_stateful_metric_names = [
+        'dense_binary_accuracy', 'dropout_binary_accuracy'
+    ]
+    self.assertEqual(reference_metric_names, model.metrics_names)
+    self.assertEqual(reference_stateful_metric_names,
+                     model.stateful_metric_names)
+
+    # Verify that model metric names are not altered during training.
+    input_a_np = np.random.random((10, 3))
+    input_b_np = np.random.random((10, 3))
+
+    output_d_np = np.random.random((10, 4))
+    output_e_np = np.random.random((10, 4))
+
+    model.fit([input_a_np, input_b_np], [output_d_np, output_e_np],
+              epochs=1,
+              batch_size=5)
     self.assertEqual(reference_metric_names, model.metrics_names)
+    self.assertEqual(reference_stateful_metric_names,
+                     model.stateful_metric_names)
 
   @tf_test_util.run_in_graph_and_eager_modes
   def test_metrics_correctness(self):
-- 
GitLab


From eb5cd6926ef8d2a5a748f1aa978e51148e22dd97 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Thu, 13 Sep 2018 18:19:50 -0700
Subject: [PATCH 0182/1357] Make Keras relu use nn.leaky_relu when appropriate.

PiperOrigin-RevId: 212912615
---
 tensorflow/python/keras/backend.py                    |  3 +++
 tensorflow/python/keras/backend_test.py               |  3 ++-
 .../python/keras/layers/advanced_activations.py       | 11 +++++------
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 529b07dc12..6f766c6257 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -3462,6 +3462,9 @@ def relu(x, alpha=0., max_value=None, threshold=0):
   clip_max = max_value is not None
 
   if alpha != 0.:
+    if max_value is None and threshold == 0:
+      return nn.leaky_relu(x, alpha=alpha)
+
     if threshold != 0:
       negative_part = nn.relu(-x + threshold)
     else:
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index 2f271c4f50..ab71589940 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -522,8 +522,9 @@ class BackendLinearAlgebraTest(test.TestCase):
       relu_op = keras.backend.relu(x)
       self.assertAllClose(keras.backend.eval(relu_op), [[0, 0], [2, 7]])
 
-      # alpha
+      # alpha (leaky relu used)
       relu_op = keras.backend.relu(x, alpha=0.5)
+      self.assertTrue('LeakyRelu' in relu_op.name)
       self.assertAllClose(keras.backend.eval(relu_op), [[-2, 0], [2, 7]])
 
       # max_value < some elements
diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 61ab69c16f..731d180a80 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -268,7 +268,7 @@ class Softmax(Layer):
     self.axis = axis
 
   def call(self, inputs):
-    return activations.softmax(inputs, axis=self.axis)
+    return K.softmax(inputs, axis=self.axis)
 
   def get_config(self):
     config = {'axis': self.axis}
@@ -322,11 +322,10 @@ class ReLU(Layer):
   def call(self, inputs):
     # alpha is used for leaky relu slope in activations instead of
     # negative_slope.
-    return activations.relu(
-        inputs,
-        alpha=self.negative_slope,
-        max_value=self.max_value,
-        threshold=self.threshold)
+    return K.relu(inputs,
+                  alpha=self.negative_slope,
+                  max_value=self.max_value,
+                  threshold=self.threshold)
 
   def get_config(self):
     config = {
-- 
GitLab


From 1831ef73ba693ba7f27a3ecb391b47601e6a3758 Mon Sep 17 00:00:00 2001
From: Chris Leary <leary@google.com>
Date: Thu, 13 Sep 2018 18:34:29 -0700
Subject: [PATCH 0183/1357] [XLA] Add hook for dump directory expansion.

Also puts a ".unoptimized" suffix on dumped HLO protobuf files
to avoid the unoptimized dumped HLO protobuf colliding with the
optimized dumped HLO protobufs when the same dump directory is
specified for both.

PiperOrigin-RevId: 212914100
---
 tensorflow/compiler/xla/BUILD                 |  1 +
 tensorflow/compiler/xla/protobuf_util.cc      | 29 +++++++++++++++++--
 tensorflow/compiler/xla/protobuf_util.h       |  4 +++
 .../xla/service/compile_only_service.cc       |  2 +-
 tensorflow/compiler/xla/service/service.cc    |  7 +++--
 tensorflow/compiler/xla/service/service.h     |  4 ++-
 6 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 76e36f3c46..ef70c1f8ac 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -193,6 +193,7 @@ cc_library(
         ":types",
         ":util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/synchronization",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/protobuf_util.cc b/tensorflow/compiler/xla/protobuf_util.cc
index 787725e884..b507a2ef79 100644
--- a/tensorflow/compiler/xla/protobuf_util.cc
+++ b/tensorflow/compiler/xla/protobuf_util.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace xla {
@@ -49,16 +50,40 @@ string SanitizeFilename(const string& file_name) {
   return safe_file_name;
 }
 
+std::pair<tensorflow::mutex*, std::vector<std::function<string(string)>>*>
+GetDirectoryExpanders() {
+  static auto* mutex = new tensorflow::mutex;
+  static auto* singleton = new std::vector<std::function<string(string)>>;
+  return {mutex, singleton};
+}
+
+// Runs all the directory expanders over x and returns the result.
+string Expand(string x) {
+  auto pair = GetDirectoryExpanders();
+  tensorflow::mutex_lock lock(*pair.first);
+  for (const auto& f : *pair.second) {
+    x = f(x);
+  }
+  return x;
+}
+
 }  // namespace
 
 Status DumpProtoToDirectory(const tensorflow::protobuf::Message& message,
                             const string& directory, const string& file_name) {
   tensorflow::Env* env = tensorflow::Env::Default();
-  TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(directory));
+  string expanded_dir = Expand(directory);
+  TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(expanded_dir));
   string safe_file_name = SanitizeFileName(file_name) + ".pb";
-  const string path = tensorflow::io::JoinPath(directory, safe_file_name);
+  const string path = tensorflow::io::JoinPath(expanded_dir, safe_file_name);
   return tensorflow::WriteBinaryProto(env, path, message);
 }
 
+void RegisterDirectoryExpander(const std::function<string(string)>& expander) {
+  auto pair = GetDirectoryExpanders();
+  tensorflow::mutex_lock lock(*pair.first);
+  pair.second->push_back(expander);
+}
+
 }  // namespace protobuf_util
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/protobuf_util.h b/tensorflow/compiler/xla/protobuf_util.h
index 3667621367..f22fc8b849 100644
--- a/tensorflow/compiler/xla/protobuf_util.h
+++ b/tensorflow/compiler/xla/protobuf_util.h
@@ -39,6 +39,10 @@ extern bool ProtobufEquals(const tensorflow::protobuf::Message& m1,
 Status DumpProtoToDirectory(const tensorflow::protobuf::Message& message,
                             const string& directory, const string& file_name);
 
+// Registers a function that may either expand a dirpath or forward the original
+// dirpath along as-is.
+void RegisterDirectoryExpander(const std::function<string(string)>& expander);
+
 }  // namespace protobuf_util
 }  // namespace xla
 
diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc
index e5a6c28478..96bd2616f5 100644
--- a/tensorflow/compiler/xla/service/compile_only_service.cc
+++ b/tensorflow/compiler/xla/service/compile_only_service.cc
@@ -97,7 +97,7 @@ CompileOnlyService::CompileAheadOfTime(
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<HloModule> hlo_module,
         HloModule::CreateFromProto(instance.computation, *module_config));
-    TF_RETURN_IF_ERROR(MaybeDumpHloModule(*hlo_module));
+    TF_RETURN_IF_ERROR(MaybeDumpUnoptimizedHloModule(*hlo_module));
     hlo_modules.push_back(std::move(hlo_module));
   }
 
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 922ebdf0e3..b27a92f2a0 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -812,7 +812,7 @@ StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable(
   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
                       HloModule::CreateFromProto(module_proto, *module_config));
 
-  TF_RETURN_IF_ERROR(MaybeDumpHloModule(*module));
+  TF_RETURN_IF_ERROR(MaybeDumpUnoptimizedHloModule(*module));
 
   TF_ASSIGN_OR_RETURN(
       module, backend->compiler()->RunHloPasses(std::move(module), executor,
@@ -1160,7 +1160,7 @@ StatusOr<std::vector<se::StreamExecutor*>> Service::Replicas(
   return replicas;
 }
 
-Status Service::MaybeDumpHloModule(const HloModule& module) const {
+Status Service::MaybeDumpUnoptimizedHloModule(const HloModule& module) const {
   const string xla_dump_unoptimized_hlo_proto_to =
       module.config().debug_options().xla_dump_unoptimized_hlo_proto_to();
   if (xla_dump_unoptimized_hlo_proto_to.empty()) {
@@ -1168,7 +1168,8 @@ Status Service::MaybeDumpHloModule(const HloModule& module) const {
   }
   HloProto proto = MakeHloProto(module);
   return protobuf_util::DumpProtoToDirectory(
-      proto, xla_dump_unoptimized_hlo_proto_to, module.name());
+      proto, xla_dump_unoptimized_hlo_proto_to,
+      StrCat(module.name(), ".unoptimized"));
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 44c5248b15..1f62fad4c8 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -271,7 +271,9 @@ class Service : public ServiceInterface {
   StatusOr<std::vector<se::StreamExecutor*>> Replicas(
       const Backend& backend, const DeviceHandle& device_handle) const;
 
-  Status MaybeDumpHloModule(const HloModule& module) const;
+  // Dumps the (unoptimized) module given if the corresponding DebugOptions
+  // field has been set.
+  Status MaybeDumpUnoptimizedHloModule(const HloModule& module) const;
 
   // Returns the device handle that represents the replicated device for a
   // single computation that is not model-parallelized.
-- 
GitLab


From 6dd278831a62be829ce6f15039e5b6b368b3727c Mon Sep 17 00:00:00 2001
From: Sung Jin Hwang <sjhwang@google.com>
Date: Thu, 13 Sep 2018 19:44:28 -0700
Subject: [PATCH 0184/1357] Added Pyclif binding rule for config.proto.

PiperOrigin-RevId: 212920113
---
 tensorflow/core/BUILD | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 8f32bc2844..1a86bff5cd 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1920,6 +1920,13 @@ tf_pyclif_proto_library(
     visibility = ["//visibility:public"],
 )
 
+tf_pyclif_proto_library(
+    name = "protobuf/config_pyclif",
+    proto_lib = ":protos_all_cc",
+    proto_srcfile = "protobuf/config.proto",
+    visibility = ["//visibility:public"],
+)
+
 tf_pyclif_proto_library(
     name = "protobuf/device_properties_pyclif",
     proto_lib = ":protos_all_cc",
-- 
GitLab


From 4a665550dacdb5e162e71b4afe039de178ffc49f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Sep 2018 20:08:15 -0700
Subject: [PATCH 0185/1357] Add missing #include to mkl_layout_pass.cc.

PiperOrigin-RevId: 212921868
---
 tensorflow/core/graph/mkl_layout_pass.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 2e644fe987..f5b0105862 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/common_runtime/optimization_registry.h"
 #include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/node_builder.h"
-- 
GitLab


From 40010e6287980f63158807aef163276ed1cce272 Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Thu, 13 Sep 2018 20:36:01 -0700
Subject: [PATCH 0186/1357] [GraphCompiler] Remove the use of XLA context as
 arugmnet.

- XLAContext is never used in the class, remove it from member list.
- Be more clear in the comment that the result is written to the context from the compilation device.

PiperOrigin-RevId: 212924213
---
 tensorflow/compiler/tf2xla/graph_compiler.h | 13 ++++++-------
 tensorflow/compiler/tf2xla/xla_compiler.cc  |  3 +--
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/graph_compiler.h b/tensorflow/compiler/tf2xla/graph_compiler.h
index ab7cac7100..e9f02201cf 100644
--- a/tensorflow/compiler/tf2xla/graph_compiler.h
+++ b/tensorflow/compiler/tf2xla/graph_compiler.h
@@ -55,17 +55,17 @@ namespace tensorflow {
 // op registration infrastructure instead of FunctionLibraryRuntime.
 class GraphCompiler {
  public:
-  GraphCompiler(XlaContext* xla_context, XlaCompilationDevice* device,
-                Graph* graph, FunctionLibraryRuntime* flib,
+  GraphCompiler(XlaCompilationDevice* device, Graph* graph,
+                FunctionLibraryRuntime* flib,
                 ScopedStepContainer* step_container)
-      : xla_context_(xla_context),
-        device_(device),
+      : device_(device),
         graph_(graph),
         flib_(flib),
         step_container_(step_container) {}
 
-  // Compiles the graph. The results are written in `xla_context` that is passed
-  // into the compiler.
+  // Compiles the graph. The results are written in xla_context stored in the
+  // resource_manager of the 'XlaCompilationDevice' that's passed into the
+  // constructor.
   Status Compile();
 
  private:
@@ -82,7 +82,6 @@ class GraphCompiler {
   // using `compiler_`.
   Status CompileFunctionalNode(Node* n, OpKernelContext* op_context);
 
-  XlaContext* xla_context_;
   XlaCompilationDevice* device_;
   Graph* graph_;
   FunctionLibraryRuntime* flib_;
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index 105f3b61d5..739e47778a 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -325,8 +325,7 @@ Status ExecuteGraph(XlaContext* xla_context, std::unique_ptr<Graph> graph,
       step_container->name(), XlaContext::kXlaContextResourceName,
       xla_context));
 
-  GraphCompiler graph_compiler(xla_context, device, graph.get(), flib,
-                               step_container.get());
+  GraphCompiler graph_compiler(device, graph.get(), flib, step_container.get());
   TF_RETURN_IF_ERROR(graph_compiler.Compile());
   // Explicitly clean up the step container, to capture the cleanup status.
   step_container.reset();
-- 
GitLab


From 30e176f584d80898ebad00d2a2ff226e6c281c50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Fri, 14 Sep 2018 11:32:37 +0800
Subject: [PATCH 0187/1357] CLN: only assert gains >= 0 for normalization

---
 tensorflow/python/estimator/canned/boosted_trees.py  |  8 ++++----
 .../python/estimator/canned/boosted_trees_test.py    | 12 ++++++++++--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 812c892363..7c04ff7970 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -1036,8 +1036,8 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
     feature_importances: A list of corresponding feature importances.
 
   Raises:
-    AssertionError: If feature importances contain negative value.
-      Or if normalize = True and normalization is not possible
+    AssertionError: When normalize = True, if feature importances
+      contain negative value, or if normalization is not possible
       (e.g. ensemble is empty or trees contain only a root node).
   """
   tree_importances = [_compute_feature_importances_per_tree(tree, num_features)
@@ -1045,9 +1045,9 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
   tree_importances = np.array(tree_importances)
   tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1)
   feature_importances = np.sum(tree_importances * tree_weights, axis=0)
-  assert np.all(feature_importances >= 0), ('feature_importances '
-                                            'must be non-negative.')
   if normalize:
+    assert np.all(feature_importances >= 0), ('feature_importances '
+                                              'must be non-negative.')
     normalizer = np.sum(feature_importances)
     assert normalizer > 0, 'Trees are all empty or contain only a root node.'
     feature_importances /= normalizer
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 1ce4f7d765..3158ccca81 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -949,8 +949,16 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self._create_fake_checkpoint_with_tree_ensemble_proto(
         est, tree_ensemble_text)
 
-    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
-      est.experimental_feature_importances(normalize=False)
+    # Github #21509 (nataliaponomareva):
+    # The gains stored in the splits can be negative
+    # if people are using complexity regularization.
+    feature_names_expected = ['f_2_bucketized',
+                              'f_0_bucketized',
+                              'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.0, 0.0, -5.0], importances)
 
     with self.assertRaisesRegexp(AssertionError, 'non-negative'):
       est.experimental_feature_importances(normalize=True)
-- 
GitLab


From 9fcf40afede43c09243d06ba420ac44249067872 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Fri, 14 Sep 2018 13:44:30 +0800
Subject: [PATCH 0188/1357] CLN: remove unused import

---
 tensorflow/python/keras/layers/advanced_activations.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 61ab69c16f..6922d3ec1e 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.keras import activations
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import constraints
 from tensorflow.python.keras import initializers
-- 
GitLab


From b43aeb053ec440ea5205a09c229339c10a962af4 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 13 Sep 2018 23:25:24 -0700
Subject: [PATCH 0189/1357] Automated rollback of commit
 eb5cd6926ef8d2a5a748f1aa978e51148e22dd97

PiperOrigin-RevId: 212936412
---
 tensorflow/python/keras/backend.py                    |  3 ---
 tensorflow/python/keras/backend_test.py               |  3 +--
 .../python/keras/layers/advanced_activations.py       | 11 ++++++-----
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 6f766c6257..529b07dc12 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -3462,9 +3462,6 @@ def relu(x, alpha=0., max_value=None, threshold=0):
   clip_max = max_value is not None
 
   if alpha != 0.:
-    if max_value is None and threshold == 0:
-      return nn.leaky_relu(x, alpha=alpha)
-
     if threshold != 0:
       negative_part = nn.relu(-x + threshold)
     else:
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index ab71589940..2f271c4f50 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -522,9 +522,8 @@ class BackendLinearAlgebraTest(test.TestCase):
       relu_op = keras.backend.relu(x)
       self.assertAllClose(keras.backend.eval(relu_op), [[0, 0], [2, 7]])
 
-      # alpha (leaky relu used)
+      # alpha
       relu_op = keras.backend.relu(x, alpha=0.5)
-      self.assertTrue('LeakyRelu' in relu_op.name)
       self.assertAllClose(keras.backend.eval(relu_op), [[-2, 0], [2, 7]])
 
       # max_value < some elements
diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 731d180a80..61ab69c16f 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -268,7 +268,7 @@ class Softmax(Layer):
     self.axis = axis
 
   def call(self, inputs):
-    return K.softmax(inputs, axis=self.axis)
+    return activations.softmax(inputs, axis=self.axis)
 
   def get_config(self):
     config = {'axis': self.axis}
@@ -322,10 +322,11 @@ class ReLU(Layer):
   def call(self, inputs):
     # alpha is used for leaky relu slope in activations instead of
     # negative_slope.
-    return K.relu(inputs,
-                  alpha=self.negative_slope,
-                  max_value=self.max_value,
-                  threshold=self.threshold)
+    return activations.relu(
+        inputs,
+        alpha=self.negative_slope,
+        max_value=self.max_value,
+        threshold=self.threshold)
 
   def get_config(self):
     config = {
-- 
GitLab


From 3a2276ced02b217596080fb34654d2dce5069f81 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Fri, 14 Sep 2018 01:24:52 -0700
Subject: [PATCH 0190/1357] [XLA:TF] Make FloorDiv not crash on unsigned types

FloorDiv (which corresponds to the // operator in python) supports uint8 and
uint16 (but not uint32) in TF. Using xla::Abs on unsigned types throws an error,
but the rounding logic is trivial for unsigned types so just do a plain Div.

This isn't tested yet because we don't have any targets supporting uint8 or
uint16 yet.

PiperOrigin-RevId: 212946132
---
 tensorflow/compiler/tf2xla/kernels/binary_ops.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
index df17da4c1c..0d9a768a6f 100644
--- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
@@ -66,6 +66,9 @@ XLA_MAKE_BINARY(Complex, xla::Complex(lhs, rhs, extend_dimensions));
 static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                                xla::XlaOp y, const BCast& broadcast_helper) {
   std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  if (DataTypeIsUnsigned(dtype)) {
+    return xla::Div(x, y);
+  }
   auto zero = XlaHelpers::Zero(b, dtype);
   auto one = XlaHelpers::One(b, dtype);
   auto different_sign = xla::Ne(xla::Lt(x, zero), xla::Lt(y, zero));
-- 
GitLab


From e9f5df6d48eb0999281d73b85ce4a126fcfaab98 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 02:03:32 -0700
Subject: [PATCH 0191/1357] compat: Update forward compatibility horizon to
 2018-09-14

PiperOrigin-RevId: 212949973
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 1a1ed04e0d..8a100fe975 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 13)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 14)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From c335f3ae6872715c4873eb8af3ff2e42833bc6c0 Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Fri, 14 Sep 2018 02:26:55 -0700
Subject: [PATCH 0192/1357] [Grappler] s/std::string/string/

string and std::string are not necessarily the same thing in TF, but this code assumed that they are.

PiperOrigin-RevId: 212952877
---
 tensorflow/core/grappler/costs/graph_properties.cc        | 8 ++++----
 tensorflow/core/grappler/costs/utils.cc                   | 8 ++++----
 tensorflow/core/grappler/costs/utils.h                    | 2 +-
 tensorflow/core/grappler/costs/virtual_scheduler_test.cc  | 8 ++++----
 tensorflow/core/grappler/inputs/utils.cc                  | 7 ++++---
 tensorflow/core/grappler/inputs/utils.h                   | 4 ++--
 tensorflow/core/grappler/op_types.cc                      | 2 +-
 .../core/grappler/optimizers/arithmetic_optimizer_test.cc | 4 ++--
 tensorflow/core/grappler/optimizers/data/graph_utils.cc   | 2 +-
 9 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index d273eddf81..56c8339d57 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -260,13 +260,13 @@ typename DisjointSet<Handle>::Rep* DisjointSet<Handle>::Find(Handle value) {
 }
 
 bool IsEnqueue(const NodeDef& n) {
-  return (n.op().find("Enqueue") != std::string::npos &&
-          n.op().find("EnqueueMany") == std::string::npos);
+  return (n.op().find("Enqueue") != string::npos &&
+          n.op().find("EnqueueMany") == string::npos);
 }
 
 bool IsDequeue(const NodeDef& n) {
-  return (n.op().find("Dequeue") != std::string::npos &&
-          n.op().find("DequeueMany") == std::string::npos);
+  return (n.op().find("Dequeue") != string::npos &&
+          n.op().find("DequeueMany") == string::npos);
 }
 
 bool HasAnyUnknownDimensions(const TensorShapeProto& proto) {
diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index aad00ce039..83434ea40f 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -127,7 +127,7 @@ static void ExtractExtraProperties(
 
       // For filename input, the file size can also be useful.
       if (op_def && i < op_def->input_arg_size() &&
-          op_def->input_arg(i).name().find("filename") != std::string::npos) {
+          op_def->input_arg(i).name().find("filename") != string::npos) {
         Tensor tensor;
         if (!tensor.FromProto(t)) {
           continue;
@@ -153,7 +153,7 @@ static void ExtractExtraProperties(
     // When the input is a handle (e.g. look up table handle), the information
     // in the op itself is not sufficient to predict the op memory.
     if (op_def && i < op_def->input_arg_size() &&
-        op_def->input_arg(i).name().find("handle") != std::string::npos) {
+        op_def->input_arg(i).name().find("handle") != string::npos) {
       string new_key = strings::StrCat("parent_", i, "_op");
       AttrValue attr;
       attr.set_s(input_node->op());
@@ -320,8 +320,8 @@ void TensorSizeHistogram::Merge(const TensorSizeHistogram& src) {
                  buckets_.begin(), std::plus<uint64>());
 }
 
-std::string TensorSizeHistogram::ToString() const {
-  std::string r;
+string TensorSizeHistogram::ToString() const {
+  string r;
   char buf[200];
   snprintf(buf, sizeof(buf), "Count: %lld, Average: ", num_elem_);
   r.append(buf);
diff --git a/tensorflow/core/grappler/costs/utils.h b/tensorflow/core/grappler/costs/utils.h
index d2c7c67666..5fd6717712 100644
--- a/tensorflow/core/grappler/costs/utils.h
+++ b/tensorflow/core/grappler/costs/utils.h
@@ -80,7 +80,7 @@ class TensorSizeHistogram {
   uint64 Max() const { return max_; }
   uint64 NumElem() const { return num_elem_; }
   uint64 SumElem() const { return sum_elem_; }
-  std::string ToString() const;
+  string ToString() const;
 
  protected:
   const int Index(const uint64 value) const;
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
index 02a379fca8..80889afc86 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
@@ -1999,13 +1999,13 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) {
 
   // Helper lambda to extract port num from _Send and _Recv op name.
   auto get_port_num = [](const string& name) -> int {
-    if (name.find("bn_0") != std::string::npos) {
+    if (name.find("bn_0") != string::npos) {
       return 0;
-    } else if (name.find("bn_1") != std::string::npos) {
+    } else if (name.find("bn_1") != string::npos) {
       return 1;
-    } else if (name.find("bn_2") != std::string::npos) {
+    } else if (name.find("bn_2") != string::npos) {
       return 2;
-    } else if (name.find("bn_minus1") != std::string::npos) {
+    } else if (name.find("bn_minus1") != string::npos) {
       return -1;
     }
     return -999;
diff --git a/tensorflow/core/grappler/inputs/utils.cc b/tensorflow/core/grappler/inputs/utils.cc
index 5029dff877..def9198a69 100644
--- a/tensorflow/core/grappler/inputs/utils.cc
+++ b/tensorflow/core/grappler/inputs/utils.cc
@@ -14,10 +14,11 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/inputs/utils.h"
-#include "tensorflow/core/platform/env.h"
 
 #include <vector>
 
+#include "tensorflow/core/platform/env.h"
+
 namespace tensorflow {
 namespace grappler {
 
@@ -29,12 +30,12 @@ bool FilesExist(const std::set<string>& files) {
   return FilesExist(std::vector<string>(files.begin(), files.end()), nullptr);
 }
 
-bool FileExists(const std::string& file, Status* status) {
+bool FileExists(const string& file, Status* status) {
   *status = Env::Default()->FileExists(file);
   return status->ok();
 }
 
-Status ReadGraphDefFromFile(const std::string& graph_def_pbtxt_path,
+Status ReadGraphDefFromFile(const string& graph_def_pbtxt_path,
                             GraphDef* result) {
   Status status;
   if (FileExists(graph_def_pbtxt_path, &status)) {
diff --git a/tensorflow/core/grappler/inputs/utils.h b/tensorflow/core/grappler/inputs/utils.h
index 627dd5359f..4b9cb0a9ad 100644
--- a/tensorflow/core/grappler/inputs/utils.h
+++ b/tensorflow/core/grappler/inputs/utils.h
@@ -29,9 +29,9 @@ bool FilesExist(const std::vector<string>& files,
                 std::vector<Status>* status = nullptr);
 bool FilesExist(const std::set<string>& files);
 
-bool FileExists(const std::string& file, Status* status);
+bool FileExists(const string& file, Status* status);
 
-Status ReadGraphDefFromFile(const std::string& graph_def_pbtxt_path,
+Status ReadGraphDefFromFile(const string& graph_def_pbtxt_path,
                             GraphDef* result);
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index e78239bd43..3521669b63 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -491,7 +491,7 @@ bool IsFreeOfSideEffect(const NodeDef& node) {
     }
   }
   // Queue ops modify the queue which is a side effect.
-  if (node.op().find("Queue") != std::string::npos) {
+  if (node.op().find("Queue") != string::npos) {
     return false;
   }
   return !ModifiesInputsInPlace(node);
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 39517edc06..bc838c6659 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -581,7 +581,7 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) {
   const NodeDef* new_const = node_map.GetNode(optimized_const_name);
   ASSERT_NE(new_const, nullptr);
   EXPECT_EQ("^x", new_const->input(0));
-  EXPECT_EQ(std::string("\0\0\0@", 4),
+  EXPECT_EQ(string("\0\0\0@", 4),
             new_const->attr().at("value").tensor().tensor_content());
 
   const NodeDef* new_mul = node_map.GetNode(optimized_mul_name);
@@ -625,7 +625,7 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimpleWithControlDep) {
   const NodeDef* new_const = node_map.GetNode(optimized_const_name);
   ASSERT_NE(new_const, nullptr);
   EXPECT_EQ("^x", new_const->input(0));
-  EXPECT_EQ(std::string("\0\0\0@", 4),
+  EXPECT_EQ(string("\0\0\0@", 4),
             new_const->attr().at("value").tensor().tensor_content());
 
   const NodeDef* new_mul = node_map.GetNode(optimized_mul_name);
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index 5a7fe19265..d4ab444036 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -273,7 +273,7 @@ void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph,
   string name = string(prefix);
   int id = graph->node_size();
   while (ContainsGraphNodeWithName(name, *graph)) {
-    if (name.rfind("_generated") != std::string::npos &&
+    if (name.rfind("_generated") != string::npos &&
         (name.rfind("_generated") == (name.size() - strlen("_generated")))) {
       name.insert(name.rfind("_generated"), strings::StrCat("/_", id));
     } else {
-- 
GitLab


From 54cbee5d034af8693aa39cc5877c3dfcd62d3740 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 02:30:05 -0700
Subject: [PATCH 0193/1357] [TF:XLA] Split XLA Concat Ops that fail on large
 sets of inputs.

Make the test large to prevent occasional timeouts on CPU. This should normally complete in well under a minute.

PiperOrigin-RevId: 212953337
---
 tensorflow/compiler/tests/BUILD               |  3 +-
 tensorflow/compiler/tests/concat_ops_test.py  | 35 +++++++++++++++++++
 .../compiler/tf2xla/kernels/concat_op.cc      | 33 ++++++++++++++++-
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 2176eaebe4..97ed554171 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -277,9 +277,10 @@ tf_xla_py_test(
     ],
 )
 
+# This test is large because occasionally the cpu test is long for testConcatLargeNumberOfTensors
 tf_xla_py_test(
     name = "concat_ops_test",
-    size = "medium",
+    size = "large",
     srcs = ["concat_ops_test.py"],
     deps = [
         ":xla_test",
diff --git a/tensorflow/compiler/tests/concat_ops_test.py b/tensorflow/compiler/tests/concat_ops_test.py
index 37e5318bb5..2d225ad226 100644
--- a/tensorflow/compiler/tests/concat_ops_test.py
+++ b/tensorflow/compiler/tests/concat_ops_test.py
@@ -291,6 +291,41 @@ class ConcatTest(xla_test.XLATestCase):
             ValueError, r"Can't concatenate scalars \(use tf\.stack instead\)"):
           array_ops.concat([scalar, scalar, scalar], dim)
 
+  # The purpose of this is to ensure that XLA on GPU will not run out of memory
+  # with too many arguments.
+  def testConcatLargeNumberOfTensors(self):
+    with self.cached_session():
+      with self.test_scope():
+        for concat_dim in range(2):
+          params = {}
+          p = []
+          shape = np.array([7, 13])
+          num_tensors = 1001
+          for i in np.arange(num_tensors):
+            input_shape = shape
+            placeholder = array_ops.placeholder(
+                dtypes.float32, shape=input_shape)
+            p.append(placeholder)
+            params[placeholder] = np.random.rand(*input_shape).astype(
+                np.float32)
+
+          concat_inputs = p
+          c = array_ops.concat(concat_inputs, concat_dim)
+          result = c.eval(feed_dict=params)
+
+          self.assertEqual(result.shape, c.get_shape())
+          cur_offset = 0
+
+          for i in np.arange(num_tensors):
+            # The index into the result is the ':' along all dimensions
+            # except the concat_dim. slice(0, size) is used for ':', and
+            # a list of slices is used to index into result.
+            index = [slice(0, params[p[i]].shape[j]) for j in np.arange(2)]
+            index[concat_dim] = slice(
+                cur_offset, cur_offset + params[p[i]].shape[concat_dim])
+            cur_offset += params[p[i]].shape[concat_dim]
+            self.assertAllEqual(result[index], params[p[i]])
+
 
 class ConcatOffsetTest(xla_test.XLATestCase):
 
diff --git a/tensorflow/compiler/tf2xla/kernels/concat_op.cc b/tensorflow/compiler/tf2xla/kernels/concat_op.cc
index f410605104..0ae23aa6df 100644
--- a/tensorflow/compiler/tf2xla/kernels/concat_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/concat_op.cc
@@ -37,6 +37,16 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+// Used to determine the number of Tensors allowed in a Concat op to prevent
+// going over the max gpu parameter memory size. This is an issue because concat
+// is variadic and can have an unlimited number of arguments when called.
+// Concat ops with more Tensors than this will be split into multiple concat
+// ops.
+//
+// TODO(b/112613927): Remove the logic here and put it properly in an HLO pass
+// along with boxing large numbers of parameters.
+constexpr int64 kMaxConcatArgsPerOp = 500;
+
 // --------------------------------------------------------------------------
 class ConcatBaseOp : public XlaOpKernel {
  public:
@@ -74,6 +84,7 @@ class ConcatBaseOp : public XlaOpKernel {
     // Make a vector holding the XlaOp for each of the inputs that has non-zero
     // elements.
     std::vector<xla::XlaOp> input_data;
+    std::vector<xla::XlaOp> partial_concats;
     int output_concat_dim = 0;
     const bool input_is_scalar = IsLegacyScalar(input_shape);
     for (int i = 0; i < N; ++i) {
@@ -94,10 +105,30 @@ class ConcatBaseOp : public XlaOpKernel {
         input_data.push_back(handle);
       }
       output_concat_dim += in_shape.dims() > 0 ? in_shape.dim_size(axis) : 1;
+
+      // Concat is associative, so it can be split into many operations when too
+      // many arguments are in a single op. This is a temporary workaround for
+      // b/112613927 where too many parameters in an XlaLaunchOp later result in
+      // too many parameters to a single GPU kernel.
+      if (i && i % kMaxConcatArgsPerOp == 0) {
+        partial_concats.push_back(
+            xla::ConcatInDim(ctx->builder(), input_data, axis));
+        input_data.clear();
+      }
     }
+    // Add any inputs that have not been put into another concat yet.
+    partial_concats.insert(partial_concats.end(), input_data.begin(),
+                           input_data.end());
 
     VLOG(1) << "Concat dim " << concat_dim << " equivalent to " << axis;
-    ctx->SetOutput(0, xla::ConcatInDim(ctx->builder(), input_data, axis));
+    // Don't add an additional "identity" concatenate for better readibility of
+    // IR.
+    if (partial_concats.size() == 1) {
+      ctx->SetOutput(0, partial_concats.front());
+    } else {
+      ctx->SetOutput(0,
+                     xla::ConcatInDim(ctx->builder(), partial_concats, axis));
+    }
   }
 
  private:
-- 
GitLab


From 3c283b598f2de0376dfaf63ed50c4625abbf6e03 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Fri, 14 Sep 2018 06:23:56 -0700
Subject: [PATCH 0194/1357] Run buildifier on build_defs.bzl

PiperOrigin-RevId: 212972521
---
 tensorflow/compiler/tests/build_defs.bzl | 165 ++++++++++++-----------
 1 file changed, 87 insertions(+), 78 deletions(-)

diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl
index 7b114d4f85..a76f136736 100644
--- a/tensorflow/compiler/tests/build_defs.bzl
+++ b/tensorflow/compiler/tests/build_defs.bzl
@@ -4,88 +4,97 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_is_configured")
 load("//tensorflow/compiler/tests:plugin.bzl", "plugins")
 
 def all_backends():
-  b = ["cpu"] + plugins.keys()
-  if cuda_is_configured():
-    return b + ["gpu"]
-  else:
-    return b
+    b = ["cpu"] + plugins.keys()
+    if cuda_is_configured():
+        return b + ["gpu"]
+    else:
+        return b
 
-def tf_xla_py_test(name, srcs=[], deps=[], tags=[], data=[], main=None,
-                   disabled_backends=None, **kwargs):
-  """Generates py_test targets, one per XLA backend.
+def tf_xla_py_test(
+        name,
+        srcs = [],
+        deps = [],
+        tags = [],
+        data = [],
+        main = None,
+        disabled_backends = None,
+        **kwargs):
+    """Generates py_test targets, one per XLA backend.
 
-  This rule generates py_test() targets named name_backend, for each backend
-  in all_backends(). The rule also generates a test suite with named `name` that
-  tests all backends for the test.
+    This rule generates py_test() targets named name_backend, for each backend
+    in all_backends(). The rule also generates a test suite with named `name` that
+    tests all backends for the test.
 
-  For example, the following rule generates test cases foo_test_cpu,
-  foo_test_gpu, and a test suite name foo_test that tests both.
-  tf_xla_py_test(
-      name="foo_test",
-      srcs="foo_test.py",
-      deps=[...],
-  )
+    For example, the following rule generates test cases foo_test_cpu,
+    foo_test_gpu, and a test suite name foo_test that tests both.
+    tf_xla_py_test(
+        name="foo_test",
+        srcs="foo_test.py",
+        deps=[...],
+    )
 
-  Args:
-    name: Name of the target.
-    srcs: Sources for the target.
-    deps: Dependencies of the target.
-    tags: Tags to apply to the generated targets.
-    data: Data dependencies of the target.
-    main: Same as py_test's main attribute.
-    disabled_backends: A list of backends that should not be tested. Supported
-      values include "cpu" and "gpu". If not specified, defaults to None.
-    **kwargs: keyword arguments passed onto the generated py_test() rules.
-  """
-  if disabled_backends == None:
-    disabled_backends = []
+    Args:
+      name: Name of the target.
+      srcs: Sources for the target.
+      deps: Dependencies of the target.
+      tags: Tags to apply to the generated targets.
+      data: Data dependencies of the target.
+      main: Same as py_test's main attribute.
+      disabled_backends: A list of backends that should not be tested. Supported
+        values include "cpu" and "gpu". If not specified, defaults to None.
+      **kwargs: keyword arguments passed onto the generated py_test() rules.
+    """
+    if disabled_backends == None:
+        disabled_backends = []
 
-  enabled_backends = [b for b in all_backends() if b not in disabled_backends]
-  test_names = []
-  for backend in enabled_backends:
-    test_name = "{}_{}".format(name, backend)
-    backend_tags = ["tf_xla_{}".format(backend)]
-    backend_args = []
-    backend_deps = []
-    backend_data = []
-    if backend == "cpu":
-      backend_args += [
-          "--test_device=XLA_CPU",
-          "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64"
-      ]
-    elif backend == "gpu":
-      backend_args += [
-          "--test_device=XLA_GPU",
-          "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16"
-      ]
-      backend_tags += ["requires-gpu-sm35"]
-    elif backend in plugins:
-      backend_args += ["--test_device=" + plugins[backend]["device"],
-                       "--types=" + plugins[backend]["types"]]
-      backend_tags += plugins[backend]["tags"]
-      backend_args += plugins[backend]["args"]
-      backend_deps += plugins[backend]["deps"]
-      backend_data += plugins[backend]["data"]
-    else:
-      fail("Unknown backend {}".format(backend))
+    enabled_backends = [b for b in all_backends() if b not in disabled_backends]
+    test_names = []
+    for backend in enabled_backends:
+        test_name = "{}_{}".format(name, backend)
+        backend_tags = ["tf_xla_{}".format(backend)]
+        backend_args = []
+        backend_deps = []
+        backend_data = []
+        if backend == "cpu":
+            backend_args += [
+                "--test_device=XLA_CPU",
+                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64",
+            ]
+        elif backend == "gpu":
+            backend_args += [
+                "--test_device=XLA_GPU",
+                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16",
+            ]
+            backend_tags += ["requires-gpu-sm35"]
+        elif backend in plugins:
+            backend_args += [
+                "--test_device=" + plugins[backend]["device"],
+                "--types=" + plugins[backend]["types"],
+            ]
+            backend_tags += plugins[backend]["tags"]
+            backend_args += plugins[backend]["args"]
+            backend_deps += plugins[backend]["deps"]
+            backend_data += plugins[backend]["data"]
+        else:
+            fail("Unknown backend {}".format(backend))
 
-    native.py_test(
-        name=test_name,
-        srcs=srcs,
-        srcs_version="PY2AND3",
-        args=backend_args,
-        main="{}.py".format(name) if main == None else main,
-        data=data + backend_data,
-        deps=deps + backend_deps,
-        tags=tags + backend_tags,
-        **kwargs
-    )
-    test_names.append(test_name)
-  native.test_suite(name=name, tests=test_names)
+        native.py_test(
+            name = test_name,
+            srcs = srcs,
+            srcs_version = "PY2AND3",
+            args = backend_args,
+            main = "{}.py".format(name) if main == None else main,
+            data = data + backend_data,
+            deps = deps + backend_deps,
+            tags = tags + backend_tags,
+            **kwargs
+        )
+        test_names.append(test_name)
+    native.test_suite(name = name, tests = test_names)
 
-def generate_backend_suites(backends=[]):
-  """Generates per-backend test_suites that run all tests for a backend."""
-  if not backends:
-    backends = all_backends()
-  for backend in backends:
-    native.test_suite(name="%s_tests" % backend, tags=["tf_xla_%s" % backend])
+def generate_backend_suites(backends = []):
+    """Generates per-backend test_suites that run all tests for a backend."""
+    if not backends:
+        backends = all_backends()
+    for backend in backends:
+        native.test_suite(name = "%s_tests" % backend, tags = ["tf_xla_%s" % backend])
-- 
GitLab


From 6aebb0866718cae2c921e875f3fd74573ee9acc8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 08:29:15 -0700
Subject: [PATCH 0195/1357] global_step/sec renamed to global_steps/sec

PiperOrigin-RevId: 212986442
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 1ff04f5c26..23c54511ca 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -1774,18 +1774,19 @@ class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook):
         summary_writer=summary_writer)
 
   def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
-    global_step_per_sec = elapsed_steps / elapsed_time
-    examples_per_sec = self._batch_size * global_step_per_sec
+    global_steps_per_sec = elapsed_steps / elapsed_time
+    examples_per_sec = self._batch_size * global_steps_per_sec
     if self._summary_writer is not None:
       global_step_summary = Summary(value=[
-          Summary.Value(tag='global_step/sec', simple_value=global_step_per_sec)
+          Summary.Value(tag='global_steps/sec',
+                        simple_value=global_steps_per_sec)
       ])
       example_summary = Summary(value=[
           Summary.Value(tag='examples/sec', simple_value=examples_per_sec)
       ])
       self._summary_writer.add_summary(global_step_summary, global_step)
       self._summary_writer.add_summary(example_summary, global_step)
-    logging.info('global_step/sec: %g', global_step_per_sec)
+    logging.info('global_steps/sec: %g', global_steps_per_sec)
     logging.info('examples/sec: %g', examples_per_sec)
 
 
-- 
GitLab


From 85b0ec839b6954fc7d3f396406e8797cc984d3cc Mon Sep 17 00:00:00 2001
From: Lasse Espeholt <lespeholt@google.com>
Date: Fri, 14 Sep 2018 08:53:44 -0700
Subject: [PATCH 0196/1357] Fix bug preventing one from not specifying
 additional function attributes.

PiperOrigin-RevId: 212989480
---
 tensorflow/python/eager/function.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 552ed29f65..962e334b27 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -529,7 +529,7 @@ class Function(object):
     self._num_outputs = len(self._func_graph.outputs)
     self._output_shapes = tuple(
         output.shape for output in self._func_graph.outputs)
-    self._attrs = _parse_func_attrs(attrs)
+    self._attrs = _parse_func_attrs(attrs or {})
     self._device_functions = tuple(
         self._func_graph._device_functions_outer_to_inner)  # pylint: disable=protected-access
 
-- 
GitLab


From 95338704198205c1bdec1e344e103f1daf05df68 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Fri, 14 Sep 2018 09:04:42 -0700
Subject: [PATCH 0197/1357] Internal change.

PiperOrigin-RevId: 212991181
---
 tensorflow/contrib/lite/build_def.bzl | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 5c705ea53b..52b994ee92 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -337,11 +337,7 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs):
         flags = "--ignore_toco_errors --run_with_extended"
         kwargs["tags"].append("skip_already_failing")
         kwargs["tags"].append("no_oss")
-
-        # TODO(b/115504899): Re-enable asan, msan and tsan tests.
-        kwargs["tags"].append("noasan")
-        kwargs["tags"].append("nomsan")
-        kwargs["tags"].append("notsan")
+        kwargs["tags"].append("notap")
 
     gen_zipped_test_file(
         name = "zip_%s" % test_name,
-- 
GitLab


From 7210ca23ce19e54aa3cbc21ff72e5e5d4189dfea Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Fri, 14 Sep 2018 09:55:23 -0700
Subject: [PATCH 0198/1357] Fixed the missing license file caught by
 do_pip_package_licenses_check test.

---
 tensorflow/tools/pip_package/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 50515b04a9..31a3712de8 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -210,6 +210,7 @@ filegroup(
         "@ngraph//:LICENSE",
         "@ngraph_tf//:LICENSE",
         "@nlohmann_json_lib//:LICENSE.MIT",
+        "@tbb//:LICENSE",
     ]) + tf_additional_license_deps(),
 )
 
-- 
GitLab


From 82e4edc50fb146dbf006cd81aaac6d01f40533a6 Mon Sep 17 00:00:00 2001
From: Raghuraman Krishnamoorthi <raghuramank@google.com>
Date: Fri, 14 Sep 2018 09:51:21 -0700
Subject: [PATCH 0199/1357]  Update description of contrib.quantize

PiperOrigin-RevId: 212997520
---
 tensorflow/contrib/quantize/README.md | 158 ++++++++++++++++++++------
 1 file changed, 124 insertions(+), 34 deletions(-)

diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md
index 27a933c0f9..3f1e7d2792 100644
--- a/tensorflow/contrib/quantize/README.md
+++ b/tensorflow/contrib/quantize/README.md
@@ -1,65 +1,155 @@
-# Quantized Training Rewrites
+# Quantization-aware training
 
-tf.contrib.quantize provides tools for transforming graphs to include ops to
-model quantization of weights, biases and activations during both training and
-inference. The details of the transformation implemented in this package is
-described here [1].
+Quantization-aware model training ensures that the forward pass matches precision
+for both training and inference. There are two aspects to this:
 
-This is done using the
-[fake quantization op](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization).
+* Operator fusion at inference time are accurately modeled at training time.
+* Quantization effects at inference are modeled at training time.
 
-Literature has shown that fixed point networks provide comparable performance to
-floating point networks [2]. This is achieved by modeling the quantization
-operation during training in both the forward and backward passes.
-The fake quantization operator achieves this by modeling the quantizer as a pass
-through estimator [3]. Note that during back propagation, the parameters are
+For efficient inference, TensorFlow combines batch normalization with the preceding
+convolutional and fully-connected layers prior to quantization by
+[folding batch norm layers](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/quantize/python/fold_batch_norms.py){:.external}. 
+
+The quantization error is modeled using [fake quantization](../api_guides/python/array_ops.md#Fake_quantization)
+nodes to simulate the effect of quantization in the forward and backward passes. The
+forward-pass models quantization, while the backward-pass models quantization as a
+straight-through estimator. Both the forward- and backward-pass simulate the quantization
+of weights and activations. Note that during back propagation, the parameters are
 updated at high precision as this is needed to ensure sufficient precision in
-accumulating tiny adjustments to the parameters. However, for the forward pass,
-the parameters and activations are quantized to the desired lower precision.
+accumulating tiny adjustments to the parameters.
+
 
-## How to use the Rewrites
+Additionally, the minimum and maximum values for activations are determined
+during training. This allows a model trained with quantization in the loop to be
+converted to a fixed point inference model with little effort, eliminating the
+need for a separate calibration step.
 
-tf.contrib.quantize provides two rewrites, one to train for quantization and
-one to create a [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/)
-compatible eval graph.
+Since it's difficult to add these fake quantization operations to all the
+required locations in the model, there's a function available that rewrites the
+training graph. To create a fake quantized training graph:
 
 ```
 # Build forward pass of model.
-…
 loss = tf.losses.get_total_loss()
 
-# Call the training rewrite which rewrites the graph in-place with FakeQuantization nodes
-# and folds batchnorm for training.
-# It is often needed to finetune a floating point model for quantization with this training tool.
-# When training from scratch, quant_delay can be used to activate quantization after
-# training to convergence with the float graph, effectively finetuning the model.
-tf.contrib.quantize.create_training_graph(quant_delay=2000000)
+# Call the training rewrite which rewrites the graph in-place with
+# FakeQuantization nodes and folds batchnorm for training. It is
+# often needed to fine tune a floating point model for quantization
+# with this training tool. When training from scratch, quant_delay
+# can be used to activate quantization after training to converge
+# with the float graph, effectively fine-tuning the model.
+g = tf.get_default_graph()
+tf.contrib.quantize.create_training_graph(input_graph=g,
+                                          quant_delay=2000000)
 
 # Call backward pass optimizer as usual.
 optimizer = tf.train.GradientDescentOptimizer(learning_rate)
 optimizer.minimize(loss)
 ```
 
-Additionally, the rewritten eval graph is non-trivially different from the
-training graph due the effects of quantization on batch normalization. Thus,
-we offer a separate rewrite for the eval_graph.
+The rewritten *eval graph* is non-trivially different from the *training graph*
+since the quantization ops affect the batch normalization step. Because of this,
+we've added a separate rewrite for the *eval graph*:
 
 ```
 # Build eval model
-…
-logits = tf.nn.softmax_cross_entropy_with_logits(...)
+logits = tf.nn.softmax_cross_entropy_with_logits_v2(...)
 
-# Call the eval rewrite which rewrites the graph in-place with FakeQuantization nodes
-# and fold batchnorm for eval.
-tf.contrib.quantize.create_eval_graph()
+# Call the eval rewrite which rewrites the graph in-place with
+# FakeQuantization nodes and fold batchnorm for eval.
+g = tf.get_default_graph()
+tf.contrib.quantize.create_eval_graph(input_graph=g)
 
-# Save the checkpoint and eval graph proto to disk for freezing and providing to TFLite.
+# Save the checkpoint and eval graph proto to disk for freezing
+# and providing to TFLite.
 with open(eval_graph_file, ‘w’) as f:
   f.write(str(g.as_graph_def()))
 saver = tf.train.Saver()
 saver.save(sess, checkpoint_name)
 ```
 
+Methods to rewrite the training and eval graphs are an active area of research
+and experimentation. Although rewrites and quantized training might not work or
+improve performance for all models, we are working to generalize these techniques.
+
+
+## Generating fully-quantized models
+
+The previously demonstrated after-rewrite eval graph only *simulates*
+quantization. To generate real fixed-point computations from a trained
+quantization model, convert it to a fixed-point kernel. TensorFlow Lite supports
+this conversion from the graph resulting from `create_eval_graph`.
+
+First, create a frozen graph that will be the input for the TensorFlow Lite
+toolchain:
+
+```
+freeze_graph \
+  --input_graph=eval_graph_def.pb \
+  --input_checkpoint=checkpoint \
+  --output_graph=frozen_eval_graph.pb --output_node_names=outputs
+```
+
+Provide this to the TensorFlow Lite Optimizing Converter (TOCO) to get a
+fully-quantized TensorFlow Lite model:
+
+```
+toco \
+  --input_file=frozen_eval_graph.pb \
+  --output_file=tflite_model.tflite \
+  --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \
+  --inference_type=QUANTIZED_UINT8 \
+  --input_shape="1,224, 224,3" \
+  --input_array=input \
+  --output_array=outputs \
+  --std_value=127.5 --mean_value=127.5
+```
+
+See the documentation for `tf.contrib.quantize` and [TensorFlow Lite](../mobile/tflite/).
+
+
+## Quantized accuracy results
+
+The following are results of trainiing some popular CNN models (Mobilenet-v1,
+Mobilenet-v2, and Inception-v3) using this tool:
+
+<figure>
+  <table>
+    <tr>
+      <th>Model</th>
+      <th>Top-1 Accuracy:<br>Floating point</th>
+      <th>Top-1 Accuracy:<br>Fixed point: 8 bit weights and activations</th>
+    </tr>
+    <tr><td>Mobilenet-v1-128-0.25</td><td>0.415</td><td>0.399</td></tr>
+    <tr><td>Mobilenet-v1-128-0.5</td><td>0.563</td><td>0.549</td></tr>
+    <tr><td>Mobilenet-v1-128-0.75</td><td>0.621</td><td>0.598</td></tr>
+    <tr><td>Mobilenet-v1-128-1</td><td>0.652</td><td>0.64</td></tr>
+    <tr><td>Mobilenet-v1-160-0.25</td><td>0.455</td><td>0.435</td></tr>
+    <tr><td>Mobilenet-v1-160-0.5</td><td>0.591</td><td>0.577</td></tr>
+    <tr><td>Mobilenet-v1-160-0.75</td><td>0.653</td><td>0.639</td></tr>
+    <tr><td>Mobilenet-v1-160-1</td><td>0.68</td><td>0.673</td></tr>
+    <tr><td>Mobilenet-v1-192-0.25</td><td>0.477</td><td>0.458</td></tr>
+    <tr><td>Mobilenet-v1-192-0.5</td><td>0.617</td><td>0.604</td></tr>
+    <tr><td>Mobilenet-v1-192-0.75</td><td>0.672</td><td>0.662</td></tr>
+    <tr><td>Mobilenet-v1-192-1</td><td>0.7</td><td>0.69</td></tr>
+    <tr><td>Mobilenet-v1-224-0.25</td><td>0.498</td><td>0.482</td></tr>
+    <tr><td>Mobilenet-v1-224-0.5</td><td>0.633</td><td>0.622</td></tr>
+    <tr><td>Mobilenet-v1-224-0.75</td><td>0.684</td><td>0.679</td></tr>
+    <tr><td>Mobilenet-v1-224-1</td><td>0.709</td><td>0.697</td></tr>
+    <tr><td>Mobilenet-v2-224-1</td><td>0.718</td><td>0.708</td></tr>
+   <tr><td>Inception_v3</td><td>0.78</td><td>0.775</td></tr>
+  </table>
+  <figcaption>
+    <b>Table 1</b>: Top-1 accuracy of floating point and fully quantized CNNs on Imagenet Validation dataset.
+  </figcaption>
+</figure>
+
+Our pre-trained models are available in the
+<a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/models.md#image-classification-quantized-models" class="external">TensorFlow Lite model repository</a>. The code used to generate
+these models <a href="https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1_train.py" class="external">is available</a>.
+
+
+
 These rewrites are an active area of research and experimentation, so the
 rewrites and quantized training will likely not work across all models, though
 we hope to work towards generalizing these techniques.
-- 
GitLab


From c7458c97a5f752a2ae79da4cba04ced0dbcb76df Mon Sep 17 00:00:00 2001
From: Martin Wicke <wicke@google.com>
Date: Fri, 14 Sep 2018 10:26:11 -0700
Subject: [PATCH 0200/1357] Export tf.keras.sparse_categorical_accuracy. Copied
 from PR #21790.

closes #21790, fixes #21735

PiperOrigin-RevId: 213003724
---
 tensorflow/python/keras/metrics.py                            | 1 +
 tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt | 4 ++++
 tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt | 4 ++++
 3 files changed, 9 insertions(+)

diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 473d8cd95b..fd3c39cf2e 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -615,6 +615,7 @@ def categorical_accuracy(y_true, y_pred):
       K.floatx())
 
 
+@tf_export('keras.metrics.sparse_categorical_accuracy')
 def sparse_categorical_accuracy(y_true, y_pred):
   y_true = math_ops.reduce_max(y_true, axis=-1)
   y_pred = math_ops.argmax(y_pred, axis=-1)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt
index 73b577da37..a296e13158 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt
@@ -104,6 +104,10 @@ tf_module {
     name: "serialize"
     argspec: "args=[\'metric\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "sparse_categorical_accuracy"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "sparse_categorical_crossentropy"
     argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt
index 73b577da37..a296e13158 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt
@@ -104,6 +104,10 @@ tf_module {
     name: "serialize"
     argspec: "args=[\'metric\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "sparse_categorical_accuracy"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "sparse_categorical_crossentropy"
     argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
-- 
GitLab


From 61743287362feb358dfe63cffd1e232f01ca2ab0 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Fri, 14 Sep 2018 10:44:45 -0700
Subject: [PATCH 0201/1357] Make tf.gradients() correctly handle captured
 EagerTensors.

tf.gradients() can't be used to take the gradient of eager
computations, but it should handle ops that take captured eager
tensors as input as long as the gradient computation doesn't depend on
that input. This change makes sure the gradient algorithm doesn't try
to access the op, etc. of EagerTensors.

PiperOrigin-RevId: 213007155
---
 tensorflow/python/ops/gradients_impl.py | 50 +++++++++++++++----------
 tensorflow/python/ops/gradients_test.py | 18 +++++++++
 2 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index 196161c661..056015d6b6 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -184,7 +184,7 @@ def _PendingCount(to_ops, from_ops, colocate_gradients_with_ops, func_graphs,
       between_op_list.append(op)
       # Clear the boolean so we won't add the inputs again.
       reached_ops.remove(op)
-      for inp in _Inputs(op, xs):
+      for inp in _NonEagerInputs(op, xs):
         queue.append(inp.op)
   # X in between_ops iff X is on a path of zero or more backpropagatable tensors
   # between from_ops and to_ops
@@ -196,7 +196,7 @@ def _PendingCount(to_ops, from_ops, colocate_gradients_with_ops, func_graphs,
   # Initialize pending count for between ops.
   pending_count = collections.defaultdict(int)
   for op in between_op_list:
-    for x in _Inputs(op, xs):
+    for x in _NonEagerInputs(op, xs):
       if x.op in between_ops:
         pending_count[x.op] += 1
 
@@ -347,7 +347,7 @@ def _StopOps(from_ops, stop_gradient_ops, pending_count, xs):
   stop_ops = set()
   for op in from_ops:
     is_stop_op = True
-    for inp in _Inputs(op, xs):
+    for inp in _NonEagerInputs(op, xs):
       if pending_count[inp.op] > 0:
         is_stop_op = False
         break
@@ -371,10 +371,10 @@ def _IsPartitionedCall(op):
   return op.type == "PartitionedCall" or op.type == "StatefulPartitionedCall"
 
 
-def _SymGrad(op, out_grads, xs):
+def _SymGrad(op, out_grads):
   """Backprop through a function call node op given its outputs' gradients."""
-  f_in = [x for x in _Inputs(op, xs)] + out_grads
-  f_types = [x.dtype for x in _Inputs(op, xs)]
+  f_in = [x for x in op.inputs] + out_grads
+  f_types = [x.dtype for x in op.inputs]
   f = attr_value_pb2.NameAttrList()
   if _IsPartitionedCall(op):
     f.name = op.get_attr("f").name
@@ -441,7 +441,7 @@ def _RaiseNoGradWrtInitialLoopValError(op, from_ops, xs):
     if curr_op in from_ops:
       target_op = curr_op
       break
-    queue.extend(t.op for t in _Inputs(curr_op, xs))
+    queue.extend(t.op for t in _NonEagerInputs(curr_op, xs))
   assert target_op
   raise ValueError(
       "Cannot compute gradient inside while loop with respect to op '%s'. "
@@ -474,7 +474,8 @@ def _MaybeCaptured(t):
     A tensor, potentially from a different Graph/_function.FuncGraph.
   """
   # pylint: disable=protected-access
-  if _IsFunction(t.op.graph) and t.op.type == "Placeholder":
+  if (not isinstance(t, ops.EagerTensor) and
+      _IsFunction(t.op.graph) and t.op.type == "Placeholder"):
     for input_t, placeholder_t in _Captures(t.op.graph).items():
       if t == placeholder_t:
         return _MaybeCaptured(input_t)
@@ -484,9 +485,12 @@ def _MaybeCaptured(t):
 
 # TODO(skyewm): plumbing xs through everywhere is ugly, consider making
 # _GradientsHelper a class with xs as a member variable.
-def _Inputs(op, xs):
+def _NonEagerInputs(op, xs):
   """Returns the inputs of op, crossing closure boundaries where necessary.
 
+  Does not return any captured EagerTensors, i.e., the number of tensors
+  returned may be less than than the actual number of inputs.
+
   Args:
     op: Operation
     xs: list of Tensors we are differentiating w.r.t.
@@ -497,12 +501,19 @@ def _Inputs(op, xs):
     captured inputs.
   """
   if _IsFunction(op.graph):  # pylint: disable=protected-access
-    # If we're differentiating w.r.t. `t`, do not attempt to traverse through it
-    # to a captured value. The algorithm needs to "see" `t` in this case, even
-    # if it's a function input for a captured value, whereas usually we'd like
-    # to traverse through these closures as if the captured value was the direct
-    # input to op.
-    return [t if (t in xs) else _MaybeCaptured(t) for t in op.inputs]
+    inputs = []
+    for t in op.inputs:
+      # If we're differentiating w.r.t. `t`, do not attempt to traverse through
+      # it to a captured value. The algorithm needs to "see" `t` in this case,
+      # even if it's a function input for a captured value, whereas usually we'd
+      # like to traverse through these closures as if the captured value was the
+      # direct input to op.
+      if t not in xs:
+        t = _MaybeCaptured(t)
+        # Skip captured eager inputs.
+        if isinstance(t, ops.EagerTensor): continue
+      inputs.append(t)
+    return inputs
   else:
     return op.inputs
 
@@ -805,7 +816,7 @@ def _GradientsHelper(ys,
                 # For function call ops, we add a 'SymbolicGradient'
                 # node to the graph to compute gradients.
                 in_grads = _MaybeCompile(grad_scope, op, func_call,
-                                         lambda: _SymGrad(op, out_grads, xs))
+                                         lambda: _SymGrad(op, out_grads))
               in_grads = _AsList(in_grads)
               _VerifyGeneratedGradients(in_grads, op)
               if gate_gradients and len([x for x in in_grads
@@ -820,8 +831,9 @@ def _GradientsHelper(ys,
         else:
           # If no grad_fn is defined or none of out_grads is available,
           # just propagate a list of None backwards.
-          in_grads = [None] * len(_Inputs(op, xs))
-        for i, (t_in, in_grad) in enumerate(zip(_Inputs(op, xs), in_grads)):
+          in_grads = [None] * len(_NonEagerInputs(op, xs))
+        for i, (t_in, in_grad) in enumerate(zip(_NonEagerInputs(op, xs),
+                                                in_grads)):
           if in_grad is not None:
             if (isinstance(in_grad, ops.Tensor) and
                 t_in.dtype != dtypes.resource):
@@ -862,7 +874,7 @@ def _HasAnyNotNoneGrads(grads, op):
 def _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state,
                                   xs):
   """Update pending count for the inputs of op and enqueue ready ops."""
-  for x in _Inputs(op, xs):
+  for x in _NonEagerInputs(op, xs):
     pending_count[x.op] -= 1
     ready = (pending_count[x.op] == 0)
     if loop_state and not ready:
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index 6243be6c9e..4f6e5dc473 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -531,6 +531,24 @@ class FunctionGradientsTest(test_util.TensorFlowTestCase):
       with self.cached_session() as sess:
         self.assertEqual(sess.run(z_grad), 3.0)
 
+  def testCapturedEagerTensors(self):
+    # Test that we can handle captured eager tensors unrelated to the gradient
+    # computation (i.e. we need to ignore them).
+    # TODO(skyewm): make it an error if you try to take the gradient wrt a
+    # captured EagerTensor
+    with context.eager_mode():
+      c = constant_op.constant(2.0, name="c")
+
+      @function.defun
+      def Foo():
+        x = constant_op.constant(10.0, name="x")
+        y = math_ops.multiply(x, c, name="y")
+        z = math_ops.multiply(y, 3.0, name="z")
+        g = gradients_impl.gradients(z, x)
+        return g[0]
+
+      self.assertEqual(Foo().numpy(), 6.0)
+
 
 class StopGradientTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From d035a83459330c87bbc527e3d480b65f32841997 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Fri, 14 Sep 2018 10:46:12 -0700
Subject: [PATCH 0202/1357] Fix archive path

PiperOrigin-RevId: 213007422
---
 .../contrib/lite/tutorials/post_training_quant.ipynb     | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
index a96e2c4e1b..4929133bda 100644
--- a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
+++ b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
@@ -608,7 +608,8 @@
       "outputs": [],
       "source": [
         "archive_path = tf.keras.utils.get_file(\"resnet_v2_101.tgz\", \"https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz\", extract=True)\n",
-        "archive_path = pathlib.Path(archive_path)"
+        "archive_path = pathlib.Path(archive_path)\n",
+        "archive_dir = str(archive_path.parent)"
       ]
     },
     {
@@ -631,7 +632,7 @@
       },
       "outputs": [],
       "source": [
-        "! cat {archive_path}/resnet_v2_101_299_info.txt"
+        "! cat {archive_dir}/resnet_v2_101_299_info.txt"
       ]
     },
     {
@@ -664,8 +665,8 @@
       },
       "outputs": [],
       "source": [
-        "archive_dir = str(archive_path.parent)\n",
-        "!ls -lh {archive_dir}"
+        "\n",
+        "!ls -lh {archive_dir}/*.tflite"
       ]
     },
     {
-- 
GitLab


From 52d7ed1a133cb1c3a2e13532bf97beef19c1516d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 10:48:50 -0700
Subject: [PATCH 0203/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213007905
---
 .../internal/optimized/optimized_ops.h        | 193 ++++++++++++++---
 .../internal/reference/reference_ops.h        | 197 +++++++++++++++---
 .../contrib/lite/kernels/internal/types.h     |   4 +-
 3 files changed, 328 insertions(+), 66 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 659a65a8ea..464207d739 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -4431,9 +4431,9 @@ inline void LocalResponseNormalization(
   }
 }
 
-inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
-                    float beta, float* output_data,
-                    const RuntimeShape& output_shape) {
+inline void Softmax(const SoftmaxParams& params,
+                    const RuntimeShape& input_shape, const float* input_data,
+                    const RuntimeShape& output_shape, float* output_data) {
   gemmlowp::ScopedProfilingLabel label("Softmax");
   MatchingFlatSize(input_shape, output_shape);
 
@@ -4441,7 +4441,8 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
   auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
   // Compute the exponential first, removing the max coefficient for numerical
   // stability.
-  out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * beta;
+  out_mat =
+      (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * params.beta;
   // We are separating out the exp function so that exp can be vectorized.
   out_mat = out_mat.array().exp();
   // Normalize to get the activations.
@@ -4450,10 +4451,22 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
   out_mat.array().rowwise() *= scale;
 }
 
-inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
-                    int32 input_beta_multiplier, int32 input_beta_left_shift,
-                    int diff_min, uint8* output_data,
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
+                    float beta, float* output_data,
                     const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.beta = beta;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Softmax(const SoftmaxParams& params,
+                    const RuntimeShape& input_shape, const uint8* input_data,
+                    const RuntimeShape& output_shape, uint8* output_data) {
+  const int32 input_beta_multiplier = params.input_multiplier;
+  const int32 input_beta_left_shift = params.input_left_shift;
+  const int diff_min = params.diff_min;
   // The representation chosen for the input to the exp() function is Q5.26.
   // We need to leave extra space since values that we skip might be as large as
   // -32 before multiplying by input_beta_multiplier, and therefore as large as
@@ -4659,10 +4672,24 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
+                    int32 input_beta_multiplier, int32 input_beta_left_shift,
+                    int diff_min, uint8* output_data,
+                    const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.input_multiplier = input_beta_multiplier;
+  params.input_left_shift = input_beta_left_shift;
+  params.diff_min = diff_min;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+
 // TODO(myenik): This is the same as the reference implementation, not actually
 // optimized yet.
-inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
-                       float* output_data, const RuntimeShape& output_shape) {
+inline void LogSoftmax(const SoftmaxParams& params,
+                       const RuntimeShape& input_shape, const float* input_data,
+                       const RuntimeShape& output_shape, float* output_data) {
   gemmlowp::ScopedProfilingLabel label("LogSoftmax");
   const int trailing_dim = input_shape.DimensionsCount() - 1;
   const int outer_size =
@@ -4695,6 +4722,15 @@ inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy
+inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
+                       float* output_data, const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  // No params currently used for float LogSoftmax.
+  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
+}
+
 template <int OutputIntegerBits, int InputIntegerBits>
 inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
 log_x_for_x_greater_than_or_equal_to_1_impl(
@@ -4809,12 +4845,15 @@ log_x_for_x_greater_than_or_equal_to_1(
 }
 
 // Currently just a copy of the reference code.
-inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
-                       int32 input_multiplier, int32 input_left_shift,
-                       int32 reverse_scaling_divisor,
-                       int32 reverse_scaling_right_shift, int diff_min,
-                       uint8* output_data, const RuntimeShape& output_shape) {
+inline void LogSoftmax(const SoftmaxParams& params,
+                       const RuntimeShape& input_shape, const uint8* input_data,
+                       const RuntimeShape& output_shape, uint8* output_data) {
   gemmlowp::ScopedProfilingLabel label("LogSoftmax/Uint8");
+  const int32 input_multiplier = params.input_multiplier;
+  const int32 input_left_shift = params.input_left_shift;
+  const int32 reverse_scaling_divisor = params.reverse_scaling_divisor;
+  const int32 reverse_scaling_right_shift = params.reverse_scaling_right_shift;
+  const int diff_min = params.diff_min;
   // The representation chosen for the input to the exp() function is Q5.26.
   // We need to leave extra space since values that we skip might be as large as
   // -32 before multiplying by input_beta_multiplier, and therefore as large as
@@ -4896,7 +4935,24 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
   }
 }
 
-inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
+                       int32 input_multiplier, int32 input_left_shift,
+                       int32 reverse_scaling_divisor,
+                       int32 reverse_scaling_right_shift, int diff_min,
+                       uint8* output_data, const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  params.reverse_scaling_divisor = reverse_scaling_divisor;
+  params.reverse_scaling_right_shift = reverse_scaling_right_shift;
+  params.diff_min = diff_min;
+  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const LogisticParams& params,
+                     const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) {
   gemmlowp::ScopedProfilingLabel label("Logistic");
   auto input_map = MapAsVector(input_data, input_shape);
@@ -4905,11 +4961,23 @@ inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
       input_map.array().unaryExpr(Eigen::internal::scalar_sigmoid_op<float>());
 }
 
-inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
-                     int32 input_zero_point, int32 input_range_radius,
-                     int32 input_multiplier, int input_left_shift,
-                     uint8* output_data, const RuntimeShape& output_shape) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
+                     const RuntimeShape& output_shape, float* output_data) {
+  LogisticParams params;
+  // No params currently needed by float Logistic.
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const LogisticParams& params,
+                     const RuntimeShape& input_shape, const uint8* input_data,
+                     const RuntimeShape& output_shape, uint8* output_data) {
   gemmlowp::ScopedProfilingLabel label("Logistic/Uint8");
+  const int32 input_zero_point = params.input_zero_point;
+  const int32 input_range_radius = params.input_range_radius;
+  const int32 input_multiplier = params.input_multiplier;
+  const int input_left_shift = params.input_left_shift;
   const int size = MatchingFlatSize(input_shape, output_shape);
 
   int c = 0;
@@ -5042,7 +5110,22 @@ inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
   }
 }
 
-inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
+                     int32 input_zero_point, int32 input_range_radius,
+                     int32 input_multiplier, int input_left_shift,
+                     uint8* output_data, const RuntimeShape& output_shape) {
+  LogisticParams params;
+  params.input_zero_point = input_zero_point;
+  params.input_range_radius = input_range_radius;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const LogisticParams& params,
+                     const RuntimeShape& input_shape, const int16* input_data,
                      const RuntimeShape& output_shape, int16* output_data) {
   gemmlowp::ScopedProfilingLabel label("Logistic/Int16");
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
@@ -5102,26 +5185,51 @@ inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy version.
+inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
+                     const RuntimeShape& output_shape, int16* output_data) {
+  LogisticParams params;
+  // No params currently needed by int16 Logistic.
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // Legacy version.
 inline void Logistic(const int16* input_data, const RuntimeShape& input_shape,
                      int16* output_data, const RuntimeShape& output_shape) {
-  Logistic(input_shape, input_data, output_shape, output_data);
+  LogisticParams params;
+  // No params currently needed by int16 Logistic.
+  Logistic(params, input_shape, input_data, output_shape, output_data);
 }
 
-inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
-                 const RuntimeShape& output_shape, float* output_data) {
+inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& output_shape,
+                 float* output_data) {
   gemmlowp::ScopedProfilingLabel label("Tanh");
   auto input_map = MapAsVector(input_data, input_shape);
   auto output_map = MapAsVector(output_data, output_shape);
   output_map.array() = input_map.array().tanh();
 }
 
-inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
-                 int32 input_zero_point, int32 input_range_radius,
-                 int32 input_multiplier, int input_left_shift,
-                 uint8* output_data, const RuntimeShape& output_shape) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
+                 const RuntimeShape& output_shape, float* output_data) {
+  TanhParams params;
+  // Currently no params needed for float Tanh.
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
+                 const uint8* input_data, const RuntimeShape& output_shape,
+                 uint8* output_data) {
   // Note that this is almost the exact same code as in Logistic().
   gemmlowp::ScopedProfilingLabel label("Tanh");
+  const int32 input_zero_point = params.input_zero_point;
+  const int32 input_range_radius = params.input_range_radius;
+  const int32 input_multiplier = params.input_multiplier;
+  const int input_left_shift = params.input_left_shift;
   const int size = MatchingFlatSize(input_shape, output_shape);
 
   int c = 0;
@@ -5263,10 +5371,25 @@ inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
   }
 }
 
-inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
-                 int input_left_shift, int16* output_data,
-                 const RuntimeShape& output_shape) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
+                 int32 input_zero_point, int32 input_range_radius,
+                 int32 input_multiplier, int input_left_shift,
+                 uint8* output_data, const RuntimeShape& output_shape) {
+  TanhParams params;
+  params.input_zero_point = input_zero_point;
+  params.input_range_radius = input_range_radius;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
+                 const int16* input_data, const RuntimeShape& output_shape,
+                 int16* output_data) {
   gemmlowp::ScopedProfilingLabel label("Tanh/Int16");
+  const int input_left_shift = params.input_left_shift;
   // Support for shifts is limited until we have a parameterized version of
   // SaturatingRoundingMultiplyByPOT().
   TFLITE_DCHECK_GE(input_left_shift, 0);
@@ -5363,6 +5486,16 @@ inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
+                 int input_left_shift, int16* output_data,
+                 const RuntimeShape& output_shape) {
+  TanhParams params;
+  params.input_left_shift = input_left_shift;
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
 template <typename SrcT, typename DstT>
 inline void Cast(const RuntimeShape& input_shape, const SrcT* input_data,
                  const RuntimeShape& output_shape, DstT* output_data) {
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 66f18ec195..111adbf5b3 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -2902,9 +2902,9 @@ inline void LocalResponseNormalization(
   }
 }
 
-inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
-                    float beta, float* output_data,
-                    const RuntimeShape& output_shape) {
+inline void Softmax(const SoftmaxParams& params,
+                    const RuntimeShape& input_shape, const float* input_data,
+                    const RuntimeShape& output_shape, float* output_data) {
   const int trailing_dim = input_shape.DimensionsCount() - 1;
   const int outer_size =
       MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
@@ -2923,21 +2923,33 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
     // Compute sum.
     float sum = 0.f;
     for (int c = 0; c < depth; ++c) {
-      sum += std::exp((input_data[i * depth + c] - max) * beta);
+      sum += std::exp((input_data[i * depth + c] - max) * params.beta);
     }
 
     // Compute result.
     for (int c = 0; c < depth; ++c) {
       output_data[i * depth + c] =
-          std::exp((input_data[i * depth + c] - max) * beta) / sum;
+          std::exp((input_data[i * depth + c] - max) * params.beta) / sum;
     }
   }
 }
 
-inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
-                    int32 input_beta_multiplier, int32 input_beta_left_shift,
-                    int diff_min, uint8* output_data,
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
+                    float beta, float* output_data,
                     const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.beta = beta;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Softmax(const SoftmaxParams& params,
+                    const RuntimeShape& input_shape, const uint8* input_data,
+                    const RuntimeShape& output_shape, uint8* output_data) {
+  const int32 input_beta_multiplier = params.input_multiplier;
+  const int32 input_beta_left_shift = params.input_left_shift;
+  const int diff_min = params.diff_min;
   // The representation chosen for the input to the exp() function is Q5.26.
   // We need to leave extra space since values that we skip might be as large as
   // -32 before multiplying by input_beta_multiplier, and therefore as large as
@@ -3015,8 +3027,22 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
   }
 }
 
-inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
-                       float* output_data, const RuntimeShape& output_shape) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy
+inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
+                    int32 input_beta_multiplier, int32 input_beta_left_shift,
+                    int diff_min, uint8* output_data,
+                    const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.input_multiplier = input_beta_multiplier;
+  params.input_left_shift = input_beta_left_shift;
+  params.diff_min = diff_min;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void LogSoftmax(const SoftmaxParams& params,
+                       const RuntimeShape& input_shape, const float* input_data,
+                       const RuntimeShape& output_shape, float* output_data) {
   const int trailing_dim = input_shape.DimensionsCount() - 1;
   const int outer_size =
       MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
@@ -3046,6 +3072,15 @@ inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy
+inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
+                       float* output_data, const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  // No params currently used for float LogSoftmax.
+  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
+}
+
 // Although currently the name of this function says that it cannot handle
 // values less than 1, in practice it can handle as low as 1/x_max, where
 // x_max is the largest representable input.  In other words, the output range
@@ -3161,16 +3196,19 @@ log_x_for_x_greater_than_or_equal_to_1(
       input_val);
 }
 
-inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
-                       int32 input_multiplier, int32 input_left_shift,
-                       int32 reverse_scaling_divisor,
-                       int32 reverse_scaling_right_shift, int diff_min,
-                       uint8* output_data, const RuntimeShape& output_shape) {
+inline void LogSoftmax(const SoftmaxParams& params,
+                       const RuntimeShape& input_shape, const uint8* input_data,
+                       const RuntimeShape& output_shape, uint8* output_data) {
+  const int32 input_multiplier = params.input_multiplier;
+  const int32 input_left_shift = params.input_left_shift;
+  const int32 reverse_scaling_divisor = params.reverse_scaling_divisor;
+  const int32 reverse_scaling_right_shift = params.reverse_scaling_right_shift;
+  const int diff_min = params.diff_min;
   // The representation chosen for the input to the exp() function is Q5.26.
-  // We need to leave extra space since values that we skip might be as large as
-  // -32 before multiplying by input_beta_multiplier, and therefore as large as
-  // -16 afterwards.  Note that exp(-8) is definitely not insignificant to
-  // accumulation, but exp(-16) definitely is.
+  // We need to leave extra space since values that we skip might be as large
+  // as -32 before multiplying by input_beta_multiplier, and therefore as
+  // large as -16 afterwards.  Note that exp(-8) is definitely not
+  // insignificant to accumulation, but exp(-16) definitely is.
   static constexpr int kScaledDiffIntegerBits = 5;
   static constexpr int kAccumulationIntegerBits = 12;
   static constexpr int kOutputIntegerBits = 4;
@@ -3247,7 +3285,24 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
   }
 }
 
-inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
+                       int32 input_multiplier, int32 input_left_shift,
+                       int32 reverse_scaling_divisor,
+                       int32 reverse_scaling_right_shift, int diff_min,
+                       uint8* output_data, const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  params.reverse_scaling_divisor = reverse_scaling_divisor;
+  params.reverse_scaling_right_shift = reverse_scaling_right_shift;
+  params.diff_min = diff_min;
+  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const LogisticParams& params,
+                     const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) {
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
 
@@ -3258,10 +3313,22 @@ inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
   }
 }
 
-inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
-                     int32 input_zero_point, int32 input_range_radius,
-                     int32 input_multiplier, int input_left_shift,
-                     uint8* output_data, const RuntimeShape& output_shape) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
+                     const RuntimeShape& output_shape, float* output_data) {
+  LogisticParams params;
+  // No params currently needed by float Logistic.
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const LogisticParams& params,
+                     const RuntimeShape& input_shape, const uint8* input_data,
+                     const RuntimeShape& output_shape, uint8* output_data) {
+  const int32 input_zero_point = params.input_zero_point;
+  const int32 input_range_radius = params.input_range_radius;
+  const int32 input_multiplier = params.input_multiplier;
+  const int input_left_shift = params.input_left_shift;
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
 
   for (int i = 0; i < flat_size; i++) {
@@ -3296,7 +3363,22 @@ inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
   }
 }
 
-inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
+                     int32 input_zero_point, int32 input_range_radius,
+                     int32 input_multiplier, int input_left_shift,
+                     uint8* output_data, const RuntimeShape& output_shape) {
+  LogisticParams params;
+  params.input_zero_point = input_zero_point;
+  params.input_range_radius = input_range_radius;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const LogisticParams& params,
+                     const RuntimeShape& input_shape, const int16* input_data,
                      const RuntimeShape& output_shape, int16* output_data) {
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
 
@@ -3314,8 +3396,18 @@ inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
   }
 }
 
-inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
-                 const RuntimeShape& output_shape, float* output_data) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
+                     const RuntimeShape& output_shape, int16* output_data) {
+  LogisticParams params;
+  // No params currently needed by int16 Logistic.
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& output_shape,
+                 float* output_data) {
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
 
   for (int i = 0; i < flat_size; i++) {
@@ -3325,10 +3417,22 @@ inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
   }
 }
 
-inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
-                 int32 input_zero_point, int32 input_range_radius,
-                 int32 input_multiplier, int input_left_shift,
-                 uint8* output_data, const RuntimeShape& output_shape) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
+                 const RuntimeShape& output_shape, float* output_data) {
+  TanhParams params;
+  // Currently no params needed for float Tanh.
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
+                 const uint8* input_data, const RuntimeShape& output_shape,
+                 uint8* output_data) {
+  const int32 input_zero_point = params.input_zero_point;
+  const int32 input_range_radius = params.input_range_radius;
+  const int32 input_multiplier = params.input_multiplier;
+  const int input_left_shift = params.input_left_shift;
   const int32 output_zero_point = 128;
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
 
@@ -3365,9 +3469,24 @@ inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
   }
 }
 
-inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
-                 int input_left_shift, int16* output_data,
-                 const RuntimeShape& output_shape) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
+                 int32 input_zero_point, int32 input_range_radius,
+                 int32 input_multiplier, int input_left_shift,
+                 uint8* output_data, const RuntimeShape& output_shape) {
+  TanhParams params;
+  params.input_zero_point = input_zero_point;
+  params.input_range_radius = input_range_radius;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
+                 const int16* input_data, const RuntimeShape& output_shape,
+                 int16* output_data) {
+  const int input_left_shift = params.input_left_shift;
   // Support for shifts is limited until we have a parameterized version of
   // SaturatingRoundingMultiplyByPOT().
   TFLITE_DCHECK_GE(input_left_shift, 0);
@@ -3398,6 +3517,16 @@ inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
+                 int input_left_shift, int16* output_data,
+                 const RuntimeShape& output_shape) {
+  TanhParams params;
+  params.input_left_shift = input_left_shift;
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
 inline void Dequantize(const tflite::DequantizationParams& op_params,
                        const RuntimeShape& input_shape, const uint8* input_data,
                        const RuntimeShape& output_shape, float* output_data) {
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index 023707d466..87e8ff0346 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -885,8 +885,8 @@ struct SoftmaxParams {
   // for LogSoftmax.
   double beta;
   // uint8 inference params.  Used even when beta defaults to 1.0.
-  int32 input_beta_multiplier;
-  int32 input_beta_left_shift;
+  int32 input_multiplier;
+  int32 input_left_shift;
   // Reverse scaling is only used by LogSoftmax.
   int32 reverse_scaling_divisor;
   int32 reverse_scaling_right_shift;
-- 
GitLab


From b2cb6e27f42cd8db6b105e686b494afe9b76324d Mon Sep 17 00:00:00 2001
From: Ian Langmore <langmore@google.com>
Date: Fri, 14 Sep 2018 10:50:07 -0700
Subject: [PATCH 0204/1357] Point VectorDiffeomixture to tf.linalg rather than
 tf.contrib.linalg

PiperOrigin-RevId: 213008118
---
 .../contrib/distributions/python/ops/vector_diffeomixture.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
index ece03fe4aa..3c8aae2797 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
@@ -23,7 +23,6 @@ import numpy as np
 from tensorflow.contrib.distributions.python.ops import distribution_util
 from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator import AffineLinearOperator
 from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import SoftmaxCentered
-from tensorflow.contrib.linalg.python.ops import linear_operator_addition as linop_add_lib
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -36,6 +35,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops.distributions import categorical as categorical_lib
 from tensorflow.python.ops.distributions import distribution as distribution_lib
 from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.linalg import linear_operator_addition as linop_add_lib
 from tensorflow.python.ops.linalg import linear_operator_diag as linop_diag_lib
 from tensorflow.python.ops.linalg import linear_operator_full_matrix as linop_full_lib
 from tensorflow.python.ops.linalg import linear_operator_identity as linop_identity_lib
-- 
GitLab


From 81a063287a0449cfe2f20a82c036146d6e9356f9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 10:53:23 -0700
Subject: [PATCH 0205/1357] Removed unnecessary includes from stringpiece.h

PiperOrigin-RevId: 213008707
---
 tensorflow/core/lib/core/status.h                    | 1 +
 tensorflow/core/lib/core/stringpiece.h               | 6 ------
 tensorflow/core/lib/io/block_builder.h               | 1 +
 tensorflow/core/lib/io/path.h                        | 1 +
 tensorflow/core/lib/monitoring/collection_registry.h | 1 +
 tensorflow/core/lib/monitoring/metric_def.h          | 1 +
 tensorflow/core/lib/png/png_io.h                     | 1 +
 tensorflow/core/util/tensor_bundle/naming.h          | 1 +
 8 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/lib/core/status.h b/tensorflow/core/lib/core/status.h
index 49f74ff47f..eb0ff555a5 100644
--- a/tensorflow/core/lib/core/status.h
+++ b/tensorflow/core/lib/core/status.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h
index e7b17c9b36..6edff139ae 100644
--- a/tensorflow/core/lib/core/stringpiece.h
+++ b/tensorflow/core/lib/core/stringpiece.h
@@ -26,13 +26,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_CORE_STRINGPIECE_H_
 #define TENSORFLOW_CORE_LIB_CORE_STRINGPIECE_H_
 
-#include <assert.h>
-#include <stddef.h>
-#include <string.h>
-#include <iosfwd>
-#include <string>
 #include "absl/strings/string_view.h"
-#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/lib/io/block_builder.h b/tensorflow/core/lib/io/block_builder.h
index e2927689d2..117b6a0bb8 100644
--- a/tensorflow/core/lib/io/block_builder.h
+++ b/tensorflow/core/lib/io/block_builder.h
@@ -20,6 +20,7 @@ limitations under the License.
 
 #include <stdint.h>
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace table {
diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h
index e3649fd0c9..38fb0c5d86 100644
--- a/tensorflow/core/lib/io/path.h
+++ b/tensorflow/core/lib/io/path.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_LIB_IO_PATH_H_
 
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace io {
diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h
index c204d52cfe..9e4e1989dd 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.h
+++ b/tensorflow/core/lib/monitoring/collection_registry.h
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace monitoring {
diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h
index 756e5c2af8..bc4365e439 100644
--- a/tensorflow/core/lib/monitoring/metric_def.h
+++ b/tensorflow/core/lib/monitoring/metric_def.h
@@ -21,6 +21,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/summary.pb.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace monitoring {
diff --git a/tensorflow/core/lib/png/png_io.h b/tensorflow/core/lib/png/png_io.h
index bb5d20fb68..c876c5156a 100644
--- a/tensorflow/core/lib/png/png_io.h
+++ b/tensorflow/core/lib/png/png_io.h
@@ -37,6 +37,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/png.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace png {
diff --git a/tensorflow/core/util/tensor_bundle/naming.h b/tensorflow/core/util/tensor_bundle/naming.h
index 6539d565e2..7b101971a8 100644
--- a/tensorflow/core/util/tensor_bundle/naming.h
+++ b/tensorflow/core/util/tensor_bundle/naming.h
@@ -35,6 +35,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_NAMING_H_
 
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
-- 
GitLab


From 825098107c1d7b63d3a7b29c094ddc5dbff7cad2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 11:03:36 -0700
Subject: [PATCH 0206/1357] parallel_for: add a bunch of converters for cwise
 ops and gradients.

PiperOrigin-RevId: 213010458
---
 .../ops/parallel_for/control_flow_ops_test.py | 192 ++++++++++++++++--
 tensorflow/python/ops/parallel_for/pfor.py    |  98 ++++++++-
 2 files changed, 261 insertions(+), 29 deletions(-)

diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
index d403b0c61a..6e276dee55 100644
--- a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
+++ b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
@@ -31,6 +31,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import bitwise_ops
+from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import gradients as gradient_ops
@@ -300,28 +302,129 @@ class ArrayTest(PForTest):
     self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.float32] * 2)
 
 
+class BitwiseTest(PForTest):
+
+  def test_unary_cwise(self):
+    for op in [bitwise_ops.invert]:
+      x = random_ops.random_uniform([7, 3, 5], maxval=10, dtype=dtypes.int32)
+
+      # pylint: disable=cell-var-from-loop
+      def loop_fn(i):
+        x1 = array_ops.gather(x, i)
+        return op(x1)
+      # pylint: enable=cell-var-from-loop
+
+      self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.int32])
+
+  def test_binary_cwise(self):
+    binary_ops = [
+        bitwise_ops.bitwise_and,
+        bitwise_ops.bitwise_or,
+        bitwise_ops.bitwise_xor,
+        bitwise_ops.left_shift,
+        bitwise_ops.right_shift,
+    ]
+    for op in binary_ops:
+      x = random_ops.random_uniform([7, 3, 5], maxval=10, dtype=dtypes.int32)
+      y = random_ops.random_uniform([3, 5], maxval=10, dtype=dtypes.int32)
+
+      output_dtypes = []
+      # pylint: disable=cell-var-from-loop
+      def loop_fn(i):
+        x1 = array_ops.gather(x, i)
+        y1 = array_ops.gather(y, i)
+        outputs = [op(x, y), op(x1, y), op(x, y1), op(x1, y1), op(x1, x1)]
+        del output_dtypes[:]
+        output_dtypes.extend([t.dtype for t in outputs])
+        return outputs
+      # pylint: enable=cell-var-from-loop
+      self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=output_dtypes)
+
+
 class MathTest(PForTest):
 
   def test_unary_cwise_ops(self):
-    for op in [
-        math_ops.tanh, nn.relu, math_ops.sigmoid, math_ops.negative,
-        math_ops.square
-    ]:
+    complex_ops = [
+        math_ops.angle,
+        math_ops.imag,
+        math_ops.complex_abs,
+        math_ops.real,
+        math_ops.conj,
+    ]
+    real_ops = [
+        lambda x: math_ops.acosh(1 + math_ops.square(x)),
+        math_ops.abs,
+        math_ops.acos,
+        math_ops.asin,
+        math_ops.asinh,
+        math_ops.atan,
+        math_ops.atanh,
+        math_ops.bessel_i0e,
+        math_ops.bessel_i1e,
+        math_ops.cos,
+        math_ops.cosh,
+        math_ops.digamma,
+        math_ops.erf,
+        math_ops.erfc,
+        math_ops.exp,
+        math_ops.expm1,
+        math_ops.inv,
+        math_ops.is_finite,
+        math_ops.is_inf,
+        math_ops.lgamma,
+        math_ops.log,
+        math_ops.log1p,
+        math_ops.neg,
+        math_ops.negative,
+        math_ops.reciprocal,
+        math_ops.rint,
+        math_ops.round,
+        math_ops.rsqrt,
+        math_ops.sigmoid,
+        math_ops.sign,
+        math_ops.sin,
+        math_ops.sinh,
+        math_ops.sqrt,
+        math_ops.square,
+        math_ops.tan,
+        math_ops.tanh,
+        math_ops.tanh,
+        nn.elu,
+        nn.relu,
+        nn.relu6,
+        nn.selu,
+        nn.softplus,
+        nn.softsign,
+    ]
+    for op in complex_ops + real_ops:
       x = random_ops.random_uniform([3, 5])
+      if op in complex_ops:
+        y = random_ops.random_uniform([3, 5])
+        x = math_ops.complex(x, y)
 
       # pylint: disable=cell-var-from-loop
+      output_dtypes = []
       def loop_fn(i):
         x1 = array_ops.gather(x, i)
-        y = op(x1)
-        loss = math_ops.reduce_sum(y * y)
-        return op(x), y, gradient_ops.gradients(loss, x1)
+        y1 = op(x1)
+        outputs = [op(x), y1]
+        if y1.dtype == dtypes.float32:
+          loss = math_ops.reduce_sum(y1 * y1)
+          grad = gradient_ops.gradients(loss, x1)
+          if grad and grad[0] is not None:
+            outputs.extend(grad)
+        del output_dtypes[:]
+        output_dtypes.extend([t.dtype for t in outputs])
+        return outputs
 
       # pylint: enable=cell-var-from-loop
 
-      self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.float32] * 3)
+      self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=output_dtypes)
 
   def test_unary_cwise_no_grad(self):
-    for op in [math_ops.ceil, math_ops.floor, math_ops.logical_not]:
+    for op in [math_ops.ceil,
+               math_ops.floor,
+               math_ops.logical_not]:
       x = random_ops.random_uniform([3, 5])
       if op == math_ops.logical_not:
         x = x > 0
@@ -336,33 +439,80 @@ class MathTest(PForTest):
 
   def test_binary_cwise_ops(self):
     logical_ops = [
-        math_ops.logical_and, math_ops.logical_or, math_ops.logical_xor
-    ]
-    bool_ops = [
-        math_ops.less, math_ops.less_equal, math_ops.greater,
-        math_ops.greater_equal, math_ops.equal, math_ops.not_equal
+        math_ops.logical_and,
+        math_ops.logical_or,
+        math_ops.logical_xor
     ]
+
+    # Wrapper functions restricting the range of inputs of zeta and polygamma.
+    def safe_polygamma(x, y):
+      return math_ops.polygamma(
+          math_ops.round(clip_ops.clip_by_value(y, 1, 10)),
+          x * x + 1)
+
+    def safe_zeta(x, y):
+      return math_ops.zeta(x * x + 1, y * y)
+
     float_ops = [
-        math_ops.add, math_ops.subtract, math_ops.multiply, math_ops.divide,
-        math_ops.maximum, math_ops.minimum
+        math_ops.add,
+        math_ops.add_v2,
+        math_ops.atan2,
+        math_ops.complex,
+        math_ops.div,
+        math_ops.divide,
+        math_ops.div_no_nan,
+        math_ops.equal,
+        math_ops.floor_div,
+        math_ops.floor_mod,
+        math_ops.greater,
+        math_ops.greater_equal,
+        math_ops.igamma,
+        math_ops.igammac,
+        math_ops.igamma_grad_a,
+        math_ops.less,
+        math_ops.less_equal,
+        math_ops.maximum,
+        math_ops.minimum,
+        math_ops.mod,
+        math_ops.multiply,
+        math_ops.not_equal,
+        math_ops.pow,
+        math_ops.squared_difference,
+        math_ops.subtract,
+        math_ops.truncate_mod,
+        safe_polygamma,
+        safe_zeta,
     ]
-    for op in logical_ops + bool_ops + float_ops:
+    for op in logical_ops + float_ops:
       x = random_ops.random_uniform([7, 3, 5])
       y = random_ops.random_uniform([3, 5])
       if op in logical_ops:
         x = x > 0
         y = y > 0
 
+      output_dtypes = []
       # pylint: disable=cell-var-from-loop
       def loop_fn(i):
         x1 = array_ops.gather(x, i)
         y1 = array_ops.gather(y, i)
-        return op(x, y), op(x1, y), op(x, y1), op(x1, y1), op(x1, x1)
-
+        outputs = [op(x, y), op(x1, y), op(x, y1), op(x1, y1), op(x1, x1)]
+        del output_dtypes[:]
+        output_dtypes.extend([t.dtype for t in outputs])
+        return outputs
       # pylint: enable=cell-var-from-loop
 
-      dtype = dtypes.float32 if op in float_ops else dtypes.bool
-      self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtype] * 5)
+      self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=output_dtypes)
+
+  def test_approximate_equal(self):
+    x = random_ops.random_uniform([3, 5])
+    y = random_ops.random_uniform([3, 5])
+
+    def loop_fn(i):
+      x1 = array_ops.gather(x, i)
+      y1 = array_ops.gather(y, i)
+      return math_ops.approximate_equal(x1, y1)
+
+    self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.bool])
 
   def test_addn(self):
     x = random_ops.random_uniform([2, 3, 5])
diff --git a/tensorflow/python/ops/parallel_for/pfor.py b/tensorflow/python/ops/parallel_for/pfor.py
index f9153b6d7d..e0f6d51881 100644
--- a/tensorflow/python/ops/parallel_for/pfor.py
+++ b/tensorflow/python/ops/parallel_for/pfor.py
@@ -28,6 +28,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import bitwise_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
@@ -1922,37 +1923,114 @@ def _convert_cast(pfor_input):
   return wrap(math_ops.cast(inp, dtype), True)
 
 
-# Note that ops handled here do not have attributes except "T", and hence don't
-# need extra arguments passed to the cwise_op call below.
+@RegisterPForWithArgs("Abs", math_ops.abs)
+@RegisterPForWithArgs("Acosh", math_ops.acosh)
+@RegisterPForWithArgs("Acos", math_ops.acos)
 @RegisterPForWithArgs("Add", math_ops.add)
+@RegisterPForWithArgs("AddV2", math_ops.add_v2)
+@RegisterPForWithArgs("Angle", math_ops.angle)
+@RegisterPForWithArgs("Asinh", math_ops.asinh)
+@RegisterPForWithArgs("Asin", math_ops.asin)
+@RegisterPForWithArgs("Atan2", math_ops.atan2)
+@RegisterPForWithArgs("Atanh", math_ops.atanh)
+@RegisterPForWithArgs("Atan", math_ops.atan)
+@RegisterPForWithArgs("BesselI0e", math_ops.bessel_i0e)
+@RegisterPForWithArgs("BesselI1e", math_ops.bessel_i1e)
+@RegisterPForWithArgs("BitwiseAnd", bitwise_ops.bitwise_and)
+@RegisterPForWithArgs("BitwiseOr", bitwise_ops.bitwise_or)
+@RegisterPForWithArgs("BitwiseXor", bitwise_ops.bitwise_xor)
 @RegisterPForWithArgs("Ceil", math_ops.ceil)
+@RegisterPForWithArgs("ComplexAbs", math_ops.complex_abs)
+@RegisterPForWithArgs("Complex", math_ops.complex)
+@RegisterPForWithArgs("Conj", math_ops.conj)
+@RegisterPForWithArgs("Cosh", math_ops.cosh)
+@RegisterPForWithArgs("Cos", math_ops.cos)
+@RegisterPForWithArgs("Digamma", math_ops.digamma)
+@RegisterPForWithArgs("Div", math_ops.div)
+@RegisterPForWithArgs("DivNoNan", math_ops.div_no_nan)
+@RegisterPForWithArgs("Elu", nn_ops.elu)
 @RegisterPForWithArgs("Equal", math_ops.equal)
-@RegisterPForWithArgs("NotEqual", math_ops.not_equal)
+@RegisterPForWithArgs("Erfc", math_ops.erfc)
+@RegisterPForWithArgs("Erf", math_ops.erf)
+@RegisterPForWithArgs("Expm1", math_ops.expm1)
+@RegisterPForWithArgs("Exp", math_ops.exp)
+@RegisterPForWithArgs("FloorDiv", math_ops.floor_div)
 @RegisterPForWithArgs("Floor", math_ops.floor)
-@RegisterPForWithArgs("Greater", math_ops.greater)
+@RegisterPForWithArgs("FloorMod", math_ops.floor_mod)
 @RegisterPForWithArgs("GreaterEqual", math_ops.greater_equal)
-@RegisterPForWithArgs("Less", math_ops.less)
+@RegisterPForWithArgs("Greater", math_ops.greater)
+@RegisterPForWithArgs("Igammac", math_ops.igammac)
+@RegisterPForWithArgs("IgammaGradA", math_ops.igamma_grad_a)
+@RegisterPForWithArgs("Igamma", math_ops.igamma)
+@RegisterPForWithArgs("Imag", math_ops.imag)
+@RegisterPForWithArgs("Invert", bitwise_ops.invert)
+@RegisterPForWithArgs("Inv", math_ops.inv)
+@RegisterPForWithArgs("IsFinite", math_ops.is_finite)
+@RegisterPForWithArgs("IsInf", math_ops.is_inf)
+@RegisterPForWithArgs("LeftShift", bitwise_ops.left_shift)
 @RegisterPForWithArgs("LessEqual", math_ops.less_equal)
-@RegisterPForWithArgs("LogicalOr", math_ops.logical_or)
+@RegisterPForWithArgs("Less", math_ops.less)
+@RegisterPForWithArgs("Lgamma", math_ops.lgamma)
+@RegisterPForWithArgs("Log1p", math_ops.log1p)
 @RegisterPForWithArgs("LogicalAnd", math_ops.logical_and)
 @RegisterPForWithArgs("LogicalNot", math_ops.logical_not)
+@RegisterPForWithArgs("LogicalOr", math_ops.logical_or)
 @RegisterPForWithArgs("LogicalXor", math_ops.logical_xor)
+@RegisterPForWithArgs("Log", math_ops.log)
 @RegisterPForWithArgs("Maximum", math_ops.maximum)
 @RegisterPForWithArgs("Minimum", math_ops.minimum)
+@RegisterPForWithArgs("Mod", math_ops.mod)
 @RegisterPForWithArgs("Mul", math_ops.multiply)
 @RegisterPForWithArgs("Neg", math_ops.negative)
+@RegisterPForWithArgs("NotEqual", math_ops.not_equal)
+@RegisterPForWithArgs("Polygamma", math_ops.polygamma)
+@RegisterPForWithArgs("Pow", math_ops.pow)
 @RegisterPForWithArgs("RealDiv", math_ops.divide)
+@RegisterPForWithArgs("Real", math_ops.real)
+@RegisterPForWithArgs("ReciprocalGrad", math_ops.reciprocal_grad)
+@RegisterPForWithArgs("Reciprocal", math_ops.reciprocal)
+@RegisterPForWithArgs("Relu6", nn_ops.relu6)
 @RegisterPForWithArgs("Relu", nn_ops.relu)
+@RegisterPForWithArgs("RightShift", bitwise_ops.right_shift)
+@RegisterPForWithArgs("Rint", math_ops.rint)
+@RegisterPForWithArgs("Round", math_ops.round)
+@RegisterPForWithArgs("RsqrtGrad", math_ops.rsqrt_grad)
+@RegisterPForWithArgs("Rsqrt", math_ops.rsqrt)
+@RegisterPForWithArgs("Selu", nn_ops.selu)
 @RegisterPForWithArgs("Sigmoid", math_ops.sigmoid)
+@RegisterPForWithArgs("Sign", math_ops.sign)
+@RegisterPForWithArgs("Sinh", math_ops.sinh)
+@RegisterPForWithArgs("Sin", math_ops.sin)
+@RegisterPForWithArgs("Softplus", nn_ops.softplus)
+@RegisterPForWithArgs("Softsign", nn_ops.softsign)
+@RegisterPForWithArgs("SqrtGrad", math_ops.sqrt_grad)
+@RegisterPForWithArgs("Sqrt", math_ops.sqrt)
+@RegisterPForWithArgs("SquaredDifference", math_ops.squared_difference)
 @RegisterPForWithArgs("Square", math_ops.square)
 @RegisterPForWithArgs("Sub", math_ops.subtract)
 @RegisterPForWithArgs("Tanh", math_ops.tanh)
+@RegisterPForWithArgs("Tan", math_ops.tan)
+@RegisterPForWithArgs("TruncateDiv", math_ops.truncate_div)
+@RegisterPForWithArgs("TruncateMod", math_ops.truncate_mod)
+@RegisterPForWithArgs("Zeta", math_ops.zeta)
 def _convert_cwise(pfor_input, op_type, op_func):
-  del op_type
+  # Note that ops handled here do not have attributes except "T" and "Tout", and
+  # hence don't need extra arguments passed to the cwise_op call below.
+  for attr in pfor_input.op.node_def.attr.keys():
+    assert attr in [u"T", u"Tout"], (op_type, attr)
   pfor_input.expanddim_inputs_for_broadcast()
   return wrap(op_func(*[x.t for x in pfor_input.inputs]), True)
 
 
+@RegisterPFor("ApproximateEqual")
+def _convert_approximate_equal(pfor_input):
+  pfor_input.expanddim_inputs_for_broadcast()
+  x = pfor_input.input(0)[0]
+  y = pfor_input.input(1)[0]
+  tolerance = pfor_input.get_attr("tolerance")
+  return wrap(math_ops.approximate_equal(x, y, tolerance=tolerance), True)
+
+
 @RegisterPFor("Shape")
 def _convert_shape(pfor_input):
   out_type = pfor_input.get_attr("out_type")
@@ -2009,10 +2087,14 @@ def _convert_biasaddgrad(pfor_input):
 
 # Some required ops are not exposed under the tf namespace. Hence relying on
 # _create_op to create them.
+@RegisterPForWithArgs("EluGrad")
+@RegisterPForWithArgs("Relu6Grad")
 @RegisterPForWithArgs("ReluGrad")
-@RegisterPForWithArgs("TanhGrad")
+@RegisterPForWithArgs("SeluGrad")
 @RegisterPForWithArgs("SigmoidGrad")
 @RegisterPForWithArgs("SoftplusGrad")
+@RegisterPForWithArgs("SoftsignGrad")
+@RegisterPForWithArgs("TanhGrad")
 def _convert_grads(pfor_input, op_type, *args, **kw_args):
   del args
   del kw_args
-- 
GitLab


From 9445d19a140561017992f0bf1364c9dc4733b7ca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 11:15:23 -0700
Subject: [PATCH 0207/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213012717
---
 .../internal/reference/reference_ops.h        | 140 ++++++++++++++----
 .../contrib/lite/kernels/internal/types.h     |   9 ++
 2 files changed, 119 insertions(+), 30 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 111adbf5b3..2d552909a8 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -4487,34 +4487,70 @@ void ArgMax(const RuntimeShape& input1_shape, const T1* input1_data,
 }
 
 template <typename T>
-void Transpose(const T* input, const Dims<4>& input_dims, T* output,
-               const Dims<4>& output_dims, const int* permuted_axes) {
+void Transpose(const TransposeParams& params,
+               const RuntimeShape& unextended_input_shape, const T* input_data,
+               const RuntimeShape& unextended_output_shape, T* output_data) {
+  const int unextended_output_size = unextended_output_shape.DimensionsCount();
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_size, 4);
+  TFLITE_DCHECK_EQ(unextended_output_size, params.perm_count);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  const int input_ext_size = 4 - unextended_input_shape.DimensionsCount();
+  const int output_ext_size = 4 - unextended_output_size;
+
+  // The perm data is extended to match the output, each index incremented by
+  // the amount of front padding of the input shape.
+  int extended_perm[4];
+  for (int i = 0; i < output_ext_size; ++i) {
+    extended_perm[i] = i;
+  }
+  for (int i = 0; i < unextended_output_size; ++i) {
+    extended_perm[i + output_ext_size] = params.perm[i] + input_ext_size;
+  }
+
   int out_sizes[4];
   // Compute the inverse permutation array so we can do an output centered
   // transpose. Also, check to make sure output_dims is matching input_dims.
   for (int k = 0; k < 4; k++) {
-    out_sizes[k] =
-        MatchingArraySize(input_dims, permuted_axes[k], output_dims, k);
+    out_sizes[k] = MatchingDim(input_shape, extended_perm[k], output_shape, k);
   }
 
   // Naive transpose loop (iterate on output index and compute input index).
   int o[4];  // loop index (on output).
   int i[4];
   for (o[3] = 0; o[3] < out_sizes[3]; o[3]++) {
-    i[permuted_axes[3]] = o[3];
+    i[extended_perm[3]] = o[3];
     for (o[2] = 0; o[2] < out_sizes[2]; o[2]++) {
-      i[permuted_axes[2]] = o[2];
+      i[extended_perm[2]] = o[2];
       for (o[1] = 0; o[1] < out_sizes[1]; o[1]++) {
-        i[permuted_axes[1]] = o[1];
+        i[extended_perm[1]] = o[1];
         for (o[0] = 0; o[0] < out_sizes[0]; o[0]++) {
-          i[permuted_axes[0]] = o[0];
-          output[Offset(output_dims, o)] = input[Offset(input_dims, i)];
+          i[extended_perm[0]] = o[0];
+          output_data[Offset(output_shape, o)] =
+              input_data[Offset(input_shape, i)];
         }
       }
     }
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+template <typename T>
+void Transpose(const T* input, const Dims<4>& input_dims, T* output,
+               const Dims<4>& output_dims, const int* permuted_axes) {
+  TransposeParams params;
+  params.perm_count = 4;
+  for (int i = 0; i < 4; ++i) {
+    params.perm[i] = 3 - permuted_axes[3 - i];
+  }
+  Transpose(params, DimsToShape(input_dims), input, DimsToShape(output_dims),
+            output);
+}
+
 inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
                           const float* filter_data, const Dims<4>& filter_dims,
                           int stride_width, int stride_height, int pad_width,
@@ -4927,48 +4963,82 @@ TFLITE_COMPARISON_OP(Less);
 TFLITE_COMPARISON_OP(LessEqual);
 #undef TFLITE_COMPARISON_OP
 
+template <typename D, typename T>
+void Select(const RuntimeShape& input_condition_shape,
+            const D* input_condition_data, const RuntimeShape& input_x_shape,
+            const T* input_x_data, const RuntimeShape& input_y_shape,
+            const T* input_y_data, const RuntimeShape& output_shape,
+            T* output_data) {
+  const int64_t flatsize = MatchingFlatSize(
+      input_condition_shape, input_x_shape, input_y_shape, output_shape);
+  for (int64_t i = 0; i < flatsize; ++i) {
+    output_data[i] =
+        input_condition_data[i] ? input_x_data[i] : input_y_data[i];
+  }
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 template <typename D, typename T>
 inline void Select(const D* input_condition_data,
                    const Dims<4>& input_condition_dims, const T* input_x_data,
                    const Dims<4>& input_x_dims, const T* input_y_data,
                    const Dims<4>& input_y_dims, T* output_data,
                    const Dims<4>& output_dims) {
-  const int64_t flatsize =
-      MatchingFlatSize(input_x_dims, input_y_dims, output_dims);
-  for (int64_t i = 0; i < flatsize; ++i) {
-    output_data[i] =
-        input_condition_data[i] ? input_x_data[i] : input_y_data[i];
-  }
+  Select(DimsToShape(input_condition_dims), input_condition_data,
+         DimsToShape(input_x_dims), input_x_data, DimsToShape(input_y_dims),
+         input_y_data, DimsToShape(output_dims), output_data);
 }
 
 template <typename D, typename T>
-inline void RankOneSelect(const D* input_condition_data,
-                          const Dims<4>& input_condition_dims,
-                          const T* input_x_data, const Dims<4>& input_x_dims,
-                          const T* input_y_data, const Dims<4>& input_y_dims,
-                          T* output_data, const Dims<4>& output_dims) {
-  const int64_t rank = MatchingArraySize(input_condition_dims, 0, input_x_dims,
-                                         3, input_y_dims, 3, output_dims, 3);
+void RankOneSelect(const RuntimeShape& input_condition_shape,
+                   const D* input_condition_data,
+                   const RuntimeShape& input_x_shape, const T* input_x_data,
+                   const RuntimeShape& input_y_shape, const T* input_y_data,
+                   const RuntimeShape& output_shape, T* output_data) {
+  const int64_t outer_size = input_condition_shape.FlatSize();
+  TFLITE_DCHECK_EQ(
+      MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0),
+      outer_size);
   const int64_t inner_size =
-      MatchingFlatSizeSkipDim(input_x_dims, 3, input_y_dims, output_dims);
+      MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
 
   int64_t offset = 0;
-  for (int64_t i = 0; i < rank; i++) {
+  for (int64_t i = 0; i < outer_size; i++) {
     const T* input_data = input_condition_data[i] ? input_x_data : input_y_data;
     memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
     offset += inner_size;
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+template <typename D, typename T>
+inline void RankOneSelect(const D* input_condition_data,
+                          const Dims<4>& input_condition_dims,
+                          const T* input_x_data, const Dims<4>& input_x_dims,
+                          const T* input_y_data, const Dims<4>& input_y_dims,
+                          T* output_data, const Dims<4>& output_dims) {
+  RankOneSelect(DimsToShape(input_condition_dims), input_condition_data,
+                DimsToShape(input_x_dims), input_x_data,
+                DimsToShape(input_y_dims), input_y_data,
+                DimsToShape(output_dims), output_data);
+}
+
 // For easy implementation, the indices is always a vector of size-4 vectors.
 template <typename T, typename TI>
 inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
-                          const T* values, T default_value, T* output_data,
-                          const Dims<4>& output_dims, bool value_is_scalar) {
+                          const T* values, T default_value,
+                          bool value_is_scalar,
+                          const RuntimeShape& unextended_output_shape,
+                          T* output_data) {
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
   const int value_count = indices.size();
 
   // First fill the output_data with default value.
-  const int num_elements = FlatSize(output_dims);
+  const int num_elements = output_shape.FlatSize();
   for (int i = 0; i < num_elements; ++i) {
     output_data[i] = default_value;
   }
@@ -4980,8 +5050,8 @@ inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
       const std::vector<TI>& index = indices[i];
       TFLITE_DCHECK_EQ(index.size(), 4);
       const T value = *values;  // just use the first value.
-      output_data[Offset(output_dims, index[3], index[2], index[1], index[0])] =
-          value;
+      output_data[Offset(output_shape, index[0], index[1], index[2],
+                         index[3])] = value;
     }
     return;
   }
@@ -4991,11 +5061,21 @@ inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
     const std::vector<TI>& index = indices[i];
     TFLITE_DCHECK_EQ(index.size(), 4);
     const T value = values[i];
-    output_data[Offset(output_dims, index[3], index[2], index[1], index[0])] =
+    output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] =
         value;
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
+                          const T* values, T default_value, T* output_data,
+                          const Dims<4>& output_dims, bool value_is_scalar) {
+  SparseToDense(indices, values, default_value, value_is_scalar,
+                DimsToShape(output_dims), output_data);
+}
+
 template <typename T>
 inline void Pow(const RuntimeShape& input1_shape, const T* input1_data,
                 const RuntimeShape& input2_shape, const T* input2_data,
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index 87e8ff0346..fe84c1caca 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -361,6 +361,10 @@ inline int Offset(const Dims<4>& dims, int* index) {
   return Offset(dims, index[0], index[1], index[2], index[3]);
 }
 
+inline int Offset(const RuntimeShape& shape, int* index) {
+  return Offset(shape, index[0], index[1], index[2], index[3]);
+}
+
 // Get array size, DCHECKing that the dim index is in range.
 //
 // Note that this will be phased out with Dims<4>, since RuntimeShape::Dims()
@@ -936,6 +940,11 @@ struct TanhParams {
   int input_left_shift;
 };
 
+struct TransposeParams {
+  int8 perm_count;
+  int32 perm[4];
+};
+
 template <typename P>
 inline void SetActivationParams(float min, float max, P* params) {
   params->float_activation_min = min;
-- 
GitLab


From 89f9080ed0d1a43cb2fa253997b2553c6916f364 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Fri, 14 Sep 2018 11:23:46 -0700
Subject: [PATCH 0208/1357] [XLA] Support strength reducing bfloat16 dot
 products

There is no reason to limit ourselves to float32 dot product operations, we
simply convert to and from float32 around the reduction to simulate the
precision change.

PiperOrigin-RevId: 213014410
---
 .../xla/service/algebraic_simplifier.cc       | 53 ++++++++++++-------
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 5458159d14..4ef1dffa73 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -745,12 +745,24 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
   }
   const int64 rhs_kept_dim = 1 - rhs_collapsing_dim;
 
-  auto reshape_if_necessary = [&](HloInstruction* hlo) {
-    if (ShapeUtil::SameDimensions(hlo->shape(), dot->shape())) {
+  auto as_type = [&](HloInstruction* hlo, const PrimitiveType element_type) {
+    if (hlo->shape().element_type() == element_type) {
       return hlo;
     }
-    return computation_->AddInstruction(
-        HloInstruction::CreateReshape(dot->shape(), hlo));
+    return computation_->AddInstruction(HloInstruction::CreateConvert(
+        ShapeUtil::ChangeElementType(hlo->shape(), element_type), hlo));
+  };
+
+  auto reshape_if_necessary = [&](HloInstruction* hlo) {
+    if (!ShapeUtil::SameDimensions(hlo->shape(), dot->shape())) {
+      hlo = computation_->AddInstruction(
+          HloInstruction::CreateReshape(dot->shape(), hlo));
+    }
+    return as_type(hlo, dot->shape().element_type());
+  };
+
+  auto add_reduce_in_f32 = [&](HloInstruction* hlo, const int64 dim) {
+    return AddReduce(as_type(hlo, F32), dim);
   };
 
   auto broadcast_to_dim = [&](HloInstruction* hlo, const Shape& shape,
@@ -770,7 +782,7 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
   if (ShapeUtil::Rank(rhs->shape()) == 1 &&
       ShapeUtil::Rank(lhs->shape()) == 1) {
     TF_RETURN_IF_ERROR(
-        ReplaceInstruction(dot, reshape_if_necessary(AddReduce(
+        ReplaceInstruction(dot, reshape_if_necessary(add_reduce_in_f32(
                                     multiply(Flatten(lhs), Flatten(rhs)), 0))));
     return true;
   }
@@ -804,17 +816,17 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
       (ShapeUtil::Rank(lhs->shape()) == 2 &&
        lhs->shape().dimensions(lhs_kept_dim) == 1)) {
     if (ShapeUtil::Rank(rhs->shape()) == 1) {
-      TF_RETURN_IF_ERROR(ReplaceInstruction(
-          dot,
-          reshape_if_necessary(AddReduce(multiply(Flatten(lhs), rhs), 0))));
+      TF_RETURN_IF_ERROR(
+          ReplaceInstruction(dot, reshape_if_necessary(add_reduce_in_f32(
+                                      multiply(Flatten(lhs), rhs), 0))));
       return true;
     }
     TF_RETURN_IF_ERROR(ReplaceInstruction(
-        dot, reshape_if_necessary(
-                 AddReduce(multiply(broadcast_to_dim(Flatten(lhs), rhs->shape(),
-                                                     rhs_collapsing_dim),
-                                    rhs),
-                           rhs_collapsing_dim))));
+        dot, reshape_if_necessary(add_reduce_in_f32(
+                 multiply(broadcast_to_dim(Flatten(lhs), rhs->shape(),
+                                           rhs_collapsing_dim),
+                          rhs),
+                 rhs_collapsing_dim))));
     return true;
   }
 
@@ -826,7 +838,7 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
       (ShapeUtil::Rank(rhs->shape()) == 2 &&
        rhs->shape().dimensions(rhs_kept_dim) == 1)) {
     TF_RETURN_IF_ERROR(ReplaceInstruction(
-        dot, reshape_if_necessary(AddReduce(
+        dot, reshape_if_necessary(add_reduce_in_f32(
                  multiply(lhs, broadcast_to_dim(Flatten(rhs), lhs->shape(),
                                                 lhs_collapsing_dim)),
                  lhs_collapsing_dim))));
@@ -1061,7 +1073,8 @@ StatusOr<HloInstruction*> AlgebraicSimplifierVisitor::OptimizeDotOfGather(
   const int m = left_operand->shape().dimensions(1 - lhs_contracting_dimension);
   const int n =
       right_operand->shape().dimensions(1 - rhs_contracting_dimension);
-  auto memoized_shape = ShapeUtil::MakeShape(F32, {m, n});
+  auto memoized_shape =
+      ShapeUtil::MakeShape(dot->shape().element_type(), {m, n});
   auto* memoized_inst = computation_->AddInstruction(
       HloInstruction::CreateDot(memoized_shape, left_operand, right_operand,
                                 dnums, dot->precision_config()));
@@ -1109,10 +1122,12 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
   HloInstruction *lhs, *rhs;
   CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs))));
 
-  // Only optimize F32 dot operations where the dot, rhs and lhs are rank 2 or
-  // below.
-  if (dot->shape().element_type() != F32 || ShapeUtil::Rank(lhs->shape()) > 2 ||
-      ShapeUtil::Rank(rhs->shape()) > 2 || ShapeUtil::Rank(dot->shape()) > 2) {
+  // Only optimize F32 or BF16 dot operations where the dot, rhs and lhs are
+  // rank 2 or below.
+  if ((dot->shape().element_type() != F32 &&
+       dot->shape().element_type() != BF16) ||
+      ShapeUtil::Rank(lhs->shape()) > 2 || ShapeUtil::Rank(rhs->shape()) > 2 ||
+      ShapeUtil::Rank(dot->shape()) > 2) {
     return Status::OK();
   }
 
-- 
GitLab


From c20a7b81d79d30db9e990309ddb419bcb48120cc Mon Sep 17 00:00:00 2001
From: Piotr Padlewski <prazek@google.com>
Date: Fri, 14 Sep 2018 11:28:28 -0700
Subject: [PATCH 0209/1357] [tf.data] Introducing an optimization that
 parallelizes map transformations.

Stateless MapDatasets can be paralellized by switching to ParallelMapDataset. We set `num_parallel_calls` to 2 for now, but in the future a special value will be used that result in the optimal value to be selected dynamically at runtime.

This patch also exposed a memory leak which was fixed.

PiperOrigin-RevId: 213015223
---
 .../python/kernel_tests/optimization/BUILD    |  17 +++
 .../optimization/map_parallelization_test.py  |  84 ++++++++++++++
 tensorflow/core/BUILD                         |   2 +
 .../core/common_runtime/direct_session.cc     |   9 +-
 tensorflow/core/common_runtime/function.cc    |   5 +-
 .../core/distributed_runtime/graph_mgr.cc     |   8 +-
 tensorflow/core/framework/function.cc         |  13 +++
 tensorflow/core/framework/function_testlib.cc |  34 ++++++
 tensorflow/core/framework/function_testlib.h  |   3 +
 tensorflow/core/framework/op_kernel.cc        |  11 +-
 tensorflow/core/framework/op_segment.cc       |   8 ++
 tensorflow/core/framework/op_segment.h        |   4 +
 .../core/grappler/optimizers/data/BUILD       |  44 +++++++-
 .../optimizers/data/map_parallelization.cc    | 106 ++++++++++++++++++
 .../optimizers/data/map_parallelization.h     |  47 ++++++++
 .../data/map_parallelization_test.cc          |  94 ++++++++++++++++
 16 files changed, 461 insertions(+), 28 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
 create mode 100644 tensorflow/core/grappler/optimizers/data/map_parallelization.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/map_parallelization.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc

diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
index 7e9ea68047..b3187bf61b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
@@ -73,6 +73,23 @@ py_test(
     ],
 )
 
+py_test(
+    name = "map_parallelization_test",
+    size = "small",
+    srcs = ["map_parallelization_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/data/python/ops:optimization",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
 py_test(
     name = "model_dataset_op_test",
     size = "medium",
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
new file mode 100644
index 0000000000..dd547db086
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
@@ -0,0 +1,84 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the MapParallelization optimization."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+
+from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.platform import test
+
+
+class MapParallelizationTest(test.TestCase, parameterized.TestCase):
+
+  @staticmethod
+  def map_functions():
+    identity = lambda x: x
+    increment = lambda x: x + 1
+
+    def assert_greater(x):
+      assert_op = control_flow_ops.Assert(math_ops.greater(x, -1), [x])
+      with ops.control_dependencies([assert_op]):
+        return x
+
+    def random(_):
+      return random_ops.random_uniform([],
+                                       minval=0,
+                                       maxval=10,
+                                       dtype=dtypes.int64,
+                                       seed=42)
+
+    def assert_with_random(x):
+      x = assert_greater(x)
+      return random(x)
+
+    return (("Identity", identity, True), ("Increment", increment, True),
+            ("AssertGreater", assert_greater, True), ("Random", random, False),
+            ("AssertWithRandom", assert_with_random, False))
+
+  @parameterized.named_parameters(*map_functions.__func__())
+  def testMapParallelization(self, function, should_optimize):
+    next_nodes = ["ParallelMap"] if should_optimize else ["Map"]
+    dataset = dataset_ops.Dataset.range(5).apply(
+        optimization.assert_next(next_nodes)).map(function).apply(
+            optimization.optimize(["map_parallelization"]))
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      for x in range(5):
+        result = sess.run(get_next)
+        # No need to run the pipeline if it was not optimized.  Also the results
+        # might be hard to check because of random.
+        if not should_optimize:
+          return
+        r = function(x)
+        self.assertAllEqual(r, result)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 1a86bff5cd..55715bb3a6 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1429,9 +1429,11 @@ cc_library(
         ":test",
         ":testlib_ops",
         "//tensorflow/cc:scope",
+        "//tensorflow/core/kernels:cast_op",
         "//tensorflow/core/kernels:constant_op",
         "//tensorflow/core/kernels:ops_testutil",
         "//tensorflow/core/kernels:ops_util",
+        "//tensorflow/core/kernels:random_ops",
     ],
 )
 
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index b4d8e285bd..af5d5b17e7 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -1202,14 +1202,11 @@ Status DirectSession::CreateExecutors(
     auto opseg = device->op_segment();
     params.create_kernel = [this, lib, opseg](const NodeDef& ndef,
                                               OpKernel** kernel) {
-      // We do not share the kernel via the OpSegment if the node is
-      // stateless, or a function.
       // NOTE(mrry): We must not share function kernels (implemented
       // using `CallOp`) between subgraphs, because `CallOp::handle_`
       // is tied to a particular subgraph. Even if the function itself
       // is stateful, the `CallOp` that invokes it is not.
-      if (!lib->IsStateful(ndef.op()) ||
-          lib->GetFunctionLibraryDefinition()->Find(ndef.op()) != nullptr) {
+      if (!OpSegment::ShouldOwnKernel(lib, ndef.op())) {
         return lib->CreateKernel(ndef, kernel);
       }
       auto create_fn = [lib, &ndef](OpKernel** kernel) {
@@ -1222,10 +1219,8 @@ Status DirectSession::CreateExecutors(
                                  create_fn);
     };
     params.delete_kernel = [lib](OpKernel* kernel) {
-      // If the node is stateful, opseg owns it. Otherwise, delete it.
-      if (kernel && !lib->IsStateful(kernel->type_string())) {
+      if (kernel && !OpSegment::ShouldOwnKernel(lib, kernel->type_string()))
         delete kernel;
-      }
     };
 
     optimizer.Optimize(lib, options_.env, device, &partition_graph,
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 1c9b69721d..472865ca43 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -414,9 +414,8 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(
       device_type, device_, device_->GetAllocator(AllocatorAttributes()), &ndef,
       &fbody->fdef.signature(), this, fbody->arg_types, input_memory_types,
       fbody->ret_types, output_memory_types, graph_def_version_, &s);
-  *kernel = new CallOp(handle, &construction);
-  if (!s.ok()) {
-    delete *kernel;
+  if (s.ok()) {
+    *kernel = new CallOp(handle, &construction);
   }
   return s;
 }
diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index 6c146036ae..f7a2967d00 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -233,14 +233,11 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef,
     params.function_library = lib;
     params.create_kernel = [session, lib, opseg](const NodeDef& ndef,
                                                  OpKernel** kernel) {
-      // We do not share the kernel via the OpSegment if the node is
-      // stateless, or a function.
       // NOTE(mrry): We must not share function kernels (implemented
       // using `CallOp`) between subgraphs, because `CallOp::handle_`
       // is tied to a particular subgraph. Even if the function itself
       // is stateful, the `CallOp` that invokes it is not.
-      if (!lib->IsStateful(ndef.op()) ||
-          lib->GetFunctionLibraryDefinition()->Find(ndef.op()) != nullptr) {
+      if (!OpSegment::ShouldOwnKernel(lib, ndef.op())) {
         return lib->CreateKernel(ndef, kernel);
       }
       auto create_fn = [lib, &ndef](OpKernel** kernel) {
@@ -252,8 +249,7 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef,
       return opseg->FindOrCreate(session, ndef.name(), kernel, create_fn);
     };
     params.delete_kernel = [lib](OpKernel* kernel) {
-      // If the node is stateful, opseg owns it. Otherwise, delete it.
-      if (kernel && !lib->IsStateful(kernel->type_string())) {
+      if (kernel && !OpSegment::ShouldOwnKernel(lib, kernel->type_string())) {
         delete kernel;
       }
     };
diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index d979353d2f..a17959a448 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -1294,6 +1294,18 @@ FunctionDef FunctionDefHelper::Create(
   for (const auto& r : ret_def) {
     fdef.mutable_ret()->insert({r.first, r.second});
   }
+
+  auto* op_def_registry = OpRegistry::Global();
+  // Check if any op is stateful.
+  for (const auto& n : node_def) {
+    const OpDef* op_def = nullptr;
+    auto status = op_def_registry->LookUpOpDef(n.op, &op_def);
+    // Lookup can fail if e.g. we are calling a function that was not yet
+    // defined.  If it happens, conservatively assume the op is stateful.
+    if (!status.ok() || op_def->is_stateful()) {
+      fdef.mutable_signature()->set_is_stateful(true);
+    }
+  }
   return fdef;
 }
 
@@ -1355,6 +1367,7 @@ FunctionDef FunctionDefHelper::Define(const string& name,
             strings::StrCat(src.ret[0], ":", o.first, ":", i - o.second.first);
       }
     }
+    if (op_def->is_stateful()) fdef.mutable_signature()->set_is_stateful(true);
   }
 
   // Returns
diff --git a/tensorflow/core/framework/function_testlib.cc b/tensorflow/core/framework/function_testlib.cc
index c5a4f661d2..d5c203d276 100644
--- a/tensorflow/core/framework/function_testlib.cc
+++ b/tensorflow/core/framework/function_testlib.cc
@@ -91,6 +91,40 @@ FunctionDef IsZero() {
       });
 }
 
+FunctionDef RandomUniform() {
+  const Tensor kZero = test::AsScalar<int64>(0);
+  const Tensor kTen = test::AsScalar<int64>(10);
+
+  return FDH::Define(
+      // Name
+      "RandomUniform",
+      // Args
+      {"x: T"},
+      // Return values
+      {"random_uniform: int64"},
+      // Attr def
+      {"T:{float, double, int32, int64, string}"},
+      {{{"random_uniform/shape"},
+        "Const",
+        {},
+        {{"value", kZero}, {"dtype", DT_INT64}}},
+       {{"random_uniform/min"},
+        "Const",
+        {},
+        {{"value", kZero}, {"dtype", DT_INT64}}},
+       {{"random_uniform/max"},
+        "Const",
+        {},
+        {{"value", kTen}, {"dtype", DT_INT64}}},
+       {{"random_uniform"},
+        "RandomUniformInt",
+        {},
+        {{"T", DT_INT64},
+         {"Tout", DT_INT64},
+         {"seed", 87654321},
+         {"seed2", 42}}}});
+}
+
 FunctionDef XTimesTwo() {
   const Tensor kTwo = test::AsScalar<int64>(2);
   return FDH::Define(
diff --git a/tensorflow/core/framework/function_testlib.h b/tensorflow/core/framework/function_testlib.h
index ad61a76f16..a01743423b 100644
--- a/tensorflow/core/framework/function_testlib.h
+++ b/tensorflow/core/framework/function_testlib.h
@@ -84,6 +84,9 @@ FunctionDef NonZero();
 // x: T -> bool.
 FunctionDef IsZero();
 
+// x: T -> int64
+FunctionDef RandomUniform();
+
 // x:T, y:T -> y:T, x:T
 FunctionDef Swap();
 
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index c694e10193..80f2b12987 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -41,6 +41,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 
@@ -80,10 +81,8 @@ Status MatchSignatureHelper(const DataTypeSlice expected_inputs,
 
 // OpKernel ------------------------------------------------------------------
 
-// TODO(mrry): Convert to std::make_unique when available.
 OpKernel::OpKernel(OpKernelConstruction* context)
-    : OpKernel(context,
-               std::unique_ptr<const NodeDef>(new NodeDef(context->def()))) {}
+    : OpKernel(context, MakeUnique<const NodeDef>(context->def())) {}
 
 OpKernel::OpKernel(OpKernelConstruction* context,
                    std::unique_ptr<const NodeDef> node_def)
@@ -525,10 +524,8 @@ std::unique_ptr<Tensor> OpKernelContext::forward_input(
       return nullptr;
     }
   }
-  // TODO(rmlarsen): Use MakeUnique here. There is already a copy in
-  // tensorflow/compiler/xla/ptr_util.h. Perhaps this should be part of
-  // general cleanup of ownership in this code.
-  std::unique_ptr<Tensor> output_tensor(new Tensor());
+
+  auto output_tensor = MakeUnique<Tensor>();
   CHECK(output_tensor->CopyFrom(*input.tensor, output_shape));
   return output_tensor;
 }
diff --git a/tensorflow/core/framework/op_segment.cc b/tensorflow/core/framework/op_segment.cc
index dfc5aa7747..75ed4a4eaf 100644
--- a/tensorflow/core/framework/op_segment.cc
+++ b/tensorflow/core/framework/op_segment.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/op_segment.h"
 
+#include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -99,4 +100,11 @@ void OpSegment::RemoveHold(const string& session_handle) {
   delete item;
 }
 
+bool OpSegment::ShouldOwnKernel(FunctionLibraryRuntime* lib,
+                                const string& node_op) {
+  // OpSegment should not own kernel if the node is stateless, or a function.
+  return lib->IsStateful(node_op) &&
+         lib->GetFunctionLibraryDefinition()->Find(node_op) == nullptr;
+}
+
 }  // end namespace tensorflow
diff --git a/tensorflow/core/framework/op_segment.h b/tensorflow/core/framework/op_segment.h
index 4433a2554f..37d939ea2b 100644
--- a/tensorflow/core/framework/op_segment.h
+++ b/tensorflow/core/framework/op_segment.h
@@ -60,6 +60,10 @@ class OpSegment {
   Status FindOrCreate(const string& session_handle, const string& node_name,
                       OpKernel** kernel, CreateKernelFn create_fn);
 
+  // Returns true if OpSegment should own the kernel.
+  static bool ShouldOwnKernel(FunctionLibraryRuntime* lib,
+                              const string& node_op);
+
  private:
   // op name -> OpKernel
   typedef std::unordered_map<string, OpKernel*> KernelMap;
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 530c957068..e84df10778 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -19,7 +19,6 @@ cc_library(
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/clusters:cluster",
-        "//tensorflow/core/kernels:cast_op",
         "//tensorflow/core/grappler/utils:topological_sort",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
@@ -56,8 +55,8 @@ cc_library(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
-        "//tensorflow/core/kernels:cast_op",
         "//tensorflow/core/kernels:functional_ops",
+        "//tensorflow/core/kernels:control_flow_ops",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
         "//tensorflow/core:lib_internal",
     ] + tf_protos_all(),
@@ -107,7 +106,6 @@ tf_cc_test(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
-        "//tensorflow/core/kernels:cast_op",
     ],
 )
 
@@ -164,7 +162,6 @@ tf_cc_test(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/grappler:grappler_item",
-        "//tensorflow/core/kernels:cast_op",  # Must be linked for the testlib functions to work.
     ],
 )
 
@@ -256,7 +253,6 @@ cc_library(
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/clusters:cluster",
-        "//tensorflow/core/kernels:cast_op",
         "//tensorflow/core/grappler/utils:topological_sort",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
@@ -275,6 +271,43 @@ tf_cc_test(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/kernels:control_flow_ops",
+    ],
+)
+
+cc_library(
+    name = "map_parallelization",
+    srcs = ["map_parallelization.cc"],
+    hdrs = [
+        "map_parallelization.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":graph_utils",
+        "//tensorflow/core/grappler:mutable_graph_view",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:op_types",
+        "//tensorflow/core/grappler:utils",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core/grappler/utils:topological_sort",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+    ] + tf_protos_all(),
+)
+
+tf_cc_test(
+    name = "map_parallelization_test",
+    srcs = ["map_parallelization_test.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":graph_utils",
+        ":map_parallelization",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/grappler:grappler_item",
     ],
 )
 
@@ -355,6 +388,7 @@ cc_library(
         ":map_and_batch_fusion",
         ":map_and_filter_fusion",
         ":map_fusion",
+        ":map_parallelization",
         ":map_vectorization",
         ":noop_elimination",
         ":shuffle_and_repeat_fusion",
diff --git a/tensorflow/core/grappler/optimizers/data/map_parallelization.cc b/tensorflow/core/grappler/optimizers/data/map_parallelization.cc
new file mode 100644
index 0000000000..305325e434
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/map_parallelization.cc
@@ -0,0 +1,106 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/map_parallelization.h"
+
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+#include "tensorflow/core/grappler/utils.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+bool CanParallelize(const FunctionDef& function,
+                    const FunctionLibraryDefinition& library) {
+  if (!function.signature().is_stateful()) return true;
+
+  for (const auto& node : function.node_def()) {
+    const OpDef* op_def;
+    TF_CHECK_OK(library.LookUpOpDef(node.op(), &op_def));
+    // Assert is marked as stateful, but it does not have any state (except
+    // changing io).  Similarly to CUDA, we do not give guarantee that the
+    // assert operation that would fail would be the first one, so that we can
+    // parallelize it.
+    if (op_def->is_stateful() && op_def->name() != "Assert") return false;
+  }
+
+  return true;
+}
+
+NodeDef MakeParallelMap(const NodeDef& map_node, MutableGraphView* graph) {
+  NodeDef parallel_map = map_node;
+  graph_utils::SetUniqueGraphNodeName("parallel_map", graph->GetGraph(),
+                                      &parallel_map);
+  parallel_map.set_op("ParallelMapDataset");
+  // TODO(b/114475558): We want to set `num_parallel_calls` to a special value,
+  // so that dynamic tunning will pick the optimal value at runtime. Because
+  // this feature is not yet implemented, we set it to 2, which is the smallest
+  // value that introduces parallelism.
+  auto* num_parallel_calls = graph_utils::AddScalarConstNode(2, graph);
+  parallel_map.add_input(num_parallel_calls->name());
+
+  return parallel_map;
+}
+
+}  // namespace
+
+Status MapParallelization::Optimize(Cluster* cluster, const GrapplerItem& item,
+                                    GraphDef* output) {
+  *output = item.graph;
+  MutableGraphView graph(output);
+  std::set<string> nodes_to_delete;
+  FunctionLibraryDefinition function_library(OpRegistry::Global(),
+                                             item.graph.library());
+  auto get_map_node = [](const NodeDef& node) -> const NodeDef* {
+    if (node.op() == "MapDataset") return &node;
+    return nullptr;
+  };
+
+  for (const NodeDef& node : item.graph.node()) {
+    const NodeDef* map_node = get_map_node(node);
+    if (!map_node) continue;
+
+    auto* function =
+        function_library.Find(map_node->attr().at("f").func().name());
+    if (!CanParallelize(*function, function_library)) continue;
+
+    auto* parallel_map = graph.AddNode(MakeParallelMap(*map_node, &graph));
+    graph.ReplaceInput(*map_node, *parallel_map);
+
+    // TODO(prazek): we could also remove map functions from library if they
+    // are not used anymore.
+    nodes_to_delete.insert(map_node->name());
+  }
+
+  graph.DeleteNodes(nodes_to_delete);
+  return Status::OK();
+}
+
+void MapParallelization::Feedback(Cluster* cluster, const GrapplerItem& item,
+                                  const GraphDef& optimize_output,
+                                  double result) {
+  // no-op
+}
+
+REGISTER_GRAPH_OPTIMIZER_AS(MapParallelization, "map_parallelization");
+
+}  // end namespace grappler
+}  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/map_parallelization.h b/tensorflow/core/grappler/optimizers/data/map_parallelization.h
new file mode 100644
index 0000000000..ac9cf7e12a
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/map_parallelization.h
@@ -0,0 +1,47 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_PARALLELIZATION_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_PARALLELIZATION_H_
+
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+
+namespace tensorflow {
+namespace grappler {
+
+// This optimization parallelizes MapDataset when function is stateless.
+class MapParallelization : public CustomGraphOptimizer {
+ public:
+  MapParallelization() = default;
+  ~MapParallelization() override = default;
+
+  string name() const override { return "map_parallelization"; };
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* output) override;
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimize_output, double result) override;
+};
+
+}  // end namespace grappler
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_PARALLELIZATION_H_
diff --git a/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc b/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc
new file mode 100644
index 0000000000..b2a5d9b6af
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc
@@ -0,0 +1,94 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/map_parallelization.h"
+
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
+                    StringPiece function_name) {
+  return test::function::NDef(
+      name, "MapDataset", {string(input_node_name)},
+      {{"f", FunctionDefHelper::FunctionRef(string(function_name))},
+       {"Targuments", {}},
+       {"output_shapes", {}},
+       {"output_types", {}}});
+}
+
+const char stateless_fun_name[] = "XTimesTwo";
+const char stateful_fun_name[] = "RandomUniform";
+
+TEST(MapParallelizationTest, ParallelizeSimpleMap) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+       NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+       NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       NDef("range", "RangeDataset", {"start", "stop", "step"}, {}),
+       MakeMapNode("map1", "range", stateless_fun_name)},
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  MapParallelization optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+  EXPECT_TRUE(graph_utils::ContainsNodeWithOp("ParallelMapDataset", output));
+  EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map1", output));
+  EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map2", output));
+}
+
+TEST(MapParallelization, ParallelizeAssert) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+       NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+       NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       NDef("filename", "Const", {}, {{"value", ""}, {"dtype", DT_STRING}}),
+       NDef("range", "RangeDataset", {"start", "stop", "step"}, {}),
+       MakeMapNode("map1", "range", stateful_fun_name),
+       MakeMapNode("map2", "map1", stateless_fun_name),
+       NDef("cache", "CacheDataset", {"map2", "filename"}, {})},
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+          test::function::RandomUniform(),
+      });
+
+  MapParallelization optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+  EXPECT_TRUE(graph_utils::ContainsNodeWithOp("ParallelMapDataset", output));
+  EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("map1", output));
+  EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map2", output));
+}
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
-- 
GitLab


From 39f50af5634b8a4d2132b57bad2152308a0fd41c Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Fri, 14 Sep 2018 11:42:02 -0700
Subject: [PATCH 0210/1357] Improve output parsing for unsupported ops

PiperOrigin-RevId: 213017532
---
 .../contrib/lite/toco/import_tensorflow.cc    | 82 ++++++++++++-------
 .../lite/toco/import_tensorflow_test.cc       | 52 ++++++++++++
 2 files changed, 104 insertions(+), 30 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index efc1007925..2ccfd36b7c 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -69,6 +69,13 @@ bool HasAttr(const NodeDef& node, const string& attr_name) {
   return node.attr().count(attr_name) > 0;
 }
 
+bool HasWildcardDimension(const TensorShapeProto& shape) {
+  for (const auto& dim : shape.dim()) {
+    if (dim.size() == -1) return true;
+  }
+  return false;
+}
+
 const string& GetStringAttr(const NodeDef& node, const string& attr_name) {
   CHECK(HasAttr(node, attr_name));
   const auto& attr = node.attr().at(attr_name);
@@ -1054,15 +1061,27 @@ tensorflow::Status ConvertUnsupportedOperator(
       "_support_output_type_float_in_quantized_op";
 
   LOG(INFO) << "Converting unsupported operation: " << node.op();
+
   auto* op = new TensorFlowUnsupportedOperator;
+  op->tensorflow_op = node.op();
+  node.SerializeToString(&op->tensorflow_node_def);
+  model->operators.emplace_back(op);
+
+  // Parse inputs.
   const int num_inputs = GetInputsCount(node, tf_import_flags);
   for (int i = 0; i < num_inputs; ++i) {
     op->inputs.push_back(node.input(i));
   }
-  op->outputs.push_back(node.name());
-  op->tensorflow_op = node.op();
-  node.SerializeToString(&op->tensorflow_node_def);
-  model->operators.emplace_back(op);
+
+  // Parse outputs.
+  op->outputs.push_back(node.name());  // Implicit :0.
+  const tensorflow::OpDef* op_def = nullptr;
+  if (tensorflow::OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
+    for (int i = 1; i < op_def->output_arg_size(); ++i) {
+      op->outputs.push_back(absl::StrCat(node.name(), ":", i));
+    }
+  }
+
   // Parse if the op supports quantization
   if (HasAttr(node, kAttrOutputQuantized)) {
     op->quantized = GetBoolAttr(node, kAttrOutputQuantized);
@@ -1072,6 +1091,8 @@ tensorflow::Status ConvertUnsupportedOperator(
     op->support_output_type_float_in_quantized_op =
         GetBoolAttr(node, kAttrSupportOutputTypeFloatInQuantizedOp);
   }
+
+  // Parse output type(s).
   if (HasAttr(node, kAttrOutputTypes)) {
     const auto& output_types = GetListAttr(node, kAttrOutputTypes);
     for (int i = 0; i < output_types.type_size(); ++i) {
@@ -1080,33 +1101,40 @@ tensorflow::Status ConvertUnsupportedOperator(
   } else if (HasAttr(node, "Tout")) {
     const auto& output_type = GetDataTypeAttr(node, "Tout");
     op->output_data_types.push_back(ConvertDataType(output_type));
-  } else {
-    const tensorflow::OpDef* op_def = nullptr;
-    if (OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
-      for (const auto& output_arg : op_def->output_arg()) {
-        if (HasAttr(node, output_arg.type_attr())) {
-          op->output_data_types.push_back(
-              ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr())));
-        } else {
-          LOG(INFO) << "Op node missing output type attribute: " << node.name();
-          op->output_data_types.clear();
-          break;
-        }
+  } else if (op_def != nullptr) {
+    for (const auto& output_arg : op_def->output_arg()) {
+      if (HasAttr(node, output_arg.type_attr())) {
+        op->output_data_types.push_back(
+            ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr())));
+      } else {
+        LOG(INFO) << "Op node missing output type attribute: " << node.name();
+        op->output_data_types.clear();
+        break;
       }
     }
-    if (op->output_data_types.empty()) {
-      // TODO(b/113613439): Figure out how to propagate types for custom ops
-      // that have no OpDef.
-      LOG(INFO) << "Unable to determine output type for op: " << node.op();
-    }
+  } else {
+    // TODO(b/113613439): Figure out how to propagate types for custom ops
+    // that have no OpDef.
+    LOG(INFO) << "Unable to determine output type for op: " << node.op();
   }
+
+  // Parse output shape(s).
   if (HasAttr(node, kAttrOutputShapes)) {
     const auto& output_shapes = GetListAttr(node, kAttrOutputShapes);
     Shape output_shape;
     for (int i = 0; i < output_shapes.shape_size(); ++i) {
+      const auto& shape = output_shapes.shape(i);
+      // TOCO doesn't yet properly handle shapes with wildcard dimensions.
+      // TODO(b/113613439): Handle shape inference for unsupported ops that have
+      // shapes with wildcard dimensions.
+      if (HasWildcardDimension(shape)) {
+        LOG(INFO) << "Skipping wildcard output shape(s) for node: "
+                  << node.name();
+        op->output_shapes.clear();
+        break;
+      }
       const auto status =
-          ImportShape(output_shapes.shape(i).dim(), /*input_flat_size=*/nullptr,
-                      &output_shape);
+          ImportShape(shape.dim(), /*input_flat_size=*/nullptr, &output_shape);
       if (!status.ok()) {
         return status;
       }
@@ -1159,15 +1187,9 @@ tensorflow::Status ConvertPlaceholderOperator(
   if (node.attr().count("shape")) {
     const auto& shape = GetShapeAttr(node, "shape");
     auto num_dims = shape.dim_size();
-    bool has_wildcard = false;
-    for (std::size_t i = 0; i < num_dims; i++) {
-      if (shape.dim(i).size() == -1) {
-        has_wildcard = true;
-      }
-    }
     // TODO(b/62716978): This logic needs to be revisted.  During dims
     // refactoring it is an interim fix.
-    if (num_dims > 0 && !has_wildcard) {
+    if (num_dims > 0 && !HasWildcardDimension(shape)) {
       auto& dst_array_dims = *array.mutable_shape()->mutable_dims();
       dst_array_dims.resize(num_dims);
       for (std::size_t i = 0; i < num_dims; i++) {
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
index da248826a7..8a236d4444 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
@@ -60,6 +60,28 @@ Status ImportNode(const NodeDef& node) {
   return ImportNode(node, &model);
 }
 
+NodeDef BuildNode(
+    const std::string& op,
+    const std::vector<std::initializer_list<int>>& output_shapes) {
+  NodeDef node;
+  node.set_op(op);
+  node.set_name("Node1");
+  node.add_input();
+  node.set_input(0, "Node0");
+
+  AttrValue::ListValue* shapes =
+      (*node.mutable_attr())["_output_shapes"].mutable_list();
+  for (const auto& output_shape : output_shapes) {
+    tensorflow::TensorShapeProto* shape = shapes->add_shape();
+    for (int64_t output_shape_dim : output_shape) {
+      auto shape_dim = shape->add_dim();
+      shape_dim->set_size(output_shape_dim);
+    }
+  }
+
+  return node;
+}
+
 class ShapeImportTest : public ::testing::TestWithParam<tensorflow::DataType> {
  protected:
   ShapeImportTest() {}
@@ -232,5 +254,35 @@ TEST(ImportTest, FailedTypeInference) {
   ASSERT_TRUE(op->output_data_types.empty());
 }
 
+TEST(ImportTest, UnsupportedOpWithOutputShapes) {
+  // Create an unsupported op with output shapes.
+  Model model;
+  EXPECT_TRUE(ImportNode(BuildNode("Atan", {{1, 2}, {2, 3}}), &model).ok());
+  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
+  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
+  const TensorFlowUnsupportedOperator* op =
+      static_cast<const TensorFlowUnsupportedOperator*>(
+          model.operators[0].get());
+
+  // The output shapes should be imported.
+  ASSERT_EQ(op->output_shapes.size(), 2);
+  ASSERT_THAT(op->output_shapes[0].dims(), ::testing::ElementsAre(1, 2));
+  ASSERT_THAT(op->output_shapes[1].dims(), ::testing::ElementsAre(2, 3));
+}
+
+TEST(ImportTest, UnsupportedOpWithWildcardOutputShapes) {
+  // Create an unsupported op with wildcard output shapes.
+  Model model;
+  EXPECT_TRUE(ImportNode(BuildNode("Atan", {{-1, 2}}), &model).ok());
+  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
+  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
+  const TensorFlowUnsupportedOperator* op =
+      static_cast<const TensorFlowUnsupportedOperator*>(
+          model.operators[0].get());
+
+  // Wildcard shapes aren't yet supported.
+  ASSERT_TRUE(op->output_shapes.empty());
+}
+
 }  // namespace
 }  // namespace toco
-- 
GitLab


From ba30af2c475ebd62ad7d75f056dba4f9d09030a8 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Fri, 14 Sep 2018 12:11:33 -0700
Subject: [PATCH 0211/1357] [TF:XLA] Bump open source llvm revision to r342210

PiperOrigin-RevId: 213022233
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 25698da1c9..4ca083c8a3 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -491,11 +491,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "llvm",
         urls = [
-            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/738b5f5028ef39cbb023967f80fa2e5dd568556b.tar.gz",
-            "https://github.com/llvm-mirror/llvm/archive/738b5f5028ef39cbb023967f80fa2e5dd568556b.tar.gz",
+            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/ad72545325c087661feb3512efa54ebe5f888736.tar.gz",
+            "https://github.com/llvm-mirror/llvm/archive/ad72545325c087661feb3512efa54ebe5f888736.tar.gz",
         ],
-        sha256 = "2bda8dd724ab432c162fb6eace259ccf8a97f13cb627336611bff68da2f33ec2",
-        strip_prefix = "llvm-738b5f5028ef39cbb023967f80fa2e5dd568556b",
+        sha256 = "66ed69443af00fbf9b912edbb6bc0fa796a12766b5e9ad504eb6b20f813dc163",
+        strip_prefix = "llvm-ad72545325c087661feb3512efa54ebe5f888736",
         build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
     )
 
-- 
GitLab


From 8c2159a10e53e5301ae26c739a3d09fa53d3352e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 12:19:16 -0700
Subject: [PATCH 0212/1357] Updates to parameters, and to kernel helper
 functions.

PiperOrigin-RevId: 213023245
---
 .../lite/kernels/internal/optimized/optimized_ops.h      | 5 -----
 .../lite/kernels/internal/reference/reference_ops.h      | 5 -----
 tensorflow/contrib/lite/kernels/internal/types.h         | 9 ++++++++-
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 464207d739..8962d830a3 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -90,11 +90,6 @@ using reference_ops::Transpose;
 // Used mainly to convert from old-style shifts (right) to new-style (left).
 static constexpr int kReverseShift = -1;
 
-inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) {
-  return RuntimeShape(
-      {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
-}
-
 // Make a local VectorMap typedef allowing to map a float array
 // as a Eigen vector expression. The std::conditional here is to
 // construct the suitable Eigen type for the constness of the
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 2d552909a8..77927af227 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -105,11 +105,6 @@ namespace reference_ops {
 // Used mainly to convert from old-style shifts (right) to new-style (left).
 static constexpr int kReverseShift = -1;
 
-inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) {
-  return RuntimeShape(
-      {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
-}
-
 inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) {
   shape->BuildFrom(
       {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index fe84c1caca..f6636acc58 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -283,6 +283,12 @@ inline tflite::Dims<4> ToRuntimeDims(const tflite::RuntimeShape& array_shape) {
   return result;
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) {
+  return RuntimeShape(
+      {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
+}
+
 // Gets next index to iterate through a multidimensional array.
 inline bool NextIndex(const int num_dims, const int* dims, int* current) {
   if (num_dims == 0) {
@@ -764,7 +770,8 @@ struct DepthToSpaceParams {
 struct DepthwiseParams {
   PaddingType padding_type;
   PaddingValues padding_values;
-  int16 stride;
+  int16 stride_width;
+  int16 stride_height;
   int16 depth_multiplier;
   // uint8 inference params.
   // TODO(b/65838351): Use smaller types if appropriate.
-- 
GitLab


From 7023196f46e92cb393dad03faff294b370dfd786 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 12:20:23 -0700
Subject: [PATCH 0213/1357] Automated rollback of commit
 5f28bab20d303e9f815bbe8611c24b7f751e6f9e

PiperOrigin-RevId: 213023382
---
 tensorflow/python/ops/math_ops.py | 34 +++++++++++++++----------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 7c59232e40..acd5a32e82 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -2903,24 +2903,22 @@ def tensordot(a, b, axes, name=None):
         free_dims_static = None
       shape_a = array_ops.shape(a)
       rank_a = array_ops.rank(a)
-      # TODO(b/115583659): Automate this.
-      with ops.device("/cpu:0"):
-        axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes")
-        axes = cast(axes >= 0, dtypes.int32) * axes + cast(
-            axes < 0, dtypes.int32) * (
-                axes + rank_a)
-        free, _ = array_ops.setdiff1d(range(rank_a), axes)
-        free_dims = array_ops.gather(shape_a, free)
-        axes_dims = array_ops.gather(shape_a, axes)
-        prod_free_dims = reduce_prod(free_dims)
-        prod_axes_dims = reduce_prod(axes_dims)
-        perm = array_ops.concat([axes_dims, free_dims], 0)
-        if flipped:
-          perm = array_ops.concat([axes, free], 0)
-          new_shape = array_ops.stack([prod_axes_dims, prod_free_dims])
-        else:
-          perm = array_ops.concat([free, axes], 0)
-          new_shape = array_ops.stack([prod_free_dims, prod_axes_dims])
+      axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes")
+      axes = cast(axes >= 0, dtypes.int32) * axes + cast(
+          axes < 0, dtypes.int32) * (
+              axes + rank_a)
+      free, _ = array_ops.setdiff1d(range(rank_a), axes)
+      free_dims = array_ops.gather(shape_a, free)
+      axes_dims = array_ops.gather(shape_a, axes)
+      prod_free_dims = reduce_prod(free_dims)
+      prod_axes_dims = reduce_prod(axes_dims)
+      perm = array_ops.concat([axes_dims, free_dims], 0)
+      if flipped:
+        perm = array_ops.concat([axes, free], 0)
+        new_shape = array_ops.stack([prod_axes_dims, prod_free_dims])
+      else:
+        perm = array_ops.concat([free, axes], 0)
+        new_shape = array_ops.stack([prod_free_dims, prod_axes_dims])
       reshaped_a = array_ops.reshape(array_ops.transpose(a, perm), new_shape)
       return reshaped_a, free_dims, free_dims_static
 
-- 
GitLab


From cba65fbcecb828a3e6e7743f7e784c7d08d37ffb Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Fri, 14 Sep 2018 12:34:21 -0700
Subject: [PATCH 0214/1357] Define PreferBlockAccess enum to prepare for Eigen
 upgrade.

PiperOrigin-RevId: 213025676
---
 tensorflow/core/kernels/eigen_volume_patch.h | 1 +
 tensorflow/core/kernels/mirror_pad_op.h      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tensorflow/core/kernels/eigen_volume_patch.h b/tensorflow/core/kernels/eigen_volume_patch.h
index a3d795813d..80ab745bfe 100644
--- a/tensorflow/core/kernels/eigen_volume_patch.h
+++ b/tensorflow/core/kernels/eigen_volume_patch.h
@@ -43,6 +43,7 @@ struct CustomTensorEvaluator {
     IsAligned = false,
     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
     BlockAccess = false,
+    PreferBlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = NumDims == 6,
     RawAccess = false
diff --git a/tensorflow/core/kernels/mirror_pad_op.h b/tensorflow/core/kernels/mirror_pad_op.h
index cc4b6941b9..62aa7d5c29 100644
--- a/tensorflow/core/kernels/mirror_pad_op.h
+++ b/tensorflow/core/kernels/mirror_pad_op.h
@@ -103,6 +103,7 @@ struct TensorEvaluator<const TensorMirrorPadOp<PaddingDimensions, ArgType>,
     IsAligned = false,
     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
     BlockAccess = false,
+    PreferBlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = true,
     RawAccess = false
-- 
GitLab


From 9da83f0701bcece95372ee8da09f886dfd2fa2a1 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Fri, 14 Sep 2018 12:36:51 -0700
Subject: [PATCH 0215/1357] Make ReLU layer use nn.leaky_relu when appropriate.

PiperOrigin-RevId: 213026080
---
 tensorflow/python/keras/backend.py                   |  6 +++++-
 tensorflow/python/keras/backend_test.py              |  3 ++-
 .../python/keras/layers/advanced_activations.py      | 12 +++++-------
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 529b07dc12..5e1722ba20 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -3459,14 +3459,18 @@ def relu(x, alpha=0., max_value=None, threshold=0):
   Returns:
       A tensor.
   """
-  clip_max = max_value is not None
 
   if alpha != 0.:
+    if max_value is None and threshold == 0:
+      return nn.leaky_relu(x, alpha=alpha)
+
     if threshold != 0:
       negative_part = nn.relu(-x + threshold)
     else:
       negative_part = nn.relu(-x)
 
+  clip_max = max_value is not None
+
   if threshold != 0:
     # computes x for x > threshold else 0
     x = x * math_ops.cast(math_ops.greater(x, threshold), floatx())
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index 2f271c4f50..ab71589940 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -522,8 +522,9 @@ class BackendLinearAlgebraTest(test.TestCase):
       relu_op = keras.backend.relu(x)
       self.assertAllClose(keras.backend.eval(relu_op), [[0, 0], [2, 7]])
 
-      # alpha
+      # alpha (leaky relu used)
       relu_op = keras.backend.relu(x, alpha=0.5)
+      self.assertTrue('LeakyRelu' in relu_op.name)
       self.assertAllClose(keras.backend.eval(relu_op), [[-2, 0], [2, 7]])
 
       # max_value < some elements
diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 61ab69c16f..4ab786a184 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.keras import activations
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import constraints
 from tensorflow.python.keras import initializers
@@ -268,7 +267,7 @@ class Softmax(Layer):
     self.axis = axis
 
   def call(self, inputs):
-    return activations.softmax(inputs, axis=self.axis)
+    return K.softmax(inputs, axis=self.axis)
 
   def get_config(self):
     config = {'axis': self.axis}
@@ -322,11 +321,10 @@ class ReLU(Layer):
   def call(self, inputs):
     # alpha is used for leaky relu slope in activations instead of
     # negative_slope.
-    return activations.relu(
-        inputs,
-        alpha=self.negative_slope,
-        max_value=self.max_value,
-        threshold=self.threshold)
+    return K.relu(inputs,
+                  alpha=self.negative_slope,
+                  max_value=self.max_value,
+                  threshold=self.threshold)
 
   def get_config(self):
     config = {
-- 
GitLab


From a9a1d5a673ad085777e6a8b14cbe39a427493e51 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 14 Sep 2018 12:44:31 -0700
Subject: [PATCH 0216/1357] Add --config=v2 option to the .bazelrc file.

PiperOrigin-RevId: 213027176
---
 configure.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/configure.py b/configure.py
index 52a513779e..e9d162fbd2 100644
--- a/configure.py
+++ b/configure.py
@@ -1572,6 +1572,9 @@ def main():
   if is_windows():
     set_windows_build_flags(environ_cp)
 
+  # Add a config option to build TensorFlow 2.0 API.
+  write_to_bazelrc('build:v2 --define=tf_api_version=2')
+
   if get_var(
       environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace',
       False,
-- 
GitLab


From 91fa9ad89589b7d20200bb19cf3c271d71fa3bdc Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Fri, 14 Sep 2018 12:52:57 -0700
Subject: [PATCH 0217/1357] Populate custom name in registration.

PiperOrigin-RevId: 213028338
---
 tensorflow/contrib/lite/mutable_op_resolver.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/lite/mutable_op_resolver.cc b/tensorflow/contrib/lite/mutable_op_resolver.cc
index 8ee63d2a02..d7c0181720 100644
--- a/tensorflow/contrib/lite/mutable_op_resolver.cc
+++ b/tensorflow/contrib/lite/mutable_op_resolver.cc
@@ -34,6 +34,7 @@ void MutableOpResolver::AddBuiltin(tflite::BuiltinOperator op,
                                    int min_version, int max_version) {
   for (int version = min_version; version <= max_version; ++version) {
     TfLiteRegistration new_registration = *registration;
+    new_registration.custom_name = nullptr;
     new_registration.builtin_code = op;
     new_registration.version = version;
     auto op_key = std::make_pair(op, version);
@@ -47,6 +48,7 @@ void MutableOpResolver::AddCustom(const char* name,
   for (int version = min_version; version <= max_version; ++version) {
     TfLiteRegistration new_registration = *registration;
     new_registration.builtin_code = BuiltinOperator_CUSTOM;
+    new_registration.custom_name = name;
     new_registration.version = version;
     auto op_key = std::make_pair(name, version);
     custom_ops_[op_key] = new_registration;
-- 
GitLab


From 0981b26dd4f5d1b9b3baaecbb61533a658a95c2a Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 14 Sep 2018 13:31:24 -0700
Subject: [PATCH 0218/1357] Disable the flaky test case in timeline_test

PiperOrigin-RevId: 213034078
---
 tensorflow/python/client/timeline_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py
index 03effde098..281d7f2e2b 100644
--- a/tensorflow/python/client/timeline_test.py
+++ b/tensorflow/python/client/timeline_test.py
@@ -134,7 +134,7 @@ class TimelineTest(test.TestCase):
     ctf = tl.generate_chrome_trace_format()
     self._validateTrace(ctf)
 
-  def testAnalysisAndAllocations(self):
+  def disabled_testAnalysisAndAllocations(self):
     run_options = config_pb2.RunOptions(
         trace_level=config_pb2.RunOptions.FULL_TRACE)
     run_metadata = config_pb2.RunMetadata()
-- 
GitLab


From f104b477ab22d5bc71afa757ec0cdeaca8666909 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 13:49:13 -0700
Subject: [PATCH 0219/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213037039
---
 .../internal/optimized/optimized_ops.h        | 326 ++++++++++++------
 1 file changed, 220 insertions(+), 106 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 8962d830a3..2fa5d6445e 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -81,6 +81,7 @@ using reference_ops::Select;
 using reference_ops::SpaceToBatchND;
 using reference_ops::Split;
 using reference_ops::StridedSlice;
+using reference_ops::TensorFlowSplit;
 using reference_ops::Transpose;
 
 // TODO(b/80247582) Remove this constant.
@@ -183,6 +184,15 @@ ArrayMap<Scalar> MapAsArrayWithFirstDimAsRows(Scalar* data,
   return ArrayMap<Scalar>(data, rows, cols);
 }
 
+template <typename Scalar>
+ArrayMap<Scalar> MapAsArrayWithLastDimAsRows(Scalar* data,
+                                             const RuntimeShape& shape) {
+  const int dims_count = shape.DimensionsCount();
+  const int rows = shape.Dims(dims_count - 1);
+  const int cols = FlatSizeSkipDim(shape, dims_count - 1);
+  return ArrayMap<Scalar>(data, rows, cols);
+}
+
 // Copied from tensorflow/core/framework/tensor_types.h
 template <typename T, int NDIMS = 1, typename IndexType = Eigen::DenseIndex>
 struct TTypes {
@@ -3628,62 +3638,96 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape,
   }
 }
 
-inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
-                     const float* prev_activ_data,
-                     const Dims<4>& prev_activ_dims, const float* weights_data,
-                     const Dims<4>& weights_dims, const float* bias_data,
-                     const Dims<4>& bias_dims, const float* prev_state_data,
-                     const Dims<4>& prev_state_dims, float* output_state_data,
-                     const Dims<4>& output_state_dims, float* output_activ_data,
-                     const Dims<4>& output_activ_dims, float* concat_temp_data,
-                     const Dims<4>& concat_temp_dims, float* activ_temp_data,
-                     const Dims<4>& activ_temp_dims) {
+inline void LstmCell(
+    const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
+    const float* input_data, const RuntimeShape& unextended_prev_activ_shape,
+    const float* prev_activ_data, const RuntimeShape& weights_shape,
+    const float* weights_data, const RuntimeShape& unextended_bias_shape,
+    const float* bias_data, const RuntimeShape& unextended_prev_state_shape,
+    const float* prev_state_data,
+    const RuntimeShape& unextended_output_state_shape, float* output_state_data,
+    const RuntimeShape& unextended_output_activ_shape, float* output_activ_data,
+    const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data,
+    const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) {
   gemmlowp::ScopedProfilingLabel label("LstmCell");
-  MatchingArraySize(  // batches
-      input_dims, 3, prev_activ_dims, 3, prev_state_dims, 3, output_state_dims,
-      3, output_activ_dims, 3);
-  MatchingArraySize(  // height
-      input_dims, 2, prev_activ_dims, 2, prev_state_dims, 2, output_state_dims,
-      2, output_activ_dims, 2);
-  MatchingArraySize(  // width
-      input_dims, 1, prev_activ_dims, 1, prev_state_dims, 1, output_state_dims,
-      1, output_activ_dims, 1);
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 2), 1);
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 3), 1);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int prev_activ_depth = ArraySize(prev_activ_dims, 0);
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape =
+      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  MatchingDim(  // batches
+      input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+      output_state_shape, 0, output_activ_shape, 0);
+  MatchingDim(  // height
+      input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+      output_state_shape, 1, output_activ_shape, 1);
+  MatchingDim(  // width
+      input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+      output_state_shape, 2, output_activ_shape, 2);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
   const int total_input_depth = prev_activ_depth + input_depth;
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 0), total_input_depth);
-  TFLITE_CHECK_EQ(MatchingArraySize(bias_dims, 1, bias_dims, 2, bias_dims, 3),
-                  1);
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
+                   total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
   const int intern_activ_depth =
-      MatchingArraySize(weights_dims, 1, bias_dims, 0);
-  TFLITE_CHECK_EQ(intern_activ_depth % 4, 0);
+      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
+                   intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
   const int output_depth =
-      MatchingArraySize(prev_state_dims, 0, prev_activ_dims, 0,
-                        output_state_dims, 0, output_activ_dims, 0);
-  TFLITE_CHECK_EQ(output_depth, intern_activ_depth / 4);
+      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                  3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
 
   // Concatenate prev_activ and input data together
   std::vector<float const*> concat_input_arrays_data;
-  std::vector<Dims<4> const*> concat_input_arrays_dims;
+  std::vector<RuntimeShape const*> concat_input_arrays_shapes;
   concat_input_arrays_data.push_back(input_data);
   concat_input_arrays_data.push_back(prev_activ_data);
-  concat_input_arrays_dims.push_back(&input_dims);
-  concat_input_arrays_dims.push_back(&prev_activ_dims);
-  Concatenation<FusedActivationFunctionType::kNone, float>(
-      0, &(concat_input_arrays_data[0]), &(concat_input_arrays_dims[0]),
-      concat_input_arrays_data.size(), concat_temp_data, concat_temp_dims);
+  concat_input_arrays_shapes.push_back(&input_shape);
+  concat_input_arrays_shapes.push_back(&prev_activ_shape);
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = concat_input_arrays_data.size();
+  Concatenation(concat_params, &(concat_input_arrays_shapes[0]),
+                &(concat_input_arrays_data[0]), concat_temp_shape,
+                concat_temp_data);
 
   // Fully connected
-  FullyConnected<FusedActivationFunctionType::kNone>(
-      concat_temp_data, concat_temp_dims, weights_data, weights_dims, bias_data,
-      bias_dims, activ_temp_data, activ_temp_dims);
+  tflite::FullyConnectedParams fc_params;
+  fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+  fc_params.float_activation_max = std::numeric_limits<float>::max();
+  FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape,
+                 weights_data, bias_shape, bias_data, activ_temp_shape,
+                 activ_temp_data);
 
   // Map raw arrays to Eigen arrays so we can use Eigen's optimized array
   // operations.
   ArrayMap<float> activ_temp_map =
-      MapAsArrayWithFirstDimAsRows(activ_temp_data, activ_temp_dims);
+      MapAsArrayWithLastDimAsRows(activ_temp_data, activ_temp_shape);
   auto input_gate_sm = activ_temp_map.block(0 * output_depth, 0, output_depth,
                                             activ_temp_map.cols());
   auto new_input_sm = activ_temp_map.block(1 * output_depth, 0, output_depth,
@@ -3693,11 +3737,11 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
   auto output_gate_sm = activ_temp_map.block(3 * output_depth, 0, output_depth,
                                              activ_temp_map.cols());
   ArrayMap<const float> prev_state_map =
-      MapAsArrayWithFirstDimAsRows(prev_state_data, prev_state_dims);
+      MapAsArrayWithLastDimAsRows(prev_state_data, prev_state_shape);
   ArrayMap<float> output_state_map =
-      MapAsArrayWithFirstDimAsRows(output_state_data, output_state_dims);
+      MapAsArrayWithLastDimAsRows(output_state_data, output_state_shape);
   ArrayMap<float> output_activ_map =
-      MapAsArrayWithFirstDimAsRows(output_activ_data, output_activ_dims);
+      MapAsArrayWithLastDimAsRows(output_activ_data, output_activ_shape);
 
   // Combined memory state and final output calculation
   gemmlowp::ScopedProfilingLabel label2("MemoryStateAndFinalOutput");
@@ -3711,56 +3755,120 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
       output_state_map.tanh();
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
+                     const float* prev_activ_data,
+                     const Dims<4>& prev_activ_dims, const float* weights_data,
+                     const Dims<4>& weights_dims, const float* bias_data,
+                     const Dims<4>& bias_dims, const float* prev_state_data,
+                     const Dims<4>& prev_state_dims, float* output_state_data,
+                     const Dims<4>& output_state_dims, float* output_activ_data,
+                     const Dims<4>& output_activ_dims, float* concat_temp_data,
+                     const Dims<4>& concat_temp_dims, float* activ_temp_data,
+                     const Dims<4>& activ_temp_dims) {
+  tflite::LstmCellParams op_params;
+  // Float LSTM cell does not need parameters to be set: leave untouched.
+
+  LstmCell(op_params, DimsToShape(input_dims), input_data,
+           DimsToShape(prev_activ_dims), prev_activ_data,
+           DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims),
+           bias_data, DimsToShape(prev_state_dims), prev_state_data,
+           DimsToShape(output_state_dims), output_state_data,
+           DimsToShape(output_activ_dims), output_activ_data,
+           DimsToShape(concat_temp_dims), concat_temp_data,
+           DimsToShape(activ_temp_dims), activ_temp_data);
+}
+
 // Quantized LSTM cell. Currently just a copy of the reference impl in
 // reference_ops.h. See the big function comment there, not replicating it
 // here.
 template <int StateIntegerBits>
-void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
-              const uint8* prev_activ_data_uint8,
-              const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8,
-              const Dims<4>& weights_dims, const int32* bias_data_int32,
-              const Dims<4>& bias_dims, const int16* prev_state_data_int16,
-              const Dims<4>& prev_state_dims, int16* output_state_data_int16,
-              const Dims<4>& output_state_dims, uint8* output_activ_data_uint8,
-              const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8,
-              const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16,
-              const Dims<4>& activ_temp_dims, int32 weights_zero_point,
-              int32 accum_multiplier, int accum_shift,
-              gemmlowp::GemmContext* gemm_context) {
+inline void LstmCell(
+    const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
+    const uint8* input_data_uint8,
+    const RuntimeShape& unextended_prev_activ_shape,
+    const uint8* prev_activ_data_uint8, const RuntimeShape& weights_shape,
+    const uint8* weights_data_uint8, const RuntimeShape& unextended_bias_shape,
+    const int32* bias_data_int32,
+    const RuntimeShape& unextended_prev_state_shape,
+    const int16* prev_state_data_int16,
+    const RuntimeShape& unextended_output_state_shape,
+    int16* output_state_data_int16,
+    const RuntimeShape& unextended_output_activ_shape,
+    uint8* output_activ_data_uint8,
+    const RuntimeShape& unextended_concat_temp_shape,
+    uint8* concat_temp_data_uint8,
+    const RuntimeShape& unextended_activ_temp_shape,
+    int16* activ_temp_data_int16, gemmlowp::GemmContext* gemm_context) {
+  int32 weights_zero_point = params.weights_zero_point;
+  int32 accum_multiplier = params.accum_multiplier;
+  int accum_shift = params.accum_shift;
   gemmlowp::ScopedProfilingLabel label(
       "LstmCell/quantized (8bit external, 16bit internal)");
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape =
+      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
   // Gather dimensions information, and perform consistency checks.
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_dims, 0, prev_activ_dims, prev_state_dims,
-                              output_state_dims, output_activ_dims);
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 2), 1);
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 3), 1);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int prev_activ_depth = ArraySize(prev_activ_dims, 0);
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int outer_size = MatchingFlatSizeSkipDim(
+      input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape,
+      output_activ_shape);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
   const int total_input_depth = prev_activ_depth + input_depth;
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 0), total_input_depth);
-  TFLITE_CHECK_EQ(MatchingArraySize(bias_dims, 1, bias_dims, 2, bias_dims, 3),
-                  1);
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
+                   total_input_depth);
   const int intern_activ_depth =
-      MatchingArraySize(weights_dims, 1, bias_dims, 0);
-  TFLITE_CHECK_EQ(intern_activ_depth % 4, 0);
+      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
+                   intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
   const int output_depth =
-      MatchingArraySize(prev_state_dims, 0, prev_activ_dims, 0,
-                        output_state_dims, 0, output_activ_dims, 0);
-  TFLITE_CHECK_EQ(output_depth, intern_activ_depth / 4);
-  const int fc_batches = FlatSizeSkipDim(activ_temp_dims, 0);
+      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                  3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
   const int fc_output_depth =
-      MatchingArraySize(weights_dims, 1, activ_temp_dims, 0);
-  const int fc_accum_depth = ArraySize(weights_dims, 0);
-  TFLITE_CHECK_EQ(fc_output_depth, 4 * output_depth);
+      MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+  const int fc_accum_depth = total_input_depth;
+  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
 
   // Depth-concatenate prev_activ and input data together.
   uint8 const* concat_input_arrays_data[2] = {input_data_uint8,
                                               prev_activ_data_uint8};
-  Dims<4> const* concat_input_arrays_dims[2] = {&input_dims, &prev_activ_dims};
-  Concatenation<FusedActivationFunctionType::kNone, uint8>(
-      0, concat_input_arrays_data, concat_input_arrays_dims, 2,
-      concat_temp_data_uint8, concat_temp_dims);
+  const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
+                                                       &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes,
+                concat_input_arrays_data, concat_temp_shape,
+                concat_temp_data_uint8);
 
   // Implementation of the fully connected node inside the LSTM cell.
   // The operands are 8-bit integers, the accumulators are internally 32bit
@@ -3770,11 +3878,10 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
   bool gemm_already_performed = false;
 #ifdef GEMMLOWP_NEON
   if (fc_batches == 1 && !(fc_output_depth % 4) && !(fc_accum_depth % 8)) {
-    GEMVForLstmCell(DimsToShape(concat_temp_dims), concat_temp_data_uint8,
-                    DimsToShape(weights_dims), weights_data_uint8,
-                    weights_zero_point, DimsToShape(bias_dims), bias_data_int32,
-                    accum_multiplier, accum_shift, DimsToShape(activ_temp_dims),
-                    activ_temp_data_int16);
+    GEMVForLstmCell(concat_temp_shape, concat_temp_data_uint8, weights_shape,
+                    weights_data_uint8, weights_zero_point, bias_shape,
+                    bias_data_int32, accum_multiplier, accum_shift,
+                    activ_temp_shape, activ_temp_data_int16);
     gemm_already_performed = true;
   }
 #endif
@@ -3963,28 +4070,35 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
   }
 }
 
-template <FusedActivationFunctionType Ac, typename Scalar>
-void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims,
-                     int outputs_count, Scalar* const* output_data,
-                     const Dims<4>* const* output_dims) {
-  gemmlowp::ScopedProfilingLabel label("TensorFlowSplit");
-  TFLITE_DCHECK_GE(outputs_count, 1);
-  for (int i = 0; i < outputs_count; i++) {
-    MatchingFlatSizeSkipDim(*output_dims[i], 0, input_dims);
-  }
-  const int outer_size = FlatSizeSkipDim(input_dims, 0);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  // For now we don't have a model with a TensorFlowSplit
-  // with fused activation function.
-  TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone);
-  const Scalar* input_ptr = input_data;
-  for (int k = 0; k < outer_size; k++) {
-    for (int i = 0; i < outputs_count; ++i) {
-      memcpy(output_data[i] + k * output_dims[i]->sizes[0], input_ptr,
-             output_dims[i]->sizes[0] * sizeof(Scalar));
-      input_ptr += output_dims[i]->sizes[0];
-    }
-  }
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+template <int StateIntegerBits>
+void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
+              const uint8* prev_activ_data_uint8,
+              const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8,
+              const Dims<4>& weights_dims, const int32* bias_data_int32,
+              const Dims<4>& bias_dims, const int16* prev_state_data_int16,
+              const Dims<4>& prev_state_dims, int16* output_state_data_int16,
+              const Dims<4>& output_state_dims, uint8* output_activ_data_uint8,
+              const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8,
+              const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16,
+              const Dims<4>& activ_temp_dims, int32 weights_zero_point,
+              int32 accum_multiplier, int accum_shift,
+              gemmlowp::GemmContext* gemm_context) {
+  tflite::LstmCellParams op_params;
+  op_params.weights_zero_point = weights_zero_point;
+  op_params.accum_multiplier = accum_multiplier;
+  op_params.accum_shift = accum_shift;
+
+  LstmCell<StateIntegerBits>(
+      op_params, DimsToShape(input_dims), input_data_uint8,
+      DimsToShape(prev_activ_dims), prev_activ_data_uint8,
+      DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims),
+      bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16,
+      DimsToShape(output_state_dims), output_state_data_int16,
+      DimsToShape(output_activ_dims), output_activ_data_uint8,
+      DimsToShape(concat_temp_dims), concat_temp_data_uint8,
+      DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context);
 }
 
 inline int NodeOffset(int b, int h, int w, int height, int width) {
-- 
GitLab


From 19d66a950e2091bb598c6a2d375e14208f5773b2 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 14 Sep 2018 14:07:14 -0700
Subject: [PATCH 0220/1357] Disable flaky gpu_base_test

PiperOrigin-RevId: 213040362
---
 tensorflow/contrib/tensorrt/BUILD | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 9e8979bce4..4ea7216ef2 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -444,7 +444,6 @@ cuda_py_test(
 cuda_py_tests(
     name = "tf_trt_integration_test",
     srcs = [
-        "test/base_test.py",
         "test/batch_matmul_test.py",
         "test/biasadd_matmul_test.py",
         "test/binary_tensor_weight_broadcast_test.py",
@@ -471,6 +470,26 @@ cuda_py_tests(
     ],
 )
 
+cuda_py_tests(
+    name = "base_test",
+    srcs = [
+        "test/base_test.py",
+    ],
+    additional_deps = [
+        ":tf_trt_integration_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_test_lib",
+    ],
+    tags = [
+        "manual",
+        "no_cuda_on_cpu_tap",
+        "no_gpu",
+        "no_windows",
+        "nomac",
+        "notap",
+    ],
+)
+
 cc_library(
     name = "utils",
     srcs = ["convert/utils.cc"],
-- 
GitLab


From b5594e6121e902f8dd2d5127653a1ec5f97daccd Mon Sep 17 00:00:00 2001
From: Mingsheng Hong <hongm@google.com>
Date: Fri, 14 Sep 2018 14:15:05 -0700
Subject: [PATCH 0221/1357] Added TFE_OpSetAttrTensor() to eager C API.

Also added some experimental C APIs for facilitate the use of eager C APIs in
S4TF compiler.

PiperOrigin-RevId: 213041780
---
 tensorflow/c/c_api_experimental.cc | 50 ++++++++++++++++++++++++++++++
 tensorflow/c/c_api_experimental.h  |  9 ++++++
 tensorflow/c/eager/c_api.cc        |  7 +++++
 tensorflow/c/eager/c_api.h         |  5 +++
 4 files changed, 71 insertions(+)

diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index c195c9e01c..3bcc62cf2d 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -8705,3 +8705,53 @@ TFE_TensorHandle* TFE_DequeueVariantTensor(TF_Session* session, int tensor_id,
 
   return createTFEDequeue(ctx, TF_VARIANT, queue, status);
 }
+
+static void CheckOk(TF_Status* status) {
+  CHECK_EQ(TF_GetCode(status), TF_OK) << TF_Message(status);
+}
+
+void TFE_TensorHandlePrintDebugString(TFE_TensorHandle* handle) {
+  auto* status = TF_NewStatus();
+  TF_Tensor* t = TFE_TensorHandleResolve(handle, status);
+  CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+
+  tensorflow::Tensor dst;
+  TF_CHECK_OK(TF_TensorToTensor(t, &dst));
+  LOG(INFO) << dst.DebugString();
+
+  TF_DeleteTensor(t);
+  TF_DeleteStatus(status);
+}
+
+TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx) {
+  // Intentionally LOG into INFO below for ease of debugging.
+  VLOG(1) << "TFE_RunConstOp called";
+
+  auto* status = TF_NewStatus();
+  auto* op = TFE_NewOp(ctx, "Const", status);
+  CheckOk(status);
+  TFE_OpSetAttrType(op, "dtype", TF_FLOAT);
+
+  auto* tensor =
+      TF_AllocateTensor(TF_FLOAT, /*shape.data()*/ nullptr, /*shape.size()*/ 0,
+                        TF_DataTypeSize(TF_FLOAT) * 1);
+  auto* ptr = reinterpret_cast<char*>(TF_TensorData(tensor));
+  *reinterpret_cast<float*>(ptr) = 17.0;
+
+  TFE_OpSetAttrTensor(op, "value", tensor, status);
+  CheckOk(status);
+  TF_DeleteTensor(tensor);
+  VLOG(1) << "New op created";
+
+  TFE_TensorHandle* retval;
+  int num_retvals = 1;
+  TFE_Execute(op, &retval, &num_retvals, status);
+  CheckOk(status);
+  CHECK_EQ(num_retvals, 1);
+  VLOG(1) << "Op executed";
+
+  TFE_DeleteOp(op);
+  TF_DeleteStatus(status);
+
+  return retval;
+}
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index 522c91f67e..a3ca847d96 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -174,6 +174,15 @@ TF_CAPI_EXPORT extern void TFE_EnqueueVariantTensor(TF_Session* session,
 TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueVariantTensor(
     TF_Session* session, int tensor_id, TF_Status* status);
 
+// Prints `handle` in a human readable format to standard output for debugging.
+TF_CAPI_EXPORT extern void TFE_TensorHandlePrintDebugString(
+    TFE_TensorHandle* handle);
+
+// Returns a const scalar tensor.
+// Caller owns both the input and the output tensor handles.
+// TODO: Remove this API with hard-coded tensor computation.
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx);
+
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 349d9bcd7c..6f86ea80e5 100755
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -567,6 +567,13 @@ void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name,
   op->operation.MutableAttrs()->Set(attr_name, attr_value);
 }
 
+void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor,
+                         TF_Status* status) {
+  tensorflow::Tensor t;
+  status->status = TF_TensorToTensor(tensor, &t);
+  if (status->status.ok()) op->operation.MutableAttrs()->Set(attr_name, t);
+}
+
 void TFE_OpSetAttrStringList(TFE_Op* op, const char* attr_name,
                              const void* const* values, const size_t* lengths,
                              int num_values) {
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index 337447eec9..a87d73ec8e 100755
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -311,6 +311,11 @@ TF_CAPI_EXPORT extern void TFE_OpSetAttrFunction(TFE_Op* op,
                                                  const char* attr_name,
                                                  const TFE_Op* value);
 
+TF_CAPI_EXPORT extern void TFE_OpSetAttrTensor(TFE_Op* op,
+                                               const char* attr_name,
+                                               TF_Tensor* tensor,
+                                               TF_Status* status);
+
 TF_CAPI_EXPORT extern void TFE_OpSetAttrStringList(TFE_Op* op,
                                                    const char* attr_name,
                                                    const void* const* values,
-- 
GitLab


From 84d8423bececc26f127a1c40c00588463d8d1650 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Fri, 14 Sep 2018 15:00:55 -0700
Subject: [PATCH 0222/1357] Generalize TransformFilter method in preparation of
 NHWC Conv support

PiperOrigin-RevId: 213049674
---
 .../fused_conv2d_bias_activation_op.cc        |  3 +-
 tensorflow/core/kernels/conv_2d.h             | 45 +++++++----
 .../core/kernels/conv_grad_filter_ops.cc      |  3 +-
 .../core/kernels/conv_grad_input_ops.cc       |  6 +-
 tensorflow/core/kernels/conv_grad_ops_3d.cc   |  6 +-
 tensorflow/core/kernels/conv_ops.cc           |  7 +-
 tensorflow/core/kernels/conv_ops_3d.cc        |  6 +-
 tensorflow/core/kernels/conv_ops_gpu_3.cu.cc  | 81 ++++++++-----------
 8 files changed, 84 insertions(+), 73 deletions(-)

diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index 716bb87e38..e9e6464d06 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -497,7 +497,8 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
                                 FORMAT_OIHW, filter_param.shape(), FORMAT_HWIO),
                             &maybe_transformed_filter));
     functor::TransformFilter<GPUDevice, T, int, 4>()(
-        ctx->eigen_device<GPUDevice>(), To32Bit(filter_param.tensor<T, 4>()),
+        ctx->eigen_device<GPUDevice>(), FORMAT_OIHW,
+        To32Bit(filter_param.tensor<T, 4>()),
         To32Bit(maybe_transformed_filter.tensor<T, 4>()));
     filter = &maybe_transformed_filter;
   }
diff --git a/tensorflow/core/kernels/conv_2d.h b/tensorflow/core/kernels/conv_2d.h
index de9b69828e..639c3062cc 100644
--- a/tensorflow/core/kernels/conv_2d.h
+++ b/tensorflow/core/kernels/conv_2d.h
@@ -137,17 +137,16 @@ struct MatMulConvFunctor {
   }
 };
 
-// Shuffles a filter tensor from:
-//   [<spatial_dims>, in, out]
-// to:
-//   [out, in, <spatial_dims>]
+// Shuffles a filter tensor from TensorFlow format HWIO to dst_filter_format.
+//
+// Note: Currently OIHW is the only supported destination format. Support for
+// OHWI format will be added in a follow-up change.
 template <typename Device, typename T, typename IndexType, int NDIMS>
 struct TransformFilter {
-  void operator()(const Device& d,
+  void operator()(const Device& d, FilterTensorFormat dst_filter_format,
                   typename TTypes<T, NDIMS, IndexType>::ConstTensor in,
                   typename TTypes<T, NDIMS, IndexType>::Tensor out) {
-    // We want a 3, 2, 0, 1 shuffle. Merge the spatial dimensions together
-    // to speed up the shuffle operation.
+    // Merge the spatial dimensions together to speed up the shuffle operation.
     Eigen::DSizes<IndexType, 3> merged_dims;
     merged_dims[0] = in.dimension(0);  // spatial dimensions
     for (int i = 1; i < NDIMS - 2; ++i) {
@@ -156,16 +155,30 @@ struct TransformFilter {
     merged_dims[1] = in.dimension(NDIMS - 2);  // input filters
     merged_dims[2] = in.dimension(NDIMS - 1);  // output filters
 
+    CHECK(dst_filter_format == FORMAT_OIHW)
+        << "Unsupported destination filter format: "
+        << ToString(dst_filter_format);
+    // Source filter format is FORMAT_HWIO and spatial dimensions HW are merged
+    // in the beginning.
+    Eigen::DSizes<IndexType, 3> shuffling_perm =
+        Eigen::DSizes<IndexType, 3>(2, 1, 0);
+
     Eigen::DSizes<IndexType, NDIMS> expanded_dims;
-    expanded_dims[0] = in.dimension(NDIMS - 1);  // output filters
-    expanded_dims[1] = in.dimension(NDIMS - 2);  // input filters
-    for (int i = 0; i < NDIMS - 2; ++i) {        // spatial dimensions
-      expanded_dims[i + 2] = in.dimension(i);
+    int out_index = 0;
+    for (int merged_dim = 0; merged_dim < merged_dims.rank(); ++merged_dim) {
+      if (shuffling_perm[merged_dim] == 0) {
+        for (int spatial_dim = 0; spatial_dim < NDIMS - 2; ++spatial_dim) {
+          expanded_dims[out_index++] = in.dimension(spatial_dim);
+        }
+      } else {
+        constexpr int kLastSpatialDim = NDIMS - 3;
+        expanded_dims[out_index++] =
+            in.dimension(kLastSpatialDim + shuffling_perm[merged_dim]);
+      }
     }
 
-    out.device(d) = in.reshape(merged_dims)
-                        .shuffle(Eigen::DSizes<IndexType, 3>(2, 1, 0))
-                        .reshape(expanded_dims);
+    out.device(d) =
+        in.reshape(merged_dims).shuffle(shuffling_perm).reshape(expanded_dims);
   }
 };
 
@@ -282,7 +295,9 @@ struct SwapDimension0And2InTensor3 {
                   const gtl::ArraySlice<int64>& input_dims, T* out);
 };
 
-// Reverses the effect of TransformFilter above.
+// Transforms back filter from OIHW to HWOI format to reverse effect of
+// TransformFilter above.
+// TODO(hinsu): Support reverse transformation from filter format OHWI as well.
 template <typename Device, typename T, int NDIMS>
 struct ReverseTransformFilter {
   void operator()(const Device& d, typename TTypes<T, NDIMS>::ConstTensor in,
diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc
index 63b1bcda43..9e86a16b66 100644
--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -1018,7 +1018,8 @@ namespace functor {
   extern template struct InflatePadAndShuffle<GPUDevice, T, 4, int>;     \
   template <>                                                            \
   void TransformFilter<GPUDevice, T, int, 4>::operator()(                \
-      const GPUDevice& d, typename TTypes<T, 4, int>::ConstTensor in,    \
+      const GPUDevice& d, FilterTensorFormat dst_filter_format,          \
+      typename TTypes<T, 4, int>::ConstTensor in,                        \
       typename TTypes<T, 4, int>::Tensor out);                           \
   extern template struct TransformFilter<GPUDevice, T, int, 4>;          \
   template <>                                                            \
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index d664a11e73..43bb5ea56c 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -901,7 +901,8 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
                               &transformed_filter));
 
   functor::TransformFilter<GPUDevice, T, int, 4>()(
-      ctx->eigen_device<GPUDevice>(), To32Bit(filter.tensor<T, 4>()),
+      ctx->eigen_device<GPUDevice>(), FORMAT_OIHW,
+      To32Bit(filter.tensor<T, 4>()),
       To32Bit(transformed_filter.tensor<T, 4>()));
 
   Tensor transformed_out_backprop;
@@ -1090,7 +1091,8 @@ namespace functor {
   extern template struct InflatePadAndShuffle<GPUDevice, T, 4, int>;     \
   template <>                                                            \
   void TransformFilter<GPUDevice, T, int, 4>::operator()(                \
-      const GPUDevice& d, typename TTypes<T, 4, int>::ConstTensor in,    \
+      const GPUDevice& d, FilterTensorFormat dst_filter_format,          \
+      typename TTypes<T, 4, int>::ConstTensor in,                        \
       typename TTypes<T, 4, int>::Tensor out);                           \
   extern template struct TransformFilter<GPUDevice, T, int, 4>;          \
   template <>                                                            \
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index d26b86c712..bab91f5e86 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -1054,7 +1054,8 @@ namespace functor {
 #define DECLARE_GPU_SPEC(T)                                           \
   template <>                                                         \
   void TransformFilter<GPUDevice, T, int, 5>::operator()(             \
-      const GPUDevice& d, typename TTypes<T, 5, int>::ConstTensor in, \
+      const GPUDevice& d, FilterTensorFormat dst_filter_format,       \
+      typename TTypes<T, 5, int>::ConstTensor in,                     \
       typename TTypes<T, 5, int>::Tensor out);                        \
   template <>                                                         \
   void ReverseTransformFilter<GPUDevice, T, 5>::operator()(           \
@@ -1287,7 +1288,8 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
                          dims.filter_size(1), dims.filter_size(2)}),
             &transformed_filter));
     functor::TransformFilter<GPUDevice, T, int, 5>()(
-        context->eigen_device<GPUDevice>(), To32Bit(filter.tensor<T, 5>()),
+        context->eigen_device<GPUDevice>(), FORMAT_OIHW,
+        To32Bit(filter.tensor<T, 5>()),
         To32Bit(transformed_filter.tensor<T, 5>()));
 
     // Shape: batch, filters, z, y, x.
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index ef692418d6..6f5c8d8461 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -680,9 +680,9 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
                           TensorShape({filter.dim_size(3), filter.dim_size(2),
                                        filter.dim_size(0), filter.dim_size(1)}),
                           &transformed_filter));
-
   functor::TransformFilter<GPUDevice, T, int, 4>()(
-      ctx->eigen_device<GPUDevice>(), To32Bit(filter.tensor<T, 4>()),
+      ctx->eigen_device<GPUDevice>(), FORMAT_OIHW,
+      To32Bit(filter.tensor<T, 4>()),
       To32Bit(transformed_filter.tensor<T, 4>()));
 
   Tensor transformed_output;
@@ -823,7 +823,8 @@ namespace functor {
   extern template struct MatMulConvFunctor<GPUDevice, T>;                    \
   template <>                                                                \
   void TransformFilter<GPUDevice, T, int, 4>::operator()(                    \
-      const GPUDevice& d, typename TTypes<T, 4, int>::ConstTensor in,        \
+      const GPUDevice& d, FilterTensorFormat dst_filter_format,              \
+      typename TTypes<T, 4, int>::ConstTensor in,                            \
       typename TTypes<T, 4, int>::Tensor out);                               \
   extern template struct TransformFilter<GPUDevice, T, int, 4>;              \
   template <>                                                                \
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index a1eed4e68c..5c2b88924b 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -386,7 +386,8 @@ struct LaunchConvOp<GPUDevice, T> {
     // filter: [x, y, z, in, out]
     // t_filter: [out, in, x, y, z]
     functor::TransformFilter<GPUDevice, T, int, 5>()(
-        ctx->eigen_device<GPUDevice>(), To32Bit(filter.tensor<T, 5>()),
+        ctx->eigen_device<GPUDevice>(), FORMAT_OIHW,
+        To32Bit(filter.tensor<T, 5>()),
         To32Bit(transformed_filter.tensor<T, 5>()));
 
     Tensor transformed_output;
@@ -514,7 +515,8 @@ namespace functor {
 #define DECLARE_GPU_SPEC(T)                                           \
   template <>                                                         \
   void TransformFilter<GPUDevice, T, int, 5>::operator()(             \
-      const GPUDevice& d, typename TTypes<T, 5, int>::ConstTensor in, \
+      const GPUDevice& d, FilterTensorFormat dst_filter_format,       \
+      typename TTypes<T, 5, int>::ConstTensor in,                     \
       typename TTypes<T, 5, int>::Tensor out);                        \
   template <>                                                         \
   void ReverseTransformFilter<GPUDevice, T, 5>::operator()(           \
diff --git a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
index a5fa48f85e..46167db3a2 100644
--- a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
+++ b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
@@ -170,51 +170,33 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index<IndexCount> FlatToTensorIndex(
   return tensor_index;
 }
 
-// A Cuda custom kernel that swaps dimension-0 and dimension-2 of a 3D tensor.
-template <typename T, bool conjugate = false>
-__global__ void SwapDimension0And2InTensor3Simple(int nthreads, const T* input,
-                                                  Dimension<3> input_dims,
-                                                  T* output) {
-  Dimension<3> output_dims;
-  output_dims[0] = input_dims[2];
-  output_dims[1] = input_dims[1];
-  output_dims[2] = input_dims[0];
-
-  CUDA_1D_KERNEL_LOOP(index, nthreads) {
-    int output_index = index;
-
-    Index<3> output_tensor_index = FlatToTensorIndex(output_index, output_dims);
-
-    Index<3> input_tensor_index;
-    input_tensor_index[0] = output_tensor_index[2];
-    input_tensor_index[1] = output_tensor_index[1];
-    input_tensor_index[2] = output_tensor_index[0];
-
-    int input_index = TensorIndexToFlat(input_tensor_index, input_dims);
-
-    output[output_index] =
-        maybe_conj<T, conjugate>::run(ldg(input + input_index));
-  }
-}
-
-// A Cuda custom kernel that swaps dimension-1 and dimension-2 of a 3D tensor.
-template <typename T, bool conjugate = false>
-__global__ void SwapDimension1And2InTensor3Simple(int nthreads, const T* input,
-                                                  Dimension<3> input_dims,
-                                                  T* output) {
+// A simple CUDA custom kernel to shuffle dimensions of a 3D tensor according to
+// the given shuffle permutation in template parameters. Shuffle permutation
+// <sp0, sp1, sp2> shuffles dimensions such that input dimension 0 goes to sp0,
+// 1 goes to sp1 and 2 goes to sp2. For example, shuffle permutation <2, 0, 1>
+// will populate output so that input[x][y][z] is equal to (*output)[y][z][x].
+//
+// Requires that nthreads is equal to the total number of elements in the input
+// tensor.
+template <typename T, int sp0, int sp1, int sp2, bool conjugate = false>
+__global__ void ShuffleInTensor3Simple(int nthreads, const T* input,
+                                       Dimension<3> input_dims, T* output) {
   Dimension<3> output_dims;
-  output_dims[0] = input_dims[0];
-  output_dims[1] = input_dims[2];
-  output_dims[2] = input_dims[1];
-
-  CUDA_1D_KERNEL_LOOP(index, nthreads) {
-    int output_index = index;
+  output_dims[sp0] = input_dims[0];
+  output_dims[sp1] = input_dims[1];
+  output_dims[sp2] = input_dims[2];
+
+  // Iterate over output as opposed to iterating over input for better
+  // performance. Iterating over output will generate sequential writes and
+  // random reads that performs better compared to sequential reads and random
+  // writes.
+  CUDA_1D_KERNEL_LOOP(output_index, nthreads) {
     Index<3> output_tensor_index = FlatToTensorIndex(output_index, output_dims);
 
     Index<3> input_tensor_index;
-    input_tensor_index[0] = output_tensor_index[0];
-    input_tensor_index[1] = output_tensor_index[2];
-    input_tensor_index[2] = output_tensor_index[1];
+    input_tensor_index[0] = output_tensor_index[sp0];
+    input_tensor_index[1] = output_tensor_index[sp1];
+    input_tensor_index[2] = output_tensor_index[sp2];
 
     int input_index = TensorIndexToFlat(input_tensor_index, input_dims);
 
@@ -439,7 +421,7 @@ __global__ void PadInputCustomKernelNCHW(int nthreads, const T* input,
 template <typename T, int NDIMS>
 struct TransformFilter<GPUDevice, T, int, NDIMS> {
   typedef GPUDevice Device;
-  void operator()(const Device& d,
+  void operator()(const Device& d, FilterTensorFormat dst_filter_format,
                   typename TTypes<T, NDIMS, int>::ConstTensor in,
                   typename TTypes<T, NDIMS, int>::Tensor out) {
     Dimension<3> combined_dims;
@@ -450,13 +432,18 @@ struct TransformFilter<GPUDevice, T, int, NDIMS> {
     combined_dims[1] = in.dimension(NDIMS - 2);  // input filters
     combined_dims[2] = in.dimension(NDIMS - 1);  // output filters
     CudaLaunchConfig config = GetCudaLaunchConfig(out.size(), d);
-    SwapDimension0And2InTensor3Simple<T>
+
+    CHECK(dst_filter_format == FORMAT_OIHW)
+        << "Unsupported output layout: " << ToString(dst_filter_format);
+
+    ShuffleInTensor3Simple<T, 2, 1, 0>
         <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
             config.virtual_thread_count, in.data(), combined_dims, out.data());
   }
 };
 
-// Converts Cudnn filter format back to TensorFlow filter format.
+// Converts Cudnn filter format OIHW back to TensorFlow filter format HWIO.
+// TODO(hinsu): Support reverse transformation from filter format OHWI as well.
 template <typename T, int NDIMS>
 struct ReverseTransformFilter<GPUDevice, T, NDIMS> {
   typedef GPUDevice Device;
@@ -470,7 +457,7 @@ struct ReverseTransformFilter<GPUDevice, T, NDIMS> {
       combined_dims[2] *= in.dimension(i);
     }
     CudaLaunchConfig config = GetCudaLaunchConfig(out.size(), d);
-    SwapDimension0And2InTensor3Simple<T>
+    ShuffleInTensor3Simple<T, 2, 1, 0>
         <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
             config.virtual_thread_count, in.data(), combined_dims, out.data());
   }
@@ -937,7 +924,7 @@ void RunSwapDimension1And2InTensor3(const GPUDevice& d, const T* input,
   } else {
     int total_element_count = input_dims[0] * input_dims[1] * input_dims[2];
     CudaLaunchConfig config = GetCudaLaunchConfig(total_element_count, d);
-    SwapDimension1And2InTensor3Simple<T, conjugate>
+    ShuffleInTensor3Simple<T, 0, 2, 1, conjugate>
         <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
             config.virtual_thread_count, input, input_dims, output);
   }
@@ -969,7 +956,7 @@ struct SwapDimension0And2InTensor3<GPUDevice, T, conjugate> {
                                static_cast<int>(combined_dims[2])};
     size_t total_size = combined_dims[0] * combined_dims[1] * combined_dims[2];
     CudaLaunchConfig config = GetCudaLaunchConfig(total_size, d);
-    SwapDimension0And2InTensor3Simple<T, conjugate>
+    ShuffleInTensor3Simple<T, 2, 1, 0, conjugate>
         <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
             config.virtual_thread_count, in, input_dims, out);
   }
-- 
GitLab


From ceb72bcdbf90fd23204b26f8e43afbd3c0a46563 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Fri, 14 Sep 2018 15:16:09 -0700
Subject: [PATCH 0223/1357] [TF:XLA] Remove special base case from BatchDot
 that has been redundant ever since xla::DotGeneral was added.

PiperOrigin-RevId: 213052269
---
 tensorflow/compiler/tf2xla/lib/batch_dot.cc | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/lib/batch_dot.cc b/tensorflow/compiler/tf2xla/lib/batch_dot.cc
index 64f2d781a6..5400e8834c 100644
--- a/tensorflow/compiler/tf2xla/lib/batch_dot.cc
+++ b/tensorflow/compiler/tf2xla/lib/batch_dot.cc
@@ -100,16 +100,6 @@ xla::XlaOp BatchDot(xla::XlaOp x, xla::XlaOp y, bool transpose_x,
     precision_proto.add_operand_precision(precision);
     precision_proto.add_operand_precision(precision);
 
-    // If there are no batch dimensions, use a regular Dot.
-    // TODO(b/69062148) Remove this code when Dot emitters can be passed
-    // dimensions to transpose directly (i.e. without requiring a Transpose
-    // HLO).
-    if (batch_dimension_numbers.empty()) {
-      auto lhs = transpose_x ? xla::Transpose(x, {1, 0}) : x;
-      auto rhs = transpose_y ? xla::Transpose(y, {1, 0}) : y;
-      return xla::Dot(lhs, rhs, &precision_proto);
-    }
-
     xla::DotDimensionNumbers dot_dnums;
     dot_dnums.add_lhs_contracting_dimensions(x_inner_dim);
     dot_dnums.add_rhs_contracting_dimensions(y_inner_dim);
-- 
GitLab


From 98342d8bea440c960a3a08bf3f27df737b2b2b11 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 14 Sep 2018 15:23:31 -0700
Subject: [PATCH 0224/1357] Disable flaky keras_test.

PiperOrigin-RevId: 213053512
---
 tensorflow/contrib/distribute/python/BUILD | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index aaecbb0eb1..f72b827e04 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -731,9 +731,12 @@ cuda_py_test(
         ":keras_test_lib",
     ],
     tags = [
+        "manual",
         "multi_and_single_gpu",
+        "no_gpu",
         "no_pip",
         "no_windows_gpu",
+        "notap",
         "notsan",
     ],
 )
-- 
GitLab


From 9eba75e54e87aa00efae482c69797794d7020950 Mon Sep 17 00:00:00 2001
From: Pavithra Vijay <psv@google.com>
Date: Fri, 14 Sep 2018 16:08:40 -0700
Subject: [PATCH 0225/1357] Refactored some of the metrics code in compile
 function for better readability. - Logic change: Moved getting metric name
 and function out of the training/eval loops in eager mode - Moved setting
 metric attributes on the model out the function which calls metric functions.

PiperOrigin-RevId: 213060143
---
 tensorflow/python/keras/engine/training.py    | 233 +++++++++---------
 .../python/keras/engine/training_utils.py     |  64 ++++-
 2 files changed, 172 insertions(+), 125 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index fed07c4120..dc464c02b6 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -145,32 +145,34 @@ class Model(Network):
         if i not in skip_target_weighing_indices
     ]
 
-  def _get_metric_name(self, metric, output_index, weighted=False):
-    """Returns the metric name corresponding to the given metric input.
+  def _cache_output_metric_attributes(self, metrics, weighted_metrics):
+    """Caches metric name and function attributes for every model output."""
+    output_shapes = [
+        None if output is None else output.get_shape().as_list()
+        for output in self.outputs
+    ]
+    self._per_output_metrics = training_utils.collect_per_output_metric_info(
+        metrics, self.output_names, output_shapes, self.loss_functions)
+    self._per_output_weighted_metrics = \
+        training_utils.collect_per_output_metric_info(
+            weighted_metrics, self.output_names, output_shapes,
+            self.loss_functions, self.sample_weights)
+
+  def _add_unique_metric_name(self, metric_name, output_index):
+    """Makes the metric name unique and adds it to the model's metric name list.
+
+      If there are multiple outputs for which the metrics are calculated, the
+      metric names have to be made unique by appending an integer.
 
     Arguments:
-        metric: Metric function name or reference.
-      output_index: Index of the current output.
-        weighted: Boolean indicating if the given metric is weighted.
+      metric_name: Metric name that corresponds to the metric specified by the
+          user. For example: 'acc'.
+      output_index: The index of the model output for which the metric name is
+        being added.
 
     Returns:
-        A metric name.
+      string, name of the model's unique metric name
     """
-    metric_name_prefix = 'weighted_' if weighted else ''
-    if metric in ('accuracy', 'acc', 'crossentropy', 'ce'):
-      if metric in ('accuracy', 'acc'):
-        suffix = 'acc'
-      elif metric in ('crossentropy', 'ce'):
-        suffix = 'ce'
-    else:
-      metric_fn = metrics_module.get(metric)
-      # Get metric name as string
-      if hasattr(metric_fn, 'name'):
-        suffix = metric_fn.name
-      else:
-        suffix = metric_fn.__name__
-    metric_name = metric_name_prefix + suffix
-
     if len(self.output_names) > 1:
       metric_name = '%s_%s' % (self.output_names[output_index], metric_name)
     j = 1
@@ -181,24 +183,54 @@ class Model(Network):
 
     return metric_name
 
+  def _init_metric_attributes(self):
+    """Initialized model metric attributes."""
+    self.metrics_names = ['loss']
+    self.metrics_tensors = []
+    self.metrics_updates = []
+    self.stateful_metric_names = []
+    self.stateful_metric_functions = []
+
+  def _set_per_output_metric_attributes(self, metrics_dict, output_index):
+    """Sets the metric attributes on the model for the given output.
+
+    Arguments:
+      metrics_dict: A dict with metric names as keys and metric fns as values.
+      output_index: The index of the model output for which the metric
+        attributes are added.
+    """
+    for metric_name, metric_fn in metrics_dict.items():
+      metric_name = self._add_unique_metric_name(metric_name, output_index)
+      # Keep track of metric name.
+      self.metrics_names.append(metric_name)
+
+      # Keep track of stateful metric attributes (name and metric function).
+      if isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful:
+        self.stateful_metric_names.append(metric_name)
+        self.stateful_metric_functions.append(metric_fn)
+
+  def _set_metric_attributes(self, outputs, skip_target_indices=None):
+    """Sets the metric attributes on the model for all the model outputs."""
+    skip_target_indices = skip_target_indices or []
+    for i in range(len(outputs)):
+      if i in skip_target_indices:
+        continue
+      self._set_per_output_metric_attributes(self._per_output_metrics[i], i)
+      self._set_per_output_metric_attributes(
+          self._per_output_weighted_metrics[i], i)
+
   def _handle_per_output_metrics(self,
-                                 metrics,
+                                 metrics_dict,
                                  y_true,
                                  y_pred,
-                                 output_index,
-                                 output_shape,
-                                 loss_fn,
                                  mask,
                                  weights=None):
-    """Calls metric functions and sets metric attributes for a single output.
+    """Calls metric functions for a single output.
 
     Arguments:
-      metrics: List of metrics.
+      metrics_dict: A dict with metric names as keys and metric fns as values.
       y_true: Target output.
       y_pred: Predicted output.
-      output_index: Index of the current output.
-      output_shape: Shape of the current output.
-      loss_fn: Loss function corresponding to the current output.
       mask: Computed mask value for the current output.
       weights: Weights to be applied on the current output.
 
@@ -206,71 +238,45 @@ class Model(Network):
       A list of metric result tensors.
     """
     metric_results = []
-    for metric in metrics:
-      metric_fn = training_utils.get_metric_function(
-          metric, output_shape=output_shape, loss_fn=loss_fn)
-
-      if (context.executing_eagerly() and y_true is not None and
-          y_pred is not None):
-        # In eager mode, when executing metric_fn during training, we do not
-        # need to generate unique metric name and add it to the model
-        # as we have done that during compile already.
-        prefix = 'weighted_' if weights is not None else ''
-        suffix = metric_fn.name if hasattr(metric_fn,
-                                           'name') else metric_fn.__name__
-        metric_name = prefix + suffix
-      else:
-        # Get metric name that is to be added to the model.
-        metric_name = self._get_metric_name(
-            metric, output_index, weighted=weights is not None)
-        # Keep track of metric name.
-        self.metrics_names.append(metric_name)
-
-        # Keep track of stateful metric attributes (name and metric function).
-        if isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful:
-          self.stateful_metric_names.append(metric_name)
-          self.stateful_metric_functions.append(metric_fn)
-
+    for metric_name, metric_fn in metrics_dict.items():
       with K.name_scope(metric_name):
-        # If both outputs and targets are available, call the metric function.
-        if y_true is not None and y_pred is not None:
-          if isinstance(metric_fn, metrics_module.Metric):
-            # Call the stateful metric function.
-            if mask is not None:
-              mask = math_ops.cast(mask, y_pred.dtype)
-              # Update weights with mask.
-              if weights is None:
-                weights = mask
-              else:
-                # Update shape of weights if possible before adding mask.
-                # Update dimensions of weights to match with mask if possible.
-                mask, _, weights = metrics_module.squeeze_or_expand_dimensions(
-                    mask, None, weights)
-                try:
-                  # Broadcast weights if possible.
-                  weights = weights_broadcast_ops.broadcast_weights(
-                      weights, mask)
-                except ValueError:
-                  pass
-                  # TODO(psv): Handle case when mask and weight shapes are not
-                  # compatible.
-                weights *= mask
-
-            metric_result = metric_fn(y_true, y_pred, weights)
-          else:
-            # Call the stateless metric function.
-            weighted_metric_fn = training_utils.weighted_masked_objective(
-                metric_fn)
-            metric_result = weighted_metric_fn(
-                y_true, y_pred, weights=weights, mask=mask)
-
-          if not context.executing_eagerly():
-            # Keep track of metric result tensor.
-            self.metrics_tensors.append(metric_result)
-          metric_results.append(metric_result)
-
-      if (isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful and
-          not context.executing_eagerly()):
+        if isinstance(metric_fn, metrics_module.Metric):
+          # Call the stateful metric function.
+          if mask is not None:
+            mask = math_ops.cast(mask, y_pred.dtype)
+            # Update weights with mask.
+            if weights is None:
+              weights = mask
+            else:
+              # Update shape of weights if possible before adding mask.
+              # Update dimensions of weights to match with mask if possible.
+              mask, _, weights = metrics_module.squeeze_or_expand_dimensions(
+                  mask, None, weights)
+              try:
+                # Broadcast weights if possible.
+                weights = weights_broadcast_ops.broadcast_weights(weights, mask)
+              except ValueError:
+                pass
+                # TODO(psv): Handle case when mask and weight shapes are not
+                # compatible.
+              weights *= mask
+
+          metric_result = metric_fn(y_true, y_pred, weights)
+        else:
+          # Call the stateless metric function.
+          weighted_metric_fn = training_utils.weighted_masked_objective(
+              metric_fn)
+          metric_result = weighted_metric_fn(
+              y_true, y_pred, weights=weights, mask=mask)
+
+        if not context.executing_eagerly():
+          # Keep track of metric result tensor.
+          self.metrics_tensors.append(metric_result)
+
+      metric_results.append(metric_result)
+      is_stateful = isinstance(metric_fn,
+                               base_layer.Layer) and metric_fn.stateful
+      if is_stateful and not context.executing_eagerly():
         # Keep track of updates created by stateful metrics.
         self.metrics_updates += metric_fn.updates
     return metric_results
@@ -281,7 +287,7 @@ class Model(Network):
                       targets=None,
                       sample_weights=None,
                       masks=None):
-    """Handles calling metric functions and setting model metric attributes.
+    """Handles calling metric functions.
 
     Arguments:
       outputs: List of outputs (predictions).
@@ -301,20 +307,15 @@ class Model(Network):
           continue
         output = outputs[i] if outputs else None
         target = targets[i] if targets else None
-        output_shape = None if output is None else output.get_shape().as_list()
         output_mask = masks[i] if masks else None
         metric_results.extend(
-            self._handle_per_output_metrics(
-                self.nested_metrics[i], target, output, i, output_shape,
-                self.loss_functions[i], output_mask))
+            self._handle_per_output_metrics(self._per_output_metrics[i], target,
+                                            output, output_mask))
         metric_results.extend(
             self._handle_per_output_metrics(
-                self.nested_weighted_metrics[i],
+                self._per_output_weighted_metrics[i],
                 target,
                 output,
-                i,
-                output_shape,
-                self.loss_functions[i],
                 output_mask,
                 weights=sample_weights[i]))
     return metric_results
@@ -506,24 +507,15 @@ class Model(Network):
     self.loss_weights_list = loss_weights_list
 
     # Initialize model metric attributes.
-    self.metrics_names = ['loss']
-    self.metrics_tensors = []
-    self.metrics_updates = []
-    self.stateful_metric_names = []
-    self.stateful_metric_functions = []
-
-    # Nested metrics is a list of list of metrics.
-    # One list per output of the model.
-    self.nested_metrics = training_utils.collect_metrics(
-        metrics, self.output_names)
-    self.nested_weighted_metrics = training_utils.collect_metrics(
-        weighted_metrics, self.output_names)
+    self._init_metric_attributes()
 
     # Initialization for Eager mode execution.
     if context.executing_eagerly():
       # Prepare sample weights.
       self._set_sample_weight_attributes(sample_weight_mode,
                                          skip_target_weighing_indices)
+      # Save all metric attributes per output of the model.
+      self._cache_output_metric_attributes(metrics, weighted_metrics)
 
       if target_tensors is not None:
         raise ValueError('target_tensors are not currently supported in Eager '
@@ -534,10 +526,10 @@ class Model(Network):
           self.metrics_names.append(self.output_names[i] + '_loss')
 
       # Set metric attributes on model.
-      self._handle_metrics(
+      self._set_metric_attributes(
           self.outputs,
           skip_target_indices=skip_target_indices,
-          sample_weights=self.sample_weights)
+      )
 
       self.targets = []
       for i in range(len(self.outputs)):
@@ -600,6 +592,8 @@ class Model(Network):
     # Prepare sample weights.
     self._set_sample_weight_attributes(sample_weight_mode,
                                        skip_target_weighing_indices)
+    # Save all metric attributes per output of the model.
+    self._cache_output_metric_attributes(metrics, weighted_metrics)
 
     # Compute total loss.
     total_loss = None
@@ -634,6 +628,11 @@ class Model(Network):
       for loss_tensor in self.losses:
         total_loss += loss_tensor
 
+    # Set metric attributes on model.
+    self._set_metric_attributes(
+        self.outputs,
+        skip_target_indices=skip_target_indices,
+    )
     # Invoke metric functions for all the outputs.
     self._handle_metrics(
         self.outputs,
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 8e9fab81d6..9c303f4bed 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from collections import OrderedDict
 import copy
 import math
 
@@ -484,29 +485,36 @@ def check_loss_and_target_compatibility(targets, loss_fns, output_shapes):
                            'as the output.')
 
 
-def collect_metrics(metrics, output_names):
-  """Maps metric functions to model outputs.
+def collect_per_output_metric_info(metrics,
+                                   output_names,
+                                   output_shapes,
+                                   loss_fns,
+                                   sample_weights=None):
+  """Maps metric names and functions to model outputs.
 
   Arguments:
       metrics: a list or dict of metric functions.
       output_names: a list of the names (strings) of model outputs.
+      output_shapes: a list of the shapes (strings) of model outputs.
+      loss_fns: a list of the loss functions corresponding to the model outputs.
+      sample_weights: a list of weights to be applied on the model outputs.
 
   Returns:
-      A list (one entry per model output) of lists of metric functions.
+      A list (one entry per model output) of dicts.
       For instance, if the model has 2 outputs, and for the first output
       we want to compute "binary_accuracy" and "binary_crossentropy",
       and just "binary_accuracy" for the second output,
-      the list would look like:
-          `[[binary_accuracy, binary_crossentropy], [binary_accuracy]]`
+      the list would look like: `[[('acc', binary_accuracy()),
+      ('ce', binary_crossentropy())], [('acc', binary_accuracy())]]`
 
   Raises:
       TypeError: if an incorrect type is passed for the `metrics` argument.
   """
   if not metrics:
-    return [[] for _ in output_names]
+    return [{} for _ in output_names]
   if isinstance(metrics, list):
     # we then apply all metrics to all outputs.
-    return [copy.copy(metrics) for _ in output_names]
+    nested_metrics = [copy.copy(metrics) for _ in output_names]
   elif isinstance(metrics, dict):
     nested_metrics = []
     for name in output_names:
@@ -514,11 +522,24 @@ def collect_metrics(metrics, output_names):
       if not isinstance(output_metrics, list):
         output_metrics = [output_metrics]
       nested_metrics.append(output_metrics)
-    return nested_metrics
   else:
     raise TypeError('Type of `metrics` argument not understood. '
                     'Expected a list or dictionary, found: ' + str(metrics))
 
+  per_output_metrics = []
+  for i, metrics in enumerate(nested_metrics):
+    metrics_dict = OrderedDict()
+    for metric in metrics:
+      weighted = False if (sample_weights is None) else (
+          sample_weights[i] is not None)
+      metric_name = get_metric_name(metric, weighted)
+      metric_fn = get_metric_function(
+          metric, output_shape=output_shapes[i], loss_fn=loss_fns[i])
+      metrics_dict[metric_name] = metric_fn
+    per_output_metrics.append(metrics_dict)
+
+  return per_output_metrics
+
 
 def batch_shuffle(index_array, batch_size):
   """Shuffles an array in a batch-wise fashion.
@@ -729,6 +750,33 @@ def has_tensors(ls):
   return tensor_util.is_tensor(ls)
 
 
+def get_metric_name(metric, weighted=False):
+  """Returns the name corresponding to the given metric input.
+
+  Arguments:
+    metric: Metric function name or reference.
+    weighted: Boolean indicating if the given metric is weighted.
+
+  Returns:
+      The metric name.
+  """
+  metric_name_prefix = 'weighted_' if weighted else ''
+  if metric in ('accuracy', 'acc', 'crossentropy', 'ce'):
+    if metric in ('accuracy', 'acc'):
+      suffix = 'acc'
+    elif metric in ('crossentropy', 'ce'):
+      suffix = 'ce'
+  else:
+    metric_fn = metrics_module.get(metric)
+    # Get metric name as string
+    if hasattr(metric_fn, 'name'):
+      suffix = metric_fn.name
+    else:
+      suffix = metric_fn.__name__
+  metric_name = metric_name_prefix + suffix
+  return metric_name
+
+
 def get_metric_function(metric, output_shape=None, loss_fn=None):
   """Returns the metric function corresponding to the given metric input.
 
-- 
GitLab


From bdca15c5e5c09e5c97f4357bd2a792da54746e94 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 16:12:07 -0700
Subject: [PATCH 0226/1357] Fixed documentation of Optimizer.minimize() for
 eager mode to match behavior of Optimizer.compute_gradients().

PiperOrigin-RevId: 213060585
---
 tensorflow/python/training/optimizer.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index 2304a461c1..699162b30c 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -385,13 +385,12 @@ class Optimizer(
 
     @compatibility(eager)
     When eager execution is enabled, `loss` should be a Python function that
-    takes elements of `var_list` as arguments and computes the value to be
-    minimized. If `var_list` is None, `loss` should take no arguments.
-    Minimization (and gradient computation) is done with respect to the
-    elements of `var_list` if not None, else with respect to any trainable
-    variables created during the execution of the `loss` function.
-    `gate_gradients`, `aggregation_method`, `colocate_gradients_with_ops` and
-    `grad_loss` are ignored when eager execution is enabled.
+    takes no arguments and computes the value to be minimized. Minimization (and
+    gradient computation) is done with respect to the elements of `var_list` if
+    not None, else with respect to any trainable variables created during the
+    execution of the `loss` function. `gate_gradients`, `aggregation_method`,
+    `colocate_gradients_with_ops` and `grad_loss` are ignored when eager
+    execution is enabled.
     @end_compatibility
     """
     grads_and_vars = self.compute_gradients(
-- 
GitLab


From 1c2a300d483d9e5d5502cdd8131644f7647996c5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Sep 2018 16:23:48 -0700
Subject: [PATCH 0227/1357] Fix spelling in error message

PiperOrigin-RevId: 213062112
---
 tensorflow/python/estimator/estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 0f20acefdf..90280fd25d 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -329,7 +329,7 @@ class Estimator(object):
                                  run_config.TaskType.PS):
       raise ValueError(
           'Train has been called wrong configuration. Please use '
-          'tf.estimator.train_and_evaluate which calls propper API according '
+          'tf.estimator.train_and_evaluate which calls proper API according '
           'to given configuration. Current configuration: {}.'.format(
               self.config))
 
-- 
GitLab


From 74b9d6a48286f38807bbd204d9d55467e02387ca Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Fri, 14 Sep 2018 16:25:36 -0700
Subject: [PATCH 0228/1357] [Intel MKL] Fixes for unit test failures

1) Changes in partitioned_function_ops.cc are for passing
   Global OpRegistry as default_registry in PartitionedFunction op

   This fix addresses failure in MKL layout pass when PartitionedFunction
   op calls graph optimization passes. The problem was that the function
   library definition that is used to create function graph and corresponding
   subgraphs after partitioning did not use global OpRegistry as the
   default OpRegistry used for look of operator names. Because of that,
   standard operators such as "Const" were not available to graph passes.

2) Changes in mkl_cpu_allocator.h are to address failure in
   mkl_cpu_allocator_test which was expecting that max_bytes_limits is returned
   via GetStats() in MKLCPUAllocator.
---
 tensorflow/core/common_runtime/mkl_cpu_allocator.h  |  3 +++
 tensorflow/core/kernels/partitioned_function_ops.cc | 12 +++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index df9c3a686c..593f855ea2 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -277,6 +277,9 @@ class MklCPUAllocator : public VisitableAllocator {
     // max_alloc_size from large_size_allocator would be the maximum
     // size allocated by MklCPUAllocator.
     stats->max_alloc_size = l_stats.max_alloc_size;
+
+    stats->bytes_limit =
+        std::max(s_stats.bytes_limit, l_stats.bytes_limit);
   }
 
   void ClearStats() override {
diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index fc1c9003aa..ddb621967a 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -97,7 +97,12 @@ class PartitionedCallOp : public AsyncOpKernel {
         OP_REQUIRES_ASYNC(ctx, fbody != nullptr,
                           errors::Internal("Could not find handle ", handle),
                           done);
-        auto graph = tensorflow::MakeUnique<Graph>(fbody->graph->flib_def());
+        // We need to pass global op_registry as default_registry when creating
+        // graph. So that graph optimization passes can lookup all possible ops
+        // by name.
+        FunctionLibraryDefinition func_lib_def(OpRegistry::Global(),
+                                            fbody->graph->flib_def().ToProto());
+        auto graph = tensorflow::MakeUnique<Graph>(func_lib_def);
         CopyGraph(*fbody->graph, graph.get());
         OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done);
 
@@ -250,9 +255,10 @@ class PartitionedCallOp : public AsyncOpKernel {
     VLOG(3) << "Partitioned function '" << func_.name() << "', yielding "
             << partitions.size() << " shards.";
 
-    const FunctionLibraryDefinition* flib_def = &graph->flib_def();
+    FunctionLibraryDefinition func_lib_def(OpRegistry::Global(),
+                                          graph->flib_def().ToProto());
     for (const auto& partition : partitions) {
-      std::unique_ptr<Graph> subgraph(new Graph(flib_def));
+      std::unique_ptr<Graph> subgraph(new Graph(func_lib_def));
       GraphConstructorOptions opts;
       opts.allow_internal_ops = true;
       opts.expect_device_spec = true;
-- 
GitLab


From e179c17b96bcb855b2056f60851a24551b4189a6 Mon Sep 17 00:00:00 2001
From: Akshay Agrawal <akshayka@google.com>
Date: Fri, 14 Sep 2018 16:43:25 -0700
Subject: [PATCH 0229/1357] Makes tf.Variable arguments (non-captured)
 DT_RESOURCE function inputs.

Previously, tf.Variable arguments to a defun-d Python function were made captured inputs. This change makes it possible to parameterize functions on DT_RESOURCE inputs.

PiperOrigin-RevId: 213064739
---
 tensorflow/python/eager/function.py      | 60 ++++++++++++++++++++----
 tensorflow/python/eager/function_test.py | 37 +++++++++++++++
 2 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 962e334b27..f3fb48fd3b 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -65,7 +65,7 @@ gradients_impl._function = sys.modules[__name__]  # pylint: disable=protected-ac
 WHITELIST_FUNCTION_ATTRIBUTE_PREFIX = "experimental_"
 
 
-def _create_substitute_placeholder(value, name, dtype=None):
+def _create_substitute_placeholder(value, name=None, dtype=None):
   """Creates a placeholder for `value` and propagates shape info to it."""
   # Note: setting ops.control_dependencies(None) ensures we always put
   # capturing placeholders outside of any control flow context.
@@ -550,7 +550,19 @@ class Function(object):
           self._distributed_variables[component_variable.handle] = variable
 
   def __call__(self, *args):
-    """Executes the wrapped function."""
+    """Executes the wrapped function.
+
+    Args:
+      *args: a list of Tensors or Variables.
+
+    Returns:
+      The result of applying the TF function to `args`.
+
+    Raises:
+      ValueError: If the current device stack does not match the device stack
+        under which the function was created, or if `args` contains anything
+        other than Tensors or Variables.
+    """
     ctx = context.context()
     device_functions = _get_device_functions(ctx, ops.get_default_graph())
     if device_functions != self._device_functions:
@@ -566,7 +578,18 @@ class Function(object):
         tape.variable_accessed(v)
 
     captures = self._resolve_captured_inputs()
-    tensor_inputs = [x for x in nest.flatten(args) if isinstance(x, ops.Tensor)]
+    tensor_inputs = []
+    for i, arg in enumerate(nest.flatten(args)):
+      if isinstance(arg, resource_variable_ops.ResourceVariable):
+        if arg.trainable:
+          tape.variable_accessed(arg)
+        tensor_inputs.append(arg.handle)
+      elif isinstance(arg, ops.Tensor):
+        tensor_inputs.append(arg)
+      else:
+        raise ValueError("All inputs to `Function`s must be Tensors; "
+                         "on invocation of %s, the %d-th input (%s) was not a "
+                         "Tensor." % (self._func_graph.name, i, str(arg)))
     args = tensor_inputs + captures
 
     if tape.should_record(tensor_inputs) or tape.should_record(captures):
@@ -817,10 +840,6 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None):
       func_kwds = {}
 
     # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
-    func_graph.inputs.extend(
-        x for x in nest.flatten(func_args) + nest.flatten(func_kwds)
-        if isinstance(x, ops.Tensor))
-
     # Variables to help check whether mutation happens in calling the function
     # Copy the recursive list, tuple and map structure, but not base objects
     func_args_before = nest.pack_sequence_as(func_args, nest.flatten(func_args))
@@ -867,6 +886,26 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None):
     finally:
       tape.pop_tape(this_tape)
 
+    # Variables in `func_args`, `func_kwds` should be explicit inputs
+    # to the function, not captured inputs.
+    variables = set(this_tape.watched_variables())
+    inputs = []
+    for arg in nest.flatten(func_args) + nest.flatten(func_kwds):
+      if isinstance(arg, resource_variable_ops.ResourceVariable):
+        try:
+          resource_placeholder = func_graph.captures.pop(arg.handle)
+          variables.remove(arg)
+        except KeyError:
+          # This case occurs if a Variable among the inputs is not actually
+          # used by the function; we still add an explicit input for it
+          # because the user should presumably pass the Variable as an input
+          # to the corresponding graph function.
+          resource_placeholder = _create_substitute_placeholder(arg.handle)
+        inputs.append(resource_placeholder)
+      elif isinstance(arg, ops.Tensor):
+        inputs.append(arg)
+    func_graph.inputs = inputs + list(func_graph.captures.values())
+
     func_graph.structured_outputs = func_outputs
     # Returning a closed-over tensor does not trigger convert_to_tensor.
     func_graph.outputs.extend(
@@ -878,7 +917,7 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None):
     # Instead of storing non-distributed component variables, we
     # store their distributed containers so we can retrieve the correct
     # component variables at call-time.
-    variables = list(this_tape.watched_variables())
+    variables = list(variables)
     strategy = distribution_strategy_context.get_distribution_strategy()
     for i, variable in enumerate(variables):
       # If variable is not distributed value_container returns itself.
@@ -1201,7 +1240,10 @@ class PolymorphicFunction(object):
         self._variables.extend(
             [v for v in graph_function.variables if v not in self._variables])
         self._function_cache[cache_key] = graph_function
-      return graph_function, (args, kwds)
+      return graph_function, [
+          t for t in nest.flatten((args, kwds))
+          if isinstance(t, (ops.Tensor, resource_variable_ops.ResourceVariable))
+      ]
 
 
 def register(func, *args, **kwargs):
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index a0abefe666..c168b6060c 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1685,6 +1685,43 @@ class FunctionTest(test.TestCase):
         # pylint: disable=protected-access
         self.assertEqual(len(graph._functions), 1)
 
+  def testCallingFunctionWithDifferentVariables(self):
+
+    @function.defun
+    def foo(v):
+      v.assign_add(1.0)
+      return v.read_value()
+
+    v = resource_variable_ops.ResourceVariable(0.0)
+    graph_function = foo.get_concrete_function(v)
+    self.assertEqual(len(graph_function.inputs), 1)
+    self.assertEqual(len(graph_function.captured_inputs), 0)
+
+    self.assertEqual(float(graph_function(v)), 1.0)
+    self.assertEqual(float(graph_function(v)), 2.0)
+
+    w = resource_variable_ops.ResourceVariable(0.0)
+
+    @function.defun
+    def bar(v):
+      del v
+      return constant_op.constant(1.0)
+
+    graph_function = bar.get_concrete_function(v)
+    self.assertEqual(float(graph_function(v)), 1.0)
+    self.assertEqual(float(graph_function(w)), 1.0)
+
+  def testCallingFunctionWithNonTensorsFails(self):
+
+    @function.defun
+    def foo(x):
+      return x
+
+    graph_function = foo.get_concrete_function(constant_op.constant(1.0))
+    with self.assertRaisesRegexp(ValueError, 'All inputs to `Function`s must '
+                                 'be Tensors;.*'):
+      graph_function('Not a Tensor.')
+
 
 @test_util.with_c_shapes
 class AutomaticControlDependenciesTest(test.TestCase):
-- 
GitLab


From 2e44b4681a16f4127502b0330228d7d4b33f8ee5 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Fri, 14 Sep 2018 17:03:41 -0700
Subject: [PATCH 0230/1357] Switch to Eigen::Index in Tensorflow kernels.

Mixing index type doesn't work well with latest Eigen.

PiperOrigin-RevId: 213067224
---
 .../contrib/tensor_forest/kernels/tree_utils.cc     | 12 ++++++------
 tensorflow/core/kernels/bias_op.cc                  | 13 +++++++------
 tensorflow/core/kernels/unravel_index_op.cc         | 10 ++++++----
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc b/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc
index cefcc96051..dd5d028314 100644
--- a/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc
+++ b/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc
@@ -67,11 +67,11 @@ float ClassificationSplitScore(
     const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits,
     const Eigen::Tensor<float, 1, Eigen::RowMajor>& rights, int32 num_classes,
     int i) {
-  Eigen::array<int, 1> offsets;
+  Eigen::array<Eigen::Index, 1> offsets;
   // Class counts are stored with the total in [0], so the length of each
   // count vector is num_classes + 1.
   offsets[0] = i * (num_classes + 1) + 1;
-  Eigen::array<int, 1> extents;
+  Eigen::array<Eigen::Index, 1> extents;
   extents[0] = num_classes;
   return WeightedGiniImpurity(splits.slice(offsets, extents)) +
          WeightedGiniImpurity(rights.slice(offsets, extents));
@@ -97,7 +97,7 @@ void GetTwoBestClassification(const Tensor& total_counts,
   // arguments to ClassificationSplitScore.
   const Eigen::Tensor<float, 1, Eigen::RowMajor> splits =
       split_counts.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
-  Eigen::array<int, 1> bcast;
+  Eigen::array<Eigen::Index, 1> bcast;
   bcast[0] = num_splits;
   const Eigen::Tensor<float, 1, Eigen::RowMajor> rights =
       tc.broadcast(bcast) - splits;
@@ -130,8 +130,8 @@ float RegressionSplitScore(
     const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_sums,
     const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_squares,
     int32 accumulator, int32 num_regression_dims, int i) {
-  Eigen::array<int, 1> offsets = {i * num_regression_dims + 1};
-  Eigen::array<int, 1> extents = {num_regression_dims - 1};
+  Eigen::array<Eigen::Index, 1> offsets = {i * num_regression_dims + 1};
+  Eigen::array<Eigen::Index, 1> extents = {num_regression_dims - 1};
   float left_count = splits_count_accessor(accumulator, i, 0);
   float right_count = totals_count_accessor(accumulator, 0) - left_count;
 
@@ -178,7 +178,7 @@ void GetTwoBestRegression(const Tensor& total_sums, const Tensor& total_squares,
   const auto splits_count_accessor = split_sums.tensor<float, 3>();
   const auto totals_count_accessor = total_sums.tensor<float, 2>();
 
-  Eigen::array<int, 1> bcast;
+  Eigen::array<Eigen::Index, 1> bcast;
   bcast[0] = num_splits;
   const auto right_sums = tc_sum.broadcast(bcast) - splits_sum;
   const auto right_squares = tc_square.broadcast(bcast) - splits_square;
diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc
index 7b28c8e91f..e15ea82e7d 100644
--- a/tensorflow/core/kernels/bias_op.cc
+++ b/tensorflow/core/kernels/bias_op.cc
@@ -134,8 +134,8 @@ class BiasOp : public BinaryOp<T> {
     if (data_format_ == FORMAT_NCHW) {
       int32 batch, height, width, channel;
       GetBiasValueDims(input, data_format_, &batch, &height, &width, &channel);
-      Eigen::DSizes<int32, 4> four_dims(1, channel, 1, 1);
-      Eigen::DSizes<int32, 4> broad_cast_dims(batch, 1, height, width);
+      Eigen::DSizes<Eigen::Index, 4> four_dims(1, channel, 1, 1);
+      Eigen::DSizes<Eigen::Index, 4> broad_cast_dims(batch, 1, height, width);
       const Device& d = context->eigen_device<Device>();
       output->tensor<T, 4>().device(d) =
           input.tensor<T, 4>() +
@@ -247,14 +247,14 @@ class BiasGradOp : public OpKernel {
         OP_REQUIRES(context, output_backprop.dims() == 4,
                     errors::InvalidArgument(
                         "NCHW format supports only 4D input/output tensor."));
-        Eigen::DSizes<int, 4> four_dims(batch, channel, height, width);
+        Eigen::DSizes<Eigen::Index, 4> four_dims(batch, channel, height, width);
 #ifdef EIGEN_HAS_INDEX_LIST
         using idx0 = Eigen::type2index<0>;
         using idx2 = Eigen::type2index<2>;
         using idx3 = Eigen::type2index<3>;
         Eigen::IndexList<idx0, idx2, idx3> reduction_axes;
 #else
-        Eigen::array<int, 3> reduction_axes = {0, 2, 3};
+        Eigen::array<Eigen::Index, 3> reduction_axes = {0, 2, 3};
 #endif
         output->template flat<T>().device(context->eigen_device<Device>()) =
             output_backprop.flat<T>()
@@ -263,11 +263,12 @@ class BiasGradOp : public OpKernel {
                 .sum(reduction_axes)
                 .template cast<T>();  // End of code by intel_tf.
       } else {
-        Eigen::DSizes<int, 2> two_dims(batch * height * width, channel);
+        Eigen::DSizes<Eigen::Index, 2> two_dims(batch * height * width,
+                                                channel);
 #ifdef EIGEN_HAS_INDEX_LIST
         Eigen::IndexList<Eigen::type2index<0> > reduction_axis;
 #else
-        Eigen::array<int, 1> reduction_axis = {0};
+        Eigen::array<Eigen::Index, 1> reduction_axis = {0};
 #endif
         output->template flat<T>().device(context->eigen_device<Device>()) =
             output_backprop.flat<T>()
diff --git a/tensorflow/core/kernels/unravel_index_op.cc b/tensorflow/core/kernels/unravel_index_op.cc
index 62e814ff77..8d839ba85a 100644
--- a/tensorflow/core/kernels/unravel_index_op.cc
+++ b/tensorflow/core/kernels/unravel_index_op.cc
@@ -97,10 +97,12 @@ class UnravelIndexOp : public OpKernel {
 
       auto output = output_tensor->matrix<Tidx>();
 
-      Eigen::array<int64, 2> reshape{{dims_tensor.NumElements(), 1}};
-      Eigen::array<int64, 2> bcast({1, indices_tensor.NumElements()});
-      Eigen::array<int64, 2> indices_reshape{{1, indices_tensor.NumElements()}};
-      Eigen::array<int64, 2> indices_bcast({dims_tensor.NumElements(), 1});
+      Eigen::array<Eigen::Index, 2> reshape{{dims_tensor.NumElements(), 1}};
+      Eigen::array<Eigen::Index, 2> bcast({1, indices_tensor.NumElements()});
+      Eigen::array<Eigen::Index, 2> indices_reshape{
+          {1, indices_tensor.NumElements()}};
+      Eigen::array<Eigen::Index, 2> indices_bcast(
+          {dims_tensor.NumElements(), 1});
 
       output = indices_tensor.vec<Tidx>()
                    .reshape(indices_reshape)
-- 
GitLab


From 0d4cb43a540f08cb73c00fac662c961e4154ac32 Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Fri, 14 Sep 2018 17:29:46 -0700
Subject: [PATCH 0231/1357] Revert PR #21997: Fixes the formatting issue
 pointed out at #21762

It breaks. should be s/input_shape/inputs_shape.

PiperOrigin-RevId: 213070141
---
 tensorflow/python/ops/rnn_cell_impl.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index 3e19183ff5..43cca1a498 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -428,7 +428,7 @@ class BasicRNNCell(LayerRNNCell):
   def build(self, inputs_shape):
     if inputs_shape[-1] is None:
       raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
-                       % str(input_shape))
+                       % str(inputs_shape))
 
     input_depth = inputs_shape[-1]
     self._kernel = self.add_variable(
@@ -525,7 +525,7 @@ class GRUCell(LayerRNNCell):
   def build(self, inputs_shape):
     if inputs_shape[-1] is None:
       raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
-                       % str(input_shape))
+                       % str(inputs_shape))
 
     input_depth = inputs_shape[-1]
     self._gate_kernel = self.add_variable(
@@ -705,7 +705,7 @@ class BasicLSTMCell(LayerRNNCell):
   def build(self, inputs_shape):
     if inputs_shape[-1] is None:
       raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
-                       % str(input_shape))
+                       % str(inputs_shape))
 
     input_depth = inputs_shape[-1]
     h_depth = self._num_units
@@ -908,7 +908,7 @@ class LSTMCell(LayerRNNCell):
   def build(self, inputs_shape):
     if inputs_shape[-1] is None:
       raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
-                       % str(input_shape))
+                       % str(inputs_shape))
 
     input_depth = inputs_shape[-1]
     h_depth = self._num_units if self._num_proj is None else self._num_proj
-- 
GitLab


From 08589aa0c4447b21dd73183cf5cfafff326324dc Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 14 Sep 2018 18:22:52 -0700
Subject: [PATCH 0232/1357] Make accessed variable ordering deterministic again
 when constructing defuns

PiperOrigin-RevId: 213074939
---
 tensorflow/python/eager/function.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f3fb48fd3b..e2874e25b6 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -888,13 +888,14 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None):
 
     # Variables in `func_args`, `func_kwds` should be explicit inputs
     # to the function, not captured inputs.
-    variables = set(this_tape.watched_variables())
+    tape_variables = this_tape.watched_variables()
+    arg_variables = set()
     inputs = []
     for arg in nest.flatten(func_args) + nest.flatten(func_kwds):
       if isinstance(arg, resource_variable_ops.ResourceVariable):
         try:
           resource_placeholder = func_graph.captures.pop(arg.handle)
-          variables.remove(arg)
+          arg_variables.add(arg)
         except KeyError:
           # This case occurs if a Variable among the inputs is not actually
           # used by the function; we still add an explicit input for it
@@ -904,6 +905,7 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None):
         inputs.append(resource_placeholder)
       elif isinstance(arg, ops.Tensor):
         inputs.append(arg)
+    variables = [v for v in tape_variables if v not in arg_variables]
     func_graph.inputs = inputs + list(func_graph.captures.values())
 
     func_graph.structured_outputs = func_outputs
@@ -917,7 +919,6 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None):
     # Instead of storing non-distributed component variables, we
     # store their distributed containers so we can retrieve the correct
     # component variables at call-time.
-    variables = list(variables)
     strategy = distribution_strategy_context.get_distribution_strategy()
     for i, variable in enumerate(variables):
       # If variable is not distributed value_container returns itself.
-- 
GitLab


From 33f57bd1311df97a25cd70784dfaafc8e44d07c4 Mon Sep 17 00:00:00 2001
From: Hoeseong Kim <hsgkim@snu.ac.kr>
Date: Sat, 15 Sep 2018 12:46:58 +0900
Subject: [PATCH 0233/1357] clang-format

---
 .../core/kernels/extract_volume_patches_op.cc | 50 ++++++++++---------
 .../core/kernels/extract_volume_patches_op.h  | 12 ++---
 .../extract_volume_patches_op_gpu.cu.cc       |  2 +-
 tensorflow/core/ops/array_ops.cc              | 16 +++---
 4 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/tensorflow/core/kernels/extract_volume_patches_op.cc b/tensorflow/core/kernels/extract_volume_patches_op.cc
index 0f1d566c75..52cd078a35 100644
--- a/tensorflow/core/kernels/extract_volume_patches_op.cc
+++ b/tensorflow/core/kernels/extract_volume_patches_op.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-/* 
-See extract_image_patches_op* files and docs for extract_image_patches in 
+/*
+See extract_image_patches_op* files and docs for extract_image_patches in
 ../ops/image_ops.cc.
 
 Rates are not supported as of now, but the comments hint how to edit the code
@@ -60,7 +60,7 @@ class ExtractVolumePatchesOp : public UnaryOp<T> {
       : UnaryOp<T>(context) {
     ParseAttributeVec5(context, "ksizes", &ksizes_);
     ParseAttributeVec5(context, "strides", &strides_);
-    //ParseAttributeVec5(context, "rates", &rates_);
+    // ParseAttributeVec5(context, "rates", &rates_);
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
   }
 
@@ -88,18 +88,20 @@ class ExtractVolumePatchesOp : public UnaryOp<T> {
 
     /*
     // TODO(hsgkim): enable rates
-    // Rates are disabled as of now due to Eigen's definitions of extract_volume_patch
-    // functions; none of them accept rates as its argument and rates are fixed to
-    // (1, 1, 1, 1, 1). A workaround has to be found for this.
+    // Rates are disabled as of now due to Eigen's definitions of
+    // `extract_volume_patch` functions; none of them accept rates
+    // as its argument and rates are fixed to (1, 1, 1, 1, 1). A
+    // workaround has to be found for this.
     // In order to enable rates, uncomment the following lines and use
-    // ksize_*_eff instead of ksize_* for the second argument of GetWindowedOutputSize
-    // calls.
+    // ksize_*_eff instead of ksize_* for the second argument of
+    // GetWindowedOutputSize calls.
 
     const int rate_planes = rates_[1];
     const int rate_rows = rates_[2];
     const int rate_cols = rates_[3];
 
-    const int ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1);
+    const int ksize_planes_eff = ksize_planes +
+                                 (ksize_planes - 1) * (rate_planes - 1);
     const int ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1);
     const int ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1);
     */
@@ -116,8 +118,9 @@ class ExtractVolumePatchesOp : public UnaryOp<T> {
                    GetWindowedOutputSize(in_cols, ksize_cols, stride_cols,
                                          padding_, &out_cols, &pad_cols));
 
-    const std::vector<int64> out_sizes = {batch, out_planes, out_rows, out_cols,
-                                          ksize_planes * ksize_rows * ksize_cols * depth};
+    const std::vector<int64> out_sizes = {
+        batch, out_planes, out_rows, out_cols,
+        ksize_planes * ksize_rows * ksize_cols * depth};
     TensorShape out_shape(out_sizes);
 
     Tensor* output = nullptr;
@@ -129,9 +132,8 @@ class ExtractVolumePatchesOp : public UnaryOp<T> {
     }
 
     functor::ExtractVolumePatchesForward<Device, T>()(
-        context->eigen_device<Device>(), input.tensor<T, 5>(), 
-        ksize_planes, ksize_rows, ksize_cols, 
-        stride_planes, stride_rows, stride_cols, 
+        context->eigen_device<Device>(), input.tensor<T, 5>(), ksize_planes,
+        ksize_rows, ksize_cols, stride_planes, stride_rows, stride_cols,
         /* rate_planes, rate_rows, rate_cols, */
         BrainPadding2EigenPadding(padding_), output->tensor<T, 5>());
   }
@@ -161,16 +163,18 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
 
-#define DECLARE_GPU_SPEC(T)                                             \
-  template <>                                                           \
-  void ExtractVolumePatchesForward<GPUDevice, T>::operator()(           \
-      const GPUDevice& d, typename TTypes<T, 5>::ConstTensor input,     \
-      int patch_planes, int patch_rows, int patch_cols,                 \
-      int stride_planes, int stride_rows, int stride_cols,              \
-      /* int rate_planes, int rate_rows, int rate_cols, */              \
-      const Eigen::PaddingType& padding,                                \
-      typename TTypes<T, 5>::Tensor output);                            \
+// clang-format off
+#define DECLARE_GPU_SPEC(T)                                         \
+  template <>                                                       \
+  void ExtractVolumePatchesForward<GPUDevice, T>::operator()(       \
+      const GPUDevice& d, typename TTypes<T, 5>::ConstTensor input, \
+      int patch_planes, int patch_rows, int patch_cols,             \
+      int stride_planes, int stride_rows, int stride_cols,          \
+      /* int rate_planes, int rate_rows, int rate_cols, */          \
+      const Eigen::PaddingType& padding,                            \
+      typename TTypes<T, 5>::Tensor output);                        \
   extern template struct ExtractVolumePatchesForward<GPUDevice, T>;
+// clang-format on
 
 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
 
diff --git a/tensorflow/core/kernels/extract_volume_patches_op.h b/tensorflow/core/kernels/extract_volume_patches_op.h
index e2418334ac..7e0502b770 100644
--- a/tensorflow/core/kernels/extract_volume_patches_op.h
+++ b/tensorflow/core/kernels/extract_volume_patches_op.h
@@ -16,10 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_
 #define TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_
 
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/kernels/eigen_volume_patch.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 namespace tensorflow {
 namespace functor {
@@ -27,7 +27,7 @@ namespace functor {
 template <typename Device, typename T>
 struct ExtractVolumePatchesForward {
   void operator()(const Device& d, typename TTypes<T, 5>::ConstTensor input,
-                  int patch_planes, int patch_rows, int patch_cols, 
+                  int patch_planes, int patch_rows, int patch_cols,
                   int stride_planes, int stride_rows, int stride_cols,
                   /* int rate_planes, int rate_rows, int rate_cols, */
                   const Eigen::PaddingType& padding,
@@ -38,15 +38,15 @@ struct ExtractVolumePatchesForward {
       output_32bit.device(d) =
           To32Bit(input)
               .extract_volume_patches(patch_cols, patch_rows, patch_planes,
-                                     stride_cols, stride_rows, stride_planes,
-                                     padding)
+                                      stride_cols, stride_rows, stride_planes,
+                                      padding)
               .reshape(output_32bit.dimensions());
     } else {
       output.device(d) =
           input
               .extract_volume_patches(patch_cols, patch_rows, patch_planes,
-                                     stride_cols, stride_rows, stride_planes,
-                                     padding)
+                                      stride_cols, stride_rows, stride_planes,
+                                      padding)
               .reshape(output.dimensions());
     }
   }
diff --git a/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc
index 08b3386c13..c636493602 100644
--- a/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc
@@ -17,8 +17,8 @@ limitations under the License.
 
 #define EIGEN_USE_GPU
 
-#include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/kernels/extract_volume_patches_op.h"
+#include "tensorflow/core/framework/register_types.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 44908fe875..7ce4a39aca 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -2609,7 +2609,8 @@ REGISTER_OP("ExtractVolumePatches")
       int32 rate_rows = rates[2];
       int32 rate_cols = rates[3];
 
-      int32 ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1);
+      int32 ksize_planes_eff = ksize_planes +
+                               (ksize_planes - 1) * (rate_planes - 1);
       int32 ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1);
       int32 ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1);
       */
@@ -2619,10 +2620,12 @@ REGISTER_OP("ExtractVolumePatches")
       DimensionHandle in_rows_dim = c->Dim(input_shape, 2);
       DimensionHandle in_cols_dim = c->Dim(input_shape, 3);
       DimensionHandle output_depth_dim;
-      TF_RETURN_IF_ERROR(c->Multiply(
-          c->Dim(input_shape, 4), ksize_planes * ksize_rows * ksize_cols, &output_depth_dim));
+      TF_RETURN_IF_ERROR(c->Multiply(c->Dim(input_shape, 4),
+                                     ksize_planes * ksize_rows * ksize_cols,
+                                     &output_depth_dim));
 
-      if (!c->ValueKnown(in_planes_dim) || !c->ValueKnown(in_rows_dim) || !c->ValueKnown(in_cols_dim)) {
+      if (!c->ValueKnown(in_planes_dim) || !c->ValueKnown(in_rows_dim) ||
+          !c->ValueKnown(in_cols_dim)) {
         ShapeHandle output_shape =
             c->MakeShape({batch_size_dim, InferenceContext::kUnknownDim,
                           InferenceContext::kUnknownDim, output_depth_dim});
@@ -2647,8 +2650,9 @@ REGISTER_OP("ExtractVolumePatches")
       TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
           in_cols, ksize_cols, stride_cols, padding, &output_cols,
           &padding_before, &padding_after));
-      ShapeHandle output_shape = c->MakeShape(
-          {batch_size_dim, output_planes, output_rows, output_cols, output_depth_dim});
+      ShapeHandle output_shape =
+          c->MakeShape({batch_size_dim, output_planes, output_rows, output_cols,
+                        output_depth_dim});
       c->set_output(0, output_shape);
       return Status::OK();
     });
-- 
GitLab


From 72359f9cfa10a08cecc3a179999a1b8ab835a818 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Sat, 15 Sep 2018 16:02:22 +0800
Subject: [PATCH 0234/1357] fix bug of lacking axis when using array.ops.concat
 in unwrap_and_concat function

---
 tensorflow/python/estimator/estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 90280fd25d..ff2baa0465 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1653,7 +1653,7 @@ def _combine_distributed_scaffold(grouped_scaffold, distribution):
   def _unwrap_and_concat(value):
     value = nest.flatten(distribution.unwrap(value))
     if len(value) != 1:
-      return array_ops.concat(value)
+      return array_ops.concat(value, 0)
     return value[0]
 
   ready_op = distribution.call_for_each_tower(
-- 
GitLab


From e517e2cf49a23d8561bcc5fcacbbb6674064b0e9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 15 Sep 2018 02:01:56 -0700
Subject: [PATCH 0235/1357] compat: Update forward compatibility horizon to
 2018-09-15

PiperOrigin-RevId: 213100589
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 8a100fe975..db850509ad 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 14)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 15)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From eab14a9303f6268d97fa3b901cc09a71c86bba63 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Sat, 15 Sep 2018 06:04:12 -0700
Subject: [PATCH 0236/1357] [TPU] Deprecate the computation_shape attribute to
 the TpuReplicate op in lieu of a new num_cores_per_replica.

PiperOrigin-RevId: 213111326
---
 tensorflow/contrib/tpu/ops/replication_ops.cc | 11 ++++++-----
 tensorflow/contrib/tpu/python/tpu/tpu.py      | 13 ++++++++++---
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/tpu/ops/replication_ops.cc b/tensorflow/contrib/tpu/ops/replication_ops.cc
index 15a2bb17a9..285e11d92d 100644
--- a/tensorflow/contrib/tpu/ops/replication_ops.cc
+++ b/tensorflow/contrib/tpu/ops/replication_ops.cc
@@ -24,9 +24,11 @@ using shape_inference::ShapeHandle;
 
 REGISTER_OP("TPUReplicateMetadata")
     .Attr("num_replicas: int >= 0")
+    .Attr("num_cores_per_replica: int = 1")
     .Attr("topology: string = \"\"")
     .Attr("use_tpu: bool = true")
     .Attr("device_assignment: list(int) = []")
+    // Deprecated. Use num_cores_per_replica instead.
     .Attr("computation_shape: list(int) = []")
     .Attr("host_compute_core: list(string) = []")
     .SetShapeFn(shape_inference::UnknownShape);
@@ -93,11 +95,11 @@ REGISTER_OP("TPUCompilationResult")
 REGISTER_OP("TPUReplicate")
     .Attr("computation: func")
     .Attr("num_replicas: int >= 1")
+    .Attr("num_cores_per_replica: int = 1")
     .Attr("topology: string = \"\"")
     .Attr("use_tpu: bool = true")
     .Attr("device_assignment: list(int) = []")
     .Attr("host_compute_core: list(string) = []")
-    .Attr("computation_shape: list(int) = []")
     .Attr("Tinputs: list(type) >= 0")
     .Attr("Tbroadcast_inputs: list(type) >= 0")
     .Attr("NumVariables: int >= 0")
@@ -114,16 +116,15 @@ Runs replicated computations on a distributed TPU system.
 
 computation: a function containing the computation to run.
 num_replicas: the number of replicas of the computation to run.
+num_cores_per_replica: the number of logical cores in each replica.
 topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
 topology.
 use_tpu: a bool indicating if this computation will run on TPU or CPU/GPU.
 Currently, only supports a default placement (computation is placed on GPU
 if one is available, and on CPU if not).
-computation_shape: a [mesh_dimension] array describing the shape of each
-  computation replica in numbers of cores in the TPU mesh.
 device_assignment: a flattened array with shape
-  [replica] + computation_shape + [mesh_dimension] that maps the coordinates of
-  logical cores in each replica of a computation to physical coordinates in
+  [replica, num_cores_per_replica, mesh_dimension] that maps the coordinates
+  of logical cores in each replica of a computation to physical coordinates in
   the TPU topology.
 Tinputs: the types of the arguments to 'computation'.
 inputs: the inputs to 'computation', flattened, in replica-major order.
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 0f9f7cd91b..815a087a24 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -26,6 +26,7 @@ from tensorflow.contrib.tpu.python.ops import tpu_ops
 from tensorflow.contrib.tpu.python.tpu import tpu_function
 
 from tensorflow.core.framework import attr_value_pb2
+from tensorflow.python.compat import compat as api_compat
 from tensorflow.python.framework import device as pydev
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -558,10 +559,16 @@ def split_compile_and_replicate(computation,
         "topology":
             device_assignment.topology.serialized(),
         "device_assignment":
-            device_assignment.core_assignment.flatten().tolist(),
-        "computation_shape":
-            device_assignment.computation_shape.tolist()
+            device_assignment.core_assignment.flatten().tolist()
     }
+    # TODO(phawkins): remove this case after the forward compatibility window
+    # expires on 2018-10-6.
+    if api_compat.forward_compatible(2018, 10, 6):
+      metadata_kwargs["num_cores_per_replica"] = (
+          device_assignment.num_cores_per_replica)
+    else:
+      metadata_kwargs["computation_shape"] = (
+          device_assignment.computation_shape.tolist())
 
   if ((not isinstance(inputs, list)) or
       any(not isinstance(inp, (list, tuple)) for inp in inputs)):
-- 
GitLab


From aa2094fc9dc6e67d6e440231828de05a6da3cf78 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 16 Sep 2018 02:11:30 -0700
Subject: [PATCH 0237/1357] compat: Update forward compatibility horizon to
 2018-09-16

PiperOrigin-RevId: 213161736
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index db850509ad..c246a98237 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 15)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 16)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 92c31bb620b0f8dd6590380dc6a5674f591ce1cb Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Sun, 16 Sep 2018 12:01:52 -0700
Subject: [PATCH 0238/1357] Introduce gmock matchers for TensorFlow nodes

I need these to write readable unit tests for TF graph transformations.  All of
my use cases will live inside tensorflow/compiler so putting it in
tensorflow/compiler/jit for now; but we can move these out if other users are
interested.

In the future we may want to auto-generate type safe versions of these from the
op registrations like we generate C++ wrappers today.

PiperOrigin-RevId: 213186810
---
 tensorflow/compiler/jit/BUILD                 |  29 ++
 tensorflow/compiler/jit/node_matchers.cc      | 458 ++++++++++++++++++
 tensorflow/compiler/jit/node_matchers.h       | 197 ++++++++
 tensorflow/compiler/jit/node_matchers_test.cc | 179 +++++++
 4 files changed, 863 insertions(+)
 create mode 100644 tensorflow/compiler/jit/node_matchers.cc
 create mode 100644 tensorflow/compiler/jit/node_matchers.h
 create mode 100644 tensorflow/compiler/jit/node_matchers_test.cc

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index f4e1bc5e83..1001c57f3d 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -599,6 +599,35 @@ tf_cuda_cc_test(
     ],
 )
 
+cc_library(
+    name = "node_matchers",
+    testonly = True,
+    srcs = ["node_matchers.cc"],
+    hdrs = ["node_matchers.h"],
+    deps = [
+        "//tensorflow/cc:ops",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/core:graph",
+        "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:optional",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
+tf_cc_test(
+    name = "node_matchers_test",
+    srcs = ["node_matchers_test.cc"],
+    deps = [
+        ":node_matchers",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:ops",
+        "//tensorflow/core:ops",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 # This target can be used by XLA device plugins to prevent circular dependencies, and provides access to all of the required headers for building a device library.
 cc_header_only_library(
     name = "xla_jit_headers_lib",
diff --git a/tensorflow/compiler/jit/node_matchers.cc b/tensorflow/compiler/jit/node_matchers.cc
new file mode 100644
index 0000000000..d8ace628e6
--- /dev/null
+++ b/tensorflow/compiler/jit/node_matchers.cc
@@ -0,0 +1,458 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/node_matchers.h"
+
+#include <utility>
+#include "absl/algorithm/container.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
+#include "tensorflow/core/framework/tensor.pb.h"
+
+namespace tensorflow {
+namespace testing {
+namespace matchers {
+namespace {
+
+using impl::NodeMatcherProperties;
+
+string IndentAllButFirstLine(absl::string_view text) {
+  std::vector<std::string> lines = absl::StrSplit(text, '\n');
+  for (int i = 1; i < lines.size(); i++) {
+    lines[i].insert(0, "  ");
+  }
+  return absl::StrJoin(lines, "\n");
+}
+
+template <typename T>
+bool CompareTensor(const Tensor& actual, const Tensor& expected,
+                   ::testing::MatchResultListener* listener) {
+  if (actual.NumElements() != expected.NumElements()) {
+    if (listener->IsInterested()) {
+      *listener << "\nwas looking for tensor with " << expected.NumElements()
+                << " elements, found tensor with " << actual.NumElements()
+                << " elements";
+      return false;
+    }
+  }
+
+  for (int64 i = 0, e = actual.NumElements(); i < e; i++) {
+    if (actual.flat<T>()(i) != expected.flat<T>()(i)) {
+      *listener << "\nmismatch in constant tensor at index " << i
+                << " expected = " << expected.flat<T>()(i)
+                << " actual = " << actual.flat<T>()(i);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool MatchAndExplainTensor(const Tensor& tensor, const Tensor& expected_tensor,
+                           ::testing::MatchResultListener* listener) {
+  if (tensor.dtype() != expected_tensor.dtype()) {
+    if (listener->IsInterested()) {
+      *listener << "\nexpected tensor of type "
+                << DataType_Name(expected_tensor.dtype())
+                << " but found one of type " << DataType_Name(tensor.dtype());
+      return false;
+    }
+  }
+
+  switch (tensor.dtype()) {
+    case DT_FLOAT:
+      return CompareTensor<float>(tensor, expected_tensor, listener);
+    case DT_DOUBLE:
+      return CompareTensor<double>(tensor, expected_tensor, listener);
+    case DT_INT8:
+      return CompareTensor<int8>(tensor, expected_tensor, listener);
+    case DT_INT16:
+      return CompareTensor<int16>(tensor, expected_tensor, listener);
+    case DT_INT32:
+      return CompareTensor<int32>(tensor, expected_tensor, listener);
+    case DT_INT64:
+      return CompareTensor<int64>(tensor, expected_tensor, listener);
+    case DT_UINT8:
+      return CompareTensor<uint8>(tensor, expected_tensor, listener);
+    case DT_UINT16:
+      return CompareTensor<uint16>(tensor, expected_tensor, listener);
+    case DT_UINT32:
+      return CompareTensor<uint32>(tensor, expected_tensor, listener);
+    case DT_UINT64:
+      return CompareTensor<uint64>(tensor, expected_tensor, listener);
+    default:
+      LOG(FATAL) << "Unsupported dtype "  // Crash ok: testonly.
+                 << DataType_Name(tensor.dtype());
+  }
+}
+
+using Input = std::pair<const Node*, int>;
+
+struct NodeMatcher : public ::testing::MatcherInterface<const Node*> {
+  bool MatchAndExplain(
+      const Node* node,
+      ::testing::MatchResultListener* listener) const override {
+    if (op && node->type_string() != *op) {
+      if (listener->IsInterested()) {
+        *listener << "\nexpected op " << *op << " but found "
+                  << node->type_string();
+      }
+      return false;
+    }
+
+    if (assigned_device && node->assigned_device_name() != *assigned_device) {
+      if (listener->IsInterested()) {
+        *listener << "\nexpected assigned_device " << *assigned_device
+                  << " but found \"" << node->assigned_device_name() << "\"";
+      }
+      return false;
+    }
+
+    if (name && node->name() != *name) {
+      if (listener->IsInterested()) {
+        *listener << "\nexpected name " << *name << " but found "
+                  << node->name();
+      }
+      return false;
+    }
+
+    if (constant_value) {
+      const TensorProto* proto = nullptr;
+      if (!GetNodeAttr(node->def(), "value", &proto).ok()) {
+        if (listener->IsInterested()) {
+          *listener << "\ncould not find \"value\" attribute in node";
+        }
+        return false;
+      }
+
+      Tensor tensor(proto->dtype());
+      if (!tensor.FromProto(*proto)) {
+        if (listener->IsInterested()) {
+          *listener << "\ncould not convert TensorProto in \"value\" attribute "
+                       "to Tensor";
+        }
+        return false;
+      }
+
+      if (!MatchAndExplainTensor(/*tensor=*/tensor,
+                                 /*expected_tensor=*/*constant_value,
+                                 listener)) {
+        return false;
+      }
+    }
+
+    if (input_matchers) {
+      if (input_matchers->size() != node->num_inputs()) {
+        if (listener->IsInterested()) {
+          *listener << "\nexpected " << input_matchers->size()
+                    << " inputs but node has " << node->num_inputs();
+        }
+        return false;
+      }
+
+      for (int input_idx = 0, e = input_matchers->size(); input_idx < e;
+           input_idx++) {
+        if (!MatchAndExplainInput(node, input_idx, listener)) {
+          return false;
+        }
+      }
+    }
+
+    std::vector<const Node*> control_deps;
+    for (const Edge* e : node->in_edges()) {
+      if (e->IsControlEdge()) {
+        control_deps.push_back(e->src());
+      }
+    }
+
+    ::testing::StringMatchResultListener inner_listener;
+    if (control_dep_set &&
+        !control_dep_set->MatchAndExplain(control_deps, &inner_listener)) {
+      if (listener->IsInterested()) {
+        string explanation = inner_listener.str();
+        if (!explanation.empty()) {
+          explanation = absl::StrCat(", ", explanation, ",");
+        }
+        *listener << "ctrl_deps" << explanation << " does not match expected: ";
+        control_dep_set->DescribeTo(listener->stream());
+      }
+      return false;
+    }
+    return true;
+  }
+
+  void DescribeTo(::std::ostream* os) const override {
+    std::vector<string> predicates;
+
+    if (name) {
+      predicates.push_back(absl::StrCat("name: ", *name));
+    }
+
+    if (op) {
+      predicates.push_back(absl::StrCat("op: ", *op));
+    }
+
+    if (assigned_device) {
+      predicates.push_back(absl::StrCat("assigned device: ", *assigned_device));
+    }
+
+    bool printed_something = !predicates.empty();
+
+    *os << absl::StrJoin(predicates, ", ");
+
+    if (constant_value) {
+      printed_something = true;
+      *os << "constant value: " << constant_value->DebugString();
+    }
+
+    if (input_matchers) {
+      if (!input_matchers->empty()) {
+        printed_something = true;
+        *os << " with " << (input_matchers->size() == 1 ? "only " : "")
+            << "input" << (input_matchers->size() == 1 ? "" : "s") << " ";
+      }
+
+      if (input_matchers->size() == 1) {
+        ::std::stringstream ss;
+        input_matchers->front().DescribeTo(&ss);
+        printed_something = true;
+        *os << "matching " << ss.str();
+      } else {
+        int edge_idx = 0;
+        for (const ::testing::Matcher<Input>& matcher : (*input_matchers)) {
+          *os << "\n  [" << edge_idx << "] matching (";
+          ::std::stringstream ss;
+          matcher.DescribeTo(&ss);
+          printed_something = true;
+          *os << IndentAllButFirstLine(ss.str());
+          *os << ")";
+          edge_idx++;
+        }
+      }
+    }
+
+    if (control_dep_set) {
+      printed_something = true;
+      *os << " and control deps ";
+      control_dep_set->DescribeTo(os);
+    }
+
+    if (!printed_something) {
+      *os << "is any node";
+    }
+  }
+
+  bool MatchAndExplainInput(const Node* node, int input_idx,
+                            ::testing::MatchResultListener* listener) const {
+    const Edge* edge;
+    if (!node->input_edge(input_idx, &edge).ok()) {
+      if (listener->IsInterested()) {
+        *listener << "\ncould not find incoming edge for input " << input_idx;
+      }
+      return false;
+    }
+
+    ::testing::StringMatchResultListener inner_listener;
+    Input input = {edge->src(), edge->src_output()};
+    if ((*input_matchers)[input_idx].MatchAndExplain(input, &inner_listener)) {
+      return true;
+    }
+
+    if (listener->IsInterested()) {
+      *listener << "\ninput " << input_idx << " does not match expected:\n";
+      (*input_matchers)[input_idx].DescribeTo(listener->stream());
+      string explanation = inner_listener.str();
+      if (!explanation.empty()) {
+        *listener << ", " << explanation;
+      }
+    }
+    return false;
+  }
+
+  absl::optional<string> op;
+  absl::optional<string> name;
+  absl::optional<string> assigned_device;
+  absl::optional<Tensor> constant_value;
+  absl::optional<std::vector<::testing::Matcher<Input>>> input_matchers;
+  absl::optional<::testing::Matcher<absl::Span<const Node* const>>>
+      control_dep_set;
+};
+
+// Matches a dst and dst_output on an input edge.  Today we only use this with
+// dst_output=0 but we will eventually need to support multi-output operations.
+class InputMatcher : public ::testing::MatcherInterface<Input> {
+ public:
+  InputMatcher(::testing::Matcher<const Node*> src_matcher, int src_output)
+      : src_matcher_(std::move(src_matcher)), src_output_(src_output) {}
+
+  bool MatchAndExplain(
+      Input input, ::testing::MatchResultListener* listener) const override {
+    ::testing::StringMatchResultListener inner_listener;
+    if (!src_matcher_.MatchAndExplain(input.first, &inner_listener)) {
+      if (listener->IsInterested()) {
+        *listener << "\nsource does not match expected ";
+        src_matcher_.DescribeTo(listener->stream());
+        string explanation = inner_listener.str();
+        if (!explanation.empty()) {
+          *listener << "\n\t" << explanation;
+        }
+      }
+      return false;
+    }
+    if (input.second != src_output_) {
+      if (listener->IsInterested()) {
+        *listener << "\nexpected output slot to be " << src_output_
+                  << " but found " << input.second;
+      }
+      return false;
+    }
+
+    return true;
+  }
+
+  void DescribeTo(::std::ostream* os) const override {
+    if (src_output_) {
+      *os << "output slot: " << src_output_ << ", source: (";
+    }
+
+    src_matcher_.DescribeTo(os);
+
+    if (src_output_) {
+      *os << ")";
+    }
+  }
+
+ private:
+  ::testing::Matcher<const Node*> src_matcher_;
+  int src_output_;
+};
+
+std::vector<::testing::Matcher<Input>> NodeMatchersToInputMatchers(
+    absl::Span<const ::testing::Matcher<const Node*>> node_matchers) {
+  std::vector<::testing::Matcher<Input>> result;
+  absl::c_transform(node_matchers, std::back_inserter(result),
+                    [](::testing::Matcher<const Node*> n) {
+                      return ::testing::MakeMatcher(new InputMatcher(n, 0));
+                    });
+  return result;
+}
+}  // namespace
+
+::testing::Matcher<const Node*> impl::NodeWith(
+    absl::Span<const NodeMatcherProperties> props) {
+  NodeMatcher* matcher = new NodeMatcher();
+  for (const NodeMatcherProperties& prop : props) {
+    if (prop.name()) {
+      DCHECK(!matcher->name);
+      matcher->name = prop.name();
+    }
+
+    if (prop.op()) {
+      DCHECK(!matcher->op);
+      matcher->op = prop.op();
+    }
+
+    if (prop.constant_value()) {
+      DCHECK(!matcher->constant_value);
+      matcher->constant_value = prop.constant_value();
+    }
+
+    if (prop.assigned_device()) {
+      DCHECK(!matcher->assigned_device);
+      matcher->assigned_device = prop.assigned_device();
+    }
+
+    if (prop.input_nodes()) {
+      DCHECK(!matcher->input_matchers);
+      matcher->input_matchers =
+          NodeMatchersToInputMatchers(*prop.input_nodes());
+    }
+
+    if (prop.control_deps()) {
+      DCHECK(!matcher->control_dep_set);
+      matcher->control_dep_set =
+          ::testing::UnorderedElementsAreArray(*prop.control_deps());
+    }
+  }
+
+  return ::testing::MakeMatcher(matcher);
+}
+
+impl::NodeMatcherProperties Name(string name) {
+  impl::NodeMatcherProperties props;
+  props.set_name(std::move(name));
+  return props;
+}
+
+// Matches a node with op `op`.
+impl::NodeMatcherProperties Op(string op) {
+  impl::NodeMatcherProperties props;
+  props.set_op(std::move(op));
+  return props;
+}
+
+// Matches a node with assigned device `assigned_device`.
+impl::NodeMatcherProperties AssignedDevice(string assigned_device) {
+  impl::NodeMatcherProperties props;
+  props.set_assigned_device(std::move(assigned_device));
+  return props;
+}
+
+impl::NodeMatcherProperties impl::Inputs(
+    absl::Span<const ::testing::Matcher<const Node*>> inputs) {
+  std::vector<::testing::Matcher<const Node*>> inputs_vector;
+  absl::c_copy(inputs, std::back_inserter(inputs_vector));
+
+  impl::NodeMatcherProperties props;
+  props.set_input_nodes(std::move(inputs_vector));
+  return props;
+}
+
+impl::NodeMatcherProperties impl::CtrlDeps(
+    absl::Span<const ::testing::Matcher<const Node*>> control_deps) {
+  std::vector<::testing::Matcher<const Node*>> control_deps_vector;
+  absl::c_copy(control_deps, std::back_inserter(control_deps_vector));
+
+  impl::NodeMatcherProperties props;
+  props.set_control_deps(std::move(control_deps_vector));
+  return props;
+}
+
+NodeMatcherProperties ConstantValue(
+    const ::tensorflow::Input::Initializer& val) {
+  TF_CHECK_OK(val.status);
+  NodeMatcherProperties props;
+  props.set_constant_value(val.tensor);
+  return props;
+}
+
+::testing::Matcher<const Node*> Const(
+    const ::tensorflow::Input::Initializer& val) {
+  return NodeWith(ConstantValue(val));
+}
+}  // namespace matchers
+
+Node* FindNodeByName(Graph* g, absl::string_view name) {
+  for (Node* n : g->nodes()) {
+    if (n->name() == name) {
+      return n;
+    }
+  }
+
+  return nullptr;
+}
+}  // namespace testing
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/node_matchers.h b/tensorflow/compiler/jit/node_matchers.h
new file mode 100644
index 0000000000..0437a7e95c
--- /dev/null
+++ b/tensorflow/compiler/jit/node_matchers.h
@@ -0,0 +1,197 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Provides a set of matchers for tensorflow nodes.
+//
+// Example usage:
+//
+//  tensorflow::Node* node = ...;
+//  EXPECT_THAT(node, NodeWith(Name("name"), Op("op"),
+//                             Inputs(NodeWith(Name("input")))))
+//
+// Matchable node properties (the expressions that go inside NodeWith(...))
+// are:
+//
+//  - Name(string): matches the node name exactly.  We will probably need to
+//    have this take a string matcher soon in the future.
+//
+//  - Op(string): matches the op exactly.
+//
+//  - AssignedDevice(string): matches the assigned device exactly.
+//
+//  - Inputs(<ordered list>): matches the list of non-control inputs to the node
+//    exactly (i.e. does not match a suffix or a prefix).
+//
+//  - CtrlDeps(<unordered list>): matches the list of control dependences on the
+//    node exactly but in any order.
+//
+//  - ConstantValue(tensorflow::Input::Initializer init): matches a Const node
+//    with the constant value `init`.  Implies Op("Const").
+//
+// Node properties may not be repeated in a single NodeWith(...)  matcher.
+// E.g. NodeWith(Op("Foo"), Op("Bar")) will CHECK-fail.  Since ConstantValue
+// implies Op("Const"), a single NodeWith matcher can't have both
+// ConstantValue(...) and Op(...).
+
+#ifndef TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_
+#define TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_
+
+#include <array>
+#include <string>
+#include <vector>
+
+#include "absl/algorithm/container.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "absl/types/span.h"
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/core/graph/graph.h"
+
+namespace tensorflow {
+namespace testing {
+namespace matchers {
+
+namespace impl {
+
+// -----------------------------------------------------------------------------
+// Implementation details.
+
+// Properties that we match on for a particular Node.  If a particular property
+// is nullopt then any value for it is allowed.
+class NodeMatcherProperties {
+ public:
+  using NodeSeqMatcher = std::vector<::testing::Matcher<const Node*>>;
+
+  const absl::optional<string>& name() const { return name_; }
+  const absl::optional<string>& op() const { return op_; }
+  const absl::optional<string>& assigned_device() const {
+    return assigned_device_;
+  }
+  const absl::optional<Tensor>& constant_value() const {
+    return constant_value_;
+  }
+  const absl::optional<NodeSeqMatcher>& input_nodes() const {
+    return input_nodes_;
+  }
+  const absl::optional<NodeSeqMatcher>& control_deps() const {
+    return control_deps_;
+  }
+
+  void set_name(string name) {
+    DCHECK(IsEmpty());
+    name_ = std::move(name);
+  }
+
+  void set_op(string op) {
+    DCHECK(IsEmpty());
+    op_ = std::move(op);
+  }
+
+  void set_assigned_device(string assigned_device) {
+    DCHECK(IsEmpty());
+    assigned_device_ = std::move(assigned_device);
+  }
+
+  void set_constant_value(Tensor constant_value) {
+    DCHECK(IsEmpty());
+    constant_value_ = std::move(constant_value);
+    op_ = "Const";
+  }
+
+  void set_input_nodes(NodeSeqMatcher input_nodes) {
+    DCHECK(IsEmpty());
+    input_nodes_ = std::move(input_nodes);
+  }
+
+  void set_control_deps(NodeSeqMatcher control_deps) {
+    DCHECK(IsEmpty());
+    control_deps_ = std::move(control_deps);
+  }
+
+  bool IsEmpty() const {
+    return !name().has_value() && !op().has_value() &&
+           !input_nodes().has_value() && !control_deps().has_value();
+  }
+
+ private:
+  absl::optional<string> name_;
+  absl::optional<string> op_;
+  absl::optional<string> assigned_device_;
+  absl::optional<Tensor> constant_value_;
+  absl::optional<NodeSeqMatcher> input_nodes_;
+  absl::optional<NodeSeqMatcher> control_deps_;
+};
+
+::testing::Matcher<const Node*> NodeWith(
+    absl::Span<const NodeMatcherProperties> props);
+
+impl::NodeMatcherProperties Inputs(
+    absl::Span<const ::testing::Matcher<const Node*>> inputs);
+
+impl::NodeMatcherProperties CtrlDeps(
+    absl::Span<const ::testing::Matcher<const Node*>> control_deps);
+}  // namespace impl
+
+// -----------------------------------------------------------------------------
+// Public interface.
+
+// Matches a node with name `name`.
+impl::NodeMatcherProperties Name(string name);
+
+// Matches a node with op `op`.
+impl::NodeMatcherProperties Op(string op);
+
+// Matches a node with assigned device `assigned_device`.
+impl::NodeMatcherProperties AssignedDevice(string assigned_device);
+
+// Matches a node with inputs `inputs`.
+//
+// `inputs` are ordered; `inputs`[i] must match input i.
+template <typename... Ts>
+impl::NodeMatcherProperties Inputs(Ts... inputs) {
+  return impl::Inputs({inputs...});
+}
+
+// Matches a node with control dependences `control_deps`.
+//
+// `control_deps` are unordered and will match the control deps of a node in any
+// order.
+template <typename... Ts>
+impl::NodeMatcherProperties CtrlDeps(Ts... control_deps) {
+  return impl::CtrlDeps({control_deps...});
+}
+
+// Matches a constant node with value `val`.
+impl::NodeMatcherProperties ConstantValue(
+    const ::tensorflow::Input::Initializer& val);
+
+// The main gmock matcher.  See file comment for example usage.
+template <typename... Ts>
+::testing::Matcher<const Node*> NodeWith(Ts... args) {
+  std::array<impl::NodeMatcherProperties, sizeof...(Ts)> array = {args...};
+  return impl::NodeWith(array);
+}
+
+::testing::Matcher<const Node*> Const(
+    const ::tensorflow::Input::Initializer& val);
+}  // namespace matchers
+
+// If `g` has a node named `name` returns it, otherwise returns null.
+Node* FindNodeByName(Graph* g, absl::string_view name);
+}  // namespace testing
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_
diff --git a/tensorflow/compiler/jit/node_matchers_test.cc b/tensorflow/compiler/jit/node_matchers_test.cc
new file mode 100644
index 0000000000..93a8994307
--- /dev/null
+++ b/tensorflow/compiler/jit/node_matchers_test.cc
@@ -0,0 +1,179 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/node_matchers.h"
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/cc/ops/math_ops.h"
+
+namespace tensorflow {
+namespace testing {
+namespace {
+
+using ::testing::_;
+
+using testing::matchers::AssignedDevice;
+using testing::matchers::ConstantValue;
+using testing::matchers::CtrlDeps;
+using testing::matchers::Inputs;
+using testing::matchers::Name;
+using testing::matchers::NodeWith;
+using testing::matchers::Op;
+
+template <typename M, typename T>
+string Explain(const T& t, const M& m) {
+  ::testing::StringMatchResultListener listener;
+  EXPECT_THAT(t, ::testing::Not(m));  // For the error message.
+  EXPECT_FALSE(m.MatchAndExplain(t, &listener));
+  return listener.str();
+}
+
+TEST(NodeMatchers, CheckAgainstConstant) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+  Output placeholder =
+      ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT);
+
+  EXPECT_THAT(placeholder.node(), NodeWith(Op("Placeholder")));
+  EXPECT_THAT(placeholder.node(), NodeWith(Name("placeholder")));
+  EXPECT_THAT(placeholder.node(),
+              NodeWith(Op("Placeholder"), Name("placeholder")));
+  EXPECT_THAT(placeholder.node(),
+              NodeWith(Name("placeholder"), Op("Placeholder")));
+  EXPECT_THAT(placeholder.node(), NodeWith(Inputs()));
+  EXPECT_THAT(placeholder.node(),
+              NodeWith(Op("Placeholder"), Name("placeholder"), Inputs()));
+
+  EXPECT_EQ(Explain(placeholder.node(), NodeWith(Op("Add"))),
+            "\nexpected op Add but found Placeholder");
+  EXPECT_EQ(Explain(placeholder.node(), NodeWith(Name("add"))),
+            "\nexpected name add but found placeholder");
+  EXPECT_EQ(Explain(placeholder.node(), NodeWith(Inputs(NodeWith()))),
+            "\nexpected 1 inputs but node has 0");
+}
+
+TEST(NodeMatchers, CheckAgainstBinary) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  Output placeholder_a =
+      ops::Placeholder(root.WithOpName("placeholder_a"), DT_FLOAT);
+  Output placeholder_b =
+      ops::Placeholder(root.WithOpName("placeholder_b"), DT_FLOAT);
+  Output add = ops::Add(root.WithOpName("add"), placeholder_a, placeholder_b);
+
+  EXPECT_THAT(add.node(), NodeWith(Op("Add"), Name("add"),
+                                   Inputs(NodeWith(Name("placeholder_a")),
+                                          NodeWith(Name("placeholder_b")))));
+
+  EXPECT_EQ(Explain(add.node(), NodeWith(Inputs())),
+            "\nexpected 0 inputs but node has 2");
+  EXPECT_EQ(
+      Explain(add.node(), NodeWith(Inputs(NodeWith(Name("blah")), _))),
+      "\ninput 0 does not match expected:\nname: blah, \nsource does not match "
+      "expected name: blah\n\t\nexpected name blah but found placeholder_a");
+  EXPECT_EQ(
+      Explain(add.node(), NodeWith(Inputs(_, NodeWith(Name("blah"))))),
+      "\ninput 1 does not match expected:\nname: blah, \nsource does not match "
+      "expected name: blah\n\t\nexpected name blah but found placeholder_b");
+}
+
+TEST(NodeMatchers, CheckControlDependence) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  Output placeholder_a =
+      ops::Placeholder(root.WithOpName("placeholder_a"), DT_FLOAT);
+  Output placeholder_b =
+      ops::Placeholder(root.WithOpName("placeholder_b"), DT_FLOAT);
+  Output placeholder_c =
+      ops::Placeholder(root.WithOpName("placeholder_c"), DT_FLOAT);
+  Output placeholder_d =
+      ops::Placeholder(root.WithOpName("placeholder_d"), DT_FLOAT);
+
+  root.graph()->AddControlEdge(placeholder_a.node(), placeholder_c.node());
+  root.graph()->AddControlEdge(placeholder_b.node(), placeholder_c.node());
+
+  EXPECT_THAT(placeholder_c.node(),
+              NodeWith(Name("placeholder_c"),
+                       CtrlDeps(NodeWith(Name("placeholder_a")),
+                                NodeWith(Name("placeholder_b")))));
+  EXPECT_THAT(placeholder_d.node(),
+              NodeWith(Name("placeholder_d"), CtrlDeps()));
+
+  EXPECT_EQ(
+      Explain(placeholder_c.node(), NodeWith(CtrlDeps())),
+      "ctrl_deps, which has 2 elements, does not match expected: is empty");
+  EXPECT_EQ(Explain(placeholder_d.node(), NodeWith(CtrlDeps(NodeWith()))),
+            "ctrl_deps does not match expected: has 1 element and that element "
+            "is any node");
+}
+
+TEST(NodeMatchers, ConstVaulue) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+  Output placeholder =
+      ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT);
+  Output const_0d = ops::Const(root.WithOpName("const_0d"), 42);
+
+  Output const_2d = ops::Const(root.WithOpName("const_2d"), {{1, 2}, {4, 3}});
+
+  EXPECT_THAT(const_0d.node(), NodeWith(ConstantValue(42)));
+  EXPECT_THAT(const_0d.node(), NodeWith(ConstantValue(42), Name("const_0d")));
+
+  EXPECT_THAT(const_2d.node(), NodeWith(ConstantValue({{1, 2}, {4, 3}})));
+
+  EXPECT_EQ(Explain(placeholder.node(), NodeWith(ConstantValue(42))),
+            "\nexpected op Const but found Placeholder");
+  EXPECT_EQ(
+      Explain(const_0d.node(), NodeWith(ConstantValue(43))),
+      "\nmismatch in constant tensor at index 0 expected = 43 actual = 42");
+  EXPECT_EQ(
+      Explain(const_0d.node(), NodeWith(ConstantValue({{1, 2}, {4, 3}}))),
+      "\nwas looking for tensor with 4 elements, found tensor with 1 elements");
+  EXPECT_EQ(
+      Explain(const_2d.node(), NodeWith(ConstantValue(42))),
+      "\nwas looking for tensor with 1 elements, found tensor with 4 elements");
+}
+
+TEST(NodeMatchers, AssignedDevice) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  Output placeholder_a =
+      ops::Placeholder(root.WithOpName("placeholder_a"), DT_FLOAT);
+  Output placeholder_b =
+      ops::Placeholder(root.WithOpName("placeholder_b"), DT_FLOAT);
+
+  Output assigned_add =
+      ops::Add(root.WithOpName("assigned_add"), placeholder_a, placeholder_b);
+  assigned_add.node()->set_assigned_device_name(
+      "/job:localhost/replica:0/task:0/device:CPU:0");
+
+  Output unassigned_add =
+      ops::Add(root.WithOpName("unassigned_add"), placeholder_a, placeholder_b);
+
+  EXPECT_THAT(
+      assigned_add.node(),
+      NodeWith(AssignedDevice("/job:localhost/replica:0/task:0/device:CPU:0")));
+  EXPECT_THAT(unassigned_add.node(), NodeWith(AssignedDevice("")));
+
+  EXPECT_EQ(Explain(unassigned_add.node(),
+                    NodeWith(AssignedDevice(
+                        "/job:localhost/replica:0/task:0/device:CPU:0"))),
+            "\nexpected assigned_device "
+            "/job:localhost/replica:0/task:0/device:CPU:0 but found \"\"");
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace tensorflow
-- 
GitLab


From a6ee64cd216b3ac440262e1f4ec7872fe7026df6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 16 Sep 2018 13:38:24 -0700
Subject: [PATCH 0239/1357] Conditionally allow changing a non-fusion
 computation root_instruction shape.

PiperOrigin-RevId: 213191899
---
 tensorflow/compiler/xla/service/hlo_computation.cc | 6 +++---
 tensorflow/compiler/xla/service/hlo_computation.h  | 8 +++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 8c6903d766..601a008d9f 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -279,11 +279,11 @@ Status HloComputation::RemoveInstruction(HloInstruction* instruction) {
   return Status::OK();
 }
 
-void HloComputation::set_root_instruction(
-    HloInstruction* new_root_instruction) {
+void HloComputation::set_root_instruction(HloInstruction* new_root_instruction,
+                                          bool accept_different_shape) {
   // The shape of the root (ignoring layout) is an invariant of the computation
   // for non-fusion cases.
-  if (!IsFusionComputation()) {
+  if (!IsFusionComputation() && !accept_different_shape) {
     CHECK(ShapeUtil::Compatible(new_root_instruction->shape(),
                                 root_instruction_->shape()))
         << new_root_instruction->shape() << " is incompatible with "
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index 91c5234a6f..a880e9ab30 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -134,9 +134,11 @@ class HloComputation {
   Status RemoveInstructionAndUnusedOperands(HloInstruction* instruction);
 
   // Set the root of the computation to the given instruction. The instruction
-  // must have already been added to the computation and have the same shape as
-  // the result of the computation for non fusion computations.
-  void set_root_instruction(HloInstruction* new_root_instruction);
+  // must have already been added to the computation. In addition it must have
+  // the same shape as the result of the computation for non fusion
+  // computations, except if accept_different_shape is set to true.
+  void set_root_instruction(HloInstruction* new_root_instruction,
+                            bool accept_different_shape = false);
 
   // Return the root instruction of the computation. The root instruction is the
   // instruction which produces the output of the computation.
-- 
GitLab


From 79458017805905a7840ec15039d08ac010ecb9d3 Mon Sep 17 00:00:00 2001
From: Jenny Sahng <jennyesahng@gmail.com>
Date: Mon, 17 Sep 2018 10:21:11 +1200
Subject: [PATCH 0240/1357] Update broken link to intro on ADAGRAD

---
 tensorflow/contrib/optimizer_v2/adagrad.py | 2 +-
 tensorflow/python/training/adagrad.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/optimizer_v2/adagrad.py b/tensorflow/contrib/optimizer_v2/adagrad.py
index c333d1e089..d17d6772e2 100644
--- a/tensorflow/contrib/optimizer_v2/adagrad.py
+++ b/tensorflow/contrib/optimizer_v2/adagrad.py
@@ -31,7 +31,7 @@ class AdagradOptimizer(optimizer_v2.OptimizerV2):
 
   See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
   or this
-  [intro](http://cs.stanford.edu/~ppasupat/a9online/uploads/proximal_notes.pdf).
+  [intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf).
   """
 
   def __init__(self, learning_rate, initial_accumulator_value=0.1,
diff --git a/tensorflow/python/training/adagrad.py b/tensorflow/python/training/adagrad.py
index 3508b98475..cc0da26b27 100644
--- a/tensorflow/python/training/adagrad.py
+++ b/tensorflow/python/training/adagrad.py
@@ -34,7 +34,7 @@ class AdagradOptimizer(optimizer.Optimizer):
 
   See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
   or this
-  [intro](http://cs.stanford.edu/~ppasupat/a9online/uploads/proximal_notes.pdf).
+  [intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf).
   """
 
   def __init__(self, learning_rate, initial_accumulator_value=0.1,
-- 
GitLab


From 297fafbe9464372e1641c0f376f47569a23aeffa Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 17 Sep 2018 00:41:07 +0000
Subject: [PATCH 0241/1357] Support gradient_multipliers as tensor for
 optimize_loss

This fix tries to address the issue raised in 22295 where
gradient_multipliers for tf.contrib.layers.optimize_loss()
does not support tensor as input. This fix update the
optimize_loss to allow gradient_multipliers passed as dict
of tensors.

This fix fixes 22295.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/layers/python/layers/optimizers.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py
index 69d927e1b3..2ac58597c2 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers.py
@@ -433,8 +433,7 @@ def _multiply_gradients(grads_and_vars, gradient_multipliers):
     if (grad is not None and
         (var in gradient_multipliers or var.name in gradient_multipliers)):
       key = var if var in gradient_multipliers else var.name
-      multiplier = constant_op.constant(
-          gradient_multipliers[key], dtype=dtypes.float32)
+      multiplier = gradient_multipliers[key]
       if isinstance(grad, ops.IndexedSlices):
         grad_values = grad.values * multiplier
         grad = ops.IndexedSlices(grad_values, grad.indices, grad.dense_shape)
-- 
GitLab


From 921186571f792562fa234f7f0a7516b67e867930 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 17 Sep 2018 00:47:45 +0000
Subject: [PATCH 0242/1357] Add test cases to allow gradient_multipliers passed
 as tensor

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../layers/python/layers/optimizers_test.py    | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tensorflow/contrib/layers/python/layers/optimizers_test.py b/tensorflow/contrib/layers/python/layers/optimizers_test.py
index 29dede2a49..6a7df23011 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers_test.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers_test.py
@@ -250,6 +250,24 @@ class OptimizersTest(test.TestCase):
       self.assertAlmostEqual(var_value, 6.5, 4)
       self.assertEqual(global_step_value, 1)
 
+  def testGradientMultiplyTensor(self):
+    with self.cached_session() as session:
+      x, var, loss, global_step = _setup_model()
+      v = array_ops.placeholder(dtypes.float32, [])
+      train = optimizers_lib.optimize_loss(
+          loss,
+          global_step,
+          learning_rate=0.1,
+          optimizer="SGD",
+          gradient_multipliers={var: v})
+      variables.global_variables_initializer().run()
+      session.run(train, feed_dict={x: 5, v: 7.})
+      var_value, global_step_value = session.run([var, global_step])
+      # var(0) = 10, x = 5, var(0)/dx = 5,
+      # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx
+      self.assertAlmostEqual(var_value, 6.5, 4)
+      self.assertEqual(global_step_value, 1)
+
   def testIgnoreVariablesWithNoGradients(self):
     _, _, loss, global_step = _setup_model()
 
-- 
GitLab


From 8e6599d2d7b54fe8fba37ad1cc045b62bd7e50e5 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 17 Sep 2018 01:06:54 +0000
Subject: [PATCH 0243/1357] Allow different dtype of Tensor

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/layers/python/layers/optimizers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py
index 2ac58597c2..d92de3b58c 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers.py
@@ -438,6 +438,6 @@ def _multiply_gradients(grads_and_vars, gradient_multipliers):
         grad_values = grad.values * multiplier
         grad = ops.IndexedSlices(grad_values, grad.indices, grad.dense_shape)
       else:
-        grad *= multiplier
+        grad *= math_ops.cast(multiplier, grad.dtype)
     multiplied_grads_and_vars.append((grad, var))
   return multiplied_grads_and_vars
-- 
GitLab


From 7d8316fb85b21546e3df2aef701f1cfa9f92b6ba Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 17 Sep 2018 01:07:16 +0000
Subject: [PATCH 0244/1357] Add additional test cases

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../layers/python/layers/optimizers_test.py   | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/layers/python/layers/optimizers_test.py b/tensorflow/contrib/layers/python/layers/optimizers_test.py
index 6a7df23011..b4d1239e76 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers_test.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers_test.py
@@ -250,7 +250,7 @@ class OptimizersTest(test.TestCase):
       self.assertAlmostEqual(var_value, 6.5, 4)
       self.assertEqual(global_step_value, 1)
 
-  def testGradientMultiplyTensor(self):
+  def testGradientMultiplyInt32Tensor(self):
     with self.cached_session() as session:
       x, var, loss, global_step = _setup_model()
       v = array_ops.placeholder(dtypes.float32, [])
@@ -268,6 +268,24 @@ class OptimizersTest(test.TestCase):
       self.assertAlmostEqual(var_value, 6.5, 4)
       self.assertEqual(global_step_value, 1)
 
+  def testGradientMultiplyInt64Tensor(self):
+    with self.cached_session() as session:
+      x, var, loss, global_step = _setup_model()
+      v = array_ops.placeholder(dtypes.float64, [])
+      train = optimizers_lib.optimize_loss(
+          loss,
+          global_step,
+          learning_rate=0.1,
+          optimizer="SGD",
+          gradient_multipliers={var: v})
+      variables.global_variables_initializer().run()
+      session.run(train, feed_dict={x: 5, v: 7.})
+      var_value, global_step_value = session.run([var, global_step])
+      # var(0) = 10, x = 5, var(0)/dx = 5,
+      # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx
+      self.assertAlmostEqual(var_value, 6.5, 4)
+      self.assertEqual(global_step_value, 1)
+
   def testIgnoreVariablesWithNoGradients(self):
     _, _, loss, global_step = _setup_model()
 
-- 
GitLab


From 2501870be0df24ca0e191710b1de139e195616a3 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Sun, 16 Sep 2018 19:50:17 -0700
Subject: [PATCH 0245/1357] Fix some typos in the doc for XlaDynamicSlice

phawkins@ suggested these in cr/212715067 but I accidentally made the changes in
another client.

PiperOrigin-RevId: 213208811
---
 tensorflow/compiler/tf2xla/ops/xla_ops.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
index 02363500ef..733eeed3c6 100644
--- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc
+++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
@@ -121,8 +121,8 @@ Wraps the XLA DynamicSlice operator, documented at
 DynamicSlice extracts a sub-array from the input array at dynamic
 start_indices. The size of the slice in each dimension is passed in
 size_indices, which specify the end point of exclusive slice intervals in each
-dimension -- [start, start + size). The shape of start_indices must be rank ==
-1, with dimension size equal to the rank of operand.
+dimension -- [start, start + size). The shape of start_indices must have rank 1,
+with dimension size equal to the rank of operand.
 
 input: A `Tensor` of type T.
 
@@ -131,7 +131,8 @@ start_indices: Rank 1 tensor of N integers containing the starting indices of
 
 start_indices: List of N integers containing the slice size for each
   dimension. Each value must be strictly greater than zero, and start + size
-  must be less
+  must be less than or equal to the size of the dimension to avoid
+  implementation defined behavior.
 )doc");
 
 REGISTER_OP("XlaDynamicUpdateSlice")
-- 
GitLab


From 791f48d3aaaa875c23de1484f7ef1d0656fbabca Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Sun, 16 Sep 2018 20:11:53 -0700
Subject: [PATCH 0246/1357] Improve TFLite iOS doc.

PiperOrigin-RevId: 213210253
---
 tensorflow/contrib/lite/g3doc/ios.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/g3doc/ios.md b/tensorflow/contrib/lite/g3doc/ios.md
index a83d2c8fec..3b9fcca811 100644
--- a/tensorflow/contrib/lite/g3doc/ios.md
+++ b/tensorflow/contrib/lite/g3doc/ios.md
@@ -1,5 +1,10 @@
 
-# TensorFlow Lite for iOS
+# Build TensorFlow Lite for iOS
+
+This document describes how to build TensorFlow Lite iOS library. If you just
+want to use it, the easiest way is using the TensorFlow Lite CocoaPod releases.
+See [TensorFlow Lite iOS Demo](demo_ios.md) for examples.
+
 
 ## Building
 
-- 
GitLab


From b6a8ade2ce2b42ed4bed67aee40da4c1705e01fe Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 16 Sep 2018 20:44:35 -0700
Subject: [PATCH 0247/1357] Add ZerosLike to schema.

PiperOrigin-RevId: 213212445
---
 tensorflow/contrib/lite/builtin_ops.h         |   1 +
 .../lite/core/api/flatbuffer_conversions.cc   |   1 +
 tensorflow/contrib/lite/nnapi_delegate.cc     |   1 +
 tensorflow/contrib/lite/schema/schema.fbs     |   5 +
 .../contrib/lite/schema/schema_generated.h    | 124 +++++++++++++++++-
 5 files changed, 126 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h
index 5e97b777fc..7f33942c90 100644
--- a/tensorflow/contrib/lite/builtin_ops.h
+++ b/tensorflow/contrib/lite/builtin_ops.h
@@ -118,6 +118,7 @@ typedef enum {
   kTfLiteBuiltinFloorDiv = 90,
   kTfLiteBuiltinReduceAny = 91,
   kTfLiteBuiltinSquare = 92,
+  kTfLiteBuiltinZerosLike = 93,
 } TfLiteBuiltinOperator;
 
 #ifdef __cplusplus
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index f4d2839b1b..ceb2bbd612 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -618,6 +618,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_LOGICAL_NOT:
     case BuiltinOperator_FLOOR_DIV:
     case BuiltinOperator_SQUARE:
+    case BuiltinOperator_ZEROS_LIKE:
       break;
   }
   return kTfLiteOk;
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index f814b90d66..3b6a81ffde 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -673,6 +673,7 @@ TfLiteStatus AddOpsAndParams(
       case tflite::BuiltinOperator_FLOOR_DIV:
       case tflite::BuiltinOperator_REDUCE_ANY:
       case tflite::BuiltinOperator_SQUARE:
+      case tflite::BuiltinOperator_ZEROS_LIKE:
         logError("Op code %d is currently not delegated to NNAPI", builtin);
         return kTfLiteError;
         break;
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index f0db22d581..4c339317cb 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -174,6 +174,7 @@ enum BuiltinOperator : byte {
   FLOOR_DIV = 90,
   REDUCE_ANY = 91,
   SQUARE = 92,
+  ZEROS_LIKE = 93,
 }
 
 // Options for the builtin operators.
@@ -244,6 +245,7 @@ union BuiltinOptions {
   UnpackOptions,
   FloorDivOptions,
   SquareOptions,
+  ZerosLikeOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -588,6 +590,9 @@ table FloorDivOptions {
 table SquareOptions {
 }
 
+table ZerosLikeOptions {
+}
+
 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
 // builtin, or a string if the operator is custom.
 table OperatorCode {
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index 8c086a5e67..03c227f987 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -229,6 +229,9 @@ struct FloorDivOptionsT;
 struct SquareOptions;
 struct SquareOptionsT;
 
+struct ZerosLikeOptions;
+struct ZerosLikeOptionsT;
+
 struct OperatorCode;
 struct OperatorCodeT;
 
@@ -387,11 +390,12 @@ enum BuiltinOperator {
   BuiltinOperator_FLOOR_DIV = 90,
   BuiltinOperator_REDUCE_ANY = 91,
   BuiltinOperator_SQUARE = 92,
+  BuiltinOperator_ZEROS_LIKE = 93,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_SQUARE
+  BuiltinOperator_MAX = BuiltinOperator_ZEROS_LIKE
 };
 
-inline BuiltinOperator (&EnumValuesBuiltinOperator())[92] {
+inline BuiltinOperator (&EnumValuesBuiltinOperator())[93] {
   static BuiltinOperator values[] = {
     BuiltinOperator_ADD,
     BuiltinOperator_AVERAGE_POOL_2D,
@@ -484,7 +488,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[92] {
     BuiltinOperator_REDUCE_MIN,
     BuiltinOperator_FLOOR_DIV,
     BuiltinOperator_REDUCE_ANY,
-    BuiltinOperator_SQUARE
+    BuiltinOperator_SQUARE,
+    BuiltinOperator_ZEROS_LIKE
   };
   return values;
 }
@@ -584,6 +589,7 @@ inline const char **EnumNamesBuiltinOperator() {
     "FLOOR_DIV",
     "REDUCE_ANY",
     "SQUARE",
+    "ZEROS_LIKE",
     nullptr
   };
   return names;
@@ -662,11 +668,12 @@ enum BuiltinOptions {
   BuiltinOptions_UnpackOptions = 64,
   BuiltinOptions_FloorDivOptions = 65,
   BuiltinOptions_SquareOptions = 66,
+  BuiltinOptions_ZerosLikeOptions = 67,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_SquareOptions
+  BuiltinOptions_MAX = BuiltinOptions_ZerosLikeOptions
 };
 
-inline BuiltinOptions (&EnumValuesBuiltinOptions())[67] {
+inline BuiltinOptions (&EnumValuesBuiltinOptions())[68] {
   static BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -734,7 +741,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[67] {
     BuiltinOptions_LogicalNotOptions,
     BuiltinOptions_UnpackOptions,
     BuiltinOptions_FloorDivOptions,
-    BuiltinOptions_SquareOptions
+    BuiltinOptions_SquareOptions,
+    BuiltinOptions_ZerosLikeOptions
   };
   return values;
 }
@@ -808,6 +816,7 @@ inline const char **EnumNamesBuiltinOptions() {
     "UnpackOptions",
     "FloorDivOptions",
     "SquareOptions",
+    "ZerosLikeOptions",
     nullptr
   };
   return names;
@@ -1086,6 +1095,10 @@ template<> struct BuiltinOptionsTraits<SquareOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
 };
 
+template<> struct BuiltinOptionsTraits<ZerosLikeOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -1645,6 +1658,14 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_SquareOptions ?
       reinterpret_cast<const SquareOptionsT *>(value) : nullptr;
   }
+  ZerosLikeOptionsT *AsZerosLikeOptions() {
+    return type == BuiltinOptions_ZerosLikeOptions ?
+      reinterpret_cast<ZerosLikeOptionsT *>(value) : nullptr;
+  }
+  const ZerosLikeOptionsT *AsZerosLikeOptions() const {
+    return type == BuiltinOptions_ZerosLikeOptions ?
+      reinterpret_cast<const ZerosLikeOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -5888,6 +5909,46 @@ inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(
 
 flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct ZerosLikeOptionsT : public flatbuffers::NativeTable {
+  typedef ZerosLikeOptions TableType;
+  ZerosLikeOptionsT() {
+  }
+};
+
+struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef ZerosLikeOptionsT NativeTableType;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  ZerosLikeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ZerosLikeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ZerosLikeOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
+  flatbuffers::Offset<ZerosLikeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ZerosLikeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  ZerosLikeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct OperatorCodeT : public flatbuffers::NativeTable {
   typedef OperatorCode TableType;
   BuiltinOperator builtin_code;
@@ -6219,6 +6280,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const SquareOptions *builtin_options_as_SquareOptions() const {
     return builtin_options_type() == BuiltinOptions_SquareOptions ? static_cast<const SquareOptions *>(builtin_options()) : nullptr;
   }
+  const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const {
+    return builtin_options_type() == BuiltinOptions_ZerosLikeOptions ? static_cast<const ZerosLikeOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -6514,6 +6578,10 @@ template<> inline const SquareOptions *Operator::builtin_options_as<SquareOption
   return builtin_options_as_SquareOptions();
 }
 
+template<> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const {
+  return builtin_options_as_ZerosLikeOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -8782,6 +8850,29 @@ inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatB
       _fbb);
 }
 
+inline ZerosLikeOptionsT *ZerosLikeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new ZerosLikeOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void ZerosLikeOptions::UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<ZerosLikeOptions> ZerosLikeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateZerosLikeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ZerosLikeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateZerosLikeOptions(
+      _fbb);
+}
+
 inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorCodeT();
   UnPackTo(_o, _resolver);
@@ -9235,6 +9326,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const SquareOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_ZerosLikeOptions: {
+      auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -9517,6 +9612,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const SquareOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_ZerosLikeOptions: {
+      auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -9787,6 +9886,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const SquareOptionsT *>(value);
       return CreateSquareOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_ZerosLikeOptions: {
+      auto ptr = reinterpret_cast<const ZerosLikeOptionsT *>(value);
+      return CreateZerosLikeOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -10057,6 +10160,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new SquareOptionsT(*reinterpret_cast<SquareOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_ZerosLikeOptions: {
+      value = new ZerosLikeOptionsT(*reinterpret_cast<ZerosLikeOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -10394,6 +10501,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_ZerosLikeOptions: {
+      auto ptr = reinterpret_cast<ZerosLikeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
-- 
GitLab


From 1f7e51560e26992e8e56f6426525c1df1e53b974 Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Mon, 17 Sep 2018 13:42:15 +0800
Subject: [PATCH 0248/1357] Some changes for commit.

---
 tensorflow/core/kernels/BUILD           |  5 +--
 tensorflow/core/kernels/mkl_slice_op.cc | 46 ++++++++++++-------------
 tensorflow/core/ops/array_ops.cc        | 40 +++++++++++++++++++--
 3 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 2582814d08..f5682b6e13 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -6331,10 +6331,7 @@ tf_mkl_kernel_library(
 tf_mkl_kernel_library(
     name = "mkl_slice_op",
     prefix = "mkl_slice_op",
-    deps = ARRAY_DEPS + if_mkl([
-        "//third_party/mkl:intel_binary_blob",
-        "@mkl_dnn",
-    ]),
+    deps = ARRAY_DEPS + mkl_deps(),
 )
 
 tf_mkl_kernel_library(
diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index 86fb572478..20c4921390 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -48,7 +48,7 @@ gtl::InlinedVector<int64, 4> IntTensorToInt64Vec(const Tensor& tensor) {
       out.push_back(tensor.flat<int64>()(i));
     }
   } else {
-    LOG(FATAL) << "begin must be either int32 or int64";
+    LOG(FATAL) << "tensor must be either int32 or int64";
   }
   return out;
 }
@@ -59,6 +59,8 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 
 // A version of SharedValidation (slice_op.h) written for input that is in
 // either Mkl layout or Tensorflow layout.
+// A shared code to validate input shapes and check for identity, which is not dependent on the type of T.
+// We do this to reduce code size by not duplicating all this for all T (float, double, int32, etc.)
 static void ValidateMklInputs(OpKernelContext* context, bool* is_identity,
                               gtl::InlinedVector<int64, 4>* begin,
                               gtl::InlinedVector<int64, 4>* size) {
@@ -81,19 +83,19 @@ static void ValidateMklInputs(OpKernelContext* context, bool* is_identity,
   TensorShape input_tf_shape = input_mkl_shape.IsMklTensor()
                                    ? input_mkl_shape.GetTfShape()
                                    : input.shape();
+  const int input_dims = input_tf_shape.dims();
 
   OP_REQUIRES(
       context, context->op_kernel().IsLegacyVector(begin_tensor.shape()) &&
                    context->op_kernel().IsLegacyVector(size_tensor.shape()) &&
-                   begin_tensor.NumElements() == input_tf_shape.dims() &&
-                   size_tensor.NumElements() == input_tf_shape.dims(),
+                   begin_tensor.NumElements() == input_dims &&
+                   size_tensor.NumElements() == input_dims,
       errors::InvalidArgument(
           "Expected begin and size arguments to be 1-D tensors of size ",
-          input_tf_shape.dims(), ", but got shapes ",
+          input_dims, ", but got shapes ",
           begin_tensor.shape().DebugString(), " and ",
           size_tensor.shape().DebugString(), " instead."));
 
-  const int input_dims = input_tf_shape.dims();
   *begin = IntTensorToInt64Vec(begin_tensor);
   *size = IntTensorToInt64Vec(size_tensor);
   for (int i = 0; i < input_dims; ++i) {
@@ -152,7 +154,6 @@ static void CheckCommonCasesForMklInputs(OpKernelContext* context,
     // output.
     AllocateOutputSetMklShape(context, 0, input_mkl_shape);
     *done = true;
-    return;
   }
 }
 
@@ -172,8 +173,8 @@ class MklDnnSliceOp : public OpKernel {
     CheckCommonCasesForMklInputs<T>(context, &begin, &size, &done);
     if (!context->status().ok() || done == true) return;
 
-    // MKL-DNN does not have this limitation of supporting less than 8 dimension
-    // tensor. But we are mimicking functionality of Eigen Slice op for CPU.
+    // Though MKL-DNN supports more than 8 dimension and less than 12 dimension tensor.
+    // But we are mimicking functionality of Eigen Slice op for CPU.
     if (begin.size() >= 8) {
       OP_REQUIRES(
           context, false,
@@ -181,7 +182,6 @@ class MklDnnSliceOp : public OpKernel {
     }
 
     ComputeMklDnnSlice(context, begin, size);
-    return;
   }
 
  private:
@@ -203,7 +203,7 @@ class MklDnnSliceOp : public OpKernel {
       // 2. create view primitive descriptor in_submem_pd based on in_mem_pd,
       //    initial offsets, and sub-sizes
       // 3. create memory primitive descriptor out_mem_pd and memory primitive
-      //    out_mem_p for the output (the logical sizes should much sub-sizes
+      //    out_mem_p for the output (the logical sizes should match sub-sizes
       //    used in step 2, but the format might be arbitrary)
       // 4. create reorder primitive descriptor reorder_pd based on in_submem_pd
       //    and out_mem_pd
@@ -232,6 +232,9 @@ class MklDnnSliceOp : public OpKernel {
 
       Tensor* output_tensor = nullptr;
       MklDnnShape output_mkl_shape;
+
+      // If no dimension is selected in slice, the result should be empty.
+      // Just return an empty output tensor, and a dummy Mkl-shape tensor.
       if (empty) {  // for empty dims
         auto shape_to = MklDnnDimsToTFShape(size_dims);
         AllocateOutputSetMklShape(context, 0, &output_tensor, shape_to,
@@ -250,23 +253,19 @@ class MklDnnSliceOp : public OpKernel {
         auto input_tf_format = MklDnnDataFormatToTFDataFormat(input_mkl_format);
         begin_dims = MklDnnDimsInNCHW(begin_dims, input_tf_format);
         size_dims = MklDnnDimsInNCHW(size_dims, input_tf_format);
-      }
-
-      // Initialize input dimensions and strides to be used when input is not in
-      // MklDnn layout.
-      memory::dims input_dims, input_strides;
-      if (!input_mkl_shape.IsMklTensor()) {
+        auto input_md = input_mkl_shape.GetMklLayout();
+        src.SetUsrMem(input_md, &input_tensor);
+      } else {
+        // Initialize input dimensions and strides to be used when input is not in
+        // MklDnn layout.
+        memory::dims input_dims, input_strides;
         input_dims = TFShapeToMklDnnDims(input_tensor.shape());
         input_strides = CalculateTFStrides(input_dims);
+        // Create input memory descriptor.
+        auto input_md = MklDnnData<T>::CreateBlockedMemDesc(input_dims, input_strides);
+        src.SetUsrMem(input_md, &input_tensor);
       }
 
-      // Create input memory descriptor.
-      auto input_md =
-          input_mkl_shape.IsMklTensor()
-              ? input_mkl_shape.GetMklLayout()
-              : MklDnnData<T>::CreateBlockedMemDesc(input_dims, input_strides);
-      src.SetUsrMem(input_md, &input_tensor);
-
       // Step 2 - create view primitive descriptor
       auto view_pd =
           view::primitive_desc(src.GetUsrMemPrimDesc(), size_dims, begin_dims)
@@ -291,6 +290,7 @@ class MklDnnSliceOp : public OpKernel {
           reorder::primitive_desc(view_pd, output.GetUsrMemPrimDesc());
       // Step 5 - create reorder primitive itself.
       net.push_back(reorder(reorder_pd, *src.GetUsrMem(), *output.GetUsrMem()));
+      // Execute the reorder primitive.
       stream(stream::kind::eager).submit(net).wait();
     } catch (mkldnn::error& e) {
       string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 18cc529a9b..2dec430710 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -1681,8 +1681,8 @@ REGISTER_OP("_MklSlice")
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &begin_value));
 
       // NOTE(mrry): We can't use `MakeShapeFromShapeTensor` for `sizes` because
-      // it might contain -1, which can't be represented -1 in the ShapeHandle
-      // would meqan "unknown".
+      // it might contain -1, which can't be represented. (-1 in the ShapeHandle
+      // would mean "unknown".)
       const Tensor* sizes_value = c->input_tensor(3);
 
       if (sizes_value != nullptr) {
@@ -1698,7 +1698,43 @@ REGISTER_OP("_MklSlice")
           TF_RETURN_IF_ERROR(
               SliceHelper<int32>(c, begin_value, sizes_value, &dims));
         }
+
+        c->set_output(0, c->MakeShape(dims));
+        return Status::OK();
+      } else {
+        // In case `sizes` is not available (`sizes_value` is null),
+        // we could try to use `MakeShapeFromShapeTensor` here.
+        // If sizes contain -1, we will simply consider it as `Unknown`.
+        // This is less than ideal but still an improvement of shape inference.
+        // The following is an example that returns [None, 1, None] with this
+        // code path:
+        //   z = tf.zeros((1, 2, 3))
+        //   m = tf.slice(z, [0, 0, 0], [tf.constant(1) + 0, 1, -1])
+        //   m.get_shape().as_list()
+        ShapeHandle sizes_value;
+        TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(2, &sizes_value));
+        if (c->RankKnown(sizes_value)) {
+          TF_RETURN_IF_ERROR(
+              c->WithRank(begin_value, c->Rank(sizes_value), &begin_value));
+          std::vector<DimensionHandle> dims;
+          dims.reserve(c->Rank(sizes_value));
+          for (int i = 0; i < c->Rank(sizes_value); ++i) {
+            dims.emplace_back(c->Dim(sizes_value, i));
+          }
+          c->set_output(0, c->MakeShape(dims));
+          return Status::OK();
+        }
+
+        // We might know the rank of the input.
+        if (c->RankKnown(input)) {
+          c->set_output(0, c->UnknownShapeOfRank(c->Rank(input)));
+          return Status::OK();
+        } else {
+          return shape_inference::UnknownShape(c);
+        }
       }
+
+      return Status::OK();
     });
 #endif
 
-- 
GitLab


From f1d42c8967410db1e08c0b6d62dc1fc4844165a8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 00:39:12 -0700
Subject: [PATCH 0249/1357] Implement ZerosLike

PiperOrigin-RevId: 213227615
---
 tensorflow/contrib/lite/build_def.bzl         |  1 +
 .../lite/g3doc/tf_ops_compatibility.md        | 11 +++
 tensorflow/contrib/lite/kernels/BUILD         | 15 ++++
 tensorflow/contrib/lite/kernels/register.cc   |  2 +
 tensorflow/contrib/lite/kernels/zeros_like.cc | 73 +++++++++++++++++
 .../contrib/lite/kernels/zeros_like_test.cc   | 78 +++++++++++++++++++
 .../contrib/lite/testing/generate_examples.py | 25 ++++++
 .../contrib/lite/toco/export_tensorflow.cc    | 17 ++++
 .../propagate_fixed_sizes.cc                  |  1 +
 .../contrib/lite/toco/import_tensorflow.cc    |  1 +
 tensorflow/contrib/lite/toco/model.h          | 11 +++
 .../contrib/lite/toco/tflite/operator.cc      |  2 +
 .../contrib/lite/toco/tflite/operator_test.cc |  2 +
 tensorflow/contrib/lite/toco/tooling_util.cc  |  1 +
 14 files changed, 240 insertions(+)
 create mode 100644 tensorflow/contrib/lite/kernels/zeros_like.cc
 create mode 100644 tensorflow/contrib/lite/kernels/zeros_like_test.cc

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 52b994ee92..fc4d9b4f17 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -294,6 +294,7 @@ def generated_test_models():
         #"transpose_conv",   # disabled due to b/111213074
         "unpack",
         "where",
+        "zeros_like",
     ]
 
 def generated_test_conversion_modes():
diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
index 8660d29855..b0dfb0fed1 100644
--- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
+++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
@@ -866,6 +866,17 @@ Outputs {
 }
 ```
 
+**ZEROS_LIKE**
+
+```
+Inputs {
+  0: a tensor
+}
+Outputs {
+  0: A tensor of the same shape and type as x but filled with zeros
+}
+```
+
 And these are TensorFlow Lite operations that are present but not ready for
 custom models yet:
 
diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index 40f28aeab4..f52d29ea76 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -223,6 +223,7 @@ cc_library(
         "unidirectional_sequence_lstm.cc",
         "unidirectional_sequence_rnn.cc",
         "unpack.cc",
+        "zeros_like.cc",
     ],
     hdrs = [
     ],
@@ -1284,6 +1285,20 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "zeros_like_test",
+    size = "small",
+    srcs = ["zeros_like_test.cc"],
+    tags = ["tflite_not_portable_ios"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:builtin_op_data",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index 14296d3a9f..6e35799c35 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -119,6 +119,7 @@ TfLiteRegistration* Register_LOGICAL_NOT();
 TfLiteRegistration* Register_UNPACK();
 TfLiteRegistration* Register_FLOOR_DIV();
 TfLiteRegistration* Register_SQUARE();
+TfLiteRegistration* Register_ZEROS_LIKE();
 
 TfLiteStatus UnsupportedTensorFlowOp(TfLiteContext* context, TfLiteNode* node) {
   context->ReportError(
@@ -245,6 +246,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK());
   AddBuiltin(BuiltinOperator_FLOOR_DIV, Register_FLOOR_DIV());
   AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
+  AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE());
 
   // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that
   // custom ops aren't always included by default.
diff --git a/tensorflow/contrib/lite/kernels/zeros_like.cc b/tensorflow/contrib/lite/kernels/zeros_like.cc
new file mode 100644
index 0000000000..cce5240a9b
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/zeros_like.cc
@@ -0,0 +1,73 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace zeros_like {
+
+constexpr int kInputTensor = 0;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  output->type = input->type;
+
+  return context->ResizeTensor(context, output,
+                               TfLiteIntArrayCopy(input->dims));
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const int num_elements = NumElements(input);
+  switch (input->type) {
+    case kTfLiteInt64:
+      memset(GetTensorData<int64_t>(output), 0, num_elements * sizeof(int64_t));
+      break;
+    case kTfLiteInt32:
+      memset(GetTensorData<int32_t>(output), 0, num_elements * sizeof(int32_t));
+      break;
+    case kTfLiteFloat32:
+      memset(GetTensorData<float>(output), 0, num_elements * sizeof(float));
+      break;
+    default:
+      context->ReportError(context,
+                           "ZerosLike only currently supports int64, int32, "
+                           "and float32, got %d.",
+                           input->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace zeros_like
+
+TfLiteRegistration* Register_ZEROS_LIKE() {
+  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
+                                 zeros_like::Prepare, zeros_like::Eval};
+  return &r;
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/zeros_like_test.cc b/tensorflow/contrib/lite/kernels/zeros_like_test.cc
new file mode 100644
index 0000000000..d3382d1d5b
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/zeros_like_test.cc
@@ -0,0 +1,78 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class ZerosLikeOpModel : public SingleOpModel {
+ public:
+  explicit ZerosLikeOpModel(const TensorData& input) {
+    input_ = AddInput(input);
+    output_ = AddOutput(input);
+    SetBuiltinOp(BuiltinOperator_ZEROS_LIKE, BuiltinOptions_ZerosLikeOptions,
+                 CreateZerosLikeOptions(builder_).Union());
+    BuildInterpreter({GetShape(input_)});
+  }
+
+  int input() { return input_; }
+  int output() { return output_; }
+
+ protected:
+  int input_;
+  int output_;
+};
+
+TEST(ZerosLikeOpModel, ZerosLikeFloat) {
+  ZerosLikeOpModel m({TensorType_FLOAT32, {2, 3}});
+  m.PopulateTensor<float>(m.input(), {-2.0, -1.0, 0.0, 1.0, 2.0, 3.0});
+  m.Invoke();
+  EXPECT_THAT(m.ExtractVector<float>(m.output()),
+              ElementsAreArray({0.0, 0.0, 0.0, 0.0, 0.0, 0.0}));
+  EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({2, 3}));
+}
+
+TEST(ZerosLikeOpModel, ZerosLikeInt32) {
+  ZerosLikeOpModel m({TensorType_INT32, {1, 2, 2, 1}});
+  m.PopulateTensor<int32_t>(m.input(), {-2, -1, 0, 3});
+  m.Invoke();
+  EXPECT_THAT(m.ExtractVector<int32_t>(m.output()),
+              ElementsAreArray({0, 0, 0, 0}));
+  EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 2, 2, 1}));
+}
+
+TEST(ZerosLikeOpModel, ZerosLikeInt64) {
+  ZerosLikeOpModel m({TensorType_INT64, {1, 2, 2, 1}});
+  m.PopulateTensor<int64_t>(m.input(), {-2, -1, 0, 3});
+  m.Invoke();
+  EXPECT_THAT(m.ExtractVector<int64_t>(m.output()),
+              ElementsAreArray({0, 0, 0, 0}));
+  EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 2, 2, 1}));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 3754b58b23..014c80b5ef 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -2834,6 +2834,31 @@ def make_neg_tests(zip_path):
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
+def make_zeros_like_tests(zip_path):
+  """Make a set of tests to do zeros_like."""
+
+  test_parameters = [{
+      "input_dtype": [tf.float32, tf.int32, tf.int64],
+      "input_shape": [[], [1], [1, 2], [5, 6, 7, 8], [3, 4, 5, 6]],
+  }]
+
+  def build_graph(parameters):
+    """Build the zeros_like op testing graph."""
+    input_tensor = tf.placeholder(
+        dtype=parameters["input_dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+    out = tf.zeros_like(input_tensor)
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    values = create_tensor_data(parameters["input_dtype"],
+                                parameters["input_shape"])
+    return [values], sess.run(outputs, feed_dict=dict(zip(inputs, [values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
 def _make_elementwise_tests(op):
   """Make a set of tests to do element-wise operations."""
 
diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc
index b52a79282c..3a534300ae 100644
--- a/tensorflow/contrib/lite/toco/export_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc
@@ -1968,6 +1968,19 @@ void ConvertUnpackOperator(const Model& model, const UnpackOperator& src_op,
   (*unpack_op->mutable_attr())["axis"].set_i(src_op.axis);
 }
 
+void ConvertZerosLikeOperator(const Model& model,
+                              const TensorFlowZerosLikeOperator& src_op,
+                              const char* op_name, GraphDef* tensorflow_graph) {
+  tensorflow::NodeDef* zeros_like_op = tensorflow_graph->add_node();
+  zeros_like_op->set_op(op_name);
+  zeros_like_op->set_name(src_op.outputs[0]);
+  DCHECK_EQ(src_op.inputs.size(), 1);
+  *zeros_like_op->add_input() = src_op.inputs[0];
+  const tensorflow::DataType data_type =
+      GetTensorFlowDataType(model, src_op.inputs[0]);
+  (*zeros_like_op->mutable_attr())["T"].set_type(data_type);
+}
+
 void ConvertOperator(const Model& model, const Operator& src_op,
                      GraphDef* tensorflow_graph) {
   if (src_op.fused_activation_function != FusedActivationFunctionType::kNone) {
@@ -2233,6 +2246,10 @@ void ConvertOperator(const Model& model, const Operator& src_op,
   } else if (src_op.type == OperatorType::kUnpack) {
     ConvertUnpackOperator(model, static_cast<const UnpackOperator&>(src_op),
                           "Unpack", tensorflow_graph);
+  } else if (src_op.type == OperatorType::kZerosLike) {
+    ConvertZerosLikeOperator(
+        model, static_cast<const TensorFlowZerosLikeOperator&>(src_op),
+        "ZerosLike", tensorflow_graph);
   } else {
     LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(src_op.type);
   }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index f103bb94ae..6c72e20121 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -1655,6 +1655,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
     case OperatorType::kLogicalAnd:
     case OperatorType::kLogicalNot:
     case OperatorType::kLogicalOr:
+    case OperatorType::kZerosLike:
       ProcessSimpleOperator(model, op, 0);
       break;
     case OperatorType::kGather:
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 2ccfd36b7c..4c678e7e73 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -2065,6 +2065,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() {
       {"TopKV2", ConvertTopKV2Operator},
       {"Transpose", ConvertSimpleOperator<TransposeOperator, 2>},
       {"Unpack", ConvertUnpackOperator},
+      {"ZerosLike", ConvertSimpleOperator<TensorFlowZerosLikeOperator, 1>},
   });
 }
 
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 164b70f2df..0fd2732973 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -150,6 +150,7 @@ enum class OperatorType : uint8 {
   kLogicalOr,
   kCTCBeamSearchDecoder,
   kUnpack,
+  kZerosLike,
 };
 
 // Helper to deal with TensorFlow arrays using a different ordering of
@@ -1849,6 +1850,16 @@ struct UnpackOperator : Operator {
   ArrayDataType dtype = ArrayDataType::kNone;
 };
 
+// ZerosLike operator:
+//
+// Inputs:
+// inputs[0]: required: the input array
+//
+// TensorFlow equivalent: tf.zeros_like
+struct TensorFlowZerosLikeOperator : Operator {
+  TensorFlowZerosLikeOperator() : Operator(OperatorType::kZerosLike) {}
+};
+
 // Alloc's are used for transient arrays only. An Alloc specifies which interval
 // of the "transient_data" workspace buffer passed to inference functions, is to
 // be used for the transient array at hand. The 'start' and 'end' values are
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 1061e7c7c4..c59a28b864 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -1500,6 +1500,8 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
       "RSQRT", OperatorType::kRsqrt));
   ops.push_back(MakeUnique<SimpleOperator<TensorFlowSquareOperator>>(
       "SQUARE", OperatorType::kSquare));
+  ops.push_back(MakeUnique<SimpleOperator<TensorFlowZerosLikeOperator>>(
+      "ZEROS_LIKE", OperatorType::kZerosLike));
 
   return ops;
 }
diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
index 72e50a9aed..0bc591e647 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
@@ -146,6 +146,8 @@ TEST_F(OperatorTest, SimpleOperators) {
   CheckSimpleOperator<FloorDivOperator>("FLOOR_DIV", OperatorType::kFloorDiv);
   CheckSimpleOperator<TensorFlowSquareOperator>("SQUARE",
                                                 OperatorType::kSquare);
+  CheckSimpleOperator<TensorFlowZerosLikeOperator>("ZEROS_LIKE",
+                                                   OperatorType::kZerosLike);
 }
 
 TEST_F(OperatorTest, BuiltinAdd) {
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 6ab93d9316..4a1ae35cb5 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -406,6 +406,7 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(LogicalOr)
     HANDLE_OPERATORTYPENAME_CASE(CTCBeamSearchDecoder)
     HANDLE_OPERATORTYPENAME_CASE(Unpack)
+    HANDLE_OPERATORTYPENAME_CASE(ZerosLike)
     default:
       LOG(FATAL) << "Unhandled op type";
 #undef HANDLE_OPERATORTYPENAME_CASE
-- 
GitLab


From 0827dcb82b0bf3d8d543cef1d3a17d330f183848 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 01:59:43 -0700
Subject: [PATCH 0250/1357] Add fill to schema.

PiperOrigin-RevId: 213234759
---
 tensorflow/contrib/lite/builtin_ops.h         |   1 +
 .../lite/core/api/flatbuffer_conversions.cc   |   1 +
 tensorflow/contrib/lite/nnapi_delegate.cc     |   1 +
 tensorflow/contrib/lite/schema/schema.fbs     |   5 +
 .../contrib/lite/schema/schema_generated.h    | 124 +++++++++++++++++-
 5 files changed, 126 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h
index 7f33942c90..7809d114e2 100644
--- a/tensorflow/contrib/lite/builtin_ops.h
+++ b/tensorflow/contrib/lite/builtin_ops.h
@@ -119,6 +119,7 @@ typedef enum {
   kTfLiteBuiltinReduceAny = 91,
   kTfLiteBuiltinSquare = 92,
   kTfLiteBuiltinZerosLike = 93,
+  kTfLiteBuiltinFill = 94,
 } TfLiteBuiltinOperator;
 
 #ifdef __cplusplus
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index ceb2bbd612..03af538073 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -619,6 +619,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_FLOOR_DIV:
     case BuiltinOperator_SQUARE:
     case BuiltinOperator_ZEROS_LIKE:
+    case BuiltinOperator_FILL:
       break;
   }
   return kTfLiteOk;
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 3b6a81ffde..a1c7434599 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -674,6 +674,7 @@ TfLiteStatus AddOpsAndParams(
       case tflite::BuiltinOperator_REDUCE_ANY:
       case tflite::BuiltinOperator_SQUARE:
       case tflite::BuiltinOperator_ZEROS_LIKE:
+      case tflite::BuiltinOperator_FILL:
         logError("Op code %d is currently not delegated to NNAPI", builtin);
         return kTfLiteError;
         break;
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index 4c339317cb..3da3188c3a 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -175,6 +175,7 @@ enum BuiltinOperator : byte {
   REDUCE_ANY = 91,
   SQUARE = 92,
   ZEROS_LIKE = 93,
+  FILL = 94,
 }
 
 // Options for the builtin operators.
@@ -246,6 +247,7 @@ union BuiltinOptions {
   FloorDivOptions,
   SquareOptions,
   ZerosLikeOptions,
+  FillOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -593,6 +595,9 @@ table SquareOptions {
 table ZerosLikeOptions {
 }
 
+table FillOptions {
+}
+
 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
 // builtin, or a string if the operator is custom.
 table OperatorCode {
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index 03c227f987..c7a59cabc5 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -232,6 +232,9 @@ struct SquareOptionsT;
 struct ZerosLikeOptions;
 struct ZerosLikeOptionsT;
 
+struct FillOptions;
+struct FillOptionsT;
+
 struct OperatorCode;
 struct OperatorCodeT;
 
@@ -391,11 +394,12 @@ enum BuiltinOperator {
   BuiltinOperator_REDUCE_ANY = 91,
   BuiltinOperator_SQUARE = 92,
   BuiltinOperator_ZEROS_LIKE = 93,
+  BuiltinOperator_FILL = 94,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_ZEROS_LIKE
+  BuiltinOperator_MAX = BuiltinOperator_FILL
 };
 
-inline BuiltinOperator (&EnumValuesBuiltinOperator())[93] {
+inline BuiltinOperator (&EnumValuesBuiltinOperator())[94] {
   static BuiltinOperator values[] = {
     BuiltinOperator_ADD,
     BuiltinOperator_AVERAGE_POOL_2D,
@@ -489,7 +493,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[93] {
     BuiltinOperator_FLOOR_DIV,
     BuiltinOperator_REDUCE_ANY,
     BuiltinOperator_SQUARE,
-    BuiltinOperator_ZEROS_LIKE
+    BuiltinOperator_ZEROS_LIKE,
+    BuiltinOperator_FILL
   };
   return values;
 }
@@ -590,6 +595,7 @@ inline const char **EnumNamesBuiltinOperator() {
     "REDUCE_ANY",
     "SQUARE",
     "ZEROS_LIKE",
+    "FILL",
     nullptr
   };
   return names;
@@ -669,11 +675,12 @@ enum BuiltinOptions {
   BuiltinOptions_FloorDivOptions = 65,
   BuiltinOptions_SquareOptions = 66,
   BuiltinOptions_ZerosLikeOptions = 67,
+  BuiltinOptions_FillOptions = 68,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_ZerosLikeOptions
+  BuiltinOptions_MAX = BuiltinOptions_FillOptions
 };
 
-inline BuiltinOptions (&EnumValuesBuiltinOptions())[68] {
+inline BuiltinOptions (&EnumValuesBuiltinOptions())[69] {
   static BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -742,7 +749,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[68] {
     BuiltinOptions_UnpackOptions,
     BuiltinOptions_FloorDivOptions,
     BuiltinOptions_SquareOptions,
-    BuiltinOptions_ZerosLikeOptions
+    BuiltinOptions_ZerosLikeOptions,
+    BuiltinOptions_FillOptions
   };
   return values;
 }
@@ -817,6 +825,7 @@ inline const char **EnumNamesBuiltinOptions() {
     "FloorDivOptions",
     "SquareOptions",
     "ZerosLikeOptions",
+    "FillOptions",
     nullptr
   };
   return names;
@@ -1099,6 +1108,10 @@ template<> struct BuiltinOptionsTraits<ZerosLikeOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
 };
 
+template<> struct BuiltinOptionsTraits<FillOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -1666,6 +1679,14 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_ZerosLikeOptions ?
       reinterpret_cast<const ZerosLikeOptionsT *>(value) : nullptr;
   }
+  FillOptionsT *AsFillOptions() {
+    return type == BuiltinOptions_FillOptions ?
+      reinterpret_cast<FillOptionsT *>(value) : nullptr;
+  }
+  const FillOptionsT *AsFillOptions() const {
+    return type == BuiltinOptions_FillOptions ?
+      reinterpret_cast<const FillOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -5949,6 +5970,46 @@ inline flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(
 
 flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct FillOptionsT : public flatbuffers::NativeTable {
+  typedef FillOptions TableType;
+  FillOptionsT() {
+  }
+};
+
+struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef FillOptionsT NativeTableType;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  FillOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<FillOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FillOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  FillOptionsBuilder &operator=(const FillOptionsBuilder &);
+  flatbuffers::Offset<FillOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FillOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FillOptions> CreateFillOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  FillOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct OperatorCodeT : public flatbuffers::NativeTable {
   typedef OperatorCode TableType;
   BuiltinOperator builtin_code;
@@ -6283,6 +6344,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const {
     return builtin_options_type() == BuiltinOptions_ZerosLikeOptions ? static_cast<const ZerosLikeOptions *>(builtin_options()) : nullptr;
   }
+  const FillOptions *builtin_options_as_FillOptions() const {
+    return builtin_options_type() == BuiltinOptions_FillOptions ? static_cast<const FillOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -6582,6 +6646,10 @@ template<> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLike
   return builtin_options_as_ZerosLikeOptions();
 }
 
+template<> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const {
+  return builtin_options_as_FillOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -8873,6 +8941,29 @@ inline flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(flatbuffers:
       _fbb);
 }
 
+inline FillOptionsT *FillOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new FillOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void FillOptions::UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<FillOptions> FillOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateFillOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FillOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateFillOptions(
+      _fbb);
+}
+
 inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorCodeT();
   UnPackTo(_o, _resolver);
@@ -9330,6 +9421,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_FillOptions: {
+      auto ptr = reinterpret_cast<const FillOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -9616,6 +9711,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_FillOptions: {
+      auto ptr = reinterpret_cast<const FillOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -9890,6 +9989,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const ZerosLikeOptionsT *>(value);
       return CreateZerosLikeOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_FillOptions: {
+      auto ptr = reinterpret_cast<const FillOptionsT *>(value);
+      return CreateFillOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -10164,6 +10267,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new ZerosLikeOptionsT(*reinterpret_cast<ZerosLikeOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_FillOptions: {
+      value = new FillOptionsT(*reinterpret_cast<FillOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -10506,6 +10613,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_FillOptions: {
+      auto ptr = reinterpret_cast<FillOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
-- 
GitLab


From 5ff7f982846bd3f8056c8252a0afeb07e5b3e982 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 02:01:53 -0700
Subject: [PATCH 0251/1357] compat: Update forward compatibility horizon to
 2018-09-17

PiperOrigin-RevId: 213234942
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index c246a98237..0d2f2c9b9e 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 16)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 17)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 2b39e9861acaf06923e4e0802581dd7581609a01 Mon Sep 17 00:00:00 2001
From: tomguluson92 <314913739@qq.com>
Date: Mon, 17 Sep 2018 17:12:10 +0800
Subject: [PATCH 0252/1357] revised a parameter error

Hi, i found that when firstly use `interpreter `as a parameter pass into `eval_model` function, wrong spell mistake of `interpreter_quant`.
---
 tensorflow/contrib/lite/tutorials/post_training_quant.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
index 4929133bda..82abbc1532 100644
--- a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
+++ b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
@@ -542,7 +542,7 @@
       },
       "outputs": [],
       "source": [
-        "print(eval_model(interpreter_quant, mnist_ds))"
+        "print(eval_model(interpreter, mnist_ds))"
       ]
     },
     {
-- 
GitLab


From fa80a920f2a3bc00522fe95fc9a07a28d67fc055 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Mon, 17 Sep 2018 12:50:18 +0300
Subject: [PATCH 0253/1357] Add 'override' specifier to ReadData, WriteData.

---
 tensorflow/contrib/ignite/kernels/ignite_plain_client.h | 4 ++--
 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
index 750ebe605a..d12d56fdc1 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
@@ -29,8 +29,8 @@ class PlainClient : public Client {
   virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual Status ReadData(uint8_t* buf, const int32_t length);
-  virtual Status WriteData(const uint8_t* buf, const int32_t length);
+  virtual Status ReadData(uint8_t* buf, const int32_t length) override;
+  virtual Status WriteData(const uint8_t* buf, const int32_t length) override;
 
  private:
   const string host_;
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
index d59ce91aba..372156a757 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
@@ -32,8 +32,8 @@ class SslWrapper : public Client {
   virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual Status ReadData(uint8_t* buf, const int32_t length);
-  virtual Status WriteData(const uint8_t* buf, const int32_t length);
+  virtual Status ReadData(uint8_t* buf, const int32_t length) override;
+  virtual Status WriteData(const uint8_t* buf, const int32_t length) override;
 
  private:
   Status InitSslContext();
-- 
GitLab


From cac963862be3faa421c559f39033c9bfb3b27a51 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Mon, 17 Sep 2018 03:12:38 -0700
Subject: [PATCH 0254/1357] [XLA:TF] Enable int8 and uint8 support in the
 bridge for CPU/GPU

The test changes are awkward. None of these are XLA bugs, it's just that the op
definitions in tensorflow are really inconsistent. I tried to infer whether the
limitation is on signed types, index types or just arbitrary. In the latter
case just int8/uint8 is blacklisted, we should probably lift that requirement
at some point.

PiperOrigin-RevId: 213243906
---
 tensorflow/compiler/jit/xla_cpu_device.cc      |  5 +++--
 tensorflow/compiler/jit/xla_gpu_device.cc      |  6 +++---
 tensorflow/compiler/tests/argminmax_test.py    |  4 ++--
 tensorflow/compiler/tests/binary_ops_test.py   | 11 ++++++-----
 tensorflow/compiler/tests/build_defs.bzl       |  4 ++--
 tensorflow/compiler/tests/random_ops_test.py   |  3 ++-
 .../compiler/tests/reverse_sequence_op_test.py |  2 +-
 tensorflow/compiler/tests/unary_ops_test.py    |  4 ++--
 tensorflow/compiler/tests/xla_ops_test.py      |  2 +-
 tensorflow/compiler/tests/xla_test.py          |  6 ++++++
 tensorflow/compiler/tf2xla/xla_op_registry.h   | 18 +++++++++---------
 11 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc
index 7e159e3171..1afc305abe 100644
--- a/tensorflow/compiler/jit/xla_cpu_device.cc
+++ b/tensorflow/compiler/jit/xla_cpu_device.cc
@@ -65,8 +65,9 @@ REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_CPU, XlaCpuDeviceFactory);
 
 // Kernel registrations
 
-constexpr std::array<DataType, 7> kAllXlaCpuTypes = {
-    {DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}};
+constexpr std::array<DataType, 9> kAllXlaCpuTypes = {
+    {DT_UINT8, DT_INT8, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE,
+     DT_COMPLEX64, DT_BOOL}};
 
 REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_CPU, XlaLocalLaunchOp, kAllXlaCpuTypes);
 REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_CPU, kAllXlaCpuTypes);
diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index ef4466f005..4cf556524d 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -74,9 +74,9 @@ REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_GPU, XlaGpuDeviceFactory);
 
 // Kernel registrations
 
-constexpr std::array<DataType, 8> kAllXlaGpuTypes = {
-    {DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL,
-     DT_BFLOAT16}};
+constexpr std::array<DataType, 10> kAllXlaGpuTypes = {
+    {DT_UINT8, DT_INT8, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE,
+     DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}};
 
 REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_GPU, XlaLocalLaunchOp, kAllXlaGpuTypes);
 REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_GPU, kAllXlaGpuTypes);
diff --git a/tensorflow/compiler/tests/argminmax_test.py b/tensorflow/compiler/tests/argminmax_test.py
index 4155342787..68f52e796c 100644
--- a/tensorflow/compiler/tests/argminmax_test.py
+++ b/tensorflow/compiler/tests/argminmax_test.py
@@ -50,12 +50,12 @@ class ArgMinMaxTest(xla_test.XLATestCase):
 
   def testArgMinMax(self):
     # Complex numbers do not support argmin/argmax.
-    minmax_types = set(self.numeric_types) - set(self.complex_types)
+    minmax_types = self.all_types & {np.int32, np.int64}
     for dtype in minmax_types:
       # output_type is a numpy data type that is used to specify the desired
       # output type of the op as well as to convert the Python number to the
       # array scalar of the type.
-      for output_type in self.int_types:
+      for output_type in minmax_types:
         self._assertOpOutputMatchesExpected(
             math_ops.argmax,
             axis=0,
diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 17280e445b..900e84ab58 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -210,7 +210,7 @@ class BinaryOpsTest(xla_test.XLATestCase):
             equality_test=self.ListsAreClose)
 
   def testIntOps(self):
-    for dtype in self.int_types:
+    for dtype in self.signed_int_types:
       self._testBinary(
           gen_math_ops.truncate_div,
           np.array([3, 3, -1, -9, -8], dtype=dtype),
@@ -287,7 +287,8 @@ class BinaryOpsTest(xla_test.XLATestCase):
           dtype(7),
           expected=np.array([[-6], [-5]], dtype=dtype))
 
-      if dtype not in self.complex_types:  # min/max not supported for complex
+      # min/max not supported for complex
+      if dtype not in self.complex_types | {np.uint8, np.int8}:
         self._testBinary(
             math_ops.maximum,
             np.array([1, 2], dtype=dtype),
@@ -337,7 +338,7 @@ class BinaryOpsTest(xla_test.XLATestCase):
           expected=np.array([[70], [14]], dtype=dtype))
 
       # Complex support for squared_difference is incidental, see b/68205550
-      if dtype not in self.complex_types:
+      if dtype not in self.complex_types | {np.uint8, np.int8}:
         self._testBinary(
             math_ops.squared_difference,
             np.array([1, 2], dtype=dtype),
@@ -567,7 +568,7 @@ class BinaryOpsTest(xla_test.XLATestCase):
           expected=np.array([1, -2, -1, -5, 2], dtype=dtype))
 
   def testIntDivision(self):
-    for dtype in self.int_types:
+    for dtype in self.signed_int_types:
       self._testDivision(dtype)
 
   def testFloatDivision(self):
@@ -588,7 +589,7 @@ class BinaryOpsTest(xla_test.XLATestCase):
         expected=np.array([1, 1, -1, 0], dtype=dtype))
 
   def testIntRemainder(self):
-    for dtype in self.int_types:
+    for dtype in self.signed_int_types - {np.int8}:
       self._testRemainder(dtype)
 
   def testFloatRemainder(self):
diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl
index a76f136736..114793352e 100644
--- a/tensorflow/compiler/tests/build_defs.bzl
+++ b/tensorflow/compiler/tests/build_defs.bzl
@@ -58,12 +58,12 @@ def tf_xla_py_test(
         if backend == "cpu":
             backend_args += [
                 "--test_device=XLA_CPU",
-                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64",
+                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_UINT8,DT_INT8,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64",
             ]
         elif backend == "gpu":
             backend_args += [
                 "--test_device=XLA_GPU",
-                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16",
+                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_UINT8,DT_INT8,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16",
             ]
             backend_tags += ["requires-gpu-sm35"]
         elif backend in plugins:
diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py
index 6e18344117..41fe42a26b 100644
--- a/tensorflow/compiler/tests/random_ops_test.py
+++ b/tensorflow/compiler/tests/random_ops_test.py
@@ -35,7 +35,8 @@ class RandomOpsTest(xla_test.XLATestCase):
   """Test cases for random-number generating operators."""
 
   def _random_types(self):
-    return set(self.numeric_types) - set(self.complex_types)
+    return set(self.numeric_types) - set(
+        self.complex_types) - {np.uint8, np.int8}
 
   def _testRngIsNotConstant(self, rng, dtype):
     # Tests that 'rng' does not always return the same value.
diff --git a/tensorflow/compiler/tests/reverse_sequence_op_test.py b/tensorflow/compiler/tests/reverse_sequence_op_test.py
index 60c2337743..abc822ef36 100644
--- a/tensorflow/compiler/tests/reverse_sequence_op_test.py
+++ b/tensorflow/compiler/tests/reverse_sequence_op_test.py
@@ -85,7 +85,7 @@ class ReverseSequenceTest(xla_test.XLATestCase):
 
   def testSeqLength(self):
     for dtype in self.all_types:
-      for seq_dtype in self.int_types:
+      for seq_dtype in self.all_types & {np.int32, np.int64}:
         self._testBasic(dtype, seq_dtype)
 
 
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index 5b0e57f83f..04ea004fe7 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -84,7 +84,7 @@ class UnaryOpsTest(xla_test.XLATestCase):
       self.assertAllClose(result[i], expected[i], rtol, atol)
 
   def testAllTypeOps(self):
-    for dtype in self.numeric_types:
+    for dtype in self.numeric_types - {np.int8, np.uint8}:
       self._assertOpOutputMatchesExpected(
           array_ops.diag, np.array([1, 2, 3, 4], dtype=dtype),
           np.array(
@@ -633,7 +633,7 @@ class UnaryOpsTest(xla_test.XLATestCase):
           expected=np.array([-1, 0, -2, -17, -43], dtype=dtype))
 
   def testNumericOps(self):
-    for dtype in self.numeric_types:
+    for dtype in self.numeric_types - {np.int8, np.uint8}:
       self._assertOpOutputMatchesExpected(
           math_ops.abs,
           np.array([[2, -1]], dtype=dtype),
diff --git a/tensorflow/compiler/tests/xla_ops_test.py b/tensorflow/compiler/tests/xla_ops_test.py
index 1e600c44e9..4cf88fc523 100644
--- a/tensorflow/compiler/tests/xla_ops_test.py
+++ b/tensorflow/compiler/tests/xla_ops_test.py
@@ -181,7 +181,7 @@ class XlaOpsTest(xla_test.XLATestCase, parameterized.TestCase):
               dtype=dtype))
 
   def testNeg(self):
-    for dtype in self.numeric_types:
+    for dtype in self.numeric_types - {np.uint8, np.int8}:
       self._assertOpOutputMatchesExpected(
           xla.neg,
           args=(np.array([1, 2, 3], dtype=dtype),),
diff --git a/tensorflow/compiler/tests/xla_test.py b/tensorflow/compiler/tests/xla_test.py
index 88827cb53b..df5c81243a 100644
--- a/tensorflow/compiler/tests/xla_test.py
+++ b/tensorflow/compiler/tests/xla_test.py
@@ -101,6 +101,12 @@ class XLATestCase(test.TestCase):
     self._all_types = set(
         [dtype.as_numpy_dtype for dtype in self._all_tf_types])
     self._int_types = set([dtype.as_numpy_dtype for dtype in self.int_tf_types])
+    self.signed_int_types = set(dtype.as_numpy_dtype
+                                for dtype in self.int_tf_types
+                                if not dtype.is_unsigned)
+    self.unsigned_int_types = set(dtype.as_numpy_dtype
+                                  for dtype in self.int_tf_types
+                                  if dtype.is_unsigned)
     self._float_types = set(
         [dtype.as_numpy_dtype for dtype in self._float_tf_types])
     self.complex_types = set([
diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h
index 74a4885f1f..34e22a4510 100644
--- a/tensorflow/compiler/tf2xla/xla_op_registry.h
+++ b/tensorflow/compiler/tf2xla/xla_op_registry.h
@@ -47,17 +47,17 @@ extern const char* const DEVICE_XLA_GPU;
 
 constexpr std::array<DataType, 4> kFloatTypes = {
     {DT_HALF, DT_FLOAT, DT_DOUBLE, DT_BFLOAT16}};
-constexpr std::array<DataType, 9> kNumericTypes = {
-    {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE,
-     DT_COMPLEX64, DT_BFLOAT16}};
+constexpr std::array<DataType, 11> kNumericTypes = {
+    {DT_UINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_INT32, DT_INT64, DT_HALF,
+     DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BFLOAT16}};
 
-constexpr std::array<DataType, 9> kCpuAllTypes = {
-    {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE,
-     DT_COMPLEX64, DT_BOOL}};
+constexpr std::array<DataType, 11> kCpuAllTypes = {
+    {DT_UINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_INT32, DT_INT64, DT_HALF,
+     DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}};
 
-constexpr std::array<DataType, 10> kGpuAllTypes = {
-    {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE,
-     DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}};
+constexpr std::array<DataType, 12> kGpuAllTypes = {
+    {DT_UINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_INT32, DT_INT64, DT_HALF,
+     DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}};
 
 // Class that manages registrations of operators and devices for the XLA JIT.
 // Not thread-safe.
-- 
GitLab


From 055e5a0f71c83bab3f645d1c2e2cadeff5ff654f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 06:29:16 -0700
Subject: [PATCH 0255/1357] README s/tensorflow.contrib/tensorflow.python/.

PiperOrigin-RevId: 213262445
---
 tensorflow/python/autograph/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/autograph/README.md b/tensorflow/python/autograph/README.md
index cc54da4daa..bfe21b4765 100644
--- a/tensorflow/python/autograph/README.md
+++ b/tensorflow/python/autograph/README.md
@@ -65,7 +65,7 @@ pip install -U tf-nightly
 Then import the `autograph` module from `tf.contrib`:
 
 ```
-from tensorflow.contrib import autograph as ag
+from tensorflow.python import autograph as ag
 ```
 
 ### Related links
-- 
GitLab


From e0d6830999a6e7c92f047e6e89c3aba20911cc8c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 08:21:43 -0700
Subject: [PATCH 0256/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213275003
---
 .../internal/optimized/depthwiseconv_float.h  | 109 +++++++++++-----
 .../internal/optimized/depthwiseconv_uint8.h  | 123 ++++++++++++------
 .../depthwiseconv_uint8_3x3_filter.h          |  66 ++++++----
 .../contrib/lite/kernels/internal/types.h     |   2 +
 4 files changed, 206 insertions(+), 94 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index 70810ca784..f2d1319801 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -907,25 +907,40 @@ inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
   }
 }
 
-inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          const float* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
-                          float output_activation_min,
-                          float output_activation_max, float* output_data,
-                          const Dims<4>& output_dims) {
+inline void DepthwiseConv(
+    const DepthwiseParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& filter_shape,
+    const float* filter_data, const RuntimeShape& bias_shape,
+    const float* bias_data, const RuntimeShape& output_shape,
+    float* output_data) {
   gemmlowp::ScopedProfilingLabel label("DepthwiseConv");
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
-  TFLITE_DCHECK(output_depth == input_depth * depth_multiplier);
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int depth_multiplier = params.depth_multiplier;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  // TODO(suharshs): Optimized implementation of dilation depthwise conv need to
+  // be implemented.
+  TFLITE_DCHECK_EQ(params.dilation_width_factor, 1);
+  TFLITE_DCHECK_EQ(params.dilation_height_factor, 1);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = input_shape.Dims(3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 
   static const int kAccBufferMaxSize = 2048;
   float acc_buffer[kAccBufferMaxSize];
@@ -990,6 +1005,10 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
     row_accum_func = FloatDepthwiseConvAccumRowGeneric;
   }
 
+  const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+  const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+  const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
   // Now that we have determined row_accum_func, we can start work.
   float* output_ptr = output_data;
   for (int b = 0; b < batches; ++b) {
@@ -1014,13 +1033,12 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
         for (int filter_y = filter_y_start; filter_y < filter_y_end;
              ++filter_y) {
           const int in_y = in_y_origin + filter_y;
-          row_accum_func(stride_width, input_depth, input_width,
-                         input_data + in_y * input_dims.strides[2] +
-                             b * input_dims.strides[3],
-                         pad_width, depth_multiplier, filter_width,
-                         filter_data + filter_y * filter_dims.strides[2],
-                         out_x_buffer_start, out_x_buffer_end, output_depth,
-                         acc_buffer);
+          row_accum_func(
+              stride_width, input_depth, input_width,
+              input_data + in_y * input_height_stride + b * input_batch_stride,
+              pad_width, depth_multiplier, filter_width,
+              filter_data + filter_y * filter_height_stride, out_x_buffer_start,
+              out_x_buffer_end, output_depth, acc_buffer);
         }
         // Finished accumulating. Now store to destination.
         const int num_output_values = output_depth * num_output_pixels;
@@ -1067,6 +1085,8 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                           const float* filter_data, const Dims<4>& filter_dims,
                           const float* bias_data, const Dims<4>& bias_dims,
@@ -1078,15 +1098,43 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                           const Dims<4>& output_dims) {
   // TODO(suharshs): Optimized implementation of dilation depthwise conv need to
   // be implemented.
-  TFLITE_DCHECK(dilation_width_factor == 1);
-  TFLITE_DCHECK(dilation_height_factor == 1);
+  TFLITE_DCHECK_EQ(dilation_width_factor, 1);
+  TFLITE_DCHECK_EQ(dilation_height_factor, 1);
 
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                bias_data, DimsToShape(output_dims), output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
   DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
-                bias_dims, stride_width, stride_height, pad_width, pad_height,
-                depth_multiplier, output_activation_min, output_activation_max,
-                output_data, output_dims);
+                bias_dims, stride_width, stride_height, 1, 1, pad_width,
+                pad_height, depth_multiplier, output_activation_min,
+                output_activation_max, output_data, output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
@@ -1103,6 +1151,7 @@ void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                 output_data, output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index f707279600..ccb9d1654f 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1669,33 +1669,50 @@ inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
   }
 }
 
-inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                          int32 input_offset, const uint8* filter_data,
-                          const Dims<4>& filter_dims, int32 filter_offset,
-                          const int32* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
-                          int32 output_offset, int32 output_multiplier,
-                          int output_shift, int32 output_activation_min,
-                          int32 output_activation_max, uint8* output_data,
-                          const Dims<4>& output_dims) {
+inline void DepthwiseConv(
+    const DepthwiseParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    uint8* output_data) {
   gemmlowp::ScopedProfilingLabel label("DepthwiseConv/8bit");
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int depth_multiplier = params.depth_multiplier;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  // TODO(suharshs): Optimized implementation of dilation depthwise conv need to
+  // be implemented.
+  TFLITE_DCHECK_EQ(params.dilation_width_factor, 1);
+  TFLITE_DCHECK_EQ(params.dilation_height_factor, 1);
 
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = input_shape.Dims(3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
 #ifdef USE_NEON
   const bool shift_left = (output_shift <= 0);
   const int32 multiplier_power_of_two = shift_left ? (1 << -output_shift) : 1;
 #endif
-  TFLITE_DCHECK(output_depth == input_depth * depth_multiplier);
+  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 
 // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
 // Jetson TX-2. This compiler does not support the offsetof() macro.
@@ -1703,14 +1720,11 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
   // Call kernel optimized for depthwise convolutions using 3x3 filters if
   // parameters are supported.
   if (Fast3x3FilterKernelSupported(
-          input_dims, filter_dims, stride_width, stride_height, pad_width,
-          pad_height, depth_multiplier, output_dims, output_shift)) {
-    DepthwiseConv3x3Filter(input_data, input_dims, input_offset, filter_data,
-                           filter_dims, filter_offset, bias_data, bias_dims,
-                           stride_width, stride_height, pad_width, pad_height,
-                           depth_multiplier, output_offset, output_multiplier,
-                           output_shift, output_activation_min,
-                           output_activation_max, output_data, output_dims);
+          input_shape, filter_shape, stride_width, stride_height, pad_width,
+          pad_height, depth_multiplier, output_shape, output_shift)) {
+    DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape,
+                           filter_data, bias_shape, bias_data, output_shape,
+                           output_data);
     return;
   }
 #endif
@@ -1785,6 +1799,10 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
 
 #undef TFMINI_USE_DEPTHWISECONV_KERNEL
 
+  const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+  const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+  const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
   // Now that we have determined row_accum_func, we can start work.
   uint8* output_ptr = output_data;
   for (int b = 0; b < batches; ++b) {
@@ -1811,10 +1829,9 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
           const int in_y = in_y_origin + filter_y;
           row_accum_func(
               stride_width, input_depth, input_width,
-              input_data + in_y * input_dims.strides[2] +
-                  b * input_dims.strides[3],
+              input_data + in_y * input_height_stride + b * input_batch_stride,
               input_offset, pad_width, depth_multiplier, filter_width,
-              filter_data + filter_y * filter_dims.strides[2], filter_offset,
+              filter_data + filter_y * filter_height_stride, filter_offset,
               out_x_buffer_start, out_x_buffer_end, output_depth, acc_buffer);
         }
         // Finished accumulating int32 values. Now need to convert them to
@@ -1964,6 +1981,8 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
                           int32 input_offset, const uint8* filter_data,
                           const Dims<4>& filter_dims, int32 filter_offset,
@@ -1975,19 +1994,48 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
                           int output_shift, int32 output_activation_min,
                           int32 output_activation_max, uint8* output_data,
                           const Dims<4>& output_dims) {
-  // TODO(suharshs): Optimized implementation of dilation depthwise is not
-  // supported yet.
-  TFLITE_DCHECK(dilation_width_factor == 1);
-  TFLITE_DCHECK(dilation_height_factor == 1);
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+
+  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                bias_data, DimsToShape(output_dims), output_data);
+}
 
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
   DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
                 filter_offset, bias_data, bias_dims, stride_width,
-                stride_height, pad_width, pad_height, depth_multiplier,
+                stride_height, 1, 1, pad_width, pad_height, depth_multiplier,
                 output_offset, output_multiplier, output_shift,
                 output_activation_min, output_activation_max, output_data,
                 output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // Legacy, for compatibility with old checked-in code.
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
@@ -2011,6 +2059,7 @@ void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
                 output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // Legacy, for compatibility with old checked-in code.
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
index 0ce64f8c70..9fed53cafb 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
@@ -3175,16 +3175,17 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
 }
 
 inline bool Fast3x3FilterKernelSupported(
-    const Dims<4>& input_dims, const Dims<4>& filter_dims, int32 stride_width,
-    int32 stride_height, int32 pad_width, int32 pad_height,
-    int32 depth_multiplier, const Dims<4>& output_dims, int32 output_shift) {
-  const int32 input_height = ArraySize(input_dims, 2);
-  const int32 input_width = ArraySize(input_dims, 1);
-  const int32 input_depth = ArraySize(input_dims, 0);
-  const int32 filter_height = ArraySize(filter_dims, 2);
-  const int32 filter_width = ArraySize(filter_dims, 1);
-  const int32 output_height = ArraySize(output_dims, 2);
-  const int32 output_width = ArraySize(output_dims, 1);
+    const RuntimeShape& input_shape, const RuntimeShape& filter_shape,
+    int32 stride_width, int32 stride_height, int32 pad_width, int32 pad_height,
+    int32 depth_multiplier, const RuntimeShape& output_shape,
+    int32 output_shift) {
+  const int32 input_height = input_shape.Dims(1);
+  const int32 input_width = input_shape.Dims(2);
+  const int32 input_depth = input_shape.Dims(3);
+  const int32 filter_height = filter_shape.Dims(1);
+  const int32 filter_width = filter_shape.Dims(2);
+  const int32 output_height = output_shape.Dims(1);
+  const int32 output_width = output_shape.Dims(2);
 
   bool supported =
       filter_width == 3 && filter_height == 3 && depth_multiplier == 1 &&
@@ -3234,26 +3235,37 @@ inline bool Fast3x3FilterKernelSupported(
 }
 
 inline void DepthwiseConv3x3Filter(
-    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
-    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 stride_width,
-    int32 stride_height, int32 pad_width, int32 pad_height,
-    int32 depth_multiplier, int32 output_offset, int32 output_multiplier,
-    int32 output_shift, int32 output_activation_min,
-    int32 output_activation_max, uint8* output_data,
-    const Dims<4>& output_dims) {
+    const DepthwiseParams& rt_params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    uint8* output_data) {
   gemmlowp::ScopedProfilingLabel label(__PRETTY_FUNCTION__);
   DepthwiseConvParams params;
-  params.input_depth = ArraySize(input_dims, 0);
-  params.input_width = ArraySize(input_dims, 1);
-  params.input_height = ArraySize(input_dims, 2);
+
+  const int32 stride_width = rt_params.stride_width;
+  const int32 stride_height = rt_params.stride_height;
+  const int32 pad_width = rt_params.padding_values.width;
+  const int32 pad_height = rt_params.padding_values.height;
+  const int32 depth_multiplier = rt_params.depth_multiplier;
+  const int32 output_activation_min = rt_params.quantized_activation_min;
+  const int32 output_activation_max = rt_params.quantized_activation_max;
+  const int32 input_offset = rt_params.input_offset;
+  const int32 filter_offset = rt_params.weights_offset;
+  const int32 output_offset = rt_params.output_offset;
+  const int32 output_multiplier = rt_params.output_multiplier;
+  const int32 output_shift = rt_params.output_shift;
+
+  params.input_depth = input_shape.Dims(3);
+  params.input_width = input_shape.Dims(2);
+  params.input_height = input_shape.Dims(1);
   params.input_row_size = params.input_depth * params.input_width;
   params.input_offset = input_offset;
   params.stride_width = stride_width;
   params.stride_height = stride_height;
-  params.output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
-  params.output_width = ArraySize(output_dims, 1);
-  params.output_height = ArraySize(output_dims, 2);
+  params.output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  params.output_width = output_shape.Dims(2);
+  params.output_height = output_shape.Dims(1);
   params.output_row_size = params.output_depth * params.output_width;
   params.output_offset = output_offset;
   params.filter_offset = filter_offset;
@@ -3262,8 +3274,8 @@ inline void DepthwiseConv3x3Filter(
   params.output_activation_min = output_activation_min;
   params.output_activation_max = output_activation_max;
 
-  const int32 filter_height = ArraySize(filter_dims, 2);
-  const int32 filter_width = ArraySize(filter_dims, 1);
+  const int32 filter_height = filter_shape.Dims(1);
+  const int32 filter_width = filter_shape.Dims(2);
   params.filter_row_size = params.output_depth * filter_width;
 
   // Algorithm assumes below constraints. It is optimized for depth
@@ -3279,7 +3291,7 @@ inline void DepthwiseConv3x3Filter(
   TFLITE_DCHECK(pad_width == 0 || pad_width == 1);
   TFLITE_DCHECK(pad_width == pad_height);
 
-  const int32 batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+  const int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
   const int64_t input_batch_size = params.input_row_size * params.input_height;
   const int64_t output_batch_size =
       params.output_row_size * params.output_height;
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index f6636acc58..ac4626bc30 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -772,6 +772,8 @@ struct DepthwiseParams {
   PaddingValues padding_values;
   int16 stride_width;
   int16 stride_height;
+  int16 dilation_width_factor;
+  int16 dilation_height_factor;
   int16 depth_multiplier;
   // uint8 inference params.
   // TODO(b/65838351): Use smaller types if appropriate.
-- 
GitLab


From 0f4861d3a75744353cc6885987c0ec919102b2cc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 09:08:49 -0700
Subject: [PATCH 0257/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213281730
---
 .../internal/reference/depthwiseconv_float.h  |  90 +++++++++++----
 .../internal/reference/depthwiseconv_uint8.h  | 107 +++++++++++++-----
 2 files changed, 148 insertions(+), 49 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
index bb5d590775..a8428528c9 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
@@ -22,25 +22,36 @@ limitations under the License.
 namespace tflite {
 namespace reference_ops {
 
-inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          const float* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height,
-                          int dilation_width_factor, int dilation_height_factor,
-                          int pad_width, int pad_height, int depth_multiplier,
-                          float output_activation_min,
-                          float output_activation_max, float* output_data,
-                          const Dims<4>& output_dims) {
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
-  TFLITE_DCHECK(output_depth == input_depth * depth_multiplier);
+inline void DepthwiseConv(
+    const DepthwiseParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& filter_shape,
+    const float* filter_data, const RuntimeShape& bias_shape,
+    const float* bias_data, const RuntimeShape& output_shape,
+    float* output_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int depth_multiplier = params.depth_multiplier;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = input_shape.Dims(3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 
   for (int b = 0; b < batches; ++b) {
     for (int out_y = 0; out_y < output_height; ++out_y) {
@@ -61,18 +72,18 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                     (in_y < input_height)) {
                   float input_value =
-                      input_data[Offset(input_dims, ic, in_x, in_y, b)];
+                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
                   float filter_value = filter_data[Offset(
-                      filter_dims, oc, filter_x, filter_y, 0)];
+                      filter_shape, 0, filter_y, filter_x, oc)];
                   total += (input_value * filter_value);
                 }
               }
             }
             float bias_value = 0.0f;
             if (bias_data) {
-              bias_value = bias_data[Offset(bias_dims, oc, 0, 0, 0)];
+              bias_value = bias_data[oc];
             }
-            output_data[Offset(output_dims, oc, out_x, out_y, b)] =
+            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
                 ActivationFunctionWithMinMax(total + bias_value,
                                              output_activation_min,
                                              output_activation_max);
@@ -83,6 +94,37 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                bias_data, DimsToShape(output_dims), output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                           const float* filter_data, const Dims<4>& filter_dims,
                           const float* bias_data, const Dims<4>& bias_dims,
@@ -97,6 +139,7 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                 output_activation_max, output_data, output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // Legacy, for compatibility with old checked-in code.
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
@@ -113,6 +156,7 @@ void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                 output_data, output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // Legacy, for compatibility with old checked-in code.
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
index 5e3e8997fc..38aea14c21 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
@@ -26,27 +26,43 @@ limitations under the License.
 namespace tflite {
 namespace reference_ops {
 
-inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                          int32 input_offset, const uint8* filter_data,
-                          const Dims<4>& filter_dims, int32 filter_offset,
-                          const int32* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height,
-                          int dilation_width_factor, int dilation_height_factor,
-                          int pad_width, int pad_height, int depth_multiplier,
-                          int32 output_offset, int32 output_multiplier,
-                          int output_shift, int32 output_activation_min,
-                          int32 output_activation_max, uint8* output_data,
-                          const Dims<4>& output_dims) {
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
-  TFLITE_DCHECK(output_depth == input_depth * depth_multiplier);
+inline void DepthwiseConv(
+    const DepthwiseParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    uint8* output_data) {
+  gemmlowp::ScopedProfilingLabel label("DepthwiseConv/8bit");
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int depth_multiplier = params.depth_multiplier;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = input_shape.Dims(3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 
   for (int b = 0; b < batches; ++b) {
     for (int out_y = 0; out_y < output_height; ++out_y) {
@@ -67,23 +83,23 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                     (in_y < input_height)) {
                   int32 input_val =
-                      input_data[Offset(input_dims, ic, in_x, in_y, b)];
-                  int32 filter_val = filter_data[Offset(filter_dims, oc,
-                                                        filter_x, filter_y, 0)];
+                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
+                  int32 filter_val = filter_data[Offset(
+                      filter_shape, 0, filter_y, filter_x, oc)];
                   acc +=
                       (filter_val + filter_offset) * (input_val + input_offset);
                 }
               }
             }
             if (bias_data) {
-              acc += bias_data[Offset(bias_dims, oc, 0, 0, 0)];
+              acc += bias_data[oc];
             }
             acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
                                                 -output_shift);
             acc += output_offset;
             acc = std::max(acc, output_activation_min);
             acc = std::min(acc, output_activation_max);
-            output_data[Offset(output_dims, oc, out_x, out_y, b)] =
+            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
                 static_cast<uint8>(acc);
           }
         }
@@ -92,6 +108,43 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+
+  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                bias_data, DimsToShape(output_dims), output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
                           int32 input_offset, const uint8* filter_data,
                           const Dims<4>& filter_dims, int32 filter_offset,
@@ -110,6 +163,7 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
                 output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // Legacy, for compatibility with old checked-in code.
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
@@ -133,6 +187,7 @@ void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
                 output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // Legacy, for compatibility with old checked-in code.
 template <FusedActivationFunctionType Ac>
 void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-- 
GitLab


From 07bc3696135483612c727ca7687342922ff0d5de Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 09:13:45 -0700
Subject: [PATCH 0258/1357] Removing unused code comment in AutoGraph error
 rewriting.

PiperOrigin-RevId: 213282302
---
 tensorflow/python/autograph/core/errors.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/autograph/core/errors.py b/tensorflow/python/autograph/core/errors.py
index 0750353423..23f8c5b52b 100644
--- a/tensorflow/python/autograph/core/errors.py
+++ b/tensorflow/python/autograph/core/errors.py
@@ -208,7 +208,6 @@ def rewrite_tf_runtime_error(error, source_map):
   """
   try:
     cleaned_traceback = _cut_traceback_loops(source_map, error.op.traceback)
-    # cleaned_traceback = error.op.traceback
     cleaned_traceback = _rewrite_tb(source_map, cleaned_traceback)
 
     op_name = error.op.name
-- 
GitLab


From c8a0dfc741736a59f8fd1776b71f38619d66da56 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 17 Sep 2018 09:21:14 -0700
Subject: [PATCH 0259/1357] [tf.data] Adding support for `tf.data.AUTOTUNE` as
 a special value for the `num_parallel_calls` argument of
 `tf.data.Dataset.map()`, `tf.data.Dataset.interleave()`, and
 `tf.contrib.data.map_and_batch()`.

When `tf.data.AUTOTUNE` is specified, the level of parallelism is determined at runtime. The underlying mechanism instruments the input pipeline to build a performance model and then uses the model to find the optimal values for the parallelism knobs.

PiperOrigin-RevId: 213283297
---
 .../optimization/model_dataset_op_test.py     |  17 +-
 .../makefile/proto_text_pb_cc_files.txt       |   1 -
 .../makefile/proto_text_pb_h_files.txt        |   1 -
 .../contrib/makefile/tf_pb_text_files.txt     |   1 -
 .../contrib/makefile/tf_proto_files.txt       |   1 -
 tensorflow/core/BUILD                         |   2 -
 tensorflow/core/framework/dataset.cc          |   1 -
 tensorflow/core/framework/dataset.h           |  31 ++-
 tensorflow/core/framework/model.cc            | 251 ++++++++----------
 tensorflow/core/framework/model.h             |  97 +++----
 tensorflow/core/framework/model.proto         |  30 ---
 .../core/kernels/data/batch_dataset_op.cc     |   2 +-
 .../kernels/data/map_and_batch_dataset_op.cc  |  42 ++-
 .../core/kernels/data/model_dataset_op.cc     |  25 +-
 .../kernels/data/padded_batch_dataset_op.cc   |   2 +-
 .../data/parallel_interleave_dataset_op.cc    |  31 ++-
 .../kernels/data/parallel_map_dataset_op.cc   |   2 +-
 .../kernels/data/parallel_map_iterator.cc     |  35 ++-
 18 files changed, 299 insertions(+), 273 deletions(-)
 delete mode 100644 tensorflow/core/framework/model.proto

diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
index 0a87d3e905..2b3ac85924 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
@@ -58,7 +58,8 @@ class ModelDatasetTest(test.TestCase):
     dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k),
                                                 np.random.rand(4 * k,
                                                                1))).repeat()
-    dataset = dataset.map(math_ops.matmul, num_parallel_calls=56)
+    dataset = dataset.map(
+        math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE)
     iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
     get_next = iterator.get_next()
 
@@ -84,7 +85,9 @@ class ModelDatasetTest(test.TestCase):
                                                                1))).repeat()
     dataset = dataset.apply(
         batching.map_and_batch(
-            math_ops.matmul, num_parallel_calls=28, batch_size=batch_size))
+            math_ops.matmul,
+            num_parallel_calls=optimization.AUTOTUNE,
+            batch_size=batch_size))
     iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
     get_next = iterator.get_next()
 
@@ -109,7 +112,9 @@ class ModelDatasetTest(test.TestCase):
                                                                1))).repeat()
     dataset = dataset.map(math_ops.matmul)
     dataset = dataset_ops.Dataset.range(1).repeat().interleave(
-        lambda _: dataset, cycle_length=56, num_parallel_calls=56)
+        lambda _: dataset,
+        cycle_length=10,
+        num_parallel_calls=optimization.AUTOTUNE)
     iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
     get_next = iterator.get_next()
 
@@ -146,15 +151,15 @@ class ModelDatasetTest(test.TestCase):
       x, y = c
       return a, b, math_ops.matmul(x, y)
 
-    dataset = dataset.map(f1, num_parallel_calls=32)
+    dataset = dataset.map(f1, num_parallel_calls=optimization.AUTOTUNE)
     dataset = dataset_ops.Dataset.range(1).repeat().interleave(
         lambda _: dataset, cycle_length=2)
 
-    dataset = dataset.map(f2, num_parallel_calls=16)
+    dataset = dataset.map(f2, num_parallel_calls=optimization.AUTOTUNE)
     dataset = dataset_ops.Dataset.range(1).repeat().interleave(
         lambda _: dataset, cycle_length=2)
 
-    dataset = dataset.map(f3, num_parallel_calls=10)
+    dataset = dataset.map(f3, num_parallel_calls=optimization.AUTOTUNE)
     iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
     get_next = iterator.get_next()
 
diff --git a/tensorflow/contrib/makefile/proto_text_pb_cc_files.txt b/tensorflow/contrib/makefile/proto_text_pb_cc_files.txt
index 1d6d9a60e5..0d8df93d11 100644
--- a/tensorflow/contrib/makefile/proto_text_pb_cc_files.txt
+++ b/tensorflow/contrib/makefile/proto_text_pb_cc_files.txt
@@ -10,7 +10,6 @@ tensorflow/core/framework/graph.pb.cc
 tensorflow/core/framework/graph_transfer_info.pb.cc
 tensorflow/core/framework/kernel_def.pb.cc
 tensorflow/core/framework/log_memory.pb.cc
-tensorflow/core/framework/model.pb.cc
 tensorflow/core/framework/node_def.pb.cc
 tensorflow/core/framework/op_def.pb.cc
 tensorflow/core/framework/remote_fused_graph_execute_info.pb.cc
diff --git a/tensorflow/contrib/makefile/proto_text_pb_h_files.txt b/tensorflow/contrib/makefile/proto_text_pb_h_files.txt
index 884461ecae..d982df9319 100644
--- a/tensorflow/contrib/makefile/proto_text_pb_h_files.txt
+++ b/tensorflow/contrib/makefile/proto_text_pb_h_files.txt
@@ -10,7 +10,6 @@ tensorflow/core/framework/graph.pb.h
 tensorflow/core/framework/graph_transfer_info.pb.h
 tensorflow/core/framework/kernel_def.pb.h
 tensorflow/core/framework/log_memory.pb.h
-tensorflow/core/framework/model.pb.h
 tensorflow/core/framework/node_def.pb.h
 tensorflow/core/framework/op_def.pb.h
 tensorflow/core/framework/remote_fused_graph_execute_info.pb.h
diff --git a/tensorflow/contrib/makefile/tf_pb_text_files.txt b/tensorflow/contrib/makefile/tf_pb_text_files.txt
index e23f499214..f94d70db90 100644
--- a/tensorflow/contrib/makefile/tf_pb_text_files.txt
+++ b/tensorflow/contrib/makefile/tf_pb_text_files.txt
@@ -10,7 +10,6 @@ tensorflow/core/framework/graph.pb_text.cc
 tensorflow/core/framework/graph_transfer_info.pb_text.cc
 tensorflow/core/framework/kernel_def.pb_text.cc
 tensorflow/core/framework/log_memory.pb_text.cc
-tensorflow/core/framework/model.pb_text.cc
 tensorflow/core/framework/node_def.pb_text.cc
 tensorflow/core/framework/op_def.pb_text.cc
 tensorflow/core/framework/remote_fused_graph_execute_info.pb_text.cc
diff --git a/tensorflow/contrib/makefile/tf_proto_files.txt b/tensorflow/contrib/makefile/tf_proto_files.txt
index 5eae845d9b..8bec3e3e01 100644
--- a/tensorflow/contrib/makefile/tf_proto_files.txt
+++ b/tensorflow/contrib/makefile/tf_proto_files.txt
@@ -14,7 +14,6 @@ tensorflow/core/framework/graph.proto
 tensorflow/core/framework/graph_transfer_info.proto
 tensorflow/core/framework/kernel_def.proto
 tensorflow/core/framework/log_memory.proto
-tensorflow/core/framework/model.proto
 tensorflow/core/framework/node_def.proto
 tensorflow/core/framework/op_def.proto
 tensorflow/core/framework/reader_base.proto
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 55715bb3a6..4074232c93 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -178,7 +178,6 @@ COMMON_PROTO_SRCS = [
     "framework/iterator.proto",
     "framework/kernel_def.proto",
     "framework/log_memory.proto",
-    "framework/model.proto",
     "framework/node_def.proto",
     "framework/op_def.proto",
     "framework/reader_base.proto",
@@ -842,7 +841,6 @@ tf_cuda_library(
         "framework/log_memory.h",
         "framework/lookup_interface.h",
         "framework/memory_types.h",
-        "framework/model.h",
         "framework/node_def_builder.h",
         "framework/node_def_util.h",
         "framework/numeric_op.h",
diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc
index 5281c56f04..284dafb886 100644
--- a/tensorflow/core/framework/dataset.cc
+++ b/tensorflow/core/framework/dataset.cc
@@ -20,7 +20,6 @@ limitations under the License.
 
 namespace tensorflow {
 namespace data {
-
 namespace {
 
 // A wrapper class for storing a `DatasetBase` instance in a DT_VARIANT tensor.
diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 4ee6749eea..91b1e61d3c 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -47,6 +47,8 @@ class GraphDefBuilder;
 class Node;
 
 namespace data {
+// A constant that can be used to enable auto-tuning.
+constexpr int kAutoTune = -1;
 
 class DatasetBase;
 class SerializationContext;
@@ -670,13 +672,34 @@ class DatasetBaseIterator : public IteratorBase {
     return strings::StrCat(params_.prefix, ":", name);
   }
 
-  // When performance modeling is enabled, this method sets metadata entry for
-  // the model node corresponding to this iterator.
-  void SetMetadata(IteratorContext* ctx, const string& key, int64 value) {
+  // When performance modeling is enabled, this method adds a constant parameter
+  // to the model node corresponding to this iterator.
+  void AddConstantParameter(IteratorContext* ctx, const string& name,
+                            int64 value) {
     if (ctx->model()) {
       std::shared_ptr<model::Node> node = ctx->model()->LookupNode(prefix());
       if (node) {
-        node->set_metadata(key, value);
+        node->add_constant_param(name, value);
+      }
+    }
+  }
+
+  // When performance modeling is enabled, this method adds a tunable parameter
+  // to the model node corresponding to this iterator.
+  //
+  // The `set_fn` function should set the tunable parameter to the value of
+  // its input argument. The function should be thread-safe; in particular, the
+  // state it updates should be protected by a lock as the function can be
+  // invoked asynchronously. It is guaranteed that this function will not be
+  // invoked after the iterator is deleted because the model node that owns
+  // the function is deleted when the iterator is deleted.
+  void AddTunableParameter(IteratorContext* ctx, const string& name,
+                           int64 value, int64 min, int64 max,
+                           std::function<void(int64)>&& set_fn) {
+    if (ctx->model()) {
+      std::shared_ptr<model::Node> node = ctx->model()->LookupNode(prefix());
+      if (node) {
+        node->add_tunable_param(name, value, min, max, std::move(set_fn));
       }
     }
   }
diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index 250b006641..b3fe357ea1 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -15,52 +15,28 @@ limitations under the License.
 
 #include "tensorflow/core/framework/model.h"
 
+#include <memory>
+
+#include "tensorflow/core/lib/gtl/map_util.h"
+
 namespace tensorflow {
 namespace data {
 namespace model {
 
 // TODO(jsimsa): Use `Node` subclassing instead of types and node statements.
-void Node::CollectKnobs(std::vector<Node::Knob>* knobs) {
+void Node::CollectTunables(
+    std::vector<std::shared_ptr<Node::Tunable>>* tunables) {
   mutex_lock l(mu_);
+  for (auto input : inputs_) {
+    input->CollectTunables(tunables);
+  }
   switch (type_) {
-    case Type::PARALLEL_INTERLEAVE_V2: {
-      for (auto input : inputs_) {
-        input->CollectKnobs(knobs);
-      }
-      int64 processing_time = static_cast<int64>(
-          static_cast<double>(ProcessingTimeLocked() -
-                              inputs_.front()->ProcessingTime()) /
-          static_cast<double>(inputs_.size() - 1));
-      knobs->emplace_back(
-          Node::Knob{this, processing_time, metadata_["parallelism"]});
-      return;
-    }
     case Type::MAP_AND_BATCH:
+    case Type::PARALLEL_INTERLEAVE_V2:
     case Type::PARALLEL_MAP: {
-      for (auto input : inputs_) {
-        input->CollectKnobs(knobs);
-      }
-      knobs->emplace_back(
-          Node::Knob{this, NanosPerElementLocked(), metadata_["parallelism"]});
-      return;
-    }
-    case Type::BATCH:
-    case Type::CACHE:
-    case Type::CONCATENATE:
-    case Type::FILTER:
-    case Type::FLAT_MAP:
-    case Type::INTERLEAVE:
-    case Type::MAP:
-    case Type::PADDED_BATCH:
-    case Type::PARALLEL_INTERLEAVE:
-    case Type::PREFETCH:
-    case Type::REPEAT:
-    case Type::SHUFFLE:
-    case Type::SKIP:
-    case Type::TAKE:
-    case Type::ZIP: {
-      for (auto input : inputs_) {
-        input->CollectKnobs(knobs);
+      if (auto* tunable_param =
+              gtl::FindOrNull(tunable_params_, "parallelism")) {
+        tunables->push_back(*tunable_param);
       }
       return;
     }
@@ -69,12 +45,19 @@ void Node::CollectKnobs(std::vector<Node::Knob>* knobs) {
   }
 }
 
+int64 Node::GetParameterValue(const string& name) {
+  if (auto* tunable_param = gtl::FindOrNull(tunable_params_, name)) {
+    return (*tunable_param)->value;
+  }
+  return constant_params_[name];
+}
+
 int64 Node::ProcessingTimeLocked() {
   switch (type_) {
     case Type::BATCH:
     case Type::MAP_AND_BATCH:
     case Type::PADDED_BATCH: {
-      int64 batch_size = metadata_["batch_size"];
+      int64 batch_size = GetParameterValue("batch_size");
       return NanosPerElementLocked() + batch_size * ProcessingTimeForInputs();
     }
     case Type::FILTER: {
@@ -122,7 +105,7 @@ int64 Node::OutputTimeLocked(std::vector<int64>* input_times) {
   switch (type_) {
     case Type::BATCH:
     case Type::PADDED_BATCH: {
-      double batch_size = metadata_["batch_size"];
+      double batch_size = GetParameterValue("batch_size");
       int64 old_value = (*input_times)[input_times->size() - 1];
       (*input_times)[input_times->size() - 1] = static_cast<int64>(
           static_cast<double>(old_value + NanosPerElementLocked()) /
@@ -168,8 +151,8 @@ int64 Node::OutputTimeLocked(std::vector<int64>* input_times) {
                  static_cast<double>(inputs_.size() - 1);
     }
     case Type::MAP_AND_BATCH: {
-      double batch_size = metadata_["batch_size"];
-      double parallelism = metadata_["parallelism"];
+      double batch_size = GetParameterValue("batch_size");
+      double parallelism = GetParameterValue("parallelism");
       int64 delta =
           static_cast<int64>(static_cast<double>(NanosPerElementLocked()) /
                              (batch_size * parallelism));
@@ -182,22 +165,41 @@ int64 Node::OutputTimeLocked(std::vector<int64>* input_times) {
       return std::max(0LL,
                       output_time - input_times->at(input_times->size() - 2));
     }
-    case Type::PARALLEL_INTERLEAVE:
+    case Type::PARALLEL_INTERLEAVE: {
+      // TODO(jsimsa): model the first input
+      if (inputs_.size() <= 1) {
+        return NanosPerElementLocked();
+      }
+      int64 delta = static_cast<double>(NanosPerElementLocked()) *
+                    static_cast<double>(inputs_.size() - 1);
+      input_times->push_back(delta);
+      auto cleanup =
+          gtl::MakeCleanup([input_times]() { input_times->pop_back(); });
+      int64 inputs_output_time = OutputTimeForInputs(input_times) -
+                                 inputs_.front()->OutputTime(input_times);
+      double parallelism = GetParameterValue("parallelism");
+      int64 output_time =
+          NanosPerElementLocked() + ((static_cast<double>(inputs_output_time) /
+                                      static_cast<double>(inputs_.size() - 1)) /
+                                     parallelism);
+      return std::max(0LL,
+                      output_time - input_times->at(input_times->size() - 2));
+    }
     case Type::PARALLEL_INTERLEAVE_V2: {
       // TODO(jsimsa): model the first input
       if (inputs_.size() <= 1) {
         return NanosPerElementLocked();
       }
-      int64 delta =
-          static_cast<int64>(static_cast<double>(NanosPerElementLocked()) *
-                             static_cast<double>(inputs_.size() - 1));
+      int64 delta = static_cast<double>(NanosPerElementLocked()) *
+                    static_cast<double>(inputs_.size() - 1);
       input_times->push_back(delta);
       auto cleanup =
           gtl::MakeCleanup([input_times]() { input_times->pop_back(); });
       int64 inputs_output_time = OutputTimeForInputs(input_times) -
                                  inputs_.front()->OutputTime(input_times);
-      double parallelism = std::min(port::NumSchedulableCPUs(),
-                                    static_cast<int>(metadata_["parallelism"]));
+      double parallelism =
+          std::min(static_cast<int>(GetParameterValue("cycle_length")),
+                   static_cast<int>(GetParameterValue("parallelism")));
       int64 output_time =
           NanosPerElementLocked() + ((static_cast<double>(inputs_output_time) /
                                       static_cast<double>(inputs_.size() - 1)) /
@@ -206,8 +208,9 @@ int64 Node::OutputTimeLocked(std::vector<int64>* input_times) {
                       output_time - input_times->at(input_times->size() - 2));
     }
     case Type::PARALLEL_MAP: {
-      double parallelism = std::min(port::NumSchedulableCPUs(),
-                                    static_cast<int>(metadata_["parallelism"]));
+      double parallelism =
+          std::min(port::NumSchedulableCPUs(),
+                   static_cast<int>(GetParameterValue("parallelism")));
       int64 delta = static_cast<int64>(
           static_cast<double>(NanosPerElementLocked()) / parallelism);
       input_times->push_back(delta);
@@ -248,23 +251,6 @@ int64 Node::OutputTimeLocked(std::vector<int64>* input_times) {
   }
 }
 
-Model::Model(const proto::Model& model_proto) {
-  id_counter_ = model_proto.id_counter();
-  std::map<int64, std::shared_ptr<Node>> lookup_table;
-  for (auto node_proto : model_proto.node()) {
-    std::shared_ptr<Node> node(new Node(node_proto));
-    lookup_table[node_proto.id()] = node;
-  }
-  for (auto node_proto : model_proto.node()) {
-    std::shared_ptr<Node> node = lookup_table[node_proto.id()];
-    for (int64 id : node_proto.input()) {
-      node->add_input(lookup_table[id]);
-    }
-    node->set_output(lookup_table[node_proto.output()]);
-  }
-  output_ = lookup_table[model_proto.output()];
-}
-
 std::shared_ptr<Node> Model::AddNode(const string& name,
                                      const string& output_name) {
   mutex_lock l(mu_);
@@ -294,94 +280,77 @@ std::shared_ptr<Node> Model::LookupNode(const string& name) {
   return result;
 }
 
-void Model::Optimize() {
-  mutex_lock l(mu_);
-  int64 processing_time = ProcessingTime();
-  int64 num_cpus = port::NumSchedulableCPUs();
-  std::vector<Node::Knob> knobs = CollectKnobs();
-  // The optimization algorithm starts by setting all parallelism knobs to 1. It
-  // then repeatedly identifies the knob that, when turned up by 1, decreases
-  // the output time the most. This process is repeated until all knobs reach
-  // the number of schedulable CPUs or the projected output time is less than or
-  // equal to the processing time needed to produce an element divided by the
-  // number of schedulable CPUs.
-  for (auto& knob : knobs) {
-    LOG(INFO) << knob.node->name() << " " << knob.processing_time;
-    knob.value = 1;
-    knob.node->set_metadata("parallelism", knob.value);
-  }
-  while (true) {
-    int64 output_time = OutputTime();
-    bool all_knobs = true;
-    for (auto knob : knobs) {
-      if (knob.value < num_cpus) {
-        all_knobs = false;
+// The optimization algorithm starts by setting all tunable parallelism
+// parameters to 1. It then repeatedly identifies the parameter that whose
+// increase in parallelism decreases the output time the most. This process is
+// repeated until all parameters reach their maximum values or the
+// projected output time is less than or equal to the processing time needed to
+// produce an element divided by CPU budget.
+void Model::Optimize(int64 cpu_budget) {
+  mutex_lock l(optimization_mu_);
+  std::vector<std::shared_ptr<Node::Tunable>> tunables;
+  {
+    mutex_lock l2(mu_);
+    const int64 processing_time = ProcessingTime();
+    tunables = CollectTunables();
+    for (auto tunable : tunables) {
+      tunable->value = 1;
+    }
+    while (true) {
+      const int64 output_time = OutputTime();
+      bool all_tunables = true;
+      for (auto& tunable : tunables) {
+        if (tunable->value < tunable->max) {
+          all_tunables = false;
+          break;
+        }
+      }
+      if (output_time < processing_time / cpu_budget || all_tunables) {
         break;
       }
-    }
-    if (output_time < processing_time / num_cpus || all_knobs) {
-      break;
-    }
-    int64 best_delta = -1;
-    int best_knob = -1;
-    for (int i = 0; i < knobs.size(); ++i) {
-      if (knobs[i].value == num_cpus) {
-        continue;
+      int64 best_delta = -1;
+      Node::Tunable* best_tunable = nullptr;
+      for (auto& tunable : tunables) {
+        if (tunable->value == tunable->max) {
+          continue;
+        }
+        tunable->value++;
+        int64 delta = output_time - OutputTime();
+        if (delta > best_delta) {
+          best_delta = delta;
+          best_tunable = tunable.get();
+        }
+        tunable->value--;
       }
-      knobs[i].node->set_metadata("parallelism", knobs[i].value + 1);
-      int64 delta = output_time - OutputTime();
-      if (delta > best_delta) {
-        best_delta = delta;
-        best_knob = i;
+      if (best_tunable) {
+        // NOTE: This can happen because we are performing the optimization
+        // while the model data is changing. If this becomes an issue, we should
+        // look into performing the optimization using a model snapshot.
+        break;
       }
-      knobs[i].node->set_metadata("parallelism", knobs[i].value);
+      best_tunable->value++;
     }
-    knobs[best_knob].value++;
-    knobs[best_knob].node->set_metadata("parallelism", knobs[best_knob].value);
   }
-  for (auto knob : knobs) {
-    LOG(INFO) << knob.node->name() << " " << knob.value;
+  // The `set_fn` functions should be invoked without holding a lock to avoid a
+  // potential deadlock.
+  for (auto& tunable : tunables) {
+    tunable->set_fn(tunable->value);
   }
-  LOG(INFO) << "output time: " << OutputTime();
-  LOG(INFO) << "processing time: " << ProcessingTime();
-}
-
-void Model::OutputToFile() {
-  proto::Model model_proto;
-  ToProto(&model_proto);
-  string filename;
-  Env::Default()->LocalTempFilename(&filename);
-  TF_CHECK_OK(WriteStringToFile(Env::Default(), filename,
-                                model_proto.SerializeAsString()));
-  LOG(INFO) << filename;
 }
 
 void Model::RemoveNode(const string& prefix) {
-  mutex_lock l(mu_);
+  // Nodes are not allowed to be removed when optimization is in progress to
+  // prevent the optimization from trying to access an iterator that was
+  // concurrently deleted.
+  mutex_lock l(optimization_mu_);
+  mutex_lock l2(mu_);
   lookup_table_.erase(prefix);
 }
 
-void Model::ToProto(proto::Model* model_proto) {
-  mutex_lock l(mu_);
-  model_proto->set_id_counter(id_counter_);
-  model_proto->set_output(output_->id());
-  AddNodeToProto(output_, model_proto);
-}
-
-// static
-void Model::AddNodeToProto(const std::shared_ptr<Node>& node,
-                           proto::Model* model_proto) {
-  proto::Node* node_proto = model_proto->add_node();
-  node->ToProto(node_proto);
-  for (const std::shared_ptr<Node>& input : node->inputs()) {
-    AddNodeToProto(input, model_proto);
-  }
-}
-
-std::vector<Node::Knob> Model::CollectKnobs() {
-  std::vector<Node::Knob> knobs;
-  output_->CollectKnobs(&knobs);
-  return knobs;
+std::vector<std::shared_ptr<Node::Tunable>> Model::CollectTunables() {
+  std::vector<std::shared_ptr<Node::Tunable>> tunables;
+  output_->CollectTunables(&tunables);
+  return tunables;
 }
 
 int64 Model::OutputTime() {
diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h
index 98172909bf..f88ec06ef3 100644
--- a/tensorflow/core/framework/model.h
+++ b/tensorflow/core/framework/model.h
@@ -22,7 +22,6 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "tensorflow/core/framework/model.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -61,13 +60,10 @@ class Node {
  public:
   Node(int64 id, std::shared_ptr<Node> output) : id_(id), output_(output) {}
 
-  explicit Node(const proto::Node& node_proto) : id_(node_proto.id()) {
-    name_ = node_proto.name();
-    type_ = TypeFromName(node_proto.name());
-    processing_time_ = node_proto.processing_time();
-    num_elements_ = node_proto.num_elements();
-    metadata_.insert(node_proto.metadata().begin(),
-                     node_proto.metadata().end());
+  // Adds a constant parameter.
+  void add_constant_param(const string& name, int64 value) LOCKS_EXCLUDED(mu_) {
+    mutex_lock l(mu_);
+    constant_params_[name] = value;
   }
 
   // Records that the node produced an element.
@@ -88,6 +84,15 @@ class Node {
     processing_time_ += delta;
   }
 
+  // Adds a tunable parameter.
+  void add_tunable_param(const string& name, int64 value, int64 min, int64 max,
+                         std::function<void(int64)>&& set_fn)
+      LOCKS_EXCLUDED(mu_) {
+    mutex_lock l(mu_);
+    tunable_params_[name] =
+        std::make_shared<Tunable>(value, min, max, std::move(set_fn));
+  }
+
   // Returns the unique node ID.
   int64 id() LOCKS_EXCLUDED(mu_) { return id_; }
 
@@ -121,12 +126,6 @@ class Node {
     inputs_.remove(input);
   }
 
-  // Adds the given key-value pair to the node metadata.
-  void set_metadata(const string& key, int64 value) LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    metadata_[key] = value;
-  }
-
   // Sets the node name.
   void set_name(const string& name) LOCKS_EXCLUDED(mu_) {
     mutex_lock l(mu_);
@@ -157,11 +156,16 @@ class Node {
   }
 
  private:
-  // Represents a performance knob.
-  struct Knob {
-    Node* node;
-    int64 processing_time;
+  // Represents a tunable parameter.
+  struct Tunable {
+    Tunable(int64 value, int64 min, int64 max,
+            std::function<void(int64)> set_fn)
+        : value(value), min(min), max(max), set_fn(std::move(set_fn)) {}
+
     int64 value;
+    int64 min;
+    int64 max;
+    std::function<void(int64)> set_fn;
   };
 
   enum class Type {
@@ -186,8 +190,12 @@ class Node {
     UNKNOWN,
   };
 
-  // Collects performance knobs in the subtree rooted in this node.
-  void CollectKnobs(std::vector<Node::Knob>* knobs) LOCKS_EXCLUDED(mu_);
+  // Collects tunable parameters in the subtree rooted in this node.
+  void CollectTunables(std::vector<std::shared_ptr<Node::Tunable>>* tunables)
+      LOCKS_EXCLUDED(mu_);
+
+  // Gets a value of the given parameter (tunable or constant).
+  int64 GetParameterValue(const string& name) EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
   // Returns the per-element processing time spent in this node.
   int64 NanosPerElement() LOCKS_EXCLUDED(mu_) {
@@ -238,22 +246,6 @@ class Node {
     return sum;
   }
 
-  // Serializes the node state into the given proto.
-  void ToProto(proto::Node* node_proto) LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    node_proto->set_id(id_);
-    node_proto->set_name(name_);
-    node_proto->set_num_elements(num_elements_);
-    node_proto->set_processing_time(processing_time_);
-    for (const std::shared_ptr<Node>& input : inputs_) {
-      node_proto->add_input(input->id());
-    }
-    if (output_) {
-      node_proto->set_output(output_->id());
-    }
-    node_proto->mutable_metadata()->insert(metadata_.begin(), metadata_.end());
-  }
-
   Type TypeFromName(const string& name) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     if (name_ == "Batch") {
       return Type::BATCH;
@@ -319,7 +311,9 @@ class Node {
   int64 processing_time_ GUARDED_BY(mu_) = 0;
   int64 num_elements_ GUARDED_BY(mu_) = 0;
   std::map<std::thread::id, int64> work_start_ GUARDED_BY(mu_);
-  std::map<string, int64> metadata_ GUARDED_BY(mu_);
+  std::map<string, int64> constant_params_ GUARDED_BY(mu_);
+  // Tunables are shared with the model during optimization.
+  std::map<string, std::shared_ptr<Tunable>> tunable_params_ GUARDED_BY(mu_);
   std::list<std::shared_ptr<Node>> inputs_ GUARDED_BY(mu_);
   std::shared_ptr<Node> output_ GUARDED_BY(mu_);
 
@@ -330,21 +324,15 @@ class Node {
 // for collecting runtime information and optimizing performance. It collects
 // runtime information about execution of the input pipeline that is used to
 // create a performance model, which is in turn used to identify optimal values
-// of performance knobs.
+// of tunable parameters.
 //
 // Developers of tf.data transformations are not expected to interact with this
 // class directly. Boiler plate code for creating the abstract representation of
 // the input pipeline and collecting runtime information has been added to the
 // implementation of `DatasetBase` and `DatasetBaseIterator` respectively.
-//
-// TODO(jsimsa): Add a mechanism for feeding the result of the optimization
-// into the input pipeline.
 class Model {
  public:
   Model() = default;
-  explicit Model(const proto::Model& model_proto);
-
-  ~Model() {}
 
   // Returns the model output node.
   std::shared_ptr<Node> output() LOCKS_EXCLUDED(mu_) {
@@ -360,30 +348,25 @@ class Model {
   std::shared_ptr<Node> LookupNode(const string& name) LOCKS_EXCLUDED(mu_);
 
   // Runs optimization.
-  void Optimize() LOCKS_EXCLUDED(mu_);
-
-  // Outputs the state of a model to a file.
-  //
-  // TODO(jsimsa): Remove this method once the optimization loop is closed.
-  void OutputToFile() LOCKS_EXCLUDED(mu_);
+  void Optimize(int64 cpu_budget) LOCKS_EXCLUDED(mu_);
 
   // Removes the node identified by the given name.
   void RemoveNode(const string& prefix) LOCKS_EXCLUDED(mu_);
 
-  // Serializes the model state to the given proto.
-  void ToProto(proto::Model* model_proto) LOCKS_EXCLUDED(mu_);
-
  private:
-  static void AddNodeToProto(const std::shared_ptr<Node>& node,
-                             proto::Model* model_proto);
-
-  std::vector<Node::Knob> CollectKnobs() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+  std::vector<std::shared_ptr<Node::Tunable>> CollectTunables()
+      EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
   int64 OutputTime() EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
   int64 ProcessingTime() EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
+  // Used for coordination between different input pipeline threads.
   mutex mu_;
+  // Used for preventing iterator deletion when optimization is in progress
+  // because the optimization may try to update the values of tunable
+  // parameters.
+  mutex optimization_mu_ ACQUIRED_BEFORE(mu_);
   int64 id_counter_ GUARDED_BY(mu_) = 1;
   std::shared_ptr<Node> output_ GUARDED_BY(mu_);
   std::map<string, std::shared_ptr<Node>> lookup_table_ GUARDED_BY(mu_);
diff --git a/tensorflow/core/framework/model.proto b/tensorflow/core/framework/model.proto
deleted file mode 100644
index 26000007af..0000000000
--- a/tensorflow/core/framework/model.proto
+++ /dev/null
@@ -1,30 +0,0 @@
-syntax = "proto3";
-
-package tensorflow.data.model.proto;
-option cc_enable_arenas = true;
-
-message Model {
-  // Counter used for generating new node IDs.
-  int64 id_counter = 1;
-  // Nodes of this model.
-  repeated Node node = 2;
-  // The ID of the output node.
-  int64 output = 3;
-};
-
-message Node {
-  // The node ID.
-  int64 id = 1;
-  // The node name.
-  string name = 2;
-  // Input node IDs.
-  repeated int64 input = 3;
-  // Output node ID.
-  int64 output = 4;
-  // Number of elements produced by the node.
-  int64 num_elements = 5;
-  // The CPU time spent by running threads of this node.
-  int64 processing_time = 6;
-  // Key-value store for node metadata (e.g. batch size or parallelism).
-  map<string, int32> metadata = 7;
-};
diff --git a/tensorflow/core/kernels/data/batch_dataset_op.cc b/tensorflow/core/kernels/data/batch_dataset_op.cc
index 887b8c8365..d1db1d7bec 100644
--- a/tensorflow/core/kernels/data/batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/batch_dataset_op.cc
@@ -117,7 +117,7 @@ class BatchDatasetOp : public UnaryDatasetOpKernel {
           : DatasetIterator<Dataset>(params) {}
 
       Status Initialize(IteratorContext* ctx) override {
-        SetMetadata(ctx, "batch_size", dataset()->batch_size_);
+        AddConstantParameter(ctx, "batch_size", dataset()->batch_size_);
         return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_);
       }
 
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 85e49355d3..80efac5d4b 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
 
 namespace tensorflow {
@@ -39,7 +40,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  public:
   explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx)
       : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()),
         op_version_(ctx->def().op() == "MapAndBatchDataset" ? 1 : 2) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
@@ -77,7 +77,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       case 2:
         OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
                                                 &num_parallel_calls));
-        OP_REQUIRES(ctx, num_parallel_calls > 0,
+        OP_REQUIRES(ctx,
+                    num_parallel_calls > 0 || num_parallel_calls == kAutoTune,
                     errors::InvalidArgument(
                         "num_parallel_calls must be greater than zero."));
         break;
@@ -190,7 +191,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     class Iterator : public DatasetIterator<Dataset> {
      public:
       explicit Iterator(const Params& params)
-          : DatasetIterator<Dataset>(params) {}
+          : DatasetIterator<Dataset>(params),
+            num_parallel_calls_(params.dataset->num_parallel_calls_) {}
 
       ~Iterator() override {
         mutex_lock l(mu_);
@@ -204,8 +206,24 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status Initialize(IteratorContext* ctx) override {
-        SetMetadata(ctx, "batch_size", dataset()->batch_size_);
-        SetMetadata(ctx, "parallelism", dataset()->num_parallel_calls_);
+        mutex_lock l(mu_);
+        AddConstantParameter(ctx, "batch_size", dataset()->batch_size_);
+        if (num_parallel_calls_ == kAutoTune) {
+          num_parallel_calls_ = 1;
+          std::function<void(int64)> set_fn = [this](int64 value) {
+            {
+              mutex_lock l(mu_);
+              num_parallel_calls_ = value;
+            }
+            VLOG(2) << "setting parallelism knob to " << value;
+            cond_var_.notify_all();
+          };
+          AddTunableParameter(
+              ctx, "parallelism", num_parallel_calls_ /* value */, 1 /* min */,
+              port::NumSchedulableCPUs() /* max */, std::move(set_fn));
+        } else {
+          AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
+        }
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
         return dataset()->captured_func_->Instantiate(ctx);
@@ -428,7 +446,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       int MaxBatchResults() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        return (dataset()->num_parallel_calls_ + dataset()->batch_size_ - 1) /
+        return (num_parallel_calls_ + dataset()->batch_size_ - 1) /
                dataset()->batch_size_;
       }
 
@@ -480,15 +498,18 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void RunnerThread(const std::shared_ptr<IteratorContext>& ctx)
           LOCKS_EXCLUDED(mu_) {
         std::vector<std::pair<std::shared_ptr<BatchResult>, int64>> new_calls;
-        new_calls.reserve(dataset()->num_parallel_calls_);
         StartWork(ctx.get());
         auto stop_cleanup =
             gtl::MakeCleanup([this, &ctx]() { StopWork(ctx.get()); });
+        {
+          tf_shared_lock l(mu_);
+          new_calls.reserve(num_parallel_calls_);
+        }
         while (true) {
           {
             mutex_lock l(mu_);
             while (!cancelled_ &&
-                   (num_calls_ >= dataset()->num_parallel_calls_ ||
+                   (num_calls_ >= num_parallel_calls_ ||
                     batch_results_.size() > MaxBatchResults() ||
                     (batch_results_.size() == MaxBatchResults() &&
                      call_counter_ % dataset()->batch_size_ == 0))) {
@@ -501,7 +522,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
               return;
             }
 
-            while (num_calls_ < dataset()->num_parallel_calls_ &&
+            while (num_calls_ < num_parallel_calls_ &&
                    (batch_results_.size() < MaxBatchResults() ||
                     (batch_results_.size() == MaxBatchResults() &&
                      call_counter_ % dataset()->batch_size_ != 0))) {
@@ -648,6 +669,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       // user specified level of parallelism and there are slots available in
       // the `batch_results_` buffer.
       condition_variable cond_var_;
+      // Identifies the maximum number of parallel calls.
+      int64 num_parallel_calls_ GUARDED_BY(mu_) = 0;
       // Counts the number of outstanding calls for this batch.
       int64 num_calls_ GUARDED_BY(mu_) = 0;
       // Counts the total number of calls.
@@ -671,7 +694,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     const Eigen::ThreadPoolDevice* device_;  // not owned
   };
 
-  const int graph_def_version_;
   const int op_version_;
   DataTypeVector output_types_;
   std::vector<PartialTensorShape> output_shapes_;
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index c7f929dbc1..63025d3371 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -17,11 +17,14 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/platform/cpu_info.h"
 
 namespace tensorflow {
 namespace data {
 namespace {
 
+const int kOptimizationPeriodThresholdMs = 60 * EnvTime::kSecondsToMicros;
+
 class ModelDatasetOp : public UnaryDatasetOpKernel {
  public:
   explicit ModelDatasetOp(OpKernelConstruction* ctx)
@@ -71,9 +74,8 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
     class Iterator : public DatasetIterator<Dataset> {
      public:
       explicit Iterator(const Params& params)
-          : DatasetIterator<Dataset>(params), model_(new model::Model()) {}
-
-      ~Iterator() override { model_->OutputToFile(); }
+          : DatasetIterator<Dataset>(params),
+            model_(std::make_shared<model::Model>()) {}
 
       Status Initialize(IteratorContext* ctx) override {
         IteratorContext ctx_with_model(CreateParams(ctx));
@@ -85,6 +87,21 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
         mutex_lock l(mu_);
+        int64 now = ctx->env()->NowMicros() / EnvTime::kMillisToMicros;
+        if (last_optimization_ms_ + optimization_period_ms_ < now) {
+          model_->Optimize(port::NumSchedulableCPUs());
+          // Exponentially increase the period of running the optimization until
+          // a threshold is reached.
+          if (optimization_period_ms_ < kOptimizationPeriodThresholdMs) {
+            if (optimization_period_ms_ << 1 < kOptimizationPeriodThresholdMs) {
+              optimization_period_ms_ <<= 1;
+            } else {
+              optimization_period_ms_ = kOptimizationPeriodThresholdMs;
+            }
+          }
+          last_optimization_ms_ =
+              ctx->env()->NowMicros() / EnvTime::kMillisToMicros;
+        }
         IteratorContext ctx_with_model(CreateParams(ctx));
         return input_impl_->GetNext(&ctx_with_model, out_tensors,
                                     end_of_sequence);
@@ -113,6 +130,8 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
      private:
       mutex mu_;
       std::shared_ptr<model::Model> model_;
+      int64 last_optimization_ms_ GUARDED_BY(mu_) = 0;
+      int64 optimization_period_ms_ GUARDED_BY(mu_) = 10;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
 
diff --git a/tensorflow/core/kernels/data/padded_batch_dataset_op.cc b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
index 73eeafd797..7b01c3b4e0 100644
--- a/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
@@ -207,7 +207,7 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
           : DatasetIterator<Dataset>(params) {}
 
       Status Initialize(IteratorContext* ctx) override {
-        SetMetadata(ctx, "batch_size", dataset()->batch_size_);
+        AddConstantParameter(ctx, "batch_size", dataset()->batch_size_);
         return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_);
       }
 
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index aa5e613e24..2f2db09508 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -252,7 +252,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status Initialize(IteratorContext* ctx) override {
-        SetMetadata(ctx, "parallelism", dataset()->cycle_length_);
+        AddConstantParameter(ctx, "parallelism", dataset()->cycle_length_);
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
         return dataset()->captured_func_->Instantiate(ctx);
@@ -1120,7 +1120,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
     int64 num_parallel_calls;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
                                             &num_parallel_calls));
-    OP_REQUIRES(ctx, num_parallel_calls > 0,
+    OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune,
                 errors::InvalidArgument(
                     "num_parallel_calls must be greater than zero."));
     OP_REQUIRES(
@@ -1233,6 +1233,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
             args_list_(params.dataset->cycle_length_),
             current_elements_(params.dataset->cycle_length_),
             element_in_use_(params.dataset->cycle_length_, false),
+            num_parallel_calls_(params.dataset->num_parallel_calls_),
             thread_pool_(new thread::ThreadPool(
                 Env::Default(), ThreadOptions(), "parallel_interleave",
                 dataset()->cycle_length_ /* num_threads */,
@@ -1250,7 +1251,24 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       }
 
       Status Initialize(IteratorContext* ctx) override {
-        SetMetadata(ctx, "parallelism", dataset()->num_parallel_calls_);
+        mutex_lock l(mu_);
+        if (num_parallel_calls_ == kAutoTune) {
+          num_parallel_calls_ = 1;
+          auto set_fn = [this](int64 value) {
+            {
+              mutex_lock l(mu_);
+              num_parallel_calls_ = value;
+            }
+            VLOG(2) << "setting parallelism knob to " << value;
+            cond_var_.notify_all();
+          };
+          AddTunableParameter(
+              ctx, "parallelism", num_parallel_calls_ /* value */, 1 /* min */,
+              dataset()->cycle_length_ /* max */, std::move(set_fn));
+        } else {
+          AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
+        }
+        AddConstantParameter(ctx, "cycle_length", dataset()->cycle_length_);
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
         return dataset()->captured_func_->Instantiate(ctx);
@@ -1459,7 +1477,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
             // not in use and there is space in the `invocation_results_` queue.
             while (!cancelled_ && (!end_of_input_ || num_open_ > 0) &&
                    (element_in_use_[cycle_index_] ||
-                    num_calls_ >= dataset()->num_parallel_calls_ ||
+                    num_calls_ >= num_parallel_calls_ ||
                     invocation_results_.size() >= MaxInvocationResults())) {
               StopWork(ctx.get());
               cond_var_.wait(l);
@@ -1472,7 +1490,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
             while (!element_in_use_[cycle_index_] &&
                    (!end_of_input_ || num_open_ > 0) &&
-                   num_calls_ < dataset()->num_parallel_calls_ &&
+                   num_calls_ < num_parallel_calls_ &&
                    invocation_results_.size() < MaxInvocationResults()) {
               if (!current_elements_[cycle_index_]) {
                 // Try to create a new iterator from the next input element.
@@ -1647,6 +1665,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       // Identifies the number of open iterators.
       int64 num_open_ GUARDED_BY(mu_) = 0;
 
+      // Identifies the maximum number of parallel calls.
+      int64 num_parallel_calls_ GUARDED_BY(mu_) = 0;
+
       // Identifies the number of outstanding calls.
       int64 num_calls_ GUARDED_BY(mu_) = 0;
 
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 0795987431..b584316d69 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -55,7 +55,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     int32 num_parallel_calls;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
                                             &num_parallel_calls));
-    OP_REQUIRES(ctx, num_parallel_calls > 0,
+    OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune,
                 errors::InvalidArgument(
                     "num_parallel_calls must be greater than zero."));
 
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 0b6e587881..5f6052ce83 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
+#include "tensorflow/core/platform/cpu_info.h"
 
 namespace tensorflow {
 namespace data {
@@ -55,7 +56,25 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   Status Initialize(IteratorContext* ctx) override {
-    SetMetadata(ctx, "parallelism", num_parallel_calls_);
+    mutex_lock l(mu_);
+    if (num_parallel_calls_ == kAutoTune) {
+      num_parallel_calls_ = 1;
+      auto set_fn = [this](int64 value) {
+        {
+          mutex_lock l(mu_);
+          num_parallel_calls_ = value;
+        }
+        VLOG(2) << "setting parallelism knob to " << value;
+        cond_var_.notify_all();
+      };
+      // TODO(jsimsa): Surface the number of threads used by `ctx->runner()` and
+      // use it here for the maximum.
+      AddTunableParameter(ctx, "parallelism", num_parallel_calls_ /* value */,
+                          1 /* min */, port::NumSchedulableCPUs() /* max */,
+                          std::move(set_fn));
+    } else {
+      AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
+    }
     TF_RETURN_IF_ERROR(
         input_dataset_->MakeIterator(ctx, prefix(), &input_impl_));
     if (init_func_) {
@@ -211,8 +230,6 @@ class ParallelMapIterator : public DatasetBaseIterator {
               std::move(done));
   }
 
-  int64 MaxInvocationResults() { return num_parallel_calls_; }
-
   Status ProcessResult(const std::shared_ptr<InvocationResult>& result,
                        std::vector<Tensor>* out_tensors,
                        bool* end_of_sequence) {
@@ -235,13 +252,16 @@ class ParallelMapIterator : public DatasetBaseIterator {
     StartWork(ctx.get());
     auto cleanup = gtl::MakeCleanup([this, ctx] { StopWork(ctx.get()); });
     std::vector<std::shared_ptr<InvocationResult>> new_calls;
-    new_calls.reserve(num_parallel_calls_);
+    {
+      tf_shared_lock l(mu_);
+      new_calls.reserve(num_parallel_calls_);
+    }
     while (true) {
       {
         mutex_lock l(mu_);
         while (!cancelled_ &&
                (num_calls_ >= num_parallel_calls_ ||
-                invocation_results_.size() >= MaxInvocationResults())) {
+                invocation_results_.size() >= num_parallel_calls_)) {
           StopWork(ctx.get());
           cond_var_.wait(l);
           StartWork(ctx.get());
@@ -250,7 +270,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
           return;
         }
         while (num_calls_ < num_parallel_calls_ &&
-               invocation_results_.size() < MaxInvocationResults()) {
+               invocation_results_.size() < num_parallel_calls_) {
           invocation_results_.emplace_back(new InvocationResult());
           new_calls.push_back(invocation_results_.back());
           num_calls_++;
@@ -305,7 +325,6 @@ class ParallelMapIterator : public DatasetBaseIterator {
   const DatasetBase* const input_dataset_;  // Not owned.
   const std::function<Status(IteratorContext*)> init_func_;
   const ParallelMapIteratorFunction map_func_;
-  const int32 num_parallel_calls_;
   // Used for coordination between the main thread and the runner thread.
   mutex mu_;
   // Used for coordination between the main thread and the runner thread. In
@@ -314,6 +333,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // parallelism and there are slots available in the `invocation_results_`
   // buffer.
   condition_variable cond_var_;
+  // Identifies the maximum number of parallel calls.
+  int64 num_parallel_calls_ GUARDED_BY(mu_) = 0;
   // Counts the number of outstanding calls.
   int64 num_calls_ GUARDED_BY(mu_) = 0;
   std::unique_ptr<IteratorBase> input_impl_;
-- 
GitLab


From 422158776bcd9ffbde485610fdd3af498a2d5669 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 17 Sep 2018 09:43:24 -0700
Subject: [PATCH 0260/1357] Increase tolerance in linalg_grad_test to fix
 #19935

Fixes #19935

PiperOrigin-RevId: 213286535
---
 tensorflow/python/kernel_tests/linalg_grad_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py
index cd6a34d657..e52f303fe0 100644
--- a/tensorflow/python/kernel_tests/linalg_grad_test.py
+++ b/tensorflow/python/kernel_tests/linalg_grad_test.py
@@ -120,7 +120,7 @@ def _GetMatrixBinaryFunctorGradientTest(functor_,
       delta = epsilon**(1.0 / 3.0)
       # tolerance obtained by looking at actual differences using
       # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
-      tol = 1e-6 if dtype_ == np.float64 else float32_tol_fudge * 0.04
+      tol = 1e-6 if dtype_ == np.float64 else float32_tol_fudge * 0.05
       # The gradients for a and b may be of very different magnitudes,
       # so to not get spurious failures we test them separately.
       for factor, factor_init in [a, a_np], [b, b_np]:
-- 
GitLab


From 7820ead0c58c9d90d7776bea31a294bbcc9a30f8 Mon Sep 17 00:00:00 2001
From: Samuel Matzek <smatzek@us.ibm.com>
Date: Mon, 30 Jul 2018 09:46:05 -0500
Subject: [PATCH 0261/1357] Make full model before calling set_model on
 callback

Commit 1b67ccbe8006eacffd268553abd01310e8b187d6 removed the _make_train_function calls from Keras training fit_generator for eager execution.

This breaks some callbacks that depend on the entire model to be populated on the set_model or on_train_begin methods.

This commit adds the method calls back in but guarded by an eager check.  It is not doing a revert / fix because the fix that removed the calls also put a test case in for eager fit_generator testing which we want to retain.
---
 tensorflow/python/keras/engine/training_generator.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py
index 413c1f4fba..2e074699da 100644
--- a/tensorflow/python/keras/engine/training_generator.py
+++ b/tensorflow/python/keras/engine/training_generator.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.keras import callbacks as cbks
 from tensorflow.python.keras.utils.data_utils import GeneratorEnqueuer
 from tensorflow.python.keras.utils.data_utils import OrderedEnqueuer
@@ -48,6 +49,10 @@ def fit_generator(model,
   epoch = initial_epoch
 
   do_validation = bool(validation_data)
+  if not context.executing_eagerly():
+    model._make_train_function()
+    if do_validation:
+      model._make_test_function()
 
   is_sequence = isinstance(generator, Sequence)
   if not is_sequence and use_multiprocessing and workers > 1:
@@ -233,6 +238,9 @@ def evaluate_generator(model,
                        use_multiprocessing=False,
                        verbose=0):
   """See docstring for `Model.evaluate_generator`."""
+  if not context.executing_eagerly():
+    model._make_test_function()
+
   if hasattr(model, 'metrics'):
     for m in model.stateful_metric_functions:
       m.reset_states()
@@ -342,6 +350,9 @@ def predict_generator(model,
                       use_multiprocessing=False,
                       verbose=0):
   """See docstring for `Model.predict_generator`."""
+  if not context.executing_eagerly():
+    model._make_test_function()
+
   steps_done = 0
   wait_time = 0.01
   all_outs = []
-- 
GitLab


From 66575e0537ba8952de8ebc45d45d1b9e4ba1b6ba Mon Sep 17 00:00:00 2001
From: Samuel Matzek <smatzek@us.ibm.com>
Date: Thu, 2 Aug 2018 13:39:48 -0500
Subject: [PATCH 0262/1357] Add unit test for fit_generator changes

Add unit test for fit_generator change for callbacks.
---
 .../python/keras/engine/training_test.py      | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 30be4131a4..465b4ad65f 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import metrics as metrics_module
+from tensorflow.python.keras import callbacks
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine.training_utils import weighted_masked_objective
 from tensorflow.python.keras.utils.generic_utils import slice_arrays
@@ -1190,6 +1191,37 @@ class TestGeneratorMethods(test.TestCase):
                                  use_multiprocessing=False,
                                  workers=0)
 
+  def test_fit_generator_with_callback(self):
+    model = keras.Sequential()
+    model.add(keras.layers.Dense(4, input_shape=(3,)))
+    optimizer = RMSPropOptimizer(learning_rate=0.001)
+    model.compile(optimizer, 'mse', metrics=['mae'])
+
+    x = np.random.random((10, 3))
+    y = np.random.random((10, 4))
+
+    def iterator():
+      while 1:
+        yield x, y
+
+    class TestCallback(callbacks.Callback):
+      def set_model(self, model):
+        # Check the model operations for the optimizer operations that
+        # the _make_train_function adds under a named scope for the
+        # optimizer. This ensurs the full model is populated before the
+        # set_model callback is called.
+        optimizer_name_scope = 'training/TFOptimizer/'
+        graph_def = ops.get_default_graph().as_graph_def()
+        for node in graph_def.node:
+            if node.name.startswith(optimizer_name_scope):
+                return
+        raise RuntimeError('The optimizer operations are not present in the '
+                           'model graph when the Callback.set_model function '
+                           'is called')
+
+    model.fit_generator(iterator(), steps_per_epoch=3, epochs=1,
+                        callbacks=[TestCallback()])
+
   def test_generator_methods_with_sample_weights(self):
     arr_data = np.random.random((50, 2))
     arr_labels = np.random.random((50,))
-- 
GitLab


From da3ccfda9b75f3cf60eb237d9a4da68c436e9f18 Mon Sep 17 00:00:00 2001
From: Samuel Matzek <smatzek@us.ibm.com>
Date: Mon, 17 Sep 2018 11:59:14 -0500
Subject: [PATCH 0263/1357] Move test to callbacks_test

---
 tensorflow/python/keras/callbacks_test.py     | 40 +++++++++++++++++++
 .../python/keras/engine/training_test.py      | 31 --------------
 2 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py
index b6fae19823..28f7614463 100644
--- a/tensorflow/python/keras/callbacks_test.py
+++ b/tensorflow/python/keras/callbacks_test.py
@@ -30,6 +30,7 @@ import numpy as np
 
 from tensorflow.core.framework import summary_pb2
 from tensorflow.python import keras
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import testing_utils
@@ -1222,6 +1223,45 @@ class KerasCallbacksTest(test.TestCase):
             callbacks=cbks,
             epochs=1)
 
+  def test_fit_generator_with_callback(self):
+
+    class TestCallback(keras.callbacks.Callback):
+      def set_model(self, model):
+        # Check the model operations for the optimizer operations that
+        # the _make_train_function adds under a named scope for the
+        # optimizer. This ensurs the full model is populated before the
+        # set_model callback is called.
+        optimizer_name_scope = 'training/' + model.optimizer.__class__.__name__
+        graph_def = ops.get_default_graph().as_graph_def()
+        for node in graph_def.node:
+            if node.name.startswith(optimizer_name_scope):
+                return
+        raise RuntimeError('The optimizer operations are not present in the '
+                           'model graph when the Callback.set_model function '
+                           'is called')
+    np.random.seed(1337)
+
+    def generator():
+      x = np.random.randn(10, 100).astype(np.float32)
+      y = np.random.randn(10, 10).astype(np.float32)
+      while True:
+        yield x, y
+
+    with self.cached_session():
+      model = testing_utils.get_small_sequential_mlp(
+          num_hidden=10, num_classes=10, input_dim=100)
+      model.compile(
+          loss='categorical_crossentropy',
+          optimizer='sgd',
+          metrics=['accuracy'])
+      model.fit_generator(
+          generator(),
+          steps_per_epoch=2,
+          epochs=1,
+          validation_data=generator(),
+          validation_steps=2,
+          callbacks=[TestCallback()],
+          verbose=0)
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 465b4ad65f..d8510c1f23 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -1191,37 +1191,6 @@ class TestGeneratorMethods(test.TestCase):
                                  use_multiprocessing=False,
                                  workers=0)
 
-  def test_fit_generator_with_callback(self):
-    model = keras.Sequential()
-    model.add(keras.layers.Dense(4, input_shape=(3,)))
-    optimizer = RMSPropOptimizer(learning_rate=0.001)
-    model.compile(optimizer, 'mse', metrics=['mae'])
-
-    x = np.random.random((10, 3))
-    y = np.random.random((10, 4))
-
-    def iterator():
-      while 1:
-        yield x, y
-
-    class TestCallback(callbacks.Callback):
-      def set_model(self, model):
-        # Check the model operations for the optimizer operations that
-        # the _make_train_function adds under a named scope for the
-        # optimizer. This ensurs the full model is populated before the
-        # set_model callback is called.
-        optimizer_name_scope = 'training/TFOptimizer/'
-        graph_def = ops.get_default_graph().as_graph_def()
-        for node in graph_def.node:
-            if node.name.startswith(optimizer_name_scope):
-                return
-        raise RuntimeError('The optimizer operations are not present in the '
-                           'model graph when the Callback.set_model function '
-                           'is called')
-
-    model.fit_generator(iterator(), steps_per_epoch=3, epochs=1,
-                        callbacks=[TestCallback()])
-
   def test_generator_methods_with_sample_weights(self):
     arr_data = np.random.random((50, 2))
     arr_labels = np.random.random((50,))
-- 
GitLab


From 3fe9c54b6181bc2bbfa535b28ecb7d3b74342bd8 Mon Sep 17 00:00:00 2001
From: Samuel Matzek <smatzek@us.ibm.com>
Date: Mon, 17 Sep 2018 12:13:15 -0500
Subject: [PATCH 0264/1357] Remove unnecessary import of callbacks

---
 tensorflow/python/keras/engine/training_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index d8510c1f23..30be4131a4 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -31,7 +31,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import metrics as metrics_module
-from tensorflow.python.keras import callbacks
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine.training_utils import weighted_masked_objective
 from tensorflow.python.keras.utils.generic_utils import slice_arrays
-- 
GitLab


From 531d08bd10125b83030f1165d8562e23b20f4941 Mon Sep 17 00:00:00 2001
From: Karmel Allison <karmel@google.com>
Date: Mon, 17 Sep 2018 10:37:20 -0700
Subject: [PATCH 0265/1357] Minor docstring change: update link to
 saved_model_cli.

PiperOrigin-RevId: 213296537
---
 tensorflow/python/tools/saved_model_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index d8ba13d8d2..3dbccd1409 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -15,7 +15,7 @@
 """Command-line interface to inspect and execute a graph in a SavedModel.
 
 For detailed usages and examples, please refer to:
-https://www.tensorflow.org/guide/saved_model_cli
+https://www.tensorflow.org/guide/saved_model#cli_to_inspect_and_execute_savedmodel
 
 """
 
-- 
GitLab


From 12718f0204bad8aaa3984c7a176914451eb0bbab Mon Sep 17 00:00:00 2001
From: Samuel Matzek <smatzek@us.ibm.com>
Date: Mon, 17 Sep 2018 13:24:29 -0500
Subject: [PATCH 0266/1357] Fix pylint error

---
 tensorflow/python/keras/callbacks_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py
index 28f7614463..467bc4cdc4 100644
--- a/tensorflow/python/keras/callbacks_test.py
+++ b/tensorflow/python/keras/callbacks_test.py
@@ -1234,8 +1234,8 @@ class KerasCallbacksTest(test.TestCase):
         optimizer_name_scope = 'training/' + model.optimizer.__class__.__name__
         graph_def = ops.get_default_graph().as_graph_def()
         for node in graph_def.node:
-            if node.name.startswith(optimizer_name_scope):
-                return
+          if node.name.startswith(optimizer_name_scope):
+            return
         raise RuntimeError('The optimizer operations are not present in the '
                            'model graph when the Callback.set_model function '
                            'is called')
-- 
GitLab


From e576073771a7484ec27f876963bf731d33b83e38 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Mon, 17 Sep 2018 11:22:36 -0700
Subject: [PATCH 0267/1357] [Java]: Release 1.11.0-rc0

PiperOrigin-RevId: 213305616
---
 tensorflow/java/maven/libtensorflow/pom.xml              | 2 +-
 tensorflow/java/maven/libtensorflow_jni/pom.xml          | 2 +-
 tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml      | 2 +-
 tensorflow/java/maven/pom.xml                            | 2 +-
 tensorflow/java/maven/proto/pom.xml                      | 2 +-
 tensorflow/java/maven/spark-tensorflow-connector/pom.xml | 2 +-
 tensorflow/java/maven/tensorflow-hadoop/pom.xml          | 2 +-
 tensorflow/java/maven/tensorflow/pom.xml                 | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml
index f9093ce385..cf6a64daeb 100644
--- a/tensorflow/java/maven/libtensorflow/pom.xml
+++ b/tensorflow/java/maven/libtensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.10.0</version>
+    <version>1.11.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml
index 1208956dec..978c3cbf6d 100644
--- a/tensorflow/java/maven/libtensorflow_jni/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.10.0</version>
+    <version>1.11.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
index 755449cb3c..d1378b5d56 100644
--- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.10.0</version>
+    <version>1.11.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni_gpu</artifactId>
diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml
index e1bf2c7dba..1342b0e9bb 100644
--- a/tensorflow/java/maven/pom.xml
+++ b/tensorflow/java/maven/pom.xml
@@ -6,7 +6,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.tensorflow</groupId>
   <artifactId>parentpom</artifactId>
-  <version>1.10.0</version>
+  <version>1.11.0-rc0</version>
   <packaging>pom</packaging>
 
   <url>https://www.tensorflow.org</url>
diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml
index b89f042567..19ff65a095 100644
--- a/tensorflow/java/maven/proto/pom.xml
+++ b/tensorflow/java/maven/proto/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.10.0</version>
+    <version>1.11.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>proto</artifactId>
diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
index 1b7995be2c..ba7e9f4c69 100644
--- a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
+++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
@@ -6,7 +6,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>spark-tensorflow-connector_2.11</artifactId>
     <packaging>jar</packaging>
-    <version>1.10.0</version>
+    <version>1.11.0-rc0</version>
     <name>spark-tensorflow-connector</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord connector for Apache Spark DataFrames</description>
diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
index 0fe6f4dce4..f913faffa2 100644
--- a/tensorflow/java/maven/tensorflow-hadoop/pom.xml
+++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
@@ -5,7 +5,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>tensorflow-hadoop</artifactId>
     <packaging>jar</packaging>
-    <version>1.10.0</version>
+    <version>1.11.0-rc0</version>
     <name>tensorflow-hadoop</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop</description>
diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml
index 0de90244b1..f6cb595885 100644
--- a/tensorflow/java/maven/tensorflow/pom.xml
+++ b/tensorflow/java/maven/tensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.10.0</version>
+    <version>1.11.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>tensorflow</artifactId>
-- 
GitLab


From deec3bf519bd51f743db15ae28a6335d43ad5dfe Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 17 Sep 2018 11:36:50 -0700
Subject: [PATCH 0268/1357] Fix and complete StreamExecutor's DoFusedConvolve:
 * bias_nd is set to have CUDNN_DATA_FLOAT, even though BiasType is not float.
 * double is supported but not exposed through the public interface. *
 DoFusedConvolveImpl has duplicated information in its template parameter
 list.

PiperOrigin-RevId: 213308435
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 54 ++++++++++++---------
 tensorflow/stream_executor/cuda/cuda_dnn.h  | 16 +++---
 tensorflow/stream_executor/stream.cc        | 38 +++++++++++++++
 3 files changed, 77 insertions(+), 31 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 3c533c7f99..63ab367086 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -149,6 +149,16 @@ cudnnDataType_t GetCudnnDataType<Eigen::half>() {
   return CUDNN_DATA_HALF;
 }
 
+template <>
+cudnnDataType_t GetCudnnDataType<int8>() {
+  return CUDNN_DATA_INT8;
+}
+
+template <>
+cudnnDataType_t GetCudnnDataType<int32>() {
+  return CUDNN_DATA_INT32;
+}
+
 // RAII wrapper for all calls to cuDNN with a cuDNN handle argument.
 //
 // See CudnnAccess::GetHandle() for details.
@@ -2486,19 +2496,19 @@ port::Status CudnnSupport::DoConvolveImpl(
   return port::Status::OK();
 }
 
-template <typename Type, typename BiasType, typename ScaleType,
-          int cudnn_data_type, int cudnn_compute_type>
+template <typename AccumulatorType, typename ElementType, typename BiasType,
+          typename ScaleType>
 port::Status CudnnSupport::DoFusedConvolveImpl(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
-    const DeviceMemory<Type>& conv_input_data, ScaleType conv_input_scale,
-    const dnn::FilterDescriptor& filter_descriptor,
-    const DeviceMemory<Type>& filter_data,
+    const DeviceMemory<ElementType>& conv_input_data,
+    ScaleType conv_input_scale, const dnn::FilterDescriptor& filter_descriptor,
+    const DeviceMemory<ElementType>& filter_data,
     const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const DeviceMemory<Type>& side_input_data, ScaleType side_input_scale,
-    const dnn::BatchDescriptor& bias_descriptor,
+    const DeviceMemory<ElementType>& side_input_data,
+    ScaleType side_input_scale, const dnn::BatchDescriptor& bias_descriptor,
     const DeviceMemory<BiasType>& biases, dnn::ActivationMode activation_mode,
     const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<Type>* output_data, ScratchAllocator* scratch_allocator,
+    DeviceMemory<ElementType>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
   if (activation_mode != dnn::ActivationMode::kRelu &&
@@ -2508,15 +2518,15 @@ port::Status CudnnSupport::DoFusedConvolveImpl(
                         "Relu or None activation.");
   }
 
-  CudnnTensorDescriptor conv_input_nd(
-      conv_input_descriptor, static_cast<cudnnDataType_t>(cudnn_data_type));
-  CudnnTensorDescriptor output_nd(
-      output_descriptor, static_cast<cudnnDataType_t>(cudnn_data_type));
+  CudnnTensorDescriptor conv_input_nd(conv_input_descriptor,
+                                      GetCudnnDataType<ElementType>());
+  CudnnTensorDescriptor output_nd(output_descriptor,
+                                  GetCudnnDataType<ElementType>());
   CudnnFilterDescriptor filter(filter_descriptor,
-                               static_cast<cudnnDataType_t>(cudnn_data_type));
-  CudnnTensorDescriptor bias_nd(bias_descriptor, CUDNN_DATA_FLOAT);
-  CudnnConvolutionDescriptor conv(
-      convolution_descriptor, static_cast<cudnnDataType_t>(cudnn_compute_type));
+                               GetCudnnDataType<ElementType>());
+  CudnnTensorDescriptor bias_nd(bias_descriptor, GetCudnnDataType<BiasType>());
+  CudnnConvolutionDescriptor conv(convolution_descriptor,
+                                  GetCudnnDataType<AccumulatorType>());
 
   auto cudnn = cudnn_->GetHandle(parent_, stream);
 
@@ -2933,8 +2943,7 @@ bool CudnnSupport::DoFusedConvolve(
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
   return IsStatusOk(
-      DoFusedConvolveImpl<double, double, double, CUDNN_DATA_DOUBLE,
-                          CUDNN_DATA_DOUBLE>(
+      DoFusedConvolveImpl<double>(
           stream, conv_input_descriptor, conv_input_data, conv_input_scale,
           filter_descriptor, filter_data, convolution_descriptor,
           side_input_data, side_input_scale, bias_descriptor, biases,
@@ -2957,8 +2966,7 @@ bool CudnnSupport::DoFusedConvolve(
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
   return IsStatusOk(
-      DoFusedConvolveImpl<float, float, float, CUDNN_DATA_FLOAT,
-                          CUDNN_DATA_FLOAT>(
+      DoFusedConvolveImpl<float>(
           stream, conv_input_descriptor, conv_input_data, conv_input_scale,
           filter_descriptor, filter_data, convolution_descriptor,
           side_input_data, side_input_scale, bias_descriptor, biases,
@@ -2982,8 +2990,7 @@ bool CudnnSupport::DoFusedConvolve(
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
   return IsStatusOk(
-      DoFusedConvolveImpl<Eigen::half, Eigen::half, float, CUDNN_DATA_HALF,
-                          CUDNN_DATA_FLOAT>(
+      DoFusedConvolveImpl<float>(
           stream, conv_input_descriptor, conv_input_data, conv_input_scale,
           filter_descriptor, filter_data, convolution_descriptor,
           side_input_data, side_input_scale, bias_descriptor, biases,
@@ -3014,8 +3021,7 @@ bool CudnnSupport::DoFusedConvolve(
     return false;
   }
   return IsStatusOk(
-      DoFusedConvolveImpl<int8, float, float, CUDNN_DATA_INT8x4,
-                          CUDNN_DATA_INT32>(
+      DoFusedConvolveImpl<int32>(
           stream, conv_input_descriptor, conv_input_data, conv_input_scale,
           filter_descriptor, filter_data, convolution_descriptor,
           side_input_data, side_input_scale, bias_descriptor, biases,
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index 9d88f971bb..74f6f935b8 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -674,19 +674,21 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result);
 
-  template <typename Type, typename BiasType, typename ScaleType,
-            int cudnn_data_type, int cudnn_compute_type>
+  template <typename AccumulatorType, typename ElementType, typename BiasType,
+            typename ScaleType>
   port::Status DoFusedConvolveImpl(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
-      const DeviceMemory<Type>& conv_input_data, ScaleType conv_input_scale,
+      const DeviceMemory<ElementType>& conv_input_data,
+      ScaleType conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<Type>& filter_data,
+      const DeviceMemory<ElementType>& filter_data,
       const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const DeviceMemory<Type>& side_input_data, ScaleType side_input_scale,
-      const dnn::BatchDescriptor& bias_descriptor,
+      const DeviceMemory<ElementType>& side_input_data,
+      ScaleType side_input_scale, const dnn::BatchDescriptor& bias_descriptor,
       const DeviceMemory<BiasType>& biases, dnn::ActivationMode activation_mode,
       const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<Type>* output_data, ScratchAllocator* scratch_allocator,
+      DeviceMemory<ElementType>* output_data,
+      ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result);
 
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 19d3b2389a..69558fd14b 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -585,6 +585,44 @@ Stream &Stream::ThenConvolveWithScratch(
   return *this;
 }
 
+Stream &Stream::ThenFusedConvolveWithAlgorithm(
+    const dnn::BatchDescriptor &conv_input_descriptor,
+    const DeviceMemory<double> &conv_input_data, double conv_input_scale,
+    const dnn::FilterDescriptor &filter_descriptor,
+    const DeviceMemory<double> &filter_data,
+    const dnn::ConvolutionDescriptor &convolution_descriptor,
+    const DeviceMemory<double> &side_input_data, double side_input_scale,
+    const dnn::BatchDescriptor &bias_descriptor,
+    const DeviceMemory<double> &biases, dnn::ActivationMode activation_mode,
+    const dnn::BatchDescriptor &output_descriptor, DeviceMemory<double> *output,
+    ScratchAllocator *scratch_allocator,
+    const dnn::AlgorithmConfig &algorithm_config,
+    dnn::ProfileResult *output_profile_result) {
+  VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data),
+            PARAM(conv_input_scale), PARAM(filter_descriptor),
+            PARAM(filter_data), PARAM(convolution_descriptor), PARAM(biases),
+            PARAM(side_input_data), PARAM(side_input_scale),
+            PARAM(activation_mode), PARAM(output_descriptor), PARAM(output),
+            PARAM(algorithm_config));
+
+  if (ok()) {
+    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+      auto status = dnn->DoFusedConvolve(
+          this, conv_input_descriptor, conv_input_data, conv_input_scale,
+          filter_descriptor, filter_data, convolution_descriptor,
+          side_input_data, side_input_scale, bias_descriptor, biases,
+          activation_mode, output_descriptor, output, scratch_allocator,
+          algorithm_config, output_profile_result);
+      if (!status && !output_profile_result) {
+        SetError();
+      }
+    } else {
+      SetErrorAndLogNoDnnSupport();
+    }
+  }
+  return *this;
+}
+
 Stream &Stream::ThenFusedConvolveWithAlgorithm(
     const dnn::BatchDescriptor &conv_input_descriptor,
     const DeviceMemory<float> &conv_input_data, float conv_input_scale,
-- 
GitLab


From f9cf0e5496569d4a9a1edb25fba1d6afd6dab6b7 Mon Sep 17 00:00:00 2001
From: Alan Chiao <alanchiao@google.com>
Date: Mon, 17 Sep 2018 12:07:28 -0700
Subject: [PATCH 0269/1357] Numerics tweak to symmetric quantization.

PiperOrigin-RevId: 213314024
---
 .../lite/kernels/internal/optimized/neon_tensor_utils.cc        | 2 +-
 .../lite/kernels/internal/reference/portable_tensor_utils.cc    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc
index 27418178fd..36c15dbc57 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc
@@ -457,7 +457,7 @@ void NeonSymmetricQuantizeFloats(const float* values, const int size,
     return;
   }
   *scaling_factor = range / kScale;
-  const float scaling_factor_inv = 1.0f / *scaling_factor;
+  const float scaling_factor_inv = kScale / range;
 
   const int postamble_start =
       size - (size & (2 * kFloatWeightsPerNeonLane - 1));
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
index 77e60adc18..70d25c4bd9 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
@@ -55,7 +55,7 @@ void PortableSymmetricQuantizeFloats(const float* values, const int size,
     return;
   }
   *scaling_factor = range / kScale;
-  const float scaling_factor_inv = 1.0f / *scaling_factor;
+  const float scaling_factor_inv = kScale / range;
   for (int i = 0; i < size; ++i) {
     const int32_t quantized_value =
         static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv));
-- 
GitLab


From 779d87cfc1421eb6be2f9cc4ae29bca77c8d2929 Mon Sep 17 00:00:00 2001
From: Reed Wanderman-Milne <reedwm@google.com>
Date: Mon, 17 Sep 2018 12:18:48 -0700
Subject: [PATCH 0270/1357] Do not segfault in Conv2d/3d if cuDNN version is
 too low.

PiperOrigin-RevId: 213315830
---
 tensorflow/core/kernels/conv_ops.cc    | 12 +++++++++---
 tensorflow/core/kernels/conv_ops_3d.cc | 14 ++++++++++----
 tensorflow/core/kernels/conv_ops_gpu.h |  6 +++++-
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index 6f5c8d8461..717a9f40a9 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -731,9 +731,15 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
   if (cudnn_use_autotune &&
       !AutoTuneConv::GetInstance()->Find(conv_parameters, &algorithm_config)) {
     std::vector<AlgorithmDesc> algorithms;
-    CHECK(stream->parent()->GetConvolveAlgorithms(
-        conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(stream->parent()),
-        &algorithms));
+    OP_REQUIRES(
+        ctx,
+        stream->parent()->GetConvolveAlgorithms(
+            conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(
+                stream->parent()),
+            &algorithms),
+        errors::Unknown("Failed to get convolution algorithm. This is probably "
+                        "because cuDNN failed to initialize, so try looking to "
+                        "see if a warning log message was printed above."));
     ProfileResult best_result;
     ProfileResult best_result_no_scratch;
     for (auto profile_algorithm : algorithms) {
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index 5c2b88924b..83df4dce38 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -435,10 +435,16 @@ struct LaunchConvOp<GPUDevice, T> {
     if (cudnn_use_autotune && !AutoTuneConv3d::GetInstance()->Find(
                                   conv_parameters, &algorithm_config)) {
       std::vector<AlgorithmDesc> algorithms;
-      CHECK(stream->parent()->GetConvolveAlgorithms(
-          conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(
-              stream->parent()),
-          &algorithms));
+      OP_REQUIRES(ctx,
+                  stream->parent()->GetConvolveAlgorithms(
+                      conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(
+                          stream->parent()),
+                      &algorithms),
+                  errors::Unknown(
+                      "Failed to get convolution algorithm. This is probably "
+                      "because cuDNN failed to initialize, so try looking to "
+                      "see if a warning log message was printed above."));
+
       ProfileResult best_result;
       ProfileResult best_result_no_scratch;
       for (auto profile_algorithm : algorithms) {
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index afc611f277..21d135decd 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -142,8 +142,12 @@ class ConvParameters {
   template <typename T>
   bool ShouldIncludeWinogradNonfusedAlgo(
       se::StreamExecutor* stream_exec) const {
+    auto* dnn_support = stream_exec->AsDnn();
+    if (!dnn_support) {
+      return false;
+    }
     // Skip this check for cuDNN 7 and newer.
-    auto version = stream_exec->AsDnn()->GetVersion();
+    auto version = dnn_support->GetVersion();
     if (version.ok() && version.ValueOrDie().major_version() >= 7) {
       return true;
     }
-- 
GitLab


From fbd48c7a8bb088d92988fce4f757d1719e9c57a2 Mon Sep 17 00:00:00 2001
From: Guozhong Zhuang <guozhong.zhuang@intel.com>
Date: Mon, 17 Sep 2018 12:24:43 -0700
Subject: [PATCH 0271/1357] fix type error within an environment variable name

---
 tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc | 2 +-
 tensorflow/core/kernels/mkl_conv_grad_input_ops.cc  | 2 +-
 tensorflow/core/kernels/mkl_conv_ops.cc             | 2 +-
 tensorflow/core/util/mkl_util.h                     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index 52157ed5fb..f406ad2ab5 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -853,7 +853,7 @@ class MklConvCustomBackpropFilterOp
 
       // MKL DNN allocates large buffers when a conv gradient filter primtive is
       // created. So we don't cache conv backward primitives when the env
-      // variable TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE is set to true.
+      // variable TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE is set to true.
       bool do_not_cache = MklPrimitiveFactory<T>::IsPrimitiveMemOptEnabled();
       conv_bwd_filter = MklConvBwdFilterPrimitiveFactory<T>::Get(
           convBwdFilterDims, do_not_cache);
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index c38c9cc27c..a501ce2c93 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -713,7 +713,7 @@ class MklConvCustomBackpropInputOp : public MklConvBackpropCommonOp<Device, T> {
           TFPaddingToMklDnnPadding(this->padding_));
 
       // We don't cache those primitves if the env variable
-      // TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE is true and if primitve descriptor
+      // TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE is true and if primitve descriptor
       // includes potentialy large buffers. MKL DNN allocates buffers
       // in the following cases
       //   1. Legacy CPU without AVX512/AVX2, or
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 184e0cb003..b332edad0a 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -901,7 +901,7 @@ class MklConvOp : public OpKernel {
 
       // In some cases, primitve descriptor includes potentialy large buffers,
       // we don't cache those primitves if the env variable
-      // TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE is true. MKL DNN allocates buffers
+      // TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE is true. MKL DNN allocates buffers
       // in the following cases
       //   1. Legacy CPU without AVX512/AVX2, or
       //   2. 1x1 convolution with stride != 1
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 680211edff..5ea8f2ee47 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -2040,7 +2040,7 @@ class MklPrimitiveFactory {
   /// Fuction to check whether primitive memory optimization is enabled
   static inline bool IsPrimitiveMemOptEnabled() {
     bool is_primitive_mem_opt_enabled = true;
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE", true,
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE", true,
           &is_primitive_mem_opt_enabled));
     return is_primitive_mem_opt_enabled;
   }
-- 
GitLab


From 0d9868d8f9c01c1402ae99d672599c4bac6e787d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 12:20:03 -0700
Subject: [PATCH 0272/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213316034
---
 .../internal/reference/reference_ops.h        | 215 ++++++++++++++----
 1 file changed, 165 insertions(+), 50 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 77927af227..09a4ba7701 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -511,24 +511,25 @@ inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,
   }
 }
 
-inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
-                           const float* weights_data,
-                           const Dims<4>& weights_dims, const float* bias_data,
-                           const Dims<4>& bias_dims,
-                           float output_activation_min,
-                           float output_activation_max, float* output_data,
-                           const Dims<4>& output_dims) {
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& weights_shape,
+    const float* weights_data, const RuntimeShape& bias_shape,
+    const float* bias_data, const RuntimeShape& output_shape,
+    float* output_data) {
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
   // TODO(benoitjacob): This really should be:
   //     const int batches = ArraySize(output_dims, 1);
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) *
-                      ArraySize(output_dims, 3);
-  const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0);
-  const int accum_depth = ArraySize(weights_dims, 0);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims));
+  const int output_dims_count = output_shape.DimensionsCount();
+  const int weights_dims_count = weights_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
+  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
+                                       output_shape, output_dims_count - 1);
+  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
   for (int b = 0; b < batches; ++b) {
     for (int out_c = 0; out_c < output_depth; ++out_c) {
       float total = 0.f;
@@ -538,7 +539,7 @@ inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
       }
       float bias_value = 0.0f;
       if (bias_data) {
-        bias_value = bias_data[Offset(bias_dims, out_c, 0, 0, 0)];
+        bias_value = bias_data[out_c];
       }
       output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
           total + bias_value, output_activation_min, output_activation_max);
@@ -546,6 +547,26 @@ inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+                           const float* weights_data,
+                           const Dims<4>& weights_dims, const float* bias_data,
+                           const Dims<4>& bias_dims,
+                           float output_activation_min,
+                           float output_activation_max, float* output_data,
+                           const Dims<4>& output_dims) {
+  tflite::FullyConnectedParams op_params;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(weights_dims), weights_data,
+                 DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+                 output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void FullyConnected(const float* input_data, const Dims<4>& input_dims,
@@ -559,28 +580,35 @@ void FullyConnected(const float* input_data, const Dims<4>& input_dims,
                  output_data, output_dims);
 }
 
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                           int32 input_offset, const uint8* filter_data,
-                           const Dims<4>& filter_dims, int32 filter_offset,
-                           const int32* bias_data, const Dims<4>& bias_dims,
-                           int32 output_offset, int32 output_multiplier,
-                           int output_shift, int32 output_activation_min,
-                           int32 output_activation_max, uint8* output_data,
-                           const Dims<4>& output_dims,
-                           gemmlowp::GemmContext* gemm_context) {
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    uint8* output_data, gemmlowp::GemmContext* gemm_context) {
   (void)gemm_context;  // only used in optimized code.
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
   // TODO(benoitjacob): This really should be:
   //     const int batches = ArraySize(output_dims, 1);
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) *
-                      ArraySize(output_dims, 3);
-  const int output_depth = MatchingArraySize(filter_dims, 1, output_dims, 0);
-  const int accum_depth = ArraySize(filter_dims, 0);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
   for (int b = 0; b < batches; ++b) {
     for (int out_c = 0; out_c < output_depth; ++out_c) {
       int32 acc = 0;
@@ -590,7 +618,7 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
         acc += (filter_val + filter_offset) * (input_val + input_offset);
       }
       if (bias_data) {
-        acc += bias_data[Offset(bias_dims, out_c, 0, 0, 0)];
+        acc += bias_data[out_c];
       }
       acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
                                           kReverseShift * output_shift);
@@ -602,16 +630,47 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
                            int32 input_offset, const uint8* filter_data,
                            const Dims<4>& filter_dims, int32 filter_offset,
                            const int32* bias_data, const Dims<4>& bias_dims,
                            int32 output_offset, int32 output_multiplier,
                            int output_shift, int32 output_activation_min,
-                           int32 output_activation_max, int16* output_data,
+                           int32 output_activation_max, uint8* output_data,
                            const Dims<4>& output_dims,
                            gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    int16* output_data, gemmlowp::GemmContext* gemm_context) {
   (void)gemm_context;  // only used in optimized code.
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
   TFLITE_DCHECK_EQ(output_offset, 0);
   // TODO(benoitjacob): This really should be:
@@ -619,12 +678,12 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) *
-                      ArraySize(output_dims, 3);
-  const int output_depth = MatchingArraySize(filter_dims, 1, output_dims, 0);
-  const int accum_depth = ArraySize(filter_dims, 0);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
   for (int b = 0; b < batches; ++b) {
     for (int out_c = 0; out_c < output_depth; ++out_c) {
       // Internal accumulation.
@@ -651,27 +710,60 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                           int32 input_offset, const uint8* filter_data,
+                           const Dims<4>& filter_dims, int32 filter_offset,
+                           const int32* bias_data, const Dims<4>& bias_dims,
+                           int32 output_offset, int32 output_multiplier,
+                           int output_shift, int32 output_activation_min,
+                           int32 output_activation_max, int16* output_data,
+                           const Dims<4>& output_dims,
+                           gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
 inline void ShuffledFullyConnected(
-    const uint8* input_data, const Dims<4>& input_dims,
-    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
-    int output_shift, int32 output_activation_min, int32 output_activation_max,
-    int16* output_data, const Dims<4>& output_dims,
-    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& weights_shape,
+    const uint8* shuffled_weights_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    int16* output_data, uint8* shuffled_input_workspace_data,
+    gemmlowp::GemmContext* gemm_context) {
   (void)gemm_context;  // only used in optimized code.
-
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
   // TODO(benoitjacob): This really should be:
   //     const int batches = ArraySize(output_dims, 1);
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) *
-                      ArraySize(output_dims, 3);
-  const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0);
-  const int accum_depth = ArraySize(weights_dims, 0);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims));
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
   TFLITE_DCHECK((accum_depth % 16) == 0);
   TFLITE_DCHECK((output_depth % 4) == 0);
 
@@ -799,6 +891,29 @@ inline void ShuffledFullyConnected(
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void ShuffledFullyConnected(
+    const uint8* input_data, const Dims<4>& input_dims,
+    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
+    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
+    int output_shift, int32 output_activation_min, int32 output_activation_max,
+    int16* output_data, const Dims<4>& output_dims,
+    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data,
+                         DimsToShape(weights_dims), shuffled_weights_data,
+                         DimsToShape(bias_dims), bias_data,
+                         DimsToShape(output_dims), output_data,
+                         shuffled_input_workspace_data, gemm_context);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-- 
GitLab


From 3fe7b38347eaf7f1fb764cc2ac92de0ce7bc51e5 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Mon, 17 Sep 2018 12:23:18 -0700
Subject: [PATCH 0273/1357] [XLA] Allow adding extra instructions in
 HloComputation::CloneWithReplacements

PiperOrigin-RevId: 213316504
---
 tensorflow/compiler/xla/service/hlo_computation.cc       | 8 ++++++--
 tensorflow/compiler/xla/service/hlo_computation.h        | 5 ++++-
 tensorflow/compiler/xla/service/while_loop_simplifier.cc | 5 +++--
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 601a008d9f..e9e70b2c57 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -916,13 +916,14 @@ std::unique_ptr<HloComputation> HloComputation::Clone(
   return CloneWithReplacements(
       /*replacements=*/std::unordered_map<const HloInstruction*,
                                           std::unique_ptr<HloInstruction>>(),
-      context, suffix);
+      /*extras=*/{}, context, suffix);
 }
 
 std::unique_ptr<HloComputation> HloComputation::CloneWithReplacements(
     std::unordered_map<const HloInstruction*, std::unique_ptr<HloInstruction>>
         replacements,
-    HloCloneContext* context, const string& suffix) {
+    absl::Span<HloInstruction*> extras, HloCloneContext* context,
+    const string& suffix) {
   std::unique_ptr<HloCloneContext> context_ptr;
   if (context == nullptr) {
     context_ptr = absl::make_unique<HloCloneContext>(parent(), suffix);
@@ -944,6 +945,9 @@ std::unique_ptr<HloComputation> HloComputation::CloneWithReplacements(
 
   VLOG(1) << "Cloning " << name() << " --> " << suffix << "\n";
   std::vector<HloInstruction*> postorder;
+  for (HloInstruction* instr : extras) {
+    postorder.push_back(instr);
+  }
   for (HloInstruction* instr : MakeInstructionPostOrder()) {
     if (HloInstruction* replacement = replace(instr)) {
       postorder.push_back(replacement);
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index a880e9ab30..e7c98aae23 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -333,10 +333,13 @@ class HloComputation {
   //
   // If replacements maps a key to nullptr, we remove that instruction from the
   // new computation.
+  // If additional instructions are used by instructions in replacement map,
+  // they must be passed in post-order in the extras span.
   std::unique_ptr<HloComputation> CloneWithReplacements(
       std::unordered_map<const HloInstruction*, std::unique_ptr<HloInstruction>>
           replacements,
-      HloCloneContext* context = nullptr, const string& suffix = "clone");
+      absl::Span<HloInstruction*> extras, HloCloneContext* context = nullptr,
+      const string& suffix = "clone");
 
   // Returns true if the given instruction can be removed from the computation.
   // Parameter instructions cannot be removed without violating invariants of
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index 6a7bfe3f12..9a74f22395 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -252,7 +252,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
   // Create the new while condition, body, and init value.
   std::unique_ptr<HloComputation> new_while_cond =
       while_cond->CloneWithReplacements(
-          make_while_computation_replacements(while_cond));
+          make_while_computation_replacements(while_cond), /*extras=*/{});
 
   std::unordered_map<const HloInstruction*, std::unique_ptr<HloInstruction>>
       while_body_replacements = make_while_computation_replacements(while_body);
@@ -265,7 +265,8 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
   while_body_replacements.emplace(
       while_body_root, HloInstruction::CreateTuple(new_while_body_root_elems));
   std::unique_ptr<HloComputation> new_while_body =
-      while_body->CloneWithReplacements(std::move(while_body_replacements));
+      while_body->CloneWithReplacements(std::move(while_body_replacements),
+                                        /*extras=*/{});
 
   // Add a new while_init instruction that repackages the old while_init
   // instruction's elements.  We rely on the AlgebraicSimplifier and DCE to
-- 
GitLab


From adae337d05251963ef0905e024dfdc07b6d0aae2 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Mon, 17 Sep 2018 12:32:22 -0700
Subject: [PATCH 0274/1357] GradientTape: Documentation formatting tweak.

PiperOrigin-RevId: 213318051
---
 tensorflow/python/eager/backprop.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index be392c7a0f..11336efebb 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -648,8 +648,8 @@ class GradientTape(object):
   Operations are recorded if they are executed within this context manager and
   at least one of their inputs is being "watched".
 
-  Trainable variables (created by `tf.Variable` or `tf.get_variable`,
-  trainable=True is default in both cases) are automatically watched. Tensors
+  Trainable variables (created by `tf.Variable` or `tf.get_variable`, where
+  `trainable=True` is default in both cases) are automatically watched. Tensors
   can be manually watched by invoking the `watch` method on this context
   manager.
 
-- 
GitLab


From de3fa499bb05c595f9e341c7d507b47b8d44ae90 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Mon, 17 Sep 2018 12:57:26 -0700
Subject: [PATCH 0275/1357] [XLA] Add ReduceWindow test.

PiperOrigin-RevId: 213322116
---
 tensorflow/compiler/xla/tests/reduce_window_test.cc | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index 63491a90bf..c25ccafaf8 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -1303,11 +1303,19 @@ struct R1ReduceWindowTestData {
      /*pad_high=*/{0},
      /*reducer=*/Reducer::kAdd},
 
+    // The pattern generated by inclusive scan (cumsum/cumprod).
     {/*base_bounds=*/{4096}, /*window_bounds=*/{4096},
      /*strides=*/{1},
      /*pad_low=*/{4095},
      /*pad_high=*/{0},
      /*reducer=*/Reducer::kMax},
+
+    // The pattern generated by exclusive scan (cumsum/cumprod).
+    {/*base_bounds=*/{4096}, /*window_bounds=*/{4096},
+     /*strides=*/{1},
+     /*pad_low=*/{4096},
+     /*pad_high=*/{0},
+     /*reducer=*/Reducer::kMax},
 };
 
 string R1ReduceWindowTestDataToString(
-- 
GitLab


From 5da7359a9e0b832f608dc66d7a22e647f09ec035 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 13:11:51 -0700
Subject: [PATCH 0276/1357] Raise error on encountering bad indentation during
 Autograph parsing.

PiperOrigin-RevId: 213324570
---
 tensorflow/python/autograph/pyct/parser.py      | 15 ++++++++++++++-
 tensorflow/python/autograph/pyct/parser_test.py | 16 ++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/autograph/pyct/parser.py b/tensorflow/python/autograph/pyct/parser.py
index 112ed46a1e..63686350d5 100644
--- a/tensorflow/python/autograph/pyct/parser.py
+++ b/tensorflow/python/autograph/pyct/parser.py
@@ -31,8 +31,21 @@ from tensorflow.python.util import tf_inspect
 def parse_entity(entity):
   """Returns the AST of given entity."""
   source = tf_inspect.getsource(entity)
+  # Comments and multiline strings can appear at arbitrary indentation levels,
+  # causing textwrap.dedent to not correctly dedent source code.
+  # TODO(b/115884650): Automatic handling of comments/multiline strings.
   source = textwrap.dedent(source)
-  return parse_str(source), source
+  try:
+    return parse_str(source), source
+  except IndentationError:
+    # Because we are parsing the source code of entities that have already
+    # successfully parsed once, any IndentationErrors are guaranteed to be
+    # caused by insufficient dedenting.
+    raise ValueError(
+        'Failed to dedent prior to parsing source code. If you have comments '
+        'or multiline strings in your code, try indenting them. '
+        'Multiline strings can be rewritten using textwrap.dedent.\n'
+        'Offending source code: \n %s' % source)
 
 
 def parse_str(src):
diff --git a/tensorflow/python/autograph/pyct/parser_test.py b/tensorflow/python/autograph/pyct/parser_test.py
index d0b465eb73..d3a7b7a014 100644
--- a/tensorflow/python/autograph/pyct/parser_test.py
+++ b/tensorflow/python/autograph/pyct/parser_test.py
@@ -42,6 +42,22 @@ class ParserTest(test.TestCase):
     """))
     self.assertEqual('f', mod.body[0].name)
 
+  def test_parse_comments(self):
+    def f():
+# unindented comment
+      pass
+    with self.assertRaises(ValueError):
+      parser.parse_entity(f)
+
+  def test_parse_multiline_strings(self):
+    def f():
+      print("""
+some
+multiline
+string""")
+    with self.assertRaises(ValueError):
+      parser.parse_entity(f)
+
   def test_parse_expression(self):
     node = parser.parse_expression('a.b')
     self.assertEqual('a', node.value.id)
-- 
GitLab


From 8ae1021b028e9e6cc1b169ffab4dd186b4d2b472 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 13:21:51 -0700
Subject: [PATCH 0277/1357] Move from deprecated self.test_session() to
 self.cached_session().

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about:
* the fact that the session may be reused.
* the session is not closed even when doing a "with self.test_session()" statement.

PiperOrigin-RevId: 213326167
---
 .../kernel_tests/interleave_dataset_op_test.py   |  6 +++---
 .../data/kernel_tests/map_dataset_op_test.py     |  2 +-
 .../python/kernel_tests/broadcast_to_ops_test.py |  8 ++++----
 tensorflow/python/kernel_tests/check_ops_test.py | 10 +++++-----
 .../kernel_tests/conditional_accumulator_test.py |  4 ++--
 .../kernel_tests/regex_full_match_op_test.py     |  6 +++---
 .../python/kernel_tests/regex_replace_op_test.py | 16 ++++++++--------
 .../sparse_conditional_accumulator_test.py       |  4 ++--
 tensorflow/python/kernel_tests/substr_op_test.py | 14 +++++++-------
 9 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
index a35cee594a..e7e51df65e 100644
--- a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
@@ -134,7 +134,7 @@ class InterleaveDatasetTest(test.TestCase, parameterized.TestCase):
         result.append([value] * value)
       return result * count
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for expected_element in self._interleave(
           repeat(input_values, count), cycle_length, block_length):
         self.assertEqual(expected_element, sess.run(get_next))
@@ -169,7 +169,7 @@ class InterleaveDatasetTest(test.TestCase, parameterized.TestCase):
             num_parallel_calls)
     get_next = dataset.make_one_shot_iterator().get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for value in input_values:
         if np.isnan(value):
           with self.assertRaises(errors.InvalidArgumentError):
@@ -195,7 +195,7 @@ class InterleaveDatasetTest(test.TestCase, parameterized.TestCase):
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(init_op)
       for i in range(10):
         for j in range(2):
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 7685d8dbdc..2ab74beb32 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -731,7 +731,7 @@ class MapDatasetTest(test.TestCase, parameterized.TestCase):
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tids = sess.run(get_next)
       self.assertTrue(all(tids[0] == tid for tid in tids))
 # pylint: enable=g-long-lambda
diff --git a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py
index bd2339f31d..09c325f2bc 100644
--- a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py
+++ b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py
@@ -90,7 +90,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase):
       x = constant_op.constant(1, dtype=dtypes.float32)
       v = array_ops.broadcast_to(x, [2, 4, 3])
       out = 2 * v
-      with self.test_session():
+      with self.cached_session():
         err = gradient_checker.compute_gradient_error(x, x.get_shape(),
                                                       out, out.get_shape())
     self.assertLess(err, 1e-4)
@@ -100,7 +100,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase):
                              dtype=dtypes.float32)
     v = array_ops.broadcast_to(x, [2, 5, 3])
     out = 2 * v
-    with self.test_session():
+    with self.cached_session():
       err = gradient_checker.compute_gradient_error(x, x.get_shape(),
                                                     out, out.get_shape())
     self.assertLess(err, 1e-4)
@@ -110,7 +110,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase):
                              dtype=dtypes.float32)
     v = array_ops.broadcast_to(x, [5, 2, 3])
     out = 2 * v
-    with self.test_session():
+    with self.cached_session():
       err = gradient_checker.compute_gradient_error(x, x.get_shape(),
                                                     out, out.get_shape())
     self.assertLess(err, 1e-4)
@@ -119,7 +119,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase):
     x = constant_op.constant([[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32)
     v = array_ops.broadcast_to(x, [5, 4, 6])
     out = 2 * v
-    with self.test_session():
+    with self.cached_session():
       err = gradient_checker.compute_gradient_error(x, x.get_shape(),
                                                     out, out.get_shape())
     self.assertLess(err, 1e-4)
diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 27a674e223..bd4011d58e 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -785,7 +785,7 @@ class EnsureShapeTest(test.TestCase):
     derived = math_ops.divide(placeholder, 3, name="MyDivide")
     derived = check_ops.ensure_shape(derived, (3, 3, 3))
     feed_val = [[1], [2]]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with self.assertRaisesWithPredicateMatch(
           errors.InvalidArgumentError,
           r"Shape of tensor MyDivide \[2,1\] is not compatible with "
@@ -797,7 +797,7 @@ class EnsureShapeTest(test.TestCase):
     derived = placeholder / 3
     derived = check_ops.ensure_shape(derived, (None, None, 3))
     feed_val = [[1], [2]]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with self.assertRaisesWithPredicateMatch(
           errors.InvalidArgumentError,
           r"Shape of tensor [A-Za-z_]* \[2,1\] is not compatible with "
@@ -809,7 +809,7 @@ class EnsureShapeTest(test.TestCase):
     derived = placeholder / 3
     derived = check_ops.ensure_shape(derived, (2, 1))
     feed_val = [[1], [2]]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(derived, feed_dict={placeholder: feed_val})
 
   def testEnsuresDynamicShape_WithUnknownDims(self):
@@ -817,7 +817,7 @@ class EnsureShapeTest(test.TestCase):
     derived = placeholder / 3
     derived = check_ops.ensure_shape(derived, (None, None))
     feed_val = [[1], [2]]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(derived, feed_dict={placeholder: feed_val})
 
   def testGradient(self):
@@ -826,7 +826,7 @@ class EnsureShapeTest(test.TestCase):
     gradient = gradients.gradients(derived, placeholder)
 
     feed_val = [[4.0], [-1.0]]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       gradient_values, = sess.run(gradient, feed_dict={placeholder: feed_val})
 
     expected = [[1.0], [1.0]]
diff --git a/tensorflow/python/kernel_tests/conditional_accumulator_test.py b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
index 262352a9af..97ab23fe49 100644
--- a/tensorflow/python/kernel_tests/conditional_accumulator_test.py
+++ b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
@@ -272,7 +272,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       self.assertEqual(15.0, val)
 
   def testAccumulatorTakeGradSum(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32,
           name="Q",
@@ -349,7 +349,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       self.assertEqual(elems_ave + 0.0, val)
 
   def testAccumulatorRepeatedTakeGradSum(self):
-    with self.test_session():
+    with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
           dtypes_lib.float32,
           name="Q",
diff --git a/tensorflow/python/kernel_tests/regex_full_match_op_test.py b/tensorflow/python/kernel_tests/regex_full_match_op_test.py
index e81f562a2a..98746e7d9b 100644
--- a/tensorflow/python/kernel_tests/regex_full_match_op_test.py
+++ b/tensorflow/python/kernel_tests/regex_full_match_op_test.py
@@ -42,7 +42,7 @@ class RegexFullMatchOpVariantsTest(test.TestCase, parameterized.TestCase):
 
   def testRegexFullMatchTwoDims(self, op):
     values = [["abaaba", "abcdabcde"], ["acdcba", "ebcda"]]
-    with self.test_session():
+    with self.cached_session():
       input_tensor = constant_op.constant(values, dtypes.string)
       matched = op(input_tensor, "a.*a").eval()
       self.assertAllEqual([[True, False], [True, False]], matched)
@@ -68,7 +68,7 @@ class RegexFullMatchOpTest(test.TestCase):
 
   def testRegexFullMatchDelegation(self):
     with compat.forward_compatibility_horizon(2018, 11, 1):
-      with self.test_session():
+      with self.cached_session():
         input_tensor = constant_op.constant("foo", dtypes.string)
         pattern = "[a-z]"
         op = string_ops.regex_full_match(input_tensor, pattern)
@@ -80,7 +80,7 @@ class RegexFullMatchOpTest(test.TestCase):
 
   def testStaticRegexFullMatchDelegation(self):
     with compat.forward_compatibility_horizon(2018, 11, 20):
-      with self.test_session():
+      with self.cached_session():
         input_tensor = constant_op.constant("foo", dtypes.string)
         pattern = "[a-z]*"
         op = string_ops.regex_full_match(input_tensor, pattern)
diff --git a/tensorflow/python/kernel_tests/regex_replace_op_test.py b/tensorflow/python/kernel_tests/regex_replace_op_test.py
index feac3a8b08..d9b7ed28d2 100644
--- a/tensorflow/python/kernel_tests/regex_replace_op_test.py
+++ b/tensorflow/python/kernel_tests/regex_replace_op_test.py
@@ -33,7 +33,7 @@ from tensorflow.python.platform import test
 class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase):
 
   def testForwarding(self, op):
-    with self.test_session():
+    with self.cached_session():
       # Generate an input that is uniquely consumed by the regex op.
       # This exercises code paths which are optimized for this case
       # (e.g., using forwarding).
@@ -47,7 +47,7 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase):
 
   def testRemovePrefix(self, op):
     values = ["a:foo", "a:bar", "a:foo", "b:baz", "b:qux", "ca:b"]
-    with self.test_session():
+    with self.cached_session():
       input_vector = constant_op.constant(values, dtypes.string)
       stripped = op(input_vector, "^(a:|b:)", "", replace_global=False).eval()
       self.assertAllEqual([b"foo", b"bar", b"foo", b"baz", b"qux", b"ca:b"],
@@ -55,21 +55,21 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase):
 
   def testRegexReplace(self, op):
     values = ["aba\naba", "abcdabcde"]
-    with self.test_session():
+    with self.cached_session():
       input_vector = constant_op.constant(values, dtypes.string)
       stripped = op(input_vector, "a.*a", "(\\0)").eval()
       self.assertAllEqual([b"(aba)\n(aba)", b"(abcda)bcde"], stripped)
 
   def testEmptyMatch(self, op):
     values = ["abc", "1"]
-    with self.test_session():
+    with self.cached_session():
       input_vector = constant_op.constant(values, dtypes.string)
       stripped = op(input_vector, "", "x").eval()
       self.assertAllEqual([b"xaxbxcx", b"x1x"], stripped)
 
   def testInvalidPattern(self, op):
     values = ["abc", "1"]
-    with self.test_session():
+    with self.cached_session():
       input_vector = constant_op.constant(values, dtypes.string)
       invalid_pattern = "A["
       replace = op(input_vector, invalid_pattern, "x")
@@ -78,7 +78,7 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase):
 
   def testGlobal(self, op):
     values = ["ababababab", "abcabcabc", ""]
-    with self.test_session():
+    with self.cached_session():
       input_vector = constant_op.constant(values, dtypes.string)
       stripped = op(input_vector, "ab", "abc", True).eval()
       self.assertAllEqual([b"abcabcabcabcabc", b"abccabccabcc", b""], stripped)
@@ -99,7 +99,7 @@ class RegexReplaceTest(test.TestCase, parameterized.TestCase):
       (as_tensor, as_string),
       (as_tensor, as_tensor))
   def testRegexReplaceDelegation(self, pattern_fn, rewrite_fn):
-    with self.test_session():
+    with self.cached_session():
       input_vector = constant_op.constant("foo", dtypes.string)
       pattern = pattern_fn("[a-z]")
       replace = rewrite_fn(".")
@@ -107,7 +107,7 @@ class RegexReplaceTest(test.TestCase, parameterized.TestCase):
       self.assertTrue(op.name.startswith("RegexReplace"))
 
   def testStaticRegexReplaceDelegation(self):
-    with self.test_session():
+    with self.cached_session():
       input_vector = constant_op.constant("foo", dtypes.string)
       pattern = "[a-z]"
       replace = "."
diff --git a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
index 477720302d..a824d5c826 100644
--- a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
+++ b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
@@ -195,7 +195,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       self.assertAllEqual([-1, 2], val.dense_shape)
 
   def testAccumulatorTakeGradSum(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", shape=(), reduction_type="SUM")
 
@@ -289,7 +289,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
           val, sess)
 
   def testParallelApplyGradSum(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32,
           name="Q",
diff --git a/tensorflow/python/kernel_tests/substr_op_test.py b/tensorflow/python/kernel_tests/substr_op_test.py
index 4d163a0f6f..cd3fe14883 100644
--- a/tensorflow/python/kernel_tests/substr_op_test.py
+++ b/tensorflow/python/kernel_tests/substr_op_test.py
@@ -46,7 +46,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     expected_value = b"ell"
 
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -57,7 +57,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     expected_value = b""
 
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -79,7 +79,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     expected_value = [b"ell", b"orl"]
 
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -104,7 +104,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
                       [b"ixte", b"even", b"ight"]]
 
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
@@ -196,7 +196,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     position = np.array(-7, dtype)
     length = np.array(3, dtype)
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
         substr = substr_op.eval()
 
@@ -234,7 +234,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     position = np.array([[1, 2, -3], [1, 2, -4], [1, 2, -3]], dtype)
     length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype)
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
         substr = substr_op.eval()
 
@@ -252,7 +252,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
     position = np.array([-1, -2, -4], dtype)
     length = np.array([1, 2, 3], dtype)
     substr_op = string_ops.substr(test_string, position, length)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
         substr = substr_op.eval()
 
-- 
GitLab


From d7b4bf68dc80f1abf90bd6b857f079157028a861 Mon Sep 17 00:00:00 2001
From: Akshay Agrawal <akshayka@google.com>
Date: Mon, 17 Sep 2018 13:23:58 -0700
Subject: [PATCH 0278/1357] Add missing `watch` call to GradientTape
 documentation.

PiperOrigin-RevId: 213326503
---
 tensorflow/python/eager/backprop.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 11336efebb..e6cf9653a8 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -669,6 +669,7 @@ class GradientTape(object):
   ```python
   x = tf.constant(3.0)
   with tf.GradientTape() as g:
+    g.watch(x)
     with tf.GradientTape() as gg:
       gg.watch(x)
       y = x * x
-- 
GitLab


From a768624f1d0ae3629caf5b9784b4b6911b881c18 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 13:24:29 -0700
Subject: [PATCH 0279/1357] Move from deprecated self.test_session() to
 self.cached_session().

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about:
* the fact that the session may be reused.
* the session is not closed even when doing a "with self.test_session()" statement.

PiperOrigin-RevId: 213326581
---
 .../python/kernel_tests/monte_carlo_test.py   | 18 ++++----
 .../training/functions/gbdt_batch_test.py     | 42 +++++++++----------
 .../python/kernel_tests/cudnn_rnn_test.py     |  2 +-
 .../kernel_tests/batch_dataset_op_test.py     |  8 ++--
 .../python/kernel_tests/map_defun_op_test.py  |  4 +-
 .../assert_next_dataset_op_test.py            |  6 +--
 .../map_and_filter_fusion_test.py             |  2 +-
 .../optimization/model_dataset_op_test.py     | 10 ++---
 .../optimization/optimize_dataset_op_test.py  | 12 +++---
 .../kernel_tests/stats_dataset_ops_test.py    | 16 +++----
 .../contrib/deprecated/summaries_test.py      | 10 ++---
 .../python/framework/tensor_util_test.py      |  2 +-
 .../hadoop/python/kernel_tests/hadoop_test.py |  2 +-
 .../kafka/python/kernel_tests/kafka_test.py   |  2 +-
 .../sparse_feature_cross_op_test.py           | 34 +++++++--------
 .../learn/python/learn/graph_actions_test.py  |  2 +-
 .../linear_operator_addition_test.py          | 24 +++++------
 .../metric_learning/metric_loss_ops_test.py   | 16 +++----
 .../python/kernel_tests/histogram_ops_test.py | 10 ++---
 .../python/metrics/classification_test.py     | 28 ++++++-------
 .../training/lazy_adam_optimizer_test.py      |  6 +--
 .../tensor_forest/client/eval_metrics_test.py |  8 ++--
 22 files changed, 132 insertions(+), 132 deletions(-)

diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/monte_carlo_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/monte_carlo_test.py
index 9e6a146f67..13215ffabf 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/monte_carlo_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/monte_carlo_test.py
@@ -42,7 +42,7 @@ class ExpectationImportanceSampleTest(test.TestCase):
 
   def test_normal_integral_mean_and_var_correctly_estimated(self):
     n = int(1e6)
-    with self.test_session():
+    with self.cached_session():
       mu_p = constant_op.constant([-1.0, 1.0], dtype=dtypes.float64)
       mu_q = constant_op.constant([0.0, 0.0], dtype=dtypes.float64)
       sigma_p = constant_op.constant([0.5, 0.5], dtype=dtypes.float64)
@@ -72,7 +72,7 @@ class ExpectationImportanceSampleTest(test.TestCase):
     # Test that importance sampling can correctly estimate the probability that
     # the product of components in a MultivariateNormal are > 0.
     n = 1000
-    with self.test_session():
+    with self.cached_session():
       p = mvn_diag_lib.MultivariateNormalDiag(
           loc=[0.], scale_diag=[1.0, 1.0])
       q = mvn_diag_lib.MultivariateNormalDiag(
@@ -99,7 +99,7 @@ class ExpectationImportanceSampleLogspaceTest(test.TestCase):
   def test_normal_distribution_second_moment_estimated_correctly(self):
     # Test the importance sampled estimate against an analytical result.
     n = int(1e6)
-    with self.test_session():
+    with self.cached_session():
       mu_p = constant_op.constant([0.0, 0.0], dtype=dtypes.float64)
       mu_q = constant_op.constant([-1.0, 1.0], dtype=dtypes.float64)
       sigma_p = constant_op.constant([1.0, 2 / 3.], dtype=dtypes.float64)
@@ -127,7 +127,7 @@ class GetSamplesTest(test.TestCase):
   """Test the private method 'get_samples'."""
 
   def test_raises_if_both_z_and_n_are_none(self):
-    with self.test_session():
+    with self.cached_session():
       dist = normal_lib.Normal(loc=0., scale=1.)
       z = None
       n = None
@@ -136,7 +136,7 @@ class GetSamplesTest(test.TestCase):
         _get_samples(dist, z, n, seed)
 
   def test_raises_if_both_z_and_n_are_not_none(self):
-    with self.test_session():
+    with self.cached_session():
       dist = normal_lib.Normal(loc=0., scale=1.)
       z = dist.sample(seed=42)
       n = 1
@@ -145,7 +145,7 @@ class GetSamplesTest(test.TestCase):
         _get_samples(dist, z, n, seed)
 
   def test_returns_n_samples_if_n_provided(self):
-    with self.test_session():
+    with self.cached_session():
       dist = normal_lib.Normal(loc=0., scale=1.)
       z = None
       n = 10
@@ -154,7 +154,7 @@ class GetSamplesTest(test.TestCase):
       self.assertEqual((10,), z.get_shape())
 
   def test_returns_z_if_z_provided(self):
-    with self.test_session():
+    with self.cached_session():
       dist = normal_lib.Normal(loc=0., scale=1.)
       z = dist.sample(10, seed=42)
       n = None
@@ -166,7 +166,7 @@ class GetSamplesTest(test.TestCase):
 class ExpectationTest(test.TestCase):
 
   def test_works_correctly(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = constant_op.constant([-1e6, -100, -10, -1, 1, 10, 100, 1e6])
       p = normal_lib.Normal(loc=x, scale=1.)
 
@@ -213,7 +213,7 @@ class ExpectationTest(test.TestCase):
                           rtol=0.05, atol=0.)
 
   def test_docstring_example_normal(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       num_draws = int(1e5)
       mu_p = constant_op.constant(0.)
       mu_q = constant_op.constant(1.)
diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
index 73e41bc457..9d9941f696 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
@@ -86,7 +86,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testExtractFeatures(self):
     """Tests feature extraction."""
-    with self.test_session():
+    with self.cached_session():
       features = {}
       features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32)
       features["sparse_float"] = sparse_tensor.SparseTensor(
@@ -128,7 +128,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testExtractFeaturesWithTransformation(self):
     """Tests feature extraction."""
-    with self.test_session():
+    with self.cached_session():
       features = {}
       features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32)
       features["sparse_float"] = sparse_tensor.SparseTensor(
@@ -178,7 +178,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testExtractFeaturesFromCoreFeatureColumns(self):
     """Tests feature extraction when using core columns."""
-    with self.test_session():
+    with self.cached_session():
       features = {}
       # Sparse float column does not exist in core, so only dense numeric and
       # categorical.
@@ -213,7 +213,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnChiefNoBiasCentering(self):
     """Tests the train function running on chief without bias centering."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
@@ -316,7 +316,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       self.assertProtoEquals(expected_tree, output.trees[0])
 
   def testObliviousDecisionTreeAsWeakLearner(self):
-    with self.test_session():
+    with self.cached_session():
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
@@ -473,7 +473,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnChiefSparseAndDense(self):
     """Tests the train function with sparse and dense features."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
@@ -580,7 +580,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnChiefScalingNumberOfExamples(self):
     """Tests the train function running on chief without bias centering."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
@@ -685,7 +685,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnChiefWithBiasCentering(self):
     """Tests the train function running on chief with bias centering."""
-    with self.test_session():
+    with self.cached_session():
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
@@ -757,7 +757,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnNonChiefNoBiasCentering(self):
     """Tests the train function running on worker without bias centering."""
-    with self.test_session():
+    with self.cached_session():
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
@@ -821,7 +821,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnNonChiefWithCentering(self):
     """Tests the train function running on worker with bias centering."""
-    with self.test_session():
+    with self.cached_session():
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
@@ -885,7 +885,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testPredictFn(self):
     """Tests the predict function."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create ensemble with one bias node.
       ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
       text_format.Merge(
@@ -939,7 +939,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testPredictFnWithLeafIndexAdvancedLeft(self):
     """Tests the predict function with output leaf ids."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Create ensemble with one bias node.
       ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
       text_format.Merge(
@@ -1051,7 +1051,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnMulticlassFullHessian(self):
     """Tests the GBDT train for multiclass full hessian."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
 
@@ -1155,7 +1155,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnMulticlassDiagonalHessian(self):
     """Tests the GBDT train for multiclass diagonal hessian."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
 
@@ -1259,7 +1259,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnMulticlassTreePerClass(self):
     """Tests the GBDT train for multiclass tree per class strategy."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
 
@@ -1374,7 +1374,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnChiefFeatureSelectionReachedLimitNoGoodSplit(self):
     """Tests the train function running on chief with feature selection."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
@@ -1493,7 +1493,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnChiefFeatureSelectionWithGoodSplits(self):
     """Tests the train function running on chief with feature selection."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
@@ -1610,7 +1610,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testTrainFnChiefFeatureSelectionReachedLimitIncrementAttemptedLayer(self):
     """Tests the train function running on chief with feature selection."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
       tree = tree_ensemble_config.trees.add()
 
@@ -1720,7 +1720,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testResetModelBeforeAndAfterSplit(self):
     """Tests whether resetting works."""
-    with self.test_session():
+    with self.cached_session():
       # First build a small tree and train it to verify training works.
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
@@ -1854,7 +1854,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testResetModelNonChief(self):
     """Tests the reset function on a non-chief worker."""
-    with self.test_session():
+    with self.cached_session():
       # Create ensemble with one bias node.
       ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
       text_format.Merge(
@@ -1930,7 +1930,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
 
   def testResetModelWithCenterBias(self):
     """Tests the reset function running on chief with bias centering."""
-    with self.test_session():
+    with self.cached_session():
       ensemble_handle = model_ops.tree_ensemble_variable(
           stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
       learner_config = learner_pb2.LearnerConfig()
diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py
index fda1b9f1b3..57793a8ff5 100644
--- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py
+++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py
@@ -460,7 +460,7 @@ class CudnnRNNTestBasic(test_util.TensorFlowTestCase):
       grad, = gradients.gradients(
           math_ops.reduce_sum(accumulation), (original_input,))
     init_op = variables.global_variables_initializer()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(init_op)
       accumulation_eval, grad_eval = sess.run((accumulation, grad))
       self.assertAllEqual([28, 100, 100], accumulation_eval.shape)
diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
index 8e368bf2bc..e2508de9e9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
@@ -742,7 +742,7 @@ class RestructuredDatasetTest(test.TestCase):
     iterator = result.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(init_op)
       for _ in range(5):
         sess.run(get_next)
@@ -813,7 +813,7 @@ class RestructuredDatasetTest(test.TestCase):
         .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(init_op)
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(get_next)
@@ -837,7 +837,7 @@ class RestructuredDatasetTest(test.TestCase):
     iterator = result.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(init_op)
       for _ in range(5):
         sess.run(get_next)
@@ -879,7 +879,7 @@ class RestructuredDatasetTest(test.TestCase):
     iterator = result.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(init_op)
       for _ in range(5):
         sess.run(get_next)
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
index 83b723710c..25aea0393f 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
@@ -116,7 +116,7 @@ class MapDefunTest(test.TestCase):
     elems2 = array_ops.placeholder(dtypes.int32)
     result = map_defun.map_defun(fn, [elems1, elems2],
                                  [dtypes.int32, dtypes.int32], [(), ()])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with self.assertRaisesWithPredicateMatch(
           errors.InvalidArgumentError,
           "All inputs must have the same dimension 0."):
@@ -225,7 +225,7 @@ class MapDefunTest(test.TestCase):
     c = constant_op.constant([1, 2, 3, 4, 5])
     map_defun_op = map_defun.map_defun(simple_fn, [c], [dtypes.int32], [()])[0]
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       thread = self.checkedThread(
           self._assert_op_cancelled, args=(sess, map_defun_op))
       thread.start()
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
index bd7b50b902..d10da80442 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
@@ -31,7 +31,7 @@ class AssertNextDatasetTest(test.TestCase):
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertEqual(0, sess.run(get_next))
 
   def testAssertNextInvalid(self):
@@ -40,7 +40,7 @@ class AssertNextDatasetTest(test.TestCase):
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           "Asserted Whoops transformation at offset 0 but encountered "
@@ -53,7 +53,7 @@ class AssertNextDatasetTest(test.TestCase):
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           "Asserted next 2 transformations but encountered only 1."):
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
index dde115925e..e75edf6086 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
@@ -200,7 +200,7 @@ class MapAndFilterFusionTest(test.TestCase, parameterized.TestCase):
         optimization.optimize(["filter_fusion"]))
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for x in range(5):
         r = map_function(x)
         filtered = False
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
index 2b3ac85924..3b62a7e468 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
@@ -40,7 +40,7 @@ class ModelDatasetTest(test.TestCase):
     get_next = iterator.get_next()
 
     deltas = []
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for _ in range(5):
         sess.run(get_next.op)
       for _ in range(100):
@@ -64,7 +64,7 @@ class ModelDatasetTest(test.TestCase):
     get_next = iterator.get_next()
 
     deltas = []
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for _ in range(5):
         sess.run(get_next.op)
       for _ in range(1000):
@@ -92,7 +92,7 @@ class ModelDatasetTest(test.TestCase):
     get_next = iterator.get_next()
 
     deltas = []
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for _ in range(5):
         sess.run(get_next.op)
       for _ in range(10):
@@ -119,7 +119,7 @@ class ModelDatasetTest(test.TestCase):
     get_next = iterator.get_next()
 
     deltas = []
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for _ in range(5):
         sess.run(get_next.op)
       for _ in range(1000):
@@ -164,7 +164,7 @@ class ModelDatasetTest(test.TestCase):
     get_next = iterator.get_next()
 
     deltas = []
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for _ in range(5):
         sess.run(get_next)
       for _ in range(100):
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
index 909da5aee0..a3fb824ce9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
@@ -38,7 +38,7 @@ class OptimizeDatasetTest(test.TestCase):
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertAllEqual([x * x for x in range(10)], sess.run(get_next))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -51,7 +51,7 @@ class OptimizeDatasetTest(test.TestCase):
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertAllEqual([x * x for x in range(10)], sess.run(get_next))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -64,7 +64,7 @@ class OptimizeDatasetTest(test.TestCase):
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertAllEqual([x * x for x in range(10)], sess.run(get_next))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -76,7 +76,7 @@ class OptimizeDatasetTest(test.TestCase):
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(get_next)
 
   def testOptimizationLargeInputFromTensor(self):
@@ -87,7 +87,7 @@ class OptimizeDatasetTest(test.TestCase):
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(init_op, {input_t: np.ones([512, 1024, 1025], np.int32)})
       sess.run(get_next)
 
@@ -99,7 +99,7 @@ class OptimizeDatasetTest(test.TestCase):
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(init_op, {input_t: np.ones([1, 512, 1024, 1025], np.int32)})
       sess.run(get_next)
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
index e25570c5ad..719ce2e3fe 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
@@ -40,7 +40,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     next_element = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(iterator.initializer)
       expected_sum = 0.0
       for i in range(100):
@@ -65,7 +65,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     next_element = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(iterator.initializer)
       for i in range(100):
         self.assertEqual(i, sess.run(next_element))
@@ -84,7 +84,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     next_element = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(iterator.initializer)
       for i in range(100):
         self.assertAllEqual(
@@ -109,7 +109,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     next_element = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       for j in range(5):
         sess.run(iterator.initializer)
         for i in range(100):
@@ -127,7 +127,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     iterator = dataset.make_initializable_iterator()
     next_element = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(iterator.initializer)
       for i in range(100):
         self.assertEqual(i, sess.run(next_element))
@@ -144,7 +144,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     next_element = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(iterator.initializer)
       for i in range(100):
         self.assertEqual(i, sess.run(next_element))
@@ -168,7 +168,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     next_element = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(iterator.initializer)
       for i in range(100):
         self.assertEqual(i, sess.run(next_element))
@@ -188,7 +188,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     next_element = iterator_0.get_next() + iterator_1.get_next()
     summary_t = stats_aggregator.get_summary()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run([iterator_0.initializer, iterator_1.initializer])
       for i in range(100):
         self.assertEqual(i * 2, sess.run(next_element))
diff --git a/tensorflow/contrib/deprecated/summaries_test.py b/tensorflow/contrib/deprecated/summaries_test.py
index 6acf2a6469..4038224a1c 100644
--- a/tensorflow/contrib/deprecated/summaries_test.py
+++ b/tensorflow/contrib/deprecated/summaries_test.py
@@ -27,31 +27,31 @@ from tensorflow.python.platform import test
 class DeprecatedSummariesTest(test.TestCase):
 
   def testScalarSummary(self):
-    with self.test_session():
+    with self.cached_session():
       c = constant_op.constant(3)
       s = logging_ops.scalar_summary('tag', c)
       self.assertEqual(s.op.type, u'ScalarSummary')
 
   def testHistogramSummary(self):
-    with self.test_session():
+    with self.cached_session():
       c = constant_op.constant(3)
       s = logging_ops.histogram_summary('tag', c)
       self.assertEqual(s.op.type, u'HistogramSummary')
 
   def testImageSummary(self):
-    with self.test_session():
+    with self.cached_session():
       i = array_ops.ones((5, 4, 4, 3))
       s = logging_ops.image_summary('tag', i)
       self.assertEqual(s.op.type, u'ImageSummary')
 
   def testAudioSummary(self):
-    with self.test_session():
+    with self.cached_session():
       c = constant_op.constant(3.0)
       s = logging_ops.audio_summary('tag', c, sample_rate=8000)
       self.assertEqual(s.op.type, u'AudioSummaryV2')
 
   def testMergeSummary(self):
-    with self.test_session():
+    with self.cached_session():
       c = constant_op.constant(3)
       a = logging_ops.scalar_summary('a', c)
       b = logging_ops.scalar_summary('b', c)
diff --git a/tensorflow/contrib/framework/python/framework/tensor_util_test.py b/tensorflow/contrib/framework/python/framework/tensor_util_test.py
index b1820c10c8..9b0b9b1e1b 100644
--- a/tensorflow/contrib/framework/python/framework/tensor_util_test.py
+++ b/tensorflow/contrib/framework/python/framework/tensor_util_test.py
@@ -186,7 +186,7 @@ class WithShapeTest(test.TestCase):
           unexpected_shapes)
 
   def test_with_shape_2x2_with_partial_expected_shape(self):
-    with self.test_session():
+    with self.cached_session():
       value = [[42, 43], [44, 45]]
       actual_shape = [2, 2]
       tensor = constant_op.constant(value, shape=actual_shape)
diff --git a/tensorflow/contrib/hadoop/python/kernel_tests/hadoop_test.py b/tensorflow/contrib/hadoop/python/kernel_tests/hadoop_test.py
index d796e43d87..f7f1189bb9 100644
--- a/tensorflow/contrib/hadoop/python/kernel_tests/hadoop_test.py
+++ b/tensorflow/contrib/hadoop/python/kernel_tests/hadoop_test.py
@@ -51,7 +51,7 @@ class SequenceFileDatasetTest(test.TestCase):
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(init_op)
       for _ in range(num_repeats):  # Dataset is repeated.
         for i in range(25):  # 25 records.
diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py
index 621911876f..08ebcdb544 100644
--- a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py
+++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py
@@ -54,7 +54,7 @@ class KafkaDatasetTest(test.TestCase):
     init_batch_op = iterator.make_initializer(batch_dataset)
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Basic test: read from topic 0.
       sess.run(init_op, feed_dict={topics: ["test:0:0:4"], num_epochs: 1})
       for i in range(5):
diff --git a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py
index 28ddaa69a1..155d06a08e 100644
--- a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py
+++ b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py
@@ -45,7 +45,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_dense(self):
@@ -66,7 +66,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_integer_mixed_string_sparse(self):
@@ -80,7 +80,7 @@ class SparseCrossOpTest(test.TestCase):
         '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '55555_X_batch2-FC2-F1',
         '55555_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_integer_mixed_string_dense(self):
@@ -99,7 +99,7 @@ class SparseCrossOpTest(test.TestCase):
         '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2',
         '999999_X_batch2-FC2-F1', '999999_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_sparse_cross_dense(self):
@@ -117,7 +117,7 @@ class SparseCrossOpTest(test.TestCase):
             'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
             'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
         ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_integer_sparse_input(self):
@@ -133,7 +133,7 @@ class SparseCrossOpTest(test.TestCase):
             '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2',
             '5555_X_batch2-FC2-F1', '5555_X_batch2-FC2-F2'
         ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_permutation_3x3x3(self):
@@ -176,7 +176,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F2',
         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F3'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_permutation_3x1x2(self):
@@ -196,7 +196,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1',
         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_large_batch(self):
@@ -229,7 +229,7 @@ class SparseCrossOpTest(test.TestCase):
       ])
 
     expected_out = self._sparse_tensor(col_out)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_one_column_empty(self):
@@ -242,7 +242,7 @@ class SparseCrossOpTest(test.TestCase):
         self._sparse_tensor([], 1),
         self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
     ])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_empty(sess.run(op))
 
   def test_some_columns_empty(self):
@@ -261,7 +261,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2'
     ]], 2)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_all_columns_empty(self):
@@ -273,7 +273,7 @@ class SparseCrossOpTest(test.TestCase):
         self._sparse_tensor([]), self._sparse_tensor([]),
         self._sparse_tensor([])
     ])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_empty(sess.run(op))
 
   def test_hashed_output_zero_bucket(self):
@@ -288,7 +288,7 @@ class SparseCrossOpTest(test.TestCase):
         hashed_output=True)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[3735511728867393167]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_hashed_output_zero_bucket_v2(self):
@@ -304,7 +304,7 @@ class SparseCrossOpTest(test.TestCase):
         hash_key=layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[1971693436396284976]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed.
@@ -321,7 +321,7 @@ class SparseCrossOpTest(test.TestCase):
         num_buckets=100)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[74]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_hashed_output_v2(self):
@@ -338,7 +338,7 @@ class SparseCrossOpTest(test.TestCase):
         hash_key=layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[83]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_hashed_output_v1_has_collision(self):
@@ -384,7 +384,7 @@ class SparseCrossOpTest(test.TestCase):
         ],
         hashed_output=True,
         num_buckets=1000)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       out = sess.run(op)
       self.assertEqual(6, len(out.values))
       self.assertAllEqual([[0, i] for i in range(6)], out.indices)
diff --git a/tensorflow/contrib/learn/python/learn/graph_actions_test.py b/tensorflow/contrib/learn/python/learn/graph_actions_test.py
index d5c02124ac..33180b778a 100644
--- a/tensorflow/contrib/learn/python/learn/graph_actions_test.py
+++ b/tensorflow/contrib/learn/python/learn/graph_actions_test.py
@@ -234,7 +234,7 @@ class GraphActionsTest(test.TestCase):
         self.assertTrue(test_ops.resource_initialized_op(handle).eval())
 
   def test_infer_different_default_graph(self):
-    with self.test_session():
+    with self.cached_session():
       self._assert_ckpt(self._output_dir, False)
       with ops.Graph().as_default():
         in0, in1, out = self._build_inference_graph()
diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py
index 6a72df6dfd..d94ac73654 100644
--- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py
+++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py
@@ -76,7 +76,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase):
         [1., 1.], is_positive_definite=True, name="A")
     op_b = linalg.LinearOperatorDiag(
         [2., 2.], is_positive_definite=True, name="B")
-    with self.test_session():
+    with self.cached_session():
       op_sum = add_operators([op_a, op_b])
       self.assertEqual(1, len(op_sum))
       op = op_sum[0]
@@ -98,7 +98,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase):
         [2., 2.], is_positive_definite=True, name="op2")
     op3 = linalg.LinearOperatorDiag(
         [3., 3.], is_positive_definite=True, name="op3")
-    with self.test_session():
+    with self.cached_session():
       op_sum = add_operators([op1, op2, op3])
       self.assertEqual(1, len(op_sum))
       op = op_sum[0]
@@ -121,7 +121,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase):
         name="tril")
     op3 = linalg.LinearOperatorDiag(
         [3., 3.], is_non_singular=True, name="diag_b")
-    with self.test_session():
+    with self.cached_session():
       op_sum = add_operators([op1, op2, op3])
       self.assertEqual(1, len(op_sum))
       op = op_sum[0]
@@ -143,7 +143,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase):
     op2 = linalg.LinearOperatorLowerTriangular(
         [[2., 0.], [1.5, 2.]], name="tril")
     op3 = linalg.LinearOperatorDiag([3., 3.], name="diag_b")
-    with self.test_session():
+    with self.cached_session():
       op_sum = add_operators([op0, op1, op2, op3], operator_name="my_operator")
       self.assertEqual(1, len(op_sum))
       op = op_sum[0]
@@ -233,7 +233,7 @@ class LinearOperatorOrderOfAdditionTest(test.TestCase):
     self.assertEqual(2, len(op_sum))
     found_diag = False
     found_tril = False
-    with self.test_session():
+    with self.cached_session():
       for op in op_sum:
         if isinstance(op, linalg.LinearOperatorDiag):
           found_diag = True
@@ -273,7 +273,7 @@ class AddAndReturnScaledIdentityTest(test.TestCase):
     operator = self._adder.add(id1, id2, "my_operator", hints)
     self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(2 *
                           linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
                           operator.to_dense().eval())
@@ -291,7 +291,7 @@ class AddAndReturnScaledIdentityTest(test.TestCase):
     operator = self._adder.add(id1, id2, "my_operator", hints)
     self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(3.2 *
                           linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
                           operator.to_dense().eval())
@@ -310,7 +310,7 @@ class AddAndReturnScaledIdentityTest(test.TestCase):
     operator = self._adder.add(id1, id2, "my_operator", hints)
     self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(1.2 *
                           linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
                           operator.to_dense().eval())
@@ -334,7 +334,7 @@ class AddAndReturnDiagTest(test.TestCase):
     operator = self._adder.add(id1, id2, "my_operator", hints)
     self.assertTrue(isinstance(operator, linalg.LinearOperatorDiag))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(2 *
                           linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
                           operator.to_dense().eval())
@@ -354,7 +354,7 @@ class AddAndReturnDiagTest(test.TestCase):
     operator = self._adder.add(op1, op2, "my_operator", hints)
     self.assertTrue(isinstance(operator, linalg.LinearOperatorDiag))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(
           linalg.LinearOperatorDiag(diag1 + diag2).to_dense().eval(),
           operator.to_dense().eval())
@@ -379,7 +379,7 @@ class AddAndReturnTriLTest(test.TestCase):
     operator = self._adder.add(diag, tril, "my_operator", hints)
     self.assertTrue(isinstance(operator, linalg.LinearOperatorLowerTriangular))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose([[11., 0.], [30., 2.]], operator.to_dense().eval())
     self.assertTrue(operator.is_positive_definite)
     self.assertTrue(operator.is_non_singular)
@@ -401,7 +401,7 @@ class AddAndReturnMatrixTest(test.TestCase):
     operator = self._adder.add(diag1, diag2, "my_operator", hints)
     self.assertTrue(isinstance(operator, linalg.LinearOperatorFullMatrix))
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose([[0., 0.], [0., 5.]], operator.to_dense().eval())
     self.assertFalse(operator.is_positive_definite)
     self.assertFalse(operator.is_non_singular)
diff --git a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py
index 4ec539ab42..9c389144ff 100644
--- a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py
+++ b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py
@@ -61,7 +61,7 @@ def pairwise_distance_np(feature, squared=False):
 class ContrastiveLossTest(test.TestCase):
 
   def testContrastive(self):
-    with self.test_session():
+    with self.cached_session():
       num_data = 10
       feat_dim = 6
       margin = 1.0
@@ -90,7 +90,7 @@ class ContrastiveLossTest(test.TestCase):
 class TripletSemiHardLossTest(test.TestCase):
 
   def testTripletSemiHard(self):
-    with self.test_session():
+    with self.cached_session():
       num_data = 10
       feat_dim = 6
       margin = 1.0
@@ -146,7 +146,7 @@ class TripletSemiHardLossTest(test.TestCase):
 class LiftedStructLossTest(test.TestCase):
 
   def testLiftedStruct(self):
-    with self.test_session():
+    with self.cached_session():
       num_data = 10
       feat_dim = 6
       margin = 1.0
@@ -217,7 +217,7 @@ def convert_to_list_of_sparse_tensor(np_matrix):
 class NpairsLossTest(test.TestCase):
 
   def testNpairs(self):
-    with self.test_session():
+    with self.cached_session():
       num_data = 15
       feat_dim = 6
       num_classes = 5
@@ -261,7 +261,7 @@ class NpairsLossTest(test.TestCase):
 class NpairsLossMultiLabelTest(test.TestCase):
 
   def testNpairsMultiLabelLossWithSingleLabelEqualsNpairsLoss(self):
-    with self.test_session():
+    with self.cached_session():
       num_data = 15
       feat_dim = 6
       reg_lambda = 0.02
@@ -290,7 +290,7 @@ class NpairsLossMultiLabelTest(test.TestCase):
       self.assertAllClose(loss_npairs, loss_npairs_multilabel)
 
   def testNpairsMultiLabel(self):
-    with self.test_session():
+    with self.cached_session():
       num_data = 15
       feat_dim = 6
       num_classes = 10
@@ -527,7 +527,7 @@ class ClusterLossTest(test.TestCase):
   def testClusteringLossPAMOff(self):
     if not HAS_SKLEARN:
       return
-    with self.test_session():
+    with self.cached_session():
       margin_multiplier = 10.0
       embeddings, labels = self._genClusters(n_samples=128, n_clusters=64)
 
@@ -544,7 +544,7 @@ class ClusterLossTest(test.TestCase):
   def testClusteringLossPAMOn(self):
     if not HAS_SKLEARN:
       return
-    with self.test_session():
+    with self.cached_session():
       margin_multiplier = 10.0
       embeddings, labels = self._genClusters(n_samples=128, n_clusters=64)
 
diff --git a/tensorflow/contrib/metrics/python/kernel_tests/histogram_ops_test.py b/tensorflow/contrib/metrics/python/kernel_tests/histogram_ops_test.py
index 1d18d6beff..bed1ecb71c 100644
--- a/tensorflow/contrib/metrics/python/kernel_tests/histogram_ops_test.py
+++ b/tensorflow/contrib/metrics/python/kernel_tests/histogram_ops_test.py
@@ -31,21 +31,21 @@ class Strict1dCumsumTest(test.TestCase):
   """Test this private function."""
 
   def test_empty_tensor_returns_empty(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = constant_op.constant([])
       result = histogram_ops._strict_1d_cumsum(tensor, 0)
       expected = constant_op.constant([])
       np.testing.assert_array_equal(expected.eval(), result.eval())
 
   def test_length_1_tensor_works(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = constant_op.constant([3], dtype=dtypes.float32)
       result = histogram_ops._strict_1d_cumsum(tensor, 1)
       expected = constant_op.constant([3], dtype=dtypes.float32)
       np.testing.assert_array_equal(expected.eval(), result.eval())
 
   def test_length_3_tensor_works(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = constant_op.constant([1, 2, 3], dtype=dtypes.float32)
       result = histogram_ops._strict_1d_cumsum(tensor, 3)
       expected = constant_op.constant([1, 3, 6], dtype=dtypes.float32)
@@ -58,7 +58,7 @@ class AUCUsingHistogramTest(test.TestCase):
     self.rng = np.random.RandomState(0)
 
   def test_empty_labels_and_scores_gives_nan_auc(self):
-    with self.test_session():
+    with self.cached_session():
       labels = constant_op.constant([], shape=[0], dtype=dtypes.bool)
       scores = constant_op.constant([], shape=[0], dtype=dtypes.float32)
       score_range = [0, 1.]
@@ -155,7 +155,7 @@ class AUCUsingHistogramTest(test.TestCase):
         from synthetic data.
     """
     score_range = [0, 1.] or score_range
-    with self.test_session():
+    with self.cached_session():
       labels = array_ops.placeholder(dtypes.bool, shape=[num_records])
       scores = array_ops.placeholder(dtypes.float32, shape=[num_records])
       auc, update_op = histogram_ops.auc_using_histogram(
diff --git a/tensorflow/contrib/metrics/python/metrics/classification_test.py b/tensorflow/contrib/metrics/python/metrics/classification_test.py
index 3d0b81c1be..d6a670f97b 100644
--- a/tensorflow/contrib/metrics/python/metrics/classification_test.py
+++ b/tensorflow/contrib/metrics/python/metrics/classification_test.py
@@ -34,7 +34,7 @@ from tensorflow.python.platform import test
 class ClassificationTest(test.TestCase):
 
   def testAccuracy1D(self):
-    with self.test_session() as session:
+    with self.cached_session() as session:
       pred = array_ops.placeholder(dtypes.int32, shape=[None])
       labels = array_ops.placeholder(dtypes.int32, shape=[None])
       acc = classification.accuracy(pred, labels)
@@ -44,7 +44,7 @@ class ClassificationTest(test.TestCase):
       self.assertEqual(result, 0.5)
 
   def testAccuracy1DBool(self):
-    with self.test_session() as session:
+    with self.cached_session() as session:
       pred = array_ops.placeholder(dtypes.bool, shape=[None])
       labels = array_ops.placeholder(dtypes.bool, shape=[None])
       acc = classification.accuracy(pred, labels)
@@ -54,7 +54,7 @@ class ClassificationTest(test.TestCase):
       self.assertEqual(result, 0.5)
 
   def testAccuracy1DInt64(self):
-    with self.test_session() as session:
+    with self.cached_session() as session:
       pred = array_ops.placeholder(dtypes.int64, shape=[None])
       labels = array_ops.placeholder(dtypes.int64, shape=[None])
       acc = classification.accuracy(pred, labels)
@@ -64,7 +64,7 @@ class ClassificationTest(test.TestCase):
       self.assertEqual(result, 0.5)
 
   def testAccuracy1DString(self):
-    with self.test_session() as session:
+    with self.cached_session() as session:
       pred = array_ops.placeholder(dtypes.string, shape=[None])
       labels = array_ops.placeholder(dtypes.string, shape=[None])
       acc = classification.accuracy(pred, labels)
@@ -87,7 +87,7 @@ class ClassificationTest(test.TestCase):
       classification.accuracy(pred, labels)
 
   def testAccuracy1DWeighted(self):
-    with self.test_session() as session:
+    with self.cached_session() as session:
       pred = array_ops.placeholder(dtypes.int32, shape=[None])
       labels = array_ops.placeholder(dtypes.int32, shape=[None])
       weights = array_ops.placeholder(dtypes.float32, shape=[None])
@@ -101,7 +101,7 @@ class ClassificationTest(test.TestCase):
       self.assertEqual(result, 0.5)
 
   def testAccuracy1DWeightedBroadcast(self):
-    with self.test_session() as session:
+    with self.cached_session() as session:
       pred = array_ops.placeholder(dtypes.int32, shape=[None])
       labels = array_ops.placeholder(dtypes.int32, shape=[None])
       weights = array_ops.placeholder(dtypes.float32, shape=[])
@@ -161,7 +161,7 @@ class F1ScoreTest(test.TestCase):
         (10, 3), maxval=2, dtype=dtypes.int64, seed=2)
     f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=3)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
 
       # Run several updates.
@@ -176,7 +176,7 @@ class F1ScoreTest(test.TestCase):
   def testAllCorrect(self):
     inputs = np.random.randint(0, 2, size=(100, 1))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(inputs, dtype=dtypes.float32)
       labels = constant_op.constant(inputs)
       f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=3)
@@ -191,7 +191,7 @@ class F1ScoreTest(test.TestCase):
         [1, 0, 1, 0], shape=(1, 4), dtype=dtypes.float32)
     labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4))
     f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=1)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       sess.run([f1_op])
       # Threshold 0 will have around 0.5 precision and 1 recall yielding an F1
@@ -201,7 +201,7 @@ class F1ScoreTest(test.TestCase):
   def testAllIncorrect(self):
     inputs = np.random.randint(0, 2, size=(10000, 1))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(inputs, dtype=dtypes.float32)
       labels = constant_op.constant(1 - inputs, dtype=dtypes.float32)
       f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=3)
@@ -214,7 +214,7 @@ class F1ScoreTest(test.TestCase):
       self.assertAlmostEqual(2 * 0.5 * 1 / (1 + 0.5), f1.eval(), places=2)
 
   def testWeights1d(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [[1, 0], [1, 0]], shape=(2, 2), dtype=dtypes.float32)
       labels = constant_op.constant([[0, 1], [1, 0]], shape=(2, 2))
@@ -228,7 +228,7 @@ class F1ScoreTest(test.TestCase):
       self.assertAlmostEqual(1.0, f1.eval(), places=5)
 
   def testWeights2d(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = constant_op.constant(
           [[1, 0], [1, 0]], shape=(2, 2), dtype=dtypes.float32)
       labels = constant_op.constant([[0, 1], [1, 0]], shape=(2, 2))
@@ -242,7 +242,7 @@ class F1ScoreTest(test.TestCase):
       self.assertAlmostEqual(1.0, f1.eval(), places=5)
 
   def testZeroLabelsPredictions(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       predictions = array_ops.zeros([4], dtype=dtypes.float32)
       labels = array_ops.zeros([4])
       f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=3)
@@ -300,7 +300,7 @@ class F1ScoreTest(test.TestCase):
     f1, f1_op = classification.f1_score(tf_labels, tf_predictions,
                                         num_thresholds=3)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.local_variables_initializer())
       for _ in range(num_batches):
         sess.run([f1_op])
diff --git a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py
index f08ffaa36f..089ecf597d 100644
--- a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py
@@ -236,7 +236,7 @@ class AdamOptimizerTest(test.TestCase, parameterized.TestCase):
                              opt.get_slot(var=var0, name="m").name)
 
   def testBasic(self):
-    with self.test_session():
+    with self.cached_session():
       self.doTestBasic(use_resource=False)
 
   @test_util.run_in_graph_and_eager_modes(reset_test=True)
@@ -249,7 +249,7 @@ class AdamOptimizerTest(test.TestCase, parameterized.TestCase):
 
   def testTensorLearningRate(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.test_session():
+      with self.cached_session():
         # Initialize variables for numpy implementation.
         m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -286,7 +286,7 @@ class AdamOptimizerTest(test.TestCase, parameterized.TestCase):
 
   def testSharing(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.test_session():
+      with self.cached_session():
         # Initialize variables for numpy implementation.
         m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics_test.py b/tensorflow/contrib/tensor_forest/client/eval_metrics_test.py
index aa30919167..d49928e3f1 100644
--- a/tensorflow/contrib/tensor_forest/client/eval_metrics_test.py
+++ b/tensorflow/contrib/tensor_forest/client/eval_metrics_test.py
@@ -32,7 +32,7 @@ class EvalMetricsTest(test_util.TensorFlowTestCase):
                                           [0.9, 0.8, 0.2], [0.6, 0.4, 0.8]])
     targets = constant_op.constant([[0], [2], [1], [1]])
     in_top_2_op, update_op = top_2_fn(probabilities, targets)
-    with self.test_session():
+    with self.cached_session():
       # initializes internal accuracy vars
       variables.local_variables_initializer().run()
       # need to call in order to run the in_top_2_op internal operations because
@@ -49,7 +49,7 @@ class EvalMetricsTest(test_util.TensorFlowTestCase):
                                           [0.3, 0.6, 0.9, 0.4, 0.8, 0.6]])
     targets = constant_op.constant([3, 0, 2, 5, 1])
     in_top_3_op, update_op = top_3_fn(probabilities, targets)
-    with self.test_session():
+    with self.cached_session():
       # initializes internal accuracy vars
       variables.local_variables_initializer().run()
       # need to call in order to run the in_top_3_op internal operations because
@@ -61,7 +61,7 @@ class EvalMetricsTest(test_util.TensorFlowTestCase):
     predictions = constant_op.constant([0, 1, 3, 6, 5, 2, 7, 6, 4, 9])
     targets = constant_op.constant([0, 1, 4, 6, 5, 1, 7, 5, 4, 8])
     accuracy_op, update_op = eval_metrics._accuracy(predictions, targets)
-    with self.test_session():
+    with self.cached_session():
       variables.local_variables_initializer().run()
       # need to call in order to run the accuracy_op internal operations because
       # it is a streaming function
@@ -74,7 +74,7 @@ class EvalMetricsTest(test_util.TensorFlowTestCase):
     targets = constant_op.constant(
         [1.0, 4.3, 2.6, 0.5, 1.1, 0.7, 5.1, 3.4, 1.8])
     r2_op, update_op = eval_metrics._r2(scores, targets)
-    with self.test_session():
+    with self.cached_session():
       # initializes internal accuracy vars
       variables.local_variables_initializer().run()
       # need to call in order to run the r2_op internal operations because
-- 
GitLab


From 32ed8d488ad8088b63f046cde0c665e3b2aab8e7 Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Mon, 17 Sep 2018 13:31:12 -0700
Subject: [PATCH 0280/1357] Add support for predicting models with
 learning_phase.

PiperOrigin-RevId: 213327633
---
 .../contrib/tpu/python/tpu/keras_support.py    | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index d8c3872363..776b9bff0f 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -970,15 +970,25 @@ class TPUFunction(object):
       # Note: this condition is possible during the prologue or epilogue of the
       # pipelined loop.
       return None, None
-    # Strip sample weight from inputs
+
+    if (self.model.uses_learning_phase and
+        not isinstance(K.learning_phase(), int)):
+      # Remove the learning_phase flag at the end. We currently hard code the
+      # learning_phase in TPUFunction.
+      assert isinstance(inputs[-1], int), (
+          'Expect the final element be learning_phase flag. Got {}'.format(
+              inputs[-1]))
+      inputs = inputs[:-1]
+
     if (self.execution_mode == model_fn_lib.ModeKeys.TRAIN or
         self.execution_mode == model_fn_lib.ModeKeys.EVAL):
+      # Strip sample weight from inputs.
       input_tensors = self.model._feed_inputs + self.model._feed_targets
-      inputs = inputs[:len(input_tensors)]
-      return input_tensors, inputs
     else:
       input_tensors = self.model._feed_inputs
-      return input_tensors, inputs
+
+    inputs = inputs[:len(input_tensors)]
+    return input_tensors, inputs
 
   def _process_outputs(self, outfeed_outputs):
     """Processes the outputs of a model function execution.
-- 
GitLab


From cd767b617ab00ffba993d62e4ff1f2028791fe4e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 13:31:40 -0700
Subject: [PATCH 0281/1357] Compute `axes` and `free` statically during graph
 creation.

PiperOrigin-RevId: 213327709
---
 .../kernel_tests/attention_wrapper_test.py    | 39 +++++++++----------
 tensorflow/python/ops/math_ops.py             | 20 +++++-----
 2 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
index f2c43f30d4..1f3b533de9 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
@@ -919,31 +919,28 @@ class AttentionWrapperTest(test.TestCase):
         wrapper.BahdanauAttention, wrapper.LuongAttention)
 
     expected_final_output = BasicDecoderOutput(
-        rnn_output=ResultSummary(shape=(5, 3, 20),
-                                 dtype=dtype('float32'),
-                                 mean=0.11723966),
-        sample_id=ResultSummary(shape=(5, 3),
-                                dtype=dtype('int32'),
-                                mean=9.2666666666666675))
+        rnn_output=ResultSummary(
+            shape=(5, 3, 20), dtype=dtype('float32'), mean=0.11723966),
+        sample_id=ResultSummary(
+            shape=(5, 3), dtype=dtype('int32'), mean=7.266666666666667))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
-            c=ResultSummary(shape=(5, 9),
-                            dtype=dtype('float32'),
-                            mean=-0.003545674),
-            h=ResultSummary(shape=(5, 9),
-                            dtype=dtype('float32'),
-                            mean=-0.0018327223)),
-        attention=ResultSummary(shape=(5, 20),
-                                dtype=dtype('float32'),
-                                mean=0.11728073),
+            c=ResultSummary(
+                shape=(5, 9), dtype=dtype('float32'), mean=-0.003545674),
+            h=ResultSummary(
+                shape=(5, 9), dtype=dtype('float32'), mean=-0.0018327223)),
+        attention=ResultSummary(
+            shape=(5, 20), dtype=dtype('float32'), mean=0.11601614207),
         time=3,
-        alignments=(
-            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
-            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
+        alignments=(ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+                    ResultSummary(
+                        shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
         alignment_history=(),
-        attention_state=(
-            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
-            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)))
+        attention_state=(ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+                         ResultSummary(
+                             shape=(5, 8), dtype=dtype('float32'), mean=0.125)))
     expected_final_alignment_history = (
         ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125),
         ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125))
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index acd5a32e82..f57abf6704 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -2898,21 +2898,23 @@ def tensordot(a, b, axes, name=None):
         shape_a = a.get_shape().as_list()
         axes = [i if i >= 0 else i + len(shape_a) for i in axes]
         free = [i for i in xrange(len(shape_a)) if i not in axes]
-        free_dims_static = [shape_a[i] for i in free]
+        axes_dims = [shape_a[i] for i in axes]
+        free_dims = [shape_a[i] for i in free]
+        free_dims_static = free_dims
+        axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes")
+        free = ops.convert_to_tensor(free, dtype=dtypes.int32, name="free")
+        shape_a = array_ops.shape(a)
       else:
         free_dims_static = None
-      shape_a = array_ops.shape(a)
-      rank_a = array_ops.rank(a)
-      axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes")
-      axes = cast(axes >= 0, dtypes.int32) * axes + cast(
-          axes < 0, dtypes.int32) * (
-              axes + rank_a)
-      free, _ = array_ops.setdiff1d(range(rank_a), axes)
+        shape_a = array_ops.shape(a)
+        rank_a = array_ops.rank(a)
+        axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes")
+        axes = array_ops.where(axes >= 0, axes, axes + rank_a)
+        free, _ = array_ops.setdiff1d(range(rank_a), axes)
       free_dims = array_ops.gather(shape_a, free)
       axes_dims = array_ops.gather(shape_a, axes)
       prod_free_dims = reduce_prod(free_dims)
       prod_axes_dims = reduce_prod(axes_dims)
-      perm = array_ops.concat([axes_dims, free_dims], 0)
       if flipped:
         perm = array_ops.concat([axes, free], 0)
         new_shape = array_ops.stack([prod_axes_dims, prod_free_dims])
-- 
GitLab


From 838d9c859583717a151395ef9c28374e124f1408 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 13:32:33 -0700
Subject: [PATCH 0282/1357] Tweak test tolerance in
 segment_reduction_ops_test.py, which is otherwise flaky.

PiperOrigin-RevId: 213327863
---
 tensorflow/python/kernel_tests/segment_reduction_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index ce507e4ad7..2931877c11 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -300,7 +300,7 @@ class UnsortedSegmentTest(SegmentReductionHelper):
               tf_ans = s.eval()
               if dtype is dtypes_lib.bfloat16:
                 tf_ans = tf_ans.astype(np.float32)
-              self.assertAllClose(np_ans, tf_ans)
+              self.assertAllCloseAccordingToType(np_ans, tf_ans)
               self.assertShapeEqual(np_ans, s)
 
   def testNumSegmentsTypes(self):
-- 
GitLab


From 6add0fb2481756b276b1016033919c1c237abee1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 14:08:32 -0700
Subject: [PATCH 0283/1357] Improve the error messages in
 custom_export_strategy.

PiperOrigin-RevId: 213334465
---
 .../boosted_trees/estimator_batch/custom_export_strategy.py  | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
index 78232fa0a6..48f12a64f9 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
@@ -51,6 +51,7 @@ def make_custom_export_strategy(name,
     feature_columns: A list of feature columns.
     export_input_fn: A function that takes no arguments and returns an
       `InputFnOps`.
+    use_core_columns: A boolean, whether core feature columns were used.
 
   Returns:
     An `ExportStrategy`.
@@ -196,7 +197,7 @@ def convert_to_universal_format(dtec, sorted_feature_names,
           matching_id.int64_value = split.feature_id
           node.custom_left_child_test.Pack(categorical_test)
         else:
-          raise ValueError("Unexpected node type %s", node_type)
+          raise ValueError("Unexpected node type %s" % node_type)
         node.left_child_id.value = split.left_id
         node.right_child_id.value = split.right_id
   return model_and_features
@@ -236,7 +237,7 @@ def _get_feature_importances(dtec, feature_names, num_dense_floats,
         assert tree_node.node_metadata.gain == 0
         continue
       else:
-        raise ValueError("Unexpected split type %s", node_type)
+        raise ValueError("Unexpected split type %s" % node_type)
       # Apply shrinkage factor. It is important since it is not always uniform
       # across different trees.
       sums[split_column] += (
-- 
GitLab


From 4516558acc9763999b19d1af75ab1fcd6562e4f0 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 17 Sep 2018 14:20:13 -0700
Subject: [PATCH 0284/1357] Use a single thread in eager if
 inter_op_parallelism_threads isn't specified.

PiperOrigin-RevId: 213336463
---
 tensorflow/core/common_runtime/eager/context.cc | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc
index 263467a5b6..18420b60fd 100644
--- a/tensorflow/core/common_runtime/eager/context.cc
+++ b/tensorflow/core/common_runtime/eager/context.cc
@@ -32,6 +32,18 @@ bool ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val) {
   return default_val;
 }
 
+std::unique_ptr<thread::ThreadPool> EagerThreadPool(
+    const SessionOptions& opts) {
+  SessionOptions opts_copy(opts);
+  if (opts_copy.config.inter_op_parallelism_threads() == 0) {
+    // Eager defaults to a single thread when no threads are specified.
+    opts_copy.config.set_inter_op_parallelism_threads(1);
+  }
+
+  return std::unique_ptr<thread::ThreadPool>(
+      NewThreadPoolFromSessionOptions(opts_copy));
+}
+
 }  // namespace
 
 EagerContext::EagerContext(const SessionOptions& opts,
@@ -49,7 +61,7 @@ EagerContext::EagerContext(const SessionOptions& opts,
     : policy_(default_policy),
       devices_(device_mgr->ListDevices()),
       rendezvous_(rendezvous),
-      thread_pool_(NewThreadPoolFromSessionOptions(opts)),
+      thread_pool_(EagerThreadPool(opts)),
       pflr_(new ProcessFunctionLibraryRuntime(
           device_mgr, opts.env, TF_GRAPH_DEF_VERSION, &func_lib_def_, {},
           thread_pool_.get())),
@@ -67,7 +79,7 @@ EagerContext::EagerContext(const SessionOptions& opts,
   }
   InitDeviceMapAndAsync();
   runner_ = [this](std::function<void()> closure) {
-    this->thread_pool_->Schedule(closure);
+    this->thread_pool_->Schedule(std::move(closure));
   };
 }
 
-- 
GitLab


From 28dd4d9fcbf8cac1008b2ccd2b4be3fa3c25afd1 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 17 Sep 2018 14:24:17 -0700
Subject: [PATCH 0285/1357] Keep only weak references to variables in graph
 functions

This enables cleanup of the variables referenced in defunned methods of objects when the object is garbage collected. Since one PolymorphicFunction is created per @defun, decorated methods before this change held on to all of the variables referenced in that method for any instance of the class (i.e. variables which should have been object-scoped were scoped to the lifetime of the class definition).

Raises an exception if variables used in the function have been deleted when it is called, which means no local variables.

PiperOrigin-RevId: 213337256
---
 .../python/mirrored_strategy_multigpu_test.py | 12 ++++-
 tensorflow/python/eager/function.py           | 53 ++++++++++++------
 tensorflow/python/eager/function_test.py      | 54 ++++++++++++-------
 tensorflow/python/framework/ops_test.py       | 12 ++---
 tensorflow/python/keras/backend.py            |  4 +-
 .../python/training/gradient_descent_test.py  | 10 ++--
 6 files changed, 95 insertions(+), 50 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index c6894e9013..f51e543624 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -1271,7 +1271,17 @@ class MirroredStrategyDefunTest(test.TestCase):
                             self.evaluate(device_result))
 
       for defun in defuns:
-        self.assertEqual(set(mock_model.variables), set(defun.variables))
+        # PolymorphicFunctions are specialized to the current device stack, so
+        # call_for_each has one trace per device. To check that the expected set
+        # of variables was accessed on each trace, we first retrieve each
+        # device-specific graph function.
+        per_device_graph_functions = dist.call_for_each_tower(
+            defun.get_concrete_function,
+            mock_model, *inputs, run_concurrently=False)
+        for device in devices:
+          graph_function = per_device_graph_functions.get(device=device)
+          self.assertEqual(set(mock_model.variables),
+                           set(graph_function.graph.variables))
 
   @test_util.run_in_graph_and_eager_modes()
   def testVariableInDefun(self):
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index e2874e25b6..4f1a85a274 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -23,6 +23,7 @@ import collections
 import functools
 import sys
 import threading
+import weakref
 
 import numpy as np
 import six
@@ -180,7 +181,7 @@ class FuncGraph(ops.Graph):
     self.inputs = []
     self.outputs = []
     self.structured_outputs = None
-    self.variables = []
+    self._weak_variables = []
     self.outer_graph = ops.get_default_graph()
     self.captures = collections.OrderedDict()
 
@@ -217,6 +218,31 @@ class FuncGraph(ops.Graph):
     self._graph_key = graph._graph_key
     # pylint: enable=protected-access
 
+  @property
+  def variables(self):
+    """A list of variables accessed by this FuncGraph.
+
+    Note that functions keep only weak references to variables. Calling the
+    function after a variable it accesses has been deleted is an error.
+
+    Yields:
+      Strong references to variables accessed by this FuncGraph.
+    """
+    for weak_v in self._weak_variables:
+      v = weak_v()
+      if v is None:
+        raise AssertionError(
+            "Called a function referencing variables which have been deleted. "
+            "This likely means that function-local variables were created and "
+            "not referenced elsewhere in the program. This is generally a "
+            "mistake; consider storing variables in an object attribute on "
+            "first call.")
+      yield v
+
+  @variables.setter
+  def variables(self, var_list):
+    self._weak_variables = [weakref.ref(v) for v in var_list]
+
   def create_op(
       self,
       op_type,
@@ -603,11 +629,6 @@ class Function(object):
     """Returns the graph from which this function was constructed."""
     return self._func_graph
 
-  @property
-  def variables(self):
-    """Returns all variables touched by this function."""
-    return self._func_graph.variables
-
   @property
   def inputs(self):
     """Returns tensors in `self.graph` corresponding to arguments."""
@@ -970,7 +991,16 @@ def _encode_arg(arg):
     return tuple(
         (_encode_arg(key), _encode_arg(arg[key])) for key in sorted(arg))
   else:
-    return arg
+    try:
+      # If possible, keep only a weak reference to Python objects. Weak
+      # references hash to the same value as the original object.
+      # TODO(allenl): Clean up dead functions and their cache keys if the cache
+      # gets large. Right now creating objects with a defunned method, calling
+      # the method, and losing a reference to the object in a loop will leak
+      # memory here.
+      return weakref.ref(arg)
+    except TypeError:
+      return arg
 
 
 def _deterministic_dict_values(dictionary):
@@ -1020,7 +1050,6 @@ class PolymorphicFunction(object):
       self._kwds_to_include = {}
     self._name = name
     self._function_cache = collections.OrderedDict()
-    self._variables = []
     self._function_attributes = attributes or {}
 
     self._lock = threading.Lock()
@@ -1066,12 +1095,6 @@ class PolymorphicFunction(object):
     """Returns the wrapped Python function."""
     return self._python_function
 
-  # TODO(akshayka): Remove this property.
-  @property
-  def variables(self):
-    """Returns the union of all variables referenced by cached `Function`s`."""
-    return self._variables
-
   def get_concrete_function(self, *args, **kwargs):
     """Returns a `Function` object specialized to inputs and execution context.
 
@@ -1238,8 +1261,6 @@ class PolymorphicFunction(object):
             func_graph_from_py_func(self._name, self._python_function, args,
                                     kwds, self._input_signature),
             self._function_attributes)
-        self._variables.extend(
-            [v for v in graph_function.variables if v not in self._variables])
         self._function_cache[cache_key] = graph_function
       return graph_function, [
           t for t in nest.flatten((args, kwds))
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index c168b6060c..6326a5b45f 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -21,6 +21,7 @@ import collections
 import functools
 from multiprocessing.pool import ThreadPool
 import sys
+import weakref
 
 import numpy
 
@@ -74,6 +75,13 @@ class MiniModel(keras_training.Model):
     return self.fc(inputs)
 
 
+class DefunnedMiniModel(MiniModel):
+
+  @function.defun
+  def call(self, inputs, training=True):
+    return super(DefunnedMiniModel, self).call(inputs, training=training)
+
+
 @test_util.with_c_shapes
 class FunctionTest(test.TestCase):
 
@@ -140,8 +148,8 @@ class FunctionTest(test.TestCase):
 
     @function.defun
     def f():
-      v = resource_variable_ops.ResourceVariable(1.0)
-      return v.read_value()
+      self.v = resource_variable_ops.ResourceVariable(1.0)
+      return self.v.read_value()
 
     self.assertAllEqual(f(), 1.0)
 
@@ -399,9 +407,9 @@ class FunctionTest(test.TestCase):
 
     @function.defun
     def tensor_init():
-      v = resource_variable_ops.ResourceVariable(
+      self.v = resource_variable_ops.ResourceVariable(
           lambda: constant_op.constant(2.0))
-      return v.read_value()
+      return self.v.read_value()
 
     value = tensor_init()
     if not context.executing_eagerly():
@@ -415,8 +423,8 @@ class FunctionTest(test.TestCase):
     def tensor_init():
       with ops.init_scope():
         const = constant_op.constant(2.0)
-      v = resource_variable_ops.ResourceVariable(const)
-      return v.read_value()
+      self.v = resource_variable_ops.ResourceVariable(const)
+      return self.v.read_value()
 
     value = tensor_init()
     if not context.executing_eagerly():
@@ -478,13 +486,14 @@ class FunctionTest(test.TestCase):
   def testDefunForcesResourceVariables(self):
 
     def variable_creator():
-      return variables.Variable(0.0).read_value()
+      self.v = variables.Variable(0.0)
+      return self.v.read_value()
 
+    self.v = None
     defined = function.defun(variable_creator)
     defined()  # Create the variable.
-    self.assertEqual(len(defined.variables), 1)
     self.assertIsInstance(
-        defined.variables[0], resource_variable_ops.ResourceVariable)
+        self.v, resource_variable_ops.ResourceVariable)
 
   def testDefunDifferentiable(self):
     v = resource_variable_ops.ResourceVariable(1.0)
@@ -1184,13 +1193,11 @@ class FunctionTest(test.TestCase):
     defined = function.defun(foo)
 
     x = constant_op.constant([1.0])
-    self.assertAllEqual(defined.variables, [])
-    _ = defined(x)
-    self.assertAllEqual(defined.variables, [v])
+    self.assertEqual(1., self.evaluate(defined(x)))
+    v.assign(2.)
 
     x = constant_op.constant([1.0, 2.0])
-    _ = defined(x)  # ensure the variables list remains the same
-    self.assertAllEqual(defined.variables, [v])
+    self.assertAllEqual([2., 4.], self.evaluate(defined(x)))
 
   def testPythonFunctionWithDefaultArgs(self):
 
@@ -1913,10 +1920,10 @@ class AutomaticControlDependenciesTest(test.TestCase):
 
     @function.defun
     def train():
-      v = resource_variable_ops.ResourceVariable(1.0)
-      grad = backprop.implicit_grad(loss)(v)
+      self.v = resource_variable_ops.ResourceVariable(1.0)
+      grad = backprop.implicit_grad(loss)(self.v)
       optimizer.apply_gradients(grad)
-      return v.read_value()
+      return self.v.read_value()
 
     value = train()
     self.assertEqual(value.numpy(), -1.0)
@@ -1943,10 +1950,10 @@ class AutomaticControlDependenciesTest(test.TestCase):
 
     @function.defun
     def train():
-      v = resource_variable_ops.ResourceVariable(1.0)
-      grad = backprop.implicit_grad(loss)(v)
+      self.v = resource_variable_ops.ResourceVariable(1.0)
+      grad = backprop.implicit_grad(loss)(self.v)
       optimizer.apply_gradients(grad)
-      return v.read_value()
+      return self.v.read_value()
 
     train()
 
@@ -2133,6 +2140,13 @@ class AutomaticControlDependenciesTest(test.TestCase):
 
       modify_same_flat(nested_input)
 
+  def testDecoratedMethodVariableCleanup(self):
+    m = DefunnedMiniModel()
+    m(array_ops.ones([1, 2]))
+    weak_variables = weakref.WeakSet(m.variables)
+    self.assertEqual(2, len(weak_variables))
+    del m
+    self.assertEqual([], list(weak_variables))
 
 if __name__ == '__main__':
   ops.enable_eager_execution(
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index d59adf3d48..c3a3437743 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -2142,8 +2142,8 @@ class InitScopeTest(test_util.TensorFlowTestCase):
 
     def function_with_variables():
       with ops.init_scope():
-        v = resource_variable_ops.ResourceVariable(3)
-      return v.assign_add(1)
+        self.v = resource_variable_ops.ResourceVariable(3)
+      return self.v.assign_add(1)
 
     with context.eager_mode():
       # Each invocation of function_with_variables recreates a variable.
@@ -2188,13 +2188,13 @@ class InitScopeTest(test_util.TensorFlowTestCase):
 
     def inner_function():
       with ops.init_scope():
-        v = resource_variable_ops.ResourceVariable(1)
-      return v.assign_add(2)
+        self.v = resource_variable_ops.ResourceVariable(1)
+      return self.v.assign_add(2)
 
     def outer_function(inner=None):
       with ops.init_scope():
-        v0 = resource_variable_ops.ResourceVariable(0)
-      return v0.assign_add(1) + inner()
+        self.v0 = resource_variable_ops.ResourceVariable(0)
+      return self.v0.assign_add(1) + inner()
 
     with context.eager_mode():
       # Each invocation of outer_function recreates variables.
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 5e1722ba20..60ed8e8c8a 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -696,14 +696,14 @@ def track_variable(v):
     return
   graph = v.graph if hasattr(v, 'graph') else ops.get_default_graph()
   if graph not in _GRAPH_VARIABLES:
-    _GRAPH_VARIABLES[graph] = set()
+    _GRAPH_VARIABLES[graph] = weakref.WeakSet()
   _GRAPH_VARIABLES[graph].add(v)
 
 
 def _get_variables(graph=None):
   """Returns variables corresponding to the given graph for initialization."""
   assert not context.executing_eagerly()
-  variables = _GRAPH_VARIABLES.get(graph, set())
+  variables = _GRAPH_VARIABLES.setdefault(graph, weakref.WeakSet())
   for opt in _GRAPH_TF_OPTIMIZERS.get(graph, set()):
     variables.update(opt.optimizer.variables())
   return variables
diff --git a/tensorflow/python/training/gradient_descent_test.py b/tensorflow/python/training/gradient_descent_test.py
index 56d82a5b88..1ddea598e5 100644
--- a/tensorflow/python/training/gradient_descent_test.py
+++ b/tensorflow/python/training/gradient_descent_test.py
@@ -252,12 +252,12 @@ class GradientDescentOptimizerTest(test.TestCase):
       optimizer = gradient_descent.GradientDescentOptimizer(1.0)
 
       def step():
-        v = resource_variable_ops.ResourceVariable(1.0)
+        self.v = resource_variable_ops.ResourceVariable(1.0)
         with backprop.GradientTape() as tape:
-          loss = v ** 2
-        grad = tape.gradient(loss, v)
-        optimizer.apply_gradients([(grad, v)])
-        return v.read_value()
+          loss = self.v ** 2
+        grad = tape.gradient(loss, self.v)
+        optimizer.apply_gradients([(grad, self.v)])
+        return self.v.read_value()
 
       compiled_step = function.defun(step)
 
-- 
GitLab


From 55581a5bed7108c2d39ab603db8c916b6d624648 Mon Sep 17 00:00:00 2001
From: Eddie Zhou <eddz@google.com>
Date: Mon, 17 Sep 2018 15:06:34 -0700
Subject: [PATCH 0286/1357] Fix testing bug where partitioned primals wasn't
 actually being tested (constructing Variable directly instead of get_variable
 under scope with partitioner).

PiperOrigin-RevId: 213345447
---
 .../python/kernel_tests/sdca_ops_test.py      | 33 ++++++++++---------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
index 1d2db1cec8..7a1914d41f 100644
--- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
@@ -134,7 +134,7 @@ def make_random_examples_and_variables_dicts(num_examples, dim, num_non_zero):
   return examples_dict, variables_dict
 
 
-def make_variable_dict(max_age, max_gender, partitioned=False):
+def make_variable_dict(max_age, max_gender, num_shards=None, partitioned=False):
   # TODO(sibyl-toe9oF2e):  Figure out how to derive max_age & max_gender from
   # examples_dict.
   partitioner = None
@@ -142,14 +142,15 @@ def make_variable_dict(max_age, max_gender, partitioned=False):
     partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2,
                                                                axis=0)
   with variable_scope.variable_scope(
-      name_or_scope='variables',
+      name_or_scope=('variables/shard_{}'.format(num_shards)
+                     if num_shards else 'variables'),
       partitioner=partitioner):
-    age_weights = variables_lib.Variable(
-        array_ops.zeros(
-            [max_age + 1], dtype=dtypes.float32))
-    gender_weights = variables_lib.Variable(
-        array_ops.zeros(
-            [max_gender + 1], dtype=dtypes.float32))
+    age_weights = variable_scope.get_variable(
+        name='age',
+        initializer=array_ops.zeros([max_age + 1], dtype=dtypes.float32))
+    gender_weights = variable_scope.get_variable(
+        name='gender',
+        initializer=array_ops.zeros([max_gender + 1], dtype=dtypes.float32))
   return dict(
       sparse_features_weights=[age_weights, gender_weights],
       dense_features_weights=[])
@@ -242,7 +243,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
     for num_shards in _SHARD_NUMBERS:
       with self._single_threaded_test_session():
         examples = make_example_dict(example_protos, example_weights)
-        variables = make_variable_dict(1, 1)
+        variables = make_variable_dict(1, 1, num_shards)
         options = dict(
             symmetric_l2_regularization=1,
             symmetric_l1_regularization=0,
@@ -290,7 +291,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
     for num_shards in _SHARD_NUMBERS:
       with self._single_threaded_test_session():
         examples = make_example_dict(example_protos, example_weights)
-        variables = make_variable_dict(1, 1, partitioned=True)
+        variables = make_variable_dict(1, 1, num_shards, partitioned=True)
         options = dict(
             symmetric_l2_regularization=1,
             symmetric_l1_regularization=0,
@@ -463,7 +464,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
     for num_shards in _SHARD_NUMBERS:
       with self._single_threaded_test_session():
         examples = make_example_dict(example_protos, example_weights)
-        variables = make_variable_dict(1, 1)
+        variables = make_variable_dict(1, 1, num_shards)
         options = dict(
             symmetric_l2_regularization=0,
             symmetric_l1_regularization=0,
@@ -521,7 +522,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
       with self._single_threaded_test_session():
         # Only use examples 0 and 2
         examples = make_example_dict(example_protos, example_weights)
-        variables = make_variable_dict(1, 1)
+        variables = make_variable_dict(1, 1, num_shards)
         options = dict(
             symmetric_l2_regularization=1,
             symmetric_l1_regularization=0,
@@ -561,7 +562,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
     for num_shards in _SHARD_NUMBERS:
       with self._single_threaded_test_session():
         examples = make_example_dict(example_protos, example_weights)
-        variables = make_variable_dict(1, 1)
+        variables = make_variable_dict(1, 1, num_shards)
         options = dict(
             symmetric_l2_regularization=1,
             symmetric_l1_regularization=0,
@@ -598,7 +599,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
     for num_shards in _SHARD_NUMBERS:
       with self._single_threaded_test_session():
         examples = make_example_dict(example_protos, example_weights)
-        variables = make_variable_dict(3, 1)
+        variables = make_variable_dict(3, 1, num_shards)
         options = dict(
             symmetric_l2_regularization=1,
             symmetric_l1_regularization=0,
@@ -639,7 +640,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
     for num_shards in _SHARD_NUMBERS:
       with self._single_threaded_test_session():
         examples = make_example_dict(example_protos, example_weights)
-        variables = make_variable_dict(1, 1)
+        variables = make_variable_dict(1, 1, num_shards)
         options = dict(
             symmetric_l2_regularization=1,
             symmetric_l1_regularization=0,
@@ -679,7 +680,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
     for num_shards in _SHARD_NUMBERS:
       with self._single_threaded_test_session():
         examples = make_example_dict(example_protos, example_weights)
-        variables = make_variable_dict(1, 1)
+        variables = make_variable_dict(1, 1, num_shards)
         options = dict(
             symmetric_l2_regularization=1,
             symmetric_l1_regularization=0,
-- 
GitLab


From bb30dfce198341b2ec80d0aa22b49eaa5eac533b Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Mon, 17 Sep 2018 15:11:22 -0700
Subject: [PATCH 0287/1357] Add benchmarks comparing Mkl vs Default Conv2D ops.

PiperOrigin-RevId: 213346439
---
 tensorflow/core/kernels/BUILD                |  21 +
 tensorflow/core/kernels/mkl_conv_ops_test.cc | 407 +++++++++++++++++++
 tensorflow/tensorflow.bzl                    |   2 +
 3 files changed, 430 insertions(+)
 create mode 100644 tensorflow/core/kernels/mkl_conv_ops_test.cc

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 94d3ab4467..ef176a7de6 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -30,6 +30,7 @@ load(
     "//tensorflow:tensorflow.bzl",
     "if_android",
     "tf_cc_test",
+    "tf_cc_test_mkl",
     "tf_cc_tests",
     "tf_cc_binary",
     "tf_copts",
@@ -6228,6 +6229,26 @@ tf_mkl_kernel_library(
     ] + mkl_deps(),
 )
 
+tf_cc_test_mkl(
+    name = "mkl_conv_ops_test",
+    size = "small",
+    srcs = ["mkl_conv_ops_test.cc"],
+    deps = [
+        ":ops_testutil",
+        ":ops_util",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:tensorflow",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_mkl_kernel_library(
     name = "mkl_tfconv_op",
     prefix = "mkl_tfconv",
diff --git a/tensorflow/core/kernels/mkl_conv_ops_test.cc b/tensorflow/core/kernels/mkl_conv_ops_test.cc
new file mode 100644
index 0000000000..a055351337
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_conv_ops_test.cc
@@ -0,0 +1,407 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/cc/ops/nn_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+#include "tensorflow/core/public/session.h"
+
+#if defined(INTEL_MKL_DNN_ONLY)
+#include "third_party/intel_mkl_dnn/include/mkldnn.h"
+#include "tensorflow/core/util/mkl_util.h"
+#endif
+
+// TODO(ezhulenev): Add numerical tests that will compare results of default
+// (aka Eigen) convolutions with MKL convolutions.
+
+// -------------------------------------------------------------------------- //
+// Performance Benchmarks.                                                    //
+// -------------------------------------------------------------------------- //
+
+// Compare performance of default Tensorflow convolution kernels (Eigen) with
+// MKL kernels on CPU.
+
+// Before running these benchmarks configure OpenMP environment variables:
+//   export KMP_BLOCKTIME=0
+//   export OMP_NUM_THREADS=${num_threads}
+
+namespace tensorflow {
+
+struct Conv2DDimensions {
+  Conv2DDimensions(int n, int h, int w, int c, int fc, int fh, int fw)
+      : input_batches(n),
+        input_height(h),
+        input_width(w),
+        input_depth(c),
+        filter_count(fc),
+        filter_height(fh),
+        filter_width(fw) {}
+
+  int input_batches;
+  int input_height;
+  int input_width;
+  int input_depth;
+  int filter_count;
+  int filter_height;
+  int filter_width;
+};
+
+static Tensor GetRandomTensor(const TensorShape& shape) {
+  Tensor tensor(DT_FLOAT, TensorShape(shape));
+  tensor.flat<float>() = tensor.flat<float>().setRandom();
+  return tensor;
+}
+
+// Get a random Tensor for the Conv2D input.
+static Tensor GetRandomInputTensor(const Conv2DDimensions& dims) {
+  return GetRandomTensor({dims.input_batches, dims.input_height,
+                          dims.input_width, dims.input_depth});
+}
+
+// Get a random Tensor for the Conv2D filter.
+static Tensor GetRandomFilterTensor(const Conv2DDimensions& dims) {
+  return GetRandomTensor({dims.filter_height, dims.filter_width,
+                          dims.input_depth, dims.filter_count});
+}
+
+// Get a random Tensor for the Conv2D output (assuming SAME padding).
+static Tensor GetRandomOutputTensor(const Conv2DDimensions& dims) {
+  return GetRandomTensor({dims.input_batches, dims.input_height,
+                          dims.input_width, dims.filter_count});
+}
+
+// Get a Tensor encoding Conv2D input shape.
+static Tensor GetInputSizesTensor(const Conv2DDimensions& dims) {
+  return test::AsTensor<int32>({dims.input_batches, dims.input_height,
+                                dims.input_width, dims.input_depth});
+}
+
+// Get a Tensor encoding Conv2D filter shape.
+static Tensor GetFilterSizesTensor(const Conv2DDimensions& dims) {
+  return test::AsTensor<int32>({dims.filter_height, dims.filter_width,
+                                dims.input_depth, dims.filter_count});
+}
+
+#if defined(INTEL_MKL_DNN_ONLY)
+static Tensor NonMklTensor() {
+  MklDnnShape non_mkl_shape;
+  non_mkl_shape.SetMklTensor(false);
+
+  auto size = static_cast<int64>(non_mkl_shape.GetSerializeBufferSize());
+  Tensor tensor(DT_UINT8, {size});
+
+  non_mkl_shape.SerializeMklDnnShape(tensor.flat<uint8>().data(),
+                                     size * sizeof(uint8));
+  return tensor;
+}
+#endif
+
+static Graph* DefaultConv2D(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_t = GetRandomInputTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+
+  Node* input = test::graph::Constant(graph, input_t, "input");
+  Node* filter = test::graph::Constant(graph, filter_t, "filter");
+
+  Node* conv2d;
+  TF_CHECK_OK(NodeBuilder(graph->NewName("conv_2d"), "Conv2D")
+                  .Input(input)
+                  .Input(filter)
+                  .Attr("T", DT_FLOAT)
+                  .Attr("strides", {1, 1, 1, 1})
+                  .Attr("padding", "SAME")
+                  .Finalize(graph, &conv2d));
+
+  return graph;
+}
+
+#if defined(INTEL_MKL_DNN_ONLY)
+static Graph* MklConv2D(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_t = GetRandomInputTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+
+  Node* input = test::graph::Constant(graph, input_t, "input");
+  Node* filter = test::graph::Constant(graph, filter_t, "filter");
+
+  Node* not_mkl_shape = test::graph::Constant(graph, NonMklTensor(), "not_mkl");
+
+  Node* conv2d;
+  TF_CHECK_OK(NodeBuilder(graph->NewName("mkl_conv_2d"), "_MklConv2D")
+                  .Input(input)
+                  .Input(filter)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Attr("T", DT_FLOAT)
+                  .Attr("strides", {1, 1, 1, 1})
+                  .Attr("padding", "SAME")
+                  .Attr("_kernel", "MklOp")
+                  .Finalize(graph, &conv2d));
+
+  return graph;
+}
+#endif
+
+static Graph* DefaultConv2DBwdInput(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_sizes_t = GetInputSizesTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+  Tensor out_backprop_t = GetRandomOutputTensor(dims);  // assuming SAME padding
+
+  Node* input_sizes =
+      test::graph::Constant(graph, input_sizes_t, "input_sizes");
+  Node* filter = test::graph::Constant(graph, filter_t, "filter");
+  Node* out_backprop =
+      test::graph::Constant(graph, out_backprop_t, "out_backprop");
+
+  Node* conv2d_bwd_input;
+  TF_CHECK_OK(
+      NodeBuilder(graph->NewName("conv_2d_bwd_input"), "Conv2DBackpropInput")
+          .Input(input_sizes)
+          .Input(filter)
+          .Input(out_backprop)
+          .Attr("T", DT_FLOAT)
+          .Attr("strides", {1, 1, 1, 1})
+          .Attr("padding", "SAME")
+          .Finalize(graph, &conv2d_bwd_input));
+
+  return graph;
+}
+
+#if defined(INTEL_MKL_DNN_ONLY)
+static Graph* MklConv2DBwdInput(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_sizes_t = GetInputSizesTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+  Tensor out_backprop_t = GetRandomOutputTensor(dims);  // assuming SAME padding
+
+  Node* input_sizes =
+      test::graph::Constant(graph, input_sizes_t, "input_sizes");
+  Node* filter = test::graph::Constant(graph, filter_t, "filter");
+  Node* out_backprop =
+      test::graph::Constant(graph, out_backprop_t, "out_backprop");
+
+  Node* not_mkl_shape = test::graph::Constant(graph, NonMklTensor(), "not_mkl");
+
+  Node* conv2d_bwd_input;
+  TF_CHECK_OK(NodeBuilder(graph->NewName("conv_2d_bwd_input"),
+                          "_MklConv2DBackpropInput")
+                  .Input(input_sizes)
+                  .Input(filter)
+                  .Input(out_backprop)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Attr("T", DT_FLOAT)
+                  .Attr("strides", {1, 1, 1, 1})
+                  .Attr("padding", "SAME")
+                  .Attr("_kernel", "MklOp")
+                  .Finalize(graph, &conv2d_bwd_input));
+
+  return graph;
+}
+#endif
+
+static Graph* DefaultConv2DBwdFilter(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_t = GetRandomInputTensor(dims);
+  Tensor filter_sizes_t = GetFilterSizesTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+  Tensor out_backprop_t = GetRandomOutputTensor(dims);  // assuming SAME padding
+
+  Node* input = test::graph::Constant(graph, input_t, "input");
+  Node* filter_sizes =
+      test::graph::Constant(graph, filter_sizes_t, "filter_sizes");
+  Node* out_backprop =
+      test::graph::Constant(graph, out_backprop_t, "out_backprop");
+
+  Node* conv2d_bwd_filter;
+  TF_CHECK_OK(
+      NodeBuilder(graph->NewName("conv_2d_bwd_filter"), "Conv2DBackpropFilter")
+          .Input(input)
+          .Input(filter_sizes)
+          .Input(out_backprop)
+          .Attr("T", DT_FLOAT)
+          .Attr("strides", {1, 1, 1, 1})
+          .Attr("padding", "SAME")
+          .Finalize(graph, &conv2d_bwd_filter));
+
+  return graph;
+}
+
+#if defined(INTEL_MKL_DNN_ONLY)
+static Graph* MklConv2DBwdFilter(const Conv2DDimensions& dims) {
+  Graph* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_t = GetRandomInputTensor(dims);
+  Tensor filter_sizes_t = GetFilterSizesTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+  Tensor out_backprop_t = GetRandomOutputTensor(dims);  // assuming SAME padding
+
+  Node* input = test::graph::Constant(graph, input_t, "input");
+  Node* filter_sizes =
+      test::graph::Constant(graph, filter_sizes_t, "filter_sizes");
+  Node* out_backprop =
+      test::graph::Constant(graph, out_backprop_t, "out_backprop");
+
+  Node* not_mkl_shape = test::graph::Constant(graph, NonMklTensor(), "not_mkl");
+
+  Node* conv2d_bwd_filter;
+  TF_CHECK_OK(NodeBuilder(graph->NewName("conv_2d_bwd_filter"),
+                          "_MklConv2DBackpropFilter")
+                  .Input(input)
+                  .Input(filter_sizes)
+                  .Input(out_backprop)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Attr("T", DT_FLOAT)
+                  .Attr("strides", {1, 1, 1, 1})
+                  .Attr("padding", "SAME")
+                  .Attr("_kernel", "MklOp")
+                  .Finalize(graph, &conv2d_bwd_filter));
+
+  return graph;
+}
+#endif
+
+// Macro arguments names: --------------------------------------------------- //
+//    N: batch size
+//    H: height
+//    W: width
+//    C: channels
+//   FC: filter count
+//   FH: filter height
+//   FW: filter width
+
+#define BM_CONCAT(a, b) a##b
+
+#define BM_NAME(p, type, N, H, W, C, FC, FH, FW) \
+  BM_CONCAT(BM_##p##_##type##_in_##N##_##H##_##W##_##C, _f_##FC##_##FH##_##FW)
+
+// Flops computation in these benchmarks are the same as in
+// eigen_benchmark_cpu_test.cc.
+
+#define BM_Conv2DT(kind, N, H, W, C, FC, FH, FW, type, LABEL)            \
+  static void BM_NAME(Conv2D_##kind, type, N, H, W, C, FC, FH,           \
+                      FW)(int iters) {                                   \
+    testing::SetLabel(LABEL);                                            \
+                                                                         \
+    int64 num_computed_elements = (N) * (H) * (W) * (FC);                \
+    int64 flops_per_iter = num_computed_elements * ((C) * (FH) * (FW));  \
+    testing::ItemsProcessed(static_cast<int64>(iters) * flops_per_iter); \
+                                                                         \
+    Conv2DDimensions dims(N, H, W, C, FC, FW, FH);                       \
+    test::Benchmark(#type, BM_CONCAT(kind, Conv2D)(dims)).Run(iters);    \
+  }                                                                      \
+  BENCHMARK(BM_NAME(Conv2D_##kind, type, N, H, W, C, FC, FH, FW))
+
+#if defined(INTEL_MKL_DNN_ONLY)
+#define BM_Conv2D(N, H, W, C, FC, FH, FW, type, LABEL)      \
+  BM_Conv2DT(Default, N, H, W, C, FC, FH, FW, type, LABEL); \
+  BM_Conv2DT(Mkl, N, H, W, C, FC, FH, FW, type, LABEL);
+#else
+#define BM_Conv2D(N, H, W, C, FC, FH, FW, type, LABEL) \
+  BM_Conv2DT(Default, N, H, W, C, FC, FH, FW, type, LABEL);
+#endif
+
+#define BM_Conv2DBwdInputT(kind, N, H, W, C, FC, FH, FW, type, LABEL)         \
+  static void BM_NAME(Conv2DBwdInput_##kind, type, N, H, W, C, FC, FH,        \
+                      FW)(int iters) {                                        \
+    testing::SetLabel(LABEL);                                                 \
+                                                                              \
+    int64 num_computed_elements = (N) * (H) * (W) * (C);                      \
+    int64 flops_per_iter = num_computed_elements * ((C) * (FH) * (FW));       \
+    testing::ItemsProcessed(static_cast<int64>(iters) * flops_per_iter);      \
+                                                                              \
+    Conv2DDimensions dims(N, H, W, C, FC, FW, FH);                            \
+    test::Benchmark(#type, BM_CONCAT(kind, Conv2DBwdInput)(dims)).Run(iters); \
+  }                                                                           \
+  BENCHMARK(BM_NAME(Conv2DBwdInput_##kind, type, N, H, W, C, FC, FH, FW))
+
+#if defined(INTEL_MKL_DNN_ONLY)
+#define BM_Conv2DBwdInput(N, H, W, C, FC, FH, FW, type, LABEL)      \
+  BM_Conv2DBwdInputT(Default, N, H, W, C, FC, FH, FW, type, LABEL); \
+  BM_Conv2DBwdInputT(Mkl, N, H, W, C, FC, FH, FW, type, LABEL);
+#else
+#define BM_Conv2DBwdInput(N, H, W, C, FC, FH, FW, type, LABEL) \
+  BM_Conv2DBwdInputT(Default, N, H, W, C, FC, FH, FW, type, LABEL);
+#endif
+
+#define BM_Conv2DBwdFilterT(kind, N, H, W, C, FC, FH, FW, type, LABEL)         \
+  static void BM_NAME(Conv2DBwdFilter_##kind, type, N, H, W, C, FC, FH,        \
+                      FW)(int iters) {                                         \
+    testing::SetLabel(LABEL);                                                  \
+                                                                               \
+    int64 num_computed_elements = (FH) * (FW) * (C) * (FC);                    \
+    int64 flops_per_iter = num_computed_elements * ((N) * (H) * (W));          \
+    testing::ItemsProcessed(static_cast<int64>(iters) * flops_per_iter);       \
+                                                                               \
+    Conv2DDimensions dims(N, H, W, C, FC, FW, FH);                             \
+    test::Benchmark(#type, BM_CONCAT(kind, Conv2DBwdFilter)(dims)).Run(iters); \
+  }                                                                            \
+  BENCHMARK(BM_NAME(Conv2DBwdFilter_##kind, type, N, H, W, C, FC, FH, FW))
+
+#if defined(INTEL_MKL_DNN_ONLY)
+#define BM_Conv2DBwdFilter(N, H, W, C, FC, FH, FW, type, LABEL)      \
+  BM_Conv2DBwdFilterT(Default, N, H, W, C, FC, FH, FW, type, LABEL); \
+  BM_Conv2DBwdFilterT(Mkl, N, H, W, C, FC, FH, FW, type, LABEL);
+#else
+#define BM_Conv2DBwdFilter(N, H, W, C, FC, FH, FW, type, LABEL) \
+  BM_Conv2DBwdFilterT(Default, N, H, W, C, FC, FH, FW, type, LABEL);
+#endif
+
+// ImageNet Convolutions ---------------------------------------------------- //
+
+BM_Conv2D(32, 28, 28, 96, 128, 3, 3, cpu, "conv3a_00_3x3");
+BM_Conv2D(32, 28, 28, 16, 32, 5, 5, cpu, "conv3a_00_5x5");
+BM_Conv2D(32, 28, 28, 128, 192, 3, 3, cpu, "conv3_00_3x3");
+BM_Conv2D(32, 28, 28, 32, 96, 5, 5, cpu, "conv3_00_5x5");
+BM_Conv2D(32, 14, 14, 96, 204, 3, 3, cpu, "conv4a_00_3x3");
+BM_Conv2D(32, 14, 14, 16, 48, 5, 5, cpu, "conv4a_00_5x5");
+BM_Conv2D(32, 14, 14, 112, 224, 3, 3, cpu, "conv4b_00_3x3");
+
+BM_Conv2DBwdInput(32, 28, 28, 96, 128, 3, 3, cpu, "conv3a_00_3x3");
+BM_Conv2DBwdInput(32, 28, 28, 16, 32, 5, 5, cpu, "conv3a_00_5x5");
+BM_Conv2DBwdInput(32, 28, 28, 128, 192, 3, 3, cpu, "conv3_00_3x3");
+BM_Conv2DBwdInput(32, 28, 28, 32, 96, 5, 5, cpu, "conv3_00_5x5");
+BM_Conv2DBwdInput(32, 14, 14, 96, 204, 3, 3, cpu, "conv4a_00_3x3");
+BM_Conv2DBwdInput(32, 14, 14, 16, 48, 5, 5, cpu, "conv4a_00_5x5");
+BM_Conv2DBwdInput(32, 14, 14, 112, 224, 3, 3, cpu, "conv4b_00_3x3");
+
+BM_Conv2DBwdFilter(32, 28, 28, 96, 128, 3, 3, cpu, "conv3a_00_3x3");
+BM_Conv2DBwdFilter(32, 28, 28, 16, 32, 5, 5, cpu, "conv3a_00_5x5");
+BM_Conv2DBwdFilter(32, 28, 28, 128, 192, 3, 3, cpu, "conv3_00_3x3");
+BM_Conv2DBwdFilter(32, 28, 28, 32, 96, 5, 5, cpu, "conv3_00_5x5");
+BM_Conv2DBwdFilter(32, 14, 14, 96, 204, 3, 3, cpu, "conv4a_00_3x3");
+BM_Conv2DBwdFilter(32, 14, 14, 16, 48, 5, 5, cpu, "conv4a_00_5x5");
+BM_Conv2DBwdFilter(32, 14, 14, 112, 224, 3, 3, cpu, "conv4b_00_3x3");
+
+}  // namespace tensorflow
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 16f7b217b4..689679c838 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1216,9 +1216,11 @@ def tf_mkl_kernel_library(
     if prefix:
         srcs = srcs + native.glob(
             [prefix + "*.cc"],
+            exclude = [prefix + "*test*"],
         )
         hdrs = hdrs + native.glob(
             [prefix + "*.h"],
+            exclude = [prefix + "*test*"],
         )
 
     # -fno-exceptions in nocopts breaks compilation if header modules are enabled.
-- 
GitLab


From 77a1883c9dde50efdf9505528adf636ed991e431 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 15:11:35 -0700
Subject: [PATCH 0288/1357] Fix _check_is_tensor like
 _check_is_tensor_or_operation was fixed in #22264.

PiperOrigin-RevId: 213346485
---
 tensorflow/python/estimator/model_fn.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 331a9d1a05..0f26a5bba4 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -26,7 +26,6 @@ import six
 from tensorflow.python.estimator.export import export_output as export_output_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras.metrics import Metric
 from tensorflow.python.ops import array_ops
 from tensorflow.python.saved_model import signature_constants
@@ -467,13 +466,13 @@ class _TPUEstimatorSpec(
 
 
 def _check_is_tensor_or_operation(x, name):
-  if not (isinstance(x, ops.Operation) or tensor_util.is_tensor(x)):
+  if not (isinstance(x, ops.Operation) or ops.is_dense_tensor_like(x)):
     raise TypeError('{} must be Operation or Tensor, given: {}'.format(name, x))
 
 
 def _check_is_tensor(x, tensor_name):
   """Returns `x` if it is a `Tensor`, raises TypeError otherwise."""
-  if not isinstance(x, ops.Tensor):
+  if not ops.is_dense_tensor_like(x):
     raise TypeError('{} must be Tensor, given: {}'.format(tensor_name, x))
   return x
 
-- 
GitLab


From 3ec29c57b728f5f3b8f80e84f3189f70f86536ea Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Mon, 17 Sep 2018 15:27:59 -0700
Subject: [PATCH 0289/1357] Add api_docs_relpath option. Eliminate error when
 copying a file to itself.

PiperOrigin-RevId: 213349424
---
 tensorflow/tools/docs/BUILD           |  3 ++-
 tensorflow/tools/docs/generate_lib.py | 14 ++++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD
index 4f7efe193f..b218e900bf 100644
--- a/tensorflow/tools/docs/BUILD
+++ b/tensorflow/tools/docs/BUILD
@@ -91,9 +91,10 @@ py_binary(
         ":parser",
         ":pretty_docs",
         ":py_guide_parser",
-        "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
+        "//tensorflow/python:util",
         "//tensorflow/tools/common:public_api",
         "//tensorflow/tools/common:traverse",
+        "@six_archive//:six",
     ],
 )
 
diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py
index 1cd9cb7ca9..77a3ca2052 100644
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@@ -453,7 +453,11 @@ def update_id_tags_inplace(src_dir):
 EXCLUDED = set(['__init__.py', 'OWNERS', 'README.txt'])
 
 
-def replace_refs(src_dir, output_dir, reference_resolver, file_pattern='*.md'):
+def replace_refs(src_dir,
+                 output_dir,
+                 reference_resolver,
+                 file_pattern='*.md',
+                 api_docs_relpath='api_docs'):
   """Fix @{} references in all files under `src_dir` matching `file_pattern`.
 
   A matching directory structure, with the modified files is
@@ -472,12 +476,13 @@ def replace_refs(src_dir, output_dir, reference_resolver, file_pattern='*.md'):
     reference_resolver: A `parser.ReferenceResolver` to make the replacements.
     file_pattern: Only replace references in files matching file_patters,
       using fnmatch. Non-matching files are copied unchanged.
+    api_docs_relpath: Relative-path string to the api_docs, from the src_dir.
   """
   # Iterate through all the source files and process them.
   for dirpath, _, filenames in os.walk(src_dir):
+    depth = os.path.relpath(src_dir, start=dirpath)
     # How to get from `dirpath` to api_docs/python/
-    relative_path_to_root = os.path.relpath(
-        path=os.path.join(src_dir, 'api_docs/python'), start=dirpath)
+    relative_path_to_root = os.path.join(depth, api_docs_relpath, 'python')
 
     # Make the directory under output_dir.
     new_dir = os.path.join(output_dir,
@@ -497,7 +502,8 @@ def replace_refs(src_dir, output_dir, reference_resolver, file_pattern='*.md'):
       full_out_path = os.path.join(output_dir, suffix)
       # Copy files that do not match the file_pattern, unmodified.
       if not fnmatch.fnmatch(base_name, file_pattern):
-        shutil.copyfile(full_in_path, full_out_path)
+        if full_in_path != full_out_path:
+          shutil.copyfile(full_in_path, full_out_path)
         continue
 
       with open(full_in_path, 'rb') as f:
-- 
GitLab


From aec9a7077001e8eacb278839f2e56c228afdc4a4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 15:30:35 -0700
Subject: [PATCH 0290/1357] Move OvicBenchmarker class from app folder to
 source folder.

PiperOrigin-RevId: 213349833
---
 tensorflow/contrib/lite/java/ovic/BUILD                       | 1 +
 tensorflow/contrib/lite/java/ovic/demo/app/BUILD              | 1 -
 .../lite/java/ovic/demo/app/OvicBenchmarkerActivity.java      | 2 ++
 .../main/java/org/tensorflow/ovic}/OvicBenchmarker.java       | 4 +---
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename tensorflow/contrib/lite/java/ovic/{demo/app => src/main/java/org/tensorflow/ovic}/OvicBenchmarker.java (98%)

diff --git a/tensorflow/contrib/lite/java/ovic/BUILD b/tensorflow/contrib/lite/java/ovic/BUILD
index 781289ceb2..bb0be04ca2 100644
--- a/tensorflow/contrib/lite/java/ovic/BUILD
+++ b/tensorflow/contrib/lite/java/ovic/BUILD
@@ -44,6 +44,7 @@ java_binary(
 android_library(
     name = "ovicbenchmarkerlib",
     srcs = [
+        "src/main/java/org/tensorflow/ovic/OvicBenchmarker.java",
         "src/main/java/org/tensorflow/ovic/OvicClassifier.java",
         "src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java",
     ],
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
index a8d751ade2..b2e3a9bd7d 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
@@ -6,7 +6,6 @@ licenses(["notice"])  # Apache 2.0
 android_binary(
     name = "ovic_benchmarker_binary",
     srcs = [
-        "OvicBenchmarker.java",
         "OvicBenchmarkerActivity.java",
     ],
     assets = [
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java
index 59457c308a..4adf94aeb6 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java
@@ -34,8 +34,10 @@ import java.io.InputStream;
 import java.nio.MappedByteBuffer;
 import java.nio.channels.FileChannel;
 import java.text.DecimalFormat;
+import org.tensorflow.ovic.OvicBenchmarker;
 import org.tensorflow.ovic.OvicSingleImageResult;
 
+
 /** Class that benchmark image classifier models. */
 public class OvicBenchmarkerActivity extends Activity {
   /** Tag for the {@link Log}. */
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarker.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java
similarity index 98%
rename from tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarker.java
rename to tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java
index 113ab74a20..4cda258bee 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarker.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-package ovic.demo.app;
+package org.tensorflow.ovic;
 
 import android.graphics.Bitmap;
 import android.os.SystemClock;
@@ -22,8 +22,6 @@ import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.MappedByteBuffer;
-import org.tensorflow.ovic.OvicClassifier;
-import org.tensorflow.ovic.OvicSingleImageResult;
 
 /**
  * Class that benchmarks image classifier models.
-- 
GitLab


From 3365cd1cc7bf3dcb781c76652132119bf82133e6 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Mon, 17 Sep 2018 15:32:12 -0700
Subject: [PATCH 0291/1357] Add generic fallback optimized implementations for
 dilated DepthwiseConv.

PiperOrigin-RevId: 213350122
---
 tensorflow/contrib/lite/kernels/BUILD         |   1 +
 .../contrib/lite/kernels/depthwise_conv.cc    |  24 +--
 .../lite/kernels/depthwise_conv_test.cc       | 162 ++++++++++++++++--
 .../internal/depthwiseconv_float_test.cc      |  75 ++++----
 .../internal/depthwiseconv_quantized_test.cc  |  15 +-
 .../internal/optimized/depthwiseconv_float.h  |  52 +++---
 .../internal/optimized/depthwiseconv_uint8.h  |  68 +++++---
 .../depthwiseconv_uint8_3x3_filter.h          |   6 +-
 .../lite/kernels/internal/test_util.cc        |  20 ++-
 .../contrib/lite/kernels/internal/test_util.h |   3 +-
 10 files changed, 281 insertions(+), 145 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index f52d29ea76..daaf6714cc 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -509,6 +509,7 @@ tf_cc_test(
         ":builtin_ops",
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_absl//absl/memory",
         "@com_google_googletest//:gtest",
     ],
 )
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
index 3e1ce60113..798ee849ec 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
@@ -184,17 +184,7 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
                          const Dims<4>&, const float*, const Dims<4>&, int, int,
                          int, int, int, int, int, float, float, float*,
                          const Dims<4>&);
-  KernelType effective_kernel_type;
-  // TODO(suharshs): Currently only the reference implementation supports
-  // dilations.
-  if ((params->dilation_width_factor != 1) ||
-      (params->dilation_height_factor != 1)) {
-    effective_kernel_type = kReference;
-  } else {
-    effective_kernel_type = kernel_type;
-  }
-
-  if (effective_kernel_type == kReference) {
+  if (kernel_type == kReference) {
     depthwise_conv = &reference_ops::DepthwiseConv;
   } else {
     depthwise_conv = &optimized_ops::DepthwiseConv;
@@ -224,17 +214,7 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                          int, int, int, int, int, int, int, int32, int32, int,
                          int32, int32, uint8*, const Dims<4>&);
 
-  KernelType effective_kernel_type;
-  // TODO(suharshs): Currently only the reference implementation supports
-  // dilations.
-  if ((params->dilation_width_factor != 1) ||
-      (params->dilation_height_factor != 1)) {
-    effective_kernel_type = kReference;
-  } else {
-    effective_kernel_type = kernel_type;
-  }
-
-  if (effective_kernel_type == kReference) {
+  if (kernel_type == kReference) {
     depthwise_conv = &reference_ops::DepthwiseConv;
   } else {
     depthwise_conv = &optimized_ops::DepthwiseConv;
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
index 2af26ab80a..4a33a0319d 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
@@ -14,12 +14,24 @@ limitations under the License.
 ==============================================================================*/
 #include <cstdarg>
 #include <gtest/gtest.h>
+#include "absl/memory/memory.h"
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
 #include "tensorflow/contrib/lite/model.h"
 
 namespace tflite {
+
+namespace ops {
+namespace builtin {
+
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF();
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT();
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT();
+
+}  // namespace builtin
+}  // namespace ops
+
 namespace {
 
 using ::testing::ElementsAreArray;
@@ -28,9 +40,11 @@ class BaseDepthwiseConvolutionOpModel : public SingleOpModel {
  public:
   // TODO(ahentz): Also test different activation types, bias, padding types,
   // stride values.
-  BaseDepthwiseConvolutionOpModel(const TensorData& input,
+  BaseDepthwiseConvolutionOpModel(TfLiteRegistration* registration,
+                                  const TensorData& input,
                                   const TensorData& filter,
                                   const TensorData& output,
+                                  Padding padding_type,
                                   int dilation_factor = 1) {
     input_ = AddInput(input);
     filter_ = AddInput(filter);
@@ -56,11 +70,14 @@ class BaseDepthwiseConvolutionOpModel : public SingleOpModel {
     SetBuiltinOp(
         BuiltinOperator_DEPTHWISE_CONV_2D,
         BuiltinOptions_DepthwiseConv2DOptions,
-        CreateDepthwiseConv2DOptions(builder_, Padding_VALID, 1, 1, depth_mul,
+        CreateDepthwiseConv2DOptions(builder_, padding_type, 1, 1, depth_mul,
                                      ActivationFunctionType_NONE,
                                      dilation_factor, dilation_factor)
             .Union());
 
+    resolver_ = absl::make_unique<SingleOpResolver>(
+        BuiltinOperator_DEPTHWISE_CONV_2D, registration);
+
     BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)});
   }
 
@@ -86,10 +103,25 @@ class DepthwiseConvolutionOpModel : public BaseDepthwiseConvolutionOpModel {
   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
 };
 
-TEST(DepthwiseConvolutionOpTest, SimpleTest) {
-  DepthwiseConvolutionOpModel m({TensorType_FLOAT32, {1, 3, 2, 2}},
+const auto kKernelMap = new std::map<string, TfLiteRegistration*>({
+    {"Reference", ops::builtin::Register_DEPTHWISE_CONVOLUTION_REF()},
+    {"GenericOptimized",
+     ops::builtin::Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT()},
+    {"NeonOptimized", ops::builtin::Register_DEPTHWISE_CONVOLUTION_NEON_OPT()},
+});
+
+class DepthwiseConvolutionOpTest : public SingleOpTest {
+ protected:
+  const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
+    return *kKernelMap;
+  }
+};
+
+TEST_P(DepthwiseConvolutionOpTest, SimpleTest) {
+  DepthwiseConvolutionOpModel m(GetRegistration(),
+                                {TensorType_FLOAT32, {1, 3, 2, 2}},
                                 {TensorType_FLOAT32, {1, 2, 2, 4}},
-                                {TensorType_FLOAT32, {}});
+                                {TensorType_FLOAT32, {}}, Padding_VALID);
 
   m.SetInput({
       1, 2, 7, 8,    // column 1
@@ -112,7 +144,7 @@ TEST(DepthwiseConvolutionOpTest, SimpleTest) {
                              }));
 }
 
-TEST(DepthwiseConvolutionOpTest, SimpleDilatedTest) {
+TEST_P(DepthwiseConvolutionOpTest, SimpleDilatedTestPaddingValid) {
   const int depth = 1;
   const int image_width = 9;
   const int image_height = 9;
@@ -121,10 +153,11 @@ TEST(DepthwiseConvolutionOpTest, SimpleDilatedTest) {
   const int filter_count = 1;
   const int dilation_factor = 3;
   DepthwiseConvolutionOpModel m(
+      GetRegistration(),
       {TensorType_FLOAT32,
        {image_batch_count, image_height, image_width, depth}},
       {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}},
-      {TensorType_FLOAT32, {}}, dilation_factor);
+      {TensorType_FLOAT32, {}}, Padding_VALID, dilation_factor);
 
   // The image matrix is:
   // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
@@ -164,6 +197,41 @@ TEST(DepthwiseConvolutionOpTest, SimpleDilatedTest) {
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5}));
 }
 
+TEST_P(DepthwiseConvolutionOpTest, SimpleDilatedTestPaddingSame) {
+  const int depth = 1;
+  const int image_width = 3;
+  const int image_height = 3;
+  const int image_batch_count = 1;
+  const int filter_size = 2;
+  const int filter_count = 1;
+  const int dilation_factor = 2;
+  DepthwiseConvolutionOpModel m(
+      GetRegistration(),
+      {TensorType_FLOAT32,
+       {image_batch_count, image_height, image_width, depth}},
+      {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}},
+      {TensorType_FLOAT32, {}}, Padding_SAME, dilation_factor);
+
+  // The image matrix is:
+  // | 1 | 1 | 1 |
+  // | 1 | 1 | 1 |
+  // | 1 | 1 | 1 |
+  m.SetInput({1, 1, 1, 1, 1, 1, 1, 1, 1});
+  // The filter matrix is:
+  // | 1 | 2 |
+  // | 3 | 4 |
+  m.SetFilter({1, 2, 3, 4});
+  // No bias for this test.
+  m.SetBias({0});
+  m.Invoke();
+
+  // Output:
+  // | 4 | 7 | 3 |
+  // | 6 |10 | 4 |
+  // | 2 | 3 | 1 |
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({4, 7, 3, 6, 10, 4, 2, 3, 1}));
+}
+
 class QuantizedDepthwiseConvolutionOpModel
     : public BaseDepthwiseConvolutionOpModel {
  public:
@@ -188,13 +256,20 @@ class QuantizedDepthwiseConvolutionOpModel
   }
 };
 
+class QuantizedDepthwiseConvolutionOpTest : public SingleOpTest {
+ protected:
+  const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
+    return *kKernelMap;
+  }
+};
+
 // In this test we set the input and output scales so that the results match
 // exactly the 'non-quantized' version.
-TEST(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) {
+TEST_P(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) {
   QuantizedDepthwiseConvolutionOpModel m(
-      {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64},
+      GetRegistration(), {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64},
       {TensorType_UINT8, {1, 2, 2, 4}, -63.5, 64},
-      {TensorType_UINT8, {}, -127, 128});
+      {TensorType_UINT8, {}, -127, 128}, Padding_VALID);
 
   m.SetInput({
       1, 2, 7, 8,    // column 1
@@ -224,15 +299,16 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) {
                              }));
 }
 
-TEST(QuantizedDepthwiseConvolutionOpTest,
-     SimpleTestQuantizedFilterMultiplierGreaterThan1) {
+TEST_P(QuantizedDepthwiseConvolutionOpTest,
+       SimpleTestQuantizedFilterMultiplierGreaterThan1) {
   QuantizedDepthwiseConvolutionOpModel quant_op(
-      {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64},
+      GetRegistration(), {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64},
       {TensorType_UINT8, {1, 2, 2, 4}, -128.5, 128},
-      {TensorType_UINT8, {}, -127, 128});
-  DepthwiseConvolutionOpModel float_op({TensorType_FLOAT32, {1, 3, 2, 2}},
+      {TensorType_UINT8, {}, -127, 128}, Padding_VALID);
+  DepthwiseConvolutionOpModel float_op(GetRegistration(),
+                                       {TensorType_FLOAT32, {1, 3, 2, 2}},
                                        {TensorType_FLOAT32, {1, 2, 2, 4}},
-                                       {TensorType_FLOAT32, {}});
+                                       {TensorType_FLOAT32, {}}, Padding_VALID);
 
   std::initializer_list<float> input = {
       1, 2, 7,  8,   // column 1
@@ -261,7 +337,7 @@ TEST(QuantizedDepthwiseConvolutionOpTest,
               ElementsAreArray(ArrayFloatNear(float_op.GetOutput(), 1)));
 }
 
-TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) {
+TEST_P(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTestPaddingValid) {
   const int depth = 1;
   const int image_width = 9;
   const int image_height = 9;
@@ -270,6 +346,7 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) {
   const int filter_count = 1;
   const int dilation_factor = 3;
   QuantizedDepthwiseConvolutionOpModel m(
+      GetRegistration(),
       {TensorType_UINT8,
        {image_batch_count, image_height, image_width, depth},
        0,
@@ -278,7 +355,7 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) {
        {depth, filter_size, filter_size, filter_count},
        0,
        255},
-      {TensorType_UINT8, {}, 0, 255}, dilation_factor);
+      {TensorType_UINT8, {}, 0, 255}, Padding_VALID, dilation_factor);
 
   // The image matrix is:
   // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
@@ -319,6 +396,55 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) {
               ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5}));
 }
 
+TEST_P(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTestPaddingSame) {
+  const int depth = 1;
+  const int image_width = 3;
+  const int image_height = 3;
+  const int image_batch_count = 1;
+  const int filter_size = 2;
+  const int filter_count = 1;
+  const int dilation_factor = 2;
+  QuantizedDepthwiseConvolutionOpModel m(
+      GetRegistration(),
+      {TensorType_UINT8,
+       {image_batch_count, image_height, image_width, depth},
+       0,
+       255},
+      {TensorType_UINT8,
+       {depth, filter_size, filter_size, filter_count},
+       0,
+       255},
+      {TensorType_UINT8, {}, 0, 255}, Padding_SAME, dilation_factor);
+
+  // The image matrix is:
+  // | 1 | 1 | 1 |
+  // | 1 | 1 | 1 |
+  // | 1 | 1 | 1 |
+  m.SetInput({1, 1, 1, 1, 1, 1, 1, 1, 1});
+  // The filter matrix is:
+  // | 1 | 2 |
+  // | 3 | 4 |
+  m.SetFilter({1, 2, 3, 4});
+  // No bias for this test.
+  m.SetBias({0});
+  m.Invoke();
+
+  // Output:
+  // | 4 | 7 | 3 |
+  // | 6 |10 | 4 |
+  // | 2 | 3 | 1 |
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray({4, 7, 3, 6, 10, 4, 2, 3, 1}));
+}
+
+INSTANTIATE_TEST_CASE_P(
+    DepthwiseConvolutionOpTest, DepthwiseConvolutionOpTest,
+    ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));
+
+INSTANTIATE_TEST_CASE_P(
+    QuantizedDepthwiseConvolutionOpTest, QuantizedDepthwiseConvolutionOpTest,
+    ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc
index 844ee6a53d..7600b26f5c 100644
--- a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <vector>
 
 #include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
 #include "tensorflow/contrib/lite/kernels/internal/test_util.h"
 #include "tensorflow/contrib/lite/kernels/internal/types.h"
 
@@ -28,23 +29,29 @@ namespace tflite {
 namespace {
 
 // Runs the DepthwiseConv and compares against the reference implementation.
-template <FusedActivationFunctionType Ac>
 void TestOneDepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                           const float* filter_data, const Dims<4>& filter_dims,
                           const float* bias_data, const Dims<4>& bias_dims,
-                          int stride, int pad_width, int pad_height,
-                          int depth_multiplier, const Dims<4>& output_dims) {
+                          int stride, int dilation_width_factor,
+                          int dilation_height_factor, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max,
+                          const Dims<4>& output_dims) {
   const int output_buffer_size = RequiredBufferSizeForDims(output_dims);
   std::vector<float> output_data(output_buffer_size);
   std::vector<float> reference_output_data(output_buffer_size);
-  reference_ops::DepthwiseConv<Ac>(input_data, input_dims, filter_data,
-                                   filter_dims, bias_data, bias_dims, stride,
-                                   pad_width, pad_height, depth_multiplier,
-                                   reference_output_data.data(), output_dims);
-  optimized_ops::DepthwiseConv<Ac>(input_data, input_dims, filter_data,
-                                   filter_dims, bias_data, bias_dims, stride,
-                                   pad_width, pad_height, depth_multiplier,
-                                   output_data.data(), output_dims);
+  reference_ops::DepthwiseConv(
+      input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
+      stride, stride, dilation_width_factor, dilation_height_factor, pad_width,
+      pad_height, depth_multiplier, output_activation_min,
+      output_activation_max, reference_output_data.data(), output_dims);
+  optimized_ops::DepthwiseConv(
+      input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
+      stride, stride, dilation_width_factor, dilation_height_factor, pad_width,
+      pad_height, depth_multiplier, output_activation_min,
+      output_activation_max, output_data.data(), output_dims);
+
   double sum_abs_diff = 0;
   float max_abs_val = 0;
   for (int i = 0; i < output_buffer_size; i++) {
@@ -59,27 +66,6 @@ void TestOneDepthwiseConv(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
-void TestOneDepthwiseConv(FusedActivationFunctionType Ac,
-                          const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          const float* bias_data, const Dims<4>& bias_dims,
-                          int stride, int pad_width, int pad_height,
-                          int depth_multiplier, const Dims<4>& output_dims) {
-#define TOCO_HANDLE_CASE(AC_TYPE)                                            \
-  if (AC_TYPE == Ac) {                                                       \
-    TestOneDepthwiseConv<AC_TYPE>(input_data, input_dims, filter_data,       \
-                                  filter_dims, bias_data, bias_dims, stride, \
-                                  pad_width, pad_height, depth_multiplier,   \
-                                  output_dims);                              \
-    return;                                                                  \
-  }
-  TOCO_HANDLE_CASE(FusedActivationFunctionType::kNone)
-  TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu)
-  TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu1)
-  TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu6)
-#undef TOCO_HANDLE_CASE
-}
-
 // This function picks some random DepthwiseConv params, which may or may not
 // be legal. If they're not legal, it returns false. If they're legal,
 // it runs the DepthwiseConv test and returns true. This allows the caller
@@ -99,6 +85,16 @@ bool TryTestOneDepthwiseConv() {
   const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50);
   const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8);
   const int output_depth = input_depth * depth_multiplier;
+  const int dilation_width_factor = RandomElement(std::vector<int>({1, 2, 4}));
+  const int dilation_height_factor = RandomElement(std::vector<int>({1, 2, 4}));
+  float output_activation_min, output_activation_max;
+  FusedActivationFunctionType ac =
+      RandomElement(std::vector<FusedActivationFunctionType>(
+          {FusedActivationFunctionType::kNone,
+           FusedActivationFunctionType::kRelu,
+           FusedActivationFunctionType::kRelu1,
+           FusedActivationFunctionType::kRelu6}));
+  GetActivationMinMax(ac, &output_activation_min, &output_activation_max);
   // The optimized DepthwiseConv implementation currently uses a fixed-size
   // accumulator buffer on the stack, with that size. This currently means
   // that it does not support larger output depths. It CHECK's for it,
@@ -109,10 +105,6 @@ bool TryTestOneDepthwiseConv() {
   if (output_depth > kMaxSupportedOutputDepth) {
     return false;
   }
-  const auto ac = RandomElement(std::vector<FusedActivationFunctionType>(
-      {FusedActivationFunctionType::kNone, FusedActivationFunctionType::kRelu,
-       FusedActivationFunctionType::kRelu6,
-       FusedActivationFunctionType::kRelu1}));
   Dims<4> input_dims_inference =
       MakeDimsForInference(input_depth, input_width, input_height, batch);
   Dims<4> output_dims_inference;
@@ -120,7 +112,8 @@ bool TryTestOneDepthwiseConv() {
   const auto padding_type =
       UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid;
   if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width,
-                        filter_height, stride, padding_type,
+                        filter_height, stride, dilation_width_factor,
+                        dilation_height_factor, padding_type,
                         &output_dims_inference, &pad_width, &pad_height)) {
     return false;
   }
@@ -140,10 +133,12 @@ bool TryTestOneDepthwiseConv() {
   FillRandom(&input_data, -input_amplitude, input_amplitude);
   FillRandom(&filter_data, -filter_amplitude, filter_amplitude);
   FillRandom(&bias_data, -bias_amplitude, bias_amplitude);
-  TestOneDepthwiseConv(ac, input_data.data(), input_dims_inference,
+  TestOneDepthwiseConv(input_data.data(), input_dims_inference,
                        filter_data.data(), filter_dims_inference,
-                       bias_data.data(), bias_dims_inference, stride, pad_width,
-                       pad_height, depth_multiplier, output_dims_inference);
+                       bias_data.data(), bias_dims_inference, stride,
+                       dilation_width_factor, dilation_height_factor, pad_width,
+                       pad_height, depth_multiplier, output_activation_min,
+                       output_activation_max, output_dims_inference);
   return true;
 }
 
diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc
index 2c0fc8433e..312d048b2d 100644
--- a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc
@@ -199,6 +199,7 @@ void TestOneDepthwiseConv(
 bool TryTestDepthwiseConv(int batch, int input_depth, int input_width,
                           int input_height, int filter_width, int filter_height,
                           int depth_multiplier, int stride,
+                          int dilation_width_factor, int dilation_height_factor,
                           PaddingType padding_type) {
   const int output_depth = input_depth * depth_multiplier;
   // The optimized DepthwiseConv implementation currently uses a fixed-size
@@ -231,7 +232,8 @@ bool TryTestDepthwiseConv(int batch, int input_depth, int input_width,
   Dims<4> output_dims_inference;
   int pad_width, pad_height;
   if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width,
-                        filter_height, stride, padding_type,
+                        filter_height, stride, dilation_width_factor,
+                        dilation_height_factor, padding_type,
                         &output_dims_inference, &pad_width, &pad_height)) {
     return false;
   }
@@ -274,12 +276,15 @@ bool TryTestOneDepthwiseConv() {
   const int filter_height = ExponentialRandomPositiveInt(0.9f, 4, 10);
   const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50);
   const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8);
+  const int dilation_width_factor = RandomElement(std::vector<int>({1, 2, 4}));
+  const int dilation_height_factor = RandomElement(std::vector<int>({1, 2, 4}));
   const auto padding_type =
       UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid;
 
   return TryTestDepthwiseConv(batch, input_depth, input_width, input_height,
                               filter_width, filter_height, depth_multiplier,
-                              stride, padding_type);
+                              stride, dilation_width_factor,
+                              dilation_height_factor, padding_type);
 }
 
 // Tests parameters for the 3x3 filter kernel.
@@ -292,6 +297,9 @@ bool TryTestOneDepthwiseConv3x3Filter() {
   const int filter_height = 3;
   const int depth_multiplier = 1;
   const int stride = UniformRandomInt(1, 2);
+  // We don't support dilations in the 3x3 filter.
+  const int dilation_width_factor = 1;
+  const int dilation_height_factor = 1;
   // Although the kernel supports only kValid padding, we test that kSame
   // is using the correct code path.
   const auto padding_type =
@@ -299,7 +307,8 @@ bool TryTestOneDepthwiseConv3x3Filter() {
 
   return TryTestDepthwiseConv(batch, input_depth, input_width, input_height,
                               filter_width, filter_height, depth_multiplier,
-                              stride, padding_type);
+                              stride, dilation_width_factor,
+                              dilation_height_factor, padding_type);
 }
 
 void TestOneDepthwiseConv() {
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index f2d1319801..f0bea7fa1d 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -761,7 +761,8 @@ struct FloatDepthwiseConvKernel<true, 4, 1> {
 // Accumulates the effect of one row of the filter, on a segment of one row
 // of the output, accessing the corresponding one row of the input.
 template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
-void FloatDepthwiseConvAccumRow(int stride, int input_depth, int input_width,
+void FloatDepthwiseConvAccumRow(int stride, int dilation_factor,
+                                int input_depth, int input_width,
                                 const float* input_data, int pad_width,
                                 int depth_multiplier, int filter_width,
                                 const float* filter_data,
@@ -835,10 +836,10 @@ void FloatDepthwiseConvAccumRow(int stride, int input_depth, int input_width,
 
 // generic fallback of FloatDepthwiseConvAccumRow, portable, non-templatized.
 inline void FloatDepthwiseConvAccumRowGeneric(
-    int stride, int input_depth, int input_width, const float* input_data,
-    int pad_width, int depth_multiplier, int filter_width,
-    const float* filter_data, int out_x_buffer_start, int out_x_buffer_end,
-    int output_depth, float* acc_buffer) {
+    int stride, int dilation_factor, int input_depth, int input_width,
+    const float* input_data, int pad_width, int depth_multiplier,
+    int filter_width, const float* filter_data, int out_x_buffer_start,
+    int out_x_buffer_end, int output_depth, float* acc_buffer) {
   gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)");
 #ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
 #ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
@@ -860,6 +861,7 @@ inline void FloatDepthwiseConvAccumRowGeneric(
       << "* stride = " << stride << "\n"
       << "* input_depth = " << input_depth << "\n"
       << "* depth_multiplier = " << depth_multiplier << "\n"
+      << "* dilation_factor = " << dilation_factor << "\n"
       << "*\n"
       << "* Please do not hesitate to contact benoitjacob@ with this\n"
       << "* information.\n"
@@ -869,14 +871,17 @@ inline void FloatDepthwiseConvAccumRowGeneric(
   const float* filter_base_ptr = filter_data;
   for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
     const int out_x_loop_start = std::max(
-        out_x_buffer_start, (pad_width - filter_x + stride - 1) / stride);
-    const int out_x_loop_end =
-        std::min(out_x_buffer_end,
-                 (pad_width + input_width - filter_x + stride - 1) / stride);
+        out_x_buffer_start,
+        (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+    const int out_x_loop_end = std::min(
+        out_x_buffer_end,
+        (pad_width + input_width - dilation_factor * filter_x + stride - 1) /
+            stride);
 
     float* acc_buffer_ptr =
         acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
-    const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x;
+    const int in_x_origin =
+        (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
     const float* input_ptr = input_data + in_x_origin * input_depth;
     const int input_ptr_increment = (stride - 1) * input_depth;
     for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++) {
@@ -921,14 +926,14 @@ inline void DepthwiseConv(
   const int depth_multiplier = params.depth_multiplier;
   const float output_activation_min = params.float_activation_min;
   const float output_activation_max = params.float_activation_max;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 
-  // TODO(suharshs): Optimized implementation of dilation depthwise conv need to
-  // be implemented.
-  TFLITE_DCHECK_EQ(params.dilation_width_factor, 1);
-  TFLITE_DCHECK_EQ(params.dilation_height_factor, 1);
+  const bool has_dilation = (params.dilation_width_factor != 1) ||
+                            (params.dilation_height_factor != 1);
 
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
   const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
@@ -961,7 +966,7 @@ inline void DepthwiseConv(
                                         FIXED_DEPTH_MULTIPLIER)           \
   if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) &&          \
       (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) &&     \
-      depth_multiplier == FIXED_DEPTH_MULTIPLIER) {                       \
+      depth_multiplier == FIXED_DEPTH_MULTIPLIER && !has_dilation) {      \
     row_accum_func =                                                      \
         FloatDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH,      \
                                    FIXED_DEPTH_MULTIPLIER>;               \
@@ -1014,9 +1019,13 @@ inline void DepthwiseConv(
   for (int b = 0; b < batches; ++b) {
     for (int out_y = 0; out_y < output_height; ++out_y) {
       const int in_y_origin = (out_y * stride_height) - pad_height;
-      const int filter_y_start = std::max(0, -in_y_origin);
+      const int filter_y_start =
+          std::max(0, (-in_y_origin + dilation_height_factor - 1) /
+                          dilation_height_factor);
       const int filter_y_end =
-          std::min(filter_height, input_height - in_y_origin);
+          std::min(filter_height,
+                   (input_height - in_y_origin + dilation_height_factor - 1) /
+                       dilation_height_factor);
       for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
            out_x_buffer_start += kOutputPixelsInAccBuffer) {
         const int out_x_buffer_end = std::min(
@@ -1032,9 +1041,9 @@ inline void DepthwiseConv(
         // Accumulation loop. Most of the time should be spent in here.
         for (int filter_y = filter_y_start; filter_y < filter_y_end;
              ++filter_y) {
-          const int in_y = in_y_origin + filter_y;
+          const int in_y = in_y_origin + dilation_height_factor * filter_y;
           row_accum_func(
-              stride_width, input_depth, input_width,
+              stride_width, dilation_width_factor, input_depth, input_width,
               input_data + in_y * input_height_stride + b * input_batch_stride,
               pad_width, depth_multiplier, filter_width,
               filter_data + filter_y * filter_height_stride, out_x_buffer_start,
@@ -1096,11 +1105,6 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                           float output_activation_min,
                           float output_activation_max, float* output_data,
                           const Dims<4>& output_dims) {
-  // TODO(suharshs): Optimized implementation of dilation depthwise conv need to
-  // be implemented.
-  TFLITE_DCHECK_EQ(dilation_width_factor, 1);
-  TFLITE_DCHECK_EQ(dilation_height_factor, 1);
-
   tflite::DepthwiseParams op_params;
   // Padding type is ignored, but still set.
   op_params.padding_type = PaddingType::kSame;
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index ccb9d1654f..494cf70504 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1466,11 +1466,14 @@ struct QuantizedDepthwiseConvKernel<false, 12, 1> {
 // Accumulates the effect of one row of the filter, on a segment of one row
 // of the output, accessing the corresponding one row of the input.
 template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
-void QuantizedDepthwiseConvAccumRow(
-    int stride, int input_depth, int input_width, const uint8* input_data,
-    int16 input_offset, int pad_width, int depth_multiplier, int filter_width,
-    const uint8* filter_data, int16 filter_offset, int out_x_buffer_start,
-    int out_x_buffer_end, int output_depth, int32* acc_buffer) {
+void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor,
+                                    int input_depth, int input_width,
+                                    const uint8* input_data, int16 input_offset,
+                                    int pad_width, int depth_multiplier,
+                                    int filter_width, const uint8* filter_data,
+                                    int16 filter_offset, int out_x_buffer_start,
+                                    int out_x_buffer_end, int output_depth,
+                                    int32* acc_buffer) {
 #ifdef GEMMLOWP_PROFILING
   gemmlowp::ScopedProfilingLabel label(__PRETTY_FUNCTION__);
 #endif
@@ -1537,10 +1540,11 @@ void QuantizedDepthwiseConvAccumRow(
 
 // generic fallback of DepthwiseConvAccumRow, portable, non-templatized.
 inline void QuantizedDepthwiseConvAccumRowGeneric(
-    int stride, int input_depth, int input_width, const uint8* input_data,
-    int16 input_offset, int pad_width, int depth_multiplier, int filter_width,
-    const uint8* filter_data, int16 filter_offset, int out_x_buffer_start,
-    int out_x_buffer_end, int output_depth, int32* acc_buffer) {
+    int stride, int dilation_factor, int input_depth, int input_width,
+    const uint8* input_data, int16 input_offset, int pad_width,
+    int depth_multiplier, int filter_width, const uint8* filter_data,
+    int16 filter_offset, int out_x_buffer_start, int out_x_buffer_end,
+    int output_depth, int32* acc_buffer) {
   gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)");
 #ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
 #ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
@@ -1562,6 +1566,7 @@ inline void QuantizedDepthwiseConvAccumRowGeneric(
       << "* stride = " << stride << "\n"
       << "* input_depth = " << input_depth << "\n"
       << "* depth_multiplier = " << depth_multiplier << "\n"
+      << "* dilation_factor = " << dilation_factor << "\n"
       << "*\n"
       << "* Please do not hesitate to contact benoitjacob@ with this\n"
       << "* information.\n"
@@ -1571,14 +1576,17 @@ inline void QuantizedDepthwiseConvAccumRowGeneric(
   const uint8* filter_base_ptr = filter_data;
   for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
     const int out_x_loop_start = std::max(
-        out_x_buffer_start, (pad_width - filter_x + stride - 1) / stride);
-    const int out_x_loop_end =
-        std::min(out_x_buffer_end,
-                 (pad_width + input_width - filter_x + stride - 1) / stride);
+        out_x_buffer_start,
+        (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+    const int out_x_loop_end = std::min(
+        out_x_buffer_end,
+        (pad_width + input_width - dilation_factor * filter_x + stride - 1) /
+            stride);
 
     int32* acc_buffer_ptr =
         acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
-    const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x;
+    const int in_x_origin =
+        (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
     const uint8* input_ptr = input_data + in_x_origin * input_depth;
     const int input_ptr_increment = (stride - 1) * input_depth;
     for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++) {
@@ -1688,15 +1696,11 @@ inline void DepthwiseConv(
   const int32 output_offset = params.output_offset;
   const int32 output_multiplier = params.output_multiplier;
   const int output_shift = params.output_shift;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  // TODO(suharshs): Optimized implementation of dilation depthwise conv need to
-  // be implemented.
-  TFLITE_DCHECK_EQ(params.dilation_width_factor, 1);
-  TFLITE_DCHECK_EQ(params.dilation_height_factor, 1);
-
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
   const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
@@ -1714,14 +1718,18 @@ inline void DepthwiseConv(
   TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
   TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 
+  const bool has_dilation =
+      (dilation_width_factor != 1) || (dilation_height_factor != 1);
+
 // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
 // Jetson TX-2. This compiler does not support the offsetof() macro.
 #if defined(__aarch64__) && !defined(GOOGLE_L4T)
   // Call kernel optimized for depthwise convolutions using 3x3 filters if
   // parameters are supported.
-  if (Fast3x3FilterKernelSupported(
-          input_shape, filter_shape, stride_width, stride_height, pad_width,
-          pad_height, depth_multiplier, output_shape, output_shift)) {
+  if (Fast3x3FilterKernelSupported(input_shape, filter_shape, stride_width,
+                                   stride_height, has_dilation, pad_width,
+                                   pad_height, depth_multiplier, output_shape,
+                                   output_shift)) {
     DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape,
                            filter_data, bias_shape, bias_data, output_shape,
                            output_data);
@@ -1748,7 +1756,7 @@ inline void DepthwiseConv(
                                         FIXED_DEPTH_MULTIPLIER)           \
   if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) &&          \
       (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) &&     \
-      depth_multiplier == FIXED_DEPTH_MULTIPLIER) {                       \
+      depth_multiplier == FIXED_DEPTH_MULTIPLIER && !has_dilation) {      \
     row_accum_func =                                                      \
         QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH,  \
                                        FIXED_DEPTH_MULTIPLIER>;           \
@@ -1808,9 +1816,13 @@ inline void DepthwiseConv(
   for (int b = 0; b < batches; ++b) {
     for (int out_y = 0; out_y < output_height; ++out_y) {
       const int in_y_origin = (out_y * stride_height) - pad_height;
-      const int filter_y_start = std::max(0, -in_y_origin);
+      const int filter_y_start =
+          std::max(0, (-in_y_origin + dilation_height_factor - 1) /
+                          dilation_height_factor);
       const int filter_y_end =
-          std::min(filter_height, input_height - in_y_origin);
+          std::min(filter_height,
+                   (input_height - in_y_origin + dilation_height_factor - 1) /
+                       dilation_height_factor);
       for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
            out_x_buffer_start += kOutputPixelsInAccBuffer) {
         const int out_x_buffer_end = std::min(
@@ -1826,9 +1838,9 @@ inline void DepthwiseConv(
         // Accumulation loop. Most of the time should be spent in here.
         for (int filter_y = filter_y_start; filter_y < filter_y_end;
              ++filter_y) {
-          const int in_y = in_y_origin + filter_y;
+          const int in_y = in_y_origin + dilation_height_factor * filter_y;
           row_accum_func(
-              stride_width, input_depth, input_width,
+              stride_width, dilation_width_factor, input_depth, input_width,
               input_data + in_y * input_height_stride + b * input_batch_stride,
               input_offset, pad_width, depth_multiplier, filter_width,
               filter_data + filter_y * filter_height_stride, filter_offset,
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
index 9fed53cafb..5087227182 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
@@ -3176,8 +3176,8 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
 
 inline bool Fast3x3FilterKernelSupported(
     const RuntimeShape& input_shape, const RuntimeShape& filter_shape,
-    int32 stride_width, int32 stride_height, int32 pad_width, int32 pad_height,
-    int32 depth_multiplier, const RuntimeShape& output_shape,
+    int32 stride_width, int32 stride_height, bool has_dilation, int32 pad_width,
+    int32 pad_height, int32 depth_multiplier, const RuntimeShape& output_shape,
     int32 output_shift) {
   const int32 input_height = input_shape.Dims(1);
   const int32 input_width = input_shape.Dims(2);
@@ -3193,7 +3193,7 @@ inline bool Fast3x3FilterKernelSupported(
       (stride_height == 1 || stride_height == 2) &&
       (stride_width == stride_height) && (pad_width == 0 || pad_width == 1) &&
       (pad_height == 0 || pad_height == 1) && (pad_width == pad_height) &&
-      (input_depth % 8) == 0 && (output_shift > 0);
+      (input_depth % 8) == 0 && (output_shift > 0) && !has_dilation;
 
   if (!supported) {
     return false;
diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.cc b/tensorflow/contrib/lite/kernels/internal/test_util.cc
index 9b1fd9b344..5ae4b193d0 100644
--- a/tensorflow/contrib/lite/kernels/internal/test_util.cc
+++ b/tensorflow/contrib/lite/kernels/internal/test_util.cc
@@ -43,17 +43,21 @@ Dims<4> MakeDimsForInference(int depth, int width, int height, int batch) {
 
 // this is a copied from an internal function in propagate_fixed_sizes.cc
 bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width,
-                      int filter_height, int stride, PaddingType padding_type,
+                      int filter_height, int stride, int dilation_width_factor,
+                      int dilation_height_factor, PaddingType padding_type,
                       Dims<4>* output_dims, int* pad_width, int* pad_height) {
   const int input_width = ArraySize(input_dims, 1);
   const int input_height = ArraySize(input_dims, 2);
   const int batch = ArraySize(input_dims, 3);
 
+  int dilated_filter_width = dilation_width_factor * (filter_width - 1) + 1;
+  int dilated_filter_height = dilation_height_factor * (filter_height - 1) + 1;
+
   int output_height = 0;
   int output_width = 0;
   if (padding_type == PaddingType::kValid) {
-    output_height = (input_height + stride - filter_height) / stride;
-    output_width = (input_width + stride - filter_width) / stride;
+    output_height = (input_height + stride - dilated_filter_height) / stride;
+    output_width = (input_width + stride - dilated_filter_width) / stride;
   } else if (padding_type == PaddingType::kSame) {
     output_height = (input_height + stride - 1) / stride;
     output_width = (input_width + stride - 1) / stride;
@@ -65,9 +69,13 @@ bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width,
     return false;
   }
 
-  *pad_height =
-      ((output_height - 1) * stride + filter_height - input_height) / 2;
-  *pad_width = ((output_width - 1) * stride + filter_width - input_width) / 2;
+  *pad_height = std::max(
+      0, ((output_height - 1) * stride + dilated_filter_height - input_height) /
+             2);
+  *pad_width = std::max(
+      0,
+      ((output_width - 1) * stride + dilated_filter_width - input_width) / 2);
+
   *output_dims =
       MakeDimsForInference(output_depth, output_width, output_height, batch);
   return true;
diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.h b/tensorflow/contrib/lite/kernels/internal/test_util.h
index 26078cef49..cb6d8b147c 100644
--- a/tensorflow/contrib/lite/kernels/internal/test_util.h
+++ b/tensorflow/contrib/lite/kernels/internal/test_util.h
@@ -31,7 +31,8 @@ Dims<4> MakeDimsForInference(int depth, int width, int height, int batch);
 
 // Computes output and padding dimensions.
 bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width,
-                      int filter_height, int stride, PaddingType padding_type,
+                      int filter_height, int stride, int dilation_width_factor,
+                      int dilation_height_factor, PaddingType padding_type,
                       Dims<4>* output_dims, int* pad_width, int* pad_height);
 
 // Returns a mt19937 random engine.
-- 
GitLab


From d5f4c3aa59aebc88f42a186a30ef6200857194ca Mon Sep 17 00:00:00 2001
From: Ian Langmore <langmore@google.com>
Date: Mon, 17 Sep 2018 15:46:30 -0700
Subject: [PATCH 0292/1357] Remove tensorflow/contrib/linalg library.  linalg
 remains in core.

PiperOrigin-RevId: 213352573
---
 CODEOWNERS                                    |   1 -
 tensorflow/contrib/BUILD                      |   1 -
 tensorflow/contrib/__init__.py                |   1 -
 tensorflow/contrib/cmake/python_modules.txt   |   3 -
 tensorflow/contrib/cmake/tf_tests.cmake       |   1 -
 tensorflow/contrib/distributions/BUILD        |  54 ++-
 tensorflow/contrib/linalg/BUILD               |  44 --
 tensorflow/contrib/linalg/__init__.py         |  58 ---
 tensorflow/contrib/linalg/python/__init__.py  |  19 -
 .../linear_operator_addition_test.py          | 412 -----------------
 .../python/ops/linear_operator_addition.py    | 432 ------------------
 11 files changed, 26 insertions(+), 1000 deletions(-)
 delete mode 100644 tensorflow/contrib/linalg/BUILD
 delete mode 100644 tensorflow/contrib/linalg/__init__.py
 delete mode 100644 tensorflow/contrib/linalg/python/__init__.py
 delete mode 100644 tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py
 delete mode 100644 tensorflow/contrib/linalg/python/ops/linear_operator_addition.py

diff --git a/CODEOWNERS b/CODEOWNERS
index b612bccffb..94cc865479 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -41,7 +41,6 @@
 /tensorflow/contrib/labeled_tensor/ @shoyer
 /tensorflow/contrib/layers/ @fchollet @martinwicke
 /tensorflow/contrib/learn/ @martinwicke @ispirmustafa @alextp
-/tensorflow/contrib/linalg/ @langmore
 /tensorflow/contrib/linear_optimizer/ @petrosmol @andreasst @katsiapis
 /tensorflow/contrib/lookup/ @ysuematsu @andreasst
 /tensorflow/contrib/losses/ @alextp @ispirmustafa
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index d98a24994c..e1af52cd96 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -60,7 +60,6 @@ py_library(
         "//tensorflow/contrib/learn",
         "//tensorflow/contrib/legacy_seq2seq:seq2seq_py",
         "//tensorflow/contrib/libsvm",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/contrib/linear_optimizer:sdca_estimator_py",
         "//tensorflow/contrib/linear_optimizer:sdca_ops_py",
         "//tensorflow/contrib/lite/python:lite",
diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py
index 9478e42b46..e71b0e0ae3 100644
--- a/tensorflow/contrib/__init__.py
+++ b/tensorflow/contrib/__init__.py
@@ -63,7 +63,6 @@ from tensorflow.contrib import labeled_tensor
 from tensorflow.contrib import layers
 from tensorflow.contrib import learn
 from tensorflow.contrib import legacy_seq2seq
-from tensorflow.contrib import linalg
 from tensorflow.contrib import linear_optimizer
 from tensorflow.contrib import lookup
 from tensorflow.contrib import losses
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index fb871acae9..1c432b6e0b 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -273,9 +273,6 @@ tensorflow/contrib/libsvm
 tensorflow/contrib/libsvm/python
 tensorflow/contrib/libsvm/python/kernel_tests
 tensorflow/contrib/libsvm/python/ops
-tensorflow/contrib/linalg
-tensorflow/contrib/linalg/python
-tensorflow/contrib/linalg/python/ops
 tensorflow/contrib/linear_optimizer
 tensorflow/contrib/linear_optimizer/kernels
 tensorflow/contrib/linear_optimizer/kernels/g3doc
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 2c878c1716..ed31351d9e 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -183,7 +183,6 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     file(GLOB_RECURSE tf_test_src_py
       ${tf_test_src_py}
       "${tensorflow_source_dir}/tensorflow/contrib/legacy_seq2seq/*_test.py"
-      "${tensorflow_source_dir}/tensorflow/contrib/linalg/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/graph_editor/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/bayesflow/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/framework/*_test.py"
diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 9aadc634da..3ff7da4f89 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -25,7 +25,6 @@ py_library(
                    "`tf.contrib.distributions` to `tfp.distributions`."),
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:check_ops",
         "//tensorflow/python:clip_ops",
@@ -61,7 +60,6 @@ py_library(
         ":bijectors_py",
         "//tensorflow/contrib/framework:framework_py",
         "//tensorflow/contrib/learn",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:check_ops",
         "//tensorflow/python:control_flow_ops",
@@ -706,8 +704,8 @@ cuda_py_test(
         ":bijectors_py",
         ":distributions_py",
         "//third_party/py/numpy",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:math_ops",
@@ -722,8 +720,8 @@ cuda_py_test(
     additional_deps = [
         ":distributions_py",
         "//third_party/py/numpy",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/ops/linalg",
     ],
     shard_count = 4,
     tags = ["noasan"],  # times out, http://b/78588814
@@ -739,8 +737,8 @@ cuda_py_test(
     additional_deps = [
         ":distributions_py",
         "//third_party/py/numpy",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:math_ops",
@@ -794,8 +792,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -831,8 +829,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -852,8 +850,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -871,8 +869,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -907,8 +905,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -926,10 +924,10 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform_test",
@@ -945,8 +943,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -964,8 +962,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -983,8 +981,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1002,8 +1000,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1021,8 +1019,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1040,8 +1038,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1075,8 +1073,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1126,8 +1124,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1161,8 +1159,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1180,8 +1178,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1201,8 +1199,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1221,8 +1219,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1240,8 +1238,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1259,8 +1257,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1278,8 +1276,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1297,8 +1295,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
@@ -1316,8 +1314,8 @@ cuda_py_test(
         ":distributions_py",
         "//third_party/py/numpy",
         "@six_archive//:six",
-        "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python/ops/linalg",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
diff --git a/tensorflow/contrib/linalg/BUILD b/tensorflow/contrib/linalg/BUILD
deleted file mode 100644
index 78b7970069..0000000000
--- a/tensorflow/contrib/linalg/BUILD
+++ /dev/null
@@ -1,44 +0,0 @@
-# Description:
-#   Contains classes that provide access to common method of a [batch] matrix,
-#   without the need to instantiate the matrix.
-#   This allows for exploitation of structure, as well as a generic interface
-#   suitable for iterative solvers.
-
-licenses(["notice"])  # Apache 2.0
-
-exports_files(["LICENSE"])
-
-package(default_visibility = ["//tensorflow:__subpackages__"])
-
-load("//tensorflow:tensorflow.bzl", "cuda_py_test")
-
-py_library(
-    name = "linalg_py",
-    srcs = ["__init__.py"] + glob(["python/ops/*.py"]),
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:util",
-        "//tensorflow/python/ops/linalg",
-        "@six_archive//:six",
-    ],
-)
-
-cuda_py_test(
-    name = "linear_operator_addition_test",
-    size = "small",
-    srcs = ["python/kernel_tests/linear_operator_addition_test.py"],
-    additional_deps = [
-        ":linalg_py",
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform_test",
-    ],
-)
diff --git a/tensorflow/contrib/linalg/__init__.py b/tensorflow/contrib/linalg/__init__.py
deleted file mode 100644
index cbe4c03e4d..0000000000
--- a/tensorflow/contrib/linalg/__init__.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Linear algebra libraries.
-
-See the[Contrib Linalg](https://tensorflow.org/api_guides/python/contrib.linalg)
-guide.
-
-@@LinearOperator
-@@LinearOperatorBlockDiag
-@@LinearOperatorCirculant
-@@LinearOperatorCirculant2D
-@@LinearOperatorCirculant3D
-@@LinearOperatorDiag
-@@LinearOperatorIdentity
-@@LinearOperatorScaledIdentity
-@@LinearOperatorFullMatrix
-@@LinearOperatorKronecker
-@@LinearOperatorLowerTriangular
-@@LinearOperatorLowRankUpdate
-@@LinearOperatorComposition
-@@add_operators
-
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member
-
-from tensorflow.contrib.linalg.python.ops.linear_operator_addition import *
-from tensorflow.python.ops.linalg.linear_operator import *
-from tensorflow.python.ops.linalg.linear_operator_block_diag import *
-from tensorflow.python.ops.linalg.linear_operator_circulant import *
-from tensorflow.python.ops.linalg.linear_operator_composition import *
-from tensorflow.python.ops.linalg.linear_operator_diag import *
-from tensorflow.python.ops.linalg.linear_operator_full_matrix import *
-from tensorflow.python.ops.linalg.linear_operator_identity import *
-from tensorflow.python.ops.linalg.linear_operator_kronecker import *
-from tensorflow.python.ops.linalg.linear_operator_low_rank_update import *
-from tensorflow.python.ops.linalg.linear_operator_lower_triangular import *
-
-# pylint: enable=unused-import,wildcard-import,line-too-long,g-importing-member
-
-from tensorflow.python.util.all_util import remove_undocumented
-
-remove_undocumented(__name__)
diff --git a/tensorflow/contrib/linalg/python/__init__.py b/tensorflow/contrib/linalg/python/__init__.py
deleted file mode 100644
index c5ca3a623f..0000000000
--- a/tensorflow/contrib/linalg/python/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""ops module."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py
deleted file mode 100644
index d94ac73654..0000000000
--- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py
+++ /dev/null
@@ -1,412 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.linalg.python.ops import linear_operator_addition
-from tensorflow.python.framework import random_seed
-from tensorflow.python.ops import linalg_ops
-from tensorflow.python.ops.linalg import linalg as linalg_lib
-from tensorflow.python.platform import test
-
-linalg = linalg_lib
-random_seed.set_random_seed(23)
-rng = np.random.RandomState(0)
-
-add_operators = linear_operator_addition.add_operators
-
-
-# pylint: disable=unused-argument
-class _BadAdder(linear_operator_addition._Adder):
-  """Adder that will fail if used."""
-
-  def can_add(self, op1, op2):
-    raise AssertionError("BadAdder.can_add called!")
-
-  def _add(self, op1, op2, operator_name, hints):
-    raise AssertionError("This line should not be reached")
-
-
-# pylint: enable=unused-argument
-
-
-class LinearOperatorAdditionCorrectnessTest(test.TestCase):
-  """Tests correctness of addition with combinations of a few Adders.
-
-  Tests here are done with the _DEFAULT_ADDITION_TIERS, which means
-  add_operators should reduce all operators resulting in one single operator.
-
-  This shows that we are able to correctly combine adders using the tiered
-  system.  All Adders should be tested separately, and there is no need to test
-  every Adder within this class.
-  """
-
-  def test_one_operator_is_returned_unchanged(self):
-    op_a = linalg.LinearOperatorDiag([1., 1.])
-    op_sum = add_operators([op_a])
-    self.assertEqual(1, len(op_sum))
-    self.assertTrue(op_sum[0] is op_a)
-
-  def test_at_least_one_operators_required(self):
-    with self.assertRaisesRegexp(ValueError, "must contain at least one"):
-      add_operators([])
-
-  def test_attempting_to_add_numbers_raises(self):
-    with self.assertRaisesRegexp(TypeError, "contain only LinearOperator"):
-      add_operators([1, 2])
-
-  def test_two_diag_operators(self):
-    op_a = linalg.LinearOperatorDiag(
-        [1., 1.], is_positive_definite=True, name="A")
-    op_b = linalg.LinearOperatorDiag(
-        [2., 2.], is_positive_definite=True, name="B")
-    with self.cached_session():
-      op_sum = add_operators([op_a, op_b])
-      self.assertEqual(1, len(op_sum))
-      op = op_sum[0]
-      self.assertTrue(isinstance(op, linalg_lib.LinearOperatorDiag))
-      self.assertAllClose([[3., 0.], [0., 3.]], op.to_dense().eval())
-      # Adding positive definite operators produces positive def.
-      self.assertTrue(op.is_positive_definite)
-      # Real diagonal ==> self-adjoint.
-      self.assertTrue(op.is_self_adjoint)
-      # Positive definite ==> non-singular
-      self.assertTrue(op.is_non_singular)
-      # Enforce particular name for this simple case
-      self.assertEqual("Add/B__A/", op.name)
-
-  def test_three_diag_operators(self):
-    op1 = linalg.LinearOperatorDiag(
-        [1., 1.], is_positive_definite=True, name="op1")
-    op2 = linalg.LinearOperatorDiag(
-        [2., 2.], is_positive_definite=True, name="op2")
-    op3 = linalg.LinearOperatorDiag(
-        [3., 3.], is_positive_definite=True, name="op3")
-    with self.cached_session():
-      op_sum = add_operators([op1, op2, op3])
-      self.assertEqual(1, len(op_sum))
-      op = op_sum[0]
-      self.assertTrue(isinstance(op, linalg_lib.LinearOperatorDiag))
-      self.assertAllClose([[6., 0.], [0., 6.]], op.to_dense().eval())
-      # Adding positive definite operators produces positive def.
-      self.assertTrue(op.is_positive_definite)
-      # Real diagonal ==> self-adjoint.
-      self.assertTrue(op.is_self_adjoint)
-      # Positive definite ==> non-singular
-      self.assertTrue(op.is_non_singular)
-
-  def test_diag_tril_diag(self):
-    op1 = linalg.LinearOperatorDiag(
-        [1., 1.], is_non_singular=True, name="diag_a")
-    op2 = linalg.LinearOperatorLowerTriangular(
-        [[2., 0.], [0., 2.]],
-        is_self_adjoint=True,
-        is_non_singular=True,
-        name="tril")
-    op3 = linalg.LinearOperatorDiag(
-        [3., 3.], is_non_singular=True, name="diag_b")
-    with self.cached_session():
-      op_sum = add_operators([op1, op2, op3])
-      self.assertEqual(1, len(op_sum))
-      op = op_sum[0]
-      self.assertTrue(isinstance(op, linalg_lib.LinearOperatorLowerTriangular))
-      self.assertAllClose([[6., 0.], [0., 6.]], op.to_dense().eval())
-
-      # The diag operators will be self-adjoint (because real and diagonal).
-      # The TriL operator has the self-adjoint hint set.
-      self.assertTrue(op.is_self_adjoint)
-
-      # Even though op1/2/3 are non-singular, this does not imply op is.
-      # Since no custom hint was provided, we default to None (unknown).
-      self.assertEqual(None, op.is_non_singular)
-
-  def test_matrix_diag_tril_diag_uses_custom_name(self):
-    op0 = linalg.LinearOperatorFullMatrix(
-        [[-1., -1.], [-1., -1.]], name="matrix")
-    op1 = linalg.LinearOperatorDiag([1., 1.], name="diag_a")
-    op2 = linalg.LinearOperatorLowerTriangular(
-        [[2., 0.], [1.5, 2.]], name="tril")
-    op3 = linalg.LinearOperatorDiag([3., 3.], name="diag_b")
-    with self.cached_session():
-      op_sum = add_operators([op0, op1, op2, op3], operator_name="my_operator")
-      self.assertEqual(1, len(op_sum))
-      op = op_sum[0]
-      self.assertTrue(isinstance(op, linalg_lib.LinearOperatorFullMatrix))
-      self.assertAllClose([[5., -1.], [0.5, 5.]], op.to_dense().eval())
-      self.assertEqual("my_operator", op.name)
-
-  def test_incompatible_domain_dimensions_raises(self):
-    op1 = linalg.LinearOperatorFullMatrix(rng.rand(2, 3))
-    op2 = linalg.LinearOperatorDiag(rng.rand(2, 4))
-    with self.assertRaisesRegexp(ValueError, "must.*same domain dimension"):
-      add_operators([op1, op2])
-
-  def test_incompatible_range_dimensions_raises(self):
-    op1 = linalg.LinearOperatorFullMatrix(rng.rand(2, 3))
-    op2 = linalg.LinearOperatorDiag(rng.rand(3, 3))
-    with self.assertRaisesRegexp(ValueError, "must.*same range dimension"):
-      add_operators([op1, op2])
-
-  def test_non_broadcastable_batch_shape_raises(self):
-    op1 = linalg.LinearOperatorFullMatrix(rng.rand(2, 3, 3))
-    op2 = linalg.LinearOperatorDiag(rng.rand(4, 3, 3))
-    with self.assertRaisesRegexp(ValueError, "Incompatible shapes"):
-      add_operators([op1, op2])
-
-
-class LinearOperatorOrderOfAdditionTest(test.TestCase):
-  """Test that the order of addition is done as specified by tiers."""
-
-  def test_tier_0_additions_done_in_tier_0(self):
-    diag1 = linalg.LinearOperatorDiag([1.])
-    diag2 = linalg.LinearOperatorDiag([1.])
-    diag3 = linalg.LinearOperatorDiag([1.])
-    addition_tiers = [
-        [linear_operator_addition._AddAndReturnDiag()],
-        [_BadAdder()],
-    ]
-    # Should not raise since all were added in tier 0, and tier 1 (with the
-    # _BadAdder) was never reached.
-    op_sum = add_operators([diag1, diag2, diag3], addition_tiers=addition_tiers)
-    self.assertEqual(1, len(op_sum))
-    self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorDiag))
-
-  def test_tier_1_additions_done_by_tier_1(self):
-    diag1 = linalg.LinearOperatorDiag([1.])
-    diag2 = linalg.LinearOperatorDiag([1.])
-    tril = linalg.LinearOperatorLowerTriangular([[1.]])
-    addition_tiers = [
-        [linear_operator_addition._AddAndReturnDiag()],
-        [linear_operator_addition._AddAndReturnTriL()],
-        [_BadAdder()],
-    ]
-    # Should not raise since all were added by tier 1, and the
-    # _BadAdder) was never reached.
-    op_sum = add_operators([diag1, diag2, tril], addition_tiers=addition_tiers)
-    self.assertEqual(1, len(op_sum))
-    self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorLowerTriangular))
-
-  def test_tier_1_additions_done_by_tier_1_with_order_flipped(self):
-    diag1 = linalg.LinearOperatorDiag([1.])
-    diag2 = linalg.LinearOperatorDiag([1.])
-    tril = linalg.LinearOperatorLowerTriangular([[1.]])
-    addition_tiers = [
-        [linear_operator_addition._AddAndReturnTriL()],
-        [linear_operator_addition._AddAndReturnDiag()],
-        [_BadAdder()],
-    ]
-    # Tier 0 could convert to TriL, and this converted everything to TriL,
-    # including the Diags.
-    # Tier 1 was never used.
-    # Tier 2 was never used (therefore, _BadAdder didn't raise).
-    op_sum = add_operators([diag1, diag2, tril], addition_tiers=addition_tiers)
-    self.assertEqual(1, len(op_sum))
-    self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorLowerTriangular))
-
-  def test_cannot_add_everything_so_return_more_than_one_operator(self):
-    diag1 = linalg.LinearOperatorDiag([1.])
-    diag2 = linalg.LinearOperatorDiag([2.])
-    tril5 = linalg.LinearOperatorLowerTriangular([[5.]])
-    addition_tiers = [
-        [linear_operator_addition._AddAndReturnDiag()],
-    ]
-    # Tier 0 (the only tier) can only convert to Diag, so it combines the two
-    # diags, but the TriL is unchanged.
-    # Result should contain two operators, one Diag, one TriL.
-    op_sum = add_operators([diag1, diag2, tril5], addition_tiers=addition_tiers)
-    self.assertEqual(2, len(op_sum))
-    found_diag = False
-    found_tril = False
-    with self.cached_session():
-      for op in op_sum:
-        if isinstance(op, linalg.LinearOperatorDiag):
-          found_diag = True
-          self.assertAllClose([[3.]], op.to_dense().eval())
-        if isinstance(op, linalg.LinearOperatorLowerTriangular):
-          found_tril = True
-          self.assertAllClose([[5.]], op.to_dense().eval())
-      self.assertTrue(found_diag and found_tril)
-
-  def test_intermediate_tier_is_not_skipped(self):
-    diag1 = linalg.LinearOperatorDiag([1.])
-    diag2 = linalg.LinearOperatorDiag([1.])
-    tril = linalg.LinearOperatorLowerTriangular([[1.]])
-    addition_tiers = [
-        [linear_operator_addition._AddAndReturnDiag()],
-        [_BadAdder()],
-        [linear_operator_addition._AddAndReturnTriL()],
-    ]
-    # tril cannot be added in tier 0, and the intermediate tier 1 with the
-    # BadAdder will catch it and raise.
-    with self.assertRaisesRegexp(AssertionError, "BadAdder.can_add called"):
-      add_operators([diag1, diag2, tril], addition_tiers=addition_tiers)
-
-
-class AddAndReturnScaledIdentityTest(test.TestCase):
-
-  def setUp(self):
-    self._adder = linear_operator_addition._AddAndReturnScaledIdentity()
-
-  def test_identity_plus_identity(self):
-    id1 = linalg.LinearOperatorIdentity(num_rows=2)
-    id2 = linalg.LinearOperatorIdentity(num_rows=2, batch_shape=[3])
-    hints = linear_operator_addition._Hints(
-        is_positive_definite=True, is_non_singular=True)
-
-    self.assertTrue(self._adder.can_add(id1, id2))
-    operator = self._adder.add(id1, id2, "my_operator", hints)
-    self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity))
-
-    with self.cached_session():
-      self.assertAllClose(2 *
-                          linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
-                          operator.to_dense().eval())
-    self.assertTrue(operator.is_positive_definite)
-    self.assertTrue(operator.is_non_singular)
-    self.assertEqual("my_operator", operator.name)
-
-  def test_identity_plus_scaled_identity(self):
-    id1 = linalg.LinearOperatorIdentity(num_rows=2, batch_shape=[3])
-    id2 = linalg.LinearOperatorScaledIdentity(num_rows=2, multiplier=2.2)
-    hints = linear_operator_addition._Hints(
-        is_positive_definite=True, is_non_singular=True)
-
-    self.assertTrue(self._adder.can_add(id1, id2))
-    operator = self._adder.add(id1, id2, "my_operator", hints)
-    self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity))
-
-    with self.cached_session():
-      self.assertAllClose(3.2 *
-                          linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
-                          operator.to_dense().eval())
-    self.assertTrue(operator.is_positive_definite)
-    self.assertTrue(operator.is_non_singular)
-    self.assertEqual("my_operator", operator.name)
-
-  def test_scaled_identity_plus_scaled_identity(self):
-    id1 = linalg.LinearOperatorScaledIdentity(
-        num_rows=2, multiplier=[2.2, 2.2, 2.2])
-    id2 = linalg.LinearOperatorScaledIdentity(num_rows=2, multiplier=-1.0)
-    hints = linear_operator_addition._Hints(
-        is_positive_definite=True, is_non_singular=True)
-
-    self.assertTrue(self._adder.can_add(id1, id2))
-    operator = self._adder.add(id1, id2, "my_operator", hints)
-    self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity))
-
-    with self.cached_session():
-      self.assertAllClose(1.2 *
-                          linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
-                          operator.to_dense().eval())
-    self.assertTrue(operator.is_positive_definite)
-    self.assertTrue(operator.is_non_singular)
-    self.assertEqual("my_operator", operator.name)
-
-
-class AddAndReturnDiagTest(test.TestCase):
-
-  def setUp(self):
-    self._adder = linear_operator_addition._AddAndReturnDiag()
-
-  def test_identity_plus_identity_returns_diag(self):
-    id1 = linalg.LinearOperatorIdentity(num_rows=2)
-    id2 = linalg.LinearOperatorIdentity(num_rows=2, batch_shape=[3])
-    hints = linear_operator_addition._Hints(
-        is_positive_definite=True, is_non_singular=True)
-
-    self.assertTrue(self._adder.can_add(id1, id2))
-    operator = self._adder.add(id1, id2, "my_operator", hints)
-    self.assertTrue(isinstance(operator, linalg.LinearOperatorDiag))
-
-    with self.cached_session():
-      self.assertAllClose(2 *
-                          linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
-                          operator.to_dense().eval())
-    self.assertTrue(operator.is_positive_definite)
-    self.assertTrue(operator.is_non_singular)
-    self.assertEqual("my_operator", operator.name)
-
-  def test_diag_plus_diag(self):
-    diag1 = rng.rand(2, 3, 4)
-    diag2 = rng.rand(4)
-    op1 = linalg.LinearOperatorDiag(diag1)
-    op2 = linalg.LinearOperatorDiag(diag2)
-    hints = linear_operator_addition._Hints(
-        is_positive_definite=True, is_non_singular=True)
-
-    self.assertTrue(self._adder.can_add(op1, op2))
-    operator = self._adder.add(op1, op2, "my_operator", hints)
-    self.assertTrue(isinstance(operator, linalg.LinearOperatorDiag))
-
-    with self.cached_session():
-      self.assertAllClose(
-          linalg.LinearOperatorDiag(diag1 + diag2).to_dense().eval(),
-          operator.to_dense().eval())
-    self.assertTrue(operator.is_positive_definite)
-    self.assertTrue(operator.is_non_singular)
-    self.assertEqual("my_operator", operator.name)
-
-
-class AddAndReturnTriLTest(test.TestCase):
-
-  def setUp(self):
-    self._adder = linear_operator_addition._AddAndReturnTriL()
-
-  def test_diag_plus_tril(self):
-    diag = linalg.LinearOperatorDiag([1., 2.])
-    tril = linalg.LinearOperatorLowerTriangular([[10., 0.], [30., 0.]])
-    hints = linear_operator_addition._Hints(
-        is_positive_definite=True, is_non_singular=True)
-
-    self.assertTrue(self._adder.can_add(diag, diag))
-    self.assertTrue(self._adder.can_add(diag, tril))
-    operator = self._adder.add(diag, tril, "my_operator", hints)
-    self.assertTrue(isinstance(operator, linalg.LinearOperatorLowerTriangular))
-
-    with self.cached_session():
-      self.assertAllClose([[11., 0.], [30., 2.]], operator.to_dense().eval())
-    self.assertTrue(operator.is_positive_definite)
-    self.assertTrue(operator.is_non_singular)
-    self.assertEqual("my_operator", operator.name)
-
-
-class AddAndReturnMatrixTest(test.TestCase):
-
-  def setUp(self):
-    self._adder = linear_operator_addition._AddAndReturnMatrix()
-
-  def test_diag_plus_diag(self):
-    diag1 = linalg.LinearOperatorDiag([1., 2.])
-    diag2 = linalg.LinearOperatorDiag([-1., 3.])
-    hints = linear_operator_addition._Hints(
-        is_positive_definite=False, is_non_singular=False)
-
-    self.assertTrue(self._adder.can_add(diag1, diag2))
-    operator = self._adder.add(diag1, diag2, "my_operator", hints)
-    self.assertTrue(isinstance(operator, linalg.LinearOperatorFullMatrix))
-
-    with self.cached_session():
-      self.assertAllClose([[0., 0.], [0., 5.]], operator.to_dense().eval())
-    self.assertFalse(operator.is_positive_definite)
-    self.assertFalse(operator.is_non_singular)
-    self.assertEqual("my_operator", operator.name)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py b/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py
deleted file mode 100644
index 86130a2c07..0000000000
--- a/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py
+++ /dev/null
@@ -1,432 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Add one or more `LinearOperators` efficiently."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import abc
-
-import six
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops.linalg import linear_operator
-from tensorflow.python.ops.linalg import linear_operator_diag
-from tensorflow.python.ops.linalg import linear_operator_full_matrix
-from tensorflow.python.ops.linalg import linear_operator_identity
-from tensorflow.python.ops.linalg import linear_operator_lower_triangular
-
-__all__ = []
-
-
-def add_operators(operators,
-                  operator_name=None,
-                  addition_tiers=None,
-                  name=None):
-  """Efficiently add one or more linear operators.
-
-  Given operators `[A1, A2,...]`, this `Op` returns a possibly shorter list of
-  operators `[B1, B2,...]` such that
-
-  ```sum_k Ak.matmul(x) = sum_k Bk.matmul(x).```
-
-  The operators `Bk` result by adding some of the `Ak`, as allowed by
-  `addition_tiers`.
-
-  Example of efficient adding of diagonal operators.
-
-  ```python
-  A1 = LinearOperatorDiag(diag=[1., 1.], name="A1")
-  A2 = LinearOperatorDiag(diag=[2., 2.], name="A2")
-
-  # Use two tiers, the first contains an Adder that returns Diag.  Since both
-  # A1 and A2 are Diag, they can use this Adder.  The second tier will not be
-  # used.
-  addition_tiers = [
-      [_AddAndReturnDiag()],
-      [_AddAndReturnMatrix()]]
-  B_list = add_operators([A1, A2], addition_tiers=addition_tiers)
-
-  len(B_list)
-  ==> 1
-
-  B_list[0].__class__.__name__
-  ==> 'LinearOperatorDiag'
-
-  B_list[0].to_dense()
-  ==> [[3., 0.],
-       [0., 3.]]
-
-  B_list[0].name
-  ==> 'Add/A1__A2/'
-  ```
-
-  Args:
-    operators:  Iterable of `LinearOperator` objects with same `dtype`, domain
-      and range dimensions, and broadcastable batch shapes.
-    operator_name:  String name for returned `LinearOperator`.  Defaults to
-      concatenation of "Add/A__B/" that indicates the order of addition steps.
-    addition_tiers:  List tiers, like `[tier_0, tier_1, ...]`, where `tier_i`
-      is a list of `Adder` objects.  This function attempts to do all additions
-      in tier `i` before trying tier `i + 1`.
-    name:  A name for this `Op`.  Defaults to `add_operators`.
-
-  Returns:
-    Subclass of `LinearOperator`.  Class and order of addition may change as new
-      (and better) addition strategies emerge.
-
-  Raises:
-    ValueError:  If `operators` argument is empty.
-    ValueError:  If shapes are incompatible.
-  """
-  # Default setting
-  if addition_tiers is None:
-    addition_tiers = _DEFAULT_ADDITION_TIERS
-
-  # Argument checking.
-  check_ops.assert_proper_iterable(operators)
-  operators = list(reversed(operators))
-  if len(operators) < 1:
-    raise ValueError(
-        "Argument 'operators' must contain at least one operator.  "
-        "Found: %s" % operators)
-  if not all(
-      isinstance(op, linear_operator.LinearOperator) for op in operators):
-    raise TypeError(
-        "Argument 'operators' must contain only LinearOperator instances.  "
-        "Found: %s" % operators)
-  _static_check_for_same_dimensions(operators)
-  _static_check_for_broadcastable_batch_shape(operators)
-
-  graph_parents = []
-  for operator in operators:
-    graph_parents.extend(operator.graph_parents)
-
-  with ops.name_scope(name or "add_operators", values=graph_parents):
-
-    # Additions done in one of the tiers.  Try tier 0, 1,...
-    ops_to_try_at_next_tier = list(operators)
-    for tier in addition_tiers:
-      ops_to_try_at_this_tier = ops_to_try_at_next_tier
-      ops_to_try_at_next_tier = []
-      while ops_to_try_at_this_tier:
-        op1 = ops_to_try_at_this_tier.pop()
-        op2, adder = _pop_a_match_at_tier(op1, ops_to_try_at_this_tier, tier)
-        if op2 is not None:
-          # Will try to add the result of this again at this same tier.
-          new_operator = adder.add(op1, op2, operator_name)
-          ops_to_try_at_this_tier.append(new_operator)
-        else:
-          ops_to_try_at_next_tier.append(op1)
-
-    return ops_to_try_at_next_tier
-
-
-def _pop_a_match_at_tier(op1, operator_list, tier):
-  # Search from the back of list to the front in order to create nice default
-  # order of operations.
-  for i in range(1, len(operator_list) + 1):
-    op2 = operator_list[-i]
-    for adder in tier:
-      if adder.can_add(op1, op2):
-        return operator_list.pop(-i), adder
-  return None, None
-
-
-def _infer_hints_allowing_override(op1, op2, hints):
-  """Infer hints from op1 and op2.  hints argument is an override.
-
-  Args:
-    op1:  LinearOperator
-    op2:  LinearOperator
-    hints:  _Hints object holding "is_X" boolean hints to use for returned
-      operator.
-      If some hint is None, try to set using op1 and op2.  If the
-      hint is provided, ignore op1 and op2 hints.  This allows an override
-      of previous hints, but does not allow forbidden hints (e.g. you still
-      cannot say a real diagonal operator is not self-adjoint.
-
-  Returns:
-    _Hints object.
-  """
-  hints = hints or _Hints()
-  # If A, B are self-adjoint, then so is A + B.
-  if hints.is_self_adjoint is None:
-    is_self_adjoint = op1.is_self_adjoint and op2.is_self_adjoint
-  else:
-    is_self_adjoint = hints.is_self_adjoint
-
-  # If A, B are positive definite, then so is A + B.
-  if hints.is_positive_definite is None:
-    is_positive_definite = op1.is_positive_definite and op2.is_positive_definite
-  else:
-    is_positive_definite = hints.is_positive_definite
-
-  # A positive definite operator is always non-singular.
-  if is_positive_definite and hints.is_positive_definite is None:
-    is_non_singular = True
-  else:
-    is_non_singular = hints.is_non_singular
-
-  return _Hints(
-      is_non_singular=is_non_singular,
-      is_self_adjoint=is_self_adjoint,
-      is_positive_definite=is_positive_definite)
-
-
-def _static_check_for_same_dimensions(operators):
-  """ValueError if operators determined to have different dimensions."""
-  if len(operators) < 2:
-    return
-
-  domain_dimensions = [(op.name, op.domain_dimension.value) for op in operators
-                       if op.domain_dimension.value is not None]
-  if len(set(value for name, value in domain_dimensions)) > 1:
-    raise ValueError("Operators must have the same domain dimension. Found: %s"
-                     % domain_dimensions)
-
-  range_dimensions = [(op.name, op.range_dimension.value) for op in operators
-                      if op.range_dimension.value is not None]
-  if len(set(value for name, value in range_dimensions)) > 1:
-    raise ValueError("Operators must have the same range dimension. Found: %s" %
-                     range_dimensions)
-
-
-def _static_check_for_broadcastable_batch_shape(operators):
-  """ValueError if operators determined to have non-broadcastable shapes."""
-  if len(operators) < 2:
-    return
-
-  # This will fail if they cannot be broadcast together.
-  batch_shape = operators[0].batch_shape
-  for op in operators[1:]:
-    batch_shape = array_ops.broadcast_static_shape(batch_shape, op.batch_shape)
-
-
-class _Hints(object):
-  """Holds 'is_X' flags that every LinearOperator is initialized with."""
-
-  def __init__(self,
-               is_non_singular=None,
-               is_positive_definite=None,
-               is_self_adjoint=None):
-    self.is_non_singular = is_non_singular
-    self.is_positive_definite = is_positive_definite
-    self.is_self_adjoint = is_self_adjoint
-
-
-################################################################################
-# Classes to add two linear operators.
-################################################################################
-
-
-@six.add_metaclass(abc.ABCMeta)
-class _Adder(object):
-  """Abstract base class to add two operators.
-
-  Each `Adder` acts independently, adding everything it can, paying no attention
-  as to whether another `Adder` could have done the addition more efficiently.
-  """
-
-  @property
-  def name(self):
-    return self.__class__.__name__
-
-  @abc.abstractmethod
-  def can_add(self, op1, op2):
-    """Returns `True` if this `Adder` can add `op1` and `op2`.  Else `False`."""
-    pass
-
-  @abc.abstractmethod
-  def _add(self, op1, op2, operator_name, hints):
-    # Derived classes can assume op1 and op2 have been validated, e.g. they have
-    # the same dtype, and their domain/range dimensions match.
-    pass
-
-  def add(self, op1, op2, operator_name, hints=None):
-    """Return new `LinearOperator` acting like `op1 + op2`.
-
-    Args:
-      op1:  `LinearOperator`
-      op2:  `LinearOperator`, with `shape` and `dtype` such that adding to
-        `op1` is allowed.
-      operator_name:  `String` name to give to returned `LinearOperator`
-      hints:  `_Hints` object.  Returned `LinearOperator` will be created with
-        these hints.
-
-    Returns:
-      `LinearOperator`
-    """
-    updated_hints = _infer_hints_allowing_override(op1, op2, hints)
-
-    if operator_name is None:
-      operator_name = "Add/" + op1.name + "__" + op2.name + "/"
-
-    values = op1.graph_parents + op2.graph_parents
-    scope_name = self.name
-    if scope_name.startswith("_"):
-      scope_name = scope_name[1:]
-    with ops.name_scope(scope_name, values=values):
-      return self._add(op1, op2, operator_name, updated_hints)
-
-
-class _AddAndReturnScaledIdentity(_Adder):
-  """Handles additions resulting in an Identity family member.
-
-  The Identity (`LinearOperatorScaledIdentity`, `LinearOperatorIdentity`) family
-  is closed under addition.  This `Adder` respects that, and returns an Identity
-  """
-
-  def can_add(self, op1, op2):
-    types = {_type(op1), _type(op2)}
-    return not types.difference(_IDENTITY_FAMILY)
-
-  def _add(self, op1, op2, operator_name, hints):
-    # Will build a LinearOperatorScaledIdentity.
-
-    if _type(op1) == _SCALED_IDENTITY:
-      multiplier_1 = op1.multiplier
-    else:
-      multiplier_1 = array_ops.ones(op1.batch_shape_tensor(), dtype=op1.dtype)
-
-    if _type(op2) == _SCALED_IDENTITY:
-      multiplier_2 = op2.multiplier
-    else:
-      multiplier_2 = array_ops.ones(op2.batch_shape_tensor(), dtype=op2.dtype)
-
-    return linear_operator_identity.LinearOperatorScaledIdentity(
-        num_rows=op1.range_dimension_tensor(),
-        multiplier=multiplier_1 + multiplier_2,
-        is_non_singular=hints.is_non_singular,
-        is_self_adjoint=hints.is_self_adjoint,
-        is_positive_definite=hints.is_positive_definite,
-        name=operator_name)
-
-
-class _AddAndReturnDiag(_Adder):
-  """Handles additions resulting in a Diag operator."""
-
-  def can_add(self, op1, op2):
-    types = {_type(op1), _type(op2)}
-    return not types.difference(_DIAG_LIKE)
-
-  def _add(self, op1, op2, operator_name, hints):
-    return linear_operator_diag.LinearOperatorDiag(
-        diag=op1.diag_part() + op2.diag_part(),
-        is_non_singular=hints.is_non_singular,
-        is_self_adjoint=hints.is_self_adjoint,
-        is_positive_definite=hints.is_positive_definite,
-        name=operator_name)
-
-
-class _AddAndReturnTriL(_Adder):
-  """Handles additions resulting in a TriL operator."""
-
-  def can_add(self, op1, op2):
-    types = {_type(op1), _type(op2)}
-    return not types.difference(_DIAG_LIKE.union({_TRIL}))
-
-  def _add(self, op1, op2, operator_name, hints):
-    if _type(op1) in _EFFICIENT_ADD_TO_TENSOR:
-      op_add_to_tensor, op_other = op1, op2
-    else:
-      op_add_to_tensor, op_other = op2, op1
-
-    return linear_operator_lower_triangular.LinearOperatorLowerTriangular(
-        tril=op_add_to_tensor.add_to_tensor(op_other.to_dense()),
-        is_non_singular=hints.is_non_singular,
-        is_self_adjoint=hints.is_self_adjoint,
-        is_positive_definite=hints.is_positive_definite,
-        name=operator_name)
-
-
-class _AddAndReturnMatrix(_Adder):
-  """"Handles additions resulting in a `LinearOperatorFullMatrix`."""
-
-  def can_add(self, op1, op2):  # pylint: disable=unused-argument
-    return isinstance(op1, linear_operator.LinearOperator) and isinstance(
-        op2, linear_operator.LinearOperator)
-
-  def _add(self, op1, op2, operator_name, hints):
-    if _type(op1) in _EFFICIENT_ADD_TO_TENSOR:
-      op_add_to_tensor, op_other = op1, op2
-    else:
-      op_add_to_tensor, op_other = op2, op1
-    return linear_operator_full_matrix.LinearOperatorFullMatrix(
-        matrix=op_add_to_tensor.add_to_tensor(op_other.to_dense()),
-        is_non_singular=hints.is_non_singular,
-        is_self_adjoint=hints.is_self_adjoint,
-        is_positive_definite=hints.is_positive_definite,
-        name=operator_name)
-
-
-################################################################################
-# Constants designating types of LinearOperators
-################################################################################
-
-# Type name constants for LinearOperator classes.
-_IDENTITY = "identity"
-_SCALED_IDENTITY = "scaled_identity"
-_DIAG = "diag"
-_TRIL = "tril"
-_MATRIX = "matrix"
-
-# Groups of operators.
-_DIAG_LIKE = {_DIAG, _IDENTITY, _SCALED_IDENTITY}
-_IDENTITY_FAMILY = {_IDENTITY, _SCALED_IDENTITY}
-# operators with an efficient .add_to_tensor() method.
-_EFFICIENT_ADD_TO_TENSOR = _DIAG_LIKE
-
-
-def _type(operator):
-  """Returns the type name constant (e.g. _TRIL) for operator."""
-  if isinstance(operator, linear_operator_diag.LinearOperatorDiag):
-    return _DIAG
-  if isinstance(operator,
-                linear_operator_lower_triangular.LinearOperatorLowerTriangular):
-    return _TRIL
-  if isinstance(operator, linear_operator_full_matrix.LinearOperatorFullMatrix):
-    return _MATRIX
-  if isinstance(operator, linear_operator_identity.LinearOperatorIdentity):
-    return _IDENTITY
-  if isinstance(operator,
-                linear_operator_identity.LinearOperatorScaledIdentity):
-    return _SCALED_IDENTITY
-  raise TypeError("Operator type unknown: %s" % operator)
-
-
-################################################################################
-# Addition tiers:
-# We attempt to use Adders in tier K before K+1.
-#
-# Organize tiers to
-#   (i) reduce O(..) complexity of forming final operator, and
-#   (ii) produce the "most efficient" final operator.
-# Dev notes:
-#  * Results of addition at tier K will be added at tier K or higher.
-#  * Tiers may change, and we warn the user that it may change.
-################################################################################
-
-# Note that the final tier, _AddAndReturnMatrix, will convert everything to a
-# dense matrix.  So it is sometimes very inefficient.
-_DEFAULT_ADDITION_TIERS = [
-    [_AddAndReturnScaledIdentity()],
-    [_AddAndReturnDiag()],
-    [_AddAndReturnTriL()],
-    [_AddAndReturnMatrix()],
-]
-- 
GitLab


From 8ea4ea95ad1e85948019daee7a9e70e97082f6d0 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 17 Sep 2018 15:50:12 -0700
Subject: [PATCH 0293/1357] Fix GraphConstructor and import_graph_def bug with
 variadic ops.

Prior to this change,
GraphConstructor::PopulateMissingUnusedInputMapKey() didn't correctly
compute the number of outputs for ops with variadic outputs. This
meant that missing_unused_input_map_keys could contain spurious
entries for unused variadic outputs, which could trigger a ValueError
in import_graph_def.

This also adds a new util method in node_def_util.h, NumOutputsForNode().

PiperOrigin-RevId: 213353158
---
 tensorflow/core/framework/node_def_util.cc      | 8 ++++++++
 tensorflow/core/framework/node_def_util.h       | 4 ++++
 tensorflow/core/graph/graph_constructor.cc      | 4 +++-
 tensorflow/core/graph/graph_constructor_test.cc | 9 ++++++++-
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc
index bacc1d72c4..42ec315a32 100644
--- a/tensorflow/core/framework/node_def_util.cc
+++ b/tensorflow/core/framework/node_def_util.cc
@@ -403,6 +403,14 @@ Status InOutTypesForNode(const NodeDef& node_def, const OpDef& op_def,
   return OutputTypesForNode(node_def, op_def, outputs);
 }
 
+Status NumOutputsForNode(const NodeDef& node_def, const OpDef& op_def,
+                         int* num_outputs) {
+  DataTypeVector outputs;
+  TF_RETURN_IF_ERROR(OutputTypesForNode(node_def, op_def, &outputs));
+  *num_outputs = outputs.size();
+  return Status::OK();
+}
+
 Status ValidateNodeDef(const NodeDef& node_def, const OpDef& op_def) {
   if (node_def.op() != op_def.name()) {
     return errors::InvalidArgument("NodeDef op '", node_def.op(),
diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h
index 499034cab2..7528d3d306 100644
--- a/tensorflow/core/framework/node_def_util.h
+++ b/tensorflow/core/framework/node_def_util.h
@@ -261,6 +261,10 @@ Status OutputTypesForNode(const NodeDef& node_def, const OpDef& op_def,
 // REQUIRES: ValidateOpDef(op_def).ok()
 Status InOutTypesForNode(const NodeDef& node_def, const OpDef& op_def,
                          DataTypeVector* inputs, DataTypeVector* outputs);
+// Computes the number of outputs for a specific node.
+// REQUIRES: ValidateOpDef(op_def).ok()
+Status NumOutputsForNode(const NodeDef& node_def, const OpDef& op_def,
+                         int* num_outputs);
 
 // Validates that the NodeDef:
 // * Defines all expected attrs from the OpDef.
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index 7399613f6a..eeb5c14eaa 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -1162,7 +1162,9 @@ Status GraphConstructor::PopulateMissingUnusedInputMapKeys() {
     const NodeDef* node_def = node_defs_[pair->second.gdef_index];
     const OpDef* op_def;
     TF_RETURN_IF_ERROR(g_->op_registry()->LookUpOpDef(node_def->op(), &op_def));
-    if (key.second >= op_def->output_arg_size()) {
+    int num_outputs;
+    TF_RETURN_IF_ERROR(NumOutputsForNode(*node_def, *op_def, &num_outputs));
+    if (key.second >= num_outputs) {
       // key's index out of bounds
       missing_unused_input_map_keys_->push_back(key);
     }
diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc
index 73142ebde7..3eef6bd2bd 100644
--- a/tensorflow/core/graph/graph_constructor_test.cc
+++ b/tensorflow/core/graph/graph_constructor_test.cc
@@ -199,6 +199,10 @@ REGISTER_OP("TestOneInputOneOutput")
     .Output("y: T")
     .Attr("T: {float, int64}")
     .SetShapeFn(shape_inference::UnchangedShape);
+REGISTER_OP("TestVariadicOutput")
+    .Output("outputs: N * int32")
+    .Attr("N: int >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
 REGISTER_OP("TestDefaultAttr")
     .Attr("default_int: int=31415")
     .SetShapeFn(shape_inference::NoOutputs);
@@ -1463,12 +1467,15 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapMissingUnusedKeys) {
   opts.input_map[TensorId("DNE", 0)] = TensorId("input", 0);
   // Unused but not missing
   opts.input_map[TensorId("t1", 0)] = TensorId("W1", 0);
+  // Unused but not missing
+  opts.input_map[TensorId("variadic", 4)] = TensorId("input", 0);
   ExpectOK(
       R"EOF(
       node { name: 'W2' op: 'TestParams' }
       node { name: 'new_input' op: 'TestInput' input: [ '^W2' ] }
       node { name: 't1' op: 'TestMul' input: [ 'new_input:0', 'new_input:1' ] }
-      node { name: 't2' op: 'TestMul' input: [ 't1:0', 't1:0' ] }
+      node { name: 'variadic' op: 'TestVariadicOutput'
+             attr { key: "N" value { i: 5 } } }
       )EOF",
       opts, &refiner, &results);
 
-- 
GitLab


From f5116dd366a5bb1d679e1682c13b8fa3c4830a84 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 15:56:43 -0700
Subject: [PATCH 0294/1357] Fixing the documentation of the
 parse_sequence_example function.

PiperOrigin-RevId: 213354240
---
 tensorflow/python/ops/parsing_ops.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index bb8da3162a..b3e03a0135 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -981,9 +981,10 @@ def parse_sequence_example(serialized,
     name: A name for this operation (optional).
 
   Returns:
-    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
-    The first dict contains the context key/values.
-    The second dict contains the feature_list key/values.
+    A tuple of three `dict`s, each mapping keys to `Tensor`s and
+    `SparseTensor`s. The first dict contains the context key/values,
+    the second dict contains the feature_list key/values, and the final dict
+    contains the lengths of any dense feature_list features.
 
   Raises:
     ValueError: if any feature is invalid.
-- 
GitLab


From 8ef1ece7d0ecdec633a22a8100fdae05cfbacb3e Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 17 Sep 2018 16:31:24 -0700
Subject: [PATCH 0295/1357] [tf.data] Introducing `tf.data.Dataset.window(size,
 shift, stride, drop_remainder)`, which can be used for combining elements of
 input dataset into "windows". A window is itself a finite dataset and, among
 other things, can be used for generalized batching (see
 https://github.com/tensorflow/community/pull/5 for details).

PiperOrigin-RevId: 213360134
---
 .../kernel_tests/window_dataset_op_test.py    |   7 +-
 .../contrib/data/python/ops/grouping.py       |  51 +--
 tensorflow/contrib/data/python/ops/sliding.py |   4 +
 .../base_api/api_def_WindowDataset.pbtxt      |  23 +-
 .../core/kernels/data/window_dataset_op.cc    | 215 +++++++++++--
 .../core/ops/compat/ops_history.v1.pbtxt      |  14 +-
 tensorflow/core/ops/dataset_ops.cc            |  10 +-
 tensorflow/python/data/kernel_tests/BUILD     |  17 +
 .../kernel_tests/window_dataset_op_test.py    | 295 ++++++++++++++++++
 tensorflow/python/data/ops/dataset_ops.py     |  93 +++++-
 .../golden/v1/tensorflow.data.-dataset.pbtxt  |   4 +
 ...ow.data.-fixed-length-record-dataset.pbtxt |   4 +
 .../tensorflow.data.-t-f-record-dataset.pbtxt |   4 +
 .../tensorflow.data.-text-line-dataset.pbtxt  |   4 +
 .../golden/v2/tensorflow.data.-dataset.pbtxt  |   4 +
 ...ow.data.-fixed-length-record-dataset.pbtxt |   4 +
 .../tensorflow.data.-t-f-record-dataset.pbtxt |   4 +
 .../tensorflow.data.-text-line-dataset.pbtxt  |   4 +
 18 files changed, 679 insertions(+), 82 deletions(-)
 create mode 100644 tensorflow/python/data/kernel_tests/window_dataset_op_test.py

diff --git a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
index 6eaa0b1959..8b7b3ac0f7 100644
--- a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
@@ -89,13 +89,14 @@ class WindowDatasetTest(test.TestCase, parameterized.TestCase):
       return dataset_ops.Dataset.zip(
           tuple([fn(*arg) if isinstance(arg, tuple) else arg for arg in args]))
 
-    dataset = self._structuredDataset(structure, shape, dtype).apply(
+    dataset = self._structuredDataset(structure, shape, dtype).repeat(5).apply(
         grouping.window_dataset(5)).flat_map(fn)
     get_next = dataset.make_one_shot_iterator().get_next()
     with self.cached_session() as sess:
       expected = sess.run(self._structuredElement(structure, shape, dtype))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
+      for _ in range(5):
+        actual = sess.run(get_next)
+        self._assertEqual(expected, actual)
 
   @parameterized.named_parameters(
       ("1", None, np.int32([]), dtypes.bool),
diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py
index 099e10db92..020167e4d1 100644
--- a/tensorflow/contrib/data/python/ops/grouping.py
+++ b/tensorflow/contrib/data/python/ops/grouping.py
@@ -255,6 +255,7 @@ def _map_x_dataset(map_func):
   return _apply_fn
 
 
+# TODO(b/115382007) Remove this once canned reducers move to core.
 def window_dataset(window_size):
   """A transformation that creates window datasets from the input dataset.
 
@@ -271,7 +272,12 @@ def window_dataset(window_size):
   """
 
   def _apply_fn(dataset):
-    return _WindowDataset(dataset, window_size)
+    return dataset_ops.WindowDataset(
+        dataset,
+        size=window_size,
+        shift=window_size,
+        stride=1,
+        drop_remainder=False)
 
   return _apply_fn
 
@@ -556,46 +562,3 @@ class _MapXDataset(dataset_ops.Dataset):
   @property
   def output_types(self):
     return self._output_types
-
-
-class _WindowDataset(dataset_ops.Dataset):
-  """A dataset that creates window datasets from the input elements."""
-
-  def __init__(self, input_dataset, window_size):
-    """See `window_dataset()` for more details."""
-    super(_WindowDataset, self).__init__()
-    self._input_dataset = input_dataset
-    self._window_size = ops.convert_to_tensor(
-        window_size, dtype=dtypes.int64, name="window_size")
-    self._output_classes = nest.pack_sequence_as(
-        input_dataset.output_classes,
-        [
-            dataset_ops._NestedDatasetComponent(  # pylint: disable=protected-access
-                output_classes=output_class,
-                output_shapes=output_shape,
-                output_types=output_type)
-            for output_class, output_shape, output_type in zip(
-                nest.flatten(input_dataset.output_classes),
-                nest.flatten(input_dataset.output_shapes),
-                nest.flatten(input_dataset.output_types))
-        ])
-    self._output_shapes = self._output_classes
-    self._output_types = self._output_classes
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.window_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._window_size,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py
index 8025dcdd16..b0d6a16c20 100644
--- a/tensorflow/contrib/data/python/ops/sliding.py
+++ b/tensorflow/contrib/data/python/ops/sliding.py
@@ -67,6 +67,10 @@ class _SlideDataset(dataset_ops.Dataset):
 
 @deprecation.deprecated_args(
     None, "stride is deprecated, use window_shift instead", "stride")
+@deprecation.deprecated(
+    None, "Use `tf.data.Dataset.window(size=window_size, shift=window_shift, "
+    "stride=window_stride).flat_map(lambda x: x.batch(window.size))` "
+    "instead.")
 def sliding_window_batch(window_size,
                          stride=None,
                          window_shift=None,
diff --git a/tensorflow/core/api_def/base_api/api_def_WindowDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_WindowDataset.pbtxt
index 1bc3660479..01387b7527 100644
--- a/tensorflow/core/api_def/base_api/api_def_WindowDataset.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_WindowDataset.pbtxt
@@ -2,9 +2,30 @@ op {
   visibility: HIDDEN
   graph_op_name: "WindowDataset"
   in_arg {
-    name: "window_size"
+    name: "size"
     description: <<END
 A scalar representing the number of elements to accumulate in a window.
+END
+  }
+  in_arg {
+    name: "shift"
+    description: <<END
+A scalar representing the steps moving the sliding window forward in one
+iteration. It must be positive.
+END
+  }
+  in_arg {
+    name: "stride"
+    description: <<END
+A scalar representing the stride of the input elements of the sliding window.
+It must be positive.
+END
+  }
+  in_arg {
+    name: "drop_remainder"
+    description: <<END
+A scalar representing whether a window should be dropped in case its size is
+smaller than desired.
 END
   }
   summary: "A dataset that creates window datasets from the input dataset."
diff --git a/tensorflow/core/kernels/data/window_dataset_op.cc b/tensorflow/core/kernels/data/window_dataset_op.cc
index 3975086841..ac44623ce2 100644
--- a/tensorflow/core/kernels/data/window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/window_dataset_op.cc
@@ -33,22 +33,44 @@ class WindowDatasetOp : public UnaryDatasetOpKernel {
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
     int64 window_size = 0;
-    OP_REQUIRES_OK(
-        ctx, ParseScalarArgument<int64>(ctx, "window_size", &window_size));
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "size", &window_size));
     OP_REQUIRES(
         ctx, window_size > 0,
         errors::InvalidArgument("Window size must be greater than zero."));
 
-    *output = new Dataset(ctx, window_size, input);
+    int64 window_shift = 0;
+    OP_REQUIRES_OK(ctx,
+                   ParseScalarArgument<int64>(ctx, "shift", &window_shift));
+    OP_REQUIRES(
+        ctx, window_shift > 0,
+        errors::InvalidArgument("Window shift must be greater than zero."));
+
+    int64 window_stride = 0;
+    OP_REQUIRES_OK(ctx,
+                   ParseScalarArgument<int64>(ctx, "stride", &window_stride));
+    OP_REQUIRES(
+        ctx, window_stride > 0,
+        errors::InvalidArgument("Window stride must be greater than zero."));
+
+    bool drop_remainder;
+    OP_REQUIRES_OK(
+        ctx, ParseScalarArgument<bool>(ctx, "drop_remainder", &drop_remainder));
+
+    *output = new Dataset(ctx, input, window_size, window_shift, window_stride,
+                          drop_remainder);
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
-    Dataset(OpKernelContext* ctx, int64 window_size, const DatasetBase* input)
+    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 window_size,
+            int64 window_shift, int64 window_stride, bool drop_remainder)
         : DatasetBase(DatasetContext(ctx)),
+          input_(input),
           window_size_(window_size),
-          input_(input) {
+          window_shift_(window_shift),
+          window_stride_(window_stride),
+          drop_remainder_(drop_remainder) {
       input_->Ref();
     }
 
@@ -72,7 +94,8 @@ class WindowDatasetOp : public UnaryDatasetOpKernel {
     }
 
     string DebugString() const override {
-      return strings::StrCat("WindowDatasetOp(", window_size_, ")::Dataset");
+      return strings::StrCat("WindowDatasetOp(", window_size_, window_shift_,
+                             window_stride_, drop_remainder_, ")::Dataset");
     }
 
    protected:
@@ -81,10 +104,19 @@ class WindowDatasetOp : public UnaryDatasetOpKernel {
                               Node** output) const override {
       Node* input_graph_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
-      Node* window_size = nullptr;
-      TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size));
+      Node* window_size_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size_node));
+      Node* window_shift_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(window_shift_, &window_shift_node));
+      Node* window_stride_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(window_stride_, &window_stride_node));
+      Node* drop_remainder_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(drop_remainder_, &drop_remainder_node));
       TF_RETURN_IF_ERROR(
-          b->AddDataset(this, {input_graph_node, window_size}, output));
+          b->AddDataset(this,
+                        {input_graph_node, window_size_node, window_shift_node,
+                         window_stride_node, drop_remainder_node},
+                        output));
       return Status::OK();
     }
 
@@ -101,37 +133,79 @@ class WindowDatasetOp : public UnaryDatasetOpKernel {
       Status GetNextInternal(IteratorContext* ctx,
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
-        // Each row of `window_elements` is a tuple of tensors from the
-        // input iterator.
+        const int64 window_size = dataset()->window_size_;
+        const int64 window_shift = dataset()->window_shift_;
+        const int64 window_stride = dataset()->window_stride_;
         std::vector<std::vector<Tensor>> window_elements;
+        Status status = Status::OK();
         {
           mutex_lock l(mu_);
-          if (!input_impl_) {
+          if (!input_impl_ && buffer_.empty()) {
             *end_of_sequence = true;
             return Status::OK();
           }
-          window_elements.reserve(dataset()->window_size_);
-          *end_of_sequence = false;
-          for (int i = 0; i < dataset()->window_size_ && !*end_of_sequence;
-               ++i) {
-            std::vector<Tensor> window_element_tuple;
-            TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &window_element_tuple,
-                                                    end_of_sequence));
-            if (!*end_of_sequence) {
-              window_elements.emplace_back(std::move(window_element_tuple));
-            } else {
-              input_impl_.reset();
+
+          // Add elements to the buffer.
+          size_t target_size = TargetBufferSize(window_size, window_stride);
+          if (input_impl_) {
+            *end_of_sequence = false;
+            for (size_t i = buffer_.size();
+                 i < target_size && !*end_of_sequence; ++i) {
+              std::vector<Tensor> element;
+              Status status =
+                  input_impl_->GetNext(ctx, &element, end_of_sequence);
+              if (!*end_of_sequence) {
+                buffer_.emplace_back(std::move(element), status);
+              } else {
+                input_impl_.reset();
+              }
             }
           }
+
+          // If there are not enough elements and `drop_remainder` is set, we do
+          // not wish to return a smaller window.
+          if (buffer_.empty() ||
+              (dataset()->drop_remainder_ && buffer_.size() < target_size)) {
+            DCHECK(*end_of_sequence);
+            return Status::OK();
+          }
+
+          int num_elements = 1 + (buffer_.size() - 1) / window_stride;
+          window_elements.reserve(num_elements);
+          for (size_t i = 0; i < num_elements; ++i) {
+            status.Update(buffer_[window_stride * i].status);
+            if (!status.ok()) {
+              break;
+            }
+            window_elements.emplace_back(buffer_[window_stride * i].result);
+          }
+
+          // Shift the window, discarding elements if necessary.
+          int buffer_size = buffer_.size();
+          if (window_shift >= buffer_size) {
+            for (size_t i = buffer_size; input_impl_ && i < window_shift; ++i) {
+              bool end_of_input;
+              std::vector<Tensor> element;
+              // Ignore non-error status of discarded elements.
+              input_impl_->GetNext(ctx, &element, &end_of_input).IgnoreError();
+              if (end_of_input) {
+                input_impl_.reset();
+              }
+            }
+            buffer_.clear();
+          } else {
+            buffer_.erase(buffer_.begin(), buffer_.begin() + window_shift);
+          }
         }
 
-        if (window_elements.empty()) {
-          DCHECK(*end_of_sequence);
-          return Status::OK();
+        if (!status.ok()) {
+          return status;
         }
 
+        // Construct output tensors.
         const size_t num_tuple_components = window_elements[0].size();
         const int64 num_window_elements = window_elements.size();
+        *end_of_sequence = false;
         for (size_t idx = 0; idx < num_tuple_components; ++idx) {
           DatasetBase* window_dataset;
           std::vector<std::vector<Tensor>> window_component_elements;
@@ -154,7 +228,6 @@ class WindowDatasetOp : public UnaryDatasetOpKernel {
           TF_RETURN_IF_ERROR(StoreDatasetInVariantTensor(window_dataset,
                                                          &out_tensors->back()));
         }
-        *end_of_sequence = false;
         return Status::OK();
       }
 
@@ -167,6 +240,20 @@ class WindowDatasetOp : public UnaryDatasetOpKernel {
         } else {
           TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
         }
+        // Save buffer.
+        TF_RETURN_IF_ERROR(writer->WriteScalar(strings::StrCat("buffer_size"),
+                                               buffer_.size()));
+        for (int64 i = 0; i < buffer_.size(); i++) {
+          TF_RETURN_IF_ERROR(WriteStatusLocked(writer, i, buffer_[i].status));
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(strings::StrCat("buffer[", i, "].size"),
+                                  buffer_[i].result.size()));
+          for (int64 j = 0; j < buffer_[i].result.size(); j++) {
+            TF_RETURN_IF_ERROR(
+                writer->WriteTensor(strings::StrCat("buffer[", i, "][", j, "]"),
+                                    buffer_[i].result[j]));
+          }
+        }
         return Status::OK();
       }
 
@@ -178,22 +265,92 @@ class WindowDatasetOp : public UnaryDatasetOpKernel {
         } else {
           input_impl_.reset();
         }
+        // Restore buffer.
+        int64 buffer_size;
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(strings::StrCat("buffer_size"), &buffer_size));
+        buffer_.resize(buffer_size);
+        for (int64 i = 0; i < buffer_size; i++) {
+          int64 vector_size;
+          TF_RETURN_IF_ERROR(ReadStatusLocked(reader, i, &buffer_[i].status));
+          TF_RETURN_IF_ERROR(reader->ReadScalar(
+              strings::StrCat("buffer[", i, "].size"), &vector_size));
+          buffer_[i].result.resize(vector_size);
+          for (int64 j = 0; j < vector_size; j++) {
+            TF_RETURN_IF_ERROR(
+                reader->ReadTensor(strings::StrCat("buffer[", i, "][", j, "]"),
+                                   &buffer_[i].result[j]));
+          }
+        }
         return Status::OK();
       }
 
      private:
+      struct InvocationResult {
+        InvocationResult() = default;
+        InvocationResult(std::vector<Tensor>&& result, const Status& status)
+            : result(result), status(status) {}
+
+        std::vector<Tensor> result;
+        Status status;
+      };
+
+      Status WriteStatusLocked(IteratorStateWriter* writer, size_t index,
+                               const Status& status)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        TF_RETURN_IF_ERROR(writer->WriteScalar(
+            CodeKey(index), static_cast<int64>(status.code())));
+        if (!status.ok()) {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(ErrorMessageKey(index),
+                                                 status.error_message()));
+        }
+        return Status::OK();
+      }
+
+      Status ReadStatusLocked(IteratorStateReader* reader, size_t index,
+                              Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        int64 code_int;
+        TF_RETURN_IF_ERROR(reader->ReadScalar(CodeKey(index), &code_int));
+        error::Code code = static_cast<error::Code>(code_int);
+
+        if (code != error::Code::OK) {
+          string error_message;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(ErrorMessageKey(index), &error_message));
+          *status = Status(code, error_message);
+        } else {
+          *status = Status::OK();
+        }
+        return Status::OK();
+      }
+
+      string CodeKey(size_t index) {
+        return full_name(strings::StrCat("buffer[", index, "].code"));
+      }
+
+      string ErrorMessageKey(size_t index) {
+        return full_name(strings::StrCat("buffer[", index, "].error_message"));
+      }
+
+      size_t TargetBufferSize(int64 window_size, int64 window_stride) {
+        return (window_size - 1) * window_stride + 1;
+      }
+
       mutex mu_;
+      std::deque<InvocationResult> buffer_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
 
-    const int64 window_size_;
     const DatasetBase* const input_;
+    const int64 window_size_;
+    const int64 window_shift_;
+    const int64 window_stride_;
+    const bool drop_remainder_;
   };
 };
 
 REGISTER_KERNEL_BUILDER(Name("WindowDataset").Device(DEVICE_CPU),
                         WindowDatasetOp);
-
 }  // namespace
 }  // namespace data
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 57c6bda98b..e59958749c 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -75602,9 +75602,21 @@ op {
     type: DT_VARIANT
   }
   input_arg {
-    name: "window_size"
+    name: "size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "shift"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "stride"
     type: DT_INT64
   }
+  input_arg {
+    name: "drop_remainder"
+    type: DT_BOOL
+  }
   output_arg {
     name: "handle"
     type: DT_VARIANT
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 7d9e7b2d3f..4d3f272c1b 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -396,14 +396,20 @@ REGISTER_OP("FilterByLastComponentDataset")
 
 REGISTER_OP("WindowDataset")
     .Input("input_dataset: variant")
-    .Input("window_size: int64")
+    .Input("size: int64")
+    .Input("shift: int64")
+    .Input("stride: int64")
+    .Input("drop_remainder: bool")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
     .SetShapeFn([](shape_inference::InferenceContext* c) {
       shape_inference::ShapeHandle unused;
-      // batch_size should be a scalar.
+      // size, shift, stride, and drop_remainder should be scalars.
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
       return shape_inference::ScalarShape(c);
     });
 
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 631b87a718..17d4fec662 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -407,3 +407,20 @@ cuda_py_test(
         "//tensorflow/python:tensor_shape",
     ],
 )
+
+tf_py_test(
+    name = "window_dataset_op_test",
+    size = "small",
+    srcs = ["window_dataset_op_test.py"],
+    additional_deps = [
+        "@absl_py//absl/testing:parameterized",
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
diff --git a/tensorflow/python/data/kernel_tests/window_dataset_op_test.py b/tensorflow/python/data/kernel_tests/window_dataset_op_test.py
new file mode 100644
index 0000000000..fd4348426d
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/window_dataset_op_test.py
@@ -0,0 +1,295 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class WindowDatasetTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ("1", 20, 14, 7, 1),
+      ("2", 20, 17, 9, 1),
+      ("3", 20, 14, 14, 1),
+      ("4", 20, 10, 14, 1),
+      ("5", 20, 14, 19, 1),
+      ("6", 20, 4, 1, 2),
+      ("7", 20, 2, 1, 6),
+      ("8", 20, 4, 7, 2),
+      ("9", 20, 2, 7, 6),
+      ("10", 1, 10, 4, 1),
+      ("11", 0, 10, 4, 1),
+      ("12", 20, 14, 7, 1, False),
+      ("13", 20, 17, 9, 1, False),
+      ("14", 20, 14, 14, 1, False),
+      ("15", 20, 10, 14, 1, False),
+      ("16", 20, 14, 19, 1, False),
+      ("17", 20, 4, 1, 2, False),
+      ("18", 20, 2, 1, 6, False),
+      ("19", 20, 4, 7, 2, False),
+      ("20", 20, 2, 7, 6, False),
+      ("21", 1, 10, 4, 1, False),
+      ("22", 0, 10, 4, 1, False),
+  )
+  def testWindowDataset(self, count, size, shift, stride, drop_remainder=True):
+    """Tests a dataset that slides a window its input elements."""
+    components = (np.arange(7),
+                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
+                  np.array(37.0) * np.arange(7))
+
+    count_t = array_ops.placeholder(dtypes.int64, shape=[])
+    size_t = array_ops.placeholder(dtypes.int64, shape=[])
+    shift_t = array_ops.placeholder(dtypes.int64, shape=[])
+    stride_t = array_ops.placeholder(dtypes.int64, shape=[])
+    drop_remainder_t = array_ops.placeholder(dtypes.bool, shape=[])
+
+    def _map_fn(x, y, z):
+      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
+
+    def _flat_map_fn(x, y, z):
+      return dataset_ops.Dataset.zip((x.batch(batch_size=size_t),
+                                      y.batch(batch_size=size_t),
+                                      z.batch(batch_size=size_t)))
+
+    iterator = dataset_ops.Dataset.from_tensor_slices(components).map(
+        _map_fn).repeat(count).window(
+            size=size_t,
+            shift=shift_t,
+            stride=stride_t,
+            drop_remainder=drop_remainder_t).flat_map(
+                _flat_map_fn).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual([[None] + list(c.shape[1:]) for c in components],
+                     [t.shape.as_list() for t in get_next])
+
+    with self.cached_session() as sess:
+      sess.run(
+          init_op,
+          feed_dict={
+              count_t: count,
+              size_t: size,
+              shift_t: shift,
+              stride_t: stride,
+              drop_remainder_t: drop_remainder
+          })
+      num_full_batches = max(
+          0, (count * 7 - ((size - 1) * stride + 1)) // shift + 1)
+      for i in range(num_full_batches):
+        result = sess.run(get_next)
+        for component, result_component in zip(components, result):
+          for j in range(size):
+            self.assertAllEqual(component[(i * shift + j * stride) % 7]**2,
+                                result_component[j])
+      if not drop_remainder:
+        num_partial_batches = (count * 7) // shift + (
+            (count * 7) % shift > 0) - num_full_batches
+        for i in range(num_partial_batches):
+          result = sess.run(get_next)
+          for component, result_component in zip(components, result):
+            remaining = (count * 7) - ((num_full_batches + i) * shift)
+            num_elements = remaining // stride + ((remaining % stride) > 0)
+            for j in range(num_elements):
+              self.assertAllEqual(
+                  component[((num_full_batches + i) * shift + j * stride) % 7]
+                  **2, result_component[j])
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  @parameterized.named_parameters(
+      ("1", 14, 0, 3, 1),
+      ("2", 14, 3, 0, 1),
+      ("3", 14, 3, 3, 0),
+  )
+  def testWindowDatasetInvalid(self, count, size, shift, stride):
+    count_t = array_ops.placeholder(dtypes.int64, shape=[])
+    size_t = array_ops.placeholder(dtypes.int64, shape=[])
+    shift_t = array_ops.placeholder(dtypes.int64, shape=[])
+    stride_t = array_ops.placeholder(dtypes.int64, shape=[])
+
+    iterator = dataset_ops.Dataset.range(10).map(lambda x: x).repeat(
+        count_t).window(
+            size=size_t, shift=shift_t,
+            stride=stride_t).flat_map(lambda x: x.batch(batch_size=size_t)
+                                     ).make_initializable_iterator()
+    init_op = iterator.initializer
+
+    with self.cached_session() as sess:
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(
+            init_op,
+            feed_dict={
+                count_t: count,
+                size_t: size,
+                shift_t: shift,
+                stride_t: stride
+            })
+
+  def assertSparseValuesEqual(self, a, b):
+    self.assertAllEqual(a.indices, b.indices)
+    self.assertAllEqual(a.values, b.values)
+    self.assertAllEqual(a.dense_shape, b.dense_shape)
+
+  def testWindowSparse(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
+
+    iterator = dataset_ops.Dataset.range(10).map(_sparse).window(
+        size=5, shift=3, drop_remainder=True).flat_map(
+            lambda x: x.batch(batch_size=5)).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      num_batches = (10 - 5) // 3 + 1
+      for i in range(num_batches):
+        actual = sess.run(get_next)
+        expected = sparse_tensor.SparseTensorValue(
+            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
+            values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4],
+            dense_shape=[5, 1])
+        self.assertTrue(sparse_tensor.is_sparse(actual))
+        self.assertSparseValuesEqual(actual, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testWindowSparseWithDifferentDenseShapes(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=array_ops.expand_dims(
+              math_ops.range(i, dtype=dtypes.int64), 1),
+          values=array_ops.fill([math_ops.to_int32(i)], i),
+          dense_shape=[i])
+
+    iterator = dataset_ops.Dataset.range(10).map(_sparse).window(
+        size=5, shift=3, drop_remainder=True).flat_map(
+            lambda x: x.batch(batch_size=5)).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      num_batches = (10 - 5) // 3 + 1
+      for i in range(num_batches):
+        actual = sess.run(get_next)
+        expected_indices = []
+        expected_values = []
+        for j in range(5):
+          for k in range(i * 3 + j):
+            expected_indices.append([j, k])
+            expected_values.append(i * 3 + j)
+        expected = sparse_tensor.SparseTensorValue(
+            indices=expected_indices,
+            values=expected_values,
+            dense_shape=[5, i * 3 + 5 - 1])
+        self.assertTrue(sparse_tensor.is_sparse(actual))
+        self.assertSparseValuesEqual(actual, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testNestedWindowSparse(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
+
+    iterator = dataset_ops.Dataset.range(10).map(_sparse).window(
+        size=4, shift=2,
+        drop_remainder=True).flat_map(lambda x: x.batch(batch_size=4)).window(
+            size=3, shift=1, drop_remainder=True).flat_map(
+                lambda x: x.batch(batch_size=3)).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      # Slide: 1st batch.
+      actual = sess.run(get_next)
+      expected = sparse_tensor.SparseTensorValue(
+          indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [1, 0, 0],
+                   [1, 1, 0], [1, 2, 0], [1, 3, 0], [2, 0, 0], [2, 1, 0],
+                   [2, 2, 0], [2, 3, 0]],
+          values=[0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7],
+          dense_shape=[3, 4, 1])
+      self.assertTrue(sparse_tensor.is_sparse(actual))
+      self.assertSparseValuesEqual(actual, expected)
+      # Slide: 2nd batch.
+      actual = sess.run(get_next)
+      expected = sparse_tensor.SparseTensorValue(
+          indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [1, 0, 0],
+                   [1, 1, 0], [1, 2, 0], [1, 3, 0], [2, 0, 0], [2, 1, 0],
+                   [2, 2, 0], [2, 3, 0]],
+          values=[2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9],
+          dense_shape=[3, 4, 1])
+      self.assertTrue(sparse_tensor.is_sparse(actual))
+      self.assertSparseValuesEqual(actual, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testWindowShapeError(self):
+
+    def generator():
+      yield [1.0, 2.0, 3.0]
+      yield [4.0, 5.0, 6.0]
+      yield [7.0, 8.0, 9.0, 10.0]
+
+    iterator = dataset_ops.Dataset.from_generator(
+        generator, dtypes.float32, output_shapes=[None]).window(
+            size=3, shift=1).flat_map(
+                lambda x: x.batch(batch_size=3)).make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer)
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r"Cannot batch tensors with different shapes in component 0. "
+          r"First element had shape \[3\] and element 2 had shape \[4\]."):
+        sess.run(next_element)
+
+  def testWindowIgnoreErrors(self):
+    input_values = np.float32([1., np.nan, 2., np.nan, 3.])
+    dataset = dataset_ops.Dataset.from_tensor_slices(input_values).map(
+        lambda x: array_ops.check_numerics(x, "message")).window(
+            size=2, shift=2, stride=2,
+            drop_remainder=True).flat_map(lambda x: x.batch(batch_size=2))
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      self.assertAllEqual(np.float32([1., 2.]), sess.run(get_next))
+      self.assertAllEqual(np.float32([2., 3.]), sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index c985e00dd1..93b3a7b93b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1115,7 +1115,7 @@ class Dataset(object):
     return FilterDataset(self, predicate)
 
   def apply(self, transformation_func):
-    """Apply a transformation function to this dataset.
+    """Applies a transformation function to this dataset.
 
     `apply` enables chaining of custom `Dataset` transformations, which are
     represented as functions that take one `Dataset` argument and return a
@@ -1131,7 +1131,7 @@ class Dataset(object):
 
     Args:
       transformation_func: A function that takes one `Dataset` argument and
-          returns a `Dataset`.
+        returns a `Dataset`.
 
     Returns:
       Dataset: The `Dataset` returned by applying `transformation_func` to this
@@ -1142,6 +1142,45 @@ class Dataset(object):
       raise TypeError("`transformation_func` must return a Dataset.")
     return dataset
 
+  def window(self, size, shift=None, stride=1, drop_remainder=False):
+    """Combines input elements into a dataset of windows.
+
+    Each window is a dataset itself and contains `size` elements (or
+    possibly fewer if there are not enough input elements to fill the window
+    and `drop_remainder` evaluates to false).
+
+    The `stride` argument determines the stride of the input elements,
+    and the `shift` argument determines the shift of the window.
+
+    For example:
+    - `tf.data.Dataset.range(7).window(2)` produces
+      `{{0, 1}, {2, 3}, {4, 5}, {6}}`
+    - `tf.data.Dataset.range(7).window(3, 2, 1, True)` produces
+      `{{0, 1, 2}, {2, 3, 4}, {4, 5, 6}}`
+    - `tf.data.Dataset.range(7).window(3, 1, 2, True)` produces
+      `{{0, 2, 4}, {1, 3, 5}, {2, 4, 6}}`
+
+    Args:
+      size: A `tf.int64` scalar `tf.Tensor`, representing the number of elements
+        of the input dataset to combine into a window.
+      shift: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+        forward shift of the sliding window in each iteration. Defaults to
+        `size`.
+      stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+        stride of the input elements in the sliding window.
+      drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing
+        whether a window should be dropped in case its size is smaller than
+        `window_size`.
+
+    Returns:
+      Dataset: A `Dataset` of windows, each of which is a nested `Dataset` with
+        the same structure as this dataset, but a finite subsequence of its
+        elements.
+    """
+    if shift is None:
+      shift = size
+    return WindowDataset(self, size, shift, stride, drop_remainder)
+
 
 class TensorDataset(Dataset):
   """A `Dataset` with a single element, viz. a nested structure of tensors."""
@@ -2442,3 +2481,53 @@ class PrefetchDataset(Dataset):
   @property
   def output_types(self):
     return self._input_dataset.output_types
+
+
+class WindowDataset(Dataset):
+  """A dataset that creates window datasets from the input elements."""
+
+  def __init__(self, input_dataset, size, shift, stride, drop_remainder):
+    """See `window_dataset()` for more details."""
+    super(WindowDataset, self).__init__()
+    self._input_dataset = input_dataset
+    self._size = ops.convert_to_tensor(size, dtype=dtypes.int64, name="size")
+    self._shift = ops.convert_to_tensor(shift, dtype=dtypes.int64, name="shift")
+    self._stride = ops.convert_to_tensor(
+        stride, dtype=dtypes.int64, name="stride")
+    self._drop_remainder = ops.convert_to_tensor(
+        drop_remainder, dtype=dtypes.bool, name="drop_remainder")
+    self._output_classes = nest.pack_sequence_as(
+        input_dataset.output_classes,
+        [
+            _NestedDatasetComponent(  # pylint: disable=protected-access
+                output_classes=output_class,
+                output_shapes=output_shape,
+                output_types=output_type)
+            for output_class, output_shape, output_type in zip(
+                nest.flatten(input_dataset.output_classes),
+                nest.flatten(input_dataset.output_shapes),
+                nest.flatten(input_dataset.output_types))
+        ])
+    self._output_shapes = self._output_classes
+    self._output_types = self._output_classes
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.window_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._size,
+        self._shift,
+        self._stride,
+        self._drop_remainder,
+        **flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
index 87745420ee..c3ba2dba57 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
@@ -110,6 +110,10 @@ tf_class {
     name: "take"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
index 6dd46365b0..3541671bee 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -111,6 +111,10 @@ tf_class {
     name: "take"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
index 35b7105eba..b113c18ee0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -111,6 +111,10 @@ tf_class {
     name: "take"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
index 8ae370af98..7210bf5db4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
@@ -111,6 +111,10 @@ tf_class {
     name: "take"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
index 87745420ee..c3ba2dba57 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
@@ -110,6 +110,10 @@ tf_class {
     name: "take"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
index 6dd46365b0..3541671bee 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -111,6 +111,10 @@ tf_class {
     name: "take"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
index 35b7105eba..b113c18ee0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -111,6 +111,10 @@ tf_class {
     name: "take"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
index 8ae370af98..7210bf5db4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
@@ -111,6 +111,10 @@ tf_class {
     name: "take"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
-- 
GitLab


From 0b80d098704c72f627f37bfeee0ae19788c06fa8 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Mon, 17 Sep 2018 16:32:12 -0700
Subject: [PATCH 0296/1357] Add basic op resolver registration to TFLite C API

PiperOrigin-RevId: 213360279
---
 tensorflow/contrib/lite/experimental/c/BUILD  |  2 ++
 .../contrib/lite/experimental/c/c_api.cc      |  4 +++
 .../contrib/lite/experimental/c/c_api.h       |  3 +-
 .../lite/experimental/c/c_api_experimental.cc | 16 +++++++++
 .../lite/experimental/c/c_api_experimental.h  | 25 ++++++++++++++
 .../experimental/c/c_api_experimental_test.cc | 23 ++++++++++---
 .../lite/experimental/c/c_api_internal.h      |  2 ++
 .../contrib/lite/mutable_op_resolver.cc       | 15 ++++++--
 tensorflow/contrib/lite/mutable_op_resolver.h |  8 +++--
 .../contrib/lite/mutable_op_resolver_test.cc  | 34 +++++++++++++++++++
 10 files changed, 122 insertions(+), 10 deletions(-)

diff --git a/tensorflow/contrib/lite/experimental/c/BUILD b/tensorflow/contrib/lite/experimental/c/BUILD
index ea4a543252..835fc2595e 100644
--- a/tensorflow/contrib/lite/experimental/c/BUILD
+++ b/tensorflow/contrib/lite/experimental/c/BUILD
@@ -68,6 +68,7 @@ cc_library(
     deps = [
         ":c_api",
         ":c_api_internal",
+        "//tensorflow/contrib/lite:kernel_api",
     ],
 )
 
@@ -93,6 +94,7 @@ cc_test(
     deps = [
         ":c_api",
         ":c_api_experimental",
+        "//tensorflow/contrib/lite:kernel_api",
         "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
diff --git a/tensorflow/contrib/lite/experimental/c/c_api.cc b/tensorflow/contrib/lite/experimental/c/c_api.cc
index c589cf71ea..1c3996fb87 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api.cc
+++ b/tensorflow/contrib/lite/experimental/c/c_api.cc
@@ -62,7 +62,11 @@ TFL_Interpreter* TFL_NewInterpreter(
     return nullptr;
   }
 
+  // TODO(b/111881878): Allow use of C API without pulling in all builtin ops.
   tflite::ops::builtin::BuiltinOpResolver resolver;
+  if (optional_options) {
+    resolver.AddAll(optional_options->op_resolver);
+  }
   tflite::InterpreterBuilder builder(*model->impl, resolver);
   std::unique_ptr<tflite::Interpreter> interpreter;
   if (builder(&interpreter) != kTfLiteOk) {
diff --git a/tensorflow/contrib/lite/experimental/c/c_api.h b/tensorflow/contrib/lite/experimental/c/c_api.h
index b429e76870..44b936aa87 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api.h
+++ b/tensorflow/contrib/lite/experimental/c/c_api.h
@@ -52,8 +52,9 @@ limitations under the License.
 extern "C" {
 #endif  // __cplusplus
 
-typedef TfLiteTensor TFL_Tensor;
+typedef TfLiteRegistration TFL_Registration;
 typedef TfLiteStatus TFL_Status;
+typedef TfLiteTensor TFL_Tensor;
 typedef TfLiteType TFL_Type;
 
 // --------------------------------------------------------------------------
diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc
index c4dbc55cbf..0f16595811 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc
+++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc
@@ -26,6 +26,22 @@ TFL_Status TFL_InterpreterResetVariableTensorsToZero(
   return interpreter->impl->ResetVariableTensorsToZero();
 }
 
+void TFL_InterpreterOptionsAddBuiltinOp(TFL_InterpreterOptions* options,
+                                        TFL_BuiltinOperator op,
+                                        const TFL_Registration* registration,
+                                        int32_t min_version,
+                                        int32_t max_version) {
+  options->op_resolver.AddBuiltin(static_cast<tflite::BuiltinOperator>(op),
+                                  registration, min_version, max_version);
+}
+
+void TFL_InterpreterOptionsAddCustomOp(TFL_InterpreterOptions* options,
+                                       const char* name,
+                                       const TFL_Registration* registration,
+                                       int min_version, int max_version) {
+  options->op_resolver.AddCustom(name, registration, min_version, max_version);
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h
index b0ac258dcf..b8de7b9964 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h
+++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h
@@ -15,16 +15,41 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_C_C_API_EXPERIMENTAL_H_
 #define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_C_C_API_EXPERIMENTAL_H_
 
+#include "tensorflow/contrib/lite/builtin_ops.h"
 #include "tensorflow/contrib/lite/experimental/c/c_api.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif  // __cplusplus
 
+typedef TfLiteBuiltinOperator TFL_BuiltinOperator;
+
 // Resets all variable tensors to zero.
 TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensorsToZero(
     TFL_Interpreter* interpreter);
 
+// Adds an op registration for a builtin operator.
+//
+// NOTE: The interpreter will make a copy of `registration` internally, so the
+// caller should ensure that its contents (function pointers, etc...) remain
+// valid for the duration of the interpreter's lifetime. A common practice is
+// making the provided TFL_Registration instance static.
+void TFL_InterpreterOptionsAddBuiltinOp(TFL_InterpreterOptions* options,
+                                        TFL_BuiltinOperator op,
+                                        const TFL_Registration* registration,
+                                        int min_version, int max_version);
+
+// Adds an op registration for a custom operator.
+//
+// NOTE: The interpreter will make a copy of `registration` internally, so the
+// caller should ensure that its contents (function pointers, etc...) remain
+// valid for the duration of the interpreter's lifetime. A common practice is
+// making the provided TFL_Registration instance static.
+void TFL_InterpreterOptionsAddCustomOp(TFL_InterpreterOptions* options,
+                                       const char* name,
+                                       const TFL_Registration* registration,
+                                       int min_version, int max_version);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc
index db6e5251de..d86ad00d6d 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc
+++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc
@@ -16,25 +16,40 @@ limitations under the License.
 #include "tensorflow/contrib/lite/experimental/c/c_api_experimental.h"
 
 #include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/builtin_ops.h"
 #include "tensorflow/contrib/lite/experimental/c/c_api.h"
 #include "tensorflow/contrib/lite/testing/util.h"
 
 namespace {
 
+TfLiteRegistration* GetDummyRegistration() {
+  static TfLiteRegistration registration = {
+      .init = nullptr,
+      .free = nullptr,
+      .prepare = nullptr,
+      .invoke = [](TfLiteContext*, TfLiteNode*) { return kTfLiteOk; },
+  };
+  return &registration;
+}
+
 TEST(CApiExperimentalSimple, Smoke) {
   TFL_Model* model = TFL_NewModelFromFile(
       "tensorflow/contrib/lite/testdata/add.bin");
   ASSERT_NE(model, nullptr);
 
-  TFL_Interpreter* interpreter =
-      TFL_NewInterpreter(model, /*optional_options=*/nullptr);
+  TFL_InterpreterOptions* options = TFL_NewInterpreterOptions();
+  TFL_InterpreterOptionsAddBuiltinOp(options, kTfLiteBuiltinAdd,
+                                     GetDummyRegistration(), 1, 1);
+
+  TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options);
   ASSERT_NE(interpreter, nullptr);
   ASSERT_EQ(TFL_InterpreterAllocateTensors(interpreter), kTfLiteOk);
-
   EXPECT_EQ(TFL_InterpreterResetVariableTensorsToZero(interpreter), kTfLiteOk);
+  EXPECT_EQ(TFL_InterpreterInvoke(interpreter), kTfLiteOk);
 
-  TFL_DeleteModel(model);
   TFL_DeleteInterpreter(interpreter);
+  TFL_DeleteInterpreterOptions(options);
+  TFL_DeleteModel(model);
 }
 
 }  // namespace
diff --git a/tensorflow/contrib/lite/experimental/c/c_api_internal.h b/tensorflow/contrib/lite/experimental/c/c_api_internal.h
index 60c2e4e2cd..af675ac98a 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api_internal.h
+++ b/tensorflow/contrib/lite/experimental/c/c_api_internal.h
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/op_resolver.h"
 
 // Internal structures used by the C API. These are likely to change and should
 // not be depended on.
@@ -33,6 +34,7 @@ struct TFL_InterpreterOptions {
     kDefaultNumThreads = -1,
   };
   int num_threads = kDefaultNumThreads;
+  tflite::MutableOpResolver op_resolver;
 };
 
 struct TFL_Interpreter {
diff --git a/tensorflow/contrib/lite/mutable_op_resolver.cc b/tensorflow/contrib/lite/mutable_op_resolver.cc
index d7c0181720..a36404399b 100644
--- a/tensorflow/contrib/lite/mutable_op_resolver.cc
+++ b/tensorflow/contrib/lite/mutable_op_resolver.cc
@@ -30,7 +30,7 @@ const TfLiteRegistration* MutableOpResolver::FindOp(const char* op,
 }
 
 void MutableOpResolver::AddBuiltin(tflite::BuiltinOperator op,
-                                   TfLiteRegistration* registration,
+                                   const TfLiteRegistration* registration,
                                    int min_version, int max_version) {
   for (int version = min_version; version <= max_version; ++version) {
     TfLiteRegistration new_registration = *registration;
@@ -43,7 +43,7 @@ void MutableOpResolver::AddBuiltin(tflite::BuiltinOperator op,
 }
 
 void MutableOpResolver::AddCustom(const char* name,
-                                  TfLiteRegistration* registration,
+                                  const TfLiteRegistration* registration,
                                   int min_version, int max_version) {
   for (int version = min_version; version <= max_version; ++version) {
     TfLiteRegistration new_registration = *registration;
@@ -55,4 +55,15 @@ void MutableOpResolver::AddCustom(const char* name,
   }
 }
 
+void MutableOpResolver::AddAll(const MutableOpResolver& other) {
+  // map::insert does not replace existing elements, and map::insert_or_assign
+  // wasn't added until C++17.
+  for (const auto& other_builtin : other.builtins_) {
+    builtins_[other_builtin.first] = other_builtin.second;
+  }
+  for (const auto& other_custom_op : other.custom_ops_) {
+    custom_ops_[other_custom_op.first] = other_custom_op.second;
+  }
+}
+
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/mutable_op_resolver.h b/tensorflow/contrib/lite/mutable_op_resolver.h
index c319041e9b..efd6cfac2a 100644
--- a/tensorflow/contrib/lite/mutable_op_resolver.h
+++ b/tensorflow/contrib/lite/mutable_op_resolver.h
@@ -57,10 +57,12 @@ class MutableOpResolver : public OpResolver {
   const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
                                    int version) const override;
   const TfLiteRegistration* FindOp(const char* op, int version) const override;
-  void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
-                  int min_version = 1, int max_version = 1);
-  void AddCustom(const char* name, TfLiteRegistration* registration,
+  void AddBuiltin(tflite::BuiltinOperator op,
+                  const TfLiteRegistration* registration, int min_version = 1,
+                  int max_version = 1);
+  void AddCustom(const char* name, const TfLiteRegistration* registration,
                  int min_version = 1, int max_version = 1);
+  void AddAll(const MutableOpResolver& other);
 
  private:
   typedef std::pair<tflite::BuiltinOperator, int> BuiltinOperatorKey;
diff --git a/tensorflow/contrib/lite/mutable_op_resolver_test.cc b/tensorflow/contrib/lite/mutable_op_resolver_test.cc
index db690eaab9..b70c703839 100644
--- a/tensorflow/contrib/lite/mutable_op_resolver_test.cc
+++ b/tensorflow/contrib/lite/mutable_op_resolver_test.cc
@@ -36,6 +36,20 @@ TfLiteRegistration* GetDummyRegistration() {
   return &registration;
 }
 
+TfLiteStatus Dummy2Invoke(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteRegistration* GetDummy2Registration() {
+  static TfLiteRegistration registration = {
+      .init = nullptr,
+      .free = nullptr,
+      .prepare = nullptr,
+      .invoke = Dummy2Invoke,
+  };
+  return &registration;
+}
+
 TEST(MutableOpResolverTest, FinOp) {
   MutableOpResolver resolver;
   resolver.AddBuiltin(BuiltinOperator_ADD, GetDummyRegistration());
@@ -119,6 +133,26 @@ TEST(MutableOpResolverTest, FindCustomOpWithUnsupportedVersion) {
   EXPECT_EQ(found_registration, nullptr);
 }
 
+TEST(MutableOpResolverTest, AddAll) {
+  MutableOpResolver resolver1;
+  resolver1.AddBuiltin(BuiltinOperator_ADD, GetDummyRegistration());
+  resolver1.AddBuiltin(BuiltinOperator_MUL, GetDummy2Registration());
+
+  MutableOpResolver resolver2;
+  resolver2.AddBuiltin(BuiltinOperator_SUB, GetDummyRegistration());
+  resolver2.AddBuiltin(BuiltinOperator_ADD, GetDummy2Registration());
+
+  // resolver2's ADD op should replace resolver1's ADD op, while augmenting
+  // non-overlapping ops.
+  resolver1.AddAll(resolver2);
+  ASSERT_EQ(resolver1.FindOp(BuiltinOperator_ADD, 1)->invoke,
+            GetDummy2Registration()->invoke);
+  ASSERT_EQ(resolver1.FindOp(BuiltinOperator_MUL, 1)->invoke,
+            GetDummy2Registration()->invoke);
+  ASSERT_EQ(resolver1.FindOp(BuiltinOperator_SUB, 1)->invoke,
+            GetDummyRegistration()->invoke);
+}
+
 }  // namespace
 }  // namespace tflite
 
-- 
GitLab


From 7a67406abda84cb5c2da02ed4d77a85ddfd2a417 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Fri, 14 Sep 2018 12:38:07 -0700
Subject: [PATCH 0297/1357] Update 1.11.0-rc0 version strings to 1.11.0-rc1
 (#22284)

---
 tensorflow/core/public/version.h      | 2 +-
 tensorflow/tools/pip_package/setup.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 1f71e24eeb..b043a69431 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -24,7 +24,7 @@ limitations under the License.
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc0"
+#define TF_VERSION_SUFFIX "-rc1"
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 8442e58f20..d40ffb8cd0 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.11.0-rc0'
+_VERSION = '1.11.0-rc1'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From 0cdf60ff8239a68326af9610e715f42c773be731 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 16:41:38 -0700
Subject: [PATCH 0298/1357] Make HLO liveness analysis correctly handle
 computations with side effect instructions.

PiperOrigin-RevId: 213361904
---
 .../xla/service/hlo_liveness_analysis.cc      | 35 +++++++-
 .../xla/service/hlo_liveness_analysis_test.cc | 84 +++++++++++++++++++
 2 files changed, 115 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc
index 3a1dd471c6..5bf055f3c0 100644
--- a/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc
@@ -219,6 +219,33 @@ void PropagateLivenessToParameterCallers(
   }
 }
 
+// Makes sure that if a live instruction is within a computation used in control
+// flow operations, we mark live even other related instructions.
+void PropagateLivenessThroughControlFlow(
+    const HloInstruction* instruction,
+    HloLivenessAnalysis::HloIndexMap* live_index_map, Worklist* worklist,
+    Workset* workset, CallGraph* call_graph) {
+  const CallGraphNode& call_graph_node =
+      call_graph->GetNode(instruction->parent());
+  if (call_graph_node.context() == CallContext::kSequential) {
+    for (const CallSite& callsite : call_graph_node.caller_callsites()) {
+      HloInstruction* caller = callsite.instruction();
+      if (caller->opcode() == HloOpcode::kWhile) {
+        // If a live instruction is within the %while body or condition
+        // computation, mark the predicate value returned by the condition
+        // computation live as well.
+        MarkLiveAtIndex(caller->while_condition()->root_instruction(), {},
+                        live_index_map, worklist, workset);
+      } else if (caller->opcode() == HloOpcode::kConditional) {
+        // If a live instruction is within the true or false branches of a
+        // conditional, we mark the predicate operand live as well.
+        MarkLiveAtIndex(caller->operand(0), {}, live_index_map, worklist,
+                        workset);
+      }
+    }
+  }
+}
+
 }  // namespace
 
 HloLivenessAnalysis::HloLivenessAnalysis(const HloModule& module)
@@ -257,12 +284,10 @@ void HloLivenessAnalysis::RunAnalysis() {
     } else if (instruction->opcode() == HloOpcode::kGetTupleElement) {
       PropagateLivenessThroughGTE(instruction, &live_index_map_, &worklist,
                                   &workset);
-    } else if (instruction->opcode() == HloOpcode::kWhile &&
-               ShapeUtil::IsTuple(instruction->shape())) {
+    } else if (instruction->opcode() == HloOpcode::kWhile) {
       PropagateLivenessThroughWhile(instruction, &live_index_map_, &worklist,
                                     &workset);
-    } else if (instruction->opcode() == HloOpcode::kParameter &&
-               ShapeUtil::IsTuple(instruction->shape())) {
+    } else if (instruction->opcode() == HloOpcode::kParameter) {
       PropagateLivenessToParameterCallers(instruction, &live_index_map_,
                                           &worklist, &workset,
                                           call_graph_.get());
@@ -277,6 +302,8 @@ void HloLivenessAnalysis::RunAnalysis() {
         MarkLiveAtAllIndices(operand, &live_index_map_, &worklist, &workset);
       }
     }
+    PropagateLivenessThroughControlFlow(instruction, &live_index_map_,
+                                        &worklist, &workset, call_graph_.get());
   }
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc
index 01b625c29c..e0ae1173c6 100644
--- a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc
@@ -398,5 +398,89 @@ TEST_F(HloLivenessAnalysisTest, WhileWithLiveTupleElements) {
   EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "loop_var.1"), {2}));
 }
 
+TEST_F(HloLivenessAnalysisTest, WhileWithOutfeed) {
+  auto module = ParseHloString(R"(
+  HloModule OutfeedLoop
+  WhileBody {
+    body_param = (s32[]) parameter(0)
+    token = token[] after-all()
+    constant.2 = s32[] constant(2)
+    outfeed_tuple = (s32[]) outfeed(constant.2, token)
+    get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0
+    constant.1 = s32[] constant(1)
+    add = s32[] add(get-tuple-element.1, constant.1)
+    ROOT tuple = (s32[]) tuple(add)
+  }
+  WhileCondition {
+    cond_param = (s32[]) parameter(0)
+    get-tuple-element.3 = s32[] get-tuple-element(cond_param), index=0
+    constant.2 = s32[] constant(10)
+    ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2)
+  }
+  ENTRY SimpleLoop {
+    constant.3 = s32[] constant(0)
+    tuple.1 = (s32[]) tuple(constant.3)
+    while = (s32[]) while(tuple.1), condition=WhileCondition,
+      body=WhileBody
+    ROOT rtuple = () tuple()
+  })")
+                    .ValueOrDie();
+
+  const HloLivenessAnalysis& liveness = RunLiveness(module.get());
+  EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "add"), {}));
+  EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "constant.3"), {}));
+}
+
+TEST_F(HloLivenessAnalysisTest, NestedWhileWithOutfeed) {
+  auto module = ParseHloString(R"(
+  HloModule OutfeedLoop
+  InnerWhileBody {
+    body_param = (s32[]) parameter(0)
+    token = token[] after-all()
+    constant.2 = s32[] constant(2)
+    outfeed_tuple = (s32[]) outfeed(constant.2, token)
+    get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0
+    constant.1 = s32[] constant(1)
+    add = s32[] add(get-tuple-element.1, constant.1)
+    ROOT tuple = (s32[]) tuple(add)
+  }
+  InnerWhileCondition {
+    cond_param = (s32[]) parameter(0)
+    get-tuple-element.3 = s32[] get-tuple-element(cond_param), index=0
+    constant.2 = s32[] constant(10)
+    ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2)
+  }
+  OuterWhileCondition {
+    cond_param.2 = (s32[]) parameter(0)
+    get-tuple-element.5 = s32[] get-tuple-element(cond_param.2), index=0
+    constant.5 = s32[] constant(5)
+    ROOT less-than.2 = pred[] less-than(get-tuple-element.5, constant.5)
+  }
+  OuterWhileBody {
+    body_param.2 = (s32[]) parameter(0)
+    get-tuple-element.8 = s32[] get-tuple-element(body_param.2), index=0
+    constant.6 = s32[] constant(0)
+    tuple.2 = (s32[]) tuple(constant.6)
+    inner_while = (s32[]) while(tuple.2), condition=InnerWhileCondition,
+      body=InnerWhileBody
+    constant.7 = s32[] constant(1)
+    add.2 = s32[] add(get-tuple-element.8, constant.7)
+    ROOT rtuple = (s32[]) tuple(add.2)
+  }
+  ENTRY SimpleLoop {
+    constant.3 = s32[] constant(0)
+    tuple.1 = (s32[]) tuple(constant.3)
+    while = (s32[]) while(tuple.1), condition=OuterWhileCondition,
+      body=OuterWhileBody
+    ROOT rtuple = () tuple()
+  })")
+                    .ValueOrDie();
+
+  const HloLivenessAnalysis& liveness = RunLiveness(module.get());
+  EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "add"), {}));
+  EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "add.2"), {}));
+  EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "constant.3"), {}));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 6805a8b27759a530f0ebab0670593a05455a64a0 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 17 Sep 2018 16:41:56 -0700
Subject: [PATCH 0299/1357] Changing `OpInputList` so that it is a forward
 iterator and taking advantage of the fact in the tf.data kernels.

PiperOrigin-RevId: 213361953
---
 tensorflow/core/framework/op_kernel.h         | 31 ++++++++---
 .../core/kernels/data/captured_function.cc    | 29 ++++-------
 .../core/kernels/data/captured_function.h     | 22 +++-----
 .../core/kernels/data/filter_dataset_op.cc    | 13 ++---
 .../core/kernels/data/flat_map_dataset_op.cc  | 13 +----
 .../core/kernels/data/generator_dataset_op.cc | 44 ++++------------
 .../data/group_by_window_dataset_op.cc        | 51 ++++---------------
 .../kernels/data/interleave_dataset_op.cc     | 12 +----
 .../kernels/data/map_and_batch_dataset_op.cc  | 12 +----
 .../core/kernels/data/map_dataset_op.cc       | 14 ++---
 tensorflow/core/kernels/data/optional_ops.cc  |  7 +--
 .../data/parallel_interleave_dataset_op.cc    | 25 ++-------
 .../kernels/data/parallel_map_dataset_op.cc   | 14 ++---
 .../kernels/data/parse_example_dataset_op.cc  |  7 +--
 .../core/kernels/data/scan_dataset_op.cc      | 19 ++-----
 .../core/kernels/data/tensor_dataset_op.cc    |  6 +--
 16 files changed, 88 insertions(+), 231 deletions(-)

diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index e752599de1..4bbd6c3d7d 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -372,18 +372,37 @@ class OpKernelConstruction {
 template <typename ListType, typename ElementType>
 class OpArgIterator {
  public:
-  typedef OpArgIterator<ListType, ElementType> ME;
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = ElementType;
+  using pointer = ElementType*;
+  using reference = ElementType&;
+  using difference_type = ptrdiff_t;
+
   OpArgIterator(const ListType* list, int i) : list_(list), i_(i) {}
-  bool operator==(const ME& rhs) {
+
+  bool operator==(const OpArgIterator& rhs) {
     DCHECK(list_ == rhs.list_);
     return i_ == rhs.i_;
   }
-  bool operator!=(const ME& rhs) {
+
+  bool operator!=(const OpArgIterator& rhs) {
     DCHECK(list_ == rhs.list_);
     return i_ != rhs.i_;
   }
-  void operator++() { ++i_; }
-  ElementType& operator*() { return (*list_)[i_]; }
+
+  OpArgIterator operator++() {  // prefix ++it
+    ++i_;
+    return *this;
+  }
+
+  OpArgIterator operator++(int) {  // postfix it++
+    OpArgIterator old_value = *this;
+    ++i_;
+    return old_value;
+  }
+
+  reference operator*() { return (*list_)[i_]; }
+  pointer operator->() { return &(*list_)[i_]; }
 
  private:
   const ListType* const list_;
@@ -394,7 +413,7 @@ class OpArgIterator {
 // that are passed to the op as a single named argument.
 class OpInputList {
  public:
-  typedef OpArgIterator<OpInputList, const Tensor&> Iterator;
+  typedef OpArgIterator<OpInputList, const Tensor> Iterator;
   OpInputList() : ctx_(nullptr), start_(0), stop_(0) {}
   OpInputList(OpKernelContext* ctx, int start, int stop)
       : ctx_(ctx), start_(start), stop_(stop) {}
diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc
index 31c8f5c0ea..b3ab7e2bc6 100644
--- a/tensorflow/core/kernels/data/captured_function.cc
+++ b/tensorflow/core/kernels/data/captured_function.cc
@@ -22,41 +22,30 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/notification.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
 
 /* static */
 Status CapturedFunction::Create(
-    const NameAttrList& func, std::vector<Tensor> captured_inputs,
+    const NameAttrList& func, OpKernelContext* ctx, const string& argument,
     std::unique_ptr<CapturedFunction>* out_function) {
-  return Create(func, std::move(captured_inputs), true, out_function);
+  return CapturedFunction::Create(func, ctx, argument, true, out_function);
 }
 
-/* static */
 Status CapturedFunction::Create(
-    const NameAttrList& func, std::vector<Tensor> captured_inputs,
+    const NameAttrList& func, OpKernelContext* ctx, const string& argument,
     bool use_inter_op_parallelism,
     std::unique_ptr<CapturedFunction>* out_function) {
-  out_function->reset(new CapturedFunction(func, std::move(captured_inputs),
-                                           use_inter_op_parallelism));
+  OpInputList inputs;
+  TF_RETURN_IF_ERROR(ctx->input_list(argument, &inputs));
+  std::vector<Tensor> arguments(inputs.begin(), inputs.end());
+  *out_function = WrapUnique(new CapturedFunction(func, std::move(arguments),
+                                                  use_inter_op_parallelism));
   return Status::OK();
 }
 
-/* static */
-Status CapturedFunction::Create(
-    const NameAttrList& func, OpKernelContext* ctx, const string& argument,
-    std::unique_ptr<CapturedFunction>* out_function) {
-  OpInputList argument_inputs;
-  TF_RETURN_IF_ERROR(ctx->input_list(argument, &argument_inputs));
-  std::vector<Tensor> arguments_t;
-  arguments_t.reserve(argument_inputs.size());
-  for (const Tensor& t : argument_inputs) {
-    arguments_t.push_back(t);
-  }
-  return CapturedFunction::Create(func, std::move(arguments_t), out_function);
-}
-
 CapturedFunction::~CapturedFunction() {
   if (lib_ != nullptr && f_handle_ != kInvalidHandle) {
     lib_->ReleaseHandle(f_handle_).IgnoreError();
diff --git a/tensorflow/core/kernels/data/captured_function.h b/tensorflow/core/kernels/data/captured_function.h
index 8b420fa5db..a10376bf97 100644
--- a/tensorflow/core/kernels/data/captured_function.h
+++ b/tensorflow/core/kernels/data/captured_function.h
@@ -42,27 +42,19 @@ namespace data {
 // context.
 class CapturedFunction {
  public:
-  // Creates a new instance from a list of named attributes and captured inputs.
-  //
-  // NOTE(mrry): The `captured_inputs` are passed by value. For
-  // efficiency, you are recommended to move this argument into the call.
-  static Status Create(const NameAttrList& func,
-                       std::vector<Tensor> captured_inputs,
+  // Creates a new instance using a list of named attributes, fetching captured
+  // inputs from a context argument.
+  static Status Create(const NameAttrList& func, OpKernelContext* ctx,
+                       const string& argument,
                        std::unique_ptr<CapturedFunction>* out_function);
 
-  // Creates a new instance from a list of named attributes and captured inputs.
+  // Creates a new instance using a list of named attributes, fetching captured
+  // inputs from a context argument.
   //
   // If `use_inter_op_parallelism` is false, the runtime may use an executor
   // that is optimized for small functions.
-  static Status Create(const NameAttrList& func,
-                       std::vector<Tensor> captured_inputs,
-                       bool use_inter_op_parallelism,
-                       std::unique_ptr<CapturedFunction>* out_function);
-
-  // Creates a new instance using a list of named attributes, fetching captured
-  // inputs from a context argument.
   static Status Create(const NameAttrList& func, OpKernelContext* ctx,
-                       const string& argument,
+                       const string& argument, bool use_inter_op_parallelism,
                        std::unique_ptr<CapturedFunction>* out_function);
 
   ~CapturedFunction();
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index bf0aecaf3c..19c35f94a6 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -37,14 +37,6 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
-
     FunctionLibraryRuntime::Handle pred_handle;
     OP_REQUIRES_OK(ctx,
                    ctx->function_library()->Instantiate(
@@ -61,9 +53,10 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
     Node* ret_node = pred_body->ret_nodes[0];
     Node* ret_input_node;
     OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node));
+
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                            func_, std::move(other_arguments), &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
+                                                 &captured_func));
 
     if (ret_input_node->def().op() == "_Arg") {
       int32 index = -1;
diff --git a/tensorflow/core/kernels/data/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
index e3c45ef86c..2fada22a21 100644
--- a/tensorflow/core/kernels/data/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
@@ -39,18 +39,9 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
-
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                            func_, std::move(other_arguments), &captured_func));
-
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
+                                                 &captured_func));
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
                           output_types_, output_shapes_);
   }
diff --git a/tensorflow/core/kernels/data/generator_dataset_op.cc b/tensorflow/core/kernels/data/generator_dataset_op.cc
index ac5cc1b2c1..71a36314a0 100644
--- a/tensorflow/core/kernels/data/generator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/generator_dataset_op.cc
@@ -145,44 +145,18 @@ GeneratorDatasetOp::GeneratorDatasetOp(OpKernelConstruction* ctx)
 
 void GeneratorDatasetOp::MakeDataset(OpKernelContext* ctx,
                                      DatasetBase** output) {
-  OpInputList init_func_other_args_input;
-  OP_REQUIRES_OK(ctx, ctx->input_list("init_func_other_args",
-                                      &init_func_other_args_input));
-  std::vector<Tensor> init_func_other_args;
-  init_func_other_args.reserve(init_func_other_args_input.size());
-  for (const Tensor& t : init_func_other_args_input) {
-    init_func_other_args.push_back(t);
-  }
   std::unique_ptr<CapturedFunction> init_func;
-  OP_REQUIRES_OK(
-      ctx, CapturedFunction::Create(init_func_, std::move(init_func_other_args),
-                                    &init_func));
-
-  OpInputList next_func_other_args_input;
-  OP_REQUIRES_OK(ctx, ctx->input_list("next_func_other_args",
-                                      &next_func_other_args_input));
-  std::vector<Tensor> next_func_other_args;
-  next_func_other_args.reserve(next_func_other_args_input.size());
-  for (const Tensor& t : next_func_other_args_input) {
-    next_func_other_args.push_back(t);
-  }
+  OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                          init_func_, ctx, "init_func_other_args", &init_func));
+
   std::unique_ptr<CapturedFunction> next_func;
-  OP_REQUIRES_OK(
-      ctx, CapturedFunction::Create(next_func_, std::move(next_func_other_args),
-                                    &next_func));
-
-  OpInputList finalize_func_other_args_input;
-  OP_REQUIRES_OK(ctx, ctx->input_list("finalize_func_other_args",
-                                      &finalize_func_other_args_input));
-  std::vector<Tensor> finalize_func_other_args;
-  finalize_func_other_args.reserve(finalize_func_other_args_input.size());
-  for (const Tensor& t : finalize_func_other_args_input) {
-    finalize_func_other_args.push_back(t);
-  }
-  std::unique_ptr<CapturedFunction> finalize_func;
   OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                          finalize_func_, std::move(finalize_func_other_args),
-                          &finalize_func));
+                          next_func_, ctx, "next_func_other_args", &next_func));
+
+  std::unique_ptr<CapturedFunction> finalize_func;
+  OP_REQUIRES_OK(ctx, CapturedFunction::Create(finalize_func_, ctx,
+                                               "finalize_func_other_args",
+                                               &finalize_func));
 
   *output =
       new Dataset(ctx, std::move(init_func), std::move(next_func),
diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
index e4fa557598..8b417bb1c2 100644
--- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
@@ -42,50 +42,19 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    // Get captured inputs for the key, reduce, and window_size functions.
-    OpInputList key_func_other_argument_inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("key_func_other_arguments",
-                                        &key_func_other_argument_inputs));
-    std::vector<Tensor> key_func_other_arguments;
-    key_func_other_arguments.reserve(key_func_other_argument_inputs.size());
-    for (const Tensor& t : key_func_other_argument_inputs) {
-      key_func_other_arguments.push_back(t);
-    }
-    OpInputList reduce_func_other_argument_inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("reduce_func_other_arguments",
-                                        &reduce_func_other_argument_inputs));
-    std::vector<Tensor> reduce_func_other_arguments;
-    reduce_func_other_arguments.reserve(
-        reduce_func_other_argument_inputs.size());
-    for (const Tensor& t : reduce_func_other_argument_inputs) {
-      reduce_func_other_arguments.push_back(t);
-    }
-    OpInputList window_size_func_other_argument_inputs;
-    OP_REQUIRES_OK(ctx,
-                   ctx->input_list("window_size_func_other_arguments",
-                                   &window_size_func_other_argument_inputs));
-    std::vector<Tensor> window_size_func_other_arguments;
-    window_size_func_other_arguments.reserve(
-        window_size_func_other_argument_inputs.size());
-    for (const Tensor& t : window_size_func_other_argument_inputs) {
-      window_size_func_other_arguments.push_back(t);
-    }
-    // TODO(mrry): Refactor CapturedFunction to share the runtime
-    // state between multiple functions?
     std::unique_ptr<CapturedFunction> captured_key_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                            key_func_, std::move(key_func_other_arguments),
-                            &captured_key_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(key_func_, ctx,
+                                                 "key_func_other_arguments",
+                                                 &captured_key_func));
     std::unique_ptr<CapturedFunction> captured_reduce_func;
-    OP_REQUIRES_OK(
-        ctx, CapturedFunction::Create(reduce_func_,
-                                      std::move(reduce_func_other_arguments),
-                                      &captured_reduce_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(reduce_func_, ctx,
+                                                 "reduce_func_other_arguments",
+                                                 &captured_reduce_func));
     std::unique_ptr<CapturedFunction> captured_window_size_func;
-    OP_REQUIRES_OK(
-        ctx, CapturedFunction::Create(
-                 window_size_func_, std::move(window_size_func_other_arguments),
-                 &captured_window_size_func));
+    OP_REQUIRES_OK(ctx,
+                   CapturedFunction::Create(window_size_func_, ctx,
+                                            "window_size_func_other_arguments",
+                                            &captured_window_size_func));
 
     *output = new Dataset(
         ctx, input, key_func_, reduce_func_, window_size_func_,
diff --git a/tensorflow/core/kernels/data/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc
index 0768f46665..0aa802b874 100644
--- a/tensorflow/core/kernels/data/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc
@@ -39,14 +39,6 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
-
     const Tensor* cycle_length_t;
     OP_REQUIRES_OK(ctx, ctx->input("cycle_length", &cycle_length_t));
     OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(cycle_length_t->shape()),
@@ -66,8 +58,8 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
         errors::InvalidArgument("block_length must be greater than zero."));
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                            func_, std::move(other_arguments), &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
+                                                 &captured_func));
 
     *output =
         new Dataset(ctx, input, func_, std::move(captured_func), cycle_length,
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 80efac5d4b..83896219a3 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -49,14 +49,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  protected:
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
-
     int64 batch_size;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "batch_size", &batch_size));
     OP_REQUIRES(
@@ -93,8 +85,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                    ParseScalarArgument(ctx, "drop_remainder", &drop_remainder));
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                            func_, std::move(other_arguments), &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
+                                                 &captured_func));
 
     *output = new Dataset(ctx, input, batch_size, num_parallel_calls,
                           drop_remainder, output_types_, output_shapes_, func_,
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index af301e2b42..f112e1dc43 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -38,18 +38,10 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
-
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                            func_, std::move(other_arguments),
-                            use_inter_op_parallelism_, &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
+                                                 use_inter_op_parallelism_,
+                                                 &captured_func));
 
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
                           output_types_, output_shapes_);
diff --git a/tensorflow/core/kernels/data/optional_ops.cc b/tensorflow/core/kernels/data/optional_ops.cc
index 6180df5af2..346e4ceebd 100644
--- a/tensorflow/core/kernels/data/optional_ops.cc
+++ b/tensorflow/core/kernels/data/optional_ops.cc
@@ -108,11 +108,8 @@ class OptionalFromValueOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override {
     OpInputList components_input;
     OP_REQUIRES_OK(ctx, ctx->input_list("components", &components_input));
-    std::vector<Tensor> components;
-    components.reserve(components_input.size());
-    for (const Tensor& component_t : components_input) {
-      components.push_back(component_t);
-    }
+    std::vector<Tensor> components(components_input.begin(),
+                                   components_input.end());
     OP_REQUIRES_OK(
         ctx, WriteOptionalWithValueToOutput(ctx, 0, std::move(components)));
   }
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 2f2db09508..9cd46bf5dd 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -44,14 +44,6 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
-
     int64 cycle_length = 0;
     OP_REQUIRES_OK(ctx,
                    ParseScalarArgument(ctx, "cycle_length", &cycle_length));
@@ -83,8 +75,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<CapturedFunction> captured_func;
     OP_REQUIRES_OK(
-        ctx, CapturedFunction::Create(
-                 interleave_func_, std::move(other_arguments), &captured_func));
+        ctx, CapturedFunction::Create(interleave_func_, ctx, "other_arguments",
+                                      &captured_func));
 
     *output =
         new Dataset(ctx, input, interleave_func_, std::move(captured_func),
@@ -1102,9 +1094,6 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-
     int64 cycle_length = 0;
     OP_REQUIRES_OK(ctx,
                    ParseScalarArgument(ctx, "cycle_length", &cycle_length));
@@ -1128,16 +1117,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         errors::InvalidArgument(
             "num_parallel_calls must less than or equal to cycle_length."));
 
-    // TODO(b/114267189): Use `other_arguments(inputs.begin(), inputs.end());`.
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
     std::unique_ptr<CapturedFunction> captured_func;
     OP_REQUIRES_OK(
-        ctx, CapturedFunction::Create(
-                 interleave_func_, std::move(other_arguments), &captured_func));
+        ctx, CapturedFunction::Create(interleave_func_, ctx, "other_arguments",
+                                      &captured_func));
 
     *output = new Dataset(ctx, input, interleave_func_,
                           std::move(captured_func), cycle_length, block_length,
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index b584316d69..6abe6c8338 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -44,14 +44,6 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
  protected:
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
-
     int32 num_parallel_calls;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
                                             &num_parallel_calls));
@@ -60,9 +52,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
                     "num_parallel_calls must be greater than zero."));
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                            func_, std::move(other_arguments),
-                            use_inter_op_parallelism_, &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
+                                                 use_inter_op_parallelism_,
+                                                 &captured_func));
 
     *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_,
                           output_shapes_, use_inter_op_parallelism_,
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index 0cf5db017b..c28c06da62 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -87,11 +87,8 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
                     "Expected len(dense_defaults) == len(dense_keys) but got: ",
                     dense_default_tensors.size(), " vs. ", dense_keys_.size()));
 
-    std::vector<Tensor> dense_defaults;
-    dense_defaults.reserve(dense_default_tensors.size());
-    for (const Tensor& dense_default_t : dense_default_tensors) {
-      dense_defaults.push_back(dense_default_t);
-    }
+    std::vector<Tensor> dense_defaults(dense_default_tensors.begin(),
+                                       dense_default_tensors.end());
 
     for (int d = 0; d < dense_keys_.size(); ++d) {
       const Tensor& def_value = dense_defaults[d];
diff --git a/tensorflow/core/kernels/data/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc
index 6e515d6cc8..dbe31f37b8 100644
--- a/tensorflow/core/kernels/data/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/data/scan_dataset_op.cc
@@ -45,23 +45,12 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
     OpInputList initial_state_inputs;
     OP_REQUIRES_OK(ctx,
                    ctx->input_list("initial_state", &initial_state_inputs));
-    std::vector<Tensor> initial_state;
-    initial_state.reserve(initial_state_inputs.size());
-    for (const Tensor& t : initial_state_inputs) {
-      initial_state.push_back(t);
-    }
-
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
+    std::vector<Tensor> initial_state(initial_state_inputs.begin(),
+                                      initial_state_inputs.end());
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                            func_, std::move(other_arguments), &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
+                                                 &captured_func));
 
     *output = new Dataset(ctx, input, func_, std::move(initial_state),
                           std::move(captured_func), state_types_, output_types_,
diff --git a/tensorflow/core/kernels/data/tensor_dataset_op.cc b/tensorflow/core/kernels/data/tensor_dataset_op.cc
index e1cefd23d8..ca4ea25b89 100644
--- a/tensorflow/core/kernels/data/tensor_dataset_op.cc
+++ b/tensorflow/core/kernels/data/tensor_dataset_op.cc
@@ -33,11 +33,7 @@ class TensorDatasetOp : public DatasetOpKernel {
     OP_REQUIRES_OK(ctx, ctx->input_list("components", &inputs));
     // TODO(mrry): Validate that the shapes of the "components" tensors match
     // the "shapes" attr.;
-    std::vector<Tensor> components;
-    components.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      components.push_back(t);
-    }
+    std::vector<Tensor> components(inputs.begin(), inputs.end());
     *output = new Dataset(ctx, std::move(components));
   }
 
-- 
GitLab


From 6e8293f1cdf2efe3cec2efdcfa89174893b0bace Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 16:51:19 -0700
Subject: [PATCH 0300/1357] Increase test timeout for
 dnn_tree_combined_estimator_test to de-flake.

PiperOrigin-RevId: 213363558
---
 tensorflow/contrib/boosted_trees/estimator_batch/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
index 5fcb19a47a..14b6fc4ac2 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
@@ -173,6 +173,7 @@ py_library(
 py_test(
     name = "dnn_tree_combined_estimator_test",
     size = "medium",
+    timeout = "long",
     srcs = ["dnn_tree_combined_estimator_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-- 
GitLab


From 928389d4d61f0cb5932672aeeafadb1c18514dd3 Mon Sep 17 00:00:00 2001
From: Eddie Zhou <eddz@google.com>
Date: Mon, 17 Sep 2018 17:06:11 -0700
Subject: [PATCH 0301/1357] Fixed bug where a mixture of Variable and
 PartitionedVariable would break SDCA.  Added new test that fails with
 `IndexError: list index out of range` in `_get_partitioned_update_ops`
 without the corresponding fix.

Note that the effect of this bug is minimal, because for Estimator users, it only applies to sparse features that are not partitionable (e.g. [1,]), since all variables are created with the same partitioner in Estimator).

PiperOrigin-RevId: 213365956
---
 .../python/kernel_tests/sdca_ops_test.py      | 62 +++++++++++++++++++
 .../linear_optimizer/python/ops/sdca_ops.py   | 26 ++++----
 2 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
index 7a1914d41f..9ecf023e03 100644
--- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
@@ -323,6 +323,68 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
         self.assertAllClose(
             0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2)
 
+  def testSomePartitionedPrimals(self):
+    # Setup test data
+    example_protos = [
+        make_example_proto({
+            'age': [0],
+            'gender': [0]
+        }, 0),
+        make_example_proto({
+            'age': [0],
+            'gender': [1]
+        }, 1),
+    ]
+    example_weights = [1.0, 1.0]
+    for num_shards in _SHARD_NUMBERS:
+      with self._single_threaded_test_session():
+        examples = make_example_dict(example_protos, example_weights)
+        # Explicitly make age a [1]-shaped Variable (which cannot be
+        # partitioned), while making gender a PartitionedVariable.
+        age_weights = variables_lib.Variable(
+            array_ops.zeros([1], dtype=dtypes.float32))
+        with variable_scope.variable_scope(
+            name_or_scope=('variables/shard_{}'.format(num_shards)
+                           if num_shards else 'variables'),
+            partitioner=partitioned_variables.fixed_size_partitioner(
+                num_shards=2, axis=0)):
+          gender_weights = variable_scope.get_variable(
+              name='gender',
+              initializer=array_ops.zeros([2], dtype=dtypes.float32))
+        variables = dict(
+            sparse_features_weights=[age_weights, gender_weights],
+            dense_features_weights=[])
+        options = dict(
+            symmetric_l2_regularization=1,
+            symmetric_l1_regularization=0,
+            num_table_shards=num_shards,
+            loss_type='logistic_loss')
+
+        lr = SdcaModel(examples, variables, options)
+        variables_lib.global_variables_initializer().run()
+        unregularized_loss = lr.unregularized_loss(examples)
+        loss = lr.regularized_loss(examples)
+        predictions = lr.predictions(examples)
+        self.assertAllClose(0.693147, unregularized_loss.eval())
+        self.assertAllClose(0.693147, loss.eval())
+        train_op = lr.minimize()
+        for _ in range(_MAX_ITERATIONS):
+          train_op.run()
+        lr.update_weights(train_op).run()
+        # The high tolerance in unregularized_loss comparisons is due to the
+        # fact that it's possible to trade off unregularized_loss vs.
+        # regularization and still have a sum that is quite close to the
+        # optimal regularized_loss value.  SDCA's duality gap only ensures that
+        # the regularized_loss is within 0.01 of optimal.
+        # 0.525457 is the optimal regularized_loss.
+        # 0.593014 is the unregularized_loss at that optimum.
+        self.assertAllClose(0.512591, unregularized_loss.eval(), atol=0.05)
+        self.assertAllClose(0.593014, loss.eval(), atol=0.01)
+        predicted_labels = get_binary_predictions_for_logistic(predictions)
+        self.assertAllEqual([0, 1], predicted_labels.eval())
+        self.assertAllClose(
+            0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2)
+
   def testSparseRandom(self):
     dim = 20
     num_examples = 1000
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index 14f59a3f64..b98adf862b 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -400,14 +400,16 @@ class SdcaModel(object):
 
       sparse_weights = []
       sparse_indices = []
-      # If we have partitioned variables, keep a few lists of Tensors around
-      # that we need for the assign_add after the op call to
-      # gen_sdca_ops.sdca_optimizer().
-      num_partitions_by_var = []
-      p_assignments_by_var = []
-      gather_ids_by_var = []
-      for w, i in zip(self._slots['unshrinked_sparse_features_weights'],
-                      sparse_feature_indices):
+      # If we have partitioned variables, keep a few dictionaries of Tensors
+      # around that we need for the assign_add after the op call to
+      # gen_sdca_ops.sdca_optimizer().  These are keyed because we may have a
+      # mix of partitioned and un-partitioned variables.
+      num_partitions_by_var = {}
+      p_assignments_by_var = {}
+      gather_ids_by_var = {}
+      for v_num, (w, i) in enumerate(
+          zip(self._slots['unshrinked_sparse_features_weights'],
+              sparse_feature_indices)):
         # Append the sparse_indices (in full-variable space).
         sparse_idx = math_ops.cast(
             array_ops.unique(math_ops.cast(i, dtypes.int32))[0],
@@ -456,10 +458,10 @@ class SdcaModel(object):
           gather_ids = data_flow_ops.dynamic_partition(new_ids,
                                                        p_assignments,
                                                        num_partitions)
-          # Append these to the lists for use in the later update.
-          num_partitions_by_var.append(num_partitions)
-          p_assignments_by_var.append(p_assignments)
-          gather_ids_by_var.append(gather_ids)
+          # Add these into the dictionaries for use in the later update.
+          num_partitions_by_var[v_num] = num_partitions
+          p_assignments_by_var[v_num] = p_assignments
+          gather_ids_by_var[v_num] = gather_ids
 
           # Gather the weights from each partition.
           partition_gathered_weights = []
-- 
GitLab


From 6d9bb99ea7a697e465ef66dea821a86ca94f845d Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Mon, 17 Sep 2018 17:22:40 -0700
Subject: [PATCH 0302/1357] Addressing review comments: indentation

---
 tensorflow/core/common_runtime/mkl_cpu_allocator.h  | 4 +---
 tensorflow/core/kernels/partitioned_function_ops.cc | 6 +++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 593f855ea2..01e5af5f8c 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -277,9 +277,7 @@ class MklCPUAllocator : public VisitableAllocator {
     // max_alloc_size from large_size_allocator would be the maximum
     // size allocated by MklCPUAllocator.
     stats->max_alloc_size = l_stats.max_alloc_size;
-
-    stats->bytes_limit =
-        std::max(s_stats.bytes_limit, l_stats.bytes_limit);
+    stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
   }
 
   void ClearStats() override {
diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index ddb621967a..42f99a73e6 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -100,8 +100,8 @@ class PartitionedCallOp : public AsyncOpKernel {
         // We need to pass global op_registry as default_registry when creating
         // graph. So that graph optimization passes can lookup all possible ops
         // by name.
-        FunctionLibraryDefinition func_lib_def(OpRegistry::Global(),
-                                            fbody->graph->flib_def().ToProto());
+        FunctionLibraryDefinition func_lib_def(
+            OpRegistry::Global(), fbody->graph->flib_def().ToProto());
         auto graph = tensorflow::MakeUnique<Graph>(func_lib_def);
         CopyGraph(*fbody->graph, graph.get());
         OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done);
@@ -256,7 +256,7 @@ class PartitionedCallOp : public AsyncOpKernel {
             << partitions.size() << " shards.";
 
     FunctionLibraryDefinition func_lib_def(OpRegistry::Global(),
-                                          graph->flib_def().ToProto());
+                                           graph->flib_def().ToProto());
     for (const auto& partition : partitions) {
       std::unique_ptr<Graph> subgraph(new Graph(func_lib_def));
       GraphConstructorOptions opts;
-- 
GitLab


From caf40776971791d00c7dd14057125ed5dd7346d5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 17:20:42 -0700
Subject: [PATCH 0303/1357] Remove unnecessary side-effect test, since HLO
 liveness now reports correct liveness information if a control flow
 computation contains side effect instructions.

PiperOrigin-RevId: 213367995
---
 tensorflow/compiler/xla/service/hlo_module_dce.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_module_dce.cc b/tensorflow/compiler/xla/service/hlo_module_dce.cc
index f7be5cae22..31d26cc51e 100644
--- a/tensorflow/compiler/xla/service/hlo_module_dce.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_dce.cc
@@ -50,9 +50,7 @@ StatusOr<bool> RunWhileDCE(HloModule* module, HloLivenessAnalysis* liveness) {
       auto* while_body_root = while_body_comp->root_instruction();
 
       if (!ShapeUtil::IsTuple(xla_while->shape()) ||
-          while_body_root->opcode() != HloOpcode::kTuple ||
-          while_body_comp->HasSideEffect() ||
-          xla_while->while_condition()->HasSideEffect()) {
+          while_body_root->opcode() != HloOpcode::kTuple) {
         // Only run DCE on tuple-shaped while loops where body root is Tuple,
         // with no I/O instructions.
         VLOG(1) << "WhileDCE SKIP while: " << xla_while->ToString();
-- 
GitLab


From 4338803b98cd825b0b1d810bcc51c9a79734feb6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 17:26:09 -0700
Subject: [PATCH 0304/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 213368723
---
 tensorflow/core/ops/ops.pbtxt | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 190f6aaa5b..4ece1c8953 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -36199,9 +36199,21 @@ op {
     type: DT_VARIANT
   }
   input_arg {
-    name: "window_size"
+    name: "size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "shift"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "stride"
     type: DT_INT64
   }
+  input_arg {
+    name: "drop_remainder"
+    type: DT_BOOL
+  }
   output_arg {
     name: "handle"
     type: DT_VARIANT
-- 
GitLab


From 185aa89912376d4088c22615908696cd30f9951b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 17:49:36 -0700
Subject: [PATCH 0305/1357] Eliminate VisitableAllocator.

The visitor pattern is used to allow pre-registration of memory for
DMA access, e.g. for fast GPU/CPU i/o and for RDMA networking.  The
VisitableAllocator interface was introduced to support this use some
time ago, prior to SubAllocators. Memory registration works best if
it's done infrequently, on large pieces of memory, rather than on
every piece that's dynamically allocated/freed.  This usage pattern
fits the SubAllocator better than a general Allocator.  This change
moves memory allocation visitor access to SubAllocator and eliminates
the VisitableAllocator subclass of Allocator.

This change also more rigorously enforces the requirement that all
Visitors be declared prior to memory allocation begining.  This is
accomplished by requiring that Visitors be provided to the SubAllocator
constructor.

This refactoring will ease an upcoming CL introducing
NUMA specific CPU devices.  It also should fix some performance
pitfalls (e.g. accidental use of PoolAllocator) introduced by an
earlier refactoring of ProcessState that was also in preparation for
NUMA.  It restores the default use of the cpu_allocator() value (i.e.
no SubAllocator) by model executions that don't use allocation
visitors (since visitor registration must precede the first allocation,
hence can be detected at that time).

PiperOrigin-RevId: 213371553
---
 tensorflow/contrib/gdr/gdr_memory_manager.cc  | 102 +++++------
 tensorflow/contrib/verbs/rdma_mgr.cc          |  81 +++------
 tensorflow/contrib/verbs/rdma_mgr.h           |   1 +
 tensorflow/contrib/verbs/verbs_server_lib.cc  |   5 +
 tensorflow/core/BUILD                         |   1 -
 .../core/common_runtime/bfc_allocator.cc      |  21 +--
 .../core/common_runtime/bfc_allocator.h       |  14 +-
 .../common_runtime/gpu/cuda_host_allocator.h  |  12 +-
 .../common_runtime/gpu/gpu_bfc_allocator.cc   |  17 +-
 .../common_runtime/gpu/gpu_bfc_allocator.h    |  44 +++--
 .../gpu/gpu_bfc_allocator_test.cc             |  90 ++++++++--
 .../gpu/gpu_cudamalloc_allocator.cc           |  10 +-
 .../gpu/gpu_cudamalloc_allocator.h            |  11 +-
 .../common_runtime/gpu/gpu_debug_allocator.cc |  20 +--
 .../common_runtime/gpu/gpu_debug_allocator.h  |  20 +--
 .../gpu/gpu_debug_allocator_test.cc           |  35 +++-
 .../core/common_runtime/gpu/gpu_device.cc     |  64 ++++---
 .../core/common_runtime/gpu/gpu_device.h      |   9 +-
 .../common_runtime/gpu/gpu_process_state.cc   | 161 +++++++++++-------
 .../common_runtime/gpu/gpu_process_state.h    |  58 ++++---
 .../common_runtime/gpu/pool_allocator_test.cc |  68 ++++++--
 .../core/common_runtime/mkl_cpu_allocator.h   |  50 +-----
 .../core/common_runtime/pool_allocator.cc     |  45 ++---
 .../core/common_runtime/pool_allocator.h      |  27 +--
 .../core/common_runtime/process_state.cc      |  71 ++++++--
 .../core/common_runtime/process_state.h       |  15 +-
 .../core/common_runtime/renamed_device.h      |   7 +-
 .../core/common_runtime/visitable_allocator.h |  79 ---------
 tensorflow/core/framework/allocator.cc        |  20 ++-
 tensorflow/core/framework/allocator.h         |  28 ++-
 tensorflow/core/framework/device_base.h       |  10 +-
 tensorflow/core/framework/op_kernel.cc        |   9 +-
 32 files changed, 628 insertions(+), 577 deletions(-)
 delete mode 100644 tensorflow/core/common_runtime/visitable_allocator.h

diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc
index 726f74c7b7..bb06f1c41c 100644
--- a/tensorflow/contrib/gdr/gdr_memory_manager.cc
+++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc
@@ -138,6 +138,8 @@ class GdrMemoryManager : public RemoteMemoryManager {
       Device* device, DeviceContext* device_context, bool on_host,
       StatusCallback done) override;
 
+  static void RegMemVisitors();
+
  protected:
   Status CreateEndpoint(const string& host, const string& port,
                         RdmaEndpointPtr& endpoint);
@@ -183,35 +185,51 @@ class GdrMemoryManager : public RemoteMemoryManager {
   TF_DISALLOW_COPY_AND_ASSIGN(GdrMemoryManager);
 };
 
-// TODO(byronyi): remove this class and its registration when the default
-// cpu_allocator() returns visitable allocator, or cpu_allocator() is no
-// longer in use.
-class BFCGdrAllocator : public BFCAllocator {
- public:
-  BFCGdrAllocator()
-      : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36,
-                     true, "cpu_gdr_bfc") {}
-};
-class BFCGdrAllocatorFactory : public AllocatorFactory {
- public:
-  Allocator* CreateAllocator() override { return new BFCGdrAllocator; }
-
-  virtual SubAllocator* CreateSubAllocator(int numa_node) {
-    return new BasicCPUAllocator(numa_node);
-  }
-};
-
-REGISTER_MEM_ALLOCATOR("BFCGdrAllocator", 102, BFCGdrAllocatorFactory);
-
 GdrMemoryManager::GdrMemoryManager(const string& host, const string& port)
     : host_(host),
       port_(port),
       listening_(nullptr, EndpointDeleter),
       stopped_(true),
-      next_key_(0) {}
+      next_key_(0) {
+  static std::once_flag flag;
+  std::call_once(flag, []() { RegMemVisitors(); });
+}
 
 GdrMemoryManager::~GdrMemoryManager() { close(epfd_); }
 
+/*static*/ void GdrMemoryManager::RegMemVisitors() {
+  SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node,
+                                           size_t num_bytes) {
+    GdrMemoryManager::Singleton().InsertMemoryRegion(
+        ptr, num_bytes, strings::StrCat("CPU:", numa_node));
+  };
+  SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node,
+                                          size_t num_bytes) {
+    GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes);
+  };
+  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
+  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
+
+#if GOOGLE_CUDA
+  if (IsGDRAvailable()) {
+    int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
+
+    // Note we don't free allocated GPU memory so there is no free visitor
+    SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
+                                                  size_t num_bytes) {
+      RdmaMemoryMgr::Singleton().InsertMemoryRegion(
+          ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
+    };
+    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
+                                                     cuda_alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
+                                                          alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
+    LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
+  }
+#endif  // GOOGLE_CUDA
+}
+
 Status GdrMemoryManager::Init() {
   epfd_ = epoll_create1(0);
   if (epfd_ == -1) {
@@ -271,48 +289,6 @@ Status GdrMemoryManager::Init() {
                                "cannot add server to epoll");
   }
 
-  Allocator* allocators[] = {
-#if GOOGLE_CUDA
-    GPUProcessState::singleton()->GetCUDAHostAllocator(0),
-#endif  // GOOGLE_CUDA
-    ProcessState::singleton()->GetCPUAllocator(0),
-    cpu_allocator(),
-  };
-
-  using namespace std::placeholders;
-  VisitableAllocator::Visitor alloc_visitor =
-      std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2);
-  VisitableAllocator::Visitor free_visitor =
-      std::bind(&GdrMemoryManager::EvictMemoryRegion, this, _1, _2);
-
-  std::set<Allocator*> instrumented_;
-
-  // Host memory allocators
-  for (Allocator* allocator : allocators) {
-    auto* visitable_allocator = dynamic_cast<VisitableAllocator*>(allocator);
-    CHECK(visitable_allocator)
-        << "is not visitable for instrumentation" << allocator->Name();
-    // Make sure we don't instrument the same allocator twice
-    if (instrumented_.find(allocator) == std::end(instrumented_)) {
-      visitable_allocator->AddAllocVisitor(alloc_visitor);
-      visitable_allocator->AddFreeVisitor(free_visitor);
-      instrumented_.insert(allocator);
-      LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name();
-    }
-  }
-
-#if GOOGLE_CUDA
-  VisitableAllocator::Visitor cuda_alloc_visitor =
-      std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2);
-  if (IsGDRAvailable()) {
-    // Note we don't free allocated GPU memory so there is no free visitor
-    int32_t bus_id = TryToReadNumaNode(listening_->verbs->device) + 1;
-    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
-                                                     cuda_alloc_visitor);
-    LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
-  }
-#endif  // GOOGLE_CUDA
-
   return Status::OK();
 }
 
diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 3cb5e61fac..2784bf124c 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/contrib/verbs/grpc_verbs_client.h"
 #include "tensorflow/contrib/verbs/verbs_service.pb.h"
-#include "tensorflow/core/common_runtime/bfc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/pool_allocator.h"
@@ -29,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/distributed_runtime/session_mgr.h"
 #include "tensorflow/core/framework/allocator_registry.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/strcat.h"
 
 namespace tensorflow {
 
@@ -256,74 +256,41 @@ void MRDeleter(ibv_mr* mr) {
   }
 }
 
-// TODO(byronyi): remove this class and its registration when the default
-// cpu_allocator() returns visitable allocator, or cpu_allocator() is no
-// longer in use.
-class BFCRdmaAllocator : public BFCAllocator {
- public:
-  BFCRdmaAllocator()
-      : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36,
-                     true, "cpu_rdma_bfc") {}
-};
-class BFCRdmaAllocatorFactory : public AllocatorFactory {
- public:
-  Allocator* CreateAllocator() { return new BFCRdmaAllocator; }
-
-  SubAllocator* CreateSubAllocator(int numa_node) {
-    return new BasicCPUAllocator(numa_node);
-  }
-};
-
-REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory);
-
 void RdmaMgr::InitAllocators() {
-  RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_;
+  static std::once_flag flag;
+  std::call_once(
+      flag, [this]() { RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_; });
+}
 
-  Allocator* allocators[] = {
-#if GOOGLE_CUDA
-    GPUProcessState::singleton()->GetCUDAHostAllocator(0),
-#endif  // GOOGLE_CUDA
-    ProcessState::singleton()->GetCPUAllocator(0),
-    cpu_allocator(),
+/*static*/ void RdmaMgr::RegMemVisitors() {
+  SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node,
+                                           size_t num_bytes) {
+    RdmaMemoryMgr::Singleton().InsertMemoryRegion(
+        ptr, num_bytes, strings::StrCat("CPU:", numa_node));
+  };
+  SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node,
+                                          size_t num_bytes) {
+    RdmaMemoryMgr::Singleton().EvictMemoryRegion(ptr, num_bytes);
   };
 
-  using namespace std::placeholders;
-
-  std::set<Allocator*> instrumented_;
-
-  // Host memory allocators
-  for (Allocator* allocator : allocators) {
-    VisitableAllocator::Visitor alloc_visitor =
-        std::bind(&RdmaMemoryMgr::InsertMemoryRegion,
-                  &RdmaMemoryMgr::Singleton(), _1, _2, allocator->Name());
-    VisitableAllocator::Visitor free_visitor = std::bind(
-        &RdmaMemoryMgr::EvictMemoryRegion, &RdmaMemoryMgr::Singleton(), _1, _2);
-
-    auto* visitable_allocator = dynamic_cast<VisitableAllocator*>(allocator);
-    CHECK(visitable_allocator)
-        << "is not visitable for instrumentation" << allocator->Name();
-    // Make sure we don't instrument the same allocator twice
-    if (instrumented_.find(allocator) == std::end(instrumented_)) {
-      visitable_allocator->AddAllocVisitor(alloc_visitor);
-      visitable_allocator->AddFreeVisitor(free_visitor);
-      instrumented_.insert(allocator);
-      LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name();
-    }
-  }
+  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
+  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
 
 #if GOOGLE_CUDA
   if (IsGDRAvailable()) {
     // Note we don't free allocated GPU memory so there is no free visitor
     int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
 
-    char buf[8];
-    sprintf(buf, "gpu");
-    VisitableAllocator::Visitor cuda_alloc_visitor =
-        std::bind(&RdmaMemoryMgr::InsertMemoryRegion,
-                  &RdmaMemoryMgr::Singleton(), _1, _2, std::string(buf));
-
+    SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
+                                                  size_t num_bytes) {
+      RdmaMemoryMgr::Singleton().InsertMemoryRegion(
+          ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
+    };
     GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
                                                      cuda_alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
+                                                          alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
     LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
   }
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h
index 9fffc335bb..74b92cc9a6 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.h
+++ b/tensorflow/contrib/verbs/rdma_mgr.h
@@ -39,6 +39,7 @@ class RdmaMgr {
   void SetupChannels();
   bool ConnectivityCheck();
   void InitAllocators();
+  static void RegMemVisitors();
   const string& local_worker() { return local_worker_; }
 
  private:
diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc
index 1a0b5028fe..61469686e4 100644
--- a/tensorflow/contrib/verbs/verbs_server_lib.cc
+++ b/tensorflow/contrib/verbs/verbs_server_lib.cc
@@ -76,8 +76,13 @@ Status VerbsServer::ChannelCacheFactory(const ServerDef& server_def,
   return Status::OK();
 }
 
+namespace {
+std::once_call reg_mem_visitors_call;
+}  // namespace
+
 Status VerbsServer::Init(ServiceInitFunction service_func,
                          RendezvousMgrCreationFunction rendezvous_mgr_func) {
+  std::call_once(reg_mem_visitors_call, []() { RdmaMgr::RegMemVisitors(); });
   Status s = GrpcServer::Init(service_func, rendezvous_mgr_func);
   {
     mutex_lock l(mu_);
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d55bd8d7ed..9bcf5b0865 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2783,7 +2783,6 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
     "common_runtime/step_stats_collector.h",
     "common_runtime/threadpool_device.h",
     "common_runtime/tracing_device.h",
-    "common_runtime/visitable_allocator.h",
     "common_runtime/process_state.h",
     "common_runtime/pool_allocator.h",
     "graph/gradients.h",
diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc
index 84c6285bbe..3843ea9e60 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/bfc_allocator.cc
@@ -31,7 +31,7 @@ namespace tensorflow {
 
 BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
                            bool allow_growth, const string& name)
-    : suballocator_(sub_allocator),
+    : sub_allocator_(sub_allocator),
       name_(name),
       free_chunks_list_(kInvalidChunkHandle),
       next_allocation_id_(1) {
@@ -72,7 +72,7 @@ BFCAllocator::~BFCAllocator() {
   VLOG(2) << "Number of regions allocated: "
           << region_manager_.regions().size();
   for (const auto& region : region_manager_.regions()) {
-    suballocator_->Free(region.ptr(), region.memory_size());
+    sub_allocator_->Free(region.ptr(), region.memory_size());
   }
 
   for (BinNum b = 0; b < kNumBins; b++) {
@@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
 
   // Try allocating.
   size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
-  void* mem_addr = suballocator_->Alloc(alignment, bytes);
+  void* mem_addr = sub_allocator_->Alloc(alignment, bytes);
   if (mem_addr == nullptr && !started_backpedal_) {
     // Only backpedal once.
     started_backpedal_ = true;
@@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
     while (mem_addr == nullptr) {
       bytes = RoundedBytes(bytes * kBackpedalFactor);
       if (bytes < rounded_bytes) break;
-      mem_addr = suballocator_->Alloc(alignment, bytes);
+      mem_addr = sub_allocator_->Alloc(alignment, bytes);
     }
   }
 
@@ -158,10 +158,6 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
   // Insert the chunk into the right bin.
   InsertFreeChunkIntoBin(h);
 
-  // Invoke visitors on newly allocated region.
-  for (const auto& visitor : region_visitors_) {
-    visitor(mem_addr, bytes);
-  }
   return true;
 }
 
@@ -490,15 +486,6 @@ void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) {
   InsertFreeChunkIntoBin(coalesced_chunk);
 }
 
-void BFCAllocator::AddAllocVisitor(Visitor visitor) {
-  VLOG(1) << "AddVisitor";
-  mutex_lock l(lock_);
-  region_visitors_.push_back(visitor);
-  for (const auto& region : region_manager_.regions()) {
-    visitor(region.ptr(), region.memory_size());
-  }
-}
-
 bool BFCAllocator::TracksAllocationSizes() { return true; }
 
 size_t BFCAllocator::RequestedSize(const void* ptr) {
diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h
index 20e1dab1d5..364071e066 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.h
+++ b/tensorflow/core/common_runtime/bfc_allocator.h
@@ -23,7 +23,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/allocator_retry.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/macros.h"
@@ -42,7 +42,7 @@ namespace tensorflow {
 // coalescing.  One assumption we make is that the process using this
 // allocator owns pretty much all of the memory, and that nearly
 // all requests to allocate memory go through this interface.
-class BFCAllocator : public VisitableAllocator {
+class BFCAllocator : public Allocator {
  public:
   // Takes ownership of sub_allocator.
   BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
@@ -55,11 +55,6 @@ class BFCAllocator : public VisitableAllocator {
                     const AllocationAttributes& allocation_attr) override;
   void DeallocateRaw(void* ptr) override;
 
-  void AddAllocVisitor(Visitor visitor) override;
-
-  // Does nothing, because memory is never freed.
-  void AddFreeVisitor(Visitor visitor) override {}
-
   bool TracksAllocationSizes() override;
 
   size_t RequestedSize(const void* ptr) override;
@@ -423,7 +418,7 @@ class BFCAllocator : public VisitableAllocator {
   // of the available memory.
   bool started_backpedal_ = false;
 
-  std::unique_ptr<SubAllocator> suballocator_;
+  std::unique_ptr<SubAllocator> sub_allocator_;
   string name_;
 
   // Structures mutable after construction
@@ -435,9 +430,6 @@ class BFCAllocator : public VisitableAllocator {
   // Pointer to head of linked list of free Chunks
   ChunkHandle free_chunks_list_ GUARDED_BY(lock_);
 
-  // Called once on each region, ASAP.
-  std::vector<Visitor> region_visitors_ GUARDED_BY(lock_);
-
   // Counter containing the next unique identifier to assign to a
   // newly-created chunk.
   int64 next_allocation_id_ GUARDED_BY(lock_);
diff --git a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h
index 636cd43575..6bd29ef775 100644
--- a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h
@@ -26,8 +26,12 @@ namespace tensorflow {
 class CUDAHostAllocator : public SubAllocator {
  public:
   // Note: stream_exec cannot be null.
-  explicit CUDAHostAllocator(se::StreamExecutor* stream_exec)
-      : stream_exec_(stream_exec) {
+  explicit CUDAHostAllocator(se::StreamExecutor* stream_exec, int numa_node,
+                             const std::vector<Visitor>& alloc_visitors,
+                             const std::vector<Visitor>& free_visitors)
+      : SubAllocator(alloc_visitors, free_visitors),
+        stream_exec_(stream_exec),
+        numa_node_(numa_node) {
     CHECK(stream_exec_ != nullptr);
   }
   ~CUDAHostAllocator() override {}
@@ -39,19 +43,23 @@ class CUDAHostAllocator : public SubAllocator {
       if (ptr == nullptr) {
         LOG(WARNING) << "could not allocate pinned host memory of size: "
                      << num_bytes;
+        return ptr;
       }
+      VisitAlloc(ptr, numa_node_, num_bytes);
     }
     return ptr;
   }
 
   void Free(void* ptr, size_t num_bytes) override {
     if (ptr != nullptr) {
+      VisitFree(ptr, numa_node_, num_bytes);
       stream_exec_->HostMemoryDeallocate(ptr);
     }
   }
 
  private:
   se::StreamExecutor* stream_exec_;  // not owned, non-null
+  const int numa_node_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(CUDAHostAllocator);
 };
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index 2d4c8d0201..44ffce77a1 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -22,18 +22,15 @@ limitations under the License.
 
 namespace tensorflow {
 
-GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
-                                 const string& name)
-    : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {}
+GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator,
+                                 size_t total_memory, const string& name)
+    : GPUBFCAllocator(sub_allocator, total_memory, GPUOptions(), name) {}
 
-GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator,
+                                 size_t total_memory,
                                  const GPUOptions& gpu_options,
                                  const string& name)
-    : BFCAllocator(
-          new GPUMemAllocator(
-              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
-              gpu_options.per_process_gpu_memory_fraction() > 1.0 ||
-                  gpu_options.experimental().use_unified_memory()),
-          total_memory, gpu_options.allow_growth(), name) {}
+    : BFCAllocator(sub_allocator, total_memory, gpu_options.allow_growth(),
+                   name) {}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index f1cc2eace1..6b6de80734 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -31,28 +31,20 @@ limitations under the License.
 
 namespace tensorflow {
 
-// A GPU memory allocator that implements a 'best-fit with coalescing'
-// algorithm.
-class GPUBFCAllocator : public BFCAllocator {
- public:
-  // 'cuda_gpu_id' refers to the ID of the GPU device within
-  // the process and must reference a valid ID in the process.
-  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
-                  const string& name);
-  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
-                  const GPUOptions& gpu_options, const string& name);
-  virtual ~GPUBFCAllocator() {}
-
-  TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
-};
-
 // Suballocator for GPU memory.
 class GPUMemAllocator : public SubAllocator {
  public:
+  // 'cuda_gpu_id' refers to the ID of the GPU device within
+  // the process and must reference a valid ID in the process.
   // Note: stream_exec cannot be null.
-  explicit GPUMemAllocator(se::StreamExecutor* stream_exec,
-                           bool use_unified_memory)
-      : stream_exec_(stream_exec), use_unified_memory_(use_unified_memory) {
+  explicit GPUMemAllocator(se::StreamExecutor* stream_exec, CudaGpuId gpu_id,
+                           bool use_unified_memory,
+                           const std::vector<Visitor>& alloc_visitors,
+                           const std::vector<Visitor>& free_visitors)
+      : SubAllocator(alloc_visitors, free_visitors),
+        stream_exec_(stream_exec),
+        gpu_id_(gpu_id),
+        use_unified_memory_(use_unified_memory) {
     CHECK(stream_exec_ != nullptr);
   }
   ~GPUMemAllocator() override {}
@@ -65,12 +57,14 @@ class GPUMemAllocator : public SubAllocator {
       } else {
         ptr = stream_exec_->AllocateArray<char>(num_bytes).opaque();
       }
+      VisitAlloc(ptr, gpu_id_.value(), num_bytes);
     }
     return ptr;
   }
 
   void Free(void* ptr, size_t num_bytes) override {
     if (ptr != nullptr) {
+      VisitFree(ptr, gpu_id_.value(), num_bytes);
       if (use_unified_memory_) {
         stream_exec_->UnifiedMemoryDeallocate(ptr);
       } else {
@@ -82,11 +76,25 @@ class GPUMemAllocator : public SubAllocator {
 
  private:
   se::StreamExecutor* stream_exec_;  // not owned, non-null
+  const CudaGpuId gpu_id_;
   const bool use_unified_memory_ = false;
 
   TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator);
 };
 
+// A GPU memory allocator that implements a 'best-fit with coalescing'
+// algorithm.
+class GPUBFCAllocator : public BFCAllocator {
+ public:
+  GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory,
+                  const string& name);
+  GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory,
+                  const GPUOptions& gpu_options, const string& name);
+  ~GPUBFCAllocator() override {}
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_BFC_ALLOCATOR_H_
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
index 67caeb3495..7112c3afd4 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -46,7 +47,11 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use,
 }
 
 TEST(GPUBFCAllocatorTest, NoDups) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   // Allocate a lot of raw pointers
@@ -75,7 +80,11 @@ TEST(GPUBFCAllocatorTest, NoDups) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   // Allocate 256 raw pointers of sizes between 100 bytes and about
   // a meg
   random::PhiloxRandom philox(123, 17);
@@ -133,7 +142,11 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
 }
 
 TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   float* first_ptr = a.Allocate<float>(1024);
@@ -168,18 +181,30 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   float* ptr = a.Allocate<float>(0);
   EXPECT_EQ(nullptr, ptr);
 }
 
 TEST(GPUBFCAllocatorTest, TracksSizes) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   float* t1 = a.Allocate<float>(1);
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
@@ -187,8 +212,12 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
 }
 
 TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
   // Configure a 1MiB byte limit
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc");
+  GPUBFCAllocator a(sub_allocator, 1 << 20, "GPU_0_bfc");
 
   float* first_ptr = a.Allocate<float>(1 << 6);
   float* second_ptr = a.Allocate<float>(1 << 20);
@@ -203,7 +232,11 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
   options.set_allow_growth(true);
 
   // Max of 2GiB, but starts out small.
-  GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1LL << 31, "GPU_0_bfc");
 
   // Allocate 10 raw pointers of sizes between 100 bytes and about
   // 64 megs.
@@ -264,8 +297,15 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
 }
 
 TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
-  GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
-  GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1UL << 60, "GPU_0_bfc");
+  sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator b(sub_allocator, 1UL << 60, "GPU_0_bfc");
   void* amem = a.AllocateRaw(1, 1);
   void* bmem = b.AllocateRaw(1, 1 << 30);
   a.DeallocateRaw(amem);
@@ -273,7 +313,11 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
 }
 
 static void BM_Allocation(int iters) {
-  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<size_t> sizes = {256,        4096,      16384,    524288,
                                512,        1048576,   10485760, 104857600,
@@ -289,7 +333,11 @@ static void BM_Allocation(int iters) {
 BENCHMARK(BM_Allocation);
 
 static void BM_AllocationThreaded(int iters, int num_threads) {
-  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc");
   thread::ThreadPool pool(Env::Default(), "test", num_threads);
   std::atomic_int_fast32_t count(iters);
   mutex done_lock;
@@ -325,7 +373,11 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16);
 // A more complex benchmark that defers deallocation of an object for
 // "delay" allocations.
 static void BM_AllocationDelayed(int iters, int delay) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<int> sizes = {256, 4096, 16384, 4096, 512, 1024, 1024};
   int size_index = 0;
@@ -363,7 +415,11 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   // only methods inside this class can access private members of BFCAllocator.
 
   void TestBinDebugInfo() {
-    GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+    CudaGpuId cuda_gpu_id(0);
+    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+        false /*use_unified_memory*/, {}, {});
+    GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
 
     std::vector<void*> initial_ptrs;
     std::vector<size_t> initial_ptrs_allocated_sizes;
@@ -441,7 +497,11 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   }
 
   void TestLog2FloorNonZeroSlow() {
-    GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc");
+    CudaGpuId cuda_gpu_id(0);
+    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+        false /*use_unified_memory*/, {}, {});
+    GPUBFCAllocator a(sub_allocator, 1 /* total_memory */, "GPU_0_bfc");
     EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0));
     EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1));
     EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
index 934a57a5fb..8e14f1ea75 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
@@ -27,7 +27,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator,
+GPUcudaMallocAllocator::GPUcudaMallocAllocator(Allocator* allocator,
                                                CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
   stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -60,14 +60,6 @@ void GPUcudaMallocAllocator::DeallocateRaw(void* ptr) {
 #endif  // GOOGLE_CUDA
 }
 
-void GPUcudaMallocAllocator::AddAllocVisitor(Visitor visitor) {
-  return base_allocator_->AddAllocVisitor(visitor);
-}
-
-void GPUcudaMallocAllocator::AddFreeVisitor(Visitor visitor) {
-  return base_allocator_->AddFreeVisitor(visitor);
-}
-
 bool GPUcudaMallocAllocator::TracksAllocationSizes() { return false; }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
index 856fdc34b4..3d1d0ef481 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include <memory>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
@@ -29,20 +29,17 @@ namespace tensorflow {
 // An allocator that wraps a GPU allocator and adds debugging
 // functionality that verifies that users do not write outside their
 // allocated memory.
-class GPUcudaMallocAllocator : public VisitableAllocator {
+class GPUcudaMallocAllocator : public Allocator {
  public:
-  explicit GPUcudaMallocAllocator(VisitableAllocator* allocator,
-                                  CudaGpuId cuda_gpu_id);
+  explicit GPUcudaMallocAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id);
   ~GPUcudaMallocAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
-  void AddAllocVisitor(Visitor visitor) override;
-  void AddFreeVisitor(Visitor visitor) override;
   bool TracksAllocationSizes() override;
 
  private:
-  VisitableAllocator* base_allocator_ = nullptr;  // owned
+  Allocator* base_allocator_ = nullptr;  // owned
 
   se::StreamExecutor* stream_exec_;  // Not owned.
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
index e4c834b30d..6bad66dcec 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
@@ -73,7 +73,7 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) {
 // -----------------------------------------------------------------------------
 // GPUDebugAllocator
 // -----------------------------------------------------------------------------
-GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator,
+GPUDebugAllocator::GPUDebugAllocator(Allocator* allocator,
                                      CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
   stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -111,14 +111,6 @@ void GPUDebugAllocator::DeallocateRaw(void* ptr) {
   base_allocator_->DeallocateRaw(ptr);
 }
 
-void GPUDebugAllocator::AddAllocVisitor(Visitor visitor) {
-  return base_allocator_->AddAllocVisitor(visitor);
-}
-
-void GPUDebugAllocator::AddFreeVisitor(Visitor visitor) {
-  return base_allocator_->AddFreeVisitor(visitor);
-}
-
 bool GPUDebugAllocator::TracksAllocationSizes() { return true; }
 
 size_t GPUDebugAllocator::RequestedSize(const void* ptr) {
@@ -158,7 +150,7 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) {
 // -----------------------------------------------------------------------------
 // GPUNanResetAllocator
 // -----------------------------------------------------------------------------
-GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator,
+GPUNanResetAllocator::GPUNanResetAllocator(Allocator* allocator,
                                            CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
   stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -200,14 +192,6 @@ void GPUNanResetAllocator::DeallocateRaw(void* ptr) {
   base_allocator_->DeallocateRaw(ptr);
 }
 
-void GPUNanResetAllocator::AddAllocVisitor(Visitor visitor) {
-  return base_allocator_->AddAllocVisitor(visitor);
-}
-
-void GPUNanResetAllocator::AddFreeVisitor(Visitor visitor) {
-  return base_allocator_->AddFreeVisitor(visitor);
-}
-
 size_t GPUNanResetAllocator::RequestedSize(const void* ptr) {
   return base_allocator_->RequestedSize(ptr);
 }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
index 0f9b72040c..0f27ff4384 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include <unordered_map>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
@@ -31,16 +31,13 @@ namespace tensorflow {
 // An allocator that wraps a GPU allocator and adds debugging
 // functionality that verifies that users do not write outside their
 // allocated memory.
-class GPUDebugAllocator : public VisitableAllocator {
+class GPUDebugAllocator : public Allocator {
  public:
-  explicit GPUDebugAllocator(VisitableAllocator* allocator,
-                             CudaGpuId cuda_gpu_id);
+  explicit GPUDebugAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id);
   ~GPUDebugAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
-  void AddAllocVisitor(Visitor visitor) override;
-  void AddFreeVisitor(Visitor visitor) override;
   bool TracksAllocationSizes() override;
   size_t RequestedSize(const void* ptr) override;
   size_t AllocatedSize(const void* ptr) override;
@@ -53,7 +50,7 @@ class GPUDebugAllocator : public VisitableAllocator {
   bool CheckFooter(void* ptr);
 
  private:
-  VisitableAllocator* base_allocator_ = nullptr;  // owned
+  Allocator* base_allocator_ = nullptr;  // owned
 
   se::StreamExecutor* stream_exec_;  // Not owned.
 
@@ -63,23 +60,20 @@ class GPUDebugAllocator : public VisitableAllocator {
 // An allocator that wraps a GPU allocator and resets the memory on
 // allocation and free to 'NaN', helping to identify cases where the
 // user forgets to initialize the memory.
-class GPUNanResetAllocator : public VisitableAllocator {
+class GPUNanResetAllocator : public Allocator {
  public:
-  explicit GPUNanResetAllocator(VisitableAllocator* allocator,
-                                CudaGpuId cuda_gpu_id);
+  explicit GPUNanResetAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id);
   ~GPUNanResetAllocator() override;
   string Name() override { return "gpu_nan_reset"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
-  void AddAllocVisitor(Visitor visitor) override;
-  void AddFreeVisitor(Visitor visitor) override;
   size_t RequestedSize(const void* ptr) override;
   size_t AllocatedSize(const void* ptr) override;
   void GetStats(AllocatorStats* stats) override;
   void ClearStats() override;
 
  private:
-  VisitableAllocator* base_allocator_ = nullptr;  // owned
+  Allocator* base_allocator_ = nullptr;  // owned
 
   se::StreamExecutor* stream_exec_;  // Not owned.
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
index 236a0afa0b..98283cd846 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
@@ -35,7 +35,10 @@ namespace {
 
 TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                       cuda_gpu_id);
   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
@@ -59,7 +62,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
     EXPECT_DEATH(
         {
           const CudaGpuId cuda_gpu_id(0);
-          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+          GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
+              cuda_gpu_id, false /*use_unified_memory*/, {}, {});
+          GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                               cuda_gpu_id);
           auto stream_exec =
               GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -92,7 +98,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
     EXPECT_DEATH(
         {
           const CudaGpuId cuda_gpu_id(0);
-          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+          GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
+              cuda_gpu_id, false /*use_unified_memory*/, {}, {});
+          GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                               cuda_gpu_id);
           auto stream_exec =
               GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -122,7 +131,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
 
 TEST(GPUDebugAllocatorTest, ResetToNan) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUNanResetAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                          cuda_gpu_id);
   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
@@ -163,8 +175,11 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
 TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
   const CudaGpuId cuda_gpu_id(0);
   // NaN reset must be the outer-most allocator.
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+      new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                             cuda_gpu_id),
       cuda_gpu_id);
   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -205,15 +220,21 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
 
 TEST(GPUDebugAllocatorTest, TracksSizes) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                       cuda_gpu_id);
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
   const CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+      new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                             cuda_gpu_id),
       cuda_gpu_id);
   float* t1 = a.Allocate<float>(1);
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 2763ac0d4a..50e61b7e00 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -41,7 +41,6 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/gpu_device_context.h"
 #include "tensorflow/core/common_runtime/local_device.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -285,6 +284,38 @@ BaseGPUDevice::~BaseGPUDevice() {
   for (auto ctx : device_contexts_) ctx->Unref();
 }
 
+// This should be idempotent if already initialized.
+Status BaseGPUDevice::InitScratchBuffers() {
+  mutex_lock l(scratch_init_mutex_);
+  if (scratch_.size() < max_streams_) {
+    for (int i = 0; i < max_streams_; i++) {
+      DCHECK(streams_[i]);
+      if (scratch_.size() > i && scratch_[i]) continue;
+      size_t scratch_buffer_size =
+          Eigen::kCudaScratchSize + sizeof(unsigned int);
+      void* scratch_buffer = gpu_allocator_->AllocateRaw(
+          Allocator::kAllocatorAlignment, scratch_buffer_size);
+      if (scratch_buffer == nullptr) {
+        return errors::FailedPrecondition(
+            "Failed to allocate scratch buffer for device ",
+            tf_gpu_id_.value());
+      }
+      se::DeviceMemory<char> mem(
+          se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size));
+
+      bool ok = executor_->SynchronousMemZero(
+          &mem, Eigen::kCudaScratchSize + sizeof(unsigned int));
+      if (!ok) {
+        return errors::FailedPrecondition(
+            "Failed to memcopy into scratch buffer for device ",
+            tf_gpu_id_.value());
+      }
+      scratch_.push_back(static_cast<char*>(scratch_buffer));
+    }
+  }
+  return Status::OK();
+}
+
 Status BaseGPUDevice::Init(const SessionOptions& options) {
   auto executor_status = GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id_);
   if (!executor_status.status().ok()) {
@@ -303,27 +334,6 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   for (int i = 0; i < max_streams_; i++) {
     streams_.push_back(StreamGroupFactory::Global().GetOrCreate(
         tf_gpu_id_, i, executor_, options.config.gpu_options()));
-
-    size_t scratch_buffer_size = Eigen::kCudaScratchSize + sizeof(unsigned int);
-    void* scratch_buffer = gpu_allocator_->AllocateRaw(
-        Allocator::kAllocatorAlignment, scratch_buffer_size);
-    if (scratch_buffer == nullptr) {
-      return errors::FailedPrecondition(
-          "Failed to allocate scratch buffer for device ", tf_gpu_id_.value());
-    }
-    scratch_.push_back(static_cast<char*>(scratch_buffer));
-
-    se::DeviceMemory<char> mem(
-        se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size));
-
-    bool ok = executor_->SynchronousMemZero(
-        &mem, Eigen::kCudaScratchSize + sizeof(unsigned int));
-    if (!ok) {
-      return errors::FailedPrecondition(
-          "Failed to memcopy into scratch buffer for device ",
-          tf_gpu_id_.value());
-    }
-
     device_contexts_.push_back(new GPUDeviceContext(
         i, streams_.back()->compute, streams_.back()->host_to_device,
         streams_.back()->device_to_host, streams_.back()->device_to_device));
@@ -867,10 +877,11 @@ PerOpGpuDevice* BaseGPUDevice::MakeGpuDevice() {
   return new ConcretePerOpGpuDevice();
 }
 
-void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
-                                          PerOpGpuDevice* device,
-                                          DeviceContext* dc,
-                                          Allocator* allocator) {
+Status BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
+                                            PerOpGpuDevice* device,
+                                            DeviceContext* dc,
+                                            Allocator* allocator) {
+  TF_RETURN_IF_ERROR(InitScratchBuffers());
   if (dc) {
     const GPUDeviceContext* gpu_dc = static_cast<GPUDeviceContext*>(dc);
     const int stream_id = gpu_dc->stream_id();
@@ -881,6 +892,7 @@ void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
   } else {
     ReinitializeDevice(context, device, 0, allocator);
   }
+  return Status::OK();
 }
 
 Allocator* BaseGPUDevice::GetScopedAllocator(AllocatorAttributes attr,
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 56d03d7a8c..b3eea55758 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -86,8 +86,9 @@ class BaseGPUDevice : public LocalDevice {
   // The caller owns the returned device.
   PerOpGpuDevice* MakeGpuDevice() override;
 
-  void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
-                             DeviceContext* dc, Allocator* allocator) override;
+  Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
+                               DeviceContext* dc,
+                               Allocator* allocator) override;
 
   // Returns the CUDA GPU id of this device within the native driver system;
   // e.g., for CUDA this is the ordinal of the GPU within the system.
@@ -125,6 +126,7 @@ class BaseGPUDevice : public LocalDevice {
   class StreamGroupFactory;
 
   gtl::InlinedVector<StreamGroup*, 4> streams_;
+  mutex scratch_init_mutex_;
   gtl::InlinedVector<char*, 4> scratch_;
   std::vector<GPUDeviceContext*> device_contexts_;
   GpuDeviceInfo* gpu_device_info_ = nullptr;
@@ -135,6 +137,9 @@ class BaseGPUDevice : public LocalDevice {
   std::unique_ptr<EventMgr> em_;
   std::unique_ptr<thread::ThreadPool> thread_pool_;
 
+  // Initialize scractch buffers used by Eigen.
+  Status InitScratchBuffers();
+
   void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device,
                           int stream_id, Allocator* allocator);
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
index b18688174d..9ec740fabe 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
@@ -76,12 +76,16 @@ GPUProcessState::GPUProcessState() : gpu_device_enabled_(false) {
 // This function is defined for debugging problems with the allocators.
 GPUProcessState::~GPUProcessState() {
   CHECK_EQ(this, instance_);
-  for (auto p : gpu_allocators_) {
-    delete p;
-  }
   instance_ = nullptr;
 }
 
+int GPUProcessState::BusIdForGPU(TfGpuId tf_gpu_id) {
+  // Return the NUMA node associated with the GPU's StreamExecutor.
+  se::StreamExecutor* se =
+      GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
+  return se->GetDeviceDescription().numa_node();
+}
+
 Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
                                             TfGpuId tf_gpu_id,
                                             size_t total_bytes) {
@@ -93,13 +97,10 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
 
   if (tf_gpu_id.value() >= static_cast<int64>(gpu_allocators_.size())) {
     gpu_allocators_.resize(tf_gpu_id.value() + 1);
-    if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types)
-      gpu_al_.resize(tf_gpu_id.value() + 1);
   }
 
-  if (gpu_allocators_[tf_gpu_id.value()] == nullptr) {
-    VisitableAllocator* gpu_allocator;
-
+  AllocatorParts& allocator_parts = gpu_allocators_[tf_gpu_id.value()];
+  if (allocator_parts.allocator.get() == nullptr) {
     // Validate allocator types.
     if (!allocator_type.empty() && allocator_type != "BFC") {
       LOG(ERROR) << "Invalid allocator type: " << allocator_type;
@@ -108,8 +109,17 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
 
     CudaGpuId cuda_gpu_id;
     TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
-    gpu_allocator =
-        new GPUBFCAllocator(cuda_gpu_id, total_bytes, options,
+    int bus_id = BusIdForGPU(tf_gpu_id);
+    while (bus_id >= gpu_visitors_.size()) {
+      gpu_visitors_.push_back({});
+    }
+    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+        (options.per_process_gpu_memory_fraction() > 1.0 ||
+         options.experimental().use_unified_memory()),
+        gpu_visitors_[bus_id], {});
+    Allocator* gpu_allocator =
+        new GPUBFCAllocator(sub_allocator, total_bytes, options,
                             strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc"));
 
     // If true, checks for memory overwrites by writing
@@ -123,34 +133,25 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
       // **WARNING** probably will not work in a multi-gpu scenario
       gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id);
     }
-    gpu_allocators_[tf_gpu_id.value()] = gpu_allocator;
-
-    // If there are any pending AllocVisitors for this bus, add
-    // them now.
-    se::StreamExecutor* se =
-        GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
-    int bus_id = se->GetDeviceDescription().numa_node();
-    if (bus_id >= 0 && bus_id < static_cast<int64>(gpu_visitors_.size())) {
-      for (const auto& v : gpu_visitors_[bus_id]) {
-        gpu_allocator->AddAllocVisitor(v);
-      }
-    }
+
+    Allocator* recording_allocator = nullptr;
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
       ProcessState::MemDesc md;
       md.loc = ProcessState::MemDesc::GPU;
       md.dev_index = cuda_gpu_id.value();
       md.gpu_registered = false;
       md.nic_registered = true;
-      if (static_cast<int64>(gpu_al_.size()) <= tf_gpu_id.value()) {
-        gpu_al_.resize(tf_gpu_id.value() + 1);
-      }
-      gpu_al_[tf_gpu_id.value()] = new internal::RecordingAllocator(
+      recording_allocator = new internal::RecordingAllocator(
           &process_state_->mem_desc_map_, gpu_allocator, md, &mu_);
     }
+    allocator_parts = {std::unique_ptr<Allocator>(gpu_allocator), sub_allocator,
+                       std::unique_ptr<Allocator>(recording_allocator)};
+  }
+  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
+    return allocator_parts.recording_allocator.get();
+  } else {
+    return allocator_parts.allocator.get();
   }
-  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types)
-    return gpu_al_[tf_gpu_id.value()];
-  return gpu_allocators_[tf_gpu_id.value()];
 #else
   LOG(FATAL) << "GPUAllocator unavailable. Not compiled with --config=cuda.";
   return nullptr;
@@ -172,11 +173,12 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
     tf_shared_lock lock(mu_);
 
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types &&
-        static_cast<int>(cuda_al_.size()) > 0) {
-      return cuda_al_[0];
+        !cuda_host_allocators_.empty() &&
+        cuda_host_allocators_[0].recording_allocator != nullptr) {
+      return cuda_host_allocators_[0].recording_allocator.get();
     }
     if (static_cast<int>(cuda_host_allocators_.size()) > numa_node) {
-      return cuda_host_allocators_[0];
+      return cuda_host_allocators_[0].allocator.get();
     }
   }
 
@@ -190,7 +192,7 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
   // it knows is valid.
   se::StreamExecutor* se = nullptr;
   for (int i = 0; i < static_cast<int>(gpu_allocators_.size()); ++i) {
-    if (gpu_allocators_[i] != nullptr) {
+    if (gpu_allocators_[i].allocator != nullptr) {
       se = GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
       break;
     }
@@ -199,6 +201,15 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
   CHECK_NE(nullptr, se);
 
   while (static_cast<int>(cuda_host_allocators_.size()) <= numa_node) {
+    while (cuda_host_alloc_visitors_.size() <= numa_node) {
+      cuda_host_alloc_visitors_.push_back({});
+    }
+    while (cuda_host_free_visitors_.size() <= numa_node) {
+      cuda_host_free_visitors_.push_back({});
+    }
+    SubAllocator* sub_allocator = new CUDAHostAllocator(
+        se, numa_node, cuda_host_alloc_visitors_[numa_node],
+        cuda_host_free_visitors_[numa_node]);
     // TODO(zheng-xq): evaluate whether 64GB by default is the best choice.
     int64 cuda_host_mem_limit_in_mb = -1;
     Status status = ReadInt64FromEnvVar("TF_CUDA_HOST_MEM_LIMIT_IN_MB",
@@ -208,62 +219,92 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
       LOG(ERROR) << "GetCUDAHostAllocator: " << status.error_message();
     }
     int64 cuda_host_mem_limit = cuda_host_mem_limit_in_mb * (1LL << 20);
-    VisitableAllocator* allocator =
-        new BFCAllocator(new CUDAHostAllocator(se), cuda_host_mem_limit,
+    Allocator* allocator =
+        new BFCAllocator(sub_allocator, cuda_host_mem_limit,
                          true /*allow_growth*/, "cuda_host_bfc" /*name*/);
 
-    if (LogMemory::IsEnabled()) {
+    if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) {
       // Wrap the allocator to track allocation ids for better logging
       // at the cost of performance.
-      allocator = new TrackingVisitableAllocator(allocator, true);
+      allocator = new TrackingAllocator(allocator, true);
     }
-    cuda_host_allocators_.push_back(allocator);
+    cuda_host_allocators_.push_back({std::unique_ptr<Allocator>(allocator),
+                                     sub_allocator,
+                                     std::unique_ptr<Allocator>(nullptr)});
+    AllocatorParts& allocator_parts = cuda_host_allocators_.back();
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
       ProcessState::MemDesc md;
       md.loc = ProcessState::MemDesc::CPU;
       md.dev_index = 0;
       md.gpu_registered = true;
       md.nic_registered = false;
-      cuda_al_.push_back(new internal::RecordingAllocator(
-          &process_state_->mem_desc_map_, cuda_host_allocators_.back(), md,
-          &mu_));
+      allocator_parts.recording_allocator.reset(
+          new internal::RecordingAllocator(&process_state_->mem_desc_map_,
+                                           allocator_parts.allocator.get(), md,
+                                           &mu_));
     }
   }
-  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types)
-    return cuda_al_[0];
-  return cuda_host_allocators_[0];
+  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
+    return cuda_host_allocators_[0].recording_allocator.get();
+  } else {
+    return cuda_host_allocators_[0].allocator.get();
+  }
 }
 
 void GPUProcessState::AddGPUAllocVisitor(int bus_id,
-                                         const AllocVisitor& visitor) {
-  CHECK(process_state_);
+                                         const SubAllocator::Visitor& visitor) {
 #if GOOGLE_CUDA
   mutex_lock lock(mu_);
-  for (int i = 0; i < static_cast<int64>(gpu_allocators_.size()); ++i) {
-    se::StreamExecutor* se =
-        GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
-    if (gpu_allocators_[i] &&
-        (se->GetDeviceDescription().numa_node() + 1) == bus_id) {
-      gpu_allocators_[i]->AddAllocVisitor(visitor);
-    }
-  }
+  CHECK(gpu_allocators_.empty())  // Crash OK
+      << "AddGPUAllocVisitor must be called before "
+         "first call to GetGPUAllocator.";
   while (bus_id >= static_cast<int64>(gpu_visitors_.size())) {
-    gpu_visitors_.push_back(std::vector<AllocVisitor>());
+    gpu_visitors_.push_back(std::vector<SubAllocator::Visitor>());
   }
   gpu_visitors_[bus_id].push_back(visitor);
 #endif  // GOOGLE_CUDA
 }
 
+void GPUProcessState::AddCUDAHostAllocVisitor(
+    int numa_node, const SubAllocator::Visitor& visitor) {
+#if GOOGLE_CUDA
+  mutex_lock lock(mu_);
+  CHECK(cuda_host_allocators_.empty())  // Crash OK
+      << "AddCUDAHostAllocVisitor must be called before "
+         "first call to GetCUDAHostAllocator.";
+  while (numa_node >= static_cast<int64>(cuda_host_alloc_visitors_.size())) {
+    cuda_host_alloc_visitors_.push_back(std::vector<SubAllocator::Visitor>());
+  }
+  cuda_host_alloc_visitors_[numa_node].push_back(visitor);
+#endif  // GOOGLE_CUDA
+}
+
+void GPUProcessState::AddCUDAHostFreeVisitor(
+    int numa_node, const SubAllocator::Visitor& visitor) {
+#if GOOGLE_CUDA
+  mutex_lock lock(mu_);
+  CHECK(cuda_host_allocators_.empty())  // Crash OK
+      << "AddCUDAHostFreeVisitor must be called before "
+         "first call to GetCUDAHostAllocator.";
+  while (numa_node >= static_cast<int64>(cuda_host_free_visitors_.size())) {
+    cuda_host_free_visitors_.push_back(std::vector<SubAllocator::Visitor>());
+  }
+  cuda_host_free_visitors_[numa_node].push_back(visitor);
+#endif  // GOOGLE_CUDA
+}
+
 void GPUProcessState::TestOnlyReset() {
-  process_state_->ProcessState::TestOnlyReset();
+  if (process_state_) {
+    process_state_->ProcessState::TestOnlyReset();
+  }
   {
     mutex_lock lock(mu_);
     gpu_device_enabled_ = false;
+    gpu_allocators_.clear();
     gpu_visitors_.clear();
-    gtl::STLDeleteElements(&gpu_allocators_);
-    gtl::STLDeleteElements(&cuda_host_allocators_);
-    gtl::STLDeleteElements(&gpu_al_);
-    gtl::STLDeleteElements(&cuda_al_);
+    cuda_host_allocators_.clear();
+    cuda_host_alloc_visitors_.clear();
+    cuda_host_free_visitors_.clear();
   }
 }
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.h b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
index cb41c3c6bd..43e9a31660 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
@@ -32,7 +32,6 @@ limitations under the License.
 namespace tensorflow {
 
 class Allocator;
-class VisitableAllocator;
 class PoolAllocator;
 
 // Singleton that manages per-process state when GPUs are present.
@@ -72,18 +71,30 @@ class GPUProcessState {
 
   virtual Allocator* GetCUDAHostAllocator(int numa_node);
 
-  // Registers a function to be called once on every new Region
-  // allocated by every GPURegionAllocator proximate to the specified
-  // bus.  The AllocVisitor is provided with a memory pointer and the
-  // size of the area it identifies.  The pointer is not guaranteed to
-  // be valid after the call terminates.  The intention is for this
-  // interface to be used for network device memory registration.
-  // "bus_id" is platform-specific.  On many platforms it
-  // should be 0.  On machines with multiple PCIe buses, it should be
-  // the index of one of the PCIe buses.  If the bus_id is invalid,
-  // results are undefined.
-  typedef std::function<void(void*, size_t)> AllocVisitor;
-  virtual void AddGPUAllocVisitor(int bus_id, const AllocVisitor& visitor);
+  // Registers a Visitor to be invoked on new chunks of memory allocated by the
+  // SubAllocator of every GPU proximate to the specified bus.  The AllocVisitor
+  // is provided with a memory pointer, a GPU id, and the size of the area it
+  // identifies.  The pointer is not guaranteed to be valid after the call
+  // terminates.  The intention is for this interface to be used for network
+  // device memory registration.  "bus_id" is platform-specific.  On many
+  // platforms it should be 0.  On machines with multiple PCIe buses, it should
+  // be the index of one of the PCIe buses (maybe the NUMA node at which the
+  // PCIe is rooted).  If the bus_id is invalid, results are undefined.
+  virtual void AddGPUAllocVisitor(int bus_id,
+                                  const SubAllocator::Visitor& visitor);
+
+  // Registers a Visitor to be invoked on new chunks of memory allocated by
+  // the SubAllocator of the CUDAHostAllocator for the given numa_node.
+  virtual void AddCUDAHostAllocVisitor(int numa_node,
+                                       const SubAllocator::Visitor& visitor);
+
+  // Registers a Visitor to be invoked on each chunk handed back for freeing to
+  // the SubAllocator of the CUDAHostAllocator for the given numa_node.
+  virtual void AddCUDAHostFreeVisitor(int numa_node,
+                                      const SubAllocator::Visitor& visitor);
+
+  // Returns bus_id for the given GPU id.
+  virtual int BusIdForGPU(TfGpuId tf_gpu_id);
 
  protected:
   GPUProcessState();
@@ -103,16 +114,21 @@ class GPUProcessState {
 
   mutex mu_;
 
-  std::vector<VisitableAllocator*> gpu_allocators_ GUARDED_BY(mu_);
-  std::vector<std::vector<AllocVisitor>> gpu_visitors_ GUARDED_BY(mu_);
-  std::vector<Allocator*> cuda_host_allocators_ GUARDED_BY(mu_);
+  struct AllocatorParts {
+    std::unique_ptr<Allocator> allocator;
+    SubAllocator* sub_allocator;  // owned by allocator
+    std::unique_ptr<Allocator> recording_allocator;
+  };
+  std::vector<AllocatorParts> gpu_allocators_ GUARDED_BY(mu_);
+  std::vector<std::vector<SubAllocator::Visitor>> gpu_visitors_ GUARDED_BY(mu_);
 
-  virtual ~GPUProcessState();
+  std::vector<AllocatorParts> cuda_host_allocators_ GUARDED_BY(mu_);
+  std::vector<std::vector<SubAllocator::Visitor>> cuda_host_alloc_visitors_
+      GUARDED_BY(mu_);
+  std::vector<std::vector<SubAllocator::Visitor>> cuda_host_free_visitors_
+      GUARDED_BY(mu_);
 
-  // Optional RecordingAllocators that wrap the corresponding
-  // Allocators for runtime attribute use analysis.
-  std::vector<Allocator*> gpu_al_ GUARDED_BY(mu_);
-  std::vector<Allocator*> cuda_al_ GUARDED_BY(mu_);
+  virtual ~GPUProcessState();
 
   friend class GPUDeviceTest;
 };
diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc
index 583bff2c07..6b2f6547b0 100644
--- a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc
@@ -31,7 +31,8 @@ TEST(PoolAllocatorTest, ZeroSizeBuffers) {
       2 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
+              .ValueOrDie(),
+          0 /*numa_node*/, {}, {}),
       new NoopRounder, "pool");
 
   EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/));
@@ -49,7 +50,8 @@ TEST(PoolAllocatorTest, ZeroSizePool) {
       0 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
+              .ValueOrDie(),
+          0 /*numa_node*/, {}, {}),
       new NoopRounder, "pool");
 
   EXPECT_EQ(0, pool.get_from_pool_count());
@@ -82,7 +84,8 @@ TEST(PoolAllocatorTest, Alignment) {
       0 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
+              .ValueOrDie(),
+          0 /*numa_node*/, {}, {}),
       new NoopRounder, "pool");
   for (int i = 0; i < 16; ++i) {
     size_t alignment = 1 << i;
@@ -97,8 +100,8 @@ TEST(PoolAllocatorTest, Alignment) {
 
 TEST(PoolAllocatorTest, AutoResize) {
   PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/,
-                     new BasicCPUAllocator(0 /*numa_node*/), new NoopRounder,
-                     "pool");
+                     new BasicCPUAllocator(0 /*numa_node*/, {}, {}),
+                     new NoopRounder, "pool");
 
   // Alloc/dealloc 10 sizes just a few times, confirming pool size
   // stays at 2.
@@ -123,14 +126,32 @@ TEST(PoolAllocatorTest, AutoResize) {
 }
 
 TEST(PoolAllocatorTest, CudaHostAllocator) {
+  int alloc_count = 0;
+  int64 alloc_size = 0;
+  SubAllocator::Visitor alloc_visitor =
+      [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) {
+        ++alloc_count;
+        alloc_size += size;
+      };
+  int free_count = 0;
+  int64 free_size = 0;
+  SubAllocator::Visitor free_visitor =
+      [&free_count, &free_size](void* ptr, int numa_node, int64 size) {
+        ++free_count;
+        free_size += size;
+      };
   se::Platform* platform =
       se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
-  PoolAllocator pool(
-      2 /*pool_size_limit*/, false /*auto_resize*/,
-      new CUDAHostAllocator(
-          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
-      new NoopRounder, "pool");
+  CUDAHostAllocator* sub_allocator = new CUDAHostAllocator(
+      platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
+          .ValueOrDie(),
+      0 /*numa_node*/, {alloc_visitor}, {free_visitor});
+  PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
+                     sub_allocator, new NoopRounder, "pool");
+  EXPECT_EQ(0, alloc_count);
+  EXPECT_EQ(0, alloc_size);
+  EXPECT_EQ(0, free_count);
+  EXPECT_EQ(0, free_size);
 
   // Repeatedly Get a 16-byte value, confirming that there's only
   // one real allocation.
@@ -138,6 +159,10 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   EXPECT_EQ(0, pool.get_from_pool_count());
   EXPECT_EQ(1, pool.allocated_count());
   EXPECT_NE(nullptr, p1_16);
+  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
+  // Each suballocation includes a 16B ChunkPrefix.
+  static const int kChunkPrefixSize = 16;
+  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
   pool.DeallocateRaw(p1_16);
   // Pool contents {16}
   EXPECT_EQ(1, pool.put_count());
@@ -148,6 +173,9 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   pool.DeallocateRaw(p2_16);  // Put it back.
   // Pool contents {16}
   EXPECT_EQ(2, pool.put_count());
+  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
+  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(0, free_count);
 
   // Get two more values of different sizes.
   void* p3_4 = pool.AllocateRaw(4, 4);
@@ -160,6 +188,9 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   void* p4_2 = pool.AllocateRaw(4, 2);  // Get a third size buffer.
   EXPECT_NE(nullptr, p4_2);
   EXPECT_EQ(0, pool.evicted_count());
+  EXPECT_EQ(3, alloc_count);
+  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(0, free_count);
 
   // The pool is full: when we put back p4_2, the 16-byte buffer
   // should be evicted since it was least recently inserted.
@@ -167,6 +198,10 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   // Pool contents {2, 4}
   EXPECT_EQ(4, pool.put_count());
   EXPECT_EQ(1, pool.evicted_count());
+  EXPECT_EQ(3, alloc_count);
+  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(1, free_count);
+  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
 
   // Re-getting and putting size 2 or 4 should not alter pool size or
   // num-evicted.
@@ -180,12 +215,20 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   EXPECT_EQ(6, pool.put_count());
   EXPECT_EQ(3, pool.allocated_count());
   EXPECT_EQ(1, pool.evicted_count());
+  EXPECT_EQ(3, alloc_count);
+  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(1, free_count);
+  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
 
   pool.Clear();
   EXPECT_EQ(0, pool.get_from_pool_count());
   EXPECT_EQ(0, pool.put_count());
   EXPECT_EQ(0, pool.allocated_count());
   EXPECT_EQ(0, pool.evicted_count());
+  EXPECT_EQ(3, alloc_count);
+  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(3, free_count);
+  EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size);
 }
 
 TEST(PoolAllocatorTest, Pow2Rounder) {
@@ -206,7 +249,8 @@ TEST(PoolAllocatorTest, Name) {
       2 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
+              .ValueOrDie(),
+          0 /*numa_node*/, {}, {}),
       new NoopRounder, "pool");
   EXPECT_EQ("pool", pool.Name());
 }
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index df9c3a686c..538a70668a 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -23,12 +23,11 @@ limitations under the License.
 
 #include <cstdlib>
 #include "tensorflow/core/common_runtime/bfc_allocator.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
-#include "tensorflow/core/framework/allocator_registry.h"
+#include "tensorflow/core/common_runtime/pool_allocator.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mem.h"
-#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/numa.h"
 
 #ifndef INTEL_MKL_DNN_ONLY
 #include "i_malloc.h"
@@ -40,20 +39,16 @@ typedef unsigned int uint;
 
 namespace tensorflow {
 
-class MklSubAllocator : public SubAllocator {
+class MklSubAllocator : public BasicCPUAllocator {
  public:
+  MklSubAllocator() : BasicCPUAllocator(port::kNUMANoAffinity, {}, {}) {}
   ~MklSubAllocator() override {}
-
-  void* Alloc(size_t alignment, size_t num_bytes) override {
-    return port::AlignedMalloc(num_bytes, alignment);
-  }
-  void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); }
 };
 
 // CPU allocator that handles small-size allocations by calling
 // suballocator directly. Mostly, it is just a wrapper around a suballocator
 // (that calls malloc and free directly) with support for bookkeeping.
-class MklSmallSizeAllocator : public VisitableAllocator {
+class MklSmallSizeAllocator : public Allocator {
  public:
   MklSmallSizeAllocator(SubAllocator* sub_allocator, size_t total_memory,
                         const string& name)
@@ -75,10 +70,6 @@ class MklSmallSizeAllocator : public VisitableAllocator {
       CHECK(map_.insert(map_val).second);
       // Increment statistics for small-size allocations.
       IncrementStats(num_bytes);
-      // Call alloc visitors.
-      for (const auto& visitor : alloc_visitors_) {
-        visitor(ptr, num_bytes);
-      }
     }
     return ptr;
   }
@@ -94,9 +85,6 @@ class MklSmallSizeAllocator : public VisitableAllocator {
     if (map_iter != map_.end()) {
       // Call free visitors.
       size_t dealloc_bytes = map_iter->second;
-      for (const auto& visitor : free_visitors_) {
-        visitor(ptr, dealloc_bytes);
-      }
       sub_allocator_->Free(ptr, dealloc_bytes);
       DecrementStats(dealloc_bytes);
       map_.erase(map_iter);
@@ -121,16 +109,6 @@ class MklSmallSizeAllocator : public VisitableAllocator {
     stats_.Clear();
   }
 
-  void AddAllocVisitor(Visitor visitor) override {
-    mutex_lock l(mutex_);
-    alloc_visitors_.push_back(visitor);
-  }
-
-  void AddFreeVisitor(Visitor visitor) override {
-    mutex_lock l(mutex_);
-    free_visitors_.push_back(visitor);
-  }
-
  private:
   // Increment statistics for the allocator handling small allocations.
   inline void IncrementStats(size_t alloc_size)
@@ -163,15 +141,11 @@ class MklSmallSizeAllocator : public VisitableAllocator {
 
   // Allocator stats for small allocs
   AllocatorStats stats_ GUARDED_BY(mutex_);
-
-  // Visitors
-  std::vector<Visitor> alloc_visitors_ GUARDED_BY(mutex_);
-  std::vector<Visitor> free_visitors_ GUARDED_BY(mutex_);
 };
 
 /// CPU allocator for MKL that wraps BFC allocator and intercepts
 /// and redirects memory allocation calls from MKL.
-class MklCPUAllocator : public VisitableAllocator {
+class MklCPUAllocator : public Allocator {
  public:
   // Constructor and other standard functions
 
@@ -284,16 +258,6 @@ class MklCPUAllocator : public VisitableAllocator {
     large_size_allocator_->ClearStats();
   }
 
-  void AddAllocVisitor(Visitor visitor) override {
-    small_size_allocator_->AddAllocVisitor(visitor);
-    large_size_allocator_->AddAllocVisitor(visitor);
-  }
-
-  void AddFreeVisitor(Visitor visitor) override {
-    small_size_allocator_->AddFreeVisitor(visitor);
-    large_size_allocator_->AddFreeVisitor(visitor);
-  }
-
  private:
   // Hooks provided by this allocator for memory allocation routines from MKL
 
@@ -330,7 +294,7 @@ class MklCPUAllocator : public VisitableAllocator {
   // The alignment that we need for the allocations
   static constexpr const size_t kAlignment = 64;
 
-  VisitableAllocator* large_size_allocator_;     // owned by this class
+  Allocator* large_size_allocator_;              // owned by this class
   MklSmallSizeAllocator* small_size_allocator_;  // owned by this class.
 
   SubAllocator* sub_allocator_;  // not owned by this class
diff --git a/tensorflow/core/common_runtime/pool_allocator.cc b/tensorflow/core/common_runtime/pool_allocator.cc
index fdad8de8d6..66dc8f3322 100644
--- a/tensorflow/core/common_runtime/pool_allocator.cc
+++ b/tensorflow/core/common_runtime/pool_allocator.cc
@@ -40,8 +40,7 @@ PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
       auto_resize_(auto_resize),
       pool_size_limit_(pool_size_limit),
       allocator_(allocator),
-      size_rounder_(size_rounder),
-      allocation_begun_(false) {
+      size_rounder_(size_rounder) {
   if (auto_resize) {
     CHECK_LT(size_t{0}, pool_size_limit)
         << "size limit must be > 0 if auto_resize is true.";
@@ -93,7 +92,6 @@ ChunkPrefix* FindPrefix(void* user_ptr) {
 }  // namespace
 
 void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
-  if (!allocation_begun_) allocation_begun_ = true;
   if (num_bytes == 0) return nullptr;
 
   // If alignment is larger than kPoolAlignment, increase num_bytes so that we
@@ -129,9 +127,6 @@ void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
     return PrepareChunk(r, alignment, num_bytes);
   } else {
     void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes);
-    for (const auto& v : alloc_visitors_) {
-      v(ptr, num_bytes);
-    }
     return PrepareChunk(ptr, alignment, num_bytes);
   }
 }
@@ -141,9 +136,6 @@ void PoolAllocator::DeallocateRaw(void* ptr) {
   ChunkPrefix* cp = FindPrefix(ptr);
   CHECK_LE((void*)cp, (void*)ptr);
   if (!has_size_limit_ && !auto_resize_) {
-    for (const auto& v : free_visitors_) {
-      v(cp, cp->num_bytes);
-    }
     allocator_->Free(cp, cp->num_bytes);
   } else {
     mutex_lock lock(mutex_);
@@ -164,9 +156,6 @@ void PoolAllocator::Clear() {
     mutex_lock lock(mutex_);
     for (auto iter : pool_) {
       PtrRecord* pr = iter.second;
-      for (const auto& v : free_visitors_) {
-        v(pr->ptr, pr->num_bytes);
-      }
       allocator_->Free(pr->ptr, pr->num_bytes);
       delete pr;
     }
@@ -221,9 +210,6 @@ void PoolAllocator::EvictOne() {
     DCHECK(iter != pool_.end());
   }
   pool_.erase(iter);
-  for (const auto& v : free_visitors_) {
-    v(prec->ptr, prec->num_bytes);
-  }
   allocator_->Free(prec->ptr, prec->num_bytes);
   delete prec;
   ++evicted_count_;
@@ -269,28 +255,19 @@ void PoolAllocator::EvictOne() {
   }
 }
 
-void PoolAllocator::AddAllocVisitor(Visitor visitor) {
-  mutex_lock lock(mutex_);
-  CHECK(!allocation_begun_)
-      << "AddAllocVisitor may not be called after pool allocation "
-      << "has begun.";
-  alloc_visitors_.push_back(visitor);
-}
-
-void PoolAllocator::AddFreeVisitor(Visitor visitor) {
-  mutex_lock lock(mutex_);
-  CHECK(!allocation_begun_)
-      << "AddFreeVisitor may not be called after pool allocation "
-      << "has begun.";
-  free_visitors_.push_back(visitor);
-}
-
 void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes) {
-  return port::AlignedMalloc(num_bytes, static_cast<int>(alignment));
+  void* ptr = nullptr;
+  if (num_bytes > 0) {
+    ptr = port::AlignedMalloc(num_bytes, static_cast<int>(alignment));
+    VisitAlloc(ptr, numa_node_, num_bytes);
+  }
+  return ptr;
 }
 
 void BasicCPUAllocator::Free(void* ptr, size_t num_bytes) {
-  port::AlignedFree(ptr);
+  if (num_bytes > 0) {
+    VisitFree(ptr, numa_node_, num_bytes);
+    port::AlignedFree(ptr);
+  }
 }
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/pool_allocator.h b/tensorflow/core/common_runtime/pool_allocator.h
index 607734445b..5b4623ba10 100644
--- a/tensorflow/core/common_runtime/pool_allocator.h
+++ b/tensorflow/core/common_runtime/pool_allocator.h
@@ -16,14 +16,13 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
 
-// Simple LRU pool allocators for various flavors of CPU RAM that
-// implement the VisitableAllocator interface.
+// Simple LRU pool allocators for various flavors of CPU RAM.
 
 #include <atomic>
 #include <map>
 #include <memory>
 #include <vector>
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/lib/core/bits.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -41,7 +40,7 @@ class RoundUpInterface {
 
 // Size-limited pool of memory buffers obtained from a SubAllocator
 // instance.  Pool eviction policy is LRU.
-class PoolAllocator : public VisitableAllocator {
+class PoolAllocator : public Allocator {
  public:
   // "pool_size_limit" is the maximum number of returned, re-usable
   // memory buffers to keep in the pool.  If pool_size_limit == 0, the
@@ -64,14 +63,6 @@ class PoolAllocator : public VisitableAllocator {
 
   void DeallocateRaw(void* ptr) override;
 
-  // REQUIRES: The following functions may only be called prior
-  // to the first Allocate*() call.  Once allocation has begun, it is
-  // illegal to register another visitor.
-
-  void AddAllocVisitor(Visitor visitor) override;
-
-  void AddFreeVisitor(Visitor visitor) override;
-
   // Allocate an unused memory region of size "num_bytes".  Fetch from
   // the pool if available, otherwise call allocator_.
   void* Get(size_t num_bytes);
@@ -141,12 +132,6 @@ class PoolAllocator : public VisitableAllocator {
   int64 put_count_ GUARDED_BY(mutex_) = 0;
   int64 allocated_count_ GUARDED_BY(mutex_) = 0;
   int64 evicted_count_ GUARDED_BY(mutex_) = 0;
-  // Write access to these is guarded by mutex_, but not read
-  // access. They may only be modified prior to the first
-  // allocation.  Later attempts to modify will fail.
-  std::vector<Visitor> alloc_visitors_;
-  std::vector<Visitor> free_visitors_;
-  std::atomic<bool> allocation_begun_;
 };
 
 // Do-nothing rounder. Passes through sizes unchanged.
@@ -166,7 +151,9 @@ class Pow2Rounder : public RoundUpInterface {
 class BasicCPUAllocator : public SubAllocator {
  public:
   // Argument numa_node is currently ignored.
-  explicit BasicCPUAllocator(int numa_node) : numa_node_(numa_node) {}
+  BasicCPUAllocator(int numa_node, const std::vector<Visitor>& alloc_visitors,
+                    const std::vector<Visitor>& free_visitors)
+      : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {}
 
   ~BasicCPUAllocator() override {}
 
@@ -176,6 +163,8 @@ class BasicCPUAllocator : public SubAllocator {
 
  private:
   int numa_node_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc
index 447338e7bd..bcaa37fc8a 100644
--- a/tensorflow/core/common_runtime/process_state.cc
+++ b/tensorflow/core/common_runtime/process_state.cc
@@ -71,20 +71,28 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) {
   return MemDesc();
 }
 
-VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) {
+Allocator* ProcessState::GetCPUAllocator(int numa_node) {
   CHECK_GE(numa_node, 0);
   if (!numa_enabled_) numa_node = 0;
   mutex_lock lock(mu_);
   while (cpu_allocators_.size() <= static_cast<size_t>(numa_node)) {
+    // If visitors have been defined we need an Allocator built from
+    // a SubAllocator.  Prefer BFCAllocator, but fall back to PoolAllocator
+    // depending on env var setting.
+    const bool alloc_visitors_defined =
+        (!cpu_alloc_visitors_.empty() || !cpu_free_visitors_.empty());
     bool use_bfc_allocator = false;
-    // TODO(reedwm): Switch default to BGFAllocator if it's at least as fast and
-    // efficient.
-    Status status = ReadBoolFromEnvVar("TF_CPU_ALLOCATOR_USE_BFC", false,
-                                       &use_bfc_allocator);
+    Status status = ReadBoolFromEnvVar(
+        "TF_CPU_ALLOCATOR_USE_BFC", alloc_visitors_defined, &use_bfc_allocator);
     if (!status.ok()) {
       LOG(ERROR) << "GetCPUAllocator: " << status.error_message();
     }
-    VisitableAllocator* allocator;
+    Allocator* allocator = nullptr;
+    SubAllocator* sub_allocator =
+        (alloc_visitors_defined || use_bfc_allocator)
+            ? new BasicCPUAllocator(numa_enabled_ ? numa_node : -1,
+                                    cpu_alloc_visitors_, cpu_free_visitors_)
+            : nullptr;
     if (use_bfc_allocator) {
       // TODO(reedwm): evaluate whether 64GB by default is the best choice.
       int64 cpu_mem_limit_in_mb = -1;
@@ -95,34 +103,63 @@ VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) {
         LOG(ERROR) << "GetCPUAllocator: " << status.error_message();
       }
       int64 cpu_mem_limit = cpu_mem_limit_in_mb * (1LL << 20);
-      allocator = new BFCAllocator(
-          new BasicCPUAllocator(numa_enabled_ ? numa_node : -1), cpu_mem_limit,
-          true /*allow_growth*/, "bfc_cpu_allocator_for_gpu" /*name*/);
+      DCHECK(sub_allocator);
+      allocator =
+          new BFCAllocator(sub_allocator, cpu_mem_limit, true /*allow_growth*/,
+                           "bfc_cpu_allocator_for_gpu" /*name*/);
       VLOG(2) << "Using BFCAllocator with memory limit of "
               << cpu_mem_limit_in_mb << " MB for ProcessState CPU allocator";
-    } else {
-      allocator = new PoolAllocator(
-          100 /*pool_size_limit*/, true /*auto_resize*/,
-          new BasicCPUAllocator(numa_enabled_ ? numa_node : -1),
-          new NoopRounder, "cpu_pool");
+    } else if (alloc_visitors_defined) {
+      DCHECK(sub_allocator);
+      allocator =
+          new PoolAllocator(100 /*pool_size_limit*/, true /*auto_resize*/,
+                            sub_allocator, new NoopRounder, "cpu_pool");
       VLOG(2) << "Using PoolAllocator for ProcessState CPU allocator "
               << "numa_enabled_=" << numa_enabled_
               << " numa_node=" << numa_node;
+    } else {
+      DCHECK(!sub_allocator);
+      allocator = cpu_allocator();
     }
-    if (LogMemory::IsEnabled()) {
+    if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) {
       // Wrap the allocator to track allocation ids for better logging
       // at the cost of performance.
-      allocator = new TrackingVisitableAllocator(allocator, true);
+      allocator = new TrackingAllocator(allocator, true);
     }
     cpu_allocators_.push_back(allocator);
+    if (!sub_allocator) {
+      DCHECK(cpu_alloc_visitors_.empty() && cpu_free_visitors_.empty());
+    }
   }
   return cpu_allocators_[numa_node];
 }
 
+void ProcessState::AddCPUAllocVisitor(SubAllocator::Visitor visitor) {
+  VLOG(1) << "AddCPUAllocVisitor";
+  mutex_lock lock(mu_);
+  CHECK_EQ(0, cpu_allocators_.size())  // Crash OK
+      << "AddCPUAllocVisitor must be called prior to first call to "
+         "ProcessState::GetCPUAllocator";
+  cpu_alloc_visitors_.push_back(std::move(visitor));
+}
+
+void ProcessState::AddCPUFreeVisitor(SubAllocator::Visitor visitor) {
+  mutex_lock lock(mu_);
+  CHECK_EQ(0, cpu_allocators_.size())  // Crash OK
+      << "AddCPUFreeVisitor must be called prior to first call to "
+         "ProcessState::GetCPUAllocator";
+  cpu_free_visitors_.push_back(std::move(visitor));
+}
+
 void ProcessState::TestOnlyReset() {
   mutex_lock lock(mu_);
+  // Don't delete this value because it's static.
+  Allocator* default_cpu_allocator = cpu_allocator();
   mem_desc_map_.clear();
-  gtl::STLDeleteElements(&cpu_allocators_);
+  for (Allocator* a : cpu_allocators_) {
+    if (a != default_cpu_allocator) delete a;
+  }
+  cpu_allocators_.clear();
   gtl::STLDeleteElements(&cpu_al_);
 }
 
diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h
index 2892677333..cac312d849 100644
--- a/tensorflow/core/common_runtime/process_state.h
+++ b/tensorflow/core/common_runtime/process_state.h
@@ -30,7 +30,6 @@ limitations under the License.
 namespace tensorflow {
 
 class Allocator;
-class VisitableAllocator;
 class PoolAllocator;
 
 // Singleton that manages per-process state, e.g. allocation of
@@ -65,7 +64,15 @@ class ProcessState {
 
   // Returns the one CPUAllocator used for the given numa_node.
   // TEMPORARY: ignores numa_node.
-  VisitableAllocator* GetCPUAllocator(int numa_node);
+  Allocator* GetCPUAllocator(int numa_node);
+
+  // Registers alloc visitor for the CPU allocator(s).
+  // REQUIRES: must be called before GetCPUAllocator.
+  void AddCPUAllocVisitor(SubAllocator::Visitor v);
+
+  // Registers free visitor for the CPU allocator(s).
+  // REQUIRES: must be called before GetCPUAllocator.
+  void AddCPUFreeVisitor(SubAllocator::Visitor v);
 
   typedef std::unordered_map<const void*, MemDesc> MDMap;
 
@@ -87,7 +94,9 @@ class ProcessState {
 
   mutex mu_;
 
-  std::vector<VisitableAllocator*> cpu_allocators_ GUARDED_BY(mu_);
+  std::vector<Allocator*> cpu_allocators_ GUARDED_BY(mu_);
+  std::vector<SubAllocator::Visitor> cpu_alloc_visitors_ GUARDED_BY(mu_);
+  std::vector<SubAllocator::Visitor> cpu_free_visitors_ GUARDED_BY(mu_);
 
   virtual ~ProcessState();
 
diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h
index 103eee03b3..9d59264899 100644
--- a/tensorflow/core/common_runtime/renamed_device.h
+++ b/tensorflow/core/common_runtime/renamed_device.h
@@ -72,9 +72,10 @@ class RenamedDevice : public Device {
     return underlying_->MakeGpuDevice();
   }
 
-  void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
-                             DeviceContext* dc, Allocator* allocator) override {
-    underlying_->ReinitializeGpuDevice(context, device, dc, allocator);
+  Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
+                               DeviceContext* dc,
+                               Allocator* allocator) override {
+    return underlying_->ReinitializeGpuDevice(context, device, dc, allocator);
   }
 
   Status MakeTensorFromProto(const TensorProto& tensor_proto,
diff --git a/tensorflow/core/common_runtime/visitable_allocator.h b/tensorflow/core/common_runtime/visitable_allocator.h
deleted file mode 100644
index ae0563a96a..0000000000
--- a/tensorflow/core/common_runtime/visitable_allocator.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_
-
-#include <functional>
-#include "tensorflow/core/framework/allocator.h"
-#include "tensorflow/core/framework/tracking_allocator.h"
-
-namespace tensorflow {
-
-// Subclass VisitableAllocator instead of Allocator when a memory
-// allocator needs to enable some kind of registration/deregistration
-// of memory areas.
-class VisitableAllocator : public Allocator {
- public:
-  // Visitor gets called with a pointer to a memory area and its
-  // size in bytes.
-  typedef std::function<void(void*, size_t)> Visitor;
-
-  // Register a visitor guaranteed to be called exactly once on each
-  // chunk of memory newly allocated from the underlying device.
-  // Typically, chunks will be reused and possibly sub-divided by a
-  // pool manager, so the calls will happen only once per process
-  // execution, not once per tensor (re)allocation.
-  virtual void AddAllocVisitor(Visitor visitor) = 0;
-
-  // Register a visitor guaranteed to be called on each chunk of
-  // memory returned to the underlying device.
-  virtual void AddFreeVisitor(Visitor visitor) = 0;
-};
-
-// Needed for cases when a VisitableAllocator gets wrapped for tracking.
-// Multiple-inheritance is considered acceptable in this case because
-// VisitableAllocator is a pure virtual interface and only TrackingAllocator
-// has default implementation.
-class TrackingVisitableAllocator : public TrackingAllocator,
-                                   public VisitableAllocator {
- public:
-  TrackingVisitableAllocator(VisitableAllocator* allocator, bool track_ids)
-      : TrackingAllocator(allocator, track_ids), allocator_(allocator) {}
-  ~TrackingVisitableAllocator() override {}
-
-  string Name() override { return TrackingAllocator::Name(); }
-
-  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
-    return TrackingAllocator::AllocateRaw(alignment, num_bytes);
-  }
-
-  void DeallocateRaw(void* ptr) override {
-    TrackingAllocator::DeallocateRaw(ptr);
-  }
-
-  void AddAllocVisitor(Visitor visitor) override {
-    allocator_->AddAllocVisitor(visitor);
-  }
-
-  void AddFreeVisitor(Visitor visitor) override {
-    allocator_->AddFreeVisitor(visitor);
-  }
-
- protected:
-  VisitableAllocator* allocator_;
-};
-}  // namespace tensorflow
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_
diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc
index 2a7ee16a16..84cee5569c 100644
--- a/tensorflow/core/framework/allocator.cc
+++ b/tensorflow/core/framework/allocator.cc
@@ -196,7 +196,7 @@ class CPUAllocatorFactory : public AllocatorFactory {
   class CPUSubAllocator : public SubAllocator {
    public:
     explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
-        : cpu_allocator_(cpu_allocator) {}
+        : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
 
     void* Alloc(size_t alignment, size_t num_bytes) override {
       return cpu_allocator_->AllocateRaw(alignment, num_bytes);
@@ -222,4 +222,22 @@ Allocator* cpu_allocator() {
   }
   return cpu_alloc;
 }
+
+SubAllocator::SubAllocator(const std::vector<Visitor>& alloc_visitors,
+                           const std::vector<Visitor>& free_visitors)
+    : alloc_visitors_(alloc_visitors), free_visitors_(free_visitors) {}
+
+void SubAllocator::VisitAlloc(void* ptr, int index, size_t num_bytes) {
+  for (const auto& v : alloc_visitors_) {
+    v(ptr, index, num_bytes);
+  }
+}
+
+void SubAllocator::VisitFree(void* ptr, int index, size_t num_bytes) {
+  // Although we don't guarantee any order of visitor application, strive
+  // to apply free visitors in reverse order of alloc visitors.
+  for (int i = free_visitors_.size() - 1; i >= 0; --i) {
+    free_visitors_[i](ptr, index, num_bytes);
+  }
+}
 }  // namespace tensorflow
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index ded120b704..8c23604625 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/resource_handle.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -387,13 +388,36 @@ void EnableCPUAllocatorStats(bool enable);
 // full statistics. By default, it's disabled.
 void EnableCPUAllocatorFullStats(bool enable);
 
-// Abstract interface of an object that does the underlying suballoc/free of
-// memory for a higher-level allocator.
+// An object that does the underlying suballoc/free of memory for a higher-level
+// allocator.  The expectation is that the higher-level allocator is doing some
+// kind of cache or pool management so that it will call SubAllocator::Alloc and
+// Free relatively infrequently, compared to the number of times its own
+// AllocateRaw and Free methods are called.
 class SubAllocator {
  public:
+  // Visitor gets called with a pointer to a memory area and its
+  // size in bytes.  The index value will be numa_node for a CPU
+  // allocator and GPU id for a GPU allocator.
+  typedef std::function<void(void*, int index, size_t)> Visitor;
+
+  SubAllocator(const std::vector<Visitor>& alloc_visitors,
+               const std::vector<Visitor>& free_visitors);
+
   virtual ~SubAllocator() {}
   virtual void* Alloc(size_t alignment, size_t num_bytes) = 0;
   virtual void Free(void* ptr, size_t num_bytes) = 0;
+
+ protected:
+  // Implementation of Alloc() method must call this on newly allocated
+  // value.
+  void VisitAlloc(void* ptr, int index, size_t num_bytes);
+
+  // Implementation of Free() method must call this on value to be
+  // freed immediately before deallocation.
+  void VisitFree(void* ptr, int index, size_t num_bytes);
+
+  const std::vector<Visitor> alloc_visitors_;
+  const std::vector<Visitor> free_visitors_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index 794250a2c1..53ac639b4c 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -214,10 +214,12 @@ class DeviceBase {
 
   // This is overridden by GPU devices to reinitialize the derived
   // type returned by MakeGpuDevice.
-  virtual void ReinitializeGpuDevice(OpKernelContext* /*context*/,
-                                     PerOpGpuDevice* /*device*/,
-                                     DeviceContext* /*dc*/,
-                                     Allocator* /*allocator*/) {}
+  virtual Status ReinitializeGpuDevice(OpKernelContext* /*context*/,
+                                       PerOpGpuDevice* /*device*/,
+                                       DeviceContext* /*dc*/,
+                                       Allocator* /*allocator*/) {
+    return Status::OK();
+  }
 
   // Unimplemented by default
   virtual const DeviceAttributes& attributes() const;
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 80f2b12987..3e34bf0418 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -265,9 +265,12 @@ OpKernelContext::OpKernelContext(Params* params, int num_outputs)
   params_->ensure_eigen_gpu_device();
   if (params_->eigen_gpu_device != nullptr) {
     Allocator* eigen_gpu_allocator = get_allocator(AllocatorAttributes());
-    params_->device->ReinitializeGpuDevice(this, params_->eigen_gpu_device,
-                                           params_->op_device_context,
-                                           eigen_gpu_allocator);
+    Status s = params_->device->ReinitializeGpuDevice(
+        this, params_->eigen_gpu_device, params_->op_device_context,
+        eigen_gpu_allocator);
+    if (!s.ok()) {
+      SetStatus(s);
+    }
   }
   if (params_->record_tensor_accesses) {
     referenced_tensors_.Init();
-- 
GitLab


From a76646d4b4ad5d56b5e63c139985bbd1eb98dd90 Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Mon, 17 Sep 2018 17:50:50 -0700
Subject: [PATCH 0306/1357] Add type checking at the beginning of tpu.shard().

Otherwise a message like "TypeError: Tensor objects are only iterable when eager execution is enabled. To iterate over this tensor use tf.map_fn." will be thrown, which is confusing.

PiperOrigin-RevId: 213371676
---
 tensorflow/contrib/tpu/python/tpu/tpu.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 815a087a24..593f1d909e 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -847,8 +847,12 @@ def shard(computation,
   if num_shards <= 0:
     raise ValueError("num_shards must be a positive integer.")
 
+  inputs = [] if inputs is None else inputs
+  if not isinstance(inputs, list):
+    raise TypeError("tpu.shard()'s inputs must be a list of Tensors or None.")
+
   # Converts inputs to Tensors.
-  inputs = [] if inputs is None else [ops.convert_to_tensor(x) for x in inputs]
+  inputs = [ops.convert_to_tensor(x) for x in inputs]
 
   if input_shard_axes is None:
     input_shard_axes = [0] * len(inputs)
-- 
GitLab


From 1ede512f8c185a1cc2bd88830eeca3165283f06d Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 17 Sep 2018 17:53:41 -0700
Subject: [PATCH 0307/1357] Remove some dead code after migration from python
 to C.

PiperOrigin-RevId: 213372027
---
 tensorflow/c/eager/tape.h                 | 12 ++++++++++++
 tensorflow/python/eager/backprop.py       | 21 ---------------------
 tensorflow/python/eager/pywrap_tfe_src.cc |  3 +++
 3 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index ce038a4b57..49990b6249 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -440,6 +440,18 @@ Status InitialGradients(const VSpace<Gradient, BackwardFunction>& vspace,
   return Status::OK();
 }
 
+// TODO(agarwal): use an automatic mechanism for handling None arguments to
+// gradient functions.
+//
+// Some gradient functions can accept None arguments for gradients. The
+// following maps the operation name to the indices at which the corresponding
+// gradient function can accept None values. e.g. FusedBatchNorm outputs 5
+// values and hence receives 5 gradient values during backprop. However the
+// gradient function uses only the first of those values and ignores the rest.
+// The entry, "FusedBatchNorm": [1, 2, 3, 4], indicates that only the gradient
+// corresponding to index 0 is used, and the gradient values at indices 1-4 are
+// ignored (and hence can be None). The backprop algorithm can then leverage
+// this by not constructing zeros to pass for those indices.
 gtl::FlatMap<string, gtl::FlatSet<int>>* FunctionsAcceptingNoneForIndicesMap() {
   static auto* const m = new gtl::FlatMap<string, gtl::FlatSet<int>>({
       {"SoftmaxCrossEntropyWithLogits", {1}},
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index e6cf9653a8..907234b0f8 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -120,27 +120,6 @@ def _gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs,
 pywrap_tensorflow.TFE_Py_RegisterGradientFunction(_gradient_function)
 
 
-_tracing = False
-
-
-# TODO(agarwal): use an automatic mechanism for handling None arguments to
-# gradient functions.
-# Some gradient functions can accept None arguments for gradients. The following
-# maps the operation name to the indices at which the corresponding gradient
-# function can accept None values.
-# e.g. FusedBatchNorm outputs 5 values and hence receives 5 gradient values
-# during backprop. However the gradient function uses only the first of those
-# values and ignores the rest. The entry, "FusedBatchNorm": [1, 2, 3, 4],
-# indicates that only the gradient corresponding to index 0 is used, and the
-# gradient values at indices 1-4 are ignored (and hence can be None). The
-# backprop algorithm can then leverage this by not constructing zeros to
-# pass for those indices.
-_grad_fn_accepts_none_for_indices = {
-    "SoftmaxCrossEntropyWithLogits": [1],
-    "FusedBatchNorm": [1, 2, 3, 4]
-}
-
-
 def _record_gradient(op_name, inputs, attrs, results, name):
   return pywrap_tensorflow.TFE_Py_RecordGradient(op_name, inputs, attrs,
                                                  results, name)
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 9f2f4e06ad..99b46159a9 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1744,6 +1744,9 @@ PyObject* MaybeGetDTypeForAttr(const string& attr,
   Py_RETURN_NONE;
 }
 
+// TODO(agarwal): use an automatic mechanism for handling None arguments to
+// gradient functions.
+
 // Returns a pair where the first value of the pair indicates whether or not all
 // outputs are unused. If the first value is false, the second value is a
 // set that identifies which of the output indices are unused.
-- 
GitLab


From 71fab28dc4741dedf13fea732f6b134608719bc7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 17:55:26 -0700
Subject: [PATCH 0308/1357] Increase test timeout for image_grad_test to
 de-flake.

PiperOrigin-RevId: 213372241
---
 tensorflow/python/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 2dc2808152..2eeae773d3 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3090,7 +3090,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "image_grad_test",
-    size = "small",
+    size = "medium",
     srcs = ["ops/image_grad_test.py"],
     additional_deps = [
         ":client_testlib",
-- 
GitLab


From 3b7ca4b86416f6b6153de90bc1df6e6e5b41934c Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 17 Sep 2018 18:42:45 -0700
Subject: [PATCH 0309/1357] Num elements fastpath for eager tensors.

PiperOrigin-RevId: 213377426
---
 tensorflow/c/eager/c_api.cc                   | 11 +++++
 tensorflow/c/eager/c_api.h                    |  2 +
 .../common_runtime/eager/tensor_handle.cc     | 16 +++++++-
 .../core/common_runtime/eager/tensor_handle.h |  1 +
 tensorflow/python/eager/pywrap_tensor.cc      | 41 +++++++++++--------
 tensorflow/python/eager/pywrap_tensor.h       |  5 ++-
 tensorflow/python/eager/pywrap_tfe_src.cc     |  9 ++--
 7 files changed, 61 insertions(+), 24 deletions(-)

diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 6f86ea80e5..0bf3d9542b 100755
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -375,6 +375,17 @@ int TFE_TensorHandleNumDims(TFE_TensorHandle* h, TF_Status* status) {
   return result;
 }
 
+int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h, TF_Status* status) {
+  if (h == nullptr || h->handle == nullptr) {
+    status->status = tensorflow::errors::InvalidArgument(
+        "The passed in handle is a nullptr");
+    return -1;
+  }
+  tensorflow::int64 result;
+  status->status = h->handle->NumElements(&result);
+  return result;
+}
+
 int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index,
                             TF_Status* status) {
   if (h == nullptr || h->handle == nullptr) {
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index a87d73ec8e..6323f8a053 100755
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -163,6 +163,8 @@ TF_CAPI_EXPORT extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h);
 // This function will block till the operation that produces `h` has completed.
 TF_CAPI_EXPORT extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h,
                                                   TF_Status* status);
+TF_CAPI_EXPORT extern int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h,
+                                                          TF_Status* status);
 // This function will block till the operation that produces `h` has completed.
 TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h,
                                                   int dim_index,
diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc
index b912f7d37b..d58724cbfa 100644
--- a/tensorflow/core/common_runtime/eager/tensor_handle.cc
+++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc
@@ -125,7 +125,6 @@ Status TensorHandle::Shape(tensorflow::TensorShape* shape) {
 Status TensorHandle::NumDims(int* num_dims) {
   if (IsRemote()) {
     TF_RETURN_IF_ERROR(WaitForNode(remote_shape_node_id_, false));
-    CHECK(remote_shape_ != nullptr);
     *num_dims = remote_shape_->dims();
   } else {
     TF_RETURN_IF_ERROR(WaitReady());
@@ -153,6 +152,21 @@ Status TensorHandle::Dim(int dim_index, int64* dim) {
   return Status::OK();
 }
 
+Status TensorHandle::NumElements(int64* num_elements) {
+  if (IsRemote()) {
+    TF_RETURN_IF_ERROR(WaitForNode(remote_shape_node_id_, false));
+    *num_elements = remote_shape_->num_elements();
+  } else {
+    TF_RETURN_IF_ERROR(WaitReady());
+    DCHECK(IsReady());
+    DCHECK(num_elements != nullptr);
+
+    *num_elements = tensor_.NumElements();
+  }
+
+  return Status::OK();
+}
+
 Status TensorHandle::RemoteAddress(int64* op_id, int32* output_num) {
   if (!IsRemote()) {
     return errors::FailedPrecondition(
diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h
index 1bc9c6531a..e55f1a0338 100644
--- a/tensorflow/core/common_runtime/eager/tensor_handle.h
+++ b/tensorflow/core/common_runtime/eager/tensor_handle.h
@@ -113,6 +113,7 @@ class TensorHandle : public core::RefCounted {
 
   Status NumDims(int* num_dims);
   Status Dim(int dim_index, int64* dim);
+  Status NumElements(int64* num_elements);
 
   // Return the op_id and output num if the handle refers to a remote tensor.
   Status RemoteAddress(int64* op_id, int32* output_num);
diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc
index f34ce6af79..5f44bd4fec 100644
--- a/tensorflow/python/eager/pywrap_tensor.cc
+++ b/tensorflow/python/eager/pywrap_tensor.cc
@@ -516,25 +516,13 @@ static PyObject* EagerTensor_rank(EagerTensor* self) {
 // Getter for `_num_elements`.
 static PyObject* EagerTensor_num_elements(EagerTensor* self) {
   auto handle = self->handle;
-  int n = TFE_TensorHandleNumDims(handle, self->status);
+  int n = TFE_TensorHandleNumElements(handle, self->status);
   if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_ValueError)) {
     // Cleanup self->status before returning.
     TF_SetStatus(self->status, TF_OK, "");
     return nullptr;
   }
-  tensorflow::int64 value = 1;
-  if (PyErr_Occurred()) return nullptr;
-  for (int i = 0; i < n; ++i) {
-    int64_t dim = TFE_TensorHandleDim(handle, i, self->status);
-    if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_ValueError)) {
-      // Cleanup self->status before returning.
-      TF_SetStatus(self->status, TF_OK, "");
-      PyErr_SetString(PyExc_RuntimeError, "Error while iterating dimensions");
-      return nullptr;
-    }
-    value *= dim;
-  }
-  return PyLong_FromLongLong(value);
+  return PyLong_FromLongLong(n);
 }
 
 static PyObject* EagerTensor_tensor_handle(EagerTensor* self, void* unused) {
@@ -777,17 +765,34 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) {
   return reinterpret_cast<PyObject*>(t);
 }
 
-tensorflow::int64 EagerTensor_id(const PyObject* tensor) {
-  CHECK(EagerTensor_CheckExact(tensor));
+tensorflow::int64 PyEagerTensor_ID(const PyObject* tensor) {
+  DCHECK(EagerTensor_CheckExact(tensor));
   return reinterpret_cast<const EagerTensor*>(tensor)->id;
 }
 
-tensorflow::DataType EagerTensor_dtype(const PyObject* tensor) {
-  CHECK(EagerTensor_CheckExact(tensor));
+tensorflow::DataType PyEagerTensor_Dtype(const PyObject* tensor) {
+  DCHECK(EagerTensor_CheckExact(tensor));
   return static_cast<tensorflow::DataType>(TFE_TensorHandleDataType(
       reinterpret_cast<const EagerTensor*>(tensor)->handle));
 }
 
+tensorflow::int64 PyEagerTensor_NumElements(const PyObject* tensor) {
+  DCHECK(EagerTensor_CheckExact(tensor));
+  const EagerTensor* as_c_eager_tensor =
+      reinterpret_cast<const EagerTensor*>(tensor);
+  tensorflow::int64 result = TFE_TensorHandleNumElements(
+      as_c_eager_tensor->handle, as_c_eager_tensor->status);
+
+  if (MaybeRaiseExceptionFromTFStatus(as_c_eager_tensor->status,
+                                      PyExc_ValueError)) {
+    // Cleanup status before returning.
+    TF_SetStatus(as_c_eager_tensor->status, TF_OK, "");
+    return -1;
+  }
+
+  return result;
+}
+
 PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) {
   if (!PyType_Check(base_class)) {
     PyErr_SetString(
diff --git a/tensorflow/python/eager/pywrap_tensor.h b/tensorflow/python/eager/pywrap_tensor.h
index bc042eb19e..4eaa1ba536 100644
--- a/tensorflow/python/eager/pywrap_tensor.h
+++ b/tensorflow/python/eager/pywrap_tensor.h
@@ -21,8 +21,9 @@ limitations under the License.
 #include "tensorflow/python/lib/core/numpy.h"
 
 bool EagerTensor_CheckExact(const PyObject* o);
-tensorflow::int64 EagerTensor_id(const PyObject* tensor);
-tensorflow::DataType EagerTensor_dtype(const PyObject* tensor);
+tensorflow::int64 PyEagerTensor_ID(const PyObject* tensor);
+tensorflow::DataType PyEagerTensor_Dtype(const PyObject* tensor);
+tensorflow::int64 PyEagerTensor_NumElements(const PyObject* tensor);
 
 namespace tensorflow {
 TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype);
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 99b46159a9..a0f6be459e 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -860,7 +860,7 @@ static tensorflow::int64 MakeInt(PyObject* integer) {
 
 static tensorflow::int64 FastTensorId(PyObject* tensor) {
   if (EagerTensor_CheckExact(tensor)) {
-    return EagerTensor_id(tensor);
+    return PyEagerTensor_ID(tensor);
   }
   PyObject* id_field = PyObject_GetAttrString(tensor, "_id");
   if (id_field == nullptr) {
@@ -873,7 +873,7 @@ static tensorflow::int64 FastTensorId(PyObject* tensor) {
 
 static tensorflow::DataType FastTensorDtype(PyObject* tensor) {
   if (EagerTensor_CheckExact(tensor)) {
-    return EagerTensor_dtype(tensor);
+    return PyEagerTensor_Dtype(tensor);
   }
   PyObject* dtype_field = PyObject_GetAttrString(tensor, "dtype");
   if (dtype_field == nullptr) {
@@ -1178,7 +1178,7 @@ void TFE_Py_TapeWatch(PyObject* tape, PyObject* tensor) {
 static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) {
   if (EagerTensor_CheckExact(tensor)) {
     TFE_TensorHandle* t = EagerTensor_Handle(tensor);
-    tensorflow::int64 id = EagerTensor_id(tensor);
+    tensorflow::int64 id = PyEagerTensor_ID(tensor);
     tensorflow::TensorShape tensor_shape;
     const tensorflow::Status status = t->handle->Shape(&tensor_shape);
 
@@ -1400,6 +1400,9 @@ class PyVSpace
   }
 
   tensorflow::int64 NumElements(PyObject* tensor) const final {
+    if (EagerTensor_CheckExact(tensor)) {
+      return PyEagerTensor_NumElements(tensor);
+    }
     PyObject* arglist =
         Py_BuildValue("(O)", reinterpret_cast<PyObject*>(tensor));
     PyObject* result = PyEval_CallObject(num_elements_, arglist);
-- 
GitLab


From f2a577888be8368121fe7ce16d4b72f91f53be60 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 18:45:55 -0700
Subject: [PATCH 0310/1357] Break cwise_opt_test.py into 3 files to speed up
 testing, since we are up against the 50 shard limit.

PiperOrigin-RevId: 213377776
---
 tensorflow/python/kernel_tests/BUILD          |   40 +
 .../kernel_tests/cwise_ops_binary_test.py     |  878 +++++++++++++
 .../python/kernel_tests/cwise_ops_test.py     | 1156 +----------------
 .../kernel_tests/cwise_ops_unary_test.py      |  541 ++++++++
 4 files changed, 1464 insertions(+), 1151 deletions(-)
 create mode 100644 tensorflow/python/kernel_tests/cwise_ops_binary_test.py
 create mode 100644 tensorflow/python/kernel_tests/cwise_ops_unary_test.py

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 6bba99b9e7..100240a626 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2798,6 +2798,46 @@ cuda_py_test(
     shard_count = 50,
 )
 
+cuda_py_test(
+    name = "cwise_ops_binary_test",
+    size = "medium",
+    srcs = ["cwise_ops_binary_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:math_ops_gen",
+        "//tensorflow/python:nn_grad",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:variables",
+    ],
+    shard_count = 50,
+)
+
+cuda_py_test(
+    name = "cwise_ops_unary_test",
+    size = "medium",
+    srcs = ["cwise_ops_unary_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:math_ops_gen",
+        "//tensorflow/python:nn_grad",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:variables",
+    ],
+    shard_count = 50,
+)
+
 cuda_py_test(
     name = "embedding_ops_test",
     size = "medium",
diff --git a/tensorflow/python/kernel_tests/cwise_ops_binary_test.py b/tensorflow/python/kernel_tests/cwise_ops_binary_test.py
new file mode 100644
index 0000000000..8028f93a8c
--- /dev/null
+++ b/tensorflow/python/kernel_tests/cwise_ops_binary_test.py
@@ -0,0 +1,878 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for binary coefficient-wise operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes as dtypes_lib
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import gradient_checker
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+
+_ADD = lambda x, y: x + y
+_SUB = lambda x, y: x - y
+_MUL = lambda x, y: x * y
+_POW = lambda x, y: x**y
+_TRUEDIV = lambda x, y: x / y
+_FLOORDIV = lambda x, y: x // y
+_MOD = lambda x, y: x % y
+
+
+# TODO(zongheng): it'd be great to factor out this function and various random
+# SparseTensor gen funcs.
+def _sparsify(x, thresh=0.5, index_dtype=np.int64):
+  x[x < thresh] = 0
+
+  non_zero = np.where(x)
+  x_indices = np.vstack(non_zero).astype(index_dtype).T
+  x_values = x[non_zero]
+  x_shape = x.shape
+
+  return sparse_tensor.SparseTensor(
+      indices=x_indices, values=x_values, dense_shape=x_shape), x_values
+
+
+def _default_tolerance(dtype):
+  """Returns a sensible default tolerance for comparing results of a given type.
+
+  Args:
+    dtype: A datatype.
+  """
+  if dtype == np.float16:
+    return 5e-3
+  elif dtype in (np.float32, np.complex64):
+    return 1e-3
+  elif dtype in (np.float64, np.complex128):
+    return 1e-5
+  else:
+    return None  # Fail fast for unexpected types
+
+
+class BinaryOpTest(test.TestCase):
+
+  def _compareCpu(self, x, y, np_func, tf_func, also_compare_variables=False):
+    np_ans = np_func(x, y)
+    with self.test_session(use_gpu=False):
+      inx = ops.convert_to_tensor(x)
+      iny = ops.convert_to_tensor(y)
+      out = tf_func(inx, iny)
+      tf_cpu = out.eval()
+      # Test that the op takes precedence over numpy operators.
+      np_left = tf_func(x, iny).eval()
+      np_right = tf_func(inx, y).eval()
+
+      if also_compare_variables:
+        var_x = variables.Variable(x)
+        var_y = variables.Variable(y)
+        variables.global_variables_initializer().run()
+        print(type(x), type(y), type(var_x), type(var_y))
+        print(type(tf_func(x, var_y)), type(tf_func(var_x, y)))
+        np_var_left = tf_func(x, var_y).eval()
+        np_var_right = tf_func(var_x, y).eval()
+
+    if np_ans.dtype != np.object:
+      self.assertAllClose(np_ans, tf_cpu)
+      self.assertAllClose(np_ans, np_left)
+      self.assertAllClose(np_ans, np_right)
+      if also_compare_variables:
+        self.assertAllClose(np_ans, np_var_left)
+        self.assertAllClose(np_ans, np_var_right)
+    self.assertShapeEqual(np_ans, out)
+
+  _GRAD_TOL = {
+      dtypes_lib.float16: 1e-3,
+      dtypes_lib.float32: 1e-3,
+      dtypes_lib.complex64: 1e-2,
+      dtypes_lib.float64: 1e-5,
+      dtypes_lib.complex128: 1e-4
+  }
+
+  def _compareGradientX(self,
+                        x,
+                        y,
+                        np_func,
+                        tf_func,
+                        numeric_gradient_type=None):
+    z = np_func(x, y)
+    zs = list(z.shape)
+    with self.cached_session():
+      inx = ops.convert_to_tensor(x)
+      iny = ops.convert_to_tensor(y)
+      if x.dtype in (np.float32, np.float64):
+        out = 1.1 * tf_func(inx, iny)
+      else:
+        out = tf_func(inx, iny)
+      xs = list(x.shape)
+      jacob_t, jacob_n = gradient_checker.compute_gradient(
+          inx, xs, out, zs, x_init_value=x)
+      if numeric_gradient_type is not None:
+        xf = x.astype(numeric_gradient_type)
+        yf = y.astype(numeric_gradient_type)
+        inxf = ops.convert_to_tensor(xf)
+        inyf = ops.convert_to_tensor(yf)
+        outf = tf_func(inxf, inyf)
+        _, jacob_n = gradient_checker.compute_gradient(
+            inxf, xs, outf, zs, x_init_value=xf, delta=1e-3)
+        jacob_n = jacob_n.astype(x.dtype)
+      tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)]
+      self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol)
+
+  def _compareGradientY(self,
+                        x,
+                        y,
+                        np_func,
+                        tf_func,
+                        numeric_gradient_type=None):
+    z = np_func(x, y)
+    zs = list(z.shape)
+    with self.cached_session():
+      inx = ops.convert_to_tensor(x)
+      iny = ops.convert_to_tensor(y)
+      if x.dtype in (np.float32, np.float64):
+        out = 1.1 * tf_func(inx, iny)
+      else:
+        out = tf_func(inx, iny)
+      ys = list(np.shape(y))
+      jacob_t, jacob_n = gradient_checker.compute_gradient(
+          iny, ys, out, zs, x_init_value=y)
+      if numeric_gradient_type is not None:
+        xf = x.astype(numeric_gradient_type)
+        yf = y.astype(numeric_gradient_type)
+        inxf = ops.convert_to_tensor(xf)
+        inyf = ops.convert_to_tensor(yf)
+        outf = tf_func(inxf, inyf)
+        _, jacob_n = gradient_checker.compute_gradient(
+            inyf, ys, outf, zs, x_init_value=yf)
+        jacob_n = jacob_n.astype(x.dtype)
+    tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)]
+    self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol)
+
+  def _compareGpu(self, x, y, np_func, tf_func):
+    np_ans = np_func(x, y)
+    with self.test_session(force_gpu=test_util.is_gpu_available()):
+      inx = ops.convert_to_tensor(x)
+      iny = ops.convert_to_tensor(y)
+      out = tf_func(inx, iny)
+      tf_gpu = out.eval()
+    self.assertAllClose(np_ans, tf_gpu)
+    self.assertShapeEqual(np_ans, out)
+    # TODO(zhifengc/ke): make gradient checker work on GPU.
+
+  def _compareBoth(self, x, y, np_func, tf_func, also_compare_variables=False):
+    self._compareCpu(x, y, np_func, tf_func, also_compare_variables)
+    if x.dtype in (np.float16, np.float32, np.float64, np.complex64,
+                   np.complex128):
+      if tf_func not in (_FLOORDIV, math_ops.floordiv, math_ops.zeta,
+                         math_ops.polygamma):
+        self._compareGradientX(x, y, np_func, tf_func)
+        self._compareGradientY(x, y, np_func, tf_func)
+      if tf_func in (math_ops.zeta, math_ops.polygamma):
+        # These methods only support gradients in the second parameter
+        self._compareGradientY(x, y, np_func, tf_func)
+      self._compareGpu(x, y, np_func, tf_func)
+
+  def testFloatBasic(self):
+    x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32)
+    y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32)
+    self._compareBoth(x, y, np.add, math_ops.add, also_compare_variables=True)
+    self._compareBoth(x, y, np.subtract, math_ops.subtract)
+    self._compareBoth(x, y, np.multiply, math_ops.multiply)
+    self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv)
+    self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv)
+    self._compareBoth(x, y, np.add, _ADD)
+    self._compareBoth(x, y, np.subtract, _SUB)
+    self._compareBoth(x, y, np.multiply, _MUL)
+    self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV)
+    self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV)
+    self._compareBoth(x, y, np.arctan2, math_ops.atan2)
+    x1 = np.random.randn(5, 6).astype(np.float32)
+    x2 = np.random.randn(5, 6).astype(np.float32)
+    # Remove tiny values--atan2 gradients are flaky near the origin.
+    x1[np.abs(x1) < 0.05] = 0.05 * np.sign(x1[np.abs(x1) < 0.05])
+    x2[np.abs(x2) < 0.05] = 0.05 * np.sign(x2[np.abs(x2) < 0.05])
+    self._compareBoth(x1, x2, np.arctan2, math_ops.atan2)
+    try:
+      from scipy import special  # pylint: disable=g-import-not-at-top
+      a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32)
+      x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32)
+      self._compareBoth(a_pos_small, x_pos_small, special.gammainc,
+                        math_ops.igamma)
+      self._compareBoth(a_pos_small, x_pos_small, special.gammaincc,
+                        math_ops.igammac)
+      # Need x > 1
+      self._compareBoth(x_pos_small + 1, a_pos_small, special.zeta,
+                        math_ops.zeta)
+      n_small = np.arange(0, 15).reshape(1, 3, 5).astype(np.float32)
+      self._compareBoth(n_small, x_pos_small, special.polygamma,
+                        math_ops.polygamma)
+    except ImportError as e:
+      tf_logging.warn("Cannot test special functions: %s" % str(e))
+
+  def testFloatDifferentShapes(self):
+    x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.float32)
+    y = np.array([1, 2]).reshape(2, 1).astype(np.float32)
+    with self.cached_session() as sess:
+      inx = ops.convert_to_tensor(x)
+      iny = ops.convert_to_tensor(y)
+      s = math_ops.reduce_sum(inx * iny)
+      gx, gy = sess.run(gradients_impl.gradients(s, [inx, iny]))
+    # gx is simply the broadcasted y
+    self.assertAllEqual(gx,
+                        np.array([1, 1, 2, 2]).reshape(2, 2).astype(np.float32))
+    # gy is x's column summed up
+    self.assertAllEqual(gy, np.array([3, 7]).reshape(2, 1).astype(np.float32))
+
+  def testFloatVariableOverload(self):
+    x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.int32)
+    y = np.array([1, 2]).reshape(2, 1).astype(np.int32)
+    var_x = variables.Variable(x)
+    var_y = variables.Variable(y)
+    with self.cached_session() as sess:
+      sess.run([var_x.initializer, var_y.initializer])
+      left_result = (var_x * y).eval()
+      right_result = (x * var_y).eval()
+    np_result = x * y
+    self.assertAllEqual(np_result, left_result)
+    self.assertAllEqual(np_result, right_result)
+
+  def testDoubleBasic(self):
+    x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float64)
+    y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float64)
+    self._compareBoth(x, y, np.add, math_ops.add)
+    self._compareBoth(x, y, np.subtract, math_ops.subtract)
+    self._compareBoth(x, y, np.multiply, math_ops.multiply)
+    self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv)
+    self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv)
+    self._compareBoth(x, y, np.add, _ADD)
+    self._compareBoth(x, y, np.subtract, _SUB)
+    self._compareBoth(x, y, np.multiply, _MUL)
+    self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV)
+    self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV)
+    self._compareBoth(x, y, np.arctan2, math_ops.atan2)
+    x1 = np.random.randn(7, 4).astype(np.float64)
+    x2 = np.random.randn(7, 4).astype(np.float64)
+    # Remove tiny values--atan2 gradients are flaky near the origin.
+    x1[np.abs(x1) < 0.5] = 0.5 * np.sign(x1[np.abs(x1) < 0.5])
+    x2[np.abs(x2) < 0.5] = 0.5 * np.sign(x2[np.abs(x2) < 0.5])
+    self._compareBoth(x1, x2, np.arctan2, math_ops.atan2)
+    try:
+      from scipy import special  # pylint: disable=g-import-not-at-top
+      a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32)
+      x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32)
+      self._compareBoth(a_pos_small, x_pos_small, special.gammainc,
+                        math_ops.igamma)
+      self._compareBoth(a_pos_small, x_pos_small, special.gammaincc,
+                        math_ops.igammac)
+    except ImportError as e:
+      tf_logging.warn("Cannot test special functions: %s" % str(e))
+
+  def testUint8Basic(self):
+    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.uint8)
+    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.uint8)
+    self._compareBoth(x, y, np.add, math_ops.add)
+
+  def testInt8Basic(self):
+    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int8)
+    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int8)
+    self._compareBoth(x, y, np.multiply, math_ops.multiply)
+    self._compareBoth(x, y, np.multiply, _MUL)
+
+  def testInt16Basic(self):
+    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int16)
+    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int16)
+    self._compareBoth(x, y, np.multiply, math_ops.multiply)
+    self._compareBoth(x, y, np.multiply, _MUL)
+
+  def testUint16Basic(self):
+    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.uint16)
+    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.uint16)
+    self._compareBoth(x, y, np.multiply, math_ops.multiply)
+    self._compareBoth(x, y, np.multiply, _MUL)
+    self._compareBoth(x, y, np.true_divide, math_ops.truediv)
+    self._compareBoth(x, y, np.floor_divide, math_ops.floordiv)
+    self._compareBoth(x, y, np.true_divide, _TRUEDIV)
+    self._compareBoth(x, y, np.floor_divide, _FLOORDIV)
+
+  def testInt32Basic(self):
+    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int32)
+    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int32)
+    self._compareBoth(x, y, np.add, math_ops.add)
+    self._compareBoth(x, y, np.subtract, math_ops.subtract)
+    self._compareBoth(x, y, np.multiply, math_ops.multiply)
+    self._compareBoth(x, y, np.true_divide, math_ops.truediv)
+    self._compareBoth(x, y, np.floor_divide, math_ops.floordiv)
+    self._compareBoth(x, y, np.mod, math_ops.mod)
+    self._compareBoth(x, y, np.add, _ADD)
+    self._compareBoth(x, y, np.subtract, _SUB)
+    self._compareBoth(x, y, np.multiply, _MUL)
+    self._compareBoth(x, y, np.true_divide, _TRUEDIV)
+    self._compareBoth(x, y, np.floor_divide, _FLOORDIV)
+    self._compareBoth(x, y, np.mod, _MOD)
+    # _compareBoth tests on GPU only for floating point types, so test
+    # _MOD for int32 on GPU by calling _compareGpu
+    self._compareGpu(x, y, np.mod, _MOD)
+
+  def testInt64Basic(self):
+    x = np.arange(1 << 40, 13 << 40, 2 << 40).reshape(1, 3, 2).astype(np.int64)
+    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int64)
+    self._compareBoth(x, y, np.subtract, math_ops.subtract)
+    self._compareBoth(x, y, np.multiply, math_ops.multiply)
+    self._compareBoth(x, y, np.true_divide, math_ops.truediv)
+    self._compareBoth(x, y, np.floor_divide, math_ops.floordiv)
+    self._compareBoth(x, y, np.mod, math_ops.mod)
+    self._compareBoth(x, y, np.subtract, _SUB)
+    self._compareBoth(x, y, np.multiply, _MUL)
+    self._compareBoth(x, y, np.true_divide, _TRUEDIV)
+    self._compareBoth(x, y, np.floor_divide, _FLOORDIV)
+    self._compareBoth(x, y, np.mod, _MOD)
+
+  def testComplex64Basic(self):
+    x = np.complex(1, 1) * np.linspace(-10, 10, 6).reshape(1, 3, 2).astype(
+        np.complex64)
+    y = np.complex(1, 1) * np.linspace(20, -20, 6).reshape(1, 3, 2).astype(
+        np.complex64)
+    self._compareBoth(x, y, np.add, math_ops.add)
+    self._compareBoth(x, y, np.subtract, math_ops.subtract)
+    self._compareBoth(x, y, np.multiply, math_ops.multiply)
+    self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv)
+    self._compareBoth(x, y, np.add, _ADD)
+    self._compareBoth(x, y, np.subtract, _SUB)
+    self._compareBoth(x, y, np.multiply, _MUL)
+    self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV)
+
+  def testComplex128Basic(self):
+    x = np.complex(1, 1) * np.linspace(-10, 10, 6).reshape(1, 3, 2).astype(
+        np.complex128)
+    y = np.complex(1, 1) * np.linspace(20, -20, 6).reshape(1, 3, 2).astype(
+        np.complex128)
+    self._compareBoth(x, y, np.add, math_ops.add)
+    self._compareBoth(x, y, np.subtract, math_ops.subtract)
+    self._compareBoth(x, y, np.multiply, math_ops.multiply)
+    self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv)
+    self._compareBoth(x, y, np.add, _ADD)
+    self._compareBoth(x, y, np.subtract, _SUB)
+    self._compareBoth(x, y, np.multiply, _MUL)
+    self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV)
+
+  def testStringComparison(self):
+    x = np.array([["abc", "bh"], ["c", ""]])
+    y = np.array([["abc", "bh"], ["def", "hi"]])
+    with self.test_session(use_gpu=False) as sess:
+      cmp_eq = math_ops.equal(x, y)
+      cmp_not_eq = math_ops.not_equal(x, y)
+      values = sess.run([cmp_eq, cmp_not_eq])
+      self.assertAllEqual([[True, True], [False, False]], values[0])
+      self.assertAllEqual([[False, False], [True, True]], values[1])
+
+  def testString(self):
+    x = np.array([["x_0_0", "x_0_1", "x_0_2"], ["x_1_0", "x_1_1", "x_1_2"],
+                  ["x_2_0", "x_2_1", "x_2_2"]],
+                 dtype=np.object)
+    y = np.array([["y_0_0", "y_0_1", "y_0_2"], ["y_1_0", "y_1_1", "y_1_2"],
+                  ["y_2_0", "y_2_1", "y_2_2"]],
+                 dtype=np.object)
+    z = np.array([["z_0", "z_1", "z_2"]], dtype=np.object)
+    w = np.array("w", dtype=np.object)
+    self._compareCpu(x, y, _ADD, _ADD)
+    self._compareCpu(x, z, _ADD, _ADD)
+    self._compareCpu(x, w, _ADD, _ADD)
+    self._compareCpu(z, w, _ADD, _ADD)
+
+  def _compareBCast(self, xs, ys, dtype, np_func, tf_func):
+    if dtype in (np.complex64, np.complex128):
+      x = (1 + np.linspace(0, 2 + 3j, np.prod(xs))).astype(dtype).reshape(xs)
+      y = (1 + np.linspace(0, 2 - 2j, np.prod(ys))).astype(dtype).reshape(ys)
+    else:
+      x = (1 + np.linspace(0, 5, np.prod(xs))).astype(dtype).reshape(xs)
+      y = (1 + np.linspace(0, 5, np.prod(ys))).astype(dtype).reshape(ys)
+    self._compareCpu(x, y, np_func, tf_func)
+    if x.dtype in (np.float16, np.float32, np.float64):
+      # TODO(aselle): Make the test work for dtypes:
+      #     (np.complex64, np.complex128).
+      if tf_func not in (_FLOORDIV, math_ops.floordiv):
+        if x.dtype == np.float16:
+          # Compare fp16 theoretical gradients to fp32 numerical gradients,
+          # since fp16 numerical gradients are too imprecise unless great
+          # care is taken with choosing the inputs and the delta. This is
+          # a weaker check (in particular, it does not test the op itself,
+          # only its gradient), but it's much better than nothing.
+          self._compareGradientX(x, y, np_func, tf_func, np.float)
+          self._compareGradientY(x, y, np_func, tf_func, np.float)
+        else:
+          self._compareGradientX(x, y, np_func, tf_func)
+          self._compareGradientY(x, y, np_func, tf_func)
+      self._compareGpu(x, y, np_func, tf_func)
+
+  # TODO(josh11b,vrv): Refactor this to use parameterized tests.
+  def _testBCastByFunc(self, funcs, xs, ys):
+    dtypes = [
+        np.float16,
+        np.float32,
+        np.float64,
+        np.int32,
+        np.int64,
+        np.complex64,
+        np.complex128,
+    ]
+    for dtype in dtypes:
+      for (np_func, tf_func) in funcs:
+        if (dtype in (np.complex64, np.complex128) and
+            tf_func in (_FLOORDIV, math_ops.floordiv)):
+          continue  # floordiv makes no sense for complex numbers
+        self._compareBCast(xs, ys, dtype, np_func, tf_func)
+        self._compareBCast(ys, xs, dtype, np_func, tf_func)
+
+  def _testBCastA(self, xs, ys):
+    funcs = [
+        (np.add, math_ops.add),
+        (np.add, _ADD),
+    ]
+    self._testBCastByFunc(funcs, xs, ys)
+
+  def _testBCastB(self, xs, ys):
+    funcs = [
+        (np.subtract, math_ops.subtract),
+        (np.subtract, _SUB),
+        (np.power, math_ops.pow),
+    ]
+    self._testBCastByFunc(funcs, xs, ys)
+
+  def _testBCastC(self, xs, ys):
+    funcs = [
+        (np.multiply, math_ops.multiply),
+        (np.multiply, _MUL),
+    ]
+    self._testBCastByFunc(funcs, xs, ys)
+
+  def _testBCastD(self, xs, ys):
+    funcs = [
+        (np.true_divide, math_ops.truediv),
+        (np.floor_divide, math_ops.floordiv),
+        (np.true_divide, _TRUEDIV),
+        (np.floor_divide, _FLOORDIV),
+    ]
+    self._testBCastByFunc(funcs, xs, ys)
+
+  def testBCast_0A(self):
+    self._testBCastA([1, 3, 2], [1])
+
+  def testBCast_0B(self):
+    self._testBCastB([1, 3, 2], [1])
+
+  def testBCast_0C(self):
+    self._testBCastC([1, 3, 2], [1])
+
+  def testBCast_0D(self):
+    self._testBCastD([1, 3, 2], [1])
+
+  def testBCast_1A(self):
+    self._testBCastA([1, 3, 2], [2])
+
+  def testBCast_1B(self):
+    self._testBCastB([1, 3, 2], [2])
+
+  def testBCast_1C(self):
+    self._testBCastC([1, 3, 2], [2])
+
+  def testBCast_1D(self):
+    self._testBCastD([1, 3, 2], [2])
+
+  def testBCast_2A(self):
+    self._testBCastA([1, 3, 2], [3, 2])
+
+  def testBCast_2B(self):
+    self._testBCastB([1, 3, 2], [3, 2])
+
+  def testBCast_2C(self):
+    self._testBCastC([1, 3, 2], [3, 2])
+
+  def testBCast_2D(self):
+    self._testBCastD([1, 3, 2], [3, 2])
+
+  def testBCast_3A(self):
+    self._testBCastA([1, 3, 2], [3, 1])
+
+  def testBCast_3B(self):
+    self._testBCastB([1, 3, 2], [3, 1])
+
+  def testBCast_3C(self):
+    self._testBCastC([1, 3, 2], [3, 1])
+
+  def testBCast_3D(self):
+    self._testBCastD([1, 3, 2], [3, 1])
+
+  def testBCast_4A(self):
+    self._testBCastA([1, 3, 2], [1, 3, 2])
+
+  def testBCast_4B(self):
+    self._testBCastB([1, 3, 2], [1, 3, 2])
+
+  def testBCast_4C(self):
+    self._testBCastC([1, 3, 2], [1, 3, 2])
+
+  def testBCast_4D(self):
+    self._testBCastD([1, 3, 2], [1, 3, 2])
+
+  def testBCast_5A(self):
+    self._testBCastA([1, 3, 2], [2, 3, 1])
+
+  def testBCast_5B(self):
+    self._testBCastB([1, 3, 2], [2, 3, 1])
+
+  def testBCast_5C(self):
+    self._testBCastC([1, 3, 2], [2, 3, 1])
+
+  def testBCast_5D(self):
+    self._testBCastD([1, 3, 2], [2, 3, 1])
+
+  def testBCast_6A(self):
+    self._testBCastA([1, 3, 2], [2, 1, 1])
+
+  def testBCast_6B(self):
+    self._testBCastB([1, 3, 2], [2, 1, 1])
+
+  def testBCast_6C(self):
+    self._testBCastC([1, 3, 2], [2, 1, 1])
+
+  def testBCast_6D(self):
+    self._testBCastD([1, 3, 2], [2, 1, 1])
+
+  def testBCast_7A(self):
+    self._testBCastA([1, 3, 2], [1, 3, 1])
+
+  def testBCast_7B(self):
+    self._testBCastB([1, 3, 2], [1, 3, 1])
+
+  def testBCast_7C(self):
+    self._testBCastC([1, 3, 2], [1, 3, 1])
+
+  def testBCast_7D(self):
+    self._testBCastD([1, 3, 2], [1, 3, 1])
+
+  def testBCast_8A(self):
+    self._testBCastA([2, 1, 5], [2, 3, 1])
+
+  def testBCast_8B(self):
+    self._testBCastB([2, 1, 5], [2, 3, 1])
+
+  def testBCast_8C(self):
+    self._testBCastC([2, 1, 5], [2, 3, 1])
+
+  def testBCast_8D(self):
+    self._testBCastD([2, 1, 5], [2, 3, 1])
+
+  def testBCast_9A(self):
+    self._testBCastA([2, 0, 5], [2, 0, 1])
+
+  def testBCast_9B(self):
+    self._testBCastB([2, 0, 5], [2, 0, 1])
+
+  def testBCast_9C(self):
+    self._testBCastC([2, 0, 5], [2, 0, 1])
+
+  def testBCast_9D(self):
+    self._testBCastD([2, 0, 5], [2, 0, 1])
+
+  def testBCast_10A(self):
+    self._testBCastA([2, 3, 0], [2, 3, 1])
+
+  def testBCast_10B(self):
+    self._testBCastB([2, 3, 0], [2, 3, 1])
+
+  def testBCast_10C(self):
+    self._testBCastC([2, 3, 0], [2, 3, 1])
+
+  def testBCast_10D(self):
+    self._testBCastD([2, 3, 0], [2, 3, 1])
+
+  def testBCast_11A(self):
+    self._testBCastA([1, 3, 2], [1, 3, 2])
+
+  def testBCast_11B(self):
+    self._testBCastB([1, 3, 2], [1, 3, 2])
+
+  def testBCast_11C(self):
+    self._testBCastC([1, 3, 2], [1, 3, 2])
+
+  def testBCast_11D(self):
+    self._testBCastD([1, 3, 2], [1, 3, 2])
+
+  def testBCast_12A(self):
+    self._testBCastA([1, 1, 1, 1, 3, 2], [1, 3, 2])
+
+  def testBCast_12B(self):
+    self._testBCastB([1, 1, 1, 1, 3, 2], [1, 3, 2])
+
+  def testBCast_12C(self):
+    self._testBCastC([1, 1, 1, 1, 3, 2], [1, 3, 2])
+
+  def testBCast_12D(self):
+    self._testBCastD([1, 1, 1, 1, 3, 2], [1, 3, 2])
+
+  def testBCast_13A(self):
+    self._testBCastA([1, 3, 2, 1, 1], [1])
+
+  def testBCast_13B(self):
+    self._testBCastB([1, 3, 2, 1, 1], [1])
+
+  def testBCast_13C(self):
+    self._testBCastC([1, 3, 2, 1, 1], [1])
+
+  def testBCast_13D(self):
+    self._testBCastD([1, 3, 2, 1, 1], [1])
+
+  def testBCast_14A(self):
+    self._testBCastA([2, 3, 1, 1, 5], [1])
+
+  def testBCast_14B(self):
+    self._testBCastB([2, 3, 1, 1, 5], [1])
+
+  def testBCast_14C(self):
+    self._testBCastC([2, 3, 1, 1, 5], [1])
+
+  def testBCast_14D(self):
+    self._testBCastD([2, 3, 1, 1, 5], [1])
+
+  def testBCast_15A(self):
+    self._testBCastA([10, 3, 1, 2], [3, 1, 2])
+
+  def testBCast_15B(self):
+    self._testBCastB([10, 3, 1, 2], [3, 1, 2])
+
+  def testBCast_15C(self):
+    self._testBCastC([10, 3, 1, 2], [3, 1, 2])
+
+  def testBCast_15D(self):
+    self._testBCastD([10, 3, 1, 2], [3, 1, 2])
+
+  def testMismatchedDimensions(self):
+    for func in [
+        math_ops.add, math_ops.subtract, math_ops.multiply, math_ops.div, _ADD,
+        _SUB, _MUL, _TRUEDIV, _FLOORDIV
+    ]:
+      with self.assertRaisesWithPredicateMatch(
+          ValueError, lambda e: "Dimensions must" in str(e)):
+        func(
+            ops.convert_to_tensor([10.0, 20.0, 30.0]),
+            ops.convert_to_tensor([[40.0, 50.0], [60.0, 70.0]]))
+
+  def testZeroPowGrad(self):
+    with self.cached_session():
+      for dtype in (np.float16, np.float32, np.float64, np.complex64,
+                    np.complex128):
+        x = constant_op.constant(0.0, dtype=dtype)
+        y = constant_op.constant(2.0, dtype=dtype)
+        z = math_ops.pow(x, y)
+        error = gradient_checker.compute_gradient_error(y, [], z, [])
+        self.assertEqual(error, 0)
+
+  def testComplexPowGrad(self):
+    with self.cached_session():
+      for dtype in np.complex64, np.complex128:
+        for base in 2.0, -2.0:
+          x = constant_op.constant(base, dtype=dtype)
+          y = constant_op.constant(2.0, dtype=dtype)
+          z = math_ops.pow(x, y)
+          error = gradient_checker.compute_gradient_error(y, [], z, [])
+          self.assertLess(error, 2e-4)
+
+  def testAtan2SpecialValues(self):
+    x1l, x2l = zip((+0.0, +0.0), (+0.0, -0.0), (-0.0, +0.0), (-0.0, -0.0),
+                   (1.2345, float("inf")), (1.2345, -float("inf")),
+                   (-4.321, float("inf")), (-4.125, -float("inf")),
+                   (float("inf"), float("inf")), (float("inf"), -float("inf")),
+                   (-float("inf"), float("inf")),
+                   (-float("inf"), -float("inf")))
+    for dtype in np.float32, np.float64:
+      x1 = np.array(x1l).astype(dtype)
+      x2 = np.array(x2l).astype(dtype)
+      self._compareCpu(x1, x2, np.arctan2, math_ops.atan2)
+      self._compareGpu(x1, x2, np.arctan2, math_ops.atan2)
+
+  def testPowNegativeExponent(self):
+    for dtype in [np.int32, np.int64]:
+      with self.test_session(use_gpu=False) as sess:
+        with self.assertRaisesRegexp(
+            errors_impl.InvalidArgumentError,
+            "Integers to negative integer powers are not allowed"):
+          x = np.array([5, 2]).astype(dtype)
+          y = np.array([-2, 3]).astype(dtype)
+          sess.run(math_ops.pow(x, y))
+
+      with self.test_session(use_gpu=False) as sess:
+        with self.assertRaisesRegexp(
+            errors_impl.InvalidArgumentError,
+            "Integers to negative integer powers are not allowed"):
+          x = np.array([5, 2]).astype(dtype)
+          y = np.array([2, -3]).astype(dtype)
+          sess.run(math_ops.pow(x, y))
+
+      with self.test_session(use_gpu=False) as sess:
+        with self.assertRaisesRegexp(
+            errors_impl.InvalidArgumentError,
+            "Integers to negative integer powers are not allowed"):
+          x = np.array([5, 2]).astype(dtype)
+          y = -3
+          sess.run(math_ops.pow(x, y))
+
+
+class ComparisonOpTest(test.TestCase):
+
+  def _compareScalar(self, func, x, y, dtype):
+    with self.test_session(force_gpu=test_util.is_gpu_available()):
+      out = func(
+          ops.convert_to_tensor(np.array([x]).astype(dtype)),
+          ops.convert_to_tensor(np.array([y]).astype(dtype)))
+      ret = out.eval()
+    return ret[0]
+
+  def testScalarCompareScalar(self):
+    dtypes = [np.float16, np.float32, np.float64, np.int32, np.int64]
+    data = [-1, 0, 1]
+    for t in dtypes:
+      for x in data:
+        for y in data:
+          self.assertEqual(self._compareScalar(math_ops.less, x, y, t), x < y)
+          self.assertEqual(
+              self._compareScalar(math_ops.less_equal, x, y, t), x <= y)
+          self.assertEqual(
+              self._compareScalar(math_ops.greater, x, y, t), x > y)
+          self.assertEqual(
+              self._compareScalar(math_ops.greater_equal, x, y, t), x >= y)
+          self.assertEqual(self._compareScalar(math_ops.equal, x, y, t), x == y)
+          self.assertEqual(
+              self._compareScalar(math_ops.not_equal, x, y, t), x != y)
+    data = [-1, 0, 1, -1j, 1j, 1 + 1j, 1 - 1j]
+    for t in [np.complex64, np.complex128]:
+      for x in data:
+        for y in data:
+          self.assertEqual(self._compareScalar(math_ops.equal, x, y, t), x == y)
+          self.assertEqual(
+              self._compareScalar(math_ops.not_equal, x, y, t), x != y)
+
+  def _compare(self, x, y, np_func, tf_func):
+    np_ans = np_func(x, y)
+    with self.test_session(force_gpu=test_util.is_gpu_available()):
+      out = tf_func(ops.convert_to_tensor(x), ops.convert_to_tensor(y))
+      tf_ans = out.eval()
+    self.assertAllEqual(np_ans, tf_ans)
+
+  def testTensorCompareTensor(self):
+    x = np.linspace(-15, 15, 6).reshape(1, 3, 2)
+    y = np.linspace(20, -10, 6).reshape(1, 3, 2)
+    for t in [np.float16, np.float32, np.float64, np.int32, np.int64]:
+      xt = x.astype(t)
+      yt = y.astype(t)
+      self._compare(xt, yt, np.less, math_ops.less)
+      self._compare(xt, yt, np.less_equal, math_ops.less_equal)
+      self._compare(xt, yt, np.greater, math_ops.greater)
+      self._compare(xt, yt, np.greater_equal, math_ops.greater_equal)
+      self._compare(xt, yt, np.equal, math_ops.equal)
+      self._compare(xt, yt, np.not_equal, math_ops.not_equal)
+    # Complex types do not support ordering but do support equality tests.
+    for t in [np.complex64, np.complex128]:
+      xt = x.astype(t)
+      xt -= 1j * xt
+      yt = y.astype(t)
+      yt -= 1j * yt
+      self._compare(xt, yt, np.equal, math_ops.equal)
+      self._compare(xt, yt, np.not_equal, math_ops.not_equal)
+
+  def _compareBCast(self, xs, ys, dtype, np_func, tf_func):
+    x = np.linspace(-15, 15, np.prod(xs)).astype(dtype).reshape(xs)
+    y = np.linspace(20, -10, np.prod(ys)).astype(dtype).reshape(ys)
+    if dtype in (np.complex64, np.complex128):
+      x -= 1j * x
+      y -= 1j * y
+    self._compare(x, y, np_func, tf_func)
+    self._compare(y, x, np_func, tf_func)
+
+  def _testBCastByFunc(self, np_func, tf_func, include_complex=False):
+    shapes = [
+        ([1, 3, 2], [1]),
+        ([1, 3, 2], [2]),
+        ([1, 3, 2], [3, 2]),
+        ([1, 3, 2], [3, 1]),
+        ([1, 3, 2], [1, 3, 2]),
+        ([1, 3, 2], [2, 3, 1]),
+        ([1, 3, 2], [2, 1, 1]),
+        ([1, 3, 2], [1, 3, 1]),
+        ([2, 1, 5], [2, 3, 1]),
+        ([2, 0, 5], [2, 0, 1]),
+        ([2, 3, 0], [2, 3, 1]),
+    ]
+    dtypes = [
+        np.float16,
+        np.float32,
+        np.float64,
+        np.int32,
+        np.int64,
+    ]
+    if include_complex:
+      dtypes.extend([np.complex64, np.complex128])
+
+    for (xs, ys) in shapes:
+      for dtype in dtypes:
+        self._compareBCast(xs, ys, dtype, np_func, tf_func)
+
+  def testBCastLess(self):
+    self._testBCastByFunc(np.less, math_ops.less)
+
+  def testBCastLessEqual(self):
+    self._testBCastByFunc(np.less_equal, math_ops.less_equal)
+
+  def testBCastGreater(self):
+    self._testBCastByFunc(np.greater, math_ops.greater)
+
+  def testBCastGreaterEqual(self):
+    self._testBCastByFunc(np.greater_equal, math_ops.greater_equal)
+
+  def testBCastEqual(self):
+    self._testBCastByFunc(np.equal, math_ops.equal, include_complex=True)
+
+  def testBCastNotEqual(self):
+    self._testBCastByFunc(
+        np.not_equal, math_ops.not_equal, include_complex=True)
+
+  def testShapeMismatch(self):
+    dtypes = [np.float16, np.float32, np.float64, np.int32, np.int64]
+    funcs = [
+        math_ops.less, math_ops.less_equal, math_ops.greater,
+        math_ops.greater_equal, math_ops.equal, math_ops.not_equal
+    ]
+    x = np.arange(0, 10).reshape([2, 5])
+    y = np.arange(0, 10).reshape([5, 2])
+    for t in dtypes:
+      for f in funcs:
+        with self.assertRaisesWithPredicateMatch(
+            ValueError, lambda e: "Dimensions must" in str(e)):
+          f(x.astype(t), y.astype(t))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index 00d7f956c2..c5311ad834 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -18,25 +18,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
-
 import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_lib
-from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import gradient_checker
-from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging
 
 _ADD = lambda x, y: x + y
 _SUB = lambda x, y: x - y
@@ -45,8 +39,6 @@ _POW = lambda x, y: x**y
 _TRUEDIV = lambda x, y: x / y
 _FLOORDIV = lambda x, y: x // y
 _MOD = lambda x, y: x % y
-_NEG = lambda x: -x
-_ABS = abs
 
 _LT = lambda x, y: x < y
 _LE = lambda x, y: x <= y
@@ -74,8 +66,11 @@ def _sparsify(x, thresh=0.5, index_dtype=np.int64):
 
 
 def _default_tolerance(dtype):
-  """Returns a sensible default tolerance for comparing results of a given
-  type"""
+  """Returns a sensible default tolerance for comparing results of a given type.
+
+  Args:
+    dtype: A datatype.
+  """
   if dtype == np.float16:
     return 5e-3
   elif dtype in (np.float32, np.complex64):
@@ -86,1147 +81,6 @@ def _default_tolerance(dtype):
     return None  # Fail fast for unexpected types
 
 
-class UnaryOpTest(test.TestCase):
-
-  def _compareCpu(self, x, np_func, tf_func, grad_rtol=None, grad_atol=None):
-    if grad_rtol is None:
-      grad_rtol = _default_tolerance(x.dtype)
-    if grad_atol is None:
-      grad_atol = _default_tolerance(x.dtype)
-    np_ans = np_func(x)
-    with self.test_session(use_gpu=False):
-      inx = ops.convert_to_tensor(x)
-      if x.dtype in (np.float32, np.float64,
-                     dtypes_lib.bfloat16.as_numpy_dtype):
-        y = 1.1 * tf_func(inx)
-        np_ans *= 1.1
-      else:
-        y = tf_func(inx)
-      tf_cpu = y.eval()
-      self.assertShapeEqual(np_ans, y)
-      if x.dtype == np.float16:
-        self.assertAllClose(np_ans, tf_cpu, rtol=1e-3, atol=1e-3)
-      elif x.dtype == dtypes_lib.bfloat16.as_numpy_dtype:
-        self.assertAllClose(np_ans, tf_cpu, rtol=1e-2, atol=1e-2)
-      else:
-        self.assertAllClose(np_ans, tf_cpu)
-
-      if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign:
-        return  # Return early
-
-      if x.dtype == np.float16:
-        s = list(np.shape(x))
-        jacob_t, _ = gradient_checker.compute_gradient(
-            inx, s, y, s, x_init_value=x)
-        xf = x.astype(np.float)
-        inxf = ops.convert_to_tensor(xf)
-        yf = tf_func(inxf)
-        _, jacob_n = gradient_checker.compute_gradient(
-            inxf, s, yf, s, x_init_value=xf, delta=1e-2)
-        jacob_n = jacob_n.astype(np.float16)
-        self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
-      elif x.dtype in (np.float32, np.complex64):
-        s = list(np.shape(x))
-        jacob_t, jacob_n = gradient_checker.compute_gradient(
-            inx, s, y, s, x_init_value=x, delta=1e-3)
-        self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
-      elif x.dtype in (np.float64, np.complex128):
-        s = list(np.shape(x))
-        jacob_t, jacob_n = gradient_checker.compute_gradient(
-            inx, s, y, s, x_init_value=x, delta=1e-5)
-        self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
-
-  def _check(self, result_tensor, result_np, input_sp_t, tol):
-    self.assertTrue(isinstance(result_tensor, sparse_tensor.SparseTensor))
-    self.assertTrue(isinstance(input_sp_t, sparse_tensor.SparseTensor))
-    self.assertAllEqual(input_sp_t.indices.eval(), result_tensor.indices.eval())
-    self.assertAllEqual(input_sp_t.dense_shape.eval(),
-                        result_tensor.dense_shape.eval())
-    if tol is None:
-      self.assertAllClose(result_np, result_tensor.values.eval())
-    else:
-      self.assertAllClose(
-          result_np, result_tensor.values.eval(), rtol=tol, atol=tol)
-
-  def _compareSparseCpu(self, x, np_func, tf_func, tol):
-    x_sp, x_sp_vals = _sparsify(x)
-    res_np = np_func(x_sp_vals)
-    with self.test_session(use_gpu=False):
-      self._check(tf_func(x_sp), res_np, x_sp, tol)
-
-  def _compareGpu(self, x, np_func, tf_func):
-    np_ans = np_func(x)
-    with self.test_session(force_gpu=test_util.is_gpu_available()):
-      result = tf_func(ops.convert_to_tensor(x))
-      tf_gpu = result.eval()
-    if x.dtype == np.float16:
-      self.assertAllClose(np_ans, tf_gpu, rtol=1e-3, atol=1e-3)
-    else:
-      self.assertAllClose(np_ans, tf_gpu)
-    # TODO(zhifengc/ke): make gradient checker work on GPU.
-
-  def _compareSparseGpu(self, x, np_func, tf_func, tol):
-    x_sp, x_sp_vals = _sparsify(x)
-    res_np = np_func(x_sp_vals)
-    with self.test_session(force_gpu=test_util.is_gpu_available()):
-      self._check(tf_func(x_sp), res_np, x_sp, tol)
-
-  def _compareBoth(self, x, np_func, tf_func):
-    self._compareCpu(x, np_func, tf_func)
-    self._compareGpu(x, np_func, tf_func)
-
-  def _compareBothSparse(self, x, np_func, tf_func, tol=None):
-    self._compareSparseCpu(x, np_func, tf_func, tol)
-    self._compareSparseGpu(x, np_func, tf_func, tol)
-
-  def _inv(self, x):
-    return 1.0 / x
-
-  def _rsqrt(self, x):
-    return self._inv(np.sqrt(x))
-
-  def _sigmoid(self, x):
-    return 1.0 / (1.0 + np.exp(-x))
-
-  def _log_sigmoid(self, x):
-    return np.log(self._sigmoid(x))
-
-  def _replace_domain_error_with_inf(self, fn):
-
-    def func(x):
-      try:
-        return fn(x)
-      except ValueError as e:
-        if "domain error" in str(e):
-          return np.inf * np.ones_like(x)
-        else:
-          raise e
-
-    return func
-
-  def testFloatBasic(self):
-    x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float32)
-    w = x - x.min() + 1.02  # all greater than 1
-    y = (x + .5).astype(np.float32)  # no zero
-    z = (x + 15.5).astype(np.float32)  # all positive
-    k = np.arange(-0.90, 0.90, 0.25).astype(np.float32)  # between -1 and 1
-
-    self._compareBoth(x, np.abs, math_ops.abs)
-    self._compareBoth(x, np.abs, _ABS)
-    self._compareBoth(x, np.negative, math_ops.negative)
-    self._compareBoth(x, np.negative, _NEG)
-    self._compareBoth(y, self._inv, math_ops.reciprocal)
-    self._compareBoth(x, np.square, math_ops.square)
-    self._compareBoth(z, np.sqrt, math_ops.sqrt)
-    self._compareBoth(z, self._rsqrt, math_ops.rsqrt)
-    self._compareBoth(x, np.exp, math_ops.exp)
-    self._compareBoth(x, np.expm1, math_ops.expm1)
-    self._compareBoth(z, np.log, math_ops.log)
-    self._compareBoth(z, np.log1p, math_ops.log1p)
-    self._compareBoth(x, np.sinh, math_ops.sinh)
-    self._compareBoth(x, np.cosh, math_ops.cosh)
-    self._compareBoth(x, np.tanh, math_ops.tanh)
-    self._compareBoth(x, np.arcsinh, math_ops.asinh)
-    self._compareBoth(w, np.arccosh, math_ops.acosh)
-    self._compareBoth(k, np.arctanh, math_ops.atanh)
-    self._compareBoth(x, self._sigmoid, math_ops.sigmoid)
-    self._compareBoth(x, self._log_sigmoid, math_ops.log_sigmoid)
-    self._compareBoth(y, np.sign, math_ops.sign)
-    self._compareBoth(x, np.sin, math_ops.sin)
-    self._compareBoth(x, np.cos, math_ops.cos)
-    self._compareBoth(k, np.arcsin, math_ops.asin)
-    self._compareBoth(k, np.arccos, math_ops.acos)
-    self._compareBoth(x, np.arctan, math_ops.atan)
-    self._compareBoth(x, np.tan, math_ops.tan)
-    self._compareBoth(y,
-                      np.vectorize(
-                          self._replace_domain_error_with_inf(math.lgamma)),
-                      math_ops.lgamma)
-    self._compareBoth(x, np.vectorize(math.erf), math_ops.erf)
-    self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc)
-    try:
-      from scipy import special  # pylint: disable=g-import-not-at-top
-      self._compareBoth(x, special.i0e, math_ops.bessel_i0e)
-      self._compareBoth(x, special.i1e, math_ops.bessel_i1e)
-    except ImportError as e:
-      tf_logging.warn("Cannot test special functions: %s" % str(e))
-
-    self._compareBothSparse(x, np.abs, math_ops.abs)
-    self._compareBothSparse(x, np.negative, math_ops.negative)
-    self._compareBothSparse(x, np.square, math_ops.square)
-    self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3)
-    self._compareBothSparse(x, np.tanh, math_ops.tanh)
-    self._compareBothSparse(y, np.sign, math_ops.sign)
-    self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf)
-
-  def testFloatTanhEdge(self):
-    x = np.arange(40, 40 + 6).reshape(6).astype(np.float32)
-    self._compareBoth(x, np.tanh, math_ops.tanh)
-    x = np.arange(-40, -40 + 6).reshape(6).astype(np.float32)
-    self._compareBoth(x, np.tanh, math_ops.tanh)
-
-  def testFloatEmpty(self):
-    x = np.empty((2, 0, 5), dtype=np.float32)
-    self._compareBoth(x, np.abs, math_ops.abs)
-    self._compareBoth(x, np.abs, _ABS)
-    self._compareBoth(x, np.negative, math_ops.negative)
-    self._compareBoth(x, np.negative, _NEG)
-    self._compareBoth(x, self._inv, math_ops.reciprocal)
-    self._compareBoth(x, np.square, math_ops.square)
-    self._compareBoth(x, np.sqrt, math_ops.sqrt)
-    self._compareBoth(x, self._rsqrt, math_ops.rsqrt)
-    self._compareBoth(x, np.exp, math_ops.exp)
-    self._compareBoth(x, np.expm1, math_ops.expm1)
-    self._compareBoth(x, np.log, math_ops.log)
-    self._compareBoth(x, np.log1p, math_ops.log1p)
-    self._compareBoth(x, np.sinh, math_ops.sinh)
-    self._compareBoth(x, np.arcsinh, math_ops.asinh)
-    self._compareBoth(x, np.cosh, math_ops.cosh)
-    self._compareBoth(x, np.tanh, math_ops.tanh)
-    self._compareBoth(x, self._sigmoid, math_ops.sigmoid)
-    self._compareBoth(x, np.sign, math_ops.sign)
-    self._compareBoth(x, np.sin, math_ops.sin)
-    self._compareBoth(x, np.cos, math_ops.cos)
-    # Can't use vectorize below, so just use some arbitrary function
-    self._compareBoth(x, np.sign, math_ops.lgamma)
-    self._compareBoth(x, np.sign, math_ops.erf)
-    self._compareBoth(x, np.sign, math_ops.erfc)
-    self._compareBoth(x, np.tan, math_ops.tan)
-    self._compareBoth(x, np.arcsin, math_ops.asin)
-    self._compareBoth(x, np.arccos, math_ops.acos)
-    self._compareBoth(x, np.arctan, math_ops.atan)
-    try:
-      from scipy import special  # pylint: disable=g-import-not-at-top
-      self._compareBoth(x, special.i0e, math_ops.bessel_i0e)
-      self._compareBoth(x, special.i1e, math_ops.bessel_i1e)
-    except ImportError as e:
-      tf_logging.warn("Cannot test special functions: %s" % str(e))
-
-    self._compareBothSparse(x, np.abs, math_ops.abs)
-    self._compareBothSparse(x, np.negative, math_ops.negative)
-    self._compareBothSparse(x, np.square, math_ops.square)
-    self._compareBothSparse(x, np.sqrt, math_ops.sqrt, tol=1e-3)
-    self._compareBothSparse(x, np.tanh, math_ops.tanh)
-    self._compareBothSparse(x, np.sign, math_ops.sign)
-    self._compareBothSparse(x, np.sign, math_ops.erf)
-
-  def testDoubleBasic(self):
-    x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float64)
-    w = x - x.min() + 1.02  # all greater than 1
-    y = (x + .5).astype(np.float64)  # no zero
-    z = (x + 15.5).astype(np.float64)  # all positive
-    k = np.arange(-0.90, 0.90,
-                  0.35).reshape(1, 3, 2).astype(np.float64)  # between -1 and 1
-    self._compareBoth(x, np.abs, math_ops.abs)
-    self._compareBoth(x, np.abs, _ABS)
-    self._compareBoth(x, np.negative, math_ops.negative)
-    self._compareBoth(x, np.negative, _NEG)
-    self._compareBoth(y, self._inv, math_ops.reciprocal)
-    self._compareBoth(x, np.square, math_ops.square)
-    self._compareBoth(z, np.sqrt, math_ops.sqrt)
-    self._compareBoth(z, self._rsqrt, math_ops.rsqrt)
-    self._compareBoth(x, np.exp, math_ops.exp)
-    self._compareBoth(x, np.expm1, math_ops.expm1)
-    self._compareBoth(z, np.log, math_ops.log)
-    self._compareBoth(z, np.log1p, math_ops.log1p)
-    self._compareBoth(x, np.sinh, math_ops.sinh)
-    self._compareBoth(x, np.cosh, math_ops.cosh)
-    self._compareBoth(x, np.tanh, math_ops.tanh)
-    self._compareBoth(x, np.arcsinh, math_ops.asinh)
-    self._compareBoth(w, np.arccosh, math_ops.acosh)
-    self._compareBoth(k, np.arctanh, math_ops.atanh)
-    self._compareBoth(x, self._sigmoid, math_ops.sigmoid)
-    self._compareBoth(y, np.sign, math_ops.sign)
-    self._compareBoth(x, np.sin, math_ops.sin)
-    self._compareBoth(x, np.cos, math_ops.cos)
-    self._compareBoth(y,
-                      np.vectorize(
-                          self._replace_domain_error_with_inf(math.lgamma)),
-                      math_ops.lgamma)
-    self._compareBoth(x, np.vectorize(math.erf), math_ops.erf)
-    self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc)
-    self._compareBoth(x, np.arctan, math_ops.atan)
-    self._compareBoth(k, np.arcsin, math_ops.asin)
-    self._compareBoth(k, np.arccos, math_ops.acos)
-    self._compareBoth(k, np.tan, math_ops.tan)
-    try:
-      from scipy import special  # pylint: disable=g-import-not-at-top
-      self._compareBoth(x, special.i0e, math_ops.bessel_i0e)
-      self._compareBoth(x, special.i1e, math_ops.bessel_i1e)
-    except ImportError as e:
-      tf_logging.warn("Cannot test special functions: %s" % str(e))
-
-    self._compareBothSparse(x, np.abs, math_ops.abs)
-    self._compareBothSparse(x, np.negative, math_ops.negative)
-    self._compareBothSparse(x, np.square, math_ops.square)
-    self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3)
-    self._compareBothSparse(x, np.tanh, math_ops.tanh)
-    self._compareBothSparse(y, np.sign, math_ops.sign)
-    self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf)
-
-  def testHalfBasic(self):
-    x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float16)
-    y = (x + .5).astype(np.float16)  # no zero
-    z = (x + 15.5).astype(np.float16)  # all positive
-    self._compareBoth(x, np.abs, math_ops.abs)
-    self._compareBoth(x, np.abs, _ABS)
-    self._compareBoth(x, np.negative, math_ops.negative)
-    self._compareBoth(x, np.negative, _NEG)
-    self._compareBoth(y, self._inv, math_ops.reciprocal)
-    self._compareBoth(x, np.square, math_ops.square)
-    self._compareBoth(z, np.sqrt, math_ops.sqrt)
-    self._compareBoth(z, self._rsqrt, math_ops.rsqrt)
-    self._compareBoth(x, np.exp, math_ops.exp)
-    self._compareBoth(x, np.expm1, math_ops.expm1)
-    self._compareBoth(z, np.log, math_ops.log)
-    self._compareBoth(z, np.log1p, math_ops.log1p)
-    self._compareBoth(x, np.tanh, math_ops.tanh)
-    self._compareBoth(x, self._sigmoid, math_ops.sigmoid)
-    self._compareBoth(y, np.sign, math_ops.sign)
-    self._compareBoth(x, np.sin, math_ops.sin)
-    self._compareBoth(x, np.cos, math_ops.cos)
-    self._compareBoth(y,
-                      np.vectorize(
-                          self._replace_domain_error_with_inf(math.lgamma)),
-                      math_ops.lgamma)
-    self._compareBoth(x, np.vectorize(math.erf), math_ops.erf)
-    self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc)
-    try:
-      from scipy import special  # pylint: disable=g-import-not-at-top
-      self._compareBoth(x, special.i0e, math_ops.bessel_i0e)
-      self._compareBoth(x, special.i1e, math_ops.bessel_i1e)
-    except ImportError as e:
-      tf_logging.warn("Cannot test special functions: %s" % str(e))
-
-    self._compareBothSparse(x, np.abs, math_ops.abs)
-    self._compareBothSparse(x, np.negative, math_ops.negative)
-    self._compareBothSparse(x, np.square, math_ops.square)
-    self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3)
-    self._compareBothSparse(x, np.tanh, math_ops.tanh)
-    self._compareBothSparse(y, np.sign, math_ops.sign)
-    self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf, tol=1e-3)
-
-  def testInt32Basic(self):
-    x = np.arange(-6, 6, 2).reshape(1, 3, 2).astype(np.int32)
-    self._compareCpu(x, np.abs, math_ops.abs)
-    self._compareCpu(x, np.abs, _ABS)
-    self._compareBoth(x, np.negative, math_ops.negative)
-    self._compareBoth(x, np.negative, _NEG)
-    self._compareBoth(x, np.square, math_ops.square)
-    self._compareCpu(x, np.sign, math_ops.sign)
-
-    self._compareBothSparse(x, np.abs, math_ops.abs)
-    self._compareBothSparse(x, np.negative, math_ops.negative)
-    self._compareBothSparse(x, np.square, math_ops.square)
-    self._compareBothSparse(x, np.sign, math_ops.sign)
-
-  def testInt64Basic(self):
-    x = np.arange(-6 << 40, 6 << 40, 2 << 40).reshape(1, 3, 2).astype(np.int64)
-    self._compareCpu(x, np.abs, math_ops.abs)
-    self._compareCpu(x, np.abs, _ABS)
-    self._compareCpu(x, np.negative, math_ops.negative)
-    self._compareCpu(x, np.negative, _NEG)
-    self._compareCpu(x, np.sign, math_ops.sign)
-
-    self._compareBothSparse(x, np.abs, math_ops.abs)
-    self._compareBothSparse(x, np.negative, math_ops.negative)
-    self._compareBothSparse(x, np.sign, math_ops.sign)
-
-  def testInt64Square(self):
-    x = np.arange(-6 << 20, 6 << 20, 2 << 20).reshape(1, 3, 2).astype(np.int64)
-    self._compareCpu(x, np.square, math_ops.square)
-    self._compareBothSparse(x, np.square, math_ops.square)
-
-  def testComplex64Basic(self):
-    x = np.complex(1, 1) * np.arange(-3, 3).reshape(1, 3, 2).astype(
-        np.complex64)
-    y = x + np.complex(0.5, 0.5)  # no zeros
-    self._compareBoth(x, np.abs, math_ops.abs)
-    self._compareBoth(x, np.abs, _ABS)
-    self._compareBoth(x, np.negative, math_ops.negative)
-    self._compareBoth(x, np.negative, _NEG)
-    self._compareCpu(y, self._inv, math_ops.reciprocal)
-    self._compareCpu(x, np.square, math_ops.square)
-    self._compareCpu(y, np.sqrt, math_ops.sqrt)
-    self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
-    self._compareBoth(x, np.exp, math_ops.exp)
-    self._compareCpu(x, np.expm1, math_ops.expm1)
-    self._compareCpu(y, np.log, math_ops.log)
-    self._compareCpu(y, np.log1p, math_ops.log1p)
-    self._compareCpu(x, np.sinh, math_ops.sinh)
-    self._compareCpu(x, np.cosh, math_ops.cosh)
-    self._compareCpu(x, np.tanh, math_ops.tanh)
-
-    # Complex64 versions of asinh() and acosh() in libstdc++ only have 6 digits
-    # of precision.
-    # Small gradient values + low precision --> High relative error
-    self._compareCpu(y, np.arcsinh, math_ops.asinh, grad_rtol=1e-2)
-    self._compareCpu(y, np.arccosh, math_ops.acosh, grad_rtol=1e-2)
-
-    self._compareCpu(y, np.arctanh, math_ops.atanh)
-    self._compareCpu(x, self._sigmoid, math_ops.sigmoid)
-    self._compareCpu(x, np.sin, math_ops.sin)
-    self._compareCpu(x, np.cos, math_ops.cos)
-
-    self._compareBothSparse(x, np.abs, math_ops.abs)
-    self._compareBothSparse(x, np.negative, math_ops.negative)
-    self._compareBothSparse(x, np.square, math_ops.square)
-    self._compareBothSparse(x, np.sqrt, math_ops.sqrt, 1e-3)
-    self._compareBothSparse(x, np.tanh, math_ops.tanh)
-
-    # Numpy uses an incorrect definition of sign; use the right one instead.
-    def complex_sign(x):
-      return x / np.abs(x)
-
-    self._compareBoth(y, complex_sign, math_ops.sign)
-    self._compareBothSparse(y, complex_sign, math_ops.sign)
-
-  def testComplex128Basic(self):
-    x = np.complex(1, 1) * np.arange(-3, 3).reshape(1, 3, 2).astype(
-        np.complex128)
-    y = x + np.complex(0.5, 0.5)  # no zeros
-    self._compareBoth(x, np.abs, math_ops.abs)
-    self._compareBoth(x, np.abs, _ABS)
-    self._compareBoth(x, np.negative, math_ops.negative)
-    self._compareBoth(x, np.negative, _NEG)
-    self._compareCpu(y, self._inv, math_ops.reciprocal)
-    self._compareCpu(x, np.square, math_ops.square)
-    self._compareCpu(y, np.sqrt, math_ops.sqrt)
-    self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
-    self._compareBoth(x, np.exp, math_ops.exp)
-    self._compareCpu(x, np.expm1, math_ops.expm1)
-    self._compareCpu(y, np.log, math_ops.log)
-    self._compareCpu(y, np.log1p, math_ops.log1p)
-    self._compareCpu(x, np.sinh, math_ops.sinh)
-    self._compareCpu(x, np.cosh, math_ops.cosh)
-    self._compareCpu(x, np.tanh, math_ops.tanh)
-    self._compareCpu(y, np.arcsinh, math_ops.asinh)
-    self._compareCpu(y, np.arccosh, math_ops.acosh)
-    self._compareCpu(y, np.arctanh, math_ops.atanh)
-    self._compareCpu(x, self._sigmoid, math_ops.sigmoid)
-    self._compareCpu(x, np.sin, math_ops.sin)
-    self._compareCpu(x, np.cos, math_ops.cos)
-
-    self._compareBothSparse(x, np.abs, math_ops.abs)
-    self._compareBothSparse(x, np.negative, math_ops.negative)
-    self._compareBothSparse(x, np.square, math_ops.square)
-    self._compareBothSparse(x, np.sqrt, math_ops.sqrt, 1e-3)
-    self._compareBothSparse(x, np.tanh, math_ops.tanh)
-
-    # Numpy uses an incorrect definition of sign; use the right one instead.
-    def complex_sign(x):
-      return x / np.abs(x)
-
-    self._compareBoth(y, complex_sign, math_ops.sign)
-    self._compareBothSparse(y, complex_sign, math_ops.sign)
-
-  def testGradGrad(self):
-    np.random.seed(7)
-    shape = (5,)
-    dtype_tols = [(np.float32, 5e-4), (np.float64, 1e-6), (np.complex64, 5e-4),
-                  (np.complex128, 1e-6)]
-    op_range = [
-        (gen_math_ops.reciprocal_grad, [-2, 2]),
-        (gen_math_ops.rsqrt_grad, [0.1, 3]),
-        (gen_math_ops.sigmoid_grad, [-2, 2]),
-        (gen_math_ops.sqrt_grad, [0.1, 3]),
-        (gen_math_ops.tanh_grad, [-2, 2]),
-    ]
-
-    def rand(dtype):
-      x = np.random.uniform(
-          real_range[0], real_range[1], size=shape[0]).astype(dtype)
-      if dtype in (np.complex64, np.complex128):
-        x += 1j * np.random.uniform(-2, 2, size=shape[0]).astype(dtype)
-      return x
-
-    for op, real_range in op_range:
-      with self.cached_session():
-        for dtype, tol in dtype_tols:
-          x = constant_op.constant(rand(dtype))
-          y = constant_op.constant(rand(dtype))
-          z = op(x, y)
-          grads = gradient_checker.compute_gradient(
-              [x, y], [shape, shape],
-              z,
-              shape,
-              x_init_value=[rand(dtype), rand(dtype)])
-          if isinstance(grads, tuple):
-            grads = [grads]
-          for analytical, numerical in grads:
-            self.assertAllClose(analytical, numerical, rtol=tol, atol=tol)
-
-
-class BinaryOpTest(test.TestCase):
-
-  def _compareCpu(self, x, y, np_func, tf_func, also_compare_variables=False):
-    np_ans = np_func(x, y)
-    with self.test_session(use_gpu=False):
-      inx = ops.convert_to_tensor(x)
-      iny = ops.convert_to_tensor(y)
-      out = tf_func(inx, iny)
-      tf_cpu = out.eval()
-      # Test that the op takes precedence over numpy operators.
-      np_left = tf_func(x, iny).eval()
-      np_right = tf_func(inx, y).eval()
-
-      if also_compare_variables:
-        var_x = variables.Variable(x)
-        var_y = variables.Variable(y)
-        variables.global_variables_initializer().run()
-        print(type(x), type(y), type(var_x), type(var_y))
-        print(type(tf_func(x, var_y)), type(tf_func(var_x, y)))
-        np_var_left = tf_func(x, var_y).eval()
-        np_var_right = tf_func(var_x, y).eval()
-
-    if np_ans.dtype != np.object:
-      self.assertAllClose(np_ans, tf_cpu)
-      self.assertAllClose(np_ans, np_left)
-      self.assertAllClose(np_ans, np_right)
-      if also_compare_variables:
-        self.assertAllClose(np_ans, np_var_left)
-        self.assertAllClose(np_ans, np_var_right)
-    self.assertShapeEqual(np_ans, out)
-
-  _GRAD_TOL = {
-      dtypes_lib.float16: 1e-3,
-      dtypes_lib.float32: 1e-3,
-      dtypes_lib.complex64: 1e-2,
-      dtypes_lib.float64: 1e-5,
-      dtypes_lib.complex128: 1e-4
-  }
-
-  def _compareGradientX(self,
-                        x,
-                        y,
-                        np_func,
-                        tf_func,
-                        numeric_gradient_type=None):
-    z = np_func(x, y)
-    zs = list(z.shape)
-    with self.cached_session():
-      inx = ops.convert_to_tensor(x)
-      iny = ops.convert_to_tensor(y)
-      if x.dtype in (np.float32, np.float64):
-        out = 1.1 * tf_func(inx, iny)
-      else:
-        out = tf_func(inx, iny)
-      xs = list(x.shape)
-      jacob_t, jacob_n = gradient_checker.compute_gradient(
-          inx, xs, out, zs, x_init_value=x)
-      if numeric_gradient_type is not None:
-        xf = x.astype(numeric_gradient_type)
-        yf = y.astype(numeric_gradient_type)
-        inxf = ops.convert_to_tensor(xf)
-        inyf = ops.convert_to_tensor(yf)
-        outf = tf_func(inxf, inyf)
-        _, jacob_n = gradient_checker.compute_gradient(
-            inxf, xs, outf, zs, x_init_value=xf, delta=1e-3)
-        jacob_n = jacob_n.astype(x.dtype)
-      tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)]
-      self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol)
-
-  def _compareGradientY(self,
-                        x,
-                        y,
-                        np_func,
-                        tf_func,
-                        numeric_gradient_type=None):
-    z = np_func(x, y)
-    zs = list(z.shape)
-    with self.cached_session():
-      inx = ops.convert_to_tensor(x)
-      iny = ops.convert_to_tensor(y)
-      if x.dtype in (np.float32, np.float64):
-        out = 1.1 * tf_func(inx, iny)
-      else:
-        out = tf_func(inx, iny)
-      ys = list(np.shape(y))
-      jacob_t, jacob_n = gradient_checker.compute_gradient(
-          iny, ys, out, zs, x_init_value=y)
-      if numeric_gradient_type is not None:
-        xf = x.astype(numeric_gradient_type)
-        yf = y.astype(numeric_gradient_type)
-        inxf = ops.convert_to_tensor(xf)
-        inyf = ops.convert_to_tensor(yf)
-        outf = tf_func(inxf, inyf)
-        _, jacob_n = gradient_checker.compute_gradient(
-            inyf, ys, outf, zs, x_init_value=yf)
-        jacob_n = jacob_n.astype(x.dtype)
-    tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)]
-    self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol)
-
-  def _compareGpu(self, x, y, np_func, tf_func):
-    np_ans = np_func(x, y)
-    with self.test_session(force_gpu=test_util.is_gpu_available()):
-      inx = ops.convert_to_tensor(x)
-      iny = ops.convert_to_tensor(y)
-      out = tf_func(inx, iny)
-      tf_gpu = out.eval()
-    self.assertAllClose(np_ans, tf_gpu)
-    self.assertShapeEqual(np_ans, out)
-    # TODO(zhifengc/ke): make gradient checker work on GPU.
-
-  def _compareBoth(self, x, y, np_func, tf_func, also_compare_variables=False):
-    self._compareCpu(x, y, np_func, tf_func, also_compare_variables)
-    if x.dtype in (np.float16, np.float32, np.float64, np.complex64,
-                   np.complex128):
-      if tf_func not in (_FLOORDIV, math_ops.floordiv, math_ops.zeta,
-                         math_ops.polygamma):
-        self._compareGradientX(x, y, np_func, tf_func)
-        self._compareGradientY(x, y, np_func, tf_func)
-      if tf_func in (math_ops.zeta, math_ops.polygamma):
-        # These methods only support gradients in the second parameter
-        self._compareGradientY(x, y, np_func, tf_func)
-      self._compareGpu(x, y, np_func, tf_func)
-
-  def testFloatBasic(self):
-    x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32)
-    y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32)
-    self._compareBoth(x, y, np.add, math_ops.add, also_compare_variables=True)
-    self._compareBoth(x, y, np.subtract, math_ops.subtract)
-    self._compareBoth(x, y, np.multiply, math_ops.multiply)
-    self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv)
-    self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv)
-    self._compareBoth(x, y, np.add, _ADD)
-    self._compareBoth(x, y, np.subtract, _SUB)
-    self._compareBoth(x, y, np.multiply, _MUL)
-    self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV)
-    self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV)
-    self._compareBoth(x, y, np.arctan2, math_ops.atan2)
-    x1 = np.random.randn(5, 6).astype(np.float32)
-    x2 = np.random.randn(5, 6).astype(np.float32)
-    # Remove tiny values--atan2 gradients are flaky near the origin.
-    x1[np.abs(x1) < 0.05] = 0.05 * np.sign(x1[np.abs(x1) < 0.05])
-    x2[np.abs(x2) < 0.05] = 0.05 * np.sign(x2[np.abs(x2) < 0.05])
-    self._compareBoth(x1, x2, np.arctan2, math_ops.atan2)
-    try:
-      from scipy import special  # pylint: disable=g-import-not-at-top
-      a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32)
-      x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32)
-      self._compareBoth(a_pos_small, x_pos_small, special.gammainc,
-                        math_ops.igamma)
-      self._compareBoth(a_pos_small, x_pos_small, special.gammaincc,
-                        math_ops.igammac)
-      # Need x > 1
-      self._compareBoth(x_pos_small + 1, a_pos_small, special.zeta,
-                        math_ops.zeta)
-      n_small = np.arange(0, 15).reshape(1, 3, 5).astype(np.float32)
-      self._compareBoth(n_small, x_pos_small, special.polygamma,
-                        math_ops.polygamma)
-    except ImportError as e:
-      tf_logging.warn("Cannot test special functions: %s" % str(e))
-
-  def testFloatDifferentShapes(self):
-    x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.float32)
-    y = np.array([1, 2]).reshape(2, 1).astype(np.float32)
-    with self.cached_session() as sess:
-      inx = ops.convert_to_tensor(x)
-      iny = ops.convert_to_tensor(y)
-      s = math_ops.reduce_sum(inx * iny)
-      gx, gy = sess.run(gradients_impl.gradients(s, [inx, iny]))
-    # gx is simply the broadcasted y
-    self.assertAllEqual(gx,
-                        np.array([1, 1, 2, 2]).reshape(2, 2).astype(np.float32))
-    # gy is x's column summed up
-    self.assertAllEqual(gy, np.array([3, 7]).reshape(2, 1).astype(np.float32))
-
-  def testFloatVariableOverload(self):
-    x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.int32)
-    y = np.array([1, 2]).reshape(2, 1).astype(np.int32)
-    var_x = variables.Variable(x)
-    var_y = variables.Variable(y)
-    with self.cached_session() as sess:
-      sess.run([var_x.initializer, var_y.initializer])
-      left_result = (var_x * y).eval()
-      right_result = (x * var_y).eval()
-    np_result = x * y
-    self.assertAllEqual(np_result, left_result)
-    self.assertAllEqual(np_result, right_result)
-
-  def testDoubleBasic(self):
-    x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float64)
-    y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float64)
-    self._compareBoth(x, y, np.add, math_ops.add)
-    self._compareBoth(x, y, np.subtract, math_ops.subtract)
-    self._compareBoth(x, y, np.multiply, math_ops.multiply)
-    self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv)
-    self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv)
-    self._compareBoth(x, y, np.add, _ADD)
-    self._compareBoth(x, y, np.subtract, _SUB)
-    self._compareBoth(x, y, np.multiply, _MUL)
-    self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV)
-    self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV)
-    self._compareBoth(x, y, np.arctan2, math_ops.atan2)
-    x1 = np.random.randn(7, 4).astype(np.float64)
-    x2 = np.random.randn(7, 4).astype(np.float64)
-    # Remove tiny values--atan2 gradients are flaky near the origin.
-    x1[np.abs(x1) < 0.5] = 0.5 * np.sign(x1[np.abs(x1) < 0.5])
-    x2[np.abs(x2) < 0.5] = 0.5 * np.sign(x2[np.abs(x2) < 0.5])
-    self._compareBoth(x1, x2, np.arctan2, math_ops.atan2)
-    try:
-      from scipy import special  # pylint: disable=g-import-not-at-top
-      a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32)
-      x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32)
-      self._compareBoth(a_pos_small, x_pos_small, special.gammainc,
-                        math_ops.igamma)
-      self._compareBoth(a_pos_small, x_pos_small, special.gammaincc,
-                        math_ops.igammac)
-    except ImportError as e:
-      tf_logging.warn("Cannot test special functions: %s" % str(e))
-
-  def testUint8Basic(self):
-    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.uint8)
-    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.uint8)
-    self._compareBoth(x, y, np.add, math_ops.add)
-
-  def testInt8Basic(self):
-    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int8)
-    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int8)
-    self._compareBoth(x, y, np.multiply, math_ops.multiply)
-    self._compareBoth(x, y, np.multiply, _MUL)
-
-  def testInt16Basic(self):
-    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int16)
-    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int16)
-    self._compareBoth(x, y, np.multiply, math_ops.multiply)
-    self._compareBoth(x, y, np.multiply, _MUL)
-
-  def testUint16Basic(self):
-    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.uint16)
-    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.uint16)
-    self._compareBoth(x, y, np.multiply, math_ops.multiply)
-    self._compareBoth(x, y, np.multiply, _MUL)
-    self._compareBoth(x, y, np.true_divide, math_ops.truediv)
-    self._compareBoth(x, y, np.floor_divide, math_ops.floordiv)
-    self._compareBoth(x, y, np.true_divide, _TRUEDIV)
-    self._compareBoth(x, y, np.floor_divide, _FLOORDIV)
-
-  def testInt32Basic(self):
-    x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int32)
-    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int32)
-    self._compareBoth(x, y, np.add, math_ops.add)
-    self._compareBoth(x, y, np.subtract, math_ops.subtract)
-    self._compareBoth(x, y, np.multiply, math_ops.multiply)
-    self._compareBoth(x, y, np.true_divide, math_ops.truediv)
-    self._compareBoth(x, y, np.floor_divide, math_ops.floordiv)
-    self._compareBoth(x, y, np.mod, math_ops.mod)
-    self._compareBoth(x, y, np.add, _ADD)
-    self._compareBoth(x, y, np.subtract, _SUB)
-    self._compareBoth(x, y, np.multiply, _MUL)
-    self._compareBoth(x, y, np.true_divide, _TRUEDIV)
-    self._compareBoth(x, y, np.floor_divide, _FLOORDIV)
-    self._compareBoth(x, y, np.mod, _MOD)
-    # _compareBoth tests on GPU only for floating point types, so test
-    # _MOD for int32 on GPU by calling _compareGpu
-    self._compareGpu(x, y, np.mod, _MOD)
-
-  def testInt64Basic(self):
-    x = np.arange(1 << 40, 13 << 40, 2 << 40).reshape(1, 3, 2).astype(np.int64)
-    y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int64)
-    self._compareBoth(x, y, np.subtract, math_ops.subtract)
-    self._compareBoth(x, y, np.multiply, math_ops.multiply)
-    self._compareBoth(x, y, np.true_divide, math_ops.truediv)
-    self._compareBoth(x, y, np.floor_divide, math_ops.floordiv)
-    self._compareBoth(x, y, np.mod, math_ops.mod)
-    self._compareBoth(x, y, np.subtract, _SUB)
-    self._compareBoth(x, y, np.multiply, _MUL)
-    self._compareBoth(x, y, np.true_divide, _TRUEDIV)
-    self._compareBoth(x, y, np.floor_divide, _FLOORDIV)
-    self._compareBoth(x, y, np.mod, _MOD)
-
-  def testComplex64Basic(self):
-    x = np.complex(1, 1) * np.linspace(-10, 10, 6).reshape(1, 3, 2).astype(
-        np.complex64)
-    y = np.complex(1, 1) * np.linspace(20, -20, 6).reshape(1, 3, 2).astype(
-        np.complex64)
-    self._compareBoth(x, y, np.add, math_ops.add)
-    self._compareBoth(x, y, np.subtract, math_ops.subtract)
-    self._compareBoth(x, y, np.multiply, math_ops.multiply)
-    self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv)
-    self._compareBoth(x, y, np.add, _ADD)
-    self._compareBoth(x, y, np.subtract, _SUB)
-    self._compareBoth(x, y, np.multiply, _MUL)
-    self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV)
-
-  def testComplex128Basic(self):
-    x = np.complex(1, 1) * np.linspace(-10, 10, 6).reshape(1, 3, 2).astype(
-        np.complex128)
-    y = np.complex(1, 1) * np.linspace(20, -20, 6).reshape(1, 3, 2).astype(
-        np.complex128)
-    self._compareBoth(x, y, np.add, math_ops.add)
-    self._compareBoth(x, y, np.subtract, math_ops.subtract)
-    self._compareBoth(x, y, np.multiply, math_ops.multiply)
-    self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv)
-    self._compareBoth(x, y, np.add, _ADD)
-    self._compareBoth(x, y, np.subtract, _SUB)
-    self._compareBoth(x, y, np.multiply, _MUL)
-    self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV)
-
-  def testStringComparison(self):
-    x = np.array([["abc", "bh"], ["c", ""]])
-    y = np.array([["abc", "bh"], ["def", "hi"]])
-    with self.test_session(use_gpu=False) as sess:
-      cmp_eq = math_ops.equal(x, y)
-      cmp_not_eq = math_ops.not_equal(x, y)
-      values = sess.run([cmp_eq, cmp_not_eq])
-      self.assertAllEqual([[True, True], [False, False]], values[0])
-      self.assertAllEqual([[False, False], [True, True]], values[1])
-
-  def testString(self):
-    x = np.array(
-        [["x_0_0", "x_0_1", "x_0_2"], ["x_1_0", "x_1_1", "x_1_2"],
-         ["x_2_0", "x_2_1", "x_2_2"]],
-        dtype=np.object)
-    y = np.array(
-        [["y_0_0", "y_0_1", "y_0_2"], ["y_1_0", "y_1_1", "y_1_2"],
-         ["y_2_0", "y_2_1", "y_2_2"]],
-        dtype=np.object)
-    z = np.array([["z_0", "z_1", "z_2"]], dtype=np.object)
-    w = np.array("w", dtype=np.object)
-    self._compareCpu(x, y, _ADD, _ADD)
-    self._compareCpu(x, z, _ADD, _ADD)
-    self._compareCpu(x, w, _ADD, _ADD)
-    self._compareCpu(z, w, _ADD, _ADD)
-
-  def _compareBCast(self, xs, ys, dtype, np_func, tf_func):
-    if dtype in (np.complex64, np.complex128):
-      x = (1 + np.linspace(0, 2 + 3j, np.prod(xs))).astype(dtype).reshape(xs)
-      y = (1 + np.linspace(0, 2 - 2j, np.prod(ys))).astype(dtype).reshape(ys)
-    else:
-      x = (1 + np.linspace(0, 5, np.prod(xs))).astype(dtype).reshape(xs)
-      y = (1 + np.linspace(0, 5, np.prod(ys))).astype(dtype).reshape(ys)
-    self._compareCpu(x, y, np_func, tf_func)
-    if x.dtype in (np.float16, np.float32, np.float64):
-      # TODO(aselle): Make the test work for dtypes:
-      #     (np.complex64, np.complex128).
-      if tf_func not in (_FLOORDIV, math_ops.floordiv):
-        if x.dtype == np.float16:
-          # Compare fp16 theoretical gradients to fp32 numerical gradients,
-          # since fp16 numerical gradients are too imprecise unless great
-          # care is taken with choosing the inputs and the delta. This is
-          # a weaker check (in particular, it does not test the op itself,
-          # only its gradient), but it's much better than nothing.
-          self._compareGradientX(x, y, np_func, tf_func, np.float)
-          self._compareGradientY(x, y, np_func, tf_func, np.float)
-        else:
-          self._compareGradientX(x, y, np_func, tf_func)
-          self._compareGradientY(x, y, np_func, tf_func)
-      self._compareGpu(x, y, np_func, tf_func)
-
-  # TODO(josh11b,vrv): Refactor this to use parameterized tests.
-  def _testBCastByFunc(self, funcs, xs, ys):
-    dtypes = [
-        np.float16,
-        np.float32,
-        np.float64,
-        np.int32,
-        np.int64,
-        np.complex64,
-        np.complex128,
-    ]
-    for dtype in dtypes:
-      for (np_func, tf_func) in funcs:
-        if (dtype in (np.complex64, np.complex128) and
-            tf_func in (_FLOORDIV, math_ops.floordiv)):
-          continue  # floordiv makes no sense for complex numbers
-        self._compareBCast(xs, ys, dtype, np_func, tf_func)
-        self._compareBCast(ys, xs, dtype, np_func, tf_func)
-
-  def _testBCastA(self, xs, ys):
-    funcs = [
-        (np.add, math_ops.add),
-        (np.add, _ADD),
-    ]
-    self._testBCastByFunc(funcs, xs, ys)
-
-  def _testBCastB(self, xs, ys):
-    funcs = [
-        (np.subtract, math_ops.subtract),
-        (np.subtract, _SUB),
-        (np.power, math_ops.pow),
-    ]
-    self._testBCastByFunc(funcs, xs, ys)
-
-  def _testBCastC(self, xs, ys):
-    funcs = [
-        (np.multiply, math_ops.multiply),
-        (np.multiply, _MUL),
-    ]
-    self._testBCastByFunc(funcs, xs, ys)
-
-  def _testBCastD(self, xs, ys):
-    funcs = [
-        (np.true_divide, math_ops.truediv),
-        (np.floor_divide, math_ops.floordiv),
-        (np.true_divide, _TRUEDIV),
-        (np.floor_divide, _FLOORDIV),
-    ]
-    self._testBCastByFunc(funcs, xs, ys)
-
-  def testBCast_0A(self):
-    self._testBCastA([1, 3, 2], [1])
-
-  def testBCast_0B(self):
-    self._testBCastB([1, 3, 2], [1])
-
-  def testBCast_0C(self):
-    self._testBCastC([1, 3, 2], [1])
-
-  def testBCast_0D(self):
-    self._testBCastD([1, 3, 2], [1])
-
-  def testBCast_1A(self):
-    self._testBCastA([1, 3, 2], [2])
-
-  def testBCast_1B(self):
-    self._testBCastB([1, 3, 2], [2])
-
-  def testBCast_1C(self):
-    self._testBCastC([1, 3, 2], [2])
-
-  def testBCast_1D(self):
-    self._testBCastD([1, 3, 2], [2])
-
-  def testBCast_2A(self):
-    self._testBCastA([1, 3, 2], [3, 2])
-
-  def testBCast_2B(self):
-    self._testBCastB([1, 3, 2], [3, 2])
-
-  def testBCast_2C(self):
-    self._testBCastC([1, 3, 2], [3, 2])
-
-  def testBCast_2D(self):
-    self._testBCastD([1, 3, 2], [3, 2])
-
-  def testBCast_3A(self):
-    self._testBCastA([1, 3, 2], [3, 1])
-
-  def testBCast_3B(self):
-    self._testBCastB([1, 3, 2], [3, 1])
-
-  def testBCast_3C(self):
-    self._testBCastC([1, 3, 2], [3, 1])
-
-  def testBCast_3D(self):
-    self._testBCastD([1, 3, 2], [3, 1])
-
-  def testBCast_4A(self):
-    self._testBCastA([1, 3, 2], [1, 3, 2])
-
-  def testBCast_4B(self):
-    self._testBCastB([1, 3, 2], [1, 3, 2])
-
-  def testBCast_4C(self):
-    self._testBCastC([1, 3, 2], [1, 3, 2])
-
-  def testBCast_4D(self):
-    self._testBCastD([1, 3, 2], [1, 3, 2])
-
-  def testBCast_5A(self):
-    self._testBCastA([1, 3, 2], [2, 3, 1])
-
-  def testBCast_5B(self):
-    self._testBCastB([1, 3, 2], [2, 3, 1])
-
-  def testBCast_5C(self):
-    self._testBCastC([1, 3, 2], [2, 3, 1])
-
-  def testBCast_5D(self):
-    self._testBCastD([1, 3, 2], [2, 3, 1])
-
-  def testBCast_6A(self):
-    self._testBCastA([1, 3, 2], [2, 1, 1])
-
-  def testBCast_6B(self):
-    self._testBCastB([1, 3, 2], [2, 1, 1])
-
-  def testBCast_6C(self):
-    self._testBCastC([1, 3, 2], [2, 1, 1])
-
-  def testBCast_6D(self):
-    self._testBCastD([1, 3, 2], [2, 1, 1])
-
-  def testBCast_7A(self):
-    self._testBCastA([1, 3, 2], [1, 3, 1])
-
-  def testBCast_7B(self):
-    self._testBCastB([1, 3, 2], [1, 3, 1])
-
-  def testBCast_7C(self):
-    self._testBCastC([1, 3, 2], [1, 3, 1])
-
-  def testBCast_7D(self):
-    self._testBCastD([1, 3, 2], [1, 3, 1])
-
-  def testBCast_8A(self):
-    self._testBCastA([2, 1, 5], [2, 3, 1])
-
-  def testBCast_8B(self):
-    self._testBCastB([2, 1, 5], [2, 3, 1])
-
-  def testBCast_8C(self):
-    self._testBCastC([2, 1, 5], [2, 3, 1])
-
-  def testBCast_8D(self):
-    self._testBCastD([2, 1, 5], [2, 3, 1])
-
-  def testBCast_9A(self):
-    self._testBCastA([2, 0, 5], [2, 0, 1])
-
-  def testBCast_9B(self):
-    self._testBCastB([2, 0, 5], [2, 0, 1])
-
-  def testBCast_9C(self):
-    self._testBCastC([2, 0, 5], [2, 0, 1])
-
-  def testBCast_9D(self):
-    self._testBCastD([2, 0, 5], [2, 0, 1])
-
-  def testBCast_10A(self):
-    self._testBCastA([2, 3, 0], [2, 3, 1])
-
-  def testBCast_10B(self):
-    self._testBCastB([2, 3, 0], [2, 3, 1])
-
-  def testBCast_10C(self):
-    self._testBCastC([2, 3, 0], [2, 3, 1])
-
-  def testBCast_10D(self):
-    self._testBCastD([2, 3, 0], [2, 3, 1])
-
-  def testBCast_11A(self):
-    self._testBCastA([1, 3, 2], [1, 3, 2])
-
-  def testBCast_11B(self):
-    self._testBCastB([1, 3, 2], [1, 3, 2])
-
-  def testBCast_11C(self):
-    self._testBCastC([1, 3, 2], [1, 3, 2])
-
-  def testBCast_11D(self):
-    self._testBCastD([1, 3, 2], [1, 3, 2])
-
-  def testBCast_12A(self):
-    self._testBCastA([1, 1, 1, 1, 3, 2], [1, 3, 2])
-
-  def testBCast_12B(self):
-    self._testBCastB([1, 1, 1, 1, 3, 2], [1, 3, 2])
-
-  def testBCast_12C(self):
-    self._testBCastC([1, 1, 1, 1, 3, 2], [1, 3, 2])
-
-  def testBCast_12D(self):
-    self._testBCastD([1, 1, 1, 1, 3, 2], [1, 3, 2])
-
-  def testBCast_13A(self):
-    self._testBCastA([1, 3, 2, 1, 1], [1])
-
-  def testBCast_13B(self):
-    self._testBCastB([1, 3, 2, 1, 1], [1])
-
-  def testBCast_13C(self):
-    self._testBCastC([1, 3, 2, 1, 1], [1])
-
-  def testBCast_13D(self):
-    self._testBCastD([1, 3, 2, 1, 1], [1])
-
-  def testBCast_14A(self):
-    self._testBCastA([2, 3, 1, 1, 5], [1])
-
-  def testBCast_14B(self):
-    self._testBCastB([2, 3, 1, 1, 5], [1])
-
-  def testBCast_14C(self):
-    self._testBCastC([2, 3, 1, 1, 5], [1])
-
-  def testBCast_14D(self):
-    self._testBCastD([2, 3, 1, 1, 5], [1])
-
-  def testBCast_15A(self):
-    self._testBCastA([10, 3, 1, 2], [3, 1, 2])
-
-  def testBCast_15B(self):
-    self._testBCastB([10, 3, 1, 2], [3, 1, 2])
-
-  def testBCast_15C(self):
-    self._testBCastC([10, 3, 1, 2], [3, 1, 2])
-
-  def testBCast_15D(self):
-    self._testBCastD([10, 3, 1, 2], [3, 1, 2])
-
-  def testMismatchedDimensions(self):
-    for func in [
-        math_ops.add, math_ops.subtract, math_ops.multiply, math_ops.div, _ADD,
-        _SUB, _MUL, _TRUEDIV, _FLOORDIV
-    ]:
-      with self.assertRaisesWithPredicateMatch(
-          ValueError, lambda e: "Dimensions must" in str(e)):
-        func(
-            ops.convert_to_tensor([10.0, 20.0, 30.0]),
-            ops.convert_to_tensor([[40.0, 50.0], [60.0, 70.0]]))
-
-  def testZeroPowGrad(self):
-    with self.cached_session():
-      for dtype in (np.float16, np.float32, np.float64, np.complex64,
-                    np.complex128):
-        x = constant_op.constant(0.0, dtype=dtype)
-        y = constant_op.constant(2.0, dtype=dtype)
-        z = math_ops.pow(x, y)
-        error = gradient_checker.compute_gradient_error(y, [], z, [])
-        self.assertEqual(error, 0)
-
-  def testComplexPowGrad(self):
-    with self.cached_session():
-      for dtype in np.complex64, np.complex128:
-        for base in 2.0, -2.0:
-          x = constant_op.constant(base, dtype=dtype)
-          y = constant_op.constant(2.0, dtype=dtype)
-          z = math_ops.pow(x, y)
-          error = gradient_checker.compute_gradient_error(y, [], z, [])
-          self.assertLess(error, 2e-4)
-
-  def testAtan2SpecialValues(self):
-    x1l, x2l = zip((+0.0, +0.0), (+0.0, -0.0), (-0.0, +0.0), (-0.0, -0.0),
-                   (1.2345, float("inf")), (1.2345, -float("inf")),
-                   (-4.321, float("inf")), (-4.125, -float("inf")),
-                   (float("inf"), float("inf")), (float("inf"), -float("inf")),
-                   (-float("inf"), float("inf")),
-                   (-float("inf"), -float("inf")))
-    for dtype in np.float32, np.float64:
-      x1 = np.array(x1l).astype(dtype)
-      x2 = np.array(x2l).astype(dtype)
-      self._compareCpu(x1, x2, np.arctan2, math_ops.atan2)
-      self._compareGpu(x1, x2, np.arctan2, math_ops.atan2)
-
-  def testPowNegativeExponent(self):
-    for dtype in [np.int32, np.int64]:
-      with self.test_session(use_gpu=False) as sess:
-        with self.assertRaisesRegexp(
-            errors_impl.InvalidArgumentError,
-            "Integers to negative integer powers are not allowed"):
-          x = np.array([5, 2]).astype(dtype)
-          y = np.array([-2, 3]).astype(dtype)
-          sess.run(math_ops.pow(x, y))
-
-      with self.test_session(use_gpu=False) as sess:
-        with self.assertRaisesRegexp(
-            errors_impl.InvalidArgumentError,
-            "Integers to negative integer powers are not allowed"):
-          x = np.array([5, 2]).astype(dtype)
-          y = np.array([2, -3]).astype(dtype)
-          sess.run(math_ops.pow(x, y))
-
-      with self.test_session(use_gpu=False) as sess:
-        with self.assertRaisesRegexp(
-            errors_impl.InvalidArgumentError,
-            "Integers to negative integer powers are not allowed"):
-          x = np.array([5, 2]).astype(dtype)
-          y = -3
-          sess.run(math_ops.pow(x, y))
-
-
 class ComparisonOpTest(test.TestCase):
 
   def _compareScalar(self, func, x, y, dtype):
diff --git a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py
new file mode 100644
index 0000000000..77f182784e
--- /dev/null
+++ b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py
@@ -0,0 +1,541 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for unary coefficient-wise operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes as dtypes_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import gradient_checker
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+
+_NEG = lambda x: -x
+_ABS = abs
+
+
+# TODO(zongheng): it'd be great to factor out this function and various random
+# SparseTensor gen funcs.
+def _sparsify(x, thresh=0.5, index_dtype=np.int64):
+  x[x < thresh] = 0
+
+  non_zero = np.where(x)
+  x_indices = np.vstack(non_zero).astype(index_dtype).T
+  x_values = x[non_zero]
+  x_shape = x.shape
+
+  return sparse_tensor.SparseTensor(
+      indices=x_indices, values=x_values, dense_shape=x_shape), x_values
+
+
+def _default_tolerance(dtype):
+  """Returns a sensible default tolerance for comparing results of a given type.
+
+  Args:
+    dtype: A datatype.
+  """
+  if dtype == np.float16:
+    return 5e-3
+  elif dtype in (np.float32, np.complex64):
+    return 1e-3
+  elif dtype in (np.float64, np.complex128):
+    return 1e-5
+  else:
+    return None  # Fail fast for unexpected types
+
+
+class UnaryOpTest(test.TestCase):
+
+  def _compareCpu(self, x, np_func, tf_func, grad_rtol=None, grad_atol=None):
+    if grad_rtol is None:
+      grad_rtol = _default_tolerance(x.dtype)
+    if grad_atol is None:
+      grad_atol = _default_tolerance(x.dtype)
+    np_ans = np_func(x)
+    with self.test_session(use_gpu=False):
+      inx = ops.convert_to_tensor(x)
+      if x.dtype in (np.float32, np.float64,
+                     dtypes_lib.bfloat16.as_numpy_dtype):
+        y = 1.1 * tf_func(inx)
+        np_ans *= 1.1
+      else:
+        y = tf_func(inx)
+      tf_cpu = y.eval()
+      self.assertShapeEqual(np_ans, y)
+      if x.dtype == np.float16:
+        self.assertAllClose(np_ans, tf_cpu, rtol=1e-3, atol=1e-3)
+      elif x.dtype == dtypes_lib.bfloat16.as_numpy_dtype:
+        self.assertAllClose(np_ans, tf_cpu, rtol=1e-2, atol=1e-2)
+      else:
+        self.assertAllClose(np_ans, tf_cpu)
+
+      if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign:
+        return  # Return early
+
+      if x.dtype == np.float16:
+        s = list(np.shape(x))
+        jacob_t, _ = gradient_checker.compute_gradient(
+            inx, s, y, s, x_init_value=x)
+        xf = x.astype(np.float)
+        inxf = ops.convert_to_tensor(xf)
+        yf = tf_func(inxf)
+        _, jacob_n = gradient_checker.compute_gradient(
+            inxf, s, yf, s, x_init_value=xf, delta=1e-2)
+        jacob_n = jacob_n.astype(np.float16)
+        self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
+      elif x.dtype in (np.float32, np.complex64):
+        s = list(np.shape(x))
+        jacob_t, jacob_n = gradient_checker.compute_gradient(
+            inx, s, y, s, x_init_value=x, delta=1e-3)
+        self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
+      elif x.dtype in (np.float64, np.complex128):
+        s = list(np.shape(x))
+        jacob_t, jacob_n = gradient_checker.compute_gradient(
+            inx, s, y, s, x_init_value=x, delta=1e-5)
+        self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
+
+  def _check(self, result_tensor, result_np, input_sp_t, tol):
+    self.assertTrue(isinstance(result_tensor, sparse_tensor.SparseTensor))
+    self.assertTrue(isinstance(input_sp_t, sparse_tensor.SparseTensor))
+    self.assertAllEqual(input_sp_t.indices.eval(), result_tensor.indices.eval())
+    self.assertAllEqual(input_sp_t.dense_shape.eval(),
+                        result_tensor.dense_shape.eval())
+    if tol is None:
+      self.assertAllClose(result_np, result_tensor.values.eval())
+    else:
+      self.assertAllClose(
+          result_np, result_tensor.values.eval(), rtol=tol, atol=tol)
+
+  def _compareSparseCpu(self, x, np_func, tf_func, tol):
+    x_sp, x_sp_vals = _sparsify(x)
+    res_np = np_func(x_sp_vals)
+    with self.test_session(use_gpu=False):
+      self._check(tf_func(x_sp), res_np, x_sp, tol)
+
+  def _compareGpu(self, x, np_func, tf_func):
+    np_ans = np_func(x)
+    with self.test_session(force_gpu=test_util.is_gpu_available()):
+      result = tf_func(ops.convert_to_tensor(x))
+      tf_gpu = result.eval()
+    if x.dtype == np.float16:
+      self.assertAllClose(np_ans, tf_gpu, rtol=1e-3, atol=1e-3)
+    else:
+      self.assertAllClose(np_ans, tf_gpu)
+    # TODO(zhifengc/ke): make gradient checker work on GPU.
+
+  def _compareSparseGpu(self, x, np_func, tf_func, tol):
+    x_sp, x_sp_vals = _sparsify(x)
+    res_np = np_func(x_sp_vals)
+    with self.test_session(force_gpu=test_util.is_gpu_available()):
+      self._check(tf_func(x_sp), res_np, x_sp, tol)
+
+  def _compareBoth(self, x, np_func, tf_func):
+    self._compareCpu(x, np_func, tf_func)
+    self._compareGpu(x, np_func, tf_func)
+
+  def _compareBothSparse(self, x, np_func, tf_func, tol=None):
+    self._compareSparseCpu(x, np_func, tf_func, tol)
+    self._compareSparseGpu(x, np_func, tf_func, tol)
+
+  def _inv(self, x):
+    return 1.0 / x
+
+  def _rsqrt(self, x):
+    return self._inv(np.sqrt(x))
+
+  def _sigmoid(self, x):
+    return 1.0 / (1.0 + np.exp(-x))
+
+  def _log_sigmoid(self, x):
+    return np.log(self._sigmoid(x))
+
+  def _replace_domain_error_with_inf(self, fn):
+
+    def func(x):
+      try:
+        return fn(x)
+      except ValueError as e:
+        if "domain error" in str(e):
+          return np.inf * np.ones_like(x)
+        else:
+          raise e
+
+    return func
+
+  def testFloatBasic(self):
+    x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float32)
+    w = x - x.min() + 1.02  # all greater than 1
+    y = (x + .5).astype(np.float32)  # no zero
+    z = (x + 15.5).astype(np.float32)  # all positive
+    k = np.arange(-0.90, 0.90, 0.25).astype(np.float32)  # between -1 and 1
+
+    self._compareBoth(x, np.abs, math_ops.abs)
+    self._compareBoth(x, np.abs, _ABS)
+    self._compareBoth(x, np.negative, math_ops.negative)
+    self._compareBoth(x, np.negative, _NEG)
+    self._compareBoth(y, self._inv, math_ops.reciprocal)
+    self._compareBoth(x, np.square, math_ops.square)
+    self._compareBoth(z, np.sqrt, math_ops.sqrt)
+    self._compareBoth(z, self._rsqrt, math_ops.rsqrt)
+    self._compareBoth(x, np.exp, math_ops.exp)
+    self._compareBoth(x, np.expm1, math_ops.expm1)
+    self._compareBoth(z, np.log, math_ops.log)
+    self._compareBoth(z, np.log1p, math_ops.log1p)
+    self._compareBoth(x, np.sinh, math_ops.sinh)
+    self._compareBoth(x, np.cosh, math_ops.cosh)
+    self._compareBoth(x, np.tanh, math_ops.tanh)
+    self._compareBoth(x, np.arcsinh, math_ops.asinh)
+    self._compareBoth(w, np.arccosh, math_ops.acosh)
+    self._compareBoth(k, np.arctanh, math_ops.atanh)
+    self._compareBoth(x, self._sigmoid, math_ops.sigmoid)
+    self._compareBoth(x, self._log_sigmoid, math_ops.log_sigmoid)
+    self._compareBoth(y, np.sign, math_ops.sign)
+    self._compareBoth(x, np.sin, math_ops.sin)
+    self._compareBoth(x, np.cos, math_ops.cos)
+    self._compareBoth(k, np.arcsin, math_ops.asin)
+    self._compareBoth(k, np.arccos, math_ops.acos)
+    self._compareBoth(x, np.arctan, math_ops.atan)
+    self._compareBoth(x, np.tan, math_ops.tan)
+    self._compareBoth(
+        y, np.vectorize(self._replace_domain_error_with_inf(math.lgamma)),
+        math_ops.lgamma)
+    self._compareBoth(x, np.vectorize(math.erf), math_ops.erf)
+    self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc)
+    try:
+      from scipy import special  # pylint: disable=g-import-not-at-top
+      self._compareBoth(x, special.i0e, math_ops.bessel_i0e)
+      self._compareBoth(x, special.i1e, math_ops.bessel_i1e)
+    except ImportError as e:
+      tf_logging.warn("Cannot test special functions: %s" % str(e))
+
+    self._compareBothSparse(x, np.abs, math_ops.abs)
+    self._compareBothSparse(x, np.negative, math_ops.negative)
+    self._compareBothSparse(x, np.square, math_ops.square)
+    self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3)
+    self._compareBothSparse(x, np.tanh, math_ops.tanh)
+    self._compareBothSparse(y, np.sign, math_ops.sign)
+    self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf)
+
+  def testFloatTanhEdge(self):
+    x = np.arange(40, 40 + 6).reshape(6).astype(np.float32)
+    self._compareBoth(x, np.tanh, math_ops.tanh)
+    x = np.arange(-40, -40 + 6).reshape(6).astype(np.float32)
+    self._compareBoth(x, np.tanh, math_ops.tanh)
+
+  def testFloatEmpty(self):
+    x = np.empty((2, 0, 5), dtype=np.float32)
+    self._compareBoth(x, np.abs, math_ops.abs)
+    self._compareBoth(x, np.abs, _ABS)
+    self._compareBoth(x, np.negative, math_ops.negative)
+    self._compareBoth(x, np.negative, _NEG)
+    self._compareBoth(x, self._inv, math_ops.reciprocal)
+    self._compareBoth(x, np.square, math_ops.square)
+    self._compareBoth(x, np.sqrt, math_ops.sqrt)
+    self._compareBoth(x, self._rsqrt, math_ops.rsqrt)
+    self._compareBoth(x, np.exp, math_ops.exp)
+    self._compareBoth(x, np.expm1, math_ops.expm1)
+    self._compareBoth(x, np.log, math_ops.log)
+    self._compareBoth(x, np.log1p, math_ops.log1p)
+    self._compareBoth(x, np.sinh, math_ops.sinh)
+    self._compareBoth(x, np.arcsinh, math_ops.asinh)
+    self._compareBoth(x, np.cosh, math_ops.cosh)
+    self._compareBoth(x, np.tanh, math_ops.tanh)
+    self._compareBoth(x, self._sigmoid, math_ops.sigmoid)
+    self._compareBoth(x, np.sign, math_ops.sign)
+    self._compareBoth(x, np.sin, math_ops.sin)
+    self._compareBoth(x, np.cos, math_ops.cos)
+    # Can't use vectorize below, so just use some arbitrary function
+    self._compareBoth(x, np.sign, math_ops.lgamma)
+    self._compareBoth(x, np.sign, math_ops.erf)
+    self._compareBoth(x, np.sign, math_ops.erfc)
+    self._compareBoth(x, np.tan, math_ops.tan)
+    self._compareBoth(x, np.arcsin, math_ops.asin)
+    self._compareBoth(x, np.arccos, math_ops.acos)
+    self._compareBoth(x, np.arctan, math_ops.atan)
+    try:
+      from scipy import special  # pylint: disable=g-import-not-at-top
+      self._compareBoth(x, special.i0e, math_ops.bessel_i0e)
+      self._compareBoth(x, special.i1e, math_ops.bessel_i1e)
+    except ImportError as e:
+      tf_logging.warn("Cannot test special functions: %s" % str(e))
+
+    self._compareBothSparse(x, np.abs, math_ops.abs)
+    self._compareBothSparse(x, np.negative, math_ops.negative)
+    self._compareBothSparse(x, np.square, math_ops.square)
+    self._compareBothSparse(x, np.sqrt, math_ops.sqrt, tol=1e-3)
+    self._compareBothSparse(x, np.tanh, math_ops.tanh)
+    self._compareBothSparse(x, np.sign, math_ops.sign)
+    self._compareBothSparse(x, np.sign, math_ops.erf)
+
+  def testDoubleBasic(self):
+    x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float64)
+    w = x - x.min() + 1.02  # all greater than 1
+    y = (x + .5).astype(np.float64)  # no zero
+    z = (x + 15.5).astype(np.float64)  # all positive
+    k = np.arange(-0.90, 0.90,
+                  0.35).reshape(1, 3, 2).astype(np.float64)  # between -1 and 1
+    self._compareBoth(x, np.abs, math_ops.abs)
+    self._compareBoth(x, np.abs, _ABS)
+    self._compareBoth(x, np.negative, math_ops.negative)
+    self._compareBoth(x, np.negative, _NEG)
+    self._compareBoth(y, self._inv, math_ops.reciprocal)
+    self._compareBoth(x, np.square, math_ops.square)
+    self._compareBoth(z, np.sqrt, math_ops.sqrt)
+    self._compareBoth(z, self._rsqrt, math_ops.rsqrt)
+    self._compareBoth(x, np.exp, math_ops.exp)
+    self._compareBoth(x, np.expm1, math_ops.expm1)
+    self._compareBoth(z, np.log, math_ops.log)
+    self._compareBoth(z, np.log1p, math_ops.log1p)
+    self._compareBoth(x, np.sinh, math_ops.sinh)
+    self._compareBoth(x, np.cosh, math_ops.cosh)
+    self._compareBoth(x, np.tanh, math_ops.tanh)
+    self._compareBoth(x, np.arcsinh, math_ops.asinh)
+    self._compareBoth(w, np.arccosh, math_ops.acosh)
+    self._compareBoth(k, np.arctanh, math_ops.atanh)
+    self._compareBoth(x, self._sigmoid, math_ops.sigmoid)
+    self._compareBoth(y, np.sign, math_ops.sign)
+    self._compareBoth(x, np.sin, math_ops.sin)
+    self._compareBoth(x, np.cos, math_ops.cos)
+    self._compareBoth(
+        y, np.vectorize(self._replace_domain_error_with_inf(math.lgamma)),
+        math_ops.lgamma)
+    self._compareBoth(x, np.vectorize(math.erf), math_ops.erf)
+    self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc)
+    self._compareBoth(x, np.arctan, math_ops.atan)
+    self._compareBoth(k, np.arcsin, math_ops.asin)
+    self._compareBoth(k, np.arccos, math_ops.acos)
+    self._compareBoth(k, np.tan, math_ops.tan)
+    try:
+      from scipy import special  # pylint: disable=g-import-not-at-top
+      self._compareBoth(x, special.i0e, math_ops.bessel_i0e)
+      self._compareBoth(x, special.i1e, math_ops.bessel_i1e)
+    except ImportError as e:
+      tf_logging.warn("Cannot test special functions: %s" % str(e))
+
+    self._compareBothSparse(x, np.abs, math_ops.abs)
+    self._compareBothSparse(x, np.negative, math_ops.negative)
+    self._compareBothSparse(x, np.square, math_ops.square)
+    self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3)
+    self._compareBothSparse(x, np.tanh, math_ops.tanh)
+    self._compareBothSparse(y, np.sign, math_ops.sign)
+    self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf)
+
+  def testHalfBasic(self):
+    x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float16)
+    y = (x + .5).astype(np.float16)  # no zero
+    z = (x + 15.5).astype(np.float16)  # all positive
+    self._compareBoth(x, np.abs, math_ops.abs)
+    self._compareBoth(x, np.abs, _ABS)
+    self._compareBoth(x, np.negative, math_ops.negative)
+    self._compareBoth(x, np.negative, _NEG)
+    self._compareBoth(y, self._inv, math_ops.reciprocal)
+    self._compareBoth(x, np.square, math_ops.square)
+    self._compareBoth(z, np.sqrt, math_ops.sqrt)
+    self._compareBoth(z, self._rsqrt, math_ops.rsqrt)
+    self._compareBoth(x, np.exp, math_ops.exp)
+    self._compareBoth(x, np.expm1, math_ops.expm1)
+    self._compareBoth(z, np.log, math_ops.log)
+    self._compareBoth(z, np.log1p, math_ops.log1p)
+    self._compareBoth(x, np.tanh, math_ops.tanh)
+    self._compareBoth(x, self._sigmoid, math_ops.sigmoid)
+    self._compareBoth(y, np.sign, math_ops.sign)
+    self._compareBoth(x, np.sin, math_ops.sin)
+    self._compareBoth(x, np.cos, math_ops.cos)
+    self._compareBoth(
+        y, np.vectorize(self._replace_domain_error_with_inf(math.lgamma)),
+        math_ops.lgamma)
+    self._compareBoth(x, np.vectorize(math.erf), math_ops.erf)
+    self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc)
+    try:
+      from scipy import special  # pylint: disable=g-import-not-at-top
+      self._compareBoth(x, special.i0e, math_ops.bessel_i0e)
+      self._compareBoth(x, special.i1e, math_ops.bessel_i1e)
+    except ImportError as e:
+      tf_logging.warn("Cannot test special functions: %s" % str(e))
+
+    self._compareBothSparse(x, np.abs, math_ops.abs)
+    self._compareBothSparse(x, np.negative, math_ops.negative)
+    self._compareBothSparse(x, np.square, math_ops.square)
+    self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3)
+    self._compareBothSparse(x, np.tanh, math_ops.tanh)
+    self._compareBothSparse(y, np.sign, math_ops.sign)
+    self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf, tol=1e-3)
+
+  def testInt32Basic(self):
+    x = np.arange(-6, 6, 2).reshape(1, 3, 2).astype(np.int32)
+    self._compareCpu(x, np.abs, math_ops.abs)
+    self._compareCpu(x, np.abs, _ABS)
+    self._compareBoth(x, np.negative, math_ops.negative)
+    self._compareBoth(x, np.negative, _NEG)
+    self._compareBoth(x, np.square, math_ops.square)
+    self._compareCpu(x, np.sign, math_ops.sign)
+
+    self._compareBothSparse(x, np.abs, math_ops.abs)
+    self._compareBothSparse(x, np.negative, math_ops.negative)
+    self._compareBothSparse(x, np.square, math_ops.square)
+    self._compareBothSparse(x, np.sign, math_ops.sign)
+
+  def testInt64Basic(self):
+    x = np.arange(-6 << 40, 6 << 40, 2 << 40).reshape(1, 3, 2).astype(np.int64)
+    self._compareCpu(x, np.abs, math_ops.abs)
+    self._compareCpu(x, np.abs, _ABS)
+    self._compareCpu(x, np.negative, math_ops.negative)
+    self._compareCpu(x, np.negative, _NEG)
+    self._compareCpu(x, np.sign, math_ops.sign)
+
+    self._compareBothSparse(x, np.abs, math_ops.abs)
+    self._compareBothSparse(x, np.negative, math_ops.negative)
+    self._compareBothSparse(x, np.sign, math_ops.sign)
+
+  def testInt64Square(self):
+    x = np.arange(-6 << 20, 6 << 20, 2 << 20).reshape(1, 3, 2).astype(np.int64)
+    self._compareCpu(x, np.square, math_ops.square)
+    self._compareBothSparse(x, np.square, math_ops.square)
+
+  def testComplex64Basic(self):
+    x = np.complex(1, 1) * np.arange(-3, 3).reshape(1, 3, 2).astype(
+        np.complex64)
+    y = x + np.complex(0.5, 0.5)  # no zeros
+    self._compareBoth(x, np.abs, math_ops.abs)
+    self._compareBoth(x, np.abs, _ABS)
+    self._compareBoth(x, np.negative, math_ops.negative)
+    self._compareBoth(x, np.negative, _NEG)
+    self._compareCpu(y, self._inv, math_ops.reciprocal)
+    self._compareCpu(x, np.square, math_ops.square)
+    self._compareCpu(y, np.sqrt, math_ops.sqrt)
+    self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
+    self._compareBoth(x, np.exp, math_ops.exp)
+    self._compareCpu(x, np.expm1, math_ops.expm1)
+    self._compareCpu(y, np.log, math_ops.log)
+    self._compareCpu(y, np.log1p, math_ops.log1p)
+    self._compareCpu(x, np.sinh, math_ops.sinh)
+    self._compareCpu(x, np.cosh, math_ops.cosh)
+    self._compareCpu(x, np.tanh, math_ops.tanh)
+
+    # Complex64 versions of asinh() and acosh() in libstdc++ only have 6 digits
+    # of precision.
+    # Small gradient values + low precision --> High relative error
+    self._compareCpu(y, np.arcsinh, math_ops.asinh, grad_rtol=1e-2)
+    self._compareCpu(y, np.arccosh, math_ops.acosh, grad_rtol=1e-2)
+
+    self._compareCpu(y, np.arctanh, math_ops.atanh)
+    self._compareCpu(x, self._sigmoid, math_ops.sigmoid)
+    self._compareCpu(x, np.sin, math_ops.sin)
+    self._compareCpu(x, np.cos, math_ops.cos)
+
+    self._compareBothSparse(x, np.abs, math_ops.abs)
+    self._compareBothSparse(x, np.negative, math_ops.negative)
+    self._compareBothSparse(x, np.square, math_ops.square)
+    self._compareBothSparse(x, np.sqrt, math_ops.sqrt, 1e-3)
+    self._compareBothSparse(x, np.tanh, math_ops.tanh)
+
+    # Numpy uses an incorrect definition of sign; use the right one instead.
+    def complex_sign(x):
+      return x / np.abs(x)
+
+    self._compareBoth(y, complex_sign, math_ops.sign)
+    self._compareBothSparse(y, complex_sign, math_ops.sign)
+
+  def testComplex128Basic(self):
+    x = np.complex(1, 1) * np.arange(-3, 3).reshape(1, 3, 2).astype(
+        np.complex128)
+    y = x + np.complex(0.5, 0.5)  # no zeros
+    self._compareBoth(x, np.abs, math_ops.abs)
+    self._compareBoth(x, np.abs, _ABS)
+    self._compareBoth(x, np.negative, math_ops.negative)
+    self._compareBoth(x, np.negative, _NEG)
+    self._compareCpu(y, self._inv, math_ops.reciprocal)
+    self._compareCpu(x, np.square, math_ops.square)
+    self._compareCpu(y, np.sqrt, math_ops.sqrt)
+    self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
+    self._compareBoth(x, np.exp, math_ops.exp)
+    self._compareCpu(x, np.expm1, math_ops.expm1)
+    self._compareCpu(y, np.log, math_ops.log)
+    self._compareCpu(y, np.log1p, math_ops.log1p)
+    self._compareCpu(x, np.sinh, math_ops.sinh)
+    self._compareCpu(x, np.cosh, math_ops.cosh)
+    self._compareCpu(x, np.tanh, math_ops.tanh)
+    self._compareCpu(y, np.arcsinh, math_ops.asinh)
+    self._compareCpu(y, np.arccosh, math_ops.acosh)
+    self._compareCpu(y, np.arctanh, math_ops.atanh)
+    self._compareCpu(x, self._sigmoid, math_ops.sigmoid)
+    self._compareCpu(x, np.sin, math_ops.sin)
+    self._compareCpu(x, np.cos, math_ops.cos)
+
+    self._compareBothSparse(x, np.abs, math_ops.abs)
+    self._compareBothSparse(x, np.negative, math_ops.negative)
+    self._compareBothSparse(x, np.square, math_ops.square)
+    self._compareBothSparse(x, np.sqrt, math_ops.sqrt, 1e-3)
+    self._compareBothSparse(x, np.tanh, math_ops.tanh)
+
+    # Numpy uses an incorrect definition of sign; use the right one instead.
+    def complex_sign(x):
+      return x / np.abs(x)
+
+    self._compareBoth(y, complex_sign, math_ops.sign)
+    self._compareBothSparse(y, complex_sign, math_ops.sign)
+
+  def testGradGrad(self):
+    np.random.seed(7)
+    shape = (5,)
+    dtype_tols = [(np.float32, 5e-4), (np.float64, 1e-6), (np.complex64, 5e-4),
+                  (np.complex128, 1e-6)]
+    op_range = [
+        (gen_math_ops.reciprocal_grad, [-2, 2]),
+        (gen_math_ops.rsqrt_grad, [0.1, 3]),
+        (gen_math_ops.sigmoid_grad, [-2, 2]),
+        (gen_math_ops.sqrt_grad, [0.1, 3]),
+        (gen_math_ops.tanh_grad, [-2, 2]),
+    ]
+
+    def rand(dtype, real_range):
+      x = np.random.uniform(
+          real_range[0], real_range[1], size=shape[0]).astype(dtype)
+      if dtype in (np.complex64, np.complex128):
+        x += 1j * np.random.uniform(-2, 2, size=shape[0]).astype(dtype)
+      return x
+
+    for op, real_range in op_range:
+      with self.cached_session():
+        for dtype, tol in dtype_tols:
+          x = constant_op.constant(rand(dtype, real_range))
+          y = constant_op.constant(rand(dtype, real_range))
+          z = op(x, y)
+          grads = gradient_checker.compute_gradient(
+              [x, y], [shape, shape],
+              z,
+              shape,
+              x_init_value=[rand(dtype, real_range),
+                            rand(dtype, real_range)])
+          if isinstance(grads, tuple):
+            grads = [grads]
+          for analytical, numerical in grads:
+            self.assertAllClose(analytical, numerical, rtol=tol, atol=tol)
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 1bd2804869355a7cd0cbfbe9e6aab7591b8a20de Mon Sep 17 00:00:00 2001
From: Pavithra Vijay <psv@google.com>
Date: Mon, 17 Sep 2018 18:54:34 -0700
Subject: [PATCH 0311/1357] Add Keras TPU support for the new metrics.

PiperOrigin-RevId: 213378552
---
 .../contrib/tpu/python/tpu/keras_support.py   | 15 ++++++-
 tensorflow/python/keras/engine/saving_test.py |  7 ++++
 tensorflow/python/keras/metrics.py            | 42 +++++++++++++------
 3 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 776b9bff0f..bf445256b6 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -76,6 +76,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import callbacks as cbks
+from tensorflow.python.keras import metrics as metrics_module
 from tensorflow.python.keras import models
 from tensorflow.python.keras import optimizers as keras_optimizers
 from tensorflow.python.keras.engine import base_layer
@@ -293,6 +294,16 @@ def _replicated_optimizer(opt):
     return KerasCrossShardOptimizer(opt)
 
 
+def clone_metrics(metrics):
+  """Returns a copy of metrics. A copy is created for stateful metrics."""
+  if metrics is None:
+    return None
+  return [
+      m.__class__.from_config(m.get_config())
+      if isinstance(m, metrics_module.Metric) else m for m in metrics
+  ]
+
+
 class TPURewriteContext(object):
   """Prepare the environment for a Keras model during `tpu.rewrite`.
 
@@ -811,8 +822,8 @@ class TPUFunction(object):
             optimizer=_replicated_optimizer(cloned_optimizer),
             loss=self.model.loss,
             loss_weights=self.model.loss_weights,
-            metrics=self.model.metrics,
-            weighted_metrics=self.model.weighted_metrics,
+            metrics=clone_metrics(self.model.metrics),
+            weighted_metrics=clone_metrics(self.model.weighted_metrics),
             target_tensors=tpu_targets,
         )
 
diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 148dd23be7..02d99d5d69 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -370,6 +370,13 @@ class TestWholeModelSaving(test.TestCase):
       y = np.random.random((1, 3, 3))
       model.train_on_batch(x, y)
       new_model.train_on_batch(x, y)
+
+      x = np.random.random((1, 3))
+      y = np.random.random((1, 3, 3))
+      eval_out = model.evaluate(x, y)
+      eval_out2 = new_model.evaluate(x, y)
+      self.assertArrayNear(eval_out, eval_out2, 0.001)
+
       out = model.predict(x)
       out2 = new_model.predict(x)
       self.assertAllClose(out, out2, atol=1e-05)
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index fd3c39cf2e..e64241e5cf 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -199,7 +199,6 @@ def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
     # squeeze last dim of `y_pred` or `y_true` if their rank differs by 1
     y_true, y_pred = confusion_matrix.remove_squeezable_dimensions(
         y_true, y_pred)
-    y_pred.get_shape().assert_is_compatible_with(y_true.get_shape())
 
   if sample_weight is None:
     return y_pred, y_true, None
@@ -342,19 +341,14 @@ class Metric(Layer):
       # weak reference. This is to remove reference cycle that is created here.
       # This is not an issue in python versions > 3.
       if context.executing_eagerly():
-        update_state = weakmethod(obj.update_state)
-      else:
-        update_state = function.defun(obj.update_state)
+        obj.update_state = weakmethod(obj.update_state)
       obj.update_state = weakmethod(
-          types.MethodType(update_state_wrapper(update_state), obj))
+          types.MethodType(update_state_wrapper(obj.update_state), obj))
       result = weakmethod(obj.result)
       obj.result = weakmethod(types.MethodType(result_wrapper(result), obj))
     else:
-      # Converting update_state_fn() into a graph function, so that
-      # we can return a single op that performs all of the variable updates.
-      defuned_update_state_fn = function.defun(obj.update_state)
       obj.update_state = types.MethodType(
-          update_state_wrapper(defuned_update_state_fn), obj)
+          update_state_wrapper(obj.update_state), obj)
       obj.result = types.MethodType(result_wrapper(obj.result), obj)
 
     return obj
@@ -475,6 +469,9 @@ class Mean(Metric):
     Args:
       values: Per-example value.
       sample_weight: Optional weighting of each example. Defaults to 1.
+
+    Returns:
+      Update op.
     """
     values = math_ops.cast(values, self._dtype)
     if sample_weight is None:
@@ -501,8 +498,9 @@ class Mean(Metric):
     values = math_ops.reduce_sum(values)
 
     # Update state variables
-    state_ops.assign_add(self.total, values)
-    state_ops.assign_add(self.count, num_values)
+    update_total_op = state_ops.assign_add(self.total, values)
+    update_count_op = state_ops.assign_add(self.count, num_values)
+    return control_flow_ops.group(update_total_op, update_count_op)
 
   def result(self):
     return safe_div(self.total, self.count)
@@ -536,6 +534,9 @@ class MeanMetricWrapper(Mean):
       sample_weight: Optional weighting of each example. Defaults to 1. Can be
         a `Tensor` whose rank is either 0, or the same rank as `y_true`,
         and must be broadcastable to `y_true`.
+
+    Returns:
+      Update op.
     """
     y_true = math_ops.cast(y_true, self._dtype)
     y_pred = math_ops.cast(y_pred, self._dtype)
@@ -543,7 +544,7 @@ class MeanMetricWrapper(Mean):
         y_pred, y_true, sample_weight)
 
     matches = self._fn(y_true, y_pred, **self._fn_kwargs)
-    super(MeanMetricWrapper, self).update_state(
+    return super(MeanMetricWrapper, self).update_state(
         matches, sample_weight=sample_weight)
 
   def get_config(self):
@@ -600,6 +601,23 @@ class CategoricalAccuracy(MeanMetricWrapper):
         categorical_accuracy, name, dtype=dtype)
 
 
+class SparseCategoricalAccuracy(MeanMetricWrapper):
+  """Calculates how often predictions matches integer labels.
+
+  This metric creates two local variables, `total` and `count` that are used to
+  compute the frequency with which `y_pred` matches `y_true`. This frequency is
+  ultimately returned as `sparse categorical accuracy`: an idempotent operation
+  that simply divides `total` by `count`.
+
+  If `sample_weight` is `None`, weights default to 1.
+  Use `sample_weight` of 0 to mask values.
+  """
+
+  def __init__(self, name='sparse_categorical_accuracy', dtype=None):
+    super(SparseCategoricalAccuracy, self).__init__(
+        sparse_categorical_accuracy, name, dtype=dtype)
+
+
 @tf_export('keras.metrics.binary_accuracy')
 def binary_accuracy(y_true, y_pred, threshold=0.5):
   threshold = math_ops.cast(threshold, y_pred.dtype)
-- 
GitLab


From 2cb119b81fd08a1e680a2b44ff68c0a8c76eb017 Mon Sep 17 00:00:00 2001
From: Koan-Sin Tan <koansin.tan@gmail.com>
Date: Tue, 18 Sep 2018 10:40:54 +0800
Subject: [PATCH 0312/1357] [tflite] fix calculating of output pixels

fix an issue reported by issue #22310
---
 .../contrib/lite/examples/label_image/bitmap_helpers_impl.h     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
index 6fdcf78b69..7e09d4bc79 100644
--- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
+++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
@@ -81,7 +81,7 @@ void resize(T* out, uint8_t* in, int image_height, int image_width,
 
   auto output = interpreter->typed_tensor<float>(2);
   auto output_number_of_pixels =
-      wanted_height * wanted_height * wanted_channels;
+      wanted_height * wanted_width * wanted_channels;
 
   for (int i = 0; i < output_number_of_pixels; i++) {
     if (s->input_floating)
-- 
GitLab


From bb9958ab69a38cbe57d119947b635a257fa6b802 Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Mon, 17 Sep 2018 19:56:41 -0700
Subject: [PATCH 0313/1357] Register fp16 reduce_max on GPU

PiperOrigin-RevId: 213383647
---
 tensorflow/core/kernels/reduction_ops_max.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/kernels/reduction_ops_max.cc b/tensorflow/core/kernels/reduction_ops_max.cc
index 9cf953f4bf..8bfa44b2d0 100644
--- a/tensorflow/core/kernels/reduction_ops_max.cc
+++ b/tensorflow/core/kernels/reduction_ops_max.cc
@@ -50,6 +50,8 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
           .TypeConstraint<int64>("Tidx")                                       \
           .HostMemory("reduction_indices"),                                    \
       ReductionOp<GPUDevice, type, int64, Eigen::internal::MaxReducer<type>>);
+
+REGISTER_GPU_KERNELS(Eigen::half);
 REGISTER_GPU_KERNELS(float);
 REGISTER_GPU_KERNELS(double);
 REGISTER_GPU_KERNELS(int64);
-- 
GitLab


From 0b7125d3c5e7128470a7a74cf8a3543eab39c2d8 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Mon, 17 Sep 2018 20:25:23 -0700
Subject: [PATCH 0314/1357] Fix unused variable error on powerpc.

PiperOrigin-RevId: 213386145
---
 .../internal/optimized/depthwiseconv_float.h       |  6 ++----
 .../internal/optimized/depthwiseconv_uint8.h       | 14 ++++++--------
 .../optimized/depthwiseconv_uint8_3x3_filter.h     |  8 +++++---
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index f0bea7fa1d..114575a96a 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -932,9 +932,6 @@ inline void DepthwiseConv(
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 
-  const bool has_dilation = (params.dilation_width_factor != 1) ||
-                            (params.dilation_height_factor != 1);
-
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
   const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
   const int input_height = input_shape.Dims(1);
@@ -966,7 +963,8 @@ inline void DepthwiseConv(
                                         FIXED_DEPTH_MULTIPLIER)           \
   if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) &&          \
       (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) &&     \
-      depth_multiplier == FIXED_DEPTH_MULTIPLIER && !has_dilation) {      \
+      depth_multiplier == FIXED_DEPTH_MULTIPLIER &&                       \
+      dilation_height_factor == 1 && dilation_width_factor == 1) {        \
     row_accum_func =                                                      \
         FloatDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH,      \
                                    FIXED_DEPTH_MULTIPLIER>;               \
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index 494cf70504..ee3fe78a10 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1718,18 +1718,15 @@ inline void DepthwiseConv(
   TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
   TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 
-  const bool has_dilation =
-      (dilation_width_factor != 1) || (dilation_height_factor != 1);
-
 // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
 // Jetson TX-2. This compiler does not support the offsetof() macro.
 #if defined(__aarch64__) && !defined(GOOGLE_L4T)
   // Call kernel optimized for depthwise convolutions using 3x3 filters if
   // parameters are supported.
-  if (Fast3x3FilterKernelSupported(input_shape, filter_shape, stride_width,
-                                   stride_height, has_dilation, pad_width,
-                                   pad_height, depth_multiplier, output_shape,
-                                   output_shift)) {
+  if (Fast3x3FilterKernelSupported(
+          input_shape, filter_shape, stride_width, stride_height,
+          dilation_width_factor, dilation_height_factor, pad_width, pad_height,
+          depth_multiplier, output_shape, output_shift)) {
     DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape,
                            filter_data, bias_shape, bias_data, output_shape,
                            output_data);
@@ -1756,7 +1753,8 @@ inline void DepthwiseConv(
                                         FIXED_DEPTH_MULTIPLIER)           \
   if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) &&          \
       (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) &&     \
-      depth_multiplier == FIXED_DEPTH_MULTIPLIER && !has_dilation) {      \
+      depth_multiplier == FIXED_DEPTH_MULTIPLIER &&                       \
+      dilation_width_factor == 1 && dilation_height_factor == 1) {        \
     row_accum_func =                                                      \
         QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH,  \
                                        FIXED_DEPTH_MULTIPLIER>;           \
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
index 5087227182..e14d04ad02 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
@@ -3176,8 +3176,9 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
 
 inline bool Fast3x3FilterKernelSupported(
     const RuntimeShape& input_shape, const RuntimeShape& filter_shape,
-    int32 stride_width, int32 stride_height, bool has_dilation, int32 pad_width,
-    int32 pad_height, int32 depth_multiplier, const RuntimeShape& output_shape,
+    int32 stride_width, int32 stride_height, int32 dilation_width_factor,
+    int32 dilation_height_factor, int32 pad_width, int32 pad_height,
+    int32 depth_multiplier, const RuntimeShape& output_shape,
     int32 output_shift) {
   const int32 input_height = input_shape.Dims(1);
   const int32 input_width = input_shape.Dims(2);
@@ -3193,7 +3194,8 @@ inline bool Fast3x3FilterKernelSupported(
       (stride_height == 1 || stride_height == 2) &&
       (stride_width == stride_height) && (pad_width == 0 || pad_width == 1) &&
       (pad_height == 0 || pad_height == 1) && (pad_width == pad_height) &&
-      (input_depth % 8) == 0 && (output_shift > 0) && !has_dilation;
+      (input_depth % 8) == 0 && (output_shift > 0) &&
+      dilation_width_factor == 1 && dilation_height_factor == 1;
 
   if (!supported) {
     return false;
-- 
GitLab


From eeb477cf661a16ee39e0621fd225d1f15859ffc8 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 17 Sep 2018 20:28:59 -0700
Subject: [PATCH 0315/1357] [tf.data] Fixing an error in the optimization loop.

PiperOrigin-RevId: 213386401
---
 tensorflow/core/framework/model.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index b3fe357ea1..112298c344 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -322,7 +322,7 @@ void Model::Optimize(int64 cpu_budget) {
         }
         tunable->value--;
       }
-      if (best_tunable) {
+      if (!best_tunable) {
         // NOTE: This can happen because we are performing the optimization
         // while the model data is changing. If this becomes an issue, we should
         // look into performing the optimization using a model snapshot.
-- 
GitLab


From b91e27a9c33d038af79a0944eb9046b926d483c8 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 17 Sep 2018 21:01:19 -0700
Subject: [PATCH 0316/1357] Refactor out the metadata_ops set from
 const_analysis to a per-op bit; NFC

PiperOrigin-RevId: 213389224
---
 tensorflow/compiler/tf2xla/const_analysis.cc  | 12 +++-------
 .../compiler/tf2xla/kernels/shape_op.cc       |  8 +++----
 tensorflow/compiler/tf2xla/xla_op_registry.cc | 24 +++++++++++++++++++
 tensorflow/compiler/tf2xla/xla_op_registry.h  | 12 ++++++++++
 4 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc
index 922ae7c79a..027ca6d2d2 100644
--- a/tensorflow/compiler/tf2xla/const_analysis.cc
+++ b/tensorflow/compiler/tf2xla/const_analysis.cc
@@ -29,14 +29,6 @@ Status BackwardsConstAnalysis(const Graph& g,
                               std::vector<bool>* compile_time_const_arg_indices,
                               std::vector<bool>* compile_time_const_nodes,
                               std::function<bool(const Edge&)> edge_filter) {
-  // Operators that don't look at the data of their inputs, just the shapes.
-  const std::unordered_set<string> metadata_ops = {
-      "Rank",
-      "Shape",
-      "ShapeN",
-      "Size",
-  };
-
   std::vector<bool> compile_time_const_nodes_impl;
   if (compile_time_const_nodes) {
     CHECK_EQ(compile_time_const_nodes->size(), g.num_node_ids());
@@ -50,7 +42,9 @@ Status BackwardsConstAnalysis(const Graph& g,
     if (!status.ok()) return;
 
     // If this is a metadata-only op, don't propagate the const requirement.
-    if (metadata_ops.find(node->type_string()) != metadata_ops.end()) return;
+    if (XlaOpRegistry::IsMetadataOp(node->type_string())) {
+      return;
+    }
 
     // If this node must be const, and it isn't a metadata op, then all of its
     // parents must be const.
diff --git a/tensorflow/compiler/tf2xla/kernels/shape_op.cc b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
index 2e0a69b70e..c8a0f31a03 100644
--- a/tensorflow/compiler/tf2xla/kernels/shape_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
@@ -44,7 +44,7 @@ class ShapeOp : public XlaOpKernel {
   DataType out_dtype_;
 };
 
-REGISTER_XLA_OP(Name("Shape").CompilationOnly(), ShapeOp);
+REGISTER_XLA_OP(Name("Shape").CompilationOnly().IsMetadataOp(), ShapeOp);
 
 class ShapeNOp : public XlaOpKernel {
  public:
@@ -66,7 +66,7 @@ class ShapeNOp : public XlaOpKernel {
  private:
   DataType out_dtype_;
 };
-REGISTER_XLA_OP(Name("ShapeN").CompilationOnly(), ShapeNOp);
+REGISTER_XLA_OP(Name("ShapeN").CompilationOnly().IsMetadataOp(), ShapeNOp);
 
 class RankOp : public XlaOpKernel {
  public:
@@ -82,7 +82,7 @@ class RankOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Rank").CompilationOnly(), RankOp);
+REGISTER_XLA_OP(Name("Rank").CompilationOnly().IsMetadataOp(), RankOp);
 
 class SizeOp : public XlaOpKernel {
  public:
@@ -101,7 +101,7 @@ class SizeOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Size").CompilationOnly(), SizeOp);
+REGISTER_XLA_OP(Name("Size").CompilationOnly().IsMetadataOp(), SizeOp);
 
 class ExpandDimsOp : public XlaOpKernel {
  public:
diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.cc b/tensorflow/compiler/tf2xla/xla_op_registry.cc
index b0eeee3174..91d48125f1 100644
--- a/tensorflow/compiler/tf2xla/xla_op_registry.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_registry.cc
@@ -90,6 +90,11 @@ XlaOpRegistry::~XlaOpRegistry() = default;
                  << " have incompatible compile time constant inputs.";
     return false;
   }
+  if (x.is_metadata_op != y.is_metadata_op) {
+    LOG(WARNING) << "Registrations of " << x.name
+                 << " have incompatible values for is_metadata_op.";
+    return false;
+  }
   return true;
 }
 
@@ -350,6 +355,20 @@ XlaOpRegistry::CompileTimeConstantInputs(const string& op) {
   return &it->second.front()->compile_time_constant_inputs;
 }
 
+/*static*/ bool XlaOpRegistry::IsMetadataOp(const string& op) {
+  XlaOpRegistry& registry = Instance();
+  mutex_lock lock(registry.mutex_);
+  auto it = registry.ops_.find(op);
+  if (it == registry.ops_.end() || it->second.empty()) {
+    return false;
+  }
+
+  // The test in IsCompatible ensures that if there are multiple matching
+  // registrations for this op name, they all have the same value of
+  // is_metadata_op, so only the first match is returned.
+  return it->second.front()->is_metadata_op;
+}
+
 std::vector<string> XlaOpRegistry::BackendNames() {
   std::vector<string> names;
   XlaOpRegistry& registry = Instance();
@@ -432,6 +451,11 @@ XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::CompileTimeConstInput(
   return *this;
 }
 
+XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::IsMetadataOp() {
+  registration_->is_metadata_op = true;
+  return *this;
+}
+
 std::unique_ptr<XlaOpRegistry::OpRegistration> XlaOpRegistrationBuilder::Build(
     XlaOpRegistry::Factory factory) {
   registration_->factory = factory;
diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h
index 34e22a4510..a4b624820a 100644
--- a/tensorflow/compiler/tf2xla/xla_op_registry.h
+++ b/tensorflow/compiler/tf2xla/xla_op_registry.h
@@ -136,6 +136,10 @@ class XlaOpRegistry {
   static const std::unordered_set<string>* CompileTimeConstantInputs(
       const string& op);
 
+  // Returns true if `op` is a "metadata" op, one that only looks at the shapes
+  // of its operands and not their values.
+  static bool IsMetadataOp(const string& op);
+
  private:
   friend class XlaBackendRegistrar;
   friend class XlaOpRegistrar;
@@ -192,6 +196,10 @@ class XlaOpRegistry {
     // Names of arguments that must be compile-time constants.
     std::unordered_set<string> compile_time_constant_inputs;
 
+    // True if this is a "metadata" op, one that only looks at the shapes of its
+    // operands and not their values.
+    bool is_metadata_op = false;
+
     // Factory used to build OpKernels that perform symbolic execution.
     Factory factory;
   };
@@ -256,6 +264,10 @@ class XlaOpRegistrationBuilder {
   // Mark 'input_name' as an argument whose value must be known at compile-time.
   XlaOpRegistrationBuilder& CompileTimeConstInput(absl::string_view input_name);
 
+  // Mark this op as a "metadata" op, one that only looks at the shapes of its
+  // operands and not their values.
+  XlaOpRegistrationBuilder& IsMetadataOp();
+
   std::unique_ptr<XlaOpRegistry::OpRegistration> Build(
       XlaOpRegistry::Factory factory);
 
-- 
GitLab


From cc3a7a847f0c73ae3de99f6b56ef02f56644ea67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 18 Sep 2018 13:06:51 +0800
Subject: [PATCH 0317/1357] CLN: minor changes

---
 tensorflow/python/estimator/canned/boosted_trees.py    | 3 ++-
 tensorflow/python/keras/layers/advanced_activations.py | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 7c04ff7970..f2e7b37f7f 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -1084,7 +1084,8 @@ class _BoostedTrees(estimator.Estimator):
 
     Raises:
       ValueError: When attempting to normalize on an empty ensemble
-        or an ensemble of trees which have no splits.
+        or an ensemble of trees which have no splits. Or when attempting
+        to normalize and feature importances have negative values.
     """
     reader = checkpoint_utils.load_checkpoint(self._model_dir)
     serialized = reader.get_tensor('boosted_trees:0_serialized')
diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 6922d3ec1e..61ab69c16f 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.keras import activations
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import constraints
 from tensorflow.python.keras import initializers
-- 
GitLab


From 7c826588b058c14fd8c152bedb4e256c57ae1248 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Sep 2018 22:09:02 -0700
Subject: [PATCH 0318/1357] Automated rollback of commit
 185aa89912376d4088c22615908696cd30f9951b

PiperOrigin-RevId: 213394522
---
 tensorflow/contrib/gdr/gdr_memory_manager.cc  | 102 ++++++-----
 tensorflow/contrib/verbs/rdma_mgr.cc          |  81 ++++++---
 tensorflow/contrib/verbs/rdma_mgr.h           |   1 -
 tensorflow/contrib/verbs/verbs_server_lib.cc  |   5 -
 tensorflow/core/BUILD                         |   1 +
 .../core/common_runtime/bfc_allocator.cc      |  21 ++-
 .../core/common_runtime/bfc_allocator.h       |  14 +-
 .../common_runtime/gpu/cuda_host_allocator.h  |  12 +-
 .../common_runtime/gpu/gpu_bfc_allocator.cc   |  17 +-
 .../common_runtime/gpu/gpu_bfc_allocator.h    |  44 ++---
 .../gpu/gpu_bfc_allocator_test.cc             |  90 ++--------
 .../gpu/gpu_cudamalloc_allocator.cc           |  10 +-
 .../gpu/gpu_cudamalloc_allocator.h            |  11 +-
 .../common_runtime/gpu/gpu_debug_allocator.cc |  20 ++-
 .../common_runtime/gpu/gpu_debug_allocator.h  |  20 ++-
 .../gpu/gpu_debug_allocator_test.cc           |  35 +---
 .../core/common_runtime/gpu/gpu_device.cc     |  64 +++----
 .../core/common_runtime/gpu/gpu_device.h      |   9 +-
 .../common_runtime/gpu/gpu_process_state.cc   | 161 +++++++-----------
 .../common_runtime/gpu/gpu_process_state.h    |  58 +++----
 .../common_runtime/gpu/pool_allocator_test.cc |  68 ++------
 .../core/common_runtime/mkl_cpu_allocator.h   |  50 +++++-
 .../core/common_runtime/pool_allocator.cc     |  45 +++--
 .../core/common_runtime/pool_allocator.h      |  27 ++-
 .../core/common_runtime/process_state.cc      |  71 ++------
 .../core/common_runtime/process_state.h       |  15 +-
 .../core/common_runtime/renamed_device.h      |   7 +-
 .../core/common_runtime/visitable_allocator.h |  79 +++++++++
 tensorflow/core/framework/allocator.cc        |  20 +--
 tensorflow/core/framework/allocator.h         |  28 +--
 tensorflow/core/framework/device_base.h       |  10 +-
 tensorflow/core/framework/op_kernel.cc        |   9 +-
 32 files changed, 577 insertions(+), 628 deletions(-)
 create mode 100644 tensorflow/core/common_runtime/visitable_allocator.h

diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc
index bb06f1c41c..726f74c7b7 100644
--- a/tensorflow/contrib/gdr/gdr_memory_manager.cc
+++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc
@@ -138,8 +138,6 @@ class GdrMemoryManager : public RemoteMemoryManager {
       Device* device, DeviceContext* device_context, bool on_host,
       StatusCallback done) override;
 
-  static void RegMemVisitors();
-
  protected:
   Status CreateEndpoint(const string& host, const string& port,
                         RdmaEndpointPtr& endpoint);
@@ -185,51 +183,35 @@ class GdrMemoryManager : public RemoteMemoryManager {
   TF_DISALLOW_COPY_AND_ASSIGN(GdrMemoryManager);
 };
 
+// TODO(byronyi): remove this class and its registration when the default
+// cpu_allocator() returns visitable allocator, or cpu_allocator() is no
+// longer in use.
+class BFCGdrAllocator : public BFCAllocator {
+ public:
+  BFCGdrAllocator()
+      : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36,
+                     true, "cpu_gdr_bfc") {}
+};
+class BFCGdrAllocatorFactory : public AllocatorFactory {
+ public:
+  Allocator* CreateAllocator() override { return new BFCGdrAllocator; }
+
+  virtual SubAllocator* CreateSubAllocator(int numa_node) {
+    return new BasicCPUAllocator(numa_node);
+  }
+};
+
+REGISTER_MEM_ALLOCATOR("BFCGdrAllocator", 102, BFCGdrAllocatorFactory);
+
 GdrMemoryManager::GdrMemoryManager(const string& host, const string& port)
     : host_(host),
       port_(port),
       listening_(nullptr, EndpointDeleter),
       stopped_(true),
-      next_key_(0) {
-  static std::once_flag flag;
-  std::call_once(flag, []() { RegMemVisitors(); });
-}
+      next_key_(0) {}
 
 GdrMemoryManager::~GdrMemoryManager() { close(epfd_); }
 
-/*static*/ void GdrMemoryManager::RegMemVisitors() {
-  SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node,
-                                           size_t num_bytes) {
-    GdrMemoryManager::Singleton().InsertMemoryRegion(
-        ptr, num_bytes, strings::StrCat("CPU:", numa_node));
-  };
-  SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node,
-                                          size_t num_bytes) {
-    GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes);
-  };
-  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
-  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
-
-#if GOOGLE_CUDA
-  if (IsGDRAvailable()) {
-    int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
-
-    // Note we don't free allocated GPU memory so there is no free visitor
-    SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
-                                                  size_t num_bytes) {
-      RdmaMemoryMgr::Singleton().InsertMemoryRegion(
-          ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
-    };
-    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
-                                                     cuda_alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
-                                                          alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
-    LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
-  }
-#endif  // GOOGLE_CUDA
-}
-
 Status GdrMemoryManager::Init() {
   epfd_ = epoll_create1(0);
   if (epfd_ == -1) {
@@ -289,6 +271,48 @@ Status GdrMemoryManager::Init() {
                                "cannot add server to epoll");
   }
 
+  Allocator* allocators[] = {
+#if GOOGLE_CUDA
+    GPUProcessState::singleton()->GetCUDAHostAllocator(0),
+#endif  // GOOGLE_CUDA
+    ProcessState::singleton()->GetCPUAllocator(0),
+    cpu_allocator(),
+  };
+
+  using namespace std::placeholders;
+  VisitableAllocator::Visitor alloc_visitor =
+      std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2);
+  VisitableAllocator::Visitor free_visitor =
+      std::bind(&GdrMemoryManager::EvictMemoryRegion, this, _1, _2);
+
+  std::set<Allocator*> instrumented_;
+
+  // Host memory allocators
+  for (Allocator* allocator : allocators) {
+    auto* visitable_allocator = dynamic_cast<VisitableAllocator*>(allocator);
+    CHECK(visitable_allocator)
+        << "is not visitable for instrumentation" << allocator->Name();
+    // Make sure we don't instrument the same allocator twice
+    if (instrumented_.find(allocator) == std::end(instrumented_)) {
+      visitable_allocator->AddAllocVisitor(alloc_visitor);
+      visitable_allocator->AddFreeVisitor(free_visitor);
+      instrumented_.insert(allocator);
+      LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name();
+    }
+  }
+
+#if GOOGLE_CUDA
+  VisitableAllocator::Visitor cuda_alloc_visitor =
+      std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2);
+  if (IsGDRAvailable()) {
+    // Note we don't free allocated GPU memory so there is no free visitor
+    int32_t bus_id = TryToReadNumaNode(listening_->verbs->device) + 1;
+    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
+                                                     cuda_alloc_visitor);
+    LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
+  }
+#endif  // GOOGLE_CUDA
+
   return Status::OK();
 }
 
diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 2784bf124c..3cb5e61fac 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/contrib/verbs/grpc_verbs_client.h"
 #include "tensorflow/contrib/verbs/verbs_service.pb.h"
+#include "tensorflow/core/common_runtime/bfc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/pool_allocator.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/distributed_runtime/session_mgr.h"
 #include "tensorflow/core/framework/allocator_registry.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/strings/strcat.h"
 
 namespace tensorflow {
 
@@ -256,41 +256,74 @@ void MRDeleter(ibv_mr* mr) {
   }
 }
 
+// TODO(byronyi): remove this class and its registration when the default
+// cpu_allocator() returns visitable allocator, or cpu_allocator() is no
+// longer in use.
+class BFCRdmaAllocator : public BFCAllocator {
+ public:
+  BFCRdmaAllocator()
+      : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36,
+                     true, "cpu_rdma_bfc") {}
+};
+class BFCRdmaAllocatorFactory : public AllocatorFactory {
+ public:
+  Allocator* CreateAllocator() { return new BFCRdmaAllocator; }
+
+  SubAllocator* CreateSubAllocator(int numa_node) {
+    return new BasicCPUAllocator(numa_node);
+  }
+};
+
+REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory);
+
 void RdmaMgr::InitAllocators() {
-  static std::once_flag flag;
-  std::call_once(
-      flag, [this]() { RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_; });
-}
+  RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_;
 
-/*static*/ void RdmaMgr::RegMemVisitors() {
-  SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node,
-                                           size_t num_bytes) {
-    RdmaMemoryMgr::Singleton().InsertMemoryRegion(
-        ptr, num_bytes, strings::StrCat("CPU:", numa_node));
-  };
-  SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node,
-                                          size_t num_bytes) {
-    RdmaMemoryMgr::Singleton().EvictMemoryRegion(ptr, num_bytes);
+  Allocator* allocators[] = {
+#if GOOGLE_CUDA
+    GPUProcessState::singleton()->GetCUDAHostAllocator(0),
+#endif  // GOOGLE_CUDA
+    ProcessState::singleton()->GetCPUAllocator(0),
+    cpu_allocator(),
   };
 
-  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
-  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
+  using namespace std::placeholders;
+
+  std::set<Allocator*> instrumented_;
+
+  // Host memory allocators
+  for (Allocator* allocator : allocators) {
+    VisitableAllocator::Visitor alloc_visitor =
+        std::bind(&RdmaMemoryMgr::InsertMemoryRegion,
+                  &RdmaMemoryMgr::Singleton(), _1, _2, allocator->Name());
+    VisitableAllocator::Visitor free_visitor = std::bind(
+        &RdmaMemoryMgr::EvictMemoryRegion, &RdmaMemoryMgr::Singleton(), _1, _2);
+
+    auto* visitable_allocator = dynamic_cast<VisitableAllocator*>(allocator);
+    CHECK(visitable_allocator)
+        << "is not visitable for instrumentation" << allocator->Name();
+    // Make sure we don't instrument the same allocator twice
+    if (instrumented_.find(allocator) == std::end(instrumented_)) {
+      visitable_allocator->AddAllocVisitor(alloc_visitor);
+      visitable_allocator->AddFreeVisitor(free_visitor);
+      instrumented_.insert(allocator);
+      LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name();
+    }
+  }
 
 #if GOOGLE_CUDA
   if (IsGDRAvailable()) {
     // Note we don't free allocated GPU memory so there is no free visitor
     int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
 
-    SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
-                                                  size_t num_bytes) {
-      RdmaMemoryMgr::Singleton().InsertMemoryRegion(
-          ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
-    };
+    char buf[8];
+    sprintf(buf, "gpu");
+    VisitableAllocator::Visitor cuda_alloc_visitor =
+        std::bind(&RdmaMemoryMgr::InsertMemoryRegion,
+                  &RdmaMemoryMgr::Singleton(), _1, _2, std::string(buf));
+
     GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
                                                      cuda_alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
-                                                          alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
     LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
   }
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h
index 74b92cc9a6..9fffc335bb 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.h
+++ b/tensorflow/contrib/verbs/rdma_mgr.h
@@ -39,7 +39,6 @@ class RdmaMgr {
   void SetupChannels();
   bool ConnectivityCheck();
   void InitAllocators();
-  static void RegMemVisitors();
   const string& local_worker() { return local_worker_; }
 
  private:
diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc
index 61469686e4..1a0b5028fe 100644
--- a/tensorflow/contrib/verbs/verbs_server_lib.cc
+++ b/tensorflow/contrib/verbs/verbs_server_lib.cc
@@ -76,13 +76,8 @@ Status VerbsServer::ChannelCacheFactory(const ServerDef& server_def,
   return Status::OK();
 }
 
-namespace {
-std::once_call reg_mem_visitors_call;
-}  // namespace
-
 Status VerbsServer::Init(ServiceInitFunction service_func,
                          RendezvousMgrCreationFunction rendezvous_mgr_func) {
-  std::call_once(reg_mem_visitors_call, []() { RdmaMgr::RegMemVisitors(); });
   Status s = GrpcServer::Init(service_func, rendezvous_mgr_func);
   {
     mutex_lock l(mu_);
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 9bcf5b0865..d55bd8d7ed 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2783,6 +2783,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
     "common_runtime/step_stats_collector.h",
     "common_runtime/threadpool_device.h",
     "common_runtime/tracing_device.h",
+    "common_runtime/visitable_allocator.h",
     "common_runtime/process_state.h",
     "common_runtime/pool_allocator.h",
     "graph/gradients.h",
diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc
index 3843ea9e60..84c6285bbe 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/bfc_allocator.cc
@@ -31,7 +31,7 @@ namespace tensorflow {
 
 BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
                            bool allow_growth, const string& name)
-    : sub_allocator_(sub_allocator),
+    : suballocator_(sub_allocator),
       name_(name),
       free_chunks_list_(kInvalidChunkHandle),
       next_allocation_id_(1) {
@@ -72,7 +72,7 @@ BFCAllocator::~BFCAllocator() {
   VLOG(2) << "Number of regions allocated: "
           << region_manager_.regions().size();
   for (const auto& region : region_manager_.regions()) {
-    sub_allocator_->Free(region.ptr(), region.memory_size());
+    suballocator_->Free(region.ptr(), region.memory_size());
   }
 
   for (BinNum b = 0; b < kNumBins; b++) {
@@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
 
   // Try allocating.
   size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
-  void* mem_addr = sub_allocator_->Alloc(alignment, bytes);
+  void* mem_addr = suballocator_->Alloc(alignment, bytes);
   if (mem_addr == nullptr && !started_backpedal_) {
     // Only backpedal once.
     started_backpedal_ = true;
@@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
     while (mem_addr == nullptr) {
       bytes = RoundedBytes(bytes * kBackpedalFactor);
       if (bytes < rounded_bytes) break;
-      mem_addr = sub_allocator_->Alloc(alignment, bytes);
+      mem_addr = suballocator_->Alloc(alignment, bytes);
     }
   }
 
@@ -158,6 +158,10 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
   // Insert the chunk into the right bin.
   InsertFreeChunkIntoBin(h);
 
+  // Invoke visitors on newly allocated region.
+  for (const auto& visitor : region_visitors_) {
+    visitor(mem_addr, bytes);
+  }
   return true;
 }
 
@@ -486,6 +490,15 @@ void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) {
   InsertFreeChunkIntoBin(coalesced_chunk);
 }
 
+void BFCAllocator::AddAllocVisitor(Visitor visitor) {
+  VLOG(1) << "AddVisitor";
+  mutex_lock l(lock_);
+  region_visitors_.push_back(visitor);
+  for (const auto& region : region_manager_.regions()) {
+    visitor(region.ptr(), region.memory_size());
+  }
+}
+
 bool BFCAllocator::TracksAllocationSizes() { return true; }
 
 size_t BFCAllocator::RequestedSize(const void* ptr) {
diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h
index 364071e066..20e1dab1d5 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.h
+++ b/tensorflow/core/common_runtime/bfc_allocator.h
@@ -23,7 +23,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/allocator_retry.h"
-#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/macros.h"
@@ -42,7 +42,7 @@ namespace tensorflow {
 // coalescing.  One assumption we make is that the process using this
 // allocator owns pretty much all of the memory, and that nearly
 // all requests to allocate memory go through this interface.
-class BFCAllocator : public Allocator {
+class BFCAllocator : public VisitableAllocator {
  public:
   // Takes ownership of sub_allocator.
   BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
@@ -55,6 +55,11 @@ class BFCAllocator : public Allocator {
                     const AllocationAttributes& allocation_attr) override;
   void DeallocateRaw(void* ptr) override;
 
+  void AddAllocVisitor(Visitor visitor) override;
+
+  // Does nothing, because memory is never freed.
+  void AddFreeVisitor(Visitor visitor) override {}
+
   bool TracksAllocationSizes() override;
 
   size_t RequestedSize(const void* ptr) override;
@@ -418,7 +423,7 @@ class BFCAllocator : public Allocator {
   // of the available memory.
   bool started_backpedal_ = false;
 
-  std::unique_ptr<SubAllocator> sub_allocator_;
+  std::unique_ptr<SubAllocator> suballocator_;
   string name_;
 
   // Structures mutable after construction
@@ -430,6 +435,9 @@ class BFCAllocator : public Allocator {
   // Pointer to head of linked list of free Chunks
   ChunkHandle free_chunks_list_ GUARDED_BY(lock_);
 
+  // Called once on each region, ASAP.
+  std::vector<Visitor> region_visitors_ GUARDED_BY(lock_);
+
   // Counter containing the next unique identifier to assign to a
   // newly-created chunk.
   int64 next_allocation_id_ GUARDED_BY(lock_);
diff --git a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h
index 6bd29ef775..636cd43575 100644
--- a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h
@@ -26,12 +26,8 @@ namespace tensorflow {
 class CUDAHostAllocator : public SubAllocator {
  public:
   // Note: stream_exec cannot be null.
-  explicit CUDAHostAllocator(se::StreamExecutor* stream_exec, int numa_node,
-                             const std::vector<Visitor>& alloc_visitors,
-                             const std::vector<Visitor>& free_visitors)
-      : SubAllocator(alloc_visitors, free_visitors),
-        stream_exec_(stream_exec),
-        numa_node_(numa_node) {
+  explicit CUDAHostAllocator(se::StreamExecutor* stream_exec)
+      : stream_exec_(stream_exec) {
     CHECK(stream_exec_ != nullptr);
   }
   ~CUDAHostAllocator() override {}
@@ -43,23 +39,19 @@ class CUDAHostAllocator : public SubAllocator {
       if (ptr == nullptr) {
         LOG(WARNING) << "could not allocate pinned host memory of size: "
                      << num_bytes;
-        return ptr;
       }
-      VisitAlloc(ptr, numa_node_, num_bytes);
     }
     return ptr;
   }
 
   void Free(void* ptr, size_t num_bytes) override {
     if (ptr != nullptr) {
-      VisitFree(ptr, numa_node_, num_bytes);
       stream_exec_->HostMemoryDeallocate(ptr);
     }
   }
 
  private:
   se::StreamExecutor* stream_exec_;  // not owned, non-null
-  const int numa_node_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(CUDAHostAllocator);
 };
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index 44ffce77a1..2d4c8d0201 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -22,15 +22,18 @@ limitations under the License.
 
 namespace tensorflow {
 
-GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator,
-                                 size_t total_memory, const string& name)
-    : GPUBFCAllocator(sub_allocator, total_memory, GPUOptions(), name) {}
+GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                                 const string& name)
+    : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {}
 
-GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator,
-                                 size_t total_memory,
+GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
                                  const GPUOptions& gpu_options,
                                  const string& name)
-    : BFCAllocator(sub_allocator, total_memory, gpu_options.allow_growth(),
-                   name) {}
+    : BFCAllocator(
+          new GPUMemAllocator(
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
+              gpu_options.per_process_gpu_memory_fraction() > 1.0 ||
+                  gpu_options.experimental().use_unified_memory()),
+          total_memory, gpu_options.allow_growth(), name) {}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index 6b6de80734..f1cc2eace1 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -31,20 +31,28 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Suballocator for GPU memory.
-class GPUMemAllocator : public SubAllocator {
+// A GPU memory allocator that implements a 'best-fit with coalescing'
+// algorithm.
+class GPUBFCAllocator : public BFCAllocator {
  public:
   // 'cuda_gpu_id' refers to the ID of the GPU device within
   // the process and must reference a valid ID in the process.
+  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                  const string& name);
+  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                  const GPUOptions& gpu_options, const string& name);
+  virtual ~GPUBFCAllocator() {}
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
+};
+
+// Suballocator for GPU memory.
+class GPUMemAllocator : public SubAllocator {
+ public:
   // Note: stream_exec cannot be null.
-  explicit GPUMemAllocator(se::StreamExecutor* stream_exec, CudaGpuId gpu_id,
-                           bool use_unified_memory,
-                           const std::vector<Visitor>& alloc_visitors,
-                           const std::vector<Visitor>& free_visitors)
-      : SubAllocator(alloc_visitors, free_visitors),
-        stream_exec_(stream_exec),
-        gpu_id_(gpu_id),
-        use_unified_memory_(use_unified_memory) {
+  explicit GPUMemAllocator(se::StreamExecutor* stream_exec,
+                           bool use_unified_memory)
+      : stream_exec_(stream_exec), use_unified_memory_(use_unified_memory) {
     CHECK(stream_exec_ != nullptr);
   }
   ~GPUMemAllocator() override {}
@@ -57,14 +65,12 @@ class GPUMemAllocator : public SubAllocator {
       } else {
         ptr = stream_exec_->AllocateArray<char>(num_bytes).opaque();
       }
-      VisitAlloc(ptr, gpu_id_.value(), num_bytes);
     }
     return ptr;
   }
 
   void Free(void* ptr, size_t num_bytes) override {
     if (ptr != nullptr) {
-      VisitFree(ptr, gpu_id_.value(), num_bytes);
       if (use_unified_memory_) {
         stream_exec_->UnifiedMemoryDeallocate(ptr);
       } else {
@@ -76,25 +82,11 @@ class GPUMemAllocator : public SubAllocator {
 
  private:
   se::StreamExecutor* stream_exec_;  // not owned, non-null
-  const CudaGpuId gpu_id_;
   const bool use_unified_memory_ = false;
 
   TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator);
 };
 
-// A GPU memory allocator that implements a 'best-fit with coalescing'
-// algorithm.
-class GPUBFCAllocator : public BFCAllocator {
- public:
-  GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory,
-                  const string& name);
-  GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory,
-                  const GPUOptions& gpu_options, const string& name);
-  ~GPUBFCAllocator() override {}
-
-  TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
-};
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_BFC_ALLOCATOR_H_
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
index 7112c3afd4..67caeb3495 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
@@ -21,7 +21,6 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -47,11 +46,7 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use,
 }
 
 TEST(GPUBFCAllocatorTest, NoDups) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   // Allocate a lot of raw pointers
@@ -80,11 +75,7 @@ TEST(GPUBFCAllocatorTest, NoDups) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   // Allocate 256 raw pointers of sizes between 100 bytes and about
   // a meg
   random::PhiloxRandom philox(123, 17);
@@ -142,11 +133,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
 }
 
 TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   float* first_ptr = a.Allocate<float>(1024);
@@ -181,30 +168,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   float* ptr = a.Allocate<float>(0);
   EXPECT_EQ(nullptr, ptr);
 }
 
 TEST(GPUBFCAllocatorTest, TracksSizes) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   float* t1 = a.Allocate<float>(1);
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
@@ -212,12 +187,8 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
 }
 
 TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
   // Configure a 1MiB byte limit
-  GPUBFCAllocator a(sub_allocator, 1 << 20, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc");
 
   float* first_ptr = a.Allocate<float>(1 << 6);
   float* second_ptr = a.Allocate<float>(1 << 20);
@@ -232,11 +203,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
   options.set_allow_growth(true);
 
   // Max of 2GiB, but starts out small.
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1LL << 31, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc");
 
   // Allocate 10 raw pointers of sizes between 100 bytes and about
   // 64 megs.
@@ -297,15 +264,8 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
 }
 
 TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1UL << 60, "GPU_0_bfc");
-  sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator b(sub_allocator, 1UL << 60, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
+  GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
   void* amem = a.AllocateRaw(1, 1);
   void* bmem = b.AllocateRaw(1, 1 << 30);
   a.DeallocateRaw(amem);
@@ -313,11 +273,7 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
 }
 
 static void BM_Allocation(int iters) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<size_t> sizes = {256,        4096,      16384,    524288,
                                512,        1048576,   10485760, 104857600,
@@ -333,11 +289,7 @@ static void BM_Allocation(int iters) {
 BENCHMARK(BM_Allocation);
 
 static void BM_AllocationThreaded(int iters, int num_threads) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
   thread::ThreadPool pool(Env::Default(), "test", num_threads);
   std::atomic_int_fast32_t count(iters);
   mutex done_lock;
@@ -373,11 +325,7 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16);
 // A more complex benchmark that defers deallocation of an object for
 // "delay" allocations.
 static void BM_AllocationDelayed(int iters, int delay) {
-  CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<int> sizes = {256, 4096, 16384, 4096, 512, 1024, 1024};
   int size_index = 0;
@@ -415,11 +363,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   // only methods inside this class can access private members of BFCAllocator.
 
   void TestBinDebugInfo() {
-    CudaGpuId cuda_gpu_id(0);
-    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-        false /*use_unified_memory*/, {}, {});
-    GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
+    GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
 
     std::vector<void*> initial_ptrs;
     std::vector<size_t> initial_ptrs_allocated_sizes;
@@ -497,11 +441,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   }
 
   void TestLog2FloorNonZeroSlow() {
-    CudaGpuId cuda_gpu_id(0);
-    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-        false /*use_unified_memory*/, {}, {});
-    GPUBFCAllocator a(sub_allocator, 1 /* total_memory */, "GPU_0_bfc");
+    GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc");
     EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0));
     EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1));
     EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
index 8e14f1ea75..934a57a5fb 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
@@ -27,7 +27,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-GPUcudaMallocAllocator::GPUcudaMallocAllocator(Allocator* allocator,
+GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator,
                                                CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
   stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -60,6 +60,14 @@ void GPUcudaMallocAllocator::DeallocateRaw(void* ptr) {
 #endif  // GOOGLE_CUDA
 }
 
+void GPUcudaMallocAllocator::AddAllocVisitor(Visitor visitor) {
+  return base_allocator_->AddAllocVisitor(visitor);
+}
+
+void GPUcudaMallocAllocator::AddFreeVisitor(Visitor visitor) {
+  return base_allocator_->AddFreeVisitor(visitor);
+}
+
 bool GPUcudaMallocAllocator::TracksAllocationSizes() { return false; }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
index 3d1d0ef481..856fdc34b4 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include <memory>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
@@ -29,17 +29,20 @@ namespace tensorflow {
 // An allocator that wraps a GPU allocator and adds debugging
 // functionality that verifies that users do not write outside their
 // allocated memory.
-class GPUcudaMallocAllocator : public Allocator {
+class GPUcudaMallocAllocator : public VisitableAllocator {
  public:
-  explicit GPUcudaMallocAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id);
+  explicit GPUcudaMallocAllocator(VisitableAllocator* allocator,
+                                  CudaGpuId cuda_gpu_id);
   ~GPUcudaMallocAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
+  void AddAllocVisitor(Visitor visitor) override;
+  void AddFreeVisitor(Visitor visitor) override;
   bool TracksAllocationSizes() override;
 
  private:
-  Allocator* base_allocator_ = nullptr;  // owned
+  VisitableAllocator* base_allocator_ = nullptr;  // owned
 
   se::StreamExecutor* stream_exec_;  // Not owned.
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
index 6bad66dcec..e4c834b30d 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
@@ -73,7 +73,7 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) {
 // -----------------------------------------------------------------------------
 // GPUDebugAllocator
 // -----------------------------------------------------------------------------
-GPUDebugAllocator::GPUDebugAllocator(Allocator* allocator,
+GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator,
                                      CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
   stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -111,6 +111,14 @@ void GPUDebugAllocator::DeallocateRaw(void* ptr) {
   base_allocator_->DeallocateRaw(ptr);
 }
 
+void GPUDebugAllocator::AddAllocVisitor(Visitor visitor) {
+  return base_allocator_->AddAllocVisitor(visitor);
+}
+
+void GPUDebugAllocator::AddFreeVisitor(Visitor visitor) {
+  return base_allocator_->AddFreeVisitor(visitor);
+}
+
 bool GPUDebugAllocator::TracksAllocationSizes() { return true; }
 
 size_t GPUDebugAllocator::RequestedSize(const void* ptr) {
@@ -150,7 +158,7 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) {
 // -----------------------------------------------------------------------------
 // GPUNanResetAllocator
 // -----------------------------------------------------------------------------
-GPUNanResetAllocator::GPUNanResetAllocator(Allocator* allocator,
+GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator,
                                            CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
   stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -192,6 +200,14 @@ void GPUNanResetAllocator::DeallocateRaw(void* ptr) {
   base_allocator_->DeallocateRaw(ptr);
 }
 
+void GPUNanResetAllocator::AddAllocVisitor(Visitor visitor) {
+  return base_allocator_->AddAllocVisitor(visitor);
+}
+
+void GPUNanResetAllocator::AddFreeVisitor(Visitor visitor) {
+  return base_allocator_->AddFreeVisitor(visitor);
+}
+
 size_t GPUNanResetAllocator::RequestedSize(const void* ptr) {
   return base_allocator_->RequestedSize(ptr);
 }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
index 0f27ff4384..0f9b72040c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include <unordered_map>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
@@ -31,13 +31,16 @@ namespace tensorflow {
 // An allocator that wraps a GPU allocator and adds debugging
 // functionality that verifies that users do not write outside their
 // allocated memory.
-class GPUDebugAllocator : public Allocator {
+class GPUDebugAllocator : public VisitableAllocator {
  public:
-  explicit GPUDebugAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id);
+  explicit GPUDebugAllocator(VisitableAllocator* allocator,
+                             CudaGpuId cuda_gpu_id);
   ~GPUDebugAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
+  void AddAllocVisitor(Visitor visitor) override;
+  void AddFreeVisitor(Visitor visitor) override;
   bool TracksAllocationSizes() override;
   size_t RequestedSize(const void* ptr) override;
   size_t AllocatedSize(const void* ptr) override;
@@ -50,7 +53,7 @@ class GPUDebugAllocator : public Allocator {
   bool CheckFooter(void* ptr);
 
  private:
-  Allocator* base_allocator_ = nullptr;  // owned
+  VisitableAllocator* base_allocator_ = nullptr;  // owned
 
   se::StreamExecutor* stream_exec_;  // Not owned.
 
@@ -60,20 +63,23 @@ class GPUDebugAllocator : public Allocator {
 // An allocator that wraps a GPU allocator and resets the memory on
 // allocation and free to 'NaN', helping to identify cases where the
 // user forgets to initialize the memory.
-class GPUNanResetAllocator : public Allocator {
+class GPUNanResetAllocator : public VisitableAllocator {
  public:
-  explicit GPUNanResetAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id);
+  explicit GPUNanResetAllocator(VisitableAllocator* allocator,
+                                CudaGpuId cuda_gpu_id);
   ~GPUNanResetAllocator() override;
   string Name() override { return "gpu_nan_reset"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
+  void AddAllocVisitor(Visitor visitor) override;
+  void AddFreeVisitor(Visitor visitor) override;
   size_t RequestedSize(const void* ptr) override;
   size_t AllocatedSize(const void* ptr) override;
   void GetStats(AllocatorStats* stats) override;
   void ClearStats() override;
 
  private:
-  Allocator* base_allocator_ = nullptr;  // owned
+  VisitableAllocator* base_allocator_ = nullptr;  // owned
 
   se::StreamExecutor* stream_exec_;  // Not owned.
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
index 98283cd846..236a0afa0b 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
@@ -35,10 +35,7 @@ namespace {
 
 TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
+  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                       cuda_gpu_id);
   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
@@ -62,10 +59,7 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
     EXPECT_DEATH(
         {
           const CudaGpuId cuda_gpu_id(0);
-          GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
-              cuda_gpu_id, false /*use_unified_memory*/, {}, {});
-          GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
+          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                               cuda_gpu_id);
           auto stream_exec =
               GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -98,10 +92,7 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
     EXPECT_DEATH(
         {
           const CudaGpuId cuda_gpu_id(0);
-          GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
-              cuda_gpu_id, false /*use_unified_memory*/, {}, {});
-          GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
+          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                               cuda_gpu_id);
           auto stream_exec =
               GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -131,10 +122,7 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
 
 TEST(GPUDebugAllocatorTest, ResetToNan) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUNanResetAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
+  GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                          cuda_gpu_id);
   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
@@ -175,11 +163,8 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
 TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
   const CudaGpuId cuda_gpu_id(0);
   // NaN reset must be the outer-most allocator.
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
+      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                             cuda_gpu_id),
       cuda_gpu_id);
   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -220,21 +205,15 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
 
 TEST(GPUDebugAllocatorTest, TracksSizes) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
-  GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
+  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                       cuda_gpu_id);
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-      false /*use_unified_memory*/, {}, {});
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
+      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                             cuda_gpu_id),
       cuda_gpu_id);
   float* t1 = a.Allocate<float>(1);
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 50e61b7e00..2763ac0d4a 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -41,6 +41,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/gpu_device_context.h"
 #include "tensorflow/core/common_runtime/local_device.h"
+#include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -284,38 +285,6 @@ BaseGPUDevice::~BaseGPUDevice() {
   for (auto ctx : device_contexts_) ctx->Unref();
 }
 
-// This should be idempotent if already initialized.
-Status BaseGPUDevice::InitScratchBuffers() {
-  mutex_lock l(scratch_init_mutex_);
-  if (scratch_.size() < max_streams_) {
-    for (int i = 0; i < max_streams_; i++) {
-      DCHECK(streams_[i]);
-      if (scratch_.size() > i && scratch_[i]) continue;
-      size_t scratch_buffer_size =
-          Eigen::kCudaScratchSize + sizeof(unsigned int);
-      void* scratch_buffer = gpu_allocator_->AllocateRaw(
-          Allocator::kAllocatorAlignment, scratch_buffer_size);
-      if (scratch_buffer == nullptr) {
-        return errors::FailedPrecondition(
-            "Failed to allocate scratch buffer for device ",
-            tf_gpu_id_.value());
-      }
-      se::DeviceMemory<char> mem(
-          se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size));
-
-      bool ok = executor_->SynchronousMemZero(
-          &mem, Eigen::kCudaScratchSize + sizeof(unsigned int));
-      if (!ok) {
-        return errors::FailedPrecondition(
-            "Failed to memcopy into scratch buffer for device ",
-            tf_gpu_id_.value());
-      }
-      scratch_.push_back(static_cast<char*>(scratch_buffer));
-    }
-  }
-  return Status::OK();
-}
-
 Status BaseGPUDevice::Init(const SessionOptions& options) {
   auto executor_status = GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id_);
   if (!executor_status.status().ok()) {
@@ -334,6 +303,27 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   for (int i = 0; i < max_streams_; i++) {
     streams_.push_back(StreamGroupFactory::Global().GetOrCreate(
         tf_gpu_id_, i, executor_, options.config.gpu_options()));
+
+    size_t scratch_buffer_size = Eigen::kCudaScratchSize + sizeof(unsigned int);
+    void* scratch_buffer = gpu_allocator_->AllocateRaw(
+        Allocator::kAllocatorAlignment, scratch_buffer_size);
+    if (scratch_buffer == nullptr) {
+      return errors::FailedPrecondition(
+          "Failed to allocate scratch buffer for device ", tf_gpu_id_.value());
+    }
+    scratch_.push_back(static_cast<char*>(scratch_buffer));
+
+    se::DeviceMemory<char> mem(
+        se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size));
+
+    bool ok = executor_->SynchronousMemZero(
+        &mem, Eigen::kCudaScratchSize + sizeof(unsigned int));
+    if (!ok) {
+      return errors::FailedPrecondition(
+          "Failed to memcopy into scratch buffer for device ",
+          tf_gpu_id_.value());
+    }
+
     device_contexts_.push_back(new GPUDeviceContext(
         i, streams_.back()->compute, streams_.back()->host_to_device,
         streams_.back()->device_to_host, streams_.back()->device_to_device));
@@ -877,11 +867,10 @@ PerOpGpuDevice* BaseGPUDevice::MakeGpuDevice() {
   return new ConcretePerOpGpuDevice();
 }
 
-Status BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
-                                            PerOpGpuDevice* device,
-                                            DeviceContext* dc,
-                                            Allocator* allocator) {
-  TF_RETURN_IF_ERROR(InitScratchBuffers());
+void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
+                                          PerOpGpuDevice* device,
+                                          DeviceContext* dc,
+                                          Allocator* allocator) {
   if (dc) {
     const GPUDeviceContext* gpu_dc = static_cast<GPUDeviceContext*>(dc);
     const int stream_id = gpu_dc->stream_id();
@@ -892,7 +881,6 @@ Status BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
   } else {
     ReinitializeDevice(context, device, 0, allocator);
   }
-  return Status::OK();
 }
 
 Allocator* BaseGPUDevice::GetScopedAllocator(AllocatorAttributes attr,
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index b3eea55758..56d03d7a8c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -86,9 +86,8 @@ class BaseGPUDevice : public LocalDevice {
   // The caller owns the returned device.
   PerOpGpuDevice* MakeGpuDevice() override;
 
-  Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
-                               DeviceContext* dc,
-                               Allocator* allocator) override;
+  void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
+                             DeviceContext* dc, Allocator* allocator) override;
 
   // Returns the CUDA GPU id of this device within the native driver system;
   // e.g., for CUDA this is the ordinal of the GPU within the system.
@@ -126,7 +125,6 @@ class BaseGPUDevice : public LocalDevice {
   class StreamGroupFactory;
 
   gtl::InlinedVector<StreamGroup*, 4> streams_;
-  mutex scratch_init_mutex_;
   gtl::InlinedVector<char*, 4> scratch_;
   std::vector<GPUDeviceContext*> device_contexts_;
   GpuDeviceInfo* gpu_device_info_ = nullptr;
@@ -137,9 +135,6 @@ class BaseGPUDevice : public LocalDevice {
   std::unique_ptr<EventMgr> em_;
   std::unique_ptr<thread::ThreadPool> thread_pool_;
 
-  // Initialize scractch buffers used by Eigen.
-  Status InitScratchBuffers();
-
   void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device,
                           int stream_id, Allocator* allocator);
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
index 9ec740fabe..b18688174d 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
@@ -76,16 +76,12 @@ GPUProcessState::GPUProcessState() : gpu_device_enabled_(false) {
 // This function is defined for debugging problems with the allocators.
 GPUProcessState::~GPUProcessState() {
   CHECK_EQ(this, instance_);
+  for (auto p : gpu_allocators_) {
+    delete p;
+  }
   instance_ = nullptr;
 }
 
-int GPUProcessState::BusIdForGPU(TfGpuId tf_gpu_id) {
-  // Return the NUMA node associated with the GPU's StreamExecutor.
-  se::StreamExecutor* se =
-      GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
-  return se->GetDeviceDescription().numa_node();
-}
-
 Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
                                             TfGpuId tf_gpu_id,
                                             size_t total_bytes) {
@@ -97,10 +93,13 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
 
   if (tf_gpu_id.value() >= static_cast<int64>(gpu_allocators_.size())) {
     gpu_allocators_.resize(tf_gpu_id.value() + 1);
+    if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types)
+      gpu_al_.resize(tf_gpu_id.value() + 1);
   }
 
-  AllocatorParts& allocator_parts = gpu_allocators_[tf_gpu_id.value()];
-  if (allocator_parts.allocator.get() == nullptr) {
+  if (gpu_allocators_[tf_gpu_id.value()] == nullptr) {
+    VisitableAllocator* gpu_allocator;
+
     // Validate allocator types.
     if (!allocator_type.empty() && allocator_type != "BFC") {
       LOG(ERROR) << "Invalid allocator type: " << allocator_type;
@@ -109,17 +108,8 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
 
     CudaGpuId cuda_gpu_id;
     TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
-    int bus_id = BusIdForGPU(tf_gpu_id);
-    while (bus_id >= gpu_visitors_.size()) {
-      gpu_visitors_.push_back({});
-    }
-    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
-        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
-        (options.per_process_gpu_memory_fraction() > 1.0 ||
-         options.experimental().use_unified_memory()),
-        gpu_visitors_[bus_id], {});
-    Allocator* gpu_allocator =
-        new GPUBFCAllocator(sub_allocator, total_bytes, options,
+    gpu_allocator =
+        new GPUBFCAllocator(cuda_gpu_id, total_bytes, options,
                             strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc"));
 
     // If true, checks for memory overwrites by writing
@@ -133,25 +123,34 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
       // **WARNING** probably will not work in a multi-gpu scenario
       gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id);
     }
-
-    Allocator* recording_allocator = nullptr;
+    gpu_allocators_[tf_gpu_id.value()] = gpu_allocator;
+
+    // If there are any pending AllocVisitors for this bus, add
+    // them now.
+    se::StreamExecutor* se =
+        GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
+    int bus_id = se->GetDeviceDescription().numa_node();
+    if (bus_id >= 0 && bus_id < static_cast<int64>(gpu_visitors_.size())) {
+      for (const auto& v : gpu_visitors_[bus_id]) {
+        gpu_allocator->AddAllocVisitor(v);
+      }
+    }
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
       ProcessState::MemDesc md;
       md.loc = ProcessState::MemDesc::GPU;
       md.dev_index = cuda_gpu_id.value();
       md.gpu_registered = false;
       md.nic_registered = true;
-      recording_allocator = new internal::RecordingAllocator(
+      if (static_cast<int64>(gpu_al_.size()) <= tf_gpu_id.value()) {
+        gpu_al_.resize(tf_gpu_id.value() + 1);
+      }
+      gpu_al_[tf_gpu_id.value()] = new internal::RecordingAllocator(
           &process_state_->mem_desc_map_, gpu_allocator, md, &mu_);
     }
-    allocator_parts = {std::unique_ptr<Allocator>(gpu_allocator), sub_allocator,
-                       std::unique_ptr<Allocator>(recording_allocator)};
-  }
-  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
-    return allocator_parts.recording_allocator.get();
-  } else {
-    return allocator_parts.allocator.get();
   }
+  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types)
+    return gpu_al_[tf_gpu_id.value()];
+  return gpu_allocators_[tf_gpu_id.value()];
 #else
   LOG(FATAL) << "GPUAllocator unavailable. Not compiled with --config=cuda.";
   return nullptr;
@@ -173,12 +172,11 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
     tf_shared_lock lock(mu_);
 
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types &&
-        !cuda_host_allocators_.empty() &&
-        cuda_host_allocators_[0].recording_allocator != nullptr) {
-      return cuda_host_allocators_[0].recording_allocator.get();
+        static_cast<int>(cuda_al_.size()) > 0) {
+      return cuda_al_[0];
     }
     if (static_cast<int>(cuda_host_allocators_.size()) > numa_node) {
-      return cuda_host_allocators_[0].allocator.get();
+      return cuda_host_allocators_[0];
     }
   }
 
@@ -192,7 +190,7 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
   // it knows is valid.
   se::StreamExecutor* se = nullptr;
   for (int i = 0; i < static_cast<int>(gpu_allocators_.size()); ++i) {
-    if (gpu_allocators_[i].allocator != nullptr) {
+    if (gpu_allocators_[i] != nullptr) {
       se = GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
       break;
     }
@@ -201,15 +199,6 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
   CHECK_NE(nullptr, se);
 
   while (static_cast<int>(cuda_host_allocators_.size()) <= numa_node) {
-    while (cuda_host_alloc_visitors_.size() <= numa_node) {
-      cuda_host_alloc_visitors_.push_back({});
-    }
-    while (cuda_host_free_visitors_.size() <= numa_node) {
-      cuda_host_free_visitors_.push_back({});
-    }
-    SubAllocator* sub_allocator = new CUDAHostAllocator(
-        se, numa_node, cuda_host_alloc_visitors_[numa_node],
-        cuda_host_free_visitors_[numa_node]);
     // TODO(zheng-xq): evaluate whether 64GB by default is the best choice.
     int64 cuda_host_mem_limit_in_mb = -1;
     Status status = ReadInt64FromEnvVar("TF_CUDA_HOST_MEM_LIMIT_IN_MB",
@@ -219,92 +208,62 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
       LOG(ERROR) << "GetCUDAHostAllocator: " << status.error_message();
     }
     int64 cuda_host_mem_limit = cuda_host_mem_limit_in_mb * (1LL << 20);
-    Allocator* allocator =
-        new BFCAllocator(sub_allocator, cuda_host_mem_limit,
+    VisitableAllocator* allocator =
+        new BFCAllocator(new CUDAHostAllocator(se), cuda_host_mem_limit,
                          true /*allow_growth*/, "cuda_host_bfc" /*name*/);
 
-    if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) {
+    if (LogMemory::IsEnabled()) {
       // Wrap the allocator to track allocation ids for better logging
       // at the cost of performance.
-      allocator = new TrackingAllocator(allocator, true);
+      allocator = new TrackingVisitableAllocator(allocator, true);
     }
-    cuda_host_allocators_.push_back({std::unique_ptr<Allocator>(allocator),
-                                     sub_allocator,
-                                     std::unique_ptr<Allocator>(nullptr)});
-    AllocatorParts& allocator_parts = cuda_host_allocators_.back();
+    cuda_host_allocators_.push_back(allocator);
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
       ProcessState::MemDesc md;
       md.loc = ProcessState::MemDesc::CPU;
       md.dev_index = 0;
       md.gpu_registered = true;
       md.nic_registered = false;
-      allocator_parts.recording_allocator.reset(
-          new internal::RecordingAllocator(&process_state_->mem_desc_map_,
-                                           allocator_parts.allocator.get(), md,
-                                           &mu_));
+      cuda_al_.push_back(new internal::RecordingAllocator(
+          &process_state_->mem_desc_map_, cuda_host_allocators_.back(), md,
+          &mu_));
     }
   }
-  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
-    return cuda_host_allocators_[0].recording_allocator.get();
-  } else {
-    return cuda_host_allocators_[0].allocator.get();
-  }
+  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types)
+    return cuda_al_[0];
+  return cuda_host_allocators_[0];
 }
 
 void GPUProcessState::AddGPUAllocVisitor(int bus_id,
-                                         const SubAllocator::Visitor& visitor) {
+                                         const AllocVisitor& visitor) {
+  CHECK(process_state_);
 #if GOOGLE_CUDA
   mutex_lock lock(mu_);
-  CHECK(gpu_allocators_.empty())  // Crash OK
-      << "AddGPUAllocVisitor must be called before "
-         "first call to GetGPUAllocator.";
+  for (int i = 0; i < static_cast<int64>(gpu_allocators_.size()); ++i) {
+    se::StreamExecutor* se =
+        GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
+    if (gpu_allocators_[i] &&
+        (se->GetDeviceDescription().numa_node() + 1) == bus_id) {
+      gpu_allocators_[i]->AddAllocVisitor(visitor);
+    }
+  }
   while (bus_id >= static_cast<int64>(gpu_visitors_.size())) {
-    gpu_visitors_.push_back(std::vector<SubAllocator::Visitor>());
+    gpu_visitors_.push_back(std::vector<AllocVisitor>());
   }
   gpu_visitors_[bus_id].push_back(visitor);
 #endif  // GOOGLE_CUDA
 }
 
-void GPUProcessState::AddCUDAHostAllocVisitor(
-    int numa_node, const SubAllocator::Visitor& visitor) {
-#if GOOGLE_CUDA
-  mutex_lock lock(mu_);
-  CHECK(cuda_host_allocators_.empty())  // Crash OK
-      << "AddCUDAHostAllocVisitor must be called before "
-         "first call to GetCUDAHostAllocator.";
-  while (numa_node >= static_cast<int64>(cuda_host_alloc_visitors_.size())) {
-    cuda_host_alloc_visitors_.push_back(std::vector<SubAllocator::Visitor>());
-  }
-  cuda_host_alloc_visitors_[numa_node].push_back(visitor);
-#endif  // GOOGLE_CUDA
-}
-
-void GPUProcessState::AddCUDAHostFreeVisitor(
-    int numa_node, const SubAllocator::Visitor& visitor) {
-#if GOOGLE_CUDA
-  mutex_lock lock(mu_);
-  CHECK(cuda_host_allocators_.empty())  // Crash OK
-      << "AddCUDAHostFreeVisitor must be called before "
-         "first call to GetCUDAHostAllocator.";
-  while (numa_node >= static_cast<int64>(cuda_host_free_visitors_.size())) {
-    cuda_host_free_visitors_.push_back(std::vector<SubAllocator::Visitor>());
-  }
-  cuda_host_free_visitors_[numa_node].push_back(visitor);
-#endif  // GOOGLE_CUDA
-}
-
 void GPUProcessState::TestOnlyReset() {
-  if (process_state_) {
-    process_state_->ProcessState::TestOnlyReset();
-  }
+  process_state_->ProcessState::TestOnlyReset();
   {
     mutex_lock lock(mu_);
     gpu_device_enabled_ = false;
-    gpu_allocators_.clear();
     gpu_visitors_.clear();
-    cuda_host_allocators_.clear();
-    cuda_host_alloc_visitors_.clear();
-    cuda_host_free_visitors_.clear();
+    gtl::STLDeleteElements(&gpu_allocators_);
+    gtl::STLDeleteElements(&cuda_host_allocators_);
+    gtl::STLDeleteElements(&gpu_al_);
+    gtl::STLDeleteElements(&cuda_al_);
   }
 }
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.h b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
index 43e9a31660..cb41c3c6bd 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
@@ -32,6 +32,7 @@ limitations under the License.
 namespace tensorflow {
 
 class Allocator;
+class VisitableAllocator;
 class PoolAllocator;
 
 // Singleton that manages per-process state when GPUs are present.
@@ -71,30 +72,18 @@ class GPUProcessState {
 
   virtual Allocator* GetCUDAHostAllocator(int numa_node);
 
-  // Registers a Visitor to be invoked on new chunks of memory allocated by the
-  // SubAllocator of every GPU proximate to the specified bus.  The AllocVisitor
-  // is provided with a memory pointer, a GPU id, and the size of the area it
-  // identifies.  The pointer is not guaranteed to be valid after the call
-  // terminates.  The intention is for this interface to be used for network
-  // device memory registration.  "bus_id" is platform-specific.  On many
-  // platforms it should be 0.  On machines with multiple PCIe buses, it should
-  // be the index of one of the PCIe buses (maybe the NUMA node at which the
-  // PCIe is rooted).  If the bus_id is invalid, results are undefined.
-  virtual void AddGPUAllocVisitor(int bus_id,
-                                  const SubAllocator::Visitor& visitor);
-
-  // Registers a Visitor to be invoked on new chunks of memory allocated by
-  // the SubAllocator of the CUDAHostAllocator for the given numa_node.
-  virtual void AddCUDAHostAllocVisitor(int numa_node,
-                                       const SubAllocator::Visitor& visitor);
-
-  // Registers a Visitor to be invoked on each chunk handed back for freeing to
-  // the SubAllocator of the CUDAHostAllocator for the given numa_node.
-  virtual void AddCUDAHostFreeVisitor(int numa_node,
-                                      const SubAllocator::Visitor& visitor);
-
-  // Returns bus_id for the given GPU id.
-  virtual int BusIdForGPU(TfGpuId tf_gpu_id);
+  // Registers a function to be called once on every new Region
+  // allocated by every GPURegionAllocator proximate to the specified
+  // bus.  The AllocVisitor is provided with a memory pointer and the
+  // size of the area it identifies.  The pointer is not guaranteed to
+  // be valid after the call terminates.  The intention is for this
+  // interface to be used for network device memory registration.
+  // "bus_id" is platform-specific.  On many platforms it
+  // should be 0.  On machines with multiple PCIe buses, it should be
+  // the index of one of the PCIe buses.  If the bus_id is invalid,
+  // results are undefined.
+  typedef std::function<void(void*, size_t)> AllocVisitor;
+  virtual void AddGPUAllocVisitor(int bus_id, const AllocVisitor& visitor);
 
  protected:
   GPUProcessState();
@@ -114,22 +103,17 @@ class GPUProcessState {
 
   mutex mu_;
 
-  struct AllocatorParts {
-    std::unique_ptr<Allocator> allocator;
-    SubAllocator* sub_allocator;  // owned by allocator
-    std::unique_ptr<Allocator> recording_allocator;
-  };
-  std::vector<AllocatorParts> gpu_allocators_ GUARDED_BY(mu_);
-  std::vector<std::vector<SubAllocator::Visitor>> gpu_visitors_ GUARDED_BY(mu_);
-
-  std::vector<AllocatorParts> cuda_host_allocators_ GUARDED_BY(mu_);
-  std::vector<std::vector<SubAllocator::Visitor>> cuda_host_alloc_visitors_
-      GUARDED_BY(mu_);
-  std::vector<std::vector<SubAllocator::Visitor>> cuda_host_free_visitors_
-      GUARDED_BY(mu_);
+  std::vector<VisitableAllocator*> gpu_allocators_ GUARDED_BY(mu_);
+  std::vector<std::vector<AllocVisitor>> gpu_visitors_ GUARDED_BY(mu_);
+  std::vector<Allocator*> cuda_host_allocators_ GUARDED_BY(mu_);
 
   virtual ~GPUProcessState();
 
+  // Optional RecordingAllocators that wrap the corresponding
+  // Allocators for runtime attribute use analysis.
+  std::vector<Allocator*> gpu_al_ GUARDED_BY(mu_);
+  std::vector<Allocator*> cuda_al_ GUARDED_BY(mu_);
+
   friend class GPUDeviceTest;
 };
 
diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc
index 6b2f6547b0..583bff2c07 100644
--- a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc
@@ -31,8 +31,7 @@ TEST(PoolAllocatorTest, ZeroSizeBuffers) {
       2 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie(),
-          0 /*numa_node*/, {}, {}),
+              .ValueOrDie()),
       new NoopRounder, "pool");
 
   EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/));
@@ -50,8 +49,7 @@ TEST(PoolAllocatorTest, ZeroSizePool) {
       0 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie(),
-          0 /*numa_node*/, {}, {}),
+              .ValueOrDie()),
       new NoopRounder, "pool");
 
   EXPECT_EQ(0, pool.get_from_pool_count());
@@ -84,8 +82,7 @@ TEST(PoolAllocatorTest, Alignment) {
       0 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie(),
-          0 /*numa_node*/, {}, {}),
+              .ValueOrDie()),
       new NoopRounder, "pool");
   for (int i = 0; i < 16; ++i) {
     size_t alignment = 1 << i;
@@ -100,8 +97,8 @@ TEST(PoolAllocatorTest, Alignment) {
 
 TEST(PoolAllocatorTest, AutoResize) {
   PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/,
-                     new BasicCPUAllocator(0 /*numa_node*/, {}, {}),
-                     new NoopRounder, "pool");
+                     new BasicCPUAllocator(0 /*numa_node*/), new NoopRounder,
+                     "pool");
 
   // Alloc/dealloc 10 sizes just a few times, confirming pool size
   // stays at 2.
@@ -126,32 +123,14 @@ TEST(PoolAllocatorTest, AutoResize) {
 }
 
 TEST(PoolAllocatorTest, CudaHostAllocator) {
-  int alloc_count = 0;
-  int64 alloc_size = 0;
-  SubAllocator::Visitor alloc_visitor =
-      [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) {
-        ++alloc_count;
-        alloc_size += size;
-      };
-  int free_count = 0;
-  int64 free_size = 0;
-  SubAllocator::Visitor free_visitor =
-      [&free_count, &free_size](void* ptr, int numa_node, int64 size) {
-        ++free_count;
-        free_size += size;
-      };
   se::Platform* platform =
       se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
-  CUDAHostAllocator* sub_allocator = new CUDAHostAllocator(
-      platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-          .ValueOrDie(),
-      0 /*numa_node*/, {alloc_visitor}, {free_visitor});
-  PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
-                     sub_allocator, new NoopRounder, "pool");
-  EXPECT_EQ(0, alloc_count);
-  EXPECT_EQ(0, alloc_size);
-  EXPECT_EQ(0, free_count);
-  EXPECT_EQ(0, free_size);
+  PoolAllocator pool(
+      2 /*pool_size_limit*/, false /*auto_resize*/,
+      new CUDAHostAllocator(
+          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
+              .ValueOrDie()),
+      new NoopRounder, "pool");
 
   // Repeatedly Get a 16-byte value, confirming that there's only
   // one real allocation.
@@ -159,10 +138,6 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   EXPECT_EQ(0, pool.get_from_pool_count());
   EXPECT_EQ(1, pool.allocated_count());
   EXPECT_NE(nullptr, p1_16);
-  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
-  // Each suballocation includes a 16B ChunkPrefix.
-  static const int kChunkPrefixSize = 16;
-  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
   pool.DeallocateRaw(p1_16);
   // Pool contents {16}
   EXPECT_EQ(1, pool.put_count());
@@ -173,9 +148,6 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   pool.DeallocateRaw(p2_16);  // Put it back.
   // Pool contents {16}
   EXPECT_EQ(2, pool.put_count());
-  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
-  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
-  EXPECT_EQ(0, free_count);
 
   // Get two more values of different sizes.
   void* p3_4 = pool.AllocateRaw(4, 4);
@@ -188,9 +160,6 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   void* p4_2 = pool.AllocateRaw(4, 2);  // Get a third size buffer.
   EXPECT_NE(nullptr, p4_2);
   EXPECT_EQ(0, pool.evicted_count());
-  EXPECT_EQ(3, alloc_count);
-  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
-  EXPECT_EQ(0, free_count);
 
   // The pool is full: when we put back p4_2, the 16-byte buffer
   // should be evicted since it was least recently inserted.
@@ -198,10 +167,6 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   // Pool contents {2, 4}
   EXPECT_EQ(4, pool.put_count());
   EXPECT_EQ(1, pool.evicted_count());
-  EXPECT_EQ(3, alloc_count);
-  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
-  EXPECT_EQ(1, free_count);
-  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
 
   // Re-getting and putting size 2 or 4 should not alter pool size or
   // num-evicted.
@@ -215,20 +180,12 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   EXPECT_EQ(6, pool.put_count());
   EXPECT_EQ(3, pool.allocated_count());
   EXPECT_EQ(1, pool.evicted_count());
-  EXPECT_EQ(3, alloc_count);
-  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
-  EXPECT_EQ(1, free_count);
-  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
 
   pool.Clear();
   EXPECT_EQ(0, pool.get_from_pool_count());
   EXPECT_EQ(0, pool.put_count());
   EXPECT_EQ(0, pool.allocated_count());
   EXPECT_EQ(0, pool.evicted_count());
-  EXPECT_EQ(3, alloc_count);
-  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
-  EXPECT_EQ(3, free_count);
-  EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size);
 }
 
 TEST(PoolAllocatorTest, Pow2Rounder) {
@@ -249,8 +206,7 @@ TEST(PoolAllocatorTest, Name) {
       2 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie(),
-          0 /*numa_node*/, {}, {}),
+              .ValueOrDie()),
       new NoopRounder, "pool");
   EXPECT_EQ("pool", pool.Name());
 }
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 538a70668a..df9c3a686c 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -23,11 +23,12 @@ limitations under the License.
 
 #include <cstdlib>
 #include "tensorflow/core/common_runtime/bfc_allocator.h"
-#include "tensorflow/core/common_runtime/pool_allocator.h"
+#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/framework/allocator_registry.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mem.h"
-#include "tensorflow/core/platform/numa.h"
+#include "tensorflow/core/platform/mutex.h"
 
 #ifndef INTEL_MKL_DNN_ONLY
 #include "i_malloc.h"
@@ -39,16 +40,20 @@ typedef unsigned int uint;
 
 namespace tensorflow {
 
-class MklSubAllocator : public BasicCPUAllocator {
+class MklSubAllocator : public SubAllocator {
  public:
-  MklSubAllocator() : BasicCPUAllocator(port::kNUMANoAffinity, {}, {}) {}
   ~MklSubAllocator() override {}
+
+  void* Alloc(size_t alignment, size_t num_bytes) override {
+    return port::AlignedMalloc(num_bytes, alignment);
+  }
+  void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); }
 };
 
 // CPU allocator that handles small-size allocations by calling
 // suballocator directly. Mostly, it is just a wrapper around a suballocator
 // (that calls malloc and free directly) with support for bookkeeping.
-class MklSmallSizeAllocator : public Allocator {
+class MklSmallSizeAllocator : public VisitableAllocator {
  public:
   MklSmallSizeAllocator(SubAllocator* sub_allocator, size_t total_memory,
                         const string& name)
@@ -70,6 +75,10 @@ class MklSmallSizeAllocator : public Allocator {
       CHECK(map_.insert(map_val).second);
       // Increment statistics for small-size allocations.
       IncrementStats(num_bytes);
+      // Call alloc visitors.
+      for (const auto& visitor : alloc_visitors_) {
+        visitor(ptr, num_bytes);
+      }
     }
     return ptr;
   }
@@ -85,6 +94,9 @@ class MklSmallSizeAllocator : public Allocator {
     if (map_iter != map_.end()) {
       // Call free visitors.
       size_t dealloc_bytes = map_iter->second;
+      for (const auto& visitor : free_visitors_) {
+        visitor(ptr, dealloc_bytes);
+      }
       sub_allocator_->Free(ptr, dealloc_bytes);
       DecrementStats(dealloc_bytes);
       map_.erase(map_iter);
@@ -109,6 +121,16 @@ class MklSmallSizeAllocator : public Allocator {
     stats_.Clear();
   }
 
+  void AddAllocVisitor(Visitor visitor) override {
+    mutex_lock l(mutex_);
+    alloc_visitors_.push_back(visitor);
+  }
+
+  void AddFreeVisitor(Visitor visitor) override {
+    mutex_lock l(mutex_);
+    free_visitors_.push_back(visitor);
+  }
+
  private:
   // Increment statistics for the allocator handling small allocations.
   inline void IncrementStats(size_t alloc_size)
@@ -141,11 +163,15 @@ class MklSmallSizeAllocator : public Allocator {
 
   // Allocator stats for small allocs
   AllocatorStats stats_ GUARDED_BY(mutex_);
+
+  // Visitors
+  std::vector<Visitor> alloc_visitors_ GUARDED_BY(mutex_);
+  std::vector<Visitor> free_visitors_ GUARDED_BY(mutex_);
 };
 
 /// CPU allocator for MKL that wraps BFC allocator and intercepts
 /// and redirects memory allocation calls from MKL.
-class MklCPUAllocator : public Allocator {
+class MklCPUAllocator : public VisitableAllocator {
  public:
   // Constructor and other standard functions
 
@@ -258,6 +284,16 @@ class MklCPUAllocator : public Allocator {
     large_size_allocator_->ClearStats();
   }
 
+  void AddAllocVisitor(Visitor visitor) override {
+    small_size_allocator_->AddAllocVisitor(visitor);
+    large_size_allocator_->AddAllocVisitor(visitor);
+  }
+
+  void AddFreeVisitor(Visitor visitor) override {
+    small_size_allocator_->AddFreeVisitor(visitor);
+    large_size_allocator_->AddFreeVisitor(visitor);
+  }
+
  private:
   // Hooks provided by this allocator for memory allocation routines from MKL
 
@@ -294,7 +330,7 @@ class MklCPUAllocator : public Allocator {
   // The alignment that we need for the allocations
   static constexpr const size_t kAlignment = 64;
 
-  Allocator* large_size_allocator_;              // owned by this class
+  VisitableAllocator* large_size_allocator_;     // owned by this class
   MklSmallSizeAllocator* small_size_allocator_;  // owned by this class.
 
   SubAllocator* sub_allocator_;  // not owned by this class
diff --git a/tensorflow/core/common_runtime/pool_allocator.cc b/tensorflow/core/common_runtime/pool_allocator.cc
index 66dc8f3322..fdad8de8d6 100644
--- a/tensorflow/core/common_runtime/pool_allocator.cc
+++ b/tensorflow/core/common_runtime/pool_allocator.cc
@@ -40,7 +40,8 @@ PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
       auto_resize_(auto_resize),
       pool_size_limit_(pool_size_limit),
       allocator_(allocator),
-      size_rounder_(size_rounder) {
+      size_rounder_(size_rounder),
+      allocation_begun_(false) {
   if (auto_resize) {
     CHECK_LT(size_t{0}, pool_size_limit)
         << "size limit must be > 0 if auto_resize is true.";
@@ -92,6 +93,7 @@ ChunkPrefix* FindPrefix(void* user_ptr) {
 }  // namespace
 
 void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
+  if (!allocation_begun_) allocation_begun_ = true;
   if (num_bytes == 0) return nullptr;
 
   // If alignment is larger than kPoolAlignment, increase num_bytes so that we
@@ -127,6 +129,9 @@ void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
     return PrepareChunk(r, alignment, num_bytes);
   } else {
     void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes);
+    for (const auto& v : alloc_visitors_) {
+      v(ptr, num_bytes);
+    }
     return PrepareChunk(ptr, alignment, num_bytes);
   }
 }
@@ -136,6 +141,9 @@ void PoolAllocator::DeallocateRaw(void* ptr) {
   ChunkPrefix* cp = FindPrefix(ptr);
   CHECK_LE((void*)cp, (void*)ptr);
   if (!has_size_limit_ && !auto_resize_) {
+    for (const auto& v : free_visitors_) {
+      v(cp, cp->num_bytes);
+    }
     allocator_->Free(cp, cp->num_bytes);
   } else {
     mutex_lock lock(mutex_);
@@ -156,6 +164,9 @@ void PoolAllocator::Clear() {
     mutex_lock lock(mutex_);
     for (auto iter : pool_) {
       PtrRecord* pr = iter.second;
+      for (const auto& v : free_visitors_) {
+        v(pr->ptr, pr->num_bytes);
+      }
       allocator_->Free(pr->ptr, pr->num_bytes);
       delete pr;
     }
@@ -210,6 +221,9 @@ void PoolAllocator::EvictOne() {
     DCHECK(iter != pool_.end());
   }
   pool_.erase(iter);
+  for (const auto& v : free_visitors_) {
+    v(prec->ptr, prec->num_bytes);
+  }
   allocator_->Free(prec->ptr, prec->num_bytes);
   delete prec;
   ++evicted_count_;
@@ -255,19 +269,28 @@ void PoolAllocator::EvictOne() {
   }
 }
 
+void PoolAllocator::AddAllocVisitor(Visitor visitor) {
+  mutex_lock lock(mutex_);
+  CHECK(!allocation_begun_)
+      << "AddAllocVisitor may not be called after pool allocation "
+      << "has begun.";
+  alloc_visitors_.push_back(visitor);
+}
+
+void PoolAllocator::AddFreeVisitor(Visitor visitor) {
+  mutex_lock lock(mutex_);
+  CHECK(!allocation_begun_)
+      << "AddFreeVisitor may not be called after pool allocation "
+      << "has begun.";
+  free_visitors_.push_back(visitor);
+}
+
 void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes) {
-  void* ptr = nullptr;
-  if (num_bytes > 0) {
-    ptr = port::AlignedMalloc(num_bytes, static_cast<int>(alignment));
-    VisitAlloc(ptr, numa_node_, num_bytes);
-  }
-  return ptr;
+  return port::AlignedMalloc(num_bytes, static_cast<int>(alignment));
 }
 
 void BasicCPUAllocator::Free(void* ptr, size_t num_bytes) {
-  if (num_bytes > 0) {
-    VisitFree(ptr, numa_node_, num_bytes);
-    port::AlignedFree(ptr);
-  }
+  port::AlignedFree(ptr);
 }
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/pool_allocator.h b/tensorflow/core/common_runtime/pool_allocator.h
index 5b4623ba10..607734445b 100644
--- a/tensorflow/core/common_runtime/pool_allocator.h
+++ b/tensorflow/core/common_runtime/pool_allocator.h
@@ -16,13 +16,14 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
 
-// Simple LRU pool allocators for various flavors of CPU RAM.
+// Simple LRU pool allocators for various flavors of CPU RAM that
+// implement the VisitableAllocator interface.
 
 #include <atomic>
 #include <map>
 #include <memory>
 #include <vector>
-#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/lib/core/bits.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -40,7 +41,7 @@ class RoundUpInterface {
 
 // Size-limited pool of memory buffers obtained from a SubAllocator
 // instance.  Pool eviction policy is LRU.
-class PoolAllocator : public Allocator {
+class PoolAllocator : public VisitableAllocator {
  public:
   // "pool_size_limit" is the maximum number of returned, re-usable
   // memory buffers to keep in the pool.  If pool_size_limit == 0, the
@@ -63,6 +64,14 @@ class PoolAllocator : public Allocator {
 
   void DeallocateRaw(void* ptr) override;
 
+  // REQUIRES: The following functions may only be called prior
+  // to the first Allocate*() call.  Once allocation has begun, it is
+  // illegal to register another visitor.
+
+  void AddAllocVisitor(Visitor visitor) override;
+
+  void AddFreeVisitor(Visitor visitor) override;
+
   // Allocate an unused memory region of size "num_bytes".  Fetch from
   // the pool if available, otherwise call allocator_.
   void* Get(size_t num_bytes);
@@ -132,6 +141,12 @@ class PoolAllocator : public Allocator {
   int64 put_count_ GUARDED_BY(mutex_) = 0;
   int64 allocated_count_ GUARDED_BY(mutex_) = 0;
   int64 evicted_count_ GUARDED_BY(mutex_) = 0;
+  // Write access to these is guarded by mutex_, but not read
+  // access. They may only be modified prior to the first
+  // allocation.  Later attempts to modify will fail.
+  std::vector<Visitor> alloc_visitors_;
+  std::vector<Visitor> free_visitors_;
+  std::atomic<bool> allocation_begun_;
 };
 
 // Do-nothing rounder. Passes through sizes unchanged.
@@ -151,9 +166,7 @@ class Pow2Rounder : public RoundUpInterface {
 class BasicCPUAllocator : public SubAllocator {
  public:
   // Argument numa_node is currently ignored.
-  BasicCPUAllocator(int numa_node, const std::vector<Visitor>& alloc_visitors,
-                    const std::vector<Visitor>& free_visitors)
-      : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {}
+  explicit BasicCPUAllocator(int numa_node) : numa_node_(numa_node) {}
 
   ~BasicCPUAllocator() override {}
 
@@ -163,8 +176,6 @@ class BasicCPUAllocator : public SubAllocator {
 
  private:
   int numa_node_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc
index bcaa37fc8a..447338e7bd 100644
--- a/tensorflow/core/common_runtime/process_state.cc
+++ b/tensorflow/core/common_runtime/process_state.cc
@@ -71,28 +71,20 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) {
   return MemDesc();
 }
 
-Allocator* ProcessState::GetCPUAllocator(int numa_node) {
+VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) {
   CHECK_GE(numa_node, 0);
   if (!numa_enabled_) numa_node = 0;
   mutex_lock lock(mu_);
   while (cpu_allocators_.size() <= static_cast<size_t>(numa_node)) {
-    // If visitors have been defined we need an Allocator built from
-    // a SubAllocator.  Prefer BFCAllocator, but fall back to PoolAllocator
-    // depending on env var setting.
-    const bool alloc_visitors_defined =
-        (!cpu_alloc_visitors_.empty() || !cpu_free_visitors_.empty());
     bool use_bfc_allocator = false;
-    Status status = ReadBoolFromEnvVar(
-        "TF_CPU_ALLOCATOR_USE_BFC", alloc_visitors_defined, &use_bfc_allocator);
+    // TODO(reedwm): Switch default to BGFAllocator if it's at least as fast and
+    // efficient.
+    Status status = ReadBoolFromEnvVar("TF_CPU_ALLOCATOR_USE_BFC", false,
+                                       &use_bfc_allocator);
     if (!status.ok()) {
       LOG(ERROR) << "GetCPUAllocator: " << status.error_message();
     }
-    Allocator* allocator = nullptr;
-    SubAllocator* sub_allocator =
-        (alloc_visitors_defined || use_bfc_allocator)
-            ? new BasicCPUAllocator(numa_enabled_ ? numa_node : -1,
-                                    cpu_alloc_visitors_, cpu_free_visitors_)
-            : nullptr;
+    VisitableAllocator* allocator;
     if (use_bfc_allocator) {
       // TODO(reedwm): evaluate whether 64GB by default is the best choice.
       int64 cpu_mem_limit_in_mb = -1;
@@ -103,63 +95,34 @@ Allocator* ProcessState::GetCPUAllocator(int numa_node) {
         LOG(ERROR) << "GetCPUAllocator: " << status.error_message();
       }
       int64 cpu_mem_limit = cpu_mem_limit_in_mb * (1LL << 20);
-      DCHECK(sub_allocator);
-      allocator =
-          new BFCAllocator(sub_allocator, cpu_mem_limit, true /*allow_growth*/,
-                           "bfc_cpu_allocator_for_gpu" /*name*/);
+      allocator = new BFCAllocator(
+          new BasicCPUAllocator(numa_enabled_ ? numa_node : -1), cpu_mem_limit,
+          true /*allow_growth*/, "bfc_cpu_allocator_for_gpu" /*name*/);
       VLOG(2) << "Using BFCAllocator with memory limit of "
               << cpu_mem_limit_in_mb << " MB for ProcessState CPU allocator";
-    } else if (alloc_visitors_defined) {
-      DCHECK(sub_allocator);
-      allocator =
-          new PoolAllocator(100 /*pool_size_limit*/, true /*auto_resize*/,
-                            sub_allocator, new NoopRounder, "cpu_pool");
+    } else {
+      allocator = new PoolAllocator(
+          100 /*pool_size_limit*/, true /*auto_resize*/,
+          new BasicCPUAllocator(numa_enabled_ ? numa_node : -1),
+          new NoopRounder, "cpu_pool");
       VLOG(2) << "Using PoolAllocator for ProcessState CPU allocator "
               << "numa_enabled_=" << numa_enabled_
               << " numa_node=" << numa_node;
-    } else {
-      DCHECK(!sub_allocator);
-      allocator = cpu_allocator();
     }
-    if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) {
+    if (LogMemory::IsEnabled()) {
       // Wrap the allocator to track allocation ids for better logging
       // at the cost of performance.
-      allocator = new TrackingAllocator(allocator, true);
+      allocator = new TrackingVisitableAllocator(allocator, true);
     }
     cpu_allocators_.push_back(allocator);
-    if (!sub_allocator) {
-      DCHECK(cpu_alloc_visitors_.empty() && cpu_free_visitors_.empty());
-    }
   }
   return cpu_allocators_[numa_node];
 }
 
-void ProcessState::AddCPUAllocVisitor(SubAllocator::Visitor visitor) {
-  VLOG(1) << "AddCPUAllocVisitor";
-  mutex_lock lock(mu_);
-  CHECK_EQ(0, cpu_allocators_.size())  // Crash OK
-      << "AddCPUAllocVisitor must be called prior to first call to "
-         "ProcessState::GetCPUAllocator";
-  cpu_alloc_visitors_.push_back(std::move(visitor));
-}
-
-void ProcessState::AddCPUFreeVisitor(SubAllocator::Visitor visitor) {
-  mutex_lock lock(mu_);
-  CHECK_EQ(0, cpu_allocators_.size())  // Crash OK
-      << "AddCPUFreeVisitor must be called prior to first call to "
-         "ProcessState::GetCPUAllocator";
-  cpu_free_visitors_.push_back(std::move(visitor));
-}
-
 void ProcessState::TestOnlyReset() {
   mutex_lock lock(mu_);
-  // Don't delete this value because it's static.
-  Allocator* default_cpu_allocator = cpu_allocator();
   mem_desc_map_.clear();
-  for (Allocator* a : cpu_allocators_) {
-    if (a != default_cpu_allocator) delete a;
-  }
-  cpu_allocators_.clear();
+  gtl::STLDeleteElements(&cpu_allocators_);
   gtl::STLDeleteElements(&cpu_al_);
 }
 
diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h
index cac312d849..2892677333 100644
--- a/tensorflow/core/common_runtime/process_state.h
+++ b/tensorflow/core/common_runtime/process_state.h
@@ -30,6 +30,7 @@ limitations under the License.
 namespace tensorflow {
 
 class Allocator;
+class VisitableAllocator;
 class PoolAllocator;
 
 // Singleton that manages per-process state, e.g. allocation of
@@ -64,15 +65,7 @@ class ProcessState {
 
   // Returns the one CPUAllocator used for the given numa_node.
   // TEMPORARY: ignores numa_node.
-  Allocator* GetCPUAllocator(int numa_node);
-
-  // Registers alloc visitor for the CPU allocator(s).
-  // REQUIRES: must be called before GetCPUAllocator.
-  void AddCPUAllocVisitor(SubAllocator::Visitor v);
-
-  // Registers free visitor for the CPU allocator(s).
-  // REQUIRES: must be called before GetCPUAllocator.
-  void AddCPUFreeVisitor(SubAllocator::Visitor v);
+  VisitableAllocator* GetCPUAllocator(int numa_node);
 
   typedef std::unordered_map<const void*, MemDesc> MDMap;
 
@@ -94,9 +87,7 @@ class ProcessState {
 
   mutex mu_;
 
-  std::vector<Allocator*> cpu_allocators_ GUARDED_BY(mu_);
-  std::vector<SubAllocator::Visitor> cpu_alloc_visitors_ GUARDED_BY(mu_);
-  std::vector<SubAllocator::Visitor> cpu_free_visitors_ GUARDED_BY(mu_);
+  std::vector<VisitableAllocator*> cpu_allocators_ GUARDED_BY(mu_);
 
   virtual ~ProcessState();
 
diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h
index 9d59264899..103eee03b3 100644
--- a/tensorflow/core/common_runtime/renamed_device.h
+++ b/tensorflow/core/common_runtime/renamed_device.h
@@ -72,10 +72,9 @@ class RenamedDevice : public Device {
     return underlying_->MakeGpuDevice();
   }
 
-  Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
-                               DeviceContext* dc,
-                               Allocator* allocator) override {
-    return underlying_->ReinitializeGpuDevice(context, device, dc, allocator);
+  void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
+                             DeviceContext* dc, Allocator* allocator) override {
+    underlying_->ReinitializeGpuDevice(context, device, dc, allocator);
   }
 
   Status MakeTensorFromProto(const TensorProto& tensor_proto,
diff --git a/tensorflow/core/common_runtime/visitable_allocator.h b/tensorflow/core/common_runtime/visitable_allocator.h
new file mode 100644
index 0000000000..ae0563a96a
--- /dev/null
+++ b/tensorflow/core/common_runtime/visitable_allocator.h
@@ -0,0 +1,79 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_
+
+#include <functional>
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/tracking_allocator.h"
+
+namespace tensorflow {
+
+// Subclass VisitableAllocator instead of Allocator when a memory
+// allocator needs to enable some kind of registration/deregistration
+// of memory areas.
+class VisitableAllocator : public Allocator {
+ public:
+  // Visitor gets called with a pointer to a memory area and its
+  // size in bytes.
+  typedef std::function<void(void*, size_t)> Visitor;
+
+  // Register a visitor guaranteed to be called exactly once on each
+  // chunk of memory newly allocated from the underlying device.
+  // Typically, chunks will be reused and possibly sub-divided by a
+  // pool manager, so the calls will happen only once per process
+  // execution, not once per tensor (re)allocation.
+  virtual void AddAllocVisitor(Visitor visitor) = 0;
+
+  // Register a visitor guaranteed to be called on each chunk of
+  // memory returned to the underlying device.
+  virtual void AddFreeVisitor(Visitor visitor) = 0;
+};
+
+// Needed for cases when a VisitableAllocator gets wrapped for tracking.
+// Multiple-inheritance is considered acceptable in this case because
+// VisitableAllocator is a pure virtual interface and only TrackingAllocator
+// has default implementation.
+class TrackingVisitableAllocator : public TrackingAllocator,
+                                   public VisitableAllocator {
+ public:
+  TrackingVisitableAllocator(VisitableAllocator* allocator, bool track_ids)
+      : TrackingAllocator(allocator, track_ids), allocator_(allocator) {}
+  ~TrackingVisitableAllocator() override {}
+
+  string Name() override { return TrackingAllocator::Name(); }
+
+  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
+    return TrackingAllocator::AllocateRaw(alignment, num_bytes);
+  }
+
+  void DeallocateRaw(void* ptr) override {
+    TrackingAllocator::DeallocateRaw(ptr);
+  }
+
+  void AddAllocVisitor(Visitor visitor) override {
+    allocator_->AddAllocVisitor(visitor);
+  }
+
+  void AddFreeVisitor(Visitor visitor) override {
+    allocator_->AddFreeVisitor(visitor);
+  }
+
+ protected:
+  VisitableAllocator* allocator_;
+};
+}  // namespace tensorflow
+#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_
diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc
index 84cee5569c..2a7ee16a16 100644
--- a/tensorflow/core/framework/allocator.cc
+++ b/tensorflow/core/framework/allocator.cc
@@ -196,7 +196,7 @@ class CPUAllocatorFactory : public AllocatorFactory {
   class CPUSubAllocator : public SubAllocator {
    public:
     explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
-        : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
+        : cpu_allocator_(cpu_allocator) {}
 
     void* Alloc(size_t alignment, size_t num_bytes) override {
       return cpu_allocator_->AllocateRaw(alignment, num_bytes);
@@ -222,22 +222,4 @@ Allocator* cpu_allocator() {
   }
   return cpu_alloc;
 }
-
-SubAllocator::SubAllocator(const std::vector<Visitor>& alloc_visitors,
-                           const std::vector<Visitor>& free_visitors)
-    : alloc_visitors_(alloc_visitors), free_visitors_(free_visitors) {}
-
-void SubAllocator::VisitAlloc(void* ptr, int index, size_t num_bytes) {
-  for (const auto& v : alloc_visitors_) {
-    v(ptr, index, num_bytes);
-  }
-}
-
-void SubAllocator::VisitFree(void* ptr, int index, size_t num_bytes) {
-  // Although we don't guarantee any order of visitor application, strive
-  // to apply free visitors in reverse order of alloc visitors.
-  for (int i = free_visitors_.size() - 1; i >= 0; --i) {
-    free_visitors_[i](ptr, index, num_bytes);
-  }
-}
 }  // namespace tensorflow
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index 8c23604625..ded120b704 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -24,7 +24,6 @@ limitations under the License.
 #include "tensorflow/core/framework/resource_handle.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -388,36 +387,13 @@ void EnableCPUAllocatorStats(bool enable);
 // full statistics. By default, it's disabled.
 void EnableCPUAllocatorFullStats(bool enable);
 
-// An object that does the underlying suballoc/free of memory for a higher-level
-// allocator.  The expectation is that the higher-level allocator is doing some
-// kind of cache or pool management so that it will call SubAllocator::Alloc and
-// Free relatively infrequently, compared to the number of times its own
-// AllocateRaw and Free methods are called.
+// Abstract interface of an object that does the underlying suballoc/free of
+// memory for a higher-level allocator.
 class SubAllocator {
  public:
-  // Visitor gets called with a pointer to a memory area and its
-  // size in bytes.  The index value will be numa_node for a CPU
-  // allocator and GPU id for a GPU allocator.
-  typedef std::function<void(void*, int index, size_t)> Visitor;
-
-  SubAllocator(const std::vector<Visitor>& alloc_visitors,
-               const std::vector<Visitor>& free_visitors);
-
   virtual ~SubAllocator() {}
   virtual void* Alloc(size_t alignment, size_t num_bytes) = 0;
   virtual void Free(void* ptr, size_t num_bytes) = 0;
-
- protected:
-  // Implementation of Alloc() method must call this on newly allocated
-  // value.
-  void VisitAlloc(void* ptr, int index, size_t num_bytes);
-
-  // Implementation of Free() method must call this on value to be
-  // freed immediately before deallocation.
-  void VisitFree(void* ptr, int index, size_t num_bytes);
-
-  const std::vector<Visitor> alloc_visitors_;
-  const std::vector<Visitor> free_visitors_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index 53ac639b4c..794250a2c1 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -214,12 +214,10 @@ class DeviceBase {
 
   // This is overridden by GPU devices to reinitialize the derived
   // type returned by MakeGpuDevice.
-  virtual Status ReinitializeGpuDevice(OpKernelContext* /*context*/,
-                                       PerOpGpuDevice* /*device*/,
-                                       DeviceContext* /*dc*/,
-                                       Allocator* /*allocator*/) {
-    return Status::OK();
-  }
+  virtual void ReinitializeGpuDevice(OpKernelContext* /*context*/,
+                                     PerOpGpuDevice* /*device*/,
+                                     DeviceContext* /*dc*/,
+                                     Allocator* /*allocator*/) {}
 
   // Unimplemented by default
   virtual const DeviceAttributes& attributes() const;
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 3e34bf0418..80f2b12987 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -265,12 +265,9 @@ OpKernelContext::OpKernelContext(Params* params, int num_outputs)
   params_->ensure_eigen_gpu_device();
   if (params_->eigen_gpu_device != nullptr) {
     Allocator* eigen_gpu_allocator = get_allocator(AllocatorAttributes());
-    Status s = params_->device->ReinitializeGpuDevice(
-        this, params_->eigen_gpu_device, params_->op_device_context,
-        eigen_gpu_allocator);
-    if (!s.ok()) {
-      SetStatus(s);
-    }
+    params_->device->ReinitializeGpuDevice(this, params_->eigen_gpu_device,
+                                           params_->op_device_context,
+                                           eigen_gpu_allocator);
   }
   if (params_->record_tensor_accesses) {
     referenced_tensors_.Init();
-- 
GitLab


From 5d51afdfec8c6a96d48457d4678e2835100577a6 Mon Sep 17 00:00:00 2001
From: Bairen Yi <byi@connect.ust.hk>
Date: Tue, 18 Sep 2018 13:17:07 +0800
Subject: [PATCH 0319/1357] Support scoped_allocator_ops for renamed device.

This fixes #22274.

Signed-off-by: Bairen Yi <byi@connect.ust.hk>
---
 tensorflow/core/common_runtime/renamed_device.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h
index 103eee03b3..caf1300d85 100644
--- a/tensorflow/core/common_runtime/renamed_device.h
+++ b/tensorflow/core/common_runtime/renamed_device.h
@@ -58,6 +58,15 @@ class RenamedDevice : public Device {
     return underlying_->GetAllocator(attr);
   }
 
+  Allocator* GetScopedAllocator(AllocatorAttributes attr,
+                                int64 step_id) override {
+    return underlying_->GetScopedAllocator(attr, step_id);
+  }
+
+  ScopedAllocatorMgr* GetScopedAllocatorMgr() const override {
+    return underlying_->GetScopedAllocatorMgr();
+  }
+
   const Eigen::ThreadPoolDevice* eigen_cpu_device() override {
     return underlying_->eigen_cpu_device();
   }
-- 
GitLab


From 9cc7bbe5b476bec556d7dce235996a03775d7492 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 17 Sep 2018 23:09:48 -0700
Subject: [PATCH 0320/1357] [XLA] Refactor conv_ops emitters to make them
 reusable.

PiperOrigin-RevId: 213398930
---
 tensorflow/compiler/tf2xla/kernels/BUILD      |  22 +
 .../tf2xla/kernels/conv_op_helpers.cc         | 509 ++++++++++++++++
 .../compiler/tf2xla/kernels/conv_op_helpers.h |  69 +++
 .../compiler/tf2xla/kernels/conv_ops.cc       | 551 ++----------------
 tensorflow/compiler/tf2xla/shape_util.cc      |  14 +-
 tensorflow/compiler/tf2xla/shape_util.h       |   5 +
 6 files changed, 661 insertions(+), 509 deletions(-)
 create mode 100644 tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
 create mode 100644 tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h

diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 46794f7b50..3e823254d3 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -113,6 +113,7 @@ tf_kernel_library(
         "shape_util.h",
     ],
     deps = [
+        ":conv_op_helpers",
         ":if_op",
         ":while_op",
         "//tensorflow/compiler/tf2xla:common",
@@ -172,6 +173,27 @@ tf_kernel_library(
     ],
 )
 
+cc_library(
+    name = "conv_op_helpers",
+    srcs = ["conv_op_helpers.cc"],
+    hdrs = ["conv_op_helpers.h"],
+    deps = [
+        "//tensorflow/compiler/tf2xla:common",
+        "//tensorflow/compiler/tf2xla:xla_compiler",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/client/lib:arithmetic",
+        "//tensorflow/compiler/xla/client/lib:constants",
+        "//tensorflow/compiler/xla/client/lib:numeric",
+        "//tensorflow/core:framework",
+        "//tensorflow/core/kernels:bounds_check",
+        "//tensorflow/core/kernels:conv_ops",
+        "//tensorflow/core/kernels:ops_util",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
 tf_kernel_library(
     name = "while_op",
     srcs = ["while_op.cc"],
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
new file mode 100644
index 0000000000..c9a1be4940
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
@@ -0,0 +1,509 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// XLA-specific Ops for 2D convolution.
+
+#include "tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h"
+#include "absl/types/span.h"
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/lib/numeric.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_slice.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/conv_grad_ops.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/util/padding.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+namespace tensorflow {
+namespace {
+
+// Returns the expanded size of a filter used for depthwise convolution.
+// If `shape` is [H, W, ..., M, N] returns [H, W, ..., M, M*N].
+xla::Shape ExpandedFilterShapeForDepthwiseConvolution(const xla::Shape& shape) {
+  int num_dims = shape.dimensions_size();
+  CHECK_GE(num_dims, 2);  // Crash OK
+  xla::Shape expanded_shape = shape;
+  expanded_shape.set_dimensions(
+      num_dims - 1,
+      shape.dimensions(num_dims - 2) * shape.dimensions(num_dims - 1));
+  return expanded_shape;
+}
+
+// Create a mask for depthwise convolution that will make a normal convolution
+// produce the same results as a depthwise convolution. For a [2, 2, 3, 2]
+// depthwise filter this returns a [2, 2, 3, 6] tensor
+//   1 1 0 0 0 0   1 1 0 0 0 0
+//   0 0 1 1 0 0   0 0 1 1 0 0
+//   0 0 0 0 1 1   0 0 0 0 1 1
+//
+//   1 1 0 0 0 0   1 1 0 0 0 0
+//   0 0 1 1 0 0   0 0 1 1 0 0
+//   0 0 0 0 1 1   0 0 0 0 1 1
+//
+// The first step is to create a one tensor, A, that is [3]
+//   0 1 2
+//
+// and another tensor, B,  that is [3 * 2]
+//   0 1 2 3 4 5
+//
+// and divide B it by 2 to get
+//   0 0 1 1 2 2
+//
+// then we broadcast the B to [2, 2, 3, 3 * 2]
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//
+// Finally compare A and broadcasted B in dimension 2 amd return the result at
+// the beginning of the comment.
+xla::XlaOp CreateExpandedFilterMask(const xla::Shape& filter_shape,
+                                    xla::XlaBuilder* builder) {
+  xla::Shape expanded_filter_shape =
+      ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
+  int64 depthwise_multiplier =
+      filter_shape.dimensions(filter_shape.dimensions_size() - 1);
+  int64 input_feature =
+      filter_shape.dimensions(filter_shape.dimensions_size() - 2);
+
+  // Create a M sized linspace and an M*N sized linspace that will be
+  // broadcasted into perpendicular dimensions and compared.
+  xla::XlaOp input_feature_iota = xla::Iota(builder, xla::S32, input_feature);
+  xla::XlaOp expanded_feature_iota =
+      xla::Iota(builder, xla::S32, input_feature * depthwise_multiplier);
+
+  // Divide the M*N sized linspace by the depthwise_multiplier to create
+  // [0 0 1 1 2 2] in the example in the function comment.
+  expanded_feature_iota =
+      xla::Div(expanded_feature_iota,
+               XlaHelpers::IntegerLiteral(builder, DataType::DT_INT32,
+                                          depthwise_multiplier));
+
+  // Broadcast the N*M linspace to [H, W, ..., M, M*N].
+  std::vector<int64> expanded_feature_broadcast_dims(
+      expanded_filter_shape.dimensions().begin(),
+      expanded_filter_shape.dimensions().end());
+  expanded_feature_broadcast_dims.pop_back();
+  auto broadcasted_expanded_feature_iota =
+      xla::Broadcast(expanded_feature_iota, expanded_feature_broadcast_dims);
+
+  // Compare the broadcasted linspace to the input feature linspace in the
+  // input feature dimension to create a diagonal predicate.
+  return xla::Eq(broadcasted_expanded_feature_iota, input_feature_iota,
+                 {expanded_filter_shape.dimensions_size() - 2});
+}
+
+// Reshapes a filter of shape [H, W, ..., M, N] to [H, W, ..., 1, M*N]. Used to
+// build a depthwise convolution.
+xla::XlaOp ReshapeFilterForDepthwiseConvolution(const xla::Shape& filter_shape,
+                                                const xla::XlaOp& filter) {
+  int64 input_feature_dim = filter_shape.dimensions_size() - 2;
+  int64 output_feature_dim = filter_shape.dimensions_size() - 1;
+  int64 depthwise_multiplier = filter_shape.dimensions(output_feature_dim);
+  int64 input_feature = filter_shape.dimensions(input_feature_dim);
+
+  // Create a [H, W, ..., 1, N*M] reshape of the filter.
+  xla::Shape implicit_broadcast_filter_shape = filter_shape;
+  implicit_broadcast_filter_shape.set_dimensions(input_feature_dim, 1);
+  implicit_broadcast_filter_shape.set_dimensions(
+      output_feature_dim, depthwise_multiplier * input_feature);
+  return xla::Reshape(
+      filter, xla::AsInt64Slice(implicit_broadcast_filter_shape.dimensions()));
+}
+
+// Reduces the results of the convolution with an expanded filter to the
+// non-expanded filter.
+xla::XlaOp ContractFilterForDepthwiseBackprop(const xla::Shape& filter_shape,
+                                              const xla::XlaOp& filter_backprop,
+                                              xla::XlaBuilder* builder) {
+  auto masked_expanded_filter =
+      xla::Select(CreateExpandedFilterMask(filter_shape, builder),
+                  filter_backprop, xla::ZerosLike(filter_backprop));
+
+  auto elem_type = filter_shape.element_type();
+  return xla::Reshape(
+      // This reduce does not need inputs to be converted with
+      // XlaHelpers::SumAccumulationType() since the select above guarantees
+      // that only one element is non zero, so there cannot be accumulated
+      // precision error.
+      xla::Reduce(masked_expanded_filter, xla::Zero(builder, elem_type),
+                  CreateScalarAddComputation(elem_type, builder),
+                  {filter_shape.dimensions_size() - 2}),
+      xla::AsInt64Slice(filter_shape.dimensions()));
+}
+
+// Performs some basic checks on ConvOpAttrs that are true for all kinds of XLA
+// convolutions (as currently implemented).
+Status CheckConvAttrs(const ConvOpAttrs& attrs) {
+  const int num_dims = attrs.num_spatial_dims + 2;
+  if (attrs.strides.size() != num_dims) {
+    return errors::InvalidArgument("Sliding window strides field must specify ",
+                                   num_dims, " dimensions");
+  }
+  int batch_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format);
+  int feature_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format);
+  if (attrs.strides[batch_dim] != 1 || attrs.strides[feature_dim] != 1) {
+    return errors::Unimplemented(
+        "Current implementation does not yet support strides in the batch and "
+        "depth dimensions.");
+  }
+  if (attrs.dilations.size() != num_dims) {
+    return errors::InvalidArgument("Dilations field must specify ", num_dims,
+                                   " dimensions");
+  }
+  if (attrs.dilations[batch_dim] != 1 || attrs.dilations[feature_dim] != 1) {
+    return errors::Unimplemented(
+        "Current implementation does not support dilations in the batch and "
+        "depth dimensions.");
+  }
+  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
+    int input_dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i);
+    if (attrs.dilations[input_dim] < 1) {
+      return errors::Unimplemented("Dilation values must be positive; ", i,
+                                   "th spatial dimension had dilation ",
+                                   attrs.dilations[input_dim]);
+    }
+  }
+  return Status::OK();
+}
+
+// Wrapper around ConvBackpropComputeDimensions that converts from XLA shapes
+// to TensorShapes.
+Status ConvBackpropComputeDimensionsV2XlaShapes(
+    StringPiece label, int num_spatial_dims, const xla::Shape& input_shape,
+    const xla::Shape& filter_shape, const xla::Shape& out_backprop_shape,
+    absl::Span<const int32> dilations, const std::vector<int32>& strides,
+    Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) {
+  TensorShape input_tensor_shape, filter_tensor_shape,
+      out_backprop_tensor_shape;
+  TF_RETURN_IF_ERROR(XLAShapeToTensorShape(input_shape, &input_tensor_shape));
+  TF_RETURN_IF_ERROR(XLAShapeToTensorShape(filter_shape, &filter_tensor_shape));
+  TF_RETURN_IF_ERROR(
+      XLAShapeToTensorShape(out_backprop_shape, &out_backprop_tensor_shape));
+  return ConvBackpropComputeDimensionsV2(
+      label, num_spatial_dims, input_tensor_shape, filter_tensor_shape,
+      out_backprop_tensor_shape, dilations, strides, padding, data_format,
+      dims);
+}
+
+}  // anonymous namespace
+
+xla::StatusOr<ConvOpAttrs> ConvOpAttrs::Create(int num_spatial_dims,
+                                               bool depthwise,
+                                               OpKernelConstruction* ctx) {
+  ConvOpAttrs attrs;
+  attrs.num_spatial_dims = num_spatial_dims;
+  attrs.depthwise = depthwise;
+  TF_RETURN_IF_ERROR(ctx->GetAttr("dilations", &attrs.dilations));
+  TF_RETURN_IF_ERROR(ctx->GetAttr("strides", &attrs.strides));
+  TF_RETURN_IF_ERROR(ctx->GetAttr("padding", &attrs.padding));
+
+  string data_format;
+  TF_RETURN_IF_ERROR(ctx->GetAttr("data_format", &data_format));
+  if (!FormatFromString(data_format, &attrs.data_format)) {
+    return errors::InvalidArgument("Invalid data format: ", data_format);
+  }
+
+  return attrs;
+}
+
+xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(StringPiece /*type_string*/,
+                                               xla::XlaOp conv_input,
+                                               xla::XlaOp filter,
+                                               const ConvOpAttrs& attrs) {
+  TF_RETURN_IF_ERROR(CheckConvAttrs(attrs));
+
+  auto* builder = conv_input.builder();
+  TF_ASSIGN_OR_RETURN(xla::Shape input_shape, builder->GetShape(conv_input));
+  // Filter has the form [filter_rows, filter_cols, ..., in_depth, out_depth]
+  TF_ASSIGN_OR_RETURN(xla::Shape filter_shape, builder->GetShape(filter));
+
+  // For 2D convolution, there should be 4 dimensions.
+  int num_dims = attrs.num_spatial_dims + 2;
+  if (input_shape.dimensions_size() != num_dims) {
+    return errors::InvalidArgument("input must be ", num_dims, "-dimensional",
+                                   input_shape.DebugString());
+  }
+  if (filter_shape.dimensions_size() != num_dims) {
+    return errors::InvalidArgument(
+        "filter must be ", num_dims,
+        "-dimensional: ", filter_shape.DebugString());
+  }
+
+  // The last two dimensions of the filter are the input and output shapes.
+  int batch_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format);
+  int feature_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format);
+
+  int64 in_depth = filter_shape.dimensions(attrs.num_spatial_dims);
+  // The 'C' dimension for input is in_depth. It must be the same as
+  // the filter's in_depth.
+  if (in_depth != input_shape.dimensions(feature_dim)) {
+    return errors::InvalidArgument(
+        "input and filter must have the same depth: ", in_depth, " vs ",
+        input_shape.dimensions(feature_dim));
+  }
+
+  if (attrs.depthwise) {
+    filter = ReshapeFilterForDepthwiseConvolution(filter_shape, filter);
+  }
+
+  xla::ConvolutionDimensionNumbers dims;
+  std::vector<int64> window_strides(attrs.num_spatial_dims);
+  std::vector<int64> lhs_dilation(attrs.num_spatial_dims, 1);
+  std::vector<int64> rhs_dilation(attrs.num_spatial_dims);
+  std::vector<std::pair<int64, int64>> padding(attrs.num_spatial_dims);
+
+  dims.set_input_batch_dimension(batch_dim);
+  dims.set_output_batch_dimension(batch_dim);
+  dims.set_input_feature_dimension(feature_dim);
+  dims.set_output_feature_dimension(feature_dim);
+  dims.set_kernel_input_feature_dimension(attrs.num_spatial_dims);
+  dims.set_kernel_output_feature_dimension(attrs.num_spatial_dims + 1);
+
+  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
+    const int64 dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i);
+    dims.add_input_spatial_dimensions(dim);
+    dims.add_kernel_spatial_dimensions(i);
+    dims.add_output_spatial_dimensions(dim);
+    window_strides[i] = attrs.strides.at(dim);
+    rhs_dilation[i] = attrs.dilations.at(dim);
+
+    int64 unused_output_size;
+    TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerboseV2(
+        input_shape.dimensions(dim), filter_shape.dimensions(i),
+        rhs_dilation[i], window_strides[i], attrs.padding, &unused_output_size,
+        &padding[i].first, &padding[i].second));
+  }
+
+  return xla::ConvGeneralDilated(
+      conv_input, filter, window_strides, padding, lhs_dilation, rhs_dilation,
+      dims, /*feature_group_count=*/attrs.depthwise ? in_depth : 1);
+}
+
+xla::StatusOr<xla::XlaOp> MakeXlaBackpropInputConvOp(
+    StringPiece type_string, const xla::Shape& input_shape, xla::XlaOp filter,
+    xla::XlaOp out_backprop, const ConvOpAttrs& attrs) {
+  TF_RETURN_IF_ERROR(CheckConvAttrs(attrs));
+
+  int num_dims = attrs.num_spatial_dims + 2;
+  int batch_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format);
+  int feature_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format);
+
+  auto* builder = filter.builder();
+  TF_ASSIGN_OR_RETURN(xla::Shape filter_shape, builder->GetShape(filter));
+  TF_ASSIGN_OR_RETURN(xla::Shape out_backprop_shape,
+                      builder->GetShape(out_backprop));
+
+  xla::Shape expanded_filter_shape =
+      attrs.depthwise ? ExpandedFilterShapeForDepthwiseConvolution(filter_shape)
+                      : filter_shape;
+  // Reuse dimension computation logic from conv_grad_ops.cc.
+  ConvBackpropDimensions dims;
+  TF_RETURN_IF_ERROR(ConvBackpropComputeDimensionsV2XlaShapes(
+      type_string, attrs.num_spatial_dims, input_shape, expanded_filter_shape,
+      out_backprop_shape, attrs.dilations, attrs.strides, attrs.padding,
+      attrs.data_format, &dims));
+
+  // The input gradients are computed by a convolution of the output
+  // gradients and the filter, with some appropriate padding. See the
+  // comment at the top of conv_grad_ops.h for details.
+
+  xla::ConvolutionDimensionNumbers dnums;
+  dnums.set_input_batch_dimension(batch_dim);
+  dnums.set_output_batch_dimension(batch_dim);
+  dnums.set_input_feature_dimension(feature_dim);
+  dnums.set_output_feature_dimension(feature_dim);
+
+  // TF filter shape is [ H, W, ..., inC, outC ]
+  // Transpose the input and output features for computing the gradient.
+  dnums.set_kernel_input_feature_dimension(attrs.num_spatial_dims + 1);
+  dnums.set_kernel_output_feature_dimension(attrs.num_spatial_dims);
+
+  std::vector<int64> kernel_spatial_dims(attrs.num_spatial_dims);
+  std::vector<std::pair<int64, int64>> padding(attrs.num_spatial_dims);
+  std::vector<int64> lhs_dilation(attrs.num_spatial_dims);
+  std::vector<int64> rhs_dilation(attrs.num_spatial_dims);
+  std::vector<int64> ones(attrs.num_spatial_dims, 1);
+  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
+    int64 dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i);
+    dnums.add_input_spatial_dimensions(dim);
+    dnums.add_kernel_spatial_dimensions(i);
+    dnums.add_output_spatial_dimensions(dim);
+
+    kernel_spatial_dims[i] = i;
+    padding[i] = {dims.spatial_dims[i].pad_before,
+                  dims.spatial_dims[i].pad_after};
+    lhs_dilation[i] = dims.spatial_dims[i].stride;
+    rhs_dilation[i] = attrs.dilations[dim];
+  }
+
+  // Mirror the filter in the spatial dimensions.
+  xla::XlaOp mirrored_weights = xla::Rev(filter, kernel_spatial_dims);
+
+  // activation gradients
+  //   = gradients (with padding and dilation) <conv> mirrored_weights
+  return xla::ConvGeneralDilated(
+      out_backprop, mirrored_weights, /*window_strides=*/ones, padding,
+      lhs_dilation, rhs_dilation, dnums,
+      /*feature_group_count=*/
+      attrs.depthwise ? out_backprop_shape.dimensions(feature_dim) /
+                            filter_shape.dimensions(attrs.num_spatial_dims + 1)
+                      : 1);
+}
+
+xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
+    StringPiece type_string, xla::XlaOp activations,
+    const xla::Shape& filter_shape, xla::XlaOp gradients,
+    const ConvOpAttrs& attrs) {
+  TF_RETURN_IF_ERROR(CheckConvAttrs(attrs));
+
+  auto* builder = activations.builder();
+  TF_ASSIGN_OR_RETURN(xla::Shape activations_shape,
+                      builder->GetShape(activations));
+  TF_ASSIGN_OR_RETURN(xla::Shape out_backprop_shape,
+                      builder->GetShape(gradients));
+  const xla::Shape expanded_filter_shape =
+      attrs.depthwise ? ExpandedFilterShapeForDepthwiseConvolution(filter_shape)
+                      : filter_shape;
+
+  // Reuse dimension computation logic from conv_grad_ops.cc.
+  ConvBackpropDimensions dims;
+  TF_RETURN_IF_ERROR(ConvBackpropComputeDimensionsV2XlaShapes(
+      type_string, attrs.num_spatial_dims, activations_shape,
+      expanded_filter_shape, out_backprop_shape, attrs.dilations, attrs.strides,
+      attrs.padding, attrs.data_format, &dims));
+
+  // The filter gradients are computed by a convolution of the input
+  // activations and the output gradients, with some appropriate padding.
+  // See the comment at the top of conv_grad_ops.h for details.
+
+  xla::ConvolutionDimensionNumbers dnums;
+
+  // The activations (inputs) form the LHS of the convolution.
+  // Activations have shape: [batch, in_rows, in_cols, ..., in_depth]
+  // For the gradient computation, we flip the roles of the batch and
+  // feature dimensions.
+  // Each spatial entry has size in_depth * batch
+
+  // The last two dimensions of the filter are the input and output shapes.
+  int num_dims = attrs.num_spatial_dims + 2;
+  int n_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format);
+  int c_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format);
+
+  // Swap n_dim and c_dim in the activations.
+  dnums.set_input_batch_dimension(c_dim);
+  dnums.set_input_feature_dimension(n_dim);
+
+  // The gradients become the RHS of the convolution.
+  // The gradients have shape [batch, out_rows, out_cols, ..., out_depth]
+  // where the batch becomes the input feature for the convolution.
+  dnums.set_kernel_input_feature_dimension(n_dim);
+  dnums.set_kernel_output_feature_dimension(c_dim);
+
+  std::vector<std::pair<int64, int64>> padding(attrs.num_spatial_dims);
+  std::vector<int64> rhs_dilation(attrs.num_spatial_dims);
+  std::vector<int64> window_strides(attrs.num_spatial_dims);
+  std::vector<int64> ones(attrs.num_spatial_dims, 1);
+
+  // Tensorflow filter shape is [ H, W, ..., inC, outC ].
+  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
+    dnums.add_output_spatial_dimensions(i);
+  }
+  dnums.set_output_batch_dimension(attrs.num_spatial_dims);
+  dnums.set_output_feature_dimension(attrs.num_spatial_dims + 1);
+
+  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
+    int64 dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i);
+    dnums.add_input_spatial_dimensions(dim);
+    dnums.add_kernel_spatial_dimensions(dim);
+
+    // We will also need to pad the input with zeros such that after the
+    // convolution, we get the right size for the filter.
+    // The padded_in_rows should be such that when we convolve this with the
+    // expanded_out_rows as a filter, we should get filter_rows back.
+    //
+    const int64 padded_in_size =
+        dims.spatial_dims[i].expanded_output_size +
+        (dims.spatial_dims[i].filter_size - 1) * attrs.dilations[dim];
+
+    // However it can be smaller than input_rows: in this
+    // case it means some of the inputs are not used.
+    //
+    // An example is to have input_cols = 3, filter_cols = 2 and stride = 2:
+    //
+    // INPUT =  [ A  B  C ]
+    //
+    // FILTER = [ x y ]
+    //
+    // and the output will only have one column: a = A * x + B * y
+    //
+    // and input "C" is not used at all.
+    //
+    // We apply negative padding in this case.
+    const int64 pad_total = padded_in_size - dims.spatial_dims[i].input_size;
+
+    // + For the VALID padding, we don't pad anything on the top/left side
+    //   and pad the bottom/right side with the remaining space.
+    // + For the SAME padding, we pad top/left side the same as bottom/right
+    //   side.
+    //
+    // In addition, if the padded input size is smaller than the input size,
+    // we need to ignore some training elements of the input. We do this by
+    // applying negative padding on the right/bottom.
+    const int64 pad_before =
+        attrs.padding == Padding::SAME ? std::max<int64>(pad_total / 2, 0) : 0;
+
+    padding[i] = {pad_before, pad_total - pad_before};
+    rhs_dilation[i] = dims.spatial_dims[i].stride;
+    window_strides[i] = attrs.dilations[dim];
+  }
+
+  // Besides padding the input, we will also expand output_rows to
+  //    expanded_out_rows = (output_rows - 1) * stride + 1
+  // with zeros in between:
+  //
+  //      a . . . b . . . c . . . d . . . e
+  //
+  // This is done by specifying the window dilation factors in the
+  // convolution HLO below.
+  auto filter_backprop =
+      xla::ConvGeneralDilated(activations, gradients, window_strides, padding,
+                              /*lhs_dilation=*/ones, rhs_dilation, dnums);
+
+  if (attrs.depthwise) {
+    filter_backprop = ContractFilterForDepthwiseBackprop(
+        filter_shape, filter_backprop, activations.builder());
+  }
+
+  return filter_backprop;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h
new file mode 100644
index 0000000000..6e1b70a478
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h
@@ -0,0 +1,69 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_TF2XLA_KERNELS_CONV_OP_HELPERS_H_
+#define TENSORFLOW_COMPILER_TF2XLA_KERNELS_CONV_OP_HELPERS_H_
+
+#include <vector>
+
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/util/padding.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+// This header exposes utilities for translating TensorFlow convolution ops into
+// XLA ops.
+//
+// conv_ops.cc contains lowerings for many of these TF convolution ops (e.g.
+// Conv2D, Conv3DBackpropFilterV2), but you might want to use the utilities in
+// this header to implement a new and exciting convolution op, for example a
+// fused TensorFlow op that contains a convolution and other things.
+
+namespace tensorflow {
+
+// ConvOpAttrs contains all of the metadata necessary to specify a TF or XLA
+// convolution.
+struct ConvOpAttrs {
+  // Constructs a ConvOpAttrs, reading most of the attributes from `ctx`.
+  static xla::StatusOr<ConvOpAttrs> Create(int num_spatial_dims, bool depthwise,
+                                           OpKernelConstruction* ctx);
+
+  bool depthwise;
+  int num_spatial_dims;
+  std::vector<int32> dilations;
+  std::vector<int32> strides;
+  Padding padding;
+  TensorFormat data_format;
+};
+
+// Creates a new XLA forward or backward convolution with the given inputs and
+// attributes.
+xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(StringPiece type_string,
+                                               xla::XlaOp conv_input,
+                                               xla::XlaOp filter,
+                                               const ConvOpAttrs& attrs);
+xla::StatusOr<xla::XlaOp> MakeXlaBackpropInputConvOp(
+    StringPiece type_string, const xla::Shape& input_shape, xla::XlaOp filter,
+    xla::XlaOp out_backprop, const ConvOpAttrs& attrs);
+xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
+    StringPiece type_string, xla::XlaOp activations,
+    const xla::Shape& filter_shape, xla::XlaOp gradients,
+    const ConvOpAttrs& attrs);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_TF2XLA_KERNELS_CONV_OP_HELPERS_H_
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
index 674720e22f..cd7c820be0 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
@@ -15,12 +15,17 @@ limitations under the License.
 
 // XLA-specific Ops for 2D convolution.
 
+#include "tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h"
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/constants.h"
 #include "tensorflow/compiler/xla/client/lib/numeric.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -33,250 +38,28 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_format.h"
 
 namespace tensorflow {
-
 namespace {
 
-// Returns the expanded size of a filter used for depthwise convolution.
-// If `shape` is [H, W, ..., M, N] returns [H, W, ..., M, M*N].
-TensorShape ExpandedFilterShapeForDepthwiseConvolution(
-    const TensorShape& shape) {
-  int num_dims = shape.dims();
-  CHECK_GE(num_dims, 2);
-  TensorShape expanded_shape = shape;
-  expanded_shape.set_dim(num_dims - 1, shape.dim_size(num_dims - 2) *
-                                           shape.dim_size(num_dims - 1));
-  return expanded_shape;
-}
-
-// Broadcast zeros to ExpandedFilterShapeForDepthwiseConvolution.
-xla::XlaOp CreateExpandedZero(const TensorShape& filter_shape, DataType dtype,
-                              xla::XlaBuilder* builder) {
-  TensorShape expanded_filter_shape =
-      ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
-  return xla::Broadcast(XlaHelpers::Zero(builder, dtype),
-                        expanded_filter_shape.dim_sizes());
-}
-
-// Create a mask for depthwise convolution that will make a normal convolution
-// produce the same results as a depthwise convolution. For a [2, 2, 3, 2]
-// depthwise filter this returns a [2, 2, 3, 6] tensor
-//   1 1 0 0 0 0   1 1 0 0 0 0
-//   0 0 1 1 0 0   0 0 1 1 0 0
-//   0 0 0 0 1 1   0 0 0 0 1 1
-//
-//   1 1 0 0 0 0   1 1 0 0 0 0
-//   0 0 1 1 0 0   0 0 1 1 0 0
-//   0 0 0 0 1 1   0 0 0 0 1 1
-//
-// The first step is to create a one tensor, A, that is [3]
-//   0 1 2
-//
-// and another tensor, B,  that is [3 * 2]
-//   0 1 2 3 4 5
-//
-// and divide B it by 2 to get
-//   0 0 1 1 2 2
-//
-// then we broadcast the B to [2, 2, 3, 3 * 2]
-//   0 0 1 1 2 2   0 0 1 1 2 2
-//   0 0 1 1 2 2   0 0 1 1 2 2
-//   0 0 1 1 2 2   0 0 1 1 2 2
-//
-//   0 0 1 1 2 2   0 0 1 1 2 2
-//   0 0 1 1 2 2   0 0 1 1 2 2
-//   0 0 1 1 2 2   0 0 1 1 2 2
-//
-// Finally compare A and broadcasted B in dimension 2 amd return the result at
-// the beginning of the comment.
-xla::XlaOp CreateExpandedFilterMask(const TensorShape& filter_shape,
-                                    xla::XlaBuilder* builder) {
-  TensorShape expanded_filter_shape =
-      ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
-  int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1);
-  int64 input_feature = filter_shape.dim_size(filter_shape.dims() - 2);
-
-  // Create a M sized linspace and an M*N sized linspace that will be
-  // broadcasted into perpendicular dimensions and compared.
-  xla::XlaOp input_feature_iota = xla::Iota(builder, xla::S32, input_feature);
-  xla::XlaOp expanded_feature_iota =
-      xla::Iota(builder, xla::S32, input_feature * depthwise_multiplier);
-
-  // Divide the M*N sized linspace by the depthwise_multiplier to create
-  // [0 0 1 1 2 2] in the example in the function comment.
-  expanded_feature_iota =
-      xla::Div(expanded_feature_iota,
-               XlaHelpers::IntegerLiteral(builder, DataType::DT_INT32,
-                                          depthwise_multiplier));
-
-  // Broadcast the N*M linspace to [H, W, ..., M, M*N].
-  auto expanded_feature_broadcast_dims = expanded_filter_shape.dim_sizes();
-  expanded_feature_broadcast_dims.pop_back();
-  auto broadcasted_expanded_feature_iota =
-      xla::Broadcast(expanded_feature_iota, expanded_feature_broadcast_dims);
-
-  // Compare the broadcasted linspace to the input feature linspace in the
-  // input feature dimension to create a diagonal predicate.
-  return xla::Eq(broadcasted_expanded_feature_iota, input_feature_iota,
-                 {expanded_filter_shape.dims() - 2});
-}
-
-// Reshapes a filter of shape [H, W, ..., M, N] to [H, W, ..., 1, M*N]. Used to
-// build a depthwise convolution.
-xla::XlaOp ReshapeFilterForDepthwiseConvolution(const TensorShape& filter_shape,
-                                                const xla::XlaOp& filter) {
-  int64 input_feature_dim = filter_shape.dims() - 2;
-  int64 output_feature_dim = filter_shape.dims() - 1;
-  int64 depthwise_multiplier = filter_shape.dim_size(output_feature_dim);
-  int64 input_feature = filter_shape.dim_size(input_feature_dim);
-
-  // Create a [H, W, ..., 1, N*M] reshape of the filter.
-  TensorShape implicit_broadcast_filter_shape = filter_shape;
-  implicit_broadcast_filter_shape.set_dim(input_feature_dim, 1);
-  implicit_broadcast_filter_shape.set_dim(output_feature_dim,
-                                          depthwise_multiplier * input_feature);
-  return xla::Reshape(filter, implicit_broadcast_filter_shape.dim_sizes());
-}
-
-// Reduces the results of the convolution with an expanded filter to the
-// non-expanded filter.
-xla::XlaOp ContractFilterForDepthwiseBackprop(XlaOpKernelContext* ctx,
-                                              const TensorShape& filter_shape,
-                                              DataType dtype,
-                                              const xla::XlaOp& filter_backprop,
-                                              xla::XlaBuilder* builder) {
-  auto masked_expanded_filter = xla::Select(
-      CreateExpandedFilterMask(filter_shape, builder), filter_backprop,
-      CreateExpandedZero(filter_shape, dtype, builder));
-  return xla::Reshape(
-      // This reduce does not need inputs to be converted with
-      // XlaHelpers::SumAccumulationType() since the ExpandedFilterMask with
-      // ExpandedZero guarantees that only one element is non zero, so there
-      // cannot be accumulated precision error.
-      xla::Reduce(masked_expanded_filter, XlaHelpers::Zero(builder, dtype),
-                  *ctx->GetOrCreateAdd(dtype), {filter_shape.dims() - 2}),
-      filter_shape.dim_sizes());
-}
-
 class ConvOp : public XlaOpKernel {
  public:
   explicit ConvOp(OpKernelConstruction* ctx, int num_spatial_dims,
                   bool depthwise)
-      : XlaOpKernel(ctx),
-        num_spatial_dims_(num_spatial_dims),
-        depthwise_(depthwise) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_));
-
-    string data_format;
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format));
-    OP_REQUIRES(ctx, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
+      : XlaOpKernel(ctx) {
+    xla::StatusOr<ConvOpAttrs> attrs =
+        ConvOpAttrs::Create(num_spatial_dims, depthwise, ctx);
+    OP_REQUIRES_OK(ctx, attrs.status());
+    attrs_ = attrs.ValueOrDie();
   }
 
-  int num_dims() const { return num_spatial_dims_ + 2; }
-
   void Compile(XlaOpKernelContext* ctx) override {
-    OP_REQUIRES(ctx, strides_.size() == num_dims(),
-                errors::InvalidArgument("Sliding window strides field must "
-                                        "specify ",
-                                        num_dims(), " dimensions"));
-    int batch_dim = GetTensorBatchDimIndex(num_dims(), data_format_);
-    int feature_dim = GetTensorFeatureDimIndex(num_dims(), data_format_);
-    OP_REQUIRES(
-        ctx, strides_[batch_dim] == 1 && strides_[feature_dim] == 1,
-        errors::Unimplemented("Current implementation does not yet support "
-                              "strides in the batch and depth dimensions."));
-
-    OP_REQUIRES(ctx, dilations_.size() == num_dims(),
-                errors::InvalidArgument("Dilations field must "
-                                        "specify ",
-                                        num_dims(), " dimensions"));
-    OP_REQUIRES(
-        ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1,
-        errors::Unimplemented("Current implementation does not support "
-                              "dilations in the batch and depth dimensions."));
-    for (int i = 0; i < num_spatial_dims_; ++i) {
-      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
-      OP_REQUIRES(ctx, dilations_[input_dim] >= 1,
-                  errors::Unimplemented("Dilation values must be positive; ", i,
-                                        "th spatial dimension had dilation ",
-                                        dilations_[input_dim]));
-    }
-
-    const TensorShape input_shape = ctx->InputShape(0);
-    // Input filter is of the following dimensions:
-    // [ filter_rows, filter_cols, ..., in_depth, out_depth]
-    const TensorShape filter_shape = ctx->InputShape(1);
-
-    // For 2D convolution, there should be 4 dimensions.
-    OP_REQUIRES(
-        ctx, input_shape.dims() == num_dims(),
-        errors::InvalidArgument("input must be ", num_dims(), "-dimensional",
-                                input_shape.DebugString()));
-    OP_REQUIRES(
-        ctx, filter_shape.dims() == num_dims(),
-        errors::InvalidArgument("filter must be ", num_dims(),
-                                "-dimensional: ", filter_shape.DebugString()));
-
-    // The last two dimension of the filter are the input and output shapes.
-    const int64 in_depth = filter_shape.dim_size(num_spatial_dims_);
-
-    // The 'C' dimension for input is in_depth. It must be the same as
-    // the filter's in_depth.
-    OP_REQUIRES(ctx, in_depth == input_shape.dim_size(feature_dim),
-                errors::InvalidArgument(
-                    "input and filter must have the same depth: ", in_depth,
-                    " vs ", input_shape.dim_size(feature_dim)));
-
-    xla::XlaOp filter = ctx->Input(1);
-    if (depthwise_) {
-      filter = ReshapeFilterForDepthwiseConvolution(filter_shape, filter);
-    }
-
-    xla::ConvolutionDimensionNumbers dims;
-    std::vector<int64> window_strides(num_spatial_dims_);
-    std::vector<int64> lhs_dilation(num_spatial_dims_, 1);
-    std::vector<int64> rhs_dilation(num_spatial_dims_);
-    std::vector<std::pair<int64, int64>> padding(num_spatial_dims_);
-
-    dims.set_input_batch_dimension(batch_dim);
-    dims.set_output_batch_dimension(batch_dim);
-    dims.set_input_feature_dimension(feature_dim);
-    dims.set_output_feature_dimension(feature_dim);
-    dims.set_kernel_input_feature_dimension(num_spatial_dims_);
-    dims.set_kernel_output_feature_dimension(num_spatial_dims_ + 1);
-
-    for (int i = 0; i < num_spatial_dims_; ++i) {
-      const int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
-      dims.add_input_spatial_dimensions(dim);
-      dims.add_kernel_spatial_dimensions(i);
-      dims.add_output_spatial_dimensions(dim);
-      window_strides[i] = strides_.at(dim);
-      rhs_dilation[i] = dilations_.at(dim);
-
-      int64 unused_output_size;
-      OP_REQUIRES_OK(
-          ctx, GetWindowedOutputSizeVerboseV2(
-                   input_shape.dim_size(dim), filter_shape.dim_size(i),
-                   rhs_dilation[i], window_strides[i], padding_,
-                   &unused_output_size, &padding[i].first, &padding[i].second));
-    }
-
-    xla::XlaOp conv = xla::ConvGeneralDilated(
-        ctx->Input(0), filter, window_strides, padding, lhs_dilation,
-        rhs_dilation, dims,
-        /*feature_group_count=*/depthwise_ ? in_depth : 1);
-    ctx->SetOutput(0, conv);
+    xla::StatusOr<xla::XlaOp> conv = MakeXlaForwardConvOp(
+        ctx->op_kernel().type_string(), ctx->Input(0), ctx->Input(1), attrs_);
+    OP_REQUIRES_OK(ctx, conv.status());
+    ctx->SetOutput(0, conv.ValueOrDie());
   }
 
  protected:
-  const int num_spatial_dims_;
-  const bool depthwise_;
-  std::vector<int32> dilations_;
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_ = FORMAT_NHWC;
+  ConvOpAttrs attrs_;
 
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(ConvOp);
@@ -308,124 +91,28 @@ class ConvBackpropInputOp : public XlaOpKernel {
  public:
   explicit ConvBackpropInputOp(OpKernelConstruction* ctx, int num_spatial_dims,
                                bool depthwise)
-      : XlaOpKernel(ctx),
-        num_spatial_dims_(num_spatial_dims),
-        depthwise_(depthwise) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_));
-    string data_format;
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format));
-    OP_REQUIRES(ctx, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
+      : XlaOpKernel(ctx) {
+    xla::StatusOr<ConvOpAttrs> attrs =
+        ConvOpAttrs::Create(num_spatial_dims, depthwise, ctx);
+    OP_REQUIRES_OK(ctx, attrs.status());
+    attrs_ = attrs.ValueOrDie();
   }
 
-  int num_dims() const { return num_spatial_dims_ + 2; }
-
   void Compile(XlaOpKernelContext* ctx) override {
-    OP_REQUIRES(ctx, strides_.size() == num_dims(),
-                errors::InvalidArgument("Sliding window strides field must "
-                                        "specify ",
-                                        num_dims(), " dimensions"));
-    int batch_dim = GetTensorBatchDimIndex(num_dims(), data_format_);
-    int feature_dim = GetTensorFeatureDimIndex(num_dims(), data_format_);
-    OP_REQUIRES(
-        ctx, strides_[batch_dim] == 1 && strides_[feature_dim] == 1,
-        errors::Unimplemented("Current implementation does not yet support "
-                              "strides in the batch and depth dimensions."));
-
-    OP_REQUIRES(ctx, dilations_.size() == num_dims(),
-                errors::InvalidArgument("Dilations field must "
-                                        "specify ",
-                                        num_dims(), " dimensions"));
-    OP_REQUIRES(
-        ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1,
-        errors::Unimplemented("Current implementation does not support "
-                              "dilations in the batch and depth dimensions."));
-    for (int i = 0; i < num_spatial_dims_; ++i) {
-      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
-      OP_REQUIRES(ctx, dilations_[input_dim] >= 1,
-                  errors::Unimplemented("Dilation values must be positive; ", i,
-                                        "th spatial dimension had dilation ",
-                                        dilations_[input_dim]));
-    }
-
-    TensorShape input_shape;
-    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &input_shape));
-
-    const TensorShape filter_shape = ctx->InputShape(1);
-    const TensorShape out_backprop_shape = ctx->InputShape(2);
-
-    const TensorShape expanded_filter_shape =
-        depthwise_ ? ExpandedFilterShapeForDepthwiseConvolution(filter_shape)
-                   : filter_shape;
-    // Reuse dimension computation logic from conv_grad_ops.cc.
-    ConvBackpropDimensions dims;
-    OP_REQUIRES_OK(ctx,
-                   ConvBackpropComputeDimensionsV2(
-                       type_string(), num_spatial_dims_, input_shape,
-                       expanded_filter_shape, out_backprop_shape, dilations_,
-                       strides_, padding_, data_format_, &dims));
-
-    auto filter = ctx->Input(1);
-    auto out_backprop = ctx->Input(2);
-
-    // The input gradients are computed by a convolution of the output
-    // gradients and the filter, with some appropriate padding. See the
-    // comment at the top of conv_grad_ops.h for details.
-
-    xla::ConvolutionDimensionNumbers dnums;
-    dnums.set_input_batch_dimension(batch_dim);
-    dnums.set_output_batch_dimension(batch_dim);
-    dnums.set_input_feature_dimension(feature_dim);
-    dnums.set_output_feature_dimension(feature_dim);
-
-    // TF filter shape is [ H, W, ..., inC, outC ]
-    // Transpose the input and output features for computing the gradient.
-    dnums.set_kernel_input_feature_dimension(num_spatial_dims_ + 1);
-    dnums.set_kernel_output_feature_dimension(num_spatial_dims_);
-
-    std::vector<int64> kernel_spatial_dims(num_spatial_dims_);
-    std::vector<std::pair<int64, int64>> padding(num_spatial_dims_);
-    std::vector<int64> lhs_dilation(num_spatial_dims_);
-    std::vector<int64> rhs_dilation(num_spatial_dims_);
-    std::vector<int64> ones(num_spatial_dims_, 1);
-    for (int i = 0; i < num_spatial_dims_; ++i) {
-      int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
-      dnums.add_input_spatial_dimensions(dim);
-      dnums.add_kernel_spatial_dimensions(i);
-      dnums.add_output_spatial_dimensions(dim);
-
-      kernel_spatial_dims[i] = i;
-      padding[i] = {dims.spatial_dims[i].pad_before,
-                    dims.spatial_dims[i].pad_after};
-      lhs_dilation[i] = dims.spatial_dims[i].stride;
-      rhs_dilation[i] = dilations_[dim];
-    }
-
-    // Mirror the filter in the spatial dimensions.
-    xla::XlaOp mirrored_weights = xla::Rev(filter, kernel_spatial_dims);
-
-    // activation gradients
-    //   = gradients (with padding and dilation) <conv> mirrored_weights
-    xla::XlaOp in_backprop = xla::ConvGeneralDilated(
-        out_backprop, mirrored_weights, /*window_strides=*/ones, padding,
-        lhs_dilation, rhs_dilation, dnums,
-        /*feature_group_count=*/
-        depthwise_ ? out_backprop_shape.dim_size(feature_dim) /
-                         filter_shape.dim_size(num_spatial_dims_ + 1)
-                   : 1);
-
-    ctx->SetOutput(0, in_backprop);
+    TensorShape input_tensor_shape;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &input_tensor_shape));
+    xla::Shape input_shape =
+        TensorShapeToXLAShape(ctx->input_xla_type(1), input_tensor_shape);
+
+    xla::StatusOr<xla::XlaOp> in_backprop =
+        MakeXlaBackpropInputConvOp(ctx->op_kernel().type_string(), input_shape,
+                                   ctx->Input(1), ctx->Input(2), attrs_);
+    OP_REQUIRES_OK(ctx, in_backprop.status());
+    ctx->SetOutput(0, in_backprop.ValueOrDie());
   }
 
  protected:
-  const int num_spatial_dims_;
-  const bool depthwise_;
-  std::vector<int32> dilations_;
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_ = FORMAT_NHWC;
+  ConvOpAttrs attrs_;
 
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(ConvBackpropInputOp);
@@ -462,172 +149,28 @@ class ConvBackpropFilterOp : public XlaOpKernel {
  public:
   explicit ConvBackpropFilterOp(OpKernelConstruction* ctx, int num_spatial_dims,
                                 bool depthwise)
-      : XlaOpKernel(ctx),
-        num_spatial_dims_(num_spatial_dims),
-        depthwise_(depthwise) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_));
-    string data_format;
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format));
-    OP_REQUIRES(ctx, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
+      : XlaOpKernel(ctx) {
+    xla::StatusOr<ConvOpAttrs> attrs =
+        ConvOpAttrs::Create(num_spatial_dims, depthwise, ctx);
+    OP_REQUIRES_OK(ctx, attrs.status());
+    attrs_ = attrs.ValueOrDie();
   }
 
-  int num_dims() const { return num_spatial_dims_ + 2; }
-
   void Compile(XlaOpKernelContext* ctx) override {
-    const int n_dim = GetTensorBatchDimIndex(num_dims(), data_format_);
-    const int c_dim = GetTensorFeatureDimIndex(num_dims(), data_format_);
-
-    OP_REQUIRES(
-        ctx, (strides_[n_dim] == 1 && strides_[c_dim] == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-
-    OP_REQUIRES(ctx, dilations_.size() == num_dims(),
-                errors::InvalidArgument("Dilations field must "
-                                        "specify ",
-                                        num_dims(), " dimensions"));
-    OP_REQUIRES(
-        ctx, dilations_[n_dim] == 1 && dilations_[c_dim] == 1,
-        errors::Unimplemented("Current implementation does not support "
-                              "dilations in the batch and depth dimensions."));
-    for (int i = 0; i < num_spatial_dims_; ++i) {
-      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
-      OP_REQUIRES(ctx, dilations_[input_dim] >= 1,
-                  errors::Unimplemented("Dilation values must be positive; ", i,
-                                        "th spatial dimension had dilation ",
-                                        dilations_[input_dim]));
-    }
-
-    const TensorShape activations_shape = ctx->InputShape(0);
-    TensorShape filter_shape;
-    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(1, &filter_shape));
-    const TensorShape out_backprop_shape = ctx->InputShape(2);
-
-    const TensorShape expanded_filter_shape =
-        depthwise_ ? ExpandedFilterShapeForDepthwiseConvolution(filter_shape)
-                   : filter_shape;
-
-    // Reuse dimension computation logic from conv_grad_ops.cc.
-    ConvBackpropDimensions dims;
-    OP_REQUIRES_OK(ctx,
-                   ConvBackpropComputeDimensionsV2(
-                       type_string(), num_spatial_dims_, activations_shape,
-                       expanded_filter_shape, out_backprop_shape, dilations_,
-                       strides_, padding_, data_format_, &dims));
-
-    xla::XlaBuilder* b = ctx->builder();
-    xla::XlaOp activations = ctx->Input(0);
-    xla::XlaOp gradients = ctx->Input(2);
-
-    // The filter gradients are computed by a convolution of the input
-    // activations and the output gradients, with some appropriate padding.
-    // See the comment at the top of conv_grad_ops.h for details.
-
-    xla::ConvolutionDimensionNumbers dnums;
-
-    // The activations (inputs) form the LHS of the convolution.
-    // Activations have shape: [batch, in_rows, in_cols, ..., in_depth]
-    // For the gradient computation, we flip the roles of the batch and
-    // feature dimensions.
-    // Each spatial entry has size in_depth * batch
-
-    // Swap n_dim and c_dim in the activations.
-    dnums.set_input_batch_dimension(c_dim);
-    dnums.set_input_feature_dimension(n_dim);
-
-    // The gradients become the RHS of the convolution.
-    // The gradients have shape [batch, out_rows, out_cols, ..., out_depth]
-    // where the batch becomes the input feature for the convolution.
-    dnums.set_kernel_input_feature_dimension(n_dim);
-    dnums.set_kernel_output_feature_dimension(c_dim);
-
-    std::vector<std::pair<int64, int64>> padding(num_spatial_dims_);
-    std::vector<int64> rhs_dilation(num_spatial_dims_);
-    std::vector<int64> window_strides(num_spatial_dims_);
-    std::vector<int64> ones(num_spatial_dims_, 1);
-
-    // Tensorflow filter shape is [ H, W, ..., inC, outC ].
-    for (int i = 0; i < num_spatial_dims_; ++i) {
-      dnums.add_output_spatial_dimensions(i);
-    }
-    dnums.set_output_batch_dimension(num_spatial_dims_);
-    dnums.set_output_feature_dimension(num_spatial_dims_ + 1);
-
-    for (int i = 0; i < num_spatial_dims_; ++i) {
-      int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
-      dnums.add_input_spatial_dimensions(dim);
-      dnums.add_kernel_spatial_dimensions(dim);
-
-      // We will also need to pad the input with zeros such that after the
-      // convolution, we get the right size for the filter.
-      // The padded_in_rows should be such that when we convolve this with the
-      // expanded_out_rows as a filter, we should get filter_rows back.
-      //
-      const int64 padded_in_size =
-          dims.spatial_dims[i].expanded_output_size +
-          (dims.spatial_dims[i].filter_size - 1) * dilations_[dim];
-
-      // However it can be smaller than input_rows: in this
-      // case it means some of the inputs are not used.
-      //
-      // An example is to have input_cols = 3, filter_cols = 2 and stride = 2:
-      //
-      // INPUT =  [ A  B  C ]
-      //
-      // FILTER = [ x y ]
-      //
-      // and the output will only have one column: a = A * x + B * y
-      //
-      // and input "C" is not used at all.
-      //
-      // We apply negative padding in this case.
-      const int64 pad_total = padded_in_size - dims.spatial_dims[i].input_size;
-
-      // + For the VALID padding, we don't pad anything on the top/left side
-      //   and pad the bottom/right side with the remaining space.
-      // + For the SAME padding, we pad top/left side the same as bottom/right
-      //   side.
-      //
-      // In addition, if the padded input size is smaller than the input size,
-      // we need to ignore some training elements of the input. We do this by
-      // applying negative padding on the right/bottom.
-      const int64 pad_before =
-          padding_ == Padding::SAME ? std::max<int64>(pad_total / 2, 0) : 0;
-
-      padding[i] = {pad_before, pad_total - pad_before};
-      rhs_dilation[i] = dims.spatial_dims[i].stride;
-      window_strides[i] = dilations_[dim];
-    }
-
-    // Besides padding the input, we will also expand output_rows to
-    //    expanded_out_rows = (output_rows - 1) * stride + 1
-    // with zeros in between:
-    //
-    //      a . . . b . . . c . . . d . . . e
-    //
-    // This is done by specifying the window dilation factors in the
-    // convolution HLO below.
-    auto filter_backprop =
-        xla::ConvGeneralDilated(activations, gradients, window_strides, padding,
-                                /*lhs_dilation=*/ones, rhs_dilation, dnums);
-
-    if (depthwise_) {
-      filter_backprop = ContractFilterForDepthwiseBackprop(
-          ctx, filter_shape, ctx->input_type(0), filter_backprop, b);
-    }
-    ctx->SetOutput(0, filter_backprop);
+    TensorShape filter_tensor_shape;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(1, &filter_tensor_shape));
+    xla::Shape filter_shape =
+        TensorShapeToXLAShape(ctx->input_xla_type(0), filter_tensor_shape);
+
+    xla::StatusOr<xla::XlaOp> filter_backprop = MakeXlaBackpropFilterConvOp(
+        ctx->op_kernel().type_string(), ctx->Input(0), filter_shape,
+        ctx->Input(2), attrs_);
+    OP_REQUIRES_OK(ctx, filter_backprop.status());
+    ctx->SetOutput(0, filter_backprop.ValueOrDie());
   }
 
  protected:
-  const int num_spatial_dims_;
-  const bool depthwise_;
-  std::vector<int32> dilations_;
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_ = FORMAT_NHWC;
+  ConvOpAttrs attrs_;
 
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(ConvBackpropFilterOp);
diff --git a/tensorflow/compiler/tf2xla/shape_util.cc b/tensorflow/compiler/tf2xla/shape_util.cc
index 9d1992205b..b589512dcd 100644
--- a/tensorflow/compiler/tf2xla/shape_util.cc
+++ b/tensorflow/compiler/tf2xla/shape_util.cc
@@ -41,6 +41,14 @@ Status XLAShapeToTensorShape(const xla::Shape& shape,
 // Convert a TensorShape into the equivalent XLA Shape proto.
 Status TensorShapeToXLAShape(DataType dtype, const TensorShape& tensor_shape,
                              xla::Shape* shape) {
+  xla::PrimitiveType type;
+  TF_RETURN_IF_ERROR(DataTypeToPrimitiveType(dtype, &type));
+  *shape = TensorShapeToXLAShape(type, tensor_shape);
+  return Status::OK();
+}
+
+xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type,
+                                 const TensorShape& tensor_shape) {
   int rank = tensor_shape.dims();
   std::vector<int64> dimensions(rank);
   std::vector<int64> layout(rank);
@@ -50,11 +58,7 @@ Status TensorShapeToXLAShape(DataType dtype, const TensorShape& tensor_shape,
   // XLA uses minor-to-major; Tensorflow uses major-to-minor.
   std::iota(layout.rbegin(), layout.rend(), 0);
 
-  xla::PrimitiveType type;
-  TF_RETURN_IF_ERROR(DataTypeToPrimitiveType(dtype, &type));
-
-  *shape = xla::ShapeUtil::MakeShapeWithLayout(type, dimensions, layout);
-  return Status::OK();
+  return xla::ShapeUtil::MakeShapeWithLayout(type, dimensions, layout);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/shape_util.h b/tensorflow/compiler/tf2xla/shape_util.h
index 58240b9c96..f7e34a5b40 100644
--- a/tensorflow/compiler/tf2xla/shape_util.h
+++ b/tensorflow/compiler/tf2xla/shape_util.h
@@ -35,6 +35,11 @@ Status XLAShapeToTensorShape(const xla::Shape& shape,
 Status TensorShapeToXLAShape(DataType dtype, const TensorShape& tensor_shape,
                              xla::Shape* shape);
 
+// Converts a TensorShape into the equivalent XLA Shape proto, taking an
+// xla::PrimitiveType to specify the element type.  This never fails.
+xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type,
+                                 const TensorShape& tensor_shape);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_COMPILER_TF2XLA_SHAPE_UTIL_H_
-- 
GitLab


From 0cf3690400e46bd89b48a206eff8dd08a660aced Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Sep 2018 02:04:31 -0700
Subject: [PATCH 0321/1357] compat: Update forward compatibility horizon to
 2018-09-18

PiperOrigin-RevId: 213414462
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 0d2f2c9b9e..157e699604 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 17)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 18)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From c6a060c83cc56c8c0cc0f1105550def4bff93c0d Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 18 Sep 2018 05:22:55 -0700
Subject: [PATCH 0322/1357] Simplify the interface of conversion_call to allow
 a ConversionOptions object that can be more easily extended. Currently any
 new argument needs changing a lot of call sites and there is redundancy in
 argument documentation.

Note: this does not modify the public symbols yet - it's not clear whether we want to complicate their interface. However we may want to use it in to_graph and to_code.
PiperOrigin-RevId: 213433379
---
 tensorflow/python/autograph/__init__.py       |  2 +
 .../python/autograph/converters/call_trees.py | 11 ++-
 .../autograph/core/converter_testing.py       | 12 ++-
 tensorflow/python/autograph/impl/api.py       | 83 +++++++++++++++----
 tensorflow/python/autograph/impl/api_test.py  | 24 +++---
 .../python/autograph/impl/conversion.py       |  1 +
 6 files changed, 102 insertions(+), 31 deletions(-)

diff --git a/tensorflow/python/autograph/__init__.py b/tensorflow/python/autograph/__init__.py
index c3448e6e58..5ed5e85158 100644
--- a/tensorflow/python/autograph/__init__.py
+++ b/tensorflow/python/autograph/__init__.py
@@ -27,6 +27,7 @@ from tensorflow.python.autograph import utils
 from tensorflow.python.autograph.core.errors import GraphConstructionError
 from tensorflow.python.autograph.core.errors import TfRuntimeError
 from tensorflow.python.autograph.core.errors import improved_errors
+from tensorflow.python.autograph.impl.api import ConversionOptions
 from tensorflow.python.autograph.impl.api import RunMode
 from tensorflow.python.autograph.impl.api import convert
 from tensorflow.python.autograph.impl.api import converted_call
@@ -42,6 +43,7 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     # Main API
+    'ConversionOptions',
     'RunMode',
     'convert',
     'converted_call',
diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index 6a606c450d..fc2075b781 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -238,9 +238,16 @@ class CallTreeTransformer(converter.Base):
     # Before we could convert all the time though, we'd need a reasonable
     # caching mechanism.
     template = """
-      ag__.converted_call(func, True, False, False, {}, args)
+      ag__.converted_call(
+          func,
+          ag__.ConversionOptions.new(recursive=recursive_val),
+          args)
     """
-    call_expr = templates.replace(template, func=node.func, args=node.args)
+    call_expr = templates.replace(
+        template,
+        func=node.func,
+        recursive_val=parser.parse_expression(str(self.ctx.program.recursive)),
+        args=node.args)
     new_call = call_expr[0].value
     # TODO(mdan): Improve the template mechanism to better support this.
     new_call.keywords = node.keywords
diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py
index 0a0c6f9002..7ce1b7c4c5 100644
--- a/tensorflow/python/autograph/core/converter_testing.py
+++ b/tensorflow/python/autograph/core/converter_testing.py
@@ -93,11 +93,21 @@ class TestCase(test.TestCase):
       self.dynamic_calls.append(args)
       return 7
 
+    class ConversionOptions(object):
+      """Mock version of api.ConversionOptions."""
+
+      def __init__(self, recursive):
+        self.recursive = recursive
+
+      @classmethod
+      def new(cls, recursive):
+        cls(recursive)
+
     try:
       result, source = compiler.ast_to_object(node, include_source_map=True)
 
       result.tf = self.make_fake_mod('fake_tf', *symbols)
-      fake_ag = self.make_fake_mod('fake_ag', converted_call)
+      fake_ag = self.make_fake_mod('fake_ag', converted_call, ConversionOptions)
       fake_ag.__dict__.update(operators.__dict__)
       fake_ag.__dict__['utils'] = utils
       fake_ag.__dict__['rewrite_graph_construction_error'] = (
diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index 669d36bd28..ee2467e0dc 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -18,7 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from functools import wraps
+import collections
+import functools
 
 from enum import Enum
 
@@ -38,6 +39,41 @@ from tensorflow.python.util import tf_inspect
 # (currently we require (module + class name, type))
 
 
+class ConversionOptions(
+    collections.namedtuple('ConversionOptions',
+                           ('recursive', 'verbose', 'strip_decorators',
+                            'force_conversion', 'arg_types'))):
+  """Container for conversion flags.
+
+  Attributes:
+    recursive: bool, whether to recursively convert any user functions or
+        classes that the converted function may use.
+    verbose: bool, whether to log the compiled code.
+    strip_decorators: Tuple[Callable], contains decorators that should be in
+        excluded from the compiled output. By default, when converting a
+        function before the decorators are applied, the compiled output will
+        include those decorators.
+    force_conversion: bool, whether to force convertinng the target entity.
+        When force_conversion is turned off, the converter may decide to
+        return the function as-is.
+    arg_types: Optional[Dict[Text, Type]], type hints for symbols including
+        function arguments.
+  """
+
+  @classmethod
+  def new(cls,
+          recursive=False,
+          verbose=False,
+          strip_decorators=None,
+          force_conversion=False,
+          arg_types=None):
+    return cls(recursive=recursive,
+               verbose=verbose,
+               strip_decorators=strip_decorators or (),
+               force_conversion=force_conversion,
+               arg_types=arg_types or {})
+
+
 # TODO(mdan): This should behave like to_graph (e.g. convert statically).
 def convert(recursive=False, verbose=False):
   """Decorator that compiles a function to use TensorFlow ops.
@@ -59,9 +95,15 @@ def convert(recursive=False, verbose=False):
   def decorator(f):
     """Decorator implementation."""
 
-    @wraps(f)
+    @functools.wraps(f)
     def wrapper(*args, **kwargs):
-      return converted_call(f, recursive, verbose, True, {}, *args, **kwargs)
+      return converted_call(
+          f,
+          ConversionOptions.new(
+              recursive=recursive,
+              verbose=verbose,
+              force_conversion=True,
+          ), *args, **kwargs)
 
     wrapper = tf_decorator.make_decorator(f, wrapper)
 
@@ -107,11 +149,11 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None):
   def decorator(f):
     """Decorator implementation."""
 
-    @wraps(f)
+    @functools.wraps(f)
     def graph_wrapper(*args, **kwargs):
       return f(*args, **kwargs)
 
-    @wraps(f)
+    @functools.wraps(f)
     def py_func_wrapper(*args, **kwargs):
       if kwargs:
         raise NotImplementedError('RunMode.PY_FUNC does not yet support kwargs')
@@ -135,12 +177,11 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None):
 
 
 # TODO(mdan): Move to a private, undocumented module.
-def converted_call(f, recursive, verbose, force_conversion, arg_types, *args,
-                   **kwargs):
+def converted_call(f, options, *args, **kwargs):
   """Compiles a function call inline. For internal use only."""
   # TODO(mdan): This needs cleanup.
   # In particular, we may want to avoid renaming functions altogether.
-  if not force_conversion and conversion.is_whitelisted_for_graph(f):
+  if not options.force_conversion and conversion.is_whitelisted_for_graph(f):
     return f(*args, **kwargs)
 
   unknown_arg_value = object()  # Sentinel for arguments of unknown value
@@ -183,8 +224,8 @@ def converted_call(f, recursive, verbose, force_conversion, arg_types, *args,
       continue
     arg_class = arg.__class__
     # If arg_value_hints specifies any name, use that instead.
-    if name not in arg_types:
-      arg_types[name] = (arg_class.__name__, arg_class)
+    if name not in options.arg_types:
+      options.arg_types[name] = (arg_class.__name__, arg_class)
 
   # When called from within a decorator, this is the only indication that
   # the function is a method - it appears that the decorator is applied
@@ -199,23 +240,25 @@ def converted_call(f, recursive, verbose, force_conversion, arg_types, *args,
 
   converted_f = to_graph(
       target_entity,
-      recursive=recursive,
-      verbose=verbose,
+      recursive=options.recursive,
+      verbose=options.verbose,
       arg_values=arg_values,
-      arg_types=arg_types,
-      partial_types=partial_types)
+      arg_types=options.arg_types,
+      partial_types=partial_types,
+      strip_decorators=options.strip_decorators)
   return converted_f(*effective_args, **kwargs)
 
 
 # TODO(mdan): Rename: to_ops?
-# TODO(mdan): Looki into overloading as function and decorator, like tfe.defun.
+# TODO(mdan): Look into overloading as function and decorator, like tfe.defun?
 # TODO(mdan): Remove partial_types.
 def to_graph(e,
              recursive=True,
              verbose=False,
              arg_values=None,
              arg_types=None,
-             partial_types=None):
+             partial_types=None,
+             strip_decorators=None):
   """Converts a Python entity into equivalent code that uses TensorFlow ops.
 
   Supported Python entities include:
@@ -234,6 +277,8 @@ def to_graph(e,
     arg_types: Optional[Dict[Text, Type]], type hints for symbols including
         function arguments.
     partial_types: Set[Type], reserved for internal use.
+    strip_decorators: Tuple[Callable], same as
+        ConversionOptions.strip_decorators.
 
   Returns:
     Union[Callable, Type], the converted entity, which is the same kind as e
@@ -243,9 +288,13 @@ def to_graph(e,
   Raises:
     ValueError: If the entity could not be converted.
   """
+  if strip_decorators is None:
+    strip_decorators = ()
+  strip_decorators += (convert, do_not_convert, converted_call)
+
   program_ctx = converter.ProgramContext(
       recursive=recursive,
-      autograph_decorators=(convert, do_not_convert, converted_call),
+      autograph_decorators=strip_decorators,
       partial_types=partial_types,
       autograph_module=tf_inspect.getmodule(to_graph),
       uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py
index 54e12f0223..e0770ef4c6 100644
--- a/tensorflow/python/autograph/impl/api_test.py
+++ b/tensorflow/python/autograph/impl/api_test.py
@@ -32,7 +32,6 @@ from tensorflow.python.util import tf_inspect
 
 tf = utils.fake_tf()
 
-
 class ApiTest(test.TestCase):
 
   def setUp(self):
@@ -180,8 +179,9 @@ class ApiTest(test.TestCase):
       @api.convert(recursive=True)
       def test_method(self, x, s, a):
         while tf.reduce_sum(x) > s:
-          x //= api.converted_call(self.called_member, False, False, False, {},
-                                   self, a)
+          x //= api.converted_call(
+              self.called_member,
+              api.ConversionOptions.new(), self, a)
         return x
 
     tc = TestClass()
@@ -192,7 +192,7 @@ class ApiTest(test.TestCase):
       self.assertListEqual([0, 1], sess.run(x).tolist())
 
   def test_converted_call_builtin(self):
-    x = api.converted_call(range, False, False, False, {}, 3)
+    x = api.converted_call(range, api.ConversionOptions.new(), 3)
     self.assertEqual((0, 1, 2), tuple(x))
 
   def test_converted_call_function(self):
@@ -203,7 +203,7 @@ class ApiTest(test.TestCase):
       return x
 
     with self.test_session() as sess:
-      x = api.converted_call(test_fn, False, False, False, {},
+      x = api.converted_call(test_fn, api.ConversionOptions.new(),
                              constant_op.constant(-1))
       self.assertEqual(1, sess.run(x))
 
@@ -221,7 +221,7 @@ class ApiTest(test.TestCase):
 
     with self.test_session() as sess:
       tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(tc.test_method, False, False, False, {}, tc)
+      x = api.converted_call(tc.test_method, api.ConversionOptions.new(), tc)
       self.assertEqual(1, sess.run(x))
 
   def test_converted_call_method_by_class(self):
@@ -238,7 +238,9 @@ class ApiTest(test.TestCase):
 
     with self.test_session() as sess:
       tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(TestClass.test_method, False, False, False, {}, tc)
+      x = api.converted_call(
+          TestClass.test_method,
+          api.ConversionOptions.new(), tc)
       self.assertEqual(1, sess.run(x))
 
   def test_converted_call_callable_object(self):
@@ -255,7 +257,7 @@ class ApiTest(test.TestCase):
 
     with self.test_session() as sess:
       tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(tc, False, False, False, {})
+      x = api.converted_call(tc, api.ConversionOptions.new())
       self.assertEqual(1, sess.run(x))
 
   def test_converted_call_constructor(self):
@@ -271,7 +273,7 @@ class ApiTest(test.TestCase):
         return self.x
 
     with self.test_session() as sess:
-      tc = api.converted_call(TestClass, False, False, False, {},
+      tc = api.converted_call(TestClass, api.ConversionOptions.new(),
                               constant_op.constant(-1))
       # tc is now a converted object.
       x = tc.test_method()
@@ -283,12 +285,12 @@ class ApiTest(test.TestCase):
       return x == 0
 
     with self.test_session() as sess:
-      x = api.converted_call(f, False, False, False, {},
+      x = api.converted_call(f, api.ConversionOptions.new(),
                              constant_op.constant(0))
       self.assertTrue(sess.run(x))
 
       converted_f = api.to_graph(f)
-      x = api.converted_call(converted_f, False, False, False, {},
+      x = api.converted_call(converted_f, api.ConversionOptions.new(),
                              constant_op.constant(0))
       self.assertTrue(sess.run(x))
 
diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index 928ff9e7ea..a0d13c82a8 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -255,6 +255,7 @@ def _add_self_references(namespace, autograph_module):
     # internal modules.
     ag_internal = imp.new_module('autograph')
     ag_internal.converted_call = autograph_module.converted_call
+    ag_internal.ConversionOptions = autograph_module.ConversionOptions
     ag_internal.utils = utils
     ag_internal.rewrite_graph_construction_error = (
         errors.rewrite_graph_construction_error)
-- 
GitLab


From 25c99131362f034c3bc3805d741f0c4ab9d0cb8b Mon Sep 17 00:00:00 2001
From: "David G. Andersen" <dga@google.com>
Date: Tue, 18 Sep 2018 06:51:20 -0700
Subject: [PATCH 0323/1357] Add a fuzzer to test DecodeCompressed

PiperOrigin-RevId: 213441868
---
 tensorflow/core/kernels/fuzzing/BUILD         |  2 +
 .../kernels/fuzzing/decode_compressed_fuzz.cc | 45 +++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc

diff --git a/tensorflow/core/kernels/fuzzing/BUILD b/tensorflow/core/kernels/fuzzing/BUILD
index 8bfa40304e..f2e0b2558f 100644
--- a/tensorflow/core/kernels/fuzzing/BUILD
+++ b/tensorflow/core/kernels/fuzzing/BUILD
@@ -43,4 +43,6 @@ tf_ops_fuzz_target_lib("example_proto_fast_parsing")
 
 tf_ops_fuzz_target_lib("parse_tensor_op")
 
+tf_ops_fuzz_target_lib("decode_compressed")
+
 tf_ops_fuzz_target_lib("decode_json_example")
diff --git a/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc b/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc
new file mode 100644
index 0000000000..0a56f4b63f
--- /dev/null
+++ b/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc
@@ -0,0 +1,45 @@
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/kernels/fuzzing/fuzz_session.h"
+
+namespace tensorflow {
+namespace fuzzing {
+
+class FuzzDecodeCompressed : public FuzzStringInputOp {
+  void BuildGraph(const Scope& scope) override {
+    auto input =
+        tensorflow::ops::Placeholder(scope.WithOpName("input1"), DT_STRING);
+    auto d1 = tensorflow::ops::DecodeCompressed(
+        scope.WithOpName("d1"), input,
+        tensorflow::ops::DecodeCompressed::CompressionType(""));
+    auto d2 = tensorflow::ops::DecodeCompressed(
+        scope.WithOpName("d2"), input,
+        tensorflow::ops::DecodeCompressed::CompressionType("ZLIB"));
+    auto d3 = tensorflow::ops::DecodeCompressed(
+        scope.WithOpName("d3"), input,
+        tensorflow::ops::DecodeCompressed::CompressionType("GZIP"));
+    Scope grouper =
+        scope.WithControlDependencies(std::vector<tensorflow::Operation>{
+            d1.output.op(), d2.output.op(), d3.output.op()});
+    (void)tensorflow::ops::NoOp(grouper.WithOpName("output"));
+  }
+};
+
+STANDARD_TF_FUZZ_FUNCTION(FuzzDecodeCompressed);
+
+}  // namespace fuzzing
+}  // namespace tensorflow
-- 
GitLab


From 97011c17de3f21ae7d40f89f09bf7513dc0e49aa Mon Sep 17 00:00:00 2001
From: Geoffrey Irving <irving@naml.us>
Date: Fri, 7 Sep 2018 09:01:56 -0700
Subject: [PATCH 0324/1357] Make tf.random_uniform([0], maxval=0,
 dtype=tf.int32) not crash

For integers, tf.random_uniform enforces a nonempty range with minval < maxval.
However, an empty range is fine if we're producing no output values, and
this degenerate case occurs naturally for some code patterns.

Thus, tf.random_uniform now allows empty ranges for integer random
numbers if the output shape is empty.
---
 tensorflow/core/kernels/random_op.cc                   | 10 +++++++---
 .../python/kernel_tests/random/random_ops_test.py      |  9 +++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc
index e37232539f..04a53697c0 100644
--- a/tensorflow/core/kernels/random_op.cc
+++ b/tensorflow/core/kernels/random_op.cc
@@ -231,7 +231,13 @@ class RandomUniformIntOp : public OpKernel {
                 errors::InvalidArgument("maxval must be 0-D, got shape ",
                                         maxval.shape().DebugString()));
 
-    // Verify that minval < maxval
+    // Allocate output, and exit early if possible
+    Tensor* output;
+    OP_REQUIRES_OK(ctx, AllocateOutputWithShape(ctx, shape, 0, &output));
+    if (output->NumElements() == 0) return;
+
+    // Verify that minval < maxval.  This check intentionally happens after the
+    // early exit for empty output.  Zero impossible things are fine.
     IntType lo = minval.scalar<IntType>()();
     IntType hi = maxval.scalar<IntType>()();
     OP_REQUIRES(
@@ -243,8 +249,6 @@ class RandomUniformIntOp : public OpKernel {
         Distribution;
     Distribution dist(lo, hi);
 
-    Tensor* output;
-    OP_REQUIRES_OK(ctx, AllocateOutputWithShape(ctx, shape, 0, &output));
     auto output_flat = output->flat<IntType>();
     functor::FillPhiloxRandom<Device, Distribution>()(
         ctx, ctx->eigen_device<Device>(),
diff --git a/tensorflow/python/kernel_tests/random/random_ops_test.py b/tensorflow/python/kernel_tests/random/random_ops_test.py
index 0ef6a95cfc..d199a9d9dd 100644
--- a/tensorflow/python/kernel_tests/random/random_ops_test.py
+++ b/tensorflow/python/kernel_tests/random/random_ops_test.py
@@ -320,6 +320,15 @@ class RandomUniformTest(RandomOpTestCommon):
       error = np.abs(counts - mean)
       self.assertLess(error.max(), 5 * std)
 
+  # Check that minval = maxval is fine iff we're producing no numbers
+  def testUniformIntsDegenerate(self):
+    for dt in dtypes.int32, dtypes.int64:
+      def sample(n):
+        return self._Sampler(n, minv=0, maxv=0, dtype=dt, use_gpu=True)()
+      self.assertEqual(sample(0).shape, (10, 0))
+      with self.assertRaisesOpError('Need minval < maxval, got 0 >= 0'):
+        sample(1)
+
   # Checks that the CPU and GPU implementation returns the same results,
   # given the same random seed
   def testCPUGPUMatch(self):
-- 
GitLab


From 18b47f08b13c628ef87d9a99f7fde743baca5300 Mon Sep 17 00:00:00 2001
From: Guangda Lai <laigd@google.com>
Date: Tue, 18 Sep 2018 08:30:46 -0700
Subject: [PATCH 0325/1357] Automated rollback of commit
 19d66a950e2091bb598c6a2d375e14208f5773b2

PiperOrigin-RevId: 213453719
---
 tensorflow/contrib/tensorrt/BUILD             | 21 +------------------
 tensorflow/contrib/tensorrt/test/base_test.py |  6 ++++++
 .../test/tf_trt_integration_test_base.py      |  5 +++--
 3 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 4ea7216ef2..9e8979bce4 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -444,6 +444,7 @@ cuda_py_test(
 cuda_py_tests(
     name = "tf_trt_integration_test",
     srcs = [
+        "test/base_test.py",
         "test/batch_matmul_test.py",
         "test/biasadd_matmul_test.py",
         "test/binary_tensor_weight_broadcast_test.py",
@@ -470,26 +471,6 @@ cuda_py_tests(
     ],
 )
 
-cuda_py_tests(
-    name = "base_test",
-    srcs = [
-        "test/base_test.py",
-    ],
-    additional_deps = [
-        ":tf_trt_integration_test_base",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_test_lib",
-    ],
-    tags = [
-        "manual",
-        "no_cuda_on_cpu_tap",
-        "no_gpu",
-        "no_windows",
-        "nomac",
-        "notap",
-    ],
-)
-
 cc_library(
     name = "utils",
     srcs = ["convert/utils.cc"],
diff --git a/tensorflow/contrib/tensorrt/test/base_test.py b/tensorflow/contrib/tensorrt/test/base_test.py
index e9ac833d55..7e9ffb05ab 100644
--- a/tensorflow/contrib/tensorrt/test/base_test.py
+++ b/tensorflow/contrib/tensorrt/test/base_test.py
@@ -183,6 +183,12 @@ class PartiallyConvertedTestA(trt_test.TfTrtIntegrationTestBase):
         "my_trt_op_0": ["c0", "c1", "add0", "add1", "mul0", "mul1"]
     }
 
+  def ShouldRunTest(self, run_params):
+    """Whether to run the test."""
+    # Disable the test in fp16 mode since multiple matmul and add ops together
+    # can cause overflow.
+    return run_params.precision_mode != "FP16"
+
 
 class PartiallyConvertedTestB(PartiallyConvertedTestA):
 
diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
index fc647e4eb9..699f79adec 100644
--- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
+++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
@@ -179,11 +179,11 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
 
   def ExpectedAbsoluteTolerance(self, run_params):
     """The absolute tolerance to compare floating point results."""
-    return 1.e-06 if run_params.precision_mode == "FP32" else 1.e-03
+    return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-02
 
   def ExpectedRelativeTolerance(self, run_params):
     """The relative tolerance to compare floating point results."""
-    return 1.e-06 if run_params.precision_mode == "FP32" else 1.e-03
+    return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-02
 
   def _GetParamsCached(self):
     if self._trt_test_params is None:
@@ -414,6 +414,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
     if not self.ShouldRunTest(run_params):
       return
     assert run_params.precision_mode in PRECISION_MODES
+    np.random.seed(12345)
 
     params = self._GetParamsCached()
     input_gdef = params.gdef
-- 
GitLab


From b1ff7c2cedcc7d49d430d56655870e6d68a0c8f7 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 18 Sep 2018 08:47:31 -0700
Subject: [PATCH 0326/1357] Creating an InstantiatedCapturedFunction that
 captures the instantiated state of a function to be executed, separating it
 out from the non instantiated regular state such as function name, captured
 inputs etc.

This allows us to truly separate Dataset kernel creation from Iterator creation i.e. each time a dataset is created that uses functions, we create only a CapturedFunction whereas we create an InstantiatedCapturedFunction each time a new iterator is created.

PiperOrigin-RevId: 213456128
---
 .../core/kernels/data/captured_function.cc    | 206 ++++++++----------
 .../core/kernels/data/captured_function.h     | 129 ++++++-----
 tensorflow/core/kernels/data/dataset_utils.cc |   9 +-
 tensorflow/core/kernels/data/dataset_utils.h  |   5 +-
 .../core/kernels/data/filter_dataset_op.cc    |  33 +--
 .../core/kernels/data/flat_map_dataset_op.cc  |   7 +-
 .../core/kernels/data/generator_dataset_op.cc |  23 +-
 .../data/group_by_reducer_dataset_op.cc       |  31 +--
 .../data/group_by_window_dataset_op.cc        |  25 ++-
 .../kernels/data/interleave_dataset_op.cc     |   8 +-
 .../kernels/data/map_and_batch_dataset_op.cc  |   6 +-
 .../core/kernels/data/map_dataset_op.cc       |   6 +-
 .../data/parallel_interleave_dataset_op.cc    |  16 +-
 .../kernels/data/parallel_map_dataset_op.cc   |  57 +++--
 .../kernels/data/parallel_map_iterator.cc     |  37 +---
 .../core/kernels/data/parallel_map_iterator.h |  44 ++--
 .../kernels/data/parse_example_dataset_op.cc  | 185 +++++++++-------
 .../core/kernels/data/scan_dataset_op.cc      |   8 +-
 18 files changed, 440 insertions(+), 395 deletions(-)

diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc
index b3ab7e2bc6..96ae8e16d5 100644
--- a/tensorflow/core/kernels/data/captured_function.cc
+++ b/tensorflow/core/kernels/data/captured_function.cc
@@ -46,10 +46,36 @@ Status CapturedFunction::Create(
   return Status::OK();
 }
 
-CapturedFunction::~CapturedFunction() {
-  if (lib_ != nullptr && f_handle_ != kInvalidHandle) {
-    lib_->ReleaseHandle(f_handle_).IgnoreError();
+Status CapturedFunction::Instantiate(
+    IteratorContext* ctx, std::unique_ptr<InstantiatedCapturedFunction>*
+                              instantiated_captured_function) {
+  // The context's runtime will be used for all subsequent calls.
+  FunctionLibraryRuntime* lib = ctx->lib();
+  FunctionLibraryRuntime::InstantiateOptions inst_opts;
+  inst_opts.overlay_lib = ctx->function_library().get();
+  inst_opts.state_handle = std::to_string(random::New64());
+  inst_opts.create_kernels_eagerly = true;
+  if (!use_inter_op_parallelism_) {
+    inst_opts.executor_type = "SINGLE_THREADED_EXECUTOR";
+  }
+
+  FunctionLibraryRuntime::Handle f_handle;
+  Status s = (lib->Instantiate(func_.name(), AttrSlice(&func_.attr()),
+                               inst_opts, &f_handle));
+  TF_RETURN_IF_ERROR(s);
+  const FunctionBody* fbody = lib->GetFunctionBody(f_handle);
+  if (fbody == nullptr) {
+    return errors::Internal("Failed to instantiate function body.");
   }
+
+  DataTypeVector ret_types;
+  for (const auto& ret_type : fbody->ret_types) {
+    ret_types.push_back(ret_type);
+  }
+
+  instantiated_captured_function->reset(new InstantiatedCapturedFunction(
+      lib, f_handle, std::move(ret_types), *ctx->runner(), this));
+  return Status::OK();
 }
 
 namespace {
@@ -172,35 +198,34 @@ class BorrowedArgsCallFrame : public CallFrameBase {
 
 }  // namespace
 
-Status CapturedFunction::GetHandle(IteratorContext* ctx,
-                                   FunctionLibraryRuntime::Handle* out_handle) {
-  tf_shared_lock l(mu_);
-  if (lib_ == nullptr) {
-    return errors::Internal("Captured function \"", func_.name(),
-                            "\" was called before it was instantiated.");
-  }
-  if (ctx->lib() != lib_) {
-    return errors::Internal("Captured function \"", func_.name(),
-                            "\" was called with a different "
-                            "FunctionLibraryRuntime*, which is not permitted.");
+InstantiatedCapturedFunction::InstantiatedCapturedFunction(
+    FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle,
+    DataTypeVector ret_types, std::function<void(std::function<void()>)> runner,
+    CapturedFunction* captured_func)
+    : lib_(lib),
+      f_handle_(f_handle),
+      ret_types_(std::move(ret_types)),
+      captured_runner_(std::move(runner)),
+      captured_func_(captured_func) {}
+
+InstantiatedCapturedFunction::~InstantiatedCapturedFunction() {
+  if (lib_ != nullptr && f_handle_ != kInvalidHandle) {
+    lib_->ReleaseHandle(f_handle_).IgnoreError();
   }
-  *out_handle = f_handle_;
-  return Status::OK();
 }
 
-Status CapturedFunction::Run(IteratorContext* ctx, std::vector<Tensor>&& args,
-                             std::vector<Tensor>* rets) {
-  FunctionLibraryRuntime::Handle handle;
-  TF_RETURN_IF_ERROR(GetHandle(ctx, &handle));
-
+Status InstantiatedCapturedFunction::Run(IteratorContext* ctx,
+                                         std::vector<Tensor>&& args,
+                                         std::vector<Tensor>* rets) const {
   FunctionLibraryRuntime::Options f_opts;
-  f_opts.step_id = CapturedFunction::generate_step_id();
-  ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) {
-    ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError();
-  });
+  f_opts.step_id = InstantiatedCapturedFunction::generate_step_id();
+  ScopedStepContainer step_container(
+      f_opts.step_id, [this](const string& name) {
+        lib_->device()->resource_manager()->Cleanup(name).IgnoreError();
+      });
   f_opts.step_container = &step_container;
   f_opts.runner = ctx->runner();
-  if (ctx->lib()->device()->device_type() != DEVICE_CPU) {
+  if (lib_->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
   }
   // TODO(mrry): Add cancellation manager support to IteratorContext
@@ -212,10 +237,11 @@ Status CapturedFunction::Run(IteratorContext* ctx, std::vector<Tensor>&& args,
   CancellationManager c_mgr;
   f_opts.cancellation_manager = &c_mgr;
 
-  OwnedArgsCallFrame frame(std::move(args), &captured_inputs_, ret_types_);
+  OwnedArgsCallFrame frame(std::move(args), &captured_func_->captured_inputs(),
+                           ret_types_);
   Notification n;
   Status s;
-  ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) {
+  lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) {
     s.Update(func_status);
     n.Notify();
   });
@@ -224,20 +250,18 @@ Status CapturedFunction::Run(IteratorContext* ctx, std::vector<Tensor>&& args,
   return frame.ConsumeRetvals(rets);
 }
 
-Status CapturedFunction::RunWithBorrowedArgs(IteratorContext* ctx,
-                                             const std::vector<Tensor>& args,
-                                             std::vector<Tensor>* rets) {
-  FunctionLibraryRuntime::Handle handle;
-  TF_RETURN_IF_ERROR(GetHandle(ctx, &handle));
-
+Status InstantiatedCapturedFunction::RunWithBorrowedArgs(
+    IteratorContext* ctx, const std::vector<Tensor>& args,
+    std::vector<Tensor>* rets) const {
   FunctionLibraryRuntime::Options f_opts;
-  f_opts.step_id = CapturedFunction::generate_step_id();
-  ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) {
-    ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError();
-  });
+  f_opts.step_id = InstantiatedCapturedFunction::generate_step_id();
+  ScopedStepContainer step_container(
+      f_opts.step_id, [this](const string& name) {
+        lib_->device()->resource_manager()->Cleanup(name).IgnoreError();
+      });
   f_opts.step_container = &step_container;
   f_opts.runner = ctx->runner();
-  if (ctx->lib()->device()->device_type() != DEVICE_CPU) {
+  if (lib_->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
   }
   // TODO(mrry): Add cancellation manager support to IteratorContext
@@ -249,11 +273,12 @@ Status CapturedFunction::RunWithBorrowedArgs(IteratorContext* ctx,
   CancellationManager c_mgr;
   f_opts.cancellation_manager = &c_mgr;
 
-  BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_);
+  BorrowedArgsCallFrame frame(args, &captured_func_->captured_inputs(),
+                              ret_types_);
   Notification n;
   Status s;
 
-  ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) {
+  lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) {
     s.Update(func_status);
     n.Notify();
   });
@@ -262,65 +287,17 @@ Status CapturedFunction::RunWithBorrowedArgs(IteratorContext* ctx,
   return frame.ConsumeRetvals(rets);
 }
 
-Status CapturedFunction::Instantiate(IteratorContext* ctx) {
-  mutex_lock l(mu_);
-  if (lib_ == nullptr) {
-    // The context's runtime will be used for all subsequent calls.
-    lib_ = ctx->lib();
-    DCHECK(f_handle_ == kInvalidHandle);
-    FunctionLibraryRuntime::InstantiateOptions inst_opts;
-    inst_opts.overlay_lib = ctx->function_library().get();
-    inst_opts.state_handle = std::to_string(random::New64());
-    inst_opts.create_kernels_eagerly = true;
-    if (!use_inter_op_parallelism_) {
-      inst_opts.executor_type = "SINGLE_THREADED_EXECUTOR";
-    }
-    Status s = (lib_->Instantiate(func_.name(), AttrSlice(&func_.attr()),
-                                  inst_opts, &f_handle_));
-    TF_RETURN_IF_ERROR(s);
-    const FunctionBody* fbody = lib_->GetFunctionBody(f_handle_);
-    if (fbody == nullptr) {
-      return errors::Internal("Failed to instantiate function body.");
-    }
-    ret_types_ = fbody->ret_types;
-  } else {
-    if (ctx->lib() != lib_) {
-      return errors::Internal(
-          "Captured function was called with a different "
-          "FunctionLibraryRuntime*, which is not permitted.");
-    }
-  }
-  if (captured_runner_ == nullptr) {
-    captured_runner_ = *ctx->runner();
-  }
-  return Status::OK();
-}
-
-Status CapturedFunction::RunInstantiated(const std::vector<Tensor>& args,
-                                         std::vector<Tensor>* rets) {
-  FunctionLibraryRuntime* lib;
-  FunctionLibraryRuntime::Handle handle;
-  std::function<void(std::function<void()>)>* runner;
-  {
-    tf_shared_lock l(mu_);
-    if (lib_ == nullptr) {
-      return errors::FailedPrecondition(
-          "`CapturedFunction::Instantiate()` must be called before a call to "
-          "`CapturedFunction::RunInstantiated()`.");
-    }
-    lib = lib_;
-    handle = f_handle_;
-    runner = &captured_runner_;
-  }
-
+Status InstantiatedCapturedFunction::RunInstantiated(
+    const std::vector<Tensor>& args, std::vector<Tensor>* rets) {
   FunctionLibraryRuntime::Options f_opts;
-  f_opts.step_id = CapturedFunction::generate_step_id();
-  ScopedStepContainer step_container(f_opts.step_id, [lib](const string& name) {
-    lib->device()->resource_manager()->Cleanup(name).IgnoreError();
-  });
+  f_opts.step_id = InstantiatedCapturedFunction::generate_step_id();
+  ScopedStepContainer step_container(
+      f_opts.step_id, [this](const string& name) {
+        lib_->device()->resource_manager()->Cleanup(name).IgnoreError();
+      });
   f_opts.step_container = &step_container;
-  f_opts.runner = runner;
-  if (lib->device()->device_type() != DEVICE_CPU) {
+  f_opts.runner = &captured_runner_;
+  if (lib_->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
   }
   // TODO(mrry): Add cancellation manager support to IteratorContext
@@ -332,11 +309,12 @@ Status CapturedFunction::RunInstantiated(const std::vector<Tensor>& args,
   CancellationManager c_mgr;
   f_opts.cancellation_manager = &c_mgr;
 
-  BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_);
+  BorrowedArgsCallFrame frame(args, &captured_func_->captured_inputs(),
+                              ret_types_);
   Notification n;
   Status s;
 
-  lib->Run(f_opts, handle, &frame, [&n, &s](Status func_status) {
+  lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) {
     s.Update(func_status);
     n.Notify();
   });
@@ -345,33 +323,25 @@ Status CapturedFunction::RunInstantiated(const std::vector<Tensor>& args,
   return frame.ConsumeRetvals(rets);
 }
 
-void CapturedFunction::RunAsync(IteratorContext* ctx,
-                                std::vector<Tensor>&& args,
-                                std::vector<Tensor>* rets,
-                                FunctionLibraryRuntime::DoneCallback done,
-                                const string& prefix) {
+void InstantiatedCapturedFunction::RunAsync(
+    IteratorContext* ctx, std::vector<Tensor>&& args, std::vector<Tensor>* rets,
+    FunctionLibraryRuntime::DoneCallback done, const string& prefix) const {
   // NOTE(mrry): This method does not transfer ownership of `ctx`, and it may
   // be deleted before `done` is called. Take care not to capture `ctx` in any
   // code that may execute asynchronously in this function.
-  FunctionLibraryRuntime::Handle handle;
-  Status s = GetHandle(ctx, &handle);
-  if (!s.ok()) {
-    done(s);
-    return;
-  }
-  auto frame =
-      new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_);
+  auto frame = new OwnedArgsCallFrame(
+      std::move(args), &captured_func_->captured_inputs(), ret_types_);
 
   FunctionLibraryRuntime::Options f_opts;
-  f_opts.step_id = CapturedFunction::generate_step_id();
-  ResourceMgr* resource_mgr = ctx->lib()->device()->resource_manager();
+  f_opts.step_id = InstantiatedCapturedFunction::generate_step_id();
+  ResourceMgr* resource_mgr = lib_->device()->resource_manager();
   auto step_container = new ScopedStepContainer(
       f_opts.step_id, [resource_mgr](const string& name) {
         resource_mgr->Cleanup(name).IgnoreError();
       });
   f_opts.step_container = step_container;
   f_opts.runner = ctx->runner();
-  if (ctx->lib()->device()->device_type() != DEVICE_CPU) {
+  if (lib_->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
   }
   // TODO(mrry): Add cancellation manager support to IteratorContext
@@ -426,15 +396,13 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
       },
       std::move(done), std::placeholders::_1);
 
-  ctx->lib()->Run(f_opts, handle, frame, std::move(callback));
+  lib_->Run(f_opts, f_handle_, frame, std::move(callback));
 }
 
 CapturedFunction::CapturedFunction(const NameAttrList& func,
                                    std::vector<Tensor> captured_inputs,
                                    bool use_inter_op_parallelism)
     : func_(func),
-      lib_(nullptr),
-      f_handle_(kInvalidHandle),
       captured_inputs_(std::move(captured_inputs)),
       use_inter_op_parallelism_(use_inter_op_parallelism) {}
 
diff --git a/tensorflow/core/kernels/data/captured_function.h b/tensorflow/core/kernels/data/captured_function.h
index a10376bf97..1b10725082 100644
--- a/tensorflow/core/kernels/data/captured_function.h
+++ b/tensorflow/core/kernels/data/captured_function.h
@@ -34,59 +34,41 @@ class ResourceMgr;
 
 namespace data {
 
-// A `CapturedFunction` encapsulates a TensorFlow function and all of
-// the runtime support required to execute it.
+class CapturedFunction;
+
+// An InstantiatedCapturedFunction encapsulates all the runtime support needed
+// to execute a tensorflow function.
 //
-// The `Dataset`-related classes use `CapturedFunction` to execute
-// TensorFlow functions outside a the normal `OpKernel::Compute()`
-// context.
-class CapturedFunction {
+// While CapturedFunction (below) encapsulates the more permanent attributes
+// of the function i.e. name, captured arguments etc.,
+// InstantiatedCapturedFunction encapsulates the more runtime aspects i.e.
+// FunctionLibraryRuntime, function handle etc.
+//
+// The `Iterator-`related classes use `InstantiatedCapturedFunction` to execute
+// functions.
+class InstantiatedCapturedFunction {
  public:
-  // Creates a new instance using a list of named attributes, fetching captured
-  // inputs from a context argument.
-  static Status Create(const NameAttrList& func, OpKernelContext* ctx,
-                       const string& argument,
-                       std::unique_ptr<CapturedFunction>* out_function);
+  ~InstantiatedCapturedFunction();
 
-  // Creates a new instance using a list of named attributes, fetching captured
-  // inputs from a context argument.
-  //
-  // If `use_inter_op_parallelism` is false, the runtime may use an executor
-  // that is optimized for small functions.
-  static Status Create(const NameAttrList& func, OpKernelContext* ctx,
-                       const string& argument, bool use_inter_op_parallelism,
-                       std::unique_ptr<CapturedFunction>* out_function);
-
-  ~CapturedFunction();
-
-  // Runs the "Captured function" using the given FLR and caches the lib and
-  // handle generated during instantiation. If Run is called with a different
-  // lib afterwards, generates an error. This method takes ownership of the
-  // tensors in `args`, in order to be able to deallocate them as early as
+  // Runs the "Instantiated Captured function". This method takes ownership of
+  // the tensors in `args`, in order to be able to deallocate them as early as
   // possible. Use `RunWithBorrowedArgs()` if the caller needs to retain
   // ownership of the `args`.
   Status Run(IteratorContext* ctx, std::vector<Tensor>&& args,
-             std::vector<Tensor>* rets);
+             std::vector<Tensor>* rets) const;
 
   // Synchronously runs the captured function on the given `args`, and stores
   // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when
   // possible.
   Status RunWithBorrowedArgs(IteratorContext* ctx,
                              const std::vector<Tensor>& args,
-                             std::vector<Tensor>* rets);
-
-  // Explicitly instantiate this function for use in the given
-  // context. This method, and the context-less overload
-  // `RunInstantiated()` below can be useful for calling a captured
-  // function in cases where an `IteratorContext*` is not available
-  // (such as a destructor).
-  Status Instantiate(IteratorContext* ctx);
+                             std::vector<Tensor>* rets) const;
 
   // Synchronously runs the captured function on the given `args`, and stores
   // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when
-  // possible.
-  //
-  // REQUIRES: `this->Instantiate()` must have been called before this method.
+  // possible. This can be useful for calling a captured
+  // function in cases where an `IteratorContext*` is not available
+  // (such as a destructor).
   Status RunInstantiated(const std::vector<Tensor>& args,
                          std::vector<Tensor>* rets);
 
@@ -97,16 +79,9 @@ class CapturedFunction {
   void RunAsync(IteratorContext* ctx, std::vector<Tensor>&& args,
                 std::vector<Tensor>* rets,
                 FunctionLibraryRuntime::DoneCallback done,
-                const string& prefix);
-
-  // Returns the named list of function arguments.
-  const NameAttrList& func() { return func_; }
+                const string& prefix) const;
 
-  // Returns that additional captured inputs that will be passed to the function
-  // when `Run*()` is called.
-  const std::vector<Tensor>& captured_inputs() { return captured_inputs_; }
-
-  // Returns a step ID for use when running a `CapturedFunction`.
+  // Returns a step ID for use when running an `InstantiatedCapturedFunction`.
   static int64 generate_step_id() {
     // Choose a step ID that is guaranteed not to clash with any
     // Session-generated step ID. DirectSession only generates
@@ -116,21 +91,65 @@ class CapturedFunction {
     return -std::abs(static_cast<int64>(random::New64()));
   }
 
+ private:
+  InstantiatedCapturedFunction(
+      FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle,
+      DataTypeVector ret_types,
+      std::function<void(std::function<void()>)> runner,
+      CapturedFunction* captured_func);
+
+  friend class CapturedFunction;
+
+  FunctionLibraryRuntime* const lib_;
+  const FunctionLibraryRuntime::Handle f_handle_;
+  const DataTypeVector ret_types_;
+  std::function<void(std::function<void()>)> captured_runner_;
+  CapturedFunction* const captured_func_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(InstantiatedCapturedFunction);
+};
+
+// A `CapturedFunction` encapsulates a TensorFlow function.
+//
+// The `Dataset`-related classes use `CapturedFunction` to execute
+// TensorFlow functions outside a the normal `OpKernel::Compute()`
+// context.
+class CapturedFunction {
+ public:
+  // Creates a new instance using a list of named attributes, fetching captured
+  // inputs from a context argument.
+  static Status Create(const NameAttrList& func, OpKernelContext* ctx,
+                       const string& argument,
+                       std::unique_ptr<CapturedFunction>* out_function);
+
+  // Creates a new instance using a list of named attributes, fetching captured
+  // inputs from a context argument.
+  //
+  // If `use_inter_op_parallelism` is false, the runtime may use an executor
+  // that is optimized for small functions.
+  static Status Create(const NameAttrList& func, OpKernelContext* ctx,
+                       const string& argument, bool use_inter_op_parallelism,
+                       std::unique_ptr<CapturedFunction>* out_function);
+
+  // Instantiates this function for use in the given context, providing an
+  // InstantiatedCapturedFunction that can be used to execute functions.
+  Status Instantiate(IteratorContext* ctx,
+                     std::unique_ptr<InstantiatedCapturedFunction>*
+                         instantiated_captured_function);
+
+  // Returns the named list of function arguments.
+  const NameAttrList& func() { return func_; }
+
+  // Returns that additional captured inputs that will be passed to the function
+  const std::vector<Tensor>& captured_inputs() { return captured_inputs_; }
+
  private:
   CapturedFunction(const NameAttrList& func,
                    std::vector<Tensor> captured_inputs,
                    bool use_inter_op_parallelism);
 
-  Status GetHandle(IteratorContext* ctx,
-                   FunctionLibraryRuntime::Handle* out_handle);
-
-  mutex mu_;
   const NameAttrList func_;
-  FunctionLibraryRuntime* lib_ GUARDED_BY(mu_);
-  FunctionLibraryRuntime::Handle f_handle_ GUARDED_BY(mu_);
   const std::vector<Tensor> captured_inputs_;
-  DataTypeSlice ret_types_;
-  std::function<void(std::function<void()>)> captured_runner_ = nullptr;
   const bool use_inter_op_parallelism_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(CapturedFunction);
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index e7ac368ae3..36a1837295 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -21,12 +21,13 @@ namespace data {
 
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
-    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
-    std::unique_ptr<IteratorBase>* out_iterator) {
+    int64 thread_index,
+    const InstantiatedCapturedFunction& instantiated_captured_func,
+    StringPiece prefix, std::unique_ptr<IteratorBase>* out_iterator) {
   std::vector<Tensor> return_values;
 
-  TF_RETURN_IF_ERROR(
-      captured_func->RunWithBorrowedArgs(ctx, input_element, &return_values));
+  TF_RETURN_IF_ERROR(instantiated_captured_func.RunWithBorrowedArgs(
+      ctx, input_element, &return_values));
 
   if (!(return_values.size() == 1 && return_values[0].dtype() == DT_VARIANT &&
         TensorShapeUtils::IsScalar(return_values[0].shape()))) {
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index 234856ea39..3de157b4bc 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -24,8 +24,9 @@ namespace data {
 
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
-    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
-    std::unique_ptr<IteratorBase>* out_iterator);
+    int64 thread_index,
+    const InstantiatedCapturedFunction& instantiated_captured_func,
+    StringPiece prefix, std::unique_ptr<IteratorBase>* out_iterator);
 
 }  // namespace data
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index 19c35f94a6..dfdc16f347 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -131,9 +131,10 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       return Status::OK();
     }
 
-    virtual Status EvaluatePredicate(IteratorContext* ctx,
-                                     const std::vector<Tensor>& element,
-                                     bool* out_matched) const = 0;
+    virtual Status EvaluatePredicate(
+        IteratorContext* ctx,
+        InstantiatedCapturedFunction* instantiated_captured_function,
+        const std::vector<Tensor>& element, bool* out_matched) const = 0;
 
    private:
     class Iterator : public DatasetIterator<FilterDatasetBase> {
@@ -144,7 +145,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(ctx);
+        return dataset()->captured_func_->Instantiate(
+            ctx, &instantiated_captured_func_);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -171,8 +173,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          TF_RETURN_IF_ERROR(
-              dataset()->EvaluatePredicate(ctx, *out_tensors, &matched));
+          TF_RETURN_IF_ERROR(dataset()->EvaluatePredicate(
+              ctx, instantiated_captured_func_.get(), *out_tensors, &matched));
           if (!matched) {
             // Clear the output tensor list since it didn't match.
             out_tensors->clear();
@@ -206,6 +208,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
      private:
       mutex mu_;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
@@ -220,14 +223,15 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
     using FilterDatasetBase::FilterDatasetBase;
 
    protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
+    Status EvaluatePredicate(
+        IteratorContext* ctx,
+        InstantiatedCapturedFunction* instantiated_captured_function,
+        const std::vector<Tensor>& element, bool* out_matched) const override {
       // TODO(mrry): Avoid blocking a threadpool thread. We will need to
       // stack-rip the iterators and use async kernels.
       std::vector<Tensor> result;
-      TF_RETURN_IF_ERROR(
-          captured_func_->RunWithBorrowedArgs(ctx, element, &result));
+      TF_RETURN_IF_ERROR(instantiated_captured_function->RunWithBorrowedArgs(
+          ctx, element, &result));
 
       if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
           result[0].NumElements() != 1) {
@@ -249,9 +253,10 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
           index_(index) {}
 
    protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
+    Status EvaluatePredicate(
+        IteratorContext* ctx,
+        InstantiatedCapturedFunction* instantiated_captured_function,
+        const std::vector<Tensor>& element, bool* out_matched) const override {
       const Tensor& predicate = element[index_];
       if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
         return errors::InvalidArgument(
diff --git a/tensorflow/core/kernels/data/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
index 2fada22a21..3af8162137 100644
--- a/tensorflow/core/kernels/data/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
@@ -122,7 +122,8 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(ctx);
+        return dataset()->captured_func_->Instantiate(
+            ctx, &instantiated_captured_func_);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -238,8 +239,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         return MakeIteratorFromInputElement(
             ctx, captured_func_inputs_, element_index_++,
-            dataset()->captured_func_.get(), prefix(),
-            &current_element_iterator_);
+            *instantiated_captured_func_, prefix(), &current_element_iterator_);
       }
 
       Status BuildCurrentElementIteratorLocked(OpKernelContext* ctx)
@@ -257,6 +257,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> current_element_iterator_ GUARDED_BY(mu_);
       std::vector<Tensor> captured_func_inputs_ GUARDED_BY(mu_);
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/generator_dataset_op.cc b/tensorflow/core/kernels/data/generator_dataset_op.cc
index 71a36314a0..c7d8cfce90 100644
--- a/tensorflow/core/kernels/data/generator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/generator_dataset_op.cc
@@ -73,7 +73,8 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
     ~Iterator() override {
       if (!finalized_) {
         std::vector<Tensor> ignored;
-        Status s = dataset()->finalize_func_->RunInstantiated(state_, &ignored);
+        Status s =
+            instantiated_finalize_func_->RunInstantiated(state_, &ignored);
         if (!s.ok()) {
           LOG(WARNING)
               << "Error occurred when finalizing GeneratorDataset iterator: "
@@ -83,11 +84,14 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
     }
 
     Status Initialize(IteratorContext* ctx) override {
-      TF_RETURN_IF_ERROR(dataset()->init_func_->Instantiate(ctx));
-      TF_RETURN_IF_ERROR(dataset()->next_func_->Instantiate(ctx));
-      TF_RETURN_IF_ERROR(dataset()->finalize_func_->Instantiate(ctx));
       TF_RETURN_IF_ERROR(
-          dataset()->init_func_->RunWithBorrowedArgs(ctx, {}, &state_));
+          dataset()->init_func_->Instantiate(ctx, &instantiated_init_func_));
+      TF_RETURN_IF_ERROR(
+          dataset()->next_func_->Instantiate(ctx, &instantiated_next_func_));
+      TF_RETURN_IF_ERROR(dataset()->finalize_func_->Instantiate(
+          ctx, &instantiated_finalize_func_));
+      TF_RETURN_IF_ERROR(
+          instantiated_init_func_->RunWithBorrowedArgs(ctx, {}, &state_));
       return Status::OK();
     }
 
@@ -101,8 +105,8 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
         return Status::OK();
       }
 
-      Status s =
-          dataset()->next_func_->RunWithBorrowedArgs(ctx, state_, out_tensors);
+      Status s = instantiated_next_func_->RunWithBorrowedArgs(ctx, state_,
+                                                              out_tensors);
       if (s.ok()) {
         *end_of_sequence = false;
       } else if (errors::IsOutOfRange(s)) {
@@ -115,7 +119,7 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
         // finalize function.
         std::vector<Tensor> ignored;
         TF_RETURN_IF_ERROR(
-            dataset()->finalize_func_->RunInstantiated(state_, &ignored));
+            instantiated_finalize_func_->RunInstantiated(state_, &ignored));
         finalized_ = true;
       }
       return s;
@@ -125,6 +129,9 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
     mutex mu_;
     bool finalized_ GUARDED_BY(mu_) = false;
     std::vector<Tensor> state_ GUARDED_BY(mu_);
+    std::unique_ptr<InstantiatedCapturedFunction> instantiated_init_func_;
+    std::unique_ptr<InstantiatedCapturedFunction> instantiated_next_func_;
+    std::unique_ptr<InstantiatedCapturedFunction> instantiated_finalize_func_;
   };
 
   const std::unique_ptr<CapturedFunction> init_func_;
diff --git a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
index d6ee42a7c6..9cfcbbf8f6 100644
--- a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
@@ -192,11 +192,14 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(ctx));
-        TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Instantiate(ctx));
-        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(ctx));
-        TF_RETURN_IF_ERROR(
-            dataset()->captured_finalize_func_->Instantiate(ctx));
+        TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(
+            ctx, &instantiated_key_func_));
+        TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Instantiate(
+            ctx, &instantiated_init_func_));
+        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(
+            ctx, &instantiated_reduce_func_));
+        TF_RETURN_IF_ERROR(dataset()->captured_finalize_func_->Instantiate(
+            ctx, &instantiated_finalize_func_));
         return Status::OK();
       }
 
@@ -214,9 +217,8 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
           if (!end_of_input_) {
             // Run the key function on the input element.
             std::vector<Tensor> key_func_output;
-            TF_RETURN_IF_ERROR(
-                dataset()->captured_key_func_->RunWithBorrowedArgs(
-                    ctx, next_input_element, &key_func_output));
+            TF_RETURN_IF_ERROR(instantiated_key_func_->RunWithBorrowedArgs(
+                ctx, next_input_element, &key_func_output));
 
             if (key_func_output.size() != 1 ||
                 key_func_output[0].dtype() != DT_INT64 ||
@@ -230,7 +232,7 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
             if (states_.find(key) == states_.end()) {
               // Run the init function to create the initial state.
               std::vector<Tensor> init_func_output;
-              TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Run(
+              TF_RETURN_IF_ERROR(instantiated_init_func_->Run(
                   ctx, std::move(key_func_output), &init_func_output));
               states_[key] = init_func_output;
             }
@@ -244,7 +246,7 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
                       std::back_inserter(args));
 
             std::vector<Tensor> reduce_func_output;
-            TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run(
+            TF_RETURN_IF_ERROR(instantiated_reduce_func_->Run(
                 ctx, std::move(args), &reduce_func_output));
             states_[key] = reduce_func_output;
           } else {
@@ -260,9 +262,8 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
           *end_of_sequence = true;
           return Status::OK();
         }
-        TF_RETURN_IF_ERROR(
-            dataset()->captured_finalize_func_->RunWithBorrowedArgs(
-                ctx, states_[keys_[keys_index_++]], out_tensors));
+        TF_RETURN_IF_ERROR(instantiated_finalize_func_->RunWithBorrowedArgs(
+            ctx, states_[keys_[keys_index_++]], out_tensors));
         *end_of_sequence = false;
         return Status::OK();
       }
@@ -380,6 +381,10 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
       std::map<int64, std::vector<Tensor>> states_ GUARDED_BY(mu_);
       std::vector<int64> keys_ GUARDED_BY(mu_);
       int64 keys_index_ GUARDED_BY(mu_) = 0;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_key_func_;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_init_func_;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_reduce_func_;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_finalize_func_;
     };
 
     const NameAttrList& key_func() const { return captured_key_func_->func(); }
diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
index 8b417bb1c2..2ea59bee5c 100644
--- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
@@ -176,10 +176,12 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(ctx));
-        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(ctx));
-        TF_RETURN_IF_ERROR(
-            dataset()->captured_window_size_func_->Instantiate(ctx));
+        TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(
+            ctx, &instantiated_key_func_));
+        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(
+            ctx, &instantiated_reduce_func_));
+        TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Instantiate(
+            ctx, &instantiated_window_size_func_));
         return Status::OK();
       }
 
@@ -216,9 +218,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
               // Run the key function on the input element to identify its
               // group.
               std::vector<Tensor> key_func_output;
-              TF_RETURN_IF_ERROR(
-                  dataset()->captured_key_func_->RunWithBorrowedArgs(
-                      ctx, next_input_element, &key_func_output));
+              TF_RETURN_IF_ERROR(instantiated_key_func_->RunWithBorrowedArgs(
+                  ctx, next_input_element, &key_func_output));
 
               if (key_func_output.size() != 1 ||
                   key_func_output[0].dtype() != DT_INT64 ||
@@ -233,7 +234,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
                 // Run the window size function on the key to identify its
                 // window size.
                 std::vector<Tensor> window_size_func_output;
-                TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Run(
+                TF_RETURN_IF_ERROR(instantiated_window_size_func_->Run(
                     ctx, std::move(key_func_output), &window_size_func_output));
 
                 if (window_size_func_output.size() != 1 ||
@@ -448,8 +449,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
         std::vector<Tensor> args(
             {std::move(key_arg), std::move(group_dataset_arg)});
         std::vector<Tensor> return_values;
-        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run(
-            ctx, std::move(args), &return_values));
+        TF_RETURN_IF_ERROR(instantiated_reduce_func_->Run(ctx, std::move(args),
+                                                          &return_values));
 
         if (!(return_values.size() == 1 &&
               return_values[0].dtype() == DT_VARIANT &&
@@ -478,6 +479,10 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
       std::map<int64, std::vector<std::vector<Tensor>>> groups_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> current_group_iterator_ GUARDED_BY(mu_);
       std::map<int64, int64> window_sizes_ GUARDED_BY(mu_);
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_key_func_;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_reduce_func_;
+      std::unique_ptr<InstantiatedCapturedFunction>
+          instantiated_window_size_func_;
     };
 
     Status OtherArgumentsNodeAndType(
diff --git a/tensorflow/core/kernels/data/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc
index 0aa802b874..91c298ce9a 100644
--- a/tensorflow/core/kernels/data/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc
@@ -149,7 +149,8 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(ctx);
+        return dataset()->captured_func_->Instantiate(
+            ctx, &instantiated_captured_func_);
       }
 
       void AdvanceToNextInCycle() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
@@ -195,7 +196,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
             if (!end_of_input_) {
               TF_RETURN_IF_ERROR(MakeIteratorFromInputElement(
                   ctx, args_list_[cycle_index_], cycle_index_,
-                  dataset()->captured_func_.get(), prefix(),
+                  *instantiated_captured_func_, prefix(),
                   &current_elements_[cycle_index_]));
               ++num_open_;
             }
@@ -281,7 +282,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
                   &args_list_[idx][i]));
             }
             TF_RETURN_IF_ERROR(MakeIteratorFromInputElement(
-                ctx, args_list_[idx], idx, dataset()->captured_func_.get(),
+                ctx, args_list_[idx], idx, *instantiated_captured_func_,
                 prefix(), &current_elements_[idx]));
             TF_RETURN_IF_ERROR(
                 RestoreInput(ctx, reader, current_elements_[idx]));
@@ -301,6 +302,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
       int64 block_index_ GUARDED_BY(mu_) = 0;
       bool end_of_input_ GUARDED_BY(mu_) = false;
       size_t num_open_ GUARDED_BY(mu_) = 0;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 83896219a3..71d3335452 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -218,7 +218,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         }
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(ctx);
+        return dataset()->captured_func_->Instantiate(
+            ctx, &instantiated_captured_func_);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -375,7 +376,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                                    std::vector<Tensor> input_element) {
               std::shared_ptr<std::vector<Tensor>> return_values(
                   new std::vector<Tensor>());
-              dataset()->captured_func_->RunAsync(
+              instantiated_captured_func_->RunAsync(
                   ctx.get(), std::move(input_element), return_values.get(),
                   [this, ctx, result, return_values, offset](Status status) {
                     Callback(ctx, result, return_values, offset, status);
@@ -672,6 +673,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(mu_);
       std::unique_ptr<Thread> runner_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index f112e1dc43..5b891b4fd5 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -122,7 +122,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(ctx);
+        return dataset()->captured_func_->Instantiate(
+            ctx, &instantiated_captured_func_);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -142,7 +143,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
         // TODO(mrry): Avoid blocking a threadpool thread. We will need to
         // stack-rip the iterators and use async kernels.
         Status s =
-            dataset()->captured_func_->Run(ctx, std::move(args), out_tensors);
+            instantiated_captured_func_->Run(ctx, std::move(args), out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
@@ -167,6 +168,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
      private:
       std::unique_ptr<IteratorBase> input_impl_;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 9cd46bf5dd..448cc93a8c 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -247,7 +247,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
         AddConstantParameter(ctx, "parallelism", dataset()->cycle_length_);
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(ctx);
+        return dataset()->captured_func_->Instantiate(
+            ctx, &instantiated_captured_func_);
       }
 
       // It is implemented so that it matches the deterministic interleave
@@ -685,7 +686,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
               worker_thread_states_[thread_index].iterator_creation_status =
                   MakeIteratorFromInputElement(
                       ctx.get(), worker_thread_states_[thread_index].input,
-                      thread_index, dataset()->captured_func_.get(), prefix(),
+                      thread_index, *instantiated_captured_func_, prefix(),
                       &worker_thread_states_[thread_index].iterator);
               iterator_creation_status =
                   worker_thread_states_[thread_index].iterator_creation_status;
@@ -919,7 +920,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           std::unique_ptr<IteratorBase> iterator;
           Status s = MakeIteratorFromInputElement(
               ctx, worker_thread_states_[index].input, index,
-              dataset()->captured_func_.get(), prefix(), &iterator);
+              *instantiated_captured_func_, prefix(), &iterator);
           TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, iterator));
           worker_thread_states_[index].iterator.swap(iterator);
         }
@@ -1047,6 +1048,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
       std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
@@ -1254,7 +1256,8 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         AddConstantParameter(ctx, "cycle_length", dataset()->cycle_length_);
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(ctx);
+        return dataset()->captured_func_->Instantiate(
+            ctx, &instantiated_captured_func_);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -1490,7 +1493,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
                 if (!end_of_input_) {
                   Status status = MakeIteratorFromInputElement(
                       ctx.get(), args_list_[cycle_index_], cycle_index_,
-                      dataset()->captured_func_.get(), prefix(),
+                      *instantiated_captured_func_, prefix(),
                       &current_elements_[cycle_index_]);
                   if (!status.ok()) {
                     invocation_results_.emplace_back(new InvocationResult());
@@ -1599,7 +1602,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
                   &args_list_[idx][i]));
             }
             TF_RETURN_IF_ERROR(MakeIteratorFromInputElement(
-                ctx, args_list_[idx], idx, dataset()->captured_func_.get(),
+                ctx, args_list_[idx], idx, *instantiated_captured_func_,
                 prefix(), &current_elements_[idx]));
             TF_RETURN_IF_ERROR(
                 RestoreInput(ctx, reader, current_elements_[idx]));
@@ -1659,6 +1662,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(mu_) = false;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 6abe6c8338..822f06be9e 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -85,29 +85,11 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      auto init_func = [this](IteratorContext* ctx) {
-        return captured_func_->Instantiate(ctx);
-      };
-
       const string& new_prefix = strings::StrCat(prefix, "::ParallelMap");
-      ParallelMapIteratorFunction map_func =
-          [this, new_prefix](IteratorContext* ctx,
-                             std::vector<Tensor> input_element,
-                             std::vector<Tensor>* result, StatusCallback done) {
-            captured_func_->RunAsync(ctx, std::move(input_element), result,
-                                     std::move(done), new_prefix);
-          };
-      if (!use_inter_op_parallelism_) {
-        map_func = [map_func](
-                       IteratorContext* ctx, std::vector<Tensor> input_element,
-                       std::vector<Tensor>* result, StatusCallback done) {
-          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
-                                     result, std::move(done)));
-        };
-      }
-
+      std::unique_ptr<ParallelMapDatasetFunctor> parallel_map_dataset_functor(
+          new ParallelMapDatasetFunctor(this, new_prefix));
       return NewParallelMapIterator({this, new_prefix}, input_,
-                                    std::move(init_func), std::move(map_func),
+                                    std::move(parallel_map_dataset_functor),
                                     num_parallel_calls_);
     }
 
@@ -169,6 +151,39 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     }
 
    private:
+    class ParallelMapDatasetFunctor : public ParallelMapFunctor {
+     public:
+      ParallelMapDatasetFunctor(const Dataset* dataset, const string& prefix)
+          : dataset_(dataset), prefix_(prefix) {}
+
+      Status InitFunc(IteratorContext* ctx) override {
+        return dataset_->captured_func_->Instantiate(
+            ctx, &instantiated_captured_func_);
+      }
+
+      void MapFunc(IteratorContext* ctx, std::vector<Tensor> input_element,
+                   std::vector<Tensor>* result, StatusCallback done) override {
+        auto map_func = [this](IteratorContext* ctx,
+                               std::vector<Tensor> input_element,
+                               std::vector<Tensor>* result,
+                               StatusCallback done) {
+          instantiated_captured_func_->RunAsync(
+              ctx, std::move(input_element), result, std::move(done), prefix_);
+        };
+        if (!dataset_->use_inter_op_parallelism_) {
+          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
+                                     result, std::move(done)));
+        } else {
+          map_func(ctx, std::move(input_element), result, std::move(done));
+        }
+      }
+
+     private:
+      const Dataset* dataset_;
+      const string prefix_;
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
+    };
+
     const DatasetBase* const input_;
     const NameAttrList func_;
     const int32 num_parallel_calls_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 5f6052ce83..4f8e0489de 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -31,12 +31,11 @@ class ParallelMapIterator : public DatasetBaseIterator {
   explicit ParallelMapIterator(
       const typename DatasetBaseIterator::BaseParams& params,
       const DatasetBase* input_dataset,
-      std::function<Status(IteratorContext*)> init_func,
-      ParallelMapIteratorFunction map_func, int32 num_parallel_calls)
+      std::unique_ptr<ParallelMapFunctor> parallel_map_functor,
+      int32 num_parallel_calls)
       : DatasetBaseIterator(params),
         input_dataset_(input_dataset),
-        init_func_(std::move(init_func)),
-        map_func_(std::move(map_func)),
+        parallel_map_functor_(std::move(parallel_map_functor)),
         num_parallel_calls_(num_parallel_calls) {}
 
   ~ParallelMapIterator() override {
@@ -77,10 +76,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
     }
     TF_RETURN_IF_ERROR(
         input_dataset_->MakeIterator(ctx, prefix(), &input_impl_));
-    if (init_func_) {
-      TF_RETURN_IF_ERROR(init_func_(ctx));
-    }
-    return Status::OK();
+    return parallel_map_functor_->InitFunc(ctx);
   }
 
   Status GetNextInternal(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
@@ -226,8 +222,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
       CallCompleted(result);
     };
 
-    map_func_(ctx.get(), std::move(input_element), &result->return_values,
-              std::move(done));
+    parallel_map_functor_->MapFunc(ctx.get(), std::move(input_element),
+                                   &result->return_values, std::move(done));
   }
 
   Status ProcessResult(const std::shared_ptr<InvocationResult>& result,
@@ -323,8 +319,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   const DatasetBase* const input_dataset_;  // Not owned.
-  const std::function<Status(IteratorContext*)> init_func_;
-  const ParallelMapIteratorFunction map_func_;
+  std::unique_ptr<ParallelMapFunctor> parallel_map_functor_;
   // Used for coordination between the main thread and the runner thread.
   mutex mu_;
   // Used for coordination between the main thread and the runner thread. In
@@ -347,22 +342,14 @@ class ParallelMapIterator : public DatasetBaseIterator {
 
 }  // namespace
 
-std::unique_ptr<IteratorBase> NewParallelMapIterator(
-    const DatasetBaseIterator::BaseParams& params,
-    const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func,
-    int32 num_parallel_calls) {
-  return NewParallelMapIterator(params, input_dataset, nullptr,
-                                std::move(map_func), num_parallel_calls);
-}
-
 std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBaseIterator::BaseParams& params,
     const DatasetBase* input_dataset,
-    std::function<Status(IteratorContext*)> init_func,
-    ParallelMapIteratorFunction map_func, int32 num_parallel_calls) {
-  return std::unique_ptr<IteratorBase>(
-      new ParallelMapIterator(params, input_dataset, std::move(init_func),
-                              std::move(map_func), num_parallel_calls));
+    std::unique_ptr<ParallelMapFunctor> parallel_map_functor,
+    int32 num_parallel_calls) {
+  return std::unique_ptr<IteratorBase>(new ParallelMapIterator(
+      params, input_dataset, std::move(parallel_map_functor),
+      num_parallel_calls));
 }
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h
index dc26c5cf25..62e57e5335 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.h
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h
@@ -22,30 +22,32 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
-// A function that transforms elements of one dataset into another
-// asynchronously. The arguments are:
-// 1. An `IteratorContext*` for the context in which the function should
-// execute.
-// 2. A `std::vector<Tensor>` containing the input element.
-// 3. A `std::vector<Tensor>*` to which the function will write the result.
-// 4. A `StatusCallback` that should be invoked when the function is complete.
-using ParallelMapIteratorFunction =
-    std::function<void(IteratorContext*, std::vector<Tensor>,
-                       std::vector<Tensor>*, StatusCallback)>;
-
-// Returns a new iterator that applies `map_func` to the elements of
-// `input_dataset` using the given degree of parallelism. `init_func` (if
-// specified) will be executed when the iterator is initialized (see
-// `IteratorBase::Initialize()`) and enables the user to specify error checking
-// logic that can fail early.
+class ParallelMapFunctor {
+ public:
+  virtual ~ParallelMapFunctor() {}
+
+  // A function that runs when the Iterator is initialized. It enables the user
+  // to specify error checking logic that can fail early.
+  virtual Status InitFunc(IteratorContext* ctx) { return Status::OK(); }
+
+  // A function that transforms elements of one dataset into another
+  // asynchronously. The arguments are:
+  // 1. An `IteratorContext*` for the context in which the function should
+  // execute.
+  // 2. A `std::vector<Tensor>` containing the input element.
+  // 3. A `std::vector<Tensor>*` to which the function will write the result.
+  // 4. A `StatusCallback` that should be invoked when the function is complete.
+  virtual void MapFunc(IteratorContext* ctx, std::vector<Tensor> input,
+                       std::vector<Tensor>* output,
+                       StatusCallback callback) = 0;
+};
+
+// Returns a new iterator that uses `parallel_map_functor` to apply `MapFunc`
+// to the elements of `input_dataset` using the given degree of parallelism.
 std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBaseIterator::BaseParams& params,
     const DatasetBase* input_dataset,
-    std::function<Status(IteratorContext*)> init_func,
-    ParallelMapIteratorFunction map_func, int32 num_parallel_calls);
-std::unique_ptr<IteratorBase> NewParallelMapIterator(
-    const DatasetBaseIterator::BaseParams& params,
-    const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func,
+    std::unique_ptr<ParallelMapFunctor> parallel_map_functor,
     int32 num_parallel_calls);
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index c28c06da62..32210ef677 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -182,9 +182,80 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      auto map_fn = [this](IteratorContext* ctx,
-                           std::vector<Tensor> input_element,
-                           std::vector<Tensor>* result, StatusCallback done) {
+      std::unique_ptr<ParallelMapFunctor> parse_example_functor(
+          new ParseExampleFunctor(this));
+      return NewParallelMapIterator(
+          {this, strings::StrCat(prefix, "::ParseExample")}, input_,
+          std::move(parse_example_functor), num_parallel_calls_);
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return output_types_;
+    }
+
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return output_shapes_;
+    }
+
+    string DebugString() const override {
+      return "ParseExampleDatasetOp::Dataset";
+    }
+
+   protected:
+    Status AsGraphDefInternal(SerializationContext* ctx,
+                              DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
+
+      Node* num_parallle_calls_node;
+      std::vector<Node*> dense_defaults_nodes;
+      dense_defaults_nodes.reserve(dense_defaults_.size());
+
+      TF_RETURN_IF_ERROR(
+          b->AddScalar(num_parallel_calls_, &num_parallle_calls_node));
+
+      for (const Tensor& dense_default : dense_defaults_) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(dense_default, &node));
+        dense_defaults_nodes.emplace_back(node);
+      }
+
+      AttrValue sparse_keys_attr;
+      AttrValue dense_keys_attr;
+      AttrValue sparse_types_attr;
+      AttrValue dense_attr;
+      AttrValue dense_shapes_attr;
+
+      b->BuildAttrValue(sparse_keys_, &sparse_keys_attr);
+      b->BuildAttrValue(dense_keys_, &dense_keys_attr);
+      b->BuildAttrValue(sparse_types_, &sparse_types_attr);
+      b->BuildAttrValue(dense_types_, &dense_attr);
+      b->BuildAttrValue(dense_shapes_, &dense_shapes_attr);
+
+      TF_RETURN_IF_ERROR(b->AddDataset(this,
+                                       {
+                                           {0, input_graph_node},
+                                           {1, num_parallle_calls_node},
+                                       },
+                                       {{2, dense_defaults_nodes}},
+                                       {{"sparse_keys", sparse_keys_attr},
+                                        {"dense_keys", dense_keys_attr},
+                                        {"sparse_types", sparse_types_attr},
+                                        {"Tdense", dense_attr},
+                                        {"dense_shapes", dense_shapes_attr}},
+                                       output));
+      return Status::OK();
+    }
+
+   private:
+    class ParseExampleFunctor : public ParallelMapFunctor {
+     public:
+      explicit ParseExampleFunctor(const Dataset* dataset)
+          : dataset_(dataset) {}
+
+      void MapFunc(IteratorContext* ctx, std::vector<Tensor> input_element,
+                   std::vector<Tensor>* result, StatusCallback done) override {
         (*ctx->runner())([this, ctx, input_element, result, done]() {
           thread::ThreadPool* device_threadpool =
               ctx->lib()->device()->tensorflow_cpu_worker_threads()->workers;
@@ -196,7 +267,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
             for (auto it = slice.begin(); it != slice.end(); it++)
               slice_vec.push_back(*it);
           }
-          example::FastParseExampleConfig config = config_;
+          example::FastParseExampleConfig config = dataset_->config_;
           // local copy of config_ for modification.
           auto stats_aggregator = ctx->stats_aggregator();
           if (stats_aggregator) {
@@ -206,43 +277,50 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
           Status s = FastParseExample(config, slice_vec, {}, device_threadpool,
                                       &example_result);
           if (s.ok()) {
-            (*result).resize(key_to_output_index_.size());
-            for (int d = 0; d < dense_keys_.size(); ++d) {
-              int output_index = key_to_output_index_.at(dense_keys_[d]);
-              CHECK(example_result.dense_values[d].dtype() ==
-                    output_dtypes()[output_index])
+            (*result).resize(dataset_->key_to_output_index_.size());
+            for (int d = 0; d < dataset_->dense_keys_.size(); ++d) {
+              int output_index =
+                  dataset_->key_to_output_index_.at(dataset_->dense_keys_[d]);
+              DCHECK(example_result.dense_values[d].dtype() ==
+                     dataset_->output_dtypes()[output_index])
                   << "Got wrong type for FastParseExample return value " << d
                   << " (expected "
-                  << DataTypeString(output_dtypes()[output_index]) << ", got "
+                  << DataTypeString(dataset_->output_dtypes()[output_index])
+                  << ", got "
                   << DataTypeString(example_result.dense_values[d].dtype())
                   << ").";
-              CHECK(output_shapes()[output_index].IsCompatibleWith(
+              DCHECK(dataset_->output_shapes()[output_index].IsCompatibleWith(
                   example_result.dense_values[d].shape()))
                   << "Got wrong shape for FastParseExample return value " << d
                   << " (expected "
-                  << output_shapes()[output_index].DebugString() << ", got "
+                  << dataset_->output_shapes()[output_index].DebugString()
+                  << ", got "
                   << example_result.dense_values[d].shape().DebugString()
                   << ").";
               (*result)[output_index] = example_result.dense_values[d];
             }
-            for (int d = 0; d < sparse_keys_.size(); ++d) {
+            for (int d = 0; d < dataset_->sparse_keys_.size(); ++d) {
               Tensor serialized_sparse = Tensor(DT_VARIANT, TensorShape({3}));
               auto serialized_sparse_t = serialized_sparse.vec<Variant>();
               serialized_sparse_t(0) = example_result.sparse_indices[d];
               serialized_sparse_t(1) = example_result.sparse_values[d];
               serialized_sparse_t(2) = example_result.sparse_shapes[d];
-              int output_index = key_to_output_index_.at(sparse_keys_[d]);
-              CHECK(serialized_sparse.dtype() == output_dtypes()[output_index])
+              int output_index =
+                  dataset_->key_to_output_index_.at(dataset_->sparse_keys_[d]);
+              DCHECK(serialized_sparse.dtype() ==
+                     dataset_->output_dtypes()[output_index])
                   << "Got wrong type for FastParseExample return value " << d
                   << " (expected "
-                  << DataTypeString(output_dtypes()[output_index]) << ", got "
-                  << DataTypeString(serialized_sparse.dtype()) << ").";
-              CHECK(output_shapes()[output_index].IsCompatibleWith(
+                  << DataTypeString(dataset_->output_dtypes()[output_index])
+                  << ", got " << DataTypeString(serialized_sparse.dtype())
+                  << ").";
+              DCHECK(dataset_->output_shapes()[output_index].IsCompatibleWith(
                   serialized_sparse.shape()))
                   << "Got wrong shape for FastParseExample return value " << d
                   << " (expected "
-                  << output_shapes()[output_index].DebugString() << ", got "
-                  << serialized_sparse.shape().DebugString() << ").";
+                  << dataset_->output_shapes()[output_index].DebugString()
+                  << ", got " << serialized_sparse.shape().DebugString()
+                  << ").";
               (*result)[output_index] = serialized_sparse;
             }
             // TODO(b/111553342): User provided tags instead of fixed tag.
@@ -268,73 +346,12 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
           }
           done(s);
         });
-      };
-
-      return NewParallelMapIterator(
-          {this, strings::StrCat(prefix, "::ParseExample")}, input_,
-          std::move(map_fn), num_parallel_calls_);
-    }
-
-    const DataTypeVector& output_dtypes() const override {
-      return output_types_;
-    }
-
-    const std::vector<PartialTensorShape>& output_shapes() const override {
-      return output_shapes_;
-    }
-
-    string DebugString() const override {
-      return "ParseExampleDatasetOp::Dataset";
-    }
-
-   protected:
-    Status AsGraphDefInternal(SerializationContext* ctx,
-                              DatasetGraphDefBuilder* b,
-                              Node** output) const override {
-      Node* input_graph_node = nullptr;
-      TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
-
-      Node* num_parallle_calls_node;
-      std::vector<Node*> dense_defaults_nodes;
-      dense_defaults_nodes.reserve(dense_defaults_.size());
-
-      TF_RETURN_IF_ERROR(
-          b->AddScalar(num_parallel_calls_, &num_parallle_calls_node));
-
-      for (const Tensor& dense_default : dense_defaults_) {
-        Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(dense_default, &node));
-        dense_defaults_nodes.emplace_back(node);
       }
 
-      AttrValue sparse_keys_attr;
-      AttrValue dense_keys_attr;
-      AttrValue sparse_types_attr;
-      AttrValue dense_attr;
-      AttrValue dense_shapes_attr;
-
-      b->BuildAttrValue(sparse_keys_, &sparse_keys_attr);
-      b->BuildAttrValue(dense_keys_, &dense_keys_attr);
-      b->BuildAttrValue(sparse_types_, &sparse_types_attr);
-      b->BuildAttrValue(dense_types_, &dense_attr);
-      b->BuildAttrValue(dense_shapes_, &dense_shapes_attr);
-
-      TF_RETURN_IF_ERROR(b->AddDataset(this,
-                                       {
-                                           {0, input_graph_node},
-                                           {1, num_parallle_calls_node},
-                                       },
-                                       {{2, dense_defaults_nodes}},
-                                       {{"sparse_keys", sparse_keys_attr},
-                                        {"dense_keys", dense_keys_attr},
-                                        {"sparse_types", sparse_types_attr},
-                                        {"Tdense", dense_attr},
-                                        {"dense_shapes", dense_shapes_attr}},
-                                       output));
-      return Status::OK();
-    }
+     private:
+      const Dataset* dataset_;
+    };
 
-   private:
     const DatasetBase* const input_;
     const std::vector<Tensor> dense_defaults_;
     const std::vector<string> sparse_keys_;
diff --git a/tensorflow/core/kernels/data/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc
index dbe31f37b8..d9fdd59bf0 100644
--- a/tensorflow/core/kernels/data/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/data/scan_dataset_op.cc
@@ -144,7 +144,8 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(ctx);
+        return dataset()->captured_func_->Instantiate(
+            ctx, &instantiated_captured_func_);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -169,8 +170,8 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
         state_and_output.reserve(dataset()->state_types_.size() +
                                  output_dtypes().size());
 
-        Status s = dataset()->captured_func_->Run(ctx, std::move(args),
-                                                  &state_and_output);
+        Status s = instantiated_captured_func_->Run(ctx, std::move(args),
+                                                    &state_and_output);
         if (s.ok()) {
           state_.clear();
           size_t i = 0;
@@ -247,6 +248,7 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       std::vector<Tensor> state_ GUARDED_BY(mu_);
+      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
-- 
GitLab


From 6d67ba41f566e963e2c061ca7df63edad89e1fca Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 18 Sep 2018 18:56:55 +0300
Subject: [PATCH 0327/1357] Work out the endianness statically.

---
 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
index 5b42de4c5a..484cc4d6f5 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -20,12 +20,12 @@ limitations under the License.
 
 namespace tensorflow {
 
+constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__;
+
 class ByteSwapper {
  public:
   ByteSwapper(bool big_endian) {
-    int x = 1;
-    bool is_little_endian = (*(char *)&x == 1);
-    swap_ = big_endian == is_little_endian;
+    swap_ = big_endian == kLittleEndian;
   }
 
   inline void SwapIfRequiredInt16(int16_t *x) const {
-- 
GitLab


From 0c8a8289da120ee353c4fba5decb0bea9014e0a7 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 18 Sep 2018 09:28:42 -0700
Subject: [PATCH 0328/1357] Extend template expansion support for arithmetic
 expressions.

PiperOrigin-RevId: 213462334
---
 tensorflow/python/autograph/pyct/templates.py      | 11 +++++++++++
 tensorflow/python/autograph/pyct/templates_test.py | 12 ++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/tensorflow/python/autograph/pyct/templates.py b/tensorflow/python/autograph/pyct/templates.py
index 68c2a35fac..1bf0515745 100644
--- a/tensorflow/python/autograph/pyct/templates.py
+++ b/tensorflow/python/autograph/pyct/templates.py
@@ -109,6 +109,7 @@ class ReplaceTransformer(gast.NodeTransformer):
     if not node.ctx:
       raise ValueError('node %s is missing ctx value' % node)
 
+  # TODO(mdan): Rewrite _check and _set using a separate transformer.
   def _check_inner_children_have_context(self, node):
     if isinstance(node, gast.Attribute):
       self._check_inner_children_have_context(node.value)
@@ -131,6 +132,11 @@ class ReplaceTransformer(gast.NodeTransformer):
         self._check_inner_children_have_context(node.upper)
       if node.step:
         self._check_inner_children_have_context(node.step)
+    elif isinstance(node, gast.BinOp):
+      self._check_inner_children_have_context(node.left)
+      self._check_inner_children_have_context(node.right)
+    elif isinstance(node, gast.UnaryOp):
+      self._check_inner_children_have_context(node.operand)
     elif isinstance(node, gast.Name):
       self._check_has_context(node)
     elif isinstance(node, (gast.Str, gast.Num)):
@@ -166,6 +172,11 @@ class ReplaceTransformer(gast.NodeTransformer):
     elif isinstance(node, gast.Subscript):
       self._set_inner_child_context(node.value, ctx)
       self._check_inner_children_have_context(node.slice)
+    elif isinstance(node, gast.BinOp):
+      self._check_inner_children_have_context(node.left)
+      self._check_inner_children_have_context(node.right)
+    elif isinstance(node, gast.UnaryOp):
+      self._check_inner_children_have_context(node.operand)
     elif isinstance(node, (gast.Str, gast.Num)):
       pass
     else:
diff --git a/tensorflow/python/autograph/pyct/templates_test.py b/tensorflow/python/autograph/pyct/templates_test.py
index 66268cfaad..078d9a149b 100644
--- a/tensorflow/python/autograph/pyct/templates_test.py
+++ b/tensorflow/python/autograph/pyct/templates_test.py
@@ -132,6 +132,18 @@ class TemplatesTest(test.TestCase):
     self.assertIsInstance(node.body[0].targets[0].elts[0].ctx, gast.Store)
     self.assertIsInstance(node.body[0].targets[0].elts[1].ctx, gast.Store)
 
+  def test_replace_expression_context(self):
+    template = """
+      def test_fn(foo):
+        foo
+    """
+
+    node = templates.replace(
+        template, foo=parser.parse_expression('a + 2 * b / -c'))[0]
+    self.assertIsInstance(node.body[0].ctx, gast.Load)
+    self.assertIsInstance(node.body[0].left.ctx, gast.Load)
+    self.assertIsInstance(node.body[0].right.left.right.ctx, gast.Load)
+
   def test_replace_complex_context(self):
     template = """
       def test_fn(foo):
-- 
GitLab


From 30f28a7f44f39cb8f24fde17252c3e2539c22bb0 Mon Sep 17 00:00:00 2001
From: Guozhong Zhuang <guozhong.zhuang@intel.com>
Date: Tue, 18 Sep 2018 09:52:03 -0700
Subject: [PATCH 0329/1357] change per code style check

---
 tensorflow/core/util/mkl_util.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 5ea8f2ee47..387e5ee5a6 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
-#define TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
+#ifndef TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
+#define TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
 #ifdef INTEL_MKL
 
 #include <string>
@@ -2040,8 +2040,8 @@ class MklPrimitiveFactory {
   /// Fuction to check whether primitive memory optimization is enabled
   static inline bool IsPrimitiveMemOptEnabled() {
     bool is_primitive_mem_opt_enabled = true;
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE", true,
-          &is_primitive_mem_opt_enabled));
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE",
+        true, &is_primitive_mem_opt_enabled));
     return is_primitive_mem_opt_enabled;
   }
 
@@ -2098,7 +2098,7 @@ static inline memory::format get_desired_format(int channel,
              (channel % 8) == 0) {
     fmt_desired = is_2d
                       ? memory::format::nChw8c
-                      : memory::format::ncdhw;  //not support avx2 for 3d yet.
+                      : memory::format::ncdhw;  // not support avx2 for 3d yet.
   } else {
     fmt_desired = is_2d ? memory::format::nchw : memory::format::ncdhw;
   }
@@ -2210,7 +2210,8 @@ inline primitive FindOrCreateReorder(const memory* from, const memory* to) {
 
 // utility function to determine if it is conv 1x1 and stride != 1
 // for purpose of temporarily disabling primitive reuse
-inline bool IsConv1x1StrideNot1(memory::dims filter_dims, memory::dims strides) {
+inline bool IsConv1x1StrideNot1(memory::dims filter_dims,
+    memory::dims strides) {
   if (filter_dims.size() != 4 || strides.size() != 2) return false;
 
   return ((filter_dims[2] == 1) && (filter_dims[3] == 1) &&
@@ -2221,4 +2222,4 @@ inline bool IsConv1x1StrideNot1(memory::dims filter_dims, memory::dims strides)
 
 }  // namespace tensorflow
 #endif  // INTEL_MKL
-#endif  // TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
+#endif  // TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
-- 
GitLab


From d0f6f7733f46d973326187ee4eafb6b9e94b25a1 Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Tue, 18 Sep 2018 10:51:19 -0700
Subject: [PATCH 0330/1357] Adding #error when compiling for MKL ML Only, this
 has been defeatured

---
 tensorflow/core/util/mkl_util.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 680211edff..cf7ffd8149 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -34,9 +34,8 @@ limitations under the License.
 #endif
 
 #ifdef INTEL_MKL_ML_ONLY
-// Using pragma message since #warning doesn't work with all compilers
-#pragma message("Compiling for INTEL MKL ML only will be deprecated soon.")
-#pragma message("Please use MKL DNN (the default option for --config=mkl)")
+#error \
+    "Compiling for INTEL MKL ML only is no longer supported.Please use MKL DNN (the default option for --config=mkl)"
 #endif
 
 #ifdef INTEL_MKL_ML_ONLY
-- 
GitLab


From 14e9345a88b08f5d2a12f3f441b1d82c041d7ea3 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 18 Sep 2018 18:23:52 +0000
Subject: [PATCH 0331/1357] Avoid saving sensitive information in graph.

---
 .../ignite/kernels/ignite_dataset_ops.cc      | 30 ++-------
 tensorflow/contrib/ignite/ops/dataset_ops.cc  | 10 ---
 .../ignite/python/ops/ignite_dataset_ops.py   | 18 +----
 .../python/tests/ignite_dataset_test.py       | 66 ++++++++++++++-----
 4 files changed, 56 insertions(+), 68 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index e48fce4ed2..bdaed72387 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -125,35 +125,15 @@ class IgniteDatasetOp : public DatasetOpKernel {
       OP_REQUIRES_OK(ctx,
                      ParseScalarArgument<int32>(ctx, "page_size", &page_size));
 
-    if (env_username)
-      username = string(env_username);
-    else
-      OP_REQUIRES_OK(ctx,
-                     ParseScalarArgument<string>(ctx, "username", &username));
+    if (env_username) username = string(env_username);
 
-    if (env_password)
-      password = string(env_password);
-    else
-      OP_REQUIRES_OK(ctx,
-                     ParseScalarArgument<string>(ctx, "password", &password));
+    if (env_password) password = string(env_password);
 
-    if (env_certfile)
-      certfile = string(env_certfile);
-    else
-      OP_REQUIRES_OK(ctx,
-                     ParseScalarArgument<string>(ctx, "certfile", &certfile));
+    if (env_certfile) certfile = string(env_certfile);
 
-    if (env_keyfile)
-      keyfile = string(env_keyfile);
-    else
-      OP_REQUIRES_OK(ctx,
-                     ParseScalarArgument<string>(ctx, "keyfile", &keyfile));
+    if (env_keyfile) keyfile = string(env_keyfile);
 
-    if (env_cert_password)
-      cert_password = string(env_cert_password);
-    else
-      OP_REQUIRES_OK(ctx, ParseScalarArgument<string>(ctx, "cert_password",
-                                                      &cert_password));
+    if (env_cert_password) cert_password = string(env_cert_password);
 
     const Tensor* schema_tensor;
     OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor));
diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc
index 7d18df11aa..3d6fbe00e6 100644
--- a/tensorflow/contrib/ignite/ops/dataset_ops.cc
+++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc
@@ -26,11 +26,6 @@ REGISTER_OP("IgniteDataset")
     .Input("local: bool")
     .Input("part: int32")
     .Input("page_size: int32")
-    .Input("username: string")
-    .Input("password: string")
-    .Input("certfile: string")
-    .Input("keyfile: string")
-    .Input("cert_password: string")
     .Input("schema: int32")
     .Input("permutation: int32")
     .Output("handle: variant")
@@ -54,11 +49,6 @@ port: Ignite Thin Client Port.
 local: Local flag that defines that data should be fetched from local host only.
 part: Partition data should be fetched from.
 page_size: Page size for Ignite Thin Client.
-username: Username to authenticate via Ignite Thin Client.
-password: Password to authenticate via Ignite Thin Client.
-certfile: SSL certificate to establish SSL connection.
-keyfile: Private key file to establish SSL connection.
-cert_password: SSL certificate password to establish SSL connection.
 schema: Internal structure that defines schema of cache objects.
 permutation: Internal structure that defines permutation of cache objects.
 )doc");
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
index c0e24b1c69..7fc9e1fdd1 100644
--- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -732,18 +732,6 @@ class IgniteDataset(Dataset):
     self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part")
     self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,
                                            name="page_size")
-    self.username = ops.convert_to_tensor("" if username is None else username,
-                                          dtype=dtypes.string, name="username")
-    self.password = ops.convert_to_tensor("" if password is None else password,
-                                          dtype=dtypes.string, name="password")
-    self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,
-                                          dtype=dtypes.string, name="certfile")
-    self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,
-                                         dtype=dtypes.string, name="keyfile")
-    self.cert_password = ops.convert_to_tensor("" if cert_password is None
-                                               else cert_password,
-                                               dtype=dtypes.string,
-                                               name="cert_password")
     self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),
                                         dtype=dtypes.int32, name="schema")
     self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),
@@ -753,10 +741,8 @@ class IgniteDataset(Dataset):
   def _as_variant_tensor(self):
     return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,
                                           self.port, self.local, self.part,
-                                          self.page_size, self.username,
-                                          self.password, self.certfile,
-                                          self.keyfile, self.cert_password,
-                                          self.schema, self.permutation)
+                                          self.page_size, self.schema,
+                                          self.permutation)
 
   @property
   def output_classes(self):
diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
index 933e62b804..5d74617690 100644
--- a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
+++ b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
@@ -35,28 +35,60 @@ class IgniteDatasetTest(test.TestCase):
   """
 
   def test_ignite_dataset_with_plain_client(self):
+    """Test Ignite Dataset with plain client.
+    """
+    self._clear_env()
     ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300)
-    self.__check_dataset(ds)
+    self._check_dataset(ds)
 
   def test_ignite_dataset_with_ssl_client(self):
-    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,\
-      certfile=os.path.dirname(os.path.realpath(__file__)) +\
-      "/keystore/client.pem", cert_password="123456")
-    self.__check_dataset(ds)
+    """Test Ignite Dataset with ssl client.
+    """
+    self._clear_env()
+    os.environ["IGNITE_DATASET_CERTFILE"] = os.path.dirname(
+        os.path.realpath(__file__)) + "/keystore/client.pem"
+    os.environ["IGNITE_DATASET_CERT_PASSWORD"] = "123456"
+
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,
+                       certfile=os.environ["IGNITE_DATASET_CERTFILE"],
+                       cert_password=os.environ["IGNITE_DATASET_CERT_PASSWORD"])
+    self._check_dataset(ds)
 
   def test_ignite_dataset_with_ssl_client_and_auth(self):
-    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,\
-      certfile=os.path.dirname(os.path.realpath(__file__)) +\
-      "/keystore/client.pem", cert_password="123456",\
-      username="ignite", password="ignite")
-    self.__check_dataset(ds)
+    """Test Ignite Dataset with ssl client and authentication.
+    """
+    self._clear_env()
+    os.environ['IGNITE_DATASET_USERNAME'] = "ignite"
+    os.environ['IGNITE_DATASET_PASSWORD'] = "ignite"
+    os.environ['IGNITE_DATASET_CERTFILE'] = os.path.dirname(
+        os.path.realpath(__file__)) + "/keystore/client.pem"
+    os.environ['IGNITE_DATASET_CERT_PASSWORD'] = "123456"
+
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,
+                       certfile=os.environ['IGNITE_DATASET_CERTFILE'],
+                       cert_password=os.environ['IGNITE_DATASET_CERT_PASSWORD'],
+                       username=os.environ['IGNITE_DATASET_USERNAME'],
+                       password=os.environ['IGNITE_DATASET_PASSWORD'])
+    self._check_dataset(ds)
+
+  def _clear_env(self):
+    """Clears environment variables used by Ignite Dataset.
+    """
+    if 'IGNITE_DATASET_USERNAME' in os.environ:
+      del os.environ['IGNITE_DATASET_USERNAME']
+    if 'IGNITE_DATASET_PASSWORD' in os.environ:
+      del os.environ['IGNITE_DATASET_PASSWORD']
+    if 'IGNITE_DATASET_CERTFILE' in os.environ:
+      del os.environ['IGNITE_DATASET_CERTFILE']
+    if 'IGNITE_DATASET_CERT_PASSWORD' in os.environ:
+      del os.environ['IGNITE_DATASET_CERT_PASSWORD']
 
-  def __check_dataset(self, dataset):
+  def _check_dataset(self, dataset):
     """Checks that dataset provids correct data.
     """
-    self.assertEquals(tf.int64, dataset.output_types['key'])
-    self.assertEquals(tf.string, dataset.output_types['val']['NAME'])
-    self.assertEquals(tf.int64, dataset.output_types['val']['VAL'])
+    self.assertEqual(tf.int64, dataset.output_types['key'])
+    self.assertEqual(tf.string, dataset.output_types['val']['NAME'])
+    self.assertEqual(tf.int64, dataset.output_types['val']['VAL'])
 
     it = dataset.make_one_shot_iterator()
     ne = it.get_next()
@@ -66,11 +98,11 @@ class IgniteDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(ne)
 
-    self.assertEquals({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\
+    self.assertEqual({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\
       rows[0])
-    self.assertEquals({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\
+    self.assertEqual({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\
       rows[1])
-    self.assertEquals({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\
+    self.assertEqual({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\
       rows[2])
 
 if __name__ == "__main__":
-- 
GitLab


From effced8f591441e0706377e2b31debb96ee9203d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Kr=C3=B6ger?= <moritz.kroeger@tu-dortmund.de>
Date: Tue, 18 Sep 2018 21:14:23 +0200
Subject: [PATCH 0332/1357] Moved example and changed wording

---
 tensorflow/python/data/ops/dataset_ops.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 2fc41a3b98..1b9ea2ed08 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1009,11 +1009,8 @@ class Dataset(object):
   def flat_map(self, map_func):
     """Maps `map_func` across this dataset and flattens the result. 
     
-    `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since 
-    `flat_map` produces a similar outputs as `tf.data.Dataset.interleave(cycle_length=1)`
-    
     Use `flat_map` if you want to make sure, that the order of your dataset stays the same.
-    For example, to implement unbatch:
+    For example, to flatten a dataset of batches into a dataset of their elements:
 
     ```python
     # NOTE: The following examples use `{ ... }` to represent the
@@ -1023,6 +1020,10 @@ class Dataset(object):
     a.flat_map(lambda x: Dataset.from_tensor_slices(x)) == 
       {[1,2,3,4,5,6,7,8,9,10]}
     ```
+    
+    `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since 
+    `flat_map` produces the same output as `tf.data.Dataset.interleave(cycle_length=1)`
+    
     Args:
       map_func: A function mapping a nested structure of tensors (having shapes
         and types defined by `self.output_shapes` and `self.output_types`) to a
-- 
GitLab


From a1ffaf3620801af2a7559b0ee393f962fb6ed7ae Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 18 Sep 2018 12:40:49 -0700
Subject: [PATCH 0333/1357] [SE] Restore int8x4 data types if that's the
 requested DataLayout for fused conv

This broke in a recent refactoring.

PiperOrigin-RevId: 213497416
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 38 ++++++++++++++-------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 63ab367086..3a77ba769c 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -132,30 +132,39 @@ string ToString(cudnnStatus_t status) {
 }
 
 template <typename T>
-cudnnDataType_t GetCudnnDataType();
+cudnnDataType_t GetCudnnDataType(
+    dnn::DataLayout = dnn::DataLayout::kBatchDepthYX);
 
 template <>
-cudnnDataType_t GetCudnnDataType<double>() {
+cudnnDataType_t GetCudnnDataType<double>(dnn::DataLayout) {
   return CUDNN_DATA_DOUBLE;
 }
 
 template <>
-cudnnDataType_t GetCudnnDataType<float>() {
+cudnnDataType_t GetCudnnDataType<float>(dnn::DataLayout) {
   return CUDNN_DATA_FLOAT;
 }
 
 template <>
-cudnnDataType_t GetCudnnDataType<Eigen::half>() {
+cudnnDataType_t GetCudnnDataType<Eigen::half>(dnn::DataLayout) {
   return CUDNN_DATA_HALF;
 }
 
 template <>
-cudnnDataType_t GetCudnnDataType<int8>() {
-  return CUDNN_DATA_INT8;
+cudnnDataType_t GetCudnnDataType<int8>(dnn::DataLayout layout) {
+  switch (layout) {
+    case dnn::DataLayout::kYXDepthBatch:
+    case dnn::DataLayout::kYXBatchDepth:
+    case dnn::DataLayout::kBatchYXDepth:
+    case dnn::DataLayout::kBatchDepthYX:
+      return CUDNN_DATA_INT8;
+    case dnn::DataLayout::kBatchDepthYX4:
+      return CUDNN_DATA_INT8x4;
+  }
 }
 
 template <>
-cudnnDataType_t GetCudnnDataType<int32>() {
+cudnnDataType_t GetCudnnDataType<int32>(dnn::DataLayout) {
   return CUDNN_DATA_INT32;
 }
 
@@ -2518,12 +2527,15 @@ port::Status CudnnSupport::DoFusedConvolveImpl(
                         "Relu or None activation.");
   }
 
-  CudnnTensorDescriptor conv_input_nd(conv_input_descriptor,
-                                      GetCudnnDataType<ElementType>());
-  CudnnTensorDescriptor output_nd(output_descriptor,
-                                  GetCudnnDataType<ElementType>());
-  CudnnFilterDescriptor filter(filter_descriptor,
-                               GetCudnnDataType<ElementType>());
+  CudnnTensorDescriptor conv_input_nd(
+      conv_input_descriptor,
+      GetCudnnDataType<ElementType>(conv_input_descriptor.layout()));
+  CudnnTensorDescriptor output_nd(
+      output_descriptor,
+      GetCudnnDataType<ElementType>(conv_input_descriptor.layout()));
+  CudnnFilterDescriptor filter(
+      filter_descriptor,
+      GetCudnnDataType<ElementType>(conv_input_descriptor.layout()));
   CudnnTensorDescriptor bias_nd(bias_descriptor, GetCudnnDataType<BiasType>());
   CudnnConvolutionDescriptor conv(convolution_descriptor,
                                   GetCudnnDataType<AccumulatorType>());
-- 
GitLab


From 723242c800f237368e238fe03bd50516807e3402 Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Tue, 18 Sep 2018 12:55:44 -0700
Subject: [PATCH 0334/1357] Link to readme for distribution strategy from
 distribute.py and package init file, so that folks looking at API
 documentation can find the readme as well.

PiperOrigin-RevId: 213499832
---
 tensorflow/contrib/distribute/__init__.py | 7 ++++++-
 tensorflow/python/training/distribute.py  | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distribute/__init__.py b/tensorflow/contrib/distribute/__init__.py
index 350f81f60f..823fe6a917 100644
--- a/tensorflow/contrib/distribute/__init__.py
+++ b/tensorflow/contrib/distribute/__init__.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Prototype of a distributed computation library for TF."""
+"""A distributed computation library for TF.
+
+See [tensorflow/contrib/distribute/README.md](
+https://www.tensorflow.org/code/tensorflow/contrib/distribute/README.md)
+for overview and examples.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py
index 21ca1735e0..419a9ec12b 100644
--- a/tensorflow/python/training/distribute.py
+++ b/tensorflow/python/training/distribute.py
@@ -195,6 +195,10 @@ class _SameScopeAgainContext(object):
 class DistributionStrategy(object):
   """A list of devices with a state & compute distribution policy.
 
+  See [tensorflow/contrib/distribute/README.md](
+  https://www.tensorflow.org/code/tensorflow/contrib/distribute/README.md)
+  for overview and examples.
+
   The intent is that you can write an algorithm in a stylized way and
   it will be usable with a variety of different `DistributionStrategy`
   implementations. Each descendant will implement a different strategy
-- 
GitLab


From e8be4d96dd4d3d9d6b12b778a5b8beee592a324a Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Tue, 18 Sep 2018 12:59:39 -0700
Subject: [PATCH 0335/1357] Only start_step/end_step on GradientTape if
 executing eagerly.

This prevents creating a context where none is required.

PiperOrigin-RevId: 213500408
---
 tensorflow/python/eager/backprop.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 907234b0f8..50a6ce6324 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -725,7 +725,9 @@ class GradientTape(object):
     self._persistent = persistent
     self._watch_accessed_variables = watch_accessed_variables
     self._recording = False
-    context.context().start_step()
+    self._created_eagerly = context.executing_eagerly()
+    if self._created_eagerly:
+      context.context().start_step()
 
   def __enter__(self):
     """Enters a context inside which operations are recorded on this tape."""
@@ -755,7 +757,8 @@ class GradientTape(object):
     self._recording = False
 
   def __del__(self):
-    context.context().end_step()
+    if self._created_eagerly:
+      context.context().end_step()
 
   def watch(self, tensor):
     """Ensures that `tensor` is being traced by this tape.
-- 
GitLab


From 199cb7746c7ad92d7be344363d8276c45fc7a4b8 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 18 Sep 2018 13:05:23 -0700
Subject: [PATCH 0336/1357] Register FakeResourceUpdateOp for the right op

Before this CL the PartiallyDeclusterPassTest.DontDuplicateResourceVarOps test
was buggy, in that it wasn't testing what it was supposed to test.

PiperOrigin-RevId: 213501558
---
 .../compiler/jit/partially_decluster_pass_test.cc     | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/jit/partially_decluster_pass_test.cc b/tensorflow/compiler/jit/partially_decluster_pass_test.cc
index 35872daa65..0feb73a89e 100644
--- a/tensorflow/compiler/jit/partially_decluster_pass_test.cc
+++ b/tensorflow/compiler/jit/partially_decluster_pass_test.cc
@@ -60,9 +60,9 @@ class FakeBinaryOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override { CHECK(false); }
 };
 
-class FakeResourceVarUpdateOp : public OpKernel {
+class FakeResourceUpdateOp : public OpKernel {
  public:
-  explicit FakeResourceVarUpdateOp(OpKernelConstruction* context)
+  explicit FakeResourceUpdateOp(OpKernelConstruction* context)
       : OpKernel(context) {}
 
   void Compute(OpKernelContext* ctx) override { CHECK(false); }
@@ -74,10 +74,9 @@ REGISTER_KERNEL_BUILDER(Name("FakeBinary")
                             .HostMemory("host_out"),
                         FakeBinaryOp);
 
-REGISTER_KERNEL_BUILDER(Name("FakeResourceVarUpdate")
-                            .Device(DEVICE_CPU)
-                            .HostMemory("something_else"),
-                        FakeResourceVarUpdateOp);
+REGISTER_KERNEL_BUILDER(
+    Name("FakeResourceUpdate").Device(DEVICE_CPU).HostMemory("something_else"),
+    FakeResourceUpdateOp);
 
 Status PartiallyDecluster(std::unique_ptr<Graph>* graph) {
   FixupSourceAndSinkEdges(graph->get());
-- 
GitLab


From 33170cc661f3838aa7d0d7fc19bb0c6ba4812a3c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Sep 2018 13:28:38 -0700
Subject: [PATCH 0337/1357] Eliminate VisitableAllocator.

The visitor pattern is used to allow pre-registration of memory for
DMA access, e.g. for fast GPU/CPU i/o and for RDMA networking.  The
VisitableAllocator interface was introduced to support this use some
time ago, prior to SubAllocators. Memory registration works best if
it's done infrequently, on large pieces of memory, rather than on
every piece that's dynamically allocated/freed.  This usage pattern
fits the SubAllocator better than a general Allocator.  This change
moves memory allocation visitor access to SubAllocator and eliminates
the VisitableAllocator subclass of Allocator.

This change also more rigorously enforces the requirement that all
Visitors be declared prior to memory allocation begining.  This is
accomplished by requiring that Visitors be provided to the SubAllocator
constructor.

This refactoring will ease an upcoming CL introducing
NUMA specific CPU devices.  It also should fix some performance
pitfalls (e.g. accidental use of PoolAllocator) introduced by an
earlier refactoring of ProcessState that was also in preparation for
NUMA.  It restores the default use of the cpu_allocator() value (i.e.
no SubAllocator) by model executions that don't use allocation
visitors (since visitor registration must precede the first allocation,
hence can be detected at that time).

PiperOrigin-RevId: 213505655
---
 tensorflow/contrib/gdr/gdr_memory_manager.cc  | 102 +++++------
 tensorflow/contrib/verbs/rdma_mgr.cc          |  81 +++------
 tensorflow/contrib/verbs/rdma_mgr.h           |   1 +
 tensorflow/contrib/verbs/verbs_server_lib.cc  |   5 +
 tensorflow/core/BUILD                         |   1 -
 .../core/common_runtime/bfc_allocator.cc      |  21 +--
 .../core/common_runtime/bfc_allocator.h       |  14 +-
 .../common_runtime/gpu/cuda_host_allocator.h  |  12 +-
 .../common_runtime/gpu/gpu_bfc_allocator.cc   |  17 +-
 .../common_runtime/gpu/gpu_bfc_allocator.h    |  44 +++--
 .../gpu/gpu_bfc_allocator_test.cc             |  90 ++++++++--
 .../gpu/gpu_cudamalloc_allocator.cc           |  10 +-
 .../gpu/gpu_cudamalloc_allocator.h            |  11 +-
 .../common_runtime/gpu/gpu_debug_allocator.cc |  20 +--
 .../common_runtime/gpu/gpu_debug_allocator.h  |  20 +--
 .../gpu/gpu_debug_allocator_test.cc           |  35 +++-
 .../core/common_runtime/gpu/gpu_device.cc     |  64 ++++---
 .../core/common_runtime/gpu/gpu_device.h      |   9 +-
 .../common_runtime/gpu/gpu_process_state.cc   | 161 +++++++++++-------
 .../common_runtime/gpu/gpu_process_state.h    |  58 ++++---
 .../common_runtime/gpu/pool_allocator_test.cc |  68 ++++++--
 .../core/common_runtime/mkl_cpu_allocator.h   |  50 +-----
 .../core/common_runtime/pool_allocator.cc     |  45 ++---
 .../core/common_runtime/pool_allocator.h      |  27 +--
 .../core/common_runtime/process_state.cc      |  71 ++++++--
 .../core/common_runtime/process_state.h       |  15 +-
 .../core/common_runtime/renamed_device.h      |   7 +-
 .../core/common_runtime/visitable_allocator.h |  79 ---------
 tensorflow/core/framework/allocator.cc        |  20 ++-
 tensorflow/core/framework/allocator.h         |  28 ++-
 tensorflow/core/framework/device_base.h       |  10 +-
 tensorflow/core/framework/op_kernel.cc        |   9 +-
 32 files changed, 628 insertions(+), 577 deletions(-)
 delete mode 100644 tensorflow/core/common_runtime/visitable_allocator.h

diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc
index 726f74c7b7..bb06f1c41c 100644
--- a/tensorflow/contrib/gdr/gdr_memory_manager.cc
+++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc
@@ -138,6 +138,8 @@ class GdrMemoryManager : public RemoteMemoryManager {
       Device* device, DeviceContext* device_context, bool on_host,
       StatusCallback done) override;
 
+  static void RegMemVisitors();
+
  protected:
   Status CreateEndpoint(const string& host, const string& port,
                         RdmaEndpointPtr& endpoint);
@@ -183,35 +185,51 @@ class GdrMemoryManager : public RemoteMemoryManager {
   TF_DISALLOW_COPY_AND_ASSIGN(GdrMemoryManager);
 };
 
-// TODO(byronyi): remove this class and its registration when the default
-// cpu_allocator() returns visitable allocator, or cpu_allocator() is no
-// longer in use.
-class BFCGdrAllocator : public BFCAllocator {
- public:
-  BFCGdrAllocator()
-      : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36,
-                     true, "cpu_gdr_bfc") {}
-};
-class BFCGdrAllocatorFactory : public AllocatorFactory {
- public:
-  Allocator* CreateAllocator() override { return new BFCGdrAllocator; }
-
-  virtual SubAllocator* CreateSubAllocator(int numa_node) {
-    return new BasicCPUAllocator(numa_node);
-  }
-};
-
-REGISTER_MEM_ALLOCATOR("BFCGdrAllocator", 102, BFCGdrAllocatorFactory);
-
 GdrMemoryManager::GdrMemoryManager(const string& host, const string& port)
     : host_(host),
       port_(port),
       listening_(nullptr, EndpointDeleter),
       stopped_(true),
-      next_key_(0) {}
+      next_key_(0) {
+  static std::once_flag flag;
+  std::call_once(flag, []() { RegMemVisitors(); });
+}
 
 GdrMemoryManager::~GdrMemoryManager() { close(epfd_); }
 
+/*static*/ void GdrMemoryManager::RegMemVisitors() {
+  SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node,
+                                           size_t num_bytes) {
+    GdrMemoryManager::Singleton().InsertMemoryRegion(
+        ptr, num_bytes, strings::StrCat("CPU:", numa_node));
+  };
+  SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node,
+                                          size_t num_bytes) {
+    GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes);
+  };
+  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
+  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
+
+#if GOOGLE_CUDA
+  if (IsGDRAvailable()) {
+    int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
+
+    // Note we don't free allocated GPU memory so there is no free visitor
+    SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
+                                                  size_t num_bytes) {
+      RdmaMemoryMgr::Singleton().InsertMemoryRegion(
+          ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
+    };
+    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
+                                                     cuda_alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
+                                                          alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
+    LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
+  }
+#endif  // GOOGLE_CUDA
+}
+
 Status GdrMemoryManager::Init() {
   epfd_ = epoll_create1(0);
   if (epfd_ == -1) {
@@ -271,48 +289,6 @@ Status GdrMemoryManager::Init() {
                                "cannot add server to epoll");
   }
 
-  Allocator* allocators[] = {
-#if GOOGLE_CUDA
-    GPUProcessState::singleton()->GetCUDAHostAllocator(0),
-#endif  // GOOGLE_CUDA
-    ProcessState::singleton()->GetCPUAllocator(0),
-    cpu_allocator(),
-  };
-
-  using namespace std::placeholders;
-  VisitableAllocator::Visitor alloc_visitor =
-      std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2);
-  VisitableAllocator::Visitor free_visitor =
-      std::bind(&GdrMemoryManager::EvictMemoryRegion, this, _1, _2);
-
-  std::set<Allocator*> instrumented_;
-
-  // Host memory allocators
-  for (Allocator* allocator : allocators) {
-    auto* visitable_allocator = dynamic_cast<VisitableAllocator*>(allocator);
-    CHECK(visitable_allocator)
-        << "is not visitable for instrumentation" << allocator->Name();
-    // Make sure we don't instrument the same allocator twice
-    if (instrumented_.find(allocator) == std::end(instrumented_)) {
-      visitable_allocator->AddAllocVisitor(alloc_visitor);
-      visitable_allocator->AddFreeVisitor(free_visitor);
-      instrumented_.insert(allocator);
-      LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name();
-    }
-  }
-
-#if GOOGLE_CUDA
-  VisitableAllocator::Visitor cuda_alloc_visitor =
-      std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2);
-  if (IsGDRAvailable()) {
-    // Note we don't free allocated GPU memory so there is no free visitor
-    int32_t bus_id = TryToReadNumaNode(listening_->verbs->device) + 1;
-    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
-                                                     cuda_alloc_visitor);
-    LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
-  }
-#endif  // GOOGLE_CUDA
-
   return Status::OK();
 }
 
diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 3cb5e61fac..2784bf124c 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/contrib/verbs/grpc_verbs_client.h"
 #include "tensorflow/contrib/verbs/verbs_service.pb.h"
-#include "tensorflow/core/common_runtime/bfc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/pool_allocator.h"
@@ -29,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/distributed_runtime/session_mgr.h"
 #include "tensorflow/core/framework/allocator_registry.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/strcat.h"
 
 namespace tensorflow {
 
@@ -256,74 +256,41 @@ void MRDeleter(ibv_mr* mr) {
   }
 }
 
-// TODO(byronyi): remove this class and its registration when the default
-// cpu_allocator() returns visitable allocator, or cpu_allocator() is no
-// longer in use.
-class BFCRdmaAllocator : public BFCAllocator {
- public:
-  BFCRdmaAllocator()
-      : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36,
-                     true, "cpu_rdma_bfc") {}
-};
-class BFCRdmaAllocatorFactory : public AllocatorFactory {
- public:
-  Allocator* CreateAllocator() { return new BFCRdmaAllocator; }
-
-  SubAllocator* CreateSubAllocator(int numa_node) {
-    return new BasicCPUAllocator(numa_node);
-  }
-};
-
-REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory);
-
 void RdmaMgr::InitAllocators() {
-  RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_;
+  static std::once_flag flag;
+  std::call_once(
+      flag, [this]() { RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_; });
+}
 
-  Allocator* allocators[] = {
-#if GOOGLE_CUDA
-    GPUProcessState::singleton()->GetCUDAHostAllocator(0),
-#endif  // GOOGLE_CUDA
-    ProcessState::singleton()->GetCPUAllocator(0),
-    cpu_allocator(),
+/*static*/ void RdmaMgr::RegMemVisitors() {
+  SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node,
+                                           size_t num_bytes) {
+    RdmaMemoryMgr::Singleton().InsertMemoryRegion(
+        ptr, num_bytes, strings::StrCat("CPU:", numa_node));
+  };
+  SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node,
+                                          size_t num_bytes) {
+    RdmaMemoryMgr::Singleton().EvictMemoryRegion(ptr, num_bytes);
   };
 
-  using namespace std::placeholders;
-
-  std::set<Allocator*> instrumented_;
-
-  // Host memory allocators
-  for (Allocator* allocator : allocators) {
-    VisitableAllocator::Visitor alloc_visitor =
-        std::bind(&RdmaMemoryMgr::InsertMemoryRegion,
-                  &RdmaMemoryMgr::Singleton(), _1, _2, allocator->Name());
-    VisitableAllocator::Visitor free_visitor = std::bind(
-        &RdmaMemoryMgr::EvictMemoryRegion, &RdmaMemoryMgr::Singleton(), _1, _2);
-
-    auto* visitable_allocator = dynamic_cast<VisitableAllocator*>(allocator);
-    CHECK(visitable_allocator)
-        << "is not visitable for instrumentation" << allocator->Name();
-    // Make sure we don't instrument the same allocator twice
-    if (instrumented_.find(allocator) == std::end(instrumented_)) {
-      visitable_allocator->AddAllocVisitor(alloc_visitor);
-      visitable_allocator->AddFreeVisitor(free_visitor);
-      instrumented_.insert(allocator);
-      LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name();
-    }
-  }
+  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
+  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
 
 #if GOOGLE_CUDA
   if (IsGDRAvailable()) {
     // Note we don't free allocated GPU memory so there is no free visitor
     int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
 
-    char buf[8];
-    sprintf(buf, "gpu");
-    VisitableAllocator::Visitor cuda_alloc_visitor =
-        std::bind(&RdmaMemoryMgr::InsertMemoryRegion,
-                  &RdmaMemoryMgr::Singleton(), _1, _2, std::string(buf));
-
+    SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
+                                                  size_t num_bytes) {
+      RdmaMemoryMgr::Singleton().InsertMemoryRegion(
+          ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
+    };
     GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
                                                      cuda_alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
+                                                          alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
     LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
   }
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h
index 9fffc335bb..74b92cc9a6 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.h
+++ b/tensorflow/contrib/verbs/rdma_mgr.h
@@ -39,6 +39,7 @@ class RdmaMgr {
   void SetupChannels();
   bool ConnectivityCheck();
   void InitAllocators();
+  static void RegMemVisitors();
   const string& local_worker() { return local_worker_; }
 
  private:
diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc
index 1a0b5028fe..61469686e4 100644
--- a/tensorflow/contrib/verbs/verbs_server_lib.cc
+++ b/tensorflow/contrib/verbs/verbs_server_lib.cc
@@ -76,8 +76,13 @@ Status VerbsServer::ChannelCacheFactory(const ServerDef& server_def,
   return Status::OK();
 }
 
+namespace {
+std::once_call reg_mem_visitors_call;
+}  // namespace
+
 Status VerbsServer::Init(ServiceInitFunction service_func,
                          RendezvousMgrCreationFunction rendezvous_mgr_func) {
+  std::call_once(reg_mem_visitors_call, []() { RdmaMgr::RegMemVisitors(); });
   Status s = GrpcServer::Init(service_func, rendezvous_mgr_func);
   {
     mutex_lock l(mu_);
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d55bd8d7ed..9bcf5b0865 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2783,7 +2783,6 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
     "common_runtime/step_stats_collector.h",
     "common_runtime/threadpool_device.h",
     "common_runtime/tracing_device.h",
-    "common_runtime/visitable_allocator.h",
     "common_runtime/process_state.h",
     "common_runtime/pool_allocator.h",
     "graph/gradients.h",
diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc
index 84c6285bbe..3843ea9e60 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/bfc_allocator.cc
@@ -31,7 +31,7 @@ namespace tensorflow {
 
 BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
                            bool allow_growth, const string& name)
-    : suballocator_(sub_allocator),
+    : sub_allocator_(sub_allocator),
       name_(name),
       free_chunks_list_(kInvalidChunkHandle),
       next_allocation_id_(1) {
@@ -72,7 +72,7 @@ BFCAllocator::~BFCAllocator() {
   VLOG(2) << "Number of regions allocated: "
           << region_manager_.regions().size();
   for (const auto& region : region_manager_.regions()) {
-    suballocator_->Free(region.ptr(), region.memory_size());
+    sub_allocator_->Free(region.ptr(), region.memory_size());
   }
 
   for (BinNum b = 0; b < kNumBins; b++) {
@@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
 
   // Try allocating.
   size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
-  void* mem_addr = suballocator_->Alloc(alignment, bytes);
+  void* mem_addr = sub_allocator_->Alloc(alignment, bytes);
   if (mem_addr == nullptr && !started_backpedal_) {
     // Only backpedal once.
     started_backpedal_ = true;
@@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
     while (mem_addr == nullptr) {
       bytes = RoundedBytes(bytes * kBackpedalFactor);
       if (bytes < rounded_bytes) break;
-      mem_addr = suballocator_->Alloc(alignment, bytes);
+      mem_addr = sub_allocator_->Alloc(alignment, bytes);
     }
   }
 
@@ -158,10 +158,6 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
   // Insert the chunk into the right bin.
   InsertFreeChunkIntoBin(h);
 
-  // Invoke visitors on newly allocated region.
-  for (const auto& visitor : region_visitors_) {
-    visitor(mem_addr, bytes);
-  }
   return true;
 }
 
@@ -490,15 +486,6 @@ void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) {
   InsertFreeChunkIntoBin(coalesced_chunk);
 }
 
-void BFCAllocator::AddAllocVisitor(Visitor visitor) {
-  VLOG(1) << "AddVisitor";
-  mutex_lock l(lock_);
-  region_visitors_.push_back(visitor);
-  for (const auto& region : region_manager_.regions()) {
-    visitor(region.ptr(), region.memory_size());
-  }
-}
-
 bool BFCAllocator::TracksAllocationSizes() { return true; }
 
 size_t BFCAllocator::RequestedSize(const void* ptr) {
diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h
index 20e1dab1d5..364071e066 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.h
+++ b/tensorflow/core/common_runtime/bfc_allocator.h
@@ -23,7 +23,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/allocator_retry.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/macros.h"
@@ -42,7 +42,7 @@ namespace tensorflow {
 // coalescing.  One assumption we make is that the process using this
 // allocator owns pretty much all of the memory, and that nearly
 // all requests to allocate memory go through this interface.
-class BFCAllocator : public VisitableAllocator {
+class BFCAllocator : public Allocator {
  public:
   // Takes ownership of sub_allocator.
   BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
@@ -55,11 +55,6 @@ class BFCAllocator : public VisitableAllocator {
                     const AllocationAttributes& allocation_attr) override;
   void DeallocateRaw(void* ptr) override;
 
-  void AddAllocVisitor(Visitor visitor) override;
-
-  // Does nothing, because memory is never freed.
-  void AddFreeVisitor(Visitor visitor) override {}
-
   bool TracksAllocationSizes() override;
 
   size_t RequestedSize(const void* ptr) override;
@@ -423,7 +418,7 @@ class BFCAllocator : public VisitableAllocator {
   // of the available memory.
   bool started_backpedal_ = false;
 
-  std::unique_ptr<SubAllocator> suballocator_;
+  std::unique_ptr<SubAllocator> sub_allocator_;
   string name_;
 
   // Structures mutable after construction
@@ -435,9 +430,6 @@ class BFCAllocator : public VisitableAllocator {
   // Pointer to head of linked list of free Chunks
   ChunkHandle free_chunks_list_ GUARDED_BY(lock_);
 
-  // Called once on each region, ASAP.
-  std::vector<Visitor> region_visitors_ GUARDED_BY(lock_);
-
   // Counter containing the next unique identifier to assign to a
   // newly-created chunk.
   int64 next_allocation_id_ GUARDED_BY(lock_);
diff --git a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h
index 636cd43575..6bd29ef775 100644
--- a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h
@@ -26,8 +26,12 @@ namespace tensorflow {
 class CUDAHostAllocator : public SubAllocator {
  public:
   // Note: stream_exec cannot be null.
-  explicit CUDAHostAllocator(se::StreamExecutor* stream_exec)
-      : stream_exec_(stream_exec) {
+  explicit CUDAHostAllocator(se::StreamExecutor* stream_exec, int numa_node,
+                             const std::vector<Visitor>& alloc_visitors,
+                             const std::vector<Visitor>& free_visitors)
+      : SubAllocator(alloc_visitors, free_visitors),
+        stream_exec_(stream_exec),
+        numa_node_(numa_node) {
     CHECK(stream_exec_ != nullptr);
   }
   ~CUDAHostAllocator() override {}
@@ -39,19 +43,23 @@ class CUDAHostAllocator : public SubAllocator {
       if (ptr == nullptr) {
         LOG(WARNING) << "could not allocate pinned host memory of size: "
                      << num_bytes;
+        return ptr;
       }
+      VisitAlloc(ptr, numa_node_, num_bytes);
     }
     return ptr;
   }
 
   void Free(void* ptr, size_t num_bytes) override {
     if (ptr != nullptr) {
+      VisitFree(ptr, numa_node_, num_bytes);
       stream_exec_->HostMemoryDeallocate(ptr);
     }
   }
 
  private:
   se::StreamExecutor* stream_exec_;  // not owned, non-null
+  const int numa_node_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(CUDAHostAllocator);
 };
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index 2d4c8d0201..44ffce77a1 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -22,18 +22,15 @@ limitations under the License.
 
 namespace tensorflow {
 
-GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
-                                 const string& name)
-    : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {}
+GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator,
+                                 size_t total_memory, const string& name)
+    : GPUBFCAllocator(sub_allocator, total_memory, GPUOptions(), name) {}
 
-GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator,
+                                 size_t total_memory,
                                  const GPUOptions& gpu_options,
                                  const string& name)
-    : BFCAllocator(
-          new GPUMemAllocator(
-              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
-              gpu_options.per_process_gpu_memory_fraction() > 1.0 ||
-                  gpu_options.experimental().use_unified_memory()),
-          total_memory, gpu_options.allow_growth(), name) {}
+    : BFCAllocator(sub_allocator, total_memory, gpu_options.allow_growth(),
+                   name) {}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index f1cc2eace1..6b6de80734 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -31,28 +31,20 @@ limitations under the License.
 
 namespace tensorflow {
 
-// A GPU memory allocator that implements a 'best-fit with coalescing'
-// algorithm.
-class GPUBFCAllocator : public BFCAllocator {
- public:
-  // 'cuda_gpu_id' refers to the ID of the GPU device within
-  // the process and must reference a valid ID in the process.
-  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
-                  const string& name);
-  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
-                  const GPUOptions& gpu_options, const string& name);
-  virtual ~GPUBFCAllocator() {}
-
-  TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
-};
-
 // Suballocator for GPU memory.
 class GPUMemAllocator : public SubAllocator {
  public:
+  // 'cuda_gpu_id' refers to the ID of the GPU device within
+  // the process and must reference a valid ID in the process.
   // Note: stream_exec cannot be null.
-  explicit GPUMemAllocator(se::StreamExecutor* stream_exec,
-                           bool use_unified_memory)
-      : stream_exec_(stream_exec), use_unified_memory_(use_unified_memory) {
+  explicit GPUMemAllocator(se::StreamExecutor* stream_exec, CudaGpuId gpu_id,
+                           bool use_unified_memory,
+                           const std::vector<Visitor>& alloc_visitors,
+                           const std::vector<Visitor>& free_visitors)
+      : SubAllocator(alloc_visitors, free_visitors),
+        stream_exec_(stream_exec),
+        gpu_id_(gpu_id),
+        use_unified_memory_(use_unified_memory) {
     CHECK(stream_exec_ != nullptr);
   }
   ~GPUMemAllocator() override {}
@@ -65,12 +57,14 @@ class GPUMemAllocator : public SubAllocator {
       } else {
         ptr = stream_exec_->AllocateArray<char>(num_bytes).opaque();
       }
+      VisitAlloc(ptr, gpu_id_.value(), num_bytes);
     }
     return ptr;
   }
 
   void Free(void* ptr, size_t num_bytes) override {
     if (ptr != nullptr) {
+      VisitFree(ptr, gpu_id_.value(), num_bytes);
       if (use_unified_memory_) {
         stream_exec_->UnifiedMemoryDeallocate(ptr);
       } else {
@@ -82,11 +76,25 @@ class GPUMemAllocator : public SubAllocator {
 
  private:
   se::StreamExecutor* stream_exec_;  // not owned, non-null
+  const CudaGpuId gpu_id_;
   const bool use_unified_memory_ = false;
 
   TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator);
 };
 
+// A GPU memory allocator that implements a 'best-fit with coalescing'
+// algorithm.
+class GPUBFCAllocator : public BFCAllocator {
+ public:
+  GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory,
+                  const string& name);
+  GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory,
+                  const GPUOptions& gpu_options, const string& name);
+  ~GPUBFCAllocator() override {}
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_BFC_ALLOCATOR_H_
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
index 67caeb3495..7112c3afd4 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -46,7 +47,11 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use,
 }
 
 TEST(GPUBFCAllocatorTest, NoDups) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   // Allocate a lot of raw pointers
@@ -75,7 +80,11 @@ TEST(GPUBFCAllocatorTest, NoDups) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   // Allocate 256 raw pointers of sizes between 100 bytes and about
   // a meg
   random::PhiloxRandom philox(123, 17);
@@ -133,7 +142,11 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
 }
 
 TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   float* first_ptr = a.Allocate<float>(1024);
@@ -168,18 +181,30 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   float* ptr = a.Allocate<float>(0);
   EXPECT_EQ(nullptr, ptr);
 }
 
 TEST(GPUBFCAllocatorTest, TracksSizes) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   float* t1 = a.Allocate<float>(1);
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
@@ -187,8 +212,12 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
 }
 
 TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
   // Configure a 1MiB byte limit
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc");
+  GPUBFCAllocator a(sub_allocator, 1 << 20, "GPU_0_bfc");
 
   float* first_ptr = a.Allocate<float>(1 << 6);
   float* second_ptr = a.Allocate<float>(1 << 20);
@@ -203,7 +232,11 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
   options.set_allow_growth(true);
 
   // Max of 2GiB, but starts out small.
-  GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1LL << 31, "GPU_0_bfc");
 
   // Allocate 10 raw pointers of sizes between 100 bytes and about
   // 64 megs.
@@ -264,8 +297,15 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
 }
 
 TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
-  GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
-  GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1UL << 60, "GPU_0_bfc");
+  sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator b(sub_allocator, 1UL << 60, "GPU_0_bfc");
   void* amem = a.AllocateRaw(1, 1);
   void* bmem = b.AllocateRaw(1, 1 << 30);
   a.DeallocateRaw(amem);
@@ -273,7 +313,11 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
 }
 
 static void BM_Allocation(int iters) {
-  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<size_t> sizes = {256,        4096,      16384,    524288,
                                512,        1048576,   10485760, 104857600,
@@ -289,7 +333,11 @@ static void BM_Allocation(int iters) {
 BENCHMARK(BM_Allocation);
 
 static void BM_AllocationThreaded(int iters, int num_threads) {
-  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc");
   thread::ThreadPool pool(Env::Default(), "test", num_threads);
   std::atomic_int_fast32_t count(iters);
   mutex done_lock;
@@ -325,7 +373,11 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16);
 // A more complex benchmark that defers deallocation of an object for
 // "delay" allocations.
 static void BM_AllocationDelayed(int iters, int delay) {
-  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+  CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<int> sizes = {256, 4096, 16384, 4096, 512, 1024, 1024};
   int size_index = 0;
@@ -363,7 +415,11 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   // only methods inside this class can access private members of BFCAllocator.
 
   void TestBinDebugInfo() {
-    GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+    CudaGpuId cuda_gpu_id(0);
+    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+        false /*use_unified_memory*/, {}, {});
+    GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
 
     std::vector<void*> initial_ptrs;
     std::vector<size_t> initial_ptrs_allocated_sizes;
@@ -441,7 +497,11 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   }
 
   void TestLog2FloorNonZeroSlow() {
-    GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc");
+    CudaGpuId cuda_gpu_id(0);
+    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+        false /*use_unified_memory*/, {}, {});
+    GPUBFCAllocator a(sub_allocator, 1 /* total_memory */, "GPU_0_bfc");
     EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0));
     EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1));
     EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
index 934a57a5fb..8e14f1ea75 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
@@ -27,7 +27,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator,
+GPUcudaMallocAllocator::GPUcudaMallocAllocator(Allocator* allocator,
                                                CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
   stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -60,14 +60,6 @@ void GPUcudaMallocAllocator::DeallocateRaw(void* ptr) {
 #endif  // GOOGLE_CUDA
 }
 
-void GPUcudaMallocAllocator::AddAllocVisitor(Visitor visitor) {
-  return base_allocator_->AddAllocVisitor(visitor);
-}
-
-void GPUcudaMallocAllocator::AddFreeVisitor(Visitor visitor) {
-  return base_allocator_->AddFreeVisitor(visitor);
-}
-
 bool GPUcudaMallocAllocator::TracksAllocationSizes() { return false; }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
index 856fdc34b4..3d1d0ef481 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include <memory>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
@@ -29,20 +29,17 @@ namespace tensorflow {
 // An allocator that wraps a GPU allocator and adds debugging
 // functionality that verifies that users do not write outside their
 // allocated memory.
-class GPUcudaMallocAllocator : public VisitableAllocator {
+class GPUcudaMallocAllocator : public Allocator {
  public:
-  explicit GPUcudaMallocAllocator(VisitableAllocator* allocator,
-                                  CudaGpuId cuda_gpu_id);
+  explicit GPUcudaMallocAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id);
   ~GPUcudaMallocAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
-  void AddAllocVisitor(Visitor visitor) override;
-  void AddFreeVisitor(Visitor visitor) override;
   bool TracksAllocationSizes() override;
 
  private:
-  VisitableAllocator* base_allocator_ = nullptr;  // owned
+  Allocator* base_allocator_ = nullptr;  // owned
 
   se::StreamExecutor* stream_exec_;  // Not owned.
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
index e4c834b30d..6bad66dcec 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
@@ -73,7 +73,7 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) {
 // -----------------------------------------------------------------------------
 // GPUDebugAllocator
 // -----------------------------------------------------------------------------
-GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator,
+GPUDebugAllocator::GPUDebugAllocator(Allocator* allocator,
                                      CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
   stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -111,14 +111,6 @@ void GPUDebugAllocator::DeallocateRaw(void* ptr) {
   base_allocator_->DeallocateRaw(ptr);
 }
 
-void GPUDebugAllocator::AddAllocVisitor(Visitor visitor) {
-  return base_allocator_->AddAllocVisitor(visitor);
-}
-
-void GPUDebugAllocator::AddFreeVisitor(Visitor visitor) {
-  return base_allocator_->AddFreeVisitor(visitor);
-}
-
 bool GPUDebugAllocator::TracksAllocationSizes() { return true; }
 
 size_t GPUDebugAllocator::RequestedSize(const void* ptr) {
@@ -158,7 +150,7 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) {
 // -----------------------------------------------------------------------------
 // GPUNanResetAllocator
 // -----------------------------------------------------------------------------
-GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator,
+GPUNanResetAllocator::GPUNanResetAllocator(Allocator* allocator,
                                            CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
   stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -200,14 +192,6 @@ void GPUNanResetAllocator::DeallocateRaw(void* ptr) {
   base_allocator_->DeallocateRaw(ptr);
 }
 
-void GPUNanResetAllocator::AddAllocVisitor(Visitor visitor) {
-  return base_allocator_->AddAllocVisitor(visitor);
-}
-
-void GPUNanResetAllocator::AddFreeVisitor(Visitor visitor) {
-  return base_allocator_->AddFreeVisitor(visitor);
-}
-
 size_t GPUNanResetAllocator::RequestedSize(const void* ptr) {
   return base_allocator_->RequestedSize(ptr);
 }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
index 0f9b72040c..0f27ff4384 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include <unordered_map>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
@@ -31,16 +31,13 @@ namespace tensorflow {
 // An allocator that wraps a GPU allocator and adds debugging
 // functionality that verifies that users do not write outside their
 // allocated memory.
-class GPUDebugAllocator : public VisitableAllocator {
+class GPUDebugAllocator : public Allocator {
  public:
-  explicit GPUDebugAllocator(VisitableAllocator* allocator,
-                             CudaGpuId cuda_gpu_id);
+  explicit GPUDebugAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id);
   ~GPUDebugAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
-  void AddAllocVisitor(Visitor visitor) override;
-  void AddFreeVisitor(Visitor visitor) override;
   bool TracksAllocationSizes() override;
   size_t RequestedSize(const void* ptr) override;
   size_t AllocatedSize(const void* ptr) override;
@@ -53,7 +50,7 @@ class GPUDebugAllocator : public VisitableAllocator {
   bool CheckFooter(void* ptr);
 
  private:
-  VisitableAllocator* base_allocator_ = nullptr;  // owned
+  Allocator* base_allocator_ = nullptr;  // owned
 
   se::StreamExecutor* stream_exec_;  // Not owned.
 
@@ -63,23 +60,20 @@ class GPUDebugAllocator : public VisitableAllocator {
 // An allocator that wraps a GPU allocator and resets the memory on
 // allocation and free to 'NaN', helping to identify cases where the
 // user forgets to initialize the memory.
-class GPUNanResetAllocator : public VisitableAllocator {
+class GPUNanResetAllocator : public Allocator {
  public:
-  explicit GPUNanResetAllocator(VisitableAllocator* allocator,
-                                CudaGpuId cuda_gpu_id);
+  explicit GPUNanResetAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id);
   ~GPUNanResetAllocator() override;
   string Name() override { return "gpu_nan_reset"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
-  void AddAllocVisitor(Visitor visitor) override;
-  void AddFreeVisitor(Visitor visitor) override;
   size_t RequestedSize(const void* ptr) override;
   size_t AllocatedSize(const void* ptr) override;
   void GetStats(AllocatorStats* stats) override;
   void ClearStats() override;
 
  private:
-  VisitableAllocator* base_allocator_ = nullptr;  // owned
+  Allocator* base_allocator_ = nullptr;  // owned
 
   se::StreamExecutor* stream_exec_;  // Not owned.
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
index 236a0afa0b..98283cd846 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
@@ -35,7 +35,10 @@ namespace {
 
 TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                       cuda_gpu_id);
   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
@@ -59,7 +62,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
     EXPECT_DEATH(
         {
           const CudaGpuId cuda_gpu_id(0);
-          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+          GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
+              cuda_gpu_id, false /*use_unified_memory*/, {}, {});
+          GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                               cuda_gpu_id);
           auto stream_exec =
               GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -92,7 +98,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
     EXPECT_DEATH(
         {
           const CudaGpuId cuda_gpu_id(0);
-          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+          GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
+              cuda_gpu_id, false /*use_unified_memory*/, {}, {});
+          GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                               cuda_gpu_id);
           auto stream_exec =
               GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -122,7 +131,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
 
 TEST(GPUDebugAllocatorTest, ResetToNan) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUNanResetAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                          cuda_gpu_id);
   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
@@ -163,8 +175,11 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
 TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
   const CudaGpuId cuda_gpu_id(0);
   // NaN reset must be the outer-most allocator.
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+      new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                             cuda_gpu_id),
       cuda_gpu_id);
   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
@@ -205,15 +220,21 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
 
 TEST(GPUDebugAllocatorTest, TracksSizes) {
   const CudaGpuId cuda_gpu_id(0);
-  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
+  GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                       cuda_gpu_id);
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
   const CudaGpuId cuda_gpu_id(0);
+  GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+      false /*use_unified_memory*/, {}, {});
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+      new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                             cuda_gpu_id),
       cuda_gpu_id);
   float* t1 = a.Allocate<float>(1);
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 2763ac0d4a..50e61b7e00 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -41,7 +41,6 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/gpu_device_context.h"
 #include "tensorflow/core/common_runtime/local_device.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -285,6 +284,38 @@ BaseGPUDevice::~BaseGPUDevice() {
   for (auto ctx : device_contexts_) ctx->Unref();
 }
 
+// This should be idempotent if already initialized.
+Status BaseGPUDevice::InitScratchBuffers() {
+  mutex_lock l(scratch_init_mutex_);
+  if (scratch_.size() < max_streams_) {
+    for (int i = 0; i < max_streams_; i++) {
+      DCHECK(streams_[i]);
+      if (scratch_.size() > i && scratch_[i]) continue;
+      size_t scratch_buffer_size =
+          Eigen::kCudaScratchSize + sizeof(unsigned int);
+      void* scratch_buffer = gpu_allocator_->AllocateRaw(
+          Allocator::kAllocatorAlignment, scratch_buffer_size);
+      if (scratch_buffer == nullptr) {
+        return errors::FailedPrecondition(
+            "Failed to allocate scratch buffer for device ",
+            tf_gpu_id_.value());
+      }
+      se::DeviceMemory<char> mem(
+          se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size));
+
+      bool ok = executor_->SynchronousMemZero(
+          &mem, Eigen::kCudaScratchSize + sizeof(unsigned int));
+      if (!ok) {
+        return errors::FailedPrecondition(
+            "Failed to memcopy into scratch buffer for device ",
+            tf_gpu_id_.value());
+      }
+      scratch_.push_back(static_cast<char*>(scratch_buffer));
+    }
+  }
+  return Status::OK();
+}
+
 Status BaseGPUDevice::Init(const SessionOptions& options) {
   auto executor_status = GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id_);
   if (!executor_status.status().ok()) {
@@ -303,27 +334,6 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   for (int i = 0; i < max_streams_; i++) {
     streams_.push_back(StreamGroupFactory::Global().GetOrCreate(
         tf_gpu_id_, i, executor_, options.config.gpu_options()));
-
-    size_t scratch_buffer_size = Eigen::kCudaScratchSize + sizeof(unsigned int);
-    void* scratch_buffer = gpu_allocator_->AllocateRaw(
-        Allocator::kAllocatorAlignment, scratch_buffer_size);
-    if (scratch_buffer == nullptr) {
-      return errors::FailedPrecondition(
-          "Failed to allocate scratch buffer for device ", tf_gpu_id_.value());
-    }
-    scratch_.push_back(static_cast<char*>(scratch_buffer));
-
-    se::DeviceMemory<char> mem(
-        se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size));
-
-    bool ok = executor_->SynchronousMemZero(
-        &mem, Eigen::kCudaScratchSize + sizeof(unsigned int));
-    if (!ok) {
-      return errors::FailedPrecondition(
-          "Failed to memcopy into scratch buffer for device ",
-          tf_gpu_id_.value());
-    }
-
     device_contexts_.push_back(new GPUDeviceContext(
         i, streams_.back()->compute, streams_.back()->host_to_device,
         streams_.back()->device_to_host, streams_.back()->device_to_device));
@@ -867,10 +877,11 @@ PerOpGpuDevice* BaseGPUDevice::MakeGpuDevice() {
   return new ConcretePerOpGpuDevice();
 }
 
-void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
-                                          PerOpGpuDevice* device,
-                                          DeviceContext* dc,
-                                          Allocator* allocator) {
+Status BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
+                                            PerOpGpuDevice* device,
+                                            DeviceContext* dc,
+                                            Allocator* allocator) {
+  TF_RETURN_IF_ERROR(InitScratchBuffers());
   if (dc) {
     const GPUDeviceContext* gpu_dc = static_cast<GPUDeviceContext*>(dc);
     const int stream_id = gpu_dc->stream_id();
@@ -881,6 +892,7 @@ void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
   } else {
     ReinitializeDevice(context, device, 0, allocator);
   }
+  return Status::OK();
 }
 
 Allocator* BaseGPUDevice::GetScopedAllocator(AllocatorAttributes attr,
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 56d03d7a8c..b3eea55758 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -86,8 +86,9 @@ class BaseGPUDevice : public LocalDevice {
   // The caller owns the returned device.
   PerOpGpuDevice* MakeGpuDevice() override;
 
-  void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
-                             DeviceContext* dc, Allocator* allocator) override;
+  Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
+                               DeviceContext* dc,
+                               Allocator* allocator) override;
 
   // Returns the CUDA GPU id of this device within the native driver system;
   // e.g., for CUDA this is the ordinal of the GPU within the system.
@@ -125,6 +126,7 @@ class BaseGPUDevice : public LocalDevice {
   class StreamGroupFactory;
 
   gtl::InlinedVector<StreamGroup*, 4> streams_;
+  mutex scratch_init_mutex_;
   gtl::InlinedVector<char*, 4> scratch_;
   std::vector<GPUDeviceContext*> device_contexts_;
   GpuDeviceInfo* gpu_device_info_ = nullptr;
@@ -135,6 +137,9 @@ class BaseGPUDevice : public LocalDevice {
   std::unique_ptr<EventMgr> em_;
   std::unique_ptr<thread::ThreadPool> thread_pool_;
 
+  // Initialize scractch buffers used by Eigen.
+  Status InitScratchBuffers();
+
   void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device,
                           int stream_id, Allocator* allocator);
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
index b18688174d..9ec740fabe 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
@@ -76,12 +76,16 @@ GPUProcessState::GPUProcessState() : gpu_device_enabled_(false) {
 // This function is defined for debugging problems with the allocators.
 GPUProcessState::~GPUProcessState() {
   CHECK_EQ(this, instance_);
-  for (auto p : gpu_allocators_) {
-    delete p;
-  }
   instance_ = nullptr;
 }
 
+int GPUProcessState::BusIdForGPU(TfGpuId tf_gpu_id) {
+  // Return the NUMA node associated with the GPU's StreamExecutor.
+  se::StreamExecutor* se =
+      GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
+  return se->GetDeviceDescription().numa_node();
+}
+
 Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
                                             TfGpuId tf_gpu_id,
                                             size_t total_bytes) {
@@ -93,13 +97,10 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
 
   if (tf_gpu_id.value() >= static_cast<int64>(gpu_allocators_.size())) {
     gpu_allocators_.resize(tf_gpu_id.value() + 1);
-    if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types)
-      gpu_al_.resize(tf_gpu_id.value() + 1);
   }
 
-  if (gpu_allocators_[tf_gpu_id.value()] == nullptr) {
-    VisitableAllocator* gpu_allocator;
-
+  AllocatorParts& allocator_parts = gpu_allocators_[tf_gpu_id.value()];
+  if (allocator_parts.allocator.get() == nullptr) {
     // Validate allocator types.
     if (!allocator_type.empty() && allocator_type != "BFC") {
       LOG(ERROR) << "Invalid allocator type: " << allocator_type;
@@ -108,8 +109,17 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
 
     CudaGpuId cuda_gpu_id;
     TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
-    gpu_allocator =
-        new GPUBFCAllocator(cuda_gpu_id, total_bytes, options,
+    int bus_id = BusIdForGPU(tf_gpu_id);
+    while (bus_id >= gpu_visitors_.size()) {
+      gpu_visitors_.push_back({});
+    }
+    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id,
+        (options.per_process_gpu_memory_fraction() > 1.0 ||
+         options.experimental().use_unified_memory()),
+        gpu_visitors_[bus_id], {});
+    Allocator* gpu_allocator =
+        new GPUBFCAllocator(sub_allocator, total_bytes, options,
                             strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc"));
 
     // If true, checks for memory overwrites by writing
@@ -123,34 +133,25 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
       // **WARNING** probably will not work in a multi-gpu scenario
       gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id);
     }
-    gpu_allocators_[tf_gpu_id.value()] = gpu_allocator;
-
-    // If there are any pending AllocVisitors for this bus, add
-    // them now.
-    se::StreamExecutor* se =
-        GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
-    int bus_id = se->GetDeviceDescription().numa_node();
-    if (bus_id >= 0 && bus_id < static_cast<int64>(gpu_visitors_.size())) {
-      for (const auto& v : gpu_visitors_[bus_id]) {
-        gpu_allocator->AddAllocVisitor(v);
-      }
-    }
+
+    Allocator* recording_allocator = nullptr;
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
       ProcessState::MemDesc md;
       md.loc = ProcessState::MemDesc::GPU;
       md.dev_index = cuda_gpu_id.value();
       md.gpu_registered = false;
       md.nic_registered = true;
-      if (static_cast<int64>(gpu_al_.size()) <= tf_gpu_id.value()) {
-        gpu_al_.resize(tf_gpu_id.value() + 1);
-      }
-      gpu_al_[tf_gpu_id.value()] = new internal::RecordingAllocator(
+      recording_allocator = new internal::RecordingAllocator(
           &process_state_->mem_desc_map_, gpu_allocator, md, &mu_);
     }
+    allocator_parts = {std::unique_ptr<Allocator>(gpu_allocator), sub_allocator,
+                       std::unique_ptr<Allocator>(recording_allocator)};
+  }
+  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
+    return allocator_parts.recording_allocator.get();
+  } else {
+    return allocator_parts.allocator.get();
   }
-  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types)
-    return gpu_al_[tf_gpu_id.value()];
-  return gpu_allocators_[tf_gpu_id.value()];
 #else
   LOG(FATAL) << "GPUAllocator unavailable. Not compiled with --config=cuda.";
   return nullptr;
@@ -172,11 +173,12 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
     tf_shared_lock lock(mu_);
 
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types &&
-        static_cast<int>(cuda_al_.size()) > 0) {
-      return cuda_al_[0];
+        !cuda_host_allocators_.empty() &&
+        cuda_host_allocators_[0].recording_allocator != nullptr) {
+      return cuda_host_allocators_[0].recording_allocator.get();
     }
     if (static_cast<int>(cuda_host_allocators_.size()) > numa_node) {
-      return cuda_host_allocators_[0];
+      return cuda_host_allocators_[0].allocator.get();
     }
   }
 
@@ -190,7 +192,7 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
   // it knows is valid.
   se::StreamExecutor* se = nullptr;
   for (int i = 0; i < static_cast<int>(gpu_allocators_.size()); ++i) {
-    if (gpu_allocators_[i] != nullptr) {
+    if (gpu_allocators_[i].allocator != nullptr) {
       se = GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
       break;
     }
@@ -199,6 +201,15 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
   CHECK_NE(nullptr, se);
 
   while (static_cast<int>(cuda_host_allocators_.size()) <= numa_node) {
+    while (cuda_host_alloc_visitors_.size() <= numa_node) {
+      cuda_host_alloc_visitors_.push_back({});
+    }
+    while (cuda_host_free_visitors_.size() <= numa_node) {
+      cuda_host_free_visitors_.push_back({});
+    }
+    SubAllocator* sub_allocator = new CUDAHostAllocator(
+        se, numa_node, cuda_host_alloc_visitors_[numa_node],
+        cuda_host_free_visitors_[numa_node]);
     // TODO(zheng-xq): evaluate whether 64GB by default is the best choice.
     int64 cuda_host_mem_limit_in_mb = -1;
     Status status = ReadInt64FromEnvVar("TF_CUDA_HOST_MEM_LIMIT_IN_MB",
@@ -208,62 +219,92 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) {
       LOG(ERROR) << "GetCUDAHostAllocator: " << status.error_message();
     }
     int64 cuda_host_mem_limit = cuda_host_mem_limit_in_mb * (1LL << 20);
-    VisitableAllocator* allocator =
-        new BFCAllocator(new CUDAHostAllocator(se), cuda_host_mem_limit,
+    Allocator* allocator =
+        new BFCAllocator(sub_allocator, cuda_host_mem_limit,
                          true /*allow_growth*/, "cuda_host_bfc" /*name*/);
 
-    if (LogMemory::IsEnabled()) {
+    if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) {
       // Wrap the allocator to track allocation ids for better logging
       // at the cost of performance.
-      allocator = new TrackingVisitableAllocator(allocator, true);
+      allocator = new TrackingAllocator(allocator, true);
     }
-    cuda_host_allocators_.push_back(allocator);
+    cuda_host_allocators_.push_back({std::unique_ptr<Allocator>(allocator),
+                                     sub_allocator,
+                                     std::unique_ptr<Allocator>(nullptr)});
+    AllocatorParts& allocator_parts = cuda_host_allocators_.back();
     if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
       ProcessState::MemDesc md;
       md.loc = ProcessState::MemDesc::CPU;
       md.dev_index = 0;
       md.gpu_registered = true;
       md.nic_registered = false;
-      cuda_al_.push_back(new internal::RecordingAllocator(
-          &process_state_->mem_desc_map_, cuda_host_allocators_.back(), md,
-          &mu_));
+      allocator_parts.recording_allocator.reset(
+          new internal::RecordingAllocator(&process_state_->mem_desc_map_,
+                                           allocator_parts.allocator.get(), md,
+                                           &mu_));
     }
   }
-  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types)
-    return cuda_al_[0];
-  return cuda_host_allocators_[0];
+  if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
+    return cuda_host_allocators_[0].recording_allocator.get();
+  } else {
+    return cuda_host_allocators_[0].allocator.get();
+  }
 }
 
 void GPUProcessState::AddGPUAllocVisitor(int bus_id,
-                                         const AllocVisitor& visitor) {
-  CHECK(process_state_);
+                                         const SubAllocator::Visitor& visitor) {
 #if GOOGLE_CUDA
   mutex_lock lock(mu_);
-  for (int i = 0; i < static_cast<int64>(gpu_allocators_.size()); ++i) {
-    se::StreamExecutor* se =
-        GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
-    if (gpu_allocators_[i] &&
-        (se->GetDeviceDescription().numa_node() + 1) == bus_id) {
-      gpu_allocators_[i]->AddAllocVisitor(visitor);
-    }
-  }
+  CHECK(gpu_allocators_.empty())  // Crash OK
+      << "AddGPUAllocVisitor must be called before "
+         "first call to GetGPUAllocator.";
   while (bus_id >= static_cast<int64>(gpu_visitors_.size())) {
-    gpu_visitors_.push_back(std::vector<AllocVisitor>());
+    gpu_visitors_.push_back(std::vector<SubAllocator::Visitor>());
   }
   gpu_visitors_[bus_id].push_back(visitor);
 #endif  // GOOGLE_CUDA
 }
 
+void GPUProcessState::AddCUDAHostAllocVisitor(
+    int numa_node, const SubAllocator::Visitor& visitor) {
+#if GOOGLE_CUDA
+  mutex_lock lock(mu_);
+  CHECK(cuda_host_allocators_.empty())  // Crash OK
+      << "AddCUDAHostAllocVisitor must be called before "
+         "first call to GetCUDAHostAllocator.";
+  while (numa_node >= static_cast<int64>(cuda_host_alloc_visitors_.size())) {
+    cuda_host_alloc_visitors_.push_back(std::vector<SubAllocator::Visitor>());
+  }
+  cuda_host_alloc_visitors_[numa_node].push_back(visitor);
+#endif  // GOOGLE_CUDA
+}
+
+void GPUProcessState::AddCUDAHostFreeVisitor(
+    int numa_node, const SubAllocator::Visitor& visitor) {
+#if GOOGLE_CUDA
+  mutex_lock lock(mu_);
+  CHECK(cuda_host_allocators_.empty())  // Crash OK
+      << "AddCUDAHostFreeVisitor must be called before "
+         "first call to GetCUDAHostAllocator.";
+  while (numa_node >= static_cast<int64>(cuda_host_free_visitors_.size())) {
+    cuda_host_free_visitors_.push_back(std::vector<SubAllocator::Visitor>());
+  }
+  cuda_host_free_visitors_[numa_node].push_back(visitor);
+#endif  // GOOGLE_CUDA
+}
+
 void GPUProcessState::TestOnlyReset() {
-  process_state_->ProcessState::TestOnlyReset();
+  if (process_state_) {
+    process_state_->ProcessState::TestOnlyReset();
+  }
   {
     mutex_lock lock(mu_);
     gpu_device_enabled_ = false;
+    gpu_allocators_.clear();
     gpu_visitors_.clear();
-    gtl::STLDeleteElements(&gpu_allocators_);
-    gtl::STLDeleteElements(&cuda_host_allocators_);
-    gtl::STLDeleteElements(&gpu_al_);
-    gtl::STLDeleteElements(&cuda_al_);
+    cuda_host_allocators_.clear();
+    cuda_host_alloc_visitors_.clear();
+    cuda_host_free_visitors_.clear();
   }
 }
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.h b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
index cb41c3c6bd..43e9a31660 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
@@ -32,7 +32,6 @@ limitations under the License.
 namespace tensorflow {
 
 class Allocator;
-class VisitableAllocator;
 class PoolAllocator;
 
 // Singleton that manages per-process state when GPUs are present.
@@ -72,18 +71,30 @@ class GPUProcessState {
 
   virtual Allocator* GetCUDAHostAllocator(int numa_node);
 
-  // Registers a function to be called once on every new Region
-  // allocated by every GPURegionAllocator proximate to the specified
-  // bus.  The AllocVisitor is provided with a memory pointer and the
-  // size of the area it identifies.  The pointer is not guaranteed to
-  // be valid after the call terminates.  The intention is for this
-  // interface to be used for network device memory registration.
-  // "bus_id" is platform-specific.  On many platforms it
-  // should be 0.  On machines with multiple PCIe buses, it should be
-  // the index of one of the PCIe buses.  If the bus_id is invalid,
-  // results are undefined.
-  typedef std::function<void(void*, size_t)> AllocVisitor;
-  virtual void AddGPUAllocVisitor(int bus_id, const AllocVisitor& visitor);
+  // Registers a Visitor to be invoked on new chunks of memory allocated by the
+  // SubAllocator of every GPU proximate to the specified bus.  The AllocVisitor
+  // is provided with a memory pointer, a GPU id, and the size of the area it
+  // identifies.  The pointer is not guaranteed to be valid after the call
+  // terminates.  The intention is for this interface to be used for network
+  // device memory registration.  "bus_id" is platform-specific.  On many
+  // platforms it should be 0.  On machines with multiple PCIe buses, it should
+  // be the index of one of the PCIe buses (maybe the NUMA node at which the
+  // PCIe is rooted).  If the bus_id is invalid, results are undefined.
+  virtual void AddGPUAllocVisitor(int bus_id,
+                                  const SubAllocator::Visitor& visitor);
+
+  // Registers a Visitor to be invoked on new chunks of memory allocated by
+  // the SubAllocator of the CUDAHostAllocator for the given numa_node.
+  virtual void AddCUDAHostAllocVisitor(int numa_node,
+                                       const SubAllocator::Visitor& visitor);
+
+  // Registers a Visitor to be invoked on each chunk handed back for freeing to
+  // the SubAllocator of the CUDAHostAllocator for the given numa_node.
+  virtual void AddCUDAHostFreeVisitor(int numa_node,
+                                      const SubAllocator::Visitor& visitor);
+
+  // Returns bus_id for the given GPU id.
+  virtual int BusIdForGPU(TfGpuId tf_gpu_id);
 
  protected:
   GPUProcessState();
@@ -103,16 +114,21 @@ class GPUProcessState {
 
   mutex mu_;
 
-  std::vector<VisitableAllocator*> gpu_allocators_ GUARDED_BY(mu_);
-  std::vector<std::vector<AllocVisitor>> gpu_visitors_ GUARDED_BY(mu_);
-  std::vector<Allocator*> cuda_host_allocators_ GUARDED_BY(mu_);
+  struct AllocatorParts {
+    std::unique_ptr<Allocator> allocator;
+    SubAllocator* sub_allocator;  // owned by allocator
+    std::unique_ptr<Allocator> recording_allocator;
+  };
+  std::vector<AllocatorParts> gpu_allocators_ GUARDED_BY(mu_);
+  std::vector<std::vector<SubAllocator::Visitor>> gpu_visitors_ GUARDED_BY(mu_);
 
-  virtual ~GPUProcessState();
+  std::vector<AllocatorParts> cuda_host_allocators_ GUARDED_BY(mu_);
+  std::vector<std::vector<SubAllocator::Visitor>> cuda_host_alloc_visitors_
+      GUARDED_BY(mu_);
+  std::vector<std::vector<SubAllocator::Visitor>> cuda_host_free_visitors_
+      GUARDED_BY(mu_);
 
-  // Optional RecordingAllocators that wrap the corresponding
-  // Allocators for runtime attribute use analysis.
-  std::vector<Allocator*> gpu_al_ GUARDED_BY(mu_);
-  std::vector<Allocator*> cuda_al_ GUARDED_BY(mu_);
+  virtual ~GPUProcessState();
 
   friend class GPUDeviceTest;
 };
diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc
index 583bff2c07..6b2f6547b0 100644
--- a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc
@@ -31,7 +31,8 @@ TEST(PoolAllocatorTest, ZeroSizeBuffers) {
       2 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
+              .ValueOrDie(),
+          0 /*numa_node*/, {}, {}),
       new NoopRounder, "pool");
 
   EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/));
@@ -49,7 +50,8 @@ TEST(PoolAllocatorTest, ZeroSizePool) {
       0 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
+              .ValueOrDie(),
+          0 /*numa_node*/, {}, {}),
       new NoopRounder, "pool");
 
   EXPECT_EQ(0, pool.get_from_pool_count());
@@ -82,7 +84,8 @@ TEST(PoolAllocatorTest, Alignment) {
       0 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
+              .ValueOrDie(),
+          0 /*numa_node*/, {}, {}),
       new NoopRounder, "pool");
   for (int i = 0; i < 16; ++i) {
     size_t alignment = 1 << i;
@@ -97,8 +100,8 @@ TEST(PoolAllocatorTest, Alignment) {
 
 TEST(PoolAllocatorTest, AutoResize) {
   PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/,
-                     new BasicCPUAllocator(0 /*numa_node*/), new NoopRounder,
-                     "pool");
+                     new BasicCPUAllocator(0 /*numa_node*/, {}, {}),
+                     new NoopRounder, "pool");
 
   // Alloc/dealloc 10 sizes just a few times, confirming pool size
   // stays at 2.
@@ -123,14 +126,32 @@ TEST(PoolAllocatorTest, AutoResize) {
 }
 
 TEST(PoolAllocatorTest, CudaHostAllocator) {
+  int alloc_count = 0;
+  int64 alloc_size = 0;
+  SubAllocator::Visitor alloc_visitor =
+      [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) {
+        ++alloc_count;
+        alloc_size += size;
+      };
+  int free_count = 0;
+  int64 free_size = 0;
+  SubAllocator::Visitor free_visitor =
+      [&free_count, &free_size](void* ptr, int numa_node, int64 size) {
+        ++free_count;
+        free_size += size;
+      };
   se::Platform* platform =
       se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
-  PoolAllocator pool(
-      2 /*pool_size_limit*/, false /*auto_resize*/,
-      new CUDAHostAllocator(
-          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
-      new NoopRounder, "pool");
+  CUDAHostAllocator* sub_allocator = new CUDAHostAllocator(
+      platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
+          .ValueOrDie(),
+      0 /*numa_node*/, {alloc_visitor}, {free_visitor});
+  PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
+                     sub_allocator, new NoopRounder, "pool");
+  EXPECT_EQ(0, alloc_count);
+  EXPECT_EQ(0, alloc_size);
+  EXPECT_EQ(0, free_count);
+  EXPECT_EQ(0, free_size);
 
   // Repeatedly Get a 16-byte value, confirming that there's only
   // one real allocation.
@@ -138,6 +159,10 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   EXPECT_EQ(0, pool.get_from_pool_count());
   EXPECT_EQ(1, pool.allocated_count());
   EXPECT_NE(nullptr, p1_16);
+  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
+  // Each suballocation includes a 16B ChunkPrefix.
+  static const int kChunkPrefixSize = 16;
+  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
   pool.DeallocateRaw(p1_16);
   // Pool contents {16}
   EXPECT_EQ(1, pool.put_count());
@@ -148,6 +173,9 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   pool.DeallocateRaw(p2_16);  // Put it back.
   // Pool contents {16}
   EXPECT_EQ(2, pool.put_count());
+  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
+  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(0, free_count);
 
   // Get two more values of different sizes.
   void* p3_4 = pool.AllocateRaw(4, 4);
@@ -160,6 +188,9 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   void* p4_2 = pool.AllocateRaw(4, 2);  // Get a third size buffer.
   EXPECT_NE(nullptr, p4_2);
   EXPECT_EQ(0, pool.evicted_count());
+  EXPECT_EQ(3, alloc_count);
+  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(0, free_count);
 
   // The pool is full: when we put back p4_2, the 16-byte buffer
   // should be evicted since it was least recently inserted.
@@ -167,6 +198,10 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   // Pool contents {2, 4}
   EXPECT_EQ(4, pool.put_count());
   EXPECT_EQ(1, pool.evicted_count());
+  EXPECT_EQ(3, alloc_count);
+  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(1, free_count);
+  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
 
   // Re-getting and putting size 2 or 4 should not alter pool size or
   // num-evicted.
@@ -180,12 +215,20 @@ TEST(PoolAllocatorTest, CudaHostAllocator) {
   EXPECT_EQ(6, pool.put_count());
   EXPECT_EQ(3, pool.allocated_count());
   EXPECT_EQ(1, pool.evicted_count());
+  EXPECT_EQ(3, alloc_count);
+  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(1, free_count);
+  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
 
   pool.Clear();
   EXPECT_EQ(0, pool.get_from_pool_count());
   EXPECT_EQ(0, pool.put_count());
   EXPECT_EQ(0, pool.allocated_count());
   EXPECT_EQ(0, pool.evicted_count());
+  EXPECT_EQ(3, alloc_count);
+  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
+  EXPECT_EQ(3, free_count);
+  EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size);
 }
 
 TEST(PoolAllocatorTest, Pow2Rounder) {
@@ -206,7 +249,8 @@ TEST(PoolAllocatorTest, Name) {
       2 /*pool_size_limit*/, false /*auto_resize*/,
       new CUDAHostAllocator(
           platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
-              .ValueOrDie()),
+              .ValueOrDie(),
+          0 /*numa_node*/, {}, {}),
       new NoopRounder, "pool");
   EXPECT_EQ("pool", pool.Name());
 }
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index df9c3a686c..538a70668a 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -23,12 +23,11 @@ limitations under the License.
 
 #include <cstdlib>
 #include "tensorflow/core/common_runtime/bfc_allocator.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
-#include "tensorflow/core/framework/allocator_registry.h"
+#include "tensorflow/core/common_runtime/pool_allocator.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mem.h"
-#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/numa.h"
 
 #ifndef INTEL_MKL_DNN_ONLY
 #include "i_malloc.h"
@@ -40,20 +39,16 @@ typedef unsigned int uint;
 
 namespace tensorflow {
 
-class MklSubAllocator : public SubAllocator {
+class MklSubAllocator : public BasicCPUAllocator {
  public:
+  MklSubAllocator() : BasicCPUAllocator(port::kNUMANoAffinity, {}, {}) {}
   ~MklSubAllocator() override {}
-
-  void* Alloc(size_t alignment, size_t num_bytes) override {
-    return port::AlignedMalloc(num_bytes, alignment);
-  }
-  void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); }
 };
 
 // CPU allocator that handles small-size allocations by calling
 // suballocator directly. Mostly, it is just a wrapper around a suballocator
 // (that calls malloc and free directly) with support for bookkeeping.
-class MklSmallSizeAllocator : public VisitableAllocator {
+class MklSmallSizeAllocator : public Allocator {
  public:
   MklSmallSizeAllocator(SubAllocator* sub_allocator, size_t total_memory,
                         const string& name)
@@ -75,10 +70,6 @@ class MklSmallSizeAllocator : public VisitableAllocator {
       CHECK(map_.insert(map_val).second);
       // Increment statistics for small-size allocations.
       IncrementStats(num_bytes);
-      // Call alloc visitors.
-      for (const auto& visitor : alloc_visitors_) {
-        visitor(ptr, num_bytes);
-      }
     }
     return ptr;
   }
@@ -94,9 +85,6 @@ class MklSmallSizeAllocator : public VisitableAllocator {
     if (map_iter != map_.end()) {
       // Call free visitors.
       size_t dealloc_bytes = map_iter->second;
-      for (const auto& visitor : free_visitors_) {
-        visitor(ptr, dealloc_bytes);
-      }
       sub_allocator_->Free(ptr, dealloc_bytes);
       DecrementStats(dealloc_bytes);
       map_.erase(map_iter);
@@ -121,16 +109,6 @@ class MklSmallSizeAllocator : public VisitableAllocator {
     stats_.Clear();
   }
 
-  void AddAllocVisitor(Visitor visitor) override {
-    mutex_lock l(mutex_);
-    alloc_visitors_.push_back(visitor);
-  }
-
-  void AddFreeVisitor(Visitor visitor) override {
-    mutex_lock l(mutex_);
-    free_visitors_.push_back(visitor);
-  }
-
  private:
   // Increment statistics for the allocator handling small allocations.
   inline void IncrementStats(size_t alloc_size)
@@ -163,15 +141,11 @@ class MklSmallSizeAllocator : public VisitableAllocator {
 
   // Allocator stats for small allocs
   AllocatorStats stats_ GUARDED_BY(mutex_);
-
-  // Visitors
-  std::vector<Visitor> alloc_visitors_ GUARDED_BY(mutex_);
-  std::vector<Visitor> free_visitors_ GUARDED_BY(mutex_);
 };
 
 /// CPU allocator for MKL that wraps BFC allocator and intercepts
 /// and redirects memory allocation calls from MKL.
-class MklCPUAllocator : public VisitableAllocator {
+class MklCPUAllocator : public Allocator {
  public:
   // Constructor and other standard functions
 
@@ -284,16 +258,6 @@ class MklCPUAllocator : public VisitableAllocator {
     large_size_allocator_->ClearStats();
   }
 
-  void AddAllocVisitor(Visitor visitor) override {
-    small_size_allocator_->AddAllocVisitor(visitor);
-    large_size_allocator_->AddAllocVisitor(visitor);
-  }
-
-  void AddFreeVisitor(Visitor visitor) override {
-    small_size_allocator_->AddFreeVisitor(visitor);
-    large_size_allocator_->AddFreeVisitor(visitor);
-  }
-
  private:
   // Hooks provided by this allocator for memory allocation routines from MKL
 
@@ -330,7 +294,7 @@ class MklCPUAllocator : public VisitableAllocator {
   // The alignment that we need for the allocations
   static constexpr const size_t kAlignment = 64;
 
-  VisitableAllocator* large_size_allocator_;     // owned by this class
+  Allocator* large_size_allocator_;              // owned by this class
   MklSmallSizeAllocator* small_size_allocator_;  // owned by this class.
 
   SubAllocator* sub_allocator_;  // not owned by this class
diff --git a/tensorflow/core/common_runtime/pool_allocator.cc b/tensorflow/core/common_runtime/pool_allocator.cc
index fdad8de8d6..66dc8f3322 100644
--- a/tensorflow/core/common_runtime/pool_allocator.cc
+++ b/tensorflow/core/common_runtime/pool_allocator.cc
@@ -40,8 +40,7 @@ PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
       auto_resize_(auto_resize),
       pool_size_limit_(pool_size_limit),
       allocator_(allocator),
-      size_rounder_(size_rounder),
-      allocation_begun_(false) {
+      size_rounder_(size_rounder) {
   if (auto_resize) {
     CHECK_LT(size_t{0}, pool_size_limit)
         << "size limit must be > 0 if auto_resize is true.";
@@ -93,7 +92,6 @@ ChunkPrefix* FindPrefix(void* user_ptr) {
 }  // namespace
 
 void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
-  if (!allocation_begun_) allocation_begun_ = true;
   if (num_bytes == 0) return nullptr;
 
   // If alignment is larger than kPoolAlignment, increase num_bytes so that we
@@ -129,9 +127,6 @@ void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
     return PrepareChunk(r, alignment, num_bytes);
   } else {
     void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes);
-    for (const auto& v : alloc_visitors_) {
-      v(ptr, num_bytes);
-    }
     return PrepareChunk(ptr, alignment, num_bytes);
   }
 }
@@ -141,9 +136,6 @@ void PoolAllocator::DeallocateRaw(void* ptr) {
   ChunkPrefix* cp = FindPrefix(ptr);
   CHECK_LE((void*)cp, (void*)ptr);
   if (!has_size_limit_ && !auto_resize_) {
-    for (const auto& v : free_visitors_) {
-      v(cp, cp->num_bytes);
-    }
     allocator_->Free(cp, cp->num_bytes);
   } else {
     mutex_lock lock(mutex_);
@@ -164,9 +156,6 @@ void PoolAllocator::Clear() {
     mutex_lock lock(mutex_);
     for (auto iter : pool_) {
       PtrRecord* pr = iter.second;
-      for (const auto& v : free_visitors_) {
-        v(pr->ptr, pr->num_bytes);
-      }
       allocator_->Free(pr->ptr, pr->num_bytes);
       delete pr;
     }
@@ -221,9 +210,6 @@ void PoolAllocator::EvictOne() {
     DCHECK(iter != pool_.end());
   }
   pool_.erase(iter);
-  for (const auto& v : free_visitors_) {
-    v(prec->ptr, prec->num_bytes);
-  }
   allocator_->Free(prec->ptr, prec->num_bytes);
   delete prec;
   ++evicted_count_;
@@ -269,28 +255,19 @@ void PoolAllocator::EvictOne() {
   }
 }
 
-void PoolAllocator::AddAllocVisitor(Visitor visitor) {
-  mutex_lock lock(mutex_);
-  CHECK(!allocation_begun_)
-      << "AddAllocVisitor may not be called after pool allocation "
-      << "has begun.";
-  alloc_visitors_.push_back(visitor);
-}
-
-void PoolAllocator::AddFreeVisitor(Visitor visitor) {
-  mutex_lock lock(mutex_);
-  CHECK(!allocation_begun_)
-      << "AddFreeVisitor may not be called after pool allocation "
-      << "has begun.";
-  free_visitors_.push_back(visitor);
-}
-
 void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes) {
-  return port::AlignedMalloc(num_bytes, static_cast<int>(alignment));
+  void* ptr = nullptr;
+  if (num_bytes > 0) {
+    ptr = port::AlignedMalloc(num_bytes, static_cast<int>(alignment));
+    VisitAlloc(ptr, numa_node_, num_bytes);
+  }
+  return ptr;
 }
 
 void BasicCPUAllocator::Free(void* ptr, size_t num_bytes) {
-  port::AlignedFree(ptr);
+  if (num_bytes > 0) {
+    VisitFree(ptr, numa_node_, num_bytes);
+    port::AlignedFree(ptr);
+  }
 }
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/pool_allocator.h b/tensorflow/core/common_runtime/pool_allocator.h
index 607734445b..5b4623ba10 100644
--- a/tensorflow/core/common_runtime/pool_allocator.h
+++ b/tensorflow/core/common_runtime/pool_allocator.h
@@ -16,14 +16,13 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
 
-// Simple LRU pool allocators for various flavors of CPU RAM that
-// implement the VisitableAllocator interface.
+// Simple LRU pool allocators for various flavors of CPU RAM.
 
 #include <atomic>
 #include <map>
 #include <memory>
 #include <vector>
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/lib/core/bits.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -41,7 +40,7 @@ class RoundUpInterface {
 
 // Size-limited pool of memory buffers obtained from a SubAllocator
 // instance.  Pool eviction policy is LRU.
-class PoolAllocator : public VisitableAllocator {
+class PoolAllocator : public Allocator {
  public:
   // "pool_size_limit" is the maximum number of returned, re-usable
   // memory buffers to keep in the pool.  If pool_size_limit == 0, the
@@ -64,14 +63,6 @@ class PoolAllocator : public VisitableAllocator {
 
   void DeallocateRaw(void* ptr) override;
 
-  // REQUIRES: The following functions may only be called prior
-  // to the first Allocate*() call.  Once allocation has begun, it is
-  // illegal to register another visitor.
-
-  void AddAllocVisitor(Visitor visitor) override;
-
-  void AddFreeVisitor(Visitor visitor) override;
-
   // Allocate an unused memory region of size "num_bytes".  Fetch from
   // the pool if available, otherwise call allocator_.
   void* Get(size_t num_bytes);
@@ -141,12 +132,6 @@ class PoolAllocator : public VisitableAllocator {
   int64 put_count_ GUARDED_BY(mutex_) = 0;
   int64 allocated_count_ GUARDED_BY(mutex_) = 0;
   int64 evicted_count_ GUARDED_BY(mutex_) = 0;
-  // Write access to these is guarded by mutex_, but not read
-  // access. They may only be modified prior to the first
-  // allocation.  Later attempts to modify will fail.
-  std::vector<Visitor> alloc_visitors_;
-  std::vector<Visitor> free_visitors_;
-  std::atomic<bool> allocation_begun_;
 };
 
 // Do-nothing rounder. Passes through sizes unchanged.
@@ -166,7 +151,9 @@ class Pow2Rounder : public RoundUpInterface {
 class BasicCPUAllocator : public SubAllocator {
  public:
   // Argument numa_node is currently ignored.
-  explicit BasicCPUAllocator(int numa_node) : numa_node_(numa_node) {}
+  BasicCPUAllocator(int numa_node, const std::vector<Visitor>& alloc_visitors,
+                    const std::vector<Visitor>& free_visitors)
+      : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {}
 
   ~BasicCPUAllocator() override {}
 
@@ -176,6 +163,8 @@ class BasicCPUAllocator : public SubAllocator {
 
  private:
   int numa_node_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc
index 447338e7bd..bcaa37fc8a 100644
--- a/tensorflow/core/common_runtime/process_state.cc
+++ b/tensorflow/core/common_runtime/process_state.cc
@@ -71,20 +71,28 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) {
   return MemDesc();
 }
 
-VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) {
+Allocator* ProcessState::GetCPUAllocator(int numa_node) {
   CHECK_GE(numa_node, 0);
   if (!numa_enabled_) numa_node = 0;
   mutex_lock lock(mu_);
   while (cpu_allocators_.size() <= static_cast<size_t>(numa_node)) {
+    // If visitors have been defined we need an Allocator built from
+    // a SubAllocator.  Prefer BFCAllocator, but fall back to PoolAllocator
+    // depending on env var setting.
+    const bool alloc_visitors_defined =
+        (!cpu_alloc_visitors_.empty() || !cpu_free_visitors_.empty());
     bool use_bfc_allocator = false;
-    // TODO(reedwm): Switch default to BGFAllocator if it's at least as fast and
-    // efficient.
-    Status status = ReadBoolFromEnvVar("TF_CPU_ALLOCATOR_USE_BFC", false,
-                                       &use_bfc_allocator);
+    Status status = ReadBoolFromEnvVar(
+        "TF_CPU_ALLOCATOR_USE_BFC", alloc_visitors_defined, &use_bfc_allocator);
     if (!status.ok()) {
       LOG(ERROR) << "GetCPUAllocator: " << status.error_message();
     }
-    VisitableAllocator* allocator;
+    Allocator* allocator = nullptr;
+    SubAllocator* sub_allocator =
+        (alloc_visitors_defined || use_bfc_allocator)
+            ? new BasicCPUAllocator(numa_enabled_ ? numa_node : -1,
+                                    cpu_alloc_visitors_, cpu_free_visitors_)
+            : nullptr;
     if (use_bfc_allocator) {
       // TODO(reedwm): evaluate whether 64GB by default is the best choice.
       int64 cpu_mem_limit_in_mb = -1;
@@ -95,34 +103,63 @@ VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) {
         LOG(ERROR) << "GetCPUAllocator: " << status.error_message();
       }
       int64 cpu_mem_limit = cpu_mem_limit_in_mb * (1LL << 20);
-      allocator = new BFCAllocator(
-          new BasicCPUAllocator(numa_enabled_ ? numa_node : -1), cpu_mem_limit,
-          true /*allow_growth*/, "bfc_cpu_allocator_for_gpu" /*name*/);
+      DCHECK(sub_allocator);
+      allocator =
+          new BFCAllocator(sub_allocator, cpu_mem_limit, true /*allow_growth*/,
+                           "bfc_cpu_allocator_for_gpu" /*name*/);
       VLOG(2) << "Using BFCAllocator with memory limit of "
               << cpu_mem_limit_in_mb << " MB for ProcessState CPU allocator";
-    } else {
-      allocator = new PoolAllocator(
-          100 /*pool_size_limit*/, true /*auto_resize*/,
-          new BasicCPUAllocator(numa_enabled_ ? numa_node : -1),
-          new NoopRounder, "cpu_pool");
+    } else if (alloc_visitors_defined) {
+      DCHECK(sub_allocator);
+      allocator =
+          new PoolAllocator(100 /*pool_size_limit*/, true /*auto_resize*/,
+                            sub_allocator, new NoopRounder, "cpu_pool");
       VLOG(2) << "Using PoolAllocator for ProcessState CPU allocator "
               << "numa_enabled_=" << numa_enabled_
               << " numa_node=" << numa_node;
+    } else {
+      DCHECK(!sub_allocator);
+      allocator = cpu_allocator();
     }
-    if (LogMemory::IsEnabled()) {
+    if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) {
       // Wrap the allocator to track allocation ids for better logging
       // at the cost of performance.
-      allocator = new TrackingVisitableAllocator(allocator, true);
+      allocator = new TrackingAllocator(allocator, true);
     }
     cpu_allocators_.push_back(allocator);
+    if (!sub_allocator) {
+      DCHECK(cpu_alloc_visitors_.empty() && cpu_free_visitors_.empty());
+    }
   }
   return cpu_allocators_[numa_node];
 }
 
+void ProcessState::AddCPUAllocVisitor(SubAllocator::Visitor visitor) {
+  VLOG(1) << "AddCPUAllocVisitor";
+  mutex_lock lock(mu_);
+  CHECK_EQ(0, cpu_allocators_.size())  // Crash OK
+      << "AddCPUAllocVisitor must be called prior to first call to "
+         "ProcessState::GetCPUAllocator";
+  cpu_alloc_visitors_.push_back(std::move(visitor));
+}
+
+void ProcessState::AddCPUFreeVisitor(SubAllocator::Visitor visitor) {
+  mutex_lock lock(mu_);
+  CHECK_EQ(0, cpu_allocators_.size())  // Crash OK
+      << "AddCPUFreeVisitor must be called prior to first call to "
+         "ProcessState::GetCPUAllocator";
+  cpu_free_visitors_.push_back(std::move(visitor));
+}
+
 void ProcessState::TestOnlyReset() {
   mutex_lock lock(mu_);
+  // Don't delete this value because it's static.
+  Allocator* default_cpu_allocator = cpu_allocator();
   mem_desc_map_.clear();
-  gtl::STLDeleteElements(&cpu_allocators_);
+  for (Allocator* a : cpu_allocators_) {
+    if (a != default_cpu_allocator) delete a;
+  }
+  cpu_allocators_.clear();
   gtl::STLDeleteElements(&cpu_al_);
 }
 
diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h
index 2892677333..cac312d849 100644
--- a/tensorflow/core/common_runtime/process_state.h
+++ b/tensorflow/core/common_runtime/process_state.h
@@ -30,7 +30,6 @@ limitations under the License.
 namespace tensorflow {
 
 class Allocator;
-class VisitableAllocator;
 class PoolAllocator;
 
 // Singleton that manages per-process state, e.g. allocation of
@@ -65,7 +64,15 @@ class ProcessState {
 
   // Returns the one CPUAllocator used for the given numa_node.
   // TEMPORARY: ignores numa_node.
-  VisitableAllocator* GetCPUAllocator(int numa_node);
+  Allocator* GetCPUAllocator(int numa_node);
+
+  // Registers alloc visitor for the CPU allocator(s).
+  // REQUIRES: must be called before GetCPUAllocator.
+  void AddCPUAllocVisitor(SubAllocator::Visitor v);
+
+  // Registers free visitor for the CPU allocator(s).
+  // REQUIRES: must be called before GetCPUAllocator.
+  void AddCPUFreeVisitor(SubAllocator::Visitor v);
 
   typedef std::unordered_map<const void*, MemDesc> MDMap;
 
@@ -87,7 +94,9 @@ class ProcessState {
 
   mutex mu_;
 
-  std::vector<VisitableAllocator*> cpu_allocators_ GUARDED_BY(mu_);
+  std::vector<Allocator*> cpu_allocators_ GUARDED_BY(mu_);
+  std::vector<SubAllocator::Visitor> cpu_alloc_visitors_ GUARDED_BY(mu_);
+  std::vector<SubAllocator::Visitor> cpu_free_visitors_ GUARDED_BY(mu_);
 
   virtual ~ProcessState();
 
diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h
index 103eee03b3..9d59264899 100644
--- a/tensorflow/core/common_runtime/renamed_device.h
+++ b/tensorflow/core/common_runtime/renamed_device.h
@@ -72,9 +72,10 @@ class RenamedDevice : public Device {
     return underlying_->MakeGpuDevice();
   }
 
-  void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
-                             DeviceContext* dc, Allocator* allocator) override {
-    underlying_->ReinitializeGpuDevice(context, device, dc, allocator);
+  Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
+                               DeviceContext* dc,
+                               Allocator* allocator) override {
+    return underlying_->ReinitializeGpuDevice(context, device, dc, allocator);
   }
 
   Status MakeTensorFromProto(const TensorProto& tensor_proto,
diff --git a/tensorflow/core/common_runtime/visitable_allocator.h b/tensorflow/core/common_runtime/visitable_allocator.h
deleted file mode 100644
index ae0563a96a..0000000000
--- a/tensorflow/core/common_runtime/visitable_allocator.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_
-
-#include <functional>
-#include "tensorflow/core/framework/allocator.h"
-#include "tensorflow/core/framework/tracking_allocator.h"
-
-namespace tensorflow {
-
-// Subclass VisitableAllocator instead of Allocator when a memory
-// allocator needs to enable some kind of registration/deregistration
-// of memory areas.
-class VisitableAllocator : public Allocator {
- public:
-  // Visitor gets called with a pointer to a memory area and its
-  // size in bytes.
-  typedef std::function<void(void*, size_t)> Visitor;
-
-  // Register a visitor guaranteed to be called exactly once on each
-  // chunk of memory newly allocated from the underlying device.
-  // Typically, chunks will be reused and possibly sub-divided by a
-  // pool manager, so the calls will happen only once per process
-  // execution, not once per tensor (re)allocation.
-  virtual void AddAllocVisitor(Visitor visitor) = 0;
-
-  // Register a visitor guaranteed to be called on each chunk of
-  // memory returned to the underlying device.
-  virtual void AddFreeVisitor(Visitor visitor) = 0;
-};
-
-// Needed for cases when a VisitableAllocator gets wrapped for tracking.
-// Multiple-inheritance is considered acceptable in this case because
-// VisitableAllocator is a pure virtual interface and only TrackingAllocator
-// has default implementation.
-class TrackingVisitableAllocator : public TrackingAllocator,
-                                   public VisitableAllocator {
- public:
-  TrackingVisitableAllocator(VisitableAllocator* allocator, bool track_ids)
-      : TrackingAllocator(allocator, track_ids), allocator_(allocator) {}
-  ~TrackingVisitableAllocator() override {}
-
-  string Name() override { return TrackingAllocator::Name(); }
-
-  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
-    return TrackingAllocator::AllocateRaw(alignment, num_bytes);
-  }
-
-  void DeallocateRaw(void* ptr) override {
-    TrackingAllocator::DeallocateRaw(ptr);
-  }
-
-  void AddAllocVisitor(Visitor visitor) override {
-    allocator_->AddAllocVisitor(visitor);
-  }
-
-  void AddFreeVisitor(Visitor visitor) override {
-    allocator_->AddFreeVisitor(visitor);
-  }
-
- protected:
-  VisitableAllocator* allocator_;
-};
-}  // namespace tensorflow
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_
diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc
index 2a7ee16a16..84cee5569c 100644
--- a/tensorflow/core/framework/allocator.cc
+++ b/tensorflow/core/framework/allocator.cc
@@ -196,7 +196,7 @@ class CPUAllocatorFactory : public AllocatorFactory {
   class CPUSubAllocator : public SubAllocator {
    public:
     explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
-        : cpu_allocator_(cpu_allocator) {}
+        : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
 
     void* Alloc(size_t alignment, size_t num_bytes) override {
       return cpu_allocator_->AllocateRaw(alignment, num_bytes);
@@ -222,4 +222,22 @@ Allocator* cpu_allocator() {
   }
   return cpu_alloc;
 }
+
+SubAllocator::SubAllocator(const std::vector<Visitor>& alloc_visitors,
+                           const std::vector<Visitor>& free_visitors)
+    : alloc_visitors_(alloc_visitors), free_visitors_(free_visitors) {}
+
+void SubAllocator::VisitAlloc(void* ptr, int index, size_t num_bytes) {
+  for (const auto& v : alloc_visitors_) {
+    v(ptr, index, num_bytes);
+  }
+}
+
+void SubAllocator::VisitFree(void* ptr, int index, size_t num_bytes) {
+  // Although we don't guarantee any order of visitor application, strive
+  // to apply free visitors in reverse order of alloc visitors.
+  for (int i = free_visitors_.size() - 1; i >= 0; --i) {
+    free_visitors_[i](ptr, index, num_bytes);
+  }
+}
 }  // namespace tensorflow
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index ded120b704..8c23604625 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/resource_handle.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -387,13 +388,36 @@ void EnableCPUAllocatorStats(bool enable);
 // full statistics. By default, it's disabled.
 void EnableCPUAllocatorFullStats(bool enable);
 
-// Abstract interface of an object that does the underlying suballoc/free of
-// memory for a higher-level allocator.
+// An object that does the underlying suballoc/free of memory for a higher-level
+// allocator.  The expectation is that the higher-level allocator is doing some
+// kind of cache or pool management so that it will call SubAllocator::Alloc and
+// Free relatively infrequently, compared to the number of times its own
+// AllocateRaw and Free methods are called.
 class SubAllocator {
  public:
+  // Visitor gets called with a pointer to a memory area and its
+  // size in bytes.  The index value will be numa_node for a CPU
+  // allocator and GPU id for a GPU allocator.
+  typedef std::function<void(void*, int index, size_t)> Visitor;
+
+  SubAllocator(const std::vector<Visitor>& alloc_visitors,
+               const std::vector<Visitor>& free_visitors);
+
   virtual ~SubAllocator() {}
   virtual void* Alloc(size_t alignment, size_t num_bytes) = 0;
   virtual void Free(void* ptr, size_t num_bytes) = 0;
+
+ protected:
+  // Implementation of Alloc() method must call this on newly allocated
+  // value.
+  void VisitAlloc(void* ptr, int index, size_t num_bytes);
+
+  // Implementation of Free() method must call this on value to be
+  // freed immediately before deallocation.
+  void VisitFree(void* ptr, int index, size_t num_bytes);
+
+  const std::vector<Visitor> alloc_visitors_;
+  const std::vector<Visitor> free_visitors_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index 794250a2c1..53ac639b4c 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -214,10 +214,12 @@ class DeviceBase {
 
   // This is overridden by GPU devices to reinitialize the derived
   // type returned by MakeGpuDevice.
-  virtual void ReinitializeGpuDevice(OpKernelContext* /*context*/,
-                                     PerOpGpuDevice* /*device*/,
-                                     DeviceContext* /*dc*/,
-                                     Allocator* /*allocator*/) {}
+  virtual Status ReinitializeGpuDevice(OpKernelContext* /*context*/,
+                                       PerOpGpuDevice* /*device*/,
+                                       DeviceContext* /*dc*/,
+                                       Allocator* /*allocator*/) {
+    return Status::OK();
+  }
 
   // Unimplemented by default
   virtual const DeviceAttributes& attributes() const;
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 80f2b12987..3e34bf0418 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -265,9 +265,12 @@ OpKernelContext::OpKernelContext(Params* params, int num_outputs)
   params_->ensure_eigen_gpu_device();
   if (params_->eigen_gpu_device != nullptr) {
     Allocator* eigen_gpu_allocator = get_allocator(AllocatorAttributes());
-    params_->device->ReinitializeGpuDevice(this, params_->eigen_gpu_device,
-                                           params_->op_device_context,
-                                           eigen_gpu_allocator);
+    Status s = params_->device->ReinitializeGpuDevice(
+        this, params_->eigen_gpu_device, params_->op_device_context,
+        eigen_gpu_allocator);
+    if (!s.ok()) {
+      SetStatus(s);
+    }
   }
   if (params_->record_tensor_accesses) {
     referenced_tensors_.Init();
-- 
GitLab


From 964a32573bffbb798d0eb97ec9b37da0657c4dbd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Sep 2018 14:41:37 -0700
Subject: [PATCH 0338/1357] Clean up remove_negation pass in Grappler.

PiperOrigin-RevId: 213520177
---
 .../optimizers/arithmetic_optimizer.cc        | 42 +++++++------------
 .../optimizers/arithmetic_optimizer_test.cc   | 42 +++++++++++--------
 2 files changed, 39 insertions(+), 45 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 11ce121cba..992e85d2c6 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -1325,38 +1325,26 @@ class RemoveNegationStage : public ArithmeticOptimizerStage {
   }
 
   Status TrySimplify(NodeDef* node, string* simplified_node_name) override {
-    const string node_name = node->name();
     NodeDef* x;
     NodeDef* y;
     TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &x));
     TF_RETURN_IF_ERROR(GetInputNode(node->input(1), &y));
     bool updated = false;
-    if (IsAdd(*node)) {
-      if (IsNeg(*x)) {
-        // (-a) + b = b - a
-        node->set_op("Sub");
-        node->mutable_input()->SwapElements(0, 1);
-        node->set_input(1, x->input(0));
-        node->add_input(AsControlDependency(x->name()));
-        ctx().node_map->AddOutput(NodeName(x->input(0)), node_name);
-        updated = true;
-      } else if (IsNeg(*y)) {
-        // a + (-b) = a - b
-        node->set_op("Sub");
-        node->set_input(1, y->input(0));
-        node->add_input(AsControlDependency(y->name()));
-        ctx().node_map->AddOutput(NodeName(y->input(0)), node_name);
-        updated = true;
-      }
-    } else if (IsSub(*node)) {
-      if (IsNeg(*y)) {
-        // a - (-b) = a + b
-        node->set_op("Add");
-        node->set_input(1, y->input(0));
-        node->add_input(AsControlDependency(y->name()));
-        ctx().node_map->AddOutput(NodeName(y->input(0)), node_name);
-        updated = true;
-      }
+    if (IsNeg(*y)) {
+      // a - (-b) = a + b or  a + (-b) = a - b
+      ForwardControlDependencies(node, {y});
+      ctx().node_map->UpdateInput(node->name(), node->input(1), y->input(0));
+      node->set_op(IsAdd(*node) ? "Sub" : "Add");
+      node->set_input(1, y->input(0));
+      updated = true;
+    } else if (IsAdd(*node) && IsNeg(*x)) {
+      // (-a) + b = b - a
+      ForwardControlDependencies(node, {x});
+      ctx().node_map->UpdateInput(node->name(), node->input(0), x->input(0));
+      node->set_op("Sub");
+      node->mutable_input()->SwapElements(0, 1);
+      node->set_input(1, x->input(0));
+      updated = true;
     }
     if (updated) {
       AddToOptimizationQueue(node);
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index bc838c6659..88839d944c 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -2353,9 +2353,14 @@ TEST_F(ArithmeticOptimizerTest, RemoveNegation) {
   Output sub_negx_y = ops::Sub(s.WithOpName("Sub_negx_y"), neg_x, y);
   Output sub_x_negy = ops::Sub(s.WithOpName("Sub_x_negy"), x, neg_y);
   Output sub_negx_negy = ops::Sub(s.WithOpName("Sub_negx_negy"), neg_x, neg_y);
-  auto add_all = ops::AddN(s.WithOpName("add_all"),
-                           {add_x_y, add_negx_y, add_x_negy, add_negx_negy,
-                            sub_x_y, sub_negx_y, sub_x_negy, sub_negx_negy});
+  Output neg_x_with_dep = ops::Neg(
+      s.WithOpName("Neg_x_with_dep").WithControlDependencies({add_x_y}), x);
+  Output add_negx_with_dep_y =
+      ops::Add(s.WithOpName("Add_negx_with_dep_y"), neg_x_with_dep, y);
+  auto add_all =
+      ops::AddN(s.WithOpName("add_all"),
+                {add_x_y, add_negx_y, add_x_negy, add_negx_negy, sub_x_y,
+                 sub_negx_y, sub_x_negy, sub_negx_negy, add_negx_with_dep_y});
 
   GrapplerItem item;
   item.fetch = {"add_all"};
@@ -2370,7 +2375,7 @@ TEST_F(ArithmeticOptimizerTest, RemoveNegation) {
   GraphDef output;
   ArithmeticOptimizer optimizer;
   EnableOnlyRemoveNegation(&optimizer);
-  OptimizeAndPrune(&optimizer, &item, &output);
+  OptimizeTwice(&optimizer, &item, &output);
 
   EXPECT_EQ(item.graph.node_size(), output.node_size());
   int found = 0;
@@ -2379,42 +2384,43 @@ TEST_F(ArithmeticOptimizerTest, RemoveNegation) {
     if (node.name() == "Add_negx_y") {
       ++found;
       EXPECT_EQ("Sub", node.op());
-      EXPECT_EQ(3, node.input_size());
+      EXPECT_EQ(2, node.input_size());
       EXPECT_EQ("y", node.input(0));
       EXPECT_EQ("x", node.input(1));
-      EXPECT_EQ("^Neg_x", node.input(2));
     } else if (node.name() == "Add_x_negy") {
       ++found;
       EXPECT_EQ("Sub", node.op());
-      EXPECT_EQ(3, node.input_size());
+      EXPECT_EQ(2, node.input_size());
       EXPECT_EQ("x", node.input(0));
       EXPECT_EQ("y", node.input(1));
-      EXPECT_EQ("^Neg_y", node.input(2));
     } else if (node.name() == "Add_negx_negy") {
       ++found;
       EXPECT_EQ("Sub", node.op());
-      EXPECT_EQ(3, node.input_size());
-      EXPECT_EQ("Neg_y", node.input(0));
-      EXPECT_EQ("x", node.input(1));
-      EXPECT_EQ("^Neg_x", node.input(2));
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("Neg_x", node.input(0));
+      EXPECT_EQ("y", node.input(1));
     } else if (node.name() == "Sub_x_negy") {
       ++found;
       EXPECT_EQ("Add", node.op());
-      EXPECT_EQ(3, node.input_size());
+      EXPECT_EQ(2, node.input_size());
       EXPECT_EQ("x", node.input(0));
       EXPECT_EQ("y", node.input(1));
-      EXPECT_EQ("^Neg_y", node.input(2));
     } else if (node.name() == "Sub_negx_negy") {
       ++found;
       EXPECT_EQ("Sub", node.op());
-      EXPECT_EQ(4, node.input_size());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("y", node.input(0));
+      EXPECT_EQ("x", node.input(1));
+    } else if (node.name() == "Add_negx_with_dep_y") {
+      ++found;
+      EXPECT_EQ("Sub", node.op());
+      EXPECT_EQ(3, node.input_size());
       EXPECT_EQ("y", node.input(0));
       EXPECT_EQ("x", node.input(1));
-      EXPECT_EQ("^Neg_y", node.input(2));
-      EXPECT_EQ("^Neg_x", node.input(3));
+      EXPECT_EQ("^Add_x_y", node.input(2));
     }
   }
-  EXPECT_EQ(5, found);
+  EXPECT_EQ(6, found);
 
   auto tensors = EvaluateNodes(output, item.fetch, feed);
   EXPECT_EQ(1, tensors.size());
-- 
GitLab


From c2d392e36a3f68c2e1b8fdfa280953efc6426c52 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 18 Sep 2018 15:14:24 -0700
Subject: [PATCH 0339/1357] Add error reporting TFLIte C API

PiperOrigin-RevId: 213526489
---
 .../contrib/lite/experimental/c/c_api.cc      | 46 ++++++++++++++++++-
 .../contrib/lite/experimental/c/c_api.h       | 12 +++++
 .../lite/experimental/c/c_api_internal.h      | 14 ++++++
 .../contrib/lite/experimental/c/c_api_test.cc | 31 +++++++++++++
 4 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/experimental/c/c_api.cc b/tensorflow/contrib/lite/experimental/c/c_api.cc
index 1c3996fb87..9c29f9d8b9 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api.cc
+++ b/tensorflow/contrib/lite/experimental/c/c_api.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <memory>
 
 #include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/error_reporter.h"
 #include "tensorflow/contrib/lite/experimental/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
@@ -26,6 +27,26 @@ limitations under the License.
 extern "C" {
 #endif  // __cplusplus
 
+namespace {
+class CallbackErrorReporter : public tflite::ErrorReporter {
+ public:
+  using ErrorCallback = void (*)(void* user_data, const char* format,
+                                 va_list args);
+
+  CallbackErrorReporter(ErrorCallback callback, void* user_data)
+      : callback_(callback), user_data_(user_data) {}
+
+  int Report(const char* format, va_list args) override {
+    callback_(user_data_, format, args);
+    return 0;
+  }
+
+ private:
+  ErrorCallback callback_;
+  void* user_data_;
+};
+}  // namespace
+
 // LINT.IfChange
 
 TFL_Model* TFL_NewModel(const void* model_data, size_t model_size) {
@@ -56,18 +77,38 @@ void TFL_InterpreterOptionsSetNumThreads(TFL_InterpreterOptions* options,
   options->num_threads = num_threads;
 }
 
+TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsSetErrorReporter(
+    TFL_InterpreterOptions* options,
+    void (*reporter)(void* user_data, const char* format, va_list args),
+    void* user_data) {
+  options->error_reporter = reporter;
+  options->error_reporter_user_data = user_data;
+}
+
 TFL_Interpreter* TFL_NewInterpreter(
     const TFL_Model* model, const TFL_InterpreterOptions* optional_options) {
   if (!model || !model->impl) {
     return nullptr;
   }
 
+  std::unique_ptr<tflite::ErrorReporter> optional_error_reporter;
+  if (optional_options && optional_options->error_reporter != nullptr) {
+    optional_error_reporter.reset(
+        new CallbackErrorReporter(optional_options->error_reporter,
+                                  optional_options->error_reporter_user_data));
+  }
+
   // TODO(b/111881878): Allow use of C API without pulling in all builtin ops.
   tflite::ops::builtin::BuiltinOpResolver resolver;
   if (optional_options) {
     resolver.AddAll(optional_options->op_resolver);
   }
-  tflite::InterpreterBuilder builder(*model->impl, resolver);
+  tflite::ErrorReporter* error_reporter = optional_error_reporter
+                                              ? optional_error_reporter.get()
+                                              : tflite::DefaultErrorReporter();
+  tflite::InterpreterBuilder builder(model->impl->GetModel(), resolver,
+                                     error_reporter);
+
   std::unique_ptr<tflite::Interpreter> interpreter;
   if (builder(&interpreter) != kTfLiteOk) {
     return nullptr;
@@ -80,7 +121,8 @@ TFL_Interpreter* TFL_NewInterpreter(
     }
   }
 
-  return new TFL_Interpreter{model->impl, std::move(interpreter)};
+  return new TFL_Interpreter{model->impl, std::move(optional_error_reporter),
+                             std::move(interpreter)};
 }
 
 void TFL_DeleteInterpreter(TFL_Interpreter* interpreter) { delete interpreter; }
diff --git a/tensorflow/contrib/lite/experimental/c/c_api.h b/tensorflow/contrib/lite/experimental/c/c_api.h
index 44b936aa87..f52ab8f9ed 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api.h
+++ b/tensorflow/contrib/lite/experimental/c/c_api.h
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_C_C_API_H_
 #define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_C_C_API_H_
 
+#include <stdarg.h>
 #include <stdint.h>
 
 // Eventually the various C APIs defined in context.h will be migrated into
@@ -86,6 +87,17 @@ TFL_CAPI_EXPORT extern void TFL_DeleteInterpreterOptions(
 TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsSetNumThreads(
     TFL_InterpreterOptions* options, int32_t num_threads);
 
+// Sets a custom error reporter for interpreter execution.
+//
+// * `reporter` takes the provided `user_data` object, as well as a C-style
+//   format string and arg list (see also vprintf).
+// * `user_data` is optional. If provided, it is owned by the client and must
+//   remain valid for the duration of the interpreter lifetime.
+TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsSetErrorReporter(
+    TFL_InterpreterOptions* options,
+    void (*reporter)(void* user_data, const char* format, va_list args),
+    void* user_data);
+
 // --------------------------------------------------------------------------
 // TFL_Interpreter provides inference from a provided model.
 typedef struct TFL_Interpreter TFL_Interpreter;
diff --git a/tensorflow/contrib/lite/experimental/c/c_api_internal.h b/tensorflow/contrib/lite/experimental/c/c_api_internal.h
index af675ac98a..da3af3cad4 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api_internal.h
+++ b/tensorflow/contrib/lite/experimental/c/c_api_internal.h
@@ -23,6 +23,9 @@ limitations under the License.
 
 // Internal structures used by the C API. These are likely to change and should
 // not be depended on.
+//
+// NOTE: This header does not follow C conventions and does not define a C API.
+// It is effectively an (internal) implementation detail of the C API.
 
 struct TFL_Model {
   // Sharing is safe as FlatBufferModel is const.
@@ -34,13 +37,24 @@ struct TFL_InterpreterOptions {
     kDefaultNumThreads = -1,
   };
   int num_threads = kDefaultNumThreads;
+
   tflite::MutableOpResolver op_resolver;
+
+  void (*error_reporter)(void* user_data, const char* format,
+                         va_list args) = nullptr;
+  void* error_reporter_user_data = nullptr;
 };
 
 struct TFL_Interpreter {
   // Taking a reference to the (const) model data avoids lifetime-related issues
   // and complexity with the TFL_Model's existence.
   std::shared_ptr<const tflite::FlatBufferModel> model;
+
+  // The interpreter does not take ownership of the provided ErrorReporter
+  // instance, so we ensure its validity here. Note that the interpreter may use
+  // the reporter in its destructor, so it should be declared first.
+  std::unique_ptr<tflite::ErrorReporter> optional_error_reporter;
+
   std::unique_ptr<tflite::Interpreter> impl;
 };
 
diff --git a/tensorflow/contrib/lite/experimental/c/c_api_test.cc b/tensorflow/contrib/lite/experimental/c/c_api_test.cc
index 649dac8d1a..48a3714ec3 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api_test.cc
+++ b/tensorflow/contrib/lite/experimental/c/c_api_test.cc
@@ -85,6 +85,37 @@ TEST(CApiSimple, Smoke) {
   TFL_DeleteInterpreter(interpreter);
 }
 
+TEST(CApiSimple, ErrorReporter) {
+  TFL_Model* model = TFL_NewModelFromFile(
+      "tensorflow/contrib/lite/testdata/add.bin");
+  TFL_InterpreterOptions* options = TFL_NewInterpreterOptions();
+
+  // Install a custom error reporter into the interpreter by way of options.
+  tflite::TestErrorReporter reporter;
+  TFL_InterpreterOptionsSetErrorReporter(
+      options,
+      [](void* user_data, const char* format, va_list args) {
+        reinterpret_cast<tflite::TestErrorReporter*>(user_data)->Report(format,
+                                                                        args);
+      },
+      &reporter);
+  TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options);
+
+  // The options/model can be deleted immediately after interpreter creation.
+  TFL_DeleteInterpreterOptions(options);
+  TFL_DeleteModel(model);
+
+  // Invoke the interpreter before tensor allocation.
+  EXPECT_EQ(TFL_InterpreterInvoke(interpreter), kTfLiteError);
+
+  // The error should propagate to the custom error reporter.
+  EXPECT_EQ(reporter.error_messages(),
+            "Invoke called on model that is not ready.");
+  EXPECT_EQ(reporter.num_calls(), 1);
+
+  TFL_DeleteInterpreter(interpreter);
+}
+
 }  // namespace
 
 int main(int argc, char** argv) {
-- 
GitLab


From 228572ecf387931b14e92555a2234dc085813e21 Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dimvar@google.com>
Date: Tue, 18 Sep 2018 15:24:59 -0700
Subject: [PATCH 0340/1357] [TF:XLA] Document that the order of control
 predecessors matters.

PiperOrigin-RevId: 213528296
---
 tensorflow/compiler/xla/service/hlo_instruction.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 4f6cac1396..1ef8cd5036 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -1616,6 +1616,10 @@ class HloInstruction {
   InstructionVector operands_;
 
   // The set of control predecessors of this instruction.
+  // Note that the order of the instructions in the vector influences the order
+  // computed in HloComputation::ComputeInstructionPostOrder, which may
+  // influence the result of the compilation by changing the scheduling. We are
+  // not sure if it matters.
   std::vector<HloInstruction*> control_predecessors_;
 
   // The users of this instruction. Users are HLOs where this instruction is an
-- 
GitLab


From 0bd8f45ed9ee929225e8be93e7b998085fd2ba74 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 18 Sep 2018 15:27:20 -0700
Subject: [PATCH 0341/1357] Automated rollback of commit
 b1ff7c2cedcc7d49d430d56655870e6d68a0c8f7

PiperOrigin-RevId: 213528716
---
 .../core/kernels/data/captured_function.cc    | 206 ++++++++++--------
 .../core/kernels/data/captured_function.h     | 129 +++++------
 tensorflow/core/kernels/data/dataset_utils.cc |   9 +-
 tensorflow/core/kernels/data/dataset_utils.h  |   5 +-
 .../core/kernels/data/filter_dataset_op.cc    |  33 ++-
 .../core/kernels/data/flat_map_dataset_op.cc  |   7 +-
 .../core/kernels/data/generator_dataset_op.cc |  23 +-
 .../data/group_by_reducer_dataset_op.cc       |  31 ++-
 .../data/group_by_window_dataset_op.cc        |  25 +--
 .../kernels/data/interleave_dataset_op.cc     |   8 +-
 .../kernels/data/map_and_batch_dataset_op.cc  |   6 +-
 .../core/kernels/data/map_dataset_op.cc       |   6 +-
 .../data/parallel_interleave_dataset_op.cc    |  16 +-
 .../kernels/data/parallel_map_dataset_op.cc   |  57 ++---
 .../kernels/data/parallel_map_iterator.cc     |  37 +++-
 .../core/kernels/data/parallel_map_iterator.h |  44 ++--
 .../kernels/data/parse_example_dataset_op.cc  | 185 +++++++---------
 .../core/kernels/data/scan_dataset_op.cc      |   8 +-
 18 files changed, 395 insertions(+), 440 deletions(-)

diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc
index 96ae8e16d5..b3ab7e2bc6 100644
--- a/tensorflow/core/kernels/data/captured_function.cc
+++ b/tensorflow/core/kernels/data/captured_function.cc
@@ -46,36 +46,10 @@ Status CapturedFunction::Create(
   return Status::OK();
 }
 
-Status CapturedFunction::Instantiate(
-    IteratorContext* ctx, std::unique_ptr<InstantiatedCapturedFunction>*
-                              instantiated_captured_function) {
-  // The context's runtime will be used for all subsequent calls.
-  FunctionLibraryRuntime* lib = ctx->lib();
-  FunctionLibraryRuntime::InstantiateOptions inst_opts;
-  inst_opts.overlay_lib = ctx->function_library().get();
-  inst_opts.state_handle = std::to_string(random::New64());
-  inst_opts.create_kernels_eagerly = true;
-  if (!use_inter_op_parallelism_) {
-    inst_opts.executor_type = "SINGLE_THREADED_EXECUTOR";
-  }
-
-  FunctionLibraryRuntime::Handle f_handle;
-  Status s = (lib->Instantiate(func_.name(), AttrSlice(&func_.attr()),
-                               inst_opts, &f_handle));
-  TF_RETURN_IF_ERROR(s);
-  const FunctionBody* fbody = lib->GetFunctionBody(f_handle);
-  if (fbody == nullptr) {
-    return errors::Internal("Failed to instantiate function body.");
-  }
-
-  DataTypeVector ret_types;
-  for (const auto& ret_type : fbody->ret_types) {
-    ret_types.push_back(ret_type);
+CapturedFunction::~CapturedFunction() {
+  if (lib_ != nullptr && f_handle_ != kInvalidHandle) {
+    lib_->ReleaseHandle(f_handle_).IgnoreError();
   }
-
-  instantiated_captured_function->reset(new InstantiatedCapturedFunction(
-      lib, f_handle, std::move(ret_types), *ctx->runner(), this));
-  return Status::OK();
 }
 
 namespace {
@@ -198,34 +172,35 @@ class BorrowedArgsCallFrame : public CallFrameBase {
 
 }  // namespace
 
-InstantiatedCapturedFunction::InstantiatedCapturedFunction(
-    FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle,
-    DataTypeVector ret_types, std::function<void(std::function<void()>)> runner,
-    CapturedFunction* captured_func)
-    : lib_(lib),
-      f_handle_(f_handle),
-      ret_types_(std::move(ret_types)),
-      captured_runner_(std::move(runner)),
-      captured_func_(captured_func) {}
-
-InstantiatedCapturedFunction::~InstantiatedCapturedFunction() {
-  if (lib_ != nullptr && f_handle_ != kInvalidHandle) {
-    lib_->ReleaseHandle(f_handle_).IgnoreError();
+Status CapturedFunction::GetHandle(IteratorContext* ctx,
+                                   FunctionLibraryRuntime::Handle* out_handle) {
+  tf_shared_lock l(mu_);
+  if (lib_ == nullptr) {
+    return errors::Internal("Captured function \"", func_.name(),
+                            "\" was called before it was instantiated.");
   }
+  if (ctx->lib() != lib_) {
+    return errors::Internal("Captured function \"", func_.name(),
+                            "\" was called with a different "
+                            "FunctionLibraryRuntime*, which is not permitted.");
+  }
+  *out_handle = f_handle_;
+  return Status::OK();
 }
 
-Status InstantiatedCapturedFunction::Run(IteratorContext* ctx,
-                                         std::vector<Tensor>&& args,
-                                         std::vector<Tensor>* rets) const {
+Status CapturedFunction::Run(IteratorContext* ctx, std::vector<Tensor>&& args,
+                             std::vector<Tensor>* rets) {
+  FunctionLibraryRuntime::Handle handle;
+  TF_RETURN_IF_ERROR(GetHandle(ctx, &handle));
+
   FunctionLibraryRuntime::Options f_opts;
-  f_opts.step_id = InstantiatedCapturedFunction::generate_step_id();
-  ScopedStepContainer step_container(
-      f_opts.step_id, [this](const string& name) {
-        lib_->device()->resource_manager()->Cleanup(name).IgnoreError();
-      });
+  f_opts.step_id = CapturedFunction::generate_step_id();
+  ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) {
+    ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError();
+  });
   f_opts.step_container = &step_container;
   f_opts.runner = ctx->runner();
-  if (lib_->device()->device_type() != DEVICE_CPU) {
+  if (ctx->lib()->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
   }
   // TODO(mrry): Add cancellation manager support to IteratorContext
@@ -237,11 +212,10 @@ Status InstantiatedCapturedFunction::Run(IteratorContext* ctx,
   CancellationManager c_mgr;
   f_opts.cancellation_manager = &c_mgr;
 
-  OwnedArgsCallFrame frame(std::move(args), &captured_func_->captured_inputs(),
-                           ret_types_);
+  OwnedArgsCallFrame frame(std::move(args), &captured_inputs_, ret_types_);
   Notification n;
   Status s;
-  lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) {
+  ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) {
     s.Update(func_status);
     n.Notify();
   });
@@ -250,18 +224,20 @@ Status InstantiatedCapturedFunction::Run(IteratorContext* ctx,
   return frame.ConsumeRetvals(rets);
 }
 
-Status InstantiatedCapturedFunction::RunWithBorrowedArgs(
-    IteratorContext* ctx, const std::vector<Tensor>& args,
-    std::vector<Tensor>* rets) const {
+Status CapturedFunction::RunWithBorrowedArgs(IteratorContext* ctx,
+                                             const std::vector<Tensor>& args,
+                                             std::vector<Tensor>* rets) {
+  FunctionLibraryRuntime::Handle handle;
+  TF_RETURN_IF_ERROR(GetHandle(ctx, &handle));
+
   FunctionLibraryRuntime::Options f_opts;
-  f_opts.step_id = InstantiatedCapturedFunction::generate_step_id();
-  ScopedStepContainer step_container(
-      f_opts.step_id, [this](const string& name) {
-        lib_->device()->resource_manager()->Cleanup(name).IgnoreError();
-      });
+  f_opts.step_id = CapturedFunction::generate_step_id();
+  ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) {
+    ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError();
+  });
   f_opts.step_container = &step_container;
   f_opts.runner = ctx->runner();
-  if (lib_->device()->device_type() != DEVICE_CPU) {
+  if (ctx->lib()->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
   }
   // TODO(mrry): Add cancellation manager support to IteratorContext
@@ -273,12 +249,11 @@ Status InstantiatedCapturedFunction::RunWithBorrowedArgs(
   CancellationManager c_mgr;
   f_opts.cancellation_manager = &c_mgr;
 
-  BorrowedArgsCallFrame frame(args, &captured_func_->captured_inputs(),
-                              ret_types_);
+  BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_);
   Notification n;
   Status s;
 
-  lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) {
+  ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) {
     s.Update(func_status);
     n.Notify();
   });
@@ -287,17 +262,65 @@ Status InstantiatedCapturedFunction::RunWithBorrowedArgs(
   return frame.ConsumeRetvals(rets);
 }
 
-Status InstantiatedCapturedFunction::RunInstantiated(
-    const std::vector<Tensor>& args, std::vector<Tensor>* rets) {
+Status CapturedFunction::Instantiate(IteratorContext* ctx) {
+  mutex_lock l(mu_);
+  if (lib_ == nullptr) {
+    // The context's runtime will be used for all subsequent calls.
+    lib_ = ctx->lib();
+    DCHECK(f_handle_ == kInvalidHandle);
+    FunctionLibraryRuntime::InstantiateOptions inst_opts;
+    inst_opts.overlay_lib = ctx->function_library().get();
+    inst_opts.state_handle = std::to_string(random::New64());
+    inst_opts.create_kernels_eagerly = true;
+    if (!use_inter_op_parallelism_) {
+      inst_opts.executor_type = "SINGLE_THREADED_EXECUTOR";
+    }
+    Status s = (lib_->Instantiate(func_.name(), AttrSlice(&func_.attr()),
+                                  inst_opts, &f_handle_));
+    TF_RETURN_IF_ERROR(s);
+    const FunctionBody* fbody = lib_->GetFunctionBody(f_handle_);
+    if (fbody == nullptr) {
+      return errors::Internal("Failed to instantiate function body.");
+    }
+    ret_types_ = fbody->ret_types;
+  } else {
+    if (ctx->lib() != lib_) {
+      return errors::Internal(
+          "Captured function was called with a different "
+          "FunctionLibraryRuntime*, which is not permitted.");
+    }
+  }
+  if (captured_runner_ == nullptr) {
+    captured_runner_ = *ctx->runner();
+  }
+  return Status::OK();
+}
+
+Status CapturedFunction::RunInstantiated(const std::vector<Tensor>& args,
+                                         std::vector<Tensor>* rets) {
+  FunctionLibraryRuntime* lib;
+  FunctionLibraryRuntime::Handle handle;
+  std::function<void(std::function<void()>)>* runner;
+  {
+    tf_shared_lock l(mu_);
+    if (lib_ == nullptr) {
+      return errors::FailedPrecondition(
+          "`CapturedFunction::Instantiate()` must be called before a call to "
+          "`CapturedFunction::RunInstantiated()`.");
+    }
+    lib = lib_;
+    handle = f_handle_;
+    runner = &captured_runner_;
+  }
+
   FunctionLibraryRuntime::Options f_opts;
-  f_opts.step_id = InstantiatedCapturedFunction::generate_step_id();
-  ScopedStepContainer step_container(
-      f_opts.step_id, [this](const string& name) {
-        lib_->device()->resource_manager()->Cleanup(name).IgnoreError();
-      });
+  f_opts.step_id = CapturedFunction::generate_step_id();
+  ScopedStepContainer step_container(f_opts.step_id, [lib](const string& name) {
+    lib->device()->resource_manager()->Cleanup(name).IgnoreError();
+  });
   f_opts.step_container = &step_container;
-  f_opts.runner = &captured_runner_;
-  if (lib_->device()->device_type() != DEVICE_CPU) {
+  f_opts.runner = runner;
+  if (lib->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
   }
   // TODO(mrry): Add cancellation manager support to IteratorContext
@@ -309,12 +332,11 @@ Status InstantiatedCapturedFunction::RunInstantiated(
   CancellationManager c_mgr;
   f_opts.cancellation_manager = &c_mgr;
 
-  BorrowedArgsCallFrame frame(args, &captured_func_->captured_inputs(),
-                              ret_types_);
+  BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_);
   Notification n;
   Status s;
 
-  lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) {
+  lib->Run(f_opts, handle, &frame, [&n, &s](Status func_status) {
     s.Update(func_status);
     n.Notify();
   });
@@ -323,25 +345,33 @@ Status InstantiatedCapturedFunction::RunInstantiated(
   return frame.ConsumeRetvals(rets);
 }
 
-void InstantiatedCapturedFunction::RunAsync(
-    IteratorContext* ctx, std::vector<Tensor>&& args, std::vector<Tensor>* rets,
-    FunctionLibraryRuntime::DoneCallback done, const string& prefix) const {
+void CapturedFunction::RunAsync(IteratorContext* ctx,
+                                std::vector<Tensor>&& args,
+                                std::vector<Tensor>* rets,
+                                FunctionLibraryRuntime::DoneCallback done,
+                                const string& prefix) {
   // NOTE(mrry): This method does not transfer ownership of `ctx`, and it may
   // be deleted before `done` is called. Take care not to capture `ctx` in any
   // code that may execute asynchronously in this function.
-  auto frame = new OwnedArgsCallFrame(
-      std::move(args), &captured_func_->captured_inputs(), ret_types_);
+  FunctionLibraryRuntime::Handle handle;
+  Status s = GetHandle(ctx, &handle);
+  if (!s.ok()) {
+    done(s);
+    return;
+  }
+  auto frame =
+      new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_);
 
   FunctionLibraryRuntime::Options f_opts;
-  f_opts.step_id = InstantiatedCapturedFunction::generate_step_id();
-  ResourceMgr* resource_mgr = lib_->device()->resource_manager();
+  f_opts.step_id = CapturedFunction::generate_step_id();
+  ResourceMgr* resource_mgr = ctx->lib()->device()->resource_manager();
   auto step_container = new ScopedStepContainer(
       f_opts.step_id, [resource_mgr](const string& name) {
         resource_mgr->Cleanup(name).IgnoreError();
       });
   f_opts.step_container = step_container;
   f_opts.runner = ctx->runner();
-  if (lib_->device()->device_type() != DEVICE_CPU) {
+  if (ctx->lib()->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
   }
   // TODO(mrry): Add cancellation manager support to IteratorContext
@@ -396,13 +426,15 @@ void InstantiatedCapturedFunction::RunAsync(
       },
       std::move(done), std::placeholders::_1);
 
-  lib_->Run(f_opts, f_handle_, frame, std::move(callback));
+  ctx->lib()->Run(f_opts, handle, frame, std::move(callback));
 }
 
 CapturedFunction::CapturedFunction(const NameAttrList& func,
                                    std::vector<Tensor> captured_inputs,
                                    bool use_inter_op_parallelism)
     : func_(func),
+      lib_(nullptr),
+      f_handle_(kInvalidHandle),
       captured_inputs_(std::move(captured_inputs)),
       use_inter_op_parallelism_(use_inter_op_parallelism) {}
 
diff --git a/tensorflow/core/kernels/data/captured_function.h b/tensorflow/core/kernels/data/captured_function.h
index 1b10725082..a10376bf97 100644
--- a/tensorflow/core/kernels/data/captured_function.h
+++ b/tensorflow/core/kernels/data/captured_function.h
@@ -34,41 +34,59 @@ class ResourceMgr;
 
 namespace data {
 
-class CapturedFunction;
-
-// An InstantiatedCapturedFunction encapsulates all the runtime support needed
-// to execute a tensorflow function.
-//
-// While CapturedFunction (below) encapsulates the more permanent attributes
-// of the function i.e. name, captured arguments etc.,
-// InstantiatedCapturedFunction encapsulates the more runtime aspects i.e.
-// FunctionLibraryRuntime, function handle etc.
+// A `CapturedFunction` encapsulates a TensorFlow function and all of
+// the runtime support required to execute it.
 //
-// The `Iterator-`related classes use `InstantiatedCapturedFunction` to execute
-// functions.
-class InstantiatedCapturedFunction {
+// The `Dataset`-related classes use `CapturedFunction` to execute
+// TensorFlow functions outside a the normal `OpKernel::Compute()`
+// context.
+class CapturedFunction {
  public:
-  ~InstantiatedCapturedFunction();
+  // Creates a new instance using a list of named attributes, fetching captured
+  // inputs from a context argument.
+  static Status Create(const NameAttrList& func, OpKernelContext* ctx,
+                       const string& argument,
+                       std::unique_ptr<CapturedFunction>* out_function);
 
-  // Runs the "Instantiated Captured function". This method takes ownership of
-  // the tensors in `args`, in order to be able to deallocate them as early as
+  // Creates a new instance using a list of named attributes, fetching captured
+  // inputs from a context argument.
+  //
+  // If `use_inter_op_parallelism` is false, the runtime may use an executor
+  // that is optimized for small functions.
+  static Status Create(const NameAttrList& func, OpKernelContext* ctx,
+                       const string& argument, bool use_inter_op_parallelism,
+                       std::unique_ptr<CapturedFunction>* out_function);
+
+  ~CapturedFunction();
+
+  // Runs the "Captured function" using the given FLR and caches the lib and
+  // handle generated during instantiation. If Run is called with a different
+  // lib afterwards, generates an error. This method takes ownership of the
+  // tensors in `args`, in order to be able to deallocate them as early as
   // possible. Use `RunWithBorrowedArgs()` if the caller needs to retain
   // ownership of the `args`.
   Status Run(IteratorContext* ctx, std::vector<Tensor>&& args,
-             std::vector<Tensor>* rets) const;
+             std::vector<Tensor>* rets);
 
   // Synchronously runs the captured function on the given `args`, and stores
   // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when
   // possible.
   Status RunWithBorrowedArgs(IteratorContext* ctx,
                              const std::vector<Tensor>& args,
-                             std::vector<Tensor>* rets) const;
+                             std::vector<Tensor>* rets);
 
-  // Synchronously runs the captured function on the given `args`, and stores
-  // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when
-  // possible. This can be useful for calling a captured
+  // Explicitly instantiate this function for use in the given
+  // context. This method, and the context-less overload
+  // `RunInstantiated()` below can be useful for calling a captured
   // function in cases where an `IteratorContext*` is not available
   // (such as a destructor).
+  Status Instantiate(IteratorContext* ctx);
+
+  // Synchronously runs the captured function on the given `args`, and stores
+  // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when
+  // possible.
+  //
+  // REQUIRES: `this->Instantiate()` must have been called before this method.
   Status RunInstantiated(const std::vector<Tensor>& args,
                          std::vector<Tensor>* rets);
 
@@ -79,9 +97,16 @@ class InstantiatedCapturedFunction {
   void RunAsync(IteratorContext* ctx, std::vector<Tensor>&& args,
                 std::vector<Tensor>* rets,
                 FunctionLibraryRuntime::DoneCallback done,
-                const string& prefix) const;
+                const string& prefix);
+
+  // Returns the named list of function arguments.
+  const NameAttrList& func() { return func_; }
 
-  // Returns a step ID for use when running an `InstantiatedCapturedFunction`.
+  // Returns that additional captured inputs that will be passed to the function
+  // when `Run*()` is called.
+  const std::vector<Tensor>& captured_inputs() { return captured_inputs_; }
+
+  // Returns a step ID for use when running a `CapturedFunction`.
   static int64 generate_step_id() {
     // Choose a step ID that is guaranteed not to clash with any
     // Session-generated step ID. DirectSession only generates
@@ -91,65 +116,21 @@ class InstantiatedCapturedFunction {
     return -std::abs(static_cast<int64>(random::New64()));
   }
 
- private:
-  InstantiatedCapturedFunction(
-      FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle,
-      DataTypeVector ret_types,
-      std::function<void(std::function<void()>)> runner,
-      CapturedFunction* captured_func);
-
-  friend class CapturedFunction;
-
-  FunctionLibraryRuntime* const lib_;
-  const FunctionLibraryRuntime::Handle f_handle_;
-  const DataTypeVector ret_types_;
-  std::function<void(std::function<void()>)> captured_runner_;
-  CapturedFunction* const captured_func_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(InstantiatedCapturedFunction);
-};
-
-// A `CapturedFunction` encapsulates a TensorFlow function.
-//
-// The `Dataset`-related classes use `CapturedFunction` to execute
-// TensorFlow functions outside a the normal `OpKernel::Compute()`
-// context.
-class CapturedFunction {
- public:
-  // Creates a new instance using a list of named attributes, fetching captured
-  // inputs from a context argument.
-  static Status Create(const NameAttrList& func, OpKernelContext* ctx,
-                       const string& argument,
-                       std::unique_ptr<CapturedFunction>* out_function);
-
-  // Creates a new instance using a list of named attributes, fetching captured
-  // inputs from a context argument.
-  //
-  // If `use_inter_op_parallelism` is false, the runtime may use an executor
-  // that is optimized for small functions.
-  static Status Create(const NameAttrList& func, OpKernelContext* ctx,
-                       const string& argument, bool use_inter_op_parallelism,
-                       std::unique_ptr<CapturedFunction>* out_function);
-
-  // Instantiates this function for use in the given context, providing an
-  // InstantiatedCapturedFunction that can be used to execute functions.
-  Status Instantiate(IteratorContext* ctx,
-                     std::unique_ptr<InstantiatedCapturedFunction>*
-                         instantiated_captured_function);
-
-  // Returns the named list of function arguments.
-  const NameAttrList& func() { return func_; }
-
-  // Returns that additional captured inputs that will be passed to the function
-  const std::vector<Tensor>& captured_inputs() { return captured_inputs_; }
-
  private:
   CapturedFunction(const NameAttrList& func,
                    std::vector<Tensor> captured_inputs,
                    bool use_inter_op_parallelism);
 
+  Status GetHandle(IteratorContext* ctx,
+                   FunctionLibraryRuntime::Handle* out_handle);
+
+  mutex mu_;
   const NameAttrList func_;
+  FunctionLibraryRuntime* lib_ GUARDED_BY(mu_);
+  FunctionLibraryRuntime::Handle f_handle_ GUARDED_BY(mu_);
   const std::vector<Tensor> captured_inputs_;
+  DataTypeSlice ret_types_;
+  std::function<void(std::function<void()>)> captured_runner_ = nullptr;
   const bool use_inter_op_parallelism_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(CapturedFunction);
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index 36a1837295..e7ac368ae3 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -21,13 +21,12 @@ namespace data {
 
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
-    int64 thread_index,
-    const InstantiatedCapturedFunction& instantiated_captured_func,
-    StringPiece prefix, std::unique_ptr<IteratorBase>* out_iterator) {
+    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
+    std::unique_ptr<IteratorBase>* out_iterator) {
   std::vector<Tensor> return_values;
 
-  TF_RETURN_IF_ERROR(instantiated_captured_func.RunWithBorrowedArgs(
-      ctx, input_element, &return_values));
+  TF_RETURN_IF_ERROR(
+      captured_func->RunWithBorrowedArgs(ctx, input_element, &return_values));
 
   if (!(return_values.size() == 1 && return_values[0].dtype() == DT_VARIANT &&
         TensorShapeUtils::IsScalar(return_values[0].shape()))) {
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index 3de157b4bc..234856ea39 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -24,9 +24,8 @@ namespace data {
 
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
-    int64 thread_index,
-    const InstantiatedCapturedFunction& instantiated_captured_func,
-    StringPiece prefix, std::unique_ptr<IteratorBase>* out_iterator);
+    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
+    std::unique_ptr<IteratorBase>* out_iterator);
 
 }  // namespace data
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index dfdc16f347..19c35f94a6 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -131,10 +131,9 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       return Status::OK();
     }
 
-    virtual Status EvaluatePredicate(
-        IteratorContext* ctx,
-        InstantiatedCapturedFunction* instantiated_captured_function,
-        const std::vector<Tensor>& element, bool* out_matched) const = 0;
+    virtual Status EvaluatePredicate(IteratorContext* ctx,
+                                     const std::vector<Tensor>& element,
+                                     bool* out_matched) const = 0;
 
    private:
     class Iterator : public DatasetIterator<FilterDatasetBase> {
@@ -145,8 +144,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(
-            ctx, &instantiated_captured_func_);
+        return dataset()->captured_func_->Instantiate(ctx);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -173,8 +171,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          TF_RETURN_IF_ERROR(dataset()->EvaluatePredicate(
-              ctx, instantiated_captured_func_.get(), *out_tensors, &matched));
+          TF_RETURN_IF_ERROR(
+              dataset()->EvaluatePredicate(ctx, *out_tensors, &matched));
           if (!matched) {
             // Clear the output tensor list since it didn't match.
             out_tensors->clear();
@@ -208,7 +206,6 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
      private:
       mutex mu_;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
@@ -223,15 +220,14 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
     using FilterDatasetBase::FilterDatasetBase;
 
    protected:
-    Status EvaluatePredicate(
-        IteratorContext* ctx,
-        InstantiatedCapturedFunction* instantiated_captured_function,
-        const std::vector<Tensor>& element, bool* out_matched) const override {
+    Status EvaluatePredicate(IteratorContext* ctx,
+                             const std::vector<Tensor>& element,
+                             bool* out_matched) const override {
       // TODO(mrry): Avoid blocking a threadpool thread. We will need to
       // stack-rip the iterators and use async kernels.
       std::vector<Tensor> result;
-      TF_RETURN_IF_ERROR(instantiated_captured_function->RunWithBorrowedArgs(
-          ctx, element, &result));
+      TF_RETURN_IF_ERROR(
+          captured_func_->RunWithBorrowedArgs(ctx, element, &result));
 
       if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
           result[0].NumElements() != 1) {
@@ -253,10 +249,9 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
           index_(index) {}
 
    protected:
-    Status EvaluatePredicate(
-        IteratorContext* ctx,
-        InstantiatedCapturedFunction* instantiated_captured_function,
-        const std::vector<Tensor>& element, bool* out_matched) const override {
+    Status EvaluatePredicate(IteratorContext* ctx,
+                             const std::vector<Tensor>& element,
+                             bool* out_matched) const override {
       const Tensor& predicate = element[index_];
       if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
         return errors::InvalidArgument(
diff --git a/tensorflow/core/kernels/data/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
index 3af8162137..2fada22a21 100644
--- a/tensorflow/core/kernels/data/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
@@ -122,8 +122,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(
-            ctx, &instantiated_captured_func_);
+        return dataset()->captured_func_->Instantiate(ctx);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -239,7 +238,8 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         return MakeIteratorFromInputElement(
             ctx, captured_func_inputs_, element_index_++,
-            *instantiated_captured_func_, prefix(), &current_element_iterator_);
+            dataset()->captured_func_.get(), prefix(),
+            &current_element_iterator_);
       }
 
       Status BuildCurrentElementIteratorLocked(OpKernelContext* ctx)
@@ -257,7 +257,6 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> current_element_iterator_ GUARDED_BY(mu_);
       std::vector<Tensor> captured_func_inputs_ GUARDED_BY(mu_);
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/generator_dataset_op.cc b/tensorflow/core/kernels/data/generator_dataset_op.cc
index c7d8cfce90..71a36314a0 100644
--- a/tensorflow/core/kernels/data/generator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/generator_dataset_op.cc
@@ -73,8 +73,7 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
     ~Iterator() override {
       if (!finalized_) {
         std::vector<Tensor> ignored;
-        Status s =
-            instantiated_finalize_func_->RunInstantiated(state_, &ignored);
+        Status s = dataset()->finalize_func_->RunInstantiated(state_, &ignored);
         if (!s.ok()) {
           LOG(WARNING)
               << "Error occurred when finalizing GeneratorDataset iterator: "
@@ -84,14 +83,11 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
     }
 
     Status Initialize(IteratorContext* ctx) override {
+      TF_RETURN_IF_ERROR(dataset()->init_func_->Instantiate(ctx));
+      TF_RETURN_IF_ERROR(dataset()->next_func_->Instantiate(ctx));
+      TF_RETURN_IF_ERROR(dataset()->finalize_func_->Instantiate(ctx));
       TF_RETURN_IF_ERROR(
-          dataset()->init_func_->Instantiate(ctx, &instantiated_init_func_));
-      TF_RETURN_IF_ERROR(
-          dataset()->next_func_->Instantiate(ctx, &instantiated_next_func_));
-      TF_RETURN_IF_ERROR(dataset()->finalize_func_->Instantiate(
-          ctx, &instantiated_finalize_func_));
-      TF_RETURN_IF_ERROR(
-          instantiated_init_func_->RunWithBorrowedArgs(ctx, {}, &state_));
+          dataset()->init_func_->RunWithBorrowedArgs(ctx, {}, &state_));
       return Status::OK();
     }
 
@@ -105,8 +101,8 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
         return Status::OK();
       }
 
-      Status s = instantiated_next_func_->RunWithBorrowedArgs(ctx, state_,
-                                                              out_tensors);
+      Status s =
+          dataset()->next_func_->RunWithBorrowedArgs(ctx, state_, out_tensors);
       if (s.ok()) {
         *end_of_sequence = false;
       } else if (errors::IsOutOfRange(s)) {
@@ -119,7 +115,7 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
         // finalize function.
         std::vector<Tensor> ignored;
         TF_RETURN_IF_ERROR(
-            instantiated_finalize_func_->RunInstantiated(state_, &ignored));
+            dataset()->finalize_func_->RunInstantiated(state_, &ignored));
         finalized_ = true;
       }
       return s;
@@ -129,9 +125,6 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
     mutex mu_;
     bool finalized_ GUARDED_BY(mu_) = false;
     std::vector<Tensor> state_ GUARDED_BY(mu_);
-    std::unique_ptr<InstantiatedCapturedFunction> instantiated_init_func_;
-    std::unique_ptr<InstantiatedCapturedFunction> instantiated_next_func_;
-    std::unique_ptr<InstantiatedCapturedFunction> instantiated_finalize_func_;
   };
 
   const std::unique_ptr<CapturedFunction> init_func_;
diff --git a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
index 9cfcbbf8f6..d6ee42a7c6 100644
--- a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
@@ -192,14 +192,11 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(
-            ctx, &instantiated_key_func_));
-        TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Instantiate(
-            ctx, &instantiated_init_func_));
-        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(
-            ctx, &instantiated_reduce_func_));
-        TF_RETURN_IF_ERROR(dataset()->captured_finalize_func_->Instantiate(
-            ctx, &instantiated_finalize_func_));
+        TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(ctx));
+        TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Instantiate(ctx));
+        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(ctx));
+        TF_RETURN_IF_ERROR(
+            dataset()->captured_finalize_func_->Instantiate(ctx));
         return Status::OK();
       }
 
@@ -217,8 +214,9 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
           if (!end_of_input_) {
             // Run the key function on the input element.
             std::vector<Tensor> key_func_output;
-            TF_RETURN_IF_ERROR(instantiated_key_func_->RunWithBorrowedArgs(
-                ctx, next_input_element, &key_func_output));
+            TF_RETURN_IF_ERROR(
+                dataset()->captured_key_func_->RunWithBorrowedArgs(
+                    ctx, next_input_element, &key_func_output));
 
             if (key_func_output.size() != 1 ||
                 key_func_output[0].dtype() != DT_INT64 ||
@@ -232,7 +230,7 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
             if (states_.find(key) == states_.end()) {
               // Run the init function to create the initial state.
               std::vector<Tensor> init_func_output;
-              TF_RETURN_IF_ERROR(instantiated_init_func_->Run(
+              TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Run(
                   ctx, std::move(key_func_output), &init_func_output));
               states_[key] = init_func_output;
             }
@@ -246,7 +244,7 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
                       std::back_inserter(args));
 
             std::vector<Tensor> reduce_func_output;
-            TF_RETURN_IF_ERROR(instantiated_reduce_func_->Run(
+            TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run(
                 ctx, std::move(args), &reduce_func_output));
             states_[key] = reduce_func_output;
           } else {
@@ -262,8 +260,9 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
           *end_of_sequence = true;
           return Status::OK();
         }
-        TF_RETURN_IF_ERROR(instantiated_finalize_func_->RunWithBorrowedArgs(
-            ctx, states_[keys_[keys_index_++]], out_tensors));
+        TF_RETURN_IF_ERROR(
+            dataset()->captured_finalize_func_->RunWithBorrowedArgs(
+                ctx, states_[keys_[keys_index_++]], out_tensors));
         *end_of_sequence = false;
         return Status::OK();
       }
@@ -381,10 +380,6 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
       std::map<int64, std::vector<Tensor>> states_ GUARDED_BY(mu_);
       std::vector<int64> keys_ GUARDED_BY(mu_);
       int64 keys_index_ GUARDED_BY(mu_) = 0;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_key_func_;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_init_func_;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_reduce_func_;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_finalize_func_;
     };
 
     const NameAttrList& key_func() const { return captured_key_func_->func(); }
diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
index 2ea59bee5c..8b417bb1c2 100644
--- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
@@ -176,12 +176,10 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(
-            ctx, &instantiated_key_func_));
-        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(
-            ctx, &instantiated_reduce_func_));
-        TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Instantiate(
-            ctx, &instantiated_window_size_func_));
+        TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(ctx));
+        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(ctx));
+        TF_RETURN_IF_ERROR(
+            dataset()->captured_window_size_func_->Instantiate(ctx));
         return Status::OK();
       }
 
@@ -218,8 +216,9 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
               // Run the key function on the input element to identify its
               // group.
               std::vector<Tensor> key_func_output;
-              TF_RETURN_IF_ERROR(instantiated_key_func_->RunWithBorrowedArgs(
-                  ctx, next_input_element, &key_func_output));
+              TF_RETURN_IF_ERROR(
+                  dataset()->captured_key_func_->RunWithBorrowedArgs(
+                      ctx, next_input_element, &key_func_output));
 
               if (key_func_output.size() != 1 ||
                   key_func_output[0].dtype() != DT_INT64 ||
@@ -234,7 +233,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
                 // Run the window size function on the key to identify its
                 // window size.
                 std::vector<Tensor> window_size_func_output;
-                TF_RETURN_IF_ERROR(instantiated_window_size_func_->Run(
+                TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Run(
                     ctx, std::move(key_func_output), &window_size_func_output));
 
                 if (window_size_func_output.size() != 1 ||
@@ -449,8 +448,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
         std::vector<Tensor> args(
             {std::move(key_arg), std::move(group_dataset_arg)});
         std::vector<Tensor> return_values;
-        TF_RETURN_IF_ERROR(instantiated_reduce_func_->Run(ctx, std::move(args),
-                                                          &return_values));
+        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run(
+            ctx, std::move(args), &return_values));
 
         if (!(return_values.size() == 1 &&
               return_values[0].dtype() == DT_VARIANT &&
@@ -479,10 +478,6 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
       std::map<int64, std::vector<std::vector<Tensor>>> groups_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> current_group_iterator_ GUARDED_BY(mu_);
       std::map<int64, int64> window_sizes_ GUARDED_BY(mu_);
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_key_func_;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_reduce_func_;
-      std::unique_ptr<InstantiatedCapturedFunction>
-          instantiated_window_size_func_;
     };
 
     Status OtherArgumentsNodeAndType(
diff --git a/tensorflow/core/kernels/data/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc
index 91c298ce9a..0aa802b874 100644
--- a/tensorflow/core/kernels/data/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc
@@ -149,8 +149,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(
-            ctx, &instantiated_captured_func_);
+        return dataset()->captured_func_->Instantiate(ctx);
       }
 
       void AdvanceToNextInCycle() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
@@ -196,7 +195,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
             if (!end_of_input_) {
               TF_RETURN_IF_ERROR(MakeIteratorFromInputElement(
                   ctx, args_list_[cycle_index_], cycle_index_,
-                  *instantiated_captured_func_, prefix(),
+                  dataset()->captured_func_.get(), prefix(),
                   &current_elements_[cycle_index_]));
               ++num_open_;
             }
@@ -282,7 +281,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
                   &args_list_[idx][i]));
             }
             TF_RETURN_IF_ERROR(MakeIteratorFromInputElement(
-                ctx, args_list_[idx], idx, *instantiated_captured_func_,
+                ctx, args_list_[idx], idx, dataset()->captured_func_.get(),
                 prefix(), &current_elements_[idx]));
             TF_RETURN_IF_ERROR(
                 RestoreInput(ctx, reader, current_elements_[idx]));
@@ -302,7 +301,6 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
       int64 block_index_ GUARDED_BY(mu_) = 0;
       bool end_of_input_ GUARDED_BY(mu_) = false;
       size_t num_open_ GUARDED_BY(mu_) = 0;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 71d3335452..83896219a3 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -218,8 +218,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         }
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(
-            ctx, &instantiated_captured_func_);
+        return dataset()->captured_func_->Instantiate(ctx);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -376,7 +375,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                                    std::vector<Tensor> input_element) {
               std::shared_ptr<std::vector<Tensor>> return_values(
                   new std::vector<Tensor>());
-              instantiated_captured_func_->RunAsync(
+              dataset()->captured_func_->RunAsync(
                   ctx.get(), std::move(input_element), return_values.get(),
                   [this, ctx, result, return_values, offset](Status status) {
                     Callback(ctx, result, return_values, offset, status);
@@ -673,7 +672,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(mu_);
       std::unique_ptr<Thread> runner_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index 5b891b4fd5..f112e1dc43 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -122,8 +122,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(
-            ctx, &instantiated_captured_func_);
+        return dataset()->captured_func_->Instantiate(ctx);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -143,7 +142,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
         // TODO(mrry): Avoid blocking a threadpool thread. We will need to
         // stack-rip the iterators and use async kernels.
         Status s =
-            instantiated_captured_func_->Run(ctx, std::move(args), out_tensors);
+            dataset()->captured_func_->Run(ctx, std::move(args), out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
@@ -168,7 +167,6 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
      private:
       std::unique_ptr<IteratorBase> input_impl_;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 448cc93a8c..9cd46bf5dd 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -247,8 +247,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
         AddConstantParameter(ctx, "parallelism", dataset()->cycle_length_);
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(
-            ctx, &instantiated_captured_func_);
+        return dataset()->captured_func_->Instantiate(ctx);
       }
 
       // It is implemented so that it matches the deterministic interleave
@@ -686,7 +685,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
               worker_thread_states_[thread_index].iterator_creation_status =
                   MakeIteratorFromInputElement(
                       ctx.get(), worker_thread_states_[thread_index].input,
-                      thread_index, *instantiated_captured_func_, prefix(),
+                      thread_index, dataset()->captured_func_.get(), prefix(),
                       &worker_thread_states_[thread_index].iterator);
               iterator_creation_status =
                   worker_thread_states_[thread_index].iterator_creation_status;
@@ -920,7 +919,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           std::unique_ptr<IteratorBase> iterator;
           Status s = MakeIteratorFromInputElement(
               ctx, worker_thread_states_[index].input, index,
-              *instantiated_captured_func_, prefix(), &iterator);
+              dataset()->captured_func_.get(), prefix(), &iterator);
           TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, iterator));
           worker_thread_states_[index].iterator.swap(iterator);
         }
@@ -1048,7 +1047,6 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
       std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
@@ -1256,8 +1254,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         AddConstantParameter(ctx, "cycle_length", dataset()->cycle_length_);
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(
-            ctx, &instantiated_captured_func_);
+        return dataset()->captured_func_->Instantiate(ctx);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -1493,7 +1490,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
                 if (!end_of_input_) {
                   Status status = MakeIteratorFromInputElement(
                       ctx.get(), args_list_[cycle_index_], cycle_index_,
-                      *instantiated_captured_func_, prefix(),
+                      dataset()->captured_func_.get(), prefix(),
                       &current_elements_[cycle_index_]);
                   if (!status.ok()) {
                     invocation_results_.emplace_back(new InvocationResult());
@@ -1602,7 +1599,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
                   &args_list_[idx][i]));
             }
             TF_RETURN_IF_ERROR(MakeIteratorFromInputElement(
-                ctx, args_list_[idx], idx, *instantiated_captured_func_,
+                ctx, args_list_[idx], idx, dataset()->captured_func_.get(),
                 prefix(), &current_elements_[idx]));
             TF_RETURN_IF_ERROR(
                 RestoreInput(ctx, reader, current_elements_[idx]));
@@ -1662,7 +1659,6 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(mu_) = false;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 822f06be9e..6abe6c8338 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -85,11 +85,29 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
+      auto init_func = [this](IteratorContext* ctx) {
+        return captured_func_->Instantiate(ctx);
+      };
+
       const string& new_prefix = strings::StrCat(prefix, "::ParallelMap");
-      std::unique_ptr<ParallelMapDatasetFunctor> parallel_map_dataset_functor(
-          new ParallelMapDatasetFunctor(this, new_prefix));
+      ParallelMapIteratorFunction map_func =
+          [this, new_prefix](IteratorContext* ctx,
+                             std::vector<Tensor> input_element,
+                             std::vector<Tensor>* result, StatusCallback done) {
+            captured_func_->RunAsync(ctx, std::move(input_element), result,
+                                     std::move(done), new_prefix);
+          };
+      if (!use_inter_op_parallelism_) {
+        map_func = [map_func](
+                       IteratorContext* ctx, std::vector<Tensor> input_element,
+                       std::vector<Tensor>* result, StatusCallback done) {
+          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
+                                     result, std::move(done)));
+        };
+      }
+
       return NewParallelMapIterator({this, new_prefix}, input_,
-                                    std::move(parallel_map_dataset_functor),
+                                    std::move(init_func), std::move(map_func),
                                     num_parallel_calls_);
     }
 
@@ -151,39 +169,6 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     }
 
    private:
-    class ParallelMapDatasetFunctor : public ParallelMapFunctor {
-     public:
-      ParallelMapDatasetFunctor(const Dataset* dataset, const string& prefix)
-          : dataset_(dataset), prefix_(prefix) {}
-
-      Status InitFunc(IteratorContext* ctx) override {
-        return dataset_->captured_func_->Instantiate(
-            ctx, &instantiated_captured_func_);
-      }
-
-      void MapFunc(IteratorContext* ctx, std::vector<Tensor> input_element,
-                   std::vector<Tensor>* result, StatusCallback done) override {
-        auto map_func = [this](IteratorContext* ctx,
-                               std::vector<Tensor> input_element,
-                               std::vector<Tensor>* result,
-                               StatusCallback done) {
-          instantiated_captured_func_->RunAsync(
-              ctx, std::move(input_element), result, std::move(done), prefix_);
-        };
-        if (!dataset_->use_inter_op_parallelism_) {
-          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
-                                     result, std::move(done)));
-        } else {
-          map_func(ctx, std::move(input_element), result, std::move(done));
-        }
-      }
-
-     private:
-      const Dataset* dataset_;
-      const string prefix_;
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
-    };
-
     const DatasetBase* const input_;
     const NameAttrList func_;
     const int32 num_parallel_calls_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 4f8e0489de..5f6052ce83 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -31,11 +31,12 @@ class ParallelMapIterator : public DatasetBaseIterator {
   explicit ParallelMapIterator(
       const typename DatasetBaseIterator::BaseParams& params,
       const DatasetBase* input_dataset,
-      std::unique_ptr<ParallelMapFunctor> parallel_map_functor,
-      int32 num_parallel_calls)
+      std::function<Status(IteratorContext*)> init_func,
+      ParallelMapIteratorFunction map_func, int32 num_parallel_calls)
       : DatasetBaseIterator(params),
         input_dataset_(input_dataset),
-        parallel_map_functor_(std::move(parallel_map_functor)),
+        init_func_(std::move(init_func)),
+        map_func_(std::move(map_func)),
         num_parallel_calls_(num_parallel_calls) {}
 
   ~ParallelMapIterator() override {
@@ -76,7 +77,10 @@ class ParallelMapIterator : public DatasetBaseIterator {
     }
     TF_RETURN_IF_ERROR(
         input_dataset_->MakeIterator(ctx, prefix(), &input_impl_));
-    return parallel_map_functor_->InitFunc(ctx);
+    if (init_func_) {
+      TF_RETURN_IF_ERROR(init_func_(ctx));
+    }
+    return Status::OK();
   }
 
   Status GetNextInternal(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
@@ -222,8 +226,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
       CallCompleted(result);
     };
 
-    parallel_map_functor_->MapFunc(ctx.get(), std::move(input_element),
-                                   &result->return_values, std::move(done));
+    map_func_(ctx.get(), std::move(input_element), &result->return_values,
+              std::move(done));
   }
 
   Status ProcessResult(const std::shared_ptr<InvocationResult>& result,
@@ -319,7 +323,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   const DatasetBase* const input_dataset_;  // Not owned.
-  std::unique_ptr<ParallelMapFunctor> parallel_map_functor_;
+  const std::function<Status(IteratorContext*)> init_func_;
+  const ParallelMapIteratorFunction map_func_;
   // Used for coordination between the main thread and the runner thread.
   mutex mu_;
   // Used for coordination between the main thread and the runner thread. In
@@ -344,12 +349,20 @@ class ParallelMapIterator : public DatasetBaseIterator {
 
 std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBaseIterator::BaseParams& params,
-    const DatasetBase* input_dataset,
-    std::unique_ptr<ParallelMapFunctor> parallel_map_functor,
+    const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func,
     int32 num_parallel_calls) {
-  return std::unique_ptr<IteratorBase>(new ParallelMapIterator(
-      params, input_dataset, std::move(parallel_map_functor),
-      num_parallel_calls));
+  return NewParallelMapIterator(params, input_dataset, nullptr,
+                                std::move(map_func), num_parallel_calls);
+}
+
+std::unique_ptr<IteratorBase> NewParallelMapIterator(
+    const DatasetBaseIterator::BaseParams& params,
+    const DatasetBase* input_dataset,
+    std::function<Status(IteratorContext*)> init_func,
+    ParallelMapIteratorFunction map_func, int32 num_parallel_calls) {
+  return std::unique_ptr<IteratorBase>(
+      new ParallelMapIterator(params, input_dataset, std::move(init_func),
+                              std::move(map_func), num_parallel_calls));
 }
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h
index 62e57e5335..dc26c5cf25 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.h
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h
@@ -22,32 +22,30 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
-class ParallelMapFunctor {
- public:
-  virtual ~ParallelMapFunctor() {}
-
-  // A function that runs when the Iterator is initialized. It enables the user
-  // to specify error checking logic that can fail early.
-  virtual Status InitFunc(IteratorContext* ctx) { return Status::OK(); }
-
-  // A function that transforms elements of one dataset into another
-  // asynchronously. The arguments are:
-  // 1. An `IteratorContext*` for the context in which the function should
-  // execute.
-  // 2. A `std::vector<Tensor>` containing the input element.
-  // 3. A `std::vector<Tensor>*` to which the function will write the result.
-  // 4. A `StatusCallback` that should be invoked when the function is complete.
-  virtual void MapFunc(IteratorContext* ctx, std::vector<Tensor> input,
-                       std::vector<Tensor>* output,
-                       StatusCallback callback) = 0;
-};
-
-// Returns a new iterator that uses `parallel_map_functor` to apply `MapFunc`
-// to the elements of `input_dataset` using the given degree of parallelism.
+// A function that transforms elements of one dataset into another
+// asynchronously. The arguments are:
+// 1. An `IteratorContext*` for the context in which the function should
+// execute.
+// 2. A `std::vector<Tensor>` containing the input element.
+// 3. A `std::vector<Tensor>*` to which the function will write the result.
+// 4. A `StatusCallback` that should be invoked when the function is complete.
+using ParallelMapIteratorFunction =
+    std::function<void(IteratorContext*, std::vector<Tensor>,
+                       std::vector<Tensor>*, StatusCallback)>;
+
+// Returns a new iterator that applies `map_func` to the elements of
+// `input_dataset` using the given degree of parallelism. `init_func` (if
+// specified) will be executed when the iterator is initialized (see
+// `IteratorBase::Initialize()`) and enables the user to specify error checking
+// logic that can fail early.
 std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBaseIterator::BaseParams& params,
     const DatasetBase* input_dataset,
-    std::unique_ptr<ParallelMapFunctor> parallel_map_functor,
+    std::function<Status(IteratorContext*)> init_func,
+    ParallelMapIteratorFunction map_func, int32 num_parallel_calls);
+std::unique_ptr<IteratorBase> NewParallelMapIterator(
+    const DatasetBaseIterator::BaseParams& params,
+    const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func,
     int32 num_parallel_calls);
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index 32210ef677..c28c06da62 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -182,80 +182,9 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      std::unique_ptr<ParallelMapFunctor> parse_example_functor(
-          new ParseExampleFunctor(this));
-      return NewParallelMapIterator(
-          {this, strings::StrCat(prefix, "::ParseExample")}, input_,
-          std::move(parse_example_functor), num_parallel_calls_);
-    }
-
-    const DataTypeVector& output_dtypes() const override {
-      return output_types_;
-    }
-
-    const std::vector<PartialTensorShape>& output_shapes() const override {
-      return output_shapes_;
-    }
-
-    string DebugString() const override {
-      return "ParseExampleDatasetOp::Dataset";
-    }
-
-   protected:
-    Status AsGraphDefInternal(SerializationContext* ctx,
-                              DatasetGraphDefBuilder* b,
-                              Node** output) const override {
-      Node* input_graph_node = nullptr;
-      TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
-
-      Node* num_parallle_calls_node;
-      std::vector<Node*> dense_defaults_nodes;
-      dense_defaults_nodes.reserve(dense_defaults_.size());
-
-      TF_RETURN_IF_ERROR(
-          b->AddScalar(num_parallel_calls_, &num_parallle_calls_node));
-
-      for (const Tensor& dense_default : dense_defaults_) {
-        Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(dense_default, &node));
-        dense_defaults_nodes.emplace_back(node);
-      }
-
-      AttrValue sparse_keys_attr;
-      AttrValue dense_keys_attr;
-      AttrValue sparse_types_attr;
-      AttrValue dense_attr;
-      AttrValue dense_shapes_attr;
-
-      b->BuildAttrValue(sparse_keys_, &sparse_keys_attr);
-      b->BuildAttrValue(dense_keys_, &dense_keys_attr);
-      b->BuildAttrValue(sparse_types_, &sparse_types_attr);
-      b->BuildAttrValue(dense_types_, &dense_attr);
-      b->BuildAttrValue(dense_shapes_, &dense_shapes_attr);
-
-      TF_RETURN_IF_ERROR(b->AddDataset(this,
-                                       {
-                                           {0, input_graph_node},
-                                           {1, num_parallle_calls_node},
-                                       },
-                                       {{2, dense_defaults_nodes}},
-                                       {{"sparse_keys", sparse_keys_attr},
-                                        {"dense_keys", dense_keys_attr},
-                                        {"sparse_types", sparse_types_attr},
-                                        {"Tdense", dense_attr},
-                                        {"dense_shapes", dense_shapes_attr}},
-                                       output));
-      return Status::OK();
-    }
-
-   private:
-    class ParseExampleFunctor : public ParallelMapFunctor {
-     public:
-      explicit ParseExampleFunctor(const Dataset* dataset)
-          : dataset_(dataset) {}
-
-      void MapFunc(IteratorContext* ctx, std::vector<Tensor> input_element,
-                   std::vector<Tensor>* result, StatusCallback done) override {
+      auto map_fn = [this](IteratorContext* ctx,
+                           std::vector<Tensor> input_element,
+                           std::vector<Tensor>* result, StatusCallback done) {
         (*ctx->runner())([this, ctx, input_element, result, done]() {
           thread::ThreadPool* device_threadpool =
               ctx->lib()->device()->tensorflow_cpu_worker_threads()->workers;
@@ -267,7 +196,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
             for (auto it = slice.begin(); it != slice.end(); it++)
               slice_vec.push_back(*it);
           }
-          example::FastParseExampleConfig config = dataset_->config_;
+          example::FastParseExampleConfig config = config_;
           // local copy of config_ for modification.
           auto stats_aggregator = ctx->stats_aggregator();
           if (stats_aggregator) {
@@ -277,50 +206,43 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
           Status s = FastParseExample(config, slice_vec, {}, device_threadpool,
                                       &example_result);
           if (s.ok()) {
-            (*result).resize(dataset_->key_to_output_index_.size());
-            for (int d = 0; d < dataset_->dense_keys_.size(); ++d) {
-              int output_index =
-                  dataset_->key_to_output_index_.at(dataset_->dense_keys_[d]);
-              DCHECK(example_result.dense_values[d].dtype() ==
-                     dataset_->output_dtypes()[output_index])
+            (*result).resize(key_to_output_index_.size());
+            for (int d = 0; d < dense_keys_.size(); ++d) {
+              int output_index = key_to_output_index_.at(dense_keys_[d]);
+              CHECK(example_result.dense_values[d].dtype() ==
+                    output_dtypes()[output_index])
                   << "Got wrong type for FastParseExample return value " << d
                   << " (expected "
-                  << DataTypeString(dataset_->output_dtypes()[output_index])
-                  << ", got "
+                  << DataTypeString(output_dtypes()[output_index]) << ", got "
                   << DataTypeString(example_result.dense_values[d].dtype())
                   << ").";
-              DCHECK(dataset_->output_shapes()[output_index].IsCompatibleWith(
+              CHECK(output_shapes()[output_index].IsCompatibleWith(
                   example_result.dense_values[d].shape()))
                   << "Got wrong shape for FastParseExample return value " << d
                   << " (expected "
-                  << dataset_->output_shapes()[output_index].DebugString()
-                  << ", got "
+                  << output_shapes()[output_index].DebugString() << ", got "
                   << example_result.dense_values[d].shape().DebugString()
                   << ").";
               (*result)[output_index] = example_result.dense_values[d];
             }
-            for (int d = 0; d < dataset_->sparse_keys_.size(); ++d) {
+            for (int d = 0; d < sparse_keys_.size(); ++d) {
               Tensor serialized_sparse = Tensor(DT_VARIANT, TensorShape({3}));
               auto serialized_sparse_t = serialized_sparse.vec<Variant>();
               serialized_sparse_t(0) = example_result.sparse_indices[d];
               serialized_sparse_t(1) = example_result.sparse_values[d];
               serialized_sparse_t(2) = example_result.sparse_shapes[d];
-              int output_index =
-                  dataset_->key_to_output_index_.at(dataset_->sparse_keys_[d]);
-              DCHECK(serialized_sparse.dtype() ==
-                     dataset_->output_dtypes()[output_index])
+              int output_index = key_to_output_index_.at(sparse_keys_[d]);
+              CHECK(serialized_sparse.dtype() == output_dtypes()[output_index])
                   << "Got wrong type for FastParseExample return value " << d
                   << " (expected "
-                  << DataTypeString(dataset_->output_dtypes()[output_index])
-                  << ", got " << DataTypeString(serialized_sparse.dtype())
-                  << ").";
-              DCHECK(dataset_->output_shapes()[output_index].IsCompatibleWith(
+                  << DataTypeString(output_dtypes()[output_index]) << ", got "
+                  << DataTypeString(serialized_sparse.dtype()) << ").";
+              CHECK(output_shapes()[output_index].IsCompatibleWith(
                   serialized_sparse.shape()))
                   << "Got wrong shape for FastParseExample return value " << d
                   << " (expected "
-                  << dataset_->output_shapes()[output_index].DebugString()
-                  << ", got " << serialized_sparse.shape().DebugString()
-                  << ").";
+                  << output_shapes()[output_index].DebugString() << ", got "
+                  << serialized_sparse.shape().DebugString() << ").";
               (*result)[output_index] = serialized_sparse;
             }
             // TODO(b/111553342): User provided tags instead of fixed tag.
@@ -346,12 +268,73 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
           }
           done(s);
         });
+      };
+
+      return NewParallelMapIterator(
+          {this, strings::StrCat(prefix, "::ParseExample")}, input_,
+          std::move(map_fn), num_parallel_calls_);
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return output_types_;
+    }
+
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return output_shapes_;
+    }
+
+    string DebugString() const override {
+      return "ParseExampleDatasetOp::Dataset";
+    }
+
+   protected:
+    Status AsGraphDefInternal(SerializationContext* ctx,
+                              DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
+
+      Node* num_parallle_calls_node;
+      std::vector<Node*> dense_defaults_nodes;
+      dense_defaults_nodes.reserve(dense_defaults_.size());
+
+      TF_RETURN_IF_ERROR(
+          b->AddScalar(num_parallel_calls_, &num_parallle_calls_node));
+
+      for (const Tensor& dense_default : dense_defaults_) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(dense_default, &node));
+        dense_defaults_nodes.emplace_back(node);
       }
 
-     private:
-      const Dataset* dataset_;
-    };
+      AttrValue sparse_keys_attr;
+      AttrValue dense_keys_attr;
+      AttrValue sparse_types_attr;
+      AttrValue dense_attr;
+      AttrValue dense_shapes_attr;
+
+      b->BuildAttrValue(sparse_keys_, &sparse_keys_attr);
+      b->BuildAttrValue(dense_keys_, &dense_keys_attr);
+      b->BuildAttrValue(sparse_types_, &sparse_types_attr);
+      b->BuildAttrValue(dense_types_, &dense_attr);
+      b->BuildAttrValue(dense_shapes_, &dense_shapes_attr);
+
+      TF_RETURN_IF_ERROR(b->AddDataset(this,
+                                       {
+                                           {0, input_graph_node},
+                                           {1, num_parallle_calls_node},
+                                       },
+                                       {{2, dense_defaults_nodes}},
+                                       {{"sparse_keys", sparse_keys_attr},
+                                        {"dense_keys", dense_keys_attr},
+                                        {"sparse_types", sparse_types_attr},
+                                        {"Tdense", dense_attr},
+                                        {"dense_shapes", dense_shapes_attr}},
+                                       output));
+      return Status::OK();
+    }
 
+   private:
     const DatasetBase* const input_;
     const std::vector<Tensor> dense_defaults_;
     const std::vector<string> sparse_keys_;
diff --git a/tensorflow/core/kernels/data/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc
index d9fdd59bf0..dbe31f37b8 100644
--- a/tensorflow/core/kernels/data/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/data/scan_dataset_op.cc
@@ -144,8 +144,7 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
-        return dataset()->captured_func_->Instantiate(
-            ctx, &instantiated_captured_func_);
+        return dataset()->captured_func_->Instantiate(ctx);
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -170,8 +169,8 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
         state_and_output.reserve(dataset()->state_types_.size() +
                                  output_dtypes().size());
 
-        Status s = instantiated_captured_func_->Run(ctx, std::move(args),
-                                                    &state_and_output);
+        Status s = dataset()->captured_func_->Run(ctx, std::move(args),
+                                                  &state_and_output);
         if (s.ok()) {
           state_.clear();
           size_t i = 0;
@@ -248,7 +247,6 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       std::vector<Tensor> state_ GUARDED_BY(mu_);
-      std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_;
     };
 
     const DatasetBase* const input_;
-- 
GitLab


From 6c8f6920e8bad10429ac0b88abbe0ace5a5e9a72 Mon Sep 17 00:00:00 2001
From: Mustafa Ispir <ispir@google.com>
Date: Tue, 18 Sep 2018 15:27:47 -0700
Subject: [PATCH 0342/1357] Updates documentation of Estimator.predict to note
 that an issue with yielding and graph context.

PiperOrigin-RevId: 213528782
---
 tensorflow/python/estimator/estimator.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index ff2baa0465..ffe1e30da0 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -490,6 +490,10 @@ class Estimator(object):
               yield_single_examples=True):
     """Yields predictions for given features.
 
+    Please note that interleaving two predict outputs does not work. See:
+    [issue/20506](
+    https://github.com/tensorflow/tensorflow/issues/20506#issuecomment-422208517)
+
     Args:
       input_fn: A function that constructs the features. Prediction continues
         until `input_fn` raises an end-of-input exception
-- 
GitLab


From e1a32c98210f8ebba42a0397259d948e1433c09e Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 18 Sep 2018 15:42:44 -0700
Subject: [PATCH 0343/1357] "Isolate" must-be-constant side effecting
 operations

I first tried to fix this issue in cr/209996730 but didn't quite fix the problem
for for XLA_* devices.  A node assigned to an XLA_* device must be compiled so
the cr/209996730 fix of simply not compiling the nodes doesn't generalize to
XLA_* devices.  Instead we now "isolate" these nodes, only putting them in a
trivial one-node cluster.  For non-XLA devices even this trivial cluster is
ignored because of flags->tf_xla_min_cluster_size.

I was initially considering a more principled data-flow-analysis based solution
but then decided the upfront work isn't worth it until I see a clear motivating
example.

PiperOrigin-RevId: 213531437
---
 .../compiler/jit/mark_for_compilation_pass.cc | 73 ++++++++++++++++---
 .../jit/mark_for_compilation_pass_test.cc     | 66 +++++++++++++++++
 .../mark_for_compilation_pass_test_helper.cc  | 21 +++++-
 3 files changed, 147 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index e6cc6e52ae..1eaedbfbfb 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -365,10 +365,13 @@ bool IsXlaFusable(const NodeDef& node) {
   return elementwise_ops->count(node.op()) > 0;
 }
 
+// Nodes that XLA can compile are put in `candidates`.  Nodes put in
+// `isolated_nodes` must either be unclustered or be put in trivial single-node
+// clusters.
 Status FindCompilationCandidates(
     const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env,
     const std::function<bool(const Node*, const DeviceType&)>& is_compilable_fn,
-    OrderedNodeSet* candidates) {
+    OrderedNodeSet* candidates, gtl::FlatSet<Node*>* isolated_nodes) {
   OptimizerOptions opts;
   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
       new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION,
@@ -411,6 +414,8 @@ Status FindCompilationCandidates(
     DeviceType device_type("");
     TF_RETURN_IF_ERROR(
         DeviceToDeviceType(node->assigned_device_name(), &device_type));
+    VLOG(4) << "Device type for " << node->name() << ": "
+            << device_type.type_string();
 
     if (is_compilable_fn && !is_compilable_fn(node, device_type)) {
       // is_compilable_fn has already logged the reason if it returned false.
@@ -439,19 +444,56 @@ Status FindCompilationCandidates(
               << node->type_string();
       continue;
     }
-    if (compile_time_const_nodes[node->id()] &&
-        !registration->requires_compilation) {
+    if (compile_time_const_nodes[node->id()]) {
       const OpDef* op_def;
       TF_RETURN_IF_ERROR(
           graph.op_registry()->LookUpOpDef(node->type_string(), &op_def));
       if (op_def->is_stateful()) {
-        // We need to be able to constant fold the nodes in
-        // compile_time_const_nodes given constant inputs (required by XLA) and
-        // therefore can't auto-cluster stateful ops since these can never be
-        // constant folded.
-        VLOG(2) << "Rejecting " << node->name()
-                << ": must-be-constant stateful op";
-        continue;
+        // It is easiest to demonstrate the problem we're trying to solve with
+        // an example.  Say we have this graph:
+        //
+        //   shape = RandomUniformInt();
+        //   reshape = Reshape(input, shape)
+        //
+        // Both RandomUniformInt and Reshape are compilable by XLA so, absent
+        // any other reason, we will try to put both shape and reshape in the
+        // same cluster.  However, since XLA only supports statically shaped
+        // values, it will expect to be able to constant fold `shape` to get a
+        // static shape for `reshape`.  This is a problem because side-effecting
+        // ops like RandomUniformInt() cannot be constant folded.  We fix this
+        // by putting `shape` and `reshape` in different clusters, which results
+        // in us recompiling `reshape`'s cluster for every new value of `shape`,
+        // making `reshape` statically sized within each compilation.  We
+        // simplify the solution even further by disallowing operations like
+        // `shape` from being part of *any* non-trivial cluster.  They're either
+        // not compiled by XLA altogether or, if assigned to an XLA_* device
+        // with "must compile" semantics, compiled into a trivial single-op
+        // cluster.  This approach leaves some room for improvement, and we can
+        // consider implementing a more aggressive data-flow-analysis based
+        // solution in the future if needed.
+        //
+        // One ugly problem we have to contend with: certain sets of ops *have*
+        // to be in the same cluster because values flowing between them have
+        // types that can't be live-in or live-out of a cluster.  These ops are:
+        //
+        //  - TensorArray ops operating on the same TensorArray instance.
+        //  - Stack ops operating on the same Stack instance.
+        //
+        // To work around this we avoid isolating these specific ops.  Because
+        // of this concession it is unsound to auto-cluster them because then
+        // we'd create clusters we could not compile (because we can't constant
+        // fold, say, a TensorArrayRead or a StackPopV2).  But we don't
+        // auto-cluster these operations today so we're good for now.
+        const XlaResourceOpInfo* op_info =
+            GetResourceOpInfoForOp(node->type_string());
+        bool is_tensor_array_or_stack_op =
+            op_info && op_info->resource_kind() != XlaResourceKind::kVariable;
+        if (!is_tensor_array_or_stack_op) {
+          VLOG(2) << "Isolating " << node->name()
+                  << ": must-be-constant stateful op";
+          isolated_nodes->insert(node);
+          // Keep going and execute all the other checks.
+        }
       }
     }
     // We don't auto-cluster functional control flow nodes containing resource
@@ -807,11 +849,12 @@ Status MarkForCompilationPass::RunImpl(
   Graph* graph = options.graph->get();
 
   OrderedNodeSet compilation_candidates;
+  gtl::FlatSet<Node*> isolated_nodes;
   TF_RETURN_IF_ERROR(FindCompilationCandidates(
       *graph, options.flib_def,
       (options.session_options != nullptr) ? options.session_options->env
                                            : Env::Default(),
-      is_compilable_fn, &compilation_candidates));
+      is_compilable_fn, &compilation_candidates, &isolated_nodes));
 
   if (compilation_candidates.empty()) {
     VLOG(2) << "No compilable candidates";
@@ -856,6 +899,11 @@ Status MarkForCompilationPass::RunImpl(
           "Found control flow node in clustering worklist: ",
           node_from->type_string());
     }
+
+    if (isolated_nodes.count(node_from)) {
+      continue;
+    }
+
     string from_scope;
     string to_scope;
     for (int to : cycles.Successors(from)) {
@@ -873,6 +921,9 @@ Status MarkForCompilationPass::RunImpl(
           node_to->assigned_device_name()) {
         continue;
       }
+      if (isolated_nodes.count(node_to)) {
+        continue;
+      }
       // Look for an _XlaScope on both nodes.  If both nodes have a
       // scope and the scopes do not match, do not cluster along this
       // edge. This restriction is overridden if the global_jit_level is ON. If
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
index c59770a4c8..4f9145b479 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
@@ -894,5 +894,71 @@ TEST(XlaCompilationTest, RandomShapeWithFunc) {
   EXPECT_EQ(clusters["fn_call"], "");
 }
 
+TEST(XlaCompilationTest, RandomShapeOnXlaDevice) {
+  absl::string_view xla_gpu_device =
+      "/job:worker/replica:0/task:0/device:XLA_GPU:0";
+
+  Scope root = Scope::NewRootScope().ExitOnError();
+  Output shape_shape =
+      ops::Const(root.WithOpName("test/shape_shape"), {2}, {1});
+  Output shape =
+      ops::RandomUniformInt(root.WithOpName("test/shape_rng"), shape_shape,
+                            ops::Const(root.WithOpName("test/minval"), 1),
+                            ops::Const(root.WithOpName("test/maxval"), 20));
+  Output reshape_input =
+      ops::Placeholder(root.WithOpName("test/reshape_input"), DT_FLOAT,
+                       ops::Placeholder::Shape(TensorShape({500, 500})));
+  Output reshape =
+      ops::Reshape(root.WithOpName("test/reshape"), reshape_input, shape);
+
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_ASSERT_OK(root.ToGraph(graph.get()));
+
+  for (Node* n : graph->nodes()) {
+    if (absl::StartsWith(n->name(), /*prefix=*/"test/")) {
+      n->set_assigned_device_name(string(xla_gpu_device));
+    }
+  }
+  TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph));
+
+  std::unordered_map<string, string> clusters = GetClusters(*graph);
+  EXPECT_NE(clusters["test/shape_rng"], "");
+  EXPECT_NE(clusters["test/reshape"], "");
+  EXPECT_NE(clusters["test/shape_rng"], clusters["test/reshape"]);
+}
+
+TEST(XlaCompilationTest, TensorArrayShapeOnXlaDevice) {
+  absl::string_view xla_gpu_device =
+      "/job:worker/replica:0/task:0/device:XLA_GPU:0";
+  Scope root = Scope::NewRootScope().ExitOnError();
+  ops::TensorArray tensor_array(root.WithOpName("test/tensor_array"), 1,
+                                DT_INT32);
+  Output zero = ops::Const(root.WithOpName("test/zero"), 0);
+  ops::TensorArrayWrite tensor_array_write(
+      root.WithOpName("test/write"), tensor_array.handle, zero,
+      ops::Const(root.WithOpName("test/forty_two"), 42.0f), tensor_array.flow);
+  Output tensor_array_read =
+      ops::TensorArrayRead(root.WithOpName("test/read"), tensor_array.handle,
+                           zero, tensor_array_write.flow_out, DT_INT32);
+  Output reshape =
+      ops::Reshape(root.WithOpName("test/reshape"),
+                   ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT),
+                   tensor_array_read);
+
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_ASSERT_OK(root.ToGraph(graph.get()));
+
+  for (Node* n : graph->nodes()) {
+    if (absl::StartsWith(n->name(), /*prefix=*/"test/")) {
+      n->set_assigned_device_name(string(xla_gpu_device));
+    }
+  }
+  TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph));
+
+  std::unordered_map<string, string> clusters = GetClusters(*graph);
+  EXPECT_NE(clusters["test/read"], "");
+  EXPECT_EQ(clusters["test/read"], clusters["test/reshape"]);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.cc
index 65669877f7..d56d0f8ccf 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.cc
@@ -14,18 +14,35 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.h"
+#include "tensorflow/core/common_runtime/device_factory.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 /*static*/ Status MarkForCompilationPassTestHelper::MarkForCompilation(
     std::unique_ptr<Graph>* graph, FunctionLibraryDefinition* flib_def,
     SessionOptions* session_options) {
-  // Assign all nodes to the CPU device.
+  // Assign all unassigned nodes to the CPU device.
   static const char* kCpuDevice = "/job:localhost/replica:0/task:0/cpu:0";
   for (Node* n : (*graph)->nodes()) {
-    n->set_assigned_device_name(kCpuDevice);
+    if (n->assigned_device_name().empty()) {
+      n->set_assigned_device_name(kCpuDevice);
+    }
   }
 
+  // Call AddDevices to register the XLA devices.
+  //
+  // It may be worth refactoring out XlaOpRegistry::RegisterCompilationDevice to
+  // make this more direct, but probably not worth it solely for this test.
+  std::vector<Device*> devices;
+  TF_RETURN_IF_ERROR(DeviceFactory::AddDevices(*session_options, "", &devices));
+
+  auto delete_devices = gtl::MakeCleanup([&] {
+    for (Device* d : devices) {
+      delete d;
+    }
+  });
+
   GraphOptimizationPassOptions opt_options;
   opt_options.graph = graph;
   opt_options.session_options = session_options;
-- 
GitLab


From 073c418695ac9ef02071de3e08394e781ceca117 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Sep 2018 16:10:38 -0700
Subject: [PATCH 0344/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213536334
---
 .../internal/optimized/optimized_ops.h        |   2 -
 .../internal/reference/reference_ops.h        | 277 +++++++++++++-----
 .../contrib/lite/kernels/internal/types.h     |  14 +-
 3 files changed, 210 insertions(+), 83 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 2fa5d6445e..6f4e135c94 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -2210,7 +2210,6 @@ inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr,
   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(im2col_shape.DimensionsCount(), 4);
 
   const int batch_size = input_shape.Dims(0);
   const int filter_width = filter_shape.Dims(2);
@@ -2376,7 +2375,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(im2col_shape.DimensionsCount(), 4);
 
   const uint8* gemm_input_data = nullptr;
   const RuntimeShape* gemm_input_shape = nullptr;
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 09a4ba7701..87bcc8c219 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -163,28 +163,38 @@ SaturatingRoundingMultiplyByPOTParam(
       SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
 }
 
-inline void Conv(const float* input_data, const Dims<4>& input_dims,
-                 const float* filter_data, const Dims<4>& filter_dims,
-                 const float* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 float output_activation_min, float output_activation_max,
-                 float* output_data, const Dims<4>& output_dims,
-                 float* im2col_data, const Dims<4>& im2col_dims) {
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& filter_shape,
+                 const float* filter_data, const RuntimeShape& bias_shape,
+                 const float* bias_data, const RuntimeShape& output_shape,
+                 float* output_data, const RuntimeShape& im2col_shape,
+                 float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
   (void)im2col_data;  // only used in optimized code.
-  (void)im2col_dims;  // only used in optimized code.
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0);
-  const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0);
+  (void)im2col_shape;  // only used in optimized code.
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
   if (bias_data) {
-    TFLITE_DCHECK_EQ(ArraySize(filter_dims, 3), ArraySize(bias_dims, 0));
-  }
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
   for (int batch = 0; batch < batches; ++batch) {
     for (int out_y = 0; out_y < output_height; ++out_y) {
       for (int out_x = 0; out_x < output_width; ++out_x) {
@@ -202,11 +212,11 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
                 // use zero as a default value.
                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                     (in_y < input_height)) {
-                  float input_value = input_data[Offset(input_dims, in_channel,
-                                                        in_x, in_y, batch)];
+                  float input_value = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
                   float filter_value =
-                      filter_data[Offset(filter_dims, in_channel, filter_x,
-                                         filter_y, out_channel)];
+                      filter_data[Offset(filter_shape, out_channel, filter_y,
+                                         filter_x, in_channel)];
                   total += (input_value * filter_value);
                 }
               }
@@ -214,9 +224,9 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
           }
           float bias_value = 0.0f;
           if (bias_data) {
-            bias_value = bias_data[Offset(bias_dims, out_channel, 0, 0, 0)];
+            bias_value = bias_data[out_channel];
           }
-          output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] =
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
               ActivationFunctionWithMinMax(total + bias_value,
                                            output_activation_min,
                                            output_activation_max);
@@ -226,6 +236,35 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Conv(const float* input_data, const Dims<4>& input_dims,
+                 const float* filter_data, const Dims<4>& filter_dims,
+                 const float* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 float output_activation_min, float output_activation_max,
+                 float* output_data, const Dims<4>& output_dims,
+                 float* im2col_data, const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 template <FusedActivationFunctionType Ac>
 void Conv(const float* input_data, const Dims<4>& input_dims,
           const float* filter_data, const Dims<4>& filter_dims,
@@ -243,6 +282,7 @@ void Conv(const float* input_data, const Dims<4>& input_dims,
        im2col_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void Conv(const float* input_data, const Dims<4>& input_dims,
@@ -259,6 +299,7 @@ void Conv(const float* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void Conv(const float* input_data, const Dims<4>& input_dims,
@@ -272,31 +313,45 @@ void Conv(const float* input_data, const Dims<4>& input_dims,
            output_dims, im2col_data, im2col_dims);
 }
 
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
-                 int32 input_offset, const uint8* filter_data,
-                 const Dims<4>& filter_dims, int32 filter_offset,
-                 const int32* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 int32 output_offset, int32 output_multiplier, int output_shift,
-                 int32 output_activation_min, int32 output_activation_max,
-                 uint8* output_data, const Dims<4>& output_dims,
-                 uint8* im2col_data, const Dims<4>& im2col_dims,
-                 gemmlowp::GemmContext* gemm_context) {
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const uint8* input_data, const RuntimeShape& filter_shape,
+                 const uint8* filter_data, const RuntimeShape& bias_shape,
+                 const int32* bias_data, const RuntimeShape& output_shape,
+                 uint8* output_data, const RuntimeShape& im2col_shape,
+                 uint8* im2col_data, gemmlowp::GemmContext* gemm_context) {
   (void)im2col_data;   // only used in optimized code.
-  (void)im2col_dims;   // only used in optimized code.
+  (void)im2col_shape;  // only used in optimized code.
   (void)gemm_context;  // only used in optimized code.
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0);
-  const int output_depth =
-      MatchingArraySize(filter_dims, 3, bias_dims, 0, output_dims, 0);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
+
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (bias_data) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
   for (int batch = 0; batch < batches; ++batch) {
     for (int out_y = 0; out_y < output_height; ++out_y) {
       for (int out_x = 0; out_x < output_width; ++out_x) {
@@ -314,11 +369,11 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
                 // use zero as a default value.
                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                     (in_y < input_height)) {
-                  int32 input_val = input_data[Offset(input_dims, in_channel,
-                                                      in_x, in_y, batch)];
+                  int32 input_val = input_data[Offset(input_shape, batch, in_y,
+                                                      in_x, in_channel)];
                   int32 filter_val =
-                      filter_data[Offset(filter_dims, in_channel, filter_x,
-                                         filter_y, out_channel)];
+                      filter_data[Offset(filter_shape, out_channel, filter_y,
+                                         filter_x, in_channel)];
                   acc +=
                       (filter_val + filter_offset) * (input_val + input_offset);
                 }
@@ -326,14 +381,14 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
             }
           }
           if (bias_data) {
-            acc += bias_data[Offset(bias_dims, out_channel, 0, 0, 0)];
+            acc += bias_data[out_channel];
           }
           acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
                                               kReverseShift * output_shift);
           acc += output_offset;
           acc = std::max(acc, output_activation_min);
           acc = std::min(acc, output_activation_max);
-          output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] =
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
               static_cast<uint8>(acc);
         }
       }
@@ -341,6 +396,43 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
+                 int32 input_offset, const uint8* filter_data,
+                 const Dims<4>& filter_dims, int32 filter_offset,
+                 const int32* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 int32 output_offset, int32 output_multiplier, int output_shift,
+                 int32 output_activation_min, int32 output_activation_max,
+                 uint8* output_data, const Dims<4>& output_dims,
+                 uint8* im2col_data, const Dims<4>& im2col_dims,
+                 gemmlowp::GemmContext* gemm_context) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data, gemm_context);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
                  int32 input_offset, const uint8* filter_data,
                  const Dims<4>& filter_dims, int32 filter_offset,
@@ -359,6 +451,7 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims, gemm_context);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
@@ -388,6 +481,7 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims, gemm_context);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void Conv(const uint8* input_data, const Dims<4>& input_dims,
@@ -4661,21 +4755,30 @@ void Transpose(const T* input, const Dims<4>& input_dims, T* output,
             output);
 }
 
-inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, float* output_data,
-                          const Dims<4>& output_dims, float* /*im2col_data*/,
-                          const Dims<4>& /*im2col_dims*/) {
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0);
-  const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
+inline void TransposeConv(
+    const ConvParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& filter_shape,
+    const float* filter_data, const RuntimeShape& output_shape,
+    float* output_data, const RuntimeShape& im2col_shape, float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  (void)im2col_data;   // only used in optimized code.
+  (void)im2col_shape;  // only used in optimized code.
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
 
   // Although transpose convolution simplifies to convolution with transposed
   // weights for strides of 1, non-unitary striding complicates matters. To
@@ -4684,7 +4787,7 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
   // computing their influence on the output, rather than looping through the
   // output elements in the typical "gather" access pattern of a conv. We
   // therefore must initialize the output array to zero.
-  const int num_elements = FlatSize(output_dims);
+  const int num_elements = output_shape.FlatSize();
   for (int i = 0; i < num_elements; i++) {
     output_data[i] = 0.0f;
   }
@@ -4707,13 +4810,14 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
                 // We cannot accumulate out of bounds
                 if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
                     (out_y < output_height)) {
-                  float input_value = input_data[Offset(input_dims, in_channel,
-                                                        in_x, in_y, batch)];
+                  float input_value = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
                   float filter_value =
-                      filter_data[Offset(filter_dims, in_channel, filter_x,
-                                         filter_y, out_channel)];
-                  output_data[Offset(output_dims, out_channel, out_x, out_y,
-                                     batch)] += input_value * filter_value;
+                      filter_data[Offset(filter_shape, out_channel, filter_y,
+                                         filter_x, in_channel)];
+                  output_data[Offset(output_shape, batch, out_y, out_x,
+                                     out_channel)] +=
+                      input_value * filter_value;
                 }
               }
             }
@@ -4724,6 +4828,27 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, float* output_data,
+                          const Dims<4>& output_dims, float* im2col_data,
+                          const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+
+  TransposeConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(output_dims),
+                output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
 template <typename T>
 inline bool EqualFn(T lhs, T rhs) {
   return lhs == rhs;
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index ac4626bc30..b70a87d0dc 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -179,12 +179,15 @@ class RuntimeShape {
       dims_[i] = val;
     }
   }
+
   inline int32* DimsData() {
     return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
   }
   inline const int32* DimsData() const {
     return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
   }
+  // The caller must ensure that the shape is no bigger than 4-D.
+  inline const int32* DimsDataUpTo4D() const { return dims_; }
 
   inline void Resize(int dimensions_count) {
     if (size_ > kMaxSmallSize) {
@@ -346,11 +349,12 @@ inline size_t ReducedOutputOffset(const int num_dims, const int* dims,
 }
 
 inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) {
-  TFLITE_DCHECK(i0 >= 0 && i0 < shape.Dims(0));
-  TFLITE_DCHECK(i1 >= 0 && i1 < shape.Dims(1));
-  TFLITE_DCHECK(i2 >= 0 && i2 < shape.Dims(2));
-  TFLITE_DCHECK(i3 >= 0 && i3 < shape.Dims(3));
-  const int* dims_data = shape.DimsData();
+  TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4);
+  const int* dims_data = shape.DimsDataUpTo4D();
+  TFLITE_DCHECK(i0 >= 0 && i0 < dims_data[0]);
+  TFLITE_DCHECK(i1 >= 0 && i1 < dims_data[1]);
+  TFLITE_DCHECK(i2 >= 0 && i2 < dims_data[2]);
+  TFLITE_DCHECK(i3 >= 0 && i3 < dims_data[3]);
   return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
 }
 
-- 
GitLab


From 86b8f034e4d3d3d12d1e9d1b94170b271491bed3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Sep 2018 16:39:12 -0700
Subject: [PATCH 0345/1357] Reject RESHAPE if new_shape tensor is not provided.

PiperOrigin-RevId: 213541006
---
 tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc | 2 +-
 tensorflow/contrib/lite/nnapi_delegate.cc                 | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
index c6587b3d3f..d85e576284 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
@@ -518,7 +518,7 @@ class NNAPIDelegateKernel {
         }
         break;
       case kTfLiteBuiltinReshape:
-        if (version == 1) {
+        if (version == 1 && node->inputs->size == 2) {
           return [](const NNAPIOpMappingArgs& mapping_args)
                      -> ANeuralNetworksOperationType {
             return ANEURALNETWORKS_RESHAPE;
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index a1c7434599..f23a0ccb80 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -512,6 +512,10 @@ TfLiteStatus AddOpsAndParams(
         nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
         break;
       case tflite::BuiltinOperator_RESHAPE:
+        if (node.inputs->size != 2) {
+          logError("NNAPI only supports 2-input RESHAPE");
+          return kTfLiteError;
+        }
         nn_op_type = ANEURALNETWORKS_RESHAPE;
         // add_reshape_params(node.builtin_data);
         break;
-- 
GitLab


From b9e6bbc95bcffa481d29e31b448a03a91ba17eac Mon Sep 17 00:00:00 2001
From: Goutham Bhat <goutham@google.com>
Date: Tue, 18 Sep 2018 16:44:59 -0700
Subject: [PATCH 0346/1357] Return OrderedDict as eval results should be sorted
 by global_step key.

PiperOrigin-RevId: 213541935
---
 .../contrib/estimator/python/estimator/early_stopping.py      | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/early_stopping.py b/tensorflow/contrib/estimator/python/estimator/early_stopping.py
index 3eab21d5ac..e6e25e319f 100644
--- a/tensorflow/contrib/estimator/python/estimator/early_stopping.py
+++ b/tensorflow/contrib/estimator/python/estimator/early_stopping.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
 import operator
 import os
 
@@ -306,7 +307,8 @@ def read_eval_metrics(eval_dir):
         metrics[value.tag] = value.simple_value
     if metrics:
       eval_metrics_dict[event.step] = metrics
-  return eval_metrics_dict
+  return collections.OrderedDict(
+      sorted(eval_metrics_dict.items(), key=lambda t: t[0]))
 
 
 def _stop_if_threshold_crossed_hook(estimator, metric_name, threshold,
-- 
GitLab


From 93b5dea9663c00d3bb06348143b50b73b6fbacfb Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 18 Sep 2018 16:58:32 -0700
Subject: [PATCH 0347/1357] Add ConstantScalar, WithPredicate, Disjunction, and
 OpAnyOrder (where Op is a commutative binary operator) to the XLA pattern
 matcher.

PiperOrigin-RevId: 213543953
---
 tensorflow/compiler/xla/service/BUILD         |   3 +
 .../compiler/xla/service/pattern_matcher.h    | 143 +++++++++++++++++-
 .../xla/service/pattern_matcher_test.cc       |  84 ++++++++++
 3 files changed, 222 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index fb80c78f68..68bf56c1b1 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -365,8 +365,11 @@ cc_library(
     hdrs = ["pattern_matcher.h"],
     deps = [
         ":hlo",
+        ":hlo_casting_utils",
+        "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/utility",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h
index 4869db79e7..7d4d62ecb9 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher.h
+++ b/tensorflow/compiler/xla/service/pattern_matcher.h
@@ -17,8 +17,12 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_XLA_SERVICE_PATTERN_MATCHER_H_
 
 #include "absl/strings/string_view.h"
+#include "absl/utility/utility.h"
 #include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 
@@ -228,8 +232,46 @@ class LayoutPattern {
   LayoutType** matched_layout_;
 };
 
+template <typename Item, typename... Patterns>
+class AnyOfPattern {
+ public:
+  explicit AnyOfPattern(const Patterns&... patterns) : patterns_(patterns...) {}
+
+  bool Match(const Item* item) const {
+    return MatchImpl(item, std::integral_constant<size_t, 0>());
+  }
+
+  bool Match(Item* item) const {
+    return MatchImpl(item, std::integral_constant<size_t, 0>());
+  }
+
+ private:
+  template <typename ItemType, size_t index>
+  bool MatchImpl(ItemType* item, std::integral_constant<size_t, index>) const {
+    return std::get<index>(patterns_).Match(item) ||
+           MatchImpl(item, std::integral_constant<size_t, index + 1>());
+  }
+
+  template <typename ItemType>
+  bool MatchImpl(ItemType* item,
+                 std::integral_constant<size_t, sizeof...(Patterns)>) const {
+    return false;
+  }
+
+  std::tuple<Patterns...> patterns_;
+};
 }  // namespace detail
 
+// Returns a pattern that represents the logical disjunction of the input
+// patterns. The returned pattern matches from left to right, and stops on the
+// first match.
+template <typename Item, typename... Patterns>
+detail::AnyOfPattern<typename std::remove_const<Item>::type, Patterns...> AnyOf(
+    const Patterns&... patterns) {
+  return detail::AnyOfPattern<typename std::remove_const<Item>::type,
+                              Patterns...>(patterns...);
+}
+
 // Creates a layout pattern that will capture the matched layout in the
 // argument.
 inline constexpr detail::LayoutPattern<const ::xla::Layout,
@@ -752,6 +794,27 @@ class HloInstructionPatternTupleIndexImpl {
   int64 tuple_index_;
 };
 
+template <typename Previous, typename ItemType, typename Predicate>
+class HloPredicatePatternImpl {
+ public:
+  explicit HloPredicatePatternImpl(const Previous& previous, Predicate pred)
+      : previous_(previous), pred_(std::move(pred)) {}
+
+  bool Match(const ItemType* item) const {
+    return previous_.Match(item) && pred_(item);
+  }
+
+  bool Match(ItemType* item) const {
+    return previous_.Match(item) && pred_(item);
+  }
+
+ private:
+  Previous previous_;
+  Predicate pred_;
+};
+
+struct PatternFriend;
+
 // A pattern that matches HloInstructions.
 template <typename HloInstructionType, typename Impl>
 class HloInstructionPattern {
@@ -879,6 +942,21 @@ class HloInstructionPattern {
   }
 
  private:
+  template <typename Predicate>
+  constexpr HloInstructionPattern<
+      HloInstructionType,
+      HloPredicatePatternImpl<
+          Impl, typename std::remove_const<HloInstructionType>::type,
+          Predicate>>
+  WithPredicate(Predicate pred) const {
+    using NewImplType = HloPredicatePatternImpl<
+        Impl, typename std::remove_const<HloInstructionType>::type, Predicate>;
+    return HloInstructionPattern<HloInstructionType, NewImplType>(
+        NewImplType(impl_, std::move(pred)), matched_inst_);
+  }
+
+  friend struct PatternFriend;
+
   Impl impl_;
   HloInstructionType** matched_inst_;
 };
@@ -1005,31 +1083,50 @@ XLA_UNOP_PATTERN(Transpose)
         .WithOperand(0, std::forward<Lhs>(lhs))                             \
         .WithOperand(1, std::forward<Rhs>(rhs));                            \
   }
-XLA_BINOP_PATTERN(Add)
+
+#define XLA_COMMUTATIVE_BINOP_PATTERN(NAME)                                 \
+  XLA_BINOP_PATTERN(NAME)                                                   \
+                                                                            \
+  template <typename Lhs, typename Rhs>                                     \
+  inline auto NAME##AnyOrder(Lhs&& lhs, Rhs&& rhs)                          \
+      ->decltype(AnyOf<HloInstruction>(NAME(lhs, rhs), NAME(rhs, lhs))) {   \
+    return AnyOf<HloInstruction>(NAME(lhs, rhs), NAME(rhs, lhs));           \
+  }                                                                         \
+                                                                            \
+  template <typename HloInstructionType, typename Lhs, typename Rhs>        \
+  inline auto NAME##AnyOrder(HloInstructionType** matched_inst, Lhs&& lhs,  \
+                             Rhs&& rhs)                                     \
+      ->decltype(AnyOf<HloInstructionType>(NAME(matched_inst, lhs, rhs),    \
+                                           NAME(matched_inst, rhs, lhs))) { \
+    return AnyOf<HloInstructionType>(NAME(matched_inst, lhs, rhs),          \
+                                     NAME(matched_inst, rhs, lhs));         \
+  }
+XLA_COMMUTATIVE_BINOP_PATTERN(Add)
 XLA_BINOP_PATTERN(Atan2)
 XLA_BINOP_PATTERN(Divide)
 XLA_BINOP_PATTERN(Complex)
 XLA_BINOP_PATTERN(Dot)
-XLA_BINOP_PATTERN(Eq)
+XLA_COMMUTATIVE_BINOP_PATTERN(Eq)
 XLA_BINOP_PATTERN(Gather)
 XLA_BINOP_PATTERN(Ge)
 XLA_BINOP_PATTERN(Gt)
 XLA_BINOP_PATTERN(Le)
 XLA_BINOP_PATTERN(Lt)
-XLA_BINOP_PATTERN(Maximum)
-XLA_BINOP_PATTERN(Minimum)
-XLA_BINOP_PATTERN(Multiply)
-XLA_BINOP_PATTERN(Ne)
+XLA_COMMUTATIVE_BINOP_PATTERN(Maximum)
+XLA_COMMUTATIVE_BINOP_PATTERN(Minimum)
+XLA_COMMUTATIVE_BINOP_PATTERN(Multiply)
+XLA_COMMUTATIVE_BINOP_PATTERN(Ne)
 XLA_BINOP_PATTERN(Outfeed)
 XLA_BINOP_PATTERN(Power)
 XLA_BINOP_PATTERN(Remainder)
 XLA_BINOP_PATTERN(Send)
 XLA_BINOP_PATTERN(Subtract)
-XLA_BINOP_PATTERN(And)
-XLA_BINOP_PATTERN(Or)
+XLA_COMMUTATIVE_BINOP_PATTERN(And)
+XLA_COMMUTATIVE_BINOP_PATTERN(Or)
 XLA_BINOP_PATTERN(ShiftLeft)
 XLA_BINOP_PATTERN(ShiftRightArithmetic)
 XLA_BINOP_PATTERN(ShiftRightLogical)
+#undef XLA_COMMUTATIVE_BINOP_PATTERN
 #undef XLA_BINOP_PATTERN
 
 // Helpers for ternary instructions.
@@ -1070,6 +1167,30 @@ XLA_TERNOP_PATTERN(Clamp);
 XLA_TERNOP_PATTERN(Select);
 #undef XLA_TERNOP_PATTERN
 
+namespace detail {
+struct PatternFriend {
+  template <typename T>
+  static auto ConstantScalar(T constant) -> decltype(
+      Constant()
+          .WithShape(match::Shape().IsScalar())
+          .WithPredicate(
+              std::declval<std::function<bool(const HloInstruction*)>>())) {
+    std::function<bool(const HloInstruction*)> pred =
+        [constant](const HloInstruction* instr) {
+          const auto& literal = Cast<HloConstantInstruction>(instr)->literal();
+          auto status_or_const = LiteralUtil::CreateR0(constant).Convert(
+              literal.shape().element_type());
+          return status_or_const.ok() &&
+                 literal == status_or_const.ConsumeValueOrDie();
+        };
+
+    return Constant()
+        .WithShape(match::Shape().IsScalar())
+        .WithPredicate(std::move(pred));
+  }
+};
+}  // namespace detail
+
 // Helpers for matching non-constant instructions.
 inline auto NonConstant() -> decltype(Op().IsNonConstant()) {
   return Op().IsNonConstant();
@@ -1107,6 +1228,12 @@ inline auto GetTupleElement(HloInstructionType** matched_inst, Arg&& arg,
       .WithTupleIndex(tuple_index);
 }
 
+template <typename T>
+inline auto ConstantScalar(T constant)
+    -> decltype(detail::PatternFriend::ConstantScalar(constant)) {
+  return detail::PatternFriend::ConstantScalar(constant);
+}
+
 }  // namespace match
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
index a530581c34..b3a2c954b3 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc
+++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
@@ -211,5 +211,89 @@ TEST(PatternMatcherTest, GetTupleElement) {
   EXPECT_TRUE(Match(root, match::GetTupleElement(match::Op(), 1)));
 }
 
+TEST(PatternMatcherTest, AnyOf) {
+  constexpr char kModuleStr[] = R"(
+    HloModule test_module ENTRY test { ROOT constant = f16[] constant(1) })";
+  TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr));
+  auto* root = hlo_module->entry_computation()->root_instruction();
+
+  EXPECT_TRUE(
+      Match(root, match::AnyOf<HloInstruction>(match::ConstantScalar(0),
+                                               match::ConstantScalar(1))));
+  EXPECT_TRUE(
+      Match(root, match::AnyOf<HloInstruction>(match::ConstantScalar(1),
+                                               match::ConstantScalar(0))));
+  EXPECT_FALSE(
+      Match(root, match::AnyOf<HloInstruction>(match::ConstantScalar(0),
+                                               match::ConstantScalar(2))));
+}
+
+TEST(PatternMatcherTest, ConstantScalar) {
+  constexpr char kModuleStr[] = R"(
+    HloModule test_module ENTRY test { ROOT constant = f16[] constant(42) })";
+  TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr));
+  auto* root = hlo_module->entry_computation()->root_instruction();
+
+  EXPECT_TRUE(Match(root, match::ConstantScalar(42)));
+  EXPECT_FALSE(Match(root, match::ConstantScalar(41)));
+  EXPECT_FALSE(Match(root, match::ConstantScalar(0)));
+}
+
+TEST(PatternMatcherTest, MultiplyAnyOrder) {
+  using match::ConstantScalar;
+  using match::MultiplyAnyOrder;
+
+  constexpr char kModuleStr[] = R"(
+    HloModule test_module
+    ENTRY test {
+      lhs = f16[] constant(42)
+      rhs = f16[] constant(52)
+      ROOT multiply = f16[] multiply(lhs, rhs)
+    })";
+  TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr));
+  auto* root = hlo_module->entry_computation()->root_instruction();
+  const HloInstruction* instr;
+
+  EXPECT_TRUE(Match(
+      root, MultiplyAnyOrder(&instr, ConstantScalar(42), ConstantScalar(52))));
+  EXPECT_TRUE(Match(
+      root, MultiplyAnyOrder(&instr, ConstantScalar(52), ConstantScalar(42))));
+}
+
+TEST(PatternMatcherTest, AnyOfShortCircuit) {
+  using match::AnyOf;
+  using match::Multiply;
+  using match::Op;
+
+  constexpr char kModuleStr[] = R"(
+    HloModule test_module
+    ENTRY test {
+      lhs = f16[] constant(42)
+      rhs = f16[] constant(52)
+      ROOT multiply = f16[] multiply(lhs, rhs)
+    })";
+  TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr));
+  auto* root = hlo_module->entry_computation()->root_instruction();
+
+  {
+    const HloInstruction* mul = nullptr;
+    const HloInstruction* any = nullptr;
+
+    ASSERT_TRUE(Match(
+        root, AnyOf<HloInstruction>(Multiply(&mul, Op(), Op()), Op(&any))));
+    EXPECT_NE(nullptr, mul);
+    EXPECT_EQ(nullptr, any);
+  }
+  {
+    const HloInstruction* mul = nullptr;
+    const HloInstruction* any = nullptr;
+
+    ASSERT_TRUE(Match(
+        root, AnyOf<HloInstruction>(Op(&any), Multiply(&mul, Op(), Op()))));
+    EXPECT_NE(nullptr, any);
+    EXPECT_EQ(nullptr, mul);
+  }
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 867449616aa43f9306247cebdd1edac85b70852a Mon Sep 17 00:00:00 2001
From: Pavithra Vijay <psv@google.com>
Date: Tue, 18 Sep 2018 17:22:53 -0700
Subject: [PATCH 0348/1357] Convert the new metric instances to (value_op,
 update_op) tuple in the EstimatorSpec.

PiperOrigin-RevId: 213548081
---
 tensorflow/python/estimator/estimator.py | 14 ++------------
 tensorflow/python/estimator/model_fn.py  |  2 ++
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index ffe1e30da0..2dc5d099a0 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -41,7 +41,6 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import tensor_util
-from tensorflow.python.keras import metrics
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import metrics as metrics_lib
@@ -1792,18 +1791,9 @@ def _extract_metric_update_ops(eval_dict, distribution=None):
   value_ops = {}
   # Sort metrics lexicographically so graph is identical every time.
   for name, value in sorted(six.iteritems(eval_dict)):
-    if isinstance(value, metrics.Metric):
-      metric_result = value.result()
-      # We expect only one update op for every metric when there is no
-      # distribution strategy.
-      metric_update = value.updates if distribution else value.updates[0]
-    else:
-      metric_result = value[0]
-      metric_update = value[1]
-
-    value_ops[name] = metric_result
+    value_ops[name] = value[0]
     update_ops.append(
-        distribution.group(metric_update) if distribution else metric_update)
+        distribution.group(value[1]) if distribution else value[1])
 
   update_op = control_flow_ops.group(*update_ops) if update_ops else None
   return update_op, value_ops
diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 0f26a5bba4..824789467d 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -308,6 +308,8 @@ class EstimatorSpec(
     for key, value in six.iteritems(eval_metric_ops):
       if isinstance(value, Metric):
         vars_to_add.update(value.variables)
+        # Convert Metric instances to (value_tensor, update_op) tuple.
+        eval_metric_ops[key] = (value.result(), value.updates[0])
     # Remove variables that are in the local variables collection already.
     vars_to_add = vars_to_add.difference(local_vars)
     for v in vars_to_add:
-- 
GitLab


From 38d8f893e0ab8376cf97c40fde78002f31776c92 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 18 Sep 2018 17:34:53 -0700
Subject: [PATCH 0349/1357] Add a new function to load kernel libraries and
 library folders.

PiperOrigin-RevId: 213549838
---
 tensorflow/python/framework/load_library.py   | 65 +++++++++++++++++++
 .../tools/api/golden/v1/tensorflow.pbtxt      |  4 ++
 .../tools/api/golden/v2/tensorflow.pbtxt      |  4 ++
 3 files changed, 73 insertions(+)

diff --git a/tensorflow/python/framework/load_library.py b/tensorflow/python/framework/load_library.py
index 535c6017f5..908a5f521e 100644
--- a/tensorflow/python/framework/load_library.py
+++ b/tensorflow/python/framework/load_library.py
@@ -18,14 +18,18 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import errno
 import hashlib
 import imp
+import os
+import platform
 import sys
 import threading  # pylint: disable=unused-import
 
 from tensorflow.core.framework import op_def_pb2
 from tensorflow.core.lib.core import error_codes_pb2  # pylint: disable=unused-import
 from tensorflow.python import pywrap_tensorflow as py_tf
+from tensorflow.python.lib.io import file_io
 from tensorflow.python.util import compat
 from tensorflow.python.util.tf_export import tf_export
 
@@ -98,3 +102,64 @@ def load_file_system_library(library_filename):
     RuntimeError: when unable to load the library.
   """
   py_tf.TF_LoadLibrary(library_filename)
+
+
+def _is_shared_object(filename):
+  """Check the file to see if it is a shared object, only using extension."""
+  if platform.system() == 'Linux':
+    if filename.endswith('.so'):
+      return True
+    else:
+      index = filename.rfind('.so.')
+      if index == -1:
+        return False
+      else:
+        # A shared object with the API version in filename
+        return filename[index + 4].isdecimal()
+  elif platform.system() == 'Darwin':
+    return filename.endswith('.dylib')
+  elif platform.system() == 'Windows':
+    return filename.endswith('.dll')
+  else:
+    return False
+
+
+@tf_export('load_library')
+def load_library(library_location):
+  """Loads a TensorFlow plugin.
+
+  "library_location" can be a path to a specific shared object, or a folder.
+  If it is a folder, all sahred objects that are named "libtfkernel*" will be
+  loaded. When the library is loaded, kernels registered in the library via the
+  `REGISTER_*` macros are made available in the TensorFlow process.
+
+  Args:
+    library_location: Path to the plugin or the folder of plugins.
+      Relative or absolute filesystem path to a dynamic library file or folder.
+
+  Returns:
+    None
+
+  Raises:
+    OSError: When the file to be loaded is not found.
+    RuntimeError: when unable to load the library.
+  """
+  if file_io.file_exists(library_location):
+    if file_io.is_directory(library_location):
+      directory_contents = file_io.list_directory(library_location)
+
+      kernel_libraries = [
+          os.path.join(library_location, f) for f in directory_contents
+          if _is_shared_object(f)]
+    else:
+      kernel_libraries = [library_location]
+
+    for lib in kernel_libraries:
+      py_tf.TF_LoadLibrary(lib)
+
+  else:
+    raise OSError(
+        errno.ENOENT,
+        'The file or folder to load kernel libraries from does not exist.',
+        library_location)
+
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index dd9f7c49e0..14ab885c91 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1372,6 +1372,10 @@ tf_module {
     name: "load_file_system_library"
     argspec: "args=[\'library_filename\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "load_library"
+    argspec: "args=[\'library_location\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "load_op_library"
     argspec: "args=[\'library_filename\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 9332e16bf6..323d2fc519 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -1320,6 +1320,10 @@ tf_module {
     name: "load_file_system_library"
     argspec: "args=[\'library_filename\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "load_library"
+    argspec: "args=[\'library_location\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "load_op_library"
     argspec: "args=[\'library_filename\'], varargs=None, keywords=None, defaults=None"
-- 
GitLab


From dff19b5a8b36ddf4aa51ce978d97b63129a7fdeb Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 18 Sep 2018 17:50:43 -0700
Subject: [PATCH 0350/1357] Add layout information to logging.

PiperOrigin-RevId: 213551652
---
 .../compiler/xla/service/gpu/cudnn_convolution_runner.cc    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
index 2a86ac265e..3310ee848e 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
@@ -92,9 +92,9 @@ Status RunCudnnConvolutionImpl(CudnnConvParams params,
   VLOG(3) << "tensor_ops_enabled: "
           << algorithm.algorithm().tensor_ops_enabled();
   VLOG(3) << "Convolution kind: " << CudnnConvKindToString(kind);
-  VLOG(3) << "input shape: { " << ShapeUtil::HumanString(input_shape) << " }";
-  VLOG(3) << "filter shape: { " << ShapeUtil::HumanString(filter_shape) << " }";
-  VLOG(3) << "Output shape: { " << ShapeUtil::HumanString(output_shape) << " }";
+  VLOG(3) << "input shape: " << ShapeUtil::HumanStringWithLayout(input_shape);
+  VLOG(3) << "filter shape: " << ShapeUtil::HumanStringWithLayout(filter_shape);
+  VLOG(3) << "Output shape: " << ShapeUtil::HumanStringWithLayout(output_shape);
   VLOG(3) << "Window: { " << window.ShortDebugString() << " }";
   VLOG(3) << "Dim nums: { " << dnums.ShortDebugString() << " }";
 
-- 
GitLab


From c2dc702159cfccb623b99daf2f9df875a1f3dbfd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Sep 2018 17:56:20 -0700
Subject: [PATCH 0351/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 213552354

---
 tensorflow/go/op/wrappers.go | 986 +++++++++++++++++++++++++++++++----
 1 file changed, 892 insertions(+), 94 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 322b35dd91..eb636dbf54 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -332,7 +332,7 @@ func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQua
 // Creates a new tensor by applying sparse `updates` to individual values or
 // slices within a tensor (initially zero for numeric, empty for string) of
 // the given `shape` according to indices.  This operator is the inverse of the
-// @{tf.gather_nd} operator which extracts values or slices from a given tensor.
+// `tf.gather_nd` operator which extracts values or slices from a given tensor.
 //
 // If `indices` contains duplicates, then their updates are accumulated (summed).
 //
@@ -1473,7 +1473,7 @@ type StridedSliceAttr func(optionalAttr)
 //
 // value: a bitmask where a bit i being 1 means to ignore the begin
 // value and instead use the largest interval possible. At runtime
-// begin[i] will be replaced with `[0, n-1) if `stride[i] > 0` or
+// begin[i] will be replaced with `[0, n-1)` if `stride[i] > 0` or
 // `[-1, n-1]` if `stride[i] < 0`
 // If not specified, defaults to 0
 func StridedSliceBeginMask(value int64) StridedSliceAttr {
@@ -1856,6 +1856,32 @@ func ReverseSequence(scope *Scope, input tf.Output, seq_lengths tf.Output, seq_d
 	return op.Output(0)
 }
 
+// Ensures that the tensor's shape matches the expected shape.
+//
+// Raises an error if the input tensor's shape does not match the specified shape.
+// Returns the input tensor otherwise.
+//
+// Arguments:
+//	input: A tensor, whose shape is to be validated.
+//	shape: The expected (possibly partially specified) shape of the input tensor.
+//
+// Returns A tensor with the same shape and contents as the input tensor or value.
+func EnsureShape(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"shape": shape}
+	opspec := tf.OpSpec{
+		Type: "EnsureShape",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // UniqueWithCountsV2Attr is an optional argument to UniqueWithCountsV2.
 type UniqueWithCountsV2Attr func(optionalAttr)
 
@@ -2259,7 +2285,7 @@ func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Ou
 //
 //     output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]]
 //
-// Whereas in @{tf.gather} `indices` defines slices into the first
+// Whereas in `tf.gather` `indices` defines slices into the first
 // dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the
 // first `N` dimensions of `params`, where `N = indices.shape[-1]`.
 //
@@ -2356,6 +2382,8 @@ func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Ou
 //     output = [['b0', 'b1'], ['d0', 'c1']]
 // ```
 //
+// See also `tf.gather` and `tf.batch_gather`.
+//
 // Arguments:
 //	params: The tensor from which to gather values.
 //	indices: Index tensor.
@@ -2445,6 +2473,16 @@ func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...Gathe
 //                      [9, 9, 9]]
 // ```
 //
+// `tf.fill` differs from `tf.constant` in a few ways:
+//
+// *   `tf.fill` only supports scalar contents, whereas `tf.constant` supports
+//     Tensor values.
+// *   `tf.fill` creates an Op in the computation graph that constructs the actual
+//     Tensor value at runtime. This is in contrast to `tf.constant` which embeds
+//     the entire Tensor into the graph with a `Const` node.
+// *   Because `tf.fill` evaluates at graph runtime, it supports dynamic shapes
+//     based on other runtime Tensors, unlike `tf.constant`.
+//
 // Arguments:
 //	dims: 1-D. Represents the shape of the output tensor.
 //	value: 0-D (scalar). Value to fill the returned tensor.
@@ -2858,6 +2896,25 @@ func GuaranteeConst(scope *Scope, input tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
+// Returns a constant tensor on the host. Only for writing C++ tests.
+//
+// Arguments:
+//	value: Attr `value` is the tensor to return.
+//
+func HostConst(scope *Scope, value tf.Tensor, dtype tf.DataType) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"value": value, "dtype": dtype}
+	opspec := tf.OpSpec{
+		Type: "HostConst",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Splits a tensor into `num_split` tensors along one dimension.
 //
 // Arguments:
@@ -3377,6 +3434,204 @@ func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
+// Bucketize each feature based on bucket boundaries.
+//
+// An op that returns a list of float tensors, where each tensor represents the
+// bucketized values for a single feature.
+//
+// Arguments:
+//	float_values: float; List of Rank 2 Tensor each containing float values for a single feature.
+//	bucket_boundaries: float; List of Rank 1 Tensors each containing the bucket boundaries for a single
+// feature.
+//
+// Returns int; List of Rank 2 Tensors each containing the bucketized values for a single feature.
+func BoostedTreesBucketize(scope *Scope, float_values []tf.Output, bucket_boundaries []tf.Output) (buckets []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesBucketize",
+		Input: []tf.Input{
+			tf.OutputList(float_values), tf.OutputList(bucket_boundaries),
+		},
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if buckets, idx, err = makeOutputList(op, idx, "buckets"); err != nil {
+		scope.UpdateErr("BoostedTreesBucketize", err)
+		return
+	}
+	return buckets
+}
+
+// BoostedTreesQuantileStreamResourceFlushAttr is an optional argument to BoostedTreesQuantileStreamResourceFlush.
+type BoostedTreesQuantileStreamResourceFlushAttr func(optionalAttr)
+
+// BoostedTreesQuantileStreamResourceFlushGenerateQuantiles sets the optional generate_quantiles attribute to value.
+//
+// value: bool; If True, the output will be the num_quantiles for each stream where the ith
+// entry is the ith quantile of the input with an approximation error of epsilon.
+// Duplicate values may be present.
+// If False, the output will be the points in the histogram that we got which roughly
+// translates to 1/epsilon boundaries and without any duplicates.
+// Default to False.
+// If not specified, defaults to false
+func BoostedTreesQuantileStreamResourceFlushGenerateQuantiles(value bool) BoostedTreesQuantileStreamResourceFlushAttr {
+	return func(m optionalAttr) {
+		m["generate_quantiles"] = value
+	}
+}
+
+// Flush the summaries for a quantile stream resource.
+//
+// An op that flushes the summaries for a quantile stream resource.
+//
+// Arguments:
+//	quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource.
+//	num_buckets: int; approximate number of buckets unless using generate_quantiles.
+//
+// Returns the created operation.
+func BoostedTreesQuantileStreamResourceFlush(scope *Scope, quantile_stream_resource_handle tf.Output, num_buckets tf.Output, optional ...BoostedTreesQuantileStreamResourceFlushAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesQuantileStreamResourceFlush",
+		Input: []tf.Input{
+			quantile_stream_resource_handle, num_buckets,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Add the quantile summaries to each quantile stream resource.
+//
+// An op that adds a list of quantile summaries to a quantile stream resource. Each
+// summary Tensor is rank 2, containing summaries (value, weight, min_rank, max_rank)
+// for a single feature.
+//
+// Arguments:
+//	quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource.
+//	summaries: string; List of Rank 2 Tensor each containing the summaries for a single feature.
+//
+// Returns the created operation.
+func BoostedTreesQuantileStreamResourceAddSummaries(scope *Scope, quantile_stream_resource_handle tf.Output, summaries []tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesQuantileStreamResourceAddSummaries",
+		Input: []tf.Input{
+			quantile_stream_resource_handle, tf.OutputList(summaries),
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Makes the summary of quantiles for the batch.
+//
+// An op that takes a list of tensors and outputs the quantile summaries for each tensor.
+//
+// Arguments:
+//	float_values: float; List of Rank 2 Tensors each containing values for a single feature.
+//	example_weights: float; Rank 1 Tensor with weights per instance.
+//	epsilon: float; The required maximum approximation error.
+//
+// Returns float; List of Rank 2 Tensors each containing the quantile summary (value, weight,
+// min_rank, max_rank) of a single feature.
+func BoostedTreesMakeQuantileSummaries(scope *Scope, float_values []tf.Output, example_weights tf.Output, epsilon tf.Output) (summaries []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesMakeQuantileSummaries",
+		Input: []tf.Input{
+			tf.OutputList(float_values), example_weights, epsilon,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if summaries, idx, err = makeOutputList(op, idx, "summaries"); err != nil {
+		scope.UpdateErr("BoostedTreesMakeQuantileSummaries", err)
+		return
+	}
+	return summaries
+}
+
+// BoostedTreesCreateQuantileStreamResourceAttr is an optional argument to BoostedTreesCreateQuantileStreamResource.
+type BoostedTreesCreateQuantileStreamResourceAttr func(optionalAttr)
+
+// BoostedTreesCreateQuantileStreamResourceMaxElements sets the optional max_elements attribute to value.
+//
+// value: int; The maximum number of data points that can be fed to the stream.
+// If not specified, defaults to 1099511627776
+func BoostedTreesCreateQuantileStreamResourceMaxElements(value int64) BoostedTreesCreateQuantileStreamResourceAttr {
+	return func(m optionalAttr) {
+		m["max_elements"] = value
+	}
+}
+
+// Create the Resource for Quantile Streams.
+//
+// Arguments:
+//	quantile_stream_resource_handle: resource; Handle to quantile stream resource.
+//	epsilon: float; The required approximation error of the stream resource.
+//	num_streams: int; The number of streams managed by the resource that shares the same epsilon.
+//
+// Returns the created operation.
+func BoostedTreesCreateQuantileStreamResource(scope *Scope, quantile_stream_resource_handle tf.Output, epsilon tf.Output, num_streams tf.Output, optional ...BoostedTreesCreateQuantileStreamResourceAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesCreateQuantileStreamResource",
+		Input: []tf.Input{
+			quantile_stream_resource_handle, epsilon, num_streams,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Checks whether a quantile stream has been initialized.
+//
+// An Op that checks if quantile stream resource is initialized.
+//
+// Arguments:
+//	quantile_stream_resource_handle: resource; The reference to quantile stream resource handle.
+//
+// Returns bool; True if the resource is initialized, False otherwise.
+func IsBoostedTreesQuantileStreamResourceInitialized(scope *Scope, quantile_stream_resource_handle tf.Output) (is_initialized tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "IsBoostedTreesQuantileStreamResourceInitialized",
+		Input: []tf.Input{
+			quantile_stream_resource_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Calculates the prior from the training data (the bias) and fills in the first node with the logits' prior. Returns a boolean indicating whether to continue centering.
 //
 // Arguments:
@@ -3491,8 +3746,9 @@ func BoostedTreesExampleDebugOutputs(scope *Scope, tree_ensemble_handle tf.Outpu
 // Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
 // misisng, the `output` tensor at that position will be zeroed.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // For example:
 //
@@ -3601,8 +3857,9 @@ func Asin(scope *Scope, x tf.Output) (y tf.Output) {
 
 // Computes the sum along sparse segments of a tensor.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
 // dimension, selecting a subset of dimension 0, specified by `indices`.
@@ -3668,28 +3925,32 @@ func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
 
 // Computes the minimum along segments of a tensor.
 //
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#segmentation)
+// for an explanation of segments.
 //
 // This operator is similar to the unsorted segment sum operator found
 // [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
 // Instead of computing the sum over segments, it computes the minimum such that:
 //
-// \\(output_i = \min_j data_j\\) where min is over `j` such
-// that `segment_ids[j] == i`.
+// \\(output_i = \min_{j...} data_[j...]\\) where min is over tuples `j...` such
+// that `segment_ids[j...] == i`.
 //
 // If the minimum is empty for a given segment ID `i`, it outputs the largest
 // possible value for the specific numeric type,
 // `output[i] = numeric_limits<T>::max()`.
 //
+// If the given segment ID `i` is negative, then the corresponding value is
+// dropped, and will not be included in the result.
+//
 // Arguments:
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-// first dimension.
+//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
 //
 //
-// Returns Has same shape as data, except for dimension 0 which
-// has size `num_segments`.
+// Returns Has same shape as data, except for the first `segment_ids.rank`
+// dimensions, which are replaced with a single dimension which has size
+// `num_segments`.
 func UnsortedSegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
@@ -3721,11 +3982,12 @@ func Relu6(scope *Scope, features tf.Output) (activations tf.Output) {
 
 // Computes the sum along segments of a tensor.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Computes a tensor such that
-// \\(output[i] = sum_{j...} data[j...]\\) where the sum is over tuples `j...` such
+// \\(output[i] = \sum_{j...} data[j...]\\) where the sum is over tuples `j...` such
 // that `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`
 // need not be sorted and need not cover all values in the full
 // range of valid values.
@@ -4353,32 +4615,39 @@ func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthEleme
 
 // Computes the maximum along segments of a tensor.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // This operator is similar to the unsorted segment sum operator found
 // [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
 // Instead of computing the sum over segments, it computes the maximum such that:
 //
-// \\(output_i = \max_j data_j\\) where max is over `j` such
-// that `segment_ids[j] == i`.
+// \\(output_i = \max_{j...} data[j...]\\) where max is over tuples `j...` such
+// that `segment_ids[j...] == i`.
 //
 // If the maximum is empty for a given segment ID `i`, it outputs the smallest
 // possible value for the specific numeric type,
 // `output[i] = numeric_limits<T>::lowest()`.
 //
+// If the given segment ID `i` is negative, then the corresponding value is
+// dropped, and will not be included in the result.
+//
 // <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
 // <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentMax.png" alt>
 // </div>
 //
 // Arguments:
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-// first dimension.
-//
+//	segment_ids: A tensor whose shape is a prefix of `data.shape`.END
+//   }
+//   out_arg {
+//     name: "output"
+//     description: <<END
+// Has same shape as data, except for the first `segment_ids.rank`
+// dimensions, which are replaced with a single dimension which has size
+// `num_segments`.
 //
-// Returns Has same shape as data, except for dimension 0 which
-// has size `num_segments`.
 func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
@@ -5225,6 +5494,47 @@ func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
+// Transforms `input_dataset` containing `Example` protos as vectors of DT_STRING into a dataset of `Tensor` or `SparseTensor` objects representing the parsed features.
+//
+// Arguments:
+//
+//
+//	dense_defaults: A dict mapping string keys to `Tensor`s.
+// The keys of the dict must match the dense_keys of the feature.
+//	sparse_keys: A list of string keys in the examples features.
+// The results for these keys will be returned as `SparseTensor` objects.
+//	dense_keys: A list of Ndense string Tensors (scalars).
+// The keys expected in the Examples features associated with dense values.
+//	sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
+// Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
+// and `tf.string` (`BytesList`) are supported.
+//	dense_shapes: List of tuples with the same length as `dense_keys`.
+// The shape of the data for each dense feature referenced by `dense_keys`.
+// Required for any input tensors identified by `dense_keys`.  Must be
+// either fully defined, or may contain an unknown first dimension.
+// An unknown first dimension means the feature is treated as having
+// a variable number of blocks, and the output shape along this dimension
+// is considered unknown at graph build time.  Padding is applied for
+// minibatch elements smaller than the maximum number of blocks for the
+// given feature along this dimension.
+//	output_types: The type list for the return values.
+//	output_shapes: The list of shapes being produced.
+func ParseExampleDataset(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes, "output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ParseExampleDataset",
+		Input: []tf.Input{
+			input_dataset, num_parallel_calls, tf.OutputList(dense_defaults),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Returns a batched matrix tensor with new batched diagonal values.
 //
 // Given `input` and `diagonal`, this operation returns a tensor with the
@@ -6476,7 +6786,7 @@ func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset
 	return offset
 }
 
-// Compute the lower regularized incomplete Gamma function `Q(a, x)`.
+// Compute the lower regularized incomplete Gamma function `P(a, x)`.
 //
 // The lower regularized incomplete Gamma function is defined as:
 //
@@ -7886,14 +8196,190 @@ func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataTyp
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"component_types": component_types}
+	attrs := map[string]interface{}{"component_types": component_types}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QueueDequeueV2",
+		Input: []tf.Input{
+			handle,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("QueueDequeueV2", err)
+		return
+	}
+	return components
+}
+
+// ParseSequenceExampleAttr is an optional argument to ParseSequenceExample.
+type ParseSequenceExampleAttr func(optionalAttr)
+
+// ParseSequenceExampleNcontextSparse sets the optional Ncontext_sparse attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func ParseSequenceExampleNcontextSparse(value int64) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["Ncontext_sparse"] = value
+	}
+}
+
+// ParseSequenceExampleNcontextDense sets the optional Ncontext_dense attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func ParseSequenceExampleNcontextDense(value int64) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["Ncontext_dense"] = value
+	}
+}
+
+// ParseSequenceExampleNfeatureListSparse sets the optional Nfeature_list_sparse attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func ParseSequenceExampleNfeatureListSparse(value int64) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["Nfeature_list_sparse"] = value
+	}
+}
+
+// ParseSequenceExampleNfeatureListDense sets the optional Nfeature_list_dense attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func ParseSequenceExampleNfeatureListDense(value int64) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["Nfeature_list_dense"] = value
+	}
+}
+
+// ParseSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value.
+//
+// value: A list of Ncontext_sparse types; the data types of data in
+// each context Feature given in context_sparse_keys.
+// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleContextSparseTypes(value []tf.DataType) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["context_sparse_types"] = value
+	}
+}
+
+// ParseSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["feature_list_dense_types"] = value
+	}
+}
+
+// ParseSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value.
+//
+// value: A list of Ncontext_dense shapes; the shapes of data in
+// each context Feature given in context_dense_keys.
+// The number of elements in the Feature corresponding to context_dense_key[j]
+// must always equal context_dense_shapes[j].NumEntries().
+// The shape of context_dense_values[j] will match context_dense_shapes[j].
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleContextDenseShapes(value []tf.Shape) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["context_dense_shapes"] = value
+	}
+}
+
+// ParseSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value.
+//
+// value: A list of Nfeature_list_sparse types; the data types
+// of data in each FeatureList given in feature_list_sparse_keys.
+// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["feature_list_sparse_types"] = value
+	}
+}
+
+// ParseSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value.
+//
+// value: A list of Nfeature_list_dense shapes; the shapes of
+// data in each FeatureList given in feature_list_dense_keys.
+// The shape of each Feature in the FeatureList corresponding to
+// feature_list_dense_key[j] must always equal
+// feature_list_dense_shapes[j].NumEntries().
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["feature_list_dense_shapes"] = value
+	}
+}
+
+// Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors.
+//
+// Arguments:
+//	serialized: A vector containing binary serialized SequenceExample protos.
+//	debug_name: A vector containing the names of the serialized protos.
+// May contain, for example, table key (descriptive) name for the
+// corresponding serialized proto.  This is purely useful for debugging
+// purposes, and the presence of values here has no effect on the output.
+// May also be an empty vector if no name is available.
+//	context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty).
+// context_dense_defaults[j] provides default values
+// when the SequenceExample's context map lacks context_dense_key[j].
+// If an empty Tensor is provided for context_dense_defaults[j],
+// then the Feature context_dense_keys[j] is required.
+// The input type is inferred from context_dense_defaults[j], even when it's
+// empty.  If context_dense_defaults[j] is not empty, its shape must match
+// context_dense_shapes[j].
+//	feature_list_dense_missing_assumed_empty: A vector listing the
+// FeatureList keys which may be missing from the SequenceExamples.  If the
+// associated FeatureList is missing, it is treated as empty.  By default,
+// any FeatureList not listed in this vector must exist in the SequenceExamples.
+//	context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars).
+// The keys expected in the Examples' features associated with context_sparse
+// values.
+//	context_dense_keys: A list of Ncontext_dense string Tensors (scalars).
+// The keys expected in the SequenceExamples' context features associated with
+// dense values.
+//	feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors
+// (scalars).  The keys expected in the FeatureLists associated with sparse
+// values.
+//	feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars).
+// The keys expected in the SequenceExamples' feature_lists associated
+// with lists of dense values.
+func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Output, context_dense_defaults []tf.Output, feature_list_dense_missing_assumed_empty []string, context_sparse_keys []string, context_dense_keys []string, feature_list_sparse_keys []string, feature_list_dense_keys []string, optional ...ParseSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"feature_list_dense_missing_assumed_empty": feature_list_dense_missing_assumed_empty, "context_sparse_keys": context_sparse_keys, "context_dense_keys": context_dense_keys, "feature_list_sparse_keys": feature_list_sparse_keys, "feature_list_dense_keys": feature_list_dense_keys}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QueueDequeueV2",
+		Type: "ParseSequenceExample",
 		Input: []tf.Input{
-			handle,
+			serialized, debug_name, tf.OutputList(context_dense_defaults),
 		},
 		Attrs: attrs,
 	}
@@ -7903,11 +8389,43 @@ func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataTyp
 	}
 	var idx int
 	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("QueueDequeueV2", err)
+	if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
 		return
 	}
-	return components
+	if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths
 }
 
 // Computes the Gauss error function of `x` element-wise.
@@ -9188,6 +9706,49 @@ func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64
 	return op.Output(0)
 }
 
+// StaticRegexReplaceAttr is an optional argument to StaticRegexReplace.
+type StaticRegexReplaceAttr func(optionalAttr)
+
+// StaticRegexReplaceReplaceGlobal sets the optional replace_global attribute to value.
+//
+// value: If True, the replacement is global, otherwise the replacement
+// is done only on the first match.
+// If not specified, defaults to true
+func StaticRegexReplaceReplaceGlobal(value bool) StaticRegexReplaceAttr {
+	return func(m optionalAttr) {
+		m["replace_global"] = value
+	}
+}
+
+// Replaces the match of pattern in input with rewrite.
+//
+// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
+//
+// Arguments:
+//	input: The text to be processed.
+//	pattern: The regular expression to match the input.
+//	rewrite: The rewrite to be applied to the matched expresion.
+//
+// Returns The text after applying pattern and rewrite.
+func StaticRegexReplace(scope *Scope, input tf.Output, pattern string, rewrite string, optional ...StaticRegexReplaceAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"pattern": pattern, "rewrite": rewrite}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StaticRegexReplace",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes gradients for the exponential linear (Elu) operation.
 //
 // Arguments:
@@ -10054,7 +10615,7 @@ func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr {
 //
 //     [1, 12, 3, 14, 14, 6, 7, 20]
 //
-// See @{tf.scatter_nd} for more details about how to make updates to
+// See `tf.scatter_nd` for more details about how to make updates to
 // slices.
 //
 // Arguments:
@@ -11397,6 +11958,29 @@ func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Outpu
 	return op.Output(0), op.Output(1)
 }
 
+// String lengths of `input`.
+//
+// Computes the length of each string given in the input tensor.
+//
+// Arguments:
+//	input: The string for which to compute the length.
+//
+// Returns Integer tensor that has the same shape as `input`. The output contains the
+// element-wise string lengths of `input`.
+func StringLength(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "StringLength",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Converts each string in the input Tensor to its hash mod by a number of buckets.
 //
 // The hash function is deterministic on the content of the string within the
@@ -11747,7 +12331,7 @@ func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr {
 //
 //     [1, 11, 3, 10, 9, 6, 7, 12]
 //
-// See @{tf.scatter_nd} for more details about how to make updates to
+// See `tf.scatter_nd` for more details about how to make updates to
 // slices.
 //
 // Arguments:
@@ -12232,8 +12816,9 @@ func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
 
 // Computes the mean along segments of a tensor.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Computes a tensor such that
 // \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is
@@ -12248,7 +12833,7 @@ func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
 //
 // Arguments:
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
 // first dimension.  Values should be sorted and can be repeated.
 //
 // Returns Has same shape as data, except for dimension 0 which
@@ -12367,7 +12952,7 @@ func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, o
 //
 // Arguments:
 //	input: A string tensor of the text to be processed.
-//	pattern: A 1-D string tensor of the regular expression to match the input.
+//	pattern: A scalar string tensor containing the regular expression to match the input.
 //
 // Returns A bool tensor with the same shape as `input`.
 func RegexFullMatch(scope *Scope, input tf.Output, pattern tf.Output) (output tf.Output) {
@@ -14443,6 +15028,25 @@ func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf
 	return scope.AddOperation(opspec)
 }
 
+// Returns 0 if the denominator is zero.
+//
+//
+// *NOTE*: `DivNoNan` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func DivNoNan(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DivNoNan",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the gradient for the sqrt of `x` wrt its input.
 //
 // Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy`
@@ -15350,6 +15954,36 @@ func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Out
 	return op.Output(0)
 }
 
+// Check if the input matches the regex pattern.
+//
+// The input is a string tensor of any shape. The pattern is the
+// regular expression to be matched with every element of the input tensor.
+// The boolean values (True or False) of the output tensor indicate
+// if the input matches the regex pattern provided.
+//
+// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
+//
+// Arguments:
+//	input: A string tensor of the text to be processed.
+//	pattern: The regular expression to match the input.
+//
+// Returns A bool tensor with the same shape as `input`.
+func StaticRegexFullMatch(scope *Scope, input tf.Output, pattern string) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"pattern": pattern}
+	opspec := tf.OpSpec{
+		Type: "StaticRegexFullMatch",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent.
 type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr)
 
@@ -15947,6 +16581,23 @@ func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator t
 	return scope.AddOperation(opspec)
 }
 
+// Creates a dataset containing elements of first component of `input_dataset` having true in the last component.
+func FilterByLastComponentDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "FilterByLastComponentDataset",
+		Input: []tf.Input{
+			input_dataset,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams.
 type CudnnRNNCanonicalToParamsAttr func(optionalAttr)
 
@@ -16806,7 +17457,8 @@ func DecodeCSVSelectCols(value []int64) DecodeCSVAttr {
 //	records: Each string is a record/row in the csv and all records should have
 // the same format.
 //	record_defaults: One tensor per column of the input record, with either a
-// scalar default value for that column or empty if the column is required.
+// scalar default value for that column or an empty vector if the column is
+// required.
 //
 // Returns Each tensor will have the same shape as records.
 func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) {
@@ -17573,8 +18225,9 @@ func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_
 
 // Computes the sum along segments of a tensor.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Computes a tensor such that
 // \\(output_i = \sum_j data_j\\) where sum is over `j` such
@@ -17588,7 +18241,7 @@ func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_
 //
 // Arguments:
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
 // first dimension.  Values should be sorted and can be repeated.
 //
 // Returns Has same shape as data, except for dimension 0 which
@@ -19505,8 +20158,9 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min
 
 // Computes the minimum along segments of a tensor.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Computes a tensor such that
 // \\(output_i = \min_j(data_j)\\) where `min` is over `j` such
@@ -19520,7 +20174,7 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min
 //
 // Arguments:
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
 // first dimension.  Values should be sorted and can be repeated.
 //
 // Returns Has same shape as data, except for dimension 0 which
@@ -20266,27 +20920,31 @@ func LoopCond(scope *Scope, input tf.Output) (output tf.Output) {
 
 // Computes the product along segments of a tensor.
 //
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#segmentation)
+// for an explanation of segments.
 //
 // This operator is similar to the unsorted segment sum operator found
 // [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
 // Instead of computing the sum over segments, it computes the product of all
 // entries belonging to a segment such that:
 //
-// \\(output_i = \prod_j data_j\\) where the product is over `j` such
-// that `segment_ids[j] == i`.
+// \\(output_i = \prod_{j...} data[j...]\\) where the product is over tuples
+// `j...` such that `segment_ids[j...] == i`.
 //
 // If there is no entry for a given segment ID `i`, it outputs 1.
 //
+// If the given segment ID `i` is negative, then the corresponding value is
+// dropped, and will not be included in the result.
+//
 // Arguments:
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-// first dimension.
+//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
 //
 //
-// Returns Has same shape as data, except for dimension 0 which
-// has size `num_segments`.
+// Returns Has same shape as data, except for the first `segment_ids.rank`
+// dimensions, which are replaced with a single dimension which has size
+// `num_segments`.
 func UnsortedSegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
@@ -20363,8 +21021,9 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf
 
 // Computes the mean along sparse segments of a tensor.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first
 // dimension, selecting a subset of dimension 0, specified by `indices`.
@@ -20433,8 +21092,9 @@ func Cosh(scope *Scope, x tf.Output) (y tf.Output) {
 // Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is
 // misisng, the `output` tensor at that position will be zeroed.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Arguments:
 //
@@ -20579,8 +21239,9 @@ func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segm
 //
 // N is the size of the segment being reduced.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Arguments:
 //
@@ -20638,8 +21299,9 @@ func Igammac(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
 // Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is
 // misisng, the `output` tensor at that position will be zeroed.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Arguments:
 //
@@ -21000,8 +21662,9 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output
 
 // Computes the maximum along segments of a tensor.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Computes a tensor such that
 // \\(output_i = \max_j(data_j)\\) where `max` is over `j` such
@@ -21015,7 +21678,7 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output
 //
 // Arguments:
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
 // first dimension.  Values should be sorted and can be repeated.
 //
 // Returns Has same shape as data, except for dimension 0 which
@@ -23431,29 +24094,57 @@ func TensorListSetItem(scope *Scope, input_handle tf.Output, index tf.Output, it
 	return op.Output(0)
 }
 
-// Computes the matrix exponential of one or more square matrices:
-//
-// DEPRECATED at GraphDef version 27: Use Python implementation tf.linalg.matrix_exponential instead.
+// Creates a Tensor by indexing into the TensorList.
 //
-// \\(exp(A) = \sum_{n=0}^\infty A^n/n!\\)
+// Each row in the produced Tensor corresponds to the element in the TensorList
+// specified by the given index (see `tf.gather`).
 //
-// The exponential is computed using a combination of the scaling and squaring
-// method and the Pade approximation. Details can be founds in:
-// Nicholas J. Higham, "The scaling and squaring method for the matrix exponential
-// revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.
-//
-// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices. The output is a tensor of the same shape as the input
-// containing the exponential for all input submatrices `[..., :, :]`.
+// input_handle: The input tensor list.
+// indices: The indices used to index into the list.
+// values: The tensor.
+func TensorListGather(scope *Scope, input_handle tf.Output, indices tf.Output, element_dtype tf.DataType) (values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"element_dtype": element_dtype}
+	opspec := tf.OpSpec{
+		Type: "TensorListGather",
+		Input: []tf.Input{
+			input_handle, indices,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a TensorList by indexing into a Tensor.
 //
-// Arguments:
-//	input: Shape is `[..., M, M]`.
+// Each member of the TensorList corresponds to one row of the input tensor,
+// specified by the given index (see `tf.gather`).
 //
-// Returns Shape is `[..., M, M]`.
+// tensor: The input tensor.
+// indices: The indices used to index into the list.
+// element_shape: The shape of the elements in the list (can be less specified than
+//   the shape of the tensor).
+// output_handle: The TensorList.
+func TensorListScatter(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output) (output_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorListScatter",
+		Input: []tf.Input{
+			tensor, indices, element_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Deprecated, use python implementation tf.linalg.matrix_exponential.
 //
-// @compatibility(scipy)
-// Equivalent to scipy.linalg.expm
-// @end_compatibility
+// DEPRECATED at GraphDef version 27: Use Python implementation tf.linalg.matrix_exponential instead.
 func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
@@ -23959,8 +24650,9 @@ func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output,
 
 // Computes the product along segments of a tensor.
 //
-// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
-// segments.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
 //
 // Computes a tensor such that
 // \\(output_i = \prod_j data_j\\) where the product is over `j` such
@@ -23974,7 +24666,7 @@ func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output,
 //
 // Arguments:
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
 // first dimension.  Values should be sorted and can be repeated.
 //
 // Returns Has same shape as data, except for dimension 0 which
@@ -24999,7 +25691,7 @@ func ResourceApplyAdamUseNesterov(value bool) ResourceApplyAdamAttr {
 
 // Update '*var' according to the Adam algorithm.
 //
-// $$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$
+// $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
 // $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
 // $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
 // $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
@@ -27016,8 +27708,10 @@ func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source
 // If `len` defines a substring that would extend beyond the length of the input
 // string, then as many characters as possible are used.
 //
-// If `pos` is negative or specifies a character index larger than any of the input
-// strings, then an `InvalidArgumentError` is thrown.
+// A negative `pos` indicates distance within the string backwards from the end.
+//
+// If `pos` specifies an index which is out of range for any of the input strings,
+// then an `InvalidArgumentError` is thrown.
 //
 // `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on
 // Op creation.
@@ -27643,6 +28337,8 @@ func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ..
 // On GPU, if an out of bound index is found, a 0 is stored in the
 // corresponding output value.
 //
+// See also `tf.batch_gather` and `tf.gather_nd`.
+//
 // Arguments:
 //	params: The tensor from which to gather values. Must be at least rank
 // `axis + 1`.
@@ -28153,6 +28849,30 @@ func FFT(scope *Scope, input tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
+// Identity transformation that models performance.
+//
+// Identity transformation that models performance.
+//
+// Arguments:
+//	input_dataset: A variant tensor representing the input dataset.
+//
+//
+func ModelDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ModelDataset",
+		Input: []tf.Input{
+			input_dataset,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Performs a padding as a preprocess during a convolution.
 //
 // Similar to FusedResizeAndPadConv2d, this op allows for an optimized
@@ -28842,10 +29562,16 @@ func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) (
 //
 // Arguments:
 //
-//	window_size: A scalar representing the number of elements to accumulate in a window.
+//	size: A scalar representing the number of elements to accumulate in a window.
+//	shift: A scalar representing the steps moving the sliding window forward in one
+// iteration. It must be positive.
+//	stride: A scalar representing the stride of the input elements of the sliding window.
+// It must be positive.
+//	drop_remainder: A scalar representing whether a window should be dropped in case its size is
+// smaller than desired.
 //
 //
-func WindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+func WindowDataset(scope *Scope, input_dataset tf.Output, size tf.Output, shift tf.Output, stride tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -28853,7 +29579,7 @@ func WindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output,
 	opspec := tf.OpSpec{
 		Type: "WindowDataset",
 		Input: []tf.Input{
-			input_dataset, window_size,
+			input_dataset, size, shift, stride, drop_remainder,
 		},
 		Attrs: attrs,
 	}
@@ -30063,7 +30789,7 @@ func BoostedTreesCreateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, st
 //
 //     [1, 13, 3, 14, 14, 6, 7, 20]
 //
-// See @{tf.scatter_nd} for more details about how to make updates to slices.
+// See `tf.scatter_nd` for more details about how to make updates to slices.
 //
 // Arguments:
 //	input: A Tensor.
@@ -30680,6 +31406,41 @@ func MapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...MapIncomp
 	return op.Output(0)
 }
 
+// Generate the bucket boundaries for each feature based on accumulated summaries.
+//
+// An op that returns a list of float tensors for a quantile stream resource. Each
+// tensor is Rank 1 containing bucket boundaries for a single feature.
+//
+// Arguments:
+//	quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource.
+//	num_features: inferred int; number of features to get bucket boundaries for.
+//
+// Returns float; List of Rank 1 Tensors each containing the bucket boundaries for a feature.
+func BoostedTreesQuantileStreamResourceGetBucketBoundaries(scope *Scope, quantile_stream_resource_handle tf.Output, num_features int64) (bucket_boundaries []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_features": num_features}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesQuantileStreamResourceGetBucketBoundaries",
+		Input: []tf.Input{
+			quantile_stream_resource_handle,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if bucket_boundaries, idx, err = makeOutputList(op, idx, "bucket_boundaries"); err != nil {
+		scope.UpdateErr("BoostedTreesQuantileStreamResourceGetBucketBoundaries", err)
+		return
+	}
+	return bucket_boundaries
+}
+
 // OrderedMapUnstageAttr is an optional argument to OrderedMapUnstage.
 type OrderedMapUnstageAttr func(optionalAttr)
 
@@ -30751,6 +31512,43 @@ func OrderedMapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []
 	return values
 }
 
+// BoostedTreesQuantileStreamResourceHandleOpAttr is an optional argument to BoostedTreesQuantileStreamResourceHandleOp.
+type BoostedTreesQuantileStreamResourceHandleOpAttr func(optionalAttr)
+
+// BoostedTreesQuantileStreamResourceHandleOpContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func BoostedTreesQuantileStreamResourceHandleOpContainer(value string) BoostedTreesQuantileStreamResourceHandleOpAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// BoostedTreesQuantileStreamResourceHandleOpSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func BoostedTreesQuantileStreamResourceHandleOpSharedName(value string) BoostedTreesQuantileStreamResourceHandleOpAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Creates a handle to a BoostedTreesQuantileStreamResource.
+func BoostedTreesQuantileStreamResourceHandleOp(scope *Scope, optional ...BoostedTreesQuantileStreamResourceHandleOpAttr) (resource tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesQuantileStreamResourceHandleOp",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // OrderedMapSizeAttr is an optional argument to OrderedMapSize.
 type OrderedMapSizeAttr func(optionalAttr)
 
-- 
GitLab


From ff2e46cd768b9161235f10f6f8bbb23cb27314dc Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Tue, 18 Sep 2018 18:18:43 -0700
Subject: [PATCH 0352/1357] Update the grappler plugin to support the @defun
 generated function and ops.

PiperOrigin-RevId: 213554813
---
 tensorflow/core/grappler/optimizers/BUILD     |  1 +
 .../experimental_implementation_selector.cc   | 48 +++++++++++++------
 ...perimental_implementation_selector_test.cc |  5 +-
 .../grappler/optimizers/meta_optimizer.cc     | 15 +++++-
 tensorflow/python/eager/function_test.py      | 46 ++++++++++++++++++
 5 files changed, 95 insertions(+), 20 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index f094c151e6..029205248b 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -515,6 +515,7 @@ cc_library(
         ":custom_graph_optimizer_registry",
         ":debug_stripper",
         ":dependency_optimizer",
+        ":experimental_implementation_selector",
         ":function_optimizer",
         ":graph_optimizer",
         ":layout_optimizer",
diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc b/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc
index eeea269fb0..2c36c9b7b3 100644
--- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc
+++ b/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc
@@ -32,8 +32,6 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-REGISTER_GRAPH_OPTIMIZER(ExperimentalImplementationSelector);
-
 Status ExperimentalImplementationSelector::LoadFunctions(
     const GraphDef& graph) {
   lib_info_.reset(new FunctionLibraryApiInfo);
@@ -43,8 +41,20 @@ Status ExperimentalImplementationSelector::LoadFunctions(
 
 Status ExperimentalImplementationSelector::MaybeOptimizeFunctionCall(
     NodeDef* node_def) const {
-  const FunctionApiInfo* info = lib_info_->GetApiInfo(node_def->op());
-  if (info == nullptr) {
+  // There are two ways of calling functions:
+  //  1. By specifying an op name as a function name, or
+  //  2. Via the @defun functional interface, where the real function name
+  //     appear as the attribute with type func.
+  std::vector<string> function_attribute_names;
+  for (const auto& attr : node_def->attr()) {
+    if (attr.second.has_func() &&
+        lib_info_->GetApiInfo(attr.second.func().name()) != nullptr) {
+      function_attribute_names.emplace_back(attr.first);
+    }
+  }
+
+  if (function_attribute_names.empty() &&
+      lib_info_->GetApiInfo(node_def->op()) == nullptr) {
     // A regular op, or a function which has no interface.
     return Status::OK();
   }
@@ -58,17 +68,25 @@ Status ExperimentalImplementationSelector::MaybeOptimizeFunctionCall(
   DeviceNameUtils::ParsedName parsed_name;
   DeviceNameUtils::ParseLocalName(device, &parsed_name);
 
-  string best_function_name;
-  lib_info_->GetBestImplementation(node_def->op(), parsed_name.type,
-                                   &best_function_name);
-  if (node_def->op() != best_function_name) {
-    // The current implementation is not the best, swap the op to the best one.
-    // There will be duplicates in the graph and they will be pruned by other
-    // grappler plugin since no other node is using their output as inputs.
-    // TODO(scottzhu): Update the tf.eager.defun to register functions without
-    // having to call them with input data. That will reduce the graph size and
-    // save the work for prune them.
-    node_def->set_op(best_function_name);
+  for (const auto& attr_name : function_attribute_names) {
+    string function_name = node_def->attr().at(attr_name).func().name();
+    string best_function_name;
+    lib_info_->GetBestImplementation(function_name, parsed_name.type,
+                                     &best_function_name);
+    if (function_name != best_function_name) {
+      node_def->mutable_attr()
+          ->find(attr_name)
+          ->second.mutable_func()
+          ->set_name(best_function_name);
+    }
+  }
+  if (lib_info_->GetApiInfo(node_def->op()) != nullptr) {
+    string best_function_name;
+    lib_info_->GetBestImplementation(node_def->op(), parsed_name.type,
+                                     &best_function_name);
+    if (node_def->op() != best_function_name) {
+      node_def->set_op(best_function_name);
+    }
   }
   return Status::OK();
 }
diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc b/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc
index 2368e577c2..3f1ebefac6 100644
--- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc
+++ b/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc
@@ -45,9 +45,8 @@ TEST_F(ExperimentalImplementationSelectorTest, NoUpdate) {
   GrapplerItem item;
   CHECK(fake_input.NextItem(&item));
 
-  std::unique_ptr<CustomGraphOptimizer> optimizer =
-      CustomGraphOptimizerRegistry::CreateByNameOrNull(
-          "ExperimentalImplementationSelector");
+  std::unique_ptr<CustomGraphOptimizer> optimizer(
+      new ExperimentalImplementationSelector);
   ASSERT_NE(nullptr, optimizer);
   TF_ASSERT_OK(optimizer->Init());
 
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 7ed4a67333..1ed1b22931 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
 #include "tensorflow/core/grappler/optimizers/debug_stripper.h"
 #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h"
+#include "tensorflow/core/grappler/optimizers/experimental_implementation_selector.h"
 #include "tensorflow/core/grappler/optimizers/function_optimizer.h"
 #include "tensorflow/core/grappler/optimizers/layout_optimizer.h"
 #include "tensorflow/core/grappler/optimizers/loop_optimizer.h"
@@ -196,8 +197,18 @@ Status MetaOptimizer::InitializeOptimizersByName(
 Status MetaOptimizer::InitializeCustomGraphOptimizers(
     std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const {
   for (const auto& optimizer_config : cfg_.custom_optimizers()) {
-    auto custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull(
-        optimizer_config.name());
+    // Initialize the ExperimentalImplementationSelector here instead of
+    // CustomizeOptimizer registry, due the static link issue in TensorRT for
+    // double registry.
+    // TODO(laigd): Remove this hack and change it back to use the registry once
+    // the duplicate static import issue is fixed.
+    std::unique_ptr<CustomGraphOptimizer> custom_optimizer;
+    if (optimizer_config.name() == "ExperimentalImplementationSelector") {
+      custom_optimizer.reset(new ExperimentalImplementationSelector());
+    } else {
+      custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull(
+          optimizer_config.name());
+    }
     if (custom_optimizer) {
       VLOG(2) << "Registered custom configurable graph optimizer: "
               << optimizer_config.name();
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 6326a5b45f..4a1bde3f5e 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -26,6 +26,7 @@ import weakref
 import numpy
 
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python import keras
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import backprop
@@ -1729,6 +1730,51 @@ class FunctionTest(test.TestCase):
                                  'be Tensors;.*'):
       graph_function('Not a Tensor.')
 
+  def testSwapImplementationWithGrapplerPlugin(self):
+    rewrites = rewriter_config_pb2.RewriterConfig()
+    # function_optimizer has to be turn off, otherwise it will delete the
+    # registered function if it does not get called.
+    # TODO(scottzhu): Move the ExperimentalImplementationSelector to be called
+    # before function_optimizer in future.
+    rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
+    customer_optimizer = rewrites.custom_optimizers.add()
+    customer_optimizer.name = 'ExperimentalImplementationSelector'
+    rewrites.min_graph_nodes = -1
+    graph_options = config_pb2.GraphOptions(
+        rewrite_options=rewrites, build_cost_model=1)
+    config = config_pb2.ConfigProto(graph_options=graph_options)
+
+    with context.graph_mode(), self.cached_session(
+        config=config, graph=ops.Graph(), use_gpu=True) as sess:
+
+      @function.defun_with_attributes(
+          attributes={
+              'experimental_api_implements': 'random_boost',
+              'experimental_api_preferred_device': 'CPU'
+          })
+      def cpu_boost(x):
+        return math_ops.add(x, 2.0)
+
+      @function.defun_with_attributes(
+          attributes={
+              'experimental_api_implements': 'random_boost',
+              'experimental_api_preferred_device': 'GPU'
+          })
+      def gpu_boost(x):
+        return math_ops.add(x, 4.0)
+
+      x = constant_op.constant(1.0)
+
+      function.register(cpu_boost, x)
+      y = gpu_boost(x)
+      y_value = sess.run(y)
+
+      if test.is_gpu_available():
+        self.assertEquals(y_value, 5.0)
+      else:
+        # Grappler fallback to use the CPU impl even called with GPU function.
+        self.assertEquals(y_value, 3.0)
+
 
 @test_util.with_c_shapes
 class AutomaticControlDependenciesTest(test.TestCase):
-- 
GitLab


From 9ee75bb6e29007b8b5ea4a6d981996d8a4d88373 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 18 Sep 2018 18:31:37 -0700
Subject: [PATCH 0353/1357] [tf.data] Add a test for state persistence between
 iterators over the same MapDataset.

PiperOrigin-RevId: 213555982
---
 .../data/kernel_tests/map_dataset_op_test.py  | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 2ab74beb32..ae04995436 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -397,6 +397,28 @@ class MapDatasetTest(test.TestCase, parameterized.TestCase):
       # Randomness is repeatable given same seed
       self.assertAllClose(random_values, random_values_2)
 
+  def testStatefulMapKeepsStateAcrossIterators(self):
+    iterator = (dataset_ops.Dataset.from_tensors(0).repeat(10)
+                .map(lambda _: random_ops.random_uniform((), seed=11))
+                .repeat(1000)
+                .batch(10)
+                .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      random_values = sess.run(get_next)
+
+      # Assert that one of the next 99 batches yielded by the iterator is
+      # different from the first.
+      i = 0
+      while i < 99:
+        if np.any(random_values != sess.run(get_next)):
+          break
+        i += 1
+      self.assertLess(i, 99)
+
   def testMapDict(self):
     iterator = (dataset_ops.Dataset.range(10)
                 .map(lambda x: {"foo": x * 2, "bar": x ** 2})
-- 
GitLab


From 9fe177881224571aff0c267593f747f5fd7a2967 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 18 Sep 2018 19:39:27 -0700
Subject: [PATCH 0354/1357] Getting DNNModel to work with the new feature
 columns.

PiperOrigin-RevId: 213561495
---
 .../estimator/dnn_with_layer_annotations.py   |  15 +-
 tensorflow/python/estimator/BUILD             |   2 +
 tensorflow/python/estimator/canned/dnn.py     | 181 ++++++++++----
 .../estimator/canned/dnn_linear_combined.py   |   7 +-
 .../python/estimator/canned/dnn_test.py       | 146 +++++++++--
 .../estimator/canned/dnn_testing_utils.py     | 227 ++++++++++++++----
 .../python/feature_column/feature_column.py   |  12 +-
 .../feature_column/feature_column_v2.py       |  14 ++
 8 files changed, 477 insertions(+), 127 deletions(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index 152431d1b2..a8eeff6f6d 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -76,6 +76,7 @@ def make_input_layer_with_layer_annotations(original_input_layer, mode):
                                          weight_collections=None,
                                          trainable=True,
                                          cols_to_vars=None,
+                                         scope=None,
                                          cols_to_output_tensors=None):
     """Returns a dense `Tensor` as input layer based on given `feature_columns`.
 
@@ -112,6 +113,7 @@ def make_input_layer_with_layer_annotations(original_input_layer, mode):
         'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1'
           shape=(5, 10)]} If a column creates no variables, its value will be an
           empty list.
+      scope: A name or variable scope to use
       cols_to_output_tensors: If not `None`, must be a dictionary that will be
         filled with a mapping from '_FeatureColumn' to the associated output
         `Tensor`s.
@@ -132,6 +134,7 @@ def make_input_layer_with_layer_annotations(original_input_layer, mode):
         weight_collections=weight_collections,
         trainable=trainable,
         cols_to_vars=cols_to_vars,
+        scope=scope,
         cols_to_output_tensors=local_cols_to_output_tensors)
 
     if cols_to_output_tensors is not None:
@@ -301,9 +304,9 @@ def DNNClassifierWithLayerAnnotations(  # pylint: disable=invalid-name
 
   def _model_fn(features, labels, mode, config):
     with _monkey_patch(
-        feature_column_lib, 'input_layer',
-        make_input_layer_with_layer_annotations(feature_column_lib.input_layer,
-                                                mode)):
+        feature_column_lib, '_internal_input_layer',
+        make_input_layer_with_layer_annotations(
+            feature_column_lib._internal_input_layer, mode)):  # pylint: disable=protected-access
       return original.model_fn(features, labels, mode, config)
 
   return estimator.Estimator(
@@ -422,9 +425,9 @@ def DNNRegressorWithLayerAnnotations(  # pylint: disable=invalid-name
 
   def _model_fn(features, labels, mode, config):
     with _monkey_patch(
-        feature_column_lib, 'input_layer',
-        make_input_layer_with_layer_annotations(feature_column_lib.input_layer,
-                                                mode)):
+        feature_column_lib, '_internal_input_layer',
+        make_input_layer_with_layer_annotations(
+            feature_column_lib._internal_input_layer, mode)):  # pylint: disable=protected-access
       return original.model_fn(features, labels, mode, config)
 
   return estimator.Estimator(
diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index bfcc019dd5..2026bf8c4f 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -251,6 +251,7 @@ py_library(
         ":prediction_keys",
         "//tensorflow:tensorflow_py_no_contrib",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
         "@six_archive//:six",
     ],
 )
@@ -273,6 +274,7 @@ py_test(
         ":pandas_io",
         ":prediction_keys",
         "//tensorflow:tensorflow_py_no_contrib",
+        "@absl_py//absl/testing:parameterized",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py
index 1c0c4581c0..344113a5d8 100644
--- a/tensorflow/python/estimator/canned/dnn.py
+++ b/tensorflow/python/estimator/canned/dnn.py
@@ -24,7 +24,9 @@ from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import model_fn
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.keras.engine import training
 from tensorflow.python.layers import core as core_layers
 from tensorflow.python.layers import normalization
 from tensorflow.python.ops import init_ops
@@ -45,8 +47,14 @@ def _add_hidden_layer_summary(value, tag):
   summary.histogram('%s/activation' % tag, value)
 
 
-def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
-                          dropout, input_layer_partitioner, batch_norm):
+def _dnn_logit_fn_builder(units,
+                          hidden_units,
+                          feature_columns,
+                          activation_fn,
+                          dropout,
+                          input_layer_partitioner,
+                          batch_norm,
+                          shared_state_manager=None):
   """Function builder for a dnn logit_fn.
 
   Args:
@@ -60,6 +68,8 @@ def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
       coordinate.
     input_layer_partitioner: Partitioner for input layer.
     batch_norm: Whether to use batch normalization after each hidden layer.
+    shared_state_manager: A SharedEmbeddingStateManager object to hold the
+      shared state for SharedEmbeddingColumn's.
 
   Returns:
     A logit_fn (see below).
@@ -85,50 +95,110 @@ def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
       A `Tensor` representing the logits, or a list of `Tensor`'s representing
       multiple logits in the MultiHead case.
     """
-    is_training = mode == model_fn.ModeKeys.TRAIN
-    with variable_scope.variable_scope(
-        'input_from_feature_columns',
-        values=tuple(six.itervalues(features)),
-        partitioner=input_layer_partitioner):
-      net = feature_column_lib.input_layer(
-          features=features, feature_columns=feature_columns)
+    dnn_model = _DNNModel(
+        units,
+        hidden_units,
+        feature_columns,
+        activation_fn,
+        dropout,
+        input_layer_partitioner,
+        batch_norm,
+        shared_state_manager,
+        name='dnn')
+    return dnn_model(features, mode)
+
+  return dnn_logit_fn
+
+
+class _DNNModel(training.Model):
+  """A DNN Model."""
+
+  def __init__(self,
+               units,
+               hidden_units,
+               feature_columns,
+               activation_fn,
+               dropout,
+               input_layer_partitioner,
+               batch_norm,
+               shared_state_manager,
+               name=None,
+               **kwargs):
+    super(_DNNModel, self).__init__(name=name, **kwargs)
+
+    if feature_column_v2.is_feature_column_v2(feature_columns):
+      input_layer = feature_column_v2.FeatureLayer(
+          feature_columns=feature_columns,
+          name='input_layer',
+          shared_state_manager=shared_state_manager)
+    else:
+      with variable_scope.variable_scope('input_from_feature_columns'):
+        input_layer = feature_column.InputLayer(
+            feature_columns=feature_columns, name='input_layer')
+
+    self._input_layer = self._add_layers([input_layer])[0]
+
+    self._dropout = dropout
+    self._batch_norm = batch_norm
+
+    hidden_layers = []
+    dropout_layers = []
+    batch_norm_layers = []
     for layer_id, num_hidden_units in enumerate(hidden_units):
-      with variable_scope.variable_scope(
-          'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
-        net = core_layers.dense(
-            net,
-            units=num_hidden_units,
-            activation=activation_fn,
-            kernel_initializer=init_ops.glorot_uniform_initializer(),
-            name=hidden_layer_scope)
-        if dropout is not None and is_training:
-          net = core_layers.dropout(net, rate=dropout, training=True)
-        if batch_norm:
-          # TODO(hjm): In future, if this becomes popular, we can enable
-          # customization of the batch normalization params by accepting a
-          # list of `BatchNormalization` instances as `batch_norm`.
-          net = normalization.batch_normalization(
-              net,
-              # The default momentum 0.99 actually crashes on certain
-              # problem, so here we use 0.999, which is the default of
-              # tf.contrib.layers.batch_norm.
-              momentum=0.999,
-              training=is_training,
-              name='batchnorm_%d' % layer_id)
-      _add_hidden_layer_summary(net, hidden_layer_scope.name)
-
-    with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
-      logits = core_layers.dense(
-          net,
-          units=units,
-          activation=None,
+      hidden_layer = core_layers.Dense(
+          units=num_hidden_units,
+          activation=activation_fn,
           kernel_initializer=init_ops.glorot_uniform_initializer(),
-          name=logits_scope)
-    _add_hidden_layer_summary(logits, logits_scope.name)
-
+          name='hiddenlayer_%d' % layer_id)
+      hidden_layers.append(hidden_layer)
+      if self._dropout is not None:
+        dropout_layer = core_layers.Dropout(rate=dropout)
+        dropout_layers.append(dropout_layer)
+      if self._batch_norm:
+        batch_norm_layer = normalization.BatchNormalization(
+            # The default momentum 0.99 actually crashes on certain
+            # problem, so here we use 0.999, which is the default of
+            # tf.contrib.layers.batch_norm.
+            momentum=0.999,
+            trainable=True,
+            name='hiddenlayer_%d/batchnorm_%d' % (layer_id, layer_id))
+        batch_norm_layers.append(batch_norm_layer)
+
+    self._hidden_layers = self._add_layers(hidden_layers)
+    if self._dropout is not None:
+      self._dropout_layers = self._add_layers(dropout_layers)
+    if self._batch_norm:
+      self._batch_norm_layers = self._add_layers(batch_norm_layers)
+
+    self._logits_layer = core_layers.Dense(
+        units=units,
+        activation=None,
+        kernel_initializer=init_ops.glorot_uniform_initializer(),
+        name='logits')
+
+  def call(self, features, mode):
+    is_training = mode == model_fn.ModeKeys.TRAIN
+    with variable_scope.variable_scope('input_from_feature_columns'):
+      net = self._input_layer(features)
+    for i in range(len(self._hidden_layers)):
+      net = self._hidden_layers[i](net)
+      if self._dropout is not None and is_training:
+        net = self._dropout_layers[i](net)
+      if self._batch_norm:
+        net = self._batch_norm_layers[i](net, training=is_training)
+      _add_hidden_layer_summary(net, self._hidden_layers[i].name)
+
+    logits = self._logits_layer(net)
+    _add_hidden_layer_summary(logits, self._logits_layer.name)
     return logits
 
-  return dnn_logit_fn
+  def _add_layers(self, layers):
+    # "Magic" required for keras.Model classes to track all the variables in
+    # a list of layers.Layer objects.
+    # TODO(ashankar): Figure out API so user code doesn't have to do this.
+    for layer in layers:
+      setattr(self, layer.name, layer)
+    return layers
 
 
 def _dnn_model_fn(features,
@@ -143,7 +213,8 @@ def _dnn_model_fn(features,
                   input_layer_partitioner=None,
                   config=None,
                   use_tpu=False,
-                  batch_norm=False):
+                  batch_norm=False,
+                  shared_state_manager=None):
   """Deep Neural Net model_fn.
 
   Args:
@@ -167,6 +238,8 @@ def _dnn_model_fn(features,
     use_tpu: Whether to make a DNN model able to run on TPU. Will make function
       return a `_TPUEstimatorSpec` instance and disable variable partitioning.
     batch_norm: Whether to use batch normalization after each hidden layer.
+    shared_state_manager: A SharedEmbeddingStateManager object to hold the
+      shared state for SharedEmbeddingColumn's.
 
   Returns:
     An `EstimatorSpec` instance.
@@ -202,7 +275,8 @@ def _dnn_model_fn(features,
         activation_fn=activation_fn,
         dropout=dropout,
         input_layer_partitioner=input_layer_partitioner,
-        batch_norm=batch_norm)
+        batch_norm=batch_norm,
+        shared_state_manager=shared_state_manager)
     logits = logit_fn(features=features, mode=mode)
 
     if use_tpu:
@@ -370,6 +444,10 @@ class DNNClassifier(estimator.Estimator):
     """
     head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
         n_classes, weight_column, label_vocabulary, loss_reduction)
+
+    shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
+        feature_columns)
+
     def _model_fn(features, labels, mode, config):
       """Call the defined shared _dnn_model_fn."""
       return _dnn_model_fn(
@@ -384,7 +462,8 @@ class DNNClassifier(estimator.Estimator):
           dropout=dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config,
-          batch_norm=batch_norm)
+          batch_norm=batch_norm,
+          shared_state_manager=shared_state_manager)
 
     super(DNNClassifier, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config,
@@ -532,6 +611,10 @@ class DNNRegressor(estimator.Estimator):
       batch_norm: Whether to use batch normalization after each hidden layer.
     """
 
+    shared_state_manager = None
+    if feature_column_v2.is_feature_column_v2(feature_columns):
+      shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
+
     def _model_fn(features, labels, mode, config):
       """Call the defined shared _dnn_model_fn."""
       return _dnn_model_fn(
@@ -539,7 +622,8 @@ class DNNRegressor(estimator.Estimator):
           labels=labels,
           mode=mode,
           head=head_lib._regression_head(  # pylint: disable=protected-access
-              label_dimension=label_dimension, weight_column=weight_column,
+              label_dimension=label_dimension,
+              weight_column=weight_column,
               loss_reduction=loss_reduction),
           hidden_units=hidden_units,
           feature_columns=tuple(feature_columns or []),
@@ -548,7 +632,8 @@ class DNNRegressor(estimator.Estimator):
           dropout=dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config,
-          batch_norm=batch_norm)
+          batch_norm=batch_norm,
+          shared_state_manager=shared_state_manager)
 
     super(DNNRegressor, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config,
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined.py b/tensorflow/python/estimator/canned/dnn_linear_combined.py
index 9799cf9e98..f2ac9a7466 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined.py
@@ -27,6 +27,7 @@ from tensorflow.python.estimator.canned import dnn
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import linear
 from tensorflow.python.estimator.canned import optimizers
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import nn
@@ -142,6 +143,9 @@ def _dnn_linear_combined_model_fn(features,
           max_partitions=num_ps_replicas,
           min_slice_size=64 << 20))
 
+  shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
+      list(linear_feature_columns) + list(dnn_feature_columns))
+
   # Build DNN Logits.
   dnn_parent_scope = 'dnn'
 
@@ -170,7 +174,8 @@ def _dnn_linear_combined_model_fn(features,
           activation_fn=dnn_activation_fn,
           dropout=dnn_dropout,
           input_layer_partitioner=input_layer_partitioner,
-          batch_norm=batch_norm)
+          batch_norm=batch_norm,
+          shared_state_manager=shared_state_manager)
       dnn_logits = dnn_logit_fn(features=features, mode=mode)
 
   linear_parent_scope = 'linear'
diff --git a/tensorflow/python/estimator/canned/dnn_test.py b/tensorflow/python/estimator/canned/dnn_test.py
index fc90b7c35e..e64cd522b4 100644
--- a/tensorflow/python/estimator/canned/dnn_test.py
+++ b/tensorflow/python/estimator/canned/dnn_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import shutil
 import tempfile
 
+from absl.testing import parameterized
 import numpy as np
 import six
 
@@ -33,6 +34,7 @@ from tensorflow.python.estimator.export import export
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.estimator.inputs import pandas_io
 from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import data_flow_ops
@@ -65,6 +67,14 @@ class DNNModelFnTest(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
     dnn_testing_utils.BaseDNNModelFnTest.__init__(self, dnn._dnn_model_fn)
 
 
+class DNNModelFnV2Test(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNModelFnTest.__init__(
+        self, dnn._dnn_model_fn, is_fc_v2=True)
+
+
 class DNNLogitFnTest(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
 
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
@@ -73,6 +83,14 @@ class DNNLogitFnTest(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
                                                   dnn._dnn_logit_fn_builder)
 
 
+class DNNLogitFnV2Test(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNLogitFnTest.__init__(
+        self, dnn._dnn_logit_fn_builder, is_fc_v2=True)
+
+
 class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
                           test.TestCase):
 
@@ -82,6 +100,15 @@ class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
                                                        _dnn_regressor_fn)
 
 
+class DNNWarmStartingV2Test(dnn_testing_utils.BaseDNNWarmStartingTest,
+                            test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
+        self, _dnn_classifier_fn, _dnn_regressor_fn, is_fc_v2=True)
+
+
 class DNNClassifierEvaluateTest(
     dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
 
@@ -91,6 +118,15 @@ class DNNClassifierEvaluateTest(
         self, _dnn_classifier_fn)
 
 
+class DNNClassifierEvaluateV2Test(
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
+        self, _dnn_classifier_fn, is_fc_v2=True)
+
+
 class DNNClassifierPredictTest(
     dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
 
@@ -100,6 +136,15 @@ class DNNClassifierPredictTest(
         self, _dnn_classifier_fn)
 
 
+class DNNClassifierPredictV2Test(dnn_testing_utils.BaseDNNClassifierPredictTest,
+                                 test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
+        self, _dnn_classifier_fn, is_fc_v2=True)
+
+
 class DNNClassifierTrainTest(
     dnn_testing_utils.BaseDNNClassifierTrainTest, test.TestCase):
 
@@ -109,6 +154,15 @@ class DNNClassifierTrainTest(
         self, _dnn_classifier_fn)
 
 
+class DNNClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
+                               test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
+        self, _dnn_classifier_fn, is_fc_v2=True)
+
+
 def _dnn_regressor_fn(*args, **kwargs):
   return dnn.DNNRegressor(*args, **kwargs)
 
@@ -122,6 +176,15 @@ class DNNRegressorEvaluateTest(
         self, _dnn_regressor_fn)
 
 
+class DNNRegressorEvaluateV2Test(dnn_testing_utils.BaseDNNRegressorEvaluateTest,
+                                 test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_regressor_fn, is_fc_v2=True)
+
+
 class DNNRegressorPredictTest(
     dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
 
@@ -131,6 +194,15 @@ class DNNRegressorPredictTest(
         self, _dnn_regressor_fn)
 
 
+class DNNRegressorPredictV2Test(dnn_testing_utils.BaseDNNRegressorPredictTest,
+                                test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_regressor_fn, is_fc_v2=True)
+
+
 class DNNRegressorTrainTest(
     dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
 
@@ -140,6 +212,15 @@ class DNNRegressorTrainTest(
         self, _dnn_regressor_fn)
 
 
+class DNNRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
+                              test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_regressor_fn, is_fc_v2=True)
+
+
 def _queue_parsed_features(feature_map):
   tensors_to_enqueue = []
   keys = []
@@ -156,7 +237,8 @@ def _queue_parsed_features(feature_map):
   return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
 
 
-class DNNRegressorIntegrationTest(test.TestCase):
+@parameterized.parameters((True,), (False,))
+class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -166,11 +248,16 @@ class DNNRegressorIntegrationTest(test.TestCase):
       writer_cache.FileWriterCache.clear()
       shutil.rmtree(self._model_dir)
 
-  def _test_complete_flow(
-      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      label_dimension, batch_size):
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, batch_size,
+                          is_fc_v2):
     feature_columns = [
         feature_column.numeric_column('x', shape=(input_dimension,))]
+    if is_fc_v2:
+      feature_columns = [
+          feature_column_v2.numeric_column('x', shape=(input_dimension,))
+      ]
+
     est = dnn.DNNRegressor(
         hidden_units=(2, 2),
         feature_columns=feature_columns,
@@ -194,14 +281,17 @@ class DNNRegressorIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, label_dimension), predictions.shape)
 
     # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    if is_fc_v2:
+      feature_spec = feature_column_v2.make_parse_example_spec(feature_columns)
+    else:
+      feature_spec = feature_column.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self):
+  def test_numpy_input_fn(self, is_fc_v2):
     """Tests complete flow with numpy_input_fn."""
     label_dimension = 2
     batch_size = 10
@@ -230,9 +320,10 @@ class DNNRegressorIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        is_fc_v2=is_fc_v2)
 
-  def test_pandas_input_fn(self):
+  def test_pandas_input_fn(self, is_fc_v2):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -263,9 +354,10 @@ class DNNRegressorIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        is_fc_v2=is_fc_v2)
 
-  def test_input_fn_from_parse_example(self):
+  def test_input_fn_from_parse_example(self, is_fc_v2):
     """Tests complete flow with input_fn constructed from parse_example."""
     label_dimension = 2
     batch_size = 10
@@ -313,9 +405,11 @@ class DNNRegressorIntegrationTest(test.TestCase):
         predict_input_fn=_predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        is_fc_v2=is_fc_v2)
 
 
+@parameterized.parameters((True,), (False,))
 class DNNClassifierIntegrationTest(test.TestCase):
 
   def setUp(self):
@@ -329,11 +423,15 @@ class DNNClassifierIntegrationTest(test.TestCase):
   def _as_label(self, data_in_float):
     return np.rint(data_in_float).astype(np.int64)
 
-  def _test_complete_flow(
-      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      n_classes, batch_size):
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, n_classes, batch_size, is_fc_v2):
     feature_columns = [
         feature_column.numeric_column('x', shape=(input_dimension,))]
+    if is_fc_v2:
+      feature_columns = [
+          feature_column_v2.numeric_column('x', shape=(input_dimension,))
+      ]
+
     est = dnn.DNNClassifier(
         hidden_units=(2, 2),
         feature_columns=feature_columns,
@@ -357,14 +455,17 @@ class DNNClassifierIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
 
     # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    if is_fc_v2:
+      feature_spec = feature_column_v2.make_parse_example_spec(feature_columns)
+    else:
+      feature_spec = feature_column.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self):
+  def test_numpy_input_fn(self, is_fc_v2):
     """Tests complete flow with numpy_input_fn."""
     n_classes = 3
     input_dimension = 2
@@ -396,9 +497,10 @@ class DNNClassifierIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        is_fc_v2=is_fc_v2)
 
-  def test_pandas_input_fn(self):
+  def test_pandas_input_fn(self, is_fc_v2):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -430,9 +532,10 @@ class DNNClassifierIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        is_fc_v2=is_fc_v2)
 
-  def test_input_fn_from_parse_example(self):
+  def test_input_fn_from_parse_example(self, is_fc_v2):
     """Tests complete flow with input_fn constructed from parse_example."""
     input_dimension = 2
     n_classes = 3
@@ -484,7 +587,8 @@ class DNNClassifierIntegrationTest(test.TestCase):
         predict_input_fn=_predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        is_fc_v2=is_fc_v2)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index 11f1e93630..3b3b63cf65 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -34,6 +34,7 @@ from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.canned import prediction_keys
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -104,6 +105,7 @@ def create_checkpoint(weights_and_biases,
     weights_and_biases: Iterable of tuples of weight and bias values.
     global_step: Initial global step to save in checkpoint.
     model_dir: Directory into which checkpoint is saved.
+    batch_norm_vars: Variables used for batch normalization.
   """
   weights, biases = zip(*weights_and_biases)
   if batch_norm_vars:
@@ -244,8 +246,9 @@ def mock_optimizer(testcase, hidden_units, expected_loss=None):
 class BaseDNNModelFnTest(object):
   """Tests that _dnn_model_fn passes expected logits to mock head."""
 
-  def __init__(self, dnn_model_fn):
+  def __init__(self, dnn_model_fn, is_fc_v2=False):
     self._dnn_model_fn = dnn_model_fn
+    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -260,6 +263,11 @@ class BaseDNNModelFnTest(object):
     """Tests that the expected logits are passed to mock head."""
     with ops.Graph().as_default():
       training_util.create_global_step()
+      age_column = feature_column.numeric_column(
+          'age', shape=np.array(inputs).shape[1:])
+      if self._is_fc_v2:
+        age_column = feature_column_v2.numeric_column(
+            'age', shape=np.array(inputs).shape[1:])
       head = mock_head(
           self,
           hidden_units=hidden_units,
@@ -271,10 +279,7 @@ class BaseDNNModelFnTest(object):
           mode=mode,
           head=head,
           hidden_units=hidden_units,
-          feature_columns=[
-              feature_column.numeric_column(
-                  'age', shape=np.array(inputs).shape[1:])
-          ],
+          feature_columns=[age_column],
           optimizer=mock_optimizer(self, hidden_units))
       with monitored_session.MonitoredTrainingSession(
           checkpoint_dir=self._model_dir) as sess:
@@ -441,6 +446,16 @@ class BaseDNNModelFnTest(object):
     inputs = ([[10.]], [[8.]])
     expected_logits = [[-0.48, 0.48, 0.39]]
 
+    feature_columns = [
+        feature_column.numeric_column('age'),
+        feature_column.numeric_column('height')
+    ]
+    if self._is_fc_v2:
+      feature_columns = [
+          feature_column_v2.numeric_column('age'),
+          feature_column_v2.numeric_column('height')
+      ]
+
     for mode in [
         model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
         model_fn.ModeKeys.PREDICT
@@ -461,10 +476,7 @@ class BaseDNNModelFnTest(object):
             mode=mode,
             head=head,
             hidden_units=hidden_units,
-            feature_columns=[
-                feature_column.numeric_column('age'),
-                feature_column.numeric_column('height')
-            ],
+            feature_columns=feature_columns,
             optimizer=mock_optimizer(self, hidden_units))
         with monitored_session.MonitoredTrainingSession(
             checkpoint_dir=self._model_dir) as sess:
@@ -508,8 +520,9 @@ class BaseDNNModelFnTest(object):
 class BaseDNNLogitFnTest(object):
   """Tests correctness of logits calculated from _dnn_logit_fn_builder."""
 
-  def __init__(self, dnn_logit_fn_builder):
+  def __init__(self, dnn_logit_fn_builder, is_fc_v2=False):
     self._dnn_logit_fn_builder = dnn_logit_fn_builder
+    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -533,6 +546,12 @@ class BaseDNNLogitFnTest(object):
       training_util.create_global_step()
       # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
       # the checkpoint naming is shared.
+      age_column = feature_column.numeric_column(
+          'age', shape=np.array(inputs).shape[1:])
+      if self._is_fc_v2:
+        age_column = feature_column_v2.numeric_column(
+            'age', shape=np.array(inputs).shape[1:])
+
       with variable_scope.variable_scope('dnn'):
         input_layer_partitioner = (
             partitioned_variables.min_max_variable_partitioner(
@@ -540,10 +559,7 @@ class BaseDNNLogitFnTest(object):
         logit_fn = self._dnn_logit_fn_builder(
             units=logits_dimension,
             hidden_units=hidden_units,
-            feature_columns=[
-                feature_column.numeric_column(
-                    'age', shape=np.array(inputs).shape[1:])
-            ],
+            feature_columns=[age_column],
             activation_fn=nn.relu,
             dropout=None,
             input_layer_partitioner=input_layer_partitioner,
@@ -768,6 +784,16 @@ class BaseDNNLogitFnTest(object):
     inputs = ([[10.]], [[8.]])
     expected_logits = [[-0.48, 0.48, 0.39]]
 
+    feature_columns = [
+        feature_column.numeric_column('age'),
+        feature_column.numeric_column('height')
+    ]
+    if self._is_fc_v2:
+      feature_columns = [
+          feature_column_v2.numeric_column('age'),
+          feature_column_v2.numeric_column('height')
+      ]
+
     for mode in [
         model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
         model_fn.ModeKeys.PREDICT
@@ -785,10 +811,7 @@ class BaseDNNLogitFnTest(object):
           logit_fn = self._dnn_logit_fn_builder(
               units=logits_dimension,
               hidden_units=hidden_units,
-              feature_columns=[
-                  feature_column.numeric_column('age'),
-                  feature_column.numeric_column('height')
-              ],
+              feature_columns=feature_columns,
               activation_fn=nn.relu,
               dropout=None,
               input_layer_partitioner=input_layer_partitioner,
@@ -806,9 +829,10 @@ class BaseDNNLogitFnTest(object):
 
 class BaseDNNWarmStartingTest(object):
 
-  def __init__(self, _dnn_classifier_fn, _dnn_regressor_fn):
+  def __init__(self, _dnn_classifier_fn, _dnn_regressor_fn, is_fc_v2=False):
     self._dnn_classifier_fn = _dnn_classifier_fn
     self._dnn_regressor_fn = _dnn_regressor_fn
+    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     # Create a directory to save our old checkpoint and vocabularies to.
@@ -847,6 +871,11 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
+    if self._is_fc_v2:
+      city = feature_column_v2.embedding_column(
+          feature_column_v2.categorical_column_with_vocabulary_list(
+              'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+          dimension=5)
 
     # Create a DNNClassifier and train to save a checkpoint.
     dnn_classifier = self._dnn_classifier_fn(
@@ -879,6 +908,11 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
+    if self._is_fc_v2:
+      city = feature_column_v2.embedding_column(
+          feature_column_v2.categorical_column_with_vocabulary_list(
+              'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+          dimension=5)
 
     # Create a DNNRegressor and train to save a checkpoint.
     dnn_regressor = self._dnn_regressor_fn(
@@ -909,6 +943,11 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
+    if self._is_fc_v2:
+      city = feature_column_v2.embedding_column(
+          feature_column_v2.categorical_column_with_vocabulary_list(
+              'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+          dimension=5)
 
     # Create a DNNClassifier and train to save a checkpoint.
     dnn_classifier = self._dnn_classifier_fn(
@@ -964,6 +1003,13 @@ class BaseDNNWarmStartingTest(object):
             vocabulary_file=vocab_file,
             vocabulary_size=len(vocab_list)),
         dimension=2)
+    if self._is_fc_v2:
+      occupation = feature_column_v2.embedding_column(
+          feature_column_v2.categorical_column_with_vocabulary_file(
+              'occupation',
+              vocabulary_file=vocab_file,
+              vocabulary_size=len(vocab_list)),
+          dimension=2)
 
     # Create a DNNClassifier and train to save a checkpoint.
     partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2)
@@ -991,6 +1037,13 @@ class BaseDNNWarmStartingTest(object):
             vocabulary_file=new_vocab_file,
             vocabulary_size=len(new_vocab_list)),
         dimension=2)
+    if self._is_fc_v2:
+      new_occupation = feature_column_v2.embedding_column(
+          feature_column_v2.categorical_column_with_vocabulary_file(
+              'occupation',
+              vocabulary_file=new_vocab_file,
+              vocabulary_size=len(new_vocab_list)),
+          dimension=2)
     # We can create our VocabInfo object from the new and old occupation
     # FeatureColumn's.
     occupation_vocab_info = estimator.VocabInfo(
@@ -1055,6 +1108,11 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
+    if self._is_fc_v2:
+      locality = feature_column_v2.embedding_column(
+          feature_column_v2.categorical_column_with_vocabulary_list(
+              'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
+          dimension=5)
 
     # Create a DNNClassifier and train to save a checkpoint.
     dnn_classifier = self._dnn_classifier_fn(
@@ -1072,6 +1130,11 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
+    if self._is_fc_v2:
+      city = feature_column_v2.embedding_column(
+          feature_column_v2.categorical_column_with_vocabulary_list(
+              'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+          dimension=5)
     warm_started_dnn_classifier = self._dnn_classifier_fn(
         hidden_units=[256, 128],
         feature_columns=[city],
@@ -1101,8 +1164,9 @@ class BaseDNNWarmStartingTest(object):
 
 class BaseDNNClassifierEvaluateTest(object):
 
-  def __init__(self, dnn_classifier_fn):
+  def __init__(self, dnn_classifier_fn, is_fc_v2=False):
     self._dnn_classifier_fn = dnn_classifier_fn
+    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1119,9 +1183,12 @@ class BaseDNNClassifierEvaluateTest(object):
         (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
          ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
 
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age')],
+        feature_columns=[age_column],
         model_dir=self._model_dir)
     def _input_fn():
       # batch_size = 2, one false label, and one true.
@@ -1159,9 +1226,12 @@ class BaseDNNClassifierEvaluateTest(object):
                                            .0]),), global_step, self._model_dir)
     n_classes = 3
 
+    age_column = feature_column.numeric_column('age', shape=[2])
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age', shape=[2])
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age', shape=[2])],
+        feature_columns=[age_column],
         n_classes=n_classes,
         model_dir=self._model_dir)
     def _input_fn():
@@ -1190,9 +1260,12 @@ class BaseDNNClassifierEvaluateTest(object):
         (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
          ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
 
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age')],
+        feature_columns=[age_column],
         model_dir=self._model_dir)
     def _input_fn():
       # batch_size = 2, one false label, and one true.
@@ -1216,9 +1289,12 @@ class BaseDNNClassifierEvaluateTest(object):
                       global_step, self._model_dir)
     n_classes = 3
 
+    age_column = feature_column.numeric_column('age', shape=[2])
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age', shape=[2])
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age', shape=[2])],
+        feature_columns=[age_column],
         n_classes=n_classes,
         weight_column='w',
         model_dir=self._model_dir)
@@ -1238,8 +1314,9 @@ class BaseDNNClassifierEvaluateTest(object):
 
 class BaseDNNRegressorEvaluateTest(object):
 
-  def __init__(self, dnn_regressor_fn):
+  def __init__(self, dnn_regressor_fn, is_fc_v2=False):
     self._dnn_regressor_fn = dnn_regressor_fn
+    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1257,9 +1334,12 @@ class BaseDNNRegressorEvaluateTest(object):
         (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
          ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
 
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age')],
+        feature_columns=[age_column],
         model_dir=self._model_dir)
     def _input_fn():
       return {'age': [[10.]]}, [[1.]]
@@ -1287,9 +1367,12 @@ class BaseDNNRegressorEvaluateTest(object):
                                            .0]),), global_step, self._model_dir)
     label_dimension = 3
 
+    age_column = feature_column.numeric_column('age', shape=[2])
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age', shape=[2])
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age', shape=[2])],
+        feature_columns=[age_column],
         label_dimension=label_dimension,
         model_dir=self._model_dir)
     def _input_fn():
@@ -1318,9 +1401,12 @@ class BaseDNNRegressorEvaluateTest(object):
                       global_step, self._model_dir)
     label_dimension = 3
 
+    age_column = feature_column.numeric_column('age', shape=[2])
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age', shape=[2])
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age', shape=[2])],
+        feature_columns=[age_column],
         label_dimension=label_dimension,
         weight_column='w',
         model_dir=self._model_dir)
@@ -1339,8 +1425,9 @@ class BaseDNNRegressorEvaluateTest(object):
 
 class BaseDNNClassifierPredictTest(object):
 
-  def __init__(self, dnn_classifier_fn):
+  def __init__(self, dnn_classifier_fn, is_fc_v2=False):
     self._dnn_classifier_fn = dnn_classifier_fn
+    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1358,10 +1445,13 @@ class BaseDNNClassifierPredictTest(object):
         global_step=0,
         model_dir=self._model_dir)
 
+    x_column = feature_column.numeric_column('x')
+    if self._is_fc_v2:
+      x_column = feature_column_v2.numeric_column('x')
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
         label_vocabulary=label_vocabulary,
-        feature_columns=(feature_column.numeric_column('x'),),
+        feature_columns=(x_column,),
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
         x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
@@ -1403,9 +1493,12 @@ class BaseDNNClassifierPredictTest(object):
         global_step=0,
         model_dir=self._model_dir)
 
+    x_column = feature_column.numeric_column('x', shape=(2,))
+    if self._is_fc_v2:
+      x_column = feature_column_v2.numeric_column('x', shape=(2,))
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=(feature_column.numeric_column('x', shape=(2,)),),
+        feature_columns=(x_column,),
         label_vocabulary=label_vocabulary,
         n_classes=3,
         model_dir=self._model_dir)
@@ -1453,8 +1546,9 @@ class BaseDNNClassifierPredictTest(object):
 
 class BaseDNNRegressorPredictTest(object):
 
-  def __init__(self, dnn_regressor_fn):
+  def __init__(self, dnn_regressor_fn, is_fc_v2=False):
     self._dnn_regressor_fn = dnn_regressor_fn
+    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1473,9 +1567,12 @@ class BaseDNNRegressorPredictTest(object):
         global_step=0,
         model_dir=self._model_dir)
 
+    x_column = feature_column.numeric_column('x')
+    if self._is_fc_v2:
+      x_column = feature_column_v2.numeric_column('x')
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=(feature_column.numeric_column('x'),),
+        feature_columns=(x_column,),
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
         x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
@@ -1495,9 +1592,12 @@ class BaseDNNRegressorPredictTest(object):
                                                [.3, -.3,
                                                 .0]),), 100, self._model_dir)
 
+    x_column = feature_column.numeric_column('x', shape=(2,))
+    if self._is_fc_v2:
+      x_column = feature_column_v2.numeric_column('x', shape=(2,))
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=(feature_column.numeric_column('x', shape=(2,)),),
+        feature_columns=(x_column,),
         label_dimension=3,
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
@@ -1594,8 +1694,9 @@ def _assert_simple_summary(testcase, expected_values, actual_summary):
 
 class BaseDNNClassifierTrainTest(object):
 
-  def __init__(self, dnn_classifier_fn):
+  def __init__(self, dnn_classifier_fn, is_fc_v2=False):
     self._dnn_classifier_fn = dnn_classifier_fn
+    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1606,10 +1707,13 @@ class BaseDNNClassifierTrainTest(object):
       shutil.rmtree(self._model_dir)
 
   def test_from_scratch_with_default_optimizer_binary(self):
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(age_column,),
         model_dir=self._model_dir)
 
     # Train for a few steps, then validate final checkpoint.
@@ -1621,11 +1725,14 @@ class BaseDNNClassifierTrainTest(object):
         output_units=1, model_dir=self._model_dir)
 
   def test_from_scratch_with_default_optimizer_multi_class(self):
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     n_classes = 3
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(age_column,),
         n_classes=n_classes,
         model_dir=self._model_dir)
 
@@ -1638,12 +1745,15 @@ class BaseDNNClassifierTrainTest(object):
         output_units=n_classes, model_dir=self._model_dir)
 
   def test_from_scratch_validate_summary(self):
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     opt = mock_optimizer(
         self, hidden_units=hidden_units)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(age_column,),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1667,6 +1777,9 @@ class BaseDNNClassifierTrainTest(object):
       self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)
 
   def test_binary_classification(self):
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     base_global_step = 100
     hidden_units = (2, 2)
     create_checkpoint(
@@ -1682,7 +1795,7 @@ class BaseDNNClassifierTrainTest(object):
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(age_column,),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1713,6 +1826,9 @@ class BaseDNNClassifierTrainTest(object):
         hidden_units=hidden_units, output_units=1, model_dir=self._model_dir)
 
   def test_binary_classification_float_labels(self):
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     base_global_step = 100
     hidden_units = (2, 2)
     create_checkpoint(
@@ -1728,7 +1844,7 @@ class BaseDNNClassifierTrainTest(object):
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(age_column,),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1741,6 +1857,9 @@ class BaseDNNClassifierTrainTest(object):
     self.assertEqual(1, opt.minimize.call_count)
 
   def test_multi_class(self):
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     n_classes = 3
     base_global_step = 100
     hidden_units = (2, 2)
@@ -1759,7 +1878,7 @@ class BaseDNNClassifierTrainTest(object):
     dnn_classifier = self._dnn_classifier_fn(
         n_classes=n_classes,
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(age_column,),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1793,8 +1912,9 @@ class BaseDNNClassifierTrainTest(object):
 
 class BaseDNNRegressorTrainTest(object):
 
-  def __init__(self, dnn_regressor_fn):
+  def __init__(self, dnn_regressor_fn, is_fc_v2=False):
     self._dnn_regressor_fn = dnn_regressor_fn
+    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1805,10 +1925,13 @@ class BaseDNNRegressorTrainTest(object):
       shutil.rmtree(self._model_dir)
 
   def test_from_scratch_with_default_optimizer(self):
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(age_column,),
         model_dir=self._model_dir)
 
     # Train for a few steps, then validate final checkpoint.
@@ -1820,11 +1943,14 @@ class BaseDNNRegressorTrainTest(object):
         output_units=1, model_dir=self._model_dir)
 
   def test_from_scratch(self):
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     opt = mock_optimizer(self, hidden_units=hidden_units)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(age_column,),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1849,6 +1975,9 @@ class BaseDNNRegressorTrainTest(object):
 
   def test_one_dim(self):
     """Asserts train loss for one-dimensional input and logits."""
+    age_column = feature_column.numeric_column('age')
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column('age')
     base_global_step = 100
     hidden_units = (2, 2)
     create_checkpoint(
@@ -1864,7 +1993,7 @@ class BaseDNNRegressorTrainTest(object):
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(age_column,),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1911,13 +2040,17 @@ class BaseDNNRegressorTrainTest(object):
     # See that test for calculation of logits.
     # logits = [[-0.48, 0.48, 0.39]]
     # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
+    age_column = feature_column.numeric_column('age', shape=[input_dimension])
+    if self._is_fc_v2:
+      age_column = feature_column_v2.numeric_column(
+          'age', shape=[input_dimension])
+
     expected_loss = 4.3929
     opt = mock_optimizer(
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=[
-            feature_column.numeric_column('age', shape=[input_dimension])],
+        feature_columns=[age_column],
         label_dimension=label_dimension,
         optimizer=opt,
         model_dir=self._model_dir)
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 9984379e9d..0d189320da 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -301,17 +301,17 @@ class InputLayer(object):
                feature_columns,
                weight_collections=None,
                trainable=True,
-               cols_to_vars=None):
+               cols_to_vars=None,
+               name='feature_column_input_layer'):
     """See `input_layer`."""
 
     self._feature_columns = feature_columns
     self._weight_collections = weight_collections
     self._trainable = trainable
     self._cols_to_vars = cols_to_vars
+    self._name = name
     self._input_layer_template = template.make_template(
-        'feature_column_input_layer',
-        _internal_input_layer,
-        create_scope_now_=True)
+        self._name, _internal_input_layer, create_scope_now_=True)
     self._scope = self._input_layer_template.variable_scope
 
   def __call__(self, features):
@@ -323,6 +323,10 @@ class InputLayer(object):
         cols_to_vars=None,
         scope=self._scope)
 
+  @property
+  def name(self):
+    return self._name
+
   @property
   def non_trainable_variables(self):
     return self._input_layer_template.non_trainable_variables
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 28c5c82d2c..289f6d0d14 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -2045,6 +2045,14 @@ class DenseColumn(FeatureColumn):
     pass
 
 
+def is_feature_column_v2(feature_columns):
+  """Returns True if all feature columns are V2."""
+  for feature_column in feature_columns:
+    if not isinstance(feature_column, FeatureColumn):
+      return False
+  return True
+
+
 def _create_weighted_sum(column,
                          transformation_cache,
                          state_manager,
@@ -2782,6 +2790,12 @@ class SharedEmbeddingStateManager(Layer):
     return self._var_dict[name]
 
 
+def maybe_create_shared_state_manager(feature_columns):
+  if is_feature_column_v2(feature_columns):
+    return SharedEmbeddingStateManager()
+  return None
+
+
 class SharedEmbeddingColumn(
     DenseColumn, SequenceDenseColumn,
     collections.namedtuple(
-- 
GitLab


From 587ec10386c3a939b4dbb7ead2d05516180d95c5 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 19 Sep 2018 03:33:51 +0000
Subject: [PATCH 0355/1357] Fix warning in text_classification_character_cnn.py

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/examples/learn/text_classification_character_cnn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/examples/learn/text_classification_character_cnn.py b/tensorflow/examples/learn/text_classification_character_cnn.py
index afda170e2a..b8506fa8a4 100644
--- a/tensorflow/examples/learn/text_classification_character_cnn.py
+++ b/tensorflow/examples/learn/text_classification_character_cnn.py
@@ -74,7 +74,7 @@ def char_cnn_model(features, labels, mode):
         kernel_size=FILTER_SHAPE2,
         padding='VALID')
     # Max across each filter to get useful features for classification.
-    pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])
+    pool2 = tf.squeeze(tf.reduce_max(conv2, 1), axis=[1])
 
   # Apply regular WX + B and classification.
   logits = tf.layers.dense(pool2, MAX_LABEL, activation=None)
-- 
GitLab


From 1b2d0fcee82ec501cc692dc735065d73c6b5b834 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Tue, 18 Sep 2018 20:33:07 -0700
Subject: [PATCH 0356/1357] First commit for functional while loop. Supports
 single and double derivatives but does not supporting nesting yet.

https://github.com/tensorflow/community/pull/13

PiperOrigin-RevId: 213565971
---
 tensorflow/python/BUILD                       |  23 +
 tensorflow/python/eager/function.py           |  13 +-
 tensorflow/python/kernel_tests/BUILD          |  25 +
 .../python/kernel_tests/while_v2_test.py      | 252 ++++++++
 tensorflow/python/ops/while_v2.py             | 573 ++++++++++++++++++
 tensorflow/tools/pip_package/BUILD            |   1 +
 6 files changed, 885 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/python/kernel_tests/while_v2_test.py
 create mode 100644 tensorflow/python/ops/while_v2.py

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 2eeae773d3..d70e9c5798 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1997,6 +1997,29 @@ py_library(
     ],
 )
 
+py_library(
+    name = "while_v2",
+    srcs = [
+        "ops/while_v2.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":array_ops",
+        ":cond_v2_impl",
+        ":constant_op",
+        ":control_flow_util",
+        ":framework_ops",
+        ":function_def_to_graph",
+        ":functional_ops_gen",
+        ":gradients_impl",
+        ":list_ops",
+        ":tensor_shape",
+        ":util",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/eager:function",
+    ],
+)
+
 py_library(
     name = "cond_v2_impl",
     srcs = [
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 4f1a85a274..a68c6ab3b4 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -826,7 +826,12 @@ def _get_defun_inputs_from_args(args):
   return nest.pack_sequence_as(args, function_inputs)
 
 
-def func_graph_from_py_func(name, python_func, args, kwds, signature=None):
+def func_graph_from_py_func(name,
+                            python_func,
+                            args,
+                            kwds,
+                            signature=None,
+                            func_graph=None):
   """Returns a `FuncGraph` generated from `python_func`.
 
   Args:
@@ -841,6 +846,8 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None):
       `kwds` are ignored, and `python_func` is traced with Tensors conforming
       to `signature`. If `None`, the shapes and dtypes are inferred from the
       inputs.
+    func_graph: Optional. An instance of FuncGraph. If provided, we will use
+      this graph else a new one is built and returned.
 
   Returns:
     A FuncGraph.
@@ -849,7 +856,9 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None):
     TypeError: If any of `python_func`'s return values is neither `None` nor a
       `Tensor`.
   """
-  func_graph = FuncGraph(name)
+  if func_graph is None:
+    func_graph = FuncGraph(name)
+  assert isinstance(func_graph, FuncGraph)
   with func_graph.as_default(), AutomaticControlDependencies() as a:
     variable_scope.get_variable_scope().set_use_resource(True)
 
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 100240a626..a048eaa69f 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -3204,3 +3204,28 @@ tf_py_test(
     grpc_enabled = True,
     tags = ["no_gpu"],  # TODO(b/111656070)
 )
+
+# TODO(b/116053459): Replace with cuda_py_test.
+tf_py_test(
+    name = "while_v2_test",
+    size = "medium",
+    srcs = ["while_v2_test.py"],
+    additional_deps = [
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:gradients_impl",
+        "//tensorflow/python:list_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:tf_optimizer",
+        "//tensorflow/python:while_v2",
+    ],
+    grpc_enabled = True,
+    tags = ["no_gpu"],  # TODO(b/116053459)
+)
diff --git a/tensorflow/python/kernel_tests/while_v2_test.py b/tensorflow/python/kernel_tests/while_v2_test.py
new file mode 100644
index 0000000000..d00e39d482
--- /dev/null
+++ b/tensorflow/python/kernel_tests/while_v2_test.py
@@ -0,0 +1,252 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for while_v2."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+
+from tensorflow.core.protobuf import rewriter_config_pb2
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import meta_graph
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.grappler import tf_optimizer
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import list_ops
+from tensorflow.python.ops import while_v2
+from tensorflow.python.ops.control_flow_ops import while_loop as while_loop_v1
+from tensorflow.python.ops.while_v2 import while_loop as while_loop_v2
+from tensorflow.python.platform import test
+
+
+class WhileV2Test(test.TestCase, parameterized.TestCase):
+
+  def testSingleLoopVar(self):
+    x = constant_op.constant(2.)
+    ret = while_loop_v2(lambda v: v < 8., lambda v: v * v, [x])
+    grad = gradients_impl.gradients(ret, [x])
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(ret), 16.)
+      self.assertSequenceEqual(sess.run(grad), [32.])
+
+  def testMultipleLoopVarsBasic(self):
+    x = constant_op.constant(5.)
+    y = constant_op.constant(3.)
+
+    # x = 5.
+    # y = 3.
+    # while x < 45.:
+    #   x = x * y
+    ret = while_loop_v2(lambda v, _: v < 45., lambda v, w: (v * w, w), [x, y])
+    # ret = [x*y^2, y]
+
+    # Note: This is simply d_ret[0]/d_x since d_ret[1]/d_x is 0.
+    grad = gradients_impl.gradients(ret, [x])  # [2*x*y]
+    with self.test_session() as sess:
+      self.assertSequenceEqual(sess.run(ret), [45., 3.])
+      self.assertSequenceEqual(sess.run(grad), [9.])
+
+  def testMultipleLoopVars(self):
+    x = constant_op.constant(5.)
+    y = constant_op.constant(3.)
+
+    # x = 5.
+    # y = 3.
+    # while x < 45.:
+    #   x = x * y
+    #   y = x + y
+    ret = while_loop_v2(lambda v, _: v < 45., lambda v, w: (v * w, v + w),
+                        [x, y])
+    # ret = [y*x**2 + x*y**2, x*y + x + y]
+
+    gradx_0 = gradients_impl.gradients(ret[0], [x])  # [2*x*y + y**2]
+    gradx_1 = gradients_impl.gradients(ret[1], [x])  # [y + 1]
+    gradx_2 = gradients_impl.gradients(ret, [x])  # [2*x*y + y**2 + 2*y + 1]
+    grady_0 = gradients_impl.gradients(ret[0], [y])  # [2*x*y + x**2]
+    grady_1 = gradients_impl.gradients(ret[1], [y])  # [x + 1]
+    grady_2 = gradients_impl.gradients(ret, [y])  # [2*x*y + x**2 + x + 1]
+    with self.test_session() as sess:
+      self.assertSequenceEqual(sess.run(ret), [120., 23.])
+      self.assertSequenceEqual(sess.run(gradx_0), [39.])
+      self.assertSequenceEqual(sess.run(gradx_1), [4.])
+      self.assertSequenceEqual(sess.run(gradx_2), [43.])
+      self.assertSequenceEqual(sess.run(grady_0), [55.])
+      self.assertSequenceEqual(sess.run(grady_1), [6.])
+      self.assertSequenceEqual(sess.run(grady_2), [61.])
+
+  def testMultipleWhileLoops(self):
+    x = constant_op.constant(2.)
+    ret1 = while_loop_v2(lambda v: v < 4., lambda v: v * v, [x])  # x**2
+    ret2 = while_loop_v2(lambda v: v < 16., lambda v: v * v, ret1)  # x**4
+    grad = gradients_impl.gradients(ret2, [x])  # 4x**3
+    grad_grad = gradients_impl.gradients(grad, [x])  # 12x**2
+    with self.test_session() as sess:
+      self.assertSequenceEqual(sess.run(grad), [32.])
+      self.assertSequenceEqual(sess.run(grad_grad), [48.])
+
+  def testDoubleDerivative(self):
+    x = constant_op.constant(2.)
+    ret = while_loop_v2(lambda v: v < 8., lambda v: v**2, [x])  # x**4
+    grad = gradients_impl.gradients(ret, [x])  # 4x**3
+    grad_grad = gradients_impl.gradients(grad, [x])  # 12x**2
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(ret), 16.)
+      self.assertSequenceEqual(sess.run(grad), [32.])
+      self.assertSequenceEqual(sess.run(grad_grad), [48.])
+
+  def testPruning(self):
+    x = constant_op.constant(1)
+
+    tensor_list = list_ops.empty_tensor_list(
+        element_dtype=x.dtype, element_shape=x.shape)
+
+    def Cond(x, tl):
+      del tl  # Unused for Cond.
+      return x < 5
+
+    def Body(x, tl):
+      return x + 1, list_ops.tensor_list_push_back(tl, x)
+
+    outputs = while_loop_v1(Cond, Body, [x, tensor_list])
+
+    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+    train_op.append(outputs[0])
+
+    def GetOptimizedGraph():
+      mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
+      rewriter_config = rewriter_config_pb2.RewriterConfig(
+          constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
+          memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)
+      return tf_optimizer.OptimizeGraph(rewriter_config, mg)
+
+    g = GetOptimizedGraph()
+    self.assertEqual(len([n for n in g.node if n.op == "Enter"]), 1)
+
+    stack = list_ops.tensor_list_stack(outputs[1], element_dtype=x.dtype)
+    train_op.append(stack)
+    g = GetOptimizedGraph()
+    self.assertEqual(len([n for n in g.node if n.op == "Enter"]), 2)
+
+  def testCaptureExternalTensorInCond(self):
+    x = constant_op.constant(2.)
+    y = constant_op.constant(1.)
+    ret = while_loop_v2(lambda v: v + y < 9., lambda v: v * 3., [x])
+    grad = gradients_impl.gradients(ret, [x])
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(ret), 18.)
+      self.assertSequenceEqual(sess.run(grad), [9.])
+
+  def testCaptureExternalTensorInBody(self):
+    x = constant_op.constant(2.)
+    y = constant_op.constant(3.)
+    ret = while_loop_v2(lambda v: v < 8., lambda v: v * y, [x])
+    grad = gradients_impl.gradients(ret, [x])
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(ret), 18.)
+      self.assertSequenceEqual(sess.run(grad), [9.])
+
+  def testLoopWithTensorListPushBack(self):
+    x = constant_op.constant(2.)
+
+    tensor_list = list_ops.empty_tensor_list(
+        element_dtype=dtypes.float32, element_shape=ScalarShape())
+
+    def Cond(x, tl):
+      del tl  # Unused for Cond.
+      return x < 5.
+
+    def Body(x, tl):
+      tl = list_ops.tensor_list_push_back(tl, x)
+      tl = list_ops.tensor_list_push_back(tl, constant_op.constant(100.))
+      return x**2., tl
+
+    ret = while_loop_v2(Cond, Body, [x, tensor_list])
+    grad = gradients_impl.gradients(ret[0], x)
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(ret[0]), 16.)
+      self.assertSequenceEqual(sess.run(grad), [32.])
+
+  def testDuplicateAccumulator(self):
+    x = constant_op.constant(2.)
+
+    tensor_list = list_ops.empty_tensor_list(
+        element_dtype=dtypes.float32, element_shape=ScalarShape())
+
+    def Cond(x, tl):
+      del tl  # Unused for Cond.
+      return x < 5.
+
+    def Body(x, tl):
+      # There is an accumulator in the loop already so we should not add
+      # another.
+      tl = list_ops.tensor_list_push_back(tl, x)
+      return x**2., tl
+
+    ret = while_loop_v2(Cond, Body, [x, tensor_list])
+
+    for op in ops.get_default_graph().get_operations():
+      if op.type == "While":
+        while_op = op
+
+    body_graph = while_v2._get_body_graph(while_op)
+    # body_graph.inputs: [counter_arg, x_arg, tl_arg, *accumulators]
+    x_input_t = body_graph.inputs[1]
+    accumulator_count = len(
+        [c for c in x_input_t.consumers() if c.type == "TensorListPushBack"])
+    self.assertEqual(accumulator_count, 1)
+
+    grad = gradients_impl.gradients(ret[0], x)
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(ret[0]), 16.)
+      self.assertSequenceEqual(sess.run(grad), [32.])
+
+  @parameterized.named_parameters(
+      ("Unknown shape", None),
+      ("Partially defined shape", [None]),
+      ("Fully defined shape", [1, 2]),
+  )
+  def testTensorListOutputElementShape(self, shape):
+    self.skipTest("b/115982901")
+    x = constant_op.constant(2.)
+    y = array_ops.placeholder(dtype=dtypes.float32, shape=shape)
+    ret = while_loop_v2(lambda v, u: v < 8., lambda v, u: (v * v, u), [x, y])
+
+    # Get the TensorList output of While op containing the accumulated values
+    # of y.
+    while_op = ret[0].op
+    body_graph = while_v2._get_body_graph(while_op)
+    # body_graph.inputs: [counter_arg, x_arg, y_arg, *accumulators]
+    y_input_t = body_graph.inputs[2]
+    push_back_node = [c for c in y_input_t.consumers()
+                      if c.type == "TensorListPushBack"][0]
+    output_idx = body_graph.outputs.index(push_back_node.outputs[0])
+    output = while_op.outputs[output_idx]
+
+    _, val = list_ops.tensor_list_pop_back(output,
+                                           element_dtype=dtypes.float32)
+    self.assertEqual(val.shape, tensor_shape.TensorShape(shape))
+
+
+def ScalarShape():
+  return ops.convert_to_tensor([], dtype=dtypes.int32)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
new file mode 100644
index 0000000000..801217fe66
--- /dev/null
+++ b/tensorflow/python/ops/while_v2.py
@@ -0,0 +1,573 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""while_v2 and gradient.
+
+This is a version of while_loop that emits a single While op, as well as the
+gradient function for While ops produced by while_loop. This will eventually
+replace the current tf.while_loop implementation once it reaches feature and
+performance parity.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from tensorflow.core.framework import attr_value_pb2
+from tensorflow.python.eager import function
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import function_def_to_graph
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import cond_v2_impl as cond_v2
+from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import gen_functional_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import list_ops
+from tensorflow.python.util import nest
+
+# pylint: disable=protected-access
+
+# TODO(b/79881896): Handle external control dependencies. tf.while_loop allows
+# control dependencies on external nodes with at least 1 output.
+# Another idea is to create const nodes outside the loop and add control edges
+# to them and then pass those in as data inputs. This should probably be
+# handled in the CapturingGraph itself.
+
+
+def while_loop(cond, body, loop_vars, name=None):
+  """Like tf.while_loop, except emits a single While op."""
+  if not name:
+    name = "while"
+
+  with ops.name_scope(name) as scope:
+    with ops.name_scope(None):
+      cond_name = _get_unique_name(("%scond" % scope).replace("/", "_"))
+      body_name = _get_unique_name(("%sbody" % scope).replace("/", "_"))
+
+    flattened_loop_vars = nest.flatten(loop_vars)
+    num_outputs = len(flattened_loop_vars)
+
+    # Add loop counter needed for computing gradients.
+    flattened_loop_vars = [constant_op.constant(0., name="loop_counter")
+                          ] + flattened_loop_vars
+
+    # Build a `cond` wrapper that can handle the extra counter loop_var.
+    def wrapped_cond(unused_loop_counter, *loop_vars):
+      return cond(*loop_vars)
+
+    cond_graph = function.func_graph_from_py_func(cond_name, wrapped_cond,
+                                                  flattened_loop_vars, {})
+
+    # Add external_captures of cond to the list of loop vars.
+    # Note that external tensors will be treated as loop invariants, i.e.,
+    # the value of that tensor in each iteration is the same as it was at the
+    # beginning of the loop execution.
+    flattened_loop_vars = flattened_loop_vars + cond_graph.external_captures
+
+    def wrapped_body(loop_counter, *args):
+      """Loop body augmented with counter update.
+
+      Args:
+        loop_counter: Loop counter which needs to be incremented in the body.
+        *args: List of args
+          args[:num_outputs] - Args for the original loop body.
+          args[num_outputs:] - External captures of cond. These get passed
+            through as is.
+
+      Returns:
+        A list of tensors the same length as args.
+      """
+      outputs = body(*args[:num_outputs])
+      if not isinstance(outputs, collections.Sequence):
+        outputs = [outputs]
+
+      # Return the external_captures of cond_graph as is, i.e., treat them as
+      # loop invariants.
+      # TODO(srbs): Update lowering code to create _Enter nodes with
+      # is_constant=True for inputs that are directly passed to outputs.
+      return [loop_counter + 1] + list(outputs) + list(args[num_outputs:])
+
+    body_graph = function.func_graph_from_py_func(body_name, wrapped_body,
+                                                  flattened_loop_vars, {})
+    # Add external captures of body to the list of loop vars.
+    # Note that external tensors will be treated as loop invariants, i.e.,
+    # the value of that tensor in each iteration is the same as it was at the
+    # beginning of the loop execution.
+    flattened_loop_vars = flattened_loop_vars + body_graph.external_captures
+    # TODO(srbs): Update lowering code to create _Enter nodes with
+    # is_constant=True for inputs that are directly passed to outputs.
+    body_graph.outputs.extend(body_graph.internal_captures)
+
+    # Capture `external_captures` of `body_graph` in `cond_graph` so that it
+    # expects to receive those as arguments.
+    # TODO(srbs): Dedup tensors that are captured in both the cond and body.
+    # This logic already exists in cond_v2.
+    with cond_graph.as_default():
+      for external_capture in body_graph.external_captures:
+        cond_graph.capture(external_capture)
+
+    # Export all tensors in the loop body that may be needed for gradient
+    # computation. We do this by accumulating the intermediate values in
+    # TensorLists.
+    intermediate_tensors = _get_intermediates(body_graph)
+
+    for intermediate_tensor in intermediate_tensors:
+      # TODO(srbs): Cache and re-use empty tensor lists.
+      tensor_list = list_ops.empty_tensor_list(
+          element_dtype=intermediate_tensor.dtype,
+          element_shape=_get_tensor_convertible_shape(
+              intermediate_tensor.shape))
+      flattened_loop_vars.append(tensor_list)
+      with cond_graph.as_default():
+        # Add a placeholder to cond_graph's inputs corresponding to the
+        # tensor_list.
+        cond_graph.capture(tensor_list)
+      with body_graph.as_default():
+        # Push the intermediate tensor to the tensor list. This captures the
+        # `tensor_list` as well.
+        appended_tensor_list = list_ops.tensor_list_push_back(
+            tensor_list,
+            intermediate_tensor)
+        # Add this modified tensor list to the list of outputs.
+        body_graph.outputs.append(appended_tensor_list)
+
+    outputs = gen_functional_ops._while(
+        flattened_loop_vars,
+        cond_v2._create_new_tf_function(cond_graph),
+        cond_v2._create_new_tf_function(body_graph),
+        name=scope)
+
+    _maybe_set_lowering_attr(outputs[0].op)
+
+  # First var is loop counter.
+  if num_outputs == 1:
+    return outputs[1]
+  else:
+    return nest.pack_sequence_as(loop_vars, outputs[1:1 + num_outputs])
+
+
+@ops.RegisterGradient("While")
+def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
+  """The gradient of a While op produced by while_loop."""
+  body_graph = _get_body_graph(op)
+
+  # Replace None gradients with zeros. This is needed because `grads` could have
+  # None incoming gradients for the TensorLists. If we pass None's through, the
+  # custom gradient of TensorListPopBack will create an EmptyTensorList inside
+  # the FuncGraph which is undesirable.
+  # TODO(b/80444525): There might be an issue with treating no gradient as zero
+  # gradient in certain cases. Consider replacing None gradients with Zeros
+  # for accumulators only.
+  grads = [
+      g if g is not None else array_ops.zeros_like(output)
+      for g, output in zip(grads, op.outputs)
+  ]
+
+  body_grad_graph, args = _create_grad_func(
+      body_graph, grads,
+      _get_unique_name("%s_grad" % body_graph.name), op)
+
+  intermediate_tensors = _get_intermediates(body_grad_graph)
+
+  for intermediate_tensor in intermediate_tensors:
+    tensor_list = list_ops.empty_tensor_list(
+        element_dtype=intermediate_tensor.dtype,
+        element_shape=_get_tensor_convertible_shape(intermediate_tensor.shape))
+    with body_grad_graph.as_default():
+      tensor_list_ph = body_grad_graph.capture(tensor_list, whitelisted=True)
+      # Push the intermediate tensor to the tensor list.
+      appended_tensor_list = list_ops.tensor_list_push_back(tensor_list_ph,
+                                                            intermediate_tensor)
+      # Add this modified tensor list to the list of outputs.
+      body_grad_graph.outputs.append(appended_tensor_list)
+
+  def grad_cond(counter, max_iters, *unused_args):
+    return counter < max_iters
+
+  loop_vars = args + body_grad_graph.external_captures
+  cond_grad_graph = function.func_graph_from_py_func(
+      _get_unique_name("%s_grad_cond" % op.name),
+      grad_cond, loop_vars, {})
+
+  assert len(loop_vars) == len(body_grad_graph.inputs)
+  assert len(loop_vars) == len(body_grad_graph.outputs)
+  assert len(loop_vars) == len(cond_grad_graph.inputs)
+
+  outputs = gen_functional_ops._while(
+      loop_vars,
+      cond_v2._create_new_tf_function(cond_grad_graph),
+      cond_v2._create_new_tf_function(body_grad_graph),
+      name=_get_unique_name("%s_grad" % op.name))
+
+  _maybe_set_lowering_attr(outputs[0].op)
+
+  # outputs[0] is the loop counter.
+  # outputs[1] is the total number of loop iterations.
+  return outputs[2:2 + len(op.inputs)]
+
+
+# TODO(srbs): Pull this into common utils for cond_v2 and while_v2.
+def _get_body_graph(while_op):
+  """Returns `FuncGraph` for the while body.
+
+  Args:
+    while_op: The While Operation.
+
+  Returns:
+    `FuncGraph` for the while body.
+  """
+  extra_inputs = list(while_op.inputs)
+  input_shapes = [t.shape for t in extra_inputs]
+  func_name = while_op.get_attr("body").name
+  fdef = while_op.graph._get_function(func_name).definition
+  func_graph = function_def_to_graph.function_def_to_graph(fdef, input_shapes)
+  func_graph._while = while_op
+  return func_graph
+
+
+def _create_grad_func(func_graph, grads, name, while_op):
+  """Builds and returns the gradient FuncGraph of `func_graph` and its args.
+
+  The returned grad_func_graph must be called with the returned
+  args + grad_func_graph.captures.
+
+  Args:
+    func_graph: FuncGraph for the forward body function.
+    grads: The incoming grads for `func_graph`'s outputs.
+    name: Name of the returned gradient function.
+    while_op: The forward While op.
+
+  Returns:
+    2-tuple of (grad_func_graph, args).
+  """
+  assert len(func_graph.outputs) == len(grads)
+
+  loop_counter = constant_op.constant(0.)
+  # TODO(srbs): For nested while loops will need to lookup this value from
+  # the accumulator of the enclosing while loop. For now use as is assuming
+  # there is no nesting.
+  num_iters_t = while_op.outputs[0]
+
+  args = [loop_counter, num_iters_t] + grads
+
+  # Note: The returned function does not have `args` in the list of
+  # `external_captures`.
+  grad_func_graph = function.func_graph_from_py_func(
+      name,
+      lambda *args: _grad_fn(func_graph, args),
+      args, {},
+      func_graph=_WhileBodyGradFuncGraph(name, func_graph))
+
+  # Add the popped accumulators to the list of outputs.
+  for internal_capture in grad_func_graph.internal_captures:
+    grad_func_graph.outputs.append(
+        grad_func_graph.popped_tensor_lists[internal_capture])
+
+  return grad_func_graph, args
+
+
+def _grad_fn(func_graph, args):
+  """Computes the gradient of `func_graph` in the current graph.
+
+  This function builds the gradient graph of the corresponding forward-pass
+  `func_graph` by differentiating `func_graph`'s outputs w.r.t. its inputs.
+
+  Args:
+    func_graph: function.FuncGraph. The corresponding forward-pass function.
+    args: The input arguments. args[0] - Loop counter args[1] - Total number of
+      iterations.
+      args[2:] - Incoming gradients for `func_graph.outputs`.
+
+  Returns:
+    The output gradient Tensors.
+  """
+  xs = func_graph.inputs
+  ys = func_graph.outputs
+  grad_ys = args[2:]
+
+  # Build the gradient graph. Note that this builds the gradient computation of
+  # func_graph in the current graph, which requires capturing tensors from
+  # func_graph. The captured func_graph tensors are resolved to external tensors
+  # in _resolve_grad_inputs.
+  # TODO(srbs): Mark GradientsHelper as public?
+  grad_outs = gradients_impl._GradientsHelper(
+      ys, xs, grad_ys=grad_ys, src_graph=func_graph)
+
+  assert all([g is not None for g in grad_outs])
+  counter = args[0]
+  total_iters = args[1]
+  return [counter + 1, total_iters] + grad_outs
+
+
+def _get_intermediates(func_graph):
+  """Returns all tensors in `func_graph` that should be accumulated."""
+  # We currently accumulate output tensors of most ops in the function and rely
+  # on the pruning pass to get rid of the unused accumulators at runtime.
+  # However, this can bloat the GraphDef and make debugging harder so we perform
+  # some optimizations.
+  #
+  # Optimization we currently perform:
+  # 1. We do not accumulate tensors which already have an accumulator
+  #    in the loop body.
+  # 2. We do not accumulate outputs of Identity nodes. When building the
+  #    FuncGraph, we add an Identity node for each output (see
+  #    `AutomaticControlDependencies.mark_as_return`). Accumulating outputs
+  #    of all these nodes bloats the GraphDef quite a bit so we remove those.
+  #    Since the gradient of an Identity node does not rely on its forward op's
+  #    input this is safe to do.
+  #
+  # Other possible optimizations:
+  # 1. Only accumulate tensors that will be required by the backward pass.
+  #    This will require running the gradient pass and hence would increase the
+  #    graph building time for the forward pass.
+  # 2. Do not accumulate Const nodes created inside the loop body.
+  # 3. Do not accumulate inputs that are passed as-is, e.g. loop invariants.
+  # TODO(srbs): 2 and 3 may be hard optimizations for the runtime optimizer
+  # since it requires knowledge of the while loop semantics. If so, consider
+  # doing those here.
+  intermediates = []
+
+  for op in func_graph.get_operations():
+    if op.type == "Identity":
+      continue
+    for o in op.outputs:
+      if (o != func_graph.inputs[0] and  # Loop counter.
+          _get_accumulator(o) is None):  # Has existing accumulator.
+        intermediates.append(o)
+  return intermediates
+
+
+def _get_accumulator(tensor):
+  r"""Returns TensorList if any containing accumulated values of tensor.
+
+  We try to find a pattern of the form:
+
+     input_tl   tensor
+        \        /
+    (TensorListPushBack)
+            |
+        output_tl
+
+  which satisfies the following conditions:
+
+  1. input_tl must be in tensor.graph.inputs.
+  2. output_tl or Identity(output_tl) must be in tensor.graph.outputs.
+  3. tensor.graph.input_index(input_tl) == tensor.graph.output_index(output_t).
+
+  output_tl or Identity(output_tl) (whichever is in tensor.graph.outputs) is
+  returned if such a pattern is found else None is returned.
+
+  Args:
+    tensor: The Tensor to be accumulated.
+
+  Returns:
+    A variant tensor in the same graph as `tensor` or None if no accumulator is
+    found.
+  """
+  assert isinstance(tensor.graph, function.FuncGraph)
+
+  def get_func_graph_output(t):
+    """Returns t or Identity(t) whichever exists in graph outputs else None."""
+    if t in tensor.graph.outputs:
+      return t
+    # tf.defun adds an Identity for each output, check whether that is the case.
+    identity_op = t.consumers()[0]
+    if (identity_op.type == "Identity" and
+        identity_op.outputs[0] in tensor.graph.outputs):
+      return identity_op.outputs[0]
+    return None
+
+  for consumer in tensor.consumers():
+    # Find the consumer that is a TensorListPushBack node whose TensorList input
+    # is in the list of function inputs.
+    if (consumer.type != "TensorListPushBack" or
+        consumer.inputs[0] not in tensor.graph.inputs):
+      continue
+
+    output = get_func_graph_output(consumer.outputs[0])
+    if output is None:
+      # The TensorList output of `consumer` is not in the list of function
+      # outputs.
+      continue
+
+    accum_input_idx = tensor.graph.inputs.index(consumer.inputs[0])
+    accum_output_idx = tensor.graph.outputs.index(output)
+    if accum_input_idx == accum_output_idx:
+      return output
+  return None
+
+
+# TODO(srbs): Add to common utils for cond_v2 and while_v2.
+def _get_unique_name(name):
+  """Returns a name that is unique in the root graph of `func_graph`.
+
+  Args:
+    name: String to uniquify.
+
+  Returns:
+    A string.
+  """
+  with ops.init_scope():
+    return ops.get_default_graph().unique_name(name)
+
+
+class _WhileBodyGradFuncGraph(function.FuncGraph):
+  """FuncGraph for the gradient function of the body of a While op.
+
+  Contains the logic for capturing the tensors from the body of the forward
+  While op which is as follows:
+  1. Find the accumulator for that tensor.
+  2. Capture the forward While op output tensor corresponding to the
+     accumulator in this FuncGraph.
+  3. Pop a value from the captured placeholder and use it as the captured value
+     for the forward pass tensor.
+
+  This only allows capturing tensors in the forward graph. A ValueError is
+  raised if an attempt is made to capture a tensor not in the forward graph.
+  To manually capture capture a tensor that is not in the forward graph, call
+  `capture` with `whitelisted=True`.
+
+  Note: The `captures` dict does not contain the forward tensor since it is not
+  directly captured. It contains the accumulator corresponding to this forward
+  tensor.
+
+  Attributes:
+    popped_tensor_lists: Dict from the captured accumulator placeholder to the
+      TensorList obtained after popping the intermediate tensor from it. The
+      values of this dict need to be added to the list of outputs.
+  """
+
+  def __init__(self, name, forward_graph):
+    super(_WhileBodyGradFuncGraph, self).__init__(name)
+    self.popped_tensor_lists = {}
+    # FuncGraph for the body of the forward While op.
+    self._forward_graph = forward_graph
+    # Dict from forward intermediate tensor to the corresponding "popped" tensor
+    # in this graph.
+    self._indirect_captures = {}
+    # Dict from forward graph tensor to the While op output corresponding to its
+    # accumulator.
+    self._tensor_to_accumulator = {}
+
+  def capture(self, tensor, name=None, whitelisted=False):
+    """Selectively captures external tensors.
+
+    If `whitelisted` is False only allows capturing tensors in the
+    `_forward_graph`.
+
+    Args:
+      tensor: Tensor. May be from this FuncGraph or a different graph.
+      name: Optional name if a placeholder is created.
+      whitelisted: If False (default), only allows capturing tensors from the
+        forward graph.
+
+    Returns:
+      The placeholder in this graph for the tensor.
+
+    Raises:
+      ValueError: If attempting to capture an external tensor not in the forward
+        graph with `whitelisted` set to False.
+    """
+    if (not whitelisted and tensor.graph is not self and
+        tensor.graph != self._forward_graph):
+      raise ValueError("Attempting to capture tensor", str(tensor),
+                       " which is not in the forward graph but in ",
+                       _graph_name(tensor.graph), ".")
+    return super(_WhileBodyGradFuncGraph, self).capture(tensor, name)
+
+  def _capture_helper(self, tensor, name):
+    if tensor.graph is not self._forward_graph:
+      return super(_WhileBodyGradFuncGraph, self)._capture_helper(tensor, name)
+
+    captured_tensor = self._indirect_captures.get(tensor)
+    if captured_tensor is not None:
+      # For GradientTape housekeeping.
+      assert self._tensor_to_accumulator[tensor] in self.captures
+      super(_WhileBodyGradFuncGraph, self)._capture_helper(
+          self._tensor_to_accumulator[tensor], name)
+      return captured_tensor
+
+    assert tensor not in self._tensor_to_accumulator
+
+    accumulator = None
+
+    # Find the TensorList that was used to accumulate the tensors of this
+    # intermediate tensor.
+    accumulator = _get_accumulator(tensor)
+    if accumulator is None:
+      raise ValueError("Reference to un-accumulated intermediate tensor: ",
+                       tensor.name)
+    assert accumulator.graph == self._forward_graph
+    # Get the While op output corresponding to the accumulator.
+    accumulator = self._forward_graph._while.outputs[self._forward_graph.outputs
+                                                     .index(accumulator)]
+
+    assert accumulator.graph == self._forward_graph.outer_graph
+    self._tensor_to_accumulator[tensor] = accumulator
+
+    # Capture the `accumulator`.
+    accumulator_ph = super(_WhileBodyGradFuncGraph, self)._capture_helper(
+        accumulator, name)
+    new_tensor_list, captured_tensor = list_ops.tensor_list_pop_back(
+        accumulator_ph, element_dtype=tensor.dtype)
+    self._indirect_captures[tensor] = captured_tensor
+    self.popped_tensor_lists[accumulator_ph] = new_tensor_list
+    return captured_tensor
+
+
+# TODO(srbs): Move to common utils for cond_v2 and while_v2.
+def _maybe_set_lowering_attr(op):
+  """Sets the flag to enable lowering on the `While` op if necessary.
+
+  Lowering allows while_v2 to avoid some of the limitations of Functions,
+  allowing users to specify devices & colocation inside of while_v2
+  branches, and enabling non-strict evaluation & partial pruning of while_v2
+  branches. This brings while_v2 closer to feature parity with
+  tf.while_loop.
+
+  However, we do not lower `While` in the XLA context because it is easier
+  for XLA to apply its own optimizations when dealing with un-lowered
+  `While` operators than with low-level control flow primitives.
+
+  Args:
+    op: The While op.
+  """
+  if not control_flow_util.IsInXLAContext(op):
+    # pylint: disable=protected-access
+    op._set_attr("_lower_using_switch_merge", attr_value_pb2.AttrValue(b=True))
+    # pylint: enable=protected-access
+
+
+def _get_tensor_convertible_shape(shape):
+  assert isinstance(shape, tensor_shape.TensorShape)
+  if shape.is_fully_defined():
+    return shape
+  if not shape:  # Unknown shape.
+    return -1
+  # Partially defined shape.
+  shape_list = shape.as_list()
+  shape_list = [s if s is not None else -1 for s in shape_list]
+  return ops.convert_to_tensor(shape_list)
+
+
+def _graph_name(graph):
+  if isinstance(graph, function.FuncGraph):
+    return graph.name
+  return "Base"
+
+
+# pylint: enable=protected-access
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 31a3712de8..f86cb03995 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -114,6 +114,7 @@ COMMON_PIP_DEPS = [
     "//tensorflow/python/tools:tools_pip",
     "//tensorflow/python/tools/api/generator:create_python_api",
     "//tensorflow/python:test_ops",
+    "//tensorflow/python:while_v2",
     "//tensorflow/tools/dist_test/server:grpc_tensorflow_server",
 ]
 
-- 
GitLab


From 50e7f03591a5d2b6b2abc29e5549ea0077259706 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Tue, 18 Sep 2018 20:43:46 -0700
Subject: [PATCH 0357/1357] Putting `NodeExecStatsWrapper` behind an interface
 and providing a light-weight statistics collector for tf.data performance
 modeling.

PiperOrigin-RevId: 213566889
---
 tensorflow/core/common_runtime/executor.cc    |  56 +++---
 .../common_runtime/step_stats_collector.cc    | 182 ++++++++++++------
 .../common_runtime/step_stats_collector.h     | 137 ++++++-------
 .../core/kernels/data/captured_function.cc    | 124 +++++++++---
 4 files changed, 304 insertions(+), 195 deletions(-)

diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 84865397bc..d0a0767d6b 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -76,56 +76,47 @@ bool IsInitializationOp(const Node* node) {
 namespace nodestats {
 inline int64 NowInNsec() { return Env::Default()->NowNanos(); }
 
-void SetScheduled(NodeExecStatsWrapper* stats, int64 micros) {
+void SetScheduled(NodeExecStatsInterface* stats, int64 micros) {
   if (!stats) return;
   stats->SetScheduled(micros * EnvTime::kMicrosToNanos);
 }
 
-void SetAllStart(NodeExecStatsWrapper* stats) {
+void SetAllStart(NodeExecStatsInterface* stats) {
   if (!stats) return;
   stats->RecordExecutorStarted();
 }
 
-void SetOpStart(NodeExecStatsWrapper* stats) {
+void SetOpStart(NodeExecStatsInterface* stats) {
   if (!stats) return;
   stats->RecordComputeStarted();
 }
 
-void SetOpEnd(NodeExecStatsWrapper* stats) {
+void SetOpEnd(NodeExecStatsInterface* stats) {
   if (!stats) return;
   stats->RecordComputeEnded();
 }
 
-void SetAllEnd(NodeExecStatsWrapper* stats) {
+void SetAllEnd(NodeExecStatsInterface* stats) {
   if (!stats) return;
   stats->RecordExecutorEnded();
 }
 
-void SetOutput(NodeExecStatsWrapper* stats, int slot, const Tensor* v) {
+void SetOutput(NodeExecStatsInterface* stats, int slot, const Tensor* v) {
   if (!stats) return;
   stats->SetOutput(slot, v);
 }
 
-void SetMemory(NodeExecStatsWrapper* stats, OpKernelContext* ctx) {
+void SetMemory(NodeExecStatsInterface* stats, OpKernelContext* ctx) {
   if (!stats) return;
   stats->SetMemory(ctx);
 }
 
-void SetReferencedTensors(NodeExecStatsWrapper* stats,
+void SetReferencedTensors(NodeExecStatsInterface* stats,
                           const TensorReferenceVector& tensors) {
   if (!stats) return;
   stats->SetReferencedTensors(tensors);
 }
 
-// Sets the timeline_label field of *stats, using data from *node.
-// Returns true iff the node is a transfer node.
-bool SetTimelineLabel(const Node* node, NodeExecStatsWrapper* stats) {
-  if (!stats) {
-    return false;
-  }
-  return stats->SetTimelineLabel(node);
-}
-
 }  // namespace nodestats
 
 class ExecutorImpl;
@@ -1301,7 +1292,7 @@ class ExecutorState {
 
   // After item->kernel computation is done, processes its outputs.
   Status ProcessOutputs(const NodeItem& item, OpKernelContext* ctx,
-                        EntryVector* outputs, NodeExecStatsWrapper* stats);
+                        EntryVector* outputs, NodeExecStatsInterface* stats);
 
   // After processing the outputs, propagates the outputs to their dsts.
   // Contents of *outputs are left in an indeterminate state after
@@ -1312,7 +1303,7 @@ class ExecutorState {
   // "node" just finishes. Takes ownership of "stats". Returns true if
   // execution has completed.
   bool NodeDone(const Status& s, const Node* node, const TaggedNodeSeq& ready,
-                NodeExecStatsWrapper* stats,
+                NodeExecStatsInterface* stats,
                 TaggedNodeReadyQueue* inline_ready);
 
   // Schedule all the expensive nodes in 'ready', and put all the inexpensive
@@ -1513,7 +1504,7 @@ void ExecutorState::RunAsync(Executor::DoneCallback done) {
 struct ExecutorState::AsyncState {
   AsyncState(const OpKernelContext::Params& p, const TaggedNode& _tagged_node,
              const NodeItem* _item, Entry* _first_input,
-             NodeExecStatsWrapper* _stats)
+             NodeExecStatsInterface* _stats)
       : saved_inputs(*p.inputs),
         saved_input_device_contexts(*p.input_device_contexts),
         saved_input_alloc_attrs(*p.input_alloc_attrs),
@@ -1538,7 +1529,7 @@ struct ExecutorState::AsyncState {
   const NodeItem* item;
   Entry* first_input;
   OpKernelContext ctx;
-  NodeExecStatsWrapper* stats;
+  NodeExecStatsInterface* stats;
 
  private:
   OpKernelContext::Params* ParamsButClearingEigenGPUDevice(
@@ -1583,7 +1574,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
   params.stats_collector = stats_collector_;
 
   Status s;
-  NodeExecStatsWrapper* stats = nullptr;
+  NodeExecStatsInterface* stats = nullptr;
   EntryVector outputs;
   bool completed = false;
   inline_ready.push_back(tagged_node);
@@ -1613,7 +1604,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
     if (stats_collector_ && !tagged_node.is_dead) {
       // track allocations if and only if we are collecting statistics
       params.track_allocations = true;
-      stats = new NodeExecStatsWrapper(node->name());
+      stats = stats_collector_->CreateNodeExecStats(node);
       nodestats::SetScheduled(stats, scheduled_nsec);
       nodestats::SetAllStart(stats);
     }
@@ -1671,7 +1662,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
 
         auto done = [this, state]() {
           Device* device = impl_->params_.device;
-          NodeExecStatsWrapper* stats = state->stats;  // Shorthand
+          NodeExecStatsInterface* stats = state->stats;  // Shorthand
           Entry* first_input = state->first_input;     // Shorthand
 
           nodestats::SetOpEnd(stats);
@@ -1862,7 +1853,7 @@ Status ExecutorState::PrepareInputs(const NodeItem& item, Entry* first_input,
 
 Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx,
                                      EntryVector* outputs,
-                                     NodeExecStatsWrapper* stats) {
+                                     NodeExecStatsInterface* stats) {
   const Node* node = item.node;
   DCHECK_EQ(0, outputs->size());
   outputs->resize(item.num_outputs);
@@ -2080,16 +2071,15 @@ void ExecutorState::PropagateOutputs(const TaggedNode& tagged_node,
 
 bool ExecutorState::NodeDone(const Status& s, const Node* node,
                              const TaggedNodeSeq& ready,
-                             NodeExecStatsWrapper* stats,
+                             NodeExecStatsInterface* stats,
                              TaggedNodeReadyQueue* inline_ready) {
   nodestats::SetAllEnd(stats);
-  if (stats_collector_ != nullptr &&
-      !nodestats::SetTimelineLabel(node, stats)) {
-    // Only record non-transfer nodes.
-    // Transfers 'stats' ownership to 'stats_collector_'.
-    stats_collector_->Save(impl_->params_.device->name(), stats);
-  } else if (stats) {
-    delete stats;
+  if (stats) {
+    if (stats_collector_) {
+      stats->Done(impl_->params_.device->name());
+    } else {
+      delete stats;
+    }
   }
 
   bool abort_run = false;
diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc
index 836cb8ed14..a70ab93d4a 100644
--- a/tensorflow/core/common_runtime/step_stats_collector.cc
+++ b/tensorflow/core/common_runtime/step_stats_collector.cc
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace {
@@ -40,46 +41,24 @@ struct AllocStats {
 };
 }  // namespace
 
-NodeExecStatsWrapper::NodeExecStatsWrapper(const string& node_name)
-    : NodeExecStatsWrapper(new NodeExecStats) {
-  stats_->set_node_name(node_name);
-}
-NodeExecStatsWrapper::NodeExecStatsWrapper(NodeExecStats* stats)
-    : stats_(stats) {}
-
-void NodeExecStatsWrapper::SetOutput(int slot, const Tensor* v) {
-  DCHECK(v);
-  NodeOutput* no = stats_->add_output();
-  no->set_slot(slot);
-  v->FillDescription(no->mutable_tensor_description());
-}
-
-void NodeExecStatsWrapper::SetMemory(OpKernelContext* ctx) {
-  for (const auto& allocator_pair : ctx->wrapped_allocators()) {
-    AddAllocation(allocator_pair.first, allocator_pair.second);
-  }
-  auto* ms = stats_->mutable_memory_stats();
-  ms->set_temp_memory_size(ctx->temp_memory_allocated());
-  for (const auto& alloc_id : ctx->persistent_alloc_ids()) {
-    ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id);
-  }
-  ms->set_persistent_memory_size(ctx->persistent_memory_allocated());
+NodeExecStatsWrapper::NodeExecStatsWrapper(
+    const Node* node, StepStatsCollector* step_stats_collector)
+    : NodeExecStatsWrapper(MakeUnique<NodeExecStats>(), node,
+                           step_stats_collector) {
+  stats_->set_node_name(node->name());
 }
 
-void NodeExecStatsWrapper::SetReferencedTensors(
-    const TensorReferenceVector& tensors) {
-  // be careful not to increment the reference count on any tensor
-  // while recording the information
-  for (size_t i = 0; i < tensors.size(); ++i) {
-    AllocationDescription* description = stats_->add_referenced_tensor();
-    tensors.at(i).FillDescription(description);
-  }
-}
-
-// TODO(tucker): merge with the DetailText function in session.cc
-// in a common location.
-bool NodeExecStatsWrapper::SetTimelineLabel(const Node* node) {
-  bool is_transfer_node = false;
+NodeExecStatsWrapper::NodeExecStatsWrapper(
+    std::unique_ptr<NodeExecStats> stats, const Node* node,
+    StepStatsCollector* step_stats_collector)
+    : stats_(std::move(stats)),
+      node_(node),
+      step_stats_collector_(step_stats_collector) {}
+
+void NodeExecStatsWrapper::Done(const string& device) {
+  // TODO(tucker): merge with the DetailText function in session.cc in a common
+  // location.
+  DCHECK(node_);
   string memory;
   for (auto& all : stats_->memory()) {
     int64 tot = all.total_bytes();
@@ -96,31 +75,96 @@ bool NodeExecStatsWrapper::SetTimelineLabel(const Node* node) {
       }
     }
   }
-  const AttrSlice attrs = node->attrs();
+  const AttrSlice attrs = node_->attrs();
   string text;
-  if (IsSend(node)) {
+  if (IsSend(node_)) {
     string tensor_name;
     TF_CHECK_OK(GetNodeAttr(attrs, "tensor_name", &tensor_name));
     string recv_device;
     TF_CHECK_OK(GetNodeAttr(attrs, "recv_device", &recv_device));
-    text = strings::StrCat(memory, node->name(), " = ", node->type_string(),
+    text = strings::StrCat(memory, node_->name(), " = ", node_->type_string(),
                            "(", tensor_name, " @", recv_device);
-    is_transfer_node = true;
-  } else if (IsRecv(node)) {
+  } else if (IsRecv(node_)) {
     string tensor_name;
     TF_CHECK_OK(GetNodeAttr(attrs, "tensor_name", &tensor_name));
     string send_device;
     TF_CHECK_OK(GetNodeAttr(attrs, "send_device", &send_device));
-    text = strings::StrCat(memory, node->name(), " = ", node->type_string(),
+    text = strings::StrCat(memory, node_->name(), " = ", node_->type_string(),
                            "(", tensor_name, " @", send_device);
-    is_transfer_node = true;
   } else {
     text =
-        strings::StrCat(memory, node->name(), " = ", node->type_string(), "(",
-                        str_util::Join(node->requested_inputs(), ", "), ")");
+        strings::StrCat(memory, node_->name(), " = ", node_->type_string(), "(",
+                        str_util::Join(node_->requested_inputs(), ", "), ")");
   }
   stats_->set_timeline_label(text);
-  return is_transfer_node;
+  step_stats_collector_->Save(device, this);
+}
+
+void NodeExecStatsWrapper::RecordExecutorStarted() {
+  int64 now_nanos = Env::Default()->NowNanos();
+  stats_->set_all_start_micros(now_nanos / EnvTime::kMicrosToNanos);
+  stats_->set_all_start_nanos(now_nanos);
+}
+
+void NodeExecStatsWrapper::RecordComputeStarted() {
+  int64 now_nanos = Env::Default()->NowNanos();
+  DCHECK_NE(stats_->all_start_micros(), 0);
+  DCHECK_NE(stats_->all_start_nanos(), 0);
+  stats_->set_op_start_rel_micros(now_nanos / EnvTime::kMicrosToNanos -
+                                  stats_->all_start_micros());
+  stats_->set_op_start_rel_nanos(now_nanos - stats_->all_start_nanos());
+}
+
+void NodeExecStatsWrapper::RecordComputeEnded() {
+  int64 now_nanos = Env::Default()->NowNanos();
+  DCHECK_NE(stats_->all_start_micros(), 0);
+  DCHECK_NE(stats_->all_start_nanos(), 0);
+  stats_->set_op_end_rel_micros(now_nanos / EnvTime::kMicrosToNanos -
+                                stats_->all_start_micros());
+  stats_->set_op_end_rel_nanos(now_nanos - stats_->all_start_nanos());
+}
+
+void NodeExecStatsWrapper::RecordExecutorEnded() {
+  int64 now_nanos = Env::Default()->NowNanos();
+  DCHECK_NE(stats_->all_start_micros(), 0);
+  DCHECK_NE(stats_->all_start_nanos(), 0);
+  stats_->set_all_end_rel_micros(now_nanos / EnvTime::kMicrosToNanos -
+                                 stats_->all_start_micros());
+  stats_->set_all_end_rel_nanos(now_nanos - stats_->all_start_nanos());
+}
+
+void NodeExecStatsWrapper::SetScheduled(int64 nanos) {
+  stats_->set_scheduled_micros(nanos / EnvTime::kMicrosToNanos);
+  stats_->set_scheduled_nanos(nanos);
+}
+
+void NodeExecStatsWrapper::SetMemory(OpKernelContext* ctx) {
+  for (const auto& allocator_pair : ctx->wrapped_allocators()) {
+    AddAllocation(allocator_pair.first, allocator_pair.second);
+  }
+  auto* ms = stats_->mutable_memory_stats();
+  ms->set_temp_memory_size(ctx->temp_memory_allocated());
+  for (const auto& alloc_id : ctx->persistent_alloc_ids()) {
+    ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id);
+  }
+  ms->set_persistent_memory_size(ctx->persistent_memory_allocated());
+}
+
+void NodeExecStatsWrapper::SetOutput(int slot, const Tensor* tensor) {
+  DCHECK(tensor);
+  NodeOutput* node_output = stats_->add_output();
+  node_output->set_slot(slot);
+  tensor->FillDescription(node_output->mutable_tensor_description());
+}
+
+void NodeExecStatsWrapper::SetReferencedTensors(
+    const TensorReferenceVector& tensors) {
+  // be careful not to increment the reference count on any tensor
+  // while recording the information
+  for (size_t i = 0; i < tensors.size(); ++i) {
+    AllocationDescription* description = stats_->add_referenced_tensor();
+    tensors.at(i).FillDescription(description);
+  }
 }
 
 void NodeExecStatsWrapper::AddAllocation(
@@ -150,8 +194,8 @@ void NodeExecStatsWrapper::Finalize() {
   allocations_.clear();
 }
 
-StepStatsCollector::StepStatsCollector(StepStats* ss)
-    : finalized_(false), step_stats_(ss) {}
+StepStatsCollector::StepStatsCollector(StepStats* step_stats)
+    : finalized_(false), step_stats_(step_stats) {}
 
 static int ExtractGpuWithStreamAll(string device_name) {
   // Check if the device name matches the ".*gpu:(\\d+)/stream:all$" regexp,
@@ -338,28 +382,40 @@ void StepStatsCollector::BuildCostModel(
   }
 }
 
-void StepStatsCollector::Save(const string& device, NodeExecStats* nt) {
-  Save(device, new NodeExecStatsWrapper(nt));
+void StepStatsCollector::Save(const string& device,
+                              NodeExecStats* node_stats_pb) {
+  Save(device,
+       new NodeExecStatsWrapper(std::unique_ptr<NodeExecStats>(node_stats_pb),
+                                nullptr, this));
 }
 
 void StepStatsCollector::Save(const string& device,
-                              NodeExecStatsWrapper* stats) {
-  if (!stats) return;
-  VLOG(1) << "Save dev " << device << " nt " << stats->stats();
+                              NodeExecStatsWrapper* node_stats) {
+  if (!node_stats) return;
+  VLOG(1) << "Save dev " << device << " node stats " << node_stats->stats();
   {
     mutex_lock l(mu_);
     if (finalized_) {
       LOG(WARNING) << "stats saved after finalize will not be collected.";
     }
-    if (!step_stats_ || collectedNodes >= kMaxCollectedNodes) {
+    if (!step_stats_ || collected_nodes_ >= kMaxCollectedNodes) {
       VLOG(1) << "step_stats_ nullptr or already collected too many nodes.";
-      delete stats;
+      delete node_stats;
       return;
     }
-    auto& dss = dev_stats_[device];
-    dss.push_back(std::unique_ptr<NodeExecStatsWrapper>(stats));
-    collectedNodes++;
+    auto& device_stats = dev_stats_[device];
+    device_stats.push_back(std::unique_ptr<NodeExecStatsWrapper>(node_stats));
+    collected_nodes_++;
+  }
+}
+
+NodeExecStatsInterface* StepStatsCollector::CreateNodeExecStats(
+    const Node* node) {
+  // Only collect statistics for non-transfer nodes.
+  if (IsSend(node) || IsRecv(node)) {
+    return nullptr;
   }
+  return new NodeExecStatsWrapper(node, this);
 }
 
 string StepStatsCollector::ReportAllocsOnResourceExhausted(const string& err) {
@@ -446,12 +502,12 @@ void StepStatsCollector::Finalize() {
   FinalizeInternal();
 }
 
-void StepStatsCollector::FinalizeAndSwap(StepStats* ss) {
+void StepStatsCollector::FinalizeAndSwap(StepStats* step_stats) {
   mutex_lock l(mu_);
   CHECK(step_stats_);
   FinalizeInternal();
-  ss->Swap(step_stats_);
-  collectedNodes = 0;
+  step_stats->Swap(step_stats_);
+  collected_nodes_ = 0;
 }
 
 void StepStatsCollector::FinalizeInternal() {
diff --git a/tensorflow/core/common_runtime/step_stats_collector.h b/tensorflow/core/common_runtime/step_stats_collector.h
index 7206fbf427..4365b11b19 100644
--- a/tensorflow/core/common_runtime/step_stats_collector.h
+++ b/tensorflow/core/common_runtime/step_stats_collector.h
@@ -36,81 +36,78 @@ class Node;
 class NodeExecStats;
 class OpKernelContext;
 class StepStats;
+class StepStatsCollector;
 class Tensor;
 class TrackingAllocator;
 
-// Wraps NodeExecStats and adds allocation to it.
-class NodeExecStatsWrapper {
+// Statistics collection interface for individual node execution.
+//
+// See `NodeExecStatsWrapper` for a concrete implementation of this interface
+// that interfaces with the `Session` layer.
+class NodeExecStatsInterface {
  public:
-  NodeExecStatsWrapper(const string& node_name);
-  // Owns 'stats'.
-  NodeExecStatsWrapper(NodeExecStats* stats);
+  virtual ~NodeExecStatsInterface() {}
 
-  // Destructor calls Finalize() to release the TrackingAllocators.
-  ~NodeExecStatsWrapper() { Finalize(); }
-
-  // Records the absolute time in nanoseconds at which this node became
-  // runnable (i.e. was scheduled for execution).
-  void SetScheduled(int64 nanos) {
-    stats_->set_scheduled_micros(nanos / EnvTime::kMicrosToNanos);
-    stats_->set_scheduled_nanos(nanos);
-  }
+  // Called when the statistics collection for the node has finished. Once this
+  // method is called, the caller should not make assumptions about the validity
+  // of this object.
+  virtual void Done(const string& device) = 0;
 
   // Called immediately after this node starts being processed by the executor.
-  void RecordExecutorStarted() {
-    int64 now_nanos = Env::Default()->NowNanos();
-    stats_->set_all_start_micros(now_nanos / EnvTime::kMicrosToNanos);
-    stats_->set_all_start_nanos(now_nanos);
-  }
+  virtual void RecordExecutorStarted() = 0;
 
   // Called immediately before this node's `Compute()` or `ComputeAsync()`
   // method is called.
-  void RecordComputeStarted() {
-    int64 now_nanos = Env::Default()->NowNanos();
-    DCHECK_NE(stats_->all_start_micros(), 0);
-    DCHECK_NE(stats_->all_start_nanos(), 0);
-    stats_->set_op_start_rel_micros(now_nanos / EnvTime::kMicrosToNanos -
-                                    stats_->all_start_micros());
-    stats_->set_op_start_rel_nanos(now_nanos - stats_->all_start_nanos());
-  }
+  virtual void RecordComputeStarted() = 0;
 
   // Called immediately after this node's `Compute()` method returned (or, for
   // asynchronous operations, the callback passed to its `ComputeAsync()` method
   // was called).
-  void RecordComputeEnded() {
-    int64 now_nanos = Env::Default()->NowNanos();
-    DCHECK_NE(stats_->all_start_micros(), 0);
-    DCHECK_NE(stats_->all_start_nanos(), 0);
-    stats_->set_op_end_rel_micros(now_nanos / EnvTime::kMicrosToNanos -
-                                  stats_->all_start_micros());
-    stats_->set_op_end_rel_nanos(now_nanos - stats_->all_start_nanos());
-  }
+  virtual void RecordComputeEnded() = 0;
 
   // Called immediately after this executor finishes processing this node.
-  void RecordExecutorEnded() {
-    int64 now_nanos = Env::Default()->NowNanos();
-    DCHECK_NE(stats_->all_start_micros(), 0);
-    DCHECK_NE(stats_->all_start_nanos(), 0);
-    stats_->set_all_end_rel_micros(now_nanos / EnvTime::kMicrosToNanos -
-                                   stats_->all_start_micros());
-    stats_->set_all_end_rel_nanos(now_nanos - stats_->all_start_nanos());
-  }
-
-  // Records information about the tensor produced by this node at the given
-  // output slot.
-  void SetOutput(int slot, const Tensor* v);
+  virtual void RecordExecutorEnded() = 0;
 
   // Records information about the memory allocated during the execution of this
   // node.
-  void SetMemory(OpKernelContext* ctx);
+  virtual void SetMemory(OpKernelContext* ctx) = 0;
+
+  // Records information about the tensor produced by this node at the given
+  // output slot.
+  virtual void SetOutput(int slot, const Tensor* tensor) = 0;
 
   // Records information about the tensors that were accessed during the
   // execution of this node.
-  void SetReferencedTensors(const TensorReferenceVector& tensors);
+  virtual void SetReferencedTensors(const TensorReferenceVector& tensors) = 0;
 
-  // Sets the timeline_label field of the wrapped NodeExecStats, using data
-  // from *node. Returns true iff the node is a transfer node.
-  bool SetTimelineLabel(const Node* node);
+  // Records the absolute time in nanoseconds at which this node became
+  // runnable (i.e. was scheduled for execution).
+  virtual void SetScheduled(int64 nanos) = 0;
+};
+
+// Wraps NodeExecStats and adds allocation to it.
+class NodeExecStatsWrapper : public NodeExecStatsInterface {
+ public:
+  // Does not take ownership of `node` or `step_stats_collector`.
+  NodeExecStatsWrapper(const Node* node,
+                       StepStatsCollector* step_stats_collector);
+
+  // Takes ownership of 'stats' but not `node` or `step_stats_collector`.
+  NodeExecStatsWrapper(std::unique_ptr<NodeExecStats> stats, const Node* node,
+                       StepStatsCollector* step_stats_collector);
+
+  // Destructor calls Finalize() to release the TrackingAllocators.
+  ~NodeExecStatsWrapper() { Finalize(); }
+
+  void Done(const string& device) override;
+  void RecordExecutorStarted() override;
+  void RecordComputeStarted() override;
+  void RecordComputeEnded() override;
+  void RecordExecutorEnded() override;
+  void SetMemory(OpKernelContext* ctx) override;
+  void SetOutput(int slot, const Tensor* tensor) override;
+  void SetReferencedTensors(const TensorReferenceVector& tensors) override;
+  void SetScheduled(int64 nanos) override;
 
  private:
   friend class StepStatsCollector;
@@ -128,9 +125,11 @@ class NodeExecStatsWrapper {
   gtl::InlinedVector<std::pair<AllocatorMemoryUsed*, TrackingAllocator*>, 2>
       allocations_;
   std::unique_ptr<NodeExecStats> stats_;
+  const Node* const node_;                          // Not owned.
+  StepStatsCollector* const step_stats_collector_;  // Not owned.
 };
 
-// Statistics collection interface for individual node execution.
+// Statistics collection interface for step execution.
 //
 // See `StepStatsCollector` for a concrete implementation of this interface
 // that interfaces with the `Session` layer.
@@ -138,8 +137,9 @@ class StepStatsCollectorInterface {
  public:
   virtual ~StepStatsCollectorInterface() {}
 
-  // Saves `stats` to the collector.
-  virtual void Save(const string& device, NodeExecStatsWrapper* stats) = 0;
+  // Creates an instance of `NodeExecStatsInterface` that should be used for
+  // collecting statistics about individual node execution.
+  virtual NodeExecStatsInterface* CreateNodeExecStats(const Node* node) = 0;
 
   // Generates a string reporting the currently used memory based
   // on ResourceExhausted OOM `err` message.
@@ -154,8 +154,8 @@ class StepStatsCollectorInterface {
 // Each DeviceStats object holds multiple NodeExecStats.
 class StepStatsCollector : public StepStatsCollectorInterface {
  public:
-  // Does not take ownership of `ss`.
-  explicit StepStatsCollector(StepStats* ss);
+  // Does not take ownership of `step_stats`.
+  explicit StepStatsCollector(StepStats* step_stats);
 
   // BuildCostModel builds or updates a CostModel managed by cost_model_manager,
   // using the currently collected DeviceStats associated with the devices in
@@ -164,11 +164,12 @@ class StepStatsCollector : public StepStatsCollectorInterface {
       CostModelManager* cost_model_manager,
       const std::unordered_map<string, const Graph*>& device_map);
 
-  // Save saves nt to the DeviceStats object associated with device.
+  // Saves node statistics to the DeviceStats object associated with device.
   // Should be called before Finalize.
-  void Save(const string& device, NodeExecStats* nt);
-  void Save(const string& device, NodeExecStatsWrapper* stats) override;
+  void Save(const string& device, NodeExecStats* node_stats_pb);
+  void Save(const string& device, NodeExecStatsWrapper* node_stats);
 
+  NodeExecStatsInterface* CreateNodeExecStats(const Node* node) override;
   string ReportAllocsOnResourceExhausted(const string& err) override;
 
   // The following 2 Finalize methods populate the StepStats passed
@@ -176,20 +177,22 @@ class StepStatsCollector : public StepStatsCollectorInterface {
   // User shouldn't call Save() methods after Finalize.
   void Finalize();
   // swaps the content of StepStats* from constructor with 'ss'.
-  void FinalizeAndSwap(StepStats* ss);
+  void FinalizeAndSwap(StepStats* step_stats);
 
  private:
+  // TODO(suharshs): Make this configurable if its not possible to find a value
+  // that works for all cases.
+  static const uint64 kMaxCollectedNodes = 1 << 20;
+
+  typedef std::vector<std::unique_ptr<NodeExecStatsWrapper>> NodeStatsVector;
+
   void FinalizeInternal() EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
-  typedef std::vector<std::unique_ptr<NodeExecStatsWrapper>> NodeExecStatsVec;
-  // TODO(suharshs): Make this configurable if its not possible to find a value
-  //                 that works for all cases.
-  const uint64 kMaxCollectedNodes = 1 << 20;
   mutex mu_;
   bool finalized_ GUARDED_BY(mu_);
-  std::unordered_map<string, NodeExecStatsVec> dev_stats_ GUARDED_BY(mu_);
+  std::unordered_map<string, NodeStatsVector> dev_stats_ GUARDED_BY(mu_);
   StepStats* step_stats_ GUARDED_BY(mu_);
-  uint64 collectedNodes GUARDED_BY(mu_) = 0;
+  uint64 collected_nodes_ GUARDED_BY(mu_) = 0;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc
index b3ab7e2bc6..8a5d30a27c 100644
--- a/tensorflow/core/kernels/data/captured_function.cc
+++ b/tensorflow/core/kernels/data/captured_function.cc
@@ -27,6 +27,74 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
+namespace {
+
+// Simplistic implementation of the `StepStatsCollectorInterface` that only
+// cares about collecting the CPU time needed to execute a captured function.
+class SimpleStepStatsCollector : public StepStatsCollectorInterface {
+ public:
+  void IncrementProcessingTime(int64 delta) {
+    mutex_lock l(mu_);
+    processing_time_ += delta;
+  }
+
+  NodeExecStatsInterface* CreateNodeExecStats(const Node* node) override {
+    return new SimpleNodeExecStats(this);
+  }
+
+  string ReportAllocsOnResourceExhausted(const string& err) override {
+    return "";
+  }
+
+  int64 processing_time() {
+    tf_shared_lock l(mu_);
+    return processing_time_;
+  }
+
+ private:
+  class SimpleNodeExecStats : public NodeExecStatsInterface {
+   public:
+    explicit SimpleNodeExecStats(SimpleStepStatsCollector* step_stats_collector)
+        : step_stats_collector_(step_stats_collector) {}
+
+    void Done(const string& device) override {
+      step_stats_collector_->IncrementProcessingTime(end_time_ns_ -
+                                                     start_time_ns_);
+      delete this;
+    }
+
+    void RecordExecutorStarted() override {
+      start_time_ns_ = Env::Default()->NowNanos();
+    }
+
+    void RecordComputeStarted() override {}
+
+    void RecordComputeEnded() override {}
+
+    void RecordExecutorEnded() override {
+      end_time_ns_ = Env::Default()->NowNanos();
+    }
+
+    void SetMemory(OpKernelContext* ctx) override {}
+
+    void SetOutput(int slot, const Tensor* tensor) override {}
+
+    void SetReferencedTensors(const TensorReferenceVector& tensors) override {}
+
+    void SetScheduled(int64 nanos) override {}
+
+   private:
+    int64 start_time_ns_ = 0;
+    int64 end_time_ns_ = 0;
+    SimpleStepStatsCollector* step_stats_collector_;  // Not owned.
+  };
+
+  mutex mu_;
+  int64 processing_time_ GUARDED_BY(mu_) = 0;
+};
+
+}  // namespace
+
 /* static */
 Status CapturedFunction::Create(
     const NameAttrList& func, OpKernelContext* ctx, const string& argument,
@@ -359,17 +427,17 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
     done(s);
     return;
   }
-  auto frame =
-      new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_);
+  std::shared_ptr<OwnedArgsCallFrame> frame(
+      new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_));
 
   FunctionLibraryRuntime::Options f_opts;
   f_opts.step_id = CapturedFunction::generate_step_id();
   ResourceMgr* resource_mgr = ctx->lib()->device()->resource_manager();
-  auto step_container = new ScopedStepContainer(
+  std::shared_ptr<ScopedStepContainer> step_container(new ScopedStepContainer(
       f_opts.step_id, [resource_mgr](const string& name) {
         resource_mgr->Cleanup(name).IgnoreError();
-      });
-  f_opts.step_container = step_container;
+      }));
+  f_opts.step_container = step_container.get();
   f_opts.runner = ctx->runner();
   if (ctx->lib()->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
@@ -380,43 +448,33 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
   // (such as queue kernels) that depend on the non-nullness of
   // `OpKernelContext::cancellation_manager()`, but additional effort
   // will be required to plumb it through the `IteratorContext`.
-  auto c_mgr = new CancellationManager;
-  f_opts.cancellation_manager = c_mgr;
-  StepStats* stats = nullptr;
-  StepStatsCollector* stats_collector = nullptr;
+  std::shared_ptr<CancellationManager> c_mgr(new CancellationManager);
+  f_opts.cancellation_manager = c_mgr.get();
+  std::shared_ptr<SimpleStepStatsCollector> stats_collector;
   std::shared_ptr<model::Node> node;
   if (ctx->model()) {
     node = ctx->model()->LookupNode(prefix);
     if (node) {
-      // TODO(b/114104975): Use something light-weight here.
-      stats = new StepStats();
-      stats_collector = new StepStatsCollector(stats);
+      stats_collector = MakeUnique<SimpleStepStatsCollector>();
     }
   }
-  f_opts.stats_collector = stats_collector;
+  f_opts.stats_collector = stats_collector.get();
 
+  OwnedArgsCallFrame* raw_frame = frame.get();
   auto callback = std::bind(
-      [rets, step_container, c_mgr, frame, stats, stats_collector, node](
-          FunctionLibraryRuntime::DoneCallback done,
-          // Begin unbound arguments.
-          Status s) {
-        delete step_container;
-        delete c_mgr;
+      [rets](const std::shared_ptr<CancellationManager>& c_mgr,
+             const FunctionLibraryRuntime::DoneCallback& done,
+             const std::shared_ptr<OwnedArgsCallFrame>& frame,
+             const std::shared_ptr<model::Node>& node,
+             const std::shared_ptr<SimpleStepStatsCollector>& stats_collector,
+             const std::shared_ptr<ScopedStepContainer>& step_container,
+             // Begin unbound arguments.
+             Status s) {
         if (s.ok()) {
           s = frame->ConsumeRetvals(rets);
         }
-        delete frame;
         if (node) {
-          int64 delta = 0;
-          stats_collector->Finalize();
-          for (auto dev_stats : stats->dev_stats()) {
-            for (auto node_stats : dev_stats.node_stats()) {
-              delta += node_stats.all_end_rel_nanos();
-            }
-          }
-          delete stats_collector;
-          delete stats;
-          node->add_processing_time(delta);
+          node->add_processing_time(stats_collector->processing_time());
           node->start_work();
         }
         done(s);
@@ -424,9 +482,11 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
           node->stop_work();
         }
       },
-      std::move(done), std::placeholders::_1);
+      std::move(c_mgr), std::move(done), std::move(frame), std::move(node),
+      std::move(stats_collector), std::move(step_container),
+      std::placeholders::_1);
 
-  ctx->lib()->Run(f_opts, handle, frame, std::move(callback));
+  ctx->lib()->Run(f_opts, handle, raw_frame, std::move(callback));
 }
 
 CapturedFunction::CapturedFunction(const NameAttrList& func,
-- 
GitLab


From 65231a4c48ce3a1297d00e2a6310be05e79ed88c Mon Sep 17 00:00:00 2001
From: Guangda Lai <31743510+aaroey@users.noreply.github.com>
Date: Tue, 18 Sep 2018 21:40:01 -0700
Subject: [PATCH 0358/1357] Fix python3 tests

---
 tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py   | 8 --------
 .../contrib/tensorrt/test/tf_trt_integration_test_base.py | 2 +-
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
index 62f4e525f7..d2f65344da 100644
--- a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
+++ b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
@@ -144,14 +144,6 @@ class BiasaddMatMulTest(trt_test.TfTrtIntegrationTestBase):
     # mode, which is a bug. Re-enable this when trt library is fixed.
     return not trt_test.IsQuantizationMode(run_params.precision_mode)
 
-  def ExpectedAbsoluteTolerance(self, run_params):
-    """The absolute tolerance to compare floating point results."""
-    return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03
-
-  def ExpectedRelativeTolerance(self, run_params):
-    """The relative tolerance to compare floating point results."""
-    return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
index 699f79adec..4f935a7665 100644
--- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
+++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
@@ -134,7 +134,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
             dims[0] for dims in self._GetParamsCached().input_dims if len(dims)
         ]),
         max_workspace_size_bytes=1 << 25,
-        precision_mode=self._ToBytes(run_params.precision_mode),
+        precision_mode=run_params.precision_mode,
         minimum_segment_size=2,
         is_dynamic_op=run_params.dynamic_engine,
         maximum_cached_engines=1,
-- 
GitLab


From d7cc73c300b12e7c02507bcfaff146d6c4955f19 Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dimvar@google.com>
Date: Tue, 18 Sep 2018 21:52:29 -0700
Subject: [PATCH 0359/1357] [TF:XLA] Change HloPtrComparator to work across HLO
 modules. Declaring the method out of line does not increase compile time.

PiperOrigin-RevId: 213571783
---
 .../compiler/xla/service/hlo_instruction.cc   | 20 +++++++++++++++++++
 .../compiler/xla/service/hlo_instruction.h    | 14 +------------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index e905f2983a..ad58833e4d 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2910,6 +2910,26 @@ std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) {
   return os << ToString(kind);
 }
 
+bool HloPtrComparator::operator()(const HloInstruction* const& lhs,
+                                  const HloInstruction* const& rhs) const {
+  if (rhs == nullptr) {
+    // Nothing compares less than nullptr.
+    return false;
+  }
+  if (lhs == nullptr) {
+    return true;
+  }
+  auto lhs_module = lhs->GetModule();
+  auto rhs_module = rhs->GetModule();
+  CHECK((lhs_module == nullptr && rhs_module == nullptr) ||
+        (lhs_module != nullptr && rhs_module != nullptr));
+  if (lhs_module != nullptr &&
+      lhs_module->unique_id() != rhs_module->unique_id()) {
+    return lhs_module->unique_id() < rhs_module->unique_id();
+  }
+  return lhs->unique_id() < rhs->unique_id();
+}
+
 bool HloInstruction::CouldBeBitcast() const {
   switch (opcode_) {
     case HloOpcode::kTranspose:
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 1ef8cd5036..d615df0831 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -1693,21 +1693,9 @@ std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind);
 // To make the iteration order over the map deterministic, the comparator
 // should not be using the pointer values, but rather an intrinsic property of
 // the hlo. Exception: null pointer values compare less than non-null.
-//
-// Note that this cannot be used for HLO instructions across multiple modules
-// since the id of HLO instructions are only unique within each HLO module.
 struct HloPtrComparator {
   bool operator()(const HloInstruction* const& lhs,
-                  const HloInstruction* const& rhs) const {
-    if (rhs == nullptr) {
-      // Nothing compares less than nullptr.
-      return false;
-    }
-    if (lhs == nullptr) {
-      return true;
-    }
-    return lhs->unique_id() < rhs->unique_id();
-  }
+                  const HloInstruction* const& rhs) const;
 };
 
 template <typename ValueT>
-- 
GitLab


From d7a8e852941e8cd856caafddf7c93d857e83e8b1 Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Wed, 19 Sep 2018 13:07:07 +0800
Subject: [PATCH 0360/1357] Move location of Slice shape function.

---
 tensorflow/core/framework/common_shape_fns.cc | 104 ++++++++++
 tensorflow/core/framework/common_shape_fns.h  |   3 +
 tensorflow/core/ops/array_ops.cc              | 182 +-----------------
 3 files changed, 109 insertions(+), 180 deletions(-)

diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index 20a07d86a2..20922d7884 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -1306,6 +1306,110 @@ Status RandomShape(shape_inference::InferenceContext* c) {
   return Status::OK();
 }
 
+// This SliceHelper processes the output shape of the `slice`
+// when the tensor of `sizes` is available.
+template <typename T>
+Status SliceHelper(InferenceContext* c, ShapeHandle begin_value,
+                   const Tensor* sizes_value,
+                   std::vector<DimensionHandle>* dims) {
+  auto sizes_vec = sizes_value->vec<T>();
+  for (int i = 0; i < sizes_value->NumElements(); ++i) {
+    DimensionHandle dim = c->Dim(c->input(0), i);
+    if (sizes_vec(i) != -1) {
+      auto dim_val = c->Value(dim);
+      if (sizes_vec(i) < 0) {
+        return errors::InvalidArgument(
+            "Out of bounds slicing on dimension ", i, " of length ", dim_val,
+            ": sizes vector cannot be < -1, but was ", sizes_vec(i));
+      }
+
+      dims->emplace_back(c->MakeDim(sizes_vec(i)));
+    } else {
+      DimensionHandle result;
+      TF_RETURN_IF_ERROR(c->Subtract(dim, c->Dim(begin_value, i), &result));
+      dims->emplace_back(result);
+    }
+  }
+
+  return Status::OK();
+}
+
+Status SliceShape(InferenceContext* c) {
+  ShapeHandle input = c->input(0);
+  ShapeHandle begin_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &begin_shape));
+  ShapeHandle sizes_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &sizes_shape));
+
+  // Merge to check compatibility of begin and sizes tensors.
+  TF_RETURN_IF_ERROR(c->Merge(begin_shape, sizes_shape, &begin_shape));
+
+  DimensionHandle ndims = c->Dim(begin_shape, 0);
+  if (c->ValueKnown(ndims)) {
+    TF_RETURN_IF_ERROR(c->WithRank(input, c->Value(ndims), &input));
+  }
+
+  // NOTE(mrry): Use MakeShapeFromShapeTensor to handle partially-known
+  // values, even though the `begin` value does not represent a shape.
+  ShapeHandle begin_value;
+  TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &begin_value));
+
+  // We check the tensor value here and will only use
+  // `MakeShapeFromShapeTensor` when `sizes_value` is null.
+  // The reason is that `sizes`might contain -1, which can't
+  // be represented (-1 in the ShapeHandle would mean "unknown").
+  const Tensor* sizes_value = c->input_tensor(2);
+
+  if (sizes_value != nullptr) {
+    TF_RETURN_IF_ERROR(
+        c->WithRank(begin_value, sizes_value->NumElements(), &begin_value));
+    std::vector<DimensionHandle> dims;
+    // If the begin and sizes tensors are available, then
+    // we can be precise about the shape of the output.
+    if (sizes_value->dtype() == DT_INT64) {
+      TF_RETURN_IF_ERROR(
+          SliceHelper<int64>(c, begin_value, sizes_value, &dims));
+    } else {
+      TF_RETURN_IF_ERROR(
+          SliceHelper<int32>(c, begin_value, sizes_value, &dims));
+    }
+    c->set_output(0, c->MakeShape(dims));
+    return Status::OK();
+  } else {
+    // In case `sizes` is not available (`sizes_value` is null),
+    // we could try to use `MakeShapeFromShapeTensor` here.
+    // If sizes contain -1, we will simply consider it as `Unknown`.
+    // This is less than ideal but still an improvement of shape inference.
+    // The following is an example that returns [None, 1, None] with this
+    // code path:
+    //   z = tf.zeros((1, 2, 3))
+    //   m = tf.slice(z, [0, 0, 0], [tf.constant(1) + 0, 1, -1])
+    //   m.get_shape().as_list()
+    ShapeHandle sizes_value;
+    TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(2, &sizes_value));
+    if (c->RankKnown(sizes_value)) {
+      TF_RETURN_IF_ERROR(
+          c->WithRank(begin_value, c->Rank(sizes_value), &begin_value));
+      std::vector<DimensionHandle> dims;
+      dims.reserve(c->Rank(sizes_value));
+      for (int i = 0; i < c->Rank(sizes_value); ++i) {
+        dims.emplace_back(c->Dim(sizes_value, i));
+      }
+      c->set_output(0, c->MakeShape(dims));
+      return Status::OK();
+    }
+    // We might know the rank of the input.
+    if (c->RankKnown(input)) {
+      c->set_output(0, c->UnknownShapeOfRank(c->Rank(input)));
+      return Status::OK();
+    } else {
+      return shape_inference::UnknownShape(c);
+    }
+  }
+
+  return Status::OK();
+}
+
 Status ValidateSparseTensor(InferenceContext* c, ShapeHandle indices_shape,
                             ShapeHandle values_shape, ShapeHandle shape_shape) {
   // Validate ranks.
diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h
index e6f9f935f9..478f796516 100644
--- a/tensorflow/core/framework/common_shape_fns.h
+++ b/tensorflow/core/framework/common_shape_fns.h
@@ -293,6 +293,9 @@ inline Status BroadcastBinaryOpShapeFn(InferenceContext* c) {
 // Shape function for random operations.
 Status RandomShape(shape_inference::InferenceContext* c);
 
+// Shape function for Slice operator.
+Status SliceShape(shape_inference::InferenceContext* c);
+
 // Validates the 3 component tensors of a sparse tensor have the proper
 // shapes. This mimics SparseTensor.__init__ in python/framework/ops.py.
 Status ValidateSparseTensor(InferenceContext* c, ShapeHandle indices_shape,
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 2dec430710..325690eded 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -1531,37 +1531,6 @@ REGISTER_OP("Size")
     .Attr("out_type: {int32, int64} = DT_INT32")
     .SetShapeFn(shape_inference::ScalarShape);
 
-namespace {
-
-// This SliceHelper processes the output shape of the `slice`
-// when the tensor of `sizes` is available.
-template <typename T>
-Status SliceHelper(InferenceContext* c, ShapeHandle begin_value,
-                   const Tensor* sizes_value,
-                   std::vector<DimensionHandle>* dims) {
-  auto sizes_vec = sizes_value->vec<T>();
-  for (int i = 0; i < sizes_value->NumElements(); ++i) {
-    DimensionHandle dim = c->Dim(c->input(0), i);
-    if (sizes_vec(i) != -1) {
-      auto dim_val = c->Value(dim);
-      if (sizes_vec(i) < 0) {
-        return errors::InvalidArgument(
-            "Out of bounds slicing on dimension ", i, " of length ", dim_val,
-            ": sizes vector cannot be < -1, but was ", sizes_vec(i));
-      }
-
-      dims->emplace_back(c->MakeDim(sizes_vec(i)));
-    } else {
-      DimensionHandle result;
-      TF_RETURN_IF_ERROR(c->Subtract(dim, c->Dim(begin_value, i), &result));
-      dims->emplace_back(result);
-    }
-  }
-
-  return Status::OK();
-}
-}  // namespace
-
 // --------------------------------------------------------------------------
 REGISTER_OP("Slice")
     .Input("input: T")
@@ -1571,81 +1540,7 @@ REGISTER_OP("Slice")
     .Attr("T: type")
     .Attr("Index: {int32,int64}")
     .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle input = c->input(0);
-      ShapeHandle begin_shape;
-      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &begin_shape));
-      ShapeHandle sizes_shape;
-      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &sizes_shape));
-
-      // Merge to check compatibility of begin and sizes tensors.
-      TF_RETURN_IF_ERROR(c->Merge(begin_shape, sizes_shape, &begin_shape));
-
-      DimensionHandle ndims = c->Dim(begin_shape, 0);
-      if (c->ValueKnown(ndims)) {
-        TF_RETURN_IF_ERROR(c->WithRank(input, c->Value(ndims), &input));
-      }
-
-      // NOTE(mrry): Use MakeShapeFromShapeTensor to handle partially-known
-      // values, even though the `begin` value does not represent a shape.
-      ShapeHandle begin_value;
-      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &begin_value));
-
-      // We check the tensor value here and will only use
-      // `MakeShapeFromShapeTensor` when `sizes_value` is null.
-      // The reason is that `sizes`might contain -1, which can't
-      // be represented (-1 in the ShapeHandle would mean "unknown".
-      const Tensor* sizes_value = c->input_tensor(2);
-
-      if (sizes_value != nullptr) {
-        TF_RETURN_IF_ERROR(
-            c->WithRank(begin_value, sizes_value->NumElements(), &begin_value));
-        std::vector<DimensionHandle> dims;
-        // If the begin and sizes tensors are available, then
-        // we can be precise about the shape of the output.
-        if (sizes_value->dtype() == DT_INT64) {
-          TF_RETURN_IF_ERROR(
-              SliceHelper<int64>(c, begin_value, sizes_value, &dims));
-        } else {
-          TF_RETURN_IF_ERROR(
-              SliceHelper<int32>(c, begin_value, sizes_value, &dims));
-        }
-
-        c->set_output(0, c->MakeShape(dims));
-        return Status::OK();
-      } else {
-        // In case `sizes` is not available (`sizes_value` is null),
-        // we could try to use `MakeShapeFromShapeTensor` here.
-        // If sizes contain -1, we will simply consider it as `Unknown`.
-        // This is less than ideal but still an improvement of shape inference.
-        // The following is an example that returns [None, 1, None] with this
-        // code path:
-        //   z = tf.zeros((1, 2, 3))
-        //   m = tf.slice(z, [0, 0, 0], [tf.constant(1) + 0, 1, -1])
-        //   m.get_shape().as_list()
-        ShapeHandle sizes_value;
-        TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(2, &sizes_value));
-        if (c->RankKnown(sizes_value)) {
-          TF_RETURN_IF_ERROR(
-              c->WithRank(begin_value, c->Rank(sizes_value), &begin_value));
-          std::vector<DimensionHandle> dims;
-          dims.reserve(c->Rank(sizes_value));
-          for (int i = 0; i < c->Rank(sizes_value); ++i) {
-            dims.emplace_back(c->Dim(sizes_value, i));
-          }
-          c->set_output(0, c->MakeShape(dims));
-          return Status::OK();
-        }
-
-        // We might know the rank of the input.
-        if (c->RankKnown(input)) {
-          c->set_output(0, c->UnknownShapeOfRank(c->Rank(input)));
-          return Status::OK();
-        } else {
-          return shape_inference::UnknownShape(c);
-        }
-      }
-
-      return Status::OK();
+      return shape_inference::SliceShape(c);
     });
 
 #ifdef INTEL_MKL
@@ -1661,80 +1556,7 @@ REGISTER_OP("_MklSlice")
     .Attr("T: type")
     .Attr("Index: {int32,int64}")
     .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle input = c->input(0);
-      ShapeHandle begin_shape;
-      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &begin_shape));
-      ShapeHandle sizes_shape;
-      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &sizes_shape));
-
-      // Merge to check compatibility of begin and sizes tensors.
-      TF_RETURN_IF_ERROR(c->Merge(begin_shape, sizes_shape, &begin_shape));
-
-      DimensionHandle ndims = c->Dim(begin_shape, 0);
-      if (c->ValueKnown(ndims)) {
-        TF_RETURN_IF_ERROR(c->WithRank(input, c->Value(ndims), &input));
-      }
-
-      // NOTE(mrry): Use MakeShapeFromShapeTensor to handle partially-known
-      // values, even though the `begin` value does not represent a shape.
-      ShapeHandle begin_value;
-      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &begin_value));
-
-      // NOTE(mrry): We can't use `MakeShapeFromShapeTensor` for `sizes` because
-      // it might contain -1, which can't be represented. (-1 in the ShapeHandle
-      // would mean "unknown".)
-      const Tensor* sizes_value = c->input_tensor(3);
-
-      if (sizes_value != nullptr) {
-        TF_RETURN_IF_ERROR(
-            c->WithRank(begin_value, sizes_value->NumElements(), &begin_value));
-        std::vector<DimensionHandle> dims;
-        // If the begin and sizes tensors are available, then
-        // we can be precise about the shape of the output.
-        if (sizes_value->dtype() == DT_INT64) {
-          TF_RETURN_IF_ERROR(
-              SliceHelper<int64>(c, begin_value, sizes_value, &dims));
-        } else {
-          TF_RETURN_IF_ERROR(
-              SliceHelper<int32>(c, begin_value, sizes_value, &dims));
-        }
-
-        c->set_output(0, c->MakeShape(dims));
-        return Status::OK();
-      } else {
-        // In case `sizes` is not available (`sizes_value` is null),
-        // we could try to use `MakeShapeFromShapeTensor` here.
-        // If sizes contain -1, we will simply consider it as `Unknown`.
-        // This is less than ideal but still an improvement of shape inference.
-        // The following is an example that returns [None, 1, None] with this
-        // code path:
-        //   z = tf.zeros((1, 2, 3))
-        //   m = tf.slice(z, [0, 0, 0], [tf.constant(1) + 0, 1, -1])
-        //   m.get_shape().as_list()
-        ShapeHandle sizes_value;
-        TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(2, &sizes_value));
-        if (c->RankKnown(sizes_value)) {
-          TF_RETURN_IF_ERROR(
-              c->WithRank(begin_value, c->Rank(sizes_value), &begin_value));
-          std::vector<DimensionHandle> dims;
-          dims.reserve(c->Rank(sizes_value));
-          for (int i = 0; i < c->Rank(sizes_value); ++i) {
-            dims.emplace_back(c->Dim(sizes_value, i));
-          }
-          c->set_output(0, c->MakeShape(dims));
-          return Status::OK();
-        }
-
-        // We might know the rank of the input.
-        if (c->RankKnown(input)) {
-          c->set_output(0, c->UnknownShapeOfRank(c->Rank(input)));
-          return Status::OK();
-        } else {
-          return shape_inference::UnknownShape(c);
-        }
-      }
-
-      return Status::OK();
+      return shape_inference::SliceShape(c);
     });
 #endif
 
-- 
GitLab


From 8d5a36ef0f7e65a84d64c800ca5527a3cc6ff2f0 Mon Sep 17 00:00:00 2001
From: Guangda Lai <31743510+aaroey@users.noreply.github.com>
Date: Tue, 18 Sep 2018 22:32:56 -0700
Subject: [PATCH 0361/1357] Fix trt allocator and tensor name issues in
 convert_nodes.cc.

---
 .../contrib/tensorrt/convert/convert_graph.cc |  6 ++---
 .../contrib/tensorrt/convert/convert_nodes.cc | 13 ++++------
 .../tensorrt/resources/trt_allocator.cc       |  8 ++++--
 .../tensorrt/resources/trt_allocator.h        |  2 +-
 .../tensorrt/resources/trt_allocator_test.cc  | 26 +++++++++----------
 5 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index b019c99882..fe6f8b42bd 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -678,7 +678,7 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos,
 // Function to construct a funcdef from the segment and add it to the graph.
 tensorflow::Status RegisterSegmentFunctionToFunctionLibrary(
     tensorflow::Graph* graph, const tensorflow::GraphDef& segment,
-    const string& name) {
+    const string& engine_name) {
   tensorflow::Graph sgraph(graph->flib_def());
   tensorflow::GraphConstructorOptions gcopts;
   TF_RETURN_IF_ERROR(
@@ -761,9 +761,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary(
   tensorflow::FunctionDefLibrary fdeflib;
   auto native_segment = fdeflib.add_function();
   TF_RETURN_IF_ERROR(tensorflow::GraphToFunctionDef(
-      sgraph, StrCat(name, "_native_segment"), native_segment));
+      sgraph, StrCat(engine_name, "_native_segment"), native_segment));
   if (VLOG_IS_ON(7)) {
-    VLOG(7) << name << " Function_Def ";
+    VLOG(7) << engine_name << " Function_Def ";
     VLOG(7) << native_segment->DebugString();
   }
   VLOG(1) << "Adding funcdef to graphlib";
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index c98b07ad8b..21fb459483 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -693,9 +693,10 @@ class Converter {
       // TODO(jie): tf protobuf seems to be omitting the :0 suffix
       string output_name = node_def.name();
       if (i != 0) output_name = StrCat(output_name, ":", i);
-      if (output.is_tensor()) {
-        output.tensor()->setName(output_name.c_str());
-      }
+      // We should not call output.tensor()->setName(), since the name may have
+      // already been set before (e.g. for Identity op where the output is the
+      // input, if its input is one of the engine input, setting the name here
+      // will overwrite engine input bindings which will cause runtime error).
       VLOG(2) << "Adding out tensor " << output_name << ": "
               << output.DebugString();
       if (!trt_tensors_.insert({output_name, output}).second) {
@@ -779,8 +780,7 @@ class Converter {
       // skip control nodes
       if (input_name[0] == '^') continue;
       string name = input_name;
-      auto first = name.find_first_of(':');
-      // TODO(aaroey): why removing the colon but not the zero? A bug?
+      auto first = name.find_last_of(':');
       // TODO(aaroey): use TensorId
       if (first != string::npos && first + 2 == name.size() &&
           name[first + 1] == '0') {
@@ -1301,7 +1301,6 @@ tensorflow::Status ConvertConv2DHelper(
 
   layer->setStride(stride);
   layer->setPadding({padding[0].first, padding[1].first});
-  layer->setName(node_def.name().c_str());
   layer->setNbGroups(num_groups);
   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
   VLOG(2) << "TENSOR out: " << DebugString(output_tensor->getDimensions());
@@ -1547,7 +1546,6 @@ tensorflow::Status ConvertPool(Converter& ctx,
 
   layer->setStride(stride);
   layer->setPadding({padding[0].first, padding[1].first});
-  layer->setName(node_def.name().c_str());
   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
 
   if (data_format == "NHWC") {
@@ -2697,7 +2695,6 @@ tensorflow::Status ConvertGraphDefToEngine(
   TrtUniquePtrType<nvinfer1::IBuilder> builder(
       nvinfer1::createInferBuilder(*logger));
   builder->setMaxBatchSize(max_batch_size);
-  // TODO(aaroey): use the allocator to allocate the TRT workspace.
   builder->setMaxWorkspaceSize(max_workspace_size_bytes);
 #if NV_TENSORRT_MAJOR > 3
   builder->setGpuAllocator(allocator);
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
index d8f97bfbbc..f6cf72e07f 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
@@ -27,7 +27,7 @@ namespace tensorflow {
 namespace tensorrt {
 
 // std::align is not supported, so this method mimic its behavior.
-void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
+void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space) {
   QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
   QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
   QCHECK_GT(size, 0) << "size must be greater than 0.";
@@ -67,12 +67,16 @@ void TRTCudaAllocator::free(void* memory) { cudaFree(memory); }
 
 void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
                                    uint32_t flags) {
+  if (size == 0) return nullptr;
   // WAR for allocator alignment requirement. Certain cuda API calls require GPU
   // memory with alignemtn to cudaDeviceProp::textureAlignment.
   // See issue #20856
   alignment = 512;
   assert((alignment & (alignment - 1)) == 0);  // zero or a power of 2.
-  size_t total_size = size + alignment;
+  int64_t total_size = size + alignment;
+  // TODO(aaroey): AllocateRaw takes size_t size as input, so it'll produce
+  // unexpected result when TRT tries to allocate more bytes than size_t can
+  // carry. Fix this.
   void* mem = allocator_->AllocateRaw(alignment, total_size);
   if (!mem) return nullptr;
 
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
index 6f94492083..0be3c4fd07 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
@@ -29,7 +29,7 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
 // std::align is not supported, so this function mimic its behavior.
-void* Align(size_t alignment, size_t size, void*& ptr, size_t& space);
+void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space);
 }  // namespace tensorrt
 }  // namespace tensorflow
 
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
index f515ed03f2..06bfb3269c 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -20,11 +20,11 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
 
-bool RunTest(const size_t alignment, const size_t size,
-             const intptr_t orig_ptr_val, const size_t orig_space) {
+bool RunTest(const int64_t alignment, const int64_t size,
+             const intptr_t orig_ptr_val, const int64_t orig_space) {
   void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val);
   void* ptr = orig_ptr;
-  size_t space = orig_space;
+  int64_t space = orig_space;
   void* result = Align(alignment, size, ptr, space);
   if (result == nullptr) {
     EXPECT_EQ(orig_ptr, ptr);
@@ -43,25 +43,25 @@ bool RunTest(const size_t alignment, const size_t size,
 }
 
 TEST(TRTAllocatorTest, Align) {
-  for (const size_t space :
-       {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) {
-    for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) {
-      for (const intptr_t ptr_val :
-           {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1,
+  for (const int64_t space : {1l, 2l, 3l, 4l, 7l, 8l, 9l, 10l, 16l, 32l, 511l,
+                              512l, 513l, 700l, 12345l, 1l << 32}) {
+    for (int64_t alignment = 1; alignment <= space * 4; alignment *= 2) {
+      for (const uintptr_t ptr_val :
+           {1l, alignment == 1 ? 1l : alignment - 1, alignment, alignment + 1,
             alignment + (alignment / 2)}) {
         if (ptr_val % alignment == 0) {
-          for (const size_t size :
-               {1ul, space == 1 ? 1ul : space - 1, space, space + 1}) {
+          for (const int64_t size :
+               {1l, space == 1 ? 1l : space - 1, space, space + 1}) {
             EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
           }
         } else {
           EXPECT_FALSE(RunTest(alignment, space, ptr_val, space));
-          const size_t diff = alignment - ptr_val % alignment;
+          const int64_t diff = alignment - ptr_val % alignment;
           if (space > diff) {
             EXPECT_TRUE(
                 RunTest(alignment, space - diff, ptr_val + diff, space - diff));
-            for (const size_t size :
-                 {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff,
+            for (const int64_t size :
+                 {1l, space - diff > 1 ? space - diff - 1 : 1l, space - diff,
                   space - diff + 1, space - 1}) {
               EXPECT_EQ(space - diff >= size,
                         RunTest(alignment, size, ptr_val, space));
-- 
GitLab


From 50125bf0d8ee9f47b868211f62cb545c5701a032 Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Tue, 18 Sep 2018 22:33:38 -0700
Subject: [PATCH 0362/1357] Add xla.compile(), a low-level API that compiles
 graph with XLA.

PiperOrigin-RevId: 213574904
---
 tensorflow/compiler/jit/BUILD                 |  10 ++
 tensorflow/compiler/jit/ops/BUILD             |   8 +
 tensorflow/contrib/compiler/BUILD             |   3 +
 tensorflow/contrib/compiler/xla.py            | 149 +++++++++++++++++-
 .../contrib/tpu/python/tpu/tpu_function.py    |   8 +-
 5 files changed, 172 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 1001c57f3d..9544c365b7 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -26,6 +26,7 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 
 # Target that bundles up the XLA CPU and GPU JIT devices.
 cc_library(
@@ -628,6 +629,15 @@ tf_cc_test(
     ],
 )
 
+tf_custom_op_py_library(
+    name = "xla_ops_py",
+    kernels = ["//tensorflow/compiler/jit/ops:xla_ops"],
+    visibility = [
+        ":friends",
+    ],
+    deps = ["//tensorflow/compiler/jit/ops:xla_ops_wrapper_py"],
+)
+
 # This target can be used by XLA device plugins to prevent circular dependencies, and provides access to all of the required headers for building a device library.
 cc_header_only_library(
     name = "xla_jit_headers_lib",
diff --git a/tensorflow/compiler/jit/ops/BUILD b/tensorflow/compiler/jit/ops/BUILD
index 13804c6a05..f72224545b 100644
--- a/tensorflow/compiler/jit/ops/BUILD
+++ b/tensorflow/compiler/jit/ops/BUILD
@@ -4,9 +4,17 @@ package(
     default_visibility = ["//tensorflow/compiler/tf2xla:internal"],
 )
 
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
+
 cc_library(
     name = "xla_ops",
     srcs = ["xla_ops.cc"],
     deps = ["//tensorflow/core:framework"],
     alwayslink = 1,
 )
+
+tf_gen_op_wrapper_py(
+    name = "xla_ops_wrapper_py",
+    out = "xla_ops.py",
+    deps = ["//tensorflow/compiler/jit/ops:xla_ops"],
+)
diff --git a/tensorflow/contrib/compiler/BUILD b/tensorflow/contrib/compiler/BUILD
index d7583be6d8..3b0e8f6cda 100644
--- a/tensorflow/contrib/compiler/BUILD
+++ b/tensorflow/contrib/compiler/BUILD
@@ -53,11 +53,14 @@ py_library(
     srcs = ["xla.py"],
     srcs_version = "PY2AND3",
     deps = [
+        "//tensorflow/compiler/jit:xla_ops_py",
+        "//tensorflow/contrib/tpu:tpu_lib",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:util",
+        "//tensorflow/python:variable_scope",
         "//tensorflow/python/estimator:model_fn",
     ],
 )
diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py
index 60f5af1662..0aae695f92 100644
--- a/tensorflow/contrib/compiler/xla.py
+++ b/tensorflow/contrib/compiler/xla.py
@@ -12,18 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-"""xla provides experimental xla support API."""
+"""xla is an experimental library that provides XLA support APIs."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.compiler.jit.ops import xla_ops
+from tensorflow.contrib.tpu.python.tpu import tpu_function
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import compat
 
@@ -51,6 +55,30 @@ _UNSUPPORTED_OPS = set([
 ])
 
 
+def compile(computation, inputs=None):  # pylint: disable=redefined-builtin
+  """Builds an operator that compiles and runs `computation` with XLA.
+
+  Args:
+    computation: A Python function that builds a computation to apply to the
+      input. If the function takes n inputs, 'inputs' should be a list of n
+      tensors.
+
+      `computation` may return a list of operations and tensors.  Tensors must
+      come before operations in the returned list.  The return value of
+      `compile` is a list of tensors corresponding to the tensors from the
+      output of `computation`.
+
+      All `Operation`s returned from `computation` will be executed when
+      evaluating any of the returned output tensors.
+    inputs: A list of input tensors or `None` (equivalent to an empty list).
+
+  Returns:
+    A list of output tensors.
+  """
+  # pylint: disable=protected-access
+  return _compile_internal(computation, inputs)
+
+
 class XLACompileContext(control_flow_ops.XLAControlFlowContext):
   """A `ControlFlowContext` for nodes inside an XLA computation cluster.
 
@@ -206,3 +234,122 @@ class XLACompileContext(control_flow_ops.XLAControlFlowContext):
     if self.GetWhileContext():
       return self.GetWhileContext().back_prop
     return False
+
+
+def _compile_internal(computation, inputs=None):
+  """Builds graph operators that compiles and symbolically executes computation.
+
+  Args:
+    computation: A Python function that builds the computation to compile and
+      execute.
+    inputs: A list of input tensors or `None` (equivalent to `[]`). Its order
+      should match ordering of computation arguments.
+  Returns:
+    A list of output tensors from computation.
+  Raises:
+    ValueError: If any element in computation outputs is neither an operations
+      or a value that can be converted to tensor.
+    TypeError: If `inputs` is not a list or tuple.
+  """
+  if inputs is None:
+    inputs = []
+
+  if not isinstance(inputs, collections.Sequence):
+    raise TypeError('inputs must be a list')
+
+  # Converts inputs to Tensors.
+  inputs = [ops.convert_to_tensor(x) for x in inputs]
+  input_arity = len(inputs)
+
+  arg_error = tpu_function.check_function_argument_count(
+      computation, input_arity, infeed_queue=None)
+  if arg_error is not None:
+    raise TypeError(
+        'Supplied computation cannot be called with the specified inputs. You '
+        'specified %d inputs: %s, but the computation needs %s' %
+        (input_arity, str([i.name for i in inputs[0]]), arg_error))
+
+  cluster_name = ops.get_default_graph().unique_name('cluster')
+  pivot = control_flow_ops.no_op(name=cluster_name + '/pivot')
+  context = XLACompileContext(name=cluster_name, pivot=pivot)
+  try:
+    context.Enter()
+
+    # Add identity ops so even unused inputs are 'consumed' by the
+    # computation.
+    computation_inputs = [
+        array_ops.identity(x, name='input_{}'.format(i))
+        for i, x in enumerate(inputs)
+    ]
+
+    # Only resource variables work inside an XLA computation, so turn on
+    # resource variables for the computation.
+    vscope = variable_scope.get_variable_scope()
+    saved_use_resource = vscope.use_resource
+    vscope.set_use_resource(True)
+
+    outputs = computation(*computation_inputs)
+
+    # Restore variable scope after computation.
+    vscope.set_use_resource(saved_use_resource)
+
+    # If the computation returns `None`, make it an empty tuple.
+    if outputs is None:
+      outputs = tuple()
+    # If the computation only returned one value, make it a tuple.
+    if not isinstance(outputs, collections.Sequence):
+      outputs = (outputs,)
+
+    # Append `no_op` here so that return value of this function always contains
+    # at least one op that can trigger XlaLaunch node.
+    outputs += (control_flow_ops.no_op(),)
+    try:
+      outputs = [
+          o if isinstance(o, ops.Operation) else ops.convert_to_tensor(o)
+          for o in outputs
+      ]
+    except Exception as e:
+      raise ValueError(
+          'XLA computation function return values must all either be Operations'
+          ' or convertible to Tensors. Got error: "%s"' % str(e))
+
+    # Separates the returned Operations and Tensors.
+    output_operations = [o for o in outputs if isinstance(o, ops.Operation)]
+    output_tensors = [o for o in outputs if not isinstance(o, ops.Operation)]
+
+    if outputs != output_tensors + output_operations:
+      raise ValueError(
+          'XLA computation function must return zero or more Tensor values '
+          'followed by zero or more Operations.')
+    output_arity = len(output_tensors)
+
+    new_output_tensors = []
+    for t in output_tensors:
+      with ops.device(t.device if t.device else ''):
+        new_output_tensors.append(array_ops.identity(t))
+
+    output_tensors = new_output_tensors
+    context.ExitResult(output_tensors)
+  finally:
+    context.report_unsupported_operations()
+    context.Exit()
+
+  outputs = [
+      xla_ops.xla_cluster_output(output_tensors[i], name='output{}'.format(i))
+      for i in xrange(output_arity)
+  ]
+
+  with ops.control_dependencies(output_operations):
+    if output_arity == 0:
+      # When XLA computation returns only operations and no tensors, a NoOp
+      # dependent on the operations in outputs is returned. Otherwise final
+      # outputs would be empty and there is no way to trigger returned
+      # operations.
+      return control_flow_ops.no_op(name='output_0')
+    else:
+      # Wraps the outputs in identity operators that carries control
+      # dependencies.
+      return [
+          array_ops.identity(outputs[i], name='output_%d' % i)
+          for i in xrange(output_arity)
+      ]
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_function.py b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
index de16e3b157..0c7a38dbbb 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_function.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
@@ -63,10 +63,9 @@ def check_function_argument_count(func, input_arity, infeed_queue):
   """Validate the number of input arguments to a tpu function.
 
   Args:
-    func: the Python function that will be called to generate the body
-      of a TPUFunction.
-    input_arity: the number of explicit arguments supplied by the
-      caller.
+    func: the Python function that will be called to generate the body of an XLA
+      computation graph.
+    input_arity: the number of explicit arguments supplied by the caller.
     infeed_queue: if not None, the infeed queue that will supply
       additional arguments to the function.
 
@@ -103,4 +102,3 @@ def check_function_argument_count(func, input_arity, infeed_queue):
   # Since there are varargs, func can accept any number of arguments
   # greater than the minimum.
   return None
-
-- 
GitLab


From 6967287715a097c8b009b52010c53247ab658232 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 01:30:07 -0700
Subject: [PATCH 0363/1357] Modify Timeline Analysis to consider allocations in
 order.

PiperOrigin-RevId: 213589710
---
 tensorflow/python/client/timeline.py      | 3 ++-
 tensorflow/python/client/timeline_test.py | 4 +---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/client/timeline.py b/tensorflow/python/client/timeline.py
index 1e96ac5ed4..c3f38294b5 100644
--- a/tensorflow/python/client/timeline.py
+++ b/tensorflow/python/client/timeline.py
@@ -588,7 +588,8 @@ class Timeline(object):
       alloc_tensor_set = set()
       alloc_maxes[allocator] = AllocationMaximum(
           timestamp=0, num_bytes=0, tensors=set())
-      for time, num_bytes, name in alloc_list:
+      for time, num_bytes, name in sorted(
+          alloc_list, key=lambda allocation: allocation[0]):
         total_bytes += num_bytes
         if num_bytes < 0:
           alloc_tensor_set.discard(name)
diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py
index 281d7f2e2b..032bbf7c4e 100644
--- a/tensorflow/python/client/timeline_test.py
+++ b/tensorflow/python/client/timeline_test.py
@@ -134,7 +134,7 @@ class TimelineTest(test.TestCase):
     ctf = tl.generate_chrome_trace_format()
     self._validateTrace(ctf)
 
-  def disabled_testAnalysisAndAllocations(self):
+  def testAnalysisAndAllocations(self):
     run_options = config_pb2.RunOptions(
         trace_level=config_pb2.RunOptions.FULL_TRACE)
     run_metadata = config_pb2.RunMetadata()
@@ -163,8 +163,6 @@ class TimelineTest(test.TestCase):
     # At least num1 + num2, both float32s (4 bytes each)
     self.assertGreaterEqual(cpu_max.num_bytes, 8)
     self.assertGreater(cpu_max.timestamp, 0)
-    self.assertTrue('num1' in cpu_max.tensors or 'num1/read' in cpu_max.tensors)
-    self.assertTrue('num2' in cpu_max.tensors or 'num2/read' in cpu_max.tensors)
 
   def testManyCPUs(self):
     run_options = config_pb2.RunOptions(
-- 
GitLab


From 0d8942fcbcc9cb3a05be8acc843d1fc4b6dfc9f1 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Wed, 19 Sep 2018 01:55:55 -0700
Subject: [PATCH 0364/1357] Implement sort op for CPU.

Also don't allow parallelization for the sort op in parallel_task_assignment.

PiperOrigin-RevId: 213592046
---
 tensorflow/compiler/xla/service/cpu/BUILD     |  13 +
 .../compiler/xla/service/cpu/cpu_runtime.cc   |  24 ++
 .../compiler/xla/service/cpu/cpu_runtime.h    |  12 +
 .../compiler/xla/service/cpu/ir_emitter.cc    | 145 ++++++++++-
 .../compiler/xla/service/cpu/ir_emitter.h     |   6 +
 .../service/cpu/parallel_task_assignment.cc   |   1 +
 .../xla/service/cpu/runtime_key_value_sort.cc | 237 ++++++++++++++++++
 .../xla/service/cpu/runtime_key_value_sort.h  |  88 +++++++
 .../xla/service/cpu/simple_orc_jit.cc         |  13 +
 .../compiler/xla/service/cpu/tests/BUILD      |  14 ++
 .../cpu/tests/cpu_key_value_sort_test.cc      |  54 ++++
 11 files changed, 605 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
 create mode 100644 tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
 create mode 100644 tensorflow/compiler/xla/service/cpu/tests/cpu_key_value_sort_test.cc

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 8cc522a59e..b3e4fab727 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -180,6 +180,7 @@ cc_library(
         ":runtime_conv2d_mkl",
         ":runtime_fft",
         ":runtime_fork_join",
+        ":runtime_key_value_sort",
         ":runtime_matmul",
         ":runtime_matmul_mkl",
         ":runtime_single_threaded_conv2d",
@@ -623,6 +624,18 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "runtime_key_value_sort",
+    srcs = ["runtime_key_value_sort.cc"],
+    hdrs = ["runtime_key_value_sort.h"],
+    copts = runtime_copts(),
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:lib",
+        "//third_party/eigen3",
+    ],
+)
+
 cc_library(
     name = "runtime_fork_join",
     srcs = ["runtime_fork_join.cc"],
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
index 8a44c384bb..7e1590955a 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
@@ -74,6 +74,30 @@ extern const char* const kReleaseOutfeedBufferAfterPopulationSymbolName =
     "__xla_cpu_runtime_ReleaseOutfeedBufferAfterPopulation";
 extern const char* const kParallelForkJoinSymbolName =
     "__xla_cpu_runtime_ParallelForkJoin";
+extern const char* const kKeyValueSortPREDSymbolName =
+    "__xla_cpu_runtime_KeyValueSortPRED";
+extern const char* const kKeyValueSortS8SymbolName =
+    "__xla_cpu_runtime_KeyValueSortS8";
+extern const char* const kKeyValueSortU8SymbolName =
+    "__xla_cpu_runtime_KeyValueSortU8";
+extern const char* const kKeyValueSortS16SymbolName =
+    "__xla_cpu_runtime_KeyValueSortS16";
+extern const char* const kKeyValueSortU16SymbolName =
+    "__xla_cpu_runtime_KeyValueSortU16";
+extern const char* const kKeyValueSortF16SymbolName =
+    "__xla_cpu_runtime_KeyValueSortF16";
+extern const char* const kKeyValueSortS32SymbolName =
+    "__xla_cpu_runtime_KeyValueSortS32";
+extern const char* const kKeyValueSortU32SymbolName =
+    "__xla_cpu_runtime_KeyValueSortU32";
+extern const char* const kKeyValueSortF32SymbolName =
+    "__xla_cpu_runtime_KeyValueSortF32";
+extern const char* const kKeyValueSortS64SymbolName =
+    "__xla_cpu_runtime_KeyValueSortS64";
+extern const char* const kKeyValueSortU64SymbolName =
+    "__xla_cpu_runtime_KeyValueSortU64";
+extern const char* const kKeyValueSortF64SymbolName =
+    "__xla_cpu_runtime_KeyValueSortF64";
 
 extern const char* const kXlaCpuRuntimeSymbolNamePrefix = "__xla_cpu_runtime_";
 }  // namespace runtime
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
index aa0e967123..e6345e0344 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
@@ -63,6 +63,18 @@ extern const char* const kReleaseInfeedBufferAfterDequeueSymbolName;
 extern const char* const kAcquireOutfeedBufferForPopulationSymbolName;
 extern const char* const kReleaseOutfeedBufferAfterPopulationSymbolName;
 extern const char* const kParallelForkJoinSymbolName;
+extern const char* const kKeyValueSortPREDSymbolName;
+extern const char* const kKeyValueSortS8SymbolName;
+extern const char* const kKeyValueSortU8SymbolName;
+extern const char* const kKeyValueSortS16SymbolName;
+extern const char* const kKeyValueSortU16SymbolName;
+extern const char* const kKeyValueSortF16SymbolName;
+extern const char* const kKeyValueSortS32SymbolName;
+extern const char* const kKeyValueSortU32SymbolName;
+extern const char* const kKeyValueSortF32SymbolName;
+extern const char* const kKeyValueSortS64SymbolName;
+extern const char* const kKeyValueSortU64SymbolName;
+extern const char* const kKeyValueSortF64SymbolName;
 
 // All symbol names for XLA CPU runtime functions need to start with this
 // prefix.
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index df8c2a636b..7e82375cc3 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -495,8 +495,149 @@ Status IrEmitter::HandleOutfeed(HloInstruction* outfeed) {
 }
 
 Status IrEmitter::HandleSort(HloInstruction* sort) {
-  // TODO(b/26783907): Implement sort on CPU.
-  return Unimplemented("Sort is not implemented on CPU.");
+  TF_RETURN_IF_ERROR(EmitTargetAddressForOp(sort));
+  auto keys = sort->operand(0);
+  auto values = sort->operand_count() > 1 ? sort->operand(1) : nullptr;
+  ShapeIndex keys_shape_index({});
+  ShapeIndex values_shape_index({});
+  if (values != nullptr) {
+    keys_shape_index = ShapeIndex({0});
+    values_shape_index = ShapeIndex({1});
+  }
+  auto keys_destination = GetAllocationSlice(*sort, keys_shape_index);
+  auto keys_destination_address =
+      EmitBufferPointer(keys_destination, keys->shape());
+  auto values_destination = GetAllocationSlice(*sort, values_shape_index);
+  llvm::Value* values_destination_address = nullptr;
+
+  // The sort is implemented in-place, therefore we first copy the operand
+  // buffer to the output buffer if they are not the same.
+  if (keys_destination != GetAllocationSlice(*keys)) {
+    int64 primitive_type_size =
+        ShapeUtil::ByteSizeOfPrimitiveType(keys->shape().element_type());
+    auto source_buffer = GetEmittedValueFor(keys);
+    int64 keys_size = ByteSizeOf(keys->shape());
+    MemCpy(keys_destination_address, /*DstAlign=*/primitive_type_size,
+           source_buffer,
+           /*SrcAlign=*/primitive_type_size, keys_size);
+  }
+  if (values != nullptr) {
+    values_destination_address =
+        EmitBufferPointer(values_destination, values->shape());
+    if (values_destination != GetAllocationSlice(*values)) {
+      int64 primitive_type_size =
+          ShapeUtil::ByteSizeOfPrimitiveType(values->shape().element_type());
+      auto source_buffer = GetEmittedValueFor(values);
+      int64 values_size = ByteSizeOf(values->shape());
+      MemCpy(values_destination_address, /*DstAlign=*/primitive_type_size,
+             source_buffer,
+             /*SrcAlign=*/primitive_type_size, values_size);
+    }
+  }
+
+  // Normalize the shape and the dimension to sort.
+  Shape normalized_keys_shape =
+      ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
+          keys->shape());
+  int64 physical_dimension_to_sort = LayoutUtil::MakeLogicalToPhysical(
+      keys->shape().layout())[sort->dimensions(0)];
+
+  int64 sort_dimension_elements =
+      normalized_keys_shape.dimensions(physical_dimension_to_sort);
+  int64 higher_dimensions = 1;
+  for (int64 i = 0; i < physical_dimension_to_sort; ++i) {
+    higher_dimensions *= normalized_keys_shape.dimensions(i);
+  }
+  int64 lower_dimensions = 1;
+  for (int64 i = ShapeUtil::Rank(normalized_keys_shape) - 1;
+       i > physical_dimension_to_sort; --i) {
+    lower_dimensions *= normalized_keys_shape.dimensions(i);
+  }
+
+  PrimitiveType keys_type = keys->shape().element_type();
+  const char* fn_name = nullptr;
+  llvm::Type* keys_native_type = nullptr;
+  switch (keys_type) {
+    case PRED:
+      fn_name = runtime::kKeyValueSortPREDSymbolName;
+      keys_native_type = b_.getInt8PtrTy();
+      break;
+    case S8:
+      fn_name = runtime::kKeyValueSortS8SymbolName;
+      keys_native_type = b_.getInt8PtrTy();
+      break;
+    case U8:
+      fn_name = runtime::kKeyValueSortU8SymbolName;
+      keys_native_type = b_.getInt8PtrTy();
+      break;
+    case S16:
+      fn_name = runtime::kKeyValueSortS16SymbolName;
+      keys_native_type = b_.getInt16Ty()->getPointerTo();
+      break;
+    case U16:
+      fn_name = runtime::kKeyValueSortU16SymbolName;
+      keys_native_type = b_.getInt16Ty()->getPointerTo();
+      break;
+    case F16:
+      fn_name = runtime::kKeyValueSortF16SymbolName;
+      keys_native_type = b_.getHalfTy()->getPointerTo();
+      break;
+    case S32:
+      fn_name = runtime::kKeyValueSortS32SymbolName;
+      keys_native_type = b_.getInt32Ty()->getPointerTo();
+      break;
+    case U32:
+      fn_name = runtime::kKeyValueSortU32SymbolName;
+      keys_native_type = b_.getInt32Ty()->getPointerTo();
+      break;
+    case F32:
+      fn_name = runtime::kKeyValueSortF32SymbolName;
+      keys_native_type = b_.getFloatTy()->getPointerTo();
+      break;
+    case S64:
+      fn_name = runtime::kKeyValueSortS64SymbolName;
+      keys_native_type = b_.getInt64Ty()->getPointerTo();
+      break;
+    case U64:
+      fn_name = runtime::kKeyValueSortU64SymbolName;
+      keys_native_type = b_.getInt64Ty()->getPointerTo();
+      break;
+    case F64:
+      fn_name = runtime::kKeyValueSortF64SymbolName;
+      keys_native_type = b_.getDoubleTy()->getPointerTo();
+      break;
+    default:
+      DLOG(FATAL) << "Element type " << PrimitiveType_Name(keys_type)
+                  << " not supported in the Sort op on CPU.";
+  }
+
+  llvm::FunctionType* key_value_sort_type = llvm::FunctionType::get(
+      b_.getVoidTy(),
+      {keys_native_type, b_.getInt64Ty(), b_.getInt64Ty(), b_.getInt64Ty(),
+       b_.getInt8PtrTy(), b_.getInt32Ty()},
+      /*isVarArg=*/false);
+  auto* key_value_sort_func = llvm::cast<llvm::Function>(
+      module_->getOrInsertFunction(fn_name, key_value_sort_type));
+  key_value_sort_func->setCallingConv(llvm::CallingConv::C);
+  key_value_sort_func->setDoesNotThrow();
+  key_value_sort_func->setOnlyAccessesArgMemory();
+  Call(key_value_sort_func,
+       {PointerCast(keys_destination_address, keys_native_type),
+        b_.getInt64(higher_dimensions), b_.getInt64(sort_dimension_elements),
+        b_.getInt64(lower_dimensions),
+        values != nullptr
+            ? PointerCast(values_destination_address, b_.getInt8PtrTy())
+            : llvm::Constant::getNullValue(b_.getInt8PtrTy()),
+        b_.getInt32(values != nullptr ? ShapeUtil::ByteSizeOfPrimitiveType(
+                                            values->shape().element_type())
+                                      : 0)});
+
+  if (values != nullptr) {
+    llvm_ir::EmitTuple(GetIrArrayFor(sort),
+                       {keys_destination_address, values_destination_address},
+                       &b_, module_);
+  }
+  return Status::OK();
 }
 
 Status IrEmitter::HandleTuple(HloInstruction* tuple) {
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 3df99464ba..daafef4eb3 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -163,6 +163,12 @@ class IrEmitter : public DfsHloVisitorWithDefault,
   Status Preprocess(HloInstruction* hlo) override;
   Status Postprocess(HloInstruction* hlo) override;
 
+  // A convenient helper for calling BufferAssignment::GetUniqueSlice.
+  BufferAllocation::Slice GetAllocationSlice(
+      const HloInstruction& hlo, const ShapeIndex& index = {}) const {
+    return assignment_.GetUniqueSlice(&hlo, index).ConsumeValueOrDie();
+  }
+
  private:
   // Private helper to initialize an IR function for the computation.
   void InitializeIrFunction(const string& function_name);
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc
index b4c0c09ec0..ede7f433ca 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc
@@ -142,6 +142,7 @@ int64 ParallelTaskAssignment::GetTargetParallelTaskCount(
       opcode == HloOpcode::kGetTupleElement || opcode == HloOpcode::kBitcast ||
       opcode == HloOpcode::kFft || opcode == HloOpcode::kInfeed ||
       opcode == HloOpcode::kOutfeed || opcode == HloOpcode::kRng ||
+      opcode == HloOpcode::kSort ||
       (opcode == HloOpcode::kConvolution &&
        PotentiallyImplementedAsEigenConvolution(*instruction,
                                                 target_machine_features_)) ||
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
new file mode 100644
index 0000000000..cef5420f00
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
@@ -0,0 +1,237 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/platform/dynamic_annotations.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace {
+using tensorflow::int16;
+using tensorflow::int32;
+using tensorflow::int64;
+using tensorflow::int8;
+using tensorflow::uint16;
+using tensorflow::uint32;
+using tensorflow::uint64;
+using tensorflow::uint8;
+
+template <typename KeyType>
+void KeyValueSort(std::pair<KeyType, int64>* row_to_sort, int64 num_elements) {
+  std::sort(row_to_sort, row_to_sort + num_elements);
+}
+
+// For floating point numbers, we want a total order comparator. -NaN and NaN
+// should appear at the beginning and end of the ordering, and -0.0 should
+// appear before 0.0. Also we want to have a stable sort, so if the keys are the
+// same, we compare the index values.
+template <typename KeyType>
+bool LessThan(KeyType lhs, int64 lhs_index, KeyType rhs, int64 rhs_index) {
+  bool lhs_is_negative = std::signbit(lhs);
+  bool rhs_is_negative = std::signbit(rhs);
+  // If the signs are different, we can just compare the signs.
+  if (lhs_is_negative != rhs_is_negative) {
+    return lhs_is_negative && !rhs_is_negative;
+  }
+  bool lhs_nan = std::isnan(lhs);
+  bool rhs_nan = std::isnan(rhs);
+  // Exactly one number is nan?
+  if (lhs_nan != rhs_nan) {
+    if (lhs_nan) {
+      return lhs_is_negative;
+    }
+    return !rhs_is_negative;
+  }
+  if (lhs != rhs) {
+    return lhs < rhs;
+  }
+  return lhs_index < rhs_index;
+}
+
+template <>
+void KeyValueSort(std::pair<double, int64>* row_to_sort, int64 num_elements) {
+  std::sort(row_to_sort, row_to_sort + num_elements,
+            [](const std::pair<double, int64>& lhs,
+               const std::pair<double, int64>& rhs) -> bool {
+              return LessThan(lhs.first, lhs.second, rhs.first, rhs.second);
+            });
+}
+
+template <>
+void KeyValueSort(std::pair<float, int64>* row_to_sort, int64 num_elements) {
+  std::sort(row_to_sort, row_to_sort + num_elements,
+            [](const std::pair<float, int64>& lhs,
+               const std::pair<float, int64>& rhs) -> bool {
+              return LessThan(lhs.first, lhs.second, rhs.first, rhs.second);
+            });
+}
+
+template <>
+void KeyValueSort(std::pair<Eigen::half, int64>* row_to_sort,
+                  int64 num_elements) {
+  std::sort(row_to_sort, row_to_sort + num_elements,
+            [](const std::pair<Eigen::half, int64>& lhs,
+               const std::pair<Eigen::half, int64>& rhs) -> bool {
+              return LessThan(
+                  Eigen::half_impl::half_to_float(lhs.first), lhs.second,
+                  Eigen::half_impl::half_to_float(rhs.first), rhs.second);
+            });
+}
+
+template <typename KeyType>
+void KeyValueSortImpl(KeyType* keys, int64 a, int64 b, int64 c, char* values,
+                      int32 values_primitive_type_size_in_bytes) {
+  // High-level idea of the iteration/sorting logic:
+  // Conceptually we have a 3-dimensional shape [a, b, c]. b corresponds to the
+  // dimension to sort, c is the product of the more minor dimensions (set to 1
+  // if b is the most minor dimension), and a is the product of the more major
+  // dimensions (set to 1 if b is the most major dimension). There are a * c
+  // many rows that we need to sort. We iterate through these, calculate a
+  // 'base_offset' value which points to the first element in that row, and add
+  // i * c for accessing the 'i'-th element in that row.
+
+  int64 sort_dimension_elements = b;
+  int64 num_iteration_elements = a * c;
+  int64 sort_dimension_offset = c;
+
+  std::unique_ptr<std::pair<KeyType, int64>[]> row_to_sort(
+      new std::pair<KeyType, int64>[sort_dimension_elements]);
+  std::unique_ptr<std::string[]> reordered_values(
+      new std::string[sort_dimension_elements]);
+  for (int64 index = 0; index < num_iteration_elements; ++index) {
+    // 'index' can be split into two values which index into the 'c' dimension
+    // and the 'a' dimension, respectively. 'index' % 'c' is the index into the
+    // 'c' dimension, 'index' / 'c' is the index into the 'a' dimension. When
+    // calculating the base offset, we need to multiply the index into the 'a'
+    // dimension with 'b' * 'c'.
+    // 'index' / 'c' * 'c' * 'b' = ('index' - 'index' % 'c') * 'b'.
+    int64 base_offset =
+        index % sort_dimension_offset +
+        (index - index % sort_dimension_offset) * sort_dimension_elements;
+    // TODO(b/26783907): We could define a custom iterator class that references
+    // both arrays. Then we could avoid the intermediate copy. However this
+    // would become more complicated, and it is not clear if the benefit is high
+    // enough.
+    for (int64 i = 0; i < sort_dimension_elements; ++i) {
+      row_to_sort[i] =
+          std::make_pair(keys[base_offset + i * sort_dimension_offset], i);
+    }
+    KeyValueSort(row_to_sort.get(), sort_dimension_elements);
+    for (int64 i = 0; i < sort_dimension_elements; ++i) {
+      keys[base_offset + i * sort_dimension_offset] = row_to_sort[i].first;
+    }
+    if (values == nullptr) {
+      continue;
+    }
+
+    // Reorder the values according to the order defined by the keys.
+    for (int64 i = 0; i < sort_dimension_elements; ++i) {
+      int64 memory_index =
+          (base_offset + row_to_sort[i].second * sort_dimension_offset) *
+          values_primitive_type_size_in_bytes;
+
+      reordered_values[i] = std::string(values + memory_index,
+                                        values_primitive_type_size_in_bytes);
+    }
+    for (int64 i = 0; i < sort_dimension_elements; ++i) {
+      int64 memory_index = (base_offset + i * sort_dimension_offset) *
+                           values_primitive_type_size_in_bytes;
+      memcpy(values + memory_index, reordered_values[i].c_str(),
+             values_primitive_type_size_in_bytes);
+    }
+  }
+}
+}  // namespace
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortPRED(
+    bool* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS8(
+    int8* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU8(
+    uint8* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS16(
+    int16* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU16(
+    uint16* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortF16(
+    Eigen::half* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS32(
+    int32* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU32(
+    uint32* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortF32(
+    float* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS64(
+    int64* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU64(
+    uint64* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortF64(
+    double* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+}
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
new file mode 100644
index 0000000000..28e35e82c1
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
@@ -0,0 +1,88 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_KEY_VALUE_SORT_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_KEY_VALUE_SORT_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/platform/types.h"
+
+extern "C" {
+
+// 'keys' represents a 3-dimensional shape with dimensions [a, b, c]. The 'b'
+// dimension of 'keys' is sorted into ascending order. 'values' can be nullptr.
+// If 'values' is not nullptr, the elements in 'values' are reordered in such a
+// way that if the element at index 'i' in 'keys' was moved to index 'j', the
+// element at index 'i' in 'values' is also moved to index 'j' (which means that
+// the same elements correspond to each other as before).
+extern void __xla_cpu_runtime_KeyValueSortPRED(
+    bool* keys, tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
+    char* values, tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortS8(
+    tensorflow::int8* keys, tensorflow::int64 a, tensorflow::int64 b,
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortU8(
+    tensorflow::uint8* keys, tensorflow::int64 a, tensorflow::int64 b,
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortS16(
+    tensorflow::int16* keys, tensorflow::int64 a, tensorflow::int64 b,
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortU16(
+    tensorflow::uint16* keys, tensorflow::int64 a, tensorflow::int64 b,
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortF16(
+    Eigen::half* keys, tensorflow::int64 a, tensorflow::int64 b,
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortS32(
+    tensorflow::int32* keys, tensorflow::int64 a, tensorflow::int64 b,
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortU32(
+    tensorflow::uint32* keys, tensorflow::int64 a, tensorflow::int64 b,
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortF32(
+    float* keys, tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
+    char* values, tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortS64(
+    tensorflow::int64* keys, tensorflow::int64 a, tensorflow::int64 b,
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortU64(
+    tensorflow::uint64* keys, tensorflow::int64 a, tensorflow::int64 b,
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
+
+extern void __xla_cpu_runtime_KeyValueSortF64(
+    double* keys, tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
+    char* values, tensorflow::int32 values_primitive_type_size_in_bytes);
+}
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_KEY_VALUE_SORT_H_
diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index bf98064647..9ec0c8f657 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -35,6 +35,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/runtime_fft.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_fork_join.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_fp16.h"
+#include "tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h"
@@ -202,6 +203,18 @@ bool RegisterKnownJITSymbols() {
   REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
   REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
   REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortPRED);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortS8);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortU8);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortS16);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortU16);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortF16);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortS32);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortU32);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortF32);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortS64);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortU64);
+  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSortF64);
 
   registry->Register("__gnu_f2h_ieee", reinterpret_cast<void*>(__gnu_f2h_ieee));
   registry->Register("__gnu_h2f_ieee", reinterpret_cast<void*>(__gnu_h2f_ieee));
diff --git a/tensorflow/compiler/xla/service/cpu/tests/BUILD b/tensorflow/compiler/xla/service/cpu/tests/BUILD
index c55206eee7..4b129c95d4 100644
--- a/tensorflow/compiler/xla/service/cpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/tests/BUILD
@@ -180,3 +180,17 @@ tf_cc_test(
         "//tensorflow/core:test_main",
     ],
 )
+
+tf_cc_test(
+    name = "cpu_key_value_sort_test",
+    srcs = ["cpu_key_value_sort_test.cc"],
+    deps = [
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_parser",
+        "//tensorflow/compiler/xla/service/cpu:cpu_compiler",
+        "//tensorflow/compiler/xla/service/cpu/tests:cpu_codegen_test",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_key_value_sort_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_key_value_sort_test.cc
new file mode 100644
index 0000000000..3934c03a04
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_key_value_sort_test.cc
@@ -0,0 +1,54 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h"
+#include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
+
+namespace xla {
+namespace cpu {
+namespace {
+class CpuKeyValueSortTest : public CpuCodegenTest {};
+
+TEST_F(CpuKeyValueSortTest, SortR1) {
+  const string hlo_text = R"(
+HloModule KeyValueSort
+
+ENTRY main {
+  a = f32[10] parameter(0)
+
+  ROOT result = f32[10] sort(f32[10] a), dimensions={0}
+}
+)";
+
+  string filecheck_pattern = R"(
+CHECK: call void @__xla_cpu_runtime_KeyValueSort
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(hlo_text));
+
+  CpuAotCompilationOptions options{
+      /*triple=*/"x86_64", /*cpu_name=*/"", /*features=*/"",
+      /*entry_point_name=*/"entry",
+      /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static};
+
+  CompileAheadOfTimeAndVerifyIr(std::move(module), options, filecheck_pattern,
+                                /*match_optimized_ir=*/true);
+}
+
+}  // namespace
+}  // namespace cpu
+}  // namespace xla
-- 
GitLab


From 4732df127d13ce3af5840607b087fc79d883601b Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Wed, 19 Sep 2018 02:26:56 -0700
Subject: [PATCH 0365/1357] Replace DLOG(FATAL) with an Unimplemented error.

In tensorflow we don't have DLOG, and we should not use LOG(FATAL).

PiperOrigin-RevId: 213595376
---
 tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 7e82375cc3..c32f2533ee 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -607,8 +607,9 @@ Status IrEmitter::HandleSort(HloInstruction* sort) {
       keys_native_type = b_.getDoubleTy()->getPointerTo();
       break;
     default:
-      DLOG(FATAL) << "Element type " << PrimitiveType_Name(keys_type)
-                  << " not supported in the Sort op on CPU.";
+      return Unimplemented(
+          "Element type %s not supported in the Sort op on CPU.",
+          PrimitiveType_Name(keys_type));
   }
 
   llvm::FunctionType* key_value_sort_type = llvm::FunctionType::get(
-- 
GitLab


From edae2fe261b4395c6b84a8d24b48abd37b14f041 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Wed, 19 Sep 2018 02:28:02 -0700
Subject: [PATCH 0366/1357] Enable XlaSort and TopKV2 for CPU backend.

PiperOrigin-RevId: 213595499
---
 tensorflow/compiler/tf2xla/xla_cpu_backend.cc | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/xla_cpu_backend.cc b/tensorflow/compiler/tf2xla/xla_cpu_backend.cc
index 23d04d43b3..ead229aacc 100644
--- a/tensorflow/compiler/tf2xla/xla_cpu_backend.cc
+++ b/tensorflow/compiler/tf2xla/xla_cpu_backend.cc
@@ -31,10 +31,6 @@ bool CpuOpFilter(KernelDef* kdef) {
         DT_FLOAT);
     return true;
   }
-  // TODO(b/26783907): The CPU backend currently does not implement sort.
-  if (kdef->op() == "XlaSort" || kdef->op() == "TopKV2") {
-    return false;
-  }
   if (kdef->op() == "Const") {
     AddDtypeToKernalDefConstraint("dtype", DT_STRING, kdef);
   }
-- 
GitLab


From 0b274dcbe12a9f0ddd96f94cc80c2e66c195fd76 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 02:30:17 -0700
Subject: [PATCH 0367/1357] compat: Update forward compatibility horizon to
 2018-09-19

PiperOrigin-RevId: 213595705
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 157e699604..8edd6419d3 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 18)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 19)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 227b819d55c3b24103026cdaf1897892422c5cd3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 03:14:28 -0700
Subject: [PATCH 0368/1357] Run CPU tests remotely.

Being able to run CPU tests remotely while running GPU tests locally required
multiple changes:
1. Unify how we tag GPU tests in TF; we now always use tf_cuda_tests_tags().
2. Tag tests using tf_cuda_tests_tags() with 'local' and 'gpu'; this makes
   them not run on non-gpu builds and always runs them locally.

PiperOrigin-RevId: 213601626
---
 tensorflow/compiler/tests/build_defs.bzl      |   6 +-
 tensorflow/compiler/xla/service/gpu/BUILD     |   4 +-
 .../compiler/xla/service/gpu/tests/BUILD      |  46 +-
 tensorflow/compiler/xla/tests/BUILD           |   8 +-
 tensorflow/compiler/xla/tests/build_defs.bzl  | 488 +++++++++---------
 tensorflow/compiler/xrt/tests/BUILD           |   6 +-
 tensorflow/core/kernels/BUILD                 |   6 +-
 .../platform/default/build_config_root.bzl    |  86 +--
 third_party/toolchains/BUILD                  |   2 +-
 9 files changed, 330 insertions(+), 322 deletions(-)

diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl
index 114793352e..b8583c9bdb 100644
--- a/tensorflow/compiler/tests/build_defs.bzl
+++ b/tensorflow/compiler/tests/build_defs.bzl
@@ -2,6 +2,10 @@
 
 load("@local_config_cuda//cuda:build_defs.bzl", "cuda_is_configured")
 load("//tensorflow/compiler/tests:plugin.bzl", "plugins")
+load(
+    "//tensorflow/core:platform/default/build_config_root.bzl",
+    "tf_cuda_tests_tags",
+)
 
 def all_backends():
     b = ["cpu"] + plugins.keys()
@@ -65,7 +69,7 @@ def tf_xla_py_test(
                 "--test_device=XLA_GPU",
                 "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_UINT8,DT_INT8,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16",
             ]
-            backend_tags += ["requires-gpu-sm35"]
+            backend_tags += tf_cuda_tests_tags()
         elif backend in plugins:
             backend_args += [
                 "--test_device=" + plugins[backend]["device"],
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 64b9683628..cbee4db06e 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -68,9 +68,7 @@ cc_library(
 #    srcs = [
 #        "partition_assignment_test.cc",
 #    ],
-#    tags = [
-#        "requires-gpu-sm35",
-#    ],
+#    tags = tf_cuda_tests_tags(),
 #    deps = [
 #        ":partition_assignment",
 #        "//tensorflow/core:stream_executor_no_cuda",
diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD
index db4a33dc56..5da6f232d5 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD
@@ -25,15 +25,17 @@ filegroup(
 )
 
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load(
+    "//tensorflow/core:platform/default/build_config_root.bzl",
+    "tf_cuda_tests_tags",
+)
 
 cc_library(
     name = "gpu_codegen_test",
     testonly = True,
     srcs = ["gpu_codegen_test.cc"],
     hdrs = ["gpu_codegen_test.h"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         "//tensorflow/compiler/xla/legacy_flags:debug_options_flags",
         "//tensorflow/compiler/xla/service:gpu_plugin",
@@ -48,9 +50,7 @@ cc_library(
 tf_cc_test(
     name = "gpu_copy_test",
     srcs = ["gpu_copy_test.cc"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla:literal",
@@ -67,9 +67,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_ftz_test",
     srcs = ["gpu_ftz_test.cc"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/core:test_main",
@@ -79,9 +77,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_index_test",
     srcs = ["gpu_index_test.cc"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla:literal",
@@ -102,9 +98,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_infeed_test",
     srcs = ["infeed_test.cc"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla:literal",
@@ -125,9 +119,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_kernel_tiling_test",
     srcs = ["gpu_kernel_tiling_test.cc"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla/service:hlo",
@@ -142,7 +134,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_ldg_test",
     srcs = ["gpu_ldg_test.cc"],
-    tags = ["requires-gpu-sm35"],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla:literal",
@@ -159,9 +151,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_noalias_test",
     srcs = ["gpu_noalias_test.cc"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla:literal",
@@ -178,9 +168,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_fusion_test",
     srcs = ["gpu_fusion_test.cc"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla/service:hlo_module_config",
@@ -194,9 +182,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_unrolling_test",
     srcs = ["gpu_unrolling_test.cc"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla/service:hlo_module_config",
@@ -211,9 +197,7 @@ tf_cc_test(
     name = "gpu_alignment_test",
     testonly = True,
     srcs = ["gpu_alignment_test.cc"],
-    tags = [
-        "requires-gpu-sm35",
-    ],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla/service:gpu_plugin",
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 30e3077edb..b49db029e2 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -29,6 +29,10 @@ load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_suites"
 load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_test_macros")
 load("//tensorflow:tensorflow.bzl", "tf_cc_binary")
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load(
+    "//tensorflow/core:platform/default/build_config_root.bzl",
+    "tf_cuda_tests_tags",
+)
 
 # Generate test_suites for all backends, named "${backend}_tests".
 generate_backend_suites()
@@ -1797,7 +1801,7 @@ xla_test(
 tf_cc_test(
     name = "llvm_compiler_test",
     srcs = ["llvm_compiler_test.cc"],
-    tags = ["requires-gpu-sm35"],
+    tags = tf_cuda_tests_tags(),
     deps = [
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:test_helpers",
@@ -2096,7 +2100,7 @@ tf_cc_test(
     name = "sample_file_test",
     srcs = ["sample_file_test.cc"],
     data = ["isolated_convolution.hlo"],
-    tags = ["requires-gpu-sm35"],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":hlo_test_base",
         "//tensorflow/compiler/xla:test",
diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl
index 53f2c3bfbf..cc65a8939a 100644
--- a/tensorflow/compiler/xla/tests/build_defs.bzl
+++ b/tensorflow/compiler/xla/tests/build_defs.bzl
@@ -3,256 +3,266 @@
 load("@local_config_cuda//cuda:build_defs.bzl", "cuda_is_configured")
 load("//tensorflow/compiler/xla/tests:plugin.bzl", "plugins")
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load(
+    "//tensorflow/core:platform/default/build_config_root.bzl",
+    "tf_cuda_tests_tags",
+)
 
 all_backends = ["cpu", "gpu"] + plugins.keys()
 
 def filter_backends(backends):
-  """Removes "gpu" from a backend list if CUDA is not enabled.
-
-  This allows us to simply hardcode lists including "gpu" here and in the
-  BUILD file, without causing failures when CUDA isn't enabled.'
-
-  Args:
-    backends: A list of backends to filter.
-
-  Returns:
-    The filtered list of backends.
-  """
-  if cuda_is_configured():
-    return backends
-  else:
-    return [backend for backend in backends if backend != "gpu"]
-
-
-def xla_test(name,
-             srcs,
-             deps,
-             xla_test_library_deps=[],
-             backends=[],
-             blacklisted_backends=[],
-             args=[],
-             tags=[],
-             copts=[],
-             data=[],
-             backend_tags={},
-             backend_args={},
-             **kwargs):
-  """Generates cc_test targets for the given XLA backends.
-
-  This rule generates a cc_test target for one or more XLA backends and also a
-  platform-agnostic cc_library rule. The arguments are identical to cc_test with
-  two additions: 'backends' and 'backend_args'. 'backends' specifies the
-  backends to generate tests for ("cpu", "gpu"), and
-  'backend_args'/'backend_tags' specifies backend-specific args parameters to
-  use when generating the cc_test.
-
-  The name of the cc_tests are the provided name argument with the backend name
-  appended, and the cc_library target name is the provided name argument with
-  "_lib" appended. For example, if name parameter is "foo_test", then the cpu
-  test target will be "foo_test_cpu" and the cc_library target is "foo_lib".
-
-  The cc_library target can be used to link with other plugins outside of
-  xla_test.
-
-  The build rule also defines a test suite ${name} which includes the tests for
-  each of the supported backends.
-
-  Each generated cc_test target has a tag indicating which backend the test is
-  for. This tag is of the form "xla_${BACKEND}" (eg, "xla_cpu"). These
-  tags can be used to gather tests for a particular backend into a test_suite.
-
-  Examples:
-
-    # Generates the targets: foo_test_cpu and foo_test_gpu.
-    xla_test(
-        name = "foo_test",
-        srcs = ["foo_test.cc"],
-        backends = ["cpu", "gpu"],
-        deps = [...],
-    )
+    """Removes "gpu" from a backend list if CUDA is not enabled.
 
-    # Generates the targets: bar_test_cpu and bar_test_gpu. bar_test_cpu
-    # includes the additional arg "--special_cpu_flag".
-    xla_test(
-        name = "bar_test",
-        srcs = ["bar_test.cc"],
-        backends = ["cpu", "gpu"],
-        backend_args = {"cpu": ["--special_cpu_flag"]}
-        deps = [...],
-    )
+    This allows us to simply hardcode lists including "gpu" here and in the
+    BUILD file, without causing failures when CUDA isn't enabled.'
 
-  The build rule defines the preprocessor macro XLA_TEST_BACKEND_${BACKEND}
-  to the value 1 where ${BACKEND} is the uppercase name of the backend.
-
-  Args:
-    name: Name of the target.
-    srcs: Sources for the target.
-    deps: Dependencies of the target.
-    xla_test_library_deps: If set, the generated test targets will depend on the
-      respective cc_libraries generated by the xla_test_library rule.
-    backends: A list of backends to generate tests for. Supported values: "cpu",
-      "gpu". If this list is empty, the test will be generated for all supported
-      backends.
-    blacklisted_backends: A list of backends to NOT generate tests for.
-    args: Test arguments for the target.
-    tags: Tags for the target.
-    copts: Additional copts to pass to the build.
-    data: Additional data to pass to the build.
-    backend_tags: A dict mapping backend name to list of additional tags to
-      use for that target.
-    backend_args: A dict mapping backend name to list of additional args to
-      use for that target.
-    **kwargs: Additional keyword arguments to pass to native.cc_test.
-  """
-  test_names = []
-  if not backends:
-    backends = all_backends
-
-  backends = [backend for backend in backends
-              if backend not in blacklisted_backends]
-
-  native.cc_library(
-      name="%s_lib" % name,
-      srcs=srcs,
-      copts=copts,
-      testonly=True,
-      deps=deps + ["//tensorflow/compiler/xla/tests:test_macros_header"],
-  )
-
-  for backend in filter_backends(backends):
-    test_name = "%s_%s" % (name, backend)
-    this_backend_tags = ["xla_%s" % backend]
-    this_backend_copts = []
-    this_backend_args = backend_args.get(backend, [])
-    this_backend_data = []
-    if backend == "cpu":
-      backend_deps = ["//tensorflow/compiler/xla/service:cpu_plugin"]
-      backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_cpu"]
-    elif backend == "gpu":
-      backend_deps = ["//tensorflow/compiler/xla/service:gpu_plugin"]
-      backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_gpu"]
-      this_backend_tags += ["requires-gpu-sm35"]
-    elif backend in plugins:
-      backend_deps = []
-      backend_deps += plugins[backend]["deps"]
-      this_backend_copts += plugins[backend]["copts"]
-      this_backend_tags += plugins[backend]["tags"]
-      this_backend_args += plugins[backend]["args"]
-      this_backend_data += plugins[backend]["data"]
-    else:
-      fail("Unknown backend %s" % backend)
-
-    if xla_test_library_deps:
-      for lib_dep in xla_test_library_deps:
-        backend_deps += ["%s_%s" % (lib_dep, backend)]
-
-    tf_cc_test(
-        name=test_name,
-        srcs=srcs,
-        tags=tags + backend_tags.get(backend, []) + this_backend_tags,
-        extra_copts=copts + ["-DXLA_TEST_BACKEND_%s=1" % backend.upper()] +
-        this_backend_copts,
-        args=args + this_backend_args,
-        deps=deps + backend_deps,
-        data=data + this_backend_data,
-        **kwargs)
-
-    test_names.append(test_name)
-
-  native.test_suite(name=name, tests=test_names)
-
-def xla_test_library(name,
-                     srcs,
-                     hdrs=[],
-                     deps=[],
-                     backends=[]):
-  """Generates cc_library targets for the given XLA backends.
-
-  This rule forces the sources to be compiled for each backend so that the
-  backend specific macros could expand correctly. It's useful when test targets
-  in different directories referring to the same sources but test with different
-  arguments.
-
-  Examples:
-
-    # Generates the targets: foo_test_library_cpu and foo_test_gpu.
-    xla_test_library(
-        name = "foo_test_library",
-        srcs = ["foo_test.cc"],
-        backends = ["cpu", "gpu"],
-        deps = [...],
-    )
-    # Then use the xla_test rule to generate test targets:
-    xla_test(
-        name = "foo_test",
-        srcs = [],
-        backends = ["cpu", "gpu"],
-        deps = [...],
-        xla_test_library_deps = [":foo_test_library"],
-    )
+    Args:
+      backends: A list of backends to filter.
 
-  Args:
-    name: Name of the target.
-    srcs: Sources for the target.
-    hdrs: Headers for the target.
-    deps: Dependencies of the target.
-    backends: A list of backends to generate libraries for.
-      Supported values: "cpu", "gpu". If this list is empty, the
-      library will be generated for all supported backends.
-  """
-
-  if not backends:
-    backends = all_backends
-
-  for backend in filter_backends(backends):
-    this_backend_copts = []
-    if backend in ["cpu", "gpu"]:
-      backend_deps = ["//tensorflow/compiler/xla/tests:test_macros_%s" % backend]
-    elif backend in plugins:
-      backend_deps = plugins[backend]["deps"]
-      this_backend_copts += plugins[backend]["copts"]
+    Returns:
+      The filtered list of backends.
+    """
+    if cuda_is_configured():
+        return backends
     else:
-      fail("Unknown backend %s" % backend)
+        return [backend for backend in backends if backend != "gpu"]
+
+def xla_test(
+        name,
+        srcs,
+        deps,
+        xla_test_library_deps = [],
+        backends = [],
+        blacklisted_backends = [],
+        args = [],
+        tags = [],
+        copts = [],
+        data = [],
+        backend_tags = {},
+        backend_args = {},
+        **kwargs):
+    """Generates cc_test targets for the given XLA backends.
+
+    This rule generates a cc_test target for one or more XLA backends and also a
+    platform-agnostic cc_library rule. The arguments are identical to cc_test with
+    two additions: 'backends' and 'backend_args'. 'backends' specifies the
+    backends to generate tests for ("cpu", "gpu"), and
+    'backend_args'/'backend_tags' specifies backend-specific args parameters to
+    use when generating the cc_test.
+
+    The name of the cc_tests are the provided name argument with the backend name
+    appended, and the cc_library target name is the provided name argument with
+    "_lib" appended. For example, if name parameter is "foo_test", then the cpu
+    test target will be "foo_test_cpu" and the cc_library target is "foo_lib".
+
+    The cc_library target can be used to link with other plugins outside of
+    xla_test.
+
+    The build rule also defines a test suite ${name} which includes the tests for
+    each of the supported backends.
+
+    Each generated cc_test target has a tag indicating which backend the test is
+    for. This tag is of the form "xla_${BACKEND}" (eg, "xla_cpu"). These
+    tags can be used to gather tests for a particular backend into a test_suite.
+
+    Examples:
+
+      # Generates the targets: foo_test_cpu and foo_test_gpu.
+      xla_test(
+          name = "foo_test",
+          srcs = ["foo_test.cc"],
+          backends = ["cpu", "gpu"],
+          deps = [...],
+      )
+
+      # Generates the targets: bar_test_cpu and bar_test_gpu. bar_test_cpu
+      # includes the additional arg "--special_cpu_flag".
+      xla_test(
+          name = "bar_test",
+          srcs = ["bar_test.cc"],
+          backends = ["cpu", "gpu"],
+          backend_args = {"cpu": ["--special_cpu_flag"]}
+          deps = [...],
+      )
+
+    The build rule defines the preprocessor macro XLA_TEST_BACKEND_${BACKEND}
+    to the value 1 where ${BACKEND} is the uppercase name of the backend.
+
+    Args:
+      name: Name of the target.
+      srcs: Sources for the target.
+      deps: Dependencies of the target.
+      xla_test_library_deps: If set, the generated test targets will depend on the
+        respective cc_libraries generated by the xla_test_library rule.
+      backends: A list of backends to generate tests for. Supported values: "cpu",
+        "gpu". If this list is empty, the test will be generated for all supported
+        backends.
+      blacklisted_backends: A list of backends to NOT generate tests for.
+      args: Test arguments for the target.
+      tags: Tags for the target.
+      copts: Additional copts to pass to the build.
+      data: Additional data to pass to the build.
+      backend_tags: A dict mapping backend name to list of additional tags to
+        use for that target.
+      backend_args: A dict mapping backend name to list of additional args to
+        use for that target.
+      **kwargs: Additional keyword arguments to pass to native.cc_test.
+    """
+    test_names = []
+    if not backends:
+        backends = all_backends
+
+    backends = [
+        backend
+        for backend in backends
+        if backend not in blacklisted_backends
+    ]
 
     native.cc_library(
-        name = "%s_%s" % (name, backend),
+        name = "%s_lib" % name,
         srcs = srcs,
+        copts = copts,
         testonly = True,
-        hdrs = hdrs,
-        copts = ["-DXLA_TEST_BACKEND_%s=1" % backend.upper()]
-        + this_backend_copts,
-        deps = deps + backend_deps,
+        deps = deps + ["//tensorflow/compiler/xla/tests:test_macros_header"],
     )
 
-
-def generate_backend_suites(backends=[]):
-  if not backends:
-    backends = all_backends
-  for backend in filter_backends(backends):
-    native.test_suite(name="%s_tests" % backend,
-                      tags = ["xla_%s" % backend])
-
-
-def generate_backend_test_macros(backends=[]):
-  if not backends:
-    backends = all_backends
-  for backend in filter_backends(backends):
-    manifest = ""
-    if backend in plugins:
-      manifest = plugins[backend]["disabled_manifest"]
-
-    native.cc_library(
-        name="test_macros_%s" % backend,
-        testonly = True,
-        srcs = ["test_macros.cc"],
-        hdrs = ["test_macros.h"],
-        copts = [
-          "-DXLA_PLATFORM=\\\"%s\\\"" % backend.upper(),
-          "-DXLA_DISABLED_MANIFEST=\\\"%s\\\"" % manifest,
-        ],
-        deps = [
-            "//tensorflow/compiler/xla:types",
-            "//tensorflow/core:lib",
-            "//tensorflow/core:regexp_internal",
-            "//tensorflow/core:test",
-        ])
+    for backend in filter_backends(backends):
+        test_name = "%s_%s" % (name, backend)
+        this_backend_tags = ["xla_%s" % backend]
+        this_backend_copts = []
+        this_backend_args = backend_args.get(backend, [])
+        this_backend_data = []
+        if backend == "cpu":
+            backend_deps = ["//tensorflow/compiler/xla/service:cpu_plugin"]
+            backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_cpu"]
+        elif backend == "gpu":
+            backend_deps = ["//tensorflow/compiler/xla/service:gpu_plugin"]
+            backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_gpu"]
+            this_backend_tags += tf_cuda_tests_tags()
+        elif backend in plugins:
+            backend_deps = []
+            backend_deps += plugins[backend]["deps"]
+            this_backend_copts += plugins[backend]["copts"]
+            this_backend_tags += plugins[backend]["tags"]
+            this_backend_args += plugins[backend]["args"]
+            this_backend_data += plugins[backend]["data"]
+        else:
+            fail("Unknown backend %s" % backend)
+
+        if xla_test_library_deps:
+            for lib_dep in xla_test_library_deps:
+                backend_deps += ["%s_%s" % (lib_dep, backend)]
+
+        tf_cc_test(
+            name = test_name,
+            srcs = srcs,
+            tags = tags + backend_tags.get(backend, []) + this_backend_tags,
+            extra_copts = copts + ["-DXLA_TEST_BACKEND_%s=1" % backend.upper()] +
+                          this_backend_copts,
+            args = args + this_backend_args,
+            deps = deps + backend_deps,
+            data = data + this_backend_data,
+            **kwargs
+        )
+
+        test_names.append(test_name)
+
+    native.test_suite(name = name, tests = test_names)
+
+def xla_test_library(
+        name,
+        srcs,
+        hdrs = [],
+        deps = [],
+        backends = []):
+    """Generates cc_library targets for the given XLA backends.
+
+    This rule forces the sources to be compiled for each backend so that the
+    backend specific macros could expand correctly. It's useful when test targets
+    in different directories referring to the same sources but test with different
+    arguments.
+
+    Examples:
+
+      # Generates the targets: foo_test_library_cpu and foo_test_gpu.
+      xla_test_library(
+          name = "foo_test_library",
+          srcs = ["foo_test.cc"],
+          backends = ["cpu", "gpu"],
+          deps = [...],
+      )
+      # Then use the xla_test rule to generate test targets:
+      xla_test(
+          name = "foo_test",
+          srcs = [],
+          backends = ["cpu", "gpu"],
+          deps = [...],
+          xla_test_library_deps = [":foo_test_library"],
+      )
+
+    Args:
+      name: Name of the target.
+      srcs: Sources for the target.
+      hdrs: Headers for the target.
+      deps: Dependencies of the target.
+      backends: A list of backends to generate libraries for.
+        Supported values: "cpu", "gpu". If this list is empty, the
+        library will be generated for all supported backends.
+    """
+
+    if not backends:
+        backends = all_backends
+
+    for backend in filter_backends(backends):
+        this_backend_copts = []
+        if backend in ["cpu", "gpu"]:
+            backend_deps = ["//tensorflow/compiler/xla/tests:test_macros_%s" % backend]
+        elif backend in plugins:
+            backend_deps = plugins[backend]["deps"]
+            this_backend_copts += plugins[backend]["copts"]
+        else:
+            fail("Unknown backend %s" % backend)
+
+        native.cc_library(
+            name = "%s_%s" % (name, backend),
+            srcs = srcs,
+            testonly = True,
+            hdrs = hdrs,
+            copts = ["-DXLA_TEST_BACKEND_%s=1" % backend.upper()] +
+                    this_backend_copts,
+            deps = deps + backend_deps,
+        )
+
+def generate_backend_suites(backends = []):
+    if not backends:
+        backends = all_backends
+    for backend in filter_backends(backends):
+        native.test_suite(
+            name = "%s_tests" % backend,
+            tags = ["xla_%s" % backend],
+        )
+
+def generate_backend_test_macros(backends = []):
+    if not backends:
+        backends = all_backends
+    for backend in filter_backends(backends):
+        manifest = ""
+        if backend in plugins:
+            manifest = plugins[backend]["disabled_manifest"]
+
+        native.cc_library(
+            name = "test_macros_%s" % backend,
+            testonly = True,
+            srcs = ["test_macros.cc"],
+            hdrs = ["test_macros.h"],
+            copts = [
+                "-DXLA_PLATFORM=\\\"%s\\\"" % backend.upper(),
+                "-DXLA_DISABLED_MANIFEST=\\\"%s\\\"" % manifest,
+            ],
+            deps = [
+                "//tensorflow/compiler/xla:types",
+                "//tensorflow/core:lib",
+                "//tensorflow/core:regexp_internal",
+                "//tensorflow/core:test",
+            ],
+        )
diff --git a/tensorflow/compiler/xrt/tests/BUILD b/tensorflow/compiler/xrt/tests/BUILD
index 09ab4ed95f..b6dcfc4eb9 100644
--- a/tensorflow/compiler/xrt/tests/BUILD
+++ b/tensorflow/compiler/xrt/tests/BUILD
@@ -8,6 +8,10 @@ package(
 )
 
 load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test", "tf_cc_test")
+load(
+    "//tensorflow/core:platform/default/build_config_root.bzl",
+    "tf_cuda_tests_tags",
+)
 
 cc_library(
     name = "raw_api_test_lib",
@@ -57,7 +61,7 @@ tf_cuda_cc_test(
     size = "medium",
     srcs = [],
     args = ["--xla_test_device=XLA_GPU"],
-    tags = ["requires-gpu-sm35"],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":raw_api_test_lib",
         "//tensorflow/compiler/jit:xla_gpu_device",
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index ef176a7de6..7aa1169061 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -50,6 +50,10 @@ load(
     "tf_proto_library",
     "tf_kernel_tests_linkstatic",
 )
+load(
+    "//tensorflow/core:platform/default/build_config_root.bzl",
+    "tf_cuda_tests_tags",
+)
 load(
     "//third_party/mkl:build_defs.bzl",
     "if_mkl",
@@ -1106,7 +1110,7 @@ tf_cuda_cc_test(
     name = "depthwise_conv_ops_test",
     size = "small",
     srcs = ["depthwise_conv_ops_test.cc"],
-    tags = ["requires-gpu-sm35"],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":conv_ops",
         ":image",
diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index 3a012c23fd..37475feebe 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -3,64 +3,64 @@
 # be separate to avoid cyclic references.
 
 def tf_cuda_tests_tags():
-  return ["requires-gpu"]
+    return ["requires-gpu", "local", "gpu"]
 
 def tf_sycl_tests_tags():
-  return ["requires-gpu"]
+    return ["requires-gpu", "local", "gpu"]
 
 def tf_additional_plugin_deps():
-  return select({
-      str(Label("//tensorflow:with_xla_support")): [
-          str(Label("//tensorflow/compiler/jit"))
-      ],
-      "//conditions:default": [],
-  })
+    return select({
+        str(Label("//tensorflow:with_xla_support")): [
+            str(Label("//tensorflow/compiler/jit")),
+        ],
+        "//conditions:default": [],
+    })
 
 def tf_additional_xla_deps_py():
-  return []
+    return []
 
 def tf_additional_grpc_deps_py():
-  return []
+    return []
 
 def tf_additional_license_deps():
-  return select({
-      str(Label("//tensorflow:with_xla_support")): ["@llvm//:LICENSE.TXT"],
-      "//conditions:default": [],
-  })
+    return select({
+        str(Label("//tensorflow:with_xla_support")): ["@llvm//:LICENSE.TXT"],
+        "//conditions:default": [],
+    })
 
 def tf_additional_verbs_deps():
-  return select({
-      str(Label("//tensorflow:with_verbs_support")): [
-          str(Label("//tensorflow/contrib/verbs:verbs_server_lib")),
-          str(Label("//tensorflow/contrib/verbs:grpc_verbs_client")),
-      ],
-      "//conditions:default": [],
-  })
+    return select({
+        str(Label("//tensorflow:with_verbs_support")): [
+            str(Label("//tensorflow/contrib/verbs:verbs_server_lib")),
+            str(Label("//tensorflow/contrib/verbs:grpc_verbs_client")),
+        ],
+        "//conditions:default": [],
+    })
 
 def tf_additional_mpi_deps():
-  return select({
-      str(Label("//tensorflow:with_mpi_support")): [
-          str(Label("//tensorflow/contrib/mpi:mpi_server_lib")),
-      ],
-      "//conditions:default": [],
-  })
+    return select({
+        str(Label("//tensorflow:with_mpi_support")): [
+            str(Label("//tensorflow/contrib/mpi:mpi_server_lib")),
+        ],
+        "//conditions:default": [],
+    })
 
 def tf_additional_gdr_deps():
-  return select({
-      str(Label("//tensorflow:with_gdr_support")): [
-          str(Label("//tensorflow/contrib/gdr:gdr_server_lib")),
-      ],
-      "//conditions:default": [],
-  })
+    return select({
+        str(Label("//tensorflow:with_gdr_support")): [
+            str(Label("//tensorflow/contrib/gdr:gdr_server_lib")),
+        ],
+        "//conditions:default": [],
+    })
 
-def if_static(extra_deps, otherwise=[]):
-  return select({
-      str(Label("//tensorflow:framework_shared_object")): otherwise,
-      "//conditions:default": extra_deps,
-  })
+def if_static(extra_deps, otherwise = []):
+    return select({
+        str(Label("//tensorflow:framework_shared_object")): otherwise,
+        "//conditions:default": extra_deps,
+    })
 
-def if_dynamic_kernels(extra_deps, otherwise=[]):
-  return select({
-      str(Label("//tensorflow:dynamic_loaded_kernels")): extra_deps,
-      "//conditions:default": otherwise,
-  })
+def if_dynamic_kernels(extra_deps, otherwise = []):
+    return select({
+        str(Label("//tensorflow:dynamic_loaded_kernels")): extra_deps,
+        "//conditions:default": otherwise,
+    })
diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD
index 4303751452..7256a7d96e 100644
--- a/third_party/toolchains/BUILD
+++ b/third_party/toolchains/BUILD
@@ -32,6 +32,6 @@ platform(
     remote_execution_properties = """
         properties: {
             name: "container-image"
-            value:"docker://gcr.io/asci-toolchain/nosla-cuda9.0-cudnn7-ubuntu14.04@sha256:ae58329b961e7c17d89725bf8fd72dfbd5850f4f3313de58e0cafbf5b0343735"
+            value:"docker://gcr.io/asci-toolchain/nosla-cuda9.0-cudnn7-ubuntu14.04@sha256:06b585f42eed3b2030e9566b8f88f48d7472fa0f47e59765bc115376c8801bdf"
         }""",
 )
-- 
GitLab


From 7b936cb6c4ca47c8d3a63b42364998c86d87f2cf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 05:06:48 -0700
Subject: [PATCH 0369/1357] jacobian: manually setting the output shape in the
 output.

PiperOrigin-RevId: 213610324
---
 tensorflow/python/ops/parallel_for/BUILD      |  2 ++
 .../python/ops/parallel_for/gradients.py      |  2 ++
 .../python/ops/parallel_for/gradients_test.py | 26 +++++++++++++++++++
 3 files changed, 30 insertions(+)

diff --git a/tensorflow/python/ops/parallel_for/BUILD b/tensorflow/python/ops/parallel_for/BUILD
index 015181af47..07fc9433a2 100644
--- a/tensorflow/python/ops/parallel_for/BUILD
+++ b/tensorflow/python/ops/parallel_for/BUILD
@@ -123,6 +123,8 @@ cuda_py_test(
         "//third_party/py/numpy",
         "//tensorflow/python:layers",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:functional_ops",
         "//tensorflow/python:random_ops",
         "//tensorflow/python/ops/losses",
     ],
diff --git a/tensorflow/python/ops/parallel_for/gradients.py b/tensorflow/python/ops/parallel_for/gradients.py
index 460de0a97f..1f026b3660 100644
--- a/tensorflow/python/ops/parallel_for/gradients.py
+++ b/tensorflow/python/ops/parallel_for/gradients.py
@@ -42,6 +42,7 @@ def jacobian(output, inputs, use_pfor=True):
     [y_1, ..., y_n, x_1, ..., x_m].
   """
   flat_inputs = nest.flatten(inputs)
+  output_tensor_shape = output.shape
   output_shape = array_ops.shape(output)
   output = array_ops.reshape(output, [-1])
 
@@ -65,6 +66,7 @@ def jacobian(output, inputs, use_pfor=True):
       new_shape = array_ops.concat(
           [output_shape, array_ops.shape(out)[1:]], axis=0)
       out = array_ops.reshape(out, new_shape)
+      out.set_shape(output_tensor_shape.concatenate(flat_inputs[i].shape))
     pfor_outputs[i] = out
 
   return nest.pack_sequence_as(inputs, pfor_outputs)
diff --git a/tensorflow/python/ops/parallel_for/gradients_test.py b/tensorflow/python/ops/parallel_for/gradients_test.py
index 628c6764cd..5467f55af6 100644
--- a/tensorflow/python/ops/parallel_for/gradients_test.py
+++ b/tensorflow/python/ops/parallel_for/gradients_test.py
@@ -32,6 +32,8 @@ from tensorflow.python.framework import ops
 from tensorflow.python.keras.engine import training as keras_training
 from tensorflow.python.layers import layers as tf_layers
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops as tf_control_flow_ops
+from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gradients as gradient_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
@@ -355,6 +357,30 @@ class GradientsTest(test.TestCase):
     self.run_and_assert_equal(answer, jacobian_pfor)
     self.run_and_assert_equal(answer, jacobian_while)
 
+  def test_jacobian_scan_shape(self):
+    # Shape x: [3, 4]
+    x = random_ops.random_uniform([3, 4])
+    elems = random_ops.random_uniform([6])
+    # Shape y: [6, 3, 4]
+    y = functional_ops.scan(lambda a, e: a + e, elems, initializer=x)
+    jacobian = gradients.jacobian(y, x)
+
+    expected_shape = [6, 3, 4, 3, 4]
+    self.assertAllEqual(expected_shape, jacobian.shape.as_list())
+
+  def test_jacobian_while_loop_shape(self):
+    # Shape x: [3, 4]
+    x = random_ops.random_uniform([3, 4])
+    _, y = tf_control_flow_ops.while_loop(lambda i, a: i > 5.,
+                                          lambda i, a: (i + 1, a + i),
+                                          (constant_op.constant(0.), x))
+    # Shape y: [2, 3]
+    y = y[:2, :3]
+    jacobian = gradients.jacobian(y, x)
+
+    expected_shape = [2, 3, 3, 4]
+    self.assertAllEqual(expected_shape, jacobian.shape.as_list())
+
   def test_jacobian_unknown_shape(self):
     with self.cached_session() as sess:
       x = array_ops.placeholder(dtypes.float32, shape=[None, None])
-- 
GitLab


From 27ea406e6d43ffc0d63f61782c413fe4d8483193 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Wed, 19 Sep 2018 05:20:57 -0700
Subject: [PATCH 0370/1357] Enable tests for CPU and GPU backends that involve
 XlaSort.

PiperOrigin-RevId: 213611371
---
 tensorflow/compiler/tests/image_ops_test.py  | 12 ------------
 tensorflow/compiler/tests/random_ops_test.py |  3 ---
 tensorflow/compiler/tests/sort_ops_test.py   | 20 --------------------
 3 files changed, 35 deletions(-)

diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py
index 6fe5a66e0e..bbe746e28f 100644
--- a/tensorflow/compiler/tests/image_ops_test.py
+++ b/tensorflow/compiler/tests/image_ops_test.py
@@ -605,10 +605,6 @@ class ResizeBilinearTest(xla_test.XLATestCase):
 class NonMaxSuppressionTest(xla_test.XLATestCase):
 
   def testNMS128From1024(self):
-    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
-    if self.device in ["XLA_CPU", "XLA_GPU"]:
-      return
-
     with compat.forward_compatibility_horizon(2018, 8, 8):
       num_boxes = 1024
       boxes_np = np.random.normal(50, 10, (num_boxes, 4)).astype("f4")
@@ -644,10 +640,6 @@ class NonMaxSuppressionTest(xla_test.XLATestCase):
         self.assertEqual(indices_tf.size, max_output_size)
 
   def testNMS3From6Boxes(self):
-    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
-    if self.device in ["XLA_CPU", "XLA_GPU"]:
-      return
-
     with compat.forward_compatibility_horizon(2018, 8, 8):
       # Three boxes are selected based on IOU.
       boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
@@ -693,10 +685,6 @@ class NonMaxSuppressionTest(xla_test.XLATestCase):
     # Three boxes are selected based on IOU.
     # One is filtered out by score threshold.
 
-    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
-    if self.device in ["XLA_CPU", "XLA_GPU"]:
-      return
-
     with compat.forward_compatibility_horizon(2018, 8, 8):
       boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
                     [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py
index 41fe42a26b..4932819585 100644
--- a/tensorflow/compiler/tests/random_ops_test.py
+++ b/tensorflow/compiler/tests/random_ops_test.py
@@ -145,9 +145,6 @@ class RandomOpsTest(xla_test.XLATestCase):
         self.assertAllClose(actual_variance, expected_variance, rtol=2*1e-3)
 
   def testShuffle1d(self):
-    # TODO(b/26783907): this test requires the CPU backend to implement sort.
-    if self.device in ["XLA_CPU"]:
-      return
     with self.cached_session() as sess:
       with self.test_scope():
         x = math_ops.range(1 << 16)
diff --git a/tensorflow/compiler/tests/sort_ops_test.py b/tensorflow/compiler/tests/sort_ops_test.py
index 51c04b5c47..dbf4beb693 100644
--- a/tensorflow/compiler/tests/sort_ops_test.py
+++ b/tensorflow/compiler/tests/sort_ops_test.py
@@ -48,10 +48,6 @@ class XlaSortOpTest(xla_test.XLATestCase):
         self.assertAllClose(v, result, rtol=1e-3)
 
   def testSort(self):
-    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
-    if self.device in ["XLA_CPU", "XLA_GPU"]:
-      return
-
     supported_types = set([dtypes.bfloat16.as_numpy_dtype, np.float32])
     for dtype in supported_types.intersection(self.numeric_types):
       x = np.arange(101, dtype=dtype)
@@ -60,10 +56,6 @@ class XlaSortOpTest(xla_test.XLATestCase):
           xla.sort, [x], expected=[np.arange(101, dtype=dtype)])
 
   def testTopK(self):
-    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
-    if self.device in ["XLA_CPU", "XLA_GPU"]:
-      return
-
     supported_types = set(
         [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
     for dtype in supported_types.intersection(self.numeric_types):
@@ -89,10 +81,6 @@ class XlaSortOpTest(xla_test.XLATestCase):
               expected=[x[indices].astype(dtype), indices])
 
   def testTopK2D(self):
-    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
-    if self.device in ["XLA_CPU", "XLA_GPU"]:
-      return
-
     supported_types = set(
         [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
     for dtype in supported_types.intersection(self.numeric_types):
@@ -122,10 +110,6 @@ class XlaSortOpTest(xla_test.XLATestCase):
 
   def testTopKZeros(self):
     """Tests that positive and negative zeros sort correctly."""
-    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
-    if self.device in ["XLA_CPU", "XLA_GPU"]:
-      return
-
     # Only bfloat16 is implemented.
     bfloat16 = dtypes.bfloat16.as_numpy_dtype
     if bfloat16 not in self.numeric_types:
@@ -144,10 +128,6 @@ class XlaSortOpTest(xla_test.XLATestCase):
 
   def testTopKInfinities(self):
     """Tests that positive and negative infinity sort correctly."""
-    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
-    if self.device in ["XLA_CPU", "XLA_GPU"]:
-      return
-
     # Only bfloat16 is implemented.
     bfloat16 = dtypes.bfloat16.as_numpy_dtype
     if bfloat16 not in self.numeric_types:
-- 
GitLab


From 22ff5db5d4a86c155e56ec1bdf36de503109abbe Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Wed, 19 Sep 2018 06:05:43 -0700
Subject: [PATCH 0371/1357] [TF:XLA] Enable ClipByValue test for integer types

This has been fixed a while ago. Even though TF allows ClipByValue for complex
types it's not implemented anywhere (and it doesn't make sense for complex
numbers) so blacklist complex types.

PiperOrigin-RevId: 213615429
---
 tensorflow/compiler/tests/ternary_ops_test.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/compiler/tests/ternary_ops_test.py b/tensorflow/compiler/tests/ternary_ops_test.py
index 55a992195f..98a07709c6 100644
--- a/tensorflow/compiler/tests/ternary_ops_test.py
+++ b/tensorflow/compiler/tests/ternary_ops_test.py
@@ -122,8 +122,7 @@ class TernaryOpsTest(xla_test.XLATestCase):
           expected=np.array([[2], [5]], dtype=dtype))
 
   def testClipByValue(self):
-    # TODO(b/78258593): enable integer types here too.
-    for dtype in self.float_types:
+    for dtype in self.numeric_types - self.complex_types:
       test_cases = [
           (np.array([2, 4, 5], dtype=dtype), dtype(7)),  #
           (dtype(1), np.array([2, 4, 5], dtype=dtype)),  #
-- 
GitLab


From 7bc9f39687ef41372acb3e5e25b43eeaa66d2c6b Mon Sep 17 00:00:00 2001
From: Brian Patton <bjp@google.com>
Date: Wed, 19 Sep 2018 06:37:43 -0700
Subject: [PATCH 0372/1357] Distributions should raise the original exception
 (log_prob not implemented) instead of the fallback exception (prob not
 implemented).

Additionally, in a nested structure of transformed distributions, it can be useful to know which distribution is raising this error.

PiperOrigin-RevId: 213618306
---
 .../python/ops/distributions/distribution.py  | 105 ++++++++++++------
 1 file changed, 73 insertions(+), 32 deletions(-)

diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py
index 578e7b7dd2..d6435d3bee 100644
--- a/tensorflow/python/ops/distributions/distribution.py
+++ b/tensorflow/python/ops/distributions/distribution.py
@@ -601,7 +601,8 @@ class Distribution(_BaseDistribution):
     return type(self)(**parameters)
 
   def _batch_shape_tensor(self):
-    raise NotImplementedError("batch_shape_tensor is not implemented")
+    raise NotImplementedError(
+        "batch_shape_tensor is not implemented: {}".format(type(self).__name__))
 
   def batch_shape_tensor(self, name="batch_shape_tensor"):
     """Shape of a single sample from a single event index as a 1-D `Tensor`.
@@ -640,7 +641,8 @@ class Distribution(_BaseDistribution):
     return tensor_shape.as_shape(self._batch_shape())
 
   def _event_shape_tensor(self):
-    raise NotImplementedError("event_shape_tensor is not implemented")
+    raise NotImplementedError(
+        "event_shape_tensor is not implemented: {}".format(type(self).__name__))
 
   def event_shape_tensor(self, name="event_shape_tensor"):
     """Shape of a single sample from a single batch as a 1-D int32 `Tensor`.
@@ -701,7 +703,8 @@ class Distribution(_BaseDistribution):
           name="is_scalar_batch")
 
   def _sample_n(self, n, seed=None):
-    raise NotImplementedError("sample_n is not implemented")
+    raise NotImplementedError("sample_n is not implemented: {}".format(
+        type(self).__name__))
 
   def _call_sample_n(self, sample_shape, seed, name, **kwargs):
     with self._name_scope(name, values=[sample_shape]):
@@ -733,15 +736,19 @@ class Distribution(_BaseDistribution):
     return self._call_sample_n(sample_shape, seed, name)
 
   def _log_prob(self, value):
-    raise NotImplementedError("log_prob is not implemented")
+    raise NotImplementedError("log_prob is not implemented: {}".format(
+        type(self).__name__))
 
   def _call_log_prob(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
       value = ops.convert_to_tensor(value, name="value")
       try:
         return self._log_prob(value, **kwargs)
-      except NotImplementedError:
-        return math_ops.log(self._prob(value, **kwargs))
+      except NotImplementedError as original_exception:
+        try:
+          return math_ops.log(self._prob(value, **kwargs))
+        except NotImplementedError:
+          raise original_exception
 
   def log_prob(self, value, name="log_prob"):
     """Log probability density/mass function.
@@ -757,15 +764,19 @@ class Distribution(_BaseDistribution):
     return self._call_log_prob(value, name)
 
   def _prob(self, value):
-    raise NotImplementedError("prob is not implemented")
+    raise NotImplementedError("prob is not implemented: {}".format(
+        type(self).__name__))
 
   def _call_prob(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
       value = ops.convert_to_tensor(value, name="value")
       try:
         return self._prob(value, **kwargs)
-      except NotImplementedError:
-        return math_ops.exp(self._log_prob(value, **kwargs))
+      except NotImplementedError as original_exception:
+        try:
+          return math_ops.exp(self._log_prob(value, **kwargs))
+        except NotImplementedError:
+          raise original_exception
 
   def prob(self, value, name="prob"):
     """Probability density/mass function.
@@ -781,15 +792,19 @@ class Distribution(_BaseDistribution):
     return self._call_prob(value, name)
 
   def _log_cdf(self, value):
-    raise NotImplementedError("log_cdf is not implemented")
+    raise NotImplementedError("log_cdf is not implemented: {}".format(
+        type(self).__name__))
 
   def _call_log_cdf(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
       value = ops.convert_to_tensor(value, name="value")
       try:
         return self._log_cdf(value, **kwargs)
-      except NotImplementedError:
-        return math_ops.log(self._cdf(value, **kwargs))
+      except NotImplementedError as original_exception:
+        try:
+          return math_ops.log(self._cdf(value, **kwargs))
+        except NotImplementedError:
+          raise original_exception
 
   def log_cdf(self, value, name="log_cdf"):
     """Log cumulative distribution function.
@@ -815,15 +830,19 @@ class Distribution(_BaseDistribution):
     return self._call_log_cdf(value, name)
 
   def _cdf(self, value):
-    raise NotImplementedError("cdf is not implemented")
+    raise NotImplementedError("cdf is not implemented: {}".format(
+        type(self).__name__))
 
   def _call_cdf(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
       value = ops.convert_to_tensor(value, name="value")
       try:
         return self._cdf(value, **kwargs)
-      except NotImplementedError:
-        return math_ops.exp(self._log_cdf(value, **kwargs))
+      except NotImplementedError as original_exception:
+        try:
+          return math_ops.exp(self._log_cdf(value, **kwargs))
+        except NotImplementedError:
+          raise original_exception
 
   def cdf(self, value, name="cdf"):
     """Cumulative distribution function.
@@ -845,15 +864,20 @@ class Distribution(_BaseDistribution):
     return self._call_cdf(value, name)
 
   def _log_survival_function(self, value):
-    raise NotImplementedError("log_survival_function is not implemented")
+    raise NotImplementedError(
+        "log_survival_function is not implemented: {}".format(
+            type(self).__name__))
 
   def _call_log_survival_function(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
       value = ops.convert_to_tensor(value, name="value")
       try:
         return self._log_survival_function(value, **kwargs)
-      except NotImplementedError:
-        return math_ops.log1p(-self.cdf(value, **kwargs))
+      except NotImplementedError as original_exception:
+        try:
+          return math_ops.log1p(-self.cdf(value, **kwargs))
+        except NotImplementedError:
+          raise original_exception
 
   def log_survival_function(self, value, name="log_survival_function"):
     """Log survival function.
@@ -880,15 +904,19 @@ class Distribution(_BaseDistribution):
     return self._call_log_survival_function(value, name)
 
   def _survival_function(self, value):
-    raise NotImplementedError("survival_function is not implemented")
+    raise NotImplementedError("survival_function is not implemented: {}".format(
+        type(self).__name__))
 
   def _call_survival_function(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
       value = ops.convert_to_tensor(value, name="value")
       try:
         return self._survival_function(value, **kwargs)
-      except NotImplementedError:
-        return 1. - self.cdf(value, **kwargs)
+      except NotImplementedError as original_exception:
+        try:
+          return 1. - self.cdf(value, **kwargs)
+        except NotImplementedError:
+          raise original_exception
 
   def survival_function(self, value, name="survival_function"):
     """Survival function.
@@ -912,7 +940,8 @@ class Distribution(_BaseDistribution):
     return self._call_survival_function(value, name)
 
   def _entropy(self):
-    raise NotImplementedError("entropy is not implemented")
+    raise NotImplementedError("entropy is not implemented: {}".format(
+        type(self).__name__))
 
   def entropy(self, name="entropy"):
     """Shannon entropy in nats."""
@@ -920,7 +949,8 @@ class Distribution(_BaseDistribution):
       return self._entropy()
 
   def _mean(self):
-    raise NotImplementedError("mean is not implemented")
+    raise NotImplementedError("mean is not implemented: {}".format(
+        type(self).__name__))
 
   def mean(self, name="mean"):
     """Mean."""
@@ -928,7 +958,8 @@ class Distribution(_BaseDistribution):
       return self._mean()
 
   def _quantile(self, value):
-    raise NotImplementedError("quantile is not implemented")
+    raise NotImplementedError("quantile is not implemented: {}".format(
+        type(self).__name__))
 
   def _call_quantile(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
@@ -955,7 +986,8 @@ class Distribution(_BaseDistribution):
     return self._call_quantile(value, name)
 
   def _variance(self):
-    raise NotImplementedError("variance is not implemented")
+    raise NotImplementedError("variance is not implemented: {}".format(
+        type(self).__name__))
 
   def variance(self, name="variance"):
     """Variance.
@@ -979,11 +1011,15 @@ class Distribution(_BaseDistribution):
     with self._name_scope(name):
       try:
         return self._variance()
-      except NotImplementedError:
-        return math_ops.square(self._stddev())
+      except NotImplementedError as original_exception:
+        try:
+          return math_ops.square(self._stddev())
+        except NotImplementedError:
+          raise original_exception
 
   def _stddev(self):
-    raise NotImplementedError("stddev is not implemented")
+    raise NotImplementedError("stddev is not implemented: {}".format(
+        type(self).__name__))
 
   def stddev(self, name="stddev"):
     """Standard deviation.
@@ -1008,11 +1044,15 @@ class Distribution(_BaseDistribution):
     with self._name_scope(name):
       try:
         return self._stddev()
-      except NotImplementedError:
-        return math_ops.sqrt(self._variance())
+      except NotImplementedError as original_exception:
+        try:
+          return math_ops.sqrt(self._variance())
+        except NotImplementedError:
+          raise original_exception
 
   def _covariance(self):
-    raise NotImplementedError("covariance is not implemented")
+    raise NotImplementedError("covariance is not implemented: {}".format(
+        type(self).__name__))
 
   def covariance(self, name="covariance"):
     """Covariance.
@@ -1054,7 +1094,8 @@ class Distribution(_BaseDistribution):
       return self._covariance()
 
   def _mode(self):
-    raise NotImplementedError("mode is not implemented")
+    raise NotImplementedError("mode is not implemented: {}".format(
+        type(self).__name__))
 
   def mode(self, name="mode"):
     """Mode."""
-- 
GitLab


From e1db78697b05be673562fe2b1c9a995d25a71d4c Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Wed, 19 Sep 2018 06:38:07 -0700
Subject: [PATCH 0373/1357] Enable while_test for the GPU backend.

PiperOrigin-RevId: 213618350
---
 tensorflow/compiler/xla/tests/while_test.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc
index 7abd8651d5..8b1b9e1519 100644
--- a/tensorflow/compiler/xla/tests/while_test.cc
+++ b/tensorflow/compiler/xla/tests/while_test.cc
@@ -763,9 +763,7 @@ TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) {
   ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
 }
 
-// Test while nodes that share the while body computation.
-// TODO(b/37245345): Fails on GPU backend.
-TEST_F(WhileTest, DISABLED_ON_GPU(WhileLoopsWithSharedBodyAndInit)) {
+TEST_F(WhileTest, WhileLoopsWithSharedBodyAndInit) {
   std::vector<Shape> shape_elements = {ShapeUtil::MakeShape(S32, {}),
                                        ShapeUtil::MakeShape(F32, {10})};
   Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements);
-- 
GitLab


From f8655c08cfe3bd99ec1703211e1c9154a14a6150 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Wed, 19 Sep 2018 08:12:29 -0700
Subject: [PATCH 0374/1357] Add interface for HLO passes which run on
 HloModuleGroup. Derive HloModulePass and HloModuleGroupPass from
 HloPassInterface which run module-scoped and module-group-scoped
 respectively. Replace all existing uses of HloPassInterface with
 HloModulePass because all existing passes are module-scoped. Also rewrite
 HloPassPipeline to support both module-scoped and module-group-scoped passes.

PiperOrigin-RevId: 213629604
---
 tensorflow/compiler/xla/service/BUILD         |  21 ++
 .../xla/service/algebraic_simplifier.h        |   2 +-
 .../xla/service/batch_dot_simplification.h    |   2 +-
 .../compiler/xla/service/batchnorm_expander.h |   2 +-
 .../xla/service/bfloat16_conversion_folding.h |   2 +-
 .../xla/service/bfloat16_normalization.h      |   4 +-
 .../xla/service/bfloat16_propagation.h        |   2 +-
 .../compiler/xla/service/call_inliner.h       |   2 +-
 .../xla/service/conditional_simplifier.h      |   2 +-
 .../convolution_feature_group_converter.h     |   2 +-
 .../compiler/xla/service/copy_insertion.h     |   2 +-
 .../xla/service/cpu/conv_canonicalization.h   |   2 +-
 .../xla/service/cpu/cpu_copy_insertion.h      |   2 +-
 .../xla/service/cpu/cpu_hlo_support_checker.h |   2 +-
 .../service/cpu/parallel_task_assignment.h    |   2 +-
 tensorflow/compiler/xla/service/defuser.h     |   2 +-
 .../compiler/xla/service/despecializer.cc     |   2 +-
 .../compiler/xla/service/despecializer.h      |   2 +-
 .../compiler/xla/service/dot_decomposer.h     |   2 +-
 .../compiler/xla/service/flatten_call_graph.h |   2 +-
 .../compiler/xla/service/gather_expander.h    |   2 +-
 .../service/gpu/cudnn_batchnorm_rewriter.h    |   2 +-
 .../gpu/cudnn_convolution_algorithm_picker.h  |   2 +-
 .../service/gpu/cudnn_convolution_rewriter.h  |   2 +-
 .../compiler/xla/service/gpu/fusion_merger.h  |   2 +-
 .../xla/service/gpu/gpu_copy_insertion.cc     |   9 -
 .../xla/service/gpu/gpu_copy_insertion.h      |  11 +-
 .../xla/service/gpu/gpu_hlo_support_checker.h |   2 +-
 .../xla/service/gpu/pad_for_tensor_cores.h    |   2 +-
 .../compiler/xla/service/gpu/pad_insertion.h  |   2 +-
 .../xla/service/hlo_constant_folding.h        |   2 +-
 tensorflow/compiler/xla/service/hlo_cse.h     |   2 +-
 tensorflow/compiler/xla/service/hlo_dce.h     |   2 +-
 .../xla/service/hlo_domain_isolator.h         |   2 +-
 .../compiler/xla/service/hlo_domain_remover.h |   2 +-
 .../xla/service/hlo_domain_verifier.h         |   2 +-
 .../xla/service/hlo_element_type_converter.h  |   2 +-
 .../xla/service/hlo_memory_scheduler.h        |   4 +-
 .../compiler/xla/service/hlo_module_dce.h     |   2 +-
 .../compiler/xla/service/hlo_pass_interface.h |  35 ++-
 .../compiler/xla/service/hlo_pass_pipeline.cc | 191 +++++++------
 .../compiler/xla/service/hlo_pass_pipeline.h  |  38 ++-
 .../xla/service/hlo_pass_pipeline_test.cc     | 259 ++++++++++++++++++
 .../xla/service/hlo_rematerialization.cc      |   6 +
 .../xla/service/hlo_rematerialization.h       |   2 +-
 .../service/hlo_subcomputation_unification.h  |   2 +-
 .../compiler/xla/service/hlo_verifier.h       |   2 +-
 .../xla/service/implicit_broadcast_remover.h  |   2 +-
 .../xla/service/indexed_array_analysis.h      |   2 +-
 tensorflow/compiler/xla/service/inliner.h     |   2 +-
 .../compiler/xla/service/instruction_fusion.h |   2 +-
 .../compiler/xla/service/layout_assignment.h  |   2 +-
 .../xla/service/multi_output_fusion.h         |   2 +-
 .../xla/service/reduce_precision_insertion.h  |   2 +-
 .../compiler/xla/service/reshape_mover.h      |   2 +-
 .../compiler/xla/service/scatter_expander.h   |   2 +-
 .../compiler/xla/service/transpose_folding.h  |   2 +-
 .../compiler/xla/service/tuple_simplifier.h   |   2 +-
 .../xla/service/while_loop_constant_sinking.h |   2 +-
 .../while_loop_invariant_code_motion.h        |   2 +-
 .../xla/service/while_loop_simplifier.h       |   2 +-
 .../xla/service/zero_sized_hlo_elimination.h  |   2 +-
 62 files changed, 518 insertions(+), 164 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_pass_pipeline_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 68bf56c1b1..4c3208a242 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -2560,6 +2560,7 @@ cc_library(
     ],
     deps = [
         ":hlo",
+        ":hlo_module_group",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
@@ -2591,6 +2592,26 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "hlo_pass_pipeline_test",
+    srcs = ["hlo_pass_pipeline_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_parser",
+        ":hlo_pass_pipeline",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:test_helpers",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:test_utils",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+    ],
+)
+
 cc_library(
     name = "hlo_cse",
     srcs = ["hlo_cse.cc"],
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h
index b864c372fa..9f8d0ee88b 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.h
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h
@@ -24,7 +24,7 @@ limitations under the License.
 namespace xla {
 
 // A pass which performs algebraic simplifications.
-class AlgebraicSimplifier : public HloPassInterface {
+class AlgebraicSimplifier : public HloModulePass {
  public:
   // Given shapes 'from_shape' and 'to_shape', determines if it is valid to
   // bitcast from 'from_shape' to 'to_shape' after considering platform
diff --git a/tensorflow/compiler/xla/service/batch_dot_simplification.h b/tensorflow/compiler/xla/service/batch_dot_simplification.h
index 79d37f08d3..5b625bf3b9 100644
--- a/tensorflow/compiler/xla/service/batch_dot_simplification.h
+++ b/tensorflow/compiler/xla/service/batch_dot_simplification.h
@@ -25,7 +25,7 @@ namespace xla {
 // Normally these would live in the algebraic simplifier, but we want to run
 // this to fixpoint (this pass reaches fixed point in one execution) before we
 // run the DotDecomposer.
-class BatchDotSimplification : public HloPassInterface {
+class BatchDotSimplification : public HloModulePass {
  public:
   StatusOr<bool> Run(HloModule* module) override;
   absl::string_view name() const override;
diff --git a/tensorflow/compiler/xla/service/batchnorm_expander.h b/tensorflow/compiler/xla/service/batchnorm_expander.h
index 76e32174f3..147f3ae7b6 100644
--- a/tensorflow/compiler/xla/service/batchnorm_expander.h
+++ b/tensorflow/compiler/xla/service/batchnorm_expander.h
@@ -26,7 +26,7 @@ namespace xla {
 // A pass which rewrites batch norm operations into more operations. Breaking a
 // big operation into smaller operations helps leverage our generic fusion
 // logic.
-class BatchNormExpander : public HloPassInterface {
+class BatchNormExpander : public HloModulePass {
  public:
   // When use_fusion is set, a multi-output fusion node is created.
   BatchNormExpander(bool rewrite_training_op = false,
diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.h b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.h
index 5dcd31b83d..cb3d12f0bf 100644
--- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.h
+++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.h
@@ -31,7 +31,7 @@ namespace xla {
 // optimization pipeline followed by a DCE pass. If other passes are needed
 // after this pass, run BFloat16MixedPrecisionRemoval first to undo some of the
 // changed made by this pass.
-class BFloat16ConversionFolding : public HloPassInterface {
+class BFloat16ConversionFolding : public HloModulePass {
  public:
   explicit BFloat16ConversionFolding(const BFloat16Support* bfloat16_support)
       : bfloat16_support_(bfloat16_support) {}
diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.h b/tensorflow/compiler/xla/service/bfloat16_normalization.h
index 30b6346312..f48e925823 100644
--- a/tensorflow/compiler/xla/service/bfloat16_normalization.h
+++ b/tensorflow/compiler/xla/service/bfloat16_normalization.h
@@ -25,7 +25,7 @@ namespace xla {
 // A pass which adds F32 <-> BF16 conversions for HLO instructions that do not
 // support BF16 input/output or mixed precision, according to the passed-in
 // backend-specific BF16 support rules.
-class BFloat16Normalization : public HloPassInterface {
+class BFloat16Normalization : public HloModulePass {
  public:
   explicit BFloat16Normalization(const BFloat16Support* bfloat16_support)
       : bfloat16_support_(bfloat16_support) {}
@@ -48,7 +48,7 @@ class BFloat16Normalization : public HloPassInterface {
 // use mixed precision; it removes mixed precision even if the backend supports
 // it. This pass is used to make the HLO module valid for other HLO passes which
 // do not support mixed precision.
-class BFloat16MixedPrecisionRemoval : public HloPassInterface {
+class BFloat16MixedPrecisionRemoval : public HloModulePass {
  public:
   BFloat16MixedPrecisionRemoval() {}
 
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h
index 1ee64971ab..6a62439f88 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.h
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h
@@ -58,7 +58,7 @@ namespace xla {
 // BFloat16ConversionFolding. If other passes are needed after this pass, run
 // BFloat16MixedPrecisionRemoval first to undo some of the changes made by this
 // pass.
-class BFloat16Propagation : public HloPassInterface {
+class BFloat16Propagation : public HloModulePass {
  public:
   explicit BFloat16Propagation(const BFloat16Support* bfloat16_support);
 
diff --git a/tensorflow/compiler/xla/service/call_inliner.h b/tensorflow/compiler/xla/service/call_inliner.h
index c5cd88b9ea..08c4aff4f7 100644
--- a/tensorflow/compiler/xla/service/call_inliner.h
+++ b/tensorflow/compiler/xla/service/call_inliner.h
@@ -25,7 +25,7 @@ namespace xla {
 
 // For every kCall operation in the main computation, we inline the body of the
 // called function, and proceed recursively.
-class CallInliner : public HloPassInterface {
+class CallInliner : public HloModulePass {
  public:
   using InlinedInstructionMap =
       std::unordered_map<HloInstruction*, HloInstruction*>;
diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.h b/tensorflow/compiler/xla/service/conditional_simplifier.h
index 3de50cbd7f..2223ad6753 100644
--- a/tensorflow/compiler/xla/service/conditional_simplifier.h
+++ b/tensorflow/compiler/xla/service/conditional_simplifier.h
@@ -25,7 +25,7 @@ namespace xla {
 
 // HLO pass that removes kConditional with a constant predicate, replacing them
 // with their true or false computation as appropriate.
-class ConditionalSimplifier : public HloPassInterface {
+class ConditionalSimplifier : public HloModulePass {
  public:
   absl::string_view name() const override { return "simplify-conditional"; }
   StatusOr<bool> Run(HloModule* module) override;
diff --git a/tensorflow/compiler/xla/service/convolution_feature_group_converter.h b/tensorflow/compiler/xla/service/convolution_feature_group_converter.h
index 498894737f..ce0138e56f 100644
--- a/tensorflow/compiler/xla/service/convolution_feature_group_converter.h
+++ b/tensorflow/compiler/xla/service/convolution_feature_group_converter.h
@@ -25,7 +25,7 @@ namespace xla {
 
 // A pass which rewrites convolutions with feature_group_count > 1 into
 // convolutions with feature_group_count = 1.
-class ConvolutionFeatureGroupConverter : public HloPassInterface {
+class ConvolutionFeatureGroupConverter : public HloModulePass {
  public:
   ConvolutionFeatureGroupConverter() {}
 
diff --git a/tensorflow/compiler/xla/service/copy_insertion.h b/tensorflow/compiler/xla/service/copy_insertion.h
index d308f6bc84..c097089e30 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.h
+++ b/tensorflow/compiler/xla/service/copy_insertion.h
@@ -43,7 +43,7 @@ namespace xla {
 //   (3) The buffer set of the root instruction of the entry computation must be
 //       unambiguous and distinct. That is, InstructionAliasSet::IsAmbiguous and
 //       InstructionAliasSet::IsDistinct return true.
-class CopyInsertion : public HloPassInterface {
+class CopyInsertion : public HloModulePass {
  public:
   absl::string_view name() const override { return "copy-insertion"; }
 
diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.h b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.h
index 59437e88af..becee3f81f 100644
--- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.h
+++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.h
@@ -31,7 +31,7 @@ namespace cpu {
 // called canonical convolutions). This pass expands non-canonical convolutions
 // into reshapes and canonical convolutions, so that these non-canonical
 // convolutions can run faster.
-class ConvCanonicalization : public HloPassInterface {
+class ConvCanonicalization : public HloModulePass {
  public:
   explicit ConvCanonicalization(
       const TargetMachineFeatures* target_machine_features)
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h
index d49f7d7cc2..076235f887 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h
@@ -30,7 +30,7 @@ namespace xla {
 //
 // TODO(b/62548313): Remove this when buffer assignment is smarter
 // (module-scoped).
-class CpuCopyInsertion : public HloPassInterface {
+class CpuCopyInsertion : public HloModulePass {
  public:
   absl::string_view name() const override { return "copy-insertion"; }
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h
index 6af724b2a5..a39a9d4724 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h
@@ -23,7 +23,7 @@ namespace xla {
 // This pass should run early in the HLO pipeline and checks for HLO constructs
 // which are not supported by the CPU backend and cannot be removed via HLO
 // transformations (eg, sparse layouts).
-class CpuHloSupportChecker : public HloPassInterface {
+class CpuHloSupportChecker : public HloModulePass {
  public:
   CpuHloSupportChecker() = default;
   ~CpuHloSupportChecker() override = default;
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.h b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.h
index a99cd99c14..3822d5300e 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.h
+++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.h
@@ -60,7 +60,7 @@ class ParallelTaskAssignment {
 // own embedded computation, which is compiled as a parallel compute function,
 // and which is invoked from a kCall instruction that is lowered in codegen to
 // a runtime parallel fork/join call.
-class ParallelTaskAssigner : public HloPassInterface {
+class ParallelTaskAssigner : public HloModulePass {
  public:
   // 'max_parallelism': the maximum parallel task count per instruction.
   // 'shape_size': shape size function used by HloCostAnalysis during parallel
diff --git a/tensorflow/compiler/xla/service/defuser.h b/tensorflow/compiler/xla/service/defuser.h
index c326beb899..aaa41fc4fe 100644
--- a/tensorflow/compiler/xla/service/defuser.h
+++ b/tensorflow/compiler/xla/service/defuser.h
@@ -25,7 +25,7 @@ namespace xla {
 
 // A pass which replaces all fusion instructions with the equivalent un-fused
 // instructions.
-class Defuser : public HloPassInterface {
+class Defuser : public HloModulePass {
  public:
   Defuser() {}
   ~Defuser() override {}
diff --git a/tensorflow/compiler/xla/service/despecializer.cc b/tensorflow/compiler/xla/service/despecializer.cc
index ba2a674d9a..b3549acfc2 100644
--- a/tensorflow/compiler/xla/service/despecializer.cc
+++ b/tensorflow/compiler/xla/service/despecializer.cc
@@ -24,7 +24,7 @@ namespace xla {
 namespace {
 
 // Pass which strips control dependencies from all instructions in the module.
-class ControlDepRemover : public HloPassInterface {
+class ControlDepRemover : public HloModulePass {
  public:
   ControlDepRemover() = default;
   absl::string_view name() const override { return "control-dep-remover"; }
diff --git a/tensorflow/compiler/xla/service/despecializer.h b/tensorflow/compiler/xla/service/despecializer.h
index 7be70add2f..46dcc3a438 100644
--- a/tensorflow/compiler/xla/service/despecializer.h
+++ b/tensorflow/compiler/xla/service/despecializer.h
@@ -30,7 +30,7 @@ namespace xla {
 //
 // Current despecialization passes are Defuser, ImplicitBroadcastRemover,
 // and BFloat16MixedPrecisionRemoval.
-class Despecializer : public HloPassInterface {
+class Despecializer : public HloModulePass {
  public:
   Despecializer();
   absl::string_view name() const override { return "despecializer"; }
diff --git a/tensorflow/compiler/xla/service/dot_decomposer.h b/tensorflow/compiler/xla/service/dot_decomposer.h
index fc38e31700..40e7a3b4c2 100644
--- a/tensorflow/compiler/xla/service/dot_decomposer.h
+++ b/tensorflow/compiler/xla/service/dot_decomposer.h
@@ -23,7 +23,7 @@ namespace xla {
 
 // DotDecomposer is a pass which decomposes batch Dot operations into a
 // sequence of smaller (R2) Dot operations.
-class DotDecomposer : public HloPassInterface {
+class DotDecomposer : public HloModulePass {
  public:
   // Decomposes batch Dot operations when 'decompose_batch_dot' is true.
   DotDecomposer(bool decompose_batch_dot = true)
diff --git a/tensorflow/compiler/xla/service/flatten_call_graph.h b/tensorflow/compiler/xla/service/flatten_call_graph.h
index 3cccec9862..986970f886 100644
--- a/tensorflow/compiler/xla/service/flatten_call_graph.h
+++ b/tensorflow/compiler/xla/service/flatten_call_graph.h
@@ -26,7 +26,7 @@ namespace xla {
 // Flattening associates each call site with a unique computation (for
 // sequential calling contexts) This simplifies buffer assignment and
 // points-to analysis (see b/36865746 for details).
-class FlattenCallGraph : public HloPassInterface {
+class FlattenCallGraph : public HloModulePass {
  public:
   absl::string_view name() const override { return "flatten-call-graph"; }
 
diff --git a/tensorflow/compiler/xla/service/gather_expander.h b/tensorflow/compiler/xla/service/gather_expander.h
index 7bd9ea5984..2b39359aae 100644
--- a/tensorflow/compiler/xla/service/gather_expander.h
+++ b/tensorflow/compiler/xla/service/gather_expander.h
@@ -23,7 +23,7 @@ namespace xla {
 // This pass rewrites gather operations into (roughly) while loops of dynamic
 // slices.  This lets backends that don't support gather directly to
 // nevertheless have a minimum level of support.
-class GatherExpander : public HloPassInterface {
+class GatherExpander : public HloModulePass {
  public:
   absl::string_view name() const override { return "gather_expander"; }
   StatusOr<bool> Run(HloModule* module) override;
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h
index 6e2e330edd..c3f58508dd 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h
@@ -52,7 +52,7 @@ namespace gpu {
 // The GPU backend does not implement a lowering for the batchnorm HLOs -- it
 // expects them to be lowered to cudnn calls via this pass or to HLO soup via
 // BatchNormRewriter.
-class CudnnBatchNormRewriter : public HloPassInterface {
+class CudnnBatchNormRewriter : public HloModulePass {
  public:
   absl::string_view name() const override { return "cudnn_batchnorm_rewriter"; }
   StatusOr<bool> Run(HloModule* module) override;
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
index f79b113f8f..ce0189543c 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
@@ -30,7 +30,7 @@ namespace gpu {
 
 // Modifies CustomCalls to cudnn convolutions, choosing the best algorithm for
 // each and adding explicit scratch space to the CustomCalls.
-class CudnnConvolutionAlgorithmPicker : public HloPassInterface {
+class CudnnConvolutionAlgorithmPicker : public HloModulePass {
  public:
   // If the `allocator` parameter is not null, we will use it to allocate temp
   // memory while timing the various convolution algorithms.  If it's null,
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h
index fbe7e98494..8d7c6fdab5 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h
@@ -24,7 +24,7 @@ namespace gpu {
 
 // Rewrites plain convolutions, backwards-filter convolutions, and
 // backwards-input convolutions into CustomCall HLOs that call into cuDNN.
-class CudnnConvolutionRewriter : public HloPassInterface {
+class CudnnConvolutionRewriter : public HloModulePass {
  public:
   absl::string_view name() const override {
     return "cudnn-convolution-rewriter";
diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.h b/tensorflow/compiler/xla/service/gpu/fusion_merger.h
index 7e3f5775b8..f19996edfe 100644
--- a/tensorflow/compiler/xla/service/gpu/fusion_merger.h
+++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.h
@@ -32,7 +32,7 @@ namespace gpu {
 // 2) The result of merging the fusion instruction into its users would not
 //    increase bytes transferred.
 //
-class FusionMerger : public HloPassInterface {
+class FusionMerger : public HloModulePass {
  public:
   absl::string_view name() const override { return "fusion merger"; }
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
index 75f414e47f..79c74e7e8b 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
@@ -34,15 +34,6 @@ namespace xla {
 
 namespace gpu {
 
-StatusOr<HloInstruction*> GpuCopyInsertion::FindOrInsertCopy(
-    HloInstruction* hlo) {
-  HloInstruction*& copy = hlo_to_copy_map_[hlo];
-  if (copy == nullptr) {
-    TF_ASSIGN_OR_RETURN(copy, hlo->parent()->DeepCopyInstruction(hlo));
-  }
-  return copy;
-}
-
 StatusOr<bool> GpuCopyInsertion::Run(HloModule* module) {
   CopyInsertion generic_copy_insertion;
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h
index 8ffae18fe8..4c7e38ffeb 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h
@@ -25,20 +25,11 @@ namespace gpu {
 // Besides the modifications made by the generic xla::CopyInsertion, this
 // GPU-specific copy insertion also materializes operands of library calls by
 // inserting kCopy instructions.
-class GpuCopyInsertion : public HloPassInterface {
+class GpuCopyInsertion : public HloModulePass {
  public:
   absl::string_view name() const override { return "copy-insertion"; }
 
   StatusOr<bool> Run(HloModule* module) override;
-
- protected:
-  // Returns a copy of `hlo`. Looks in hlo_to_copy_map_ first to avoid making
-  // duplicate copies.
-  StatusOr<HloInstruction*> FindOrInsertCopy(HloInstruction* hlo);
-
-  // A map containing all copies inserted to materialize operands of library
-  // calls. The key is the copied instruction and the value is the copy.
-  tensorflow::gtl::FlatMap<HloInstruction*, HloInstruction*> hlo_to_copy_map_;
 };
 
 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h
index bbb3340760..9c64b4d10c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h
@@ -23,7 +23,7 @@ namespace xla {
 // his pass should run early in the HLO pipeline and checks for HLO constructs
 // which are not supported by the GPU backend and cannot be removed via HLO
 // transformations (eg, sparse layouts).
-class GpuHloSupportChecker : public HloPassInterface {
+class GpuHloSupportChecker : public HloModulePass {
  public:
   GpuHloSupportChecker() = default;
   ~GpuHloSupportChecker() override = default;
diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h
index 11dc56a64f..e592a3774e 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h
+++ b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h
@@ -30,7 +30,7 @@ namespace gpu {
 // targeting before running this pass.
 //
 // TODO(jlebar): Also pad dots.
-class PadForTensorCores : public HloPassInterface {
+class PadForTensorCores : public HloModulePass {
  public:
   absl::string_view name() const override { return "pad for tensor cores"; }
 
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.h b/tensorflow/compiler/xla/service/gpu/pad_insertion.h
index a622e894ed..25cdf64c4c 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.h
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.h
@@ -24,7 +24,7 @@ namespace gpu {
 // An HLO pass that canonicalizes convolution instructions for GPU codegen. It
 // inserts Pad instructions before Convolution instructions with uncanonicalized
 // padding, so that they can be lowered to cuDNN convolution.
-class PadInsertion : public HloPassInterface {
+class PadInsertion : public HloModulePass {
  public:
   absl::string_view name() const override { return "pad insertion"; }
 
diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding.h b/tensorflow/compiler/xla/service/hlo_constant_folding.h
index 4557983a9c..4a624cc7b8 100644
--- a/tensorflow/compiler/xla/service/hlo_constant_folding.h
+++ b/tensorflow/compiler/xla/service/hlo_constant_folding.h
@@ -23,7 +23,7 @@ namespace xla {
 
 // A pass which performs constant folding in order to avoid unnecessary
 // computation on constants.
-class HloConstantFolding : public HloPassInterface {
+class HloConstantFolding : public HloModulePass {
  public:
   absl::string_view name() const override { return "constant_folding"; }
 
diff --git a/tensorflow/compiler/xla/service/hlo_cse.h b/tensorflow/compiler/xla/service/hlo_cse.h
index a28c03599a..e4857fd3fd 100644
--- a/tensorflow/compiler/xla/service/hlo_cse.h
+++ b/tensorflow/compiler/xla/service/hlo_cse.h
@@ -25,7 +25,7 @@ namespace xla {
 // and identical instructions with the same operands are commoned. The pass
 // iterates over the instructions in topological order which enables the pass to
 // find arbitrarily large common expressions.
-class HloCSE : public HloPassInterface {
+class HloCSE : public HloModulePass {
  public:
   // If is_layout_sensitive is true, then the simplifier preserves layout during
   // transformation. Otherwise, layout is ignored.
diff --git a/tensorflow/compiler/xla/service/hlo_dce.h b/tensorflow/compiler/xla/service/hlo_dce.h
index 1fe69b1395..4012042672 100644
--- a/tensorflow/compiler/xla/service/hlo_dce.h
+++ b/tensorflow/compiler/xla/service/hlo_dce.h
@@ -33,7 +33,7 @@ namespace xla {
 //
 // This pass does not remove dead parameter instructions, as parameter
 // instructions cannot be deleted.
-class HloDCE : public HloPassInterface {
+class HloDCE : public HloModulePass {
  public:
   ~HloDCE() override {}
   absl::string_view name() const override { return "dce"; }
diff --git a/tensorflow/compiler/xla/service/hlo_domain_isolator.h b/tensorflow/compiler/xla/service/hlo_domain_isolator.h
index d36631fc2f..c0bf1b9e16 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_isolator.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_isolator.h
@@ -30,7 +30,7 @@ namespace xla {
 // used to break an HLO graph edge connecting two instructions with different
 // sharding. If a set of connected instructions have all the same sharding, no
 // kDomain instruction will be placed.
-class HloDomainIsolator : public HloPassInterface {
+class HloDomainIsolator : public HloModulePass {
  public:
   // Creates a new kDomain instruction for the edge between the use instruction
   // (the first HloInstruction argument), and the operand instruction (the
diff --git a/tensorflow/compiler/xla/service/hlo_domain_remover.h b/tensorflow/compiler/xla/service/hlo_domain_remover.h
index 97bc8ef604..0fc30fb86c 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_remover.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_remover.h
@@ -26,7 +26,7 @@ namespace xla {
 // Removes all the kDomain instructions of a given kind from the input module,
 // and calls the normalizer to propagate the properties on the possibly new born
 // instructions.
-class HloDomainRemover : public HloPassInterface {
+class HloDomainRemover : public HloModulePass {
  public:
   // Creates a new HloDomainRemover object tasked at removing all the kDomain
   // instructions of a given kind.
diff --git a/tensorflow/compiler/xla/service/hlo_domain_verifier.h b/tensorflow/compiler/xla/service/hlo_domain_verifier.h
index 81d6d69a8c..bea5cba38d 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_verifier.h
@@ -29,7 +29,7 @@ namespace xla {
 
 // Verifies that the domain instructions are consistent, and the each domain is
 // surrounded by the same metadata.
-class HloDomainVerifier : public HloPassInterface {
+class HloDomainVerifier : public HloModulePass {
  public:
   HloDomainVerifier(std::vector<string> kinds) : kinds_(std::move(kinds)) {}
 
diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter.h b/tensorflow/compiler/xla/service/hlo_element_type_converter.h
index 44ded2c2fa..4d2a942925 100644
--- a/tensorflow/compiler/xla/service/hlo_element_type_converter.h
+++ b/tensorflow/compiler/xla/service/hlo_element_type_converter.h
@@ -25,7 +25,7 @@ namespace xla {
 // inserting Convert ops. This allows a backend to support an element type while
 // only actually implementing the Convert op for that element type. This is
 // generally not the fastest approach, but it works.
-class HloElementTypeConverter : public HloPassInterface {
+class HloElementTypeConverter : public HloModulePass {
  public:
   // eliminate_type is the type to eliminate as the input or output of ops,
   // using Convert ops to replace it with replace_with_type.
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h
index 5e02868eba..9964c6fdd7 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h
@@ -90,7 +90,7 @@ StatusOr<HloInstructionSequence> ScheduleComputation(
 // A pass which schedules the HLO instructions in a module. The HloModule's
 // schedule field is set to the resulting HloSchedule using
 // HloModule::set_schedule.
-class HloMemoryScheduler : public HloPassInterface {
+class HloMemoryScheduler : public HloModulePass {
  public:
   // size_function is the function returning the number of bytes required for a
   // LogicalBuffer. algorithm is the memory scheduling algorithm to use. If not
@@ -109,7 +109,7 @@ class HloMemoryScheduler : public HloPassInterface {
 
 // A trivial pass which clears the schedule currently set on the
 // HloModule. After this pass runs HloModudle::has_schedule will return false.
-class HloDescheduler : public HloPassInterface {
+class HloDescheduler : public HloModulePass {
  public:
   HloDescheduler() = default;
   ~HloDescheduler() override = default;
diff --git a/tensorflow/compiler/xla/service/hlo_module_dce.h b/tensorflow/compiler/xla/service/hlo_module_dce.h
index 12ca2340a6..d472211d2a 100644
--- a/tensorflow/compiler/xla/service/hlo_module_dce.h
+++ b/tensorflow/compiler/xla/service/hlo_module_dce.h
@@ -28,7 +28,7 @@ namespace xla {
 // Sweeps through live instructions which cross computation boundaries (kWhile),
 // and removes code at dead shape indices.
 //
-class HloModuleDCE : public HloPassInterface {
+class HloModuleDCE : public HloModulePass {
  public:
   ~HloModuleDCE() override {}
   absl::string_view name() const override { return "hlo-module-dce"; }
diff --git a/tensorflow/compiler/xla/service/hlo_pass_interface.h b/tensorflow/compiler/xla/service/hlo_pass_interface.h
index f1ad0f9b01..fdaac34386 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_interface.h
+++ b/tensorflow/compiler/xla/service/hlo_pass_interface.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PASS_INTERFACE_H_
 
 #include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_module_group.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -25,15 +26,45 @@ limitations under the License.
 namespace xla {
 
 // Base class for HLO passes. These are used with the HloPassPipeline to
-// organize a sequence of passes.
+// organize a sequence of passes. An HLO pass should not extend this class
+// directly; it should extend HloModulePass or HloModuleGroupPass.
 class HloPassInterface {
  public:
   virtual ~HloPassInterface() = default;
   virtual absl::string_view name() const = 0;
 
-  // Run the pass on the given HLO module.  Return whether it modified the
+  // Run the pass on the given HLO module.  Returns whether it modified the
   // module.
   virtual StatusOr<bool> Run(HloModule* module) = 0;
+
+  // Run the pass on the given HLO module group. Returns whether it modified the
+  // module group. Ideally, the module group variant would be named "Run" as
+  // well, but C++ does not handle overloaded virtual methods well.
+  virtual StatusOr<bool> RunOnModuleGroup(HloModuleGroup* module_group) = 0;
+};
+
+// Base class for passes which are module-scoped.
+class HloModulePass : public HloPassInterface {
+ public:
+  // Runs the pass on a module group by iterating through each module in the
+  // group.
+  StatusOr<bool> RunOnModuleGroup(HloModuleGroup* module_group) override {
+    bool changed = false;
+    for (HloModule* module : module_group->modules()) {
+      TF_ASSIGN_OR_RETURN(bool module_changed, Run(module));
+      changed |= module_changed;
+    }
+    return changed;
+  };
+};
+
+// Base class for passes which are module-group scoped. These passes cannot run
+// on an HLO module.
+class HloModuleGroupPass : public HloPassInterface {
+ public:
+  StatusOr<bool> Run(HloModule* module) override {
+    return InternalError("Module group pass cannot be run on a module");
+  }
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
index 6e4ed0de62..8c2f928ca1 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <functional>
 
-#include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
@@ -29,108 +28,128 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
-namespace {
 
-using absl::StrAppend;
-using absl::StrCat;
-
-void DumpModuleGraph(const HloModule& module, const string& message) {
-  hlo_graph_dumper::MaybeDumpHloModule(module, message);
-  VLOG(3) << "HLO " << message << ":";
-  XLA_VLOG_LINES(3, module.ToString());
+template <typename HloT>
+Status HloPassPipeline::RunInvariantCheckers(
+    HloT* hlo, absl::string_view after_pass_name) {
+  for (auto& invariant_checker : invariant_checkers_) {
+    VLOG(1) << "    Invariant checker " << invariant_checker->name();
+    StatusOr<bool> changed_status = RunHelper(invariant_checker.get(), hlo);
+    VLOG(1) << "    Invariant checker done " << invariant_checker->name();
+    if (!changed_status.ok()) {
+      VLOG(2) << "Failed invariant check:";
+      XLA_VLOG_LINES(2, hlo->ToString());
+      return Status(changed_status.status().code(),
+                    absl::StrCat(changed_status.status().error_message(),
+                                 "\n\nFailed after ", after_pass_name));
+    }
+    TF_RET_CHECK(!changed_status.ValueOrDie())
+        << "invariant checkers must not change the graph";
+  }
+  return Status::OK();
 }
 
-void DumpModuleProto(const HloModule& module, const string& dump_to,
-                     const string& pipeline_name, const string& pass_name) {
-  static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);
-  static auto* const module_id_to_pass_number =
-      new tensorflow::gtl::FlatMap<int64, int64>();
-
-  tensorflow::mutex_lock lock(mu);
-  const int64 pass_number = (*module_id_to_pass_number)[module.unique_id()]++;
+template <typename HloT>
+StatusOr<bool> HloPassPipeline::RunPassesInternal(
+    HloT* hlo, absl::Span<HloPassInterface* const> passes) {
+  string last_pass_name = "pipeline-start";
+  TF_RETURN_IF_ERROR(RunInvariantCheckers(hlo, last_pass_name));
+  bool changed = false;
+  for (HloPassInterface* pass : passes) {
+    VLOG(1) << "  HLO pass " << pass->name();
+    MaybeDumpHlo(*hlo,
+                 /*after_pass_name=*/last_pass_name,
+                 /*before_pass_name=*/pass->name());
+    TF_ASSIGN_OR_RETURN(bool pass_changed, RunHelper(pass, hlo));
+    changed |= pass_changed;
+    TF_RETURN_IF_ERROR(RunInvariantCheckers(hlo, pass->name()));
+    last_pass_name = string(pass->name());
+  }
+  MaybeDumpHlo(*hlo,
+               /*after_pass_name=*/last_pass_name,
+               /*before_pass_name=*/"pipeline-end");
+  return changed;
+}
 
-  const string mod_name = SanitizeFileName(
-      absl::StrFormat("module_%04d.%04d.%s.after_%s", module.unique_id(),
-                      pass_number, pipeline_name, pass_name));
+std::vector<HloPassInterface*> HloPassPipeline::GetEnabledPasses(
+    const DebugOptions& debug_options) {
+  auto repeated_field = debug_options.xla_disable_hlo_passes();
+  tensorflow::gtl::FlatSet<string> disabled_pass_names(repeated_field.begin(),
+                                                       repeated_field.end());
+  if (!disabled_pass_names.empty()) {
+    VLOG(1) << "Passes disabled by --xla_disable_hlo_passes: "
+            << absl::StrJoin(disabled_pass_names, ", ");
+  }
 
-  TF_QCHECK_OK(protobuf_util::DumpProtoToDirectory(MakeHloProto(module),
-                                                   dump_to, mod_name));
+  std::vector<HloPassInterface*> enabled_passes;
+  for (auto& pass : passes_) {
+    if (disabled_pass_names.count(string(pass->name())) == 0) {
+      enabled_passes.push_back(pass.get());
+    }
+  }
+  return enabled_passes;
 }
-}  // namespace
 
-StatusOr<bool> HloPassPipeline::Run(HloModule* module) {
-  run_called_ = true;
+void HloPassPipeline::MaybeDumpHlo(const HloModule& module,
+                                   absl::string_view after_pass_name,
+                                   absl::string_view before_pass_name) {
+  const string& proto_dump_path =
+      module.config().debug_options().xla_dump_per_pass_hlo_proto_to();
+  if (!proto_dump_path.empty()) {
+    static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);
+    static auto* const module_id_to_pass_number =
+        new tensorflow::gtl::FlatMap<int64, int64>();
+
+    tensorflow::mutex_lock lock(mu);
+    const int64 pass_number = (*module_id_to_pass_number)[module.unique_id()]++;
+
+    const string filename = SanitizeFileName(
+        absl::StrFormat("module_%04d.%04d.%s.after_%s", module.unique_id(),
+                        pass_number, name(), after_pass_name));
+
+    TF_QCHECK_OK(protobuf_util::DumpProtoToDirectory(
+        MakeHloProto(module), proto_dump_path, filename));
+  }
 
-  VLOG(1) << "Running HLO pass pipeline " << name();
+  const string message =
+      StrCat("after ", after_pass_name, ", before ", before_pass_name);
+  hlo_graph_dumper::MaybeDumpHloModule(module, message);
+  VLOG(3) << "HLO " << message << ":";
+  XLA_VLOG_LINES(3, module.ToString());
+}
 
-  auto repeated_field =
-      module->config().debug_options().xla_disable_hlo_passes();
-  tensorflow::gtl::FlatSet<string> disabled_passes(repeated_field.begin(),
-                                                   repeated_field.end());
-  if (!disabled_passes.empty()) {
-    VLOG(1) << "Passes disabled by --xla_disable_hlo_passes: "
-            << absl::StrJoin(disabled_passes, ", ");
+void HloPassPipeline::MaybeDumpHlo(const HloModuleGroup& module_group,
+                                   absl::string_view after_pass_name,
+                                   absl::string_view before_pass_name) {
+  for (const HloModule* module : module_group.modules()) {
+    MaybeDumpHlo(*module, after_pass_name, before_pass_name);
   }
+}
 
-  auto run_invariant_checkers = [this,
-                                 module](const string& message) -> Status {
-    for (auto& invariant_checker : invariant_checkers_) {
-      VLOG(1) << "    Invariant checker " << invariant_checker->name();
-      StatusOr<bool> changed_status = invariant_checker->Run(module);
-      VLOG(1) << "    Invariant checker done " << invariant_checker->name();
-      if (!changed_status.ok()) {
-        VLOG(2) << "Module failed invariant check:";
-        XLA_VLOG_LINES(2, module->ToString());
-        return Status(changed_status.status().code(),
-                      StrCat(changed_status.status().error_message(),
-                             "\n\nFailed ", message));
-      }
-      TF_RET_CHECK(!changed_status.ValueOrDie())
-          << "invariant checkers must not change the graph";
-    }
-    return Status::OK();
-  };
+StatusOr<bool> HloPassPipeline::Run(HloModule* module) {
+  run_called_ = true;
 
-  string prefix = StrCat(name(), ": pipeline start");
-  bool changed = false;
-  string message;
-  TF_RETURN_IF_ERROR(
-      run_invariant_checkers(StrCat("before running pipeline: ", name())));
-  const string xla_dump_per_pass_hlo_proto_to =
-      module->config().debug_options().xla_dump_per_pass_hlo_proto_to();
-  if (!xla_dump_per_pass_hlo_proto_to.empty()) {
-    DumpModuleProto(*module, xla_dump_per_pass_hlo_proto_to, string(name()),
-                    "pipeline_start");
-  }
+  VLOG(1) << "Running HLO pass pipeline on module " << module->name() << ": "
+          << name();
 
-  for (auto& pass : passes_) {
-    if (disabled_passes.count(string(pass->name())) > 0) {
-      VLOG(1) << "  Skipping HLO pass " << pass->name()
-              << ", disabled by --xla_disable_hlo_passes";
-      continue;
-    }
+  return RunPassesInternal(module,
+                           GetEnabledPasses(module->config().debug_options()));
+}
 
-    VLOG(1) << "  HLO pass " << pass->name();
+StatusOr<bool> HloPassPipeline::RunOnModuleGroup(HloModuleGroup* module_group) {
+  run_called_ = true;
 
-    // Emit label containing: "after foo-pass, before bar-pass".
-    message.clear();
-    StrAppend(&message, prefix, ", before ", pass->name());
-    DumpModuleGraph(*module, message);
-
-    TF_ASSIGN_OR_RETURN(bool changed_this_pass, pass->Run(module));
-    TF_RETURN_IF_ERROR(
-        run_invariant_checkers(StrCat("after running pass: ", pass->name())));
-    if (!xla_dump_per_pass_hlo_proto_to.empty()) {
-      DumpModuleProto(*module, xla_dump_per_pass_hlo_proto_to, string(name()),
-                      string(pass->name()));
-    }
+  VLOG(1) << "Running HLO pass pipeline on module group "
+          << module_group->name() << ": " << name();
 
-    changed |= changed_this_pass;
-    prefix.clear();
-    StrAppend(&prefix, name(), ": after ", pass->name());
+  if (module_group->modules().empty()) {
+    VLOG(1) << "Module group is empty. Nothing to do.";
+    return false;
   }
-  DumpModuleGraph(*module, prefix + ", pipeline end");
-  return changed;
+
+  return RunPassesInternal(
+      module_group,
+      GetEnabledPasses(module_group->module(0).config().debug_options()));
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.h b/tensorflow/compiler/xla/service/hlo_pass_pipeline.h
index 1d41a4dac1..09e7033ea4 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.h
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -61,10 +62,45 @@ class HloPassPipeline : public HloPassInterface {
     return *pass;
   }
 
-  // Run all passes on the given HLO module.
   StatusOr<bool> Run(HloModule* module) override;
+  StatusOr<bool> RunOnModuleGroup(HloModuleGroup* module_group) override;
 
  private:
+  // Returns the set of passes which are enabled. DebugOptions can selectively
+  // disable passes via --xla_disable_hlo_passes flag.
+  std::vector<HloPassInterface*> GetEnabledPasses(
+      const DebugOptions& debug_options);
+
+  // Maybe dumps the given module or module group depending on flag values
+  // contained in DebugOptions of module config.
+  void MaybeDumpHlo(const HloModuleGroup& module_group,
+                    absl::string_view after_pass_name,
+                    absl::string_view before_pass_name);
+  void MaybeDumpHlo(const HloModule& module, absl::string_view after_pass_name,
+                    absl::string_view before_pass_name);
+
+  // Runs the invariant checker on the given HLO. HloT can be either HloModule
+  // or HloModuleGroup.
+  template <typename HloT>
+  Status RunInvariantCheckers(HloT* hlo, absl::string_view after_pass_name);
+
+  // Helper which runs the given pass on the given HLO. HloT can be either
+  // HloModule or HloModuleGroup.
+  template <typename HloT>
+  StatusOr<bool> RunPassesInternal(HloT* hlo,
+                                   absl::Span<HloPassInterface* const> passes);
+
+  // Helpers which run the given passes on the given HLO construct. These
+  // helpers enable templating of the core of the pipeline logic by providing
+  // HloModule and HloModuleGroup specific methods with the same name.
+  static StatusOr<bool> RunHelper(HloPassInterface* pass, HloModule* module) {
+    return pass->Run(module);
+  }
+  static StatusOr<bool> RunHelper(HloPassInterface* pass,
+                                  HloModuleGroup* module_group) {
+    return pass->RunOnModuleGroup(module_group);
+  }
+
   const string name_;
   std::vector<std::unique_ptr<HloPassInterface>> passes_;
   std::vector<std::unique_ptr<HloPassInterface>> invariant_checkers_;
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline_test.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline_test.cc
new file mode 100644
index 0000000000..e16b4d4c0a
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline_test.cc
@@ -0,0 +1,259 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"
+
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+class HloPassPipelineTest : public HloTestBase {
+ protected:
+  StatusOr<HloModuleGroup> ParseModuleGroup(
+      absl::Span<const string> hlo_strings) {
+    HloModuleGroup group(TestName());
+    for (const string& hlo_string : hlo_strings) {
+      TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
+                          ParseHloString(hlo_string));
+      group.push_back(std::move(module));
+    }
+    return std::move(group);
+  }
+};
+
+// A module pass which renames instructions named 'foo' to 'bar'.
+class FooToBarModulePass : public HloModulePass {
+  absl::string_view name() const override { return "foo2bar"; }
+
+  StatusOr<bool> Run(HloModule* module) override {
+    bool changed = false;
+    for (HloComputation* computation : module->computations()) {
+      for (HloInstruction* instruction : computation->instructions()) {
+        if (instruction->name() == "foo") {
+          instruction->SetAndSanitizeName("bar");
+          changed = true;
+        }
+      }
+    }
+    return changed;
+  }
+};
+
+// A module group pass which renames instructions named 'baz' to 'qux'.
+class BazToQuxModuleGroupPass : public HloModuleGroupPass {
+  absl::string_view name() const override { return "baz2qux"; }
+
+  StatusOr<bool> RunOnModuleGroup(HloModuleGroup* module_group) override {
+    bool changed = false;
+    for (HloModule* module : module_group->modules()) {
+      for (HloComputation* computation : module->computations()) {
+        for (HloInstruction* instruction : computation->instructions()) {
+          if (instruction->name() == "baz") {
+            instruction->SetAndSanitizeName("qux");
+            changed = true;
+          }
+        }
+      }
+    }
+    return changed;
+  }
+};
+
+// An invariant checker pass which returns an error if there exists an
+// instruction named 'bar'.
+class BarBlowerUpper : public HloModulePass {
+  absl::string_view name() const override { return "bar-blower-upper"; }
+
+  StatusOr<bool> Run(HloModule* module) override {
+    for (HloComputation* computation : module->computations()) {
+      for (HloInstruction* instruction : computation->instructions()) {
+        if (instruction->name() == "bar") {
+          return InternalError("Module has instruction named bar");
+        }
+      }
+    }
+    return false;
+  }
+};
+
+TEST_F(HloPassPipelineTest, ModulePassChanged) {
+  // Test an HLO module pass which changes a module.
+  const string module_str = R"(
+HloModule ModulePassChanged
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT foo = f32[] multiply(a, b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+  HloPassPipeline pipeline(TestName());
+  pipeline.AddPass<FooToBarModulePass>();
+
+  HloInstruction* root = module->entry_computation()->root_instruction();
+  EXPECT_EQ(root->name(), "foo");
+  TF_ASSERT_OK_AND_ASSIGN(bool changed, pipeline.Run(module.get()));
+  EXPECT_TRUE(changed);
+  EXPECT_EQ(root->name(), "bar");
+}
+
+TEST_F(HloPassPipelineTest, ModulePassUnchanged) {
+  // Test an HLO module pass which does not change a module.
+  const string module_str = R"(
+HloModule ModulePassUnchanged
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT blahblah = f32[] multiply(a, b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+  HloPassPipeline pipeline(TestName());
+  pipeline.AddPass<FooToBarModulePass>();
+
+  TF_ASSERT_OK_AND_ASSIGN(bool changed, pipeline.Run(module.get()));
+  EXPECT_FALSE(changed);
+}
+
+TEST_F(HloPassPipelineTest, MixedPipeline) {
+  // Test a pipeline with both a module pass and a module group pass.
+  const string module_0_str = R"(
+HloModule MixedPipeline.1
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT baz = f32[] multiply(a, b)
+}
+)";
+  const string module_1_str = R"(
+HloModule MixedPipeline.0
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT foo = f32[] multiply(a, b)
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(HloModuleGroup module_group,
+                          ParseModuleGroup({module_0_str, module_1_str}));
+
+  HloPassPipeline pipeline(TestName());
+  pipeline.AddPass<BazToQuxModuleGroupPass>();
+  pipeline.AddPass<FooToBarModulePass>();
+
+  HloInstruction* root0 =
+      module_group.module(0).entry_computation()->root_instruction();
+  HloInstruction* root1 =
+      module_group.module(1).entry_computation()->root_instruction();
+  EXPECT_EQ(root0->name(), "baz");
+  EXPECT_EQ(root1->name(), "foo");
+
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          pipeline.RunOnModuleGroup(&module_group));
+  EXPECT_TRUE(changed);
+
+  EXPECT_EQ(root0->name(), "qux");
+  EXPECT_EQ(root1->name(), "bar");
+}
+
+TEST_F(HloPassPipelineTest, InvariantChecker) {
+  const string module_str = R"(
+HloModule InvariantChecker
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT foo = f32[] multiply(a, b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+  {
+    // Run a pipeline with just the invariant checker. It should not fail
+    // because there is no 'bar' instruction in the module.
+    HloPassPipeline pipeline(TestName());
+    pipeline.AddInvariantChecker<BarBlowerUpper>();
+
+    TF_ASSERT_OK_AND_ASSIGN(bool changed, pipeline.Run(module.get()));
+    EXPECT_FALSE(changed);
+  }
+
+  {
+    // Run a pipeline which renames 'foo' to 'bar' then an invariant checker
+    // which fails if there is an instruction named 'bar'.
+    HloPassPipeline pipeline(TestName());
+    pipeline.AddInvariantChecker<BarBlowerUpper>();
+    pipeline.AddPass<FooToBarModulePass>();
+
+    Status status = pipeline.Run(module.get()).status();
+    ASSERT_IS_NOT_OK(status);
+    EXPECT_THAT(status.error_message(),
+                ::testing::HasSubstr("Module has instruction named bar"));
+    EXPECT_THAT(status.error_message(),
+                ::testing::HasSubstr("Failed after foo2bar"));
+  }
+
+  {
+    // Run the invariant-checker only pipeline again. It should fail this time.
+    HloPassPipeline pipeline(TestName());
+    pipeline.AddInvariantChecker<BarBlowerUpper>();
+
+    Status status = pipeline.Run(module.get()).status();
+    ASSERT_IS_NOT_OK(status);
+    EXPECT_THAT(status.error_message(),
+                ::testing::HasSubstr("Module has instruction named bar"));
+    EXPECT_THAT(status.error_message(),
+                ::testing::HasSubstr("Failed after pipeline-start"));
+  }
+}
+
+TEST_F(HloPassPipelineTest, ModuleGroupPassOnModule) {
+  // Running a module group pass on a module should produce an error.
+  const string module_str = R"(
+HloModule ModuleGroupPassOnModule
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT foo = f32[] multiply(a, b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+  HloPassPipeline pipeline(TestName());
+  pipeline.AddPass<BazToQuxModuleGroupPass>();
+
+  Status status = pipeline.Run(module.get()).status();
+  ASSERT_IS_NOT_OK(status);
+  EXPECT_THAT(
+      status.error_message(),
+      ::testing::HasSubstr("Module group pass cannot be run on a module"));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index bd6dd79b67..a438671936 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -1198,6 +1198,12 @@ StatusOr<bool> HloRematerialization::Run(HloModule* module) {
           << HumanReadableNumBytes(memory_limit_bytes_);
   XLA_VLOG_LINES(3, "Before HloRematerialization:\n" + module->ToString());
 
+  // Initialize pass object state.
+  computation_peak_memory_.clear();
+  rematerialized_computations_.clear();
+  instructions_rematerialized_ = 0;
+  net_instructions_added_ = 0;
+
   TF_RET_CHECK(module->has_schedule());
   TF_ASSIGN_OR_RETURN(points_to_analysis_, TuplePointsToAnalysis::Run(module));
 
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h
index e2aaf18b3e..7330d73c09 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.h
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h
@@ -33,7 +33,7 @@ namespace xla {
 // CSE will undo the effects of this optimization and should not be run after
 // this pass. In general, this pass should be run very late, immediately before
 // code generation.
-class HloRematerialization : public HloPassInterface {
+class HloRematerialization : public HloModulePass {
  public:
   using ShapeSizeFunction = std::function<int64(const Shape&)>;
 
diff --git a/tensorflow/compiler/xla/service/hlo_subcomputation_unification.h b/tensorflow/compiler/xla/service/hlo_subcomputation_unification.h
index d1cf644f82..fa34bddde1 100644
--- a/tensorflow/compiler/xla/service/hlo_subcomputation_unification.h
+++ b/tensorflow/compiler/xla/service/hlo_subcomputation_unification.h
@@ -22,7 +22,7 @@ namespace xla {
 
 // Unify subcomputations of a `HloModule`: if any computations are equal, choose
 // one arbitrarily to use and delete the others.
-class HloSubcomputationUnification : public HloPassInterface {
+class HloSubcomputationUnification : public HloModulePass {
  public:
   absl::string_view name() const override {
     return "subcomputation-unification";
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index 42e3027bf1..0cde4a31af 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -151,7 +151,7 @@ class ShapeVerifier : public DfsHloVisitor {
 
 // HLO pass that verifies invariants of HLO instructions for each computation in
 // the module.
-class HloVerifier : public HloPassInterface {
+class HloVerifier : public HloModulePass {
  public:
   using ShapeVerifierFactory = std::function<std::unique_ptr<ShapeVerifier>()>;
 
diff --git a/tensorflow/compiler/xla/service/implicit_broadcast_remover.h b/tensorflow/compiler/xla/service/implicit_broadcast_remover.h
index 85bb4a8b24..9c48b7db61 100644
--- a/tensorflow/compiler/xla/service/implicit_broadcast_remover.h
+++ b/tensorflow/compiler/xla/service/implicit_broadcast_remover.h
@@ -25,7 +25,7 @@ namespace xla {
 
 // Pass which replaces all implicit broadcasts with their equivalent sequence of
 // explicit broadcast and reshape instructions.
-class ImplicitBroadcastRemover : public HloPassInterface {
+class ImplicitBroadcastRemover : public HloModulePass {
  public:
   ImplicitBroadcastRemover() {}
   ~ImplicitBroadcastRemover() override {}
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.h b/tensorflow/compiler/xla/service/indexed_array_analysis.h
index df9cbab915..3e238f97a0 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.h
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.h
@@ -366,7 +366,7 @@ class IndexedArrayAnalysis {
 // A pass that prints all non-trivial results returned by IndexedArrayAnalysis.
 // This pass is a no-op if !VLOG_IS_ON(2) so it should be fine to
 // unconditionally add to the regular HLO pass pipeline.
-class IndexedArrayAnalysisPrinterPass : public HloPassInterface {
+class IndexedArrayAnalysisPrinterPass : public HloModulePass {
  public:
   absl::string_view name() const override;
   StatusOr<bool> Run(HloModule* module) override;
diff --git a/tensorflow/compiler/xla/service/inliner.h b/tensorflow/compiler/xla/service/inliner.h
index efa8ed3abc..e20af08fb7 100644
--- a/tensorflow/compiler/xla/service/inliner.h
+++ b/tensorflow/compiler/xla/service/inliner.h
@@ -24,7 +24,7 @@ namespace xla {
 // A pass which performs inlining. Which can result, for example, in functions
 // that were previously being mapped by Map instead directly applied to the
 // forwarded operands (i.e., map({X, Y}, max) -> max(X, Y)).
-class Inliner : public HloPassInterface {
+class Inliner : public HloModulePass {
  public:
   ~Inliner() override = default;
   absl::string_view name() const override { return "inline"; }
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h
index c1fde8ecfc..7e1196fb7f 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/instruction_fusion.h
@@ -56,7 +56,7 @@ class FusionQueue {
 // with the intent that the loops which compute their values will be fused in
 // code generation. Derived classes define ShouldFuse method to select which
 // instructions to fuse.
-class InstructionFusion : public HloPassInterface {
+class InstructionFusion : public HloModulePass {
  public:
   explicit InstructionFusion(
       std::function<bool(const HloInstruction& instruction)> is_expensive,
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index cf545031d3..e29c199c42 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -281,7 +281,7 @@ class ChannelLayoutConstraints {
 
 // HLO pass which assigns layouts to all instructions in the HLO module while
 // satisfying all necessary invariants and minimizing cost.
-class LayoutAssignment : public HloPassInterface {
+class LayoutAssignment : public HloModulePass {
  public:
   // entry_computation_layout is modified to populate a layout for the result in
   // the case that no particular layout is requested.
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.h b/tensorflow/compiler/xla/service/multi_output_fusion.h
index d2c52651c4..0344626b26 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.h
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.h
@@ -44,7 +44,7 @@ namespace xla {
 //  Note that the reachability map is updated based on the original computation.
 //  This works because the reachability is monotonically increasing with
 //  instruction fusion.
-class MultiOutputFusion : public HloPassInterface {
+class MultiOutputFusion : public HloModulePass {
  public:
   MultiOutputFusion(int64 fuel) : fuel_(fuel) {}
 
diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.h b/tensorflow/compiler/xla/service/reduce_precision_insertion.h
index 256b231e3a..4bb22428f3 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion.h
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.h
@@ -29,7 +29,7 @@ namespace xla {
 // HLO pass which inserts reduce-precision instructions into the HLO graph, for
 // purposes of experimenting with the effects of reduced-precision storage of
 // intermediate values.
-class ReducePrecisionInsertion : public HloPassInterface {
+class ReducePrecisionInsertion : public HloModulePass {
   using InstructionFilterFunction = std::function<bool(const HloInstruction*)>;
 
  public:
diff --git a/tensorflow/compiler/xla/service/reshape_mover.h b/tensorflow/compiler/xla/service/reshape_mover.h
index 1e86a0823a..a3db439e34 100644
--- a/tensorflow/compiler/xla/service/reshape_mover.h
+++ b/tensorflow/compiler/xla/service/reshape_mover.h
@@ -24,7 +24,7 @@ namespace xla {
 // This now only moves them outputward across elementwise ops all whose operands
 // are equivalent Reshapes or Transposes, but in future could potentially move
 // them inputward also.
-class ReshapeMover : public HloPassInterface {
+class ReshapeMover : public HloModulePass {
  public:
   absl::string_view name() const override { return "reshape-mover"; }
 
diff --git a/tensorflow/compiler/xla/service/scatter_expander.h b/tensorflow/compiler/xla/service/scatter_expander.h
index 14f062c89c..559a85dccf 100644
--- a/tensorflow/compiler/xla/service/scatter_expander.h
+++ b/tensorflow/compiler/xla/service/scatter_expander.h
@@ -20,7 +20,7 @@ limitations under the License.
 
 namespace xla {
 
-class ScatterExpander : public HloPassInterface {
+class ScatterExpander : public HloModulePass {
  public:
   absl::string_view name() const override { return "scatter_expander"; }
   StatusOr<bool> Run(HloModule* module) override;
diff --git a/tensorflow/compiler/xla/service/transpose_folding.h b/tensorflow/compiler/xla/service/transpose_folding.h
index 3e5aa2db60..f95f982eb8 100644
--- a/tensorflow/compiler/xla/service/transpose_folding.h
+++ b/tensorflow/compiler/xla/service/transpose_folding.h
@@ -23,7 +23,7 @@ namespace xla {
 
 // HLO pass that folds transpose operators into Dot operators, where the Dot
 // operator is implemented by a GEMM kernel that can transpose its inputs.
-class TransposeFolding : public HloPassInterface {
+class TransposeFolding : public HloModulePass {
  public:
   using OperandIndices = std::vector<int64>;
 
diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.h b/tensorflow/compiler/xla/service/tuple_simplifier.h
index 8c91d6e69d..e126a53023 100644
--- a/tensorflow/compiler/xla/service/tuple_simplifier.h
+++ b/tensorflow/compiler/xla/service/tuple_simplifier.h
@@ -25,7 +25,7 @@ namespace xla {
 
 // A pass which simplifies patterns of Tuple and GetTupleElement instructions in
 // the module.
-class TupleSimplifier : public HloPassInterface {
+class TupleSimplifier : public HloModulePass {
  public:
   TupleSimplifier() : TupleSimplifier(/*exclude_entry_computation=*/false) {}
   explicit TupleSimplifier(bool exclude_entry_computation);
diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking.h b/tensorflow/compiler/xla/service/while_loop_constant_sinking.h
index 2dba7d7f75..577bad6c70 100644
--- a/tensorflow/compiler/xla/service/while_loop_constant_sinking.h
+++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking.h
@@ -50,7 +50,7 @@ namespace xla {
 // conditions as well.
 //
 // TODO(b/79121449):  We should also sink broadcasts of constants.
-class WhileLoopConstantSinking : public HloPassInterface {
+class WhileLoopConstantSinking : public HloModulePass {
  public:
   ~WhileLoopConstantSinking() override = default;
 
diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h
index 2cdf20ce80..3031899f71 100644
--- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h
+++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h
@@ -25,7 +25,7 @@ namespace xla {
 // HLO pass that rewrites while loops to hoist loop invariant instructions in
 // the while body into the computation that contains the while instruction.
 
-class WhileLoopInvariantCodeMotion : public HloPassInterface {
+class WhileLoopInvariantCodeMotion : public HloModulePass {
  public:
   // If `hoist_constants` is true then constants are always hoisted out of while
   // loop bodies.  Otherwise they are only hoisted out if they enable other
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.h b/tensorflow/compiler/xla/service/while_loop_simplifier.h
index 78024f14dc..0bc5a0107b 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.h
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.h
@@ -30,7 +30,7 @@ namespace xla {
 //  - Elements of a while loop's tuple that the loop doesn't use are removed
 //    from the tuple.
 //
-class WhileLoopSimplifier : public HloPassInterface {
+class WhileLoopSimplifier : public HloModulePass {
  public:
   ~WhileLoopSimplifier() override {}
   absl::string_view name() const override { return "simplify-while-loops"; }
diff --git a/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h
index a7f0e207eb..87294120d5 100644
--- a/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h
+++ b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h
@@ -21,7 +21,7 @@ limitations under the License.
 
 // HLO pass that replaces zero sized Hlos with a zero sized constant literal.
 namespace xla {
-class ZeroSizedHloElimination : public HloPassInterface {
+class ZeroSizedHloElimination : public HloModulePass {
  public:
   StatusOr<bool> Run(HloModule* module) override;
   absl::string_view name() const override {
-- 
GitLab


From e00d7744dbab5c73e4d8ffa8a7d361f7b2dcefff Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Wed, 19 Sep 2018 08:19:07 -0700
Subject: [PATCH 0375/1357] Automated rollback of commit
 9fe177881224571aff0c267593f747f5fd7a2967

PiperOrigin-RevId: 213630404
---
 .../estimator/dnn_with_layer_annotations.py   |  15 +-
 tensorflow/python/estimator/BUILD             |   2 -
 tensorflow/python/estimator/canned/dnn.py     | 181 ++++----------
 .../estimator/canned/dnn_linear_combined.py   |   7 +-
 .../python/estimator/canned/dnn_test.py       | 146 ++---------
 .../estimator/canned/dnn_testing_utils.py     | 227 ++++--------------
 .../python/feature_column/feature_column.py   |  12 +-
 .../feature_column/feature_column_v2.py       |  14 --
 8 files changed, 127 insertions(+), 477 deletions(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index a8eeff6f6d..152431d1b2 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -76,7 +76,6 @@ def make_input_layer_with_layer_annotations(original_input_layer, mode):
                                          weight_collections=None,
                                          trainable=True,
                                          cols_to_vars=None,
-                                         scope=None,
                                          cols_to_output_tensors=None):
     """Returns a dense `Tensor` as input layer based on given `feature_columns`.
 
@@ -113,7 +112,6 @@ def make_input_layer_with_layer_annotations(original_input_layer, mode):
         'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1'
           shape=(5, 10)]} If a column creates no variables, its value will be an
           empty list.
-      scope: A name or variable scope to use
       cols_to_output_tensors: If not `None`, must be a dictionary that will be
         filled with a mapping from '_FeatureColumn' to the associated output
         `Tensor`s.
@@ -134,7 +132,6 @@ def make_input_layer_with_layer_annotations(original_input_layer, mode):
         weight_collections=weight_collections,
         trainable=trainable,
         cols_to_vars=cols_to_vars,
-        scope=scope,
         cols_to_output_tensors=local_cols_to_output_tensors)
 
     if cols_to_output_tensors is not None:
@@ -304,9 +301,9 @@ def DNNClassifierWithLayerAnnotations(  # pylint: disable=invalid-name
 
   def _model_fn(features, labels, mode, config):
     with _monkey_patch(
-        feature_column_lib, '_internal_input_layer',
-        make_input_layer_with_layer_annotations(
-            feature_column_lib._internal_input_layer, mode)):  # pylint: disable=protected-access
+        feature_column_lib, 'input_layer',
+        make_input_layer_with_layer_annotations(feature_column_lib.input_layer,
+                                                mode)):
       return original.model_fn(features, labels, mode, config)
 
   return estimator.Estimator(
@@ -425,9 +422,9 @@ def DNNRegressorWithLayerAnnotations(  # pylint: disable=invalid-name
 
   def _model_fn(features, labels, mode, config):
     with _monkey_patch(
-        feature_column_lib, '_internal_input_layer',
-        make_input_layer_with_layer_annotations(
-            feature_column_lib._internal_input_layer, mode)):  # pylint: disable=protected-access
+        feature_column_lib, 'input_layer',
+        make_input_layer_with_layer_annotations(feature_column_lib.input_layer,
+                                                mode)):
       return original.model_fn(features, labels, mode, config)
 
   return estimator.Estimator(
diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index 2026bf8c4f..bfcc019dd5 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -251,7 +251,6 @@ py_library(
         ":prediction_keys",
         "//tensorflow:tensorflow_py_no_contrib",
         "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
         "@six_archive//:six",
     ],
 )
@@ -274,7 +273,6 @@ py_test(
         ":pandas_io",
         ":prediction_keys",
         "//tensorflow:tensorflow_py_no_contrib",
-        "@absl_py//absl/testing:parameterized",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py
index 344113a5d8..1c0c4581c0 100644
--- a/tensorflow/python/estimator/canned/dnn.py
+++ b/tensorflow/python/estimator/canned/dnn.py
@@ -24,9 +24,7 @@ from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import model_fn
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
-from tensorflow.python.keras.engine import training
+from tensorflow.python.feature_column import feature_column as feature_column_lib
 from tensorflow.python.layers import core as core_layers
 from tensorflow.python.layers import normalization
 from tensorflow.python.ops import init_ops
@@ -47,14 +45,8 @@ def _add_hidden_layer_summary(value, tag):
   summary.histogram('%s/activation' % tag, value)
 
 
-def _dnn_logit_fn_builder(units,
-                          hidden_units,
-                          feature_columns,
-                          activation_fn,
-                          dropout,
-                          input_layer_partitioner,
-                          batch_norm,
-                          shared_state_manager=None):
+def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
+                          dropout, input_layer_partitioner, batch_norm):
   """Function builder for a dnn logit_fn.
 
   Args:
@@ -68,8 +60,6 @@ def _dnn_logit_fn_builder(units,
       coordinate.
     input_layer_partitioner: Partitioner for input layer.
     batch_norm: Whether to use batch normalization after each hidden layer.
-    shared_state_manager: A SharedEmbeddingStateManager object to hold the
-      shared state for SharedEmbeddingColumn's.
 
   Returns:
     A logit_fn (see below).
@@ -95,110 +85,50 @@ def _dnn_logit_fn_builder(units,
       A `Tensor` representing the logits, or a list of `Tensor`'s representing
       multiple logits in the MultiHead case.
     """
-    dnn_model = _DNNModel(
-        units,
-        hidden_units,
-        feature_columns,
-        activation_fn,
-        dropout,
-        input_layer_partitioner,
-        batch_norm,
-        shared_state_manager,
-        name='dnn')
-    return dnn_model(features, mode)
-
-  return dnn_logit_fn
-
-
-class _DNNModel(training.Model):
-  """A DNN Model."""
-
-  def __init__(self,
-               units,
-               hidden_units,
-               feature_columns,
-               activation_fn,
-               dropout,
-               input_layer_partitioner,
-               batch_norm,
-               shared_state_manager,
-               name=None,
-               **kwargs):
-    super(_DNNModel, self).__init__(name=name, **kwargs)
-
-    if feature_column_v2.is_feature_column_v2(feature_columns):
-      input_layer = feature_column_v2.FeatureLayer(
-          feature_columns=feature_columns,
-          name='input_layer',
-          shared_state_manager=shared_state_manager)
-    else:
-      with variable_scope.variable_scope('input_from_feature_columns'):
-        input_layer = feature_column.InputLayer(
-            feature_columns=feature_columns, name='input_layer')
-
-    self._input_layer = self._add_layers([input_layer])[0]
-
-    self._dropout = dropout
-    self._batch_norm = batch_norm
-
-    hidden_layers = []
-    dropout_layers = []
-    batch_norm_layers = []
+    is_training = mode == model_fn.ModeKeys.TRAIN
+    with variable_scope.variable_scope(
+        'input_from_feature_columns',
+        values=tuple(six.itervalues(features)),
+        partitioner=input_layer_partitioner):
+      net = feature_column_lib.input_layer(
+          features=features, feature_columns=feature_columns)
     for layer_id, num_hidden_units in enumerate(hidden_units):
-      hidden_layer = core_layers.Dense(
-          units=num_hidden_units,
-          activation=activation_fn,
+      with variable_scope.variable_scope(
+          'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
+        net = core_layers.dense(
+            net,
+            units=num_hidden_units,
+            activation=activation_fn,
+            kernel_initializer=init_ops.glorot_uniform_initializer(),
+            name=hidden_layer_scope)
+        if dropout is not None and is_training:
+          net = core_layers.dropout(net, rate=dropout, training=True)
+        if batch_norm:
+          # TODO(hjm): In future, if this becomes popular, we can enable
+          # customization of the batch normalization params by accepting a
+          # list of `BatchNormalization` instances as `batch_norm`.
+          net = normalization.batch_normalization(
+              net,
+              # The default momentum 0.99 actually crashes on certain
+              # problem, so here we use 0.999, which is the default of
+              # tf.contrib.layers.batch_norm.
+              momentum=0.999,
+              training=is_training,
+              name='batchnorm_%d' % layer_id)
+      _add_hidden_layer_summary(net, hidden_layer_scope.name)
+
+    with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
+      logits = core_layers.dense(
+          net,
+          units=units,
+          activation=None,
           kernel_initializer=init_ops.glorot_uniform_initializer(),
-          name='hiddenlayer_%d' % layer_id)
-      hidden_layers.append(hidden_layer)
-      if self._dropout is not None:
-        dropout_layer = core_layers.Dropout(rate=dropout)
-        dropout_layers.append(dropout_layer)
-      if self._batch_norm:
-        batch_norm_layer = normalization.BatchNormalization(
-            # The default momentum 0.99 actually crashes on certain
-            # problem, so here we use 0.999, which is the default of
-            # tf.contrib.layers.batch_norm.
-            momentum=0.999,
-            trainable=True,
-            name='hiddenlayer_%d/batchnorm_%d' % (layer_id, layer_id))
-        batch_norm_layers.append(batch_norm_layer)
-
-    self._hidden_layers = self._add_layers(hidden_layers)
-    if self._dropout is not None:
-      self._dropout_layers = self._add_layers(dropout_layers)
-    if self._batch_norm:
-      self._batch_norm_layers = self._add_layers(batch_norm_layers)
-
-    self._logits_layer = core_layers.Dense(
-        units=units,
-        activation=None,
-        kernel_initializer=init_ops.glorot_uniform_initializer(),
-        name='logits')
-
-  def call(self, features, mode):
-    is_training = mode == model_fn.ModeKeys.TRAIN
-    with variable_scope.variable_scope('input_from_feature_columns'):
-      net = self._input_layer(features)
-    for i in range(len(self._hidden_layers)):
-      net = self._hidden_layers[i](net)
-      if self._dropout is not None and is_training:
-        net = self._dropout_layers[i](net)
-      if self._batch_norm:
-        net = self._batch_norm_layers[i](net, training=is_training)
-      _add_hidden_layer_summary(net, self._hidden_layers[i].name)
-
-    logits = self._logits_layer(net)
-    _add_hidden_layer_summary(logits, self._logits_layer.name)
+          name=logits_scope)
+    _add_hidden_layer_summary(logits, logits_scope.name)
+
     return logits
 
-  def _add_layers(self, layers):
-    # "Magic" required for keras.Model classes to track all the variables in
-    # a list of layers.Layer objects.
-    # TODO(ashankar): Figure out API so user code doesn't have to do this.
-    for layer in layers:
-      setattr(self, layer.name, layer)
-    return layers
+  return dnn_logit_fn
 
 
 def _dnn_model_fn(features,
@@ -213,8 +143,7 @@ def _dnn_model_fn(features,
                   input_layer_partitioner=None,
                   config=None,
                   use_tpu=False,
-                  batch_norm=False,
-                  shared_state_manager=None):
+                  batch_norm=False):
   """Deep Neural Net model_fn.
 
   Args:
@@ -238,8 +167,6 @@ def _dnn_model_fn(features,
     use_tpu: Whether to make a DNN model able to run on TPU. Will make function
       return a `_TPUEstimatorSpec` instance and disable variable partitioning.
     batch_norm: Whether to use batch normalization after each hidden layer.
-    shared_state_manager: A SharedEmbeddingStateManager object to hold the
-      shared state for SharedEmbeddingColumn's.
 
   Returns:
     An `EstimatorSpec` instance.
@@ -275,8 +202,7 @@ def _dnn_model_fn(features,
         activation_fn=activation_fn,
         dropout=dropout,
         input_layer_partitioner=input_layer_partitioner,
-        batch_norm=batch_norm,
-        shared_state_manager=shared_state_manager)
+        batch_norm=batch_norm)
     logits = logit_fn(features=features, mode=mode)
 
     if use_tpu:
@@ -444,10 +370,6 @@ class DNNClassifier(estimator.Estimator):
     """
     head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
         n_classes, weight_column, label_vocabulary, loss_reduction)
-
-    shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
-        feature_columns)
-
     def _model_fn(features, labels, mode, config):
       """Call the defined shared _dnn_model_fn."""
       return _dnn_model_fn(
@@ -462,8 +384,7 @@ class DNNClassifier(estimator.Estimator):
           dropout=dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config,
-          batch_norm=batch_norm,
-          shared_state_manager=shared_state_manager)
+          batch_norm=batch_norm)
 
     super(DNNClassifier, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config,
@@ -611,10 +532,6 @@ class DNNRegressor(estimator.Estimator):
       batch_norm: Whether to use batch normalization after each hidden layer.
     """
 
-    shared_state_manager = None
-    if feature_column_v2.is_feature_column_v2(feature_columns):
-      shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
-
     def _model_fn(features, labels, mode, config):
       """Call the defined shared _dnn_model_fn."""
       return _dnn_model_fn(
@@ -622,8 +539,7 @@ class DNNRegressor(estimator.Estimator):
           labels=labels,
           mode=mode,
           head=head_lib._regression_head(  # pylint: disable=protected-access
-              label_dimension=label_dimension,
-              weight_column=weight_column,
+              label_dimension=label_dimension, weight_column=weight_column,
               loss_reduction=loss_reduction),
           hidden_units=hidden_units,
           feature_columns=tuple(feature_columns or []),
@@ -632,8 +548,7 @@ class DNNRegressor(estimator.Estimator):
           dropout=dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config,
-          batch_norm=batch_norm,
-          shared_state_manager=shared_state_manager)
+          batch_norm=batch_norm)
 
     super(DNNRegressor, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config,
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined.py b/tensorflow/python/estimator/canned/dnn_linear_combined.py
index f2ac9a7466..9799cf9e98 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined.py
@@ -27,7 +27,6 @@ from tensorflow.python.estimator.canned import dnn
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import linear
 from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import nn
@@ -143,9 +142,6 @@ def _dnn_linear_combined_model_fn(features,
           max_partitions=num_ps_replicas,
           min_slice_size=64 << 20))
 
-  shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
-      list(linear_feature_columns) + list(dnn_feature_columns))
-
   # Build DNN Logits.
   dnn_parent_scope = 'dnn'
 
@@ -174,8 +170,7 @@ def _dnn_linear_combined_model_fn(features,
           activation_fn=dnn_activation_fn,
           dropout=dnn_dropout,
           input_layer_partitioner=input_layer_partitioner,
-          batch_norm=batch_norm,
-          shared_state_manager=shared_state_manager)
+          batch_norm=batch_norm)
       dnn_logits = dnn_logit_fn(features=features, mode=mode)
 
   linear_parent_scope = 'linear'
diff --git a/tensorflow/python/estimator/canned/dnn_test.py b/tensorflow/python/estimator/canned/dnn_test.py
index e64cd522b4..fc90b7c35e 100644
--- a/tensorflow/python/estimator/canned/dnn_test.py
+++ b/tensorflow/python/estimator/canned/dnn_test.py
@@ -21,7 +21,6 @@ from __future__ import print_function
 import shutil
 import tempfile
 
-from absl.testing import parameterized
 import numpy as np
 import six
 
@@ -34,7 +33,6 @@ from tensorflow.python.estimator.export import export
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.estimator.inputs import pandas_io
 from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import data_flow_ops
@@ -67,14 +65,6 @@ class DNNModelFnTest(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
     dnn_testing_utils.BaseDNNModelFnTest.__init__(self, dnn._dnn_model_fn)
 
 
-class DNNModelFnV2Test(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNModelFnTest.__init__(
-        self, dnn._dnn_model_fn, is_fc_v2=True)
-
-
 class DNNLogitFnTest(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
 
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
@@ -83,14 +73,6 @@ class DNNLogitFnTest(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
                                                   dnn._dnn_logit_fn_builder)
 
 
-class DNNLogitFnV2Test(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNLogitFnTest.__init__(
-        self, dnn._dnn_logit_fn_builder, is_fc_v2=True)
-
-
 class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
                           test.TestCase):
 
@@ -100,15 +82,6 @@ class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
                                                        _dnn_regressor_fn)
 
 
-class DNNWarmStartingV2Test(dnn_testing_utils.BaseDNNWarmStartingTest,
-                            test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
-        self, _dnn_classifier_fn, _dnn_regressor_fn, is_fc_v2=True)
-
-
 class DNNClassifierEvaluateTest(
     dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
 
@@ -118,15 +91,6 @@ class DNNClassifierEvaluateTest(
         self, _dnn_classifier_fn)
 
 
-class DNNClassifierEvaluateV2Test(
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
-        self, _dnn_classifier_fn, is_fc_v2=True)
-
-
 class DNNClassifierPredictTest(
     dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
 
@@ -136,15 +100,6 @@ class DNNClassifierPredictTest(
         self, _dnn_classifier_fn)
 
 
-class DNNClassifierPredictV2Test(dnn_testing_utils.BaseDNNClassifierPredictTest,
-                                 test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
-        self, _dnn_classifier_fn, is_fc_v2=True)
-
-
 class DNNClassifierTrainTest(
     dnn_testing_utils.BaseDNNClassifierTrainTest, test.TestCase):
 
@@ -154,15 +109,6 @@ class DNNClassifierTrainTest(
         self, _dnn_classifier_fn)
 
 
-class DNNClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
-                               test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
-        self, _dnn_classifier_fn, is_fc_v2=True)
-
-
 def _dnn_regressor_fn(*args, **kwargs):
   return dnn.DNNRegressor(*args, **kwargs)
 
@@ -176,15 +122,6 @@ class DNNRegressorEvaluateTest(
         self, _dnn_regressor_fn)
 
 
-class DNNRegressorEvaluateV2Test(dnn_testing_utils.BaseDNNRegressorEvaluateTest,
-                                 test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_regressor_fn, is_fc_v2=True)
-
-
 class DNNRegressorPredictTest(
     dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
 
@@ -194,15 +131,6 @@ class DNNRegressorPredictTest(
         self, _dnn_regressor_fn)
 
 
-class DNNRegressorPredictV2Test(dnn_testing_utils.BaseDNNRegressorPredictTest,
-                                test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_regressor_fn, is_fc_v2=True)
-
-
 class DNNRegressorTrainTest(
     dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
 
@@ -212,15 +140,6 @@ class DNNRegressorTrainTest(
         self, _dnn_regressor_fn)
 
 
-class DNNRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
-                              test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_regressor_fn, is_fc_v2=True)
-
-
 def _queue_parsed_features(feature_map):
   tensors_to_enqueue = []
   keys = []
@@ -237,8 +156,7 @@ def _queue_parsed_features(feature_map):
   return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
 
 
-@parameterized.parameters((True,), (False,))
-class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
+class DNNRegressorIntegrationTest(test.TestCase):
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -248,16 +166,11 @@ class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
       writer_cache.FileWriterCache.clear()
       shutil.rmtree(self._model_dir)
 
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, batch_size,
-                          is_fc_v2):
+  def _test_complete_flow(
+      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
     feature_columns = [
         feature_column.numeric_column('x', shape=(input_dimension,))]
-    if is_fc_v2:
-      feature_columns = [
-          feature_column_v2.numeric_column('x', shape=(input_dimension,))
-      ]
-
     est = dnn.DNNRegressor(
         hidden_units=(2, 2),
         feature_columns=feature_columns,
@@ -281,17 +194,14 @@ class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual((batch_size, label_dimension), predictions.shape)
 
     # EXPORT
-    if is_fc_v2:
-      feature_spec = feature_column_v2.make_parse_example_spec(feature_columns)
-    else:
-      feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self, is_fc_v2):
+  def test_numpy_input_fn(self):
     """Tests complete flow with numpy_input_fn."""
     label_dimension = 2
     batch_size = 10
@@ -320,10 +230,9 @@ class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size,
-        is_fc_v2=is_fc_v2)
+        batch_size=batch_size)
 
-  def test_pandas_input_fn(self, is_fc_v2):
+  def test_pandas_input_fn(self):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -354,10 +263,9 @@ class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size,
-        is_fc_v2=is_fc_v2)
+        batch_size=batch_size)
 
-  def test_input_fn_from_parse_example(self, is_fc_v2):
+  def test_input_fn_from_parse_example(self):
     """Tests complete flow with input_fn constructed from parse_example."""
     label_dimension = 2
     batch_size = 10
@@ -405,11 +313,9 @@ class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
         predict_input_fn=_predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size,
-        is_fc_v2=is_fc_v2)
+        batch_size=batch_size)
 
 
-@parameterized.parameters((True,), (False,))
 class DNNClassifierIntegrationTest(test.TestCase):
 
   def setUp(self):
@@ -423,15 +329,11 @@ class DNNClassifierIntegrationTest(test.TestCase):
   def _as_label(self, data_in_float):
     return np.rint(data_in_float).astype(np.int64)
 
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, n_classes, batch_size, is_fc_v2):
+  def _test_complete_flow(
+      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      n_classes, batch_size):
     feature_columns = [
         feature_column.numeric_column('x', shape=(input_dimension,))]
-    if is_fc_v2:
-      feature_columns = [
-          feature_column_v2.numeric_column('x', shape=(input_dimension,))
-      ]
-
     est = dnn.DNNClassifier(
         hidden_units=(2, 2),
         feature_columns=feature_columns,
@@ -455,17 +357,14 @@ class DNNClassifierIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
 
     # EXPORT
-    if is_fc_v2:
-      feature_spec = feature_column_v2.make_parse_example_spec(feature_columns)
-    else:
-      feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self, is_fc_v2):
+  def test_numpy_input_fn(self):
     """Tests complete flow with numpy_input_fn."""
     n_classes = 3
     input_dimension = 2
@@ -497,10 +396,9 @@ class DNNClassifierIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size,
-        is_fc_v2=is_fc_v2)
+        batch_size=batch_size)
 
-  def test_pandas_input_fn(self, is_fc_v2):
+  def test_pandas_input_fn(self):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -532,10 +430,9 @@ class DNNClassifierIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size,
-        is_fc_v2=is_fc_v2)
+        batch_size=batch_size)
 
-  def test_input_fn_from_parse_example(self, is_fc_v2):
+  def test_input_fn_from_parse_example(self):
     """Tests complete flow with input_fn constructed from parse_example."""
     input_dimension = 2
     n_classes = 3
@@ -587,8 +484,7 @@ class DNNClassifierIntegrationTest(test.TestCase):
         predict_input_fn=_predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size,
-        is_fc_v2=is_fc_v2)
+        batch_size=batch_size)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index 3b3b63cf65..11f1e93630 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -34,7 +34,6 @@ from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.canned import prediction_keys
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -105,7 +104,6 @@ def create_checkpoint(weights_and_biases,
     weights_and_biases: Iterable of tuples of weight and bias values.
     global_step: Initial global step to save in checkpoint.
     model_dir: Directory into which checkpoint is saved.
-    batch_norm_vars: Variables used for batch normalization.
   """
   weights, biases = zip(*weights_and_biases)
   if batch_norm_vars:
@@ -246,9 +244,8 @@ def mock_optimizer(testcase, hidden_units, expected_loss=None):
 class BaseDNNModelFnTest(object):
   """Tests that _dnn_model_fn passes expected logits to mock head."""
 
-  def __init__(self, dnn_model_fn, is_fc_v2=False):
+  def __init__(self, dnn_model_fn):
     self._dnn_model_fn = dnn_model_fn
-    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -263,11 +260,6 @@ class BaseDNNModelFnTest(object):
     """Tests that the expected logits are passed to mock head."""
     with ops.Graph().as_default():
       training_util.create_global_step()
-      age_column = feature_column.numeric_column(
-          'age', shape=np.array(inputs).shape[1:])
-      if self._is_fc_v2:
-        age_column = feature_column_v2.numeric_column(
-            'age', shape=np.array(inputs).shape[1:])
       head = mock_head(
           self,
           hidden_units=hidden_units,
@@ -279,7 +271,10 @@ class BaseDNNModelFnTest(object):
           mode=mode,
           head=head,
           hidden_units=hidden_units,
-          feature_columns=[age_column],
+          feature_columns=[
+              feature_column.numeric_column(
+                  'age', shape=np.array(inputs).shape[1:])
+          ],
           optimizer=mock_optimizer(self, hidden_units))
       with monitored_session.MonitoredTrainingSession(
           checkpoint_dir=self._model_dir) as sess:
@@ -446,16 +441,6 @@ class BaseDNNModelFnTest(object):
     inputs = ([[10.]], [[8.]])
     expected_logits = [[-0.48, 0.48, 0.39]]
 
-    feature_columns = [
-        feature_column.numeric_column('age'),
-        feature_column.numeric_column('height')
-    ]
-    if self._is_fc_v2:
-      feature_columns = [
-          feature_column_v2.numeric_column('age'),
-          feature_column_v2.numeric_column('height')
-      ]
-
     for mode in [
         model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
         model_fn.ModeKeys.PREDICT
@@ -476,7 +461,10 @@ class BaseDNNModelFnTest(object):
             mode=mode,
             head=head,
             hidden_units=hidden_units,
-            feature_columns=feature_columns,
+            feature_columns=[
+                feature_column.numeric_column('age'),
+                feature_column.numeric_column('height')
+            ],
             optimizer=mock_optimizer(self, hidden_units))
         with monitored_session.MonitoredTrainingSession(
             checkpoint_dir=self._model_dir) as sess:
@@ -520,9 +508,8 @@ class BaseDNNModelFnTest(object):
 class BaseDNNLogitFnTest(object):
   """Tests correctness of logits calculated from _dnn_logit_fn_builder."""
 
-  def __init__(self, dnn_logit_fn_builder, is_fc_v2=False):
+  def __init__(self, dnn_logit_fn_builder):
     self._dnn_logit_fn_builder = dnn_logit_fn_builder
-    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -546,12 +533,6 @@ class BaseDNNLogitFnTest(object):
       training_util.create_global_step()
       # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
       # the checkpoint naming is shared.
-      age_column = feature_column.numeric_column(
-          'age', shape=np.array(inputs).shape[1:])
-      if self._is_fc_v2:
-        age_column = feature_column_v2.numeric_column(
-            'age', shape=np.array(inputs).shape[1:])
-
       with variable_scope.variable_scope('dnn'):
         input_layer_partitioner = (
             partitioned_variables.min_max_variable_partitioner(
@@ -559,7 +540,10 @@ class BaseDNNLogitFnTest(object):
         logit_fn = self._dnn_logit_fn_builder(
             units=logits_dimension,
             hidden_units=hidden_units,
-            feature_columns=[age_column],
+            feature_columns=[
+                feature_column.numeric_column(
+                    'age', shape=np.array(inputs).shape[1:])
+            ],
             activation_fn=nn.relu,
             dropout=None,
             input_layer_partitioner=input_layer_partitioner,
@@ -784,16 +768,6 @@ class BaseDNNLogitFnTest(object):
     inputs = ([[10.]], [[8.]])
     expected_logits = [[-0.48, 0.48, 0.39]]
 
-    feature_columns = [
-        feature_column.numeric_column('age'),
-        feature_column.numeric_column('height')
-    ]
-    if self._is_fc_v2:
-      feature_columns = [
-          feature_column_v2.numeric_column('age'),
-          feature_column_v2.numeric_column('height')
-      ]
-
     for mode in [
         model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
         model_fn.ModeKeys.PREDICT
@@ -811,7 +785,10 @@ class BaseDNNLogitFnTest(object):
           logit_fn = self._dnn_logit_fn_builder(
               units=logits_dimension,
               hidden_units=hidden_units,
-              feature_columns=feature_columns,
+              feature_columns=[
+                  feature_column.numeric_column('age'),
+                  feature_column.numeric_column('height')
+              ],
               activation_fn=nn.relu,
               dropout=None,
               input_layer_partitioner=input_layer_partitioner,
@@ -829,10 +806,9 @@ class BaseDNNLogitFnTest(object):
 
 class BaseDNNWarmStartingTest(object):
 
-  def __init__(self, _dnn_classifier_fn, _dnn_regressor_fn, is_fc_v2=False):
+  def __init__(self, _dnn_classifier_fn, _dnn_regressor_fn):
     self._dnn_classifier_fn = _dnn_classifier_fn
     self._dnn_regressor_fn = _dnn_regressor_fn
-    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     # Create a directory to save our old checkpoint and vocabularies to.
@@ -871,11 +847,6 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
-    if self._is_fc_v2:
-      city = feature_column_v2.embedding_column(
-          feature_column_v2.categorical_column_with_vocabulary_list(
-              'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-          dimension=5)
 
     # Create a DNNClassifier and train to save a checkpoint.
     dnn_classifier = self._dnn_classifier_fn(
@@ -908,11 +879,6 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
-    if self._is_fc_v2:
-      city = feature_column_v2.embedding_column(
-          feature_column_v2.categorical_column_with_vocabulary_list(
-              'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-          dimension=5)
 
     # Create a DNNRegressor and train to save a checkpoint.
     dnn_regressor = self._dnn_regressor_fn(
@@ -943,11 +909,6 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
-    if self._is_fc_v2:
-      city = feature_column_v2.embedding_column(
-          feature_column_v2.categorical_column_with_vocabulary_list(
-              'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-          dimension=5)
 
     # Create a DNNClassifier and train to save a checkpoint.
     dnn_classifier = self._dnn_classifier_fn(
@@ -1003,13 +964,6 @@ class BaseDNNWarmStartingTest(object):
             vocabulary_file=vocab_file,
             vocabulary_size=len(vocab_list)),
         dimension=2)
-    if self._is_fc_v2:
-      occupation = feature_column_v2.embedding_column(
-          feature_column_v2.categorical_column_with_vocabulary_file(
-              'occupation',
-              vocabulary_file=vocab_file,
-              vocabulary_size=len(vocab_list)),
-          dimension=2)
 
     # Create a DNNClassifier and train to save a checkpoint.
     partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2)
@@ -1037,13 +991,6 @@ class BaseDNNWarmStartingTest(object):
             vocabulary_file=new_vocab_file,
             vocabulary_size=len(new_vocab_list)),
         dimension=2)
-    if self._is_fc_v2:
-      new_occupation = feature_column_v2.embedding_column(
-          feature_column_v2.categorical_column_with_vocabulary_file(
-              'occupation',
-              vocabulary_file=new_vocab_file,
-              vocabulary_size=len(new_vocab_list)),
-          dimension=2)
     # We can create our VocabInfo object from the new and old occupation
     # FeatureColumn's.
     occupation_vocab_info = estimator.VocabInfo(
@@ -1108,11 +1055,6 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
-    if self._is_fc_v2:
-      locality = feature_column_v2.embedding_column(
-          feature_column_v2.categorical_column_with_vocabulary_list(
-              'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
-          dimension=5)
 
     # Create a DNNClassifier and train to save a checkpoint.
     dnn_classifier = self._dnn_classifier_fn(
@@ -1130,11 +1072,6 @@ class BaseDNNWarmStartingTest(object):
         feature_column.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
-    if self._is_fc_v2:
-      city = feature_column_v2.embedding_column(
-          feature_column_v2.categorical_column_with_vocabulary_list(
-              'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-          dimension=5)
     warm_started_dnn_classifier = self._dnn_classifier_fn(
         hidden_units=[256, 128],
         feature_columns=[city],
@@ -1164,9 +1101,8 @@ class BaseDNNWarmStartingTest(object):
 
 class BaseDNNClassifierEvaluateTest(object):
 
-  def __init__(self, dnn_classifier_fn, is_fc_v2=False):
+  def __init__(self, dnn_classifier_fn):
     self._dnn_classifier_fn = dnn_classifier_fn
-    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1183,12 +1119,9 @@ class BaseDNNClassifierEvaluateTest(object):
         (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
          ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
 
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[age_column],
+        feature_columns=[feature_column.numeric_column('age')],
         model_dir=self._model_dir)
     def _input_fn():
       # batch_size = 2, one false label, and one true.
@@ -1226,12 +1159,9 @@ class BaseDNNClassifierEvaluateTest(object):
                                            .0]),), global_step, self._model_dir)
     n_classes = 3
 
-    age_column = feature_column.numeric_column('age', shape=[2])
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age', shape=[2])
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[age_column],
+        feature_columns=[feature_column.numeric_column('age', shape=[2])],
         n_classes=n_classes,
         model_dir=self._model_dir)
     def _input_fn():
@@ -1260,12 +1190,9 @@ class BaseDNNClassifierEvaluateTest(object):
         (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
          ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
 
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[age_column],
+        feature_columns=[feature_column.numeric_column('age')],
         model_dir=self._model_dir)
     def _input_fn():
       # batch_size = 2, one false label, and one true.
@@ -1289,12 +1216,9 @@ class BaseDNNClassifierEvaluateTest(object):
                       global_step, self._model_dir)
     n_classes = 3
 
-    age_column = feature_column.numeric_column('age', shape=[2])
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age', shape=[2])
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[age_column],
+        feature_columns=[feature_column.numeric_column('age', shape=[2])],
         n_classes=n_classes,
         weight_column='w',
         model_dir=self._model_dir)
@@ -1314,9 +1238,8 @@ class BaseDNNClassifierEvaluateTest(object):
 
 class BaseDNNRegressorEvaluateTest(object):
 
-  def __init__(self, dnn_regressor_fn, is_fc_v2=False):
+  def __init__(self, dnn_regressor_fn):
     self._dnn_regressor_fn = dnn_regressor_fn
-    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1334,12 +1257,9 @@ class BaseDNNRegressorEvaluateTest(object):
         (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
          ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
 
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=[age_column],
+        feature_columns=[feature_column.numeric_column('age')],
         model_dir=self._model_dir)
     def _input_fn():
       return {'age': [[10.]]}, [[1.]]
@@ -1367,12 +1287,9 @@ class BaseDNNRegressorEvaluateTest(object):
                                            .0]),), global_step, self._model_dir)
     label_dimension = 3
 
-    age_column = feature_column.numeric_column('age', shape=[2])
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age', shape=[2])
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=[age_column],
+        feature_columns=[feature_column.numeric_column('age', shape=[2])],
         label_dimension=label_dimension,
         model_dir=self._model_dir)
     def _input_fn():
@@ -1401,12 +1318,9 @@ class BaseDNNRegressorEvaluateTest(object):
                       global_step, self._model_dir)
     label_dimension = 3
 
-    age_column = feature_column.numeric_column('age', shape=[2])
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age', shape=[2])
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=[age_column],
+        feature_columns=[feature_column.numeric_column('age', shape=[2])],
         label_dimension=label_dimension,
         weight_column='w',
         model_dir=self._model_dir)
@@ -1425,9 +1339,8 @@ class BaseDNNRegressorEvaluateTest(object):
 
 class BaseDNNClassifierPredictTest(object):
 
-  def __init__(self, dnn_classifier_fn, is_fc_v2=False):
+  def __init__(self, dnn_classifier_fn):
     self._dnn_classifier_fn = dnn_classifier_fn
-    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1445,13 +1358,10 @@ class BaseDNNClassifierPredictTest(object):
         global_step=0,
         model_dir=self._model_dir)
 
-    x_column = feature_column.numeric_column('x')
-    if self._is_fc_v2:
-      x_column = feature_column_v2.numeric_column('x')
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
         label_vocabulary=label_vocabulary,
-        feature_columns=(x_column,),
+        feature_columns=(feature_column.numeric_column('x'),),
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
         x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
@@ -1493,12 +1403,9 @@ class BaseDNNClassifierPredictTest(object):
         global_step=0,
         model_dir=self._model_dir)
 
-    x_column = feature_column.numeric_column('x', shape=(2,))
-    if self._is_fc_v2:
-      x_column = feature_column_v2.numeric_column('x', shape=(2,))
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=(x_column,),
+        feature_columns=(feature_column.numeric_column('x', shape=(2,)),),
         label_vocabulary=label_vocabulary,
         n_classes=3,
         model_dir=self._model_dir)
@@ -1546,9 +1453,8 @@ class BaseDNNClassifierPredictTest(object):
 
 class BaseDNNRegressorPredictTest(object):
 
-  def __init__(self, dnn_regressor_fn, is_fc_v2=False):
+  def __init__(self, dnn_regressor_fn):
     self._dnn_regressor_fn = dnn_regressor_fn
-    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1567,12 +1473,9 @@ class BaseDNNRegressorPredictTest(object):
         global_step=0,
         model_dir=self._model_dir)
 
-    x_column = feature_column.numeric_column('x')
-    if self._is_fc_v2:
-      x_column = feature_column_v2.numeric_column('x')
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=(x_column,),
+        feature_columns=(feature_column.numeric_column('x'),),
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
         x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
@@ -1592,12 +1495,9 @@ class BaseDNNRegressorPredictTest(object):
                                                [.3, -.3,
                                                 .0]),), 100, self._model_dir)
 
-    x_column = feature_column.numeric_column('x', shape=(2,))
-    if self._is_fc_v2:
-      x_column = feature_column_v2.numeric_column('x', shape=(2,))
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=(x_column,),
+        feature_columns=(feature_column.numeric_column('x', shape=(2,)),),
         label_dimension=3,
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
@@ -1694,9 +1594,8 @@ def _assert_simple_summary(testcase, expected_values, actual_summary):
 
 class BaseDNNClassifierTrainTest(object):
 
-  def __init__(self, dnn_classifier_fn, is_fc_v2=False):
+  def __init__(self, dnn_classifier_fn):
     self._dnn_classifier_fn = dnn_classifier_fn
-    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1707,13 +1606,10 @@ class BaseDNNClassifierTrainTest(object):
       shutil.rmtree(self._model_dir)
 
   def test_from_scratch_with_default_optimizer_binary(self):
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(age_column,),
+        feature_columns=(feature_column.numeric_column('age'),),
         model_dir=self._model_dir)
 
     # Train for a few steps, then validate final checkpoint.
@@ -1725,14 +1621,11 @@ class BaseDNNClassifierTrainTest(object):
         output_units=1, model_dir=self._model_dir)
 
   def test_from_scratch_with_default_optimizer_multi_class(self):
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     n_classes = 3
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(age_column,),
+        feature_columns=(feature_column.numeric_column('age'),),
         n_classes=n_classes,
         model_dir=self._model_dir)
 
@@ -1745,15 +1638,12 @@ class BaseDNNClassifierTrainTest(object):
         output_units=n_classes, model_dir=self._model_dir)
 
   def test_from_scratch_validate_summary(self):
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     opt = mock_optimizer(
         self, hidden_units=hidden_units)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(age_column,),
+        feature_columns=(feature_column.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1777,9 +1667,6 @@ class BaseDNNClassifierTrainTest(object):
       self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)
 
   def test_binary_classification(self):
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     base_global_step = 100
     hidden_units = (2, 2)
     create_checkpoint(
@@ -1795,7 +1682,7 @@ class BaseDNNClassifierTrainTest(object):
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(age_column,),
+        feature_columns=(feature_column.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1826,9 +1713,6 @@ class BaseDNNClassifierTrainTest(object):
         hidden_units=hidden_units, output_units=1, model_dir=self._model_dir)
 
   def test_binary_classification_float_labels(self):
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     base_global_step = 100
     hidden_units = (2, 2)
     create_checkpoint(
@@ -1844,7 +1728,7 @@ class BaseDNNClassifierTrainTest(object):
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(age_column,),
+        feature_columns=(feature_column.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1857,9 +1741,6 @@ class BaseDNNClassifierTrainTest(object):
     self.assertEqual(1, opt.minimize.call_count)
 
   def test_multi_class(self):
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     n_classes = 3
     base_global_step = 100
     hidden_units = (2, 2)
@@ -1878,7 +1759,7 @@ class BaseDNNClassifierTrainTest(object):
     dnn_classifier = self._dnn_classifier_fn(
         n_classes=n_classes,
         hidden_units=hidden_units,
-        feature_columns=(age_column,),
+        feature_columns=(feature_column.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1912,9 +1793,8 @@ class BaseDNNClassifierTrainTest(object):
 
 class BaseDNNRegressorTrainTest(object):
 
-  def __init__(self, dnn_regressor_fn, is_fc_v2=False):
+  def __init__(self, dnn_regressor_fn):
     self._dnn_regressor_fn = dnn_regressor_fn
-    self._is_fc_v2 = is_fc_v2
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1925,13 +1805,10 @@ class BaseDNNRegressorTrainTest(object):
       shutil.rmtree(self._model_dir)
 
   def test_from_scratch_with_default_optimizer(self):
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=(age_column,),
+        feature_columns=(feature_column.numeric_column('age'),),
         model_dir=self._model_dir)
 
     # Train for a few steps, then validate final checkpoint.
@@ -1943,14 +1820,11 @@ class BaseDNNRegressorTrainTest(object):
         output_units=1, model_dir=self._model_dir)
 
   def test_from_scratch(self):
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     hidden_units = (2, 2)
     opt = mock_optimizer(self, hidden_units=hidden_units)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=(age_column,),
+        feature_columns=(feature_column.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1975,9 +1849,6 @@ class BaseDNNRegressorTrainTest(object):
 
   def test_one_dim(self):
     """Asserts train loss for one-dimensional input and logits."""
-    age_column = feature_column.numeric_column('age')
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column('age')
     base_global_step = 100
     hidden_units = (2, 2)
     create_checkpoint(
@@ -1993,7 +1864,7 @@ class BaseDNNRegressorTrainTest(object):
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=(age_column,),
+        feature_columns=(feature_column.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -2040,17 +1911,13 @@ class BaseDNNRegressorTrainTest(object):
     # See that test for calculation of logits.
     # logits = [[-0.48, 0.48, 0.39]]
     # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
-    age_column = feature_column.numeric_column('age', shape=[input_dimension])
-    if self._is_fc_v2:
-      age_column = feature_column_v2.numeric_column(
-          'age', shape=[input_dimension])
-
     expected_loss = 4.3929
     opt = mock_optimizer(
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=[age_column],
+        feature_columns=[
+            feature_column.numeric_column('age', shape=[input_dimension])],
         label_dimension=label_dimension,
         optimizer=opt,
         model_dir=self._model_dir)
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 0d189320da..9984379e9d 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -301,17 +301,17 @@ class InputLayer(object):
                feature_columns,
                weight_collections=None,
                trainable=True,
-               cols_to_vars=None,
-               name='feature_column_input_layer'):
+               cols_to_vars=None):
     """See `input_layer`."""
 
     self._feature_columns = feature_columns
     self._weight_collections = weight_collections
     self._trainable = trainable
     self._cols_to_vars = cols_to_vars
-    self._name = name
     self._input_layer_template = template.make_template(
-        self._name, _internal_input_layer, create_scope_now_=True)
+        'feature_column_input_layer',
+        _internal_input_layer,
+        create_scope_now_=True)
     self._scope = self._input_layer_template.variable_scope
 
   def __call__(self, features):
@@ -323,10 +323,6 @@ class InputLayer(object):
         cols_to_vars=None,
         scope=self._scope)
 
-  @property
-  def name(self):
-    return self._name
-
   @property
   def non_trainable_variables(self):
     return self._input_layer_template.non_trainable_variables
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 289f6d0d14..28c5c82d2c 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -2045,14 +2045,6 @@ class DenseColumn(FeatureColumn):
     pass
 
 
-def is_feature_column_v2(feature_columns):
-  """Returns True if all feature columns are V2."""
-  for feature_column in feature_columns:
-    if not isinstance(feature_column, FeatureColumn):
-      return False
-  return True
-
-
 def _create_weighted_sum(column,
                          transformation_cache,
                          state_manager,
@@ -2790,12 +2782,6 @@ class SharedEmbeddingStateManager(Layer):
     return self._var_dict[name]
 
 
-def maybe_create_shared_state_manager(feature_columns):
-  if is_feature_column_v2(feature_columns):
-    return SharedEmbeddingStateManager()
-  return None
-
-
 class SharedEmbeddingColumn(
     DenseColumn, SequenceDenseColumn,
     collections.namedtuple(
-- 
GitLab


From 42832c2d44ae1845354afed2c22841555645ebe8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 08:20:47 -0700
Subject: [PATCH 0376/1357] Treat kDomain instruction as a pure pass-through in
 HloValue

It doesn't access the data in any way similarly to kTuple so it should
be handled the same way.

PiperOrigin-RevId: 213630620
---
 tensorflow/compiler/xla/service/hlo_value.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc
index 773fc7d225..8549487702 100644
--- a/tensorflow/compiler/xla/service/hlo_value.cc
+++ b/tensorflow/compiler/xla/service/hlo_value.cc
@@ -131,6 +131,7 @@ bool MayUseOperandValue(int64 operand_number, const ShapeIndex& index,
       CHECK_LE(operand_number, 2);
       return operand_number == 0 || index.empty();
 
+    case HloOpcode::kDomain:
     case HloOpcode::kTuple:
       // These instructions always pass through their operands transparently.
       return false;
-- 
GitLab


From e8cb8f2ae3122191418c07fd306d3494dcd0d582 Mon Sep 17 00:00:00 2001
From: Guangda Lai <laigd@google.com>
Date: Wed, 19 Sep 2018 09:09:27 -0700
Subject: [PATCH 0377/1357] Add build rules for mnist_softmax_xla.py so it can
 work internally.

PiperOrigin-RevId: 213637804
---
 tensorflow/examples/tutorials/mnist/BUILD | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD
index d4070fdd1e..99da44d6d5 100644
--- a/tensorflow/examples/tutorials/mnist/BUILD
+++ b/tensorflow/examples/tutorials/mnist/BUILD
@@ -83,6 +83,18 @@ py_binary(
     ],
 )
 
+py_binary(
+    name = "mnist_softmax_xla",
+    srcs = [
+        "mnist_softmax_xla.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":input_data",
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
 py_binary(
     name = "mnist_deep",
     srcs = [
-- 
GitLab


From 1d2e3ba88d56c0e76487042365c07a52eaa94424 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 09:28:33 -0700
Subject: [PATCH 0378/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213640434
---
 .../internal/optimized/depthwiseconv_uint8.h  | 20 ++++----
 .../depthwiseconv_uint8_3x3_filter.h          | 26 +++++-----
 .../internal/optimized/optimized_ops.h        | 47 ++++++++++---------
 .../internal/reference/depthwiseconv_uint8.h  |  8 +++-
 .../internal/reference/reference_ops.h        | 34 +++++++++-----
 5 files changed, 78 insertions(+), 57 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index ee3fe78a10..f892b8f661 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -24,6 +24,9 @@ limitations under the License.
 namespace tflite {
 namespace optimized_ops {
 
+// TODO(b/80418076): Move to legacy ops file, along with invocations.
+static constexpr int kDepthwiseReverseShift = -1;
+
 // Implementation of quantized DepthwiseConv
 
 template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
@@ -1712,8 +1715,8 @@ inline void DepthwiseConv(
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
 #ifdef USE_NEON
-  const bool shift_left = (output_shift <= 0);
-  const int32 multiplier_power_of_two = shift_left ? (1 << -output_shift) : 1;
+  const bool shift_left = (output_shift > 0);
+  const int32 multiplier_power_of_two = shift_left ? (1 << output_shift) : 1;
 #endif
   TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
   TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -1872,7 +1875,7 @@ inline void DepthwiseConv(
               acc[j] = vqrdmulhq_n_s32(acc[j], output_multiplier);
             }
             for (int j = 0; j < 4; j++) {
-              acc[j] = RoundingDivideByPOT(acc[j], output_shift);
+              acc[j] = RoundingDivideByPOT(acc[j], -output_shift);
             }
           } else {
             // Fixed-point multiplication.
@@ -1916,8 +1919,8 @@ inline void DepthwiseConv(
             acc0 = vqrdmulhq_n_s32(acc0, output_multiplier);
             acc1 = vqrdmulhq_n_s32(acc1, output_multiplier);
             // Rounding right shift.
-            acc0 = RoundingDivideByPOT(acc0, output_shift);
-            acc1 = RoundingDivideByPOT(acc1, output_shift);
+            acc0 = RoundingDivideByPOT(acc0, -output_shift);
+            acc1 = RoundingDivideByPOT(acc1, -output_shift);
           } else {
             // Fixed-point multiplication.
             acc0 = vmulq_n_s32(acc0, multiplier_power_of_two);
@@ -1953,7 +1956,7 @@ inline void DepthwiseConv(
             // Fixed-point multiplication.
             acc = vqrdmulhq_n_s32(acc, output_multiplier);
             // Rounding right shift.
-            acc = RoundingDivideByPOT(acc, output_shift);
+            acc = RoundingDivideByPOT(acc, -output_shift);
           } else {
             // Fixed-point multiplication.
             acc = vmulq_n_s32(acc, multiplier_power_of_two);
@@ -1980,7 +1983,7 @@ inline void DepthwiseConv(
         for (; i < num_output_values; i++) {
           int32 acc = acc_buffer[i];
           acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              -output_shift);
+                                              output_shift);
           acc += output_offset;
           acc = std::max(acc, output_activation_min);
           acc = std::min(acc, output_activation_max);
@@ -2020,7 +2023,8 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
   op_params.weights_offset = filter_offset;
   op_params.output_offset = output_offset;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kDepthwiseReverseShift * output_shift;
 
   DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
index e14d04ad02..4809ddd02a 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
@@ -49,7 +49,7 @@ struct DepthwiseConvParams {
   int32 output_multiplier;
   int32 output_activation_min;
   int32 output_activation_max;
-  int32 output_shift;
+  int32 output_right_shift;
   int32 input_width;
   int32 input_height;
   int32 stride_width;
@@ -75,7 +75,7 @@ struct DepthwiseConvParams {
 #define OFFSET_OUTPUT_MULTIPLIER 52
 #define OFFSET_OUTPUT_ACTIVATION_MIN 56
 #define OFFSET_OUTPUT_ACTIVATION_MAX 60
-#define OFFSET_OUTPUT_SHIFT 64
+#define OFFSET_OUTPUT_RIGHT_SHIFT 64
 #define OFFSET_INPUT_WIDTH 68
 #define OFFSET_INPUT_HEIGHT 72
 #define OFFSET_STRIDE_WIDTH 76
@@ -105,8 +105,8 @@ static_assert(offsetof(DepthwiseConvParams, output_activation_min) ==
                   OFFSET_OUTPUT_ACTIVATION_MIN, "");
 static_assert(offsetof(DepthwiseConvParams, output_activation_max) ==
                   OFFSET_OUTPUT_ACTIVATION_MAX, "");
-static_assert(offsetof(DepthwiseConvParams, output_shift) ==
-                  OFFSET_OUTPUT_SHIFT, "");
+static_assert(offsetof(DepthwiseConvParams, output_right_shift) ==
+                  OFFSET_OUTPUT_RIGHT_SHIFT, "");
 static_assert(offsetof(DepthwiseConvParams, input_width) ==
                   OFFSET_INPUT_WIDTH, "");
 static_assert(offsetof(DepthwiseConvParams, input_height) ==
@@ -189,7 +189,7 @@ struct DepthwiseConvWindow<8, 1, 1> {
         "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_MULTIPLIER) "]\n"
         "ldr w2, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n"
         "dup v27.4s, w9\n"
-        "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n"
+        "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_RIGHT_SHIFT) "]\n"
         "dup v29.4s, w2\n"
         "ldr w4, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n"
         "dup v30.4s, w4\n"
@@ -1166,7 +1166,7 @@ struct DepthwiseConvWindow<8, 2, 2> {
         // values from time to time when there are not enough NEON registers.
         // We use x9--x15 general purpose registers as they are caller-saved
         // temporary registers (see http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf).  // NOLINT
-        "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n"
+        "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_RIGHT_SHIFT) "]\n"
         "ldr w0, [%[params_ptr], #" STR(OFFSET_INPUT_OFFSET) "]\n"
         "cmp %w[output_window_height], #2\n"
         "dup v28.8h, w0\n"
@@ -2216,7 +2216,7 @@ struct DepthwiseConvPartial<EdgeType::kCenter, 1, 1> {
         "dup v27.4s, w10\n"
         "ld1 {v0.8b}, [%[filter_ptr]], #8\n"
         "cmp x11, #16\n"
-        "ldr w10, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n"
+        "ldr w10, [%[params_ptr], #" STR(OFFSET_OUTPUT_RIGHT_SHIFT) "]\n"
         "dup v28.4s, w9\n"
         "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n"
         "neg w10, w10\n"
@@ -2355,7 +2355,7 @@ struct DepthwiseConvPartial<EdgeType::kCorner, 1, 1> {
         "dup v26.8h, w6\n"
         "ldr w6, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n"
         "dup v27.4s, w7\n"
-        "ldr w7, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n"
+        "ldr w7, [%[params_ptr], #" STR(OFFSET_OUTPUT_RIGHT_SHIFT) "]\n"
         "dup v28.4s, w6\n"
         "ldr w6, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n"
         "neg w7, w7\n"
@@ -2532,7 +2532,7 @@ struct DepthwiseConvPartial<EdgeType::kHorizontal, 1, 1> {
         "dup v26.8h, w12\n"
         "ldr w12, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n"
         "dup v27.4s, w13\n"
-        "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n"
+        "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_RIGHT_SHIFT) "]\n"
         "dup v28.4s, w12\n"
         "ldr w12, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n"
         "neg w13, w13\n"
@@ -2739,7 +2739,7 @@ struct DepthwiseConvPartial<EdgeType::kVertical, 1, 1> {
         "dup v26.8h, w12\n"
         "ldr w12, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n"
         "dup v27.4s, w13\n"
-        "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n"
+        "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_RIGHT_SHIFT) "]\n"
         "dup v28.4s, w12\n"
         "ldr w12, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n"
         "neg w13, w13\n"
@@ -2910,7 +2910,7 @@ struct DepthwiseConvPartial<EdgeType::kVertical, 1, 1> {
 #undef OFFSET_OUTPUT_MULTIPLIER
 #undef OFFSET_OUTPUT_ACTIVATION_MIN
 #undef OFFSET_OUTPUT_ACTIVATION_MAX
-#undef OFFSET_OUTPUT_SHIFT
+#undef OFFSET_OUTPUT_RIGHT_SHIFT
 #undef OFFSET_INPUT_WIDTH
 #undef OFFSET_INPUT_HEIGHT
 #undef OFFSET_OUTPUT_WIDTH
@@ -3194,7 +3194,7 @@ inline bool Fast3x3FilterKernelSupported(
       (stride_height == 1 || stride_height == 2) &&
       (stride_width == stride_height) && (pad_width == 0 || pad_width == 1) &&
       (pad_height == 0 || pad_height == 1) && (pad_width == pad_height) &&
-      (input_depth % 8) == 0 && (output_shift > 0) &&
+      (input_depth % 8) == 0 && (output_shift <= 0) &&
       dilation_width_factor == 1 && dilation_height_factor == 1;
 
   if (!supported) {
@@ -3272,7 +3272,7 @@ inline void DepthwiseConv3x3Filter(
   params.output_offset = output_offset;
   params.filter_offset = filter_offset;
   params.output_multiplier = output_multiplier;
-  params.output_shift = output_shift;
+  params.output_right_shift = -output_shift;
   params.output_activation_min = output_activation_min;
   params.output_activation_max = output_activation_max;
 
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 6f4e135c94..6a7e664e85 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -88,7 +88,7 @@ using reference_ops::Transpose;
 // This will be phased out as the shifts are revised with more thought. Use of a
 // constant enables us to track progress on this work.
 //
-// Used mainly to convert from old-style shifts (right) to new-style (left).
+// Used to convert from old-style shifts (right) to new-style (left).
 static constexpr int kReverseShift = -1;
 
 // Make a local VectorMap typedef allowing to map a float array
@@ -977,7 +977,7 @@ inline void FullyConnectedAsGEMV(
   const int output_size = MatchingDim(filter_shape, filter_dim_count - 2,
                                       output_shape, output_dim_count - 1);
   static constexpr int kPeel = 4;
-  const bool shift_left = (output_shift <= 0);
+  const bool shift_left = (output_shift > 0);
   for (int k = 0; k < input_size; k += 64) {
     optimized_ops_preload_l1_stream(input_data + k);
   }
@@ -1090,7 +1090,7 @@ inline void FullyConnectedAsGEMV(
     bias_ptr += 4;
     reduced = vaddq_s32(reduced, bias_vec);
     if (shift_left) {
-      const int32 multiplier_power_of_two = 1 << -output_shift;
+      const int32 multiplier_power_of_two = 1 << output_shift;
       reduced = vmulq_n_s32(reduced, multiplier_power_of_two);
       reduced = vqrdmulhq_n_s32(reduced, output_multiplier);
     } else {
@@ -1098,7 +1098,7 @@ inline void FullyConnectedAsGEMV(
       reduced = vqrdmulhq_n_s32(reduced, output_multiplier);
       // Rounding-shift-right.
       using gemmlowp::RoundingDivideByPOT;
-      reduced = RoundingDivideByPOT(reduced, output_shift);
+      reduced = RoundingDivideByPOT(reduced, -output_shift);
     }
     // Add the output offset.
     const int32x4_t output_offset_vec = vdupq_n_s32(output_offset);
@@ -1195,7 +1195,7 @@ inline void FullyConnected(
   gemmlowp::MatrixMap<uint8, gemmlowp::MapOrder::ColMajor> output_matrix(
       output_data, output_rows, batches, output_rows);
   const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp(
-      bias_data, output_rows, output_offset, output_multiplier, -output_shift,
+      bias_data, output_rows, output_offset, output_multiplier, output_shift,
       output_activation_min, output_activation_max);
   gemmlowp::GemmWithOutputPipeline<uint8, uint8,
                                    gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
@@ -1219,7 +1219,8 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
   op_params.weights_offset = filter_offset;
   op_params.output_offset = output_offset;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
   op_params.quantized_activation_min = output_activation_min;
   op_params.quantized_activation_max = output_activation_max;
 
@@ -1274,14 +1275,14 @@ inline void FullyConnected(
     if (filter_offset == -128 && !(output_depth % 4) && !(accum_depth % 64)) {
       GEMVForLstmCellWithSymmetricRange(
           input_shape, input_data, filter_shape, filter_data, bias_shape,
-          bias_data_int32, output_multiplier, -output_shift, output_shape,
+          bias_data_int32, output_multiplier, output_shift, output_shape,
           output_data);
       return;
     }
     if (!(output_depth % 4) && !(accum_depth % 8)) {
       GEMVForLstmCell(input_shape, input_data, filter_shape, filter_data,
                       filter_offset, bias_shape, bias_data_int32,
-                      output_multiplier, -output_shift, output_shape,
+                      output_multiplier, output_shift, output_shape,
                       output_data);
       return;
     }
@@ -1302,7 +1303,7 @@ inline void FullyConnected(
   scale_stage.result_offset_after_shift = 0;
   scale_stage.result_fixedpoint_multiplier = output_multiplier;
   // Note that this shift is negated wrt ordinary FC.
-  scale_stage.result_exponent = -output_shift;
+  scale_stage.result_exponent = output_shift;
   gemmlowp::OutputStageClamp clamp_stage;
   clamp_stage.min = output_activation_min;
   clamp_stage.max = output_activation_max;
@@ -1330,7 +1331,8 @@ inline void FullyConnected(
   op_params.weights_offset = filter_offset;
   op_params.output_offset = output_offset;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
   op_params.quantized_activation_min = output_activation_min;
   op_params.quantized_activation_max = output_activation_max;
 
@@ -1376,8 +1378,8 @@ inline void ShuffledFullyConnectedWorkerImpl(
 #if defined USE_NEON
   const int8* shuffled_weights_ptr = shuffled_weights_data;
   if (batches == 1) {
-    const int right_shift = output_shift > 0 ? output_shift : 0;
-    const int left_shift = output_shift > 0 ? 0 : -output_shift;
+    const int right_shift = output_shift > 0 ? 0 : -output_shift;
+    const int left_shift = output_shift > 0 ? output_shift : 0;
     for (int c = 0; c < output_depth; c += 4) {
       // Accumulation loop.
       int32x4_t row_accum0 = vdupq_n_s32(0);
@@ -1443,8 +1445,8 @@ inline void ShuffledFullyConnectedWorkerImpl(
       vst1_s16(output_data + c, res16);
     }
   } else if (batches == 4) {
-    const int right_shift = output_shift > 0 ? output_shift : 0;
-    const int left_shift = output_shift > 0 ? 0 : -output_shift;
+    const int right_shift = output_shift > 0 ? 0 : -output_shift;
+    const int left_shift = output_shift > 0 ? output_shift : 0;
     for (int c = 0; c < output_depth; c += 4) {
       const int8* shuffled_input_ptr =
           reinterpret_cast<const int8*>(shuffled_input_workspace_data);
@@ -1575,8 +1577,8 @@ inline void ShuffledFullyConnectedWorkerImpl(
         // (16-bit, typically 3 integer bits) fixed-point format. The quantized
         // multiplier and shift here have been pre-computed offline
         // (e.g. by toco).
-        acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                            -output_shift);
+        acc =
+            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
         // Saturate, cast to int16, and store to output array.
         acc = std::max(acc, -32768);
         acc = std::min(acc, 32767);
@@ -1627,7 +1629,7 @@ inline void ShuffledFullyConnectedWorkerImpl(
           // quantized multiplier and shift here have been pre-computed offline
           // (e.g. by toco).
           acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              -output_shift);
+                                              output_shift);
           // Saturate, cast to int16, and store to output array.
           acc = std::max(acc, -32768);
           acc = std::min(acc, 32767);
@@ -1818,7 +1820,8 @@ inline void ShuffledFullyConnected(
     uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
   tflite::FullyConnectedParams op_params;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
   op_params.quantized_activation_min = output_activation_min;
   op_params.quantized_activation_max = output_activation_max;
 
@@ -2437,7 +2440,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   gemmlowp::MatrixMap<uint8, gemmlowp::MapOrder::ColMajor> output_matrix(
       output_data, output_rows, output_cols);
   const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp(
-      bias_data, output_rows, output_offset, output_multiplier, -output_shift,
+      bias_data, output_rows, output_offset, output_multiplier, output_shift,
       output_activation_min, output_activation_max);
   gemmlowp::GemmWithOutputPipeline<uint8, uint8,
                                    gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
@@ -2471,7 +2474,8 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
   op_params.weights_offset = filter_offset;
   op_params.output_offset = output_offset;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
   op_params.quantized_activation_min = output_activation_min;
   op_params.quantized_activation_max = output_activation_max;
 
@@ -2792,6 +2796,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input,
     *output_inv_sqrt <<= -*output_shift;
     *output_shift = 0;
   }
+  // Convert right shift (right is positive) to left shift.
   *output_shift *= kReverseShift;
 }
 
@@ -5018,7 +5023,7 @@ inline void LogSoftmax(const SoftmaxParams& params,
         std::max(diff_min - 1,  // Note use of > below instead of >= above.
                  MultiplyByQuantizedMultiplierSmallerThanOneExp(
                      rescaled_diff_min, reverse_scaling_divisor,
-                     kReverseShift * reverse_scaling_right_shift));
+                     -reverse_scaling_right_shift));
 
     for (int c = 0; c < depth; ++c) {
       int32 input_diff = static_cast<int32>(block_input_data[c]) - max_in_row;
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
index 38aea14c21..ecc655cf99 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
@@ -26,6 +26,9 @@ limitations under the License.
 namespace tflite {
 namespace reference_ops {
 
+// TODO(b/80418076): Move to legacy ops file, along with invocations.
+static constexpr int kDepthwiseReverseShift = -1;
+
 inline void DepthwiseConv(
     const DepthwiseParams& params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& filter_shape,
@@ -95,7 +98,7 @@ inline void DepthwiseConv(
               acc += bias_data[oc];
             }
             acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                                -output_shift);
+                                                output_shift);
             acc += output_offset;
             acc = std::max(acc, output_activation_min);
             acc = std::min(acc, output_activation_max);
@@ -137,7 +140,8 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
   op_params.weights_offset = filter_offset;
   op_params.output_offset = output_offset;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kDepthwiseReverseShift * output_shift;
 
   DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 87bcc8c219..d315debdda 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -384,7 +384,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
             acc += bias_data[out_channel];
           }
           acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              kReverseShift * output_shift);
+                                              output_shift);
           acc += output_offset;
           acc = std::max(acc, output_activation_min);
           acc = std::min(acc, output_activation_max);
@@ -422,7 +422,8 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
   op_params.weights_offset = filter_offset;
   op_params.output_offset = output_offset;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
   op_params.quantized_activation_min = output_activation_min;
   op_params.quantized_activation_max = output_activation_max;
 
@@ -714,8 +715,7 @@ inline void FullyConnected(
       if (bias_data) {
         acc += bias_data[out_c];
       }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                          kReverseShift * output_shift);
+      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
       acc += output_offset;
       acc = std::max(acc, output_activation_min);
       acc = std::min(acc, output_activation_max);
@@ -740,7 +740,8 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
   op_params.weights_offset = filter_offset;
   op_params.output_offset = output_offset;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
   op_params.quantized_activation_min = output_activation_min;
   op_params.quantized_activation_max = output_activation_max;
 
@@ -793,8 +794,8 @@ inline void FullyConnected(
       // (16-bit, typically 3 integer bits) fixed-point format. The quantized
       // multiplier and shift here have been pre-computed offline
       // (e.g. by toco).
-      accum = MultiplyByQuantizedMultiplier(accum, output_multiplier,
-                                            -output_shift);
+      accum =
+          MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
       // Saturate, cast to int16, and store to output array.
       accum = std::max(accum, output_activation_min - output_offset);
       accum = std::min(accum, output_activation_max - output_offset);
@@ -820,7 +821,8 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
   op_params.weights_offset = filter_offset;
   op_params.output_offset = output_offset;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
   op_params.quantized_activation_min = output_activation_min;
   op_params.quantized_activation_max = output_activation_max;
 
@@ -919,8 +921,8 @@ inline void ShuffledFullyConnected(
         // (16-bit, typically 3 integer bits) fixed-point format. The quantized
         // multiplier and shift here have been pre-computed offline
         // (e.g. by toco).
-        acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                            -output_shift);
+        acc =
+            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
         // Saturate, cast to int16, and store to output array.
         acc = std::max(acc, output_activation_min);
         acc = std::min(acc, output_activation_max);
@@ -971,7 +973,7 @@ inline void ShuffledFullyConnected(
           // quantized multiplier and shift here have been pre-computed offline
           // (e.g. by toco).
           acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              -output_shift);
+                                              output_shift);
           // Saturate, cast to int16, and store to output array.
           acc = std::max(acc, output_activation_min);
           acc = std::min(acc, output_activation_max);
@@ -996,7 +998,8 @@ inline void ShuffledFullyConnected(
     uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
   tflite::FullyConnectedParams op_params;
   op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
   op_params.quantized_activation_min = output_activation_min;
   op_params.quantized_activation_max = output_activation_max;
 
@@ -1154,6 +1157,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input,
     *output_inv_sqrt <<= -*output_shift;
     *output_shift = 0;
   }
+  // Convert right shift (right is positive) to left shift.
   *output_shift *= kReverseShift;
 }
 
@@ -3464,7 +3468,7 @@ inline void LogSoftmax(const SoftmaxParams& params,
         std::max(diff_min - 1,  // Note use of > below instead of >= above.
                  MultiplyByQuantizedMultiplierSmallerThanOneExp(
                      rescaled_diff_min, reverse_scaling_divisor,
-                     kReverseShift * reverse_scaling_right_shift));
+                     -reverse_scaling_right_shift));
 
     for (int c = 0; c < depth; ++c) {
       int32 input_diff =
@@ -4959,9 +4963,11 @@ inline void Comparison(int left_shift, const T* input1_data,
   op_params.left_shift = left_shift;
   op_params.input1_offset = input1_offset;
   op_params.input1_multiplier = input1_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
   op_params.input1_shift = kReverseShift * input1_shift;
   op_params.input2_offset = input2_offset;
   op_params.input2_multiplier = input2_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
   op_params.input2_shift = kReverseShift * input2_shift;
 
   ComparisonWithScaling<T, F>(op_params, DimsToShape(input1_dims), input1_data,
@@ -5093,9 +5099,11 @@ inline void BroadcastComparison(int left_shift, const T* input1_data,
   op_params.left_shift = left_shift;
   op_params.input1_offset = input1_offset;
   op_params.input1_multiplier = input1_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
   op_params.input1_shift = kReverseShift * input1_shift;
   op_params.input2_offset = input2_offset;
   op_params.input2_multiplier = input2_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
   op_params.input2_shift = kReverseShift * input2_shift;
 
   BroadcastComparison4DSlowWithScaling<T, F>(
-- 
GitLab


From 414ca1cda5aec72b48d5da127f61b0d05fbdc22c Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Wed, 19 Sep 2018 10:20:33 -0700
Subject: [PATCH 0379/1357] [XLA:CPU] Add an emitter for erfinv(double) and
 erfinv(half).

This is used by the random number generator. Same algorithm as for float, just with more
precision. fp16 is upcasted to fp32 and then processed with the float algorithm.

PiperOrigin-RevId: 213648736
---
 tensorflow/compiler/tests/random_ops_test.py  |  16 +-
 .../tests/stateless_random_ops_test.py        |   7 +-
 tensorflow/compiler/tf2xla/xla_cpu_backend.cc |  11 --
 .../xla/service/elemental_ir_emitter.cc       | 169 +++++++++++++-----
 4 files changed, 136 insertions(+), 67 deletions(-)

diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py
index 4932819585..c423fa5004 100644
--- a/tensorflow/compiler/tests/random_ops_test.py
+++ b/tensorflow/compiler/tests/random_ops_test.py
@@ -69,16 +69,14 @@ class RandomOpsTest(xla_test.XLATestCase):
     def rng(dtype):
       return random_ops.random_normal(shape=[2], dtype=dtype)
 
-    # TODO(b/34339814): implement inverse erf support for non-F32 types.
-    dtype = dtypes.float32
-    self._testRngIsNotConstant(rng, dtype)
+    for dtype in self._random_types() & self.float_types:
+      self._testRngIsNotConstant(rng, dtype)
 
   def testRandomUniformIsInRange(self):
     for dtype in self._random_types():
       # TODO (b/112272078): enable bfloat16 for CPU and GPU when the bug is
       # fixed.
-      if (self.device in ["XLA_GPU", "XLA_CPU"
-                         ]) and (dtype in [dtypes.bfloat16, dtypes.half]):
+      if (self.device in ["XLA_GPU", "XLA_CPU"]) and (dtype == dtypes.bfloat16):
         continue
       with self.cached_session() as sess:
         with self.test_scope():
@@ -93,13 +91,13 @@ class RandomOpsTest(xla_test.XLATestCase):
     def rng(dtype):
       return random_ops.truncated_normal(shape=[2], dtype=dtype)
 
-    # TODO(b/34339814): implement inverse erf support for non-F32 types.
-    self._testRngIsNotConstant(rng, dtypes.float32)
+    for dtype in self._random_types() & self.float_types:
+      self._testRngIsNotConstant(rng, dtype)
 
   def testTruncatedNormalIsInRange(self):
     count = 10000000
-    # TODO(b/34339814): implement inverse erf support for non-F32 types.
-    for dtype in [dtypes.float32]:
+    # TODO(b/34339814): make this test work with 16 bit float types.
+    for dtype in self._random_types() & {dtypes.float32, dtypes.float64}:
       with self.cached_session() as sess:
         with self.test_scope():
           x = random_ops.truncated_normal(shape=[count], dtype=dtype)
diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py
index 1bea7d9355..f3861043b2 100644
--- a/tensorflow/compiler/tests/stateless_random_ops_test.py
+++ b/tensorflow/compiler/tests/stateless_random_ops_test.py
@@ -34,7 +34,7 @@ class StatelessRandomOpsTest(xla_test.XLATestCase):
   """Test cases for stateless random-number generator operators."""
 
   def _random_types(self):
-    return [dtypes.float32]
+    return self.float_types & {dtypes.float32, dtypes.float64}
 
   def testDeterminism(self):
     # Stateless values should be equal iff the seeds are equal (roughly)
@@ -124,8 +124,7 @@ class StatelessRandomOpsTest(xla_test.XLATestCase):
         self.assertTrue(self._anderson_darling(y) < 2.492)
 
   def testTruncatedNormalIsInRange(self):
-    # TODO(b/34339814): implement inverse erf support for non-F32 types.
-    for dtype in [dtypes.float32]:
+    for dtype in self._random_types():
       with self.cached_session() as sess, self.test_scope():
         seed_t = array_ops.placeholder(dtypes.int32, shape=[2])
         n = 10000000
@@ -159,7 +158,7 @@ class StatelessRandomOpsTest(xla_test.XLATestCase):
         # Department of Scientific Computing website. Florida State University.
         expected_mean = mu + (normal_pdf(alpha) - normal_pdf(beta)) / z * sigma
         actual_mean = np.mean(y)
-        self.assertAllClose(actual_mean, expected_mean, atol=2e-4)
+        self.assertAllClose(actual_mean, expected_mean, atol=5e-4)
 
         expected_median = mu + probit(
             (normal_cdf(alpha) + normal_cdf(beta)) / 2.) * sigma
diff --git a/tensorflow/compiler/tf2xla/xla_cpu_backend.cc b/tensorflow/compiler/tf2xla/xla_cpu_backend.cc
index ead229aacc..bc44301d40 100644
--- a/tensorflow/compiler/tf2xla/xla_cpu_backend.cc
+++ b/tensorflow/compiler/tf2xla/xla_cpu_backend.cc
@@ -20,17 +20,6 @@ limitations under the License.
 namespace tensorflow {
 
 bool CpuOpFilter(KernelDef* kdef) {
-  // TODO(b/34339814): implement inverse erf for double types and remove this
-  // workaround.
-  if (kdef->op() == "RandomStandardNormal") {
-    kdef->clear_constraint();
-    // Change the type constraint to permit only DTD_FLOAT.
-    KernelDef::AttrConstraint* attr_constraint = kdef->add_constraint();
-    attr_constraint->set_name("dtype");
-    attr_constraint->mutable_allowed_values()->mutable_list()->add_type(
-        DT_FLOAT);
-    return true;
-  }
   if (kdef->op() == "Const") {
     AddDtypeToKernalDefConstraint("dtype", DT_STRING, kdef);
   }
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
index 4bb1e071d8..515267edd7 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -847,29 +847,34 @@ llvm::Value* ElementalIrEmitter::EmitFloatMin(llvm::Value* lhs_value,
 
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfInv(PrimitiveType prim_type,
                                                       llvm::Value* x) {
-  if (prim_type != F32) {
-    // TODO(b/34339814): Implement inverse erf for F64.
+  if (prim_type != F16 && prim_type != F32 && prim_type != F64) {
     return Unimplemented(
         "Inverse erf is only implemented for element "
-        "type F32.");
+        "types F16, F32 and F64.");
   }
-  auto getFloat = [&](const float f) {
-    return llvm::ConstantFP::get(b_->getFloatTy(), f);
+
+  // Upcast half to float.
+  if (prim_type == F16) {
+    x = b_->CreateFPExt(x, b_->getFloatTy());
+  }
+
+  auto get_float = [&](const double f) {
+    return llvm::ConstantFP::get(x->getType(), f);
   };
-  auto multiply_add = [&](absl::Span<const float> coefficients,
+  auto multiply_add = [&](absl::Span<const double> coefficients,
                           llvm::Value* w) {
-    llvm::Value* p = getFloat(coefficients.front());
+    llvm::Value* p = get_float(coefficients.front());
     coefficients.remove_prefix(1);
     for (float coefficient : coefficients) {
-      p = FAdd(FMul(p, w), getFloat(coefficient));
+      p = FAdd(FMul(p, w), get_float(coefficient));
     }
     return p;
   };
 
   // Approximation for inverse error function from
   //   Giles, M., "Approximating the erfinv function".
-  // The approximation has the form:
-  //   w = log((1-x)*(1+x))
+  // The approximation has the form (float version):
+  //   w = -log((1-x)*(1+x))
   //   if ( w < 5 ) {
   //     w = w - 2.5
   //     p = sum_{i=1}^n lq[i]*w^i
@@ -879,46 +884,124 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfInv(PrimitiveType prim_type,
   //   }
   //   return p*x
   llvm::Function* logf_fn = llvm::Intrinsic::getDeclaration(
-      module_, llvm::Intrinsic::log, {b_->getFloatTy()});
+      module_, llvm::Intrinsic::log, {x->getType()});
 
-  llvm::Value* w = FNeg(
-      Call(logf_fn, {FMul(FSub(getFloat(1.0f), x), FAdd(getFloat(1.0f), x))}));
+  llvm::Value* w = FNeg(Call(
+      logf_fn, {FMul(FSub(get_float(1.0f), x), FAdd(get_float(1.0f), x))}));
 
   llvm::Value* p_addr =
-      llvm_ir::EmitAllocaAtFunctionEntry(b_->getFloatTy(), "p.addr", b_);
+      llvm_ir::EmitAllocaAtFunctionEntry(x->getType(), "p.addr", b_);
+
+  if (prim_type == F16 || prim_type == F32) {
+    llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(
+        FCmpOLT(w, get_float(5.0f)), "w_less_than_five", b_);
+    // Handle true BB.
+    SetToFirstInsertPoint(if_data.true_block, b_);
+    {
+      llvm::Value* lw = FSub(w, get_float(2.5f));
+      absl::Span<const double> lq{
+          2.81022636e-08f,  3.43273939e-07f, -3.5233877e-06f,
+          -4.39150654e-06f, 0.00021858087f,  -0.00125372503f,
+          -0.00417768164f,  0.246640727f,    1.50140941f};
+      llvm::Value* p = multiply_add(lq, lw);
+      Store(p, p_addr);
+    }
 
-  llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(
-      FCmpOLT(w, getFloat(5.0f)), "w_less_than_five", b_);
-  // Handle true BB.
-  SetToFirstInsertPoint(if_data.true_block, b_);
-  {
-    llvm::Value* lw = FSub(w, getFloat(2.5f));
-    absl::Span<const float> lq{
-        2.81022636e-08f,  3.43273939e-07f, -3.5233877e-06f,
-        -4.39150654e-06f, 0.00021858087f,  -0.00125372503f,
-        -0.00417768164f,  0.246640727f,    1.50140941f};
-    llvm::Value* p = multiply_add(lq, lw);
-    Store(p, p_addr);
-  }
+    // Handle false BB.
+    SetToFirstInsertPoint(if_data.false_block, b_);
+    {
+      llvm::Function* sqrtf_fn = llvm::Intrinsic::getDeclaration(
+          module_, llvm::Intrinsic::sqrt, {b_->getFloatTy()});
+
+      llvm::Value* gw = FSub(Call(sqrtf_fn, w), get_float(3.0f));
+      absl::Span<const double> gq{
+          -0.000200214257f, 0.000100950558f, 0.00134934322f,
+          -0.00367342844f,  0.00573950773f,  -0.0076224613f,
+          0.00943887047f,   1.00167406f,     2.83297682f};
+      llvm::Value* p = multiply_add(gq, gw);
+      Store(p, p_addr);
+    }
 
-  // Handle false BB.
-  SetToFirstInsertPoint(if_data.false_block, b_);
-  {
-    llvm::Function* sqrtf_fn = llvm::Intrinsic::getDeclaration(
-        module_, llvm::Intrinsic::sqrt, {b_->getFloatTy()});
-
-    llvm::Value* gw = FSub(Call(sqrtf_fn, w), getFloat(3.0f));
-    absl::Span<const float> gq{
-        -0.000200214257f, 0.000100950558f, 0.00134934322f,
-        -0.00367342844f,  0.00573950773f,  -0.0076224613f,
-        0.00943887047f,   1.00167406f,     2.83297682f};
-    llvm::Value* p = multiply_add(gq, gw);
-    Store(p, p_addr);
-  }
+    SetToFirstInsertPoint(if_data.after_block, b_);
+  } else {
+    DCHECK(prim_type == F64);
+
+    llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(
+        FCmpOLT(w, get_float(6.25)), "w_less_than_6.25", b_);
+
+    SetToFirstInsertPoint(if_data.true_block, b_);
+    {
+      llvm::Value* lw = FSub(w, get_float(3.125));
+      absl::Span<const double> c{
+          -3.6444120640178196996e-21, -1.685059138182016589e-19,
+          1.2858480715256400167e-18,  1.115787767802518096e-17,
+          -1.333171662854620906e-16,  2.0972767875968561637e-17,
+          6.6376381343583238325e-15,  -4.0545662729752068639e-14,
+          -8.1519341976054721522e-14, 2.6335093153082322977e-12,
+          -1.2975133253453532498e-11, -5.4154120542946279317e-11,
+          1.051212273321532285e-09,   -4.1126339803469836976e-09,
+          -2.9070369957882005086e-08, 4.2347877827932403518e-07,
+          -1.3654692000834678645e-06, -1.3882523362786468719e-05,
+          0.0001867342080340571352,   -0.00074070253416626697512,
+          -0.0060336708714301490533,  0.24015818242558961693,
+          1.6536545626831027356};
+      llvm::Value* p = multiply_add(c, lw);
+      Store(p, p_addr);
+    }
 
-  SetToFirstInsertPoint(if_data.after_block, b_);
+    SetToFirstInsertPoint(if_data.false_block, b_);
+    llvm_ir::LlvmIfData if_data_second = llvm_ir::EmitIfThenElse(
+        FCmpOLT(w, get_float(16.0)), "w_less_than_16", b_);
+    SetToFirstInsertPoint(if_data_second.true_block, b_);
+    {
+      llvm::Function* sqrtf_fn = llvm::Intrinsic::getDeclaration(
+          module_, llvm::Intrinsic::sqrt, {b_->getDoubleTy()});
+
+      llvm::Value* gw = FSub(Call(sqrtf_fn, w), get_float(3.25));
+      absl::Span<const double> t1{
+          2.2137376921775787049e-09,  9.0756561938885390979e-08,
+          -2.7517406297064545428e-07, 1.8239629214389227755e-08,
+          1.5027403968909827627e-06,  -4.013867526981545969e-06,
+          2.9234449089955446044e-06,  1.2475304481671778723e-05,
+          -4.7318229009055733981e-05, 6.8284851459573175448e-05,
+          2.4031110387097893999e-05,  -0.0003550375203628474796,
+          0.00095328937973738049703,  -0.0016882755560235047313,
+          0.0024914420961078508066,   -0.0037512085075692412107,
+          0.005370914553590063617,    1.0052589676941592334,
+          3.0838856104922207635};
+      llvm::Value* p = multiply_add(t1, gw);
+      Store(p, p_addr);
+    }
+
+    SetToFirstInsertPoint(if_data_second.false_block, b_);
+    {
+      llvm::Function* sqrtf_fn = llvm::Intrinsic::getDeclaration(
+          module_, llvm::Intrinsic::sqrt, {b_->getDoubleTy()});
+
+      llvm::Value* gw = FSub(Call(sqrtf_fn, w), get_float(5.0));
+      absl::Span<const double> t2{
+          -2.7109920616438573243e-11, -2.5556418169965252055e-10,
+          1.5076572693500548083e-09,  -3.7894654401267369937e-09,
+          7.6157012080783393804e-09,  -1.4960026627149240478e-08,
+          2.9147953450901080826e-08,  -6.7711997758452339498e-08,
+          2.2900482228026654717e-07,  -9.9298272942317002539e-07,
+          4.5260625972231537039e-06,  -1.9681778105531670567e-05,
+          7.5995277030017761139e-05,  -0.00021503011930044477347,
+          -0.00013871931833623122026, 1.0103004648645343977,
+          4.8499064014085844221};
+      llvm::Value* p = multiply_add(t2, gw);
+      Store(p, p_addr);
+    }
+
+    SetToFirstInsertPoint(if_data.after_block, b_);
+  }
   llvm::Value* p = Load(p_addr);
-  return FMul(p, x);
+  x = FMul(p, x);
+  // Trunc back to half if needed.
+  if (prim_type == F16) {
+    x = b_->CreateFPTrunc(x, b_->getHalfTy());
+  }
+  return x;
 }
 
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfcInv(PrimitiveType prim_type,
-- 
GitLab


From 5d5bc6d2b592374d7862cdebbc53e07b47e29c95 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 10:32:48 -0700
Subject: [PATCH 0380/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213651158
---
 .../internal/reference/reference_ops.h        | 292 +++++++++++++-----
 1 file changed, 210 insertions(+), 82 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index d315debdda..76fa1944bc 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -2412,56 +2412,90 @@ void DepthConcatenation(const Scalar* const* input_data,
                             output_data, output_dims);
 }
 
-inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
-                     const float* prev_activ_data,
-                     const Dims<4>& prev_activ_dims, const float* weights_data,
-                     const Dims<4>& weights_dims, const float* bias_data,
-                     const Dims<4>& bias_dims, const float* prev_state_data,
-                     const Dims<4>& prev_state_dims, float* output_state_data,
-                     const Dims<4>& output_state_dims, float* output_activ_data,
-                     const Dims<4>& output_activ_dims, float* concat_temp_data,
-                     const Dims<4>& concat_temp_dims, float* activ_temp_data,
-                     const Dims<4>& activ_temp_dims) {
+inline void LstmCell(
+    const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
+    const float* input_data, const RuntimeShape& unextended_prev_activ_shape,
+    const float* prev_activ_data, const RuntimeShape& weights_shape,
+    const float* weights_data, const RuntimeShape& unextended_bias_shape,
+    const float* bias_data, const RuntimeShape& unextended_prev_state_shape,
+    const float* prev_state_data,
+    const RuntimeShape& unextended_output_state_shape, float* output_state_data,
+    const RuntimeShape& unextended_output_activ_shape, float* output_activ_data,
+    const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data,
+    const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) {
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape =
+      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  const int weights_dim_count = weights_shape.DimensionsCount();
   const int batches =
-      MatchingArraySize(input_dims, 3, prev_activ_dims, 3, prev_state_dims, 3,
-                        output_state_dims, 3, output_activ_dims, 3);
+      MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+                  output_state_shape, 0, output_activ_shape, 0);
   const int height =
-      MatchingArraySize(input_dims, 2, prev_activ_dims, 2, prev_state_dims, 2,
-                        output_state_dims, 2, output_activ_dims, 2);
+      MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+                  output_state_shape, 1, output_activ_shape, 1);
   const int width =
-      MatchingArraySize(input_dims, 1, prev_activ_dims, 1, prev_state_dims, 1,
-                        output_state_dims, 1, output_activ_dims, 1);
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 2), 1);
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 3), 1);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int prev_activ_depth = ArraySize(prev_activ_dims, 0);
+      MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+                  output_state_shape, 2, output_activ_shape, 2);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
   const int total_input_depth = prev_activ_depth + input_depth;
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 0), total_input_depth);
-  TFLITE_CHECK_EQ(MatchingArraySize(bias_dims, 1, bias_dims, 2, bias_dims, 3),
-                  1);
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
+                   total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
   const int intern_activ_depth =
-      MatchingArraySize(weights_dims, 1, bias_dims, 0);
-  TFLITE_CHECK_EQ(intern_activ_depth % 4, 0);
+      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
+                   intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
   const int output_depth =
-      MatchingArraySize(prev_state_dims, 0, prev_activ_dims, 0,
-                        output_state_dims, 0, output_activ_dims, 0);
-  TFLITE_CHECK_EQ(output_depth, intern_activ_depth / 4);
+      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                  3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
 
   // Concatenate prev_activ and input data together
   std::vector<float const*> concat_input_arrays_data;
-  std::vector<Dims<4> const*> concat_input_arrays_dims;
+  std::vector<RuntimeShape const*> concat_input_arrays_shapes;
   concat_input_arrays_data.push_back(input_data);
   concat_input_arrays_data.push_back(prev_activ_data);
-  concat_input_arrays_dims.push_back(&input_dims);
-  concat_input_arrays_dims.push_back(&prev_activ_dims);
-  Concatenation<FusedActivationFunctionType::kNone, float>(
-      0, &(concat_input_arrays_data[0]), &(concat_input_arrays_dims[0]),
-      concat_input_arrays_data.size(), concat_temp_data, concat_temp_dims);
+  concat_input_arrays_shapes.push_back(&input_shape);
+  concat_input_arrays_shapes.push_back(&prev_activ_shape);
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = concat_input_arrays_data.size();
+  Concatenation(concat_params, &(concat_input_arrays_shapes[0]),
+                &(concat_input_arrays_data[0]), concat_temp_shape,
+                concat_temp_data);
 
   // Fully connected
-  FullyConnected<FusedActivationFunctionType::kNone>(
-      concat_temp_data, concat_temp_dims, weights_data, weights_dims, bias_data,
-      bias_dims, activ_temp_data, activ_temp_dims);
+  tflite::FullyConnectedParams fc_params;
+  fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+  fc_params.float_activation_max = std::numeric_limits<float>::max();
+  FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape,
+                 weights_data, bias_shape, bias_data, activ_temp_shape,
+                 activ_temp_data);
 
   // Memory state update (the LSTM "guts")
   for (int b = 0; b < batches; ++b) {
@@ -2470,24 +2504,24 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
         for (int c = 0; c < output_depth; ++c) {
           const float input_gate =
               1.f /
-              (1.f + std::exp(-activ_temp_data[Offset(
-                         activ_temp_dims, 0 * output_depth + c, w, h, b)]));
+              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
+                                                      0 * output_depth + c)]));
           const float new_input = std::tanh(activ_temp_data[Offset(
-              activ_temp_dims, 1 * output_depth + c, w, h, b)]);
+              activ_temp_shape, b, h, w, 1 * output_depth + c)]);
           const float forget_gate =
               1.f /
-              (1.f + std::exp(-activ_temp_data[Offset(
-                         activ_temp_dims, 2 * output_depth + c, w, h, b)]));
+              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
+                                                      2 * output_depth + c)]));
           const float output_gate =
               1.f /
-              (1.f + std::exp(-activ_temp_data[Offset(
-                         activ_temp_dims, 3 * output_depth + c, w, h, b)]));
+              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
+                                                      3 * output_depth + c)]));
           const float new_state =
               input_gate * new_input +
               forget_gate *
-                  prev_state_data[Offset(prev_state_dims, c, w, h, b)];
-          output_state_data[Offset(output_state_dims, c, w, h, b)] = new_state;
-          output_activ_data[Offset(output_activ_dims, c, w, h, b)] =
+                  prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+          output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+          output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
               output_gate * std::tanh(new_state);
         }
       }
@@ -2495,6 +2529,31 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
+                     const float* prev_activ_data,
+                     const Dims<4>& prev_activ_dims, const float* weights_data,
+                     const Dims<4>& weights_dims, const float* bias_data,
+                     const Dims<4>& bias_dims, const float* prev_state_data,
+                     const Dims<4>& prev_state_dims, float* output_state_data,
+                     const Dims<4>& output_state_dims, float* output_activ_data,
+                     const Dims<4>& output_activ_dims, float* concat_temp_data,
+                     const Dims<4>& concat_temp_dims, float* activ_temp_data,
+                     const Dims<4>& activ_temp_dims) {
+  tflite::LstmCellParams op_params;
+  // Float LSTM cell does not need parameters to be set: leave untouched.
+
+  LstmCell(op_params, DimsToShape(input_dims), input_data,
+           DimsToShape(prev_activ_dims), prev_activ_data,
+           DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims),
+           bias_data, DimsToShape(prev_state_dims), prev_state_data,
+           DimsToShape(output_state_dims), output_state_data,
+           DimsToShape(output_activ_dims), output_activ_data,
+           DimsToShape(concat_temp_dims), concat_temp_data,
+           DimsToShape(activ_temp_dims), activ_temp_data);
+}
+
 // Quantized LSTM cell implementation.
 // The quantization of the input, output arrays is as follows:
 //  - The input activations are quantized as uint8 on the interval
@@ -2580,52 +2639,90 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
 // aiming for 16-bit fixed-point quantization of these internal nodes here.
 //
 template <int StateIntegerBits>
-void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
-              const uint8* prev_activ_data_uint8,
-              const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8,
-              const Dims<4>& weights_dims, const int32* bias_data_int32,
-              const Dims<4>& bias_dims, const int16* prev_state_data_int16,
-              const Dims<4>& prev_state_dims, int16* output_state_data_int16,
-              const Dims<4>& output_state_dims, uint8* output_activ_data_uint8,
-              const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8,
-              const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16,
-              const Dims<4>& activ_temp_dims, int32 weights_zero_point,
-              int32 accum_multiplier, int accum_shift,
-              gemmlowp::GemmContext* gemm_context) {
+inline void LstmCell(
+    const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
+    const uint8* input_data_uint8,
+    const RuntimeShape& unextended_prev_activ_shape,
+    const uint8* prev_activ_data_uint8, const RuntimeShape& weights_shape,
+    const uint8* weights_data_uint8, const RuntimeShape& unextended_bias_shape,
+    const int32* bias_data_int32,
+    const RuntimeShape& unextended_prev_state_shape,
+    const int16* prev_state_data_int16,
+    const RuntimeShape& unextended_output_state_shape,
+    int16* output_state_data_int16,
+    const RuntimeShape& unextended_output_activ_shape,
+    uint8* output_activ_data_uint8,
+    const RuntimeShape& unextended_concat_temp_shape,
+    uint8* concat_temp_data_uint8,
+    const RuntimeShape& unextended_activ_temp_shape,
+    int16* activ_temp_data_int16, gemmlowp::GemmContext* gemm_context) {
   (void)gemm_context;  // only used in optimized code.
+  int32 weights_zero_point = params.weights_zero_point;
+  int32 accum_multiplier = params.accum_multiplier;
+  int accum_shift = params.accum_shift;
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape =
+      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
 
   // Gather dimensions information, and perform consistency checks.
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_dims, 0, prev_activ_dims, prev_state_dims,
-                              output_state_dims, output_activ_dims);
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 2), 1);
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 3), 1);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int prev_activ_depth = ArraySize(prev_activ_dims, 0);
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int outer_size = MatchingFlatSizeSkipDim(
+      input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape,
+      output_activ_shape);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
   const int total_input_depth = prev_activ_depth + input_depth;
-  TFLITE_CHECK_EQ(ArraySize(weights_dims, 0), total_input_depth);
-  TFLITE_CHECK_EQ(MatchingArraySize(bias_dims, 1, bias_dims, 2, bias_dims, 3),
-                  1);
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
+                   total_input_depth);
   const int intern_activ_depth =
-      MatchingArraySize(weights_dims, 1, bias_dims, 0);
-  TFLITE_CHECK_EQ(intern_activ_depth % 4, 0);
+      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
+                   intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
   const int output_depth =
-      MatchingArraySize(prev_state_dims, 0, prev_activ_dims, 0,
-                        output_state_dims, 0, output_activ_dims, 0);
-  TFLITE_CHECK_EQ(output_depth, intern_activ_depth / 4);
-  const int fc_batches = FlatSizeSkipDim(activ_temp_dims, 0);
+      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                  3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
   const int fc_output_depth =
-      MatchingArraySize(weights_dims, 1, activ_temp_dims, 0);
-  const int fc_accum_depth = ArraySize(weights_dims, 0);
-  TFLITE_CHECK_EQ(fc_output_depth, 4 * output_depth);
+      MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+  const int fc_accum_depth = total_input_depth;
+  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
 
   // Depth-concatenate prev_activ and input data together.
   uint8 const* concat_input_arrays_data[2] = {input_data_uint8,
                                               prev_activ_data_uint8};
-  Dims<4> const* concat_input_arrays_dims[2] = {&input_dims, &prev_activ_dims};
-  Concatenation<FusedActivationFunctionType::kNone, uint8>(
-      0, concat_input_arrays_data, concat_input_arrays_dims, 2,
-      concat_temp_data_uint8, concat_temp_dims);
+  const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
+                                                       &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes,
+                concat_input_arrays_data, concat_temp_shape,
+                concat_temp_data_uint8);
 
   // Implementation of the fully connected node inside the LSTM cell.
   // The operands are 8-bit integers, the accumulators are internally 32bit
@@ -2731,6 +2828,37 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+template <int StateIntegerBits>
+void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
+              const uint8* prev_activ_data_uint8,
+              const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8,
+              const Dims<4>& weights_dims, const int32* bias_data_int32,
+              const Dims<4>& bias_dims, const int16* prev_state_data_int16,
+              const Dims<4>& prev_state_dims, int16* output_state_data_int16,
+              const Dims<4>& output_state_dims, uint8* output_activ_data_uint8,
+              const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8,
+              const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16,
+              const Dims<4>& activ_temp_dims, int32 weights_zero_point,
+              int32 accum_multiplier, int accum_shift,
+              gemmlowp::GemmContext* gemm_context) {
+  tflite::LstmCellParams op_params;
+  op_params.weights_zero_point = weights_zero_point;
+  op_params.accum_multiplier = accum_multiplier;
+  op_params.accum_shift = accum_shift;
+
+  LstmCell<StateIntegerBits>(
+      op_params, DimsToShape(input_dims), input_data_uint8,
+      DimsToShape(prev_activ_dims), prev_activ_data_uint8,
+      DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims),
+      bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16,
+      DimsToShape(output_state_dims), output_state_data_int16,
+      DimsToShape(output_activ_dims), output_activ_data_uint8,
+      DimsToShape(concat_temp_dims), concat_temp_data_uint8,
+      DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context);
+}
+
 template <typename Scalar>
 void Split(const SplitParams& params, const RuntimeShape& input_shape,
            const Scalar* input_data, const RuntimeShape* const* output_shapes,
-- 
GitLab


From 428f7037bef6dbfdd01a4283a6c76221d381ef7e Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Wed, 19 Sep 2018 10:43:53 -0700
Subject: [PATCH 0381/1357] Fix estimator_training test flakiness.

PiperOrigin-RevId: 213653403
---
 tensorflow/contrib/distribute/python/BUILD    |   3 -
 .../python/estimator_training_test.py         | 248 +++++-------------
 .../python/multi_worker_test_base.py          |  53 +++-
 3 files changed, 110 insertions(+), 194 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index f72b827e04..ebea512c04 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -472,11 +472,8 @@ cuda_py_test(
         "//tensorflow/python:summary",
     ],
     tags = [
-        "manual",
         "multi_and_single_gpu",
         "no_pip",
-        "nogpu",
-        "notap",
     ],
 )
 
diff --git a/tensorflow/contrib/distribute/python/estimator_training_test.py b/tensorflow/contrib/distribute/python/estimator_training_test.py
index 5348512016..157618f72f 100644
--- a/tensorflow/contrib/distribute/python/estimator_training_test.py
+++ b/tensorflow/contrib/distribute/python/estimator_training_test.py
@@ -26,21 +26,12 @@ import tempfile
 import threading
 from absl.testing import parameterized
 import numpy as np
-import six
 
-_portpicker_import_error = None
-try:
-  import portpicker  # pylint: disable=g-import-not-at-top
-except ImportError as _error:  # pylint: disable=invalid-name
-  _portpicker_import_error = _error
-  portpicker = None
-
-# pylint: disable=g-import-not-at-top
 from tensorflow.contrib.distribute.python import combinations
 from tensorflow.contrib.distribute.python import mirrored_strategy
+from tensorflow.contrib.distribute.python import multi_worker_test_base
 from tensorflow.contrib.distribute.python import parameter_server_strategy
 from tensorflow.contrib.optimizer_v2 import adagrad
-from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.distribute import distribute_coordinator as dc
 from tensorflow.python.distribute import estimator_training as dc_training
@@ -57,7 +48,6 @@ from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow.python.summary import summary_iterator
 from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import server_lib
 
 BATCH_SIZE = 10
 LABEL_DIMENSION = 2
@@ -73,130 +63,38 @@ EVALUATOR = dc._TaskType.EVALUATOR
 WORKER = dc._TaskType.WORKER
 PS = dc._TaskType.PS
 
-original_run_distribute_coordinator = dc.run_distribute_coordinator
-
-
-# TODO(yuefengz): merge this method back to test_util.
-def _create_local_cluster(num_workers,
-                          num_ps,
-                          has_eval=False,
-                          protocol="grpc",
-                          worker_config=None,
-                          ps_config=None):
-  if _portpicker_import_error:
-    raise _portpicker_import_error  # pylint: disable=raising-bad-type
-  worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
-  ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
-
-  cluster_dict = {
-      "worker": ["localhost:%s" % port for port in worker_ports],
-      "ps": ["localhost:%s" % port for port in ps_ports]
-  }
-  if has_eval:
-    cluster_dict["evaluator"] = ["localhost:%s" % portpicker.pick_unused_port()]
-
-  cs = server_lib.ClusterSpec(cluster_dict)
-
-  workers = [
-      server_lib.Server(
-          cs,
-          job_name="worker",
-          protocol=protocol,
-          task_index=ix,
-          config=worker_config,
-          start=True) for ix in range(num_workers)
-  ]
-  ps_servers = [
-      server_lib.Server(
-          cs,
-          job_name="ps",
-          protocol=protocol,
-          task_index=ix,
-          config=ps_config,
-          start=True) for ix in range(num_ps)
-  ]
-  if has_eval:
-    evals = [
-        server_lib.Server(
-            cs,
-            job_name="evaluator",
-            protocol=protocol,
-            task_index=0,
-            config=worker_config,
-            start=True)
-    ]
-  else:
-    evals = []
-
-  return workers, ps_servers, evals
-
-
-def _create_in_process_cluster(num_workers, num_ps, has_eval=False):
-  """Create an in-process cluster that consists of only standard server."""
-  # Leave some memory for cuda runtime.
-  if has_eval:
-    gpu_mem_frac = 0.7 / (num_workers + 1)
-  else:
-    gpu_mem_frac = 0.7 / num_workers
-
-  worker_config = config_pb2.ConfigProto()
-  worker_config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_frac
-
-  # Enable collective ops which has no impact on non-collective ops.
-  # TODO(yuefengz, tucker): removing this after we move the initialization of
-  # collective mgr to the session level.
-  worker_config.experimental.collective_group_leader = (
-      "/job:worker/replica:0/task:0")
-
-  ps_config = config_pb2.ConfigProto()
-  ps_config.device_count["GPU"] = 0
-
-  return _create_local_cluster(
-      num_workers,
-      num_ps=num_ps,
-      has_eval=has_eval,
-      worker_config=worker_config,
-      ps_config=ps_config,
-      protocol="grpc")
-
-
-def _create_cluster_spec(has_chief=False,
-                         num_workers=1,
-                         num_ps=0,
-                         has_eval=False):
-  if _portpicker_import_error:
-    raise _portpicker_import_error  # pylint: disable=raising-bad-type
-
-  cluster_spec = {}
-  if has_chief:
-    cluster_spec[CHIEF] = ["localhost:%s" % portpicker.pick_unused_port()]
-  if num_workers:
-    cluster_spec[WORKER] = [
-        "localhost:%s" % portpicker.pick_unused_port()
-        for _ in range(num_workers)
-    ]
-  if num_ps:
-    cluster_spec[PS] = [
-        "localhost:%s" % portpicker.pick_unused_port() for _ in range(num_ps)
-    ]
-  if has_eval:
-    cluster_spec[EVALUATOR] = ["localhost:%s" % portpicker.pick_unused_port()]
-  return cluster_spec
+original_run_std_server = dc._run_std_server
 
 
-def _bytes_to_str(maybe_bytes):
-  if isinstance(maybe_bytes, six.string_types):
-    return maybe_bytes
-  else:
-    return str(maybe_bytes, "utf-8")
+class MockOsEnv(dict):
+
+  def __init__(self, *args):
+    self._thread_local = threading.local()
+    super(MockOsEnv, self).__init__(*args)
+
+  def get(self, key, default):
+    if not hasattr(self._thread_local, "dict"):
+      self._thread_local.dict = dict()
+    if key == "TF_CONFIG":
+      return dict.get(self._thread_local.dict, key, default)
+    else:
+      return dict.get(self, key, default)
 
+  def __getitem__(self, key):
+    if not hasattr(self._thread_local, "dict"):
+      self._thread_local.dict = dict()
+    if key == "TF_CONFIG":
+      return dict.__getitem__(self._thread_local.dict, key)
+    else:
+      return dict.__getitem__(self, key)
 
-def _strip_protocol(target):
-  # cluster_spec expects "host:port" strings.
-  if "//" in target:
-    return target.split("//")[1]
-  else:
-    return target
+  def __setitem__(self, key, val):
+    if not hasattr(self._thread_local, "dict"):
+      self._thread_local.dict = dict()
+    if key == "TF_CONFIG":
+      return dict.__setitem__(self._thread_local.dict, key, val)
+    else:
+      return dict.__setitem__(self, key, val)
 
 
 class DistributeCoordinatorIntegrationTest(test.TestCase,
@@ -205,22 +103,20 @@ class DistributeCoordinatorIntegrationTest(test.TestCase,
   @classmethod
   def setUpClass(cls):
     """Create a local cluster with 2 workers."""
-    cls._workers, cls._ps, cls._evals = _create_in_process_cluster(
+    cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
         num_workers=3, num_ps=2, has_eval=True)
-    cls._cluster_spec = {
-        "worker": [
-            _strip_protocol(_bytes_to_str(w.target)) for w in cls._workers
-        ],
-        "ps": [_strip_protocol(_bytes_to_str(ps.target)) for ps in cls._ps],
-        "evaluator": [
-            _strip_protocol(_bytes_to_str(e.target)) for e in cls._evals
-        ]
-    }
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
-    self._event = threading.Event()
+    self._mock_os_env = MockOsEnv()
+    self._mock_context = test.mock.patch.object(os, "environ",
+                                                self._mock_os_env)
     super(DistributeCoordinatorIntegrationTest, self).setUp()
+    self._mock_context.__enter__()
+
+  def tearDown(self):
+    self._mock_context.__exit__(None, None, None)
+    super(DistributeCoordinatorIntegrationTest, self).tearDown()
 
   def dataset_input_fn(self, x, y, batch_size, shuffle):
 
@@ -391,43 +287,17 @@ class DistributeCoordinatorIntegrationTest(test.TestCase,
         train_distribute, eval_distribute, remote_cluster=self._cluster_spec)
     self._inspect_train_and_eval_events(estimator)
 
-  def _mock_run_distribute_coordinator(
-      self,
-      worker_fn,
-      strategy,
-      eval_fn,
-      eval_strategy,
-      mode=dc.CoordinatorMode.STANDALONE_CLIENT,
-      cluster_spec=None,
-      session_config=None):
-    # Calls the origial `run_distribute_coordinator` method but gets task config
-    # from environment variables and then signals the caller.
-    task_type = None
-    task_id = None
-    if not cluster_spec:
-      cluster_spec = None
-      tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
-      if not cluster_spec:
-        cluster_spec = tf_config.get("cluster", {})
-        task_env = tf_config.get("task", {})
-        if task_env:
-          task_type = task_env.get("type", task_type)
-          task_id = int(task_env.get("index", task_id))
-    self._event.set()
-    original_run_distribute_coordinator(
-        worker_fn,
-        strategy,
-        eval_fn,
-        eval_strategy,
-        mode=mode,
-        cluster_spec=cluster_spec,
-        task_type=task_type,
-        task_id=task_id,
-        session_config=session_config)
-
-  def _task_thread(self, train_distribute, eval_distribute):
-    with test.mock.patch.object(dc, "run_distribute_coordinator",
-                                self._mock_run_distribute_coordinator):
+  def _mock_run_std_server(self, *args, **kwargs):
+    ret = original_run_std_server(*args, **kwargs)
+    # Wait for all std servers to be brought up in order to reduce the chance of
+    # remote sessions taking local ports that have been assigned to std servers.
+    self._barrier.wait()
+    return ret
+
+  def _task_thread(self, train_distribute, eval_distribute, tf_config):
+    os.environ["TF_CONFIG"] = json.dumps(tf_config)
+    with test.mock.patch.object(dc, "_run_std_server",
+                                self._mock_run_std_server):
       self._complete_flow(train_distribute, eval_distribute)
 
   def _run_task_in_thread(self, cluster_spec, task_type, task_id,
@@ -448,13 +318,10 @@ class DistributeCoordinatorIntegrationTest(test.TestCase,
               "index": task_id
           }
       }
-    self._event.clear()
     t = threading.Thread(
-        target=self._task_thread, args=(train_distribute, eval_distribute))
-    with test.mock.patch.dict("os.environ",
-                              {"TF_CONFIG": json.dumps(tf_config)}):
-      t.start()
-      self._event.wait()
+        target=self._task_thread,
+        args=(train_distribute, eval_distribute, tf_config))
+    t.start()
     return t
 
   def _run_multiple_tasks_in_threads(self, cluster_spec, train_distribute,
@@ -489,7 +356,11 @@ class DistributeCoordinatorIntegrationTest(test.TestCase,
     else:
       eval_distribute = None
 
-    cluster_spec = _create_cluster_spec(num_workers=3, num_ps=2, has_eval=True)
+    cluster_spec = multi_worker_test_base.create_cluster_spec(
+        num_workers=3, num_ps=2, has_eval=True)
+    # 3 workers, 2 ps and 1 evaluator.
+    self._barrier = dc._Barrier(6)
+
     threads = self._run_multiple_tasks_in_threads(
         cluster_spec, train_distribute, eval_distribute)
     for task_type, ts in threads.items():
@@ -516,7 +387,10 @@ class DistributeCoordinatorIntegrationTest(test.TestCase,
     else:
       eval_distribute = None
 
-    cluster_spec = _create_cluster_spec(num_workers=3, num_ps=2, has_eval=True)
+    cluster_spec = multi_worker_test_base.create_cluster_spec(
+        num_workers=3, num_ps=0, has_eval=True)
+    # 3 workers and 1 evaluator.
+    self._barrier = dc._Barrier(4)
     threads = self._run_multiple_tasks_in_threads(
         cluster_spec, train_distribute, eval_distribute)
     threads[WORKER][0].join()
diff --git a/tensorflow/contrib/distribute/python/multi_worker_test_base.py b/tensorflow/contrib/distribute/python/multi_worker_test_base.py
index 18b4503eff..9f92ba7dde 100644
--- a/tensorflow/contrib/distribute/python/multi_worker_test_base.py
+++ b/tensorflow/contrib/distribute/python/multi_worker_test_base.py
@@ -36,9 +36,29 @@ from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.estimator import run_config
 from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
 
 
+ASSIGNED_PORTS = set()
+lock = threading.Lock()
+
+
+def pick_unused_port():
+  """Returns an unused and unassigned local port."""
+  if _portpicker_import_error:
+    raise _portpicker_import_error  # pylint: disable=raising-bad-type
+
+  global ASSIGNED_PORTS
+  with lock:
+    while True:
+      port = portpicker.pick_unused_port()
+      if port > 10000 and port not in ASSIGNED_PORTS:
+        ASSIGNED_PORTS.add(port)
+        logging.info('Using local port %r', port)
+        return port
+
+
 def _create_cluster(num_workers,
                     num_ps,
                     has_chief=False,
@@ -49,8 +69,8 @@ def _create_cluster(num_workers,
   """Creates and starts local servers and returns the cluster_spec dict."""
   if _portpicker_import_error:
     raise _portpicker_import_error  # pylint: disable=raising-bad-type
-  worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
-  ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
+  worker_ports = [pick_unused_port() for _ in range(num_workers)]
+  ps_ports = [pick_unused_port() for _ in range(num_ps)]
 
   cluster_dict = {}
   if num_workers > 0:
@@ -58,9 +78,9 @@ def _create_cluster(num_workers,
   if num_ps > 0:
     cluster_dict['ps'] = ['localhost:%s' % port for port in ps_ports]
   if has_eval:
-    cluster_dict['evaluator'] = ['localhost:%s' % portpicker.pick_unused_port()]
+    cluster_dict['evaluator'] = ['localhost:%s' % pick_unused_port()]
   if has_chief:
-    cluster_dict['chief'] = ['localhost:%s' % portpicker.pick_unused_port()]
+    cluster_dict['chief'] = ['localhost:%s' % pick_unused_port()]
 
   cs = server_lib.ClusterSpec(cluster_dict)
 
@@ -139,11 +159,36 @@ def create_in_process_cluster(num_workers,
       num_workers,
       num_ps=num_ps,
       has_chief=has_chief,
+      has_eval=has_eval,
       worker_config=worker_config,
       ps_config=ps_config,
       protocol='grpc')
 
 
+def create_cluster_spec(has_chief=False,
+                        num_workers=1,
+                        num_ps=0,
+                        has_eval=False):
+  """Create a cluster spec with tasks with unused local ports."""
+  if _portpicker_import_error:
+    raise _portpicker_import_error  # pylint: disable=raising-bad-type
+
+  cluster_spec = {}
+  if has_chief:
+    cluster_spec['chief'] = ['localhost:%s' % pick_unused_port()]
+  if num_workers:
+    cluster_spec['worker'] = [
+        'localhost:%s' % pick_unused_port() for _ in range(num_workers)
+    ]
+  if num_ps:
+    cluster_spec['ps'] = [
+        'localhost:%s' % pick_unused_port() for _ in range(num_ps)
+    ]
+  if has_eval:
+    cluster_spec['evaluator'] = ['localhost:%s' % pick_unused_port()]
+  return cluster_spec
+
+
 class MultiWorkerTestBase(test.TestCase):
   """Base class for testing multi node strategy and dataset."""
 
-- 
GitLab


From 05ec322172958f6e67e4bcaef4681e6aa54fabeb Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Wed, 19 Sep 2018 10:46:35 -0700
Subject: [PATCH 0382/1357] Return error message with illegal input rather than
 check-failing in op_kernel.

PiperOrigin-RevId: 213653853
---
 tensorflow/compiler/jit/xla_launch_util.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc
index affeab4a8c..5f2f6801e7 100644
--- a/tensorflow/compiler/jit/xla_launch_util.cc
+++ b/tensorflow/compiler/jit/xla_launch_util.cc
@@ -275,6 +275,8 @@ Status XlaComputationLaunchContext::PopulateOutputs(
       VLOG(2) << "Retval " << i << " shape " << shape.DebugString() << " type "
               << DataTypeString(type);
       if (type == DT_RESOURCE) {
+        TF_RET_CHECK(kernel->outputs[i].input_index >= 0)
+            << "Invalid input for outputs " << i;
         ctx->set_output(i, ctx->input(kernel->outputs[i].input_index));
       } else {
         se::DeviceMemoryBase buffer = output.buffer({output_num});
-- 
GitLab


From a1b64cf2a6a995ffaaf384cf8643221f1c27db48 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 19 Sep 2018 10:49:11 -0700
Subject: [PATCH 0383/1357] Force-place embedding variables on CPUs ein eager
 mode.

This avoids problems which happen because most optimizers do not have sparse updating gpu kernels implemented.

Fixes #22042

PiperOrigin-RevId: 213654354
---
 tensorflow/python/keras/BUILD                 |  5 ++--
 tensorflow/python/keras/layers/embeddings.py  | 29 +++++++++++++++----
 .../python/keras/layers/embeddings_test.py    | 13 +++++++++
 3 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index b521b1430d..4a72c4b3f3 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -381,12 +381,11 @@ py_test(
     ],
 )
 
-py_test(
+cuda_py_test(
     name = "embeddings_test",
     size = "medium",
     srcs = ["layers/embeddings_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":keras",
         "//tensorflow/python:client_testlib",
     ],
diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py
index 629a9ec9a1..c6df5f2e26 100644
--- a/tensorflow/python/keras/layers/embeddings.py
+++ b/tensorflow/python/keras/layers/embeddings.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import context
+from tensorflow.python.framework import ops
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import constraints
 from tensorflow.python.keras import initializers
@@ -117,12 +119,27 @@ class Embedding(Layer):
 
   @tf_utils.shape_type_conversion
   def build(self, input_shape):
-    self.embeddings = self.add_weight(
-        shape=(self.input_dim, self.output_dim),
-        initializer=self.embeddings_initializer,
-        name='embeddings',
-        regularizer=self.embeddings_regularizer,
-        constraint=self.embeddings_constraint)
+    # Note: most sparse optimizers do not have GPU kernels defined. When
+    # building graphs, the placement algorithm is able to place variables on CPU
+    # since it knows all kernels using the variable only exist on CPU.
+    # When eager execution is enabled, the placement decision has to be made
+    # right now. Checking for the presence of GPUs to avoid complicating the
+    # TPU codepaths which can handle sparse optimizers.
+    if context.executing_eagerly() and context.context().num_gpus():
+      with ops.device('cpu:0'):
+        self.embeddings = self.add_weight(
+            shape=(self.input_dim, self.output_dim),
+            initializer=self.embeddings_initializer,
+            name='embeddings',
+            regularizer=self.embeddings_regularizer,
+            constraint=self.embeddings_constraint)
+    else:
+      self.embeddings = self.add_weight(
+          shape=(self.input_dim, self.output_dim),
+          initializer=self.embeddings_initializer,
+          name='embeddings',
+          regularizer=self.embeddings_regularizer,
+          constraint=self.embeddings_constraint)
     self.built = True
 
   def compute_mask(self, inputs, mask=None):
diff --git a/tensorflow/python/keras/layers/embeddings_test.py b/tensorflow/python/keras/layers/embeddings_test.py
index cab176ee34..2e42e403aa 100644
--- a/tensorflow/python/keras/layers/embeddings_test.py
+++ b/tensorflow/python/keras/layers/embeddings_test.py
@@ -21,9 +21,11 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python import keras
+from tensorflow.python.eager import backprop
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
+from tensorflow.python.training import adagrad
 
 
 class EmbeddingTest(test.TestCase):
@@ -78,6 +80,17 @@ class EmbeddingTest(test.TestCase):
       outputs = keras.backend.eval(layer(inputs))
       self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]])
 
+  @tf_test_util.run_in_graph_and_eager_modes()
+  def test_eager_gpu_cpu(self):
+    l = keras.layers.Embedding(output_dim=2, input_dim=2)
+    l.build((None, 2))
+    inputs = keras.backend.constant([[0, 1, 0]], dtype='int32')
+    with backprop.GradientTape() as tape:
+      output = l(inputs)
+    gs = tape.gradient(output, l.weights)
+    opt = adagrad.AdagradOptimizer(0.1)
+    opt.apply_gradients(zip(gs, l.weights))
+    self.assertAllEqual(len(gs), 1)
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From af134d14405a1ec154d2c6ab326cc827e50880e6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 10:57:40 -0700
Subject: [PATCH 0384/1357] Fix documentation markdown

PiperOrigin-RevId: 213655969
---
 tensorflow/contrib/estimator/python/estimator/hooks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/estimator/python/estimator/hooks.py b/tensorflow/contrib/estimator/python/estimator/hooks.py
index 66c46e66b7..49f7bbd320 100644
--- a/tensorflow/contrib/estimator/python/estimator/hooks.py
+++ b/tensorflow/contrib/estimator/python/estimator/hooks.py
@@ -53,6 +53,7 @@ class InMemoryEvaluatorHook(training.SessionRunHook):
   ```
 
   Current limitations of this approach are:
+
   * It doesn't support multi-node distributed mode.
   * It doesn't support saveable objects other than variables (such as boosted
     tree support)
-- 
GitLab


From 5330ede39fa2f1f7b3302bc316061baf180fab44 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 11:01:56 -0700
Subject: [PATCH 0385/1357] Enable large constant array deduping by default. If
 this causes trouble (makes graph visualizations harder to read, etc) then
 consider increasing the default value of dedupe_array_min_size_bytes.

PiperOrigin-RevId: 213656796
---
 tensorflow/contrib/lite/toco/toco_tooling.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index a7c17156b1..28d31e3797 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -367,9 +367,7 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
   }
 
   // Deduplicate large constant arrays.
-  if (toco_flags.has_dedupe_array_min_size_bytes()) {
-    DedupeConstantArrays(model, toco_flags.dedupe_array_min_size_bytes());
-  }
+  DedupeConstantArrays(model, toco_flags.dedupe_array_min_size_bytes());
 
   LogDump(kLogLevelModelChanged, "AFTER TRANSFORMATIONS", *model);
 
-- 
GitLab


From a586140da6d0460bbf18384556d5cc449b67b322 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 11:08:53 -0700
Subject: [PATCH 0386/1357] Python interface for Boosted Trees model
 explainability (currently includes directional feature contributions); fixed
 ExampleDebugOutputs bug where it errors with empty trees.

PiperOrigin-RevId: 213658470
---
 .../python/estimator/boosted_trees.py         |  28 +-
 .../python/estimator/boosted_trees_test.py    |  74 ++++++
 .../kernels/boosted_trees/prediction_ops.cc   |  38 +--
 tensorflow/python/estimator/BUILD             |  30 +++
 .../python/estimator/canned/boosted_trees.py  | 246 ++++++++++++++++--
 .../estimator/canned/boosted_trees_test.py    | 134 ++++++++++
 .../estimator/canned/boosted_trees_utils.py   |  80 ++++++
 .../canned/boosted_trees_utils_test.py        | 187 +++++++++++++
 .../boosted_trees/prediction_ops_test.py      | 166 +++++++++++-
 ....estimator.-boosted-trees-classifier.pbtxt |   5 +
 ...w.estimator.-boosted-trees-regressor.pbtxt |   5 +
 ....estimator.-boosted-trees-classifier.pbtxt |   5 +
 ...w.estimator.-boosted-trees-regressor.pbtxt |   5 +
 13 files changed, 957 insertions(+), 46 deletions(-)
 create mode 100644 tensorflow/python/estimator/canned/boosted_trees_utils.py
 create mode 100644 tensorflow/python/estimator/canned/boosted_trees_utils_test.py

diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
index 7ed77bcce6..11f60c8238 100644
--- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
+++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees
+from tensorflow.python.estimator.canned import head as head_lib
 
 
 def _validate_input_fn_and_repeat_dataset(train_input_fn):
@@ -33,7 +34,18 @@ def _validate_input_fn_and_repeat_dataset(train_input_fn):
   return _input_fn
 
 
-class _BoostedTreesEstimator(estimator.Estimator):
+# pylint: disable=protected-access
+def _is_classification_head(head):
+  """Infers if the head is a classification head."""
+  # Check using all classification heads defined in canned/head.py. However, it
+  # is not a complete list - it does not check for other classification heads
+  # not defined in the head library.
+  return isinstance(head,
+                    (head_lib._BinaryLogisticHeadWithSigmoidCrossEntropyLoss,
+                     head_lib._MultiClassHeadWithSoftmaxCrossEntropyLoss))
+
+
+class _BoostedTreesEstimator(canned_boosted_trees._BoostedTreesBase):
   """An Estimator for Tensorflow Boosted Trees models."""
 
   def __init__(self,
@@ -96,8 +108,10 @@ class _BoostedTreesEstimator(estimator.Estimator):
         negative gain). For pre and post pruning, you MUST provide
         tree_complexity >0.
 
+    Raises:
+      ValueError: when wrong arguments are given or unsupported functionalities
+         are requested.
     """
-    # pylint:disable=protected-access
     # HParams for the model.
     tree_hparams = canned_boosted_trees._TreeHParams(
         n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
@@ -115,8 +129,14 @@ class _BoostedTreesEstimator(estimator.Estimator):
           config=config)
 
     super(_BoostedTreesEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
-    # pylint:enable=protected-access
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        feature_columns=feature_columns,
+        head=head,
+        center_bias=center_bias,
+        is_classification=_is_classification_head(head))
+    # pylint: enable=protected-access
 
 
 def boosted_trees_classifier_train_in_memory(
diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py
index b1581f3750..e23d9c0fc4 100644
--- a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py
@@ -360,5 +360,79 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
         [pred['predictions'] for pred in predictions])
 
 
+class BoostedTreesDebugOutputTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    self._head = canned_boosted_trees._create_regression_head(label_dimension=1)
+    self._feature_columns = {
+        feature_column.bucketized_column(
+            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
+            BUCKET_BOUNDARIES) for i in range(NUM_FEATURES)
+    }
+
+  def testContribEstimatorThatDFCIsInPredictions(self):
+    # pylint:disable=protected-access
+    head = canned_boosted_trees._create_regression_head(label_dimension=1)
+    train_input_fn = _make_train_input_fn(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees._BoostedTreesEstimator(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        head=head,
+        n_trees=1,
+        max_depth=5,
+        center_bias=True)
+    # pylint:enable=protected-access
+
+    num_steps = 100
+    # Train for a few steps. Validate debug outputs in prediction dicts.
+    est.train(train_input_fn, steps=num_steps)
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn)
+    biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
+                         for pred in debug_predictions])
+    self.assertAllClose([1.8] * 5, biases)
+    self.assertAllClose(({
+        0: -0.070499420166015625,
+        1: -0.095000028610229492,
+        2: 0.0
+    }, {
+        0: -0.53763031959533691,
+        1: 0.063333392143249512,
+        2: 0.0
+    }, {
+        0: -0.51756942272186279,
+        1: -0.095000028610229492,
+        2: 0.0
+    }, {
+        0: 0.1563495397567749,
+        1: 0.063333392143249512,
+        2: 0.0
+    }, {
+        0: 0.96934974193572998,
+        1: 0.063333392143249512,
+        2: 0.0
+    }), dfcs)
+
+    # Assert sum(dfcs) + bias == predictions.
+    expected_predictions = [[1.6345005], [1.32570302], [1.1874305],
+                            [2.01968288], [2.83268309]]
+    predictions = [
+        [sum(dfc.values()) + bias] for (dfc, bias) in zip(dfcs, biases)
+    ]
+    self.assertAllClose(expected_predictions, predictions)
+
+    # Test when user doesn't include bias or dfc in predict_keys.
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn, predict_keys=['predictions'])
+    for prediction_dict in debug_predictions:
+      self.assertTrue('bias' in prediction_dict)
+      self.assertTrue('dfc' in prediction_dict)
+      self.assertTrue('predictions' in prediction_dict)
+      self.assertEqual(len(prediction_dict), 3)
+
+
 if __name__ == '__main__':
   googletest.main()
diff --git a/tensorflow/core/kernels/boosted_trees/prediction_ops.cc b/tensorflow/core/kernels/boosted_trees/prediction_ops.cc
index b2efa06941..4ae26fb95b 100644
--- a/tensorflow/core/kernels/boosted_trees/prediction_ops.cc
+++ b/tensorflow/core/kernels/boosted_trees/prediction_ops.cc
@@ -334,30 +334,34 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel {
         // Proto to store debug outputs, per example.
         boosted_trees::DebugOutput example_debug_info;
         // Initial bias prediction. E.g., prediction based off training mean.
-        example_debug_info.add_logits_path(resource->GetTreeWeight(0) *
-                                           resource->node_value(0, 0));
+        float tree_logit =
+            resource->GetTreeWeight(0) * resource->node_value(0, 0);
+        example_debug_info.add_logits_path(tree_logit);
         int32 node_id = 0;
         int32 tree_id = 0;
         int32 feature_id;
-        float tree_logit;
         float past_trees_logit = 0;  // Sum of leaf logits from prior trees.
-        // Populate proto.
+        // Go through each tree and populate proto.
         while (tree_id <= last_tree) {
-          // Feature id used to split.
-          feature_id = resource->feature_id(tree_id, node_id);
-          example_debug_info.add_feature_ids(feature_id);
-          // Get logit after split.
-          node_id = resource->next_node(tree_id, node_id, i,
-                                        batch_bucketized_features);
-          tree_logit = resource->GetTreeWeight(tree_id) *
-                       resource->node_value(tree_id, node_id);
-          // Output logit incorporates sum of leaf logits from prior trees.
-          example_debug_info.add_logits_path(tree_logit + past_trees_logit);
-          if (resource->is_leaf(tree_id, node_id)) {
-            // Move onto other trees.
-            past_trees_logit += tree_logit;
+          if (resource->is_leaf(tree_id, node_id)) {  // Move onto other trees.
+            // Accumulate tree_logits only if the leaf is non-root, but do so
+            // for bias tree.
+            if (tree_id == 0 || node_id > 0) {
+              past_trees_logit += tree_logit;
+            }
             ++tree_id;
             node_id = 0;
+          } else {  // Add to proto.
+            // Feature id used to split.
+            feature_id = resource->feature_id(tree_id, node_id);
+            example_debug_info.add_feature_ids(feature_id);
+            // Get logit after split.
+            node_id = resource->next_node(tree_id, node_id, i,
+                                          batch_bucketized_features);
+            tree_logit = resource->GetTreeWeight(tree_id) *
+                         resource->node_value(tree_id, node_id);
+            // Output logit incorporates sum of leaf logits from prior trees.
+            example_debug_info.add_logits_path(tree_logit + past_trees_logit);
           }
         }
         // Set output as serialized proto containing debug info.
diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index bfcc019dd5..7f2349954d 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -197,6 +197,7 @@ py_library(
     srcs = ["canned/boosted_trees.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":boosted_trees_utils",
         ":estimator",
         ":head",
         ":model_fn",
@@ -223,6 +224,35 @@ py_test(
     ],
 )
 
+py_library(
+    name = "boosted_trees_utils",
+    srcs = ["canned/boosted_trees_utils.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":estimator",
+        ":head",
+        ":model_fn",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
+py_test(
+    name = "boosted_trees_utils_test",
+    size = "medium",
+    srcs = ["canned/boosted_trees_utils_test.py"],
+    shard_count = 2,
+    srcs_version = "PY2AND3",
+    tags = [
+        "optonly",
+    ],
+    deps = [
+        ":boosted_trees",
+        ":inputs",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_library(
     name = "dnn",
     srcs = ["canned/dnn.py"],
diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 19f18015e4..36048a2bfd 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -22,7 +22,8 @@ import collections
 import functools
 
 from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.canned import boosted_trees_utils
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.feature_column import feature_column as feature_column_lib
 from tensorflow.python.framework import dtypes
@@ -36,6 +37,7 @@ from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.array_ops import identity as tf_identity
 from tensorflow.python.ops.losses import losses
 from tensorflow.python.summary import summary
 from tensorflow.python.training import session_run_hook
@@ -197,8 +199,7 @@ def _cache_transformed_features(features, sorted_feature_columns, batch_size):
   cached_features = [
       _local_variable(
           array_ops.zeros([batch_size], dtype=dtypes.int32),
-          name='cached_feature_{}'.format(i))
-      for i in range(num_features)
+          name='cached_feature_{}'.format(i)) for i in range(num_features)
   ]
   are_features_cached = _local_variable(False, name='are_features_cached')
 
@@ -228,8 +229,7 @@ def _cache_transformed_features(features, sorted_feature_columns, batch_size):
     return cached, cache_flip_op
 
   input_feature_list, cache_flip_op = control_flow_ops.cond(
-      are_features_cached,
-      lambda: (cached_features, control_flow_ops.no_op()),
+      are_features_cached, lambda: (cached_features, control_flow_ops.no_op()),
       cache_features_and_return)
   return input_feature_list, cache_flip_op
 
@@ -263,8 +263,8 @@ class _CacheTrainingStatesUsingHashTable(object):
     elif dtypes.as_dtype(dtypes.string).is_compatible_with(example_ids.dtype):
       empty_key = ''
     else:
-      raise ValueError('Unsupported example_id_feature dtype %s.' %
-                       example_ids.dtype)
+      raise ValueError(
+          'Unsupported example_id_feature dtype %s.' % example_ids.dtype)
     # Cache holds latest <tree_id, node_id, logits> for each example.
     # tree_id and node_id are both int32 but logits is a float32.
     # To reduce the overhead, we store all of them together as float32 and
@@ -273,8 +273,8 @@ class _CacheTrainingStatesUsingHashTable(object):
         empty_key=empty_key, value_dtype=dtypes.float32, value_shape=[3])
     self._example_ids = ops.convert_to_tensor(example_ids)
     if self._example_ids.shape.ndims not in (None, 1):
-      raise ValueError('example_id should have rank 1, but got %s' %
-                       self._example_ids)
+      raise ValueError(
+          'example_id should have rank 1, but got %s' % self._example_ids)
     self._logits_dimension = logits_dimension
 
   def lookup(self):
@@ -334,7 +334,7 @@ class _CacheTrainingStatesUsingVariables(object):
         array_ops.zeros([batch_size], dtype=dtypes.int32),
         name='tree_ids_cache')
     self._node_ids = _local_variable(
-        _DUMMY_NODE_ID*array_ops.ones([batch_size], dtype=dtypes.int32),
+        _DUMMY_NODE_ID * array_ops.ones([batch_size], dtype=dtypes.int32),
         name='node_ids_cache')
     self._logits = _local_variable(
         array_ops.zeros([batch_size, logits_dimension], dtype=dtypes.float32),
@@ -719,7 +719,7 @@ def _bt_model_fn(
     tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
 
     # Create logits.
-    if mode != model_fn.ModeKeys.TRAIN:
+    if mode != model_fn_lib.ModeKeys.TRAIN:
       input_feature_list = _get_transformed_features(features,
                                                      sorted_feature_columns)
       logits = boosted_trees_ops.predict(
@@ -886,6 +886,7 @@ def _bt_model_fn(
       labels=labels,
       train_op_fn=_train_op_fn,
       logits=logits)
+
   # Add an early stop hook.
   estimator_spec = estimator_spec._replace(
       training_hooks=estimator_spec.training_hooks +
@@ -927,8 +928,8 @@ def _create_classification_head_and_closed_form(n_classes, weight_column,
                                                 label_vocabulary):
   """Creates a head for classifier and the closed form gradients/hessians."""
   head = _create_classification_head(n_classes, weight_column, label_vocabulary)
-  if (n_classes == 2 and head.logits_dimension == 1 and weight_column is None
-      and label_vocabulary is None):
+  if (n_classes == 2 and head.logits_dimension == 1 and
+      weight_column is None and label_vocabulary is None):
     # Use the closed-form gradients/hessians for 2 class.
     def _grad_and_hess_for_logloss(logits, labels):
       """A closed form gradient and hessian for logistic loss."""
@@ -961,8 +962,196 @@ def _create_regression_head(label_dimension, weight_column=None):
   # pylint: enable=protected-access
 
 
+def _bt_explanations_fn(features,
+                        head,
+                        sorted_feature_columns,
+                        name='boosted_trees'):
+  """Gradient Boosted Trees predict with explanations model_fn.
+
+  Args:
+    features: dict of `Tensor`.
+    head: A `head_lib._Head` instance.
+    sorted_feature_columns: Sorted iterable of `feature_column._FeatureColumn`
+      model inputs.
+    name: Name used for the model.
+
+  Returns:
+      An `EstimatorSpec` instance.
+
+  Raises:
+    ValueError: mode or params are invalid, or features has the wrong type.
+  """
+  mode = model_fn_lib.ModeKeys.PREDICT
+  with ops.name_scope(name) as name:
+    # Create Ensemble resources.
+    tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
+
+    input_feature_list = _get_transformed_features(features,
+                                                   sorted_feature_columns)
+
+    logits = boosted_trees_ops.predict(
+        # For non-TRAIN mode, ensemble doesn't change after initialization,
+        # so no local copy is needed; using tree_ensemble directly.
+        tree_ensemble_handle=tree_ensemble.resource_handle,
+        bucketized_features=input_feature_list,
+        logits_dimension=head.logits_dimension)
+
+    estimator_spec = head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=None,
+        train_op_fn=control_flow_ops.no_op,
+        logits=logits)
+
+    debug_op = boosted_trees_ops.example_debug_outputs(
+        tree_ensemble.resource_handle,
+        bucketized_features=input_feature_list,
+        logits_dimension=head.logits_dimension)
+    estimator_spec.predictions[boosted_trees_utils._DEBUG_PROTO_KEY] = debug_op  # pylint: disable=protected-access
+    return estimator_spec
+
+
+class _BoostedTreesBase(estimator.Estimator):
+  """Base class for boosted trees estimators.
+
+  This class is intended to keep tree-specific functions (E.g., methods for
+  feature importances and directional feature contributions) in one central
+  place.
+
+  It is not a valid (working) Estimator on its own and should only be used as a
+  base class.
+  """
+
+  def __init__(self, model_fn, model_dir, config, feature_columns, head,
+               center_bias, is_classification):
+    """Initializes a `_BoostedTreesBase` instance.
+
+    Args:
+      model_fn: model_fn: Model function. See base class for more detail.
+      model_dir: Directory to save model parameters, graph and etc. See base
+        class for more detail.
+      config: `estimator.RunConfig` configuration object.
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `FeatureColumn`
+      head: A `head_lib._Head` instance.
+      center_bias: Whether bias centering needs to occur. Bias centering refers
+        to the first node in the very first tree returning the prediction that
+        is aligned with the original labels distribution. For example, for
+        regression problems, the first node will return the mean of the labels.
+        For binary classification problems, it will return a logit for a prior
+        probability of label 1.
+      is_classification: If the estimator is for classification.
+    """
+    super(_BoostedTreesBase, self).__init__(
+        model_fn=model_fn, model_dir=model_dir, config=config)
+    self._sorted_feature_columns = sorted(
+        feature_columns, key=lambda tc: tc.name)
+    self._head = head
+    self._n_features = _calculate_num_features(self._sorted_feature_columns)
+    self._center_bias = center_bias
+    self._is_classification = is_classification
+
+  def experimental_predict_with_explanations(self,
+                                             input_fn,
+                                             predict_keys=None,
+                                             hooks=None,
+                                             checkpoint_path=None):
+    """Computes model explainability outputs per example along with predictions.
+
+    Currently supports directional feature contributions (DFCs). For each
+    instance, DFCs indicate the aggregate contribution of each feature. See
+    https://arxiv.org/abs/1312.1121 and
+    http://blog.datadive.net/interpreting-random-forests/ for more details.
+    Args:
+      input_fn: A function that provides input data for predicting as
+        minibatches. See [Premade Estimators](
+        https://tensorflow.org/guide/premade_estimators#create_input_functions)
+          for more information. The function should construct and return one of
+        the following:  * A `tf.data.Dataset` object: Outputs of `Dataset`
+          object must be a tuple `(features, labels)` with same constraints as
+        below. * A tuple `(features, labels)`: Where `features` is a `tf.Tensor`
+          or a dictionary of string feature name to `Tensor` and `labels` is a
+          `Tensor` or a dictionary of string label name to `Tensor`. Both
+          `features` and `labels` are consumed by `model_fn`. They should
+          satisfy the expectation of `model_fn` from inputs.
+      predict_keys: list of `str`, name of the keys to predict. It is used if
+        the `tf.estimator.EstimatorSpec.predictions` is a `dict`. If
+        `predict_keys` is used then rest of the predictions will be filtered
+        from the dictionary, with the exception of 'bias' and 'dfc', which will
+        always be in the dictionary. If `None`, returns all keys in prediction
+        dict, as well as two new keys 'dfc' and 'bias'.
+      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
+        callbacks inside the prediction call.
+      checkpoint_path: Path of a specific checkpoint to predict. If `None`, the
+        latest checkpoint in `model_dir` is used.  If there are no checkpoints
+        in `model_dir`, prediction is run with newly initialized `Variables`
+        instead of ones restored from checkpoint.
+
+    Yields:
+      Evaluated values of `predictions` tensors. The `predictions` tensors will
+      contain at least two keys 'dfc' and 'bias' for model explanations. The
+      `dfc` value corresponds to the contribution of each feature to the overall
+      prediction for this instance (positive indicating that the feature makes
+      it more likely to select class 1 and negative less likely). The 'bias'
+      value will be the same across all the instances, corresponding to the
+      probability (classification) or prediction (regression) of the training
+      data distribution.
+
+    Raises:
+      ValueError: when wrong arguments are given or unsupported functionalities
+       are requested.
+    """
+    if not self._center_bias:
+      raise ValueError('center_bias must be enabled during estimator '
+                       'instantiation when using '
+                       'experimental_predict_with_explanations.')
+    # pylint: disable=protected-access
+    if not self._is_classification:
+      identity_inverse_link_fn = self._head._inverse_link_fn in (None,
+                                                                 tf_identity)
+      # pylint:enable=protected-access
+      if not identity_inverse_link_fn:
+        raise ValueError(
+            'For now only identity inverse_link_fn in regression_head is '
+            'supported for experimental_predict_with_explanations.')
+
+    # pylint:disable=unused-argument
+    def new_model_fn(features, labels, mode):
+      return _bt_explanations_fn(features, self._head,
+                                 self._sorted_feature_columns)
+
+    # pylint:enable=unused-argument
+    est = estimator.Estimator(
+        model_fn=new_model_fn,
+        model_dir=self.model_dir,
+        config=self.config,
+        warm_start_from=self._warm_start_settings)
+    # Make sure bias and dfc will be in prediction dict.
+    user_supplied_predict_keys = predict_keys is not None
+    if user_supplied_predict_keys:
+      predict_keys = set(predict_keys)
+      predict_keys.add(boosted_trees_utils._DEBUG_PROTO_KEY)
+    predictions = est.predict(
+        input_fn,
+        predict_keys=predict_keys,
+        hooks=hooks,
+        checkpoint_path=checkpoint_path,
+        yield_single_examples=True)
+    for pred in predictions:
+      bias, dfcs = boosted_trees_utils._parse_explanations_from_prediction(
+          pred[boosted_trees_utils._DEBUG_PROTO_KEY], self._n_features,
+          self._is_classification)
+      pred['bias'] = bias
+      pred['dfc'] = dfcs
+      # Don't need to expose serialized proto to end user.
+      del pred[boosted_trees_utils._DEBUG_PROTO_KEY]
+      yield pred
+
+
+# pylint: disable=protected-access
 @estimator_export('estimator.BoostedTreesClassifier')
-class BoostedTreesClassifier(estimator.Estimator):
+class BoostedTreesClassifier(_BoostedTreesBase):
   """A Classifier for Tensorflow Boosted Trees models.
 
   @compatibility(eager)
@@ -1082,14 +1271,13 @@ class BoostedTreesClassifier(estimator.Estimator):
       n_classes = 2
     head, closed_form = _create_classification_head_and_closed_form(
         n_classes, weight_column, label_vocabulary=label_vocabulary)
-
     # HParams for the model.
     tree_hparams = _TreeHParams(
         n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
         tree_complexity, min_node_weight, center_bias, pruning_mode)
 
     def _model_fn(features, labels, mode, config):
-      return _bt_model_fn(  # pylint: disable=protected-access
+      return _bt_model_fn(
           features,
           labels,
           mode,
@@ -1101,11 +1289,17 @@ class BoostedTreesClassifier(estimator.Estimator):
           closed_form_grad_and_hess_fn=closed_form)
 
     super(BoostedTreesClassifier, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        feature_columns=feature_columns,
+        head=head,
+        center_bias=center_bias,
+        is_classification=True)
 
 
 @estimator_export('estimator.BoostedTreesRegressor')
-class BoostedTreesRegressor(estimator.Estimator):
+class BoostedTreesRegressor(_BoostedTreesBase):
   """A Regressor for Tensorflow Boosted Trees models.
 
   @compatibility(eager)
@@ -1223,9 +1417,17 @@ class BoostedTreesRegressor(estimator.Estimator):
         tree_complexity, min_node_weight, center_bias, pruning_mode)
 
     def _model_fn(features, labels, mode, config):
-      return _bt_model_fn(  # pylint: disable=protected-access
-          features, labels, mode, head, feature_columns, tree_hparams,
-          n_batches_per_layer, config)
+      return _bt_model_fn(features, labels, mode, head, feature_columns,
+                          tree_hparams, n_batches_per_layer, config)
 
     super(BoostedTreesRegressor, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        feature_columns=feature_columns,
+        head=head,
+        center_bias=center_bias,
+        is_classification=False)
+
+
+# pylint: enable=protected-access
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 6e28c72151..9409cb5cc7 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -565,6 +565,140 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
 
 
+class BoostedTreesDebugOutputsTest(test_util.TensorFlowTestCase):
+  """Test debug/model explainability outputs for individual predictions.
+
+  Includes directional feature contributions (DFC).
+  """
+
+  def setUp(self):
+    self._feature_columns = {
+        feature_column.bucketized_column(
+            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
+            BUCKET_BOUNDARIES) for i in range(NUM_FEATURES)
+    }
+
+  def testBinaryClassifierThatDFCIsInPredictions(self):
+    train_input_fn = _make_train_input_fn(is_classification=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=3, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        center_bias=True)
+
+    num_steps = 100
+    # Train for a few steps. Validate debug outputs in prediction dicts.
+    est.train(train_input_fn, steps=num_steps)
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn)
+    biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
+                         for pred in debug_predictions])
+    self.assertAllClose([0.4] * 5, biases)
+    self.assertAllClose(({
+        0: -0.12108613453574479,
+        1: 0.0,
+        2: -0.039254929814481143
+    }, {
+        0: 0.19650601422250574,
+        1: 0.0,
+        2: 0.02693827052766018
+    }, {
+        0: 0.16057487356133376,
+        1: 0.0,
+        2: 0.02693827052766018
+    }, {
+        0: -0.12108613453574479,
+        1: 0.0,
+        2: -0.039254929814481143
+    }, {
+        0: -0.10832468554550384,
+        1: 0.0,
+        2: 0.02693827052766018
+    }), dfcs)
+
+    # Assert sum(dfcs) + bias == probabilities.
+    expected_probabilities = [
+        0.23965894, 0.62344426, 0.58751315, 0.23965894, 0.31861359
+    ]
+    probabilities = [
+        sum(dfc.values()) + bias for (dfc, bias) in zip(dfcs, biases)
+    ]
+    self.assertAllClose(expected_probabilities, probabilities)
+
+    # When user doesn't include bias or dfc in predict_keys, make sure to still
+    # include dfc and bias.
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn, predict_keys=['probabilities'])
+    for prediction_dict in debug_predictions:
+      self.assertTrue('bias' in prediction_dict)
+      self.assertTrue('dfc' in prediction_dict)
+      self.assertTrue('probabilities' in prediction_dict)
+      self.assertEqual(len(prediction_dict), 3)
+
+  def testRegressorThatDFCIsInPredictions(self):
+    train_input_fn = _make_train_input_fn(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        center_bias=True)
+
+    num_steps = 100
+    # Train for a few steps. Validate debug outputs in prediction dicts.
+    est.train(train_input_fn, steps=num_steps)
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn)
+    biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
+                         for pred in debug_predictions])
+    self.assertAllClose([1.8] * 5, biases)
+    self.assertAllClose(({
+        0: -0.070499420166015625,
+        1: -0.095000028610229492,
+        2: 0.0
+    }, {
+        0: -0.53763031959533691,
+        1: 0.063333392143249512,
+        2: 0.0
+    }, {
+        0: -0.51756942272186279,
+        1: -0.095000028610229492,
+        2: 0.0
+    }, {
+        0: 0.1563495397567749,
+        1: 0.063333392143249512,
+        2: 0.0
+    }, {
+        0: 0.96934974193572998,
+        1: 0.063333392143249512,
+        2: 0.0
+    }), dfcs)
+
+    # Assert sum(dfcs) + bias == predictions.
+    expected_predictions = [[1.6345005], [1.32570302], [1.1874305],
+                            [2.01968288], [2.83268309]]
+    predictions = [
+        [sum(dfc.values()) + bias] for (dfc, bias) in zip(dfcs, biases)
+    ]
+    self.assertAllClose(expected_predictions, predictions)
+
+    # Test when user doesn't include bias or dfc in predict_keys.
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn, predict_keys=['predictions'])
+    for prediction_dict in debug_predictions:
+      self.assertTrue('bias' in prediction_dict)
+      self.assertTrue('dfc' in prediction_dict)
+      self.assertTrue('predictions' in prediction_dict)
+      self.assertEqual(len(prediction_dict), 3)
+
+
 class ModelFnTests(test_util.TensorFlowTestCase):
   """Tests bt_model_fn including unexposed internal functionalities."""
 
diff --git a/tensorflow/python/estimator/canned/boosted_trees_utils.py b/tensorflow/python/estimator/canned/boosted_trees_utils.py
new file mode 100644
index 0000000000..85efc2304a
--- /dev/null
+++ b/tensorflow/python/estimator/canned/boosted_trees_utils.py
@@ -0,0 +1,80 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Debug and model explainability logic for boosted trees."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
+
+# For directional feature contributions.
+_DEBUG_PROTO_KEY = '_serialized_debug_outputs_proto'
+_BIAS_ID = 0
+
+
+def _parse_debug_proto_string(example_proto_serialized):
+  example_debug_outputs = boosted_trees_pb2.DebugOutput()
+  example_debug_outputs.ParseFromString(example_proto_serialized)
+  feature_ids = example_debug_outputs.feature_ids
+  logits_path = example_debug_outputs.logits_path
+  return feature_ids, logits_path
+
+
+def _compute_directional_feature_contributions(example_feature_ids,
+                                               example_logits_paths, activation,
+                                               num_bucketized_features):
+  """Directional feature contributions and bias, per example."""
+  # Initialize contributions to 0.
+  dfcs = {k: 0 for k in range(num_bucketized_features)}
+
+  # Traverse tree subtracting child prediction from parent prediction and
+  # associating change with feature id used to split.
+  predictions = np.array(activation(example_logits_paths))
+  delta_pred = predictions[_BIAS_ID + 1:] - predictions[:-1]
+  # Group by feature id, then sum delta_pred.
+  contribs = np.bincount(
+      example_feature_ids,
+      weights=delta_pred,
+      minlength=num_bucketized_features)
+  for f, dfc in zip(range(num_bucketized_features), contribs):
+    dfcs[f] = dfc
+  return predictions[_BIAS_ID], dfcs
+
+
+def _identity(logits):
+  return logits
+
+
+def _sigmoid(logits):
+  # TODO(crawles): Change to softmax once multiclass support is available.
+  return 1 / (1 + np.exp(-np.array(logits)))
+
+
+def _parse_explanations_from_prediction(serialized_debug_proto,
+                                        n_features,
+                                        classification=False):
+  """Parse serialized explanability proto, compute dfc, and return bias, dfc."""
+  feature_ids, logits_path = _parse_debug_proto_string(serialized_debug_proto)
+  if classification:
+    activation = _sigmoid
+  else:
+    activation = _identity
+  bias, dfcs = _compute_directional_feature_contributions(
+      feature_ids, logits_path, activation, n_features)
+  # TODO(crawles): Prediction path and leaf IDs.
+  return bias, dfcs
diff --git a/tensorflow/python/estimator/canned/boosted_trees_utils_test.py b/tensorflow/python/estimator/canned/boosted_trees_utils_test.py
new file mode 100644
index 0000000000..506d4ea6fb
--- /dev/null
+++ b/tensorflow/python/estimator/canned/boosted_trees_utils_test.py
@@ -0,0 +1,187 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests boosted_trees estimators and model_fn."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.estimator.canned import boosted_trees_utils
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import googletest
+
+
+class BoostedTreesDFCTest(test_util.TensorFlowTestCase):
+  """Test directional feature contributions (DFC) helper functions. """
+
+  def testDirectionalFeatureContributionsCompute(self):
+    """Tests logic to compute DFCs given feature ids and logits paths."""
+    num_bucketized_features = 3  # Includes one unused feature.
+    examples_feature_ids = ((2, 2, 0, 0), (2, 2, 0))
+    e1_feature_ids, e2_feature_ids = examples_feature_ids
+
+    # DFCs are computed by traversing the prediction path and subtracting each
+    # child prediction from its parent prediction and associating the change in
+    # prediction with the respective feature id used for the split.
+    # For each activation function, f, (currently identity or sigmoid), DFCs are
+    # calculated for the two examples as:
+    # example 1:
+    #   feature_0 = (f(1.114) - f(1.214)) + (f(6.114) - f(1.114))
+    #   feature_1 = 0  # Feature not in ensemble, thus zero contrib.
+    #   feature_2 = (f(0.114) - bias_pred) + (f(1.214) - f(0.114))
+    # example 2:
+    #   feature_0 = f(-5.486) - f(1.514)
+    #   feature_1 = 0  # Feature not in ensemble, thus zero contrib.
+    #   feature_2 = (f(0.114) - bias_pred) + (f(1.514) - f(0.114))
+    # where bias_pred is = f(0) or f(0.21), with center_bias = {True, False},
+    # respectively.
+    # Keys are center_bias.
+    expected_dfcs_identity = {
+        False: ({
+            0: 4.9,
+            1: 0,
+            2: 1.214
+        }, {
+            0: -7.0,
+            1: 0,
+            2: 1.514
+        }),
+        True: ({
+            0: 4.9,
+            1: 0,
+            2: 1.0039999999999998
+        }, {
+            0: -7.0,
+            1: 0,
+            2: 1.3039999999999998
+        })
+    }
+    expected_dfcs_sigmoid = {
+        False: ({
+            0: 0.22678725678805578,
+            1: 0,
+            2: 0.2710059376234506
+        }, {
+            0: -0.81552596670046507,
+            1: 0,
+            2: 0.319653250251275
+        }),
+        True: ({
+            0: 0.22678725678805578,
+            1: 0,
+            2: 0.2186980280491253
+        }, {
+            0: -0.81552596670046507,
+            1: 0,
+            2: 0.26734534067694971
+        })
+    }
+    # pylint: disable=protected-access
+    for f, expected_dfcs in zip(
+        (boosted_trees_utils._identity, boosted_trees_utils._sigmoid),
+        (expected_dfcs_identity, expected_dfcs_sigmoid)):
+      for center_bias in [False, True]:
+        # If not center_bias, the bias after activation is 0.
+        if center_bias:
+          bias_logit = 0.21  # Root node of tree_0.
+        else:
+          bias_logit = 0  # 0 is default value when there is no original_leaf.
+        f_bias = f(bias_logit)
+
+        # Logits before and after, as is outputed from
+        # boosted_trees_ops.example_debug_outputs
+        examples_logits_paths = ((bias_logit, 0.114, 1.214, 1.114, 6.114),
+                                 (bias_logit, 0.114, 1.514, -5.486))
+        e1_logits_path, e2_logits_path = examples_logits_paths
+        e1_expected_dfcs, e2_expected_dfcs = expected_dfcs[center_bias]
+        # Check feature contributions are correct for both examples.
+        # Example 1.
+        # pylint:disable=line-too-long
+        e1_bias, e1_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+            e1_feature_ids, e1_logits_path, f, num_bucketized_features)
+        self.assertAllClose(e1_bias, f_bias)
+        self.assertAllClose(e1_dfc, e1_expected_dfcs)
+        # Example 2.
+        e2_bias, e2_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+            e2_feature_ids, e2_logits_path, f, num_bucketized_features)
+        # pylint:enable=line-too-long
+        self.assertAllClose(e2_bias, f_bias)
+        self.assertAllClose(e2_dfc, e2_expected_dfcs)
+        # Check if contributions sum to final prediction.
+        # For each tree, get leaf of last tree.
+        expected_logits = (e1_logits_path[-1], e2_logits_path[-1])
+        # Predictions should be the sum of contributions + bias.
+        expected_preds = [f(logit) for logit in expected_logits]
+        e1_pred = e1_bias + sum(e1_dfc.values())
+        e2_pred = e2_bias + sum(e2_dfc.values())
+        preds = [e1_pred, e2_pred]
+        self.assertAllClose(preds, expected_preds)
+    # pylint: enable=protected-access
+
+  def testDFCComputeComparedToExternalExample(self):
+    """Tests `compute_dfc` compared to external example (regression).
+
+    Example from http://blog.datadive.net/interpreting-random-forests.
+    """
+    # DIS:3, RM: 2, LSTAT:1, NOX:0
+    num_bucketized_features = 4
+    e1_feature_ids = (2, 1, 0)
+    e2_feature_ids = (2, 2, 2)
+    e3_feature_ids = (2, 2, 0)
+
+    bias_logit = 22.60  # Root node of tree_0.
+    activation = boosted_trees_utils._identity
+    f_bias = activation(bias_logit)
+    # Logits before and after, as is outputed from
+    # boosted_trees_ops.example_debug_outputs
+    e1_logits_path = (bias_logit, 19.96, 14.91, 18.11)
+    e2_logits_path = (bias_logit, 37.42, 45.10, 45.90)
+    e3_logits_path = (bias_logit, 37.42, 32.30, 33.58)
+    e1_expected_dfcs = {0: 3.20, 1: -5.05, 2: -2.64, 3: 0}
+    e2_expected_dfcs = {0: 0, 1: 0, 2: 23.3, 3: 0}
+    e3_expected_dfcs = {0: 1.28, 1: 0, 2: 9.7, 3: 0}
+    # Check feature contributions are correct for both examples.
+    # Example 1.
+    # pylint: disable=protected-access
+    # pylint: disable=line-too-long
+    e1_bias, e1_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+        e1_feature_ids, e1_logits_path, activation, num_bucketized_features)
+    self.assertAllClose(e1_bias, f_bias)
+    self.assertAllClose(e1_dfc, e1_expected_dfcs)
+    # Example 2.
+    e2_bias, e2_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+        e2_feature_ids, e2_logits_path, activation, num_bucketized_features)
+    self.assertAllClose(e2_bias, f_bias)
+    self.assertAllClose(e2_dfc, e2_expected_dfcs)
+    # Example 3.
+    e3_bias, e3_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+        e3_feature_ids, e3_logits_path, activation, num_bucketized_features)
+    # pylint: enable=line-too-long
+    self.assertAllClose(e3_bias, f_bias)
+    self.assertAllClose(e3_dfc, e3_expected_dfcs)
+    # pylint: enable=protected-access
+    # Check if contributions sum to final prediction.
+    # For each tree, get leaf of last tree.
+    expected_logits = (18.11, 45.90, 33.58)
+    # Predictions should be the sum of contributions + bias.
+    expected_preds = [activation(logit) for logit in expected_logits]
+    e1_pred = e1_bias + sum(e1_dfc.values())
+    e2_pred = e2_bias + sum(e2_dfc.values())
+    e3_pred = e3_bias + sum(e3_dfc.values())
+    preds = [e1_pred, e2_pred, e3_pred]
+    self.assertAllClose(preds, expected_preds)
+
+
+if __name__ == '__main__':
+  googletest.main()
diff --git a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
index dee96102fb..3b28d44cf8 100644
--- a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
+++ b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
@@ -928,6 +928,163 @@ class PredictionOpsTest(test_util.TensorFlowTestCase):
 class FeatureContribsOpsTest(test_util.TensorFlowTestCase):
   """Tests feature contribs ops for model understanding."""
 
+  def testContribsForOnlyABiasNode(self):
+    """Tests case when, after training, only left with a bias node.
+
+    For example, this could happen if the final ensemble contains one tree that
+    got pruned up to the root.
+    """
+    with self.test_session() as session:
+      tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
+      text_format.Merge(
+          """
+        trees {
+          nodes {
+            leaf {
+              scalar: 1.72
+            }
+          }
+        }
+        tree_weights: 0.1
+        tree_metadata: {
+          num_layers_grown: 0
+        }
+      """, tree_ensemble_config)
+
+      tree_ensemble = boosted_trees_ops.TreeEnsemble(
+          'ensemble', serialized_proto=tree_ensemble_config.SerializeToString())
+      tree_ensemble_handle = tree_ensemble.resource_handle
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      # All features are unused.
+      feature_0_values = [36, 32]
+      feature_1_values = [13, -29]
+      feature_2_values = [11, 27]
+
+      # Expected logits are computed by traversing the logit path and
+      # subtracting child logits from parent logits.
+      bias = 1.72 * 0.1  # Root node of tree_0.
+      expected_feature_ids = ((), ())
+      expected_logits_paths = ((bias,), (bias,))
+
+      bucketized_features = [
+          feature_0_values, feature_1_values, feature_2_values
+      ]
+
+      debug_op = boosted_trees_ops.example_debug_outputs(
+          tree_ensemble_handle,
+          bucketized_features=bucketized_features,
+          logits_dimension=1)
+
+      serialized_examples_debug_outputs = session.run(debug_op)
+      feature_ids = []
+      logits_paths = []
+      for example in serialized_examples_debug_outputs:
+        example_debug_outputs = boosted_trees_pb2.DebugOutput()
+        example_debug_outputs.ParseFromString(example)
+        feature_ids.append(example_debug_outputs.feature_ids)
+        logits_paths.append(example_debug_outputs.logits_path)
+
+      self.assertAllClose(feature_ids, expected_feature_ids)
+      self.assertAllClose(logits_paths, expected_logits_paths)
+
+  def testContribsMultipleTreeWhenFirstTreeIsABiasNode(self):
+    """Tests case when, after training, first tree contains only a bias node."""
+    with self.test_session() as session:
+      tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
+      text_format.Merge(
+          """
+        trees {
+          nodes {
+            leaf {
+              scalar: 1.72
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              threshold: 26
+              left_id: 1
+              right_id: 2
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 50
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              original_leaf: {scalar: 5.5}
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 7.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 5.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 6.0
+            }
+          }
+        }
+        tree_weights: 1.
+        tree_weights: 0.1
+        tree_metadata: {
+          num_layers_grown: 0
+        }
+        tree_metadata: {
+          num_layers_grown: 1
+        }
+      """, tree_ensemble_config)
+
+      tree_ensemble = boosted_trees_ops.TreeEnsemble(
+          'ensemble', serialized_proto=tree_ensemble_config.SerializeToString())
+      tree_ensemble_handle = tree_ensemble.resource_handle
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      feature_0_values = [36, 32]
+      feature_1_values = [13, -29]  # Unused feature.
+      feature_2_values = [11, 27]
+
+      # Expected logits are computed by traversing the logit path and
+      # subtracting child logits from parent logits.
+      expected_feature_ids = ((2, 0), (2,))
+      # bias = 1.72 * 1.  # Root node of tree_0.
+      # example_0 :  (bias, 0.1 * 5.5 + bias, 0.1 * 5. + bias)
+      # example_1 :  (bias, 0.1 * 7. + bias )
+      expected_logits_paths = ((1.72, 2.27, 2.22), (1.72, 2.42))
+
+      bucketized_features = [
+          feature_0_values, feature_1_values, feature_2_values
+      ]
+
+      debug_op = boosted_trees_ops.example_debug_outputs(
+          tree_ensemble_handle,
+          bucketized_features=bucketized_features,
+          logits_dimension=1)
+
+      serialized_examples_debug_outputs = session.run(debug_op)
+      feature_ids = []
+      logits_paths = []
+      for example in serialized_examples_debug_outputs:
+        example_debug_outputs = boosted_trees_pb2.DebugOutput()
+        example_debug_outputs.ParseFromString(example)
+        feature_ids.append(example_debug_outputs.feature_ids)
+        logits_paths.append(example_debug_outputs.logits_path)
+
+      self.assertAllClose(feature_ids, expected_feature_ids)
+      self.assertAllClose(logits_paths, expected_logits_paths)
+
   def testContribsMultipleTree(self):
     """Tests that the contribs work when we have multiple trees."""
     with self.cached_session() as session:
@@ -1018,11 +1175,14 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase):
         tree_weights: 0.2
         tree_weights: 1.0
         tree_metadata: {
-          num_layers_grown: 1}
+          num_layers_grown: 1
+        }
         tree_metadata: {
-          num_layers_grown: 2}
+          num_layers_grown: 2
+        }
         tree_metadata: {
-          num_layers_grown: 1}
+          num_layers_grown: 1
+        }
       """, tree_ensemble_config)
 
       tree_ensemble = boosted_trees_ops.TreeEnsemble(
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
index 7027e78df4..9e429a32a5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
@@ -1,6 +1,7 @@
 path: "tensorflow.estimator.BoostedTreesClassifier"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
   is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
@@ -31,6 +32,10 @@ tf_class {
     name: "evaluate"
     argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "experimental_predict_with_explanations"
+    argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "export_saved_model"
     argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
index d8167ea7cb..56af1d137c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
@@ -1,6 +1,7 @@
 path: "tensorflow.estimator.BoostedTreesRegressor"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
   is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
@@ -31,6 +32,10 @@ tf_class {
     name: "evaluate"
     argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "experimental_predict_with_explanations"
+    argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "export_saved_model"
     argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
index 7027e78df4..9e429a32a5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
@@ -1,6 +1,7 @@
 path: "tensorflow.estimator.BoostedTreesClassifier"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
   is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
@@ -31,6 +32,10 @@ tf_class {
     name: "evaluate"
     argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "experimental_predict_with_explanations"
+    argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "export_saved_model"
     argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
index d8167ea7cb..56af1d137c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
@@ -1,6 +1,7 @@
 path: "tensorflow.estimator.BoostedTreesRegressor"
 tf_class {
   is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
   is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
@@ -31,6 +32,10 @@ tf_class {
     name: "evaluate"
     argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "experimental_predict_with_explanations"
+    argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "export_saved_model"
     argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
-- 
GitLab


From 1b4999df0c2ef3c8c7d771415924fb58a5476c6a Mon Sep 17 00:00:00 2001
From: Amit Patankar <amitpatankar@google.com>
Date: Wed, 19 Sep 2018 11:21:52 -0700
Subject: [PATCH 0387/1357] Add a space to the error message.

PiperOrigin-RevId: 213661062
---
 tensorflow/python/keras/engine/training_eager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py
index 939a7f2356..fb71bf2596 100644
--- a/tensorflow/python/keras/engine/training_eager.py
+++ b/tensorflow/python/keras/engine/training_eager.py
@@ -186,7 +186,7 @@ def iterator_fit_loop(model,
   # make sure either x,y or x,y,sample_weights is provided
   if (not isinstance(inputs.output_shapes, (list, tuple)) or
       len(inputs.output_shapes) not in (2, 3)):
-    raise ValueError('Please provide either inputs and targets'
+    raise ValueError('Please provide either inputs and targets '
                      'or inputs, targets, and sample_weights')
 
   for step_index in range(steps_per_epoch):
-- 
GitLab


From ebe769f166c35c16637cb919ea3ddd096e04befa Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Wed, 19 Sep 2018 11:43:04 -0700
Subject: [PATCH 0388/1357] Re-enable flaky keras_test

PiperOrigin-RevId: 213665390
---
 tensorflow/contrib/distribute/python/BUILD    |  3 ---
 .../contrib/distribute/python/keras_test.py   | 22 ++++++++++++++-----
 tensorflow/python/keras/engine/training.py    |  6 +++--
 .../keras/engine/training_distributed.py      |  6 +++--
 4 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index ebea512c04..48a7593ab4 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -728,12 +728,9 @@ cuda_py_test(
         ":keras_test_lib",
     ],
     tags = [
-        "manual",
         "multi_and_single_gpu",
-        "no_gpu",
         "no_pip",
         "no_windows_gpu",
-        "notap",
         "notsan",
     ],
 )
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 5f35e38189..8165a70743 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -732,14 +732,22 @@ class CorrectnessWithDistributionStrategyTest(test.TestCase,
     with self.cached_session():
       keras.backend.set_image_data_format('channels_last')
       num_samples = 10000
+
+      # Train and predict datasets are created with the same input numpy arrays.
       x_train = np.random.rand(num_samples, 1)
       y_train = 3 * x_train
       x_train = x_train.astype('float32')
       y_train = y_train.astype('float32')
 
+      # The model is built once and the initial weights are saved.
+      # This is used to initialize the model for both the distribution and
+      # non-distribution run.
+      model = keras.Sequential()
+      model.add(keras.layers.Dense(1, input_shape=(1,)))
+      initial_weights = model.get_weights()
+
       def fit_and_predict(with_distribution=None):
-        model = keras.Sequential()
-        model.add(keras.layers.Dense(1, input_shape=(1,)))
+        model.set_weights(initial_weights)
         model.compile(
             loss=keras.losses.mean_squared_error,
             optimizer=gradient_descent.GradientDescentOptimizer(0.5),
@@ -751,12 +759,14 @@ class CorrectnessWithDistributionStrategyTest(test.TestCase,
         train_dataset = dataset_ops.Dataset.from_tensor_slices((x_train,
                                                                 y_train))
         train_dataset = batch_wrapper(train_dataset, batch_size, distribution)
-        # Running only 100 steps instead of the full dataset to keep test
-        # duration small.
-        model.fit(x=train_dataset, epochs=1, steps_per_epoch=100)
+        # We have initialized the model to the same weight for the distribution
+        # and non-distribution run. If you want to initialize the model to
+        # random weights for each run, you need to run the model through the
+        # entire dataset at least once to ensure that the weights converge to
+        # the same value.
+        model.fit(x=train_dataset, epochs=1, steps_per_epoch=10)
 
         weights = model.get_weights()
-
         x_predict = [[1.], [2.], [3.], [4.]]
         predict_batch_size = 4
         if with_distribution:
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index dc464c02b6..7df72d45b4 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -422,8 +422,9 @@ class Model(Network):
 
     # Set DistributionStrategy specific parameters.
     self._distribution_strategy = distribute
+    # Reset the value of grouped_model
+    self._grouped_model = None
     if self._distribution_strategy is not None:
-      self._grouped_model = None
       distributed_training_utils.configure_and_create_session(
           self._distribution_strategy)
     if not self.built:
@@ -445,7 +446,8 @@ class Model(Network):
       for name in self.output_names:
         if name not in loss:
           logging.warning(
-              'Output "' + name + '" missing from loss dictionary. We assume '
+              'Output "' + name +
+              '" missing from loss dictionary. We assume '
               'this was done on purpose. The fit and evaluate APIs will not be '
               'expecting any data to be passed to "' + name + '".')
         loss_functions.append(losses.get(loss.get(name)))
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 53291c3956..d133595793 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -20,6 +20,7 @@ from __future__ import division
 from __future__ import print_function
 import numpy as np
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import errors
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import callbacks as cbks
@@ -742,8 +743,9 @@ def _experimental_predict_loop(model, iterator, verbose=0, steps=None):
   for name, tensor in zip(model.output_names, model.outputs):
     # TODO(priyag): This is a workaround as we do not know the batch dimension
     # of the model's output at this point.
-    tensor.shape.dims = [batch_dimension] + tensor.shape.dims[1:]
-    initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)
+    shape = tensor_shape.TensorShape(tensor.shape.dims)
+    shape.dims = [batch_dimension] + shape.dims[1:]
+    initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)
 
   with current_strategy.scope():
     # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
-- 
GitLab


From 1593b4a1508a57694c3263d045d55334cab8cfcd Mon Sep 17 00:00:00 2001
From: Guangda Lai <31743510+aaroey@users.noreply.github.com>
Date: Wed, 19 Sep 2018 11:50:33 -0700
Subject: [PATCH 0389/1357] Change type to uint64_t for Align()

---
 .../tensorrt/resources/trt_allocator.cc       | 10 ++++----
 .../tensorrt/resources/trt_allocator.h        |  2 +-
 .../tensorrt/resources/trt_allocator_test.cc  | 25 ++++++++++---------
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
index f6cf72e07f..f6b4b4dcab 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
@@ -27,12 +27,12 @@ namespace tensorflow {
 namespace tensorrt {
 
 // std::align is not supported, so this method mimic its behavior.
-void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space) {
-  QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
+void* Align(uint64_t alignment, uint64_t size, void*& ptr, uint64_t& space) {
+  QCHECK_GT(alignment, 0ul) << "alignment must be greater than 0.";
   QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
-  QCHECK_GT(size, 0) << "size must be greater than 0.";
+  QCHECK_GT(size, 0ul) << "size must be greater than 0.";
   QCHECK(ptr) << "ptr must not be nullptr.";
-  QCHECK_GT(space, 0) << "space must be greater than 0.";
+  QCHECK_GT(space, 0ul) << "space must be greater than 0.";
   const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr);
   QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows.";
 
@@ -73,7 +73,7 @@ void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
   // See issue #20856
   alignment = 512;
   assert((alignment & (alignment - 1)) == 0);  // zero or a power of 2.
-  int64_t total_size = size + alignment;
+  uint64_t total_size = size + alignment;
   // TODO(aaroey): AllocateRaw takes size_t size as input, so it'll produce
   // unexpected result when TRT tries to allocate more bytes than size_t can
   // carry. Fix this.
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
index 0be3c4fd07..dc9862b16c 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
@@ -29,7 +29,7 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
 // std::align is not supported, so this function mimic its behavior.
-void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space);
+void* Align(uint64_t alignment, uint64_t size, void*& ptr, uint64_t& space);
 }  // namespace tensorrt
 }  // namespace tensorflow
 
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
index 06bfb3269c..ad6b1d7d4c 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -20,11 +20,11 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
 
-bool RunTest(const int64_t alignment, const int64_t size,
-             const intptr_t orig_ptr_val, const int64_t orig_space) {
+bool RunTest(const uint64_t alignment, const uint64_t size,
+             const intptr_t orig_ptr_val, const uint64_t orig_space) {
   void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val);
   void* ptr = orig_ptr;
-  int64_t space = orig_space;
+  uint64_t space = orig_space;
   void* result = Align(alignment, size, ptr, space);
   if (result == nullptr) {
     EXPECT_EQ(orig_ptr, ptr);
@@ -43,25 +43,26 @@ bool RunTest(const int64_t alignment, const int64_t size,
 }
 
 TEST(TRTAllocatorTest, Align) {
-  for (const int64_t space : {1l, 2l, 3l, 4l, 7l, 8l, 9l, 10l, 16l, 32l, 511l,
-                              512l, 513l, 700l, 12345l, 1l << 32}) {
-    for (int64_t alignment = 1; alignment <= space * 4; alignment *= 2) {
+  for (const uint64_t space :
+       {1ul, 2ul, 3ul, 4ul, 7ul, 8ul, 9ul, 10ul, 16ul, 32ul, 511ul, 512ul,
+        513ul, 700ul, 12345ul, 1ul << 32}) {
+    for (uint64_t alignment = 1; alignment <= space * 4; alignment *= 2) {
       for (const uintptr_t ptr_val :
-           {1l, alignment == 1 ? 1l : alignment - 1, alignment, alignment + 1,
+           {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1,
             alignment + (alignment / 2)}) {
         if (ptr_val % alignment == 0) {
-          for (const int64_t size :
-               {1l, space == 1 ? 1l : space - 1, space, space + 1}) {
+          for (const uint64_t size :
+               {1ul, space == 1 ? 1ul : space - 1, space, space + 1}) {
             EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
           }
         } else {
           EXPECT_FALSE(RunTest(alignment, space, ptr_val, space));
-          const int64_t diff = alignment - ptr_val % alignment;
+          const uint64_t diff = alignment - ptr_val % alignment;
           if (space > diff) {
             EXPECT_TRUE(
                 RunTest(alignment, space - diff, ptr_val + diff, space - diff));
-            for (const int64_t size :
-                 {1l, space - diff > 1 ? space - diff - 1 : 1l, space - diff,
+            for (const uint64_t size :
+                 {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff,
                   space - diff + 1, space - 1}) {
               EXPECT_EQ(space - diff >= size,
                         RunTest(alignment, size, ptr_val, space));
-- 
GitLab


From 43908cc8e3cba7fad329e72598107fe5bfff5a88 Mon Sep 17 00:00:00 2001
From: HyoukJoong Lee <hyouklee@google.com>
Date: Wed, 19 Sep 2018 11:53:27 -0700
Subject: [PATCH 0390/1357] Remove non-determinism in model-parallel
 compilation

PiperOrigin-RevId: 213667385
---
 tensorflow/compiler/xla/service/BUILD         |  1 +
 .../xla/service/hlo_module_group_metadata.cc  | 20 ++++--
 .../xla/service/hlo_module_group_metadata.h   |  9 ++-
 .../xla/service/hlo_module_group_test.cc      | 64 +++++++++++++++++++
 4 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 4c3208a242..4b183b4350 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1169,6 +1169,7 @@ tf_cc_test(
         ":hlo",
         ":hlo_matchers",
         ":hlo_module_group",
+        ":hlo_module_group_metadata",
         ":hlo_parser",
         ":hlo_proto",
         "//tensorflow/compiler/xla:test",
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
index 9c01862a4b..83352ef91b 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
@@ -392,22 +392,28 @@ Status HloModuleGroupMetadata::AddCompanion(HloInstruction* instruction1,
   if (!ContainsKey(companion_set_index_, instruction1) &&
       !ContainsKey(companion_set_index_, instruction2)) {
     companion_sets_.push_back(
-        absl::make_unique<std::unordered_set<HloInstruction*>>());
+        absl::make_unique<std::vector<HloInstruction*>>());
     auto companion_set = companion_sets_.back().get();
-    companion_set->insert(instruction1);
-    companion_set->insert(instruction2);
+    companion_set->push_back(instruction1);
+    companion_set->push_back(instruction2);
     companion_set_index_[instruction1] = companion_sets_.size() - 1;
     companion_set_index_[instruction2] = companion_sets_.size() - 1;
   } else if (!ContainsKey(companion_set_index_, instruction1)) {
-    companion_sets_[companion_set_index_[instruction2]]->insert(instruction1);
+    companion_sets_[companion_set_index_[instruction2]]->push_back(
+        instruction1);
     companion_set_index_[instruction1] = companion_set_index_[instruction2];
   } else if (!ContainsKey(companion_set_index_, instruction2)) {
-    companion_sets_[companion_set_index_[instruction1]]->insert(instruction2);
+    companion_sets_[companion_set_index_[instruction1]]->push_back(
+        instruction2);
     companion_set_index_[instruction2] = companion_set_index_[instruction1];
   } else if (companion_set_index_[instruction1] !=
              companion_set_index_[instruction2]) {
-    companion_sets_[companion_set_index_[instruction1]]->insert(
-        Companions(instruction2).begin(), Companions(instruction2).end());
+    // At any point while building the companion sets, each instruction belongs
+    // to at most 1 companion set, so the union of two companion sets is
+    // concatenating two disjoint sets.
+    absl::c_copy(Companions(instruction2),
+                 std::back_inserter(
+                     *companion_sets_[companion_set_index_[instruction1]]));
     int64 index_to_remove = companion_set_index_[instruction2];
     for (HloInstruction* hlo : Companions(instruction2)) {
       companion_set_index_[hlo] = companion_set_index_[instruction1];
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
index 768b0c7eb3..278d94cdd3 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
@@ -169,14 +169,14 @@ class HloModuleGroupMetadata {
   // Returns the companion instructions for the given instruction.
   //
   // Precondition: IsCompanionWhile(instruction) is true.
-  const std::unordered_set<HloInstruction*>& Companions(
+  const std::vector<HloInstruction*>& Companions(
       const HloInstruction* instruction) const {
     CHECK_EQ(companion_set_index_.count(instruction), 1);
     return companion_set(companion_set_index_.at(instruction));
   }
 
   // Returns the companion set at the given index.
-  const std::unordered_set<HloInstruction*>& companion_set(int64 index) const {
+  const std::vector<HloInstruction*>& companion_set(int64 index) const {
     CHECK_LT(index, companion_sets_.size());
     return *companion_sets_[index];
   }
@@ -187,7 +187,7 @@ class HloModuleGroupMetadata {
   }
 
   // Returns the list of all companion sets in the HLO module group.
-  const std::vector<std::unique_ptr<std::unordered_set<HloInstruction*>>>&
+  const std::vector<std::unique_ptr<std::vector<HloInstruction*>>>&
   companion_sets() const {
     return companion_sets_;
   }
@@ -247,8 +247,7 @@ class HloModuleGroupMetadata {
   void DumpCollectedStats() const;
 
   // List of all companion instructions sets in the module.
-  std::vector<std::unique_ptr<std::unordered_set<HloInstruction*>>>
-      companion_sets_;
+  std::vector<std::unique_ptr<std::vector<HloInstruction*>>> companion_sets_;
 
   // Map from each companion while instruction to the index into companion_set_.
   tensorflow::gtl::FlatMap<const HloInstruction*, int64> companion_set_index_;
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_test.cc b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
index ebf790ba6f..b7b12cb72b 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/service/hlo_module_group_metadata.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
@@ -137,6 +138,69 @@ ENTRY %entry (a: f32[]) -> f32[] {
               ::testing::ElementsAre(op::Parameter()));
 }
 
+// Tests that the order of companion instructions in the companion set doesn't
+// change across runs.
+TEST_F(HloModuleGroupTest, ModuleGroupCompanionOrder) {
+  // A simple while loop template for core i sending to core i+1.
+  constexpr char text[] = R"(
+HloModule module_%d
+
+while_cond {
+  ROOT p = pred[] constant(true)
+}
+
+while_body {
+  param = s32[] parameter(0)
+  token.s = token[] after-all()
+  token.r = token[] after-all()
+  send = (s32[], u32[], token[]) send(param, token.s), channel_id=%d
+  send-done = token[] send-done(send), channel_id=%d
+  recv = (s32[], u32[], token[]) recv(token.r), channel_id=%d
+  ROOT recv-done = (s32[], token[]) recv-done(recv), channel_id=%d
+}
+
+ENTRY entry {
+  while_init = s32[] constant(1)
+  ROOT while = s32[] while(while_init), condition=while_cond, body=while_body
+}
+)";
+
+  // Try creating the module and the metadata kTrialCount times and check the
+  // companion instructions remain in the same order.
+  const int64 kTrialCount = 5;
+  const int64 kDeviceCount = 10;
+  std::vector<int64> companion_order;
+
+  for (int64 t = 0; t < kTrialCount; ++t) {
+    HloModuleGroup group(TestName());
+    for (int64 i = 0; i < kDeviceCount; ++i) {
+      const int64 send_channel = i;
+      const int64 recv_channel = i == 0 ? kDeviceCount - 1 : i - 1;
+      TF_ASSERT_OK_AND_ASSIGN(
+          std::unique_ptr<HloModule> module,
+          ParseHloString(absl::StrFormat(text, i, send_channel, send_channel,
+                                         recv_channel, recv_channel)));
+      group.push_back(std::move(module));
+    }
+    ASSERT_EQ(group.modules().size(), kDeviceCount);
+
+    TF_ASSERT_OK_AND_ASSIGN(auto metadata,
+                            HloModuleGroupMetadata::Build(group.modules()));
+    ASSERT_EQ(metadata->companion_sets().size(), 1);
+
+    std::vector<int64> module_ids;
+    for (HloInstruction* companion : *metadata->companion_sets()[0]) {
+      module_ids.push_back(metadata->GetModuleId(companion->GetModule()));
+    }
+
+    if (t == 0) {
+      companion_order = module_ids;
+    } else {
+      EXPECT_TRUE(absl::c_equal(companion_order, module_ids));
+    }
+  }
+}
+
 }  // namespace
 
 }  // namespace xla
-- 
GitLab


From a73942c6368eebc2d62096fb39dd397893d3f1b9 Mon Sep 17 00:00:00 2001
From: Shujian2015 <Shujian2015@users.noreply.github.com>
Date: Wed, 19 Sep 2018 15:04:49 -0400
Subject: [PATCH 0391/1357] Fixed broken links

---
 tensorflow/contrib/eager/README.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md
index 86d203452e..4bd2769e87 100644
--- a/tensorflow/contrib/eager/README.md
+++ b/tensorflow/contrib/eager/README.md
@@ -44,7 +44,6 @@ Installation instructions at https://www.tensorflow.org/install/
 
 For an introduction to eager execution in TensorFlow, see:
 
-- [User Guide](https://www.tensorflow.org/guide/eager) ([source](../../docs_src/guide/eager.md))
-- Notebook: [Basic Usage](python/examples/notebooks/1_basics.ipynb)
-- Notebook: [Gradients](python/examples/notebooks/2_gradients.ipynb)
-- Notebook: [Importing Data](python/examples/notebooks/3_datasets.ipynb)
+- [User Guide](https://www.tensorflow.org/guide/eager) ([source](https://github.com/tensorflow/docs/blob/master/site/en/tutorials/eager/index.md))
+- Notebook: [Basic Usage](https://github.com/tensorflow/docs/blob/master/site/en/tutorials/eager/eager_basics.ipynb)
+- Notebook: [Automatic differentiation and gradient tape](https://github.com/tensorflow/docs/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb)
-- 
GitLab


From c8b376ac1e7fdb0297079c298eba939aeb252354 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 19 Sep 2018 19:06:34 +0000
Subject: [PATCH 0392/1357] Update to apply the clang format changes returned 
 by `Experimental clang-format Check`

The following chanes has been applied to align with `Experimental clang-format Check` test:
```
diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
index e195cca..b9967fe 100644
--- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
+++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
@@ -136,8 +136,8 @@ void MPIRemoteRendezvous::RecvFromRemoteAsync(

   MPIRendezvousMgr* mgr =
       reinterpret_cast<MPIRendezvousMgr*>(this->rendezvous_mgr_);
-  mgr->QueueRequest(string(parsed.FullKey()), step_id_,
-                    std::move(request_call), rendezvous_call);
+  mgr->QueueRequest(string(parsed.FullKey()), step_id_, std::move(request_call),
+                    rendezvous_call);
 }

 MPIRemoteRendezvous::~MPIRemoteRendezvous() {}
```

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
index e195cca647..b9967fe76d 100644
--- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
+++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
@@ -136,8 +136,8 @@ void MPIRemoteRendezvous::RecvFromRemoteAsync(
 
   MPIRendezvousMgr* mgr =
       reinterpret_cast<MPIRendezvousMgr*>(this->rendezvous_mgr_);
-  mgr->QueueRequest(string(parsed.FullKey()), step_id_,
-                    std::move(request_call), rendezvous_call);
+  mgr->QueueRequest(string(parsed.FullKey()), step_id_, std::move(request_call),
+                    rendezvous_call);
 }
 
 MPIRemoteRendezvous::~MPIRemoteRendezvous() {}
-- 
GitLab


From 238424ffcf04c38561ed48ebadb16b3b3a770e2e Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Wed, 19 Sep 2018 12:03:27 -0700
Subject: [PATCH 0393/1357] [XLA:TF] Re-disable testRandomUniformIsInRange

The bug is still there and makes this test flakily fail with fp16.

PiperOrigin-RevId: 213669453
---
 tensorflow/compiler/tests/random_ops_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py
index c423fa5004..36ef6ed5fe 100644
--- a/tensorflow/compiler/tests/random_ops_test.py
+++ b/tensorflow/compiler/tests/random_ops_test.py
@@ -76,7 +76,8 @@ class RandomOpsTest(xla_test.XLATestCase):
     for dtype in self._random_types():
       # TODO (b/112272078): enable bfloat16 for CPU and GPU when the bug is
       # fixed.
-      if (self.device in ["XLA_GPU", "XLA_CPU"]) and (dtype == dtypes.bfloat16):
+      if (self.device in ["XLA_GPU", "XLA_CPU"
+                         ]) and (dtype in [dtypes.bfloat16, dtypes.half]):
         continue
       with self.cached_session() as sess:
         with self.test_scope():
-- 
GitLab


From c8e17b08263f3ba61a5fdf785e231e1c9f4029ca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 12:26:37 -0700
Subject: [PATCH 0394/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 213673402
---
 .../internal/optimized/optimized_ops.h        | 60 +++++++++------
 .../internal/reference/reference_ops.h        | 74 +++++++++++++------
 2 files changed, 90 insertions(+), 44 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 6a7e664e85..1a2d45166a 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -3804,11 +3804,11 @@ inline void LstmCell(
     uint8* concat_temp_data_uint8,
     const RuntimeShape& unextended_activ_temp_shape,
     int16* activ_temp_data_int16, gemmlowp::GemmContext* gemm_context) {
+  gemmlowp::ScopedProfilingLabel label(
+      "LstmCell/quantized (8bit external, 16bit internal)");
   int32 weights_zero_point = params.weights_zero_point;
   int32 accum_multiplier = params.accum_multiplier;
   int accum_shift = params.accum_shift;
-  gemmlowp::ScopedProfilingLabel label(
-      "LstmCell/quantized (8bit external, 16bit internal)");
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
@@ -5063,8 +5063,7 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
   LogSoftmax(params, input_shape, input_data, output_shape, output_data);
 }
 
-inline void Logistic(const LogisticParams& params,
-                     const RuntimeShape& input_shape, const float* input_data,
+inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) {
   gemmlowp::ScopedProfilingLabel label("Logistic");
   auto input_map = MapAsVector(input_data, input_shape);
@@ -5073,13 +5072,13 @@ inline void Logistic(const LogisticParams& params,
       input_map.array().unaryExpr(Eigen::internal::scalar_sigmoid_op<float>());
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
-                     const RuntimeShape& output_shape, float* output_data) {
-  LogisticParams params;
-  // No params currently needed by float Logistic.
-  Logistic(params, input_shape, input_data, output_shape, output_data);
+// Convenience version that allows, for example, generated-code calls to be
+// uniform between data types.
+inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
+                     const float* input_data, const RuntimeShape& output_shape,
+                     float* output_data) {
+  // Drop params: not needed.
+  Logistic(input_shape, input_data, output_shape, output_data);
 }
 
 inline void Logistic(const LogisticParams& params,
@@ -5315,22 +5314,21 @@ inline void Logistic(const int16* input_data, const RuntimeShape& input_shape,
   Logistic(params, input_shape, input_data, output_shape, output_data);
 }
 
-inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
-                 const float* input_data, const RuntimeShape& output_shape,
-                 float* output_data) {
+inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
+                 const RuntimeShape& output_shape, float* output_data) {
   gemmlowp::ScopedProfilingLabel label("Tanh");
   auto input_map = MapAsVector(input_data, input_shape);
   auto output_map = MapAsVector(output_data, output_shape);
   output_map.array() = input_map.array().tanh();
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
-                 const RuntimeShape& output_shape, float* output_data) {
-  TanhParams params;
-  // Currently no params needed for float Tanh.
-  Tanh(params, input_shape, input_data, output_shape, output_data);
+// Convenience version that allows, for example, generated-code calls to be
+// uniform between data types.
+inline void Tanh(const TanhParams&, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& output_shape,
+                 float* output_data) {
+  // Drop params: not needed.
+  Tanh(input_shape, input_data, output_shape, output_data);
 }
 
 inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
@@ -6385,6 +6383,16 @@ void Minimum(const RuntimeShape& input1_shape, const T* input1_data,
   output_map.array() = input1_map.array().min(min_value);
 }
 
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Minimum(const RuntimeShape& input1_shape, const T* input1_data,
+                    const RuntimeShape&, const T* input2_data,
+                    const RuntimeShape& output_shape, T* output_data) {
+  // Drop shape of second input: not needed.
+  Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
 template <typename T>
 void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
              const T* input2_data, const RuntimeShape& output_shape,
@@ -6396,6 +6404,16 @@ void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
   output_map.array() = input1_map.array().max(max_value);
 }
 
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
+                    const RuntimeShape&, const T* input2_data,
+                    const RuntimeShape& output_shape, T* output_data) {
+  // Drop shape of second input: not needed.
+  Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
 template <typename T>
 void TransposeIm2col(const ConvParams& params, uint8 zero_byte,
                      const RuntimeShape& input_shape, const T* input_data,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 76fa1944bc..bb1d30b216 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -1916,7 +1916,7 @@ inline void BroadcastSub4DSlow(const ArithmeticParams& params,
                                const float* input2_data,
                                const RuntimeShape& output_shape,
                                float* output_data) {
-  gemmlowp::ScopedProfilingLabel label("BroadcastAdd4DSlow/float");
+  gemmlowp::ScopedProfilingLabel label("BroadcastSub4DSlow/float");
   NdArrayDesc<4> desc1;
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -1957,7 +1957,7 @@ inline void BroadcastSub4DSlow(const ArithmeticParams& params,
                                const uint8* input2_data,
                                const RuntimeShape& output_shape,
                                uint8* output_data) {
-  gemmlowp::ScopedProfilingLabel label("BroadcastAdd4DSlow/uint8");
+  gemmlowp::ScopedProfilingLabel label("BroadcastSub4DSlow/uint8");
   NdArrayDesc<4> desc1;
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -2021,7 +2021,7 @@ inline void BroadcastSub4DSlow(const ArithmeticParams& params,
                                const int32* input2_data,
                                const RuntimeShape& output_shape,
                                int32* output_data) {
-  gemmlowp::ScopedProfilingLabel label("BroadcastAdd4DSlow/int32");
+  gemmlowp::ScopedProfilingLabel label("BroadcastSub4DSlow/int32");
   NdArrayDesc<4> desc1;
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -2061,7 +2061,7 @@ void BroadcastSub4DSlow(const ArithmeticParams& params,
                         const RuntimeShape& input1_shape, const T* input1_data,
                         const RuntimeShape& input2_shape, const T* input2_data,
                         const RuntimeShape& output_shape, T* output_data) {
-  gemmlowp::ScopedProfilingLabel label("BroadcastAdd4DSlow/templated");
+  gemmlowp::ScopedProfilingLabel label("BroadcastSub4DSlow/templated");
   NdArrayDesc<4> desc1;
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -3637,8 +3637,7 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
   LogSoftmax(params, input_shape, input_data, output_shape, output_data);
 }
 
-inline void Logistic(const LogisticParams& params,
-                     const RuntimeShape& input_shape, const float* input_data,
+inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) {
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
 
@@ -3649,13 +3648,13 @@ inline void Logistic(const LogisticParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
-                     const RuntimeShape& output_shape, float* output_data) {
-  LogisticParams params;
-  // No params currently needed by float Logistic.
-  Logistic(params, input_shape, input_data, output_shape, output_data);
+// Convenience version that allows, for example, generated-code calls to be
+// uniform between data types.
+inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
+                     const float* input_data, const RuntimeShape& output_shape,
+                     float* output_data) {
+  // Drop params: not needed.
+  Logistic(input_shape, input_data, output_shape, output_data);
 }
 
 inline void Logistic(const LogisticParams& params,
@@ -3741,9 +3740,8 @@ inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
   Logistic(params, input_shape, input_data, output_shape, output_data);
 }
 
-inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
-                 const float* input_data, const RuntimeShape& output_shape,
-                 float* output_data) {
+inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
+                 const RuntimeShape& output_shape, float* output_data) {
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
 
   for (int i = 0; i < flat_size; i++) {
@@ -3753,13 +3751,13 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
-                 const RuntimeShape& output_shape, float* output_data) {
-  TanhParams params;
-  // Currently no params needed for float Tanh.
-  Tanh(params, input_shape, input_data, output_shape, output_data);
+// Convenience version that allows, for example, generated-code calls to be
+// uniform between data types.
+inline void Tanh(const TanhParams&, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& output_shape,
+                 float* output_data) {
+  // Drop params: not needed.
+  Tanh(input_shape, input_data, output_shape, output_data);
 }
 
 inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
@@ -4735,6 +4733,16 @@ void Minimum(const RuntimeShape& input1_shape, const T* input1_data,
   }
 }
 
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Minimum(const RuntimeShape& input1_shape, const T* input1_data,
+                    const RuntimeShape&, const T* input2_data,
+                    const RuntimeShape& output_shape, T* output_data) {
+  // Drop shape of second input: not needed.
+  Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
 template <typename T>
 void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
              const T* input2_data, const RuntimeShape& output_shape,
@@ -4747,6 +4755,16 @@ void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
   }
 }
 
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
+                    const RuntimeShape&, const T* input2_data,
+                    const RuntimeShape& output_shape, T* output_data) {
+  // Drop shape of second input: not needed.
+  Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
 template <typename T, typename Op>
 void MaximumMinimumBroadcast4DSlow(const RuntimeShape& unextended_input1_shape,
                                    const T* input1_data,
@@ -4822,6 +4840,16 @@ void ArgMax(const RuntimeShape& input1_shape, const T1* input1_data,
             std::greater<T1>());
 }
 
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T1, typename T2, typename T3>
+inline void ArgMax(const RuntimeShape& input1_shape, const T1* input1_data,
+                   const RuntimeShape& input2_shape, const T3* input2_data,
+                   const RuntimeShape& output_shape, T2* output_data) {
+  // Drop shape of second input: not needed.
+  ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
 template <typename T>
 void Transpose(const TransposeParams& params,
                const RuntimeShape& unextended_input_shape, const T* input_data,
-- 
GitLab


From 0b97c406413fbf71897b28461b35470f3f14fd7e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 12:27:45 -0700
Subject: [PATCH 0395/1357] Adds an experimental package group to allow Swift
 and ObjC targets to depend on the "c_api" target.

PiperOrigin-RevId: 213673549
---
 tensorflow/contrib/lite/experimental/c/BUILD | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tensorflow/contrib/lite/experimental/c/BUILD b/tensorflow/contrib/lite/experimental/c/BUILD
index 835fc2595e..52e71619de 100644
--- a/tensorflow/contrib/lite/experimental/c/BUILD
+++ b/tensorflow/contrib/lite/experimental/c/BUILD
@@ -1,5 +1,12 @@
 package(default_visibility = ["//visibility:private"])
 
+package_group(
+    name = "experimental",
+    packages = [
+        "//tensorflow/contrib/lite/experimental/...",
+    ],
+)
+
 licenses(["notice"])  # Apache 2.0
 
 load(
@@ -51,6 +58,9 @@ cc_library(
     srcs = ["c_api.cc"],
     hdrs = ["c_api.h"],
     copts = tflite_copts(),
+    visibility = [
+        ":experimental",
+    ],
     deps = [
         ":c_api_internal",
         "//tensorflow/contrib/lite:context",
-- 
GitLab


From 1e821cd9a02b59a90a8b983759cf74eded16265f Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Wed, 19 Sep 2018 11:06:40 -0700
Subject: [PATCH 0396/1357] Fix bug in metrics sparse_categorical_accuracy and
 sparse_top_k_categorical_accuracy

---
 tensorflow/python/keras/metrics.py      | 15 ++++++++------
 tensorflow/python/keras/metrics_test.py | 26 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index e64241e5cf..2fd3244800 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -635,7 +635,9 @@ def categorical_accuracy(y_true, y_pred):
 
 @tf_export('keras.metrics.sparse_categorical_accuracy')
 def sparse_categorical_accuracy(y_true, y_pred):
-  y_true = math_ops.reduce_max(y_true, axis=-1)
+  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
+  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
+    y_true = array_ops.squeeze(y_true, [-1])
   y_pred = math_ops.argmax(y_pred, axis=-1)
 
   # If the expected labels are float, we need to cast the int returned by
@@ -654,11 +656,12 @@ def top_k_categorical_accuracy(y_true, y_pred, k=5):
 
 @tf_export('keras.metrics.sparse_top_k_categorical_accuracy')
 def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
-  return K.mean(
-      nn.in_top_k(y_pred,
-                  math_ops.cast(math_ops.reduce_max(y_true, axis=-1), 'int32'),
-                  k),
-      axis=-1)
+  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
+  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
+    y_true = array_ops.squeeze(y_true, [-1])
+
+  return K.mean(nn.in_top_k(y_pred, math_ops.cast(y_true, 'int32'), k),
+                axis=-1)
 
 # Aliases
 
diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py
index 4195ea18ad..43ac5b7ead 100644
--- a/tensorflow/python/keras/metrics_test.py
+++ b/tensorflow/python/keras/metrics_test.py
@@ -54,6 +54,18 @@ class KerasMetricsTest(test.TestCase):
       y_pred = K.variable(np.random.random((6, 7)))
       self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6,))
 
+      # Test correctness if the shape of y_true is (num_samples,)
+      y_true = K.variable([1., 0., 0., 0.])
+      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
+      print(K.eval(metric(y_true, y_pred)))
+      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])
+
+      # Test correctness if the shape of y_true is (num_samples, 1)
+      y_true = K.variable([[1.], [0.], [0.], [0.]])
+      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
+      print(K.eval(metric(y_true, y_pred)))
+      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])
+
   def test_sparse_categorical_accuracy_float(self):
     with self.cached_session():
       metric = metrics.sparse_categorical_accuracy
@@ -79,6 +91,7 @@ class KerasMetricsTest(test.TestCase):
 
   def test_sparse_top_k_categorical_accuracy(self):
     with self.cached_session():
+      # Test correctness if the shape of y_true is (num_samples, 1)
       y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
       y_true = K.variable(np.array([[1], [0]]))
       result = K.eval(
@@ -91,6 +104,19 @@ class KerasMetricsTest(test.TestCase):
           metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
       self.assertEqual(result, 0.)
 
+      # Test correctness if the shape of y_true is (num_samples,)
+      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
+      y_true = K.variable(np.array([1, 0]))
+      result = K.eval(
+        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
+      self.assertEqual(result, 1)
+      result = K.eval(
+        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
+      self.assertEqual(result, 0.5)
+      result = K.eval(
+        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
+      self.assertEqual(result, 0.)
+
   def test_top_k_categorical_accuracy(self):
     with self.cached_session():
       y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
-- 
GitLab


From ff11877b101fe9c19021e8d7b43841031eb71cc3 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Wed, 19 Sep 2018 12:40:56 -0700
Subject: [PATCH 0397/1357] Simplify ir_emitter_unnested so that it doesn't
 take a look at conv custom call and try to understand what's inside.
 convolution_thunk does it anyway.

PiperOrigin-RevId: 213676051
---
 .../xla/service/gpu/convolution_thunk.cc      | 42 ++++++++++++++++---
 .../xla/service/gpu/convolution_thunk.h       | 25 +++--------
 .../xla/service/gpu/ir_emitter_unnested.cc    | 31 ++++----------
 3 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
index 3a23ac1d63..85f3682a5a 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
@@ -29,21 +29,51 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
-using se::dnn::AlgorithmDesc;
+ConvolutionThunk::ConvolutionThunk(
+    const HloCustomCallInstruction* cudnn_call,
+    std::vector<BufferAllocation::Slice> operand_slices,
+    BufferAllocation::Slice result_slice, BufferAllocation::Slice scratch_slice,
+    BufferAllocation::Slice tuple_result_slice)
+    : Thunk(Kind::kConvolution, cudnn_call),
+      cudnn_call_(cudnn_call),
+      operand_buffers_(std::move(operand_slices)),
+      result_buffer_(result_slice),
+      scratch_buffer_(scratch_slice),
+      tuple_result_buffer_(tuple_result_slice) {}
 
 Status ConvolutionThunk::ExecuteOnStream(
     const BufferAllocations& buffer_allocations, se::Stream* stream,
     HloExecutionProfiler* profiler) {
   CudnnConvParams params;
+  TF_RETURN_IF_ERROR(PopulateCudnnConvParams(cudnn_call_, &params));
+
+  switch (params.kind) {
+    case CudnnConvKind::kForward:
+      params.input_buf =
+          buffer_allocations.GetDeviceAddress(operand_buffers_[0]);
+      params.filter_buf =
+          buffer_allocations.GetDeviceAddress(operand_buffers_[1]);
+      params.output_buf = buffer_allocations.GetDeviceAddress(result_buffer_);
+      break;
+    case CudnnConvKind::kBackwardInput:
+      params.input_buf = buffer_allocations.GetDeviceAddress(result_buffer_);
+      params.filter_buf =
+          buffer_allocations.GetDeviceAddress(operand_buffers_[1]);
+      params.output_buf =
+          buffer_allocations.GetDeviceAddress(operand_buffers_[0]);
+      break;
+    case CudnnConvKind::kBackwardFilter:
+      params.input_buf =
+          buffer_allocations.GetDeviceAddress(operand_buffers_[0]);
+      params.filter_buf = buffer_allocations.GetDeviceAddress(result_buffer_);
+      params.output_buf =
+          buffer_allocations.GetDeviceAddress(operand_buffers_[1]);
+      break;
+  }
 
-  params.input_buf = buffer_allocations.GetDeviceAddress(input_buffer_);
-  params.filter_buf = buffer_allocations.GetDeviceAddress(filter_buffer_);
-  params.output_buf = buffer_allocations.GetDeviceAddress(output_buffer_);
   se::DeviceMemoryBase scratch =
       buffer_allocations.GetDeviceAddress(scratch_buffer_);
 
-  TF_RETURN_IF_ERROR(PopulateCudnnConvParams(cudnn_call_, &params));
-
   auto op_profiler = profiler->MakeScopedInstructionProfiler(hlo_instruction());
   TF_RETURN_IF_ERROR(RunCudnnConvolution(params, scratch, stream));
 
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
index d7d1f91fba..f53bc54198 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
@@ -42,24 +42,12 @@ class ConvolutionThunk : public Thunk {
   // Constructs a thunk for launching a DNN convolution.  When run, it will
   // write a tuple (result, scratch_memory) into `tuple_result_buffer`.
   //
-  // Note that "output" here doesn't refer to the output from running this
-  // thunk, but rather to the "output" of a hypothetical forward convolution
-  // that corresponds to this input+filter+output triple.  That is, the result
-  // generated by this thunk is "output" for forward convs, "input" for
-  // backward-input convs, and "filter" for backward-filter convs.
+  // operand_slices should be in the same order as cudnn_call->operands().
   ConvolutionThunk(const HloCustomCallInstruction* cudnn_call,
-                   BufferAllocation::Slice input_slice,
-                   BufferAllocation::Slice filter_slice,
-                   BufferAllocation::Slice output_slice,
+                   std::vector<BufferAllocation::Slice> operand_slices,
+                   BufferAllocation::Slice result_slice,
                    BufferAllocation::Slice scratch_slice,
-                   BufferAllocation::Slice tuple_result_slice)
-      : Thunk(Kind::kConvolution, cudnn_call),
-        cudnn_call_(cudnn_call),
-        input_buffer_(std::move(input_slice)),
-        filter_buffer_(std::move(filter_slice)),
-        output_buffer_(std::move(output_slice)),
-        scratch_buffer_(std::move(scratch_slice)),
-        tuple_result_buffer_(std::move(tuple_result_slice)) {}
+                   BufferAllocation::Slice tuple_result_slice);
 
   ConvolutionThunk(const ConvolutionThunk&) = delete;
   ConvolutionThunk& operator=(const ConvolutionThunk&) = delete;
@@ -71,9 +59,8 @@ class ConvolutionThunk : public Thunk {
 
  private:
   const HloCustomCallInstruction* cudnn_call_;
-  BufferAllocation::Slice input_buffer_;
-  BufferAllocation::Slice filter_buffer_;
-  BufferAllocation::Slice output_buffer_;
+  std::vector<BufferAllocation::Slice> operand_buffers_;
+  BufferAllocation::Slice result_buffer_;
   BufferAllocation::Slice scratch_buffer_;
   BufferAllocation::Slice tuple_result_buffer_;
 };
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index b669881026..c792dd2ddb 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -465,35 +465,18 @@ Status IrEmitterUnnested::HandleCustomCall(HloInstruction* custom_call) {
 
   if (IsCustomCallToDnnConvolution(*custom_call)) {
     const auto& assn = ir_emitter_context_->buffer_assignment();
-    auto lhs_slice = GetAllocationSlice(*custom_call->operand(0));
-    auto rhs_slice = GetAllocationSlice(*custom_call->operand(1));
+    std::vector<BufferAllocation::Slice> operand_slices;
+    operand_slices.reserve(custom_call->operand_count());
+    for (const auto* operand : custom_call->operands()) {
+      operand_slices.push_back(GetAllocationSlice(*operand));
+    }
     auto tuple_result_slice = GetAllocationSlice(*custom_call);
     auto conv_result_slice = assn.GetUniqueSlice(custom_call, {0}).ValueOrDie();
     auto scratch_slice = assn.GetUniqueSlice(custom_call, {1}).ValueOrDie();
 
-    const auto& target = custom_call->custom_call_target();
-    BufferAllocation::Slice input_slice, filter_slice, output_slice;
-
-    if (target == kCudnnConvForwardCallTarget) {
-      input_slice = lhs_slice;
-      filter_slice = rhs_slice;
-      output_slice = conv_result_slice;
-    } else if (target == kCudnnConvBackwardInputCallTarget) {
-      input_slice = conv_result_slice;
-      filter_slice = rhs_slice;
-      output_slice = lhs_slice;
-    } else if (target == kCudnnConvBackwardFilterCallTarget) {
-      input_slice = lhs_slice;
-      filter_slice = conv_result_slice;
-      output_slice = rhs_slice;
-    } else {
-      LOG(FATAL) << "Unexpected custom call target: "
-                 << custom_call->custom_call_target();
-    }
-
     thunk_sequence_->emplace_back(absl::make_unique<ConvolutionThunk>(
-        Cast<HloCustomCallInstruction>(custom_call), input_slice, filter_slice,
-        output_slice, scratch_slice, tuple_result_slice));
+        Cast<HloCustomCallInstruction>(custom_call), std::move(operand_slices),
+        conv_result_slice, scratch_slice, tuple_result_slice));
     return Status::OK();
   }
 
-- 
GitLab


From c95353498e180e50f701dcb8331b994d9e5fad0b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 12:55:02 -0700
Subject: [PATCH 0398/1357] Fixes in ResolveReorderAxes. The main issue is we
 were keeping the input array, updating it in place and discarding the output
 array. That was a problem when the input array had multiple consumer ops. Now
 we're keeping the output array instead, which is the correct thing to do.
 However, in order to minimize disruption, we keep using the input array's
 name whenever possible, by means of some array renamings.

PiperOrigin-RevId: 213678219
---
 .../resolve_reorder_axes.cc                   | 81 ++++++++++++-------
 tensorflow/contrib/lite/toco/model.h          |  1 +
 2 files changed, 53 insertions(+), 29 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
index 8266e2c205..8e150db6fa 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
@@ -25,29 +25,57 @@ limitations under the License.
 
 namespace toco {
 
+namespace {
+
+void RenameArray(Model* model, const string& oldname,
+                 const string& desired_newname) {
+  const string& newname = AvailableArrayName(*model, desired_newname);
+  auto& arrays = model->GetMutableArrayMap();
+  arrays[newname] = std::move(arrays[oldname]);
+  arrays.erase(oldname);
+  for (const auto& op : model->operators) {
+    for (string& input : op->inputs) {
+      if (input == oldname) {
+        input = newname;
+      }
+    }
+    for (string& output : op->outputs) {
+      if (output == oldname) {
+        output = newname;
+      }
+    }
+  }
+}
+
+}  // namespace
+
 // Reorder the elements of an input_array according to the input_axes_order and
 // output_axes_order. Then adjust the shapes of the input and output arrays
 // accordingly. Note that input_array must have a buffer (that is, it is a
 // constant array).
 template <typename T, ArrayDataType DataType>
 void ReorderAxes(AxesOrder input_axes_order, AxesOrder output_axes_order,
-                 Array* input_array, Array* output_array) {
-  CHECK(input_array->buffer->type == DataType);
-  CHECK(!output_array->buffer);
-  auto& input_data = input_array->GetMutableBuffer<DataType>().data;
-  std::vector<T> reordered_data;
-  reordered_data.resize(RequiredBufferSizeForShape(output_array->shape()));
+                 const Array& input_array, Array* output_array) {
+  DCHECK(input_array.buffer->type == DataType);
+  DCHECK(!output_array->buffer);
+  const auto& input_data = input_array.GetBuffer<DataType>().data;
+  auto& output_data = output_array->GetMutableBuffer<DataType>().data;
+  output_data.resize(RequiredBufferSizeForShape(output_array->shape()));
   // TODO(b/62904716) Shapes should be used directly.
-  Shape input_shape = input_array->shape();
+  Shape input_shape = input_array.shape();
   Shape output_shape = output_array->shape();
   if (AxesCount(input_axes_order) == 2) {
     UnextendShape(&input_shape, 2);
     UnextendShape(&output_shape, 2);
   }
   ShuffleArray(input_shape, input_axes_order, output_axes_order, output_shape,
-               input_data.data(), reordered_data.data());
-  input_data = reordered_data;
-  input_array->copy_shape(output_array->shape());
+               input_data.data(), output_data.data());
+  if (input_array.minmax) {
+    output_array->GetOrCreateMinMax() = input_array.GetMinMax();
+  }
+  if (input_array.narrow_range) {
+    output_array->narrow_range = true;
+  }
 }
 
 bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
@@ -57,8 +85,11 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
     return false;
   }
   auto* reorder_op = static_cast<ReorderAxesOperator*>(op);
-  const auto& input_array_name = reorder_op->inputs[0];
-  const auto& output_array_name = reorder_op->outputs[0];
+
+  // Intentionally copies, not references.
+  const string input_array_name = reorder_op->inputs[0];
+  const string output_array_name = reorder_op->outputs[0];
+
   auto& input_array = model->GetArray(input_array_name);
   auto& output_array = model->GetArray(output_array_name);
   if (!input_array.buffer) {
@@ -72,31 +103,23 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
   if (input_array.buffer->type == ArrayDataType::kFloat) {
     ReorderAxes<float, ArrayDataType::kFloat>(reorder_op->input_axes_order,
                                               reorder_op->output_axes_order,
-                                              &input_array, &output_array);
-  } else if (input_array.buffer->type == ArrayDataType::kInt32) {
+                                              input_array, &output_array);
+  } else if (input_array.buffer->type == ArrayDataType::kUint8) {
+    // TODO(benoitjacob): This path seems unused.
+    // ReorderAxes is only used when importing from
+    // TensorFlow GraphDef, which does not support quantized nodes.
     ReorderAxes<uint8, ArrayDataType::kUint8>(reorder_op->input_axes_order,
                                               reorder_op->output_axes_order,
-                                              &input_array, &output_array);
+                                              input_array, &output_array);
   } else {
     LOG(FATAL) << "Cannot ReorderAxes unless input buffer is float or uint8.";
   }
 
-  input_array.copy_shape(output_array.shape());
-
-  // Update the edges of the graph to point to the input array
-  for (const auto& other_op : model->operators) {
-    for (auto& input : other_op->inputs) {
-      if (input == output_array_name) {
-        input = input_array_name;
-      }
-    }
-  }
-
   AddMessageF("Reordered axes for array %s", input_array_name);
 
-  // Remove the op and output array.
-  model->EraseArray(output_array_name);
-  model->operators.erase(it);
+  DeleteOpAndArraysIfUnused(model, op);
+  RenameArray(model, output_array_name, input_array_name);
+
   return true;
 }
 
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 0fd2732973..6e207fdf54 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -2084,6 +2084,7 @@ class Model {
     }
   }
   const ArrayMap& GetArrayMap() const { return arrays; }
+  ArrayMap& GetMutableArrayMap() { return arrays; }
 
   int64 ArithmeticOpsCount() const { return ops_count; }
 
-- 
GitLab


From f72126c164ea67b226368bf51811c8528d81093b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 12:55:34 -0700
Subject: [PATCH 0399/1357] Two improvements in resolve_tensorflow_matmul: 1.
 Before inserting a new Transpose node, check if there already is one that   
 may be reused. In practice, there are two cases: either the array being   
 transposed is a constant (by far the most common case) or it's not.     * If
 it is constant, then this doesn't really make a difference:      
 ResolveConstantTranspose runs anyway, eliminating these Transpose nodes      
 and also mootifying this change as it leaves no Transpose node to be      
 reused. So in that case, constant-array-deduping is really the only      
 thing that prevents duplication of data.     * If it is not constant, that's
 where this new logic really helps, as       the resulting Transpose nodes are
 here to stay in the final graph,       and this avoids inserting more than
 are needed. 2. transpose_a is not supported. However, rather than CHECK-fail,
 it's more    useful to have this graph transformation bail with a log
 message. The    resulting 'unresolved' MatMul node could still be handled in
 some way    at the TFLite level, or we could end up having support for MatMul
 per se.

PiperOrigin-RevId: 213678294
---
 .../resolve_tensorflow_matmul.cc              | 80 ++++++++++++++++---
 1 file changed, 67 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc
index fcf30bd347..65346c4fe4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc
@@ -24,6 +24,37 @@ limitations under the License.
 
 namespace toco {
 
+namespace {
+
+TransposeOperator* FindTransposeOpWithInput(const Model& model,
+                                            const string& array_name) {
+  for (auto it = model.operators.begin(); it != model.operators.end(); ++it) {
+    Operator* op = it->get();
+    if (op->type != OperatorType::kTranspose) {
+      continue;
+    }
+    if (op->inputs[0] != array_name) {
+      continue;
+    }
+    const auto& permutation_array = model.GetArray(op->inputs[1]);
+    if (permutation_array.data_type != ArrayDataType::kInt32) {
+      continue;
+    }
+    const auto& permutation_data =
+        permutation_array.GetBuffer<ArrayDataType::kInt32>().data;
+    if (permutation_data.size() != 2) {
+      continue;
+    }
+    if (permutation_data[0] != 1 || permutation_data[1] != 0) {
+      continue;
+    }
+    return static_cast<TransposeOperator*>(op);
+  }
+  return nullptr;
+}
+
+}  // namespace
+
 bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) {
   auto matmul_it = model->operators.begin() + op_index;
   if (matmul_it->get()->type != OperatorType::kMatMul) {
@@ -37,7 +68,13 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) {
   // TransposeOperator.  However, the second input is supposed to be 2D, so we
   // can actually handle transposition of that matrix, which happens to be more
   // common anyway.
-  CHECK(!matmul_op->transpose_a);
+  if (matmul_op->transpose_a) {
+    AddMessageF(
+        "Not replacing %s by a FullyConnected operator, because it has "
+        "the transpose_a attribute",
+        LogName(*matmul_op));
+    return false;
+  }
 
   // Reorder the axes on the second input. TensorFlow uses row-major ordering
   // on both inputs, however this is inefficient for the FullyConnected
@@ -46,18 +83,35 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) {
   string input_lhs = matmul_op->inputs[0];
   string input_rhs = matmul_op->inputs[1];
   if (!matmul_op->transpose_b) {
-    auto* transpose_op = new TransposeOperator;
-    transpose_op->inputs = {
-        matmul_op->inputs[1],
-        CreateInt32Array(model,
-                         AvailableArrayName(
-                             *model, matmul_op->inputs[1] + "/transpose/perm"),
-                         {1, 0})};
-    transpose_op->outputs = {
-        AvailableArrayName(*model, matmul_op->inputs[1] + "/transpose")};
-    model->GetOrCreateArray(transpose_op->outputs[0]);
-    model->operators.emplace(matmul_it, transpose_op);
-
+    // Need to transpose input_rhs, by inserting a TransposeOperator.
+    // First, check if there already is a TransposeOperator transposing that
+    // array, so we can just reuse it.
+    auto* transpose_op = FindTransposeOpWithInput(*model, input_rhs);
+    if (!transpose_op) {
+      AddMessageF(
+          "While replacing %s by a FullyConnected operator, created new "
+          "Transpose op wrapping RHS input array %s",
+          LogName(*matmul_op), input_rhs);
+      // No such TransposeOperator found. Create one now.
+      transpose_op = new TransposeOperator;
+      transpose_op->inputs = {
+          input_rhs,
+          CreateInt32Array(
+              model, AvailableArrayName(*model, input_rhs + "/transpose/perm"),
+              {1, 0})};
+      transpose_op->outputs = {
+          AvailableArrayName(*model, input_rhs + "/transpose")};
+      model->GetOrCreateArray(transpose_op->outputs[0]);
+      model->operators.emplace(matmul_it, transpose_op);
+      // Sanity check
+      DCHECK_EQ(transpose_op, FindTransposeOpWithInput(*model, input_rhs));
+    } else {
+      AddMessageF(
+          "While replacing %s by a FullyConnected operator, reused existing "
+          "Transpose op wrapping RHS input array %s",
+          LogName(*matmul_op), input_rhs);
+    }
+    // Re-wire: have the matmul consume the transposed array.
     input_rhs = transpose_op->outputs[0];
   }
 
-- 
GitLab


From 2c1f9e20aeedd398b813c3410553cb8485656df0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 13:14:10 -0700
Subject: [PATCH 0400/1357] Remove the CHECK added for debugging.

PiperOrigin-RevId: 213681549
---
 .../contrib/boosted_trees/kernels/split_handler_ops.cc     | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
index 51e0c2e431..af7006bff2 100644
--- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
@@ -579,13 +579,6 @@ class BuildSparseInequalitySplitsOp : public OpKernel {
         const int end_index =
             partition_boundaries[non_empty_partitions[root_idx]][j + 1]
                 .start_index;
-        CHECK(bucket_ids_and_dimensions(start_index, 1) ==
-              bucket_ids_and_dimensions(end_index - 1, 1))
-            << "For bucket " << bucket_ids_and_dimensions(start_index, 0)
-            << " the dimension was "
-            << bucket_ids_and_dimensions(start_index, 1) << " and for "
-            << bucket_ids_and_dimensions(end_index - 1, 0) << " "
-            << bucket_ids_and_dimensions(end_index - 1, 1);
         if (bucket_ids_and_dimensions(start_index, 0) == bias_feature_id) {
           // 0-dimension case which has a first bucket for catch all feature.
           CHECK(bucket_ids_and_dimensions(start_index, 1) == 0)
-- 
GitLab


From 125bf1dbb76c05bf5f88f14e77387ce35f986621 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 13:28:39 -0700
Subject: [PATCH 0401/1357] Fixes bits/bytes unit error in comment.

PiperOrigin-RevId: 213684048
---
 tensorflow/core/kernels/multinomial_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/multinomial_op.cc b/tensorflow/core/kernels/multinomial_op.cc
index 7a64788448..82dfece4a2 100644
--- a/tensorflow/core/kernels/multinomial_op.cc
+++ b/tensorflow/core/kernels/multinomial_op.cc
@@ -75,7 +75,7 @@ struct MultinomialFunctor<CPUDevice, T, OutputType> {
       // lambda.  Since we want to let each worker have its own copy, we pass
       // "gen" by reference and explicitly do a copy assignment here.
       random::PhiloxRandom gen_copy = gen;
-      // Skip takes units of 128 bytes.  +3 is so rounding doesn't lead to
+      // Skip takes units of 128 bits.  +3 is so rounding doesn't lead to
       // us using the same state in different batches.
       gen_copy.Skip(start_row * (num_samples + 3) / 4);
       random::SimplePhilox simple_philox(&gen_copy);
-- 
GitLab


From eb2fe78e60ea8465443d8c653381a788ec581e49 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Wed, 19 Sep 2018 13:43:17 -0700
Subject: [PATCH 0402/1357] [tf.data] MapVectorization optimization: C++
 conversion framework to vectorize a MapDefun function. Also implements
 conversion for two ops: Cast and Unpack.

PiperOrigin-RevId: 213686720
---
 .../core/grappler/optimizers/data/BUILD       |  76 +++
 .../optimizers/data/function_utils.cc         | 196 ++++++
 .../grappler/optimizers/data/function_utils.h | 108 ++++
 .../optimizers/data/function_utils_test.cc    | 164 +++++
 .../grappler/optimizers/data/fusion_utils.cc  |   3 +-
 .../optimizers/data/fusion_utils_test.cc      |   5 +-
 .../grappler/optimizers/data/graph_utils.cc   |  82 +--
 .../grappler/optimizers/data/graph_utils.h    |  26 -
 .../optimizers/data/graph_utils_test.cc       |  82 ---
 .../optimizers/data/map_vectorization.cc      |   5 +-
 .../optimizers/data/vectorization_utils.cc    | 341 ++++++++++
 .../optimizers/data/vectorization_utils.h     |  90 +++
 .../data/vectorization_utils_test.cc          | 600 ++++++++++++++++++
 13 files changed, 1596 insertions(+), 182 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/function_utils.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/function_utils.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/function_utils_test.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization_utils.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc

diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index e84df10778..7128a50be0 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -49,6 +49,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_utils",
+        ":function_utils",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -67,6 +68,7 @@ tf_cc_test(
     srcs = ["fusion_utils_test.cc"],
     visibility = ["//visibility:public"],
     deps = [
+        ":function_utils",
         ":fusion_utils",
         ":graph_utils",
         "//tensorflow/core:framework",
@@ -77,6 +79,40 @@ tf_cc_test(
     ] + tf_protos_all(),
 )
 
+cc_library(
+    name = "function_utils",
+    srcs = ["function_utils.cc"],
+    hdrs = [
+        "function_utils.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/grappler:mutable_graph_view",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:utils",
+    ] + tf_protos_all(),
+)
+
+tf_cc_test(
+    name = "function_utils_test",
+    srcs = ["function_utils_test.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":function_utils",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+        "//tensorflow/core/kernels:cast_op",
+        "//tensorflow/tools/graph_transforms:transform_utils",
+    ],
+)
+
 cc_library(
     name = "graph_utils",
     srcs = ["graph_utils.cc"],
@@ -137,6 +173,7 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        ":function_utils",
         ":graph_utils",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
@@ -409,3 +446,42 @@ tf_cc_test(
         "//tensorflow/core/grappler:grappler_item",
     ],
 )
+
+cc_library(
+    name = "vectorization_utils",
+    srcs = ["vectorization_utils.cc"],
+    hdrs = [
+        "vectorization_utils.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":function_utils",
+        ":graph_utils",
+        "@com_google_absl//absl/strings",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/grappler:mutable_graph_view",
+        "//tensorflow/core/grappler:utils",
+        "//tensorflow/core/grappler/utils:functions",
+    ] + tf_protos_all(),
+)
+
+tf_cc_test(
+    name = "vectorization_utils_test",
+    srcs = ["vectorization_utils_test.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":function_utils",
+        ":vectorization_utils",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+        "//tensorflow/core/kernels:cast_op",
+        "//tensorflow/tools/graph_transforms:transform_utils",
+    ] + tf_protos_all(),
+)
diff --git a/tensorflow/core/grappler/optimizers/data/function_utils.cc b/tensorflow/core/grappler/optimizers/data/function_utils.cc
new file mode 100644
index 0000000000..e95ea1a4c1
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/function_utils.cc
@@ -0,0 +1,196 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+
+#include "tensorflow/core/framework/device_base.h"
+#include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace function_utils {
+namespace {
+
+template <typename Predicate, typename Collection>
+std::vector<int> GetElementIndicesWithPredicate(const Predicate& predicate,
+                                                const Collection& collection) {
+  std::vector<int> indices = {};
+  unsigned idx = 0;
+  for (auto&& element : collection) {
+    if (predicate(element)) {
+      indices.push_back(idx);
+    }
+    idx++;
+  }
+  return indices;
+}
+
+}  // namespace
+
+FunctionDefTensorDesc::FunctionDefTensorDesc(const string& node_name,
+                                             const string& output, int position)
+    : node_name(node_name), node_output(output), position(position) {
+  full_str = strings::StrCat(node_name, ":", node_output, ":", position);
+}
+
+FunctionDefTensorDesc::FunctionDefTensorDesc(const string& input) {
+  // Parses node_name:node_output:position string into its components.
+  full_str = input;
+  StringPiece capture;
+  StringPiece remaining;
+
+  // Parse "node_name"
+  if (strings::Scanner(input)
+          .One(strings::Scanner::LETTER_DIGIT_DOT_UNDERSCORE)
+          .Any(strings::Scanner::LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE)
+          .GetResult(&remaining, &capture)) {
+    node_name = string(capture.data(), capture.size());
+  }
+
+  // Parse "node_output" if it exists
+  if (strings::Scanner(remaining)
+          .OneLiteral(":")
+          .RestartCapture()
+          .One(strings::Scanner::LETTER)
+          .Any(strings::Scanner::LETTER_DIGIT_UNDERSCORE)
+          .GetResult(&remaining, &capture)) {
+    node_output = string(capture.data(), capture.size());
+  }
+
+  // Parse "position" if it exists
+  if (strings::Scanner(remaining)
+          .OneLiteral(":")
+          .RestartCapture()
+          .Many(strings::Scanner::DIGIT)
+          .GetResult(nullptr, &capture)) {
+    CHECK(strings::safe_strto32(capture, &position));
+  }
+}
+
+// TODO(rachelim): Create a utility class similar to MutableGraphView for
+// FunctionDefs, and use that to manipulate functions. It'll be more
+// performant if we kept mappings of nodes->inputs/outputs, so that we don't
+// have to search over all nodes each time.
+// Note that we're not using GrapplerFunctionItem because it doesn't cover
+// some of our desired uses (eg changing the outputs of a function), and the
+// FunctionDef -> GraphDef conversion isn't really necessary in this case.
+void ReplaceReferences(const string& from, const string& to,
+                       FunctionDef* func) {
+  for (NodeDef& n : *func->mutable_node_def()) {
+    std::replace(n.mutable_input()->begin(), n.mutable_input()->end(), from,
+                 to);
+  }
+
+  for (auto& p : *func->mutable_ret()) {
+    if (p.second == from) {
+      p.second = to;
+    }
+  }
+}
+
+void AddFunctionOutputWithUniqueName(StringPiece prefix,
+                                     StringPiece output_tensor_name,
+                                     FunctionDef* function, DataType dt) {
+  string name = string(prefix);
+  int id = function->signature().output_arg_size();
+  while (ContainsFunctionOutputWithName(name, *function)) {
+    name = strings::StrCat(prefix, "/_", id);
+    ++id;
+  }
+  auto* output = function->mutable_signature()->mutable_output_arg()->Add();
+  output->set_name(name);
+  output->set_type(dt);
+
+  (*function->mutable_ret())[name] = string(output_tensor_name);
+}
+
+NodeDef* AddNode(StringPiece name, StringPiece op,
+                 const std::vector<string>& inputs,
+                 const std::vector<std::pair<string, AttrValue>>& attributes,
+                 FunctionDef* fd) {
+  NodeDef* node = fd->add_node_def();
+  if (!name.empty()) {
+    node->set_name(string(name));
+  } else {
+    SetUniqueFunctionNodeName(op, fd, node);
+  }
+  node->set_op(string(op));
+  for (const string& input : inputs) {
+    node->add_input(input);
+  }
+  for (auto attr : attributes) {
+    (*node->mutable_attr())[attr.first] = attr.second;
+  }
+  return node;
+}
+
+bool ContainsFunctionNodeWithName(StringPiece name,
+                                  const FunctionDef& function) {
+  return FindFunctionNodeWithName(name, function) != -1;
+}
+
+bool ContainsFunctionNodeWithOp(StringPiece op, const FunctionDef& function) {
+  return FindFunctionNodeWithOp(op, function) != -1;
+}
+
+bool ContainsFunctionOutputWithName(StringPiece name,
+                                    const FunctionDef& function) {
+  return FindFunctionOutputWithName(name, function) != -1;
+}
+
+int FindFunctionInputWithName(StringPiece name, const FunctionDef& function) {
+  std::vector<int> indices = GetElementIndicesWithPredicate(
+      [&name](const OpDef_ArgDef& arg) { return arg.name() == name; },
+      function.signature().input_arg());
+  return indices.empty() ? -1 : indices.front();
+}
+
+int FindFunctionOutputWithName(StringPiece name, const FunctionDef& function) {
+  std::vector<int> indices = GetElementIndicesWithPredicate(
+      [&name](const OpDef_ArgDef& arg) { return arg.name() == name; },
+      function.signature().output_arg());
+  return indices.empty() ? -1 : indices.front();
+}
+
+int FindFunctionNodeWithName(StringPiece name, const FunctionDef& function) {
+  std::vector<int> indices = GetElementIndicesWithPredicate(
+      [&name](const NodeDef& node) { return node.name() == name; },
+      function.node_def());
+  return indices.empty() ? -1 : indices.front();
+}
+
+int FindFunctionNodeWithOp(StringPiece op, const FunctionDef& function) {
+  std::vector<int> indices = GetElementIndicesWithPredicate(
+      [&op](const NodeDef& node) { return node.op() == op; },
+      function.node_def());
+
+  return indices.empty() ? -1 : indices.front();
+}
+
+void SetUniqueFunctionNodeName(StringPiece prefix, FunctionDef* function,
+                               NodeDef* node) {
+  string name = string(prefix);
+  int id = function->node_def_size();
+  while (ContainsFunctionNodeWithName(name, *function)) {
+    name = strings::StrCat(prefix, "/_", id);
+    ++id;
+  }
+  node->set_name(std::move(name));
+}
+
+}  // end namespace function_utils
+}  // end namespace grappler
+}  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/function_utils.h b/tensorflow/core/grappler/optimizers/data/function_utils.h
new file mode 100644
index 0000000000..d4ce824652
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/function_utils.h
@@ -0,0 +1,108 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_FUNCTION_UTILS_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_FUNCTION_UTILS_H_
+
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
+#include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace function_utils {
+// This namespace contains utility functions for querying and modifying
+// FunctionDefs.
+
+// Describes a FunctionDef input tensor. In FunctionDefs, input tensor strings
+// have the format node_name:node_output:position (if they derive from nodes),
+// or input_name (if they derive from an argument).
+struct FunctionDefTensorDesc {
+  FunctionDefTensorDesc() = default;
+
+  FunctionDefTensorDesc(const string& node_name, const string& output,
+                        int position);
+
+  // Parses node_name:node_output:position string into its components.
+  explicit FunctionDefTensorDesc(const string& input);
+
+  // TODO(rachelim): Add provisions to deal with special formats, like how
+  // GrapplerFunctionItem expands node output range if position is not defined
+  string full_str;
+  string node_name;
+  string node_output;
+  int position = -1;
+};
+
+// Replaces all references to `from` tensor in func's nodes' inputs and retvals
+// to `to` tensor. This is similar to `MutableGraphView::ReplaceInputs`.
+void ReplaceReferences(const string& from, const string& to, FunctionDef* func);
+
+// Adds a function output to the function def, ensuring that the output key
+// is unique, and maps to output_tensor_name in the ret dict.
+void AddFunctionOutputWithUniqueName(StringPiece prefix,
+                                     StringPiece output_tensor_name,
+                                     FunctionDef* function, DataType dt);
+
+// Adds a node to a FunctionDef.
+NodeDef* AddNode(StringPiece name, StringPiece op,
+                 const std::vector<string>& inputs,
+                 const std::vector<std::pair<string, AttrValue>>& attributes,
+                 FunctionDef* fd);
+
+// Checks whether the function contains a node with the given name.
+bool ContainsFunctionNodeWithName(StringPiece name,
+                                  const FunctionDef& function);
+
+// Checks whether the function contains a node with the given op.
+bool ContainsFunctionNodeWithOp(StringPiece op, const FunctionDef& function);
+
+// Checks whether the function contains an output with the given name.
+bool ContainsFunctionOutputWithName(StringPiece name,
+                                    const FunctionDef& function);
+
+// Returns the index of the function input with the given name or -1 if the
+// function node does not exist.
+int FindFunctionInputWithName(StringPiece name, const FunctionDef& function);
+
+// Returns the index of the function output with the given name or -1 if the
+// function node does not exist.
+int FindFunctionOutputWithName(StringPiece name, const FunctionDef& function);
+
+// Returns the index of the function node with the given name or -1 if the
+// function node does not exist.
+int FindFunctionNodeWithName(StringPiece name, const FunctionDef& function);
+
+// Returns the index of the function node with the given op or -1 if the
+// function node does not exist.
+int FindFunctionNodeWithOp(StringPiece op, const FunctionDef& function);
+
+// Sets the function node name using the `prefix` as a prefix while guaranteeing
+// the name is unique across the functions nodes.
+void SetUniqueFunctionNodeName(StringPiece prefix, FunctionDef* function,
+                               NodeDef* node);
+
+}  // end namespace function_utils
+}  // end namespace grappler
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_FUNCTION_UTILS_H_
diff --git a/tensorflow/core/grappler/optimizers/data/function_utils_test.cc b/tensorflow/core/grappler/optimizers/data/function_utils_test.cc
new file mode 100644
index 0000000000..3739e20eb1
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/function_utils_test.cc
@@ -0,0 +1,164 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/tools/graph_transforms/transform_utils.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace function_utils {
+namespace {
+
+TEST(FunctionDefTensorDesc, Parsing) {
+  FunctionDefTensorDesc f("Cast:y:0");
+  EXPECT_EQ(f.full_str, "Cast:y:0");
+  EXPECT_EQ(f.node_name, "Cast");
+  EXPECT_EQ(f.node_output, "y");
+  EXPECT_EQ(f.position, 0);
+
+  FunctionDefTensorDesc f2("Arg0");
+  EXPECT_EQ(f2.full_str, "Arg0");
+  EXPECT_EQ(f2.node_name, "Arg0");
+  EXPECT_EQ(f2.node_output, "");
+  EXPECT_EQ(f2.position, -1);
+}
+
+TEST(ReplaceReferencesTest, ReplaceReferencesTest) {
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer", {"arg0: int32"}, {"out: int32", "out2: int64"}, {}, {},
+      {{"out", "MapDefun:output:0"}, {"out2", "Cast:y:0"}});
+  NodeDef* derive_node =
+      AddNode("X", "Some_Op", {"MapDefun:output:0"}, {}, &outer);
+  // Check that both the input to "X" and retval of "outer" are replaced.
+  ReplaceReferences("MapDefun:output:0", "arg0", &outer);
+  EXPECT_EQ(outer.ret().at("out"), "arg0");
+  EXPECT_EQ(derive_node->input(0), "arg0");
+}
+
+TEST(FunctionUtilsTest, AddFunctionOutputWithUniqueName) {
+  FunctionDef function = test::function::XTimesTwo();
+  AddFunctionOutputWithUniqueName("y", "two", &function, DT_INT64);
+  EXPECT_TRUE(ContainsFunctionOutputWithName("y/_1", function));
+  EXPECT_EQ(function.ret().at("y/_1"), "two");
+}
+
+TEST(FunctionUtilsTest, ContainsFunctionNodeWithName) {
+  FunctionDef function = test::function::XTimesTwo();
+  EXPECT_FALSE(ContainsFunctionNodeWithName(
+      "weird_name_that_should_not_be_there", function));
+  EXPECT_TRUE(ContainsFunctionNodeWithName("two", function));
+}
+
+TEST(FunctionUtilsTest, ContainsFunctionNodeWithOp) {
+  FunctionDef function = test::function::XTimesTwo();
+  EXPECT_FALSE(ContainsFunctionNodeWithOp("weird_op_that_should_not_be_there",
+                                          function));
+  EXPECT_TRUE(ContainsFunctionNodeWithOp("Mul", function));
+}
+
+TEST(FunctionUtilsTest, ContainsFunctionOutputWithName) {
+  FunctionDef function = test::function::XTimesTwo();
+  EXPECT_TRUE(ContainsFunctionOutputWithName("y", function));
+  EXPECT_FALSE(ContainsFunctionOutputWithName("Add:z:0", function));
+}
+
+TEST(FunctionUtilsTest, FindFunctionNodeWithName) {
+  FunctionDef function = test::function::XTimesTwo();
+  EXPECT_EQ(
+      FindFunctionNodeWithName("weird_name_that_should_not_be_there", function),
+      -1);
+  EXPECT_NE(FindFunctionNodeWithName("two", function), -1);
+}
+
+TEST(FunctionUtilsTest, FindFunctionNodeWithOp) {
+  FunctionDef function = test::function::XTimesTwo();
+  EXPECT_EQ(
+      FindFunctionNodeWithOp("weird_op_that_should_not_be_there", function),
+      -1);
+  EXPECT_NE(FindFunctionNodeWithOp("Mul", function), -1);
+}
+
+TEST(FunctionUtilsTest, FindFunctionInputWithName) {
+  FunctionDef function = test::function::XTimesTwo();
+  EXPECT_EQ(FindFunctionInputWithName("x", function), 0);
+  EXPECT_EQ(FindFunctionInputWithName("not_a_name", function), -1);
+}
+
+TEST(FunctionUtilsTest, FindFunctionOutputWithName) {
+  FunctionDef function = test::function::XTimesTwo();
+  EXPECT_EQ(FindFunctionOutputWithName("y", function), 0);
+  EXPECT_EQ(FindFunctionOutputWithName("Add:z:0", function), -1);
+}
+
+TEST(FunctionUtilsTest, SetUniqueFunctionNodeName) {
+  FunctionDef function = test::function::XTimesTwo();
+  NodeDef node;
+  SetUniqueFunctionNodeName("abc", &function, &node);
+  for (const NodeDef& function_node : function.node_def()) {
+    EXPECT_NE(node.name(), function_node.name());
+  }
+  auto* new_node = function.add_node_def();
+  *new_node = node;
+
+  NodeDef other;
+  SetUniqueFunctionNodeName("abc", &function, &other);
+  EXPECT_NE(other.name(), new_node->name());
+}
+
+TEST(FunctionUtilsTest, AddNodeToFunctionDef) {
+  FunctionDef func;
+  const char* op_name = "xxx";
+  AddNode(op_name, op_name, {}, {}, &func);
+
+  const NodeDef& node1 = func.node_def(FindFunctionNodeWithName("xxx", func));
+  EXPECT_EQ(node1.op(), op_name);
+  EXPECT_EQ(node1.input_size(), 0);
+  EXPECT_EQ(node1.attr_size(), 0);
+
+  const std::vector<string> inputs({"input1", "input2"});
+  AddNode("", op_name, inputs, {}, &func);
+  const NodeDef& node2 =
+      func.node_def(FindFunctionNodeWithName("xxx/_2", func));
+  EXPECT_EQ(node2.op(), op_name);
+  EXPECT_EQ(node2.attr_size(), 0);
+  EXPECT_EQ(node2.input_size(), inputs.size());
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    EXPECT_EQ(node2.input(i), inputs[i]);
+  }
+
+  AttrValue a1, a2;
+  a1.set_type(DT_INT32);
+  a2.set_type(DT_INT64);
+  const std::vector<std::pair<string, AttrValue>> attrs(
+      {{"attr1", a1}, {"attr2", a2}});
+  AddNode("", op_name, {}, attrs, &func);
+  const NodeDef& node3 =
+      func.node_def(FindFunctionNodeWithName("xxx/_3", func));
+  EXPECT_EQ(node3.op(), op_name);
+  EXPECT_EQ(node3.input_size(), 0);
+  EXPECT_EQ(node3.attr_size(), attrs.size());
+  for (size_t i = 0; i < attrs.size(); ++i) {
+    EXPECT_EQ(attrs[i].second.type(), node3.attr().at(attrs[i].first).type());
+  }
+}
+
+}  // namespace
+}  // namespace function_utils
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/fusion_utils.cc b/tensorflow/core/grappler/optimizers/data/fusion_utils.cc
index 01a78c04b0..b3bfee138f 100644
--- a/tensorflow/core/grappler/optimizers/data/fusion_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/fusion_utils.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
@@ -407,7 +408,7 @@ void LazyConjunctionNodes(const FunctionDef& first_function,
   auto* if_node = fused_function->add_node_def();
   // This is guaranteed to succeed.
   TF_CHECK_OK(if_builder.Finalize(if_node));
-  graph_utils::SetUniqueFunctionNodeName("cond", fused_function, if_node);
+  function_utils::SetUniqueFunctionNodeName("cond", fused_function, if_node);
 
   GetMutableOutputNode(fused_function, 0) = if_node->name() + ":output:0";
 }
diff --git a/tensorflow/core/grappler/optimizers/data/fusion_utils_test.cc b/tensorflow/core/grappler/optimizers/data/fusion_utils_test.cc
index d5c6466080..e667affeea 100644
--- a/tensorflow/core/grappler/optimizers/data/fusion_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/fusion_utils_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -110,9 +111,9 @@ TEST(FusionUtilsTest, FuseFunctionWithPredicate) {
   CheckUniqueNames(*fused_function);
 
   ASSERT_TRUE(
-      graph_utils::ContainsFunctionNodeWithOp("Equal", *fused_function));
+      function_utils::ContainsFunctionNodeWithOp("Equal", *fused_function));
   const auto &equal_node = fused_function->node_def(
-      graph_utils::FindFunctionNodeWithOp("Equal", *fused_function));
+      function_utils::FindFunctionNodeWithOp("Equal", *fused_function));
 
   EXPECT_EQ(xtimes_two->signature().output_arg(0).name(),
             fused_function->signature().output_arg(0).name());
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index d4ab444036..b3f60e34f9 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -108,26 +108,6 @@ NodeDef* AddNode(StringPiece name, StringPiece op,
   return graph->AddNode(std::move(node));
 }
 
-NodeDef* AddNode(StringPiece name, StringPiece op,
-                 const std::vector<string>& inputs,
-                 const std::vector<std::pair<string, AttrValue>>& attributes,
-                 FunctionDef* fd) {
-  NodeDef* node = fd->add_node_def();
-  if (!name.empty()) {
-    node->set_name(string(name));
-  } else {
-    SetUniqueFunctionNodeName(op, fd, node);
-  }
-  node->set_op(string(op));
-  for (const string& input : inputs) {
-    node->add_input(input);
-  }
-  for (auto attr : attributes) {
-    (*node->mutable_attr())[attr.first] = attr.second;
-  }
-  return node;
-}
-
 template <>
 NodeDef* AddScalarConstNode(bool v, MutableGraphView* graph) {
   return AddScalarConstNodeHelper(
@@ -196,6 +176,11 @@ bool Compare(const GraphDef& g1, const GraphDef& g2) {
   return true;
 }
 
+bool ContainsGraphFunctionWithName(StringPiece name,
+                                   const FunctionDefLibrary& library) {
+  return FindGraphFunctionWithName(name, library) != -1;
+}
+
 bool ContainsGraphNodeWithName(StringPiece name, const GraphDef& graph) {
   return FindGraphNodeWithName(name, graph) != -1;
 }
@@ -204,18 +189,14 @@ bool ContainsNodeWithOp(StringPiece op, const GraphDef& graph) {
   return FindGraphNodeWithOp(op, graph) != -1;
 }
 
-bool ContainsGraphFunctionWithName(StringPiece name,
-                                   const FunctionDefLibrary& library) {
-  return FindGraphFunctionWithName(name, library) != -1;
-}
-
-bool ContainsFunctionNodeWithName(StringPiece name,
-                                  const FunctionDef& function) {
-  return FindFunctionNodeWithName(name, function) != -1;
-}
-
-bool ContainsFunctionNodeWithOp(StringPiece op, const FunctionDef& function) {
-  return FindFunctionNodeWithOp(op, function) != -1;
+int FindGraphFunctionWithName(StringPiece name,
+                              const FunctionDefLibrary& library) {
+  std::vector<int> indices = GetElementIndicesWithPredicate(
+      [&name](const FunctionDef& function) {
+        return function.signature().name() == name;
+      },
+      library.function());
+  return indices.empty() ? -1 : indices.front();
 }
 
 int FindGraphNodeWithName(StringPiece name, const GraphDef& graph) {
@@ -237,31 +218,6 @@ std::vector<int> FindAllGraphNodesWithOp(const string& op,
       [&op](const NodeDef& node) { return node.op() == op; }, graph.node());
 }
 
-int FindGraphFunctionWithName(StringPiece name,
-                              const FunctionDefLibrary& library) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
-      [&name](const FunctionDef& function) {
-        return function.signature().name() == name;
-      },
-      library.function());
-  return indices.empty() ? -1 : indices.front();
-}
-
-int FindFunctionNodeWithName(StringPiece name, const FunctionDef& function) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
-      [&name](const NodeDef& node) { return node.name() == name; },
-      function.node_def());
-  return indices.empty() ? -1 : indices.front();
-}
-
-int FindFunctionNodeWithOp(StringPiece op, const FunctionDef& function) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
-      [&op](const NodeDef& node) { return node.op() == op; },
-      function.node_def());
-
-  return indices.empty() ? -1 : indices.front();
-}
-
 NodeDef* GetInputNode(const NodeDef& node, const MutableGraphView& graph) {
   if (node.input_size() == 0) return nullptr;
   GraphView::InputPort input_port = graph.GetInputPort(node.name(), 0);
@@ -284,17 +240,6 @@ void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph,
   node->set_name(std::move(name));
 }
 
-void SetUniqueFunctionNodeName(StringPiece prefix, FunctionDef* function,
-                               NodeDef* node) {
-  string name = string(prefix);
-  int id = function->node_def_size();
-  while (ContainsFunctionNodeWithName(name, *function)) {
-    name = strings::StrCat(prefix, "/_", id);
-    ++id;
-  }
-  node->set_name(std::move(name));
-}
-
 void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
                                 FunctionDef* function) {
   string name = string(prefix);
@@ -305,7 +250,6 @@ void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
   }
   function->mutable_signature()->set_name(std::move(name));
 }
-
 }  // end namespace graph_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h
index 6f431c232d..1652afcd9e 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h
@@ -37,12 +37,6 @@ NodeDef* AddNode(StringPiece name, StringPiece op,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  MutableGraphView* graph);
 
-// Adds a node to a FunctionDef.
-NodeDef* AddNode(StringPiece name, StringPiece op,
-                 const std::vector<string>& inputs,
-                 const std::vector<std::pair<string, AttrValue>>& attributes,
-                 FunctionDef* fd);
-
 // Adds a Const node with the given value to the graph.
 template <typename T>
 NodeDef* AddScalarConstNode(T v, MutableGraphView* graph) {
@@ -76,13 +70,6 @@ bool ContainsGraphNodeWithName(StringPiece name, const GraphDef& graph);
 bool ContainsGraphFunctionWithName(StringPiece name,
                                    const FunctionDefLibrary& library);
 
-// Checks whether the function contains a node with the given name.
-bool ContainsFunctionNodeWithName(StringPiece name,
-                                  const FunctionDef& function);
-
-// Checks whether the function contains a node with the given op.
-bool ContainsFunctionNodeWithOp(StringPiece op, const FunctionDef& function);
-
 // Checks whether the graph contains a node with the given op.
 bool ContainsNodeWithOp(StringPiece op, const GraphDef& graph);
 
@@ -95,14 +82,6 @@ int FindGraphNodeWithName(StringPiece name, const GraphDef& graph);
 int FindGraphFunctionWithName(StringPiece name,
                               const FunctionDefLibrary& library);
 
-// Returns the index of the function node with the given name or -1 if the
-// function node does not exist.
-int FindFunctionNodeWithName(StringPiece name, const FunctionDef& function);
-
-// Returns the index of the function node with the given op or -1 if the
-// function node does not exist.
-int FindFunctionNodeWithOp(StringPiece op, const FunctionDef& function);
-
 // Returns the index of the first node with the given op or -1 if no such  node
 // exists.
 int FindGraphNodeWithOp(StringPiece op, const GraphDef& graph);
@@ -119,11 +98,6 @@ std::vector<int> FindAllGraphNodesWithOp(const string& op,
 // is unique across the graph.
 void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph, NodeDef* node);
 
-// Sets the function node name using the `prefix` as a prefix while guaranteeing
-// the name is unique across the functions nodes.
-void SetUniqueFunctionNodeName(StringPiece prefix, FunctionDef* function,
-                               NodeDef* node);
-
 // Sets the node name using the `prefix` name as a prefix while guaranteeing the
 // name is unique across the graph.
 void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
index c19ac7b880..6877c207c4 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
@@ -112,20 +112,6 @@ TEST(GraphUtilsTest, ContainsGraphFunctionWithName) {
       ContainsGraphFunctionWithName(new_function->signature().name(), library));
 }
 
-TEST(GraphUtilsTest, ContainsFunctionNodeWithName) {
-  FunctionDef function = test::function::XTimesTwo();
-  EXPECT_FALSE(ContainsFunctionNodeWithName(
-      "weird_name_that_should_not_be_there", function));
-  EXPECT_TRUE(ContainsFunctionNodeWithName("two", function));
-}
-
-TEST(GraphUtilsTest, ContainsFunctionNodeWithOp) {
-  FunctionDef function = test::function::XTimesTwo();
-  EXPECT_FALSE(ContainsFunctionNodeWithOp("weird_op_that_should_not_be_there",
-                                          function));
-  EXPECT_TRUE(ContainsFunctionNodeWithOp("Mul", function));
-}
-
 TEST(GraphUtilsTest, ContainsNodeWithOp) {
   GraphDef graph_def;
   MutableGraphView graph(&graph_def);
@@ -150,22 +136,6 @@ TEST(GraphUtilsTest, FindGraphNodeWithName) {
   EXPECT_EQ(FindGraphNodeWithName("A", *graph.GetGraph()), -1);
 }
 
-TEST(GraphUtilsTest, FindFunctionNodeWithName) {
-  FunctionDef function = test::function::XTimesTwo();
-  EXPECT_EQ(
-      FindFunctionNodeWithName("weird_name_that_should_not_be_there", function),
-      -1);
-  EXPECT_NE(FindFunctionNodeWithName("two", function), -1);
-}
-
-TEST(GraphUtilsTest, FindFunctionNodeWithOp) {
-  FunctionDef function = test::function::XTimesTwo();
-  EXPECT_EQ(
-      FindFunctionNodeWithOp("weird_op_that_should_not_be_there", function),
-      -1);
-  EXPECT_NE(FindFunctionNodeWithOp("Mul", function), -1);
-}
-
 TEST(GraphUtilsTest, FindGraphFunctionWithName) {
   FunctionDefLibrary library;
   EXPECT_EQ(FindGraphFunctionWithName("new_function", library), -1);
@@ -225,21 +195,6 @@ TEST(GraphUtilsTest, SetUniqueGraphNodeName) {
   EXPECT_NE(node2->name(), node3->name());
 }
 
-TEST(GraphUtilsTest, SetUniqueFunctionNodeName) {
-  FunctionDef function = test::function::XTimesTwo();
-  NodeDef node;
-  SetUniqueFunctionNodeName("abc", &function, &node);
-  for (const NodeDef& function_node : function.node_def()) {
-    EXPECT_NE(node.name(), function_node.name());
-  }
-  auto* new_node = function.add_node_def();
-  *new_node = node;
-
-  NodeDef other;
-  SetUniqueFunctionNodeName("abc", &function, &other);
-  EXPECT_NE(other.name(), new_node->name());
-}
-
 TEST(GraphUtilsTest, SetUniqueGraphFunctionName) {
   FunctionDefLibrary library;
   FunctionDef* new_function = library.add_function();
@@ -251,43 +206,6 @@ TEST(GraphUtilsTest, SetUniqueGraphFunctionName) {
             other_function->signature().name());
 }
 
-TEST(GraphUtilsTest, AddNodeToFunctionDef) {
-  FunctionDef func;
-  const char* op_name = "xxx";
-  AddNode(op_name, op_name, {}, {}, &func);
-
-  const NodeDef& node1 = func.node_def(FindFunctionNodeWithName("xxx", func));
-  EXPECT_EQ(node1.op(), op_name);
-  EXPECT_EQ(node1.input_size(), 0);
-  EXPECT_EQ(node1.attr_size(), 0);
-
-  const std::vector<string> inputs({"input1", "input2"});
-  AddNode("", op_name, inputs, {}, &func);
-  const NodeDef& node2 =
-      func.node_def(FindFunctionNodeWithName("xxx/_2", func));
-  EXPECT_EQ(node2.op(), op_name);
-  EXPECT_EQ(node2.attr_size(), 0);
-  EXPECT_EQ(node2.input_size(), inputs.size());
-  for (size_t i = 0; i < inputs.size(); ++i) {
-    EXPECT_EQ(node2.input(i), inputs[i]);
-  }
-
-  AttrValue a1, a2;
-  a1.set_type(DT_INT32);
-  a2.set_type(DT_INT64);
-  const std::vector<std::pair<string, AttrValue>> attrs(
-      {{"attr1", a1}, {"attr2", a2}});
-  AddNode("", op_name, {}, attrs, &func);
-  const NodeDef& node3 =
-      func.node_def(FindFunctionNodeWithName("xxx/_3", func));
-  EXPECT_EQ(node3.op(), op_name);
-  EXPECT_EQ(node3.input_size(), 0);
-  EXPECT_EQ(node3.attr_size(), attrs.size());
-  for (size_t i = 0; i < attrs.size(); ++i) {
-    EXPECT_EQ(attrs[i].second.type(), node3.attr().at(attrs[i].first).type());
-  }
-}
-
 TEST(GraphUtilsTest, GetInputNode) {
   GraphDef graph_def;
   MutableGraphView graph(&graph_def);
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index a019b77eb7..07766aa7b3 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -52,8 +53,8 @@ FunctionDef* AddVectorizedFunction(const NodeDef& map_node,
   // Add MapDefun node
   NodeDef* map_defun_node = vectorized_func->mutable_node_def()->Add();
   map_defun_node->set_op("MapDefun");
-  graph_utils::SetUniqueFunctionNodeName(map_defun_node->op(), vectorized_func,
-                                         map_defun_node);
+  function_utils::SetUniqueFunctionNodeName(map_defun_node->op(),
+                                            vectorized_func, map_defun_node);
 
   // Set attrs and inputs
   for (const string& k : {"f", "output_types", "output_shapes"}) {
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
new file mode 100644
index 0000000000..6a59eb0d32
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -0,0 +1,341 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
+
+#include "absl/strings/str_join.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/device_base.h"
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+#include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/grappler/utils/functions.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/lib/strings/scanner.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace vectorization_utils {
+
+using function_utils::FunctionDefTensorDesc;
+
+namespace {
+
+void AddMapDefunOutput(FunctionDef* map_defun_fn, NodeDef* map_defun_node,
+                       const string& output_retval, const DataType t) {
+  // Set to unknown shape
+  TensorShapeProto tensor_shape_proto;
+  PartialTensorShape().AsProto(&tensor_shape_proto);
+
+  function_utils::AddFunctionOutputWithUniqueName(
+      "vectorized_out", output_retval, map_defun_fn, t);
+
+  *(*map_defun_node->mutable_attr())["output_shapes"]
+       .mutable_list()
+       ->add_shape() = tensor_shape_proto;
+  (*map_defun_node->mutable_attr())["output_types"].mutable_list()->add_type(t);
+}
+
+void RemoveMapDefunOutput(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
+                          NodeDef* map_defun_node, int output_position) {
+  DCHECK_LT(output_position, map_defun_fn->signature().output_arg_size())
+      << "Trying to remove output that doesn't exist. Output number: "
+      << output_position;
+
+  int num_later_outputs =
+      map_defun_fn->signature().output_arg_size() - output_position - 1;
+
+  // Remove from map_defun_fn's ret dict and output args
+  map_defun_fn->mutable_ret()->erase(
+      map_defun_fn->signature().output_arg(output_position).name());
+  map_defun_fn->mutable_signature()->mutable_output_arg()->DeleteSubrange(
+      output_position, 1);
+
+  // Renumber outputs that come after
+  for (int i = 0; i < num_later_outputs; ++i) {
+    function_utils::ReplaceReferences(
+        strings::StrCat(map_defun_node->name(),
+                        ":output:", output_position + i + 1),
+        strings::StrCat(map_defun_node->name(),
+                        ":output:", output_position + i),
+        outer_scope);
+  }
+  map_defun_node->mutable_attr()
+      ->at("output_shapes")
+      .mutable_list()
+      ->mutable_shape()
+      ->DeleteSubrange(output_position, 1);
+  map_defun_node->mutable_attr()
+      ->at("output_types")
+      .mutable_list()
+      ->mutable_type()
+      ->ExtractSubrange(output_position, 1, nullptr);
+}
+
+Status ConvertCastOp(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
+                     NodeDef* map_defun_node, const NodeDef& cast_node,
+                     const FunctionDefTensorDesc& output_desc,
+                     std::map<string, string>* conversion_map) {
+  if (output_desc.node_output != "y" || output_desc.position != 0) {
+    // We expect the Cast node to have only one output, with the name "y".
+    return errors::Internal("Cannot convert Cast op output.");
+  }
+
+  // Promote Cast inputs to outputs of MapDefun
+  DCHECK_EQ(cast_node.input_size(), 1);
+  AddMapDefunOutput(map_defun_fn, map_defun_node, cast_node.input(0),
+                    cast_node.attr().at("SrcT").type());
+
+  // Add new Cast node
+  NodeDef* new_cast_node = outer_scope->add_node_def();
+  *new_cast_node = cast_node;
+  new_cast_node->clear_name();
+  function_utils::SetUniqueFunctionNodeName(
+      strings::StrCat("vectorized/", cast_node.name()), outer_scope,
+      new_cast_node);
+  new_cast_node->set_input(
+      0, strings::StrCat(map_defun_node->name(), ":output:",
+                         map_defun_fn->signature().output_arg_size() - 1));
+
+  // Add the output mapping to conversion map
+  (*conversion_map)[strings::StrCat(output_desc.node_name, ":y:0")] =
+      strings::StrCat(new_cast_node->name(), ":y:0");
+
+  return Status::OK();
+}
+
+Status ConvertUnpackOp(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
+                       NodeDef* map_defun_node, const NodeDef& unpack_node,
+                       const FunctionDefTensorDesc& output_desc,
+                       std::map<string, string>* conversion_map) {
+  if (output_desc.node_output != "output") {
+    return errors::Internal("Cannot convert Unpack op output.");
+  }
+
+  // Promote Unpack inputs to outputs of MapDefun
+  AddMapDefunOutput(map_defun_fn, map_defun_node, unpack_node.input(0),
+                    unpack_node.attr().at("T").type());
+
+  // Add new Unpack node
+  NodeDef* new_unpack_node = outer_scope->add_node_def();
+  *new_unpack_node = unpack_node;
+  new_unpack_node->clear_name();
+  function_utils::SetUniqueFunctionNodeName(
+      strings::StrCat("vectorized/", unpack_node.name()), outer_scope,
+      new_unpack_node);
+
+  // Increment "axis" attr by 1:
+  (*new_unpack_node->mutable_attr())["axis"].set_i(
+      unpack_node.attr().at("axis").i() + 1);
+  new_unpack_node->set_input(
+      0, strings::StrCat(map_defun_node->name(), ":output:",
+                         map_defun_fn->signature().output_arg_size() - 1));
+
+  // Add the output mappings to conversion map
+  int num = new_unpack_node->attr().at("num").i();
+  for (int i = 0; i < num; ++i) {
+    (*conversion_map)[strings::StrCat(output_desc.node_name, ":output:", i)] =
+        strings::StrCat(new_unpack_node->name(), ":output:", i);
+  }
+
+  return Status::OK();
+}
+
+int FindOutputToConvert(const FunctionDef& function,
+                        const std::set<string>& unconvertible,
+                        FunctionDefTensorDesc* f) {
+  for (int i = function.signature().output_arg_size() - 1; i >= 0; --i) {
+    const string& ret_key = function.signature().output_arg(i).name();
+    *f = FunctionDefTensorDesc(function.ret().at(ret_key));
+
+    if (unconvertible.find(f->node_name) == unconvertible.end()) {
+      return i;
+    }
+  }
+  return -1;
+}
+
+// Helper class that vectorizes the body of a MapDefun node, adding new
+// operations to the graph that collectively compute the same value as what
+// running the MapDefun function on slices of the input would produce.
+// Each instance of the class encapsulates all the data necessary to vectorize a
+// MapDefun op in place.
+class Vectorization {
+ public:
+  Vectorization(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
+                NodeDef* map_defun_node)
+      : outer_scope_(outer_scope),
+        map_defun_fn_(map_defun_fn),
+        map_defun_node_(map_defun_node) {}
+
+  // Repeatedly tries to convert outputs of map_defun_fn_ into new nodes in
+  // the outer_scope_, until there are no convertible outputs remaining.
+  // This method is idempotent.
+  void Vectorize();
+
+ private:
+  // Vectorizes the map defun function's output at output_position
+  Status ConvertOutput(int output_position, const FunctionDefTensorDesc& desc);
+  // Given a descriptor of the original output tensor, gets a string
+  // corresponding to the converted output tensor.
+  Status ConvertOutputHelper(const FunctionDefTensorDesc& output_desc,
+                             string* converted);
+  Status AddConversionMappingFromInput(
+      const FunctionDefTensorDesc& output_desc);
+
+  // Adds mappings from node's outputs tensors to converted output tensors,
+  // creating the necessary new node(s). Generally, the steps to convert an op
+  // are:
+  // 1) Promote the inputs of the op inputs to outputs of the map_defun_fn_,
+  //    and modify map_defun_node_ attrs accordingly
+  // 2) Create new node(s) in outer_scope_ that act on batched input tensors.
+  //    These operations collectively compute the same value as what running
+  //    the original operation on slices of the input tensors would produce.
+  //    For example, a Cast op in MapDefun translates to a Cast op in
+  //    outer_scope_, since the vectorized version of Cast is itself.
+  // 3) Set inputs of new node(s) to the corresponding converted inputs (that
+  //    are now outputs of map_defun_node_)
+  // 4) For each output of the old node, add the mapping of output strings to
+  //    the conversion map (eg "Cast:y:0" -> "Vectorize/Cast:y:0")
+  Status AddConversionMappingFromOp(const NodeDef& node,
+                                    const FunctionDefTensorDesc& output_desc);
+
+  // Maps a tensor name to the name of the corresponding vectorized tensor. For
+  // example, "Cast:y:0" -> "Vectorize/Cast:y:0"
+  std::map<string, string> conversion_map_;
+  // Unconvertible node names
+  std::set<string> unconvertible_;
+
+  FunctionDef* outer_scope_;
+  FunctionDef* map_defun_fn_;
+  NodeDef* map_defun_node_;
+};
+
+Status Vectorization::AddConversionMappingFromOp(
+    const NodeDef& node, const FunctionDefTensorDesc& output_desc) {
+  for (const string& input_name : node.input()) {
+    if (IsControlInput(input_name)) {
+      return errors::InvalidArgument(
+          "Vectorizing outputs with control inputs is currently not "
+          "supported.");
+    }
+  }
+
+  // TODO(rachelim): Have some mechanism for registering converters and some
+  // uniform, simpler way to represent them.
+
+  // TODO(rachelim): Do step (1) outside of the individual op converters, when
+  // we know how to find out the type of the input.
+  if (node.op() == "Cast") {
+    return ConvertCastOp(outer_scope_, map_defun_fn_, map_defun_node_, node,
+                         output_desc, &conversion_map_);
+  } else if (node.op() == "Unpack") {
+    return ConvertUnpackOp(outer_scope_, map_defun_fn_, map_defun_node_, node,
+                           output_desc, &conversion_map_);
+  }
+  return errors::Unimplemented("Op converter for \"", node.op(),
+                               "\" not implemented yet");
+}
+
+Status Vectorization::AddConversionMappingFromInput(
+    const FunctionDefTensorDesc& output_desc) {
+  int input_index = function_utils::FindFunctionInputWithName(
+      output_desc.node_name, *map_defun_fn_);
+  if (input_index == -1) {
+    return errors::Internal("Cannot convert non-existent input.");
+  }
+
+  conversion_map_[output_desc.full_str] = map_defun_node_->input(input_index);
+  return Status::OK();
+}
+
+Status Vectorization::ConvertOutputHelper(
+    const FunctionDefTensorDesc& output_desc, string* converted) {
+  // It's possible the output already has a mapping, if it comes from a node
+  // that has already been converted.
+  if (auto found = gtl::FindOrNull(conversion_map_, output_desc.full_str)) {
+    *converted = *found;
+    return Status::OK();
+  }
+
+  int index = function_utils::FindFunctionNodeWithName(output_desc.node_name,
+                                                       *map_defun_fn_);
+  if (index == -1) {  // The output comes from an input
+    TF_RETURN_IF_ERROR(AddConversionMappingFromInput(output_desc));
+  } else {
+    TF_RETURN_IF_ERROR(AddConversionMappingFromOp(
+        map_defun_fn_->node_def(index), output_desc));
+  }
+  *converted = conversion_map_.at(output_desc.full_str);
+  return Status::OK();
+}
+
+Status Vectorization::ConvertOutput(int output_position,
+                                    const FunctionDefTensorDesc& output_desc) {
+  string converted_output_name;
+  TF_RETURN_IF_ERROR(ConvertOutputHelper(output_desc, &converted_output_name));
+
+  // Remove the old output and make everything that referenced it point
+  // to the new string
+  function_utils::ReplaceReferences(
+      strings::StrCat(map_defun_node_->name(), ":output:", output_position),
+      converted_output_name, outer_scope_);
+  RemoveMapDefunOutput(outer_scope_, map_defun_fn_, map_defun_node_,
+                       output_position);
+
+  return Status::OK();
+}
+
+void Vectorization::Vectorize() {
+  while (true) {
+    FunctionDefTensorDesc desc;
+    int output_position =
+        FindOutputToConvert(*map_defun_fn_, unconvertible_, &desc);
+    if (output_position == -1) break;
+
+    if (!ConvertOutput(output_position, desc).ok()) {
+      unconvertible_.insert(desc.node_name);
+    }
+  }
+
+  // If we've converted all the outputs of the MapDefun function, we no longer
+  // need the MapDefun node and can delete it.
+  if (map_defun_fn_->signature().output_arg_size() == 0) {
+    outer_scope_->mutable_node_def()->DeleteSubrange(
+        function_utils::FindFunctionNodeWithName(map_defun_node_->name(),
+                                                 *outer_scope_),
+        1);
+  }
+
+  if (!unconvertible_.empty()) {
+    VLOG(2) << "The following nodes could not be converted: ["
+            << absl::StrJoin(unconvertible_, ", ") << "].";
+  }
+}
+}  // namespace
+
+void VectorizeMapDefun(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
+                       NodeDef* map_defun_node) {
+  Vectorization(outer_scope, map_defun_fn, map_defun_node).Vectorize();
+}
+
+}  // end namespace vectorization_utils
+}  // end namespace grappler
+}  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.h b/tensorflow/core/grappler/optimizers/data/vectorization_utils.h
new file mode 100644
index 0000000000..bb405faa77
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.h
@@ -0,0 +1,90 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_UTILS_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_UTILS_H_
+
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace vectorization_utils {
+
+// Given a function, `map_defun_fn`, that is mapped across some input vector
+// elements via a MapDefun operation, `VectorizeMapDefun` attempts to
+// vectorize the MapDefun by "lifting" operations from the `map_defun_fn` to the
+// `outer_scope`; that is, replacing `map_defun_fn` operations with new
+// `outer_scope` operations that produce the same vector output(s) as executing
+// the `map_defun_fn` operations on elements of vector input(s) would. If all
+// `map_defun_fn` operations are successfully lifted, `map_defun_node` is
+// eliminated from `outer_scope` altogether. However, if some operations cannot
+// be lifted, and this vectorization only succeeds partially, `map_defun_node`
+// remains to be used for operations that were not lifted.
+//
+// Example:
+//   If the input to the `VectorizeMapDefun` function is a MapDefun
+// whose `map_defun_fn` performs the Cast operation, the vectorization will
+// eliminate the MapDefun. This is because the Cast operation supports
+// any tensor shape and can thus be lifted to the `outer_scope`.
+//
+// Before:
+//
+//
+// outer_scope     +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |  map_defun_fn +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   |               |        |   |
+// |   |           +---v--+     |   |
+// |   |           | Cast |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   |           +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+// After:
+//
+// outer_scope     +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |               | Cast |         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+void VectorizeMapDefun(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
+                       NodeDef* map_defun_node);
+
+}  // end namespace vectorization_utils
+}  // end namespace grappler
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_UTILS_H_
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
new file mode 100644
index 0000000000..e129fa9237
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -0,0 +1,600 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
+
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/tools/graph_transforms/transform_utils.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace vectorization_utils {
+namespace {
+
+NodeDef* AddCastNode(const string& name, const std::vector<string>& inputs,
+                     DataType src, DataType dst, bool truncate,
+                     FunctionDef* fn) {
+  NodeDef* node = function_utils::AddNode(name, "Cast", inputs, {}, fn);
+  graph_transforms::SetNodeAttr("SrcT", src, node);
+  graph_transforms::SetNodeAttr("DstT", dst, node);
+  graph_transforms::SetNodeAttr("Truncate", truncate, node);
+  return node;
+}
+
+NodeDef* AddUnstackNode(const string& name, const std::vector<string>& inputs,
+                        DataType t, int axis, int num, FunctionDef* fn) {
+  NodeDef* node = function_utils::AddNode(name, "Unpack", inputs, {}, fn);
+  graph_transforms::SetNodeAttr("T", t, node);
+  graph_transforms::SetNodeAttr("axis", axis, node);
+  graph_transforms::SetNodeAttr("num", num, node);
+  return node;
+}
+
+NodeDef* AddMapDefunNode(const string& name, const std::vector<string>& inputs,
+                         const std::vector<DataType>& t_arguments,
+                         const std::vector<DataType>& output_types,
+                         const std::vector<TensorShape>& output_shapes,
+                         const string& function_name, FunctionDef* fn) {
+  NameAttrList func;
+  func.set_name(function_name);
+  NodeDef* node = function_utils::AddNode(name, "MapDefun", inputs, {}, fn);
+  graph_transforms::SetNodeAttr("Targuments", t_arguments, node);
+  graph_transforms::SetNodeAttr("output_types", output_types, node);
+  graph_transforms::SetNodeAttr("output_shapes", output_shapes, node);
+  graph_transforms::SetNodeAttr("f", func, node);
+  return node;
+}
+
+// TODO(rachelim): Use FunctionDefHelper::Create instead
+FunctionDef CreateFunction(
+    StringPiece name, const std::vector<std::pair<string, DataType>>& inputs,
+    const std::vector<std::pair<string, DataType>>& outputs,
+    const std::map<string, string>& rets) {
+  FunctionDef func;
+  auto* signature = func.mutable_signature();
+  signature->set_name(string(name));
+  for (const auto& x : inputs) {
+    auto* arg_def = signature->add_input_arg();
+    arg_def->set_name(x.first);
+    arg_def->set_type(x.second);
+  }
+  for (const auto& x : outputs) {
+    auto* arg_def = signature->add_output_arg();
+    arg_def->set_name(x.first);
+    arg_def->set_type(x.second);
+  }
+  for (const auto& x : rets) {
+    (*func.mutable_ret())[x.first] = x.second;
+  }
+
+  return func;
+}
+
+TEST(FunctionDefInputDescTest, ConstructedCorrectly) {}
+
+// Before:
+//
+//                 +------+   +------+
+// +---------------+ Arg0 +---+ Arg1 +--------+
+// |               +---+--+   +---+--+        |
+// |                   |          |           |
+// |               +---v--+   +---v--+        |
+// |   +-----------+ Arg0 +---+ Arg1 +----+   |
+// |   |           +---+--+   +---+--+    |   |
+// |   |               |          |       |   |
+// |   | MapDefun  +---v--+   +---v--+    |   |
+// |   +-----------+ Ret0 +---+ Ret1 +----+   |
+// |               +---+--+   +---+--+        |
+// |                   |          |           |
+// |               +---v--+   +---v--+        |
+// +---------------+ Ret0 +---+ Ret1 +--------+
+//                 +------+   +------+
+//
+//
+//  After:
+//
+//                 +------+   +------+
+// +---------------+ Arg0 +---+ Arg1 +--------+
+// |               +---+--+   +---+--+        |
+// |                   |          |           |
+// |                   |          |           |
+// |                   |          |           |
+// |               +---v--+   +---v--+        |
+// +---------------+ Ret0 +---+ Ret1 +--------+
+//                 +------+   +------+
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
+  FunctionDef inner =
+      CreateFunction("inner_function", {{"arg0", DT_INT32}, {"arg1", DT_INT32}},
+                     {{"ret0", DT_INT32}, {"ret1", DT_INT32}},
+                     {{"ret0", "arg0"}, {"ret1", "arg1"}});
+  FunctionDef outer = CreateFunction(
+      "outer_function", {{"ret0", DT_INT32}, {"ret1", DT_INT32}},
+      {{"mapdefun", DT_INT32}, {"mapdefun_0", DT_INT32}},
+      {{"mapdefun", "MapDefun:output:0"}, {"mapdefun_0", "MapDefun:output:1"}});
+
+  NodeDef* map_defun = AddMapDefunNode(
+      "MapDefun", {"ret0", "ret1"}, {DT_INT32, DT_INT32}, {DT_INT32, DT_INT32},
+      {{}, {}}, inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  VectorizeMapDefun(&outer, &inner, map_defun);
+  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
+  EXPECT_EQ(outer.ret().at("mapdefun"), "ret0");
+  EXPECT_EQ(outer.ret().at("mapdefun_0"), "ret1");
+}
+
+// Before:
+//
+//                 +------+   +------+
+// +---------------+ Arg0 +---+ Arg1 +--------+
+// |               +---+--+   +---+--+        |
+// |                   |          |           |
+// |               +---v--+   +---v--+        |
+// |   +-----------+ Arg0 +---+ Arg1 +----+   |
+// |   |           +---+--+   +---+--+    |   |
+// |   |               |          |       |   |
+// |   |   +------+    |      +---v--+    |   |
+// |   |   |Const |    |      | Op0  |    |   |
+// |   |   +---v--+    |      +---+--+    |   |
+// |   |       |       |          |       |   |
+// |   |       |   +---v--+   +---v--+    |   |
+// |   |       +---| XOp1 |   | XOp2 |    |   |
+// |   |           +---+--+   +---+--+    |   |
+// |   |               |          |       |   |
+// |   | MapDefun  +---v--+   +---v--+    |   |
+// |   +-----------+ Ret0 +---+ Ret1 +----+   |
+// |               +---+--+   +---+--+        |
+// |                   |          |           |
+// |               +---v--+   +---v--+        |
+// +---------------+ Ret0 +---+ Ret1 +--------+
+//                 +------+   +------+
+//
+//   where XOp1 and XOp2 are not convertible.
+//
+// After:
+//
+// No change because the ops are not convertible.
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) {
+  FunctionDef inner =
+      CreateFunction("inner_function", {{"arg0", DT_INT32}, {"arg1", DT_INT32}},
+                     {{"ret0", DT_INT32}, {"ret1", DT_INT32}},
+                     {{"ret0", "XOp1:output:0"}, {"ret1", "XOp2:output:0"}});
+  NodeDef* x_op1 =
+      function_utils::AddNode("XOp1", "XOp1", {"const", "arg0"}, {}, &inner);
+  CHECK_NOTNULL(x_op1);
+
+  NodeDef* x_op2 = function_utils::AddNode("XOp2", "XOp2", {"op1"}, {}, &inner);
+  CHECK_NOTNULL(x_op2);
+
+  FunctionDef outer = CreateFunction(
+      "outer_function", {{"x", DT_INT32}, {"y", DT_INT32}},
+      {{"mapdefun", DT_INT32}, {"mapdefun_0", DT_INT32}},
+      {{"mapdefun", "MapDefun:output:0"}, {"mapdefun_0", "MapDefun:output:1"}});
+
+  NodeDef* map_defun = AddMapDefunNode(
+      "MapDefun", {"x", "y"}, {DT_INT32, DT_INT32}, {DT_INT32, DT_INT32},
+      {{}, {}}, inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDef outer_copy(outer);
+  FunctionDef inner_copy(inner);
+  VectorizeMapDefun(&outer, &inner, map_defun);
+  // They should be unchanged
+  EXPECT_TRUE(FunctionDefsEqual(outer_copy, outer));
+  EXPECT_TRUE(FunctionDefsEqual(inner_copy, inner));
+}
+
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   |               |        |   |
+// |   |           +---v--+     |   |
+// |   |           | Cast |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   | MapDefun  +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+//  After:
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |               | Cast |         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) {
+  FunctionDef inner =
+      CreateFunction("inner_function", {{"arg0", DT_INT32}},
+                     {{"ret0", DT_INT64}}, {{"ret0", "Cast:y:0"}});
+  NodeDef* cast_op =
+      AddCastNode("Cast", {"arg0"}, DT_INT32, DT_INT64, false, &inner);
+  CHECK_NOTNULL(cast_op);
+
+  FunctionDef outer = CreateFunction("outer_function", {{"x", DT_INT32}},
+                                     {{"mapdefun", DT_INT64}},
+                                     {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"x"}, {DT_INT32}, {DT_INT64}, {{}},
+                      inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  VectorizeMapDefun(&outer, &inner, map_defun);
+  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
+  const NodeDef& cast_node =
+      outer.node_def(function_utils::FindFunctionNodeWithOp("Cast", outer));
+  EXPECT_EQ(cast_node.input(0), "x");
+  EXPECT_EQ(outer.ret().at("mapdefun"),
+            strings::StrCat(cast_node.name(), ":y:0"));
+  EXPECT_EQ(outer.node_def_size(), 1);
+}
+
+// Before:
+//
+//                 +------+
+// +---------------+ Arg0 +-------------------+
+// |               +---+--+                   |
+// |                   |                      |
+// |               +---v--+                   |
+// |   +-----------+ Arg0 +---------------+   |
+// |   |           +---+--+               |   |
+// |   |               |                  |   |
+// |   |               |                  |   |
+// |   |           +---v--+               |   |
+// |   |           | Cast |               |   |
+// |   |           +---+--+               |   |
+// |   |               |                  |   |
+// |   |               +----------+       |   |
+// |   |               |          |       |   |
+// |   | MapDefun  +---v--+   +---v--+    |   |
+// |   +-----------+ Ret0 +---+ Ret1 +----+   |
+// |               +---+--+   +---+--+        |
+// |                   |          |           |
+// |               +---v--+   +---v--+        |
+// +---------------+ Ret0 +---+ Ret1 +--------+
+//                 +------+   +------+
+//
+//
+//  After:
+//
+//                 +------+
+// +---------------+ Arg0 +-------------------+
+// |               +---+--+                   |
+// |                   |                      |
+// |                   |                      |
+// |               +---v--+                   |
+// |               | Cast |                   |
+// |               +---+--+                   |
+// |                   |                      |
+// |                   +----------+           |
+// |                   |          |           |
+// |               +---v--+   +---v--+        |
+// +---------------+ Ret0 +---+ Ret1 +--------+
+//                 +------+   +------+
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) {
+  // Tests that behavior is correct when an output is used more than once.
+  FunctionDef inner =
+      CreateFunction("inner_function", {{"arg0", DT_INT32}},
+                     {{"ret0", DT_INT64}, {"ret1", DT_INT64}},
+                     {{"ret0", "Cast:y:0"}, {"ret1", "Cast:y:0"}});
+  NodeDef* cast_op =
+      AddCastNode("Cast", {"arg0"}, DT_INT32, DT_INT64, false, &inner);
+  CHECK_NOTNULL(cast_op);
+
+  FunctionDef outer = CreateFunction(
+      "outer_function", {{"x", DT_INT32}},
+      {{"mapdefun", DT_INT64}, {"mapdefun_0", DT_INT64}},
+      {{"mapdefun", "MapDefun:output:0"}, {"mapdefun_0", "MapDefun:output:1"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"x"}, {DT_INT32}, {DT_INT64, DT_INT64},
+                      {{}, {}}, inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  VectorizeMapDefun(&outer, &inner, map_defun);
+  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
+  const NodeDef& cast_node =
+      outer.node_def(function_utils::FindFunctionNodeWithOp("Cast", outer));
+  EXPECT_EQ(cast_node.input(0), "x");
+  EXPECT_EQ(outer.ret().at("mapdefun"),
+            strings::StrCat(cast_node.name(), ":y:0"));
+  EXPECT_EQ(outer.ret().at("mapdefun_0"),
+            strings::StrCat(cast_node.name(), ":y:0"));
+  EXPECT_EQ(outer.node_def_size(), 1);
+}
+
+// Before:
+//
+//                        +------+
+// +----------------------+ Arg0 +----------------------+
+// |                      +---+--+                      |
+// |                          |                         |
+// |                      +---v--+                      |
+// |   +------------------+ Arg0 +------------------+   |
+// |   |                  +---+--+                  |   |
+// |   |                      |                     |   |
+// |   |                      |                     |   |
+// |   |                  +---v---+ num=3           |   |
+// |   |                  |Unstack| axis=0          |   |
+// |   |                  ++--+--++                 |   |
+// |   |                   |  |  |                  |   |
+// |   |              +----+  |  +-------+          |   |
+// |   |              |       |          |          |   |
+// |   | MapDefun +---v--+  +-v----+  +--v---+      |   |
+// |   +----------+ Ret0 +--+ Ret1 +--+ Ret2 +------+   |
+// |              +---+--+  +--+---+  +--+---+          |
+// |                  |        |         |              |
+// |              +---v--+  +--v---+  +--v---+          |
+// +--------------+ Ret0 +--+ Ret1 +--+ Ret2 +----------+
+//                +------+  +------+  +------+
+//
+//
+//  After:
+//
+//                        +------+
+// +----------------------+ Arg0 +----------------------+
+// |                      +---+--+                      |
+// |                          |                         |
+// |                          |                         |
+// |                          |                         |
+// |                      +---v---+ num=3               |
+// |                      |Unstack| axis=1              |
+// |                      ++--+--++                     |
+// |                       |  |  |                      |
+// |                  +----+  |  +-------+              |
+// |                  |       |          |              |
+// |                  |       |          |              |
+// |              +---v--+  +-v----+  +--v---+          |
+// +--------------+ Ret0 +--+ Ret1 +--+ Ret2 +----------+
+//                +------+  +------+  +------+
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) {
+  FunctionDef inner = CreateFunction(
+      "inner_function", {{"arg0", DT_INT32}},
+      {{"ret0", DT_INT32}, {"ret1", DT_INT32}, {"ret2", DT_INT32}},
+      {{"ret0", "MyUnstack:output:0"},
+       {"ret1", "MyUnstack:output:1"},
+       {"ret2", "MyUnstack:output:2"}});
+  NodeDef* unstack_op =
+      AddUnstackNode("MyUnstack", {"arg0"}, DT_INT32, 0, 3, &inner);
+  CHECK_NOTNULL(unstack_op);
+
+  FunctionDef outer = CreateFunction("outer_function", {{"x", DT_INT32}},
+                                     {{"mapdefun", DT_INT32},
+                                      {"mapdefun_0", DT_INT32},
+                                      {"mapdefun_1", DT_INT32}},
+                                     {{"mapdefun", "MapDefun:output:0"},
+                                      {"mapdefun_0", "MapDefun:output:1"},
+                                      {"mapdefun_1", "MapDefun:output:2"}});
+
+  NodeDef* map_defun = AddMapDefunNode(
+      "MapDefun", {"x"}, {DT_INT32}, {DT_INT32, DT_INT32, DT_INT32},
+      {{1}, {1}, {1}}, inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  VectorizeMapDefun(&outer, &inner, map_defun);
+  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
+  const NodeDef& unpack_node =
+      outer.node_def(function_utils::FindFunctionNodeWithOp("Unpack", outer));
+  EXPECT_EQ(unpack_node.input(0), "x");
+  EXPECT_EQ(unpack_node.attr().at("axis").i(), 1);
+  EXPECT_EQ(unpack_node.attr().at("T").type(), DT_INT32);
+  EXPECT_EQ(unpack_node.attr().at("num").i(), 3);
+  EXPECT_EQ(outer.ret().at("mapdefun"),
+            strings::StrCat(unpack_node.name(), ":output:0"));
+  EXPECT_EQ(outer.ret().at("mapdefun_0"),
+            strings::StrCat(unpack_node.name(), ":output:1"));
+  EXPECT_EQ(outer.ret().at("mapdefun_1"),
+            strings::StrCat(unpack_node.name(), ":output:2"));
+  EXPECT_EQ(outer.node_def_size(), 1);
+}
+
+// Before:
+//
+//                        +------+
+// +----------------------+ Arg0 +----------------------+
+// |                      +---+--+                      |
+// |                          |                         |
+// |                      +---v--+                      |
+// |   +------------------+ Arg0 +------------------+   |
+// |   |                  +---+--+                  |   |
+// |   |                      |                     |   |
+// |   |                  +---+--+                  |   |
+// |   |                  | Cast |                  |   |
+// |   |                  +---+--+                  |   |
+// |   |                      |                     |   |
+// |   |                  +---v---+ num=3           |   |
+// |   |                  |Unstack| axis=0          |   |
+// |   |                  ++--+--++                 |   |
+// |   |                   |  |  |                  |   |
+// |   |              +----+  |  +-------+          |   |
+// |   |              |       |          |          |   |
+// |   | MapDefun +---v--+  +-v----+  +--v---+      |   |
+// |   +----------+ Ret0 +--+ Ret1 +--+ Ret2 +------+   |
+// |              +---+--+  +--+---+  +--+---+          |
+// |                  |        |         |              |
+// |              +---v--+  +--v---+  +--v---+          |
+// +--------------+ Ret0 +--+ Ret1 +--+ Ret2 +----------+
+//                +------+  +------+  +------+
+//
+//
+//  After:
+//
+//                        +------+
+// +----------------------+ Arg0 +----------------------+
+// |                      +---+--+                      |
+// |                          |                         |
+// |                      +---+--+                      |
+// |                      | Cast |                      |
+// |                      +---+--+                      |
+// |                          |                         |
+// |                      +---v---+ num=3               |
+// |                      |Unstack| axis=1              |
+// |                      ++--+--++                     |
+// |                       |  |  |                      |
+// |                  +----+  |  +-------+              |
+// |                  |       |          |              |
+// |                  |       |          |              |
+// |              +---v--+  +-v----+  +--v---+          |
+// +--------------+ Ret0 +--+ Ret1 +--+ Ret2 +----------+
+//                +------+  +------+  +------+
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) {
+  FunctionDef inner = CreateFunction(
+      "inner_function", {{"arg0", DT_INT32}},
+      {{"ret0", DT_INT32}, {"ret1", DT_INT32}, {"ret2", DT_INT32}},
+      {{"ret0", "MyUnstack:output:0"},
+       {"ret1", "MyUnstack:output:1"},
+       {"ret2", "MyUnstack:output:2"}});
+  NodeDef* cast_op =
+      AddCastNode("Cast", {"arg0"}, DT_INT32, DT_INT64, false, &inner);
+  CHECK_NOTNULL(cast_op);
+  NodeDef* unstack_op =
+      AddUnstackNode("MyUnstack", {"Cast:y:0"}, DT_INT32, 0, 3, &inner);
+  CHECK_NOTNULL(unstack_op);
+
+  FunctionDef outer = CreateFunction("outer_function", {{"x", DT_INT32}},
+                                     {{"mapdefun", DT_INT32},
+                                      {"mapdefun_0", DT_INT32},
+                                      {"mapdefun_1", DT_INT32}},
+                                     {{"mapdefun", "MapDefun:output:0"},
+                                      {"mapdefun_0", "MapDefun:output:1"},
+                                      {"mapdefun_1", "MapDefun:output:2"}});
+
+  NodeDef* map_defun = AddMapDefunNode(
+      "MapDefun", {"x"}, {DT_INT32}, {DT_INT32, DT_INT32, DT_INT32},
+      {{1}, {1}, {1}}, inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  VectorizeMapDefun(&outer, &inner, map_defun);
+  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
+  const NodeDef& cast_node =
+      outer.node_def(function_utils::FindFunctionNodeWithOp("Cast", outer));
+  EXPECT_EQ(cast_node.input(0), "x");
+  const NodeDef& unpack_node =
+      outer.node_def(function_utils::FindFunctionNodeWithOp("Unpack", outer));
+  EXPECT_EQ(unpack_node.input(0), strings::StrCat(cast_node.name(), ":y:0"));
+  EXPECT_EQ(unpack_node.attr().at("axis").i(), 1);
+  EXPECT_EQ(unpack_node.attr().at("T").type(), DT_INT32);
+  EXPECT_EQ(unpack_node.attr().at("num").i(), 3);
+
+  EXPECT_EQ(outer.ret().at("mapdefun"),
+            strings::StrCat(unpack_node.name(), ":output:0"));
+  EXPECT_EQ(outer.ret().at("mapdefun_0"),
+            strings::StrCat(unpack_node.name(), ":output:1"));
+  EXPECT_EQ(outer.ret().at("mapdefun_1"),
+            strings::StrCat(unpack_node.name(), ":output:2"));
+  EXPECT_EQ(outer.node_def_size(), 2);
+}
+
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +---+--+     |   |
+// |   |     +---------+        |   |
+// |   | +---v--+      |        |   |
+// |   | |Print |      |        |   |
+// |   | +---+--+      |        |   |
+// |   |     :     +---v--+     |   |
+// |   |     ::::::> Cast |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   | MapDefun  +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+//  After:
+//
+//  No change because we don't deal with control inputs for now.
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
+  FunctionDef inner =
+      CreateFunction("inner_function", {{"arg0", DT_INT32}},
+                     {{"ret0", DT_INT64}}, {{"ret0", "Cast:y:0"}});
+  // The attrs aren't relevant
+  NodeDef* print_op =
+      function_utils::AddNode("Print", "Print", {"arg0", "arg0"}, {}, &inner);
+  CHECK_NOTNULL(print_op);
+  NodeDef* cast_op = AddCastNode("Cast", {"arg0", "^Print"}, DT_INT32, DT_INT64,
+                                 false, &inner);
+  CHECK_NOTNULL(cast_op);
+
+  FunctionDef outer = CreateFunction("outer_function", {{"x", DT_INT32}},
+                                     {{"mapdefun", DT_INT64}},
+                                     {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"x"}, {DT_INT32}, {DT_INT64}, {{}},
+                      inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDef outer_copy(outer);
+  FunctionDef inner_copy(inner);
+  VectorizeMapDefun(&outer, &inner, map_defun);
+  // They should be unchanged
+  EXPECT_TRUE(FunctionDefsEqual(outer_copy, outer));
+}
+
+// TODO(rachelim): More test cases when we get around to implementing them:
+// [] A badly defined converter, e.g. doesn't produce nodes that have the
+//    same number of outputs/inputs as the nodes to be converted
+// [] Converter where the 'converted' form has multiple nodes.
+// [] Case with dependent nodes, e.g. ops with const inputs that are
+//    broadcasted.
+// [] Python-side tests to actually run the functions to make sure
+//    they work.
+
+}  // namespace
+}  // namespace vectorization_utils
+}  // namespace grappler
+}  // namespace tensorflow
-- 
GitLab


From 7dac251a7e0f1cd8fd55dffff942fd9c91218627 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 14:04:41 -0700
Subject: [PATCH 0403/1357] Remove LOG(INFO) in MetaOptimizer:Optimize as this
 currently produces a large number of debugging outputs in the INFO log that
 look like:

I0917 16:20:11.073992    9191 meta_optimizer.cc:334] Starting optimization for grappler item: tf_graph
I0917 16:20:11.079458    9191 meta_optimizer.cc:334] Starting optimization for grappler item: tf_graph
I0917 16:20:11.084827   12447 meta_optimizer.cc:334] Starting optimization for grappler item: tf_graph
I0917 16:20:11.089359   12447 meta_optimizer.cc:334] Starting optimization for grappler item: tf_graph

After this change those lines will simply no longer appear.

RELNOTES: n/a
PiperOrigin-RevId: 213690759
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 1ed1b22931..4b0cbfaa82 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -352,7 +352,7 @@ Status MetaOptimizer::RunOptimizer(
 
 Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                GraphDef* optimized_graph) {
-  LOG(INFO) << "Starting optimization for grappler item: " << item.id;
+  VLOG(1) << "Starting optimization for grappler item: " << item.id;
   optimization_results_.clear();
 
   // 1. Optimize main graph
-- 
GitLab


From 8878a5c4761c025d47faa123fe4978f173f950c9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 14:16:14 -0700
Subject: [PATCH 0404/1357] Added ABSL_DEPRECATED annotations to various
 deprecated TensorFlow functions.

PiperOrigin-RevId: 213693027
---
 tensorflow/compiler/xla/BUILD                      |  1 +
 tensorflow/compiler/xla/shape_util.h               |  4 ++--
 tensorflow/contrib/lite/kernels/internal/BUILD     |  3 ++-
 tensorflow/contrib/lite/kernels/internal/types.h   |  3 ++-
 tensorflow/core/BUILD                              |  3 +++
 tensorflow/core/example/feature_util.h             |  5 +++--
 tensorflow/core/framework/device_base.h            |  3 ++-
 tensorflow/core/graph/testlib.h                    |  2 +-
 tensorflow/core/kernels/queue_base.h               |  4 ++++
 tensorflow/core/platform/cloud/gcs_file_system.cc  |  3 ++-
 tensorflow/core/util/sparse/sparse_tensor.h        | 14 ++++++++------
 tensorflow/stream_executor/device_description.h    |  6 ++----
 tensorflow/stream_executor/plugin_registry.h       |  2 ++
 tensorflow/stream_executor/stream_executor_pimpl.h | 11 ++++++-----
 14 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index ef70c1f8ac..cc7390c6e6 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -245,6 +245,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:regexp_internal",
+        "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 623ae39de8..d8bb27beae 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <initializer_list>
 #include <string>
 
+#include "absl/base/macros.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/types/optional.h"
 #include "absl/types/span.h"
@@ -479,8 +480,7 @@ class ShapeUtil {
 
   // Shorthand for testing whether a shape is of a given element type and
   // sequence of dimensions.
-  //
-  // DEPRECATED: Use Equal() instead.
+  ABSL_DEPRECATED("Use Equal() instead.")
   static bool ShapeIs(const Shape& shape, PrimitiveType element_type,
                       std::initializer_list<int64> dimensions);
 
diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
index a6fd4ac2dd..195474e7fd 100644
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -43,6 +43,7 @@ cc_library(
         "compatibility.h",
         "types.h",
     ],
+    deps = ["@com_google_absl//absl/base:core_headers"],
 )
 
 config_setting(
@@ -458,7 +459,7 @@ cc_library(
     ],
     copts = NEON_FLAGS_IF_APPLICABLE,
     deps = [
-        "//tensorflow/contrib/lite/kernels:activation_functor",
+        "@com_google_absl//absl/base:core_headers",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "@arm_neon_2_x86_sse",
         "@gemmlowp",
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index b70a87d0dc..3e0308721e 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -18,6 +18,7 @@ limitations under the License.
 #include <cstring>
 #include <iterator>
 
+#include "absl/base/macros.h"
 #include "tensorflow/contrib/lite/kernels/internal/compatibility.h"
 
 namespace tflite {
@@ -424,7 +425,7 @@ inline int FlatSize(const Dims<N>& dims) {
   return flat_size;
 }
 
-// Deprecated. Prefer FlatSize.
+ABSL_DEPRECATED("Prefer FlatSize.")
 inline int RequiredBufferSizeForDims(const Dims<4>& dims) {
   return FlatSize(dims);
 }
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 9bcf5b0865..4b2589aaeb 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2095,6 +2095,7 @@ cc_library(
     deps = tf_additional_lib_deps() + [
         "@com_google_absl//absl/strings",
         "//third_party/eigen3",
+        "@com_google_absl//absl/base:core_headers",
         "//tensorflow/core/platform/default/build_config:platformlib",
     ] + if_static([":lib_internal_impl"]),
 )
@@ -2287,6 +2288,7 @@ cc_library(
     deps = [
         "//tensorflow/core/platform/default/build_config:jpeg",
         "//tensorflow/core/platform/default/build_config:logging",
+        "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2319,6 +2321,7 @@ cc_library(
     deps = [
         "//tensorflow/core/platform/default/build_config:gif",
         "//tensorflow/core/platform/default/build_config:logging",
+        "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h
index ec93b9aad9..016d1a92c1 100644
--- a/tensorflow/core/example/feature_util.h
+++ b/tensorflow/core/example/feature_util.h
@@ -103,6 +103,7 @@ limitations under the License.
 #include <iterator>
 #include <type_traits>
 
+#include "absl/base/macros.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
@@ -113,10 +114,10 @@ namespace tensorflow {
 
 namespace internal {
 
-// DEPRECATED: Use GetFeature instead.
 // TODO(gorban): Update all clients in a followup CL.
 // Returns a reference to a feature corresponding to the name.
 // Note: it will create a new Feature if it is missing in the example.
+ABSL_DEPRECATED("Use GetFeature instead.")
 Feature& ExampleFeature(const string& name, Example* example);
 
 // Specializations of RepeatedFieldTrait define a type of RepeatedField
@@ -314,9 +315,9 @@ bool HasFeature(const string& key, const Example& example) {
   return HasFeature<FeatureType...>(key, GetFeatures(example));
 }
 
-// DEPRECATED: use HasFeature instead.
 // TODO(gorban): update all clients in a followup CL.
 template <typename... FeatureType>
+ABSL_DEPRECATED("Use HasFeature instead.")
 bool ExampleHasFeature(const string& key, const Example& example) {
   return HasFeature<FeatureType...>(key, example);
 }
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index 53ac639b4c..446c31b17f 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/base/macros.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/refcount.h"
@@ -176,9 +177,9 @@ class DeviceBase {
     return nullptr;
   }
 
-  // DEPRECATED: Use `this->GetAllocator()` or `this->GetScopedAllocator()`.
   // This method is provided for backwards compatibility, and will be removed
   // in a future release.
+  ABSL_DEPRECATED("Use `this->GetAllocator()` or `this->GetScopedAllocator()`.")
   Allocator* GetStepAllocator(AllocatorAttributes attr, ResourceMgr*) {
     return GetAllocator(attr);
   }
diff --git a/tensorflow/core/graph/testlib.h b/tensorflow/core/graph/testlib.h
index bd0284d43a..b00196f587 100644
--- a/tensorflow/core/graph/testlib.h
+++ b/tensorflow/core/graph/testlib.h
@@ -32,7 +32,7 @@ namespace test {
 namespace graph {
 
 // Converts "g" into its corresponding GraphDef "def".
-// DEPRECATED: call g->ToGraphDef(def) instead.
+ABSL_DEPRECATED("Call g->ToGraphDef(def) instead.")
 void ToGraphDef(Graph* g, GraphDef* def);
 
 // A few helpers to construct a graph.
diff --git a/tensorflow/core/kernels/queue_base.h b/tensorflow/core/kernels/queue_base.h
index 5fb1c92f94..272aa3b4f5 100644
--- a/tensorflow/core/kernels/queue_base.h
+++ b/tensorflow/core/kernels/queue_base.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <deque>
 #include <vector>
 
+#include "absl/base/macros.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/queue_interface.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -82,6 +83,9 @@ class QueueBase : public QueueInterface {
   // NOTE(mrry): This method is deprecated. Use
   // `tensorflow::batch_util::CopySliceToElement()` defined in
   // "./batch_util.h" instead.
+  ABSL_DEPRECATED(
+      "Use `tensorflow::batch_util::CopySliceToElement()` defined in "
+      "\"./batch_util.h\" instead.")
   static Status CopyElementToSlice(const Tensor& element, Tensor* parent,
                                    int64 index);
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 83228fab6f..83ea8539ed 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #ifdef _WIN32
 #include <io.h>  // for _mktemp
 #endif
+#include "absl/base/macros.h"
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -63,7 +64,7 @@ constexpr int kGetChildrenDefaultPageSize = 1000;
 // The HTTP response code "308 Resume Incomplete".
 constexpr uint64 HTTP_CODE_RESUME_INCOMPLETE = 308;
 // The environment variable that overrides the size of the readahead buffer.
-// DEPRECATED. Use GCS_BLOCK_SIZE_MB instead.
+ABSL_DEPRECATED("Use GCS_BLOCK_SIZE_MB instead.")
 constexpr char kReadaheadBufferSize[] = "GCS_READAHEAD_BUFFER_SIZE_BYTES";
 // The environment variable that disables the GCS block cache for reads.
 // This is the explicit alternative to setting BLOCK_SIZE or MAX_SIZE to 0, and
diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h
index 0f04b65f60..b9ca8ab395 100644
--- a/tensorflow/core/util/sparse/sparse_tensor.h
+++ b/tensorflow/core/util/sparse/sparse_tensor.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <numeric>
 #include <vector>
 
+#include "absl/base/macros.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_types.h"
@@ -95,21 +96,21 @@ class SparseTensor {
 
   SparseTensor() : dims_(0) {}
 
-  // DEPRECATED: use Create() functions instead of constructors directly.
+  ABSL_DEPRECATED("Use Create() functions instead of constructors directly.")
   SparseTensor(Tensor ix, Tensor vals, const TensorShape& shape)
       : SparseTensor(ix, vals, TensorShapeToVector(shape),
                      UndefinedOrder(TensorShapeToVector(shape))) {}
 
-  // DEPRECATED: use Create() functions instead of constructors directly.
+  ABSL_DEPRECATED("Use Create() functions instead of constructors directly.")
   SparseTensor(Tensor ix, Tensor vals, const VarDimArray shape)
       : SparseTensor(ix, vals, shape, UndefinedOrder(shape)) {}
 
-  // DEPRECATED: use Create() functions instead of constructors directly.
+  ABSL_DEPRECATED("use Create() functions instead of constructors directly.")
   SparseTensor(Tensor ix, Tensor vals, const TensorShape& shape,
                const VarDimArray order)
       : SparseTensor(ix, vals, TensorShapeToVector(shape), order) {}
 
-  // DEPRECATED: use Create() functions instead of constructors directly.
+  ABSL_DEPRECATED("Use Create() functions instead of constructors directly.")
   SparseTensor(Tensor ix, Tensor vals, const VarDimArray shape,
                const VarDimArray order)
       : ix_(ix),
@@ -237,9 +238,10 @@ class SparseTensor {
   static Status Split(const SparseTensor& tensor, const int split_dim,
                       const int num_split, std::vector<SparseTensor>* result);
 
-  // DEPRECATED: use the form of Split() that takes an output pointer and
-  // returns a status instead.
   template <typename T>
+  ABSL_DEPRECATED(
+      "Use the form of Split() that takes an output pointer and returns a "
+      "status instead.")
   static std::vector<SparseTensor> Split(const SparseTensor& tensor,
                                          const int split_dim,
                                          const int num_split,
diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h
index 7f99d81ef3..a4580d6462 100644
--- a/tensorflow/stream_executor/device_description.h
+++ b/tensorflow/stream_executor/device_description.h
@@ -22,8 +22,7 @@ limitations under the License.
 
 #include <map>
 #include <memory>
-#include "tensorflow/stream_executor/platform/port.h"
-
+#include "absl/base/macros.h"
 #include "tensorflow/stream_executor/launch_dim.h"
 #include "tensorflow/stream_executor/platform/port.h"
 
@@ -359,9 +358,8 @@ class DeviceDescriptionBuilder {
 bool ThreadDimOk(const DeviceDescription &device_description,
                  const ThreadDim &thread_dim);
 
-// [deprecated] Use MathUtil::CeilOfRatio directly instead.
-//
 // Equivalent to ceil(double(element_count) / threads_per_block).
+ABSL_DEPRECATED("Use MathUtil::CeilOfRatio directly instead.")
 uint64 DivideCeil(uint64 x, uint64 y);
 
 // Calculate the number of threads/blocks required to process element_count
diff --git a/tensorflow/stream_executor/plugin_registry.h b/tensorflow/stream_executor/plugin_registry.h
index 49628ecd24..3065b5cb77 100644
--- a/tensorflow/stream_executor/plugin_registry.h
+++ b/tensorflow/stream_executor/plugin_registry.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <map>
 
+#include "absl/base/macros.h"
 #include "tensorflow/stream_executor/blas.h"
 #include "tensorflow/stream_executor/dnn.h"
 #include "tensorflow/stream_executor/fft.h"
@@ -97,6 +98,7 @@ class PluginRegistry {
   // TODO(b/22689637): Deprecated/temporary. Will be deleted once all users are
   // on MultiPlatformManager / PlatformId.
   template <typename FactoryT>
+  ABSL_DEPRECATED("Use MultiPlatformManager / PlatformId instead.")
   port::StatusOr<FactoryT> GetFactory(PlatformKind platform_kind,
                                       PluginId plugin_id);
 
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index d04025b681..4a8a270afa 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <tuple>
 #include <vector>
 
+#include "absl/base/macros.h"
 #include "tensorflow/stream_executor/lib/status.h"
 #include "tensorflow/stream_executor/lib/statusor.h"
 #include "tensorflow/stream_executor/lib/strcat.h"
@@ -81,8 +82,8 @@ class StreamExecutor {
   port::Status Init();
   port::Status Init(int device_ordinal, DeviceOptions device_options);
 
-  // DEPRECATED: Do not use; use platform() instead.
   // Returns the platform that this StreamExecutor is acting upon.
+  ABSL_DEPRECATED("Use platform() instead.")
   PlatformKind platform_kind() const { return platform_kind_; }
 
   // Returns a reference to the platform that created this executor.
@@ -255,15 +256,15 @@ class StreamExecutor {
 
   // [deprecated] Blocks the caller while a data segment of the given size is
   // copied from the host source to the device destination.
-  //
-  // Deprecation: prefer explicit H2D below, to avoid error-prone API usage.
+  ABSL_DEPRECATED(
+      "Prefer SynchronousMemcpyH2D, to avoid error-prone API usage.")
   bool SynchronousMemcpy(DeviceMemoryBase *device_dst, const void *host_src,
                          uint64 size) SE_MUST_USE_RESULT;
 
   // [deprecated] Blocks the caller while a data segment of the given size is
   // copied from the device source to the host destination.
-  //
-  // Deprecation: prefer explicit D2H below, to avoid error-prone API usage.
+  ABSL_DEPRECATED(
+      "Prefer SynchronousMemcpyD2H, to avoid error-prone API usage.")
   bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &device_src,
                          uint64 size) SE_MUST_USE_RESULT;
 
-- 
GitLab


From 5f05a18c576ba89a7bce5f2ed5c7104bc158d8f1 Mon Sep 17 00:00:00 2001
From: Guangda Lai <31743510+aaroey@users.noreply.github.com>
Date: Wed, 19 Sep 2018 14:39:49 -0700
Subject: [PATCH 0405/1357] Fix variable name.

---
 tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 21fb459483..6283bd2300 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -780,11 +780,11 @@ class Converter {
       // skip control nodes
       if (input_name[0] == '^') continue;
       string name = input_name;
-      auto first = name.find_last_of(':');
+      auto last = name.find_last_of(':');
       // TODO(aaroey): use TensorId
-      if (first != string::npos && first + 2 == name.size() &&
-          name[first + 1] == '0') {
-        name.erase(first);
+      if (last != string::npos && last + 2 == name.size() &&
+          name[last + 1] == '0') {
+        name.erase(last);
       }
 
       if (trt_tensors_.count(name)) {
-- 
GitLab


From 4e7d5f008be62bb7ca3e1646af8d4f22287d9e50 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Wed, 19 Sep 2018 14:44:28 -0700
Subject: [PATCH 0406/1357] Add min/max version for depthwise conv.

PiperOrigin-RevId: 213698663
---
 tensorflow/contrib/lite/kernels/register.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index 6e35799c35..2f4b663a28 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -158,7 +158,9 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D());
   AddBuiltin(BuiltinOperator_L2_POOL_2D, Register_L2_POOL_2D());
   AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D());
-  AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D());
+  AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_SVDF, Register_SVDF());
   AddBuiltin(BuiltinOperator_RNN, Register_RNN());
   AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
-- 
GitLab


From c3014ec19e23e4aad7286b3fac6b25a5fb4a6326 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Wed, 19 Sep 2018 14:54:07 -0700
Subject: [PATCH 0407/1357] Allow the tape tensor to have unknown shapes.

This is done by making the TapeTensor a template rather than a concrete struct.

PiperOrigin-RevId: 213700425
---
 tensorflow/c/eager/tape.h                  | 118 +++---
 tensorflow/python/eager/BUILD              |   1 +
 tensorflow/python/eager/backprop.py        |   5 +-
 tensorflow/python/eager/backprop_test.py   |  12 +
 tensorflow/python/eager/imperative_grad.py |   5 +-
 tensorflow/python/eager/pywrap_tfe_src.cc  | 458 ++++++++++++---------
 6 files changed, 342 insertions(+), 257 deletions(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 49990b6249..41b5b8ff36 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -29,15 +29,8 @@ limitations under the License.
 namespace tensorflow {
 namespace eager {
 
-// Information about a tensor.
-struct TapeTensor {
-  int64 id;  // Expected to be unique in the lifetime of this process.
-  DataType dtype;
-  TensorShape shape;
-};
-
 // Represents an entry in the tape.
-template <typename BackwardFunction>
+template <typename BackwardFunction, typename TapeTensor>
 struct OpTapeEntry {
   string op_type;
   std::vector<TapeTensor> output_tensor_info;
@@ -57,8 +50,8 @@ struct OpTapeEntry {
 using TensorTape = gtl::FlatMap<int64, int64>;
 
 // Map from operation-id to tape entry.
-template <typename BackwardFunction>
-using OpTape = gtl::FlatMap<int64, OpTapeEntry<BackwardFunction>>;
+template <typename BackwardFunction, typename TapeTensor>
+using OpTape = gtl::FlatMap<int64, OpTapeEntry<BackwardFunction, TapeTensor>>;
 
 // Operations the tape needs to perform on tensors to do backpropagation. Named
 // "vspace" because a subset of these are related to a vector space, such as
@@ -79,7 +72,7 @@ using OpTape = gtl::FlatMap<int64, OpTapeEntry<BackwardFunction>>;
 // TODO(apassos) provide concrete template instantiations for TFE_TensorHandle
 // specialization, which is blocked by quite a few things needing to loop back
 // into python now.
-template <typename Gradient, typename BackwardFunction>
+template <typename Gradient, typename BackwardFunction, typename TapeTensor>
 class VSpace {
  public:
   virtual ~VSpace() {}
@@ -93,10 +86,10 @@ class VSpace {
       gtl::ArraySlice<Gradient*> gradient_tensors) const = 0;
 
   // Returns a tensor of the right shape and dtype filled with zeros.
-  virtual Gradient* Zeros(TensorShape shape, DataType dtype) const = 0;
+  virtual Gradient* Zeros(const TapeTensor& tensor) const = 0;
 
   // Returns a Tensor which is filled with ones and like the input.
-  virtual Gradient* Ones(TensorShape shape, DataType dtype) const = 0;
+  virtual Gradient* Ones(const TapeTensor& tensor) const = 0;
 
   // Calls the passed-in backward function.
   virtual Status CallBackwardFunction(
@@ -114,7 +107,7 @@ class VSpace {
 
 // Traces the execution of operations, doing eager garbage collection, and
 // exporting a full trace so other code can do backpropagation. Not thread-safe.
-template <typename Gradient, typename BackwardFunction>
+template <typename Gradient, typename BackwardFunction, typename TapeTensor>
 class GradientTape {
  public:
   // If `persistent` is true, GradientTape will not eagerly delete backward
@@ -134,7 +127,7 @@ class GradientTape {
   void Watch(int64 tensor_id);
 
   void RecordOperation(
-      const string& op_type, gtl::ArraySlice<TapeTensor> output_tensors,
+      const string& op_type, std::vector<TapeTensor>& output_tensors,
       gtl::ArraySlice<int64> input_tensor_id,
       gtl::ArraySlice<tensorflow::DataType> input_dtypes,
       BackwardFunction* backward_function,
@@ -146,17 +139,18 @@ class GradientTape {
   // once) and produces the gradient of the target tensors with respect to the
   // source tensors. The output gradients are used if not empty and not
   // null. The result is populated with one tensor per target element.
-  Status ComputeGradient(const VSpace<Gradient, BackwardFunction>& vspace,
-                         gtl::ArraySlice<int64> target_tensor_ids,
-                         gtl::ArraySlice<int64> source_tensor_id,
-                         gtl::ArraySlice<Gradient*> output_gradients,
-                         std::vector<Gradient*>* result);
+  Status ComputeGradient(
+      const VSpace<Gradient, BackwardFunction, TapeTensor>& vspace,
+      gtl::ArraySlice<int64> target_tensor_ids,
+      gtl::ArraySlice<int64> source_tensor_id,
+      gtl::ArraySlice<Gradient*> output_gradients,
+      std::vector<Gradient*>* result);
 
   bool IsPersistent() const { return persistent_; }
 
  private:
   TensorTape tensor_tape_;
-  OpTape<BackwardFunction> op_tape_;
+  OpTape<BackwardFunction, TapeTensor> op_tape_;
   int64 next_op_id_{0};
 
   // Map from tensor id to number of remaining usages (i.e. how many entries in
@@ -186,8 +180,8 @@ inline bool IsDtypeTrainable(DataType dtype) {
   }
 }
 
-template <typename Gradient, typename BackwardFunction>
-bool GradientTape<Gradient, BackwardFunction>::ShouldRecord(
+template <typename Gradient, typename BackwardFunction, typename TapeTensor>
+bool GradientTape<Gradient, BackwardFunction, TapeTensor>::ShouldRecord(
     gtl::ArraySlice<int64> tensor_ids,
     gtl::ArraySlice<tensorflow::DataType> dtypes) {
   CHECK_EQ(tensor_ids.size(), dtypes.size());
@@ -201,14 +195,15 @@ bool GradientTape<Gradient, BackwardFunction>::ShouldRecord(
   return false;
 }
 
-template <typename Gradient, typename BackwardFunction>
-void GradientTape<Gradient, BackwardFunction>::Watch(int64 tensor_id) {
+template <typename Gradient, typename BackwardFunction, typename TapeTensor>
+void GradientTape<Gradient, BackwardFunction, TapeTensor>::Watch(
+    int64 tensor_id) {
   tensor_tape_.emplace(tensor_id, -1);
 }
 
-template <typename Gradient, typename BackwardFunction>
-void GradientTape<Gradient, BackwardFunction>::RecordOperation(
-    const string& op_type, gtl::ArraySlice<TapeTensor> output_tensors,
+template <typename Gradient, typename BackwardFunction, typename TapeTensor>
+void GradientTape<Gradient, BackwardFunction, TapeTensor>::RecordOperation(
+    const string& op_type, std::vector<TapeTensor>& output_tensors,
     gtl::ArraySlice<int64> input_tensor_id,
     gtl::ArraySlice<tensorflow::DataType> input_dtypes,
     BackwardFunction* backward_function,
@@ -229,16 +224,18 @@ void GradientTape<Gradient, BackwardFunction>::RecordOperation(
   for (const TapeTensor& o : output_tensors) {
     // Note: the tensor can have already been watched and hence be in the tape,
     // so we cannot check that we're inserting it here.
-    tensor_tape_[o.id] = op_id;
-    tensor_usage_[o.id] = 1;
+    tensor_tape_[o.GetID()] = op_id;
+    tensor_usage_[o.GetID()] = 1;
     tensors.push_back(o);
   }
-  op_tape_[op_id] = OpTapeEntry<BackwardFunction>{
-      op_type, tensors, ids, backward_function, backward_function_deleter};
+  op_tape_[op_id] = OpTapeEntry<BackwardFunction, TapeTensor>{
+      op_type, std::move(tensors), ids, backward_function,
+      backward_function_deleter};
 }
 
-template <typename Gradient, typename BackwardFunction>
-void GradientTape<Gradient, BackwardFunction>::DeleteTrace(int64 tensor_id) {
+template <typename Gradient, typename BackwardFunction, typename TapeTensor>
+void GradientTape<Gradient, BackwardFunction, TapeTensor>::DeleteTrace(
+    int64 tensor_id) {
   auto it = tensor_usage_.find(tensor_id);
   if (it == tensor_usage_.end()) {
     return;
@@ -261,7 +258,7 @@ void GradientTape<Gradient, BackwardFunction>::DeleteTrace(int64 tensor_id) {
   auto op_it = op_tape_.find(op_id);
   CHECK(op_it != op_tape_.end());
   for (const auto& output : op_it->second.output_tensor_info) {
-    if (tensor_usage_.find(output.id) != tensor_usage_.end()) {
+    if (tensor_usage_.find(output.GetID()) != tensor_usage_.end()) {
       // Found a usage for an output, so cannot delete the op.
       return;
     }
@@ -304,9 +301,9 @@ void GradientTape<Gradient, BackwardFunction>::DeleteTrace(int64 tensor_id) {
 
 namespace {
 
-template <typename BackwardFunction>
+template <typename BackwardFunction, typename TapeTensor>
 struct BackpropInitialState {
-  OpTape<BackwardFunction> op_tape;
+  OpTape<BackwardFunction, TapeTensor> op_tape;
 
   // Map from tensor ID to how many references still exist for this tensor in
   // the tape.
@@ -322,17 +319,17 @@ struct BackpropInitialState {
 // If `persistent_tape` is false, op_tape is cleared and backwards functions
 // not needed for gradient computation are deleted. Backwards functions that
 // are needed, are copied and returned in BackpropInitialState.
-template <typename BackwardFunction>
-BackpropInitialState<BackwardFunction> PrepareBackprop(
+template <typename BackwardFunction, typename TapeTensor>
+BackpropInitialState<BackwardFunction, TapeTensor> PrepareBackprop(
     gtl::ArraySlice<int64> target, const TensorTape& tensor_tape,
-    OpTape<BackwardFunction>* op_tape, const gtl::FlatSet<int64>& sources_set,
-    bool persistent_tape) {
+    OpTape<BackwardFunction, TapeTensor>* op_tape,
+    const gtl::FlatSet<int64>& sources_set, bool persistent_tape) {
   std::vector<int64> tensor_stack;
   tensor_stack.reserve(target.size());
   for (auto t : target) {
     tensor_stack.push_back(t);
   }
-  BackpropInitialState<BackwardFunction> result;
+  BackpropInitialState<BackwardFunction, TapeTensor> result;
   while (!tensor_stack.empty()) {
     int64 tensor_id = tensor_stack.back();
     tensor_stack.pop_back();
@@ -383,9 +380,9 @@ BackpropInitialState<BackwardFunction> PrepareBackprop(
   return result;
 }
 
-template <typename BackwardFunction>
+template <typename BackwardFunction, typename TapeTensor>
 std::vector<int64> InitialStack(
-    const OpTape<BackwardFunction>& op_tape,
+    const OpTape<BackwardFunction, TapeTensor>& op_tape,
     const gtl::FlatMap<int64, int64>& op_missing_tensor) {
   std::vector<int64> result;
   for (auto& op_entry : op_tape) {
@@ -396,13 +393,13 @@ std::vector<int64> InitialStack(
   return result;
 }
 
-template <typename Gradient, typename BackwardFunction>
-Status InitialGradients(const VSpace<Gradient, BackwardFunction>& vspace,
-                        gtl::ArraySlice<int64> target_tensor_ids,
-                        gtl::ArraySlice<Gradient*> output_gradients,
-                        const TensorTape& tensor_tape,
-                        const OpTape<BackwardFunction>& op_tape,
-                        gtl::FlatMap<int64, std::vector<Gradient*>>* result) {
+template <typename Gradient, typename BackwardFunction, typename TapeTensor>
+Status InitialGradients(
+    const VSpace<Gradient, BackwardFunction, TapeTensor>& vspace,
+    gtl::ArraySlice<int64> target_tensor_ids,
+    gtl::ArraySlice<Gradient*> output_gradients, const TensorTape& tensor_tape,
+    const OpTape<BackwardFunction, TapeTensor>& op_tape,
+    gtl::FlatMap<int64, std::vector<Gradient*>>* result) {
   for (int i = 0; i < target_tensor_ids.size(); ++i) {
     const int64 id = target_tensor_ids[i];
     if (output_gradients.empty() || output_gradients[i] == nullptr) {
@@ -416,11 +413,10 @@ Status InitialGradients(const VSpace<Gradient, BackwardFunction>& vspace,
         }
         bool found = false;
         for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) {
-          if (op_it->second.output_tensor_info[j].id == id) {
+          if (op_it->second.output_tensor_info[j].GetID() == id) {
             found = true;
             (*result)[id].push_back(
-                vspace.Ones(op_it->second.output_tensor_info[j].shape,
-                            op_it->second.output_tensor_info[j].dtype));
+                vspace.Ones(op_it->second.output_tensor_info[j]));
             break;
           }
         }
@@ -469,16 +465,16 @@ gtl::FlatMap<string, gtl::FlatSet<int>>* FunctionsAcceptingNoneForIndicesMap() {
 constexpr int kMinAggregateCount = 4;
 constexpr int kMinAggregateBytes = 128 * 1024 * 1024;
 
-template <typename Gradient, typename BackwardFunction>
-Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
-    const VSpace<Gradient, BackwardFunction>& vspace,
+template <typename Gradient, typename BackwardFunction, typename TapeTensor>
+Status GradientTape<Gradient, BackwardFunction, TapeTensor>::ComputeGradient(
+    const VSpace<Gradient, BackwardFunction, TapeTensor>& vspace,
     gtl::ArraySlice<int64> target_tensor_ids,
     gtl::ArraySlice<int64> source_tensor_ids,
     gtl::ArraySlice<Gradient*> output_gradients,
     std::vector<Gradient*>* result) {
   gtl::FlatSet<int64> sources_set(source_tensor_ids.begin(),
                                   source_tensor_ids.end());
-  BackpropInitialState<BackwardFunction> state = PrepareBackprop(
+  BackpropInitialState<BackwardFunction, TapeTensor> state = PrepareBackprop(
       target_tensor_ids, tensor_tape_, &op_tape_, sources_set, persistent_);
   std::vector<int64> op_stack =
       InitialStack(state.op_tape, state.op_missing_tensor);
@@ -522,7 +518,7 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
     out_gradients.reserve(trace.output_tensor_info.size());
     bool any_gradient_nonzero = false;
     for (int i = 0; i < trace.output_tensor_info.size(); ++i) {
-      const int64 id = trace.output_tensor_info[i].id;
+      const int64 id = trace.output_tensor_info[i].GetID();
       auto grad_it = gradients.find(id);
       if (grad_it == gradients.end()) {
         auto func_name_it =
@@ -531,9 +527,7 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
             func_name_it->second.find(i) != func_name_it->second.end()) {
           out_gradients.push_back(nullptr);
         } else {
-          out_gradients.push_back(
-              vspace.Zeros(trace.output_tensor_info[i].shape,
-                           trace.output_tensor_info[i].dtype));
+          out_gradients.push_back(vspace.Zeros(trace.output_tensor_info[i]));
         }
       } else {
         any_gradient_nonzero = true;
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index c1bc27d443..f80256fc2a 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -34,6 +34,7 @@ cc_library(
         "//tensorflow/python:safe_ptr",
         "//third_party/py/numpy:headers",
         "//third_party/python_runtime:headers",
+        "@com_google_absl//absl/types:variant",
     ],
 )
 
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 50a6ce6324..d95e0fe721 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -608,8 +608,9 @@ def _ones(shape, dtype):
 _default_vspace = imperative_grad.VSpace(
     num_elements_fn=_num_elements,
     aggregate_fn=_aggregate_grads,
-    zeros=_zeros,
-    ones=_ones)
+    zeros_fn=_zeros,
+    ones_fn=_ones,
+    graph_shape_fn=gen_array_ops.shape)
 pywrap_tensorflow.TFE_Py_RegisterVSpace(_default_vspace)
 
 
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index f938ed5df8..32731747b7 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -1022,6 +1022,18 @@ class BackpropTest(test.TestCase):
         resource_variable_ops.ResourceVariable(2.0))
     self.assertAllEqual(gradients_constants, gradients_variables)
 
+  def testUnknownShapes(self):
+    with context.graph_mode():
+      with backprop.GradientTape() as tape:
+        a = array_ops.placeholder(dtype=dtypes.float32, shape=None)
+        tape.watch(a)
+        b = a**3
+
+      db_da = tape.gradient(b, a)
+
+      with self.cached_session() as sess:
+        self.assertEqual((8.0, 12.0), sess.run((b, db_da), feed_dict={a: 2.0}))
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py
index 5f027d107c..5f5af4ab6c 100644
--- a/tensorflow/python/eager/imperative_grad.py
+++ b/tensorflow/python/eager/imperative_grad.py
@@ -23,8 +23,9 @@ import collections
 from tensorflow.python import pywrap_tensorflow
 
 
-VSpace = collections.namedtuple(
-    "VSpace", ["aggregate_fn", "num_elements_fn", "zeros", "ones"])
+VSpace = collections.namedtuple("VSpace", [
+    "aggregate_fn", "num_elements_fn", "zeros_fn", "ones_fn", "graph_shape_fn"
+])
 
 
 def imperative_grad(
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index a0f6be459e..196e20e4d7 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/python/eager/pywrap_tfe.h"
 
+#include "absl/types/variant.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/c/eager/c_api_internal.h"
@@ -889,12 +890,239 @@ static tensorflow::DataType FastTensorDtype(PyObject* tensor) {
   return static_cast<tensorflow::DataType>(id);
 }
 
+class PyTapeTensor {
+ public:
+  PyTapeTensor(tensorflow::int64 id, tensorflow::DataType dtype,
+               const tensorflow::TensorShape& shape)
+      : id_(id), dtype_(dtype), shape_(shape) {}
+  PyTapeTensor(tensorflow::int64 id, tensorflow::DataType dtype,
+               PyObject* shape)
+      : id_(id), dtype_(dtype), shape_(shape) {
+    Py_INCREF(absl::get<1>(shape_));
+  }
+  PyTapeTensor(const PyTapeTensor& other) {
+    id_ = other.id_;
+    dtype_ = other.dtype_;
+    shape_ = other.shape_;
+    if (shape_.index() == 1) {
+      Py_INCREF(absl::get<1>(shape_));
+    }
+  }
+
+  ~PyTapeTensor() {
+    if (shape_.index() == 1) {
+      Py_DECREF(absl::get<1>(shape_));
+    }
+  }
+  PyObject* GetShape() const;
+  PyObject* GetDType() const { return PyLong_FromLong(dtype_); }
+  tensorflow::int64 GetID() const { return id_; }
+
+ private:
+  tensorflow::int64 id_;
+  tensorflow::DataType dtype_;
+  absl::variant<tensorflow::TensorShape, PyObject*> shape_;
+};
+
+class PyVSpace : public tensorflow::eager::VSpace<PyObject, PyBackwardFunction,
+                                                  PyTapeTensor> {
+ public:
+  explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {
+    Py_INCREF(py_vspace_);
+  }
+
+  tensorflow::Status Initialize() {
+    num_elements_ = PyObject_GetAttrString(py_vspace_, "num_elements_fn");
+    if (num_elements_ == nullptr) {
+      return tensorflow::errors::InvalidArgument("invalid vspace");
+    }
+    aggregate_fn_ = PyObject_GetAttrString(py_vspace_, "aggregate_fn");
+    if (aggregate_fn_ == nullptr) {
+      return tensorflow::errors::InvalidArgument("invalid vspace");
+    }
+    zeros_fn_ = PyObject_GetAttrString(py_vspace_, "zeros_fn");
+    if (zeros_fn_ == nullptr) {
+      return tensorflow::errors::InvalidArgument("invalid vspace");
+    }
+    ones_fn_ = PyObject_GetAttrString(py_vspace_, "ones_fn");
+    if (ones_fn_ == nullptr) {
+      return tensorflow::errors::InvalidArgument("invalid vspace");
+    }
+    graph_shape_fn_ = PyObject_GetAttrString(py_vspace_, "graph_shape_fn");
+    if (graph_shape_fn_ == nullptr) {
+      return tensorflow::errors::InvalidArgument("invalid vspace");
+    }
+    return tensorflow::Status::OK();
+  }
+
+  ~PyVSpace() override {
+    Py_XDECREF(num_elements_);
+    Py_XDECREF(aggregate_fn_);
+    Py_XDECREF(zeros_fn_);
+    Py_XDECREF(ones_fn_);
+    Py_XDECREF(graph_shape_fn_);
+
+    Py_DECREF(py_vspace_);
+  }
+
+  tensorflow::int64 NumElements(PyObject* tensor) const final {
+    if (EagerTensor_CheckExact(tensor)) {
+      return PyEagerTensor_NumElements(tensor);
+    }
+    PyObject* arglist =
+        Py_BuildValue("(O)", reinterpret_cast<PyObject*>(tensor));
+    PyObject* result = PyEval_CallObject(num_elements_, arglist);
+    Py_DECREF(arglist);
+    if (result == nullptr) {
+      // The caller detects whether a python exception has been raised.
+      return -1;
+    }
+    tensorflow::int64 r = MakeInt(result);
+    Py_DECREF(result);
+    return r;
+  }
+
+  PyObject* AggregateGradients(
+      tensorflow::gtl::ArraySlice<PyObject*> gradient_tensors) const final {
+    PyObject* list = PyList_New(gradient_tensors.size());
+    for (int i = 0; i < gradient_tensors.size(); ++i) {
+      // Note: stealing a reference to the gradient tensors.
+      CHECK(gradient_tensors[i] != nullptr);
+      CHECK(gradient_tensors[i] != Py_None);
+      PyList_SET_ITEM(list, i,
+                      reinterpret_cast<PyObject*>(gradient_tensors[i]));
+    }
+    PyObject* arglist = Py_BuildValue("(O)", list);
+    CHECK(arglist != nullptr);
+    PyObject* result = PyEval_CallObject(aggregate_fn_, arglist);
+    Py_DECREF(arglist);
+    Py_DECREF(list);
+    return result;
+  }
+
+  void MarkAsResult(PyObject* gradient) const final { Py_INCREF(gradient); }
+
+  PyObject* Zeros(const PyTapeTensor& tensor) const final {
+    PyObject* py_shape = tensor.GetShape();
+    PyObject* py_dtype = tensor.GetDType();
+    PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype);
+    PyObject* result = PyEval_CallObject(zeros_fn_, arg_list);
+    Py_DECREF(arg_list);
+    Py_DECREF(py_dtype);
+    Py_DECREF(py_shape);
+    return reinterpret_cast<PyObject*>(result);
+  }
+
+  PyObject* Ones(const PyTapeTensor& tensor) const final {
+    PyObject* py_shape = tensor.GetShape();
+    PyObject* py_dtype = tensor.GetDType();
+    PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype);
+    PyObject* result = PyEval_CallObject(ones_fn_, arg_list);
+    Py_DECREF(arg_list);
+    Py_DECREF(py_dtype);
+    Py_DECREF(py_shape);
+    return result;
+  }
+
+  PyObject* GraphShape(PyObject* tensor) const {
+    PyObject* arg_list = Py_BuildValue("(O)", tensor);
+    PyObject* result = PyEval_CallObject(graph_shape_fn_, arg_list);
+    Py_DECREF(arg_list);
+    return result;
+  }
+
+  tensorflow::Status CallBackwardFunction(
+      PyBackwardFunction* backward_function,
+      tensorflow::gtl::ArraySlice<PyObject*> output_gradients,
+      std::vector<PyObject*>* result) const final {
+    PyObject* grads = PyTuple_New(output_gradients.size());
+    for (int i = 0; i < output_gradients.size(); ++i) {
+      if (output_gradients[i] == nullptr) {
+        Py_INCREF(Py_None);
+        PyTuple_SET_ITEM(grads, i, Py_None);
+      } else {
+        PyTuple_SET_ITEM(grads, i,
+                         reinterpret_cast<PyObject*>(output_gradients[i]));
+      }
+    }
+    PyObject* py_result = (*backward_function)(grads);
+    Py_DECREF(grads);
+    if (py_result == nullptr) {
+      return tensorflow::errors::Internal("gradient function threw exceptions");
+    }
+    result->clear();
+    PyObject* seq =
+        PySequence_Fast(py_result, "expected a sequence of gradients");
+    if (seq == nullptr) {
+      return tensorflow::errors::InvalidArgument(
+          "gradient function did not return a list");
+    }
+    int len = PySequence_Fast_GET_SIZE(seq);
+    VLOG(1) << "Gradient length is " << len;
+    result->reserve(len);
+    for (int i = 0; i < len; ++i) {
+      PyObject* item = PySequence_Fast_GET_ITEM(seq, i);
+      if (item == Py_None) {
+        result->push_back(nullptr);
+      } else {
+        Py_INCREF(item);
+        result->push_back(item);
+      }
+    }
+    Py_DECREF(seq);
+    Py_DECREF(py_result);
+    return tensorflow::Status::OK();
+  }
+
+  void DeleteGradient(PyObject* tensor) const final { Py_XDECREF(tensor); }
+
+ private:
+  PyObject* py_vspace_;
+
+  PyObject* num_elements_;
+  PyObject* aggregate_fn_;
+  PyObject* zeros_fn_;
+  PyObject* ones_fn_;
+  PyObject* graph_shape_fn_;
+};
+PyVSpace* py_vspace = nullptr;
+
+PyObject* TFE_Py_RegisterVSpace(PyObject* e) {
+  if (py_vspace != nullptr) {
+    delete py_vspace;
+  }
+
+  py_vspace = new PyVSpace(e);
+  auto status = py_vspace->Initialize();
+  if (MaybeRaiseExceptionFromStatus(status, nullptr)) {
+    delete py_vspace;
+    return nullptr;
+  }
+
+  Py_RETURN_NONE;
+}
+
+PyObject* PyTapeTensor::GetShape() const {
+  if (shape_.index() == 0) {
+    auto& shape = absl::get<0>(shape_);
+    PyObject* py_shape = PyTuple_New(shape.dims());
+    for (int i = 0; i < shape.dims(); ++i) {
+      PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i)));
+    }
+
+    return py_shape;
+  }
+
+  return py_vspace->GraphShape(absl::get<1>(shape_));
+}
+
 class GradientTape
-    : public tensorflow::eager::GradientTape<PyObject, PyBackwardFunction> {
+    : public tensorflow::eager::GradientTape<PyObject, PyBackwardFunction,
+                                             PyTapeTensor> {
  public:
   explicit GradientTape(bool persistent, bool watch_accessed_variables)
-      : tensorflow::eager::GradientTape<PyObject, PyBackwardFunction>(
-            persistent),
+      : tensorflow::eager::GradientTape<PyObject, PyBackwardFunction,
+                                        PyTapeTensor>(persistent),
         watch_accessed_variables_(watch_accessed_variables) {}
 
   virtual ~GradientTape() {
@@ -1175,7 +1403,24 @@ void TFE_Py_TapeWatch(PyObject* tape, PyObject* tensor) {
   reinterpret_cast<TFE_Py_Tape*>(tape)->tape->Watch(tensor_id);
 }
 
-static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) {
+bool ListContainsNone(PyObject* list) {
+  if (list == Py_None) return true;
+  tensorflow::Safe_PyObjectPtr seq(
+      PySequence_Fast(list, "expected a sequence"));
+  if (seq == nullptr) {
+    return false;
+  }
+
+  int len = PySequence_Size(list);
+  for (int i = 0; i < len; ++i) {
+    PyObject* item = PySequence_Fast_GET_ITEM(seq.get(), i);
+    if (item == Py_None) return true;
+  }
+
+  return false;
+}
+
+static PyTapeTensor TapeTensorFromTensor(PyObject* tensor) {
   if (EagerTensor_CheckExact(tensor)) {
     TFE_TensorHandle* t = EagerTensor_Handle(tensor);
     tensorflow::int64 id = PyEagerTensor_ID(tensor);
@@ -1183,16 +1428,16 @@ static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) {
     const tensorflow::Status status = t->handle->Shape(&tensor_shape);
 
     if (MaybeRaiseExceptionFromStatus(status, nullptr)) {
-      return tensorflow::eager::TapeTensor{id, t->handle->dtype,
-                                           tensorflow::TensorShape({})};
+      return PyTapeTensor(id, static_cast<tensorflow::DataType>(0),
+                          tensorflow::TensorShape({}));
     } else {
-      return tensorflow::eager::TapeTensor{id, t->handle->dtype, tensor_shape};
+      return PyTapeTensor(id, t->handle->dtype, tensor_shape);
     }
   }
   tensorflow::int64 id = FastTensorId(tensor);
   if (PyErr_Occurred()) {
-    return tensorflow::eager::TapeTensor{
-        id, static_cast<tensorflow::DataType>(0), tensorflow::TensorShape({})};
+    return PyTapeTensor(id, static_cast<tensorflow::DataType>(0),
+                        tensorflow::TensorShape({}));
   }
   PyObject* dtype_object = PyObject_GetAttrString(tensor, "dtype");
   PyObject* dtype_enum = PyObject_GetAttrString(dtype_object, "_type_enum");
@@ -1200,16 +1445,21 @@ static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) {
   tensorflow::DataType dtype =
       static_cast<tensorflow::DataType>(MakeInt(dtype_enum));
   Py_DECREF(dtype_enum);
-  if (PyErr_Occurred() != nullptr) {
-    return tensorflow::eager::TapeTensor{id, dtype,
-                                         tensorflow::TensorShape({})};
+  if (PyErr_Occurred()) {
+    return PyTapeTensor(id, static_cast<tensorflow::DataType>(0),
+                        tensorflow::TensorShape({}));
   }
   static char _shape_tuple[] = "_shape_tuple";
   PyObject* shape_tuple = PyObject_CallMethod(tensor, _shape_tuple, nullptr);
-  if (PyErr_Occurred() != nullptr) {
-    return tensorflow::eager::TapeTensor{id, dtype,
-                                         tensorflow::TensorShape({})};
+  if (PyErr_Occurred()) {
+    return PyTapeTensor(id, static_cast<tensorflow::DataType>(0),
+                        tensorflow::TensorShape({}));
   }
+
+  if (ListContainsNone(shape_tuple)) {
+    return PyTapeTensor(id, dtype, tensor);
+  }
+
   auto l = MakeIntList(shape_tuple);
   Py_DECREF(shape_tuple);
   // Replace -1, which represents accidental Nones which can occur in graph mode
@@ -1220,7 +1470,7 @@ static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) {
     }
   }
   tensorflow::TensorShape shape(l);
-  return tensorflow::eager::TapeTensor{id, dtype, shape};
+  return PyTapeTensor(id, dtype, shape);
 }
 
 std::vector<tensorflow::int64> MakeTensorIDList(PyObject* tensors) {
@@ -1286,7 +1536,7 @@ void TapeSetRecordOperation(
     const std::vector<tensorflow::DataType>& input_dtypes,
     const std::function<PyBackwardFunction*()>& backward_function_getter,
     const std::function<void(PyBackwardFunction*)>& backward_function_killer) {
-  std::vector<tensorflow::eager::TapeTensor> output_info;
+  std::vector<PyTapeTensor> output_info;
   PyObject* seq = PySequence_Fast(output_tensors,
                                   "expected a sequence of integer tensor ids");
   int len = PySequence_Size(output_tensors);
@@ -1362,180 +1612,6 @@ void TFE_Py_TapeSetDeleteTrace(tensorflow::int64 tensor_id) {
   }
 }
 
-class PyVSpace
-    : public tensorflow::eager::VSpace<PyObject, PyBackwardFunction> {
- public:
-  explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {
-    Py_INCREF(py_vspace_);
-  }
-
-  tensorflow::Status Initialize() {
-    num_elements_ = PyObject_GetAttrString(py_vspace_, "num_elements_fn");
-    if (num_elements_ == nullptr) {
-      return tensorflow::errors::InvalidArgument("invalid vspace");
-    }
-    aggregate_fn_ = PyObject_GetAttrString(py_vspace_, "aggregate_fn");
-    if (aggregate_fn_ == nullptr) {
-      return tensorflow::errors::InvalidArgument("invalid vspace");
-    }
-    zeros_ = PyObject_GetAttrString(py_vspace_, "zeros");
-    if (zeros_ == nullptr) {
-      return tensorflow::errors::InvalidArgument("invalid vspace");
-    }
-    ones_ =
-        PyObject_GetAttrString(reinterpret_cast<PyObject*>(py_vspace_), "ones");
-    if (ones_ == nullptr) {
-      return tensorflow::errors::InvalidArgument("invalid vspace");
-    }
-    return tensorflow::Status::OK();
-  }
-
-  ~PyVSpace() override {
-    Py_XDECREF(num_elements_);
-    Py_XDECREF(aggregate_fn_);
-    Py_XDECREF(zeros_);
-    Py_XDECREF(ones_);
-
-    Py_DECREF(py_vspace_);
-  }
-
-  tensorflow::int64 NumElements(PyObject* tensor) const final {
-    if (EagerTensor_CheckExact(tensor)) {
-      return PyEagerTensor_NumElements(tensor);
-    }
-    PyObject* arglist =
-        Py_BuildValue("(O)", reinterpret_cast<PyObject*>(tensor));
-    PyObject* result = PyEval_CallObject(num_elements_, arglist);
-    Py_DECREF(arglist);
-    if (result == nullptr) {
-      // The caller detects whether a python exception has been raised.
-      return -1;
-    }
-    tensorflow::int64 r = MakeInt(result);
-    Py_DECREF(result);
-    return r;
-  }
-
-  PyObject* AggregateGradients(
-      tensorflow::gtl::ArraySlice<PyObject*> gradient_tensors) const final {
-    PyObject* list = PyList_New(gradient_tensors.size());
-    for (int i = 0; i < gradient_tensors.size(); ++i) {
-      // Note: stealing a reference to the gradient tensors.
-      CHECK(gradient_tensors[i] != nullptr);
-      CHECK(gradient_tensors[i] != Py_None);
-      PyList_SET_ITEM(list, i,
-                      reinterpret_cast<PyObject*>(gradient_tensors[i]));
-    }
-    PyObject* arglist = Py_BuildValue("(O)", list);
-    CHECK(arglist != nullptr);
-    PyObject* result = PyEval_CallObject(aggregate_fn_, arglist);
-    Py_DECREF(arglist);
-    Py_DECREF(list);
-    return result;
-  }
-
-  void MarkAsResult(PyObject* gradient) const final { Py_INCREF(gradient); }
-
-  PyObject* Zeros(tensorflow::TensorShape shape,
-                  tensorflow::DataType dtype) const final {
-    PyObject* py_shape = PyTuple_New(shape.dims());
-    for (int i = 0; i < shape.dims(); ++i) {
-      PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i)));
-    }
-    PyObject* py_dtype = PyLong_FromLong(static_cast<int>(dtype));
-    PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype);
-    PyObject* result = PyEval_CallObject(zeros_, arg_list);
-    Py_DECREF(arg_list);
-    Py_DECREF(py_dtype);
-    Py_DECREF(py_shape);
-    return reinterpret_cast<PyObject*>(result);
-  }
-
-  PyObject* Ones(tensorflow::TensorShape shape,
-                 tensorflow::DataType dtype) const final {
-    PyObject* py_shape = PyTuple_New(shape.dims());
-    for (int i = 0; i < shape.dims(); ++i) {
-      PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i)));
-    }
-    PyObject* py_dtype = PyLong_FromLong(static_cast<int>(dtype));
-    PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype);
-    PyObject* result = PyEval_CallObject(ones_, arg_list);
-    Py_DECREF(arg_list);
-    Py_DECREF(py_dtype);
-    Py_DECREF(py_shape);
-    return result;
-  }
-
-  tensorflow::Status CallBackwardFunction(
-      PyBackwardFunction* backward_function,
-      tensorflow::gtl::ArraySlice<PyObject*> output_gradients,
-      std::vector<PyObject*>* result) const final {
-    PyObject* grads = PyTuple_New(output_gradients.size());
-    for (int i = 0; i < output_gradients.size(); ++i) {
-      if (output_gradients[i] == nullptr) {
-        Py_INCREF(Py_None);
-        PyTuple_SET_ITEM(grads, i, Py_None);
-      } else {
-        PyTuple_SET_ITEM(grads, i,
-                         reinterpret_cast<PyObject*>(output_gradients[i]));
-      }
-    }
-    PyObject* py_result = (*backward_function)(grads);
-    Py_DECREF(grads);
-    if (py_result == nullptr) {
-      return tensorflow::errors::Internal("gradient function threw exceptions");
-    }
-    result->clear();
-    PyObject* seq =
-        PySequence_Fast(py_result, "expected a sequence of gradients");
-    if (seq == nullptr) {
-      return tensorflow::errors::InvalidArgument(
-          "gradient function did not return a list");
-    }
-    int len = PySequence_Fast_GET_SIZE(seq);
-    VLOG(1) << "Gradient length is " << len;
-    result->reserve(len);
-    for (int i = 0; i < len; ++i) {
-      PyObject* item = PySequence_Fast_GET_ITEM(seq, i);
-      if (item == Py_None) {
-        result->push_back(nullptr);
-      } else {
-        Py_INCREF(item);
-        result->push_back(item);
-      }
-    }
-    Py_DECREF(seq);
-    Py_DECREF(py_result);
-    return tensorflow::Status::OK();
-  }
-
-  void DeleteGradient(PyObject* tensor) const final { Py_XDECREF(tensor); }
-
- private:
-  PyObject* py_vspace_;
-
-  PyObject* num_elements_;
-  PyObject* aggregate_fn_;
-  PyObject* zeros_;
-  PyObject* ones_;
-};
-PyVSpace* py_vspace = nullptr;
-
-PyObject* TFE_Py_RegisterVSpace(PyObject* e) {
-  if (py_vspace != nullptr) {
-    delete py_vspace;
-  }
-
-  py_vspace = new PyVSpace(e);
-  auto status = py_vspace->Initialize();
-  if (MaybeRaiseExceptionFromStatus(status, nullptr)) {
-    delete py_vspace;
-    return nullptr;
-  }
-
-  Py_RETURN_NONE;
-}
-
 std::vector<PyObject*> MakeTensorList(PyObject* tensors) {
   PyObject* seq = PySequence_Fast(tensors, "expected a sequence");
   if (seq == nullptr) {
-- 
GitLab


From 237c6ccae40005e3b6199731c45e1c9f5cd86c5f Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Wed, 19 Sep 2018 15:21:21 -0700
Subject: [PATCH 0408/1357] Create a steps_per_run variable to be updated
 correctly in the fit loop to make sure we run fit for the right number of
 steps.

PiperOrigin-RevId: 213706042
---
 .../keras/engine/training_distributed.py      | 29 ++++++++++++++-----
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index d133595793..05b40c66e3 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -293,11 +293,16 @@ def _experimental_fit_loop(
   for name, tensor in zip(model.metrics_names[1:], model.metrics_tensors):
     initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)
 
+  if steps_per_epoch is None:
+    raise ValueError('steps_per_epoch should be specified in the fit call.')
+  steps_per_run_var = K.variable(
+      value=min(steps_per_epoch, current_strategy.steps_per_run),
+      dtype='int32',
+      name='steps_per_run_var')
+
   with current_strategy.scope():
-    # TODO(priyag, sourabhbajaj): Adjust steps_per_run appropriately based on
-    # steps_per_epoch and number of epochs.
     ctx = current_strategy.run_steps_on_dataset(
-        step_fn, iterator, iterations=current_strategy.steps_per_run,
+        step_fn, iterator, iterations=steps_per_run_var,
         initial_loop_values=initial_loop_values)
 
   train_op = ctx.run_op
@@ -310,8 +315,6 @@ def _experimental_fit_loop(
     distributed_training_utils.set_weights(
         current_strategy, distributed_model, orig_model_weights)
 
-  assert steps_per_epoch is not None
-
   # TODO(sourabhbajaj): Convert this into a proper validation function
   if callbacks:
     raise NotImplementedError(
@@ -327,17 +330,28 @@ def _experimental_fit_loop(
       steps_per_epoch=steps_per_epoch,
       verbose=verbose)
   # TODO(priyag, sourabhbajaj): Add callbacks support for per step callback
-  # TODO(priyag, sourabhbajaj): Fix the number of steps run with steps_per_run
   # TODO(priyag, sourabhbajaj): Add validation.
+
+  # Calculate the steps each time on the device.
+  steps_to_run = [current_strategy.steps_per_run] * (
+      steps_per_epoch // current_strategy.steps_per_run)
+  if steps_per_epoch % current_strategy.steps_per_run:
+    steps_to_run.append(steps_per_epoch % current_strategy.steps_per_run)
+
   callbacks.on_train_begin()
   for epoch in range(initial_epoch, epochs):
     callbacks.on_epoch_begin(epoch)
     epoch_logs = {}
-    for step_index in range(0, steps_per_epoch, current_strategy.steps_per_run):
+    step_index = 0
+    prev_step_count = None
+    for step_count in steps_to_run:
       # TODO(sourabhbajaj): Replace size with a combination of steps_per_run
       # and batch_size
       batch_logs = {'batch': step_index, 'size': 1}
       callbacks.on_batch_begin(step_index, batch_logs)
+      if prev_step_count is None or step_count != prev_step_count:
+        steps_per_run_var.load(step_count, K.get_session())
+        prev_step_count = step_count
       try:
         _, outputs = K.get_session().run([train_op, output_tensors])
       except errors.OutOfRangeError:
@@ -350,6 +364,7 @@ def _experimental_fit_loop(
 
       batch_logs.update(outputs)
       callbacks.on_batch_end(step_index, batch_logs)
+      step_index = step_index + step_count
       if callbacks.model.stop_training:
         break
 
-- 
GitLab


From b2b98a5ad1b647b77cb42761671cd9b3cf0e87b6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 15:21:36 -0700
Subject: [PATCH 0409/1357] Boosted trees: Add error messages when tree
 complexity parameter is not properly set.

PiperOrigin-RevId: 213706101
---
 .../python/estimator/canned/boosted_trees.py  | 10 ++++--
 .../estimator/canned/boosted_trees_test.py    | 35 +++++++++++++++++++
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 36048a2bfd..756d32d03f 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -422,9 +422,13 @@ class _EnsembleGrower(object):
     self._pruning_mode_parsed = boosted_trees_ops.PruningMode.from_str(
         tree_hparams.pruning_mode)
 
-    if (self._pruning_mode_parsed != boosted_trees_ops.PruningMode.NO_PRUNING
-        and tree_hparams.tree_complexity <= 0):
-      raise ValueError('For pruning, tree_complexity must be positive.')
+    if tree_hparams.tree_complexity > 0:
+      if self._pruning_mode_parsed == boosted_trees_ops.PruningMode.NO_PRUNING:
+        raise ValueError(
+            'Tree complexity have no effect unless pruning mode is chosen.')
+    else:
+      if self._pruning_mode_parsed != boosted_trees_ops.PruningMode.NO_PRUNING:
+        raise ValueError('For pruning, tree_complexity must be positive.')
     # pylint: enable=protected-access
 
   @abc.abstractmethod
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 9409cb5cc7..d4cb3e27d0 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -564,6 +564,41 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
     self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
 
+  def testTreeComplexityIsSetCorrectly(self):
+    input_fn = _make_train_input_fn(is_classification=True)
+
+    num_steps = 10
+    # Tree complexity is set but no pruning.
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        tree_complexity=1e-3)
+    with self.assertRaisesRegexp(ValueError, 'Tree complexity have no effect'):
+      est.train(input_fn, steps=num_steps)
+
+    # Pruning but no tree complexity.
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        pruning_mode='pre')
+    with self.assertRaisesRegexp(ValueError,
+                                 'tree_complexity must be positive'):
+      est.train(input_fn, steps=num_steps)
+
+    # All is good.
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        pruning_mode='pre',
+        tree_complexity=1e-3)
+    est.train(input_fn, steps=num_steps)
+
 
 class BoostedTreesDebugOutputsTest(test_util.TensorFlowTestCase):
   """Test debug/model explainability outputs for individual predictions.
-- 
GitLab


From fb2918f81053e15801e08d1a90cf7960b6d219e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Thu, 20 Sep 2018 06:44:27 +0800
Subject: [PATCH 0410/1357] TST: introduce test case from upstream/master

---
 .../estimator/canned/boosted_trees_test.py    | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 1497d4253b..23687a738b 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -1099,6 +1099,41 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
           learning_rate=1.0,
           max_depth=1)
 
+  def testTreeComplexityIsSetCorrectly(self):
+    input_fn = _make_train_input_fn(is_classification=True)
+
+    num_steps = 10
+    # Tree complexity is set but no pruning.
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        tree_complexity=1e-3)
+    with self.assertRaisesRegexp(ValueError, 'Tree complexity have no effect'):
+      est.train(input_fn, steps=num_steps)
+
+    # Pruning but no tree complexity.
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        pruning_mode='pre')
+    with self.assertRaisesRegexp(ValueError,
+                                 'tree_complexity must be positive'):
+      est.train(input_fn, steps=num_steps)
+
+    # All is good.
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        pruning_mode='pre',
+        tree_complexity=1e-3)
+    est.train(input_fn, steps=num_steps)
+
 
 class BoostedTreesDebugOutputsTest(test_util.TensorFlowTestCase):
   """Test debug/model explainability outputs for individual predictions.
-- 
GitLab


From c96841dbd199d3c1a15a89e8c44c7c1d164968b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 15:40:18 -0700
Subject: [PATCH 0411/1357] This CL adds a new `tf.print` operator that more
 closely aligns with the standard python `print` method, and deprecates the
 old `tf.Print` operator (to be removed in in v2.0).

It follows the design doc specified in https://github.com/tensorflow/community/pull/14 and additionally incorporates the community feedback and design review decisions.

This CL adds two new internal graph operators: a StringFormat operator that formats a template string with a list of input tensors to insert into the string and outputs a string scalar containing the result, and a PrintV2 operator that prints a string scalar to a specified output stream or logging level.

The formatting op is exposed at `tf.strings.Format`. A new python method is exposed at `tf.print` that takes a list of inputs that may be nested structures and may contain tensors, formats them nicely using the formatting op, and returns a PrintV2 operator that prints them. In Eager mode and inside defuns this PrintV2 operator will automatically be executed, but in graph mode it will need to be either added to `sess.run`, or used as a control dependency for other operators being executed.

As compared to the previous print function, the new print function:
- Has an API that more closely aligns with the standard python3 print
- Supports changing the print logging level/output stream
- allows printing arbitrary (optionally nested) data structures as opposed to just flat lists of tensors
- support printing sparse tensors
- changes printed tensor format to show more meaningful summary (recursively print the first and last elements of each tensor dimension, instead of just the first few elements of the tensor irregardless of dimension).

PiperOrigin-RevId: 213709924
---
 tensorflow/core/BUILD                         |   8 +-
 .../api_def/base_api/api_def_PrintV2.pbtxt    |  19 +
 .../base_api/api_def_StringFormat.pbtxt       |  38 ++
 .../api_def/python_api/api_def_PrintV2.pbtxt  |   4 +
 .../python_api/api_def_StringFormat.pbtxt     |   4 +
 tensorflow/core/framework/tensor.cc           | 112 ++++-
 tensorflow/core/framework/tensor.h            |   2 +-
 tensorflow/core/framework/tensor_test.cc      |  57 +++
 tensorflow/core/kernels/BUILD                 |  27 ++
 tensorflow/core/kernels/logging_ops.cc        |  57 ++-
 tensorflow/core/kernels/logging_ops_test.cc   |  22 +
 tensorflow/core/kernels/string_format_op.cc   |  65 +++
 .../core/kernels/string_format_op_test.cc     |  66 +++
 tensorflow/core/ops/logging_ops.cc            |  19 +
 tensorflow/core/ops/string_ops.cc             |  27 ++
 tensorflow/python/BUILD                       |   2 +
 tensorflow/python/framework/test_util.py      |  60 +++
 tensorflow/python/kernel_tests/BUILD          |  13 +
 .../python/kernel_tests/logging_ops_test.py   | 313 ++++++++++++++
 .../kernel_tests/string_format_op_test.py     | 384 ++++++++++++++++++
 tensorflow/python/ops/logging_ops.py          | 260 +++++++++++-
 tensorflow/python/ops/string_ops.py           |  84 +++-
 .../tools/api/golden/v1/tensorflow.pbtxt      |   4 +
 .../api/golden/v1/tensorflow.strings.pbtxt    |   4 +
 .../tools/api/golden/v2/tensorflow.pbtxt      |   8 +-
 .../api/golden/v2/tensorflow.strings.pbtxt    |   4 +
 26 files changed, 1635 insertions(+), 28 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_PrintV2.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_StringFormat.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_PrintV2.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_StringFormat.pbtxt
 create mode 100644 tensorflow/core/kernels/string_format_op.cc
 create mode 100644 tensorflow/core/kernels/string_format_op_test.cc
 create mode 100644 tensorflow/python/kernel_tests/string_format_op_test.py

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 4b2589aaeb..e82dd13b31 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1067,12 +1067,18 @@ tf_gen_op_libs(
         "spectral_ops",
         "state_ops",
         "stateless_random_ops",
-        "string_ops",
         "summary_ops",
         "training_ops",
     ],
 )
 
+tf_gen_op_libs(
+    op_lib_names = [
+        "string_ops",
+    ],
+    deps = ["@com_google_absl//absl/strings"],
+)
+
 tf_gen_op_libs(
     op_lib_names = [
         "array_ops",
diff --git a/tensorflow/core/api_def/base_api/api_def_PrintV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_PrintV2.pbtxt
new file mode 100644
index 0000000000..4cb8955dcb
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_PrintV2.pbtxt
@@ -0,0 +1,19 @@
+op {
+  graph_op_name: "PrintV2"
+  in_arg {
+    name: "input"
+    description: <<END
+The string scalar to print.
+END
+  }
+  attr {
+    name: "output_stream"
+    description: <<END
+A string specifying the output stream or logging level to print to.
+END
+  }
+  summary: "Prints a string scalar."
+  description: <<END
+Prints a string scalar to the desired output_stream.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_StringFormat.pbtxt b/tensorflow/core/api_def/base_api/api_def_StringFormat.pbtxt
new file mode 100644
index 0000000000..a82dae9e48
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_StringFormat.pbtxt
@@ -0,0 +1,38 @@
+op {
+  graph_op_name: "StringFormat"
+  in_arg {
+    name: "inputs"
+    description: <<END
+The list of tensors to format into the placeholder string.
+END
+  }
+
+  out_arg {
+    name: "output"
+    description: <<END
+= The resulting string scalar.
+END
+  }
+  attr {
+    name: "template"
+    description: <<END
+A string, the template to format tensor summaries into.
+END
+  }
+  attr {
+    name: "placeholder"
+    description: <<END
+A string, at each placeholder in the template a subsequent tensor summary will be inserted.
+END
+  }
+  attr {
+    name: "summarize"
+    description: <<END
+When formatting the tensor summaries print the first and last summarize entries of each tensor dimension.
+END
+  }
+  summary: "Formats a string template using a list of tensors."
+  description: <<END
+Formats a string template using a list of tensors, pretty-printing tensor summaries.
+END
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_PrintV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_PrintV2.pbtxt
new file mode 100644
index 0000000000..e22d980424
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_PrintV2.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "PrintV2"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StringFormat.pbtxt b/tensorflow/core/api_def/python_api/api_def_StringFormat.pbtxt
new file mode 100644
index 0000000000..8f0b1db45d
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StringFormat.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StringFormat"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index 516afa517d..eb9c79ff2d 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -948,9 +948,69 @@ void PrintOneDim(int dim_index, const gtl::InlinedVector<int64, 4>& shape,
   }
 }
 
+// Appends the spacing between elements for a given dim onto a result string
+void PrintDimSpacing(int dim_index, int num_dims, string* result) {
+  if (dim_index == num_dims - 1) {
+    strings::StrAppend(result, " ");
+    return;
+  }
+  for (int j = 0; j < num_dims - dim_index - 1; j++) {
+    strings::StrAppend(result, "\n");
+  }
+  for (int j = 0; j <= dim_index; j++) {
+    strings::StrAppend(result, " ");
+  }
+}
+
+// Print from left dim to right dim recursively.
+template <typename T>
+void PrintOneDimV2(int dim_index, const gtl::InlinedVector<int64, 4>& shape,
+                   int64 num_elts_at_ends, int num_dims, const T* data,
+                   int64 data_index, string* result) {
+  // We have recursed beyond all the dimensions into a single element
+  // of the tensor.
+  if (dim_index == num_dims) {
+    strings::StrAppend(result, PrintOneElement(data[data_index]));
+    return;
+  }
+
+  strings::StrAppend(result, "[");
+  int64 element_count = shape[dim_index];
+  int64 start_of_end =
+      std::max(num_elts_at_ends, element_count - num_elts_at_ends);
+
+  // Loop every element of one dim.
+  int64 elements_per_iter = 1;
+  for (int i = dim_index + 1; i < num_dims; i++) {
+    elements_per_iter *= shape[i];
+  }
+  for (int64 i = 0; (i < num_elts_at_ends) && (i < element_count); i++) {
+    if (i > 0) {
+      PrintDimSpacing(dim_index, num_dims, result);
+    }
+
+    // As for each element, print the sub-dim.
+    PrintOneDimV2(dim_index + 1, shape, num_elts_at_ends, num_dims, data,
+                  data_index + elements_per_iter * i, result);
+  }
+  if (element_count > 2 * num_elts_at_ends) {
+    PrintDimSpacing(dim_index, num_dims, result);
+    strings::StrAppend(result, "...");
+  }
+  for (int64 i = start_of_end; i < element_count; i++) {
+    // As for each element, print the sub-dim.
+    PrintDimSpacing(dim_index, num_dims, result);
+    PrintOneDimV2(dim_index + 1, shape, num_elts_at_ends, num_dims, data,
+                  data_index + elements_per_iter * i, result);
+  }
+
+  strings::StrAppend(result, "]");
+}
+
 template <typename T>
 string SummarizeArray(int64 limit, int64 num_elts,
-                      const TensorShape& tensor_shape, const char* data) {
+                      const TensorShape& tensor_shape, const char* data,
+                      const bool print_v2) {
   string ret;
   const T* array = reinterpret_cast<const T*>(data);
 
@@ -963,17 +1023,26 @@ string SummarizeArray(int64 limit, int64 num_elts,
     if (num_elts > limit) strings::StrAppend(&ret, "...");
     return ret;
   }
-  int64 data_index = 0;
-  const int shape_size = tensor_shape.dims();
-  PrintOneDim(0, shape, limit, shape_size, array, &data_index, &ret);
+  if (print_v2) {
+    const int num_dims = tensor_shape.dims();
+    PrintOneDimV2(0, shape, limit, num_dims, array, 0, &ret);
+  } else {
+    int64 data_index = 0;
+    const int shape_size = tensor_shape.dims();
+    PrintOneDim(0, shape, limit, shape_size, array, &data_index, &ret);
+
+    if (num_elts > limit) strings::StrAppend(&ret, "...");
+  }
 
-  if (num_elts > limit) strings::StrAppend(&ret, "...");
   return ret;
 }
 }  // namespace
 
-string Tensor::SummarizeValue(int64 max_entries) const {
+string Tensor::SummarizeValue(int64 max_entries, bool print_v2) const {
   const int64 num_elts = NumElements();
+  if (max_entries < 0) {
+    max_entries = num_elts;
+  }
   size_t limit = std::min(max_entries, num_elts);
   if ((limit > 0) && (buf_ == nullptr)) {
     return strings::StrCat("uninitialized Tensor of ", num_elts,
@@ -982,50 +1051,54 @@ string Tensor::SummarizeValue(int64 max_entries) const {
   const char* data = limit > 0 ? tensor_data().data() : nullptr;
   switch (dtype()) {
     case DT_HALF:
-      return SummarizeArray<Eigen::half>(limit, num_elts, shape_, data);
+      return SummarizeArray<Eigen::half>(limit, num_elts, shape_, data,
+                                         print_v2);
       break;
     case DT_FLOAT:
-      return SummarizeArray<float>(limit, num_elts, shape_, data);
+      return SummarizeArray<float>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_DOUBLE:
-      return SummarizeArray<double>(limit, num_elts, shape_, data);
+      return SummarizeArray<double>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_UINT32:
-      return SummarizeArray<uint32>(limit, num_elts, shape_, data);
+      return SummarizeArray<uint32>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_INT32:
-      return SummarizeArray<int32>(limit, num_elts, shape_, data);
+      return SummarizeArray<int32>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_UINT8:
     case DT_QUINT8:
-      return SummarizeArray<uint8>(limit, num_elts, shape_, data);
+      return SummarizeArray<uint8>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_UINT16:
     case DT_QUINT16:
-      return SummarizeArray<uint16>(limit, num_elts, shape_, data);
+      return SummarizeArray<uint16>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_INT16:
     case DT_QINT16:
-      return SummarizeArray<int16>(limit, num_elts, shape_, data);
+      return SummarizeArray<int16>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_INT8:
     case DT_QINT8:
-      return SummarizeArray<int8>(limit, num_elts, shape_, data);
+      return SummarizeArray<int8>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_UINT64:
-      return SummarizeArray<uint64>(limit, num_elts, shape_, data);
+      return SummarizeArray<uint64>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_INT64:
-      return SummarizeArray<int64>(limit, num_elts, shape_, data);
+      return SummarizeArray<int64>(limit, num_elts, shape_, data, print_v2);
       break;
     case DT_BOOL:
       // TODO(tucker): Is it better to emit "True False..."?  This
       // will emit "1 0..." which is more compact.
-      return SummarizeArray<bool>(limit, num_elts, shape_, data);
+      return SummarizeArray<bool>(limit, num_elts, shape_, data, print_v2);
       break;
     default: {
       // All irregular cases
       string ret;
+      if (print_v2) {
+        strings::StrAppend(&ret, "[");
+      }
       // TODO(irving): Don't call flat every time around this
       // loop.
       for (size_t i = 0; i < limit; ++i) {
@@ -1045,6 +1118,9 @@ string Tensor::SummarizeValue(int64 max_entries) const {
         }
       }
       if (max_entries < num_elts) strings::StrAppend(&ret, "...");
+      if (print_v2) {
+        strings::StrAppend(&ret, "]");
+      }
       return ret;
     }
   }
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index 696fd277cd..5f5d2021a4 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -430,7 +430,7 @@ class Tensor {
       int64 begin) const;
 
   /// Render the first `max_entries` values in `*this` into a string.
-  string SummarizeValue(int64 max_entries) const;
+  string SummarizeValue(int64 max_entries, bool print_v2 = false) const;
 
   /// A human-readable summary of the tensor suitable for debugging.
   string DebugString() const;
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index 9a78cdc91e..fc05c86990 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -1295,6 +1295,63 @@ TEST(SummarizeValue, STRING) {
   EXPECT_EQ("one two three four five one...", x.SummarizeValue(6));
 }
 
+TEST(SummarizeValue, INT32_PRINT_V2) {
+  Tensor x = MkTensor<int>(DT_INT32, TensorShape({5}), {1, 2, 3, 4, 0});
+  EXPECT_EQ("[1 2 3 4 0]", x.SummarizeValue(16, true));
+  EXPECT_EQ("[1 2 3 4 0]", x.SummarizeValue(-1, true));
+  EXPECT_EQ("[1 2 ... 4 0]", x.SummarizeValue(2, true));
+  EXPECT_EQ("[1 ... 0]", x.SummarizeValue(1, true));
+  x = MkTensor<int>(DT_INT32, TensorShape({2, 2}), {1, 2, 3, 4, 0});
+  EXPECT_EQ("[[1 2]\n [3 4]]", x.SummarizeValue(16, true));
+  x = MkTensor<int>(DT_INT32, TensorShape({2, 2, 1, 1}), {1, 2, 3, 4, 0});
+  EXPECT_EQ("[[[[1]]\n\n  [[2]]]\n\n\n [[[3]]\n\n  [[4]]]]",
+            x.SummarizeValue(16, true));
+  x = MkTensor<int>(DT_INT32, TensorShape({0}), {});
+  EXPECT_EQ("[]", x.SummarizeValue(16, true));
+}
+
+TEST(SummarizeValue, INT32Dims_PRINT_V2) {
+  Tensor x = MkTensor<int>(DT_INT32, TensorShape({3, 4}),
+                           {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+  EXPECT_EQ("[[1 ... 4]\n ...\n [9 ... 12]]", x.SummarizeValue(1, true));
+  EXPECT_EQ("[[1 2 3 4]\n [5 6 7 8]\n [9 10 11 12]]",
+            x.SummarizeValue(10, true));
+  EXPECT_EQ("[[1 2 3 4]\n [5 6 7 8]\n [9 10 11 12]]",
+            x.SummarizeValue(-1, true));
+}
+
+TEST(SummarizeValue, FLOAT_PRINT_V2) {
+  Tensor x = MkTensor<float>(DT_FLOAT, TensorShape({5}), {1, 2, 3, 4, 0});
+  EXPECT_EQ("[1 2 3 4 0]", x.SummarizeValue(16, true));
+  EXPECT_EQ("[1 2 3 4 0]", x.SummarizeValue(-1, true));
+  EXPECT_EQ("[1 2 ... 4 0]", x.SummarizeValue(2, true));
+  EXPECT_EQ("[1 ... 0]", x.SummarizeValue(1, true));
+  x = MkTensor<float>(DT_FLOAT, TensorShape({2, 2}), {1, 2, 3, 4, 0});
+  EXPECT_EQ("[[1 2]\n [3 4]]", x.SummarizeValue(16, true));
+  x = MkTensor<float>(DT_FLOAT, TensorShape({2, 2, 1, 1}), {1, 2, 3, 4, 0});
+  EXPECT_EQ("[[[[1]]\n\n  [[2]]]\n\n\n [[[3]]\n\n  [[4]]]]",
+            x.SummarizeValue(16, true));
+  x = MkTensor<float>(DT_FLOAT, TensorShape({0}), {});
+  EXPECT_EQ("[]", x.SummarizeValue(16, true));
+}
+
+TEST(SummarizeValue, BOOL_PRINT_V2) {
+  Tensor x = MkTensor<bool>(DT_BOOL, TensorShape({5}), {false, true, true});
+  EXPECT_EQ("[0 1 1 0 1]", x.SummarizeValue(16, true));
+  EXPECT_EQ("[0 1 1 0 1]", x.SummarizeValue(-1, true));
+  EXPECT_EQ("[0 1 ... 0 1]", x.SummarizeValue(2, true));
+}
+
+TEST(SummarizeValue, STRING_PRINT_V2) {
+  Tensor x = MkTensor<string>(DT_STRING, TensorShape({5}),
+                              {"one", "two", "three", "four", "five"});
+  EXPECT_EQ("[one two three four five]", x.SummarizeValue(16, true));
+  EXPECT_EQ("[one two three four five]", x.SummarizeValue(-1, true));
+  x = MkTensor<string>(DT_STRING, TensorShape({5, 1, 5}),
+                       {"one", "two", "three", "four", "five"});
+  EXPECT_EQ("[one two three four five one...]", x.SummarizeValue(6, true));
+}
+
 void BM_CreateAndDestroy(int iters) {
   TensorShape shape({10, 20});
   while (--iters) {
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 7aa1169061..b0d04a7213 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2707,6 +2707,7 @@ cc_library(
 )
 
 LOGGING_DEPS = [
+    "@com_google_absl//absl/strings",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
@@ -2764,6 +2765,7 @@ tf_cc_tests(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -4401,6 +4403,7 @@ cc_library(
         ":reduce_join_op",
         ":regex_full_match_op",
         ":regex_replace_op",
+        ":string_format_op",
         ":string_join_op",
         ":string_length_op",
         ":string_split_op",
@@ -4431,6 +4434,30 @@ tf_kernel_library(
     deps = STRING_DEPS,
 )
 
+tf_kernel_library(
+    name = "string_format_op",
+    prefix = "string_format_op",
+    deps = STRING_DEPS + ["@com_google_absl//absl/strings"],
+)
+
+tf_cc_test(
+    name = "string_format_op_test",
+    size = "small",
+    srcs = ["string_format_op_test.cc"],
+    deps = [
+        ":string_format_op",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:ops_testutil",
+        "//tensorflow/core/kernels:ops_util",
+    ],
+)
+
 tf_kernel_library(
     name = "string_join_op",
     prefix = "string_join_op",
diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc
index 6b6a14e9a7..8bafd5739d 100644
--- a/tensorflow/core/kernels/logging_ops.cc
+++ b/tensorflow/core/kernels/logging_ops.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include <iostream>
+#include "absl/strings/str_split.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -74,8 +75,7 @@ class PrintOp : public OpKernel {
     string msg;
     strings::StrAppend(&msg, message_);
     for (int i = 1; i < ctx->num_inputs(); ++i) {
-      strings::StrAppend(&msg, "[", ctx->input(i).SummarizeValue(summarize_),
-                         "]");
+      strings::StrAppend(&msg, ctx->input(i).SummarizeValue(summarize_));
     }
     std::cerr << msg << std::endl;
   }
@@ -90,6 +90,59 @@ class PrintOp : public OpKernel {
 
 REGISTER_KERNEL_BUILDER(Name("Print").Device(DEVICE_CPU), PrintOp);
 
+class PrintV2Op : public OpKernel {
+ public:
+  explicit PrintV2Op(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_stream", &output_stream_));
+
+    auto output_stream_index =
+        std::find(std::begin(valid_output_streams_),
+                  std::end(valid_output_streams_), output_stream_);
+
+    if (output_stream_index == std::end(valid_output_streams_)) {
+      string error_msg = strings::StrCat(
+          "Unknown output stream: ", output_stream_, ", Valid streams are:");
+      for (auto valid_stream : valid_output_streams_) {
+        strings::StrAppend(&error_msg, " ", valid_stream);
+      }
+      OP_REQUIRES(ctx, false, errors::InvalidArgument(error_msg));
+    }
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor* input_;
+    OP_REQUIRES_OK(ctx, ctx->input("input", &input_));
+    const string& msg = input_->scalar<string>()();
+
+    if (output_stream_ == "stdout") {
+      std::cout << msg << std::endl;
+    } else if (output_stream_ == "stderr") {
+      std::cerr << msg << std::endl;
+    } else if (output_stream_ == "log(info)") {
+      LOG(INFO) << msg << std::endl;
+    } else if (output_stream_ == "log(warning)") {
+      LOG(WARNING) << msg << std::endl;
+    } else if (output_stream_ == "log(error)") {
+      LOG(ERROR) << msg << std::endl;
+    } else {
+      string error_msg = strings::StrCat(
+          "Unknown output stream: ", output_stream_, ", Valid streams are:");
+      for (auto valid_stream : valid_output_streams_) {
+        strings::StrAppend(&error_msg, " ", valid_stream);
+      }
+      OP_REQUIRES(ctx, false, errors::InvalidArgument(error_msg));
+    }
+  }
+
+  const char* valid_output_streams_[6] = {"stdout", "stderr", "log(info)",
+                                          "log(warning)", "log(error)"};
+
+ private:
+  string output_stream_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("PrintV2").Device(DEVICE_CPU), PrintV2Op);
+
 class TimestampOp : public OpKernel {
  public:
   explicit TimestampOp(OpKernelConstruction* context) : OpKernel(context) {}
diff --git a/tensorflow/core/kernels/logging_ops_test.cc b/tensorflow/core/kernels/logging_ops_test.cc
index 5e6958f364..a259d995fa 100644
--- a/tensorflow/core/kernels/logging_ops_test.cc
+++ b/tensorflow/core/kernels/logging_ops_test.cc
@@ -23,11 +23,33 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/ops_testutil.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 
 namespace tensorflow {
 namespace {
 
+class PrintingV2GraphTest : public OpsTestBase {
+ protected:
+  Status Init(const string& output_stream = "log(warning)") {
+    TF_CHECK_OK(NodeDefBuilder("op", "PrintV2")
+                    .Input(FakeInput(DT_STRING))
+                    .Attr("output_stream", output_stream)
+                    .Finalize(node_def()));
+    return InitOp();
+  }
+};
+
+TEST_F(PrintingV2GraphTest, StringSuccess) {
+  TF_ASSERT_OK(Init());
+  AddInputFromArray<string>(TensorShape({}), {"bar"});
+  TF_ASSERT_OK(RunOpKernel());
+}
+
+TEST_F(PrintingV2GraphTest, InvalidOutputStream) {
+  ASSERT_NE(::tensorflow::Status::OK(), (Init("invalid_output_stream")));
+}
+
 class PrintingGraphTest : public OpsTestBase {
  protected:
   Status Init(DataType input_type1, DataType input_type2, string msg = "",
diff --git a/tensorflow/core/kernels/string_format_op.cc b/tensorflow/core/kernels/string_format_op.cc
new file mode 100644
index 0000000000..e4a1887f8d
--- /dev/null
+++ b/tensorflow/core/kernels/string_format_op.cc
@@ -0,0 +1,65 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <iostream>
+#include "absl/strings/str_split.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+class StringFormatOp : public OpKernel {
+ public:
+  explicit StringFormatOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    string template_;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("template", &template_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("placeholder", &placeholder_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("summarize", &summarize_));
+
+    split_template_ = absl::StrSplit(template_, placeholder_);
+    int64 num_placeholders = split_template_.size() - 1;
+    OP_REQUIRES(ctx, ctx->num_inputs() == num_placeholders,
+                errors::InvalidArgument(strings::StrCat(
+                    "num placeholders in template and num inputs must match: ",
+                    num_placeholders, " vs. ", ctx->num_inputs())));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    Tensor* formatted_string = nullptr;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(0, TensorShape({}), &formatted_string));
+
+    string msg;
+    strings::StrAppend(&msg, split_template_[0].c_str());
+    for (int i = 0; i < ctx->num_inputs(); ++i) {
+      strings::StrAppend(&msg, ctx->input(i).SummarizeValue(summarize_, true));
+      strings::StrAppend(&msg, split_template_[i + 1].c_str());
+    }
+
+    formatted_string->scalar<string>()() = msg;
+  }
+
+ private:
+  int32 summarize_ = 0;
+  string placeholder_;
+  std::vector<std::string> split_template_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("StringFormat").Device(DEVICE_CPU),
+                        StringFormatOp);
+
+}  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/string_format_op_test.cc b/tensorflow/core/kernels/string_format_op_test.cc
new file mode 100644
index 0000000000..13130a5797
--- /dev/null
+++ b/tensorflow/core/kernels/string_format_op_test.cc
@@ -0,0 +1,66 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+
+namespace tensorflow {
+namespace {
+
+class StringFormatGraphTest : public OpsTestBase {
+ protected:
+  Status Init(int num_inputs, DataType input_type,
+              const string& template_ = "%s", const string& placeholder = "%s",
+              int summarize = 3) {
+    TF_CHECK_OK(NodeDefBuilder("op", "StringFormat")
+                    .Input(FakeInput(num_inputs, input_type))
+                    .Attr("template", template_)
+                    .Attr("placeholder", placeholder)
+                    .Attr("summarize", summarize)
+                    .Finalize(node_def()));
+    return InitOp();
+  }
+};
+
+TEST_F(StringFormatGraphTest, Int32Success_7) {
+  TF_ASSERT_OK(Init(1, DT_INT32, "First tensor: %s"));
+
+  AddInputFromArray<int32>(TensorShape({7}), {1, 2, 3, 4, 5, 6, 7});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({}));
+  test::FillValues<string>(&expected, {"First tensor: [1 2 3 ... 5 6 7]"});
+  test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(StringFormatGraphTest, Int32Success_3_3) {
+  TF_ASSERT_OK(Init(1, DT_INT32, "First tensor: %s", "%s", 1));
+
+  AddInputFromArray<int32>(TensorShape({3, 3}), {1, 2, 3, 4, 5, 6, 7, 8, 9});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({}));
+  test::FillValues<string>(&expected, {"First tensor: [[1 ... 3]\n ..."
+                                       "\n [7 ... 9]]"});
+  test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+}  // end namespace
+}  // end namespace tensorflow
diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc
index 639d211767..2034d3601b 100644
--- a/tensorflow/core/ops/logging_ops.cc
+++ b/tensorflow/core/ops/logging_ops.cc
@@ -20,6 +20,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+using shape_inference::InferenceContext;
+
 REGISTER_OP("Assert")
     .Input("condition: bool")
     .Input("data: T")
@@ -44,6 +46,23 @@ REGISTER_OP("Print")
 
 WHITELIST_STATEFUL_OP_FOR_DATASET_FUNCTIONS("Print");
 
+REGISTER_OP("PrintV2")
+    .Input("input: string")
+    .SetIsStateful()
+    .Attr(
+        "output_stream: {'stdout', 'stderr', 'log(info)', "
+        "'log(warning)', 'log(error)'} = 'stderr'")
+    .SetShapeFn([](InferenceContext* c) {
+      // Make sure that the input is a scalar.
+      if (c->Rank(c->input(0)) != 0) {
+        return errors::InvalidArgument("input must be a scalar, but has rank: ",
+                                       c->Rank(c->input(0)));
+      }
+      return Status::OK();
+    });
+
+WHITELIST_STATEFUL_OP_FOR_DATASET_FUNCTIONS("PrintV2");
+
 // ----------------------------------------------------------------------------
 // Operators that deal with SummaryProtos (encoded as DT_STRING tensors) as
 // inputs or outputs in various ways.
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index ef8b15dc8a..99159839d0 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/str_split.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/shape_inference.h"
@@ -102,6 +103,32 @@ REGISTER_OP("AsString")
     .Attr("fill: string = ''")
     .SetShapeFn(shape_inference::UnchangedShape);
 
+REGISTER_OP("StringFormat")
+    .Input("inputs: T")
+    .Output("output: string")
+    .Attr("T: list(type) >= 0")
+    .Attr("template: string = '%s'")
+    .Attr("placeholder: string = '%s'")
+    .Attr("summarize: int = 3")
+    .SetShapeFn([](InferenceContext* c) {
+      string template_;
+      string placeholder;
+      TF_RETURN_IF_ERROR(c->GetAttr("template", &template_));
+      TF_RETURN_IF_ERROR(c->GetAttr("placeholder", &placeholder));
+
+      std::vector<std::string> split_template;
+      split_template = absl::StrSplit(template_, placeholder);
+      int64 num_placeholders = split_template.size() - 1;
+      if (c->num_inputs() != num_placeholders) {
+        return errors::InvalidArgument(strings::StrCat(
+            "num placeholders in template and num inputs must match: ",
+            num_placeholders, " vs. ", c->num_inputs()));
+      }
+
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    });
+
 REGISTER_OP("StringJoin")
     .Input("inputs: N * string")
     .Attr("N: int")
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index d70e9c5798..9730e9933a 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2324,6 +2324,8 @@ py_library(
     deps = [
         ":framework_for_generated_wrappers",
         ":logging_ops_gen",
+        ":platform",
+        ":string_ops",
         ":util",
     ],
 )
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index b7398238f5..c302072aa1 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -24,6 +24,7 @@ from collections import OrderedDict
 import contextlib
 import gc
 import itertools
+import os
 import math
 import random
 import re
@@ -868,6 +869,19 @@ def device(use_gpu):
     yield
 
 
+class CapturedWrites(object):
+  """A utility class to load the captured writes made to a stream."""
+
+  def __init__(self, capture_location):
+    self.capture_location = capture_location
+
+  def contents(self):
+    """Get the captured writes as a single string."""
+    with open(self.capture_location) as tmp_file:
+      output_data = "".join(tmp_file.readlines())
+    return output_data
+
+
 class ErrorLoggingSession(session.Session):
   """Wrapper around a Session that logs errors in run().
   """
@@ -934,6 +948,52 @@ class TensorFlowTestCase(googletest.TestCase):
       self._tempdir = tempfile.mkdtemp(dir=googletest.GetTempDir())
     return self._tempdir
 
+  @contextlib.contextmanager
+  def captureWritesToStream(self, stream):
+    """A context manager that captures the writes to a given stream.
+
+    This context manager captures all writes to a given stream inside of a
+    `CapturedWrites` object. When this context manager is created, it yields
+    the `CapturedWrites` object. The captured contents can be accessed  by
+    calling `.contents()` on the `CapturedWrites`.
+
+    For this function to work, the stream must have a file descriptor that
+    can be modified using `os.dup` and `os.dup2`, and the stream must support
+    a `.flush()` method. The default python sys.stdout and sys.stderr are
+    examples of this. Note that this does not work in Colab or Jupyter
+    notebooks, because those use alternate stdout streams.
+
+    Example:
+    ```python
+    class MyOperatorTest(test_util.TensorFlowTestCase):
+      def testMyOperator(self):
+        input = [1.0, 2.0, 3.0, 4.0, 5.0]
+        with self.captureWritesToStream(sys.stdout) as captured:
+          result = MyOperator(input).eval()
+        self.assertStartsWith(captured.contents(), "This was printed.")
+    ```
+
+    Args:
+      stream: The stream whose writes should be captured. This
+        stream must have a file descriptor, support writing via using that
+        file descriptor, and must have a `.flush()` method.
+
+    Yields:
+      A `CapturedWrites` object that contains all writes to the specified stream
+      made during this context.
+    """
+    stream.flush()
+    fd = stream.fileno()
+    tmp_file_path = tempfile.mktemp(dir=self.get_temp_dir())
+    tmp_file = open(tmp_file_path, "w")
+    orig_fd = os.dup(fd)
+    os.dup2(tmp_file.fileno(), fd)
+    try:
+      yield CapturedWrites(tmp_file_path)
+    finally:
+      tmp_file.close()
+      os.dup2(orig_fd, fd)
+
   def _AssertProtoEquals(self, a, b, msg=None):
     """Asserts that a and b are the same proto.
 
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index a048eaa69f..9dc6df77f1 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -960,6 +960,19 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "string_format_op_test",
+    size = "small",
+    srcs = ["string_format_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:math_ops",
+    ],
+)
+
 tf_py_test(
     name = "string_join_op_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py
index 82729b9e27..79fe9de62f 100644
--- a/tensorflow/python/kernel_tests/logging_ops_test.py
+++ b/tensorflow/python/kernel_tests/logging_ops_test.py
@@ -18,14 +18,23 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import sys
+
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import logging_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
 
 
 class LoggingOpsTest(test.TestCase):
@@ -57,6 +66,305 @@ class LoggingOpsTest(test.TestCase):
         out.eval()
 
 
+class PrintV2Test(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneTensor(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(tensor)
+        self.evaluate(print_op)
+
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneTensorVarySummarize(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(tensor, summarize=1)
+        self.evaluate(print_op)
+
+      expected = "[0 ... 9]"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(tensor, summarize=2)
+        self.evaluate(print_op)
+
+      expected = "[0 1 ... 8 9]"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(tensor, summarize=3)
+        self.evaluate(print_op)
+
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(tensor, summarize=-1)
+        self.evaluate(print_op)
+
+      expected = "[0 1 2 3 4 5 6 7 8 9]"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneVariable(self):
+    with self.test_session():
+      var = variables.Variable(math_ops.range(10))
+      if not context.executing_eagerly():
+        variables.global_variables_initializer().run()
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(var)
+        self.evaluate(print_op)
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintTwoVariablesInStructWithAssignAdd(self):
+    with self.test_session():
+      var_one = variables.Variable(2.14)
+      plus_one = var_one.assign_add(1.0)
+      var_two = variables.Variable(math_ops.range(10))
+      if not context.executing_eagerly():
+        variables.global_variables_initializer().run()
+      with self.captureWritesToStream(sys.stderr) as printed:
+        self.evaluate(plus_one)
+        print_op = logging_ops.print_v2(var_one, {"second": var_two})
+        self.evaluate(print_op)
+      expected = "3.14 {'second': [0 1 2 ... 7 8 9]}"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintTwoTensors(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(tensor, tensor * 10)
+        self.evaluate(print_op)
+      expected = "[0 1 2 ... 7 8 9] [0 10 20 ... 70 80 90]"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintPlaceholderGeneration(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2("{}6", {"{}": tensor * 10})
+        self.evaluate(print_op)
+      expected = "{}6 {'{}': [0 10 20 ... 70 80 90]}"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintNoTensors(self):
+    with self.test_session():
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(23, [23, 5], {"6": 12})
+        self.evaluate(print_op)
+      expected = "23 [23, 5] {'6': 12}"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintFloatScalar(self):
+    with self.test_session():
+      tensor = ops.convert_to_tensor(434.43)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(tensor)
+        self.evaluate(print_op)
+      expected = "434.43"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintStringScalar(self):
+    with self.test_session():
+      tensor = ops.convert_to_tensor("scalar")
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(tensor)
+        self.evaluate(print_op)
+      expected = "scalar"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintComplexTensorStruct(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      small_tensor = constant_op.constant([0.3, 12.4, -16.1])
+      big_tensor = math_ops.mul(tensor, 10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(
+            "first:", tensor, "middle:",
+            {"small": small_tensor, "Big": big_tensor}, 10,
+            [tensor * 2, tensor])
+        self.evaluate(print_op)
+      # Note that the keys in the dict will always be sorted,
+      # so 'Big' comes before 'small'
+      expected = ("first: [0 1 2 ... 7 8 9] "
+                  "middle: {'Big': [0 10 20 ... 70 80 90], "
+                  "'small': [0.3 12.4 -16.1]} "
+                  "10 [[0 2 4 ... 14 16 18], [0 1 2 ... 7 8 9]]")
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintSparseTensor(self):
+    with self.test_session():
+      ind = [[0, 0], [1, 0], [1, 3], [4, 1], [1, 4], [3, 2], [3, 3]]
+      val = [0, 10, 13, 4, 14, 32, 33]
+      shape = [5, 6]
+
+      sparse = sparse_tensor.SparseTensor(
+          constant_op.constant(ind, dtypes.int64),
+          constant_op.constant(val, dtypes.int64),
+          constant_op.constant(shape, dtypes.int64))
+
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(sparse)
+        self.evaluate(print_op)
+      expected = ("'SparseTensor(indices=[[0 0]\n"
+                  " [1 0]\n"
+                  " [1 3]\n"
+                  " ...\n"
+                  " [1 4]\n"
+                  " [3 2]\n"
+                  " [3 3]], values=[0 10 13 ... 14 32 33], shape=[5 6])'")
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintSparseTensorInDataStruct(self):
+    with self.test_session():
+      ind = [[0, 0], [1, 0], [1, 3], [4, 1], [1, 4], [3, 2], [3, 3]]
+      val = [0, 10, 13, 4, 14, 32, 33]
+      shape = [5, 6]
+
+      sparse = sparse_tensor.SparseTensor(
+          constant_op.constant(ind, dtypes.int64),
+          constant_op.constant(val, dtypes.int64),
+          constant_op.constant(shape, dtypes.int64))
+
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2([sparse])
+        self.evaluate(print_op)
+      expected = ("['SparseTensor(indices=[[0 0]\n"
+                  " [1 0]\n"
+                  " [1 3]\n"
+                  " ...\n"
+                  " [1 4]\n"
+                  " [3 2]\n"
+                  " [3 3]], values=[0 10 13 ... 14 32 33], shape=[5 6])']")
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneTensorStdout(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stdout) as printed:
+        print_op = logging_ops.print_v2(
+            tensor, output_stream=sys.stdout)
+        self.evaluate(print_op)
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneTensorLogInfo(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(
+            tensor, output_stream=tf_logging.info)
+        self.evaluate(print_op)
+      self.assertTrue("I" in printed.contents())
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue(expected in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneTensorLogWarning(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(
+            tensor, output_stream=tf_logging.warning)
+        self.evaluate(print_op)
+      self.assertTrue("W" in printed.contents())
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue(expected in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneTensorLogError(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(
+            tensor, output_stream=tf_logging.error)
+        self.evaluate(print_op)
+      self.assertTrue("E" in printed.contents())
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue(expected in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testInvalidOutputStreamRaisesError(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.assertRaises(ValueError):
+        print_op = logging_ops.print_v2(
+            tensor, output_stream="unknown")
+        self.evaluate(print_op)
+
+  def testPrintOpName(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      print_op = logging_ops.print_v2(tensor, name="print_name")
+      self.assertEqual(print_op.name, "print_name")
+
+  def testNoDuplicateFormatOpGraphModeAfterExplicitFormat(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      formatted_string = string_ops.string_format("{}", tensor)
+      print_op = logging_ops.print_v2(formatted_string)
+      self.evaluate(print_op)
+      graph_ops = ops.get_default_graph().get_operations()
+      format_ops = [op for op in graph_ops if op.type == "StringFormat"]
+      # Should be only 1 format_op for graph mode.
+      self.assertEqual(len(format_ops), 1)
+
+  def testPrintOneTensorEagerOnOpCreate(self):
+    with self.test_session():
+      with context.eager_mode():
+        tensor = math_ops.range(10)
+        expected = "[0 1 2 ... 7 8 9]"
+        with self.captureWritesToStream(sys.stderr) as printed:
+          logging_ops.print_v2(tensor)
+        self.assertTrue((expected + "\n") in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintInDefunWithoutExplicitEvalOfPrint(self):
+    @function.defun
+    def f():
+      tensor = math_ops.range(10)
+      logging_ops.print_v2(tensor)
+      return tensor
+
+    expected = "[0 1 2 ... 7 8 9]"
+    with self.captureWritesToStream(sys.stderr) as printed_one:
+      x = f()
+      self.evaluate(x)
+    self.assertTrue((expected + "\n") in printed_one.contents())
+
+    # We execute the function again to make sure it doesn't only print on the
+    # first call.
+    with self.captureWritesToStream(sys.stderr) as printed_two:
+      y = f()
+      self.evaluate(y)
+    self.assertTrue((expected + "\n") in printed_two.contents())
+
+
 class PrintGradientTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
@@ -65,6 +373,11 @@ class PrintGradientTest(test.TestCase):
     inp_printed = logging_ops.Print(inp, [inp])
     self.assertEqual(inp.get_shape(), inp_printed.get_shape())
 
+  def testPrintString(self):
+    inp = constant_op.constant(2.0, shape=[100, 32])
+    inp_printed = logging_ops.Print(inp, ["hello"])
+    self.assertEqual(inp.get_shape(), inp_printed.get_shape())
+
   def testPrintGradient(self):
     with self.cached_session():
       inp = constant_op.constant(2.0, shape=[100, 32], name="in")
diff --git a/tensorflow/python/kernel_tests/string_format_op_test.py b/tensorflow/python/kernel_tests/string_format_op_test.py
new file mode 100644
index 0000000000..afa71db909
--- /dev/null
+++ b/tensorflow/python/kernel_tests/string_format_op_test.py
@@ -0,0 +1,384 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tensorflow.kernels.logging_ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+
+class StringFormatOpTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorOneDim(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      format_output = string_ops.string_format("{}", tensor)
+      out = self.evaluate(format_output)
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertEqual(compat.as_text(out), expected)
+
+    with self.test_session():
+      tensor = math_ops.range(10)
+      format_output = string_ops.string_format("{}", [tensor])
+      out = self.evaluate(format_output)
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneVariableScalar(self):
+    with self.test_session():
+      var = variables.Variable(3.34)
+      format_output = string_ops.string_format("{}", [var])
+      if not context.executing_eagerly():
+        variables.global_variables_initializer().run()
+      out = self.evaluate(format_output)
+      expected = "3.34"
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneVariableOneDim(self):
+    with self.test_session():
+      var = variables.Variable(math_ops.range(10))
+      format_output = string_ops.string_format("{}", [var])
+      if not context.executing_eagerly():
+        variables.global_variables_initializer().run()
+      out = self.evaluate(format_output)
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatTwoVariablesWithAssignAdd(self):
+    with self.test_session():
+      var_one = variables.Variable(2.14)
+      plus_one = var_one.assign_add(1.0)
+      var_two = variables.Variable(math_ops.range(10))
+      format_output = string_ops.string_format("{}, {}", [var_one, var_two])
+      if not context.executing_eagerly():
+        variables.global_variables_initializer().run()
+      self.evaluate(plus_one)
+      out = self.evaluate(format_output)
+      expected = "3.14, [0 1 2 ... 7 8 9]"
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorOneDimFloat(self):
+    with self.test_session():
+      tensor = constant_op.constant([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7])
+      format_output = string_ops.string_format("{}", tensor)
+      out = self.evaluate(format_output)
+      expected = "[0 0.1 0.2 ... 0.5 0.6 0.7]"
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorOneDimMatchesSummarize(self):
+    with self.test_session():
+      tensor = math_ops.range(6)
+      format_output = string_ops.string_format("{}", tensor, summarize=3)
+      out = self.evaluate(format_output)
+      expected = "[0 1 2 3 4 5]"
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorOneDimVarySummarize(self):
+    with self.test_session():
+      tensor = math_ops.range(6)
+      format_output = string_ops.string_format("{}", tensor, summarize=-1)
+      out = self.evaluate(format_output)
+      expected = "[0 1 2 3 4 5]"
+      self.assertEqual(compat.as_text(out), expected)
+
+    with self.test_session():
+      tensor = math_ops.range(6)
+      format_output = string_ops.string_format("{}", tensor, summarize=1)
+      out = self.evaluate(format_output)
+      expected = "[0 ... 5]"
+      self.assertEqual(compat.as_text(out), expected)
+
+    with self.test_session():
+      tensor = math_ops.range(6)
+      format_output = string_ops.string_format("{}", tensor, summarize=2)
+      out = self.evaluate(format_output)
+      expected = "[0 1 ... 4 5]"
+      self.assertEqual(compat.as_text(out), expected)
+
+    with self.test_session():
+      tensor = math_ops.range(6)
+      format_output = string_ops.string_format("{}", tensor, summarize=10)
+      out = self.evaluate(format_output)
+      expected = "[0 1 2 3 4 5]"
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorOneDimAlmostSummarize(self):
+    with self.test_session():
+      tensor = math_ops.range(5)
+      format_output = string_ops.string_format("{}", tensor, summarize=3)
+      out = self.evaluate(format_output)
+      expected = "[0 1 2 3 4]"
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorTwoDimLessThanSummarize(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(4), [2, 2])
+      format_output = string_ops.string_format("{}", tensor, summarize=3)
+      out = self.evaluate(format_output)
+      expected = ("[[0 1]\n"
+                  " [2 3]]")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorTwoDim(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(100), [10, 10])
+      format_output = string_ops.string_format("{}", tensor)
+      out = self.evaluate(format_output)
+      expected = ("[[0 1 2 ... 7 8 9]\n"
+                  " [10 11 12 ... 17 18 19]\n"
+                  " [20 21 22 ... 27 28 29]\n"
+                  " ...\n"
+                  " [70 71 72 ... 77 78 79]\n"
+                  " [80 81 82 ... 87 88 89]\n"
+                  " [90 91 92 ... 97 98 99]]")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorTwoDimSummarizeTwo(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(100), [10, 10])
+      format_output = string_ops.string_format("{}", tensor, summarize=2)
+      out = self.evaluate(format_output)
+      expected = ("[[0 1 ... 8 9]\n"
+                  " [10 11 ... 18 19]\n"
+                  " ...\n"
+                  " [80 81 ... 88 89]\n"
+                  " [90 91 ... 98 99]]")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorThreeDim(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(1000), [10, 10, 10])
+      format_output = string_ops.string_format("{}", tensor)
+      out = self.evaluate(format_output)
+      expected = ("[[[0 1 2 ... 7 8 9]\n"
+                  "  [10 11 12 ... 17 18 19]\n"
+                  "  [20 21 22 ... 27 28 29]\n"
+                  "  ...\n"
+                  "  [70 71 72 ... 77 78 79]\n"
+                  "  [80 81 82 ... 87 88 89]\n"
+                  "  [90 91 92 ... 97 98 99]]\n"
+                  "\n"
+                  " [[100 101 102 ... 107 108 109]\n"
+                  "  [110 111 112 ... 117 118 119]\n"
+                  "  [120 121 122 ... 127 128 129]\n"
+                  "  ...\n  [170 171 172 ... 177 178 179]\n"
+                  "  [180 181 182 ... 187 188 189]\n"
+                  "  [190 191 192 ... 197 198 199]]\n"
+                  "\n"
+                  " [[200 201 202 ... 207 208 209]\n"
+                  "  [210 211 212 ... 217 218 219]\n"
+                  "  [220 221 222 ... 227 228 229]\n"
+                  "  ...\n"
+                  "  [270 271 272 ... 277 278 279]\n"
+                  "  [280 281 282 ... 287 288 289]\n"
+                  "  [290 291 292 ... 297 298 299]]\n"
+                  "\n"
+                  " ...\n"
+                  "\n"
+                  " [[700 701 702 ... 707 708 709]\n"
+                  "  [710 711 712 ... 717 718 719]\n"
+                  "  [720 721 722 ... 727 728 729]\n"
+                  "  ...\n"
+                  "  [770 771 772 ... 777 778 779]\n"
+                  "  [780 781 782 ... 787 788 789]\n"
+                  "  [790 791 792 ... 797 798 799]]\n"
+                  "\n"
+                  " [[800 801 802 ... 807 808 809]\n"
+                  "  [810 811 812 ... 817 818 819]\n"
+                  "  [820 821 822 ... 827 828 829]\n"
+                  "  ...\n"
+                  "  [870 871 872 ... 877 878 879]\n"
+                  "  [880 881 882 ... 887 888 889]\n"
+                  "  [890 891 892 ... 897 898 899]]\n"
+                  "\n"
+                  " [[900 901 902 ... 907 908 909]\n"
+                  "  [910 911 912 ... 917 918 919]\n"
+                  "  [920 921 922 ... 927 928 929]\n"
+                  "  ...\n"
+                  "  [970 971 972 ... 977 978 979]\n"
+                  "  [980 981 982 ... 987 988 989]\n"
+                  "  [990 991 992 ... 997 998 999]]]")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorTemplatePrefix(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(100), [10, 10])
+      format_output = string_ops.string_format("tensor summary: {}", tensor)
+      out = self.evaluate(format_output)
+      expected = ("tensor summary: [[0 1 2 ... 7 8 9]\n"
+                  " [10 11 12 ... 17 18 19]\n"
+                  " [20 21 22 ... 27 28 29]\n"
+                  " ...\n"
+                  " [70 71 72 ... 77 78 79]\n"
+                  " [80 81 82 ... 87 88 89]\n"
+                  " [90 91 92 ... 97 98 99]]")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorTemplatePrefixAndSuffix(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(100), [10, 10])
+      format_output = string_ops.string_format("tensor summary: {}, suffix",
+                                               tensor)
+      out = self.evaluate(format_output)
+      expected = ("tensor summary: [[0 1 2 ... 7 8 9]\n"
+                  " [10 11 12 ... 17 18 19]\n"
+                  " [20 21 22 ... 27 28 29]\n"
+                  " ...\n"
+                  " [70 71 72 ... 77 78 79]\n"
+                  " [80 81 82 ... 87 88 89]\n"
+                  " [90 91 92 ... 97 98 99]], suffix")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatOneTensorTemplateSuffix(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(100), [10, 10])
+      format_output = string_ops.string_format("{}, suffix", tensor)
+      out = self.evaluate(format_output)
+      expected = ("[[0 1 2 ... 7 8 9]\n"
+                  " [10 11 12 ... 17 18 19]\n"
+                  " [20 21 22 ... 27 28 29]\n"
+                  " ...\n"
+                  " [70 71 72 ... 77 78 79]\n"
+                  " [80 81 82 ... 87 88 89]\n"
+                  " [90 91 92 ... 97 98 99]], suffix")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatNoTensor(self):
+    with self.test_session():
+      format_output = string_ops.string_format("No tensor.", ())
+      out = self.evaluate(format_output)
+      expected = "No tensor."
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatMultiTensor(self):
+    with self.test_session():
+      tensor_one = array_ops.reshape(math_ops.range(100), [10, 10])
+      tensor_two = tensor_one * 10
+      format_output = string_ops.string_format("One: {},\nTwo: {}",
+                                               (tensor_one, tensor_two))
+      out = self.evaluate(format_output)
+      expected = ("One: [[0 1 2 ... 7 8 9]\n"
+                  " [10 11 12 ... 17 18 19]\n"
+                  " [20 21 22 ... 27 28 29]\n"
+                  " ...\n"
+                  " [70 71 72 ... 77 78 79]\n"
+                  " [80 81 82 ... 87 88 89]\n"
+                  " [90 91 92 ... 97 98 99]],\n"
+                  "Two: [[0 10 20 ... 70 80 90]\n"
+                  " [100 110 120 ... 170 180 190]\n"
+                  " [200 210 220 ... 270 280 290]\n"
+                  " ...\n"
+                  " [700 710 720 ... 770 780 790]\n"
+                  " [800 810 820 ... 870 880 890]\n"
+                  " [900 910 920 ... 970 980 990]]")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatSummarizeOne(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(100), [10, 10])
+      format_output = string_ops.string_format("tensor summary: {}", tensor,
+                                               summarize=1)
+      out = self.evaluate(format_output)
+      expected = ("tensor summary: [[0 ... 9]\n"
+                  " ...\n"
+                  " [90 ... 99]]")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatSummarizeTwo(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(100), [10, 10])
+      format_output = string_ops.string_format("tensor summary: {}", tensor,
+                                               summarize=2)
+      out = self.evaluate(format_output)
+      expected = ("tensor summary: [[0 1 ... 8 9]\n"
+                  " [10 11 ... 18 19]\n"
+                  " ...\n"
+                  " [80 81 ... 88 89]\n"
+                  " [90 91 ... 98 99]]")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testFormatPlaceholder(self):
+    with self.test_session():
+      tensor = array_ops.reshape(math_ops.range(100), [10, 10])
+      format_output = string_ops.string_format("tensor summary: %t%", tensor,
+                                               placeholder="%t%")
+      out = self.evaluate(format_output)
+      expected = ("tensor summary: [[0 1 2 ... 7 8 9]\n"
+                  " [10 11 12 ... 17 18 19]\n"
+                  " [20 21 22 ... 27 28 29]\n"
+                  " ...\n"
+                  " [70 71 72 ... 77 78 79]\n"
+                  " [80 81 82 ... 87 88 89]\n"
+                  " [90 91 92 ... 97 98 99]]")
+      self.assertEqual(compat.as_text(out), expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testTensorCountMustMatchPlaceholderCount(self):
+    with self.test_session():
+      with self.assertRaisesRegexp(
+          ValueError, r"2 placeholder\(s\) in template does not match 1 "
+                      r"tensor\(s\) provided as input"):
+        tensor = math_ops.range(10)
+        format_output = string_ops.string_format("{} {}", tensor)
+        self.evaluate(format_output)
+    with self.test_session():
+      with self.assertRaisesRegexp(
+          ValueError, r"2 placeholder\(s\) in template does not match 1 "
+                      r"tensor\(s\) provided as input"):
+        tensor = math_ops.range(10)
+        format_output = string_ops.string_format("{} {}", [tensor])
+        self.evaluate(format_output)
+    with self.test_session():
+      with self.assertRaisesRegexp(
+          ValueError, r"1 placeholder\(s\) in template does not match 2 "
+                      r"tensor\(s\) provided as input"):
+        tensor = math_ops.range(10)
+        format_output = string_ops.string_format("{}", (tensor, tensor))
+        self.evaluate(format_output)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py
index df41933f8a..4c53f33af1 100644
--- a/tensorflow/python/ops/logging_ops.py
+++ b/tensorflow/python/ops/logging_ops.py
@@ -19,13 +19,24 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import pprint
+import random
+import sys
+
+import six
+
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import gen_logging_ops
+from tensorflow.python.ops import string_ops
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_logging_ops import *
 # pylint: enable=wildcard-import
+from tensorflow.python.platform import tf_logging
+from tensorflow.python.util import nest
 from tensorflow.python.util.deprecation import deprecated
 from tensorflow.python.util.tf_export import tf_export
 
@@ -40,7 +51,32 @@ from tensorflow.python.util.tf_export import tf_export
 # For users with Python 3 or Python 2.7
 # with `from __future__ import print_function`, we could also allow lowercase.
 # See https://github.com/tensorflow/tensorflow/issues/18053
-@tf_export("Print")
+
+
+# pylint: disable=invalid-name
+@deprecated("2018-08-20", "Use tf.print instead of tf.Print. Note that "
+                          "tf.print returns a no-output operator that directly "
+                          "prints the output. Outside of defuns or eager mode, "
+                          "this operator will not be executed unless it is "
+                          "directly specified in session.run or used as a "
+                          "control dependency for other operators. This is "
+                          "only a concern in graph mode. Below is an example "
+                          "of how to ensure tf.print executes in graph mode:\n"
+                          """```python
+    sess = tf.Session()
+    with sess.as_default():
+        tensor = tf.range(10)
+        print_op = tf.print(tensor)
+        with tf.control_dependencies([print_op]):
+          out = tf.add(tensor, tensor)
+        sess.run(out)
+    ```
+Additionally, to use tf.print in python 2.7, users must make sure to import
+the following:
+
+  `from __future__ import print_function`
+""")
+@tf_export(v1=["Print"])
 def Print(input_, data, message=None, first_n=None, summarize=None,
           name=None):
   """Prints a list of tensors.
@@ -66,6 +102,228 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
     A `Tensor`. Has the same type and contents as `input_`.
   """
   return gen_logging_ops._print(input_, data, message, first_n, summarize, name)
+# pylint: enable=invalid-name
+
+
+def _generate_placeholder_string(x, default_placeholder="{}"):
+  """Generate and return a string that does not appear in `x`."""
+  placeholder = default_placeholder
+  rng = random.Random(5)
+  while placeholder in x:
+    placeholder = placeholder + str(rng.randint(0, 9))
+  return placeholder
+
+
+# Temporarily disable pylint g-doc-args error to allow giving more context
+# about what the kwargs are.
+# Because we are using arbitrary-length positional arguments, python 2
+# does not support explicitly specifying the keyword arguments in the
+# function definition.
+# pylint: disable=g-doc-args
+@tf_export("print")
+def print_v2(*inputs, **kwargs):
+  """Print the specified inputs.
+
+  Returns an operator that prints the specified inputs to a desired
+  output stream or logging level. The inputs may be dense or sparse Tensors,
+  primitive python objects, data structures that contain Tensors, and printable
+  python objects. Printed tensors will recursively show the first and last
+  `summarize` elements of each dimension.
+
+  With eager execution enabled and/or inside a `tf.contrib.eager.defun` this
+  operator will automatically execute, and users only need to call `tf.print`
+  without using the return value. When constructing graphs outside of a
+  `tf.contrib.eager.defun`, one must either include the returned op
+  in the input to `session.run`, or use the operator as a control dependency for
+  executed ops by specifying `with tf.control_dependencies([print_op])`.
+
+  @compatibility(python2)
+  In python 2.7, make sure to import the following:
+  `from __future__ import print_function`
+  @end_compatibility
+
+  Example:
+    Single-input usage:
+    ```python
+    tf.enable_eager_execution()
+    tensor = tf.range(10)
+    tf.print(tensor, output_stream=sys.stderr)
+    ```
+    (This prints "[0 1 2 ... 7 8 9]" to sys.stderr)
+
+    Multi-input usage:
+    ```python
+    tf.enable_eager_execution()
+    tensor = tf.range(10)
+    tf.print("tensors:", tensor, {2: tensor * 2}, output_stream=sys.stdout)
+    ```
+    (This prints "tensors: [0 1 2 ... 7 8 9] {2: [0 2 4 ... 14 16 18]}" to
+    sys.stdout)
+
+    Usage in a defun:
+    ```python
+    tf.enable_eager_execution()
+
+    @tf.contrib.eager.defun
+    def f():
+        tensor = tf.range(10)
+        tf.print(tensor, output_stream=sys.stderr)
+        return tensor
+
+    range_tensor = f()
+    ```
+    (This prints "[0 1 2 ... 7 8 9]" to sys.stderr)
+
+    Usage when constructing graphs:
+    ```python
+    sess = tf.Session()
+    with sess.as_default():
+        tensor = tf.range(10)
+        print_op = tf.print("tensors:", tensor, {2: tensor * 2},
+                            output_stream=sys.stdout)
+        with tf.control_dependencies([print_op]):
+          tripled_tensor = tensor * 3
+        sess.run(tripled_tensor)
+    ```
+    (This prints "tensors: [0 1 2 ... 7 8 9] {2: [0 2 4 ... 14 16 18]}" to
+    sys.stdout)
+
+  Note: This op is only partially compatible with Jupyter notebooks and colabs.
+    Because it prints to the C++ standard out / standard error, this will go
+    in the notebook kernel's console output, not in the notebook cell output.
+
+  Args:
+    *inputs: Positional arguments that are the inputs to print. Inputs in the
+      printed output will be separated by spaces. Inputs may be python
+      primitives, tensors, data structures such as dicts and lists that
+      may contain tensors (with the data structures possibly nested in
+      arbitrary ways), and printable python objects.
+    output_stream: The output stream or logging level to print to. Defaults to
+      sys.stderr, but sys.stdout, tf.logging.info, tf.logging.warning, and
+      tf.logging.error are also supported.
+    summarize: The first and last `summarize` elements within each dimension are
+      recursively printed per Tensor. If None, then the first 3 and last 3
+      elements of each dimension are printed for each tensor. If set to -1, it
+      will print all elements of every tensor.
+    name: A name for the operation (optional).
+
+  Returns:
+    A print operator that prints the specified inputs in the specified output
+    stream or logging level.
+
+  Raises:
+    ValueError: If an unsupported output stream is specified.
+  """
+  # Because we are using arbitrary-length positional arguments, python 2
+  # does not support explicitly specifying the keyword arguments in the
+  # function definition. So, we manually get the keyword arguments w/ default
+  # values here.
+  output_stream = kwargs.pop("output_stream", sys.stderr)
+  name = kwargs.pop("name", None)
+  summarize = kwargs.pop("summarize", 3)
+  if kwargs:
+    raise ValueError("Unrecognized keyword arguments for tf.print: %s" % kwargs)
+  format_name = None
+  if name:
+    format_name = name + "_format"
+
+  # Match the C++ string constants representing the different output streams.
+  # Keep this updated!
+  output_stream_to_constant = {
+      sys.stdout: "stdout",
+      sys.stderr: "stderr",
+      tf_logging.INFO: "log(info)",
+      tf_logging.info: "log(info)",
+      tf_logging.WARN: "log(warning)",
+      tf_logging.warning: "log(warning)",
+      tf_logging.warn: "log(warning)",
+      tf_logging.ERROR: "log(error)",
+      tf_logging.error: "log(error)",
+  }
+
+  output_stream_string = output_stream_to_constant.get(output_stream)
+  if not output_stream_string:
+    raise ValueError(
+        "Unsupported output stream or logging level " +
+        str(output_stream) + ". Supported streams are sys.stdout, "
+                             "sys.stderr, tf.logging.info, "
+                             "tf.logging.warning, tf.logging.error")
+
+  # If we are only printing a single string scalar, there is no need to format
+  if (len(inputs) == 1 and tensor_util.is_tensor(inputs[0])
+      and (not isinstance(inputs[0], sparse_tensor.SparseTensor))
+      and inputs[0].shape and (inputs[0].dtype == dtypes.string)):
+    formatted_string = inputs[0]
+  # Otherwise, we construct an appropriate template for the tensors we are
+  # printing, and format the template using those tensors.
+  else:
+    # For each input to this print function, we extract any nested tensors,
+    # and construct an appropriate template to format representing the
+    # printed input.
+    templates = []
+    tensors = []
+    tensor_free_structure = nest.map_structure(
+        lambda x: "" if tensor_util.is_tensor(x) else x,
+        inputs)
+    tensor_free_template = " ".join(pprint.pformat(x)
+                                    for x in tensor_free_structure)
+    placeholder = _generate_placeholder_string(tensor_free_template)
+
+    for input_ in inputs:
+      placeholders = []
+      # Use the nest utilities to flatten & process any nested elements in this
+      # input. The placeholder for a tensor in the template should be the
+      # placeholder string, and the placeholder for a non-tensor can just be
+      # the printed value of the non-tensor itself.
+      for x in nest.flatten(input_):
+        # support sparse tensors
+        if isinstance(x, sparse_tensor.SparseTensor):
+          tensors.extend([x.indices, x.values, x.dense_shape])
+          placeholders.append(
+              "SparseTensor(indices={}, values={}, shape={})".format(
+                  placeholder, placeholder, placeholder)
+          )
+        elif tensor_util.is_tensor(x):
+          tensors.append(x)
+          placeholders.append(placeholder)
+        else:
+          placeholders.append(x)
+
+      if isinstance(input_, six.string_types):
+        # If the current input to format/print is a normal string, that string
+        # can act as the template.
+        cur_template = input_
+      else:
+        # We pack the placeholders into a data structure that matches the
+        # input data structure format, then format that data structure
+        # into a string template.
+        #
+        # NOTE: We must use pprint.pformat here for building the template for
+        # unordered data structures such as `dict`, because `str` doesn't
+        # guarantee orderings, while pprint prints in sorted order. pprint
+        # will match the ordering of `nest.flatten`.
+        # This even works when nest.flatten reorders OrderedDicts, because
+        # pprint is printing *after* the OrderedDicts have been reordered.
+        cur_template = pprint.pformat(
+            nest.pack_sequence_as(input_, placeholders))
+      templates.append(cur_template)
+
+    # We join the templates for the various inputs into a single larger
+    # template. We also remove all quotes surrounding the placeholders, so that
+    # the formatted/printed output will not contain quotes around tensors.
+    # (example of where these quotes might appear: if we have added a
+    # placeholder string into a list, then pretty-formatted that list)
+    template = " ".join(templates)
+    template = template.replace("'" + placeholder + "'", placeholder)
+    formatted_string = string_ops.string_format(
+        inputs=tensors, template=template, placeholder=placeholder,
+        summarize=summarize,
+        name=format_name)
+
+  return gen_logging_ops.print_v2(formatted_string,
+                                  output_stream=output_stream_string,
+                                  name=name)
+# pylint: enable=g-doc-args
 
 
 @ops.RegisterGradient("Print")
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index b2c6937368..5d949467fd 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -29,14 +29,15 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_string_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.util import compat as util_compat
 
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_string_ops import *
+from tensorflow.python.util import compat as util_compat
 from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 # pylint: enable=wildcard-import
@@ -103,6 +104,87 @@ def regex_replace(source, pattern, rewrite, replace_global=True):
       rewrite=rewrite, replace_global=replace_global)
 
 
+@tf_export("strings.format")
+def string_format(template, inputs, placeholder="{}", summarize=3, name=None):
+  r"""Formats a string template using a list of tensors.
+
+  Formats a string template using a list of tensors, abbreviating tensors by
+  only printing the first and last `summarize` elements of each dimension
+  (recursively). If formatting only one tensor into a template, the tensor does
+  not have to be wrapped in a list.
+
+  Example:
+    Formatting a single-tensor template:
+    ```python
+    sess = tf.Session()
+    with sess.as_default():
+        tensor = tf.range(10)
+        formatted = tf.strings.format("tensor: {}, suffix", tensor)
+        out = sess.run(formatted)
+        expected = "tensor: [0 1 2 ... 7 8 9], suffix"
+
+        assert(out.decode() == expected)
+    ```
+
+    Formatting a multi-tensor template:
+    ```python
+    sess = tf.Session()
+    with sess.as_default():
+        tensor_one = tf.reshape(tf.range(100), [10, 10])
+        tensor_two = tf.range(10)
+        formatted = tf.strings.format("first: {}, second: {}, suffix",
+          (tensor_one, tensor_two))
+
+        out = sess.run(formatted)
+        expected = ("first: [[0 1 2 ... 7 8 9]\n"
+              " [10 11 12 ... 17 18 19]\n"
+              " [20 21 22 ... 27 28 29]\n"
+              " ...\n"
+              " [70 71 72 ... 77 78 79]\n"
+              " [80 81 82 ... 87 88 89]\n"
+              " [90 91 92 ... 97 98 99]], second: [0 1 2 ... 7 8 9], suffix")
+
+        assert(out.decode() == expected)
+    ```
+
+  Args:
+    template: A string template to format tensor values into.
+    inputs: A list of `Tensor` objects, or a single Tensor.
+      The list of tensors to format into the template string. If a solitary
+      tensor is passed in, the input tensor will automatically be wrapped as a
+      list.
+    placeholder: An optional `string`. Defaults to `{}`.
+      At each placeholder occurring in the template, a subsequent tensor
+      will be inserted.
+    summarize: An optional `int`. Defaults to `3`.
+      When formatting the tensors, show the first and last `summarize`
+      entries of each tensor dimension (recursively). If set to -1, all
+      elements of the tensor will be shown.
+    name: A name for the operation (optional).
+
+  Returns:
+    A scalar `Tensor` of type `string`.
+
+  Raises:
+    ValueError: if the number of placeholders does not match the number of
+      inputs.
+  """
+  # If there is only one tensor to format, we will automatically wrap it in a
+  # list to simplify the user experience
+  if tensor_util.is_tensor(inputs):
+    inputs = [inputs]
+  if template.count(placeholder) != len(inputs):
+    raise ValueError("%s placeholder(s) in template does not match %s tensor(s)"
+                     " provided as input" % (template.count(placeholder),
+                                             len(inputs)))
+
+  return gen_string_ops.string_format(inputs,
+                                      template=template,
+                                      placeholder=placeholder,
+                                      summarize=summarize,
+                                      name=name)
+
+
 @tf_export("string_split")
 def string_split(source, delimiter=" ", skip_empty=True):  # pylint: disable=invalid-name
   """Split elements of `source` based on `delimiter` into a `SparseTensor`.
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 14ab885c91..6ff4343e9e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1592,6 +1592,10 @@ tf_module {
     name: "pow"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "print"
+    argspec: "args=[], varargs=inputs, keywords=kwargs, defaults=None"
+  }
   member_method {
     name: "py_func"
     argspec: "args=[\'func\', \'inp\', \'Tout\', \'stateful\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
index 018be7b9f9..c81c156518 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
@@ -1,5 +1,9 @@
 path: "tensorflow.strings"
 tf_module {
+  member_method {
+    name: "format"
+    argspec: "args=[\'template\', \'inputs\', \'placeholder\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'{}\', \'3\', \'None\'], "
+  }
   member_method {
     name: "join"
     argspec: "args=[\'inputs\', \'separator\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 323d2fc519..db90c007d4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -580,10 +580,6 @@ tf_module {
     name: "NotDifferentiable"
     argspec: "args=[\'op_type\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "Print"
-    argspec: "args=[\'input_\', \'data\', \'message\', \'first_n\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
   member_method {
     name: "abs"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1540,6 +1536,10 @@ tf_module {
     name: "pow"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "print"
+    argspec: "args=[], varargs=inputs, keywords=kwargs, defaults=None"
+  }
   member_method {
     name: "py_func"
     argspec: "args=[\'func\', \'inp\', \'Tout\', \'stateful\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
index 018be7b9f9..c81c156518 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
@@ -1,5 +1,9 @@
 path: "tensorflow.strings"
 tf_module {
+  member_method {
+    name: "format"
+    argspec: "args=[\'template\', \'inputs\', \'placeholder\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'{}\', \'3\', \'None\'], "
+  }
   member_method {
     name: "join"
     argspec: "args=[\'inputs\', \'separator\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'None\'], "
-- 
GitLab


From 484a5c673a4e31748c99c104acc55ed2d7f4f98a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 16:12:31 -0700
Subject: [PATCH 0412/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 213716034

---
 tensorflow/go/op/wrappers.go | 962 +++++++++++++++++------------------
 1 file changed, 481 insertions(+), 481 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index eb636dbf54..1d72bcd2b6 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -3741,98 +3741,28 @@ func BoostedTreesExampleDebugOutputs(scope *Scope, tree_ensemble_handle tf.Outpu
 	return op.Output(0)
 }
 
-// Computes the sum along sparse segments of a tensor.
-//
-// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
-// misisng, the `output` tensor at that position will be zeroed.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// For example:
-//
-// ```python
-// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
-//
-// tf.sparse_segment_sum_with_num_segments(
-//     c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
-// # => [[0 0 0 0]
-// #     [0 0 0 0]
-// #     [0 0 0 0]]
-//
-// tf.sparse_segment_sum_with_num_segments(c,
-//                                         tf.constant([0, 1]),
-//                                         tf.constant([0, 2],
-//                                         num_segments=4))
-// # => [[ 1  2  3  4]
-// #     [ 0  0  0  0]
-// #     [-1 -2 -3 -4]
-// #     [ 0  0  0  0]]
-// ```
-//
-// Arguments:
-//
-//	indices: A 1-D tensor. Has same rank as `segment_ids`.
-//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
-//	num_segments: Should equal the number of distinct segment IDs.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `num_segments`.
-func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSegmentSumWithNumSegments",
-		Input: []tf.Input{
-			data, indices, segment_ids, num_segments,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// PreventGradientAttr is an optional argument to PreventGradient.
-type PreventGradientAttr func(optionalAttr)
-
-// PreventGradientMessage sets the optional message attribute to value.
-//
-// value: Will be printed in the error when anyone tries to differentiate
-// this operation.
-// If not specified, defaults to ""
-func PreventGradientMessage(value string) PreventGradientAttr {
-	return func(m optionalAttr) {
-		m["message"] = value
-	}
-}
-
-// An identity op that triggers an error if a gradient is requested.
-//
-// When executed in a graph, this op outputs its input tensor as-is.
+// Makes the summary of accumulated stats for the batch.
 //
-// When building ops to compute gradients, the TensorFlow gradient system
-// will return an error when trying to lookup the gradient of this op,
-// because no gradient must ever be registered for this function.  This
-// op exists to prevent subtle bugs from silently returning unimplemented
-// gradients in some corner cases.
+// The summary stats contains gradients and hessians accumulated into the corresponding node and bucket for each example.
 //
 // Arguments:
-//	input: any tensor.
+//	node_ids: int32 Rank 1 Tensor containing node ids, which each example falls into for the requested layer.
+//	gradients: float32; Rank 2 Tensor (shape=[#examples, 1]) for gradients.
+//	hessians: float32; Rank 2 Tensor (shape=[#examples, 1]) for hessians.
+//	bucketized_features_list: int32 list of Rank 1 Tensors, each containing the bucketized feature (for each feature column).
+//	max_splits: int; the maximum number of splits possible in the whole tree.
+//	num_buckets: int; equals to the maximum possible value of bucketized feature.
 //
-// Returns the same input tensor.
-func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
+// Returns output Rank 4 Tensor (shape=[#features, #splits, #buckets, 2]) containing accumulated stats put into the corresponding node and bucket. The first index of 4th dimension refers to gradients, and the second to hessians.
+func BoostedTreesMakeStatsSummary(scope *Scope, node_ids tf.Output, gradients tf.Output, hessians tf.Output, bucketized_features_list []tf.Output, max_splits int64, num_buckets int64) (stats_summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"max_splits": max_splits, "num_buckets": num_buckets}
 	opspec := tf.OpSpec{
-		Type: "PreventGradient",
+		Type: "BoostedTreesMakeStatsSummary",
 		Input: []tf.Input{
-			input,
+			node_ids, gradients, hessians, tf.OutputList(bucketized_features_list),
 		},
 		Attrs: attrs,
 	}
@@ -3840,21 +3770,6 @@ func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientA
 	return op.Output(0)
 }
 
-// Computes asin of x element-wise.
-func Asin(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Asin",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Computes the sum along sparse segments of a tensor.
 //
 // Read
@@ -4564,37 +4479,142 @@ func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
-// NthElementAttr is an optional argument to NthElement.
-type NthElementAttr func(optionalAttr)
+// Computes exponential of x element-wise.  \\(y = e^x\\).
+func Exp(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Exp",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// NthElementReverse sets the optional reverse attribute to value.
+// Returns an element-wise indication of the sign of a number.
 //
-// value: When set to True, find the nth-largest value in the vector and vice
-// versa.
-// If not specified, defaults to false
-func NthElementReverse(value bool) NthElementAttr {
+// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.
+//
+// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
+func Sign(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sign",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ArgMinAttr is an optional argument to ArgMin.
+type ArgMinAttr func(optionalAttr)
+
+// ArgMinOutputType sets the optional output_type attribute to value.
+// If not specified, defaults to DT_INT64
+func ArgMinOutputType(value tf.DataType) ArgMinAttr {
 	return func(m optionalAttr) {
-		m["reverse"] = value
+		m["output_type"] = value
 	}
 }
 
-// Finds values of the `n`-th order statistic for the last dimension.
+// Returns the index with the smallest value across dimensions of a tensor.
 //
-// If the input is a vector (rank-1), finds the entries which is the nth-smallest
-// value in the vector and outputs their values as scalar tensor.
+// Note that in case of ties the identity of the return value is not guaranteed.
 //
-// For matrices (resp. higher rank input), computes the entries which is the
-// nth-smallest value in each row (resp. vector along the last dimension). Thus,
+// Arguments:
 //
-//     values.shape = input.shape[:-1]
+//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
+// Describes which dimension of the input Tensor to reduce across. For vectors,
+// use dimension = 0.
+func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ArgMin",
+		Input: []tf.Input{
+			input, dimension,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Convert the quantized 'input' tensor into a lower-precision 'output', using the
+//
+// output range specified with 'requested_output_min' and 'requested_output_max'.
+//
+// [input_min, input_max] are scalar floats that specify the range for the float
+// interpretation of the 'input' data. For example, if input_min is -1.0f and
+// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
 //
 // Arguments:
-//	input: 1-D or higher with last dimension at least `n+1`.
-//	n: 0-D. Position of sorted vector to select along the last dimension (along
-// each row for matrices). Valid range of n is `[0, input.shape[:-1])`
 //
-// Returns The `n`-th order statistic along each last dimensional slice.
-func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
+//	input_min: The float value that the minimum quantized input value represents.
+//	input_max: The float value that the maximum quantized input value represents.
+//	requested_output_min: The float value that the minimum quantized output value represents.
+//	requested_output_max: The float value that the maximum quantized output value represents.
+//	out_type: The type of the output. Should be a lower bit depth than Tinput.
+//
+// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
+func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	opspec := tf.OpSpec{
+		Type: "Requantize",
+		Input: []tf.Input{
+			input, input_min, input_max, requested_output_min, requested_output_max,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// PreventGradientAttr is an optional argument to PreventGradient.
+type PreventGradientAttr func(optionalAttr)
+
+// PreventGradientMessage sets the optional message attribute to value.
+//
+// value: Will be printed in the error when anyone tries to differentiate
+// this operation.
+// If not specified, defaults to ""
+func PreventGradientMessage(value string) PreventGradientAttr {
+	return func(m optionalAttr) {
+		m["message"] = value
+	}
+}
+
+// An identity op that triggers an error if a gradient is requested.
+//
+// When executed in a graph, this op outputs its input tensor as-is.
+//
+// When building ops to compute gradients, the TensorFlow gradient system
+// will return an error when trying to lookup the gradient of this op,
+// because no gradient must ever be registered for this function.  This
+// op exists to prevent subtle bugs from silently returning unimplemented
+// gradients in some corner cases.
+//
+// Arguments:
+//	input: any tensor.
+//
+// Returns the same input tensor.
+func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -4603,9 +4623,9 @@ func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthEleme
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "NthElement",
+		Type: "PreventGradient",
 		Input: []tf.Input{
-			input, n,
+			input,
 		},
 		Attrs: attrs,
 	}
@@ -4613,6 +4633,21 @@ func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthEleme
 	return op.Output(0)
 }
 
+// Computes asin of x element-wise.
+func Asin(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Asin",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the maximum along segments of a tensor.
 //
 // Read
@@ -4662,61 +4697,37 @@ func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num
 	return op.Output(0)
 }
 
-// Computes exponential of x element-wise.  \\(y = e^x\\).
-func Exp(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Exp",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
+// NthElementAttr is an optional argument to NthElement.
+type NthElementAttr func(optionalAttr)
 
-// Returns an element-wise indication of the sign of a number.
-//
-// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.
+// NthElementReverse sets the optional reverse attribute to value.
 //
-// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
-func Sign(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Sign",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ArgMinAttr is an optional argument to ArgMin.
-type ArgMinAttr func(optionalAttr)
-
-// ArgMinOutputType sets the optional output_type attribute to value.
-// If not specified, defaults to DT_INT64
-func ArgMinOutputType(value tf.DataType) ArgMinAttr {
+// value: When set to True, find the nth-largest value in the vector and vice
+// versa.
+// If not specified, defaults to false
+func NthElementReverse(value bool) NthElementAttr {
 	return func(m optionalAttr) {
-		m["output_type"] = value
+		m["reverse"] = value
 	}
 }
 
-// Returns the index with the smallest value across dimensions of a tensor.
+// Finds values of the `n`-th order statistic for the last dimension.
 //
-// Note that in case of ties the identity of the return value is not guaranteed.
+// If the input is a vector (rank-1), finds the entries which is the nth-smallest
+// value in the vector and outputs their values as scalar tensor.
+//
+// For matrices (resp. higher rank input), computes the entries which is the
+// nth-smallest value in each row (resp. vector along the last dimension). Thus,
+//
+//     values.shape = input.shape[:-1]
 //
 // Arguments:
+//	input: 1-D or higher with last dimension at least `n+1`.
+//	n: 0-D. Position of sorted vector to select along the last dimension (along
+// each row for matrices). Valid range of n is `[0, input.shape[:-1])`
 //
-//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
-// Describes which dimension of the input Tensor to reduce across. For vectors,
-// use dimension = 0.
-func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) {
+// Returns The `n`-th order statistic along each last dimensional slice.
+func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -4725,9 +4736,9 @@ func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgM
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ArgMin",
+		Type: "NthElement",
 		Input: []tf.Input{
-			input, dimension,
+			input, n,
 		},
 		Attrs: attrs,
 	}
@@ -4735,38 +4746,56 @@ func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgM
 	return op.Output(0)
 }
 
-// Convert the quantized 'input' tensor into a lower-precision 'output', using the
+// Computes the sum along sparse segments of a tensor.
 //
-// output range specified with 'requested_output_min' and 'requested_output_max'.
+// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
+// misisng, the `output` tensor at that position will be zeroed.
 //
-// [input_min, input_max] are scalar floats that specify the range for the float
-// interpretation of the 'input' data. For example, if input_min is -1.0f and
-// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// For example:
+//
+// ```python
+// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+//
+// tf.sparse_segment_sum_with_num_segments(
+//     c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
+// # => [[0 0 0 0]
+// #     [0 0 0 0]
+// #     [0 0 0 0]]
+//
+// tf.sparse_segment_sum_with_num_segments(c,
+//                                         tf.constant([0, 1]),
+//                                         tf.constant([0, 2],
+//                                         num_segments=4))
+// # => [[ 1  2  3  4]
+// #     [ 0  0  0  0]
+// #     [-1 -2 -3 -4]
+// #     [ 0  0  0  0]]
+// ```
 //
 // Arguments:
 //
-//	input_min: The float value that the minimum quantized input value represents.
-//	input_max: The float value that the maximum quantized input value represents.
-//	requested_output_min: The float value that the minimum quantized output value represents.
-//	requested_output_max: The float value that the maximum quantized output value represents.
-//	out_type: The type of the output. Should be a lower bit depth than Tinput.
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//	num_segments: Should equal the number of distinct segment IDs.
 //
-// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
-func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+// Returns Has same shape as data, except for dimension 0 which
+// has size `num_segments`.
+func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
 	opspec := tf.OpSpec{
-		Type: "Requantize",
+		Type: "SparseSegmentSumWithNumSegments",
 		Input: []tf.Input{
-			input, input_min, input_max, requested_output_min, requested_output_max,
+			data, indices, segment_ids, num_segments,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
 // Computes the determinant of one or more square matrices.
@@ -9168,60 +9197,120 @@ func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output
 	opspec := tf.OpSpec{
 		Type: "FusedBatchNorm",
 		Input: []tf.Input{
-			x, scale, offset, mean, variance,
+			x, scale, offset, mean, variance,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+}
+
+// RandomStandardNormalAttr is an optional argument to RandomStandardNormal.
+type RandomStandardNormalAttr func(optionalAttr)
+
+// RandomStandardNormalSeed sets the optional seed attribute to value.
+//
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// RandomStandardNormalSeed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Outputs random values from a normal distribution.
+//
+// The generated values will have mean 0 and standard deviation 1.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	dtype: The type of the output.
+//
+// Returns A tensor of the specified shape filled with random normal values.
+func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomStandardNormal",
+		Input: []tf.Input{
+			shape,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+	return op.Output(0)
 }
 
-// RandomStandardNormalAttr is an optional argument to RandomStandardNormal.
-type RandomStandardNormalAttr func(optionalAttr)
+// RandomUniformIntAttr is an optional argument to RandomUniformInt.
+type RandomUniformIntAttr func(optionalAttr)
 
-// RandomStandardNormalSeed sets the optional seed attribute to value.
+// RandomUniformIntSeed sets the optional seed attribute to value.
 //
 // value: If either `seed` or `seed2` are set to be non-zero, the random number
 // generator is seeded by the given seed.  Otherwise, it is seeded by a
 // random seed.
 // If not specified, defaults to 0
-func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr {
+func RandomUniformIntSeed(value int64) RandomUniformIntAttr {
 	return func(m optionalAttr) {
 		m["seed"] = value
 	}
 }
 
-// RandomStandardNormalSeed2 sets the optional seed2 attribute to value.
+// RandomUniformIntSeed2 sets the optional seed2 attribute to value.
 //
 // value: A second seed to avoid seed collision.
 // If not specified, defaults to 0
-func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr {
+func RandomUniformIntSeed2(value int64) RandomUniformIntAttr {
 	return func(m optionalAttr) {
 		m["seed2"] = value
 	}
 }
 
-// Outputs random values from a normal distribution.
+// Outputs random integers from a uniform distribution.
 //
-// The generated values will have mean 0 and standard deviation 1.
+// The generated values are uniform integers in the range `[minval, maxval)`.
+// The lower bound `minval` is included in the range, while the upper bound
+// `maxval` is excluded.
+//
+// The random integers are slightly biased unless `maxval - minval` is an exact
+// power of two.  The bias is small for values of `maxval - minval` significantly
+// smaller than the range of the output (either `2^32` or `2^64`).
 //
 // Arguments:
 //	shape: The shape of the output tensor.
-//	dtype: The type of the output.
+//	minval: 0-D.  Inclusive lower bound on the generated integers.
+//	maxval: 0-D.  Exclusive upper bound on the generated integers.
 //
-// Returns A tensor of the specified shape filled with random normal values.
-func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) {
+// Returns A tensor of the specified shape filled with uniform random integers.
+func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomStandardNormal",
+		Type: "RandomUniformInt",
 		Input: []tf.Input{
-			shape,
+			shape, minval, maxval,
 		},
 		Attrs: attrs,
 	}
@@ -11926,38 +12015,6 @@ func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...Fix
 	return op.Output(0)
 }
 
-// The gradient operator for the SparseAdd op.
-//
-// The SparseAdd op calculates A + B, where A, B, and the sum are all represented
-// as `SparseTensor` objects.  This op takes in the upstream gradient w.r.t.
-// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty
-// values of A and B.
-//
-// Arguments:
-//	backprop_val_grad: 1-D with shape `[nnz(sum)]`.  The gradient with respect to
-// the non-empty values of the sum.
-//	a_indices: 2-D.  The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`.
-//	b_indices: 2-D.  The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`.
-//	sum_indices: 2-D.  The `indices` of the sum `SparseTensor`, size
-// `[nnz(sum), ndims]`.
-//
-// Returns 1-D with shape `[nnz(A)]`. The gradient with respect to the
-// non-empty values of A.1-D with shape `[nnz(B)]`. The gradient with respect to the
-// non-empty values of B.
-func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseAddGrad",
-		Input: []tf.Input{
-			backprop_val_grad, a_indices, b_indices, sum_indices,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
 // String lengths of `input`.
 //
 // Computes the length of each string given in the input tensor.
@@ -12814,6 +12871,123 @@ func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
 	return op.Output(0)
 }
 
+// ShapeAttr is an optional argument to Shape.
+type ShapeAttr func(optionalAttr)
+
+// ShapeOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_INT32
+func ShapeOutType(value tf.DataType) ShapeAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Returns the shape of a tensor.
+//
+// This operation returns a 1-D integer tensor representing the shape of `input`.
+//
+// For example:
+//
+// ```
+// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
+// shape(t) ==> [2, 2, 3]
+// ```
+func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Shape",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the power of one value to another.
+//
+// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for
+// corresponding elements in `x` and `y`. For example:
+//
+// ```
+// # tensor 'x' is [[2, 2]], [3, 3]]
+// # tensor 'y' is [[8, 16], [2, 3]]
+// tf.pow(x, y) ==> [[256, 65536], [9, 27]]
+// ```
+func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Pow",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes fingerprints of the input strings.
+//
+// Arguments:
+//	input: vector of strings to compute fingerprints on.
+//
+// Returns a (N,2) shaped matrix where N is the number of elements in the input
+// vector. Each row contains the low and high parts of the fingerprint.
+func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SdcaFprint",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// The gradient operator for the SparseAdd op.
+//
+// The SparseAdd op calculates A + B, where A, B, and the sum are all represented
+// as `SparseTensor` objects.  This op takes in the upstream gradient w.r.t.
+// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty
+// values of A and B.
+//
+// Arguments:
+//	backprop_val_grad: 1-D with shape `[nnz(sum)]`.  The gradient with respect to
+// the non-empty values of the sum.
+//	a_indices: 2-D.  The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`.
+//	b_indices: 2-D.  The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`.
+//	sum_indices: 2-D.  The `indices` of the sum `SparseTensor`, size
+// `[nnz(sum), ndims]`.
+//
+// Returns 1-D with shape `[nnz(A)]`. The gradient with respect to the
+// non-empty values of A.1-D with shape `[nnz(B)]`. The gradient with respect to the
+// non-empty values of B.
+func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseAddGrad",
+		Input: []tf.Input{
+			backprop_val_grad, a_indices, b_indices, sum_indices,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
 // Computes the mean along segments of a tensor.
 //
 // Read
@@ -12997,10 +13171,83 @@ func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Outpu
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "InTopKV2",
+		Type: "InTopKV2",
+		Input: []tf.Input{
+			predictions, targets, k,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// RandomPoissonV2Attr is an optional argument to RandomPoissonV2.
+type RandomPoissonV2Attr func(optionalAttr)
+
+// RandomPoissonV2Seed sets the optional seed attribute to value.
+//
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// RandomPoissonV2Seed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// RandomPoissonV2Dtype sets the optional dtype attribute to value.
+// If not specified, defaults to DT_INT64
+func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Outputs random values from the Poisson distribution(s) described by rate.
+//
+// This op uses two algorithms, depending on rate. If rate >= 10, then
+// the algorithm by Hormann is used to acquire samples via
+// transformation-rejection.
+// See http://www.sciencedirect.com/science/article/pii/0167668793909974.
+//
+// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform
+// random variables.
+// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer
+// Programming, Volume 2. Addison Wesley
+//
+// Arguments:
+//	shape: 1-D integer tensor. Shape of independent samples to draw from each
+// distribution described by the shape parameters given in rate.
+//	rate: A tensor in which each scalar is a "rate" parameter describing the
+// associated poisson distribution.
+//
+// Returns A tensor with shape `shape + shape(rate)`. Each slice
+// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
+// `rate[i0, i1, ...iN]`.
+func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomPoissonV2",
 		Input: []tf.Input{
-			predictions, targets, k,
+			shape, rate,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
@@ -20288,164 +20535,6 @@ func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feat
 	return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights
 }
 
-// ShapeAttr is an optional argument to Shape.
-type ShapeAttr func(optionalAttr)
-
-// ShapeOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_INT32
-func ShapeOutType(value tf.DataType) ShapeAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// Returns the shape of a tensor.
-//
-// This operation returns a 1-D integer tensor representing the shape of `input`.
-//
-// For example:
-//
-// ```
-// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
-// shape(t) ==> [2, 2, 3]
-// ```
-func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Shape",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the power of one value to another.
-//
-// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for
-// corresponding elements in `x` and `y`. For example:
-//
-// ```
-// # tensor 'x' is [[2, 2]], [3, 3]]
-// # tensor 'y' is [[8, 16], [2, 3]]
-// tf.pow(x, y) ==> [[256, 65536], [9, 27]]
-// ```
-func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Pow",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes fingerprints of the input strings.
-//
-// Arguments:
-//	input: vector of strings to compute fingerprints on.
-//
-// Returns a (N,2) shaped matrix where N is the number of elements in the input
-// vector. Each row contains the low and high parts of the fingerprint.
-func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SdcaFprint",
-		Input: []tf.Input{
-			input,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// RandomPoissonV2Attr is an optional argument to RandomPoissonV2.
-type RandomPoissonV2Attr func(optionalAttr)
-
-// RandomPoissonV2Seed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// RandomPoissonV2Seed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// RandomPoissonV2Dtype sets the optional dtype attribute to value.
-// If not specified, defaults to DT_INT64
-func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
-	}
-}
-
-// Outputs random values from the Poisson distribution(s) described by rate.
-//
-// This op uses two algorithms, depending on rate. If rate >= 10, then
-// the algorithm by Hormann is used to acquire samples via
-// transformation-rejection.
-// See http://www.sciencedirect.com/science/article/pii/0167668793909974.
-//
-// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform
-// random variables.
-// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer
-// Programming, Volume 2. Addison Wesley
-//
-// Arguments:
-//	shape: 1-D integer tensor. Shape of independent samples to draw from each
-// distribution described by the shape parameters given in rate.
-//	rate: A tensor in which each scalar is a "rate" parameter describing the
-// associated poisson distribution.
-//
-// Returns A tensor with shape `shape + shape(rate)`. Each slice
-// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
-// `rate[i0, i1, ...iN]`.
-func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "RandomPoissonV2",
-		Input: []tf.Input{
-			shape, rate,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve.
 type MatrixTriangularSolveAttr func(optionalAttr)
 
@@ -20959,66 +21048,6 @@ func UnsortedSegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output, nu
 	return op.Output(0)
 }
 
-// RandomUniformIntAttr is an optional argument to RandomUniformInt.
-type RandomUniformIntAttr func(optionalAttr)
-
-// RandomUniformIntSeed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomUniformIntSeed(value int64) RandomUniformIntAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// RandomUniformIntSeed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomUniformIntSeed2(value int64) RandomUniformIntAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Outputs random integers from a uniform distribution.
-//
-// The generated values are uniform integers in the range `[minval, maxval)`.
-// The lower bound `minval` is included in the range, while the upper bound
-// `maxval` is excluded.
-//
-// The random integers are slightly biased unless `maxval - minval` is an exact
-// power of two.  The bias is small for values of `maxval - minval` significantly
-// smaller than the range of the output (either `2^32` or `2^64`).
-//
-// Arguments:
-//	shape: The shape of the output tensor.
-//	minval: 0-D.  Inclusive lower bound on the generated integers.
-//	maxval: 0-D.  Exclusive upper bound on the generated integers.
-//
-// Returns A tensor of the specified shape filled with uniform random integers.
-func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "RandomUniformInt",
-		Input: []tf.Input{
-			shape, minval, maxval,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Computes the mean along sparse segments of a tensor.
 //
 // Read
@@ -28116,35 +28145,6 @@ func MakeIterator(scope *Scope, dataset tf.Output, iterator tf.Output) (o *tf.Op
 	return scope.AddOperation(opspec)
 }
 
-// Makes the summary of accumulated stats for the batch.
-//
-// The summary stats contains gradients and hessians accumulated into the corresponding node and bucket for each example.
-//
-// Arguments:
-//	node_ids: int32 Rank 1 Tensor containing node ids, which each example falls into for the requested layer.
-//	gradients: float32; Rank 2 Tensor (shape=[#examples, 1]) for gradients.
-//	hessians: float32; Rank 2 Tensor (shape=[#examples, 1]) for hessians.
-//	bucketized_features_list: int32 list of Rank 1 Tensors, each containing the bucketized feature (for each feature column).
-//	max_splits: int; the maximum number of splits possible in the whole tree.
-//	num_buckets: int; equals to the maximum possible value of bucketized feature.
-//
-// Returns output Rank 4 Tensor (shape=[#features, #splits, #buckets, 2]) containing accumulated stats put into the corresponding node and bucket. The first index of 4th dimension refers to gradients, and the second to hessians.
-func BoostedTreesMakeStatsSummary(scope *Scope, node_ids tf.Output, gradients tf.Output, hessians tf.Output, bucketized_features_list []tf.Output, max_splits int64, num_buckets int64) (stats_summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"max_splits": max_splits, "num_buckets": num_buckets}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesMakeStatsSummary",
-		Input: []tf.Input{
-			node_ids, gradients, hessians, tf.OutputList(bucketized_features_list),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Adjust the contrast of one or more images.
 //
 // `images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
-- 
GitLab


From 9a7f252910bb2cc14092adc6e8163bd6e696c1f0 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Wed, 19 Sep 2018 16:24:22 -0700
Subject: [PATCH 0413/1357] [XLA] Add R2 strided slice test.

PiperOrigin-RevId: 213718019
---
 tensorflow/compiler/xla/tests/slice_test.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc
index a40c2d7de6..2cc33ab096 100644
--- a/tensorflow/compiler/xla/tests/slice_test.cc
+++ b/tensorflow/compiler/xla/tests/slice_test.cc
@@ -412,6 +412,7 @@ INSTANTIATE_TEST_CASE_P(
         R2Spec{511, 513, {{129, 300}}, {{400, 500}}, {{7, 11}}, {{0, 1}}},  //
         R2Spec{511, 513, {{129, 300}}, {{400, 500}}, {{11, 7}}, {{1, 0}}},  //
         R2Spec{511, 513, {{129, 300}}, {{400, 500}}, {{11, 7}}, {{0, 1}}},  //
+        R2Spec{8672, 512, {{8, 0}}, {{8672, 512}}, {{542, 1}}, {{1, 0}}},   //
         R2Spec{
             511, 513, {{129, 300}}, {{400, 500}}, {{101, 129}}, {{1, 0}}},  //
         R2Spec{
-- 
GitLab


From 1d78936a3989f6ee5a9945746cd329c37e82287c Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Wed, 19 Sep 2018 16:25:08 -0700
Subject: [PATCH 0414/1357] Add VerifiedHloModule class. VerifiedHloModule is
 derived from HloModule and verifies itself on destruction. This is designed
 to be used in HloVerifiedTestBase. This replaces the current mechanism which
 verifies HloModules in the TearDown method. The VerifiedHloModule approach is
 cleaner (less state on the test object) and more capable because these
 verified HLO modules can be passed to methods which require taking ownership
 of the module (eg, HlotestBase::Execute).

This change required some changes to the parser which enables constructing the parsed HloModule into an already allocated HloModule. Some trivial changes to HloModule are required as well.

PiperOrigin-RevId: 213718126
---
 tensorflow/compiler/xla/service/BUILD         |   2 +-
 tensorflow/compiler/xla/service/hlo_module.h  |   4 +-
 tensorflow/compiler/xla/service/hlo_parser.cc |  83 ++++-----
 tensorflow/compiler/xla/service/hlo_parser.h  |  13 +-
 .../xla/service/hlo_pass_pipeline_test.cc     |  24 +--
 .../compiler/xla/service/hlo_verifier.cc      |   1 +
 .../compiler/xla/service/name_uniquer.cc      |   4 +-
 tensorflow/compiler/xla/tests/BUILD           |  22 ++-
 .../xla/tests/hlo_verified_test_base.cc       |  78 +++++----
 .../xla/tests/hlo_verified_test_base.h        |  63 +++++--
 .../xla/tests/hlo_verified_test_base_test.cc  | 158 ++++++++++++++++++
 11 files changed, 338 insertions(+), 114 deletions(-)
 create mode 100644 tensorflow/compiler/xla/tests/hlo_verified_test_base_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 4b183b4350..2bc50c70cf 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -2605,7 +2605,7 @@ tf_cc_test(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:hlo_verified_test_base",
         "//tensorflow/compiler/xla/tests:test_utils",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:lib",
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 3bc2d13781..735804e827 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -63,6 +63,7 @@ class HloModule {
   // tests). The versioned handle is used by the service in the compilation
   // cache. A default configuration is created for this module.
   explicit HloModule(const string& name, const HloModuleConfig& config);
+  virtual ~HloModule() {}
 
   // Adds an entry computation to the module. A module can only have one entry
   // computation. Returns a pointer to the newly added computation.
@@ -87,6 +88,7 @@ class HloModule {
       const std::unordered_map<HloComputation*, HloComputation*>& replacements);
 
   const string& name() const { return name_; }
+  void set_name(string name) { name_ = std::move(name); }
 
   // Returns a deep copy of this module including all computations.
   std::unique_ptr<HloModule> Clone(const string& suffix = "clone") const;
@@ -255,7 +257,7 @@ class HloModule {
       std::unique_ptr<HloComputation> computation, bool is_entry,
       bool uniquify_identifiers);
 
-  const string name_;
+  string name_;
   HloModuleConfig config_;
   HloComputation* entry_computation_ = nullptr;
   std::vector<std::unique_ptr<HloComputation>> computations_;
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 11caa89c54..37197b273b 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -64,14 +64,11 @@ class HloParser {
  public:
   using LocTy = HloLexer::LocTy;
 
-  explicit HloParser(absl::string_view str, const HloModuleConfig& config)
-      : lexer_(str), config_(config) {}
+  explicit HloParser(absl::string_view str) : lexer_(str) {}
 
-  // Runs the parser. Returns false if an error occurred.
-  bool Run();
-
-  // Returns the parsed HloModule.
-  std::unique_ptr<HloModule> ConsumeHloModule() { return std::move(module_); }
+  // Runs the parser and constructs the resulting HLO in the given (empty)
+  // HloModule. Returns false if an error occurred.
+  bool Run(HloModule* module);
 
   // Returns the error information.
   string GetError() const { return StrJoin(error_, "\n"); }
@@ -98,8 +95,8 @@ class HloParser {
       const string& name, const optional<Shape>& shape = nullopt);
 
   // ParseXXX returns false if an error occurred.
-  bool ParseHloModule();
-  bool ParseComputations();
+  bool ParseHloModule(HloModule* module);
+  bool ParseComputations(HloModule* module);
   bool ParseComputation(HloComputation** entry_computation);
   bool ParseInstructionList(HloComputation::Builder* builder,
                             string* root_name);
@@ -293,9 +290,7 @@ class HloParser {
       computation_pool_;
 
   HloLexer lexer_;
-  std::unique_ptr<HloModule> module_;
   std::vector<std::unique_ptr<HloComputation>> computations_;
-  const HloModuleConfig config_;
   std::vector<string> error_;
 
   // Function that gets invoked when we try to resolve an instruction
@@ -349,9 +344,9 @@ bool HloParser::TokenError(absl::string_view msg) {
   return Error(lexer_.GetLoc(), msg);
 }
 
-bool HloParser::Run() {
+bool HloParser::Run(HloModule* module) {
   lexer_.Lex();
-  return ParseHloModule();
+  return ParseHloModule(module);
 }
 
 std::pair<HloInstruction*, HloParser::LocTy>* HloParser::FindInstruction(
@@ -366,7 +361,7 @@ std::pair<HloInstruction*, HloParser::LocTy>* HloParser::FindInstruction(
 }
 
 // ::= 'HloModule' name computations
-bool HloParser::ParseHloModule() {
+bool HloParser::ParseHloModule(HloModule* module) {
   if (lexer_.GetKind() != TokKind::kw_HloModule) {
     return TokenError("expects HloModule");
   }
@@ -385,22 +380,20 @@ bool HloParser::ParseHloModule() {
     return false;
   }
 
-  module_ = absl::make_unique<HloModule>(name, config_);
-
-  if (!ParseComputations()) {
+  module->set_name(name);
+  if (!ParseComputations(module)) {
     return false;
   }
 
   if (is_scheduled.has_value() && *is_scheduled) {
-    TF_CHECK_OK(
-        module_->set_schedule(ScheduleFromInstructionOrder(module_.get())));
+    TF_CHECK_OK(module->set_schedule(ScheduleFromInstructionOrder(module)));
   }
 
   return true;
 }
 
 // computations ::= (computation)+
-bool HloParser::ParseComputations() {
+bool HloParser::ParseComputations(HloModule* module) {
   HloComputation* entry_computation = nullptr;
   do {
     if (!ParseComputation(&entry_computation)) {
@@ -416,21 +409,20 @@ bool HloParser::ParseComputations() {
     if ((entry_computation != nullptr &&
          computations_[i].get() != entry_computation) ||
         (entry_computation == nullptr && i != computations_.size() - 1)) {
-      module_->AddEmbeddedComputation(std::move(computations_[i]));
+      module->AddEmbeddedComputation(std::move(computations_[i]));
       continue;
     }
-    auto computation =
-        module_->AddEntryComputation(std::move(computations_[i]));
+    auto computation = module->AddEntryComputation(std::move(computations_[i]));
     // The parameters and result layouts were set to default layout. Here we
     // set the layouts to what the hlo text says.
     for (int p = 0; p < computation->num_parameters(); p++) {
       const Shape& param_shape = computation->parameter_instruction(p)->shape();
-      TF_CHECK_OK(module_->mutable_entry_computation_layout()
+      TF_CHECK_OK(module->mutable_entry_computation_layout()
                       ->mutable_parameter_layout(p)
                       ->CopyLayoutFromShape(param_shape));
     }
     const Shape& result_shape = computation->root_instruction()->shape();
-    TF_CHECK_OK(module_->mutable_entry_computation_layout()
+    TF_CHECK_OK(module->mutable_entry_computation_layout()
                     ->mutable_result_layout()
                     ->CopyLayoutFromShape(result_shape));
   }
@@ -3247,53 +3239,62 @@ Status HloParser::ParseSingleInstruction(HloComputation::Builder* builder,
 
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(
     absl::string_view str, const HloModuleConfig& config) {
-  HloParser parser(str, config);
-  if (!parser.Run()) {
+  auto module = absl::make_unique<HloModule>(/*name=*/"", config);
+  HloParser parser(str);
+  if (!parser.Run(module.get())) {
     return InvalidArgument("Syntax error:\n%s", parser.GetError());
   }
-  return parser.ConsumeHloModule();
+  return std::move(module);
 }
 
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(absl::string_view str) {
-  HloModuleConfig config;
-  return ParseHloString(str, config);
+  auto module = absl::make_unique<HloModule>(/*name=*/"", HloModuleConfig());
+  HloParser parser(str);
+  if (!parser.Run(module.get())) {
+    return InvalidArgument("Syntax error:\n%s", parser.GetError());
+  }
+  return std::move(module);
+}
+
+Status ParseHloString(absl::string_view str, HloModule* module) {
+  TF_RET_CHECK(module->computation_count() == 0);
+  HloParser parser(str);
+  if (!parser.Run(module)) {
+    return InvalidArgument("Syntax error:\n%s", parser.GetError());
+  }
+  return Status::OK();
 }
 
 StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
     absl::string_view str, absl::string_view name) {
-  HloModuleConfig config;
-  HloParser parser(str, config);
+  HloParser parser(str);
   auto builder = absl::make_unique<HloComputation::Builder>(string(name));
   string root_name;
   TF_RETURN_IF_ERROR(parser.ParseSingleInstruction(builder.get(), &root_name));
   std::unique_ptr<HloComputation> computation = builder->Build();
-  auto module = absl::make_unique<HloModule>(string(name), config);
+  auto module = absl::make_unique<HloModule>(string(name), HloModuleConfig());
   module->AddEntryComputation(std::move(computation));
   return std::move(module);
 }
 
 StatusOr<HloSharding> ParseSharding(absl::string_view str) {
-  HloModuleConfig config;
-  HloParser parser(str, config);
+  HloParser parser(str);
   return parser.ParseShardingOnly();
 }
 
 StatusOr<Window> ParseWindow(absl::string_view str) {
-  HloModuleConfig config;
-  HloParser parser(str, config);
+  HloParser parser(str);
   return parser.ParseWindowOnly();
 }
 
 StatusOr<ConvolutionDimensionNumbers> ParseConvolutionDimensionNumbers(
     absl::string_view str) {
-  HloModuleConfig config;
-  HloParser parser(str, config);
+  HloParser parser(str);
   return parser.ParseConvolutionDimensionNumbersOnly();
 }
 
 StatusOr<PaddingConfig> ParsePaddingConfig(absl::string_view str) {
-  HloModuleConfig config;
-  HloParser parser(str, config);
+  HloParser parser(str);
   return parser.ParsePaddingConfigOnly();
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h
index 1882a184da..3696035514 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.h
+++ b/tensorflow/compiler/xla/service/hlo_parser.h
@@ -30,18 +30,23 @@ namespace xla {
 // For details about the syntax accepted by this parser, see
 // g3doc/hlo_parser.md.
 
-// The api of the hlo parser. Given a string in the HloModule::ToString()
-// format, parses the string and creates a HloModule with the given config.
+// Given a string in the HloModule::ToString() format, parses the string and
+// creates a HloModule with the given config.
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(
     absl::string_view str, const HloModuleConfig& config);
 
+// Given a string in the HloModule::ToString() format, parses the string and
+// builds the HloModule in place at the given module pointer. 'module' must
+// point to an empty module (no computations).
+Status ParseHloString(absl::string_view str, HloModule* module);
+
 // Parses the text for a single HLO operation into an HLO module with a function
 // that runs that operation (with the same parameters) as its entry computation.
 StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
     absl::string_view str, absl::string_view name = "single_op");
 
-// The api of the hlo parser. Given a string in the HloModule::ToString()
-// format, parses the string and creates a HloModule with default config.
+// Given a string in the HloModule::ToString() format, parses the string and
+// creates a HloModule with default config.
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(absl::string_view str);
 
 // Parses the result of HloSharding::ToString(), e.g. "{replicated}".
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline_test.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline_test.cc
index e16b4d4c0a..ee8cb12b23 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline_test.cc
@@ -19,21 +19,21 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
-#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 
 namespace xla {
 namespace {
 
-class HloPassPipelineTest : public HloTestBase {
+class HloPassPipelineTest : public HloVerifiedTestBase {
  protected:
   StatusOr<HloModuleGroup> ParseModuleGroup(
       absl::Span<const string> hlo_strings) {
     HloModuleGroup group(TestName());
     for (const string& hlo_string : hlo_strings) {
-      TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
-                          ParseHloString(hlo_string));
+      TF_ASSIGN_OR_RETURN(std::unique_ptr<VerifiedHloModule> module,
+                          ParseAndReturnVerifiedModule(hlo_string));
       group.push_back(std::move(module));
     }
     return std::move(group);
@@ -106,8 +106,8 @@ ENTRY main {
   ROOT foo = f32[] multiply(a, b)
 }
 )";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<VerifiedHloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
   HloPassPipeline pipeline(TestName());
   pipeline.AddPass<FooToBarModulePass>();
 
@@ -129,8 +129,8 @@ ENTRY main {
   ROOT blahblah = f32[] multiply(a, b)
 }
 )";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<VerifiedHloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
   HloPassPipeline pipeline(TestName());
   pipeline.AddPass<FooToBarModulePass>();
 
@@ -191,8 +191,8 @@ ENTRY main {
   ROOT foo = f32[] multiply(a, b)
 }
 )";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<VerifiedHloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
   {
     // Run a pipeline with just the invariant checker. It should not fail
     // because there is no 'bar' instruction in the module.
@@ -243,8 +243,8 @@ ENTRY main {
   ROOT foo = f32[] multiply(a, b)
 }
 )";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<VerifiedHloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
   HloPassPipeline pipeline(TestName());
   pipeline.AddPass<BazToQuxModuleGroupPass>();
 
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 50f39cbcb5..6eb6658904 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -1057,6 +1057,7 @@ Status VerifySendsAndRecvs(const HloModule& module) {
 }  // namespace
 
 StatusOr<bool> HloVerifier::Run(HloModule* module) {
+  TF_RET_CHECK(!module->name().empty());
   TF_RETURN_IF_ERROR(VerifyHloStructure(module));
   TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module));
 
diff --git a/tensorflow/compiler/xla/service/name_uniquer.cc b/tensorflow/compiler/xla/service/name_uniquer.cc
index bd8fb17a23..ac2f79674f 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.cc
+++ b/tensorflow/compiler/xla/service/name_uniquer.cc
@@ -39,8 +39,10 @@ NameUniquer::NameUniquer(const string& separator) {
 }
 
 /*static*/ string NameUniquer::GetSanitizedName(const string& name) {
+  if (name.empty()) {
+    return "";
+  }
   string result = name;
-  CHECK(!result.empty()) << "name should not be empty";
   char c = static_cast<unsigned char>(result[0]);
   if (!isalpha(c) && c != '_') {
     result[0] = '_';
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index b49db029e2..fd3e3bfa94 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -154,11 +154,31 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_parser",
         "//tensorflow/compiler/xla/service:hlo_verifier",
         "//tensorflow/core:lib",
-        "//tensorflow/core:test",
+        "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/memory",
     ],
 )
 
+tf_cc_test(
+    name = "hlo_verified_test_base_test",
+    srcs = ["hlo_verified_test_base_test.cc"],
+    deps = [
+        ":hlo_test_base",
+        ":hlo_verified_test_base",
+        ":test_macros_cpu",
+        ":test_utils",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/client:xla_computation",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_parser",
+        "//tensorflow/compiler/xla/service:hlo_verifier",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+    ],
+)
+
 tf_cc_binary(
     name = "local_client_aot_test_helper",
     srcs = ["local_client_aot_test_helper.cc"],
diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc
index 8f86c528d0..8bd0a729b7 100644
--- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc
@@ -21,64 +21,68 @@ limitations under the License.
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/test.h"
 
 namespace xla {
 
-HloVerifiedTestBase::HloVerifiedTestBase(bool layout_sensitive,
-                                         bool allow_mixed_precision)
-    : HloTestBase(
-          /*verifier_layout_sensitive=*/layout_sensitive,
-          /*allow_mixed_precision_in_hlo_verifier=*/allow_mixed_precision) {}
-
-HloVerifiedTestBase::~HloVerifiedTestBase() {
-  // We can't call the ASSERT or EXPECT test macros in destructors, so we
-  // perform HLO verification in TearDown, and use the CHECK here to ensure
-  // users don't accidentally override the verification.
-  CHECK(tear_down_called_)
-      << "TearDown was never called; subclasses of HloVerifiedTestBase that "
-      << "override TearDown must call the superclass TearDown.";
-}
-
-void HloVerifiedTestBase::TearDown() {
-  EXPECT_FALSE(tear_down_called_)
-      << "TearDown called more than once; it should be called exactly once.";
-  tear_down_called_ = true;
-  if (module_) {
-    VerifyModule(module_.get());
+Status VerifiedHloModule::Verify() {
+  if (computation_count() == 0) {
+    // The computation was never built. Nothing to verify.
+    return Status::OK();
   }
-  for (int i = 0; i < modules_.size(); ++i) {
-    VerifyModule(modules_.at(i).get());
-  }
-  HloTestBase::TearDown();
+  return verifier_.Run(this).status();
 }
 
-void HloVerifiedTestBase::VerifyModule(HloModule* module) {
-  xla::StatusOr<bool> mutated = verifier().Run(module);
-  if (!mutated.ok()) {
-    ADD_FAILURE() << "HloVerifier failed: " << mutated.status();
-  } else {
-    EXPECT_FALSE(mutated.ValueOrDie())
-        << "HloVerifier should never mutate the HloModule";
+void VerifiedHloModule::VerifyOrAddFailure(const string& message) {
+  Status status = Verify();
+  if (!status.ok()) {
+    ADD_FAILURE() << "HloVerifier failed on module " << name()
+                  << (message.empty() ? "" : absl::StrCat(" (", message, ")"))
+                  << ": " << status;
   }
 }
 
+HloVerifiedTestBase::HloVerifiedTestBase(bool layout_sensitive,
+                                         bool allow_mixed_precision)
+    : HloTestBase(
+          /*verifier_layout_sensitive=*/layout_sensitive,
+          /*allow_mixed_precision_in_hlo_verifier=*/allow_mixed_precision),
+      verifier_layout_sensitive_(layout_sensitive),
+      allow_mixed_precision_in_hlo_verifier_(allow_mixed_precision) {}
+
 HloModule& HloVerifiedTestBase::module() {
   if (!module_) {
-    module_ = HloTestBase::CreateNewModule();
+    module_ = CreateNewVerifiedModule(TestName());
   }
   return *module_;
 }
 
 HloModule* HloVerifiedTestBase::CreateNewModule(const string& name) {
-  modules_.emplace_back(HloTestBase::CreateNewModule());
+  modules_.emplace_back(CreateNewVerifiedModule(name));
   return modules_.back().get();
 }
 
 void HloVerifiedTestBase::ParseAndVerifyModule(absl::string_view hlo_text,
                                                const HloModuleConfig& config) {
   CHECK(!module_) << "Called ParseModule when test already has a module.";
-  TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text, config));
-  VerifyModule(module_.get());
+  module_ = CreateNewVerifiedModule(TestName());
+  TF_CHECK_OK(ParseHloString(hlo_text, module_.get()));
+  module_->VerifyOrAddFailure("after parsing");
 }
+
+StatusOr<std::unique_ptr<VerifiedHloModule>>
+HloVerifiedTestBase::ParseAndReturnVerifiedModule(
+    absl::string_view hlo_text, const HloModuleConfig& config) {
+  auto module = CreateNewVerifiedModule(TestName());
+  TF_RETURN_IF_ERROR(ParseHloString(hlo_text, module.get()));
+  TF_RETURN_IF_ERROR(module->Verify());
+  return std::move(module);
+}
+
+std::unique_ptr<VerifiedHloModule> HloVerifiedTestBase::CreateNewVerifiedModule(
+    const string& name) {
+  return absl::make_unique<VerifiedHloModule>(
+      name, GetModuleConfigForTest(), verifier_layout_sensitive_,
+      allow_mixed_precision_in_hlo_verifier_);
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h
index 8fbc4fa753..388a99bb36 100644
--- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h
+++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h
@@ -20,53 +20,84 @@ limitations under the License.
 #include <memory>
 #include <utility>
 
+#include "absl/base/macros.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 
 namespace xla {
 
-// A base class for HLO tests that stores a default HloModule, and automatically
-// performs verification on that module on tear-down.
+// An HLO module derived class which verifies itself on destruction. This class
+// is intended to be used in unit tests. Any verification errors are raised via
+// ADD_FAILURE.
+class VerifiedHloModule : public HloModule {
+ public:
+  VerifiedHloModule(const string& name, const HloModuleConfig& config,
+                    bool verifier_layout_sensitive,
+                    bool allow_mixed_precision_in_hlo_verifier)
+      : HloModule(name, config),
+        verifier_(verifier_layout_sensitive,
+                  allow_mixed_precision_in_hlo_verifier) {}
+
+  ~VerifiedHloModule() override { VerifyOrAddFailure("in destructor"); }
+
+  // Verifies the module using HloVerifier and returns the status.
+  Status Verify();
+
+  // Verifies the module and flags any error with ADD_FAILURE. 'message' is
+  // included in the failure message.
+  void VerifyOrAddFailure(const string& message);
+
+ private:
+  HloVerifier verifier_;
+};
+
+// A base class for HLO tests that stores a default VerifiedHloModule.
 class HloVerifiedTestBase : public HloTestBase {
  protected:
-  explicit HloVerifiedTestBase(bool layout_sensitive = false,
-                               bool allow_mixed_precision = false);
-  ~HloVerifiedTestBase() override;
+  HloVerifiedTestBase(bool layout_sensitive = false,
+                      bool allow_mixed_precision = false);
 
   // Constructs a default shape verifier.
   std::unique_ptr<ShapeVerifier> MakeShapeVerifier();
 
-  // Performs verification on the default HloModule returned by module().
-  // Automatically called by the testing framework for each test.
-  //
-  // REQUIRED: subclasses that override TearDown() must call this explicitly.
-  void TearDown() override;
-
   // Returns the default HloModule, lazily creating it if necessary via
   // HloTestBase::CreateNewModule().
+  ABSL_DEPRECATED("Use CreateNewVerifiedModule() instead.")
   HloModule& module();
+
+  ABSL_DEPRECATED("Use ParseAndReturnVerifiedModule() instead.")
   void ParseAndVerifyModule(absl::string_view hlo_text,
                             const HloModuleConfig& config = HloModuleConfig());
 
+  // Parses the given string and returns module as a VerifiedHloModule.
+  StatusOr<std::unique_ptr<VerifiedHloModule>> ParseAndReturnVerifiedModule(
+      absl::string_view hlo_text,
+      const HloModuleConfig& config = HloModuleConfig());
+
   // Creates a new module for a test, and stores it in modules_ so it can be
   // verified. Intentionally hides HloTestBase::CreateNewModule, to prevent
   // creation of unverified modules.
+  ABSL_DEPRECATED("Use CreateNewVerifiedModule() instead.")
   HloModule* CreateNewModule(const string& name = TestName());
 
- private:
-  void VerifyModule(HloModule* module);
+  // Creates and returns a verified HLO module with the given name.
+  std::unique_ptr<VerifiedHloModule> CreateNewVerifiedModule(
+      const string& name = TestName());
 
+ private:
   // It is confusing to store modules created by module() and CreateNewModule()
   // in different fields, but it allows us to migrate tests to
   // HloVerifiedTestBase more easily, so it's a win because we can verify more
   // modules. See b/80488902.
   //
   // Lazily populated. Access via module().
-  std::unique_ptr<HloModule> module_;
+  std::unique_ptr<VerifiedHloModule> module_;
+
   // Populated by calls to CreateNewModule.
-  std::vector<std::unique_ptr<HloModule>> modules_;
+  std::vector<std::unique_ptr<VerifiedHloModule>> modules_;
 
-  bool tear_down_called_ = false;
+  bool verifier_layout_sensitive_;
+  bool allow_mixed_precision_in_hlo_verifier_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base_test.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base_test.cc
new file mode 100644
index 0000000000..5c0263e811
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base_test.cc
@@ -0,0 +1,158 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h"
+
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_verifier.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace {
+
+// This class includes unit tests which are expected to fail because invalid HLO
+// modules are intentionally built. Unfortunately, Tensorflow doesn't appear to
+// include the necessary gunit parts to test this test machinery (needs the
+// macro EXPECT_NONFATAL_FAILURE). The disabled tests can be run with the
+// disabled tests enabled and failures can be manually compared against
+// expectations.
+class HloVerifiedTestBaseTest : public HloVerifiedTestBase {};
+
+XLA_TEST_F(HloVerifiedTestBaseTest, NoModule) {
+  // Test shouldn't fail if no module is created at all.
+}
+
+XLA_TEST_F(HloVerifiedTestBaseTest, GoodLazilyCreatedModule) {
+  // Use module() to lazily create an empty module, build it up, and verify no
+  // failures.
+  HloModule& hlo_module = module();
+  auto builder = HloComputation::Builder(TestName());
+  auto input = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(42.0)));
+  builder.AddInstruction(
+      HloInstruction::CreateUnary(input->shape(), HloOpcode::kNegate, input));
+  hlo_module.AddEntryComputation(builder.Build());
+}
+
+// This test is expected to fail. See test class comment.
+XLA_TEST_F(HloVerifiedTestBaseTest, DISABLED_BadLazilyCreatedModule) {
+  // Use module() to lazily create an empty module and build up an invalid
+  // module.
+  HloModule& hlo_module = module();
+  auto builder = HloComputation::Builder(TestName());
+  auto input = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(42.0)));
+  builder.AddInstruction(
+      HloInstruction::CreateUnary(input->shape(), HloOpcode::kNegate, input));
+  hlo_module.AddEntryComputation(builder.Build());
+
+  *hlo_module.entry_computation()->root_instruction()->mutable_shape() =
+      ShapeUtil::MakeShape(PRED, {1, 2, 3});
+}
+
+XLA_TEST_F(HloVerifiedTestBaseTest, GoodCreateNewModule) {
+  // Call CreateNewModule and build up a valid module.
+  HloModule* module = CreateNewModule();
+  auto builder = HloComputation::Builder(TestName());
+  auto input = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(42.0)));
+  builder.AddInstruction(
+      HloInstruction::CreateUnary(input->shape(), HloOpcode::kNegate, input));
+  module->AddEntryComputation(builder.Build());
+}
+
+// This test is expected to fail. See test class comment.
+XLA_TEST_F(HloVerifiedTestBaseTest, DISABLED_BadCreateNewModule) {
+  // Call CreateNewModule and build up a invalid module.
+  HloModule* module = CreateNewModule();
+  auto builder = HloComputation::Builder(TestName());
+  auto input = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(42.0)));
+  builder.AddInstruction(
+      HloInstruction::CreateUnary(input->shape(), HloOpcode::kNegate, input));
+  module->AddEntryComputation(builder.Build());
+
+  *module->entry_computation()->root_instruction()->mutable_shape() =
+      ShapeUtil::MakeShape(PRED, {1, 2, 3});
+}
+
+XLA_TEST_F(HloVerifiedTestBaseTest, ParseAndVerifyModuleGood) {
+  const char* const hlo_string = R"(
+HloModule ParseAndVerifyModuleGood
+
+ENTRY entry {
+  x = f32[] parameter(0)
+  y = f32[] parameter(1)
+  ROOT add = f32[] add(x,y)
+}
+)";
+
+  ParseAndVerifyModule(hlo_string);
+  EXPECT_EQ(module().entry_computation()->instruction_count(), 3);
+}
+
+XLA_TEST_F(HloVerifiedTestBaseTest, ParseAndReturnVerifiedModuleGood) {
+  const char* const hlo_string = R"(
+HloModule ParseAndReturnVerifiedModuleGood
+
+ENTRY entry {
+  x = f32[] parameter(0)
+  y = f32[] parameter(1)
+  ROOT add = f32[] add(x,y)
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+  EXPECT_EQ(module->entry_computation()->instruction_count(), 3);
+}
+
+XLA_TEST_F(HloVerifiedTestBaseTest, ParseAndReturnVerifiedModuleInvalidText) {
+  const char* const hlo_string = R"(
+HloModule ParseAndReturnVerifiedModuleGood
+
+ENTRY entry {
+  x = f32[] parameter(0)
+  y = f32[] parameter(1)
+  ROOT add = f32[] add(x,y)
+}
+
+RANDOM GARBAGE
+)";
+
+  ASSERT_IS_NOT_OK(ParseAndReturnVerifiedModule(hlo_string).status());
+}
+
+// This test is expected to fail. See test class comment.
+XLA_TEST_F(HloVerifiedTestBaseTest, DISABLED_ParseAndReturnVerifiedModuleBad) {
+  const char* const hlo_string = R"(
+HloModule ParseAndReturnVerifiedModuleBad
+
+ENTRY entry {
+  x = f32[] parameter(0)
+  y = f32[] parameter(1)
+  ROOT add = f32[1234] add(x,y)
+}
+)";
+
+  ASSERT_IS_NOT_OK(ParseAndReturnVerifiedModule(hlo_string).status());
+}
+
+}  // namespace
+}  // namespace xla
-- 
GitLab


From 5d2047029a77545c97c0fdf74d9c03c92d1dcb88 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 17:28:57 -0700
Subject: [PATCH 0415/1357] Allow setting a global override for the
 "allow_growth" GPU option via the TF_FORCE_GPU_ALLOW_GROWTH environment
 variable.

PiperOrigin-RevId: 213728460
---
 .../core/common_runtime/bfc_allocator.h       |  2 +-
 .../common_runtime/gpu/gpu_bfc_allocator.cc   | 37 +++++++++++-
 .../common_runtime/gpu/gpu_bfc_allocator.h    |  3 +
 .../gpu/gpu_bfc_allocator_test.cc             | 56 +++++++++++++++++++
 4 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h
index 364071e066..2d74bf2b28 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.h
+++ b/tensorflow/core/common_runtime/bfc_allocator.h
@@ -304,7 +304,7 @@ class BFCAllocator : public Allocator {
   };
 
   // Returns 'bytes' rounded up to the next highest kMinAllocationSize.
-  size_t RoundedBytes(size_t bytes);
+  static size_t RoundedBytes(size_t bytes);
 
   // Try to add a new memory region that can satisfy an allocation of
   // 'rounded_bytes' bytes.  Returns true on success and false on
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index 44ffce77a1..42021e51f3 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -22,6 +22,39 @@ limitations under the License.
 
 namespace tensorflow {
 
+bool GPUBFCAllocator::GetAllowGrowthValue(const GPUOptions& gpu_options) {
+  const char* force_allow_growth_string =
+      std::getenv("TF_FORCE_GPU_ALLOW_GROWTH");
+  if (force_allow_growth_string == nullptr) {
+    return gpu_options.allow_growth();
+  }
+
+  if (strcmp("false", force_allow_growth_string) == 0) {
+    if (gpu_options.allow_growth()) {
+      LOG(WARNING)
+          << "Overriding allow_growth setting because the"
+          << " TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original"
+          << " config value was " << gpu_options.allow_growth() << ".";
+    }
+    return false;
+  } else if (strcmp("true", force_allow_growth_string) == 0) {
+    if (!gpu_options.allow_growth()) {
+      LOG(WARNING)
+          << "Overriding allow_growth setting because the"
+          << " TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original"
+          << " config value was " << gpu_options.allow_growth() << ".";
+    }
+    return true;
+  }
+
+  LOG(ERROR)
+      << "The TF_FORCE_GPU_ALLOW_GROWTH environment variable is set but could"
+      << " not be parsed: \"" << force_allow_growth_string << "\". Valid"
+      << " values are \"true\" or \"false\". Using original config value"
+      << " of " << gpu_options.allow_growth() << ".";
+  return gpu_options.allow_growth();
+}
+
 GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator,
                                  size_t total_memory, const string& name)
     : GPUBFCAllocator(sub_allocator, total_memory, GPUOptions(), name) {}
@@ -30,7 +63,7 @@ GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator,
                                  size_t total_memory,
                                  const GPUOptions& gpu_options,
                                  const string& name)
-    : BFCAllocator(sub_allocator, total_memory, gpu_options.allow_growth(),
-                   name) {}
+    : BFCAllocator(sub_allocator, total_memory,
+                   GPUBFCAllocator::GetAllowGrowthValue(gpu_options), name) {}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index 3470f7a9f7..d4c9cee89a 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -93,6 +93,9 @@ class GPUBFCAllocator : public BFCAllocator {
   ~GPUBFCAllocator() override {}
 
   TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
+
+ private:
+  static bool GetAllowGrowthValue(const GPUOptions& gpu_options);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
index e313135d8d..60e82ed13b 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
@@ -410,6 +410,8 @@ BENCHMARK(BM_AllocationDelayed)->Arg(1)->Arg(10)->Arg(100)->Arg(1000);
 
 class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
  protected:
+  void SetUp() override { CHECK_EQ(unsetenv("TF_FORCE_GPU_ALLOW_GROWTH"), 0); }
+
   // The following test methods are called from tests. The reason for this is
   // that this class is a friend class to BFCAllocator, but tests are not, so
   // only methods inside this class can access private members of BFCAllocator.
@@ -510,6 +512,56 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
     EXPECT_EQ(10, a.Log2FloorNonZeroSlow(1024));
     EXPECT_EQ(10, a.Log2FloorNonZeroSlow(1025));
   }
+
+  void TestForceAllowGrowth() {
+    PlatformGpuId platform_gpu_id(0);
+    GPUOptions options;
+    // Unset flag value uses provided option.
+    unsetenv("TF_FORCE_GPU_ALLOW_GROWTH");
+    options.set_allow_growth(true);
+    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
+        platform_gpu_id, false /*use_unified_memory*/, {}, {});
+    GPUBFCAllocator unset_flag_allocator(sub_allocator, 1LL << 31, options,
+                                         "GPU_0_bfc");
+    EXPECT_EQ(GPUBFCAllocator::RoundedBytes(size_t{1048576}),
+              unset_flag_allocator.curr_region_allocation_bytes_);
+
+    // Unparseable flag value uses provided option.
+    setenv("TF_FORCE_GPU_ALLOW_GROWTH", "unparseable", 1);
+    options.set_allow_growth(true);
+    sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
+        platform_gpu_id, false /*use_unified_memory*/, {}, {});
+    GPUBFCAllocator unparsable_flag_allocator(sub_allocator, 1LL << 31, options,
+                                              "GPU_1_bfc");
+    EXPECT_EQ(GPUBFCAllocator::RoundedBytes(size_t{1048576}),
+              unparsable_flag_allocator.curr_region_allocation_bytes_);
+
+    // Max of 2GiB total memory. Env variable set forces allow_growth, which
+    // does an initial allocation of 1MiB.
+    setenv("TF_FORCE_GPU_ALLOW_GROWTH", "true", 1);
+    options.set_allow_growth(false);
+    sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
+        platform_gpu_id, false /*use_unified_memory*/, {}, {});
+    GPUBFCAllocator force_allow_growth_allocator(sub_allocator, 1LL << 31,
+                                                 options, "GPU_2_bfc");
+    EXPECT_EQ(GPUBFCAllocator::RoundedBytes(size_t{1048576}),
+              force_allow_growth_allocator.curr_region_allocation_bytes_);
+
+    // If env variable forces allow_growth disabled, all available memory is
+    // allocated.
+    setenv("TF_FORCE_GPU_ALLOW_GROWTH", "false", 1);
+    options.set_allow_growth(true);
+    sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
+        platform_gpu_id, false /*use_unified_memory*/, {}, {});
+    GPUBFCAllocator force_no_allow_growth_allocator(sub_allocator, 1LL << 31,
+                                                    options, "GPU_3_bfc");
+    EXPECT_EQ(GPUBFCAllocator::RoundedBytes(1LL << 31),
+              force_no_allow_growth_allocator.curr_region_allocation_bytes_);
+  }
 };
 
 TEST_F(GPUBFCAllocatorPrivateMethodsTest, BinDebugInfo) { TestBinDebugInfo(); }
@@ -518,6 +570,10 @@ TEST_F(GPUBFCAllocatorPrivateMethodsTest, Log2FloorNonZeroSlow) {
   TestLog2FloorNonZeroSlow();
 }
 
+TEST_F(GPUBFCAllocatorPrivateMethodsTest, ForceAllowGrowth) {
+  TestForceAllowGrowth();
+}
+
 }  // namespace tensorflow
 
 #endif  // GOOGLE_CUDA
-- 
GitLab


From 0ab89a599bdb9885532785a5e7b6bfe346e09ee3 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Wed, 19 Sep 2018 17:40:09 -0700
Subject: [PATCH 0416/1357] TOCO transformations updated to support dilated
 depthwise convolution.

PiperOrigin-RevId: 213729750
---
 .../contrib/lite/toco/export_tensorflow.cc    |  11 ++
 .../graph_transformations.h                   |  12 +-
 .../identify_dilated_conv.cc                  | 117 +++++++++++-------
 .../propagate_fixed_sizes.cc                  |   3 +-
 .../contrib/lite/toco/import_tensorflow.cc    |  17 +++
 tensorflow/contrib/lite/toco/toco_tooling.cc  |   9 +-
 6 files changed, 118 insertions(+), 51 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc
index 3a534300ae..3d1eb3978c 100644
--- a/tensorflow/contrib/lite/toco/export_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc
@@ -470,6 +470,17 @@ void ConvertDepthwiseConvOperator(const Model& model,
   strides.mutable_list()->add_i(src_op.stride_height);
   strides.mutable_list()->add_i(src_op.stride_width);
   strides.mutable_list()->add_i(1);
+  // TODO(b/): To return a working TF GraphDef, we should be returning the
+  // correct SpaceToBatchNd and BatchToSpaceND operation before and after the
+  // conv since TF doesn't support dilations.
+  if ((src_op.dilation_width_factor != 1) ||
+      (src_op.dilation_height_factor != 1)) {
+    auto& dilations = (*dc2d_op->mutable_attr())["dilations"];
+    dilations.mutable_list()->add_i(1);
+    dilations.mutable_list()->add_i(src_op.dilation_height_factor);
+    dilations.mutable_list()->add_i(src_op.dilation_width_factor);
+    dilations.mutable_list()->add_i(1);
+  }
   string padding;
   if (src_op.padding.type == PaddingType::kSame) {
     padding = "SAME";
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
index fdd0632451..4d213b3f9c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
@@ -133,7 +133,6 @@ DECLARE_GRAPH_TRANSFORMATION(MergeLstmCellInputs)
 DECLARE_GRAPH_TRANSFORMATION(MergeReshapeIntoPrecedingTranspose)
 DECLARE_GRAPH_TRANSFORMATION(IdentifyRelu1)
 DECLARE_GRAPH_TRANSFORMATION(IdentifyPRelu)
-DECLARE_GRAPH_TRANSFORMATION(IdentifyDilatedConv)
 DECLARE_GRAPH_TRANSFORMATION(MakeInitialDequantizeOperator)
 DECLARE_GRAPH_TRANSFORMATION(MoveBinaryOperatorBeforeReshape)
 DECLARE_GRAPH_TRANSFORMATION(PropagateActivationFunctionIntoConstants)
@@ -266,6 +265,17 @@ class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation {
   bool has_default_ranges_flag_ = false;
 };
 
+class IdentifyDilatedConv : public GraphTransformation {
+ public:
+  bool Run(Model* model, std::size_t op_index) override;
+  const char* Name() const override { return "IdentifyDilatedConv"; }
+  bool identify_depthwise_conv() const { return identify_depthwise_conv_; }
+  void set_identify_depthwise_conv(bool val) { identify_depthwise_conv_ = val; }
+
+ private:
+  bool identify_depthwise_conv_ = true;
+};
+
 #undef DECLARE_GRAPH_TRANSFORMATION
 
 }  // end namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc
index d49857cfc2..aac77eb39e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc
@@ -53,50 +53,11 @@ namespace toco {
 // thrown in just for the extra headache. Padding adapts non-conforming input
 // sizes, and can be discarded. The bias is necessary, so is kept.
 
-bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
-  const auto it = model->operators.begin() + op_index;
-  auto* stb_op = it->get();
-
-  // 1. IDENTIFY OPERATORS
-  // ***************************************************************************
-  // SpaceToBatch Op.
-  if (stb_op->type != OperatorType::kSpaceToBatchND) {
-    return false;
-  }
-  if (stb_op->inputs.size() != 3) {
-    return false;
-  }
-  CHECK_EQ(stb_op->outputs.size(), 1);
-  // Extract the dilation factor from Input[1] of SpaceToBatch
-  // TODO(mjmatthews): Support 2D dilation factors.
-  const auto& block_shape_array = model->GetArray(stb_op->inputs[1]);
-  if (!block_shape_array.buffer) {
-    return false;
-  }
-  CHECK_EQ(block_shape_array.shape().dimensions_count(), 1);
-  int dilation_factor =
-      block_shape_array.Array::GetBuffer<ArrayDataType::kInt32>().data[0];
-
-  // Expand Op
-  auto* post_stb_op = GetOpWithInput(*model, stb_op->outputs[0]);
-  if (!post_stb_op) {
-    return false;
-  }
-  bool has_expand_op = false;
-  if (post_stb_op->type == OperatorType::kExpandDims) {
-    has_expand_op = true;
-    CHECK_EQ(post_stb_op->inputs.size(), 2);
-    CHECK_EQ(post_stb_op->outputs.size(), 1);
-  }
-
-  // Conv Op
-  const string& input_of_conv_op =
-      has_expand_op ? post_stb_op->outputs[0] : stb_op->outputs[0];
-  auto* conv_base_op = GetOpWithInput(*model, input_of_conv_op);
-  if (conv_base_op->type != OperatorType::kConv) {
-    return false;
-  }
-  auto* conv_op = static_cast<ConvOperator*>(conv_base_op);
+template <typename T>
+bool ResolveDilatedConv(Model* model, Operator* conv_base_op, Operator* stb_op,
+                        Operator* post_stb_op, bool has_expand_op,
+                        int dilation_factor) {
+  auto* conv_op = static_cast<T*>(conv_base_op);
   if (conv_op->inputs.size() != 2) {
     // The conv op must only have weights, no bias.
     return false;
@@ -158,8 +119,6 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(bias_add_op->inputs.size(), 2);
   CHECK_EQ(bias_add_op->outputs.size(), 1);
 
-  LOG(INFO) << "Identified sub-network emulating dilated convolution.";
-
   // 2. RE-WIRE OPERATORS
   // ***************************************************************************
   // Re-use the existing Conv2D op.
@@ -206,9 +165,71 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
   DeleteArrayIfUnused(stb_op_inputs[1], model);
   DeleteArrayIfUnused(stb_op_inputs[2], model);
 
-  LOG(INFO) << "Replaced with Dilated Conv2D op outputting \""
-            << conv_op->outputs[0] << "\".";
   return true;
 }
 
+bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
+  const auto it = model->operators.begin() + op_index;
+  auto* stb_op = it->get();
+
+  // 1. IDENTIFY OPERATORS
+  // ***************************************************************************
+  // SpaceToBatch Op.
+  if (stb_op->type != OperatorType::kSpaceToBatchND) {
+    return false;
+  }
+  if (stb_op->inputs.size() != 3) {
+    return false;
+  }
+  CHECK_EQ(stb_op->outputs.size(), 1);
+  // Extract the dilation factor from Input[1] of SpaceToBatch
+  // TODO(mjmatthews): Support 2D dilation factors.
+  const auto& block_shape_array = model->GetArray(stb_op->inputs[1]);
+  if (!block_shape_array.buffer) {
+    return false;
+  }
+  CHECK_EQ(block_shape_array.shape().dimensions_count(), 1);
+  int dilation_factor =
+      block_shape_array.Array::GetBuffer<ArrayDataType::kInt32>().data[0];
+
+  // Expand Op
+  auto* post_stb_op = GetOpWithInput(*model, stb_op->outputs[0]);
+  if (!post_stb_op) {
+    return false;
+  }
+  bool has_expand_op = false;
+  if (post_stb_op->type == OperatorType::kExpandDims) {
+    has_expand_op = true;
+    CHECK_EQ(post_stb_op->inputs.size(), 2);
+    CHECK_EQ(post_stb_op->outputs.size(), 1);
+  }
+
+  // Conv Op
+  const string& input_of_conv_op =
+      has_expand_op ? post_stb_op->outputs[0] : stb_op->outputs[0];
+  auto* conv_base_op = GetOpWithInput(*model, input_of_conv_op);
+  bool changed = false;
+  if (conv_base_op->type == OperatorType::kConv) {
+    changed = ResolveDilatedConv<ConvOperator>(model, conv_base_op, stb_op,
+                                               post_stb_op, has_expand_op,
+                                               dilation_factor);
+    if (changed) {
+      LOG(INFO) << "Replaced sub-network with Dilated Conv2D op outputting \""
+                << conv_base_op->outputs[0] << "\".";
+    }
+  } else if (identify_depthwise_conv_ &&
+             conv_base_op->type == OperatorType::kDepthwiseConv) {
+    changed = ResolveDilatedConv<DepthwiseConvOperator>(
+        model, conv_base_op, stb_op, post_stb_op, has_expand_op,
+        dilation_factor);
+    if (changed) {
+      LOG(INFO)
+          << "Replaced sub-netork with Dilated DepthwiseConv2D op outputting \""
+          << conv_base_op->outputs[0] << "\".";
+    }
+  }
+
+  return changed;
+}
+
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 6c72e20121..f943da6d85 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -285,7 +285,8 @@ void ProcessDepthwiseConvOperator(Model* model, DepthwiseConvOperator* op) {
   const int kheight = weights_shape.dims(1);
   const int kwidth = weights_shape.dims(2);
   ComputeConvSizes(input_shape, output_depth, kwidth, kheight, op->stride_width,
-                   op->stride_height, 1, 1, op->padding.type,
+                   op->stride_height, op->dilation_width_factor,
+                   op->dilation_height_factor, op->padding.type,
                    model->GetArray(output_name).mutable_shape(),
                    &op->padding.GetOrCreateFixedPadding());
 }
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 4c678e7e73..e02d000e7e 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -641,6 +641,23 @@ tensorflow::Status ConvertDepthwiseConvOperator(
   CHECK_EQ(strides.i(3), 1);
   conv->stride_height = strides.i(1);
   conv->stride_width = strides.i(2);
+  if (HasAttr(node, "dilations")) {
+    const auto& dilations = GetListAttr(node, "dilations");
+    TF_RETURN_IF_ERROR(
+        ExpectValue(dilations.i_size(), 4, "number of dilations"));
+    if (dilations.i(0) != 1 || dilations.i(3) != 1) {
+      return tensorflow::errors::InvalidArgument(absl::StrCat(
+          "Can only import Conv ops with dilation along the height "
+          "(1st) or width (2nd) axis. TensorFlow op \"",
+          node.name(), "\" had dilations:[ ", dilations.i(0), ", ",
+          dilations.i(1), ", ", dilations.i(2), ", ", dilations.i(3), "]."));
+    }
+    conv->dilation_height_factor = dilations.i(1);
+    conv->dilation_width_factor = dilations.i(2);
+  } else {
+    conv->dilation_height_factor = 1;
+    conv->dilation_width_factor = 1;
+  }
   const auto& padding = GetStringAttr(node, "padding");
   if (padding == "SAME") {
     conv->padding.type = PaddingType::kSame;
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index 28d31e3797..a08b02485f 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -101,7 +101,6 @@ void MakeGeneralGraphTransformationsSet(
   transformations->Add(new ResolveTensorFlowSwitch);
   transformations->Add(new ResolveTensorFlowConcat);
   transformations->Add(new ResolveMultiplyByZero);
-  transformations->Add(new IdentifyDilatedConv);
   transformations->Add(new IdentifyL2Normalization);
   transformations->Add(new IdentifyL2Pool);
   transformations->Add(new IdentifyRelu1);
@@ -282,6 +281,14 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
     }
   }
   transformations.Add(new ResolveConstantConcatenation);
+  // TODO(b/116063589): TF GraphDef doesn't support dilations on its depthwise
+  // conv, so we need to make sure we don't convert to dilated depthwise conv
+  // when outputing to TF GraphDef.
+  auto* identify_dilated_conv = new IdentifyDilatedConv;
+  if (output_format == TENSORFLOW_GRAPHDEF) {
+    identify_dilated_conv->set_identify_depthwise_conv(false);
+  }
+  transformations.Add(identify_dilated_conv);
   RunGraphTransformations(model, "general graph transformations",
                           transformations);
 
-- 
GitLab


From 37632e5f0f8b0776854746152a3a7b361414f2cc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 17:42:13 -0700
Subject: [PATCH 0417/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 213729979
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 61 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 61 +++++++++++++++++++
 2 files changed, 122 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index e59958749c..2360432d96 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -38879,6 +38879,30 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "PrintV2"
+  input_arg {
+    name: "input"
+    type: DT_STRING
+  }
+  attr {
+    name: "output_stream"
+    type: "string"
+    default_value {
+      s: "stderr"
+    }
+    allowed_values {
+      list {
+        s: "stdout"
+        s: "stderr"
+        s: "log(info)"
+        s: "log(warning)"
+        s: "log(error)"
+      }
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "PriorityQueue"
   output_arg {
@@ -70187,6 +70211,43 @@ op {
     }
   }
 }
+op {
+  name: "StringFormat"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type: DT_STRING
+  }
+  attr {
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "template"
+    type: "string"
+    default_value {
+      s: "%s"
+    }
+  }
+  attr {
+    name: "placeholder"
+    type: "string"
+    default_value {
+      s: "%s"
+    }
+  }
+  attr {
+    name: "summarize"
+    type: "int"
+    default_value {
+      i: 3
+    }
+  }
+}
 op {
   name: "StringJoin"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 4ece1c8953..29e327753b 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -19520,6 +19520,30 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "PrintV2"
+  input_arg {
+    name: "input"
+    type: DT_STRING
+  }
+  attr {
+    name: "output_stream"
+    type: "string"
+    default_value {
+      s: "stderr"
+    }
+    allowed_values {
+      list {
+        s: "stdout"
+        s: "stderr"
+        s: "log(info)"
+        s: "log(warning)"
+        s: "log(error)"
+      }
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "PriorityQueue"
   output_arg {
@@ -32734,6 +32758,43 @@ op {
     }
   }
 }
+op {
+  name: "StringFormat"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type: DT_STRING
+  }
+  attr {
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "template"
+    type: "string"
+    default_value {
+      s: "%s"
+    }
+  }
+  attr {
+    name: "placeholder"
+    type: "string"
+    default_value {
+      s: "%s"
+    }
+  }
+  attr {
+    name: "summarize"
+    type: "int"
+    default_value {
+      i: 3
+    }
+  }
+}
 op {
   name: "StringJoin"
   input_arg {
-- 
GitLab


From 415455b0ef2d65504ab8c9084a6daa2899521212 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Wed, 19 Sep 2018 17:48:07 -0700
Subject: [PATCH 0418/1357] Fix the error message thrown when running eval on
 pod

PiperOrigin-RevId: 213730668
---
 tensorflow/contrib/tpu/python/tpu/tpu_context.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
index 19359cb612..ac76712aeb 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
@@ -599,8 +599,8 @@ class _InternalTPUContext(object):
             .format(self._eval_batch_size, num_replicas))
       if num_hosts > 1 and not self.is_input_broadcast_with_iterators():
         raise ValueError(
-            'TPUEstimator.evaluate should be running on single TPU worker. '
-            'got {}.'.format(num_hosts))
+            'TPUEstimator.evaluate should be running on single TPU'
+            ' instead of a Pod.')
     else:
       assert mode == model_fn_lib.ModeKeys.PREDICT
       if self._predict_batch_size is None:
-- 
GitLab


From 9f05ca4ec89d9b03f740f881ae50d97d76a1b849 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Wed, 19 Sep 2018 18:27:52 -0700
Subject: [PATCH 0419/1357] Copy Tensor._handle_data from external_capture to
 placeholder for Variant tensors in Graph mode defun. This allows inferring
 the shape of values popped from TensorLists inside defuns. Remove "Resource"
 from {Set|Get}ResourceHandleShapeAndType since the same functions are
 re-usable for variants. Eager mode fix coming in a future changelist.

PiperOrigin-RevId: 213735462
---
 tensorflow/c/python_api.cc                    |  7 ++-
 tensorflow/c/python_api.h                     | 13 ++---
 tensorflow/python/client/tf_session.i         |  4 +-
 tensorflow/python/eager/BUILD                 |  1 +
 tensorflow/python/eager/function.py           | 43 ++++++++++++-----
 tensorflow/python/eager/function_test.py      | 47 +++++++++++++++++--
 tensorflow/python/framework/function.py       |  9 ++--
 tensorflow/python/framework/ops.py            |  4 +-
 .../python/ops/resource_variable_ops.py       |  2 +-
 9 files changed, 94 insertions(+), 36 deletions(-)

diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
index 8486b585c8..247236b760 100644
--- a/tensorflow/c/python_api.cc
+++ b/tensorflow/c/python_api.cc
@@ -110,7 +110,7 @@ void ExtendSession(TF_Session* session, TF_Status* status) {
   session->extend_before_run = false;
 }
 
-std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) {
+std::string GetHandleShapeAndType(TF_Graph* graph, TF_Output output) {
   Node* node = &output.oper->node;
   CppShapeInferenceResult::HandleData handle_data;
   handle_data.set_is_set(true);
@@ -135,9 +135,8 @@ std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) {
   return result;
 }
 
-void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output,
-                                   const void* proto, size_t proto_len,
-                                   TF_Status* status) {
+void SetHandleShapeAndType(TF_Graph* graph, TF_Output output, const void* proto,
+                           size_t proto_len, TF_Status* status) {
   tensorflow::CppShapeInferenceResult::HandleData handle_data;
   if (!handle_data.ParseFromArray(proto, proto_len)) {
     status->status = tensorflow::errors::InvalidArgument(
diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h
index 4bcb5bde62..5cce84020b 100644
--- a/tensorflow/c/python_api.h
+++ b/tensorflow/c/python_api.h
@@ -54,16 +54,17 @@ void SetRequireShapeInferenceFns(TF_Graph* graph, bool require);
 void ExtendSession(TF_Session* session, TF_Status* status);
 
 // Returns the serialized CppShapeInferenceResult::HandleData proto for
-// `output` if its a resource tensor, or otherwise returns the empty string.
-std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output);
+// `output` if its a resource or variant tensor, or otherwise returns the empty
+// string.
+std::string GetHandleShapeAndType(TF_Graph* graph, TF_Output output);
 
 // Sets `output` based on `proto`, which should be a serialized
-// CppShapeInferenceResult::HandleData proto.
+// CppShapeInferenceResult::HandleData proto. `output` should be a resource
+// or variant tensor.
 // NOTE(skyewm): `proto` is passed a void*/size_t pair instead of a std::string
 // because I couldn't get SWIG to work otherwise.
-void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output,
-                                   const void* proto, size_t proto_len,
-                                   TF_Status* status);
+void SetHandleShapeAndType(TF_Graph* graph, TF_Output output, const void* proto,
+                           size_t proto_len, TF_Status* status);
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_C_PYTHON_API_H_
diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index 39a2922ac0..ef7527d887 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -463,7 +463,7 @@ TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper{
 }
 
 // Override default py3 behavior of attempting to encode into Unicode.
-%typemap(out) std::string tensorflow::GetResourceHandleShapeAndType {
+%typemap(out) std::string tensorflow::GetHandleShapeAndType {
   $result = PyBytes_FromStringAndSize($1.data(), $1.size());
 }
 
@@ -782,7 +782,7 @@ def TF_Reset(target, containers=None, config=None):
 %unignore TF_TryEvaluateConstant_wrapper;
 %noexception TF_TryEvaluateConstant_wrapper;
 %unignore ExtendSession;
-%unignore ResourceHandleShapeAndType;
+%unignore HandleShapeAndType;
 
 %include "tensorflow/python/client/tf_session_helper.h"
 
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index f80256fc2a..a2686c68a9 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -147,6 +147,7 @@ cuda_py_test(
         "//tensorflow/python:clip_ops",
         "//tensorflow/python:init_ops",
         "//tensorflow/python:layers",
+        "//tensorflow/python:list_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:resource_variable_ops",
     ],
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index a68c6ab3b4..bcb1881264 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -73,16 +73,36 @@ def _create_substitute_placeholder(value, name=None, dtype=None):
   with ops.control_dependencies(None):
     placeholder = graph_placeholder(
         dtype=dtype or value.dtype, shape=value.shape, name=name)
-  if placeholder.dtype == dtypes_module.resource:
-    if isinstance(value, ops.EagerTensor):
-      handle_data = value._handle_data  # pylint: disable=protected-access
+  _copy_handle_data(value, placeholder)
+  return placeholder
+
+
+def _copy_handle_data(source_t, target_t):
+  """Copies HandleData for variant and resource type tensors if available.
+
+  The CppShapeInferenceResult::HandleData proto contains information about the
+  shapes and types of the element tensors of resource/variant type tensors.
+  We need to copy this across function boundaries, i.e., when capturing a
+  placeholder or when returning a function tensor as output. If we don't do this
+  the element tensors will have unknown shapes, e.g., if a TensorList variant
+  tensor is captured as a placeholder, elements popped from that list would have
+  unknown shape.
+
+  Args:
+    source_t: The tensor to copy HandleData from.
+    target_t: The tensor to copy HandleData to.
+  """
+  if (target_t.dtype == dtypes_module.resource or
+      target_t.dtype == dtypes_module.variant):
+    if isinstance(source_t, ops.EagerTensor):
+      handle_data = source_t._handle_data  # pylint: disable=protected-access
     else:
-      handle_data = resource_variable_ops.get_resource_handle_data(value)
+      handle_data = resource_variable_ops.get_resource_handle_data(source_t)
     if handle_data is not None and handle_data.is_set:
       # pylint: disable=protected-access
-      pywrap_tensorflow.SetResourceHandleShapeAndType(
-          placeholder.graph._c_graph, placeholder._as_tf_output(),
-          handle_data.SerializeToString())
+      pywrap_tensorflow.SetHandleShapeAndType(target_t.graph._c_graph,
+                                              target_t._as_tf_output(),
+                                              handle_data.SerializeToString())
       # pylint: enable=protected-access
       # Ensure that shapes and dtypes are propagated.
       shapes, types = zip(*[(pair.shape, pair.dtype)
@@ -91,12 +111,10 @@ def _create_substitute_placeholder(value, name=None, dtype=None):
       shapes = [[d.size for d in s.dim]
                 if not s.unknown_rank else None for s in shapes]
       pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper(
-          placeholder._op._graph._c_graph,  # pylint: disable=protected-access
-          placeholder._as_tf_output(),  # pylint: disable=protected-access
+          target_t._op._graph._c_graph,  # pylint: disable=protected-access
+          target_t._as_tf_output(),  # pylint: disable=protected-access
           shapes, ranks, types)
 
-  return placeholder
-
 
 def _get_device_functions(ctx, graph):
   """Returns a tuple of device functions representing the device stack."""
@@ -435,6 +453,7 @@ class _EagerDefinedFunction(object):
     self._num_outputs = len(self.signature.output_arg)
     self._output_types = [o.type for o in self.signature.output_arg]
     self._output_shapes = [o.shape for o in outputs]
+    self._func_graph_outputs = outputs
     self.grad_func_name = None
     self.python_grad_func = None
     self._c_func = c_api_util.ScopedTFFunction(fn)
@@ -511,6 +530,8 @@ class _EagerDefinedFunction(object):
     else:
       for i, shape in enumerate(self._output_shapes):
         outputs[i].set_shape(shape)
+      for i, func_graph_output in enumerate(self._func_graph_outputs):
+        _copy_handle_data(func_graph_output, outputs[i])
       return outputs
 
 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 4a1bde3f5e..e4513cc87c 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -48,6 +48,7 @@ from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import list_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -438,10 +439,17 @@ class FunctionTest(test.TestCase):
     def f():
       x = constant_op.constant([[1, 2], [3, 4]])
       out = math_ops.matmul(v, x)
-      self.assertEqual(out.get_shape(), tensor_shape.TensorShape([2, 2]))
+      self.assertEqual(out.shape, tensor_shape.TensorShape([2, 2]))
+      # We do not return v directly since the tensor conversion function of
+      # ResourceVariable returns the read value and not the resource itself.
+      return v._handle
 
     compiled = function.defun(f)
-    compiled()
+    var_handle = compiled()
+    self.assertEqual(var_handle.dtype, dtypes.resource)
+    self.assertEqual(var_handle.shape, tensor_shape.scalar())
+    var_t = resource_variable_ops.read_variable_op(var_handle, dtype=v.dtype)
+    self.assertEqual(var_t.shape, tensor_shape.TensorShape([2, 2]))
 
   def testVariableInLoopInFunction(self):
 
@@ -465,10 +473,17 @@ class FunctionTest(test.TestCase):
       def f():
         x = constant_op.constant([[1, 2], [3, 4]])
         out = math_ops.matmul(v, x)
-        self.assertEqual(out.get_shape(), tensor_shape.TensorShape([2, 2]))
+        self.assertEqual(out.shape, tensor_shape.TensorShape([2, 2]))
+        # We do not return v directly since the tensor conversion function of
+        # ResourceVariable returns the read value and not the resource itself.
+        return v._handle
 
       compiled = function.defun(f)
-      compiled()
+      var_handle = compiled()
+      self.assertEqual(var_handle.dtype, dtypes.resource)
+      self.assertEqual(var_handle.shape, tensor_shape.scalar())
+      var_t = resource_variable_ops.read_variable_op(var_handle, dtype=v.dtype)
+      self.assertEqual(var_t.shape, tensor_shape.TensorShape([2, 2]))
 
   def testDefunShapeInferenceWithCapturedVariableInGraphMode(self):
     with context.graph_mode():
@@ -477,12 +492,34 @@ class FunctionTest(test.TestCase):
       def f():
         x = constant_op.constant([[1, 2], [3, 4]])
         out = math_ops.matmul(v, x)
-        self.assertEqual(out.get_shape(), tensor_shape.TensorShape([2, 2]))
+        self.assertEqual(out.shape, tensor_shape.TensorShape([2, 2]))
 
       # Check that shape inference works while creating the defun
       compiled = function.defun(f)
       compiled()
 
+  def testDefunShapeInferenceWithCapturedTensorListInGraphMode(self):
+    with context.graph_mode():
+      tensor_list = list_ops.empty_tensor_list(
+          element_dtype=dtypes.float32,
+          element_shape=ops.convert_to_tensor([], dtype=dtypes.int32))
+      tensor_list = list_ops.tensor_list_push_back(tensor_list,
+                                                   constant_op.constant(1.0))
+      tensor_list = list_ops.tensor_list_push_back(tensor_list,
+                                                   constant_op.constant(2.0))
+
+      def f():
+        tl, value = list_ops.tensor_list_pop_back(
+            tensor_list, element_dtype=dtypes.float32)
+        self.assertEqual(value.shape, tensor_shape.scalar())
+        return tl
+
+      compiled = function.defun(f)
+      output_tensor_list = compiled()
+      _, value = list_ops.tensor_list_pop_back(
+          output_tensor_list, element_dtype=dtypes.float32)
+      self.assertEqual(value.shape, tensor_shape.scalar())
+
   @test_util.run_in_graph_and_eager_modes
   def testDefunForcesResourceVariables(self):
 
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index a8aef3a009..68b3170dfe 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -762,13 +762,12 @@ class _FuncGraph(ops.Graph):
         if handle_data:
           handle_data = handle_data.SerializeToString()
       else:
-        handle_data = c_api.GetResourceHandleShapeAndType(
-            tensor.graph._c_graph, tensor._as_tf_output())
+        handle_data = c_api.GetHandleShapeAndType(tensor.graph._c_graph,
+                                                  tensor._as_tf_output())
 
       if handle_data:
-        c_api.SetResourceHandleShapeAndType(ph.graph._c_graph,
-                                            ph._as_tf_output(),
-                                            compat.as_bytes(handle_data))
+        c_api.SetHandleShapeAndType(ph.graph._c_graph, ph._as_tf_output(),
+                                    compat.as_bytes(handle_data))
     else:
       ph._handle_data = tensor._handle_data
     # pylint: enable=protected-access
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 343f52fe8f..8bb177939e 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -2532,8 +2532,8 @@ def _set_shape_and_handle_data_for_outputs_c_api(op):
     output._shape_val = output._c_api_shape()
     # Set the resource handle data for compatibility with the Python shape
     # inference code.
-    serialized = c_api.GetResourceHandleShapeAndType(op._graph._c_graph,
-                                                     output._as_tf_output())
+    serialized = c_api.GetHandleShapeAndType(op._graph._c_graph,  # pylint: disable=protected-access
+                                             output._as_tf_output())
     if serialized:
       output._handle_data = (
           cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 55c2eb5fa4..9e477ab8af 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -48,7 +48,7 @@ def get_resource_handle_data(graph_op):
   assert ops._USE_C_SHAPES  # pylint: disable=protected-access
   assert type(graph_op) == ops.Tensor  # pylint: disable=unidiomatic-typecheck
 
-  handle_data = pywrap_tensorflow.GetResourceHandleShapeAndType(
+  handle_data = pywrap_tensorflow.GetHandleShapeAndType(
       graph_op.graph._c_graph, graph_op._as_tf_output())  # pylint: disable=protected-access
 
   return cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData.FromString(
-- 
GitLab


From 1f8788143d6da8fe707f78f0660b2ebb11b87280 Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Wed, 19 Sep 2018 18:34:16 -0700
Subject: [PATCH 0420/1357] BEGIN_PUBLIC It's desirable to run int64 compute on
 GPU. Rolling back the folowing CL.

*** Original change description ***

Register a new Sum op for T:int64 and Tidx:int32

END_PUBLIC

Automated rollback of commit a9a5929d06e5eb4dd38bef63d56c4e338bbd38a2

PiperOrigin-RevId: 213736058
---
 tensorflow/core/kernels/reduction_ops_sum.cc | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc
index e4ca89eca3..5318d8c133 100644
--- a/tensorflow/core/kernels/reduction_ops_sum.cc
+++ b/tensorflow/core/kernels/reduction_ops_sum.cc
@@ -76,15 +76,7 @@ REGISTER_KERNEL_BUILDER(
         .HostMemory("output")
         .HostMemory("reduction_indices"),
     ReductionOp<CPUDevice, int32, int64, Eigen::internal::SumReducer<int32>>);
-REGISTER_KERNEL_BUILDER(
-    Name("Sum")
-        .Device(DEVICE_GPU)
-        .TypeConstraint<int64>("T")
-        .TypeConstraint<int32>("Tidx")
-        .HostMemory("input")
-        .HostMemory("output")
-        .HostMemory("reduction_indices"),
-    ReductionOp<CPUDevice, int64, int32, Eigen::internal::SumReducer<int64>>);
+
 #endif
 
 #ifdef TENSORFLOW_USE_SYCL
-- 
GitLab


From 3cd7354b6e82650ae4977fcb08d0e4f7ac9b4589 Mon Sep 17 00:00:00 2001
From: Billy Lamberta <blamb@google.com>
Date: Wed, 19 Sep 2018 18:50:16 -0700
Subject: [PATCH 0421/1357] Update TF Lite subsite

PiperOrigin-RevId: 213737482
---
 tensorflow/contrib/lite/g3doc/_book.yaml      |  67 +++---
 tensorflow/contrib/lite/g3doc/_index.yaml     | 220 +++++++++++++++---
 tensorflow/contrib/lite/g3doc/_project.yaml   |   4 +-
 .../lite/g3doc/api_docs/python/_toc.yaml      |   6 -
 tensorflow/contrib/lite/g3doc/devguide.md     |   9 +-
 .../images/landing-page/assistant_logo.png    | Bin 0 -> 10942 bytes
 .../detect_crop_disease_in_africa.png         | Bin 0 -> 578440 bytes
 .../images/landing-page/fishbrain_logo.png    | Bin 0 -> 7764 bytes
 .../landing-page/fishbrain_logo_big.png       | Bin 0 -> 16308 bytes
 .../g3doc/images/landing-page/gboard_logo.png | Bin 0 -> 20159 bytes
 .../g3doc/images/landing-page/gmail_logo.png  | Bin 0 -> 35371 bytes
 .../g3doc/images/landing-page/loseit_logo.png | Bin 0 -> 12002 bytes
 .../images/landing-page/loseit_logo_big.png   | Bin 0 -> 25868 bytes
 .../g3doc/images/landing-page/nest_logo.png   | Bin 0 -> 7839 bytes
 .../g3doc/images/landing-page/photos_logo.png | Bin 0 -> 27152 bytes
 .../g3doc/images/landing-page/shazam_logo.png | Bin 0 -> 17783 bytes
 .../g3doc/images/landing-page/vsco_logo.png   | Bin 0 -> 17249 bytes
 .../contrib/lite/g3doc/tfmobile/index.md      |   2 +-
 18 files changed, 227 insertions(+), 81 deletions(-)
 delete mode 100644 tensorflow/contrib/lite/g3doc/api_docs/python/_toc.yaml
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/assistant_logo.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/detect_crop_disease_in_africa.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/fishbrain_logo.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/fishbrain_logo_big.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/gboard_logo.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/gmail_logo.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/loseit_logo.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/loseit_logo_big.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/nest_logo.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/photos_logo.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/shazam_logo.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/landing-page/vsco_logo.png

diff --git a/tensorflow/contrib/lite/g3doc/_book.yaml b/tensorflow/contrib/lite/g3doc/_book.yaml
index 1dffe30790..6f56e3139f 100644
--- a/tensorflow/contrib/lite/g3doc/_book.yaml
+++ b/tensorflow/contrib/lite/g3doc/_book.yaml
@@ -14,46 +14,49 @@ upper_tabs:
     - name: Guide
       contents:
       - title: Overview
-        path: /mobile/overview
-      - title: Developer Guide
-        path: /mobile/devguide
-      - title: Android Demo App
-        path: /mobile/demo_android
-      - title: iOS Demo App
-        path: /mobile/demo_ios
+        path: /lite/overview
+      - title: Developer guide
+        path: /lite/devguide
+      - title: Android demo app
+        path: /lite/demo_android
+      - title: iOS demo app
+        path: /lite/demo_ios
       - title: Performance
-        path: /mobile/performance
+        path: /lite/performance
       - break: True
       - title: TensorFlow Lite APIs
-        path: /mobile/apis
+        path: /lite/apis
       - title: Custom operators
-        path: /mobile/custom_operators
-      - title: TensorFlow Lite Ops Versioning
-        path: /mobile/ops_versioning
-      - title: TensorFlow Lite Compatibility Guide
-        path: /mobile/tf_ops_compatibility
-      - title: List of Hosted Models
-        path: /mobile/models
+        path: /lite/custom_operators
+      - title: TensorFlow Lite ops versioning
+        path: /lite/ops_versioning
+      - title: TensorFlow Lite compatibility guide
+        path: /lite/tf_ops_compatibility
+      - title: List of hosted models
+        path: /lite/models
       - title: TensorFlow Lite for iOS
-        path: /mobile/ios
+        path: /lite/ios
       - title: TensorFlow Lite for Raspberry Pi
-        path: /mobile/rpi
+        path: /lite/rpi
 
-      - heading: TF Mobile
+      - title: TF Mobile
+        style: accordion
         status: deprecated
-      - title: Overview
-        path: /mobile/tfmobile/
-      - title: Building TensorFlow on Android
-        path: /mobile/tfmobile/android_build
-      - title: Building TensorFlow on IOS
-        path: /mobile/tfmobile/ios_build
-      - title: Integrating TensorFlow libraries
-        path: /mobile/tfmobile/linking_libs
-      - title: Preparing models for mobile deployment
-        path: /mobile/tfmobile/prepare_models
-      - title: Optimizing for mobile
-        path: /mobile/tfmobile/optimizing
+        section:
+        - title: Overview
+          path: /lite/tfmobile/
+        - title: Building TensorFlow on Android
+          path: /lite/tfmobile/android_build
+        - title: Building TensorFlow on IOS
+          path: /lite/tfmobile/ios_build
+        - title: Integrating TensorFlow libraries
+          path: /lite/tfmobile/linking_libs
+        - title: Preparing models for mobile deployment
+          path: /lite/tfmobile/prepare_models
+        - title: Optimizing for mobile
+          path: /lite/tfmobile/optimizing
 
     - name: API
       contents:
-      - include: /mobile/api_docs/python/_toc.yaml
+      - title: API
+        path: /api_docs/python/tf/contrib/lite
diff --git a/tensorflow/contrib/lite/g3doc/_index.yaml b/tensorflow/contrib/lite/g3doc/_index.yaml
index b3f21e21ac..bc66cc5dc1 100644
--- a/tensorflow/contrib/lite/g3doc/_index.yaml
+++ b/tensorflow/contrib/lite/g3doc/_index.yaml
@@ -1,60 +1,209 @@
-book_path: /mobile/_book.yaml
-project_path: /mobile/_project.yaml
+project_path: /lite/_project.yaml
+book_path: /lite/_book.yaml
 description: <!--no description-->
 landing_page:
+  custom_css_path: /site-assets/css/style.css
   rows:
-  - heading: TensorFlow Lite is a lightweight solution for mobile and embedded devices.
+  - heading: TensorFlow Lite is for mobile and embedded devices.
+    description: >
+      <p style="max-width: 75%;">
+        TensorFlow Lite is the official solution for running machine learning
+        models on mobile and embedded devices. It enables on&#8209;device machine
+        learning inference with low latency and a small binary size on Android,
+        iOS, and other operating systems.
+      </p>
+      <style>
+      .tfo-landing-row-heading {
+        padding-top: 0 !important;
+      }
+      .tfo-landing-row-heading h2 {
+        margin-top: 0 !important;
+      }
+      .tfo-landing-row-heading-list ol, .tfo-landing-row-heading-list ul {
+        margin-top: 0;
+      }
+      </style>
+
+  - classname: tfo-landing-row-heading tfo-landing-row-heading-list
+    heading: Many benefits
+    description: >
+      On-device ML inference is difficult because of the many constraints—TensorFlow Lite can solve these:
     items:
-    - classname: devsite-landing-row-50
-      description: >
-        TensorFlow Lite is TensorFlow’s lightweight solution for mobile and
-        embedded devices. It enables on-device machine learning inference with
-        low latency and a small binary size. TensorFlow Lite also supports
-        hardware acceleration with the
-        <a href='https://developer.android.com/ndk/guides/neuralnetworks/index.html'>Android Neural Networks API</a>.
-      list:
-      - heading: Key point 1
+    - list:
+      - heading: Performance
+        description: >
+          TF Lite is fast with no noticeable accuracy loss—see the <a href="./performance">metrics</a>.
+        icon:
+          icon_name: lens
+          foreground: theme
+      - heading: Portability
         description: >
-          [high-level overview]
+          <a href="https://developer.android.com/ndk/guides/neuralnetworks/" class="external">Android</a>,
+          iOS, and more specialized IoT devices.
         icon:
-          icon_name: chevron_right
+          icon_name: lens
           foreground: theme
-          background: grey
-      - heading: Key point 2
+    - list:
+      - heading: Low latency
         description: >
-          [high-level overview]
+          Optimized float- and fixed-point CPU kernels, op&#8209;fusing, and more.
         icon:
-          icon_name: chevron_right
+          icon_name: lens
           foreground: theme
-          background: grey
-      - heading: Key point 3
+      - heading: Acceleration
         description: >
-          [high-level overview]
+          Integration with GPU and internal/external accelerators.
         icon:
-          icon_name: chevron_right
+          icon_name: lens
           foreground: theme
-          background: grey
-      code_block: |
-        <pre class = "prettyprint">
-        $ toco --input_file=$(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \
-               --input_format=TENSORFLOW_GRAPHDEF \
-               --output_format=TFLITE \
-               --output_file=/tmp/mobilenet_v1_1.0_224.tflite \
-               --inference_type=FLOAT \
-               --input_type=FLOAT \
-               --input_arrays=input \
-               --output_arrays=MobilenetV1/Predictions/Reshape_1 \
-               --input_shapes=1,224,224,3
-        </pre>
+    - list:
+      - heading: Small model size
+        description: >
+          Controlled dependencies, <a href="https://medium.com/tensorflow/introducing-the-model-optimization-toolkit-for-tensorflow-254aca1ba0a3" class="external">quantization</a>,
+          and op&nbsp;registration.
+        icon:
+          icon_name: lens
+          foreground: theme
+      - heading: Tooling
+        description: >
+          Conversion, compression, benchmarking, power-consumption, and more.
+        icon:
+          icon_name: lens
+          foreground: theme
+
+  - classname: devsite-landing-row-logos tfo-landing-row-heading
+    heading: Companies using TensorFlow Lite
+    items:
+    - custom_image:
+        path: ./images/landing-page/photos_logo.png
+      path: https://www.photos.google.com
+    - custom_image:
+        path: ./images/landing-page/gboard_logo.png
+      path: https://play.google.com/store/apps/details?id=com.google.android.inputmethod.latin&hl=en_US
+    - custom_image:
+        path: ./images/landing-page/gmail_logo.png
+      path: https://www.google.com/gmail/
+    - custom_image:
+        path: ./images/landing-page/assistant_logo.png
+      path: https://assistant.google.com/
+
+  - classname: devsite-landing-row-logos
+    items:
+    - custom_image:
+        path: ./images/landing-page/vsco_logo.png
+      path: https://vsco.co
+    - custom_image:
+        path: ./images/landing-page/shazam_logo.png
+      path: https://www.shazam.com/
+    - custom_image:
+        path: ./images/landing-page/nest_logo.png
+      path: https://nest.com/    
+    - custom_image:
+        path: ./images/landing-page/loseit_logo.png
+      path: https://www.loseit.com/
+
+  - classname: devsite-landing-row-no-image-background devsite-landing-row-67
+    background: grey
+    items:
+    - description: >
+        <em>“TensorFlow Lite helped us introduce machine learning and AI into our
+        app in an easy and streamlined way. We could reduce the size of our
+        models while keeping the accuracy high. This helped us create an amazing
+        fishing experience for our users by allowing them to identify any fish
+        species with just a photo.”</em>
+      image_path: ./images/landing-page/fishbrain_logo_big.png
+
+  - heading: How it works
+    items:
+    - heading: Build
+      icon:
+        icon_name: build
+      description: >
+        Build a new model or retrain an existing one, such as using transfer learning.
+      buttons:
+      - label: Read the developer guide
+        path: /lite/devguide
+        classname: button button-primary tfo-button-primary
+    - heading: Convert
+      icon:
+        icon_name: autorenew
+      description: >
+        Convert a TensorFlow model into a compressed flat buffer with the
+        TensorFlow Lite Optimizing Converter (TOCO).
+      buttons:
+      - label: Read the TOCO guide
+        path: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/toco/g3doc/python_api.md
+        classname: button button-primary tfo-button-primary
+    - heading: Deploy
+      icon:
+        icon_name: bolt
+      description: >
+        Take the compressed <code>.tflite</code> file and load it into a mobile
+        or embedded device.<br/>
+        See the <a href="#build-your-first-tensorflow-lite-app">tutorials below</a> to build an app.
+
+  - heading: Build your first TensorFlow Lite app
+    background: grey
+    items:
+    - classname: tfo-landing-row-item-inset-white
+      heading: Get started
+      description: >
+        <ul>
+          <li>Beginner: <a href="https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/" class="external">TensorFlow for Poets</a></li>
+          <li>Beginner: <a href="https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2-tflite/" class="external">TensorFlow for Poets 2: Android</a></li>
+          <li>Beginner: <a href="https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2-ios/" class="external">TensorFlow for Poets 2: iOS </a></li>
+          <li>Intermediate: <a href="https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193" class="external">Object detection tutorial</a>
+        </ul>
+    - classname: tfo-landing-row-item-inset-white
+      heading: Share your TensorFlow Lite story
+      description: >
+        We love to hear what you're working on—it may even get highlighted on
+        our social media! <a href="https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss" class="external">Tell us</a>.
+
+  - classname: devsite-landing-row-no-image-background devsite-landing-row-67
+    items:
+    - description: >
+        <p>
+          <em>“The release of TensorFlow Lite has allowed us to deploy an engaging
+          real-time experience to our users that eliminates the requirement
+          for a data connection. TensorFlow Lite’s ability to compress and
+          optimize the TensorFlow graph for mobile deployment has been
+          transformative in expanding the capabilities of Snap It.</em>
+        </p>
+        <p>
+          <em>Through TensorFlow Lite, our users can now enjoy a state of the
+          art, computer-vision-based food logging experience without worrying
+          about signal strength. We look forward to future collaborations
+          with the TensorFlow Lite team.”</em>
+        </p>
+      image_path: ./images/landing-page/loseit_logo_big.png
 
   - classname: devsite-landing-row-cards
+    background: grey
+    heading: Updates
     items:
+    - heading: Introducing the Model Optimization Toolkit
+      image_path: /ecosystem/images/tf-logo-card-16x9.png
+      path: https://medium.com/tensorflow/introducing-the-model-optimization-toolkit-for-tensorflow-254aca1ba0a3
+      buttons:
+      - label: Read on TensorFlow blog
+        path: https://medium.com/tensorflow/introducing-the-model-optimization-toolkit-for-tensorflow-254aca1ba0a3
+    - heading: East Africa Cassava App
+      image_path: ./images/landing-page/detect_crop_disease_in_africa.png
+      path: https://heartbeat.fritz.ai/community-spotlight-nuru-a-mobile-app-by-plantvillage-to-detect-crop-disease-in-africa-28d142bf63d5
+      buttons:
+      - label: Read more
+        path: https://heartbeat.fritz.ai/community-spotlight-nuru-a-mobile-app-by-plantvillage-to-detect-crop-disease-in-africa-28d142bf63d5
     - heading: Using TensorFlow Lite on Android
       image_path: /ecosystem/images/tf-logo-card-16x9.png
       path: https://medium.com/tensorflow/using-tensorflow-lite-on-android-9bbc9cb7d69d
       buttons:
       - label: Read on TensorFlow blog
         path: https://medium.com/tensorflow/using-tensorflow-lite-on-android-9bbc9cb7d69d
+
+  - classname: devsite-landing-row-cards
+    background: grey
+    items:
     - heading: TensorFlow Lite at the Dev Summit
       youtube_id: FAMfy7izB6A
       buttons:
@@ -66,3 +215,4 @@ landing_page:
       buttons:
       - label: View on GitHub
         path: https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite
+    - classname: devsite-landing-row-item-hidden
diff --git a/tensorflow/contrib/lite/g3doc/_project.yaml b/tensorflow/contrib/lite/g3doc/_project.yaml
index b39666516b..d48d07be04 100644
--- a/tensorflow/contrib/lite/g3doc/_project.yaml
+++ b/tensorflow/contrib/lite/g3doc/_project.yaml
@@ -1,6 +1,6 @@
 name: TensorFlow Lite
-breadcrumb_name: Mobile
-home_url: /mobile/
+breadcrumb_name: TensorFlow Lite
+home_url: /lite/
 parent_project_metadata_path: /_project.yaml
 description: >
   TensorFlow Lite is a lightweight solution for mobile and embedded devices.
diff --git a/tensorflow/contrib/lite/g3doc/api_docs/python/_toc.yaml b/tensorflow/contrib/lite/g3doc/api_docs/python/_toc.yaml
deleted file mode 100644
index 1e1c44c692..0000000000
--- a/tensorflow/contrib/lite/g3doc/api_docs/python/_toc.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-# Automatically generated file; please do not edit
-toc:
-  - title: TensorFlow Lite
-    section:
-    - title: Overview
-      path: /mobile/api_docs/python/
diff --git a/tensorflow/contrib/lite/g3doc/devguide.md b/tensorflow/contrib/lite/g3doc/devguide.md
index 90e7915c52..0eed516000 100644
--- a/tensorflow/contrib/lite/g3doc/devguide.md
+++ b/tensorflow/contrib/lite/g3doc/devguide.md
@@ -1,5 +1,4 @@
-
-# Developer Guide
+# TF Lite Developer Guide
 
 Using a TensorFlow Lite model in your mobile app requires multiple
 considerations: you must choose a pre-trained or custom model, convert the model
@@ -55,7 +54,7 @@ both floating point and quantized inference.
 ### Train a custom model
 
 A developer may choose to train a custom model using Tensorflow (see the
-[TensorFlow tutorials](../../tutorials/) for examples of building and training
+[TensorFlow tutorials](../tutorials/) for examples of building and training
 models). If you have already written a model, the first step is to export this
 to a `tf.GraphDef` file. This is required because some formats do not store the
 model structure outside the code, and we must communicate with other parts of the
@@ -205,7 +204,7 @@ The open source Android demo app uses the JNI interface and is available
 [on GitHub](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app).
 You can also download a
 [prebuilt APK](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk).
-See the <a href="../demo_android.md">Android demo</a> guide for details.
+See the <a href="./demo_android.md">Android demo</a> guide for details.
 
 The <a href="./android_build.md">Android mobile</a> guide has instructions for
 installing TensorFlow on Android and setting up `bazel` and Android Studio.
@@ -214,7 +213,7 @@ installing TensorFlow on Android and setting up `bazel` and Android Studio.
 
 To integrate a TensorFlow model in an iOS app, see the
 [TensorFlow Lite for iOS](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md)
-guide and <a href="../demo_ios.md">iOS demo</a> guide.
+guide and <a href="./demo_ios.md">iOS demo</a> guide.
 
 #### Core ML support
 
diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/assistant_logo.png b/tensorflow/contrib/lite/g3doc/images/landing-page/assistant_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..ced0872ab2e69768cc3d1b759032a8ed7ece2149
GIT binary patch
literal 10942
zcmeAS@N?(olHy`uVBq!ia0y~yU}6Aa4mJh`hA$OYelaj;wR*ZZhEy=VxqEwkZ0Ozt
z9}lY9vKKu#@UUnR=OQN;c^(OqsN1tlkKdUxrDM?zf%ivd2wV{t?a_ZR$@cgiR$o((
zCl|e$vYMsjIy@Hb>#!536$l7CFrkCRD|h#^8M`@_UbbCmIHN%G?Y=dqpHHs&-+Dj)
z{nler$_$hOA0tc?5?=Scn{lw9B+Jm=Dm`yut#_dQzP)SQXLn6GJ>!;c>fN}Ykh4oS
zX6fq%?c3nv8+c_g?^CDW+mfQ~%0K_vcRg3~ot0t9-9Sc$i0Hpgdao~P9(Ahzx0!p-
zxp2{ydkvR8+qU#wm*)NbSJo}%)48ha>_1iWzwNc3-(Ba{O9uZ-ocZbN#=LcJ{!fs&
zve<mx{d=<F3=K~Mm;L!){JC=7norM@PTh}twcWdS+N)#hwCtsV{~ZmwuMzaS^WWr_
z?)JMMA0B07kY0DY(rN#j<gB)hr|i2+>(<1t5x&>hJl~^czt7p_?3(kh*P9Bai86eV
ze|&t^O{w7O!w3JrnIE}ocB9*0d5hG$b(wklBy(iTULP`HKCp1lKZQ7-x6@Bw`Sa{P
zcl_cRv*u4Jtv-5ZTkU$z$$SjI4i~!K-DAJ3=5vt0%q6EQwSN^4|KmA$<PQ@g!?rsY
z{)Pu`e_xsRa{JkdI?ks2$F%gn*e8EE&cKlT^YWeu)Av>tLG|ambe#YG+{rxY_c@+y
zP6jjn*_XSgthcMUwD<C=k_A3L`~LB<uyZg*-0iLi+PA>x?OMa9%deM6zWNyPbp5fP
zw&|@?nHt`IOFlJSzqJ3Tb>Da1E%sekU(97>n0EMBtF8KRtNM-m`K~XT+cGQu`~E!^
zYq;AP5`yo}5exozE9hp>+#|QxfBEOI9e#J?LlMJ+Ll5u$*A8B%`)}pTe@ajJ_dZZ(
zVldeLR{sC(`<KLn?`3EDrGEdqUq*s&_hM#-FY<!l`LFI+nQx{Qys!EgKR45dH?ONp
z_gzn#bMRHo>1SCVdkm}@By`@`S}%KM)?M%{`c1s*o{Af?^2?+f?thxTa;Mp{KU1E5
z-}9v)aJzWxOXdSHrne-5|1D1aKIvyv!i_m{YVr)xZM*$?R{ovFy4*P@Jo0(J|A*Jj
z3`efyb!z@U8+19b<avv%5KF?}Z|`<QY`?$dNT0!5z17b)F&y}zz3pl7Z|``W^{Hp>
zGTwi?-;0Z(Xa2vo?MhG2ADi+&Gc$9ic2M1lHR9LV8IHX<x=-tVUf|k-Tk@}KrZP7%
zBqvPxRB33+ALX}t+N-kTZLiDb30z}i*mzF9SM&YmDc2PKE}S7hS2&EHA-etUE~TyJ
zQBK)&+*0qx35CfqG(UVdIcaXaou0RY*6Tm5qBi0T4r{*6eW_mec;>9xzm_j$W;mT4
zur<Fow5}vaydmW2ZT7>*|En`>usOA>w%li@>~B_u(4XbvG1vE5$w)Gsnqn^$d@p~e
zPr$F&cMeTn&(9$9t^Qy5ktzHU8p1Q=X3oyPClV*gz!&%Wwx{r&bN?2tD&Syvv1@VH
zmAK@}46j?ZE5z;##z`@_UadKL>B5w$@sAW08Gdnx>TvsJR>%E#&A{;gXZ@*37p8pw
z@tKc}Ng#ONouUgpUpnS<=rL-%e#UJ0G+l@7te5L4{<B|F9T{A0ZwUm~T?)FOb5Fuz
z@^v<59<gupWP|Gt8C}rXFKZCY&%wykwm3LB>wglb$5~(FPg4vMewP224gS6QdE5LY
zN>5iG-okw6llbaCd;iNcOzK)yn?ECrVPlrv=O*TWODjOZx{Mc+66$`rvM*Tjd)ce$
zj104bey@sn>K>o=!*kVb+2BGAfrQr^qRU^j|5KT&S#RfK)nN4Wb^Jw!DO>6n#q+ah
z1nKb>zx=R{`<~IA(xe$C45_wOOY46eF__M`e$lD>oWkrMs{dd4^Wy~Df+d@>^tdN8
zDJ=Uoxo!IvowxU5bms{&9J`y4ct1P&)XfLermeO9Yf|v0vH#!cDenz7EKRi+`LU6~
zVBIOL`U+RJ1xsRQx1W8<DB(Zv$64#}90splmJB_!r`A994+`8a&e~8Y_<Z&bE#`nr
zhhJ|mKi;~P%|N2;_^~PPBj-L;U+MAIG;uBCkzG5BLfIEAIcmCBnjzuwp?Q;&Px+s@
z@L;M82TMokzhAd_>^(u@evRS5#`)$)rp%9<{ZM_S&(C)cH&|OStXa0`oPES?Mvav#
zW_dFo$eOv^Y=0$B+x#U~_ji}Nm-{e8@Fst`vVMtHy}=rjhNOkI^=sPZFZuoZx5XhZ
zrjF2kwQtxKEIEC64db8h`YUS_j-F#;IAV3*Xc==r(AFROHr!!I+WP*?j8FdxW$Px*
z+I^^qxkEEw{{2Jsm21SWw?Abvka%aOxon#xNW|yu+Qgj<BAW5Gg3JMz8ZGueUsJGz
z;n(hC-BaXe&0y51RDSTQU<t#sXP@t|@Y;Kp*5$n35G~%v;;`nntg(rWYCj8O&Z^m$
zlTP{jUi&aPB;@DvhXssBtl~{4f{gq3uJ1z^W69k>!>9JXuNbCW;o&uU5dUUw_hi}7
zz3=NQKQK6Qo^bxc6sN?0Sb6f@)$NK8FYUcHkMrRyhqUwiR0Z^lb}}`&HZEAMF=cb%
zu4<u>DaQpDDJiIYZjcc%ty<;xV2VZ&pJf;)BO~vr!)Hp~Go~#I_dlWh^73kqYk`iR
zX58Fez4ra`SHZvSf7Dg4Kc4^A=;^#Atc+ZV3v1(QPh8exVmPq6-u5V;J;%bwr*AId
zW>{jfv~<C*!qwY9KMJaUx;iZW>BBRnOZS~xee-?Bvu$SzSFg|3`29Eb+2XMH=X1mI
zpJtf_*k0cG`|@w08#AB&udrp>a>GfZMlzt0(P3&?*pJxAiQnI8d@JF<GvlYn%Ke{b
zU8<ccuhfuUuXyEpMXlg=Mur)exBqbY`rcOijk(cf&w|-k?|%(my}elQYF04kp4QhV
zQ#t%NzFzn!)Xl}jkh8$bTKhS>@%Hp(yq_{x?=O<GW4Tn~rR@ISiNjDzqG59Q9Q(ts
z*53J8_R;+4y>s>YUrWy^1`9g=DQr<|k&--7IwhH5!!K>oYw?_+oX-#BhQ(huPg@ta
zfq!P(ho`0-z8qgS9hUwt&*C7*|Kn?TUii`3&i^-GDSx+dneYGT<^wGO`my#qTNqAU
zkNYpUX8oqHskwc!x?f96CvcvL4YL>6{9p0P#UJxI8|Ezd7n%Namz}Y0L$uf4xzD8w
ze%V{HJ8~|Z*7x*bX9B~T##bNn??2a{wDjMp)FsAu`rkVQIIUQoCotcW!DQmK-`4h%
zroKEqGn4Ud+^g5Cvi>kC%y$fMTJhGhL0_1`H|d-6bGa!~Uu>S4*?2eQYnkN|>Cf4V
z*_BlkuY^B5JztQ)ch<xCQ=hLov*kw_?@H#LCBp0G|H*GrX^FB9ll53G$iS<neeQhf
zvRx;B?Y<VmzUs1x6@$d@s&@I73$a@Msti&due(kizjCQEZPoO-;V-xR|DO+*{QBrL
zX9Jtw-_83@oexROdJ*CMev$IZ7gMG!uP%)5%a*GXIOMQSg+Z$2?(LlsQ<tvWwC@y`
zmetesCnooD*mFEAW^qtnadlam>P|i5tC8C_8FhE{e0aj@)*|0haQjfyE_+`ihAF?a
zpNlOxu#xLQRnbhzhwO?adyXtR$^PKUf8m)c8S}KiEOxvzeKjwqJx8K~{vuBnhOIj5
zPF)o7uAM5~{fF_xD#eHFiabxuGlUo$zJ^be`o{Qyz4TdQR~yGMK0}Q^3N4K>&Xc(h
zT-iU}ICyf(-q_{A7Ymv<?qq9~Z@DnJ*~F6}AS%-G;FeGK3nf0VX-OTL?tO;Kfg|zu
z*_U-DoD2zH!`20~W-RCry7<8Hlj`O}{7x^*`epqmGcbhJUuSNsW5}6h8gO@ahBia_
zOwl?4r;Ezx7kM%?Y@h$f^rB^LxBn8Qg?ucMVs!#e`a63VW+cpUpZhXWFz=j?ZwRmb
zBBh7FKDmMnUC~xA$ROM!)v>tiLd0+D{SgZS&%O+KcUt!tt3u0$`3y4>)-<kK+jSu!
z{}2xYN8-KO7KRxK&(_48Is1>{#h%R`@(jl%C29l?IoP=}_!xxcKVQ@|#Z->%(Y+fl
z-P+_^KKQ5TF~~FgU?^x#nK<zwyW$;}Osm#2ED78U(G2V6|FJhd%<p7T9{JaS>A)H1
z1`C!KzdmuNwaK?!czLsehp{2MAuW#iN!^>7l63-3<tuHbG8|A)FlJ}ujK7zX_QR1w
zR(~T$11H<!2JR+~C%Pbw0e6{{7}O+e*g^zi?Dt(h^PyQ_+V13bh8YPTWC|P~%=<HM
z;X`)CHxp%94zL7+V&H-#gL%Nj2m4|YK?b@`sA*!Dkr6a&lIWw;$7&hAl+BIXz~AtM
z5#-PpY=!ELca_$jyY)LXQGQe7b{X3%iE~A`cI!S_E33F&+<jeXnaUgEMVqb)Sr(_Y
zE&BR_m)raIa+lx}0ulA8D--1z^n=!#ZOzbT*b8#%g|Cx}IreC;eC)HH`}Yp3bZ+N!
z>v~y!`#n2veE3X>`IMRa&%NP2UAFez+Rv}{n5Y+}$vVtB7#5Z$S^9VV`XXgf-iN2F
z&+^!FEPOFB{o;Ja%-&_I>fe?gT-SN(o4-=x@l`vdA8b3L{`ydq*tEAlon@Er@t^#<
z!9Qgg+m7C~EB^RvN!{t*pPTW+kz;qVUz@Q}{=J^6nXy8J+q6X9n1-IPEbu;ZQ{U-e
zUXAaTx;b23QeDmL4MjZslWw2kvFAv<DLp&=)xW<1H?7o#-W&|r;Jd1%ICtjSvvuob
zVlEVEh;nRIpC|fc*O`#?t7Yt>g60$A%}xF&wA|a_%O==$EqKwU+Gv#<1!++Z+f(dh
zrbVVlt@T^Xl<>Vw@7VtTf95fAfGh!36KNv*?}~B0KEBGu^wZCd&E1!RGS}~k=RD)}
zDScKQL(kNAum1>$ioR37J{y!owpYKYxFk~gG;hXj8=-5;s|-rF>b<dVYl{8He*EV2
zA}?@ZGb4ug{=fZQT|D0>od#ROag0xqS5j;HJTcC16=_)$niFT%nyrs!j!dlcTfOxE
zzjdZp8(*1d87Xz_?%}cLc)0DAgH^AI6YsN|h2i$5*NmdAbtF!|EIAN5GxOtHsU+sB
z$}3)Q*q8N!)Nb(m{{8ixfB)<4?yzrfKeVs7r0({?DavW~CGQ-U?mK1dTL1C<tG7iS
z?5E-<s!L1O2^@Ov!Q6Iw`CJLkZ`xD-Sx51+Mmt$9`ugu&e~Rj_Aji}7yRY89>AI_+
zZGO#(BOjUt)b=lE5-$Aq(qjXQ>;HN8<}QEPbj{&w#nzs<155vVd#bP6T9Pk(nBPg_
z{{*fst<4@Assr9w=WE<BZq}RaYPk?p<3z6fw@=~e{+(BDSK7Vg`2KW4BB*Gabh|6^
z-!;#P8|JyLd_V1R*436Gr`M-k+tZ)^N;!ABe#@2XR~>g9s{Pp#`H)>vWZ#}PuC`eX
z%e{D9_Z?r&c>1b$`xItSediyxY5UWXQ}?5`202<asy8aO<O!6Et@^XGcBa+cKBoB_
z7@h1ZmafgUKgAhVzx2>A?Zmja@0)*Vo3HK{z9jqZq#TP<%d(kgG*7wgx7z-%(8x>R
zR{zTD0-EY`_B{F{xn}(*-O^k8I-mY;xf-3R7%bpeq<KidY4e<TY1<h^eY3K{^`pyu
zk~p2t&0Dv9yNsu06K_a*^xs&1lON6xG*_>GJ^2-@vk~V#Wov;B0jZ}NOJ7deGre9V
zLf&ha;d{<KaeA_@mdSqi9|^C$cS<(Wa=&4u_urYT*WNqlYTb3;#%h;&ok%0c!oYQB
ze{ZU9{{PeYT;1-2nza*(I&O7`r?aftckUkN4EYU#XFuwH4Yk|<Sk->>%Bz=F-kF}v
z;mW~#aq|7eGR?OCpGD~3_Fo~%U0EOZW9`(&bKRSb_Md$9{#W9+-REAd-!tiNUrOFt
zA0zh7lj;?J*FISr+H!974TrC1$}MiN?~u3eOK7>k6B{2l_w(g{nmMypUk=LDH)4PA
z>*HJww~U@23<5v*JkJj;D!j_C&3HeYUuU8u!^7j@3KEJYhx?*xBIMNnB%biE<FVX5
z?F>Ul!W$kI<rX)KDaUXAlXySddVAZUJmtTPPnWK*XE?~Qkg@7hyhi$yZ;A3fJaw6%
zw7kT=fxSqUNyurhsAG}yzEk)238c=BKWy}S;jU?(x0;I?P76-x6>^eckkOl8rgZP~
z`Tbuvm72cf6=>Wez{t7qT3%_^MaC)2S2RC+Y={?K#PV>qqQ0#6FQz&3F3#le;4oy|
z;OPBdTznPFL;uoyTnw33t?WiF9!@KkD^1uQz!#vL?tCy#JBIU-jf472?gw?7xEwhb
zE~_duRbT#ZfA9&Rmh~>@)-ZEC$b8_$sd#5vyLv<WskO^Rw=*QhMx6HWcc^@MY_Y%;
zfma!SD&*pRYX3QCza#a6<y(nM!Ut-9@*FB!)GF}(fBVj}-SG_@&fVPZ5&ePjgDHzz
zOO<AokejcXNKUVqY6oZI>{oAf-6z#CT%XR=$hk1=TK3iwt#AL-y(G9^zg{AraB%HQ
zyF30#Eov>db||r{|F3U(bmde^f#d0gOH^;Ly>QKNTG4s$$D?O@f49$TI&|g2rl%99
zJ+OMPlf#dr)S&Zrp5OPyYC==r-VT@%Wi`X<T*)Qw2UjlDZsZN9Q7mDuv^Sl&#VY%~
z$A-iyOSjE){lyqkAY$`Jp(Sq9gjpVUw~8%P`D*$khxH|UL32vr!e)Lai$#22&3=Bk
z->l)aR<=IZ@%3*{_X25$J`uA&3N7Wq!jd!1^V@bA_f5JIb52j`FT)}Ke=BDkWLFe9
zcti4v%d6BfPOhN(s}1Ux_#gZzDpNen@AP9%)uY+7JdWJd5?LaccW=XnE%q<j1@_f!
zN;n8=C2hUpa!Ja!_tcxxWG{iqjJG}Q8(ynV7t#8m&@$UP>to{Cy}$FfE|B1zv{~*F
zzX7N{@XkAO3aE7B5171?yNtK%qRi{m-<+3(75-FI8aDAeY1G{G^HtNjZSQC(^)xJo
z{mPW0@|q;ZCVnT06U%4S{_yW@?`@G?EW&$v>p8XDgNX;^l@mVewkS;yH>>}7-;S>}
zW6AZ2+Y&?%I39Qw#qmF6?b7Pt$t#Pu38ZE$`95J;gUQ@OwL8TWXGbgkVL!#Fn0|c`
zkEt*_Bjficl^i!?xE9X2Zfw+C<e+k+JfOqPYLbwzV(vlXgO_*C|09<;+gS0-lW<LS
zm!`imsTs=_X4uZz!q(0>U7_H!N{eTWn}GbR#rzEdsTs?@mX)c!<X*wI<nj*v`K~XX
zxpE};7db`E-D=)-A;vkcw(ASqj=Def0=sq!9GbZ2&0p;}(Qk@w(-q>__RRUhb|nkc
zRJ*TGqnNW!$s=!p%*Mv;GQCPB*8^_EtNdkLH+@bsL)(UxeEKW)eyckr=q!89Q^TqG
z>6+PR7>XJ1IM{Uwv~9556<1^Vv?t(3u7=a%ziKlX3u-K9|1PpVwBw=Ek1e~Vd0Q$!
zSu3mIIC<-PrDBFV-TQkD^419K+9xu_*(p!ojZ;b|{cj4N!;en|?@k-G6eYI={P@VP
zv3!O2!+;jG?7QA+j4|<bj0f)sDBgMUrRUs|IYOMjd8RD5c+W45@d|f}&4kx#wtp0k
z?Ek+$ps({(7`NAjgwr{iGuZ_WR@`19$I&A2uMCuVrX(w+pYst}c(CYqm67m)zfZ1t
za@uh`EHjsw?QQ99%hs%9a!dQ9&w)FsUzvC6$2&#%uMyZ)S7a^P0BV9QX5pQ-x%))M
z2CfYZjkXJ&ew^y-tlz1wI!lOCEwC*uO8lGpiJXSlC#wZ+2`jfOa-ZkQswDL0x`wlH
zXu1Q3K%%xnT>po*g7YGcds?~NR3h|GUGOkp-e)|Qp`iI=*t^qfISM(-FFUNu-fele
zpUW*`Teh2+!aSzBHiimmIhA;)3C=sB^Mv-<v~qdOjefpRfOpE~P?iI?L)b56uA6Vu
zX3$<B{`E;{NWiN-ChV;lOBnx_Gid1Ft(a1^U1lNQ!*iN7KOaZw&h=tdElQIW;M{h0
z2h$O;142yi8RU4fp6|Q!E$$$@V$b#SuO(+XvZ@Qc@tm?KGVg(|`{msX9}+H<)|>xO
zXnFiDs8TD_AvWT4K$qK3rM=P&D^z{I_GmiE?OPr3E8c0Az_hnNHC!ied9TFqPYzVv
zH~juo*<#yLpE}`=o7Bd~G6%#&7E4}Z-QB=Yp+6_#4a>XJ=0?FBiD?SgSEz6GG*&uS
z*H~n7xT^Z}#4W}Q0q^=`-<>|oQO_Cp@AaHp((Du4TAJ>56qy`)o)OOA@V0yr!-9)d
zMH8x}`_+6UOK$2+S^RGmzb#WjxS(SZbJu}@g-_~?+D+88HgDB9xnGo@fpNmJ;2rw!
z-6SQ{?r1eV5l&&4m=L~5W$U-;TXr)%xN_;`9sjU}B0Pe5;`8=2@FcEy{N?bMy}S&s
z5AB-5a6|YCXU4nJS2ft*wmN>2H0tr#&>lD~>fPUbshQ^aY>XS0uSoabsW0#7>0t9=
zv3TOl(nSWQQ#RKzB`hk)y2yB^U*0|Pg<6Pz^5lg}uD;shvEg~(tg6WJXm1uZDIe}5
z?hSE=b}ZxDGwGnqoY<C2S594JIgqlJCBfQ(J5gc1szc_=E#VqByfyr6c`U8x9#CmE
zX0(vt=*||XpLSq=XG<yTsbKcC?i>v#(?BVmb&nFqLo>k>+?;<{*KgVSR-DtUg-Lgt
z^IM;Q#|$%s8xkEi80>plRoHr9_g#)3I$m2Yg#W*`#3gveL<S$8lD|(*YfPWR{rZmd
z;d?Kt?{XZdc(7<!<&MQAOs32X6$<uKFNd=z)*O5(IoI=nt>m+5vYgvwTca$PPcbt1
zbM9|Ebh7)x*U86S49Ym(nka~To2zl<(TcnRp3*s=?6S-lEDqk_yfgj$;+llZU!Q!R
z8?{TnF*XU*2$+~qIwjkXr?pn^DNjSABmcsdgvoc5{`}~=EB}Gn>EN^lF`x5eZ`dC=
zxc?4g*4C0$uTQ_evQ=9oXmQJ->C$%+a`YNID>PonP086e{omCr-lw7rE&8{2>bD2_
z^$Ps{d(QV02a|Bqt?<5<uR5p9rsr|C-gW!Qp5UGEI8bX&U{JxNJMXHBA~P#@i8pel
zUP#@nbnsZ<j-Q+KZkVz-1cFM}E332bOpgwXC}66cT7G=affPHjFPT&F*40S-{<mPl
zj)N82-yeU~SZ6D)wpyy}D1STC^u-m$54qQK{T5(aumU{bqTKS`LBn-IOi%E?Uggp`
zFJB$8<l3<^Pol)nx{cuiqq)bqvf7_UT)E;ct9Neyn8D;*WpJX7$MuegyYs~=F2)77
zvl=*_=)O`AKgIj+Z{)vEd!GG!&940U+1U<@YN^&c0r%Hz-)zju5G#1FLi+ntV-2ex
zeC5C2mu4l)FNtAjtGwO*`p`e+uAPc`+&lNqV7M?hOZV<<_D@moP6vw2Z)vRIQvdV$
z<m;!G0w*S%U${X1s(IkQ0~S_U9}^GtPfw0M5b;{cLoYPkU2}T_uiwk&ubcj|bX5h(
zbXxq5XK7fu!~5yVJOz1IMXATr&di>BOPXC!<H8n)ZBnha3;7-$kZ0&Jnfr+Cq|&?7
z_ZKK^Ew$!<;w9K4>%O31$<j-4r3c?R7y5VT^OW9@S7kU5Z>M);?~YToKXtf*)hCDc
z$<KFLaAAq*EUBWhT$LMPQ-0+AR_6K1_`ueUi#uNLVcjR|Dd~-0pY)%y_2gz;8^C|X
zrD<=E(S6nJD{lU?w!9|}&fJIk?N8S36l;x=*q*lTL7h>1!?qRbSy%T|iTpli`_eij
z&$hQroT0!?!7ljkVwZ!@u5CQCer32Kr$pl~^Jx9(@YSnxG=7zD(_7R2VV*sMu**(s
zyTapN9Xl_bZT?tgdVS%ZFJJiu3?`kJX>2U?X1T_n{r|4#&oz$p{>SiO<3fd>Jl~(z
zFKCf|mu5bpcAb8!)ecWdhSiLTGfUl?_9j*Q%s<C-<*H|v07JsGhWU5S)mk<c%?qj5
zuBkjL=~$z|_<;}91buaApQmh<@rU^TvmW+c;=EG9z`&j7`~6|urU}14?K@*A^37$&
zpST}J1vQq{>GCfb9c(kBpTCJ-m@xZ`e*No-iFu#@=zM2xkY@S4E3T+d%I@%A6}O4+
z_pH;8KT|h-)#N`KwHws0+>Y+!*;!kx60s=p=X$=|Yj;j5W;np?a@X+rygyp}Ppqc=
zxxd5mb9}41{d&I0MPClO81Go{^umgq$}E<=gVTSSyUzJ~{q8j(Mux8TU8|n`I;ncd
zU+&318KW<v`%3ThzYkEJF*mA&!IXg^==e-I&a$wGZQ*JAAG2!xkH6Mh`trn{N@EU&
z1?PJh@7Wfud42l-?#ERRFH7wUu=sba*?Z%2wv#tMSGmoSOJdlwZNb;Sg6ZNb4;Nee
zonLPGd1t@|>8qyhObjy`cU_MAcI?^CxSE~6rqB6#nn_lxrnUN$dKUlvuTOtwvDJgJ
z`BRw&adzj#t?qNau6W))`^&up-4i$Lixz*=lz8u3A`?Ty_HeE_^LmeS#BbZbwN`<-
z^gzImkL!1+vrGS;leZ`C?akfu{^)(5{JK?LA2cWj8qySI*tLdl$F-Xi6RxM;+x&QO
zLdv3duiBezg$uW73F&aKMuWzCe0Os*FtGE^o&7}cm3;Y5>#`XAbsY*jLqxZ{KDtk8
z^69@%!nU4$Q1waf>&FAlDF=9;{5>?ieDBsv3j1GH6-j=7lxG^N|5>SYZ6M3z+*gvZ
zp&umnF!#ursYLOw-@e({Ij>er<jtpm8}U6Bulrbjr>VTzeT8waN5prxrn_=eZ(i!S
zecin&cI7sySBL(s|Mu?t^-Y`h{r(}I7Mq>6Bk(Y<;8B-Frdq7RDjSoIxLndokXmfl
zyrkDo``WEGf%&1HEKWWqCJ!4s6*_NB^5OK$VtKYEdu}qj%31;61%h%D8bS{T7^(X4
z&fC1sbaU0TOAfkx>CfKGyqW#}x_0G%-uZj4_h<ioSz%dU!>5QjEsj&fLQ~?}qdA*`
zYBywGPWM`u|FUS;>L*$GCs*Em)4tT#e@e{XMNdjHHXok7cgM}*T0j4(bL#fo{CDZf
z{ntgicAtN_O>g=6>C1LKy8gRtZ|)6Y>ql1@8IBpgyH(<O@@i?VU-`5-a`)amNIT4#
zvv<qOkCRqcdj;RWvE6!|)U`)vEDRYM5<4!Ng}vt9t@bzc<kfO7Km8oDGmfd7<)>`>
zS9J2~)5B43x^fvArhi^tS9<*Tt~vW+i>x->G-$kLp1pgHo%It7>uqcde=ffdyL5D^
zZ+vy-7xn1yqSE~Nul;$d7xn#TX83cvJ~(srYj3~)Tr-^n_V}t5A8I$gVMwUiweS4P
z*Q(2V&$g*%{{NA7uwtw7G){&bldr6?llO%dy)u`cw#B`!xcWhxg)KwE#8Tg?_xW*)
zPrp3jt-Qj__TKidSGg<CF)+OQ*x&w1rj+ArMCPi~OblWPKUUx0w|jow&aHx1DmGl)
zpnp$-!QkTQztj7Uzn1#?_*LsV&+C5AwlOg9@E?n;^}BEI$oS>Nk0&0_NN2dPcG>rG
zOWWif=~368ez{)!^OgdG-v{lTyVgAk@w+0h>eso7Qq^aS3_Sct{_F4jwOrV~L2CWF
zp76*=AxsUER`Zuw{{6CqZ{ewmtxu=i5n^cAw{pGZ(&>L<d3R)2m;}wyVKA_plGgHk
z;je#*@qhL+GzkCS5MR6b;oQZuzUVvmZDwZhyRx#b)^EG?UCTAT{yb;jZD(URQ}L&K
zYRp^HZJ%Sle(t^c>kvaj<loQc|0cgYP<8L?H-?6{_dd_RmRkSS($@5eg*n56im5U8
zQ$o~l_P70h^p%kz?rZd}TMzf$kYp(6Y5&7BtEPI}AJ0SvnfI?-_PegyXUXtjtKFN&
zIsaYP>@s3_pfUeT?3wkyUjAHo_wTeHu?zu4rFWhB7kuN-4m|pwq2Z(S=XbyL9erz)
zdl(Wb49>kOsygPN=j!eiU2DRy;QUMJw!)dK>de+0ow;!j!;4uZD;;Bwy;^^yh$+EF
zA<f3<nuK0zYI1ySppHDlF0<Nm0(zzYZT9_=Jk8vYsCzG@=+{!gn0LR-{zmHXGwho4
z&ZuGb%bg!}Com*@EcLBg|M2qZtJ_%^y4OE>CAWBacuB^=^-K&KE?yCzGN(_%_V4>u
z`fLo`wLkwyuQy`2<k-&4sI%hhp4V?xm;Vc8cgR}(nz`ZV<>y~YotNK_|NCZx*lwnV
z*6R25hfloBD9!!V&5%&oGrgR5&bAB6j0ch=`m1$wtM2T`)b7piI`W0_!rCR>ytB8(
z)fW56Gc5IuH_p1173*g&o7ZtUc^8AkwMAN%w&gy`3Z_@vlXo$^2rsWWvp_ccRq-z6
zW!w#V%k!m8WZm1_Yy*$GFjoAsU(S5Y=Xhvvtk`Ad18(d7UzqUn$LfD(%h(&%`Od$6
zjKL$@(3W9J%-!rG+a{W|wI`SRurnN<cO$Uq*ZExDhAlyV3w_ubVrT2!{Kw$&)z?s#
zVM<J_c{Ae#xxSa6Nbq}X+FX)RGuLDpW5dV!w{_SYq{7Zw=r3bzm~{EzO`e93Sh>&q
z4T-wv45K!C#ZF$=`;*-va;2TcVa5r0AolfT=OxzOT>9(ZjCzJCG53?Uq)pv5XI|e=
zc8BSgV~u4PR(AhncQ|?VUL;?G$(v5AXVMI+%dcgeHR82A*Y}gn;pEG9x9n+|8Rc{3
zKl3(RTBhD}P3Q8leZLmX$Y+=m^Y>6j=CO9hSgFsv4U;Y}yvf%PQmX<s;xJqGw3Yd<
z{g2f!FL?50(ZyM*uWrkR9|iO7<gh!evJ?5t+hDhRInOnn;O&$2Aa)%G*;SkS+&XC=
zV?fcZ{4HrS+hm@D%rN=-9c1%|yB5mNq#0uU%B9yzE<1c!U^&Rh^DoU7n;!mBbH<)w
z*Brb2EoswceF->Y&)^jto46xw>aRKPI)1VrSpWTP)$KQI4y*PFfI`gwjfUk~>2;Ay
z>Pnv<PnKfP$gR3laaL*V`gb+qKFkcN;ol|JOD-|8k<who*l_8Jxzu{eCF;pm3=;PD
zv-X%Yn=|C~zhp?L(1;5!(sIkOUYY+YJ1L4m!}9Nej$CzK{v)%KqZl-D|E4N$>-{o$
zh8%;}Iz98GTgU$XznxUYAaN~eI&<Te-rxV{cV#jycydL`Sk~RWO(y-g3FC#QUm}y2
zJ<c#)#@+BSKIY^~;|!~5yUc2>mhm=BT3y{ct8bzB@nV&vTMRFX|8g$gcJ+$sOZlW<
z3=-d5VvDSHY-OCVN}IXisKLI=xi_{=;N!^-WM$}Hn0;AZa=Y%5^=u5?OP_4He?V4s
z>cYFFbqpIKK%w-cF8<Z*FJ~DNKJqxv2`j3)a^q#F)t-6f`;4ctH-s}S)my%sx#6hk
z9f9T3=lnbSVy{uI$=7P8hNIWEf4%DV{%OeD-lYr=5*Em+E<bK&TxWG>?UL?fONM}1
zuZ$jVpZQO38aG3&U%AwQ)OB@b8IO23#xO{HJ1Xt%r!R5nX-<up$~0ky*vacm+q@&K
z=Wz>q^B!Ps?3XyQbk*$};rp$pNrSS7r29bwJ<r^)OBoshuP@tg*ykNEd!K#WS5AhC
zpVo|5<CJ$^c>G`f@F~gXtPFcDeMp#d_34+MjWH+srfmC_!Ej*p%dpG8z5R|R=tX_?
zWn^#<E-IaSFlGg)C}jN*b>K57NlBi2dHv~^FCQHCSTZDNx4A1%jk%xiqJH=D>&oZW
z%nW@;H;AQAoAdAL1iwx3rIvg3`-B<lSmp#5Rb5)(x9U-6J;S!#jI#Y(pS*hBRx0(?
zx7POz6GP6LYvR-9@SXYoW98|W`pMH6687Hx$zJQ{fA;B*m2)=K|6?$CVzK?nm&H4!
z?Ek)Mx&N8H;o-iD+LJ5ic7MG5dEPhn2PywJZbTM+?PvROMefsM+j<7ST^~1ZQoU|2
z{W$p2zu>Av@}LsKGOo)`^YC)zr7QF2bZ`F4n2=Gvf9aDg|Bv*|fB*CR-NSVZ2LH5W
z|Ih9FAH@6Z%<=!fK_PW?4^zX(_^gvx&+~ludF2&dds4NYLG6p;!e82|%ae}u?=DM@
z|ND-iVdYJk4c#`<1#7OJ|CU?()!;}bBSWw$U*!EI_VV&c)~`Oi{E)Vt&v%ca7%R9Q
zR=vJ`%A9|vZ~V`@dM{5kR;HVm!JcK6{<JxKdcUpz#`=}#n|!TKiexzOAz;tCEmyyK
z|CssklKIQo-nBjY3_SeD->vh!K5dR%<cEj$8*YZTmtFt)(TE{o@k)J({rVT4etFMr
z^h>`i_t)=_J5t_%Vm^>9k=JqAJGi!DPe@#mmC4S}zlyVVo7GnRDJhZ>XZUdS;4YJ^
zsgIkRA5Yw&y1ds?=I{J3mUpk2e_amBZ#j$%hsz)LOj>QPxpUvjCtp5j?r;h)tKM|}
z<+pt+*ME$RW3X^#=k1k0rMYw0pQY`b`MNvz?LYnU<)q;4y{CVipRQ(m<#~zw$*bbO
zuFjQ@kH7nK-NTC>j0|V~>$ZH{^ykNyzxn-Y>+Ck%bpN&OU&{S=J61h;<@{LD=5@C9
ueZwaf<$Dfo&^Z19w1O08i;_%<|Ga`R9!e+oGJ{sCGI+ZBxvX<aXaWH8MVgHO

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/detect_crop_disease_in_africa.png b/tensorflow/contrib/lite/g3doc/images/landing-page/detect_crop_disease_in_africa.png
new file mode 100644
index 0000000000000000000000000000000000000000..45b3b4f6fe9ce69508d488f761e29f90c4304040
GIT binary patch
literal 578440
zcmeAS@N?(olHy`uVBq!ia0y~yVE)R$z+}b2#K6GtQs93z0|VEpOlRi+m(1dVoWx27
z29M6E)7V2MpA<X({P!hk-4e}(<}Y;{zDVsV*&sNHVPVSz0T&$&C9NG|E=x4EcZmcR
z=r*ij6+C)KDdNCBh3>9RdSWiFPiqe-zu8xQulo7l^E2<<-?_8+`MDb7=P?W?`e)YE
zap|x+Xz4o(JbLTa-PO17;XFo;2@HZ7OxkMRC(3#GzOi>adwc)uM)^*+lH(O>*Z2Sb
zAFq1$8Al|8!$FSY8(Y_ewD>D8ZaylW!q9Na=}**Z;~&N`og3CLXbL<OHlAZ}e6F#K
z`SI`R$%mZo2+A=t_@^1MaxzS?Q>=?p(`G#B9OiPP=lHWv0^b~ObWS~yqP&Oq;n9^V
zGmU$$FnoFZZ<6}Me9Om2EN7l!F+Be^HLQYv-GupvVm{Z)9seJe)s<1ltD7=GnAJ|P
zW9kgc?N3bX4L?6(YJN8J^t|5rPhBSZ|FGHI^3L${nY5}OozIuC{5oH3bG=*f+~k8C
zv(p`;IxOtnUr1V=_c?edYSEK|@Q=G>+APw`k|Lhiq!%eOJ&&9r^J=@mW4&+OE{cz4
zJ!Xx2WV?*(|0aDsXPNmEC7vnYQ!ZX;)@R@q!#w-(L(OlW4;?l9!((1pSLe0CV&3FO
zy5G2sosSk9PdL5Jz&xVH&eP}qgGx3#1&Ip|JR4Zj8n_+U)U}hB6?`}OC&g&4XxRL$
zYmW?H@4@7mI?Ezw%)HDvqq4nt{?EUEZ+>Sz`B=OrRg9q`Ou_G4Q~y7fhU61-r@f5-
zkXg>?Fq7rMq3UTK6D*%QpZ}9E;kqtoN~2JK^Sg<525e#n*en>tA8^(<3UDxAOi)c|
zbeX`#l3*IaZq`^6z){v*yFezS@$dpZ8FqC?ZVPt3#?=qhUa)>Yplrb5b|Bk;&#p<6
zqp3iEb&-?T1gj;?l8#~mvPDfciuD>SU2RDVSvuG|7nEK|TOpv;bZVh<fbAFK60uz@
zvF*PWuwM``;o950?|{Y(p3{d`ZP3f%x||q(gP*TC`~YWx)g0dE2dy6l?U1r#Ge122
z!R!yV6#{j<{tFnoS|+(?F6ap?nPRQQA==#K&KV#bz`8;x#B`P5)wWkocO7{b9SfL$
zF~UUD*I9b8>;=0Q1t#2c`|dT|Yt(bUzHs)%xi73=D1Nc{W!=^kcu*qojUkVZh#KQ&
zsmcAC2d^ZYOIVliF44_Eeq+QMn=t<D?ym>kk|%G--e|pneS`0fvK;v`3G2qg52qBk
zS-8gt^mU#-@bvMig5w3^h1(02clcKb#mVM3em`{lLFJE_8qR+j|Cs)n{bQ}`QgFSX
zctf~}Bj=FRA-zL_PSOv>5?c&hr>MOWl<iDBqVh;BNps`Giq;=9e#~=G%AEAZ!`n0F
zl3S4GOO}^XnH^td>NuOJ?4CS*vg?z%Poj#fe=`2GZFRc1bVopvhUApFQ>sp-o(evt
z&bg|klf}77ccOBj;zgap=tsROjzR91LW`EBY)k2UqIoiUQgRTBW~^qp=CUhnE4fx~
z4q^@xz7&#K{8Hg%$V=Hv?n~Eu@1Jt;<cm`kr#7U1O5r;lGfi^Yq7_jg<yzglW>&RS
zwOT24PrW)h^@Q3<u~Tx>-%gaBK6&EiN!=4pPg*^}THjowf711-=_i(-ct6Qr^S=In
zSI5wUp^b|-#4Zecxb4uAM@p{Zu6;?3Ns}Mh9@Y0UQT1P%veec~*UNv(#Zzskyr+h1
z%ZKb+WV6kqY~o?1Ym1s()x~W?8bf(PokFgyk_memHZveJ_-okH)zenSg~<j_4lfS$
z4$=<Wy})dhUub#I^Ht)j<3s9QCokT4DJWy<7Qb6N+Pd2M+dLjmaO*!dW!b#69ck;*
zCO;F_T_hH(9k)7SwR3h%bgZ>Q_1v=NZ_ct!e6i|*>HGbrpIfqSUfz;-VRyyuPOs{!
z;@O+Cmu>IyUm~^Xe@p&m{oTzL!Mw|H(?Q1OIOiJ|ZW&zMaOlG{!JL%jBc@gfSGG%R
zzL95`U-<PAWAdhrs|%MEHh-MZxu<i#=Q_{slhVPtmnAOSE{k1u|LmHxL1+4A*cweY
z{BC;QG~c*BJu&0prWG4@WP5J>vgypmoUJw4sVPS{T4j{ppOqz>u=nb&!@p+tuI)Xr
z>|)xLu-j(Wwp}V~(DUYB_I^S9=EFC3-h6sv?&jY)`N{JW>o5KDXHRZVX<y8zKmCR7
zLmkUaDK`QyPkdQ%qVPiJGm+O{7GGYlxy|_U&Raicc2DEJz4X}A*42&KlU^HMH_uDU
zW7=+<H~HPScdzf>whfTGC&wyhJI`p|!+B5V`R_H^E3`*y&#}F0_g=5s{yU-OPtCU<
zJO&|>I&QYfcdlP6-<!W&{-*ww^>^OS+`n-Df&ID+d>?pkNPn;hP@f=vLx`nG!m+w}
z9?NUi<?R1AiR?D1$UJ)Kx#Ba$*G{{fRxVua)ZbReQ_Ou^^qWSEeT>LGK}E@pHjC_%
zj4ml}(kL=GWs+5U<!r{&jJ%R;9z8XsWol_jGoNp&J?%CvTK|0fz5P!4uGhL=YflYd
zw#H_^SCMG()s+!GIVo>0-dKM|A<eW{sZTIm()?Ue`rGuqX}dRc-&`!??kD)%bx(3l
z;Gfeh$6NMtF77<nsoI$wQ=Vt}J$I6SQ0S$UqJW~@PbFR{Ucp*+re<Gt{M)9VdF>PI
zecJ2wW9g5de>T7T_VV|o<E(W}Qi>`uiaBQ|xJu2-o47BdJ455L=4PeOmtS>UUA}3_
z=8DL>**w>C;y)|h*8aVG>x->xw#vRXxt1GczU5x&--Wyv+IH~DT1Ht`S}IHLHNTdt
zl;?HtOZK|Ryuf$uXD80S{VC+L*)!h!kM}N3(4VM(LjK094O_FMtE9`L&zGHkn|gb{
z_y+Dgky)bCMC$GyyjA#^{j<DR{fv8(`!}agKfA7Z_r~3oyMKSTc)wD6lHMsjse95J
zw(pz~78CF%@&2mX>)va;R(%ux-u;b#+OO%`re#iltM@x<UPRscLu(hU3|d-L{qzar
zx5nG-v3zD>E5k&?YJX1r{rIo*z4lqFOILgICG)4t)zzsy^0@B*W&W$|DcM}v`>uY<
zVt?)Q%IbCQ>(~9OWbR&NFz=9Z>({lvR(<DjMfT#t#|zbdE;}UrpykDp)$^0<-`T$`
z-}G4IZ^+}!xtFJ=uSn;bweXqg*~hw_y3K1ZuB}b4JzuuPa?9MGN1sf;RV?*f>)h$N
z_e&cq6yG2Ea`a8N@Y_G%HhObTZ?g_7wb><8`ZMqOrE~MN>%!N?e%)2I^Yqr&k4%qe
zKVNq{-oCoIZt>G)XU{&%d;QLMf7Jh~WyhB-|2ggL+G}yU?`_+sS}UGseScrc@yF+`
z?(W%b{+{*T>(~76_uFr%N%&y*``>AHP5yg!XKF0IW;{+l^SDVp$lq(;Dx0<c&OX?_
z%yIU?&(3nrch5V`x0Ooc-TyE4$M5SC_dK8STvh){{FVJ%Dk}4?WN-O?byK+V`k(8U
zAN%xw!oS6zdjHM7T)*qjO<OrryH8;s%6{GL`|kBV+diW9#SD&l6~7F>KE9H2P<^R?
z>HL@Tckgqlt2EsDOZ#2+OY`RZ-1~p`+0XNvJMWn>>zSKj46QsCc5)5nTRr9r@G@)_
zXLz(}r(I>(g_wMM?+5lDWAtscr5U6Ig`Y4rlv*<^7g*gK!DZAW$XCp`Vd>I}Kf#A8
zyt{YCED>8`syTCBUH_MM3ucCWygvT^TYdcNS37pqaya}xzVly=+pNbP((z`_<wbtE
zEdLkPGlchZ84IkaFJNF`;4JWnEM{Qf76xHPhFNnY7#LW6GeaUuB7A+UlJj%*5>xV%
zQuQiwL8=)TZ0suv5|gu2OB9k)(=+pImEP~(ucVNfVyhHx>TBRz;GCL~=}}db8eHWU
zl3bOYY?-2DZ>L~WVO5b^kegbPs8ErclUHn2VXFi-*D9~r3M8zrqySb@l5MNx2(nNi
z!Z$#{Ilm}X!Bo#oH`&m{K*8Kn&(O@k#MDqn!N|bST;IS{-@r`Qz|hLT!pg`{0Sc7t
z6l{u8(yW49+@N+9rKH&^Wt5Z@Sn2DRmzV368|&p4rRy77T3YHG80i}s=@zA==@wV!
zl_XZ^<`pZ$OmImpPA<wUD9OyvQvjKmn3P{yVymPCH8!;(6Jc&%F*KZ#^K<n<GWv#k
z1|UyCe4ASUcL9pZio^mGg(=B!bp@qGIbe6DB<rW<q~@mPl@#k6>KVcfEy&0($uG{x
z$6+*F8pVQ&+yY-;xE)yClA8;+qPWDhtOUR6;*iRMRQ;gT;{4L0<W$$P)Vva9B?V9j
zSOsKOq~=7pWag&k6=&w>*(w<tm|G&tMIy<Wnj3%uw!q4{C^fMp)g`ec)mF*Kz|cb1
zz*5)HIK<G*%Fxit$VAt`+{yr|)HgpRGp!P<Qe!Iv3lycPC5b7CC5Z$TrzBgY<rn29
zme?v~<|d}6>KEjtD=EMO)5<wNC%?!yKPA;x$rxGO)7d$oC_gPTC)HL7+6e^v$Jf`&
zGq1QLF)umQ)5TV)BDX*<Gc(01HO11vEIBzvH_^}{S=S^b$y_%vEzwlhz$nQy$<oZo
z(%d8!rr#w$xfB%rF#Rbc>h~|oOwY_q%puzxQ2qs_8!L~@;*$KLN@xxXPA!D8axxXb
zNyREL8I)UWm5LL~Qd5+mf|+FdA7W^EMrs~LT0o9qh_d4Rw370~qEyH9)VvZ{DG~t7
zBnr;X3L3tN$(ebeOrr^NTXIHXUV3VZt&+ZyzMX=NJ}93<{G|`hDo9=i%ORBsFy%om
zZXhPO_yQGWb_$3xGBq#7R;ftI-Y!k~_ZtQd1_n<T$B>F!TmHUnk~!I{%khZenP7_N
zt9kCX*>66-5k7bB&FbB%rJ}~lcX-n484Qg3dBUdV-Cr1_W8|6Uwqy2eM)v2-|KI;V
z|DRh+fXhWuz==h{rA0*X<vY3mijw;!?2Z*w{hP^OZZ%ir)E`Gd4i*N+jG7fp3<^vW
z|GZXt{a<9_SLt2Vb@vS)nHKz?cwc>=u7}W9)mD)w+>8vO*<~`{{{H^^TmSyQzxV%t
zyI%kM_5Q!N<NyEKzW>ix{raCzr@#OHyU_mK_QyM9zo)goO1$;RNI{R&<eqPg`12Qw
ze_NcdoA>vp&H3X$Yvw(FTxmD|`R6y!E9dR~zWI24b$|9VyE(}fe-vKoHKo0^$h-f3
z|DUJ7_3QusyS~5f|7-p4`Sm;N|DWFe{`>Fm`NzNC{<v25{+YZq^Q+7j4Qvt#AOAfP
z?P^eJ;CkfOm9Ss6@aE@PSJo^3Khz%~bf<pzf6W#py+>0fUutdsWcg=%`v0qc85ytm
zyuZLE5t3ZlA<*;R`lX0CpLqZ8u9{{k{Zid|JC2J8vToR`p&skva;J93ZjP7AVj@TH
zvU|$ky?eLaV_nj&?t&j5E_(8n%P03dJZBiQBKgOQLlvodN*@&cJ}RmlTd<=~v1CUw
z&$ZQOm##R$?r>G)+N_x`FP@ooFKV}`HD9j#?z>Ued1-5Jo?SE3bgT1O297B$2PVjS
zH99=*TQ}cTL9bG92KTkQdneDex$7_gT;hCseNElJA0L0d&X?c+_uI$U_Wxe*|Mz%)
z{r~>@KY!o<|MB~N{onQV|9;&*{`_<0w|licKeBGjFMM*A(PhcI;J=aoOj(l_a&R)N
zD(3z7Fspn0@>O52f3)VFJR#=uMk58L*zhM`PoE3d4bXV>%IlAl=&t`}-`rFk1l@J2
zf7Qrj?O$}BJz8QtV~4nKccaL8**>}Ys>)4S-aY?|?%5uS{nXuc#B9d&>kAq*ADL>i
zygtRbQ|{KU_D3qUwL$`Wc5nE3_f^`?<B!Gi#k1eXhEEkz=Q!fie>iXZ<J;Tw_t*S?
z3X12a`v3p^y?y`P_x*SK;`<&?UsfJ$JY(TX4Ht*1<?|QY|NdDyZ+`#h&y{ua9#{4~
zfB9T8y6M?MCxvhPYrigEHSK$7X!7x}-o@&5jG?A$f3IU;5i+;mRu)&j*~Ys_{%A~0
zym&~B!T#zR?G?xTm#u2Mwk9%r*{ZZ#XQDQjMPz@BHeC_9CuQwoHm1(EfzNiWyBa1v
zi@#i6rf}AUy?t5b<$TWz%B$oLzdY<NzqeYxZr<ZJHGPFvzdy){|Ejc`|M%mY{?9db
z$3M&bmD$@rv3(7Lkod(<SDjwxUK0&#Ug5wfs-Y=j@o<XZZ0q9YZ&iDh4&8AtGH(n}
z6bKh^5_OXKXv|slCbPqI)@iG6-}dm8$k+C~vwIP_setF#0)ENnkCNrPPD^~$KVVf_
zqnGn&)7u6n4T0}AaqCvCI4BY@qcba4N=j8A_5Xv^zcaFot#1{|9A7KjzIB=J!UbIo
z=L~B~-q(Jyt9qZ@bMd!??K|C)jN;v$cN^StnM6A`9}*}}TkE=_V}_IB*$Y1W&c>Nq
zTupvm{NE<`v!9>*yz}|w^X&8aD{DS|cDAek@qBrG-H*qY-JcgsI&+I7NiD0;)FpHa
zqgL7~H3P;3-mH%F3p}@c6c#lcd!DV%<Px*$&!Q{aG=;9Zu9+Lz^+ugJ?4p|>Q;4gv
zscW8jTH0DuQ{!CUwTkic4@qs|<lD+@(AA~Esyt0I^~m}~c`ugt2j956dqZ(am3Y}l
zNBiW3X_moXzs7hc$hDmi_S$Y6zNfBC@Via^oRWplcQUBovGn*JnBIDP(&Zanno1dZ
ziwai-X!INw7i#@i)A*&rz*XbpzgMLv?uZ0NKiYbv-`^qT`oj6|XZ&`n(tNkxb;%XO
zkm~Gza@U=V^NrJrC#uUdR%joNt$M%r`}3b~o^PIizVM!#y?g$>Ez#lOUlp62r?zw`
zZI=@*Jod14>+0$$TedFyWVM*jF06dxsZ6hXc5$n+j~vt7x!cjC-)4=N2gezqxsm&m
zr-iOQb!w9m|F8AWFT~yd?|t;I{Qm=Gw>Uqit;l@*u=3CBRWoO%XNs2dFg$+PciK-Y
zWn1=Dfgsn2*5r`luYNnHdNJ6ZstGSGnjj(>)X!si;-QR)RrQ(=8Ju(8-P2q%cfKE=
zmFuV1R~9=j><Ic48no@}-tgR%kROHT9gONv*`M;&_Rn27`HWdw*tv^o+$DScU1FCh
zHE4y4o7BIky3wM<R~#s6cxj4nMe+Wvb@qL~|JMC~`uqESo4TK;_3MA$zF+s#{@+{u
z|3BZ?|D9j||M&g>KhN*~|M_}+eN9b`-Ms#Lza`%N^xc%DzKHF})`-~q;h)xC+Z=gb
zC++`!ai2Y5-!?9|vpSgR|AOx^r=veC+Ylcwwn@%o;j^p~MFvr;MbQBrOo1^PlBub-
z&#hM1TytNSs(LqcBUi?e!>4aemf1P^iJ91YJ?nt9jr+59>AYKg?pe*L7gK5+H*R_I
z>Dc1kH|jqwbE&<#rT$a7o_M)wU_?Fh&BEi)-o4xV{rTtM&;+TsIsZO1XEj-Lud+)B
zagEx#amB32*K13!GWOiFl##R6TckZ(I^l|5(ARy>;wP+nyLhkI(VCD)@vr_Ug=?sY
zdrx~K{Jb*OWBOXt{1r|N!LC!jNm<7Q=ZY=e=XCay^JcrYj&oC;*7Rx1i$BZ$u+!V=
zh-`qdcxmAd;YA9eo34}}W45oE@q0nP;hEjNc5c}@bB}-6{Qdb&`OhW^(+o_06&N<G
z`Z<AHWZL%=X{Gl1I~mkEZ_Qw6d6%jyyK&myuq`VNxGG3(S6W}z*rQU^URc!O5-I=H
zz2<~Y(2T#nd#d{0E&eTW&awa0J=MCv)_v)BpDCzXO{nVGKJC@%pKUw2-8<J$ik+#r
zI$HGfwMm;5PM*!*W~{{Evo&(J{PFF-xBq@yxBqTlnM|3&{k=U0?T-EA5!hJ1*{;g`
zt?c1c3rTy+hc~?gT~f<7gglK2Vv1VCAS@yfY$z<+P*5%yC@5T*Rep$B^V6<FU-ocz
zte$&6*ws{lVYz~t()$qRqrMA-Ohewx2w`QOeyO4~a+{LXqCZ#fER0lLl<(mGCZoe^
z;<;MoRgdhfG%nrcO|g|Wa#37hp}8(8i;4HmHAUtmjsz|zF4NyU4$OO+l~3oVzdIDj
z-Lqoy;)>30RV5b1M_(_6vMkCL6txQ7&Cz<!;@twymgo5g-~5;x(AfPdPNlFZ^W);P
z#Vp)%r>?%`49K04@Rv1*J#?DpIq`<Y|FRs*TRL`C=d9S{;3j9@ym_8z;DwhAp^tAU
zv@W}NO7We^_Rq^4R7GAK+1a}O`nz|@U+U&P|I9Pzug&>;=kMJ%C@z+NcQ^DdyGT^W
znXeMwA0$FO7W+wGjG1P=$hS!-J3I7G;k~fcU9bDCR*1~vNNOtXc)vhc<&?4N>wS@%
z{xjC?-^%#^Q@{PO_m6)ud#yU)5j?{~`SsbZl{39$cfJ-_+_A91$0hUm=A?tSKQ*sj
zzG`b&bkn3K4ZC)9Th%(;tz|fTlW}=~?t*oSUb~KO^RhZ}v|3K9vEtfmagMvJ9kXUX
z`Y7ozSHOc|ZUfJEE$ggRBF7W#HoG{mFgd^T+<foEC2^z1jVmG-ZJW3%R)g!R<+59D
zGQuYFqn9}Q2sk*Xw^!xoZYkV-x9@l5|8H+^>)+r1|Lgkwf8MUI|G9nt-?#Vwzs>*u
zv%mh&@%#Tj&j0`8xPATKx7Y92)zsL`OMjniwqiBg`P|g%72ID$I(Dp?wp1%T>A1)G
zhJQ78;$HjhJNhGa@9S4W*EJ9P+*&wSCy$$}bxZi=H*3Ur6kZ5~Dl%W3&k>v(vNLJ#
zJ(Jd+M|FAozHZs*<;{D*>cNGrpZz?VgwCcm{HwXX^a%6P(3wJoa~&6+n^(mANpbqU
z59hzWdToC3>=C!F9F-Ji;|b3ruAciC@m25n&z$q}=bfW3r=^vdSZk<tG->E}aIFwf
zUE`JXI^Fv0yj5mvzPfy5-?abX9fM@8ZMn0f>JzNW&&VigKf9vYI6KGx`?VLhloj{C
z+jjPCed_%$>(^`4Mf_R6by|g%$SU1azkgh)SfqdX?X3fvw>K}noy{P^#~AuR>T0gp
z;YtH<9%<WWN1N@={I<B7!(*{_<M)Mu*HlE-<(SQpIqds4gG=_+;gdS=J~tiP?Xa)n
zr1unY2i*g|9bPZCTmSt~C5L128cn0+8l4qaHV6GRX7}K4h}Dp@?_c$XS?uJzW9-L2
z7@Ga5^AP*nT*BAiWnWNVu=VnTJH;hR+SiveOwgER8oqYcjc*fG?Odj=-Z;O%UD@p@
z!<jD|9^ZcZ@9k|+gZ}&P@4pLo%bGhkD|DSVteE#V=kHG&{%Sk_x3Rll-b~?{<GhRi
z6a&-y?#OK`R=HeV*DCrf@ex<(2F+QS!TGCvU0XMDgeWX%<Pt1g^U8R(@rBT5s)z6R
z+|_)<6gXY+ot~G2D{I(ev(_SyX<sjji|EbTF0yXJjrb5Tmz{G{?woIN5)opam|GWr
zBT3}G)9h*kHWt0b)-IZdb+U3**?DgH2wE3ASS?!hsO8v`Ae(vCf`-ygJrX7^Po*Bq
zw`;IJIx_bTOLyzMuhyo&Zls)ga{J*N=2aEb*cSx1`mH*r?dW;TN#XsJ9nBMVnjhPm
zeUXp(;*O5R6Mo+~ZpeONj>ug7$&qmd3v5=aaqWNLt;8pHcI~A2)yGcmR=n<_{rtg%
zoogPwnY)+S{D}qQ#!#gz>x;i#akoDH`D2ZZ-1hU23wPgrzkRc*@mx_W&ca<6nHCFJ
ztk|%IRU&tX%m4YpvrQL^>&sn~Ftm=9KbX90L9vp)$JwSA+jssi>$Q0A{V_33ar1wv
z`gIZ0|Nj-R+pu~0CN@{$M4|R0E;`X)SA}ldwookN8|NMaj%}I8+Ex@SI$A25<aIZB
z@)DM)`NC@tzI$WJ{`iC7`A&sj9~FXn9px8W&uy}Bba@~o!_8&L<|%qn<MyYPGlA!s
zo+-vWZ7AmO{X9e4C*^O9ylKbjH0Sl-OJ;w2#Jj+v+jY$~4X+IhUt<#<C`cVyYbw81
zcKhue_s{*_{`>Fkzu!)Oe_#Li>Gb`7pX%5DejWe+@BM$@=Kufk98~?+|1<9X^ZaJ>
z6O-$ONguPj`1jdO;x76Z$!mFEsq&^Q!=5`|UQ4W6RkPtm+-KE4Poz&iVp+2@xlc-{
zFWy<BL-WLq_eZyTGVK#P&{#9^oZ$B8vMm;h%@Vf4D@qsajB{NgdF(?=gPLAw`R$#X
zO(y-?yw2=#Poqoc{uNzCu{NuJ{V#H@UqAOy>VZx3c3OJ%mQV4q7H@cW>G#i?V+A%p
zwbHh|y5F#D;S!%G(adpI*i1v4Um1jMPJH$^MZSccn<4u0>7(MOi|Wp1-D{Nj{9H$8
zr<zcVu;x)I9%f;W8IPx1z1L4>bXY9Ww>_IH)H@(vZFXwe6eU(2m+d>t6IkM^@2;KW
zdBp$mzUDL!9i`KPV*5n`XLE40Grfy_Tq5UJyo-NFm!?$847sx|OS(0V%;8(HGPY_J
zgYvahH^vR2H^i=~aUC%}%CPn5lS__?L6YK~sT{F3YAc(SCp@_$q#89d^3LuHmd7+&
z?j0}Cck*NSwsW=E0sr(MzU!7=b&}3W2j5>j>`}xsHGttxcXf8su{nh=oETEi-Qp;}
zU^?M<{`TMBe{X+(e_#FoZ-0Nk{hfGxt!&q{^tq=C>~9rD^*@|G>s#J)PT8Weg9SbJ
z_Hws83O&rWUBEN9>!q@YKyYY);<6;KR+nZAb;Zf~Go%$1obJlaoKw9hY-Ph%;UIy>
zavxVrGSgBQnA}#Dp|I62aN*^(N23JKu4HzV^bXYsy?AYBkO$NH%^vHF>hrP-(m#It
z)G2$ovLSoXn^#WkmQyZV6G@TJ3%IDl`m`i8``+$gu8@Tx%qJI$lm@D6M9Hv(u{#|t
zGkmpj>5d5^S09BRcY8Ijp>_^K*NjdBsfjhL@0<dA1zCzy-%V9;(de~{cTPCfI<F{c
z<C<BCq2HdAo$L7FF)#4y{3&ZM@E^F%xinz0MW43CL#<QWHa*LIbh<@Cy5h(x$sJlp
z*53HR^kw6*`-jhOe!YC#x0`G3_C3yMSBjXqk(D!guYP~s^-ZbqUv1~g%!}DE&wlrA
z5l8KpvhOB^*7-c!;VbbfAa3f-tR)d8xA$I4xBF6(`1SQ|g%s1ZpNy3bKM`HC|Ihy2
zNBXVh;$=c3G+bDHKkYEND(0*o>0#WsH~jlAE6eq_Jre?V<?23Y-RZbXA);!{-qa2r
zx5D=aW8S=LHa#usb>T~M(fe~U_YTdK(R=sd&ZZ--E>ju3Hm@r$+h6r?>d`&=S#{S6
zTwOXUlJ3ssEAKt$JbB4xk5%UuhFof1yk_Ia+^@DrTusaq_oVWjiC$A!npC8eWw}g%
zIb`Wgj-U@0`)>dJ_V)JM+uLuyot}UH|F6^W|3LWL^!@*yf8YQA@BX^K>*MQxfBg8<
z;{4`*g|FxCe&5PFTjBJ*iiE4L11G$<<<w6t-gxg>z{ja|MVdE@&OcGpRs5f@(dePb
zL~q`y%fo~|wW!P~&3dLD|04gU)&>plkjC)a%@*pDBqNWjyeuxsV@Qgb>X7N6``ISa
zcgpp3Q9MEyt{0xLy!6aZa@mL2^ZO?@esnUL_Ihs&i>H(M+S9jt**5jtwD)hef2*0s
z7UAV}C1kDmR%h1<B6k%ur4BI8U;4oycypfFyk}*b_&?rY%bqba)l94`{#9p0$@KEc
zFQ%SZxbsO2&-A{6jc>QTa|~#{d$cd+uIr+yK`+V-3l@p2V_f^}iqcQjM};10hV9lj
z`D$`ih21-Bg*)e{Dy{n&ttQT<r<BxIbIoR5uJ5Ea6??}J)u;{2B-#?KJWJWWO4z>o
z7xz4M-6j2rzcns2y>APxa=q)hTxiD8IRbZ{_Xh3@_kF*4UwNVaC*M__O-670cKs-@
zI=<lhf=4c$@?UG`9V&ctN&IM}!SR6CdiqCPo)tzNHg~*~F?H3!GYcaFVs1#Cxf12@
z?2MelwrI)s`5ghXZ@w-2Q*{2<zuRwb@4d@sJxfked1lC)w7gq7oUy-Z>=wAFmn2B+
z?s`3eakq}p)JZ&?q9FoWTv`c%_nVg)=zNHa5PAPft66Y&Ca*>3jE?Q{?q6I|Lfr+t
z8w7hdI`5j$_v4t;($z<=M4Pbf6pKIUdBMqSD))(4Guglo3O28gSVca29LN-y6XnM!
zkfLX9I92^_?3@g?pfg2%0?H*qNgPf=UAC+2nRyzFD+RwQi7r`Xs9|leVCRISqv^Aj
zOp^%bFyYc)r>c7HmhFt3qpJjkKTXRKSLvEik+7n1;fiGCyE=Y0txCEHzvr@Sc`$cx
z$}NXKhR1>qcEtofdnHiDV!yADVaXz;kibLSjF0ZM`TmmLe%-J=Ch*$nt)aUUyjMS~
z*36ptU|RM2*!iD+DxQ6lA?!2htC*XY#ucB$RctHRemMHn-Y$?|tfIg8d)>E-)mMJM
zOK#z^G~K%L!5)uXGlR_8u7#iOJUekDo%_k=2C>2}HhJTx@tc#D2kps_&Oh;e_o|1i
zE0{mrU;S<}TNvw7P9K+rVV7%9h|Eg3v>-my{s8NXyZz4(-Aq~6vQx-)j?pUTc^h^I
zvpv*XcvE)DL!-9&cI?*;7nb#l8s&$7cU)f0bIO6gY>SvGpUcTy?<cn0E2HKmBxrL5
zG@p4cq`twzLgoWwJKv7P9z&V1Vg<q8qWdq|?k>2#a!ybAsqc%f>YFt0<eu{4axjm3
z^U4mUvu`ZkTzgY?<1MK7{&#x(yZiM&znzZX_xIEF_<ujYzyJSd{r$S%Umrhy{Q2Y0
z4y$?=-j7y2Nktj)uT=BuBd&TY=-s&&vhKoF*8L{>bDosyI4o(iEdKaFmu>I=yxEd#
z?^TEW4ECMeZ#P5Myv!s28vEw7*&m!IT`lq3+!D8=Xv%!M+Y)Q<s_w9LeDBB*IEz=m
zHNLSl?78Q<yR*G-9+>O2e~QA<?Ui$<oiSZ>`}XWoDYZ<NqPI3?DOq+mHa@@9{(ZCk
z7v>9T*Cx%F^rR%b^oZap&0;o|q@xSO9lEz%p8YF$Z@K+$3-QC1?QbW~;g3A6etm||
zALhx9+x!;37Fd(PdVP`n>NCef<LjoLJ(Cqzw({~WW5f4cafhlhLwVTz{%C1eN=|>g
zdQaBlW1_RQzFi7X3RdJ3k1*3ROK5Pqa%I-`#)}Eg(WkW+1zevb*ZO4PheIOz&zohQ
zRBk<4G240Gq#52*b{D@sc;#UJqTK=q+9VskYkcI+=l{B9`2w@0f$#3TFxQ;#>v!qb
z^~2A5rn7uMzD+CalxY8rXX|3Tmrf~KDId6Mqohvj66J^=(_SUryyL~S!g$4tw;A{I
zw!gohcRzpo`|r2c-pzY^%_L-fSdfdw;?NGwX_<x>7qsm9qIfH%q+)s3x-zBsN&_o(
zewV3inQYAwBEL?G#1#1WookL+ebm(JAy-KAY!}n4oVI;WtZ#6bI4XU=HKl~_Pt&|f
z!SX>F7iQG$;z(VgVs6i%JE`@_FPS~hLgxH-vDf-9!&e~a%OVh7*uHv)Xi-{;cy^A6
zdXIwZ)DkJhWv4<`afK9}D!lnJ)Jgq>W4jbnXbUG>^9SKnOOHOz(98?v8_ou9Y+P$9
z#dplw<h(=6v^^XtN_;HsGo#-85jPZVbuw8udCHg8b!VL!7%bD??vu%hnKaF4XWj1y
zAH1{@PyTiI{(v#{>&_SMGuHQJhq>tpw63m;n$@gres$xicI}J^VSRa}NB$p6PF`T?
zfA(7BQvWl%UoY0mGMdIlOq|^Q*hjZ|%2(wryB8M7Cr-cYE`M29-6CtwnT-=-)-Zm0
zGkIBLpy)5Vy_O+|<BoOCx$X0k-KASy(P!I%>zml>9WwsqPyN0k*){d6bl4<@ra;G2
zD$jG|tHY1ouUBN<b9UcW9bcChvY&q)i`VX)(7?pM&adW6VcX_IPmXqU{FZ1|dDODt
zvUm4+)hV-H1b;jhAsnRXdE>~<F5NSc#TQ;WwQ$|5J;5N7#21u(WAn>JDzW}7d^3E`
z%A2uXn%)#=cJKSteWuTybix<i*>q;s%}hq+g{HQ7$N%1Y_x*9*{@UM9zx|z_56Z!B
z<@f*lbw0lSHz@bMJ>K7c{`~RJmEQzszbm<`bwX(+Pk3fuNU8qyXa7P^nn@o!$sXyZ
zcH{ZcJEDElyS{zuG#BdYR5ti&5`MX7!(7h%84eHiV;mwE<a|{&y%4_6a+X*+Pj~IE
zT~+gcX>lwRJHKn?FA-tUi3&Hb2D5KrRFu12$M8RDeZ|j@Z&EEjC7k>FWo!Bk6<+Bt
z#Xsxx)7Yn5vY8z;&B}k9{Gd|Ht|@B%IwMcJaQ;(ulS+e+&Jmd%wBlTUqS?Ij3$<#O
z@N?^P%WH;S4Lz)XGGf&y^MY@iJ~OV5J#98Ig7tUYo%jk%(_Pb9@4Q>V*UddK%R6xD
z!%ZCgJ2T1+_4P!y@(LU?v%hJWq-U2OxO`RBhX+of<=lZSJcl@nE1G#{tSn@@ZkP00
zd-0+x6O*;3FTYm*Z&u|zFG-~pYZe~y+cNuTWLQe3pHYysHCwU6i(vlOhsEa>USPL8
z{%p0ZMpDtwha2W}Y};5c^?=dr9g&aT-xA%(m;`P6~;&cUnQt_n?!5PkHyV*UEH
zP0OmXzrDS;uQvPp?LwKi)0Vm!Z%JWd7rnshr%^02<<_$m?>%|4%4hJDe!P;J9L&}h
z=b@UD!nM<%Tei1L_@S^+4rAJR$rW$TZJr+5m3c$iG--*-q&VTDPkPx)<}w9*4_eM_
z^yyGl)hQN5CDyK8ua9y)yZ2(BaMM&?*L8>1`fxP4%CVJi+{o9V&3?ouiurWbMa^@G
zQ^YE|Tt8p;HR*po=h6+6(u*5dJa0>ehhBO#<Gl4#-d7%ur#tGb;-(~-e(w}Xv|kt_
z=Fj_DbMEGlG}HN)_U>4B<J<exn}$EX$4&T=DkisQ{*lEIZ;o7$Qpz~RSbFSp*~<g*
zCv3j@J`UU2F{#gSmS$YVp9fFZ+`m1^D#A2edHvRd^L2a!k0t5ZaGM4fyo+`!>M?W6
zKVI0A_;$mVyj6B9e#r>-Jls5wfBBg&8@=w_yL_#9zs}a(iDC}7J!dlY9++uszWwLI
zqpbx&%U3jQcK-I)C!zDg$ID6c^3}dQIjZ@fUt{NcgPId%|3V%MrmxLA5+1XgPiOkx
z+CIC5S1i?it$5;AoL_SKs))u+(TBfP<y_{b?Ei6M?SbkH<xTteC)_)^Voz=5fg=Yt
zJKT|3(cSE7(zGgly6x4J%b(NRKNd}_-k>(~(Qj7&z;gR(nSxTxA)*~o6H6{{3|zHb
z%jSVo;X^k^x4b#OZMBXwNi5-<y@+?C^qDUiZ|~iDoAEt*cYXHv--W;HcK`kK_P74`
z{QG&^^N)Yb&fZ^JQ)f5t;MJLPBJ?gj@s~TZx{&X_&x9~tTfvoEOpYj(_+C2KU3#ES
zsWx(Zh316v7U`zBUM8MxYpi!k_4Xd=KNPpzW>%7e@1j%j<w|cnM7%_f#c(_3pHN)O
zZhcub@w`yUj?>{^FMXOGz<TsjzUqEUbID6q{u7>mZ{@L+U1?Bi^Yixl?_5{Y->(tR
zdGlJ9r~JF!u6Y5$zc&=bWv0q8i#vq6^PHNnOhsVw0iMvSvv`eHDCvBA^QCx0yR+_f
zg^btX<riOXTQE&r?OD%?wmsbu|8MCfe~#L*CZl(;*Xn)zm65XZIQ|%B$(;P`FLca#
zqUPMHxm$m_-Tq^g#JgwC^>@j=pRYZP=9@HW&GQvbldQyC54{O8U$xWgLXc^7kX9?_
z$-?#Tm(Fn02r6@u*Kl9qt9h)`+VE@b&8Hq#(R|MqN`AVbaPfSw!C$+{%Z>M(`1#Uk
zu8W-CHG@rVXW6rs-%Qy){l>Q>*+T7i=4t23RF2*{_e(Ayb-|AS!LJ``!g?>+Pyg`h
zgocx0Ms|_)mmLd_x;L9HSt6tBU2(DEwM0ml)K$Jf?z+`8E*v@gSH{`TUD-C)X4%A-
zlhSl~?mY{izhaMp?BQk3>-A-y{xe*A$TgGqjdb<lW#6ZBJu0n`S#HfebJ82T^1ueY
z5Rn=+EhqL+gN^rIdQYCraNk^U{d7~s>mlCOOo#5XZFUlME!A5+_sr7cY+cb?dKi7T
zJviF`rhe0;O*hV7&Q_mcvQc40h^p^dPv&UPDeQ-v)~;Lh+~l!Q*0X&~+3y)1M8>)7
zcc1yG(Y5sCCNs0?E0sH+Z)i2_IJb{;lZ)Z?Z!erQZ{A*Ub@r#FEO&J7?TE=~+Og)*
z;f=g^74>8OpDq!a!82>so4`=H|1NJm&nYRYaqP90ll>HNT6JUd#rMZ+WpaB8d0yW&
zQT)SQr=L*x+Fd^VeR9v|nO|!quI<uyPRcWRn!VP>GMUlyghXcIoUWL4`ja*G1<iF#
z)=|)?_Er-#&s1EJ{A+svOZ?T|Kelh*`oBE=C${aJ=sDq~FQylN*YZ_ax2}5n(`kpk
ztgk(_w4&xB=cCdU0@F6n(pxW5Za+8C=-S-ol_^KJ{G9hZZjOSqK=HxTlP7sDylxV*
zf-C*zH~qy+?yOLFR8Z)oE7{|*dYVb<Riho|p`kN21vsAzGFmUWze3>Q^UI=fwf6(Q
zdMfnk`5ccAWmuMxb#}{4Ip2Qg{=>Ht%OZBKl+7ydUcvGEV&D1WpKqSuutuQmci6<3
zpG^Gu>t-B~mcPFC@R3cc51q5poRX}@vnTe<*PEX2wJw^amL4lnTXr%$^4*`?>sN1@
zH$h^d+C;~+AZ8DzXces$Pn{+2RdlUclkD|+PSC0YdP4V_R-H@W*V(7}>gK&a=O*NB
zn6YQI>y1S*wOgjydY%<u#}^U4?SGodFR#Rt+{-o@s&3!CA)i_DK*Iw!yLnbG&6auR
zWcvl1cDOHRUL}zA$Zf{Yo2EY}-O_!bQai`-N@3aj2TZT?{-o$tZY(;cdZ}@}`X#%w
zH~c4=L{=&?#BTUH=ebVdO!t|u9ZpyZ{pm?Nqt9of!@p>*i%I-$rl)5WFUtJA_s-_!
z#o+74F-D>-lix&&u3xpv^S0VYN3j!a%6FHk)+(xMDg-evHrn^_%fiT}n@SAruB?)=
zkrsg#hx_ie_un&4RA>AxsHLICyuaed-XBv_AM6#pYOy|w=Zrww_D!m{G*o#GKEJ(c
zf6dgRh0PyM+o)^(-VqphihafW(9KC*OE%s%=3G>mmBjv5EiGiR)IvVxNuL)lYwxgR
zWwhmdammAW);wSS$!xPWTrA~z`B)~oM<GYx$(PK&>0h(9$MkK<Y?!2-`?Jz#)BRO%
z)bkiaO}ex01hVug91~w(DssPQ*PbIiU0h$@d1+mikGDxz;uGX=^AQMs^`$^8JGpWH
zJEQpgcMG+n5|Vbz78QQkysUs%)?J|ViqEX>=!u`All~u^Z@M^3Sa9nx#Z_iM)84sO
zaIdt~e7$ht?JaUloCloMM6<(FI~RUf)gL`!o1PoDrjS~8oODUdXUX4_C%Mmm*Sd2f
zd!4@i=d*JyV=P`svpu-;ue^Wy_iI*Hbt`;l_kKuVQZi)NcYCJTI{ENe#`_;$hTP&Z
z4?8#CVTSR^hc>QHG?zvd_dVSE{qw&)bHCp%w9a!sbJpjq0rQeqt6c6K?3pid_`KaY
z<BT7Q?{0qDF~@!)`x^fMm4+vVfn4b!KM&u%bNA>}skv2k7kj_G*_fHSpIda{q-4KS
zfo`q&{kH#Ba&r7%vE#dVV`BhIn9Y0f8}s4={;Xcko*Eob&${{9iO{(J4|h)uN!=oR
z*l@+W#k;>8-2Ccy!4JVMv1)^J8KvhJX?-pBIsR5FY3`!z?#8q&S2pc97<Y@QVOh3^
z?Btf#^n{uxg^sP$HM1mM2TnWm+}p6f{8Q(2ck%er$rINLUO1XEz5NZ#Vu=}^(-(6|
zC%kM@Vav^0W?ynL=D0$rftK&fwM?I1XYZDJloPzW=-;dpS{qt@YdS4{e%focXRguD
z-aB)aWw9TQTmE)8*9MbS5nZ80kJjkLiQS8PJT2kb5vNsts{7xlOg)#+GNo(bKL4ZA
zJ3CgWK9i_FDgNc$!^k|<7Mn~%>&EKO`=1~6E{iOETYuAMZc>f!zNRx14YsL^g(saa
z;eFKl(n+!Q%Rd|WiN90Mo-rxDf9ApEsjl5q7dg%7lDd24d`VpRW{r1WEBS)w9WFh=
z!I6Dy!>>ta;wt8x6Vb2!b}Dn@$3sW%AKJSum`D1=|D>#~J14zMmowk!e!?@<kv;op
z!|v-->qH#`%lOxA*m--3hQ7}qam_Q&4nLFY`8sDefBK;(mI`v3;d3|UZ%He8cw#}r
zl=jLAyq6b*n)lDp%`>|4-}p}MV}U8ouG5;m*SMvhIsEyIx9sgN2fy@eJn5Nk_&NB>
zyUV8v<9M!WePk}5e{M13suv=YP9-h&E!WwziS_cTy-!ws{3`fFW=*`a?6Q~Klm9v0
z;hcO$@s&$?$#csLHM1vY=En7EC9c$GE1h|DZl7~L@A8w0HX(kK*|gd&`Kv`UpW%(Z
zDkvm(Cfw4!(NVm!+feNOjqHd?hvz+&Q5AJv8X&B^)N$o(ZOyAaMhde$0yoGMo$0wF
z7Wl5{Ne9od2^ZTJm}c32e<1bxl$%x&`}9MXYY%^Feqtf}%RKe_5gv9{zwjOW#yh;T
z7p&f1^y=}d8)tN<Ulvdeoz3vdIppOGuCUO%It6PwOJ-jCD{)DY&8n6o#$cLWnw41Y
z3$u@0A-=co?J$}e>dKm^YSxjN5Vca*Z+FL*S(cwmjp}ATP1-E|UF1|%QIr0@Qk9><
zucxJ+UtBQ%+TU)GEkC~O*33|y^6u5fsok6Zr=4}USNol_Omy+gch6@=`{hVEE~>Z7
zKmWPRy6>^rl^x5(f{kY%mo%<B<bCaNa)s2(IWO10*eAj@J8ee%!H-;Ae1GiHZqAdw
zD-*J6w_3x(51aJnE}3#EQf+QS>7J#Rm@c^TR<wV8;J?+a{>8SRhdw7B+kEiUsz=_n
z=jWeb-Vx|^<k`itjcrfn+|+y!XE#;cTkL+yj!forMmGw(xWo2^-bi>BuOqUbr!v)_
z|G}|0cTakJv<xj(lk3*DUski@a+8DfmV<m-oo_D`Y-VN-Sy57`!Fpthh37M+Pp<_2
ze7?yQ_u|L(xZ0_i4Xy&m7iApXIpLhUq_kv<%7%`~ev*26j%z|f=hRMU=KJws#+4#V
z-m?b}|559jztVn^qpfSZwy48Bk+}EC5#jd_bX%)T;|kN8(l()ELiU5!-cNNm-zkVU
zGc@k@G5@gBFn3zs)GRjfCPtI$g4yn8^46&|=BOqelFP|eIL7;P(%tma1rH;xB+cQt
zX8Ne>_v-rOCg*)|XXk6Y?+kDLCwh3h(#dNU)85@XzplcDYj^mPl7jx3sW-J47^ZrK
ze$^GY)^9iOxxwo!Xa6q;<sO&q^zqvGJegyCbbjB<MW>yPxVQGN&HqxF<CSwp!m!gT
zE1TuS236g+Z}wa|ygYT1hD%bBt&3=%={o+n^ADa><T~f<^O&-sUE-W|L^5lh+Ue-D
z6rq^5-yiH|h?extIR2sMv8F@GD$XO*CR9EX$*r3)`Ck0-??+y*&p3N(VSll);!lki
zwwm9SO|wHkz8Aal<-?47t@Rt-t3{vcJjxEdSK@l;#j(sQoYUi`|K~E^!L>TQbACfb
z?ZGuK13ULtzO7a6C=3p4Ntd+pFMbgr#H;VDA>Nr$b#?YCqfOIXG_M#Qk?vPp`_aJ0
zcb1p2<&|^K{wc&*d{Uk1qpfMG8+m5a*2Nh@@isHGBo-a6eR6ke%$A88CRs)<>~PG!
zu;Ac@xdpSb47ScxySV$jOQ4>O%R;?Gi3eIAGQwg-9$lH#*R-hX(5~Yp27D7L<n@;%
zUP)^8TI!In`n}=ucNaL+C%zR`s=Ub--4(LDW7q86TNi0*K8*V5B+Sd{z_?0FB-?xC
zS`M!Z4H}j%j`B-CJxY>Y&u8bCprf!qW`b|ygY4Ymjl1q0J@ag}e$?Yfv)?s+n>#=1
z7{jW|U3HVWe}?|u=XL#b<$<G@67QrdcgXtBJNoy{RWZ+6`Tn@(tTeV0o6i1Wx_)By
zvpn7vwI5RS8~uMhtodD5ZI{3OccIMj!aIFsGfXdsJ!5zo<B$}TzK5;T@my!`Wa+PR
z*RQN!ou%OWHa*~JNW(kTy6|w>RebVtEA-7fRa~zyh0poB^Mr;tk7p8x=%-_5&&$ue
zyVvk|fAAyWZiZbO{#=+krT6hKuV2C2_|5O?n$1m+(VKlf<!7I<TiWSAHR><YIgh+P
zQ+2c1J@3~U)^k2DSH*tIDNCDWqEyZ69XDg`_dnMf{Cwj*-)&uSG)d6={*_s>?OUDu
z59fV<yhHx2L9UkC%>2X_r!_a6W-nh-qnY{hn&qeEC3C~y?RF8~al1I<*pqEeYaHFr
zBvo%vK4x+Jh2>hVcd_3Ke+7j0x+^=><gJX@J$Ffgcy$cZBJ<SSrW4-A^84@GsdQb{
z=Z}tZ()Y8Rr+U<S+haKHFexiqPkb!uHRENM$Mw|-K}vOjPJ5TQrrEqJa`tk%wMRWH
zN$1?=6t2c?y~f|4Yi@`>Z>-<*ce+f%si4gvJYvf(K6m8^P47E*f-^4d)Q+B2DT^nZ
z^vjbGH2QHwxjiUC@Z+ii)+;`rr|7e)2<FXvmA35J;!@+S#sWt_+N^Az`$pA2UM*PT
zq5bQlr}ZY^W8V1Puj8fA1jX5}74;X{O??=+Jc`lnl<g8r&ljs7pGq^YR9IQ)ZI~B%
zv#>n0+VRedeHo3Dn~#{c6$WK}pULOHaQW-#kn6QqPTaE0T-bX5Tu1AqN6&&8Iz0l?
zyj~WiL~%bbt*-0;reXA#<)v+~>fR$ci+B46cTK!?>#U`fOz-lh`i<F3GIE0FH*ffL
zJ;${8lEAXiT@U`4s|o6FwbgUHVi|Yo(3{IeaUy4Wx_`Q^n=JaQBKVx*)UWRP%m2B_
zI~mJOP|LWS;x+paN8|1vii;m+6iM}|xnI70_TBNff9(Ev@@)!s+hVB7;%FRN+M%6U
zpu08uK&0G~;)_!{()b##{?_4k*>K>us!zv_r4yyE2pke^4hcP9yZ>D86dvORT{G%-
z8|6N==_z^<x6(hz=9J}9eZNOqPdC5wxqW#;QOm4S=HH6hS+fov*!;dUz5Cj!Sm{@+
zmsg5h6nS&;5r@jcg+76tc1O-=e6b0-vLJZRQ!P7-DMpW;@aWG`loS?==6IMmq5f>+
z^E-A)?cq8V5$dx7(vIoHpFMZu*|UO$ht#i@a!r;v(|yLNn>nC(vvOHP_o48m%l1g^
z7OcFpSLEa7HILP@*6dlu_4DY_gEOKSk8aZ5Q*`R!`m&pUl=v5Ho^LxPd*f|e`R%{|
zmhG3{{=3lnono$ec2?Bk`$w;OmWTSAuPZwJa7Of#Pn?s39CGw$iZ4+3Z0?j|a;)I}
z;pjg>j~DMgaQS|jS}W^sEsIvEM$x7Sh9f*%ecJ_Mgcko3EuM3~wd?1ua4~T)Mm8P3
zzvnHSjEx_6SLJSOuATPddY=A_3o}mUi7tv1IUsd<nLtwCp{ZiKRIYbLe4aEvp)*2F
ze4Rwk9i9(LiENur^j7j^Sj_RgF*C6wjd$(M!adi%zn#(NE0`nCn9VlVrTNnQhVIuN
zxy)iUnEp@F4ElO$*GyHGB4Iff=ZeMi?atNR+joEe*XNaX=bt~e$$xCKyIh=|<@`bc
zo*+R<rq8=Rzv9?%Qm*jt<Z#_LSF1ml?-Jkl>d<<>n)qX3n>)U0<z*f<xgBvsTKsov
zu;xp-tatqDW~TFnJU0BQ^`hv~LC=6-0b?J7dCW(TU5?RR=C`Q&&%(r+6G9>iBu!&O
zbS&?&1+ehk4ED(qoBwoXa@Fe(-M30^S)UTGKI>cCrMh*BZ`e~kCA(i1g-&*tO=4#s
zxuA8n*z|ZE!_1RsEe~DoS$NfMfz-an3&CfPZQjk7w1Izmpt?|lZtg~%GX;vmADh0q
z?~z!wZ-H^{wYWMKZblLQySp0-_G&a9NZqh&=h3#GcMIYzW2%E@zlp1~E}L}t#LIn_
z?%by*R9$xaF->Bo#I))cM<@M0#8dtD&GPwo-Jid(W4o#Bv?i@==blpS2iK>h@$E9Z
zE-G~T{5(;){a4?1UcTJ^o9T`11;h7;^>cn_>O6eeVX6IF%5cGk%5x<@f1XS@cE0C)
z=-<z*H*Yap>RGc~OIY=cVe^jj+w>LxJiR@ol8tH46`6Av-H(=VWlo&(*xABm)`5vh
zzK+66%~UdbFTHrwcCo;^VE4m4TR-0}tb6^pxM%Xj*GlhXZk~71o5bYQdN}r|p!_^J
z;Rg1@A?$ZI^Yw0FG3*LDbCKn)>6YJ4eI{u$Z!KZm&^>F$bB3O$=F@J!<k>Q(D{ZQ3
z-Z>@K$rUFv7Y8To)L*{zC`bH(75a_^z7M}^N=<*c)<L6|lV_9HMU@A>;r@nEcFeOT
zX>AsBO&7Yh>g$CZ)z&3989P7v%qkR4H*sfL`n}`Cm4p=KqSen%-SINs6Q$|$R#<wL
zxc2SkI_{e<&f;+0{X;ItJLdAf`rE%11o#A;19aS&W=7g*<j+tK=V+SDDaG=`=gzt<
zTKlXLCPW|A+a$eFH@Y{x^zFRg?fL=#zU@5vbNT6?Z&J@(J^#ECw9BHbTITp$n_Fks
zY`u0Rt*N4*RLxV--D%FbJ&6V-l2daQhsB-YntuLP(o2=M`}!4LRY)h_kT@*LoYcS_
zv(;$grpAs3+2=exbB}54+Zfxx@~`~vJN+c9Q%4WJI$Eyed#P^!jz3<D^e&(GO%!N3
zwW}?{$amjUiCh~gPxla^R}u$9XS105D&`iJHi$|t+V}d?smHIKLauUfo!OaQH~Yvl
zEpg6__DE;zEvb?D?w8AETuslJG)Z}b!Q^|!o7eSCG{5F{$3o-3(<H-+vkPX37n^O|
zn6pOa#MLviwl0>v-*>xmcfEC9|Ko2N&nsUiW{Dg23P^r=SisV8KILMa;@x8H>~&v$
z-%pp@`&!hJxA}w7^rje17VEg(o-fQQzNfC*Y9v{9_T^UThp#&f+S81>k{&i5T)j$p
zQB{eUx@fL63xE8{WyaNMntfsUM|?Zo#4o0B`^f*6T|Y_Nw(|ecd;9!M4VE96`9tfT
z-~9!hALC~k?1|zjI<)qkjFH*8S(ztKK3;fQZz1D}0=Wpuwb?B07tbCMxt73Y`hauB
zW0@wO**EsgS+>|^>Kx6a>EfRP*w*^}UagU(n;Yq*wK@N}-jx&c`_qH>ba)oW9FTb8
z8@x11Win&Pq)pd5WxiZ}xz(a&_v8K7rqAYD-+3c>5^wqxuc_B+SrYVQ4jsJr`S9F(
zFPTdoCKPVCA30a|`rB0ZEs0+rei3<h|E5=k>gjJ=rrEiFYWDJt4z;Vzx_)TEdYK)Q
z<@RXI(s{>gQqeQ@`&8HcD$H?P-#qJDGVhl_&guFru6T0~ozAsvG0w5RN2fiv%59#u
zXHvZR+ABAU6Xs+~xnBsq^TIbn%lC$&vw;ZT;#I3wbV$h79mwBt_sbsH$G_Ls%RiQR
zT=M?%gcXktI~gcQw!Lt+aA>zsI%LjLFDkIi?d`qx8P5D&mOV+%XIw4)Hcr0Q&ij3%
z%-*^0Lb5m5g)A+Xl>5S#+FP~gbKqjhg*m=kJEOXK76v^Hy*R(*;8~Zm&re&`@JI3%
zcfULuSMPYD{HueHZb;gvk8|21b)pIvhL&0;Jzp?yNyKCUXNS;7Z8}DiLe~2QT*+B+
zN}^Bko~6TkkKjLN7AB<3xx7%J=+S~!-naAOCpWHZ%HTdSbKdXJkIBreX=yRlj;SiD
zBCDhvIDHR3;jaw8sOS8^s6}A)>e(Wz)}N8cQ(RnR`btM?ad!sG-oNvVmARI!FSjjD
zG{}A-^!)Sc+mGd)E-mS?Q`>j3+Wvf@%=5R``XA@yvYt?#tIM_iW$+2EX?HiLnNJK|
zBz41WzWANGo2Sp@CGzyy?fSaz+SZx7P6cQfI9)Y~(P-n*d(vgmKFKgE@rO5~mG{jx
z8d{+R+as!9F8o(AbN_;~;v6r(pDo*Wt9YMP5OaJwON1BiezW4jW3QfYEM3!aXyVF=
zOV}@0M#}`R+qGB5b>5Pr?@U62YIV+U*r4`7(8~Bg;wryG0zWUEp2~0FDpbU`b@A4;
zw-?f?{+G=-I7@DAq;aT^^Ta>)%d(Fidw7N^zS*xi{OYEfgR|q;bbXUPUhZ}0i+S|I
zqQF}^{_@>@-!tAu@3!sx{rTGY=Rdc6z9ROIBl7-o>8)0^6}w6-)OWXBzVMtor_A(V
zrN7Cc^JxOcbJBlriT8dK`s4K#9kKj;t8+fh-XevIs<hYXpEKQY^Y)$AcgoL?ewLCp
zUazw3ndyn|JSHbs&FoG#bf0CqIF51GjVbdlF9>#BU$;4VO5m~U$GHV|R!mYqmcO3m
z+s)wkMbk^XGPa3%e7{sV?fZvWyu9ka8_pz@O=WoR>ilfq>^w(NnS<v!{~0^3a9r(w
z{`i{y5Q)|E8J(;1b}C+(`9|62cjw6ft<qa_7^c3g<}o*6vZ-iv58+_nUg#myEPSTd
z_CB}Ism%&^-*<RlIB?x=+76XnoR-@HKI|#ToNeXQeo$56lj!_MUo^dYgY-6hvdKE2
zSoiCMki5!~<~YWM*OWKS<#$nB^FVB+GHdR}G}FV@-N{k<3#y+TDOq0pMRg{xLj9DJ
zd(U4gyr@~b%Ywz$?bL+2N$DwV&(FnayKcP9K4o`+irCugN#T1$ny3Gsw5Im1%>U)q
znOEOMNic7BubA0(RVDWahitN*SNjS_pCchr&p9m*2ww_m>sUI|CgIYefB?B``L$mR
z_wm1<$?9O`UZL;5{s`B(uNohd4JIch^kgtDKmB516USzymvi4<oPR!%Ki}Wp{%=ju
zoO|c(jujZmZOmBQ{#)W!``qR8JhwRM9q#FvTefHsTlb@VovAY#RvhHHea+<JWS=yy
z?AjGR);+J{ulH=0F<|DOS+`O9qvCeQ<=kf{h)Ftb$xG^8=C-;>?>kpejYoOV-L<AW
zI1{dMF-0>~h;}rJ^lp(4eVKVs@U>`wkm!}k&qY6FUvRzBn&7hJB#${K+jEm!mv}6u
zPTclOP-ZrVYv?pzv3HS2_eL70FgYEQ%8oHGF1>tjcX?=!T(rM_&;#*e*Jpf3*DDKs
zx9K;T!1he`F3<599G-uZdwq9${Q1~$wlOvA=Y;iYTEV*Ce^*_|42e`|pTB%=x_zJW
zshJL;r*E7U(aP$+8!C9p=<|h%i~hZwXf}0={sM6^1EIkE(k=IouXHYC63a_5_G}U2
z({B8{!gI;t3`3tIwxO@3&vmRWJiA({dGfPAS-U?STi7aoX433`rBnNVFrS_~ziO|}
z?^oLIlb)ZuHsjFxz)1b3SO>=ItpD@93pbyO&6Jy5zyDm#%sE^Y8aYKh^2f4YiOzkw
z?~3-p<=Jburma=%__}s$SwzOR{^&m!nO^&Tu1}kGPbAXo+yjMEpG*(zKE)7jdBpqN
zXYSzbXP&&|j$Got>St(9utlV%3}f%K?`dz--sT;Ty`AYk-7sNdY|^U3$-j7vdo;Ed
zt&mVuoFRGV&#K#%|F`_xa-lvlePe0z#EWLh8E>XqFi+OzpS$X1da-Ks)cU2*AFbYb
z`KxLr=af5#owQsx&bqcbXq734i1Rj~`{J_p+k@hrns~%KC5qITE!@v2eCk$++?KX1
z;zpD8Db|c=3DIofkInCTO3DM5MAgKZ=?QPH720_rHIGa0X6UJcXQ2ht?=4RF+jFy~
z#jZ#=;i_|}k)zIU4>oQ4J>I7p)milV>_VQtQM&tkg+X;pN1f5^b)S5WbWNX`C^YZe
z#@1UEHf%z7*DIeD)12<puw!9z=Ar#L4S_TEDSN(iek1fo?SRj@8833)91qF2++J~{
zqvg}5UWu2|^OwvqJiJJv$p7m(h4vpOUajxopYh5}?Q2hD?nl=XBDK|%UO)aH#8JDX
zL0w&4H7iwdZqn8#cUAiQ=i7G02OP2p4mq{li04h}=SH5q@GhB<^qe_5b?40f&wnNo
zu|;j45wmw{fXIT@JNt{3d{0(KPMvt#Wm(($IdO(|Hp`EDg;w%O-I(H@-Yhf0dFg7C
ziHs&!GdX#f9fGra?T&S<S}1ZUIAhD)b9PNGlM@9KUu}w7&BY=8W!_A;y`O9PKEFA?
z++P0gPs?A$bNcU9FYmv1p5x!ldrOsHu*IGiPR@95d;4sOxOP=lfgkUxq@UbJ&Iaw4
zxKyR$>SEW^9mJ-hR^J)PGUt7CQ^1aWKk{Z4XcjZQ<vW+7a-Esy;p(h1Nh!;Okbu@*
zAH<fQK4B*3)wMPGQ_1Ce{kf-{cP#yI=K{l0{{FkRPwy*hhqd?_J^W#1(XqY#;j|y2
z|KyG5FX6LpQRKY!Y4u96;Dv$3CUbWli7fsl@y@{6Ur2?)hE-1J>ci+p--CYq|854J
z)%E@EG~w@|txtPG@1H4-6WUO*GQrC<Ofve<%=3@0WUA!%82Wt4yH;&~{@$#$Lh-s9
zmjWNg$grGJG|)eL>^#@=hil3U8QDHKEnId+Y;p1&iB+)+yEab}>Q2^3(h3phSlM^t
zQu3q3oxH1C7AZ@eRZTx|EPCga4o022nfU>62hPXN*r)f>bfr};?~mUb{#x%4Tc7ZG
z>Z#qI<R+)ce|yM!F2HPeZO7tykF@iXSNeuEMAaNS87tm#Ga(>!cJbmL-ENwp8=otP
zHe^g#dv(^f=-pFG_n79oPk67ZvTfror|67jTvs*TEpFA0|GI+j^5VDYy29&{XU<gl
zJ?-*jCjBclXSCTaELA$Inza4&Z)0B3Q+c}f3)F5cXy3;CHjO=2M53^5h4o7bz2tym
z4XNinEBw2ayI%W?9}7+TJZ<9l;Q#8=)DN1yi`jBxXX=VWhjo5rT~IaFudIqXJzr^C
zVR8IA1J{oem*}0hGD_`ET*8!MFDGz@=iS<zgFgGU9&em!Akgpiqq6<FvgyS|C!DT&
zo<I4^c>>?A9n-QDqpm-F6R*d0LDKEaS-#9ZuL?ow;Ig@?7xlBVzZI1xOj#f@k)8E2
z|6FTU;m;d719VvY8TqWz=dEEnAbIys$F6!N=fD-|%b#9epLqFQ!KrH!7bo6Udz2Cr
zQhvRsQ(#hNZ+PPF-RE?oSw!Z)+LbMKd4jm9#l88y`<lelp2@Z(YjNy|WGnh*b>>vM
z4*yS^kVSK*@iZ4b6_D5Bd-=eF!DO<AxVqtW#_TVry&?;Ds$30EfB5BK$meCgEY87Z
zyZSXg9xwSUSzY^ns?Ld*tm1P+lAh1hn$@zlML|52&9(2j%WG%79lmG8*QY&R_IFFb
z!93aBdEdN$iavX?)OY^Pm7==lil%E9={ft~E#2W=en2!Nwb+tv_Qz+J*sYfY^E{m<
zIB%7bkimq>#}B?yxnmjZzU`7)=u*dvCwISmrySD0GbG07WnlqN*8%qP_s$o8d0t`n
z`=`g&Q2Y5Z&u`98p0il_&c8#y@=NB(re8O#*>NfS$jvJTD)ap&>v4C?YB_wqYl(Jm
zMHHizQpc+D>wOD1pJ9&OvuVw$P5K>EPjk%pEqLC6MZ3v&MF8`xtl-=#lNp>!a}+LF
zE>`?}W))|>u%wa8f@>kU%fhceu+Xg1@-_YRrs&}>i(=UyW`CY0$gaQ0zbZxKLs)Sl
zSMbk!t8AvP$cmd=rWmSpNO#NDGHaou=OdHMo+no%Tu(C-k$thU;)8(QdLj8e7KiuF
zxhBaFHf_dKZY$;-i=Rr;Ni2CQ!y@Z)djwc(PR+P4zrA|vFJo@Lg&r1+%m4mKcprG5
z-+MvnnUHBk4Hs6{ac_%fopIWy@?MNAchGd(r!_wxE)D+pAV7Zc_r$Ipx1`efqa97f
z()oRTl@}d1mDurf`a`2f99`mjjO@*9m%dRJ_FMM*yiCWoWm%`x)@>2k&+zkocXiC3
zp16x^&F{U)&-kjjbXVQ(Uc+4+oRW`>B<dySIp5RWQ+Vjdml@@*ZQIpareB!g>y+jj
z|9a=Lb7sm+YKQ+GIMX-#Zr<A+%dci-WOb*<t8t2XhOLq^kvMy8W8K81rK*X4EUSJk
zb$(_od}P%Xu~5q*Cav;C#umHoN%ED&y%G{zeUmvLbg9`!wPzm!{3>)DmuSzK=*(2W
z*&o!uOtCmh{PH_*3ASr|AKtxndXm0x$K+{28c`v8tk-jMh(BBr`(xSvdc~X0T9-bo
zmZ`Y-Y;}&u`L<ID{=E&$pZaFJQ%q_*$rzAxYU(Mq^S{)#d7e1KcT26I(<1BEjjJ{-
zQeQ+4bRT0lckAMG#+%EW;yy;qaoTyhX3F&k(HuuhmvI#a-M!{?xTJJ##kU?M(eKNH
zkA;+YCH?&TGMRC6rM>b2Efdq?o=J%}4{Z6hZ9_n#_i~QfHIuTXq~B_W%bmV?*fD?A
zp;epuzdYD;@10=C(e)FzEt#TzEbZ4m^|XhkqTXrqHpJvA-Z2-MwCU$3-H0xupBADs
z=k0u%-18{jMOOSl&W2|%7VO^fYr&3F0=x1mo4f0Gg&sKPS?M76Xy3x*IFtVj&6DHa
zv)=fw@qM|!UsP}p*L0ubb2Nglvs4@{)xSRL$eZaOpNbvsKD)c({Ik76fAuyg|C<un
zC9eEXdu@1`?4C6`{tJDi-PR=RlQ?}*`1j`}C5|a`6lL0HZd$qJVEe}c!}Q?Hb9~DJ
zCp$1Si-l`=tLomA-=-owbGJ**<&yhFR&&oiRNmX?&end`Ztd5L=Zfzf`}pSh+Zwz1
zZ<jrv{8)s0_SL+@?bdge^1t8vTH@OChy&%;)8Ck^$`Cri!_BS6@@)3%4wG{!+d3A!
zs+#v^u5p~g?rD}USI#cW?AdAdSl3^3#*2(q?Kj_USp0JB%Y|zlLlYi$xiBlPG(F41
zedVOOk(b5-Y3_Y*ggY*99ZR|Iz4NJDTZPeDKDCLbz6*Fg?+#^I&vK>nT}<TJ(<e7f
z{T#UIl&;VH{Oz~b-kp_ZH@D?k!<UQhCb6q-EuZ`CQ)PeMtE-03eGkg+yuM;z*3C0(
ziYCVQPJ1prXKL-m#6=&zfA^VF|Cc>@qsbLd{)^nl3pPhSn!U0$NO9Upot0BL(sC=*
z1*>---7d|}u;<X~gYi!keoJ&ES}E9?-<7nzpd`c=GHasB)vljYdf6ZE@OQBPQn_bS
zd7)*oMtq!>UY$<<f@O2B&b?}OFEjey+c|ek6$Jhy7XDA=JA8e?>PO4{rtdB~B_DXl
zIj+0<%r>L1yC)rCKXz=fpAPdj*Y(%eU-0KE;9YiYmaO~kySECnx3AroFE}gg{3NY4
zY0eHJYBRS_IBL7<tiWom|IfeLmc`$VUHJXd%jK6H>>fOtxKw*{>hvkMFR~wh`A_C~
zvCa9yzPfLMdNX3(o=fmlsdEUMuw-6m-}HLj>vv)YUgvB`s*;{^-*u{2tkg!{{;V{k
z%TE_&{Y&m&p#Skxbl}!0ivxG2Cds5XiLTS0a@j;}Gw%WY>{79PJC&`{FO+pnv6<Xg
z^JQXKsn;~uWo_opQ(2=XBwv}VG_Q1eSmy+r1C#zH7TSayKFX=Kpy-E_uTPSJ^VJ#A
zDNOUuTx2UbpQamjOlDD{_M1&oerz&fDP3`C`7`YPIyQUr7VR=h+x@~>W7bpkiA!Wx
zZq_i?UwJv%eZfM}6S+Lfs}!ad&Et8L@Iztxuf;aUo>f}B&R@Hz%JZ+p$3v5q%Nt)d
zn7^8ER@`abPG&iY3rADg!;`IJJk!KgA1<AAXY=YA=O$;CSWYUn7xK{;e0rkb{FkSE
zqVEDe&fU)xuygOmO(r*)R((rKatpJZ&iu@0=G>e)9POMZvU==(l{{Ya%dXjT@1Ecq
zx8Sui)J`0FAF8GCfmJIy;O$ke=UGpVi$A@wJ6S!t#BYt>^T5~lIl|fG%eF;LouRRy
zIQ7q}OQ$Dq;E(s+<W#lGY30^~voFQW+T82&bdlF>4L4TN<3^i&Uuk>GOt_ueGwG0!
zW|HmQ)}(ncW#vCUBq~>iC9mK$EqR@_O3k2sUVyD{$pP70mP!)YJU=d6+&W+Ep?zw_
zW*g?mHJW<acZ~vBdv;~4nA|i&Hg)O`3EwrD?=Q{%()YQ<*3hko&w{P#i^f0J%e#(T
zPj#51*_ZbH_Q$uk3$4%i%u?h_V-{f({HWyWU*w$AmMR>q+N@HytE7BUz@#kQTni>|
zrj(4IcMSSt8a?{cS4hO1GqGdRx0_<h9eZW=dTpC{r`0EILzuVO=WYM}x9raQ<7;QO
zCPaH}f9UM)=NvAxv)H6qUgt~dsbk@BSEHSOZJHZ(n{_(>sc%X@gM9t+claKu`1z}X
z|JmPeRXgDeosDL$N<VUD%-%I?cIBeAR>|Ubuh__FNh(~pqZ%KzQAgwQnhxo}wqmi<
zEBik^l&wwcQF+)~a%qy*6O~X)=Y%87j(sdPOQT&<LQG?V&$wRcn_{9_d^JV#Y^f<T
z+qOw;M?|%bnJm41gl&I(W3KqK{V$(?X4`#tPV~k|yAZ9TU;l7bY;akfx?a=rp!el@
zQ$Bf~3Tnz+C}-2K!pVnm<APh;%VgfxS@%U}`}QwhnP{1ymTpnj=I~)_S?=0dcE>o1
zPtTKU`k}19XnpJ7ZyE=?BjxV??)Vm=ve1_Q=j`u>yS{g1)cvUZ_W4c!=Z`=C{Q3Cj
zPeomwoqYf2pD&)zY}y&H&-5V2K2ME(3(q|aIA>y?aE_yE%K6%TyC!{pRXM+8k<6c~
zbsLlN)?d0ND?fMTc8&ELB;O0N##bn2bb5R^8f^2rF6N`%ukh^}k<VUsA1LBAJsVr$
zD827juDZdiZ|*NjI2oP043CHk&GWUq;G<$1dXZ(TpZDBpv%b#tRpPT-^~d(5s~*#>
z++!2p8LYp@Ipfc|xzBb4-#<3Pt>s5nX`9u*g$nGIYyut!HSSw+-Ps=I-{RKl$@*=Y
z#>u9QJ9?@%n;h|wFm;!=)ABqvQ<zb2pI=Aj^C|2aiQfve$`*&Q*4&IdJ0Ugh!aW}G
z2iIiTQhv|+wn14tp37%`AFp)Qp}##JT94*cSnTb8H%Bw<os6E)tXm>cQ7KjHH+wLI
zdD$%c`y=ngoTD4Xe=m*Vjejyn=i~1W_iPz{8=w51`1ekAVEeN(il!`fmD?OTGg?<K
zxX)?XdVl%ZM=biU!=|}s?yI?bddby}ddW}m7C*cX81ruLK2e@{Ey;Vk*dDnm?%5uF
z`&PIdxGWbq!B{^uG<&am>_MZ6Jee*D%DziGi>}QL{=QLP>r}|pC+eb3<`1_BT{38x
zxaNIof<x=XHwg~YlP2X<20dz861R|7R`#z;y;l3r3)713zN`vSYrWH7<8a1|OK#bh
z{_r)zC-3oG_n*G_@tYTI_cD3kPPms_BB?3=@WlM-XZ`H1$1Ti#B)I$E+hzCPz5TuY
zah>(7%n!bowe;E=W_@0>=s}yAktN^Mugu)h3{oyyv-8dBvZm>7W?p4g)%jiJ#j4jI
z{T0v3EYv=-`D&uY;?NlmB`p*8zumq2Ntz1Bf{iC`WL4eiyZyIp|NXr0xBp(7mAQF=
zQq+#yl|q8rJfQ+y^UQK~xc*-ID*4Cxmz%GDR`kzwGfAG&^eCz9tZ&e_w#+SS&L}od
z_|JRe?`oe4o^Y@99Wp_&cQZEc2wS9OYAkK%RL$%9MC^I<`KSDr^7dy9b}UWnNKXF$
zkF#|}zhOv7=>lf6M#b{2jHa6%SQn)%**z;XDR_<xOR<(%<_e}0C#-ix9eyHb@U-8$
z)KBGJNDSw`?W|9#YyN8QIlA^jUH#>{M-w~jH$1KEOxW$`_etT_423n!7OIA+3nKy_
z&eU2Zu!AW;-fN?%&e{Se-`aq)E=+C5ujI&VFT2zCIOBNY+Q_WVGqX$?*LZY%ysUFV
zL2coRX+=qYvn;*#z3%u?=T;<J&Y*nZh{3kyB~|5C9{w|@_IJ#AnO;Bdug&kDFVf$i
z|NQY|O-)T*P0ha_Z;$uO%iGu0)YRF@^>6;ZA=XbjLB+#gytI&=<2*;R<X63SPb(jY
zhCC`>qkeEL%l3sr>C5C_@n2#{IJMxk&_*X_FH!e16J|~KmK2eFBDqZTnt?6T6RF_x
zt;M@6runnpmw5Pf;pd*>UuhZ<+8Zmh)A_qu=Srq=%}~u<IY~oxX_SCq>8^9Pxy~5}
zrGGY#=ik0u_hK%u#6c!SX_2j|r#fd|YqVwOnx3-ywDYSwYH|^O8&)k@c;~9#gwwm*
zJtEW2fAhPta>=Au2jAV2tG;x$FKgLkK6j2;T@?*tA7`4`o($L*;m^3_$kH>J2c{}o
zzR<{Z`l-0fp(f!Mt6|%_l@lXpoBa^|%e&I5Zj*D}LXCHw>r?BDWHXyyn{prO;qjG;
zS+t>_YgfYWd&fC8pEfz;na3#WcRFFVLDq?9jm9QYa=GUBPTv;(I`!;z%h$gjJa6&$
zd;i?ygWak>N>AFiZGN>bh{tQ;p395ff`ezv2UQmD+9JNG{QQHw3ESA?`1a4}+Iw7P
z{p1@nZ+!N0sZIW9y^^KX@YC1Zy6w6RBJ!z@PxxZ3zohhJ?R>)>V{js`NSS}Lb4tUs
zw!0IJ_lcH=>Ku0eai87v`ef&M`e~MXROOAowOKq}X;ia=J7)HTb!G|6tMb<d@A}>O
z@rXs|<pQVUANbxcdY8Lo$`tR8H{tazDG~;0OC}^7o$PllCUl>6c81Bt;3((!x7dy!
zcp<^Oq)$)g68qlbGZ#a9*F4+e9m>D?@U89Dzw7Mx-Ot~CO-YeaAp7XvzR0RI`=<tZ
za<m&<lDX_TKTr2f5m(Xv2^&I{QrJISyy~Q^V%MBw!z$>RrRgtO)mrKjY*fR(cF!Wq
z+BSwalEqAdCO6kO&f4meW6Ni{wcw&>yupQ+EK>r{JUjN%u1(fmJ6^jj>WTM~!_l%&
z-8gn0_@;e1_MZFNea9BQ7yLNm{gnS_FV!7>kyLh+LrLR?i{~t<si%x4YgbB7Wok*%
zzZxc`mTg?}VbUb_$TzQ4jpkaON#1ejPzd`Yt>m=7yLPpSowMC@`EFub*ut3$KTqko
z#{Dw)!4avGIuY}jW-T`|yTu%KbpPfeR{d$FYxW=W{rt;%y7+tT>PP#dAL)k(zJFC0
z-pa4k@YsCM%HDnJIi_gU7P%~E-J7hM;2gH4bERo^)~#80^W1klZSn8ZIwbCX*2!h{
z>J`_G%;)S_%jTWFUUGwugkt@vlT*am-93|nCS<zAs4U8{`pYx9YJKMF-QPF&|9kVi
z((c^z$3JUq&K>{xqo$_L#@=3TzI=aw|M~O%=Ra54%|CCP`yqzSSll_S&Q)dN@wk(M
zg6$KPC++Mksj}d;x^q%-<@eI8Km%j8!lTa(SnFO}te$!C)E&=TDn?y}g=@@C8Ly8E
zm|A%xvuCHJ%_H8I^Q~icPgF7vk6b4IYaM^&zcZoNTaLe;Q8zVdYSrvduVx*}cDj}H
zv!I_<PHeMc@f6{gGxGv(ED!HGb3VhDvCr`;(}Yd?Z_Fr;nao)?v-sTW%|G8hGv{X3
zzklU}@x9Xdc~2tW)CMS>>|a+>dcCr7Q$x-@4=vpXw`Z8Md8`xgmaLuNdn?Y+IZZ9Q
zB>HahuX);Le3Q2vSRb2vL@d&ow`XRn39oso&#oil;oFv8Tp@Y7TKAjxwGV%I!fN%W
z`#j{9PQUiOIn~K|S9QQ0_rf<xFZ?G6s?A$jt>Ku_l($4l!sf=y1J_?Z@N4XRc+kXT
zX3DREkVwflsZ(agEL=UglIzHGxtKDqr|y=#&!*Sp*gks{9+BrYSzl}7O6G&EjQYE;
zpX&bm`j4*J@h9c)EbSk&#;>Z0v$b8aD8g}RiPg>UcWD|^Zt~afn*Z*6+Wix=Z}#p9
zl)iFB*T9<pbCR!H9-FXUv5o)vb>7xO&jchdAFz?ITI$^w%6`-}_Snl$3U;jX7tPP`
zKjHjK{Rv~`3>!`VgB=f8B#tlGI{o0k-;KuGN(^oa9T#&j6n-9Yt-(v9w!_pi=58vF
zsM&d0<}U|6xv#LSvcLP=<9Mj}rj>GM-yQ$>x9oq}?)$ga&TO5X>vl#tq(G>3o$)88
zOI8tS1#>0~lrNvw;l=2FdR1kcNuYOby!E1PnaIWG&#^N<dYV7`>(M3i^1pseW8UjO
zr>H+X&-t}k_Ux+Yw6lkLZXY_3G3(UoSu1w(eAQetGc97XtUO=uln3*7oZs(T;@>#`
zky7!$pcQYQ?3~W{tM0&Q?U%3O_1=Yl(M{Rdaq6w#rCofRn0|?En=&uWXJ+-3cB#3J
z+n#FI6<CFx=daY9tA4mJF7L+&7mnQ>yN)-eq)j`<XZXFGA?;L8jY?6@`i5OII@;5N
zCEmI0t1S$EKCh+wu=Azjl%sd2)CO3bdn;D=Y4_Hj(a!Fbr7l0?e>R?c>s9`6R-lom
z=H$7XswU~~Q5UJ|ENtyNv@tbidSjd!=gKlwN!Mys)h&-VJm$#uG2UDj5%l&jgYh2K
z>~%aok4n0#4xgC6{!CoXJs11gr3I5~cCMMDtTscBkLO{5<RQPZ?~|1eY1^_rU)*2!
z?eoVs>Gt!_AAkP%<IkEJJ5Wxwx3jmO*MI)`=bQ59)9b(4{d&>avFx;;>$+4<#c2lv
zXF9BUIRCK`@1eU3r;03hjycL4{x#^m_RA7Y!@OTwI^Cam6>sOfxa=!<Ix_pS$o#kL
zX*Jn<D%M?^zq^|8Ue|{m?{heR9v7&eUwCz9!`iCDN&ZPc_f|ceA>w}R`yv6uUX4)p
z&{n0xu`XAhDfL`d{uJ$1x9Wsu$D4HqFYhh(Gd$CGvNo%sdtKR5lQ%JwLh^ms8&lmk
ztFLy9<>1_YBJ@%Ib7!YcM}7%AH~GhwY&8ixejJnPZu%f*!zVmz!nzGitz0)ZHW*J{
zciCX`4MnAg6YVeDY;SoQ+oB-Yk|Fisdfm&He>!~slm~{t+q__PqV7)P{~xME^)K_p
zdHR>K6dIkFru^zkoGN$w9ES>vg(-d_;<xRLuN{?*>sna)Y)6BP(OUy$%cT)7{|IF&
z7te^;G)dy1{`#X6bAGK{_O|xRz11iBcifuk@G7Q~&!=+6H3P5LJRv(R0%J-!r2D^c
z<(&C?`s{_w$J0;wPh6Bb>33TD({6pwyBc@PyMNxAcJj7L2S@jXc-3y}Wo3as)+}1}
z{ojk{>HBN9eS2$Bm-#lW%y{=|*;Qw^ES(m|drN6r`l-tsch)G}*~%cezRorLU!h!A
zN!jxgDy%Q3EI)nt>Es_Oa*OycvVT<k(O}!k|AKQ~7V|8>3GWuQt$uh@CilbN&b^_w
zH+YSnoAP{3n4sWPw|J3S>t?rw%ao1@oZx3!yyUy#t}l)A8YQ?UE~~Qd%isQc`|Y=N
zcldVCjC%KO0!OdE^*#?bub@3uv(kGbm~6K^2>!e>+QfIqsk2)B(dT5Vt2{k^hHxHN
z*z&W$?X3LH1f9iKKlM-a4qIR$9`r=|ZSlJO$DT?~R^6w&jWa2Q-C^t5&Fr^aU*x^h
zJsNoK#niTPX$h{*51YP!wY_Kc>dN7>y(`<EwpP5YYCX;OS9^>7_r!nt7eD+I{$tS}
zQ?*p2Pv_ljy$Y^KU7-n=Yuzsuobd>{eyVKc{%Jwpx)Ix5p3S)6KS?+AfuXF&ZwZx@
zM@ug2@Ueuly6RrKIyLdL*2>+bE2ae=n$zI_?cSXE85W$%Z|^vX@9<8#tZ3CM^8JA8
zKHb|VZTHLlQ2ssr|JCT}S7$v=+7NvB{+c)JM_tACik@B<zu!#LbDm?<FQ19?cR8~x
z%h(<ezx~>oS#H~wnckTAwPr@Vme1DAZ&5FQ1#U49ImN&4)Q0Whk4pJN3Q`-F&s>~S
zbGK^$!xrb|JP%*qyX#-HWB$9jwLflFzu&v&g53P`pFh^r*xA|3&7a?Y{`}{Ug?4uH
z`_F&=`DSLKP|zx0x5-MIXQ^gYT{~j7bYqRvT_3kAacZe<|E6}7$m=~j%E!^h|8QO}
z_vtnL!Btz$CUkcscZvTLemh5}_1L20NgKC54Z1(gL^!;HQE7dJn~><Y3t0z4gg(S(
zZj5=HUMSeUMz6g!U{4xzyz1EnEBu8lMD<=eP1Y%GE3ccCGj00kJr<3tTkbadHJ@7B
z+auwWBrbQwAa%3hk!S3)+RUb&eYAedGN&im9AOsi3PtwkE~;8=6*PV+mz)t6GPy!J
z=kVIhX%?4_EZJ2yEXi`OwwzpiL_w|MltaY6_6JKgNd(2m?_Qu*m>;ftn$2+ft_3~|
z3k!43-PQ=6TlugwUshbZ_l1$jL%X{R!+q9HpJ2NE&x&btpBBw`3r{Xk`N?~7=W;f_
zsGxIi<ec~nibY<$yOH{G_eGzdvPs_;KVy1hY@Rvm?e}}_?fHK4o=4|PMi>dtH?aJ!
zG<o;*s=R&IZoR)+R9(REEjdHANw9{w{e!wYpZ-O$;*wKK-$zdrxO!=I-0Tir_9UT$
z=6!lzZIjQ>=GVXG$}#c$<DWPC`XB#TD|<h0d;G1BXWuL<Gu}P>?xCEsS3+jbVqBKd
zvb1dVDQ?a4nx?+7zrBUL!UTo(DyB?mzSnNwUcbwR-JVUpS-yGx6#i4}PnXzes;72X
zy@(RizA^V=y7lJl(tUE@syI3JM<}?&Olae_TX^w?_sf=4yM;$(8_Fg5%4M7PX1|xP
zeZsw{X<3>5o7;P9cW<rE*8Mi2#ifaL<x6ELOU8r$rWe%oeSfYZd#^UDqjeSAR6`}s
z>rSB)OfI%mHh*%ist7+9x#&m7x--s>D!W`iJuS0Yw^a1>x(3mMU%O^FNxfv+oxyi`
z4S(0nqSRSDTK?*m!rAw1Zkv8j_*ePmg;kZR>@^;#)=M#*FXCUlKfAQ@bM5W7AI*0f
zY!5EHWVuoLK+dup@mT^Nj?I!6kGrQjr|9>wDLU_C*@P6OoGz(&ynnTBmQCqewJ6~m
zZmhbBobn7xDjLhw&aN@dzAP#`ai;XlHydM)*fyNEc(0kI$ttTS)Tn-;;F6)Yxbs;a
zH-VICQ)e-4NoAS&uzS<(NdYTnJ~KM|#VPZYxa!3CsTb_Mg%A7RDfz%E@>f=5{&dF+
zQ~&#L-BVLY3{AT;?WS12qETn@<(mb9oEDPSxA$-;ZAeqP^~NHvJ?hcdhsN1r`}0@L
zvVXpLYl*n2@dg1`hcAD>aIR-K@uVPEVpIO{m=7o2opx>Pym0c|g2%sRzEpWE^Ec)D
zzn)G1YTN|n8~z@D{`@$o8n&5t-o4(={r$c33JeB&Uz?vPl+C*;c<e0CS+|bOzB-3y
zc9l=|x~dU8Ylicj#y>%qBaFHN^q1AWD4bZV>&N_Gd{uAGlar@cJ+_{|usiagc-XtX
zXZd?F>lQ}o_&e@<8U6PZ*Msom`*ipFXt_-eEPLBLQ{<SXZSz|pcaA5=jo#RNd-tQ?
z_mz`_=FVk~tSkFg%QA;oOcLWyKBy<UiRn{pV5_9S!SWu~um#^uleJ&yomyDS)5AD_
z)6RJ(A3dJk|L*a(v-x>t5z?0a=|(A=+L??$ZFFqTJm>jx8l!`uAE>nXu*IZ^efjd*
zLod4`9rol!%u3uV*f*oW_hR~vXqPoVZJzH)n_IE_^pi``Zha+fEUvZoU7O>M)*f@d
zs&BKURk_w+qU4sA^)DA^RsIsa)u_V1yw@sRQ%C&pjnoK>M02)^`K-0B=iO#a{XKI-
z?S}7N!h1E_wx69Dy~5LC@_B(tmLg79nQqiP)t@Y|FSv5;zAE!`m8r71ac;Yp-o5&)
zx0>Uv>P9Ikfj?^Nj<Ry^wqd&AJI~y0&2c{QLdEA6=P%#dy!HI!9~F1(`X1-KO)Hym
z_X^)t=Zj~evdu%AO<Z=bR`dL0vSjxJt%knNr|+ihnp$9RNZ{s4Kl|D_$u`F<e*due
zoi0EBu>4{9yYlarZ{A(5uT>#=CZObGzoxk0A%C~|zf;m)KR)1*y!1I!$EmE_*6K$S
zCfVP)dvikHzju2#^MB+k7gc9DTQH+$_s8G9@7lMhwV5pQ*5=OQvAh#}U0UkP!;*&r
z*6!2g?ytO)=FH_PC_Z)3ouFqzb9e7rb&YL>mYaOb%54Yo78jMA$t|q^pts|t-`7WF
z`sF37Q$GZW)H?(eGu7s3O-oQt+TYYa%j|^X{XMHxT|VrcpWIWoYIm~olDM>_Pm`n1
z&ONCq{NYEX@rKp;2Xl=YvP<61znb@N;&i@}x5{#5T&<f@Dp)*rt-N}sJLICk;wsh^
z9jiN5ie*Qy-8%d3EY^jO%eFk;a5!Pv*)6V?84LS68r-~E^Um`a>n`Tx`;oFb?dbCy
z-?k%fgAIF=mM=|nXs!?~nLA1AV!A+f-eS$&oxu*GE%|=SCVvdNz)&9dFYfq*^hf@G
zrsRDNRy+5)CcR6Om7z^|dTpC@#ZL7TKbC}uF}tUW`CW88tjm6AF0Xuw`wG*|Wis2R
z&HMJT(@a<Y^y7PzW->b$6|Iq&CuS-2apu<>kNCH2J|TJk%Bvqom?rM;m@!lCsD;WK
zldAX2`|W-i`}F+&EV<#Y@t#7vQ*zHAfBaEnBM(}`f4*+s!y|&B+nyhqK3}kVW2dLF
zLc-ZU-BmM{6%(y8FR@06?wK;zujUdbFZ1&i%lB+sukq1T`eNt4{L5Y+Vs-Z&?fcYw
zcty@k2_trP`<Vd~?S(D!ZC0<k8sFLRNzW{MmqDBy>-m}Mo(HJOMO96B>a<zB%>P#V
zj5nG3za1L&kM{X>&esbJ$x*3RDZ85Xal&iyv=>WrAAYsGuRrU(TVi!SGo!_n<S&LP
z$Flsyw)6KHe|S*0CEB`AVpiskQ|CN>GaN{&N}Tv<sgw4{wO@J@-wH8>_<y$!I}jJ;
z@O<Ncp)eUQ>mPB;&aK+uRkm|gx>>v8uf9I@WqW6@Tw-P5cVFQ~x`y|rcZ+@brDw%i
zZn%41@Zr3b>ra@fo2vHlsq2Rd^lUIb@=8-xza{i}n%m)(mFXJ`n;zMmI;fuYv*8cd
z&hI@+O?#H;sE8(JAKIxIW4_1IXWmk;KkLgRCQoIKnO8U=+<V_^165@^(bKoz1<i=F
zd0%f-x~_-gfs$*ao<jbLRhqK1A}`IfbLLaZ+vt4!MY{d@&2#y)B`4l^6ObKve|y=M
zvfx~Y({Dsf6?QIIG;`S^D|U0s*N>HMi*fcizLZire7v~Fc-5bK@^|Ool~1;5v)^s+
z{@&+6hq(8~?s@<3-fYnKF`fA`eELPE$)8moW|UU`vJh^1Ex#<+El54-j(J=3=VwOe
z9yBv4`994k%D&fkv9D03F2dyh#IV&RysTBWMNX@Nk9i!L-NAp|@vYNLjqsh)J9tY=
z%>@m&n#ROPG_DGYIy^(`@`*`P?;Uo~Ra~1fce&wnaVO3l9_j1nq;t&rtr5kq?4;xI
zBW+c}HRWYp9!7l~2R)>^1=gRDQqO<4`1`frnZCyb?)O^X|6IJIbHa|Ut*2{(>ym^%
z{o0Z-!@BL#m;LYietxUA&^K(=b9QU<2|T=S8C%gRDU-@l)tK}P<*MOo>+WAt{A+RR
zZHLhPzQuQWvdT=dxw5kuKa0DmE;zD;{q{$Bjd>Lcm3C|mf;YQ`L%R5uY^zjsy()g6
zGvVpQD^;BpRabY;465AeohTjp`DtgyzU!BdxUC8=-?)9k|M{5*942nRXFYS(kIyPC
zQdPS-X5ZA%xpd`@W0djNFMC=JDk*AwEDh~qT<B})D7<B{?5Z89t@i^OL$!4kSJkAn
ziFVb9R-Y1!Y4uik9VgHj-@>a?RohyY>S@z;!06DOYnFG;pHY`_Y_fHiU;O>RoB93E
zKYy&Lvuker{PXe0pO1h3{8>{s@BDM)J^IH#e5jd`w8rS-r}FX#PL1a^?s?|88+a_b
z`oxAcB*rw=`yu}^)5apDqEinOB?OK<O2}(3j$9NlwcA*E<A%wpKQ})-xAoDCwFx(`
zzF#Mk|6sqWsn7Z2kqJ8DKW5(k#CpxWGbuJLZ$genUBuRj$#$o`ADK>OR68U3FrX!k
zt@`PKHOq_6H{F{MTXT2K!(AV_F0IyhbUFR>gR6f|tdqFuaOx?`u3pCgKBrf+J2by7
zWBewk+R~%=?h^BKxl8```@h~hfBSFc?)$fQ$R0oYCcyBr*~y+0BJA^xQ&xF+cXnUO
zk*w@suybLtV{1+2ov+?4#>O5~E2#N<>)hS{)s{Z>Uh)1~aMn=^75ns8|AV}Bgzm{l
z>*XAi`^0p+U*~l@|NG{5Mr}OeMvp3Vi(k4fWYaQhig@F2bt=n)8KRTcpW3JMa^ef4
z<=bDoiEBmmomaSZ@wmj9Ikz1B<<E(}-F5MPiT+D7C*LRCfmcqR*~nCoyG-==WnZU#
zyJGUDeV^`WINklYtM&fKm-+5)^S0fwJr+N;Ix|x8U)zpLL6wH<kGTGpJ!BKRQf8sq
zB$Eq|CF<tgy(QT(f7MLWmel60*ZLOKN+oPq^Imk7*cI8e8YPF{Jo(P^e$Hv0P0CAe
zFRxzy{o(R{xzG3N?2cQkp8loVr1IZ|lRIx*5ZcH5?VIMs6#XExRO9d!x*hscxOMy=
z+2}29_PKFuMe~b!Ec#s*yqBcb?%bMqJY##}EnSlhA5$!j>RK#TIVQj->DRkMTi!3X
ztJtIM-cJs7VRo4pnOg;;Ix-s<vz|G&l<%s5;xYjjX#u8xdJktkS=`NY@q+8exdAQG
z`x6qUe2qJ7zx;}|;8F2_#jVHVMZYH=f7|zv_3&B79Im~4zn5+Kw&eq7pyWx;y>DKh
zkXXOHEkyWOV&vNCbN+k3I{v@dzHYs_Mz64e?CYYueTG}KqpgFQVzZC9h<3;6=@)Og
zd;IN=ySLt6%WJ=z_cr5JUzvo?N-KlQH5R5KyLs52o{C->zBF{Jam>al@y!)yZ5Pxx
zyh}=3^`^FkZPJ@7T^V0@K0o7r`QanM2TL0T(rj3KR_|H8scX05v#lX>mHqXO`~2H-
z>{q1FHSfYtWsw5e7k<}?HM~}1?A@l_e6q!HQ&&TS8c*_}7b}<R+G<EW-1M+mUg=iC
zvlUU<?la?Nt(+;h?NpVnz|n7OJR0jVi`J=}<Q8qWOg=GT@dRO>kBUO4CLEkG->t5q
zs;yUM%iQ$(Z~xvjp0#IM^}hm?PZe(3D|OYR@bvok#vfc)&2*(|Mm)dW(VW9wot-n4
zr}RykR$DV|Wv1u8CCr|s93Rdre+zl>h^t~ITSY~7-n-?K*-z;=+5Aju4t~G7?C#bH
zdQ1zqTZ{iZ_Pjt@+x2W-zU`7{>yvjVRu#JGGumD9^IO9dUZb{Uxt?O>h1WB?8$x6D
z-EtBwHU9W0Cw0+*8;0H;X>%iLIV@~EtDmZDl#WWgc}zk6u+-Nlf#wr`r!JqIvQUR5
zO`?H+*Y|>L@0_dB?f=$%tNZr3=HB_|Z};5K`(AdZzbv~f*tD6^*mcRDG`Gr77WP<0
z-ID84_eu|L{><{PR(aj!<o)xKR<3p`yKC`g;a+KvR@uDMm&Kw)j=LWCo|z&ao>O(>
zdDi)?eaF*}Ejg@my>^PHU%7~``;xYqoSHon7qiY<3d}hArTJ_-<7H*N=Czi`JUlb_
zULRIn?hzIFvT#YRe`d}0{t~5Qp^E0*r&%l7E7CRJ9y-_D{nGW9YJZvEs_*+=_cICY
zH~8>zcRK6SQ)_3L|7SgLo}v3C=Tk1<+lQS#mhauSub{z9dFfR<<4G%AVnQ#rKMUNq
z=K2{f`%N0b>3K<w-dO>wSFqjv@qf>$iOtHED)Sio59UiL#$KOWe==`(UVr#M-D^kO
zYw}K2gzAUdd<_k6>}uMzgZmj1SGeI_rl(xYsVVE3LM~sl6*FGJW+H5Si0zGt!ix6|
z+EOh_yncd9568aLNUtp`=h;!Qc$TyK67D_$*`0^mXZ6Z7TPRIeW@bAb8WTD<Fi{|b
z+3FXkp+|7xse^K1#g;m$Y=<WOIaKCXcFFYM-WZu%Y2PD~!&N>=h*{}&pW34>P|PQ|
zG%}aPP3mr*d$rtJ-|O~QFV!chZ*Y2&9=1u`?3~hxcWGHQ;qPK+mH18eIq{!Q;{T?a
z*FvrHiy!22&VH`rdFXT8<4G%Ba`*i13_Y_pYV+c)k2l<YyW{Tdw;QS>H09ndTa<Qf
z&B7Wl!H4dOO5snR{LB4mSGlNjxze;VI!u24k9U}@JJoaWQAc>kgujb;BrE4yR`05q
zVG)uXC-`o@cm4S@OIOyfc&Hxrq`flW-1;@so92C*{7cTeVu!GH{j2?*_xDdf*`@8w
z8}IhAdr`3YC5P5+lWuEeI+bb)t)7v^JzFOGtj-1@ZsoZvZUz6DGBI@8<&P|Z2Bp8=
z@0zv!{ewxZGkIhroy0kR=A3_-CGh(A%pIpsERcM^*#7wEpEmaAk01a1^Jh)Xx8uhj
zKR#}}W%KjJ^Ot|Wcl_q$su!m}D&JZ4)aYwnZSJ2}%i4Esn*Aw!eHBkvu+#kmCbo0b
z?Q6P&i<b7yN}GLQ;=0gtmmW`BJj1`f<HWVTAGSHi?bnC#Uw^QHGyc>igT%{|>s-0&
z{Xg#g=p}x(V)_4b6?4`mcU<yuIA5Xt>dLMSuYRe#=s2OI7I<}1S%0_u2@|%Xtr~wO
zRrG0lExXLQN959E&qqv4DktTeo=W+~>L2(bWM)rpQnvVG?Q@(5d6o!gSefsbZ+F38
ze);_K7tjBzu-R`rfBWz4zyIDYtiRWnfA(D3*)6GCuGeU~Xl#FYk*{j&tA*|kx4J!#
zU09yjyIzG&wBqIDb-zRM_E?!;S9M(bNz|Hs>ADX)3W8NcS5=2iO@BF0Y5I~!E%mul
z@kaG!o+m#Fl$Z4%(Q1`g@KEOc8ln5lEVCF)WsEPZiSzgt?I&rx{?uLB;J(1ju$ZmQ
zDwgSgyA4-ecrGUU_g&?~rE~V3+pZ+So_cNO5zVK0Isd2LW($+;oL?aG^YUUQ{jzBl
zl7HfMe9e!no%(w7t2;A}>)fpDxjpBqpT5TDQ}(Gd-!I?6BCzHwub`o}@sVkP(_bFW
zm3lIv;LVJP#z$GFdReQUpKFc`{}oi&?H_&OXz$E<Ec&-LWV;=^e)DQo#kRBJ>!-{W
zKc^H|wJyppFu0)U9H&`ZnfJ~a3iCcJZEd->;;d`+yG*HriOdBYljR(h9nIehHM<mF
zH*~pcX!TXbVCpiXIZrqyWhVG8P~IS6`l9r<|NNT5Ll2dl9e8H)+_pR`VJxh+>Gbv+
zN7n0<?)lQS^5NWLN~JR=zm{eB@F@DX#7kEvo`qY&c!S@#U;1rvO7Meb`qC4QS+&!I
zo^jt#3tzCrL3wt?QMsKTpINHtr>wV<IPibj|G(j-f5lx)%^u#+QI>6y^VFSrRN%26
z?>Soswk`S$vm!T_Z7JLQxD2!yq5t;TyoW0*ve{WYwXTNctzq5i<0|-E##DOq{8C@N
zDTZk~WIwsSl2GP}Uu|_tyZmnX>2JFG{uHzx<qcfN@>{`OA+W~(_MszdLY{c*++8YG
zAAO<zVBo)y9arCVY--XhP(7PJq0)5!4PH;?%KV4RdoBDYsar;VDh%e^yx{+<w;3L5
zmPG#W?{rGY{3z%=BlKGLm3gPPi#O)q^ay_2(4cx#v-wi%Vj&0Xy_1`HK6e%OY;JeF
zYaFA0{`q2iyLpW_?d{J!Pp_~0cK-bHpBCqzzsQ{R_RjLAIZM*wUOxY^IQ`kDoxWDB
zlJjC6H99=p6|QcM6Z95oX~>Lv+~lY5M@GA$t0O~;<-z$)_A3>HM7;AJe<<9({fR%T
zb<NC>J=;4TowxFdal10da!JL*?>WyxvoAhBJok6GM!Cb(yPv9)uN>XYEA-`wqPvUR
z5woeYf3I=+_;jz|`?NKk)_TkjE9zocLq7UsFivx+jC)$LD`KT-V&^R78GpE^W*Dfp
z_tdd1((-)w?tER*9Gm%%Yrfsy{`~RBJ@wUh_S?Suo`3vp<(AtQ^VS|c6Kou$ZP6&X
zLbfhy=FAD4JG}%OIc+`{6<dfNR^StPdOO_iNszFz`yAsZC4F<Z{ZZIy#m%Xa{Owqb
z>*ng87gKBJU)=1|V(0j4;+8opmK?s<WOs?Bn5BE(D^)eVGxIiYD))5rtF-5NQ)YSe
z&m@nJl00k^_}BBSI(bsCNdJ@S=M3k6(sgr<6!(7J@$l91jnyYlOn)<L$?LNlN*^e>
zA6+W(<Cc%O=D)e+*Pex2cHR+xS-q0s{*~XZdnz;|)er8MWB#zW{m7&JP1c8BaL>1Y
zng80sZ%1m>zR(lbZ%*FAJ+&#~a$A1+o1NFb>;KsMMl<Jg-g~wO+WZM+%6m4=Fw1YZ
zKiGR9TJl|w?3J#&wL7m*Py9RSp5wM7om-}coUHk<!R$=&^hJIL7~T1qBV9UX@Hnft
zy=pKy(W__jl~H;XUul5*a<k)wFC^XaojD8_uUfrJD%AaKfXktE28s_Ff+D#X1DEni
zPc^!}c!k`-y<cX!KamN)yw=Ly+^2?F)WK`E+RvSpK3Df=ZDw&6dK`4r#P{S5p;cT3
zEK{m?e|?;>RO^b9wHkZ*Of6>p@-JF(H=nr~Pwal*)wbSj_Q4Cgr0)HF|7T-vQ#e!n
z9j+jm1}pYGht7%Jz7)!-(D3uNVCBK>zYFi&D$G89x9{-|*|oA_rhCn_tzs{3)!g+)
z{gcF#+v_;>KM1tS%-l62{%}*VoQRRDgHp2cHr0PiH*i%51wMcAap#GXxA<3XikA>l
zE>Jw8w}f%uhq4b#7t|jN_{{J(%4}_`vG!*3r5~3h=FbR?J7amiBV%VxVwP5jB<J??
zJS7ItR;q335;3@Xt#7f;JKfL9Yo_*vY0mzwskXQ7|JORr50@X+HSV!=QjNKxseUuI
z>R8A}pEGql@{;nFZtv6U?an=a{<G%G=g*)ega0h->uT)m&VAne{kg^bd*>&fyAsF4
zc|1+pS5N+&{r8x?{MYaPeKp}qT+DyYBSAYfP1m?9tesbK$EH#w$n&nu*XXIP7eAi5
z!6T~DEI#pcQdipah6QdnHuv_qu`vBq4m>Y?<I&WWXPJynbEN3Y20WV@@vrJe$`i>m
z4Nqm3-~3(CcG6W@UNw}BwVPf3%)~7ktW3YpY-yd<F6_PL!@b+9xUR=6<Koo+`o{9^
z+-08Ile9MM^!jslit%C%Wk<~i?N?huE!_g#x)_;v^Bq6)?(dh+mVM`+|GoYB-;Vq8
z=YMZM{`T7r+53GG_1mtQBs~+&{4^&hui(;6gW9mxi7Ss@n4bE2?#_KjXLiOhOgEJ0
z`Ze!Gd&rzx#;nPK3yifAl9xX5e0urkj7+(is--OFmiBFX^_!#p?526EkMyspnvht;
zf2?mE*Qy{_;XT(a5B5|BSZsFJsIs#>x8<N@<Kg}a+36C~_UsQX+gI?1;mzVx=WnlD
z|1V?q$!l|NttrmE{-Dr~dk&*vkek5d)xECrlb-r+@!qZXlp%h~dFzb-``GsW_&epG
zlzFIJS)%5&#cJiUO8530J+Uyx^WxTw8s){GjqXgi{)9z<FPM-2joqu?FYD^R^6Y!T
zv+bAM^|`ZmJK0Is9hEux_|u_%%WnRT{=ad)#j^LuTsD=4oh-ZU_~-kAOHXI{P2M;+
zea-G<Db6P|w69rP7&T`)SS@9Az9DMf>dfzGoNJ(w=p4Hx*gtFeymgAnC4vr7&e=)N
zveub}Ts@<?%4$LG#_-0DTLC8uR&}lHnRsqPk6!J&ZY8^-A3E#H<!7*-GkK(Ils37<
z_1PTWG^yZ*V`*Zs#<6S>hmB@+StkfOIbWJ>A3mf1<(1AU1@&uQp1wZiQ*4kw)6?ip
zMJxx)v+l}<IKNeyFl*_)*_D6Gxodo6Z$6o|?1|1FZH;&96xFVrJjVU}RgDwRt*G5w
zAK&ZyU1+WI`ggQ--todMiz9;_1D=PN^30n&Cw5&~xA}uUgYwz;5`_h3`s>&BO!74-
zy{9*?rl#{v+ri8$R)r2>F6$JVPC4?r&J#L(_@(ipxYIUuH+}iuIjx&C;nP)(XZ;Dk
zPp2KZ?z>`&stRk|w}q<@%&L4E-4wy7xY?llLEOP3&s-C;l)~1Vec8%3<ARIMmWzTC
zcWw#JipqZIwnjAV!lEhh>!MHA-g%&K%9Q!XCeOV5Ypybvl$Y8Rr&sQ>k(3NwS^w%?
zV$b8hJjZAMtFbwM{O8Y)75^&i?9M+w{`2SKKYuLb<<B2~zIpyT=cd@m<16>I*!flb
z$W}VwAF}EbyTjSi{r<JOYWEd)o{au1E~##4aOv#VI;Mx3CwuExhn7`r-2HlkPSr&-
z6&)+jJnPF7)^EGc<$7S+>wo)>{(O~JBKbIfGt*2-)*CCf|LjT$eA4B+SW?OSXnnY;
zSFHNi3-bLRPWZ~}CLA@kbn!m6+_U^_&4nqVN8`U(7Ii<WmybEsQ@^OA`iiiX<s!wC
zq5Ln617}}5!eRXKfU3`u)2FH`MGIwx<&HSWmpp!vTw;0t_~VQHw?9_Q`F{TS+uNVZ
z_TPQq|NCv>ojiBY33BNZ*#ekm3$D2QLR;`}q{{CJA6lJiQj&HqzoV=js^hyh<Usx#
zsa=JKR$dS4WW7@Nwsd((bqZVV&GS7=c75qssnxc3`qo^JGt4{v=l9=uwdKrX)Bd8Y
z4Ut8HOBn0CGM)eMuD_(t8<@5#=6uX-K{d;xw>_TRGe3H2%U&LV`j;isnnmI!?quz)
zpL(PwF?658?))aB`svp?<}23SIHdM;_2kFai~J^@FD!hg87(2|a_WgmpIX?Zch6RT
zUgq?7Q~!f**9m7re$SPYc=tQ)U?GpKt4eH~mG~aEg45P71H`m8dawC%Z_a|pd#`*h
zN-kf%vD@z9Q`@~Ja{4hcC!8!!8GOi2)lV+3opt;5)fYL*l@q2e4K<m?!6xNY_MXeQ
zWRtnHn%*VF_6KU0uJ8#8Pj!<!claqE&kBvUc1HdydrW8TeA%(;Zpf8c4oVp^s;ka_
z?CaXu=XhNqtabC$xkf^14-`dm=iPfzm~Ac^w^#rCl+U?oH$JUeT9U-(?-BBSm6}tq
z=(Ve#-zbF|^UnT|puFYOx6YI`uS<%ued6~_60=WCZb;1$TE6Q4=^t<ZF793HdHAXP
z!}HC3>%)G&-*@yjXWdOlkB41LuGK3SYKb*OHJffdd@J#7-InNV_sw(kqOJR!S!eC*
zV!S79oK)7Smt_Buk)I(!^nK0EJ?kzo9@p4?yeFCcu!TMEp#n#pqy;f5vMkD)TFZ(S
ziFn-jkZ~nW&dkx}*CkIM&a?uh_y<{!xEuZ+54Y{B*DT)^qp9iV5h%GgXHt5MQmud9
zL8nh!7gUKVb@VVTJ^sdN;e`_U><Ma<xLz!Iv?40oSkmu^5C4G~3$GgT-dO4Q>6+M2
z_1AeWh5nYUb1$8oeCh7|&inm#$1IOY>RrCf|GcK}!|s|I8#(#&=kLq)_kaHQ_~)ND
z=a<hve{X&=kDn#qq%_;uW9!=WH0s|){<u7eBj$zb^(B(;oTbjZT<xoxH|0m-oT*>c
zR(##sv_by)?8&pv)y1v4_v+`RSd;K&-4?1b-nMJz#jKc9Y4ay(?f<s^7SW)StC#+@
zi<rIR@mH>o-EW?!nzj3`6`Ag;96kH?g}IhH6;7W|IF@S75+%0R@<3Gby}K`Lmxljd
zvE~eSx4@cN)9!Pe_M3i(*Jhsm{JkereyqE)a6zHR(-R`?%he|Td?TP4^Eg2A@Y>C@
zzh9m||M}x@Hg@yB|9)FmpH^FcCvW@hLK%1KCf3Y{9@?{h^Q8)H-DFU9$-2GWwDAHv
z`_Z}12Lk&xl+1TqXL|g^D_a}&RAIT;uIe*$-k)B)@A|33mw#+p{ve^p@Wh9=1?s!m
z_s)B%s4wzx!o(;h6>HxKX7WL4rd*|xQThj1zbss|s-S&U>wA0G<#x6cf4^K|uex{o
zueh3TDIT+LO$wZHUR|&Ma8AJX2ajgec?V8kB|B%jr)c$o-$kz~Oyetro9!0QSRVP0
zXV=lw+_$@aYpOjL&!|fBeEDMLt+kW9oSXivlHM|Hs(RcaSAC^izsG?Ou0D}@UvM_X
zZEaxmnFV*V<=B~w=EV0+5@)lC4^iT_4{r?qn5uR1{_Sr&V^^&I{U^L#Z?4DnIXz2j
z6w;F}pZlD?_rmT<9&=Isdwn6=X?w+XPwo<|on78)mV0>j%bQy?`A!Jih?qYRSg|$n
z9s6Ex{_^D|&lkAY=3P5`#K$MoE0jsOAT~5vwQuQWQ%Pnsc}*6FrS=XYCQD|=><VE#
z>9kg9!;#}_e$2VLZsAVrD;_Ily!@`^dTgFO>x@JN;~5XZC_84cy^Lor_z3U2Zjj~w
z@`TZ*+V;iyy&PgnadWz}nfuFEHN9TB%W~@pruQ3Xto@T;To(7dz(~0KWr=9%6iri)
zM)%J5%Xlt&U;c7r&(_Bq$~KpAvske3`|^8-Us8F#!gIl0#qxsdt@nikcC2R4`&;az
zoU;A2M7Hqj^&6zQ9=@*ejasvct^N$d4#%su%-Jtl5>~m$9)0*>^QkFfe`EqK*L^#5
z{MHT4Ke7xJ=7trTGj`N+xXjJw{$rF>Ix{Vw^{(-j!z*J#xI%P~P0-qqHo<M?Gv<eJ
z_YbXM;F$Qux9hFM6PbxT8<hl%w!f_1*)ebb<|*ZS)gPsnJBnMLWM67M|J>!jUwRZQ
ztCt=4{qxOv_kw(;we@v%HuCmz{r%4$FSeKe`{&JHA9GpG&6Ae?zMY)?<G#k7|6kj^
z%zCOnZkW!lylyU=*=&B_s{Cu;&e&}GvG|0}nf-5KxKCW}K5u%*YG$uEW528k+w7=y
zza#I&M8#M79^S37P}uimTD_q1ViTRe_pRo{asJynccLHvqUs$*i90LavWrhU>L7UT
z@>MO>C+{BKUn*KO^@6pk@;Y-Jp^nOrCJS%BJNPW3$)s(G*MV;*ZNr#tSJclqH%a)~
zXHol^YSpI?t*Dx16ylX~Fxkx7qKECJo3YLO#kRj5f7^3+-to7EGRNQ6?UvpCxa>~e
z!K*u6?NVJXWLNzPad>rwoyEc~X3iVQKT@wQ?D#dQE4uCZ-ZNo|QZl(u7bRJ1e(L;Q
z{&g?=#*^uK`5y}0j$XJVxolhDP4)G6WhGO*V{>Gj7O3nollK4ZTQzC!vj=jX^HRU<
zFI=g-r`@>Vz;gEV%9`_2?$l1(wX?WDO)7tD$kR#hUKagk=u++RoFY5<u)t|CjRuv<
z)RQ4kyNzw{G2VTU_n<?(c$KHbJdwWj(Z=;mPYRP?)^%FOGv15ty290R$D1>5|HVuD
zV(j`uzpr2L&h}LQ=YzdXAN}X(Rj;%=fA8t6=OL}!#b@?c&F`H3=+d6I&+PY`dsWV@
z$>CdA&D>zA^n23liiLGK-5>tOSi0wkM|UI`xVb*zbDem$=(E<QIh(C#KQb|yxU496
zXQ#=bwY|qK?va0|r+0X&$c2!FAr=NKY%@Igwe+_z^P2YezT#C1NwRpbNJM(VOZjC+
zw)->Q9p+@slE3wHUFRXS^Q(O_qWdmeoh?}+7?hQ6bjmEa%yzGo#T{m@&Sp2)GnSKz
z^9{O9ma$A-SJ{#O`+~O6HiMEktaruy|Atzf=1yNN(K>0%A=Z=r`5X`5hll<U<}Q8f
zF~e`09LuyR_bg&UYm3+)=RIHJ%+JeVfAW@Kim3d-ho1i0Gu&$D*PXPCothahb~)yy
zhuYPttN~>#XKu_obn>9YArAKoQpU@3*$!7oXjMm490=h(o?-r|WR}n=BR{pNs|0!*
z?aV)hRq(RD6O?<K{P_68sGE<~W%~dA%@GTju|@vAl31+P#?sXNOWltz<<fqjGuOT`
zQt~M0U6(~u_>Ff8=&`Qer<s?UbAQ3vQ~4G`^B>Evl5~lDIWhCFXN{o5AD>9+bpJhb
z^@^+(@2KBl-#-8F{KfWr=hx2pYf~-%_s#Ro_IAy6pDX@U)YQ%Cmp^~J|M>IcpxtMS
zzo)<W`pNXeq!%5wj(<}fbU*#~SaxLfyr$cJd$(8@pE)l&bNlt#rfdB5r%UC&EnDm3
zIp;iAgyNhXb*qYk*FEG=O4?LzQ+0DC)7)wQZ){nSz51yW@9M~#4-UjSeLLiH^wGqr
zbN@Zj_Yj@nqr58Q+HsD%r|XsrWvWJIMN7+1SG{paZ{nGdlU9|cX>+wM|1TF-j`O>(
zU#`)6VsmuY^_hx!LVJsgOc*2Y{k+ptGGVp;%8-qTuAFm3bi!A3^A(E9-eX*G`{nuP
z6<hAz+ARB)Bk^+Kop;}F7m6tVH!$72VPn&^r7pi-SQ>kFKeQILGnsg23a5EzttHpL
z17VN+Ul^KRuzu;Ve5zHP#nZ``g?Or$+442Nj7zQU<7b<E^GW7E;l5l!kHZOeMw)Zx
zajf&2@-N738RO**Lj8xQxkWtrJLSkRp2a=YpVCk6$<q6{E4<-)(qo_ZhbzCmS++Uc
z(7foA*e#Qs!{49Ik9c%3rgv52vuWxp|Hyf+OUeALG5h@q%b;u8?`O5Fw~*Za^RU><
z7|*4fUiGj2vG2g+ihi5;H9spqhARBDvU}>vv}468L;jm0D<{mCSobDmCTHKXtqZu)
zI@gPzJ#xkJLu=8=+ApW>H#Y5?qA|OgZC!B7^!mQJebM1(cBXeZm?iJquA1@b%uK$L
z!*gR41wQu1sI+A@ue;<ceI#V}OU+9%N^T#n6iV&hD4fi4Vf8Aj?rPB)2Q<7g1tv?V
zOuKxt?9%bgZ<alLCN2D1Wu;i$S_h4Mr3c5hFO~3{wv&JNa$aX)y?YYNlyWEYo&A5T
z^7+GO^A5%t2pVo_Tz4Vmt#X?1szp+63HEEJsV}RGt#agkBDG1?X$!lHe&W;CVCIQ`
z53qjhJ2Sma)86^>{wl8=eubZHx3*vYxOmp`<+dgvzKi*KSH-5Yn>^N-mBnM*^i1T{
z6QMM>eK`RK4(^fjjxxA(gnjkG*ngKjuYI1nQ#oquq~e~3lP*mCa?b8f^7M3;Q$58M
z5h_B9*2<b&zgsJt;d|v;ubF7?xg(b*g@;}izSk!?OMTIifDRAm4W*y|%xvW;dtH^V
z-k_G<-~*G=v-!LeKkSxJdUr3DZPWeQtEF$74muf{=5AfIX?hsTuOFtL;vO>Gy2uwS
z++uYoWOhnzezZ^Moo_X>-?to%fBif5pl^{v;&1h*C3bWB4d*4>{Hpl&+49_HoAb{%
z&+q^I5!6d`zwo;L_n$u>YihncKVE6~>!-|L%cjHL?}FIlmZiu0Z4JHCT))cczp&4Z
z`Q6tye%3bMSH}B!nfQ_yXM$%Z%k*kY$(*DcXL8*1=|z?=>aNE%Pd@9<F>s2piLi9}
z=J+n~PPcOLVaesrY43|X-zh46Sahmj8beI_Zyu$aFFT}9nNRnc7cpy(P3Ou()AIH(
zh}>V}leJ$ivdHB^0$*`yYD@Tr%qdkY4}^3kS1wxNm1_OAYWdU&5u9%%&-%X0Ja|)R
z!JO6U-+o@&HSOAFFRPYYRo>DaNoGFvvBA?AceSi<aBwT1v6^qQ+{t-sx0Y?W{r2&W
zyNj;a_dT}g6D!~{va!nv31!K&dHjfHT3^qqugrSi-!NK6T3t2C{aX9>M)>nNGx*-k
z4N_y8zBy9hw)@qe^I~LoomjBDU-6wsUVw)9r5Eb_{}<ftKA=2JSzI%PJN2B$gc3WE
z9*$otbPe`6?EPUm?^4GcsVfooj0=vqE|S01-1?!(+u!*`4pV5n_J@#@RT-Ns!)tX-
zriGaFON2V^sM0awjSu|3cGn%*iqQR&awA1P^Dg}@D|F)Y1m33bO&|4^MFuLQ2KJq_
zj1<`&uBdd+P~pCn`<q+yBugyx?Vm3CbF=K1r0<UAJ6GAKsi;TR+$`1?ZpeOe{arlY
z9;YAik6H_#sy}?BzhURCeD~Cu^VW9c=Vfh>WclXwx`sEA)1<2ZLKH`L&y0LcwL&w0
zSuWSflMgB`dUf}W<O^-Z3+vu$KIVKqVZ%kw<;I%3^aC?zY*4tyzPc{SVs_h%D_)y9
zeNF~_nAdhOpksHI+lSC~TZ=7KoFD#7V_a~R@#M@odYhgmd*9GMA38<(;e`#)7$(Rp
zyRhc-8LcSCKs%8=W`XrdZJT-5hHp6P+ILL+*L(5LGb(QtS(pA3=dfCDw(~%?lj*k^
zQXF#^{d_ZhS7VUdme5DZMz1gP*w?%Zy*g`^YgUd4)2fS7FJzuS-#M#bAAg|6iNK^=
zryhFEHrG0=QLA?6^v^f*&;NVz_os|}|L4z^=lqnWE?%hpw)y;J`SW#c$x0XaKtn;>
z9|v&X6X+0`=-|7?)9u;HZOatJ`YM($yLRhu*=|{P?)U{aHfH-=*_OrYZDzT7qtNW@
z5^;_U8&_GMtqrl8^6vSWk{~6M)wVeu?pE>M0ol(Uv(LzkVAVdv9xSwzJu2J%(9gh+
zs~t^P)rFQku8<7(<EvBO`}(NUzBE}O&(PQ>o|Sy{JoP*3`Rv{8&wu_|Gw1o|pMQEX
zRy}|I{PXAIpFeN^{24U9^YM?({P}-tc-}YOJRZ6?A)+Rk>2JEE`T9)d{lD`K&#J6@
za_MgFwBHT0um4Lf+-F{4zuI+usB-t?E}8IH8N>1ezGmE^6V7PMxxP0MEM)9H_+*8G
z+RXgs&*H@e^2XxtayEqMr^-ijN{O-@y}5YN4QIW{dzBvvZQMIgdz)8_!)y12cQc)K
z`o*1XUVJHix!gmm{aVZ$O3s`)F;^q{vR~fA%X|3zDttGd6S$!JLfLyu&BM?$+6OF~
z+p=ba>%4fjbl-eePniWgpRZWTec94i_9CD)YJ1w+OpTd&#|ve?KQ7ySH{ZRRSCdJ`
z&^Xs|S@|Agj<<}jRYlAuEtbDpf93m}K6`Eb1((_;g)G>uAToUkbD|&1p_@q`-!m)m
zFF*a`vY@w(+?^?%r_HQqSTUqMlhZ46*J#@9JjHPJ+S$+dG2Pg-$^U2`tBA**Wk=p9
zCzW^a{Jgbpnzq<B_I+Bvbt*(EUY`zW`jvbC+fRekP0Mc{c^|mAp}YUCsO3E2ZK*N4
zDnh~=1fx0Typg$=yZ(`An99Cu&$C?&8VlB1Hm5ZiJW^s<%`tcG+f<DW7me?~+_LV%
z8L^3_TNgaD+yD5@%pJS`7EKSkI%DF2r~R9?@2}&Fi#z0P#C>1!#~q(tkNPy~-rsp3
zm-X$F>}s~>*Pk5B4!9H&#`vCZ=ge<XTaO-f@94M|9K&Jd5?b5SA)LWf$g_x>?TWF7
zx1v$gr$ufV5sOMcPWr}Lx8EuM(^TyT3l0f4o|MV%64Lz<_OjxEv(Iv7*E6MJIq{Ka
zex^#Ng)LzTGpL&Nr1zm}L6gFB+eRA&ey&w3v_u7hx2$+GQRAP}zv3BA4|wPD+P7a^
zesaIRJ5$^vz9+uNE}F@&Iqar=vgr8J|I*8>UZ$KnJw;0VmG$(_;%gN=@5)pa5=Fl+
zo5mX`Vdqjkg-6ynTj}5-{UdC`|7X8w&)D_C%5xQaZt#n!nk5`EHs?S71RX^1@7L?&
z_Wxc#-hTe$?c<-X%lFIo7jqoR6JVZoi#JK+%fs`<bJE|}a;UtO=ijWdq@!WtepXRl
z-_A^t6KS`k`q+#!)!a8PmOZ}q?)xf}-Ll8G-~OBNwk&`9;+fkXXDl<_cY$q1*U7XA
zT?XnG-f~>N6H#)#dg)eY7v2@WBX+Ons(!m)b<wIV6EtJ<CyLFPd62_<PFk6;;nl;N
zesgi=-`mZ!Sc%n6;lBs#tdsAbDt)Y5cwRF5L_PcF9riDmpLqFC;@^wEi~Enic>dXD
z{`~XLAOHDNQS<NH&ySBkKi+=)c>nRo$A5mj{rRWiId}Qyx}uWlTs5<J-(Q#d8^8Si
z6@$;OAAX+vI3u|yuI5bLd)5z0Ec&w}@3_TYotnL`(%w<;T#1fVRbxot;Tun0A2*gQ
z?Ms`yu5~F(@TwO_Ctr^{+ZMi5@b2Vebv}X9woKQ)*(|<#-=udRT-WZo*`>%B!qBr}
z56ktBM!(}z?swHZj61G#?O)hL-J_Dl92Rfa9V$QiFd^yZ$<ytAyFX-w?vTCR$NA1t
zQCW>cQ}VV*P=nU76Vs(zRn}{_h+kGq`4luWFf1v2=3@tjcA@71uGv{-c2S!jSMI*s
zcl&MK?(Ns|+_}9{v|bb^7h2S|c*e|lvBWNljd{0U$^F-P`Xcp0t)lNb%Hof`E00Kg
zZu&R9>Avz|O=a5^ClXG`9lI=|qcg#V``n%1b00>?9N)ccR?1JEhf&>?f`;J*Ov^<k
zRoBixW|3L+O0@NC=DxzaE&*~+0>xiVJ2XklQ+M66wpwPhM=LrSW@SeGHkl||D(mX{
zL0)%{?4~38D;RR(;y<o)ez!Gf!Plv84t{Nqs7~}tt>BfL7dJ1);D2}TwWGpY>n=TA
zuq))y)nad@_14Cz+&`*>eGMPp+qv=WJmaT5mbM2Pc%Q7wXkXN<bNkiF<I!^FpAGl!
z;+UHMVfu<&`kUGJmG9m^W9w=EpeD<B=G4>2b?la`&+Fjc_VdyHQ+MYseDx!Bf~Mi(
zfT>S^^As6pbiK9-o|ZI6Tz1aS1s>({GOjk?3YI!)&RKNX+gduUg>P@uE5pxh(~Vag
zI&3H$b}{3aQT92lI*pnfx!5V^GTpqspRhiC-#n*fgW)lslYW-IUp@t7$Vi;~b1rD*
z2d3`_UM&teR1_Cm*RosvjdXtgnwD*EJyo|~b(<c<{$Ba}hGlcSH~xuyRn|7|%JUc1
zpEv!Oyl-a3`$x5mTKAf!XRciyl0J8H=FE8k`sVsyj%;`=KJD3(TOqTvnxocQ>Gq{_
z#7)sVvVmvv*_^2FCZaQqZrwZn^UeN$zdqXke=Pt1as8ja_WwR#um5wszy9B^KVKKu
z+Z}suIq$Bl)dKh3tK}MI%3aH2;w|cazDvNt`|;Y@Yq?Gr>Gs$A?wb~Ic<sxJTTErx
z`<>sF@0Bf+5wp8`eBt-%-S_jp-+o(X{qFni?Z5y2d%HvSe%|rF7I|x1H`f^KYfGB)
z=v3CsU;(@E+6sP|u)|++bw0bCU}*2^F?r4{#nxhwyQ+QBPlu4t4$M9m7hHQIAhy?z
zvHIkb8ILX<HCuJHeL`P>@7XhsvBy;O_2zdnh95A|Fj8=Ka(T`FJa`Fz{k*@M=l9#m
zJ^x%&1KJuof4{9=UCqBAruKIAb6%$ZFOZ)ZC8*XKzFvIK#kkYA1!l|_D!6AS@<X=L
zy#D-Ltx(nYD!EDTBZ~CnjKVjbd$)M2rH9{xs#nS06;c8RQWDn8j}(edO57?~?6~H~
ztd(0oYt4w=@^9Jh8*Cn{t7jcMK66SB*S>;yJ+79HCjP(KI@2BXk1QzYnd*7EwK*X_
z_1S^NZJdr%?#nnIZd}1|qGZFxPuIT8N$A#MdL1PYFS>rU#-kzur?4o|1<@(4T?Z1|
z42>RIi(HZm)2rL~UuEatMLXP=9%5EL+QyXTGe4=s>F%1<bNj#9<ZVCyxX`-q@idLP
z%?APxy_viz@a^TeNvZp{U;o}Lt{LEYLZoZM3h&dkH~kK-`d*@ZUp-)6pR-(v;)}L<
z{>5`|tmtZNR7zg(rhf6snO}~jh8RiTnUci*Gjpy-@a!!soI+o3kn8mM!B?lJX!+=2
z$^Lb_=b1-cR$jB@7;BXJ%`ZV>>D4iF63#8`^?$Kr*^0JI=WPP(Snr>!Fe%{t^67Ts
z^{x4nzxUSIoGnu;<-NYR#yj`!gD1X!s%|PacZU4fyX4towquRaoA=~>()}?lQa654
zj^49b`kDTbUtTbMFkGv%?ZP*w>iS9c5`XL?pMCSR>oD0hMc~s@ftw}H`Tkw&a~8WQ
z#8|V&%?)NM@O+c3@>uF=#2yyE_}<fI4_bEapSt{p_k|B<Zl_qze9rgGsMoQU^=fL}
zhTsPhc9(STYc7bokhkH}wZnDBVMS*&4qrDazLj+Prs<W7r`p0LY`-v`nq`>Q%TuJW
z=LYkNjCqDCN}6$g7tPKJzS^`*^O}eKTo?64TD<$C`<JAK?r1x?J4_(%2=6RWHrwxv
zH`QYEV+$(InmuEAabWK3tzVvscRQbZ8Nk=5zHa-m%y^ykb=w{9xtrLp4BI#9r=CGn
zL#X!Az-6wB*68jkShFK3thvAC^W<&H4i>shp_*wtvWKi%ywn(OD&}d)9KPacIsf_k
z{k4DozTf|^|NqbP|9{v2`~3f({Qn>Q|Np$bE`R>`%lYba&$`Zi#&G4#tQCUKT6S0U
zKML6#)v)qt7nk0`HkH#amA(ib@XrlXmRKWa{I8(Fs5o?1Ag||5^9bSKEos{iFWR^N
zen03Q=I#04-{0SVJpXOo_S<Lk%y-J}ki8a_?fJ2vThv0ywQbe(`!jdmp3*c?_heSp
zfukaGZRMO6JY6aBVYlDR9G0lmk_|}@C0QNL-Pzc)<mj1oUG+!SpG&NM7-{x8^_1*U
z+ok;VyXvjx*V?>z{$cZc&_bd4{h%88<ByN8pZEX&_2=X3=lT2X?CPp({#neQzt`T)
z{_wT+5x28nZB>f=mukeP_arDLM&n+O-TkotlFJJcwLYAh8uRtf`P=WOEBr2f^YD@S
z$64)XR^M7<_G8k&si&?PMTVZboND#r{D=9H@g|y5Gd9I~K2ll!B(vgYPx`b;*Gxp^
z%KnLjOTK%!qPX(@>Bqi@@3}De@Auv{eQT<W#HPa9kZGn1-Z)=zExxj4z4%3+XTpI&
z8}4U_vzK0y_A$L2a!Qt^SY>PG(G<7BobT*P0--zFdBiiFUWr^ZoFNs{mhXS$THNM>
z)9+hm=OhXV)jXcbJf-PYm3Zj9XXl@1ytRqaHrpqw+qCR7>lJsSVio5_0iSFuru;};
zXv}Z;t}%Y=Ma`Bj-?=?I92cz$x^~rZfj^^Tr}vSfBYK`}Q5-4n3=TF%`QJ5rb)jYR
z(?|W=Z9~p%*)`p4pW8fz>H8VXOSd;P7PkGpZFP1dcls#<Ewx#~eTS6xxpPnV-l`T+
zc-=>xd$mqQ$-@&L&!64)$ZomkYRB_>%a^=8dUj`3_Nk*Ww#x#nQ#(Y(OoXF%ZZdM>
z>RIi~UDkZNRzbYpqEe!I+oMJM_DJk{o%ZanYEzcG-v)la(;GNC@8`EAcbBS&RzyFm
zs95~BODW=c$}Wx2%}y-leSeOGF8lNS(gdBJn#Vj3PlrAVe|2=u&A<bK&u#?=ZZNEm
z%kbTA&AxAf{rbHP)2E$Qcz5LJ0nw+@kz&lVqjseooVih#Bj-_Xi$jrd7jNi;yqT)I
zdO9|gst0b9<`m|TH8P4gc(f?_ij>4Ep;LxSzT4<+C|#MmZjNcW*ty6%QjD&aD;HWm
zlbHAJ!G{}i=4t1yy0Uk>{QY#f?(6!qpX?Ux(@-_j`up3uYqf9j)jzY}zWicXVd?dM
zPMm=K<l8=X?@NVnr9G%pQe3gbWBxU%>}^{vny7yWc3eH{;G;#ECpR71vT~WAz0FLf
z1?Hvp_Vs^$J+A+E-2UJB|F7-;@Be?@{{Q*^_wE0Gp8wq5{?Fr+@78wDQ#QXm$KR1<
zuFZ3$gIwF!xhC*l;3?uwSSpg*;>0saMR%)Y(FrH3if?BlReBs$wr+T^vP#RYiBn$d
zZP|;$6NL{fZb;>s@4bKPZ`$|Sci+GLz5Vv~_xJM)L*(CDd@<N7dux^k+o1(seis>K
zXKC(#C&>L&dE&-`UE+bFydO9nPjxO>9dJ3Tx08`s&86k0l9UnazK5&UuZ=z9+Vd#?
z&*HRPPVp}ss~63AQvBhvv)66;zct7Dk3WC@y#Mp#>*w$P`}yPJ>&M5}pYOjvumAkN
zKN63p6gJ(v_9si+jZ4hrd!Ou=SK5WGC-!SJDdu<m4rh_C2wmtL*R$~Z`Iz}%#E)@J
zt2~sp`qwex_g7aZ?vm7VnK}FWRTkc(!D|*(*UCrSOM7T>+4EYS)!LO!2OfO=8haze
zqSIF7qRLb673!Sl9cNdzusvV7;<n<hDK(z=*te-%xy$iC>UQe?MYsP<F@9Ijeskf$
zLTx9l<-ZvegZ{{<up3+~_iI^X<Gfuh`x(Q!?Gt{xgzS9v;ep7tR|O(%uYx#Viq?v5
zN<RPnWcOYsy|j+Wk{evBc5}~J++X);%e{GzEzUjOpq#*T`9$_f>8+n*YLx%6|3B}!
z-zeQ&TvnCgx{C|frm0sfe)|;qtgGpJx&Gzys0*483a88duYdRNi{~btlb;TMd9nBW
z85<j=>$d`8jW6Hb+;oE_{)GM#2Bjze{U!)78cLkG<lrA3dVN;(Q=y0N(%#=)eTsio
z)%5gdf)hipI&*9;PEuNY$b9qncbA{ex!>4(F8V-b@UG2gSugMi%Q7tt&RA9UZ`X;6
z{mDTx%?Br*v~nmoByVLCT|NDRdYawi$I~WBe2U|n_H)Jgg^Y_<=6-w7kjBC!Z})Gx
z$m9C1>6r^|?hrXCr0ZLL@!0==97oUGl;(PO^Kr7ipl|Uzjl<Sk%e@Xs>3ijtmL?zm
z$+186-8&=3y(=$NcI?eoQ+sz#K&>Iq_7H<~#`f8JzaG3HB=6=Ee0o`V(K!}LuSGNM
zjvKHqiam6t$9<<{P}|OB`SmB7wXzPi@yRGG3i!=yol)x_bGTUf_UoK?90d|thaX0F
z+DkquwViPFwQXBu)}7T>ca}>vOI3e7prE?2Yr^z~Z~alfFI?d<dvna$>&QIo<1*p~
z#!Fj*I%a0sER|9)+@7AwZz;ViJoDh&b;@lOZid}l#x9FFN|wqNO1b~oo1UBeWA4{S
zJGpk>y*HUrhMBA3OP7_=$8V44UXjRPjfrpHF1kBE`J42s+^GjQe^A?!+kEbx<9Rin
zxr>`GK4R%9t~*mJ<zqi_?eSx2jeY_%!{QrkX9jg&%l-R||Nft6+vESeGLQfFY;*qp
zU*G<`ef?>}tIEZn)s_Z@uG;FME7cz$5OnUKXtyvk^HXNF2`@box3F_>UZfc@!_w4N
z=fpI@H;>k?%bA~0_^P{7Gc;v$`sP()+h;m;X!%qyJAePM>B-@T1r|D%%Tqnu4;MvT
zy8E)g;=`ng;(jxOE=|!$pDek8yOiO%U4w9T+>0F-K6<pXT7I1LV#AA%2ijHjEwy7*
zrw028s_58RKW$L-;E-3nXYl!YlKt}gSC4lpEm|FwB^nrb%VnF?)W2cpj-LAU`RS*U
z%Bnp!wlQ+?>-*PV|9rTr?#{L1*z;S(1QkW=-rdSQF!S|3(Z&D&tX=yq^o-@YbYt^g
zpJr`YUBjg7cfYSZ|3m5JjLWa2K4tc7iQn<^%$ynRa}P_(pBFvz=8nMT3bA!r-Aap&
zdFQurzFiey9lgVu`Q&VtERXdUtbg@KxcXewTp}8@B$_Sy;ANlf-#BMj@^l*>x^cEJ
z^VN)F6Q1|&$`jed7~9<O<g<8+z~SiH%vlZ_+h?i^RMdQB(0lhwsaMXylZpGi?S%7B
zc;?)e__&sNdrai+*ywLFSc=lyd`xSvPnpv2@NH*N$bz8mkSj`Cmu&ht?Q+qEj$Ilb
zFHT;i^mo<YWvNv!Tk2b!=PoiZ*WPybPHoz1-_y7MT_|sQGqXrSGKXvJg;&ShjN7ZE
zpUQE6vT(|Mz4&LEOtIVXCCjg=nP*nDU-Z3eyzbjoD+|l-dkViy(fG1+t6`Q?>-o(m
zxALDpeD2aqsp8u6i>E!CwdCTur!W64@o=B@@ZGwZjttkM*X*_ryt!fFrCPQH>uRNY
z>gTFSi^i&JZfTt3+%)4w6{Enc>0(!mJvwSDFQq>}JE1G_nb2I@w)ML=Kl~;;OJ043
zmZWiE!EB8nF7xj1Y1?B~@;<fSC30(0!G5io*VJAdHkvp4NZ>Tt@7}`oZ%u@L&1o&V
zQWc{)(dvC_(8n*AuCQ)ZvrnJRx1%6Q!KCcFhxu%6U*$EO-^}h++<JO5Af%zQ)1WbU
z>do&bpLB-?g?jzVy)o~gov%@xYOq&KRz`Ia%be$Yg+)^`a;|7f6$aOMJ1ota{cxAn
z>d(SKPUmu4&d)he%CYywl#kz;&TOCU$5d)}<?HWXQrB}Q95yYf+g&=t>x|HopIvjm
z&Hin;NSxicP4gS8?xNFfWqH2?y_s@m-JAJ$UG2=<La93@J6-tjuOT(aO0j*41Lvx(
zo5Ci2`tak!&-eDfpX%3tU46gq^Y8U?`|D!k>`xwcoqz3DiDv(EZ%tMg&Z?M%e;H*5
zYk0k6ckavjZgQ0`^@0b7sQAil2D9!hYmvKucQr42O4cs{#ie4K6DED%5~1Ggd6fOU
z`}$C?scT;xPHqX!+_*?xl_5(uH?AVmLjOc(|LILBem9kF#GK+!n_v`vXjjTvNz2QV
zUQ8%FvBO$-`QwKbHgf#OFMlktnDg%ayT10r4=YYs?rzdbXMVz7x12XVRpE|u)G1rm
zYw;c-OP!)Z!Y}%k3g2Clw>4&4QRMa0WtF$|ZvTAw>BomJAAX%&V&T7j`|04lHq+k5
zT=!m?w<9xr@s>w9OO!4fx80E3^Jh(Z8IRJ-Ni()TJHjp$sD5L1UvBdB2j%)(wT@Nq
zIDJ@J<-2Ymr`62+$CWKr@{*n9cYhLi<&u0Z_j-Oqf&TZm`CZ>OmOTH;?z{K+lP|q1
zEKBY>zX-bR6%^9aYvt46HcLQ6`REpVWr??u?<%$ks3eK-_E^q$44JV^(W>nq!`_VE
zV|)3_PH^&mo3r_INKNiq9or8{UjkPj<~?%Nsds&XTZwqH^s0kuUCR{21=Fr^IkVk-
zG1ukIj*!+J(uwQh9WQTjJHC0bZI5T?iZFGTF#l@JO3$2+%7!QW_buuEoV{mC#Uq!5
z7t2KS?ua&vndW${aE=NSc8>`@o>{`MyPqj^Cl~is7oAsUdjopimKSEeS`f401N+6f
z5;tp~Cl)z>k3P<Os@Zqa0guV2WNgK4Ke%5JWIe3e>!|6z(66yLyHcXPrEp1Q<>Q5&
zhDr?1vosr`Tc6c9tX%c>aKta(iw1VR+k51q{T4qc3i{p~^*hgduhgfnZc2WEUu0I;
zDK1u&UbTIGt*-gDz~-#NfZuO;)Wg$tzPjAATqpM8_0e@p9VcsB3z@U5m}rn-H|Oqj
zzCF{k+Ql=@%Su`8mDIQR6Ik&4&=N)|iI>4Q%2&Tx9`sKu=)*CtaL)5K{lCr{mb7aX
zx>z<Ih!uXEterJGbLr)jS+d8sDR1IuYp_deuAb1wTU2hlb>$Z0+!-Of9Nk)sJ&8>X
z3p)d~f|gF1u=<);=!%eLRsq3J84R<6#jE0EziQ-tuZ-j=TCB7oyd!Y^(kYwIyUv&y
z>ALJYySai{1>XY48GCP@l`7f0@zwN%$!)zgD>*Lg{Mx!sM)&Q4i7$<-Q{GfOe_#2~
z);Ox&FV8~3#_`oOTf>ljYP++JPgunM=-2Ldw-SN^RM)y_C$4__>Fo9Ue}0+o|Gzu_
z&%5yb|6YAwuYZ5fO8)7untp$Nx+yQn+pEN8$!=!59rknhER82G2-Do^C3^dz*B!2T
zCYis}r+Y6u_G0_YJuOd<J`PwV{9JVfhue}3B{!Uw9N}8ADfh47ek1c;S(_3nliQ;r
za?VL6L`i+>s#S}Mz8fEsvifVyx<y-0afPi-O5QXl_}rNuRb%!4{>=YW)Y5!8mnTOV
zslJ%x<z>25J$oX5dr8ctk2Cs?KP|D5@n3%VWktEw-Tgc4W&6vIAO2WkAv0}G0w<fC
zLZs{2Sqc5Vb1h~#wybms{i3v~Yu(m4uG^xvM{O&O)_;DwYTums*Os@Rp8ftD|N8U&
z`RAq|-(+{~tIpG)?OVGN+<lb$uYO#;_Hf&RlHZT!clan5J&jss`Q`lfuivY(1<eo1
z9Gh7GJ7A(?lX&t*#pmlXMSs4Xd*a->&H5iRZ`ks3+xI+M@MBW!YtF~Zw{Enp4&;BE
z+-)YEWt_G{$YXEAGOeq5>%0~%=`>y0<)xUZy?NEPmq#09uPmD<F|%slqNP&GKQH{<
z#$Huz@^;e0sw0;yBql!;_-6Yl;=c*!rWLL8`J|pKW548?YMIf!I9Z^6QUp_Xr(xN(
zeKU$#Jttpvl4h}s%<6lVanRtHnAW#VddnF13S88+%lA}K(%kIf)wD*_(EG>PTP=Z7
zt74`7n$@}!5?a|d7G`{KzVj-4PWywI$B#dgd(#$^|Da~~=~Ih5Jy(TYX4@!S{N}ay
ziv+KfmPj7eY<6Sin%Vyhcx2t)hp)LX>x1{B^AY<NRhhlL`a<I_f6v9>OEcP~#OGeV
z_)kxm<?z%McYoeGls(%+WJYGx*`-Yx7h)f-KXkF#|A0sC@>%PTRGc_JQ?yPhx$UE$
zq~YDg()+G%eCzl6iM#BKk1u*N!oO-A{J8km>!W6Vhc(<kOqvya`|W%It2Jvs_*}g`
zNvLeaOtY@+8P`u<lm0M&A&XSp&$%<RIjUygTF~*v*}{-*%7q92dCZR9R=M<RTUMdK
z`iq;HmuowwFX;NcJ1Kuw>bET8%2y7H4;d^DlRXo^*+|4%b4k4j_twV9`7h3|e37z3
zW9g9zV#Q16%}rdbnD}Uk(j4upT~UFpUZH^z8SeA;87JJIcIoQxbmyA2J^^JJP1){~
zf(38yw_Ls@G%fR)ox0Oj6R#2#CdTmZKi6E$|E14UUv%31;fJ*O_sazxdc=36C&Y^0
z3V!=8dDS|>ga@m1ckJ9htJinqiH#3FN(7wvFz2e9jzY&%p#XE;tuy$(@0%eZ!q&Uc
zD<tIF*H0ggKCl0ARKM<XdHw&R`E~#AK98TfzcM(^e(LehRr}Vw4qFl?%6QUn>Gb7a
zUg_0V%zxkCekP~5Uny$os#m-6ZmTVwrWKX7)@kdDu<W%?OT-SRs+q=3n=;M$+uLmC
zTyK?G?oSqej8tvl31STmJ;M;NvdgulEn?E)rCtq|A19pHxmoXnzh<%5>7otE$_Ee3
zou--?XA$UKZ(bUr8@*ENQ%aPkR@w0eS67KC#`67^si)>WaX5XbbE2_?_TptrIHw2&
zJ)0BkRTDGqvEux`pYPrk)Yq5S|9|xK@9F9K@BEKnzT18I?%mADKTA1-cC#;Wi<y3_
ze9<ngwOj43?wVF~^V-@eZ;Rse{a;@`{PSaCZAEQmMQvS8t)2b4*TK5tnzKC=MVCFy
z>$<qEw6jF{RK<>SJMVjh``yycf2(s>^112c$5~zmJ9(d6e7^dB;vZEjo5PPzX7M%k
ztvLMFO}2tR_-w)jdE<|}A`jly-@PbUd&kyfzvODGQ?ge354q`gt-BBxv~A|r)|RTa
zi6Pfc@nj?>-?0)3bQJjC=sP3lhv)LM3a=(GeiPqx>h*jH<)FN(r==V__HMIDka=`)
zRgLN0?*Z}&+vmQTIIr=&Nv-t_{wGh|+?t#goIBipa{a|g?xJQLpRPIG^x5!jqte9V
zLLSVWy2~%+JmC|&TM`(hR=ZND@43s8SwTtnCz|I}zMmUYc)K<5?JIu~3vTZC4W&!2
z8$SEs8F$|}C-^{T`>%VJcDgtGG&;jBo;tNkDQfHOZ8ps_92|rudN8>$yKJkuBjKBL
zhE?)c6^~44yTs4(!*3T|+vq&s`p<KVC@Y>5(QZ6#Myqb8yRrqFy)DiyI9z0P_whkC
zDUQU8{V(r`^0fT8c-APv&;4-y<ufLdVc`oF-d=6Jz})LULtnoK>pSP#o|e5E7Zr21
zKabGZot?NXf$dt*;aTTx&C5e7&$_QQ`~4x}VP#HR=;UM%-E{}do1<fHGI8Asv^;z5
z<+G~O$1dyL;d=OkrD~Dl1%++<>|X6zb$<R`iPyh{lnq+E_~nx39?5-Sw#}2vV@rQ1
z@4~P54pd*Q*dn#5we|Na-z^`FQg5(Jui*)GJu05tIi;hOMP-eSz{MjW!Y%=>n-_$g
z-pW_&w?^ea@OJALADU+-J#IcCP{b$0vhU`X4^hqgJ|vxVySRoi>&v5^%dXw-*_$bE
ze1mD1#&%cJGdsf%8>Ix?sFpv&^6*7@a_QdBFM3b%?U;A=@vL3z!|z+iW(!}PU=);P
zY7)5h*FA<~wwdWY46nWJhF#z8E7Q^H8Nq#$ZPN17kHh0@{%!yN(Y*e5`TW1H%K7L2
zefa0=>$f#4vnQV3H7)4OPPI)N!#vU$r|z;{+dsWHY5j6jg>s)s5eoxW`b4?9yxNxc
zIxK6g-V#r_pw}BGbWi?R{EVf*Y12&s|BA%(e>XBnnsRPh{CrmXS>bs{=K06Tg|arT
zzUsSyWrvE_1DAQCX&>17`MfQcM(h$UX4@dsU{_kgw<cuermLY#!WJ$!*4)|RyM<|%
zgcei#OAUuP8Icz0elvyRjkJq>XBI8GTxhBHvn0mvcuo2CJvBccU1k5iK5p-h{q=?Q
z^}DQfPc!|#F(>zFs?i(Y*RCm|k*?d;&e5NK`e)`|n_PeX>Bp}>KYso7;n!bZCfeHP
zOkbY6J7jIGQt{l`tT*!3+fGWG%IkYXc<;0=m)15Eyea<{Yh?d!cUk&J<L}>fx7@J_
zNXT7n_IS?E#}l8+Xa48ke>&Fn`l&wcmGV(1w(?!mUp;$A<aful>c<(J9YnGidisTb
zF3RyJ?3Ui7ykOF-T{Ai(1E+8=yyc{|=cUz)*Y4R$+RJ91xVQEC|Ms#1t+QVm?f!?|
z>h<2WOrqGamGKK#VR{aCVRr27Gg9{j&HQxVoSvne_SK<u_NUhhUfX<44kxFy9@xk!
zkhV2?b8r4mL&xRpl~=@@L!5U#Q?}}rYFTtq_|1jqdD}9b&u;rrWLO>~&l*{IL`14O
ze@QLR-A6oviS5rAa{1-7Ken#Q5N%!2Iy>~^9o>c7yf!f^7kU|Vi%$uiU31s{moL-h
zc`mOy`DUmVI6u#;nR)PF=ewU-Z;s~ey|7x~m&KP-jWv%qzdO&hz2t3IQ{yE275m#R
zXUCj<>7cb>;kI|5-`n~3Y<X%Z*iz;gnKA2t-0703Z|-{W&HVUwM$OY^?F~m+cmhto
zI9k)KxG0%r&2y=PZt=}do9;#>F<1nZ8ogApmAh+G=)THiGY?z)dc&P_CNB2jc)6kR
z=L+3c4_%c;w`I@$_8K_3Us${4PSZw13A^XvYx)aIrQW>D{`LIXcZR)JO>go_I5!{c
zRqdI(H>Z#Pwx)m8ujyLfUhdhx&)diEdcq77=PGe4MVqZjwll4!Sj!~nHuNU8+HIV$
zVp+!v1IgsRyseB`zxQoh{j2938=v@01&L6Bq-8E)H=Ac3m?9x}Ipvt@l#3q=jO%~i
zQZ09WCnjytdwkyCQ@=0Xo@x~T`-$SW_nTWjUvpaF$!t}}ZZdoMa~_K~cW%n2<z`RS
z+Il6(YvtAsE4!kCN1Rbt<c~=<ABZx0byHO#RJZ5Uro7v!*70_~|D4|c_jdgMv-j)%
zb;s9#-mNdczxvKKFQtI36DMWe-FA9uj`*cjt=T90XNra8tv)=(La8WvLG4<*M_P-P
zdTCzWF@4qSpJ#pi=JQ_9|Nm@?^Ma|q5y|y4VlN*)bF-E;r&(6swWC=lHzs^_grv$k
z;btM9vd2O(sX-c(g0y;boMaaL{~+RW^!&A|BCl!rlZ(7K8<yU@ec;?XrLSk3J#>SP
z25gL88?^e$A{C|Vm@}txUicflo)pZaYx&ei)6>gB?()lb1^*va)YtFYZ-4K;eEdF}
zd;2Z!-j|QxWAKOnxl5<E@yY<rs-0I(2iw|DTW<dP@axlu%by=NfBo}pS>2seORwoJ
z(r2H@u&5@<<hq9S%fs_4y3&tvCi6F}o_j^lFYo!5y==!G>b|~kuy;YYyXv#lLn&*o
zW;Xr5#*nqAw!yeX@zv+^9hQ!p($DyvyjkldtDpRaXJ&aLlb+3Uhd1HE&qF4jidw1W
z6u9B9b_mB+>A005&NmA;2(+Eco11lC#qGl9H}ReG`%X$oy}cTKVeJQ|rG?y^FK3;#
zd2DI3<+Jwjhts_oYWy-x_cT@<2})bQ<$uK}(au?A#$_IkjU61bBd(>)KO?fE;wq>9
ziqEFLi>uf?7$2AODKzC?;*n9?y2+ENm_hkg%<ac)e_y_NDBt#0aQ%DDO}<WPLT177
z4pme0Kj_xG|NW@zq?7euEL5=V`Gkzzc=y<KY2I?~^8#aZq`n;3kZ;z#YxAV2Hx~nb
zlzs4<zb{^OE5GV5gL|pjyR2&C|0J456|FC06<*N3j8nGCbGh}!4qJ<;E7x4kceQ1o
zXnL_^-NVV>WMkr6E;gJ|%=Ktlt+X`w!Sf6EW+qrFgs=sxy}eYBn<}5lyy(gr{X<6<
z9oo#B!13tn%IS)m_Q&X+*^w(@*v8X;|3F{PH7%9Eq>E+yBziaQc5BZMYCo`knSv4z
zSB324s?1H*{(Gek$@9JJWV3x!J;(W>?CYBw4KB}SUY)3V^Tj6P*2FtM8<x3>q}6oC
zB`TDiexDe$cK42h?*cbIX0{HlsgAFhQaw@qOi)E`Zt@WyrA029i#~N|Y*(0(_jSHf
z?_4f7uH^sQ94c-fGSC(dz3O%>Wt(TZOT#;<MYm<A_MJbwgZ;bkpKqsD^L;bp(Y9YI
zd;i6*Vy;`ItF>NOZdUB|ESj+=`i84@@|qu0t>PCZdxp3!nG&?^?WWvHZSSc`$~IS}
zHs&~=y*i0Qy?w%wLLG<3ysb92d;WcUZC~@f-2VUF`+wfP|MzYB{eR!Ok6-Uk+AY)J
zrF(nZy4Oqdrf*uS<QjTDs%s6uqquOt+k$iVX0x1}my~fLH=6my0$(`|mQw{KL7RBu
zp3S)-W|?@hf$?RLo0sMC6pre)dJp-RIs&b}B09NlETLI~tF~TSwKQw(k|{2cQ*$q!
zpQCdtP={SeA&If8>3|edM3G<H(UjJ!=Fw;KPs#gfs-El4WoVl^ZN`@R+b$tlE4QvV
zwd<zCHGv&mS}v>t4ziCA&oY+fdwDe0>h4nU^7iR*ALqy2(LEk)HTTlP2GxmP6N9#Y
zE~|@So?09!?;Y8CBxU<am9%$JF5wZ`J}gbe8W#n;cGP|M_t{%zw{ZW;@b2rcJvkk=
z+c7hmf3cKOcz^M)r@0WX>9sAsy^Bp|?7qLLoOOR~nLRI8{;ogi$ELq?E)%Ve&GxC<
zt=U-}QFAzc_4Z9pX+l@IRw}vfv`S-k<aJk+et0;<^574?PyGH~k;)!L$6LRi(VqKd
zg%z(obJHR#uAN)tcTD~zc-PMI>bvH^wZC_Iw!hq<b?oQsCI8e)MN8zbJ1$^U`0!GM
zyYe)bfp<#3=H_n~!jC?XyjID$)K|iR@6}6Vo8^^DCRA1Pb+Yyds~3D*ykP2&Z>t4Q
z+k8Gf=X~7#Y?c#E)rM{pbA-eWKRKpUY`V(*<t=~ZHK81<Vni3QKJx4n5IX#mL+SKN
zUP0cOmak4(+3qdY+#<a$-*+kdB$<-+F|rAs#!>0vu4lE{7&dL()TeU(=5yP+b+`HR
z;*Wgs@+#F^{M%J6yu7es^?IqEz-pJXcMaD}`F$ZK{jtn1+twLX{zgp(oU21m-`d4#
zvd%YQr_<3@%<tXy90@OR{*W;@zpylp!8qV*r?b%erG8co*Q#G{;0g$0?(Y&W|5v&)
z&`)_^)ZDLkyWeLhNQ)d?ZzXs4q|n)r1#+Q}tPWiLmY%Tt%LMBM3wzewN#B<{L*`d|
zoPOD{8)qI#*2?afwybP9t6TRWlVeA9=Y9*h`fj49?F2<LX^FNgUUQ!r*G-*ppZ`S7
z_rqF8;y0%l_OAS`^r?6GkCNac`vqQkRsPN~{WEvsS3f?_Om)Wy17@LCj<*&1pKRu-
zzxr}%Q)<wI+_Ij!r$?Lo41>S0y?wXkaE7aJ@+@tuomx8tW_WgRNZ+wox@POT*Hw{4
z_dX>?U(1a?y=_|2imL~1-%|DXof&1bw$)>ysKA9~rL|QRKOTKvU-$R*`TyVN|NmG2
z|LF7mKmWXZ*yH8nm8zSn{W^7T%(>g!npD)6Jax$7Y}-0zSM0V9?J`M2WA;t)p-HCC
zifgl0=w1E$Gp}8fS>qC)k$IZSB#(0^Uw*Q@BQ>Y!l-VbLwVxaJA2`$5X)^QrwSTia
zEp?Bq@^o?8a;qlZg)?cH>*+6|UmJd!#Uy=uePdVaj)bWTTCY8u+y8tI=U+yHpi|wI
z$BIvB>TYAKC@lAi`>xFz>*A8Na%<R1*OX{;?xnY0-LzWm6(TV;V5Vt$oRK_Vrsa~y
z6Z_jstd{M*{IDdV=$yhOt*suBskN7OHMT4~wV>sQ<K69&4WgWZuJ0c3Nioz(7WOfJ
zWPQJFwtq*9ZQ#RijLXkw{@SRoW{@kiY|;Jq5pS<q*r-3KFRztvu#i3a?0hMw@~OmS
z<}dg*t>+LmR=RRWNL%Tz@+Ujl>8e_@LP8?6CU02Kz;J8Vxv8Bi*qTqO{|uYDFgvmG
z!o{{$p|lG!zqT#-%74?{>y)!+zoW$k2NmTB>Hi~UlrLReWOd4SX`^g!w9rkp%aT0T
zZ?&C$(yt?=*uWC|Xw|88i3uxirA2z??zy||TGi3Z*@-%}Nsj`=E9U0iwo6Y{GqY*Y
zSAKiX{n5fB`SR?&BCk)Z{G6*Sc>6{0@|?vJ9yALV=Vc$`Sn)+7a)XtD^ORK#Lbmfx
zcPv;du_(P(!%BrAc4ktE5%W9gNPYW_r#DDQf4$qWYqf8Qdc;-7_VwoLmSkH>DxK|S
zxWe!+asQ3uYu%y^J+n(iSYJGloWH+4%zW|gSko^-^D=q&mOB2fc9c<NJgH!}-Pq?*
zox_Y1&3-Qgv#jhAwl*KhHV!VU^OM$CrK(hYQpu~Q!#XVDosMH$d6;sr&*GkM-@{Mr
zVp{$DjIglS$BQmjW!$x;*FIP5`n#t(j45Nyxw?xVR>tt=RE2E#=3TvJ^6e`QWuN?B
z6sJYaRTKW46v=r<*0kE-u*K>5YdypD816dC{(f2YSJE&-D(cfLKKr?S+|07pGxH9I
z@*i$%*t#@e@lC6@)+cUkf4?>`pE;TTr@&eRA4v}nkJVLSi+>(jGPUZBRgc0em9!0W
z4_q(I-~8n3&6UyIt)bW3b_$;Swpn>@37_ZgPu-e=hu<%ko@<kr!*_Yz#>>afFY8nh
z(66~I<f3BQ5wi5kro7jeC0S0*nfA26W=`MnwO4|&Vtu|Tamn75{(B+2ggHR*uE(`a
zcD29HZr}g+uK54k^?!c<|FC`j-)EbTzn`}wOH6m`l&?j4x7D_W-Q=4&y~aYva_JI>
zm8G%aSA!z5b(3~Jc5w+|ZI(WMlY7lyt~ryPo=pxryx_j-{wVF-sK4*mit0`>m~^;#
zVy3lx^GgZ4bH~``1Y5~`N{mdp^qxP>{m=xpv``&yB?pJ7vz7v%<CeYK%fvk~<E!L;
zx0>F$8&+*|4QLJ8*d05?tMFEc&)>5;(<SCin={?{oaNG|1s2Pu@GTc!p0j{s<yy{_
zTc<^x-nRDkw!EjC@}6(ac^>-ewQp&tZb0I+BOBj&%-Q@ia?;EfH<<;eF7!#rU`qYG
z)i~8J?CJ8bh^PaG9lgKu5Afz1u9-IX?hLD4!qGevo=DG`<G8H*dCRZ!Q*PYSvdMkW
zy=>)+7g4!KPI2zvc74K`&lX2@7uMGwDE=(ZeE!I$3mqp8?KrwS@aea+Cl#V!>`3r;
z<5k-+S89>s700Aiy7!rEuk|q`W`t_ZQ`1_=ubOmek)2~&z2wA0Kb14S6tMQ1G{=3(
znC_6p?9pR;wU;e-MR%TVU>;9%X_($UtLy9y3x1hIF&k@X<pt&$eo_-u+ray#mOuNn
zi^qee{0E;Oi9SnvU;M>j_wR)>k3UH6V6m>ODx6>ROy=v}c0Qv>9`hTDGAz|XT^8OU
z?G5)8uI)aeJ~48J#qx+tzlE4od<0lMT_>kEyGl$mTc+H*g@0A4NWQXML(O9L0}j8X
zPiefF*Sc%-me!>rm-%)}e__wKw6Am9rt4jzR}b<pxbFW*pyB0#%5YY8O|}JpD;F$&
zd|Xd^;og}pd+uiD?)p~js{VfOx$wgL50c7vqC+2^&Q1DMHif-$R{G|J{tgBXhU}@l
zu8MMc_imnDDd_)t<?*^Yt*aT;52`YmR=$u(H{r`!`Z6nJ|M@NRqIT%+=y`3lsb|WC
zhtF=M%8BiHE1dT}ooT;Q?#$F!FZtxf?%5YlJh^D*kfV0*VNvjTr+E+O6dUY%7vz3)
zU-^Q>n2?p6tfdiq-nfZ>oL+j0b7uOhD~(asl8gPFT-_We@%iTWtY6bD7k2-kS5SA-
zjias${ioFayg5rR@X<q^b0)5vA`d_Jc28qo=sCHSrz^s6Zqo{jhmNjWZ>{7h7g(Aw
zSKaUGlqsvYlIA9zDBylv=xh5blX03=lcq`gy2I*aslTpVcwBbaJV~P?L!IYMcIvTG
zP2*$V8=i5WJ9&50)T!Y~&yMerId^>b<*J+8l5g^bcAZfAXSL~7-NXxAMr&Jmt5)8A
zdiM2uyWfxa>;Jz#U;lBne*O2Ouk-JnP`hcnXX%vFk=d!&#E+NQJh}XEMsV1nux#C<
zzl;h#iBzb$FZ@=OlC}O=632P(Ij-fWr|rz{n!GhcQr2DKec2Y4`Dt6s=lU}-dzm?V
zb84#Z_6sgb-Me+tJa=wC&6__?KhO1XVUja!Vo_Qd>Uj0_i7elCm-UNUUj2ODu$?`m
z>sqLbVs=#R<dbJ4GZ~GNPoADsY!<QXDtmkC!^bC=b#QI1UOp}8RNAJsxzXM?vu<W-
z-i}h@_4PE5_PT9n&$N-ZKKrp-(z2FGk{ds}Z?h<WHshUkvyt_^+n*IY7cd2u{oQoq
z(s|YU%*HADR}Nf#q`mCM<D7<dhu%~zmP?$s<C_!TgdH2D4xIh>|5a4Zv~AO(ERRqA
z{a?cE-Q82WIk|QeS{iDf(%hi3@Yjd+hcbTH7zk-P-Z)ujD965h#jd{#N)In|Si4$;
zRpivFHHWlz32=o@^f=pHV*NZ&Lqkw7;IHPYi(TCw-HWm{uBC+>;|*im^;Dzm*5k9Q
zogaKK-e)@NpIX}OW<Tl1Xokx-j<1TEo!@s+D@m~Gh*oH8=TWBZH}-r8kcde-p?z@O
z<6^_!{fR=`BE2}1Yv&6e$VhoUW%J#G%Wis~Fe_&7QAh~>aVG8_qa%Cqj0|4&ti{D<
z;qgfZKljd>VOw|p{b4)5_O4|I4sANoo63<W;C(4?^Qy<+);#sS5ws$nwQF{qMd0pL
zzkSo@^y<kyP<x*{TX>gQng5Pe3#(0T9+TL<M7U@fZ_{G;QZq~bFDH-fpS4c&E#v!#
z`IDzbUaz;hZ~1<wx?IGazTU>pS8U90HGQV7TEM{a%JXoK<B8>Q@o|BBqCei7x8kf4
zk5uP;lbWgP^qhO%nqD|Aq&4fK!dtfHja*w2AL+gp+LFw*hQln?;`jvr^leGpN}FSz
zT&p&a5<Hz9-<#QUHf85}+vDcJZ*N~#{P}5@P4>*-13NAiTC;su*)KQg>KBn)ed_I>
z&Iu{LS=XX=qi>dDlu5<s)mzd^`1-DgJ%5$K5^(%a*`JK%d?zRHzsM-u#kyEh?zqgP
z(%y9mJ2;DXe|KM+A*@!lXM2d_=KRI){h6Mw3wz-mq3og-`|Zb*_m10k%#{Ah^TeZE
z|JMig>RsQyoX8i@{_42k%^Wpzm6-eXpDIdXN@j&R^Z1<Kxi)9zlG9anxB8xct<fqE
zyCy3;JzaXzw%hNg3WaoPZ7>vF`+WKFcKv-nUcImXdQ-pt@6&wy`oC{yzul9xI$BhB
zYsT83Q-`X`W8&^SeVll5j_b>sNd4DkJJ;Nfx}LjwDf>iz4UN6IB2M1@JL2jeO-pc^
zzg_Q>AIq~FdzP)^nIFAlpZ3MBZS1DIrb#||<9X+g+!?X588dWUPCvVO^XNaD6%9e3
z(oF>m&Yawrmo)vEx6R>IPEk`PR@R6YZBoyi7}VM^Ns7~5!Q*wC_ml~VuVQa(vE9}Y
z5p+uOz@+Jqiw<>rT-kUoa#hEbO;dxiRxVw)b;kbfAuHQ{g%?XKu&86@(A1AyI!PjM
zML?Qwr_h9(Uff=soU<G{Cve;>l>Bx^U_aB=g^Z~cLefhY8b7dj!6DIM)!gzsOFeC7
zzDerIH*3Bw(9<{=&9roV%tzyA?Rnt~ToydZf3U9iYF*Yl^&4-6yVZJLMfdo;k+{Xb
za!u{qSGSU*m9(~W%!*c>b|P=n_CpgISB1L#ZoZN(oqR+<vSZ`k9|uo8<P^84I;U1I
zm}mDQpK0Q{$uDNN&Hp0V(aN{MM!bhF^~#z>S0}z*zwrh4K3>7XrcCxjH`<rz<eipX
zZoTX9f^8e+nH{#-DkfFT-(|smvZYBZ`QS%+x9YV2_jRx8NjjWv-<%Q<xBg9Rfarfq
zo06oogHIys#XcsUmK6QOBhA^UmpNA~b2C%i@q41S`_gmV*Kd0w(q<&tmbO-AzIP4t
z**C``7J9ymUzeV9_L<mQlgiz%LsfRA<*vK`?P}@%qv7F;EhgRB@ZYG;`SFjxV!hd`
zcJidJ{&(T^<A(trKJ#;U&c4{Ok73F?{mS>Y9uMAI{QAy+i{I+YtoW71g^&GGWsP3>
z9(*UHe{{pi4?=r5<tj_&mE<?OnZ%zt;W<lF+2!qo8*6TTRlGM*?d#jge7im<O6~Oj
zBJgl-;O^}+SKbnuEcH^$?f34S!mAsLXT&Xdmm${~ao6nGnG0?ECeDkznEE5v_oNex
zeD=?)rnzP6;%S$69}fvi|6eJ5T>Gxu_Le<!A9|MLtx-zrxtuE-Df0XAH?@6AY}c$l
zJ$AVDIeV%{Z@a0TuWPnwQD>_t-(epWnL5FitB0jhUx{V1Xz*+N%IlRiz4LkA-o>l8
z+;3L?<FuDk`qgJew)`E2cNmvVR+|6pJM;G9kN3Qu?AZECW}?E|YR3nXUU#qUio5pt
z@vo9ScVzjOKQ6qRvO37*o1)2rEw|k!D*A|Sn)dbO?)CeBo?T!6zdQcVKlA;+pS^y+
z?fB+(tgp9C4RX<5op$WyjQ8*P%8yTJSR11^+s0<?^V3f^T?<-yMT+-+FVpL`GX(;<
zuj`IpjN_@5Z@c=SM!L<NFMHGS8U`kj-Rq5F8qem&M7d9S6qX)l?;LaO(3}b4=gu@3
zS(vw|1)tN@zgh0vcFXhTw1AwaVP5~f@Px0M`p~Iuag$KzjM7OVtF><Cd3R2YU@5W`
z_ZRnk_L%$d+(g&udvr8)6J0z`7g@;0oBV1s{T{j`Y-Lc_PVG}+r^8;UZSA-c)a`LK
zXo}YCzjtd5rbu*}ZlB}lQhR<%hf!jpPN(Fnhx?W=zFRC}%DVaa)dKFlE_=eg9;>7V
z6};e`b6TJ}QK;!a@>anXL9dXshmUzH&hhLy?rX`h?ue~nzRW|mU8`$XT2Ft@$Wnge
zP)GcHRkJC<S%-zYFBWW$SsnR?zr!Qb@MntE6t0lNd0ij>83%6kNr}!o-mbmoUhcsu
z5>vR&9ew}mcVJWV+^0MBO*Y<c>Um%<+iF{U^tsfp+Vri@*YXQ4-`jos+?|!n3Isy8
z$(>mDN9$}}Zhz29#@2wKr;1)%w#Qjrn0Y2NxzFJGzsTx*wOcDb)k@9?_Pe>pK5^Yv
z)x<P|<!3xBpSGM?wnI_>=~)Ngqtg?#o+PrZt!gX!{O$PLwN`IU+g_z|9KGn~(dSZ7
z=&pL!;V7f>y6wvXIyPvBwTkZ4TN>cp(l=YSBdK4_(Rr2M`*nVe^RL|K+;)p8VNJiH
zz`FXH&8hB^>KV81ot)iqfT{0{`qMylNArp1CJD|NGtNmrcVC`zlX0Q&b=6s$lX>)d
z52?64IoyBs%z_=7J{R-~a`c<;1#M?d+UJ_Xs1WkL)i*)S>x@b$GxJs3#spc-6<Qqk
zH~3$D<G$W3B1IrDNw{p6c+Be4t8V79ta8zG(p?$L`Qpj^3)hbS7Fssl;F(p|MbRa@
zKd<O~|649SFEUzBQhVmZjE)no9tm$UAMvwyzVB*x?Cz9QHeRON`po3RIh{}2-Tyq@
zx`27R?_$w9lSKdLmYe&MbXqdEr?>Rz-F`QJ-z=w#63P{&S1zrnJmcFZwvSEz@b22c
zkX0c;*HWs|zImUSbzs?<kNu?zmpNjO*~wnFHMV|#uKLg8rdJbhEt~7IxA@)*omG-b
z8%?fF314cNH@EG4%NL%dEpfq(0(-gzRUYN5cW3C9TCab8{J6RIDQ~~ilcs-|QRK8r
z=F;S)vc(Hdg$U&aT|eI5e%|i)Vg0&~KlSUrK249ev$J_vbJ6?ew%pZQ4YIxdJle8n
zPqbhAkyX=6t1GPLU;kBdQ}4B!Q|QIHr>=kAUhZ>$es*cz$+a(K=d+c()=o}Oy`>yb
zo#tVY@h0bHiIVv**5vPrXO<^3KPcOo(q9%n`Jn1{&96PtewO;3!pfU&&*92i-d(?0
z=!tt!_&x238KRo1B{_>!j(EvvONJ%t#vJ@QDe2kix$7sc{q@~r<D5^5&!_OsiR05Q
z7u|Gd|Fl~<x7Yoi80D?IT10nZllxSKGq3E}1DNzRbx%H<e(KGS19K)UIFt42f6hxW
z7rU1wT6*c(%o=-I4twx1K0a2U!W)n?*O{;MzM{EL>GmZFCcgJ}wXNfy+SzNC(_0c|
z_U@xZxRz<xPwxnpJy#@8&C_^ixzx~M&DsYSAFuD|DDs+WqS<-njHJ&Lod(C&DO-L$
z70I&?W#m7QUpRTI5!>&3Yi8shRnF7kJASmC@3qqwf%V!U2l>1&JlSD&lq1%1%U0cv
zXqR_qCB=J}spm3Vx?X<V_Pp!%Utiu7iA8pA1)R2MUH&-bUqHCuzV}`^m9lw^N%Pm7
z|1a@sIx|;-NrKD&eE+O(Sra|Bayc%Mjg)Fywe2e3Ej8QC0ewjsRa32Y9%5r(@}Xo_
z=7&Y$|02!59W!w~F1a%Fa^1|cr&3!K()T=da9)3Pf9}QAvyXOp@a=s%^;WUa&-2%!
zwU_N`;m}>eBljrFuH=34me>OozkOCt@0mNZa<b|Y<xlkz!cALVuBiERW9s1@sy@3F
zogK3Gmo;u`HuZ2{DKyLD-tw<z)e~MUwo$n|`<|N2haEfq-DJ5HBEI)_=lhLXf={2A
zEj^O3dZDN9A7O8c_J7CQ4;j5*_2z4&=9~R87e$_RC-gp53!M8Tx!85lW%*-Q99KJc
zFlNkMV4`Pt*kP^v>-{IL)nxsg`E3QCOway#GhAA3@m0;O-8^ma-Du-u4cA_Nf3U(z
zdV8dZN49fK@#=*3z)<UJ>*jlzx1D~`SCw^!Nui&EZR3tW?!ObYrfO|gS>?p3HOZ$&
z#D1!($j!+b9jO!DTMTFI_Eoy25;T9M?ES6n)6d=hFuU9Ly#Hh^x5cYw=WD&~c5(<=
zx942=$+_k)XKtQesq=B;^w%#qj3hQ5-J18htj<Pyy6V$8)6Q5H`=u0ReQ!1pPBnI6
zlD^Cqd;Rlc`F(#5udn<1JO1Ba@%_L5oHbv6I{0Mz<=a=7Ce68gx3IFZ-Xb+jv;X<&
z$Cp7_&Gov{(i0rMmFi4?qEvqWtDirwe`Yn?7Nef{dkZJeihGmj%CSq(O^B~!;r-@-
zCn2}Qwr*qQaM5I3Fvs%ug!7kn7k#s})wPtJHpgh1>hmodPFGIe{bQ4T-fD|O`|rv<
znru7cywd@*$Nd#D9gKO}!LBzeg83%P8LKyOHWzzY>VG@hYcq@g^u)`1LzT}x7MuQM
zN^Z37{G8jTw&kVjZn_q9ElT(53>OC>i~3V@7Ja@rM?*E`nW|#p){Fb5e(jtYDfaBl
zi3KdDyo*kBL{=~U(xG(Se8JjR2W`H+j*6PQY?qh$;jJHXF4xK)+#US8)itdp!Dmy%
zu8nDLbvWl{u++6SKK4Ak@pRVW-V5$#Ta%b`97H_kcyP?=^o%Sn`ePMnx}a-S*ufj_
zv!YLZd+D*zr&H$8yjjgV!;YuS$W^)Dwt_E>^Vjc%)&?xc9(EkpC_SxnWNxL+!3Zf8
z<3A_XI#rxL!r=IOqh`1S$L+wrFx&FiEK0ktl<J)=IJt2D_wA~NG7Bds@f=NdSSC1s
z=l`r{)1IhJKeF%jBZ<2$x@VY|9I?JP!}f==LR;&Or^%PDosHyGdnli`e(J9S6AvC)
zb?8h~?Rn8F6Kgj=P;zhLetaeMM26z_Z(qXqF5K<nKTCeXdc(4O!OR!8C)rIj-&k~)
z_1rmQmA!%g`0Smkf7kqEUh#BpD_@@Zyu%fhuQo^&m9H_my085LuXXmFniD)6f0ljt
z;G@yI#;~N&xJ2o9&(*K*9_;e_$=Nyg+h2vUUq>D9?btlYnrk|9(eumfy}sGjyDqG9
zTez%Qrg3G`XZ;{%%bnTgnu3{LH@<%h2)(t+hmGt04G(@DjxN4vA@f@m+i$NZEn<HC
zB6*&gC`Z17$JSdGGLIRguRL71!%5dw(tzusw2jrB=u<nc$y>Fp|E!nnWVGMN>icRh
z<0`RfcCR%|aw94hWKZ5D_hzQiTI*0Lk?7f~yIvRDs9fJ1s=HxJkZ;<>C0;2XA1Tf<
z-qP1~C7|@B-yBYH=fj?bJnt0FrXN0THEo~!?Kf?Y9h;VkEi?G={=U+}@41WjrEd6h
z$N1UKXPgz!gnN?$axZuDtr1weHShUpVP?&T8=gE?)MtOPW=@B3qbR4?+$C~q8z<$J
z*8X|-I=}AoZ2h`_r_a~_{hhzBuC{K^h09^*U%O_tW_elY{`~iDg3z_x+j_4rKmPeL
zNYB;l>ZJzx`^A-;<~RFHY~FR*Xyw0&a&6n!FwOg0SJ7uz(LE!ptFWoVnK5hX#?p`J
z2h)n5aRoW=Gtw|rFEBgN*1OJ_@AM`e%bAk;k2gFyyy?l2ZHKmtnOnDH?{T$hjyd^w
zRaV0K6R}}F9$q?!oCLnsPmNH$%5C;>c|$~;e=*B5)unrMjz2B1km0M0^gFG1fA5#6
z6U2*yUhm4CZCQQ2bnl$=(?9RLmbGbqkauMFm83njX}<4fsHa60y{;@dskyO9#klCv
zoUEisBfgtuOlKCbT=vS$S`hznLPG98f8)0%Usb=U%-J3r^7q5B9s6D$Z@PEls_DKP
zem5=dU15Hb{ORSRRYr#o{Cr{{u-qg>Xo=UM6M6ngdCdH5CI{zNp3zj74O^vj=vZRN
z%Fx*LLaHh`7w#^(B2!#avarj%J#hL91E26SE0*ljK4S1ke!EKC(T;<=JlD(HuRFqE
zn9|_8rC+VJYLh6_w?F3&JM4~Hq&ca)(|xiP#}T_#&%U+a`PDn)zIn}F-^aW57F<fQ
zt~|b~`tr-&E$7t!*{(fu&pR|!MY}U*1=AD0wOv{*&kDmm>|H!vRtI$36ir=e^EmsH
zZ$t0RS)m1HlAj-Bb9?Ht{gTZJ&HmheXLZ?1|H~J`9^F2@a?AYv{oO{AH)3>`Y}Ph@
z&RQL<bMnT<ot1OfHMhI@|NHtmQ~mLA2HyAkKC9iTkLGjB`FJs1(9cwI-omP>%<G&_
z?rQ$F>fF%>>wdjIe6V1W#<Rladm<SoiW|%vWsfzAm&{RZulI=#ywc^`I3a3_So5Sw
zon@;unzKUXZNA;U-yJK-{<c?*t9xmfl2_;3zrB&``;=CfYt7emuw9}XQ|d4=`1om-
z)yY?iR=bujoa<C(*;QgK9Ju=0727)H)|XW?Uy3^ir}f4?jQ;ieriPA+!s)F%2X}YJ
z-Zt~uE%@hVd%U)qj-hPxSq1*1wbFOOAFsbMCuGI7SstuWCq)m}or{Xv#;d_Q@n(75
zmfPy@EGC7UUx|MHZP)JP=mK}`chP-O)${XfxpMEmFFF1|l8@!Xv`2C6O=9^UCow(D
zlZ&z~PP`t~x$X3?FF%ePSC!@4qBCjovL%{=3pAIn>O3mGdR6wy=gZ&k`+GOv{{Qaz
z|KCmD|LfK1{B!-{mg2VSmtN@&5qq}#aZ$b1vLml|t@EG$`DfA2DQho<-PQ|y8LL?E
zKKj-B<N3eb>t<zFRLw}t3oAVkDtB*pllS^B_O>T_lg{prUVZbxRkiou3iLxAxA8AI
zQns<vbfe|cBMjMId!Eb@=JSr4ruux&%%VqU<_L+!f7mTDL)iC?k@QLzY5xQT?Tr%W
z8zUxiM0RkT{1*^9)rTu`)vBE5xkp$1Ic7TX%t_9BXZGlL$MIEeeBT{>YWd;2cR#W+
zYN`sjxK3SiZS9)d+wyLw#$G@Dbkn-&xwlVk%PkF^xP0p>jTHu)m0g{lO%a;bG9@E%
z+x)7N5t~*^8Wkt0%-DY6>P()du(hIEYZm7ue+hYc=4{xR-CI7MQkYfoCtUge%Q~5@
z0*`07H#=`H7PH%4o^xgshu@=4C5b1W1n(?U%S@eMq}ui@XjSEr+?gqVN=<i8H_~2J
zGI@T#iEYg+j#Z(tb5Fgvci@M<tNGuT7Ur{GRlkys>M{3Hm^QoDhwbR3TJL$)r?Qlt
z9b{!Ly>ahtv7Ny0H1GGFU8y0HZgfgjO?kQFA;*H=1B}TFway-o&Hl5ip>a2(WA%aD
z?ya1!{J7^nHBsB^E_!j<WW6U_4^&*glzoH4Bqt)^0M}fFr=AwKu6XOSKV0o$ai?ZV
z+rb;h*4QcWx}I(jzoNk1wIwV(^np{<>5c0(wq9Fwb<G{4v*!-_7K&w8zT=&w?@%hY
zwPe%N2m6^bRXFa;9O`(pzLBx-dHmgA>-9<BqS|`{B`rEXx*6nLXR(cTFFS7VR`B=Y
z*V8V}-n_SYY3afRvyV(Dx79QBV{!Mmn4-;d`b9$M=iFJly%;UF7KgC?>`5r%FE@;S
zafUN#!L@)RUquQ}T#eWjCc8NFh+OKO?E7_5Ndd;^>URj|pIw*IQ-9ekMc{sRu>6zX
z7iwQ<mB-FuQptJ8x+QJF)sqKrWm(Tp?^gYNxhs*CanEAcnerNER4zHc=-6G5-6OC!
ztnXEyO;4G;U~zZLl^WF_m!nosiSk}M)oUrsDi5DWoSnK|`Y{4>21}A$_AB1Z(Rm^L
zbBk+Yf6)H#Ggg@2n|Hm;?mdIs9`U>l{m-XX%L{+#`@Nu~x#wq1;r+70^W6czl$Z1#
zn{r^9!(yeW9@~58o!gRPC6f_pl77-Nh)Mm)tl(6K%|Z&Rrhk3;u=~8-|EK!(zdy&<
zeB6Ehe$97<>AhYPgSxYP*Bn1wR9|kRvvOL|zBT^OPe0w0>$<wE?%!t7Z{O$dK0mp2
z;mXSYt7U#ye0siA{PXo2>x1{F@BUQxGOM6Y(bc|l8vmo86O&HnpIRz4O@QI$DTBFF
z1+8QE<gvcn^dYgZv)(JGm(f@?%>1Wn^Bd`~=FS`M)Sj{UiTu}6O}@M9%JD6>>;69s
z+8T95wbN^Au5Q!UmG@^UFr`aqrdZ1G9Y6eVLh!lDla^0QEM)!8G(2StU%55!cGM}?
zC2MYPTQ|LQuf)yGS2lIVtXdJW;8t_L?6uA*6NHPB!i7}N`FKB5Rdg1f&>>{eY^+i2
zV;FK#$yDRg+^a#m{wz5(fmvv&ME3Q03vZ_vyj^v1d&W2A;>>p5o3B0F-ma*6nie^A
zkNO!tnN`yyy9LfMtjbyA-Mz81C4}t?Q|Lr3P7#3(ZVA2DUwh>ouqe8pkuYJ_ZI!y`
zy59O<l5MQ6et)qmS@w(jtK(};_qr)HOMh{TQRx1-ZVIdJrR|{!hL*E8?fSkg^MbFc
z)!lOn8Rzo0B-#XPd+*k%j<@G~8Y{BauBCioVbAKyEnA}mt}e-c^Kp;WnRPiQByXwa
z^2_ts1^xfa5Vv&2qvk+8$16gCSEbyx%WYp`K6}x(Z7vQ4=JCgG#I?Ar5L+4Aef9g%
zW0EEzPTx*k?|7>8h;8MP4M+L^Z5Nzt(0b{}t)z-GN!#V+51e+czACTon|zny)aHd3
zzFV7oU{KFqDZaKWD4;vQsr%ln&BYl?G5z~CyzWZnVC;T!I<RQTF8&a?$DcE_iVcfe
zoqzWpW4KeBDf{3<@UD_QNsCPOznaV)G&?!2{IE~`)(Fp?t)7zCFJ%@z{^e^eXc)%-
z;GV$|t0lTzyR7rFcIRJ<y*{V^<J#^2Mb!7V1UOv?{bZZD=Ap~UmnlkOcW$#PtlKSg
zJ6xbyr>N5B=cT`oJd$78?|c7i&63627X*Y#zu&M}%lE74QH@g>8|MmX@A{+t#Enas
z^VrPE(hq}^WpfzkY`qe+Mdj=TuS1p-yC(nW@y&SWICWL1?{@obC#{xB`*ry2SBZ_h
zkiYD^;`)H_Yl(B7zF*gT(dGZOV~ou<argJDu=EyWoSC2=lWxA~bkr|rhtSaNt8P5`
z=*iD6)3-$@Ld?(mQOgwFzRYB{;Je$t&bF`l{5rn&-{<(6f2;4;egFFUZA=75OV+Pl
zk2me9DYTK>bfwBJUQYk{=~V4duOA`{@-w6E#fHY;E|8x3<@c*B1FqUO|HV~bS?5dE
z^e9|?zq_kq{eF3K!|j(Z@K5HmmdJUmwNLuFz%1=h@w;1eirSc-C6=by8y7t~GG{Nx
zu}POVSsXW3e;h4!d~)!lIhw^@c6`NQi<Y*zdo7a;TFuLRG<~jFS!&?gjU~o6`~;^i
zdahpeX}OBFYniK;<@2WnF`p7GW!8#E@|9aI=jW03TeWKGl%Q8(+s+F9TYdHKyVrNS
zV!DF1HlO*sqqF#t>gP*!^DWeM&UpLXe9?UDo8gTPtu2$dCI=om`snfB)~kLc7weqO
z)Bo2=<-FHDU-?Xz)mp^DW_9yc<2&kGxuV2;#cPu4)mQd$KFO0bt>CvXoTb3hR_4XC
z@zuUXxqo^XQ<W2sxIWG9xpnG-249NGhA(Y9md?`t6}9m6>c+$_`Tn$b3j=dE<d{ud
z_V~b$GZPIY`TPzPny{;#XL&WbR<^@@$0Ei4MV}fUUKJ6TR`yWPO!vp$-kHaJ`|kzV
zR2P^==|tvluxZ-6_I8`7uZ_v=&U=>`*Tha@V{9w*ZpgcN#dd~cvs`NUwld)*M}I__
z=C$th{@ncI$_?I+3vyR3;$8CIv*<(iVYeU4&IJDVzv>lqa$iqZnPQQ5n9)vSVPo$<
zp5Y6Bg=g$a%iW$JzvNWkj19%Ne9UGPsy|=Fc>afT{++$b*@t(}ILOoJ?$zwVr>uQ`
z@_wi1JLd1ZoEy{fF2(79O4(7-=wQ!6Q5m!It4ikgUeVt2F_`!J#oh+bTdb|Vj@MTG
zKDMJXuv+A!h8Ta&<VAwpN*I_EepuAH%_w|oGB0oE^R|`w-tKqwe=bfkuX@~~x=i5t
z%dhRtQ*J)A6l(f#A@auLSu%+yKR)xh`lojv^Wjwi-+%46C~5X)&Mbirk9L!diC+vV
z9vlubO^y4q@uY&f^t$b9cq64IS6qIO$9ntN%%u(yk}C7cm(H4Uc-g9XMXFc*WeVq>
zPx0ta*W0WfvSF!j35(q~w;S67!;j55o!In#BcH??n_caGw)wY@@XkB;`E%MW-)CQL
zG8(KC>+A@-bXciGyD0MON6zEI)0pM^&di*tsp_l4>B_0~_0O}9-RJlHczVD7+v)iK
z4?nNp-ad6|Klf^lt3d~T{wuMP<zMo(D9(3UuuJym$FJp$pY5}|cQ3W{>idgkF`4R(
zl@Gm5+_Uz%u37*7>Y@u@9e=G8XT7|yLh60V)?jVlTZ^S*+g7|~+2njZcg>ZLbHCjQ
zIvl2V^K|E#&dH)jPH<{|d{|&1`<|~|RsG!02~4jNKgQa9Nqsl}`u@*LxB}1fhHcQ|
z*7Xud6<oDwsfXw)uccX2gF;WY>iNIjlcKa?aqF}>#pfy`i%$LAAX9u!-_N~n=H%8i
z)`rtt{>J@SpKZc$JL>h$5G(CK#+ys-*L_GyYf~-sd%5Goq>U<m0cwdEO==QHy&YzC
zEVvZla&{i8EPKV<O4CZSFV{ksG@sgOX+C#)u5;YoyN?eyiChl9b5byK-m|ZME>|D=
z@iZ)T%3{5#)^56SRnclwMz-tItbF7;lU}$_s@rH}e!;6;I6L}plgcEU#ccd`wdcLI
zuM^-+eAH&sT6y|}dHBBLx$Zqx8zP)LPw4%RIC{nUdPfz{vRen69M<mJ{$F6$%SUGv
zyc)~Gdg6}1YrN;bZ<eF@VXhUG1-JK#u-2u!lpK35x=3MPF>6T8dUK71r5pGc<<D(i
zr9Sb%Ywv=WpPu<`xomd1U#otJ;WuqrMuBCF<_|wcue13w#dLW=dy=!uDlgfeeb>HK
zBr)eK;dFA!nephZu<-4`@~OouvZ7uD>9C~T<u*vW{=#8zZ{RFd8<s<<8jsUlWZf>G
z{TjsaIH5Y?t!9<q#j6+BZ(de@;M?VOJC<Aitvv3uFnXfUjNInr?8k?H?-Mz6LI2AR
zOR1ScyPagu+pgT5ULjz*`rQ2~_wyI2xftEQ5O7($;OzQ*x7OK*Z(ihT->Z^zNcxVG
z%8NC|45o!P=D~gExk}EYAJ}ul!Dso!w^353y_vnEzbw#?Dqz;W<IJ4a`(40j;rZ*<
zZ@Ukb>zotsGBKQG$;Z5Ar9Vf>wAs9tJ+;nre{VXx{QK4)_IpIDH))B5t(|2tbK8+r
z?{|SKqpTG!Uv0W_PW{Nn%&mL--|k$#W#y?ErC+yY{?2{XSN0<6gyco*n7Kzk9De`i
z+Qsr~>(3vMDUCR7x4)pHoB6QvlDj`|EoqOwIZw~RW?8CdyK`5B^Qkr5To;49e}0>t
zU;lZwe*Mqc`E|dK{(SAORTAT;Z>;^pG1d1#_wG3hU+;>&RvMeVvQ00oD%#+~_ws%F
z7FRv{`+CW*eSZ%%Sl#`4;_>OPXP#fyyz(@+)y{va?Y}SKk8efW&Gjr>xHWS6kx6ru
zj$Scg{E&1i?BUj#VGlHt41&+SET|}s5c7M(G=B=8x7o%2lRj_K@%FOZoc!gc*~>?N
zyxwaEpV>ZR#>p49Q(6LATUM;P=Cw-T;i_YoCIu~>5~}@b+3Zz64jj3m#5w0v(TU7R
zzSBh(GJVHWBd4YAw7&cEyJJAm_V;3|e_dbO`TKh5-D}H>Lv<NcpUtU=3_AC6!sW!1
z%P$vM>3KUbO)@;)>CxgT)Zsa!Wl7Wv#=}nDEM}`s7`m62I;ekPY+C+q+Hn_V^K<-B
ziAr7<D$d;%;M+F&cV+ZNi7$p}yLU!SnCa4TL{v?vGIzm|6%I)u-EzSXE84bP%{{&L
z?Uqeyeec$}ZJBrI(Mx3~_QM+`IE&~0wB3=rhrOxG$%jdLo$b<%hf22AN_?~x)Lpdu
z){=i>(>6SOJWH!x>a71R(`A|po|C6)B*khRTQbG$q|5tj>+U2v+s?0ves*L|OUi%m
zM{EU#SLWQ*{G*)|lfc1ye|bYktdRPxr9Nlh`OK4Azph<e)x!1u->nnma+dAW{I2=$
z%dYJ*1!~@brFN%!!ne5Huv=d;qxJ05S`ih6jXC_@idWMzH+<zbWIQ4i)_&!xdGO;D
zU0?3F4g1&R&VHXW<JjZFezk&K_0PVAX<DT`nbmcBTB-AyCwak-`=?!y)Lx|a<y&cb
z%U%1}V~2Rxp5cf)deu3ol%?e2oWC2kxA?OBf4xSTXW<&V;A`7=i-~4L@d~_ryWjFi
z(>D$ET=$bx*jA|s6`V{zrTXRFo3)QGMMp7oRTX}E(7SkIbn?uXJ8#!al$m|3&G3og
z<9-*XdAx_yzYDT=xOgm_R({{$h4rK6>a?<LQJ1!R_;1J#6VaV;W54r8zvS%&CYL|I
zczO4)+w&>A{#89$*1K_S4DZ6X+k&1=St!jiC4@26_sprr{7v%iElU*3la^+19=<Wx
zddA!z9G%xUT)$9h{Po={uFt*i_NGlXD^tFkw}k&q;=$`*ZvFkcIlU(HvGbm{PICP{
zwsDuIRz<mP%dNJW&)>dBW)rWI@rwghkD^}gu#MaI>(lJ}wcpO>+t>g3wEI(p-|^kk
zzpuCR3GoQi{kh{og~!*V(%_q2jplFmPO=X#w4dmlHzU4KJBjywO#6$qhtns8Z~2|R
zEirbc-?!cO%1cBm?$7^KtM+HRf3)-=p64djJ$r>1R~qj5{bp_Bw^`Ho+L~?@o1<Oe
z*~vJgXntprA5YS=*5D)^Z=;^`iJtAM&sF7%;*6vpEo1#YX?0)km3fjUJN~iRvKa;n
zc_}SgvPi&jrIO25FH!R+Q?-;*Z@+rFrr${@#=Rl<T&1Pn>4RGT4u7n$nb&vx@!h+p
z!{t-D>w~UpZJoAh-RjCp{psC5tG4G>TjZ~Q?fUg3+aJs2hZX0)nS68kIZI<94i!yq
z4-c)()E|59*QuUn+sbwR2;asxJr^D3*Gb<`4cNUY(e*%a-8q8^`I}EkmwWYp*rRDS
zd()y=wnZLqb~3woE?ssc=w{v4nR^em-&m--*K4U`)f3H>-k)5rR`@loTCmgW_ax?3
zwO3aga~zz!X4S0ohhLxXtLbADF8A8?Tw`Y3iFGeJB4ussFE6%|N}C;Jp8G39S8e*)
zZ6Q*req2jST$A3I-p||C#lif^ZB_rP^f%HnQ<pE|x~gR=R5#UvA*^xT#e#N$M?c>i
z)bZN%m1}Q1av{#*W=lxMg(oaB!Bv-BKj^HKmfy9A=R%6_+S_lU_P#Lv`@?Qq)`6qE
zvc1ZN&%Uodpdp_m?I?Z8xF-0q;^U7Nn>U|(o&Uw5t>RAE4YsJLW$)7GB#LJqEokty
zzMN?{Yx!x76`FgrPn7@O!geL^>4h05i&tB)`dZY;p1$!S=!@>6?7eafyr1(!AH8d=
z`*u;}Jj>nQ*ZDQO*JfPz+<9R^*s8z%e|xs9e9m%2_LJ(Pr(NHeOkOk1l8b6@Y}=<_
z>b%>3DgTD8v-R@|gO6$SujE;A>PF`ZHNGmF2FJtQ2Jdto<aCYNJWV;<&YUo)oZsZ$
zzG6+7uB+Gf$^}1n^*Hn=FP<o9eD-UpgWd74FptpBE4ECTaX(-BnonnNlgg<8#l%x{
zST#3>%n-Qm?%|Sme67;AE9(t&1b-I_c`z0WHE<}cu~pB%!gu#y^Y)OCx14KxbL~SO
zUCv0jlFh$it!7Yu-5s;WTWhmJ?z<nq+du!|#>0nWR{H98o!BL+|1mf&D606DSWcw=
zR=b+tKdy$axBvg?<?8oO&hGNqXTSH)ho`5f@3Ngze974^Wy6WfyXNZOj#{$gfarOz
z8~=9{Zq@o5dBD0l=A2v_zeN15-DYblm!#ZpyP{?OsH4pJN_6R-u1DAQy`9-4Yuzf!
z_VRXbe?XO<Y0-_z?@K&?Gi#;I&wYFC(S}gbkj|M$=gkl0WGYRqpHgx{^QM)8YWHg|
z&J&%}b3*3td9vrp6yxV-*fJxh&G{_kRsT(YVZyUFE4My4QnkS-X=8h{0B7LZEmO5Z
zyI+KD<;V@~4ia6vD{uPgpE-j28En{0^g$gki(B@fqobyU>&Msrc-U_1xz}suQi*-V
zdt<Jh-nMRO&VAdwPtS7B1qXBMTF&h|op{o6`I(zd9*&2!d=5nkdornpsd0bXR3EF#
zINQV9b+%x4(A2zB^4G2xE;_ZxLw=+7?yGw|+be5?wnes`V~kE%GH14)Yp3g~{^`Q%
zjS>lbStoW%yjoE#*SW^IBaiX2u<br|Mc)rXN-Fbo+XN*h?@|B9z$v(Dkx)wlvw>uQ
zu5WPg;~wr#zcZyF%m<9_i5Jy!TNo}2?Co%q*PpXJX%=s`MrcRS1jW*0{Cz)NC0grG
zc`RG!&$!pbd(YR%-0lThixNJIN$_88)m){bux)mn%Wk#vK65T*iG{8#tlT+q;p(;r
zwadRV#MWJ2`t4trOI}=l<*i5YbN|Q-JDSV>I(<z=GecS^S*y2p!d%-oKi%%;KU_IA
zDYz{~e}|UvjLuGrDVBGuSIo0-QEXf|bK+eayY>{*Z>`?~bjzxbUYNs_d@OAG&Pv7G
zH_V!UZQuQER=NM~Gj8sx49E6YI3HRrC~%wYN{>c;DO>x68~k@-(zegP`*!NtV235=
z-*%}do>k$wZToRc^RM3lcYWqe_IdIu{jKIIzGH%E8LEC8p6#8LQBX7Wi`Lv4dBb0P
zf(LH2e{0aZbK(7>)F&_Rbu5_t^-A?w&&s=OCz_4Dm9<<PdorySS9=@ro_)~!z@=8L
z=hd{PY%NF49kO?fy4vNB7alomYMPcNIlWsdK5AAuyQNdaE+_XT)0#FdRJk={(*0DQ
zQ$Z7jW@!FAeAr)O_TA}POQU+0^?r8O+Q<CC(jh3RxG7^o()|{>%0FeH=51xWzPkN-
zw`<ew{8^tpGI`?a!*-d)9{Xi?`2NJdHc5}#CW+N1E!;J=D)RK@O4}!&FHiFKz9**V
zDWCoG7k~b~AFn>1&cFVCUd7cp@8j<7uc@@qd#%P<b*9tvRFvNI+`Cy*8?9=dHvTmJ
z*Zf`Jc>KxF?m=Je#Mb?-^y*GM-j;n~OY&9kl8t(SWzK7EFRHl{^pMA&Y0v(|g!<+2
zv2*wF-hKV$_g<SUwij!&S<W|i&Aun?ynprTl6^N=lp80#3Olkfo+rWiM38WB(y6f1
zDrUa*N#|zrTgvl^O`ke#j*|DyO;*9Do~`(rYI*o^<k>GXh5K$kwP)a7*b#Oy=~Ugt
zpd}nvy_ROJ&0e|H?xohR3{lV1?7rR^Jw8v1Ep(0_F50tu_x6u-S6q8^rf&9+{M%vM
zzP=2SJHL9>;uPPMiNU94@_SpQoYPEo)A*se(P&Af+>FkJKL6G;*Bnv*tK}zsQSrq4
zRgNKFcKc?!zhmk5UB0<{#@51pv$x&8c-TX;Q{zOd^0IS}(h`G&0-nfi(%RX>a%IV*
zzvAlazupvx=Df4e<l`0j!}+D}!v8q?EPvHI_tb<_61%JIV^vptsx?ozXL$8ho$uTQ
zEO+@<*?LEmUI^M=lh`Xixk|zGR>ZE;1#L@Se>;)Ul>2-6((t8Ma`==b7Rwr#?M_!r
zOxx@(?08iuYwOBF)0z{1Cw%<9BkOkgkN6ixQA?+VXfN8@_+f7J#pZQMEABTfz2)KD
z_SL&#iBeWbb*I1H&Cbw62@gKMUvM!+rqW$RuQo`1?X??|ji*d)`XriSYZSe0k?ZeW
zvL`Z^=%()Y9nkh9a;oBr19tOwrfq#+9DOIFdXC?p<1=>{K6CB8x~_~V@pJj=7t!-#
zeHYoUWMWg@!{^kpZ@v8G-1X*6Qae0V`)>1|7d!GoHRylh%KV0Qp2X_kGv`R&zkVY3
zl_cMs*dy;wtlY9cBQr^BiO)q@hqQ(lq8I;1buEciFO0pO=p?)Kz5EWXcj@;JZ7%dW
zThN?&M#`G;PKeMM<%8R1%})4RbvXR1?eclIg0C()8k})C+wX~~?4zi5Gtq7DKhG_Z
zsd`kRYtK_$5><D%!%K5(n%>g9*=iX^FPCiMQg;(G@CXvWedA{E>MKFsYp?#<7V3Je
zdFkt7n*;t=<-*Pd-I$!WPUl(m4bw8i^qkLnc_tlVUCT^f&tzNPlfB{AY~@uK(oJvj
zDM>6YxM#$CuR7j9M)@=ABO!}bq1)EZ(SNP@-1(gTv@L0KR-U^2boKgsH6O1YzJC8|
zyJ@!7+_<_QkN(+R+&RtoYH#R*U2)rT*DjrKWnu1LyX$v%Uwu*i?8n((Kjt>gDUV99
zT&l{N<hHjyhjrc4Y?X%2RTA<q`rjsg_`8@PZ?Z6Z@AZl&Hrh2c<@Q!JoHOk`h2NPn
zFl}FcRE8naU2%4@y~H7%K-OP=_J*=WNf8<1&!+@En{!$5xvIYRxz5_Z)0(s&eVl)C
z;k+}JhWeJpk5rcm8>fB`ytQu&m)F&(K$fMe1Z`Ho2<u*1yRGT8rH?X0X41K{VFe#F
z|8J6+cKM;+{)&?)<(6jcSbz5Kp4YpgGQy{RUbaiCGqL;3w%r!1Iwt*n{V}@kT53R2
ziW}RfCYB^8Hb>X&(3K+lFRt4r5X5<A&s2p{n>|TqE*4(vJ+$ca@5>=pd8>WT2pvvw
zJGExYfq>4B+a_oU2d?%x`6+~R<KIVdT@pHnayR|E%kfiXrfjBdg4{~qyUJN%hu<uD
z(XagW>N4K>vUa`mwunxcw?fWEy)1UV|ID|(kNTIia+?^hT(px#>0#ZHx-KpE%S+a6
zV0-JcMULV0gCicAjkQlVU;d^cSrOv8X8tD!p86M@t3MZ6f2tL_>2A}vZRTG4{1u^B
zbT4WKSenfgUz%mi`}s=SR$rU%Uccuh@ip~&?)G=pF`g-+wdjb~)$U!fk@bs@s4IDi
z%<WnFVq(B!4Xxm2&6#Jeh5Q%vTXd!UyM*whv!8R@#g9CCUb`ZHWAVBur~kA6U0v*9
zF0D`|z0vWNrMj(Yu2|Z_UN^o>kBct%emjaintadl#ff$Xt3`Fw-X@sL+hKAnH74$9
zys_oC;&&6hFM0Y{NgQN5`pK6ybN1wT>4zI`d;IQJPikg79=|Z1lW)TRl?E^7*89vW
zIPI|fFuRH8#RZP?Dn<wV5)Gp~%f-4^sxI2MO>)n+88KGXOY<*x^7AZ7jBm)xd)0jU
zd2!m;YhMiNN&@{0E*~?_P0ZQ-^u&dYtLC!y?Ou9Jd~)1<W$PNHP0Ar2tEL5QZ<$iF
z<7u~BVxo{^l9=Fa|El9_vwl?<g{*HAsej>GBDM6z1ueFlIYEtYig~|8wl6#O#a-6!
zY`E;h4^wB?@t*DG-uj1s@5;BwbbC+zo_^NqV($0vYx{23is~+%cYeC+Q)e%K_PSFZ
zTj%Y6tiQkN^U>?i^W~nbl$qeK|1N${Md1qm^{LjYMJ9Be*mZU7tk1o>etot*UG?W9
zZ$nay@V~;DJ0!lAY3k=({(Xf*W{Obfm8hku%T}(~B&2%P>%R9(b5phYRd+Jl8fMPD
zaD1`(@*h_gao)cgoEg04of*q5<@i4fHGePua7wf-Vb(;U(2L(1jOw@@{5AH?*;*-M
z+_-6@m2mpvkJdBfHJ8rW!8yfJ)%)Yw&grW1YZN!EIX`En@$xfs`ZN<0i%R%c28FKL
z`0KV-X!ko2p@nP3CN4^i4Yyg@<>Ilz`1ka~4-0JO_8ouxu);!S`s0`Hj-H;P6dCBH
zS!x{ef7{yB+o!h8)t}y;wdbGLlC;QkH#uD8c4Y7O_nE25yzk3{DZ!^Wy$>Do2w36J
zvgFF9tD7Q!q#IO39kA}y(rH}Lx-#>`_GqRPI~?=6Oup5>Dc4jLOf);TXOlyy>XJn=
zJN-D8Zc5c((3zV!Q~u5UMe>(KE{h6ncoEv<?;KQa5N$H?nZZ;$)Awl+uVNi?^ndrL
zt*XgvU@DwDaa*GA?;0xswcR$mOK(&qi(HvF_m=4mD~5s@oCYj9$3Jz{DjG<L+axe*
zMa_)xVSTdOWMac)ql0CcSN<^GdJ?VtF#7hdUXP=XbC@@|USi7bwOZ>ox6Ewri62cj
z>L*W<Jbs+{S=ak3JZ)!Gn9fHEyxpY`@I_nY?M(KPzZ&heGuUoeI*V%Eo7vDI?BVmJ
zhaqp`qGm@M=a%1p&fX5Jj*XgA^qM7NVZvtZ-9GB;*f{ThyC8ml-tn)IUrfHsJ?LVt
zQW0Z6Z{eIilY3oGn8~{{cXzG3e)RCw1nFB-i-eh4+FJDA2=F=gzK(l8_hd`RuAeN)
z@=JK9Rp*M|5MMbre6`jsj^%bDPqW=`H~h<d-Fb5Ep2ZQd{ny!M924-qzB<L>zMarz
z*;jr|YfW_C?EX}$HP7(Y{iVgfq$k8ZFPZv~S1Q6he?CuHIdj#=C0vX<3hQ?VaCTi3
zema?9rIyz}4k1y;?)}`wk{u@J7hWoe_n#W|>e#FWXSj^y=3f1-<JrP%C@y7v_0^*}
zQMcVTx=zx#%Q3ZQw%k7bM5zq785gJh`sTE@{mg^6)syB;Hq^;VxghY-cj}R==a#>I
z7JA?6@73*W^WScM_cry;woU79KP}j?=5^}XD!u!)uJJWre|#;!UEaUtTFoBoy%mM!
zXDV)-3ku6<eYGnuTXX3oi&N3}JpcZmm;5eWol80C@!t^d9l9DO=Vv=gOi#7cxz=uW
z#j1ItWyo>|C4-r1%ddUfd+hrH`^DN1UtQ)qtZMT;PGerZ<Nd$BS(m+enjg%bXW%2*
z$Gh4hX4fmhJBwyqTM+6xHBeGW%Hm^=eYmMY;U-}{)pIwes<fYJSJl7SwrJXusWW3=
zOfFtgKj;5pZ?6+RC)i$_{3_GjbT!~Bhv-5roi43w&p-z&$jdwNEcDp0`dNj|y}5nM
zAA<%z{Mx5~*Z=G;Z+g#B_x85j>D|vi&C{R$`RU8EKR4xyuRPjf?(KDYl9;iozNUI|
zONQ4L6`f9g&gITlx<98(e_G(d?xnIUL`*ZvY?4xG@Ml%;K=!=>7QVKwnon*ml+M|o
zzQL&5lyi>IYPHZnUl*08dFiK|Yx5abHx{-#2R9nd+x9V@AyDjl&#o`^D(hYvEH-MK
znAN|i>723l?=6=U^7{6gYcEq4EH9gU!*T0|W9u||uk1Y{!>2p3(B|Orq_f$aqO9fK
zjvbRN1S}nQY(HjF_$lI+-PQaIo>HedTq0_20&ADqn>kJBa9npoS#6h<`<Bn_(fyAv
zUJ3N@Ud=vhp_z)Y(=^ZJay;^!A6Lb$RIYt^=WzMD(@WEo)W7and*j1~^%hQjZnoF8
zjCAHc&f2U$v(>;#=8jOlozA|C*@gS1wx*hFUBD$Uapt3iAMJllT`=dt1Fp=O3w&N3
zcG>c1XJo0V!QU4P*E!F>;}&3aL$uwkPf(8Y-5SxD(9Bw?#m%0z8hPIjABkpj+B@qf
zZ`6P7jmJ4;6|44%NZBmAp%~`zd&3r1n?#3a``Xtn`Cbz>RdShcj|5+R^V@gH@8;c0
zG%|2-X8(V93j@2`wF_4la3!x@I@#s_Os=XL_Qx+h3cW82@DwdQctGOXburn?KYe5m
zdMYlgoL`jG<FP%@{K!Y&E!XuwD3<9Siu4g#sTJztA-YxS{;|E*OY&5+_g1`%j|jOF
zq8u9FmD>J%1y@O8P{rQ$E7mv%DYa|~(~Q1rraAldd+&)_w|-64&}Q7wRT7o@ezngB
z_q#5p$}8_Z_v+!i$-}z$qRKYUNlhB5VuIp4@1%VmthM^1@cQnLv=1?y&pEbtet!~X
zd{t|csHW?-pzXfr^3TQBMDG7}_}f`&n{}QWbSf=XrkuX~^WoN<)m>A9uC1No6uMRG
zC`-K@>xJDc_uq#<TK&&*^~2b*1zHU<FSwG@f+j_|_S{hWt*RunV8fJG^#a`o=YP2T
z(PPiGNMEy?Gw*HuKl6E^m91|G=eDq82kQ0}T3x=!=aRgt*r(h%!&i60Ikz{=sjFw*
zj@)Y09QgKzE5nH#yDZHLy{7clPb}5_G^I_ePl~0V&imXu$$n`{!r$G@#_0{MJGiE5
zJ>NQQm1~FyQ^?a*0*R~tc~6bf1D)FUx=+*N%;`i+x&HEU{de!)`7OWvvcyV;PrmL)
zlkXkpC~w`=-D{?we){RlmlC`A*H8bf*=J+BX6rmxEkV`U@AO|5L`<8*H`CwSPWENV
ziS{$c=kT2_^2qe`SgGUgyCf)l)zZ9lt!&-ZS6;1<(l!;jdLjFFEq9~s8lHbfL0%ml
zk5~fMnpx#;{-oy5z`p*jrorTUF4CM=ws9#XJ+9YQsSZ0V8h*pe@BG>;Zjw^dKIMMB
z$2;GvXW{ma!tZ))9<vV@YrSI^u6B`M#Mo(QBy#r4NxqqhMqcIe2`v1_kNDXhJXIZ{
zw(P-+GyR;hU*?MyuY5JrJk(TE@Uz0Dg^8UDjjT5C-h6jEbz{8iimVf9%?7thzby(q
z?5M3+Qp(-o{UJSin`rFy@Bb?zGNzo}z{9h^_hC-!d6!N%&&lg=-0HCCkT==A%*rM}
zIr837vA&y%TOMe&G^m&Kp7Ho_S%_Et{n17Bg5LxRw=iwXD){m^DYbXW{Bvualy{Wy
z&GzisvhZ`N)b-YfE`K9^Hb-Zzkh{O{B5&oSdmOv>?TwRdu$aN->U_ia-d4AFy2;<G
ztk~=>1sFt%MmcYnzjfK?H|M%G_I<b5xLh=S`BRQgHIV(daih^9y|6dJmjZL;wQPcB
zbkFO!;<q(W#yTsx>hAq(_Rg+$_jvbi%(R}ix*%lvu2A2VvrRkOELMgcU0I~8+M#uC
z^R8Xn5?xlCbSSSBOnLnJ7^mi<6^oQYv^`auBdtT$W`6Hrcyh8aWpcgZ%AkW&m*ym7
zM}~SROf%CLOcj6Y!87mmwv*d3HLpr>+fS~Mm%Vx2aqHKg4Zd@3KXZP}a^20PHeS`|
zOkjxArf0dVmqWfAZ$80dRP)Yx+49uvr}^F=pRfDk<?bFVdGGw{sMp(aU(cGqW8VJT
z`+gn%^)}mP&gC#)BgVVu*59xB{Nb7JO|H(I&}~XfvqG=huk5IQ*SxU)(4XHo-)HJ;
zbw|&ze9Af}C`mK=a@ER7sw-8~U+?e;Sz@%q>EDX^Z*uQ-Zs#+(S9e3|PpQDeEyrJe
zugqs-I<jM(Y@62OC2hw}mm4&kS-fzr)kbCIS2uEB^#zH_AIw!=?o`^7H}^)Ymp}k#
ze15Ql!p#Tv)j4lBEt!(QxzGEY;k9(Xh)+ckVza+L-*ZP^=m5*(FWI4`tNVilLlgto
z9a^N)r1fNKwr>CZx%X>+uk{sM^E_hC#d@2nqxEM_zpU8x|HtE$Tb#)(w>$pV*v0ET
zKV5b2+SfljR>tV0{V1`S)7O6aV@3Is$Bf5Y`Dgae<oD_{UCO+pz)n?b#q1pMn|hn_
zmY8;33M(xsZLl>sqQP}gIqj*-Nxyb31D7RQ+47<ll_Fbf>&249%`UU3d~hiJdic7W
z|D~%pe%Ianxv;T+q25Kw2cIP7e`;1ym+RHN*R;aWvy|z=*Q=#DZyzxpPfY9;y1(4`
zl+4W~E=&e9WU?|B|4N&8Rz||-$cr+cwNg97@+8z+1r{9Ipy1KLwaT1#Ro3x|wF}lC
z&F#Ckb)r=P%i=!ujiC)$rw;}z3Cib(DqX)5T=Al0#y#&Paz|>sX3IIfT(v-E@1?{E
zsjbUy%)Ko<W8s9n$1mTnJ}&G0t7O+w6^8A5Laq9rTzI@c`o@N<MtZk4o>xA1ijz-Z
z<$KA51|_44s0W|#GC$Ic@^gKCEwWm<a{H&u!?!o?xEeV9>03Yj6<?1{Nu4#PzenQD
z@>k8Px7Is-&+Ku#W{}kL(c{+N1>yY5GLz-@o%(Pn>+sb{p-;+`ubn6j+duQy=|$mB
z&b>~L`Q7&RQksIQzSYdfp_ASP-EF-->%tP#w`;#Ic&Ii%!OOYUwZVi%+^M@l)%*B~
zw=XVSD)0Ymq7qd7KT!Kh$r(XOXHJ{7)%OnWd93|v#qU6kg{#z#%~<ioNJ~z9sX^2R
zjx1Nr39GgSt<~F6zH&*A{l+IRxY(2%U&gNv3s6kr;GDRL>uQVY#56W-{=9VN=lRjP
zuG@UK`+5hieE3cL(!t%b%gu9=4ArjjuAh4P;YYhgSzo>u?{5BGl3jG^{oh*`1EW}$
zMD{#?+P}CbzntT5_rtQz`{jmzqhvWcC7$uT$X*e3dROlB&kqBid~}qX_u<2l>86^x
zGc_-k)qVc@?Zel{kA){sc(LtjsrJ!{u6vZ9J?Phb=XhPM?Rn|*_wQYINOm_mdaqmZ
z;K<7f(~owEg|1S1_B_e&n~!SHWbx}>TNhpXzV&2TW3spBpJMLPTRn}BCth+r>SA8`
z>wK-S`9GQMvwL4YV*cv+MuUm<uF#a1rT3S9mUymv+pV^2`C{kCT9*&mggk0gYxz6d
zSo(=wx|#Kwmx)DQ?RzY4UcR%>Qr@D_Z<<$%v`pZXuAr41;SX1>Xwr)L8gW&`Ze7KW
z7(M^z%YUpptDMla;qk{EGJK~$R#?bP+r4}Dbp7|XNdZf)Nwxg@{PWY6b+@<oKb>u^
zy7cqMf<4x<@8jph-Iag;p09n5W9X~aSHdk0CuUBc^K(N|wCAdv)>SK3ZZ8!%w4-L`
z!BZhe;})dYiawun;FQuV)g{__ib*a>O~DCyaW=hQGgdZTe&FX3yrXqxuypMdzTO4{
zS=YsnRW5O*W`5fpD86vZ{>yC}Zq0LJeG$OB%G%q_$38>rqwUEaouAx3ZN9x%woaL$
z%X(|igS~IeF9m78+FI&7@4(e;TMdPKQ#XEh`?on<v2DgKmflFir3%iWZAGVUd|^EN
zaWUf}71^Vl!7EA|CLGDW(e}qGCAc+H?)=@gwI^ifDims8by#(5cS`EYq^Wzm4FnF_
z*|i3L`MKd`x`6S%uzx&q9{VRuVHdEAI<r>v`yCE-L8)uM7#Ry)WHL;QWcIeJ-(-`V
z(B5nC`^tQusG~A7<bHp2zVG?W^+UePiuYP<slRRJ%Uxb0Fv(O_KdmIv{dw^FXRmWt
zEwb`1?Np03TD)HA`{Ng$EQ@{r-+jDdOLbyW&w4@rW!oY--Iz;#{$1tRfB(hff?Hb~
zSFCETwhxle_;qmh>+s|EV|YE+yO{eaZ@6&h+bZLjj8EU#(^tPK{nyjHZH@fy8FLm+
zyu_GSV7Rof^XmSaV%J+9{twBQdRdk;Z<}kzFPYaF#}BMXOwMmCSrwG29l|};Y3quR
zwMkkvv)L*-<XBbA+hx`+6k4+{obl=^o+;0sY<@ArXAxia=Vw=n^S6d+x_V!2NX}aD
z#7!vILUxa0m)N{jb1&EKxNQ6W{j2s}z1Nwx9S$>j?sR{~ZKhzivi^p7TN8tq-@B~Z
z_BX@inU(vZ_|SkGUuVC#x>i2S@86}MQ@gH(23`x+ExK89w`1b8ISami{QCIk!`->D
z{WCqcgseRL=5@c=>N5}Hetdp$r8>k@SoV=_{GId(nvtf=Q3-0RzdE^SO=7y3^Iv@O
zrthqOSNGXGD7VtMm%abnn?8+}j~CZI>d)(4w_N0WuIRA^&QgL!+rLf=uWysQ@#oyy
zGrxH})TZ9~Q8l}xg(-8<j5+;cq1|&XP5PYZ>FwqHEY<Va9{-!#{|--@o_eyfa-z7M
znu6qB*C|<7y;8W(K04vI$|?G2+ViO%E4Rkzy`H>pnwsL1DK}0y9tVwo%<&VuP`Yb>
z^~~a+Nt>p2KYbsk*Y+xA|IV7<va<X?OUn1`ww9a561Amii_-+o<9l>YXErXGd|{iT
zi&x-E6}e5fk0l@BxjN;>2MdNNk*+3@Uz%IiPmE3q&|X)OvG|wZb;oy1ESfA*m0o#m
zS+kDy-Rn|%_SW(4?|T>iS7}^ae26<$^VfFS<@5bIRoAvl1l2#j`FwFynAftk>NB>r
zoZNJ4(I)>h1}9#x+b4fC$JkdSa4GLhhgZ`oFTdPjvFf^|?qu<`>pqpZ2dywFFEfoP
z4R-$Gk`xwo`A+ugvrBgJ@9I1|xwX0Ly@$=G5-w(@hmoBQ<_o26$fsm5sWQLbq1EKN
zb=FCS33>eu0lz)}_NJEk{@$@6Q;JzT&ec)HILbaw^0VmyiQjPxis!ib_}x?C+E=&u
zP4=FNACFpR#J{c2m2X}5BY(HI@o!)DEtXT4o}Jl#<k~{5V4h94_pu)R_adj*HT+>;
zb?98DfKs#5Zrl>jzUJw^R5F!P_`AFC+!2mh(~>e+S5|~HOQ+rzOD$Uxcl1=nQL`)0
zT$V~DNv!3N3$MR$Qe?Gm`O3!F<E39puY9<WxRr5>q({kt#8}Q3h8=xwA*J$quQz1B
zJ1XgxEYs$<cy(JE-`6EE+0t9|B7*lO-n;R(`NHP-(BEHsudR)mVA`tVAE52&s<|aM
zWSiN}?|U?r^M4+m-|4v1-0z{bkbqX?p6aztX0fJLTUKcZxhI^S@IP?v1)YD=oMq7=
zm1{O!xL?P6q*CSCyW&;4yr=%idOiP{<yEffx%SoJh}Cw%1`CCY81ud_?rZ<Eb^g<1
z3MPxRF7h|so_O7gNkM7m`oqehKCS&riY91n6`MHALZ35e+t*J&K74T4rTde^=;q%k
zJ3dbn=gl`Qd8N|x|MQ>4zg0e_o3nW}YlxiDT)yGp%^s&!1|k8>;j6FsYGwYNw~gVe
z;JxqPB=_&O+CQ<jb)v&Z{(tBFE;$R&TB^D2{_DSc9}4WgmRZ+-e!*oY8J0y_Ue+_-
zEKA_!U-j~*XpGQ=hB+GQ=Vpp0e@%%i+BD;yvtPUOw)@d-;c=f7pU&C*R7_A^QKK+a
zWTE%OttV0!Z@rLud2N>I-2Q-2!Hc<jW9H4<_qtW&`IKp?m4)S1KR^9D`uFe4l0E-l
zq#V#op1bDu(~p0?Jp1@@ce{G%MwRu4H_1%feYw$m)r8WApOhX?P+eJYGbQ?U)xxMX
zK5O3<G;Hcx^^-+njljw@TLykdt!d({Uj$Y8c~01E+rDtGmT+fBu*K27pJu+VRy99!
z`8@UA3G3{N*~@N7mV7(BTl1~NcK4|-@5nCZd%OCK-TN1-TywjBhy6W1Lv~_?$dl3l
zB?ApXb(YngmKz(R59ZuaFNsL&cH`P^{B?e_jI@5)mZ)8EGtS?&KiJ!PDPBurX(hus
z;RPzP7w>=Yx)=UcJ-v6=IWcx2_UBhJW>_p-^t}FN@iB?=oCwJWciX32)ScwsFIlm-
zsl!!k<<X+jKP$FoHk4g%<?`svYSyf5Uuf0p<LhahXYl!S_@XS9=+(^yPunz4a{S_v
zzrN|1#ry*Sa%=h;ifqznOkwHWe)!!NSy`VYYn-h+)$+FFGV^-Qv=W)QD}2?vuZlBD
ztzX=1QB2>h`grD<04uo%zMl<)ble4g7qfqy^Qz0T(0gjmQGO{|z4xm&lx#m6x_@zQ
zeE8m=^3uBO3BUTUUkIFQyji;}zg2GGPZ80#&I}g?iW9569c+CPdCX_eHn>%LB`(v<
zsO(sIi3ICCjqmw>KbBoe7qeWvcFv7Y_jKxA4=VJr@aBuInz2%JYnEn+nfya<)x|F)
zQl_^>T|Ks7W=5P<#iz~=hlF2ItwO)I9a37Y;Inq0dG53w_u{=K2E{zQ>HqeGilE6(
zQMafyvN46`XYO@xkE`Ci#blmp?7SONR<ikh>Fct13|39s{mG}niNpLh*PH4W&bP|+
z7ybPv%_ecUFC;Q6Y-*5MX<fKxbY)GA&HCx8p&KupJuvCf-<hxKU&a6We0}LhJ1@=9
zRhm{7&s{YY89T4>ypC|3B`kdLX_44Otwmq{_DJcfygUDIa~pfMI9pwE{`FfPJ>AZK
z+bbi}jbuLGwtKKM-|4=(^E=xZuDZ<{Rm!#jtSf{Us<LSRD|~ZtR^u~+YPsTbV$%;;
zKAcb(aS7Bg`&kjucl_ywCyQBAW%-XM+sXBvo^;UXh*Qke?OxFnx4wu<xm$8KMSF`_
zWcReDpq*Nx7o#$wPhWog>#h0v&V3JBo=%9rGdFJUp8pS;`QN>N&wu=}yL{cB#qxi5
zTr1icBPTB(x4-i9ud|0|U-x&hnAa!2XIK5#PfuUwt&ExXDsqL4e^b%P+e__i*G^xq
zB2>p=P?*N*!NL^EX58&szdb{4sicUI_9Vx}S#n9u%QtiwI8U9ath?@|b_s{n#m2vv
zG{0{YE<OKthfxzZN6DuzWhYC+*E3xdE3mmN**tfRt^d+uw&+^6=_hB@%*#obe?T(F
zcaHe?PGMsXJ_*<JdwOIy-JNK7_RXUchMA|;Ci}>6F4PKgRPehSe|7mpgAI+x3huU?
zJGD+!{T0)4iG$r4YlD3H<Tr5M?=pY)A}{KUVFbTmTSv)Mmf-oh=a(N)xVvqh+ng&?
zHo24^PME3`7Q4as^wH#AH+NH=2R+t7n!6m6QxZ1C1ebd_n6w=GxZ!zTFhl$4`;VGu
zhTh5YY`C#vUvAEc)i$bM|F=9(pSV0v(^R%T;rZ6!3WFt0+g0~Yyk6U}nx}Aw?z(fX
zvi-~Ua@9Zh`}>mTv56(8wO%ns_I<nP_*p`~Kl!<-i1k@B9v`DwaUT<eI2O7uyc{!Q
z(&OcZ(--}2Z##95<$}^%8J%;<TLSoHnO=Vls$Tx5bFa%2f%jR8xi(?-i8FR)@8~R>
zbop8kukluClX<Jv?=4Bnew;3Qsiu6Jd2W}MOOwRM72$ib_8Ra<%ogi)^z3{azoPwe
z@RKK<9xGQgMXeOkl0MZI^uW_y+2h89h{L5bSFYaAetHXQ^#)6+t*eZt2rb&T{77z^
zu|Q?(s({m5<fNt3_*j?TsEqaAzGn7j{s$*qZXZ=kmN|ReyE9wU#n?`{cy~lnpWTf0
zvnBpVMb(GC&;BtjY86YKkNdxlMLB2cB#Y#}%f_ksPE1OkylCyTw?&rIb5Eb)v+H{1
z`epAH^ZSdnYWj=*e`|X5%C#o@qj~$uY~9;YuU~Fp=xLeq`QwHHTRpANrCDoNmB#A3
zrSyJh{qoQF<+LAP_%ENilVEiIvFxntPK`ZG5Bqn``*L^P{f8PMfBy+Bd>?wJzW=-#
zhsU?@=;|{IQ%iz^KW&d<4~S#cFw05NFkb4?DcB)rCp&HT<Dzn#J9DR9F0`D>*X|rQ
zt;XWX9DdE`yQjP_3QD@DvMI!MW!24Bk9Y2=ydyX7TIkYcB_5vBR5hLko&NSUoIk$i
zn`ARzJLqP;yLW4UTx_3T_v@wl{JKvM{r^2$-t+nCr)OsmUyrZ<eRli(+MicnFE9Ri
zvET0Rm&x)CUDuX(<u07`dfOh0>Br5#zU++&o75%t#wEe;iJjilBTG_F2WA;3m3Ms;
zo?JOYoU71lm6MPJWAZ}(bIzvnj|&!Qi%eU5^~7{v({~?Ne|UdN*)ihwnMJY*&$e86
zCa_?}xkIW4oCRDDe7n`LRQN?z>bhgzEm5iYVQ;1+YKCg6`^_<G-`)JJz?jF_TAGFL
za_L3I?<a%pJr&L<x|*Jw^E0oLEo{I4$~7&kgbo(@ye+i-=CO8%xp&8rf1(9jD<`x(
zV=O%4V$`jrb!<y~(8lRg^4<87W@JqIDy?9!be|JrV?gJ@W&EFnuIbDaa?;`zof%;z
z#rrd2+kZFXxbO21n(^N1EIwOwNa(BV;;rA<WVgP$P`~vmXGi2iC$=n}vff8=H<$f5
zmA83YLhOCnpCJp58cL+t--`TZlA-u&lbpeEt;|(7?<L)Fir615HT(LrN1?lUE*@UH
zY{G=bt(hCTs@e)#K4%<{O6u8s>2`Ip)R(7oW17TTglZOk6aU~d>6gG2>$@KF)Yw;Z
zCN_TZJLw{Gv&8%F%I4A=)0~%GO7c11=w>oWqHWXXtXog7OcGeMXuswBTmE*1tF;`Q
zGi~=Tl}t5T`|-fm#)sdgOU=~U*3oM^d6UrU4F>MH8E-dSTXgkx^*UE?Lr%M_#w&)c
zJ6GG^Qk^n?wdfh~*WwD?t6bd!z0``z=N(MAtz>%aU)+m}j@8dN(vk!jjaPju(Yvd<
z@*rcgqxMzJnQw0`Z=Dv?cVzLsw1^pR4=r;`zFsZ9yE?G5>7Mtq#$|zX#F>A0?K9lV
zAh=?ame<9gHyO%PbMMaERwO7Gz$ST&an+6ZYr#yie<vQb-FujG)q7>DM!W8{2TR*m
ztANh;u#}(HI&F>+^MjfNhn2RjxUy@lzkmPhUF)WY-b#2<&3U8nar&zn&F|O7UM&CX
z%bU&k{qu_GJ*AsgZkwa9_v6p<P2bO*V4O0~KlS(jyIF3}+tTVK-zt^P`jb4RZdKg=
z8TZ~Yo1M*hWH$dw9>bKxW$eB+=g!QWDBQ;_&;PRG%<|-Y*1F}(4<BThetMHm`;?+n
z-HPGw8XtGQ+dR*|!shkkpP#mzN;UqwaJk0(zD~ZAG9IrlKYslBz1{!2`Str`;`da2
zJ-Hl|(T>lr`Sej;zV5?ffBU;$f8XtXzwg)K@cT7i-~KGzTmSXb<9_>JFWTkzS*6~Z
zr<cF)p4>jo{`+hA^RKnH?}}46^(gM=5t&n_fftWF$Sjwf7C4J#weP{auQm^-ZQAyK
z^%;ZDmmD_E@TofdcFn_!J$G8dKHt_86S*H$z;n^`i<LxEY@o=a|9T8bK8&-koX~R1
zXI5|Nyrxy{*BQ)J$Y<;@XHnJjvis)T9y}4V>kdkbFI4ngcGEO8Bw~T7P<C?9OP!-}
zhrRwdZFqF`?|R0T;7yt{pYI9Z_;YX8`I$N0J<}$O7HM-ghHr4X;=9tWk;|=B^Of>C
zslSZ1O9M-G*a!=AHU^5WiPDUidr>7zWyM#+M1hVs&!@?ql`(6Sa6SC-M~75j)T_1W
zlG`&b$TdDTs;cmOUbOzjWS>I`0`|G@TfQ<Ys3=WvIBBrpq_uNf_<!4(9lQUnOy1S$
zJaNt2SMRPE+BDC(XUl!7d;N<ilb$WRa#HX1{PQ1EB*bRF{cz-KO~7$QbpzEot&bCq
z)NkR`WM0H_X+z+PnCi0+?j`C-9on;w@$>D2iazt^-})~0zG_O}uPWw_o4f69Jz3`D
z`2AtL?0)B8*SvU=O&7iYTauF#uxnMYn&kBf%hZ~^AFkqQPVL>|9dl(yoKFIyv*cVO
zZpHIQ4MO^MMrXUO4AJH>y~MME!P4u%_v9(ZnO5JnT{}@E@cV+`(7<a)@-iQ;c=G%3
zf{j;M%y&h~vK9ISEIG2wsQ>l7<qqo(ZO&F((q0wYofs}~!QflC;QPZ<)J|xBbryaf
zKC9&E-G2>7>{6bF_5c3n#yI)7!m_B3`wcrn8GOB5oBRV-PszP~t=EyWb7B{ZgOO8j
zdDz?SuRpGz;<)+glet3EEPeUjYkbc0ys@$Vtn0Q-bI<!v5mFa;JVDs<hyTtixwp5S
z{#mm#=31`k#P>_Hw@XU!e%IL-ZgYLDyd&rDFDL%qPw@$OpsBNYy@Q)^xc<k`^>#Zu
z-mQQ2Y;om2`L|zYeJ>U6F+U`4y(fI{QvaMz*WJk>>#w-{nkE{e;FcDDbI&QsHg+zJ
z+v}%!&HlMz$>T&z*=dK9Zp=A+(e2~0XJ@#dpV8cJ@$~Sdc#+y&S3WPbo3nlS>8E?9
z1+`8oit}5(`|+kbvi@za>ObE-Y<_>wzo+_jKM%66ulvyKU;k<H{69a`?f-r3ulv#L
zZ&ztEfB(N{xAXsfczgNs<I~~m>%Lu{Usq7S$EN1<-SGX@Ki+kpew}}QdH(tQeY*Yp
zrP1v>14^vyzRswY^YO^853D)0^zfqD%LM;M-@Uxdh|_kb%SEZG*%w4y{$$+$Xx(yY
z-OB}&wtqU`?euKfl$R6AUL1LS;qIB!7hifLeEVgm>UZ*}rDgfqz3SDa|DIl8=bvBF
z_wm)@RXg6abUJs-w@7bsG-<oEv#>qKD1PxV=EfPhmmgZ37At&OVJZEz{lIlDuf9hG
zJjb0Qmd$)P@%7_BZpmAA+Mj(Ekd(W&^E`j-<-;m#l(Zxw{ydrd`-X#w+V0-nzBjIx
zM(a-7{@+{1<M$4h){tHMb`+N?KF;b})pm1k)L&WA{n3n0ohl7mO<2^f|2&kuXU>}Q
zt2X*(x$!Mma_s%qYd9(MMeiFe{w+D%FZQYU-kFj3Ggpvv-n*2mk`Wg6GmmHWY*@`(
z8ni0D-uA8X`)67?k6NXZb90~mD>=#|yeQ7-?N!sumBNOcNrK@is;@Tl_smUMwtC`~
z-?PoEX7qpC);i;$v+l~033vB7TQcrlRj#JFZ(C33uBmY*etL`7gwDOU+_vB0qO-^y
zU+(Q)!osSmY?F58Ts`X+kS{u;#dFn_S0AJ=Mr9?uw_c^~XI*_s(4+W&r1eVMD8KDi
z-COcDm-|#5b$_<)`kd#l|3%)6v7Q_Bcyh;wc?D0_^Lh%K{}DegHgj#f*yaxkt-0-0
zr;5Js-!Gn%Izu*VdVZT-zJl(Z6@T&qIqrvj>(}E@Q&}>HPg7TVBgc~GT2(a?>I^5-
zQ&|MxM#<Mk8~)OJ%6E+A-FE}C{m(D_c^2<}o^8IW)z-K#N-ip?t8c$7@v&4l@Ya3(
z^wWoDXCFU4{IqCg=s%C6zvf?c>bcGI``@do{G@`f6_@&he`Ig*Q!aX~;wyju!rzeS
z%=3AB>(;APe))Xy-S%I1>|J|{?=DTa=Kij4fpl5d^SKu07V{S6N1ordcebKa%f^}E
z#_E2Z{N8ptKPxO|r2IeHy<5C|dFn~c<x{3zPPCNiD=%OEIZXci>&K6OzI?eUFI)HP
znVFW1-Ad077kaLVl9$)F|Nry;|6k$z|2?_fU-xbE{J%%z|GZ(JU-zk5{?CKk*Vpg=
z_ic9k|6j#xe~ABov%c=b$Nu;``}f%V`|$O8{GNZ$x<7BPulVt7_3`!p1N73>d@m}p
zZuQ+=eE#nGT6du(?{6@>9a_G!L0Q-H-rr?g4?KR>ovpin^Of7;i~8)!tr@n*RFy1E
zdm^p$L~KcIlR@kytK|;Ibrbh>r&NY|<jvvZODlRKw7q6M!#3X2T=Uld{I^1$t6JFj
z=CXq(mbacuT>QD^`u?M*zJ;C8lx2{1y#IkAX|i77&+dp8=M(c*=||f*th>$YW)yXy
znL#Z~?2CKM|NqNpl)V$k5_tB&O`%EPh;rGPbGOvpR;j2s|6p{@=*_HMoXuD0_h3d+
zV4`wWh0%L~NR^H8U7?+86Ge+NEsA<;-W^@5n`7gfdi}GBN@vk0h0PLEh13nsPy2Aq
zdjp@(q1Z1=D(<@)uHX^=SaI=@*0KVHdh@(~{wAN+q(veZdM^s7-OBv`|3Y=w?Ce{1
zA79>FvnJTFcI$I<CvJm#?N<A_0=hR#trnFv)L8rN+v~Co%rCa)i3x|Atgx9Emy-B|
zaod*gzH--?=!`8TjN4B<P%99$F4I{j%C*hvf<RDjzV4TsLh<cick+eIoALeavKcaN
z@?}O1vnL2;Ds9MOTH3&7@~@%t;>{ms*CdW;Hd{<#mOXR!=Av)GPtvouJxtiv*u0P<
zQB!V>^;!KpO??usQDSd0?p%2G$KX*5mx$V_^s81DM|Zz>Stc8%aHoLxx2yB5ZOS{A
zpZIEb{(8@nFSVMJX1LjH-chsZ`h}~rR(lGR9NNhDUHAK4!z&H}Cm!%TiptWwYIcjO
z!$UW%TFrZk`0}$7OIdSQH+5_)yvQGA^2B<HX~lH;)SS}t<ev;*&wameHf<I6v0o3u
zs*km=n)3C}g4DIQA3vQC?8V^nYTLT$rz`iyWUZ|~5+Ern`zKv&wU3_sQpN2WO6@fY
zx4!Zvw|(oqUiEyI4BH1e(|T{2`+DWF*XMKXIxBG0di9?7H&5LSFwAV9aY;9EPMoK0
ze$CPBHLund{_E|3o70%ds8p-M>-Fy7mIdxi)@7<oOAOZjo5bIK_+^QeT)a$Q`RRv!
z(;wfxyZZ3Qt1pj!tBBjabxM%8X;0F`(_vfZ`7NKm{$0Gxrd2gx+u!f|@#y}aN8<bc
zz25)tb^WL7|DUki|9R4G|MTGW`1-$Br^o-Becb&2m-YWX#{YXfzwX21-~4tz9-MWT
zul;>B{rh^mA4X5j@7IK$_fK1$JFDByvt2j%clXU}k!Rz@U3OmZ*!JVoy@=OeyEdJT
z)wvlm`}yQwEb_ufc4`(_d=SY<yOcPg$L#97821b<@0acuWZp4s$e%wsuu$Vwb@;Q0
zLiwG0uO+{zn6Lchdbn8sKCSGN!E5*Ih!TArG3jUbq9E5j5<T}i4=zeby12nC;Pk<O
ziMLdKgsz@?h3!{j|ASqsi`zf*%$zjIU}J|LYbr;Qbj_0KikBHIu4(1o%KoZ$xFwM-
zIw*9d)VXy6;hIx?kGGUfHCf1aD1)Ij#E`Rh_TfWCIl(obDi$nQ@ru{W>2UXX?ucC5
z9)I@8WLK?(q_$%p<P=pGrC8J$l`K%dyLdg*0p~C5ca{lUpYU#>iLa+6w};!2Qzl`V
z^KT?6#jL+myyn)2!tF=qs{L?|xmNtp_l3B#O|)yfQi&b6Vb1Bar>put|85Mj?)u)a
z(QD5lH@A5ow=R=1o-W|`{OAs@%%bVbzIM*<xh*A}+}D;A=KuB-|DB-rvOku)PTJ-1
z>{|KcVCl^F;rivC?b~FQPgH;K<8;aanNYTw3hfIykM!^?w^Q8Y+o3ZnXT~fCR=E%A
zAN4GaxdIuYcFuopbbT_@@51|f3GEFlLsav$)jAUrpC-&Z=b%38@FcbE?mkOqi_7p%
z)w1H{Ep1W1>oswaVEZYXj_Frh?GpV2d#!gxp1&_#+q^LK@VuR`E_24)y$+o#xY*#u
zzl;eEO)p*;du!@?TX`+a)qWk8CHjbEf`{(X4vtCQ1-|o~1Lp@kP`h9xp>%K4-C1)V
zua<Zoy`4+D_S+*fj_%wP(<iQXJov1#bib7O2urPYUhX^7h-d2J)YU=yuT>vC>0EUs
z%4<f~OUs33d%fGfuX*<PyoT*}C7q{p7;{p)yPWd2T+a2pQqR+C+RV4#ZhtxB<q5tk
zsvjTSn*6?Y-+%jy!PYZ7s;v*-_$&FoW}A7HdhKe~N58jMs_6x8Y|+_0_13jp+Ad;t
z-jB|*$ah*URehd%^7zw|@;%iX=fpjEdg5h)h3uw_VN)!1y}tZ<`1SDRpL_DY>&O53
z&_4g)m*w`>^Y!=tznlMm{{O@OUq9df=V$%L`2WxUznTBPd;Xtv`=4JH_wWDr>U918
z_y3>m&fotp|KIBRuP4>*|NL0oZ};!V<@Njj+`aqvZ+U#(x3}W^>wo;pjy-S7XZtMn
zo0h~~TbHg0HxFIh@LjCkYwp^W%$>`%EB0)9zVbrDF4?Z+6)h9hPufZLD^5zC<Sb&;
zYPR?^Ygs?X|Enh_Uu3R|l;JmDeea~(*90l8YJ0gwyRPr-mFL!<=dN?B^J{I&+h@0~
zY?8Ffn;Y}V&c^P9BzxGR?v9D!5ss_YeE6aI)^h2peNodbXN8@7RI_EL=I_IcezILT
zbf43y^Y5MFTM?5tw}h#ue5*ZTa%#)gNG7R7jb*15QU#SicU*rExU%JP@2~fdBVTV{
zxn&NMQlP}y{a)g!&U^2e2TtzVl72wv18eVTKiRUr=2bTo*MD0g&UW+Z!(Ve&+|A`a
z7N%5~GhxnA5nJuWn&+i%^DSJT7a@C8WkUJ(*ES3}_ge0@v>ev)o@noA9I*c;^NPZb
zl4+}gzfb=(RrrAHeC8{c)#`oY8fWt}-jUGfJt02p@`Aj)=eIfiR8)KxtG;MsUw>A<
zR-t3{dzS56q%&9)j)a&;No?(%dG0~mUmXtT2frrVF{oMSXYwWgz17N}VTOW}r&I__
z1_~Kvo_V9m*g1tU`jnAItF-1pAzO_LVJw1*g&g{~R~V@6WmE3puv-7u@WH_o&%R&(
zGdbhs&A)cFe7eV0tF4_T-zVqgwQ*_Dx1~4l-aUE6f0AJJsxx;cb>-ULm^@2bEJ4p`
zapBjipJSe6EPmwS;1}6*?}C}Qoux!M|M}FWeX)zIq$3w?**o(jYu3sObN6&_aPc|2
zQOI~gk(J(QPSfr+TfIbGl~mVmU9n}S=@CxxN9op8Cbim1H~FvZcqkjO?0vMr_kC$H
zr&jaa`yug6<#t|W{l#V5w%^gYwXM3g@}`*K<lZe|%WESRP4&OIuG{pNoZz;Ku#oTb
zHtv7_byuQBTaeeXB;%dAU(&3kJ(^sLSltfvzMbgzxwe!wE~oqWnuz)O9ap}u@BIJe
z(Z5rhinM#b|Gl@_UaRfRF0}=U5hixAUcR#Pg7)2-<~()Pst_hmMMjlE#h4Gz7&Qy+
z3hZS1%G)2mtk`8O7bnxF|EcJ~7M<hbueYsx8g%N{)9LpoJ(K_c<M{ta&HeVj|K`_!
z|9!va_pkat-*z9b|62d+eEpA?$L;^R+yDOddcEEMPu=yu-#^*^cm4m>{q{9qUp2dM
zee&e@c=_@6`i-$~%GT}uq@4Fq()ktdGVP8(Rg2>ed$BC|_Of*HoR&S;(_3?9aonC?
z!n@_P^^9LX6wHp?V)FILxqF_==b`@G3GcdH=gxoojpu<vom!go*;!KUjCsb@JThl)
zWbNoRmy0_VoV!FiD?sFaqt_R!Z_`iiQfp`wQP+O9aQ}6G-F~w#U#xmWE@tjr=CpD8
z@`*+#`B}=J3e7%X!j^7wuhqGZZ$_|EQ_x$<H3>c+dM}um9`HymK2_1!oP6Za6}I*z
zAzgE~uGp}0rseb)<y_@(+0K;(k98(l<}v!Ita1uDdOYEom`P3hgyUbN8U^)oAN`vv
zypl~iP+jcc2bnCP7YaHPLk>TwUYP%@T5r;%i@uS2QiJ2?8gmF5UYT&U_m%KD-}*JY
zi&vj(kzsyiSQ#uh{fbDAjBxt*CA;R^qzKq?yGj2p$$PfofaDLw#pkM+!d7Tl1-Q=U
zSf%^1AokA-yDTd&y;(gnxr#FW8j?R%7ag+eklVjIF5vu22P>b(3)>h)W{Pexj*MM8
z;g~1WT;}5jM|ga#*Jxhf>$(2WFD5BL!^=#G>rG~DKPmh0vEm$$1uTAHcZ5{~lvL(l
z-hIT1HQ-EyRf5T~Jnsce0g|SMPgSNR2BmV}5#Fh`{H0QIo&9>lg6vCKn}b%bylL@z
z)uM*h&@EPNnO;lf=kA_->>jgoLe9e_Dba2bF=bDVa!8~cXIe6MlHRLbf{#tyEd5W1
zWUY;hZ}XMezJ2w_k3B!ii*i4{<NyBdTb_c<ZxMUrk{`XQ+&6-A9&TYc$=YycmFtpS
zL6@f+M(O^$a4h;;KKG3e{f{FqIfrO`4Q_aD{U>tysx0HLrKe+k3Ua=@T*0DdmeBYu
z>hIjGX0azj%Jl1`--i9Z$$oR!stfaf@wcsSC_nf0_|iAl*|ni7L$|LrkUo8%o53KY
z=Bv`Pb;9+Fy02s`&cCv1$DxmXiafDOqOX0^9)FwA@lSpAoBuJv-?o=>Z)dgIe!p+#
zK2wHe`A=&7?g%a4GduQ@cO-{;n%V}jrC)i@JzswPOl4%>^2>i;9$md#e0`sOoNV9s
z<>KGjYXh?dCz|&!{rB&*zumtV?)Lv)zI^xZJLo>QU-SQ+eQ*E!(f)t>|GVpdU9bOn
zSpMISr`?yY*Z+9)|Fivn{r{il|Nkui_e1;g-~I7B3w0Sg9sJ|#Ra1-S2mi6J&VRd0
zbp65HDE*I%Z_TkNs|{M3s28j8(D{aA@LSeplM1eU;p9B^>X>HMxj=TlUG*3KP77!?
z`m<Skm&Dr7F<YWvq(uGdWqQ!D#b|$?^}{m`r|+0r)|!a#s6I6HT)^_V6U*j*jPnsR
z`td{QjPN!2TBdjFi%z_MJ5Md>(UC{zPiRKgL@nEzJJBs!!G-5)v{@y~iitDNh#YSy
z<lL>)k;@~LwS=SV!{g|X?6nfiHjCSi*&GRVHG1paq_4DSM(^tN{`|~c_iq%$-CsSq
zZHk5CjgUK0tGWVO%U;?YSN5L0Zr{V=Tc?gM`Tfnve0#+amhG%ehw2w6giM>#Cb(*X
zQD&$?VU|+TXZa(=cjEVSBs;HhF_dTzNnWfNdp5Ct?pxLi7e6!Z-ygH&{v1`_8D73h
z7q`6BW!-)-QK}(hKkIVa+eHTFTvorhzso~?@9XI0K|7p8zZuD!Kl!hh9Vn}wS9^<d
z))%|8E0WiAJlEcE(_r6o_4nF4+<x@Ws@SmDMDt;tyFs#99;aA=_cY_iUxwRj%I+6>
zS$=k!vVe0+r_cmX;SCW7c~llX@iu!u|M0X|K{_9=n2QuNi8(*X787$_yC`RSb8`8c
zrc@UD<>#4hhk0$RF1=_HII%^he!1#pO_QGOR(@yQR-C>dYPwoXs9C2WWQLWO$*V6O
zB@1p&a6HVD_Gam_ZW|jvg~$6c%Tsr6bCngbty}QwyX3`fW+KI}cltk-QPw^-M|^UW
z)t58prPBO5r#~)Q{?*~ZA}`HG&+Zi=^+nR$VnT)-32B@6uqNy<UM`rw>fr?*&udFI
zUHSAiZKjc0W{dwU4_}MNpOuP@ioK2<Ucj|>)zl?@KacE)TomhTDRTba6BCuv?E?Ip
zbN>Xs>bWg6_q#||;(CkSTH3knx8wE(b>*(U_3_ZDGjoE%E{VMCR#>s(Qqs&=i7jzw
zB_z|7_p15%O-hozy6w=MM|_jMYaIVSM=`qo--&hhJ4-vt)BmrKnX2`%;Y#Ll_Mq0O
z1&V%&L2YWPao5}5zC8PQcK-Cs-MgpD-`)G`$GfBMFAJtm`_0mEB2_!S_VeA-;^oUP
zub%&R@$dI>^?zUQ|Nm9q?*FUT>-YW3|2O;pr*`{4FG}{;d_Fq;|2z9X`S<tL{rYm*
zpJVx-M~kajgOmhT@A>!2d#bMC>eE~I&d8r_ZQ9$;;ki{R%4Lbz^kxsy-^HTR8(j<K
zHNJjeoLOPZ%CPV(!(+xrrQ+&*43)|vW{26M=RW`V(s0igyJC(Ur|DT)3W>iwuB>91
zx2p2L|LWf}A4`98f40Rozh<dro|F2&vjxTrS-HNfzb<rS!{g__&OhF|`ETC4Ny)ah
zYS)Foz5J@UlZ(l<c-t)w>n~?b#6`aOZu6Vss1|lg^XU{`p)7j_L6@c1qP!wEPMx8!
zKvr1kMeZb~fI#1imu|6Mou>0G)@y%o!p5bH{M%2(=6Wk;ZJF~Ul*46er)&0BSN@Go
z4odUPmnGdyeb;|icE=Jendu=Om92X^r+WNlbv)4}?-ysG8Jr}v!Zbd`z=`GG$sfNu
zO*>>aa$givy3QYZ`IM!P@sD!1PX`sapHJJp=B)A6{OxMX<TYLX6}Y80&zyU39!Ksw
zn~(R|K1uAVZk>7T<#xq$a~W7y+*8=x__0S^=a1lvl_9sbiH2v?95r~++GMpY!`v^i
z?ZrLshVQB}m(~?Dm$9?vz3$khl0NJGg4M_Kcx#WFM89ca5MID_WXcTP-ek``2D&aY
zRho@7nvFNKY&M&c_}7xz_*@tNCf1lvEy=LBn?)-)x>hZSTC(cr;@a08M^dIHBt3Z@
z`D(?y48O(8!t<-<{%bnKe>&RuM%u6Yt9Q(}?Q&zQxXaYU<x_tp+`Lu$V`G1u!sWMJ
zYqn3ddRJ6(ZT|#z|DNfywn_b$eJlOp-AdgO1C|#hTkoAoKfEDnlT*yZ%SGk8gl(5h
z`IBev9TqAW?5U<?U6#AqLU&*9X2U9JUxU<m<+CDFx7~kW^0$Oxj(vK58%L+qGR`xz
zI>b-8TkSj0E<Z1Bj**ML|Lw=KKYdZ~?=OvhIFVyfvSrz3|9Ris&a<s}CoBHT{kGlJ
ztE;{&fAzkxeEJ53MNv6--~KXW%kdXA?J3`V^5R`fRih8}!rtA}i~dz-y(_+({_w?u
zbe{e5Z(I)ga^5`fzx^xwdY|s1zhbX+gS#d<%}Tl0u~k4r{kvCzVC43%5C44m_3-TV
z^6&C@_y2nG?yCCfm#oWm=9n!!zSSl+Y*AJH_ebgWzmL!V|E~Q1`~MH?YbyT!`v0f?
zXMX*+&GP?Ws2^Xy|IerU|JwKe{Lsu__h<6_lxwPY=f`now&tbYmizxf)Mde;B`Uq;
ze2tv>I-j=v$XZclnV-3}Hius*G&8Ve+GZ;=qiJ%l^dh!sE=~y05}t7}rCsWu_DAn2
zo(n?-jhv>NIs6jZFjYQ|@raDorlost^%+^8s%pw#%J6Pi{RKU1^GowN7N1eonrgQ1
zeSTf_9%i|o*=>Jrm-%IGU;q773{#DzV`GL$Zkgt`vX~T&f717M&n#eMzpy2BLwj|k
z*7R2S&~NkS3t23$iQqf#ut(2Q-OtQaV{2aO?wr>@i+1L`-j*AB=gMjU{R^SDzJxj2
zRW)&~Fxr^;Om){CyL-;&zQ05G`Oa^&-Ouvuq}fFtuT`Q6N{K>^D^@uOUfsnxdG6+0
zlUF{9z3{A8TvWdDsMCUz^O)L`${D#kr%u|&6tuB@mBN<YajRB@e4M_ZHM2#$W{+1%
z#AB;T`zKBM9HRLrBTD(J`M#FyqDA*|7v=IQ_FdW`(s}LHLbYIC6P_PS`5(`Uyee08
zc*Wg$LP-e+pW9|vRbRLo&S8EayLR8rEeuC4dH*$hRKypg)@o#B_fWRbc6a`}RVm#P
z?(<4>cegPz|H$CDGpi@ATG?(LD_d^boAO@E-!nB0m(Q6tN9-8W*(IGCjNHkc@spQp
z^Y4+B_!;WyQuF-enXFCK>p~JePiQhq3IwQzG-f{Nh`iZV{q1blyj3EX&ehC6Drxxi
zCI5cKMUIUpk8OB-)x5i8O}(K9uZfYA<B_dg!ri>}H&#o@Z{EW9h0W%96%V_8*K?M~
z2UqV~zy0g4pqFO4@xNY7IcHIR=5T(+8^&YP^fx?zV;AGPS$&I!f%ze&<%eHZ>{@fD
zDDli+g{$J`X4@uB+mS1<x<aDEYqr?|`SX`^*=Ds?TSW%`VNX1FdQ#Cg3Db=wkAkL(
zT7DFK>2kdK*GK>N|6k4j_u(@Cd*h3jU!OLwx2XSm_1D$uPj9c^e%w6!rpxNCDSUr3
zURMPRezAOcPy35ZQptn8!uzkfeu!Z>xnt+Q%l}{YzP9*y)FMUx>z8X+b{SU5O`8#E
zV49nh!F+=CSWZxa+_4GE7{XRIzgTpYhlN|jM^i9R^{2fT=RWZhf0%3j*}qKw5LP&?
zPgINH%8b7mp?4X=cG~3fzrOr>dHee~n|+mEKk~n~`yrvJT_N1?;?IW<N7MKJKe$}}
z|JC^aoAvGg|K0!Z`v239AHV+pZvXH7|K6nA|9t5GZ}X}jNALfCRUTjSck}#zUk-=s
z$I18~51;R0n|(82TYLV$13`@byxW<g6c;5uGz#jj`06ZlOGdU^F8Hy9Ys!g>%M`_~
zn77zQUehn#u4tB<CJ^kiSkTBf+EZw9F4vjNh}fjW%L^Q1o->EI*`(fK$ZVDUJu~xr
z4$I}D69vD7%gn2bmQH;vuKsP&mq%GIp1l#S5<j@`(!<W_#Zy0Lo}Mr*Cv|<scIKX7
z*`C*lviqAKhL<~feU^Lk$YiVL5!)|!4>+|?>2H7BeYsn_{CjzM`SHsyODxj-6s{;`
z-@KM<z5e;}&p$u@`BGB1Yt8f0+b&&^0%zu2&@%cq?JBGDHqA1oIWu@Ix|mp|&X=0U
z;dsBmeABHtaiUC1mV|0_hON+;8RwJDoPC2`*+=|LUCi-+m&<IGoSwx@&{`TYg`+HN
z>&|_V9%nYi`buBES6b++6JgtY=GUYjtM?uJB(ptH(cWqPqs&_sGZV^XcZpA59I9rL
zSfRqs&u(Ax=E1JTipx%Yvp>1xk384iWmXUUzVp0zeC}$NlXlV7xmkzbl<IuDamLU4
z!HR|kX|9T_m$}Sgn-~jsWu@)7zDG4@5_=gV<A%r4Ti^aY>lK?naSHpCb8A2D4)%Jy
zcJA!9gLmHWEY<8h9DgI5PxgRE+uKwFjr2Gnv1P0&E9RVun8Dx4e}>&H%xTSRf%O7w
zjtlqmZD-%LG~J*%ki(PF)pT;^!bx1B(m%J$hD29C`+M0Zx^nsC#0BbCc1~KXuIVpv
z-Li~FZcdw`RrxIj9@{A=k1S;~W_c5Nk?|APDZk%t_re}oOP4xWG_e`HSy1_U`j54q
ze@Z;>W-z%uZi{~G`S{p7-{J+O1~CbFY(a7^ZDvi(8=j~x-;=dAG)wnDU}*N+e=?u*
zc>Z*UU05iqf8|nalA8PVs;v(hB|q^8>lx0do$le{ac0S?Ih~U=gyn_hU*C2Ad3nBE
z{ilAu`j3mlkDqy6`S;iBw~zH-pT4g%|8LyBzwaJ?D!F-WZBF_`p~?vX7vFBWma8ln
zR1_2=?>_rZ5&tL2^D?rQw$?LEo49|R*?sD<;J3O~>%Gc5bHtK(zt<?7Z(DGy(0j*!
zwKr2&Sml`C%&Ic}qnh`%Hi>VhiO}bV1v3t4Y+V&1yD3M_{)@5jyZV4x7ktl(zN|mB
z{%3l|Q@6R>z8SA~J;D*n$Z^Oe`|h^4MRhe5f4_XVt8V|R`!|1m!H#N8ug=de|6ToA
z^KbY5-$yU|&--`M{-3+$$Nj&z*Kb~X`~TnnAMgKt8vpN2`u`vDq8I=FjQ_vB-+%i4
zf2a3P;qRB<vt$475A2dlO0Df}*YV$<7?M6Of;0P?!^+SlSETw}J2JM~Y|)SsQOO8l
z(?7a+*Hek}lMl1RT1zksyPVke{7Tuixl`3nUU=rB)N!b<<7!av3=xOiO$tx#rX=TH
z^7*_)XscS2&}QGl=!y0cap~%F%q%okc3ru(^xurMfQys9JP}wPTalZ`s`@^INBQRZ
z-g{RyCkP$-RaX?}8t?RdLh6}gYc%(+@P51Og428HoSyj_x|;L(j{m;=clGr3ad-Fc
zs{i=u-Me=c7SE=5db(`SwO&8{^V3fimiBhG`|2wGKKpp~@#V)qi`MOu<TjS`v1{3M
z-DQT$LdIQFHke&mb<o-6%-Wusx60bDADT>@*~NRGW%6qSk4SCrm5r6(au|ZY8*glM
zE!xDBWH3SKcItvvE}pGRZuNKsX|BxB3tIL3u!Fnx%y53D<HnMwrbtfk*tRJ?`+VkS
zFV7XNqWp2bcjr#CV-$J&NMJ=i&#G@nEf(odh+}(m^yyvESIs*Owr=LrYPNmVR%Q1h
z#c0pNvc+xY*YyfswC=OpJYil@M9Frq@?8J*GcK-_OZ=2k&%St5We->C#g~n{<*zJu
zzW(y2VYYSPx~$t~NwOMqst@X(eOO+-cVFMn<GFfCtlAcz4|#A}empcK(m1oNXwi{5
z!t%v-dO!OvJ532u+$S8|uvGsO&!N<>wWgPCnL{}|XY)pHU7_aNcC)ZHCOZ9=>zwrd
zg~Dq+9zQAH-@(_O%%37rb8{~LQl)dn*FP$HJgx})y(751rsn$<v!^q;FE05yZB>(D
zcFVkSS00|Q;)WuRW4n%W-?Mn+=F|61%KrQ3*S&i~xo@zo<15K~ee|VTqR-8vch?@^
zN$~V`T9tZx+ooLCmmV%kL91u*=^vaQuRCu?Ka1MzLv}$LZ7&M5zI+rqX12;Ud(}&?
zTNSsSrE^y}+I@QY^kK8V-}356zv*)pXy|Ljo&5Cd<<ZB}UQ~R4cJ}bomocXtmk4Mq
zpJHivsch$z(pdfJoAPv@9euOn86S@l>)WJj4{O3yKj)s={k3-GuHZlHXY>pmFMn5h
z@OUl5@$EGs+8^gmHIbN_|73DZZI$tP#%~#u%vR|<zIMWE(Ue<fCOA&np)0WJr`AhZ
zAC155b5{Oe{!;SG`Gs~&>)wAk__Enn-uL*7Qu|#o^$UUmPru%EHCOlcw^x(pW#7ex
zK05MpvVVQVwXe_j|2X>d<-_=Y-Sx|s{C~aw@Ao+$?*IS(fBXJ_%l|)F|L@g$x%t=j
z|D3%)_EX>!cdg^o%k?kaW!Ja=`S3V@fBfHNG4t*@xOyi_hlB}D=+tXE#HYO?rdN3D
z^6+nIyUm*vSZCdg_RMY)&54{MYq?HAdY*a9xfflVf-fXLvpjS@a%<Ge#VkUC-vq)F
z-&TIsU3Pd~$?_Y*lDgOV%+t(bljf9O(0o*U;mM7|7oYazZQYb#<G}jOf@gBXc3sBf
z``+l5dkLR?dEop_#~ee)v!T0nO=cJzIMDM&uy@6c5-*)+<tOId<rkSUBluZKOyBqN
z@A7x*KU`#&ulsS(eSO>>tGVy^+LQe{r({iyI+c2Rp5F80pD#cD_^!f2uhKIi$aa0u
z)*m)Y9eG_JePs}y66hKk=*Sb&uHssIUqt%di>4N)OCk1oIi{?WS8+}5h!s{@^hk<#
zQr6Ucjjlo(l|rl%Cp61uE{hTgEicc|T`96M)O+gMD^aUMCK}8#cb?-}f4e&(bmz$f
zN_neeFKgBEh?mA*KF>e(ox}m3i9y+7nL><=2J`%dR~wg0nzYTj=c}Go_2z>RduKd*
z@sd~b;vTN~cWfTlqPKDuBG=x!m3IX`h@N6Od5`*%&daWRk2aom3%Kp0dgu=4FRK>D
zI|niijwS8+ea|u6jb~A|>Hi<kFQ;eu9@RPHE?@LYcdK1;5A*HfnkpIIjZfbg7Wr`p
zr$x;$QfV|&R$ZbhTV(aD=G4m*FBcqHwc&24hm2=a(}IB1>{wghU0$w-<wKJTQk|yB
zRXwe{`r#Fud~w~z;~6g%XG$gSIm|QvW#H3;@71QSFmsF6mN|YkSbD9wtGIFY)~T*n
zPMU4!$v$QBPFl%hT~gc2U3)Kcu)C+X#5eyeZBn+~ymP;oIbUk^ap#t&X;Ft=J-#kj
z@H(B%bQ#MTrK3}pE(!}NoFB9{^pN8Pw+ol#B+eGCm$hA(J2N5u(Vhrt|FoX!w?9J5
zET?~)raOgij@XhjmQOE#{P0nI{j_GIlbb%=?QVWlvG0z3+!m$KpvFsWL1v*(4;NX;
zP202Q0{^-G{)*3S*Sov20xxL=WozCFySsJL7S#+D9)s+uS)X67Rb7<5-0FgHWAa~~
zcU!r96UFZ@KDXcS(S`C#tNW>(vTW++CSHH{c6@mLvR%=8GG~s`_p7r#jdN5#{hxm8
zxBYh2A8i()_iGzp?2&wULo{sSRI}P2+ortTG*8~{$HVwL`+r4b3LbKouiq6XSO52Q
z{KQlHf1UrYs`~Ht{~xChe>`%g{-^)Hi}t@?*9Tqubb9}P-<#Xs+f{vCEPto|(<fFx
z`I;Y(=g&Lu@4vr#)%0H94uz~+wvE9(Z>`PU_Xva*U0HhYc5A}p((hk)U$L9V6Bg&_
zD`BWs%a*ct<-Owx0zHL+1zcQPwA7;&IURc)QZ`>XX1I*4sQ0|=lwBOL?~N*0gj>Fz
zDt{PNz4%7O$ynyi*H!m1rwhGJywYg-;*!)X^=WGz_`lk4uAC6I_R$<?uMEd1^YW5p
zDT7@#<{TpXii0=r+WK*lFWZvMlW)!Sn{zqo)SPhr_}VY87W2o~mHdD7^zO%Z1r}+3
zZu3@dU3a^z+-5%C^2-mmJW<_P?{{%mo>r8vp}&vbgI9+%<<CU7En!IDK6J^GUr18+
z;EpHHZhY=A*|^3eFm#pJG-+im@3(<rD+{75f?gC0{eC}pwo8FgeTGoBZ)Awe_MqaG
zSGE3VZ4FyK<>dhj8Kxtt0oGeL=4KT1+~B>tUTXF<SNFwzef^UEm%L&0pE$Kewp+0&
zUuW8_=<Erz{;XWPLcP3PeF0xomRzp$3=6}IS*_2KT#`RjcYLw<yk9bR<?ch@(sJ)z
zac^EI?%=(ZA@8O9dc`!W7dB^J)qV}UF`Hps%)#0k`S+IJ6Vo3YG+4!)FY!9cY~Ak3
zw*@7+yV8F6dvq2%wW+C1eyXBi{Pawv=jlm&-d4|QQtTQw3z<)ho_wVvLo{$*+o`Eh
ztZl^t6Rgh7W6|=OsS)m&6wSo5F>LKA_NSVUgz`DJU&)pY*6*oSJMct7s)~uR;=%C)
zMWK;B5)B&n*O<Fo%vhw@yXU8$p^I?OsSVycE%#oQyYR+vo>k6NgEjWQYyZc|Ox;^r
z7144g=;O2(e#ruJeK~fxM*P-e_Bi9ctaa+rEX_yZ4Kq9MEWXld+w743Yv-~}b2Cz=
z@x5I8cYWsRSyC!L{<(eYaFM^S?q~Vbd0Fda&HM5{Lzn3;zBFkD--XAke?MORyU1?(
za^p{m-CY79EFMQ94=SxX6Jg}+=^`GO%CT8RcIF*J2SGumr>T)elY+LNUOFu|+WdW<
z_tc;zUB7p!Y<a2IcW~Xc@>jy<D<6dJJH~tZ*}H2ySSRhBSy%9Q^NZY)7YzbSDlK<>
zu6r#|YvISzp7x+kB7KoYJm;Bzdv2B2J}ce*)%gEO>E{0|xu-XLuTWFXQDE+0()+-F
z(v;KAe|P2G4*PQa)2C+tV87EJzkFOi-_Ca5&)@NXU;Vp!y8h?-f1%I5eBS^2>fdhh
z(+@w0+y6be|JU^YZ~lDwu{!?mx7GTf*~97oAFZ$dv$#J_HvU?F-IvGa`TMH(#@U=&
z;k8!DrOj~7qxQCvRW06yEzAe}w9=kG`sefERl`EfFgM#7*S1<2Zey3YJS*mR$8y!>
zvn`e{oqYU?@`2jG=_Y<&&8**b?b_uorc2qpRd-jPYTPu%WeL|xkF|Ug(!Ta}w`=--
z_}um;RxZfJfa~f1{dfMpoUY<oc9(OXYsl3zHQjfNvl+QqI^U(8fBCTF`<!|I|DP^M
zJ2z*Zn*U^91{SUrS|Lxxx_9fY()x67Hvjw16*G3Hhh7tSps4O25|)zYvSaqlRi~og
z{k<D?`l0d02i?B!x&KM++p%<ggh+0#lSl89WYNEiL^9^c?>HzRU>GRXp1I%p&YS!^
zK6CTkhZC*dEW2yr=hMTmXwf2PsTsn-Sz6+y%lgEOW4l*&)hBhN$;91qOHAep<dj^s
zFzK^e`BAkkOV+H7D!bs5@!k1M%8Mnrp~kDWMY#o-uo<tD(iOk2%v5!ak$vLS2E|XM
zXV2V7ns)wB+%@;k_@pW8r5F!?+$jBHWAfV@uP1F@^^}Fd;n#$xZJdU0%u4^NWlmPT
zlDj=o{>RO$w{BnIU-)(BimCtamoLth=n9V$e0`~N_oiKKQOEMR-wDPYuZ};vVY|lH
z=ao+SQ#)rn{F)lP>hi8HTfeW)_CNA^|JO|!+@BjBmj3+vCgaF+*ZS?zFRC``$}d!3
z&$9US)9Ll!dULo}olO@C`(E%**m|@03<W`zEiDrE@)cj?JKK*ae?Fmij*-hEN3?pT
zyXdMNzvK?E1iA=J6uHU}z`z^Odt~wL8Ie<6Iy*vcYZ=U&U$XQ(WA!qFqvB1M?RGEh
zS6=r&V_WAmCV|(Ri*En(S+j4-G#lB;|5iU|PF!Q9cw&om*5=;@vmY)#o%;9hKFjKx
zPH~1GKUV%PntCHgUp2kpXk^~CS#L8|gxv~1DpB~o)kOXv|Lwv9nFlyo*JfziZ86`z
zecR<$j)kXBzi9io-t6Su<FB<e%Cm!#qJB>DT71n&<jfh5lN`LBjlC-#e0=-o;q0mJ
zCVqb>b>p(h-ksKS<WJd5Kh3IdtYEC!p;%GNzas1kPv?ZmiHt!8{LTwjU($D3ut4j?
zmfQZ9^W4);|J)RFE;DxSAC09?N+&Lw<hnWQ;<ksyPkIi|NL~|h_T5*Dck52cvy}Wh
z!Su<!r7beuy~RExJ=xsiO`jz5i&|xUdz0#od3T&z-=qq>v0tV3oJ%dw-mmq4{Ov;r
z{ynf-*m&#DCt;;2bHY#dCq?n}zPJCy5`0Vko!#F@r|<u{tN;I%{{LSOA1=24y}5o}
zQvJ{H`s(89&G&ud>ptGx|Kqd$-w*HZ>ev5Vy?+0{w7LENum3+CZ}TRP?|Z(S-Oo2)
zzC?UloW9(PccV~oo1&CM^|NZ<_cPn2g#12<xMT=dm0$jKW}eLno30-Xcg$;bO}Q_Y
zx^2(h>Lj{2#&CD`%uC<WO}R|F-lWgqT5T~wLg3r4hUdlV$2V7;)W}jYlx8cgUa>V@
zK2g52FI4Q#n=R*4+-Do?G`%RDwYz)rt41FC8!U%JU$1f$<GA>Ctz!+(91Yhk2d^z$
z?0EdPA)Awen%pIe-ph5j-nZSg@JW^u=I@;3ylan>!xF8llY*AKDbhQ=NpI(xr73|M
zz5G=#{hAq2JDo@2Kz5b~n_Eca{~P8d*JU!6EVDDpUQ=aUym3wB-x*5vQ~rE>A^zan
zb3uj4+<&qc&HnoGpx>ha(`x}ws%zhOUDHwh=UEfpurRLJVR^35v{f7(FMPJ7w+8H9
z_;!_o)9$_pEp3N0|F>=VX{T8hePd@}weF7C?(=J}UocazDmP1LUvfX~-?fF-JZ5ue
z{B2-hf88P;FXthBa=D&u+}u3d<EP_~2Yx*kbLT^AZS)q-pWB09rm|dL9s2WgX4>kz
zZ@3ccOcxy!KbrqvaoZ&gy$88}`g?CYU1aZ+(Dmi!lNHsQTxJOIG50F_Gxuxup1GHB
z{g>RW(g&4XX$?y<yt0y({c{oGw3sqe^4GVXIqPG8O5adopMCF4;SWx$g@;PzuLwT;
zP&xnF&FJk?`>(x_5ZU>yoAa=>f5_4Myrm5+3vTfD>~?u1c3)UtIP_(A>0~3L%X#nq
zZQAo{!bV1!8&kq#OLvz({Nc>{k6YdGvl8<@CBdxOr_!wi)?_l2YKToUbTXL5u-wqs
z*^6VDT<mR${ieUJyj*o{g2~qvca*+Ft^f4uZjR&2s<Nrq|7Oh&HE}&JT6x9Pctua^
z(=+eut^ZG(e`@(|^Zc^y<(cMh-EHo?=lj`vBJ1^>>#FYcPCj!UDX*BW7@7O9X5D4c
zWqy-8H~aMpU7XT3J2T}NOB<_%MdiO0zPm0jnxwjZ=bD>mI8I!dyG$@{mC`b|6NS%a
zCiTBs{d20^|2u6)(wqyX|M`<3+Mg@=siq~*evMqU+T)+Ki{#gwNO{f2*do7DY}0?E
z*H8Z&{jU<7@a6Ho%l{sK3tjhX5A)S4pZ>f%{JqOWVCigg`Q;xA{y)2X`sJ71^*_$;
z|9}7g-Q)80pVaHWrT_o^|6l&UxBLH=^MC)|e)r$Mckkx^-yC1_{q6Ss|Gt|4KmPOO
zzjq(j|GPc^Vny};i|PLF@7LGcb?ar^S`i|&(&^2$YrUr%S1@=b<Q2YZ``+f@(xtfG
zyvbr)!Q@R>4U4@uAKEsrVfDwY-fI>nTV}c@?kfFs^PBK`@rCP40;TKfnBC*NH~;yj
zo5uHB+*I&BL(mM~1)Ey6CM~;~c~&t>{9VHhZGm}z<8`h61Yg~@;oz?91*@LqD?Lb`
z@=iRfd4tyRWScYn9I~sIx^Y!}$%$4qeI=m&Op({w%UMczxy6_2P>Y3<Jj^q?PVHE>
zXIY{7JHE?#k>5{md7`vx*|f7Uf0$0>HQzn)W?$Ju3ky#{*WJ8EW*SV}uDo6QZNn+|
zqg=HmU-^#m+{^yIN~+m!7OUZ>&+-?AV>3f*exGC8D8$h@&1y=L#?lTEO{sdz;|C@<
zT=5ajwsE?=Zp$?pYftT1`*j-}O+U<RIUi?Ru$H+j#($D|Ec2Hqk~bEGemLb><@z!E
zS<`9}`x-U%weuRqSuai9|5fxW+cwSk)Enkf&%b&fd9B1A^(&t@U_X=iBU6p^(+l?3
zab9cQA0K#o|E7}T+0_hj!Y39kH@G%`f$e<*hxGqFxAx_Ubw)i+v)iW5<feRn9h1jZ
zNjpB9-b|^>N2UnzcCsyM%;ezc<9=sbex_khxT}!}>z9CRAvX;!jes)|zMM0CR{iJP
zwdMVfcPqudcT}7`pZSRC)#iU$UwfiIB=@#%@(u0Tmh1O-pM$G{gR271u618582|5E
zRm9O5AgUy5_^4F**wXNWJES%o`%=Q-wYq%K-v^PsH||?sFs?S={c@|s@Aj)a=Ud){
z+rGXgSQ@Dpx^oxr`IEBAFFKQZSQ?ZM%&730bHJx-iq6L)D=$yo*R@7pviXdg^yLTr
zmu#=d#$H~`cF+8omr$nIg=wGJmT=5G#p<8`wEU@|{o3jMr;pEn@HSVD-J7|#YJa+S
zRLIc}hdo`6Hnl(0S9`SS)veWyzoIW~C|}O7K&>yLNQr~@@DBN6xo2M&E!tNsciF{5
zDeBss>sRmQMftcMnA{Mu^@sVveG!dq8m%9)JOj^iHLd9AS~V}JX{pC$UEAusyh)XB
zckT>Z{oc&{yFg^FgY=(`Q`hcYH{*3$>fOnI=U<Tho#p*GexvmF{>bA;emvt>{XNUp
z{=wOX6Y>pA4h*a%nx`M{&aX>#h`&(q{p8`29e*FKp8q8E@3->!|4-xpUH$*V{=a|y
zzw`ee>HqJq|Nj5y|3A0Qzkjd)^?AMB?=O$z|NKe6U-$R#@9*;ew%31_x2rDxyjeeQ
z`RsP{`E~z(JiMD3Ip>aGvd^JYDk=y0W<OS(<(PWNYeUIZF0F%V7ZqL7ZZ}N++VSo&
z|61$o)st%eE=<d1NR)oZUwu;PU%&;|)BJh-b6>sPzHZN2b#sgIdtIx#I^CRBeHXpM
zy1?m8??juf?T=WJu7sSOeco*Tk3Z+S7Fe|Q-blX4nb`AT<2u=>h$hnsQQTtREB1A9
ztlAWkV`TfMx$OJ5e*#73>Wd7H?%FuRa>9|my&fwC`ZawsZ!9g8I;PqC*q}G_tZH-0
z*B&MZC+48^nY(^i#1+g<*>?TW#5-wv>WAK*Sod(<>~%}M$~Un7WesJ2#dCVywucK}
zeAM-k-E-vXWu6ej;F1)<@YO-REFlv^mUQqe6`B8{`$vm>Ad_ic?5slTuNSuoDqMTe
z<~M<jBYJJO{*lhR7Jnl%_m!@k%gDFAeaF_n0gLbNzmWORdnaqz7S1<a$uEo+#y@5F
zFL?fg7x&?~TUWRfdDi`&VKzU!BQ>GAK;e&^|9i2wTlZR?Z`O>rbzaZ<cj}JMG1D5)
z*WK%nU(e#YO*5;LgU5HJ9p8;92aGfJGV=OYc$x5+^kqt2&XaLl*7Aa5uFDCBoP{Sh
zon0Y#xZtzuw9hA&W}Woex{mw9OvOcqie#$A=6C+Tbmh)vL9dLp2VY98nf*oQ-h>FL
zFyjD^#<>e`>xR6R-y<lLG%@}5cGq?6rFV2^Y6%6Vwh0JKXbSt5UH@~J{L-BhXRZ7A
zl%xK(Ny*BK9_Pgmew%aeXP3e0Z7jR=xYr!keWd36`dju_7nM8zE-;)uYBfhvm_f;x
zD|u0(0~@Pml*%Ta*-I`QSoDH7&v|pho;jj_?+M+X=iR^U;@@dO4@E6<8TXhTpFP3B
zAuU#P-ls0@7d}OvpYK*wTi?C&HvfHIdppabNugab^ZnA^KH@R>^lk5p?hmWnZe4q^
zaPAEKMQjt=6nU8$x;$jNBE34<WGW=eJVUpx5Ig<)m9<Ln)k8|J(&d#BE<EA%<m_!Y
zae_<dxa$1w{r^|BJ^Q4z``p^x$^ScMt&&?|b$*qzyglcV6<sceAFZCn5HoAm3;Rh8
zccQk5ezOjIeb8wCa`qqp-zWY3UgrNTStQ(a-JyRgY+bI(U28fok}&b>-QD?&9SVOR
zy?a=<-M;4Q<n?;}{rmsij<5T8)&KvW`Trl4Sp7R2|F8Z3*ZSYv&-^&5&L3a%@1(l?
z-P-@(ZrA^Kd;R}E{r}$<xBstulDTK+?#c=aySfvqzeC*i)+iV-Fbb>WUM@fW_NblW
zoxhD)<(%GU-U!BSzudO|#>U+nFE9I&wMOIh)tA{H&un`3VyD$4^Y1S|2X5QiGqH<z
zMfX;ZfSWE%xrU5+8hUI==ccoEYUZ8sIVrLvGlSz~%x3p%dv<R*ntE{S?rrm#Z*4wg
zb<QZ6DNA(E^H*i6|E8L;R|QEbD9z?PVpyR%aqW`YJ#JqXMa>UvKG;wm+O;k6N0{<#
zr^$T3*BBY{&vBDu^J3=hyyU<vtUMu-`Q<9%$TJERT^Ucj9>;a{E@1QUd!al*drjfp
z_OPWLmXlVdr${I%1)qMgdit!RdK|qMVnV{)S1u4)Zhz3NuRTGpG23A^@5#WRSuL)i
zvwwzd$uWxwe^-%k<l9NJYk{*DeVLy9+~#2K@`&~?H<(OcpP6kIyyn}9pM6Q!d3K+V
zsl7Hu-}iCI^2g^>4Z|fGe*L{Ptt#}t>9xiK{d4qV4~DF1?~dNy_FAnWWb(uhqIX}3
zX<zYne#7q*u`8do`sJl;sq4JU%9mPHiJiVG#gcR7+&!ngs(0OP_pWk~*dW!rMagXb
z<*@A@k4yrjDkUyi@IAKR>11Ez^5Ue~)xT1^j96Tjsj+a^EZz2qsnNqX`|&y#myade
zy7adBn#=$4VscA4^5v5L@8;)Xc_l*s^>!T%<^OOg`}Wqv@~u&&AK6xm9xR;oL_ci%
z#lFWM94|6`zEY-L7P?=^Ok2N<WzM=;m8rk(1$eLBTKCQ6xO|0k7U%URwm)jp>bI76
zo%CV7aIvh0p{QknF@urXq7(-g;ln0N?;couc9&k|*`N?nmpwO3ik&ikoT^UEeqL~(
zj3<7+|J%Js*8X~bgYn+|1XGqPGbZUZp1KqJ`j(4P!=nkkDcM@-aT4qs1ang&Gv$sy
zJe&Cavj018z4!O`+o#*7t^K6GR;c04&Fx%wFYVs==B!Uw`s&jMZ2TT?nLX{a&Z=3y
zUK1AvD5X7{!sex>we*6)DlUl`3Puqdx7ilnw-z!`R_f98-J`rv-Bnv$m(4|kt0BiT
zWDoyG9?|>d?tHtnI#y*R+q>W3wDgd1SYs`4SLA_J__qk9-RHj5>isyR$M-W*Kz`q+
z3E3y!*sHA)t3RUf+`Z%FeD|t{^$#a4kWW5)m&5y8PxaqF<>fE#g|Fpb>mk+<`RVcQ
z{Qq?~d=FbJnf>zFw|}eW*L*y^|Myq@|Ih5}zFyY<_eH$^+uiH?f3M#E=e56GmGP(f
z@A%v2|NXLh|L@=X|Nr~@IR5Wf{r_K{$Nzhzp5OMe;QO=3<@x*LKM85@EGc-*tTm~9
z_H}{PH+sx2&u5i8E~>h&s;~L5bbR+ho})YS&d>F@Ahdtc`J-$1ZK+EB-Z|ZOQpMVe
z?8)_ao?M9jQT}wx7tZx<ev=9wuA0%A75KqHKal%rPkL$FKWP!3vq!Ra%$=Mlu+D7P
zmsgev`fq%uKHMGn=a%v2y!4NzTC#ax{%V-7c;_v;U3N!Odd%T)fxL*)?p3RnMtWT}
zoGbm*&@KKt7u%<nqz{owJ9U-WG_o2JSnSFay6*OitH1kjm`#J#<#VVT^BhL4Z(Cz>
z880?y34Q9SS|rLO>1fY(WvNZ%-8aALQyfaxr3TO3C37l9_won#KmoUa3FZ!q9_|#?
zboILO<H_YetL_E9W|@7V@qdB0%R5%nuNANKrt%5MeM{cE=&pItl84u{BDL39G5$N&
zWL#QcyY2TOslA&+H*VQEZ@XvG*?R}|b{sD7-u!NUPHygna)WDX#i4vxZg_1wS-;`j
zrbCQ>_qaRkk^27qwdU*4i|fxkws!m`FX5qRxB5P>)qkFx1J!$eJ)PZ~R=g)v)9bhB
z4b~C?hQ`3P(>OXMn<pE8vf#Ts;aH<^aZ=j;hdL%x^mZPbcE7FT%$2Hy(%&K4_mtnv
z3p}DFw3l_EWoz4_2Mt{Et4}wnZF*<5@}K6t7Zzb}-_K5EmabW%y`OpYwJ`s?N(aqi
z_bV#>nlSIo`oBL|Sr*(*ToAr#VeY1+Yu}TuU76C6+fdnRxFTR{#<gG8OzRhB&P{sF
z?#;ZXFXKgm$=d73j+yWU2E2{>GfyF@Y=gd_f*>CY&j}CVM<$;v0(MPFjhm+0Jwq#N
z;v&<H9jkV4+n1AZ`}4lUy|3oQfA5=hRhMInNAH3alZ$0;Deu^1`e(OVnwNv}Qd$4<
z-?M-J{b%4O{Ep3b0?!5~%k@PwyQgt}63^#bu5b7A?(N5gnl~3-vCZU&U&(g+$Vxp$
zC$(qivt)v#j%V%{7L47ra%E@Z#D(^YW#9QI^ZdSeWlP7DpiPXkS1B`Ylq>Y$oMEUc
zC3LvwghR_Q4?eCn#^-0LiMzUGD+g{|`ptrEEuU{AhcG|Ok{O?kjHM=ZuF&7@^Y{9z
zwA7WN3H@w4y;6SHUj5lGc56?C-@X$Me;QVmR9tklQJ=KW<ln!^qOa{INPIiKvbSzZ
z&)cIvi@v(={Uy6c!Cd-4m)ovOzu(vGw!Xu6dHU}o-}$%yzWd>;{hwdg_y4-O|5y0`
z!}Y)Q|L^|){Quwh|Ly-D+uxHdv;Tcke184Eo8t59zx}=a{eIo|Z~1jUXRo)bp0RB9
zZgcs&^?y!&tT~hYk=y+-j~a83inDu$RFKHuYv-&#)y-{S*dbM6R%f(ri|>A$E%Vdk
zyfe?RpVZA+9r`lj{Dih0>t$VB*KhT^va8eOt@xRB`kFZ(B!injaL?|HG!$kP3|Kqw
z*|(VQjS^L7rai9yGu`5eT};+T=HJ1OkLp<ME0dkI`ljiV2S2~<pV}I&5*EI7Yy8Ud
zreYF)!7rlg>-o%X`z}2oJ~8Igtm1Q))8C|KdvTqRPCUKpb40f2`;QO#6uza;YFH&|
z=#cD^&|bA7yJXdgQ#*g$Hh2AKACj)z5gmMSZ^+l(Z$r*)aGY?c?b)~YvpQQj*#3na
z`Q{qC;LZ1V=hXqmmu4o`hFF)_2K+R1$eL7Ro-$SFt8KT(#076+rvLb8-SqnU+MKhe
z-(=T!#CEfO@%qsqZM^cJR`!`JpO4vpi?4dlD&5)XyD(wJ!^}O4Hk6r4E1UXfu2>bN
zF?avYsPE6kHk)nLbzdzhwXRKSL*_KIv!Rzu3%)K4TK@F)yT)lDRY%Xi$)BuU(e<}L
zH2-edTTAK9o`+uDkc{`qyUqUSy+thRHM4-^|ECu}`Ff3$r-$?E-e*TRrmAr`FDiVJ
zDAQ)xdT_3#p`p_3h2Do^PO{zO=V6v+R1mz}w&jz148PuS-iEZ$uPf37`%=G7t&)E*
zk3m1J?aISw<%x`|mhQ3KuAL;X?pxjLM5gT_rmLNfOYGJ9QgcVj>Rqo^;)*Y#2Tw3<
z4}QDNMPt#`<9ves4?o}kA)LJ@S7dS4wS^Ab&K;`S>-+Y=g?7K|$KHxh%s=1%l&|}m
z<>5zF?|U9}MBgz!63{3xaboSd(<TN+!oMFjO%RYUP<+?1ym7gU=hHdGC%GEd2sKO1
zKd8R!?MBwC$L=5a`{mEF6<+(ymTvd^Cp%Ru>~0E&1*71_&rAPw@7gkb#th-*i5`z#
zGUp#_zdgJE-TQZW-+yPy94qu-mX2m~3_Ot?siHsOYvS~CzYDBZuACjb=BwAOUXQh{
z@(O}Gzbt)uG-QES$t4Tl*SlOc<?r}$!R4q(poXC8qlt_RJ)uHpy-#W=30`>3-Yf3J
zsj0}Hax>U0JFfa_tCFdhk+X_#slR@xR*-+Si;Jq$qT9dQ=huAQeBSQYl(4-r_GhLq
zKmF(7+uQliD)m@HJWMX>y*;&}ZDURCG(UcZ^G3(xn~LJ5+`e&Aa$Dhp;x#v>`h2lJ
zD)YNP?85(yHMQ%*9DbbFH#{W2@#cot38LXqT`y}Ye?5D9{c>Hsac#}{-M0_lJ$rrs
z-}e1~U+@2QGCba9&(9Z|&)fZeR=)r5?|Az=_ujqd-#!2D7xnu8r{iVykKh0EYy16w
zH~s6sJ&lzr-_D<ZZ2NuPqA!lCoK#NzC~%iO683i1!KCcQ$-Gl}ct5#@S4}x)%)s-I
z_sc5jHA`2|_t86d(=R@|Gl$pNbM5gQiJ#eqF2XmWee*lCBeq{pn0nz^NNJ#`;m4n~
ze=`4GI;~Tp`_C-El}%B}@N50vt}nMc_67=P9`Px^YFjqfD5`w+Oxx&+8=_g4MLBK=
z*+pOQ&VPGQOfDzdRXRfDs#5CN*PHY{Tj^KEeV%f9%B2G-A4P7m{avIKw8d}3)#Yxh
z)RL6KHg&m)<wv_7-==IAefNga4%TA=?kbxmuCCgU=dZQ(0qfp9QzJvCtdCt4_-A&N
zp<>fY2_=htXOEs^Il<-jPLt=U!;fNRoy+0DR~Z+kEpzR3ong9EMalJ0?Q3p<?m3dV
zwlSxTZeLokPU`$?^)H{}_}9Oyd2=LqwoMB6gstIauC>n-&U$s*_OqG&&dgc1aq=O-
z&=Z+Eyn7s0v(DU5)atOivu?UzLcv`2t50X;z4LQXVOCLcsL`CW!NT|LD}haG4<0>t
z?Z+g`i~ICq(!=-aT((N}JMz?4I4b|=;nKAGg1Of2ze6={xT$~DP;%R<5Uj{-Xdui#
zCrFA-uFd4f7qN-F0arpy#5G-AMHo6Yd4-Esbxu+IeP@c$!lE^|wR>|H*1O16Fg@(y
zw4HIdOk#qd@aGeUXB=Y*pLWARC0>5+dYRa-6K^{=THjI0W76&NThEs>%lp>(MHz`_
zmur2r)Y_uCb=8gl^^okT%2%{r-*_3z7{5NWNR}_{arxF>(FpdU4bQ|Y7QWgjemN++
z?f%)5cMdu4e%rsedZxt08Nw1mMTZ&<e^#*xND4DB^E3!Qv{*8+Vat>`#evKZ&V5cc
zm073Pe(-56hy8<~gXhYU%htxfW?ooO&7Xb0zcl%Y0-y4?qs;1lj}7j=`<BmVE`R@i
z{hxyJ>KSc{3mx@#&6)mn6OZt6Mjq~!JqhiM@7$u?w7O=A22PwK+UOvXX){qWWA@LT
zs-4H!U8B~d2nzby_#B?1&Tz!kdu4<|2}6vU0$bF<j?S5t3P&ait=iC4<t)%*C;rP=
zK~h!b*pVqmMWoiwSedGFB=^LQmEUX>XU%!lpO)b>jbnn{&sW|5AJ+ey|8KVaZ}IJ!
zQGW90j+ei;|MPDB|6lk2f11AEM(%m(M$`5CW(JB*tjl_ILw--(s)bwmes1OdZ2J3R
z>D0*iZL)U1Xa0C{+<$$2$kG1uFAD3QEPA+KNc!E?mw(x<*5(#uXRLnu`10-hb=&KU
zE#8#v{`=Qme*fQpx9|TwZ2#wM{h!(PKabAuxjg0bui5wie7o##H*4RoN8RV`em~mm
z|2#ha-rnCIX5X**`|+@zqJR7C!*>rC&htLm<y*&`kkar@p7WckTix<l+xZ);e!Mzy
zY}VNY+}nAYvo?SFro1l8%fx^4;kO@hj5Ts*|G2T<?QhntvJ<)L@BXhi`!Qna>vbnH
z)ffUkZxGi`Qc}CRKyqosXVt{h9}A}cOLP7he6jsU=?%`=HCyKX{kLfCsh!(zzT2EI
zW9_Q_&lc}7>dRTUP3+6PSJ&1)6q~E$l^k}rbKC399*kWtW_?bLJ$L%4&HQ7DGM7%w
z>9CMo`eboN;BnI_K}F}X<F3z{9#f<lxnzad)HSNvhHJS!z7<XkoGr~I5m@2zYIe7a
zgk@NtJePk=b?b%7Zs{CTu8VAK8*@|IrISs+U7qcAF`$=|vzMdB;YdT*0WmIx3tbD2
z@HPmAnlLgf{Pj;VW`Xk|9aT;-#lW!AqkY|<0$JuTy;>jMo3+#Bc1x=B|GgdC+O!@0
z?(WWMd+Zl}Z=0XQzSCyATJIf?IGyx*#_u%KoSo;^$b5cOdHz~XUFcMw6=_||UMQYl
zq`+18Z-#C0QJeEoZzdgT+Hza|+73mo-S>U<Y+g#3>CJ2Ea=nwX(zNXRD~^M8mfv_%
zI?m^5Z<)S2q~>hA^s4V=;`zNkSFXjby&kcHt-*_z^`yk*iWIl9g{F$kn`U2L+OhK4
zqHMjT9hwS(O^lJjtF3jPDIApjIJIj<miDJCq0Q&t#QqbQcWVd#;(h8~KFZ4tdoLR-
zeI0%$*JJx>_E~d(|Gf4ws^oU&%3UAyUu<UlbvDJfV?o5Tm2A43KQp<pOw5n*J?lH|
zT|W<V-kZ3Eh4CLETi%Ertd2EF*zdnp_s@^g-w!umxuy2#mXS!Z$h9!74wrYko1Wxb
z7cXLQldGAK!QjB;|H7N0#e$*F<?sQ{gIh#2gRY2NaFP+)b(~lI&L^WY8{F<S=DvIA
z=DAzTV9oynIo|hrGICsR$g%#rP$II;eCe4?iDR>8-|m;cSM%+|v&rqdcmFlv?YwX;
zb!UuTzyEU6^_dT5-+ozOU3kc4Tk;Ock48aaB89hB|5EpyH!EquuH5&JCSKHCSfi-o
z`Z?;FY4M^&b3da@mzC3likuT?G%z*IdeYN!W=h+UGno>J4D*wZmdTo^rnxDZG5DGE
zFn{jy<8|UL>WX(atzRpt81<z;`_rE}Vl0XWj~rz=Uh@Ct?DOy5pZaNFFMDs!x1(X+
zAHICCh}X-W_x$Ok&$Eh8Mlv`$*51?&x>h>%O}f^~CM~HAT*^xtG?RVXGn1=cU*l?6
zU$Ua&?A`3ieeztq|C8CS{^yKkjH%Z7FZ}1|-|*%CZ64RO$jj}?y+8Mx-uh=kv8jJQ
z{M)^McYX2iqv!v<dAL~L{`aR(XW!R-|97{&-+r%+ot*ykpSSb(|NVCR{rx+(ciw!D
zulf4({l33d`(o-o&t6~u@3O!BuESG~?f%{Vz5ji<=C$kJJF*V$%ukR}c)^y#S9!bU
zKX1h)>m${#tM5)aWE>eLJ8OG(9`~}3;oehSr1ctht@?cYyV)+Tz`koct2ypQdrdd~
zW6OD6<btbO+@0*X6%nr)v{r27)w`4bV3SN*T3XVx2_-qpDsJU<SDf7CC?2|GE$h0?
z9)k7r+upqj$w{8t@Wj!#$Mnr6=2yF>USO!tFjv*eKKK0RrxH89%nwgDtnbPEB%*%h
z#3rrRyYyZgo&WsQCjMFJz9)OS!oOK&Yeu;=#JXyA_^@QDt!ij^rD>CO{j>&;Mqq7P
z@RJV;TOVnPICcEV>8Y>Pt9&lLN`ikwlh&!@tBgZ~ofKG?Rcw#pPPqGggQrXux8qiY
z^!w{q{qpbhnBmgE)zu*-DkKu%Eb{#7<YSYMe&<|vY;F>_dG06eWg*o^Z=}z@wr1_^
zwcIP#UR|c2JIiC`{0ZMCH9b?i{j5`uQ$5M4tjXo}eAZAIg-=GITu+n=SyotY+J5TX
zhn?2kiy!7{wf;&A7KpK$e0Hl)MRA|A_xCsKeQom|Z<+ezI@g?ce-=)6y|tR>_l8Jw
z$J*;=k}Do=UUBl5M9>DY^D*B88o!#UgqbQChc3v9?^~$BtYXTh9J=ArZ==($#bSb{
zE@^IlVY_c+%z6CVL@+o|)Ra|ls*q&vvIRj$zi0U>JZ%#emomI`z`}s{um#`a8D%Ro
zGyh!l`f({XDpxN&s;uUto!R9zr7T+-T7}|t1ElI!IR-i~cA7NgDsIfaAJ+QBIh&Pt
zi(dWR!24?NWsNMIZb#*uGRywn)Vo3X@BSv2UEh|)*)H3-cKu3KcWI@G+67LRI^Ps<
z9pkv+H-$rp$(XODQ(;%$GR7W8i9!z(zf6Y@np;=>K7J+S%z6ejl@=|{qq`P5a#xG}
zTVVY|bovc7t~IAGg{A%OShVV>%{ik&sUWFKTW;UKf4~0I%jNUyznq+X`|uHujHpdh
zoLVx%u9e1yq=uf}U8}Vq>Y7WD@x%iP%Z$3a)~A{sO8%;qBb&Hl(iYdPQc^!0m(H5=
z^-QOW$jlQFt^$o6&g=pkcV-<8>6t9BTcdBT)$4dZ&$A&eqP;@Ammi$6y0A>IS}#(4
zk=fn$fTvr!ZvFRBXj=4hclrH4zh=jKR!T5V`)TsL=>Ny%^@SEO&mQlZ^ZfM3k3YUu
z{4vmfK4<!(Cc(!{4q?}f;-_s|Ar{a&$C1UmZBng4gO<qbmlaMXt(#_EUddp%t+F=7
z-~91=xw&(9UicUI>Ye?&?-GBWcP#p={^#>YcT?lox~0=gTPkh$|9`f9ztx@ks`}mk
zpM0J#)>wJAQuhoGga7f%@Av)t_PF2vq09Gtxo_=%f0w`i_pm(v|1b0TGXHKq&oX#h
zez~qbM%?_x4YBiGQj1brELax^p9o4#O1l5sw3c`8flF5($<5yyvbE>7h)TrSxa(RK
z*;S(35137?&Hwtj{q{2MZN4?~>x`GjYc+)UuhlwcmLT06CH_(7b7fZu@1u9_OGWnI
zIBD`W&_%6MMC-=aZ*|tJ&N4Z(GapW!ryti}?Xgvr<;oV`&8sHli7uHg9&tXa`r-eE
zm{X~7Li1+`SvE@?xZ;u`rS1}RanHKfM(3yhEV7gLe}4I8kzKss^PeU2^rx4`s;r+B
zel|<wo81b9rPq#oc-h5X@|ahs;~o1jxhG4eJbCV&tGgH%+CEk{lhaVlnqz&>_|mb|
zO>9TjGe&T%TF`Ru-;r0_;uf0zJC&t$Esgc1-TG^j6%?3fr~jR0we`qv!`!a_VG|O6
z|1`cRxQFH1!_uM*b7j}pAJfjy^YfUye*5j$IXfe^W!^O0rlI$j_iFy4BXc;kZyi0a
z(*1L<q=&`k&F61>7cRWK%&_$}`_$aY@hu73iQVd}-c01>{h<@aH6iDAP^dwQEc<t#
zWv>#K$lrT%?d|>S#SyRB-v>U(sq3$Jm2>FOvhK|hfrqBGZcco7*5j=4ROWdR$#?ho
z^E}?!s3r12flc^gj0=m)8j(O#6*h(XmM!e_!UUslJa%<Gr1^-`BWRAc(zOMTu7<sy
zDfecD*fGVA7E26!op13T)+^n#?Dd@MZ>McbpPBx5GjFz9boi_M?Rh^cGdH`G1YEnt
zvSR0pKwi&_8Qg0ET5hGs>mIazT=ltc`}>A<Z)(@Co&HMuP~0EZ^`&|@BCU4a=3oDu
zzt728t?7fAfsm?SOV5l8?O#s#oaC+zf2LqC^YO6>28>1yObb{JpI~n4%sLlz<;(^D
z8M%u@=l6Z{VOz4!t6nnd8}p>5ZJK4ZiMucBe7YjAA?<@uky4W1VS~5J4!=En_wL`j
zhYz1kv=?aTnl^PNgT}=j_uo%X6+8VpRX0^|%GFJhKArC~4GgX~dWBtZJ9STEX2kS;
zT3L&3Cto__(r~eGAx9^>8XE(rmJ5U7tFVa0pG-gLmK)eAbvNvo)qd3RB<m43iDiw)
zTXr_wDn7nq?+Ov~TgQ+2v3-vccvAE8+3xtiSMQ(t`0}y7-LF4&<@NQSZ~i?#zxVQr
zD81#6A3xqMZ&&lV<kz#mm*3CpyKgtwZ&e#-#(_kiGrND9tl8Y8dPJou>&%pAm66wi
z!eY|TZC*Eh(sP}iE2dj7tCd~wNR3PWNnZIq;~;~wvbB8Gm$aS!*K73t-!bujJ)2|S
zUZMNh`rG&Zzp}P5w^@TZSoitz^80nach^hQ|5M+6`0(=iuiL-hx3BwAP+nbaAtN{6
zuIBsS@AvmtmltoAF2DTt?%D76Yk&VceEEId_qXTw)p#^BD)mk-5@L1r=jB?vSMZhD
zt<8Q}{2naVciFLQ(!PJrd*?y^81|3XGHnc;=0u)dEPZBs>n@g>?a!o+1~2eE|7y#g
z@b%ShLQP5<Gk)yxjum1FENg#!`h%BPFN@p5nokRBz1D7dzT?Bk)}B)e`-}~B`Fz7e
zE6Ys3ycL)*Q%E3g{iL{uBG-MJ|Np%8JNe3iBZ<AHlAU|lRJJ?X?^e6IX2m(n$3Guw
z9|>BLax&ZQ&9u8)q=SrF=bX0E|6F4!>wo<5)0Y)hR&(c{TfY4B%a=8FvC~pDRcD_!
z30bg!QS0YnpT*a%8+aAAD3=TOzw>+)I4vWBb;FH&6S9`9OS_QGS-R9hX=dd{{e@0u
z8@rgkxH5$X$6fjE5>k9zDyGHp<OThsYO@dBV!Kv$Mmv7{lI&QQ3FkKnPmH#*$*_|;
zC*T(7FS~D9;N#6!OaAL$XS@7vfm!vkC3~dKlpbQ?$r3fa>GyS#!Q$&*=TsD!=6;&S
z&)Ik;Y07$rMXoM?WDEFA7Knb~4v=2FKl*9pQd`sh_}Hlrr`oBXSbD?2+p%}`@=M0&
zR$Og0`ugcd_W!ASO_Xz%zs=T;+5PBaM);nIt`Ee_E``^gT9A9_`@c8l9X_edY5cXu
zZQmq>x#u?Sa1QI1T$v%L8|ji{z|WxBC|R!Byh<qPiB09dW!G(PO_I8!bh)cKcg=i*
zC<7q{!R3kvEtX`;EGwLK?61X}p!O||v&8;o75=I2+w=Tz&4IIaO)ETH&i<{uI$x3D
zSJ%{;CjvSH7K#`I^sL<SK1<3ud}H_fH4M{yV?Q)oa^7B0_4;i{V}^Ft3$0JQzu%-E
z(pIs1@J;vUi95+>1UfZ;^2hi7<iGyVVCfO&ru2C~Gv+7vw#l4K@8~fJa1zvva`AEL
zb#b+=*yp#+v@5^qRp^R?xj%Rw+&`(Z#&gH|kZfI@8BJfWyuY~N-Is~0Lg!3A|Iza2
z|6_)RiZjIKetTR}yd&~@Y2>ujdH&B&Uykwfj^s{J_VStGacrYcRMEtkX}2a-{ZPqK
zlVu4lPT%M{U2s#3t8Q4=ib$_HLe_pA0*%5-5gj*6XJ7M}G0~GVq=Qqk<JS}uW|oag
z3p9B%6W%bEN^P0LR<vpQNseW_S7i<NZ<<!KCuU#$@3_!?(~FLrum5-QZ2IoZ`r=~u
zhMy0=omKyxy4QZ*yM6zr?fa*8`FoL7ya<;f59jj2gvW}i%WOX~7XErNS$}z~mE3*%
zdGoIIytX=<Ze17A|9-yS@||mLx(HnH<<R@sv`A3o{Z*mR*9FhdzGYILy)<Bc?nT~p
zt`q84y!(5x;c5M?qJQ%F^Zxh$$XYkGZ&{1$wad5j>q@K3Kfin)|L*(m%gX!ef4@B&
zK3_iX+wsTkpMQS2`~Cj?`*&^bynX-vy=CFKpR?cZ`~U5?zx~e&^)3^h8Ivj<*xKUn
zb@MGQ`%;#&(f7ygM&Ehe4(rx`)SjhRmge!F-T6hyrH|herngkE&AE4DXQt_@7w6Vw
zy^4~)9nm`1QNr<H*VT0~8(;m%_R!d|`pP=tE*XEHL<KIU2n}(oz=ZFA+Gp7$1sxH8
zdh5jOA3X8g32`N5i7)jZUcT<=uJ_`*`kiG*%!03Gbj&J>J-GaI*R8Wlw)s1p2{^UK
z&3=Dc<n^L?(`VmJw10anQ6WKd*7dJN^Ugm%efjds3LD#be*MQUU;bG#>-pzRXX5S&
zGwc+T?g-%U^hwIf^kXdB6l<DO_UPrTtqvWpSXzH{%-(puCN8wl`qt|u+nY~nd#{PQ
zBUPNjtD7`C$?d95hWV~l4y=t*5o^+T1U(vet@SdkY<XWc`*-$c-CxT+Z!|4C!x5v!
zE#}Qut$HL;+p(~E#Uk6uE#=z+R_|tgbHy^+BI)`gx1(!K)r-1UG&Jlp+Wh<P49kh(
zvYj1*H|!<Z%~xmYNr>@<IH`%;G_qe=c60B76-G55Ub#A*5a3wzpR-Ibr7k!~RP?v3
zZ>&S?@tieXEkAfx@h(1Yd;4_6y44P^)9W+3f80{=o6S)pd?<1Lh2^(IA0{k#^;Bxc
zFP`}qT6(T@IGov}<sqnlf+yDaMcb`g#uADXFKt^I5**0#@r{OhdRy`R{QrDKsim5t
z2iH6N>l79$a571g73!1YnX)YL2Jc}z+w2;Kx0S!7N|dIh^DR`{y8h51sh-e-Z3{YF
zGCZ!e_6Dvto_$KHk->v^$pXWli`Uy3-?$=TQfBQCxP9lNn_A!MlJ&CF)$@I>zg3OZ
z6=sU%6qW5dpLgAF>iwL}H~NgG$0%>=&h5C9Y$%{sWWy&AxT0jmKlQTIzCfG04n~fH
zudBU#Iwg)?)r-44+pVDc_u&&?zw33jXIU+qF8upkOG9H!^|TGq_1uo$N#7Q3i=4jd
z?NQs^TU2`w8}uqGu0FBr*`~{iyqc=VPEJ}fr9JWP>aHoNJ8kBc`7cjNXEsz0FwjnA
z+9TU$<QEm<5~j%*EB1Ds#p>T(1=CGm=t)dbs*L=6B6*REPe;NfHb&9McdoFko8iv&
zXU!}oE{;|5Kc`Fx`c&uSI8o8?^5!iLueD;DR$r_;Jv(R4qKY()16oBf;XginIrk;z
zOVLUBckl1L(JxP))n?)+eDA*8vBP(lzUluQ@u1Dk+D`WKE+Mf=Nv<c^(&U!!o;|z&
z|L^*r>nB`WzyGIo-Rk42{Z%)f-u=?>`0nGEhYRh_R>m@LEsWGmjZ{fLD^w>H=+NNe
z!t?4fQ)q7ags&R*#oFu7uQj{)L7V$xxc3YDT|53S&t$9HvB&&(^6B68O|`C{c^8Uq
zefsIk-`nr^@2jp*&^=vKxvOT!&5tI%nt_@>-`?IXUtePHyXXG<<(YfVeBS+Ce&64B
zZ_oZSPV+eI@;KvlR@W;t=bnFItBbdYKA+nqxc2-8_L{~|YuMD9&EoIIY}c(=l3CsS
z_0lc&(%!lA!$rl{Ut4Gv{VDIk@;!Gpsh#8eX(%<z|6T0^*(K&%bvu_j&fyg_t?Aft
z@Lue`2_2Ke9c2xcn?Fnps5;|w^mdAF<dTl8&m2+Nj0?8<JvsGknZ+)q^S8gU3hCc@
zJNr_`54JS5gecbk%j`7Q-wsi^wWI4os($B8&x=R9UQ9ZjI<Nov=fqp@+cLeB1C1Cp
zH|f0oSz;-7{&=#;_v4>a&sCnYj5aj!(rxE)Z8-7W!0yW)sldg?WgDLI=bhqqNjNk?
zG?bgY^kQdP^gZAG*)HPCu9Uu=w!GsSqoPKDMaDfPfu;#Y3!9c!<o}(;Y{p^eA2RpC
znVVtfuSSTn+Fbd{FZM^Xd_&G3an%nMTXbja6rbwce)>%pZ?o4&&fMo=)`9Wat`V9!
zMmxlGGWTv>TKn)>_x2NJhgO|@cXGXt&x}`d8yHx$d=|Vq5UiG><d(v)GXMS5<5#pa
zt~fP&gc`YcWE?Q{ZGO!mZ|zahdjG1}w&)0rJl&SWt-rs>6`O9@P;uD!;=$J+Tehua
zO`h`KC+LP`aaCD(cOk>)PmI>nCoE!LU1>W>An<~8NJ#UdHdoz=N+RW_JUcB}nT|R)
zZRdE>wRw`&!<Bk<p~VqNnSQr@m$B<EJb&V5hRB9$56(6hXD?@|wk^_^^p<KK<hDGe
zJ;UbD!sIp2Pvoo<bjrEe(R;~1+})j3OIzT|A0Fm_fK_`N7A{a+(7>fJb9YRH@9Qv+
zvmz%q9@}_*t<&n&t}g#W*M~N`=`KH9?D|^spv$=;#ydp|S1uBpC9rPVd(K9Qn_K&P
zn3;rC*rq!PB|9mYsHOJFDJWJ6G#<A7d(Eq3mVo5sQ&C;h)+|d&%W_rTsJcw$mC+HF
z=EShIl4f?voy#}m*%=FTH?-;QO*s7R>g4&#aiWFY9sS)+9TtMcUNtvl@7=j~|6SRV
zY14le`SqOem@&zn{nLR?fdj|>D!yH2!fUg%(z43eZavdYHIMSgPi`5-7HbNf)5>?g
zvnT1nWs&8IJ6^MKo{?c)SSc&i>%z$xdPF42<gV$jxo(%F@^?1bo#+zCbb5AVW0i@C
zOX}*Cb~jRO%K5qK^GciMymNEpULC~7u(?}f#lgzR{r}$mDEV`C!neZ(Gur;1efaKM
z+c#_D3ahw%+7*7*yO+-O(DXj3&KPx4$M-VF<G=5^*YErB-2V3;eI9?#oT-n0ecL@r
zT&`^QZf3#diAx?<Sk4a#W1754fMbH<&gh1as|<w;5~VK9@F|^}F31_$utHxd^hk}@
zt9vsJm?~I`)CzNzuX~f;Y~jHCSKm|e|NQr-_ntZbZ{oce6T{d(`F-_Oe+qU>pWand
z@%`KEPC2=EGd;a#o}Yf4zkhqXtopsT<v)J>Gn&7@`uDfb{_o3$jh0SqH%__Xk@v+&
z>)0;$^0V&sU!@OSFB6+BwLnyAuaQvn|JbL6xjFf2%T}q~&02A8NsjR4&@UCokFYx*
zKOHiYpXDsS598`p>%Gjgnq)L8H1Z$l+MMLnR5UJfS|qWngl$T;^p?izTSpEWyDsq%
zkq`e|vG!4+GRL|$O{qz%i{iNsuE}L9nSI}V$-3)GVH=-3I$h6f^3Btru=FJBjupi_
zW@)8}PE3vc{M2Z^-}J&6Hp{0q9E@p=pH^u*A2haLGq>;g={@VRHIvj<1a=8bSY#~|
z5@PmqUS(WGvEpu@S!+_IGdE0Q?C@Z;2s(P<>AX^fyEiI*rSGb>aosSvw}ba9r|;gK
zA2)Yy*%$sw+w<#<G-KgZU$-SIp4j->XD4?(|8P!YUr1Be`g5GD#Y&UiO2jjT-!7iH
za-n2-dXMSt2@CVe8S=8`>Uv$0l(=zJ^w_m24AvStzpAf(_#cqUogq3a;P@oLIqxF*
z*6zzMk+>5yVMD0C!;)Q7@7Q)PoBr%&>AxdpZ^ZuRh)m#--T0h)=7KK{f1M(hx8;`A
zh`xE(rS`(>`ZrPA=WG|Y{M&2w!`xrIOu4FNX@Y4>`pSvY!osT~T-WOu&c1ln;ma!i
zf_K{lSFAF0`rzsvDB>*kbkUhZDjXSk>%PkLy?SxnLO56W*^j+TWHYq$Dg*LOmqwjk
z{FX;>N^s_#=~*rvoio~8nuV<{&$x8N#E5t0LbeTsMRjj>UQgvWE56`1!O{HsFWIA4
z54@ehG}%F4Y0)adV2!1mH=O=Y6^blmR5Cqev0!45?@K=Y;Ik6x#vv2idOn`lxng@I
zXrn{)EAyYKo(~<CEhrA<<IAm!IWT3%uWhwvj>`<Mxg{@S5p18uA)M&s!E<KDqJ8FB
zanWTTZH*W!pUjg~R$)-O&?q^vYnSoiGZ#WayIdb>Zs0EvXh@iI<HC-P;!x>13!g7m
zXj<iV(@~o5%DSUl?5?IIZ1*|p%r5n(tHa{shS}fb@9zKkXtMu(nLJ^Ij&;*dA3bnn
zS!PoDIsUwu!w097E5C1hnO48kX5H(jHCB`7>8xK-uz=;VU(vbGF$|kL=EgHfn4gcG
z`XR3;L8TyJsfeNTvm>1s7kD@>(b{-?n^)ICxsr=l<JRo*j%^k4a!gBeS-|4frluCa
zc)s*<J!|~iO}pM!IbEL@lr_yY&6uqvXj-wQUNMt*TFZ=O9}8|=KHE4Wd}Hy&ZV86V
zZ{PNwEBo_qhN}9J!zO(e_ukw8d^LH#d>ec7mzT5k=bzhe8!H!nbxl*p;krA_97+c3
zUbAjFuxeGLZu6}7r#f%$>6R|sC2jF>L%_M!^}C|BtYQ6gLfDv9KH`_;fdj(*%p2Ca
zc3JA)D$lw#$$fj$g%#Nf_3@3p*R3ZsY?AsD?sV+WagC4BZ`u~EJN@&=mxpiPe)(Cq
z|F_=s?e}Zh!$P{cbUxSE$n_teec0qpnYByAwWoi7fA8;CueuT4bi_wu7JKvS$GhZ9
z9=P@Hnf3mH*`1nw96h;H%cdPv*qy)9vh+po?0u)(1CzAEmM?y{Fq5r+!{_6Tk-6Pp
z&)nW*A?ho7M0^WZQt744&^6h!ySo@p@bI2Yt#Ig>W58<|qsa7#gY!+0@a~(FuP<g(
zirO?yXOe5zmE)Jg=2V_!XE?$T$Gm0RsdX*6p;aRM+Za+a)^HrK(kq_()^qJ&L!UUW
zps7zTF#p;0rNTJyq}sCAC)3o}7}?yEgG~%IXI;06zm|G(dgaXNrMgG6_N=jw_jMFK
z5wQ0#<HlK;cUnItE;U@VXi}26tJL}#oLhhGxfrlZC-(NFeZOX{H&)$!>RwNW$&4FU
zgl7xfY;Rh1YZ<RvR@)+llunPEFRdJ(FK_TLy|UY3_MPzQo5R*`zJ8<kW^C)7km+W+
zX};^H+3({}zO%!CZJwO)8*2yV)cc>0?A&&A598vMf-?kvI{3RDJe>X~H2>7Ys<Y9(
zAA9>*ewS~$y5IEQwJjoR-5yT6Aol(BG<U|^EAGvjb##}-&$BC@FReIjc_2c?K<RtJ
z^VOY+&a$)iYuwh^e9O>^*S62mDQ>sny<c~BOcyjT)#RD~{Bf*cm(}O;;AdOvxy-Wb
z4=rZf8eVhaz@knQ@l>^CN8VKB#3u!G=uWsG&~fv$*7VSw+vZ8|ZdF#_npyi`lj<!0
z%U-gpm%WNuX4Gsbw4_CI;_9WHY>OsVsCaRjTW)mPb20on+okt6E%sk{d*B%7*Htr}
zxZIAk7^<o(gsknd`t-)x!9>7Sje)n*qM*~dM?}=s#bgCvbevHW|KEf1208oQI+!{K
zbVuoZG8Pe-xgj_2b<ozR^BtMmOp=|3+agbxsvP3s)C_c5#I^V1EzKL>I65_VE;p;$
zG9l*QVUITfvL5$-|LFBR5%$!ovtvTnl$fHx&hUj(y#pVgx{{M%C*vmfp~fy&kBRG5
zd4cf#0P(K^*_-A~FB0|a)n!Tk9B}-u@E7ezi4y#~XPa;T{{8pu*@>Hie3s}`X{kP+
zQ~vwzyR<pWj`%dsU+Q4C<gUTn^7fx<>!+W7x5NK9e~nfAr>G*P358L|H|j*Ly)<=W
z>WL8j>ezWZCHpum1o_|G>{UK!!FSlCqifc$^c^>KtS&Q^oSeIsd+m}{5p#bf>+ojI
zVY|Gj>D}v~zy`nR0h14`?>IQS;(}7N)tiV`9*&j^K9eLRI2h(WKVI7U_~Hz%jX_3!
z%%3gzF2DWvEa`L6zMkE+n=+UDJ-fxa$C^!P7T?QnlYjf$%iN#){J0XUkLJaS8GIfy
zIQmzldNK>Gns#-Dk7H=<ucfOhV=WKY)f@K;ix+q=kX+s)=e_al#yx+8ZY;gKoPYiu
z{@+DW=Z{zJJ5w23thp$N^M%%`ofS<pElhS9H{bBC;Q97z=G!Ab{Dn6EpC0_^f1rT^
zyQ+|g?&+6@cYpdgd-m<yncL@|??2AsvgcV+$LCbbxq6)$GHz<quKGRy`S<bThZAP>
zc6OfZYRNhDKs16Op_}P_Opve2?SvD5+0K~VIU44pb@iaYw<Z~$TF!<apQNs=E?PM`
z`=(lH^sOq>8{hWzi@CgeZ2j?7s6(<-T&m2!y;oN)VSE2;)zldpu7&<RQ#vfZP4Q?@
zj5=1g@lFb3ovOFqq@^h<Lw|%xAF;O%5D^qzG3#4gvrhie*07rW)-P85+Vm=1#&rF%
zMGu!Rkh0b2a`Cv<)H?t4ilm5SB_+$%T3U{b!WWX{<Sx}c2`UonzuftJ|J6SXi-TIC
zHCq*!BtmP1??^A4CAc6<BSIrBQ^G*{{=yJ}g;!6R*xZ;MW7VN!dU(lWLDxIJ7tUy?
z9Xs2(*72kp&(3K_FUQ`#dOGFoQ*X!TH!n5KQd4(HiQbdmo4Iw}zWk?Iu8ujIX79ST
zp3C&D;^n#(TV2GNrrTYcG1E%<+sTk0ZIVvnLW@&#AEn&rn7Oyc=_(5^?~SVcUGmE`
z&duMns<1e6M)k4nCo6huGhR(UHhJeHJ@bD$60tkGIC~DnDEMt{IN~RA|1QhBqHn%)
zW_fS8=o5P2M!yz!0?QG-3rv$f@~|@NZ`pD;FZFfduAf`?3cTC?WA`=l<mszj1fTFQ
zFhu%<&R8>HqK2?>_i1yEzdU6^duM7Y9$%&$lB^UJvUAF!*(;=!m))GHG;zzCRbP!l
z*FMkbI3>y1GeIaUw!({fx#H4^mWpZzrcBr+>M3s7^WnG0zVlxf=hlZmY_pT@ob#ef
zCu6193(l;m>F<~B>pydK&R%I*dE=|)c0Nl=c@|yE@tvBSykO&{mDv(Ywj~{l5_8_X
z`^vXd+hy+j<8^y(8Zx`%`uCpD{~iZ8e2!$aJlMsT;^F73D3g`yZ@2&Z{KbcFHgIzG
zG+Xid@ui)gtZwwI#bfD<S*p*Kye8(vbYD2}msQ2@v4x+ou@>VKot+&W%%^5Co;1F(
z@uX6+*?z@sS$UHet0Yz33SO7peE9H$Wrw%OTfW?+x3i(^vewxIIlZ3~LT!F)x=xc7
zW=e8Ol4}qA{LtW@Oxy0i1vc}}O+Wq9B;F@=roQ;THAg23vT!9aNy}e-#ARU8f8J3o
zi6LZbX>8#G6JGP})eL8shA6o+Xmo5#+3`TVxsg*yG5oS#Qb+7eM#m$k!xkLzcp=2>
zWO8@Ox3{y*w?4nf!FyD+BUwn<P&w)IS=((H!p!HVZ9nQz(QxGPgc5_O^Rv>=7_U9{
zeDXzx*HP!5f3AuAF8sc&-P}CBrsns<m!4jZ0<BXz0z{AA{5Mnh?u~~H4AYlCwpzc;
zYb%@Zgwi$>-ru`xXYF|6>UHqMDQ2gX1;?y|0|V}Te0O?&?az~6799WjrDWe6eaq)T
zEDSR^v-fQ4h<Wz3#OnO#)VOIuUYbeE>^iTQz1<a+6Fs%>#{0nadk$3{eeP<z?-kcy
z_bb=_PYjH;XPaIrTARsQ8msUBx@hO_-Me@0pMSo8`sGRqchl3AbEp5Tv3h^(Nq>m(
zZ2i+OOJ+Sk+_%#&A(}1s`i1z3H(z#L-(INuyYC76{Qt6(&L7xPC1&bSxy<h88q>WM
ziG0yk%i7i*=&pX7WW0T^FRu@mX6xmmn#8?=3(O_F-`?Xr<Ir^Hpo7TNTB#MYcspY=
z-`p{BdgCIhm&I{=HQR2dO>TdJFZu?GT<O2SFUWlCb7Oo(_D9jS+iIRhf|r$6NqH(X
z3WZM8_;+>64Ti=R57E#YmQ!Z$eWkVU`1KzToxg2f+#q&2XVTh~iCbBw9X`pLEA&c&
z`FYrGA2;VGa)!*T0z8wQrFa5sS2K7utdc&cq~ydgCzb0-QB>BE<M*EA-o4GA@xrNc
ztwy*~h;vZ?%}-9ZjknbtTD2@~hi&U#uGNY=vRsyF?Xzvq+PL+t)2gp~XPPOxY`*br
zX3pIoRm_L4*Cwa`=1~>q$Ti_xlM{Yxia`YLvKx*muZ4~~_Oab*F_N+}x3~VXE5tN7
z*u>E};Oi3xgTljINl)UlDn6Vk&x*1!TXb)>y_j9uRIRJ_GUhruRovo5ibww4DsfM2
z?MeRPIJbax%c^^<2OPRrF49`H%kI>dsFK@B=^LAyzfP38yDs7uo4k4Kf#SRWvJcOi
z;QaGyYett=))BW=Er$;Icy*ldN$@$wGp{K#>%=_OzEvSAhMsL}vRa}p2W<Pm{QUV0
z=aMYNu>aThPv%qdbUD_edF3*5ahIlH(5hWG!?(+<x5%!4aO3uok_gvc2|@1JCl`y&
z`k2NU#@Bmn_qoO&1#k0xa#x*g^=Z`5dot_A?;OUrb53zJc1ML4TCbU~a^24}6HYAu
zl;(DX=ZX=p;DmyVBL^xX-8!B$SIV;p{5UX=*QQx)rjV52hX1w~ITTq}F<v~t!zry`
zrlOwX(ABcQ*{frPYq91|k*gu!cSK!g>J$*2*q$h}ywKs8rl8L2s+p(vbY1zpN!2nZ
zEb+}-jvWh^a+>L7-kui8y;0#&qQtVl?>5Z;wr$$w_T!Hd6nL_NrQDtuGj3F3{5*w?
zN9X+k2F{ZjsuB_GmrO1dcplF4=$Ur?X^q|a*E)F`>cR8VDg%^`3a!`rapt{n0b@hT
zp~C(1k7b^)WbESqA;q?E?d?a7YbL&W?B05upP^z;gQW3{RX1i$U&K1Gaid<w->R$H
z2M+Qd*?Udjmo3La%CBot2G22*9f~{LI@YfWadG82TQW5@uJ8FKyN6Ggq<T(!s5ISG
ze|!G9<CjzStw{-fWOCr};eXx!dDBYmvZnE{CVFW0US7gAvqGj~(QZTD{+GX(m%pE0
z+4;Wf0FUYGqIs7m9Q*4qC!~8y(~^dbTMz9B_F;9LFtaehFLS$hPUZr|=c0n`i4w;%
zC4#(|jaB{RR?NI!I`8@CpEXu<&prQKX{Ud>Ch}QoT-TIIOD*1rZ~OgOv?z7+-}0N4
z(o0^SQhL2K>iFvF-j18?cW%l%o%nZP@;mvt7xu56b%Dp>RFT#EKK^_;zVF}P@0UMS
zq~zkI6Lqchtfl^GCWcO7hBe*Szi+qk37=8$djIQL%y(I5-PBh7v-!pQ=i9bU%NE*V
z!E}E1Hz#=}uXyc30S3;pIh~C=QubZ@B$_YpHG}Q<yQclK<{bZD@&9nnWCm0JwaJCN
z3ocpncuq_zI?rjV+I-`orCrKmt$puRe{7u-Eby^<k%u6EUFdSzoO|<rtz}8+-6H-W
zcIBK8TXk-~UA}68>YA2`Z(ny_Tcy<Zy_|WH06TN%3^SfjR}bC2wfLj}&&dE$-8<J>
z+*Wnm(yIK%;mmus*d~l|<*c5CPA!jogBYGnNMZ~>x9a@84cyMF?-tbfgfLeeK9s<r
zA7s2%ee(~?w~xh@b2~Oo(~?;H)@((amqqv6;6PR7uRolbHto7n*x$+=8hubAzc^cV
zyG0Y*f9|c7HcSPdmrH#6uu=EZhFd#dDP5K-*ssi)_owMs(1p7aafQ48`!(A-Uu^wx
zGJ=1Z#Uc>_7s1+3-5Ppr2`=~VCdt<Ru3NF9`dD`q<L+zcV_s&&-8PMzeX03CVD463
zkzJ2=oUnQp{PoDn<?Fg6S#mCI;1%1-xJl#8W3RJJx%xZ)gvl3OD?1R%asB-7T8G;?
zx6g}c{Lrc5eH%5y>1^QIw1wA0Kk}&M^Tb9t&WmiXyQ!?OdQk<B=MRpPS0biNyuHD6
zmlwZrX}~h>^SgYO^{(nl;P7GLn4)v^z;W&08@#G(^RLggtv)AXT5WkEv$;99Dd1_^
zlP5(S2AsmiOMgffvP_Qkf3_j1TZhfyhDfvs+k3X{YhG$zKl=CU%_pY{_kS0(wwH9P
z*yR6Y??QE!Rh#EEEDfAuHqTh5UaQ&v&h{%>@BJkuZN4+aw74+D{Z&p@^1f&kA}hcg
z{Nu+;zRNcj6jmfk2r-CU3~~uMSg<)D{K5YHQp*w_+?{bjLin=9G82(~Yo@zyp6$DF
z@uX{u&)m1m>e^`)xN`HKZMLfo#Tz{4v}NA9d*`n0o%{EA&!`0*vF9yk-B5psEx>Ep
zo+UQRG$;OC<*)ofF<VoOtHXpxxnZe`%Z6z+v-FQYofK|b{5GuP#p>->8yrG;8X7`R
zTzj)3IYq*4+YRO{>xUC2^vY@mhP#-SADz3o&O~_IPoL6btuNQUi=N2gDjHSdt25oi
z?8KA9VythrER9$%B`xs%@{ea{5C2Q^QohwNMW9n?a`i&}Y|C?}A5S{(8m=)#-)w!A
z-N{V}kqMgFJ8vZUm&@DN8tNZabmmIief#C*^Kwg`yubc4_N1i1jO9n1y;dnbwzzx$
z&HX!f@4w62xAfuR-R19=l`_vwJ6k-jom=RGhxEffh6^2cXJqUv40lkDDL$zZ!gFR>
zV!^)?CcTFZmQ4xrYO&xsY|zK&7dkaHQt$a`oAbvne=M<_e{T8Xk0n;~^`8GMnt5G@
zOP;edv-C~g=9u^Ir)t@I9xcwfvAb@~!~Wdgv1_ZZB~CJ}vP}0j@m*=!cGq50@j1V6
z-Kx+dmIXnlitOxXzI=Q4Zr*n5dG;$lS@=XI#!inpwQC)VS4+Z#;(c@UKkr$_*fLx6
z(N*7L+BtLf^;aog|Ic{5<ucn}Meo3y%2msku^n|_WNG4(Rjzsd+@&Ja!28zm9^SuY
zTkm{)z;9^4F#S)+{8$;GXn}ki&$FTCZyetqS$HOq{aMHx+lyh{0n-=VayhB?$h3B%
z&dw>5TsL=gWU`-i>i%<)<=)aS1`Bir<?828$yvPf%7VN#I*PqQ_kS<axY>DE@#c)P
z=btSO_$(itH{rORSxCzyBXypsBDbFTe>)!e{GF;h<4SEi8z;F?hXV{pVw@Q5giB2g
zuDn^^@V2B+-T1?ss3j|c7piahVtwHpL!a(9l`4btn|dC&c1<d=ero=Ca{Vlpu7oDV
z?!KS03@d_s)p{cL-MZ>}<HNygpY2!Yyjj)#dG!J*u9Zrzdp4~+s~q}YF(=se#k23T
ze{POFAJyFd>c*{pm$!xUHYD=YJaxUnQS@YP<BwThXL~Cq#xH$lbV@0xCitpRnU+pf
z^qRP%I}e8D74j~6V=?#Oq*W?;DzEQo#cuSB2+n=$WpVk7r)anq!>MISO-c_oaIHwZ
zF;y}$`0j;uAExK--dKD1mc_xf8{KPuUaEWXa(iW*h}7KJg9k!paBSgPa+G08<HQXS
zhRP<V?G~~cg{mn&_+Xpglb11T<153ptG22A{rVwLf9uQ{il%}#g{ra&3`<sj-`Xg$
z{oE$a{|{%`WW`2bRMNH4S5>h%C;sivdykOhH#w5W-WXoA3pzS$?#8!$GaY;H=r>2a
zpVp<mX10R7#SXFKXZ|$aEhxBc?~?r{<gApJD0kZnTg!??_arI<ay&lw_RbXACU)$w
zPtx_^znfk-3iWMwcQQM=OM-=I&7<m1BEtNA?+hXY&!?m<it6&<oDtLP7m?heXzc&E
zLV`WO&_IzXKZs}Zxo6&jcg!S9zqFa|xV`Pi;ao$97q^=%;`XRMGU?ep`}S{t`@e6x
z+s)ZoV#GCWx?EuqJYIN5K99eBw#hpURTUHA(89E(T^@0tY}P-w)XzQ{d5uTQC3Cy7
z(}|$I>6dqZ&NK{WXWc4v=FSFr$)>EfmPBqQ(*QXSm(Zi0<w0INyF2z?<arao)3|o`
zhTm*=cUjJ>v=b3J`omy?(Swcm64L?_l-7zDU##Lt%<;Qo!=5a0>8*Qy-7j_f-;W;O
z{_8T$heM>XQ$#1-R!%?vzWm3EZ8|Nj#txP*7oXj@U3_NoxthBFUreTp&N%jW-qj5n
z3^g^~YL`D2wpQA9iit9L*<@TwoROFJE`Qm>vT_AymYSOX?~3+`oSb#lL3m@@R2`QS
zbFwD%S1t3=xxR-p>Ha$=WA!!@)j5yuDt$V>-BXi`;}u_=pYqZ|8=u|3XWyRldD8XN
zNWITBJ7dmG|NQv!$CocFYOLnzKR<2sJSc|q759Y$hm;f~H}zHDldq5n);s;OB68ZC
zVl%}fSJW&+V;OAycbWZuQao{&{T-u6Xa0F_{x~a(&EvJv^F8mrw`ZEm9z2t>eRhfF
z=RH#<T`%?B<IC{8GICn!cIWz~E-D{%xCQF%dDl9hy_s#-^zDc{%W^K41m1;=$Ijg2
zt}<}+uVQRm@$%@`&2d}0wCk7lHy>s@bZ<*ITVH+Kuic6*7uW4RuKk!TF}Z-befG4O
zmkwkmBz^i>>m~E3dWi#XLa?@}a^Z~IUGM$9KWg1~PFtfjHMy>T>O(o{AHEV74%pX*
zY`J+=WmRIE(^{3ar@maiJKsvMxHZEms!M>`c%hcj!>vZ_+n(><s;MPm;C6{!)8W%e
z^XjDOB8E&XT&t>8zvtH3O+UTkrP%7qYG>ALm&;c)^QNv&c2$b<-F+}h_gdo8)ho9s
z&sJthUwnId;`OR)XJ$WBty|WVE%->VL1${3j+}op$FtuFrM$j7c(3@F-g)?pk>Pgg
zP3MTJE?cK`^E)OA9Rfm3w~p{E)L*A;#y!{J@PnMr718P&?;7X%_srcB&3@>}!i!&z
z{q@$n`}IkDD6`RX-xXIjbN(&){y}yBn!~3x7j0!euD0Z7;^I_k>7@HQ`KtBPllB(=
z72GJeTw~jdy4N#W-`;S3#3K@$wVuTzZ+EGCSAu%jy2g)JKHvQ?^U{XJoAQoo=WXZ}
zU(tNM>XWY1O4F*lSGL|g)^)6eS3gjB$+H{I3I;w)oP6fBg}na#!EWU>U$^SyjchUb
z=Pu65XgkR+d8b#`sf{i3wPUT3U-*&J^41qOIjnVS^bwfRWXCb*FWcg-O`Kn2&Mw~k
zi&r?P!;H5<bV|>Stq0jyp6s1l6)S(x`&?GRy`}vVqdsmE>^ocdb;rR?E6%;)-DZBP
z(ycA}ynKPf^Lqkjf4D-PiLltPXJ?7FOlnVRXR`bG{6gII5WO6!3Jwhc!yQgbrUZH2
zO=@R6&2TZ7N9v~(XRZV5m5VCJGo~4+`f&2zyv1{T4qu8Mt8!X~O`Q0;20_8)E}^^i
zl)heUozj+g>#qHtpP#z*>wkSzw>FurpSp8OX=te0yk(iU-nL~X$@Ro!q$nI^ImqF9
zde6N1e#eg=|7qkM=r29zSfbQrgLirS+w<SOlbbj1oNJff)pyOgb%I^1GPe4-{z>C^
zo-s>A^-$rQYn3NoFUs<0eqOz#@WGCsECvae7GCU%vQdi@X_U#*+Ip_v?)aVm3@trp
zKRnEzb-STWF;nNtqq>5pdp)Ht|2@uc|M%tc`cFR(pUq(XvM4pww0X|cnz-<|o8iGp
zLDwdwM;LXioVc`B$+vQ2YOH+!@y|8$PT%}6t!<0^d-t1nW!~kzx0|bPwde1-d2;<>
zY;u-wmlfW5Usk^TdwKiax;b9wl$v}pYpmw?oi<wiS}HvDyyTfnN6su>xndoo#m5)%
z&J1i`ZU-`V?A*Is@?=+1O5y2Q*Eg**Wj><*bjR(t@7}+A_wL=d-G9$MY@6Yw`FdBJ
z-gMLT+2^KzuCbXXCvR_KE9?LK@*chHb4#Dh3ID%;Qg@Tk%?;as_aFcNu>Q~IcK^0e
zyPE%hzu&L<{O|4U&rjD>PMuR4yC$Wbr%Au!596z#f<=bUc3qvrqI>k@<F9dZ!`dZe
z|9=+y?)xe+s3>y!T@#+p&Wk?FGg;*Nk0+nYK6f&*VbwC%qG_ckE0<1jb5q(<H1+Pq
z-W?xJ9d*kVUgaoDRop7k|Ko$$>OB?@4zWGBx@Y>2QjhoiFMfE){861fIWUy>*P7>`
z#VNloRXhp26V05TCs2HN){@qY11&jMUA(7s2o*mvDezjjXZ_{s^^%JWw%XYoRaQQF
z*!F7f)_Fqf6*Zbqo#8yytQj=<?)M`LSL-CaFgDrs^2ycoz*eQifPli}g_VuYSxT#%
zlwPr~G~Balm8t@xtBc=~2kNR!4C;MzovuA|aT5$=R1Yw-YH(VybMG?uy{otc&P=)e
z<4RVLsMMtSo5K?IH|nli+9?#cRB(}I(`t8tlHli86im%uNojhlnJdUEskkT9?o;s=
zzqL^%!D(CCSQ~g=+3;Q1k~X(Jh1Dm}^i0r;TImo~f9du4vu3l+&3I>$!_)3t#XVJl
zqowWP!PTsVYd@`CvgBObf@MApoF*c>rEHQPdQ5Ogp1(A7*@IOV0+_e1P;)W}GFUac
zu>0?X^T+NSe<}P}d+(|D!pg_a2CKS@NVWDxIvwfTl|4&n_01!?0a>rJPB3)n7;V_Q
zR#mM(<$GWEsuK!PbJS+6iQBk}akKmd;fI^w&5GKa*U+JVZSML%k5})J<aS>nIq9fh
z1j8NqoTd{e+*qDzx+Y|KIURX-Or__9`YM9}|GADu52cT8+R69+wJFP+h2Ha)o;q^n
zn&!verFwmv>(#gQtg?Tz)^*RM8^O^ELB9X{WCZ*E-w9a0aAWY*ZSumoR+Dy^UNwH4
zB=u@vsd4$KS-V|Lr1l6TO%(3T(#oB{_KNMLB+vE6$q`2_*KJvOeR=n@v-ckhJa_n;
ze7BH$>zePWoysK)E4K+>V-m9FU1oNh^Qx)DM9m$#*F#ccz4fAMg5PXSjcDPV=;FXw
z)b?}ILWUX7aw3;3(9kmGV>V?yC3;*eTWenb*O?m}EB}h+YQNf*QkML7%l-Rz@78~L
z_SoO<XG%XqrQU0;%F<8E=FeVd*K06ahe2zS@ItYjbNu_|<>c@0x1X=?I%PtT*LHJr
zhba>*dVk;j_wL!Z!>Rk$WV2m(#ozbu`1;B<^BThxXUv>r9oVMG``Bbj;>OT!rVb+k
zZT?Q--_KLS&P=(XpIe^nE-T-?(CT6D`b8{l(Fa(L<-TcFU@b`MyzM$+OXTG<JvTB_
zxYBx#bcA`Go4!Zx`qwSb=A5=!zudCEV%MHp8~b_BK1Yeg`iO@}RJhE$a;5BWhOzy;
z)2Wr;EB^iZ_~-WFh%$?}ZHG<Xw0+F8zx!vi{l&lU6!!ny{rUHMZYlr67QDajmXw#5
z*B8%dH_<N+^W6RM$B&AdKOeq4Jp8jHNLV*4On&L19rBi{fh-~g9S5iOF<iE|XK!<7
z`jqEaHy2JXJ;<|6El9>>xsv7c)OlMug!>O4*dyOo=polQttig*>pM5Ockkujz4u%G
z+2;Ib8~xXt)_9-G)?E6eX6EzMND(iorw{A?{P_3n^!jhRchA0D^Zo4h`?cS{{rECv
zI_u}I&pEG^UdLQ>EfQNUd2C9=#Zy6HC$wiw66xRNb4KnEQ_CH@{0(_4yLGRz{_@(t
z?E9bK2~~d*Pqkz$i<-7)PI{1TcY0w@ro{4(HC9uDx<r<>bkylRkGW?0{LGrp83C!%
zwQ;=f-?ne{__}DnmdZMw9LL5r$^Kv7Xa#Hzd&FM2lF3l}R;;;e-z&3q+v>jDZ24lh
z^Lo^-71QJWR&QM9x{+&Rp5t4?U0*hctF79yqvP|M%WI=zTx;XgyF^qM*87@jYAFSE
zb#+Yt!MJL*pF59Mi{hrl^_-dYx$mtm8cWAtt=|5_hug6t<z|~i-0XYoJT9%BkKVEb
z2N>1db!qB2a3+C6%}sQ5p2Nzj5T4mPFWBTAIr(>4n#0vFX|*I%@#-Imud@`DH!phf
zI$ikl<C#5YIV(#)$8HyySS7~%bHQva#@2_c_OITxU2jLB?9`0K=4@3$S8ci1`QEPz
zU2DFfT|;=$j7L)wb{9Tc+c@3kl-54)^`Bnn?LB#8#p#Xx-;!pB1T%MD5x&PNe5Q0x
z{8gQ^3gQ>KXL-K<YO9gadVfV|;fng{uM7pAy;(d<ZQ<1tZ`0MAj`_JxZ|=3<9ec*K
zT951Wjz4u05)3!0RaT`7#-ICCJMHwtAhTO*yu&Wb=@{I3XcCsN`nz@GBrZ;8O@+Gi
ztDTgD)AgD>XP7n%cZ9C`Vsnu5;ZpY6V$R#5ol9@IZJ7Qx_V2H}Ke76!&qfLNEViiY
z5e%I=@ef<`m6P*BIfUEiPd?GBb}^UDzpeMKOo!N&<-L_*#Zxp)A0M8k`D`(liFfF}
zO|4A(+wA|<N?6qA2qo5b{o2p6{8_+@_Y-gKT`LiE%j+kjapBo0-{Xh9_OE^7=*sbU
zZ`W_Bq&#6kkGQy_IWDhruW@|4dW<VnEBVEnpRbg9mc;kZ&ix<3`R{6$L|ar=n-J4F
zW;Xr>zIFOr&#Xz{IO?p}^8dPE!?imn@2xRNOqz1F=%S9+ymgPt=kc!=b?|KInZ(%b
zP-fj0r56>@z$kdkP;h0->loLyo`<vVn9g!OCF|hE!m;$pjK1xG>Viv6dLHL(-*0>8
zP5JI4nuZ4N+Y1eDdJ5jxcpCI+m)>iww4;_`jQ&d6yJF5Qzx?q>$*k$FA&i}s0Se4>
z*n*m;ESvo|(Y}xQQqIy-+@;KqcF)$@CcqW_vdQqi!m=etm=lX;HLo&Ox}x)Of<&y*
zvrh{jX{-w9IQrQ1fZ5lT*Js>))gHt1x6pZMZgybq$;g9F&L&O<rz>r3ZR>vD-Cp3g
z?DOSaGSk2Ac~*J$^{nfQJ_nu4p6PnEVTN#@frh^l>t;?LYngAmbCxf9Ki_Y^)j9t6
z^7AVWE-~NU-v9ocz4zlE7yqVb?9lo7vFO*gA0Zqw`R1K^{3Ju__{(q8jz9eN@ZFaR
z>po_wm}lHlk&TPkRvfYN@tMMKaLbX23B7BZ*bI0c>-jDSC^Fle=#eGN^lZ<pS@&;O
z#_qL<npQb)`QN<r9)7IKhYDq4F9m<fGw*HxTNhh=Qg!|7N#dFlKTjyOn|uCqk(-wJ
z_ZID`#cg|gn(x|8F!0^JLqtSTQ^jbSs8^U^z{a31MwMrc$2Yy$o&S7&{pTHXKGx`L
z2zq4FYpr4wJTYjt`%Mq-vs^4On>Eg=|INs-J^5MQ_g?cg<GeEt-OoNRjhS|ub5;F=
z4fYC0s`m12IB-}{c5~9iq{R`A>uywTW~g7FztL{9>;o&Y4VucY9&d^hb94)2*eI2B
z^>x95Ylp;o=ZE<;gq^Exf71Vb2a^WtttWN`5wTo8tGgbl^LTW`q<;4~Ge>B%`4r8M
z%XS{mo+e-REJWO4(Mndc<ts!c>jXUiKJmiVmrb?~bK*X>Mwd?tn|Y{BW~<(f)mNvn
zP1S1LrfcE!SbpE`2UpATyG?h=I4%wdt(zb?V^-nfmI}#Rj}n!YFCBLDyn1=<fmhcY
zE>3A!{XP2X-Z|^$98k-jyy5Dkaz>Uj6BkB>Zs9e3{JcUz_wK6|EE*438^q7+Hf(zB
zet5_Bt#=dlon~iV{2=MkG}i2Mhu#HpM6DIPVez)qw@36v$6wV*|7TXJIhs!ke;%D7
z^IBrJR$cbh*V#%r;&NL(ZtBfm%6)4k-}+zkMOOV3U;Sls8gG{GbEcd}GPlFh=iL`+
z4r<dbbK80*uXk~z`lWk!{Ou2@b#G<1F}`{1*`en<eLu+@{!=C%!M(wBRb6U&y|BV6
z;V=>HL&nd~9N;`Xi&KZ4ampQI#UG2rT(it&_8v<3{cxp^eE;R6MO$@L@0^J-UBBj3
zoSA)jZSdAxDXxN?A#;UZ#){3{m~Lsf*ubdqJO}5k%jX#wXY82RW4&r;e70N58J9QC
zpI3>$KYoIvZNedu9jZas`vv}8_^{77qQy-8Ut7zBm`^XxN9LQpDw%I}Y{h-k4lR}?
z6U*)}9Lrk!I%vLm#s}{^qRtKHzHfRb%QuOCtLeo2mICP;$9*L%HMW{}R^4s?5)t@e
z%U)x?_v_7ldoDfZPw~I-S=?Q=X>)AZ*B!Z=r+kZxmM&S)-6FC1<tr^#(a<lRT9wW2
z3LadU=6%~i{g2&CR*PD%Pd%wKYo;RajE0~^TNBQ$Dw|pyA%5`H`(8Goi~nnSKUVj)
zs4p_%J8bc8S*90r$C>Ao6;$NUtnfS;wB*U2b+2vCf8Nyb>wve-%$vf6-rp3HPu3Z%
z5*3&o&UmNSu4qZ-owx0UDa#_RU%B{lT62@gVcV?tYgR|q9+)<x-SDHsqO73g3fIp;
zAy?e|ls{S&c%_A~sHE>^n~`%<c$M_bo9iSV7A|jB4w`#nnPGSL?CzOYH0P{%{p;V`
z+wq3`WH+2B(t53w%3IZ=wO|R$<-8|}3}VdDn;9OvsI;hUm>nP{<8OBR<)r)TKAFv)
z<Ny3~bn@QX&lk_z{rqye-CX{@&+qy5A1CuKU;eqWvdZT9+&+in{O_loerYpb-rqg&
z=G*1%?f&;_O7_oY(_LK?el5sHP-xoGv<SxIiN~K9-kR+qHE~UcUJ|Q7u-D-y4{JQm
z%n9>Lmh8xoIi4s{e6BLqwPl8D>DlZWJ0%8-zQ+bUEg>^FH49JOY+o9*q@%+>`s{-u
zOZk5P*H)hw=vdfyooKpzP-SyulHM*Qrj47Dqz>xTn6fde@7jB}w&Xv<(QBvwyxVQQ
z|99xWtM~W({WqIi>;9X3`(H0E&z^m|a^CdHdCx!ZidnM8@3V+0!zWRP-nNX!*bnZ%
z3hV!`xL@Uyy0?bMM{<cyI>YIoi_{n*ycCZdx#RY*uD$F0N0);Qf_FV8{`F-1(Dmov
z&qq>kTUUyicW3Wr%#x^Np1snoHs7#}EhBQxyS*D%JiY3fe{0Uxuj`+=zu&ij?aQu-
zCuWO$<#H9)I&HRxUE#=)DKS~+OH1Up-O}=1tMvEnjy2z|t(qpdaaQE(Q%`=`9{0YO
z#mQ$~uDf&FkMG~=q)Rq0kkY%b?0on_$+_)ImStEuvV=IN^K#tUXC8KKLafOVS6$N$
z8~qqLPB`dZ3)wnZ@%Ey_0TV@fJsCazZdkEfa(dZ{->$qXjP67)UEz3GXF-Zv*O5L}
z7lx0A?`*X4aya<>qus{GMInYIQ&(3lc8;xN*n4=}<-_63_on5w=}O($?<RapGROG9
zkK`?-pBL&SF=Vw|@{-D)r`wfs^nzmGch!x;92K?ax9V{1k9QZ&naF+7%*3&~xMSPu
zOSd(ur!DMwu|fH(*8c0$S9*3FH#-rsb^Eprr7TRMMh0vl&Pmhvx39j)V5=i;F70`F
z@vY6L@?ZaJ?z(qLCCj8F;NWVGl|?LbdIbu&j8Yuh)E0jCW(X~P5VB)ROvk41)WU0%
zCr{JfAis?_^ziP-U;hdIny^WnNv_#U^w{cUtn+6_*QE6SI5VZ|?M>PFtiQ^5j~>`l
z$Y0vh;h~h~!*oh;f>+NCJrAy(3*FYfTo^Fb_Q}1fRZB%(OWsUfuws?8LZb407vKB8
zXa93p;$tvzmCf?L*McVxZ?U<kJ!OJVhu($t+1A~j%XjULJ}IAhqI5#Y;?l>CU2R<p
z<d_;lyH|G2&DtE<lKFd&*1ly~MOliyrtiyFzkl6wSa-wh-*abK@8N&()%ln}V=Z&^
z{yW~*x39J~--xzY7nP{wz#<rVy;OJUJk48Acl>&w=DWSz{^w8s`hU+DPH#MOyxf2B
zqDV%W)HxyoOJ~e@+GKZkTl#|B-rJ5owT}e0>76h5=-6>NM!5g5LBXa;{qlA<KEAk7
z&ted&8+oXE+V#>>=cvGwJ4FHuR;w<*aZxc)sUT7JQOuH<X-hv$Fc9oJT(bGLeA`3L
zf<ndW^MRey9U4!x-l$q7wDQ>;Z_mehGUboA$n!FEAH8sQYTHpoxt=3w4xAkv7FRpo
zw^=k;oOwOz%;akadn9%*cjRsnzc0OtV_T*F8y#N9nW0a<l>B1L^w9|0)Hy>-QQa?7
zLiq7RgL`-TO{Tfd(|fIxk}}Dm+$E|f@<I!{;mqrwc1-!6Df4o}nfIT+gY$B^|9koW
zU;O|7d3g5k-GAR!_uEzf|Jz>l@7%RRmgkQz|6W=9|4T`fo(-@3#lvs!{(JXr+QZAI
zKmPq$b4uRpNPxn$(Ce1UOB1^`8z0Jkt)<(ow`kA(-c0qyzin-5f5dpnwLL7bcvHT6
z&Sqm7?}<}xT<H4Yy70(J>6R6)Y|NXd1hLAmUMUnLC1m{EbpCn%>6bmXBpB&bR@!7)
zrJiu}a`5Tc*2K9$aq)~T)^axsE`2Q9E^RUK@1yzkpWcZ-4v)9}v%CKD?DuuM`B;Ac
z{ql6W{m$LH*{5E9-2SsDX8O(jZ%fa*dh01xM;2Q!ABmO!aI4^TyY>$~{dceQP4}wB
zKW=+&pgLhwT!?nYr58o}-hAgS?rMrW@^xO8M%JRZ`;(39U*0;-`K|d`)aw^D8&qFJ
z&OMR)G4$=iSmq2?rL5yezuT>zUXU5B``VSsU3jbAl@p~o7bi*|4^y}%G<&vL80)t+
zx(B763m)C;Eq(Z<=%(LOnq~_t>)F>-EUq?*oNzUE?kscfi!PmP^X+DRT=Bm8We~Gn
zxx&QfClBU6dhN0|Z(i2*Q;csWF*tb%uHs~I%?P;o!op`I-{S4UDmHDU_Kp+ZJNCLh
z?cdZD`us?)#WKd?lB?P)C4M^aHt%{DbW?+cGkVeTrin^IAH?_@(vqaR0vh*BX?!(f
z>DsoUJ>9>ra$Na!PgpQu{afX&hgPke^6}JK$2)>2ISbdOGaYX8&0px7Q7RNIf4A*+
zjCD-sjf%dE3#_GAruf`j_BVDB)3t8BRrl&z`Wf^h-nrT+uGnaH#l6~W&lg>L)^)C0
zQj0g19Pr7C6|MMx>z~z*kj2erH)ka+Xlm`?aCM0>>yx;5PizvmuF+3XsSxApu)u_`
zBG*sM*II7yLT2rW6D$)ZoSYyiEYuoxKXLwwRg=7)1?~6|+p}ZUtPZh2Mb))w@6)bj
zMBDE1`?SVvg4h3J_BK1#*)2~0EO=Y&$5qxR4}{;W)x4LTnClnF>&3&_;m|W_fuM$>
zp}?%mCq$$VO<keua$;5GlB)Xb%Ud3L>~?T%U*7gdG4zgg$qktu*IvZ0U1-gfdiAG!
z;rq)~NowjBT~>(-{|=acQg+SM)TO)^;x509NyufCGH@t+v|=ISril|p^7USSaOAzY
zjmI(GZ(C}l$+ghj`!646?TnXiytv}}!@p{m?@K8=nXG;<I&ppK#Z}SfY$wa^GUgp#
zmUmtFkMC;FEo;+@qqLf)Ou8NuwthiQv(^gDM+`H1+7JJ|JKg^0Cx5$Jw=<*8E2~=E
zjtScuQlZ14Hp|<<EL2;5!}IozDG9Djxv!S(Gjvs2bR^S9vr%{X<=;xvIn=)F-hWJ}
zE7SJSRhiQXs&6t{BP~7}7#LPx=<He<DJZ&Lq^GUL=kVEw-^%`fd04V<O5|>hnR>qh
zxA%AkvM8{MPFS>viQ}=!8-D9hS0+)7Y(6uk8JbT*L-cG4q-L<^$2Xkl(&=b&aXD7^
zCBUVHfyqkNW$UxMZ4$;Vjf?s|1n2T_aKD%P`DMwJO>0<1IHz*9C(3+#T(bH8`(=d^
zIdc;3++MXj)ojTN@rFkA#Rq4!T(xLSoqK-z>6#;7E<Wu0a@N3p>h}2hAHVniT>W_W
z<8b@Guil+K?0f6~NpXA61OA^|3mgu(cAeBpWt(=|$KO5SM)IZK=D*v`e^0u8I_3(`
z5gvtj76;3>8oUijVFy;lGi!QVPG`FO?(EsKM^>)+S!AhyI@Mx%&?kc>ck|5o4l6v~
zm8LW;XwL%{emOTkXQ^X`yv}#_d`(telqj)^HRkTcy|>@;|NdLKfBy8FKE0=dE~T;>
zA7prYd6mZwsUw??wPu`HvPjig?{!^yg4{CMzi-aJIO{H7`|IWp`Tu`+@Bh1+|Nj2J
zTf6=}QQ!aL&t?C7d;1ycQ@jK<!-Q`)>P-K7_;EXbzrQ!ncTt`;L&lrC);$0Gv}oq@
z)RQ}p9Af8LyerJj<x32+(bhd_DspXtg0833;@8@&i8|b3YO#KG_a@E&9{=UW{QZ6*
zp<j8V9sIaf3%mJF<p}m#)23wjuO>{QF?O=-ypx}{C)}R#%QPka&Zk?EIY)dp2rXOi
zulhseUaS3)HnZ0Y?Of-y=9Y$afW|eqYxicFv&4S?``Ap!UFow`t6<C==LgTC{N@C)
z9IH+YxSM$A+4qUI><i5&h28vpCeFuo=?t-{N?+rDHE)@18}8}AA7ohZZl&p}ot$N%
zXPGDDN;AmwUT=E0Dxhz2xJ!g1t82_MSJR^>KeW17sfBkvSbfBUk=xKrYEocmNb9Wo
zp<N3EW_2k{i%1p;y;y%`#zd+7F0uWZx8j`}4#}t`KM&aMaHGm&mlyl%JO%+}rPcnv
z`?BoJb4^?4*sQ8pCF;25OEhQlQ~~)HGw+E7dfV22=Wv`Achq=i)3GgvZ(~nB7u>}2
zp#J`i2~$smES=}_!ZOPxT2-vR@XvBJQI4lG>}>^jIeEAA=U!T8wpl76VWU+yZ-DNj
z*<CtTrEeE4y4o>OByb`(mv(nof~H2(lUpWA>87F=xI~R=GwtvGXjV+)lik7WnI(Uh
zY3c0clAs-DNltzqipu?8lKVHfhhCjxG~I|}y^jln!m3CwEtRe$2i@|QKbML8&SIa>
z{V~4(-L69^kEWJ9w`}4uKUAk`E>)Hs(k-!Ohm7k{o7+-TCfs@4s35rD$;Ly^O5&B2
z%=(N%lsRWzh<W#B@{J3}6DRMRX*GYpt4HIi%vGIS7vBU;XZ?CAuhM&K?vv$KJJ&y+
zbfl%FQ{(DO+wyx?)oL;?=wv&Ym6}ZL+?;skWzw`PJ&{F%&M`}bnrm-tn{1f+MOszv
zVatnxU*CGV?pY|DdtlkFt1i`fnEPtWkJ&28+)s0NO5KoA(>+wa+w9jJkqXJhMWtI8
z?*GXwypw-(M@Q$(9qYEWZJ8}0k>Iua@WXfizCEm4zGKafeX}nc7)c(Safv0WD?&^&
zOX7N@=#vn|hSld+Chd-rT>Cb{UFp*#9n~P+>8CT#whA?LS*+zaK1Imbk$0k*LQ0C6
zz~P{|2cO88)Jry&lqveK>}G$JDYfkO;j?f5mBfgh<cd|?uvRlRwsT`~K%kkKgQ>vf
z%m;}+nuV!M5vrHl=lUEB=}vjrSaHBaO-^{#thFx3IE60mkYDz<LDQ2_H{qzt@lTs|
zAHQOg&AQ~YW4FzPBVWAUzFEWT_1NU?x5E$rJ^S|W+p}i|d_0kTk^PbWt7A4jQ*sS$
zWSAt_+^Onz+9M-MN8fUNw%+L}6Wngx|8Un<{{Hmx`?cH8eg5|?t-ji0_sjJ8e_dv>
z<Zdf1mtth`G!nT`IqxawdEtXwwzv0xKV5XLGIn#L+Wqa%Bd3RaE5B@US83U%b^Gk6
zD}A<!FPb*xQeyq)+Y4{y-8(z|bl(2x&rJzuV!rNu=TLrjUVk7b=kdZh#|k|>T?G@*
zu3pt8V(R6*=tzr(;N6DKN(rImCOW~ItRBaWF4_dl3$qoQ(IBLhZo#a>m9?TRZ<m2m
z@MEQ2>sW&i9kzJ)b+YE&J2rP@_?Z3c|Gu}Y`TsV*-+%h)kI(J@-#nkk&;HQS%S^CK
zUHw7>+pmK1-8Es?V*WJFy;M-Wd;i|svmb6+Gre+O%(Ti#z3D#>Uw&M<FXq|L8oTq)
z9~W6pjd3{IW7BhvK{d!O`3ECI+-!UANk+Os*K++6cIdqRS>%^_V2Xm%qV~W4)RumE
zRwi|<^YV;}55}i-cglwCGvuw>6mR=|jlMwc<A{9k9|h+YuVmv?fA%F{vD#|Y*foW>
zLqoSrNqXk|$7;&XbCcIhuI@Hjou#?;R8#xv3D-IfeQ;V3m*&88!o%>;akezIXX`i;
zT~<|CW^H0#z4D`@@WdH+yl+mh=X=KeKZ#p8%jU%@lYflquHIh{zFzWL`;NVfNP_LF
zWWW5kyoXOYtz73Ze_lbg+avF+?eg1iT~y{g8EBE>5vHDz!FDBwG1NCD&C!<CS?Orr
zU6-j3Jg#L)F-?fz-l54oW3tqS>!o5FBiH?Re6lPp>Da9FjukJ~sJ#8;!Wp<SYyNG8
zopEQ_xcV4)1^1b4ck4Y>xLViovFO!;sdmCWW(#8iWMYF3u-q+>KU9%p$Y)j`+Iyn4
z@qV-Kg!3J`w>GaVX1eA+b>7QXcM+M;Etj|ah}|>i@|vjH^;2~&U7F%-m?gW`?KP|D
z$+%_9JVV&{e5Pb*>I5yx@Kkf{<`A6dY<x9DcFj&r^8)P`8*Ul=Zdsibz2>9u#T`|5
zPxNhPma(W@cz&zx2NrGv14ZSuB~Fi&Voc8}C7)a>5-28YxHHIe#?e34Y$r{pAM^J0
zKfA^6%(vqbE7s^Y9)1w8@$0fuXR#UEuE{35GD~#ux17|~@5^Cz(pJn1&<~wCML|_W
z;roRJyMGG4N_$r|KVbeNwpCXj24%ilX?N;vRpcG^HQyioEBmS}SS6R|m6!M^SNfG@
z)XC{f6`fbBG2AyOT`M%H+M~na>XN5P3NB4<e=BM>3A|x5zm`$G<mR%nFF*e-D|A0}
zC27l|uJ&6QEoTqE{{6!BwN>Y?rI*bjv(MfM+gkql$1>rZxelrqa-C;~_cQ$u@9bcd
z@e5MP(^;#&Nccf0SF?b!VOQEj)#+QOEIqO7)S-^c4-Qq{Y2sTH{PD_*U7wsyf{$Nl
zGh8|GV6C%Z&tn6>!#kAa&IE8Xbd_y3cT<vU7i47hK6leqEbqOWOX_7mrEv8>a{Ad?
zX>F}{x~x=O7A4xt=6yHceL3?i>wHhe4Vv9iO`0=0vy_%@%91>7urqJPlu6znkN#Er
z;pBg~ud?mOxi&T@CX*z#G=~tcvK{v?>nMe7$_hIdAa*rr>I0E|d_DQcrPVKGy*CrI
zUoP<{aGUh;*|Y!NJ$&}?bNTvzCl4P^oGx&^vhM#ollj;AyJfodT#Kg#-TeD%XJWEc
z%C}m*nF;$&JS~#@wnd8T;J;_j4DR-noX-qmKAS2iU2fj~+xC51lab1#BE2H3siGSj
zL?^lmWX9ybd-2ak$M{t8v`31kRy4eJz07d=tOd`CBrU#68%rbmrk{SiYoBInz8Rm*
zfhlbB_%83#Q(gah*Rz;uoAgxI2OUUIcvfijhW~M%OuGq>rs3j+Q?7>oOWU?CJ4UtR
z-yQ+|B^(nqe77inV-3-)ewG=*7kSR<xzlkT`EyU-YSu@5VOp{8Jd3Nt>D99R+AkV@
z&sH)&GA)R0N6p8pjMwHL+VuZ){r9`S-|x4!m%YEY_P2Wdr|$lHHB&4sIp=UN9`SN)
z`D<|ZP2M6s6{TmlXWxB$He|UUhldT%_V3>x=f(TzO+S74@#Wv|_x=4=zW-<Y{Xc*6
z>woj_|994W`RQHz_ujvMr{m_!lDO%iTb{^#j%q64Pbz*tvnz#r*5+W<EX(G3W(o{@
z?C-pLP*83yn|C}>rjy-yO=fVo`Pr{4FBs2T@3X^D#$}1G(Aud@>l7DsD(!3G+<mM5
z@pG+3OC>X1rI#-@x*Qc%mCGf=`+A{g?ymUa)^*0#P7|69_dNHKEfFhzw97s1nVq|>
z+|S$}@vTQiIvu%|<t2X*tXuLo<ctSvpR@Sk%inhOyix58e!T8e&x)@1D|zD%`uc}Y
zTq=F^-Uj{|dxB1e?bbcQr0%9trW@3mbZ^00QJohz-@afzZf5Rf+r+3@yw$n<c98s%
z2E`Q;3o~U8UR$lkv+rNh757&Q!se7Er8%4l<#^4V#K<e>Xj^=dp~J>Td`g2rXtGVx
zMgC${`2_vi&3dY5Vxm(_E3fU^qp`wq&uyjC_4=j;d~8L>XE)4vv0MI#;fi%pUzc*W
zaM%@e21zG4^d0A4=(xct+OkLYSl#N*Z3ZhiPOe<bl#_SSmo-lHN@>`hqdQkb&VO}B
z#9GcGhfVME)K@!#Rr51LPn5rP4OPv&s1(#CCU4gHWYN)?8>2kMW(#S$eZO)|<jT}<
zg7Q(752hY#Z<*4;_N^pS>O{6=()*G>kFRJ%7J6{J2%o&_X;6_|(K*+h5-V3Y)~mPv
zTV%B9zz*#!&YhDl&WICD;$6FO@9nDrytORMTWqd<`?JbFc-5};ecP@%?X<R$SRvK8
zy0Bx-zAR>zkSj-gxLlsN+;PzS@!_`j)?Fc2b)(l#RM5F+YZB^v;~n!aM#q<%Yi?DY
z3^nt!n6c=Te`(I4e|u8%q-9>LKAYCuVIL(un`^DpN?+T5w-^MsEIMu7&J)ITzId_u
zS4EZvmX+?;cLiU2{@r($$iHbBhn25<<9fg0=B_L=&DgU~=O0zsa(mN$`}+O%{Q^RQ
zD?BwVr^np-v-{rex_hfOHBKqAn?HZT>B>0$&p$V<%f1#Aaq{c(*C&p!7Hqy{a%ukU
z2FI<DcNZQLi;7+&V7&b94(p!Zhj+}rG+}}=|1rbO9s4-dlctOPjJ?;zd7bfi@rf%o
z!Rs%*;1CROz3Or_WC9n1vhU+}8}8qKx9qfyIH$zytvdqW#{PL1rY@>^Qy@3ZV-C-S
zsf^2>Y}~A98gSTmK{~^Nq@WgVW!GC$5?vW_p-Z?{bW|jzy6LDX99`mJ@qbJ7l6Lp>
z(;Dj!+O}Bq9DZ0(QL&@?^U>q;|GhcA-mYp#;Le(gDw}or^3ThcKiznPXT>WG+pmpN
zn`asrYBDe{?%Bqdy6kC)imJ<^i4uIjA5NZqcX|8Yw+20t*B*bYnR&WqX7I^OuWB#(
zxBRJhA3vRBxOr`Y!YUS{w{I8wf6sDt@pBIh4KP}`=}=7+W8n;riLte=DN3g+>uT!%
z{VQufQ?~oIdYa#5175QNyPKQVJU7+PKIa-H7%bh%=636S+4kRs9(w%-rUw$;PiXc&
z-m<29%A;~s^)`lQtLF2bnbIa={B};qj7pK6!LxeLbKhB-YUCX0`-LSg&2U!YxnEy)
zMBP-C{`5AEH8H~CWnKM`AB#V#$G@Gn{I84ry?wtw%#Q#2_h-ewlm2zTex}dAdru}~
zj$7K~H=F{@OEYI(nxJIZJ3;W-<&_-;Qf-A&#{@6>e9GH%_wF5`+LTqFAB(@Q`*ibu
zea-iLpT7R*zuO<~|84(w{{Nf%e{|2@{rhy~zYh-&fBaDb+I4^VWkuDW5C0DT-v0b^
z;3=;2&XH>*J1;%>K1=zqqbS1-^W=#=A?{xzJip0m@x?nl+O=dE-<10fEIY0~;A%X+
z^2I4Nzx6Zr>iyfw`8C9K%jR##wX+UM_r7vI>>O3;tNU|h<ArT=rtC6QEn%4zy)x$i
z%cu)eF0rNNooH4#bL7vx<28H-3vcvaxt@|0^@?qgS8q<Wq}c65E?0pt-i#d+Rwkb+
zjkMqT;>e8NnL-nPC2l|IVBY!WYu@(MzpYG*H=T;S`ghIxC8j^Bb_MOd<J`iL@uXGV
z#bn7u&7xKJ4oJ;@(&{w#=Z`;P&I&mmbpgVMLVt+-m@>&{Go!GChtc7<#d%8>EbuVw
z$j<kYKJ6}%5+jr$Ew*1!^ObqoQvJhXM>}Ir+}-x`o|XINwDm{XZoM(G);?@(pkH_L
z%4+pB93QW~3ZE8`GCSdpi(%^$-7b#RCbf%ebB=24N)%e@oV`ufp}w^xCr|kvLz!Us
zN3qro>QAEU{ucEfTKh2~D=X{K%uiZTp_z*&9X-1D=w;JC&(D4C=9{qRCD#@UlXG3G
zAI$hWzhOtI7NfLl(z3=&4mJTvM;a%UzwiyN&7J$<&66doj9!@Vyte4G6rZT{=5e!j
zYJIG9!fB!IfJYHBTTe6m?0VPVVE<vxn%Rr8#aEXmB(4cMvGy3-i^r{2A8Mw~b=|oB
z)oVHK$;JYc8>J*)edW(8_xgL6YwEMhK2IV_V-mEKR(|%&UHV-ut17Uk?bjXo<!3aP
zHU`gr#2@&<y>oues|a7!M1@Wp)z{fcd~bKP7MzUn3)$Ysl@j-;y=O(1$D3E8+vcX2
z7qcrAos-?hcRZ^g^z{GkReK-bvJ<{|#n<QloUP_3&1QT4uUXsK-*JsyWzPib()YT8
zv!=YU3!SJH^;U1W)bR~^uXmTfU*7t*DC+$4&p}>h0Se77GiKOa<ykq|(wuWy{1;6@
zMb%Fx*A`q#TKw;WK~MYNvxm>#J-g-pQboqVCy@d|MxPI87Or}c=i%&iVBuBXqq-pv
zOq&*Vo6P#FAnyK3BzW>1)*0+l$6kIb_*_t3y(^|@r^Jkk8(fRZD|i(pR1yR=UT{5V
zO_Zs0Nb=*be9?IO^Zv%%wT1j4Yf>cL6doz|w&-;0Jl~@icBP%|z!|Mq{#ElmXGn2)
z1P0vx<<`XaqW79pqmH0&@-6?vCd(8z&Wb&E`gZy8`+IF`YoqK_qItHiQwdmo;Hbl`
z2Pat^*e2MXTo@DZ-L1t&&8z+H!^B&*J-ZJd{%b3I_H&WlS$VrX`{(%oG-7_T_u}6P
zTQ1x@vO=LFQc(NAp4|)CXBO==jPN^t)W>JCXoNm1!vr@MHU(X#Ij+x6fBJX#@^wxx
z&DXPU|Gs_r?TlkV*G~TgE$W;<&+oZ+Tz5y;v^m$eoNLv*E5jzOm~!q>qY+Do&*emO
z*|KMrv-Y{UhX-`JEs*{_<$#an&&HWcBt9{=?`FR1e^^m<nepEEl&jtsb6yl_#)w;V
zzP%jq>0pauZ%d1XA;0|Ioe%AQ|5)9BZT<eg@529o;P3aBulxGla{X!X_`07j)8}6_
zSYo)4cb$-5k7kJ*C#O%!Jl9i6T0I#)lXCaXX>06AndNpe)z;pw?)TT*_y0b7`L<m7
zvw!`^^Z%y*fBOI1{XeJw*WHqriQTXFde^$so96gE&)@&^SN^|W`u~5e|9|-Yzq9fC
z|9?2hXlJT$u|wd9K%qoi*ZN}#CNrcQ1hs?s-xkbXkZW^C@67h<CDH7H%cbnE=y=_T
zJi1NAmpgS=>D2YdLqo4`Txn@@`s??-BJ9#P7dE|J9bo$Ex7T9x7qP`*g)F^+GP8b5
zNHH1iI^J!a;CJ}$b@$q<dI~>3Ufjo>p1N|DSn<KVdFwCDb^0jqnKkc)_05N(rd#JW
ztXfbdo#-|pOvWZd`XSrIn!7E%l71~3ybJC#Z!>(9rxUV|@v~xR^-Qyz9lcA|uAP2R
z>OVhYV{#><kDgLNg5b(kTC2P(_DtIPW6y<<xyiieTlSpy+Zy2Fru|~VO*8hy6`I=r
znjLOG*?Lh}_fD^aU_*2Gr3dHEX4xJ(lDq$^@6L&mcieXWT*13dX5}qGlg^2+g9SMP
z4%+Gkt|^cge)!z$uGf#_PILG83U(fH725X7?fHRolX!g}3q6{0c23m6Ng}fRHm}8Q
z&slx;!mZ=C`)uZK`)zzMD<OIE!JSu5pIo0MWWICOiZzqEJtqdG=uX=FBs6TgNQH9F
z!n$PzLg7B5%a6=i^iYlOXm7cS)XaUH%6_-R7n}=8U$V^jhSX`TXwNgVf&yjR5@kLa
z@Hn@4b!%o?WIEk$KEGkE?1>H6zAwzW7{1Xkcj`669r3R2d5eFqtvjBrCfT-SS&=DY
zfbYgUqm6NK(XrA`V!H$sOq>~?aFtzsSDe2hIDAw1mDr;~-CkK0Eov4!*4ezBB^DyN
zJn+uv1DZ=sDhxg6yjnc9Gx?#=j?=5`FX#ICU+BNbd@ZAGUW7~5nya607X5p4YHt_!
z!8Ez7m$e?(8)oT+ELuHf<)y8+e4VdOx-7Y3oka1*`#E6~*FJiG{L78vORtXipAWT+
zHHh2&pVQN|NH2Su&_0tl$Bp0Fxn16K=KcM;2i%T!r;BEGFDtZZ=u_r-JM&-%XP}ve
z^p-u$T@5*D?_ayF$~=G5>4-`1<G*j;zWtZxb28QTRd7L5<7JCJXQ>$+K7vy+5)Msa
zlRvo2PVIR{$dVa~D$fdKju-mO;Zd*-DxTNP!Jz8*c)|qtG=?duvC~TfHK#`RG-NE|
zGwl8sC+O)Y+MJ%J#o_Gr_{5%Rb3$1aVq(^AU-@0^+hyP5H#Y=oX74;<6@Bf&mj*Eo
zFQqCaAteb$!=Gx?1#Lf?UrWw=%VA~Ob#m7E=iGcRjrh{%-K&pyv8Se@{?EJ2%J>pf
z7t<nJXG4J%EA3t}Iw^Q5Wmu`~N&dNFPN8JZoU$`6&QkfyGR>Eod@|rWJY(72f8WaL
zcUS-U@UY10oUw>UwG8)L2fdwZx*{`mcQq+0z0dpc_`I8gbDZDutsaguw%n-F%sf))
z)p>l4tE<n~nw>SD|K6^-czS7##o6||g+7mW>3x1$wC~LF%atdyze;+a+dQRaqi9(~
z!rlJG>$2_i158?HC|T-V&QA>87#6ZhOi<-fW2Q&v<oUmsN?gr(&hj`<V)<Q@ckFWP
z6;0*K^Sw0P8d5k?x?U%}<_X}_uoh$9tFCCY=WhS*^K5@K{ytH+|M_RL|Nm#x>%Z?_
zzyIgA*Y}_8>6qqk_wU7HbN<T)hD_B{6CeHJW?$j9Oj>D$M<nOvBFoubGY`C#oiVL6
z_Il;MGfPkW{d@R)-9PpD^*>dYJDwFv^wQ*gnNS^H_4nue|2O0R9JT-bb^X7m|9}1e
zSO52Q{onQb|2>nw)eyC0-uliCm#d4VeAr60IE92}S3i4j%eU?Pout_}s;kAWW$|wJ
zIn3MBeC^7|pq09IGs=bid>FG|E?KxOe?sgFR?D!>d52x3i>$9VHfS$7X!TS2+5{&D
zuDvOt)d9ZBJYS5Ak6LZ)l5dDUq{a}LGGo)#<`Y?k`e*H4N1QnqtR2*Nuy<|8Gv(9o
z*vss<X)WdxIT;nTk}*s7rRaM1zV&Y!t6v^WyPJLGk5s@d`STsoOLTX=x|+H_!Ql!2
zVYQoI9StOBY@9hsh)b}#PA|tvbN<dsD-p$;t7E?8n#8d`6!l3<J65W){LH0&xz2}I
z^xSSc&JlQ!V}p~?Yo=?;tM5Dvd05^%_m;x)z!T2Xl{R#4+O~6l=vCLx6+ep(AL#yV
zkl(GoI%|%Fh+T)l^<TH08E<|OTzGZjy!G{t*&6;g)(g&ymG^zSsqpD)waV8Fi(Y1O
zZj?1zCieZBt8<R#ZryX`!be!`7yNUX&EcYbx9m#L77^XZP1nCKexhUP&AF2=>&RmR
zVfAN?mnK{~U{fHqXj6ac;{CS&rZEZgpYvNT)g;Awd)tN19gnYlt5mRQEw1x8JfqE2
zRPT&a){VHIYHs)LxvbI-Sk)`PrNH)9gjd1ai8-9xb9q0U`r3EC?O&noL$Q*Lu`z37
z_a95Y>9%(Ltjt7ZS7W30nwCs~DGXN+Ij=Nawf@o3TNh?_EDSB0xx}~Te$}+}U4PT>
zU6{syV$Po)x4sYg{ij{kx}VG4GTYU*dd*{lsSc*$Z8Ba5nP&!w9tmhTa@paa4bRHz
zEsoDuE#*rRIa<5<z_m9Hcd8!rhA_%{u540f60JU3ezz!8V14raLhqm(OCrLzYdt6}
zk&0_o%Xyu_K5LQK*_~^$Pe!UF?O6DI$;?$#b+YZ|`!yRxo0+=KD*JudWa->x3q{Um
zO0;{rIT~w3&h@HfjZ<?-%HH#IL&I(biJ6t=Z_6GZX-L&w)Fu{n$gx-B=c(Bmf>Mi&
zE*dP!oOP_w;YiS_U29Hn+M!{+J~DJ>rq9a_xBVY?bOfC;>iwK|YG=nI6VNjF6t4Ee
zi7}t6tYnMrteRBzIc}V05uRFW_~_F$n{-JF-phG&uI))He7hq|Gfb8zA-2qY6+@FY
zgRt7ui3t<uxE?It5+b`-f$hMWiE6Tq+fO(+D2QGX=wo0u6`$UDV%Hk~<EJGH7;3gZ
z%A2v_h0XfsIjb9IM(70vH~46I#+Xi;bv<Y20uLo?m1+gWnVS=e{<TFcY{~TCis0Gf
zE4932$IbJgL&*={es6mIcu0nZ#tNm^T;fYbIXi`xC6-UNTllhI_Pu?!brt22#euBB
zQO~D#PP}>ETr_%GjZOTTC8=A>{l%}lc<3oj>RzYn9njWu((B~uqFpCq4CX(7oG3Aw
zF>Nu^ifJ4ZFL*wE5g{>suaIR6E5or&mt!V;@3w#ZlfkUJYg%ZK=VHFciS^rmH?cEK
z(peqzX_2?CFUJ%|wk@`|ZL>E`@@xn>HnH4hBVYNl?dI*jci-NB3v@EP`ThFuUw;-^
z{ePnUEWT%c&8FMFoQzv$Oq1B<wr2Mgov>ixZH`M;a0v6c>@M^<a@pYS-FyH4q}N;7
z*8I}{_efvvUX6)gu#}sh$ke<aw@i6U>i>UuUjOg;_V$-X@&Dxi|B65V+K1!T!m}%@
z>t<<IWKCQ+^O|Von#%Rhg!CC^USH(>;^Jf8$-HmBHSm?KOp5-v&#^G;$I_6+tHS<G
z*y^$&swLsK@?NpkEx(t^?o0ap;M-iw(6w5{d)BD$PmP^?I8^r3`9?m0Dk)~8gn~@Z
zrFYvW#A?eabX8vsGG1Gf{54tG)<pV&+V1a@`Lo%4CP%dFJl*v9#Uq()QA@=v_lTGL
zQeC@tDP@^&JQG@(J9j;UIZK>C+Z9m<#fb_V8YU)Rxgd7%L)J2%lM%jm*%E`+=H`BU
zBo)c5v_UTTQfksQjrhYKc3nHR{!+SAoIt%{Wp-A_n@u-<v>nQj;jj8X_hHz_Gt~w2
z)GXwyowKg7EjC(Oc=*JDV@;vExvDdk^l>d@d-Z3^ttQP6UBa1LRX^JI?)|=6uWiQ5
znMP(i7A}-boc=_TbC2okmEx^iisWYhVXTr-iYffL#;S1T`uDqcsqoyBzN`ALTVlqn
zE$2MWcFmY^{d<r~_0;=yBAQ2+L<rvhar8{xgC~+tl|Mh|yQy{Bar>8FlJ5?B#64>N
zdPDvDt&HHpo!g~m-RW*?k$4jQ?Cgh?%N8Vk^YMJ|_o<g#^1aWU-_K4zT-(fAZTfiI
zW0!4(xpUQ>pPD7AcND*~_{LeDTlD_Psj&K_mhEi;%MJ5nCQEL5#j(TL`252){tCu{
zpLc6F$!*!Dlea}c*H!hP+O3@a8xmixool&e#ItJRO0}>;*+c`W#_Fa<22au0wX!Y$
zb{Njz>nmXOMrvJJRsaXf0tL<)3@hVhPL!nvWc+%TzDJ5-mDj(HdbZuCboShuv*mnE
z_U!1%A6D^STNkiy{m<2%W^PjZst$f}e`9KLc=xZMQ@eIK9bYLdvEF)0jA+!dprW`N
z94u!o0;MYNF!!(GaNQiS<@%fhJl<mAX2+K_b?}6CGnphTU0;8uLzq8P>axa88(X`D
ztE$CoFBtGVp0!kD;@-Z)GbDl~QdaHnRjz64Y%_V|QgrV1u33?fc6=2!7T`bg*kGOI
z?1u)uhi9}gGM+ZMu<6YE`}?iuJ>SZabbjr{S=VpeOD~zTq3hgp?#EkZSKRVFy{W>_
z#31i>VfQxK-*48OaCT0SE5E!W`o|RS3y!b$wsR!}U73Dr-d4fzIU2@=ix_5EX*M17
znApJPIeCev&f4zSQ!^hZ9T9z{9JcA%nxrh<%Vz^Zwpwg;m=<~6hvV_T|EF)yxAiVc
zc{%q%WXRc?-O+CtRW@x3n{+61X5Peq|K~qnSbairlf@LZ?9)CB8}Hq{d)DXc9Akd>
z(6fKsScF1d%~VuW7+mgrc%8SbY})(ZwX4&gKhM`M+w;!toldlzn)GqUZB1|Zb51N;
z(Zp~jar50{2VP%&^{D3ikAlj-KQ5nr_}6)#+E&I8Uxq6Rim$V7e}7$9e9g9AbNlt<
zvuE4%CEfg_BErnHa%Rd&&&?O6_<mD<7ku-4XjtsV#$D?>ulP(!oZol(Yr*$|;(hn_
z*zB*`w>v8Gie~h*Im`ZjT%n{it2k`#V>@1*vb$-!HQ9N#-8r-9*W6{962~k`v)^7%
zU)cYA_t(#FUp<@g?Xt!yr>&gck^Sv+j^8WjU8LYPVMb4n?BP9kbN;>AZl6;=d;jnD
z{r~g-|9x-2@Bhz-59?&q|IFV1^X&KgZ`%{E9@YEy;DGw!V+m{qLg$|{b9HfCKJlCV
z%0Bmrq5JE;y?Od{^Y!rjy3fzH*Vk1Rwl;R&bZ})o%i+9KUqW!*_0{`-ZvX%6@Nxe7
z-|_!G|M=OVy0%JgHN%;iTU56m+;Xyl_j#%DwZ<PwRUe-IOy25xv~qgr49(W^PKDym
ze7kMsXT3I_`=zjd*_A56W%29w#=Z<lv5WLwkWqA{d)cR+!;=N%BaS%#+`uu*{FH3$
zmK5XZmzU2{c@|nNeb1O-WzyySs~R;{{OJF6Xi3fPpi5c)p$0o1a0?1e7rJjKbac+v
zjFSa^?mGSAK^In?RAh1eJbA{1EkYiDa%UD=cb|EpbzzfPr)ji<O17|3_@9J|O;LLJ
zi$s~{|9t1}I`N9Ucz|$j_w5ZK`@T9CoovvR*<@V2bIpurjvr&Rv_<|cSrM7=O~Icb
zP2<U|XUn%aeKgOH3>4&@n-+YXv!P>&=(TrqyG2CT+IChs&fc1l5t06R`I{+f`@j0O
ztLywK4VwP?s9BPas_dzl08?gfuQ}^iF}fNvI!~3kX@Bs-B&&(1su;W@{~uA_`(|#2
z+e;m_=C7aB4ous0l_%k}-o<%KLZ+9^e!d~g*mOeJoz;;E5iJ(abu4vP*POjsbZyOw
z-mt@po6cWz=2^N=z5m?WO4+c<4Vps71@@SKPunSRF1IZq+_388i%g}g{lzg7XF>{e
zyY72r&pPU}?StzyruFx(TRgveYx|~o1(v_^Y^F&qPFub?T1V&Fp=s-$ELilsOj_ZP
zQs=^(Jpw0VY;NeKv`-fmoxA+OwbTiT%F~msrp~O8@IN=prReGXaK)Q8%i=4z3ocq2
z88I8Pv85?6sU-)ne7x=&_H2(r(3;<(2Rl=Dwl^?zJ(-{VSa3>4v7rD9<B7IM&b2DJ
zD}}Gh#LZf&K96IS%97M64_4--%{<+}ZT!p0w%>o+5_Na=N8(|(Ccjx#A9?DjO-*4$
zm6zuxod=e>K~mj@t7drTvi)XgoaN=^EGO6h`)`4a`pMJ_jVB`hELd7vKGjk5>B^Z4
zr>zs<)GTm0{P>^62|*Xm2^u>Z)mjYtf~A%}wD?l{KH>1yUH+EK=j_&OaR{)uc;mY)
z!-{X0O`e7{Nb|9(Go`&!Ffdjq<k_e#@#X8P;Hk4;pTBNiemv3ZmD|m#FHv8Ul_!V=
z_jRgWeRG=S-jQ|DYs2L{BFoR7?$0wjHCwCLU*c?m*USq}$7MQQd^S3=#4_b2Xfbj4
z-0*Ype_T9g->M`AVR30G*Vor}?BtluoqXy<WZ}fylXTUmF0pK8(wh0pZS@NU22qp4
z7B{DK9RB%v`u+WX|Fx&5@9E^U@cmg)U2pDt`Qwir@5&9uUQV2;I7v__Sl;5>>bk3I
z&0p_$x$AgneZ+m=`Ih&2e$LR1zIY&&>zbf}GsoVpyZ2W=x0$F|`0&(<36Tab&d6kh
zl{C-$o5Cx>x-P6JwX@Iixs8gX#ethQ-#vPh<0_jryDjnmj{=)Fm!}kcGBH<Hau5i1
z5QtsrbuQD##bcKCO%|V!V~JA73Vo7K8Rkrun(4Zs;{fZ{)>UecD{3k%-jttmy4d9B
z=GN2IBf~UHQINgr-y5M1<+b<!KDPh&@bL2U^?we(FZSyH{de~D^}E&uv;S6<5(-Y>
zGTnUgzC%ZYgio;4853W|85-}(rn@bCf4yUgQp<v64#!$=$E=cf;b;soa;W?E=hLG{
zf1W)n{@OBog@l5o&*h%(l^1JncXKdYn!?T6eN#!2`{aji5Al#$;Q?{4qc69bPTITj
z+N0|sn}y$Ii>&SJ;Qw;=NlQ-rQM<Eqquss+iTJ2ZbzZ7|@MBx|DY?@J7OUt7^#-^q
zsC}t0o^NJ)&RKfy;Y~}Hi9Jhpu#(nu)D~F5WZ!){;oPQGdW(L!Y+N*b#pi3TORhHY
za;QHoS<AM;D0<BeovYyjPj=04(n=Hi*g41K|K~M|GafrUE50);Vf9Y!tjNW!@@=iR
z;?yfr7rfcsbAZe1;AguNtyZbWUNRk=buEfj_CnBtS)n^^v*sEtj@ZrIP?YH~wSL9Q
zMKg_z!<o%z3K~vWJYTCiZK7eor;w{x3y<zx)xq=Nj!y=cfP9?hs?R_6I4;!6h+s2S
zJhSa(@cESLX|Esgn;7&TpZ%l#<r(=El>vU1g$|p3t-9-OQFmM{cm3quC)4NYt`<(5
z=o%wcu*&$9lxT|jQ>V+X=18#m-`lb1%|^H1@oVB%P4QZ-ek?0RL&UkuYuX%Bch8-(
z^7ifv7g)K{na@#eU8o&<czX52S4a0wTqz}5mH$xndQ4!(HO1_Ohpzb?`MmUbY30%F
z2?|MzbT+@J=sUw~a{Q$5{3*3zKAP|5#ywNiIekxm!%Y7N-%FQA?5s>sHx@a<y6R=i
zN={S#v$H&<UUO}h2)I-3l(6MMY9&X|-E$S^yWLnV<0|LAigOj6sI-R5chZ~e^`GWm
z?^%9rw}^t8)yGMk5{(xndISD4e7$3Bw3e&v$Ry#7FD!HOLTU<=&z8UM{H}T^fWb)Q
z*y=F7Wt;CXMxT(GcjNo(iN9H#I)5zX5xs5q=7?c!lGv}P&i_XqF~|SgryBoq{_p<c
z7n`5Fi8eG848E}H#ing7x}K|_^03(}9Std#d%8pZTmSLH2EL1yxutz>T4rRn!E~1C
zBbL=YVkexlCw`jYBFuHA`jQ2UT7b8m>qMQ*Q;icP`wwq?zx(Q{_q!#S-T8b(oZkFi
z_nJxZd+hvWn$Ked7lpM=@<_>WkV)!^eZyot;pp?}=l56t{B?JC|9Af5io8LeZk+zG
z=SK=h%*90-4iA3DDErmQ|CX?Q-X_sC$?4#hds#~ll$mKT8zeD2(CNIegMZ`VOre=;
zy)Vt%5+QoffM3SPW$#?qPfSau2p78El}gr6T-$iiCELD#mMVW_YY`Lcrddvp^k#9{
zD+ZrC6j`}-SIxgyzr*w6<Jaw6zNF0O=Yf{%+ka=;ba5G;-DbnWAYzij^u$9v{qCyU
zIoqFyr~gjB{df24zy8;M`?vp@p(wsag=1z5gUgpkspn<#@;C2b3*eo=*p$ZfI@#v&
zq+Dh;1<T(q+rM4sT@`2S?Z{iY<>j-A-8a`eKUsbC=(6bCLv1k{6V4iI?LBVH5V}+%
zY<1}U^}F`*%XIyap2AcbyFE%%q9<6YP4To~|78dM_QO+*Bv_a~281rLS@!VX$B*Cg
zI5ypFn<ujSLgvDQJ4|M99tqG(IsRpK_vX*{|9@OQ|6cu%FE1beE7(zA@#CTSef#};
zV%Qf2%_+S8)n&@(DXOb8*b-HInHn$h99en#PPz2qh7*&Dq}cj=8qc&#`yEZo@96R5
zYz(;mdf)rS_a7@6*k3!*JY#G3#%D8`7G3M{`S2}n_3uQt*4d06OHItSdbd7FFuiUj
zzqV7|?&tco8#l}5+~;}_^*>8FVcs-%*RZuulh_zz^_b7(S{w7Sq`B35^4i+2Imq6W
zYJ6kHjGi}jdxP$7-y{2fX7>x36FpbH-#NOKQ?xWb&|&G{0(qaGanCm`U-UWsq`}#i
zd?hR14Xx^3T-Gy}IonnhFp9I}i1<2{NNU~6G<|P$J<|K4O40$dVu{o7{s}LR^9rsA
zyXxS%DnBaK**VCFJCrqwZTHa!5l8rSZ>&$<f8ftslao8cN_><}cUkXTIzQs+Lx}{-
z(1niX8$;aAW~aH<bT@E$iJn`lkmdR{q<w$u%(sf_9xPc;I#d`gsPE}Xdc1Vo=P09P
zX`Dx!yjtWmqJzpdgnT=CAl-FSyo$v-cSo(KS(01R449v+iV4uwWnZYtuyw=br;AqP
zz2E41!tSE8>C&S-HxCNW@M&xm=RR{XF8$mdZR`G1+x0oiPN_9EXSAlxN}0MOa?if}
zWL<s}SFNAR_BlDb-Jhh%F;z-*_3{&YpTD^!^HpnEoB8hY>yNK+eRLOJ$EeUECVfm*
z=Ehz{^<bVg&t(*^ePmadXnK6{b5#wUq?i6lvHq*-Vqz3a1+RQ^Njs7?`=+wq+l9+T
zQ+l-*Oj7DK-Yvi9Rq6J{3-`N)p6y6+i;8q)*($3sab28mI&<l(z;l+_e5H2+i+yIX
zZ+zCU!1P(C_0E|+CzvEB8+vA%EO;Z;ExxMbq?Z1Ixg}BTuiE5%Z)dDs5R}C2dys`Y
z!BJcPhSbT=Q)Jt?&r3WoT2>L*aeMl)hx&ilas1f&zd~v1@yqK!bTj{~KlbC^)Gil>
z1k0tFr!-fG+}}Mn#kZ<JKzZV-w3976Cj>ce?m28CbG*<iudy)W)fw?ai`c&=3jQqK
zb<BMsr$P`<th}|`lqrhZQpW^lS{!?D)!fjXFSetRIWh91=d>QpU`5g2yF~;#IxI^J
z%Va&oi_cABU3HV;;B(gm2bsmi7BeS0CN*qdf3N2IugA}K?{0r+5&3nYw8+$D)#(>i
zo*Hdrb7gaoKHh25s-6C2sqrLDkFdF$TlV;WQWwi)a||#v;f}q&KK$_$4T0>14BXbX
zTe)v0PxV@@YWmH*Vh&$xpHusRgoG0@=@|=>GB?c$WOeawP2FVBJypRa%sH6lK;71z
zRo`+p-#w<0GIK(2#pdY#q;^Ml*=y63j(*~eynN<Ycj}|B%O2<5%`dxsHu3h|ww!6-
zvy3cu?o6>>9pt|4-6gl>%KVRBO1-Md4o&Eqebq`~wF*Czk;?tkj*={zQ)Xm}D2ktu
zlJuyOxYR%Y=?ppj>(9eypWd8z+(*b^hQcwulJxZS*=M-~g3e`0CB6B5bya+!xT9}U
zX3g%rtLx1yPaE_cu9)%Z-;DO!It!k5&*eQ#2|W!8Y@SaFZrs0fujbv_{@FJQ#gkhs
z=C9;vP*~V4H+%Q(x99ZFt=H54_wsxFpOV_o_y4`F-}~>!$MgUHoZnylIcs}d>BXtb
zaxQd!ne=(eBoCp&924eZ)0_nsNiPd-u$x`J;GSZC)AHE5*VkF{9)ud4%nb1S{(bx5
zf)>kji?eQc|K2`DPGjarr!OyrX1r=~Z#Fus&vb9nwjB)#LDlDeXKEMMgzCPoU3n{D
z;?(Mwx4T<89ZM&^jrecXT-GyN!egfLjB5thjh1RF&z}5}bCFZ5{*G6hRU#@=``2}7
zuQ9Xw$(|jeTb0GKK;0|6)YH}RmS<CFc7#vJ)x+**{(Q<!*|unp{?Xct>^}`Rd3;-F
zd1=Mb1&ei`eQfi+8O?6?G&_-T#dg&_rnM)Nx~{S)?ppBn%%?d~ZzU6FFP?qYNO*;T
zlKr|LE(NEIC+tv@x#D+2Gk)#uC<`fV9pSsLpVUfiooN5)O!%kEJPxmzqc7MpDXep6
zY<^|8?Ut?xtM#-AYq;8URw)GNZ$JBfnNa4_yQ*R(YtG8=dfr~?>KOGoGVPODlWk=A
z*@NeA%<5S7-JK!SK`+lQ)Y<qV*Ol!~@=E_zPCKr2;_zvRSZKdbE@p+smS+pZON*bz
zUXxjx%DU@F?-rNXn6mg@j^Og7uNT87HrTm*&pwe~w{B*>V>65Y(&^`1&ZPEn#Qb3l
zu$cL#NH(HC#@Ep!G{$DZ(ywy@7tGtn{Z{7Vm$H0W?lYSw{Yc>3wPn%u)z4az{5Rd4
z#`AXh&62p*9SIC8UN|qE_S^XNhJ7>k>lZH+cdRZ8YQCjewK4VnJe7$R_NJ3Ry<T+M
z+OTNZs~lkiVbeoe8Y*l?U;ZeZZ_&P1nDdGIr0O5_)U%B%EvIv5Ec@uScb5BuoA-EI
zy&c>_-26PAC0BpS4Z5j0^`ULp$9?>dycyT}il>T-EOd2w=yC9rKw;Cwvxa=!W-X>0
z&s{$D_2iPf`+mIl_ni9U%Gyu8oqu1>dlqlJs(#hGRe_pmvo`yjWN+TQ>5|w*nWNj|
z#A>)W&YTeB{XC=Xae>70DVsZ)uY@doDzs~Uy&!YoJgcs^f(J#CUhKMHXe6zb@ZduE
z->j?a!&isyf0guQOY;dsgW`kVm<*3e)U?Pqmov&ZbIi*UQ5IaqB5wL)*9w*n5r2js
z@@i>nE~n2FR91fdd;2<Qxa9BI-s|^#T6nJnUI<?&#?Zgr%$RFI>-LOElNp8B`D6v1
z6+)grjNELlXsFa2IL~BhPlm*f==EWfJ?)lVns~V;y81#-)Y;IFkB+4+o3ACfN|S9#
z#|gF-ku8&UJ2IRpx@OX;`Fo2~u3|?E>$MH6;mk!3_Pw9!nJ6TDRYgs)?dmT1lsPFf
zPdYsM408qLp8b^kx6}Fl)PRUNp9*U0tGDlG_hr({`c!r@xNFlho~((?Z5FCuHW-Mq
z9tgd^-Z{O}%hKPnQL5m?6YaY(+sjjqF)%9pa-3?a`J`q>-_M6u>&}Pm{`_uFyJ)Y*
zWVbhAvHS16D@(t9z{c;f0nb%ef5jsQ9Am%5)v+0iOq|k|xM#QJlb0WFx%tj&F>&EL
zQFmk6oZ}xqzWbMUHeLGG$6d<D^u8&!Sjn)nye{1vm0Kir*~E5t)wef~K7IQ0NB;jy
z{`&v@|G)gc|NpW9kJhK#_iOH#+8P^58t%Gp;+H(@n5OTtS;sh4j3+WrS>}1A;Zusj
zqM6E_E4x~5<Zj!2Htg5ewqyq3)5jBUJh!R+^Wk5`jq_d|R(>DYBc4uPvC>1#I5BKX
zK+~@Rx#*v7R=?uoS=h+1rciD6wqmiBAxz889bC)WsT-7YR4M9TsO#&LwYGOJh?xk#
zHxpUVb<2^lC+necG;`F0tB07w0;P1tFPF>pgnW6Iw@%marD%_(cIuyq(Ckcy3kJQs
z)%l<MgKmnuB}|=UqSf}eJI##m;G7rEUm0(n`l34La`n`ke<H7aJU!#mCeazU*;dTx
z-E?|GWL<o0N8P(Kyz+;HE23RrRV1xhB)Dxt%c84l$J~C;;GSD{Kx((MXzDqa!>2xJ
zTP3qPJAd2NIHe~>s`1rHFFXIjSF>gsR2!-pcZSN!$BSHB;JPVnKfBW*Aw$*$3}^P;
z4A_2SXXqvy*PV3%T(1Pr-I^3@+psj|f83+y^iW&D@H6+0>&uo!NQgviS6F(>e2r5!
zvjTsZdGO+fB_3_3{Qk5HEy>R4Y2Zk*(7$@-LdUGmsL+)yJ0CgC+^TR-FRZk#R6utL
z7mK=1*U^s6oF-|C(cJtO)NZ8)Z@pkN_v#hp<fC7I^*rc~&)Vr2^0eW`x^Pk95LZ@X
z(X}!KN=gh@vhoiu;?g;pYhfSL&AZIxsM+$k<g=dYM})pyJ9YQheed<oy|MmZFMUjn
zGHr?0-dMnyTBCLTUsKLk-m~`ea_YMjYb<wZEAoY{d306d(v}uhji@u+FAk{be7QF1
z{)A|`?fjmr^w*quKRJVorE!+v%3sYg67Tmbq?}J&{h3=SfrTY^hmo7=qgQ9cnd~{F
z#qZ8cXq^%5_TyR;^C^v0zht>*uHBLsvDqw5AhkO+`^Bl?`91Pn@q7>FPw3|m+?P@B
zpWmFn!&Gekhw%FI`&YauZ_`-u%H*lxQqA;w1-qB5XUdZrE!g;z-&?&|RCFcq)0DPC
zso+U-d|7O5vu4h>zW?H1i$LqmNahZa<TrC8Pc(`uwCR<szM8c)D)-vzuUS{W*NP|g
zT->TJYRa2>;QrCNtL275)6HyI`eJz$ugJB<$hpT%y1F&hnf3pqMhU^0qStS~{hWS2
z&bI#B@9*#BmS3E5r6sN3Buv7vIrOrwXvosc727+!{3hv^#meR|T=-*Aa!s4b|FB^?
zV~EHri>ft2g?54oqW5OaO1tPKJEQr)YMyOR?(UB0?Pz|yUm-Oy#r%?Ym^s^)h((ij
zEn=M*(wEL(WNt9^gx3TgV|OPNlW#vg_d8kn9j<tASUI1msNX>^Z0!?1^Vz#IIL<OT
zU6EO)%u{WvwsAqqgJSQHDV*(xe^k`Z>Dywt|IWO(eUHCvkUx4rLE0y_<#XwI%f81J
z3L6<$EqwCkb{caElY*eD%ApqrIl~S({WMsXDD{qi`}`H}3$||ZO}~5eSCP1$fg6{s
zG3&{WDaCSq%tr0|_&2sJ&k$2xyYuUUTlp;?3v4bO&r3P6EMsHz{QNT6BM%F!WI{Lf
z96!9#-cR~%wZpL!XKMKqmL1|a<KmHEY83V44QNQ{)!EtR`t#!dZQuX<_v_iaK|4E*
zL;q~PKX-nv=hnqS*WVsp^LR$F6nlVEkD<~e?&uSb1V2q-e|~gj<eRXBRf{59Vz*80
zTV~{Y`S07q20Up=#{#l{HAQZz?A5dE&onr@Y|CARC<YFOH#ZlinXYzZ^yyfz>ymbT
z;=HF>2Qy~yuQRSVmzw&x!&uohL}j{C{8GM8r$6wfB?npF+}g6%N%P%lPUf}q)<qh-
zwlk_HZe7}?qT-evHJf$QG4Cw@2$e^>R=De%Z&+aP{6`S;7t!Tsf7z@sQv849@2BgQ
zo408&Z8>=`z4Py*)EyFyuPr9cvZ?0_Uil)`Va>4{!L24j6T@z%o?SlY^$V$(6!pVP
z^^<4sGwUomKRYZ$MsLf})O|1a)jTPhw@csgB-8FGj+>12IA-_ku=>|^;*x17n`m{C
zONN8p76+B}2Yh??OlW_$xcvS6S<fY}Y>(s(bhUL9+G?_W?dgq5uHRZ@F24G5bklS9
z&-d48xh`;A<)+H9bb_My)u-M9+77aF=J#~O%YV39uy&EnhvUol8^2m+=ENp`b=EAc
zNrJ0A6!LZ@nQ|P-6@DzvZTa%<voLN(566=Mf*vb(vWQK!6Ez4tYO+#G@Rs;iuh4JF
zdk<}x@M!Ip1-UccJ#>yJsyRC)#5uIFJABR!oAj2v!!{?`v=$XzPm_{t=33G9qogm3
zDW;b*>w$#4kq4thl~;l9$<^0)O<le$ET-%1m9;mS;`DT9DXV4h#i|u&tqm-l<jng@
zaltYPdq$aAXMeI6_eFV1htGH^Yw$Wu%<P?)e#6nM6`z{dK0MeUQ~uPRrNQPFmu-XN
zRrv>-*{^tYU#eg%t(fmRDT9wu`QWC7+VZoy-(S4&{QKsZ%N<++dw5=*{Btp)FPzy#
zvA#TaqkqYvlRRx}^Hf()W>{Tu<CB_(o=&vs4e3?-KSSz2FIkY+v!ZqGvm-Jex9*xV
zkMC;zMj!i?kLG)PsGqyD?Bi`7EitFi;Aw@YG|xZzy}aH~u&^Wc`uE!V2UlIzOG}oV
zn`fv$_pZWoNek0hzEewMcy#6FgdBNu;!NzDu-f}a`x+$-3py@7|8iqlr=1daWA*>|
zGd>bd9foDKF<<xkzuwqU+#w;k^60|`NyFcJR!d#4HH<NO)8*RL@%!JSM~~jl4qt!o
zZ_VEiGeQJUPRpNn+wgQ@&ay6_Wm=Iz0&YDUXCFJ6TqL|?mSwP~NWigAjm_z?o<W9&
zo;Nl!DlN=Z{26*ta$$7WsasdGM0U?pT5wl!frmq5Y#0|ClUv6mUG?HaxtaN^dO0Tk
z@%|}0aq<MV;Hx%EA3pq0F!}i6B92B$D+h)NU#qUlAA5aU`NHE}_e&Rw8?uy2D+O^{
ze3895W!c9a|G)gaEdT$*zvK3GzY417zmPD$@pgr=saJ-IlP2fF*tneew)=0r)9YKj
z;NCskKDEmxwjOfjv&F67sXl*faOa)8^|QYgeU)B%^K}j^3Q#)Qu;g06IlfQ<$C=9*
zpW0<?=d|Y7arranrUgN*FP_d(UCqG2?$D!XX3`mEXKv`mryc!uHD9O8Re_n;&#$lB
z5f#r@ewcmF-rb%G3$l_l+Ey5(rEV3t`7)QmjmJk;xoOstGcvpOS?|8Fx1#Xtud}cJ
zf0_U9w0!-am;3)r-@kY7-v8bE|9qaVKY#b)$zA8KhM4$zsx)VI*ljC*KmXa3V!2Op
z79XDFTrbsT^El4fQ&i+z^^SR6n5~4JjZNR<C0XZQeKfwX^l9DJYDMK)!Sj67Os2@V
zR&Ca+?r?3b{9Up+<Kb!MtyeF-opgCsu;65){o$e?{w?+>+|-eFGN?>FblT!uC-jUD
z?z&dLf}u^z^psca>-g2LTqiu~ncrghaL=uz?C85Kh8FHucbP5waOY1^Jf8}8;7;A+
zSrY?NnL`a2KF_#NY~wm>A;W5!KU)qKG=9`M9r<GJ%g(FYb{Q20UYcID_31)k$#Y&e
zy=<>@t=%kkc~ixeGgldB%(Y<p$ek7K=~t4P?>iwY%Rk9TAb}~FD~;`7!LbeJ|GF+q
z<Nu<zwn~-j(x(;|_RT*ppJYi3D!A9MLaBknsY@l-#oa^c=J)=drJOodI?*q@yW|dX
z&s)@=sTiJm`suw=|EhIiH<`lPr}^Lhmb7Dr)Wgo#2mM&Amf9R<J-veO)9Hv%?u`L!
zJ48Ztw?uUuJYpWf7NdNR%SQOd0g)LeLU^Wj%GU9<EKA|e&XK+mz4O^$tp%)qZzhzj
zug<>kCo#bHIk(XdpUA8;(>}!|-E9=z!?L33@QOVlJf`AH&TyVSb58fmf_VWu9~``H
zIp_TPv!!AGO0=T;w{e^23$2-FuE(vj;h)v%uYXNKJi4ZqM)CfRtm$8?Vd`+iA>^~o
zs(YTcaUD6&CLX>d`Q$AB8R>&g(OF5dQID*(l($ZiQeU(x=)&!uo>i}Pv@WWJseRYr
zYRI{I?QNFC*UP-GwH=S&@(Md?*_m>2iIQT%7e~&}j1|-0>&dTkwGq9aHJfS2Wx<9y
z+mc#b&Taf;Co^T9!*kz_Q8zfnAHIm#-S74=UrGD>YaKtmRV&r+OU#k3*|>U1>$2xp
z5`JGb`ai#a-_PrRHhWCd-59gzX7_KMWnB+8-?r@7edd7ko89~3bHi3EtoC!dlCx~W
zR;z$%31<K6PRL4{t`vMWr9G*c-Gy)C``;%7k6vl%xyv+Z84K&og*(q{`e~L~roa7d
z*S79;$0G5)7Z>*=YcQ6@g)XR6T6sKnreynq21OCZn!wz%uR`p7HNUL>e7gJny*+im
zKmC-yS1<AHu@d{b8x1uK@5}9{-LzX3b#7<Q$JLE7n;8~PvWi(PbGAi>NhwRnbJpbi
z7p;+hC$5|Ix5M!Qo1VrrB{gMn%^ihhnfJa23JDc+_^@=SsvD;UH_kcDTr}<ctw}yH
z-~QLhG;>Ao`~T-({{P4SJu54I|LtC#o-RK9`5D<o0SoSa%aJ(CP!sJOBlGv3-ptMn
zspB6v)bH89d*{DDFPG1+|8=;3|Nobt{eORytG*f2qHFY?!HG?QwQu!@XS+|Y{(OD?
zx_j#$y?ggAV{zub{qpUH|2_NnFe+}(t+#rTiMPJQKG7>EfAH7iIM2yVK{MZN__4Uh
z!_wQq)Yz$mgEN;&rm?8);{r{UJASh_Z;AHw-=f6#`l{5#Nt~?@TGcEocDJnRUL`0s
zNx^R?+p5`u495$$y}fT^Uw=BA>zkfz-K;hSH-?-8i)L)<(_~gwn!z<I>zTx&9gWgF
z%H?~yYGd-lcHh03w)gG7$N!(d|MyV-|3mrziDyoJxc~Rr+t<A1K9ep#vEp1Nu;a17
zu65CC7iU??yjyPKaY|_wlUvHN8p)&>!3j%kR13W{%Vd%zE`Ry)Lt~Lkim7xM?;Mu&
ztRrt%uKBldRm1I7YtA^tY+Q3i?J?{2w~WuPro~@N+cjhM%BXer-P1nZD?0b?^F*!7
znVWMnUL4=_x~<YK^37(}`(az`OZ_v9r{0N|JfOtD!D$d6S#c#w_U1;*jvk-WO7of9
zBN8eDTAB8Br2BVdW?j1#o{($2W+wMOqy5(<Z-fUGXiq(0yN}f>F!X2cw*G=c5u2Ya
zTG8t-D}S+xW1;cu+;bYOQPUE&PtMPquy&@<Y_8AR=eLT<B~&CS&UT(S<0Qk4qu*bY
zJ>p8)aHzr1?fa?U=~}L596KKTXBSKKS@2>(%}0w>m5pz_m;_u~V(v*ivpS;uXwgaK
z5aD@}E+JD)RTp!6M=nnlh)-YMy4hE+Ow;-9rN$Yv#Afp7oITRrnDIOQ#<CyFO4&HS
zxy(sin{=?1G1z<BEcX7_I`v}d#nVFK=Uy&2xQE|R%}7<vXnonKBdb@b%`V#Kr+4*L
z($NbyqF!v>@BdEGZT9E*eM=5*$=>*Mb)@Oc$V9Cw#jx7>Y;94KG-ga_u6Oc1pMLgC
zQkUxtrqKFVx2_#JsHNp%{+V0k^IiwrSzhe>9HO2|)yH+{?3-EEDk2oHSTmY8ep8#R
z4wu#Jr_NJ%_V_)?dHmz(vaDk_%w>1hU*%%`%*Dd({;G4$>d<-I7j(8MAC#Iqe`U*c
zp)2RZ7QTG%eO*H}bz#Q01rw&<k!QC{I<?5I&vVgDALk?cj@3-%v~xTwxKU|G{Eskc
zzva_zeeG`$da&kaw!`^n>rGckEM6wM@?6ub?!e0P*FI<%7%zM<`-)VJ*Os4`FU`^y
z`Qg9c?w{1jKjGWif1J<Z&wBsXdG4G~GcL_2nlvqB=N{dYp8AU4YVWWAwWIHXt7}u{
zq&b(TY~EtuX1e6sehWdVtH-xFxw>u3v1gAzCgAB<q@=7IBH+DhS*PVvgI0?piY+(F
z@2+~k{A$+TcfaR^|K(y>#62thc!c28RcV@?ODZMULwPr5pZC#PY#Pd}uyn!GRmv=j
z>MqZX&zo<*e~-=WyS$Zrp(W26o+~vfoevk3Y>x8ddo{Cqv06^Js9XB`S>4444wy$x
z@>)1)(-*}Sy-$YCGo)sjnG}8Go4mg+E&Q0G*?c|?O@o3Z0-ZDFq-1nyDV$Vktok4q
z;&LtLp1bh&1%(T41d4aGe2Q!T`}b}A|J(mReRz5K^!4-m{y+ShU-$d)>FfIE^|!4q
z-TrFV=d-^oJkPb~7`DiHFKF}f0?pp<_+Rq(<>B4m-`nr`|K;W7uV25u{d+gjD(!?s
z>a0l_;!gYTxv`pF%4?pj7q|CD?3}r4U)H!vdp3wH^OryN`0v-V?YG18<L27G{(Uv;
zYNfmQZi#*$_uKxHbeJ_438c5IF>Caj<{t7fnft83j6~537dEL$U%I>e_k4{7hI{r@
zl<vE?_FAk@HY@XYZDr95F@d2Tem8ZL)0!U_R@un$M~W!0ysF+8^YN;c&HXRZHCt!%
z-966Fzu?K!xe_zDo(YDY3G#Dyd*h@&i}S`24h17&(d29IK0R9h|7?8wzT0`*|Nq_p
z|L*bm^(qcbz4hM?=iN@e?)tJ;UCg4a@alT=^ykqBYRZ3qeD>?{--6-_n<Y~WHz*#>
zlRad0OmFoHDWf)t2?9Jtv-*xF&um;7CdYo2MO)*a-wa;Qn4PavXZbd$>WHlFxYEI6
zYsH{_C0cZ~4!f=84ejdq_SGSGYXk3gcdYVEP29F<#k=&C`()NC{hlOyQRPQ*Vg3i#
z!v41_gWA7$TUs!Na!33W%Rb{4D<`(o`mB;w*37LNTDI9w)Y-CcS?0Ud6L-jXtm1x9
zJDVZQz|vIiyG`kQ=MB?dEl54el6zv!+L)*}GEv8$d@fkK(8qi81J88_o0{(nHT%oY
zRt?;}&t!IWPug_{hl2VH#)}qbS);GnN{K%&t6kXliPisjkir(#l^b~<PH32N#njT*
z^Wj3TMG=L6T?5Vru|G-CYVg&(sP&=gb_3Us&x=-te9kx;{MtrV$5g;FH*4an%?sw<
z;LOi|7?pcMirq0%vGSF0bDZst?&(LXO*JMe`0DaLQL)k5su8i9!87#i{riuSRf5yj
zT((zQp(Jasn$c!d`7E#dm*MdrQ>3=-ecpEVU^=(8hS`hV(<QU!m2UA6Dmpb`v+c@t
z&kuNYcOPg8`C?i3J)`i=l-zAr*H&v4Jmb306Px_9UT1Yzmf7iLO1go0=bZK4tZTZr
zYKK0vX2Ao?mun5YUiUutQpz&Vb>+|%Jst74)9U*79R|C${EFk9t+L|BvWZ^Wn^aEC
zn7byjE2BK~<lM$Yhtq9`y$;8JF^kx+sYCZzmgAl^nirSuTkpO#ZQ>K_i23);_XpgZ
zt-v6rBYN;p@#G&DWy&KKS_gbuvLfbvYfo2$dVBBI+#@!d=ii@TIsf9ci`(Kqb@{Ez
zKD6E?rT*-j56h2#`Jv5dv3%pU|Fb^T89p|267~$9w`|VlkZ+P(&i4J*m(bYZv5)&o
z>4}ucOCNXKm*+ohplT@ieU8}iaNBEM3R6}mNb}3RdMLj;Z};8Y>$4YROg0EoP}AN~
zH8n>~%=f6ul-X+w%{eVu#HKo3{Ostw_wCwq2TsYvhOBsSsWITHkBM(^7%Ttb*G@02
z_p{8LXnikd*V$$LN0P-p%SLWCvwRV0dq$x9Sa^VrFq5%}U7DWUSG9Geib}?;Nx>&h
zO1@)xdRjuDtakpQ2sz(dmo~(lopf|X-g8a~=b79qe_j)b)Lig6C?+D`P41dnl9mQj
zmfONJFJhlDH!N!r`?bR|e*WC{w{Abn?)b|a`EGg_X7y}-{r~p!@^!zK@Be??{`Y75
z-*@Bxy?c84^x1XYEXQs5(pH!DFkVntYQxvQ`}XYf^X>Lk|9|(q+`jhDkB`rumDOAH
zB%fN;mf>QS>BTf-`|i^HX%BK{Or3m5p}@yzo#c7*+aJ`9?UPoht*Nl+`#%4@-Q791
z%6npt>#^KhQ|-LCL*l@<mj=F)lMk|3X!vk3Utlq*T)`o=$k4e%<ovbOxyLno`}_HS
zfBbd)eC7n3l@CtKm;GJ#{#x#q5N`=rmZY`}mKV(*r}Vc!d}ttdOq01KbdSybJ#n8!
zU-uOjZ+ln&*=j<a6x(wSX+@0_hP;Olm)P8s@BhAfhu!_yoL#bvvl)a_`3@G}Iv|i!
zy6x}k{r^s%{(Sf9&u9OC&;NINxBb7mIeo`X{ymNV`>tmD&98;Ky=QIW6gqmfYWK~#
z<=TC<4oxP0zyB(F8i`uGdHF4@_xIEvA9vnb^IIbM_=yy?$0oPlY;H4ZaaOyo*<Aei
z71Nza->s)=g*FE@bnH94{`JOLy`fDh+Vf*(E!h{cV^VGUB-hn<_Wjvt`L3q#O5Wxt
zUS1cPcCE?!-TG$V!?g?f=B%{p*ckTtU$N)e#n*OOq|Tlh9g)kLd@X5r&$Ycf^x5_t
z+mZWoWvSH-3DKmhyK+StZ_T*2IlNR*b5}&{g-IS4+hWX?zh6|W^*TO6=5y$@_NgzL
zd>3*#1bC_~cM8Zp|4`UEC;5)Z^5<_vbQYc#ZSUDoA$G>tV*Uq1NhWcr-SN$D%)I}v
zdTQ&avsOAxTw_61XROrz)Xq0*nL%4DCvQ<Z!Nu{ar_!WREXyUxL%N0SopR>t@~nT!
zx|jZ3+84R7YghKhDQY1Lba@-UO;y+5rm`mZ@9ALo)Rh}IbZ*$#xN_ReZ)|J|VLRr=
zNU?{ypZ@KARmf!FF|Le+MxRDr&&HS%6}ym4O^4hWL^5o2vNlZ)`?T}=&Lrn_!Ma+e
z>E{dM8nQR$YfDTuU$e^H>ty=WzAGPo8m!-7vt{Pm9m-dN?UtN5dp7G=(CNv>;g_22
zzY8*4*}Q0TR*-M%q=c&rE?>L#lt;^{CgA()bmPE)?HRM%j|(zdUQZ2tcRtiwX{YAa
ztKa)htqDAS_G|3x>YSIBTjfJ$e-uC68<{ELFLmVRir&)|k0QUC9zPNGG;(gA#uK5h
z*KWl7EnoGuR`Z>9(xxY?vc&|?YdN+EEL?rSWV+tYo=v)vnU%B7Gm9Oz)Gw=>+b#aG
z-bK(a!*PSaGaUx=Z>{n>jn3>zoLjv>zxPzU+V&bov#L+m|7N>b_+MT#@5`tEN^|-Y
z6*R2k6}-J>1xe1h+4CwlTUv=P_I`fY-89*1Kcz|;x%cnh$;tH{R=IapG@!v>Zo#tZ
z>!<eZE8hHBsjZ^2@aMCquRquIdX}8wI1wk_v24z!6osCHsoxCSBy6Wy?O>6&)t$xk
zJNLakm*<VW$ti|zqKRo;3)L=6Z1Omz+56Z+GU<g=x;CFn&$3CX{FySUjEoC9($?O1
zrZ$yPS%Y=wY@0J77tZ>fIntB#s(8LrxyFKZV(dZl-o-UFH#Vk-Zo6=N2bZSm3`X-P
zdG=+?CM=w=Z^f3Nx+*1Q7nQtY3@cibBtKd2cJ}VreLpy*$t<y9$E&jQ;p^|k?XB5&
z?;V5v<;VXv$HjfE|F(Pot{ppe{QUCy_x$?$@BbeEfB)^<ujlghKi-zF-(M{me%Da_
z#V?5ycjsN7zs|n?$A{1U_WOSSyu1AT{QSH;{`SXH_VhOjX(X=@Gr2wQ!^b({ufL|H
zyESKe8QpnrzwpbJmvL+2<6>h}u5LemJf-N&<+FdE^M9Yuxbe7v5;GG^)JG{L?$o=_
z6_<NXx}{MjaCf#Z+exktDaJ*S={vgqzWVvuyyx4`%fDa$zWuo3$IHw9&YKh;pYDEs
zzD#`f)n&<QYo>gQ(iIYJ3~JC!6`oxrb!oyfmn*S3ar<x7Zht+GHM;qjLrvj7rZu~F
z*It?Pxr9T4;gf}}jhtOv{C;a0`D=e~o=(3XJ9q!f3gJT08xFR?XJhs>q})~8_xJ4U
zt6gEU-~In#|Np-I-v5USZ*}wj|9AiY?CJXJm~#1lA2;!1SXAg!b3`Ds`St3rWp`}6
z=F2ZrvAXy}<GWQBJAa2n()85jR)Xty)s@faJO1&7ls1DVN70(;Tcd*Oul%~VXT$g1
zXZx$yOuXxt&#JJ<>#pXjh)AYluXh0#uTGEMk@5K1mAgM;%pR>YUo$VMw><L6;vE<M
zP0_k9ot-=N#i6<`-^BAFn^&J&uNlB`u5ZbzlbMZMeYg!$AMUGG?BP+puxiogdGmI=
z`o)A=CH>ocE~U9uO-}rc^&Hk`4aI-Bx0O12|IliDCu$sbtZ=Ktf=b1!W?Nj0E%$RB
z&0g!j{P8-i%^}rF2e-Q_8|*i{FJZqmz+2;2iK<!9x9^Ro3s-7I74%<iiPUmr33Vy`
z;w@b|-D(Dds7S{Lm$wh53ap4&$MWgcBo-IeiHkHh1tsgM_&>d?>mqxl>tXn+eJLxi
zhl|!ma+<7KGW*lCYO~zQK3693&aPa)>HD+rxKi$|tXWHEpS|kt`scON&Up71S!rtP
zwwfD<1%!SIvu2Ps)E2+ep(!jWr93fkuIZE>9fdV=4=aUUS*oeV99pj*o#EoZJ7G(J
z-x}A}>)p=2-5z_R?B^`c_5RPXRZZ%3upG}_`JB|luNG@%&&=3m!5;G<*v)K`>n0Dq
z|35k(FFq@^<3Zv|r8*PKv&-4CW@p^y6wz%GyDPM6;qAq*Ufe$VJoI|#)d!}MCqCY;
zyBHX)871r4uu8I7YSK(C2C1yjwud<;TV2EV@3=KvEoUEZ^a`UnK4%4aT%zA;X)tOD
zF<NQvK6B;l!H6iY8Uy*jEpJ!LGb%<L$twNYpM5Uu!@T&!ZQrhltdVN$?G0((d|+?b
z|AHc=I!WciU+aH7RoHuLotXOJ^_mw1_t~8O-@fA2OYzl}E`P-T`lg<ImgaGC*5qYV
zHm|BvEtYJwDEV8q-P-#0ytf^>RTUoXhyVTi_pv}`^F+qTm;17$=GeP+Yg{|^O;~V^
zhfC5MJ$=3X^`F1}4qv}*;c=Y_SIPrhrl|U7$}HDhE!2MFt6_;@+rIAA`{LLB-uHgj
zy4yL|UVmNnde?=c0!)Q(C110?e^nE+_6W~ROSfkZejXZ24yvr`2$awjWI1Of%lCc$
zy7l?*@_4trKa?mFy3k^I*ODoJ*DbnKZKdLPmgS_5fKXwAwzadf3s+3?`%VF0mEK9q
zRVGi#;WhA3a!kJ78oEGvQI$o()(Xq{n;qWUZs1VSRuFrjGfhQU<-?4)kSS5Mx>Ksx
z-0XUK#_7q~o8M}V?+(4O$EyD8-_7Rdx4-W7oN#-7{GHaboBuzp|9gL4Y^>}%yS@J(
z-!6Y&SM~qja{m4QAD{g_fBxP*ug;tAd3UJn_sh@E-+p`Z?%Th|yZhzu@BRPd;p4w|
z{}xIt_sk07H8t9j|NP8{9~|4~mzM9V`uu55<^K);P5d4k*f!KCb?fZtvOalP?bzK_
z@^bwBzjwcGpMAp6>Su4K((!$-&HhTx<@PI^k;Anh$BSu&7MrAl)}$a&)0wiz56`%?
z`SWLS{kY^O=gZ4~JFb_{KmO;pzj^uj^XubcZTH()-~aYH^m?$Qte6(tuI;mS1$sCv
zo8q$YeOdVJ>*BA&v+a^!IC4%%TC#feh1P%np4HXYSnxF&_P%`g`K`R&{XNpl4>RYx
zZqkf@bI;>NRYLILIdgt7Yn<5-ciiq$_j~>S-&g-nt!4bb{{Lh9KM$YJ|Nq*)=Wpt=
zv+*%;bDrnTcw(_s(9Pz}^}S^+W|t0bbKWsQLbpHCq<;=;-;{|HtlqWnkv;ZRaCc{1
z&5qjg{rmG1azhQ3m!JH3=k!(uJOB1svXR?YY(27U+hrTgy=xg>tub<$Gwt!bDABL;
zueQ$7Qku6u{Mb#icA0}`xbn?58(j!7aTI3$ZkMHPTdN%A`Q+<%aTCpHSG06Os@0fy
zTtb$!{ktEVeO|G`BQ5k<z~$BAi9Pm{(x0t4+a+?j>Dii3($nvDZxK4Od-kVA3oj&e
zNiOINjW&xAWeWNI_T{!4x6~TNWcb26suyl&sZ-b@vBoTGjc3ZW7Y*vSeg`efip`CT
zetkg5?3GskqJUMGw_Dwh<Pi`&nQd95S2HzecG1DZ362kbFFAZ(z>#I+rDr;BOI>#~
zIJhm9T_Mo)&7yowqV~tX=d+tLltqF?y_~C}w9R~CT_3Fw=NCM9=mpyh!Np5oYH#>#
zCb90{2}z##?wE{5bpz|ouTO7V|8(}b#eZ%}Mm=ABk7L36IkF;LU0c71DufD!Fq$q;
z5DnX{IYG0!*?C3WY)hjv(dAB;xE3C~R&4NlU7tsH!qz>P>)uxJB(A*_^{;(V;@W~+
z#j~@%6`giiyJNZ4e)nuIxu#u9i;Ct<n_0j1&E8ba14=p!tLJ#<>T5oo^5==Bb;19A
z9XvjpUNdLc&Uf7uaq@I<`LTt;i<SgDPAHSpb(UUHqW>;duT*x0lvgLOrosPXzqghh
z+_ipDgQdvPrFktOs-3ypFSk^BZ~AR>GXBi(w=7%VPujCn-QaU>%gSq0HgO%%wleU4
zWM;7S&4Z*B6NOtOB$B3fnr{7@B#^iHSL#0YoO^t_Y(GzH?qpxS#pFRgcjBAruU}4f
zJwCn0>yKL2C(ZNE>N%GEn|<`}^gnafpX#c+nC{K8Y}PWHzDlo2OI)TdG|&Eg|H#JI
z=C{ut{rT(>j|ne7zZK7AgJn8XH|<>VRO?J)%e6<Z7v|P59l!hT(eCT_|9*S>UH@E}
zV25_wRr3d{td?fZIi~r1lWoVv*T40w?#!E8^S-t2jTKLdh5EZ)ww)Zkib918?KbH@
zn|Pu0m7-vZqRBCysw*yse|-3Ox!wNXm(R~<R|~#na(9}TWA{Mk_%`R}l_#&y{&R)B
z->_`K1jk!$SLKiYd!nXtcGhvt-pS2T8(daJOcH5yK3^z%cG9meTh82T?7p@nCP&Yg
zDJRN`SvX1Il7NESmX@{&-X9;NFid$P%i3_%E=T|Ty?^hH@{4cTcyZob+v>`Xuabnr
zC%lti_BBoT{Px+O-`?DO`gHf#&#O0w=f}s_|9bbgzy8O|<@SFb%Gd3mE*K`eKD=<N
zS;I$*-s2y4*!LaQxH)%k_sOKk<x`V8il03cIB&lH@7>wg)8zk8=(qGux;)|i`tJo5
zHE#;sw+USg?O3h1%;s%>fBV;)ZQ+@xCaZLaT>mTien<c8yE_}%o@uT>wkPRcTBV4s
zRL`oDe|s1fXH-c%xw}<k&#iUaU%#zd%_iHi&h09Hd3kyN;kR}FU%k57-MxDC=gq#?
zH+y<Eed(UU!#GtX+AoXYZr<lVuil*9RxMW0`>NBZrNi*S(VKm~myiE#?|(1XH~(14
zZ;P55n>#7eGsCrRZZVFS|JKa2@o!|J<>@76M$+#uXYt;aEtRhN8h(5KndxO8Gbg<M
z-oC&0y1DpIpJKW8jVJc-z3|LUtCZ?bw&43a<HkeV88`9;xCJx}vpbysUw*#1{?GgQ
zdGGACC0?EU`Cv^3v-T99xv@86BtlI&SNG|93tLtjh$j^bvp26w%hvFTb+PeeJz&=R
zfak#GoeT2k{QeYvwc~}_<jZairRDe5JY5-_@8agX*s_0`lY)k|u+AxEH?4)=?lf2`
zvXrSvoI5p#b>rF!=eH@1JzxK7%*g#NEm9hEH*CuHl~FThU6<`x{Vm-sr`oXRv(MBE
z#@}*QI=ipmP_y|}jic_PrD{J9q=gvIX?A;Ua8^Lv<Lpem*n(odq^nwAW=>sccd|^B
zd#Y^H_crdZijOlFM1EYYG@(%Ri@9Hi5=(Q8o0J0|lUsV~Z1*QjmvRc9X^CkaY7M&f
zs_4k_v?Xe93;T*^@;7xjJyO>EJ}XOBGc2}J_Q2xuZBZg;wj403^wGU2^Vd5|?)3Mo
zTt{Ao6iVvsydqm8SZh4{XH-m>@On)KWsMnUE+3y}^{m&xb-}c+nQ3W`j-D4aBvvsA
zDn&bFta#kjxOJybP^eOon|j5k^-4^uyuV+W%*t^`Hrh`7f0StK3qMw+;IF$+{$3e;
zs;|9Tr>MAA_Q5^A|MOFhEel##$Z}{`qh@=TSa+c2`(Q6k(RCfB+0{4g7#o()TXBAc
z`bo>@Q&hXXZgNgJbgTOPtKDB6ZFSzQJ9S|Br{cXmcb2bcI{G+E^%p;5?WCaM$qTu9
z8e|eI-{*Dhd$LlweAzP{-S50dSQ<X7WjuTt5OKuXlKa$_O>@116vb2`ZuVrYd@s()
z*{GxQ{JZff?uSRbY}Oy07PfGiMYP`kS?u3em+IP`to>KUo4fym{m0v?zr!z1`_KI9
zcmChjc$TEv*X4DyXTSXIcUmcU)-sjHQ#MyfurnzLU(eX)w6t>~lXuFAtjLu5IWB?<
z3<4t41Lr#Hg#0nxdG+$Gi#;8O(yIUcdir|1eq4Os`7oIi3JPEK4suS6bzX0|TOpj$
zvgLqc0S}+N;nzQ5qQVDfc|_K%=(Y*Fe`D*7rbWzb4lM_IR^KyOzT}LXMnahd&;0fF
ze?A_cU-$3l<*#2KAAT&jX{{4W>k?m<^tIQIo;vec!B#uLdd>EnL>`By;;S_m847oK
zeoAzx%#)n4h$Hiz1=~CG+IcRM1s1<t!+Lz%uGkBgPYL_Cs3<3Oh$y?KZRS`tQ{>?E
z?f{F4lA($L5$pB$)&D!Z|M%wl?NV>v{mK&$+r94omx;U+3_K@&v{?3dMtg$Lwbj?}
z@2~y%>L%zU_@B?;|9@QnhyVZ2_y5-4?l-??D8L*rZ&JF`?*}eh+xoXl?6W#n+%Miz
zx4ru7Zu{Tg?e|#9w^x*Z|M&631QTtUAiw^XinsD^+xOpnm$Ut>p^eP)IsL&_>zvb5
zf=?Yby!oTz%ELc7*S~*k(lbx4m*GFWWu{5zvNgfi3{Ow^Z1|l0>f#p_C;Bs`+CDz~
z^rma)>6=$h7!+?mCK38D(dyOP<%bJy$d|8Qr*~b})r9H%nN$@~>z;1~Gvt=p2z*Z4
z9KGH=mgfqC<X4f$4(YpnXPvX#^Z)SQuU}`p=eztiahJK5j5;gBYs)!|@oRSJGJ2l7
zTp}XNGwH>IuZ%A)tUY!?edFHT_v`DN`xig`@bmD$0&7=;vzGcVo^6)CkbX+cdjosG
z$&K#LLQ1JVv2`*3|9njIQFgd~Zkp=-%9#?E42quVY!)-<bBi!84@gW4ymmy5e>G$5
zyPanR16p(5vsh)!m}Ay*=vb`&jjlAI8l|I|&Z#TT&u41V4&1tSnJNR*`UevyF#pn*
zx%Vt2Kq<#*PS%Rlw0Vi9p<2ln%*<&C$9vWuYyELKTJobn*Qf4h53?K1Q>!?+b#2ea
zU5}a=zVVaKjc-$r#5jcSTM&As?(k=wghH0yV#D1bCsl9!NXco@wLh_HQP&oWu$3)y
zw|?Ef)oRAAqy-JNy!#bgC)9U~B;-9flYFv9{IF}yBPoXYm%JHI27I_7d_;B9;u~R}
zo_lAke68@+aI4UaYE}Qw&!Z>CA2sVaw)KnVp0rhEwoWUI<W`*VE;u+L=%K-{(6=9)
z4&^_IF#RgIXzq&6+%moybA4<Q+jgB*^U66I^>f1MgyXk)9D<Eg*f<>6-pp2AD|nu9
z%0!=qFNzz^G^Cy0VSlrGT3bzBn2Rjit__Jdieev`_$%njiU$@K?Qm_F&=6v4C7xy)
zB$#)eMI$Ds?n3GnyDy=|yLqJB9~^J${`7qBWZ@Zqww+eG*=*`kG)<?@H2Le*q{4Tz
z)V-dsxvY}uHOc$XJbvv{tv=k{Rm~gDAG3_8db;pY+F$-`(+H8R;iiUD<Th9)OJBZR
z?)20sJ5qFA<xK9TO<@P0PIs75>0SCH?dr*>u>N&gpN_LXl=*k1<j~sw6~`mzo@a<y
z{F%*2@A*5Wuk6#)5-POre7-vQ;Ywvz|2Z%2b2J@VKke&%z6n;oQ?r{ZqRaSybU(ZP
z|M&lc@z);JTwdR;FiW;>{^pfawq&?{P`mU>my^RKt7yuU_QLu(%l;M$>4sK&ugH*W
z56$98YdO)k&QxVtL0R3`zqjAlReb*T^W7Tvw8MHVY43Kgd%s&kNpREjkGyU^sydSd
zLSlCtFPjo#tkfdV!Exp0y8t)lqs*_@FR)|$``zNOoaHf&*LEzSXER<_NXYm1&$p|u
z`v30o*X6%|?{+j=^QdaOrM2JTzyB(Hg4a#t{O6;>B>yQ<^4{X-y$no-K?%$=mok1m
z`F^H?>ETTo5(+CG%NCt0JbHJP^rH#OLT+$ItS@7&5876E?oU|tn=VH7p2is`rxbrp
zntExuMINV$!MEERs`u@^mE@nfKr!8#=X?3I42gIDe|(m|UteQ?@%fII_wUrkm4AHo
z=gp^@?RzbvOACE6tLr@Pbjw{bl;mP)-ha(YU$DGdx9DW_)fH@QGS}1B-~acsJpW(P
z?euB4-~LiM=I<#dI=Rs8#LbMBP@dbqG0#shIjqo>4cZ>0U^w@aNv74Qgcaw{oj)J;
zdh^v^6*0DQ#~!OhDr%=~J#w5=euiD@&zu7FO#4%c_xaloJM(Ba>qVPHw8s6q8hL%~
z)CU@iWsYgy^?&*AX5{|pqjqf$K1UjiZXK@Mb+7;WWVz&evE6+)Crr{*WV@_XQz})O
zpmb>8>-fDj%{3oiy4m05mDF9Q@omAc9~=Jfv6XhZBX}&O=!Df$9*w(NLb-g04?myH
zzj#mZF4hy5IFIKfu-}zG_AEbUPFbCr*zpY30FiypmXkX>COH1)F?+UnX8DYxS5jBC
z2+BUU*`BtvZ=Ir0z~V=|e_Jvd4eeaph2lfI4n`hdRh+c)+|k;=+v1Ta-L6J6EF}UC
zOjkX4%KXUIBkRi>tBizn3=()Zx;5-OknG~d<Iy5=D)aFyX*DIcG@YMYL}pm*@L*pZ
zRl0Cr`_U543A6sCEG&-?kx*OkXw%hCDI&R^bG;Q@p8gS;<Tv@U#JrvZ;t$(=XL$7n
zKC6-soqzxKok#jloXx6lr|F)1lvgXY_mZ7u=#?hZfa|Ub`a);K_$B8*o?iGQ*!uRP
z?B6LzczRFz%4TG(ImEiY@2dP;{#8Z3OpXUT6HodU8S6-#Q~a{@%NJ2kMs9`$i<_3@
zK6rWL>j965X;C-+9Q>@U-(sY3DP`5F7t>Y+r%zK=*`{`=&Nn@H&yvsobuM+K+6UOS
zsh>M^G;7t~y{=vB)z0l~6k3&i<exx@i)*M+XwlidE44N;xbyg(3RiJ6GM3N$@#kK1
zzs<F?41tXu7tVNp?n=`Vnmp@zND<?fX;b-HKfk`BS@6$k^}h8fd0SZC8mU|k`WEK-
zV2)2fo>2a!&PLzR))^k$FV|W93B6kS{p9OkZycp}R@}*ce|t)Lqz}uc6DChhQsuu*
zU%YD1=PzIUHVS`~oFgOhG;~_EZ+6%e<M>=btqnX&X0KEUo6fAeQ(DW9{mkyxSuv4j
zEMJ*5oYbRJoXn40y`&qsgyD)+tLpTvrMcTu8qXR`{`8iWEl*JOt;LMTCvKVC7rPnz
zlrQ6Ca71e7ey5J6dZ)>sy>(_CRsZbrxlGEm^}n$87yT%XKWY}A#aGo-f3g)zYI-r{
zl7a8$A9aoGM*U?@J+f=9lw~fN+_`IS_vZLx4b_fqjXJCHy}QHDSnqhyAIz-sWcBIi
z^*^82{}kW<^X=xlk?jW(Yy+>mrhQVm^&s&Pt5Uq4l-9%#jDp2p%uc5k_nc&zq1ij}
zl#=vB!BaEV+EsUQHH2MdYzRHa+9IH=@WRA5c$VGWnEGA&D*u1{^|yWZ@83PU_T9*h
zv#Tzv+-b?vUtjV4^X~KW<&H0&^XcWBosFtfnPUTU(mc*Mo)+5Sq9(Ofvu9S9iCn9F
zegI=n&5@H0GB;FtO*58%GLCujV#+z8X_r#YKl$Y)yoG0-!pW0CMkf^>O*I!{Y1r{7
z&$;o~<A>jx@A8+=zrXtF?AsMn9$x<YT)zI-m-hK~eqH9}<<rm4%iZ?+{r>;&^y|Lw
zem;Hr;m42P*Z+C!KYzV^yC(O~$J{)EH`PN|sa6X}^=3bu6e3>Qeqn8RxW3-@-Jx%E
zmv7!@{r*r|i)OYHlZxRCA-N!npcwW8c8g9-Id*(vGsE*33y#c9zkmM@?GeA5b-O&<
z>=I{*#3Z3sht&(Dk4-RAk5YakbcgHi$r2mC{=-|?Ra+Bo6jz-nnzk!f-dwi4YtCk$
zpJg6*H+P@Sldk5IHF~h+{Pyz4{rxYSFC9)hn^>&;@Ofv?i=H*^Z!R=hZwPdB?LRK_
z=w|-^H(%d1)mLr5T7T*9m!FTaet){E-SXAK;rjjd3bqR~SsE?4IuyUXe*3HDa_Qy8
zGT+)2--@cWK4I05oa#R9$>*~9)!!!HuMdip-~3_og*}WD&ibgN8z1-0a@HzLQ40F(
z$FQbL=U~r-F0a-NFRi(9kHoHf5Rn<GDK+KQw5iLC^dD|zogMt?!>dy}tq$GRI;5%}
zcSKx}Z<>ZJ^JLd+K}Y4Iqxr83RO|1bD6o^``rTF6n=)6XTw@YF_D*KGqcQL26dxOJ
zujpB#E2M6I(zJj4bM2*fr)4&ZeqUvJhb#S&+i%e`U*=qXwD0J(Y3`NE_b(Ks8#pti
z+wjlv`0#Lw%irbmrl~hui|T*bc*AtsBca@`aQ!K>H~dK1_LIf?xX-dQpR`qOGBZNG
zYJ?;*Y;zqJxc+5SY7$wV=V`AwTeKuVP45{GcdYdaMvt4nmAtf%aX1P(Zo2Kt{iGnU
z&%!+2@r;RPN`Fwk2y?UgsSu~G;4M54Px5TM^y^{B{Lf#@>kkIn#BO@HY5p2jjhjjH
z*10X~Y6@tYHe>BNHW9@i``CRPC1(oGoYK2AVd)Lg+^(=gF0X5!<#EsF+ZD0@Me!`=
zt!&FhyEni7mAz)FyQpZ&?w`-|_XtcDnZANa=<KaeH(l;%XuG_)%&nvr6cm)=q#A5g
zV7vFJ&Lr2(qRDG#yk=Qt60rK)85iAd&ub=6r)*Nmt(?cb?`+PDhzqj{!~(7gSNg4<
zA^bw7J(^!|?bocSx2L^c)b;3u`)h}Wf6t{(9NjwoNqObQi?in$BxOy=y?N&0ACGlU
zbn69kcia#8^(|F0^K{3Vv>I2ZB@Pm=d^^?iS!yOO5A0(;+FHlXJMmfCE<Pjw{GR1g
zR>mI)xV3)P+xhow|5s_9o8B@fK1xSt-`wRvbK;jjtaSR2)4tO~?s(y;Nv_ZOF3z@4
zVpZe2Q$9WXdHD3zZ?$}XA3p9se|^5(u_Zyzzi#e!eLnfNjUrpK7Gpri-OcIx|9<KJ
zd-eaL`TuWcuYML4_F|hHG3|cGK4JB?NgsMc`5QbWoD#lmyRhh(L90OIQGW5Ma~O{b
zXl{@c5Ou0CWjdF?aM$Cjg=uOWo=+-l>}2{5|K^vstJ(FtX0yG_!uRLm_g4J-_Vo1j
z^>K6NA3yxC>}vU<%wJE^Zb#XRFvoPSG%;m141Mh2;lQIRb#(qm8|kpF<;z=EhAvyX
zP;<$YUJDk!iN&7JS6XV@nIz2WZ&q?%rM_mN%Ir3tCM6@8R0$PdrKMU<25w9HcICcU
zeCqG<^0L~=@4j7sZ{MMk`twW0|9}6!y-So(4}NmE;LnGbw&f4|-~apZc=!BudV0$a
ze>`sg=f}IdzyBJn3@Qy1R+_Ohqp)(PslfM`w-Q1H9Ixw&x4E*hn#B~}IGQJ(6JYGj
z7_?wXfz63nzm{vPVZN75PSwtvbV<c*rRwD#t;ZFmWpb7|zdRJ58}c9D?J4YVKIMq5
zQ+rw4HOX=gL%~H0pEaLknGvAA*|48|;%>t=$9DAY=~~#`aa>US;l*p=+inGJ@8mJN
z(>woN+kEz&%k;`;b9_9km^?#Hbx*?WO>ZT_c=l#5KAyOv`Udm&=i>Q#>yoZLK7RZA
z<+q8O@9wSq{V8X6ZN&i-Sqq*t>q#6UqLrttY<m_b?Y|dP!&o!@^5O_K5f;aiW4<%5
zy-2$7?)ol~&GAcj9+Fw#6f=EUmXcBL$&|uvJHIVmq{m$BC_YWY^So;5{LEUviJ7cP
zVrG0REYH`id*eNAi?l{}!10yuOZ|7me3EFoc1rG)5rfwAd1o^BWGy{1U-Efx=-W24
z_g%8DUQXS6y6di%EpukqhgB{a51iurORofce;+?*``U(UOb;Tmx(np0s`n~{ExfgB
z#{F>f)T?>FLzqs+>&^XO*qy|fD3}nfdY`-Y+@B?@7X55mc--oNeXq;>?CA1~q6}xI
z7b``7@N>OdoFx8nw)dZJHQ%N+e%~%X^PlyJUf&~bZ8=M_&a8U%iP1sJ&+~JM>6JB%
zfi8E8Z)D72SyDGoXxDYI9d3JXJ({^<LCXR!FQqPxdtW&jS9R;`{G!3KdF$=BY^o2o
zOE~UPo?*F|*Kgrd=L@Hl<GJQNlUn&#{*Tnn$C<7D%hycMH#(;EY0rlpSy_{dxzd@s
z7fyRF_T!1WL#?@3fTZROPAz50P0c>My`4U5W}OiX6bv}I=bK63eJLivbBCWV^D<)f
z-;iS=YGP1w*G?gl@qwcFS8v%k;azhw4E0teD6RE+bm;Au+WjjQEf=4xxN26)#y20O
zBDH0Yuj&kYCebBTI5pH&C*4=`c}P>v)~ufs#VY5X%?&9|NnG|KL`Yw^Gx`PFm!ES3
z#rOW!T*3O`mIBk$ce&<*Q~!!?F}2${f7Tk&?{%tDbx(J!y1k(3%}SmHw|aDAe)}KV
zEM_#<EbsWW7(qo=rgaC;8}mGmzFeXs|0(><vgPF`#aT|WKK?Xcdw%4zOXn_C|K7UP
z=HKe&-+x^3YTh4k`1brCm+!wyyy8=Q>hhFjp38K&c70&|y;v{nMfh#dIPl+Z=K9yl
zzF&S@^8Le)4fZLStNo6@nWfpaX!m=rOM;7~gpZyr`~B(Z>iX~I|DUa||1N%iZ`ID;
ziMC~jPd@U^=L&n)bTvj*X;s%5Q+ojpjgpP=YZ(GVm5(3b(K?r|#GBJ*Sn<Z_+|Hw0
z#ncKWuyoDxVSD`W<Huj0&)z-!@$T;3uWuhNJo5I}t51(^=I{G`_O-bFI=!-5buDqj
zPyasrsJKyIRZ}7&B@n>C#js?;)~b>NJgVK^6=zS>K06(|ca_H}(Sz5vt!g-sCvqxs
zqTtNr`!gG#6wMP!JiSKnkgH0xWj;edYoEI;d+5f@OH&+LY_wQ=1f7=b=>6MfDbH^o
zTR%&#|L|W&x$mnbbPgQ<`}nKDvXdd4Cwxr(A5Up3l*zNZYp}Vi=kSbWOU_v<X=&)s
z)SPplt6C$*@cfNl+3&Gt(#9z-oa%Spw6R}O+m_R?Ovxzq#N?+FkA?18aN&4Drp>$l
z{{HrtA1%~a6~Y#M&03wmZ}*K{{q2mO>t=o9?|;v3!*N7lzT(`@wf)L+BAd%1%a*J<
z#=-dTgoR>y^PJKNe>9wuU#Y+1P!4|KWd0=Y!q>QK;+N)ob|~JJ&*Qr++g5n_aFw}K
z{hjA~W$a}Sr}~HIh|A6`FS+(mtl+D`9r<N{tBQBtn_FI6UitIzZF70|$}^U#Ctn-C
zx?WLHBXRuo9P2~6#)TYKzQ6xI{8(`(c8%eh(D|OXR&yFD@V2eIcq(_}22<^O=JC6x
zH}%?D-|doLYngoI(vr&;&j@tJv_y06+@`XA``S74_4p>vbUzi5p>%Gaj%m)sxgocf
zc3i895%0dsWzlZAY7ZOF&P!KZkGS^7?ws}6`)bhT!*h0r%)FKN=FqD<-z_clGrqGX
z@Vh)P+<Mgg@}ebK|MxM>Wzfq|)Aihv_Ic4fzx3R13R4QpbzjeAzGdMUwNWV|{`j5T
z8*{#M82^;pFC@kt|4e<?_pJ@tB`1>)Ha*B-71`Rf;M%jT42#$6b%}@FnDy(WY4XVv
zYZJ0vRm9ULcW+2s=Rb{Y<DvbGS}VO%x?I+XOc3$Bn0@uyffc8nJdRA@Ir_>bmGRye
zol1G0*E|Y3`8%c_Um(=AYV-dq0U?2`-P2cJ`5bXiKH}K4P~{!g{#=q#2UiGrUw!6r
zE^U`>rN(1TN%n^-HvdxQ^?Tdeox3P(rhY@|P|})PO){~nOVj$cn}509leXxeJI9nM
zy-X8kv8L$1W;JDyY~}i`?qr(aWMryl7ChA`nrXwTHHu1WFUUAu?>~Rm^W-GWHERDu
zjy%+E)ru4|UDvm2w<@bfr?K&zXDj;_>UGUHF~j#r>em%)i`10#&Q4r2$#>~of7QN4
zXOwLWj-RrfwRv@=NqES=H^nRN6&<%)In#Szlv~(Fd!3$bU-MMAb7*@+z6`jzZrY*f
zvjVb}tg2i$UCUitpB>#HVJ7KY>HhadPVW8A=4~t4)^1A4D(p{H+4SOIuBE`Xr)8Y}
z!s>ql1C9BewH+?jdERI;Uh{c+@<N8^y-R96Cm$@z{S#%s`e5hIHKpADzk9sdpLF%;
z`gOtu8?6tnUt?D{^`3jNU+}5qQ;}L{FP=G2TDc%a>)6KXZ+|}h+}vv;$A8>V_4|s)
zGTYu(HGbmYYG@Q_n6V@J>+Q{(yYK(|)?WYndj03y@AvP%^EOP;rOn~M)VELj|5)X(
ziYN~-O-&2<_;cH}#R4;Tf1TxI66&`7k(bxx6_Z*Lx;SDMJ*%x*e)@`$3G3-fsZSLa
zef{$GwN>9A{`yw<=SQKwSo!VlaQ(PFm4AOd{Vbk-+xPx)ftHp3<v!;9|6#y)-2PwP
zeD#F$8=V%d%3pV^!d{l4fmu?ZL$aZ3*ZGV0^*oJ^=bFu#dH-A6EYp=NYCJxTYX#g6
zrl?I)JhwgKtg(v<le3@<J8OWjD5KyNwcX)d0_JzCgstWtyWRfx@ZGzQ|Hi4mRC_tY
z@Y00P-skzt&&~fd<@1*7S|Z<mToSTny5OaIVbv||+pe2FRiD_pv30Sl%&rYlYdfu-
zxeBZ6zG>amFpIr^Xoh^~RUgqwGt|7bDn<1zzBne%x>UHMdPl9tO_7;SQ?72=zx#A1
z&&1>jZ>u7{e^1`!-*v`$f0fvyr&G-O+4m?Pv6WlKpgAcwY-4n}<);;862VfJri2`2
zyE;KP<#8rg@~ieMwtRQbI^W1SoYuUG|Kz1#I&<cJu4?5_zG3n*?($g<d5LpM^V}`o
zzmPon>hs^bEJdYd=k`h@Zr}Ce-R}KHv+Dl*sE@sWCuZ-0&3CiBg=OXY+jmcJ>JbP^
zNE4MkRyf6QUBH^jHPb%Li`=DW;L9VaF#EIU9u~!rjK+3Tu69A`LkC_x4_Yd1nd#SX
z!t{dF#0i|v905jNC+6mLsLGyv9j|3&Q7@yZFlG7DwBwpHR(XW2W$?&ue4m@ZU#fVa
z$*}Ms`<HgrX)AX)$vzZM_51oFEY5#v$Cf9Dvz8up?R=j#-O`L}W!{6VcVE{Wa@UcV
zyDfPA!~*WiITr;)11IQe^0lg@@a+;mp0hgoOPbEI*&#V>UFVal<02}#8yD{j%TfI|
zxjp%$kkgF$mnxLj{(ok?+)iYLgke|mwT%iTpO%H*?$^EYgst0P-!|`}MM<63U!$rQ
zE{b|L&F?zLk(^aWa<Wc1om$4VWJQ+?>(;1m8ymP@9o;+Mp2wJ#BWla?FvIf21|m!9
zzBa6y?0vCx(#_)NJC@H_t66&#&OQwe<d(kX6TsxsI(bK`&c?2N?piYoH(zD(lR7rp
z_pj3HHzyx4`|tes`AA-2?7UmOf9+p7`90#h;^2Bkv5hgZLBuzJIqSfJ1g|w&ZH^aN
zCUjoa5tzN7|JlY&RiVZeRkA!QZxkIk6&>b~A2L5)u84o_G57M70u#+<aBn>w>}?p)
zb|%e1*N@9@@|5iR4d={6_nT`k=i=JzwPlV!8*6&#=WkJK^;bPQA#?Rj<@PAM?Nw4D
zTC$bfS023>S6kT4(c{{Azbvoi>Uv%Vt`l-*OLO0UR}tNn^yvAiS+AdltY5wC^5RRb
zHxEtQDpGb(>D)H9%5bX_N>1684;Sh#TC&IaK$eaE7S<#_b!n~N&%+PwNLW-eTkOAj
z)BF#!KYd&3SZlfSXRymp*XU2nU3>0}TzhQ3uQgxnySMu%`~2jDv{x6Fd#;Vy_4UIE
zrFjh*jFxRT1P^`r@T1_rh0jWss%f&(x5H;uU#{Ydz5jjo*>|r#9i45SzrX(XUH<!f
zYrp=<+Wd-1@#mv1o@Xof`U{=8UC(U1F3L5CE$+!e@eG!u5|#~Iv%I{fai5F}VVpSA
z)If0c%J_!Tqy(O}1(x#P`_1{cUpFuBm$!?p-*wYoCgs%KqdzxKUmv%>^6Q_YpHJT{
zyZd#b^MiHVg8E7Scke&({qlzy$LFuN_fZq6`FOod?v~u9S4wFLYGG3&=SJ^1ut3rz
z+(CavW75Uw2+#e3j2y030l(Ot-Apb8y<NQ3<qYeMYmZA^F7WuCSiT}{$te!SfIDB)
zpBEktwv&^S=kGs0N0%j3M*ZyMhNc7s2iBGRPHS>lR{V)BZVvsuZp(t>v+u2_U_5(4
zLtx_Lu8sE=yY;jze0-zyh^$h%;EaPhvfudozgHV-Ch><f=q%6_lqlK5+{JZM&|uo;
zCxI3d9-OcL@z;Lej*cI1ioTzCSW&3fcDYW5r!Q8fJYD(W=Fe}lb~nv-Fc<q~sdV9K
z+x)qgcgY`l$G7;cyUntf7B35Bg6AFaV7a=;$^Pkw4et9Eex0@}^xm4cpK89oiF^~`
zxVekTCrkKN+`}@DyNYW|ZSLQHeRuicw9j{b#Xi$c4eNdWe0}k~`nn%Ke?9v?+uu1N
zeqGqEMOz)`e{O5<U`r9P<g2vcaW`|?GG%vR#B8-&>#jxjm}ttl_TN-HlEXSFdD|QY
zr60|_E3b5|(+)WO?)wb4lQ$WhLMQBq7wz1kIhFhU!5VR%t$R$An8Ow%$rzrV%Bh^D
zmMV4aM}MwanEcUKO)PG6L}Pq!CB3WG$^5`@>C~^HshNc<oDyFx*m|aVwN>X#8GqHr
zRiP(c9$0h4>7A3#+|Bt(yHa`|r^ew|k*%&vR?iX?`JErA(V3FH>9^OhoOwxWWQ{+?
z92fs*t8?m=dX!Xyx?*rcZan{Hr(KHs{X*9Jw$ux`J)QN^Qa4+1L*$>m-&7C1?KV%_
z_9Q9wVe;gs(;eTPysmj&TChoI^UR5}*n3tAX)$N6wOO$@|Ldl!mwvzeQzU-b-Nk6j
zE33Aoj<pUKxy*wuEL!~Csh;nrUT)~q={g0EPk-V)9Ty&}aQ)7@&GJvU8~l!X9Bv8>
zeZ8vcm08}h+O-*ngxj7K{aR(3ndA02x>UM(hsjAF-lHZ8oKXxxS31rGn>q#=D;sTT
zcoWDPwmRF*G?^`VJ!9A2GZR;|uw*UeH!WV@p%|9nvU9=~NjaW12MV+#-5hIP)+E(@
z*gIi=iOTFky{EAzHWzbloLP2AiY2gE?)ASj^_qDrLMN}W4Bs$)%CCKH&c8H{X7GJu
z{3?@Tlu{5|qnvb8>3f1;RQP&-CX-bgR+i=diVpoGJZEoYnEmG`J9l2?Rt~u;7Rxg2
z>+FY0uX7$c=Q1}$F8$oX*uSkKV}t9={N=Z1yej_KY-DhBgUiL+`q>wAx7~cS>CDmm
zX|L;pW_{9Tj(>jtTS)HzFB;XWrWso-{PXA3rGMga2d1am>1`MJ-?Z1M_k!^R9m`a~
zvq6<b7UuaU7V!Ml5;(m4_4)U8|0;^hZDc-~bgf+NxI)!JsPIkL>aXwK{CRYCcKH3h
zf8XskzrVNY<Ey0dZIPvoodKt7Zu9%aeA&Y0^0|4H&&Ow-k&LIWd(Mo0)7T(;yo2+!
zn}A4&$?`cX-}D?X-q_1zC^XCOaz%ad_kVxe`QO{$x%2k>@fsOd!?@U(y){37J?)<U
z{Q2y4-7+@Ei~@R2Tv_}0^!59Hef>OD|1j^}y;n|m;k09kg_qCV*f(hpdsY;4!E1;0
zDvPeoIc=GD*5U4J=B^o%H^onEtzEjS{M!|QRhQ(mCth54>EbSp4VMfyM2bK9mQ?-m
z*{81HegA(H&S+b*h~+>Ji`SA!-R9%FcKliImljYukNe!s&alIVTbI3cY!y;cWao>0
z6K+`M()wQWTp_1n&b8HN-3|&HNjPnPb^iKgfuBa7mVRZ~GV6)r$+}a*GiLQhUI=jv
zcN5tyVC|P|<SIC`uYLai5B~odm(6~?`u;yPyXt94_uppp9xt(x^WA*n#`@s<^Vf#m
zEla%}mscIy+8tY7dhcuPiDj#&oG&qcQuE&;_Wqmq@^1VR0;khX927`)niIqodro}A
zJ=e{lZ*&g0s71IuUuNYyf&XzqZTatyhyU%EetmXk|7>^F7R8q7D|Wrx{`~gc<+~pq
zpPv_g`{mcwvMugWYai|~jA1lVI&#49@s!IZT0v!7Kbt6|yeZgy>)Q(VOUkzb69nc=
znH0z-wpi)s1*?@R1`nE)+T9w3XSkg_xhijWmS7hX%k0OOS-<373QnH)?2jyegXXbU
zNqcgp=6t@8Rl46K@rTUSsI*wS*wA=y29Ai6hnz$WKC&B!6x2Pwv;AxF)%BN{#3R+d
zzc}#QRq?(;{paNA8&qzJ)ui2Sbyj{9>Ty&_Q6z`$k?vBbrSC-e-+f|OKWXmEQ0G9i
zu*qI4-&6)Zy?%42Zdda!@dG{lF)Zq~wXe?pS=+XtS&e<xPp4u(_nnR6pMF2PnK#ce
z=Psj;EPFur^T?^(0qiWS+zlD=0&k~<79DNX7L*ibHwz9?-8yMyV@%}~*R;i4Drqqn
z?&NT&+ej9?Ua07NbSJ|KU2O-QN!hN#+u!Z#Y(4ojdxfg5%tk#GakfKEnQK;@dX)6x
zCC4;jIcZy&=a=@s+%2}P;qs(Kde$GJzaG1OYHrlot>*>AxoWc=gQK_tLtIsp4P1?x
zoD)ip>uS!D;ucffH^YUi%cr61-Na<$<nQ5=C07Vey6U+}@+#lHE|%vTQzpin`zSBE
zzUAo2^eqinDlTTTI%lZ{ZDlHzn*4ps)YuCVg@sFhzZdvC`_k5>y5)YGGqf`1N+o5N
zothDU=h>^^{zZvLei`pD&2>DNee<iu`l}49pGvJ{JGbijW=RoEzu4UmGB-`s-8BDM
z)#eR)w~9{Yd^>P0<=&+ahgMd+5n;%`uN9%K!j-ngnUUMNr{S<Z&yo2d=lvf=e!P7y
z<d5*vH|u2%+uzl$ah7O_7kjzC=K9}N4{ht7TK|7_L+;<v<lmLkv(EYWYW4?vMP4bt
zALd`Kv&=t*<KVac@BiO^dw$-&eqXgmCX;ag_K*BiEz9m^?Y%oMK6Y=#$45_BPuIV{
zr|R3En^%9n%Mtw@A#mb>qVRH?FRhPXX||o1wSa-4LBx^w@3*|i0<MhL1d>Anb>`We
zwR{=4b+Ukx;3Cb5jT5?-J^uLY^H+cSzke?K%iGoM@^pHA_UF-~%i?p(o`;Kzi|>A$
zclD@*GP_#Z69y3x&c?9Uo8RC6ckRrK3IATczjwDrN_N6)cWJ!|M^%rwG5T!!aW_DQ
zk>OQ)+d1vR?CtxU54p;2J2^3(BQ4ooIk5C*)J@xIvL+KlRW>^9{cf|HCE@D#@cg*{
zA8tPne|-1t!(UPYnnsmd7AmovwVZ9JFj;iX4W^u??&M7tQ@ytY&W-!^=41ztpfktc
zwA!+Lw!7b4&sO^6rN3H2iH&b{-QK%BmrM57_iQ^B8llX*W~Sb+cR$VM9sgG$VSm@Q
zPC)c>%8aYWo<uXfarXbaYIfU_Nuds(xA?!5|CjgwW$=&3{vX@#uiN)qFDC9lmh9#p
zO?$ca<onOBX2pEn?JK|C_RX~|_tRS*7h1e)e>_EAq;FyJk&Xl{jgueCW47!*5xYM%
zj=A$y;eL6$IuEs9-$Ls*^2$2990-tO5q6w+X-Z%J@!cs~!uQCrd~fe*|GZ+s?0v7#
zZQCvJ<Jq@&{Pt=b=eK9G91YaC*?N#wP$4QI;gTkAn{WV|!_^-@CZwFH+Gib|Zyk1`
ze;Gep_2b4}o`pd^A<VyAcsWB3p4!#xb%<|~rtjk=k~dqAhVip0JPP<~`{Ym9dKbY)
z$(=7EPE7yPwJK`+^(f!PQ93!h=G-~I;nbo%I#bqs($I?WjS%^+mDQwXH}|NOp#GA1
zR-4x!t=&4Ksw-PjEy(%enP)erZ8iFxHhcNC^0TH5EBQ|B={jzdIsfSQYXy&s4~1;L
zY8Du@QaQxCR$nP~a~MzUl^0Td_tpK^uhQJ+n`UhP?D_wO-eVaS`Crzi?K;f#B-1}V
zd->nKu%5V|5vNwvefxXv@Nwb7{!p*V$oMUZxvf)o7H->j*NVGsO{mn4f-9EUZ9Ow|
zSd5$lRh0GS+e{N$Ung(HpXzy*BcyQ!YsuMj4$oC11!WJNJa)oiYf^!Y^vUGFlc)6D
zB`@wb{iJn&ZxV02(^Gv0RfXI|Gd?v=k-M-@XW_G#zjeyDF5my&)4xT1vq8<T`>Ug#
zdL}oh=56J)_gdAZw5n;rPak(drp6|&A_dU^N5LfxZw~mf)$6ZlRhAY|mK0##d0jZh
z@o|uBeft{S`dKcw*D{29dcRvZ?{UzEu(wC@3e?iyGz2~jd+^A%SuFg{4zJDTva>_y
z-by;nBh#L^NG&V!Z0WbP`>uDL{n2!&@9ZMp7gmO@&Y>)>A}Yrs%Qlp(I`HU3<0pgf
zQy#AR&vb$H_v(N5cFi%2Xj=8EgfUStH&D7hx?*Kl{l=FE#N3`P^{YR3?Dyrasg+xg
z%h@};`D**`SI`%6w>v+)r~bV>oBcm4lh}__vR}9E7kcQQw02+Y?Z=nozh0gBH(X)P
z{SPWOwT1U{toFo*OrD~m>bp|+T&-Q+qZ0<+DVaC!|NkdnpR%)^GkH(dKN|)CAyz)c
zmaAE%g_S@59PM7c`gwTx`Lfw>tFC?*R92jrVCS{)qk@Qud#7zdw`b|!vyMmK&b-?r
z?0PTi=K6j0i>9d@J!0r`GELFfv*=o8kOAMq*SqxE4lG&V`T669{X0QhX)pKN*Y4R_
z%fgc`K7IZDJvZN;bLv?5I=uEiE02$1*~ZtFY&<y*3^^w_w%m-X`yY1J<&4YI!*`!+
z`d`?X{mxeH+EVST1h37Ut3R-GwR?oBYBPijHJk2C*{Io+>ENK0wq(zVYei=hH`<Dc
zr@lP7K;X}c9_O_V*33a1iM6FaAGN=)|NZyrPt!xs-@beIFE5B&u39@XvX|$IZb;bA
ztmS;OTi=NAggkpQqsxpj;fTPB*SqePeSh@x=;rCK&+>5=uiHH75SM%3!(Z>(X1pt(
z-F-K0bKj)p7k}-jD%|$C)c$L`9q(n9lQM7knPyB8U}kyqi?^To`4q*KGgls%kZL3M
zF8}G@>L35M&%Xcn-Jh$kO<N~?w(GUzyRg>K$jK#+@w?4E*}VL3zmF%@-|&B#p;3F^
z@Oa^*<Hv1$92!@?o?yi%&G%eJFMPGJim+{3bxlo;h$Lgv-mY+KWjSUZ9;bO9XC3{t
z<yE3^=b?i7iu^f@x^oJ9*563Fo-Kapi-P^Xk7Z{*J!(B_z}J6yhHGZ?MFq|Pw=>HY
zzfdrnsp=rP{P4ejin5#Ss((KFS$s$?Xp4vl^ViOV19>Lrk|qUfeF|H}!7D7kSa1U8
z@{0m7Cd*5wbR3<TzI6$6h^gtv|1XcGbM!<oNuQZpz0B-%Shvzr&Ubs~t}TnLS;g~R
zcS3^Zl_Q=XTfeM&bfj4RvDl0*vk<;>ZMWJQ;v1H~KInC&;wE>m#k3;cuKdzSMw6|p
zLOH+soLCo;S+BZwr~VqN?3r7Gd~0s9H_Mqk<%(auB|s-Lu_fmH{m8HJN591%wVBVh
zU%%~pyyU+*deyIYTyZ|^^*`YS+XYGM8x_wi`tKL8eHVF|eDRI`g6G^D1kSz*6IZS{
z@G2?yqEz3nl@cNc?W8Iq&nvFb(yF}Dxy@pM+0XD`Llxt)8%i&j#N)dwjLH)nMDI`M
z7xkH&p3S9@F@e`7f>)wPyJky_qwY+G)9v9}5$*dLnS$P~Jrlc2PkU3maYggZtJ`?i
zRekc<&N}7bzOEZHwWqalwa(h|Z%KoJq4R}PmrA#;&@c>g5O#GFOmRGOqOpt7T!HnG
z(99Pe-bvpcD$Z0itSDH#^XM-Z)^J{<(^I16M~Q#qp4d21b6VoYrr%m;F6>B46uDP)
z)n?_{EcMye(a*mM8%{JxEEC<RAlxyV?d13FphzpFS>A=~!!}FL3u`q~52)PHq1CWL
z-|oT&jW27yW~&zWYsqh3n0#g9^kS1J;XOMwZ=bZ_C~j-k)sct@TNm(n<*dZejiLW^
zvl(7(NILoHPuS1yibqTJx0PB)hKOk<cXDz1DE_HxJ#)Fc{3H9z*YQgh?f(#O;@ci?
z^7|jxqG$glUj3b~^KQSvgSyAMx~2~%@AvQ(6k!N0J!O?T*JbIVxm^wGR8~t#^c;Wr
zu&$=^=bNY9r&n)2z4>$Z+p_Jis?*et+-SX9w%OYH^>4dld}kbtmnilpPm7s6qf6)7
zZg!=zlDT3Nm|F!poK>wRUp=;U!G(DdJ>9F0dX==V4(mAJ<61Op*_5JlN2VlA|0Ti9
z{NnN9!^`{S@Bgp)`|$DGzja)NU%&0Xzo+u^tDB!!pWSt#f<eKpC${6_=OX1DD^_&e
zpU88*Z2JDn-_03*nJ27jtM;7LTDEq2kb{caw>R>^DwpPL_#sm35f;?q#U$=5*lQNs
zd!A4B?NOgITHQaSjg3neGo9g&Zg0paR8Njxq$p%`<ZapaXJ2Q(zrUyUTKks$w{x!F
zU1h)Q%_+$@n!H-mHYj`%^;>4=e^4@_KWMF-J3|PQ%Ao^^qKB(Q&1BbG-}|yBR#0%&
zst##O*DJrb9zUFX>hjyz{gK~S@9S2KIK>#gNcPaK{!M-T>~bpIw^$N9W(p_@a2^$1
zV`kH9AQv3Cm&>#5Z^~l>TRXn;^56EqyVLdK<{Q5+nm6fU;w*vhmJc5q+TZb&x3{UO
zs{dY4ZzFeXj<KLpZ-+`!<Pm{grbj~`<Xv4`CbBNbiu+?vBg6TVdM<L``}yDJGI6xn
zz7_hYe2`_@^5k3hd2@dKz1_Wmt9j{Pi<*74^EWptzF8(4-=?>=D(yj{$NPJ>Z}>C!
z*0P-CseK^6Y{`r(o3fbZ7D-Lw@nD)UvDEAE^7Hc@UmSh9`tGmF4RY?&1TR0Fxaq?+
z1EE4LyXRV-ijTqsu09Hx-f~$XHQ=Vor!bqHY<`!TwM-a|*WO%r;7#nR_Jmz)&Aeyd
z?KWde@!b9S$Pd;&Hu1o81-7mQ2~E$u<o{b%KMqK-R9&}h#}41bzO+rQW^1obTlVk6
zR{JwQC%%(jr(&yffipoR;?GGV=EIY_M8&?XYMn2`Ws}+|`SqMzl;7GNcE>}#ON-X?
z@<w)7=fC{;#Qkz}O=eFm({cNm>uzd3IXA7<PVr??wU_FP%xO23YE%#JW9rUM$@A7+
zK6yX;^hN(?m2v6LJE3fFYSs?Z#8us`dO{W-Fa4aMa%7E?2g{NGrM=O|TjkEJO0w*J
zzhYJ6#urDW?_6z2H?-C`cg0)$@|BgH*%cxlb1#a{Iwp6JgST@H(;ZF@8QtIqeWK}Y
z8ApF-bYHoaA1Wj&lipymJARt7X!MHk>C3;o47cgjTkCwuRUqzbTFC9JsTI?nuho!R
zY&fgI;<@EwmqiY%jx}kW@LKaMlRx#yy0o+$mYfwCt6seqn)-%Od*$YUX+Z(O?Q>5v
zvw0;XJ&{m-z9#Zyllk_dsF}Qa0ZTVbdGK0E;?#TD_B><l>GfV=%l<1n26SCtR`Wh@
z``4oVl~t=2aEWElpKN!EtL0enF3*$wt|2O=i|lv&te$5xA%Fi=shj5oXL1CHF#h=Z
zON{H~*4q1~OlwYdWHfGF<zzZrIMILE^Q$j=)n|RtKXq9@W9HSJ&GWvT6`8>t%<<uf
z)UV|}tACu%`JsPdUVWAAhvf+tKfJ%(%&$nlSZAs6eRloD`ESa^j_$QhjDNI#tMHc6
zpo{+%XGET>^fFTOY<<-5%tPEfm|1(h!W*;a>;L`H|Nrd%znkUx^Wtso?eD+Y7W*dS
zn#?M7yItPbe0<c;Cd;*(^eSD_n3_6!{Z*%?C%>~59tkABu_%*i`mexuBxlPP{;eS$
zhc1g=eRWCMK_#t3Hs+jFuV=}T=J^W8AO86H`R#B1>)W@tYaT8u|M=<a>i7Gqe?L0<
zv~2S$W#J|dF;kTk{{xz{os~W;nkgtDGGo`>cW-`Qzu)_~?o9ii&3E3&hu9uJ^}PPT
zLGGx+N^ZlM%l!jvkGj5&Y;iSSzsg83P&Rua$1@d0h2sfXMo*u%uMD%{@!0IQYl>=d
zTdd$a!<%K7_wTT|zfLJq%_NEGNYX-H9yf`b5+<2nKZj3wfBl_oC~K^Mi2zTp&9}|m
zcig*DmU#9Z|0rnH73=R3;3U7AYXXPl=97m%7D%*9B;EhoxFT8Ttmfri_SV+FeK*Z{
ze|d_lA|vMl$rK09+85U@$Qt(B$ZqNon!zc+Hn*UD_Wv3DzZd^|c3}PeeQXD2v(HtV
zy{u5CRI1d%r~UVr0`|)jRlhD1WaMqGlu#{<S^N0ap6K<KWk+hKeKl|~<uO!XTXZjH
zZP~vc6)XXQ^Wq+y{=B(VB4OA5y?giVKRvtj|EsU5+YjD8oU#4G%V$4+{J3o3dG6=e
zo6?I9uR5;RlDWv{=H0&^XDGC-<2JXAZ_YjAqN>bf_+mz=+SHZ|#iZ?v9v1G{_q*cn
zhi5sz53dg2KXJ1-|M&Uw->)x@dt9P&%3<=QDY}_ru7yGt=M=ZjaK8{DAkGzZJ5wR;
zkrmH&Dcz?BSRP3z7dvj9)itq0RsPB9sA+2^UA-Qex+0(TKqRBim1&3eSznXdutv1>
zKErIkGvXRS)y(qFD}GuXNdNZqWkyQg3fD5RJ^$yb7X}vJnc9DvOXS4Fe~g`f)3vVr
z4%$3RN~d#*-6d^n)j7vkbhj_pG+|oSB_cInqoXlH`uWezdsZ}Z+3#|y%V4|a;JtOd
z{<GCTrg#-?kyN+fzyD>mVSnk2AE8=boO*)yImhpDTz$qbVdk~A?9G|yrkbV7Eq8TW
zni7&+G3}WBqFsk`0|Y-G-7jS9Rkp5gk{DN#W$&s_OLy2iY-S4J%u}0nT8Y^-#5vnx
z`lS;uHZ^D*stGjxvOv%{NND;K5zc+@#qMm3<(M4zO!<oO=Ag4S6{mYe)V$X_@v5va
z_SUGHwyk@mY3q8=+`O9tV&ZqEXaAb~V<MM%PUN=Uu0Z?Xcn)Soi|LLRMM5V?C}<l9
zDR%}2^lI>^967b-2-lhgNiw<;Q(qV#oW<Mz;2SH0YKp`8or^a8%UCJ?B9V<{SxC)=
zFpb&kzBz?_+`W<KfAnl7ZoQo5G)cu-iwrkTWZZm3YRi-#LNiQR?9+~Th<L8HIB~ss
z1JAV|->z)0VOVvPkDKX0-o4OrW04h;r9RoH<$d4QUCn#iA;Hdh!UUe4(~~%_E@x0n
zjV?XJqs6jSIKW@@QMG)#vFpo|EAsMwzFDI6*L33QRQ*S@-T7pckAF~S{rsc*seaw;
z*w5TWb5G^{3l%x3pFVBBSN|9L*6Ed(LTUx8zP|4M9T|6V-@Fb#O@GfxS9)gN`X(%(
zq`h{>?R~%gmH&VJ|J(lmS6_GE)%w<GdE`=wxctuL4`<9ecQ8!O;NX@S9=lnCH+OVR
zwc%nvyGl}~Q==sI&9Az>?|#|2Ffj9M33!-%wKH+D*0jF%nNB?nX<gHFCihtJ96wwr
zv;3t&-*^7@-|dGDJYHNozg{nXU)9%7S3h@mzfHUR)%LhU%PB6URTBg}dnc(Ja#;FL
z@lwE=wh32*Uc9)udG-7L{C=Y)<(ChCjF4H^;&Y-WE$HR9dArhGjx<_MP<0k?3jG=^
z?YKA9eX7_rCbk7h$2@#mS(4P}uF=iCvvr61BBMZUH3k<4wyw4r$Nrvu{`cd@%kAI4
zfB$|wamF!CW}{LLmNODfp6Akxc~;mkrB&uEW;(OHJAL6!9d)D9MrFm>k@Jo*%PO(4
z1oXv9%1@D6XvJ=mDx@jxaPE{N&lMHsln&eYve(;JPidD?v0u^0CaKxTSaf}D#@_DA
zO(v-#`?L4Dx^5__k#OZ-Sp6dW{5gF+m4pB4s%p+0PKb;wnl947nXHtfQOc6Q_cyD$
zuI%ThH=iP}=QB7QxndLC_4gxdLjvR4)5+!YC#y(*FRKxo_x9RPjrsOA9Wi$Hw`1qz
zI-4t%D;3;${`&UAzrXvf*s7%U!eh7h)r#|RUiI0;n|!Lur)csI=EI?vR%C3wU~)3@
z;ODgs%t0G6ZT@}vcl>SD_ity*_3J+0F1d04e(gT{J<?D8GB?baay+nqvCJ|X4j-qJ
z3XIxPjV_Y9{#*x+Hg>qSzJJ-+ZG1t{bDhxo{#-YQfPjfQ6SC#jUb|8>`*M=kjf%UI
z#gqPZ8r?{>oZ1knV)bvm(z6n^c;QTDZL37?)o=EN6o<Wje5gmi@5bdVh0#~m-AH=X
zkukAg>y^V1wUZvpgc;_WrU_P^{{Kza^X`|~9jmLZ&YthK@B6354SUy~_>ofNZ)|yd
z->(N*<;r?nHNqk@@=_h<+A@7ESi8U}y^c?B9{=aPQN|q%vw0`o+8uS|<Pu&Nr#64}
zyi!?jhpty!e2TP|o_%{yr!C%ZJ%{9pDKk26cvY@<3n~^r#c@bKFyz9TqkTLf99>h7
zC#_W~(RPSF{6kK(;flVN3A5q%D$T1Yy2tk^3N2j8?J}{0XJZO?#gPnQU*(45YpbeX
zEcfP3-SkR(YufF&Emk*V?ga2{O?zusxZL#T^I)%k*H=xP)pyJ7a9g+5qGLrfR|zgs
zFqrnHge_IcP$+~k=}5*Ju@;j_Tq`GO&78odwC&La!Bn@e-nU=6ZdEdgzbm{@Qs8QL
z4-exOg_mBMkAkdAZ|Aad25Lref4!D?ZtKT2Pkfb4l$F&I_!MVwbq58AahgtOnmMz%
zOQUm<)J7rBz^khr8{JHTH0yL9ZBff*?vPUK*cHz>GsO332XDr?1D}r14;Ad%I^8$n
z)~v^;_8xb=^m)_D?W$J%54}Bg&nSv*{<`hn?|nswmv<HaHf=dk^;AKzRqgc0_=1`D
z|4&kr<Ndeg-M^RT9)<gDcz*1c|Fj<;&aeLXvED70|Gw6{{qI<QOcQ#q{YNJK#!?+m
zDfVrZZ$%k>4Vl;fe!Dq5KW=Yb={{#OMI)7DC5^P3PMh*xiMTkV^;|SOb}M}YSDDf4
zRTtJ;{N!=;bqSsobZXWlg%q`C2UOfrZXP!I(4D`6=TQ%b;*PFqGnpJ!5+~cpv>kqU
z`S^Kr{@;HMc{v##-@N%*e0|*Bny-(3eqQ~#+gDtXjf-(6PvAu(*117`l|IM!@+JIx
zv2@nT$YYOYeYWU5e)yvz%O0EiZ`+F%Zud7=u)4H#xEA+3J^FjiEauYMgeQBvCZ*0|
znSQ*)xz=2}>GVvAPR<Rj|F&Lnw%*t~T{5#a$a<;x36_u}$F}U>S66?d|GW8nyV@Tg
zUOs;O__2j=(g~K9K*k9-PXzDEl4uKM%;Vmw@o3Sc=$Iu%JtEAm&-4;fQuH}oOrMs_
zVRJi_aY%96(iC1*bLUWhNwv=(*_jl29i|2JZ{QOAs8w?5>yjI(J40_Pryb@Lbh^KO
zd!bZ<)TUI?j}xa@tl-q?`~C9S<&S@g_TP=m2@9X#&}R5hCF73f1s0PwJ<qVIYi5N^
z-0|M*`R(rM&!?|HcP{=*VRM@0RR&|>ll#62lvky_Hog7u;-wkOn(cBn^R>VH@#bgh
zo*g@Gtl{}Nt5Qkf?24Zc510P>d6#{e=$;*?7RYE!;J$idO3<uLHd%jMjwPkL^@z+&
z=8!e^o^^#eDbTVZZST(Ce~y>e*L?oA`~Cj^51*b6*Z;0BFBfn3Zhf$whU!zBm;VZP
z*!J-+w&VL{@JjJ{3WI_ozuIEGrtm)>ADL;M4_~=shLYn_rq1*yLWRmfMah$*1B&$W
zL%0p|ZfEGZ+KIJvire`fe5hQr(tgWDTZgOOHR?ZhSZ?-kj`^1vI`gsj!N@25*G}}S
z3!LEEmviU$%xRnN^d||=-B!U}v*_Z?;`G<CkAi!Yj$T|d$<*61>$=c>&G)n8I5=z4
z9c*T2bgjuS<gfq4uq}Vbnw*<$I;R?!pMNhl`8rQWTB%dSB=gy!O<k)GPRnG~`(|>L
zX-$q%o7jd|_gJ<|KIpk#UM;`PyfUqS?j(Z~f=xYVM4GZ1U(MLmv{1_`X34qjp*#VK
z=GXcIIl~XV(k(n)F?rj0gZW-T?pJ4he63t>s^nbcxo}#)JMUfIgd-sbj!)%}+{W+w
z!RP9oh{ry;oN_wZ!n1Z;urCPzy6U;4*!tqy;EzilTE{-LDSUo*lH6?j>rR4SKdiTu
zT((6jt1Thw@)e23UI|ZuqxFtjOMK>ZDsK`vz937-%SXn*z;I!(>&YIDo2UM!COok?
z)7Ds6+oSzM#vn5xNaayXXQAD*Z+E9!I*J-jxzMB9e`GpaZ|La*XHE$*tz-=rOl1wb
zyx?H3f#Qr6tjr(ZJ5DiToY|#u<5GF2*=DtA&H6eTmM*2U18&-$W9I&!7?5oEYR{gm
zv{gTs6dv$vd*8NM;+&t@a|8AlQeo)<*&eT!@89x#xAZKoe{65AtoeKO$d%<mvhUZ$
zw8=0XJz!YSbH}ys;g<!6*7v`C`+Cv*%e$`q`+2z3+VSnbBF6iC`G59n-pTsu`>*Kw
zr`6vCzKPcBhCOgR*<YeHqc(N*atDuv@%dq^{Zf?{JDhlN(1&T&X1ijx2zCc%wj&N}
zM3%nuU3+lqGuL&~CT?H3bV}P}LvJ>%16HdAcX;={C|Vr&t?b2R_a_cK963Cb+7eIw
z{FnIu-{Y@$A17Q{TRwey`1!bfHD4b+?M^?he_nsv>rj;-<3tmOkB=<V=RT>BX)|1|
zVwoNGp&?(ye`8qiRGmqeXM8p}X>jVw+Q(79e2kvvl$z;=YMhr?bx@GU#O>afY#EzR
zQLbw{dCuIwxJ$ov5zE#CiY1Evp1#vU)}&S@em%3ysJ2vmf7T<Lja)aT<!s5Z<juUn
zf7#%vrspY7g+?|Zf%>MiFTeUSU07rCd1<HdL<x?@PNl%(5}eM4e!r!z={&c&BymP(
zh9UcRc9Hs4tqHRxE;bDeo#U6Dkd-upg?asi_;0KEGR>EkPLtkP-H^lKAyyTY|G1=Q
zlG@3r`{nh&45RF2U;OPaudV+UUjOaer=wM^?8n-V8}d$8FfQ9$RG`q^F{`j+bzSww
zdvi)RzVEnhD75J4i~CzvESX{={<cFaWunea2i=Q_>#Yn=i5}+4^sVtz@v(hWxBb@s
z%8ic=zN~)u>+{2RWe-ZE719-rjm|#PHVEFLVxpvCkesr><4)@Z;Zla?l{;@=dA=#w
zYR~T<kL&-f|NrvOpFjV9=KuTq^VQL3ImP?-%d|BwENH*k*Wdp7_usFHyKXPP{kNok
zhTJieXPlo4PfQLwS1Y=u$xKIAY+}dvd8u__p^p~KIyxyS?1ZS0@->$4tVe>mWIxS}
z_&m=%dhPXJd@DZJL~A~(_TT*JQC!z85x;q5H<~h^%dEbc+P!?D&u9I#(_Bh|?M!9S
zUL9Je#G<A?4>|rTVx3N&{!yM-i@vC!n4^sg7g>sHuU+vp=+XI^TTM%@nSKv>_&ubY
z^X=0)eJT5y!av+%->;`?8`ylnGj&tVJN>^WSl2|@?l5?7DVFeY5#xu>eBG7%=E#aO
zC;a`g;J`U<jRmrrS>IZEZ0m!L9z1?3h9!R0v&irO+o`1vZwosfe7@}}pW%ITLwkzo
z`UUTwX5_hx*+v`Qx7y4qxW(gSXOBQ<V=kj&BJ))z6>aN%!it<{jX#%}hQ@~Od?aw7
z!$DN+#;;FWr<-amPsN6D?>EWFxoWaacd|75sy~aBqNOIWHaP5kFW}<#_0Dr1i3vwM
ze41BH;Odm(?hr5zatI3x;ycpj<C1kIqs1rgMsn@63|+UbTX(AYOFTn;nTrj%X70Qz
z#>3I*A-O`TJonwRlN+Vet~@YoxF@lyaK++wp`FX67IjsvU7sZu;_4KTDj4K^)pbh5
z=G`}W8jl+leK>YiYp?wCrhR*URBlgSckKAG=9OxE#;b&XS8+<|Y>ZS*zP7=UMclmX
z@7mI-?VlO=#GgEvnH}(J>H3~s&x_uP|CpqAw(w+d0^ga5hm-31|0}6$+hrQm-E_~c
zPuh}~wqN5#{0EhX;a`~l@2Y<jzSdJL*6No2h30*o=MFBtxb0ox*@VJ%KNXv0I-IY6
zHx!6snjhAYId9Hd-BlhgA`>NMGDt3X5}<6jfa&GrwL*H)#@dS<1fKdi?H3YcWn6Hh
z^lP@5`?*gm)7q|156?Wya;9gJ=}Qa0@BHol*URPCzkess-ZlHQ`1-j0fB#)w9X@^f
z^WB@zrtOYe>%`HLF-bVWOuWV8p1gd%d|v+hyzj@47fNkjwr27A?W_vj%hp6*Idk;a
z$pudu1d{i^(g+q4k65I4Ci_gz1EVz8O<$LBi+`)AmuS;bn|C-|&LH$`h|i;EVIe%R
z*S}gaW=wNUI(oUG-2A1X!iq?>*($OHR)@68p3fHFU;BIaY2WgSJs}4YXMH=a{9;LD
zsOtrXh^3c~YT2E8qb<Hz_d<C`Mvnvsvo#;@g$Wzoo>$CP)AB6(7$n$kWT<;9#yHqr
zvU^u1kEqW?SC1*m#}7<&j=5}gEOgS>DZ8e~tMqrdtyy<z;u52o`~QB;Q3#mSDW8A&
z@Y@~#PM<B;kB?v1_xeWtF8e!ncJJDgjY4%6MH)N3>ewYPBSg{HjQ51$k$ZQ~bR>va
zxp1q_2z3cQH*-$+P2nA!rQBr}eLp_zxSRX9>T32rwb$FV7e2r6eOI)+{JZu-nRb(<
zZ}^p`&s)7viDT9ghnBf&Qk$9#1D^;KK1zGT62KC8`SFJjzvcfYe%^k2b@%E0zaHv4
z?YHG#efOMziJRQ#I?;1hR??Sh+mf3eKd$(`p?=5i+xMT$FI{DP&}_jnA=N;S$#&kw
zuCj|(hck=o&)PZX&K9fe*h!1I*ExOs7PPWk>!|NS0X@;R4R7a8d;Gatchm1_f`Kh+
z##b-Cda%<n`yzMCQ?4H(|2kY#Vsz{6beDVOJ&R;`A6Nc-hmQKs&#~9HNg2;y^noMm
zk87;e6R94}x7Yn{X*C6kKUYvT3>1AV`rIK<@}_4->gM@%Z_kuBYn;g_|B-cL(bg|r
zY}z`9q^~dYmeh(CHVVC_Ao);9Ow8x%Yu?<r?oVFxWF$-#cSgjmyjbS7Ot6x@wfpYQ
z$k|ET3Vec1lZC^A^!LZeCF=@_pKUjCc&c^uK&V)A@jE8VXB*#`+py2HxpPNfOPBxn
zueYmA&kBTc6nHW%o)rK3hlr-klVv%xg0HWza#>k3d7Dk*#=r!wXG==UzL~Cl>$Gdb
zl7->!>u!k$*R1}#(LP}dZ^K%(jE6NFe^@`1_kCz@pv0JVWx<Ih6HR7xOfq6$+#}OD
zwP}T0U)Cy(oHZ$2Paa0ZJTZS_?6QA`&lRqa^@YZaoEKNWw)DEV(Cb;JKkKT6QJbc5
zp4`2zX!DHFr5##^leN4zBm`Qm<-EBn>)i32n{`>!LM=HuC+Xkvd7$|u^WcM=UwhSV
zhdaA}Trnx&ZmIs?^XmQYcFO20dvDpL$?U?ss^;9yoC{&{+(sIDY|-6w_E&mbWKrDm
zXU6^_qdChW9)_!Y$bB8jds@VaqcQpO^?#?oX#Jn#x$CcS+q?Ziy`Qfx`dJ@wvA=lt
zSN%8qV$a{r|7EmnmF3eZLFX1N>drqBZj~Z3amLK5YZ;P8D<-W>IU5?g%EM<N|HkJx
zDqF*wlpJLz7|82(*KpLAIC8(^ZpcxLTG3aL!}4#k9IyC(jqQflST$BG%e-~pzNYH`
z%j5R@en0k~pZ`6${M(zSv)}Lkf0jT0-oBkXZhVn#^^jrENaweot9EgTRj1G8k~{M9
z-;W=zli_=OF~w+x%KQFID<4hCw8Mp$rnt&HO?)Leb0PP?oIVMQ4Xc)^{FA*>x^-9F
z&DA1m!BYej+=RC43y9hJuB=~j=-R0-nk9k7WnFz~u_fC~z1glzcDXU5)nulhmb=Bh
z(-x1Ou1?R7|9dol-|y3_W6kB}DYi~gT{>m*6O$D)X3dzvbw!BNc253J1<hM*)3qyK
z$%U_rPvL1<==xcpuyfA?2N??&HMRnOr-xGo6>t0W8eI3C<>)ta=3UlBO-mUU%@asg
zdE{|rcAdy>Q&~onZ#(X}dQM}X?c(E8a_@k5n8cG06Fhf4zb*d1{@K6X-s_Zaeq#0A
zAup(8D#YLz8oK>`_1(X2wg2-Cv(J7x^E$eT$*44~L;22`XZ+&NU6>RiYxBQL>8;Ze
z>t-=hR_>dffBW}#f9=@cS6@E*_E<OG{?_F>>s1UaMN8*8+d4=8-j+8l_c4d0=tP(1
z6^6S-LKuQY1!Y;A3pfn>(_J3ya9y!$VSUAy<F6lg^&Pv=mD{i-ZI{KO(A56!cP}62
zeXfvgdk{0B&?nQFfjeu->D1yYbNXcOwEV8U?#aO5cVS6WMIuw+<qh}k|31FH|L^{P
zZ=arg`Sa+jErq7(b9i=lZCv+vm$X@%a=6%<>O1%9*Q{Sx|4MRQW5D4v7iV62<;SZO
z&88IUBzo}Cy}1{SoXju9D66R}#ZSK4V99$ic&!horCL4Txf4$<gJ)Y8xSHH6-?Am_
ze8X2mu8HS%`|L2^vVL)My2`$ugXbg<=r}KZXeagiqd;{}0mB~uBXL_#%=Mf<!9+l6
z)hf>oF6T}boUyp}yze=u@z0zc&tK{_U7o-ly!eUY?EqiRVBtsep3dA8J!5&<kL~lj
z?O0YAnq_gYd~6b5c*MAIe#gXR=~V|SX9oH`by=d|`fyXvyQRDw-GcI)%y~Fo9{4e3
z)zxQy%sLh#%U7Kc*{Gh@WmqCHF<;<!prCT9sPPe#=?j#CTDo0i6_*))uFW{NP~T~v
zqQZ$SM_5!$RhgBTIC*-6?(5BwdAuM)#N4{WMBPkC$>2;EgZjIKXAFv(8C<utZhcww
z$C%Z6=e{Dg8wXi<Of?ez{mD8$!GWi8ne9~*ABUW&ohMQmyMyZ-N|qdYG_@t5aqcE%
zv8N7?I-0w>+jXs%XFa*n<9T+*gl_i(LI0SH7uoK7tDxK9$XlGeVcA=G4JKx<Gv)$;
zHa$in8s!g}CI}iZFw9<MWm-_^I5W&f^Wv<=pPk#av;3wwCloG{Iyq&f?6(O4QC*9?
z_c>lI2<x`~`Z()f%bDE?pXFChR}^e{c1JTH*!fC{=NX9=H|Mu*SRa;g&{)XsOs_fn
z+;gp~lPixdm~|=eyyF64;}c#+GlHUZg)P(E&$#q;bk<Mq@7$l6|9{W_7xqTaqwKPN
z>_69&yfpIN=dUN;AFT-v|Ma}<qJB>P*J4&PD|IK2*sj!v&6UC}M>;%KoNFq%9p~B+
zn!@2+*r>Si^QtVl6BE6A4cOHeDL&fsQt$%9i8l|tf;;x7{+%JW+2a0%Cr@W&KKj-m
zBr)TK#k=jdm;XNQFK=I4@%`oF<-6b7>GYk8-&6JX*V);pPm3Qv?*0B^j)3H`xTcmu
zp2r$WObezkS(V;3vbwJqp1^!?$9s7>bMyT5>%P_QmtD$#i{FrU&h7^SHw;%>9X}Id
z!_XIbk|R)QtMtUVo9D#$bbP3=OzcYNyvaR9$7-3<6OIWkN6x4w@&|K2m?CneM52*V
zGc$d0=a*~m59Jw`>=iBut*t5fvncc0zM`pzPmBM(TxR^mR>R0HXM61R_6ZXTUPgYj
zjEHn8e$y|(a4F`H1B=g#P8;RqmG^qimzABXeqJN=Uh<$~g_8eA$N9T|F57iF#-h_$
zIa0?|#eYMA%L}cqIft6>uH<uw+V1CW|1N~7LxOY3st{9-RVV(&wfjlF`noCa#P%cJ
z$GzM8W#1p~y?F0>fa!x9mT_CY{ycf{b?EOyfz~Bve2FruD<m9bIC&LLXec>41;{n=
zEoL=j>{T+C%53=C_icAo<kr~!+qR0CSAVQ3>P@dtI{oX>O1o7O@8z1J(v4O~E?m{w
zQ{DEl;%>&#zx%3h*e&H$+?TXS^IeyNjBe<|moMM#|NH%aS>eZ3fA9ZWe|3?YgHecq
z%c?B3aPzmf9Okg6MCxzjo%^q(=IsY{&q{lpDJDtcy)!Rv5#qij{8X&*(LA}c0j{nJ
zFTZ_snsMzc>zWDI9XfB!@>#?X__u!tr(u9WXJ=c4|LmIcf)cAM&*;4Gm{PL%`I!?l
zqvpGPU3pVY{9xMkTBD~bYuJwdXF2z&wx(dJ+P<4>VwPOY*>^xPNlNXz$D|Lsfm_#B
z6osl=_(rJ+_|=?wBfO|G`mOkZf|Y&OCe^z53QBt4GwNk3TkvyQUNuv&gVTght0qoV
zQjiL4d>>IJede0NX8i=C-Opw<o7`=dTCnKoJG*Ad`G%K&t4Z`Q3S3%KdAdlnUv-mr
zj(uXEl$G{Oqupz^uTr{XvOSBXM$&^bTVWHUxC>*HoZjQxdn#tUb%+o3-n4D%hQcm|
zOs(B#MIC=`bnRf_>X3?b;CyG}eodL_@|NRTKZJWE@2ljA?%nPa63jaBW<HPQwCHO4
z=UbO|shpm^?8iP;ma}DbbE<?3{{0R-qu1>EdWnKT8ar2)#;TRhUzaq!@{m3HU(@N0
zFU#BsRz+EYS;<^d%$9`~3WsiFBxF7apTQWvDbl8X!{@0zf<2NZXU|?PkmvV2WmTpU
zZ7_Z2?7628F&()e{^Y^F#+?gJ<_opVu>ADQcbV9YnSU3a$@cW``tD$)A{;2B6e#$3
z&YG=LY_515oTc*T&l<yvOL*MQM=i``PH8H9^{+wlMbbXS1C}RFryVt&*pMA@@67z_
zBCc+k=d-(*XMbNZX~}Xmb&E8SW`T=;^ZtGH7C3pnz54fjk&?g3%jW){DK=Msv4614
zqnqEqN1m7cH*vS0O-GYQ$4mWSO;NAN^PRi6n=<Z~_VZWEpHUFgKPD}+Ir%x`gc%39
zRThX%S$^b7GlPYy^FgB>x&O{-y4;?*dvnLhpRsXaj4uNdxUAX|x7Szg`dRY#<KeHz
z`LBQHk2bWj+PkOb>#N!C@6DMjt2=FLd79a`-+%Yr{(e}Xse_Sea``>;(%A6Oqb@B;
zRx956eow5}vseDTeE#>?id)}qJG!?9dhhglRLyG2<s~vFx$KEj9E*^S#<ueG7+IzV
z5{C@9B);CYIKriRK9DQK`v)6yK;Wqmdr`?YyR{7`1ttWq4mo+((@K)%_i{&T-o3qh
zOJ#eGJ=0n+tEP9Ikqg81ZL7u8rzhK6eCpl5YuCP=SJ`=+j~p>M_B{XBg{Mw2r?({-
zJxe;#dGkpMPmxRCNx>x-wb)DrLmZt=;?3sW-j(~>^{JMdQ1b$_RUI=El*}8KN~rs>
z&M_2rYn#ewZ07q^LObo<uIP)p6R+@^dE0*xa_jjjTd2+2p|<0G-|Vvq&ul~YF-5WD
zM@3h>DlzAAnw#HVw$k@%2;b{d`r@I%mB-57?0kPLQ6!MrrHHRH#Pq}s{=X8(3_Y&*
zM@uj_FX-N7#lX2ZtD=U7FF|7ZY*`lB+FoC8-?y`ltrvfN&0Lg~b9P;Y#9;}3*+UC9
z-OgEm_~x^WpGjLY_9!zr_$g;dgsL8KG+tkp`=VUlZqMKR`oI78?fa8<QAc{+qvgGv
z6Sa<?d&qQf0#AU<+6O!Avd&A`%u2A!3UZEmlj|_+hvmk%daXLaGrMMWupB+nbh#-Y
z?a_~Iic8*XSQcZHIf3KbggKUlJbcq+bWZNISAHHH`A+3{z?&M+13g##Sq(1gC;GJ<
zl-%8Jd|KH}<#L4ok%{|{o>66~+@L$}<HgD6swA2`|5}@fF3eiB>TGb`pL;5~AJomB
zURi0ndez*0WhbU?m^p7neAr$!_u90B^?Dzt%(88MF896YTB_G_#pK34F1tCGhF9-<
zrYmz|iGtf*r=;f!D~?Tdcq6!N)4`Ma>z+P+7t+(~5%~4o&(}5kmRiqZ3T{~{JW)_n
zS+a4`(pwR;o!GhBmL+}k<$hT>)3wD@L9x5@eDlf3ZuMh1&)VlUadUpqNZLHX*z&lN
z+ZmDX8kwzTtDpL2ba$STzwc)v`%c)fxJ&)WnRSN_R=;1(!mBOlp;IyK3x6P|+_JKn
zKA~qOUCmi_Dv_ZgY-ei23NH!!<4pn#u2&bbg|$EGkS%W2IW;fxpvdL)n=_>>z30lX
z97z)qz7%3m)R7{R$bBZ0Jx_3#OM`>jw1Qc)1g<2w6_;hy1Tjiw<s9{7t`l0<b5^p!
z|9;IS<wvQ89TSAh1X-LHskki(`L%sMv)Xf>Wf|3h%z-H?XO-&$5>vxc7k_bB@Z^Ex
zoT8r_mmbzwx+Qe-cJA((hW7TGObz}mSDU?GR!4l_lT(M&*Gd#yL|mMrt|H<0e(TQ8
zMJA2GZOLlW{j9d$m483)SI(yG_vgm`XLkSe((Lp8oOAzUPTNoUu;1wC<LmmG-&o(5
z{P{V%b;05nGpuGjJRl=Kb6<u}L*kTuxA_lSv>AH**<<ni)v|>*4kt2MHCb-p>D5zd
znX$F!Q-aXr!gm&Pj$XF^Kkd}66%E$B2L)J@1^asb*8MIium1Y+)!*Cv;o;wB-+fS1
z{q51w)#mwe`}Wyb+po-06S!I^v8(p_+wZjku8(%*)!Yp$eZBft<@>$KPFv(k>`NXW
zPA)I5{{8RrSAF@sb?<hay|*h+HOtv}qJY9V4S@qHE7mi}SRKnS>oDDt#;_u->B$*|
z9x=A;HA;$aS+%>q%~O!t+VW+miiSw4C~v}<$uF9BemDR3Xr(5L1DljJ=kGwa2@O&X
zj=aYM+b+6oz1SeQ@zd9;(m$V+LSjt!C8}^L@;Iuh%oW`lqJC_C$R7svC((X4BVRFl
zR)k#6+0$oWA)9C-apcZ|sq0to%ABTNT3lAMDQ=C{r@pyTe2O{kiS~SVEqJt)#Iofl
ze^k79B<O|{4~Ol7@9$b>Hu_xsCd4pRJ0t8yxy-Q@%+Z(rxw59bX#2a5KdG=aU}sq*
z_qwfX|LTS8wOnJ~t-SK@q2kv&SIP)+9K0tSkUG!dB6C%dsNrl)C*?~mhHrlVd-knv
z!bQQw$*etZt&dG*si?TYU#;2|!e-91?QT{m?`zX}a#y>bo#;FF?wxo~CZB@Hg20^Z
zcjNEd*2Zj2cR&96>)}`LKJp#dm_4Cv*^(j;7ej-|f9pQK`>UUC|L@nQqpZb6i@g3!
z6c5){Ti&#st&vl9(Ja=E=e$0t+g&#9*|sKC*Q>v;Q*xrlP2Zx)IVM|=PsuoQ^@7gQ
z(6I7V0<XVb2okyKbn419znLrEs81~Y^h8A=%}8yzqy5Pm<8yyqHlAS-b@DR#^W>~z
z$~omam(3~leCN2GPcO(hX}2(yb!O#<%@?w6i64zxy=mV0o(n6Uo>wzDb71ur_Y;we
zYpRYt`gODC_nE4{S07*EG8SCsTF_YXuqABEN@w+2L&KCw6M74ILOZ_95MrBQ{vhh(
z%nY`0wIjYY8!r?Eu5tbDI7QevUhS8jj_~ir>&?Fwi=}g={aDE8esRi7-=kAJENwM~
zG%{xeU;f9$dnIIwS~1hPfaN>&IuDpg*)Is_HIkJ1EOunkj!A9`vul0^?_R00f7;nk
zax48T*Dky4rg7C+X~wb;&vQG|Ig0P4Ei*oUidXo-gGn=N=62ki&&Te2RQSH?d-V^i
zkEQA~*Zn&tnsDt@Xr0o`pLz^^0gP7_7FdS5Kk?YRa+1Z81ick(V)^H0Ygw4425+0O
z+thuI64ypH34^eEn<6W@min=Gm;I?QR#FNqzR5mKL5`*Jgh#}y9gn*M1C*1)449vr
z?Assn>S+-3=S7c>wmy(C6t-R@V4Z(?AAixFBX2Iuh_#yXSxHv;Ot^0|r{=~@D_9i6
z6nkIQ<xHIOlH+A}-@EV-@AH~Hyx%A9Un`QaT#IM+<W<IwU%Wq=*9D57u)p7PUiWRz
z`PDayJ_@A0uf5`yQ5EItqHxjv@&5l!5AXfE-t}t#`_%vZ7k|H9eXwaI{|j%&JNJK7
z?GqFA&|lEirf^up@zF(N+uI7WlV`46S^KslL{z0mvB%MulT$>({k)#u2>}t4hDU4S
z%)Pp&T&>cv<eyz@{X_b|y=IfZRnHVn3>>+w`0kb!*Vq32clUPr{<<AI_sKJvb}trR
zA6NhR?e^*G_5J;>hn{6na%^fb<WTy(Ywd-rrNO4$;il)VJIs+tU9!NS@9){SXV35d
z_xEl4cKQ7Lb@KA^<#$)8iKVl!^v;=8@pFdQ>_r_63Q0@SmMI?Tcd83^5uPhq@bq?Y
z)SWBe{kQQSw6VTkwEg{Wfi)JAlWaJ}N`<a0dMmol{h-A1m+qd-y|Em!5eI&3ac7yh
z;r+JRZ<Ed6P5-(mDScn0exGc?<5yj>tG>qWzy0=D;e-g+kRt{L21;FaN>@tm3j}X8
zm^f?WmmL;mTZ`vyZkA^cR$BG?mTq|N=3Oys_gn6dxTf1-x=!tR(~D*A=kNOaxbF9r
zx4#u8F+M7M*1$29(d+pfCMMzSJ`t81-Oe7e$mu<{tF<h&HO|+~s4wzt;F$v|FY>PQ
zE?%L?ENJPz?<=>{CsTf(2eY$ohUdM#J1gPgfyG^?E?c~5uM0^n(|*D#wV>!vU`XG^
ziM-5w&doRU#6vHC5!zMAu_|lL4W7R{ZnHWir#YMRC5jZpxmK}#UjE`lb+}<+*b5dz
zg-m(5y!rR$%hhg<+G}HN`Lm?j_8XU#P^gPvA`5qLZnD(%Lx2BXz1_e6&xga?kEmxX
zTeKpe@amPV>f-9FXZ(@+b>VIMAGW@&LCewv&v(oTu3`wB<l3W>StL`vy1P5#tjV*C
z&CBO1M#?`+{AKY*QL8D#<cUw{)>&=Uo;#;4x_Mqs$k>3-$MR&)-g!CyHmj`;naRD;
zNkKhB^Zk^|a`#k!a$2rm)up@a0(;IQ^=0RO8)s^JZ<-*rbCT|Ii{dwy^Q~ONiY<A3
zTNavlPIs2&KmKcH{l*VRf>s_l$Y<l<{6li@p?S82o3}TrEna`jN~=dhc+r{Te~OBD
zjacmcehMnBG~AWvdE<zrlls@)Nt<@uIQV+*($;22t?r9UQqC+{Gi$axchPjOT=#1h
zR~<wId<<W*uvlIbkdN5BbA`*SPY**C76w<p`7>kD4aKewg+#9v($d0?t0r%byw1F=
zX@95b94F;j%v;UouMt?j`qrNFIedpbjQ0ChpF2EX(9cTBC-%cP(>3hpDrQ}Do9!KQ
zc@x`N6UIkZ7Hw~5^gI}7GxhAmu1tk5NBlw~j=FeUS=pqe*=&`-B;Fg-Ge_yV<cV2K
zabmenOP`r8Gb!?)zG#Juy1W~w{W1qHUFWd0i4$f#Hh0d-S@SAQtStCzmRej)!O6Tw
ze3RZyQMBE7N$+<X|Fg$Fv-=-L1sDoCoA9`;X%vk#;1Ez=xoM%)S*a8+iP=e08qRe1
zNLtEOtQ1_6(2`tTxNDA2+Mk;)Uw8J}X&(JiIqTbok6iQJOTGHTgWg#N*-Tn);GS#z
ziu<m;ecbss*5B-_r?jt^NdCb8=Xa&of9B$UHm!3DQzXl}Eq*7S;kC9<uCBY7UcX_=
zPo+Og)<lGwwivYCm{IrPulw5=#+%`%uU74i+Tt7A8X_j`vPehj`Z>WVooX43D%)-a
zYqs`WiPlcqvru(qQQEiDpP2rL-Pe_R*XqORnsUR|Hl}`0)$h~m-{0G}e{U_zgU6RI
zpANsjr{?dgM@N7Dst62snR1}c<<072*S9#NeLEL^M6OeF<s&XJhhyvR$Heac|MU9%
zf9LoAd;50z_uF|djy5$p3a#Dz`@VzlJPC!kNX@1{j8kO{nB95wg{#usmUT>+d}`<C
z$CKZ`+Wh<V^4Y6<JJ!rv+}-`PZ2QlW(ycf7C#vUjc+8r5LgmNCWX>eT8D3fyb6)HZ
zJ7pofGB%l;n>k|p`*-i;x9@&fu~EP&>Aqgryx6x|{yh%2-~Rjeu5j~PpJPiFy2SKI
zovHKtc)i!J<Z$!s+?p4Hi%ZYh2n#Z->S$?Lc4HeyUFAbPvDe}1%eQ{NyD#|SG2`oB
zH?8YmmG$<I^3LUd_le6gGB+$broLOUtFY&CqvRy3Pc3VUKc<*J`fZTs+n&Z~#iY@<
zOsx3SGByFH8*jfmx+p52;t`Zd4i>h4e`Kv}Uk~@8rOSiebu~{_TtB(|?B%Upp<e|*
zb3PCZdK`P&g+;_|=dS|OcgH_({=E2da(MXSp51x--?qKo$KUqABhWRKr+F2RA`5Tw
zgv2X`4)#sY&nl=*czm5LL0L)i-L|_|-#$y;ZDDO`WoK<CpI55iTx@EspJ=dg%P+m1
zp%?1**ZzMwz5IH9jO>L&8}83MuqsymvuCuv<4hKv6`gN(-A&s(d&A^RaT)Ig>N-g+
z4_8Pr8BI)Ab93faVB(q*$>qWEG44Q*$oiy<6XX5td*@YmpOe{oDcE{q`q?zM2yJF=
zkz&7190H6!M}5K`C0C}kb-4G;nY=_Nb*g6AErZR;$B*=NTK-cmWH)o(zvQa)0Uox1
zEG2%qgJztMZe~ck+BE)*zwtTaZsP3FO)pb6uQ~f<>-u+D{0lCzU%dTOwPoJ=eQS9>
znOQbGX4CZjl98D$Sj=q}x!G>9Z@Q3=*-H(fm6Ff%Yz|9Kux^~9b6xZ7Pd(TEw)1Ze
zn_ZO^5Z0c(Y153NSD}kDGG}C7d-}#KAk^z@Sb!F54%4=@54TNJF5Wp|rONy4{_bhp
zgp7mRBKdV^ES#8`cXW}@k@-G+CqsB$7EH{Q(zmI~h@D&UOxSp1jjGwfx9ShP6!pa#
zG8?s3*SKbC^@LA5F>6zL-jdFoF3y{Fl7Sk}e05pVdjzjcI3eN7!+J)OEy!PK*`YNH
zrp?MfGE>8X&9sDT)rvj|Url#k-D%1K%?WJfQi@uVU21c0_(mF@a1`IP$wI4f@}*Z3
zVq%XS^U88Lwq)9d)f-=|`r~NQ{WL*AMa?)w_>3!;!TavRFZZra>d;sbkykRM%DIDS
znYE4btO<%zjZaTzocb-$SXlF5K~T{<#)X_wIyD=1C`|dW;^0k}dFz&zacIo^q&%5X
zdvO(4!}bfS(_VJplr<>i3|j7gt8LkPjh&Xf2kyuV9!>W%`u&|P{r}z1f9*HtZLt64
zFQ+o~Bg6OkYsK4nA5>IG&pu=x&vN&F-kmsC)z)m)$L-J0m9vzsTNU>DYt`O3-P>tL
zHcm>EIBwYCA@W5d{lwX%ry2|$$9_z7JQFNEiBZ7#NZs`>e;<Vhif{fCrnGOS5{KLI
zb!~}n=Dm}%-}n2&yX)WI-><)uXC1G9?wnrSzN*@vpNeX0u56j-prYc$ELgd!#?^zb
zGgvkLmxb%Int<2sb-y28*8l%^{r~y(fBy7um%nfSW*gIz2Wjb+%fvW!widM<Sf*Fi
zXM1tc*L3&pUCR>h*p|-T+w)2$K_Tq;vm3h6`{S;?-WIDXHg)xH1DUPv|9>st-;?<N
zzC&GQ{Z?t0$q!BU6$PF$Q!IHiw^&SQqsnQ^Sz*UJeVRF07jSj4tx~JKe*fOR*!#9J
zy5e^##I@^hhKJT}u@am3dh%u0%T_5ZhJHC_3#}9-7cqu}n*6KtOtz0GQgm|Qwfg3%
zDk|7r&9i*#$BFs=vz{KRe7|Schb{F+S6w$}tX_3&8(R+J^b^&b&5rp7O{^_?Rfif(
z=H72;Sk-WZjrY;5aIcu}f0=I`6Kgr8uxGMe$0R1>V}jZ(0!f7{dW`rU8_dv{sQj&O
zwV=3|TeDQA;}*5zmV*JuKIZNJe{<u^wP)o*`VL;R(ouET&RxEJ_UqT}lP`<shHedg
zYNM|e7n1Yv;kTEb7q@mVt(0SXecbSbK#GjB#Bawn^*7$+zhgUp+tB8#vZhtD!0mFz
z)>wh+nj3W%(sA-@<KpMV+q^BS-LF`7q{n5!oVm=Grv<0%*!T0t>Gt{8)%TTcxPQO&
z_q)32{9l)^K3eiooBd3i!iKl=<mSsYvoHjUsu;PX9Y4~sWSNKF=HeqJUaumyq%rh9
z&r;Hyz<gA~c417Pr=_vn=MNbft0q`}U$!~vR?mzJZ?0{4bYP~fdeEk61#VrZ4^_?E
zX1wO^1cmud2QE$!e*Mz;d*jSKk88S$WB8Xv@400npCFic`NM>xu^Nr{g@R7#na*y{
zPQRbXb^TD!Pm9X76*AL8LN?3n*nYlpMts-|HFJ?=cCywz*UlLSmMJ_?SN^aiy?22U
z(}tMU6Q^m%n`~MoHEH5XsiQ~MXW5((;kjzEwKT0xQLOv?noD0luX%B2LFA{4M|@7b
ziCNVg;1v?kl*D@Uke_d#)ZEp#X9!GfxLA{Ks5Fgt(uzp)g*uC0Grp>vwkE{*%A^ga
z`dz%HrzJTVH=R({a(S-uP_RM8YLl;A{T$w&;^M50S}WDod~k`_BBOU!)y?4jfz}X(
z)y84ZHjDo;u+0+IF1*H_b@RK4sI0TzGUl-EPtgX2F84foiw@km;uCqgecs}B_BjiU
zC+v_t_rA_cFn3W5BYT%eT8D_^vRM|sM^_~Y`b;pKWU|%ZeBW}D*US57FYDs;npmx8
z_vy<Thcp$(G!xgwbFR(X|5n{CujWbOqUNWEVx2fzjx>E-=g@TGP}dTVi|Y=rXiHg9
zXmLt~;p&}z;`u78R%Avt&$za%^>Fl=l#ce|n$8<jthYT3>B!z}W<N);D2Od}QuA}g
z#-LAgZTdYvpI<ri@OlQue~S~Q|NsC0_<LREw3V8Fax6bo7w<V}{O3dXv+CYbZ6zt+
zEl(eGxlecgv@UPf(?zj*v$n;Bt>{XgQ26G!fRf#Tol7d0#w*OrotP*7c+s2A8v@Kq
zmhNvZOB#K@TK9Ha`$p9e2T_G1rFGkH+upfb`{V25-P`5!^WGg#a90->*Vl{NS5sM5
zc1cg_xy2=42hK$jlAdx$j?PcnIBVLJ3ul@#u9y4Eudn;NegEI@`~Q9U&L4iA|8`wS
zn}?;_xtw1j87!>KU6nz{OM~^r&K7NrkXs*?e*Cp~`s!^9-#&Zx_v?Cj`8+w*o=cKE
z_YN<5=-U0pX~W({LWYu)A8pFJz4_|jyGP&t4VEu!lZtjRyk0#+LTHh)(qWTWQH>49
z?x+Y_KY4KCK!`|>1LK(qYu@kvef)U8dHHMC=()1yeK$oIXU<(;6MWq_w7N{xdhM$l
z)3aA4+c)(Y+Hxv<KCNAM@^Sc$6)7D|YE28&8qV;oPTd@}HQQZ}XEB>}`vv~TW%oO^
zF0N9F3l;5On491ryy#qs{-)`L?^p6C25?oTA9`^>Bk9MLtgTa-geI@&5aFEDDP8?I
zQRY~~#qSPV-tRhFW;{=(k%e>d3MC0e%Wp5l`a&Zn%sDKzkdZ<3X8G^8@8VuAjJlO{
zQ~8bW_6%{OZ@ZU&_;~m6-M0twZeCm4lf3`#42$PCRNlOMzwP^X_w?t-S6j6_40O4&
zM9GnhZGl1GrQGe?e|2nNyrU%etIJk_&4p3m(1Hy&%jD(D<^A>IV&mr7+t}IW=I>%W
zQK_Q7lFjNYbLYR@P4BmD@85p=pQ^P`&8^da%cl6P7yljl__}EL)@++=-*+F6-5=gt
zWOA<c>6VXOO+9K+JWMVttphI{OuDWWu;hozzmh2HsX|{kzL+z#w*2)hKXNBobmzp6
zvSkK$l2>&fT4|x2s2XtjMcD(Mt#kfvnzCH=LB!VC2Hzzj^E~&am-Stat1L=CuRMQg
zmFDl161~OK#MU@(_YwQxy*j1v-t^FvO>F50>rP!+w*Q2crQDjk+^g@M*(bK!>)WqR
z$5u<>9<%+r4zD(D?z|xRbYIW^^Is&+Ue{4_xFVu+Ro}zi^H6y39Nzek`%4#XOK~%J
zcrcEAv!?s}PP?)M$pb|vS=7wTLW|k7K7Q$3tCMNhpL%S@>n&ad%t8|lk}MAzWWNZ=
zD!6=?O<BWqpU8y68`(GB7FpwW;V;Vzox~jF4lB-?Ou~&#ixOpi^Y8O@x?*!j*>6p!
z$k(emTiQ6c+j;+9_9|NGO!nH!Khnj@jfWgLH%*m_oik}qyyxq4DX*?vP7+R0jWc?b
ztIXEDIDh$;K<#EFi5W9krZEIHS_QD03Ah#>kU4y!ZDk|ljEPr_x&#?Hc{Qhr2Hy%y
zUzIWGL#sgOqX&Ep=WWkke&JPU+P8Cl`MhHPr?peAFeWg$XUN2+Z``MN_v>QEV|+_K
z&oMiGCAo)DKv6&`P{C2@K)|{$4qYpbFl45#>zKpi=_#D96da=Bn55GB!?36R8M{v)
z?}`hb7nblicxoJ6xM;qODQin>drzG%)A2?H!T)nMh#NB`)db#=7d!m_`iXD#7TLe|
zcPwvaFFJEZ@{4<p)3@V#;u!+3FEvNq>9yY<JHdTb&0Vw8Q`c*KnkOS5&{<RvcdSIe
z<L%XiRZh<es?wJhRbC8KII=1&<(u!}bxK7BJ4+rI&j{bwJZk~3nsQ5E%7nrj@4s!o
zzFq!aea-)OmmeRVw!_N0{^P5!uf?Z}FIRW>ecb5~>cXTUv0>KHy^j0#HP2)cjbUYx
z+?}`krcK=ax<4<k@BjPwdc0lCeY>~65|*?H9-6RQ-#eJ!aN~F1+d9)ufBpFD%aa#x
zzWn&|<Hd`Lol&~CLtkqz|9$uF&5IX*T|S@x-u_-*yW_&5Yhl-R1^)W&YhLzSol|*5
zuH5$b_i7_#uQzwBwk`EwI5EMRL8Ew!l1u9e!L~Oyb*h!!p06mBW1sp~;jh5V3k+>)
z3*Vdf_xI0!TV`*Q_w9li_j?J!$f=k3lP>DS?c2fPV)*vg!rSH18wHp=jGj#6@}Dy=
z{l}ju&IvKDg+hT03QBpizWw;|<jt2gMR9EgFK+Z7w%|$Tuy#3r<m@Hsus1uJu5GlG
zyJ$7BR${yJe(5&90s#S6R+H8@vOxw{9)xC_q%IFop5?q+_k%@CqJ*x7F!RB(&lRPA
zKVH4*Dg3kJUNhsy<ogpoKk&5F66!g?V#ZQ&tEBeA)2h8zQrh-2>}HgyCCW_SUAtfY
z*VB(bpFTBTzFfRx!)+TITOPJwKfZi9eVhNus>R<zPj8TBlbrW!U$Bx=i6~<zmsLyE
z{f`DsQi`GLF0D#^&s^-l=g}h7_VHih=Y`fkKNr=ORc)%Oe?9+OSI?6RRlYh72D86v
z_0>x3O7JSZI%!MR@5DPyHYJ}-Ua2#Ed!rq5-{#%+-|ZFp^H22@#pIkSbvQCP=~JM(
zzIBjIyYPCx&zH<fPbeKZD&cc@uiH_3WnSfnaa$zbO)i=r@+Ig{GiTzw@9B|MdUa{u
zuZo^LQ1|8RSmdv9^^lM}Q}L%ydpBgCF|iim%2aU-yy8`P>4}-wq1vj4&z1M3h4rdA
z`)R(oDwKHoRJbRz|4XJhGfEQL-(5f7$N9U*yzzYNjYk*Ga`oKwnc>^;O`@80!5y8s
zf3zKDeAwr}GI8a~uqxX(H-E0+V(@CYxK#M&sk9koJH2M<@pG`RIq0)!%lZChldH>g
zvJB@PVbC|WHVrM%+Z)q#^iETTO9a>CG&}vnKQzup^-DQ6iKaz)R<*m<#MpF;IQpJu
zSFV4V^^{pCR#frz_csiSJcE5FmveM_7@I!YuuS2e&r}1u>y!8TZaolW<eXEsQ)Bit
z9)s?!Q;bg)n0ct~5<NFd(D44o4Qo#BQ;=FE%O<yc{mo~K@}K$4TxOIUpd2V@q#C+;
zfzlb#$r73!3$;3!q&FxVCNp<+nM(=^%$6vKIV3qrXu={xVS{JV3lHV&Ghe*LBar3L
zpJ{zFyH}R4ePXO;e5P?e@A9J8LPy+7*RXALmEO9i=i^%2-8N6cU9TE6G$iqHBnqe~
z^?kNg_MCM+kd=j}V{<c~>ABl1BCS0#mlBnNg?!qc$NZ41-x1aFGvXh2>w~##`aBCg
zzyIYi)awth`O|o=;OW!UnQ|7Mbq6e+ZYyOwGhX>yc!9T$Q~dLtuz&mIl7HG4@A#k7
zw*Bzb3-1rtym@^vVb08tP8Sld7<zP=@>@7|8k(IAd^&quoK8$|Vd&CLSz%=zUTcKz
zM;J~Qy(`tx@h0i$Ucs4JS_^p=cR1|%ZPsvakw06`zV!z(*D#ofv@}?~+91Ep|MuC(
zX`6TM+xP#&zgNG0<!yhvH~+lex&43N?G9fbCnvvt)hy1dAq*^w+NRemW1E!5rSZMk
z$w1=R!-pULy?S=}>eZ{qU;Dqe+q<{2;!fFOhV)r2N?BHqPi!onR~vhE_0eBXUVQoT
z<<Fllf8IR#Q&GDsO6T^jm2=AbW;7_Sd3AKxs<5^B(Z7#}*57AccJc9vU9#W*nB@F^
z=Xq!Wqq+a>`rohb?`(|wUMsOPX~T`W)d`->hguYyX8aReCR20jx}nqaow=*UV(Pc2
zE)n>>_iR9O&);VsKR$c*?%$i5*sG?YrVAf$IV}+AU}bG>Z@>S;z1(fH`Q`0mzB)&(
zeio?}UB)lP$!AzJbL(`j<ChAZpNMr|efsqD>C>k}cVAFg%#(co_>08*{MGlb$80oQ
z75F4lKxk2V*4*p~QBU?}=$mQrI=TlM@`^2-bs*wsLjNpD>2ME&LxQEd*WE5%<Xm&&
zeHs6EesgDuS!oCFH5YgEoH!wB*?;DU$Aq?P2H$qaZh!Bz+rrvv<*jcMUxY2?PTXC4
z=iZ&$XPX)3$L#s_@9FEw+af2u%u_Dk&M$9Q`{(8I`Q@?y|IYsxWSk@sz$U%@@4G|?
zhj?c56EUHsx~~IELU@>aZ4brWz4z+cE3s9x_LwJeU&<|;|ISQa{B6<JPriqJ#1@@#
zO<mj~xwDZWrK{)bCOf;hbsHZ(om6hi_t3Lsz150W84}07K3;eGyMNlG=&P4FCw^S_
zvoLX`r^3&9u{sG}Nq%!0IVN?AY~ISqB6Rqc(A9fKOfq~fFYEiUKuPNRho}p3k}u=_
zNj>wrv*!sHw^q8@BbDE50?O$}Y>ktdo+%g}6SYzQ!7TRf!{+B%2X{>OIwfM^`N^Z{
z)WOe!_RkcIYU-Tgrk-b8R<Ag_nD5xH&Sn3ryUsq3Rgd?%nmX5TbM=Oj#HZ&zJu&jS
z#;PUG>s2WI*kWsX(89WJs!uxB9!xpl?&xIFckC>W|KiB?$2RK+F`lqfu70*zj`w-H
z;xVPz<h1ExYE#}df0PcH(`uq-9?n&J`%80&eNTr<$gI5&rsS=-JiAYA&lAN3>pn~t
z;qh_)Q?7C|M)w=n5}o`x`<L7H+_bqc`_(NI>17)p7^P?AsA|9QKC>>@-IHBQY<0|p
zIUoLV2=LCXc)&GpvEDM?V<KwXgshK6p15weSj~^+Z-o82l%@qbS%S8ba~Hjs$>ZwS
za&vZYg6sNIZZhV2GSbmgIawJwXLM*B{U#J2vPkfh+9Dpmtd^v8f(<>ZJf6%p;gH{P
zZP^W<l}qQHv1#B@amiWY!?i}^Vvfnz6#x8-7B}4w@AY`MKxeaoU~iL$hm(twlSoTf
ztD{D$$l5(~ID%J7&zzXWGv)bNk(r(^+vXjTGCMe{$o$WnGsixC(D{D2D1K&dpV`mv
z2D^2GjvSsB&vBaF{@c0##XqBd$7t5Plm5-TH{=_;IOBd(yT>-ZKdL!z*9WaS{`_rI
z{9mU!_Ibq-W$iy&@1JAv4Lv5%miUHUvgyUb8hPvOj2A28w{Bf66V9h9&)T&jWD9q|
zTqTx^>pS>n_J)Q=FuHK|vX=7Q@8UI=`uk>z-Ta+zSQ$^aZCQ|bKi{1H`gVEyx?ewD
zzIwcT_U_xq6|R|dU%otfy8b!+bA8uV@A|}G#m1|ZWGOgJta`z+i_8iQ4t=&3#xkXQ
z>UaEqdiQqypI@ijU$5T1{P^*=Wju=<1PTog2)=If=&QZF<@C{_y)kp=&Y3qqZqA%J
zvhs6f*7n~@yBLz&_afEs90%vL*OS9b1NQUZIOu!-yY0$@9*Ht-bHCn|XqvIH!^Y00
zw0!P{fU`$lD(9)(Wa_?nZ`G=ehFqCvcpuM-D^^QdrIq%%W$9MsW0M`){=Izn?61E4
zd|k2Z=h92A88S~isIl$6`=e)1Pv?iPPCZ&sT)p+SP0al}5jS5e%wRG+v^m4p&!+DU
zqpsxNlq)?S<~)A<`1I-1mp5;ob@v8;gGOS{T2H0KH^1+Nw(u#t^PB$LB$&hWVCDib
z*(Sau=4kmXlM4+b9j~02Xq*%(zC=*c$2}?BG{T(w`TPP2VfVu!A-oNHKkwUg!=OQL
zr@4SpFQe<zmmgog%yiyi7bUNMOFh2cMat=dVvqmr-LrqIKh7!*Gkxv;WqZiZTb17~
z^nUpAz5RCk@zdaa!keSkzFxIz^(h4jzVEl&&;BcPQHa|Vy{UQCx}#zfV~u$p*NN}a
z3$f0b9=dtUE=K29$IqU9eBRwXd)3ZWoii8ce>kplJNu_KgGcGs2-D@-^_Q-wyLXH4
zewcSZ{%yPe-5;JQ+D6~z=9bSce4}vb;UuvPk0X7`+va7gN;WoT^8F(tp4fNlOyKn;
zhs^swO;(ax#Wd^aLp#ZLtRX&39z4&pOddXWczdR4u3Gh>mxpH5TCjSGUslUJVy?=f
zqI~Seb~l!l6}`(_o!<nX>noq(WxVi7Wje#glqKyqBYMg%m?gy@IeF%Q%tf<=^Lw5a
z&N?=Qf5xHW2`?wN?XN6Ql&g5QzwVpnlh#8|zBryevNY&NbY4J&j*g3ndvJ%{z6|dG
zQzz}ZFG`|IcQ7g)^V_Lnb*$cIUzO*A>pkZeJ~!BNvmvmmdf&078mSwOusk?3#Ygj^
zsOh14fvFn1uWc?gP|tZB%gZ<CIJ?fX9d2D}{H-VX?JehfS^ji1iE!lf5p~)nyy=+s
zQxi|$c+Zk;=jwJX4Q<ugdRZ~;zKwsp{?b$GTB@sBBs{OR_lTTIvek5JYVkXHcc!k{
z{|9dwVy3K5nsGSH@K3+RZN&$SlU7!QUhokWx;5!CZ<M3yIle<ojty7mGzE*eh9$Hg
zY2Z3?fW^sy!9^qIh#Q;UEq@W-;H20CNdln<T~xhzMGP62awtEa|Jl|2zvj}VPOiS6
z;xp17Imdm|)D%+J4>XZ?tJ89+n)EQo)QCAusVQSuqo9)JL?zA%3PKAM1Q{CjHy0Jz
zE_>Un`Z%nHJtC_sG}Sriz>n^LtVx@7f*1tU)*dxVw78pdlHpOY(QNf+f<MpCw`408
zc+^t8vFNrQzl~_<rU(;$zq9+EeVb){Y2Lxwe=kBGoaWzspY^>@XUqNX2bLTw`IDah
z{=C?l)T51`;~(BE_%F%ypL_pLnezO!_{e^r*U^HK&Js>9jw>WBom}d+#WCqysmdAw
z1Eqp{3#^Q~P8fE?y$|#WaZzAVQ+m%RaMMF_q5+4|CM#psl;bg7(n0roJ7*={I;dn6
z>GQIs?nd7Gb?fSXzP!6W|9<_SlD}JSGGDB!Eh#Je_vq-)s;d^0Hr~&02$npYk>&Ab
z&idD<7W<zKT4mJd=*FCSe$^U<@BaP!_tk&j9$)|S>EGYC&o1LvYDjQ-V`HiIyhAoH
z_^R*Sb<<xjzIyTI$(JWje!Tc9V{6Ryvx`23hKAk?4OBgnVb(8RQI_BpbA3z1*45jx
zq@I^1v@<Nu-Tn7n+5aCFJ;?@-XS8r1bQ4r6IJYs`NjP}wkCbOS0>lH;#iZZx->eDv
zb53ig`0fdQzJW{c=dHJ|`C(nHf9QVgYv07Z30Lm!yI+1ur|(u-<(3>tw#oa}#^|VT
z)r|Pu>K|~)Z}UOpo#ld)cTBoma&PzI#fz_Ig<daxyGOa~oTb|Rop<Z*f8E>j_}fd*
z8`tk`_kFzA+{r~*jhR8s=o^RWLx~cHn=2=<ZfX!}3amML(NyV)&vz}Y<^{$3iVDl~
zvnDPU3VeRfx=5tIYC+81Rf1oILw4s??5Kay{_w?<HMvpmqZTEUNT>Y%v14z21;ez{
z`P1CprxsT)UhQ~f4b$Vl729v#pS#!M@AdzG_SgP@+@E{b?MUYCFRv&6y59f)#-6?R
z-pDtEdAPf9t2~bh4b^2*$h`m9;zHSrUvaPZZt-3neL1WC|Ceu%&n8`s5#kO!7t(mv
zc=E@e>y{mPQ~P$or)?{GIHJCtcHR0eb?4j4>$SJa%lG!xZV%=Cm~N`P@=Wox$+NGV
zIU#7B($_YvrQ>l<$=r-Srk#DPlbFK~x|pu$TB%Yr=gGR|KIP3b&)cc9Xg2#fq%XX%
z<j|3iJ3ho|n6AIIr1Nmd4(b0&Uk@+qwm7q>{9>H_<i(2xSq<cV%)0h){`T84T4$~L
zjg~Bm)_uG0`~%Cv-V<|{ZQi5$`^}Zj=?nc@o?D;Sk2U@j5j^egwASpyJ=bQ`duz^U
zj|i~*D0$d{<3L*$+meevnpS<$$(UU=%Olj&rPEh=_MHopbmc>PFYUB@s`=o<5AA^W
zKb4=&To>_9$-#x~$&9m0=Y5#*uy3=~%{8*x!X5`!Ea6XMYu>T`#Kdsje;X<{Z`0J0
znX&QNTnEn;mn2F8`dFOZ{7UyG{FIg6Z*z(H9RIYrOhOGpNjZWWdKk2}CNYTIW(y8H
z)o9PY|A6{QiDbW*$+OdqKW(?1d_Q@aMbwGL3jv{ypSN*6OIWCRHufI>3~miaMV0SU
zwUSHvv==x9B$%3%wcT^+iJUrR2II{L<x)WjM#(vhk{g(>SO_$yMRU&-oZYZ-r2+$k
z^PTjjXI#D>7gN0hcFy?5sidT&rO5Yvfn)QLRp&iwk{lS*MBKV|GdmY~c%`JQFi<}I
zbQXJn^@7V?N6v?&L~92*9J08Xobr-EX_3eZPDVr5CPNi=KNm|$2OXJ|o{hFeZfze$
zTb&$@Sv&5Quv^tkeDG+c*U22q&mV1s1q(i%zxi;FRd?I1E7|S|NgZiU9Pj#5(nO!V
ze`%j$a{T@do<AQe-pS9tSh(!#e`7|*KZnX&;tfvJzx{o#{hfXJ=b3-6WwwM}t&{my
zd%r(fVD8<$iSw)<o~nL-sqtjUyGl<>V;2Wu!`G@SW~^W_;ZO6(;dml(rh)hJ)B}qe
z%U}KWc#(8;#<uRn4GamFdTY1ez8iagU+wRg)8p-8;_rW5Ja<m)oOyHOV&=@9Ge>|a
z@kZ{u_Ir#k>UPT>Q{W1{yyfkp*e%^h4^^Kpe8aogGeB1L#t!cO-OJ<c>;4?SU7o*w
z-8-|{b|=4`+nE3Q>cyh1D_*_0m$B+sN^Q+1yGZ@($6tTFc{6i&MA$yH=N%Sdx=C59
z4Gw91voK(tP$%;)PyXJ%z5l*W_b<P_wtP*~?${f5`TNa(ec@0#>DZ8<W!52}Xz^K(
zSBYtcTuhjqzm%amqx)>_0>kSzT|B8tO_NqQ8A~6(d-$;aVTan|$@$LWadX0_-p(s?
z{Q2a`)0;mdqpi2i^(@~KV-&Qeqi9{!y6d@*&HVX)=h@6DeCBr{Q+(b>*;NepKK^>~
z;>nXGD`)?>_c>Hlxo_(Cd9upy%6{K1GZuQ7wKqm@^)53P#$LPCH*8f-3I5vh*2USS
zhuuZ?V#nH5bJl0IRBb=bKB2u`+T`;&%lBMwqy$Ww6B=f82A}Sk+2P5N_`<YhqrBWa
zv)Om|Me5u>+WFmd(E?fC!=FAryu93h_S=2d_VMvC*L;-|tS3uvc1$qjsj2;5cA-6g
z-~Z|6_v6>}MJ-|zxws;3N0sCE+5QQCE$;ANQaoK1di|@Az_E?ahfZB@@K%vwm;3$K
zqR&}=)>NIagtG779>?pSch;A_YPW0B1PRwP?fpv%ge4t1;utouwal1PTpDUBsvljy
zt5m+^v&pyZk2M&0RBf`3PUX6Iu*u<PV?)a`S<B-tGiH3>xv-Wi+MDg8>w_K%t&9N9
zNx!0-Jw0~qhzM%xx@}xtWN<a`QO{`;OH<vs!83vvO$h%{DB-C5<jj#C#;Afm<1co<
zKBXV7{iZVgr)TJ+_|;NJBt->yzAlh1%BwoA88Ex%^2Mn$9)zE-x#o~^G-v)6la}UH
zm(L&h-P~IEDca}4o9F+-roB0%@Y&6jIpI?5nN2(LxjuJ0neZi6sR~ZKAUT6Wh0|oW
ztethOPX8pX1Iy0Kx}{B8&A-{q;-YW5vUSp%*QYi5BeySe^kMNaU%2B)VpnYV$^K4(
zyZ?6<>waZp4BW{3T({KGb=jm9layH+mWZ6^-pjGF<G$7W0{$i2IV!ds*?vlz$4HHJ
znMQ}odP_h44?&hv*Ef0v&K7kGn0arWh2v+Tt7fq$88UA@d*ZM|lwat{By*1^+zz30
zS=&D6G4bD6WZ6GUOfG8K=1A!V)lbzAA4jhY=>BHi`$KNVnmjHgx3-I$bhFl+;d+xi
zce1Ndo8bm0h9?}yMLf70{$y+WKfJ~9Zwk+2KXK6`FQy!dF<h=SX>a4oBIghn=QGEb
z^ci~nS^vT#guygWIV{P&OD0@#iul`EmtJ?VhcA$*aujTB=xEyL+~O2;aDqYV;>UZv
z8a8{@`5F1J=WTlG;-It0CGIJYYlVS8l%A1x-3F6|k8OPaF5PGId()eF+xuo*nHzL<
zEt|r}xuS35|F_sbH%|-yeXFJaf7~U%XRd$rPX6mQeX##y{-29;4v6nD{q|?Koy^DO
z@2mfv`4#na;YzlNhAnn2hAcDd&wcJ*(RSio_`PcJnSqt=c{jgLpTC~NLP$vM<+(>r
zU6-Qt{bVMJu}!-qFHs!Zwyc9CX%(Z+0=6v+jK1Cd_weD%XTSa(K7RZ4>*Ht3a;`m}
ze*AcPy87|stFNwFYty>1|KRC`+YS15-!0qzzFFh<u2`MvufKjQ%$U9G+c`r)iG>m^
z>+avTxwn7!zF#l@F5e!0oqu<&yvPZ`@b06({(O0Kc6EN-+_`$Ir4Frl?7`5TwlQv<
zcy9F1RctMZ62}ayCYahO3wADE>G3!rMIr0q<>lM||9H8+{`brKb$`DoCe&|PrG4z5
zqRriJzj?n}^D(}%a$}I}b;y+AnR~T2Tbo~FuBXqXD7&YRK8ihV)S6Y)no&4Ms)XbA
zghg#Lrp`_|WqjpgqkI|v`}_Y?9_Q_^{}h_awS2j_db<9!*Hg=%_3G)x?Wro<n33(m
zq7rX@zxMmxRi*FiIOUJC2Wqc9Jni&vt}nW;i#uOCxU7Eq>*>?g;?MPG?Yg$0X;Q5D
zxhva!wG~eowB^}<t9@>9!b<xv3&S~8nH{T^IC81<?7h`-Vrp@c$D3K(*3I7bX<d8>
zce+WL|Lg^&dt-FUzNTF-%@T9EQ678!`>br2$IESAS|n~U;M@J~<;$1v9&+AXfBksw
zW)tQMRa2@qKlPX2zB|^!F7tfcmTpml?-|hnOv#)-Hr$Q9yFYr)f$sd6trZ*%Jn9U(
zfz2I9>MrWNI;F@qaqsunM>knnTHegrR{Q<RcV>qb+0XwLt%<!Kct3N$`PBq7g}1Y}
zRo|%L(eh{#44$z!I4$@{FUPC4SyBwv4GR)l?)5mzG)DgneeK_O>-v>ih1&F45lR=m
zjgHzBEwp}SY*(Clz%r%j&v)VVLT47KvtQA9HGPr70>$g9ETWu_3M`lR@af+_?2@P5
zy4BcF*`1rWaNQlXH_k%(CpA(RGaghjW>0?@m8Rk*emX)R{lxQ0>hmTo7u-6XVRNB}
z(I&}F<wmCJSK~InG>u|dq<*q%>&a&(FV|T6Yw=B6Wo6UiwIoMHPN{kE_lBT(Kf9m%
zMn0OBciP4^x=k~C*R5y!vp?4|_*KuDDbBG^&HCoU1^q{NO@1~{<=v+*{~sCK%u!wc
z-!b{Yn-5+5bH3bqyyUg}Z>9~IQePkWb1!1czr5&JTG*tO329~Vvl!Si&CY!03E0WB
z^wHgy?~>;8YyH%nYBx>F`+-RB!5N}gQtj)L#4a8=<WsWXQMjp#Lg>{Q>nsaCJ-K1I
zY+cjJ-t8u5)wpVE&T5!OW$fZ{m{gD(HPPjJz^?hT6*E*GXWY_XnRdWtf~rtJRP8je
zbv6F_a_wLI#I8<p4|h-s*m`2O4~K+Vssp<M*Tu_E&bUmJwQ`tnGuMvq?2-d+GyPRn
z943?*&nz+BTr}m=%Z~S4N=lANfBjF)nfPNlLsrSX<<jevT$Sc9S^9*YoAyKe<2C<G
zW~uLTN9AqbAKy^1d4fa451To6et2|5S^X5r_3}`U|DAJsO3Z<at@f{3G%HIq=IHn^
z9{<w0@W+#%JpNyg{`_pYID1;{{3*Y0{L$8&Dkm=Zq5bdoZ~k46r=6a>-{QyZ{LYU*
zmd2lcCf8zb^{_sU<D0$FL-A{ok%Bjh4?mh-ce^I$=9<?>_PHFpVsV3g;-y&b;45lc
zjYrn4dcCXY&EtysRR)X9bH$_`6pp`<@G_o1RW!CztElVo@#Cd2TV^sYRX1sz#kVZ^
z#r$u#kMDjxd%OI*d$lz?DkWV1zWQ<W>FVjzr;8tV_l>qb-(b+zVC%hKT(WtF(IRCY
z$M${UhZn5qdaSf&TJG&1rxYjlI{*Jt^8MlE_WAefYszcu_sh36IBZ>iYw^{StHsy<
z|8tk$zV7q0M;Wort$EjT%M)eh7W-Zf33Y8T>q~s|I8a!H=deM~;u*)bvRD`Now#W5
z?e^Vg@Bjb!x&Qy)_xbj9x4u??GRbLmOkux!?(~c12F+QL!69BZr<~ofuK(Vis@keM
zZ_2jIzkg?bYo=!4ojZ$GYHA#lQ<`=}$<0MUK>AqW4Swe}{-#WeRTc?cD=n?tV;8wq
z<?eBQds~}0TbsMv-u}uc-DX#J@SOC-MN%6-=WWfIy^D##b=9k*v(4|%Tl;sz&Yksl
z_!2kp)cM^Kn5t1D#dFGe->O;Dr?0R7^YHw1eLdUo!hPotOnRbRbR_rb+E2GPzTb8C
z@nzepb+=fp`^0-Z;=0~Dvbbi2wsN@f9%ykncRe(=I{xat!-v+(TD-J>k@0v#oX+pB
zULoKA8hkl?_wL=Zch6o-I?5|y>Bh3z;ENE`szs-6eBHf#Zs*$mu+vAmIvI0hGIw8|
z{d(i=%O6jE{HdxNc3`SPiO6}c$8Ys&cKkDO`u?na`tj+_AN2R$t*$s*!+5~M?LhAC
zj8|;R6OMF#zF{^m;pW@h3)>BQSWfZHkX&fEweW+~x5II}R;^#9o}_a9if*a3)MIXm
z!{-He3GU~6_IUEqmzi%@%T?sxFbG$ZTeIoHjG2*-@BX}`FZ0Kqv2I@J66beTGiMY^
z3QRJwd3GZFq@9$noqupob@^nC)s8HWk3QMg^U>t0K#}uX<14iX-J%|?J?p78!}6@^
zbB>#H<QdB@mNl+4=e+!1rR&7rKjo92#H;;zE_g0-@uaLT*Rnnt9NB-aV_}hnOKVSo
zpJ3;A!?WteD&Z=7FB*09bvE#=)H_<$Yss0|#Xhk`Gf8IuN)4`pe`@yDZ`Q2&!<Q?P
z`t7`3Z_klUp26pxs+I^G&^c-!n73wuHGf?@$Auu_K$Wk8&T_n$SQuV*hJ4Fpx8d;4
z-N2SIO=I`P?{g#meEYeuw^RJE$1%ZUoCyjIQ#S1p-jtZ8zU8o4!;{X|An}N#e)hoK
zCLzMEDtulKzfaFeF1wMoOLNl9l|pWAl`@`ZPfTP~oG{V%RL#xiMVgI;(;hMk-<Qwn
zGoHV5x>ufsW&fvG#qhPadL~XP+WGQ$UvV|_xeYd}4qRM1`HZInhv%n$Q-3LG=106G
z_gvakRr8F~+n*`uEIj7_dB&NEENTgSzPvXbL_B;&W;W`sj!rxFGB`Em0gof+*_mxW
zCN4i=$+|`VE8|l=j?k+DF0M`-j!qM79^KXu<2e%0*kdv4(uAptF89QQd{B7wic|1v
z(V-;?F~#qb!&o>3FQ=Q7|C{Nuskn(pW0jJY@+u`J7jYq9JIAd%s|C1~=6eV*W_&xc
zq~`C*IV*aij%JFA>VHm)KRMx+c>3c@XXpO?*A*4ELh!hRb9BOs5BC?}{y&fNsGahM
z1Mhjh@7KIkZ@B!IJnO+D>tFtzKRb`_b>94F-NbWRzxIAIFb)uuynIXMN`Y2}N4N@$
z!?%5Tw^ps1squ~1O|sYP(WV1SX56T5K5@8V%Y(l3{7X!aSY}n}8t3jbnZ~_HBGbI2
zzx?}rySTl7zh2(G`t0Sn*y@^}pRTUXzZbK|!rCrsTkmHc9|kLrjMux~t=e_JJXHAA
zu3WL?`}~JBk~9+U<e4ovmC+ZiX2x;5uKa7k_jj*;9e#WG?>2dni8YsX{QLa&SN{BU
z{QCXBKi+O%e@=heR`tmnA9YpAuqmxtbt+Vjp@~KEs*kHKkMsWf@5*lPd*9S2kpGLL
zxn=p=ci$d9yk0JEU$g7x8*5Y7WgCKx928{*8yN*Ps;)`S6tNLl8>U|W?{fS<`Tt+_
zpSxc@dwl+de{XYtTv;s@7~-nZu}DbJG9@*bnJp>BJ;Q|Wao%qEwu_tYhVeG6$eP_P
zE^c4<`)pzK-P#{tUmm_&_S=Bx@5NU<@dplny}Gr$`|6_H)vm3Z<WpF#F5%y}cw27Y
zwbR=&7g;`D;47?ti{XexQB;GVaQvM3e^1Z<|Ma;0{(QOlRZ3G80_QLZb1L?%I@GbX
zI3V=a?!Oo8?e1wF=GqdgWBa=!F|<3NIO*FNnc!}dFAh)kuD`u)U3OYo-;5Psrz$gW
zUMjy7wkeLUR^Dx6;*Osm9$w%7KliBUoEV)~#}rrhB;TLeQO@V`yDnhs*R0yAvRfv6
z3LYH&{QTwR<#$g!Uw(hB|M^)*m3QkNT45&jMzB)i<ze@oyXtH#Yd`-wyM48?$!)fs
z(+f`5y!*+TaQe(XxjDg-?Hk$4_I{~-;pYGK<gb{e^)+0JZ5n%BUae+Gc%#9xKIyMt
zSzbY2#;UZ9YktM<zrP~$jQaBo6(J@avDNSQ{rq<J^z>Zcw(8XO4Gt@OJVM*^?(pAD
zdg}f$xnNm}T6+N_i$n0!E0XJHGA!?y(dKEOwL6{JK;f*!GmRVED&~DFG+PuDSFGro
zvE)eRr&n%FYsES^8mAuGm#$PNG&#K}WaX!s#~#ZtoRm8d5k0%na^{R?gQ@=>l;nxY
zZM>BdbN%JV8JAS7BBi!W2uOJJBtq!DW73l9Lo#Yp)O{D8IMaVD<`(OZrOjr^iJNxF
zIX-_Wy(ClZzfakzuN=mup{FnKr%7a+q|Lgk^8aDUj@5pyc9#R<o06aIGQP^`s<OT7
z*ra}wtXZoH=S<=c*>U6p%Zw!tRQPLCSq`RiGdn01F+6y5oBJZ;vaBO>?wq?m`R2s`
zH!}jxRp~7Gu;k|Pc$G!>&N578=n3Kt-nh-L!=KsV!ihe`kWl3fK`$8C7VvTMs&9Y1
z<hg5q<gTN~E-*Yhos@X%K##58&S`5SX1(RUHBa`DyYON4%Yj|&3c2x%%Uz6PxR-01
zeV=`7d4r<SO22I?vxO9I7M0xiIh#qhgG=a)K$4HW*1;SHo;4D)Kg?OExq>-RfK@rT
z@09HZ$(ejAZVOm?j;$!%x1DE8?K02goY%pD6BUG>{}wRmIM-Gkz;M>dR$=)pW|hSr
zS2}mKMdd8fpS?*&cUw~LY3p1Er!)s6m!ls1?J>o=Nj}RQlkciENV>;0N@o0f<61x0
zck4Zt%~|<J4!O>>J;$|+-Fv~cv@aW<pQ^a$BUg0Eo3Ufs^4Us)tNb)|ySML&aQ$Uk
z-*>bAK*j&)ZGZ0bKD(8Z&HZEMQqE_t6aTMy{(Sob%ky?k#_bPF_Ww|Fe{p)hNZs7`
z!FNI(4@;!*NFA9JF-eB|qpONbN_?nqY~^&<2NIq0W!YyKUF%@US#cm_#fqErgbi;l
zc-pt}WyaMb(w7;mW__vMzjyE6%HN-F$JhP)@-l1l^sL%nufFoz@A>y_cK7McM;EQL
zv1D8JW1>1=-_6adSM6$+ne+1ZzSqCj$<N#N+px#^dg;8q?k-%n?&r&`fA?;^{QP;}
z4zJ*NayzZGuI%HlN1uL&uisz)^Hb8^xQgc-4ri;|wtedpm^in5DzgfgOOFr}tK)eE
zxg!gm%v<Viy#IE){q47RA76ee{Ji0&eQtZKc?T0ib3*ztj}#U!o1L119VZNT?OOR-
zzV6TU|F{2ttp9($zN$3jc53kZZ@<sx6<>eL+$1<-L8OPBJ<}73qMki+DJchIOdVWW
zHU``Xz54R!&HnmdALIZ3aZX)$P%xl8_Ib_joY!41KR(O4VY8|H+{dbm6;k*2*6zRi
z?vb=%%Z1Xp8OGI3H^jq(r*K=w#41-lpI!g+^Zq}}>wiuc7Z=YBjTGQr{A3;P)dd?v
zWEwj!R(_3Bee?U>zkhiw!dtiHX}`Z@+{c;xsPV?`Qy&eS7+l-;J=@jgt;XOUbM?(R
zJqI_*GNYX1``(3nvt9ps_}jmC@Bcsl|B~s;`u)2ttn*h#9X2Sb7WlX>XzM!ZS+3PJ
zWxJyEJ9Jwf{`>aq)vM+V3%lCdvcijN-Z9>nVrgwq=veId|HqBLpI%?ym%ac0$4@U_
z?1|-B6>Fd4n&-Y(?6*il?9S2)Z!PS49{ahk;}E}R%^VP8rfpT<dgH!rPJ1D@$)wF`
zfj_%`?~`Jmka)wEx9|Jcmy2RUeqU2uc4V2RgodE9(nM?f>YtCd+wZfnUN6(SAyigN
zN>+zKi&;w5CU4tqzP$DC<m9bGbsqPmEw24$>A!Hx4`KB~Eay8D1%t1=`4V8j%$v4o
zV&~E%p`<I)P8S4Rt|Y0>6LYDtIrC|OyzO_%6CuBzcCV^2czu4SVMIccpta{tU9EQ;
zB{M%gNpbL8S*KTgy649N%g?I=wU02D-hX!9yywXmp42G}tE67nUG7raJz<fc=B}3(
z{Xc?dSp5EI-@V0GH*P22YJ=aLCzHDU3NQX~`*Ho$g7`O?g}b(_u~`0EjPd9sr(;HG
zDzA%<O-PA8Z?Z2n&dKYh+#$I|uT>tO)D35elxi?MYdA9~^QI}^W;0EjGbLG#B2QE7
z85E?2W*I%HP@0l;-sAE4^ZI9{{%!eICpNL;%grzIw2BSS9yGidmgs4E{^+qlDgA|)
zclh-mU$*4F&D9kRNhT#eA%9a=CA!RQ+uSK<y1ey~@sUo|lVbB{O7QG|m1L2UdT#5v
zn5EOW-Whb26z&cZ`ndA4_Fez8481&4*DSchJHL2w(Vc4-7*xcP7D+AL$~Hq|Q)7_H
z)8-9a#;ZA-t(eWGubSHs;y78?{f3NCaNX>i;z6Ajp0l-PJ^!7tPh2*%`I(@->MKu0
zvk#LTfB7HQc<V0I`!FQW)KK-K<ZIQ^pyOZc-4?~x#IjB~<KgNhYd-5}YW^OTBV31%
ze4Deau-KE?P;hOR7{lggn_lkfS}Oc!^}ad!Jd=xSt!K9K{hw+gv?^6Aa?bm==G~X-
z=EpI!TmI6I<@wNGSj_eCvvKkTn-f8HIzQ{UKD)l%&wX%ympUK!6EzdPLyi-7)F#?D
z8b3Q9vfkpkqJ8~TAJe#~(4vg2Qyz{w>k_|(MsMHxSw?em!!oy!yr-PSc@76M99)(y
zntX+IWwQRp3%*xX_Fh`SGDj=Z*}U)f-^-8Rem(17|NrUp`F8hgOJ4Np#qY2B`gZ&J
zdwcBm+eGSUKes4UyJUK4p>V;y&5u_d%cvEWZC`l*ZP{;GrFY+}O($-Yt_nD8_h$XO
zclmPj<hI`xU|5+o`|Ht{KfkZPw`XTXVPVmxFLJ6qcjGKw>kb|;cFx<egkz>Vmq+7~
z%JA@gxv>(j-@beKEiUg}-o5%=RrR&sH{9fBR9U;ZJ6kt@S3sz<Vw6Jlx|+>L9*eL4
z-~azl{QsxF?dPr$TPyvV`}fD$yI=2~{qNo7IQLGyV>zpQXIqG#D2q{fbSu0&O4LVS
z%7SnvyXwM!kNfZcf4%?T)3c|=&Az<eR{8VU?}hTRzqjStS_yDI-f%Zhe*0}{PNA~!
z@cnPfq77Vm7v8^r|L(T9@>>06@ik6Ig6~v@|IXSPv3EzsRlRF(lm5>Cf4ctfzuWJp
z<@RQ3pJY?en(^GiNlA_S)r8xt1UDz{znN!#*P`tA!;IQJaqIRLduS}`V7w?(I44{w
zr()53X0>8g3B~zxvbpE|-@mudwb^T9UtN09#x^f6KhNC!`~M05_wBE)s`~ls%a17s
zOnB53-?6-U`s&e}l)^0qa?UU2$;sL6tKNN|KmGag<?8Cbd)+@XL?4hnC{RB~=)tRB
z@4o%3+xlx^tU<ANfltQ+-#g0-_lWP5T|2FC<Lh4w-e=#}pRT{}|G(Yx_iFri)ZOa&
z?HIGW>2i4O{+sXLANe)wp!Jt|Gj~2;C2#QM?z6((5o@M#pO@ixSNxPEu$I5$#f|+I
z^*<l|2xVTq;r82I?5k1&--}<Jxma}9GPU>L-~D=aIsX5*?XPFg?kO{zu}Dfr`%0SS
zs*-ue8<VG7&EyGjUA$R;VG#pMh|l8Z`YWP>KAtYn|F~A3J?@NgZO&Xvjw^C5^EWja
zNNPP2?l5xU3f8{7BqiDGp$bEtQu3ojb3vs;G0%-RH(l+%v^n#WZN^6%28juoK6Uo-
zQx-YCdhIpe-#c+h$X4f{_0Az5g=LH%Z}{NtE}NRU{6?i(mqF_JsIsr;({@E#*IK!L
zx%txaG;78eW<>_0u33Txi;lGIe-Rya_Q>KF&a=FxZPK@^JYw9quR_z|amkS>lR6Ss
zzU4d}+qmp&`@uDxtf!Zy2W?Mkt@y&paAZN2&D5J4I~4^bS0BsgEPl(M<z;lFQd;=1
zviFzZYDu{VPMeqc&zSRUR&Z*en|*=**}#gYPffem6-++S-K!&#pypTWvypw5%q0#s
z!~Jc+=LMQoR?j*6;&BAe|IAx@Dqnu|M!L*Y?e{)rE&BV(mubp>WSqDfRs^0nf4yc?
zMnd<b*gt=k9g7O_KAZUR8Q<lk@5aHdF<eJk3>PYDHG2Q35=}ntmT|Ddv`oam?!8u+
z?@FacPXjjA;v^~G{hvR4UwBp}SMBVh7JVhfj0;BhqXPt(8ATjSjW$&L?dg+Y4N`FJ
z`=feh%|*$5d0{1m2KUb&`r`7lHoa?E+9D<|HpSSTotCVtf+ijKv^L7PNBjNuP15&g
zoVnz0zxUq9yK^_2%<SykpSt6Jm&t;(Iq9dDJXoD{Caqzmrex5rpG(>MtU0dBTYPvk
zU%qet>dtzXhyQ{sUMyJgR<bKSOwszcm3q0H+_c+DH>RJl=!pNF#C3#6aQ@G1zW?jf
z_kXxz!DF^muKQU7yS~S=6jzZ2FAVyUC)|`irnZa8l0hl_s@)~SWV1O%8iw~WEVh?r
z-x5D+^)9A;roh#PLJ<SbG{s#fIt5ZpB#$qA{r&dw<HujGe*OKu-QNENyKiO{m;8Km
zb^HB)AMPG~`ZMe9w$rm@pFC0XyZWHb<4MQbRSvppzB^VWvHIUB>PvQL5jdpqu<mm~
z?e`BKw#d6Vl=Q~Wk(*!p|JB*=`tkGEhp#ObI9AA?lXUK=pxUx}gME#>XCim$3s06*
zDm=QUV1cRi?&>|&x8CNxlb4%U_S;eP(sIc(t~rbioJ`DauT-Ukj21q>{Pg4ef0xVa
ze_oy~uI@gSnPv6utJ~M_tF72mEA{>TyZolsM{njnYdf*Fn~~#NSnyU6g%CrNsHEC0
zcDrjnzMXyj{+#*q)=tmfZ`Zb2{NstTH(F&W9m|Zq%syN8`^%Q<s+t}Duk;^Z{`+m2
z#4$e;9{KzG<;(iFm-jE`2#A_<+efd!CpUQQ^wqAH_wB0r`nbP7a;^V$clGuAe|~#X
z;hSO9r{?secvrlyXy=4R9W^EGRR?CUanC+`IdAvN-vVxm3%+LgIy6}IC24hu99yvJ
zWABPpi6Tmg?ksZ-*KjSn7x<LZ({^|FX7T&`|2=#6>)o?oX`65U`}2MO{kmOtmR6B#
z#e72>dmNp1NA25b6Fo;K#yjhA+3z18@BUuCd->I0DL3Vsul@FIKPPQjCO30pZ%6s=
zkKf*P##-2x?|XM_WAT<x|LzDIH$A>xw`1}LkweV~-u+1Vy*R(WKd$Eg+ow-|ZhZOa
zSK9A~m&>m6=iZ$+Z_lf?<;R!rdtWw1Yv!v}Nz89{1s>Z_S)-(IH9^e9Elo{r;+K{A
z;@z7cZ=QDh-NO5M@^ahWx5~%wsc3$_Axl=VI#2)Hxx*Gbe>dEIo4z>CWsM=z(Ey(*
zU7@OSs|!yav5CAs`E=-&E00q0yM#iTJ3F%8=w0?W&ap~txBsSDu?m(ad97YcZE`*%
zA!jtpn1$!`O-tKv&y9urHrbxFN=%ALU`z4&QSWiNf9k&T9Dn_je;qs{{@7VFf_uuA
zLSf&=Bag1mRQYW3PFzH-aM6n;FSU+KdA$v^Of{KRw0yDRQo%_{y)#u!)IH9nZ9jQ&
z$}+yzlT-RPH|rNqc;v8rg0;S=W2oq+wiyjJ77u&7Z|*;Gl*i$!iz%C???H>R@-I_6
z4MQ)UP`%*u^@JabNl37c>$;0_X5Eg@|FaiuUcJ;O{Z&AiLYnW|?Ng?y-}G#qaLw$8
zW%zrAjdR4DJPZBr#5bjE-0;M?X4-v&A2wPO%-AORcr>_hEfVxPE*WKhKH|u}^)@=I
zCW?h$;`wbX{zf5=RXFEV8f&guvf)$DbI+Ro$*c)~r)H?^`tyu>tI3t1xOZn~&+yt}
zx_`0aQl<?x<p%H1>E7pTYETlG`6P<TsP~DoQE(e`&faI|m$4{^8L0}W^s6lW;rCV2
z-86T4Fz;@+cO_q5^sLg75>ZmI+10SAuK%UKL$9;F%jG^v$0uJNFxtIpnA==@&U)(U
z%BJ&fjcTktsx#F71mF3#V$#w33wY8wH%jlXyc)F4r^U;`hig{jW-<1v8R7}`vPzlp
z{JWm1rzZVZYFHr^eCLQ{f=|kk-prkscdmDT|DXBahQr$nrZvAQKl|b5@>C1$dIt-M
z7xQm_&HeZ0&!M2NugiJ<<SxJP+`xCUzQ)J%amUyHS(f^KQs&B4$rBV?COr9lXXcE5
zk!%GW`?AD*FIyya@oqI-+87q~NhdLWvKTAd2?6((iiC^{GOyjwHX1WI3T`lXR|Xp0
z*<LPhcX#jqit_C*c~)PZK7IQ7`2BUC&mMjHvMAD8ytwG+jT5`4@k}#Lai3)t!SPIH
zvbTw}l2p54QEvI~fBy=1-)3dlo^zc$>sH#=M}MBazPx$!)1sGioE=2D6__VwW`6Wx
zZ(r6V_Gspj4mX~A8(Cxy6?`wKuPy(+<L1}JS`!rIUs&F(ZV2Afqmy*jX8ZB8zuvs~
z^fl=w*P*bhoXw2$*YxkN|9N)zZU67H+c(Oap4gMn&oT9paN|sE6)}-Dof8~bd8c(B
zJ$-umbaj2du-8-br=Q)lDmy&%-p(8Do)dQ_txAmfet5(E{@u%$AAdag_0k2UhaYVJ
zEAZysuU9WWmP!0Jd@<$rxk#V*pPMH?4%gTB^Y6=kuYX?u{+`Osar1S<++TmaRmrcG
zkoBs_&~MdFpDQ-3Dg1mK8_V_><R05NJ$J&!_q*@rJ-=c3yn*L{*7F&bW~$cDA9)HM
zYG_I8OMc+Vx<!81zT5f7H^?vLHp<y`eE*)GmuH`zULO8^|G!V)4*%}oKHu(cT<rSm
zs~0a8omu31e~!sI$A^U%{@(byeS7%r<!7r3YpO~v{%GyYnfGn?zlD*lX|2A|mPLt2
zZtrei?pt3JHa|Y;svS$E#Bt%K<Y&r@L=PRGReQkt(ebBM-yg?EY+a|F^ICe5)Q2B0
zI==tCala((-;WnZHEJh(`0zn{VMuSq^NSm`*xUl|=6;)+EwOUl-*cr5NoglI`!_`B
z^i6x+wd&o<`dxSJ^UM$4-+k)(>RaZ%t#1~_nw_35eqUz2pQPpCW5rB*`jdpWZ<vvv
zwBWL!u+XHH%bv~Axjoyh*l*KQk9(EO2|QP3scQEvQmyyf)}|uz>`qy8{1crUjlb$L
zEavYl*;JbPs&skg#Mnct+cxJ_pU(N!$+{?Q)rw_pEUi59u@?-2gEAgXQE7LZWoL5r
z%d!8FU#4G)iGEQU*=xVR;>^vftPkhwX`MOprsKq!55}R~&;M*m+N;Fic_&cC;Nl(=
z%^AH@E(`j0ZJnqor8=qS?94OY&amejA4#&yP|xt+rnXu`%_QZ@kCT&bDxZ9PGjP(g
zOP`wc;sj<-?Qv2wc;=Nmd*Ww}OEVvboi=I6%!xcEqw07<!}-L!nffQ2IHM+fd(Opr
zPk3vVcZICO?Ct{B?;Bn|oqMvWLt<<1oYw&iZ1cM0Kh2KO<XbW^bMZ#!bskGH1&a<W
z>c5z(B~{;eZPv>xyQan8mo(((&*-$cEcE&7QJ$a)Qx2F+RBtq?%i!JG@VVGAY*FP>
zv3L(vY2m$|jNXr~Irutn>vZ){ogt{9xH770rOv9O0?T7qQVmQLi#?iNJbJ*mS+Dd@
z&ZAS=rkzuKCK~$wT9u@vAeFytC4b<VrY0?x1XfWKRW@grtr>YX8K1UB_Lu(i2sp%h
zc+KqfZysC?5jiuZ<Fa$pjfI9;kM2&{>U#10<&OEc*)GM}H}e?3^y1Im@@;n7jHDga
z$6A`4-978nv-jB?^ACG-=9IRoaWQ+&{qwa}|Ct~E%g=aRe`Hbe<VM52&fTwT^GY^0
ztJt0SxAXaa`MS!RR=<tjYM02*X}WP)zrSq5+JAEy-rmXmBI$2gv`)oCAW%@a^^4CO
z0mhUE6XvXcy=+~L*V2j0q??|;PN=+?Q<d-E<}h1qe&&rd1_y49#V0Sk-&StE{r&yg
zKVRQo{d%{|++nj--N#>NPn+M5*|#S`e(kMoCzrfRU;nH`N=-T`&)D(u9apat=dQoK
z@V)l_+g*E0ZIiY}$ozc9al`t$$=62}l_i@pvRIgw9;r18?wn-1RIX!5TFQ$^t4T9C
zJOx!(*x1?b{r%(Rb$<DE@8rt<N+{_t@qh68$a4COr1ljh=H&J6>F=H})zoh1&VRo<
z_H{USPL^8HwumdoS07!q()=%9h>)4N7>}UQ=2_FCGXg?Q9z--MJ!_RJ;(k8s)b-NP
z?PnL2h8`_EFT`D&%+476bnfa1sjRSb<`u>D`(OXMY<I^#*FNrUj$ApDn%QAr0f~jH
zS9PBj|Nl$B{@>~E_t#GM=DxJ0`|HJvMH_R(*RHoUJl5q{#wPexw0CXCvQ-k^RtFZX
zms{Di?B#7g7m=B<0yp1&@3Y)ia(rokO!S=MmCToX4ta2}oa0gHd3>StZr%R-Uk}$!
z$rIUCyT{Vrvi{@Y)sGi{zWOO+Z_NKcm+e9OQXf9P`?fj%c2#ZJm3Ps<etcj3c51SC
zo4~h&!Fw&@wz;p(pR=fQgV*hH-kqhPma3ICRe!!DCVM>>7k|C@@~%9|Ia(_!4z(?C
z%MlPiGJnRpjDx<L4Fwi8ewch{-j5Y=I{p5Y_AO`mZpzP}|Id=;db!mX`H61NTV$(L
z0%e$2wCt1bpLX0`U48dkgD*`JEZClG$eryPy!qgb)<WaFce`xgT)4aab?WVPF_%|t
zI}u|jm~rTcN}=TD-*Tt3IE=R_+I^T|DJZEcYClV&qp_*g<@ChB%{xPr)kBox+&{`W
za6i#wvQY~;*OKaH)6?On{QCWTZVqALI<rT|OIv*Px-9CJhDCMs7I;4N-@184;eta-
zmtXzq_J}Dn^@~3Hrs(|C?JLhHKYY$>ByOR4cayHUTFToC%xld=FHd>6ug1J1ctPGY
zuDNUHtKZ(G7Lwq2KYiA|n;{#sm$iJ<ViwB%ASga%hD(@)s99@ma`pyUBb~`z@~RbA
zAN@bo6QJ#Qz=lJqNsO&+rspQ{$0F;uMkXko6RT)wp10QelB1xlw~%zqBndl1y<mr!
z%dds^Gj++F_%t`AJHkddg~>Ke=--+9Yb0ZWKW*g{3Qv3d(%{M)&u%XFO=lIHUotmb
zk(=vYVYFq-<AzF+vU`0K;+T8CUQA_oF|#c8S>E=HIor<f%aN99t?64_RBdON3vdX9
zg{D7~JAMD%nYB3ypOr5ES^co3Y*uY<hC-%v+fH7d&7rD|`5ZGGSz8=WxEwekv&xx^
zL*~2e$4>zU!J8YcKgXZSjz5)cD#fX~H~w;mKG!0R&g=YKQ{HUjRP`v1s+4PZHt)rk
z!gG<oDqmJt_AK*lPxBL>q}pqL{<~_-?0wH#er~z*Ywj!AQx7tN7az&D3Jc=D`J8V?
z&ZNyR_k|@aKY3YVwxh|)y&fx$wk}P3Cpr1dzqt!<Y5wAF*H}|OYjc0u4$*o4^!^=V
z_`fE;>{pe*{{vT9`CoCSuT7j$X!d!L?VMQ2`tCcGAO4<Ktzl*e6x^yHb|mQZnS(}x
zyfZph?^>mOe2UBRNx$7Tq%SykvvW?!q>8o2KXnOh{JbJ1SwQmF*~e#>`@gTN{r_1%
z+`N45$45WjJT<rf^W)Rm*O$A$7DZOdpJfP<u>RZ~a`VW;KV0I!MHj7lw6aL<=H&G!
z!;jUmxZR&KbJO=T=O!jAn5h_C5<Jo)(eSv?&U^F3L`Tm+6-famL1u|biZ{x3*ZzO`
z_<r5LpU=O`%bV>syv7xIadX1GrnC&Mz&)F4zMDNS-FfY5VE&JXX`65IEr0#%=*v$j
zPqX=rviikrn%re%pK@#yYnOO7&qtwe*UE*8i)Vh@wfp|vbz$kxkGI@g{P~TRLl>vM
z^0uJFM>BXBTMkrTzY@DUN>^V$&LTek{yke;ySKm3O*n92SyK_qp{kt~pC5n!|GWP0
zdHa1;k(=L_&t(V;eH&AfeS3=Rk{=aUGM+LUyD|Cr-+B?`n#O)Fe5!m_P|9=O1c|nL
zI#HoE)x|3x&6_b}g@Wp$bvayGT8dv{)rHO+$>GWRd+%Osy@l=WKi`g)=kNRfucWr7
zu4>C4^Z(o9|NYxg{bJ(ryBq#*`2Y3Y-0$-y_!iuh*>6#MbIrA0T^Zxa(K`M&!e7iO
z=Wcp?BY(a9y?gigPG9}_^5tvS>cF4I$CSCZ&NR<F8^*!CMR}R%<w<kWe=jM^zaYtz
zRnEryU{$8S-nsr8J8SPN%!`|2%J6S`{PDW~7k^LOYx(>Yo8^ZKlUW?U9bf+3zyG)4
z9qabO@K<}H*NUxnjjrZjm$h@{g1cp3FaLe_^5efBPj-CcZWB8rEODYk&BFGDvf*Nh
z`-^YbF5NZb#8PqT3kRBJbR89pKggmpqc1B-D$r7@xHy3!DYNhVslQWJUVT(>?6CTk
znjQb=vi}sHwrGZ7^UG6CC&TS+pXV4Er1)oh-F~)n{#CU#j?-3El$k^*UC2C^nv!UA
ztN+tp&dEh`@@qp{j&^)G{W`*nFT;Aps;x2M^#@L9&Z(Lm&&;RikoNiA?LYml^}b%d
zO_>kBs9V<rUAe4x<kB9Kn@+(ZJi+pgYzKX^>%Gm&=6LqEmvo)vZ`$1JP_=A|s~4Ar
z8)L;Oo`{Vz9xztw`72EGTXko9Z%VLrgv-8&FUC<TCpvj~2x=dZnK8XxG0EtZ#EFBm
z1y7qxAKX;@;@O=A#?BYTZ(O(%x+3$DfBx+~tIlcjdU=R#y5rOGRBGz`oTufNKK+hz
zT$nV6GksI-J(b#j6E{{yDk@wu`E`~zVPmeV>0-gpbDf@V)hH;q>!L01F@ya?+Oan!
z>rFZy?O-g*(dZUe3UzZ#WmFY#Qxs-$PIYR3@o4pQi|<_@BI^&gEOj@ST$3tbJo!k*
z<;+*+q8iHkv^W;H6=;iT96Mv2vHAQ1nN@wP$<4vho0opyx>7T<Cj96N^GSg!AEV~H
zPo5ja^XvHXkU8%Uo>4hmqGQ#{=hC$2*Y8Er>h&f3Z0DYTe&J%NZ@;&sSf%IMrY{O7
zu9X~}^Qp9~;nYU^XU0EeAIkB+-_rJ<#Y#SX%K6HyuLl_If48emNGUX{wR=?m?(Y9X
zC%;4*Rs8$=_)F#-!3k;qpI@)bY@Yd~*COA+=eU{227!l`R>{ww*SiYJPF%13z9VhH
zWywj}p<1@}tj{_B@bo*IFe!>`*eL%Eydb0I*VpOa-`n4@wdvhwTlw?X+1c`SH9voS
zeU%Yv-go1KYP05<AV!YQr~R`ebwaEem=nBfzpyyP2o|}nTefO#`0;eD6DQtieEZI)
zSW<hxREsgBh=cWWFO#9xu_K&a)AjEP{MMWxP#|&acKh!6_x}Cr-~N8xJGt+RXB28q
zI~^bOt0$pLJ~biK@7{*VWtMv3O?k#`yYq^-zRhRbD<hW9eXRZN*SpW^u9tf}(RMf7
zsv*dH+>K#nR8Yv}ykia$f{acQ@8sU!x_X#f__N%}r;&}z-datYrNUD5M4Q?53&We6
zvTLsi<pf^;?s0^VM`h_|0l|ZXSFbG>Pfr(5Uww6x(zi2r6+TM#rizF8^;PT@xoBf8
zc4cxSqoSd4Reb0=sgw{_=_C#>Q9;*(;iaL>!iGh6770vj@)S5DspRD5a7e<dphF-<
zB6Qt*`@dhO@7EVw{XTAwUDUSKkGWQFJpA`V-HrA<;~X>o-5&d|ElAiBvBvIRZI#5l
zx)%G`Lr+)5?%tI(d)7_M{Yrc8+w9x@GsS4lIlupV;{Gi^w*OO2-!`MT6&ted?5y6v
z`l@xNdxqzA2l>V_F6+aL1&cSNUSEHWVb3!6Mp@al{&SZbzOk=Uy1I7NhRdsz(#%{1
zC7t(2tqFJUK6}~uOU9x_$=hReYM##deq-C~$%~C^1>coOy#Ln!x9;<mUt+T1^Nun6
z{lH@2&=i_>)Fgbq%S4M`t-=?YFIYY*nl0k{w7+n(p1?7WIokEsKMN)$^efzCS$k4G
z>s!u_|LR=Fmj7LFHvf0nDfj29dguK*cB)L!(pF414-ws@d2QK7HP`DVcim4ISt__Y
zl|J+8+~0R3Jj~0F@3X7<;rF_yH*P+@V`kc_37QKYnOr@4Z3f2&*Qt6pFZim9S{}4|
zJl&mxY0}-c%g;B>+0Ny~A+)<)N$TNFL+el#hJ~JvoDZKrulRG|T-werlNX0Hd0I?d
zJ-K5>U_*kMn}n=*>$#cw^<iG;`Ml(ppZ1ZuXP9TY{7Sr(-Q?$OjSZ7Nak4lhiP=id
znv>c2Z~B3AFRepnL`QHK<SK~<3t4+#vnpSdE`8MGN6@}g0e3#Cw3?gxIuvftSQPtY
z-O^8T2F@zF&z6d0x3!d<E${r%;p->VD!G$6y?Q?9!Q|o~wKLOPCeNE~x3w#5#jTgB
z{uixh_+EM%CcKn+u8sq1W2<mS$m#PIU5};S??1MLYl%mP(}IjA4xKGN0+JUM-7fPq
z2OjHq)-*3=;p}UymtR-~A5%VOUUkQGt-9)@1CPEL7R^gC&og`MTXuV<NnlU2#nP(H
zo96AGp;@E9@h6YTGRd<F-!e0Gx;?^mEH1p)jh-6Xy(#X|@8JByTfXS*cAZfev42@{
zdd=dqH@7!zmd^fib(z%0^9gU*p6#=`^Pm4&_Wl;O-|2NrIXfyJ9NEbH?9b+Vt2#G+
zwm#zXyXx`P&2tKO@;{kZa`@+Bd9}k=b53SWahh|!CvEet%ZHvFWmzsXvr6_$aka=I
z#finGmqWIuYi;`Hqg(YYdG2>j{{u6A6o{~}N*>$ydi(C<zkkQq|NHrTzJ1L79Jj*L
zm&5PZ{6D<?e*N!9PoHM(^=X%UYJI9?@yumEUl#xVQzSV1Of1)L2E)oARee3N<pJw%
z_kQ-U=W9_&^pTx);LQw9%})O$H%^gLeG(INO0R2A5^CzqG4p?KSM%fe`uFSd<iA}k
z<8L|g(y-%vvmM*Sn)z=xzuZ1;*~Q7iZrASCRor=J?yT>>JFzAz=<C04f3N<%d$>$`
z!iu!#mMs_Fx(h0Xdz}m|?Ww5|^_*VKq$2I~|AuYuQt|T-<}g()YHFA<`5339WC*js
z=FbT?-v5@(ZLekLn)=ykNt%*kRdi<rWAe}G<>x{s*4{N~du)*PhTUX6>o0L#-pDo&
zUH6Zfi#a@=Xtnog9S9Sf+mi5Cps6MCO?x2+n}EygZIKdJ&RHJk6y#u7^}=vb#Q}!S
zx$9?bTl;(2s;o3KVdvZYxBJcC>#aMlcdk#oyVU#aubizGkyrdfx4(Cj@MUZ?bvUxW
zr2p>O!)Fiw`?qoT?|*0c*X!@EF8sG@o%p^e%lN}5`oD+sm)BjatFQbQKkaL|X}o}p
za+p-={w-x)si7<Zv$v%27CHZ$_>=KRV50Ul*1Hz>&CXWZw-mB8N4@>h{{EkS_tRgQ
zzdtLOyF@QK(069V?bilhwneP#pEmn!(dR9!rb(-AugcSQ78UpqqWgO5oG*$ttemkg
zw(U;*$dWd-IF|kP+qowKs@`mNn!9_l#<54i8-&hqTr7^77QwSb(?zY1Wom*@iHx(+
zlNDl}ro!(g*Tq}Rm=@eHpa1)1;~(BvZ(lyO<;Ryv0tVWdXKaq#tdo20+cR08Gr-&M
zuS?>5k$F7&I<}4*xf-}u+S=AW`Wxk{YQw%tnK85Kiu*PGg=%Y5Q{UJgnscL5b3?+)
z560j4&UDvyH+|O0vg|&4taqAY+=^rS!YBN>`zTSgw#M>EPv<68`$f6SCaPK$7jcFd
zc0WirS<AWNdrb4;%92ANM~!yt$4?RAnr&)a&!sG!&i38%yb+_@C40l~!qNXWO+F)W
zd7;k}8S8I8eKXE_TsfvtJ3-N9)9Po1ViSYZ6esFF->;_<U1PVc!DR1Rt}gY$np06n
z(p0v(=r*lvQ;Jo8)_RV822U4v>xqIxm!5_!`L^lFgcquvr)_7Hi;H+&WprC|NAc~=
zcEiQ*7Mj>TNysgFl3}P2*;YPnyW%<9tuypDe_OiKAXKaQS)PpPQ3)xxuC;wEDPA6J
zT~Zv(Ng<ad8WrcQb-0)!a<-^dXUVeQ)w$;kruc7E)30fYcU_a9boZFKuvE~k27yM+
z4-dBq@I7JCDc|k1;=EB-p}<pzPR=E7^jEmK)zw(5PS4%5Y595`xmgJ<X33%HOg|O%
z3$Ilr8kCyXTiy@RTerE3dv<WMqP9lahQzbns@jW~cl@7lQtqv{+V6Vy1@{t;?K^UQ
zrG>%N<oZnWKayKN_eXa<3s|)?uJ(cPpHJr27cUp|nRv};tNecNN?7tLsiXrgZ#S1X
zG3i^z|J?QI-qEFh7^5@f4q7Z}QdQ?>Vk-Ef>uex-JnwTwb@|tiuMW4b{{DWw{QRnL
z+1k&KzMeL}zvtJt*_%H<P7Aa?-f7>qc{#(Q=dmZ_<DdEbi%>{sIB_h&rn-&6LqwW`
z(bv_<WYVV3Cxe7+r=B$N-aN@?(TqwbhXAI#Z@-6czi$8c*So)0|6aE1Fihe9_(snp
zQ;v<rfh~vONSoA5X$Ow(ZB|j0@3qdaxYfV=?B%@u_iZgQ?P_=JuB^E8uBssP=>(hP
zcLJ;zJQ!LeuJsm&aj<o@Oqu)dci!&}^|#vgg%_T>7olvU{g*+rc&{K&OsL|j^{ckU
z@*Lh#yGyD`fk9aFSX;;{ZT7am3LRVG*7b>Nx93P1+Qs#@CtCB&Ek9@Z{NnjfO%s+K
z+f%0EQnW@<!ojB0_EOT$7o~4pcPH+Cz3Xq@{`;i~E=Nv3&Dt3e(l<A~Ohj`g+cCXo
zdJJbcP89pr?Y%bn>g8Rzx8L6At(7m?e)oRej(;ydCB6L_dj0*ndF$Sne{b&XJkYlC
zG^6a~RgHoNAHRFI{{Iboo4a@K-L3n6_wmV(Kc997yV$>b@$FQZ|LMhFXJ7mK;^Ca!
z+-puX28&m9b}=bVQvRf6y2AN$NA@J%OKfjueUN&2>qH29OJjkFtj~;#y0<=GpMJKm
z{NIk++r963=P`WaU=VoJ_x96QZsor}{{5}9SJ?41FjPNsZr&j_Rvr<V>u;ml0>U3L
zgzPfxQM@wy?uOg;9evYw&wkY0r(CJ!d#w2C@s61mixxX1cV3ln&^`4n@rvP*W&xM0
zY2VvUObIkF?fH<-{9{*9e*END_1`P21h%Vs@h(~V&8$5CaEkie6>oR0<=(==@OW2c
z)4~N)ccwWB9?{@DR-U+O;_(=rIXf2qRPguyqH)x@W;c^x>iblUb2}uw_xxGW8vIdO
zW{2k5p0u=(O%oOva@j02KcZ9UDzGf-j?Cv-e2qd&&vs92R-5rM=)+OhUp<^Nez+Pu
z+^4hqW~Q2B>Z^un2VSQv>c|TD`0DUNt$8=U|GM)cd!>*<eg4G97OFF5UNknJnm>El
z%GtNBzp+v3cl>^M$J<}A*Wd2i$M5KJ<yqV03QO+?GG{(B2rV(wI@>=%L#<Ki%<B>^
zW2<Z7qG>L>VrMHA1igKzy}meWiI?^AFI5ryv}Awmy2PioYlD{oL%OWmOXrDs$4y#~
zB-mLzsQH_C^x*Q~ewR;m&AxFBISiFz%4d!obm}?EEbl&Nzv(&YRhlPxW^P*La8a^l
ziKF1mh*K%^zf><+@ih9{^vkK)O~?J;3C~u0bLWehsZGYYP0#p!_q?2z&h~J+r)K;1
zcR&0CcL}S%TA`&hD`U;fjt!byjq011Z3rlNB;C|_Y(kToPKnJEfo1C+)wDZUyfK&^
zwCG3Zr9US7&Ys@MW2YQ^?!e-w%#1ATI}03KzTZB)_itn7zw%>W>e>73Y-4h|n3Zj`
z8IG^NacBRUzvr8GoPYk}=jN3hu2$(W%bMOb-+wtXtMB~pO9|8FYoyOt&1jh6I(b{4
zqsue4xieSJm-)SFUG{q!;rg<Fk9el#Z{BtCgoejj%M3QFTNXKT@^X3W>}r2}`S<tu
z?c3^x_1_-NJ{^94U(MfVZ;t-_R1p~}Qyjp-(4)Gsx<Rw4^79kNzVDS5No<p=mjx|V
zb24FRZ}3|v)@!U}eg2+t@I>zp&2?hDmoKJx-gR-3`2FwMzh}?4gT|3QzRR@MlGr5s
zWI^I0s|c-XV-=@xL0!%z*9|zB7&a8=K7PF|tj(}z%gb%M6L(90dwi^R*FBru^1~}u
zFr_eb@woJ;>`XngXn{r|&tnEP&27gy7?hH?*Z->c`tN;yebT~wzxax89)StlgeJ}2
zv;4%9n7i9vKTa;Y`uK3!#+Z3?=h}XAHs^DxDJ=W;=*^1T{QLL)HDQv|z2<&))v{B+
zXT?5^QO)X$Z0dPz&}S>?W3cGwzgC6@8NpR67aO!Ja5`|}PQlhospdWYw|&juncrO|
z&&G7`o!M;P+t+*yn6A9K$>18on3ZQ!&0x~|tN1A2t)p2RV@ylEzci#QOXS%7`Zj<4
z-<7xC%bCqS8~^`fet7xyy<fW8Tw1FZcD~9<IFN97$NpV)`|rP%+r}?0KK=K<-IK3Y
zRaETSRmBu(Ve@YLIpa%{ZY;HMP}{RwOF?m=veNs2v(kD8t}T&$6qMN*!Tk1EMB6N#
zw+&BxdMmd^?OK)h?QxdGd*=$JSDm{A*qEQMuBxrsazFoMF5?07ZPIdeKfiqUzfkd&
z*EHqcjM-0Ly*ROc`Xft;9>-tHm;cV&Z+=wsYLb9-T(Q*G1`Q{Vo|UsWlj{#!ywXt1
z^b~fAkxP%TO1mi+c$t&6J@b}6@Av)RJM2TE-xxF)UuJl*;dB|F%<Q|7O0L$7`HyPy
z5*#kG7(Bl<{rOJO6mOQgq@e$w&pNnHj$2_S&%69ItKYJTZ$yvpsct!vdt}pG1D%j;
zn+A_FxeUhBy%sGLRk`rR-0M+UoHwh%$&)+1LU}ccRxqqFd48kDxcl3kko@@K^J#9G
z%cegO<m8tsGd`<6SMlUIr`99W61dD%mH4s?6f|Ex6*Au_r(^q#@t}<3(;aJ9JzW&(
z&g5`8?4I&Y!wB=9+3TwB<j9-dKK{1ue)%G+pYjuyZP#6@@^Q^9o(_Y%xmzV}<u<uK
zJI(1AE~a74efi7E&TyeKH)CdqdbqKw9rj*6MUipE)7%-6f^SSkx2W3sDgS#`aJJ8Q
zW0}!EXHBE6hc|6~nd}m>tfIr*Tl>C-&(wpT6&D)(D){tV=0h5f`sWLOf+dPix(Ldw
z51ggmm1gFs;_+zjk~do>s=C$5B%JJ0eVcH3VVOqk>a+he_8v(!Iwe?Zty5_$VWysG
z{V~G(NLm)-;fh-Gtuf!^zTetu6IGy+bj0TllTVf7uN7IFe!I?3F8eS0;<UlhCEbUA
z&;0r%!Ln+_W${~=p2x|}3FOiE)-7X`$}3oZcs}!f#|Ovtv#dGdKRiD(wPk%<=Kh}h
zn*4=xY@04T)AXDj*!QXS-R2`+mO}fqY;Q6;p5QxlV}ilw%T`NS%r5fnYdd`Z?lTo;
z?ORii#jIIc)GqLUg3JE-|2sY(IQM4tvX$D;D}J3kTD528U90{7x2%YqaQ(f4k`dqU
zyU#v;yxjg>{@&jGJNLcabwS5ZU+?~&im%W9JSy557wUV|ZsU)67Zrk{#1pFG4i&f8
zYf2q|ZtggHtDoP;&0ly74V;)%^4G27J>W7^*4qB3)8p!C-gR7y>iRP}9CK@GcK!Z&
zJ^tUf&;IiH?{@{VMr>AO@4i;Lz`w(gdE)9>;>(ZUm(!PDC%@kAZXd&{oNE?Q=6xGq
zO`Ezk??%x7D{tTD9ozkT-|c?2H8Z=OW_f**@C%-}vbB&W(IE8OksOza-U^8?-o3Z`
z`*-^Omkm}?pMCFM@wsTsr7Gj``SPZ$+mAPYe!RKZrTpWgM;XOs_xWe9e!VRB-=F37
ze}A04xxC-J+`N3Ypm^QJRbr>JE{1F^HWjLV{(4uJ<x&B+=MoK)nsyGRMu!E?o-wXU
z6Q11Y&Jf7ED(A?XTdShBgoaLZc(L?#O2L8O2W@Q(BJO`byXfORK_zA-OY=6?jqC}}
zS1rquS*%$ZZyDa1asT@wriQe`W##|B&;R8wfB&m<eQE99x8It$=1h<foH!-%?#4@u
z6Lm8FZ`u9&emVc`+sBX3etdM}yJP<{Eu!WantV5pzR<QX_tqWex!!J%vZa5=Gulch
zo3VLcm{{Pt!z^OLZuhD;?aQQ()yWuF3Z6Z2W`@7mo2+`fw%S<RZ&xqxt2UCBliOZv
z&%m^iX~v6xkH6}NUzhjTS=e1#yl0)ibd>yBvC!I(%7izK7q6K4&(5=7b?M37*^95}
zJi0O2+Egn~&(uY5MFz)0c^#DvbCwjFRCztRI5o#sNccr(0sm+F|C;A*?cyKR?&RGQ
zBUkkGGRyHf8mv2CsB5PS+%7ZqoR<-KY58-X%|&U>jz5-q`X*;rmrPjg`^IWUv;Nu&
zlL?oP*(JHFoer#7DUvQ4BRGGuT<lMktvx%#O;s*&gg2_#radm@dwh18)Lg4c&l=V*
z$$hkC@$nZg`=)sJ-poEdH|6ELf{O6jvU`u3b_FwM=kfXZT=`^`ocH;Hj!$CJgU!e8
zJULU$+PT@PdCED<g{_}$L^n7s6cSbp>AQ7H|Ms(rofc90;oW5yw}gfYTAIo<<?XJ!
zcrEm4=S;I5?{^0(<;kRp{rfbXQ^n<?uA&=<)X}?JLN${QY;L<#6!UCF@sUn#fmw{t
z`)>a!RCJozuzZTYiT6KC2WRC=E)pk}CQrEZXu;;xIS&{5byRMe&-XPZr)I%=e}(;?
z1*cDKJa)Z_C*9TRWAzoLR=)*TRs8Cg-9LYI_oRXo@0aHMo~JQ+)_0#JTAgpF@8UCH
zUZW(=ea4_PSAEKqLtDaTFL}{)TvzHf&($ekuFcss$H@MC+wn_l1CO}7ERM8npDM>Z
zciyhFEUr~6G&6Hrx+Hol4ZL|Wc9xvorZ;n@u>8hI(-W=t$`bw`V9u|+ch1i~)cfJ&
zm8-JtDw6XQla$noHh!FE>DIMLYxdvo`|~gURG#&BPvp1Hp>O54{C(1X<W57q#iPT<
zj6(MNH~cl^_mq6X-}Y;!Laa?u*@w#pI<AM)&i8a2U7Ei0lY#oB<c~R7GgN|IxD|O8
ze{noM;k4NN@JE6RiuYyBx)nI5)JRB&Q`741-i-oVJ_Y76mhjYWzn_!0KHqN7|1S>@
zUp;)7?ZVtSpzV5Rk3N0<c=F}6ogeOPJlqnYdBx;UPX1@sPoFktnAQn)R28hZo3MKk
ze{f@uO@)H-(i^Pqn@{UcyEjo~f8Z^bto`?6WAFa?_3m){>)*NdS$Tgp>95JUppm>n
zcIU16@q2#g|9`3f|7FhK>;J#|->of|b~BVeCcHs>$!{(*?{CLNOZ(&x{jJ-4Kex@`
z$P2s9R|%__*14&fnV7mVwY=bD<7G<Vcp1mLC@m@R#qW3T9v-}W^^ku2ym_HtCj~MH
zO9(J9S~C59{JDAVYO&K#U(Q@VJAZ%m=G$-Y-@AMNZQJF~pY8v@)X&$uwNYN{dB%kc
zy@$6oC-lsFw8i(_Z?_}I5_|SNap5~zD*H6pt1z2c%%_QKaVMKVf{NOzJ>Tys9$s-+
z;F}}wmgx^SzFzfrU+lKfr?wlLJvdYKy*$2Nym<5H&!<f`ib9qKJjoLjRxaaAeAAwH
zJGTG&A}cR}U`JzywiLgOEv!p69q3lppValv{C1&)+4g<83oKXphq-gd|16Z)eRtp5
zXnzsL)f-zDzPT_%;^fgQ`GrOTYm_(NT*DZhQ}*ui*}Bc&_hsEItP|6iq#mWb^-TAw
zsL-qHR+$~%A$d6M&G$qT&ZZ@YH%K4nYxH?2_~PBl$dKdRufCY>wYpZ;tKHtwklcTJ
zdEI_)t%)%)Tvn2uxkYS`dP0MCtvzr>rQ=8#XX~NKr{~SN5!)%1smjr|qVmZ6oX0<A
z_kTLCWPAS9zfb8x=>lh6oEa6Aj+HPJ=H)N{wEq09U4?!hdN$37npsw`ipwUXkIRHF
zjq~=;RkydOd_S}7v3P^QKhHT1Z!Ircyqs#dZDPB`5lazy_4{UL1zMz>XPi`CYgRk^
zBA>cuO67v`+YjfrS2jjo;^SDZb)_==b>e}q+xx@14{0S`i_A^?H`CYqV_wHbGl>ft
z+0TzY&W@DS>x?`7ta?VuDxtYELhmauC|Z8%T|d2aZ`SR_N0WAD+}^crPkLE{2#;9q
z)hkoW=gsUCn0H{8-jlA)f0%@PtXq$G9+eC@&d09xyVv`ri*?f*X8+bMcDox-9{8V|
zP$9gfBEvP;&X3tDa%S3*-x^<&{xSGH{4!I@RQm5S;UAloA`Z{|7GETuHbY3*IJNlL
z(Gx#3&MaASv-S7P^+%umwXbA0t`*Iiuv$4o&cb6)>WS~g-P|Suof27FSf=Ti-?dcP
z!jR9IcJTI@idnu>GN#N|nYrGDeMZ4)#_N`MSsFj7Ue9scujFFHbfxF^r4Z4kG_?hW
z&%%9fnFSW+>{4SeXiPk5p82elF|g|BY33u#d_FgZ*e}nME$B@Ov0puR<Mqpi)n)fA
z-^aeqey{yLhDG3cr0-pMqx-k*UoPH$K4JR*DW1y>{yY!e+AcQxaO1@{-woynH@rCg
zG{Ip0lS01ZGnd}k8e_Z6$VunAOR%ca<xMfnE*(C<Gp>4F|L|p9Agko@jq=A7^=^Dk
z_&e?OFOG?|q1$~Q%LcbSOIhJ^^UlL3$NN*KTs)@cy#M~4`*HELJF32aJAC!)-+hyh
zzIyWL)9mZ{_vY;15wj*N^zBXe;@!5Nf7KrBnJuu!$~8P~we6ojf-FDYe9(OVWRJoU
z5i4c4b`fsAjVGo_YWV#$>Pg{9NaFHXl(=L6?!RB}?k<;K|K9xWips>UlgU<lLmw&6
zWSF^QQP$0M{r>eo9=6;6f4bZI+U~_y51E%Mt_TzI3($A8;y-)n<E_Wc8a%fX`IGsz
zSOkjAP25h3$e3@OWviRJd8W8g-t3tdT^yMw^{=gMK6tpUxU~N6wM$mQvTX)Dj%QB%
zUbaf?UiiGl@2~dEe6_5wrlxQo&%;O0#l!h;zu))!-_@t5kCi9N9JXk4%v+yl_vh2M
zl%1<;UEg%bC<$91=d&>gy5pjDes7eCaIjK+WPt4Ph4Q}?c}zoJYKl8e5ICfv#m2P2
z@z=)JZVW4B9xhr{^*+kfob7J;VF?%B-;R6i|Gl%0ke_Srd-e^3hSy3Kk-4*zv=)E+
zYtW}G$k>>)q*y7iWb%tQllldHWxlToWRNYLvQNG(_k;WCqqDTXO>It4Ta?^kt~PZ-
zO0`zQ(HT#><T84fi5s+gtxA;mnt1!~WBKj&e|HH^dAjS{)<8MGnKL>j{}tDMFLh+;
z_tye)zKJrw49Xm>rZ%j!_GAoL%k8}Vtvu_ueQm4$KK%2h!{r#`x7`MOSx#wFVr4IG
zK9`yAI>q$CmnZ|{!aXxqxMXiy;TqDn?EB9*6B;d6x-u<G-Eh|AY@%71v`umH|6@n;
z+t?YUXTSdR^i0oeU9}>Y!fW<lp5F+0H)Vg_={q|++)vsyrFg1UtP7NX_xAKYy|vdj
zEYM0@xcNy;pR_|@abMc^zE3}T^LtF*UDjDV>x|qARUZ*u7M_@htBWpX@=mJQe9~{u
zEHk4Y<!aL<WFA?}FL!c^bB*M@q?Q%cxyDcJV$AAa{~y0SKTpN8&p19L^z50t$)``)
zYOh{ZZo)9hSSIL6+|K9!>uva-pW(4oVPJakE^2d8|6Ot8tG^SvXS=7FN<BESWeUU9
z*vOqOJ7&NBRJ)&lv9`|1|H&OuP5!?O0(q8NEB@HB{Kl4_Vz+;@39tqp-PN7kxwyzI
z!#=j%#d|u#mc|=P7=$J{X>E}dJ-R~2-PKaB>rzJYAD&4PD|LJ{-idma_<G-FDYP!`
zGI`bgTVmG>&BmM0>}FJbe&T&ZajAYb&r<i^%?3(AQ?9k_Zokm(<Eaq-tA64Zg98b#
zzbcq~;5i{7Hv3q~hVY$UZFP<FGQXV3{ozy6pTE1xU;>ZcD#nRseg{4uEH9JxUbK@x
zL}X@DkH&`WwMRl<R5tv+!F(Xa=ELh90u|Ch-xeH?;eGYn;)1ZGSf1R~2~{~V4`MHy
z>4jY^?AT#{@wmzK-p-19liu{N{;{lO-W|(CdBzvtD_#U;#AmJElgBH=&baT$WRu+g
z0vx}s4bO%e9i5y~{lB)omT!;O2W6)#OM0%Q2L6Z-4V;=9Dv><p+f3($8<MW=U$W!B
zYTxbo>mPhqeYroPe*RXQNsEmGzZtV#<~;xMfmGIe;ogTUcbQbpsZ2;dIc?5NF80Y2
zCQa%vzPGy2Pxy_y%ag_TI0Pphlx>+gapui!rMFFDFYi2Obx!BfM`;yDC#B$5n?hQZ
zniU0(*xzNHwCAY(TUjOHE+(~-ZELTe2A%lA|K5II{r{K$5<5fY&D+1L=I_y)N1y&F
zi0gIVW#HkIxW?d%hW+OT`IzEJgX(;V12MtVs`zfIb3Obw;Z*Z+g=Oqjic9v#F8ZZf
zliBXV!IUF-<7U~<9~Gsg)y4Z)tYy$*bbIx5lEcTppQ@RfpDvjp<CoU{`E$2;{2rTm
z=R97?PEc;;;@J_jOYp`_IsXfSJI-Bw{Z=UK`?ff4Yu^Qbo*c4YwU$Fmj76)V$Ek>+
z=ftrWPBYJy*-bp$Eh6IRwM)2V*V=q}yWTH+Z@(3O{jeiSCrmqi;i_fo>eHuB*Z1#x
zrraF6y*g&C`0ZU<CbN@FB<IhOpMUTETsc3twpqEKKkT?urmJwXth~JV@4{7Xx8}SK
zUwX-5g-B*H2WO*2Q4zxttK&*sQ-#&fgw3$teLJUYHgAW>(raxp<)ydRY`^{cYSG4+
zsM8({Cp5%3Pj8awFG)O5xI1>ec=y+&o$GFgxomu~>Xt9RUwmZPGKZ%NE=Em%Tx7O#
zSxkOeZ>d<hb+kF}f*v<7=}m7AMBCr=Z`A0%s#srbWc<r;<?(F`85|zJD%~4pe)?p^
zvISpeF5Q;9HSiSwb&lFTKM6y&A6*t{PHarA9Zj=K|Gs<m%>Lh><M;oT{M~UlDT{yZ
zO0{b%4srWznLn3#f|F}UhD*R?v(%L<6%;oFT>pOUtxe#?>lF>tT4&z=9eVtI`KnhT
zU(asQl1;w5<yukDk{`<2lVaFvI$xBgtaS`La3mt#R#PoXu{j}X&$=m-GOYdHF*C@q
zKFm*%Pf=U?CZl$l;(-~Ty-X^uxy>lwwOH|oqGXV$=$%axeK*&X+_#_j!PEE6@~@qf
zBz-D9HqLfX;R-fY3Yo!EZs}KB(C{#!@^q7ik57+}lh6}QMbA^*aw;cOY&l(KY%E@P
z{l;b$aWxgwKA!sdDHr#y@eI0>?UVA-cWQ^u%`MxP^Z&}yDQFNhc$WE-S2Hc_T+C$6
z1|glPEN`Yda&R4B)OuklE3tOcq<@QeH0>VhafCc}>EoO`UnzHs)|N|NnM+M3ss%+c
zYO!7M`gwNt_7$bqHWl11ePbNX8tmWb&?r8u%HYKQs5xI-{Ekh|-}HXs-a}j$_Ad_n
z;(KP%lrI7bX9}KmG_R6r(O9yROC#jpjl_fx7h7+{Pn{5MTBtv@J8tvaN)Zl*w7YK{
zjx?N%Z#X#jQvN=Uq%)gSICoAu8p$K=EyIzw*+j)hN3BQlw1`3j<K`(YsS8#4PgW*Y
zO5Cu#WBs<;Ik!iayDfgfUYnc~g1<Wa-OfqcmU?`Yu4(%+d&5nhe-i}Qjur|&6%TmR
ze4w^$O1SdI{mB|X@)rp>wg`B%o)_4ybMn=XAHO%W`#tbG8J~9Y-hGqP^2?rbDrPyY
zEy?T(j0{UPT(xSIOz*0|$SH2pu3nlvUszkuoPSyTYyAh7yI;J|{hh!5(SNp0XZ1Bd
zevi7T5X!mybir#@qZ%*A`~wp$X8(NiPjA;JXQKp<sws7oT4r*v&a@O14m3FUyH)bW
z&2`zgw&m_#mTMiKdonIcoO8mpTOzx3rhYtMI)CQ_Wy#G`S2bj)YVaNGe7kM;S<s1+
zSHHG@pT90&Zq~80eENRp_2c(c)fQD*&Ghq0^bnq;G4+su-SGn_3{wtPw@#@Je>!t%
z(LSkz^Mk8eb(L4Hc&lW8muusz8}H*>TqZV@Jd(I5=(PFv8nN!DS$kJozg9S~=&+&(
zgPm<x;KEBwxJ~Ev_nqVS*X#51^Y7>9=Rd_%kiPTXj+Zx-l(#%6yYO~N*t)x0%gb-e
z-@o(k!{g=s^6$21YxgBx+ub6%L^#@7?4ncD$|aFkqcWG~+-_toY;bauW>^`mzU}p`
zU7JgfKPxP~y$*D?g5BM_Hac?VyW3<HX1sj!<m>D8ar<}K%;~*o_Wu6e_}V?YE$lZt
zJzg~D{PW9&op;`utv>qr(Z9mG5f(G#_x^j-z4@}RNZg$}VT&)uOw==B%vu_8fxZ6s
zg~bKy7Hx>w@4hKE{dU>*IP=}Rv`%}S>f62R-d(#pwLA9htBqK5`<9}{Qnkli+@YGY
zMA#grt-iYM-dgk9PqTL4zS?*51=ocuy!Y(y<d^O4bIoL3vbw0Ms^r?Obvu6sY~E5D
zemyq5(!}uUk}FcPqAvVQlAFK#y++UB$O#RR9zR=*w<KNv`s&v^;Z5`U`%PcwnLIkB
zFzFi0srNh$7mr^wUQ}^q;*qbK(YJ5?`d0OEcm3ao%ge);&6y+bE5&kMclNQZOE|YW
zT{<O^S|hlqV@r?*%L=a8(5X_{#U?Ya?EZZ>@8ZqayYF<&=Vn{w-T0Yvt!nS?{~!O#
zTi=VFw@>17&EB<ZQ_p-^T$`qw)NubhJI`XKpcNuQ3twCCyk{=bRp~pnu~y=q@qu{{
z;*vZ+_hfyF+{65gajwCdSJj7CtFLq}TBtN}k@^v<mY7iHx^2hbTzX=6)YE<A1BaM@
zJC8bDIpa~)@_q8m2Qd=_7-#FSOSr7r`zg3ZG*N4U;d16B%5e^J7C0pe3O%3W{&Q1)
zz@AOhRRt?u4_LA>rq2}?lHBv`r*e!DW3tz!rsuB910TuQ1WHU^_-KxuuAb^aS4+u-
z5iXuqyEi}2)=@svzv5!{g~LS*#g#4-626MXO7N&YW}G~Ik>hryi)%%;TRR9OdHw0?
zDKQ8a6sq)Cn3`#IyCgQdIgmH-=>zMRp`n>8=e5?o-P<uECiY^ex2<`bpuxLawNlMr
zwsi!|ZDo?$yu$nbtCod!n;0}x1i1Y8D!&!ZY<s@(E8ly|Zxc^4|K<65HP`MlU)02k
zxy4-Doj&*pdF-t}_RFXK;_;-V_a5tCsoqqt(&AY6tn0<zJ)R;DmG5yjNOH7tb~JKc
zczUtavP{42yz8Em>CaB>^=G+!zvK3bJ%8j|;!YY1-+Pk#{j~l0mnCM)vK&0th`G+&
z{9NC^o}=ZSN=&2S4|SOixxK9?XV1T9?c%d%+LV`boJ5MuyYsa~l2(?6PI;VVBH4Rt
zkxai^YT#7Y$Qg$+oISi+dkU0VZ0r2=oZWw_GJX4hWk=k<MLW0M=Y4p&=>N`gjrUv*
zn_POoIBG_oN%#L!Jwf7+nZ`!L=}V8KGq?*%S6%e2kmHY@UeL^2sH(V<x5aTzGGBVw
zZL2bc>V2sq0S!xfRvk;t3`mSg5O&p^c7Dajdn}FuPmY=GK3h~-UR?h7<*!$l4<}t+
zG;3bmynQ=rDy!<M3M}ONwNph}WCVOpU!H$4C6UcJNb69vaL1$P;@0~wR6LwK?}Ym;
z1%~5|;%=pj1Q<54ZeTiRUwUiT&D?V9_1uDwbpGjd)pK};FHWj5u(FvwH|9*AKfm8O
zKK|$I$-!BSm!sC*6g1{Ea(H{|=(BgPE;H-O%dfk?xBl~CfBTwUdsps0*Z9zY^^0wt
zh2l-F=JxESl>$@TxfVEyoWAC@TF_yNqifW}&6gr~zs`BQ;D*`Pq{6K`YRf<W*m3XL
zIYw&{_x|pye-Hod&;Ms4+pD$ssO0UW<k-uGO_EcUoiF5E=jZ?aoxl5d>EVdE^Y;Jx
zXI}sR>&?!hYgf-ce%mWNPtNajplgeRijGc#_47I0=c|+4Zr@zCuG}|z_px`^Zm-LY
ziniRc>A>-$j}9+i%|CzLyKrf(Gj9saB*ibEjM%K4cH6A`Xw}}>cV(q_x6Dg+=UU6A
zbiL%&w|Cz@eth`o=uX3>K~HnaO6oo>+I%-_@4a<vwr+hZA^OT|=ER^QEy~*tJo0=u
zck+f@^)r{eB^V~e-sxb!%bfG}TUjCZnw-ZwlATnN4zASncIx?&V$ZP2<rXu)NR`#1
zlCx=r|9?F5mw#`+Hhxac&%62iuRnfu_wVx8PY++a7rER@k@eV0t}hX<C*5SoWX<pp
zUE;+$RnPr$=f&e^Zg0D3^0q8|{k7d!v$o%Q6E^$quC2TaIn4V{zg~9J?vB0R*OOPz
z=3Xw{yKrv%rf_?EjzyabF1)U2vi_J5E8r&oS4m^x4)zolcIK#aakcx8=l-_&?NId9
z&@S=8vg27^D^-tVtXOI@wfKNsM{3jc?!rI$Ld!0(ELil?@Ra{A^~Z8@k(V6h6>mMd
z@srW=*^U!eVg#5x=fp2(uXnoQAyARxw_onyfw&pxoSy7oRO%-v@N>0E&WRaw71T^7
zxGz2`C1|{H#r6F>^MdwnN(#95l`rPVh5v7o4O|w<n{eg*dXca`E3ts*#JQbI+}}Na
zn|MT2z=X-~xa`ayJq0fIhwcZbD+ipC5`XwmdEUb79vjUUZd@Q-7T7J?bJ(+{*Tp4n
zd7BGQfr0G%TY0xlcJIpEeU6oBP0G&LJL~xi;;SZjEmb+i*>^ic^v8~{{`1czBpx@n
z?0heD^mr3*(+VLMrAK~`qhE5Kvy0ZryC_iI(B86H*Y|_-g(Q!bQ%`R0S?<~VS4j2l
z!cSY2mj~=Q@gPz6uh;SLMa@PErBfeTA73K=UQ&5dEj!a0%`Ed%lM5QwC`~ar_3b08
zkB5#^1Cx(kv+3*$-#z&?-FWSTotHUQNd5PowpY`?jroT4&B@Wzltn|7PMrQ<bS5tP
zt-MBDgD(rqllAOf&;EF5FR<9dudnlE+rfg}NB0Oyu5wLcTlXa{ku%*+Q6OnyM$FX9
z7b~ph^|?P@wf@c4uwxl-oi%Ga`wxCGJCi9|!S?ar_WzStD%T5g+t>Y(b6!z?ozM5$
zZ~w(_?2omH7T!-#sZA-7w-leSS|VXT(*))~Bg-W`6J}3JWD2V3oZgx+;r-4Nl_$={
z7w!w@{v-4JiD#jF*^-VWp^-BeDX^%YW1P_PMqJV3K`2|<ozkyqduwX;?Ay0@*RI-%
z2s5$OH!r^Y`Sj<}?#Y)YpDYQPqIQO**+tB5{tNB%6ZKd_!xY|VFZpn@?0@C0xO}H+
z{~mVz-7&ApM)_Wwj>@A2&Mzcn7AU)<Z;#!cecS5GosHK76c~jJ9MWuN$HmO6|M=?5
z*VU63KQ6K{TwJ)Yb{<#vwQcvzmKt`Mcxlg#y>sv1?`K(C|5p4j*dAeb?XB1m3D-w6
zW-79zrB=%c7Cwr)K5>E5<DFbynYl+CuSdLIvNqOIujFa?(>J^3Zp~&pyehYH^XqAS
z<qdj9j7JN1$LO2+8n0RvuCKSR_HXZ&B%efUZKlvju~S#RrtQ6}SAKR`VThbe?4Esp
zUfn+Z+4=FVyG4cN|9@0e=gxln=fw({eoxJF&r0l4t`}YYwC<(Z?zG!mdvoukZN8b8
z#n_a6=jz$Rj~{*e_pWenQpn_=SNz*J4n~EqH-G&!Yim?)d9<|CQOA~(iX6h0A#bz3
zR@9dV=g0SREnYFB@7dcYDYp0KoC}*Bx;rMXVUAva_$kZ45ScT4{QP}4ZykU7H>vc#
zRr`ZE=exgr=l%WEvRT-Tv0dby^tPnyyG5g4+*-Fa_w;VQZO032j;<7Y{C3qn7v?4|
z(@YhWfJb*3zUF=ZclWn^eMPX?<oX}K|9`t`zCN<FYS-I)v8Dax+cqE4um~y$xHQo%
z^VQvT!nSke=gyV&Q(x-#Kx*OFb!QeCv|kohzq{;S&aAgtyQA{t%yy^VU2{9{__?=c
zxqE*d`TF<K<#oB%_P^eC-&+&Lx1mt}09TWXR#Woc=0ly!Ygs<8Xg=}$Q@Mz%c!a%B
z&*2Ce4y}1J`_k7OTN2S~_ei<tSB+%<$!E{b#y<>qsCxYG2G8VZE2gIg*4!UD?`j=7
zwP=E>b)|~9#PW~V9gfZ7VVZGLv*y42BF$fq^}L!kS?R>jW<MZnXl!+SZ-Qlw=D8j7
z<MzJ&+iK=Ahe7?_*#{5KNM2{2`=y|0zR_tz<?9J;qD>Y*U%#~Ooby;&)Nq|b<cuZk
zF5e_v)*lvMX4huMbjwiF=F<(Pk1sb)2rqfr@2o4FBE|DYCPL)&xt$Zdr7p$S2#Qt<
zb?p|_+)!*ZzwGwilD#+c%6G5Z{3zzyG(OH|1rH`Y-kquZ;YK?+eor^CakNltGWG~s
z!1L|De}GwG5T8cl|IRnP5e_U$CwMyMD2e`hawBe*lvjpVj7`fEw&Ty|&8^)nb=|(w
zYvYd(=JQX9>`Y{6`774v?C$8aa^G<q)4Khk2XgMs+H>_%IUi@&t^anlO?HkN=iE3O
zH0Nj>xwy;uu>9MVT+c15yyEw^6<;jpoj8ld);D-T{==iYe>~oDPyEZdN-xL%+BUaq
z!kU;A98?RtW(08^T0h~s?4948yo!d_`VpTln`-Yp5MZFc>e4%*MgfsUfgz$tOH5|@
zs!hI_64SeSlhH1YJ2Fd4TeOY~&7X2Ds!h0I-`?6H>pyQ5elM@P8Q-w=ru)oC|19bq
zDh_UuKJ&d-pm|Ykz2v>F784HBifsZXPv=NU1kK}OGdekkr#(m@M*ZQbZjW0p#6`~h
zIB{-c>Ackja~65-5cNnj)#7?Qc~Wpgn`=DN;sqOC&sw+cdiT+{XWuSgJ^S_TqiN}-
zhL$@mtgAnM`tqm7Zholp{Rf6^5k)_5_IcN>dvv~P7Nfx1n{W5kT)WJ3c#EL^>*Beo
z>6a@eiYO@^&ygtOUBp`CaW3-ume;%DN@Gu-VX(R<#^RvFwLwFjU4Q?dU)%Tp{dYTj
zxxU{q<>aYi{jURCx?~->qcRs+*iJ0lT)Ss~d|sZ;hm@+K$}gpnmx?OCKRev+dzT}4
zOX<<Vh2aWO5B+jZFY(B9a9L|&uq$lM)OCg1vQJMdUAi)5!nIqa&v~R1uX09SS|iDD
zVxvWx;PV+XQiJ;9YQNsS|Nq(hx2yKmM{sm2*~<F!rCt8|tMKJfL$NO}9=!PT^v|0`
zRvKHfeHS)-c=s#qbAkW4lNSpO<+vx7cQAF9zB#frFZ_0B^7Xl^tuwdp{*}0M$Nu>D
zWwDoUzFl`SuiW?gq_D@$A5)CX+$?XGY`eMEto!%VqK#Rz&RO-PDkx79Dme8z@#d{%
z>#pbS-F)_jKv(dkiPIkCl<k`JHt+p=uI^nS9s$>79V<%;f6jhim(8&bG#~oo@!Q95
zcir3ekDG&IQFnHPBO7B`L&K7jyQ8i0!gsqr-*wLPcG=FYr%QiZ-1g}%ikZ52l}u)+
zl2oCR-vTMo)<6x8nD^V>-`n@|as8jbmVN*Kz28^y_v;QV9i8Qi^Mclj#xh^$SaqTO
zb7rZT_Ufc>FSAOw$21v!4lJy!ve4_<v2CUK?Yl|A<-V8Atkqejaf@hmC%G+NG$;49
z>+Rf}%ci^2w&#X*e@(kwcFmJZfUi5ptMW!+nND5Cf~ooqCk~g|KD^h-acsfmjMbuC
zhaDAPC>H&kEWG*R-+R5kf2LMcz4~Ifj-xP2q)yXS#CY)~6^Cyn$2FJTJOAc;OxXv^
z-V5_mvNK*!-*-c9g4#m%oku3RIoZ0szZ$Twk@<6|PD+a+Zv%^rv7z?b-zNjV{AcB@
zI%D&Rb&dRYjukbDN`HkriWwQdO*kZTyRmjvc;8*md%iC#=ZZXEulYhh^V1DOp@}KS
z3uM-ODb^No_3+S1<aU(QuvJ@DIVImoOWA(0GS}V79tl^CJA}8Lk==bRa(=++#I&g~
zPfuR%YckPFw33qbInMt2W7g)Cv(;EyYsDUDZG701xl{2}Xj@&g2Pbdk(i55=+t@d3
ze{fgTU#v<`S9G$Urb%#%2aA_O%`1-Y1yLuian3j)cu<~k%DM~t_PwwCZ@W)f<ZMr5
zL$aHR)5o8C8?PT(InhPQ^X#JcdI3Tqg4OqrFQ{t%clwQQ;CzvrfA?xRYa1+!FzeBd
zR1*32@5#Ye3%++O7ia$ei1X_8Q#_K7?bSM5FPHwB(^x9hUH$D6SJ}VslYBp_o_NLT
z(D}_)=Awd&%P+~|6Y8GczK0~DIhkg9I#%fyp3(8%c_}gJk^R)bRR&c-p_8*r*10X-
zw5Ds5$<m8%+G{(ee2NoYI_3Y~psp>~emxJ~IB!qU|CtBt&OH46`s~)dGuYejoVnq~
zD`i<})YxTZ%{gfn&vTL8v9C-`5APIwRD8cj|3mA9J?BkpCBA%YJowOAF#Emmc?TJ$
z=8n=NNu6fi4|C$O`(9=XYc4*$N{eG^hmd2z0jC5}#>R%yZLiB-fBpOS@zvx0^7e7|
zHfwZN-+XxT=hN5X>-G5i`<HF*G<4yz(Q2OeZ^ilh28Scvdo+$e6y7oY;bNQFN&WM;
z=RGcv?NE4gqjcKZqZt>KLslGE9%b0go%l)4B2zZ{^tHyXQWpK`0s#$Dp$ofD{_NJ@
z_y1~s{qMWq#oHH0c1lSruyb(q9T42=w2524{r%I%6*hL3_VtxEb~F6a7B?UET=?_V
z<Nn!ickSDk$X63T(dY0n4Td`#H=pQpVbr+L)^knQ;&xN^LDSgnTd%G$+3dGEY?tr#
z7g84r+qtH&J<ep-n`u9jr<ebkeq3E;W!+7Y)7z4Rx_azpob}`9x8DQq($>B$>z#W}
zZS&8-$lePYfeu}1E@x`$OZQgAL>y;7uAXM&rn1zuAvtkd@7CSRZbqzQ)VW=@vu@A6
z-8=V2=5CA4-dW7Ua7=`K^UWs@4u~9!O5eRJ+idmGSA`pI<w>RQ)v;ToE}C`7!d9fU
zDXCG{;`W<eS0DfVl*rq+Z1Um4%J>yUAt5Umbh<;+jBM@p)Rg{wn-em3{v7#pY4txo
z%iG<HTU)06dX2&+15J)3MxXP_>uVAO+zOhul%9J#Yin(UiTQl7#hWyaUp@Ho&zCPR
zYV54nb$M#*boD9)ICyQ`d_6q8{!hRC+p@Fy^}lc5|M$&&@kfrMLQRTVCYo;!n7Z{p
zUwrfR<>~AF_DhMk3-398Ov$1-FXmO@!p<){LP8x`SE{}^{<SFd{kCm)Z*F6Kz9U)j
zxH5z5Ev?|}&0DvXB`&jOs6MjEu5F^L`?8=Vdmjqy)oF=cke58eB<VmBpYWQ1UmM?_
z_qb8d=&0x%P|cdfv0|wKmwKz$#GsQRuU>|)luKoaFF8`V)rRxA%!^<DlpgF57hS;f
zL7b6)sy?rhvx|YZvr~=HQ7^M=E#e|)0=_(SSL9h-Ebu(pK>4Vs(WC{>FH~Jf`y%S{
zq5RZu=_T`&8@eB_l~~(#{>+S`_M&O*vrg(1<S&;EyROsoXhP>qM$yEO4wo{QYmp3s
z3pp(2^W+G6G&uQavefA^9(fVfm*C{X5NiMZ<O^*Lg*Qi{u9t4xyzXsI`dZ$5|5f<D
ze~S<4SaEVwkC*YvQ#VsgWJ7nqJy+Hz_<f%C-iue32Uxy%x#>5*wM>(;kc4s1?a2>!
zUcaH-_4T|~{*2JVvuxG-cC$QE_#)%Zc~h3@-;NTV%mXre#R7FNPwuWMeXh7Yv+tVv
zjpO$dmYJNmB4qbm_tXx{mODkSBL3`8ej>T>_zh+ru9x%A?wnj}>l%OIz4%|r!={`e
zd+ZKI?^X8QmcRVlD&~_p`&M@M<yYMEcylqNY2x%lXZ-^=+)&R;Vq5;TwQkM}=YQ4m
zvSM$XSTq^=fBP$GG$*$v74auUW&8|ps7j2>7w)}NRlnuO$Ne0N89{EV0|Wh5eXKB%
zo4WYYl&H+LTf&a<zu9^zO4I(7xtq3s;J3}~do~@9Z`=Dv`uGpqUapVp^*{c;n!p!4
z!7k?a=lkjNEtojY<P}V-4*Wg&y@vkH@B4*4Et>60jo4yRjO_PYactK9|F6~6cyq+_
zHR0uzSMMJv7nOVMvA{dFx98%DE&JUh8w1y{734<em-m0?H^06;eEz$2>)vnQeN5!>
z<?hp`udmnF)93Gho*FuVY3UOFPXCXWFL5W&m)(%QKL1OOXZ`i|<C4d%B_2Q3`QgL$
z$+$B^Zq<a1pZ^{{=f7blW5brDcDq@;3>Ow?xRkhrE!=eN)gqU*d*bIF-ptl0vdU@p
z$v-dt?0&!R|Jm~W`z-9{W*MfG*7!`eDD2I?op$-oUA=3kd*8}%m-uyke6u@#zm47M
zW$crSQnm;_*%k11e%+sk-##w9Z7HA5z<E~t$=*hf7g3wqRs|_0gsv(K-SWCGnnkhf
zMx^$-w2&31+pXVuX>5@{kl|bVxWOrEA+wU!>W@2S2~9e0UevVq=|;o$&yrmC<L=*i
zw`B7&8DF){%(*-QN|K634eZB{-zhT>zka%%opFxv%!?eHjSUNv^LFJuk6}Kc5OaO^
z(WJufA3kijvrX2*Vd<9&o0-1LF2D4V+`D@F?W)Bg?KiIG-j2$SWZ{@0bc>@UvQ^nt
zaEe>+>P1&nVt2>s$mPAC^HyzfvtLwpVT@mT`{rq{cirBkwQTX?KvVTK$F9~`Nrks&
zMql3&8~c7vgc8fA4OV^uoeh=6N{?Jzv{jFYt$96b?$J``tV|Qp_Q}buRd%)|*E-W$
zSBMH3Slo`RuKxUzfB*l(MLVnh{#u>C|L;02@A^351@A9%1+kSZRZrjidAj?0{b%Bf
zlgyT7GX6^3sIcvbpY~!7(Tt{zinemTC#TGQy}D|1^tIf#yUuN$o6Q*Ibb8xb^V?<H
z@8(56-ywgzYO8ANkuCRfp2{0}1`6>TA9uX*e3F9FHKt~v|0kL9-q-br#Z_`0IC=hI
zLH=)TyK730a<A0CchwZG51Nvsqa0LuR?t~tr=c11bX!YfBgeDpm+uxwOpl+;cd$`o
z@8U2Oy;p~(?K3xeWZ>%YO)kbRlXr5oIMe2M50(0TuB;Aq>loed?N@o~bu8-qk_R*2
zDk|4}Uuko5ky2hy;HDS1S2%t9E#LFrdCe|MpAVil)GasqCof&&-^D4puhd;&t-%BN
znlDpZB8%7b?NJhPJz&`GtCQ$Z`0Rx}=V{)wmsYpSw!O@`T^bwk<Lg|m(+Lk#I=q&;
zEtL#^ni8UWdQs+DkLMOTslVjsam=ip-ZDdB(Ioqi%tub<r~I*)x_35TK;o3k4n6hn
zOYbr5{W;ZBMC<)d8NQv5tKB3HhZ(qJv`pY~oEr0Tqr8IswsfCKZ$G&G;*n81v2()T
z?H8@hA79Z`X;>(*{EeQAt8IC(QEF_+_Qwq!e>cxH<=Zh?@wvOul9G&>XEbagxwh-)
z9(b*~HzmH++;*1jrD<~#Uo`Za9}Y;Fx>hv#x#V=M=Kqu4+*d4Iy#CYSMcN{YcEuK(
zUsgYF)qmv5aiM(I6HDpB3vz1T1-6;r^YwQ8BXWFk=jYeUyABG7Sed`#{O-WO6{sCL
zD=8-Qah8pi+fvQbi%fQ1@T!j6tQ53l%OyAIwNt+p@Gte${A*(Q@c$8e&Aq?s@2mg0
zlc(UN(R5t%mS)O^=V#~SW+psRV$<Gz`1d_|#iqifDfflH_N}kmdyI$Wz|k|tEQL&S
zAFunIK6{77?VIuC6Ex1Qkj+RFQV40fwDnG2x$owhnmv1W?*3I$UHp4TgsIQbgCD!Q
zPoM4<pMG4u{qoHv8WUDHGl(b!F+2+QHH<ZA)fRZC`L0kw`K$H+2FKz#(@rfE=v?zo
z;u<%L#+ex$W&Vpe4cyKzQH`=%6;iPF=jG|S>JBRcCSK~uSm2OysBZ(aNN~^8bskfh
zVlD29GG+$%_4&p9d38Jf|Kt3B@47F4-gr$aO2{RILuuOWW4Yn8pFaM2_v>+f`@bJv
z9^Q4g^vwybAXB?tJN8sm8pvfXmFPUWu8)tu@7d*3FLK{{8|{DfP4flEnh72{UDpn!
z+02vcUsZIw^l@zNw!3~a7+V{r3a=A4onYjesLnb?xnSC&lX_3Ow4D@rSxa_Q$Hedb
zZ;-Y!eD><y$HkYNmke2{e%Ujz>+ZI={G*w{y~nPuU}D+k!=l6C_4?$S2Uo6sefR5C
za&4^jl%F>vP30zjGTD3W=CbT;_HB8y+mBzp`!{L#+DHaZ(Onv=Up;t`U^Y|eQrYI+
z^))x%ggxb0C0KYMeld63b%n>h?W^OjYz~W%Tr2qMY}M`>l?NY7xT5yg*Kd!w_B?&P
z|9Sq`X|>-Ag0}~~DvZ8;^l{ai)i0OK=<C-%zdyo8uWa_&$G;9XIj0B|Huy0eyPIxy
zWafl~8P_|Db(mxo8ZRp-=W=en$SP=+?YnS`6I0<eyF2&)pZ@=6`fC6AdiwkSTwT9!
zzT6oP?WHy6Q?-oe&S>5^t9;iivFGj2YFOB#j=A;nr(K=CKGRCl`t^-%T-ph0E{l6s
zU0EU388mxctQqe%S?l!1H4};toY|IFzPf6A%yw(<Yv-kYyUbi*)yDJg)FcPNt^iGW
zzmw-yJ0(}Xo@%-8Vz0>=#utGJ0-@UHU&qZey><MN+=bI-kB(dvZ){9ocSyg#=_|W)
zS4+EHC0mtL%889tasT$$A9#E|>W6aC+{+;{j}ugFUTE^~=CNa}kotAe-n7Q|z0f`7
zcN(34*U1Ss)G%FPNY?%I<e<pEx~>gx51TJMe(%e=c?+*txOXIQU8rS}suv3pyn5l|
zNpUBG1BS}Q9<6GL&o9lG-#R(<vbe3|1^L*84srK=6b#o-pTK;{cher%1PQfyEPfpu
zHaNdnlYM(z=I(1%buvy@m?|ype{N%BWHd=!cy)!zzGFAvuGnlSYBEdfbJVfTlWZOp
zGo+rpbM2BnlVQM$8|lgo#q+FR#c$Af^>%4+lafc=n$Frst5aIO9Z*oJ4_T<N*g(+u
z`?2^1Yra1FTdf*+uW^CC;iNAo;#Fm2S3J}gN<I8;-y8qe>l#vy-w0u_Sv$8NzlE<-
zpV4;F=H*j8j&ObD{TX28A9(GVs;k7K`x6Y-eioVVL+^#<p<W)xx=%8k=@-P8Zh6*l
zi{pMA$Mpv<mVT2`6=6~kv0s14AcXVxvJep^k1S>bzBig2BCU(Ib^WTy;mHYOa7lUT
z&QVw>;id0-(Np-cMTo=Q$$`7HP8XWWO<lZ7LUh-KC894Q?guZ)*N%UhoxA1Y5|c@n
z{vYrDeURmuzJ1I*#%DEo$N#i23I3m)e5}N-^U1l@|Li@l#UGk5Z|8r@-*e~wnX^Pw
zkKJR_-Si6&II5r8miMjY_dcC>q~d_FSN83rkI(M@{$9Jd?n+3Si%Ra^Xgj@k@^-PY
z@$vWdubXzS;^X7*KY#Ar`Sa)e`}>|PUKOCyVs^x+$*XMgmi#aOzFJIOXt#uIv(DCp
zn%A$Z6V&yV{B8_-nCZJQ!vAhS$CHy0?H&GRFEvl_UOwZT*~`?orr7U*_Ac9JLY*d6
zdW=GXTA5Qd1E((VS}@CDvRTy3A`?sN{ePa#j<5ZHcKtpZ`?(TNpZ{a&;JZ<H`AR>3
z|NDD;cJAJrXMQ@5{ajV9Ze&}VV~FFj9>&_16<v$w^c-)!#ud%}s3}v>DNLxPprp+0
z;+ro&y7Tq@9{fADcK6e}Mayqk{$Iw-bSvsr>f*0uYge+YK3KVS%W^H&hbhNTyveEl
z|L)cP?f<`Dy{Q?!Jy*uMuCUJN*d?ZhpmU4An`ABh+vUt)@g|L9>$<ed&65|mx8I(9
z+nT*$X&9>%?=pqUj7;UZ+i&RPzn9N@FDEZ|-S@c5E{)Y$M!Kh08MJOL+k01c`s=4z
z7k9A*C}%!bN$D@nzry+2wMpX5ajw?f-Bo+GZ+$y!mk!s`4|yMd9G9=lvikRR`u%+s
zg`4iKOPl<&>h>0&J2#KFvnyxZo@r*F_dI^jpJ^TI-rKJ&f9<>Z?$y%kH_z#RUTeX!
z^n}EvHi5soPb2n3i#mABuu`|qc$Y7y{&enU6WQ|u3odkZUVHsizV82T^Zfhs=FP4D
zd)8cCeR-?q6eR}7r46SxTFnYg7cO1zZZtoxC-YWrPI4Km=qdyD%biXUYs25@Si2`)
zT=(ZliPcOwxijhAPq$@SZ`1MeXw29)O?QW14Ev)?6X&%h&Xen45Kvz)uqg47R{Tl<
z^Y_zFo=fjtwB(^}u-3cB7r4GZ6#lg5@rR#NbJr_*Xl@D&f2F<pOAF`!gg^h^%zwf-
zHE#LDYmIRxnvyCd@=}w7UVr%PH!uBYeM96Em1lNWW$tlEPG<|6E2VAadGVv6iQR&0
zGhH>zgno+r`8nOpn?e53EaRmf_ChD;wkehfv(??1H&OPe-_KKwlmFN(H}10dVij6+
z&Nz9_7tK9NcKo`tx)-h0>X%OabW~_}z&6g7TOu=<WNftzcf7y*iLK3X&g5%lzS*z0
z<yzmi6wm#2=Ew`1K*m6Ukb;7Y*4Dt##8sC>TC=Q50&}}HTf$YOFD;)^Dbdo9`ZeoU
z;NS97JRS#lTpld{apG^)k>9T-7{u8*o$;zX-0)@AJCSK^4J;x->zTeePOx}exM`_v
zlG6be?sY0R+*imL?4I-ZOV79IzB`xve)(kY`^$#?H81`jVpt)eV>apQg$tWMY`#|2
zpf_djpOrqY56fFwge(*Vb)@#%%-L(b*GGBPx<$c8m(mTAw=YR!t3DE7a`T9meajzK
zwJ8N>Dj7r^o|=DTxpx26NB`go_Z)@>mjH>@KaJiG`~QR*-mp&mQT<R?`HX>Eoq@!^
zMc=1ZU)}e=<;CLtwN7tWtXQ>ZRoBGl32IS(R@I>j&o$(g7jEJF`uG3GM|C&YpPf~$
z3%l0r%-}4q|7T{Szm=!Tq$`K#T%G;I*=N}kKfW`y2|qXS|LRkS_3V4<>Aw4ZT+2~~
zBVS(XbgFu!MCde^=EVNUn`gr>v*T`F;n#2WIpOiKvGeB1^rW|5RC=4XyCTBQPG`Gy
z_>{?uA8$V09ll&W{qxTpBdt|VqNnUT{`j~$3M@=~BEi7G`D;#w|BVpOU*b&+fBog&
zwJf<RSny^|_m7PG$ErRUA54%u9hfwYNq1h(^}q5?WiFL@O-h1~CKRs-lnmkOaM~$<
z$6sv1n$~NLFM_5n=IXwgQfFarWo!HY(O2>KeZL<4`H^B~Bo(<(z@V$7H(F0#{@wfa
z>+E9V?=xM#_~+=Gqu$Quv#-w14Ue7{ds=rfv*_m>879%AFA~h;!<Qb?WqabjCB5*#
zLQd_LBojM3`@hd#Ul%u!yIERXUb_3P$=uX*R+g5D(HmZD+o761HCIhpNUY4kqw9u|
z_wgHo+vl2W+Iri(zx(y}d{&0ii(9t_?y0KW6@ABUjae_RqC;tLetC5ITn(<7Q#6`1
z+T7SDKfe6<Xx7dsyGy-+tTM8`-AygrTNj;8`~30o*Q?8KKiyTvH?eDx&gr6<Q01jt
zUcX7(9lQS8?6cc4rQ6r#KI-VNp5-z{y<ko;Ba^1U#<IN?JCqg&hPRe^nccq1Z@>3{
z!Pl4F>-W|CeR}=#%@m_q)3UbQdRG?vT;g_Bd3E{ryE*QuD^2H|(N}-`^669Y{@2Hk
zW);>}@4cCKYuVaz-|LY}zoeMWl=X2>K72<vtpD`W$Lnrx6lF-gav~sAI`01ad#3WU
zW|gns)%{ugfFJXf+i!nwe{Z*^zP_U3&)x6)Ev@%!oECCEwpKvXWpScWkb3)7ZL<m+
zd;LtVxvRsqeOJxu*%}raY0%GjB+|Rp(aHap@PtDm$3GwbTBfbAK_Il*?M1)^tA&de
zY&zU_^yh(RG1YT+6u2<&SXW=C>1z0C>de@Vte5QHHmL9O|6056ocIsv6UYDRZPx#2
zyZ5N}d(J&-?y4qf^UQ^}%FIzY$Mj}yeQn?42i=_q;$>bvSJ(P-;P<~qrNkR^Q*>>2
z+}zLCa{fn|;pH<fzP>qrH8xQt^(i4Nf`Uv^c??Pym6i36WER<9th+Z)rTLUyy=*Yk
z4EYJ_b<4kGw=+f>TS(Si5N9$nVwm(vfJ0F*Er3h)$b!^8T%LLNSYGi}izhNLJ=oF^
z>L}CnMuPp=nJ=+lb+`q$9m>6aH8<Ma^=?^Q>;1^pH45DugzQf`Cj6gk)tVq8<~Fs}
z%uUlZBR1yJ7flZto%@mr><i^jAAMz+ziUsRg~Fy*n`O4%N16K88*vARgfTbB=dk`y
zaj^O3RLP<!pfZ1F=bM$U82%mXerRaaus+Ru`h?hd>z>U~lK4FJiRAY#o9f9&0<7;a
zUp!&_eP;^aq&H7*OP^cz%bMMQE%KKzkCNTvpPX*-CzXVqmbXdN-3ifkjBofMSi`T#
zIcb@rLhOb0?3=>#FMSQ#`JsfRQ1Pe2ifW4&HJ80^xaYOyw;fV*yPz)II6=Wtso<H(
znR#Bbd89OV=$UzOba4HC!mbolTPuI)d2iTK%~MGsvsM|n>Yx6ouB*iOQ$_44yH1pL
zvPWjTMW%$8_b*e8IzOi7|GT|vZ`w!LFFqsxsASWVqv`*wTyFZ*H9kAJTBlI`Ps=4`
zTMoZV>XUL_$@DnIt>bTAvb}=ikli(2#`!t>T^eTA25c^!FY2Ml(e<|Mbo=UM_wL=>
zv;NrPq>v0}55GC7>5el$KKznUS6TSy)u#_1GK{oLnFN)YEEL;wA`dV!YluGNo4E5i
z(>MEywwPPT(m8hjX%|g9o^$hc-QRDq8b^Mpty8VKlu=`teE;muW*wd*A5JdTR(sjh
z&-wLwO-6=h&Y?d$)fAj&=Wf3n#aLQYRajJ2Sy@+ES5sC~Q&Ll5BJc0DxK~I-{oJ;7
ztIco!p1phZYX9!1n=*HQOsOj>DyuBBSiRuNW&inddE%zlx3_HD+V|H*WGaKgj6*%=
z{mz~9^UHsCJ@@+8w9bQCaU3fQ7O%3A@bBX>Nf+;)uK#}8>(#M)H*54cPwUE#J<Jo^
zeQn$3O&U{;QX^Mvdcb>q_qtqN=X6nS!wql4cJ~S^2LHS8=30MwzP|rO(|r4wx3j{Q
z2uwWQ-u`;^>btK`M;dyDsWS??gid2uKd%1zY&*Za+`DzB1$(ttYNf_*kI=KP+4Jwm
z!?caJ%J@2aG*4}kk&Lx?w(RD%-D%tJzS*{W*;^TFw|7#<{R9r|d~)K!Ja<8sC_7%3
zwQt4v`Psgd7`Yn8Ue~$ydiCpmzB@OMo-8q2>lV6js*{%6*JbOr@7|TU>ulA|C>^d`
zzUS=f%iW)+uh;L^mx^9*w)*I&#LYL&c316;u(PpITBX1L->W}Ie`dr)+>WXJ^<laE
z{~yoZ&fa=ABQ|c%zN*5an`>_Q=iA@CcPH2UZQ9;w{cG#>_3wVXShn@o%jM=*EB=(%
z|BsKa|9G{+$kE~SHU7d#7e*nG#YuHlKe%S_r5{(R+Mc^>+wFDceoJ(`mxe}8Idg7f
z#>>CUIUKs4ZEnt=A{VN>_R8JI$;rF7t=_&sYT3k$tdyPG<}G_SC%G{2wq!}3N8&eO
z&5jNYFV>9a<ZW+cxRigz8_qU!EH*wSQJ1#O)?#Xg|CG;fHrFdi@m*SU<S@?-VI`IG
zdk=MdaQ$Ms*Zxh!o>S-j;|!7opKt7&ul@Y6oY1@_pBAdzzICBWwDg<eqwk&xxqM5w
z7vB%raokQ*>Vp$kWd=h?|9v?%!ISqsth(OvWyJ!8yDoot<UE9a&b}NmNwsQ@)3N}E
z?qfFUG6^+JH*8<rICR{aO^kcm6b+$(hk-tx-lCg)f9=}bFU4zbtsLbrw=wxpw6h@d
zOy5lmlddVYrT)yUOy8MzJNEXwS=;h<FWcO@HA<5~pjFAmL1p1S@iYcCm$g$ap33-i
zQnau72yd5v?q^&5FYoj`L)=fw-MATH8PY7|dxNP-rov&PX{TG-`A46t6#mZ$=(1S!
z=I{lJ`DMuw@0q^N$^2y2;c-lMhfmOMCY1wweRt(ce9!P|K4qO~pi(9L=<3nJh_0DN
zjPny$E;yh+G2+WR{v&6)??}tB%wUuCyx-lPwCcQ@vjdyrmy<gf>iA#go2X1+S@U-P
zPe-k?FJG&=H+e07Yq6jG;DrmzCRm@ibK_qdOJI{qofE(1dM@U-Q<Q!P1y2$?>Nlb4
zV_Ay5>On2lkc_S%UM{(-x2Ffy`g3WXdB4?cmqtn8j7u)5eL?^DHDA1+8g@$~HX}GA
zRwGtp?L-mH#j5v1cyllAG>oy^&RJ_V@s)mOoxpdO1^W|xgguXO%wT4J^71%K_hcS+
z=|=x+>m!WWg5%F0|LJaYUFhP1ozrsk1QTO^D(?92f4=zW9Jl8dAKuDrK0S@I+CNdK
z-7n|yw$r<A>Q3kCbP&{HaPbY9u_<%Hhr9mK+r-a)`|_f|PPU)xs7FiFn<-0vSh0Dv
zMCBVk+{68CeSf_V`=)@Lw3_)EkM?JtHe{0dUC`Sg#jD?bz2nP;=o`j-=?xp_BnUpq
z`S*@R$w6r;W9>E0*2S83)^iVU6AoHD`LTHWa{YaOKHU~i7jM5DxpM2pRUd+CHq6Z}
zzkT*mVPSdk|ANmOZeDXWwX(OjwzIUgv$UM?qHEUIr0N0#v$m`U+den4sA&G@*<!YM
z^5e_hmp9M9S9|MC&i(U84r@s?F~%mB?>@&Rd*)gDadmNhJ-fQ7_1AJQcXIq*%q6iz
zRD09IWw|Lu7M8ZN_t#XK>1A%daVOUPTDGF5*Q$oZ-I4m&XCKYFTXHt-qS@}RhtFDz
z?7nT*%et^>=JwZL9~A~(-+Jbl!xe2_k%fkfZ@xMDdb-DtZ;xlc_J2S1)v?P(JF6;w
zKRmqr_3f)!*UOqxCTs~}n`m|0wDk6yS?e-ux2#QfX;iRIP-o(q!>d=jsQpv*DUIh0
zDaT`PcW&5_Eyc}Iy8HDGTZ?(~;$)<qT~>4znUvXOpS4Wiy)OCo*4XW<Z$>Tsl3_IS
z%(KfkKR(=3UHfuQ@w(S?@8#?N@3t37{kl5-|FaKI0y2d*rXSbmTc*D5^7iub%O~HM
z@Bi2Se&3ICZ}0rxynX-wyYlzqYAgOfzRjP1Z?C<*y}j+*SF<Ajov#1A{ByUsYhdI5
zi8il7CT>ue6!bKu#3M+<XmJsbPFI=lWuxNTCWm4|ryAW_esj@x2X((i9t*$JSZQ}B
zna#7&nclZEue`k6eD>KW#aRFK|0Dh<R{j^bk#mS?ibh#nn!}a!#-uN?np`e5+Ghgm
zZacrLz7%O+BwW$7yXo)J#cv*ReBz(<d}~)>+Ed<xaTZ)ZFK^SovoxJ;Lql@n!3lvP
z^*i3|o^XD{`y!`3Z&>#nKWO%?^~QwvthVxobF4P*al7&O>2voz^RFE}W!AV?lJ%sp
z@cfIVb^BaTHdhOWJu5by^5p*R`X5KVL|au&T4p@Z+%f;6*26jdbENg<(^#I)5oWvM
zA|coDcb3#<JFZQ>s=v1PFy7QY#pyeRq4hZzw{WW~=PrS(B0AgWOe%0@XlKo{jy8Y0
zEpJ<?@w!u&Bz#ZtcWhA-`Z9g0`uEMn3_f-fW+?>B-ryhG8!ax^Gx@iaHFrW?(_0Vu
z(;tgmUiKO`&N-BLPsebsC+9{Bv4ECIO1&yejgDqFySPl*@4vTD`<j}@oOlb%TFt5~
z*6M|uc-F5giA`ZVv`O8m>;J9WLA8(1ek|p9R9BhTuGSp0bMI-ZMXc;%&+pvqdt;H3
zRO5DP;>EI-8fLeJ(oSrCOI>^>emXAMyu!|!znLpSVa6XpQO)(y3m+sqmB}>uIDc^a
zAX+%h@Bjl#pNmJ$fw)t8PWI)BF0Kw7r`oHR99i`z{LByU?$&*h6V;`o;;&8qc7HQh
zQLQV-td0w>i+irQWiR%~&fRj!j^(KMFR#qCTQ40GS!%ZLi1`xJNqv9jy!m&hx!<-{
z`icJ4PxY01^#A_&Hl6d`_kA+odmg5**}|#+-d$8ic&Fv*39UI>{_lL__A;DtXRzk~
z=u4AN&#RMrvz)8>@E!T4N6)|a%@sa==)4KLA^)A*KO_${3KuZgJrWLNE?c#A_OgA^
zw*S67T)z17=FOWgcYoe|dGX@O&CQX4eVtxTZ>LUp6YxtR<9xZVZsCslIiKgIRCG>Y
zX_)fKdBK*%?<?$jEN9s!9S=9Z)@FCmd4ua7XEXhGmyb@{wD>XW3(q}V7c}eZCpx*x
z_|zyVv~W0_a|+)4_R7P9-9Jyhc=UAobA3Ix)JOqMfwch(8Fufwrz_s=e|`Qs`@1Hk
z*&&s6m6atWbrm%cy;{K;i{~inxi8ZY(M-E;b-i@0>xY9c6Splbtu&ZBUrzqK+T-0v
zr7T{T{dJq)Dsb`E*T;W%f6v!De|&Lwr&O=6sZ{UkMHhB#Ucsbte6rAv!+k5AwfK*@
zr^o;QWqyCY&)HuOKW6<~xiT^6?DDHit}MHkx85#hZ|(PocbC6DnwHFQ_@0Ykna-Oz
zZ`~GuN-(<`)%fM&F}uSvw}dUe$m6N4n_(ZbxAONl^(#9f^zZHYUGRTL-dc@N1}T<J
zbCY+!wz^%qEqm9xmvc7%T>A2Jl}nqL&PTqy5UvSFuJ87Gy=cW2P3za&cE<*;xR~P7
z*R|5l%68US?Y3)%8J7-9x;Ut~B$;%zZhJeepVwGCG_qHHnZ)#?91TY;4S3eB>UvpI
zS7KnfvP)BlVZ%&+j^$scy?giWS$lhY{nxKIKU$Vm6#jVh^{M#&-+$l#`+NWY*T+}e
zx7YuB`0A~7a##Mozhy?VBcIROmS|Xbr%g#Psd<-<`VuYylVu?-OMEYyylH>iaEMRk
zL7VVP>15rL=M*okkl1UpTF+m<VL|ro*RTEg`M-yT-aFGDZvOpsyZh^y2i#2@Oq!w$
z!nGHLqN1Dvxu#yb_kCi_gnet?N6H)YI5q9<__RN2=4ThPrr)3U{p)mVo^z{U-emdd
zu7!L*JWi~*B*-Y^aeV%K_S$7h$Ck4%Pq{40FW9LXazp*diuvd5W?#CYWXmzZvgHNa
zMuTGu6Z(Jt{OK<LZDH5PD9<~4MJBg?Na<vf^i*9Mbu3EVXKj;Tc70SE^EL(bNS4Ki
zA1Ed;h;tfvT$%h&khNx+lg8a&XPA}<7>hqP49*tF5N>&}+-V^{ql37c!yD7<3`~z)
ztd8%#mREi@Yq#`N*M(byR32<mN(<>bA<geNhhwwowgcDPSG0;K-hWplccR6?uI<|V
z=A9Q<Ci^@}*ZX?H=lFyy`6y?Rg(gRme9SrpZms@vaUqkMsB*QkaQ9Ec$OCtO6>jxE
zGFkntNs8l<DJicsI{n*!+x$>>(m&&J-t)~R?!%kDEU?KbTW>yLU+x>_b>Bj_P0|r#
zKD9vnV9V6A#ieuoRh-xs7Toyh&A$1t_Obde*CnfdYX{t4^Ycvd2P4s5IcJBB9Q~Xr
zIy`<3b2dyUXlPRtm7X9_m~kk`V1|pAL>0U4!Z3kjYgoK0)7PrJtV!T%>3HzK&Yxer
z$4242+Q%cwqHW*hti_h}-FkjiW9d}GQyG``9SJ|BA<Ho%Ds$~tGq>4IQv#P=*%fk2
zBR3$^;Y#!2dWMW6f6lxr4f5x!{qvLk&}ZdN6OKwd>sRR)ts(=w^1aUHtYhqZ9Jowy
zl6312#;V7qrITFuz4a|>+qL<;ZOpHNH8nN{UskeQtbTG=O^ms_Vk`Iei8|Hx)lspk
z#!>TM9O`3Tle}?T_N%*TTX*c=TlYWB#@@=t#>&dt($d<}a`yRt^~;%-Qj)<xijP-@
zSa+8u>3*A?U2yGZUUasy;^dpBcREPUou}`@<<c=FOiJ?wd(*;$%*nFd|95kFH2<Hm
z_XpRjYS%OE-{ZQJ5A56cE!xmgMf6?p{(ET?&v{+8T=eSEpRZpYWYpEf^t#UqXq>>Y
zA$xb)?wfb+-;a-twY_7{aPi@v7f+u2`SGL1%G#(^jde*tGPlgGIwrBQb+6Zy#+fzL
zarSXFcm>P(@T)!U4D+(m<`7cSWMmOhvUpy0d)D3A_Wui$-gN9gxGipt`R-kXhOgL)
zg@u&04hMv7zAV1J{_D~3{g#pstG)$@1wX&?@Zrlfv&5F{$lYJ__tmGPu3vZC|M~Il
zZiU(JSKZ!B-{1XhpMNj*W|YSqruG*$XPO<STsjkbY6^R9NNcd`^KUg<_x-*++y1_;
z{IuI=i}q&gdSCdrkfS`7{mq=@+Z#8ZYrPw^=d<A=DMS9l9#8uOCSIF0<x*;7&+;mx
z#NZsO(;k<99(<CpZNu%G51)KB*I)kGP#}8a8y5$OrKPdQuXVlMkyvofbhhtZld~B{
zc2;&X{rFouBLhUDZ{|fg8PApzeJXL#C2adV->>)oKAXSo_FBy|C01*@1jQ`Y9aC37
z{`qD~(5wGH?H|0oy?ypmubGeD{rh-#H~-_02T!h)Qp{LdljIo}qM6FDwc$tF))fJo
zsn+>38J9#nn#|R<RiVQ9;S>pNyQ7XRrGasC=JfS29$EKp)gqVr`-aua_ILQN_)sL%
zU;a^le?$CYMS+P8rH)yXEc!hkaT=&3*Ol!)x4)ormeJ$(tRE}fJW6XnU0$ZiWh|^@
z{JYPUDd5s>_CxO{@3GXfZL8ZWH+7@`I+w<n)fcjN7f-BSrs=oh!v~o<rw5!_ie8DP
zfr^)(U;Y-z@44`Och0Xn+%p`KYl}iH4*t?KTEvsRy6(g*Q}3PE&1Z2MJuU9<ZL=$m
zm{+-}*yp8_{v(kqRtF}yxCF`<%@O>pd?#3mL!5EqoXxd1H|8~1+wSM!5Ma1@bi=HO
z1DhPuWAB=1yDq&r^`e{h(g~5$Zx_o=h$(2X5<G3Uex~A*?T&JLxIWEUqH;rg$@Uvg
z{?F^X(q4X*yLnpAvsB1eJkjyt{M3_7uV%I>Epd`@@(SQ)&U~}kxbx`hWdaPc0&ETa
z2kKho=3NkA{hj$&Kr69Fx5wt{RUX@>li7Rp+ovCS%qA1p_G!&jS^MgQC0jb?ta$u1
zV&}T`->s6bq$~;2lvymp#28TZPvnW4$IF8o>uM#YYNt#ITQS9cQ($ZL<XWND<P`RE
zk!eo(OME__;cA#LMSN@WqTj9G&3$$L#wjEfAMoZq#Oi5ye)ghf%k5I$|G88i^#*wb
zM_5-(&b{({g3^_H`m47nt@W6E=}Eow3XjP~aWj*B61#$Cd%bL$q8K|Nh~e<By8BnP
zES3nRGA{cO`TTPF=4JC7UmQ_Lu2~iEf2YuXbBU$*3_m9u{$>#5>kcW4Oe?mTdpA8I
zt#k3La#Kr%;`mLSfuD}eFID3?^WbAV%ibUNthVXQah>yh+SDoa*Vr9?GB&KYw%vGO
z`iyD2&s{6s8@FCwKHsjc!dUWbKVN@epI^WKIlg{>K0f~ClM4f-Ty$5scF1lDe$)KA
zAu=X?_cftOhx?js!(MJO@mR6d&52>D!`INP87dn$Y`tbF?k4=NRegTatM;=!i-OE$
zbhgGFzOnhVhUCM5$}*$xwv1`3(*+X?ba<+)tb?*l=J|AOy1+7vp=ECF?6-LvBktV0
z6L<ek-uB&DCRVoA*0%Qc78X+d>0Z(fZX!V)4re@G$ZBz%+v=Na{dx}*tMl4t`*VF~
zHB54fTJ$7@B_JZ=#<iT?tmgGi+a9GQeQEFCe){UMNIvIwIq_HP6O@8|`Oov6kL%<2
z<GQ*sHu>1(RT5|Y&!=5=;a*g_`DKNX>~$rvkAIGy-v9e+IoF#NTH@WIck?3elvVyd
zZvXH0#g8tvzu$g5`0&Jh!#WSPhEoDe6MNJaZ_;|bZ0Fwk8(VUpGferI-o;>jTWN!n
zgUE5Q2oBE)zoT1h19>*xVsm6;%2!cY92&XuV$10)_7e-#uCAFQYstEM+gusH_&qkE
ztY+&Jw{>wP<jl>@+5YlX=;pV#qOxvZEscF2@a0qC#uy#`e*f-OLKnDLId7etsHJw9
zIYcVp==$|jwD|bkHf#QV;C)`l!8hu~mp4DI->>~#P+D63_2f3Osg--;|D4|c=XO<a
z{qMWm_2XjB@bh&oa&T$Yy0_zA$+|t6kxtdCr#AE-`1n*eFi&;qzZ8Kt#m}8J#CzP7
zm#)Z+Vkue2e>?e`;aA1CR(HP(sjU~9r!;YrNWsGkpAA1Z#kGCj`FH7qc*|4A?OvGv
ztybD}IGyc#V$j<~Gol*~J+WK%wP?*`w&zxMrj4HWA|Dj6OwF<X*eSeC%GOxz@&Sdk
zG!2<O*0qdxHw*81+F|^6kIT99#VfYOADUXB#N}D6cV9rM;8L!;!}RAS@63~(v-5jW
z`X_pN=6Tr)uB~q<RJ!RrrS_;|a!3VBnADNyLd6<i4{NS7h?)6B+j8n&=Xw90nARxh
z+*_)8aQmr$E?;M}tKTWvvo>+%N{uNOy#$Lwt=yw3*P96}RIr%PXUUdW7_{n3bKuFZ
zn;K5+ou#=YRHD#+W84{CnIAL%Jc=m)_SQFu!<vCHO_5RQe$!%ceW6CLG9yp%IQ?1d
zOkMrXoxSe!U*5Sc<-zeLzTv|k{vC1&ACDf`ykO$NuR;ZDF7`i&z7hBKu<ezQC+Rz*
ze$4TS{?w&8aamPWkjxYIQvzzgbM<@MbN)NCI2Q6p#>iJ&{)>y0s+e>yA#017S9Z{`
z$x5D@AB?44<_8{4x@cL#ccXLb8;00r&z#%>6jz>qKEFaJM#I~KQEUB~kD+z~KX=6k
zv3Y2{)?_@~zWIR#Q|Ai9^$WgVi<tlSZMBe#Z<j!gpQ!52iT~sOdu!(QeA`tMJZn+V
zu7G0`j9l70m?!;f<-4($%jV}y!TGF)2NSk@__ME+!@Sz)Z2uywnlOvWU*ZfIm+gO4
zQxn>Br&?Meec@C8{qc9Q@29SR|90oL4d#Le_iwQE%e*pUTPnvHJLNgfN3L%E^t<Gz
zu&Q=M$YzBjA-yGHEloEHvvaS%DSLf2?PJCNmzRHEtT3~kasIh`ySw}5n>9u=`;Sdt
zq%lRSgX;*>R5lT@jeKiL8}yjs92I{i*2KsN2lh`9U}SqfXLH@0lP?cDxXgLJ<F}~S
znN=JIBaS>OJt9)<_UGa8Np)^KZhG}KflS}pLuHj-E?UzQ7}f8yaH^XNTiR`t*H>SC
zd-m?xvu_^@A4>2&J1(vtH)qaVzh%b3ArsFFEM@9w;rhE)S?SzX-`gf@zuG+e=h>gL
z^rq>y%`XZUB$g?-aKvS$%62zwezS}H`rmywdrv2Aj>;?VO+Gh6^5Gm&7d8{O-n39t
zGpD@t?fkdfx5oM(Uv}_VLgY)Q*_*UpdOW=O_*YGA{l}}@<LiI(Zn=H*@Ll;d`xt#I
z-6czQ*ZzJMANZ!~@9X#VwRL5NtGMig*b`M;g0`*RUb^>I-nOsnGIyWb)bQ%;jyEEo
zc3fG!=}pw{3%R?lbqbs~kUw>ck~8bJ9~;@ZI-NH@kttoobtUZeDw&lhx3Jx@)0;N?
z?#(|}UL<Y3nX`>|p=NhyU~2B$O|Ru%S4HOUUN-HPQ#$K{fFtKjZ^!PwJ=L|bf#p)<
z_0qYQvTP)^m$JPG?e7ms^wT`#(emQRMoklevO~+Ki^t2$7#(=~_wnJsceD2H+qd`U
ztJV7P`|C=JYHZ~C-4{<Zbn6u077}zUICC`G&gBH#Z}V2&YyS*|t{i+MzF%nmvJTfk
z!|u{;nOTW`fnkp<LQD$Hrh0MNR|Jd5g$LHlCqz8@yVZbUv#s*<>HDMR89$ihQk|t&
z-8;GZ73am9Im_ptT&}!o{rjcv?MI4&jMlzikkjc}|AtMaHTOyIHMywG>e<J)bnr^I
z|KR>rWpMe+`=>m==FeVy*kq>cL(TXnn}6Q@P;<}W8_VR7JTLj@m(w#@S3Bfi>Sf9k
z34AEzI3+Gvapu0TUkvN+Rh@r0!PV{gAr27@w{K4GJRc-XV%@(@h+%_X$5O|ib{fmR
zCBzxFOlas>rd-cl?EfZ3@b!+o*<bm%7x#oMnQ%1JO+4zS3+wwYNfY{ZzRY>PCRzQ^
z4AqHkGbeu8F~P#HZOPP*c73%o+}|>e#yLAz^Z$C&Dm}y6X2rzCviGBNT19`JcW{)S
z_Gv*-!p!$3GZZU-*Hk2y%z5wl-6AH^@!R2#eTy_cy|2`XJag;bt7ZCGdrnIjSJc+u
zcC%j3lgm}ud-XC;s{D@4;u$l=W>#Of+MNE3$JX?v+wmW3-P|pWCUjlg#UrS9Z}(qO
zP2XD@p&6kWkC)%P^*DLaJ62Z3Yu}~jvOYf$sVKzFCpee2Z}YQ&i^7xqKTdul`l&D>
zZokh${s{pWmVe1Rv?H~5Uu@ICSC_3Dl!Gi>I$iqwbRJFJE_W=u&FFNV$rY`qxouve
zyEJ-NZqeJSdEx(y5QSCq`hOa?MI2GzS{HVq>&<zwe~;8<dJY-hSO0cc`pl-x!u`@G
zICKI_C(oU%a46A(;gPTC?;M$%>YKgvR#iIPd&7Trx%12i=PK%o_HS2mudh~O>`LZ)
zRQ<f!aZ{m>%;J^<S^QVjeD)nRFynD8llgDaw;|iHUFlM8_U)40U(0@e`0z3~GHmh3
z43qh1{hojRSyE*%&&Pf7&5DwMu;kb*&y!E~omNwD%AFD>dE!>|8KEw}Imy}9&kL<y
z%ruuc9brH7d58DmxH^BYJvBi`ZBEo(QLEKz(f?uCskqrgTp?ta2Iqm}mJ=AtW5e^!
zRv$n5_UzlccMq3c*9eW-v%|v9cDB4+*yD|wTtQkI)0FQVex9tow)a|>z>M7MrlpTx
z{{DB;VM)M>*Ke{m-deXkM^>#cJY%!Xx(T(Hm@G1K%WrQB&2%_am$o3IlRu8lk$2gW
zMJ{bsmu=R}cK7ym-(C7U;o!718%?zptXf8o0}W@c*rc_}OEY|ylP2@38UE?<^*?`Q
zSZ&Mwe)xU;{hV$0>;C_J`n3CGN{mFlD|2F`-t_!?`+vW@d%OMUA*<)&`a-FyO9PLy
zx9j^oTdJ9t|L*!~sU4rY;?6vho|klE()-CC!de9vuJD`+4PM%{QO|wN*=3vGCY9Eg
z=UPXLJziJH-~IIM<HZSqVV46f3;U+;uDW<_^|riMy4#Gxj{f<O5ayS*d8X2<S;w}n
zHIrh@y}mU!dy8SJpsyiwpzpK~3m)-!Y}<XdKfk^zLci`WfBpa0_y1k9{P%MCcKbgc
zp8dUgmEZo)hnJVH-rlai@5iIl)2Hj}#mAiE<I_@cIy#Nj)xyPe;{T#2v7H;YE#0du
z6SStwelEM!D~rwVa-z*ktwK337XQ^)b~I5a`uJv-iPLo!EZDxH%_9E&`nLzCpOjGd
zH(Rz};Y@);NXPT)q?9FHyW(}*4l7=?`5fo8s8X20a!HAM&WVDD`?l-eP_Fnrzn}54
zM9{pzbKfVf+p8nAsjyb*L))*qhPUrH6Mi02xL6W&E2l<e$)$LequIPYeV(>+82Wf>
zCw58%{;G|hSn{SNv0GUpl8xo}!}s&tTl$jsi7N5!Hu9`m+Sqtm-)*Y?dd;fk7B41F
z%8{C(`blx`w@dly%$2-Tzx^o9pYQTN<F@g-v?vLu1g3&VM>lespM7I>diS!(+^P1>
zt!vkwT6Mu~a*&J27t4M7@^4?WI(;rJ`p6WQo4aie-|U@qV$zJ+vxTE&6YRH3C{DIY
ze7EhjwcN?z!$Ay^+zS~pE<H-Rq5F(mx$EV|eeXZ&)Vh7DjFVj5{@cqutW)KHVR*!e
z(<+ub9Fp=Tb)1;6Q|m8_Y1!o|mX{uLPTu<d)0`_x!Pg#tzI<Zg9P4wRgHE_?iTQRW
z^vE5dd4+W~8ywd3G}<~ytA3N8_(9}MW!#dW7mEU{ih@HoOrP{gIznGNRh?^!)AI|O
z1sWP+7Z)y=e8VI;CS6mBLH*`0BaZT>Hnm&3FV}xGi<1v`eYs2OmenJ5Wv_<00gv1R
z9RHX+)O6r<d=?pDnC_w-6eYQqJ9x^m5U=T8;x23+j9Cu7D;PchA7!?eUhVD1acs@v
z-_ndTKi*rgoJs${oJxIv%ej++vwv~f|My*?XXt!u`GV4p?n5<E^AB%f+vG8S#>bf{
zQ|+euR;|~4)-prxkm&s^{owL9&O0k-1n}Mbrdd*@rMc5!rNSe=d+eTuS|V*5{&yYL
zeYvLijBHuy=IC|PyzYu0J-$k&f7Qv9B7?bd=lJ{h`1qEyZ_cv0mKHK&qSllN<?f5F
zc`T_3m{ot%Lxy`ZvtZ$yHD6t185Gw3ah9_<p(l7G`2OKLXXke=aZp}#?w0HSg-t7r
z+ysBu%CXqx+&B99wbhANrDef^#;x0Or=NcMb@}bzw|`%)V)poa`0?Y(moIPbe);CW
zj6?37-&^#~|KiBA4C?A~5|3I~5*xeyYgkM(69ZoWqY=Zk-q>|zyK@guW0voAG6~eE
z);RXh$f9g6L&Qb9!vZ3Li<mgSSn%+f2~6TkJ^rpR{Qb7q->s`xbF-e>mUs4T^6}*M
z;tOpIEKzCEw~sXnzkGGhtS2nAU0nbF%m2UY_u9=__df4_-Hu(qKDM{dew|<Ub+!J!
zf1lobNr{m>dMQuNe(&#>_WyqTHkZG*wmkZNp~8d>!i|e``ud;k|MlwWYjO3r>sFdu
zo7&9QX!xn|?p^!on+6=JEJu0H#JoJ&eOX-n`SYe7x8J-<`@Z4ko%?ok=AT`@*f}p_
z`_|RDw|BT*-=?Fhbk#T7`X!4rTgps%pY!^0^UwAjyP9D$OJPBAST@sws_m;^&(+jc
z674*B^5gCM9wIwB7B%cn`~L3mZ2Ny-mM2;L+Fk!gKfdPc+O0K3A3t549{>MkyZpZo
z?f-xL_;~nz-S^+CSHITZ|0n7D>}Oxa<LAtuJ2h2RC--_&^CA<i6#XC1jVh1&+$n#)
z>sXxMw)%`Emm3c3T9+vu|C)<Q(@BMs-PWb7*k+ST&ZjMlK5P*ZQ0kL@6R_ne^YZ2G
zui{@XJ?qlV{C&}W`M(+m@(+2YO6u=5(cn#F5jrjv@hRe<+>h(8y9})LzxVWO-00T$
z?QrGCxy3y7F`kE>ta*O=i0jJxr$ldVTJQbtuX?0c@qd$#n{O^iJ}GhF@8bpH>z_|v
z^v3<%rK`<S7wg@xq<{2Uq*L_g#fSQP#mp>^CY_5i>eWs&`DEak^ugSuHLqJCy)kul
z`Z+Uh9mWUe_RTHe^K|jZ?6=^qo3Jp=xn`yCos%-5jx3JbBrp8BbTcpdy$B=Q-t_F%
zn{;|Fc7=G<zn6KxYu&q9+p=|{KV~+>DbCEfyjL}p<A@^9^Q$-PON;`;=LcMVUHG_q
ziqreWhhmkA>{C?)7=Q0;Wn@@Wx`cxzw#7;K$d$D}#0^9DFWm51yDQ>mOSh&D?}qua
zkFL`VQTcfMMd8NC4}TauL>HxAWNy+By;S+T-KgW{o74JlcBe}0GN^V->w9rT=O&}k
zdT+ZMy+`*y=@aIdxaSeO+`}ag1!m|5#<eoV9ZZ_SCAv<t=Tf7raG%PjR=X-?Yi?og
zPSvs(A`vMpy$>dc8X0$-ew8nfUw_OZ|J$Ler>Cn}ghD<Z_?9*A$9@mSrpxv$Z9P))
z#tCi9zv$?DJ)UHs(HXeyUZd=_jM}SDj1PxS`N*ur|HColWA@~KPmlhvi)sIS((2!y
zZvpeV1>fy<yYWBdqsPI8cWjsS@!R<Q{a?8@<%m|X-tPD3tG-=cx3BWRn;mawOS0Td
z&Uu<P)wcXv`Whw^3GNP#<)Jp!dTI3`_Klv;RIX;Qe||jeOJk#}>>^=X9<QoBbHg|M
zR`mJvV%^H!+v3)Wb*$K`bvnsr<{AEH$J^VFe?C09dGY3#A5WIpX={gyMQJbfV3=`p
z=Yi)RBE!$ry?@ZMHhx*)9rK1yZ9f*Ab#D;3FBO$=;*iSZsho$8JHJ}N5g_toi^Yi}
z-HCgi-fTRwF1zgQx=5XA;q%w!=gZHUcDksfsI082tgN6SqW5^z%*GfAqZ!NZA5#(v
z3>7^lz;t_EW|{QuYj1WmpJOzNetygU^wGoX@&s4v^f)Dmry20@$#7afb5Qus*VyMd
zA*3WONq?8eJ5wh0yX)@WSu6H_>!EG?gl?_e<gl0dl2}i%@^#xu(ur%McIUmVy1uUL
zE!XR-Roicto>{emuP?st=hx}(!IrjmbL9WMY?rUA`Tp+yzvKUR$Jt%EU-$Xd)1MxX
zAN_mw`1gFfx|;nl>ticQ3=AeuFg$wk;KA_tnvZwQ<7MP~-sQd9^F~6@W8sbfg&!BC
zzvM-q+2Sqta_J1g2EMfR|3BjY{k*>a@7WBaxHWR?*XLhb{WWd#>oqd+u`=@Wm?I7~
zxthn`?$~-mP*YtY#YIxFSIX`4;qLJD@pAKfyH0JMyyY50LhSaq_1k8DeVKCT+{#5K
z-1%ql77I?`;Fz)d+vWD}|G(|7kG_68{_nH*esOy=jtL*On0dC}&o6#Y)z_ob<NyBq
zzW@K<_x1JF)w}<FKbv<wr`%dVaZ#XY)@(QNs3RLXx)=pB7pfTS$bI_ri@?p>Q<;;l
zNB&oGcT_WA&U~9+y2#*A>ht6*R_79tO&f~!jtHpK@qO``8gI6&K2G<ias8|v`G4Oh
z2jqs#6S`!VtZVrGx9l_(K_0u~T_+Dt_`tJE{`bLxqA#}hmpz%q;_$^(BejuXCVSwf
z=OW*j&nZaxyiCqK(>X8j`C9=yrGTVQ%+Dr;$vhBq%AM=^>vyQTh2@I%Arp@#-v~bH
z^G5c-!jSGYYxeP+^t!4Oq2;Wv^n6p8U{a^>vWnvGY_nHzr+*VG5Gxa`QBhEl;ylnf
zU)$9<_Q5gdZ+niVTz31iE+C#~F~j8hSG;a-ogF1G<L0()Z|-C~@k*W&w$L_v?b_XW
z;oD22qi<JE^_$Y4sPZD<gSd-_kTO%}@o799t`>Xt{T32s4^XR_wMjs-_n_J9(r5qX
zc5!n*3)o!jz{dVM^wu4|qHIH#|4Dt{C)!zc*?Ppyso=6MFsc6}_k*)|a^?L+NymP9
zxCC<6F}|2;@;k?O!Wz9QEA<IFDi-<^gE*WIb)`QG@|;#tAyw6?xb(o}^b_4}m(=;Y
zl|F1LU0fO|vUrnDX<*jluirIqpEPIS>Qr!9yE$3y;t?@fMnz!@7WYRJa@!yL>3#5<
zLv4PJ=r$g?KaO0C%{v-7_N)*$a$cLz(3zgqY-_}jW+GU>NQiGzOY7fXOjX~V0;&s&
za#R)ft4k=kUD0P!tmxEgI%4?hJkO(QUaR-hH~X(J{;_81i<dud%P&5xxc7^|DT%;;
zN6u@V_j!FVJ7&?|wwacjZ+|j&nRDlXykA-SPv_+S_21P?{%&LaINj%HL5A-~(c%V1
z2foPH>p2%GMwqVibk5Te{K+B1^-NviWbJFkI7^?^wcK*k#FlU}=BBURdOP;-j+mIc
zvHj|ofBt#$>B-UW@io7nW*Dh2RboH8f7ikd-`?1#J&#IcXYf)i_Tgk~brH(bV4vsz
z%EodQ%Z$#@``cOOMm^&Cme90@{q(ucf-?d}Tpf>>-P3#b{(as%xp(s6v)4|rw6n9c
z+iPKCH}Bjs*U+l(HBY`g4i;_NztA(;RBMWx;Om^*X3@9v%6H$*E3e-lr+@8r+ug%w
z&mOPp-TW~l#=z_OoYd3Xl3xcjPvBS-Q8qVuTiwx9dS8?n3*4Wq-o&w=m8DxJ@B5BD
z)yi*5Hp=)qh;x}T3UwrDSj}6ryJ>sY&bMK=wf9YaJ^R#-Z-ra$l!|q4l=MGak+f`b
z(UR_y8Fd!6^Zf7ctKR?X%gghzcXQ<W(iUHR^zGfdWqGd{Hhnv57Z@3m8aVIFv-RiV
z=g7^UB6Z~MHs9xq82N<frp!Ot#BjpV>w#&L%ZlZjf7btP|Nra%&*%T||2!%w=<l^w
zraotGZl#Hph3(!s@^fyKC0}V;z`$_eP-R}Dk;<CVrCY9csj;`WtEYcneAy{8#eSEF
z!1Zl!^S0-{J{B4M-fWe@lC!~%F+1{f%eTeeiTk_0{_k}8`d=GoozsikziyfFg*FGl
z10LDI(>6E%y!i3upAS#s|9!3BSNr+ddHX**Zs&z{AAi5^_q(U7!<QMHdSP*tXVdR=
zPNC`&=brO_?mT~{I`vGz^|f31j4wIV@C7mHs08h>|0S~0VpE#7nTtq!X~2{F{GY7t
zPXCvkv~R(7t#1!sE?m%aY{!o;3-+z~&Sn_@vVC`bNAFAb{bwdE(EF`?Ks`mKLMm^b
z)>8@HDa-Cp`cx8R5~%OnAun}%gNYos>~@7FX&Z%0(w!T3E#24ArJ6j&-*Jk^FZ~H?
zx)SSOKAJkiV!7s#rR(K7_e5WL|3#_yjN&Sf*w?Ol_Z~mF*b|ra?ojQ^_l^Q(8znpJ
zk~^OT^V|#*+ThB#ZN`H4b?eSMPMoo~f1<~cH#G&On>#ERg%UVEnR2uk>#u(||NY)K
z9!@&~+;(^BxWs2iO<eZc<;KSB-OJwI3XGoGYBtMe;^yLQ@7}D-oV_e}_X6&v27#Wr
zoGQBN#|p~&-|)D;J)ffEAXd67d`rb@r}-Z98^k9Z736Mz63yD6w})F~`Hcl0Rr2Qq
z-n(@899-wn;%GZ_Pqn~meV+4GN+R~cixnfn=1nOw>S=UN<oIyh&PT2A=t6&iQwNrN
zRxA5w?@4(p@#LuIzi^{6VeX?F;@A6s-OiLcW5o~E9rq6>tl7YNg8PPJW>Bl)R84o+
z$QOq+f|opsdug!W-9A@cK0xQhnMV^iV>jeTC$gXOOqV>lu-s>{LRZj?o7)*VRr;QH
zGKennx?I)yF?{b5tt$p1?HodZi}$k>C_I$;=)fnKHZ7r~<ecSkE0!EK#f&|lY)^1E
zuWOj_K>S7VrX^d{Q*GZgD%3AqRv*^d|NHz&yH!eSlD}8moD8%O5MDX&+)LqK3ho_=
z`TxCMt-t5S5GbaS87S2}IZ)t3KL5L~=DbI@N4U({keu%9{OyaOxQecYN2#YZ4@aV;
z9Gkz#$0sj}MgBCabFcasvTeh{G68Q9p=s+f=WgAd`}fC=eY>M=)@(n$?dP|rr#D|*
z!8Bo8vOUjN4p}FGwCtmn0(_Ct7ccR4H9fU`VR@Lt-fx}IIj^r8wjwNEv#vGvnN&}9
zQ8D3Dc(5b)R@Lsk`}gkNx2tMTd_>#FlCq+@!kQvG+Zp1iOMMnZB(8rw(ROlT#5^Ib
zrM`(%l;-Q~xtnKx`|Xk??Z0o!*Zq2Uod11T?rqNUzQh<wSB5n=uYFyY%^<?L(1~T~
z5iT<^2A%SS7LPZis(f;8y!TCBZ4=MgqT=5lgM+sp7dM(4+R*#OcSTv*&5~nVm%Yrn
zduNTk>HFtVn}3&7Z_bqpmGxOByq0%@Vykj;*Q!}()1Id<&%gJlqPG5b`|;NoSFk*~
zx=k%K<mrwNj%hsXk6#|lQPW|+eDmYMj|VIKE@-Jd&)E~UsWJP>(UfADevc1NZ8L-C
z&71fC+3)?opV$9;etrG^HCoCSG&gxARPJ|a*d*o4$Jcd{V}bJ0+DS9rCru7wSaf89
z*(nj$nVV*bT)i<xv^BPGfrW^QOS*1!`t5aVXBS-&^K!jcu;UBEg4eU&rfrVX6XX9b
zfA8Oy{Quwf=gG<YuL{&wiel7!Sl~W;kwUWGRLA6!$R72}+wK2;`FHsE)uPJ!^8c&j
z@9wWCto--LH8S+n<G;UGu$>A>`cd*o#!JJ0A9wQq53ak<<z1V-?)9AF*z27;G8S>M
zZWq6$?y``-{^F_ZJC<xKy(+|PB{>hrd0akkaAuzU2iDCu7x}+c)HhPzzvkni4AHI{
z9*xc|wZ27DQc}*VU7vsVhti@G=g+toi6*aOl;|^foc`%^e#3;_qCZ;wg^n&RP*Uf2
zoX;?4CQDO;vfbXuOFo$oZoG2dQstDqjcZzr`~CA*mOlT_^40a6^y}@i2Vc!~OJI4>
zmX~s5PQ<+HFErZ>oAenk7z($`PV~qxf4B9|U+a#OT?Z!iwWu)o6w0pLb6S)=Y5qM)
z^A9TREavs6f9<}$X>y;dg<8QD*Tf#qfBL7_ZQb@V=kl#}#c%%P_<Y^8<IlUvYZ8vj
zG|EqT_+jRQ*$;o@MLb>>Y0hsS{mouRMqNUw{2*(BS|X3a1RGC{sy!1T_SD472q}7(
z6t;`|{rc*2#l!yk-qV*eb@^-*h34;^`M&D@y%~0e^0R->d}GNulj){vmn4VT&7hPS
z{Bs|htW>Y{+>)?NM{xe~HkTt0*k^bw5LtC-N@!s3sa0C5oW$FD0#CW5ul-had`7jr
z@Sz(U1&o-wm@XdRn<^A^j{mmJ_B;CPem_y-sgibZ<2o0m@8Qz4;6`BGr;Gl{%sjT{
zg({~@s#ZNScj3u9#PLBW<c!l1rYRAx=5CF$ej2gy$Nv{|l#ia_i~PAfIODzGL3f}3
zdp@rcm*$(?HeX5S|Ei@Q7?w{d^Ek=S$MAv2B-qI4V?(*7ZNj9-nW9gvJQwJ5u8=xr
zwRfAzY0C+X>sfwH(ek=-^+%z_gFg=@i+=j&td$XTNc!-bIgd3iP5dk9^hZWye(w2_
zI!2w96K*WEUH66Qgos+_-jCX0b49#sL>RXzU9-Bb^K@fvdilD$+it&)-(#V#?s}YW
z!X8G^32U!!vYZsquaFrcWwMYjviX7gKZO~4W_l?0s7#*ub@RhHdbcJwv1MIY&?@|V
z*Secmx{n`y^zY%X%V*Q}hVHVlvb3|awzssNdxmd|W<YDh<a@<8gb(~#?2_u`cqJ-3
zHe;>e!CPg!Z(m)jDs}eQ<i{_6OkU{E&%e5F;hL_CHzaaJ1ST_OH$S^7sHNz_-uRp0
zK<?=`Ygm~S);`wjy(#=$aBrcMY@Xb9^Y>=4#~){=K3~*h5D}!fM@o>fXF=i{iD})(
z@A~I&aM-nTe}3-rtx~<KoU|Q|daQ_;I4e6?Y}J)DOMdp-|NnVhUjFQgHOE3jeOG0z
z>D8MeH}lhz0z17^SF?Cmc!l<!y|Jy2g>zGQFT1K3Gt-JEn%{DiRRWI5%%Au7ul=9v
z^?&sLe>wX4`r?lg7X(+GtjLkJUg;K<xwKZPqg)`-OY-}|W~CkvugqMpwOmVk7>X8n
zRM!{^O)y{6vnp`m7N?}=EwWnm?#7dDth<|M{=Iy=ynWq{Umrd`dv$sF>i+V6{<(*~
zy?bY0_vhoMtJkNChdR1Wa8Te#GL)Me6H~*!py%SPJwG3we|`1U`TAd<KYjf8^|k)~
zzjdb>j;kMcjdap^{r9)z@BfbNa*F<c?}Q#np4IewRqpP$dCwn4o-3_uIn#7({pKUb
zw<<Jv|1|1Yu*l|^A#>uK84DjcFthx>Sud0(Sl!L2QseT^U*$Q^lw~`=U0NS_;Pm?H
zTMArtVhiSWa;nrnI&z4EwJr1eqz!$HC#L7rT$=squ~oFe+C;Wf5hr{180M)6WNlK}
zzJlT7L-xSrZrfma_N+%D2{YNt__H>J)OfFBKD)nhLGatdhN{zdB>zZmjCtpssnb34
zfc&p2Lz}aaEf#+s&-r>|pPi28?_~>~e?Ibn>3*+42h)TJB20<AdVdv)Pdt`ZVQHV<
z!eBY2iAm=|)bT@pG1<9^Q!aT0TI~8;cO`0@Ugpl7@jDarB|EbgZh707AV2j^%bz*k
z){5so*==H9leXFZ=I*S+8U`wC4*ge?-b^;Q%sNpboaG3EgG|it2AwA^3@47eq|~<5
z%=7t|!>DHQ!OHvG;+i}m{bu1K-G3|3t5~^gZF`<MztyyXpX<xzUyb?p5<&)f6AS!1
zWOL(B{(Zc)VBZQeuj$7^Tq8wOGfz25I=gA7dSowRh*Nssz{<k4N<6LTTq84M*a0sO
zPEGwyA75Fq38?7zD}L0TEzrQ-z;Z$&cH6}H!akFJp4OVl#%X*tWJO_XttMBa`mq<=
z1Mbajk!wGALc)jRgh$Rz&Es=RZ$&;`lXm>jHOr?mk+aRXJFZQBzgIQ<|It-{GQR{1
ztvtQY_^W=+r1a;eGrtLbeltaG5_9$Cl{rdNG9?rz6bo@SI9BPT`JewHc-Q6cL(w&Q
zEayvK&q%d%nVDkp#)h*&E@GyEylcC|7m3ZH?cWdSrS5qXzJ12krI(_AJmOPWr8hZB
z;nuqco^Ihc!j~<q2zi^brQIU;y_I14+u!R-&#t?j`)c}nzS9#q8t!K1dL^5*J~-gy
zk(jV;Qc%N@u04sdHv|<YJ3Fku{?LQPVX5QS<##7nsYvolRBxWQU4nB%iSPCpop<Z<
z^Vj8vuijc}Gw-awU;Lao^L+Wz7JrP8oLoKgd))0aO(zW}tE8AnC2uigNXXrO``xS9
z?yS0^8nbIAdR}iIl!sq0We-}g{K2H4Q<iU)WTa#T7foPVx`j<-x8Us(KBaq3NebMo
zuY2N?7;db)T{c(QNa2P_itB|Ay%Q4emC}yziHfUAr_T2M{qk||v80`OI?f9dY_ztx
zecF?*z?kHEwWOx1@}9}+)vwF*?QHJZZ=3w{PeNTuNlB4~{j9#@?3Z7DOj&dDE@*Q{
zZusn~on?8~&2qOGR7whNdOSs3Wb^ailNtji8cja=W_JGm-|h83^w-Djm-1h}qN7zX
zKs$8mQl+JWCJQ&`Uf(MBbJCB5St3fV4dzn0CK=Za%XR#0>iHK79Bo%V|E%48`SN8t
z6Ihlghj^Sxx}8>D`}ZYiEqL9pmw#_>FE{@_`)k^D6TU|8w`E^vf0w_%=eNOZx%10s
z1^m2stMcxaYggYq`0^?Bc%Ic<z4!08uRdC}^Vj40KgSOz6&9EOKU;o3HfG-pzw})O
zw;rE9vHpSOfkTUb&24nFJiXRJ@k$rhAt$FQR*r<*X1>A29X%dSQarae%!_RKyQ$&O
zmq-T-x2b%}xd|K=r?%Gl_s3P%y_x>7Zt{y|;rFTobJkya`Ts$rOOe()OC@(Bnad6@
zr}8py{>#p%wqyq5q(?uZo)mM<|1tB|R<)oxhh3-ewn?@)%s<i091#;2?_Op3twcHg
zf%+HwcQ5-4mP&C7P6`h^Zu8m3)w}J$0f|TKiK~_WexLcJ>(c6oY6;JL>AS_fitoI)
zPFkdzbzn~Nq77+seCew9XZ?8R!DM#xXbN*!T>KK|8@fd?to7So`^*ex%$IDLn<QI3
z-!q%p*vxWj<f0X&4X-^<$((7`mvCXaw}?%1J<rFV@3nkSegAZ9L;l0h4Votc-&H@E
zFyZQJ-ej+01IP4hJAI5CJU2QttT<?S;X!(%#D}Um3u5-N3Vaq)?%QYl+hd#ZB%66m
ziT&%o-ZYgzR3P%#m}A<#KezYCZQ9OyJO1K#A*Yi~Z&dsD@Tf~H4sTM*Qdu05=#%M_
z*|l%qcZV|*n3gWmnkR6=TD;Zrfm-XF_1-5`zpwi7P_%XW%s+g`;tz{VUq5kXGnbOs
zW&S$V*9R4UB_?gRh^beoF5by+AjYPc;FuyJ7%=bF+iGbwwF0FRO**G6i^58$g}8@l
zubwz1tvBbOq9Etw2}0ZMJy25Ee<%KTi>9Sd)Zf|1+>|uu>dMMb*ueJt^ztn!KFyW-
zo;=T%D=htH>1^1ZvP0PK(h7-m@tcthXWrc25i!eK?4h_>e&9~a2`ygr`|W-@HNQLF
z9$Xa=v*CJF;<*De{QLGjl<l-IVLR$No4sha+~WC~v26;s&wP21%sExwiPN&|An)-r
zbMLMVlV5vxU06B)_xQNDrpe3JIWAeXs?c<n-?7VCCZ?{LJ_}7I$tNsUlV@dd;NtLH
zEg-|&l6gzWgFA7K!ucZ~Jj_;bq+4=!J>IrX{=NKqySTeC_w%kvr=ICP9e#ZI^UsGT
zUrfpBV$yJVylloEo6qVmBoEhjxv#fKbazp-7Hph*`(4@X+_O!ohEnJIxf;I}mVVyx
zbH~jtQ30h;h7FoY+5Jy)xKDM5hX!T@-4MLOk>n~Mvw6CDvX-q=8{;O6;1`>_Z=Rn1
z^xz%Eg1xU}&K|wdGr@yXZ~|jVOsK&skDSd>IgWR>mGuP*X0$5kRo?m&d5&q<rOA<%
z6&AC6GIp;hH_ceQd@}Reu8$RVvt`d6b3MuTYFq5O>*m|}**tdNEz6E|@b=t)l5cU_
zmyqOsA?2l8G`n7XdGjZH|F3^fSBF17d82&ZL>(uWQ)PEoXr;3Umz_=9ti-~o#`+{6
zbM2qNBZvALT^T!5uSXUBd&T$9MoH+|vbPciR`xblwxP2eIE9$Iir;4K{qg1FU;g@k
zm;LSc?fg~nwV-_e-D_tUyf%mNteASw^zL#0`EmPy6&cObP}%QUenz(Vb|m|P)t|mR
zdi3VWlr^{4PLsd4ulDoh+2#57WAESD_y5Dk+3)xLe)M|1-+4dx#}_5MV+9nLBN$I#
zs#N3L{5ocHtiJs8^4RR&b!v-$=G2(T%8P{vXfa&fxollF<3f&;oZ)PNi`EpJ;5zqg
zPK(Qn2H9$jM+sjh^n{<6JyAVr!kPB#{Yt94Ym|;!UVg6NxZtan&sW=t`H_Y3j|G@=
zm|x8^4i!Ej*ZlcqSMa=#g(n-d%@*7}A-p(mz4Mo@4~jFCrex_T{Cz6>{dgh&oV_Yd
zou_ANJ$Bgc?Z3KCcBg)~g3{6hodOd#2q+f2yxm}<X>1(%V)J_DZ(bcQE-E{8GWhab
z4BGhXeS6@|Mb<Bl?s+@&=tgI^pd%OaTIU@UJa$)OqK?$HJc-r^4G)S>$tWvmeAzT<
zLC>dE-#55U4B200<UZ$tUFib$y?q-DqKg_&iF|rgRlWVyYzdi~RS!j5CQeg3oT*s1
zZpGA^FL(OyELstCVdLTFr{=|MoD<YLlTkp{u=SzNvdB#T`}}ry1#;V&I=W`oO!D8-
z#NryfLn%|>Xy=cZuz(*PGJPL5Ynwhk|NCEJz5a#z{Q6468_kUlAM{O`Z4*6>G$Z4K
zzTK*I`J^rNX<ix2;uQhA3~puQZZSJ05h`+OMZ`7fr8O_6+8@4j@!&be1st6df}FgR
z%jPk^b~@F6T{7W9Wa5rbhmFh<Ui`k#u<*x@!dKS~g-UYvmMSexW^Y{0{)?M=0<)Wf
zo8z+w^6&PxD@GZM{O_~g;^A^dDX@WgA=ln~jw7l;vu-L08Z-RWk^lVC=U?Pg+kfgU
zFa4*xJp5bn?)9>(a@^m4+xZ1<RXn97`gD$;QT)m8LNXPv#12kTp7E8T<L3s=Gz;!$
z;ZH7Ctlsn4vwFppRVU^dNZy>GbAGb<{Llr<rslK!H)dq}F4b^}YtLngply2ZR>u}!
zh_xv+WGZE~ncd~M;@+EEyCQAw{r~dp_{)klThm@MT#zuH?U#1><iQdvY5!wSJp?(q
z6*Sj&Z_-+QN@SVQvVbT~O@+l<f^PBW7~Zk<dEval;LM65t=Zpu44FB&n7wb`dYe{V
zUR_>XTV4J=LAQT7d%Jsky0|!d`|+n4i+VYI%{(snrd(?dP_Oj5cHsAW`OkBfto3;C
zGH31E8plPRp<ex-{c{d%+qPn=p;z`&&y`Hw39B;&C*)p9%vd{BiOF@_yPV?(IAnCb
z-8s#6NHIF(=*y!QkFE~1ym#g9x;XQ-$$@LQ0;M=Sl_woqbYxY4RKnIfk!`WeJ*}5g
zcqZ|`P+rfI%2;;i>fLRVb1kQy*5`kgdzI77I&OX5ddE)ZwWl{KwdPFFa=rca)33W>
zJa_Ki&3_%m&~ak-;@bb+O8@?T7GLMTCD2Pc_#7W!{N8_`K7D#xVrAI+@qY99;*`c+
zD`#)BoFgwYUrunx)lc&{cCQmFa9PA5IQ9HFM)pgb+qY@CT`mgIxZ9t@xz+IQx_ehm
zh5Y!_7N6uWWh!aP+a4QR`|HEAv-9i!d~BEh_vQK3$E()fURSEHV5?a2p3^ri<JL~Q
zzTN!v)mM+c%;}u6e|f~5;%iaIeY`>+fApCBGb2Qy_UqGdeZBu**8hK9|M$zYx9|V`
zEq`xc_vh8$-TC`|fBST``*Zi?myWHwHs{^md3IY|?(5RmYf59&XMg=xxH)=D@8yt?
zr#W`EwpJGFyC%9BujG3tZ=Y+HePqXlk4!l(E?2EtnAxlwjP}N_iO=WVcCR@@b)q`6
zWo1-92j9FSwJU^<dfm9c<{?K<sSWE#TdhxtdLrNdH6~8Fw1c5~m1H+V%{I1^FYf%Y
zj7fjDc%mcY#FwZ3UQ%3=_HD`G1(q+rpB0kmJ<w^)uc6d4vnzgHrv0y@$rdO76o+Xp
zGFd6VBzx%wFBdh}I;OYx%Mv9DYUWk`coe+D!%WEkoz=;krv(1o6u2bV(iB>#zfEgy
z&zU14FO@Q6IU2&Q6!WS+H9Q_+_|AI%1c~)03Y9E7=f?<qZA=Y(+Va=nUFU2o-xrUb
z-8ui<>5k)yiaomxAL&2V(ObL1@RtPhhkKKY=dFHXzuL|1K>Abtjb~UVB{V*8*?Z<j
zp8>}np8K;u`=`2UI4Lx7eP7-(Pf^{eiOJ=KOq*@-j~uO|XN6kDHPaq?$`{UmBRNAX
zy!wRGNrzgaB^*x$<!t-yO_QB}`fa-UR<!Yx`IXv*JNlCk)bVNx#y@^OL#4|>^GcND
zfA5z9(W`qDeLbRDbt3-nf5?=WSf;UH0>fO6MM(<k4LXk+EZ?2J;6A%y@fIFON3LCa
z3%si4v~cA-`SB`w{*{?iEF@jsf({>OIN{Lnz(L^R(h@@j)fbtf|4xakGaZ`tSLK4|
zbxsy5jikvJ68<eK_R=hKQps^-G<XrOw(4MdbJ={^yY=g|f9TZuemgMZ&z8ThJ5OqI
z7>a&$oG9EpU17~Ti`{E~ANp&&_C&C<<39VYzF!B$ey#kgu)mY7<(W*mr1=XSp@`)3
zcLkJ~rZ_#QF7@qd`p)IOe`P{hqR%SP2Bw5U9p<h6{nalrZ6YgFgtQED=5E#F3uh3R
z>v8(eg**E4@|}kqy<DQSb4x;dk3GJ)LPqn8v-_1}GZtyBUL_U!_+o~Y)Oj^__Q}mJ
zGmPen&Ds)nN?@Xa75kw?X2wQsaerR7{|t%kXU^TK3b(h@sY%^`^Uah?$JN!<)6+j6
z{+Ux3(&x6s$3&uKg}gl*r+an7{h|rnPwEO)Tncke?v#*cZ@6}=ZKp7Yd3d<_?yJjU
z^X?SgzS*F<N=-R+ic!}Tr=<)h3$@?uDdFktY8K3RvrYHvG;22POI9N8d;)uZ$O*7?
zPBGkd<>`tQYGP71udKDmmNi@JujaaRMZhkdj1aM@*Z6aTx1Z}e=G5!9_~VL8Qi=<r
zBpO`TOy2f5mN&ch!M+)CqM=tKckYg=ui9U;XaB10zUfOVDj63GHM%|K+<x})qr$5>
zc6#rw&(8G`N^@XLW4RV3xs>5TTZgD>$1#!1H~*|&zwhVQ_4UOyMQcP@pGy2lzPvTI
zy14v$)bCeUpMHE9czbPF_v~wCtGB((VO|j_W2w%R#A<Ek^Z4VzqaSZReR*`p8Vzep
z!3D2xy)A3*c)Egzh55+Utgnyz>wg`VulxS&Y`?kr_Wtku+g{EQO}8w*w}9csy4PW+
zfA8LXHZ3<+J?eLn;nK+(BCaY+wi-UoFwtE7F(s(ieSwL_-{<o6Kfb(teE8AB$IHL3
z-&gVT==AC8>Bg~<+OKoV_utcv+P?XX4(KYqtk+TLtqz)6>)xwBeiabBa)HFln~#^D
z&D!g;EZC_?ZmHMaf67*=Dw*F-JYVC!z{aICdD-O&e{*;LS-R0tW=Wu&c=x2aRze)_
zCHm#LBD)z~h5c9B@u<WfX5{$XQ1&Nhs{T&q*OqhoYS_-r=g@V2tN-R|%-#yS^Jf-q
z2<}~D_AnsvnHJ;!9xl5H4$ZzgvQ7sMHY;%3Og!?~RY9ptMyr}Xs`F@`-^_z9Q)B{e
zFt1QO%*d<Rw|9Z@&7PXbw%W7PJ@*;!6R5CeH=1l<>?ZuSVf7pBJxh*tO>g(Ory{_{
z$+hUY#qSB8NA`Z(^nHm<@0Z?5_D=--U!UAF>C(g|B~jK6#?ZXp6XF_&3+s$-oeT*1
z^W~=QdAmD>Gd_0iT`KjP<*AQY>rPjGV@@t^<^@VGD{s8y;<R5F-uWZO-KlGf(mAes
z0_}oHh8ipS4z0C15kKMIEtx0#mfdbL;?hdpy`k=L^GQ(|`;zyjE<euql(R25WK(0s
z-__mi691?<Xn|;!uiNBnBCBdHJ@oWh5<0`?M>E%v=~8t<uTl*a^Urr$h&^`~DD-%5
z+dt>c(ThpKQEpu;x6Fu<Qmp+EsTuVzO4Zu#$StKK9^6_E_cyQ^Gbl1DvsZV%w^@|9
z_H*}V50!5%K8i|9r)*vO|CouA!XKwvKeo2}|Mz{&S9pG4-s_tGvTsETCb7QYdNS>1
z&K&tY|0k`Dbp2=k<IKGIs;U2G%wOv0GBxA9xVN%ky^;|-*TczI!<pEgbGN+M#Ct)U
zUm%U`evMTci^@_@VQ;yB>EFeFzq)YXgnED*Ba@Z*PoFpGKYdsf&PBO626<^7kIiP3
zzV+tS@1stcT|3NGE-mRbi^|kGl@-#fKDoKMdGX<&52vr+f5qWA`{%<SQ(U41gBGM0
zS$$JJXW#P8BYpR_SpDne+rMAGZvLI0`*n)VJem1(=IpVUFE1Oq_|z0pP0^H|7cYe~
zmp=NVo}Pa3`gc3S8}2WfoD>yQj~rUNS$2Z}6O(e(v}1+3mwxJ<DSMl?GcP>)R_UX%
zt)K5@tm9Kljhqn3HeqfkPoL~&Uj2P;zN!*Vk9QPaGR*W|Te?=5ebxyLR*p4ugje3q
z3pUSs(jYQPaKbqy6PXPi4Q(ZfXAYJat(1Cjtc<}+GqlHja`VkKCTxah6=qK8lo4HL
z(QtUtq99+lsU<}=#>u|#>(;!Nm%I1ZW!BAkbI*7!RevnG>(<rs?%iLrVz;kNEjpKO
zz@;G0dUpBbjVos@yPcQ)W=AsP+GUq-n#b3F_W%EOf9?0T+b^G#IDBg&tIX}NBX7%Y
zzn(1}P*PV|R$>vUSKj^f)x(d^7H*VgkMQBTqQNGU74SIAO1f9i@7%fm^L>1L%NL(I
zZmHPt+KTnUmbZD~+G~9hB_agZZC^eA-oFo^M$NyM{q;2!6?bx9Z%a-{I<BPB;dJEV
zCWf@KO_6KGyIo_=7ccs<gWoknM6_#-lDCRicJPd>kPu}7!A0Tmb&<E*Cf5IYdA$92
z`*!=k8{+QXzh7@*Ip6Qug5ztKhMtyg*Ht#&ko-KxYpa9ys<+dktJ!lVc;9>Zv~+va
z`#Ic;toV0?teD~?_;Kszjtx5)tiC+F@Z@Wjq9yY$&+z>dd&K6<d9Azi?CY1tJNa+(
zR!!2Fuk6V&cS7Fu6icpVRtu|~$t!uPQ!D43Jfd!XOzG>-Ne*>-ZmN?~lv<fSbhaw^
zb{oluL`>zE@ofxd+<1mb=1AGHl8%<1Ng+Y0jj}ght2eXqfA=WZX%KYDU6^UP0ndxi
zEgRUMsm#p!AGdU85rasv{zI0KjXNK5H2EwkmU-rz9(<FfT|s`EhRd3c8LBHj|8m+|
zpDCg!EX00gcg078UsGIpcGbAJsu<c%`nD|LVE(jDciX2NpNm$i|6peLey({&;G3II
z6g-4@MVTdd_;~m1J*5<O@l$cZAxEvJ^Qv@ieXEk+!Sm6iy1%efXnMq=BVjQDpLT0U
z9C{=n?ieAXxKQYkN=35g<y{Ys2YB2QdC2SK!5G85moI&N$?0!iwF+~}j~jf+_x*C{
zN5kXu-m_BfKNMy?_oLYPZhHQ^m{a-|IkC*8aVIVpi)6HVEeu;BD!G<x>EWn}B9$k7
zxYqa1IjUj&)wsGq$?l-KV*3OYm4J7N8x7`PR67{b^WoqYh2|%=2Ta87iO+2R-F4{d
z83qk5R)?S454PW^60xeV+!rqq`K`dI{`#-p#D+^uDJR!5EE1GaQhc?4)e+e*xi?e)
zUcYwf{JD#%D~@`9-10HTpEGXed9wtGo$9YtF1>$#;QWuC<-czR$$kIv-RR4Hwz9^Q
zqW8<~XQ{CBy8UZzIcdB51?#ir49_Zf9ji`#<FOO`7Pjz9gF1syTABmvl8XwKr<EF$
z=NwH<DT`N9`t(SxWZv9fFYV_tDO2vp-4ybA_Mq2=_0So+Wu|t=ghN;aJ47_smL#4E
zG@Z4t`uoOJZt?s6J?cI^{rTs^&6^w?S-ATgEj$XFd>dVFly19u_4M(hXTQGvx_tK4
zD(_n|va)mM%$1uzSLO_V*D95zo{lDQ$J$eqbR09}4&0tGmpQE<CzhRqi<e0;Du5y3
zW|YvyI7UUKB<t%2B@G;ptZwJNioI>7;j{K_L|NbMb;V~23@*Acb{Mf5DMUZlm?NPS
z(V)bVlrFQKkE?a=vSW=?ADkC-49ZE|{p_n(0JpM<U-bH{7Y7<G7+6C@c9|Sa@w!^!
zYxuXxBlKy7%`B}{ftXF13LA_fI%ckld?*pJ+_TfQR_WASsR->Id-q50sn5>cz1PCp
zcBa3ZyL++j>9XLwuq|tkTC&JZZ4JuUoo6Q-KW~2cs)`-)JI<A@V^)go)sNrzXZ`;l
z{rmrY&F6dm`H+Nb-QpKF%&wU|y7TGd$A9k}r$7E!5fsXLDrK*v?zC;Wa^}9;XDd>6
z2YO!q^x(;ZA31jRb`~?v^Qk@dT<GnkVa=ldB=7dJ%-26N;v`%ToZa^CqksLs_x~U7
z|NpUn|Nh;(-&;LjldPVz_Y1G9;&1L%-xr_G+r4*p^^KhEpKmrEQ7Bw`vO}}Q`HhI8
zmeS(ZnVes@SnaZ@J{ujr<;S*Y8ymf8<+<DA)~l<hU%l9U%BNw<CY@=~?0yq2A82Iy
z)i*ankonBq(9-PP>k9SEe$4ZC?h1%3Jat*JN=H_>DX8&$(%R(p``f?x`0a_83|PH+
z_WpjcgE>VVw#(J-KTB3?dwE`k$wl#eUq@oJ$jW(>3g=zkW7OBG@KI)IdfW2%1svyY
zal1^(Ija3L<xyl@{G*p_eo=o|8ii!fzY4KAao*GHSA*7sd6F|0F&N}#cCJ{z>r?v8
z1JVLv-80;I6He?&^ISFIUdB|mh$&apYugns9&8U>AR}gTL%ldruk2j99drBEem})T
zw&OlNFBaa`+LZR_!!dpPz3k3YHk{u$wRZKo{Uw674>eEzH+;AuIZ~yTPiPXq=aGZk
zdH!y(_g~)kah=^f?dn<H$0E<X{^{{cmf_QHjjJ(J<}chhpTDkq_r&u|W`gr(d@qWv
zJE6y+%af$X*;T$+-@|iD`Qr{Dw=}i{p;KLxHT=0YudR4^MB;1R!pSGMPcpPFe|q$G
z)2~18A3WpAQ2xJOGfI88L!kugq^VN%TLaV|M*cp@`s{AiocqFzD+8YXuK%idYt|Nf
zC+CaZ6Zal-Jfh#Ab3jRN#y`bf&3`=Sva+z*=T6c+rX}*2OW7o2QsWc@(+!?Fh6UDN
zU9&S1B089I#9SOgxDGEhn41uPhr82Y<EOnA9Sbz1i&#Y$1wFZ(&e3Raz)(=&YJBAV
z|9c*PTmN_Wr<|?x-&_&;ZaRm{gHz6PqR$rLdyCIZ{QW0)dg~g``P+38^DZq``JQm-
z_xZjn)81P5e0TpTSk-VaOfNH1@{5v8imSis=7YD~<m_(Puli-TDt^|{oqQ9ISaxU$
zT<{Ng<mWE%XV0qseXjn3NoneS3^f-vvRuu7HEGl1X-AhgEM(fBeQw3%$bdb?eMc8E
zzif!Qb45GK{+|7eMZ0$H-d$bs=iTb{`)%y)SDiYvI%27Ff`Ge>ne?{TX0y*8zI*uX
z*Sml3{#}=MI;*Cps_fsRnz|Z`Sv@|99RU-Lmdd$yPkOT6F8sprUu|E?3VY5~##&j3
z7l+xb4~Wo->{e=<;>1#Tt!!>?gwCSGhugAWuX{U(RgAS~T5kI6b(_!K+$QbozAR)(
z&8n@RQUe6kET+%fvcg4NB~1J9iwP1JjI0=Y*EOG=6yE=+!_hnUqSwRi9p@U?1`3CH
zg|E`ljtx}}oYTyeWz=Qz%hKLRfir?Jt<YhCZ)aN8&Q$kD0#j$5`|z}T=c84NX77Ib
z>u)=M>Fq^TrG;kl^78Yy+@4(Zd&AAE>t>!^HF@#kb7iNWKK_$XRaDZ^C(hr0ppU`r
z>WeR*KFR<4CtvsX?|OfKzb%(q98})a1ogVUyS1&drshV@_UE4`Z(7B**xR=B-IhH0
zcV+$MuS0L=+_dVwdh+P${P?|A))sR@rW&?>+SvT?nfQM9ga^CsT0YDQ(saK4_SxOv
z=KtUS|NQ^YJO2OwHdJi5_A`-#*?>uT<NR+5m6vT3PxxiudiU*L-o~i?6^$#lhW}P<
za5Fiu>$30dlGw{7zKOkC%kEz5jaZpc5;XVIoU5IBvD;q<oY-2ndsXSdWu+mOhWq}C
zD!erAc65l5xg#*CWB;AGrzia|SY|J5F`Y|R-{7-pLr~pSnI&nA^O|?2*`D9vqbN6X
zp=sDEK`w&_1?m4;=SOO=KTq3qH7M-<yTex=+_WwAx17x5)pp*>L-L#Aftx$$+G;0y
zgvc|MZ&FiRr~XS!UHx|Z&7YtBdvbOOtz@|OQPL(i(bJFf>%qO8X145mI5gB|v?=f~
zE;`UHnefsm@*6|Xh3O(Qnu6G+vYZ~=bDhQ{!G6*}@#9weTg+E4F<VK@Np?yRQq_DO
zoitmvfwj)4@d>wJk98?qn?<FNLV=Q?;;id6g^PvWUD0ufC~A69@L6E#pTGa2B_fqV
z3p6q&DJ*zkv1h5==kA$48mp3&_PS2u;?iLJprCyIYvI~Swso%GmhLq=aP-*8<ebXn
z@JV~#W||sH<bR)|n$jXs*3MA*Y4e-yiZ_ze6ZF>^-}g{RagsZ!Hp|g&&Qyns`vqAN
zg@QVtJYQmIp)oV%h0lpQs^|MEBOAUY9v1Nn;S`tqy1&!8Hpol!lAY(>%>|3n!jp6u
zH*B}BitM|8cymF_Vpi{po^!srg6f++FSLK%J2S;_`6Ts1uNgj$atkiL5B?jU{^-Ek
z1Xi5|E*?LXd^jcwdbZ5y>}Z)WG4z69>AK5(YnJ*=d?&*CROr{!R;8yC1pWzst@XS4
zD>Z1&`M(E^Pj51rCC?z~P?L})map1pIm?$ltiNUH`_&%~9^{v_PVY$ieOhR4oXO-*
zC7Z?OI-fhZ;nRClD+M8aBYn1Nwf1B6jIB#JMHeugxpZKW5NErt$Bf#=Z1L}Zo)Tl`
zygGZY?e@q$muJa#cia2y`pgkr*0#Lz$#2_4N2juh6W+cq7P#Uf*D%Mdvro`tjcbX+
z+U#=c167;jbj40T&H8)RJbr&gVVQxXpoU=5@*_;$uXAp1S$*}@w{P#BU4C1%)3Igo
z;>C|Ie}26Aa`VqM5|X)l?2L1CJvCKyGYjVx>`{0yxAt83>kI6Ln*04ZTbOwaS*|s0
zFqHeReD3D5yqxs8n=fp={kClL-AF;jl;e+FWFn4VYr1<YFD`uZrdeXUI{JT2VSBl?
zMr8%3V1RO#qwWg<U5Cdjl7mjHy{oh9sPpTdW+$C44PAx@ha~;2*Y#*EbYa!<2)hxz
zeo9cz%}6m-J{be+Jjtxnb4xE-=I$);c_UJwDBGD}`tWr4<;6dbUR^$0zV6q{U$44E
zR-BSK@BZ}Q!!HjCtSVEq^z`-P_wL!VZ)aKTVh{7)T=U&uvy_?GH#aZ-d^*4W^X>ir
zckj2iwbk6xdCf^MMPE&EY5Co}&9|~*&+zkgFEVO1Yw_4~bM13;bN*M3G3sw;?RH;M
zRTSX&T-f<m`~sab3g=EMA2o_9DcByHvm|ck+vQK+{{4G)|DV_Y|7^Ga|FCYWfQw<5
z>kJ8wiwQT*FtswwJCN?;u!-UO>7!>KJ=~gmc1MZckIS+<!geIzzOij9@7fK;Ym;~1
z%$xr8@UrR^|NpP~TRC0j_{683T^*}t{uIhy$KWBL(!FNY%XgRapVj`?|9J29yqE7*
zB~4_Ra6<K4L3wnD>yy8VtFp4>CUP!FD=65I=GL-%_RLd7@87>-Ubp=6_1^gS7H5vV
ze7iq>TzLO&*}ltZ-OKfNef`P#bAiFlkCWf!usO6pVK85$E%3l1bJ6sIm&;F1OzY-W
zTK1^UuUFM2D8z4@(3Rz{-&$_^^5SB&k%7RfIX8D&U!Pz5Nl|paZctx?Yt6Ad)8~_y
zOy2Co@j~gTtN35%u%9Bg{EihS{QVQzA!=`3uJ50qbEbB!_}iaLPJH<JSeGG0-8juV
zGgIu1kLLz%rI<5!HShk&%nsmL)*VyXu5mJI;=UrKgsww@A|BKJD5t*t&-0UAC1BoF
zhHo3(HQ3`{U2s{#bM|v%u)>>{y{TeqC47-b3^gpLc12DpReLte^mp#c^0!PIrA{dY
z=xpzBEnwS`kt=mA@@3S7<(_APzp<T9U$E3&TT?Z3l`fCY3?}jT_{o|Pg=vvVd-oq-
z!y9|*vhU?J=X>8NHi~TacKiBVC5_vecfl$S1+A%h$#E+~vr?L5^4m|&n`Us@_2YjP
z>5`kO_51!Dd;VracaFi8<ZIbmJvCQ`x<8uy{E0?{_Su4!!d0I>pS4{sBFxn=qlmRA
zd|`*#vBV22PBA2TyFR}#x%f%2fSAb2pjo9IOx;G>n^~R-B&YbDvvD&0DC41bzc_zR
zyNTs0sn1n8X1a-8kLG@#x~h0*_`Rg7I~_K$Z(0yhC)#bbobN>A*{>%h-`{WJDAQ2#
zI<!Pn(8B5YiOt-JB}?utRFV4kWR9}ZzoWl?>x;bn{NqJ>`p>OXo^uP$4f>q@@JYtW
zLZ3C?dJHc=IvyYLtvmF<^9ej3vgayBu=+gHGCV!c=U2}UpT5+ycbF2UX&llq%<q`@
zGQY>|`32J_QeK8nEU%P=$$0GzH#w!QU7?aCr9RQ})`!hqUmR50pPZQ3oMZ00Ow;Rz
zvC0)2=`A(aH?2C){pgQl=aLW)!7t|~o;F`DuzJJarc0YHZK(evawze-{hhn_{y*fe
z|M%|pdi`^LvZh)`bb@c#zst+Zv#<G6Qd)ZV`{S&wm4APIeQln9K7P-Q9P@o27pyW|
zwtP)#$(sX)mgf8(lOo)Ob|_jtV{w_oxyf{9mx__baf4qLD>uFU9UvM}`NY7dFlX7}
zfA8MCE4;tCL1nGR0wu!}2ZZ|zXDvIpc%IpGX(b^Cg){Qnij0~oP9JS`yu4V8i9=%o
zhfjOK*F=H)`!jc+O?B30f5GY@8tP)9beNfy-RA9A1D_npr%EfVzW#lDH@AN0(sPRb
zN{5V$cnswt)B;lrmH4(OzEG%gD3RS`6>WXB`}A~<ANK#Q|Nrwa-=1aS{kqS~zB;YD
znRD%J*5|LYuTOveyu7?z=Ig&Y`>!^#zchY)Tb+Ne=I`6v_y2smZU67(+1cA;ZzmS9
zE?s?a=dmd}5+6C+&zm=IZ_UrFz4v~wb)N8ySgRKoyRTQPy!~!ohoa5DFaOp*e?Hy!
z_4)13l?09nSnxfTNJ@C9$<uzgPO5!=-Jg%ozWsXq?sD4hsvZ0PpT7S;*Zk{^rR@?M
zO<AWUyqlxU(Es8YqveFoYi>{F?e^N&e?NTspHPT1hoM`ap--@k`#d*)38($<ql+w#
zZk(UT|M=>O>w@nyujWfhShF5a?r8t*y#ICR(#VTD?5idH%%4l|k81AP9B4k_v&)r$
z%M+|6tGg;3B(E;9wqiddEXe&_Gk;f`;@){r3{F2@p~bk7$y55&gilkX3U`Xe=<L>>
zZd(`kDQ-%~%Q+k8WX)p!{2}7l1hKV}*R-p8PG8pI|0x;JsQ3K;tdF<59fg+du1Pzd
z{B&sy51XNDo2>n>>~&poyfd|yL|(I+zEoYIsW|25Av4pK^C6|%ewk0nyye|-<4b(^
zTqDg-rp?OT{TKN};^$@ue0Z5@;^EQsK;w3A&KIA}O{Ew2>MYUQb>+w^EmfOMR!u@F
z(HpBLotl!fAbEq{>9k|d&k9Y-{+`s_rZh96BC2qjWw?F3*|F=dRp+|eEn-Rwal0zQ
zl<aw+GGY4n=buI91?8Ka|Go8-<%^hZ`|I169o-qy$?bl)T0^JjOZ8{|skh8L_*flS
zFFV{^v~$+wsfQ}psa)|(&|p>573!S7{{7@{jURqWJ`LJFRd2<l-8S`Nt=h|rL`(yl
zxF1M{zZ6_x(!SyBmj3gcALr|z_WHN+>FHIQm{z+nhE13fpLDUhFG#QR@D^|1DG$#~
zWlp(xKuC$buQ7QG=RQUKhTtISbA767?cQCH37U80u}<mig}Yx$?w!(+?oe>+`dRHz
z*N`HwNHvvJk7PPI1Q!S`3|88hs+e}mAYH!cCclP3)iQz09BR4;>RbQCJ>+hD_F?9h
zj_13YmF`Yj{x@~&!3#%CRxBv`v|o3s?s5KW>UwXE9$NKF`(M|oe=jaYA2Q8$ZdxX%
z?|m|hZ8t;rF^xczTW{XoJZ-i1$wZr7Z43bxT(37)b3bp-ytQ&^<HXOti}^y;#WZ(+
zFY%a_G+AWsOO87-72YrQJdW7B@kQtPyPJ}pag>O<PT#gJMO>9*b%V#t`GP0by<Yh~
z`G*N_yYqhOV|RbteZOz-4hz+y#|1m~@Bj7T<KN%@*SBv^p5f<r{@$M2&sT467oR@;
z?kZ=d2Q%0L1E$PaprOPwM`OdP;!l!di~BT-uXt!`EHZQxOzdQkIxKQnAUWV^%cfp~
zx62dF`?Brdek<S3|NE~74+E#ltURAE(PtV?KFhQ1T@{5AO<SUcu19vR4;ABDE|SUE
z7pVOG@0R&h8GnC<fBE(L^0yYXO!se1aSkr(6F1y8Zm-+ZS^mA&M=zuEaYgypkB@iD
z-=DLkV3S(!9F+%)U$LB6rSQmN(Q>|x!5>`KMd$mS_p|Dcz5Vvx`+U3FKOg_L^V{#~
zKU`3@Eooj@toj-o-|O@9^77xwm)Fin@n87yU-|ui|IXL{JHGw?pJ#tQJw3Ym`s}aA
z62CcUGzGc+wq}!UQ}9|Jw{PE`9TBB-zn@r>T(EBMk6*Lx*PQpWyHoe$<Kg4C?-rDP
zf4Kku>;LaxJ^c3Q&Cy-a-7gl}^E{S#bmY)u3mY37nQ#5OkN-ZL*#4fep?%?d{`M0W
z+v=27t;lihcRu^k>dA#B$rDe~89e-?e|&sZ`(yuuKX1-V77u%>av`d6`^#I$Ec*^$
z`0h~H^;aZsd4=eYSzjIRu&}(HS**J-KXUt`UAt{%$`0?ioXMZRud?vttCHGVrcc6i
zx5wt2t)BW_calBNuXi8aj&mw8HXDdHa!zzEvXa)DY|gIXdfUR?V}{JvsKU6VIa}uQ
z@QSA`+Ec8}|5_{j>*mBwR}1!@y50O-cx~Sa_Nz=~norlSe6!!<)y&E6;yDe^53o#7
zzq6xfa!XSE*`|+?MsZX7kIvfa^Fc?cP-nWi)vVPHN21;_%=|Iwd2i=4J<nyPk9R-Y
zw=|-DvbeGEhEU;bq0NGyIW8rijR{iMT3OT?<s*11>eN<!+4E`3gk02AMBaP22p+up
zBs65TvFdCF)$R*Azc#m|%{aUv<iOMqtA2?X=4KfjJrpuY%(Yls#QsB0;cR2+F2^%D
z6Mx=4&9`UM+1hjA;p(^K?lb;4>X^DL<=IPHJ)1K>-Xz*>es(#H{pdMYN0y)<Mn#3i
zmDlcWxVGkx&P4C<^(npUWX!K`zWi+Fn&{ed+N({MYkj>PBD`UeTE)aA)hc^_zq#Ey
zUzzLC=Y7$&pVFePdanwwDLud?IEB$-<1K@U3=;(|E|e4%Ipe3nrF3A`#MCaER*O~v
zCH6GUr%{jQtN5mrJqveQvnO!X8|$o%td*SGmVEH~dvD3}sUG*PUt49L8YHO7_&mg=
zRp6-VlExX+#U^R?v_{A-P+Gb4{*R6)Dh5?$eX^62`y`nEaMhl><ov<=srh+L!{@jD
zaxx^^$#u<;)7JNm%FJHqQj@p7=XlrVE!SdRNY_s-W>|ec=-unAhhP32*_Wh!)86~q
z`C})}9NTy#d82BWjwOSVQhU#=wf8nC)Q6r5DSA1}OoM$Vn`ek~o-3>D)BYfr5M2rF
z&qk)_rBA5ciBdm$UHssl)>$j(TCR&UO`AS-;e)Kwm7%wF67Sfzl-m?aJhnJ;|KGE(
zRgZuDd-wSG>iKpyHa4qX-F$iT_4fSxdunUz3b(zIQVHdBTYK<Q;*C38u1Ec(PR(Ev
zIMKMXVOza(s6XetH99hfM3lIte>*xQTt21lytu7Ss{HSUg?|sf`}VHx{`Y4Qe*0vm
zE=gfhoH{lCeE4e72Y;RGCDb$ayqEfv5-P6Pw3ug~fWzZm4BQ*qEetK1|6e+p_4U`M
zHFpoyW$`5oCo;PnQf!;s_c&4fU7ox=U#)=O^F=JZ&Juiw{}mRzahTa^%Hh)>IDv6S
zWU_;ToBopv+wW%W&b|Kn8gu@&+mqkk-kv{eTlJgv#QGg}Z)RPy65h9Om*tnszgOQr
ztY~z2)zSQYKhM_xT>t+#=;VsCCtqf5j-5Zpr1!Xu-f82Y?&qxvyX4ufZ+*T#u0&m1
z@$KOsJL)U$*te~?ST1b3_!5gnkxcvU_QXFgAGfc&YqpQ|qNdNpnXSFP_PxO~+63Ap
zoQtY8v^#cfQP_0YJwPyUQ=@vPe2B;%$<h<m&pV~0mi>Et{5$`5{@Y)-eU5#7cUR{1
z*y_Sfdo3&@x9YVV&Z{q%c&Bd1viExC@{bE_7!4K`_sz<^{v-GHyj(N!?RO8KUH;qR
z=xhGpnGF_Z^*=6~@2~!QclGq?%a`9?mwV!;#)reZ`|bbKn#+_(nR2+NrCmG4JnO=%
zbE1-Rid`H^s^2s`o-se3Jh3>lMzr7k5f|$<j?bG{b8vsGWT{r$Cc>;~Ykup>%>B!|
zl6b5%XUcnLZV+=iQ_L`DG2^~#BF|DUbjW!NuM$c<chE}eojR`%?<XDM#pmZGSsh<K
zCBka+F(oD2*tjJd`=0!W^Y!PMYEymUu;W^`mJV-4voM3-%eAMydOSNlbe6M7QSPJ-
z%Cp>#6<>QL9KZ1yKOgVJNZyXO-A|SYyloG6=i+l&a$~9JQKRKWzrWe2r(V;@UVG`-
z&f8~lqYFdMP77W8>`ja6?JHZR?KBKt^JUxAN2>MgN2ONoZ16w3ovm)+x*4BiHm?nL
z=yUjdzQcdj&t0LbS7|U!T9tX_C{JmiTX_DGsdpzeHJLD1iIrZfc^px*LQdW0`nSZG
z4?dgHR!;bO_FqI}^0TGad(Qr{<aj^%q^%q;$Fv92Hpv~>)O1RXGdt?&zbzq-g3?bm
zH$>cF7Hj9c@&CdM7E2>mIcJ$eCT#{sHH%WbL$~mX9`xCJNZoSbY0d5D=Bp(fEuGmb
z-1=l)@vW?=mXNSOm(zQ^b+?rt;gePDZk?cVX1QQzWaE^GxvFObm@G@=PBy2@TL@Sy
zDfyif|L+}OX&-!~K5W-IQKrup%imQfM>V8fja!`X{`3L6$BlJ{|Ae}2_bxkrp#F?#
zjN4Op#_cP9?EKEK^~s4ds{0D1B7+RV_gb%;-?eoygKS_^^EvOIBDq0Z8d@ul9-4Tt
z+MU;0z%Sd^#rJmN#iaeS^IJDvC~27!)&Hy4n_1scMYr1Hu%PsFb;al1J_6jiyF%19
ze=M<iW54geXOcwO?)-D-Zfo+CfB#tUB5XD2pq<m*-Qv^LZ}*n#B|4exElhCqK4)1V
zc~<Svq}vuou5<6|o;<qn)r14pSy5Z^<!35;FWMovFk9;WwzbOb5>rjT1phC15&om#
zd%^b)7yN@K`DXm;V?E-&V)n84HP=~cz7|yMxt)3WSLx+7d$afE@2RXik)Ei@Xc3}z
zL1R(e)rnk(tFFe(o4?P>KG*zCZ|v*c3&iiO<w%gpuikO@u%1ze@oGVVtlJl!-u(6`
z=dq_Hn@WHfOPO9j>kP{y5&;ZTHb$DSEnoNXL5J$5RoSz`*Wce$_t&iD$Ghe7b~QgT
zEACkLI}}JPdwu)v<HL>2mhbiB_x$^8|8IHykIVM^D}FM`{#^F)UOHnwtF?gXk(Y^Y
z<lC9I_C}jG_w4>#P+$J}<EvM7-vu1oJ{0WUy{mpt-Ho^3obCN|E-kiWun;i%!>ce+
z*&+0y#NvX0r(05s%>F+9^Tz*`r+2;6^ApS6O)VKk9ag`Yn_Y9eG<SQ^y3Cr}rK>Mb
zzAV06Jbm`tlUZBeM*Y@idS7lOE61nk@rKdu(FLVR`7E}9&g)9^*1j&=of#kVWBvV0
zf0wtfE)SQ#U%%sT!GwbEk9>CD-~ad5(f8MHUwd?1?%VI#tM~7(FE6jR_EGFz=rZ{b
z?_A}gY3H_0S)#3CkjmNebj#z~pnK<4H(yKKqRP$LClsk2b!4N$V@bx3GJk`orbYV9
z%8Fe3&+>TGS^v*64*l-T&5JA5^+E;aU**$1KH=rn33|@8TQ)>xt)Du}Wl@-QtsZNp
z+}kB5_bNX>Vd>vyqSYUuvu9e*GmY*Ua~^XWhq<0kvF@JB{b6sH=1CR58Ot|KZMnSd
zx92L0<<^Qq3X^;*wzjlS)wGk_-X7&Leb#NElUeVVE?{!V4R=|!O0V%xaJu1UyZN?B
zn@?_Oa5qtoVp#34;LYsQw>~RfIzMZTN#4>$n@_&ftc*80|I1s)>+MmEw}E?;&QDM{
zqNVbk^Bc4HlHNxw#}-%pi~Ms=NlGbyrYkFJXMn4KN8+}y0~bHo{kFQCp_%Q^Q=)u+
z*X`1qzwP$!Fsv3ywKSTq7|yYT^F`N(nSFZ>J+9vtd0f5tSNiX(sV~F4rWiSUaK@YU
ziFfEIo;o75h(&#(g_1?F-OSn7O1>p`JQi85*{9f(D78GZD|m`V+ogjk=b9EDIFMd<
zN8M?^)a1pMN3PE^3;KI!)}Mx1SE?eb^VFwxysYqaX_><yJn6}VgJ*V{i|TM13Tp{#
zEfQGiZK7DzTQ1Ylp(Gi7w0_~H@LH=w*Pnac(08$Y{rvx8yXAZ0UleEEs`cDa<@e}o
z<;I%O+s%jSW2*Q|&s!zS^u_4-@7&4yHALm}%MP*BYv=D&?Yx%uRO3t&U#G|3v!7nQ
zaabEQ!z7WJA(1zkMOEo}^dkY+`3@VO|BV&<x@y9zz*G(y2JeG!m-#NArW;Z8d*!)@
zQjvO-r(K+|)5T?GxK_f!=r-Gq!aMSIZQmc))z?<l-78JsS6Ny4^VOe6kG|yWjx1f>
z*YPPl&^0hcV4JX$Xz=|xS(V#vHJyljf8vbcv<60jB8C@ps@}gz_WqLR;{IZybm!}J
z+0y<NM#U1#6Ysy5fB5d-zryYFEw;M3I3#g1na-K|vhA3iR{Ot133>NroaHaBpDWMZ
zoHZ--@|vjoW;1Ry$xWFOl+|w4bndgul^GjW#jXvxJ1s-`Q_|JBrZ3tue|Kn?|1g<%
z`e^sRhicyhMGjQQtd%aZ@?&OPf9sfV<?}u2Djh;@vZ8E^bDOw@q(rW7<yu~*ef&hj
zyDx7ZrR*-9=jVTY?s56LAGaU>3!Km(bw5XH{i2%6pO4P}|M1-Y?_v4)y)_rtM*S|a
zWZ~{OTbHytnI+)F!N(seHt&Af`f}IL4gW8E-@SVEYJYkAcfXjMeKx*7epiIAv6+{z
z_uU$yP=)$gEl-5L%#Ac%mUO*~TU5#9Etk-f&GHjYnD5Js@|a`il*4Gd^W5)knYY%(
zminsqtj)f+uj1$1>-%eqKYps&9BIDRIiQbCK5F~y>Ce}Pt$pAARbaQG$>ce1y^ax$
zF`Lz{cUHxizgv`jH|x*Y9k%ahw3XM}-??}1&sY8W-8S`m@6M_F`}X?XU6O}>e3!TX
z`#OH#|B6y;zT`O)F6q&G7C9H4bL}%dCL!q9%2SvBlW$c_N|)sK#O}J7S+5E_zprhZ
zRLUb_m!7P%V)m)iv)a?$t?TDL-6ttJ*~Cp@L&U~3!&a;I;#qH-<ieL6n7ZSccR<aW
zqBqZ1mg}*;o*uhrN_T?Bna3xKwx!3;w=u3}+w8DJ=)U!H8ONtP4#{jzz1ps&`XnOF
zV2MD3(3H6A*E{E}+!m@ETzO^rlh&DiDL36**JSc1JuY9|c~`VJr$6=hEb*21ic<m>
z*&bTGZsWD`>vI>oeq~zSFoBuBgCUUTtY7HX#Sz|by%`sN?vVZLI`v~=s_nd$$x9!v
zT)loKPcLgm+zvAiE|!kZZ$nqVDHP1>=vLDHZDt@Fc{NGv>?JYwywpD$Uk`k4t(|l`
zT%bROebLhIDutp81Qtm>D)8LK!ZO(_@usALmR6@mqt=94hN<PdOoGmf`&24@{`qKa
z#e-rU^=CRy&WFCQ(s}N-Hq-Ux$DaK$BC%_?rUmR*ar9fY(87zu<|ZfC1Cb7a5DyWH
zERMoihfI7PiyX_8T5h1aTXgfw_RPw)*T2?^)lQxC`X$d2ujG04y3Yb@GuNhf?M^XT
z5;k?i(OX&qjlrRgMIKps4hpVQL`wYHqSc&)=B_dEVsUi&axXY6L3@JS%i_NFSNq>I
zUx-&ZANk_t!;{6?iCt?yPG7p|KmQZC$DjZIl25L>c7bh;NZYSBOpA9_e{xW)TvK!<
zNa~eMed5%(&<5to%GVcs_T!uwYOypbEGf#=omJ_k?Sa4vH==iGC2?I2Iv}!tTg82E
z=_h+?S$#a}ru;m9`p|xx(?(uVtG1ll9JN(W^+uv+LkkN_M&*Ugp`EExKcA_|d~1LB
z>FLew_v8LOGtbwH%b9g-^^ynQj|D&O6tO4}>AvjD{iW;W?1HG48;rs4#Z-#UC@xU)
zNZA(3p!8hYXy07*99x&C>t5%mtzDEj=a2zk^^LcD?R7ni)|UG|=2YyGn5kab>)Q7B
z!^7qMGLI9@PaM%vQb}~ks=99acG+9A)n!*r=YF57{{Ez;f{=&{%lm+cCT8<@zjxTT
ze7%`=HFvJHd)>yYb5_@neogE373NU4xBK+<_um`+0+FWgzJCu7f4}_jtCYQUg>{v6
zRhvGY&1&Up5M(yqwLqpdhQp>sz;)KTmmG6HnD}U2D0_YOr04dmU2SuWynejC|G(_(
zhQr5lw?A))?f7-KeE<LD^?#nP-}ftP>nnj$spA?>Pv-K!k#;bS&Jf`<oWy?Z$K&Im
zmAsD+U)aC<f2WjGTiy2`1@-0jev?;l8GL5Zcp@;_gD1q*b!Oy|Z8KU9thn}kwU%}J
z6AeQH&YIVAKYMuboql%O^18{r?Q3r?I8nKXZ)51!Q#U5;zp*C#?W>1n>cRDww%p%W
z`TN(Oqo=C*PZ;(n1>Tr=KhMwKf7|*8Mn|?MSI8`DS9mOOtWKs~z`13GK<L@kLZwI4
z;`ZIGGu|Wft9>>9_xE)_$_4iB-T(Lb|4;Md>;I-*fB)b9|J(h4=l_2kFDEY-_1@9s
z^jhA3?dN{Z5%Nme{A<+*vsxd{gJ-TC&UsuMBAu)?C;ViTuIm&12Q8sH)f%`XOMWS>
zyjgKhB)ek5p2V|m-<BR)7PP?L`i(QobAx<lt^5_|mN?aH%Q~6u6EtP{!B;!aRX5pq
z>aU)}6te7iq~n@tDsyMKo${1F{z<0#km4fWtBOz4o_~s4{YPxYD<+q#JSWa?o~5t0
z<wer&MV(q*9-%sV|9XlG`QFV^J-kG7Qfpe=qnuq1p$uUz1@|)V30&CIvm*WTuOpY&
zaQ`g|n;fR3^Z3Ryp?H(qGTw()iE+)jHl<ofz@&uVOY`dUhx1Yk!(Xl4^6K>L+>nJv
zs?9<c2I7e;Z-^@EUn|!C`?GPe-B<l{n@e>CEi}a~PVW2SS-Ui~c&ce~>T3J<4qG-Y
z)??qgc`}Rl&-k_0udXYIikNzzdvh?`lhxH=>G4eIui|UZeEhaLb7g?Xv=-NrBgT2`
zv!+HpWz?|p|2WgM&4RH)uqRMpF;}O;<~D)lhAo;t!7|G;xq>?si+tWrd)X!W+OaWh
zXP;Wi#n0TFt+$faPQ7zsUTo&tm}xCan_IU91aI~aU%lzf88(M553jUITTV7HaSBc3
zSh4HKH=UVE87$jOinp4d?oPS<MStP_KT)>cwX<KRJI*MLv={$rd0H#{s<H3`@l!>1
zmlw&beqCqk@?28w{AM>M*9*(fwycPFJww0gc|4z>V#G?`Lr#4$kFQ92yH7eNVC<f>
zGhoZySsPY_E4e&>Gu`qzLy=6Z)mbAR9bVQspY>$<;~E`Z#C#TH&CR}aVaf_ctyF&f
z)~YFYx31hCsG_PQdg7-I!>Th@JRdH6x4xRWwdw1z%J)ewLDwtpo36{{U~Q^U44fu0
zwO2biU&UkB0?y7`>ptI$ezndag*SMLrqP1_Cn`rKDo66vc8V$%JgHPsP?|2Asw0y$
zCqw42h!VrP@VjMut-ky=^qKKI*G#5c+5Ura@r<Uc%NlqVFfkocbh&1D%IegrtD(zf
ze~3<0e?I4PZySfw71MQvzZ>QFw(!1P6<vDv+QpNn=C1i0wq>E|;m8Jt-@6Z2T~_3E
zX^{AR_-?=btLh7<H+%ooUBByD%I(VMu9mUWS@s#<h<fwAe|LNAi}r_~ma<F8{kAxE
zHtld&c-5{wHowoB$G_*>{rdRfzlYb|{Z+Pg-<^52pU;|$i$A|t{Y=9~_S^5n`_f(S
zo9z2~ZlX|+;hW_P`ID!-Ed2jr`~LsG<M01nJO5bP-KNVDhb?##XSp;ko^eBvlTkN_
z$E9U4tNGMj?ZS6Ysuyg@uDvYP$b2&SvZ?QDV?*E2t73N<7&#p{4>GPcDBwsuVsIpP
zx82@~jk$jxZ@6#Ivs*krRpRbSCyv0@R#%pj3ze0=@LFz<JiPLe_C*D$$VV2ue+^{X
z4zIK4Nt8Nd;``S`)=5I7{j&G>v*piyh4by-u3P_q)9dy1zrGy)d0Kq(i~DsyKK^>O
zD>KT!MQK5$J?G7(OSN?7#XPzeJoBj9#*IyG&n8vOE(+Z$!MQPN-D_jNC6!@6pDMOZ
z+pX;VPIv9O=c_&&UyIV6nzQPRG0Va-SGm)X^Y!efi=DBQ|7>CTA)|4f<~!!}ZjpnZ
zdJpR7rOiJ&<LPmWfKTQt%XzO!{Xg}5&h+nEE7*!3O$pZf7Mb4l=JKxS=YKVr)aonv
zQls}gzUdUOH*=EOF1FKard^$C<Ml+tz>a_6YRieK(;~0kx}$sc*}al)mhLVmc)T=D
zJvkscJ->=~irl}ckK*TP)oopq{b`%HP>98J-<2OWP3>E<k6ZL~xHp53h^vrSQi)@M
zXph>=37VM;HMSSd(zae<)VWft>~d~|kCdibZv%_v-zolzGxs-eY~GW9#Au01anU5>
z-5*LzCoZcg*{>j|@+<SFWl>Z>WRx!Bf&~$aZ?4q3R8TozW7?9O8JZpqqDP-dOqiE?
zdb6ncagEhYLH}yzb8Mf-RnhhMXxgNmf=Qx6Pc;M;g^p<`o@yx+IOyReAi9SuGAFG^
zXqiQ?VvEF~lml%w(V9XFmU=02CHu|V^!g0fQ?G|xWX#W0E`Hi)wse)l$?YMmqQAAD
z*vM(zV4NtUm&dq8Z?^m=o7qO}T!AMWjW`%rM(o<yken>ybnB98ju`{<tdgter*U45
zw^wY}Keh48=Ch4**T1P6Z{2oP&vyHKYjL~iJMxincE5X$>$90E$ndW2GAnSr^>@vU
zHF4>yJB~;s@r2E?t9_DKcy{@Zl`5u=fkqrN)B{y_+bk7X`FV+x_Pv&_2Ik9`ALQh&
z+WskOFN^Rwt*||FB=1JIKe+mAYsu#z%X_cq#<w=EeU|NZHeK`KT%A3)-`D;5kyyGZ
z_qJ$k+`Q7c&R;?8i#@HEI8Ukc9FkJ?X|LP=zTL2+NkPL>^?B{)xb@+y&sp*m-um8N
zd*5EB?XSVcOJ&t%byannUPZOfJ7M|$(uw<=9wC7$de>)iba0(EDNIp)?(gq+&fkpX
z<Hrs88cS;#Cn?{raJnSXd^G5ZBI}|BY=X}uM4MinxZ}RP>#W<Y-fgST?RLA!yh6Qj
zoh(z$4ynIt`}QoD9D4o2vZZS;_cd&P%Q}aJx2m>ichve5woJwuGQZm0#iQT*UCX|j
zq0A)4>XO(I=z8Jjvah}c24DX^{J5|F#`|^oHUjLC<=HM4U)BnpHgM_7y*_W=yluJH
z@4xx>@ZXm&f8Lx?OyWq~f4_Yt|HAw4zxDU;*6esctL(1n>M+h<2KQ|4+_Sl3D<}6}
zF7N%jb@FBWjyHT+^xkG0P3A0ZTOC<=-f(+v=Eqd8h@j`PmM%r-D(_GF?sATgyDY9|
zHRq9qNsUhqX$7kK2n8y&OgyaNtRKw3<locx|2}+u%B}u!xdku(_xW|d3jTh)eDv3n
z&yp3n&my0n6tuDyw_Riv6qGk-OW$^FKAB|(-!2>Y)Y`vmFEsbrefVG9)552JkCy#@
z`Q+QTtnKo@SNq$?-i`NV+I^T~LGO1Lm1h^z^!9}x(y=P~9CBmQN#E9)D%C;Lr2UtR
zM9o|%()S~-W4}U3k>CyU+nK_4;@)j~cH5UsRXL~0vRdcP!RHrvO4$VL1RuzsX4`V<
zL`~Xcf7b;eM)T%fG&(%l;NWt(yQ-U6dG<b^lUjVa=XA`T-7CYkt#B3V40KWGa<Me}
z@MtslJCSZKE9Zyeo0n9zEm3{;l9Q3M{|`r0CEtUS_TgRzvl##16t}p&M*N!T<1fvN
zgcLj9Do&00RrXVC*;CdP^CvHyHBIeuw~Ftw*Wsa0d?);L+9r7JK>F$XrWT&Z-g!(s
zqV|3<Z}wENvqI+@Zf-i_wx;P)fo5QkZz9uFqx6NWs~aYEbHCK^<m>vkV$HG;Mwgt@
z>8^L9#NP&3uksH6vS)|Y)kCS?D%V~+Eed}w=nz(P=6&VaLlO6cyXXD>F;(Yl9oN*5
z6)J_<UFT;m{af*5Vg08aR_|C`rUWjNIP-X^9OvBcxl<Ud44D=xY*`RDHN->0BCAIz
zX~Ra_<$DCrPEhOS*Zmv+^4BT%0LKj*mc^>oAK#e5uC44DXmUd(+(7hu6SsI~)G~=V
zr<{0nyG<?!&Q0T;I4Md$gIz&7$Vk(vCDMY6{aI*7u&`6dzwWC>|5kbaQ<_%!^Yka(
zk5f2SeSNZT+v92fcQOS0U6sou=fB~Dy~ghPBE6T7(oX9|)bf1^OW<9_p=8*0Kr?CI
zCeFuAcehrsE?T8<=8(selM{tQSsq^cAG+G(LZv^y!XmTZ?nQcxvrDEqEnE7aXNT88
zMVF=euB+O=mpUkfO|?_om1F6-RqsXSJbkfk4bH4rES}5yT>kOz@NdPo#>l(&|6U$l
zy)JuC`8i=r6W6)rdK%s`#}7Yz_w3!XU+?}E+HW*yT9B-I-$Y$q{rU6T>pq{k{BFbk
zeY<~`d@tDjJ~Csk-A;@BHKldm9=%D~eQ(XJ(Ccfjt-faY=I{>dH*(*0C(d$lZMAGG
zl;Goi&!3!-yZh?X?$f6)^Pbot&-dza-IIx>a)B8|6<QooB?V@BtC<=bxQ=(Wb2u-k
zi)mfJwBzeN^Cu@JDw!rfc`f}hwq(KCowwImJ+~6^-=Va8gVYz5c{$r(f4x<0?#rmK
zsBPWS&zIkx7GHk*ZI_4hj1b9_Lb0+*mwzqGzW!z^`-96CZys;C|Gu5U;oqf=pDcP`
zge?EK;r3nIJN9qN{+je&HrZsb`uo|NFJHcV$+<wti|25`i}K%hC2p*hGT(9|;KGj$
z_hawI-nY5;?%VFVH_KP@7Y1F`aJV;bvDwmj$*o?$CTcNnVBF6zan3`fxu4IOKe4fV
zKi6G}fm@Lw)jNQVf#I1T<LztZxzcS0K9@^oEVJ`*=20#_kjcPz_H_5<`X6u0@9(SI
zy!-c+rWY1*d;foV__zJ~bp7-GX1R|~RQ{gU^<vL<%aw^a4h+3VxLyU!_A}SE2v6%>
z6WnoFqU`U73-a&E`OC}0uTNLM9=^PM{<~jyPwdugInnS+`n`01+qPE=mpi<W&hOo#
zJYA^i)1GFXNUjaj?k1aB9G*FOw@&DEf$#B#vpt2geHiKwiOkHIyzc29XYXqjm!3Er
zWIL00Jji0oA(K$UUt8X<h)i9@EiYkLtP?1AAt3O!yzQrr-(1gG-As=wcjS$jZIE5e
zAtWt)`FzjOyGtz(a9v?45ou`UT{OWgXp+p~Da?YWe-xL=pAVR;;;QlK>zUJY++R+$
z75a25Yu$10w8?V)b2el#P7lpobY|(>#&ebHBN%<(sB$-~Jh=4-=Ub@+(a$TiUI_oc
z_fd33<fnOaP8vL)uHsQsRCmtp;+`E|H?_Y#6wp%GaN~`M)u)8HR-s}OgEoa*y*p&H
z(%N**c4_y|`<L)~RVFTdx<c~C4uu0xp0qq)Hk;!#!<j1?658D&s}CiX<)zi}G0A-1
zEOvL19&1sUfs^8{1jZJJ)(er-ZiPBHOma!|OgqKM)BQ8;nLy@Iy|rcq#@|jE*{_P;
zr&I8%PUu-m#9A58tl!>GJtG_sT0Y<D`r%Q>r#~@Q8>>W@_sE<*BH=P)>gt`7c;9AE
z_WM`%Om<eT;?Khu7cXP`s+n9ebJyBglL|gPk?WnG{K(n&?^0fm%i)GK`xO?o)<kN%
zDJ*h{sF{-G#J9mhAyhPs^N{dqiATF+!_QvWeb3(7`$tjd`uT>fS)Ow46{ps(3H@py
zJ=?)pf_>Vu)m)A1t89+`Tr>Ii%sa(4X7gm_RpRf=JIJ%?xcZ%EmO}hlPIKkXhHr`3
z?$R>TS?&MHITlU+rY<*4bx$d^B+V&(eqy4;cd_RleN#G@itb{#c;$@8IhOk}k&#(%
zieLIab4$(>IH6}ew{q!LZ?6p@?;U3KY*y1|65RIU<Cj14zb=Tl8|(69ieyO74D+vl
z-!5NW@0wM!J?nb?jXm$A`c*h}S1*!zY#}2j|L)y8`<flUKHhEjzy3Y>3P;#|Wy@Q6
z+x6q;#Kz>zZBDegV|(xZJ=;6B@0N4iEAKbI&cFQj?8}!gzrFeM=FgWeK{>Z-t5o@R
zbNPFJfBaiscZ1o0!9(JGUVeMyy7%9cC%pan>dmL4H*Z>fS|I0t{KMs|cmLL<u!_ks
zTL&@S+0xt5Vi5Vpd*){WhD}`^Zk^5n=52*t!A}mnzP$NrS$7}9tOJSL7r(ofA9c&5
z<d)C`_OD${SIl<A=Kpx}<xxuYrEPT*6BxpLiwk$vRu&f3ZOZCUmHW0kknyCjrP7^s
zf404zwJme%mAPxL?XX?*>tErG&yEwrVt1QG>t(T6^0b%B+x_|d-M(hmPVPXNrnZXu
z;=hZPpO?&vtL9glsLcP^;)p;~%iKO2*=334H{Sn>wU^PE(q$QS#DI%qw%EN}HXD~3
zEQ)Lp$#J=8aq3#`^SQ1Z6P2G|+~PCin)f21vs*o$$XKx`WJy2Q_mA7Mu2w?s+wO-2
z#W(DEE?ZPS=h)4*+kE~0U+4dSng9R$|4;Gv_r$dSJ69d@V0ZiN{r~>v@BjU7_vg#K
z#pg`t%~5~8WZN~yshP41X1DV%>bMv9ctLi?<~;xN*YtmGxu5?o{MX_)x82U`#hv>4
z_}%f<uYYVv7GV0)7jVR>-$yNN`>fA$+G0zcik{z4yK5P&`gG?FCh15^j?np+_x_1G
zxNurKf8e#8Cv3d^sYae2nVEMi6V5G4S#V17c)Cfi=d(#=eciX7&j|mil2BtA(HdkD
za{J6<!^M#jzMok_7v%_9Xa40kHqdzfFD>(u;Li;uI$;N!wL}bN&FBdT(Op(_ai`61
z!;~|;ViWkTo@+l8cyN<pfX5->Q@hts+M`mueX-Jpl=J$_Rx}vfq^+~*c2Zd`uX3UM
z+y-6USx+v`(-anP-B@<-r7_3MqXj#D2pU}f{aIm`(eG*fp<eIuW@>dU&{FhFdZMJT
zKxM9Q)=kbOoXr=7d|pIVUR=#7;?(tMRm-PoJv~OuOXuy}(LcF=Yt{7U<xU~7afKq4
z?X0(-d1$ZEdtzigyH%-+JGDMGvvbvk0-+N(qZdBCW)QWdR`cMbCe>+YoL25wH{DN5
ziy^{NbsF>EnBxzXUN9}3B{Qw#d62QQ>K}Wt7c%cfKEFSsw{}L1yvq|#>9PlN6<n@(
zNQCamVtzFxaFO82nYw-~p^jZ|@3dHXGEKQZxoZFN74fswUZ<ZhF|a<qRPWtdmu;H+
zo*6c4IlQvjq;q^$@Mo#0GrAKNgl%9EUF?<Na!q5AjEbL(bI)UwzPGEgcb(h*Z{@+S
z*Uz5JPVIdw9yq`2r8sXzs^z*%tAoS}olYjb{U2#H)m~fq)Rom+v|_(aKmF+6^dDy~
zRoE$~eCFBpC%Uh7`-+e`3zv6Hy{_@IV$zOHYl4)xyY-UeweDLkDn7TJw~~RUlmC>G
z(-Vcx+=l1Z-)i_yN-+C!?)0W9+V2<wOlMpa>tf|hW1QKvbhlK=)suUctbYDcLus1a
zi;G<b?Shh<gI8@Z&M7^tw(XpZse?v^uhR6SjJLc;Bo5s@d-ngI)BArei(VW4_Gznf
zi$P<H#ka$cAHRF|?bqS<{`u?f|NZiE`S<Yc{C`Cng$1Wi$k=?-|MG#F%EFzwx1Oc!
z=2T)Z@O63;-GAcYhUypP?W<S6&f9t~&;QzL(W7<s^?yH9oR<k$?A(#KW7j^WN3V|`
zxBI{*INNRSYrk#x=gjw+sNSl0kH@iPYN*YnpC@lV{mIE>|4#0^v%T9qrzbgwbwdP0
zrV6;H_1W|$ay4DKaI!7N`NlDhe*Uwy%b!Qj*k)yBIOB)M#og;(Zc|qmzdbqly{({G
zueEJt{JdN}zw4{nEZ)e!<2!SpYu!sf|MUKSe&Mm#xAuPbo48vy|4!BIHD&J}9Av!s
z^XAKwC)2jRlhM5Vs_<&B;XS)|a{SHZtEY?WUyrr+ef;p_zvJ>t?C;E<H{WD!cx#>1
z_s0(tUzIO>e^|o8&G4B<x51gm3OWUXC*tH?O5|eW&Z+QBJgn1r!o}cJMuv#XIZX+j
zQwk0Zyr$asOMP!!MVa0`eR}$divN%Q7TCYb=R2JErElBob-Di6R(Jn@3o3xV|9}1e
z$6NFLg#`sJ4W~{WzG5#|_Tp~V=E5mPOYEi>Og^Uh`2=SuqbtJ{2idg|&N8KUY){>Y
zom2bw?Cq^qzy3Y`-F)J(L0{pf2`;Cqd?p_GVYXe(eP*yJSDoAxfw|tZ_io}is`au&
z?qFzi(VsheepkpXI&bIs_U!qowt+um3K)$%m%4}bch??i*xDs0l6Ct1#YG97m(Ty)
z+?^8;yKhd^ry1H;T@QscEf-jLhW}yA_Ot8Gy!spTnCH}?)|h5%tu+ib#*dCfJ<2+~
z<%vM3Xxc<e2lJcm0{y(1D^<d0d~CV;#r)^ftivT6*_UXnzS+~b^jB=l7n#TG&#x=F
zxP;tO+d9K(H;3#}zx4(u6_Y0h-dM+#&gSD3D5Sc@$$qJMis+d_xtU=f{4{1x*qo()
zFt>|$y@k}~$HA`{BpeupH$|tEY;zYnv&UCMqkhxQDT`S%!Zy!3vTJT+$Eu1Yyl&Pl
z^UqwEUg`IF@0zK`dYgS+r;2Ijuj~+-IB8Z`N>1?)1G{o*#c9(m?)R&zTZyh&r{isM
zbjp$&7xpBHYb8yIxhQ)6@fsuhx{D`2&)VG8f943QsfGKJgo_E;wYTN>Z&|c|{j3W+
z1w^KvNaRvxs8sK}k?6v~!MG)~SHVxxQ8T#wfX<uErx*-RmwfVDtn}c-owxeO`Q}&l
zUCVnhM}HpMj?EE)r}BiwQ|eFsV_Dm4%++v!VFlmXm0Na7bZI>mv})SautKv~<%osP
z;|puTa<8BJy7$rMx|Q*-m#w{d%zc-k?zz)C{MPRTTzX$bvuSHOX54&jU;V}IZ~s?I
zmr(W24*S)YJgXfJZJNHZvB~rAnS@-+$3H96_2-CjtNZx0?PM2czU+QdeoqY_cZ)~w
z`)jhM3%ryjCU5z<*XsG6V9_lL*gaMSDCu`i;O9twudtBAA?3NOmHPt)wzbC?t)8<!
zOIA5((<$IMF*(M?UqEqTai(TZ-vkEFn)Vl-i&k{FgoZv`B7C}TkH~fAhC^O_6L?bG
zPH>%JGjyA3@$LHi`+wv6w%xwE?05RBIr86)-tzBm-#vTv{Cj`DT=su&U%PAf?&_MF
zH^)0wT@PponBFsg63974X6x&9(dDn#-JO$vzU8W*smr;Ynnz~u{=2+?`(KsSSATu^
z@?`btv&%B$Vq<LX$(Q{;E}?pC;d{$ZB@%54d*pwYUk~42>wF}BZ)NSHzps9Y<-Jqc
zz*Mn+mxcY$%iH7YKVMyX>dV7de~;h3ef#di!s~}s(oB4kRea0T=Bb=G;+X8fa9(Hj
z<;i7Rlcp869CqaUUGV?MhYi&-%R6n>UVr=Y+FI!&d3STSovT)O754V#(PwY=M6VSo
zeDV9R#j%CYPi(n2r_n5SbL@JlcI}{`jweeaE!VFQT;^GGdCuJVbLY+7vbKDM;iUZ8
z3>KV>X3x`~|NpT2{yqzP>o@Z2-{r~k?Vf+{y?oo{t0$uv_#S^KsJPSHZr8^%Y0{pK
zOD(qEn<t%N@XlB<>F~1bU8UEz@K!oZP^nuG^^<|YNs}*FsNZ@i--2K!)r3zs=FN@S
zSNpN*ZpBO`za96#{=R+qhe5`Jv#Tua?Je!CV@sFo$N%|e|M#+e{qM8eb8mC>^9WfO
zaWJ!_mOj}f*82AOg;$-n?$+UTf4zLd|4cBDYrFgS-M@E<FDkcIeZG48vbg@aX1_Oo
z<LiE%to^ZsW8MkNAhCeNheZ~Xm6c{$C|GS4ci6VFK_m6duQj`m>lZ24i<&=>49<Mz
z9_;%*XyN}mldm<+5jr>J@~g<Dr^Mo&Z2C@GO0Hd<YMXe7Yv$p$OP)XfIRAd}*n`_A
zWY*NP?)+s=LdjP`w}r8~PBD>HTCTM<?EaUo&li?2QN7mn!{_A5iVHqxJUl8zJli`O
zW?WbLny8d^M7Xn~SZUeBodU{D>;fUDe$B9K-of{AuK4$$?%0(FByzVDEcxdn%cQ94
z)ybqa@$jZ&s!m21_KV&y>fg=KBKp>9<IOouD^=cHU!=6zN@?12t;JDxi#IEnEWD+X
z{#ebj*r+YjZoWyp@wTlF45}qBF6r>Fayz&9yqR)I!1Vr>h6ggLVR!Cq_fTzG_2YHe
z#KvjqQ<i11E@bFFl^(2c%Uy9-#97|?E5*_ky)LcV?RB<o>Ji`mT%nzI=jsxis+Tgo
zd~q@4vD$KNQ9H3^m%M{zR71OkW1M1meM3xJm5N!{sid+mj`(4_*&u6LPUggF)#@*I
zQeI#FHko6-&BZf2wblr!IyeZb_Vg)bICQWGZeh%IkYN%%7+WT`yt1l~`JBk&693b0
z{@BD7z6xU5`eK_vpV`FEveT5-s&E!@Sp`p$(7k%7W7WD0=BYMbLCuWJ7VND1yMNlv
zS^Ox#wN=&Nw8*0uhYo(7o4@A_OY+~Nhkrf(-y|CAxizTx`F7Q<<)0nYa+jTzH@NNj
z#5n!@`MIG}zlro+Nc!67^23;|iFIORR_Dw|Ngly5dS=V6<QzQ_^Y9L<a9AHNcZN_$
zW$c$6_N5b8*Rwo1xzq3Jaf6u;8k2Q|&8nwODqi<-fy=i}?~A*Zr$xD18kx#*?)|>t
z)WqkJ;ktW0d+Gv<+=5P6vH18&o_SKy{^ZS@@K%p=rt^+C&RJ6R;zqN<dbZ9pk&`ap
zdMguL@?z4?38A7#JzIXy_6OZ4`Lt;F*Y*0_3wip@ugly2`EZ#3{{J8UUcJhbSj1tr
z`t7qNU-y0OU3o+L{mR$3&X#SxS*9T3rgq48_uZ1)CE3kQhnU|_sgybX`}o7c`^=Bb
z_0PrawXl!P&C8Rszbj+mHf_bL?hP5`H~znl|3B9z&p(X$bJbPbZ_Xm30ZRLC=G@y;
z_xIb|*_$ux`Rx9>E4u$~p443Dk1S5h_RqcT%DS9GFk*2=z54ZFdq4d{KTnJAd+D&j
zo@cwcxy{@6@AB>A>V=kE{(O1;y|{g~U!B?(KG)!jd46khUhTf9x!cx;_bPgED7vss
zoTl8a>Ek@(mWRZwW6d4RA3geHn5^7qzb&h-+H@~xkMjfro|@kqepl4)f4}@{)#tO<
z!M8;1w6X3@+*7mT&+C5sKY#!B_nX_wO1a7$y3t=7!sh?vL>_AzqjX$Ukb~aqxz5)u
zO6JYk)wZkj_PVI&vWwnEu?9S=m}Rom$K~CY&tBOAPNf=0CJR-tpM8DV{QlmbVonwR
zFZ<iqR@B!PZ?3*^RNxldc|EiB@pIRQ$370%kDs&G!al$D@depm|Ni|;+<t$vk(Cus
z`|*DQA=aNFx`pQ%ubcYVd};csq9+UAfA8=AyU@L1-R+o|eQ$QH%Z=Thr64ye@9FGo
zU8V2$7V9}_m?qkO4Q72Vb2jq!%iBxWI(5A~vo`pX)U=B8%XfIioa(4Qw<bl|H$<hV
z<(#mklLn)<Yi0brNi&vwb4aMX!E}b{^Q{jrT+TFWH70*j*3+?@#e3xQn}ypt4_%t*
zJ*D~OYYRV}TSqH<);6goZo0HAeP`vxV-8;f!yV^kMjq5qIMc%Mpv2Mm*dhJaJ=1)S
zH>;E{_`4@cZD~SD!y}I=GMpLD?6nwMQV&g?^3p%J#gXx#Vy9I|;@OFQ4y)#zY?(iG
zOH$s7Im+Bumdv>ly|+DVM$pO~bCm_voTg9uAM`OXf7Q~3t5-2kauJ%Atm+*3|4FLF
zvq^g&d@8c-wz8HAJ{a|6hf7P_Ef&SYPiJI1K0mXuWYen5Z>7if>iqOM>ff`|LZ2)C
zl*xIM_A@^PpA@B}7&X4wbN;x}j#Igo%T5@(FD>?voarg<vaUh1xSrcGxJ+}m_8w1B
zt>^nYPp#b`u3@z9ki1E(d)s8EpQ}v0R`$eS{ypcSQR((cN$#hbG;4R18Wd*s7w}mG
zurl~5t(qjaW@Td4qOIqgrra#(Icb$Rt1TmZv2w5~N0Zf+7CB~54j+%3K}si;Qo?>L
z$@4!c5LrAwsz0){=#IpH_kSANHB)YuwppIHj_qpEm|;B4@cetm6HDDEZEok(HP-yJ
z<Gw(S&LJJf!!DEOYwtN<v2p&j8&Ue|ZyVQq@(~XBY`D5))f`X5<_X%Wmy%+;k54uK
zcjotYgIdK$o8R5iztNdJU);%7H1eODiEDD8*QzHX#i6IKpV$-|>hZyjfw6PhnZyLo
zo}i284K}+t&A4Fx;-uyE9eqli(h9CQd()UqiZo03BUZN>Pw`#fV8q?}U{;&dbB8sr
zwrQBIWj=Iz^R*;X!HzfkPF^wMj?CDQDf{!wk|2j7uN3|gEu-cwlhu!Gs`kDd5t9Aa
zb`kTEo{y=~4h>p<fB*g6E&uO!+S6nAYwlLo?D+lV<Evl4_PxLP^X<!-`M2}7My=l!
zdVOtJ?Cr9-#d^y<wQhXM;|RRba%Yu}P^d-A%a4WS<=-oAd{68;ci5s;LUM}D&tv8c
zT8CT?9eOFG8Zz~79e>|1fwI3hw$;fviMST^W?!3Kdf7C1&O7<`e;*2p>&yTDFz{hs
z5LKy=;G%7Kn^CKaLsNm7zp$rzriGtj&%-Z|YWBue&s_JTpxE@@z4-b)ySf_QR{ee4
zemPt}zT{ke&5mUL9cBNPWS$E@8+lvcxT96Zp6uIfp}V%-T{rvOWcI+Bi=J2{hO(#%
zmOiPu{j03?oN^}5$A1q$-u>-g&i^}aYwho6tL^`N+`WEZWogkazQV5T8(+3pb@%`6
zlN7!3N&3R%V&hBJpU(tM+StX!skrM}`Rr{kWvmM1YQ4`bW}VphQn=TeY0~Q0S>iu5
zdt^BGxIFQ6mVczO`dF_2ZQ=d4ul<hgu(o+;U-#p~hTFZD8T!Jr%j<5=oA>ir;^Ttd
z-(nxD__QnDyL<26y*pq2GKQSEXu!+=`?#jp<sWY@%n`qQ{CMv&)h%4zuT1=Qx3}MY
z>^Ea~Xn9ptSy9=ovbi%pA6=}mDsaQ4x=C9+Y(l%W4*KQozROX#kNfS%`3+X94$Pdp
zTvX%qs^p*hi<zD}xlNXxcfsjm?5sQ21VbWDK6h+XQFVy=7AB?D{o>+BM(;va6;8u2
zDUZ3|e@C>nFa6Qxa>wP=<e)1yAI|@rb#rk*3jeCD7VW{$50v$Y#XjA$&t)zn)6$u<
zgEOW!h_E!6Of1^8hPfat;z@VPx71~pVj7FJG;hs0C$?ebFD2$~@s}y<-WC_!G&JO#
z++mY@bnSuAr!l6o%~6vU@rZ5}o3Ny@WK!}{`-vQL-sXOgNQgh$8CBiNdThqp1`URm
zluBDqz5EpEm={OIk6Eeux@NkzpQ?W6sz1$ZW{^?RtGNOvE-e%akn0p_`84lj6nnB@
z+K&~IXEf^hj&5t*`DWJBqNf@Qj)aD+xFon`T8FHo+(*lW--N_wPh(SS((>qvyg29V
zR?7)HUlgWYo$hL@Hfi0|JzWfzE8|MeiBJB+vC(s5MpDDM-EDKK<{oW08oek={f^=p
zCm!3dr#C}2u7xhYrN!vr{b?#E+X{!e)kb{tl|}S;)~r&{>5#~k$(ygmxMGF3*QsMk
ziGm)VCfF^UDf;=5ld8Ygsb6ak$Y(vBQrEcYN6xjN)C<>q?wmQndu2^x@Lbo3pd)Dv
z0-xX5s77lPvqda5U|M^T@#?DJsZ$x1+3I`NS8lp6=YRgocVef*Yu$Q-bCW*2=)PhQ
z`nPpu=&xeE8_!*=y<MIe$1~Oo`l;XTimEY~(HMOCdUIy1>0#qy8Q(&;=TD+Kl_qUs
z((XTZX{+kQ$1Dz7UHcX;%e_^~{ou-~;_5?%O^>@0CVowGE133paZ`g!r-Xp80*iwx
zmx7>GaK|lW6U)V}qLGvDgb5vFKgHSeq+`!#-lsj$>F4vZUv>&@4vXw-*rmC~Y3j`d
zM;~;Yv1#a7W7zn{vGM1JivNGV*XLHwirZVI*mGs>{FpVjj9uT)-j;iN+uK>&vTJT{
z+4u3DW%h21HiOno2~L-(p=zB{Gm{?}^d2_gYp<=p!M|ch?Alq|Zky<C@;l73W(CX6
z<$izs1dB9x2Qe(u><j)dZ_@o+f>r_#{%-hRA#Uq+-#+f{oxAVL1FxE>l^x3Vy65{*
zMQn8+*O5g`OqUNx{mQJlGe3Ub<(pyb91kD<`&jqtyYS%x3!dM*f4}aZ7xTistVfo0
zl3dECx~*BY%xAe&f26rg2z_q;`l{#dS97+r9k1wb;Y{?px3zChm&X)_KR-77e*OLZ
zzW*Pa8ESvu-CqCaaQXeZ-)Dd2SS!ZA{Z_yK@0W)kpDnn|a%Sn~(hfe+&`4L-x77lR
zy!qo97@N|b-s*k7ZQn~9D+UoIhjb72lj1X`{54jaCfmrM)%Z4e@iES0wKCrh-{|jX
zQSURq^!@PR$9Z?`?QCrL9!or0A#j_|TzY@h#PU}T0j@LT<=>eu;F9NSKOx9@QL(Wv
z`Tx7S{o(ijUff%;wM^5BabCdM!nS@5cR9Xx=llGJ4@}&fzfD~H_S<dI<-PeAZkEL`
zs&E~D;yEGb9djC+PfCeO{=(Cmyt~dgwFH0d^zi882v~BeHq>HPkG-<!zt9MS-R{9k
zR*Q@ojr3C1CoNpFZSMB-5e<<c>4l|H*K6a}%wkhiJrUgPV{rM|vkfA1EMNRwuvY%a
zN+#bQqHFv$W102~rOPb6Wtr{Aa;@l&`pML-j+K5}S1CQZvh=&lm1750FQf>vPcu!s
z*{&7V>v>G%){44woU>f-Z<}0T@%qx~`KgzBzHTw$oq8gUvGK#iPjS<aPMSM`g_9-0
zTvk=wFl5fPL)|WG-^f|YpI9?9s&ij=nc_@=Pf`vSW70Q!{mkSHIAG}N*cIybCOzQF
zvZMfp=&8%DrzR;oDk`{6v=rYFF~Rui{ZmVsRDExBteL5KeQn3r^(_f1-l+^irz_*9
zC@sBFU$&%UbG@5jc1^cX>eR-Jk1NvqKFm3sy;{yCs^RU)ErzPU%(Qn$s!r}Gy4&O4
zZERCCCwzv)+MG?@NzvWP3l1#r+jWq2I@hPC`Eyfdx=y+_xBi;c(w4L}&S#$a>#z57
zX%Q5WG`OK=m@1{QfNPOP1q=JMEKbeM%UDjF3%EZea&o%Q=H-=%pX09eJ!L*vWiv1K
z=go%6wf0*#s6VaouFX%{73D3K<@S1o*3M56Cbe0+JdHk_G6}bczIU+7`h)ixUYoTS
z``JG)J*7Q+)2WP*<9n|PJX)F?_fX^8n;C24roZ9(QS2`EDEKkwwMu{gwEz3H4wa=n
zv8$ZDWz%VW%dk|g#+i$zzWRMaruA*fc_GiZFUupBH*}pkBq%7#dHa}Y?CaSr*B@xM
zX&SBO;#h7I?pe7pqR6`|)HA|zQ)tyIq1P*pR2EFRxp4Lp=N`SEQ(CmkH!nINqc)wh
z{AHkP%M*@H!J~ejbJj1tb3Fap_N#jrJhO6f5$20@Y0)@w`0dyDP^l%~{(O4#=jhE}
zCT<GPV;^5CFY3*Y^4?im7S(-3!kOdojn=c%7Pm;uur%sVwE4pR+^Xf8>1^*M%if0F
z&Dt9`e_d32A7cm4_B?s<Guv#8KCU$MG%Pau5UFY2^6yuo70)@rBL?r<GH2W<JDB3_
zJhiCf*~xjj>n_}!ZLrCxMWFoglE*9lK1;c+#Anqfp=6LTg=1mA@S>=qnZ<ciUM_z<
zyX^7)y3)d1Yh(iwdRo?IuZRx3-Kgxj%Wt!XRZ5`i^P1~sYxAe7`aCxIX219U5B>k&
zuFKoU%#V%v^HTo*!`=65ua=x$e)sP0+5YeCYJa?Z{CoE9+qDu$R9$C%)b9VV>d8rS
zzsa1;7cB4Z%)7iT`+fM6Sw|-LN?-bNJ7r$)?Qf@-wLdufW4U0(q?ZEx{g)pm%KUD$
zTw|7-v;6M6kH6*j_dixh+*M!0Arm@tjtTF&uonhJ5{|Z2e22eO{BL+P``Bg!D<}6S
z9HLGV|DV?XJ#VfbSNi|w{%zBz%W+P7==fPE#H1}#qOGp}#`|xpcEznfA3vx3?y9#@
z*DaK;*obbstbQzWPMg5ui>i}Ogt(ZhhFUVPl$<+X6?STK_Upq2+U@D#=b2<ymfkvI
zTy%K8dEnWeLpJ(llCzAT-?;tqOxMdLj<4%<Pu)Bf<9y`BCW{}3)>+w$r`9zF%oTfi
zG4gm}!o?yfyT$W*9~Fs5ZTP-a@?@FD(a0GS{TF?*JDZgdF>gxJ0Ut)jjSs)P(My{t
z>|EOROP597^-})sz$m>rg5vTC+rpk375WL*O)|20{D`OgaEFRYXp#3hG3zkl?A)Vn
z9tTgJED1EWm>+T2=&fYvP9wkPvOQC3*Dn*eb8|x0N!_nDh4W@icPmc2^n^3Nv@(PD
z!}%h^%dO%~-3<y#ow^<#8xPp0Iq){5*i8t$+x2I0MEfZxqt&TAIgTsq-HX1dc7EFR
zDx>Yc&h)U)uN$?Zq(WnZM71@P7frhQ#Hd>Nv#ovq&4mRjg&$>etEQ}SXY<HyICHOO
zaorQm1v2SI^}HrWEN}h%A-L$N)f#uDApgdt6PiBx9W}}7Shn!IvCABGhMTGlkqwjV
zlCPc#NMdY15O_JlwfNk|2~mrhw#x{5uw6AVIwkQl=xO+c^+^+T-|~pbsD*7={Yxsw
z#VdQ?OXH=>P1erRlvaDWYSoKZ2PgB%vaN{t=ocuv!{qRVGY7-F^EQ^4`(!ti-2SP0
z?OElW^P9E)%n}U##~pU!8tZMZ`Tu9lpS>=5?)m}|g%3HIedk>-y$W9Q_I$|qUw4F7
z7|6c5zg^>T@u{uPZgf1pF|lOks??~B9NgS0ewQx4@9*fFQ_Lp87%)}CP@%Gr_vf){
z|6OeoPBSd}gw*?#pLcs`Xtq4F<-U3G#3G%99i>}W_Y2BcvO6qW;Oo4Ar?-fgahF6-
zXH%u*^q|t<1){4ietSE&j5jDiP|+d4T)TBz+Gi1+i4zr`G5EdU6u7|AR5;=H?{@iJ
zuRotXd9yow`LU2KYm=h*tv~e^Kargn_-V?_zfs{~H3r<rH4f<rB}ud=N*rG(&uly?
z`*z;d(tEqvb`)FiJuch(YSy_#jTPcW8dIjt)MEWP=jFE+9;+jo20e^(nORaxUPOt_
zvsO5A%yPGni91);(Zvh~l|n0y#7gYE@2~zgt96l}vNfy1S2N=htyu!OhM$)6hyQ(8
zQ2+her!RL_zW&~*erJoi`0~p~SI$#hCt$4@_^QFoC2pf*R>y(e>r|U8dO!a9_DsM2
z_wW7n|C<H=eA|8h&$Hd@S00vMZ};c-{rVpt5BrzzZm+XwvDj!Nbkpae<#Z1(vrsh`
z_7^QuPDTM*rgQVddf&@fsV-;G6qVt=;ILZ2Q+4yJ$}E`*fk&Rp?D~Ei$Q|0z|GT}=
z!iQ^P?85vBif?}Zwvhex*S70TdA}B8ppE10gvSgmk1YgEV|VAR*nC=X#k=Em=HDD8
zIRjcJ<mIoc`|<F7{lCNVPd^=cBDXCxIMK;TMR4A+jqlqJ=h^otGp)I{`fS<excpn|
zTwRLzgk6-hpK@|2w!AF3;TybOfo+mIbMmGoK_|Z@o|`Fl|LLh&pQ~nlTB(1EXHt&!
z*Bf<llMV$Jd@!E5=KStnp&3VWY%Q;KT@rJiR3LJ~ZAQXpgNG~yI+=G;&&>bM@uNf`
zN!0IJ5OZMHG3OXQk5_>eUstR>_GQ6>l$l#KCAl1@TXetDI9Ik{6T3n5+nA5%tK`-c
zT{bp|W_hzM*g@ga5>CNIn|7|}*4;co#Xs=hmLT=i_k9f~uVs6jT9c>7ZL{H#&R<J!
zu~$2OD)F(4hz6xNi5T~<VmQBh#q%<)82thPUxybP&SeFyO=t^QB7P|7&fd?9TuS3^
zsP{a3lXA>zlj8QEWg?BMy4!vRZRqYOQZ@J<V4!STXjb;@fV-n3i_u9wX3eJ4@{?6M
zctc#~)$HItcU}Cf;e^j}&YL!h>a^T)ut<>H<gYWsJ-c$Pz{*)x%uAL;O_W*AFrzPW
z?dk5BDl4P5WvrSMbU>Sj!IHsjt<b7X-hb^={nyyE8$>->ujtfT<8AvbDUl;Ic>11J
zi|?$TbyskIfAnbb<w*}GUb0|ZB2iyFU*Geg!Re(NwN^~>Za&Z>-nG$!i&NG7bCUvx
zoPp@E%r$ySr;eTSNvoK!r|3jo$=}phLKjrsvc<lNR*1@lPEXypa^{x#@n(^SgO8@&
ztZ$Uz^X+hQ2)gU(nYgs{()PbT`?C0_&s_W=vhJKymvVQ{a^;Y}h8~_zryr18#o<=V
zRs7p0x9-p4OV7=Z{9rx)u6WWyouak*#yWK?l_t#5x1H;}Pvx|s(4V*&mQR=4{*CiY
zNxzZ*yhG-hV6#i7GIz1Wv9zmOR&RS5b-(OhazWt{!7ECJ3pbg6UUQW(gt4#5&)~JG
zi)6#6m7SLAmc`s5p;yiCSu#C6pd<PE;T&cY-#D|Y8#D}*CKRt*xJ;Sh=}C*i4R2af
z%pz~N>L{(UbZeiqWd4ye`i3p9ci)!RJ9mFiP3_K`>rPk-_62@^EIhZkZ%=Vw(7(VW
zNyV9O0xV{kxw$xTINS3amQbpGe|uZ$Jb@`DRUt|_yN;bRI1?=EE)p0qbFz$K<$_5w
z^$NFm2)111tnhc}o8_kDl@j{3r8hhJeMBi&VUobvd#Cn1a?p8@<mgx`z^BA4%-LY^
ze8LUKJKI*5UF8Y<y?~K{(el*WtA`)Q?vKk2(DYf<vo=2Wy883w;_2$om*1Y;{du$a
z^XKc&>+dn{v@FfNUXpv;)T`*mclmX8|9<n^zd82sw*5cQ)l8fMbw57d-T(J%{{8=7
zUOxPo*cGg5bA8Eq))cYMoTaQ2N_EzBtXsG&d)BqBtB;w=O)%aNkseoEcVJ=RNl}gq
zlIP>*x=HYFte4^MKfGc7GP^d%WfLunwtu|6Idr?~^9EI(#}%?{%ehWHS&%z{W4D-e
zzJ+JhR`uu0_0Rk3hp!HOQ1GI?JRCHs#s661=#6E!euZAHxL@TWAh?N9mcf#ptNri8
ze-HDv-`8CIxo-E}IXV8<W^Yvu49Tl$v=n!eRZ{3luK!<f`+Idj=AQgL%U2ZZ&MB@v
zF;O-B=GS<(ZAwgT3)}@74(R;+7r5e`;#$k(ufg8)yL)TrMJBM>&2i42xJg#jQP{=5
zW8&v^ti>&hM88O`2(R1J^)x71{gTd!FCI~Kd*x0p6cIc9e=(=v%(Z6z$DZBVes4#@
z)Wvq`YEv&c`JQ)=cx)ZGWm~_fOOe#d=(|Ug@<JHTJ$lZ%YD1gt8E1nv6>N(hq)#?H
zxaZE{nG>5Emh6z|5Ibxba?U^b>C6OXRn->hTQR%^l9qPiYtF1o&CZ;BXx%LBY};d}
zcV~F)du0@;-yHEfeoa?{O3ph!&pSJ#rcYvfbw1MMQbPE2S5>(}AAwTC#%2@kL_znK
zj<m04Y-wRBOZx4<iiU|ME}WBbZSQigxk3MYPD-A5Z0)sse##|<kab@l|Elo~e$J)7
ze$`6<DWCREJJ0Aa;ZJ|+lrE*9r@ZPv*Bj?qt2j)!@#%uirDe|MfnH&|geUbrnfB$1
zk@oyh=dXA6t$IB#OS43ftMJeF%1yC`%t6y<D=cI!?NV_2xaQH??nvQH#+vo5M!pRy
zf?I{Trhe-6KWf|UwL*zQVGi5ZX--L}s!x90H7i=Xw#c?F@cza>J*D^JuO={D{&sfW
zEuO107w7bryX^bXf9ZBY1e0q3qpC@pO-N?))U5T#4jx{!|N7B{I<}Q^yf;3tVSOF;
z<i(i^IquIn-%h1p`R4xkdr;Uz{U{6jn-dq-@m>GGI!V)JX7j^K>CwxLL*!FDkIt@N
z{?RMag(advw<>IY-p&h;t_w^tWolO9WESc&^68T6NU(9a?(=rlYqQnMUSHUHwrj$S
znjS^1=W7{OrzOm3)O2iQ7SP(_@x)<b<kYjtLEH3R8VFsP@x`XaGm-1b3CsQ|YyUSb
zh;wm~;GE0uQZP?Is>m*ZXR(KAdW*~d02ME(sMmFx;(N^W*#o6TwYl08Z|vW{^Vh$p
zucxa&U%jkQEk5ns2PFshbsz7&QV3XNGGCBYr1X{G0Y#;Z8BNOWFO=Mi*5%q*rtn?1
z;7zQtcvF73Zjyo)pNv{!cW}tEGiSBZInRELTooS3+Mt_qN6^d8reoJS>$`aY9}Bl%
zuj@LL>NatHSk8Zk1*_Lt-p<)|PMcro_>$)inF?{+mfwE+?AOMAshe8dMJw`dv?uQO
zf5-JHQKGE&|HEIWEbo`C-u(Hq`TBc*9&X?N`||Akxb3s~YH#m+`*K_CMTz#0d3*Nl
z-g)=Exp?~Y{J3|^|J}WRZ*TSg!@vEnm-GJ?SoTLGRKfK*&wgdig}R!m!7dz)*(-0G
z++CI#_1a(er1RvvOFo>vbVP0HkF?#YrBWqtlJ`rOe_y%jxq^%eo8Z;FyK#H&&D$T9
zY+NW~$iU#mqwt7xuJikol{r!cE-d>BZ*QI)E}p*LY_>5&mCYCP-@9kGw;!+L33TdM
z;J3(2c=oaC=Wo`Ox~z~obi@C0-S+#oHhhmC+<x}wPtESTX0fm5y1OXMaNob8^qCo>
z;zNl?cMsov`_^~=ZR-Qwmp7lD+<o}(VaN37St@(>cJB!=h|=%$<jvP8-~2fJhf~zD
zvkhPLZ7y?6|4|X^@<HV1uQeR?laf@mIPEwxPWf$IF4?VdE-vbiQd@BypZfi!Hyk}U
zQi~$l7f*RwTK7lo<sYV*?VkfIW#u?Te{c725J=meCl{q9_IZC;&)-$=boeD%8QPPg
zxhFHG9CTYLW7vLEQ%=6g#V_@m!p$8AmjpzJZa%LSXnN9%<zQ%j^06unx!%vk2j6v^
zd9u*hAj*5k8>Q7J99Bfu#&4YWR!2a4$J9+tOQvv5nqs)q;bh}VvjgH!V>*Jbww{ws
z=n}XZ**uv!nDLWYrDFNnS2}B3BUT3cl%^T^Dy|ATwo^N6k*8AR$|WbC1>cjJJ~w=s
z&+3_etq$`=)lQ~ncV|16EKyQmIG?+Ef!Kx2-qy2Ir)I^?e)_(ZWkI0GM5AcW%?n%t
zk60>gx*L8gKZ4W$<hk^0e(&5(@d@X-0t5ZdO)j~h{IKxsVo}xQzn<^(^Z6oBvns73
z>dE!fn^|>OwbviDVGqA(vT>P@(xoQ{efu>9&o29@Q=z?D?1{{eE>+GGZKhgl9JG!!
zDIJ)0TEWowW9hShFFze(uGr`Qo2M>t_5KX6?Ei06EMuQV<?9Nza?jzMu~}X5>h~3M
zqHE6X6VVcrYuvMPqx6~UpH^L3y;|;l+3TKF=e^2H7Owey^yuHsncY{{hDJtyOmA88
z!+3S${NQ`_md9!@zLj~~ll|ROb-M3b`DF$34#>=mT$A$q4Zot&iJmz+<((dF4qi?S
zf)-lG1(?m|n#I1}Q{1ckJXW}}tFWt#L-komrW1>3!{Q0$S1K4D8c0Ug_^rKAWYD!@
z!sKO&w{M;I`}s3+a^WT&hADd{a_lR9;t<*D=sM}px+sCAAqQ8T6A%ngcz)*2l$9A>
zT0)24J^c9l`}_NGd+zM3xX3Wm!ukkD>OUuzCnv1EnK!&{a}E9@;MDBF#pkJXTvP3o
z`bv*C?RWqFJ^S|U*{^5s?(V;yEYUlE>YZ;hO7j>PDkcA`JUuRGcFt6;qv7ffrE^J_
zTRm)duKW0;mPbM|)K_QD$>3DCWoygz<M&ql{^a_+b9r2a(vyjg?>^=Yw7FaIdfU#<
zYRL$WEgQ``Yc6*&{9G1U+Be6A^RkHZjAI++`FGcq7nhe8f8X#w@5kfW>*LE`i>pWH
z$XLi6uPCnn|L<9q{IR!3f4=?q?(kjz?d8|M_kTZZ;5scNd)2LniA+LMw`2u~x|YtX
zc|L1u_O*Snzpr}UF5x?Q=WNo_TBCE*dc(gh5*AqLuE=T7mw3bf_wjxAdz0lWH@$Xu
z74fL$e!l*Cetr2)tB;Crmd%r#+BlQd;eyF;UQ=Ex|CZk7vt^%OJ^J(d?X#teL**a6
zE2y~h{&yjlV?^MQFn2ze<(V?=%||%qS$z8U@#C{!S=;X`A4uC=`Sa1yZ(o)ey9K*$
zU0t%q^L?U~nZol6{m%8(rF(Z*NBn&DG(Z0TxAJ<HYyZFA|L=aQ_hx};PoT5b#enUc
z9+OtN6||h2=(u!ZK*&6s!1;59uf3XG<<{J-ywywGu!vtUbZhM13rC;m`uKAQwzW)Y
zX06Rv>^o;PM}0S|Vf0)*kJQwjsln?v^XpY}6;EEU$m{rovs-?xFlW80k{9+{C4Uhw
zOR8ao?jQR#JbWDvomt0|<dg*qT15RhR-ZcYgpvD-@j0E6D2^%VTQtux7+==!Oz;d?
zwQO~WT}VusZ<MF@hEwY1N3;aP7yqc@=sv#qvsR_?+6Aqj7VNG$5%l6*%gOo)30aH0
zR?To?>-cmfz(KSBnBwLq+!b<-n>3=c>m3tsok*G$by6{{^8Dn|sT~Rv8iZDMNW9X{
zmGlzJf5e-5I*RMn(Iu-|l;d5jj$OEOUaP}}Md#0vG|tnrD#R3~tgg-D+!#_F;>)=-
z;$<+~s=I<W+aIm5omCmOE8xlO;#&t|u5Lc$7RDJeBb9Uim5K8o-uE_kk34+h&N&g=
zbA_8Sb}}&bU-{2!qRplG^YE!VHS^^PjaMHqn<f+Aknqi6lOWSM1|J4?FD?~_rWuo#
zO*q)}cEghZ(eT%o-b}Ku|NBO6jh<ZTzS*yMJ@>VDRjS{0XHq@dtFeFL3~RYF6SsG0
zO-s#w;LpnFxYS7d%t>a|mV?YPs;6EB#V@vhZ)>}5>DQe;NAw@Iig6l!DEXonKVh<7
z`~6=clRj%Z7EbMd<UMKiCdOlrr!vX0%TEbm7EIli@{4QAguB*HQ#9Z2aq-KrvF1IS
zp~vR;naQFv^yjTD_bNZF%?{VjoN-6wxWLp<p<KIKl~)d(ftnrpHciT(WR_29VRQd&
z5ZWfu;=y=9E8jIVkS~zsVxGd}dqzqMRsv!wrS~iqJ}r}SRXWV<%xY$k+J8-V&eM3O
zPVvCi|3e&?7yc?K{rf(Ae%#!gYq^&>quzK)ZdbW2Xj#&7ZgvApNUrm9fw@1MM0ZVF
zp60aanj+8Re-9sm&LUslZyy_b_pUwP9Jw!6=YMwbi>x{xBzNz&{`;`i;m_68XQ#>V
ze{WwXul7Zbx3}Fu^Lyf#IJJb24^Bj13%&P!hKNLH<>s4ld-m0Qe6%P~F*4UI_xjG)
zzh{3q>}Oi0>&bJt?uEQ_2ItYq>?&V)HBDaZ$(EVO%#!|0$wg4Kv%T*B<KOe!Ca%wy
z|Mziq{=b#KpRrumsj031zkU9_zh7Qn&U=+w@ZH|_+u=G1wr3}D6pdfbQ)t##<Xk=D
zft1LcU1Ga?qvw{q-Su(qVzwA=hcyQ*wN>s0hGcUz227mNFj1ZJuf;p({Gi`<KFZ!2
zVb&}nA8yUPboBb_&)a1y3u_;HFdaIwh^50LZbjn#s{0j1atpKn{(Si}eEIYEwY#{*
zW<4@kCc>F&D7@x$oNmpY-TxoHub*Cb%PiJ{$2>gT`TqPl^Y;9G1!`EGEqm<y%Q<5G
z#?RifCGRQZ6n`@~+J5`??$yyfACBkO|9@WpXZiiQ-@oPm|9hAJ|MUHx?lh0sz>;&b
zCm+0}&JdD%qM~}$5&57=-{!fbXLLThQu%GZsue5qBGDJtryC6Xt2ITxcO0Knz4lcy
zqp4UcN5i5{Ce`Pg54<^PD-@xn(`hp6c(m)+k1id?+r^AmD%y%0%>6s{mhEES?9}vM
zr!;L6>oz1aet7cZkQS?j|24&xjwj`uwnc3ZpZRxdXkyUy0*m}>8|AJ}+i~|#nc71y
zE}KI>5{CCz2?uJQFIp1fyUTfws&>?D8SjWR&AFO8|4w@4<?`T!@lJ!(8%k3drp$c%
z{p|eSbBxTA&vf0B9RveCC+@oDspw@Y8lrRJ^UO^%HZK>u>0~_Xp~jsp7J6^=ca^+x
zoU1iQcFH%6bJ1F%Y$o+hOqqP|YqgRO%Op+R?iH~wR?O6KVXsKR<-!%YjtgbN1=SK3
ziPW=%XrKL@wJ0X^<gT3Sy3c#XIuC0V=)add)pAep?3NrCF_*%ZtGvTgEIT${OKs4*
zRlUOFR=~qI#k%_bk;(31N1~3}C>t|Q%lFL_4*uV_d9~*9q|Dey;XPFz^DGP<mMl=q
z6ytDkDOklfAw;t>Gx+wDjh0h98&7M$=zQQMu2pn_^>ad8?2CzqV#I%m%!v%1wr%O=
zbBb5>X5?<+QfkPoTi<nH4U1)(e(fI4H4#T1dCp(F`R{X$x!(P+ey6u;|1kb<XU)4$
zVS0VuvUTTQWbL!rd3;y+kzK$3ZJPi0rtSg#Gw-f0_ilcyT6&LLdqdWVq=L9T4}Y1g
zc3IkU)FAl@=jnEZl|ub%gl4#x@tVb6UL*CscKgjAX2&#JGz$b$**?#V*u?5QXNHaW
z+Vc{xe>%o<)U04|I3%F{abKZaz=YP^4gFa!t9}}7v~~$}@eFJccAGrKqv%EN6GySo
zvh_C00=E=11RA)eHNAW$C}8pBcYC?~z1oV>%8#!~UteU*3fcCw?O`hCRDn6}_br|`
z7})-BkyY$;=uz-FXZc<-WD29hjMnCl+L4yGZx?$-+^SpHHRIi;4SpNF)|7)!w0i!2
zpXL2+;n(FKUtP|jqbzKZDDt%7lZ5*e0ndb|HoMMfH(IjK6%gEY)$7`p;};gJ3v+p%
zofGp$);w>oRrYy3xp(EsEs0MgmKshs;uK)wT58ZZg*98p{ml1|M-I1tf6u>rZ<a;v
zM$6iryzT$=?f?FmE?@Vfr2KY!<{o~YLqfS6x}R@|b54}8d@tbX9FS1jwBqck(&+uS
zw%uKp8+OlhiGZNxRe_^?%e}UpHDge=2+|i!x%AP1clXBo{R{gqYZQv~9WtDFg&`nt
z%9K5M`nx`d$L?ko&79#OmGsomfKkz`H#_>%J?l>lYZvEUk6m|NKYaD+ysf+wGo>b_
z?UHfj)OKNc_wL!d{r~I#|K9!fySV!D+sm@&+|CP|o}PaD>#mPWlw_1G*_X`|Ii_`O
z%cPvB`D|;1_#Pkr%Ios)@cVtg+wK4UG~fSoTXb%oUCeTY>_D9|i|1=!oyuGrbWwyS
zHQ8KjwsyGqvwu~^Gelp^dLdtynP^*SY1fe!BP+V<w#>w+6&<O6Pj{tQTxprq*0bxp
z^_j=*me+Djbf(^!lNG$&^QF$`8N4f_X09p<oiML;>7tXjJFRLj|B_j|vi_IL+l>tI
z3QJa+Cgo|l=<=A%{KONisJY9l|GDk?Ia4B9&z>t0oP9GY*u1%M(zII@E)FZ0y4VhT
zZhG=_^|S*=Eu)MI<$~reQeA(y!HDHp#L+DKer}&dX}awT=LAgjjbv$&I@t9<=)8*2
zbzaY`>2oC}RV`V=8RSxOUiaT>k%x1dCV!FOe)lbiH724n@+kjT(|rw<OCH(ZT{6qu
z?xe-?83I<T4rN}ky*@`Z#mv{)wEh0Hkfu}1o62HE?BDr>ANiDdei_qQnQtb-6GSF2
zQkv!9eA8)$s-D>-4+CbF+xF33%XrVqJoU3xUADBmjl)r8tu)Jo&#MaSCI=U|#BL9e
zvR>-4&1L6IXNQ8aYuB_s-|AlT>&m<MiY1#1AD{HK$ewjsOjUB0Y5mqcD=w=xU5xlK
zL&fl?ld3@R>}D(DO2!Gjzc<_7VG0v@b)#4L!I|`>Vy;)WFL`-jQdz@~lzV|H9hPkV
zMP5@S7*7Rkw-7F$8YEE`<=GH)lR0r}fvJ1EV$=Hg)0f^Znt$KrkJGv*=R-ck2c1{0
zJIM9z%V|B~$KuYq6^}muaOGwG_eiKdabta0^DMh$F<zZOm1jXh&)FTf?lV%EXKVSa
zWM<{Vxwn$HN|cK_o|X#YJo898FFbbht6Qa~8xKnyGC10)nq<`_IJHsv_qCtL42vgb
zoLj8pyLNNq*`u14>7EKNcP#l5<I3XdD5R5TmKe$upz=x1@<>tK`DqSo?}mK~QB_Xw
z)@u5}b}{kNFD~wOfe+7~z54Ahzkk;ro6Ku+8&p#QKDfBtGhdfmb9vJDd)v=dt6UNZ
z>sZ1i<6>d({jq`HWsxS2kdBTGcXK9`^f=qM8Xj0DKk3Vnj`<!MX*-T2UzxbEbB_JZ
zIeTh<zWRQ@W^>-u!~gys)~$3|*kUar+7c+~a>81P-Av<b*Xa@uiJ9kiwrARZx?%N(
z>25nSla1Y-ud94_-!y(JsciP*h-F61?Ftc-Pk}DiCpas$Gu$#$(#g57#AJQqmE(Ks
z{(rpt^tABx6Nm4<-T&wI{<{Ak59hsT=E`8%^w98(e~prpL;E>lp{@-Ki-WHRT>kkr
zbGKb?jr)^}N~@3Xlo{U@cbq!Gqq9M#B51kbV~t?>_PbZ!AA4xv>-<DPNSEQnozyAG
z84MgJ9zWai)=wrt=<)TF%UoVaU#}>7!YO@!&br;xS6}sh?_aw)cCEi{yI$Wub$81s
zO<ReTnNjWUHdx;=Tz6pqjd}Ct&6zVd=lbe((OYGf?-QS;c7Myc-B)fGx-d+4+F`;P
z5_w3wL4l)S|NYF4_3?Y|%*)wd^|R{sT33fi3zxH97Q!4BpUPU*K6)!nR@uE|@t4Z9
zO^eR`(^#9k=IWah2`eJq;wS2@GB{TykX5<V#btuajm_yITNm89)TXN$8hX&`$s(64
zOP^e_(dx*^J=s(jlOo!=-Aq{a(>ez&-&f)eJS!ia;=WMzHbBVXBZu_H2`>8XE<w9a
z$wbVr71c;R8eqqyTlN3>yoXFntv0q-xTT7I^Dj!=5t8qjc}T}tNJj5aeplt8Tl$As
zTa+SmJynn9tKLuFYC1#H!m_)>(l*rR;<A@U>P2(3PkrKg!<EwfT%|0L^YIZKOON@A
zi>9vhE8eFa*0VU~l=>X!cO@oEm&&b_?UVHJU&nbw>S~bhsU;pWR3o>DaE4y`<(c~-
zb>GV6iPpxOIaU{+e&CRqmZ)EQE^qbA#?uDZCEvxT{(Y7+Q|7$ZvehRoC-0QlI(1&h
ztj!0OY1~_!Tch)DipT0RkC*H(<O}S$!1hpMQX5a^`i(~(g>3n)wVmz2)F{=hk0eb(
z7#H__(Ar$~Q8M#bQFe=e+WOGe-WkDlzP*Pt^SXE3nHG{Ec(zB$`2~Z;ktOF(eziT9
zsXS9POG0JsR?A`+ZM|N<UoAHl6^C!$QIdT&#VqscnG0HHF1%<zX_6!~=ZNT`CHwvB
z603Kww!L>Kru8V-gVhR;DyMwf<M=I{GwzG=y_@Gnzpt1td|dzd{|<NO-^>5U#5?SY
znElUc<+-M|pOVRrdl%YPt<wqHzG#8YnMo2;Um68nI66(z<ob8b0|My@Q*4!T&m>G|
z5?uPZRq%wTgXmL%q$g=lcjs=mip=*jPvrQUC-FU4qFE|F&eMWx@kKMnhU6{T&MLge
zuRPLllvJ`T(tDX`Qe+q)xKQx;Sw#oF6Pkibnpzg`j~HK^Bh-JCXX=;w0GryYF1-#e
ztQW1b=1eLG=uk53YyZnHf3NoIud7dI=kHqgHYez2MTy6~$)BItOj3V-(mYJ&rUFx|
zU~Ay<%surL6?^J?&MdlAQN(xa>ap#5AGX~7zPq--Ld;u<NnzKVnn3;+C$?pr+j;rz
z%lG%{KDV#mSGm`gbB}v>kI=T|8?6jx#VI-dKEk=_V6{}=^C?^kDFJ<jU52aKL`7#r
zZkhRQ&*N&*-q`2o=j6{iTlO|u*4h53r=X+cluoGy$1M9@v`z)<ue`#>+LydzZ|DE#
z|2gFAe%0---m`Ppt+(YDBbX<gpW~zG_a`fVlVHc8MH8hmuAP0hBzkVi>b1++7$O`Q
zEY(-c=+d^eJiGVd#mNFLJ;MFfa^Dhfv?rSLwhO4Au4XoAU^=A3=)X(2ly9!GAfIr1
z;;e6ii;ih-a@{ScWWsTGm96c~xH<Fp#jcI($lGhrc-{2ws>U*DzSVQho%V9vUJ^Vd
z3UtQIUYp$&wLd@2HqYN*`T5zWHzijqlDU{Wyb?BW%Rhf<-Jm|*gsbI==J5bVZg)<Z
zLup&HZnJeRQ<RH&KYQDK`C|*k6Ma4BPSCJzV909zlcn8}k@HyV<fd7B7KBMU{wz83
z?#9j?*P{*uN#?F?*sN<|mA^*slZ(H6YUZuXKBrcZ2K83?c|wn~YYS_437>2FIBUz=
z&4xzrzZ=SyTdaQ4&AGlIHRszyfy}n&B1w5qMPue9ZT+YA_P$-c+*zfXfY5UvzPw8J
zTX=dwhQ*_u`?d1Je(4BsHcmXTrpSof|1~e`v#AMklO#NNav%9kGe59Q?Br*wVga3;
zxUUSQ%T}zqX`-6iwfuID-eluFM+1&V+;ExZv_iw<m2K7|0Vln;pFbFOc9>r8eYVso
zKxFEgnN#l^*_Ev0e(tG}L*lQA9IjtBGjClaf7vL-iffC{=2b<iONtbpcRju0__%%2
zSMTF~`xez!&04v1s%kBpo2+Pk#L5XJ=Z>%b%e>RW&mzz#f<xddUvK<^q$rCdPxJ0B
zrHl~I!VO+x#<s?NpRSzH`Oz|C%iazrEuBR(=Y%UtX};d^HtbxlY-)^-T5(|6`$I~X
zJ&k_2&r=mmQeAN1vC6Yu6IXC2t=aYO6YH~&6>Y+D{C61w&TO3^AGKiHPSdrrs*f+s
z@k)Gnimg5Rg2PgcmtS94Rj_?Eil4Hh$GL8q*tt70GxHTTYc?@$ws`!7yMa^R<$21w
zFE#!urmid2eq!N{p42|SaQ>^0W^cvMz7efwoV)B-n(Mq}=c}#i&dFBC+_C>D{zvKI
z3-OoreFuckYZPtS<)FRKu~g0bUQU|uWR>I}M!Nn7SEcy8sM2|PBU5kPD%PJdXVN{>
zxHtlr%x9Q7VOQw&-0M&F<{wME@BddrGD)mi_Ki!^f)q~E89qxoTG+xKYe+QZ%UCi!
z5BfTd-^<&&#8>UCz$E6|tjvo{p4hZa6t32r$f#ZY*C!;f<tL}E62rMlc9&Hb)n>^&
zzI*m>{QderXOFJ_JUyLv(sw?Yrs5~E3r+^l`77fv?R$ddip)+c-{AZ5e1GkXl$DH2
z=YC$Bzbkb6y)CP!R^FU@nmga=wBGctkp2WN35WLQ&!6xA|7`pJKX2vtRsT-9{eoFO
z!qli;Lh1Ij+++3=7teXpYUM2++HDph(s)3q)wEG$ZJ>II;R6jp!2{dRZY$ikFw}is
z)ceF&XFr`XWb)~lcjD)=+*=Bb%>IkGqy#-!k2l`mFK@T+f1r*`L!{ueTWw#wIy-nS
zd#yU)ajP`1^ir;0*zC#Ywr5yXi+LLHUQ;<LV(rs#!BRnpsm-9!;z*+Suh=<Ne9bM2
zZ3;rcj`6MAbmV4gNvTVIyZphzQoV4Nle}ryrdhm#bHj5k%->t{xBdOTzn@Qk-ri@{
z-=FyX!f}m`XZGI}jNb9L->s8c&axq>XkX>mS6}7pf4{xH{{EiYpT9o+$+>IB(WS7{
z)A)45VwM2^Pbn+`ekN8&CGR-ht7MEyyk0i3QewHmN5609`pe79FPo@s%nfoDn5ezt
z0;{9C{kF;O3}V7>h5nXoS?Xu+x9ZtKo?VwVc6|-LGGT{U*{Y5gYrNf8RhCb9{{3XX
z+O|cFJ}>9kt1SxICYHJM)pN6DoB8Z`XP9g@cCb%BKTGvcWNyFM?g>6!KUxd*L!6e0
z#HaMKsAg0}_FVB=XjAGpf7jKmA>SB{n0FhlOwf*EyY+*)xZNyR;@O?^lAm}ZR*L0z
zo?3M7QmU@n`W=hYC-$c)ry8Al66(28W^#wZ#A#DSGwiJA&6}3_C~1q#8Af-fuov2A
zz1vRtDXg?wBUAG-DA2)WdFphflNKL$N+uTlvM|!{kK2Fcb7XDN-z$@kEa$Xx-P!Q<
zj38sq=0?5Kr5vCAI5)m)6iic7PHL@WbY)#Bv;CyD%hajTUuRCdKZkvabCype?@1N+
zm+tAl?YlemLPA(m%rv5}c%Mo$n7w}LwJDDtyl%|gwQ6p$MpJ@(>MDVycg}2Hs8Kuj
z6l>$m&lBfMZ|-I(+9oBiWQwYWuKI)Grx9@~k3Lp!DOkBo#WhIuke0}MPv7<K3m5&Y
z-Y`r3a?rY<8y!FQZ+1Vr<;P9;{NSQFsj*kz1lso}t_-)?^&my8`QD>XHEuhnv`ZzN
zi_~W4d6_LTPwBKE)AC}bI5w4MsoG1w_Qo>QHMXvlj4o?$TJ-PM%)NDtHv8vneqB8G
zUU92+-Nj3D``KHpTD|NyOZ+;z-tDX8=iJ0UwuxSub<+-i+^ll<RhfrcLdo6ULx<wl
zFJe)hZxML>bnBcYvTmWqHg&VIQdMSoFLvR)_5POy>-}x#cBj3L-ThIvxAwsSC8ddu
zdmEptzW*&M>M$YZQwUe;2J`d%c1M`pokOC{Ec(wWPx6)FWj*fVrMK89=+F+aqc`^K
z_Q<L>-gkRJkc*N+U~`;jlg4T{$zLxIFTX9nukz2Uv*FWUuPt|Ac_g!dx!~G#jQ}S1
z^yaBg*BG4^Vdh?ZR)kYTWmBJ{?1KY$+AVx0`u>`AZTl?m2_cgf?!DOHaAWPR{C&S(
z+5i9P|Nq0Qw>NKb%O9H8-swEy#3IYRhA#`w2t0Tut+4UwB&#boj`^ubv0Kc1_13P8
zg-_6SN#JC~Ynn?G->ix@U2sO`=Au0+P3%gae2@1Qgv@MOwZQh>jT3G{&Ni2(q)IL5
z{L#5bxj%z<_VTi)X3tlD&3qlL(&eyiNthtFvFYWdwKF)6IX!4-Ty7!zsG{StB40J%
z-ZsxlBV(><9j7WqyRYW9Xz)lC)O57ZzOYqGW@>Sj){4)4v&8O1-HqJ-_t)3k^)<Es
zj;)M0?Ki6JG5mJ;hQ*HkyZ`_A_xS4X{{G#^4K^}O%C|AO{r21H_4ocg?63dv>gnpu
zZ%^*ZmYt|2{l`?)Ib-cs?do)|Bb(mbU;XGs@L^sqt%*ye4_mN_$sPN5L4J|r4A%&j
z6Va{uN~^MDU6yHctXR<Gtm4jdF(yK3(%hA$-G9D#PW_(zQbp{@&dVxuyaQQOjU}RL
z`+sH~b-H_CPo3DbmL)>EEDOIx2C}h+-26QAa(pD)rZev83Td<Eh}|vXwbXx_ck<%~
zasLUiYL!wyws>e%J~}#WLGe;1W5>O(0v@c)juaH`JoBnyO>CI$y<2LxC#4ooo}?tW
z>4siypW4j*n;fRT&rM?T{9Zm!wS#NL=~=yp=B|l1nD$`Zl)Vlo3}<b2Y1y2|&dqRF
z#AHg(;-fi@Gbc>mV?Co;(APj+W5T6rtc8kfhu`QP_X^S3oDg+#siauw$LZ%M8LT;J
z_;`WPQl7%d0`FHQCK_>*MW%I&9pPqZnbLQYsc%Bi@=0@zXYuH2<jmf3=B4|Z32V+T
zxTE;7)GOv{&HvXv8@K2CT&?+{A$#J$&6kJ$|68&OX(}3cPqT^K^QxwD6NBOkk4K)1
z4fN|oTJJ1b8hwQ?Mv-MUf7j8?E1&#K*lH1zmbXgos-LJySC(6!Q`KkD4Mmn$lmETS
zk9efLO6rHlmWgk-EQva6|3%}_?-N42t`8D~C8Fm`vvN-jvS>bMA^qBwv(fRA&U}U6
z$G(1k7gW3Kp8u7*TF<*T-GA)4=i|hOTNd@-3XyZ^ximM$Qf-I1h=9VffO|ff+ZJqo
zQgGsz@_d&f_l8+M&wjc#&Do@XINkc?Qa1Mf2FqV>82*VLnpGgEQ*`87%`*MSb4%-P
zo{8&Udm#SO)9iz5Uj|-k?l`8j?>x6}?JfC-#)*5)R;jBP{7B}CVzZt+SJ_Ch%XV38
zo5bd08i^7oefNFzy>61b*~<2#uK#0^!&ACuY}s>p=BakG-)e#GVhb~_Jipd;d+A<(
z2ZxCVO+ETzlC9hgc<i;ASAOqHW)bt?V%)OPQ7AxAVmGtQERVxRS)w5`IBl(z=Ck+~
zcJY_@+sEAf_pkiVtDLuUtWt{Zm6WnBnt#tjG*nSQXre&UjHDj3Yo{!Y53JcJf4p|b
zuIfEI?PZQ-*sEn&Zgc40;lWwjaQo~x^Z44ov)AvpvcI{a?XF!MZ~N?p?=5d+l%2cj
zyLd*VR<7$uA5jwvDXCXhwnE%~2a_%YFPfzqQki6!FyEr2_x+yZY$2DqpNhDx?06xt
zVwu`B5f7spcCo<1YyGq8Pcv7l^1rxfdSl(+S6gD|Z`tR1U`1z|`=P9fifp?}I$Gqo
zLrt<I4sDP>?kw}&=Bp4}?_@8QP6ey?Q|9OH>5d9qe9SS$;rf<)6B7~=MN7`D4a>g2
zX8DCLlWTTW*4$_ff1Z1J%k9uF9P`($=lj0hrK0rn!*9>(UNjq3eR;xp*Q@6DudB1`
ze;u#?^|yTg{Sxi>GE058H?5S(4rrQYu((CVJIq7WWLL_%GDCGU%d2HeSD#?`^`T-p
zLyi`&ud%Vy?E8zgES}3KPMKS{)hs~Zr00swy4!TRL^n+_6J|Af!EQMJqvyups4qQi
zA+L`qoMkl#-m!6(snQcwqb#mAPY&TDbGSnK#8y<9dib~-f4+L&WlrJeNqfX3yNmXG
zb<>e<)M8yR>!$v4ugxmq+PXpWzJ#q!nRQ)fUIw?kgW}HfVOL+RxoFvc^q!HE2InIe
z4X<y9HgTq#m1a)V78Tnlws&h~`swZ40+|*xPnpi!9kA%;EF0~ejN%%%KJ}@-nywe`
zK1(4X%JGSnrL<jxM}&Zj#?Qj8{o)fQ>EAhWcMg|NcK_zxmX{~?iUfOy+ihRDUSVn5
zUw^q9Mi<*9b!KgSuz6W?TDgwk8BeDGm47GyHd!i*avJDf3Y?$eHO+L!v`41p*IG1v
zYc9?ZOR(0k>D}Yiq;zUh6_d%4Wl51QjX9UUQ7ty{@ajJpdC2M9vVS@aEMiNd&8kCn
zXJ%FGwmdz<zH_SBfdh|pDz7zkT2}0c@b~fzeOR*U!TrS!YkH=J*=(}7@?Yumyi+QP
zPx>Yup4YKdTw=!0zn-R5oBn&OTpl7Mci`Cdm(E+iPFX2b@>0DjPi}20+oGl3jY`**
zT14XOV+3DMKcXMC@zN(Pj-y@GM*e)~7HkmNVp%@@rI^2lqQlqoJvv%{f1RJCRA?yl
z-_l)2?~2)Zzh~Kxi%d5hOyAb&Uuk_>mW#<w_06AMY>$jX|6TU1|0;JT@KU#bsA5-3
z#!d&NPdP<3x0;qJc)HKLxbVrIl|7eVdS`u{`J%(8>|M+zlchS<i*siz^LeC^*mG8E
z+v;=ct*)CHnj}i~iujv#EESoR&TM~L*2s8&M}bJ9qWijq7Dp>*DJ69Bd^1pTEt=vX
zY?*n<^np_Gy^S-vw@KACeDAXns_fH=IkM-}MBxcL6qg@p3@v@r{#}0Ezfb>O75#o>
z{`~cvA5GJqFFZLRNg_}*Rb-mPp$q-B`N4e8Ww};xZ1yvhS@!Sp-M8(tXZv3-KP<xA
z{`}M4MV~jzaM_<}J05#|?)<r?bMLnwzIylS?$zJ#*Z=zP@L}GQ1yeFyL$d;I_IG^s
z)_kz{mWi_FBJC$qu8Kt-n$wP7lbYdlomD1_?~02m({m5bE!nlNrz-a!7CA0)Od;k@
zNTvA6vL6p8XGl6l*IBqMU&nfVLHy3Nr@MOdueHpJlyuxv>~bV9Ss=0Ch0Fr?tdpG1
zJck-BJ!U+*;{Vvrr@1XoUHcvLtOx&2HVO4TmlnIlw1R1EeEhuqJ8L!yy*-|Jy?^hj
zb>VkkzTEl!_U+$4K3@JYz5ajQ?qAn^i+_Fkl2cQ2!`9|bTrJaquVuY2&iU!LT(G@Y
zX0vlya;f4j^Zfh!{=RxW`EszZ3a8R?iSjdv>)4hGY&dXqQ=!wOuGdPDT+W&$CTn9(
zI=;SIZT`um?SSQ_n>{8^=6v_%m@`+S^QWEygV(tkcPqUMrl@qdi*!wjXf9>Q{QBj3
zjYP_im{}IgOGS3y)C^wrVoKta70+kPi=E{BK=$nOCpNb%-d%iFESJ8twpa2m)4Vky
zn@=ZZbv-eW-yy~vcXYe<X@!8fmJ37LR=ZB#@IqC^ee#kv&Hlv`(~hZxXeONWeVie8
zQ&VALImd#DA~QGxC+>LBk~Ot#<F!<C)rBXcM0_G<Ot?P9TQuDFxR+}jpZ4mYrC*At
z9Z@-DxhsdSQ%h_4nKf4?J)UyUU0S}_>qBW4XA}2PoxnxwS1_v|*tt_c^jv3arFO<=
z*;(_=cHCJp{nYwpoy!V~JMT>Lnlp{TYD%h3Yv=@p^+wAtitG!R>Hh4M(;Gd}X)1Gc
zw!Pu%+cQI2EJ|3+sF-Vl*8aack4D*^ST;#`wU?&Cv304Z8T|HCd#Jd3>{ec3q~H^g
zf4xt`MU2JkqCZz&A8Y8G#um?e+11znq-l0uT%>*N<)yoQt=qU*BloFX%AOecJZ8d`
z1ih@7mPJKZqn<DImz{RBl2`S1spG<fsfD4XZ?;Yq^^*Cx$Ibgyscv<$?oGZGPqh_n
zCZ~O1*}W*z-c<Y$t3ccQ&`mapI(iQR42*>~Tcqzhy7fcfGfDPkJV&(tSbYzad*$ye
zI#1-pX`v@G|Nl4H%Jnhb`Qk_K;Nmr3!yVE@uBBUA&kwyTBmVuz>hH|+FGl@cWnc2}
zQM%`81;d$Qs$rdanheJrKAx+#pI!H+*u+*%amo2f9GoJ-zjmm7U1C%$l4uraqh+Yg
z{%k?8{B2S5+{d$yZI|=ln7TaBbP;=qP-y3=z)hjiyt!%HG_MORi=1?_j7L^;fka@J
z(Zr59&Yf!-))lULbu)36%#A<66OR`Cj%nWKe80*`ocTy#%TWuS11p5)mUlGp1+o}=
z$mH4A{<>cO`}NbC-KV?nn{)U|ZIc%FabV7H+fmSCcq~(fzy05a=K>rHKl-sGUCHFy
z?fn1YvtPga<@3t=ADeU<a+oj)XsBkTdy24p?dX$n?@&_uWANtxw~vSaJ$!bY|GnLv
zx|$ode2*j6w3j|8?3%yr<-}D5n<B*u)s-~7xePWI8a%ro;E;N9<D3^4mcFc<^wWUb
z&C~O)+oC5LhZo9!`)lG;8M&`!&I+5ay@CrnJ5H?3UKkm?J#*I7&$2tkCM>cv$;nXQ
zkqJ?*%8mM@yF+ZQ&!-O`3{)DeTRI<~xU5j<!lxiORk!%fO!s!X`P-VOz1wkb-oAgY
zUSH4mbGEr-AGh=G;?M8iJo)nG%b~>64<8=>QStZXxBonTzdybX&$n_)t^0j7`n~jh
z6X!V_vY9w6ckQsS-(OQ$`u)S>-?zU$+Y+05pxTA!P%uw0PjJV?nH+n+=U(1&Z?RTt
z-hTBnON@?WP0)GfwEEk7u{k-97bZ&lc9u_`qwT?})ZU?!<JbOX7n7)(fV<_pQ%4*)
z{O9ec+Hzpcaru^Fzvkkdj;1z#=e}>OZQUF@`B`F3-30YbwVC>FHeHT<8~JwO#-Q8h
z!_Mp6UtzShsA<*T^?xQG5I_Cr$fA>~&QWH%=G#PfN~QO_$eD8Fk*aQ0ugP@H%VKjk
z^B8^FWAQd3h=J+Ga<^~4gx>1-yZcUC=E;@(X6=zg-`*xMwwpm~CvUIxnwFe0(=N)L
z;mPbN8aMMd_iUck*?CL&Hdkc2e6nOB_miJ;8+vbX9&Cwh<(}g5ZAMv0(q%a@(E>RQ
zb)z#McJ-Jmw)<&5d)jwY=&7}8wOGieJfHqgE4<#RKMP3Rs=zdHO5vQj3lu8t)>)P+
z`lksWQ<(Ul^R38=CC)4Jw(rmg`m`iaguzx*$s(#pw*SEI^O_v5mPH9p&kVCz9;>J6
zxA~0vOl#r5_M8>`3OU6F6I>oQ$V_#(`XV?ehFL(dlc{*u2QSO|FH#dcj_SnDGF_#l
zxm(CMU)E^-#v)GTWxXGacL=TJnJ%e5`<`=S>t5xJ^66UTtrF`?9)0eQF{_H&ohdr6
zr)Ki|y?JJD4y^V0+?}%MkiHhb4U@^o_?a0tvSAn2+^ssKP{DQWpk@CX@!k*5J*?*5
zz4UWsd#Zk`>(aSO-W^<%TQ~ia6G>U8Aed08`zr9wmFJZfQK$LS7ybJG)}AY|%8&c1
za*^nP%^|<zCrAGN|3ki$-`VQ({xw0H+m|f;C)53X^}=YsnECbjhCQ2?Z4qMaT%aVw
zDyygz9of5Nl~8Cz(5*yC=9Qn82(=!s%Q=>Jx8(M==&io5ca*-E;>@DZCpEEk>dazx
zpP7woJ3QvK1WKfwTf-#C5XR%h=Wx<esboe6cjl}03I`;m7fG&lobjRlL|506hBJp;
zdlKr67wuie#V#OK*8hIr|J(noK0Y$fk6UwYJ0GJ3SCe{)me%AWg;H*Oj*%+I3MG~$
z&S?v@n6-Q+L+jIm7xuBS|36&*z54iX=Q$!1Oy19b{W>~y3TNU~v-H)W&u{-TxZD2k
z-M3%$e@_4Z^LzKz$A91ceOJd(E4NDLT9l+oPsWp+X_ZF74Xmk!o+U0lvr>PZN!u^P
zR$`gs6X4j$*dw9O6`0!fc%eLByFnY%7rQv;he0lB_NI!JCAz=FW}B8qUtcSIU1^d=
zmzK8n#La7xI!_5tFkAk5a`^Ie>FY9Ozu({A|NFx~;VSoglZy`ts0XMe_#|sw)1Ov+
z+(|ZAYv-}}Jr!TS&Az{{w)!SN|J$tEpGp<)x4rgtK3g#(KyLZI`*-j2{9bt9_Rr7$
z`hWiafB4t`dHmjgySmKN!u|hCs+)gTe|md8KK9Q`|Nk3ot$$D7|L5(qM4O(&$8SeF
zruMZL+VnWv^R*{(C@L&0-<NxPUFNQ1)r*b=KaRb<<89U6yf9u<J<es967{>MWG`N?
z(Xi=+MNj)-#kLs-6*#yw`EPJ#FES}U^EmOj&c!gU)lV$bp6ub;TsJ#S`_-I$*3EZy
zZJ%)#mR(Y^aDM$%Wv+6s(?W+1J-*M8XJ)SL;V>>e{EqQlXtb;M`M8u*K8MrOHFsPH
zUFB8ga_(+3XVktuGmfTAS#Kok^L?W4|EW&u;?pfJHHZm&_#U}5WwwLM*DsR6U-N(T
zbqS@3adS^pJ-Li;>QW^QQNuH4`Oces?<%Raeu(hxzPje}rDYpf+?S?KSemzd<NPV*
z`uh2u0>T}IaWk}w&oE62acOo=5p16#bvu$J{)4X13mHrGR?#Jk8Yby!g}9&7*}uS@
zDR#!(wUGjuEu5}Cx=yc3JT8dIPC2kP>(i8*#SyW}X>WskULAY#P)vGS;Ug;z?FC9J
zH*L_cJoPYi>9Z9zTf7~A%}GiNP;>G=IfGj~c7e;M^E{kc&E1u5l?!L3SXwMsSSjaN
zck1u5RTGcS(t0>$Zuq*Yhy<(6Q9LatB7=69I(}w))2X+6MS#I^#sh`(ZX_|(+O!^3
z=UKGW;c-R6>hrsFwk}dXIPu5IZE@MQ`j?WUr;A-we37ocb4SCAM@3Bmsr>TO4@PoD
zrwLx2f3s-9rA)i{(B!Am=e|UqxZKimZQCR{?oNH?pKqj2NDF=XaEI@aanqFZZ>6;A
zPoDq(YySE@{mx%MOt0IT-uC^6eBihLw>0g-mOp4(73p%t@>r9r&izx%r2Ln5%#hux
zaBAY@)mGA<Co;HhnkIDgSd`!8KOLWRXC37?E79A1e@&wR-?Tc@jU~4~=k9(rr#(~3
z(xp3CP%Yr;iOMc!j<%<Ms&;NC_O1FNQ2JzY|Ft9PT4%Ht9a<#BIJHO4Z-JNAgA?M+
z^P9R{p35#^iux1ow_Z>rth9^eoMx7Yt6=?&yZ=A@jsO4j?9-d}e*)(g?GNU^z3r_?
z%B#ay+6!fV2QDh=F)ZSlsJwVl`H?e*pYpc<|M2nh?(gr{=jGjWe7twXtgR9<OyZdf
zm9nn?2z{;Zw=Ms?U--J+yKCp<wb$;i|Nf5s`Y(xQ4Q(#L$eX=iTbw`lT{yLO$-=5t
zXN`4cK4}oHu4fgRI#b~_i^~m>GJ%6(4q7?~ciHthbNoG__(^2Ca&>H(Y*|g%aYZew
z18lRVzV?gR{_*ayoox&^ZrErB-e7WWX!v`pRNpWD@4vI*`zwD=YX2Ai?`u3i|KW#)
zSz)o8)fm**2)QbAHpl)-Rr&0awf4MKVa|(N>#pVR`*rsA_WN-$^GXhTUY~pWNYn8R
z^%XmQ6@349{QK_q=!uLIuC#wUGJl_q^<JB3b6Iw~o@PrKv)PlUuaBQQFP`gx#G(~3
z_wW8WegEI5m;aKuE?f9HK9X3j*`n#jx{>LIpoN!ED#ORVbAEnt@q4O1Kl*z5a(C!u
zvj;Ng-nA+-Y#062UU#3LSzXAN>5ut2;fse@maft{^h!oxVX;eb#Hw><b6tIU&b?g6
za8zYNQpuSKb5}npIhGQh8um=(D`&{c72i~YXE;7Rd1<CxrAc{EsG2*sWuE34vAs`u
zB#i^_8XrIY`M~B|+;Qg_(;cE*->=J(ifCJ~?BdB_m0L|kwyP^dHMo<6G}Ldt@vb`h
z=kBuu8{c#kq@2v!e9%>A(GCg5=^XinT2pSdHcoTuO}ac;Np*etuU6MS&4wj6`f|@r
zl9P3L;Il6??Rv6lOw`r|j48Z=2}u&mm?rwitv9h|XB1fUMQcrZLD<ph0gH5G*h2%&
z0#dztjEo*9rA1C(l)BG$b+${7Y({s$0xjL;HKIq)GtCytkqx@P(lMx|ZS54stBR{9
zl{fg8FqPh&#H!ZvwEt()s_6;8Omrh+PxhVOy{O1PQEThQUr{qy8Z|lAygMCwmg%8P
zSCc_OqWnZxfr#&|-nCU?%+?HyjEV}r&-0$y$Z~3&uAR2PDRWv!G3WZJuNGxZ<2HPe
z8S^q|(#p@ey;D!H&0ml<Z-&Zz@l*P1&5wOq9r%0}v+Szpa?9HEXD{Xq760-#-Im#+
z|D($5-`@YH-f?zswUaC5o?m=7Y~B>3J^uB+26m_QPWmf*HC`^0ulNz9_4cytuDW%8
z^>r)$&s_i8d-dm^%Pu^>`s?fEzdN)3{h#^i-*XkKmGUk&-?%dUGC~pr!$ci4XQp(s
ztu*tQxt7zxhb7t1ccYAx>S|uk&pWO^(No*8;LH52Nu_Gifq@CrMLq_qvkFc-dvjN`
z>D^ry`<5rV%wjN5P;N4iJn0-^=rrTV&tp4}RZd;OkYgdP;P`w``8j1Kuar(hT`^To
zk;%_bX*_&6Wnzj}vLZXj18FA5?1rKV;>`^1YV#j03OF*cf$#fu(A|*N;`iS;mG-sb
zbK#SRXMY|&o#*`cfV2EEf#nARXSi<GnkXQ1N#uC@-G_&lzb=1ofA7v6(++X*<u`WS
z{b#^yc&y|Eqm|Qrfq7={o*eyod%E~^_1Bk|1&TeXur0Z4azr9h;>~rTP@ld}f40p`
z4+^+;u-{+WX@%<i8HXENIp@mGbMp-{)B0{RKOi+*uA(d0{*#FBx(MGRs{&>o*}d*L
z4-?0WE2ZmVcfUKbe%roS>*u~3^)-8WFECG3XmrkK+5Ps}QTcs8kJtZsdp&*b{~r(U
zniTSI-PmApyn{o?QNm)C%L9$`{0qcY@2=54`+SAa#j4Gg*4FoPw#Dvd3=BG6SXHrO
z@4p`pe?9)ay8r!q`Mg?jKfT(^KhqZ5+TXK@n|n9cFI-)ybbB>VVpgs4>a862Y;C>-
zSKN5NTtep2<HK_c1TG#h<Z@39OZ7XwZA+?{m}qC>thP)UfiEewzuubP|Cj&&ga7{8
z-(R;yZoM~Ko4dcedx}(vh0L--o$eItjkT8<7`Uw$@!a3!Qs%m4%gUyl`8PE~O@FIc
zi_VXlI%O40*H6U>UyWZDOfb3PtG1b^tI_4~sW}T)g`H0n`+VxpO;=4jaj}lA%>4D!
z`oA3sm@fZkrTg00Xw_5MWee<VR~snK+`Ru<oJ1g3;fXyRG4Dcxsu!zYT%&tT=&2$9
z%e3>(Ru@lB4E51iwd6qMF8k%PqKu|2k@t28*!(hByl{Hx&nS_GpMRJ3m~Q4-;_`Ky
z+L32`Tn&8zOB2ewzSxOteQWTlUKS$Hxru`_Q*rHsq}r5SY0etP%7POdJU?ypIg*pJ
zw6(u)9^3URd%6~Lb6%L~c+D$zMMjY7xw1azh?c)0C-+*GFTBI8^u&l)Qg}&y+O{jv
zYBi^BpICbP|NNcy*^^YPH<>ubt?QUIYo`9jg!3-tUqu3izaADaz4+&Uy`gigX{hDY
zPF;a+{cS5(ok+F&d!Ss{)U=$RVP>!+qk__dhiS{Ss`T}r*DO~%u`y(VhPky5t5Cw>
zcFAWAdzIDRMTSd1Ef-hX%0EY>J>ugb%Vt}r9Ii)>k-PryaJ|EOdh)RwoGo*|cf35i
zVEbH|#ooz395<xi;a~jd+0K_8a&snLI{$jjqZ;*FRps#}sq>F_<yyD>=$xy-Ti@aL
zYq{Kpz$*)0G+Z=Rc`eyf;bSBbB*;~q`{7%k<~`?UvfrI6{)mcKW-tpcJf7%PVRG7O
z<F{FtK5d&2=kmyA`|f`|SrYRDuU(!#_j$V29RELT)Bo+C?|S;D`-vh0i9fe5hR?~A
zoOkzZa+9tzQ~7?W>_^piohw^)A4{4vzcM;2W4dhWqkMB=*(<wu&U|v)P^n;(>ooOd
z`A?<uzJE6HzuafCa+#}1x~tv6Fe7z~Xtg*Se~;iG*J)=8#Dr95MhJT*B^Ea8Zg`Wo
z)k$Nr6Ypb38SjndynfRS&P?c7rfE1a@?(0S;<BCACnl<`4Q!W;@aup7|I6|EpZfbN
zN`D<Sm#_cx@9<%N`+ar4=huI{z24$vX6$uQIgUF?uajCWWv_qz`0wHF{r^53e_!{b
z;QyA}v45XkzhC!v^?I@R|DW!Dw_jV{7W240r1W0A|N49P{=NIY|L@!F`DINtH5M}c
z{o8lH-u=^Vea2sIZNWlA#Vc1PUCXxQO1)x!^SaOcP4_=A#4OJ>;j+n?5+;!UZr<H%
zZ&u&TO1W3$`RZRpSA@?h`LnOTxH7b<ESdW*Z`s|iTXy?hp4uTaA(3&%GZ)Q`XO?))
zF_!Hsvw0exSylb_?d!kUr~3K({rUU-Z%R&)d2}x230wK5oJ!q#*@Ig8QrF@&1VURm
zPQNV4|M~IZyN3_|y?gBc{(k-chs)p3m$OT=j=SryzI^q;&e-{Ne?C0_|LOk6f6L|X
zzqudte%<%ouV26ZyJfcgym`~+?EnAq?&TsYz2%kWv9WK3n~Sei__!R|xJ!7W<@FO2
zZ_I9<qN=L%rKIN9hmY?L|NXlujs4%x@cnxiuJ(-ZsAAx^x_00!U;E*gTMn$=X?HGX
zYSMilt2O=Hnd@FGI(a@L`u6ULopz;O+e^*(g)J;1c3R13+=!XYRe9i(*--(u+=m5=
z`!>xl7Cm@#8rzi}|I*E$o}a<+wK~+Q?}O#nhcS<xd7JFs$Y0<OzUA|~dP4RD4Xt|$
zQxsNAn7!=wguLA@=dVA>np{*K7$zj;H0{?BvAHo#U3=XYKDxM6<$mFk3chRWifY&K
zU3jp2)AamEuA)iY0h>H!+9VS5trxFY>biBdfk5-;#0yeE0jZ3WHXM!Fn7X#tH>}jm
zTwGvt+0ElCx}V&e7aCHMDv&AbePxZymK{4jt*Z){xms+d_3TMnQ&&hRJ3dL^lQI@=
z5oR{%XDH1P_tU+<xBmjy5tmt>NAAvKnQd{qS3&6U_RHS7>zJQ@p0xds$lhg=cjov!
zR%7?`J9t$8rG7#C7n_rcnd{nE*{$vWiE{SV+8h+BOmDW~U;AN!piSUe-NWC_jz#1u
ztq%Cz(`_JpXu-9IZ|hzyy7pN3-JkT2!3^<Jy0(}Vd=~Tc-d-i<*0Yb{$;5p}O>ZPz
zNT0{Mrc&T$vFvgK>!?|Q@*m<o7*}8FmAI$P&gdGVazMg|m*eBgUv<jHzjmwL|F2QW
zQ$MNke^Ft-ex8@z|F6G&J|11OdfShe{N6S^&rY<xDbWA6G+pNWrWInHY@1dldLO=a
z?)5_3-upHaeg-Ld+zv>wHg%TG5xLaEvARRUXs!Re{`SKqv&weup7}g>)_2b<Vv#d9
z|8|mO`6fDZgNI%at6*7LqUWqhD~u-J;EJA@T^qla#dK<TbL2huldN|bxJp)N8?NSX
z*<<zL(fJvgTG`SDX*+Cd{(Rj3@B07mhuiZ%7yYRC|8BkgzYirbb^pKJ4qtLI!tqVV
z$CMpW*XP*R{P}Twf6e#x^?y#c?{0tn`t|*Pzozg1|2%&GzhB?}eSG)w;lp=#W0lIp
zl2T9T#r}OXJHPJ#{JQVA@Be*t_Uj(GckiwpZhn4dX4<pJu4LcC72(=^SN^6+Yg<m7
zT0QaS(xcZ^Rex=G=K3&unun~!o!*(v$Dgb|sASE%HSG1})kYOMPwP0hw+nN!G8(_F
zD3?wCy`*!-+2WE?l_l0Q^Pl~Fz54d;yXX8)_rHH<SM%dXMa-wSUK=h~)$I8Fa=m?R
z{r4{!?-({4N2zrm>^XDhn2cyqmENO$A3S-UaDOWL^KpIszx>Sy{rvsQmp^X)eeq4_
z&xgOt1PjCB6i;`heod{kvRU*0+sAji?f*V}`_}ybpWXNWzyJTU{{Oev`PYs<>;HWG
zc>D3khWe2m`T2SI`FZ#ERBfHVzUAmc$$e{<U*5HD+UA+g90fZJEqRn?T<)t-FS>Np
z_W#H8|EIM)__=-mpR2E5&z|e|u%sw|ev!fI%DS4Gm}^VVz4Jc4HmXU=<%`a3*F}G{
zZrd%-wbAB(yx}3ok;o&}XZUSdlAE`&KR<P#%zy4)?x|;b+m3$Lw_b1maB6P2&GxfG
z{o*Q*iyR)A1?-WyCprIz$kwX<l`ju&%hZ>Rtc+7G7b&%9xG8;Mfy}L~q3c*K1-|0R
z3s^Jbul8>Cu&oU3N^V_Ib-9(@fw_x=*muf4Z*Fq@ZZtdJIrQF(54TvhnwGLYxY;DA
z{C|B~OV81On!@iZEF}22BZM|~c)#@9xm>2~jGLkZN6b`LuHF{w+a<4M9`r7$lIdqJ
zY`0#!bl*JZG~rbR%3pl@rnP_7dB0D!IQVq-+qEG-ul0I9Sj{PtVp<=>WPDn&coo}~
zpj_)$mzrE|aa)QUF+IF;@A1-$516-gZfDhKb8YzU%Gq}C{f2k)UrW9wuCMy5CBv|J
zalmb<?P)g}SzkGKu5C{<@#Q`D<-6*`t-of^WuNpkt?-7k)Q3aUwsGFL7#VRmOi234
zLp6!tm976G>s76y=e0Y&oOvyo;rW-(D-Ty{WtVbXm&knoT-12p&mUD0htl{2KWg2%
zKTqIA`HKx-T(h6}ZrfP<Q(Y%i>BJeq<*^J8JMF{2@JT!TX&3$8yr_Hsi`RelPX2wq
zcH#4P6TGi_JkDZ0@0z8~u2ei@w{_%`?k`~#b5>4QEoh9Hc0BgbyUQ`h&&MU|-Dp|$
zi!UIJ;gkDRzvwpwiup!8XS`}eJ>&T1OgFuMX7}Hz%QDN?wk&>{bMu5uLwc0|vRS5E
zUm9&q5Rr&n_BCUhTggOkA<mHLCbJ?cCwW<9=YG3uv)*sY(iiS4k0!ZKVeUv+Y9@NX
z)_TJ9r_1I4)mYVuEK99SdcOSeryr*e+wcGT&Gh{`5A{WvLTWpnUjF#u^6v8bKcCD0
z|7y3tYcH3VAOH8=_Wi&A+W&nvdwu@;Z=N^4X{t^;azf|GDx<gM*Z2SV7QX-Q)%<(=
z?)gtyxN4POM2)FY2m6T<g?H?$=Oozmu{><O{nL2Uqc#S~cQa>RDVsYr-6`*#*hQHH
zgY44=&wT>|4%%#&oUnP4ikJ1wbt&bGFZ(UmzW9WtxpB^s)N6O`V)x#=TUY)2+x&fh
zzplRf?%`wi`d{1S-^i_2eqL%>SM%Y0{f~FYzrXi8y=hGwSIx&JM%_Q{Z>M}nDzv{>
zTKZW#;m6A#KkWb2|9^XTxBXe;x8<K+U48fVcICcVcZEJ4dNga2u)Kd+dHJ80%iH<e
z`%iyNoTz=_<=evjm)<Yez8Cwp)8Kvi=f{t^s;%VaWX@amdi82^^Y!b>!dE%@yxt|_
zcRV$==p18+>E^4ScCCAM^Wo{d@>}nx?^!eL;g^4h{r}&u|GB<>`S1LDHD71P|9N%w
z>|2IeJoU<{r}_Q+Pj9Mdm~8M))x#xZYgcHtU4Wx$h@<?mg=y|ecDE&j1D`w#G44}7
zP_XgPTlv*iPhAyE7V#bv6FP8K=thwzyU~|2?$AVwFNc=i^If&@*Q7+xI|d(|<5*06
z*v+yoTG{p#>^*g)_T+`o2WHA$(ixsn^8^0#gx99}YOg$YeDBe%O{*Rj#3bgpu6pig
zty?XbJ@Z+t@E;z}yJ0K`cKnjK*AyiG{j6Kdr%MinGU~w(XX%#xdH&H>h9^#UQ7wzf
z#IvzyCUqUYdvoq#|E)VWUz{%QBOthat$%~HhHBdjY0I0DHNJCOLqZKpGb#_h4F9bu
z7|z0T)#tm{^MeznH(Y!ym6mP5u`otUTt(uDT)B_w1CPrYtu+%Gyk@a)t`M!hdN$xs
z4C`doiMOn~thySOhjq=Bz3md~8ftnX`P7EF0jn|}p59O}<r=eVwn&x^&z|FI@A4;_
z=iUzXp3pIS;k<3#%XVmbzCJqj-W9uEm3yx<?=A9=-}TJkNouZro%r!T_3UQ_(kIpD
z{AAoZks<e+bV$w1r(wCBF15jhoQo^vo^jn0c(}c3(Yu=xSFHsd{0{A{(l?1qyxOq%
zx;=Aj^s0&9G!$dK3J*Nby1n1g?SJv|7k@fs?)=~R??%~#r$KjCym|1>@aP#o<}Wo@
z$_fKGRXJCy^k$##F4~iR@O0~vWB-=ce&*kqB<9@`_x<wG7i|t4{5f+b9-7A(ue&>S
zR=IWPMU$m-cdvcoCaJK=biF#$6s<q!iqtMG2&`;dyJ6}Bp7iqyMS)NK8dJ0k=lI=e
z+;rxZ-|M8FAi=zn627E!H(g>}yq%ev5-oqs)t%KMIWN=lZv*R=Px<-#>a5=@ay4<S
zUH<3a;lKO;Jv<M}<o(CLzP-J^{=5JF-^=g+`}TJB?Yn0(59N6kPx6eIsawXnM)Pgi
z?$@hVzn(HhR`cxczhB>dIieN9As+JRzW0>2$->f=Z1E9W;*xA*@BH6$rjA#ybobJP
zj#3lG<Q?vebEe-|Bh-2N&?J`@$8*OP*~FgQbN#OGtaU2BE;(yiW?!nA`fdICxcxh;
z{=PkXJATi<U*YTb?+V&i^zY-Z!*Ac+J^avPUjOms{POebs{X%weE06hicfDsf+uu5
zaLeO)KK=N<1>9Uk_bfJ?(A7VF_^|o?nE%hh@7I03diM7EZ`;=|o6_?_C25t)Dwi4y
zo%+A8{d4AY%rJfXO(Fi{yT5<u@3Y*u^X~q=wWXzJ%`acg`EYrcjZFV4A^8U;OaK1M
zns>>@msRKOM7A7;GdZTGr(LhHTDN?6`|tk#@_zsR<)t6rw737>|3~um!y|0}zlpEU
zk6)R!z02_09y=SGHI<U9?ES1VXPsH2w0YZd_01idHf*o1SiJq?**kJOO8TtXGxVQX
zU3E);;??zbQGLk%W0SYG9yFi+_QF=TZQAvR3QM<Lm0PwwbgfTv=I*a^3<W*(+Z97~
zR(_uvFx`Dy(r$OPP{;S;q8#Vdykq8^I>QqhICUQTpEE)+O;69qq=?+*IQo6fA(d1g
zyQ~iavV2Qso6Krr^JT2*ykM@e`qnHpzDm)~BeyQNc{Xk_KAfTa>E(J~?ghD%A3WGH
zYv=6QEepKa?w4N9s<iyoIpyfpyBj>CtJwbWnQ|L<EOG3O(hhHP;?cVv(UGxBfkkz(
zZk*4ToM5pH!o@v%SKZq=ZS&TB`pQAie;A9ceJIxbEFk3Z+Gkm*nQ7lw<$o@l`T5wN
z*ph&^XR};YX4TGKxir??n=A3lrV!1#+W*@E?WfD9f7d#~s5Eo>sh>iwm2VF3bue^(
z7UF(>Zvt;l;KC;h#jR|lE=cMcK8#W7XZ&!bSgQSd`Gkj$C(FAYvtM%L{pq#d4}yEt
zl;ors_pLC0?`UCLxGrc*^aO!JPLD0JW0mqePI)c0+51AO`<{Y}PqgZjDUEx*f&zU#
zZ|m&e{pYnwQ)kDdJr7Lk|6Pxr{>f%0=VL>@U$ZAn`SYm!qw!SJKcDL-@Bhiqes1xa
zip49tS4*T;#4GviV&As4>(LyI*!a6~duunR$A5a6dA9Q8%~faCX0&j`R+z-auhm@7
ze5>MhyZh3hkbs-h`xX{)Of~%dN_*$;Oxq_W%clg*QoGgNu~z-`>sJl4cLpw;%&6LT
z{ot8Ak;`*WJTLt0b!lqA7l9kSPBG^f8_ZEXV6B)?q`l<%x+<PayPJhsO%2kSCtRQU
zV1vq`f}-gA>*fEw2@l^t>#JGfG|{%wo&P`l^RNH)S^nRb>%W&@p1uA4pY{9yKfnL)
zSNQhr*FWoMI!gG=xLjiD@7`!Qv$>h+Ws2Mf&bi;NZ};?TT0Orm<XNXC6T8w{&lNHJ
z^XAGZI5&nC_L@BM5C1AY|2doHDy_R!Po}&PKAv$lVHt1veC-2QEIU;$M!KA4k4&Gr
zzGS=q<*Ca=wnu4XU$%Ll_kI2T%D-pz<NiNd{r9$c`1btudHI)g_Pr}>zrH?xf9>B_
z?=q(K+yD7;m)~Bl{>Q)b^7rn`&KE1<VEL#gd@yx~d)w95r@cNtx9YE8jk|yE-@EPm
z|9)M+ukvqN{pT$6^J_RYKi1j5UtgxRX^!9fx~l2%{-58+-?X2tV)o<m%aqEfzlZ(h
z|Myz#-}kqye(U7BXPcfrzI^xKwX1LceMs@q&QaYx_4St7lU8+QaICkQZ)0_8Pn4bR
z^54gQ|33cv@XH<REccjhTlfC{{@E^5UjF)JRrh4cDy`*DzkK*Gq1f%a%%&A;bEkcM
z@=hsszg+kGDJn~i=D97E|1Mo&o&0R=Vi7*^=Vr>R%pG5N^iN8^o$h_Wa%Jwf*O#A#
zPG){kd1$qXe`arGeqxR9<l9?+n_6XYWHnkR9kftMcy#k(ui%H0Z%>Y7ElTrW#B%rA
z>{YH``1I~s*h$~schSH+(N_7vl&E*vb2o?VdLhZu?3VI1V}|xx0jXWBt~buko<ECW
zJ<FF#)_1DsF-5<~XP7r#M>@)x=g*#NMmj?A+E(pO2j5PwOc5&oY|AU4VWG5r$Fk1D
zH;d1$>Qs2Q?$-Ww*FRP+(O%J2+3nJm?Z#&5?!M%>L_;S-_}yLk+BwlIEaEFKxfpe>
z^x9)6=c`lr;hnbM%9@Rz=2g}PwLLhs;?mriGv?f6K7TCF)!*xxjsm;p4A#9rz9wcG
zR#;T&eA#wK-fi9s%iZZrcQ-%OGhe!vU-9%>->ftLr`I$btbM^fU+wgpWm0=m?q+WO
zv&l-Qu)cP8ko#-fBL^-Tuj^vi<9z$rvY$Q)7E5#*^^Ws4xNp3Bf8D3Njf=PLI)1Fm
z=Px@$?aT_6nuQPE@aR1=<|-9ES5jPjQL;=?B0PAT0QbsGZ3p5^i&{-oeLXKPnPswa
zn$rF-DP8wDoG$tw4R^b}>+wBxo`<*g(1*`4LH`>b{Yw(RxOByT#ri8tjB=C=+ivSV
zk$k7<_)a5xf;)?$@`~ooXV&lkyS)Bi{XhBtnfd<j|9@(KU;pRf-Mhz+*F?Qmn$Z5{
zs6uIhsc*N}@i!utZfCi4PM2m`x+@-BqV-`#fqtS=(KjEKk3D;ItRiPkUUBBSXL@}9
zCOz+B>1mey6QvK%?Y!Hi(i|PP>v%)FOUER)ik;%YtF|!aaW3llxMjMUhQe_jrndC%
z+MJwkuBw8wQ!R2>54%pbDo(RrYyWJ1%EaB_{Oi|$3-{Dq|NXsv&G)~%+nwvaKAT<t
z>+k#e-?QWQ@4DA{Ord*bjBEb-_3?M_*xY?8Tof9)QL^Y8&%Eb<&wf=&F<E~7{@<rx
zziu_Oo%^Q1DBdwlc8kLKX9Z#fkJjzhR-CtE!SBVqA3t2WvhdhT+vzNQr{DXz$d!40
z-E=`EbCOB7%A_fmOkVQL)ld~=oH%)NK+wg?TW{Cr-Mh2r@2hXOuV0UkKfgZhuJPMx
zKKq^>J+SKc>$_K{%JG%||8~5-alJqP_p(DvKCg**Dbkjcq`2Ymg^I(^pR}BppU+q=
z<fk<2lF3=?N11267IjW}+V%6R$=$BsnLjNy*Zh9=b$$K8%YF(!Q`#R_S?EqP-hIEm
z;!k(rzHjCWQfj71T6wd{eXgq5Q616u`|;(~hriwq-+ujb-t<dnjQlTOHF4j%e$~cb
z4b}JF#=l=b-8??^blSTQb8OR#jYZ?8Kh=2i&N98+=kk}w{Py?kZFWc}+;e-cr1R)q
z&BE{63y&mb>F(n_wqZqnz+OAaHNvM3=vDIHU$xBP+agwX!_#8ttZq%)!~e+3ByBE3
zMe|Vx-){#@E2na0&1IkH`aV4BzLWB&HHM8d+_zsbJGC*`q5Z@>BPUi?k8Pbh_lC6u
zy|{ljggdY4ZDG5d(Kfdq7Q3UfKFX|^>9kMZGd+k!D0;8t;?<cKj^AEkQv5};Z2dJa
zM-}FY8#vV)_!71Er#ogDweeiYjW?N+wD3s>+p^i;jz`M0DWsTW2(?eWm96~bood$E
zhtJksy{u^4WwxqJP%M@8%HwC8D%|G;G%}1|N8Q@!sM5&JBszU!@d1fNLhBuK+=5m~
zq@|^HxftXL+LZp0w`P3TbR(l;XXll<5ehB|{2f7^|AoXkW;!#ZT?(4?Ic$bl(5#O)
zpZhBAzsr%BlFFkoXT=$_^%L2O+Gi}Wm$I31dVbg<gNQX3rEKq<Z=G>gTJMkLsvSO!
zGhI&IlYP+}`TX8CVUEZCc2)dU|Bbg;-BI_qsS4LOD1UvpLEQ98&9M`=R5r~Mo?&j>
zJaMV<nmrqC3eM4N(R0~sbC@BqB2!;7JG!dK;jf0x-NrSmn7fpAPdB)rb^W(`S^Lss
z;l5wxIgUG9zi{%LQE)MSccJd%nfp4T*9NAm{Fw6Q-_!Jo?hjs`m){;g<6Gf=@qc1-
z&x+s9)&HlE%X$5@*}G|S=I8rz(%!`xEstI;vAlHB&$H(H|DOMU{QuAYzrN1#xBv4n
z{{M&d|DW#vzkUCoABTUpw@*HN;-X65)wVd9%dJVeTfW|(bXd96DrL#L57WNh{T9L@
z9Q3>F#>t0M9=6nMnzP9-^VZo}lVZA6w(Sv6Ja;5bwu^z!yy?30?$@$MjU_EN3l+ZB
zyzt^g$A?)(n>ePI<mGL5HoaZ3qT%ji%l=rWKqJnU%Pv0JpB^8Mx0iXpZk^K1jOCm+
z!=H!e@Be%Eeckuj_y1JYZ!SLdPC+}mN#*kF@cj7r`~OwG2l)hREqRxtcO-NB{>s1K
z%v9H;#MgiQ`hNP;uYV86*H!GlQ*R@Cjiao0_6?=8ab44Q`Z750^GNSldF_Jn<!cSb
zhgLJpv0rYpg<VN$am41!7r66-Jk&1EeIc5CsOD&9lGp}|$?n{$MwzpMy2Rg{GPo<h
z`T1}5v*w>aR_#hMkCx;2m#^QoyFN;MaZSWLzV~vio(Igsw{L!zxwpC|s`-al-2)@p
zoShy&*1Tp|JX_Nvm#<KKQDh4L{A<dU(?Wl!<auwKyz}P6iO;@#U48xc?Ah9$K1_>E
ze`{99UH|i?V9$R0x*fZB*IQXVdALL7)8kX>&AZYq_`a2Fuc`UJ;=TO${JgyUvfqDA
z;+Iwmr6z^Wy)@;L$gN-|-{4*M?k`zV%fq><N>0mXajB&YfBW*o^8bG<-_0*y_1DG6
zWtPy!8IdPtaz5le6gr}Ler;Z~s_$C~_LT|Us?#b?wWuy(lB<$ZWV1f>RM^gr<3{z8
z0@KB&#pxdc1148iHr&20yhL;hucu>lx`PPIy?XwJ$JJWPBmZ7CijkZ>vsU-dsZCs0
zlyV$jOn-T-=ERls)wcsvkDi%&(D>ArN#;AvY{G1$8;(v;diXH?_=mY$Bp$t5=HMLD
zr|TW!cF4u)ohn=R>nAFk4hq`LdpBL@R+iHN8P@3N2MQb3O>34|yyJQ^W6V_3-hvD3
zHYR=in6X-0Sz(gp1HFeER5vdYOVBqu8g_LBhl5@~sk_F`os;skiZ_J?y_g)_f70(x
z-l2{uy_0T79-h(JadW40a<+vJ!@1WnBC}7r*A&~GYFYB?d-nyIhmpoJXKa~OIny&K
zZ|zfQoe521hvnSY1>6?+_#u<=oX+%K+4``e1+VHWl(Sc=v>fy{oMt6`?Sd<JwuJDb
zMy_RL4vUtWrnHE!nma?{&>o$cRuTaj4c>D-cSZj{!_3AZ-Fd6Es(X91@tnl+Q>)9^
zdVU;n{bV!qam{ZFpAXxkCw}X8w`OcE`1bqJWYyPf-iLY5oqEpp($D+hy!3!)-;V}+
z3vrfcd74gLWvBJqGkDSszSSEF9*gq2m8$Gp;8gW)=fTYr|EeCiU+k<~e<`K=`M=Kn
z$*do~|E-Qs+5WU!jFm@wZudRMrfM_u;%=th_Ydl>_B&9#t7>=s=hL&-*L|J6{nM-I
z>+8SIj<5Ut_4fa7_Wz#l|NoHRzUIe=4LYlLm#X+iAGy-H*E;vcuii(OuB=eKGrP(u
zLC5d3O1okzPuQ_#35w^Zg>Qcox^A+G;6zsCBb(aq8Fet76=UD@u=rBvoYr?LhBc3*
z9NslvO@3ROduPhDxyn~dFIY3~{b;Jq?NY1}qjC1g8-wWSIo+p(UoX+TG5`19w<ht8
zU#5f~I^`1Fb#MCQbHDF?oid3_T}(i`vmhyGLdLA+cdyoL6*u&K`S$j9&oAFzJ-&SR
z<?=uCHiZ|;Pi%g8<j|@$2QS^OJ9pgwrn*$(KNs;+f8!X8H>>fwO_+XqLD5^)Yj-Dj
zE}Oed+tb?5+GldN#Nn9>7kRp*rp<~dTQDUyQ}ug|&As|1$&0pl?~XLl_qMZLv%LNI
z@%G>CzmwyPc%Gdqnp)<mUZmDhwCTZ?iHGN`-nHb;@dMuFmoChj*1h{&-Idrolb5}o
zeLH{M`gJzXQzJhmJ*(KY|Np)BcVj~L|9kcN^=$Js>z&>$&C#5x@iTPaJpSE39~RiG
zoyNa>a&TGHW?AvaZw%Q@?AN_J-R}Ok;P0!eU(dd+skM!Nd&!3{F>vm(Om(e1y*2Ce
z^Ol>(FE!Ej-nd0<D)$-Q1Lw|u-)&w|TL1sUbNl~4OQO!~IUD0R<<Q3mc2AhiYPd2Z
zmUaDGdatC!QRe1~Dw{idV=4<AI-~m+D3@t1IBr$dYgNhMbm_rr>A#C+=jMH7F!R^A
zevMUsZKCvww1yqV4}#WDj>;?g-gZgNdijd%q{bVYw{6?>FL9aqvB;}(36JMjZ|nZL
zDB<Z1HlDjjXULje(@B-oKKrlZt;zaHH?no31vDiJUS+&(Hn6VR*c|A5af;BxIQ{nv
zO#S>{)Stgp?{#0$aNWDwn{snzIlU6$7xS7bVy-iL`tpf)9i7tDl+sdOaWeI6yEQS_
zkMTpXXmR=HcdM2v#=P-4b-G}?kitS8Utg0+Lf$Xqo!eI>=!Uqv>g{WswzzQdtG-n;
zlQ{Uwl3GfyC+YgQzH&a|!lwRM;6l~@?BDFUIlC_u@$dTa&FAWcMd99JK@Pm9mrFB8
zFW4v^UGuIu%rb7$jD7)){x?-xb9uz8^1oa?xX8IRXOU)|*O8=!+RZgSPy4oB|2A7v
zZELBf!<o>V_r;Sc(hfhk*vyx+$gs`vyu-cpnkqYMzsQ+|nG^oUXqqU!ZLEuJZts3M
z+2Y*44yM_T`rMlrfA(3*p2_dGXi}o5ZRp}Phaw_FPVLz@?Z={3QvcT)?n%G*jL%rG
za@U6_!}+$Jjy`qoUfXlW)xP6@y>F%e*)#v2f4NxqN%B>7V9)7Yy3RYL0v$edEHP2$
z_jq=*fy+ej{iPDq_*nn&-z96S7v8@U^EQ9|{-598zMVb$|Lgt#x7*kKzx=nbvGu;|
z;rQNHC#{sXe*3*aWm))v=^pk6+|Mv)m3==ZcX;C`w`Hkkf6j2Ve9N@O=(UT>zIoHW
zPRZS<wEl_Nf{6w1HVGNGdi6xBO&4}-V8|>{Oq`S5__O2iE)BK^j(>Ve(%j9KFPXfe
zKrQLn5|uf;YID{tn|XJ-sGrZNB?e6gZul?1+}>Zlrl#1~w^Q9$=cMJt4FyV7XDmM~
z6`HRpI(M1L`N@7-T9RqYFYkJG`}OU|mksq_YwGK<=&nh3%!m<EbqvyGG)}iz_v>bc
zNT1=dwLN_=T%ShDDjXFF36+j*cbYGnq>~t^Y9^Ncv{L`FU(m%hyFY0~Ouk^V^zM`y
z>g91ft5eODo@EBL6kiH!JXxglxyI`0<%8ER@0ure{1n&R$BvPk@6VU3UD*CxJ-(RZ
z>ded}MNwwIyf*Kd9nrCjue9WyQ+!pPKRL7N`u&=(x3{m4Q=Bg+|9+m_`*-i&?f?Jq
zcze6M=jGSt@xGUJtk$+=&T?F{YWM29Hs=rR@qYd8?Dl`B#H{76AHMte@SJ<x`}fyZ
zRawb~TQjYGdOWo@Cd^uI_wM$)Q{(Qw$-8%F-^}Hb44ao_3VEKXvRZfl*7`E@-<;J+
zitBq#ic88|j@WQo8=t>ivv+^rrNes;235>bQ!3v)#iYV4kkfGDlwb4MK0G?+Tm3PH
z=~w3M4X@&?_Gz4(<+ZOO?I2I1!0~@Pj~kYsou6E+!>ST|uvVhLK4j7jn;HCzCoEJl
z^0>8Tq2br8jdp)(Ud+i`XEE*C|AOW(#TP?OoDXT-y{ognO`2;h%iTqDX8aOr)PHwx
z=Wg!ow~D?|&)?~6E#N)dW$q|+?8S?<Tudesb7n47Vw*0f**dF~wWFCw?&1Bf&MH%*
zqvB*%?wz@PraHf3Rm{9iKc01;J$$-dZ`NLk11&d~bZ+s!5IUuj_1aUxTOB3~UWM#%
z=xqsR+_}R*iz8lU&BL-Ad*UTTueJVt$+vQC&}`|@guw6o$6ACJf6#xomMbBB{X>V)
zJ2q{1r#+t})4@7b`bbjaF`jQ%xi<K9%;BEbBl+ac;;b3lG=($fyCi5YlRpw{%RI4T
z+x@F)2K`Qaho%QJzSi$6ySDnjxs!^Z;p;11Y>Dr3elN<F7V<iEX|4U)IM2nr`NmDl
z-QGJguAFwketL$+u?0PfJGS>v+CI4=q2yT!>)++ef80JdFHY~{{OLY_xr?{&4!qkc
zWaoW+#cr9zZDR7<Zti-ZWVFDP;ouIfdcK}HQBT!<X9acb7Y!6ryD9QCH}q>#-CNb`
ztaWW6&pvBKU--{+^e@A*AJI|`x$l*nC6DJaiX5#?i53-bR`(Cpk6`l8+;{8#osOV3
z+0R~i+t<hc{T04{Uwze&g6|I>?y#_znLfuhf-&%h^l8<bhb?5EZC>oULtHZb5P!_4
z*8+D~-^gwLb9B?D7~k;l%rIZ!&kvt0-_vqEB4S0``t5;%KMze@a`ueRE4PI*T+3W0
zKC+I`SaQy7-@G+Po_<JUaB4oDF-yHzTr{oJU93q(be^k*^qdH%LuMPb7>^(Rc$huT
z@AB2FC0lPZ`mC98=}1Y$n;knuQ}cIRGCaHM{&}qu3%j|W4u5?4@k7P`A0K|){eJ)d
z!}Il3_1_CipL@Dil$-9o`~H5+`}q5B=3jf-Z~ynlmNjAq6}Qw@lp8wMwl9&AoN2Bq
zJ6Ex|eg51I*Y3<qJRb@!xbv=^oRJjfIr-f0%vqtr#=AH9Cj}p}`S?%k?fSCacc;#_
zv(Y>Kcb2%>tkk57v25F8ocm^1?)RQN>Fw39SaHj_*PcGHocy#V>c&c!bm9H`?(MIt
ztuC%EuCARc{`+)!{omK$-+!8=@8^4b@<#5BJN;fH{o=RPo#(s0EdSn~Us0O-ckcUX
z@$%uj!!IA+efaR-uV<o(^)~k`KOMdt>$UmRp4)Hp^W*Q|dzH66e5Py1)#aJ>rKQ!y
zpPyZQy?y=teYN%fAO5}j`*-_%xp{JT=ZDNU>hou}-((lC%Rl^k`)y5M1A*=(DvQoc
zo-H@+u*rGP=d<tTF1pgdzW3*(Ntp+aSgubC+i>mS;uOzJt&@?n15Qbujr?KxB<`=4
z^w#pZ->voX9A4#~zI|u!yfEP&8GdfdM{k(4{-0j^L_6^5SJuM2jzUM@R7AR*h-*0T
z+qElQTK%40C-?cK$)}G8?daYmVVi#KbxJ8iH#>K|LtjY$;`y;YF?A=B*jvhH{w_Y6
zcXnzncl#>GS4LfO=H`C>v!xqXz593R;hk!!mA9*uJ?5O9yWyEnkgNSyPKSq^4?8Yt
zzI~&8%iKqmE6+NtJbP=`(wFNZB(5=s)*RU_T4;6kPS>Kt<r6H=#JKMMwxMtGslan7
z1%f`E%nmP0-^6w-du}*ES9zP1)q{6aOr-YozO${nq_v{!$My`%L(^|`UH@Sf<TmZ@
zT4_dEu_h_^BMMwICft=&dL-#{=9J4hRn8eo2U2CYy)U?IU-VvSecR;|dsd}gJ$q-{
zx9G>0tG8*a{xoNP^z|(tdYhhK`|%;@OF~m%xdB(*q25O>A!3t68y`9r3uxZH`tahd
zS@k;}uQFJ&BjG^J$IS^@@2)>;ID0hUa{qx2?%+S?ljP#3Jig4fwt2dMZDL8C;_}oF
zNqHiX&0DLupFWu;$-}nUyML9&tSw2aMME{yZmryrnOrB|7cHhXG51aJ-ERiVPhbAZ
zt#-@mFhi|h?*C1p1)o_zm}k8EpZ>eKeQTt=6My!6jfqihD?@evMBHzjVX<+EiU01o
zT5GenwLks%<;RQDFCXtN=l|Z%v+ri?%tXJRJ8n&~oqb$G-thJ^MbH#pU+4}Fj*aj2
zMV@LsT~W@+pYY~ad)VYPQx&~dlwA`RoPVICwAj@5a^86sMV)Q4W?z3OaFDC<u<jR|
zh3aXEis`4{y>sDfTqZQVQ^`6lElt!nr7duo*!(_^Y-P7uQ#|g<2srrErbeqjtTf+v
zYS$*`OHWi5O@7rE+1bil_G$9$^~-BE9e$h{<abK`zJ1CGllSG-<+CGNr?@z;ng?29
zv!lA+M&{G5njatj`QQKh^f$k~t**LwL+Y&M`SpLh{~upp|MT|x`*+U#T7SRx`{TpM
z5AQmbd%g8=iJY^Q*8APBHW|Oyc1{#<+Z^&`Er;ODNKL8cD>6qPyUh7iGc_;nefjRr
z37pwY{%?M_{GIA!eD?aD-N$U&4+|fETx6vbUDolE*SaHirlXj*L-`9E&C@JPN_|i7
z(o>K2-=|a6yMA5V+tj-6cm0naepvt8|Nl$#{XfsnHea9aTmR>Cyj|U%<ORXpdVV6i
zSoZ!fTJg-jOT(k+*rr21jh+!@m#^BLebxQs_Y?k3*O#}9b!(zN>sYDIoc{DwEid<1
zo7jz7p)Mb6Ue4C^4Zr+$_U!fi>)*?--}l?D{`+Be`?XK9=k9sY^|<{v|1{=T^8M?D
z&bIt~_s`Gt?!7(xc2>IT<-{#2S=5@>KIcH=<A*ao-x4{(k~lGQp7sw{mnA7&hn-cG
zRvpR7%ii|k$)a87SBK4-&sn*0;o9~&vPmyzo_agE@7PsyhZEN4TIZ!+{xD;S>}T8l
zCp(JbJH^#(JLd=*@0z-#ShH%z(-|*X=bh^fDD_rx%J0ce7KvIs<;v$R3aoDR2X3}(
z)Hzqdv3$SC;}g!B$BrD9KkQX2EPAs)=zwEb0axvQM`!)O^f?dQLes?(jq|1YtJYhI
z-dy7D;~3jC^`+qEvX_4YW?%n4Ps%+pxHmA{X@=_q-&=v10&$Zflh?%WW?I+Cnb`d5
z!m<jnQ`+lT+O?K1P{~#~GD&=OTRTh5jOfM3=58`DYzn<8vi=>PuT$+T{nF#lbH7Kj
zGslHV-Yfm#BW1aDX@%BgCC13RI}PU4m?i}#o^xrPrF~FPlvgAoR!Q_vstr%v{OJ+f
zD?;)_8dD{neRQ6~Je|{G)!OFxptU)?_kTp1{#zjY@ROX_vyd&?xhe&ElQLaxHZvTH
zsrVHmaN)+Y*AIQp*uT-au<@gf$ad9Y9_DS&ba=BgK3l(ik**xEuSVr>&Lx-9Wd;F8
zyL(utuTEp*kM;OE<J5~;FXHCS3yn6?-0@sw;g2iH9_tr*nzVki^Gv+I+M}cR$xV)>
zb0)tucKVinE%HP6=XYuG2OrGa$H8n_J+;29=-T<9$NBp0_5JG)rf;dcXRux-<*Qk#
z_Hvb|DVeeJgmtyFpC11B?(pyS#drT#{Qr3PZ#)0@_x5}Ke|Y!!uB}np%#?Z0S{~2x
z&*prvGSBptztTmHY4RaII-cAVQwSAZd3&c8|E`S{KY|RsHgPN{(DA<+7SF%!w*Te4
zOD5;TH+{;wd8f;I+qrK_3?|cNtDIV5%CWhtMmxXVK`zB7Oh8y$)lXqU&pnaO8*<Ni
z9%cHS;q-eg<f9~`x&HH4)73IFrcdn%(coNuL}!uJQj^slN~({q=Gs1uI6hBo{`d9!
zew9V}Z#d(Zd26Mj{!bV8J=Ndewafpjs;IJ(<0~&Om$$oj@TXV0`Q|s5{pvqn-M;_-
z+x+_PUsp$dI{Y!w{N*uun;X5!QTKm6c(~@}!lQSZcrsOTMH_=>O;S_cFuQeOw<xoL
zit+C~m;Y{kqZU-AIIDDDVBOxyv8R?SFX??5mg256b535&w8te?HhS%<>)kIgb9>Kf
z%AcvJ-TK;Ta>u4k9a)PsRcBAUK6lCX_4n>Anfp#H<@ewJ-}e6lHN*DZl9!wRM&Dpl
z$F)V1gb$tn^HPva>#^UN>^Es8%M~($!Wbu7*F^fYS6kPH?4SKJFKxC3r*2F3GTVoz
z&ODfTQu%fNw|AFiwrX@np5wkYc{gXu^6N#Pr`CTh_)~aP_><Ht12tini*nz}l*NR0
z?ADOI-e3RgU;Uf)c_Ej#Z{IKSeD+<_s@d0EUeDQVYiq0f+NfosQm2pR%*uk2U%%xU
zBm_khmt8H|c4zJC^V3f5ZOKi%8S53#W*L`dYWch@;PkBZ0{bp*fBT~H(-{U6r`js%
z#>TdwKE)q9j_p%9{(M`pgz}AFZyS~s+TUmF->+WIAGmg+_Y`H``!_1rsg)<1bRF5_
zefuJ_^XX)(Q&THueC~a`iC4Ax)r8qeskfJy6qN8hDC>~Ey~~KbVMnCAbZL@H;sNG&
z2_KhfX}L8D&AU6h=iQ`tbu#OY%#7M~A&mWf>BpGe^Q^v1e6*EA!^uG7?@a5>w&6i*
z9jA!+@v$G7VNvFq9-33TW2f@A02Ol|zUfXswywLI;uINnWWi3YE31FWUHQS|%5?3M
za;3sL4&%;>kN=y_Wb)SWIbHVh5sL6pKIK|twkm#GD(|a4jTFb8EHQa)7P)O=hZaZf
z=1A5GnqqZ*QihJXz2Ry*|Mkn_gpO?MJI%qX<of^p#I!SikFSi^-c;-rk~VX1yX{f#
zr26EXeA}ZMna=lxjug!^*fw9_sd3^Nwk554F}X2aj_TiQuI-boWT;>7xtaI=qIa@p
z!t<xM=JZ`I=0CRi|6<?vonNjlQe7K1d#M@6b*bXozWCs!Hd}m;Xr#?OGP_BB_AJr!
zKjXI^k?+23*YG%fui$*%H}z%wzc%kOyk}PbIIr=)?d^`Wr#NhXuDPS|%wR&~oSvg1
zrd@n9!xtsn?fd^y|NrCo%Xg>B^S$4{fB)|4J^StLY-Rh~Q!Oh)%9MmTmx{PLY`gke
zG%xJurCP63A<TV;9OD`C&cy7Ej7_?x7INw8x}d5W#rC884nn%8H&xil`Sl;Kv_JcM
z?h3!~-A`CPf8C)o&AZbe?t##p&4*g~=SRHso8ow{&t<~ZcVQKeeYC`d&N%9|d8XV`
zH8+aB`M^&-o-Zf=yScFDpOamNr)K(ev`%u{&oE7iPb;$ba>?Ad_`CNkdX#&3PTibV
z{{G(5x%chwJXfu5OZPFV|N85-`T9)Tr@xOs?w5ZzWqIwbyYJTJtuITt`B~G!>Clt6
z0aEL(h$-D=XZ>omUa&+pC&H=tjHwN?isxk;&hFWhk8X(XjBRQ9;o0-kYkmCL-?g(^
zPWbf3y<NBO?)!W1T)*z<_}sfR`e4rL)cD$2m(P|~&AR-s?&QxYUmKIX!qjtbhBFCf
zd#yetvWbzSQ_a)q5c{cu3tP3mJpSb&U++^6T7U0<|NqmEUtBshcQ>y-oRz||@YxCe
zWv2vM1dO|l4U=yzeLY*-H)r14l8TD|f7|8%7ghaHeE7Ju>W2D!*G}<AZ?2wvFzxzL
zet*rC7GZPK)EQ?^mi_!tQ=ac<$(jBMp+5>@{EbwT%-mJG75dIEnY{Mn&n`!a8Pj`?
z_xIobf4ltm-97&MXU4_vm2=im{;B(~EcVmm%fWHR58{fOA3cfFZRV=q$MHz{W0*?_
zgS~G19P#ummnNL=s>zeMShy|ZVP~MH_YAj<je1w5u5+f`EB%>Vc%Q*a#n(qjLG0VZ
z%MyPMg{^O&Rv&zmzpDA*ci}6ODmJ*A=UlXVG}EXzdZNnWJM*?{*UNGL_Wf(vkjj4U
z?6nCy52l`Yz?ivh!m+FFrj^%@Ee-$i;{R<G^FM3$y?^;3_HJPM{@Z@b7Rh*KD>aIr
zE>V9G_Whf}&O2fE*X}sVaI(g1=NdnaEhef9cA3037Tz@N6Z`kvV-eAv0tbbHY!>rv
z{^53I<;Bz+ojbo)2PIo)dlcL+YTuEvVCy!mi401*hdw-Wd;ILLg?HSeXN6A~E^HDP
zGwv|n$n@y0xs0=3a=Loj!%d9J*Om#aPSq=K(&;V`{ARA`KlRZS(L)KFSEc@Yu`s{i
z>Kymx=RN(s3G=O7t*^ziCV6gjX_a=l^Z0M#L@iBek#}d9KTNs7%B5Rb`PI5i{%y$Z
zU+D=4`!-f+a2&YD<Z`G`Sj5o2JN%KaOOA0VU*B)<uQym1{xQydEM6SCA~ALM(Qn?>
zE$19wwsEgptG4}e%AAOn6Kkh^zr?%M_I%f4`<N%2bGO9x$Ffgk{CKfC^W{gU=|Ao~
zJt+Q7_Q#|ZepPwCe|M*So&U$~r+ozP6${gXkcz&kUEgwc9*B_iKi*&eZ~OmE5AM#+
z{`#k)+-mN$$0v)o%{grr*WxnAqEnz-?G%%S@MKHB*HV9cmEK11Iw?GVq^Yend&XJ!
zqV%grgg38n^SxR8(q_u^IJS3;CT8t|jk<@!PIs?8Ub<_S*JqQnm$%Fg3R?8|)aumt
zFKb=BRMxogg)Pis2wbAUmhP*1Wlz*^k^I_B6V*juXQim^sEoUAa&&vH58HE}FDb{D
zzrOytW@^XBFDKre@e`Z6O)$D<(HTK&JyBJ^i6>X<aG11o2z$JE`0j+T;MwcH*MGNO
z9xkZ)-i2lH<;=eo|92d?_vZeYP0h=Xw|{+ExAEu=r@sMGQ^Ss3pUn`f6=_+z@lnSz
zHIa&=JHne+xvHKO-Yg=b+!#7zj>IGn^%a-$w(s7z_tsfW&W(X~H;W>8&0qe?ntthD
zRKd}ouNq`G3EUCyH;)Pm5>>lVRb^6iR{M2Gci)eHmijWwy$d3PgT&SxIDYvP$Feuu
zo|U&(|MW}TCc{_${{6L+%dcApFS=^eEOY1UMh8)4(Q|$+x{K@@c??ue{!7#Ed?$Bb
z_jv#5zrPQ++soPgd-&l;_`$nZ?cT2sx>TJWZ~Oey?pX?v+b&P+_%x~i^VQyEmdf(i
zAK$%voZtVwz05uNznMoTW~RnXfBNy|7CT#ApLI^bNi!D-n_hLOKW-B2;I?Jmw;%uh
z?Y93@YX9=f`Tc+5zlC4-6zb=25mK_7d+m7p@j1nMUA+q?Ezi6q+ovt;Vztpm%P}$V
zV(M9WyL-t+GfU6xTw=&r%^gy2HgP4J&ooE500-@unX4WwE98ISW_oJsn;xOE;BuxF
zLEqNSUR%aE`{4)G)Pjc_1I{1G>1E-sEYtn9sWW1g#-i2$v2Mq}EnokJ9^~siGw)u=
zl`DN}&i2jq{LMi<aSU>MCTd9h%sms~yh`BCU8Pm}`=mIgpGjjDmodGyEI-s)JhZ!_
zT)p+s&Moub<gVKh$-ZYxnqGpmh<bDIN!u0D&)H7q=W?&`myw#b@0;#Z{Sc-jVc~)+
zq{<l_gxZ7z6|0qbr~G(Qv2D)YAI?b!y&qmk72CnP==F^A)zZ_pJlZwO^H{=<JuiDK
zI4-0H1Q|R&_2lBcu&aTl>cQ*u_!&33_URh4`_2?@o-=j!mwnRp-Z>TPAGCYjIn}<x
z$HHND?#<5}3pTs2zOmusqbK_W-H&f>_?^Ch?V*_8X6eRWlONI(>y*ySwwd}VvLnWD
zeM7fGsP$%+1dj7g*UfURl6$r|2=ib2|5VUhV|w?6g(639Y}p@i`C;%T!I-{_RgD|Y
zAAY`BiLudt+QR42VL5j$xy+re+Va*v_(zj#ov_p#p{VG&Eth<@>)d+wJ@@i;sY55W
z|LSS~7QBw7icRUak^MXSpt{dbzi}lg{c#Wbu_8^OcyofoqPz>)&f%xyg`?j5c=&Po
zpT95j)}Ptt5^b<-&jH3$8a3i7j+$FS=0vvoXSZYu39imEpI@k)Cu+#h;bPimkjGb^
z`6^xi{QgSagKKh&WZ!LQV+fOI-f&2FzDeZ0n^(W)&Aa?BZGKRcpZe*UyM#Avnfvg%
zl2U|Y!sM1Zb|(eZX(6>;cRd97Ydr!zO?pBrx0Xg;S~AOH$_<$fkE`|^|Ml$a?74m}
zdEdX!Gj^WQslM^4OM#fGSdpT5#iS{#>`nJ<)lyQ~xQoFk(@T@pUH!xy5ef5&E^E4H
zZ}#_EE<BGfB79y&P{)kvQ&;S8_f);h8nMSIh)J_$alHRRk4(>o*6brkw5<G<UcD3h
zT3fT|W@L=|yz)Kw<KKSYeOFI&v+nCpTjt1Jd#c$jyTA5tTK(^DtJj~obbRu!$#buX
zraW40Z#!?ETt=j=gw(YD)z3K-=5~fm>c~*fJ)RIfNln%No#eF7o2txIPoB<7GhoR$
zeD`P6^AgqHDc!fTUKbwair1a?=Fgc>v3Ei1!gqFLJjzu6&j0@Z)A)b-AE!=N-`a0c
zw|}19if5~{_wEV{x4!j}BQ7&-?z7BUEsRlbPx4tFHeEXVb@X$WH^)smci(>b^5x;b
z51r4sC#j!0{jtDCXP*C|9lK_I`#tm7k~1pnIyI;JoPWfc9Qo#9`}g`yN3RFp^ZlKl
zw|}DI9HqE5k6(WIQX+9J@{Gzvze%5OP46w8D>_@cT%p>m`G<>CYeuiy-!G45y3ci+
zdw42;-QI1xtM+X&OwoAb#IcXpnCG#q+q#P#t&B`6!9K}9ru6Un%~pL>wmmiA>g}Z`
zcn@4WIq8;3j&4+EwpH16W1HY}CG4*bY+bVOmHeg0hnpiL6im6ZBSf#rs+&KlbXDsz
zSo1fJ!*^Nb=2*LWLjxX*)eTF^%>ylpeVZ9<u2{3WZIOJ}cZg*V!`=se(MvWR{VMk5
zuqppSTTW*)vH7gltoH6QoII13om}fHH6yZ9HYKi<k7w1tHxm*<Z*MrTHZOAJtnkP!
zDWRgGL9_XGmw%U&db33O_+4KYnF~e7!dS8^rMZe%^aRcbap@>NXLvVEFy59;OGU@&
z#L?KtNj?*%oSI>B<Li-g$!Cf){&u#i$SG`!Sp7-B;r5l(nmlLwZlA1<<J%Y?7ap<W
z*mNUF?z-9GErK~AJN+iCKizU#c9p;cq2DK#xSoHsc2_}SQn|9A@&?fvx%Vv?niH!8
zPA=%=?E1g-;;m{iG4bX9X5QfT6$+1T6?8S1DAHeD61OV${MxqjPx}tfc9F3ADcko&
zaw9*Re3@XgVYK!kNf{}#w6M$z3l)}iZBUsQ(p4QeF=W$Pr;UeO=Sr@0cRD|3x}xmO
z4~F`mr?$uaJgpu5tp0x1zdbvSXUP0(pYqiH_4L?x%MTxZ`r*wyW`zqpFMXvVQ_J5h
z+jgPog#W%}M@kQsJk!y1U(~kxxT}NOq>h$!v3Y3^&rDqSRM7jr%JMtME1NUdsBJKi
zc_wLNcim^fimSJlFV&kcThKq@oXWhLH$Ppxet6gZxy$cPDf3L<ILCH&Wm&Vc(xp~a
zo8-7n6^<-L#*D(wF^^`8<QgO%USZSo%U{Siq0&;<!E4oLy~?=FitA7R{rK+i*|-1R
zzSfR7XDKiK<mjRtwT*L1XSoRPj8gN}dsh0Jb;fI#1cMeqZXdCr#z~%$w~lXLcF4$c
z*80$h$g49xz58|h{=aE4-}CFLz8Bc++r8U*&GM^7G2dAn&(2wyt`;$)#iVnYXK<FZ
zo4fttMcw?9=B94mbEvmf<>Z#hQ&MI<%`*3$yeYXdO6Pe^)bzuX#81t9yP?Kp)9dTI
z%{xsmzO0Bj!}qh`#P{!~?;d~rQRA7hzY>RsjGz&F2>-i<O#S(hH#%i1ZJFMsbwADN
zzBx;??YFAW*-j7Fj*~Z2)H{WH%3kwn*R;-^RueU$cTawG>x?_hZeqWq#O4d#{PpGf
z{y(qp?zWFV|LK|9*)K1a_aB+@{rVPfel>sf{nl&wxpn=l9-n>vrE0x(s^hXZb(3b-
zSg(BZef##`umAczzZo3m>euu5?&X(TbZ$=Dvu4`Hc|D0PCTEL_E_u4Jx*ciC$vJX_
zjlF2w;maSL?JUE7J^N~&zdnBTnLF$u3@^6mc*k`f{q1~aWA4_V-&;Hd@A!6jJ+L^P
zw|(=JnJbPMENr?R7gB5XI_>)Vl8X3;>xzBe$=c@K=1~ePDO5glK`nck!ENuv*q0`2
zjgA>lexUAuexhuo!GzpB3vA!8U#k3OvS!krRtF(gjS3!j>&MR*hDxr8KH=Y$e*XQg
zUY}jzhZirIX4tVL`>TClt9RxhWxmx%=Lei+Ru2jJYjIuIzV}Li+P$Lcb6s6(Oi8zP
zNoY-*Uj5{HS=aHoQ4#NJZ6<q|#!H@Az~sLqHeINHLg3Zcn_DUjymHOlX0jY!b=9Wn
za_fVQ3v?~Frbtz<HTWZzx^jz<LC(I7J*)P8o@e?zFHtK{Y{Ks81{)<7va@f>3OGH*
zw^cTwB4_PQht34Um1!IiEYH@M>iBe@m?9<7u{yt_^MTLNk}W?S_*Mzai8)`I_59r=
zP3Q8&oCX_bzPvSu7R~?cwD9Rdo=FoA-%ES(EMZ6CpL0hRsqRx-_FH00Sjgs0o-<Ci
zwzSH)NXa`Sci)&QboQF<UW@CQ(_@~jyOw!G;=xS=`DjKr(Z4Z09!Hk@%=?pBX<O^`
zEcmjr+Yj6JZP)Z)M!m2(ye?a@QaMLZv51pVL-25QMjJ<AbMb1k5Wg42CL6v_3JIDV
zv_1L5n#Bh{|G71N*;nyrEVtFap6)sMeE$Nr)$_bF=hgGu98<Bmy)2=k{ztH`)V!q!
zn&az2K9rX*<(_}jurt+M-Dt<p8KQ<67ah2gic`MdczV?)L-3}99^;JZ&1V|s<feu{
zQ|Xit@IC2xGnsM1TE9OD2fL^IyL;4c!I6%|pWdI|v5ccyiDk9Af0=Oz>rSqsnGqGs
zQcbO&X4?CPYo497%yV7Z_R_PbtD^4k99bnS=$Ulx@DA-GwQ@U`UvX!X+irO$=7{Uo
zu;Z@gFK+h6y11&IO=91^M{eG`dLR4P*x0|HUSGF1{`K<luac6}1_qtG4$r+BGPNW3
z?5dL?lhu4DcQi=y1r{iAzUvB4oUeT2!>U&Y9<7#mZKCRRglpp8ukYsP-TU(>e1El-
z&EDTX{{1eu|M%nH^7r@ue)#zD;g+<=Z4))0SD)PAu_)N<@{u!fXOGJy965Ad|L=_I
zZF>Y#Jv}y`+x({NWtLax(J=eE8mqZ}EX5+PxPzT0wZ8fNdv|^H_qg@(_v4o5{Qlj3
zeEvQC{`RL^6rM+xu=!8&wP_Dz&fBM_!@1W->oTMH=NT7-yL?u-1<e$VZ{KZv@b;C~
zlbw+>LsPD~cvXH%{g&EzvC8Pks>M#*xqU-tOr84S%g4v@(+{8hdeuDszTcM9MIzrb
zcUc+A$-n)#<KLH$A3i={4_<tCkNuydhhMepx9;~+G5C<_dHLV#_4UTSpZ(J2Doien
zvUqCJc=f$pp^cT$*(I8)YV*rAeK)H#793HUFhNaJ(Y&bJvCd3~dtTqqm+xLaPCC6L
z*gUcE+MM_A-pkGBOAE_pRJ$-mP3csn?5q4MQml&>t@=It=daaw{rT->{rpRFDtE4O
zX5M_|*ZlMCp%1V3vR2RTZ9UW=(-G6tw*AY$xX%yL9{GA0@!kx-GgaWcZQA*~vyaRq
zVz;h%)iz69HPrOXRK3GjJp@vPZ?&b|cx24``Q@uEE?0m3o^UDZ{I0xXPj|U#*teDI
zr99EN{%ws(R*jzGjUd6-N&Ea}XHWZTp^|fO8}E*o2RfIcr#kKlohMYpY4B{bSNrkp
zY&R_KNBz;ex_R%4mF~Xb*WE*9Jl5JS=_{SZz-hH+@$K0Pf<I0%L@ZDc4o>vAAbQjM
zM@Z1yeRm!mc+WJwD{A^D6_Hl|v+R2Od$wP@{(E-Zteun8l;mFa=H~cK6g=y<NQmiH
zo{wR=*bHUq<2t>qOG?5D9lal=P1HKg=YCrMbI{JMUNSX1ZC+g9_%g-k*01a?=e&+h
zOP}lRTf9~3@AvQf6rOsW&Ykb4w=AEjL+z5w#a9*54|sPKuiLrb_018H#vOVSCm+3*
z`1SmU&^=3^Y_WQl6L@j;pY(ITZ#nYbWBFOVR7ql|()owK#Z9gEoYg+uz4eKOSf=z;
zi6=*`6{Q5Z@8q$p>XHrgoa{BpBy*ODujle5%nt6Rg%UZ^>iZw3-kx%V(O>`S{R843
z-<uz`wLdOV%UJ!)#_Ui2)vremFo@?er2lC3=AOC#aKP_OUb~J>)n(DP(f$&$%yU+2
zoQw3}m{L5gXyH-6JFDlfxpi*skK5kYkG|{P%;g$ZC?pid+UT26YOe5N4oi`=k>F#6
z??O5E51GDZTf{I`(mCVvOx6{#{k)0{=hSaZy1w`&-|p92>bH7ZKh0Fy_VtN^dSRJJ
z?at6i5*%&<ZH78|w_ousTGsKf%Ix52ujvnyBz4TrukxG`R<wPOo$R|ht9`p_Kc6<g
zUvre(B02IJlLGslm@8g|zHc^lsrhTFdS`NVvG25+bJtba@!-3{WyXqPa*f-?E?=H?
zX-e6u$$rI`Ox~7X25s=Wo?rL3JHBQ|{r~Cm|32CO`?K8s|A)hmi#C0*I?nFkGWnH`
zQKyQa;)G(sx6^FAH9tFOmP=I5idgQcK2L3y(A)AGjc+#C?W^9kySk+5UCUL^DJ`p~
zUZ~qu{r~^Z_pf&!zrO!Z)%MAU+xcJL-MfC~*{il|XKucod3L$#`kR3#{JFng-Y4{N
z(H}EyBmReaGuC~W-J!njyt?b}_D8Q1g489{l@#Yr-tew6=q6`N_=KIQ#iwIhFC|Qf
ztg}ivc3C!K=X~|q>cU^&l>JYUf9EeRcYS%PtC!KoEjAMWcSP;me@itjK6n4_y?g(*
zxm>aljL|&%xu&{!?~T`QmLK((Oe~GHwM<m^(p;Hg5}EKxsi`Nmr7`TWM{T+H<ULi@
z%!P~J@T^?lI{E(H`akd1-?z21duF<L?vF*6O=ee{Wgjs4@<4vY2Cnm(FTcJ!{Lp#3
zQCv`e(v3Uu5-x@QeiN0XoK`pPI1<7BZ}u;j`ASbZGL8SOwy|C(aEB@6wSx_N$!__2
zvy_iv6D~9yT{HXrF>XP=*-5%y-7K?WBMRf>uBr>%S=3|YqE=P(sp(^uLB%}-gLp4?
z-EjN-_1(WF<-2`cd~X|1j;tyFnhH;L2UlT@Ei703ejSl~mFEz-ptg}|j^yTo+kO8&
z-C5OMY`Qe$_M>asQ#@AbNhmL7y8BdSB2UA%Q#`V)ov(aF+k{=e8Oxk`rfN6Styjs#
zqgDIN<WA9fozeG#8zT~Lc=1G@*!xr>EJEX)p;LJ)Ly)`H-S-jJ8iIVwVmtqB;^1t%
z#&tonXGhF5kwVK&3y$edVYkTA$-A;rA^J+sx=FoflxHk4uH5yxHk(5)SFzcA*{_9x
zDOG7R^yK$u6)rrRzEj4vbwh64MCpZLtCpAiyxwxt{X{6EL&NGR=Qz%sQssCZY4CBu
z?T60|#HDKgUzt&Nqo;P`*_o<UeA&<67WZ{4oIcCGEq`~`wtG9}o)ka&dQbHT_sPfh
zav?T%#H#|2h^tI9zmjL`?-DsVL-$?jC5?Bcs-2tmO?W?bMa9>)KQHS3xi`+=7gPM{
z=Ruply^Cb((is0sf3eb!p5^{S>fg3}9((sg#-HYPB&1E#l|FOwl*Y?TW-AuT$Q;wt
zTIe(VlH{xjU*sEC`_*O4Vw@n^xgpihv&CV}8c%g~iJGl4A7|dY+kGYc)E-V-#o4!S
zEMKyqTrvM+dh+5gcWNe_+Ino?4(q2^KWCn_e4k%#b<*U$%hkKBH#l{U@r14{SJ-K7
zxtmkk@ZcHdQ<|%${#ci1_$)_U$9ST((E_HlYe(MkMPF04zq7x7`|g?R_us4kJvC)=
zfBC*+dIugi8ZvL5ddJRm$HeqcGMziuWvQ+4I>{8l>Zu`OSJ5cQkdPed`gKL-^lO!T
zU-zk{IX-%M_Uvkvf*TKiY^gh=e%f#16V=Op%U64<>(0Bh=tTNLrK5$-GX*y<SyUr9
z?Pr#kFn8)gn~AZOYS-J3KYsb|q<>_2u@Gx}N5mm1?gMA-_UyNwCs+TcaM!Fv%jZ+?
zUY+wPYq@IlGSlj~ySyq5&*KVv!)umn7YLUuYMp-7V7lU)$#<&P@ChpgE%cLb3GxcR
z{`>FizwgqfmuX39$)3M?EhoFoJl^=s42MVVYgYe0snBEk-2HU+tg`ia!iK6-r>HE=
z+-@Ct?BYq;)z;^^e>&T}_47Y7B{$Mx<`vb6ezVGEl__a^U-6aub!E=RLyXBzK{F?1
znM7-OHpy~D-+Xxd^2L+7DTQBm8GZgPZ};!R<No89H*!upwAAeE<lTpDeN}JVTwJMj
z!$Rn2*7ElL<(FSRuod{4qb8^Pd74`B!y}J2G2|#s?qwGLs#VU;mAy}g%Rp}Rov#e4
z$1k?8t+f7VR`KM#+?pL3b=pGwA_v?JxNfi7aIWicLe$fndmK}yn;-byBjb~o%5a0#
zbwTcyYYBg9<vc@~*PbeCG~Xqw_ll#(*o$Ym@`XZYn<v~1=S7+ArtqG%Y*kR$dM8_}
z@?=Q5v6Dc}NzpUDHc_ju3e`kUNL6BTi0N8dknrkrnQef=57VuV(T5-P?ru)g;wWft
zw^;sov!c_)!qZK+-Y^C^m9fk5nBI3gvG4Bh*V6wVe=v4RxxpFXvh&+Z-d~d6G7=jE
z9DEctA4&0Qs#f19Y;HPo#EWg?hiTL2+~z2iS+ky%^SJG)Es_jXOYeIB@Rrtll&g4g
z#fu~NB>Wz)m}ADhw2)<krH)x}mfpjRg0v!MweaU0K0IICv`;iTx|~p0;F29)SAUl0
z+o@y6S{rU`X0+p-YM3k%_PMe}#BSl61>Q`5-X8h!W~PK)*4}@|&Yu>j_B_4suj0WM
z4qNk9JpHE5-nZ7e-eXFzP@w11RsNbwO;mj~GiN?(|0LJC{<z5&wS-rk&9{Ha)%!Ow
z{I~6@wsV&GbJqAzx`BegKS96y^Ske5GIt#ayVW-7+sx}5d>HN|cQkp46>jP?b+Enr
zI#IzTPeCn(dH0IVTAUpRnMB1Vs!ZM}?QwI{^HT-cj1P~#77<kOy(n?HYu6>~`rhN~
zIF%J9w9GkiV&=5tS2r)6yKL^V$mN+{tIDEs4CBMi->b`N9T#P?Fq+Dlc|qtPOP|RD
z2_w%QsXKS?ZZdrS)2lS-z}c81(=H$OFSlQs`F-{6)j!4K6>q<M$C$05b5iN^)=#Yr
z$wkdv6Io+jl+vAiRF-niniC%G>8ZVdzxAX4n<*2IhDBalsW#oByIbnxsSn}1|IVKM
zdiUYSRYnG$mnzNQt}DMBnv|$75xQZCmT!{9`qy5Etb8-)xkgNJ@jN0qQ&)YBaE*J{
z<z08=uN~eryL|WSY?U=rY~RTxs<j%=IQ#NrVr7-hN|wjUiy7vf;Hmt|w1!deW@TDA
zqfNKk)2=)|>FrOiGC!Rca!jj!M(&Y)Gn)QIy!oHlr?O#vjgtKYpUZF0{(DxofB!vS
z;pPdwn+`Wztu;J<v$jyjFWcbj<lj#!eAfgg`KmB|lJMHScAH+MrRS{7dGGw%-`}hM
z|L_CDfj3)jJ^c9N-@A`@|6XoCe);d?%a<QkL@B#5${qDz8LJ}|=zr3M>7Tr>i{v_m
z*NK)g(+*!Qy74}5w!)7cYvkqMy_c8!G^=~Qu~_~pE8m-E?uMqX+_n0L2<xq`n|J?i
zu!$_pd+_Ppr1R^H?#)?LIxV1UEu*V=#ja`0oJuL#k8CeISX~|0<hDhCKYQ;h7XEVG
z+MA^Y^V&018U%mGUTf7`GnZdL{gOfR+R9$(qg5-9YJbZRS-nLue`Y{MbjFky`#+!6
zoAqP=#TV6ku73V4Cq1LP-Fury>mG$u8%mGl8G77URq?8Ax|HdruCFJ#)92pOYqhs+
zc<d_@n7m{|ND`y4wTz5@jJkr?jDNqE=p2*IlS<1en{edcl2@r&iTSN}m6rt7%3e}9
z6QKLPCOBgE^+R&4dKad%f8~`rxlAr^f|auNC#}z7?XMMoEt#~@m-T&Az*!zORYg_z
zNVUn#dgo5?xotFVJE7jp`o`s1;=$a*_qTmmb)}r={paP~+W$&IgQbeOZ~oh%`6%6p
z<)2Gqquzb#O`-x)j8caRECQx}-6|8Nr#P2c<qgl%{})z>?eSU`Ct#ERNH1f_<(!kx
zYMJ&=P5JQbg>;I2?#@>6U!ULg+y0i_<uYM@{ao$|wI)@6HcxAsFMe4<rg<WJ&nElS
zkhii~lNNJjPcrFLo4d@!H#p?$qLq_Mf;Oggx*6SN5Ko@-nR&uL#VND*uia7qjN?b;
z{gXe1R!qwHeeyp)>yMtqp69dg^q$MPBQZswMBQw{!_T%#;W4Fe!e7nuXPaCjE*A7(
z^K{>~ExBoFeserKmna>dVY0Hz>4<6Sk{rv~sSyVQlCu{sJ~8is;ol7`6FK@<|9@Ah
zZzYxG=&hrqWN@B+&GFDnR&SU8UHx~;vb$f;m<ZbH?qO+`NZ9`4p;F-FlNM7vSd1hZ
zb2gqhDR%YR^V?A|ym@@HmPcL|^(+$-kLznMx4--H-R%1{zq`$?AMdHMyF1g;*yYij
zZ#(w3oG21gdVcl6nKy|Eoiie?D%|c^C2m~(HT<fTa>hQxEN;mO91*-JI_u7bU!D^Z
z=>ghK^e$`Ock8)JlWsX(;8ZFyR#SgysTwx-S?sJy(PhycXRfhUdY*8y+gP-$z5Vjv
z%bcEh`*z)pjn_3;lU-&WFk|=KvwLso-Z2ienW3WiSEVpHy3_SS>xrxEYIinm=McHF
zZ(f7gV%3Nrw-!W|PkCC@(V5_A8TdxRQzSR}u;-_uGud&|4uAYoP+L>``PbRH`n|KC
zPYIe8>XFbjN22)9z16E(3tHXv8vLY=9V@7wnQlE(dZ&?~?8ZM|AAf((-|zA&E$#It
zJ6V7J<xdkM=Wo(|cC*NAd-~gK(d`|blhg#u(k{7VUb~w&Ytktd?N9yX@Ap^#f4paR
zZSm&hM~5GleE;&}-{a%QkE=%)Z|yj46B}as{LGnLRfYq1IaZuHb->%zLukX#8NK4$
z`Y#2@C0{x9VfQop#n$e(uSVp~e=1Y>-D@J(Y}X@;?TVLd&7Qwlg!y7x!-?~yOt*zU
zKbm?lopE02IniVH=l#|=`}e$ltnFEDK6Q)lAD;>QH92;5O2T^~7T?F+u~#;qDU}pU
zJp4R<ma}*JGe*AW0SjI|WN2Tb@rHM{X4=C8M!d)FT~Ij25}>6zS^2C=O}*~)2K^7B
z+v_><6SS5tlXsOhl<NE6t*G-(p>Ug9Qs*1iyn7zyI}YABbL38_a0j<uaR%p^z|S$~
zLp+`J!>iSMpV^djK2y!ual#{Oa`cgza_7_A|BIKYb?n@E^npTRVY!lG=ZQ3~YO~G4
zOHx*O_gZp$&zfF!*Jjt$8On}=dy>wJakG8f<jgleW^?EFgetyy{7b$uo=@RBzw*ru
zwUl|Q`75-yOT9Idh*|Z=#(jd@%hGE19Ribj=0vXcvAM^|c*oO8`}kw^C=EUT&X_}m
z${KIyNOJyE-m!Sq%ei_UNd|s3smkGpxGytam>!vRzRT@OQTiK$bN*9)otPfcb&<8u
zZqsqIrX9?CcI<GFYS^|<ptYyHpg&tHd2yzm^6!vi-&RHSY>P3<KHV9yz2LlI6o;MK
zz6~k$ncr#}4<C)+$97<6$;b57?+T|)iDotUk#oH;aZcZK_ct{^ms?6ZmadMx+BYM3
z+oHTL)4sQ)KT_;zcxpUR(oH`0e^%eb#^CBx8ejH0P47Ia%yKX+dE$j{=O&*kK4m#O
zDe`*Ek|&`Lu21$-5<7gH{rbf~$-pE1LGeP<RBXP4GZmIw=&MKjFR%GjlO29N{PLFD
z_cvuk@pZ3#w{s1PE6clxxjauEG3@l5lw&Ej;0#N$M)S?PHq%Q=&6VR?Bh#n3=**k6
z`PauEAKQ1&jDI=XeEa3hkCzK;ttM4(H5Oo8B6XNET6SKn@N~AFJhOaST0=u7M|rey
zG4Fn&&}Q4&b!yI>?&C*m3{!-xF729U)a|o7a{aTrU*psj7na5zJSTPFcT(5%kYx*%
zguA)8C+N!O+A>LQ+a%h_y-~*R_1(vrjPZMS&0PQ8rdu{A(sue4*79a+o$4LeAML$3
zMd|C@l~06s8%FMweH109n)tDRi*t>cQ3ivl(A7m&Et@l9-?TL!(zczTl<e4&)FLMB
zsids<y)?EcuK)P*-R=GM=XS3)IeXbg)mNv{PuHQ(YiIGNhqHv!a)ZjY&$&D8>5r7k
z(sGI2|3&qVJmNcWC+zdlTGdTv6}Pgk{k}KZ^-kyA;<i;A9OiC{JZI)>a^5NP`@fHW
z<@fzx?6dofjqmQ`j}Q01umAP$uD^e~^X8f(OV`f|@;ZKbrOD@~%-J7x)IzTw$c$JU
zazOeew_WTR#>LaTW~|!1&3ElClcig}q^jk}{nKA5IPu>5MUI{J61lgRNUoUDFZT6A
z@xKZCzDu4Kx06r)RFRlhr`i^O{>;Mitc(6GvmYi(GUszm{Pmar=Dn6Ud3WX6A+L=!
zoFxLfXRce_%HYzqLqO}a&b~d&Mthdmc*fdjsCkE<y<;)=s=Y&y*4q842V;v@J55b8
zjGwZwp?gWpJ(p{r4SunI-w-)rcXf+ZWB1KvQu&9Qq-rN_?ycc}$8l_$v`AYNr|}AY
z|42)ttQMA2RWt9-y(Rs9&AW!f7T5MCaw$Dbcqh!gouTa#w>x)hl#|F|V`b%amK>bI
zk_yaB_6oB?guEVU=$`p|tE4^rx3JL!gKc>U<<6avZzdlyNH|l{8=kOx%i~_-Cu?>|
zCM8O56Vf~SG(gMGtiW@z<Xk4{zjZ$&W9Q6iH9g1E)@m*3%6$6yqMkNsCuwn3@lC#3
z+l}V!llkL*HtV?7|DV!95A93dHD;`R`ylJal=?kyT^_9~Y7@zloYQi0rpIl))?_ta
zmo(k$4%$<fFdpvdblO;SeYgI)C0r5<F6rc+{5}2e*7z0W``7Gzo*w)7`Qw`Z8$GHn
z>}Q^}xq9JoM(%n0HEPcU)I5FHU~yy5n_q<&lMm>eU2#JvXQQ_4nos9-R5z~tvaeY0
z&?hb1GYM)lTKSthY!+<wR8gOM$xoeQsfzaVH~VH?ZC$hdx7M*uu6E2>E$(^E@~@6P
zQ!>30vEQgM;R&CFqOj!erpKF}HP2ml{jQzFtjx4eZ+`AD+P#~@@JwRfO#vIPmF?!o
zgQmKiT->UeoYB^<V7L0y)MX~!Dr!+Rs+<43d$7D&KfnI()o}COU$y^#bgr|I=&3HT
zIs8CCpnIcd-><mryNQN7_<rfQo0z!o$&Trjy4~<<N}<!XJ<sOk#@b3h?)-GPNaf<+
zcTwxrW{Ub)rXAxF^$gmzxI(OB&5P*iZ=P*9nyY%=hb@|wLx1m1o3*bGo=A<F^KKrW
z#f_IKr%$fAk!V@+=gHL*qPYtdMZ8iE+<2jLY1<-&Pp5WH{xMP0X!jWnug?}+oB|JI
zyim=1>}{^pRmS0>BcM3VK)kWFbM48Vhdi6-h+E6{IsWrqc+CCpgelJ&B7<f<xD)<U
zMCZYb9S83w-pt{wm@0YaLEEOwI)|fPALUC{*)ZjCWM$7(#zUo!v$}SiIFQnps%Ukp
z>i>2A`>D(N+mBanx0jm+I%DYZ-QSOkiu7mBH#7(f6^y*THOk|{oX>*gp{HhZ%$7*~
zU=#OW|Hzstt}>zi(_1Dk_N%$&d;YM;dO4*dwmFVkZzr(JCoFarllix^N#^#O6H>nK
zl@mABO_z{hjy)VN@Z(PJHrwdk%S^=1C$?558XsQQr=st%XqKD#HQs{t3mPH<t}R#;
zu-ryE@u~;Uv#59G%U(`12t3SOxr1l%5nhgnIpT6r-x^9kwecOgYjaD0<L}bPb(a>`
z{azeYJ$dVG7F``vwwmmFDH9FiHWXAdt&q!LDXMTyPW+I0^+4Odl@2~%Iu-9&8t_^3
zD<<eY?6?}^#yowqUi;~5{0uXr_h!n3Z4KM|iu+c;1c_6BsxG{3*=!{usKuluwqwPa
z(@`t_gf-=CtSf8S(~>xKMH=&3KhHU9pLBOm7WEP+X$UBiUQwn}_lEsk+IL;wlr`+<
zrYhvtZaKMJkLR{r;Qd?mIc1e4`&cr<CMYVbZ&94?@pw_p)240uK}<QlEAJSowX!_o
z_+aXAQOWFK{ua;P+o#1(ohasB>rpPU#ku*v?4c=EEM?4$w~sJ?F57l7C-pCnHM1Yz
zvmd{1e|OJWFv)#R!JD6+Dsfg}Yq#Cf_!zo0JJNebtvFlJ`5<YzWd%oD{yctu<@@~Y
z=eLzqK8RucFaM4Ge^1fhe2tIN3D2Hxczp4kH$yY4is}7VpXwI8p1PeoPmbw<e*NdU
zD+Bd11MW{XHowbvszTx1{-eG!mf2o0Q`Rauow6;u#(8Ct%+&Vf+v4MFQat(gYEPcN
ztG?%)_A|!xsEM0zzI^#b;qq0J*rl0%nPH(5JwKgPmRi}H>6JWZ?rqc1MY*@~l&39o
zad}o&b!L{z(ny8VMNxdm_pIZywvDh`^ZNDm_`Q3B>gJw&^HAfS(T&Ay+A+E3QuChc
z#+WSYSrsN=bw)^|M`(WJ>)%SxN<UrI*`2z1^3$WAC;teXt9I3LVn=Gqj-9Sa*#`NM
zo*~_=XH@l1SH;*$A8fTP3Q7uGH_0Vk{j6mB)#-0mUwoN(@UiB%ONRFrtvai5{eZ=z
zQ;y>OeHTveDN>fuWsCKFs3&HAKCIx1kBgRLko?ao+r{n&3N{#T)^NEJ$kp~rEZayZ
z?b(vV2vzg)^YfIC`rNI$e~-`dJ4d3fpIOO22S!i!mo~p9uRC_3<5<tc#}~}X4_Pjr
zn=0kF*G1)>ZGp}q_pY;@k3Tjm>i?|x{^fF}ZSDVe?CngR#`kS)=S_Q>vqNI?%sIuz
zHyC-h1aZ8$^5jTfvr1{O(&wEMd77`W>HaZ2b!pMt1i543TXSA?O*tg}l3`cxvkAxF
zI(qikD#|S7S$?ebhidIX4fRL8AEuuF#C<UJdZ2q{=<!}(+ZHx0g>9P_TN}SF4K5Ne
z{w>$vR&sXLeL*$N?^&W;b6>{3GD%~Q7hhM^eX#3Sd(Dbfw@zQw+8)GXs+-1b#hO1$
zL5<xh=TP7b&R$u^pHj~ABC8wEB{UpJZ@Dj_yHomGlBU|8FYdBu`1pnWH4EA1AKp-*
zSJ>4dbJJoUH{<4%_M_{(LNtHmhh1Bn*Jf<Wy**I;*um&(A+6>;9-kd^LgMHB65{CC
zayPX}CR@hQ+qg_WEQbB)#YuY~+3nmh`SAZJLx-!M&n;fXD6W0mSeKzZrz-d5%32RY
zJ5J>tecL477XI;hp_rx1m;Isnc(-(fxH3n#u;lCxkK$?HxpqAJG$lE_<;X*&NAC_b
zG2LD5k@+d``h{mDu?HL1uHNx4>!IU)r{@x1Pp|R%^G0iab&hfG1o?d-VV~M=-<%$O
z=bc~9D&<EEo`zf{(NmQFOn-PpD~2UUCr0gi)a&12kG2ce9lm3fuXJYCs|`G_PR*b4
zud&ti|67*1`|CISJ7j%){^X>;y1w#p|8kF|YID4CaJk<0Zy#sH1$*sWiK9`gJ1==X
zuvytyZFKcPR7ShTeB1Y|$Eu%kYV2lhC_3?lyT`A4mbbzi!^lMwt_Jj&Fl|-TNxc)h
z$Mvwfic~XCnb;K0^0}T<pU>&7`N}l^<cZ~%Uv7A_Zu#ojnX{JPU9xJLzvMeVG4Br=
zUs_!qRHa&fH>}Dj5VUxl;`Z}_`bjM>>yzv{{Wpzdwye@!e)(fT`HyF>^YhYT*H^}V
zet5+7S(dEIiKRMUBtDCHZZg<>Gs|@Q(cfiOpJJBsWHPp=SC|<)vCX~Xm*#(R`rH)X
zDYA~!QzjkBG+(N@@tBPGwx|nFAD&pQIzKErtZ8{mrqJA^z3K(AVUaw?n{91xy=<Ao
zm{i`i)Ykj5amy5Mt1XMpm<C>DN&oIOwa9zMjKw?DzstBRHu4M?vb*jg)?!)NTGz<B
zYE9?NhpWZ^OV8l?tT(~J<WYj;g9BT;ies4$?Ok$dfyS~sMy6UxbC>VvWtLDk66{@=
zu=jm#N_ftLrl*UK?hO>&;$$%;`QxEgMypwB4xf?Lo@h}J)yKc1+<&?7vY%IH{|#UL
z+?2)1Idt}=BS&7(ivHZ8*yK4i**@WBQdhrWg~)loyJwHxysfZ^XVTJ)fBNhziquzc
zeSGXU<0i}0`LfG>_Dy9w5)!jyQ<y<VQ%3Qxy#>4Yw!QiMxOT@Sw!`M<wo6_V5<Tyv
zJv(!Q$5pMW8NE5jgmpqrBwl5Va%O(T(ibNX_)Tmj^Z$^p@~9c@YW=6PubODyVR_(_
zyfo^Y=GC?Bit%01o%{hVY<ZU!xSI<1D>C-l#ci-Zy6Bfu%wHXe<(WqpNgg?t9ijL|
zM0{R&z46vni&tc41x!B?u;_z9myB5EffHgmi#HXX3Z3OSM|an030ox{iNhv4KAw2J
zScK6}gH2g8IXBC}kAMBNXdm?{EjG<lPR^Zan;xY4FtB3J&g%L8?~gCMvEfsd*cyXv
z3sy}1*Rb^6ZQB?D;m^;eN_9Oxd#}BNN0sHknZ*})&$*Xh%wN^1w!c%<^-D?DBa4D0
zy_(ktubppZ?%4fyYpLP`yP6kn*SbGH-RxL(U3mIMm3=!Jj(Bh%V$imoJlFhYBLDW!
zWt&7-gkSBx{#;bBA;v8*LD6N8g7d0GLDzTFozm<@?Q>UtO77A>%-2`l7;nFK?~d{v
zhvS~y|IdEr+<)e*KVn@!p0@tp)_81s3Dfz1@(B%#X8oR38~Ir?y_jXe+A~+IH4?4Y
zJ%4oVp?IAKgP~#Wg-2DU^A9FQUvW#G_%Url+Sa$v^JXmZcoo(-L6|2gCr)8!pCNxU
zSIhqWXPQ@O99pO`MRdjDgOfai|NK5A!~Lb^OTgnFrJP)I%Y+3t?~qP7;<%t^E|b@z
z+=V)q1bzpt(`b0XqO4QdCjFGzEA~{j-P{S{+f#S$tDU;+r`pY`N6BVNOYh|s-#y&=
zFT!=tS?k$nr$j%<nCroAB5YXmO|;I)c$rU4(VPeB2PPfxd^Ih4lHcykS?YpCr(aJx
zyXMRhnQakv>&l`&7pZ*8TwY^qyd}VrcV2jOj_uD~Gb9fttLWx(Xxy@!CDgN{=u=zO
z)^p6aMDI&nxx&J^O6sTn>Y1tQr}57X5pL$pyC?Os!lk84clI3CLrSw|m~N8SVl?`c
z_<GuP$CZbE96F@Sd1q#S;=H~cZtJ!iZRPQho2zYjJ%6U-)Y$NKD$l3c1Z=y1H1hzj
zGtXkCIQ=gNj3%m1Q(=uby*8qAibj!gq_IpIOZfM>=gh?flW#g*l}enVFz>mxz*AHA
zxT8Nf#nVDV1N%(fb_TJ%7LUJU#Q$xZpPWeU<mY^BGV7n8+oa<-W9}8T^Szs-16+Sv
zn9pVV#c9eDc5K$ZGYmCzZpH7gii>)XY!JD~?@z$tZ>!u_9I%~vqswn)n!%OJE4f5&
z?PE!Q8L^^k*4arxKU8(hwu-)Vm5*GUDKxRbbHa*L+g}-?`o~r+(Bt$LjW)3CnHI1>
zap~t>wK}dj;x%D+PwXhu>y@3(Fzsw;h%=A7deY9FKUkN`=63WQ%@pvP{>eQ$;d(=g
zP}*jtFAR+*O%8mz#}pT^;bZEC!%C0xuAVTsvR(FNSNes|k7n|&tK#5Qb`L*QVY8=D
zh&?SQ`i{ff6I#tne?6|0dCX;=lr-JqYrwRWk4r3aw*}A65)rq)t*^Dj%2rf*;|XK`
z-(jYIVuWLkaoO!jp1M1??bsg{nF-&1KJCcQ?LJh*cKmOq?Gv6Qrye?T&%Acc<Ffqa
zh3`8p%fvfv+pov0I?Wz+;78FM)wi|l>ZI8P)@o<5Sgfn=y7Aq8-}P&A1g1UR^k}WT
zspf+y{@CsohRsKKzR$S4NycYl!qld+^)g<*Yj5Rdo@(sn+Nb#=dQIre!avNftLqF!
z9@sD2BOm5}d+qhljTfHtZhd~#){I-RuB>H&Vdn3qh#6=9zT9Fn*N@fu{CQ3GJs*#J
zX{-reIQMrsch-)wT`LNMd6O#FJzG6tM`BIrx-DLJ`|S4evENA4^K^}Hu$~+9prANT
z*Sgqj@~bsg<r?2FN--EW#H<v0A@we7GE>})U277Ywq3rwYu+`J*LnMAU$QxCEok1H
z`uxq44W`vASoTIV-KvgD*Zt5izoD3|Xxq#wtlEbI4CLQwUyhCQ@9*c|{Wsd@v(Bv6
zqH}p&3jz%SJ0>K3>2$dD)yiWB-<u1p`<8Z>G2F~8jc;s@R6BJ@yEyEe%zk4N&spko
zg}9ebx$~*WX6@-s^RDf9+A6^o{i!PAQz7FfHM2#nHMZ;W@^0QeX?*h$<1&>=L2NA@
zOWz$^{962~ly~aFQ}Q)(rCI7uDTVjK1)B^uTJJnqB>AEBQIDm|Qn`k%1y^^^beLf(
zcX81H#@&HG`?Bj_+<U)u#nk91$#M(NNTVz5-hr!k{F)Z<dYbEwLk%Z+lh<i+P2MH4
z`D5cV$G5`w11sADSI91`61ke?x-{UQxOkWN`*;!EGXY7@FR%;mY&camb@8K?YSqsi
zr%ikr)qOKp<;*CWX*79N(VM@=-z@M;vXNgiZ{_`LF{R8qe5;g-)}38pxcZEKr~vE1
z+sC3>7hYLAb4%#@Jflc<(SZ4ya~M_5Tc)I@sn=wlyf9~q;|9%x4}6ctA1>G%6mqBY
z!h*+lZ7x1}>uVjI%#?lW`T^#g`!@MArTJc{ZJc}Czr~k5&?PRBFUvReXVT%2>BkFy
zt1cDZ(-OPLNrTh&`Fbz;B_<V$m%H@72Nzh&`JCDFF)wFI$n3Yr1@1K+JRexa&9VB!
zx5Nd8f2wXMIX6k3Rp7F(D@hGx*{D%{@$=SpUzZ08fsNw3L-#jb{JN^<W7k_DR~D{?
zHcmlN8mdjwnj4LD{#+BtGiF!Pe92O{x=JrH(e%8Bh7|wG6T6#k91Qb6^!)b({fW{s
zlfEoEvZpvYG3G+pMK{*A&u&UlMfXcQQ`<ZQKfQ}NsvfsBSf|vw>(;CM=f8L+EmC$3
zp8m1n?Sr>+c`^UXp1qGea%blCed5>e_J7n=e_Oo#!qe;jH~BYIU%$BEz(+0K!x5S?
z9c4NV$>y<fN**D9-qrd^&FV-{D-ZPhtNczOY^BDTiN+g#iYfOjN((yj<E6aC{^S4S
zpMRe(7Pt4pzPXpD@8mwF|36Q3k=yMSF_Vi%ZY&a(FE*@cSJi)Z^UR&Xb^Eo;3oVwj
z9u!!YD5hg5y@u85x?<sC_o*^=%g%1!C;CG8l)+Zxe9sk<^S7@3?mhARlZL08bi0nu
z<_a?M>ApQlH{IyD0nhckM(KH*=foU(Vz5at!`t_(gnO~8j!<Xn>D5nDPcEOctaMiS
zBF~T+tWJ4u3hJM)o;Evm`c`<vo)_j_mWRaE70++c6JxmX>4>pl&Dmp~Z+1MnJL&mf
z?VGVHCj=%HNwgoFc7R3o<1UpcTzQP)De4nvSLW_ukhpWlsKzk9QMAP1^r2}t)E}CB
z^!w^{Ids;#Wtrx}o7es<sGmRm=rrLuH&hMIzMfTU8hv(Cb=tHRhc$X(5zF2+3b3*W
zE{b-M3=Ezk?9i1G`Rce`yM<YpS?RmRJntt#c2h%w)w@?HcR!fqG`ZVyyZA$gRQBWd
zcOK^6t9@wF;{OZ%)}~G}$Svmxb}<rf+{`g`?Ew#0xow3D7@l4+j?|vklw5jm(fw14
zJB(Q??*~pPIrB-&a-;Xc#1P-0{S&r%zi%>DcQSd@`?;~}@`cLst=`3IZPlUt;)0c4
zuL{bO=dEb?rn}~mPU5|(X$5*0XT_H>6sc+pC_b}eU1*lFBSa&t-|MgCWJ$fK8)Y#}
z=?q&u%;k(ykI$C)8&Vb5eC3O-VCBcv!LM&rBzIZG@A!6j*F~LOZjut^+wOc5Jfd-t
z{m$~9uj-bkMWu`O{3vpb+q?FSz|$K~O_a+wI`J~bRGfU?Ch&NH$UED0H<EYS``i=E
zXgPK3<%FNl?*_@&Hrs0-T)$rJ*Nyq^^W9yhOqdgTnfG<$qCV~5KQqoBNV0nAm!qTg
zwl#BhM0r|g#`GUEZPen~So-C41?SD|)Qvgw_JE`pgE~j?mM70H{4!i=s&##0u#27R
zm(bFWN5Zv)4saBRPPwu1l5f+TN98TsG=AJTk+|e|+@tJ-#s11UX+3|6DleD)u=#gx
zMU~S#U;F(bS8Wb|HZ|^uW;uI3erfA<x#bi2l4jqM3T9QzSW)fh)Rge@L&p7S7qd>B
zFA6(1SAKqER*c`#x05?~_x&-Jbop_HO*L!7uUW;LrLP<A31DB}D(&NCC6S$K<mo#_
zdi(2xlQcNPY7_KXm)*(zb|$v(m1|DKGbz)TiyfBz+xTlvdY17Y_v;_0D<^zt;ukwF
zlBr+&#_sCp*c{R5TiY$qOgyW5?NZ2w7wOjLA`Wb6T6FRw_oS)^>L&G7r5BS=EIy)^
zH|h13dFOvF6i>I<+V;xlb;LT?Dg7!>O=1P-rr1u@z1e#siy>fcfryIBR=;oS@+GvV
zl~lKVQ#C!1(B|Tp`8w0`mdW`{^HzO_kj$<NmEYg+3Yq-s-yOc$PwbAZbjVbuA}2nz
z_RX8px2gVqv+uBzmAao>rw1de=R>6mm3ohveA8!qVM<Ndrr=-YdB-ThH2SXJGi@7T
zqs^Ty?K!hDZ!LdWbxG6kbCFHewmoaMtvPY*z>>>LHe0XF-gf!b>dKxoiWhBXls}H;
z?ait=@AoD9v1(pT)du%R;$BNCYzueo-F)w6>%CxS&CEHQ#hheL%)ODc-}Z*xso8Q`
zvnn|6^5(^zzPD)F#ATL=EqCH3yOpSQ2YtM5)oXRuid(LzElWJcT<}Hz<qwR{jTzJb
zWXUttopSJ+66BRRkCAU<aOT}#>OmS2&bF>ARC+wlq-M=y2=w}V^<R~J<tEeHyMpd7
z+_-All%zTFRhr8-tz=kIwL;5m@7~bqO9U*8mzes;g=XZL`==*8@H!&p_2GPM{`2<a
zsV8%m?UP-1-|pvO%a?Ie52t_pq`K$T<t~4on;Syy112d;@o$#5mA-$sc#5pK<I@*r
zE1ZuU<CB`X<h=LKhp9E5`;=AAotqG#QheDayvjd)E5F>nx+TTY{5do0Y&iNev#)Di
zKk(sQ|BvnI2_+`J8fL$zbWP3_yRG(LN}0i6R>spa9#Knk4s%DGSQ08ER3xLS_c(*E
zE3md?k6ez#n)6rLuT`(zx0WHc;W+<%Nj3J6U5^i{3fRPK%;)%-aYSs_^a8JSsnQG%
z8vjbxX`jEr$r5fPHM`K~Z1Z+eXV<!a3-$#>B(AA;Kc)KhhLOC$<Wkiw7q<z<9%+hS
zxAwI5Yk?w>f0IAESzh)J7chUJJwt-2OR}!s?2fsw(T_jP_jXC&dSv+Vsp*St;s;c%
zKc_u8UHjzY&*c0se1+<j8@GoyZ&^{Yq+!3(lx@o-JpWo5`Q<E{7Vg)4E6$Z^;>t-O
zApw(awp7eGF^M@xj*FY0r_AHwqy3lnXMg;w`sw@p%;LY%)olA*)~UzrbJ-?v(ZA;W
z?TQ!1C8}JL_zm;!an!l>n~CSF<6u|r>~wKGypieF6Zbi}54@sE*rP)%v=?*StSUT^
ze*Ml1MaeBbU-NCgBprEjW(`m3ywK-M9;tsT{c5oEnq%@?<;v*btv@EEMQI+o7-Ug3
zks<6)!Nh*+ur+cjj=G-9b<Rj>hA2o++`;U~a5((CRISAAEgQZHooL|kFZv|<uVmKq
z?1^(Puy`#<(d%%Py7kL<&guB$$Hfeq1lPXH-gdR7G@h3&{%&|uvRRS1QtO#n--36|
zTlP9KT{XdW&ZkWZ(q~M#yeCGcFHB)67f;`Oq0u}qd2`5gdHq8nD`NIts+G9I(dK&i
zQP`9_I%_<WH*(icvr&?%c6gNcUB+ztSx(UhJW8L$re9cbUq`UzWZt`po5dItGoF@v
z99pI8ap1|BRhkDS`M77K#y(sWWfKu~`IPRD*i+Xo-K~{)+4L)>+gg+{R6a0%W)O#V
zNaa4S&sW!2t?iDEyBpwKF(cY1wRlRQXa3^{cIEM6oob%D@BXX0r)u-7%Icu;LEY&M
zyCV+G<B}=nn9KQV8H>$w&K|C=C#@H6Yy5YaeZhP6^lt8KpS;Xxr{7L{e^m1wci4hu
z*X{cCUoKD9);!Lyb!yw=7@jk=TMO)F-hTH<+j{jy-P=E3hh+cLv$AH--r}pY<LGIg
zb(a_2%xbx)$j5TqQ~TTmgV_iFrTIE4`6!5P_4wOTsVZyOdo)gc(Ffzp&aN6f_u8I0
zF3LS}wymMh=%xPe&gpr9_r!w!B>gn`As98qAoR4|>I4oJ4VIr)l`E=NtxH;}ak<;c
zWy!nZ<k@$Ac)w+npTBj{biu-=nNu$>GG6hqrn3K_gyj8r#l4JuKW-Enm$%%I;_JH@
zz~9a--6A|=Ay1>><||*#H@1j*zk0reAuHM};<<6?*$3)j8om$f!bFlQ?zJ`Ndp~Wo
zXMg{2&vEIS^WN5EuvM@7$GSm$?L?0|%dV_>^)~vg^i84c7Yuh)^BS@uj_tTCU-f){
zvC^L!hxlZpT^Bd_J5;Xj`{%@w?z$<@wKX+C-^FRpE19rYb62(|Ub?yUQkqO+y7rGf
zKh}Q|_Y5}ppe+=&t%do@O@=7f_;1Ne_mux`zyAI9*Qe8WM(KCFtoMuj->Ls)(*oZ4
z|E|UVays8Eu{*e$r<j2;CGF`Y&8L!;v%lrsRaBZ35?$pOy@ok~@%xN^<EQ_3s4|uJ
z>TY*>RvF&#-2TwB$&4*cKC4nWdD4pnCp3i~HJ!+$YGx*v`7pv@{=ye*?;Q89KRw;(
z(<<phZnlhK3!8G<XG^p!ZS`1mbe1)*7PE-9Z2xl!C%ZV$9G^2n4bsep6LJ~PoOKO*
zrhRdyMYG0krd5hTD>L?a{1Z$QQa`oe&AwUs&%DnCKC+(7sK%qH@p;KE@5^6p;@^MK
z+Bo6q)oVRDDIO|klvKQw5>ighn|}4vq`RjA@+0;Zc}qQY<7K@Zz_(?h&572?kPg)(
zmNg#g%pHdrZg05RYvt+6{YfM3dB(r3RwrbZcdhW$unm(FI-??F9<l9_hPcqwN1S|>
zP8ZpyFN_I}Ied0g?~<8y65rKX7PD`9P$hL~=jZh+Rn4y|CK*2tTHa|Oyt^agk%?+(
z>GwVB-aR{dMnUvh{CoM3!&=1*Ml&U!uqX!wPBmFxY5)G4&ZO(5vC|z?Ep1M`%$yZc
zT3NN^+WaYgrw%`mTlDu>lGak`_<Q?IJ+r@j&OII&9JaEywa-;c@@4OVK;6~1yFYG@
z+yD02<W<wQH-FeLQKr8+E@sZ|k58BW-l?!K@?lo$;pWTxe%I`3T5;*?i^>mA1S;kB
zZrHTtTuO8yPw#;RYcu&%-ZK=Gxs<=Unf5PysTniFiog8x%!{OX`Rd;VJ(k*ZTqm^S
z{&wB-#|o~#xc%wehsxct_m_ss>}%d)`l;sR`I|qruX;TTi};Z$9$~{`<JZ1wuJQ^u
ziBgR%PsJWy*7vS$eEjkIAq%-T%^Lrz^Y(mO;Bn+$UzdeRo~Neql7oi>p7iN%>$p9M
zWBQ&&wHJ4U&PdSl=9W&?h;9G6WWmZ3v;T9BIo5ujnV@_zQzWuS<6mKDsQDh#qZ<tV
zuDVxy!QV{tXMiDhR?4)FYSTMkmp{G~wa$n2gSq(k+(!jspKdP~IB&3&W!e0d0e5Ny
z<|ai4ZuzQe+}HMDrIBLR;orMnX?|Uju_Ve%P`hJA$y7g8l?hw+Y<V7ZV3C7f*8U&T
zzu8|GA8Y>2ebjP+{{PS)OQ!wXywu+A|5V2xcg*{AWf#?ZJ-WPYZpY#~=R>|+V(waX
zIip)4vde!~>+L^_c~9;TkXUs^GTo^9;pV4TmcL7@``P+npYxqP8m?{+15}ybUOuA`
zU1ZjNaB9m`FL|v~Q@A(oDKb2NC+z6Mx8h~_7qb@}b>I-LWV<10o1zh;(-5kX@Ou5P
zTieg9ZPE-*&{*mvn#4ZY`00VWDavb<rln2SymaEo-io`)PSI6gPV|(dr?{8J7oB_8
zUETKl#!aos93nO+x6FV3H*ep)lWP*T&td$0mBYkymzQz6i|PA!X{UERNj=Lrr9->C
z;>N0HHj_A&GFFPc^zD-K_?Iz}i7k;giaF@jcVX_u$8;Gxv|?HozyEG#C!Z#FxNG{!
zgHb9Csgr}YHWfsio5x!kw9d1`Qa>$QR>3govWC;FDIK=Icdb!;_HvDJnuq3r1ihm=
zk66S!7fH#*m@iI0w!)))-J#l^^rD2qjK|C>r`dK$KULiwJa^xgl|GAaUR%O6O=+68
z<oYvAo?2E*GuL0{^V_y=+d7GuLoOyuj|k=+U87ZcH{NK5i}IiCGSVdzKh|vPTUTx~
zUFL|8?Z!sWPZDh{|NP^u_*S=ZeQCY%SkK+Q{FkZTrEh&ZO0*1Xs~Tz^@<~jzOV2Jh
zDa=p3-TRBp#9Mp1rc_c}kKP~c%hK)nhVE+@Xq{eMw}W?mLA-9rpY0-*-0@GEOoFV<
z1OIK4>f^DWwrhI=PitU)>?$?(t(lq&S5H6sHQm@hEse8O-hk!WHHljfk7ZVi<URh<
zyStRbS$v|wjezsa8!T!Xf7q<t$?n-LskFBHg}$qIx_$JCDPC2=3az<EqL$wKvbL>x
z>5OSnRvtgI`J+?Zr&a{=ZQZJH%2LX0ZILEV%dWLY?D}@EI`v4=_4&*n&GUlqZI4>s
zThDLW)SWRSckXL{whvcYwN|J-zUEwhWa0w*DktW3=esW^T+#iK5|bDF)~ayDm96Q=
z&pq|8o>BB)ZlMLgzF58sOYVw?(YeRVW__q!=r`BkYG-3es^1-{##a->SmbwI(Ri0(
zaFWS&YScvwA?^=sd)9yQO1mb0ZN-W?{5uZoE^2-wf9Cwc{gw$2y0z;6DA#TG*JG54
z-zEQm+h;a6b6s0-;Z%!Tb6zwR?qhnfc;9oO{g3b2slMG_nlm9Hn&a-zxyPG}c7A%S
zr@L`pp{uNoMQZZf^DNu0&diy$EOLs7dZbvCv5chL?66Qlw!&cJrj<b!Y<cUe=X&ga
zaIeGoaMPj%AM<BMUUn2V&VCT>^|I|vsl>H}KZp4ecO>67J;9fvu3hw7$D*pONb852
zc=NQSQXM)P2UxrOr^w#SDLrrbJmbF5WCP8KPHH#rzPWUDNwLzxJ?ql4_pCo-^wP#N
zDLndf(I(UNdGpQ|nHlq&#(CQ=(K;ZgD(2E4e8!eJE|>k{UG8e1Ijt_u6W_b&Se-~_
z3JY7xQ+ng{q^@0dm*TQrZ!flQ<4lWZFZMKV>B#b0#S{=1c6#S0!^n`!I#;*9ou|tb
z>fq6N*yC+mYGm%#SsO!>7tc7c)9{m0^hB;xZXGv@wjKBve$j7XWS$*w&|y2<7PFY9
ziq(H^PKcf&eL|){^P#7+LB(H>l^&0n{_03LGzs^FnrId)&1xy<IlKDyr1sw}ohrV<
zhEL}#HLARm6#v-APSrC?g!{%1Haq)U84uE~3cVK&KT>wB|8vhx@8xrttr*zn9e!rF
zcb=;&YoF9(qvHP1OWnzdzl6Fxq<!7?vrjV6vbFXPVDWo$H!en~-#co<HSG|WV<EC8
zX6EtpezBNcj9PNGvb(A5)K!if6RtM4I6j)ze&vARQjSNPmwH4WJ<Zk<|NMgJ$s31G
z@0j{P=Evk)bGArbS*`i4_0dw7z$;f=^WH^GJi)A}7N%?JvGegBmOkFvD`EefRSxOt
z9GUntf~Dcv!6Us9OM>ng@0f7i(B}4SjlH)_cTTA}{7^`;YF=W%MI+Xbj)>EIAyHEp
zGUq;I)YF;J=b`y_*5bF~hnBhq2_<A(bYHRBZu25eS1I<wwY;~CdRrA|<+HAPY<%>}
z=G6hRtm$i+q<$}8+PXUI#UbgqA5l|HPBZsTZ*P|^aF0y7-spM9ywCA_{PX8;?~AGR
zr|hUI*|GiUZ||a4zZxqaUT^sCHp|;GHmg1;S@!?DOF}#vjty<61QpkocC=4>8_AdP
z{A1|#{<eVSiwvw6g}*wpdEf8alWiUmM#bm1s(qN2ID6lU=%1H_e#wX4d@O%gWsk+w
z`gI%XGh6yEF5T2S_27%5m2sC1OlDX27|I_0_H9l~^gTVjwwHb)**A@Ivzol;cP-<5
zJpI-M!+5Fm$@|myPj6V#eRW40^U;+un@)88EHj_H`}f7-Ertwd4ykEgYEWhk*3nq7
zd0vLSRC7VyywJS4>Dpa=9`^hyE;W4%mJ~2gp53tKb&ayq`r3&d8+}&w7V4>(9u#@Q
z(r9_Y*ho!i$Hv_Os;gNpDBp3^l4Ff6XnGjHc1YovRrai@>K8SaT1wi?54*tCRDDFt
zvRLUX+wz{TJ5n}3s!3&AvR*57_ANoScwLKa6XG<C*Bmxv3O?zSIepoZf_UBbV|!nC
zAK~(uHA`gD%*e?ZUM?HvJY=glvyJUV7PnT{7TZL<$!X`0`D6z9KACrY^3+VLqb64G
z_?S5i)H+%Oj=C<;Tq%^WO-d;+b@42TnytA)E9Wfhbah%Te`)$9E|JFc!_O8Jg$1<N
zC@gB|XB3&;6~rIu9&otw$EH(jHnt=PYHdtBG^Zm;z1!nr*GlV+Ubi-$JHx5BZB1q5
z-7|}p^D5=cQWDkv_3+lY>KWO}oqO2ga{aDaHSgCrc;87&aIW+9>`x2|uht&wuCrE4
zVoy={zwf~N!)+yjtEVMSIN!S7bi(<E=Q$qmS|0y1Y277r&N^pR2PcurW4hm#f1N+q
zRP5^d*~XR+&d7b4{AyiB!qUVEvF@MJ@AF$NSrj#G$HKcWEzhT~*}bB{Y=Ksu*n@em
z)Sl=557J}!&a1c6BK=Te=<2vSv#q^K%N`1M3%1T@ytC`xu2!XKPd+8CdlP$3?9;<f
zZ@$I09G)r?D;hIb-q|KxVukCTkljj8Z>{)ckeHNuVTS#+2OE@^8LtXcQ{$ZHd?w*c
z_6pWjq0Hx}Za0mxoyGleg@g2NlP_g5ReJgf<;!;*<leMm>XnNn2RRSL83?8(++V9c
z{b$elZ~iTdP9Hn3k^5?4%vJUpvBLqoX3n~}VdwXYm$_Em;);G2P*}S~U1<`7)U&;D
zH+t4;gmam#xNNYJdvd4p*YC!UyZrq=o<F%O_xI8BH&-w-?AmYfu~94R|9;(9Tk<)V
z)X!3?TavR?d+W~6&bKcIv@ZI(WtCvn)9d}lG4~Wy&t3Um*vk5NaqoLwq4|z?<Gat>
z2h5-T<nNyFmz5SGwF|TUcdPvPv3vjg&5Zl`6FL;-*BdUqAoK63z`FH2(u+g(KD+s6
z-?XnmYF(i<n!Qu^@V?4=VOAb<!*gM_47>KPR>!}RCRvxymVZocd@HCh@t*7~Kc&<+
zYKHSSNjh`~AJLx5*&?!*W2?rx8~I`W>D|d1&8z=4^ZnKic;TOMMoePeriu@)Gdreg
zn6In6`pHB<>FbW$E7&?ss;2&$Q4zz>);3vS&T)y=MS%@Ws}8WP;8VMlFlB{7NzhzZ
zHO|R5i=xgN32xMQrFndgOv#t2JvP(*Q#U@?veEtI(uG{Hh3hnzge-n>cMi)M1-Cz|
zRWCa@IAw~378`H$;8?8HmME5#8rnIjyED>6?bI~hBV8w!B&AJKxszk-(@@#L+v=J+
z%hdPeC$ENOoPAHHw9YlXHpPu6Z0&g-mo?8)jXO+EZV48QIF~pzd1}<v1(T0+v50Nl
z=W%(7r|s^wJBw^vvTvj~?lf8Att@*jIApR1*SW2`E04b9p5qX7e6f@6{_Q4rz1D<q
z)pqV*#^l_wPT4bd_KJ5Q<vyHlKetqEeHEmdpXGJX%43!6AIa!~+#OZxHP^T-KK6DI
z=kl1@aiKr1lwRB$;}$FSc+G>G3g1qthUhXjt@Hl%<mS!x($Gs6HRq%|tw?Vd?2k{@
zDAy9~vRXQ?z2Ickdr{MpBXXWM4~LwrohrWlzPm%m&n%nCYhrv>Z#7QLIeS1_``%7j
zZHvx(S&B>#-)OCzoXj~zX{Xq&ereHwP=QZBpLV#F8;FM~a`WH2UsbWuL~>`#mcS^z
zQqkoL!#DT}Z~M97;>|mAtuuEwXr6w*W|30qf%yMhEoAou_Gg!fv{`O+^_puCnH|Zz
zTzKWdjn-?8XFdDiqI+vG2XCDPuSb#oOwra$mySvt4Slfq-O_9K_|ny{1>85#su7Ue
zx4<G)hWU&@YGP#C3(<h^m)x~$M7MYIT;hMYHgKD$kiJcPeAE_^wb|)GyfZ)Qci!42
z@0zzOLQT!Xw)Cgd!2=6_rrdLQInzYP`QxLi2gO<1SC^eL$w|<=(!Jvk|2%Hzir;^e
zC0gb_jrl4fw#ccH!IZVcQsa-$lpjw&C$Ec6lj1%+Z$k6CP)T-vn{xs)tWBEqfA^ge
z={J10qUZO4^eaDWJ8GF<Zhw0Jaq&~zeM{#4to~wLA79u%U5x9G<YivH{~a~!K3l(L
z`1N6*?LpgczIP{9KbQIQVWYuaF2BfAZp&B;7iub-wf@-bYWeZBb4Av&u!r3C&rY9v
zZ0xIN9L&P+8OXCk@%pR8c_sG#&pw!%AE|pY(fL&DwG&5|NiCdpN5)q0X@%pp)_arf
zU+bSfD>6SyaAmUbM9*ZNwyGDoXH7ElV$u{pEjyMsyUv2=*@lQn58aPi<vkWAZ=He`
zO<DDPd#by3$Hd}o5qGA9UwJ&8<zk!6zFFzooTituY@XkkZ&Vl**c2Ve+*zP0fBMfn
z^~T~ab7r|rKK5+J)XPgUy;wGxF8916)Y7?d<&OBvQ<hmCofYB|BE0!Y&gn!!F@Mb^
zri*5&J$<lkRak0&wossAk+0GFWl6_Mtz^Zc6?b<=%xpPvDsVZ6Wlz?lncfN7)i>H5
zSi<jjrWQ3zcI}@Qou}OtG+B*P%T^|#!>Ic4rQ_Qa>o#r*ko?M|bI$ct!vs^KiL-Y<
zz4ma?n}~MNdd0U_{`UpReYD8P*sy|Sx`DCE#*k9k!lZewX;BtVJWf(C&*msg&04ge
z=~%MlpWio@ZfJRQyt`TI{Bni!`c?tEnXR_gD!1D0R@km~w6}D<NaLXoFRpc2DW$xc
z{;mD5&(0TXdoEhHCGkshrCetVi@&vW^DbW2r23GnlILA5XJ?ALJ?h~13=neuqLlP}
z?Tb%u9Gb54Nl#4<Hk+)FJ(-=|OmO4XcTG)_g;@t%9l1oFE%;#Mc<aaF(|ZcrGFCGQ
zW<{?0m^iWG<qm_aow~(b%2Q{pt*R(DC^DPbZqU-ikzFh(7kAm~?$XT$MkPL}0jiyM
z%{iBzVd0;zFeBvfQ+fNnyRYxM@7fVF<BqbM=d3e+7n%iU+bl~nD(wEI#I-CsV4n5C
z(%p5P++8PThR&Fy;C1H1q8S1OM=w`0PIq1*bhB-jtnd7zxrwp6-vn)HT;jpAdK#<#
z*$?|G%I<8J&N$X`F8-BoY|VuaXCh8T=ao6GbgG!4%l&mjh}`N3xe(pMuMdbiH+<t0
zb$7Q6a_u;A_kfT`4bR3U6$|$qQ`jmsN7g4j{Y1c9Qys_bVmG1i!d=x@b4_0x=Kr;y
zz5glOd#n2#3m>JPR$Q_5&cnQKk`4d<PIz+g=Ak<r+n#$Cv~IZactYT0`MY-m4el0j
zFZI)X_jd9hnLm48{yksow*6fAA71ZCpOb$7JRmpw^xI(jyRJRw#kD?JvoEh$*=2Y9
z%-zLl+GQuDax7G=%O@7ces4@IUh!S!gz<mL_$EId<&B$t&ClIcpI%?~>>J1RM*p58
zrv+}$>Da?yJ2lhKN<FrlA@M-=RIRmj>V=z*N`>sSnCX)I-Q8y|OBa7<zaP`9$4qi6
zwtj4@7cV@KA*!U+Q1iL%w1#*^Xl{nW=?-S~;AIMZ6{lq;IQ-z)YN%qI?PaypBzxAB
zty`GW!WKm^H1Cd#d9;)%<IMaGFIMgl73Z;03K3YpIO9aB?F3z`r6$~SI(;P81i3sv
zrL(4WqtdfuK31x_$wkR-6{RyOY@a^e5fvag)ncl=8QX=Ahq{E+C(ORN>DgBG6UK+n
zZtCXRY<1mIaE>#Z&%4zJZ9Ud)yHyqSdDcwzX+fusyo)_h(Erv^L(#=j$y(-|i<RnK
z)zcAT(_@4^R!;gPv1(G<%(BT`$JxCj&OE&Ko>_#Gv3r%l?!(?&W*P5Z)xpKS<Fcd6
zs;+}}(JO8{#ny+<5$EAgFE+92tW0`&bnlV#^G^x#HZZB2o_zb&O`GTk0?hV;&tguC
zY5KXE1uIR6wGcVW<F<wM$oz!xdGTgv#E+@itob&pqH$JMazp;3NkLoV>d!J4EZm_k
z)s*nkMkr;*{yDww8$2)N6m8sBe`nLCz&-uS_p74?PVCqs!!%>jzCQius$vXpwJUs%
z?2T2Dh+9$D@=c4G^+d$RmA(6n{8qX7J(hf?I3du9r|+V@`<+_Z*IVMUr}R(LGdZ9y
zmnWSrSft;Rn)z7rd7?{5q=AHn(jHgMYmp^?elKiZzU=e+qjNo)4_am4vfvB2C04w_
zp>wX%yj46Mtv6qX3URFeac^c|<bDRFpL(;()wN?oIp3~mD%BHI(Vfc>r2b5U(dkX!
ziH|RWLvkc!1Gg5ZG*?upGB!=SZIP4V>fOI^D_ipE>0KN5i|~s~J?r|^;oHHrSN>hy
zetV7n`Z+tQyJwsWYKy+kStWO?TJ8MPxQ_6}J54(MTrH2C3P1g|dfAJGjhUTptMrn$
zUvuc2c=Jw8;Ze)uj>@GbT*m$HuWp|CT5WIBdFjHQUH`4J9-LkA$7gH5nS0K)6XM72
z+{u->eYW%Z#G?kw`=jDpBtz$fDVBV!tvKtTQ!Bv6abR2cx*p99%{N!{OLzrrul^Qw
z=lU!=w;xaCm+g0-_?h*o?Y=X1zcp2!o_{H9cb<3miapP}ooDO+NPD#;)%fR1;|jeS
zG8-gQwNp<ouUn+{e!WwWTvzG(D~Ei4{_~8Dh~#|m>44nr`g=9SdunZGN~mni(JA2z
zxEhz_Rb%=(E9>JDR@<tY#@8uJ@9e+B?BA*QRIYdKI=52_GP64mqy^u|p1khd>XHgy
zuJ%m_I;SYhpOT%O=-s&~HBj-P#<XPq2WcsCk=>_sR5rZ1*;+YkS!Mm!+wY(Kos)Le
zQ?@Rq@A%UVYr0zx+;W=czv$4B%CkC$-W<(RQIVRy-rri#Z{cT=#JO9py$Crnf7Ycb
z4mXSRQe9;HH&y;{x$*F3)3b<MZ5BeVB5su*OCxQU-My51?F^Ip0awqdu2(ndw!hJ~
z`7L16pQV?}<L*8A)s&7yqFo&i8kE;p#`5%OWiOOknH#Eec#)oG)6By{UmISxWF}dx
z_1^YIF=z3-a&M)1o7XLuI50hTPkNE|Q?}_26&{V<tP#s)=4gK0<E`1*tW$mNX6>r-
z;FLD^ZDFUAHJL+|6vHlhrU>cQZ&G1um5+4Ee(rFjpkFUMG+4-V%|rDv{`A%c7ZXpf
z)4P5dtobUo!&_t*`<9y3n>qiL@$8(N=PtVLj@rEB7w^m-?%KGAUGdKZMwX<`3zkC5
zvw9?$i_fWQ*m{RWy|CR@bmRHC#)<d$ySjuOk2JrqeQVXz3Ffw5HG8sFI88Q9YWNnq
zvR0GNyDddCJK(&^>ia^;Df=sIyJYnyG*t-Q6WHR&w?0_<TjE6Pp8I>l6z)uzwaDkp
zI}XVT#_K(0ZFUBloYNSX&nZ~69P+uV`g&{Es`U&E+l&r9-^Q=SW6z-zec}76Es0ll
zR_88cdzYw^Vlqed^s}|RPnJ$pSsJ#M_tEbrhXVC&KmK+vUA_5#!p3PKWe(1l_&geg
zzsRo-O*!~$ncUhtZX2IYeDN^qaO?Ynu`43w-|3xM^M)}-!^}cIOhan*tyL*2f9*YY
zLSgN(Q))#M8;ez(-fRsQO?u9))%mvT&Ck7O(*tjv_nsSgHeB_q#A()>N_}2s`8|hM
z-|XD25pVX7k*8@Llk_TX(Zr(m0Om?jW#_YT8gZYmo#DQ5GdoK4Y5+%!+r{GJPZqAu
z&bnM67Tz;S`-tx)n`NO5GtD2Kkruf5G-~4UrirIFZQpJt(JuYAV5wZ02~*zfySF;G
zTrF%pRF=gMx$?PucktXfQzcewo!%ICe|M$c<C^VTQnemFe^tEW{ol9=C7+iX{XFgV
zb^edk9SL1WJ<j~vJ(q#`<&PlqqHFQTI+TKL%El?Q#u?~7yX#lfIRCQI$yeXj^LQJu
z9L#&X;>FG7^QB)f@+O-39Mb!{MAvrTw_9)7k9|`PRWFunF6%!SeCMf_k?P)A!ENiq
z@^>7Xt<4#mCRd@mS*C28MVzeIevKQ8`pkIll-S(mJ&^qD{!XQx9H*8(`6bYCMDoqM
zv@q9tRh=~~hc|TzsS92;@eQ6e#Y5&%fPnS2!zZUtow+>o-rM!xwqO7J)#h#bOw($=
z<44#}cpbg{h;^O7^tXHF|B6&lIg~l;(cSRS&|Z0;sm@G+Fa8v4f3x7tfAjSEUn<el
zV#|}N-xt68s`!Z4@;XllBa4TCi>c$RC1$mk=j+Z~c{U}>nD?$wYK~b?3bV>Gr_j`=
zAKE$!_J4n~_e<h?<CyaP>5)1r-BW+Y9qTUGUw2OK+OM@q8hK6&HWd93YxTWa&(wS5
zT;HG1{WgY6O;k@V+ordAfAAy=r_(V>?`lLOR#c>|EilU1;Vme>{Bl*+4E2KxRzE#C
z<K~nEi<g$FCoQ{E+E(x}RIfYND7F4E^TH{U=f}IWvP>$uHFJYY?fTiKRjqf#?I#pJ
zGf@=_Qq{hq=Kpa@$CEj#nSVdbT5@EH?!+LI+lOX0?GfSeeJ0|T^+~E*OW4>jUH^6P
zyx>`usn4EtEuCer<oH&_Sl882<@eu0^S9H!r;C(cY;8S0$5J&d@ait*2U<rI7(9cr
z-WoFZJU`}Hk~&R>S6Sxy@m80K(cAAHuQDkU*drONu%NE?Z13!%o4xM@m@CgE{1w}?
z^!Cb+dUwQYOAjUPV-;SQxbbG+-BTs^8*10RmwwlI&6n-;SK*h(N(w|eZU}T8TblXd
z=xfv0i&GXGtgW_+pEBVpLvLKkwTZJsd%akv8gz#45o8K<xcl&CcZ){E-6QeKYcGB)
zu@*XJd-y@e0fpTz{fj<_JL@Xc>Zs@xJZ?W)D1WHvN1V$o{-Eqa#d|ptJR(<OwO=u6
zNz9pXh((C8Ct}k5X+qLHX3BR{OhcL19BpC@Qk5(X4mQ1Cx=DAQ*U=E0U0$m`R%Iw3
zzr*(Zbk*8?g&>bdCsJPBj6LG#rN+@|%C?eQ(79~pj7bGW8`BoQHsXA=Ysv&&RsqFt
z3(69g?KCNl&tNrs;4d|=f7-#Cy%X}Uc<;**oRz(E?KS~p$#vNs_X^FgUQh7+?99dd
zp(j;(^VJ&JrW0Cy^9(yz%<Onw-~7w=MNmbIebN^N^W`4B#Uc;iDLr~uD(SRxyGQ>5
z)(|IE^Gw!#-uZpq{-KYR1N)Qt465AybI&ETf2jJQmbKdF$}_RhxycMOEj@c#&p6%c
zHd1%4aFqYJHQ@Q>(93Hr|I5isU8|q|#_o>d^r!1{qo(mkY1ExR^)F3s+E&xc@1L^n
z_>+=rmUg~z)<qxPnwL*<1FZw4SMHMV3gpg=>8h*XPCvx3a|N^U9lor-s%2&Ka<lH8
z^$`vECTlI3cPF*vjbZN5Ea@$FdwOz3%adkpKBGF_M?JLN%i>Ji+G&>z9~C9fKK(1Z
zgnj1GIT4Fw4hT4@yk%HzH)~p;OYghOL2N4yi#@7HYjoCr{W3$#>DU8{39Uw-=PW<`
zZ-@Px&o&d!G_3K;j$FQZ-uV=d%30rbS8hN5+2pLT@1bLn!CIPy>#BEXL<O3LPg-I$
z$9koiNypTf^;z52#j!S>@G?kDQp%8#)4#$fI?<6cjc@AbExvi7o=RQ{LCNRjJPc-P
zxqIn&r!mbG3jC&ZB)@3p3{~G^C6U;*>&lM2jFVaHXkNq_t(K;BwEU6V8kIT5#x1N%
zLLX|G2Acm|;A1PQTUxeSI&7g@=NgCb8QY}0rQa`X6%louHDS5SnPmo-8&@4v30rdH
z&6z9azhAGM^GT?6O2H%+C0#QI??VjA6Ccc$nh-HbKy{*z3FAz|jVkq_!Cp7_atKB1
zb#zK>JoDM^^cIN{%jn3}W-bhVnwvMI%XkQ}d}P}9qBB;=aiP?cwZ}fLi4=KZDHP3V
zm#=d0z=k3@_BFN!^UM^@&&Q=pL~%WEmO3phw<=MpXSJyI>9o50T&(KVVb>>F<lhz*
z-&nh7+oNTT44)J8CEB@8&;4I}T<-4+iQUoW&(|$8vybS|tp2w7aQv)?`#<Tf{&(-{
z#*l@LW_`*wc{~68xR<!?u3&h~N!Cdx7Jhdz*mQxnXlv@Rm5-JQXP!K|cv7&EH+Rpp
zIi9vR1ypz!%Y2QzoP6~|X4$diqo2ESHVH<txO`CMShsw7Binrwj+4vn1Oz$y50rjr
z$Y=A|VZV{(etpu0#bq}`mu?hh5M+NCwEBaKL{#bLtsHrKW=`8F7kx}_dV`uxx8Mwq
z1^R~;E0kKzQL35ZdgJ>cCq|uo|IUbD!;RHH-==l`pF8O#<7&ybNz6Cmr48p^y7^G#
zVnL$z9oPHXr3Y(EBF?PjdR*Ok%JyED?ZjE_nHKqb3~cwXFI47#yfA<%Bua=|c*^!%
zA8E71%QF)Kt_#GABwzK5WnWgacKPwOhS}G@J_uV_wNhl)3F#G&?408bjxK3R331!H
z#Zb__-=k&C&zcLu@oR2bobhm&5G8H3^QynZ_lsqnephCPow>67N9E%`e{FfUp1*#_
zl!fcQ|5K(Kd&hIU^15x`=j;0}xxaYd{wuG0UglkoXI{MM%c0+zcMb{~1btYgA+_n8
zVQ5}N-H{7kyS^0}FYb$dctGQZ#+&RA{!(7<{<`>mQOjfg+{~RUx|jX)TM1dYMc-PS
z+jVu^>dtZoPV>rMdw$!^2c28Z3bOKN@pHVMablKC#c!5byX&O1H>UkAOcgadw_(D}
z@}t+h*yYcuc^+Op&Dl>!>7@U&J@#^CzyI#Be)H*_!ezcG*PNDzYObHNMZ{C{=^PiK
zjpk>(vUoTg!b(g96dCs>`FPG?VKXf?@9BS*JZtqk2V?Ovp54wySJj@i37&bniZ}h`
zwEAxw-Hq$>GFH0?1Z}lhYV&;4tW{m<vzAp&-(4sBG3r;MZlsWr$5x5Pg%V2}B@#3W
zxuXuG_XJ)kI#(%_6!cy3OYgRV;Hu`o9BXc|sx8_yW61~C*+HgKUY`}W3SC*nsCi!R
zV`}~3xkiutm>=pLGEodx)e7OgG)ahiwvy4Fweou=O?sjuIq5{8nP%`Tjs)XLEP|gS
zbpJd1`Yu^?wx#o^{;br%jhA13%(T_N;4pQfO52htGa^Ip9(?3wqt8C`L^yNkt@Is<
z9eX9*bKIQSCONLV`|g2b$KA#lv!vp@xhJpsJ*d}?Hop6Ls>$cI`qz?5%(ySbb$-fS
z=y*6dO8G^(+$5LJ&1Qj*W(T}IHGOsAk!D53Pj^>qWlQjd{nl}CZ2PE~ce*N@Ki~Rm
zKyCgNy9<lw>|o!^=NvYDjX>qRr85oJoH}tkzgW|6QRCyI4$X>6KHK9Swa5Ganz3=Y
z^HF61=XX}hf`YG^eR#b!POrF^ce7CL)`r0P*r~i>M-^VRmYKgY3U<;sDa5`h>}S~?
z1`)MGrhn!p^stCu+Tz=BfA+Si)uwmbwl<vbD%yPSd)RIdSA&@ctJhEP6_~{G#X#@n
z56&Z!#{ZsQujzU6NM@mmij(H6>UEkCTXxPrE;RpTQ-<ZIk{@ju3wkFS)V<hd`Gx16
z2<Kux)@H`J>;a|e&thC+c*PFynzi|~OizaIO7(e*v#kGYT^%@O>PJ>#LE|f$0ouLm
zv!>=*zwnuy_Uyfp>%Li6@}JKZuUKVvRNj1osoPs;3y(a*vNLz@GQ_7InY8kd(BpMW
z^|qE}&&+S`Nbq1=aU|{3gRVFW<|7dg?q(-{4O(+ociPv(@uqjZvTscNT)d7u@{ir4
zYKE5QPww8Y_xmqfaq4V^+~PlT+uSE~?yJ{&=i$Tp&}iq>{l@RAjK6J<O8v3<aq+^W
zfZb=NE$*E@>y__)i<Ui{6YtGRsx&!K#BZK``sTy%*rEgGPlN90mc4lHSi0!$m)GUC
zd*cHX-2;u1XG~CCbY#sueXqMS4u~JD`WiE}%=t(9nRO;iZ}X$B^%t<b`o*r*x!C;g
z<Tvlm?vf8qV(C*p`X=6Jxh8vj>&?*jnX}5<_Xc0hE)r5-I)&3XXyei6%m&)p=}OA2
zA{s1{TzfX&+O;MsL2cu5e;p^ybfvO*Yad&_<1tIuzx66gmtjj0n7rYu@zg|-Eps@Q
zmG<>dl$^M8;*{WIR+rE>eNU%Y`gaN+@;%Mq)1v+B?*6(JRTkc}gu5?9u9eU%U`$^V
zb?%g6?gq;+Q!(#IA%B%Muf3e}k_7I^+<RWdwtD447Sra!wWX^Jmj9Y;ULBdNn_%Xr
zc2f0L%C%1%GY_cwO<ybVnoGT3wBBjz+ew<y8@*D~JatZay2^NhE}!vDiZtDMfK8=A
z#h@qR;j)8IHc6E^u$)@dom6Qxwe;(}rE8UfPQCl$anCluTRtT5l%;#Tc#c@jfgqC!
z67CYElUK|Ve`=!<Zm95lL!g%(d+O^O&)&agJmRY!BEBS;S^d(na`E;V-)HKCHXTxS
z+S7YAyZ3Z&WlL^<w_fnZW$J#DB*e;FZZ_CRT<5cEc_wu=^Q?vIp}=cLPG6eUwf65v
zuUEF$zU%z0?u!>r5)1xrdF0arqk0|#V?oI;pMAA?)b)RMYV+2fPS%b1U$Q}di^Q7P
zt5aY7>+<@_#3sAnkhO4uptp=;@ye)YGk&spIj%S=9rj9#JAH$t%}%$B#LKqFy7RyK
zcIfFJ_{G9ewZp9Ul75IkTg+z1Lvk|nkDOkXm%wRSzo!1-H@4N0ym|{fa-HsPdO5lN
z@ulNh0$p1=Ri?D6RA#VsI?L|aY_h7}rv2c#n$ta8Q{B$*&9vrv7QL%ISM0#o=bLVA
zQL5it_ehJq_=kOtcQ<dvB>lGeCRZ;1Vbo+(a@=e7aBgkWwlmMP3_hHjGnspB#EF?p
z4quqYzW&|QPX<bVI1ZObX!_TFjX2LV@ihNcv*``yCi9+i*L*2>|8Rz4qwDGGQ<g4J
z%jSCRZ4xDBqVZ|<UZJR&tj{t%lX%)H7aZz%)BEdQR_%M{>YOuM?QXV5)|nan`7Qf&
z&(YPk-w)Itxc$p#;eT7LY^mi3uUoH?TT|P8&%i8ckKy)9XU~1zG=Jw+?yE=jP9$iD
z%;b5(X>*t9>fhkfTG1sxW(0p`-63YOjeT**v6rEj1j9^}KK_`*Rr+$)3ijfVGymGJ
z|NY?lB;>(^ir@Pp*UVeI&F@U7;=h;29-Mi}uc7edRA|JO*O%CQ4lhr;@HW>x*~_^?
zej=08gk1*d8$5&*T^y2m*XV5IpW>(c-F5Rs&ghGxJSrR?b&}>>{p#e?x~9TnansaE
zlXO=dQ=QrQr1JfwVBM&ki;osMxOk~u5Q?x3ZnhGbEF9!(=H}DiEE@ChDDS7UwowaZ
zd+&uCE}hXCS?drJZ{%crWeWFmo422j++210zT$GzpV#ue8Vy5q#FRBv<vAWD>4+Wv
zaU#b-T>3zV$JPzmKQ*r^tA#z<pe46r>Fk4l=B(Puq#&dwsdVm<<ZAotGafjlo$_>P
z*{ogfHCz2)&*QYE7Ak^`DMvO4sjqbRF;0CS#Gshsq&90t+98)M$+ry8x>zsr@o<@`
z;4*Up<Mhhbj)UbsB2(<Q1(j&-5|dhLaPBQ*$kv0!b8anJWm)Lw*}|*0Y|8;7BU|wo
zak|=x>D`AccQkG?JiFcIXTik<FE^PVzj<`2oY1R@XRDd7?VZ_m<=fk&kF|vn+iy%;
zm?HmDW8t|~QJq~z^8Bnf#8eL)F524kaP<}cMH6!-$SaAt>ukN2zeaWL^e(Aj4wDX3
zN!IsYudO|DSipK)$lQx^Gkr3`+Wda3?DSAF<tv>%IgerYGVdM#g8n{on5v<$R86wq
z)vl1&Z~gmCU$1Fc@uoQnhbM1IDx4a8d%@lnp2fXyqU%e7|1XP}@n?>|bUDlKZPV`P
zyzY}voP6V5pW?*Zu~t{jj|lTvTu^=2AmMd2>d%4S)y5uNi4)^lmra;;Q!VtK$cL)c
zYdZpZ8A_NtT3pq$Y89{inx|zt*@o%p(^^jp!=)RpZagk}{ME!?Ch>pHl=RR2D(WpC
z9>?)fT6QDXU7a)i2f|#feP&dK%)7F!G@*FQ`70KC?#+s9>e_wxhC$=H5Z1@DZ6)ef
zyp=LI^?&A+h@w+oN9&(F3KoCs7uFvpz3bUIiS$LYU+%Nv5xA%tf8&Vtof}ql4mNU9
zFAwgLx#K@${VSC%VhSr5H%@!r`{P!dK*%>yT}~T8y^d;`nfrt;+8jCb=x}w;vrP&P
zR~ardy-oi8LtA6!!u|j6G5-!sJ;Yq~a#7u%T8ruD&%EEi`zZr&9*>;ElEb~hy)8cX
z4X+$uF#lJbchB?1N~?azxynxH{9xa5gGs#b(k4#*K8<tF<23duSzNu@WRt#YZ%I4<
z^z7?4x1QxmJqVV#tf4bwQ_WMe%&!(LKC4Q%cD2vASa4SF*9W(`rYskDTMqJd7q|Gm
zD3CIdF1eT!{xrQ+c(e5lCTk6+NvuATRD2hu$tiV*&OLBuRbA5h*Olcq=Qhn>=Bc*O
zht;5OzgSb6*@6QnPWg0B7J2dbWylJTMRQW?EtX6vs;a4o@XGX8c9yL<qq)??x07Xc
zWa4M%1)O=JQ+FJAc3m)j!ui{q<)U4=`D5l7ojt{{E;}H5LedVCDSwjJvvK=+r`lgx
z;J88T^HR(1C1+T^y=4&YkNl>{m2uJhaL>jys;Y%Atr{6~4;7a0-}#>7g2L1VVehV~
zZ8#LPRq$YJzIb4MMUCX7;#0+GT5+o8XPwi17QDKWtn&P`O-F^B^P?#>87^mZIxUU;
z&lrC`vM2eT;MpU_#ox|EB_+=4_{{M?Om*UuXQ$`1IIKAu%(o_}^v~gW#cp$t&Wh~1
z(_|37B;@w|M33m(cDMA_Cue6d%gizt=sY`P!Py1wNlU*z<~ZNPd28n8nbt90sz06P
z@8V^8(#UGI=;mvY^_k+w{wA$gXM3Hy`<3(mlcJ6Gd%}DLQ?|BE{5D4^XhZge@Zhgk
zURdAX=2pSu<Zfr8`bfk0!;I|BwnA$!+2$1AXy@L*_wV%;HLLQ@rw#`$?NRyguxfhq
ztZI`QoA~QrP1{N>uIeXL&%1K=jkTS;)UjI`2TZQVb$B>Ey7A_g4vVOuywlPUZR_0?
zQWqu&xu|sd9Q0J$+bsQbS!ljoy7oGE>B2w0f_8k>PMY${(fxr_e|a@o?=Lmhk=nNC
zoXhHfW!E?(uiSk5PONZ2K+5yZ9sZ|_SzI+PTI^rWVI}@Q)1zf`$MM+fd*bge4?2_G
z?y+fuPrS~KsQEU>EWW5@UT^yG!T#1tMuT8uZnver^4~r#<=y)`qP=usqQ|AD+w`*Q
z?T^XzA8+enk&Fni%aZ%~V6mD3_lX&*&)-Dd6InS|V4}x{mvgS|TPl5f=e4%Dby7BI
ziL<3r*y{K!I{1Fv%z4Om>$pLA$NhsU$#pya3x~31oOn9rppdBkK2C07q3bnPoI9qS
z7P4TnY?(Ci{t2T!(H*66hm%%a+gPh|J@4<<{pE%I++r_Sr#{`ka?^3O#~-S{b7T}U
z+(=tfIQeYPMeX~iPjKIf2(*oU#eZ8i@B4=54x+t^sy|Qr9{6U|!J;)$s;AO$_w$R#
z0?n(P-JEB;<gNPlOS&b`{Z#zc<+^RmR(Av$`gib&dHI$ePfp0|c{s6EO}Kr2p!(CF
zhMK*b=A1CmNe&D<=5cIoup{rssfU)n?=nlgcb4PY<IGNv$Vir8B_`Ebe7o-^Zl9JK
zD<156kmJTP9nJHaOYhe0x0iXAsn_-SWg*k~LnlJMP0r+)6lTQncA3tZHCt@vPAl@8
z>{EH`?B~0ur!4#Y_;IGCxbd?Yb(_CS?EahhY>(YMe}DC86P<4pbNP}Ut@_BonR(!)
zQ^Xtds_${uO^Xh$k$h-V5VP@mhl=Rd2(ig$6wFk4(^gE5HsCV8tFk6)hO5ft>X$o=
zT-k4T-sX%ks8sA|a7<+U@8ubBW5LpjqXG>p&-(0oyQ5Qs@sV4Ed(ZFw&h7;QiN<QG
zia~6fJ3VF@Wn4{qwELcf$+90`BA(WTI5h4)pxU|Q&Bnw9Y&++77@t1!=4oBpDIei#
zU;jfkohgMu3Bqx!<MVTTmMmF4HTB}AuqPfh2F2&@z4xDVYv1kt48qG*P9CheYSYcV
zP=WtzwCtU&L7E><AFN(jmY$IO*FAV%&4(#(f4y()ekl~a`zlAzABzvHWs~ifT$exb
zJ;<~|`tHXA3m#ZadvZ;rGF#fX;`J}(|Aw>MmU!hJRM}{&6c(JsroR4^Z|8wmf6JX;
zDuuB8bDz;9Z2oV_hodday#)^_FJm-KJ2*LV?Jf2W4!@@I#oJbu-d*`HYB{q(@v7F<
zvmKT+39acX%AO@rb!Nrq+?zX9=d5sS+;wJOuS~l^^Q`rn6CYpK&ipp7<=7v^Z;8{k
zo9l^X=}+$X@l5xF<Dp3wlGDrDq?LC0m@z-xkXg7~lrvI@v1G;kw(Lvt|04XugR6Q2
z4GS+RwBJcrew`a?=>4D~_>sz-WEFo;i`M%Sn-5>HxqV2-&Q~zebM3zM)4Cpt^4}Hc
zY%Y-!+26LfpI1L4;oySDzNNxu^~HBAdJgUTY|s1rqV7?ddlxwJo~vzs#8D!1iqFL<
ze|uxz!B2@RA5HrsvW@MC*red3;0qeHjInY-9qjo!6CBLB%1drpB%8$69M>+ejx5g(
zk+>&UpSp*~&|#`#)SB76%3*U1g{se%@|7;0-1AqNscix?E3+J5n((xJwx6$g&h1#a
z%+)v4J5YPstDxvB0ayRrf3*1Mcenc=w(@+oEPnf*@$4F*tBEc5?`%A9bOYys?~1QV
z)PtpG=U%nz4KsXl<<inZYbPGtk27NH^A(Ca3yf2E`h$;lcMHTvPy5m4-4lI`gI#4=
z;htH4H$6JBjd{=QqqbY47|(1f&D=Iy=wSY|z{va4YE*QBKkb~#snTYDX|e4e=iFyi
z5sgx9e?<FMwX8f}SaF3Tp!rDOg8l=M@dtwhH7d@%DM}Zx65nl`&E%OmZ~5WBcmJOL
zeK&FbW}VlWmijRVF6YfF6NvJ>nQ~6p!{49(dC@hc<GcR`@~yU;d+upXRPem(e`_q>
zy^p*5{O*?z{|arhBTf3Olp8mAoSIy8anb3^CwUHftV^G~vFfPjR9~^1MlLB6964jZ
zYToASo!J`XXB4VtDrx*OD>b&J%Z({*;u#;uYp*^g|C^;Tb5f+Bp{Hk%m*q@1FCW&j
z9|jWQT<^?SdM;_U1~Tv{@SgL&7iV?ngh$#}Cy~S^PN!{`-%7+hZD&z6-n41elqs7$
z&fUB%y!c>?XF~abkXf@z3j6%u9dRw36!dIL$K(@7CUqJGDtY->uBv(dN~M4EsW(X-
zdfYGdxzB8QQ5E-!@BGfEjJ+Sq&#h+QlFxN9dOpQv$pU@vwgZ*BII;`Zy8WK9+h%$7
z%T<4}HeXwFe&5VkU8ebs&$s-NG7Na)UHjzfQ-dDm+lpsGc|-QwhWLI~c(dlBgI+>L
z_+;(aC9O?nyN-t)yR>9i>6Qrpt6U~B!rK|9R0t-?t_z#l6DGW2t?eSA$DSdLk$*GT
zr4uduRKHEw7%N?o@nWM;^Jfi}I3}a_p;@K^&(*d+F5^(-T@|lzOv!EeA+y;f^XA^q
zSGoK6#oXMI^1WvHsRI1h4)56TsOw?(>(2RtnOncC=t}6}G~Pa0>t~-}-Sm~_X3N&{
zzrC9CRn%cdMTrsj%$dyBXI4fZKFV{CwR5AIP*-AOX~<y%HkTLsZ*Mur+yCVL!q+c_
z`&;dHI2s(D;b3Su;e@exQETPd^(;NJ>lxoRv>yrDGbwe2R$4xXWW}ASM`!GgSE^AB
z|2gl!WZ2`aNA_QObyeov!W%DsH|{db;8i_VX;3vOzf>&5aK_`*RZI1EI$WF}bmP#*
z?CGj9KZ*njUwT#e{VuRp6u-aum+WHk6@f*MRRYYfbx+n>yNbR2-eIjY^G9J5ls4;E
zrm8jw2FciIstWAcA#M^~^1G%oG=7W7)kRTT*08PKxN2j~nf>mC=l?o9fB$oRO+)d{
zgMoZH9q$|OWFGw+wjw$D_1o_iw+rU(EaL9J+<)-xf5F^Kk1l-CS>~3iyDFf-Ff5f_
zIVAHb?^YgjwZe=!PanO^-db(`+koFOeD=nFAAV<_d|k0Fck21u=il|7jC!Aa)~DwE
zW&7a2)&@$mj;a*4S+sJ?928sFVZ2DiEhOMdyOxeZ(n`t4@6Ket`rNI0OhBehYR1*p
zs%ES8vi6$KGv|3$M4n4jPfhwEsgrr?bKSkWZ@(XRKF6fJ{PWH=tX-bVOj4h0Su{t?
zH*(qCIYON;etlW6+<oTDiW%>}ZqQj?8hL*CCNb@{!-oF5OXj~j-+%h@?&YbGwV@el
z*%!}DuzCBP-TL;fb<AHkOt;WDcBJoKV}$n2V*!&3(}Zu=)|c$DI(w+|=EgeKRc=R?
zWOZ&nKi9VS!5*{!J?A$s3^Z|n`?^Zq^N7wnzs(glS3g>6DSTHTGEpTkeAhmMc@w7;
zXMCKkvvcW>nKLC1v(5~4>x=k(V}tQ4h0@0YdnO77&&rrpv+tY_Yu>tN9Y336S8`-u
z=e*Ccx$?%$C!Q*sQ=ZMN6gzolro~m+Xd%xtky$&YdUP*L4|cwD!!0GCxYn;A%Yh-)
zQ*-Gf$*ie=MUL8jobq|f=N(dcjdP=a{+hu0@SAqNjF9P@#{n$&KI}9*`cok*CFioR
z{*x~fCA)SUy>swD<ocu6QukSy|CL|N<|4?bc`|I%jfVBJAM|ByUt4S}cPFR$z1h4&
zj%C_4jxW})oN#1;NY9y7Hw=}xo|q)Tp!v4uVb_8FE35xSRd#7z->RJNcl%+^^Nj|p
znh(CHpK+K~<4oMYU&l9`)5ubl@2*+(QsBmgp5NC?TW7O$_EerQUF`Sz{rXd8e_D37
zNpAhkX*)05JNNj}EqW^c6Pcb)+I{UtgQlTy=gb*Pc`h?GC4HFji$Cn_mAe}w<=vRr
zu07utA;JFtg?8rz?~e~uUOLUqn0YJhShMMp$R|ST&pb6OU)lM^iN8_YBbgf|UU^8T
zF8}I=9TVaWjy!+pH)Dg;TCLC{!CQ^~KCTSh9cIVtvF_9d=~sJhfBo~U$yNQVTXo}x
zt3R&=M>sIM?cW-GyRCkKS)hVnbJkhWgDN$Fi`y@qJECAI*%I36$+PdY(pP^Oe(zP;
zaUQE1ZYgbKbY^e8cJ=k8<1F`YGRH1(-(zX4tRgS5Bl-T6m_@;VpX9!XJMQzYLh$p+
z^%653f`lec(KxYD(<$qY`?Zg$iE+&{SBiy(=Bn<S`SY&$QCps$)qQUl|N6DBLZ)Vi
z@u8#jwfSA=+-mIY+*-~ae<=0u)4>;dw^G8BlhtHo)*0Lp`dqHGl)q+1^|^`f|K7e?
z_crRQVnc{Zu`QSKT&+!3?JQ|>dpDoiYc5f`in)K$tq-ybxh6XXn26aP*s5E;qwChJ
z(^@<C9E@r0a1jzLes*M*jiN`DyTlQb>D-J4UP^8`%UC*Q9k5qg;-DCol)Ir_%1TnF
zsX>6nTeV}R@9_|SKEFxJk4!e$ypFYW_8Cu?j9X7Lx8HutUwLj4>zd`CZ^cd3DLyyT
zHB8%o*E~<*#OXP=mhuQLO|`V2Q*rP4&jO1%e#<XMc}4m!U9(FsbNl>fckSLkvlX=e
zxJx8h$<<PqKV6i=f71IjtBEV8E(&>D9e8C%qepWwbJC<I%763TmoMEX@5RRLHK*Y~
z%7#UWJI%~Oi$#+C;#ST~ztybbeY^1xL+kgv(2qx0+C9XcTYGv`C>>t-bYp30Mf}1>
zscIMBpseoYZW%`1Qsz<JUVlUtxi6Uqoys-3evS3YcHWH2cCOTdS(}!0T1IQKT|Mf=
z6n)mR<3j0juPa%sj^1BC#a>b|@}70mI6HMx!=38F_y6B2+diDOR&-y2xs!;*sZDE6
zpL9R?@taGQ<?5`|pyJjNt&K~coD+0k7d^$);cUxJn>(+LoO7{IUG`(8LEEjXjCR`t
zRB{-8NT2+C_N3SET;(kG8snE-&7Z&iQj%bC-}+;X{rcHFOHb~oOxAcLw!-^%jnXR)
z?+eE^U2@bh%DB^*xb*(Mg%y!UZad~4)~{gHy;^kY#xKDY{Es)3T3a^Vonz2CvC78e
z)fUmPAO9{zem<0P$Z1X33dt0!w_J1DW-TeRz8bXW;)dYEn|ZFO{-2Rwt-i~p`xncy
z{=#jir$t{?+I2%-$oSN?Vg?pLwwWq<)lA+z{F920FP&C)y7fkL^v?ZTt4*eVWaHhb
zdtit3s-1VGo&V$n$MW<_w8b6W@6TC2<%!;v#vN<Tq!>JI*L_KhRd@dRe0lG)2PHxA
zH#~OeGq>pPxjDn!v0}IHY!m-&UuJioSDLqdQrtHO@y`K0GE&=Be;ht<^USq>@=>|w
z4M$Ur*u~ZAytDt^Ji7DF*V1RL{ReMc6hGrN%VW<;IRRy#vd<qMH-<|8X1Q?V--7jO
zdC8r25x0x;#WPp$X5Mm6Eq3KW2CKP!Q4@~scpbLi%141s*vrk6L0QnlX;pl{5qGyz
z!7RPVRVTw%EnD?Us7C!&#D<qV2a3Pix=m!ud*fmk@}fRo;e2uG-|sBhNnhqZU|R6=
z{)YOUil29=eE#6O{m_H`ALJS&j&K?MKV22^@mt?Btqp?DpL@)IwEK<Qj1HmjSDaC<
z=XF2)Ometxw&-yEA~lwT33d!zI=Y5=`<}(Vk9#^TcR9bq*%>oKTrHNSJPS@Q44Dzi
zIKTe!tsH@WFH`+oSXFY)Sj~RxP%XxxpXQ&H{^)2gw+e^&t^=Z3ZnHh*O&Q%~j1;f>
zAByZ)ImPtbmqSNv-d?Ub@mcTe?&c*a4i}fs5tlFOUZQf@#NSeN#k`W*D1YrKEnKgs
zxTtwqAIn<)^iRR8cm3^^+vk6}d^7c}n|k!L&%3N&{M#{~Z*z+0^3=+REe_L5FSgDQ
z=2TzuS0yXrWLN-Ww9EPE-QBNtEEJmY_ixht^p-o$GlGB2ldig7>pM@!A?$DWJHsz4
z<hpMpoaAi1^Zzb~<lI**Y`Z5M{{H)n=aB{fe7*<yO8rim!@KR~=BPdYPKsaWcxN|j
z1}}4(@UumQ3~tZXR7qGYU`g~}WisVshiTFh7O{zQJR`4P5^yMWKh#{}u5sY%ObtQx
z2?DC_E2^T_KIv55oU;DS%Dh)Ar+>YaCorwc_p|YhEo;(e9J_Xo^Vgh?BO;~E_ruiH
zd^C4AZoKMoO|CtPhp*2$&%ycom%!bP`hWgO=v*wV$lr4Fz-M;0y4<LIwuik<`Oy*y
zJdfU5y?859|AX%?d;H;YiRSj+>~$&YgMTjT*JF9^TNKA1Rrqo-_o9mdU3)h&_FM2X
zOP}buT6iq<Nzb~F0+TG`b00Ubty<%=>T}HJCf-ASGSX!m-ir3Wx~g}it?)#*{o}C9
zdJ!_rm+yUVw0?8jSdD2**c{&tH>IMlOkbh(F{|&$gZRB)_iax}yfq<AZ`S&Ce#d_7
zF_q3`w!D5I?b{MAd7oM9BP!PJs=g91&B|5cS?<Htc{k%GF5do7^J3cXE7eZ#7JN$Z
z+PUaj26HQCy2e))gY~CvZB;m*A9&W_8nksCZ^_Y!nBwruA+J@orbMjTs62Jk%`?yK
zc5Qt7VZ$`Bzpcx<*UU+)kqnr2K0NpS#dF1`yCgiS!tZaiwmGr9BH?3bYG0uJ<;T1U
z%TzYbW$(=4;;pa$BVjy=W2?E@^D`B1cfGV(Cni`ETzw##|DBoI%kPu)W6yqCeX1*J
zoyqmWsjKz$-^`Y1RX3D1GF$w_Ei?Sdm4rNIA(l9~UwU(AsWRHN7DV*zlDHpoR;Ks)
zN0zeP`}Ut;s@v!C?j+Ow=vSp{zuo6cezRMC-`a2N(Th~=^>6O~dw#pBdEQ4u{q<AQ
z>y}h;ODU}4>G&STxBs$YM)8E%T-Ot_JDYcke?L?!er@g4+ZkQA_wDdHJB9Hhqmi)E
zWV4*9jdK`ol-qpX*iaOiwSld3#)89F&#26?+<qiIdrA$<c^)y(c00Y>&i4LaGSA5`
z?VKq#Isf#pfG;c8_%B^^NZWJb5|!*pQ(Y!Uq^u8#k6V1w)TZ5FhDa;V6eq5E&!&`T
z)|<b5UuLPE6gPe8l%Q)%r)-^e!RY*yXNRXesj;%vUA}aUf+0u19~S>k)zei`_xIP&
zSeAL~{AazT?kcLLO_OHL-8kdszq<Xi%iA-(Ci%VhPc`i7Uoc@}yJlyKr_<_}QLlJB
zS5+=ii&~+%>lPRDS5c=N9T`P7l{3et%rGgKc~ks|6Wis`#U@{c&eo)K%)0pS=SSHe
z54BSzW=e88^&OfL)U;vcca~7ip2Qk+FSD7kDu;chs5}+9KH+xtR3~Z0Tm^UE`rIW)
zrUa#l8S|fcl%+6bm!N5Wuy60QBu`b(Sw)l1OgR;`C1Up6wu)ITkq(DET3iYY&K<E~
z?0n#K`%F;#cTMk8qW$;Jd^pKo{m+WsarT37JHhEs*SwZwybu(i^k~lH4J)#CD`=}v
ziJAER?kp$fG^zTN53E=6dTHF75M1=zZ<W*AsH1s`eZDpv#;eb5o;bOB>CU@ig^Fj_
z&QuhSVR?6R+g<t8?Y_H9C%rUw%)2MCj)if(s`8_?%T`tHnmN(MV@qM_%!2efm%=xj
ze$Q$89;7{aq3FwVX~r84Y8Kla&F|VU&FyMzYu)7KY&YL-IK53#^6Qz!rVCfCIn<+h
zI793jbH`-;f;%?n<}C7jk$Cv5u;bgc1(Q}>*=*A!*1SbBykq9=*|U;c%(=dp$*x#C
zvqON@ZTiW_RnpHH%-3EnJh6Bq!!E^j`3w?MJNT2mIy-~cicX$l@OkzseKqzrY4hi6
zcC@WLl)3%M-fiCR8+8{?yd5Q0_UFNk7kUnC*@+*vN=ihCO=_CYJpFF>28me}%3Njd
zI7Myt?5bJ#Yw5%pFN5;39{0V|-#LTtlf}H}Z_|41Sn~E4T>GM7p1^YS+TldwM|qlS
z#NW()sB$=9#RrQgopX&3aVWY)HOF-16i6u~&Ei}(OXZNH)9K@iT{|LAT~ymRd*yEX
zs5#ETr#`4kKHvR8`7!?)r<CM*Qi~i_57g9nu4>-7bf2W5T<#l7@dIw%D+9Mn2Tghr
zrD!nOWXaXS_3vxVo?WeFFtqh3)cD6aE&p@qIy-~pIq$b^_?O>!FUIRbwa=sPvJd0W
z-Kw7ce=mpS-2IoX);04q{BF<u{FZm|O71P^J`^Y)`*wtFQ{AqXIVo=xEYg_X?e70s
z`(iQs@;5(LicWn0vTn!C;z>D9@0YwcdAf|*eA>Kg_7QJ{0@og}Z9aG><VnRdLkH!u
z9u*~?Ls4Fan;R}f>b>^fDF1fZ`R||asoEOt-Lz%OqBQPjQ&$LxPc#0(XtDgufj@p9
zS@a$(GMQ7>tnMJeWMC0EYtoY`%WUM!c0aax8|<}b+M-n5Y)#k1ty7dL+~>L`?9y3&
z{5U&n#HIs2o3&<(U48WR;N_Plws!mW+<yE1+w$d;XD0S4vNU|%qqXHt>Gs=akK1<7
zxo3Z{r^hE*E$B>#f$vOVPd|;cr4x2C_soje68_m`Z_28ct|*VAe3D%G`9-JZ%w*Nv
zykSM480Y6r`9g^s)HXD#d{b&zb3S5WVcO3d^)tFRs5DM&?e1&8$@2Nw)V!}^Yl}q$
zkDqX1nSc6=^6~}YGYf52ZA@m`o5L2JYw-8T7Y#!-K|h1(d$l`?<^`W)2-aL-qpoD;
zzMsP)W6HUcAOFvrp&p>-wcv?!tzTqNn&32*IT4ezi&qrBpJOdDf2xQ~WN>Wvx>lQA
zr`=o61kdY`sf*uwB#`kStAkF?lB{Q+Qxk<I3RJ0cPOsNr?0ICj%cmLs|9y-$`>Qna
z?b}+Fu{v?GeQE6F_{hSnCsLnzeU5K7RPzyU)6)OtBh_!`w~Ir0w;0#MRSz>3GQNGc
zf}3@<@f?rMUW)hM@h#c6<3;?<kNPQW>c_9;KG0bFGN`OMy>C-v@RJu?U))*ubwvT=
zy%bg>UY4HjTTR<JSk|%fUeDxLw3D{syMK6U%fk20!ERr)m+`VLYU8q%iFbZ<LE}Zn
z?Axy@Ry;Yk%ju|wMCNBjo<fa<X2!uwtumPemx|=h_CIR5(`5cpml<8tek3n-TNHNJ
zdxpT0W#x_YPhXQsUv|A?Mwo8&h72dikm-uzl16U}dY|3%pFQJf;9QejhHqDevRWFp
zMENdVakJ;l-+(o;jO76}zfJh(&&{5#`_XLHTQ~oIjhUMH3ML_j&dri*!hQ<9y5RMn
zF?d&?R*U)iW0SU;`ek#5_C_{|rw9oCxcIitcK$1YgIRmbU&Zm6m6-nNsaiBY^Ul%O
z=_|LMpY+*&!=62%pBH;FRn92n?>U*~|5$25OzP6Y_-mqV_ge%UnG!tjh_W<t^hB<C
zdwvn~+T)R&w`yCTeR=fn@UJ=Vbxv)c{QEw8WxeSk`L5sZ7lhvX)gGP9Kku#{^K?xe
z+wCVk>yMq)zgp80ef;f?^bCm~3%|AtN_w2V=JfngfJ6Mww(BMv-Z8s|%}O=fw)<*i
zHRq)#4GZoj*?cs({Pg+!i}OAk>|wLv_*9Wxu}*oWmsw8dx4fqgPjnjQ2E2G(*7<(2
zQuq?T2Q#{&8Z~EH=}kE`cda~k?HSFNKYOPs>3DuSARFoTOtk;?`;u8=p8QAmP5iQY
z>7T_Ht&>INUV6SwYL;LzT594sE4BQ_+4k6bzLO%nEp6wW-o^G%F)d8)*%8a7fAj90
zy}V1$|M~GvdTdD>uP%`(%L?++e4UlGPtSI~|8rUY-<9=qrl*z$^Io61aIMAdlOfOV
z>Yac2<KkVD_0P92f4U`#^%q0L*-0I5)aLbYU*^&}k~WV?K>hCy`~3Hp)F!Mo(q6?H
zzEdI7%Vnm;(vus@W}bMu*d;5)C2isx7avoF%a4oJ`ZXo9d)uA<zQb{)S6QQV<!+BF
zc{Os21vJv$$rNSwoIiA<e_2>#+z!*o#D9}^-I#mzwMnmkz^a9_!j^7z*`-zDnJFab
zVKkxAK~J6O@MZo<0zw~Ej5<>$dZwh!Y*l}Bvb&mVg?6|1cA442pMO8Qkoqp=C6}AY
zl+v$v9wkgY$fIO=I9L6OlhoM-|FWJv=wePXNM_t#blJFV)x-l=!*u3Mu79S%DcCWY
zA;2^-jCs4o(x^*Od)D67Hk%`Pre<<t{ajb+J73>-My$v+U$pM~&s`x=De8ZkBD5DP
zZ#{m%dXCoNjL;C*M<!ua+%3w&lMW;;YWL(#z8hh&<DpAntkAU;;=VWZq)jzg+|&fM
z)!%Av7du<v`)1jz#d%6A_tkd&yD+ok+n+hhnoG{Sj&!cjW$?JZBHVxRmYI7(FK)bY
zuPr}8=YaGs7XHt-8Nww@16QyM@tJd<IAGeaO8c3O{cO*m;$v4=M4fj~5nCr6`*6jB
zo&#K;`L{|Qwpp7O)3YEp?yKWF)jDT}-d#;x3b)QA-rXKiIN__H)MT*^sRx^8mA+!o
zGcj{ID=zxs^sxn7^Rg7Ka7b{shVcH}v|(dWyPD~9=D_H!>^?s~oqU!0bH$eIBd^{!
zic2<gCzo28E#aNfFQ<I>%)UQw*7Zj%irQK}^SH<44iVGBq9%)H9Suj0v<UIoPJ4Bj
z?PtD$!x;ucvr4wD76+PEKU}DNrqJ~Kj?CB}cEZ8y_T{XqVMx3<!)`-tB0Jm1thHUY
zgfm_o@-9Dqba(!n`FB}V9_rR?|5jqKgYAA(=gaP!2jf{6{c&u5Q?6;YZ+lFg^7?~M
z!yi0MxH(5a{)_dONwU`;HS{t)Ki2=~!P1Sl52Ve#<ZBila^bA~{ic!t^L?(i`Vv_w
zhxKDu6e~Sw{<-XUZ>6!<L&LIn-zFaq-@hX={b5yQ_+O<rm%g!SNGvE`v2;T#%fi;%
z>>p09U6ACVls0ANlJaF~2CQqBt^U2Y*QS#(&G@F`(ldX`bpP*o!JH#{&hwdPS+JE)
zrM<892OpzuAEob}<&!pj$XuFw^wzECVo4$=<&RDF`E4WrJk$QErni{Vn&(T`oZdBO
zt;qG`AwMm4U$pnHymz+kQ%POawaqJMz2DUu+C57|cik0#&nX+$y}m4bU!H&W;k+uF
zm6NKrg^P9ixI~{n+I9B#-I^Kv>)&6R^5gExIicP>3mxAXmFbp0a#<tew%=)q+O_8N
zV>>x`Z}iM4+WL2GcD9Je8f`}(myVgj7RKtHmcDZiq?o=tW@EnI%1wmV<ld%IsTWcg
zUVU84J$Hr7ug=nHqc^Uil4pKLarMtS&28m3|M#<--u3h6nR@KvKdSw0cWYtWp5s%d
zba8z$*=Z~hU{+GzwBw51#+2_Dmzj9qvXq>BQ_yPR4DM%&oyVtqozeA7I=j-T<Az}D
z^dh_0yA~a+cQ`ZY^OJj1r#!k}TCO{B=@CnHvHO!*ZkJq0aA8UpVvAr=nzKsa=y`!F
zOWeiUqL^>;HYD&|eS1{br0btS5$lZk_c`h=%&^{3xSDC6GM`D>w?l_kYF+%hMDY0{
z{sq%Mo{(1Z`t2!GJX1xt=jFBUTYhfnQEt4bWiGVN{GjRW`CFHYm7b4x&fGSs&5Uba
z+st%{o`*|B-u>7vWtcVh{bjY|SJFLW*FBZr7IJcZ=sESn_dh8J2L*6t1h}^5o!-+J
z+wHgLv$cDYchlJwMG}?_yqbaAZy4w$-MpgWY5K}dWEH!_(fTPdOAoVf9F~w3P-k)y
z%HoJzbxV|?`Tq3v-L1SWtC$(rdDU?3`uAgM^!j^w54-cOZCo<Z_uc7w=ey5cegwP;
zyD4S7%Inv~$82qT&mD8PQE@KRTh+Gd?XSQ$vo<Gcbb7OY-`cs!-?MV^vn&2R84Ck{
ze>o^}di$i=haE0_GG|(RCAmxC%p8e{J8V`=un^2NEm@w`&arf6#|{J6jw>Ikr#;Or
z-J;pQ&GupH)7`0W`+ha}vU%>P&@P{PC~)V&Z`-EZZducI@O|9U9d~@(Hb#qY*wxd{
zo))(L{n~5axiX5cxn)P1+Wg$fmJ{9fSnR|8^ZffaA9)^lf_ahj($5whvRN558xmI}
z>DDxy?U(j@x$DbL1Id_g9La|ZfB(KG_S4xerG<62`MhiXK{>DInH_rduXEyylEv#2
zm(32@kn0;-x~Yfv4U_myV~5}aaSJq^HaxL&cRrz{yJ+9RHS4tVR@<mYRkQ6l`sYl-
z(KQVs8f_lTE9&dM3203eKEHqVi|EV8kGyuN)VKUx=T+@ly`=fkieHcZe2F^Utuz1G
z&(%}*$9`>maM1s5%z1@{CQDx$yp4PF{I5!IQ>3OggRUF{Ba1Ve?TOhwD(81w`e$lB
zU%tG(H1=$E4C~W*%cNQ)#RM;In&)@g#CP}Ke+Aq1PFwTpa(1gQWh!cfg<iNB@%{MW
zFGbHhr+t6^H8b}7E-~(%?K}E9H98B@{CC9}UrTy^iTl3B&KWb;PRu?w=~%PAu94BB
zjUsFduPx<L^ok5}bMxX+OkSoEvN&63G8fZ?x9*--?^m1)O_-Z?C(2s4_i(HQ$0YWT
zMQgRstU5kDXL6vHv-#^6ySgmfCswUna{Y_a-kl1H=R<BY+3`edbnBdNto>VL{tV~L
z%bb>*Rq_>Ur_Fb*shB^*DLvkLLt2BpTdmENIXp&NH%)uWbM;-7S{YOS`+dvixC*sv
z7&90t<t<oY+Bo|M)BTI9JRNn;Xk6WL=Gjw5%MdxnB&W%*7sPM49r-NaV1L?yLWUoA
ze<~)kI5qdQ`u0e3);V+vsNPgySXJ#Gz#ZIg`O0bHs>LgkPW5fdSh7*;>GRn-Z`fQj
z*F8(zb?m|0ik0VbZm+$P%+kJW-L24LOnX8Pgq>X=Au2jgJ9pu$mHm~`2F<+90t;Q;
zc6eA99G;pUw)xhsETviIpSe~hE@yhrr{-nOb|CMmq~|}~d)sGDPj2yN7T(2{_1(<P
zz~$DggFRQ)NT_f5!xjFb(l;lxly8EhbIT0{y&KHZf=dsqYyI9Gu$*&8+NIp5PtD)?
zOEbSeE@-m7Blo(K#Ku;^B;$*(_9<nV{hd}2KHcHf<h>W24u6Qf#Zdjt=DMD({4062
zMJ{qLL%kE*c$=Dhd+mMR?OC<#+-;_>cmD-Ue<Yd0W!&7G<8W?EhlbN;ZF3P`-W_o{
zbDPfA8>Xy}mXMp#VNs~`@w&|1^1G+rCH!{ZGpis#G+}YrpP;hRj{+=*3ObHIc`h7L
zxR_<#8iRe`rnH~E-5z}LfYXer(Pvt}CN#YAD6CrdTs`DqZZxaQ&%Afe1@(4PbqfTQ
z8mv5xjEe<xdA{hztVv|+{%LFXVRM{s)rMG`n&&nDi>8#X4wsnK^7_@UogQm+qd%YG
zmOdVAvqn7KmVbWk7m?Ck50W2AmI`J}UU&7W-aE#Ji!{2<v2o5dj;{$^#x{YYMx6if
z){ATR$-d%#JZ)-%z~@lwI~U~QzZyL5_HbVG=|}5pW08ZLfltGIlxF3Gv_zhXIN8$a
z`P%5#;i`<#V292rH*L~5JuH{cG4%CdzvQwk({G#dtO(b0ou<ntpWN=cabExHU#+*!
zH-tMGpLChk5jKBAvFve<=dttpi|;+pv=<JJ?2--$uus$U{cPhcWZbQ2B|O>Z^Pb!9
zRZm{NTxB)?Y|n!S_p98CUd%YFz&WR-_xjd3=U?a9$-H~-=bvt^pYZI-*Og0j!q(`V
z-n1??G<M_u`_C=q<CgBR{(QL8Z}H{UPNR;AMuwkV_B88if9l%$!>c19X5yu@$5=i;
zUeL$2)~l3h@f&uo&aDE5NzAK*6upFA>}Lu0^LS)%-|X(5Cv3UFrM&z+%bx6FyC2w^
zcYl|0Z&T(i=60(khH5=uzni+X9=UwpR$cP(Rq>rCG(F#*ogt8D`)tpYAG(j6taaO1
zCaPFYbZ@mdWB2Mu=Vz75NhX`m2yuq(EHVh*do=lMqL0iaj(hhu1X@;c@EtCR+%lzW
zPsjF#ql)S!oK2nI+}iXxv-;A%Pg<nQr7*AkxT!?{;efF6H$vrU){A2nt*h-kaen^w
zxw+@h$MV)RFSx9sa4@Dz+o+@_d0&skIoCxKUL}36n)pdW{QkX%x=Fqj7otN%{OgZi
zP!s?9FDL#8Tc;qu$&u4aiI3A|Bz3mFec~Z_)K^u1Tj##RE}gv3Wc{|@sNy}+=2&r@
zaq-b>8>5#$wzV>FaV#x4Q>z@u{`uKGk9vvI0%uHY4m=LgWtr-jmRt7X+0tc!Y_dwZ
z&)xIqrSqTZ@LjLiDd4DL`6Slqs-I4dTV0!^2h*B2%z=3}D(gGW<XjLc{Bg2iYx2%T
zuMfUp@;>P7RD5{(=ZJ?EE6x5DrB1ubY`UtfUi<Kw7gn6dGk@r0&$+KWZK~|)Nb5@*
z)7Jcu`4+f6KDX}HX#uBR?&{*^mfm#-y}mzxE})gm;yc@Xi#^91(Utp;-`KaKwmx*B
zi>ayTy2bhDUOa33_K!2=nf*#B8I9*JK58|83ip_0u<@R}>A%QTlW#jHpXRDj&EXDm
zoox6ZjnS)Fm92So%f^dOKd5!DmkQbC+jgsM=dIQivu7-w_1x)Bs5wjTD``2i>(2x>
zs%QMZ89#Y;@}rBde=AH6kx85TaLYTb|MM)odG0Oae^uP@es0X}m1p&DPA+?VUn+J(
z$;9#vtsfdb*jn06f6F*8RxT#|+vEL7&w6ckyPj(eJe+${woo`yVj*+Fp`}HfHXg5c
zHP2sfzscO{%(FR1OD6}s{CI3}%a*lCGiF4b)!m|J@@d0mEmI!PW&9<clV)6$dbv2R
zVUnXGL$~1CEU%O!r`9A)K9a(8ZqBmGXMYc-<eN<tJvZg~k>XE#LOG<>XRLi5a;?Zj
zTkmto*C`XXu6rHqH|g`9`DMo~y>u3=(dLP|qQ6FE&ZQ}8H|5JKy-shMBj>Nt+OaJ2
zmOTIOzj7Ns7lrA*aIX{jo@dx0>@<T@$EodeWhCG3yEgLg`nsb6OWz)x^6bb?oz^MW
zQZ@UzVtowXeyZDld%wk#za@26^X2@a&InGJ${scMRL_)hBXw(W>-7=hYbN?cp5b?q
zJRn-iaX|N>*t_-sh0yMPb|uScK??gC3t87@S7>l@Z+(%tJ#<6Bv?GT!xdk;tP72@V
z=}C&}m~uHQLNx2%Ew8VALMa@I$|{1+;c3@zT~Abu2{fs-x&H95>6z8F0ttND4zo|2
zso7pv(&XTAs+=biDC1f)&%#$J@>0nEA`ib+nPGfK);wg{XVaMT_(J7`_0|iz0>ZRg
z4d+Z#NIYjbJMsgc`N2IZ=lGwhMSYLDwsiHQtZMsTL0&PJ3tUdP{x+$Sp2;A#p?Kz9
zg~V@(J0G*F%(_4I&)%h%Le6o{(A)g<_Q9v8_pVBk6Ztg#Uo6kUReQ5#>XH*$VwXQr
zTz%C<Q~Arm-KyW;&TO4!vuVD^>`R}Wf6l*AaBubYO9n0H`TyBnSTOar-m1s13_c$}
zuxg*{4o>Nsz+>W#dde%?bd1*i3<|3;{H!$pX0pUvjt3s6R!1*MN_Q5!_H?oClk#lK
zzRheqPOZH^t#4~eN0Ub7$MZ4qwiU<q*wzZ}NfCc?Rzu68jlX$Y`Q|l}a!1WJ9{POw
zgvYB~<rn*|WeIVJ8fMiLTQD5Fp&e@A!1}J}3YUt0R@6rJ5B1?uAyL8`j)!U4Y5u##
zuDtK;sh#`Zz4P7viszlS**=E3UpB@`KjGwc@Xq=qC|4#?Q+%a6oq6G!RO#}4qJ<q5
z+VeMV?4Pygp#8KojkGgL0#~O=Jux}hxLZVeQQH|MPp0Gs+ev(fA2*&p+~~F|FIT}%
z<d^9?YrQjncYKKc?G_`|T=-dXs)~Vw#U|dzdXp=>Urs6zSU!jMh&cN@H>Gsp6rN`>
zTc$+GZha`S=X%Ab&)?mm<3zsCUd-Zr;PuyYWuF({*PC4!c(y$9_tk*PKWrN~&;7QY
zxomp;!y-$DeW_x*^<F0jy=vn$a@GtINeH{VXz9nub^g;-CQVX#s1oyi@#CB&@i%xY
z?uYq^Utzv;V&#*<NXA|7X5Y%2DrKqEFs-yCKBQUyq0WL1(|#8&=0?xZz~rPQg==oi
z-I?X$6(A;TDCVyF>z%IiGn?#L=jRx{k2_c1TA+1$(-fzp97;>-s;xI2$eH(i&fY5}
z%8P>YRs1dAU*0l1C24}hg4J1u)3;Bq^vgPM+Gpl-pO8I!Ca8R_yCqvb=jET0xYDOb
zeCuTYwOnYvrphalDBhy!w>)*%>`yl7CsR!|yyum*CpT@F8ydEF$?CPPUwbx%Yh3L9
zTXSb=?6fn_+do$Yt!EZecYhG(z<1JyQ~lJ;=_%P4Q<pyaSh4jE<1yRPP)(1}&xV34
zXI^)*wZAhVs!XSS?c^D2SKQxs)oqrFs-Vy{l{vv)owaeIqQ=oPml-{seByBB)#Xdq
zJzCn;`B&Q5|M%yTy;h9tkEgMS9C4djtX|{JF61Y*ZZfZbN!PpV#b+KSw)^SK=}2Uo
zX>nF~;ta#nQ<giQvi1u;CEBFf>8g_2&z6um=QrDtC#zTU{oU7dzD1~O>Q)`^9^==H
zQ;YpHr>1_*kxDE6yLa{n)xdLhDOU><6zleXUUAlVzjn}*?4#=IihDFJSTQVE)?6%X
ze&osa?s?tKy96cmyANr*oP7G@c^hkA{+bi!73ad5V(MoG{s|U5<P%dk^U2KPzC~xR
zr*t$huCCm%(f{Zy;m+AAIX0<(pYp5qxO5-7B#<uQ_4(|9HN4TvU#njvyESDT_?rB$
zELC*x(pk>}en+fOPJD8uH=4z9yR7i6cdL$zm#PX$eXz?iSiihgIc%!CuFFyvhN-)m
z<h*}-8~vWqBGNGRqV(J1Nd8tf;cH2?{o$n%b@>vr_}6k-wjJY-erlWlE5n52yg}>T
zXPyy@7nPI>Cvc_iaKGPiMYH3tv3$!RAyvDOL>H+)HtQeEOlZ9P;l=vd8$NJ1$Gw$~
zy>ZrXQt@HE25D2S$HzWQTrZSuAEH}3-CW%H#=I-NjiK@eR}Wp*TegcgO=|r*r%sMJ
z#?3Yt%(If4+J4EIE@}5{iFLS8H95~;rbfYeA5%UL*Ws5XoJ@z;?zwb(z0-1wEBfgV
z6^vJJU{GA^sG)dt=fj{Ej;#NTJ?{!#uj32-oV=~q;9>7gV}`2G#rxKtpLllOLXrC`
zs*XRBQ}#~3P|<G|T63bURh!XTZ>L4G!>5~xzYA7edjIl!uV=mJyxSVd2isP5FWhyU
zb#j~4+pGH*xLj(OEO~2T?d!}=q5J1e)r+^bhE<#VJjT<htnjqR{o=XM7~5dp)zxnc
z`2J2att#B`TH3U)<7(30_s`dbSD0$u{LOCI9mT@z&E%uD=+W0&rIT(>SuS%dxt~vZ
zcDMU_{g=!3JF35WHLky3^ZV)heN*PV<J<lD@^AD0etv)T;M3ELQcrDurlJ#h>PhP(
zlZhPaX^jUrOmb8{=Bhk%YOhWG0V9*mK29yEwySqVG<}+*dh+F;FC|qfO;3&}u{ge&
zadC5tHs3ATcW07TEx$Y|cwW)G^3Ol_%wpt{xVA~}`R83a2Hq;Ep~2B>-(7iOAiwPI
z--(eO=U(5OJU^?iVfHPXIjh~~dHc-~_Bt3P<K&_@YvVzV!1adZu~H^F{#h~QZyWOz
zzuLx^^vu%`a*$=-EU0cKmYm4Z$f>#R`jG@f!^bW+QUxYQMqUr<IzEZr=+J{rlcxx)
zD|Luo4vn1`J@u^0r8OZRxt6QgC9=+38Tv?zJ)Sk?n#KK)R@;p`dOT;O%O81h<o?zv
z%Wmp)p2@tmbkAgyr6!e4Q&lETQB{4Hx%JXNYlgrg?x3wRZJ7<URrCI6HGSW?+hF}`
zuZg!`C2Axd@liSw9T3Fb;V!NFG2l;_X>qVrK#EIzqN?+3Nv3;>vzK|Y*ek4+Ikq)S
z-~Vy;vS+=&8rZd?wu^=;ZT--iyy$v=(D4s<#cn50ZC}Cta2|(s%S;2~_{)rct4g*m
zTy^lqNnx3$MZFEv*>?&wI$e7Dc*c`wjJsGQ!o7W%wM{o4HmH?+W)}1O;*}$&EIAH5
z5_>{IV$I{;@7fSBt4RE-=!-wU<C59yHF@-V&)zcddMm&6x1v@u4`1&z;eD;??rT^U
z90;v>TyR->!mmSlA<^8PE$V7w?(JLF^uOy{dt%u+gY<n1<=ouQeGZk}EodF8tNr=H
zk1NxU@tit!MlDUwn=3SE(WC8AXA{)^&#A6me{G}Gy0@7fwR{YW)~u^$&fm#8`EvsA
z4XOKo8O`}EUQWHpn~+xB!Ps-`Qe*p3X>rT^FPah$Ra<76mqk`?t>>B%8P5{Qs<-&l
z0=A`vzav<mFg)ry=rqM8AhGV<l1XAOZ-3D}<n*Y&(^Of%KkKdTf=TVuS6VOm(X6Tz
z-`0B3^rV>VMt(K^WV2qmt!cBu%@V(yi818d__pEvl{Q=PHEFvgBRH<Db2{>5U*DIk
zx4AD4zInSO)37b@>ht$TKCzQDwom^0)}i`J+g-W;@iVsOhkTn}-^_or=J)>JZ}fXF
z=j^lWk+1IY=Cseh@=A0`ZJ}V%_je1*owD}1WN%n>dfTko4Q4Ei8Sm@EpLpy)JA0${
zi>AXlC-wS5>cq^dg0n>=ZI-QQwBGp7(PNKJ$@4T9HtvJr9Go@(R6j}_+p43uRKQ40
zWt++bmUBlsPxwC6QSnT(+V|tl@_GmR-!B&#_^n@FJF9R0zN)XE45nT_Y+v_t_x+l`
zTbH~P-W2|R-@WO^677}$Bm#8PFI<ii3vGS%vnQBGR3*LqPU2NA<Dz!qlX+&FpYGYg
z-8D0K-m|+tInO_5#)>CRo8!WAY?WxrgWig{2EV`ASII2eG)3L=WTE}p&n9!vXKzeO
zjl9|V%HY5Z)r~U_acHld)6wFQbL#WE9qH#LT`M)bY+%&6<EpXXX3=Cp@6MuSk-4!O
z*F-#aQWOj8>sxczru3$H%$K@CEB|9NBCl2o3I-o-(=cibaQm&#$@J@_OHzho4yVCm
z%h@-QE%#n7iV6>w=x|j~oSnF6%4DA@>WPXPB`3ppUR|9QZMrA&<;B^e+I~9c`Xnxu
z?z+7qd+Kc8J%9JyNlJBfs+imo?3X#s<ay@&&pMXNO@xokv{dp`@%FMiH~mVh)ujj5
z|EzI59bcyXYRNLWh!^df9>4X~=h6FD=`Y*4zjbncL;k|OCG!soaQ7c$s(kh&chaH0
zQ%`&Kp1XD0Z}?!f^hk2m_B6A7(>P3KUR+QqVR4)J%ulKOwp-75mBqKLGH`pzxZS<@
zEuY%wY4=KN7amQ`xt7pY`m@ya&H)Ayv1!XDh$zPID!LjFyNB=Hv2PKZEq<^s{P5wN
z*YD|CtEa!(&y=uPf8T<-#ZnR3emA~p<-H7lJ3U5A)Oy84n_%}_`&Mz~Z{cV1P$`_c
zy*QR7e#Q1zGt$4LC0AHR8QuDzv#B(*MAu|znj*(>HcOrrhj&zN>2vviF?#Ci+jZHi
zr8Qk=CtRP^8#b5as%VH{OJ1}|VCy8&dMS=W1*Tk@3!do2ga$YNycpL0TU=C-M^vmO
ztmG3z;oEtyKHhUbUo3Oxu}#m1-Ci{}7%aaC@0}}sE$}Y;tx`W}H^Yw}GqakimofT0
z)aK7rly1vt)B3q@SDuuC99vRxPfr8ysYk~oU%e~Tf03(F-*-@Cl8s!J$c77QEi04M
zFLvzuuj_K={i9D83^Mr7m&|oC<Yk-lY}bTcTvJzBfBHH3`SGv9EXsFEg|Fs?x}@ah
z^{Aao%(-5BLcXJZ*R36VM-6tF&Ofe`wTpFW<+k!2oy+%F9@;Olw149Ehb+G~9FnW}
z_x+18%VU!~<3q<?dBT4uZQA(BC1-&|=e6{|$FzBF%H<wumYHN8mVZzoH21Uj$_p<Z
ze%LECzt`;Z!Gud`KDHB=wkc(>sDy0EvVFWpZ)>g8%6Av?Zm0k7xDl)&apYj7NK-fG
z3`Hiv@M+Du$8+Wc-+LRkf8YN<m-WB?*}nhRtJ}w~s_fmf-@ARW>hAw97N5UdSO5Rx
z$9EqDZ$3P0{yIGV+~+T!&)fg^c~b7*|NqO={Qob@_s3onp78DS>bp};efm;i^>~d=
zCu3yj^(0rvGr^xE_lT)-{@aprQ~vv9o9UbMo~Oo|y6p_L7rb2cY+I9g!&#La;femc
z=1i|VcX>|v@5<P%YhFiQvN><`)MUAn8<*NMC!U0(zZQg@+m=-CzWZ?9|D<_*yXIYg
z8sgI#8Bug9^3oD7FKurj!xUv>zoscVYqyALt(HE(Ae{VaM~ztaNAo+ct<64ZCOmZw
z>sjyq;VQ>(tq_4tNhYn*0-Y0E@66TWdv8={G2Q&>ACaY2?7Ppe(E7*C-O;M<7s)Y6
zZP_8~nSGb}mfG0uskWLsJ?M<L^8KFz!J95PEIC&D{A|iXt+kO$oGXqUyFHW5($i~_
z@oASiug~dtdTIJjQd=fsy4d5$6}5>|g1rQvhjsssyWijZf${s%1)HXQ-NQ2dMrp2!
z%M@Yb$urv)Hr6nUD4(1X<aH@^LXGM5b(>1n(|<4UFtb>zb-rig`G)rrZ6bHvUe@;A
zk6prS*KM}UBx~oq8ee7UWhQ}@tvdyz?VoKHKfCsh@A|cW%sW(Wah2NyZhbpz{qB~v
zXE$kauMLW@5#P0x@j}@(j#r_~{EZKcW-M3{)-=&2J6AjJY?6e`;)AEGmU(Sumi{<%
zHQ&**3tlI^vY+#O-_*^!G!k+a&I|o$GNoa|tW`%$OE#a}I4gr)H`n#V#HflqNg<c~
z&9@&K>+%=QUTnj2{m_}K*ZLit8SS4luK6kL>?OJFw$$f&&37A2wlUn1n!1`Pp?TVO
z<_kPZ`xwm3lEeDfw*Kt6Hf!SD2gY?@_dBfJcbq|VY47D-+oTgM@*f#b<94kv)ji<A
zWYjc4f#<)A*$11IuH~*es%CX3v>#sE>eByn+cv4SNA54-NT`_9Znj=J>mvJ&+|_qV
zbQP36_eD)N@Ke3!u}eLEl_yW;ntz>-EMf|(Ig_XGeMy&%?*AjP(>0g3%68BH+Jm$G
ziVVW;AKsC?Pwn#6JN)m&-u;u$G@Cbj_R`mn4ZZq*EHLZQi@I}l!@QTZ1^@ca>U=$8
zzsqCWA%_(zzxmkT=1#uQYVo|vHDU3Uf6t7sFMo5M|ARE^|E`V3cUZQ?GF6;B5*>Up
zHBA52stlW$1#Yj}PQJ)qE#I~+$fZSU{ZGc$EFK=K3GL>|d-z@~&incx&FLX;#=_Q5
z-{cq0Kf>I%`PPQB5AQ@}*SfZ-UpZ(K(re4K(vv&7`V<3qOYe-}HUUnLLr$%2g6sTM
zW_q5}^xys0aQPOSJ^#PW&j0^!d%nWU<@Uc{)>z1-35s}DS)9nTpMISEsn7B7&)wn`
zUr##zVAr?1^_K5-l-$?Zf4BU9$zT8Jr~CSQ^}l!5|M+UJS+L)J(yQ(9f8Sk{bC|h#
z#stCXTRT)vp7EJ<dCUCghx=w+UpghpV4978aoVAxxQl=09?CQDvy@~s@SLoowsg@^
z5tm&$uWRhz^Zma3IOy6^o$m9spXV^PHuFYCcyfPv!g2NJ6rL^nt>5(RzFW9`-t|o@
zX14ozzLkvKuWFzy!L7X1K(veB=Sq*{PQPapR5^>)(l%&@Y+ZA--q-#A{3{BcUn-dz
zw{V?WGObJV$uzNt>lDJ+mOtGx?}lHPdEbf#St${wH+SqCvfs{G?jKk4Cw>2q^!6$H
z{w$CG`|I@mfA41J%gsNx`c>A^4oy|}e%9s1%F#T`v(7)yeh|`9;eETz+C4%eb=sVi
zPj#8!e$9(uFl$yxJeg~z?Rm#@ZV=ar{|mOf+m$4@`gh)Z^R1iKy#D#-R|Na!U+KB0
zgpF5cX$vQdIe+*ScPO{h%tDe`e@Cdd*tRPk6)G1xDm%>&@=91e<2SpX68`<moAZX(
zH*V@m+O+#=UaaIhhtm5EWePE?d>p3yxb;}%sEnDLhlxkH{p?<spy?}Ho(kI~+*>v4
z(a{ZxN0<X|gkQ^M-g5a_%sbNx<^P7Ie2cu_?@UgquvloA<es$lWWlw&Zd!+%o#s{h
zNE|A8!M%A~QF;FLfGt;AqVk?<m1}ICS!v*E!mu*6<9fxXXF@&>$&N+dE0$i{A1cXK
zzVOVnS25a|(o=UIj=6ka(NkJ{J)=|3GjUd4&5&Fnnfvzba=ZC7Rvs^B7MQe_VQQ|7
zYm=T0$Jfx@inFsHS*<hLuwa7ZWT`uj$6{6qpELjYZhq&RDF;qYWP0_zUi;n7iH5ry
z6?bWN2TeFLYhxT+M5*)spkCVqW#RN*CO3iQg{8S%dR=ArB%WqhIOWH<M_e(x)Mpp4
zBlk+=>^Y39L}ZhC6!ea%nzwMeWTo--v%9qP3jI8v!YOC-(!rEpCbuz>$G~HTV(r(h
zt&9mV?ICki`4cWkx14>S>yW{o_SI$gpQfc&+a663;t=F4G1j{gm6dm6R&+<=k^8Oe
zM~`jWm720Z%4S{2OqV}BcInD?^2@%~TA4{Dt`~~4U%->Our}P?){O01`j;Qu_b&@A
zyT=>tsekPy+s`Y{d!)DCW>xsS?f17`;Vb_iaagtN=ouEV#aCxn7Wt{KsJ>SgYEa$Y
z#l4M{?T{t2*WB}G7Mx}boAW7Jddt@6@`{x;7giYF+;n68RTFJfrl8tMcdk1ppHgm#
zlsJ>7Vi|bqjb@%r-{+XbYe%wq6op071m1S<)%X&0S^LA|DcmQyndbU;O<BfrFxYbD
zr#-jVzP~4|>is$YUfpm1_xF5$A3nZ5es9I!kKI%4|9xS%|MO$B^SRes{yq5ovs=FI
z<Im&rbLH;+e|!JW^rtoV&VD|8`L{WLzPJ|u>&ubn4JLaoySv3+?(>|)HC`L0T-tK`
zzJIXRQ=K&}m$GgL#XLVZN67Gb<vGpu7VDR9nkN=K$3J!ZWzFYzP1Zh7jSWvbciBZn
z$<x<!#>vT(CuzORT6$obYxs+-iq^k(>&ojrURTw{T)Vt#>5NX%?WSI}TsA@)7vE%V
zV$sd<;Y;~?-2Fi0r72~fk0?av+OD^lELIV*CdEv5QpUVZ-7-R|i5r<hwU_P?ovCu<
zt%BCnS<_pWKZ!H{mgXYt9CR-8)TYx$@6%gt=bT$!s@pQ9cwX^5hb0XgRhSFs+5dl}
zUjOg;{y#4&DyqJJGvEL3tNq_+*W>GcUY%W@e}BH5{5-$qTXmL-1S<z+v_wq$Wvgx(
zEL60>-z%^8E92WKpX+YzwUB$JX{)#V_|r9Krl?JtyW~t~XOWk2Fq`V4Gjlp3REoWH
zbTpUFQB4&}oa8IOY35-ew^ci1ns9Sw<hiIYqxeLYizfb)#Z0eep7Jv`;^t;OKBtRe
z#qLQf_2M@aJ~Y^zBI)trg^@JJuCi^~IZ^WGpZv)bdgyicCFix<&dc~_y`KK+0lTwI
zMEHc>Fq<gn*@spHTW>qmd(lTEe$Hy&`_I?ewA@~Nbf%~L($gC5dk&m733Z$td1OJw
zjX0rr<)W%{{S|7h2Et7%RGRY@YZjSrzUtYl_2P6}-l~%}g^AmWHP`Q#Ju6s#){XPC
z&s`3eO$%mcZ{5UjHmK;w-mqVBw_L6?t+khWvCBN~RganU^R`^h<284ty_xs*_3y%v
zde5h;w<KC0NK;$+wzs)W;_V%dT`p&{SKm1CaAn@j__vzv)&A|%S91NiaC=oAuk5oV
z^O6&#Ub$%@3OWMq0vVGv9p?n<UKQ@Vb$MlTy_B=@B4(vSW!-t}yNo9UMy#1(9l3vF
z<>Be+Ek|}LYs9LbZVc#8eU|n_fA2>5sd{^6n(}o|NT04|9=Ez~+lL<o(JeFhrRUbY
z-F|Jg{+T^fUPRu?V%ac3%hh9-cm4J)wITIJ9v2VlI{&=!a_z#eA1Cit)|##M^3qX`
zeNVz{FTW}9pVPteSgSXA-QGg~Q(1XY*-LiMvo+eYZ^h?WwR;7sKaD1wU%Y{>&}&Zl
z;)Uji*J)iV<7uzHZl*PTo|N^p2Nhw;`{ynY*0b}AUi_|AV%hyQ)3c<c7IkhD4BN4@
z_khI`d8==ySKTehbUk|X`u?1{EL&ZdC2TV;*CZ?5yUmgF$)v;c$p2SuJ<AU7<~X@;
z!Vxy(#1-$^ubjKOI)0_N+w2XK?Sxhyl0M21{Ph9L<)nfaTx?4>XFqq~Ni>*Xv&r{?
zTGaJAWAjgpZi_etI~|tz{G5B@`qDS^&$TbD`FQjBp8danuIl$c{jcK4rhS#)zdgRL
z&wnO<{=B;HAKv}=s4ido>*eL}`&p;_pI^5B`)m8>zkly`hug@%kG~`y|L4Wz>Fej;
z+nf6Ay#4<_PrJjPHyu9RU-!%1*h?`YQf}Tk{{GjO@7Moa9bf-9+fSu)`CXg$<(f-B
zZ^|mH`?Z8SAk5QAV7vE4lfb({)xUZ?{EXzkwQ~kn21Rd(y4dtNbK+k;-^m_Ix~aNx
zI}gs_iZo>6?Cw8x^hm<0vQ4UQ0-S#TZcaHFto3|R>eWLVSXx|E?7Rd_czpc|%QrDz
zsG1{cCZ^R{e9B1GNpW%;^Ed1C{io&kR!ua{4r~Znxu(oY*M%uqsIYz2QGZ?~FU{qe
z+MAoE9i10u>ec@D;pX%9zfbnt{r~Z`yFX5*eD}-M>-Ycium2qX@89+PbsE|mJ+q=q
z1z6OMi?6Tyv^l=!(@TE)eftj|pI`Usrnvp@!}IsofB$#)clrH2wV&T^&%d{)`v1GX
zhl9eJrv{zb5>eQ{U2yJJn|afZ_S~6uE_d0f^5c*B?S8-EuluxE{QGb7<?ZqHf3I%m
z=hv3l68yD0G419-qrjwdE^~_3O)@_hoc8g=Myq*Wt<5*7N4{$A<+71Zd49~`($!NZ
zPdO>GxvM<&3{4KYYE*cJ!^86FjQiWYxg-Q$IZw!4{aNKj&Q&>yFDV%zVOyu&on+(O
zYO*8t=gvUc7g7<Pv%MCs|CfD%-Ns=JU+T|A77u)DQaU!i+GlfP_lCT%M;m8k1YF5}
z@IK8U?seM)(UrNI_O2>B;W+b9%39fn-C3fC1eX1}C*8P2YsaiJox8ss*jT&KrCjlw
z0B`j{(+9jMOly{%J$T;PLrqU|F=Ke{t~3b+hwF<TuTm%uTxfi3@3YN%Px2~{J@op{
z7Z6m`sbnU~!JV5iL-)d?rjHDTu7=@Znd_yTDxb#dU(*r&%DzA(*Xe>`>eJ=}!h9E!
zd7FyU5B1)Z;or!1zF+zH+{RfOGiA>h2`+VU*jRBl%gF7)gfr*8lODe-ZYWWdS$Fp5
z+qoIfdA0tmUODx_`?hSxUjoxPp9uxlpVINuSa;d&OZIkK*{>=RYRmT&YW?UaJbmuf
zow}H*;Sy)gO1!nw6}!>t?PrydeECGrW{<V5Lgv?lLjMTeHe0<*de7hA=Id(y-&@4v
z@F=S6*khGPPZD-L^uOm&;c$NQ(YC;>8%j-;%*>OLnHO!J;KQRG;(Q^Q%UEWO%?%y1
zldOWT3u>=i`?Y(M`&za5h|NonG@eZORwi&gRps)x;NLuQb;5hg*49cM+%Ik!Y%Xef
z!{_t%)c-I3FtANjnYa3Nes<Zh`$;>BN*C(8>#eTb#qc(M#f(?4b<;GKab4MQ<sg%d
zPVH`)1QD5UmRxtI7%!@>v}P%|y*7qZl|4JySMa?c2g8+s9DzISWi##+yv~m~Vc?vv
zuj1n*u*~II<zp4LAnjK=W#RoiyJtz9R!tLHvgKV_{H&8jcJ-g%&aVIUwcB!OrWKd(
ztmShu>MJ(hmXAog(f=;KW~<(rq|4{ZcDG8f3P10UuRZj0!-Xm3?LP}9Xv)7UEB=>p
z@BMw7J^#Ok=f~H7IeGW)+*_5?e;iHRxBc+-{C#uOZ@%5cb!krd@xSjj80)6$1{kDA
zfBf)H)}vp)Pt3X|^IBT?$ytKkH@ub?Ic?0GwS4Ckb=QNtUhR3j$NEfl>}h76R81!x
zw}=yB{PW(iSgMH$tqGC)s-o_9#B<r-DbH=*FVxNn4W6!UCAgi>&9gIN(t*i|5-Kxm
z^kuG2)m#`NGn-4JFsJrNdUuV*n|(i?eC^)-_weJxw~x0kKgzXo?%5kYF|CK+x_#Nn
zectl^efdu{RTgVcA7-+=xBu7X`k(s$-|qj{ZU5usbN%;a$0N`A_y7Nz|L@nAXK!CV
zoV%}nb4O8<Qh&OtcEd@VyT<Rs=W@NjRA_Z(_v7yH`BOHnd;RmvmoFcFeE9eA<Hwa#
zL=CQPTK76D`{L6r^Y+($etZA_H}mD|_aBowvxCDqT;<XA^>P0{>eu}{zW>L|yyNlv
zewUqm{i|2hFKcpSlVPITtVH7p9DPT90*@`vib-#IrCwsa_m1qPF!udD&Q&dzjyqKJ
zPG0s*I5W|C=JJ?5`|=xCZ)NdUvh*!%zgEb(-AG)~=hF@wW{XRndxPZ9?cVTPV%_z9
zdoG@9tN3euCYjk=`tirZ=XY$qb2;z2&*sFeW4;`TVaumpoZphjC)#ymW_L}knAhd)
zeI<KdDREeAJI}uMo45E~uLW$iR<Z(@d~fM&-l)=)-O#QXD}SPa&5!%UVQn6je-W#;
zzF)CFXWPOHrh!3UdYPGf4~pN@FPMHhz}xYb&sm;{g)s}w?Tm6}bJxE;e8xl8ZL@E9
zdD!j?V!nr@`5&)fKj>B*u-t#8l#7$;nqx<fO$qY#ROAzD{mEc6XXfk$n)herwR|?e
z^Wg2dbsWht0am~LsyEB+JT@nO(=UMz%cmcvRV>YZcUyA7iok2vUnG4wu;zo+gJX*p
zS6i)`t9I=DrA@mI?>%+*V;Y0{`K_tJm$_~%?5%9h((UGdcJph()`RC7_HEDJCD7e}
zs7=w#=FHw3H%hkHu9#B!CynJpvqn&PZK-6l&T?1FM%@+Mk82j6x~El=_9x6hjjy;u
zY4@!H;q5;hH7~V#{?Ce$Y40zvwVB0rL{j6VWW0hR&$;({-=3ek^Q|<v<@c_|WhpkH
zGr6>Qctcxm9GzpnmZfq+&X)2S`A$y@Up{yfE~sa1X_H>G_TSQrpSJ$o%`5PH*Qxt;
z(KWiA`yW26J!iJ@;uUGOw!GsZms<DaJPzGkrX)S{&;ho<=gDE-ETuw`65AGSntr;t
zeP(`+<Es3<%O2$26RDq6@cT>i)^w)YfTSy5R`9-0ojT|7|89Z9y7@t?=UkXym8i1p
zh#gn3Qs`E>1wFfFJ--<Lv~k)Ti8IqaRR<Z@e*cj6U;Fm_`+k>Q&dj{D$NJ5?cYW=T
z^G;Py%(Son^+;h(-S;0q0#0>{uje~{SAO4~{q=u;y?y+1_ILiQir*h1&YjlzcJXw0
z@w_j$zc2sJ@4uUQ>7F_B-}V1~T(^Jv&%D~2%KHD$zC6o0T{GkPU6(Vb7o}?Q>BP;S
zw*0&Qk!it?j+6!~sl1V{YI>2C60z3CcJ7)fmD^8OibZ-~+?1i2YXAQE?!T3hr#2m{
z)>Ar^vcc8S&1cf&l$VA}+dhT({c&0Fc=hhPb^CYQclk^@_x!uJCYP$v<S7=aOf3?V
zPTrWYpl&)(vS0Z8^%^Nlo=HEQVGz1B?%e0Q|9+h8zHa~L%j4yr58wa$E&O@@{Uy#;
zAL8WB`=6HdEBW+Xm?v21k>&e${m1VH>OPEByeR+Y2minF|Ly<(>HmA<A16~j`9@XM
z{~sSV+<sgC<LC4I`S<p3p8Qbd(4>~g2+P$IKE0e>zW-l){Qt+R&j-(o-*e+_#r(;f
z(Wh?6#_g@)jSUawzr*I?+x;pta(><4-S_wXT>bt1ynFhuB_{-hDKX9v342pe{{69j
z-G|5e`zi`;r1U@M-I6cceYvk!`0NqMV?M^8`*OlWBb2-zMi)M;{WSf&khoguMUKf6
zmbgvK?#f=LAZ{=9ZTHKHJ@e)G^CuP9)?^2&YCcVk{Po}tD?>wkX5P9|z4?yjJ{<D4
zS0>KlVP5TabqC9Y_yV!aWZ!ik9nYNqT+FmB;_(&h#o52tU;MT&<B6EmRi>#<5z(sa
zo~nm$b@|%(U4BU{`_>81)~;V6{{65@&+K`7&+jq|YUDmKqld$-Wy!K>r+X8v&cq*J
zdAZj0?pgNQ*44>Nr!Esp5;BP2(QfGFzfy{MM#s{xLD5pBi{<mm^lO&3_k|~h%<NOi
zkoKJ*zUuZ{25C`072X@hCyrNYhpaPn|0>v<S0@oGxhB$kZQr)Q_uTDX&1YsFEsT%U
z{hqmCf5ZIQYq!M<C?2@#vghvX>*{5j7fBUgFx#bj*x+;VD`mHrYxW;i3{1?v*?)w2
z_M~>FFKwrkh2BK|vN)LSwsOAY%~Q*5;`hutDm&xA5}%xF2dCwk8kos*-`y~?M>4|Z
z*!GX5k1vNx*Sucga5(AWgz#>~JG>69jAr6rEt1wwtq*wIdL+`=H0F@}>VH4iS!<m4
zOmoby-_^Fqkx9zrU(?E?(LOJZdNpldRhA$S!87?*#hhDDwWTlQ**?wky~%U@N~v^d
z%#p{-xo(s!nEjJw)#Q!;tJXZ7ZFzZrp4^l#N21T2IX1`BbW)jj)ZwW={@t7#u76iL
zpV2n4tNO-6SI7Fl_Gan#KW|zvG5SAa?#&nfc3u=sIK9y!ms8(vX<=vQdh_Oer^J{}
z#O*MDQ{p03EI)zA-#)`;YJntouXm)HWPjf8wewEhO6>H?R#}n$`q+wlTh>XLP2|j-
ze_(}*4AVv?6&EK3_jeV#hjXo2#J^TVWNEmm)XnQy=W`^+!nSM9<c~k+*8CKCX*fw}
zx#y-cpZD1Bx%2-0x6e~%s@UuPdbjb;{L{<l@16CoFMjXeXY%!bck9=GU+S^1{%6*|
zACGrGu6=k!tGs=>{jWKn?%p-`_uv2fZhri}J@?<*&3{*>A7A^$IqKf@@AvKY&gHZA
zd~{;gCJoKRtwm9%E-OU_a5Stt<$37B1f``6uk=`><)%Na+2r)OPE066Gdprx>Gu1+
zyFH7NLX9>UI9oiQvtXHDr0M*1D(t_FgeNA|#w^mSKDzsH;=bEUZG2z;`S5MaoN%GV
z4iAklx_Cr|1*>`<Ss`Z}uxB^Vp(vl=M86b6mWe(&NohWlCa5i$zWZ_a-*;yZZ;ziZ
zS22%oPkqJbAG_D@w_D398ma!s$Uo@C<xeb2Kd<t-Tx0X^&-}k9&;R?t-~avlp^E?i
z&R&oIyKc_>xII;0lP~AX-TU)+_I<l~=lwP)F0Qq!`FAn>`^z8S4%U6$UZ1YL_UOMK
z{q=TtWji~L+AN$mvuGQq;1RD2DsC3`HhX?v&9DFa?cv|cKbHiUdmidhShB_YXZ!q`
zUq4s(8@>4Q!^1Pt?CfO|-`A@)zCO3FG>Oeq=<*ZKeWD@Wo)+qMXFey+6Z#x=DyT%&
z&@+Q&@4_?dw>XL|s`Sz?7cQGTqf*0B#kwi@)R~~Jiw$$wPlU{TSj%cC>2vmSPo{_A
ztM;7dX~B~VzWFXbzF^U$Abknug;qT-%h<N=UVC(<B$JKSn;%*avzgm6BKz)Gc250s
zwb^*-)F0=5GVr%bRyC+x&AeUu^X9w7j+GatT)vl@(39<6e)Z!~`3KARB3cE4rd)6K
zoYKy-d6t1O*Q=dT`&GhndKYJgZGA3hF1>wOs8sS{i$25Nl&RmnOnffvy1d3&UHNk_
z8<*?R)Fhe19}mp?^wjZadC1Mk6CPQXR~5EKAK~JcTHCv{XSb>R**~=tlf{D5|FcLY
zHMUsY>R%$!cc5GA`oRO&GOxKxsC+Eees^o$L@jflN6$QuGt84Y(Z85we)Y!-YL^f0
zmYVVI>(3mGxP*vTi8s<7SeZ8`UwF;W-9B}G)RrWdWyk-&i+tMgYu?e%(k82_Qxn(C
zSh2!`X=}0Q!RGAGp1RA-!>Ye5TCY6KpesT`y!f5Ab6V{}n@d~mwM5S5BuoALpB`r(
zA$-5$se#VUj+L{t53%tT7ke~(&d%hCNmbag=Jvma$5r`<l>;{JJ74?s=DB4L0)p><
zD4*cBIj=C;y!X(~s9VM9dp};=Z`2~#+g$wrlb*1fLkh1=+QKr?(07Iz=8KrxIj^{e
zJDT#n-@PP9{9Qzds7-07NBfuf>)$1O%zq@f>E=bx{m^*7X*PEoufZcWDajq@8hg&q
zk(%P-tls!)&D!eBo)rcrw~tB~?F$unnDpB=O)Bpg|Gm#%+3B;>G_-y#+_%1>+QC;j
z)LKn7rFomtx?)d-rU{<H3#!i^eJfBl#liZm%M!^GxA=Qj^{W(ah<$QT^uWWfTST5`
znMtZXpCWfD-1C_ww{XvCFaPu>KMVfP{=4V=>!Paf-|jyC-9Dx4evRqf_v(WGuFgJw
z<kQFF@_T>%>Hfa_c>evm;wb;kZ}-(ygzwlh@ABW%{dRSKUwWnS#7?_>d6SK9*a6os
zOI}~j(q3kHI3X<I*iJnGZB0J5tD81CF6^HB_KASyipYhVrUaQRw|TF2HEGom0}200
z--6>xE$1h?X&pPJqG`xoDAa!U<w~22!aPx@9{$U-llx@Sn0SBPQk??1|5GB5T!}xc
zBWW<dcW1>;R}GFXDfV-|DuqHTA7`*#yZq~|`TM%v_m|6xPk1tA)1AAr<(F&X=6!kl
zn|10p)rEILb@|`R`&mSLwm$vpzP|3mWcj=McO>7~y*+>bpSSs5k#<#IUmd+$H|x^h
z{C|nF)HOBzje}<`yDR_qhxq<KkLy3){CIVJ-Pfn}zh3YEyL|u8`*Qd8MmyC1TVDU^
z*A9OdPmxg5u2Ux%CwA|fmUMq#{f}>NcPF__Ib@eH$Kvk&yMJG7K415#Sw8LP<=fvo
z`##^DQ#miie5vK_Ra0uxy%K_1U4vPcZ#Xj7%VkE<uNP1M_S^maQ8&-u-~ax;+W%>h
z{u4Gw8t#ZT{KNn3`(AJJGbLM<cIho^W#X4Q80B|6L^tO4FM~<D9Bn;?6paExzw~}S
zwJa@t9Ye|nCzm@x(aWFuxCE@0w#z%d+@f;j`{JM1R6csyTsZ6}J$v&dnbo3Cs(NpR
zbTB{T&rPw5kos}R@#^+XhCV#(8?Q`Lu$UAo(p<Npk84KQ*@hc|-<zkj{7#eLJHACL
zjCIF*=2^3E>@R+uFtaYx)RSA-v-eQPvX0hB{owSyvtIUweek(kdh5%n#I5fFgBR$z
z%Rb6kAKcLXtvPywn)<P@tf2kO(zlkB1kTxc&_QL@(uGeSJ6^F;k=`O1-mv%YlJsS9
zxmQdyR1zJ|B&9`e?+W^QM%r)c?fk!qA}jk0KdBe=D9P?EC_Na~ttJ<yB_mc~a$7UD
z!bo4r^SxJ-fbU67hZsTeGk30ho!wO_@S*eiM~UgOU-!pOGIltyLRI(2n$yaTOCD>?
zR)~=4niUf2dNsngTWI#xSf}Ir9=ny_IVWbg`r~=tr#IaHToY5iYk07Rd&i9vvJci>
zc^{c9^UiVp@!xVe8f&7})^W4j++6?Ywe-u(h^)xkuRJ=(UNvcK)ziP9<#vmWC;H-}
zZ;@%YBK~h@TPA(;!k(|kj%?TQi8=IavO?pA{km*YMQ+E`swQtf{Z3}{y)7kA?Pk^P
z+@(?_6jLYH&H0N#i2ce9j`N#M*LVGPY`T0*Yk~CC+OszMQg+*~Z}|UCOFo>hQF{H)
z57j?rzWm>Ma=LjQ6VpPMDe|+got$|0ozIfLw-3)}J$81xymZ0EFvsmL?W@;p`+st&
z*Xy-Ye=JKAEn4q8t=X`0nbo}+#fy$8WG}q&?x{{z>Fkq2N7{BBJ@IwtWUW6_*UdNH
zF;B(y7VGW~frdq$6~f$8JX}5{xrXa{H=c_6QBqY^)6nrbVoK+l7uWayYd@a9&oi^$
zs@cQt-<QYJ^~=32>%YDYznoY1|A4pU+NZ30cd5rMe_U8^XQS(%8aZuwaAdD;;KL%l
zZ8LVao!%w2a#G=fDQC}gId#vJs*y@R;`TaIP)w*zNFw!WYUC%w^Sdi!&uL$gO+7O$
z?PR-W>D8+bUE(AhHl5E@(mg0PN8azqlav`YzU|Z5H-Gx^FYvnYf+N<RmNq$>jB1kw
zr8hcCN-cG3nIP!M;B;z=MWe-16`!Pt@V}o{=gZ08Ki}?R+|tr@@KpEj+Mh4J?ppKu
z@MM<ec`Y|Pd;`yUHwyXMynSz9S^EFuYNp@MX6N6p`x>hG^s&F)&o7h1JHOUc*}pH#
zoV6@dZPMME@9g$}4%dIy|9^J=-#`6v@4wmqJlG$1|L))Vf7AE>dcFVu?f-?5C*S!0
zziD4xIitK&SkvLm&eh?{#vNftgK|Pw2%bK|7J7Qp{vSU++y73l|8!ZuZbHYj%Zt~}
zvXp;r{jA8`UiRDasTGE1<-G!~rCEhBvkI-_C*M3JaAWo1f9>*n7k&CP`|v;c{k7j`
zm-DY)`CVq!ts87{%Y+4;&oBS__VLTx>-SsP&eh>KUwCcVoQ@S!luY@ipU|>zFZ%Se
zwsdFk{FezS%iOnF^_QxZ&v81Dyt3P5_ijDaz}(^~XQ%zOl+C)V^<lyK{k3O49mwKw
z%nE<?BWsDK?`sQti@R4NUp%T7IJo#lR?yrWBl}xZ{g0?D+`nQ5qj5UhuCAu$HC9>O
zzav-@9q+|_xgn+X>fNDuuS_*d{a}w{i+}%W_`Wiy_rPO~cZV0mw;JDxy&#cQXEH%L
zYT<p3M-$J7oKD!D{d@iIndh#2+W9Mh=dpzDFUJEtQCU`34;@*o;N~irx8aXe`m);9
zad+I0-g^9CSJ%C-Po#R7Ja!13+~PL(`0mrSbFN-G*KuXfhGi$7B?}cwvc<pb-`n)0
z&0QqXH2YF+p=nvSoV0i2TIoH@=6-S9$MbR(&&tzcJpIo62fGh>v$3V0pUV*w=la1b
zXZs=dc@Z(jGuQ)VcGXE3aM<uf&pi9NY-dKIl9|Nhsu||%&UD`4Ebn`Lqwo97b+QjX
zI<XsA`Y9gXw)>7${;Z@OUSd2G%>Eo}Wm>^@s`YU1owa^0jM|SFmsv60?(OKA`{G$i
znvu~yRmIK^>(x|^d6JeCF0+#QQONlq*QiUeYRR!eLzTi5iACGJZhR?iYP+duzW8%N
z&!XDG81M71l>Ds9ryor22>X+%x9aS&vekC`9QW?2+5OQcME8ZHrSIumi*#FGFIup_
z>{?&+Gk#N{=|YDBr&e5Ce<2~~)|+W&&Y!){J_%j_>V#?Mc3z!DSy_#1lO{Bjdaqr~
zEgW>zxPL+)W6@J<*_UG4`%YEm==bG*vH7RGdgiL^f-`5Bmt>dbuV9khayE(AB_-+E
zoS8>mrmU&xS{tnB)(~>{Id6|&$qlE;yNY}tIQRyXvqqg=P^K=xZKE41^fWT)oQuZk
zU3P1iB?xIXEc)U9+T@%{#Mdud=6sQM4pLFA`F+;@O<(aWmVlF*wX-kV)c$_^aCZN@
z_wnz1&#O7j*%I;Q6!-MwrL{37ngz!%MfLD5Iuq%__HsSTD#<qor$wFmRN{B}h`@@e
z#gTr?@7B#L)12?Z{YRM1%xli~lU{1Z(=S9cF5R^`Ey?py_`R;Lg_TRbr|Ry0thl_U
zqI8N5fBwDo&qKL|i@o|jswW9Is%}yV6LGb55==Swqr2>?hv&gLlRVBXN?FE#x>9rb
z%g*xMM|RIK*0ev>{pVLf#Jl(n7qh~gtb%`EXE2xc4~YE#^7j3Gzux9;e|7G|N%i@A
zD@x~oQVCgBX)e#V`>>+!@;%o74))joTmS!!{{I*0^+o?biKf@tKDD20zsvqi`Tu|G
z|NXuH=kEUx^*`<FzdpM=|Nm$EzX#pdzpsrAPQEVV`2MupqLhSbZ;l8oGg1BZ<*WMt
zZ`134p0@uzUH{we_U-xd^7s6Azx?XH{$2e3nnhm&Pn-Aq^PfI7&-lb774>-%JJ;0c
zxlK~O(mnT5f9cio_xEh=_Dr5$UjBJb-|<aZk>R1W+t+AocFYXElDM#)@k&H^?T>G_
z^Y_`!_us0=on7SB>9A^P*0mm)U!E#QZf;(?W2sa4?s>PBZiZ*vy>sbEoum62pY0k`
za+gHqdvN+P7ucre?3X{hWp>pDwp%4Hn=<M)3h*kf4e&kOqwE^>qO9k?zKC$G!Q3F>
zqXOaG&Y>1L+)as9-4f~-8csj_xT)(xpWD0xrrx?Hp1OzDOf$%hJ;%0jmQ9z%?pa5e
zueRwWJ8`nVo$`8@abJ{V?Do*luNcFdmn)=wbpDfV8vQ%UF6~yYx!jT65<7Y>`Ssk)
zyz8O%*yXsYqV9*yo3_tRnUNQl+I(Ya-(vH{$FKccp=j~YfNkcM;82crrUko~@2|L4
zXuh@Ev`0j3M&;fcbB?cFQ?_o(kv7(zT}{6PxIan$i@DpnJ%K5iW5u%fy@w)pYAspq
zvAghNR8Z1mM^l|;+3$8n|E?8L$Q8D;dw;kn&Sl98j@1&zn>YP>+UJz^%>3%IJ&vW%
zw%)E5{+t><Po}41;_CE<`5$e5yB|u5KN-WB_wWdJhdTeka|ud$9@<U;VFwpqJ=|T=
zr?uVuv{S>3;x-M{n@t%njXvmB-8viozu~FaW9FdH?|NKCdYew9Ts!_jr_<GD;R4<X
zw>GMY78VOyx1D;iBv<RD!W}~%xhWBEKDmWn+80;1tmVGegWK<(C^5a-y*Zz=nNeJ{
zZ^PXC{0FyAZ?3s@X@?!p!#%&m%+4Opa8KLC_5a?xmr+^Q(&7rYcC6|PUN|RZ`~1mg
zKJD0NyPmD#NYsI^tY5a(J<$%_sibe1^6%@J9UFJ=c`<9-^3b>8ubTEx(i7a^ojsv?
z{dd6}i)>-`hX>|tXt?RpQ?Yf?Js}>?(ksmCvbS-UOS!Qqhb^lA{+S`v?p^uX{!c9u
zn#SHqoD&|I_*<qjoW5LXtG_F%=VG`ktE{@0;nL{D)svhK9_tjEx$gP$V691;W)wzR
zPR;ptq-fEfD3;Z)UVWB1nY5(sQPArki{}wLHv~q^J|XNMJa5_DgDZ5jGv}_?lC+ea
z$@9P|b!%zt^22ra-iLJ0HFZ<rx#_8A92Okxf2`q9m*=EDL*|^XhcZoD*QJ(Qy!rO~
z@87_psCi<#3G*3?jT8lE7={=~iiAx`x}Fe_yU*4_>*R(>vlNB3eLAm&e$GmLSmQNm
z>73J#J=JHvQ(b=e@68fLjn7=s*A_83hy-;cJiWU;{{PkbAOCIl{`~da{?AAGnt#le
z?<Q^%RQJ?;em8IXZTV%#m!G%$eRBT)&+-2*p8xma>EG3J(uFRh=-d5!;a|49a$dsg
z{lCxG|F-{s|9}4beLsK2|2?W-|8esCJ8zezM6EXo`JXkvNY!)MDV6!P-)`>z`859D
z)A|2DvCprY#&i5{VCmV)Tle3V|IXa>xx2mmcc$OF_@zhY2%kQ($J==67ni47K4c}$
z^9^Fw^nd>8-^V@ECY66q^qPO~e&iY#xqIuf75(LJXPBN8%|5+r&h!o;W8?Xs4VQ~}
zotUB(a74y7#@T>n-r6@OD}2*$xlFg)VQT#T?ISregT~;mDM{R#(l6%Z>ej7w-79cu
z`%TSUmc=$4*YY2nShb^Ro2NVD+J48S8`kUJ(pYfP_XpP#&%5iM9h*}jb&T7IDd!5a
z*X)HE5%C|?CKfE%SZE<yt@~Eide`!`ejk$SG(~>axCvx7crG>!TfsCpLt(-3D>s#1
zUp>9prCD`j;$)sM+q-)_)0VupxIL@ksoX-nZz;ybyrPXWmb&dSiV~94=oGkd#_6)9
zR&(azWZ6w&21|D4*shwYS!e5e+~9ZMjEZ#)f!uPp?A%Qm)fTN}WKCTCvoBR3K1ZzW
z_0{}^4zZPu{Cq3<xx*E0H_x9}!+ki={Zwhu`^9UmcCO#HJ>hP_|1TvAC5;QE{|8RK
zvHj)UWk;s>8ZEC2+w<_zv}M=od={oH|7^*jU2G_pC>mxO-jVt6VW-UN9l2TO9=v;_
z$!?-nvb}BBKmM4tENpANzs@!9>HJ=h%NhB<^8BRRFCM)X>l4Yd$~X9_5pKrzPs4pi
zWRIffoyN%(1rJ-QxBu(hkm%exan@JHLJ#j(x43rxF?=vBQ}3>2bbLyA!w=!e!^^Ax
zxwBv0wvof_=Y<UlQnK$>9GjH8`RLozf%|eUyQ!R0`z*O<?zS3>N7-LWdFNepEU4Gr
zdjG|niyhGn>t=r5;kc>FZ<?^*yVcVk7p>eCc;EiMW60a3^*p-IP2MkiT=@N^`ufQ}
z`+r>xkKY&TxU8>w^~xY=Y46wuuJBhz<&*y%2sxH({85AD?xUY~mxmbNF#Mpm*24PE
zA{#SqQ=jt2Zig9@Dl{xNu4xMmy_lyusq6Q{XZ4e%jtQ*McvW?9-@G-oMp|Br|1T;?
z67v>bs&gbtTdwier!8~j{i9lQ;*2Y=3*BWhYV=54%{KM$>5eCz0SWUrsZ3L;OgkC0
zWy8%)(-yJ$P0igK6{vmd<D?r`Y}Un#duE<mIYX?}Vy5tUSucLY8G@TNOG0G~&Q`{L
z+7&S)sl`(zb6cvDp?1RZb)p$YiJhU*cNc4mYLo=V`fz{y-u}1y@xOm7(rs<$m0sP|
zd9k3@c;W;fr*j8=l3KS2tlD=~-I9%ARfB+NK%1aOv%0gLw(c&wbyGgi`E*8d_g=M&
zQ>Ltlh`g4n`TFuIf7kFszrOst{{O@F=a-v(ifT&zmwOi7J6kQkrO-?B>D`(?e>Ttm
z^Mt+r!>@mfHB-AeO_yg{-Mjz)qWmA1Y3_l4`Tu{t{{P?ozxDTZqbmL{xBvZ-n=vri
zqnLH)td+C0m?tmsxBvI({-00!|99K}f68z7yJmWN*y+n>cnW9jiBPjW^EvU}+03Xb
z9m0iv3ugX&{P*Jq>wWcB|KFYc^v_3ILMP>s?$@M>`Z)jBT!-?!SSH9G6`pkO_U)x>
zG(v?V1)I<KEIL}EyY^U#^lE|Un!8=}mih?hrLH>VW4UyW%PKY=$0fNnCm4TUxA}FR
z!(pSL=E99(dp@Kkn!Wp%a4W#m?M19{81L2&!5@{2WJ(R9M3aioew)T2`L5#qN0X<=
zVxJWL3HmsHujGvbpWDy;ytX?me|oFsBe96JlmFNlZgV>+kk7JxkK=)zZ?CQR7W;Qk
zU($(<mbMCIOVqv;W?QHwhGp~Iv-<t3|M12cf|F*P$zPcD;>|Cs9nZ5StXlXoq_OQ@
zM~RZ0+iHW2-m~^JOEopN<vFpPaQ5y_k69Vjr24RP?csMFTHox%WZC=^r==OYsAll_
z3;i}T-Zp>jZ71b}VVr@FkD6OE<{mi1tbFwDuRMcq!55dtTs+Hgy`*ZoXz6h?t!|GQ
zhaNpxs+!ewR>6CvknGFoRxZ_7#`|Wh-dgWqc<ejV{q($`TI=3)F9wAxIi>eFRD?}6
zm0YL9u4hhd6Ee~GTjA$-b}b+4ItOdp*o9q3^rW>PY<lc|I&{UhnCyM8a`~^W47^kG
zx$da(0jZh#DuHT?B_*s2Z5UR@1$&;}pVu4NUYsk;Jm1=TzP9F0sh-{PX%`k>nZopm
zM|9#1PV<$O3_tWG7M~ZL-BVb0f3x3XQ(5`KhTmOlYY)~e%-H`cI_KI#={qWRH(p;5
zJ!ceE&+}%R_Mg8V1&<dx>@N(8_chHs^IktBRQy-<;^^$g=aResE)+WzJgfZkp80&o
z|Ni~^aq;!<^XvZvh1Y(6HrL?bQhmXzHFs@Uy8V9@O*!5<>-T*}TZ^{Y-)bXv-@5Uk
z;;o$UuL{N4o436#U|78+;<a$glcnk%XAeK*WYhi{ziN)&{F)ixmp}SHx5c%mYDv;%
z(cEuyO(m9Ab21sSPCPPGC3@BpRlcZ=Pe1OlnRj|o<F!xytT&wE{WO+cVEDBnR9=3E
zn8-V~6vK&A)Mu7e&N3D}w>;HSca_&X_hn(z7M;$@I&(ycxoTCYPLf+%K<L$^4)Oja
zYx8yQv8l$Ms*3YEskwBDaiU`X!)ZR6pY@(AJ2zfF<f<Iy!+R-VUyc~NI75_|WwfO<
z&!=BU-)%V08Fj75PIr0e)u^*eR?pHBK6>PIKql9MzS74Gwk`U%XC8i2xPk4~&$wz)
zL&1qtgicR+7CWn4Q{6!K^{0xsd-MI@`}y}T)mgrL`TDx=-|PSG54`v^eSbx1%|XrJ
z>;p4|{eQpAvzISA#V7dvS9imrSw;Uo^6&p~{eRNQIsaeD*L|P;|Hu4)@m{<By{`Y)
zJ^!x1Wutbv0K>IS^ZdKlS{43#(|!K`r~SXT*MCg^|LStT-ECjvMvtIRA1g#<IhNPX
z>%W{h{h507%*oZ9@pA9$za8bbtN(iP@$cpP{o?=Mz0RM1zB}v#o8bD?+TRsbHu`Qi
zd=D9I?09lSiED-Jp2+Tr6Si#Znk*l9t<+NApkhjB#3q+zHY>##luphs&~^2_`S5!s
z(?+c;Hx4}GOtZbYRrh_i<hL~^3X*?FUN|1sXmDH7;QPO+Z3mz8G9<p3p0{Go{OI)J
z3yvPMR_ONCbH6ARH8HtcS9&JQZtG>6S@$0_O+C!3YSy4PWBb9q&JXy0$p<~3F57ZA
zc1q4d&sW*^I*xvvI!}GWZ0EJ?%ZyFerB~<&%;gX<k#%sY+0?i!_Ju?A%9EzQx7^ye
z)cxRw(DG(MVTGW;l?J@N`%bw2GR)XxtFi3j8Mi4j+}BJFUp;%H5WmLxk5zByWt5n_
z4|{z<N6XY>^)V*Fj*vN$`wwdTe8$e;nbct0-@5A>vr=zxhxEDQw{rM6&*`$+-CJrE
zv$>*==VM;%RF+2jNgWfMpP#(Zuwnbb9S3FKMIDq{%)irJBcv=!`YgYqV51lJrs>A+
zzaOmm$*7RH-BXEa#wyPs&DO}7Q#~UNrlh25AK(A;B(Lse-|(c%cg#OZ3A4R8!O*;_
z?*Bv1M~Q#-DsE%BQ$K@q(JWy{q34mT{e=;SQ;YkyPK^)Uts>@M#~^UB(1GWI*^Gl{
zKTWXaOJzQ?XWISm$9<zT+tw~)RdUi4<XE+!yKrIdes-JtSr5M1=lA9Eg=pMgupm%v
zSMH*{H=p;4h5mc+<Ic8T$8G`X5{qVs=VAhz`~Up@|Hn^8P49*MUDdF^CSUgKdf`>z
zFJ2g}>FuR?zVhDFzlrrbtncmJvA@3X`=h6KkFW2Wzt_rUZ}p$`^^e(vvZjUgc&f!7
zJCytJSdHq*_a0iGCCpZBTP1fSTE0)J+TcaL0BhBiUmw>-{JnG4rD%TX<_W6{J9<3M
z&Uuh{mM?LE%Dqy9HD0v~**t}elRXU=J*%^_z1?+8im~uDchED?J-9Pt4(+pB_q=_w
z9;@z*kf?Ibyy+`vg`Dr1Up7hW$!a;bxQgYT%Pi0C?(8^SsoNd(gkjU#uyfgw;r<UE
z9ocv=Lrg>LA<u!3t!w<2ro5KEb}oczqT~5pPc9ct?<+rUlJ4o*y=nQ<O$BL|0bS2d
zN;m{^95^z0OM!6vj9<~~yf+Fdh$WrVT&nq0Q}_8%pWstUoCRCU9K9U^SLZ8+XM`rd
z6cSVwoWTG8BnPX-hK?sXH$Cr5Mow88kmS>2`SkC<1MY@Siq$sr<xYQ!xVHUuRo(Z0
z@Bcm9|EA*C$>sJnwdDm{Cw8caX-qdhac7A)%jfA66T`fWZp)VK|8cWF-LL)Shy8!<
zuK)jd|L^(t_WzaoRR8Pp{C{tb+Hhy5UEU<MLGS68l35o2A1$x{vHss5{eM5_|M}b>
zS8MZJ^HJJT%UehEXX*Sjc>liq`0l$OIsLT-J^C{L{aNf^_vdB${JZ<F{S7kNU-|3m
z;Z0K<K5g_cd+~eu=iirC9?&REOY{0{BKxh{bB)jG+28$-YJK;S;#;=n^)8)<l`BIV
zJWf?=Efr|m%Bb{p36GlK$;>8CMtyC~iH4Frtp|Bq3{F^Wmzr9Z*#Bhfg^!=zR4*+x
zs99?2y8DLSacNtx4>uJidPfJ!3KsKoEzrB)J>#CBzQ^ooi#ZKeIDXpc{lL1qYR1Z|
zmo`gh?TmM>?Y^zBPx_|G_lC&tf3`h|WnuD^+9~9dWxH96(>lE=vv{lP-e$kO6E*ld
zU9whj^tK(`HLHL9!9xs^&AStJWZPRLox8)3m16iYcSUZ|8=of~k?ii*<jVtCOafh8
zmVfJ6J!?hz#!dMzHI?`7y!rdq7OBNvC;ONGIJmlMmLg9~oJ{UcMnNx?jW^BKuAa<z
zOE5oWfjIlQPSY)SK7<r2uk<jTW|4k*jn{MYYN2OqD@zW?mKw@0p5@yr>*0BLUGC-R
zUa4OkcOFQox+T}1pR4tHeVXW#>!o7b?^NZzif>jGWOfPhnK`+GOM7$IjZ>}H4oXUX
zaNhm1^Vu|(y|$_XCuTi(W%BCzHQ7$@Zl%wyle*WLKChW<Bh<6HVs2FuJ2$tLdY|_8
zEC=tf4pn1SA))x#h~kR43O(~}i+Dv}ZS!i^JE?YWcf~r%PL~%9hAu~<?w{aw^16QW
z!F3azTRbldKmN11zVzdT^-s82|17<==l`uT2N&}PGs6GO&7ONkZ35SVY|eifi`E{R
z>b>>+Gk&8Q_nZBUsk`=G{53syy&dn#`-amD#QZdUi|0LmS#aYm|L?<h{}q&5+>?DD
zzi&@<VSRmlP33gEx`X*U-=v=tN-Vm%(7vWC>t161<>UNUp6z+#-ElB?6Q|tjx^)v6
zR#u+9@UN(8DewB?JJS-DJ&{xqa&uEM)n3al@hZ8{z_s;0w>p#LmFIU>onla*sPc8G
zv8U#x1#P|auKcNtFYCG<)cM(`^3hKN&Ti4GS5K{6<G*^X+p&3%4~8l|4t6}?65(;?
zbN`t~pB3K-^{h~i30F9yq<K1Te(<8tmp9D`kL68}*x1;0Gm0U(u+{9O$|Tk$0zVeD
z-eFZaS9&gQW`_L^(bq?N4~l6^p3b}}-(Pv|@|4q&3mw?3KijEWp4qc*kKUVU`;(@y
zbjpYdxHkXakhUa6<N5b<#oPY=y}M<0xymb*`BOe`G5@+#IWux^f?(nzk&H}fi`}I$
z(WOjw{YSbMRq8be1)n>kS=^@+E+Be1(s#o1J+nU-daZvSC%xFgKkNOu)65<B=I{Tt
z{QjPM=U+Z`yc*!3{qp9T4dsz(tv*{tqQf%`ubw$kd2g?e{l8}UGrPCU=B@vFz5ajr
z|A+M(zI?g=U;h8m`Tss1Kd#dtFg@vXW#Z%ce_t&B|0@3P>G}Wv^uK=}w`5Ck=&te^
z2M!!>Ryp^*U3~rf`EmBPwt{lgMOF0szsJ{ndAa(x`0h+4&-L;1-j`>J#hI*`a<6s%
z^-s?peq2*#u4?2PnKrR<`i!`_TW$1K+s*NR)jIXF1xLfwq@^pg9&DWS$VgSta{l?y
z1wwVU`{gfB>3q89)Z?TXWs`c>>_}g|d5iPX?(93htqZQ-;Y>4);n7-@lPomx^puYi
zv?dz}*Y^MV`%NId+LHfM?MkamIc3k|)l=MkU%Iu%dY@C67gH{{SY>6OegzBvmrT{~
ztZomwS1xr)d@TOuz{80gH!s{*VVE7&Q`gzGD&!pBMOB+4NB{cF(bD%hD>20-#P`PW
zOAq9P?mTnc`YOD`!lZ5X!_vh`Yl|dI%!~IQuiPx^=D+>9RJx&+)`c(o&n#A*&OUkN
zc7Bdi{^IIa=B*Z8`gK*gOVo=Eb1$fw#!o!Za%57?idX&}udNsUdnwS*`&ukvoi{^h
zN*eQ%OG^8uPS2?<>7DT6?leQrs&j!ykKJEU%3jErZ=OC;<L>Rpijxz0bay@Vcz4&V
z-^PW{$n}F}#R9|D7Hb3RzHL3*CNg-k35#tzvc|P1(zQfa^~`bC8KGBu&aA20^VV&l
z#h)8zyB^*4$h~<sQRwG8rUH>S1qtjk%x3Lg)+@hAZ5H3j1X)v-B&FV`3ob63)Rh?1
zy-=Y3m6LSpsxzHQ=5mpOHZKJ9{jR?=-!Z4_b7@4xo1~cEJ}Ml&Z`L~gP$-puVD<I!
z^{?7*zrJLwIU+B$iD!MzmDfML4jVoAw|=A7?37ph*S?1zt$q8uebYauCC<(>a<kcH
zhZTm%KZ<<u@7wl-;8TyE{WUqIkm>Vo@|2)SbCze$dv<rv{r7#x4}W}Du*X{F9slmX
zdG$6j@ptauy(`~;e79}fovBa%26LUCZeG0SX=ESMzlGY5v)R{wKXo81X3NU=*7Nks
zSXaee2-bdg``ZFn4~wVA&Hw(plkIcabM5LY7o-=jTFLXjT1tG|-jfClr-WW}wkj3g
z*8Tq6-(ya^(ixvstu4!J&ld~1MNa#=XPtkx<!n)7Q!}~gmVI@?i4zJO{P!Gh-}$mg
z=>0k;@z;H-52pmrYiXVK`Q^vEC&S98Nl%_2d2NHKmQO^=q&?np)7+ap)CIjdRVS*P
z+$||!tIzWEh?2}s-AQ*YGfb%5!JzYei}kx{!J65PX~N3pA%2=~)7P**Pg)h?5$QTZ
z{b7XdZehbON5gy^)9biyUp=j2`Su;Z%A(bc3YLP_1}iQ4G-vU~=<mz24k;|j-TF0U
z*Nn9aOO6DqO`aK{ad1Y@$q4b{XY;Qt=&haiUHHEL@kvIX{(L!mJAQe6Ox*qH%k%x-
z`$v9mnRD<^QlihBnvxAO(l>K_`+i*h&xgx%GK6OR`)vQ`YyF@2|LgPb)qMW?|DS&S
z&*k>NzI^Tbyy?Zu%)gI5%h!Fox!nHWL;m-C<(5w!XWw;wV%RUJ=;ZhL_3v<bTibon
z=Xb3-@#p61e*4<57v~rWb}rfd_utdP?XxdW7Bd$5xNIVCh1L3WQ?K1B9tusW&mNzd
zzi-#?Pk#^JzFefm<+0}C)T;|puLjkWU-MI%{%C3t6Pq!Eo~OogmPy;+UtfOp-v|G?
zy6-{tUvKZP`~O3CLmb<DzJ)Ww#jejNXUuuHVqZ)1-7AyarfBSVy<P9&HQth)Kijt#
zNSdfL@Ee~}Q+OA>qtRibq$J<;DBt)+7k8a#mhUkSzB*C%$t^Z}3HPth6OyEVwNLag
zp5VEXTiJ3_fc6@Wv{%i$&aL3rsgIq0TqJ(SCU%*{DiiZn53%jq^<Qh_zPF4V21at;
zU(d!#x^yn-J?ygNZ}lV<7NuratrtqNOU~*|FFK&_y<GXw#Jt&zvlP<4|NZ^oSKzmu
zdi;{h4o{nt{fWt%!K}_%iD_!noEJt?Tb||^Zxj%*jTAoUT)y7&phf5BgDc#tzuJ`V
zQ+OiSwlnGv-^aeCOPAhaJ;=X-BbFn_zS<`Gud?;+eFqsj%s0RF-YU&~|Ht#Q3xd8r
z`I|32Pb2Kz8|n5bLaRQoF5~&UaDqotV7``i{p3h4aj|WSj=d@UcVO=RgYhpFCIx;n
z`;#wyW|J+~w<h&XYbWfieb@Hlju_M3<_}IUuO<Wtes)$!WSqE7YM<^&nRfeWCtc<o
z`79%;)WB)3@vJ~7_xRT{_nmv{?`|+utWo7)E%{-4YtdBp9Pw3Ia%ZN$i0nzukTc$C
zmVL#JQL6aewqG(*n?L96O3wco)~@t*nws2fyLGeYCy8IvuG`nsdH3JV9Sc&D4hJvR
zcCpWk%*pTj*8V{6-HFWYiRv?#nfPDMvzPB{e_U8^A=7vK@V^cAGT(k5uB*TEmhbo7
zj~lFI_>VufdEd9$?{cu0Qt5@e5so`D?(Vv`&XD17PuTSrv$gNMQoQti-vI{WKHfFT
z?|hv_F9feWbUpZK+q%;!Y!jQ-aBf+<%(I_2E7WY;VR1IGWxhPKE3)!yEUd3Py0++@
z<6}P3G0C8K(Hsw>^X#5J%iEWK&5ZR+wG|af;t&ph<GhFOj!a}-t{C^V-3FW$N%LAJ
zHLE@LnXGcz?*033zaPtOxcs=%cJ|9R-j<2iwoWO&=z8|;vCh;<ChB5AOOGu}P!n6?
z`6*9{F*qtAre(_%6}uacISP6r_i2A!e%D0)S!KP}>&Q}3^(d~(Q&Q|tFIsklbs3x4
z8n-o>T<LD2GE#ybyl$s<Eh(NgDdptlInTB{zgtxF#IVRq$@oOr)~qXUr@fQfzgd!T
zOXlSL{;XjgSHrY}T;i78-TS1oSVuqR>(*Ua@<*q%Y*A44RN`a{uKoSt?9V?xcHcMj
zdwMbMZT#Bx-}U+3AGW;UI8pDgVS;Gete;WJ%MbsXeBSO~@g^P1|LyyKp8x+N{{Q{|
zk2l@=UH|#?{vVGDzQ21Le6BtI&m(`kzi%d2&RTM6ah~hnEqqQhJ|)`ivD^3K+rzuR
zcHEJTule|D@$b6_Gf$p*xnaJ2Rb_q6=Wl<5a;%Llr*D$bIG=PZ>)iS?!uQ`Uzh2(H
zyWGG3_2pA+n+}{`IkhxYSNb=9@jN5LtH0t_ta>K3$f@_7>plBR_y67vxBqjp(J=Jb
zg4I`r%UjpwHmwg(U*{{vlg2ge&xVxytFGif6u6}#9?kTlZD-)indb{H@3>tyTbTWh
zpp}I8f9dR0xhY$0mXw)g{A~MovEkqio7cHz5&EutjPDPeNKiDB3q7toRe1L6UAOYB
z^LMS~Zs9z1jd>4K&jFsa<YPB9LUlV`?1g>_o4u8D6yKQ3=pw#droSuwi>c?)<`yBD
z-rzg#KffODT)sjpI-=#qjM$lH<-Vo0&GT2;s^oSfYfaMQ_7%Y_mzQ;~-59D;{Pj;j
zLvcdVoZ7hLbv8#h-mO_&kle<hUF~pW$3707UFR80ujJ*j#qIaJIYrso^w`_a%#D)a
z4a@3hSP8FK=os_yiF*5PafvxvA6Vu*dX(NX>s=w6jZgc|-En5W?&zJD|Fmf5qq;@D
z#aiL<j2m1S7|+ZQJ*Fn2nXPz#`j7gR9Y*f0>r+x%(!&aO%(-mFl&!GX_IS>Q{~@;z
zuXR~^uD$za{p>rR4?KP=u|n+pn_ruvG^%Bjor0X5QZKxf{&~_<%=qN28Qy1{=H0dn
zXfwaEL7HRkYp4A;*JgC=7Zu~xR11GA9M9>iV{uq-m)hgD(>dE>r6w5F%6Ghqk=nnR
zUpM-8{u$--&zQV6&+TsBwtsnQU+wc(VmGAbtkCCLZ}M*XdBGotbWZQRpA#j;G;g+M
zZ&&@krQ6=~+IJRsO)8%Ce9!&&eBc&FdAWrg|MABU3oLv-o1FdpZo}=j<+~pz+P!&p
z_wF9+C7H99w=P|xa;b^4rLl0z*1d5%j8wIrtf{(@vRP@7K}~e+!Bs}bdrqwCUbi$P
zvbMNJG-`b?<77{f??>JkSDQ+-1u(@Nw5XVU^xR!Xe<z)H`fJjAOcqRQUc|D)B}4Fp
zN1#HBrspZk^hBZ9(%Kk*{^vzuy2m<Lp56L(<l(F$uQ@UclU;pH&BC0fv<O){bWBdq
z@tn0;<)-ZS_TP{9%)kCQN^Az_Ay;<hNJDFJZMSE;_UTx<r(ZfP)7k4c@6(gQ4F<uJ
zm{L8tH}`+2Zs$}EzLexLrM%Ma%=61PzxN0R&4@_za(8G<(pWbmJh<RorB|lcnVh35
znwk^SR!?1CG)Y+Ga-zNbx65<NE!RE|4Nn&H<eXI+>uV~)s<uoj+2FO-`FTqVN`+S6
zd|VVY{i}`q>6#5Q->0UDv2`Bu==ER>YEyr|><E+RkwYOK)1Q9vP|}*v-!CuQ|EcKS
z^yQy-W!zKs(|mHG?|WqJ_ZA(`6m|B>pOwEZzchGX*8cy?_Ww_}@BbP9U;e*;mCfIW
z{{KI0@Rr|O_w%Ou{5Q*18cn>-^!va6&G~kolT;$-N!X_ehFafIR(ik6E-fuRE#1-k
zfF9%j|LF}JF1r^TO=z5#oWAV!sWoR-ojTJ|U~U|mm{yY@7#LXG|K`=JH*Z$F@!&5v
z4i>b#_uF=lljRCwccGL0%a5Ozx7*x$?s)t5<Lx?@Mm1-3Irmo=*V)L{)$E!k&}`T`
z_t(!|cK5p`J&`)4^zMjD<)X(A3+&|V?u5m5PrZ8LX4lMVrB@f-b13+@q9esvf2qnu
zi6t{L%>$ZtitLRQFaPu9a{HrtJKGQ&t((^l=DhxA?Hy+Cc;Q)W!|J_U!i#lH#FQBg
z_BC~E*{~?w!;*L7|C4%&><eBR`vtH^_r6#YKd-M<QG82l<nI~Jc5}XyUS8|%^yPQ>
z@!ajwE0Qlr+<5&$@?Puo{E|z)9q)TTZ#Pt#y`Vli!|nZpbGa@-!DkN2zR^7+B3pap
z(UqRnhL6(%&hYfj{QFIfZ(o?nY{hA-Z`}QVqpii-T!~kuZ{9^|4UgA7y}1{+n0nv)
z6FplyAz{{KzVjjehjPz^Oyqxi%rID~V#VP{g%2FA2JbPI7Sdg`hNI`(-K(>#OiW*-
z@Z7IxuKn6FH>^CMJwI{jgKHBScll?miCDFMmsWgsmuo<9a#LLMrybVmndy=DZ3A@k
zN_51wvop^w+F!cys*Y9Jy|*zA(kVxdPd(T$Q_1HBlSKCP$Ht2jVmT+S@0rmQZIb)@
znC!n<>YKN9Jd0)N7r*pTZlO(<t(4a6LUZL6PrmK?`_g*O#E6@6iK`z-xZ5sOwp_OT
zcr5SJyNQDG2WPOisaq>3-OyBj$NK&Eyw|O&CvU4S%@>TWvS_`#)73J^(%-ar!_of!
z&b@^zZ}=Svyq+TQ{>9l+4{3#B;|mr20r$*2rj%WowO8nv{oMH5fg1BCCH$Hja4hr4
z{uys=m+y1_{)XkB-{b!!%HK}M9jgBrZMuJdP|acH86J~VDkmLIob~)=h0Pp!zvGub
z{wc7M<NusEYk8*Cnad_iHGMxPTFE_otSJ9%$~n!mC*~MQ%*+f)Wcw}YocDQ|(u>be
ztq#54{p_s&rG~A#Q&KPcZwtO;wP`i)>)IQICGpZnKTS={?Ox-Tm2x-t(fZAcYJRZp
zvge!4uX!oZEY7X_+{2D(Igi!S1eP$JykL;{reS(%?0WO)+`TLHmaX0?B^cdek?>f-
zKxL!B#+}oHoLC)sk}8g*Ty5HVj$fs4o^kp-zt1HW20PZ6hi86Q-z9x0Y;{-6N~`p2
zTVWNqb({j_+pf)OR<yL-smGSaUde6MtYCC^?T&S;(lnR<v{`>TH?m{1fx5a{xYq;b
z&RMH(rY;OxF{5*H^Jf#;bfx%p+|uFu=2=SKJR27OCG3;uS(%y>#q%P%cC4&cJm)ca
zitFmirG6VOtZUcv%vkd3U84N2n$n>7>!;h7PB|LO=v~t!Ai1JpPE6v&Tc=gd&GG1%
z*&-WsaS4mhq~du$w*7m$Ti)mE+voY`mp7|y2><^->eSmJ)vub*=LosHIP(6}7yG$>
z@9){v{pA0D{Qqxr`#-<`KmY&fZh8Gb`Txi3zaKS^_nMWp@R+nx?G2X%mzmXXO`g_N
z{qgGM=KK87<Seg0kNV`6&xLmK&r}wKavl*-+Hv}8n4L|mIIqhxv#;tZiC#YqmOh?x
zZPU7q){B#NWoe$hx$osp*CS_qdyDTqIa|tf(zB%_q_^YKl*q8|g?g_p_Ma`k5q>-N
zg58l9?-nqX3yal1mAV}i_rdAf{*CM6p4|F>d3Dy78*Ajw-8@imVDAU{^;y>D`xoZ(
zm-2E*p7`?dzw!R&x*Yk`BL&}=)}P-fyxM$@U+ATX?jYGI6RJ(lc+|_Yd|&uQf#q_G
z^V3VJ59B^^6%}pclF4nh`Xb{ja6Bh}b8qjmCJotb$=9Pc_N2|pIk~ZL`j&8`XRiZ4
z+)7Yj)oA<v{n4$$70t)GR~o(9wKy;1#yXY4H+Pbo?b0}8yB&9gm*lMQT+Ot9vwFvt
z_wO0md|2MOcvW4`y_UJbxIOAJ$3@4xKW%EBi+tVfnLc$(I8TY|-S=TH8MQa9y7Vcr
zH~F2=>Mv2NbC)LCcEtUjkv!#ag27?~-Z^u&EjM4~8D|@}HfW~Q!c~lHI(F_Gq;{Xq
z3A`zO?M4RknJBMBH_bB%GiEt0{#^BOLjSyJxpv&ynygzps~R(Ii8-8Q*;u>iTS%xB
z@AexKpLbuXl+ZnV^6nG~k*U?SlH2tAtlOPq8LGBi)@z8oEd8g+zEpbUkGM0Je+%%O
zm1^7;Wj5>5qIY?Td)G#<SDdwrH{V?9&<dr`*7oNcK4+Jl%scV&z}-L>R~L_Mtzr8E
z*4IZp`<H3F`n^kZwavMC^M$ikg%|v&J@~viIP1rg+&}g8D?Mx8{+3)E+J3i7c&CB)
zsY<W*$2B&xe#;NPEU}Q`Z-4x-#!j}sy>i~CDa$pN&slbO&N9vOpGz!!KPx`9^i}sz
z3rJ!&5}Y|r%J=2hs@GF`6ODwEHgbit<lCn#7F+N>!8!6p@0&aI7jJxve*VPv*TPT7
z4{d0f#dZ3Hi)=P$gnsmm@NM?Zd*1d{vE{4Qao=WMvTv!|>`4=Do>VH5Ht?HLES4C1
zYS)_7*t;EF;Ze0$+HNrZcU4*L@+?h?rKDV`t@XsRht1|cGLlpZoKD%us(EhG+jZwm
z?``%22L;3zUk$sv;=<LtSy5d(ja#IC+?#sp?6svcg03!8YujMVQFu(ag<+FZURtY?
zvG(&h{hw>BXH7TwJ!$h~=LvnxqA!wGX^YN~lE^&w?v!@VLC-TCdmsJJ4b)xb7UKDT
z-liv(ucNxeIhM|xd!y^<g`G)<B~mi#G;<s##l1AC^fGT*z4B*jY4q7s6HOl8c%h&2
zhD$6lO?Oqyj8<2b&Is;h>>hK@gkPOMy{LAVO?^ex_oKVFzrS}z*!sj3?~Au{ucqqm
zl<7Neqk7Waa^BM~Kl1PGvEKjpjluu+|4;4zOt1gb|6jqf{-62(M{lhK8?PLck4rlI
zYsrqgQ&bu}>V7j6CQj4zy`(e$-H{20ODev9s5oO^x96UjyJSk_*PpxYg;md3lGuA>
zi`;*wx_Orvmv4RXbem25`s<&Tyk07m&nDU6QDnZA=^XD~p-Tm)l{$kXwq7q?8nf)o
zq!Zp(WG=l~uRF~qEuj3w;dw`1luy`Dz3lAWt-B_&oGjn)dbjF5vv&sz{P%=y{%|4L
z@M9m#rK@bVnXCQh{+h!PJNcLnuV4Q@k3H76?#8e1`|HY==s)YD*~xoM3dgxun|pt>
z2tGbLul#)8xmz#pgffNv&*p!aa$p79lY`kk8NG}88B)Xq?({^Ty;r^E?!$2Bz_#0Y
z-ZHlvW(JmO`OMIK@L|u^J-pkb-(O^7kehPv&f=+Vy=lz`lbW-xl*@hye_6X{osV+r
z%Khom9UB*J+<vw8vX+416&)VVm~iuLU*8$dGqO-v*z<92i+FEI%f6XRnO}k=uJPW;
zYO`ChzdEc*pHnIHxq<WL3#nIUPdhtDP`Sp1+4J2UzutY;Ij(u(vU_)A{`NaId&=7A
zds=4urWNLhnQ$vlvYY>@aPK{%4JWjJG|Czz3zem6>%>fnj1V#|p19-c4iPr-AO^3c
z(qVB4Ch>`7KR+yfS7)(5nuX`c$sgaOm^i-o#od`3@UHIri{H~UMHlcTv6x@m=AgN$
z@YUTFGtc(lZV_GovGAS0*`5C^z6~D_wVyrHHt(}cqV5yDcP&;QFPWXZpTKyaViDg?
z$qcs(3l4sro1I<zt!au}&Rb>y@pn_+y$zc`=lK4(*ZYeW{?I=f&ikR-{5}88?0{2l
zJ8!w)Y&sUlIA7$>{hg84IhW7u@XPdcS*Cfm{qV;Ui@E*BAJ<s;9KW1scjmLr+;hhd
zKQwqcXW5kHE3bY3Jm>jgllejOK9{J>J8<)%ANPT?;q5a{zS8qcdhKOtG$SjmwO=t+
z`R#J<qBT)ztKTl4_Htv7yqWUbyvD83r&PJ(`P}~8kl0f7tu^f3(sip<pI^1C>Zmy(
zy_<XA%`3Nh_Nxh-&Xb(e8Yv*uH|2mp?5Vdux5eZ?J2vM*VtJ40G}oDO+2x5UVn$l0
zrKH|_q_-FtA64FWcJmC2jXsB%Hf(m7G~?nft*>VS`BWxdc$KRAwfTgCqS!r^mty5!
ze@|a2x%hS0tZVsGm0yQfIjm=W*v{%_u=VSvHSMXhme1Mj(>bqu>LZo{wvNdzo@dU)
z#dvSbzL*qowri!-j-M7YEIr?TKC5AN%Wv13)50fet^6)0`UP{f%sOu*TNK$PdR_JS
z4TtMVuflwMSFYdO<J59fV)4co_j{CIl||;iTax$mLBHl~71No@{}|S8PHya2@<eUo
zMX58^;jLO-vo39YQMA*p_Q#HYXU*elK0Y&0?>3j+Gs8B1-I1OtE<#UTBoBQ$dOiMM
z{6BWn<^O-%|5^V36aW9jx{C5SPvZalT0PmJ?Q8n!?UjPNHfiSkG?0A~*rDNht3}vF
z;nXC#{^jkDe;$9nDElts<jb3;JwKhR`^x0&W3~x5+L`O-Jo$Y7p0DNqpUdU{e>lE;
zw%)dTKHW2CWxAVmbgardDm!uJ%qX3{nKyaf1?b+4Y{}kuV=L>ugZcZ<Xm9q|@X~B*
z_(|sEr}rv$z9_sNThJeyvWWlk@3~7~Y(1iV{~NchS?cXgJx-r|m2cLp_<DY2!=Jzp
z8{_rA)L%*7y1lS!uc?*9#yal6y>BD>bv-NgWY1RR@s$$%l(gmWRMje-?XqUp#|)a6
z&2Uta3GrP$*S|z8bi3*y7d;+<+23#6UU=-ogkK3eJ{^}zmY8m;WXy4f?O|fVlPBA5
zTAJT2Xui~(eXxG{<^vOy*g~WudL-J8^|U={+`VuDe{%7AhP?}C7#cV52ngP1ak=(n
z9#4^=8n1`RZsFN&QB&Vl)?JBxn&Id*)#>rycT&qX-`Lo^?HRkm{VPtcrAN}jzY6&T
zu2)~Zw@J_<!wz&rU-SD%ZXdOLwrS5d2q_osyeiB&OXBX_GaVNtblcV?Ih(Hu$xHE0
z=6-J<aU&wzQ9mY3PpICLQA~qz$*K)f*K8ElzUy8aryaIF{(+vve5-V~$nNmY=#sT>
zVvTnRC*{vz+11S_)y5}w@=VP7lKvBu=L)QL|8|XM$2Btv3E#heuir|K4}9@v)x8Z;
z-RAS&ugL4>&5u9+bHRgp&gwV9zkVs*aAUZaGvoI8ms_6N^1Mt|Q1@DERnzM5KS=aH
zch}tO|Bl{0@bBHO%9FPw3%*Kd>M^TJL`zEVbC#dBcK(lFe!DBfPuwf^Q?faK`Jsh;
z|L2!Av-pc&6!W!L&U=11ao+RHSwZJ4{bx>oa(K=&P5EawXD6xq3462oA35o`AVmI6
z$2%3iJ{x29?c4t^m>8oi=<xT|>BQMr?9Moc&U$#)dbessE7Jz+h*wHm+-4qHT<x&;
zPpP;2ePyFr6=&niTd)7*^OnDQTxas@5BJzN840$ZQY~Xy_|yLk=cJ}ZiFvP!Vy{2n
z6q6U_b;qr)-zs;5c3Prhv94?Q+e@up?CS$}v!tEVvGkl~vb4lwj)LbWo7ub8t)6Bf
zC><sK#%<e_uiIwKG~HOD8@jqmU(epKI9$Cs^USlr&)Ufa0`i)<um5&16svFDH0AbZ
z-Sd{O>Xh_P3*8PEdnUo~REOP5{p!{L!^^C#*RmPoQ<`Rl$_s`k3mnjSa70C6>(k1s
zNog$44rk8eWB+urYwh->I<EPvENAYTT|8-l(9?tO-@V%6aQ)G(KZ4998@8?4u=?JY
zMcuF87HG|ouiVtMvP)6+j1s5PQV&l}RY5^9wKStN;|xJ#{nXfNMY|_z`Plz|73-&Y
z{`iqOLHpv?Wi46ds;g!c=Hq&=?&r7vf9ijkEByMt|NrIu|A*`U)EmV9%m06Q|G%%{
z`ToIfI<GILC0t2epUV^TQ?4<6?lMk^>*ZU6gq|HXk^j`9)zT7qMOvnKhS>E>(O&}Q
zN@UcQpSoZ3^XT;c;5hqx`~EXuW6ks1rg>~}Q~27%#8AsX(WQ>t@+!-or7cz1_*KP;
zEjeMs!(as~b>m(SRnyX6PuJYsD|c!2lQjzsj}>pfdn!8QimY_rg<l~}r_{Ex?<u@l
zep9ZQZU5bnSj*eqPb#zx{dUIv7K~c;-fi2uhxc>sx_4h$B=zEU4EH^qH;b88zPykf
zpsR6YH}j*-m#V7!C7T|!m8AzTi&<>@pi>kfe=NMbV9!CRq~jL9auqiAocgxHdr9P<
zw&<hGn;%~_-M6>U=7N6pX)&%`>EsK}UQ*s{>sH?Dy(hsqXX@GeddrVJkx3C*AF=Fv
z+3tPuOu6#{%hkVn{mcpKJFuj4ve|Z)8SOI`)Nb6!UUe<AHZa-Uf7y!n4_5FypIu*j
z(BaFE6E5>3FKXv<WWF;n-?3_IUQ6^<(R<})WoG>xljTlzU5}UCvPQ$?u|sKwIb-IM
z^_zvyNT(m>coWn4KU%8rZ_tYF^&e)I2Gy<D@JjUfnM0Z<cNvMZ9nj`tC|I&7!$4%i
z+b>oEIdclFd8bbQJK5mqy3?EIW}G@cXZbo4?&k{E88*!mYJ6Lo-LLl5uwd`D&h4pN
zzIU28CM3_`i#{7t=i2z6``UuIIRUd}S&BSoF4Zz`yJFMBbtCs-N%@3deIB>^Z7R;H
z-FXmy`+vDHM|<7sRTJ90q&><5H|c+$qo4kve23KkFHh>0ANwen|Ni@w_1j7$oQvEG
zZYUgiufn!&>zC{!+jjT0Ms4vDI%j#dm`~!|M+^DB&ovfL<^(^>Jk@^K<m_devz6y8
zuP3ScT0Wh#Eb@|0<usF}CYw{*1%zhII5PFjTDGPnceB0Kn`Zym$6CeD<X6m{5$d1S
zndlOFn|*gf*1V(d<npGc%J3a|bLQN_yZJ1q)3WN$+M2KHVA~vW#FIOz>t5LI&_@q5
zgnwV~ITYzp^g~W~=WJ2+=}$D-73Qz0*!k*hrS5f(FD}ZpSLN08<kl67v`t?7Ds7io
z@M^<*hXgDRKajZ}G09}9jeq7jMThBUDrYULjLXZ8JgX_l!gO6KB+K>s?X`FERxO<-
z*m$5st~1n8a&593^UjcX(a5>Vp)3h|r!Xyg&t?1~>-AQh?XE#gjxG&B`JT&Wd}<Mm
z?yJ-?4tX1ySz4>tJyC{5h`F+1&P1h<?$*Wow<hgdwQ_@ke@^is6#+9-<Exv*bXSFm
zm6l$fww%Y|kXGPHQJti0MfLKRk960uu4`RY`tj;I|E#4KR;I*H<n!~Kd)Bhexl=Jm
zE$?J-lK0oITe^O=NvxbXt$&&3$C4TM_f^kXw)lzVQ|EKjgTjoX^1D|?UOfHzbJb3j
z?c3w)|Jl#y>%af!!~dW9`+tA4|LeZ{?wkL=?Emks|E+(2Uiy|~+1n0fm*etsEO~ZX
z>o33Y_mBF^Zw5uoI}H}<O{omFDyS%4%=+oF&0g!equXEqe!uVUd8Iqb1qTZFwq4ky
zmk~H6*6NId5Tj%FO0ABFN2@M<leL^^#J%lClbQH^YmeUyb`tU}@r$0`iM%JhtVY7@
zp7J;QjVuz|Z|X{OKHifvV{86W>)!w3yI<V=wQM7Q{%vmUb^WvM>ZWFP%*aZNlW?A`
z?Xdaf@!P6b_T6O^GKqLl7PtP{fiGeA!{5u=JYr-I&8ipHOp&=_v7~m+-5KgpwUc>f
z<a5Zr;XQq?d2ih6#b@W<-YCoZ!ffT6q~lT>Zh7jw_{gN}og!`d%}QJ8+p2@7-4ujd
z7cHw4{x<*2!|SupE=ZR#4cDky?(Tkl-8Qdn%a-ToHBRrGSFU(0H<~H9=-wN~b@u}1
zl}@sj`1oCPPHx=89iP7SehJ7Be$O2(kZJJeq+`k*rE14*wp!N0+3Tk~d}!0X{TxHz
zMw1hg3F>`|?(RwYty|asIL@N)+0=Clou!l_H}5#%eP%6}QfXmtxzmiDQx5EFJi)Zy
z+`zCrbY(KL_TgX6HXMEzSdXpO<BOKxm+o?xbGgrZA(pJ0J6RoD_$J>uv9$W&PIDGt
zqhz~Nw@$V>#O>*-`k%72|DuwEP}}>6Th^A>r}}L@XM8wdZt<3-yZC-3=j`3}KKtNV
zoBm#Jmivxc7k}t}j9$ak`|F+R-%TRvKT>{sthgWkXMK#S{_EfW^!Prr-v6pUn|a=@
z+Y-M$gW4Ug^fZaZ+mtYtneR?db8yj6^|L%1Jd6MGLj&2q&o4`CWcWVYES(d4PV=P6
z(@d|+CQGL@O*^dV?NvD|=#=JJ&GVDeJdM5C)ED{8;V3oB$$Mp}o4ebs{^!n>&Qo_e
zW~=8c(7NiT>GyZ~Y&-Vcozh?bo>(b=?cT!~j5>eZ-j?5*Fk?gSsb5>Wd)cooSi9xy
z8%Ylx&5Z@RmDx*<i3LY~wm70V&&XRy$tZcV&|bEgFSDL*TQ_}^op|)gJ-hnbIC}M1
z`B+(Iu3FbSQ-)cB&GQW_2hU6)^@m3?Pi3B)B%EIG%;xD&1Ll}%r8_mbi%&5KX4)8r
zmF$fvjeOMfb((O*=Au{squzYdT66oM*!$U5GY`h<J}XUTP&*p7_dxgSRjC(^rmATA
z@6=hMI?ul0hEAL0IpIr{O2*+=pS!M?((gOdWZBqxl+#DfEOmw@m&aW%#ZN_#zp9<~
z<!GJh>*;+lDCD}t@r=O!hUO1i#_c6`$82+CYQ0tKOCHQ~pBjDsMd<0{N5d|qUW&Hc
z*C@lYQ(&@c*~SUsjW)Yy&)AqcO-oKaI4sq$$oTsF|6iv6tg)NpS2>IE)Et*fRlDQX
zPcMyJJ4K>X=T&QFokh*xpR>*9@BR74=KuQtANlXs{XYKR_g`bgzxMjy`f~imuQD$A
zatE;g$}RZnc~)`z&Bsw*f!PnPMkV-k_7~3+`tI`Z$0@h4GhT+~{+9ll^MilQ*5Ci@
z;q3MDeC=GPmQCmj{oZ!Xg~^RIcg1c)jm{mxfyL!%UYZQyr42H{zGBm(9wjpi{XSQB
zMYJl-kmK)j)zGht=ls^HsGq{`eCxO2DGQOcTQwJdEH)53V&YL!(=PP#cAMFs`^|A>
zCm%^?%P{-x&Ncd8+4=pzc{}U;BkhH6ZyTkrE=yPtY;{aBfmiO+tYadLjk7mjXtKYY
zrT9{8zt;J6x~_X?zv@j}vGC|yucNWX!O!<p#Ab(@@-{aoeTexkSSMo@)_Pf@SajoN
zp6l0_{hpaS!{Xk<V8dlA_bvNWB9+^CxcpWQ_t$ri&(`*K`5Rk&?>){o?Vz^rIqsQy
zkM6CJP|Kgh`F%#R@z;!H>tgqL8{Dm7C|z^SN^yPKNrzVgs(ezjyYpRMzOahWcyayh
zHktJGYxl6Tb;aj09oj3UEyl(^^`s-8@`i)A*6dqVXx1YkRk8W`saA{oNfLsatr{fR
zg4zsP1d}@!q)%hgSP<56XRGkG4ctAq*_f<LI^KWE{=443ZkKC{bpGOUK4<Z^-<&1~
z!p?4+DitN%H9LfHgWNM?kqZ;|M=zFt-=<c7RAN7SQMir8o!C29B5V5pe|P)zvG0R>
zeCUGY^WUH6nMB->&MH?7wNsdwd6@lZ(B#+N)8fKDG*_Q|@Ba1ej@$fSUj}<WUZ^4e
z^;hZKVsX(ob0z$lHy6%c+#)>l;nerLYok}Z4Ln+C`ZqY*Y3Y=rb4I?J{+1k;?T1ay
ze|~5o`)sFHfyz>y%2UDTEYk%i&QVqO>@4>BJmq=wlQTx0XLKYdF(?W-DXn`~X3BeW
zZ)s;Uza8hTWdb*rp9oo<6U{Yy_cc$Etw%K?&#UztuMcY3Zt!M7M{aWUs*6{*UCmCG
zsHrwcUeKX?OJwUj&RpBlJ1Mh@L_;UfzGd-$g}uU@1*J<3Szpb*^B`?k#Lkyhv8UI*
z*fc9lLqu^$&coghC*&SHNSc+cxlBq_YsO4Rp{{39cdk!fb?s?h){$2V9Rli1dDBz(
zip9!Kww@h%_wM9LaV51{`-G-=?n!N^&{#g3!SLCdBZu6cZCvfDAa)~pm%^Qp-?|%=
z1q3d}&3qZPWs1rqwUg{$S4Z=|%AVrKR^jL59u{02FBWh5FG1zGOx5QAx1#q<np#Ds
zvDdyHs`O)E^qP2X+EJgAzCz)BB^M?9vNS&~J#liX&)KWDb_wd-*|1WPtEj)g{qv$#
z*Sp1ZFGqcPIB_}i1eu$*nk^QMonAGIliZda5qhI0CR*-rGPU;5Z~OmG&)zma{@8ID
zkD8}(<Frk8=B?MOnen{c{Bnuasz@uD9|aY~b-(Z1@0)$E_V53{=lB0S%WwbZ{r}Vd
z-H!f0yZ@hfzI>mtaKh}Iz4xvv-zwGU`L0!4ZnXSr?CbV>XLNnEpFjR`bN1&#KZ!%x
zb3C@*ndW+W)(rk&F-F6)?LY7CKELc`z;({JwWT^t4x!OG&Of$iKh{6QaOZ05K{cWL
zc*Tg2$uaJZS0=|;OFgyQe&$cG-p%??Wk<tG_pV!Vr#1YiddNzbx6<p9yZy|Z(s#eT
zen(kzIb&8__n$A-@7(mi7&_URhdk}Lax8-<$8)B#c<b@*Gkc#s`zjdee`wL$-><)%
zPuOt$%U!0+Vh@#bV-xPm-u-<zZbE3nHtUU>TUm}~#YWENy0`b+kEL5S<sCILHfWpk
z#hOXkbP+R~X;pAe;+eHe?L4{`OnIibFzjc((jr-*gU7X+?!5f!a5?J74%ee+?grmZ
zTYq)Vg95b|2O{qNJav4{iQ00(+d4UXo9217*?s%A(e&;e<*(lhicJ%|7jg1!4p*Bm
zE}mGvVKVoTZto!NGiTo1*}U}DuMMA#Z;7uy#Kq$qyTNQH?~>?=)@uY-ORX@v#CG~o
z#B29w5l8wrODZxwY;NIC%~&b1zp9&ydwoJcqJ#E{6<S;dCFk~e?%OpxdCrmF=NL-s
zqkbezzqK}C&aWA46-O`clzy`P@qx3io1ZzHRb8>`p~lbZ52uf<&Uf~C<23Jm(35|w
zHWz=`&^~8+(}wdgTSanxVpX4t{@i+b`VZ~BtM~;Zmz|wy`D?=OKl?Tw(Q-8~dF12b
zv3~WEn()eZEf?*4o^03c%FXNf%a-dQm)Ye0>Z{hfzU6m4U3c4EoXvUc`^-Sy?|W8-
zD@jk0)O;>+%0F;}e`Z&`Q7Fqf8~;kL%O<{;rxbZ<dQO`X<Q05M@s#JJ$xl=+AF(W)
zWOG(W*n3&r&FZ=SLN|P`Z;iNm<Hl<FNz?fKZkz4A6}jf+n{0-);RPR7RIdHjyln%I
zsh`M;9*0E6a{=YAjbD6gT0H-xzKqb<mAPp*kL~AMbnsg3Otueu4D&lxJi8;8N~!Ce
z<D4{8xKqG#chJt2dZ{~go<^1EhP_H+dm}JS<%q)8DMFDtYfqj%wJA%p`T}!A%M68>
z<BA{89PtaT-6<)4LS;^x@!nYd>&s`0DW_lFy3F>NcHQN#MVq5HUOh8MC^XP|apd&s
z-b)plotLMC>8^A9KlPo}g*g>Z1vEFYZ|iY6!_oLCFjQq8zxzy~`k2$F-fg^p;ex>R
z16MnrA2*h~=&toTtJ^B9qrvjI4(GBc<Gptt8;AKvl*XQ_idF71deyezrpdh0t1+`?
zt$cXzVqVs*bcts@ii|A>eZR;5Pb~bqAtvOs*5}1Cd}qX-C=`{oOU|}B9J^KGl1Q13
zQnU4BBh~23Pye=mxBvI$+vf7jc{5z^ifemoW}nDud|5Cf;>yE@r7cs=wO`(EZ97k1
z?(_ct{{Me|um4m3cm4l=Wqbd9|Nkw%_V3Z#mZ}SS3Svu6%a=(`JG^3(Q`xlAy?3T9
z%_yG0{x$f%on76(cZFVOl2vn-t=j%HY}(U5KMeeD#&%b3EZP~ne!J_|$dw0D1eLwg
zrUk8Pv<*vs8liB=;m8@y8_ZK*uFLowJ27HQ!t>*+7OU@;zL(Fg_D}QBA9ded>r*f1
z*xj@Z+hAR=ak8|D(KC%~Nw=d90>!xhT-w4DZED%aR&ElqrMYmzCyR>nT`!z-<k@=<
zPpc6&=DEB&_h*1S%S5>qVcpf4|2Wi&_|9cc4vP}zc)!HVH}z6iZ`FZ<p3W1t|N1^|
zJzBfx?CaiJWtLVywM9*jF@93`bt{kA<hDcW3W1rWOkZ#Knid3@`#xaJI9$Ewm-fxc
zb3X~jBxQyF*ulP*J9EQs?&}*pbgxRUDh`O+>(lafqKd@4?pZny1p_ZTweI!5H|xR9
zrc9^#k2g-g`;%u6o0rmpYaj1PT1S5fnQ_zN&y57L^?Euy6EY2=*CskNK6zQOc*a>%
z4SvA@XVIQ*0&zLg%?6^y{fq}X_|=luol7)dCbejtZ*Tl+wM5Q^eJctk9%9vq3VG-v
zxGM3Dwei6NsT-?bow4QFzVqH~r6rv&GD|uI;>y#K4yX1W`<8p)uK%(Vmy;)KRXea(
zd!NA^srjcK9nzor<FWZg_th(m?`*i-;?h2|q*=&BYp(m$_qumZz0A9P!)bDsO8nhJ
zKg?`oS|+|@PTKPIc|yM9G~NB*mTtKB{lb}v)tB9a?{VBdY@wO(PGV}eH1DzFx(vtm
z#N2ZDx-Xig+E%Fcd#KH}15>v)?U1s3u9<$)a`}<VQ;PjA&v|UG>FG5o$SbnQFF54S
z<tdw%=va0xnNq~U-KiodcydzFqd6++eupNhq!z5wiW1wf<KquYy;}(?&hf?Xvv26;
z3A|otxUxJz|CfiuT0t{?pR6MfR75VZt1I_&?fP{0g2P(Biq1{n^8RFTa`R4poX3{#
z@!+45pzs;pS9|z0BTqckSrOu;*irbX;p*;7o2Gr;q%(bK%$7B)SMzdCJhD-4&Ei=P
zR;`|Tp*vgV5y!HW%{o`Q7UX`~HR~=<n%a?*t+G|R-aA)_%`>u`C>pLF?r<Vz{rO*i
z7V4aT_UdHJjO9APPF_b8_@B9QFEKSP=2+g`v2I^bt4f$goAPI~mM0BAD$boescY8r
z?3^0=vpHSso~^dLe*H?4w1Zq{h|=Z56Ysukz8TdMQkf_?M`-z*ExYudJWDhcUUxxG
z$y|T?*1bBjmdz>J!=99-Flo-o*zT#fqXe}>mx(F~+`CoLzrbK|*o^$ky!|_><M-CY
zbc=1x$)07B+Wy4uPtoFCiy1F+rS+{;iJp6N?de~C{(QOo`}y+cFF$6y-gIx?>94OA
z{4DTMnxmdHaZYRS*C$JJp8Zs*|NXswa*Eyl%J2R5e}2sV|4rId_y4c>y1!4;kI$6s
zb6nV7-Ew&EA)_5fuHXLp`10kiRmPhiTy^?t@G*S;{>oyzWeSV<x%i9s$L4qbe0%t3
zg~g89vsb^m3{t3_HPzAOiA&JT0Mlco!8#MTlWJ~XU2yDCPuYn!rD>5fHZs@!;LDXf
z=PG?sxUcch-|HI1TI(JJdo2r>^}BoHJLmfcVKF=!+s{7UInnuD-S=(_|NpA%r8By#
zUcR|tz3n{ryxAA~9^bjR=-%I%>of9p{|H>A)W5Oz&i2JspBG%q44M1zt^fVRrq?`u
zr%Zz!U*~9a75snBE0WBs^S0w%JzH78)vUd;HDP<7aA`yfceb29G4oybcU^&{x(V+>
zcgIcFjF!0B*Sw=v<(MPap<<&Nm#W-X$ByvjZM<6d-0e>Nw_V$IPc^iC&iXEeaqgGA
z$*)&#FZZ}Pkw--0byUpvS1YnKUWu)E=5kPDhTvQ-t`OC_GpDx4oS1b}By5F8hTHVF
zJ(1famPBR<ZM@ZB`Bim7Khxyetqd%CZpr#2B)=7WuaU6wX855!T^^A$BfB`fA8uX2
z=;oDXU}SXry6N3{_ntglAba>gyp8u)=Ih`7fAb1@6UX|(_vl4Ox$oyQL|e9;<u09@
zJblN-Wl2&Kro1q_Q8?l6b>mXA2mR(@tG<i8WxKI;nyjJ1o@2lA#2cSwNbOI2d#5q|
z3)i;&XG7P_bDiq=H1oo}FC|8|*33S9GUMUjJHZq8UX9o@_vnt#kIZKJey_N<@(=s0
z<K5S9a5mp8V^()9VxLjuRXJ;!=Go$TTulKcVy`koos-twsQ|0-TN9(7kbaE`8
z5)t%&{)Dd{eseZ?B+U$($LVzDh?9!nBb5e$Zs(IH*XnF9)m(OMfA0O(9lWb{pF2`u
z{I<yM{jJk~^_UMkZx&R~x_a8v;|9m-nY&i6nz=ux>3!bIx6;P%RqMEKDK)u@-wBx2
z`}nY(h@^p;ScsY5s@raAORT0Z`MT?lT}+<;_NPI6u81|wWadeE^6u5SL#9I4!Y-Fy
z-MU3@<MM35?H9z1)x$Z;jx?{bSz@yEr_FPn*KfDQ&7b+lAh2l3p*1P5nADcuZs)&z
zx&38P>{)JS)lT)o)+P<7eZPNnbZ%JX)}Yg<a3mq0J9~YA+PeG2y+4Z1@u;~ob+3${
zzs}P_JuSK>Hm1AMh<o3%BQB?W9FG(WS%26yYv$}%PC=X2K6~`UN#)(r(A2MY`gJ#m
zN1sYs$I{<$#l-Nrj$goPGv6?C<rzm8Y^=44aPc(nE%44>SvcA5lw9%}R^?qUf<p2u
z_h#-}TH~Ol);?u(ageZK;|xje@=IkwXU!~+SlaIW|Fi%8zlZwM=il-2`}3@%#$x{S
zDalFV;nN!wANf67D7^Tk&i-G!;}h+kT$Zo<efPV4&G*ALzLm3nd_Ha&YFs3L@3Cmw
z(QU_=64I`}{`&J}W@KL4+0yi16;}0sf2JRwQ)#?zg7C`UQDM5TAAkJubGCVVCX=N?
zR@mD!h37+V2&$+Sn;W0YjM;zK)-C<k!Hy-GhTY{QeGS11<+uKDe^<I*XK~$l$=S`a
zKhn><fBk;{>O<d_Z4W>A>&x|jho`gM{3rH7{i>bx!*@&F^Tk!;|5~x!s@#&Z=3AVm
z)U0Rw_ND4e6v{I3<mlA188i7l7R=qQyou?quK9|H@5$SjDZF;Cu4z6qbwQF%j>k4m
zp18AfDt}&@B`KY}r+u%m)%UsnpKZ6U-|rs8Q_WC+HidWp;iN^;Op6UV9=P6~_N{y+
z_mu-Sdm?M1Ej*`po2BVX2qdPqXFWN)??&K{?)d8M3h)1&k;;G7lXmrqZ0*MTcCsNs
zy<6@$yO_^rUR1?AGkQyXo}*~DsSKma#>a&cM<woEyf1iv-`*cHr4mjCGEZ1x`gZD+
z{YUra?i1RwN7v-^<!u`r->+%zI(Ue$T+DM->&(zd#X~_`HgsIcTItexeyM>rSN7b5
zybDFU_>EZ;(?$2%H-1;RR=4Bc_G<^+{w&k=mx@+>vHR5~&ugy(Q&%vuhHkzTJY#3B
zAoKKtXE(n5s`1Hxqhh>|*s*0rAKLQ0_doo-+4tL}`VDn1{9L>2bmYJLy)Bhx_V{zJ
zEN1ci$Bz7J5*(TA$6of#s!r>>{K0m+=063gm+prRYUDRBt?HPT&-ME+SKBtNZQT)U
z8G2=Vd8(7<Ei;jS_PEe)9^diDCcc{fhO_+*SG~V#ZG8NNYF3h9pHf`V*(Z)HUsWb{
z?Vh0VF;ZyxkvWrmlqPRBP~RZHGUr4v$APILJVvph;fs?}`hLIsD|ScjOv$s1CA*%S
z(4LYT-N64q@4dpxim+tq69yZY(n4MCO3wOt^BvDpM(-zwFXZj8-llqf8VggR)aD6m
zQn%!u_P>6LC&^5`?4-$3pLNTgbnD5lyZ-(9(@nFk^>}{SP&vUs@z=Toono7}{>WNl
z%CTXwWoKag#b2KVC%b$LnG!IGmB(*}Jl|)_-D1)C>`S*FnXmtiuVV4E?n#k9j^6wI
zw)w^JC;o2c(^BVM*VKR0s${$Rl=1<FZe#0!F1^>QR++w=>#QuewAI5aRoAq!g||Q@
zSx|jO(%x0O^=oYH<@zp{oY}TxeHQmb<H-|*r_2Z|R#N?>Q)4$lboX74ZoTZCcOzz~
z8m2E@+G!?ht7#oD>)onX1)oH9bS!;eewg4J+nuF(<QQif%j@D50Ur_=B+S!x_RgPF
zwkfOhg{PO^o=N-T*Pnm=cFyv4lk8c&1qNYmX2CNIZd9+anU*?N{rn`4yz}qtcl~*m
zRzK@p|MUGnzpm$7{`q5xmySzF*UEJl(+Xl=HbwTFoOwyJ_V3x}45}aG|NXiA-Tu!E
zzIK!6`uFBt<Oq6Ed3XJ)oyRwZX>>Ow%WCl~+s?oI_+!KN;Oo{>f=9n@yYt}RyN?f7
zm%o2DB{z1a>s9-jKRecOw<@hl<GlLemRT87(u72|?JB~>n{Mq2@Ja|%a@CcO&XX>C
zGRME@%&(FL^QkZ6!|&{O+1%r{ZW`Cq$DbH~`<AYk`aAzua=F5xi~N7RB<)Qkc0Zcn
zwua%`Zl)g<(@Q>{+%Z+I)u+$!O+G`wbtjt$?L!MEtxn2HG&ug}-mdHey;ryJ<=2?Z
zd%I+Hi|hoB8y~wjY)(rQh<&^2?DeXtcDy~wkq3{QtDX|L-Q`fZ*Vh8cqW@*vMfc{e
zzx_k_i+Ogx^!pUy`%=r_*4;kqQyy|;SF=*O+;g$s;4?0a5)HyCff1LIFE_+)yz8xY
zJLL<@t)<tu3tilkE;v*8wt({X=4ID&4@(4#20hn1yxGfPRdHJ1Pxn0%iYZCv+$UUQ
zw}s!kGb2r^C8|GnPG|Aw(4-0{fwIGTh0?B~21)NCjyA5m&-4B&_q(rVrAF(mC(d}h
zJZQ%<%S{FmAxfJCHWg`h?1|pcq~%n+p?UkFrugh*?t*;07uW8IW=Qh6wPB&k%5L$T
zJ5sNBHG3C*XgD1e`f|ag4Trgxf3^BxpdJ2AaFy_O?u5ANLv@MFG5Lv~I4(c+z1qF&
zyVR?Py1kE=#{bac<cjvazkAL{0k&;C@675QtK;{2nq7*06QatQzv<kJ<%@5dtehAT
zwL5>YQt|wW`y9Pjc72$`w&|jF-W}d61?9)D%x;jJlrrnm%aV!;8+*Gs{mT!3ESd3q
z-GPa7f`z6<-7&tdoGdL_`Y*dD)>=d?@J;y9)dgAmC-9tnaK^@P`GS)I8xLo@h1TrO
zidrKQ<bGCZ+1$VbE7fLldUxsHRcU&Bm(}6W9{&7Ae*(6*3Qjxh?(O3i*U`0d+xIos
zofYr9Uw*smmRi`Idy}nF)`W8>KX-dFtNJdF)0^4Gvd$*GlP70(hJ1MOd+m#&;Jq)4
zB1;V~%}`~C(f(a=$MuqlFT?Fc+olw0Z(Fs}vUEdjitxhMI_=pfXSx<m^0erFa-{R~
znfByIn{NE(&$qv`Pw{8l(}RBn7P|Pbi6w>E`+R<WBz0DPl*pQ5t(kdm!u78Q<)xW_
z=DrwZ6q6pGuJ>$<&iU7&R~<agy1f=iNYa_OOuD02WOiahK+im}!0I_a!`Ic%|8Zo|
zn`tlHsuZK|gz0)koSlFA*PeCP7cEOW8m2vc(NYP))ybj$?I$0vN!#WW_cLEk)z-3Q
z=G;@e)~#Bw_0jsx9$xR_#4ayd<~H?5{QSac52_Azne98CceOc8IAO>7y;J5G9XPsX
zhM2Co=4&6-;E6WcGpmxq1-L7uJ(w7#9pOnf`u6zekE`kNwI6SWFTd=(;_6{0j{c2%
zw||&&@9m)(PbTdD^^DuYFZkcx`E`H3mEZTftnxDd-W+~m9j}yIOtZgfWyn9tJn;DC
zpPz>h_rKTQ9weq-I(yQdY8!hw{!2T=|4!b%%07Ob%eQ9_YpmuS;FLV^bJ?%u^XvAk
z_4awaQu4q|)ob(i95ghYXOWgxa8W>P@)8eav!-@AUejHv+9zrbFS)9jBt0>#yl%<2
zs)pzPXTJI^C|;|dU3*gg-;v(yH}B5>^Y(_PRvZ_nnb(=QKOV_-?>O96`Z4SNudRi1
z`p>TXDPVsgAW-@K(%a8wo!BSlu9%YX{pbV5bxRI-9pbT?>S}OUz-EVLO3icTuZn`k
z=l(R_cz5@TMUakt;+vx4^z=^+wpTRVZ)Pmt>h-nS&~Oj;&df7i>nqyde%s40eD_#=
z^|A5=$<Y^XY?#T`d-SuLQI^cnuq)mRUMY(&Q2NlyT43z8QnLQ)v7OJCf0g~t7IS5y
zCF4Xrr<jGxg{RH7Ht)X?zTo_&i_3qkXuP^EW0B1|?w$-|hCB=J^ko}GZ<~2PHe1eh
zEF&TE;nDcF@_mzCwgqH`mp|o}P`~<Er9rH7)u%%a2`gqAMY!%<bBguI8STv*v?3(5
z1wKYBZRk2A@aN37nT5w+9ISn6Tou0Q^NhCNT)xW#MQ^{_bbFn><NMz03PHDKZ`|?I
zN2_6lYg1@#-IO1FceB45z5IGk%z&kgVKI+<w2mA1@f$7;6^;@T1-2g^KKNQV>D}DP
z#!DiSAG91(KIs?cxyOgMTj<XmyUp@0-%8A{FW23;W*^Ts-`^`QE%`HVo@K}%+3CUY
zRaL1Iw|$Nc7m1aKXqb{RZ~5bo1y-{1a{m4O&ySlw|NK*gbF=012{(^)1h?!oU6?$_
zw_B%kt#<z}voy63>)9O(*w(e(nKbvv9M-eGuP<HQ!h6ek->Y3OqGEWjZ4-FRuIi=X
z_Jwa!q!H^6<x4lRHn_*U_{g<7dcs$oz6!A<GbLv2n3LT%vu*{gex$u+<_v@79TjV%
z+W+24iMakrK->T7l-?}qXVV0mq%>|H;|wjic_mwrZO(F+cdt`5&9@%?x++X#+kymv
zY>vk#JR_Fbq@Ub1r6}^tuB^c8+diyZ8LBTSwe^wvo}S=I2i(=B>0DOy?fmigYsJpL
zvKQBP+*`G3UH5I{9R)@lYgfmM?Ygrml4-A?$0ijnHF2{GT3dwTyY4AiJ$F^;G<v=H
zR_9mSH#aj(<C!)qIlc@RG!9Hk^H_b1$Ezk*e%;gAQ|fttp89k8`E#BZn*s`zHBWGb
zby%%Ep!-w!VdtM?Te#wDe`Wm3dA)W;oH+Buby_M8yoT$hudtTnbeN#B#EmtqWU1*3
znV4%+R`Z=@2wfZfdDr=@4bv}N%l>TQ@=`uPQGWT4+t!Ps%ojZqyirr!A++|H`Xi=!
ziHD{L1qJWe)UiwNwC@ayk2=rS+x`ADTYrD0g`LVgr9U@LS~?!64HB4o^;kwagPH7S
z^ZR?eJr#bI+y8l4zW;aMjQ{WQ_g8*5kQX{0HgEQg)Xj4anW-vsv>d<uGk^cDPcMVk
zJ$=r9Iw`7X+VRI9Cm7$^#mJaYyzAbz<<C!>obT`N*HkT?@TB|IcKe$Dcfa4)e;t*(
zzCrl7<f1edFV9Dpv(uiiE$3K&)zrpY?#Jp20o#nVX1$9Fl9Jrhn_IQ^MRVPzKIXN3
z$3I-}`w*|7AMZB1rb=r6%a-!Rj{@61KDsY4OZScVo!F_@yAK39ZaG+9cEi2zwfY@5
zH@m!T$E$U@u4&mwU)i|3<xJe2>hNmzXBvKoj=3JZ#@v&<#rM~ywY(?hM*O)~fAyQU
z>F(WE>qD#s9v_RobGN5Lu<E8>!0HF`#?Kw+-wQkZB4FM=tAq2(jPBjYEkE}pBfVEr
zgQY-X>-NWXi<bZ2wfnGBOJ?n4!<1X!cuVKMGqbce{w#JkikWj!K99(Z**gn3rE;QK
ze+ayL{C(*ShteF^%6_I5_vI!_b_VeBf4DxaF87|@yUkS#GAa(mWT#DHIV#b&>P&gs
z3~B3`hMO<1+rwQxskwUhZHpY;)ExGMccyV#UrXx9y?KYN{K$rt4|DycpIoboX*f8w
zFaOkprm3B-ty?>?M1Co~mRc%$z~N$$hMKGIhRqjaKX{7!3wM{Vc<>?P{YmTFDgtLE
zn**cFa=!oS^xJxLrE0!A(=~>zyopzIKKgD-k=C5<IA6i}-RF>P8(pqHy{2K7ck+;J
z{EwrTX1qH*&%EGWnNsQf|7j*tP6ye~SH}i#of^Ae{8)|lZDYe~ZAr$&75zKjbL#9p
zzv1FWzvzA6+wLBEAitMe=Y{INx28>cGmSlhtmgaY-?Q6O{r%a;vp>(yZf|eD{IOyN
zf3e@BA}>SU^_d?Ss~^cdG&EQ))mF6s!MnyaXAVt}(!c$qTk!f7gSiPDjA?3fY$j~Y
zaY;K}wRhe1t5>#ZB<?<4?Xomb^_h;++zk&NdgjOpG93_G&%WK?xc!>EB9mU;(cG(-
zK6$MOEWLBrGb!F#hj-^*t;pJS@2<V7?R>7>vWrXZpy#Saua%0sU$q&jHZr`K6g<zR
zCAKtn+SjVfQ)5ncWj}Flbq@&0oUIc1th0FM+S8v?_ukpF<s<9%Q~z2wuRF)q+@wEY
zj)k}KhaD+1h5Vm>yP34#?VlB6pS|@zryraZ{A&`_7fiYR_Lcngn>9-F(=BB#PkhN?
z)u0xjB=&u~CAaZL;Y_{ks&7eUnor$o3ai3u!*(AEU))-v`E+HB{_Cwf1kz14#G_6p
z9-SWkd+O)wKJG^P-<Ho#I>-I_Y~GV;Et?~aQa`(xG~ZXv{}HIEwB00DZP!d)jqvMR
zPrQoo$n!hD>UvOEF|%%zmsmjAtP^wY&WV}6=}7OQlx6IePc#qTnRd0yF#M~IH&bDx
zcch7(kz$LI(z7d(H|-P_X2m9j$S+sZ30SoJ>$a?&%U(`<w%Lb$JM-s8m4^OvDojeD
zv6H-vtiAjuosL>}{pj2vuZhJzDVod63O_dNTCHZ1E}-H4@73#aP2b>i|KIL@U;n><
ze|YKtuh;YC(i(5=+7K=J)YHqdv*3K6tMkGqD)IZPtE1n%44WoDJ!zWjGR=!@cXC$o
ztHy2-v9-68JC|=Do45V;@A-9qKF_cJasU6r|KI=4-xq&cch=$^lbQNnN_7_Bd97QX
z-e%BVdROZA>dnQ;H}*?gmnGK59?h$nnk)8rA;YtIm-v3zM?Ttq#re<Hr~eHyZ!P;!
zJ88@1hqt*?tbhHzx6b8L@0&VviBI2@E!yJttJm>~*4NM8)138tLuq=*hieQjyji-F
zQm34~!~JgV&84eKI5u4K{*$D3ZrhaY9jv*J55|d_xO!B_ZWprmSsTW#aqmaV?G@K+
zmtT_<dL+$!+hfVOc`;0%B68<e%>A}u|HQR(8E?#DSuoXqeUaaVjC_vDRlJ{G>fT;e
z==54-`U+dA=Kt$|&OABOQZSR{4_ne9)=5V=<uoRJdzjvHDgI3@OCA5q>_w&d(QJL`
z+s?lKz2Zmky~%eww!eCIJIZ(MLNO+j1l`%wZ=E@$tB`u9L}2#Ob>Sc0UQGIOujo&8
zzr4iJK-2tO@2Ppl|IS{#x9YyyHda9u)8_3frq!`HvMv<~+hDN%PWXgE$Mr5E^VVAN
zztZ?qZxywE-UNwH8HQqQ3uK)><`qBHzoKhuI?YUTn)Q{nvjnxGPwzZ-J-AijeLzg^
zUZ3*7y^A?M-21uN_6NsvljtXxjSW}4J!<VS<(=bq-;Fo9o|%bnC_H{FZgMu8Z`kt7
zN^|b(er+&Fo3U)}!@qj9*OZO-hW?M%-}*HxI%7_y_}~5xy{wt?dwuU0pO1R;NN@3~
zI(N(Ek2CG&_djoMH_w;5x2Lw^^RtH^Kh{{u`yW4CW8o7lCdnZz{i;lOE#IS%&oipB
zd^4o#<6LxQ=08~G(P_TiKD6|Jr^^$WRWhGfeA}y27M-`fR5R`kpZu=fNz&T`W<;Km
z%~3F1&#S&ICaQ?llH1*C(Pjz9yWQ1=@u63@sBXyr*Y3E2TPY(T@>b}s<?9}ahrOLW
zT~#jY(SmKRcIL<1Eh=YO_y*52+Br2<bNlVLMVDO<dv7US_+bN6^ITUW|3@dobg#SS
zvzjeA(=)q8{NJ32$?k6L?Hu!LpMAe<D0@TBKCUkJ-_p&E^(ueP{bB#y?<tn%k(3s@
z@xz)$8r(D0BK+!-tOU-wZhq-A?JVB{%a4htCjV~y>tWgKpXI*X{18{1u%z=%@kQFV
zdzX73n_9z|_fC5<Q~d1zYfnsP+W7S6v-iF8EpDr%PU>&nTA}3sYLCWh)dwd2%T?Yq
zOv*@fn)hUaf7;PRFE^bjlN3cy6kj~O>t0Cy(mPY;ofM0eUD9&8YPVSEgtguGX7o*#
zznt~VlKXSXiB;MYy@mX4@<!d6Zno_@+oWv1i*@05UaQJ9A7xA2XEsAPbLZM;-kaEG
z_PjERII-BID0JeJw6LySQ#aN9`0#JnJ-*AJ{kMHJI}MI4NWGDJIpMQZ+v5V|NoV3~
zzPYJ1U$(FN{&)NSpN~K7srmWy@ng&LoknZkEm7_CxbZG>)}0lHj{li(`TG35KR>?w
zdHC(iZx5}WXU^KxF(JEr)q=E8PQkqFpC2Dz&b=ys|Nocc@9X|Qp1=S9VfniMf4&%s
zU6bz7XS(Wh`1FAc(N=5YqH`OW14G-lPR-l$G`92Y;u&=jFYf0%HvWFg^;h)h-@Q+k
zrptxae$b9N{MUDD`^z`xo7VqMDz~coGkMnhz%v&rbq}a%u$Emv#-abY%>P}(spd?P
zwb@e6eY4&!6m#j#y0PbrkB!QzKG!GDICE_a*)R6!-@7)M=U=;q|ICv;JCb-#n0#x9
zwkdX32&p=~?(w$RmOV2wdsndUO>qyJeuO=%RHtwc<L#(+!@PIxJ2re++h-qS%pAt{
zP0wI&{gwyGW$Cdex1HHw$G*D0Vyb)ja)w!r^CWF`cTIZnYQglJ`|E$HhXptn9GYRU
zq4TIY+o|qbo3pn4@>uttbxsfGvsNFODGypC@(Xy=%MA+eUMMK9ESbX>Iiql+<}|Z~
z5#bg2|DV3=mDc}yA?I#&+}ftDS*vs}C1<gPX;-ZJIp4A{Gx)dGTTb67X=kkkc`mme
zXs4|=j@!m0mAv!a@5GPlCGXbHO|4j(@sfLC#W#U`cIky~ji%code$CK(U8A)%kiTB
zdX1;IKc9<kiod<%+NpaRLVvE^(QnF^Z~J1~eV6T=+Y8S(q-3r>J4^f4kF9*&p*+@t
zb6!7Ra;|D_!1LnS?=x-(A8IdIDXzHnZe+c`w8YBYb~V#YXYxdETK`kv1oy2&)eey=
z{y!_`^c_F^^5KtncW)m*-rj!v^5fZ;A3r|)vBE-z@3Kj#f#2NA-_3Y_Cu`h$aqM~F
z<s`jzt89N}zIKt&eZSrIJ%hr&r~HfR9K-XY^~{swI=iNSEsMS`G<7>~w_wgY8G+3z
zCxv|c+IF^u+{j{QVqSUejIjZ$ihdi@>dZoIH`5DS*?0A{wXZlM;h-UsS)90yrPAW<
z<t3S$7w10ObnD~{E$>H1^76daoqLlo$!Ctn*-7e*d+wYn+B-GiSw_vKi#tU+X0W++
zF?k==ORe0eJKa_MZdCHS`GL1i7q7g#>#Eh^nX1fA0!rUZme)A^tN3~K`1AET8&Bt_
z{L_6HJ!Q3J(KF3XmGZEMQ}bFktCgpm@#MRF+2-oX<yvdKljhzK{`scn$rruz@iP6v
zhEaJD;gPzQs`{IMH2&$GJ;V3`r`MVVq9F{;HeGhJI$ygl>VI}${CA{ef!#CjNvD(E
zP1c^eBx=qam8G8*)#pceMVaWTPA^iKwbEzXnYg~5NXz1LnPOt+3mTt>OySN@jc=6x
zu=(NMS65fg&|!TYD!#Qe(Q22C%oCfFB1^rD7Jpq9X13WwwCkwwfrFn_uV;MGJz}_9
zQS#S|m6g?JuIxDx<e**CC_a(dfKgKA$h(ymCtfNvwJh3x`)%afDWzv$o9J5lSK9e}
zHYq-%5Is-Dm|Kchc}Fb+=kl5N_geqg{ySCd*YWptKi=)Ozd!A1e*M><ho|TF1%F*x
z*)7wvgT*CbM)nDYnG;z1kJrq5`aIn}WoFQ-Rj(puwMX=MoU}c8{PfqC^7YCsPk-%+
zvv=yAz9}YuT@<(Vj@^dJOG|c7UCZ_MTC{@Hj0fFEPVG2p&+}XK%YuFPYo<9jcFR^>
zIJ@T|%fHA&bHvNjgq1Hi+<$-k$m+-0@~+?JCq(btb@2PeD=(zO7O%SH{*H}pr{9YQ
z(RXwj6Qpa-urV+CE*bXO<gL}lU%bY~g5QfdCa8I=XgZj;e~F-`X`G~f$;aq3&($)P
zygmD|QOT3{hhtuz+XOAg%!pr6hrUf&qxF4P{DzX9)w#{P{1(R@R5ogUckoxT(o=?K
zawQjhCptAhy{Nr(uGQ6LoePdF{TutOptU2+$Y;f*T^AWLK4s)-E#mm!l`6inw{D}<
zo>=M3Yz4db{YSG~uI~*#(tO2uLe$31Jzpd(qQZi%mIkw4`F5c4(g*I~3qrr-&wkCG
z?b>VaVr%OA%I|yOafTaT6P&-EXXdEwUUi4PJLyOJ#-%U5*3MS4a1WK)#A?hY5+f1t
zSZw3vPX?>q`L#PvOr3JUcG@=CnDtk`n<(tNHUEcq?!ReLJNK&YpTKHp)t9{_)?{PB
z6&FU6_eH5Zi6>6oEo1$DtU-64NzV5zlb@U3QE*uCFwC~#=KD&=!wanvcpm;fI_GG6
zR=4EAK1+u^*2$~IHGZtVYC6{~($Ai;&|U6Rp@q?krkZ*Yo|kF+YK8x<I@EUj$d7~E
zH+OU^EX|zv>1Bz9EdTT4phDy4$3H)wef(2kH@|=R=Z6(G=h`ioYyL0~y?E}%9V_Pv
zUDq{kWHBn1nJXK0Pv0cRJXJ5eIw&u0W#Ar<!*<*FM6@34_{*X_^VQlrd8wLlQML8^
zSG)FFtlqfGveR((tkuB+?;i9BJYIGEqMfRqU-!*d+h&M<R`{^>>TNrrmf8jLa^I!D
zU%h{0?ZF2(Rs=>W&%gP2pEJL3=jG<!gh`h2L8lJ!Xzz-fUKD-(byl{=|5bKV+Y*I3
zT2m)!N=|>h%kJ*9*S_8P*8ksi9GK94UA64$dJlnuX(rEWW~7MuB|OYLwa6~^+V-Qd
zPuxGqE7;GtuW&AId#~t(MJZlsDJh)?&nQXpu`4m{elqpRF}?sVo5Ed7{>%wZiponh
zRaG^dHBl`#{rgOVM)hMV9tT+FubtDAR=%)9=om|l=e!g4=AS|<ug?Bmt*gCJG1f7B
zd$ghMu3ak&WX!x$@|K*66MD&FE!5YvCh_M|pE<If6NFkcxX(ASUs!y>N}XN!(yO|Q
zpZ2_3v}=~}jdZ<)dCOMc`n~-5^XQMOFF6GStrb4MA+x={`QWx|ZKbmhhL%TM37%cM
z;^EUhD};32w(ZbuN<QU2&G?9-u*$EkQ!Y>0>>^;&;dA}zved|FTkd?y-L-U%rSIC)
zkLLu>Q{h~0wIge?_{SqJ-|qf=f7*IGyZC*7Py4^G|MQ{3uI}T{*~#;k&AIV@6_=D{
z!o<?UAKD-Pc~|o1L!njI2C?w1tCkcL$Df_o{ja2QSLV*Crh@98`RA7Z{Kybn_wRA}
zE}er+EX56hNe}Y#?jBE)dKkvXw7KH4ZQDP&81?VDJG!qwzr}FV_UO*wh2MRS{;jwd
zZ@}`ttY>fH;c2gLwj1eYrLK7pe?{s0&M$UB>)%_>`Y=)B@!}<E-`ASfsQjG3BK>{K
zw_U5BR-4z!CvGka3VoK}yi$=#qPah#Tl#6P&Gzz--=;6UHRI2|vYq-CXE=3d)TwO0
zCerox5`T3`$fjnc`|gkLa@=0oBy(Is!uZa$Yn5gz-re}OIj<?5XIfw5v&iEu*NR2m
z4^F?IAAaF^%C_T-zdkT8O8Z>q$hOVtsPqzbrSpR7PJbqR?ahx^@72R15%7I&X=gv1
z@WcZ-zbAbyy!9mIu!<xzQ-=Ajud$5V);{^3_3EBQ>%1(7TSv;4PF{GcJR>c^jpu!~
zi)!hk<X-1@>v-?$Rb}0pareo-(w#Ah|7M@v^H!~<L-JnqT5Hd;AC|9L&-<)eD8$B`
z6T4ANkk!}qwyp+`mdlcbnq~X%|4p=xbx0Pp-gD#jp|_zArOkXE$3N6LwBmf1QRQN<
zDsxMNT*jKaPcDcj|GUMMl<?`BqCv+RyUlU2R<_;ZwgK6b_xupJW4p)h?t}~dD=&Vz
z|G4MZxAmu*l*HF&t?JaCFQ_!<%(4C_8gJ!){!Z4C+qN`)e&-waAHU<9zcF0;*vQDt
zIY~8r=JS^|c6K&%<j*Z{KYsb=!;&f+dm9@W|Ko>0TF5{9X|OcYOOx&X)NIbIpDXkB
z#R?lP==`;hcX#t?r->!9k8AkCVm}ueBwnuU6P921GV@8E!pZ~3KA7$@TD$D@*P`9`
zuDxF6`qj=T=R}|Y-|+(~o|~5bdK(!iQaa<{L%l19&xQR+D%-qiMW(a+r4^=!8Ll~a
zA7aaTepzB>xo{`%iaX}Pr9St9EB3s1+jCOna3{k;6J=kXMMc`LOEs@A)p_L=HRnNQ
zlIO{@GjBG()SIJvHd82l_q0zX(ev|{-3mIl^4YCm`NV`8v)9p+Eqo7kS}r$9pJ~MH
zd91|3$MWLm;JSUbJ7QG-cYaV`lzrf*@9hPL@65?b4PUY7mcqd%x&ICd({I%li_M-a
zcd5uKPOdNR#p6ZYsiK>%3QM2r^*;G7X<F#>Mf{WZG}hMbtqVN0_%c(KS*mN=Po^)|
z?yOtAbJ3b9PakKQ7oPt#=gFfT26tt+m%FBj#(yrE{biHS^-|4UQmM}-vCfqA(wK5a
zefyeOr;KLHIjS#9J``D@X1~JsLFd-Mb$8Bs8}8S8wMfZwsrqL7m3zW}C;Un&Gu=Py
zJ`eZAjnYkVuckVL?qWD!-PvEe>OotzbIIP61&1QumIYcTn)$UbtV^^yv%LND;lqz-
ze?DAeQQ#F)_~_ZY*SWFbPq!@D(sJ_ronVe{e(n!HybjyA)oSP3)$2U}+@1aS^3z`r
zi+(=-UOz9b?BAQs?U!|w_V7y_5;J(h!Z`Wo?e@#}>wZ41ihb77dD1s_iJ;P-uv78(
z>VG_3E`L9M-rN@-YyRBU-~a1Ri3P`sJO6&nzMtYDEp60sS8-0#IzbnK{=05RUwWLn
zsr>(Lui(YAT?hC5Fe-aKHE?Rak>C2y();9F-#VP!cib$0YG!q_^~QHsV=WVzZ~fe1
zarD5oTE~gEm!5v_QCa=6YV*NOxsuCv?@YZi!{Pj~gU2p^lhxXIH$&pjR_R+er6*Sw
zyv@r?%WKzop?4}({JQJ<rJL@R`$bo9hZ-96?t1*TwQ$+7iC;86UkqE2vZ8chZtU7z
zo3~L*zUH50n(NnVADXOv?8f3{E6d*;iQ5-_zJHSXx}{m|?|(j9`ojAEwg11v?j0<=
zf6cY3_Hfh<wS5KZ>De0tu5D{_F+AiRak<l?<^Fmph21%|(@VB9O8vd3-#GVK!ROU|
zj}M9c<13fnwfL@F!^OX4S9zTK*nS$aAD7v<>B)sM-^M7ulXlWa6WDnQcO0B~vR9!t
z|1$fZtF_Ue=d^ViY-paJ$9%Oq;LMI4_YXFmoujePe`W8lGj=h|KKD)pv8YYi)o|g$
z#0dV%>WN1mI*Xh3J^UMNR>xy^>DQ&N3;Oq}fB$EA=fd0D>_W3A?{||bV9k1$@ul{2
zv%dn{o=B0`;zbRTf{lmoynW>N_Tb?Q5=@Jq@80+3ZuHf#ZM*+U_v+6K4;7!$^j~Q4
zc?NAR<wx^WHYx6ouKVqJ+u?28vW55V_Qd`x3M&>@d-V9YhDxPEIwSXUO<BLsKR=dO
zoqhiKXN{e$%^d&bkAFTav6}DCzx-#x9R7CDIwBSIhDf3C(wg7<Z|HE!hP=&7NejE9
zl9kci+PURx4CCcO=hVjjWnxty)7ti`|5z9kYsAJjS@2Bh-njMWr$61bqHLN*uko8z
z5^J9L2|ha#AaLi(4NqtL$oG2c`f1_1=9gVd17ok+)U^K1eE4qxk5VV^u7f=HZp>cD
zKH=?R`+(lZlDyM}u1}aU<M8&9g9QxUOf#Cy{hz%KI#sH>J!y5N<B^#KZ0d=L%Bp2a
z(~q?OESQs4Hs$nMoBOA7w+6{dJLa-}+ryl5eA89?D3#?Vs(w73bC*mKj*(b!^~2|<
z1)Jx5>YsFf^8GF;Yv+#69gFVz#9p1cpuuoLP|Ab@$9ePSPb&Q0WLObus#+{?SXkF<
z!c2|WEj!lvotvQiLw8C;^AqEx`(J!*o_74I-DDd_p%b~jtd*uJbI#rspT4?KukBGl
zL13hq$g@K+9OpOmr<}hK`sYwdYP68p#PsivLe3sw=vKYtGON>1XY=HUnLT`IH*=Xd
zZyuTB`193fMRlS5`;6_yi<>;-Dq<bYKXHF?suSF`Y5kI8C7Yk7{mVK1IjU<{`}x^7
z_B>mg6FMnB>ZTQ&=CW;JqDGA^w&}Bfl+5`2`0?T0=kM2j`+58PJz0NsF~-e1S5Hfg
zoW4}#@}?Q0%-z;2Z@zU~Hp@bFn#M<GxdmDN{^#ZI*GGice}AX(;r+iS^*;-KzRllP
z>NshR+@>R6y|*>b33)26wCk^he|-JFcRvqTe=<_PX;j9c(2*E>>htmEw?7-V=yXn(
zHt=tk^qQpdJZAC(r4q?G%QZ#vG`;8YL~b}1E8(dkq5Om|V@2s1-zwJU`x_?9e~y1$
zd@3Q|=KR9Euk~F`&hrEI=CApD*Vmlo@VCp)Tc_RoE+)17p3EifZSwcCzka{#@O;sg
zO}Xl8GTzMat#P<3-=!FB7{mW>5yyw!kM`Zl+WEfn8^eoJGW*h#-t^37-f`i(Uu(kU
z%?*mPnaYcc&F+e7965AMI<B$f&d!_huT6I)y1in%cHG0msC;F*Ek6U_`UbT(3M-;x
zyw>JxcbwcP>{i{{u|3zazvxa}pk=7;k=RvCd-7BF{+>}CD$KjVx?1Fzu1V{$B{R<1
zv_78tZ<Wz&iJTnK6ZcoE)hE6<#NF*7y3~~WL)h!LCdJiv5+_ecbXUl&i2p2cyU^qL
zqumcqcyTPyElr&^(dvY+;OEZ`Z=>d$8?(!&1^jJ_cS<SWQ!hWKe|^UXEpyS`N0xT*
zc%>OITw_0T?{%wywSo2YsK-V-7+kq`3NHS;=Jqjm{a?L*&9{9le<!y(Q0wu#7+I~|
z_cz<`+a-Ildf$5fQ2wm-va0_W<nG?#vU_mr`1?x#U$5HZ=T=KAoIUVZmh0=I-FH@h
zeHY{?CAl?k?VF<ws!>*K+zO38?CJ*YvA#MFjtKZU_uSvc^P*|_?hjMG-P!h|>ekCU
zwKdF{SAJH^7ioxLsr346BkSM(`S8n!pdQY{KhM4t*v<EUemK!iuK)8#16jV$4-L+;
zpNv$wzD0AI+OgiRnSUI;CTfRh$Eh0^l?Jg*;j%n2Mbb3A>A^A<iJmL3rrd6O{Ozd9
zvRs+9*&niv++NtO*s=Ed>90R`#awq2S-A1%%608rpC%lcq{JYYcciC)$LT;Gi|X#o
z%4<azH_ce*TAKXsQd~ou-)!!eQ@8&tUt`+D6EXkewu79eAJfie9_UNtebW<S>;KNp
ztKejb#j`0#J5NQOTB|dC>zylMpC6@&<_pv>nssdPtmP(WKO4#iy!iF8a_^lzGhRMW
zeqL=Gdv|G3=o4dw$P_P~JB56!yaJ!As+rfu`OB|&S5>Jit5MN^5*2p&kmntbaP^aU
zDN?g^C3rqc7#w<hw`Je8S<BBd9C<KBLdWIkt%9qvQ;y8+71X~O8WIvZDZSF@=jUI4
zd`iykoM02!b-kJ|pz+p|mY_Fb4(8=YUcXmhmQ+tKdv>TMdB4FlO<Ui~)yMpw)JNVu
zV`sJgoZg#B9X)#()H>%CEeg68E%^1Mjp*69Be`kw^j`DWb;@k^xtV@JbLnxLxkiV3
zWlRn_Ou0O1!vUS-M4i6VUiS5S_D+9&IX8B$*!QDfqOLtkInsKDQT>~kUF3Oxu_B{j
zGhHo@x>TkqX)<#D$MyGH)&5MatC)~E&1R{}q{#-eBO_10-4>I#JyloNPF%tK>+Rf~
zG3$!ge2j3K8gpgdy!q=-e=V_@-~U|SzWVdGvycCOylr3i|KIZW`+i@Z{rP8&m3rdM
zIko>jnX&9-cz*cHS@HR?dh*2=N?)^{dS;*<)^k?<nTkcqPMg{vhd=*z_N&_c_u=jL
zYc{>mWn1y=oKlwCla*4Ot1>psJD0F^*M9*;Ptj?&Q)LbxNVT87>72paq9eCjcl^#a
zyL#R0&$>>zKcNybX|uPl4nF<9!ft_KkC;B!iriTd-+$enw~$@yjp}UO&^qaw`!8d+
z-QKV^<+j<qZJMF0k8T(I@`33_wsQRQc}tGZ=2JiS^V!CnT)w#7D%(0eXS40Kd-$Fq
zd9}yOb!+o-_RUL+XK%<^TbX41HqY+;LM>-k`S{ysT2iZ{l?9L5Sf~r!-yUgq!OQGV
z+4}3NYj}#@Pg2=q5NR%VQErZ<PZWD)e5mu)pLWIDt0wGTZI--<e`m6;{{{CxtzL<b
zvUl1p)(AaY@kREH%iH;v5*T9NZA(piWA;>^ts%p?Z-K}AX3j->`YV$f{z!311TwQ9
z4Ae}BcUyA7Bi?Aj&ev^ajOLHeUQkO8xW%x(Z&Uft*!6Sx?N%+DnRxAtTIqzS)&Hul
zlr-|)R!ED!EgyB|SJ%s>JL=X?yR-e&0^W*U&)<G|!&uk3LjHVPUroRM8JDeLz1FL~
z&drHVf8w0K{juuSw-Ryk6BG99u4y}w)!S*hz+e?8^X~pk=R@zxHNTv4OX@iO`aK7?
zf@90aZ``ZC)^c4@+U&u#r)v7?x1W69Kb(4XW74k2iMg@Y(=tCYx@G5{eyrJESY|h$
zpTB>(dA|I4et!P`=Z}Au*vR*{KmJ%@b*BAifk)+|DVI#1zC57Wt+GMDaK>IowYgHS
zbUfsJy;zD&CQprawPGr`>~%?1{gKz9M_KzgWK*-{+4VMAF4wzz?~=ux-bjmWnGv-+
zZS3vs<MX<I#qDX`T{+w1ROKm6PT}ZO0Rdr0rx(#@^j4h@n-`kDbWK|BiY=<LeZ`wU
z&MH$@Z#Fo%^;M8n)48o8rcUQM-dq+9DL$+5fW?VZ$1g$gWQoPn$CZAEwx2HAyY}?w
zP4}*ZZFl0W5!mCptcznw=Bdb_NiM5n?PIP#&yF<B@yQZ%+;_3`hwaUS<|`AE@|df2
zU$LYKrTK|D$c4wwpa1#kv{wDoZ<$XECx!+a8!ZY_(6KZ=(fu-NjYyLcV^V0;j-_W9
zbfVRqp3GMDyL{xsPlMN9QP++wFDNQD4Q4-lch8+o`s&)6C;h|h#8^3>|LpvrzIe4q
zqKs?bq2-+YnoCdj=1uc73S?@L*|tGEuytaCjD_+27V)d)s(-bXYd<{y;r_&b;n(Ni
z4V(0%?e5W@h$EL>=Dh7F)f6vPQJ#HbisKud7M7&VCk@a3`C+i%Qm#1gSLM;qZ=V#+
z`H?VfKKE|F$i7IINv~rrZJeWS`*Ky4>29%Fy{D01pOqNy-xb##+t^yDs-<Xt(8Z*E
zrQ$ml)wtkYDs^6-^73y#K0MU(`fXa>jO91~7R(6t)AX;L_x$OGHQAbBtoaL)j_GK+
zF3^oCb<WmwsrdZt?&HImr!*(ZJd0F0`TpZyfBXOMzSsL){&H6QeP6K7h8*eLmHMo&
zOD7fA)zmO(vJ0(<yS*lD`t2aEO5^kSeZ}WKt&Xp)o$<N9{?}*z`d<%s-~ap0eE-k0
zpFeM2wMsH#&Y_!%X>RW(F1!AW#l=Na)nNM82!}gM#I7x@uXQcBP&)l>kYWA7`HT~9
zhxs(tC#LO>7JF|p^M?E7d%ZV~JzVfU{aWqB&4%?2jhEe}+sos-O3V0b^fQleA4tD$
z_W4~?xp9v1$~J)v_Z|6im!ghE^d3wr3_oz#;P~ww6JMr&P51l6Wp$#uEa5dH$CoUT
z(ChX)tPfTgf8u2j{jM?NmegLi)n@uz&D)G}m1c4sxR+73afaEJ-J6`{c4f3(l<kOq
zQJc$jtK|HzDSKvd>`G>6xmaP{B9y2hS(SV9(|)%+);Tv1ufLa_xqRNyi_^Fgl&*6{
zKA-VHIq(13Z&nvPGDXs_i%au-kMJ=+(XxLVx3os+57r$b4F?(hY&n+k&yMblS6{es
z2{VsW{Vv1Cp2Jg5=RHeM|9s#|<kzQC4_3LQ_$aIhzPdl8VT*gpsy$0wo6KfCxVt#n
z#m7yhBGUGK?UFMhAJ^yciA(Wt`W8NKU=}v6kgeF{vs7TUSdqkAht}BF_3cHA_P*Ki
zbmJXizw)yOyj?EcwUD>1-Tj~O+9i=!w@$8Rke_WWxx@RSlvn<-AN{Z0Cv8*87H_(^
z$mVUR(Zg)JSKm6*x4o5EUQy>9bMLHpX~p>oE&>L|8@mN&s5lB8Ww5byz2~L-?xONW
z+43Dex7l=~i@tsc$uHyL54L9#e?Lj(xrzL;!-aNp`9zC<KC7{s@Av%k%bI!o?Jo_^
zelDEHe>tSPx?olPbQaIKVK>tq4KFB1bXR6>Y;oS#-P|ebCL+J(g^AK?zJHIHHyZoR
zu$<|5#`o@(1y@b4^9dTKGalY^=gY4z4_|)zxNFU=zFl=%e7m#zzO3;$*&6y-XmVu^
z<BMf=g|%9*KkvHdl~L5O{akLvMCn6i>H4qdZMBG3T3NSY&4Ck@8hO92_#KJ7)gA86
zrN$JPbWYg+xXpaEG`;O#e|_BLX0Y}s$FgH-O$-wS6gi5$GQBJ(zsTKNvuEn*EZ6K%
zpYnreepfB~wbDXnU$j=h7a5<1Vy9DcHg#;!J({<DWrmohOZaE$Lqd`f*V%$YHZO|f
zVzu?m(t9e)!NPl8SjSK7naZRaXDvHZCN7FPV`3-quy38CqDE?wEhCT3!Mt~pIW4Sz
zY=6)9oqzCVA%pF+mzy<{W2a|Z7FaDY*_h(CX-dxAxsxjmRE4j*t{2;zsrlsnvr4bY
z7XS0MzVBXFFx`c($)#@N6BVy#=G}Yz)_LnW1|8xmU!!B@n>q9H=OYcj6{nU?jh-2P
zuG?Glk(sVps;|bQ?Vc|)rbsB&2%NFpzx3q6SNmqIf7Si->>p3le?c3Hi$1jm%;7#f
zqatZuF=O4n*#Ecr?`NNj-~aoK<E4^VK1<o>FVD_y|NPRzUvv4CqIq3mH7bFjrbg~=
zrczta-??7O_@czhueeVr*KS$-|5s}lu9!N%zV83+`?czm_WgYH*0ExF*t?gG&N>%u
ze7_W&Q(B<Qa=d5WJpH9p*4zEBnYTHzvHINQ_5Yq-fByL6arwH6eKAd^w&v<fDg_;W
zo+NjJ?Y+Vz6`e)f+WyH+<WCTj>e(mGcCbdhms76c+t-(BOg9%Ckl@YO{pQBOGKbiA
zFaI^gZk+reKk9I9;*Ijxdw%}PNI051`5T|kJ)O+tYZHGnyt}&VT=w?g%O>oaUuSwg
z{r122)0RHlmueQZbV)n+I$pbrU%%T-$WJ)Smcp#GUSIY^kHj;deKVtKZ-2;hm&!N#
z&1k$qGkt?-$c?6VC-=NsyIO{QgW?32C2Ly`Uw<c|s(N_-m3#H~dnc~uyU$@NlB)3b
zg51`ZYneZ7+`lSh!nZo>ySx3rznB(yY4iD+Zx=mrIP_T{A}7Q)jqefH6xHHYPh!_Q
zTD^TE$J6oe)uF8TuX&FH@7F$B{r-^Q3snb(y+@WmV6|9$HsW^F6FIFH>jdu|Y|w1H
zG2=#cl54i~9<9kcPRf|}a2bAU-|Y5i0ZW)h)SYLn`R3~P4a@8TUH4tvQtlMWp3*8Z
zv-ZjEs`nYQdK=95b0vu$c1TQfxVq(7c65DGJZH@=_2=KFybBlCu{&au*ut}C)zwTh
z2BTFkwrd>~$l92{nBn!lztu-|u6!@O@x16tO!mEFpIXHDZQq#Rjpt;!bK7ahK9*dE
z$6?GfcWA9&EFhHV_h7~QhjTc5lqRsG2`JCJ&9Uk}OV487{q57=ykjgef7iDD{noN4
zKAg`iW&PXRKY#pq_VMG#FCTs^v6J_Ie)(mM73fUij|DcqpXU_&UAFOc_Y@8i6Z2?O
z$ug}y!?68D`wEwfjLRnaNSvR?ciZ3n#BG^UrQ~yGo^{=2-OM=AJkDD3&KaI%DOx#c
ztr1Eu#B}3s#O{rmU-#?u{QsXmw?BXFTfKYBd6}H&rq};ByjHR83lcdV`t{_g<6p}(
zpGIwAQ9GXf=}n&Hy|`C;_jxSL1qD6p?G0^we0aAlXs+DVSvKd6Pp0tG_72a~NmGLC
zWR^KI{eJtisPcbFwcf6_m5O`TPc@ciIrF13_>^U8;kDPh>UO*;Rb6^6i))dutW~1=
z?(E$c-#IE3KaQ|2IMA*jywvlPPUb0v8DjCL*XTW9isQJol%e-|kg#3%uBb1K%r=RW
znR4vPzx;~h2-v>Jb*+Pk(Q&cU-9N=nzwu*@O4}6aa727lw9aWq)urm6_V0*SYb==e
z<gaF}&Dj|r=EZQ!s+~LJ<T0!G;mR8<UU{3Rvo9}y=6+E3w0rFQ({JAf|5TDsvz&Wm
z?y31#rUxWmoxMP*w{wZ?A)&YJmb)i!cGghcKBwf=X_LTjS6@C0x%5hPcjNP@j|R+f
z5%c?AfBLp+krlK13C5ynH3Bma^3O9Ca^`tAGgoMOiqf&PJ^PmbNuR&Z|I5D#Zwmd6
z-!zo`JpcaxJu`H(?(g~g=3~KyrE8>`bu1nh%uw+>vp@TVNmSciKS#&co7TNnIuz{p
z@VS}5k!3gc|NOL3C;rd#{QrM0_rI^V|Nqk9{L6<5XD&J3E?3qGICJay&jYfReg*2G
z`*pK5t4>^go`2sa-tOPi$L-5cf1Vy+^Y7>M_?oX@!&lTi^0hrQb^1c<6<!^|5}Jk$
zi!QcHh3?G#f6+{rVaM^m3Vlr7*DkG0+_i0?V6yV}t&c>$B>nvDWNak&Hg4*aeV_03
z*1U}K<J+v4uWYNJRMGzSbHnF*mbriXPDUi}-pSqf>qF9njbCa%8*J=W;JMwg=vvLn
zLXU;X8n?G=xg5E|x6mr|z=`9pbML#g9$psw)z@^NzV?NrzFU1SkN#MoCvd3j;q0o^
z$t^rhcLip;^QPxk`bOtvf06uN>RW6kB{{izt?;iftD?~I%R4_V+Vy+Q8<C3#R<kNw
zP0c;>JV)T9+75}8rV>5z(vM^`vN~_82i)~Z4#~Kt$kBK_Gx&l-_HBn3@?nRPPc6OP
z?+{*apxWg7g~^MWH_0wG{i<jEgHPgK^eO|V9AEeD4Qrg{CR~a>a!l%a+0tCk2Z}d&
zclKF(tYSAwGdFBgGk>O#5Rr5L^|^GGUHilCnD;h+O%?x;w6uNSnZ8SL=5N?4g)Tjt
z_4fbGq-~Bt3TJ}07-R>U>aH>{TCH<k@Zt9F>wdiWbW_J|pZ^*;Nh>=U&+^?%3eztn
z%=!3rwc%biUyE-=N5iEA-rW7Vc&>`tTj^_Q#!K@Ic^MU*lKXqrUaay^HaY8AV|Fkk
z`aoW2=Mm?1r?|w}Y+7GU5&WGf#_pVTl;7Zz9q*IAhTXq!H-s#jb>La_fd>ppjU1Pz
zEVG>Ncl<bL9n0s(vyUG?{`u#_5}R}Fj~|v;$@VS(Y2bf(&T>n(Z<CCccd9lEZ*Dlc
z`<%ARhUi-P`$GK7o^U*qT<fvrb$qheC!->RB=#+SM;6UyQ!!ec&`?;KHd9)%u(8EJ
z&+?$>$uRxbpN~JEf8V~oX4k!Ic1h2tGrm8gBlO<2A@b&lsWqni#r_9O<KKS!^DaHt
ztGg7={jO`;@V)rBtx#!;!m9ZJHj{JN!)yO_gyc^C781Amn#NnR4L3RVKbm86c81@i
z%?I9mOWa$x=Z@XIH3vlJt`Jdac(Gr><ZQ5?WuVi-PkYv!H{ZQ7>+P?a(^s-BcKy?<
zY)`AMUz4nx<MQBvnBemjZ(4(&gibBmT@=Z3?2PtDshZCZkMpQiPrc&uhwat2?8$E;
zPlTMj)1vxkVyN@;efOr_e!4A2-!xito<=}!|Jkc^Jr>TJad3~oU7^g1<r1g1Sat1|
zYGgIhossvm!eUP8)PDQ;d8P+vEDemhIYTDp<ew+M%3c3Y`_$Rzed<2fzWsXhXV)n|
z4cELs;mTX~BIXH4?)rp2mTqp}s<LQ8?A<Lhv|9hWN$><;E`4gjAH-B`xb^B@ld1B;
zk}=D>Kb~LWr#)L;LC`lY?W?4Xu<sIM6=o(MP5&O1DXi^N_Mcj=Z}6Av%i<658nq$2
z(%4uu?eAZ^|Mz2i^uqYse?QZYPu6_#^I6}bcLzQiu-DYgc=zSr`nWx-o>oQr{H?Gl
zI5qnQgTlnap8I}2J3afc`Tc({&)@%RVEFm||6gY<{hc`V%iVoHJ6#b<*#0=c_i6yo
zvl-7Kb3gKbIXwSgP2{xr`p;KOb&qbT_Dj;V>NE&HYch-9QoxT#W!kI*jS{;8*09G1
zPPy05yt?Cl&$c(O?Bt%#<JmnszGjv(`vq-7wTfQ<|5^(5EH=q|Ubs1O@gKEqOZ<1h
zS3d2xTVLANLmQ@7WESQ$e+@ADJ>%e}t4*RGJp58#99EDp=4gBM@#VCYCzKvsROi{H
z?%y+G$J#A-kA|C`TbW;KykhgmHnromD-wR~+_l-=+*>uYg^z9BcZU1<%6cZ#{8p#j
z-jgj7wODvX%>}t&)=WNAhIhKN%Wuu;eVypMyLsKkw|C|`L>mRq{}$Hbe0ynW*4)xX
zt_dG+?YvvO_s*l|>>aOPc27U^MefAEF0-?Lzb4$e?z`gu92w3BUtM&I-8GiaQdnLj
z`+&JxORJ`|xx4GgrSmsBSl@Zc9Ne2wmR8QTxOT49$<Jzcm^z&6qh7vWd$Z@mmfrSS
z{TVkWr*t^xosrq<Wd1~>>Oe8$RVDk5%WDp~&fw0z$dsm=WgvTK+xwK~-;eIy?!ME=
z%WSuw^Vv5`KL4KEv%igNO~<!@d+rfVI%zA@_ivl{Lho#E-1X}{@ijF*du}df`@_&a
zxiG;ye{CJRz+z9mSs5!<-L<)M)kn!m^xN4vDk@ewBI_%c)EEVCy<v1<4qNcA1>usX
z{$>=o<?ib*j40jm#+1db@KmPVx#OQ7{wb&et*1Tv__jHJf4-a_|MJT(P0m{SU;bDz
z>$&r}$fC5F5kf8pPAUef{AhD}{_2^WeQ~XL*y$IIpW=>gm3CP<d)q9_#qrf|V>d*J
zth~0I*B~iv(zkaiX~ruG*3OqmT6g5hwjH|L?&+PM-tTX^#?$<=Iscz;&ytq*8l}3d
zzI^uY$uED4{ub<wyH={171x@$`|Q;Bp__GEwfO4N+6uTkzRBr%Nr>3!R#rwkCU)!a
z=Qv+IyrFnm$$^qHlV--`pWlA_a_(L+BR>1$T7ww@(uN+7OrAcj@tN=`<XZpsPc_l`
ztvsgkoOXKrb62hxu8nyY&bIcu!ZfJ{rlt)(nrA)L65Cczy}c`LU2){mvnQuNGQX@O
zQkmz>Fxh&ixT;d`<Avv*Rr7Iw&G_cU<{El)bynoHuT|0c=G$j<H!})uYr5roa1PHa
zPKMK`witQO+jMYd_@|>AXU*j7m@9nV-D=LIp9LB}D}QG0)Dq<8=2UbynAv<b{AcCf
zN4w^nDE53kr+(gxMZYFT?z5|($zMPHnfjyeufmIj)Rco+x9`mVFnNbw=j#buYu|V#
zvYvYAsP1`EZTg~J>n5hh=Q|{Czo{8qCcJi)xqyR^-qJ-eUS_9Se5N=~K5=A5<K`3M
zpFdsw`PRJsuz2M^&9n6jzRbPI#NIMte*V+j_WvuQL-+sr)cskq_0P|3D^^7ZTFX7t
zRONJcIX!v3yXE1YyzO7BqFXMT$R7BZ!)fZK+97v;&cAs#!v5#q|M#K&e(h<`e^>9<
zo^=<joVnqO_YJP+dDpyl>|V9*XjhnMir32$tIPiO{~y2q|KTsc{hx34|Ch_(|DRLs
zRdytNYEVVP*LL|%Nlzg`Ex&eK)s7!|75}9!q}l(>Q@&JZS-ReqcSFe@w)yoROXJUe
zzw^Ft_tf`#S)0z4?ELrf-V#2Enl6dCGxx1sz0s1(E&k%U+}*rCm0qRDywBZIBP}SH
zYk9L^($v%#^Ii8$T>UsyH@*Kb|M0O=k>yvGh&&50ET53SI+Shg+Hm7VVMaHe72fF0
z(7M~)R+M^N_U-?!aE_(N1M|hCg?8~@f8`)_e?`N)b0O2EOWm$tt1(=7^~1Gti+KMd
zQZD`zXB@8WXIgL55T&{yU(ADN(z>i{X{RLJZ{ZWA%F0*ooV+qnb>mg}QzzDF%xbz7
zV{L!#?5z@^yQQ}x#120%UoH1wh1tO}>l&lSm5cxGd$g?j?!sGfjyxMvC3?37-Z2)I
z)Rb(l>9)=*kuuH~Gb^8Jz~69#<CWSJ9hGBBj}9Jn*t&RyM~Yg*&S{dz1V3yq{a39c
zTC@Jy?VQ^;%rc%N7(HIboMrXfZ0Cyyd<NYKx*ERA0|E}5VKoRf`|{$v_`T@;?BBob
zy(xPrL+9iBN{02@v#zDo>%Ltg7~y=q!TOnQ?nNzI?+5FX`$C`c^M2iUKasKa&*mt$
zu%&COgCmb~?7zq%C8K+ETHE2LCk_d4E%ck(awW1ZHPx-w-b3{Jiu<~!%f!vR-E4nf
zeUv8HxMa$5(CGv}E9z`)_f>+{?Vp`({`~Vx&>Fh)$AdizmrYroY4>DK<f4lwO4lDa
z*%>LM;Jfw8PLCCGpKnFoygO;<CU&Wly^^Nwd5aZ}v(z2A^n1dKr)hao%<oRP^D;CB
zv+c0BdxM8(qrvvw8<;uwO%jjKm$$d8uZWtz-qrT&qAMlL!8?rwS%mIR<rbYWG5)HZ
z&gtK;pC0~N6nlEtD&Fwtr+;k8-C)eML*j2|;r%;y8Cm+~$<Ok57yZ2{{@+cv@<ykp
z(euYs_|GtzM_&)hJ6#o<8)~j3eqSZ~<4NU{HfJv@$_p+1v`0r?|Mc<I>yGyNtnBAH
zvv*;Cs?Iy}&p!iZfAPI75hASIHD!im>c*#0C4qWV98<!0x~CoBJ$i4SmdxDQ%`DFp
z7`>A`?*>Wyy0W8cm;CXr$UyB^T2WWJGB<7rYkuSz^*Zl(?}?y`Gbb}>7hZH~adA5G
zD(At5=Edi>I2?JGIm^G(KJI$=MC(Z%F3J(9duHCf?4+URvZ!V1)8uphZk3_2*W#J?
zKYQuE*xpKa>W3%q^fV8yT(LD&EhcGgd~CPwud}<ZPhgpJQ_cHWRMfW=n_RDDfvFxk
zwob?2FsK)8a60+=zyFNK^UitPyma_#o$l-VMZcon<S#3EmRa=c+KVg}Pluqd^Y7Jn
zY(0^Fe6f|NZq$}FyX@xp9e-@&Yq|85LvDAaTHL9sojO?-^Uhta=?L4~o%O2q*uNhI
zKW@j@mhFtG|NVD<-S6A^%a8nj^nRb0=EmC+$69jbV(eGhuDQ^#sA^~I^wQWZD<n71
z@mqfR@ZtA$KOXM?cm4mT+5dm=`}_N+3HHd&FXL~RnpqRovGH#1!^4H&gQlFAF}Kx4
zE9kM^%~k6oF5dmS*Xzsf;76)qk5UWmKj`k3{SrL+jCktVcV$`fSM?6Oc*>Bd@BKFD
zka&vseYX9(xvp>36FbS+w|5n*$kVNpwEJ%9_6sH~;Q5`FckF)E{34&LNwpUoCnWwm
zC?N92@EiBT$&wp)7Oy?6%~F&QV<zZT9(8C&L4VBm!e8MpZ_kc<cusT2Qm<<(YumpV
z&-|ESb%$w%loFG|_JX`)`W$i77k%})=hve3{gvzA71`?#T*$iRcDeGmt=q0H`3srz
zOXmB&vYoT?|Mib+O}=xjS?9JkYWw7$7cS<l>@fXvv!<K**j<f^BN5wtj9#DX51-E`
zKac;*(VqNu{nM@Y?|b;M_R`^Z#&@-%AA1$7d=$7tv1O}^gY-k=Gbd&qIdLQ5kW!w3
zvSD&raGGLbx>&No=Gol^37@l1O`2?M=y0v!*46%d_F8+?opOsT4%}XT`LXZ%#`GsV
zlIra|dPfvQIGiQ4x0~pt>7}Nsg)Y#Ta^?(=?UVFBEA|KQ<aMnKWZ1kiYh(TXyMJ5X
zEZ?QNcOHYUw7agl?z6|y;rC}>S$ibEvbitz#_7CCn`-%Xp5DAWiC3VFSuFGTf-aS;
zZr^tSe)$52e82T*Nv_yhy;#<3{j=n>`1-RejyW++2+7U<zx_57^PP#c?yXWn7a7@t
zre*pafBf)D;&lGw?dJW@+dsdov6}05{Bh>I=b7`KA2#&gY$C_!(lNurvW4N%5rHhG
zJ3l^KXl;&?V%pvC&8{t6iTS6;XPLwPE0-TUx-9>9?pt=_Vq;_Ov(;bhvec9mSBjRG
zbtjsu2+o|$nfg)s=2^Y-)1SX?UtW6ggv{H>`q^Cll80ob1qg|JbgGNK&OiP1@7G^n
zR_&a&b;a*IJ-6L$rKfjFpGr_Rs0_KAb)+^T<n89#iuAnQ0dYJzzUvJA`H~blKFzs2
z<-|&z)zhwCE$w~R;j^~r)Fbs2r5P$S`!5&TsZPHA`}EVl#XCi`pFLAZTe(bwi$~eg
z$?J-I+OZ8x<!}0LuG((ed^qI#(aW#QR2@pT7HMy*GV)1C)MQ&LyFP?h(sIrkjX8&n
z4=5e8b9;8>no>nc(}CAffyJ(~UYU5thU>51DSbHd$Xu1B2Y&64+O7Wejo-7hZ7*hZ
zb4{A4^NM@tEa4-s1@2zE{g&z6oaLH~&sOEVvC;VCpt!T_^Nbj=udbP{pH%KYF1qsR
zX;%O7FRVBH^Hwkawa`F(-K>*Vg=@MsJ_amgb*T1U^zqtqM>V$RTdE?jUh&<nIH5Yh
z{l9qfzm!8q1Wz3DD-h%VV3Phc({Q(yT}=M_-hZY~+s)hSbvuLqU90e%P}#(BV&1gd
z{O{}8KKwi!DdD}5-(Ng5I^25xp9+_AO}nOYu4bJ2XnW|~h)+i*aV)*w`B0_*<7}0h
zZ|@$)|9Km}dfNRzAEw{`_pE%<hw}aZewh?5`ouJqvBbp9y^>|sRG+Oe`KP~bJ2&&{
z!A!N8>&$EG_Dx$9y=&5B>!&tH#l`cus?<YOKh4;;;@Iv49&yHlGddley$Zthe;l*P
z*dlq`u=@Y1|9iZSuRiow@mGns-uGMkPdLUORJ*X)_tw6HR*lvx53Aji``NjVU79Dp
z<aFR-N73^VOxZPaw&s<H@G~22yBM>!r+sR}m-&lU7tgJAxYlmPvipb5`#U^uVo&mH
zW2n!a{r%dj+<%LvTTP1EByrcfCq85KtSZ4;hX-dQlCum<EKJ4Kx+Gtpv0Q$uQF;!O
zR`0eeZl`C+YgXO(Bif_OT9%@pyT9^x+wI$iF<Y;g8EoAA=y#9CeecK}o9!O#GoG@=
z@t=XBY()JXiHEoM-;ZA1mSUu~>DSETbHghpb0$pI>Sxd{|NeX8y8Z<h1*h+v?^u=0
zBr)fN#2XINn)`=b>m%KN#4L1V5j$}61Z$FrP;FY{q)y}69TvNdkEcF-!ys*aV}_&5
z7W>TsW&7(y7OY)cDDIp%`*iyOfmM<zw;N=ZnzEfbo4}p+Flfq5#ZyZ~*tk4+(*iE@
z7I(auGS{v7?S|_wg=XvOtbP3A^(D6EWjQkZc?Tuz9@J~k+`{`!@<rkNtG&1T`c_#!
ziOrbte(!-Bd|PH1ZsdQfEpgG=pxMUle5qpFEX4zM8<uGuP3JI_w%Hnb`TX1co35<6
zBXDPt*XL~e31{NxsY>^j3%rUrzkThlG&e7!^Oql%R8-i^llSZIf8Kul^UsepcJurA
zpCA5MU?U5<?4|#*$?`eDvsC<bW?DK<x^D8V%VnX5N5dAAn^w_`cLOK5weEepApU#*
zWw%FPeZxa;UXhmDc~VrV?5xrcF@<DPQ*&kiw4|da&njk$t)97~Ge0kUcWj>j_18{E
zZ)}@tXzEmYMw%yQQit)(8%J|=PhbA}^Y7))pL1iGq<6X=ESXYtb>(mG+{E}*Grz~K
zx_&03F7d2fOUK*S%F6<nm?m0mTCOu`W~*z9((JOxJKk5PxEAow`hE6fqRJe#nf#X%
ztvV8a7VO<uujjvgQ=9yXofBq1n5KHeFUR)q!@hr6$`{Hls#+dL74>F2t@%+Dxm94T
zz+;x;#aB;0vRT!W==3yGLP_M{`@U^*g6|ft>^Oe-%$YM&RW`4jxq9X8thgQpx2!B3
zna1p6r~E=}M9-=@6&Rb&J=^N2*Scnh^$}-}sWTp=?%iqmDbQ&7nXIKh&t-)>om;}U
z*vI4`=eo3Y1+%XFT)d`!)9q?qb^ZC1>yH0<a<upGmItEiPe1XpOF4D@%%&ATf&)w+
zM;eDWOo(YQP~8=@bLC41&dxU#S<`Dn->;Zl_~ME6@lMGp9!5($gQ9|dG1jTR*!8d6
zfBJrVI|cc(f1}HiS-Y3`^z@(Bjh)Yb{O}b1K(6@p-PUI^LTy=cLM^%tc4*uU`(3fq
z^5K^aayHzr4AO$<2sWNPk@9T){;J)p?oIo2xc&S8AGhs`=KcRS`}}5(om*RZFS_yW
z2wvc_f-7j!rdRF!=U>lR_iVcB^sDQ)Z@SYpd-B_&*x*yPt5wpJvZT5M6FU|}@CqiX
zO?<2Qe#*(VRjKl)w3b&tn*6`GX?5I+p6w@X+kThi?2l(Uwp+K~`plZ!N8hQw*s+-L
zjc`fyq9<*?UtCkV!+QS>U!7FB?t<Jjvkzak@0j-Qa_EEUS8i_=zTWxgoy;e>%6Fo(
z`e!uG`tEr;;P6keFYlxcrS>&GGT%7)vO&$KWv^czzGJ=B>9On^t;CA!^S8}EQ$A_Z
zE{nGB9U+D0Ogt*O^|=MUhod=i=l1XVdW@s_;Fg{0S_ji!x8B~yT%!KtQ1}09pBgqu
zYVLUL%O3Lm8k5nD>^G}ht}lv~EWgC8{><#{R=$?|Kl(Eoro39-ckc$TzE@$xJJ&n6
zC$mYHitF!sYIpR${qz^l9=9c`a&N7bb#&i&^YQ+<F>?=Jmi0?XZ<g-qyxJ=JM(JIa
zPEM9k+0qMkg)A!=c+y$4mq~qW>9qYY{l%#}wt1gK7d@Czl)q8pfg;c9r)!(IUzIB?
zikaaW$+FaB4Tp1R=v3Z^amL$cT{`o4My2%C>eQK+k6GUO*05h?y;P)u^V=tEk358V
z^X09N+TXhK_{F?nDJ$M9VVl1dTvS=0oWfrJQ_QAg;mwXEKC7Cf1n)gmnsv?U$~n<L
z$G`3`OY@Vx%6fDC`S*{txv$4ewfgk@$>X}j{D7~0AvsMm3f>qNpEC0OSun@{`QyWf
zUp_nwnvQI5zx)!kT(^Ju;~E>E&xv+tDrXhXQ}La&$%9S3fXjgI01Kya_WSjF_s;&S
zQ8Dx3i~nDGpM5{byKS~X1@9um(9H>EZ7Fv>uCAWW&fwg9A${5+O@j%RL95bj<LBg^
z-t_0o!=jy0nkifPw&|X-x)){UmG`Lob#Y{l?)2B6FF!5XtGoJGWLK=6yE9X*lhkf*
zuNQ)ELU*3Krc(6#ug$HBJ*BJfi5oGUG?G>G<SfwMb|)`2(73mM&gw|R;Qj-97d$O^
zbhyw?=J{chi;u1^e}4P->#E4LC8}DheoqNV|Mu{><XO`j<{5j1ZWQIdUesW0ak|9k
z^1}mjP8D5Vw2rOZ(Eku$Tx<8s23v-o0uQgP?y&f=BEMRDA=8;B7Z&BEJegCuS|Y1E
z*w9qRV6(*ri;Zg)%nMH|B&~@H2`>_F7dBx?f5MU^n5I~Kb<yhX%_^S3dRYk`LI#VZ
zU&Lmqoo8a7lAyLsiqq5IMc&JDx4lr>B9?0r-)EVNs!7ktktu0x_3V&LdDFEbQLFdB
znYj@zDvWQcF1togzh%1b@YQYiU-P|B2~IgCXUM6#k7<9n`_6gOUsfzy&uO1ksi)xg
z&L@F&@BSE}|2{3}`;IfSXHT8>+V}d`X~ABZ^OhgBc|N7<((&c5{q5^DqWFyG`&*{2
z6ttEQej=-&Jl(SP=hNM(y4&xsvH$<&;rIQ2zfJl4=V`Ljg_ZB?XY5N2FAml97Cbb^
zxMQ7nB&U4W-=Bxy9=`mxOn2j5qihe&qBPIStgJb|c&-;R2L4#P>h9b*M<e$voAv(L
zp4z4k!4Ii(&Ax|E`Ic<A=RI?~-CM0A`jxzQ+p`X|A6$_yGsFKwXq%d_Z@$%@2fm*^
zANsI8dHRm`TQ}_!e86_{wFG;_+_{0B9FskI{kl4%&L7U~3cp=Y@J#OTFNrguuakoJ
zfALv1@zRdDY4<X_eDB#AR94jsEnWTVg!cEg`(MIS4jx${_CQbF;LWor36pDi?{d-v
zRu+U#)B3^M({cOy#%Z^Aef46NJR-n#Sc?DBnFUjo!{gt~(A@Iz&JLbi^EA?u+_p;<
zv(IHTJ)c$_lVJbiWp&@gh5LRoxXYZ7xY%Yj;cM*XTlcpwTxa^;V*A3`OE30CRG00q
z{=w#Xp+Nt=;GEjBsNb%6NskxK&RzH7$2AL~YiAhK)6(-x_ef|L7X=%1b8g-lX=>*3
zRpZivt`Dy?lM{=@h04s+)YH?{%v|OzexH)^Vfy*{&$rz!?`X{`4dge@u3`QdesOB(
zjPSnM{yU~D(pq(k&-V!Xm7LS=o2}>8ZlB8Y+E9vf=M(+Hj>mF~wqEJGtDNSS(xzy<
z_NVupvOqEGyu`KVlysYv?_Don7~A_*%hqb)yE{8hW-~ozGC2G&<C7zwRodU`r425r
zLi_ixcURqLuy|3?ikD?ti#DEnU(0b(E<7-H@AduZX&VaWeKL9exX@0H|9N{d?}A#J
z`RCg&AAVV3BiDEQr-A=x$7Pz&J%iFbm1e36r$2I;xIVP<+nbN&^32*B7H>|HsCx6`
z{g+O~u=uGzZtU6-`nUGL>8&ijM?zlTiF?g(|EkaJyC-8Nty-~k(dC?VF3&dHylW}`
zG<(tO_UF&{@2Rh<(p$BF?|4ZzZ&8e?+nb}OkKa!H7*w+J-n!ROUV9%`-nY%KSba9`
z(i5}w@h5XtW!@L^u1~$=YP%(Ud5}Vf<)xm>M>4%M(~GVzU2}R@O#Z54I+LQl_;GJ7
zFi)DK^ZDh64;6MxJo8V#{rmOTznwAHXWaQ7nEbrR+U?oZeVgh7{VJ4CnkjS&%Q*d1
z_*^-!-&pcxR@jv@n`R26daAuli&(qBn02?YZOd}s;y~8ihh|%Mtk7C`^6c6bD}@g)
zZS!pLTFbd*^+&TKOog$YP5NdBh4)NYpO&rJqmrP$;%8Rg8-}A(j`nVuF>~YRmj!eB
zk4NZwoW1<j%A&k|;tus42fLkneU)`x^B$xZ2U}apcBp=k`g_?wR3`Ro+m*|PTCw5b
zs*4KVhFtfG4HQ&oESy!cSEPv5bC1CpiIAncZEfF$-nwzDR^dsHj`;Tc`KM>!{#;+a
zyLRSt`$OW<SEqVB+7|wOzW#KHf|}3I&e}ZH40-i>m!3A`^f}8aPgUx9+5ZET-af@i
zY;BxAM}G<#=pDPcJo(qJ)!V0Ueer((|I^>=|M^Zj{4l=mqs8Y0*WTJJ8IFmC3$soq
zrC#Kj=sfFEh>ZODr@Q&j-~apc@a59jt~T?<ZvB(2B`r2~gw6jVp(LsBnWaE8>UH(9
z6TLgC_b?hs@8IeEBe6;Dd%RS<pj|HepDzt8yH~%N`sv=uHcc1pr?UmhF8E)vjc;Wu
z%r?IM?d!3CFoiW<x4XB+KECnxHGBBF{UMvbu$s&fRCE-XSJ9Tg{PB;!U%2LMD><Wf
zJyXJYQrMHLo;krY`Q^9j@?PE^$HKgJ;yZ(v(E)|GADXDj++3X>aD!9Jc*Rszvn@i*
z-_G^Uy%J@*ll9J#CqidGHA;oMy!FZ1FIjtGH4A6mzZaKS4vXB|v}9YS{bB3Q8D>X)
z-acrHxZu0x>Zkoxv(mheu*o#9+G*9>sLPfyLFT~rWlYI@o9_#LOcvUcxqWZ;kK^Yj
z@qfENk?q}npW6pzPb`&wDBW;b_O|qiuLTRzE_Th_S{xe7(j&(<W42**$R)KyN@>S(
zmRW^-PfL?M`u6MeH;othQnZXW?zWcp;Al)ySrA)hcV_N(N5|Va4Rb82Bpmk#986O@
z#lcj!qD1A`)y6A=f}y&uXRpLwz2kHC!lV6n*DMRz>&DQX?;BTY(|>7`;Hu4ME#5iL
zw(c?9bS!Jj{C*V`p1Z&KE=G3$%3EFbo^eHP>5bW*X=xJf;YU`?V0$Gr_uUTW#YM+1
zs(=5;zkA;Df3e#H?XIuCp!Rp~HUFzV_ov6~ZZ0zR+PuVY@v9QMIp??TsIRb?f3E%U
z#{vsEr^NZo<~XItFa6eOS^U>^k;IMdzRuJ0dCszAJdgZVnB2uXDfoH^ud*cXWK+p0
z6D>pc?%27@l~aEHnjkODsIHY+7k{0<^5}i;vUvMj*%y~g3n|TJ3bE<*ew4oKwZHuQ
zx?ey3)M)RD%>DJNdAHqUnbT1(igxOmn_gWcy&|FH%G)&&(v`|8Pj^q=$LPek;Lv;X
zIvaJ#zyoqh=AZX{UiE2y!R2$BzLE#3qNktE-MhAN%a<c-lX;ycxjzV-^`yXVj{kD!
zd6^q;pP&BvxA)E|W;@FtWvzQv`Sw90LsPZh1@9IAk1bD7*Dw^EqPpCMF-PpA`?Iuy
zvmaP?S&1gCHhOx_bCX%4k^16!2OKv{IAl3nn|tD9(W^;v9W_bc^muzMWxoe5+8z2O
zDE+Qc>JJw6XIpnzs}`{BPBx4`dB(JO8q0*Kv)6hTvd=ls&)+|P|HGcGYvx#p6{iMX
z7HahoQv1<WRIGKx?-Adp6g}@i)A=4vma0lmmaJ0z(ag}KtFdTOOcwv+JFeTsRGV2@
z#L{j?M5s;U+*M`j8h!gZx58Hb#$sNf?D;k(0;Y#&_AWfs*(o`5hTfxkgWoav@lU_-
z{bBjFfphlR&hX9Or>BeA&F9O%Kc`>w;-(jGtD>)^>Ru0;wp1W*`t4tzfAfE~`THt7
zzV6#QLr(q=`}a;*FDJ)!>}TS?-}`^w;{E?}|KG>?b)T($EdTGW|5|qLO{?RzuRLzc
z-i4iSNi{XM@?%e$W^;E!UCpjPAJ4z9|5p(izID;AHH<fN^>iC7m+x7ZZJl@c&{{*4
z?w_?X#)3~IPm2}hhM($VzQF$e>%9y|A;0|r_a666be`|C{QvTB2Hz;*$$H0QkM26W
zuV&Nj4|>O6RBE2fb@!ANs5L)!EFzilalq|u52o&&w)><qqkFc;j5UWIN8J-z^-6g`
z)<ucmGhRg$Z*p>c=-Z#B$h7j+pMB}F>&kwqoK1eAY*3Jvvze#Na>wuN>4y}itk`kp
z+rI>X_-m(*yzaTWEwrqo;flt`hW+brPhPPo&^~D!W5KF5wcCZ*cX0P6O;~WR<C?7g
zeb!Truh$;+c_PMT_4D5E_iGp~XyxWw>@fSoV|gU)PR6EbWskPc%-!R@p#IjZO_!Iy
zy~F<@_WHKe@QBxDfBm+;|1LD$k?CqxV9tAe-uffmJh!->@Nq`YeH%7)y>su)MOE9D
z<al0*xb;O&$@5M}?~H7{peesXE3}{Fohc|ZOTQ_%NkXhSlV{VtzXrAo7Vb0kVXO&T
z%Q*dr;N{aip$rMC+$qb#0=+a(`Yb+S9$Xl@_Ve$AIWu<7+C1^v^u}H1-rhUeqr2~5
z{O8qgn9U4wN{`)MXs0OQT%on@XgmAG^25!Ip=r9B8?RkjyXfn~k1JBEmh!cQbapr$
z3MxL7#Ub^5g72aK^S9aOPJZIC&LlAM%9*>$MiV2l^!4s;zO(am@&EtwJLS`sEuVdN
zjasC3!yTnNzM)U<@GGra_Dqmd%+lCXghR#1*yhgJD=C)GZJi9yrT5*8|M6r7bN|d6
zzc~-8NOFrGP5fvR-?m&aK6qaJsi-Y0=A3)|v#7FSk4;)?ePzWCo0OodPm6Z0SsuKi
z@YJR*vDh%p?kvIHS#d66(<WSh-LkR%<D3I`kG^BxZlAa!sCmK{eu;^JReP1M9Cr43
zoVM^#<7{(t<JD`ohHcl?sxr==dGLZz;@b~3VeJ>#&S`A*x|kaKYURAOOxrFobxeKz
z`P<L-@AL1~@3Ola#l5vIJmKuR6)$SezA4+i(S!figl69P%kAptWp$SDy>U4H)ow-o
zJ;mD2(ngO1(M566;<0D)7tQ4h47a=8aw@1KFm2PVTi0@o^Uoc*5_<LVtab;NQ>&gI
zKRw%=zgwg_&)>dgPhCaUdERS_Z7Mc38{SQRSdq>rGi|+MhLBvXcU^O!uhzy%)0h_I
ziOy7z)+mgd+PZS5y}41lX?M4tiK*cU$&K1;lveQ__?4rnlqZ|^OjoYo$@-|-^!VCk
zRUC)im`~hXxS;jL5-!mlOWrj7c(r5a;uej-N459XU0Wq6{wylmcK-F#;rvy*m45zd
zXo_4hQ7CNcqCFeUA9XxGb#mY5cQf-JI*Prra7&x8K}2%v_lbS1MNO6}N|O%-KMK0h
zyT(%dnbOmkH8Vp6JZ9XP6Dj<EJtK>kP4kK;D}q}iN;DTKd97NsYH9JF7eznc{MeU!
z|Hdyr{rMs5v?5nn9>_amexrZB{rtK=4<A1K^KAC|`*Y-eZn3eC&(6JCll|}K&u{-8
zFMs~_+gWq_-_KsJ*LyzsTjImIa+&8A&W7Kfef(m+|KH8apD+La760${|9|y=9`Bm>
zPyYYM?fKUaeW+NsMQoBv`F9D{9mdA<uL*CII=%JbZ~po9e}4Sjw)NVY+^etHHm_X$
zV3BL+{Q2`QvmTHMUMcW<m6ou%adG=D4%Qrl>v`Xc;$|&<>d^Dy?SGl2zwbvqOr8<>
zOwd$Veyi1jaN|<G%{A}#G)Gq2&+3pZ+?&3#Z<3g`9pmOnvel_JPv1A4G@ak4<RcZi
z(zD&?TtELp*7rxuw(~ekzPtP|>Xl;n6NQgU=09n@E}nYh^x63Vj<Q!|{VZNZ9t(ba
z^;)kw<B5o62j?!IFgK;}_!e8a6vs4`#@XIa<`?rEI=Feyz4x2u9Ef*lI4@lIaLUqW
zCE~H~4k@ma^IU!TocbqMCB3_jf<oq;H<zjl_q=H_xW@H9T>8A<?z@|H%6G4*_;dH_
zm*2lFXRExc)T@qs-k$zDzW4NfR_ltw{TB}0?F|xqoOL?eYKQ;AMV}1Lu_cHEn3)Go
zUF6!T8f59~<1P45htb!Sq4(vE6`MLd1Y8~*Tq3kW%UP)6qOQT8B-IY)b>=GJTxH)h
zq+)N}w6)I@G!kw#5fn6Ct(6veUwrekdp?gnjKg!Ey$fjH-Mw(*<ew9Rt!0ewEPVbc
z*y>p(+l1cxD*STic;4LAvq~vdy0d?u=2_0oUwmTN&(z-d()?qF^Nl%~-~UW=oFm$m
z82OBSMM~}cWT#)swm)Y~)IV_2ebR+b$M+b$uD+LjNa%Q%{4wqmaqB%aQ)5r>(&OiU
ze%!pDKVNRqzWUG4K0f^Tux96+^UFW~tl1ZH?Q+yK*XgQDEP|NQg*-!V?l}_QCoc4p
z+2joW8bdn)-Qe}p((C4&VJn(YD8EvOF|VMReX`2KZ`aO5WpB;B9kzY#)mu^j`OluU
z5lUWM>oPsQ<?Xj^v94R!cxPW-GwbT+xqi<>Z%?+Z{q^zqd3pQ0X6ovE4svl8b&sc&
zeQt>K<2*JwS^j$@|Kw_J_UY_jTlU^aG<l!Up)g<C__tZnzVtSe2p6webC!B&F4f`M
zTy;C@>o(Kv9Byi%SA%BU+$>Zmv_`zLqPVVNk4VAqKQ*=g|NQw^T$?A-sJvMBT~Kuu
zr_cxeIsG=*>N)025=l)B6L99ze;qaL=&G%2{TAD4EcEc=UajuXw!5XUU^&NbgUign
zuksd5)Ojkma8GmibjBjNADn`T8$V{c&b_P27ZWSZwW4E30rO<<#L!1>A<gXTR(Ufn
zyqEVi>)jcxyxiM)Q76{yx|+dsO6ab_jzY}{ttkl)UKRA6oVKZW<CA<dv7k*+b9-Bs
zv}`iSOW|GPCtmhm<f@3&G1=hgO*c7Q;%;te;E4M2rD79T_6N4&aNSxaL1$aRsUG(Q
zVrO(ocZzAQ&d_{)D#YQ`8IBeQP3fQg=f7RNwesR(560Y${iO$d#S|x={d4~P{p^XY
zFW>L`{qF7I!<DhG-$u#$Z@<=aH+HLyjm?_Zi<I;?lzut2M^*gWw{L}i-|FwH{{Qi%
z{J#&6|Nom`=f3F=|Ngq~JB)ZX-t4}g;g{DZJNfvt64UDcHMReLe*PL3ZMu4E-1@N9
z>>X?DXLxHa75OZYnki<($DkE-%1P&}&NkJkh!TTaPnOoODQy1172J1xdah;sDjoMl
zzxTf^zdN<Wr!l(Z=1+?k=bfM3RoElXYQt`}muZh&XPL9Ka`~(0`qz9cI`WS5%w~M)
zS#?V3)R_=2;hgDH?sKjvRu?$odM@IjtgxKk4bG&_E6IiaUZ?nPd|ZE}%6-@ExxtH~
zAIED6_X*cXZBXFZ?fvN_3*(G8E2q5ayWz@oF2KqB-%@4uNr9~vl4^&1CdhUPv~pHy
zh3+f$yE9ky$-!T*yw;YepIE@-s~{VCvi79JUG^P{=6im=+tsg7xJb@dQ_083&Gh8`
z35IUB%oY0PabH=jDkJz_-#pgB?X=ANut?uuUp{`@$Msu@r9M<6Xdlx<TM0{@RS&HH
zrFraGyjh~oG%fW&Y|G5MGeVg^JXc$DMoe|Vtf@g~B0?i4eBI;jdsu0m7GKcWn7!{r
znYved1uk5iR{O>EJ4;-el4@aLidXz9t|NO5jf;zq<SDYa_?`*9yJn|*r~D%h<MK?i
zJrmCyJ9o?`crF7|j{gCPcl<twCUfQ&&(HDxt=sm1E6vX*W%u*vg<sw$O?$iN1e3SM
zf^xY-cV`=$m@VRYDX@OQ9hP@))eG;JepXIZdbV2V%IP()+~=))qO;%mV|DHM<OMM`
z*XOx?c0Cg!rt7^TXWC^=Q_K8&c6R$ZpWHls`0&$D1(xx8&o5`%&i9U-R(kcsf~!iq
zHfeYZy{qhB`RMms)v~N33)Am~7T>tu?Xg%t;ZCO92a5^SrhLjWuYO;+6}|OZ*6QfB
zTk~$0-OkGWsGR+0#=GhhMZ11Xc<}CQ)aGNZs`dS*n{SzYp88~0?Vm4)`~Bsa`d*gT
z72CBKzCYPwvwn6(S<m~3pFE4wXKnakc}1{EJzdQ__3y(YHkyAgFxte*3#4~RM|ilV
zOjL?ud3XEkw%FC7-`P^G9$R#3QnGKy>qDn^+3eYAV;92`VjsU=e%|ZnXU%5^?b%h(
zwS517Q4xNbd`?{{tt~H{Jv_xN=bmR-xBcv0=3SCqQmb+`U0b9Nwme|BD&xCzCXvN`
zMlWBb^_9<3n{9ivPkTO+Wi_tMY55kER3vnydH+rcu8kXF=B|!hEq(danW-7fjEDGk
zRjyte`MKADDSus3>1MUk-L|zq9`rpe(R>{lx^&HkUH4wZ?9d25CVK6}r8SAZC%8Rk
z^m1r9wWddzZC-Kc<k<vvr=EU>d36(Z?B3pC+Ee55`ISyUYHYX0O3R)z4ii2s-mUmN
zE70L6>tWX|&7oCHT>^*i-ae+VB4FbAIQy&5+rNDXn8v#0pr_@;O%w0?)!e-$|L;#T
z!>V8A_v?ON{rROri|tbO-n@uYoAjPOEYe&w=hfB7jkZOr%U$jTBz9f89>4$RS^fHd
z4>$k)dAR-mpZY(2x4*rfzu#Jud(NHN?YGyze<ZxfTlCw<u>JM_A6~wE^=!+@*J0bE
zRT_Uh*?1+$MMZAO?qB!X|Jhw!%E#e6)upb8YsdVFHr^ipY(M|jzpgNO{YO9Nn*H<m
zmYmdCDp`78&b`)t!~8`v926M&ZceW4ki7J2xA@8UA94MRA<mm8wlC^rY3DQC`P^?B
zkG`0{%$^NrjXl?N$xcbR5zaKBt#`qCF}It>d*c4@dOPzu%W842RSZ*W9k1-|FWJmj
z_~v1Vrqv_9(=3q;!m<hSN2c(JpQ@Wuy)XAn%A?+nGmO(V9*}t1>C^nWZElF1{bWW~
z;X}LX>y+j+<vw$+dC!o!r2f88LHoQ3hpf+8#$V){;^QeG8F}r%#g4b{66B7Vo(UJa
z@0%9S^66bVhwG~lk3-KYl|OFT|Ev6NpY7F3u`7z7&y>wRCf7DgpX;1Xf}sE2dBwY;
zf|sog3OVZ&>g($gs`};Vt3`SX=LeRC3r>CTtXp}Cy~`<<K2d&l+nnzTGLIho3c26h
z_x#<Hi~ihqboRTx*5LZ6wn&MsJh*hb#z*Fri}TdpJ$tq;E9v0%FGp^^Ep#p4zF*#J
z&AKJ2yG|c{^|s1CZAQK0@y8|8Snf}pvadw^{jscjwoc6b-6s+RQ$2f&oZdH!&HQs>
zqQ?BQ4vSVaN7?OJZ}RFV-}NOv$9ylpd(wT%d-WG5g?fKYEv@C3i>%fzzg%Uv{`u#h
zp9-ofENu7fw6U{`kvq5i^v@c*`RA5@e!6Gf@~z%)Bd3OXa!q$qc{o{p;*sw14+6)z
z@6X<Na<xr8-)`%L-bsreyj2xd?>zDA%n{{(dQH^{x|`B$SuECG&0SrZeS6#8ZMmxh
zSI_3v-1oGn%4f%n&I|SX5AsaiersFS*$cOxeR~r$|6cvCe}6wOU!AM>tio}s)sDwM
zymwt`Hpo1$eSAXiN54#=V{fk7eR5W3-t+#NY04c1E7o2Y<xKCpdvb5T{h2oV;DdXW
z8&+-PH<IA=(p>Jn(`Js}>s1TBmE5(n$(7H4dvnj*?QCvq9>v`G%-#E@JN>)j^=XqO
zkH3@q-8Nb8dFXW2C(dglBO@XnzHv*^m{n}}e$$?ib43+x8e%q?w<qb9zPce2aUuB^
zo9U4Rg|zg-#80=f*3OJ;Sts*KftO80<!aZY2`0<4&KU$61gow~*s*5S@?+uV#>)NQ
zPy1Zi#{0bK+O6WfvH7LBub1jg-@MMX<@~AsY)vimV^hzHE^0k-LTa&SlY#WXrk$e8
z6QUkS*qzk+R&KY|&C)V}bD7GWq?TtLiy9&tLQi>?ikdYwnq7H4O-fN=!rKImgNkb{
z15<?>B4=eJ-#t|nHLdu8JI51&XGP9?owjV<T$%agh(LdPr@@^ATn+Q`zJ9;|Ut#Hl
zSNZpAzkXe0XjowJfPebwT{@fYXEc2{xt!0}DlFg6g@;p&^YZn4`MQ5sAGh<%@Be?f
z|NqIq*Zzdx-%}a0RV7BBjepbWX|1y!uFRF6H-G*1vxm8#u-z7TD0E}$sijja=7r2)
zi&x}|788{%ED5arme>8lb&d|-ioToQeg?Z9_W$2h?>fn*zS8cRQj5f)r^+QS%9icQ
zaG!eq=cdj$FMr*lsFV7UAAZ~X|FMZrVS%F4%%Iwt#`7%GWuKfm<g`e{@#@^=GP8a^
z30xLZdp_vY;)L^DGCSw*ww#vk{Jewd#r&5sAI~WsSRgL*YHtmLy!|!4U;jBz3BH?h
zd(w0HWt+EOc2D{pe8{<T%1!qlicVGT0ct^fwTn{YxQx^$v`y@q^yGx{kCzLg)Zc9W
z^v}Fq`N+J(6L(BLQ)zUAZDAivitwMF^!*iDeyaTsw!CGtn6#X!v03K+?%u+9-(S}p
z^m;BR$>^o{+_@sxyW{q*eG%uGe`Lq#oeAzY`pi}4V7aXRTilgn(obc+MP1udceghp
z!nL_<{hLS67Ky3MUGnlveL!bDFVB{a&ehdZSk5TDDsV7W?p_@F<eXR0z5D(*e^(22
ziRzYCW!K->v3bV~&F+mWJ66uvt$k*#<r-7L!cb#l(eIN!*JLU_?v$5vob*ht=dz^1
z)9UFKHFrw<!<I{Kxc_^(&d-ha8|U59<0=1U>&U_W_NyLOs+^|u#n)OZ)2?OlyPR6A
zFKKm4^fl8vqhBf$PCs~4@FOF0M)qg<5-+9K##0|}4R<=Vcw>KH<h4yYPm3()o?HI;
zXUWbl`{n%jr+<F>vB(a*^|&PJ{PM3QQP;LkQC%k;sH&-S$|iLOW3|Su9a6_^PqS}2
z^2a6l)sHEL9~;}vjH;BsOKf*r_{4Ivzhm!V>78F6?%o=;eeJc|VXwos@5<epyLIN)
z36IT<Kc_vDjk(GmeDbimQuebJu@ABl*}Lv$2><@`@A7Z|?a{5xA0Hi)J9DaX*Otc>
z@k$qtb!7X8*)KU)us@(o?T+^5MX|yAz87B<>GBqg?b6dyGT44=+v_yOmv5GLOlX_e
zU{T;+wQo&7|MbtB9$kBN`R%udyJ}ax&AKOZuX1|rz73CG>{FRx@$bpJ+@^W<mv~$b
zrOcjOVVSL|+O|7v?NzsPQIFid9<Ak+=A3*~Eb>P|<5R_5-s+JZqC3AA7a6;?yS;U8
zG|8D9xVp4@S7mX9&f=Ow4M)}&nuaq9&zd|_ThZqq@1d$+XY=cR%l~Wt|KZ!iFApC-
zumAPD{?~_>j}QM`bY}kF%+G6MbFY35@Bi`e+r9N)_QY;~zD?(8&ekovSf+g2VmId+
zyJPY8YX=@o@G}Z;`r4;z=bk7zJ0-2o(}mOJ-IqNR=Wa@wmMh2X5>#WWdoJYm*1WCv
zW;t6O5#U?&ilMV#wWy-WgC(fsl#_^(pnAU8ii#aOL-Vr~UNmWB?hsH;GMw?^^V)xp
z_y5_qHLU*Y>HR;Sy*|Ck#`b>QpFb}vckfj!`W$szVf~Iu<IT@>Vx|R|gbD8so^&UQ
zcQWsErTevie;ht;e%*fm|A)W7tGXE9-%}ZpQgy8Vj#JsQwKtdF{#LR#a$D1aGYp@Z
zzs_*o$|ANX<Ke-bF6Nfp)+cPAt&udc_cfXEE=ZsMg}c=bhyLa*|3BORlKb_*ubbb%
zM6UMx?Wa2@v-Hn=f2m3Bg1WqOgM|LppFY8>%{H#!Z?yiMoGRd=ZlWHv`sk9N8H&y-
z8k5v!^V~}I`E$rZw(1|J?4hUpJC2-qSZKLM)op{{{~c;ye;ICk_0;ghj`vB+A|LJ5
z{3Y;<dEUyb+K7q5#`i4w1NU)nv~DRh&tfvP5&N@n-g~tJ8>=M!AKu7%8CZS(xcBmS
z-o<vJayd;W9nCVH@9v#!oTyN0?j9iLzbsEoY{T;@*O>qH&M0A$i8d;+^nLz~r{{d^
zu1|Slv-6eC_{S;D3%L7#`j5z}zE6%<=j}VrAt86geOYYJoM^AKv@&+<?qko79nsS(
zNK`BoGu-I6iYG1l%Fm-+E4re7<Zgau{{7K9J)Jd=-o06;dO4xU{nB&&!*A{dvpsvS
zwNt**=9qX{dU>K*gRjK7b%vA0!#N`Zg^jbN?#TBG)`@(-$F}6o-8)Bfc|JKGXH|OF
z6}{)i(laVee^))%>=w)H<+Hev@UZ8N<+I;HX+{?s;@zYcvp+HN6jUu#QcX-&_#q#;
zY%-g!`qaF+n!d8D4%?pcDl78yC~WzDufR|0A*Up}-Y${Q*lSC5mg=NNo_@Jy-udM$
zvGe?=w_kp`Y2LZlFSpD)xBN1w*-@PuzeOj=>p{Oo8P6(b`x!r;uTlBYx8eUPhvfCM
zH_V^DapTv5<x5lNsqU)%zGL}e-|Vo*pA@niD(9cSemnPe*50eRs{<cxzZTW||HkXz
z8`c<dTbH+m+i&DG5IA^2Vn@{N6)WzG$ltI3_b269)`R+||BiRRZcvNWiZko9@s5hG
z_&oV>>GQf*E024|-13dS7`IaA`O-C7t9){zvu|J9_Ui0pqxFY$6nz;bK78F1Bj?XQ
z{d1)Fy+6NR?yB8!?fmRm*<A6%5AqoP3nlc<U-P+rZ*6+u1Zj^0(=vlYPHs!Px_8me
zr&9Mf9tu}t*ScesuBD!!^g(f|?2+z$M%&mnJ3i*xCo&`1^#0S`M~>V(v%0eQ%d58E
zAv!7cE7aCbZRK3iAbBNn3WNEj?eFjX{rms<|KGRw|DOH&^X2XK@%Q)qJ^K0W=luGg
z-NujO?fyL7|My>c^zD5=f8Blc<^JjYf1dvSes)90s@p}o*PTrf6j8f)s3z{n%x$-G
zt8bZHf3eNby!^S`Gxbx&dry2{%sww-|6PMQs(#0kOU1fOw`AQrd!JLeXhKQ4s_~o^
z2X{N~`G1nnOd;)6XoS*IX*W6X>5;3qYwdBA;?`F`a95H=m7_7z^~#)l-v57=zrXvZ
z?DoBR=dbtA|NrOj_uHk}TkUG=l-8f#wW^C_cXjsKFy;kXuXjxgI;8pfouuDHn;3y7
zZ)bnL|L>W&+n;uMyMI6X?OC=?zh^sVn(GF2`D0&di)(h}zMnrY-*Kz&;_TvVmidcK
zW~N4pZRca>Iqklu?MAPw<@}=JjF(KioJC~IF0AJ-)_pc5;LM{b|Gn34)Bn|4BkfkG
za_G;l+He2Fj?b~$e>^bZ+m*g&3I^*wTlF7&aJ`>z-oGmW`9=<Nn%zUsY3_To{Jg}P
zCtR_TY;TUmvTw95jTIAiXqx-2Oom<U%EH6C%O#HP*9wuS*!cYTy|}MUWl;=1Wnndy
z1&7xE6MEIvsGs@E`j6er^w^rq$6aDS+NG&mubZ{pGdP@)ed+T4!%LSiRrK$*t($at
z%PjXS!;DXM$D4h<5~5^uGyR;-q&Ux&d(C<N&rC<-hbL!O&gVB5l5wzq+Hvo*#Q)k<
zJ3cMPcUzKX6(<EZi!d*dyRw^`$CAC?lKGv|9Y3Fz9qzUII?WyD7Rd&z4C?Ci)XZA-
z>e0DptNzw6?O4G!*_dtVMt+Xcijdg?FV~bE+w$6aS)P)PfY7EXe8<Yg3k$^$oR!sk
z_Gq1tuJg`>i)piW&R98P<?5AN)3TmjnCCjBI?KS}nVXYY?tZ>@6*oV<zAfw5WxSUC
zlahNYaK)MRb2E3Fm9rh*b3>Ga(__lj;QxH-pBUl`)`%^>!2jpShD&9RzW?rTIJ3-Q
zzkTqp2mka`lGfDk(RsZ~=kzAM(^1zhOP!kY*|Pih=hWVxe_!rdxBRJy{ki9x*1X;|
zE%oB24lUQWie5`3BPQyvn(y;&{qHl9-)Fzb>r_#@vBB}EfbC6-oh`jNEpkDoIa>}M
zO5DyN(Dkc!_uZtN`0cl{?q=m~omIKkGs*PxI&QCrM!Pp3vANBdALe<2akb0T?nxZ$
z{N(NP&WG>68d>~v*VX*KUyFA?H;%jc?&In>zD1m3AD^}x`@dE{nW!Nidw$oPDN{LD
zT#w#udfw-3?Vn?Z&jjw!`Ss!1pKl*GO?t30&fdoM|A&zETXXYr?_9j2E|j>bcA{VN
zZ|1Gh`^)4b`({{o@cHO4sp&>tQ&M`kV25q)HdbqvT@4JWHY?_Y%sDY5c7~@{VEOB{
z3H#UWNS`Mxc06m_wNq(%`zFrj2$^%$W9C(t9W2}3OJXGwb_!LNF#djgw(*%kl5?Sc
z(2Si{Ge2JP(v0cyh^+r>UjO}W`Tf6-_y74P|L>!`=Y5^iKf~|WEq)sK_t)+Cf4rx&
zKFqKE{dRWh&!^?}-}nD}|NrO5hpX@Z`6T}K*Wcy;f6f2k|G%QBasAt80%2SRS3OU^
z4VuQidxgn%hu0+;LW1r6z9$a0%{iRd;!>8?zPlvv-9kME!|vIgjB2-Yqpxk6bFNHz
z_q|^+>(w{7KH>f5Z~SVW&BKI(BC!VM8{FmDS7%1A4&5qfF1FHqnG53rM`cexj}X~C
zf_49=e?PzdSDNL|($)UgkAHP}@VYenw%V^fI<DO;Tx&vaZ`1WzcY2q|)pK)t*1B-q
z?@WB{S^w?r?C87Ovg#}Uzx1!KlxNtv{{DwWAtuW>7&-;*BY%JV_2=Wy%+;o$>>Ll{
zU2pH6y`%S9scAN+pKsQSj(uExE$(>{0j+!%8?ANEO+S68-v92u`~~mc#Jj5q3R(uf
z+UWB8+#}JGcW1o1?>_U;@r<M@&xERrA!q(P2zPS|E=XZ|%fMNd9k5Juvh43qjKW7I
z>^>`V;>3n?S2(A=RN3~%+N)CZ)JLcF8|!oIH-;RvkvaBm`qMXNJEF_KpLR^+J6bDY
zP@4DK<=O0cB9;#(>@zp>nEBbgs9VZ?lSjeEvZ+5!^51><Gx3{g+(kdDMQ1KH@4o#+
z?AV+~7K`?k)vvNXCQ-5Cm4(ymmYE^VlR6uAAMEX%t8hxt>b%pV6!(SP^7E%#S5D^F
zQaaTlK5zBmll{M4tDo+7TvV)MxZ6aN_qxa;uO$lho>_%YHIw_V&Gc||yyK%@k)&{9
z^31uViAf*3mnp@c3F#IK4VJuGTUdU+UR3n^hLs%bE$Yd41$6fF{Yt74R5_w_>-kP4
zBlgP8hm9}#9yvSX&}Hes#qS(4Dtuzj`kZmGy|n-CS)RM6r(EI{;km@hmMyuv{C4u}
z+BH|-XRnvzl}-EjroBC=DL$L$<<ij120s-M4izsR4aw(?s<j)ZrrNH4*IMnbaXPTJ
zO5HW1=>EIsr@Sjp9{9DBS!4QXRi)L}N+bPWYyP*2mGhhaxoY2>bI-rlSkCo(T@~fO
z{PHF}#U0|hi#~o=dGbXi%CqbAmnHHtt+w+G@BaB+SG*#O?aLBwvB%Au86H2m_}aPs
zv0`8U#k)Tk{hy`!y~ti0y>{!>wcBoGt=$@CJY%a^-CM;4Nn)Zi1Q$P_C_Rz+?5xOh
zRx8cI6;1yA`1!5GczM%&C)+p6HY*;kJu6+-V=JjL*Vc7b{L%|DU*1kko3YY|L0J3c
zqD>uQQRiHz-d!7;y*G5V*{e5<H+#+)MpoGB_xCS1pMH7f(fQk6|6YFk_w#Re)>z%F
zS#^A}m|g$d=RdYDE~@ak828>s#X2hN>7mm**Gw<9)J?gOcXrjzN_7*5sobAWZR!&6
zR?c!+a!`n$Ym%!<jKpzI&#35=`*Z&pU(LT3`XKFF%GXzMiL-w{`|7pHbEV&eW9FK<
z!cS*UnrLdQCeSUtG5E)UN1r36ii<Lv1@La3n8CRJ>ZiN6*PnahZ(sBOdHuiX{r(oQ
z_x4%Ghlp?Yt+btcuPE@suYZ@nUcS6-j^6$sNAvgnd}qC=y#DvMv!9jzESKLgr|#SB
z{l8!B&Y$PM>+0?Je_wv9UtaIW@4a_xsbTrrum{hMsq+3-{GBGK`%2MPy5OsT;s)aj
zRiU!m=FR9j$|<zNP2y^_YC`0+uQg7K-|pNM$-gnksX?~dU2|stV#}9>DIb^QFeRyp
znwPQP*<+fn?6iB8$b=~6Mx%)vEOgF^l~--s_4mU=_iK+VJbz}{z6?5ZXO-aPu-BWW
zOgO#$S!s6Z>LW{5Eq#$0yTxVB<2uV#1~*PC+5dj_@n_Wb=-(e|et({SfBL2sAHw;$
z1ucYDgybD~e(RpSZEf8?Q{$vk^Bps{cB~asJ6+Y~vSiApQ0a#K5zE_yoI+=49A4Y>
zUQt=PV*1v%adT?QTz5Wu`QdQwk{kO^CjMqsI_>|*#%;^Z2eW#Ea{DjZNp+SLzhiuS
zu3WhH<ME^yZ@&MyEH11#E8x_g(*YqZjqeOpl2!!#a^1alTDm1i+3L$0TMw5>zv|fX
z@Z3}FIUYau#Z;HNcFXd#a5gejscuTIW9D0Y#I|2eHgtVtVZ7JJYMp>Q=W4$2U(*in
zuiMhXcf5e<rS;!x{&!xS_vP*!Zns;Ouh3HYNjr$iuv~?=TjtRz$Gc*#fh(Q{FnoVK
z$?}zkkB;K0&Nb3eT|z5N*=@W|uAFdg`SQk)7Zwxu3hTbNJ8<%Vk;pdNU8V7t4KMG{
z%r}!!D~@eAQ#WnNOvCmEI_*CWKj2<?Q9}LN+Rq*rKPz8+$#Ue(-0)5gcOi4p?JHjU
z`mSBU#jdLV{JYk&Rj*db)ZHm~w5vU}{Do_T@y|m0)*th_t#|ubuq-??r-QL#(x<yg
zcN3nSW0L&3ch`)r*%5mRuCKdrQRDsQyf>w_fBye{zBc{r?!z}_WG1$jw|y%#Ywhn#
z>6dOgtjrZuvF6XGPdog--OZHyd8g+}wz?RHr{~#KJ3jBYU+kpsc=6@ps|v4uihnVC
zPUc@W&vWP8h+7L?csDv(#-Dqwsr|b&_Wbfv+qKU>SM8j0e)`v%xa*&*?AD&%rSm%I
z+EyLz?1@1x;$lxvtX{<N<B4FBMbhsHiZ>*tT)%nDsj%VQOS5*FT?|{=HTAsytlQvz
z?98z<`%KHfr(L^s;1SD<I-alDy_@c3<*wa!Eh{_iZ1j@r&s26z*M6tP#CN)k`OIFX
zXDMvT4=4QFJ;Ai{*0$_c$3IgK|7(4?kUP8Dm~&6k%w5x8o>Bc5@Jwd=>?6iswatG8
zYCCtm>YQGz*==z6?5nl6)<)%8^XabKaPHKuX-}6d|6EmPu|IOU@j?6hxBAb&eSCF2
zudce!&PzROt&CZ|ud}(cRkk8G-16W*WeZP>19LZt#h(5Z;5WT>X8!H8*^#CaGqx_z
zE)dk}5%`|_Otf*&%$O#guq1WOH+@^L#&`bLKmAF1QTvQQ4W6tCyf^OT`<*}H-?RFJ
zad4}B)Kk~fkzbGbZ*AmQqARe!K+uS3qWHm0D~!z+`Lc9n?RoL3So3|3_O`t%Z_mDV
zpem?RXV;WNe;8udUClqgeg6MH2d8~K{`@(Y&DOW|zwf@cySMKD`}wwi{@(xhEqLnk
z^#2|17jEb8|NHIj(wb1er{VR#`zrrF{QbWE$4AGn?>(pY&;R%5W&5%x+|sW+R1!rs
zUx}HkEBEc%;nC9canY(c8NLX4hGTXyGiN5|?#@-Pn}1GRv?<@;bZ!2L9ZPH2y3*qO
z_zRtC9-4&B*xI|ZLwdI5o>^BrEI;Y;y?S8MF=fZ1J6h#G7MW(Z917~Xv};mU&~Z+l
zP0w_m_@!A&<e$yD8^YRG8k@N=MP!44HFsmo5qY^M;`jGtns2w?_y0%9-^26kY}Q7d
zy4^l?YUfPz@BOoD|G)hF+uuJrSF<2ZVCVK{Yxxr7doT8QY`#4sy!1tLb3oUI4Kstw
zJ)fTa&bld1?GxLLJeKvJ_jld-$GO+wUu-NR+ti<WkFw_b%A~4ZminErxi*vcZN>@3
zl6QL1^X47={zdYY+)tf7Ngvis@^E=}SJz+)k8+J!`guD|9{0Hd&o0&;=vEFbY?uA8
zal?GJj>Q_!Z<aLZcl1<TkMm~?e^<SPp<k&!?$G&L>h)J|n)o!|JmwSDC-JXjxl-BD
zI3|u8YQNq2vv_QJOYQWQ{b80%XzkqNZS-otY;cp|+@wjiHx^b`SToIk$z9PUpwO&5
z<@(H)JBwe)oKQC3)x6dH$of6cr0lDd%lMxBS@f9u`Ez5DdH<iONgr1YFQ3Wbxok~(
zHv7va&kua<Jju~+<<B-;U@udV+;{J5>AZjJT_!pkIIrA2(!Fct>|I@++G%lK97!Qd
zrzD+P6tm^Xc^0-ayKlIwaBwNT2=qGAtnL5h(A`_5Mvnb2h1Rh|H9GgcTX<WtrYPB0
zW5P=Bj@Bx@lM7e)e=gS2|K0!Z`GncgfBvaNeEXPHuk}&(PV2EXmWrzG@;<lXek^6u
z-!ID5oF|oQy~;WAq>}SQ!<vn?E&I8Pmh>NVe}AR>b!Pmba;f)ESdL|xe2$gvE2`Y_
z<jKE1^R9g@;gUU9wQJ6~=a*k@TDLqk_Ij#rYNXSI#ZfLvIqO}7H;e6l{O*HNne>%^
z`(&J%_Rc<{-d0jnx|e_LoxO<}MR)Eb7~eMX-4dPfcCu5(%DWp?o>U|iuiBdS?Bo=-
zw*}iS+%=QfX&rgt-CgO8f&1-bPDW(*nrEM5N&Ju<VX{oQqwvWjnI36tgGZ`+-nAA>
zO9%%2{*dA|UD_b6yJgd+Q&rP8scl$wHEV0-ccJ4?g+EJ0hKR@dJ(VeY736pBOHKCs
z^<OWaUB`RXIOxo8AJxXLGlwcR8r*wd`aq(se7$&rU{Io{rjFLGRqK|{IaQ<;b!B#C
zcAnd2{@DwavNfM?(pWul%brb+E^C~G-DjyL#r(Z=>;Ak``qzpJ|8Kmux|*45Guxes
zsj5{XP7CIXZB_ElUYi^G<MYaz4Q07o7v9y%yBeJ>{erV-**CY1dZ$tgHN#`)z5H^1
zs@}Q&MRlvY=luJ&y}o=`O{#BR^zUay|2{n5e<$y}{r{u-epl;%e2)M3rRML|{JO8^
z_iMlJy~+IhyZyhX<@^6X*8em8|Ksh`zZTW~|MGDE-_Q23_BDTgU+&7kzwhra@##ya
zT+@4BS6Kc1u<O&?mk;0mZeREN@O-nc_VvI1{pP=KzTTO4wX49lNrE?71x_=VOq{Sn
zVoi92a=@g|x}`H_tgoxGIUcv}-YVuJW+IPY@Jw#J>fHCv(?#=0DNEBv^TNEcyE{zN
z+Bb+kmrGLS%fD&W)V#@Tld$3&C+R=0^fo$(2z3@&r+#yZIkhM%D%&OR+15N4*M&+!
zO3yn^$|`tV+#VlWmsoTA&9~*Rm;2BE_d(ri{+B;zZ*S7zG=C&N&;IYnc6oc-SVN6c
zpLc1upRIlPpiXt-qNp=DOQsyjJ{++?*tDHlXUc2;ccsTJp4!g;e)8e2n>UWG&u06l
z|IaW^x<BoYL;884vg^-2=h*I+zsbDg_I*S7Wx~$thPHd3J-iv$|Mao`=B=Hbp$nKE
z##$TL?lDf-<S*ysELNGAtYP~o>g9sGX?yuR{246$cW$s&pH%GETD^n!|B`Lh<=ny_
z4t4CFeq;T!;zt^#M<u5B@M}!h?p)OHr8MB!iZ^flcg<FndoA(1yT)AYVrgC9z3uOQ
zzI-qCvh!)b@|p?OODi_De?8T0B)WR~P9BBc{g!4g;{9hF&sOu}-<VYqyTIy2;^I3t
zHWu#e6*-;d{4VC4pH)m=UzPg%`qAq7e>QB)K2pTXs>H!AoBxE}K!ww-l7)jcv}LZ%
zlMI1Voux&N-?{bIF`u8Hk|-RwW9L#kb)9XlSMP3(+<tr2?9MKY*|XU4-7Qc3x^Q8&
z*P2sIRhl!eRNi?0@pRCF?z_fweI6}4k+0gOR4_enim;(@vA3OC+lzbEU#+fxSLpvR
z%SWR1s{z+#hxxO2KWS+xkT~vh$oSB{Mcl7Ga$k5T{zcNbZtLRL)w}o(FX|L5`j+_5
z)6>?w<(${^m~WTtopXPCe&6*X`}@8x@2np)2hA^zbF01o_515g+qI{QqOM)uwa(k~
z``aL|mlL-fd@ItES(q@fqr8*z6NBW<sJ}taPR6&ooci{&w*G%LZ<5&8y}M_dGp&g<
zc-NG@pZQHT_m*0ZJf#YwBj)eql>a@reZ1iQ@jG?%CI_yvieOT{bY}MMo#sm7Z*NXq
z8Sv#<&l#3;Oy5I485TV$koexy@%&$WnVR_hg%va3nKvq*eW;gT=_A{6K;Y`rO>3qv
zUBk30ucP9gUS#6;4g>L|G*#P|FE`E8Ti$;9=aEe&i_^2^=iOe;J3F|$;?0~7Nxx42
zTskLrenjoVliTLAt^5)GaFO92{=&eD>v#6VZQqvrQaGXP`@HQM+^2WyIMuL*_y1@<
zk@@L{Uch`Y!{)p91^)_dG5H_x`u(eK2cuMV7Km;Unj5Kiru)&B3p=L>KRNT%<=TTI
zSqpL#FJC%vKEymR`gD6%z2Ty+^)tmpr#xB`9cj1u|7G5aegAsu{(Sg-dUyW5+8^(B
z$N&3f{-fshy4x>n9+o~_f6wm4A^!e9-|jxYEqs65d%fvj&pInVe*07B&$AN26VLwL
z%#(g{_Imv3muKbo{r_=y_UEtLZ1?{Ead-RusqgFVMu}?PuPv{usrd8o<?;UW@?5JI
zwk0k&{@`Hrub$63LdTYLADy$dL-6~J){{ysX678{^3N-5{?1l@@r3`xmekcQcRqPA
z$)40_Y5V$bo2kIIW#*T>?o>GHY9+9CAGkSp#?8)_t2+7x6P>&i_C;3z|MPQ`UQuPu
zl|xd-rpDa)VjLUG!*p-8Z!zG{*H_^2&@!1BnG*T)b@}vdd#mrQ+h70x<KcGy`k(*)
ze0}`5YM;rc^U=RQzWjOl@>bu;u1pJOUe%R4Wq5!+P}4QU=F`pUPQitYT2UdLH=BN)
zmiuz-@5K6oHw|i+9OCbk3Y>T3{9b>!|FAsIOoO_b{%OzrY9wzyuQu#U*3pi$XZ@yl
z;$i#-b>*{eW^$a@fAS|x@LIC@xbeo*GoG)_^0T!(HvieoWYKG@U%DLq6MH_<UHHo-
zYq{;+ZWY_@{U;pjJ9Sv^#epODm}GV42RmA2##epll-EAZ+-?)P_r~EvFP1jd__xhz
zSTOmprTmRrw)^t#)eesSTxZsuG_1Y5qUWjI{q%LsKkMB0UXI&6|KdgC+3XpO3j?ov
z)%&wsY}URbV=HHJc#p-N>E)Bpt!q5A*FI<Y%l>qOT?aDXEvyQ5{9m{F?LXOKrlsyj
z&&yiPS{;~J;jiptu+rFl>U?kUh8r`LzaHPLa<riRc$WQ{bvp0#^z?MrUHfSn7c%Rc
zRh*Y*+9It*L0!96Y0o~jAiGO5#HR1>jHk<2t`c8r<j%Wik5Fr6NX5)8!hh!-FBd&w
z+!tEVoH5DpqG*iW|I>C(MG^fgj#x}K7QcSPNA_OR-p9cLclU`;uFL)@ZXdp9OPl|{
z?0eI-Ue@@?vo^DAjQdji(>%dKalhy0BNf8Q8HOPf583aGmYw-!@4j+%cgxswGm}g0
z=K8(fHEqSq>6bU@xH``X_0sg#?DklAj=#vF=g~&H#Jw4hoW8uSf7dkQ8HWjv_uBVf
zt8U6)jMsT!n7933<5Q1)M`iA<3H3JeyL-s^cGg<csD@{L)3Rl|-hBMnR4pT4boihj
zkA#EnqgS1G9=J%>vX%w7r3ia2dG5ATUU}E<o4>^6(;|1ApW&2tZ?D7OqLXY6E^!ws
z`u^RS%fu?Azk1I7InzGx(mA>|cl%jeo4VGe0@8+?C+rq%=AOEF-u)OkxpT`?EjLbE
zd-8JI^0wvAW*`1_B>O=jTdJiJYkZ@NtlW#wAM9o?vM>zw@Tl6SWV!RJ=ei9XJGbu2
zy=Jp{Z&GOzL&q7>bp>ttdgkTIjD}}U9yk=F(wVLFB;?G!4f7)Bas0pYX1_%JlC4Jr
zBm@pS?dj6#Ia;VxnecbloPAp2InsGsEyS(`Uu3FMS4n&GZ}wK3@Vaf%S<!dRJ{`+D
zHR0*slk?60FMs>;%XE94|Mu4PUjwc!I>x<z`gZw!b>FX+$M3)WX7i!?ub=0;h6k^o
ze}7-)>F1y8Q-1D_-}e9IUFQE!zxyxXfBNO>?EJc)v*Tme{{8Uy`1a@QzqjoD|L^hP
zr@0r8YwMR-#fRvfv9I|vQHnEV;?+$hj{7-x?{JRrh)7yEW6O^jy3rpVSND{yYYkO8
zeCce|-j_dX>b}2l$_o>lx`e^NaCOF!BZ7q%+aGdB)N@3!s62bL?A^1qRvp|I_Jj&0
zu@`bIo9q1HboBy>4#ljz?YHXwL`==R7^LT=vUEM$-lH502|7Vu1@mSuxvi1>{q}N|
zQ!0k`QDJ{?ZqLt86aDh|IDh@mkB4uU=f8Y-aAy6Fn%e&#t8VL*RQ^>eEBby-;EnnA
zz>m{X6_bpo+jh#mb9<u7mY#NYQ@++L+lx9YX0kkz^>{ktv*h!HmH&UMeRsZLU9tP2
z%xu|349_}N+eVac*ucim{-=n;;N|=GZ|<)Dq93f6(9!ppv+G2irqZG*4$}8tT3H7z
zuJ^xt*x^O8-1%tLqZg_*PaS_;eMz>@SAzGX>anV;lh4Zr=3YAgxwF>ZVrq~0J0>;7
zO%wgBe>iwl+sAd@J9^Q^cGa;jGVf1D|BUDrp5FgH-|t0={W?{q&Bxu;a;EV;kD2qt
z)!^Lclg6`s88-Xu)N$T$<nPrB8b^5~Z(Z72zcaJbrCac%>cm$so2xC}>_5X3+PM3x
z@O#sqy9SK=oZKIVFPipq#_yiJ*-xeyzWEazS|2{uVE-JJZyetplfEuob?f%oga%bx
z0iO5OOCye383u>1{Nl-=a&Y+>xw=SmF;0oSk<TAzd!F6vC9LOq*X{24b&poXEqKKj
z>ak#1$W&RSYm$~{<o;dw{qwKlxxD|&md1-5Ea~HqZ1AgRi{Sa)S{;AW{pk*FhX;?J
z|E~XXE$YPgt7o0M<TcA`|D50KziIw{8~N}VsbYUF8QEG)EzD05aM>TN%4@4~YTi!y
zOPiOM^$LFsxaYlpx*Frfl>GTQ-|pOOxcG45?RUPqhhui@q(*u-7&+&z_r4h9l<#We
z^R(0Vf{d4IK*95<PmOb~R<2ZTX*qtpW!|j|i=CSj9#1}$RpT<ZPxxQgl{?Y7*IrJS
zOcoXX6L6p1Z22`Y$FRSLW!`ss+?rEaRM;9O(Z^O)@TDp3o{`24*_0&=mCO7;3URHf
zTT_0+zF_@%8?Ubw9K}x`OrHI}^>XhKW3}R{H;JbX^on}PJUG`n-Bj)S>Q%cc@2(18
zf0jdsDKROOuV!YG$gUGFi>fMWcC5)Ze6jZA)>pw>S6_UV!}7dAg)M`r+S|lt>z_@B
z>bGv1e^5#@m}z&UeYUEDx>Hn_LGfzQXRBr%PT)>8-?MXKa5k6vp=Wj3&+bgt+Hv~q
zWj+s{s9Etd{9NaE*iWbls!dzTr#9)3!I52&M?wqYrnjGd`S|Ilt1oMIZ++e$zVGVf
z6V96ar$4PID$?G5oB3pP+`Y9+Cz?F)%JB*53d+p$PdV?xdba+Dah-vt%DN|qcFmji
z_3h6u6;bD2e|omsJbv%*EKTl(cKa%K?AclKeRh28nuo{V|Nrvwu)S^m`|F>#@BeG6
z`)j%VzmJc9_wWDpG+!)IPM<$tPVV2QFJI#S|5?8Ne(m4y`~Q8tz5V^aw7>n!zWm|8
zefe9+l&sZfvyUw7e#RldcP+7!dsVLC)@ytGw@$p#wev#Jj>xqN$Exi1|9qyMef8D0
zY~vYcX7A)}Ow|{;@Q^`?Epy)iQ=OELlN=la71Xw$y%8a1l+-p`IK4`;V~wFpPvQr8
zyN{WXtxw;+ED?%RI_+`ghK*D346pe%8x3UK<W!f=Y+v6UafZP>)pqWw^ZRSR|NL3D
z*S7xOpFj8O|FcJjulV=z_;-2x_`?Fb?U&zOlY9H?tF<4OFer5GshF{K>5`)scbc#K
zG=7(H*IL|Z!`-z_wx_>dklP=&oa1zM%(MD#t{=-StgAn&RF_WvfAlhEhn;Y*lYiTn
z`7ia#rgi?~ZM~5Ft9@_&gc&!#pPbH-S(fvlVbS|9ZnG*+M}|6E)hBZl9p|gGP&>9t
zN%T)_eBbp|`zL0bhMo6wku-iMYhJm1&4Rxh{~pqrX!U+>^Zs(p%>Pfr&qdwyzULz&
z^^Zkzsb`4eyM&_Dz3K&TqS@1!tdC~d9oam2yTQEMkKb~c*6jH&JS{zkwV%&eCU559
zAgPCfukK%;v8wXW_1%BmTO3b#@~28KcKWbj_l}2s>{geve+lJmPpv83&N6GN`o8r{
z4#@$gN5zHjKfiqT@1~@_F9(-g>UBJ0aLzbIpve8U_L+A!K{qzZiz=2UJwNjxafdU<
zyMsTCRMt<L$?E2KG%?LgZ<VjbvmIJe0#%_OFMKF*+sBc8amL&4Z3kSob{^dj9}#j~
zP2$KG)sAB;7@s*h^xHEqOnkC7AmHEPckjjCY*;3fcF6ALW9J!{>m8?bpLx`G<)ufL
zqs%)+))#tn79KmaXU;><$R2T@4Sy%Req`}5BTq)<McmRmydCKWRpuqB-hHXI{sQ}q
z-Gya(>kE$DN?LJjFT2e*p2kgUmg+2BvviH?^hKJR`u$5MSWHnmYh+;?IA!PCs(iVM
zJ0gkw3`grM{EYwIeEfK?V6pp~>&XH6X=1mhU3j!(X0?mIS^4I)YiFt%a>M?dU38>>
zVeQ(pYmP_Hyjc;|*<qj}_s%YL$_B<~PxzQ6PfYN<d5+0AneU<W3hTDJCj}&?&dcAd
zUFg1L_5}guMg0dieVzFu-B^A`TKe?~{vWpZ%SJxhcr$TA@DT<5MXPm|Z(7qjGdlb0
zwyjxfg?t{kJX#`lf8XrQzp5hj;$!vrrx$B3t=hS@D7z}VsCY|6(Wb*PdGCHMGH%RS
ze%D{KF}~$T;wgnWZ@$h`xFR6>HGP)W^j6NaYmXdPyUH4T7m1#|TY9#xmQsxLEcH(n
zSJ>`tZ!_exVpLC6GJkZ8aTgz7PsJV^)u2bg4^N-U_xo4zYmeUf3Ad~2qN1~<o3Exe
z+MOs8(lM|9{PS}Cg6H?=UCW>Lbny%J39AqMI_BIOs=%_kVRqnVHi0))YybSzQaW<#
zk9(B=>&LUr<M&qYv5Eiics}>Z%H5})_ka2L*gpR4t~u-H+uyu=`TKkOE7E@-9_Rmm
zd3%2S*RRbJ_Wpn9FK@Re-tW2mym@P;v={yRS-$_z`~Cl?_s_R`wQ|L(gO3h$iWaPR
z$vL-5b<xr{v8nwZ-rY%__nOVT!c~>&;?EkkKfk_Q$vU&h)nf9I$p+e~8D=hG(+=I)
zA$3<vYhuRds_eLh#{@LreJo+C@w`5#ajC%4bs`024pUnXe%X^-|MN#<=N-BGYhF7?
zUJE*P=uFBrhIf^27uN)&m{q+p&wOzHxBFA463dtR`zn8b`Lm@q-oB=)>fhaZ_sG1n
z`O(eaH(JUVe7m+S`}VqftKJ2Q`RJr-7CzH)om3^myqIJCRi-CBzBfYqtsdA)ZIwRw
ztcE*rhw$hBbE|$gXCxXtD(gvT7xZyhBOzlX@bKG>*N>CeKVbMPlCsV^r;^9Q^6k;d
zhirlqI<~ZPb|`F?i_eamaJ`|W?B-&D4?Zd<Vp?J~CKt~A9h_rxicdQ@>6Wf=gZC*}
z$EJci{ptx?_k;Wn3HQuzFQ}jPj^_x!JHzLmY?hDODQ6;Yn^dITeO)W_b$6p_cJcBX
zlMO9*HD;|#x^(Y>MV7<EDGDMl_0?QfpQ(JJ7L;S+%D(KwJBxW~w`R12NJeg1-Ics1
zzacHhaMvZ}eO~7T51jE!IQ8q^#Gj{r*NA;@X^%Vmv1QYPzjigM7J2#cKQ4rQjFGAf
z?BHiF?E7%{;_1H{y1&wL?x%e`;dNR??1X{YMZdxlp~SMX*8#_QB+t#0W^kP3Eb%${
zeD!Hvlkcytg)sN%3q9Spa@vaL!poB#uY`zdMui-)Sn_k#s#jvI`<!=A`lE2>@I)Rn
zuj9^p?;L+%*gna?^?jhiq<N1FHQbhWeyQL1aIGo-yh&Leb`$Qm?CdOlp1bSt10M4?
zoq2{XC%AQ_qF-{&={(xnT&{MLd8UA^j_Rxk*-z_s7r7KPzuCFjm@A=Wud2aI4%2xD
z{y8&0Hf#GnO|zD*T;$@E#7?&b)$9siV;xf3SReGdE_!)hXVcBE_0wna$$aUO`4V>A
zpx)1Vn#BF|zwhiizQp!C7Hg8}SF$^^Wv0dLC~liH@rT>vzPLY=oVE3b^YMS)B0hSD
z7M)z;wZm>^@E-qX8Atqb6gQO=sd+2xx7;Y)Gc)RG*v&H^lhUg!irF9iRCqpn8Mp1M
zZ+=U6cppx&oB3<i5s6oeyu1|B1*Qu*c<MU&ghk)ZTAe0(k7M4%x{LE0V#K1(y?(s=
z`02wt1@vd@Up>7w)AZgPsRZi`CGoI?7Tw;?mbv_&!Vb+@_$A5jxPEi-hW10=>qQs>
zFS;^jMd!{=Jyg8^-lrX9PPwgnrX1~1IxK49@AA&YSHLY$aZ%S6u5yKPb0?0^hc-=l
zT65{^8<nduovWi&^I{%x?30S+UEMY1lU&`pueU?@Yx7){{(S7o89zCpvdBfBRu)X~
z4soAUd{IKafg^5q{9L^i+1cmi*Ug`I>$(4y&{g-oyjg4e_p<$@DYs<w{{Pwi|I^pc
z*Wd4}dwqU>e*KTbRl6+S%>VzTeR+HP{Jqu54T8B_wJfK5ocx^dzx}0-`%A5AxqEyG
z9`7@EiGON*Z8-A-TU6NkEmx%_I}IAwru7s)%YUwQKC(t8deKGajUks`A5)u9bbFn@
z%k@pWL`p9OIjq~m!IEb%YbWa*^_1fF4P7gQZS%K(Kkwh&#xOtPON>|KQD62xIlgSS
zRR`yVhi<n0y&<|v(#7q<qWJo+A5Q;z8@#su&!3-{m#gcuGQPSgULtgT<@K!Fx|23n
z=jxpanzq%nyD@3b;*E28mNjKb927nj>?Ohb!+YQ9m11+;J3sRtW1n;=e(lQtn<9CP
ztqmUgxba$D(5Xv!^d@EEV`u)m6Ds{D|NeBnk0<+H>Nn<vlZ1oSf+p&`vrNjaoYh>M
z75Tt=;_}xsgi~3{)W0{UrOn&GlW5u4lKio0BFp{%&t`fZ(|p*Glzrl2b+uWStJRK}
zO>^yBeyH6H?3n9R;COt3kIYe}^<}BQopQg-t@*&|R(n#1W9Ki&cuRv_mg`TqdgO;Z
zGq2p?cPLbT%bkzM_I@)ew~1TlbRf9k_T8HL9}<TwT+$L$6@$XmU#GBrTJXzi#rxwO
zHp^}?-?3VL`Pq@4ssx5_QLXO}xRtkUn%=WYu*>9njQ$Ua&%v2eOz(P=pB(?f@3rEv
zSLJz|*YTH&Mcy0|N&PJ&EgtkXqtr%_Df8ghKcXiRB)B@3&2}w)cXZA5@9)AEz0;3v
zTEA-1y0oZ}J3&+R*D1XUC>J#q6aBdE$D7HS%cghcFL=DNpkVc}kPruf_ljn<y*>eN
z-Yb@=b}avA_qzHk!;jh4EzI*?y;IA(6cI7!?#(+lPuBEHs6T3!D!LJw)UE2g;=cEv
zlX@=2$vsDM+~P96y_IA8>}dPsuZ3mQtKA!V)7Lz?r?Id))6LP};YQZsw3p9R#pb+V
z@%^&k{GIF)vw~MU8Y+Ht_bjd#pDjOG^!=}EGZm)mcVzek1nGuc)_H9^;l>)?;GE5K
zZbqN@uzBBkGlkHw#<M5S9bnq<c3-)?l9OCax6Q-@a~?+e=}xPCAdn?d$k4k-`@FK#
zVNN#l#4X3R_r88MaoMRu9dp)vT*7dFVj}Y)sjVsZD-F(;y}OxnFSz}IHBVAp@h;=p
z6NNfYE48*x?Mz<yFz;H{=2r$v?>U7(MsAosdENA{pDK3R#E321^s{JlYVlr4b9d=0
zxjYwcaojv}*<+Vjy~XZ#x=(*UwJTcx?aDptz_k2`(3NviBD<|$U0ZYPrS81y?Hyl=
z%L=kj9cks+!B{3Pc*a?D+GPec@xZQM(M#&HCwYc)1?sMzwsiy7oVAB|m(001&GB0T
z+r?)oJw;I;zdgLX{QPVI!MrE_EvHy!=0z(8M!D2BU+?(#!k|5GRiCEa%KNv96)#Tp
zm_I9KHq)99#}aZ_f*c<H`zv4noxlD^{`2kmbvrXE@7bMP{@%XsLFvzbA3y&7&j0r1
z;qUVIj^ADAE!cZTzQOiUfJa1V`c0=PzV5+Z_2;U(9^6#k9l#l4{<$pr+NL$?68CTM
zGQ=EWaks5v)qIy8mek(7XV#4Fi94=WRhO-1IKnVPbSs<19gen5S{^2OL6?G#J(_i+
zbE>P<pPl#Y?oH#c;V5{Ys_AO_C_!nXnBAvs-!$^e)jvt3v#4KSRI{IZy8Qm0-+Qi}
zUm!nk{aJ@ME2sJFkD7BkWoy*kDzCQhGZ*Ql=5BR8;_q~6%|XSDj}uD0AJ5@X@_ykE
zdM|&8p#p>Pd8b~>uV2>*9@sCn*!Q#T&#pJ=C*HO<es!%sQ0*wz`X;2!dlk#C+f$<~
zOL#4p?YZ___4`F$tBJ)%XPQ;GtCmkObDh_8r+aPjx-+&ShVC<7+A=Gt+RZOZ{;^Hk
zqH&&-h^m*3O6UyX4HGqeY*yZH-<Hn*>!?fDW4@3Bwyl4+PRpxj@BIIj|Et1NC-$xB
z6W^En9+c}kQ^2;{+wOYVi;pt8X5n8x|9X0EwbbORIi*#%7i;dje{zN89N)b{j|E<s
zEnSeV-@S0^_mHzf2Lxuh%{?M_B6?9rJ}1}NeYzGFn~y)3SM6}6#crYg?kF`D9r-Jv
z#W}CElU7$xf0?1__UcAL@ejqIq%g-b^>da?ZG9-1aPQ;Xn_5qwT|KF5H%a2|W`l!*
z8*e?fYiAbNv9V)kMp~@q9h)cLH7|(w+vo%a_BZZz6PmVa#m-e-GrTlPU3M>CEv;Oo
zc!gi**mTn>qw}AhmM;F`@#EHS)x!Sg$}9!l_2Op5;?vf}8GbwF@`j1OWZ90-4BCt9
z`y>n=&0tBjNbBG7tnb8}XWMMzt!fojCvxjg_~6u#UT=H;p@-!m`R?0$uiNpsv$d8N
z$+U<aW0sM(?TdWb>c%zw%EdFCyhq->xhEp^e1~O7uX+{#TgRR4kM<?Yrx^?W%-Htp
zw8YDhSe++p#h)Ks_a<Y`pY{!x%sx-6s`;|z)*0j1Key(c&G=vPpnAJqtXQKWZ=<~R
zM9VWL9r=osPEU>0J7@EH*PpPS6A$jKid$cDJ7aa;!WDDhy<L23lkl}mjxH|y4of^!
z%dNX@bZy7ydiAi#|38XDk9xPw6I^l6jNw|3L!5Ku=5h&v4Zn&s``^o*=U?vXtvKcE
zbm?iUMVBwwqO?(cAA1q2*gEEC6R*D#KVtD`k)r*#*SmcRJ$AG>oI8~odnzj8$gG1m
z)}EJlQhP32^rkIK>j<~UQcsui^0T)0Cw_NyP!v&0E7T3^I3vMQED$kslSHW6x2)Om
zu9cQkE#%s-OyfxLESkjX`XweMQLM@G?yRjxjqa_|5Pa#hsOQuR#e|o8yUW@Zo?4{y
zh^6@Uo$0B%%gwv1FLs4pG;7a&r@%4i;e)83KX2ZC{<b7*g7(YHUl*^g`(gaQQKoEj
z8TT3$hoF$|O#(_b%F}Em`#UFGNjVZ)?&p8iqu;r#++lHJ2}4}|IiJ%ni+*M$KCD`8
zut|)0N}+|Y;G#3CN=3=pw_Yi6Pc{uGZD-u@%sf!pg7Mmwvs)@O^^R_x64c=l(dzo0
zRg<|T`mSBveC2x@4h-7fFAeev-OVm39?n?2@4a%(@+pPf2N$i`xo2n1<NJSiZspJ8
z^*a<BH>dx}jfBeVYgsl4zrD0Kv7Fjw+r8o2*NN(<=QJ6K|K?l3x!Y^G*yWh^H%2XY
zcD~(Jc8{NNpZAid)geFj8?8FNde!&4vc+Gx&la3x7FD0hdGDkydt;@$)+g?#axY9-
zE(A`RE5!OPPucg3*>nNc^b+=Wmme!W?tHRSNAdJbfAJS-Ub^RB3&<Uf|D*ZWyL*FI
z+HSTZyVX;lW*C)-e{5W_;iqHMq~_y>k?ijS*Zb`5SuWmVD9&-$HScq<>zB*RIL`=l
z|NXn)Kg6~Ds!Qd2$KzG1UO7Tdzv>h(g+xBBJ9l@c?2$LlllI*{=&s;-Bcbd~O?%%5
z#S3fv4;NRQow49{?0c2k3%p8Mj9kJ8gwM=tlK&Pd`}Y>ZPU(feHS*-P?AQ7HeudRv
z;n{a?NVI&ZwVN-r;AfS*V(7=$6ZwBTF1dYwt0K<>$z@YKo0OTljm%QstmK<yFMmWu
ziE++nrZ@hVcKAocJWyNL@+j@)%Cj?W>%0x^Ua@+`RGp8pz9Bw3tEvJ!O>V_K;$P{<
z*6gu;?y^^`cW3j@+M;mz;bEh8Ip=;>^<P|a)$?@kF#3L0TYVsQ(*+T6!!#wcMhWTY
zPZhx&EIb_L|5E;2oUpz2fODdkkV!Ac^_dzrVypZZHkY@=@V)=Lj_s7zy#6VTDv@8?
zDy44o7w%5io3h+kh_UbduX}f{Ci0naf90|epT0^UxpaHevvopjum8;16eT+;<WS5t
z`OiNsbOl``)t?%eTU0sD<C1y5N;*63-P&6M*V^waGc8W&d3)B1YsKxNwa-Ik#pdmC
z-nHTStYoIA?|OGET{>%yhN#8j9m{POaVI>N+jEs?JMZmdYronQf7mlgIB*ZkzT^ch
z%X2MWg!aGc36;2Ad2eB1$FG%ZPFLAYUAkjt^jDd40+Oz-GpcPAHEZgM>pp)y{Ip2R
z^@@kNzPWv<7TdNHUV?&c(Jj|`#UlCn)t6f+YwT3{u_^K0Q<Wz_BK!lUacEDTv~JfN
z@z^c*olY&$eB+f?zvrYMgSocUGHoY^KgO&Amn&yz1huWM7F2w8Y)k77UzY{*a*R&0
zok`qk*kb7_d&tIia+S}VncUnTcfL?kNnpNvX127e*$xBl*Qb0|n3r!p=UOau>XwVo
zlqD^DTT252uesiN`ej3e^3ItYx83*-NHyN@$*ul!`)Gs$!(^ScX6u<2hnXJ=J0+)f
zo!h^y#NYD9nMG@hIG?3-m03*`^H~!Z#vV0Cl0{d`>)FIbj$v!_Zom9kQ~7;?|Mg#Y
zm>kVYZppR?KJA^@oXCCa6zjch&#vw*RK6(i`ACkx&%+OF`WI9`a6EAM<m6Ej)10vG
z+Pu7Q(X>OGbkw8$*Tt9|;B%ST?DzSCdduNt1-Z@gq7$7I6cYt)rx!(?j#_0}qH8E9
zsI$;6?}^*J1T*~?TSNr&))sorRm+!k44t96ZN(V@&dTkd4ZoK*u_-lQtn9IyyZ^<?
zqx+9K|6Zyj=%S<KV|>~0tNLU9-sEZgb_oLB^O=oZqw>Sf9XOZ#NSyV_-Gv47dj;R`
z7Ff9~!Nzn+XF)~bJC#XGcV~7Q3tnvacmDB2lZ79@Kg@EP)5NQ05>T2rZNkFsM`KQu
znR=^8`k#zhz_?WQFT;7;TP)x2zwB{(<K(FP?BwN!-Ie!V&wHx!&}U-gW#?Vb*Sj2*
z%Wx94bMZPWBy#J$*a63;>RXTPvgHI$?fuTJt;1I<?|=KW;_Iq|=a*DA?3``j<`cwU
zus1qqwb!x*C6f#9mo&y_wSN0qrLX<|UfW7**~dQ<871CzEY!VyyLI=Urr4XNM;;sL
z2B$nvQSdCRzTcj#QWDQU*<nxp+Bt^R8X2i>S8j0ZUGgs9(^{+d!MraIx(=O;4cRp_
z%(VSWR?@0<Q9eGk8DE_ruU(h5kw3ItG%zgiUGkrbi>qF3%;j9PYxz0VFBZaPwNa1H
zY+7lZcVnHnWa{<?!Ozu=yB&_`Mn$xIaQ>AQE8y?az58IpqrAIG6PqgE9TbdxX`b$L
zI62y>`OjgCJ9i)FwH5eXkx)7E?)OUJ9?f|NP9KjvQKI=a>{i^J>gmdr=iZ#XWb@`7
ztIOJawLOnt_}<;qu|sS1%$cjb0(IB*Rvvo){&&Yjzo*~5gx@ioNeEUuW3qBubH&Z&
z?^o_O-~8-aec<YqXI2UZPZhIlX-l`sVM@qc*}8OJ3WwvNfRiehEV@lTaV%?<HacRp
z>_@NA!pAxtcli!ZR-ZBR(x%)MQ?owbTH~(1N3Td$=Sl3s)+N~=#ntZr`SkJZXSD_1
z)?IqbH^um!-X*a3YQsNg@6Le8HesHOMT?#n{miPZDYvuNVO2@<e!BJ2rHI-3BG+1S
zteSh}1BBY#k3Qv?<rjB-uEE64dlNdl&X;HV&RRIX{9Tn;+oW|xRT0-tD;)}(EiD}3
zard@^*4aE()wsxzC7intsJ{=bdE(sb#jso7A+hMevuhUHzweDdyK(aD<EP?(KX6!b
zy8lP3{fW7p+|J6@0wLdJo}Eb+7p%AwKhJQT#EBR8)Y_*+M20L8(e!RKOz7okoT;*k
zv9{PLG(UY?=*o@Z>Ywjzin8PT{#y6T^c)>0uQ@v(RZqRqE!&ahAN#mPA>rJ{=NtT*
z=Iq$$<azei+p5W}=lh?3e%#rx>*_+0{^)p*N74QM0c*s}RmGQiZL8e8%`=kO^z|bl
z_xawRd1Hm7s{+isEfzmG?5Xd-Teo&L*L?rw-P<RNrbUYCtlarf|M+=@y^Zfq9Wvy4
zcB*b`)n+cv*m>!9pUS=cq@<&jBzCm+{oCS5N0+o~`HAx+KJ3w1n!UC4#!`!~mkUFD
zf2-7Ax_AHP_2ZxU`Ro==v|OqA{~lAl*4blGbKg%a2tLA@7Bu^^!h-Xx?>wUK2LC9_
z57I9_$ZqtrQFB`9wh3Nh+&=T%qF?$u`1Jgi3zbhkn{ljp_LSLkGG*I;ubJ_q#bD3E
zjgoIY4^NPM#xFIO|6A<yD__opTu9usmpMtUQ1_v&hl}%pbIx6Jgg@;+7FWyu>Y?Ml
zm#aR%u(Ie{xV!&<U$%sPkjrAh-XG60?kQV@$DK^+@ph>`d)d?UGjl?I=EcV4bJTh6
zMt2=nl6_sebYjb@Gf&<<nR16c>Aku443nB2kDuMWA})9>_*Rs8S-H+<75`6_t*zGE
zUu<~y==9oe^KMiNJgxq5YSE3k_fLs$|0klOymHUi6<<3K3aEShd&n20<i9_%ui`{!
zuSCy7{r86!D$MRMbl)^(_DrvWBA3uurSGRUzcc3#tqb}6bMfrF<7pO_69lK*EqFhN
zKfcf^=V9B`cJ-@Azet{(`&FZR*AKHL${x+f5BNPfl$3WS&&Qz4bf09l`z<ZYiH61V
z?reDJbNG)W`(@3U>t!~!PKd4dFgdaJ@cnz8%g;!<?<sD1Zy$O_b9r@!Mup4{#}hAo
z&ZHz(F8|B!HqWX4{a&?lQFfV>zD0S8M}-1Yjl)B=<&2iuUM;tuk+I|(W6NVX^K-$=
zTN3&TYu7&yuA6-L`;x?}2T{9>(k~cT#~x=~^wRvn*2=<1f6G`W29(?O?%i%_Q!P-`
z!lUR_v&|>#>y7<iCW*^A@TX?Fon0aMi1&E*!O}C4Pkj$BocQG2ceZ)c|63i;e)9K1
z@83gfg2H0Yc}I%0Y`ePc>WTL~?LquX?Ba2HPd|P8^Wj~Q)$~J4o-O@U^m*x}tTT4g
z7OjkbGDFtL=#T9aS045lt3UhluO0ZVVJ`L2==<c^hgVFBJn)1i?NaEel~Wew#pR#v
zZ82%JK3lLw)ac@&k}zHGOdm15<`0H!g&U&ToW3gd2`_MA%MqLWLBMZ=3%kTB&CNT4
z9{7p)#Pze^wp;t=qjui06m4(8#>C=PGuYcdUh`F6<k@Cnwr1nM{sO^Rv9w0v$x+U`
zC1XOk=eL}=o1fOZXJh!XV@+Q7?yNR9F4r<>lQwp8Q&7IRPH^M#>7DD$A3vS^<%UZu
zv&79@&JuP7|BhS+&I(J>^-Zq~uB_En*X~|7ee2yE=f^5DN=t8w>_648OJC>AoeMnE
z%)^)L+%?&vG4a@;yt7*~e6-I<&sO>7-g#n%tI4}HYfT0J?YZk$s=dfCF+Sw+1NmIV
zvX_F(CitaKh$!ml{ad`&-mdmnNv+ZMl&w#kuN{p|)ZRRy^ZJA{R#i9j1i~teA6s5H
z;jDjdsfO_Jo$frL8{TZ#Q>oCja|`?3VwV0Dhpuluq}=~vp4CZv!BYoK7+TJhoJqUG
zEy%}p#3uiA{N0J?&+M+s|0ncg)0g|p6)Nua&zYf8F(q4T%`No;mq?S6=bya~>`7Ni
zJky-BXM3~p&nU)g<}VF5i$4;%IC06u>HKF7&uq2#KWTV&QROUwc8^a_mm6qWyKBW9
z3${>rqgOP;^7HGCO_H~sw+C9EZ#)0sRR5KTb`4(_k6UvWGxwhL>6<6IcYou@Sj&ur
zyOkXVZ8s|TZ08wmb=q{|Z#bX+#|rxb_3U|9eopD*QQb6m8rwVPt}915zdN+@Fnze5
z(OXj?^LcgD)<5&TKj?g8XxW)8yy5))*L!Cy?=ShiyFaE&U24kjSu&M9_pW{~z9E&?
z)oQWx%hM)-MwQu$-x8BwXfA)q`&^>l!$xuUDOEMTGgF*rdaM#Po@lqfd~3%+ug*Hp
z*SVJ>%NFz%Uimf2U)H;-`$NjX)WUZm(<3)dEB?H{iNoMp(%pcA2^D8P*>*&liTvG>
zY%a)_+I^yEb9(u@YyaLzoLW)BSs{NoJw$TZbo-qg4ux~uryA@yAs6YrK5~Iop4{ep
zLb5!!9%pizIPJB$(X`NUOTfcw!(tUrhLZ)wGb%Lo6c-7ZHLsdEHBGPc`VC)U<@ky3
zR{ho~nGjU%r19AH&SuM{{DHpS&TArTxD$mu{4zg_E%DAic-+TXx6R=Bk1vlkGYkXI
z?mRhHAf}M@_#GYhAAi+9zh=I+)v7q;^}LgECTFKDuAe1;YP~<JTh+a}yG7V<YVKE?
zc=Y2>iNoBQL4LdDct>&zZoie2ea*pdjn}!x2IigXHpNUkzFz)Z{`FK{CZWYkU#_fL
z8DgfNsV9)0)TZdqF2>cC5y5e}N9OXci3b`KYLjyO-lf}r59Pco`C3WYv8%=E=-u1b
zUX?O0;po4zE^b3``mY4RI2N6h%o!f%D%xs1DtdP<zW&!Gj_I<UPtcr<Z+T@!J5=ra
zT(2k8_fK)0Ibp-2yg1$!u8LmIR`i@b!Q=MqnE=ZYPDi8AkMAT;&$g@NJQP$CIqme}
zqNuw8`Nxjj-fUZ6RTiw7Y8iVf>e0fqug7wf3bM>6zdB*7FRA$A@|$TUKCh-t-^1jb
zZXj@fWp<m0&?}pkkp6;aCwgA4p54H(X1eg?9Dh@x%vW1&KYx9=`f^dEe8<b<=cUco
z2WQQ(F7@CS7FF4_Hg(N}gV8apg{+;Egf+E$?{F7q^BFZPe7QqvK}J&Hvb8raZ(F;4
zcF{>UuOKziPB&)XcXMyet(q>;Y@#L<_k->C$Dcoc`^(SEH~-wT+|N@gBeYiI(>ImZ
zhi89(zDOsNfjgaV<0_$qwV4(6GnFmoK3KKJ^)ai;*_AFX4{YnRFY)%SRd^rD`{VSd
z(qAh#KI~?0i|3j2+oiXx`SG6c8@VfU{Ct>hCCVHT^m10Z>pW9d?y~kq$9OBPUk8ux
zj&id2*)COOQ*`ocRQ>zPdBtwi(r;wl<b9^dyVO&E+vm#n8Bd)x^#m^-NbL9Q&RzcI
z`={C<jl}ST`_s!}rHqct3*XQ6sXD#)_;QYkn+0|Th+KR3G4JzS(Jjn}-H$yf^qkr<
zZEte9(!&pDCRgwseXqCV<BOS3kHoegzVp!KuGr}hb5?)2I8(RigAAAdO2-#dYWWxs
zXT<F=w|H@VVab^*2OGUaCJGsCDsL}|SK9n+!Sb0Q4i-$um-idV^c_*Lni9d@SmKq;
zp*Lrmk6ldtrNl4g*KSqDGqT*dyVQD9v&U1Ht`#S%`97PRzuWsGcJ;37d`+F-=f3(T
z;XhHR@*7X3m=Vhx#bf16-&yhm5<X^r+u2w4Z;e5BWX2~`r}a81x0g-3{CCa!<NH^v
z>P)$z>nt+Ks6ebt?s6?})!`3c-7jg}myNXHJn&8S_!j@io~)v~o_tVnzv!RmvPSr5
zfN6By(Mz_{ENMa$N*7*^d1@WQc3PzNpQL!>atWJt4AUYPh_lzN3Fpyy(VxCY@7j&;
z^CpYzPv7gd(e7|D2VZ{V{p!stK6xLhG~E1S!p9vQ)-gwpxU|fkxk`_Z_ifPpd;Eg`
zbbh}VUw>RwBBA27*)7lJ$Fa`#bzhc5S=8{v-jR__o<G;oyZ7&phm!*}mHpTr3M=e*
zdQI_Os=wN@#k_}V!*hPUc+MuGmi|pSY<>2R!)3W*hEfwN@{`M2EKlXnJMeLd{_H!J
z6`HnBJ{BBFv^(tLw96xMdXZL?$+vTBXKh{a#A((n*Q&^pojUya^5^oeZQ^L&dhz6w
zoGntE9-UKERed%l_3xeL@#XtQrcEr2d?6oAo=HbOV&}P&t-@#-cl4w5-JKgaR&q7p
zKKm*-`@9=V+>;G5XU)$u3Kx}XC$LLpKi#M$aW?Oavcofj6~T;#iw(`4Uaz$-pUZ3&
z*Y!~FF}r^38*SSaxf@ypE;TSkF|e6`;N5Zb73YUzo4m5}x?U(1<R1Ldxl6fy^2L?i
z(_CE>L)K4io#rZeX<gaxJsV#YExi0Q(Q<9jjoZ0*^R&Ai&b-_<v$KV@^82f&CTSka
z6?OKRmg{fXV6?hzdw}bfj7TxXi;KR7U9|Q6y~Rxaqf>#{#YL;KqP<1`on2ohyzqGW
z>&M~wYpykzS01rlQBq~gaV}4LCPz<Ab97l>ujUPFF~*BtEPcm)inFg}DY!<aOqlEZ
z^>Sg{qXVV4?c&#WuP~S~cZOe-SBqk#ja~4=Qwt8re!Hl-`{0c^A8)?qm$$F^_2=Za
zbTdv*51;9>JEmMdzklDKU$Z|y+HU{n`Q#1H|4mptbIMX3L$4SwPv2~r15XY`g>*b{
zTrGRl<Nk~bJ-zRze^4=#{dldsdedP833jJ*@uCH@zn<Fg<3plJ#fozrXP#V}>J__o
z<Ko9ZAE?N%^6qy#ZmHGv(j(-}R_VZbJho}s6SOO)H@;;yyQSQC;?AL9QKt(rsdAPw
zyID7V?ystpT5-_j#J8z0SNP4Xp78P<b1z%dvN_W=9WqZy_E$aHs5_IX)T+B-GUs7V
z=N_T@QzAA#3(DtkFI(=NZzZui#ORL9VK4t!?tfaIoR_mxgch02W3Wz8d84w|Pxhq#
zUy+B5U&Q2=|64GHCyn3#?#YT<zDFZ={4QtxX&3gq=7(FbU}@4LQ?-M?-UTjqj`?y?
za#L`?OTDWZb>}PlGhQ*=IC=lh=H_<Ba*y)rWuJF^PVSq}nsLWcE_$zp+4t<m@+VSp
zpHG#4`kGPRZfLp0YG3~OUuPe&l!@^PGTTTepE=hk@TTJ0_cJSRT<qZs*=%rkSyDVx
z-TbY!`tQ4!XC052wBY2umuHli3{=+TpX*qWvg3l{yT$x6GnNI_SsC=ZPn)n~$;uhu
z4z3YkTH3$j&F)hN4cnsv86wm-I!+IHe`V2xFHI*N9+KU3;ptQzCzXzuF4JEK>~1MO
z{8)6-W9}NAH|q?~WO%>-TD$$~u7kR5mjWNTn`nC(cbgl1yP$Zai}x?XL-yNOIFFWU
z6k9pEKl|x8U;0guta;0~jghb0qRg9jr+&&Y{yyVWw775CJWY)$7AH5&@36AWn!DhN
zb<)RsRTp`+_wM6&=R7gjXrY-`P11Yq_Ckm1`R|V8rLpZXRyZ55IGu@qzL)$}na#1s
zTHa(VynVE9Px|8bN4l1<_I%0Tz@hm%Xxi2nix$nwOr7{+>dK75BQrGgUl&!`%$GmE
zyxmoTC*$$fk2^oEbkaT&vGl`*<Q;|{76H*_hgzJO?>*q<oBHoS64P%1tFNBNHo9B!
zN%kAuk10Q9HYakcz>2$3yJzK|b8%bt^xT0dmn7QsTUry^q^nDd6vA#6JYQ7ZwzK(+
zs0F9+Vxz534i+4<Qkoo}P~7EXKFh=Wi>v&JlZ*xJCQ%MY6%{(d7QZS?^b$L{O6lms
zpcM|CYjR85)0ZyMp0U}h@9pQ@-MPEGeB^JiUft8ULGVdQq^s)|n|bH>*NR<x7Q#C%
z`rb)qm)$Y^3uKN&uRAgAvzB*=p3bTmm225STh?&r>b`%$SN-)(e(;UtYf*P!g>^pA
zRXVgG_IB0E*HM1`>Dgy*2@7U0ojB0Td8#f+-_=KBYKq}!4r2>%i$9$<kJK0n*XLyU
zx=amN#wl}0NJje*-_G37Cp$AH{H^BUT=^}ne|NKQMD2v0&3TFDw{O_T&YwSTUesRQ
zC$AShPmMjj()4%9UfcYuS<x>0>VEGq(R<_Y`q)yBBViMh(iDPv6NFE3Pf|SO+Ik{-
z?@Y(7PuK4HZBltYaFN4Z-q-5)r*i$*(*3CS^~3sr?B-_{?@v114Bc?^AAkR27Y|Q|
zbqTj#F7oy%+jmg=_E}lJQww&=sdPQs_vD97SN8M9^UnQqZu(XvBuOW}^*h`?ZPG`{
z_su0d7xi|Yn-pmE?2~}5<*^kuOI?@zTc7UvGhk-2m(PK|U2_C%+4r8j8&=%&uj(3~
zvWdov553_opQ}GS^LyxWez$GVm+xCr5A9)k-+Cg$l0W3izkcO2&knp%E^5#EzFjpc
zq~g`5igW9p*52*=`_Xn*d*=a{goPW9Ke;Zy<ir)LY_I7J^Io0t*dIJ0xFB6}UiROM
z5<EY6Hp_^9P_BHS`ZY<%+%3BF!i+PrKlEzP2u~~DvH16a`$v?goMhK4EGn_OD{Ww4
z@N6#ojAf4xJaI1aQJ9-{b#B%J9)0JS(Ad?7B`t&(Wi8or^6p|e_K+KQuDYJ_**b$Q
zBf`1i@X8CGo0fHDURFFVUU>1Ij!K}qsFlr%SxTGuvug9swK47A{;a8bJ?CB3)~*S=
zkBCmXv;Reg>V0-kSD}u%{YHDmRXpCkoEI7C!N>a`W$$+>!4_M_68p(>PqcL0oSgXD
zR>X2@)bFb72eFfHrtvN3>6^9D^5f^&$;K=F0~XD3yYR3>hR-OJ$wP&u@?*!R4|kVs
z&kTQYM6Bnxb6?xhZR`=HPQKZ-b^V!X@_}J;uYT9fWtWoQn^$tbzMKEn--#u5Y95<&
zGG0u3r1q|1w#Xf8rV9Bzr+Zp9+p!gWad$4UJtJ~%XCeRL8(O<PgIF)5>fG%&5j%M4
z;Ec~E%ns{3tOIAJUY9K6%Y5@%?edN;y(qubNU@|(X}ULTnYSL)(TeflT0i~ir*97*
zo=uJ9KD=XMNR;&2-s`-#7O3%@3VAfgl4EI#uKke>wmT+2H}L#sn_6DnzCCHm)X98V
z$N3)Lc>iSE_Qe`93D*L$Zfw1E_5#DE%DWe<1>7`NJz$OTI8)G^doPmb@YiX+%iY&3
znl*oF_Y31l<t8r4qMP!EV$L@tc4Y*=dnYd#D>iKw$M+M<IBXo0OLYvz3bu9l6(~PV
z)GTsevPtm$#30Q}=gUtA{|~zTEhf8IVA|Elt#i6ESWm3Z-4V4q_2JfQQ(r96Qktmk
zS{z}Oo>=q!pS7j%<ejnF#`>weMz)5P*;f^nHofptSIUa!zNI0!^Y#q+)nfS>DY>^_
zZ9e$PE_H*sP3;e!@9pON)64tcZOwBF>6mgL?ICwTnl<MP+ie=>svns@y|DMG%aH?{
zz8rIMWsT5K?yC+8th*ps@S1Jcy=_WMJzgHHoyOq$F2Q=*%#Whkl6|@pZm->T`?B!7
z_4zLP@6W~C+U(mKBNn?o`uAP=x<7|Me=FJj_vzo|@9){p`}6bj=i(O+lkUcxbxn<&
zW^t-&2e&&D`{Wf_npqp7=D%8NRi9>el$puh<gdx%+4Dkz_ufkv{c-)+XZyFCWuuQ-
z6hAqAVs*{(yUOxi>}n+izGcl^{!<J-#LwJfEt8_Gsj{I*)@FU|iHYoU`P_xy_qCo-
zS9{~$aG1H&KX{sVOu@t54)W9cr{-UAlk7OXeotfd4o>%#XFd0vE^D~GZte`_((hVN
zQ_tnumz6Q5iDWy@lUyZ{Uie+`XYmX68IA&Kn>&+>*MEItnKY5#%Ydin`I{J?(kts+
z<qKQR<jvS&UU{~xbn=_i8)XkS)UIr_z4^Uk;+zJLg0zo&gd&<>UWtBo^8KxH<&QT)
zOpVG{Xb1<-OEx<w62Eoz8?}mCQ^R-cKiapEck*_wFSkzpQdoU+_o2IQ?tHW?Xyc0)
zFPogY|Mlh-v+i6h*NBqok$If%v_A7>YRUQ6DGnd29-li?Jhk7W%FigJYyXbzDtv+m
zn5~}i<%yNrc+0o77A@bArSimS*EDtM#(h$kFTA!kcE78y)HmTz@fH#1W%`qSmUX)y
zNnb1VUU*%m|MYpQ&vhsWWcF!COr9YWF>!|M6L*Ekeed2EZ!){EI4Mc#VJFvgfwLT^
z6~AWfw>YVqv9+N6!@-2N#uF<9432-O^U=04HhNVpac7Q|4SVe)k*nDoFD&=e=sqlL
zeDd+Ja+bdPldh$|ldgGYAbXfyO5D%ZE&KaBl}&aBEgHA?*Js=P^X}i``stJV;-r^a
z8Ed)U^?wgwm-9Vx@7<B-i@Tdn9`vhSQ5}A#{KsDFuRH6upAy(8y=jN4Qj&2*v$W#J
zF5M6ZyNQj4zUIfy8LdCq=*68n+hbGB@eg+C-xO!`Y3g`i+$5sQbYS-OsI{g%G1D4a
zDi%Invi$hvpCA8xc(!MatIU>#CULW-oW0%_)XD3$IKrOgxci+(UVD>3WfP`PpWc1D
zP^a+B`0LlA=9T)l%B}M%E;(OQ&^fee#fFOJ{I#*x@^d<71?v5l*qP+>gQJ<tqjUCe
zHZzsrvO_2OEW!+e71yX7ijkfA@XVS?X|oLfC$I@bKX|CcecI6V`|Kr;_Iy1Wct%W7
z?Uu~!%VOmrdcC_pES_o3y<23pM&Xa+i+e68tTVa2t!#IgL*$9QbIx<$TxxgqcCoU0
zz7NZc%a?@(OJW&rb!6u;-IR@Ya$0b?@yO9_w=$HU76l3US?+MYw)L^s#fdknm&DGo
zTpJZR)pc#4)%UX<QxAmQz007p{Og||1y=gk&xace>J_Yu-Ws;|o?of;Ou^lo7Z@~$
zY!>bl-WL4#kZE#HgN^wiCgat;2NnsuEe%~IVf!wiqIz1c=IKSQmv5@7EK;m1QB%0t
zRWM=2iyIBIZh!mZbNk!dr-^@GeR=lq%d;9QUDudb&%!q5emy^betpgVm;3*B>wWz#
zUsogld%}cNHt!H`!_N}zKG*mJ6g`TsnLRkQXK&@=<;L9&KU`fdwqzgaE>Zrcz5LUE
zwj{o9i@IKhe!ISJ)BD9!C8r*n{Ar1{8dKiqs>=EWi#hzOdTKocR^8ou_MP!H!K&U&
zH7U2s4+g1Azh_u|e_Z&+_)z`)v!CKr)gQ*~xZJ>A>cD5{c|=(?tm()+UzP92&!6}+
z+dE-jweqsJjXZLvPgs4ZO+Bm>dEIhj#qoOCCzdl7E^TgRX^1P#`qH&FJ}YsftnlW&
z6L=lxoR7OJD|ByvD<_Nn5%Yz=-zQhTt9?BqzFc2(=>n0>>vV3*`ZfrC7x-$eF8=Ng
zhheSww*&l!!L2J#O*ygcb=k9XDf2GAQ|z&+p7JtO@9OE2Q@hVq&DZD`=by0I+fUo=
zC!gv(b9MbcCgqprD4M=FcBgjH`QS5}-hJjvuS+z=W=I?4&P;f3RMsk4R&?V=$@D6b
zm8Tw^Ii!1L>nR(ZZE0a4!As?5`1T9#Tjn)?YHW*ZOl#}W9oj$Gwdbd*q*n5MQZ>)s
zcz1@?$;6f!yk}pw^K#U_n{uGzP`7Q$HYSfZ+=j6SB6G@O&g;zUnHFd?*XX$Hq?Y9c
zbCz!4m8`s%{OeNV%+Ta#cYghAde1rI_xYY*`_!+bNu^x*u6bo)+?~mbRoJW_@2L!$
zsuJwBOhAw6&dz?Twx*QltSM8cBp9)98LO$9ZOUA0Y}I~iS@Ub*!kfB}SAO{T@w!Ui
z^PS0uC2oJuEjXXFOmMgNUf$P}W`{g3IcuwvF=OYAgypuMb~by@boX${yEpfa%AbRc
zPCNIXO}3eLz4nzA+s?%IUe&etAGdI9tVu}<m72LY@XqQTJFi~bmbSKa_jKmk6>HRO
zZRhFb`#m>bUaGrPpn9^K_tW61smgaFq>nWH-?iMVFW<aOO)()W?d(Mkw-k}*Ed4(^
zYhN{YKK&z9Kfl&2TkvAQyd9C;M?}O0pKhAANu&AJv#{MucAlQ>oa4xm820ZZlg9?Z
zkX5{!#SgZx?%&}NQ+sM3hyEFcyJuEsY)CmXQKwn*tAmfm#b3SrCI^g}`+O5-T;8Ue
z$89Ja`iNJgG{0|6#LU(^mb+t4e{6WuD&zia$_Df7=*?LyKTf$soa+{tFXxqg)=Yqd
zH&1UXr%y<Rpw-+Ptq-nr>}*ds&%dxnPOT{7z?R6}+m0=%D)XIMe!VI><;>BWQWK7^
zo5$dgcJ+L>#$@iC<5P_S&wT29`)$wEQp>s5lh)Slx*Dx|{e(dBq|j44Jp%8#xNJ@M
zZr#Y)D9!C|K0Rj3?n77CMrIf4ae5qFqT?dqtu4`R&3k)Wt(fj2Z%<i)P8}uH;59AZ
zO{;y@wTL<$Ikx)6dAa**QbTo@c4!$-<nYt3|NidNmp^H%|NVRXR^PrZ@BRND)8+4N
z=Ma1ye6~XA?i!=^efH-M|4?1E$d&8y!@z6qf4_-8zuauN{h-!DO&;dR2$_SQg)`>N
z{1|Rv@Z4a1_zvMeLC4OPF#JjEk#pbVu_|88j;A7i@uh5r<H})gOUnx4pE!i5?A#*K
z(_TAACa$i(%8tWu=2u>)?5*yRfh`t%?H@eC4jtU@qj*H%S!AiLt=BRu?fJ(0{I?m;
zNvNr*O;1jqa7%c)Iv01cSovp1_N~vSRqg41H1~&y@qUq|KAW#*CfOu>W%u;lV`*Lf
zeBQ%3{*f2wbo1%HSohU3#(ttjtD>FNB;8*duYLXDp5vls#%e5nZ@WnG?t%rg+u!Ma
z>fHVEkTt8>M#Gh9o!rXLb}|W-Wh>TlG%T5U+(@O&%kK8WwroZhxi5Erd}(p`9+qln
z<uM~J$9s9|k-JN>%f1>_#69hC+Fw3(a*DnF{(t;+>GS1H=D+bs<vVpmASrbk1H(y+
zZ8;T(=IBf?h;!F)TK3z>(sE+O8MjM?i6<%o`kgH2Y*JC`m}z4iedLzc^cO0*>c$1C
zN(xhbT|$h1gx%=&^7|S)#WjU*4~OHA1Ij8z%O?vx`Y+2rf3D@s6D6NCU02<f^z3Ll
zv}DWre>%&5HQPL0IQRI5<*IcoFQj%g2+J3FWv+k!`K0pS&F(!{l|ORLN$j`2Vb|4J
z!nRA;W^?A^Ddz?EDLaU!Trr*#6cu;o@W)4oF0uDdEI8`i_w3wX$EoZSs?U8rp3q;m
zBPeV3r42J2<V$M%${B6f>`qND<TGkn{l78R(KgL{(qz_myQUwmw$*h1W6(JF(n^O!
z=a1+77ymo<ZN6htvBTy_jhF75q#vx5Jd;u3_r5sELga*v`#-s_-&=Te-o+h^(>q$!
zGgWX#daCZOHQt)6&%)N`UR|Y{ZL6}!Y14<7iIt$$LLWoI5*t>pJbm&@$jY1ut<I;_
z4%H#fvx;?2ct5+yIBTb6s5@`<1B3Su&dGcami{s4<?;U^E~)3G7&|wtThzVod8o(R
zw9LQrcsQ?ceoTCQe_^$Ps-Dv96CCd!bbdW3)a|Bh5g9Ady~0{+N1}@Rfx`-Sr*iGC
zjGy5>>w)5$)zda#I=iN=iRX^b?DZ{ETRWsZnR#~K({r+|`l)zV_olb@Q;qPJR?kb{
z;(AS=u&B6xiSgrq{q^&;gihAP=g(X_4!Z5pyu4E9%(kszvybns?LYr^Rz{=FseLy#
z-ws}VdfN%94GT^@K3gHe$G_q7zq){XG8-!m-m}i{_jNsf@AJ2s$%o!~$1FWBKW{JR
zXD63STJu(K66y>Vh-75z{^YS{f`bwJwSx?8=H;d9uc+KD*0XEc+dDVbW0SmE(T}fX
z!H+I)n&Y+d+KEmt!APa#hUK-t93l)pM|FLw)Lgae$<N0f(_K#W^FN<o|NGtN;@k7=
z>VN%fxBq{+{{P3{-&Y2BF5zifntb-xgL&U3GO+HR8#Xn6f3?Y7KmLTX?awy$)$Gli
zpVg-L;aUCr6LP`6m1m-6ZJWQR+V%MAS7O=;{l$Dgzs!|t-IpOzJ=J`l@l6)PJ^mg}
z^-+B)?{DtmpEcvNHLvlR(A}33k1@adcHMEecXi>u^_#3~)t?{eKCv|Nh;8PM%l+K;
zOWb{=KZ>V6PpqHtKG7qv>|)8e_nCatKRw(#Cr2!}^ZwZj_x8`No$qjjz5QK4XOCM0
zzjb@K{eH_CZ$8{~UpQ^rQpw+!Ojq1B_1pVI!YpOa9N&WP=AVDC+FaJM3T3`?du?r)
z-bS&2m-6g~ji;=7F7^HN!uGZvu?y<q&-O6ve6DtGhnEk-({NwCKU&ALBwm_+=HL8k
zQfHBMj?fKZ<{vLa=Ks4TY~w!F{zL7@b1kA*${z_gADSd`;)~Azs_xgpQ7*Ig?pnAZ
z-Kz2C;(h7DV$;;t88|+<+f||QDb)GQ;WaX^vjfEibtRYId|l0F!CK5@wYaB0uU`L~
zYGD4(Ej!O!&9>WjH(BrQvBy3_i%a-KR~)GI(7TkqWw)eSRF=JLYdQOxC&#C;9juI#
zW9@%a^ha=$>zN~ZXU;jAOVx%?`=KEJLi)AHgt(j4<^5`Y$9L%-IQU@G^Ww?fUF@O?
zk@hQhzpP)Lvh&F7)P2sk%k1C!ecdSXbjf4$DVFp8RD1Im9C}mB{^-YtS8v=4^DiVP
zY?t46a_X(OTWn)X&ObYB(KU0zrz$qZuB#j_rR}BbbCrxZ@440)@5^wyF1WimJMh(q
z?z&%f+}2M@j!W#nqodq>=i3?kBS~g#N0oNzoSwB}&aSOhtHXL_OdL3vV!wX6`t;-M
z<DX|=-n7bS$0Vusz5cz^R!{Z1^=NkY`BRG4>I<~@o!Fbqd3JT>mG8PscrRDZ_m`h|
zbJrKCru^-1Q%_2Kn5m@X<MnjO>0N7<AI-lJ<!rp~@@k=tn^qm$W3;I9@iC2!VuIT|
zqRyX`luZwIFwR<gKwNW4*Fw%#;~0lCuPpC%Z&B8AJ>_|N<pv|p*HK@V+&YrQJX88a
zqzw1P$SoUo1a}li{g}`Yv4_X>FyGbJtF-y|w`g@AIJqj?bce8{O;B@fWVfEvoC$Yp
zj6K9GHs`&Y|Ef&+L{-=F$lJ3_rT-<oNwce1&=*>wnVS9f<u$wfi%v(PPOn|5vq<X^
zFK>&$)+G`Wv9I66rd_-Ec;OP6>)S)`&&w;&KD|ljwc6~fY=I_=)_y#jk!W%M%({%;
zlSfrkqm&x$eYg{48csxC?LE3%_+;muyEBg7wA{|Tv1sXpJ@MOreW|IoSlcbawqt3A
zw2sxrt-n4Q8=qDBsC;zCJSMi!I-lj_{qEO(t@!)n-_PT3e|-$TofW_K7|RLQ)|oa<
z^CLGs{iu?*qKjkVqfZMWrkqO4F<E|~DQ@N0{V_-Wf4%y5&72v>R@5G~5M!zF&+0ae
zU%y(i@vP*hfJb-jyh_p~_KFLAIOtcP@UdJf`c(OQjx*oYGvx)|oONpn{E=|5q};9C
zHC6sWZI*e~joKWO7NyJA-vpL;Ua`x%#&1)<Z1JS@Z%2JpS6{CEbKt}ZnFlGeL|*R@
zu2z}VfBC@tH`@%wjy38Q%Ez6KH%VS{&FN9^>$kCaX%3TJXBi6qf5v%{k7Mp4?iY5k
z6&dN5Z(52jyWFMaGx=wXklF$3`0uK@miIK?s@Tpeab)Ss>z%W51;fp~rL9co&rLtM
z;12uO@)=q+E0@pXSUK6`$=B__8LrH~5IC`Qmhe@@DYGtJbX)M%fk(DPBl7b1_Yb#f
zye@R@U8x|y$f)S)l+=0ZY7^TwFbm$ktNPO~`S|4X+ZF1#Hgj#}KBK@BYWR_7@1$i*
zSIlsmk$mobY{->I;uZZTgf>plbLY#O^5dTR_e*bYCnSn)=m}IhFMj&QES}Wwes`AH
z?dR82<i2=rmf|(L&rdleZsf{)N>9G?y(LF=7AxnTI6tK&t4o^RUU}+Z({f{;!_~g0
zFVDKV^_NQ2Je#t}I{R#5=B}lNpWKa-9;N8)=Foer*tSb^?mDGAC;w}fJjq)md-!+D
z9*b{JtP=DeKisUt&cRj3_f1zhmw)Gx>cs1r{;qK|S}T~m`@HKovaIy7)PsvuW<LGS
zWfXAz$%ga`>F3uRt$7?MSLrGL`fQQIqc>L{{<WR?W5=R>)w_P~dvWe~&Zz?yA$wA<
ze%-V}tmqc#R<I~jiy0D36FeV&Ev)_V>eI8^$3M>wj&x~E3h|l7d)n>kwJ*1BSO!Tu
zowomDZQImy=BCWqfU{ZI+r!K^YrJzVYg&9cgFl9)F3C&$;cXwDV*Xg8%`09g#dw6r
zO;b|hYks@mbmalYE$P=>moC{X#?vN#T<W`RAuEq?hJV?@4c04m&Jb4;ys_xQ<eWW0
zl{KFWB2%mX{djr#?XNorL{7Y66Pg&*)ulCq_lb&YYHFmXY{tD=#x9-Wf~iG^uI4hI
z+3DTNli_Lia+BqgZTfFhWe=zqRG8)`1W81G-L~~wj-~FdjL0pU%FFD!e^v^L-fPI-
z>~+9PXpe&6x~q%7yM^;t?6*~(w8FzuchAqY*M9Bk<oenr)*$xv=uTF)71}~P?Go)<
ztPEEliAw9*q-?RqYGdb=uSII<3zxq=#U{K!KwNA>-h#6?RIIc-{HxwCFIN;4OVyJ(
zJg28m&Zw=V)VSL?!e;Ab9UZ3h;`)8D+pBi!u==pAKBncxkg2)xUFqSvTMa_S2M*LF
z9zI!Rr{Mkb)#~GKf3E&s|0{jdtOq;t>=YMm__8y3jrqmhnTcVc6Frp8gOm;1cvIi4
zc;>kI-AmKyZ{mf2eG=9<v$x={lkf+xzt^|VpC@p_!2RQdDe|o+*>^oy(2}@*dFG<E
z@<y)iNqYU3xvTyg^82x`5aQ`CR()&uZa1sY<dY>X3kvMK7C0XHH1ll1@?#QZwK6lz
zOhm(lrxgDaObeL5DYL9aqOD`T`(NG%eW_NuA!RE4)=cg4>zw#a--jGl-;>H8diQ>`
zoFDU@&f}Z+lzfhR_;$nMhYB;!%Ewk;;f|X(_fLDprr*mCu6y!Te1g}Lx_$3;k4-(&
z-m%7D=f<9W*(yB~Wxl)D9yO2Uwu$+6Xr}e{;JY_A=5O1`{r)$L$mZH_=Ul4|oEY?P
z?Kqb<|KsnyQnIJE-FszFaqNcwNAbIjeUGg;YW-*2Uf21*sq^m?NzQhw6o1oa0y7HA
zs{Yzeu6>;BcQ@gd^Mc8avJ;jcn9wXa@9K7r-Ki_4t}x=BF6MTdMWLhZci0{eE@$^z
z`E#17-V^=OOT1L1HyP~SVc^W3ZG1n(@|m})aP1YV_+`8)MWqTX#!Go)PaanJ%%$VD
z#CDBNBmbPLa7TZ?hkI*MQaMuJq|Rg8`yx4Dhr*6zS<UBPJ{&yHSoUY>_A~33%G^qO
zYQ5{(qYj_P2TQzseZyC!M(?~m^T83rm?IB=@G*%l`=N1v;`EbyPoB}bzxv;g=tG|_
zvIbrL@vWlYW&S}MU&|J?FI7DGIu&;JWY5`c`QcpS?^lrS{CwZ9bw}UpZ2il0|6{3#
zS#*W$3EqnWJ<HnmZRnc2WWpb-(|M0HSl%mk|Gr$bYfWmX;>xv=xofxeon|yFnfiJ5
z<=Of7Z1(J3Gd-wFBe+oW>Z#4swZFW&wSYG-S!J{Ol=O9OX<Z9vhHhSabL+L)TW9}%
zmS_EJ-{L!QGm{%;#>?k@5zdtPu}g!yyGumq?9t-Ax!2aHv$Gtq=-eo)w)!_S|CWX@
zvDABaw1X^L7kedCEMZ;JvnV9=)h4mH|L?-<zyJRu|Nr><dogzPzaH-YcldqXpMQ@Z
zA9hZx`u{lo-;a-fmzM?WP5=5ZXj<;->*viUi0jrcHm}Gvc=qhkS_AFPhFJ#Jm~Xu}
zrlhE^$no`Cq5473wp*QI7E1RU4;;#BkiL3$TXyWJAivjDk=xZjbE%0wbWKSWN>i5D
z*c_Fu)f(#6^zC9+?}nRpb!VjR-hDLp`l6h(>(6>!KCRT<aA)nxReD)l7cSkNV}6~b
z!NyiaqH==Ho%Y3rAB=l$trZN|S5;hPF?r$E)%hANQ!5qOypuP%9Dic?obS`MZgc(9
zT0d18EZ7Y<Kfimdq$c2kb?muLlQUW=SE6obnZ|c-vX;6v^+c+oQj^$Bk?X%V>lY?=
zUS(YqX?j0ub=uM?sgd_;zkc|7ySt?HiJrc~wMB}#QeU>e>+SjwmB@C(^`wPs!(@%=
z%R_DNH<q|{+DvA3j1PG5+21{8@0Jrjtg7D>mK8quTKiEa=DnKZw3D}9O#11%fcbHO
zeq3MgX@jYk;;!`{UaHOi+p9L6Yv<Ew`Q(+?qMqon&zxeASW#wawPf~*M;sz2`F6iw
zbgcA3GuyY5@*975l-WHv)7bgzaIx?qpGm)dK0Llx{=3{AL0b=&>MSkao6M5$SsQp0
zcL}B{mB|M$k-cxTF@Y^XFn*TJeeKe|x5@YG`wtqkzg_%#DPPd@gL*HG|Nnk6FY)s1
z8^(1Tn1#<bnrYeS&kE{$`g4j5>+1A1tnxj#=K7qh);pk>>^LcpLn-gsZk<;fY!7=(
zJS5xM^e)_Tb7H2-{SVp-xAx|q?%8(Z>U-{0dP+Rs<I|nXWnO=nws`*@MVHr_69g}A
zcx+HmFIlqt%(`GMfo7h_%9d}fM|U3QalN6-9^Nc;fMb!%dzDh-y3C&Yos0Csjl~j$
z#kZ=hSvn;sBvEP68nrVqUEi4IyGC5GWcy?IJ@Dd+H%sjsW|(f6qjm53_d{>)sz^=v
zf6?!|(ENfw8;@59>VISpo_lPq>K)r(cVj0RI>(w`;@xGNu6eZP;;#kSf0-k8#;>!r
z6|ddKGePx(U8sR%>eR-t#-+=y-1)wCY4V-Qiw;wEZ|Pn6^7vg|OF7dICwB@vo4mTq
zJfq{Rk%!3<i_U{5GymD`WPf}0@Vvf5Mow;440>gid=9^*uZl+4dT9R;og3w*ApNOY
zt}RMHQo^Gvb^9f^4f<uR7i8D=wV&<Ud|Y6HWY*R+o)6mGDerVm<N9aRIJ!Ny?F(jL
z6u%fc?enfV*EWfyZOVEZc6(Y<A;;zmMb}P;FF#&By?y)huS>L4@}d?_eRA`OWQ}po
zyL;bVGxgudui08MVMqAqYdP6rYh$x_N9{J-&DeV~N!|XU?aO;*Urwn^cwV9Z%C}%k
z(N&KJ396b(e)r5)Uq8ET%Dwxgj+Q$n#d@psO}<zaBxX42STnbc?50HH<y-EqaA1kK
zCOxhE{@+jU|3Cl#^vj12%g@jM@#*a2^<N&&{&qY5&!fbN{qO((*#GyieNEoi*ZKRa
z{=7=ud|Uqi7xBXAnJs_H_SG%mpTAnL=K|N&sizMuW$d+l`&#Xuhf9Odb{X#JrIMA}
z6DM(RSS2*sIx2UiVCVI`4`*|WjjwOjc|LjF?3ojS1OA7n2}PVJWXx}i2-e#b=jRfV
zwei}mvdz~HMV?T&J!`geV2oGb$`teB+$gT8jE3s5U3$8WDhKS2cKvKU;kZY8FW1D*
zHE(<#+%8Vt8z*<|=|&y%gK4RoTVo>gTcT~G4xKjJy~oz2hi`&prX$CKPjZ<`f|gTv
zK94-}y;f&UXoJeVh)=i6imkhKp5*Xk^-k^b(#&%B5PK$8t<}-z@}m<?hk4R&<h^}a
zrn>BM5X-j@71xB!?4%|+i3whvac%l0mL~a=5)G^Lyam(dt3I5;S$(5eaG%0CPW^{E
z+v`&MKCIWQc=o4Lv9Yr7_|cPE40j%R7tQaRyxZUE@D?*Q5uYl#-;I6$9vw~DwDL{T
z={27&*3bL4V2#DrRQcA1X0fB4!D+p!1>Y5n;#%&0`EM5HaHz+pM^^D#t?z}eT6@|x
z1a>R$p2?h2v^?kQ>E~BVPxds(ob5ZaK>CZy^l<wPk|J{~{Mt6L%&%GID#v<%Px_`$
zir#Mo?wBNHv(<hu{SX*l*wA|8onQSMM~>Zmuk3F2G<!apVapkuUb;h#|Eq!gi-pP$
zPT!5(CB^<(OmlX5>dTTfTmJA(o#5US5#G15L%xJ%v-eky^#=?0Guyo?y4kqXxXN^E
zhPVRDBAvFWb1ro3jK0xY!hC8+yGn8eGxHq#CssT8AFjNf+H*wmRo%=I@oF_Qr6YlS
zZ_Tz}c$!@yTinLn&y;KPklEVZdCTNpj@_2rn|C#ztn7L2>$AjSr{mU+9jj(IwcIgy
zRUMkRkzwN3`zw5Xg6GXoZClNkvG2=?Sv;q1{ob|X+om(8l}Z%C_K2UzJiX+}qZ@C|
zEoCm=;+<FZ#hp*9W=2!nt~rqh=6$SL$vSJ}O4~<0d)RL-d8(1NPGaIrj&~eq&f2`q
zG~6S9kaxjLw}qShH~eC9-xXdPr^&ab(8%t)fcE=u_agezG(z`Tc_%vkSKD*CrRKZ&
z*O`&;cUQD~FeypB<7+GtKVy^o>a3}P)drJ$Prv@W+3Ka<IAh|Whio%~U;pMQnJDlq
z@|Q{Tqnq7gRw3EK0ZG?VKKY%0EAmx;&6VZJDgCQbCn@cERTOzDQ!p^wEoEEeYSVkW
z5-zTJUR1Z+Zl87hy*bldTVi{5Sl#c*Uyy$MYyji(E1z1H@jIJ}2Zt92Z2pjbYwfnP
zx6Zy{+t1Xy;^Ps{`qq6PR1<~X^%UP|T$#c&slZil7srX)VVC8qc=r3O-?G!RnPr=D
z?xpVJM8*u``#M{Me!s~OibyxxV$8idBxcE;pU=|!U%&ib|Ks`pPrK`HDYSmcum5}7
zEyv~lzc=ao_vK#rnr{C)io5pzOZOX}zMb8Ee@~^$?SEem%l~_A|Ic~%%&h2h)AR4m
zIo~9GtN8J33vs;$*Ig}wtI|CxwuJspR&M*Hcj`=%>9UEtvjqIs{`wYkz4&T-Xt-|o
zwX=77@0j$k=&)rSS@2-8mEeI>UZ;0W`+Ue$AWm-A?sJ!UXLzgpY?3Zm>+4!N)wH7J
zh|ew&^Y72BJ#9AEY3SV8y~aXZeP#PQ%hw-|Uw->}YQps6Uq5|YllFYq+&SV?{XeX(
ze&)gVLW_?>+~UBqV+;{CyXwkIgU!o4=geHyFvnBy0CSpHt8ur*pIvv?ZNL3^lgQPA
z{}Pd<!P!fhor8oL4(H^Dc%NAkHH~%Y*5|J;zkPZ65KETM<X^j*UA!Zegljn_B&VNL
z5RLOvY&-0hC1)&OG~vaAg}<FD_wBasFRc7;R{VDE)~Vl`s<vOMGOs8-nd(2~g-Oc8
z#_K+TT(Xkw?WG5_tD8SY+&uct#<SdNA(O33*Kr-w&n(Ph4UH+1N6*@{epM`Z>A!oQ
zWRiDNp1jTX%W}*MFAJVm2xwk?VxE(|oyEKj3*YHWbk6wb@>uh7`|0ndKhH?^g-hS{
zEcbYJIylt$xwSRxx9(R<gFbDoVV<YORaCB+G_^Kw@8s?ZBN>Cu0UClE`TzVr`9J)`
zaW#eG2X_?iU|jz7y<ds^oY@*GyX9<|_w4<jC;wxSBI|_1*2Y=(JRN?|_wN^3o_Oy{
z;|DL}-hYjsJ}5nEU0b$q<NFuRo8L-SOBsITjxJvRjKA%cAZx?!@MCk?ttxjI8ZLCp
zJyiYYN#Khqk>_GNpR<USo#wWGFnQjU!lITlDq{STT0Ziai&?bJ339m;z_oKyM+MWT
zqpXXhKF9<oCWgujPF{HKyT{>4Yb5VZnq+@qmhH4j{ML%gIDbE~y{Dc~>-_ZI@~4Ys
z?(F?hY?3ba{f7A3LZ8w~c43v*)ro2W*M(0>q{=)J+En3I|7j-=L!r;!HHRPP>M!1{
zv)nlJ;@#b`rBQ7@edSR<e#{h6x;StCG3B|Y3-xQy2@6c(n7?VS`=SEYFTrIQ7k3IT
zF3z}H%xtYcskmHB-e1kRP){c!MM2nVWr~b@;Du;@W#8A0--YID*asb$qa!>;DPDM9
zdWO?92X+ss{^N1qUYwJA|8<AhGxZNUYJdLok>&KcylYnQWMvnpT{?^}l9jT1R|wAB
zDiPY)dV1Ho=|w+Zl~it?BY%1m*X~Zq+y&3w=4-E8(7Ph>Oms~{0EhA`)uuydHf}9j
z{q|bc+HE`UJ}^&6Hd1^3K~dJNr~J>|c`?@VmyO=e)l65JcgweYd6SdQBA<mN1@BJy
zS_R1^*}Ywu`g*fJqu7N*Zw@@W@#euyiGbxl^ddRB&D4!`r1$AQ4fJ1Kx!2CN_ScWY
z$8QJu?f?1p{v0{EYppZa?V96p{z*;kuDkQh%W7@*rbe!Op}+rMXzi_e>$h(ECjal<
z`+xEL{r12Ato-wE{_WrX;yU%a<9>wI|Np|sJ$<XhK{I95Ylm)$PLkwyKcjWZV8>0B
zz7v0}*YC64{&wL!x#^q4Y**SuFYubWkwdtcdF#B@)3O6MhnWXXNxj<ouv?nr>?dBX
z-OHQjMoekBvti~(-Y(a3TPDo@x<c;MyXn7s)-Gn6P`lQBp6+|W83pBvXVaN(%*wwW
z9r?7zZhd^LT;6TzXOl(JZyDJ9zC3mF^8>~U_ZurTI!HO5;FSyT5=_>exa;WLl;bW!
zR!ORimrHN2+dg~G-&b37(%x>nduGkHS*PQL9@&~JsQ*~_RXiyysek9xn64E%uJi5d
zKa`aoshjd`!+DQsmyLQkziqKo($ZSBSn5#W(ev(wpEjob2;%Rp&o8|H{GhSr^L2k-
zM8AI0eRfXL87F?f*y?kaj@_#%TmP2fOtr=Hd5erhzO%3RIYsf%%RgQATzpH4`pegy
zn6c?YVwt^(*xiqv23wD^7+nwCtv1h~RVOjS=acV(8LF~-1rNJ+C&!we-LstUTgR~@
zjq@va$WBmVe`S$WCU;oj(FE>Fmk#FjS7w`Zw(ZOK_GZ1)H|06?w=T`KSiHEZU2XSk
zp0bC|Vnx?4@kJi_H|@;A^)`#<s)fH0sSx-f-DPO}UMp(8GPl~DKYgF(acM2r+-9bJ
z^U$5g@!xM=D2aZgaIl{9iig|#H+$|W&z!ltqVs!E`X?=)dfpk)98N~JQrk3-Zr}U5
za#>N5#Ma`X*T3U5PX|1!J>2yBCSxO8NLkmywl6OyXH?nUP?9oR+jq?-<AJA{<h!F+
z!U}wnX0*;#;7c<PRt=r8Lu2)W|L1S*FZV1`xjA=1OrYuajx#I)SFd^(S=_NTzW?9(
z?M=m<r}H*Nwz{Xa%wBoouD#2b#CLL>4e75ITx9qpnwYmtt-redx>riHqxXxiTNfMe
z4KBKJY)zb?F5`n$MvFE)Dmkw4@wfYgg)8s!JeT~Rb&@AV<$&3)2!8X_ewJsRiS0A*
znDw~YS{l0SSh=eINBGQB^J}&~%9~`fIPblR<4m<1o&7zNtc318K6U=lzTd6JMow8?
zi6+T4$>-Nu?aNsn91^Q`b{hAk%n~Q3k0!HIvs0dRZg`lvv!hUDg^}79%j56oyT4F!
zx3s#!=ffjw;O@MW`<!%#5x2^@vfRG;zuGmXS<jr8KdZQqZ~0lKMXzl3@=SEFQF+DZ
z|NijLi1>$-?;CC?{MebvabLrAOV-UTSG1;VGyl%U+q7PAo%FV~arW_X^*i>}?2L=|
z3JlyOqqqBX^yl^MHQAf2*UT5Snvi%{P}T4C^`^5q`DNM1Z`aABwg2Xm6JS0h*6p$5
zU3VzY=lY@zPfdysIjH<T8Gh_&r?kPUAn%nt%*<U{zM5t0uOB(_G;Z;{xmQ;<Ol+?-
zRz7va#=o;qP)%TWT;0cS_Z#+4su%rVA|sw(zkAyMzsvpYGh8cwZoKyG+qb$CuSJ!9
z{G1;DYH8l|wJ*waMIP7xoc{mk=k@#l<=6jyc{%>y-_!s9?3DWd?ft*=^85aLSo!wt
z<@oLKb${>M{r}=We}B!tr`M;~|IOE*9~Y<hRDS=T$IsXAmuuRdoPRIPfoFlL&peC1
z1G(R{)b{)SI{5f*-}b|Iw@v@_Y303rZ_BbDt?kHPYEe*p-*fspqY#(#_oH^ib&K6x
zw{^w6KW(-Wb9vU(Gu$hT=zc3urpwB@CUey+o{kl_gS=8MJ>K+q^2J3z%f9~OxtQsZ
zZT*t>?X$Xrr%!+V%&)(`>DCpOTMiBnSZ*G;@>Wez=UCR8p0G=M8uw)j`pqyjigY-7
zS5(i{BXh0n+Dy&sr>{nTJS?@=v{{CgRki7o#i}m~$5r!n_et~$tlf0=)61%zd#C*U
z`}%tPKi*8!2cP8JHIJTcnYnIOwzklOD6h_5#y5{o)ue5n>o~JQZCV}Qt$8bC{smh7
zvzis_sgPV^8`rjSN9<%#g}K$j?!9Y%{oZ_np*-Gh+R43H|EGUEHAQmK^u)kGp5F(y
z>i*yG%cOSY8>6J|;#WdVCz?{%922bG*^{vGSGw&%Bb8YxU7as1Z4+AK^fg&8rykm^
zu=J|cOyysm+V8a7CI3BX5K4NnVXtrn`{&Q)I^uukzM8u{O!)c3PZjm*p{k4xPrCNo
zB<s)PeVfYpb|-h<xw8)UI4`?gc)r4$CAvRboQo@*TSL87V`sp^KIR2ybG(AZ?)XNm
zV=fnEeZ6Y$qq$X_51QA$yXUayj`Z>??g<-@Jv#SyQiEu3aJz_awc_O}%l27I?j-)W
zzCJtalvw_k3EPgXJzaUeN6norN$bNxj~zUZ`TTU1+>%Pl-1PDimoUm^_A1<Ym(sNN
zPS^9xR~5N-?VK)@CG-A+%!&-oq-EKftAswt_#Ui%X<@V?yMM>j)Qb6PcN*2+`-k!z
zejGdX{}dnh_d<Pr5ruXJ{c_v%Y;y0MTP-UdXmR>caA$|-m%V3%Zuoj;2KOxfy~%dP
zH^tv)Lsxk}lG~fjXSrmyCBKCJ`dJxV6N8nupDS%!(q_VEK6g`JOD><~UY8SVcCuR8
zmhGrZdbEU3ed{XDyHoC6d)Ad&BX|11%AU<JUQzE;lisgoFDOX4cBJRHrP8Upzg>$q
z%00N9&hcQC&zy`GJJ+*6U!;AGd8w+?b3GlY(=Yz{z52Cdk+)KCimlj!CEuqkkv+SX
zQ<han=~3$Bw^fyYA71|Y`RS*ZLHbKq9f;wZJo|S3?i$+<*9urTOt=IXA3lqka9#I^
z>(1*Fp6xTg-Cve{eC^i5e--#Ia77puZEIhBR=9G`?8A&JmTF8e=(1dGx!Zcz)6PQ`
ze}q`GdbSCzn!C1c(Tz(lKFIB1QVdNI?$r2kcdcQkYHU)6p~flwPfveseffXu{fy)G
zkDuMIt1kUr|F^yV|NH+h=ijTV{r^>Me*KS+e{V-O*WRq#Gwto$*Z2P)eN+G6|NqB)
z`#)cO@2^{WFYoRBfB#<l$J_n?dR$*R?pt#B&o{UG^Pe8DfBn3kZR?-M*YD@)&9DEn
zy#D+7`roggU*G=yeqHtNx7TmK{r_wF|M&Onf8U(2W{C}d*F4!Lo9A_SKWED>`F#Dl
z_P<|0zm?ti9c>u5I?q-5XI@$q+a!K3U5%|-x=KZN@}@8xb`IStmYH-`iRF~wt9sF0
z<_CM<zSeVnwR+i9LxcKvl@ZrY@ajKY92qE}Jk4nS#Cel0ewEqrkYV4{>2IIzsx5kO
z{qTcz-!nfxoH)0{>gbj5ipeaMmUmsIBq{ld=1FbZ_93&I`)D(V0Q0SYT2bAl3EEes
z&Yr0a*VyLWdS%8g#YYod(%YV&I3vulT5C((tL0?}>+hQ_Ow2H1SvZ{`A<vn|DkV_h
z=BsH3rk%8~$nMcR?NVI0?Au1k0``5cCwo3BUuq%G`APi#`3RlwHj8U+ddyRHo-%*-
z9oA|5{aR7eE5qEk2|RaVpKRA5y66J?kC!*)&djbTEOQiC^zD=S1F0|TOQXEh4Jvz9
z8fLvyHO;B~R^WE$ONUVB<PeW3Q&O`&tVw0%vHvw~x%sWyy?5nhF9dHo_*tU9qPTF8
z_GaV6`O{v@Uy%G*?Q`{Vd84FU@ho3fwFiuKi<3Wne)#v=^2Woj)U@-ywAfjyswLfh
zRrUVuftn}Q)#(T1o`(z0GF-meW8o|d_YLj-oh}w0cYLg_mDOEOPAyIIJ{9}ns;8LP
z)w>#ZE7Bv5CB2@#MYHFdr^^a2*XuqhdyPE8R;&Fv*(!HCfyc=EdA(Cu=eZ}Zp9hMY
ztzy_+Y$IaCw4^9aMpDyhp$kXCp9}~0s_)lcx)q;%srde7TX<oI)ypf3-o<TkSjn+;
z-|S3HuGDkyg%>jG^Sy1YeYxp3L)tXI8;$KT>h8y!1?D^7)3fV5BVTqSdAWAcPW|?O
zoc)(l)|Ec~G-pX2<1?XaEuJ4v{i?d-v~tU<;7Y!WCnwzHcU3Pw(^@BAqL3f*ZIx5F
z>&vBux{f)1){e6#KA)8F)I4bE<)bgX>(&`7#m+Gjm?0Sc^sbwAnqZ^CiP@3d=Xsys
z;p<D5NN5uZcFAcGo5W=JUC+$Wf6LN99m!3Zb2c1odnw28nr#)&XI62O)+8>z;w7_6
zuJCwx^D+o6oXi=_AH3yj%{#MxJFi*0Wy2j8YAkM9b$c1B$i}|~#$DN>jopjlzI=*`
z-@j+izYm*Y^>5CQtn8>a{r&ZB{qB>|+U=`Xb2~*X7MKulAtPr|AP=j|6u+CZ-=EEM
z-_5FiWUpg{N#LHEL$MLp=O_L+{pRk&6Bfnm-hWv-_tCnjuIMFMGo4Ov+u6BYIqG$5
zm|;r0ZJLvdtC@of-}l)kA0{O&YS`i6Z|tla)ivdl-lzGq|5n}kSN7-EulxUQ$8UfA
zy8h4e>#wIq%1z(@=WV`y%<I=1f4`nS{eQ&%cW=2*dq4KxE3+^Ew|@QZQ~wv|J>C9Y
zKJU}hPfzo2pT2MR|Hbdyv*qLKeq3C*e#O(<z5(}E#DDp8`ToDdPhacj|Nj)f)O36D
zlD}N8S%N~g-LuW6GA?lzHQl`LZNbl$>+*JgKbTK13wYzS^3m=B{<nd0xgw>yq28LS
z@^aia-xbkPN}I-8;yE?(^R`&6Z>Q1}x=;SQx5mNyYWDmBtq_y6Z%SQ@4C-Y}ihT}s
zysHp)6=XcCVR7VS`Q7rcJu$2DcCVhnu<j?*i}(9G-yb-!`TN2N9*rC^Mt`;Txa;<A
z3b5K5BFHa1KSQx%#@_e4_g4w7j0!pQ;-ZrL+7JVts=1d%CTr<fvvRbyarkJ5K6NU$
zzUBE?HjLq_#d@~~;XP|ag3PZf2?ltqNt?^M_fYD?uZ(X}Jbxe9-1txa*u(Q@OWVCG
zqKb{4@40@`c8<!mgR*nhc05|yeSN9O+x4<Ge>W{F&@=W)O?|)SM`+|dyDt^pQzKm5
zJzvbZ+_iE0RaOgs%SNvWi_^}ho{_lyz{Om??f8a*G%0WP=8}suG%kfV8=w5?`6t-F
zvG=<Bj<@z5+z&s!UBbkw@ptv3fM-6%Zs#AL?hm|ayT0bt9y|6@$9H$cgP(IPUtz;}
zGgiX7*ydiSyHD7uEcX4==ldRhwcxX^jYgn{=8ovkI&SA8jo2Ld0+uPXt@1geH%*~Q
zqT#!}?vzIY_r5wrzBP!mI{fqA+D5LACt76JzGpwZajCA2Yu{Yf@1LJJyGwk}kAKc#
zzfY^p*Z$6WzwR$fTRm(A42#^?OiG=2(C~Hp8KL?m;=Ubx)0VKD;)(8c4%BdIY3)2R
z+iX$imIJyoLl*wHaIa&f%POH~FK-D?+Ldgy<<(Kf;AhW*7p^+GCv(aVTg!DBnztS-
zEt!#aan1|p5Y<<UD&Bq%We8eq{kmH3Ywi61mlkuDPwvZnBdisf@-o({f3tL5+taV6
zQSAFGH=J;OZLy}~?&DeeeNS~)r{!jxy%A#*7%Hcw@jfe({W-hIS?Rr9=l=&~uMIq`
z{QNuD(aRGL+*wt+QulJ&ROjD$_a@JoHk&^*Wy=DogK@6~m6t!b@NfBX>*Om(?ymKj
z=J7xw#!8Mi#^%Dc*du{EqSH1Qt!dh<{_&w#!lmA8r-~bek4HFNuhk8C*vVOxe#-mY
z<pp0I&Lo(AiL7`r!AqsENukRjaC?-Omd~1*Th*M_GV$(SyYFAY&p(%6pMLJWH)hi}
zgQE1`Z_EDtxmH&>dHXW<qsrMD7f)0g_O5Etnp!9$b#%_z${Fv^=BEj!UR7zD{lHFm
zt-QoM+3p29g%^L{lfcP&TxE5N=7yR#3*EUccbad%=AmDzy()FJ$;5!E>v#X<*xIxz
zEh#{7Jrk?c(}GOBYr;3v_H7TyzPL7{ccXli?(ehp_x3-k|MUOL_4sZ7KRtcADfihT
z{pr{L|9Pr!|L^PR=kh1tKK=FUvj27YdWO6EWHo-hJU#vY=l?(Fn;nh!`TxBBZ~p(|
zPk&AS|6{)1l==Uk#>d;ouK$`fCH~(1^2h%5f1iJk-~aB}`yl(P(~B(izJ4m&nYX>l
znooDulpjBOj+tcly>(7+3F;PG`1!_K)BAC+zka@2SK*o+&myhTF-Pv<O|Fau88s}@
zp9}Ky@_5uk*m4?<ozN0<HIXoB_s&gvucq&s_2SafD@pF&>84iq?Ft;Wa%CJB43jc1
z|2)U}$R_<o9GW?NA-CTeJZ9UYp?6SCh5zOMCx36Gn=`3icD(fNe(%9I?|$C%U+~#*
zZDc)5XtSw;)2D6s>;C<^-LJoX{&w&EzaQ{rr@1HJlu&o**%gw!S7y1NL6&Y`jc!y>
z;HAFD8Sc~DE4toiGbW_ny)9b!sQ29kiKQJKtqKz!Tt4-Zc_Qn36*2D=4ZBH?)@*%n
zS>85r)s8m;RSF^3@3f01ZIgewOjjzt$mJ~GrYB$gZOp#^UEQ|Kqa-I$@|v;ir~Z{e
zYRjF1cJY*&y71PEsO^01w^>AQNfT$r{7qjN`z^X}xW_C$FU7IQWz)Lp0*rc-R-LOk
zEFhW6zB=gAo5J)3&pa3C316}6`n@tlc5cCYjZac#$w%%8p1OJ8^7GMm7neRanRZgc
zHu16FzFSxJ*;j34uS+`SSlDwcjs1D-<ExfS9$zokI&XcWe&M5@d}rlzI+yx(NnPpg
zN_We!Q+n(Xb@q_Ue4F|WXAdN~^gi7Fc*)+s%y({n487mHa$b(N!UfCAZ?xvdb$wNz
z+7cxB=$qfVIZC#3GBy5A`!;#M<Fhv|ep<U~*f!p1+Q_kOhgXQ(Qu*s{42H>9ZoX3z
zHB8V{FxEGHnwvT4{de_DEw5*1XP-E`<c9XtiFrvczV3Aj%?@3$t4Q)jwr1qPMJ4)q
ztG;=sElLjEs4A9sL1?atUmKrz+nGu3J?{fgR^7jS`pAvs#gp%U)|wHp+cR|m`=Q06
zRRU32cQaS+;kaU3b<aSEYxQ5YdHM(Se7XV>8G^jm7H)C-kgxyGy8OqJRYw9cD!$+N
zxkSswz@k|^F)cAA;=l?A#>%({b3L}~Vz~0WA@yN*VQtB#t%5;kE{93qvP%~ElJ`z`
z5<^v_z0Zj^Gx(2Rs$Ls%OJ-Ne->_%m;-M=9KAvSed~i|U$H|8FfAsGPEYdurX?1_i
zMC~P4R+V1f6>4bt&29PHvu6Vw8pCq*Z=Zht`uYACyT3nzw$JE1z_)q!@9U3$9KZds
zKmS|l_2#KrOb2|~4I2trnH+?+Xa_iL%WJPRmOFR1w!gGZD|Drl3y0*TId&Ug-s4!~
z+4-(dZc!V1;)5A4mfOb%Zd<jq<MQmA_c(S1Z{uAR)$N`mq1G~6Z+Yv=Elm3zLa(em
zn|YXNsi!W_CbN)J%Q&XWd#(t*>B{2!#{BPB-@qlDO9Mjwdsp7oi+@%2_tVR#$It7}
zUq65Sb^V|vF|T+3{jlizw$J<jKmL0E-`A%W`?h>nDwBAsw)MfYfA{}CzrX)(-|Ol7
z_tbB$|9$-a>=&n(`_Hfc@zVZ8z15Pvf3Gk8`-$Iw2aA`k?ZuP7r#iZ<cowy!qhtHw
z{&LH*+j{TrG1Tw9rmr{s8Osu_qgxln98%Y~ma+Sy^n!amYnNUzus0IzVta96jpyww
z&FQ_{m-!h^oLf_VbCt+~W6Z0Qu5^_0Y8KqNxHW9<`a0F6V%9c)Br_Lw>So%zbSE%Z
znB=Wke|?ku)7di{gN%e?nl1WRa>@(2(%MXR*E+On1s!H#`B+zCeU@V}`xhancW-|@
zeSLlX{QWsmkCLXZ_<Vyi+Ek?J$g4jw@{f-yipQTWywjH~Kf@|;LXbD_ow$0#^dIaj
zOJ9VoKBmH{+><tOuil2`)z&KV9qQdzcI>aV-{4<&;?1PYw4+i_ZnbZ`sa7t@c3k(A
zU3QA!G=rCYvdZ$7lNPUd#dh1ZWx>yLVsDRkI!%8N+~@mP_WZ?4{&kV&2P~$}KJv@?
z<>I$}_xPM`XV)8trqo`a^GnO?gGP6s<@ddO)gBWz**-k*NyqNobDm!+{zt#R)>}G3
z``_Yv6(d%UiP|Ffj*BeO7OTDZAoyN)rk`_B&2it~DK@%`8Nv^=&X8obs0}+~6kAZI
zdZbo_VctK_OfF9L>fYzSYrlvpIQAci(sDV)vE$3pFS`>|k1s!7nRx8NBbAe~OhJ~F
znqj2}Wxk%}`;fWkac#oQ_Ulr?o0_M*K4sFLWbo-=LZHF-aIc4*&padl3Y35PD7>=R
zH_`5}#e4oq>$aZW@l`PM+U^A|eaiwY*}fk<5pp)*%n_lB!R%Kn`@VcQ(JXg%tCNCQ
z^9hbqQzzVbuFbmY=-rU+Z;R|F8bobT(YM$5xyn@KfLz5{pW~LSs;P@2Kj>I6<uzL!
zf4Zdd!h`#--}_HUbPD=qnh<mG{$!qjXG`{d+od}B-Qmq*2WP02bS$2%@LY4hT((t5
zC?|`w??Q$ZOEbz>6)jtNcG9~NEu9rxR!nC<X?HQDTeHrs_2~)4s@+#?^`8AGZo4nk
zS>;|3J@34y){NW;zW14apH-gE@DTZaE&ZEWgLCs9?wNf1XI5N({X5~dhTHFs7m>+p
zu5yPP`kh^_u*}AYC*8<N+)wi(U!z#g^<BBrF1HGA6z)h}u`%R@YI7#2%Tc%P_A<qW
zV&VGff3Mg6KYsh;>FVF5$Magf1jD5cE6?CkP|2DV&})z&-g)xx!?a^=+XD0%?UIx&
zFNhpDwM&BQ_6zsBt&h@Pq^`2jdA{-7vP=5|y7C@<SnTO$aPf*ae>*pOn@-t-ouOeI
z5eKsaLvz)18CSgY>VGb&@$p5rqsE(G4~}ns+<EO*FH^QPr&WIbkFcz}>z}?Z`@84w
zm+SI&v9Ag@fB&CTB%}Xq{=K}fw|{G&dLC4l@%Ho6s-=5&-SoHLw`b3(=l=Hl@BjPv
z`h5Fuy-)W0>h3=M^sDIapX>Fz{@vdHXnMTJJEc`o+gC*|-6?i8s`c5qBb8qsD*4`e
z_uKr<{O_+zKc^kve*4?kSeYoZo9k}uxlnp}SLVv2XZycD)BW>x<16W3r+x+;{rv7(
zyGe%Tbf&I5QZJ6a*seCcX>CxaQj}HU>DsKsoq2MPLPI#*9RqtaS*CF)7djqP(^y+s
zxH@#5c&P5ZEa7qu#gdG(F=FyFFK=pdK9cJszih6TSYdOP6Wgxmx9=u=eA9Vx|J{50
z_wC=Gcik)SD)*JhqkHA|r3X6ZI3FumefaBR^S%?8^YioFvrmfkToH2FF?C1e+ElSx
zxt-f~<%)6^OmIK=$HL6=r~Z!(Zu{%^-|<|yfBl)A=Dobyj}^>(eDB%hxi6ijY;YvW
zMtm}hi?@6210Cb{EQbxRzMFk(hVAmxVbeP_dn!ZhYuT129+Ka9Drytw8|Mp!dsd%P
zIO6EC<%NswvJR%n^*hdqOq+B5pTNTBa$S=P0xuXU-Fu(B=d;z4qR57p)d@xB?_Wq3
z@$ou1tr97!di}MytV?^vEPf67e}B|}FSvR2?5~egtjfZFK2Bse_UN;X_j>lE?!}vH
zVI^kS2bvbX&$#2U=z`^*_mgbbEa^V_evYT%yCvUeHqWt2{dO~GQ-*TE%<@0#XJVdw
z?LT;6j>EjTMv?ao{_@ZIm;Yj_Oq=w)<g(9{Qw8l>Y+F=M9Xas!>Gs24ziJil+?rF_
zv9$4_tnIY#jC;!6E*|G;(P1xAn;$E+BJ?+JXPlgSnbj6|)ifLZ-?KF9uRXZI)=)en
z`r*mczWssSySl_Ud+QdlhlXsO5_jL(`(hF+YgEkVe;lGb?&hX$du(^>U;O@LZ)nuH
ztuGd6`N{A4e(>w7g!n7j3%^PlO*qq}+NOO!xJ_w`Ng3-o*~|W2^D?Y5Jz~3r=iSta
z-a0*tA!vSc<ol#YbAERuq<DO-s%?D2v#p7vxMKc}hYJg&x=$P2Df@k;#AxC3Szi{j
z`8(Ss_3r-=&;GD(j^;CylS1|n8r^U8?9NT(EsMLlT=}Gmm4Y98-M3$?D}_BKHg`wv
z;NGxDrn*}GO7EXqw(H)zWEDSO^wQM`^;S^W>-uK`XK28ZxYMVnpMJi7Pu<Vc%TIfY
z_+4C*ar^JL&vn0l-+udi`?u2PXRjP>4ZL%F{=LHs7G<;_yT<O+cywuD{>Entx_hFy
z0+*^~T0UQADS3%go$qXv^)!*wPh$=%pa03i+p6W_`1M(iNcSHmr_NcI?|->hy!3-_
z>VBt?n&yrZ54W&%H~DeSbn)D{Q)^8mi?zGzq$k{N)AKf7N%)?`d`sns!=gjd_m({R
zd;53({(s-TeV%O=xH+pP_2~cSJN~@h|4;On{CfTWiMRLt`|#NQ->=L1r|SPconHUv
z_V2Hkr`P}feSiPGy5Hsh->tXbo45box9@)qj2T@Ug<j}$ELb{4OSyGZwpP+xl?68r
zK2E!~XThNz|Nm^gy1I7XmbI^z*4$GGS*~*1(KbNqkdQv#YxAcbOZwE+wpE|?)n1~-
zlfe+!Vln4EyM7mwW~G9PE~9Q*YTmsij!kR$8>^I#EzjO~JW(Yj()CL0%@<E!*ZtY{
zK5lhhsN$uVM{QF*Z=adEl*4J=758<MrrehJAbs=5<5^aB7i?8-y1JJ^PGj5QTi3sD
z*S~(<f7j8!p{yatqt2eaX!`Cb_k}-oJ$m21ef#!0fB&|$<Vmib;y%2V+yRTPbec@s
zaPL~wGZmS`3%Xnb4jKy<^T@szxTVeZyDsFv&Wxt=<|Bd4XXk7R{QBj~F9W9X*;QV*
z=hphkd}dmx<;57XMdeEBB*XXjI++*Q^*nW0RbnD~&AVl7nX$sPqp{b++ACO}&uP+>
zId#7!Zr3Yk^|s;#GnqE4T@Kjo@uE_w`*GZ=Ns4|ZEyaE}OW6<0)y{HA|9#ciaB^?c
zJx<}*A*MIaX!3SRyw6KeIT!Lb?(k9DY1himo&T~}=q<~NpC{^0x8C@faDs1<p}6kP
z0>yi#oDy>{`uvyhc(ZU`mjU<Dlf`q*?a%EDYM#g$q}m!JV{oF=dT!G8fR`r{>Uw-{
z%9tLTB~cN!sQ77k;>;bVyN>;z;4#JVgkX}b%x#J5*=md@CED5=LVq_no8}oSRdzmf
zKENR>CGwvq`GM`X=JK_2?_IXvFOHg@zvR#11;N`FHGF?qSCY?lcEjom&jijmn@kVb
zxRO`4=Y^I31RYUVaS>Gpiv?-{Zb7;4>p2{*+Ok-(27C`XvqCgVAXsZkT%(TT8CyL?
z{lpWycOQE!DfW#y{jJ5~9q(qpo49GN{L{tS@64Y`1WVs_YMMVw#y|H|%bn*=)~epI
zXIuZh;1IuOkQKQrZsyFzF?MlGx43`hY&*XH+ZSP%rStz!e|jmVuqTA?qlb0Eb(gfD
zM0?@j@(%UNfR{OLyLf60l`G=6&g|Iu-uO;$LEt5x$IGI&-!`@=OuUlCfAQ?U761P~
ziD<jY_AaA+=iyFP8>zK>jb2;C&Yt}u!^vX$;ndbumyX(eUh2`Emh`)%_V3GIFPC3G
zZ@)LjK9+Iu<=K_O`+vUO{`YNt-R{3N*8Al4S^GH5ayS{}!g|V}MfJE!$!lkR%WqPF
z_m6X^EcE15mb^CIZ&$DOlf5U4qa02!>weoUxBuZ$_ZJhpI5IUAPQKc!y3=gc2TOkG
z$y?-dm^K+P_11`+DgE5y>E*ip`3k19{TcChVznoxo$54wu}`~`p=HX249CWwiM+?(
zUc1L{T+ArKTVekFcK)w#v*owT$FJW${rdlppTGY9tABm}?~m_n?!62;_4U_d`~M}?
z|BvsF`+9qQ{r|78_0xPeRXpsuUfSisBmZd;zyFSAr)-<NwWgJK_152Z?Opo7V9!k1
z3va)Ma^>9re*0eEvo8VjD;5jnJxaUCvHSVt=@VEEoOBcBSXCfw*1UtEpddfBS)^q1
z%*jVisP!JNlQD3d`o%4|rr2p$g#4f2)eK744ydhp&tCuUZSMb1p4)lv*<5)mcyO8%
zm+zZrYcxupI)A@DFMX=Ms*CkICf#?7bPk^Fn(ku!yKMK}zQ>y~LpcJyo@p)+uC7vk
zB5SiF&-nA20;#pNe}CQ1-~aDcRsHH!ZVap%GB;<O*0cUlGkvW?a{Lk1c+oiy1v8cZ
zSo0l=ecF0C>P_L*`gH~cr=%CHc^BG~aQ$Fmhw}k9DT{@RELW%~xR%PW3w~Qtt5cMI
z_ls*r%S;_pzF5idOvTXk)vbd44HuVY^lcQgyvh_LBR}Jqvd+ryK}UanjxlCwljAzg
zXBD^XaM9f5i*<4xf)*;)=CthGFLy}B$Fz8oMB2;q#eWWHIm~rs-rbP6V}?PCtG1v=
z^I`)R^&&sz7?JoHb=NHaG0*F}vM>1G+HR4aWv@A%K71*gfBC}2!~@kW8;*Tg%D(G(
zUVw1P+@y$wFS69nx-2XSesSr^yibdcWtcthcSvcSw#Y7h+r`4K`M()`yLZ1ik|*?I
z9n13iyWI}j{j&n!+3zTwelFc)sgm)7HT>rvKRfeI=%MxpyM6I1EH~-!er&8{kXPE?
zAzV`RhO1g4Xw#j~|Gv!7*(Kc(l@jrJZmr)WUe8<Cl#g02NOC;>N{`{a!JT#H7bCb2
zh-&{ek}Hs!*k=?Ws2dj(=+d3R>eCdXbUC(5FZ|tE^V9=#+4OeutImsBx$~6&wJ3W&
zowwf~d&Zww7s2<++<J{p$Wp&-!-8IU)wx<LOV68{vIMsq>#nw_e7AU?{_6EVQ%**4
z94q!Z`;W(E)>Mwo*TiQn{g_#Lt#6&8<CKn;{IV7ET`JidwOCyRWt~L+Sy)#XIWWZT
zx|(d-8K9+klg;UP)8>O23{_5^Ht!vu#VFW*uDG;W(TYdr(qb+qk;hEgd1fgS?oOD}
z{g6eB*YeJdH*9CQwKSr(1!V>5&b)D1>LK6trq|P_PnVCk|Nr#s<$nF!-U}rTYgS&!
z-}Hai{=c`c=l`$xT%K>fnAO(hChLcbcc(M%^U`^nG54ruZ;ubVus>_izNOb^7jAUw
zKXz+Q+Q+)rxrckgrng5veo~Wi*hlxLNgB^937MTcT%R+4o0Id)MQDe#j6l~;EAAt%
zeFcx5?#Rylu5mJd;c?^Sm4c<G)wL$N&)aRu!XM3|^)*Ou{q?GgK?)}3Q-VU8zda84
z+H+ZC#`N#oy*Hm-T3h{J>#6Uasa1PSi#M)+_3i85*ZVUSD|eVaTcTjD_%6@=bDFGz
z@ZQIF{+9k-7qQsXYbw*vJ-O0e+5c|;{<ryk^~ASNU#VT&^Y&T0?yT!PYr3sXPyUkp
zEO=ATfd4SdQjZma-kPggefzdAUf^+YkIO2JrS__KC3muHPQNPZE_66lJntGy(Iu%2
zttx^0_ioR=rx(BO_SdZH>}qdAN#lkxz34~#6x07sH=pr5zM!LX%byUNdwRTahkSTE
zjH~Z|-~RWnYv>Br*<4LB%QWAfNjELJ((Gq`{PyOa#*6pw?fd)dwf@?zH)}hl1#K01
zpEYO3$}hiWEzP*wwfQG!pJiar@~Ab_`Ic|*n3M8owZV;_DOvZs+2%xi^|oDhz(@M+
zq&FXwnND01Q~Ypx;bVrKJQB@X!Ux3ma3sin*Sq=Q&AGJ5pNh2$dxCj<#m|&S)$*KY
z7QP@Xrpy~`^z(^`<z?%0i?kfeMVF=L=j9#T7<Tl>+MwePx;r+{kL&i?(*1p|@*y1#
zpVj-t((6^<8#OK7J?~<~1jDuQ9>xhmeFqoWcDT0Pv~5tdRn3!;b~!cIopaBrjpsS0
z+w;V~TDK=8=)@$ieN8IiM#<AQC|4@TKYS@0@hXkiJyZSCDT&<VtRoYib-T2E`Myps
z>#p8ECpP9U?#*|nKQI<M+RHG@&}Tup0OzNi2Fay?K4Kc)|5U~PJziYJ$9;;oT=C}V
zX{=B7>L<HgOE~%CP)9`H_nJFyw{}nG<1D?i&~Im3Z?8tiPlb;1%;Y-vC)4x)e3|5`
zDpmgD!2Gl`E^-_7-YM}Ni@mk>&&4@Y6!?9TTb>v#GInpCpK<PkTc6i;(?5%ulta99
z4Ggaf9o-w;eaPjIo#l<5+R7)kaY;(=G}5b0<|%W%(-r^u=6I%Lld{3Nt9-Bh!t%C>
z2VPe1v9K>#UYU0J+p!Si2?w5kz7+h<%l__@DO0C?-Mh@bNKOAvZdvV=aJ%9~Hcpj~
zluJBC_f_+)*ptlkNm*7SV$v2v^YZN$bGlwl`f|aY{qx)-sTM+?s<INal4bR$nRCm$
z5@=taHK)!xeV;_A`rWG9<!3BilIAZ^4Nq?TeQLhw&OKs(MZ<o+v281x@adO~qxn@X
z5#6mR_Fla+SPz|cT|a;Q{QsYRK0W>VYq0CWpL%W#IZ4*v<NI&h-@E_sV_rV{_ng+*
z0S`ZlWQH@m?Aj-+v~azXp#Pi+7VV!5<qsTWIyFxxuVy-n*wG37vD5AxJ>;yo^^onf
zyCLx$8A6Jhm!gc{Kl_&|Jcp|{p<ia+lWSWtyUfD_PIkXyy{h5+;DtepPNx&|vv*0=
zo73bzDKmQQ*jN)`c}J&8w{Zny)T?brB`@UZeoR^QwCZNen`^uNpH1Yuy?Q&h-Xk@=
zw-!=!rhE6UKap`i=lIqbr33v8bJm^oE8ZS?KmG5jj$QLpR$koNnYSux&CQEG2Y6RF
zmESL~ySw+~ZrOlVb5XJK@7Esf`C`W5_iEbbsfFFg=D!eWldNW5>SCoXYf~Y!)?&N<
z^`ez?HrKNWnQlMkTD&&Jj-6>^jjv(oo?OpaT;>-Kd9Av=?|x6|{(bx6?Do!>Y0ll`
zWoCIWeDghh?;NoXj+uQ*Tc&U{HSX&8Dk5kwCF>qXZ+y<9;~VdPyB(<IrF&uRvkc{y
zn2Vp?RdPe}WbSv)5m9$~xBc(8e|Alo$6e3uXs`=><z*&brKzz>QS6n|LH8cET{i^$
z?^Jd-{q(b3Fk@!$cV15Sv$hqVV`JA$K49aeCd<0ukn`Jzj5%))Zj3ctbs;KQ@W_;l
z^Mxl}v(LA?clXbA!OYq@^Od7n-yKytJwx%v9qmhXn?eohjtNYd@Gx<HcVqFV>nocc
zN(Eazi|V-gO!h%BLz`9D)nK=Czk~S8H}<{IDVkW)JxN`t^1eh#PDGva7t;*$(=9Dx
zMX4c2UN&yt`6(x&Vo&=shhpBiUkNvEe-@l)v90uFfrzG_)ql>_`I6j!_KGm5fBpMM
zV%PEI+@c#aQornDaMhXO<zLvF6ytj2>~Y&y#{X})HykNaw^4mkU|6g#IBm|`HXrV-
zHOFm?ErOr>+&Ow)@@%r7Q|-qkDy!muu`OP)?0ccW3s3X@DeGQjSRDwk@LJgauGw1R
z%%r(bp0xjK3ig|xqPl7k>ou$O>a$kZE%jV<Y)MH2-z=U_%L*PwpPjJ#^6@7g_4bUV
zOQnpeWZt!S6@Jwf=9${U<<-Y_VX>)IcZQLYrL^*&h2P4*<jBs6DzW4?zF?+W?onKF
zoqNX-?q0uq*~^C<zbYNb3s(EZvov4FpwUv+h3)K{6%t#oO$t1`YfFEP15?41{Nrn{
z+r+gbWxk5ck-J-cJ@qH&_QOJM5?rOTJ^6lK?FcTuW@LTu;)P;0TmR;qw7w<EhnfCJ
zH19Q#)8FL~#3ubWkJU_FwwO`MkWr#UOKpSJg%k4>t#)jm)K?&+v)(H0T*;N3tyzI9
zb_AE6yS??n)6ZW|$L|NNE!cbI!;i0bd2^n>jo!WY?OOxguSKD&wx&JHbL(!kx@r}*
z^<ul*y4yQ9ENE~R?9_SpJMVAL$M;Td+YQr>G?iF8R)mMiu--jpW?sn^GR0dn`s|yw
zd6N~hQ<ip3b85Z&C*<~*bMuz$%5ezHZED{8D7xlS?1gDBv-WQJ`k?GfX8-dh*+pBg
zoMyWyF=OV=z2<ipw1%xV?@FrP$#pT${oeYsM<ZQVYdyR-mF?cFwmULQq;DS#GFl)#
z(J0XCYM$8gV2O<R6~FseHacl<nYJr8>Qv$F{0*x=RjobR-NN0zsXqJozqQwbvNWqE
zYPr8@QaZHCTjKnS?l;dDZ|QJ3^dNm_rL`2_au4A!_Ep&{qq6tU5Wcm}YLd&+#nx%l
zud3(Gy_*@=A}Zi0JmFRE@7uf@x0mkw_u=Qaud8>qep!5df)H!@g^D8&H>7f`EjDHE
ze<tG5CmJifL|H)8*eu&`@r-V%vl-7`x&%GbQ9S$k%f_bu@+BW!%<lF7ExY|*<&%p3
z?WcdYy-qOlUY4{vX*H{s+~!$Eiw!K7A2W_Cl=J)Eb^iUy`xV#x%m3eBaYt?cc7D@k
z2UI1SB-UDqE-7+SxEQ~w_SGD_xhBtD)T<^5&V6A&V^V&s@0K3h;|J_@3_iH>T$Wum
z`QX+1Y*rhe&k~~ZlbY^W>%PltHmv{ee)8i5Z=GLXmrpJHcV&if`Sr+i^A~@)*4cC9
z=7wDk9vlAM++#dPcSnDQb>nVXza;xjkwH^}k`~+f9r|be@ZPk&ZVWZzpPPb9)a8z<
z6!ATY%2DZ@sNk1*%-?b?<LuWqxpV#nusm#eZuoP8Iqy!tGtYN#;h#9~$;R*3=iQt5
zOM8CJ0msRE%oV@9d-5eHW@@<exB6{{^-ntEU4^>0FqaqoS3RQ8ryMJm?96QOq1&>c
zxNudA)1f<|$@j0mx3im2Fn_N0<<2AT1*ZPE@}mEo(wi-bSug)*ZnUkQe#y0c*36B8
z5s}wsDaTsqZ#b`>m^sHf&`Ru|Pvg9xIg7Z&^%cMOINXi6xLNpI>aPXoR)jn-Nv}{l
z@F-YLi)|g}Y1iY23Qm2^m7mQwIsf0)&)JLZ4kqwVD!yp7WrAYryhhH5$-<BRXNCGI
z?p=Q`aovg~3LKR!VWocOPOwUJUVdzUZvNI=y_3&&2+J;TzOJ+X`HIsHOP!3EYUF;-
zY`yk);_k1KALTy%=+0A8lgRvLeDbG^j_%@od4qEs+j=5BFP?nfbAA7@$fr)-t7ZS~
zl-RwgzSTp*L0+9%Rd|h3TnKkic5g|gX7<|Y{<oLQ$H&*#?A#Y7^4>aXsi*ayip}56
zt@(=6_r|T$57k^NyZ!s^-*2x)*>@?)ud5H<c}Rj&(e`mHTZC4?49-(eU!@;1DC|{z
zG>KV(-=|g1@Wc_V7aeMC)0(()g7lY$Y@N-&`OVC)0-NVQonYIpxopYxJx*H>oH%?~
z-^%gUjIu((^X0B7e%>d4Fr6=uS=ALbtvgRYl%?!Lg>_DOV2-Qj)@R4h&I(+(Yvrs-
zm*#HRv@$G|ncuwoYv*%Dv1ygr%R~brH}J~bowCZR?RV8i$t4digmzzBrSbc>^v^QJ
z_4?O$F{{WHJk`t1_r3k}YthcUtvNdi%<ZFI&zd&%j{4_`v+WyB2)9iOI#3pIdPk$%
zqHUY5Zi)>sDbi~T)bM3_r);+6c|vlaQGkfZqMvGa7AVg8pwY>9{P)_N8*yfzs@}XW
z+|uYJ|8r~LrtdzlHHs%4%noSQTA{=iy5KSE%RF7*P$uzN+DAiOAGle+5tz8@;u42$
zC6%bg@*vstcz5M9mDlQS_b106c_^TkvGe@<;CXVh!&|3CxhSt`=w_JI9a*rh?Y#9@
zd-=GpbH5w;g)RJN^=i_a1ve8@ihiE>dgkJ%MISHT6IBU~oc>}?>ZYAr1cF-@?~=$l
zyhm~F+)W#<S{U{`Z+aP0mzWdj^kAFD_sxbiQTvJ(s_IQX6*v2SuVvT)9)oQsFE+0e
z?&qBRZb4;@0o$9}0~+5ugKti-y1lWr*XV(rqZiW)lh8#QbNwD>uWvE9`0V38<$n)y
zKD}#ttm(_j%(wU5tY6hf6jR%->8f;<zGaX#xP0dc&lZn!FRet>u4dm_%H=I|x5VuE
ziMUvqgFex_Ow|53&k6qBn6h8(R=67fzboEr!xPmmd`o4K|2^k@j*rIoZ8P5ejfiwy
zZEGyObfWKt@Fj)zTHXbBs`VDGJ|w$f{&ijE$@Xh<4>#`lp*l^M_y2@x-Rf7aOxV%?
z$SgH#(Tk*u5^vMHF8$I@QRxmaX4UkToNsk3QSx2B6)(q*ugzS(9J&mFGi>*N56}0X
zsaj#yI)BZH^!Jx;I#o~7>+iEU{oT|lCsCSL!h4^e&8A-hU-tO*y__lCT-|arG-w%T
z?!_l7+&EcYZDL=&Rirz73!l#2#S_-J|I$<D<@58?KRtu*PJmgvmb~*`3AbCH9~JBq
z@H1}Itdo)1U{^RVc(R~M&fG<hlh;4)^eJbtP22mkpzcJ$`$Xy4cUDe$eW7{ZpR0Q`
z+25~>H$BU6{6_ir`v%AUxo*u^xr%kk=7|EjwpP1OeZBnjGQa+M`+e86H|cUtZN1EQ
z@7}Gqw>Q?`ep?`@>3X`fdT&mNc7E%97WZN&oqHDZ9xEpa&G?l(p-;7Am6-0TS9<q)
z^73LTMc&NkH}nx%6uesN^+Yq_4-*QNC1PDPQ?*xI=)M-!Qj*Pl?sEOyGrLdRJUVNJ
z@-!Y}wU(omno%LWo2+kVR#?u_w%K~`=CibjIH!!A5{g~Bc1Bx&bF<QV5q;<Ry{O%i
zok81*R_67lZH&@ZV$C=totbHI_=T`?k7JESy83m!<t$MlC!g#qSIr2!S6Mza>cxh`
zC+05Q7pEsKnD5@*lGeuBcHFu(PTzHV(Z%qKv)`VbuKvZukxOuje&XRJ*O(k;Z|<DT
z`s(cW^}EB?PT#dRMC?L;%zq2c-kD6wp@pi4csd)EmZ_Qkn9<NJvr5h&yG!=%j<;K<
zy?v_m;Nw33cOR5OWF<u+`FHN#U8UT;+wEH8BBmJO*cV&xo4s2ma4m}E(jNh3kyE{S
zcf%%Uzwoe*cQU`;ps4t_tonBUZCT&PPoJu~=`Zx29<sagh&}7F9=VT?)ThY$3vYh@
zZo6^LgWs%2YW@rG>&^NYo-UYV7^F9e!@XThM{`f2%uL}^9J9Tr$s667tvO|$a@)!u
z(sH+ch)4d5bxX1Gt83SAdnUFipY^9ffULT?>Zz);M-Td_v01(9J$y%o`?+ZC<>!^z
zxBe(=7lnwu@liil`A(_KRXpd%sVX=AUBCAF7dt*H-~LO}uzb%Ir;BWbOSwYYH!W3m
z3$GQQbmUU};^m@;CoQb`wEM$v?Ip*V>Q6bP^+iNjpKI9jZepy(hnR1ThvR3@u80@f
z;H1O7F6sNe4HjEBW(VJ>P3shvK0mjt>gKB%H6qs@AB@_@asA(g^j|+DmRHTwIWPJ#
zT~l_RpjmZ~!-h{Q?!Ld48lAoN-5bxVmD1Wjd8RanF8|wZ|7Y`}&ckabzT!LkTJ)be
zzpy7?+p!RZ<J%gGUYvf`Q&C{Kb4&P<IGz(*%iTl@-LJR?{BoG$p2?_U=sEx0N2>{6
zs-36(^!Rv()qla|Oy^^shi0x>|3TNl>6MN82}b?GxqT7ms-&Ff2+Ob6^1AqtPpW%u
zRCeRxm<bm*8@cGOUR{4<!`#K|d>>S<_^5vJ%H5mEQ_`QwPAk3j__EOR1?=t%!_6#~
ze}6hBx@JSI+xjO;dI!ZmiM1?@Q~%ZQsc+|nyIq@?f92i8a{f{H7t13o&c(l++#<eu
zxJkdCnZYT-wRY9kDJ)N11!g==b6Oe_W1l<!|EHH<PoFLeZ7fw<c;jL2_rG`Z?)N=j
z>l&&f!oBveYxc!%IqOyOujDQ+aj_9twD5}*-;LBShPBT<+pomTx#Rm=Er$QKp19w!
zV<l-iecS!+R83rccm3v_tDnz0^KJ24_OtU_%6A<;qN`H8$2#mns_D_4p{y5P<cdvu
zZCBbE{NfUm>v`FtHXU~(hSk&79=m(DPt3^ImwV+ZEtkErZw=y}6y2O~F0g4|BUfe<
zTjt@i`{_9^lxMDcB<Jg0ztHhwu)|TERev;QGhGu?ysgXnZdp|P>#(`^3uF!{zlfa4
zl4HHsbZ3nD+Fh~cp}D^k+gBg*@#CIcccRCAvQznGfu)*>e&V4r=DYuvY0q|wKfNsA
zYnoxr^%ntKIwH8-o?qJY`HW(esK#tr@l6~tDeOMQ0q&;?Z--X?{jw?6e3$I|<2J(V
z#wluydS%;UBxi9y(Au!%)YsI`#~r5Ic5r3~URgUMI=lU{{`Av}rl&1;p6GW)tJ`!-
zx1I~DQcZPz_D+52c4__T-b=XVSa6+j+kK78<4Kmi81I}lR(6rUna)qKs+hxOZ}Iy?
z`zbbMryP%~)rPH-StlAN_}6Sx%ke5sljIFu^XyUP%b*W^;xoQkL^>t2o;PL>neX2d
zuJ!EH%GnFIt26R0dC#_N?uGMC5<W`TIa1_2<(J1Sc6?~RSh0A%!VE@3nflqeLF+e&
z|FjFgQ9FqzXjj3n2VqS99rY&DG*i?Z5>G0>e$qchy)z=CY^LUBiTt2@M+@py4!uxk
zPJL$gsQFPH&-?j_k>Skmc5zFZJG@`YTCCq_TDWq%f-LK1huG?(=bD_WPwSR6`0v`l
z^H<b%p4yqcyq7o5ayzv0R_xS0dz2rn4}SS>$^09quio9a>e+`clJ5kjPI%K-ZQO4?
z>CBmFM>p7&ZB+LOj97PdcfMzEJNKL%R@nnv?$kY>dv%rKpBud&7fSD5-+xV5W2WTk
zZLGy}A2fOB`Xn~{1qGBW@+dqV@Lp%af6L66W2_VQOmf^K=KIai=8BxvJ>#0PXJ^dv
zABNu1J-=(glI!mtf8AWxZg;^%CdFro^sU{e6!z>r=W#)rEl<b&`jqD?m)uL$R9s3Y
z*X-W+ee##<lQxFWlh^(|O||{uMuBo^dFcs_Tl0=tR)0<8n^)R0FXGd8wVZ;SEt>D#
zLQ^}{{ZrGneHTgW)X*0GD)%^B#5;#?(f<uR67$Y09iQ+*(X?vKoOGu9EwU4wkInqK
zw#9O-<>7NPjwQ?IpD5IDWL4c!8I{E=(J^!8S51G%z0+Q2|NS%ly1#wQ>o$+9Q>8mj
ze!Q0#fBRpZjQQrnR#l9CmUm`XtXvhoHjeLt?~)^P16>|jUivbBcX?_Nlgi`dyn9Dl
z_0o<xE;_*MsCP<7{)KtZ!hbiUefio}&F0^-rhE2mi78!L0v`|GnBz9(=c2RU?%kDI
zsIk<~?(nQOH^KCUAsh;7MGKZ^WNi|8W_;S9z}Y3lTk`^!!y5xBO+MGXQ{L{H8gypO
z-C5fm6*!Kmd6XOHU+Zgb3JP4s)wRD`vn)`vYxW7lAQ{hcc8)0(Nz6Mr972LlPS|og
zqeoIgQ`73x&PS}_W><2RGj)SjdsHoSKE3dUYL&=}X%@kIBwy~XFboa7TjJb0W9RKN
zV$miyRu>znP4EhFT{@%v{F07OckliC=-aa1ey@wwg4z7Rffh#}C-c1#Jub0w!Ihc~
zQ-fBr7F>*NS+(>4Pf7ac$7LK(zJC7t^mDgZu6g_nv7b+N&hDu={$>3#+0ML0QjK3%
zsU`&eP3|{gXSY>q<~M8(betNrb$=eyB^}P!^FQy1Tacl*?#!FNZ|}~&dopv|F3tye
zOGO;K0yjQ;=D7TBmW8e6;Y`&RGoJsSdh&+zuH|P=Ec^O0w*HWe-0~N07e9(Auih3P
zw1mI$@PxIg2LjwVWR;boxR*UJiOv2wb;tL>|4JO15108&o{^Tjo#*}ZM#lH6_)6wo
zYRWuwc}>bw>1|1|VsC46lpieV>eczxqCU^-T5H^ut>+$Zmz-?=QorXxvckR2%v<NS
ztq9*`6`>s*c5!oL+aK-p@CaA=!Y@_lygACJ``B=$7jqcdKIb?YbfMAk<oyZdDnEa-
zeQK%x>8Ct1H@HinsLmvmyZejscCkhG4*qxU+LQ3*qG4$&N5Rdz8pYR7>O^<6Z&>*9
zNc6*&s!tnPBAS;6njeqVpDXM*FZPQ34?Ve%7Y7z;D~ezCSYEq6k++|7qX>WG+CDF>
zX@0*hG(VhqcY8gr)~gSB(@rk7P)@o2m3v1-{ke6Ip19td@A2odmLt!Z33HU27vI13
zN2l3cJ+QF)zl(vHS;F_zr?mde>=b;HwtwEM1(w@RS2<K(+Ry#2c606V!|u{QKTS;J
zd}Ps>96mi!uIqIEnd;3xZW^lUNs7wuiVClkM50T&EFP~@@!c}3N@(uXI*%zk?`_#S
zWe4YiBR8shPR=N~yI<#fY=&4<>HMqpr{1|&SRC^*@xC-KZb!@|)lYwGLM@vz*_ZVy
zDtXJq&C|<i5mt$Q@^hEDF*k>-(fU4#ssx{xM>1LW-23u)7oXU_9u8?g-9mBYJ6HXm
z++t0T@t25LJ?(T@R$!>&ojZqoBDcnz`nGFt%}u-iUw-}CnHT={zK+Y4v$xOY-S4}-
zeevB{rl-Z-=frIJYr&@-8lYeL`E;I1evr{qi<PgeR<K0uIx$s5amfx#|1Vp2?~Okp
z$=E8rG@akETc`WQ&CS1>W}ccL=Jzt#h*!b+MvZ&w*A<s_)LN7urk!;S`K}}|@$MSd
zyXhyjq^qvUsZP1NKTOxu?!4|BkJQz-zXoM*O*@|3Ut{RX`%U2P-RMu(i(H@N<}<k_
zuH6w57bCi9_Zy}5$j>`J`joI3o#v6MSgcdJCbFS6j4|UJtMj(tfc2%j<L(F~S4rLJ
z+3~Z?RzYm@H{o+VO7GH?(v((how6xb`-sNEH5>OmW}WXO+NyD(yGt~G$x@lq_iz6`
ze?KJk_m<45L8mVKaag#tVj<U^xsN&)#sr2=aC73!4e~t|#;0rD;;=Ks<U;HNll`^3
zr>>e`|6@<A{`U3T|K7eh>-@~jL(vzfPIG?ue#uwy)fXq0_K7IQNQDUBQxj`lbJ6$C
zO`loa&IxA8wao4B-~H_uIq|C~#ky~Me*E3so@H#V4xfAF3iP(-xoIh;E?knxx%|5N
z{7V{7%6qEz@7A9t`e083i%E20r|!j$8pmT^Y;kuOyZY|PdhuWM`BK*I&M#hgr%NW}
zLTbVd$2rRTb5e@f_B1)W?25^scY9jo`3=c^?4J@ley6Ke1f5melpn@oyF4)E$#$NV
z#}XpfG4v*0?#fr^SsSb_{4+l0YM9~*#k<YFZ5X6uw(BaJ{|G5Pzh>EwShkrOwOdwk
zEnFiq^Mxlf7c*}cpH)<0%2T~9f7vhGE}!yUzQgyCcA@zlc|~VMVfP;OioS{&ugfR)
zzEJQzAFiqLX|`3^!!6$&pFjC-eLVcVXj_nCd(6olFE1&`-ch#{-IIOqok#Ys#r^a6
zZod)eo?~ub6E#D>?xKWG+L!JFFJC6a2khkkH%I-@4?k7KN$M)akz2&HYwHc~-SbX(
zopi3Q>))Thzy6hcPgqfR{Ee+~=Zd`vDQ6U%=U)2W5g5IqBu;<2OS@#>#ogN?WZhn_
zc6Oe)dH>JrwOzL=b{(vKzTNJ#ACJEJ_0UruM-Tt-vJ6*@(7pWT%0U)?wm7Tr-(CoZ
zo`^l!wQfSk$=Zlqt988UT8=eqLq%U*c(EvG%PCgZmlGMzs^0DLo-nm|?cd8nhWT$N
zUcbJ->*s+r9!Ji3OxkzkY-#0-gHs!xe^V-+nUrvU(&dvUl5@{pc)rDOq2u)S^-RJx
z?lbuoDotCI^l8f0a7jIb<(4s9n4i`h>s(wf&l=@@GixQ6>z<p7?bKYjU!DH;>+$L3
z^6S^fZ+ok>s<!H~?cDA8$N!aW`My_n<%;v7%&lM2zSV8Hwd7jE=GCjB8uDUZ@^#Ek
zZPmBUvKD%OjPr7<+y4C3UFwTcjPB)}e`I1be_D@c<&5P{^>@TH^pAD@UN>7p`tUjL
zcfwodtlJgt#me#S9s9|G6UjfUERWn${QTC&OSnU%_0_i`&eXoFuWq+zzdg$rF0wZC
zO01OoqMgR2^Lv-EeO<IEbWh&XSF2ooA54sVnyZqXR;0M0Cr9}$<4T3?yYF0DIJq!z
z+NP;N3L6&Rym!ZiXOUW!$<f{04|4~KeHKYC6xmmvtE3ZkinZy|mA4yogeuY{E+70V
zAgVdVY1_iMiAhh^-mR-HzxDKTn3=3&kKC$bizb$-aLZWpmxx>Q><;e=u{}{P+MZ#u
zSFa>{?L?b<(bc*7^YvHhe^E1c`6%$XE9dfVR<@ZH(tg~+?JQ!vUf+B>8gw>J(Vb;o
z-DP6_`SGUx`RiJvWJ>t&HnT~kRWTeiHEFTvd;Iq8-7{gy^A}y&bhL8DDw{@0<;R;>
zTsu+jyLJ8i+@#Y9FL<v1o*Vc|ev|9EqqR1ChM!KXIp8?wm7an7t);c+PE{Q>s?}L&
z7|fY-Kxb*2SVG@(ktnzKYon%F8ZIj@sknHKGxGKK3!XBCjF!h9EI(=VY@)4Bk@8=U
zX$tyWHGEx$ixb!-{)i;jt+Ofo<W=COQS~eO_{`(p-qo_k=Z>zt5_xR);Wrim#*-Jz
z8$EFKZTs9l`$t#cRJP?AX<j`u%I9Tp++Q0n{`T<X>r1uHgnyJU)a0vL#qqLa)ij^Y
zQC%5jCObZxEI*TN;kMq{!CXpr-R-@b*^Ra{S>AcvRM9g(p35^)P*B-asN&reBZ)?P
z&C2#U&CGJHKNnA*bW+?Ryj%U@^;Pxf)=n`l*)(&-f?BVqfh?zmKh)Tpzx(iY&XuUw
zt?w%X4D)t-bjcolvFp^Vr~jmaKi@sExjo70{qjedXHN(o+#-Lp+Wsl`iw{#cpPm((
zFI-UZ{LfqC>bDCH@ugo>Wmzr$YwxuK5lMEB@7OQ*N{pPeC^=}Ef55p7A(CYoXAWe_
z&v{t!lz;n%X1%#@tHfvB4ctF@vU0YGtY5EaQQi8if*zGq>s04NFPi5zTV|Tyo${`k
zs{iX)SWD#|KKb!YcslcL1F4<A&*+~EnDEeo!%x81p{iKwW>fP^6Y=7Fk>XjE#rfyV
zKG-=2{xVtgphWW8WTy=o8{9$;+&L8}u`2y^X|?%Yp6bZy^$Rt!rYM$7ei(hB>Fv@z
zH8<n-<y^lWv~5DJgXX<Ee7pC`ZeRcV-`cI;)7nqp^5|r@*|t*VS{~o>8D2libvI2l
z7SZMUA;O~*_WSeAFAi@j*j7fQyxqw4s;%7oaYbp03<Fzy)y}Jo8nHfSUKo9n*zv~d
z{CwBIUXe$ouJVZ@vpqt0ba2i-QPC#;I*UoHXhQy@cR78{VS(E=?Ty)%+uR{rS7ZI{
z_SxCT!|Ff$`}J9%Ss?3$lJA`*0js7>kFxW;+N5T{D?K^wlZ@vqk;HQsFO}3qZAtvZ
zB_>-@cPY+P!tnUM=<Ii4F-O+KxrFe2Y(8}0)~-^sIo{hGe;QV2^95_H5)0&dQ&8bv
zXEXDDxouQnVE21hmL!FOn&MLnw@z8>&|Y%h$!@Ns+TpV|mV29AikKs+bcjt(OKi7b
z-qJ9|GqZM7eEz;ZKkCP)U%!4GR=;|;&!<^TOI+EP!+MvK(y5ipl$xA6f87iAd+sW+
zjY)H^eb&9a;~(Gd%@MyFv3|vpE0ZkhtpZ(+8}GjpCh~6k_wC!)uaz}w%;%^t%NApv
z%Hn?PgiBzy=F*OiojbQ2snMONQ6p3Papu~O^$YjB*Se&5NU-4R6(&1F*>|x^FE2Dp
zc4)Jdk`&as>G9oNUsZDNq1V#9bAv0^eAxDH+FtE*b?&@|>2F?tD($=(_orcTPh1c0
zz7{^Ukn9uB&r5ye3AH`BZ0EW5WkoAzr&@ni(w^KBQuS=VO_kcUg*n>~f8+eI=e=77
zgW*gs=Q%bL4QyA=*!d)N8KcHSvp=h}WhVQ}PAs4LDVg>9{M`$KFR9*}T7LLxDuYC_
zOXrU5sT|6Z0@u`yY;R|uD%tU<_Pze2OWS<f4;y`+5Wtf)!Rm4DI@ebx?^>PuEd0DO
zPWWG3dv7p{)cK{7b>jBMPChdaK5cx&wUf81&;5SO{Jp~856_B}@9XFcj9MFM8at=h
zXui(*vvUIOE2d2Af5h{o_E7kvzx$V_n!e8}=S(u4vc%bx{fNMh9lMXzzcgekX{@?e
z^;*v4^t~x6->vFI{yzV|Lnb5n;Qw|rk;^L_=4|vje4)&%LSm=P>C1PQx;O6Se{tx^
zmIOhY$BEtDJ-jpdP8l{o*gSiqU*^xM3vF|*3wJ*L%lUcBv`fDo?5CR8$$O-*EZ2Lp
zI`H2-?%X-&_Sn6P7Ed#jH<>tXf{az>6G7A3Cs$_4yA|gzQC+e5{jF)aD(6<!OD_JV
zdYjqbWWlOUYO{|oyT4?n#q;Q0=Q*c^7dvvS+<iiGUzK~}-fPq4_2&QoB*Yn}<QCE~
zDedg{?|=Vpzx{Um-n+lPo2;_4T70#wMKZ}eopbHK7B<z0RmZ0mAI;soI`QzASO0du
zymfbt$CBG38{NO~<sR)i;;mY4-)tqYeo91=NBEp(wHga|^<?X;7{9)N`-MV>E*+i3
z_v69$#A~PCG=-EmPw}W*Cg959^|6~_QJKu|?SKEiPSicNv1I#-h%IFgC-h5SN<T42
zq%dx4%+?8Y#c~A`jb`te!<F`_>_wvI#Y@#E-q^D<MyRdHPjS{<GgDT#Va^w|vY2((
zi}pUqu$j}znigMp+GhGu)nzwIn7e~ki)sF}Sby-~^X-<0Lt_@U=G==Hc9qghnp)TM
z<#yvUhjPAVrIo(wimJJ7X<2eR6Mg?QbaCBn3Z2|%)+qgbyX>n~QQN)uTHJJ*(!|1D
z`ubf%NvGSkE$(WIlP>VJUUDe4UJzB<o!i*<Rlw#};o-L$`q3+6w#nD7zngt+O?&ml
zXaBza`}Xgj+F?7vRL7k2Z(eXeG@PCqxOVH5MJmqMu1|I1pSG=(XOG+Ozm-$ZMa@>_
z(P*&rO1X6Cq}}ZAvXdT|)J{G9Wta8#X%&klPg*1gh1<VbGdb1L>E=P^7itkdS6r~a
z{nuY{O;NZ{S;di}q`CeZ0=b@^IrTdxNzsGnfav)Not@$DPW;cC^Hfv0%{WR~!}R28
zA5X7k`E}=W4{9>SH{SU<b%oCCLchs{WqC8I_jp9Iha3-S)Se-9KGAo^-DaCFQzi1>
zI6i*owck`U^_!q_xyao4hVe&NMTwa)&t0@AN@(IEX5W^^cc(Y?Uryao^Ly``R}~qo
z$0X!}G!plnw^}VCx+(8g?A9-R$HM3E%0GFuKvt@t<H5uxzByCBEx&oSnAf`McI2-k
z=4`88y($Q2d>Hs(*Q3si{_<J!J{`e!*|I{u4@!O)&OGUtAvE7dW|=^a9~bNLxorP0
zNR~dj+c1T%Dr(<?E6Lf{LrO#Tc)dFmBc3QKCV5)HX#G*!s?_;nt?M*9-|qU>>H7Wq
z$Ah+Y*DfS0m0DWA&buae%SCeca?a3G<$o94S|1p-#NY3Evcs(vkBiGzexG^&{CYbr
zhla*WNA|8)dRX1es200Y_?OJdj@6pXk5}%UUAJas-;<ZGA670}m$~uflFK6h`6umN
zekIQ5fym}LFVg3}vU$3q&%S74G~@Rta~`(N&tLFocOPTo9nMon=5$EzcIZ4W{$hsB
z4$0|Cdas^F#ns)}_vh=^r=OSRO+3hGCB63T@sDM1?tgono4<Q&$MUay;$=FKEsxaR
z7Tk?K(`Ka|X!NjrM#vm7&bgOb55)+b{88O}P_)9L!u3l+%(oNAcAoUJnmYf|lr1Z+
zF?DQDOH)*c-gacUYhISB<z!j0w^FVyCw-SV_*As)lwO)I!TC}_p@h9@#bwFsoJx-#
zYTnNWRnpu4R@hHa^bwSm5X#fNWV|aRfK}{h$->p9UB$wom+t1NJrmGXl5?_{J}=Rc
zD|AK4Y+=jKe(bJmT#{bw<q;Ov&$xC*%9Ul^zqq|C_vGp8zgqS_-=^=yf^@GvU-E+X
zWV~$LVPdWuw2kRN_sxS0YwfC*Gc|TKZ4xc7Ivvc<b+;?5XC?m@k#94ZP10Jp-bzT8
zy|(dOuOw~qVfH=V*YEE2HNVSaN!s`R_ie_?-(McP2Hc%poh*LQX}7(g%to2NuB#sj
zo>nP76VN66K}pBf|FEc1$>zs%i(>Xah`Qo@rX$GY%jw0cacTM@m)7jvzkmP!`_pR7
zcl=&DCCF<@;cKCdVx?!oR;?21ebl{-NzPKu|I4{-#oYya=E*<i|9qx&rrnhb!YMm_
z7PsGW5tzN~MfdfE2j)~v&_0naxBOtKtWv|gi^oL1oR0K1w4Bv^Rj2r%bzn((kJH56
zD{oX@t4psu!u-PTnxwngOS@(4i?_(jd}oy}`nK((b5E+3*Jt^g=I>X|k6$L|&%9La
z?F+l^SIw=*q|~n6kDa_#>Sa{JY=(Kw=L2&-Fq^;iFyPkc`^Goj*wTFAihy0Z-_Bol
zcd$#&)boiDmXGUM*(<t4;K7#r1t)jjdv#HzB|`gSU&zOTx2l5so{MWLHSAu^bysth
zl8bKEiTJC<)2^Ak`0ais-0Zl*bGv%Ak14au)+<y<oxSk$u;P`@Qx|Pxm)y0gV`_1j
zCVFa*H<O}sibS5+md?^whnEIt`u^-#*&@8ytF1^s|E@hVXT-Ol*H%mu!xu<3x}6q0
zvG7yrauM-YmCg+h-@jPA?4*f*Yem97lWGQqFReC;y-yDEX+^XfB+qRT4c|CnUfYhm
zP}yg?*>%6-(n}13zi!Y8e_E(pv~H>4u`TDntKE%XeLAG}t^3X!nbn7;tuN+(KWEm&
z3m2#V7SbqOlP}cw_O|SkA2spLEqxvWLfkyXeov3GysKl;J-)V6Vrf06N%89^evh7L
z9p0-f6>&+C{jXqAn!`O#w`1Ou_0o7$_s%&~=Vy5TuG53dEX%I&uD6f1|NEn;Qa4q2
zMa#*%vsw2)z0tqToO|WO&f5?E^1DX0uiv?4ujAaf1w5*@>2`1Lh}_+pYuQy&_d#a;
zgcYfA0`dzgHv}Jxs8x+^G18Wjt~%wG%s6Ao|Fm~~95a5sShu7h>1+Uda`wBXqr7>c
zOBmF0ocK%MZ#vkOY<F@=X7&pf))QqnUp-a2Qg*w4d)n^q?jDVES2SBhXQk-|b#LVe
zDG7dGz<ry4#|EvfCPn$jCOL*FPj2^PP?XW0KB-ge**k09l-sH&RFsaMHF6h?)7o}g
zY+GZ+pOB@8%r-cruKwD7{q*+OKJ$r6t8OK-KA9n#<eB5xo?zG`l{$G_^zGT^{+dUP
z6icfv2I*f-s!kUX-8<z|(CgRBLaP>pet5(yps96lPN&CpAz5x`O${aS2TZNg4*#f|
zas5GBN3_DukOfgD6a2R4@1A!1X_fBnrM1=N<(yOPO}j+TUdh#&+QOe*7oON)^78Hr
z9+ri>Zx_le+jH}wlkTc)_jSrM`<_oRoptl~@855~-4l<0_-As|Du%RWclV24Sy~Vm
zx6M~{+O#|EzW-~j)Xr^|wlTT2f3s7aM)&8%U0elc3jGp3N1V31qS$qLlA_h-!lzrG
zPxP60?nLRNogZvoY6V?bY-BmR!i#m%);r5>-bhyKeKlTmA~Ms!Eak8HOHW<nkRIo&
z_qNwdMxC#$OA0Xl@vtTT@1s56xfYsSC^7pU`{VF-=1Ae^8-g^_4Bh9XNL)T57Fw>f
zYNCstfy2zk!_OByydV4KjNl|z_UiuL_X{hGGz+;ZHoS4SeIMh=*}u=_TgJPPSw#-w
zO0l{@SA+Zx%gwsEw?4w>U<|{=O?In3X1hOdxX$M*lP;muvQ8syo_T?z*<z7m`8D5u
z+UR9j9r5w|_ugxsb?@Qnud<&V^w`jKaeLOh*IITjrJhY}V%#2_kT6~M+>0ygj(s>~
z9aewj^*pN=g-W_pFW%jfSa{h^((1#R`r4xB-+Ozi%pVj@3tPR<@4e28jiFy$i<D14
z+4sNsTT6IsAg|MkPn{7DJ2OgsUbxS!`Co3t79ebLXmX|EJ(~$@vK%V61U+DUT_1i&
zKl$@-`PyH1LM^A)eQx|7Qgr_Kn(g1-iTVB2J3n*N-FZ>d4|-L3{f=V)^L@QSLY&Gj
z=jzn?g$v%TJYZ?XySX<)dwFF+smk+fb~c8=Tz%{Qy^1S5v1^7U?`Z?y)6RA)<}Pv(
z-yWl&w{Y?5#cb)zH@P=Pudn*MsrKjV<<s@`zXtv4iodgW>$D4Q^DQpaoM^w;(*5po
zEJOaiyjH!X8nzez8-{<~mnZVzOATM%bcV=pg>2s6r}5ZMTRmIt!L^y5mdDtv=Dbwt
zc)RnWhvM#xi&ZOEJvvsP%i+rP?zEI&a@85nJ2{-yQZ~lzKiD*ubjk}pnzQ>~MavTP
zaESn$+%+vi6W&(s+_!Jas<(eVeg^O13Ey;k#**i80!)rq-|nhhoFu%|qq$O4vsC%Y
z#WQCN!*e>qbv)CprB+ROyC|{6BlGfspzrz7-4_<Da(%ruPd}Watm;_mEb)oFK_Poz
zef#z6@Wr~q>Uw*XM?asPDP3ljVzeYU@KU$jt6x!ismxCVgs(6BJ2x=GOYrVG@e>!L
z9xr>k^i{U*^dK|VjAN6|sGeOR6Kc3UEw=T2qk8wMUrIrWewHp$M`e2q949L98m|7M
z6nD3xx_bTAyy?Aazuy*YcG&w=NLs|g>*Uq{ntLzp7WZB&o7!AmyxQqHQ|QOEwX(Zq
zUwyjv?eJMH@9nRTt#D_XCcfyx#;HZw+rNKLw*U3p_P9ss?k#DxiM$NY<jyZxb#2jv
zGbPe;a-ZT>Uz~qgJvrOqpTUX8g<2<`cqGWaEqoUL)h}maPGG3@%dgg5iIQ>Y_xNA<
zDb=5gSo`PsnpYytA(viq$7n>9|601R;(2UnZ({J$Iliku=PR$d(!M$;=)-S)xu7eZ
z=^Pu6FX{faqoi*k^K(OMhaZy<dy2n5nYSy%X*Ev^qlL4)L2<0zX$JO|sqV{pyuS2p
zR~B46?`zV6x-HQ;uICon)Gg7EJMp}k#V=CD!FFfB1Fa7?a@KG7VEl^JXvg%zhuJp+
zykz!l)+;>k>L<SFdBVoi5?j<IAD^7%T~$=-!4ms$X&c{F*P0r;mCv}gsz!Lflipi?
zS#eXx!Jj7-q(5b@vF4k(?Xz%Rp6<43T#uVOqs+XQf2@r+mYlP5hmZEEZ^DNxr*8CK
zYJRlMl9NRv<ZYPZ)zDqjPII+q7tMJ0<<Fap1^<f9ro7uJ9AonTIp?F}UH5n1OGumM
z_cZRiwDsrAjUSG_v^+JhNSssq-ct#Smv1g-++O|L@Oi~GiMzj-i-(<0h<vnWwc~t`
zwrV>^<=VL>dHrWzzinna<Kg^h;pyWiFVxJtV{hHJk!MqQ!G;N?QPn4ekJUL8Thx2(
zidn9H<(Y`Z=e3h0E>*8jy|h_j*RCD>-?j2fv_jfKFEWZ7_P0ctO;juRu6NNhy|1(_
zUEKWPzu)utCH=f+ZGIWC^7Wkhw?5%hnrB_~S!UN>{_g9`ilXH^7JIFXTL1Lv>93dj
z^{=15{#MB3%@nJx*ZPu|3t!<o7!%fK*&w4_S)ww(`v6l>NJ+$E%bufkO*_u*p7{CO
zTY0Z<^SarW_SaWmGIDIv2+$M~ySFq|N^Z;1);R&my0av?oj2)+*w&_A-pjFOk3nRQ
zzF`w@N|bKY7MXYLmrG_o?YX|T+o4BS-Kwi;(bkyw_-(I`uHAZfPv!ENQ*)||c1|;r
z)Cu?)lx5vlX)MrM6v*0@xlw6_t(0bn?Issr>9sqp&2>|mHHGRJeqAy3=reV`!rQ_A
z?(2pZlQ=Y%I^BHp?AiMH`~MjK3wnCJevj;N#Q>??{9fN9yZg6@^RId&64-U~BEyCa
z85dU_eUW?OhpM%4$Kk+*rK>}iIO?sszAD<$Q+56h319C3AIAVuJ*|6b&pbkoP2sxR
zBARqS@lwo6?o<t3uEjp9<lLsz?$5fZCmwXmu2iI?P%?8t>a%Z4Sn_u8E5Fj6ux_of
zDo6S4{=-rSxBvZCb|-FQ?$*AT_Hz^7F5RUTI?2Gey1qX9+JXss#~35FuNBE%yVO~=
zT|vz$^vWuR-rg-UC4N20^`G-O;olblfud`X>ub)lp4j2@g#Dy?xVha4t=AsxuM-4U
zN_bQRo7=9LXKJ(Ohn>WWc`g=*j~sQEwX^?`wlOc}e4(lO7U|<#mcD+UZt(Dpvf7!&
z|GUpU&TRUTlE(V+WO%jMi*J{&hc=h~EU7*lrzE(1yMW>DV-J@pOiL<iX(>o@UZ{6z
za-3lb#{r?qf96cvyo+zwGu8Vqy`zpXyma1uE#bh&X|m^v>m9Fs`Rg`2qMt>@&93`<
z%4r>^UH7BU6&zVUeQsk!$)X<rKCR8wek_g+y&)6BOxvcq?RkC3Fz;fav9BT5d!LyV
z_jDdI=Ux}iJ>MBKxzSzHTPT{9VWDr~{`0<v4==E>+>yc}kntkI_0+5GkA~b)i?^iD
zE$C-pdcKV@y?4TzWzu(2kLxdcJ#|{+<<tX@4lqx?v}h%BLF|JW&AWwb9qeY>dS)^i
z7RN5x6;xu7>N<Cok)`>Yk2~*%&3eHwf7%??ze#f+C5wG&_VL*t{PukKy-2Ck>0awy
zb@~GLeP`I8nYDMjr%v=#t2?{rP7@B^_eZ^~`SF$6{+f3>1gz(CoeTcpy!<1h)@t3`
zH=lkpj&(hvs8%phth!_cx6cvRo+KsT1R=u-vQHkH#K&yq=e)SutT*{RkEQdiish3J
zue5sCE#EaOGP~q)K)=n#p8gdE9xI=I%B=qT@YAQKMSr(k^%lL*tFm_H*0i?<*@cfw
zf2+N-T|UojvB;F8x*4(FS$s>Ug+AR<=eCK*Vx!*wPjPQ1Dr;O|TC1vFB+@>+e~H1f
z%?}^(z5FXL_P6%m!G-Bz3l@avZvBy%cKZ9Z=LX-)rn$e=^bb=NaNXb-dOGi(ka+(R
zY5wI7N=Y3Z<pERH=IKwb+Us&zi0xxy$Yf)=3Wxhl4_=%rtH{>l>&l#>(7W|e=j_iK
zqT(6eO#w#)yw3kvIQi@z$$(JrCy(1a=9zqBm=bi@@~x!s-JJT}TlaE$n=$A`YZm<6
zwLO0AN7414n%2s-PyY1p_3diwJN#u*yPuTkA69YGojyytv?HtQ-pgDWHxbq@7tQt2
zD@DQ<w6$!MU;5>M$;)Los}#+cubz1pBkpCUYTn_*QzW34>bdSE=c3@gRRXJ4$d<i$
zTU5C=y0=|5`AqeU=FZeDjXzGLreAv3RG3*(dg-i#mP_arpT28*X5QF-`*;5P{Q0L!
zFRyLL6EnLXrDX52`CU+}h3l=tn%}>!uI0U>C%Y=j+tAo}cbwkOLkp}o@6!)mc+TMZ
zA#qW8`F7@tIW>tF{T?xg-JYvrtCDGD*e!8xwjk%Jz&pivefn2buvE^w(|Y>ht2r~b
zXV%&a)|-6Ok<~8y6#wyv&dzp)JD>MH;fa1dZ~pJ7pN5COzlrXB^C9qRKUe+TBTe2~
z?^L2~BUF~nY5Qal_L^hPLZ(A+{QR9JPcG5)G>K@IWIr=EIj>ic<Kk8COe6om-6i%e
zs~#WfpVE`F?&8Ax`X23PGM{|$JMrnKmxs}{kG~guIl5fYyWZaWsGr)$MDw@t)8`+3
z*e~?|&E+1ysZ2Y*7m7N!A6of@Lv!U}O)tSiY_rThDOdMSYyPY#J#&YT&jn+-cegI<
zcCzb<i9Npc_q)NR*N;`rF6Eq!wJ!Jj{dT6e`$OhcPSb?KLR$HzckJbPb6Doz^NF3E
z1&a2U&hk$b`*=oe(+tCTo?9<4=AX9+dOe+GNtJhKa^y0-5Tl6D;I)@8f17rrd*S&=
zRV$<R?-56uXK<#>Q8Mbg^7G}~IX8Mf1}6PD^IZS@vWSfw))qIn7sqUTz`V0X|4$?1
zx%Qq#dc3m^&wj<tX8d~Vx!vz4%#rDDzdJSgx6L*WqeV&nHM(E&xwN@WzU3ERnZV)G
zwD9TMg?sf1(l0*{*&HZUnd0%5C;b6)OKR!$a}&PV6-ky~&y;QB|C^P(c8+g+N3l@z
zv-_JJ9UGa}7X1u~udk{6`>?2T<=3D`pG%6fuk|gKJ$t6<o)qU9?hUQq7X=mk(%-D-
zTPSxl&}*frNd7clmy*+Fn`W5a<ystAd_HAfjnJ03Z8feEch8=2XUXB`Jh{0uc+GLe
zGd$v(90FKey{<-?i*AixTp3={`-W{!8P}#|hd%nXiKWH}#)+=qs#PGI&w3}sT=eLK
z-IF?vyMn@6E(GaEYaTuOX2I;~i^3j=7~Z@6>+6<`FVD)XZ}o23^51IZqoaFgRBtw)
zF3q(t>VaEk%!6${7EPDW-eo#yZ?biP#jiW_B>A4EYDZmpd2N;{cb)y6)gKorJ*c*>
ztKENh-J=4hQ`<UUJe_*&%$+wGCEnBW(%+_^SzVsQ@bcNp2^vZcfoJl38yB&x?BklM
z6?oHqubp=A!fw;_B{#%$Me}5NAFR%}(dD)BR><tal!;#wKS(@LsR>`G@*-n5v-@4W
z+o7)Cw%q91e5Sbn{__mU=XN&E3wCh2M6S9ZTr-EaPx9&8Z{KeJzAmr7UjOMPN8PF@
zE<N#}ZU?tF8m=c6zs-N2e{<zGCC^arm4#nRGQJz9cSk$=wsrBAyinGEYMu1%b8x}>
zu>TAfdsNp>OMNC~bzlucs=@nq-5#Zgo_R&@C%@5}rFghQ!A-Dai}!WI?<t2(7WarT
zfA9L4%G&XH-2;;eF%vmY%lzFkwNC83)k)E25u;_&*?EsQ9X=s=Lh|de$!~T~b~t}y
z2EW<!;1~TfvINz2e}49_x*{N@w6Rn5YTUsUWsK*8B1NxS{8nbw-XY9o;c}7j6aU_e
zcZ6(WDs^nm?WlKlG5mK!SmeWRy(f`8?QaTxM3nX{D^O>c{o2ItXKrDh&kcE-`D(``
zeH4tnbp2$S%a>WW|MPh+%b+!H#;JWb4{uib=iic0%P;poLnHluM&<hNuA#9qDJp$E
zj{mM`eAu~MUHkPH_oJ)#tp3OLy^ckr<@fyb)BN&zXZuyZ_RC28^blgJjCvQmA%{_M
zmhj&b$7Nn^Go84Xxyi5Kj8TfEQFT(!?1wta4@H`v{@#85<s`9<R}#5f-mEh_Kj(m^
zmo@jvW!>zbQsybj$DaMIz13i2K@p?A!b7X{Z!32!XMeXZJMNCym#TI7&nG?m!#3m7
zUCTZ3l~ZI2r7y@l?3dkW;hVsb*e$W?h&ku#Ouvt{JjZIL>0b6)eVFgy&rK2R$1Z&q
z+IeTmoZ4?S#|x~UD))vZRld-j<YF%uT9TB!^ToHQ_<b?)`(pI;L$#IG-<~B~o$bDP
zapZ#srcaJ|$*-SxF#oVsd;GK9+VUq$b~%N7efo6i0-5cx71lRCl{@7fO1&6eIyW?k
zCq$8J?aCHD@vXC;Zm4{cbKdsbuD!FfQdWeuuE-EwyW)1Lw)lQiJ&E6{DGqK+zCZDD
zn762O)3>rX7V#B3WwqF*WKW$`@TKOjirn7aCj-1Ro#eLPz4Ia@v}4*@uCsT}$(^dZ
zP&$XL_ui+vGj}KNocZ#hv`fd{t<n#o9{=;woqBqcPLPmEzPq!oK&t%|L7%@rcG*5#
zr?WLfSMkBbhsTQ7AB%dq<?!FKEsGD`xxcS2d$)7;#b2L(7Hj_ccRT-hu7AFJNchW(
zZzo)IT_xbF?-i))YBI6^%&jw5UvJZ`iIAPrC-+Rjz;L0Es#L?FC^Pfzi#G;u5Lu(8
zWpm>0-Io)LS}$v+Uh^wI_us$Sx$m}OQ_!ojo3}2mxp`#H1Dhh1>1npBn%vZvr1*RG
zm)acq`Oano=v=CM`)V(}tqRoj5-+yrTe)>rla9C2%J<uE-`;L+9Bn((b?ddPwP#y3
zm&|m1GC|8LaAEb6cX#)CKRMr%^KV0rx{c;#&n*&JrHaqjoU)a_CHnqsdHnJ9VvQDi
zFY8XuPQMVm@U^!KqxyQjh%d#0FRG>Nb=}|IHI4L~etPMF&q<k@AAVb(UjFL9&R-|h
zK9`(YJ$=e7Q=8S(ZJTyxvAHttmp8Cg^iwN8$GWhWzjph)@Tux0C1E>d4SlaxeAvaM
z_2^_<!*cB<d;b@om>SC=Jn_Z5CYQ(FRk{`P4%)w#`z@pXvZYht+^d3f3m-q{onE@{
z+j*rNJq_1+CU=5o9%HwD)}^NSVXjeO?Dw*Vdk%RPhR^-BOE64W{F<!||A~f}IR`}M
zo}9B*@T2w88}k3|E7worpXW6*Y3hTP^4Pv}k|zEy9hFoAa?0ndHhAhL;k2WB^Jl&<
zh9MVc&EKG>@}WkrUhLxkdGjXtudb~7?((>1(nPagCB;V{3(wfszVTgUUH!Q=v6A<c
zRURdN-~8w5WGnWQ$ERl&9gF?`NBQId#;b{E<xekp`sWMlO#WEzkM`Bxy*kbBbNdcS
zT-%%>9(V3eyjQ?&%Tg<$-UAuGE2M1y`q|IQne39#C#I99CI3<TFO&4q7B$<Qkq_S4
z+Gb4bEn;1M?()N%{|XyCmjBIOVHg%4&Ofuh=-ENpG{(gDR#EqMy9b2mr&iy!`~URo
z*W=e;XS<$sx_0*4_uGZmYi=s!TyoiJq4V|D!<-44%OBRxIBN9hJQH6@#o}qEk+ruh
zo@M8sWS+8a%k*{I{Lk(R-n6wi<;0B%ih3r;SS^L-uM&H^sXFWGrm2gL=ATvX-Yi{k
zK5fm;b4PZreOoH9>(wKtyoQb~T<WL33MEW_7<584rt`h8weqFt?0aX<daYems_8gw
zd*vKi*M&SQEQ{?gy7!BiH-*h<Rk;{5>%xmgsU_XT98$WC<$n@XcFr+SozrKsX-b4?
zNdE7YjEh1hDi!Q#ZVPZ<VSWGpJ%y!L-WKh=wl+^cJaD5y^l7E<O*T6$gPj=^Ll{cA
zj4oR!6f+ARdv`%aWaF0;DeQs^yLKLAy%43bC}h<x>!ZoWA)>2pZP`+DIEXoEy7Y(A
z4YI2wQYTH3u_$o7oAuE^<H8AR+nqjBJ*S^|mT>FQ`x5KE#iqiIVpn%$Ug_Jm|84&E
zU!PXy%#M5Xox3wobE!cY)5Gl>Yxlq1zWu_N=O<f39N*Y&n=LUlweeci))n5$Zclz~
z;Hus5{c-V=d-4iC$-a!Ctz{>-zVg(yiO?u~8j{e>Qd4`@Zh?xJ*I$F8WvTJcRVp7n
znZDHFf9cMu{yo9-9YZ6I=zce4eE0LP^XY<F!cE>4PgXoj)mh|TXz%b?W<Reyv&Hm9
z_AHzh|Bu^Wn3}js^p}Es^T%79>Z{9U8maKeo}cUdD<C21*ypk{el49(A876KTk`ep
z(bCQfM|8N4SqD6|toq6_OZs8diR<sDJbvOcMSI`R%+O7nRxm$_3795wq;=Q*!*+A`
zS;&NUip=YLzx*MSNU?sz=85SAMGH<gwRHdEh!%QrY@vDinU6Dj);=!Y{Zl^oz)zR=
zTmK}TdwTno_>;RDH_o_+yjDtfes|-8gZBGXN>h2}p4L0Bp(H1!`z>O@pQb<O<`!1f
z*6eWnvzPPl`~O1gj9ZRK`y97!cih_@9W>wa*_Yai``$GI>Ix6$o;dwJ>&dz6GZQ|`
zgz7F8@3;_kZq>!<tmkD@UZ+SL4F3J~?qc&l{{AO3y_HVre15N(?DBP@;*n0C$rCp}
z&^%<mU9;MW{mq=u?ms?G-2NnUeQdn^#tvq=X_1MO425g770+=l+3qSo+p;myXs(h)
z;r{-l*k1*aN>2~4UtsaOooHjfwwWu7=ilEuhxE_exI`?F)?5GeYy0%q&$IVt94Tdw
zywtbtKA*MJUjC~W?pMWr7U5(&5<GR`o$G%aIQDeD{dVnMsN%tA(lwu_=z8Q!GrjLq
zP+v7|;mP;MZeCrn@N(w!IoXpcTAZp^u39CSDjjt`;QP{Tv+pJGD<r#iEew6U`Qzys
z_b2yRW~sF1Y+D)9xbywd*zPbE*DJkwZeE(LtgE871$nV1q`kFJ%9^fMHbX?ksQ6-F
zT4`jc_9@qG-fO4#wm(U~$Zf*$Am{{_=n7B6$BAdXyQ1BCJ|9Z<UC^r5Iq?O@!z+E-
zO}epJnx(>mo6TZuK5pCRus-^t_r=$00maSJf>!T%^M1F7^|rE|(>CR%u0Fo@?RMoP
z!6k=(2{k<^+V%YDlA7L6cjq#d7soKT{&-%@cSqLh?q}W|6N+vqrrgw1YGPXWspDv)
z=tply%?yXbE4`viPdVS2y+~H={-(FSF0aD$^{1C@DVuRQjPFT;z}E`3mj$x}CtO-{
z_ul>d>$8tCTzdH|YR%0;>%8rc%dWg$dTr5B&9$bJwuXi3uFNnj-v4&{?YCk|3oE`n
z4h#x4Km4faRDnamYSlaMd;A{%cUy69v*{m+EoUy|@-PRAHSlkk&DeLw?p<WdbIvzA
zwS-RW{*~2rN}N;vn8Y&cj(1ys6tjJNV|}%6osDWwvB>v5`di(4em>AX=bJz0-fNH5
zhkJf8CGG3|Xw7``^U>$*_S^P_IxqOesG9jrPth#ly69$><sI`x&pdD{+2XOob<yhn
z;1K5Ew1cv<-Tme-Vw^0x{h0WJn7>PY+_9YSdCKPV1%BPBiPvl|`1xCOo2>n9=~J$?
zbw@(SIX~W?3l4mHSnW|8cSdpR^k?U{7zI9Btu;B~4zF&-g$||(pJ&K^k6PCM@uUB|
zI~(PHFNlirx$-)Bl4-tD^Oc8t|1wXjc=ntvPxF>8`>M1M2G=<&F09E?&`X-Od*;`Z
zyWU6c4`11DdZOIz_m1$1k6kohsebcJzn;9!G&lOBaMZK8oPFMWF5z2sPyJs0R_wRo
zi_L5NGd?5*OHCF}6S!b>Hv0DU9G3aBc;;Cgda7*BQ@1?vkx0K?__OO%CM}OI^(xt8
zomD&W;rdtCkLM+CO#P7a>v(flb}U2dp?j)Jx6C|u`E|AOS?9^k3lewfxU}TCNj7KN
z?26v;En8yNp&qNby@uzMwdX8RIdSIaj2^y(FU;Bs*PKPaZjFoEUUl|SoJaPT!+n1<
z+EwS8i%j`4F;5`euX~l-f}67(45h0da-Py&V3D)jbFv_>sQYcr*QpyLl#4G+^J*-g
zyz(9Y-ua9f7gGxv8)vXT&iLTqs<>|{pHkP{i(l6JUU`*!%<_6l&*ep3JP$eUY!6!X
zO7D~3_n=R)4i}C}GX^}Ks^{Q(u|T0>^Tvb4iz|Y4GnW?zMI?P>oDy})^|b4&tyjD^
zmJ4&8)p%w5aEHaZsC8R|R$ZK%mi8_v>&4_d>^t{ySKYlEwR)4ImBlnAj_QV#?JOCA
z3~w!R>|gAgQ&<-mvwrKoyc=Jx_1(>>Ub*$!qOTe6&fTuF&fBps?$y?5xwkdmX6Whd
zJMqM?@>nw0`%6(Q)1O@QnpN#FG3(}`l`r#7Uh5LF3|XL|@OQ&CzJ=4Xb)$FO4$x9!
zT{uaa<pE#R%#!)*nniV!jaII{tMXp*d)bWW-TU@T3)0wrP(Y>dm`~9>y%(1~cik?m
zuK)jb`~A2LZxgc5o_(2iZPwefvoAV*z3x8EFKJHo$DoaA+rR%U+g*Cd;gdl3w1)fq
zOtS-QTK+561|50sFZ$<Z?wj{j>vk-9^ZbauXz`BA>B8q%UrTK~re6Qy&)&`D{drl-
z78LEvvv~gEdB%re$1N^ACxnmw{p(s3D0=5<quor2^MZMN-)zj@7W3}C^3}QJzG!Ad
zbEdrZ&bSQS=MOvIe}A~8?9a8osz(L?o~iyC$kQDnd|Nu__nb%ETX;BctWZA2rtq0}
z$$^?Z*9Bfo^}DBHvUAH{R^EfbukXojwNZDe-Qsy&bK|k9GXELhn`N73mS5P<Ixp+a
zo(I2dq?;VVy=pU_xQhK`{^LAd{iOGSE6?@nyRxdx-2*Nj*wHKSY{HisllK3M&wnZD
z)A7A~{JX&O<u$XWT{>!L)XK3TMY-m>{@R0{GBXNvu14Q6n-mzIdG<MT=U;y<&kqM(
ztENl|mh<Ml_m(S(f8E@^o8c8NRX8L{qJpbBLMIp>u#Wt|9<O@r<wQ1DN#*cDpX)x8
zZalkrB=i4-#LY`}3jW&{t$o+;G(BBU_KvWezxBD@-&RJw*=_4JDPZX?f7Xis#|2-S
zrOBx$e44Yg$GuQRS?~0LN>7IPg93qPkBiJ-9P72GHl&@AkB9%=n$UKandi5q+&sHh
z*tu`-mnACq)veCReshxT-7J$e?Mt5g3WYP(^)HIObXRDtJRz2y(=Pkooo7;L@l7k%
zj+f$##aNGCVZJ%9>s`C_(;6w4UsHK+y*wij+~<1oQKNXtr1<-RpY6CF<kgv#*l=By
zI;U&)ep;|M`^-h&dqjecZZ$1Wu74Q(Zm;T(7Z%=L6TTli?%FbS6Q_v5hXAWNJs(5V
zdXhc+xEB{#pNY~Fxa2Co$ar&s$?>|wcb(2F@AVOuY-9bNcGI?P{{D?SL{2!~KO41K
zk=I!>YU-nRW~b5xKN#4ztX(ke-=akw8G$^HL|M4ZvyUg9bu*V$ZsF2w5V<>h|J&`%
zYP+s>=UuP5Y`Z1r*}r$Y@AbVty);L<;qSI|<=5Iit?8dkqOCP_wyv6X$7##6v}=58
z883c65&c%6Q-ba4+MG*g&irlYe1Ci2-n^%aq8M}ycKlA;7Alaz!D-;u=B%;umq+Q!
zs}~=Kg+9(LuHJohZOppqx!N0fllfn^C<qsypBbRJr0iB*&F{Cb?N8k=Ezf)U_wU(T
z57!DNMVrdC1i#GVTj3EXs~PTiN=(qay1M#y{%<7_F+ttb<^yMz?=*XJ+C?{Yb*QL2
zj~bKF?sqmne+WH^o%d+*>o3Vy&t0)hs5)6;?{$bdQ~6Qd|A^c}DO2X>d|;IAWSD#A
z?1?33FBr_-_3K>#^Lb1CDZd*+X9~6m-`~_S<BL{nbDq&hal!8n(;w=*4tx_l-Tuvk
zOFvCEPdF4g`O4FC7J`PyV}4do`k)zhh{H_s*N&a9B`!`;w*32LB5(0q&79iWa+kHr
z3R|6J4(pz0x$|Ye$xD@)FVFt|vPDvAp4_W{zR7M1>9eNxSAPH0*KVV|dU4Ck<nSja
zc(^=wo<Flc*6-syna<yCUzl~i?pXdpu`^C1)ceV;SqW38cTB$8dE7qf%`v-sN;MAU
ze-|97-}HyEE<MNE|J~9n>g>WAT0%#&@^<I%X3!L@U&dtb-@o;XT;Jl7pYhuyY>X1y
z9IAH4bT|Kgvgi1t-1sSOvD5E=4b%I)@g?)3h398pykX1mQtwTPEPsK=y-5y=_r88#
zck@U2_VWFn=Wi~2oS`sn!{y~^%zqlKQ>xg)ZY0|;_cLys`Ryp{KK&Kfx~t0#Pft@_
z{l{;v&-S&&0d-=D`TOgRRGm-eSn_4g2ZN^`Y7)mKev7Fbn69?K*zM4<ke4Z)^EbRH
zoz1<b@>TNTTX(lizE<7E7Qa2gI5zdpi@OIW6~B|Ses9p8x@Wphsj<I1&nFh)#@QEc
zmSykWdUq{rhO<Nq-;yhpZ7rOPx=VL-X{|JPe6neC-`nt4zh6FD&-AzW{Gy0Ot^P-@
zyz82@G~|`TQ;~;f(iNZTUF$!^5mxw;dlQpLe$IBW+ID_s-HM{gwQnTn-%Q!-va+dL
zOM%bQVR5DP9?|Xp*wfk8ObH5^Dq<bQ^Zu~*8bitMJ)e$V7hM#z*0sCK!@6%%wx;Uc
z_IGI?eH-7s+LICJSp4qYGd-za>Y`qOp`u!wSDx73TTz_7ZmGwX&=TFb`NFRb1}%7%
z#+c<Z-HS;*=5A|^^{xE)`}h0PK2&8+U!8Y7XxpQ9PS-s*1dG4jKCP~~HRrl__5H)D
z=j)htJh@`$2}~_8UX`)Zb?cQ!Cgz{7Z4o)9THv@VgGGb2^spLJM7H}HZja~BN~?8O
zv1)Mb><YcRAdB_giLLKntrGc^m{3(E84$Jk`}XKO%aEwz#QEuIY#%0Yp8IXF&A3b^
ze}COycjo(LwR`tfTKCP~xv--;`<@j4p&nmf5ye)Az}|y<m6Y=T{C)fOM)}Jl4{lt$
z`u34Xv2=}G`RUThS!+Wbve(Y{asFew&#Pw7tfv;2e%k!zT6{>3Jw{5^>fOTs9_8kZ
zds*i*&z2N8%^x}~hK*&h?#p69p5)N@RW@0%&!uZ$@G@on+p=%D&krd%*E^59A9Pk3
zzbLy`uFq&4)%BaR>$K_o6MMc}bliBZ$o_8I)xt{?>nr3Z&zC#ud3Me8=X!}f7Mqu^
z5PLS~rODse)#s<Y_F1c?;3_ydCq6xN@|PbfAF}qv?`#k}G=15N9JYf^D<&U}Fs%{i
zZ~nHWDdFF|+DDclTex;Tz5XoL<i(qx8tUt~ukV`BeS8f+M`+Tcd(Nk>ImS$L)_Uc|
zYxtslWqf7j!o6_^-^lE$NbE4V#(6mE&E4bs`qW!Lu|Ij!J-ymYeWuQ;i7wKqmM$x}
z53h~3+4XI;v7ghuqDwcHE^FX0Xqx-Qu)fB#P@nbrT>bLGkU9DnFW+zcZ1*TjHi~ao
z+=bVZ;%A@XFLC^2#Nv40!LRsj!DRk4K8KFeK63M=l>Ag|o@?>nH@x+DUEptK;o1)>
zUeW9f<*ffIs?JGo%n}ovpZRsmlstdaedlj{nptC-b^OH|Tf^?t3+MR;%ZsO9=0D@=
zq@_J!^UZae&&@Ji{^RbcoTha@j%V-|^Vi=~=9BxsOF8E8)ajKsR>&2HYDli!@n?O?
zr^tquqi4=mnAk@-sE6PBKc#lns%c70Qf~~b*W5ibEBf_?8t!!atq!KKOL-diD^2?2
z#n|n(prmWkobT%^j=kbo{&KR9+L2AucTdkdx#C#k#E^=tdY3s@7Oh&kqRmpG!F;pE
zyXyU~-xcRhnL2UP&XY4f-fz|Ncb~b+uXW7_zLTP5hu8SVYE_16B=E3n-tD<If6I(!
zao39umvvU`f7r(yA~<nTZcy!`+bk|;&Su^V$<`Hd+P-S*&dMi~zH3B=a<1AL;2r*b
z*8*n!DIZkgofY)1c`)-%>08mX%qWi0@v8StGjnFImCQlQoWv%IGqLaAcQb$c-vu*H
zeS9}v{BGR#)7iPz&B6<IUcRAWKJjhp>a97~Z+$zg`%d^elf&I=lk{t4BD*BLZEk3}
zhMr-$xO7Ez)*KgiU(Ov`Av~FeX89%gdE3wC?ylRFr5kms+iUHsF6W0*uZ)hf<StwP
z<%`_qrOaoRDCV}`z4!g?rq)k)InBf8{;rek{;H6-gL|n!Qttix`|k1N@2=bRPK*7F
z=q}OE*Z!1kzJ2gnj^OEy4&uK!pQK&O+rEAA-G*TPz^u-ojaQ~jsHp!dHZ|(;iH#Xt
zB`4S0?l$;%m-YMQueRq-wg36aby8|ceeZJ-tH{TVKWuMb`D>H8tg>yH-9MGI9T^R)
ze?>-Vd_3Ry;@qizzeewPwojes9=Qlx7;0!tTxNRUy}I1Q{wQt7bCx^*uh`+wepM%>
z*pl<k-p2R*>r#bQ8NHtW^~Rw?mebwWtB5Du?woTdwBhc>!x3xwSJenq+{umId%EYd
zPUzGK@5Y&26WhyVI&52h@ccfp_VHXd%lFZ4XTp}u{<{6dq#akL>TU|MxR&vKW!Jk|
zw+z29o?mr+BHIE_+qi%`>JiqJtJVZDOnCGEADhHinTn)~-!*pgOW%px+P~$y;ND#|
zS+YjE#mc|mVmKq?@yBxGy%pKwWp`z6b#-fBoo>G3)Fj61yqoLxbt}&N@bpZ{6Ao=1
zC-JE*Z|8XQ*WYt;sk^Y|+_w4rUbZ@pVKZb-99?Sak|)X3;J7<JP3Gc34L|h<lQlV8
zUoe$aonZ1&3g2~pzNMy7<lpr_V#+?e?UdR1Nn$_0M?~1<=TCJ$1q5B)y+@m&eAn_u
zr(XH>Yj6IV9=Yo7?hhFp`>spnD5O6B(41~^yg;<?`y?^j&fSKWMA{abWj?*T@y(Li
z(wPBPx?*=uGalL9k)<YIXV&p^-~ML3ev2<M0x3R+vJUsoY<%`%-}*Y2JCYA}Yg*6k
zt=YG^X!H9=y}RY#mfc7+44ulcRrUi*sbbQ--4=VLR&3EUlk94nd||`<SypKZ&ARo+
z=UdILOZ&WEW6ts3Ny1O<G&F02ZP%#3wNP8K|9!#xYrF3ja+{lPe$dPF@|@9~7i$Z)
zo6n7q<Xn9G6yFLZaRx4CoAYmuPUY5Inh~h0wPdT>lS$#rnRl;DwL03wAGKx0s+rqs
z)8}NXUAuEPBrx8r!DNQ?nXYJESIx6K)87O=k(wZOB<1Blu6Tufuk%+XB?$kVxANAb
zpnx=H8NFvgn?4t5AJP`-%V)oz5w@!P&c1ik!&h$2(@(8lU)wx^dB)>w9@D4i?gkxk
zu=ZG*!ec8FjYEc4&n9njFiHN;G9fBUBQ1HiLukI==N(r=6St?WDzb<@;>vk{uj}!r
zdTpndUk_gyzs+0sXaifse#bOk#+gz(ELgv$C^|*iJv>)7gTqU6X_%awvGHuF<*Dl>
zrq4gu@=gBu!*69Xw#?378hd5ClXl+PoB!_J%imYOPF%2mnKIL|={Dk-x%Kt+#@3h4
zis#Q;wDg*WVrcd>+j+AuL}lq}B^fTX5Pwqgk8{D1&+`vY-kp}q<aT=TYNvm8zbBV3
zlAY&yO5Idr{wY>DosIytfEf~eI*vV4e!o!&Qup`ye1CZ>-?_7MKiJ1Fxwkm8z5R^3
zL;9Pd9moFt4B&qC=}n@J#o>&C|L3A?(wnw&y?43v?%g5xuPr6do=pB(TbFz;u2@)B
za`yrDO~!ot&qSDhXL-bD-n-m7jnA-K=98S$W35+NY7(orMjCzRS%0s+`j@qFhNh6|
zgqQ8!7gXJTyq=)g+q(CgLKmyu1%Jg3e&<;WSPrgvowHf<q0#Z$XQ!UcSgKqunY!|B
zxbab@1lRliH@-CVlukZhm~j82%CYP6A*DZ<`HeoPuTD92SAK(q;%B+5Th$LsrX7-;
z=4Nzp-Cu9R#or9$UwWI_2Va=`sXfDT`SI8PRzKH#|JZN+rzbz3I}6-CVA1=BTb8kE
zX0qvupT%LbEsmsrnR0J&qRJkx>1$MW-n6=QxB6jSoN#H-(YgMH&w8u&Z~A$6;@>Ru
z69tPZ_hhY4oxihH@m2ZLx?J}qFScBJW3i?+@PdE*pRTO9yWtTrU-<oaX4cnzzvOYm
z{M$#jv?$N!9<fZ7pf66EhfW_kUvOvT@u-F+%3DkReYkm!KT_yoyVUb3xh<?xy~PY3
zO{Fi@mQ1>q%#*_Dl3<Y0nsoMizV*?!8TanYo_~SA>%ipY4muYuyIo4Y@JQ{Fk#$<#
zs^W#FYBI}3FB&bIzvq+ihlHkmem661o75_2o{4o*VX1r+=UjP#$D8GEWrgXT->P$F
z>25K+=urMKvqpTEN#;ZL2LXC|Z|>D?eqOe)D_)_?sW3}7NU5ns_@qz9d{=XBiIVv%
zcLv;<m9l9vgGThpRW9B>3v%;Yrg)1km%TW%vUcUtRd4mogDX;ZFA$k{II}W2g8g*$
zy*E-?`cru%r%%|iqxDft#yq7tC8^8qTg_UuvAMD9cihqSQQdj$Z_6xP&uW^#dmpwx
zXS#p%Zox-yKX`!7ZN0Nbc2VfI*Qvp(m3!X5wb=7h>WJ<mjZ3lROU#>Abp8Gz!Q^$i
zb^80=(s_?=iT*4IS*tZc?cde><LCDNxlz$5_F~;7E>}e}o63zw+@X`!2QFBi$*yj2
z<LqpSCK;E^cK$ch+r`zdu88~?-fGsM*S>S+%qdKPhqyv3X2kX#OWywd?!DMJ)~)xA
zo$pVYzI5x~zkj0_7`Ja?aL`;j)3Uaw{-m2(<MpnnsZPbGYs756uloD)`(JkE1!sJD
z3f~?oZkglyeI}#9^Nt7Yv!Ch}$DfYj*>+0k=n7@4jt?K#FJzrDbE$mI^6v~%e!eF@
zIXA!Dm10uLRl6%)xMSzC@WOu#$IT}j^YlFCk=eOHd)t#p|F1u8er>jLS8fVEq-Sq-
zb?)wcDRw@~XLne{E`PK1^_*R&-U)tNCY&-a^NQV*-&}mT7uHBuNcpUPCfcWAJ<mi+
zf+c?$Q@Fq9;XuyTWYJlw)=D$1jxIDlu+_T%lapv~p-`Je$TUv9h>A;I>Timcy*s&g
zj`mfD?;B<XDwMws`Ty3YqI)^(r5_6Ee+3p^vwEUboZG-JpIgUX^4pNz)qr7nd2elb
zt*6)iW%ayHZ?z_?GtS!;v(kjuSmqexjPna~-zT{ySALScX664?aXzp9zT1Z$?>Ts?
zKh<(t*uM973QNBmw3$e+j{I3U*T_5iedWTL+oXhV%us1>c6VTR+Wv%3>*igr7dHPA
zmwT@g|9XDT^Bw;mRn$LTb6?0n`Iet1`-{-4e?q@zT$?4ET=(8E!sm(ScJ1D;%Y0OC
zMfCCRznXL+&c=L(VoF5%Yv!qk`4kHTi#IOZ>CQQMQFXYS-fQDuO7+jJCLG$kRa5)-
z<kt`9@fx11aGf9jt^Res`VrH@`C{viUOatMs^h}Vd;JlgTdG(N`_BD#?%fNejOLZo
z@2Il8>6V=-$?7ee|8RnO-wMT@)y_{|?r!+l)>RbZrTlH#;rU4}t2Mems$5w;?d{av
z`h0Et0(ZSMHB{Y}3s%0V%@cNRxO&K}Hb+Ck=|JHUq0i5ct_^GKZW0JD)t$IS*LfCC
z&%TAT-P4XSy6Q~RzxDce(3S-)53gok`{iOe#Y}ZoLB5)+_gYpjYlVFmS2s;yvovCH
zeJNMD{Ggv^RNAj@feF{5ZU^dbeXV;%d7JXOePXSbj-Gvc!d5CIAoS0UhU-DzE6?6l
zd{!cSbM4)h`Ag%pO3F8zNHRNRX*RTQKYF%(*VMN~d-v?i(ENRS?c3axN~Q};PRH~n
zy(?=8bqKi8@3+IS>De0Dx*aoi$hdmvlm@D7Hgn$g<KDN&w+m~u*H3ue8s!>th3&iD
zyeSM@wSG@F?fIa5#VozN)+yg_`rD^U(yTt~4~Zl%-Q?34<s}$8z2mEB`?b?c!>&1b
z6h|3npMA-G`)oP;!V7jxPT`zKjju%A&NnZQfB!e))6QrP#a*wCJpL*9VNPoJdDfLp
z_VaT;DV|A^d~<}^T!Ynl*7@SXQ-+mFm0`>6ydOlSH(GQy$yrD%>p%J@t9n?r?Q;Id
zC1Mh5+6vdL-(2p;$LSu^owBAv{&u;@v67iTW=tq)<7)fCxmy3_!cFpfE-s&*$)hBe
zd!}4>s``!#KSC^)PC7k%>FiC8g2mGw&Yu~Upd&r4n&<F=g%uCZi!NL?XQk4dPj^GJ
zP6gi+xxOmC+b_+iIZ#ydMNG=G%|9jIUo_U7{kTb7_HTU41cNJL`;II+8FTcz&i92A
zD&=~({0>d?NxVK;D<QzyZQ3`hz`fra7GHOm>nnAB_W6syZq{!4cJThitM>E%+2^d9
z_j6in2ZOQa&i4~c`y?W2RvckYTPqRtuVZ1D`1z@P^TMA`d4KI)eOX%L<`Y~`Y*!X8
zxu)~Qb?#EWd6%DDSNavFbi|WSg?;MY=C6y4zx^|2KdX9Z?UwzA3>>cA^>z5u$^9W?
z^`R*5^Fk#{e@rg-yQjY2_ilfhn3%D7`R09#U&JW<l5F(ynf}g`qm6yCz%-4*{s{RJ
zcAcY2OLon_*?1_&dG+I@N1SVCSN)z+RQN7W&iTqyzJ?D1eBaL)Uj5qe^WK%q3-5pa
z9}_i0eEE!Gfs3VCGh^=TdFw86M8!nIOrVilIk8Vmz<EugZM@f}C+e>`?Bt&Fb**T*
zx?Ww~`FwKCx$hb7-Cy>4q?=o~SQ~r&UKbI{9qQ^Wacpn+t!#~}j|wew3#a|p^uAgZ
z*gB)~dW^uWz5AvFi!iCJeY50PYeDUD=7vdYltNn%sXbb?h2ewo)L+T}t}%!0R<M33
z_ri23w|B^5k#gpThB4nCihP#SW-gDi{?in-y+||a$<hZ=t5?XKTV|OOmFDth>gzKP
zJbs6Me#qb^^|dN=S;yA?x6fW0FW)`AiL3JNDQ$fhuN!wAi`!+kr=87WZFzTgu9NK6
z6d9{)FYeu{+j3Y>Uq9+q+0u?17hNQl-P_>Kzh?Ptb`@dQg#tTPdFk5pxXGP6e52*r
z6}PoqrBPpsCM;s<HE7>(r);W{{ejq}D>|n8zI^fU)agTi_9Z=aih0>ttH*L?j#BAr
zY5m;$#wWGU-(Kk-;>xIy^X%T9tlHYjpC8-hpG4JftF_N-KYl##Y=87^X=yI5skL^R
zla@Fc-zsG3$*8)ixhl%b@?L&-p@88;h7F>+4c2Gpo>Kf8xu*1m(>?p>JNNr@e;3<7
zi)vJUETgEuV{_b@GiSBmFSU|?#XRZDrwDIe#|k!%M}-fXRvxd4x?^J-;%7g7dDa5y
zBkp`BdD0~`e2zC=nP}Pfmc3ukFgAiI_<cZ5ZB#?Bg6*#d1ye5iFML+cd9yj^%KeUW
zyYr<jPWL7sOVU|t9Q4fON6+kI>`xz0`Ea6hhOgzr=ZfXVf6m;Wdt2B^&|vL}@KY(e
zqVrx}G5)hRUbTnyz>NH_YnxuZoGt&ZT2}qQoF{!D^G%&U{NdphI{GIkU0?9`tq;5`
zJ--!amZ}Mt>wcZNY$MN{_EN{o(VKbxAK&om@WV%!Ty6Poao$_WaV;ifj%bx3$CZHI
z&9l8+LquX6S)Wf0lut<b=xDi6sPcqG_WJX$x8AJM<yN#S{{2^w@lx#5-e8NfziuXI
zK5sPDdhqee3!$=uhhMusV~e)h7WP6!{ZYPWwUkgz$;`0#?023QwpNyNPtl(6x!*!W
zL;h+=ZS;~iZ_gwdSMU4uGRyHw+DH5N;OuJ#H$Da=o9ZWbwoVdmtUUF`t<>0q@A22R
zgSwr<w^-Fo4oKb>%#>~PZ~NLDBU;$K_SHY-w}Gnrmx~_GjI2pYENe<xBgEd&e(rIH
zV1b*GU8Utw)}l?V=9~2QyE9%A=jWWFDEsKyHJxi|Q46ishdesQ8a3hZp?TjfRr5^b
zTP8C__egLr<Er?ljgPN%Z@RRQb=4|W;eb86?cZ+s%P}*nqwHAvo+}BLetwM7ckM}0
z*4_GhwNXLq%yq>UAAj$*<2g~-a@hQ;?@|q(oJ7lWrXOxxKAJpdVb0?#oz}jgs$ouR
z9Ya?;bqC~Z@9-{5me}taY34d{#=F<|X5IX_@3Cw4(QiTaf!o&ZIN`^iW#xG?C2D5W
z4bxA0v#g7)XI<lUJu1tpIX%dQFZ|5=Og(Qoehuk>z&SO`XJ?qDZ7KV(V{7j2tk4$)
zwXZzRZ8~=2+k`y#&yU0OUAbSCHgmn~auUoxy=b3)&7DY>6<+1KOQ&SzY<q7L6D6==
z*Pfn@FBE0<R?W&(&EhjIEuDP(-0a|h1+0!O%iV*;O73Z1jXQtp^2d3-d$)i4_xASJ
zueaJWkNDg<aX8G>Wr;v}{@J_RUw?afdiwr9m*bxXy?whqzCP1rd)l=#XZW+Ltt%qC
z>=W&ZoUFdz-gp0=rIdR^V<uBn_R=eMl}mR{)^W{VyQ)Jp;(%eQ?V=RUHIc;&|NYVU
z|Kz^($*#RsM>;e8C0D*yRxJ$l>6e(W^b>=;yLSa!jdaDdk9>#Ri{G?YUidy?=kw2x
z-pxA^KK+Ek^B+4xZOo!Fw@uS{W)RL3!D!Un*LXF+%C+S(|Gp(FmgeQY3Yxb{{{;Wx
z+#L(<UbklM)=rYyF!AP-c?Zu=ls=fctaa<V_-l#IY|MAWE3!?_Pp->8Y4==W+V7<G
zrS6Sg%MLmfZYnLBJHIb_<@E2t5gS&;TWq;J@tnA4k6vB*PkW&$5AEJoObT~$nc5`&
zOJdhEqu6QZ|K5y{Do@hlX}o-+amBYqPZl58wtKds^7iF-YXn;TnpUiEW1A~cb4l^|
z3oRLUy}L383Ym{Qy*8QW-%XwRpaAjpCSr{Y;_>q0)<=FiKm0uR&d(nat%~K$tbAr|
zb1dgA|I_Ak$ZqMxht;xdeusbOT`YL{^Jrj+>NWH3-+6Zr3Y<8}a%Wv(sW8{h6$e|V
zyniCC!+ze&>&Fj)Bkr#kq;je$1ufoTUM^qyT$0l&L|A#_6TX`~P5u(`M|_&k6?&{M
ziA{T!&!IR+Fn%iEt#fN;d=P$r`O_R*Tg`Xx<-Xoe4EABQe78uof$!<ttp&fbAM`PF
zTGig0=AWDIXJtBZmvWM>-;s5BueML$y)}*T8fV(nJZIbHk32hrk7o(Y`S&D7*lmV8
zzn#xY2cZS${Ege$9hyspR}_W5%&Oj1{rujEp5(WU^>(WoS>psJ%ed|h5xI5oj?TUu
z^HoRYFWSNwA=|O~h1{QWSFgy=GZaZ;UGK_$M4?@!sQzQT$1XGDIlgIqH!FHMw@mpl
z&0EuH#?I}H`(E97rxfL??V_m_6>`I6$y;B29oFlsqBTp*PF*ZEV5@m^PDAMU{A2Im
zC$&%Y)?5`eWv6<2!JU_NZ_Y;XM+LPCt=jq`jqkhL&Xp0{W4b%u*4>#^zi-OgDZY1G
zH57$UF>5c{$Kp`>e68%;jIWEnuF{_$f9GzWnAA<1rY^k)nUj*vs+`%bG4tUaA6ZxP
zMh_Ov4v~Y2Yj^J6v0{bTLXq!UL9BV7Ua!xO_ulFse&tA_gX;@M?%1dDUAt!%vNwOZ
zd?mK;cJHH7ZATj+L0#3n2EIL$lmeFT?wrX~y88O{*UQh(FI>6*);b54Uq@RWXS^+w
zIV<&4D1Jsq_msYw58r;P+kN+v_3iy@PE-|t_F<dNGhHq(RAA@TmgQ;BI2SEGz2gZ#
zkJ`MBxZQp$<To$>+;c?oW2@-TgPqb<>%?aq-e@yj-2OuIw92NX_334$?@z>QY5gp?
zcz)}rsCSa*I=w@F83r`Ce4nfI?T^FC#-x`aygG*_6yC5~c5Tz{%=tB)dv7?+7ry^l
z;K1&cG4X6Zxqf=RkHueEc&fFjU6Ny~3vr9nv=;f#vPX5gjr^Rrmy(xEdNvEpVRb&l
zuOe~D^3bBWy$dQg_J(zP&6=N-=*@HO%%$WdULnC2>sL9SZ2mI!>59mO8iMogK76nI
zsjTpU<^1}qf6tz)yL@cr;|Pzft4@4yiEMK*db8yIO;uef!AH)83{U!&ZsF*CcKe*>
zEosYjtzz%4zC0{;#`>;+KldWX4{td7)Gg0*f1iBIGcVm#>ii{}I`#8EbIRq6mS48q
zz-LtPUH<&#SM0v$E}vk~X<6|5YTEn0FbmCdMaEAA&Mal#ar3Xj&0j}X+p2|Y-`yL2
zL-v@3d1F8KY4^f=-}^qll(jwUIPIm*{{OQRSAN>+*6`OrQTJ)(B_+#{RAqz4d5v4|
z`@Vjru=wOmMNWC~&=w`f!&~O*TRbvc&Ql;^{q9mtwcds`Ejl)LioH9od@!A_k)3H5
z_w4%{!CT2|76-jqV0uCDrd3f~dE(cOKnGVbUv=XWeOC6#9!j;D?@o7ac%{4O=DxMc
zch@X-nsJlK+sgI2&eFpM`rSVzA8XpidN2zuaeW-L%W&m4J-c>c0VY?cLzlK{a2{q9
z<jmAy5?#ABPt?i$R^mOQ1|F%cS#J)c9R2$w+(;<?M&OH^XDYX!-2CB&g6orj(hJG=
z{6C(^@ox|3k3XE#zd_@>lGfJgsn4EeE_-NnNGPi@%u$hR>!O^izM0qZ+PUJl1$A9I
zTXgc^1pdHjw!dr6KA)-|b>`0OQYki;D^V;#S(;n<3a)l&Tw0rRy(sl(PY~bTgUeoV
z{t^}3vG8s0yX*e?x#mUNnHl?+9k155*;lx?yW#BOS)zhF_vA$BT{{!nAAP#K*v?Ff
zPrx(2bhYd0u&0lHpLNh=WeNz)Ub~bdw(Mnt%(bWm0l!YC?!KZlC3*YYJu^i#zea6+
za@u;*(Os8Wo`>I>Bi-9vud(cQ9Y^H0D6d}g67%a_tHiFZ`g-V)QrxQ0Ra#!24x0i(
z8FrbP8*l!5Bmei?4EAnUF_pP3>{A+^z0c3ze)~5A+wmoPrZ{cd>h^0}?kcS_ml>O)
zv>6t$KRR1E@q^v&k9;pasC)gXW2rdz<5`!VVgJp`uC@Q-9!##^#~!z!dZwIjOINz_
z6zh!#zaQBy-<#}o_Tu8zyP{WC`wP#xoV)cfcaM)^-%FW|cf74SEI*rWNRyqXk;{GJ
z)9&}zj{Yx@OE1cu|1M~Wf#B4N3)2tHwNg$>aC7QP+tIZ<Tm47x#49Q)^LxZ9z8I7#
z3aan3ZO^cJx?Xs@+5Tn|qm)Fi)=w>a=dZXe^5E;=(l&Osm+PZ*4xgLH;c#xTewV(h
z<Aa68f!{Ag&7Gi{U1<60;)K`@CFc?rTz5Hg$??T${fX7T?zAg)eDyljwCYH}hqX(-
zeGd3FQ|dXtg@OFPADed-);Z*;33!@L=QeQ(DP}V*xM3*C&3I9B@2uzSlWR0S%rkiv
zJhim^#M-rbVVXDoZYwww`n&X!>n@{7+n23u@(=5He(+$5+ILgY_E&75Qk?Ic{_DfV
zsMevxuD17W8{fnWnTraw^#5OA|10suMnCz&tS5h#e^8eE&-G)i?d|s$y<AqTs(x^2
zar?QO8(5M&e?Ob_`swuDb)`%)OaX6f_n4`FSF(GP5j**Dah=7brto_HYr5ic%KP41
z%`-i)GWsd&Y71|HC(j;wvmUcQFRQ-toq>4tnc2#J-Y?mGCVXOhl-&2ksgK{@Xg<<&
zN$`Qnbw#slcXY(Xo8m%(CpRp;dio|uc=$nqDf6ckK6!O(LA$0&EW^jm&08iMyc^B<
zaGI^>{OLzeth8>Jx+`(_wIJ@hN2`@pG@^v{14CH0bnbYkc`Qct--$EVH|}|}a^jXK
zm(a3@dQKN7ZdtQ5<-viDqzgwMZ(^zx=@M#~duHp`qff7`pChI#+Vnx?MVV%9`t9;;
z^Kz!EcX=*_ME#$TcIx(_9fu@6xVpA?c(#U>8Z(A&y&}byw?^TI&+7|LI|CSUzwCYQ
zWtuWo<6+1Cxb@#&?yApzdo4pDyhvu@!~&V`xmx%0?%#VE<|VjdT9o+=iOjwC0u-41
zk`^YnE!%oxYh0gp{<AMZtFKk<jM=uU@yep#8;{H1jk?X^nyURYb+wM@tqqsUH?M!c
zW$nJ4sOH7H4m`K(Fy1%6tw%#u{jRsBtW$m(AJ-NUN6oKW_pOLMWxa1}`6(SXSBXnC
z=GngsWqxmbzx{F9?fd<2H(k|gxRBJs+<N=tw{Kr9zG;f|td7sxF){n%2TAtD3Kn0s
zhBa<E;eN2>+Na1!{;ofj``;$<$j&=zxHsWiP3O7aahET|->ARC^+9Kn+q`Eh^-B*;
z`d6Hu9GmV@F8r9~YUz(_CTG;Rj2F(>89M#K(O#a*>zL*~jE`urQ?~n@Vr_T7_|49s
zWRbs(8yv-q_~(C`c&sE#Z}D-3UHS7%<{sO$K1ErwRirqp<9<m(=(}t0BA)e5n7n?a
zyWdx@_Tb}pYZYP}4BhHt9n7!nk_|m|t|CIrVD6TR^j&i6H;alL%Cn8yl49xC=Wugp
z9@9I=^M9+4{%|V%pYk!}*;x*6o7E5AbH7yW=B>XY-C2F@Xzht`@rXG`gOdZ!s_*o;
z@Lu$9O_J})><1#HA1|1l^|@Ec&%YwsZ)(NzS%**j60qaVsR}uB=kE*l&;Q<^Tzzd4
zyE<P!*NrXQzuu*Y^RHV_ZGSjPtLM|+(?!ojW(B`C=6@YwQ2g*DW7V&a4PPQZ3cMDm
zJof8_zlGqm$j2H1_F12=tue9D;d*tdK32tI<)u&O@{b5<-)m+&*52W%{d<G?$1N7e
zt_rCyKDAxrqw`E*r<$(L{^b61jPE^#MStu#_v75UBsrGDGWyz)9wtgD;!!2r1^HiF
zCP``KZHzgg_jjGI&!q5al_h3pODBHo&*9zH@hRn|>Z%8CBwp?^@ROPU<Ny4v_1V*&
zcx^H<j9qp6jK2%Ri8HHS$$1r9sJQ<4|Np{=)U;nlcdx!#<D;O|w>$UhQTDs$#@~e<
zL^&H%m>3_l#Gl#rGIedf;S0;>292AZR(oF;`)0UJ$E@pH?N>)uk*G6WD`!r4$18r9
z#WF&<p#8a<&eW+}x3RhkFL9bYY5&o0X}_08YySTD_n?+g#^cR}J631)KbGBS9ipeX
z%t-F>^1x{v9>SVx28BljAKgnkGgB>XX~2wCvJZ=%=|_icZ4>U0s=M+*;-cD-Gcheu
zX}(%jDvPB86nkD3$}}BV?tisl#SXt|Y#XIq=d1ZDp74HAz*ZXETE^*^k`=MTzg9?+
zF;w?pQ1d3)l(!*e5m9;?LV^q1a*ah3*T%7G>v|uLP<I!b`19iN--7SL*5p**EDPoK
z)pj)2IG6Ii$5Jq;^Xw_<rgr(M37TQsHs#J{TAC3kbCsXJJZMEqW6n~W1tO*woceZO
z{}*zdH_WJRhWN}op3!|L_a>Ad<X6)T5qa_8R6@)327@<gQFiseLh3G6BouZB+-^GD
z{Lua5V!7LeGX0M$LUxKXA5qX@y~*kE<L}?uzbC4lySFx5^O}{<-D8_=lpMsa29+$y
zD|b5C^O)g$!4bW*KhB-O(;X&X-moZiONP@+*JyqJ%&%wXR@{9zN&k?0`-3%!aU%A5
zKh)PJXj&Oe+<N-=u}xhq540>T&Q4m)T>ZKEtepO>OWS2vo?f;`{QHE)uP^J*8QwI=
zFXUi*{=n$rY0su96JA8<p9?wOmY>f#>m<))`4e;OFEINjsUDB06}x!x=-Y!k|AdJ?
zHaTCkP)+yt`-M-ZF&e*^|Ki4M9;Js)29JO0793>f=$(G&YGYjTdyC7zPA#mtc*gd`
z#=vI+NuKgPHF`6b-G1?V)>ZBH&Cfr5xwIp$>$OUN(&39c?Ee0?Eqt``%QOAOeLs?%
z{wBRInQBxpSE2q(mL)?N^Ap=`Q}(#5a;#r(C-Z|RGFSRwz(oh1<ivQ%jjz`&uAg1=
z)O^Q_^s2qzt3N*%*S)rq>*>m?Q(p%c3#!LQarAKI{^?{rUi!<oQ1H~etSc)WS=G3=
z`S(@bzhNrSDe}r|%|GK3leuSCH~j0a&-q)$muAcPL@m4cW8ax;znzbL_%&TbGdtmj
z&b!}-L!4KN7(UbRe%5<xx!#+bKdx9b>#jMq`-h!Tsd9vBs->?|2<tuNbxNt*4szx_
zJaL=BE+%l#mckgvT<>)szl1$EH8Q?$6<8ZGtHYtxBt^*jY`W^tvQN?rCQH?L?UX(|
zE$5naTC}e5LWb6;$DAz8UZI@}HIvPcyNH#&72T8>sHA#ggV>!PMa?{|qGuB>mh=`J
z5nms8-?o8WY09w;>?<zJS^uSJVQq%VtFRpXrV~-7b2^^x^WA!QQMzoL^Sntd{tu_x
zI%prb;cCVz5*old@#hAHkgIFfpDeL?8T~AkN1o$MmxmhH^%Z}wTuR-~%(ck6S5WSu
zps3Ky%bKhjPoy_Q?leB+!ew%0vEYRp4Uu=Z8$D!7W?`&K4pB6Z6fBa_le@aNYJZ)^
z0yfb_@4}QrRxRk_U6I;<YI4-v2X8;;)@#3yXMHz$|CNXjl`ZUQD|nrjW_N^yO-j{%
zx-2V7{%Su*xq*CVj>qR$Z<Ftc>L@)FeD!Wg_lm5YcejY_wXfLE9=k^>J5cPfwMvzk
z;ha6yPn!eR%zC?XNyKZ*GpC+K&b0^)*xNC+VU}6%rb%BOJ8U@hvEhHo@0XwbrDgk%
z2l@ni1cmS@#Wi2OduaQI_TRRT94!x=4cT`0p50r)bpqOJ>iDk($EY5;^5W&Pt;Z}o
zUCK^{c^P`|XPUg`SbcYOSl#<6hckr~m#;OpPkaAYd!cR2n-A)9PhFnSC%^kd5YNI6
zzD>EiIR1QCXu-mN@vLKJ^r?rnTROeZv|NpBshaKI%d0JvY^C`9bn>@DT$ySqfp71a
zu|x-4Nn(#*f3wDXaoMkCd86xg+iVY=za#K%P1U(W(M`HLC(jMOnY?z|F%EtGmYr8?
z_{5k}6J2!wALnM+>wdLm%Zoz^YSm}v-%0tR-v9p9o5^j@&TrUjcqqHou6q3io;8mn
z_dUDEGvnTw`>y6S+XQnsJbfoO%(xL{<?LF+a=FiY>G~e_ov&N7-dw7zyVAJ--}2=)
zj6Rl={T8qFOFJ+{weEsJl5Nb6BY9d6mt>Wf>sU=a_*n7$t105F|4K4r=3PAh=r_Na
z%IBgvIah=?XZL+dORfK#8&WX$&U`jU?RSfBrft5*_wYg2mh;n>1T0S}()*b<d-t9P
z_x^tScxzSrOs<#psXZqvvwx^uk?*{CBGxeF+4n_vg=2E}ir0yr656Tr+~LL2_|u=m
z7m9qBcQ4hMrLm>r*1PTUn|@DWsSJLpaExQ}BU@)3vH2HfIO|B?zJCAX=CuzW<saht
zKdtguaitTVvf;dU#|y5UdexH=_GNQ!>B)&x=V{qgzB=!9;+}HmDgMyVt1jI$vbsN<
zIg)Yyos)-Web#x4DH)fy3kZe^Kkc|K{7v`7solYW!4a>NRFsx~|I4^oyu<ajuPblH
z^?56{g)Q-JbD2=+qa^fY$;uNN+|e_hW>^-4Z9BE6F|6)>LZa`vpf`uk@*GgKxpCS)
z<@iA!xl7YRbpwJ$9TuN%G}<`T&rO9nTSxnreVAaY)El+m%lGC)^*cu>H*K96C0Z1`
z+tKw$f=grDBB#`(bH@$%PA6nPDQs}sZxAwr`OOp$Po^24UN1NjxOEpRrzMMfhkHxK
zG;4zy4UAmh&ga|zeLB6M^8c?on=IQb|96){+%yH`bib=q-^y4~r4bphZLdi1)@qwY
z*Y0R7Ikrzds=aHa=pCNCuW^q*3YadhxcM#D`1Us2^#|GcwjVA(?V%l_G4+FiS8d9o
zIM<7t#P{C(mdS19?{?s6-1DD~mN7kY6CYm+y5e9cbWuFW$x`ks-~Uauzh0(S?%6MH
zEUMY%|NCF%ZL<@*!+$wiMYe7gxl?xAyn6T4?dP}q$LH<bvF^C}#Dev&Z_ch(E`M*9
zJT<4vLnZ3C&JpKVA%QWPP5;|A)lEz|_lV*2w5fBn{uhV5ef?q8>-GCPdjB*>w{G26
zu~oWXK!N3&xYUn{`+j8R&gEL|s1eouTk(-(z$}}2X<>?Kn=Z^sIGP%3z^xGZL?yS*
z|J9+CBda&2h;^*;?LKw?x7^MZAFf|intrHGa#vx=*JM4XC7-tX^Un?q5o^u<z>-*#
z^?~EydtV9hd1szVSF%;E`SRU(<*x5?nYDs*BYB;lN(Y~uz!<c5hulfcvq2>rC$H@l
z^7ngxN51FBlR4X;<jcwwKVo}XUfI|lJKs;;uj|Cjwf~&ne4b^sf5*#K|6?8fPwc##
zK3X3AaWZoAlgsW;JUA~de)h>vGVRgUN7a@&@+)UJn;ufV|H8d$27lE&RgKC(Hlgmi
zT;5~HKDT^QdB6C9Ws1~km&R=0(<>vgKmD50{y0g5b@`<9+=EtnuBFp{965B}%Gk_h
zdi3o2H*Z*`F$SsUP4bCcza*(7B4^k8XD?+~SyF%H?0BQSJJ_Wq_vQ@`otAAH^Ox72
znIqybTS&qE>xC=NpXi3E>n=Bq@LPHDLF)3OohA=2bGwB0c?dNWNS*4Qv*wzoe27j^
z=+g|w;z+-<oc`RZFJ>p1bYyZHbEK~ooTqi(LcTmX!)(``!k4!yHl5qMhi^e}&w3H9
z*t^UZ#U$CA1DyosO<Z{K#je_i7f#g%iM(;H5I)Q5wEvva-AQjXlNbb4vSX9GT+^>@
z&^i6RhhK{G%&fT`D+69AX6lJFb_V>7YyL3n%LKunDn7~UR<2QZj<=o9d0n3`!olJG
z(QdIutEFs5gHqRCC}CBQF#J?3anwv>ThO7ItG;)1NC~O0un(G>(K1W)M&7ng(PphB
z&-T1uF;T(Cs`ti~RiC46<+yU#x}IIAkt{k9axE!P$jM`UoP1vN&UFVXI%ODE|ERv3
zHD!W>jRgN|odY4#s~yY}?B(v1-KqQeY4fXZufPBP{`>FmzdPjLKJvQh%T_JODS0a3
z{FF&cB@e8gzBcFgG`;Q>ijCWPK8YQ8li>aS7O&}}hxhl@YP+x9zI{Q??PW){z1<~h
z`{Cv7LSv^27aN=P;(|hgC2mVK{=D0veX=N6cUGRqv0oNPA6d;x6XOyTnI08;HP~<d
z0})@(=Fk5s{`~r}Z2q?MyYgo{6v*_~*8jeId-?bIb-!+3zrRoJz0k~~Zoz+RZrA_(
z^YiW7%eQaeetWs@_ZH{CGxzq_$Hs;JKIRh|+W&asWa$-ME-4G<dT6d%c2N1nUxqjQ
zHFlA^|9wrC*Ae)edZXcJ)%R-Uua}iS-k-GPsyati?V0(!>B*v(<6jER_~lr+fo1WN
zCvwfj^SAD;=8@%UJukM?ZJ#}F-Sim}9OwFLB|FTUl2z_~<n8`4r`G9w;-jodkq+yE
z=ku?fbT`6e{xeC-LaE~)ZeH3j;h5oN?x-p3(Y1R7oGgw?%d>RwEjxR>H&pC+j856|
zrh+T!JEq)~(B*1rwh>ys#(RxOP-jQY4wcrrGsiYOm?1fRMsk6g#i!N`_sf;SyL{sl
zRxL_qG~cXW84~`*Z~lU^+bwY%4305Am-VGSeVxA1!RgsI^(OhQf?2VC&)4p~P`pg?
zWy_Z%0>7M%rk1&Mmed`+b--82%750xb(fo+k|m{WPA1evyohgjChyC=Zs{MVlM+U0
zPp=%%*(9tJ>f#^a!BBEcZ8PT^jmt)}Gd8LnJP^@U*=#*8+WblXA>obetBTVL_0pCp
z3&}^6O-d*{#J|1Fwz5iMmqJ6q)s}r*0w2{Fb}tg-R@=AW@h4YTCvmG_`NtXpvy9(u
zEU%dryZ%wN*_qVcLPjh)E^`V_zEo9u(X_>EM@PDaj(gp+L&x77XA7&o%#pRnY}bPg
zf;@X|)92m$wOntJf63`@stmdRN?R&;%fB_Ov&etow>~A4ZGHK^w7>-k(ypn~4=pfu
zias;h>=e7J&%rsTk6*PFUhH!r<V}yuuV;H^1%D3Pk-jW*!@`#7M-BwP%Kx7r#>Ma~
zW6lz5#Sf<}Y!10DJ-{%r;W~$SXlkRpZrql)(oL(To8IZ45dE3)xPjZk*&Y+z-4>@x
zE<d|;;^94P?_aO{ntGpYLfg4x>m*G*{STb%P+XC>efC?msvXvsey)1x)aq(y8!K?<
zJJZ(kes;y*X}f<mZ<uz#>TT-fg=S_CC2J2<*`G?&IGnhFr|6e+me0&Hiv>(9CGJVS
z%G5N8(2NfG{q1#iZHS=Edw#C(=jZ3#m%snM|2_Zutzmu9UFm$YzL*L+_wW8)7#=E^
zx4Tu*Npi`OAlrW15IIIEk)uo4=57s}=+5%})%H!f%a-0?`yaI3SF7>2;QOon8NYYW
zleX8X{`l|v_uJEJ|L<9-p{kU%UoE%5pvuqMFZq5#$jTMlB8z{xT>kts@bTQ^j~o5#
zet(YtwyDy#?)UHe=8xys{d+mT?%&V+`*pvke_y|K*YyJa(7&Iap4Q+0XV3fXf6H!v
zyX&)dN>ERV?W$__gU@6Wmo61ENMZX_Rkz@F*cp>q{|XYAubq-x&sAP$u-4vC|6*dV
z?d5Ej2?c$vs@Edx=1=~(*oiMa`i1-F=f;d;4F7I${Of-iB6inS+&)JBn!7O<tNXJT
znV-xq{JZ?}+PNDOgDa}CUQ}rCr$?wuZYk3XX}x8nx9Fh8ZeNMzGlF0KDW6|@H(1s9
z>NOkA^__zMe-+)c&Uh`;>p6jqf0m-!H?OLgGhUOXh5FWM@h|`Rb(hXeE!m((h4T!r
z0xz$blW*ys?(3Xx_ryp(_VZph15F2q3++#LZY{a+^kGZl))QT;*8cK~nLqJ~1>5((
zEgY*9LX;kC=9}Fh>)={rqoJ4+bcox{lf{82>FTXjjGq=yJ$v-br^S`;xZACked0QO
zdyc*M>Z^~ZEHId-buiRz&w=0mZ*RO<=3;7@Ba^W^JMI&A`9bf%TQm1{`E7XUy;b;}
zrq1gd^-?R=dADVoe7Uq`*>;8RS9<xH`c8XqJKd4vf8o*wX>V`$2brapFaO<;z!SFg
z#DzbFTmC*j+qLH4L;pnw96$Hwoe?mZr>_#aTQ+0nlw0pkMe+m~fA{vfU$b%Nyf8Mg
zd0VX(Z;E%bWBy^Y_s@|Bt|rc5;?Fk(mk0=`JG~G&zp|cfUs>2k?%8i1d|r{ozdYJ)
zfwD*Ts|~wW#VuR4Z}qzkO{J4UF0Cn7a66H}AJ*Hkxa!HR`Kvj2^=EB9x_Ij^m)*@_
zyZ3!wlJa(D_us7o96Mb$`A?l~lO1;Zl+-<w{>2L>huz6Oo>#Qy!SMixX$P-o>fUFM
z_xy2APv_9fN%6^X6ILBvQ0THZe7&Wwx?gztuKMcZb9!GF#OR;8XqMoh{mg;!YRBB9
z4GMe<`{!~Tnd~qnb!y+#^4)hf^>;Pb{g|;e#ia4VXOZ?pX4<#aCRAPNIIgxWcelpE
zUf%hW^Ox?^3^^oeX;LP{I*ogB`}QZV(t>4KtEF2v#IG&fA-^v-`q+_Y*M3fMx{_`t
zwMDs1TSQJMbXCc_RgyWuihbV&u2?kn{NBD>KJMGv9Mz+iajo|@Z#&?qeAGks>ZL6|
z<lJQ#POnSV4igFe!%@}bb2C6n)4eGpa8_G2lS!+A#A&rGO@rGFcg>i-zv4e{x$Owc
z&aKW$Ef3cOIqJ^uk~mZKsJFy1aCc9M&(tOPQ*X5GW>j6kBic3f)5<$tk3ZJ+|K4t4
zm;XCm?&FVd@8Y*9#sB;B^s{}<&oA@;Fa0BBYO{xJL4N*xyZWD*x4R-QUJ|gL@TtP)
z-S^*TEQJ<sQT3nXW3*?R@QuSh+mo^+w@Nn&{HT$#OK;mFmH9X0{@x2Gt?X;UcIm%9
zqSNYsW}ji-TjPW4x>)}^gr3{^{^;c?&s%r6MSEz@%jyn&xNDwMOzQW;-_A<h+;XJr
zu*3)Md2)Y~HM=i2)K7i1#W(KX5|0&!vu^M@wEPl!W^#HB_v<;_@qfN-E&cyeAn?U!
zl^-p)B|UB=Iz&2c{M`3uxt!!8iGz{;d^ayodpyOFf96h~KXae)bPJR}{A+&j@P%t%
z)it8qer}qUS-HToBw(xR1iN1<^RL*wm0m1(E~?o(j{l<CM{$#@DN<aSf{PqlI@o4q
z<=xW1&8*jw8KHE8(`U7Ck(F$GR*Tryvk8|>zMICCpS!!+E+Rs`;M6PYe0O#>ma4b_
z?=42>IK@}9eA7%{sXq4;r|qrUo3%j}Es1k(-Tf@Nm5t{y^I!RA_a>g_UM1LOHRJo_
zjbDWJ_fO{RJaE*-Iv`+jjM=opTv->h!#2l*=lPyG(pOfibYtEWlL)PotUbHF?W{Rx
zTenMp`Q#<rpZvX5Ajs*;$kcS=i@C4NuV1TnrDU*qi6j`FHe=U~lc=%3XJwaobcdPC
zly5%lzl+!X_nP)&Vtx0UsX}!EMuN$kmrnCe3jJQC`?=cY>)uTjOvj?KU9RZ;?K@+$
zPBMA#iI<08ZatV|wJY%>uWca9FR^eo^=9Vp+<o5Jdjl3U1nEvrxp?yN%2O}-kA7ZI
z8+mgXlX-lTp2$5nEs5wQsunqtI?S(sm7OgxPvH7x%z5@=*Y7=3dACi^y?^G6eE+W%
zPP4zaJQbA8?$*|wxpZ?zWtQI4(EQ~`W=~x^MZoevfP?AB&BYb%mRXInrY+i#;GEd!
zep-bgWnH6~X>hRO>%S|$3Y6~dtvMZ-I;kX8<g)Kq#eyvjS`up`<h&*tzxwv>2ER+s
z%Zzgy7qwl#!IE|2)TG=Khti~dpKo%C@IAsZbp=z`rq2CGzioQkD)_ZfAasXhkLdUN
za_*L=!i=sQ?b;g9?DHw$YEEkP1BYdMXQin=?fJkQ5TO}n(zK@6^h)^@=KkyQat{T4
zHC10$uyN%yzd6bv_s%rEXI1!OF-~#bBh|Gm$~$U}c8f|V&HuQ`WghR=1isYUdsv-b
z_j@!ton4bN->&x8=jr;#zs1V&$!YCd?QzpC_vz=`+xEWw|Ksc1<$G)Xe(Q+3+Z=Fv
zo6POkJK|kjp8NxWeJ-u5JZ6}#T3Koke0c&#XsI@v!Ha_yf;>M({#H4r{AaZMbt3NQ
zQP-bIMc=BuCaOLtzMy=pCA09++|^7ar7Lx&dChq`F*atO-Fese6Mhv^tv=4rm&Cip
zUoxDwsK;h$z_}0FKFg{ehrE8ZWzuKo?11)J7CBE+p5}#UERXx!nSE;VvIl>3<QwIe
zKhNwxqUx%ux=8%7gxIme6E2i_`b}oq>ZCt`QQ_oTH@!P;mh0dCpI^FedR*`MkpAH1
zU!9^2oL+^`d&poLr@!&r$$mw7XQQWDO`fM;Zje24V@(>9>z0LCZI72&ReT7V{_Y2h
zN+hcR=Mn`$m(AYW8zg#cLuT*f3zFSc<ve>=!ps%CeIj-Fs;&J#s~`K#zH!djCch@-
z^P<>iOxjGDzk<a(|LFXzWa{-eJmvdhW()1y4O$1)iob7>n{fT5`~O)Y%fot(d)8@M
zd}WZ$WB(`ccK4jc;u$aA^rht)ssBk$ie7Yd3#V!k%T>l%`j;nuEID)TM^E5I-k<*`
zU-O!G_=a2co)>cA&-b^r1SNBwzgQ4p;hvu$WF@LvDD0it_k7J|dxf)ex_&-goc8L?
zZwvd6-}cUu|Ig7|qi<`r*gxr$<So;}8X;!J*M|${n0IUB<^?@vNjW;R=6{ianW@iQ
zf%%&rJ}drv(8%|R<$>Ci8ODm1#jIk+oids4bY?wvh(8u)sP-<M{moMSH){5=FD``m
zZsk{7QsEi;`n>YDy<BQq`@+3XPV19;86)!S_lr(Ht*6^^x5v63i8i03-xj<^_3|bC
zbQ7can6m5qQyY$-O<+p5VK^Vka3Cdy?^WPA{aKtlIvV066(4+A(z1P@OybfhwZ^)E
ze=BSzsrIkU&0%n2WHz~dG2kIvPFA~=_sJRY8@P_-xn?l_IK0?(`*UXp*J+p3t@5^7
z*{<L^I)UpvOH!j)b4QDG=7siI&zI<(F1fsErdYT8kG~I#B3^YbQJBImf83e-)V9c*
zAuqR9)pm45aqIg<POm;&`T6$kv`zQ-@7;g@{Suc_!ITSn0hSRxZy%hTJNxCbFjFOu
z{;tOsS+-u9w;S@57|jpOo8ja7%Wy};EyqnxC$vIEqq-tb3cC6QPhNF(Nh#MQ(Hq}u
z<ae%8x%nx3b7J-HFW>s@|2qE++5h+S`lv74w3DiT|M{t3|LduK{jZmAKiAd&`SbjL
z+3)xF>lRlFd94&UI*W1koX7e1_wV1ozkE*BY`-YYpBrSJ-I}<2iPlnw7sed3BIbSX
z&VL^D`d{OtI@j$C>#uyver>;Cj`*LGONEXGEo1U%yuGDy+pI?{r=ndLs{R>9T<<%b
zmB;d1M~myjmnoU1)jhxL{zoke+~;=d(x)|Ys&)Ne!*aE{WA?u)VA*3dp)85>+Tm%=
z{X!zS@B1~M$xT}Ry6ms}t7^OckFz#7DQVwrd;B!!*JG6x$BbMawYE&`b&^`WBZ9?v
z|EH<xzmnbGUOu`-A|m|hW<CBupSrI(-;eF8c`#SKa=*9zrLqafR<1mrBV5IHaYjg_
zYKuiX2g4N6ub<bjzxQ;r<m)Kot@u>w_haH}DJ{{HSL~)RKK1IX^oxoO`?&gD=X%@I
z;uHDw?*5JxzYwbU`fTrV>q6cG2PGvff803y%R#4!V@+AB>Koq**4+**|HW3HORf0W
za-ZqkP222`ZP{9EDRJc$Ue@-W7po3$mJtn}Gxcu%0?$uItBroGQ*Ps${hYsK!h&69
zUN_GE=zhH7rsfAG;evnCCrXa3Kg=6xXuJ6GyI1?nsuH-gMY!E})oW?2l8Q|JG^_IX
zJLRYROH-0>8+No?u1Y_jEu-{%-KCWR>Ndt}4J0~}Uteqe?a;TeGx)B}vhOvKVf>=k
zL+;MM^Tm7P{s7Ui+gDAr&RpHtc6hal&GSbx&rjz6d()DCR*NUAE$qB$>eN>;M-nPb
zZ!suKGTjRGk(|6L;zgimf5zSkDk+zZZWa6xpLACF@|?|1MKfL2wbkXG+_9XOQ~R*;
zoRf>0tCAQrCoGtK&@9;1#wO;_Z=<k-JaRodFCXD~G4Err`u$&9x8^==?R>InWsqFp
zYK@g^nAfgMstuX<kL~ITKQFaU=MD$n3N~=g%H6(6l=Gw6<9@@?56VINl5Q_^U1BBZ
z6`rK+<(m51@o=_F&hA@jJQH7KX2xyz+t?s{Yum+_rcsq$mnElk?yr0t)$8jbD*8!)
z=R<$)EZ2hG>I#;{*P8PevSxj8H#oAla?|$cn`RLT+>vZ|ovW0lDC|kP{r2+m+})0K
zy@!uFzY09O(%`n&C8wPNFByUkmj!2ed_C+c@$UQg_JE_wpUbZ}Z=aiWLqu?Ac+;FY
z70XZNy2z|4zp?C~N2||i7VDacQB8F#UQB;Gz4pHRzaRf9$`^a3xHm0g(*6DK<>}}1
z_q~~)KmX8Pk>H2=^}jy(|9>a{@6Yr3_wC-lKYqRb&t?DrAJ^C0wg?v1%X~k)v^!U7
zdDvCueRY4U_S+}vg>tUQJo%s_=j6qLMnw_h9M(zy?t5<e_u%1=<7wYENn}~iDz8!c
zld$pDZ4JGOzv25M)lJvLUF^JT%`U!3`{6aAM{{yE2h1|Os+z2^xcf=@4*TiH<9w=a
z>Fk(qVIaeET6LZB$4$Ii3zNmIQyI@+QaE>IUl^O~_MCa&Uot0s_L*v-9~NgATp@Sy
zYj1nmrx_kB%l>gs&*c>3lb$cxdSTm5r+lH5!oqlimAwUBrC0Qnk7s{T`pj&9c9PXW
zh2`((@Shc1V|vZ5Wkrm+&5MxvS6(Z)EB;lTKgD;6TXUAN%OXjo>U$Svyf8V)b?$Pa
ztmZ>YhYwG<Rs1~+=9=ud6C-oz)w}3;wxAc^uBcyUI``tjbyoxXX^wLqR40Gaxhu9}
z?fdFOPfmE%Wcr`Z?qZw&Z<V&k&88U}?!{<#a%9_otUg!x??PmA_Wi9Ui<+lh58eCN
z?7%HXU)FP~XQ!;bm9Zmjx<x@*V@^r7Yx{GPA7MO}b%!nV-8DWK-l+(=yL$8T^s5iI
zysf$wv~BL+q{_&JTRwktUcKy=oYCeKr6V~XrFa8=Z^@bZbN1soGtD2rs+gGaH}3y=
zzn8hjzrXFj^zb`J*Y^isSb_}2wBP=4`SfqexAl_g_B&H*e{Olju&#Msc1=<L+k>}M
zA9KFkvqDZ;{riJdhJReOt)}gpvs)MDIx)Y!e$9PxR+qJ;w(f-KaphK=BAx$F-9NNt
zdF6IPzg2%rpBcGx#HL#QP(J=o;)oGngNQ@_hgrPurX-)4E|8^Bz0ue6+Tp(sJLg<a
zy!!6C)%j_Xk=J%}_dLq{c-28;-^zPDT-JX+GGwx4e5vK%{hmw7Ak>&`uhnF|`I{f_
zNtxwo;gT>r_qU(Btz2Msx`RN&%C`3F&MX~gzpXV;;;ZfFW$N55|NfojWA1X@4^gT*
z(mbBek7Uj%54zs?=<4gex3;ZpdKaS?Ys3&5?6KnbI)*25b^PC7beV>l>TKq^pnT-?
zx-3pX7FNcUL9R)=Z<g(LwSDzh@L}BUEg>ud<@@iy-_|+fMw;oY#p~okr!LdwJ-gc7
zV&{ig^Bj#OVU|iQg5Q5vSNvG~;A8ZI*n4K`s}!!h2%Y6KSKses$a%}x-`kz`{MeWO
zJJ<X+kHv*{hM8>NEUtL}Tv>C|?!Cs>Kt;nh<!M@t0)pC$e!o?BJ(3nBQvb8v{_juy
z`hPE<`~N=s@1cJE&*l67JYD|(^ZfsQf}a9kKb*QaD^JQ`?eULaUtj<IyLRibv}IAt
zt}cp*i+}ve^s1Pr%)P`UZ>ASqZ+_G=us+S%<|w@2Z-b=0p4mLpeG}yS3zvTlo_DsP
zvi(@9%PxtjaUVXZh3hR|TzF`zO?BDtm^w}a`5o$UDXX<rb8C!d#c6q-`|D-s6ZL1(
zcT@TN*p}@TX|ovX)ok=$ee4jNRk(MOzCsK4v!_p*&K$5To4de(<#O4lFH0Nz?;eVf
zT*EluSLyogwoKkw@7|3<@t!Xtg!k@<+kCjiN>x*LzcBaP*Csi3Qy2c69J-55?fBAL
zOKusyaaiqYK3Bi?z5FH4i#d1xuAC};a$?Nak1QqAcFs9Di^EB3&9eJ~5n_gSv!_nV
zJFETgk%!zQb*3LTRyN6tf1jgrQ{Dg7r0rKDDqr84EAMmoT3YNpUA@m;vKkwnyjoH=
zDf>gCPRL^MTRQt@g*wy)Na+5b{XHt|;l<p<)0Kti*1M{gl!{%+a;se0BcJfUE;OFS
z^5R9Qxu0&_jM>YcQCyqJT(d{3usv(R@w*C!I$irV+ArOaU{c#HT|MQq;KpfRe)2Ce
z{qwV#v1OH4An!sJ_e-wZL+7yEX>st{H_hYx!j}Et?EWWI{C5f#JNx8yaMpd+@0Ht`
zZZ;mDV9ll9vw7RU*$dvS-jTiGv3|xI)0tw=;~!=+Z(xfvO@C{t-0=G0a-JRDK06le
zNwc);zL0iq*15yRkBr>2`2*8Cgf0oXbSJZ%R(O}=6{xXB{ac~d8+Mj;vL6aFLXu6H
zbVQcB3Ab^$n0GmPZ%gfr=dssN3|e+=g8CdYpRFszCUe;*aY(2*`E2ZZwETBU&8r^<
z!O1Z>wqIQ(^LI`DRB(-#^O2=u&#fqji8XIt$UE2Bt&%Nv(+$v$+A_uW(5r`a6&8*M
zMATB<rPdre+%FK6dY$R+*KIH3cqS-6a$KqBCzv7gW$`M3NuR`fTP|r|W0${nG2UZG
zch1@OXH8cz`HH(Ru5NVL;WD)#{aMDU%(7!UuLqlUvGl*)c7NZSj}|ID4J?t7yDP4)
zc{yv-sbgoFw|sxUwQ7Zujil-as|PIso2OVPnnZ=<Ocq<rFgx$gtBFkOK2%*PsJAI8
z58m5;pV{sWN6p;VGpA*UKD@j7eqhh+$HE)Bwa#pPe%I4B%T|NG?za4Vvv<4RZ@gN;
zZy`DBHPef~7XSYJjqMU`uj+Vp-v0le=k-6@_3dkS*6y0xJAZ%8&(HJ!{rS9He}C<7
zw_|~?e?NYBaf;NULjs|nLB|yDx0U0ccX(}xaKCeC@YOpTpGDv961`kf_u<Yh-QC@$
zQtuB}KL|OI6rv)s&)~tMkZbYspMw3oCf2{6^!G{P;rB0cHl8p{VRTw2XybjeF>I2X
zvufIut0}3+7<Qduw@}D@#uhzqyHc;5)!MDKdbS<+x|ggJ?^6s9vSvH+BW6d!$_zs>
zKAY-i8;mr>j$RTFT;nilr{|1{-IF<H%V%$yW@KJ<=AKjeW7XTGe}6su)~xSSen@y>
z{m;FnMs8oHT(uHq%Q*adom1YID9_oY&$PG}ToF5ESFWSGId7f7tm}odPPvt~oD6)`
zfAi)h-M!Iyr~huLS<duQ%t?6eGyP{zr_4E&mifWkq2&bUrax5|I)&e*=IPCU_)9(Z
zT$H<3n#HOZ>CSA86N`MFapz9e5s5iBdq-77(Q1FTJ=fb!#8_@T{rmqy*9NZe3(E4h
zb#HSQ*`Bxluw<EBy5jZywLgFRMTPLheEpQJ^HE}^<`u>2ldKcfFWh-Fx#r@o6@Hyx
zCp_M^+OIX;QIxf2*4588oAw{E)jjQ>XfVa2&eJL+t<~Q8{GZvMf-H9&*}3&&@s`Am
z-+sT?BJ}sbyz_>e-%s7k|0zJL_!<XaPYByO_u%ql^Xld8_P%)c=zjwD?p5vVMk`nY
zc<x;6&{fxC=zOy1T0y_*9tM5({Ley9lX9<}d(|bj>Q>s5H{os1zF6+@<aTch={|EU
zVw21ctB&W9$Jlyi$|u!1ZO!@-EmJ?~$D$uo+}A|#%B`K#+|2%z@yUE93)diFUB(vz
zVGBG3Q?4u&TG7uT&UR?=cklJhzty(?yrR8tp7ts=naR>Sq?v3KOq#d$+&?DcwKeCn
z%RF<HT$LMM%*IZEd12dl7uE6CU-_`0y4-U0QbTi@kWLPMuB`{U?rz&8s$Ae=vnu@k
zuDT4C&J+GB;vCG+t)}jaZQc9J{mhx$!hOj?tJq9GJQPdxHM`{!D-t*(DzJ4ow~KeD
z#~Z8M&HncFcKhmW=Lp`Lve9opd&?b>MXBbqI_^jsevzAZ_wbpm=NsOInOAIBJ3mAy
zW3p?+=^L7hwu<K6ENci-Fn16wf3EzeW7)*~IcNE|?KokydeXzIKNooMYTU84o|&*C
zJj`y%zg73`@9z8a=cVuASHC~LD_`|y{-ewOuA%$tt?TaYd%gC^_h$FMr{~xGo4@bR
zr*EI<e?Omp!<YMX-JegN=iAl)TJrwB&7T#=_g>R-3UyoX{DI8zkGJ#p+x8{zT@uWF
ze1*Ua$z6q3%|c$9X>QjZ{7N(Q-zpz+eEsC#6+*jpCQkEjV2f0W*SLQ1edXDoALRni
zG4DSeQn!ccUxnm`k~QZQV)R}=iu>ZW{{8PWcJsFdSNret;&<rR_Kgqjl1ZKU_)OQc
ztIPhrk$aqVKIb6Y1J)zo-@N=Y!;pU`OX#B*v;HmfKT6oJ&fcfAa-HsB9i}vkUtN-W
zXUv?e*!<Mu*-K5M1=FwVtS>G*V0@WL_SJsBM!mIyw=QS?VUXYF_05!3+%@1?>DHRt
z#jEtMF)1A^{<*32?(Ow&^Wyw2POwwDqW(YCzffhZZJn~%@|Tg(8lRYTo~fSfI~{vI
z{8sH8pG>DrON;R8WjdOxmtVAbI<+t<Ms?2>|K=%Ech%@c>i(%+d8~84rc%w+kJW+B
zC3mZTt=;gaMeMI}V_>bw-Zy&ZH_Z8LX#T2K+%77zDRog~rnIW@B|VGSU%L67_CCw4
zUG+>u9yX-uxeGjz>9AFMAm1kS|AKgOa)?yM-xYp>$;s-kEIv7GD)@MO{am@WZ<^oh
zUkGmKG5-H>kuR5#(dB(BXBK#^Q`Md+G<TzHuiC}VnIVSHt{ix&vA1IKtsfg=Wj^0L
zJauQ4Xrb<h#>$U2&o30dT5@U5V_o@|i(f9*;XQQo+NCA>Z$ERMoU-bi#d&+{ZML~B
z7w_#Tb(}d<DW#=w&Y>F{N)+F?c+6xBRe89t^6!3*|M^|}J8FeO1y-FqE6B%I(f`Bm
zzuG71FZB}t<o+ofHPK+=|MF>bZL#g!NYO=K1^>MZRCSBLw;_yifxh;!<j&RY<!@%#
zz4#xj^xSo4Bj2^88{N688ShMEH@qzg@%23N((u;%sud0^)V3{tljizZ@N3}@!8L+%
zi!PkWOa9%r$f)nwx+MXwUK=+>a>P90=xk8AWdG7tu&Lw9|JiN3dvaDzY`Ly;ocXq^
zMo7=Zol?2;Q=;2C0<&|LF@;@FbqTq8f$f-*QkL3mkJba4N?mVr@7v3M{cGz!FKcCT
zu)Rx5*p)K@QyzUSJfXlHtZejP+M>TLS&@mlalY>AcaClo6coFi7Iji;qBmz()=l60
zU9&{X%iWYJJld-F%lh3f=5WuRXIQKB=zCR*gXsy5^U59fL?-vY|Mz!$^6_O!|L)3v
zcXxOt`_Djociq1KS@BOV9KXJsGtd5SO~UG3;W>|lIBwhBwX6O4=jZ0Xzpg9)iIb1N
zf1khW`BW!{tO>^-|9hRkU+%c!tzTExGOZB2d;OknqzJ2*VQ}}f%{JOihC6=kW&GJa
z@kZ9#<E=|eTa6x+yKc6>^m2=FeWLHjrF_DFn9uOk$OPT)K6T=DeaN@D($~ZaY=b`7
zD5_syY;d*p>=(|HUuUN%ea~#!udtw5$tv~xU0XBH&&fL#Y8vm^wHNAN+<AS|zb!vx
ztiEb)+WAcR;iap#<!%<=U02toKlAy>>HqIZmep(9idQ8`&pa9Z!<Xr-5np%1|J;pR
zY`e4O^dIY#O!Tq%84z74Ieq#SQD@y+)6!k1`B-H$C0-X-zs;}SAr}6@s`^aeY%{_0
zzB96>OE}L9zb^jt>Sng*hbzsd*YehtcV4^t<n<($V#Bh%8~w^{<6kr_^D66lpZwAK
zbdb>hdxnb7PagGD{P?OUd(YdMTBWn@KW|(Xo4xG1{Fg=^uGb5uUM{syF<Tsa>egIE
zp<6}kls^7A!R~c9sZ8;_O`hiKZ6#fT7Z*G)t}?iFf8t@|kA4#@?;cjYQ6czZe}?WR
zg>_raN?iP3btF7a2~v6}Br7yq+|zQk<IXFu-?F~e+VO-x^Tg!j0<%|UD{GU*Pn&#U
z_ur5_^HHy<(YyA^>+WY7pEv9}|K-ZkjwdrWA9)(l#&Jja_m8KHg*z8jCYtc{-ne7)
z>5IF)^={F{Z{?Rx4SuWlz`<Q<%k{IXKW0YG?45td`re-T-`fATOa1@ue|!J^2VL(Q
zn3rWuVY=tg`-A77=TTSwgWH?S9aZ18TQBuG6&S%<{%Jy0fXpjb)du$FgS$T`q@PTU
z&#-*6*JOiy?M!_+_ZLYe$MT|l<OKA)p7d3}_qu+quBC^M%emfR)yk+F>u#@&Fznb-
zZJSj!<@WoKxdP>(?m^PC`2015<hFL?WC_~LxySCI>{{ou#nPs3@7mKI2@DSnug70(
z%3L#}rRB(;k|?fOdk&g4JMe8tia2iFTR2T2AxTU5)V}xmzmw}zoOju%d+@7E>O=<y
z-<~Siz$Lhqz0q`9OuGu7km2r`(zzc+d&50U@0JNqcX{|+e6P0M)Uyjh7j4RY7iGOa
z=#>8Mxuv=uZY|;d;z?yudvzz7+^hTj_IFwomkt*nZ<~Do<LVpR9xn`gTTxv2Ce?Zc
z|E=xpNB3xaKNNkj7BrYTd1h1G-hXem$M3i8n{QXEwD+WW<L}3y9w85BblCG--_+k$
zvZht6ZCNE^Y+C$n<wB01`N#eC$lZIyA91_jX2PfTrAHnlK2p9f6Sm}==862IJ^IUE
zWSxE5`lKMqozIkG_l|Y9<CIgpjCb_tTDI)23{9JDD#7UFw0!fAM}88AuUA-JJkJvI
z<!5)?j*2-Vfr`t-%GAOqFL$(U3wj?SGS};8$fDccpWhknli44-dTO=Tn?G`eabgA2
zRC6=;>=9XWO~cRna`*ij!;<q%p&K9lt$L*P?7-^c5B_teZG3+sJ><wD?b#ic1-Hbm
zyRztIuT)>2`TtJm^3GpLZzERt>ZhK(JJn3-@Vs}MgD0zBXGr_=@aLOiZyw9L-V$rO
z=j3ik?>|#icuxDz5~(OFo|v|G;(vta?|vkH^tz_dBTWWvZh@IO*6XTybl&(Km3;Iw
zWaZkGVnwGmSi48@JdO9cde-5oNse`T<Sw4y;hzt-tcmFmSUxHLh24fZPQG)D)eZKh
zW_Y{|tozRS`E+*niLdP+HMM^%e13Ar1vx1#$5$RDz7uUue{K?5e!3;u{pQT$doF1`
zpLywob9Z*i&#q{@2y@-!o?P}1Z%R)55tP(8d0(`_&fjC&XIlj?m*(~JA4wel=qtMA
zxOW1>r0aSq5vTpO-D15sxoYaw7gIL95$WtUu_(T^OE_#XSKu)z!`aWSdqgUvd0lwf
z8<&&2cG54IkM`RCYBjB2+Xw$^{g}7x>xNh62Yq*V=QQzdb1uI)-&N5-yN^ZDl~wzR
z-k)c6{QJ4zPjD8T9eV8g@5jIQs+BzT_}w|RLqOMg-hrn)GMO17|1y@{II47Nhv@Ay
zHub>^cI{R^Hp?=9^0V!-!RmZ-B}2BHC^Npj_KkE)+-mz(;nVLFef5a2xXn{NE62#z
z@X%eGoaM%^I5xI%)rQ_s3eSjKknYGI;J5qTlsoRc_F6kvYwD!PE}9^iD3`x#d#&))
zS*vr7s`M6j#w&VWYrlTH@OQ~=``p`WTfDPgT<+24m{ezR=j4SZwm*t51U&z}%*dNA
z6jRJ~JbicS?k^#^`kpMS0u@>A9`K#|<nUrM);znUuPo_xsV^lxd)H>K-Fke%QeF3p
zu}Q}tSN#9BWA9X@+sEbxDld_H`2Bc(fBesL$D{9_+LpWhZT+9q%m05m|NZffw`JPD
zE-!z3KK|Q+shghmEtNYS|Nq<e{~uqk|Nr6i{4-vk8(Ht)zrVl!zud#ur#D}62%XDl
zH(?8-hbHTVC%^wjy#06a*{h=;>RuIY5YYU8K&Un_^YQoNCh})IZ%ofW@S67sL$}Uh
z;dM(ns<=+<I-R`0b+Uh&U-<jZ4X)R=wVa!AXG-FZX#czCI40Ox&z6~B_W0eR&W~Tt
zRBK){Se3WeHJ&G$Szt+Jjs?e#d&;MOyk%YY%qE~o=feiWOXnOKe}-5VavOeRp5rvh
z(J^wh{Jt;VG0&q{yJUxn*Jvr8pZp|p=2!Jpoks$5SQ1r2#Pc3ksP~?_!#y=FuTZP_
z!nyV^`;SlNMm*7(V5(iRqPebMeW45c_xrl9l`2KGtV<q!VicHseOcf<QHK9+{oKzB
zMBZ+=vOMeGC1$55MYmK0_@2w9oqe9Qe8sxtFPDEveOj#5Ene+Ep@EO_3ung->*N1=
z;<njG-Z{?~(H6?My6*!kOW{=EK0`){&DH*bd`E=pHABpt&PkXm&pH2ox$(k+n{#=z
zW2LW@zgm#B_?-S;>*_MuM@r|S+fJ;$Qs>M5JnON<<hzP~jAC_F&)YosQ<+6io?mE_
zZ17t<jQ{+}{w%j6e-q+`YZpj1ciyZk^m%9Oy<ql(+s|^N8r;*~fBinQ;q%r{hr5^G
zI3BSo*eLmo$^9+Ak0ia7C{g7Ks?v*7bz0_{*4bbio4PVFbK`2J2>;&LgW)p21B
z*%~DF)m7*n%`P>K*r>CisG8R>w@=ySi@(1|+pKLJ+GSq;$D()bSFAJaIvU^ke{s(L
z&?kAvI8G^EH9y67*R{;c@}2d^lW)}awB89YyBNDu@*a=9>z~D@%Nm#eJ$_{6e$6`L
zzhWQf?wQPg@cXO#4b>O9zHXZtdZ}=cdHn*XXy0_F)|>-wH?KH-SRfNRhbR5}l_b|A
zuQ;|%{8DJGdZkPB=Dm{o-4iX|eXp{ACCII+`F@(i>6$*fpi5=*D?NH21!@HBXj%MJ
z#_VwD{<pV(+}l#f!yI$KqVnbr^X5|9RlBF(JrXii(o7-A=~!T)O<8A>+f?O~xo3pj
zqQkOytIQ8g3Vdnml2U#9lyCoTmu+&M*H487wF(LLHN188DJ~W0`?Am9drR5wR=+cW
zE-O}QsJQSxyqma7RCH<gik9n-;-;`ZI{5w7RHX}Ywrv#;mrfMVxw+0<db`1vy9ozW
ztYqCoMMaHg>|(y*Rp@o+@%#7n|FhozuDiYM_HX6B4fQ{NueY!L_vPv3x6l1=U)Q&O
z`|)4h`gY?-6*m8V&j0`W|F`-7Pfx%7y#CM2=jW?$b~k?f`g;5K@86f`^0Y0Ond-eV
zK-)ER@3VAA?I@!=TDqE6^A4>xkgR`UXz#ouJ?i+LebbeGrF2c|4oE-x_Po->Pronh
zzMt*wSEL-gc+b6^kJ@i;J3Hx$nAuEu>%Uts9sO~-Wx;ft-d1l@hp>Grs<toEmouF6
z>{WR*vtjc8WnCPfz0OPI&M6YSHuvz){Zl?y{9E(NBH4aJM$7Nn!GB&DPcIF-<lVGf
z^hRmq#t=>Bjkn|vuAL{B+-#>V>2I}W)7@5+s%!Vnj|KDxo-CLoG5K5Rg|2f2M+&@;
zv!0W^myjW0<#Og!aiCwxqSu<bbyucUm_6E>vi4hiPhXCo?#{S}nFp7ya{IH9SLSJ|
z(`R<mlT(-7S`w(3_56>_OPN%?r`K3Lzdx#TW8Z5xQ8GN5sZB9--K}mrkEge^CIuvZ
zs8W7nzHZK|ouAW|@P9JYxDa!x^88QNO6JFx;vO|!{^xPcu<o~8w!60J;_}ur%N2q{
ze>W=Bak3xF<hlI!se$*2sH)4y);4PI6udIwVL^eJbLyXh8F#Pw&JDk_kE1;8-sY@x
zlkYgYE|3z;_{$*5G1cgdSM!<+=D%$3x$3?+YtYBn@;73Bb7#bRwizx@Oa5I*_1eF}
z++g~8Z>}5t(sxcz{<O(9*J-+?Rp(^=-R%s|1Kg@aji1@hx)Q0_ytV36@sGj>*M&|P
z1Wmj3vm`0w!Ie6u^sv$;e1@{z0j*K`AFJ;#Y`88O@1-+a>l@32FKUWO(<_WyUY0+4
z-}OFgt4R6zIVy<}N%1xzM>USFzG(i7w@i5N$~~I*Ld;J2y;(JTkwjMkv*?d4)9df#
znSNOD@k52dzV1J<Pxg2Jx%zi?#&N4J*Ujx8Kd*fr)a|mMW7V4pvo#Oh{jfmh-8sFF
z@2}tfQs?=dHKkg4!3DEh>wYiKy?yHOUbo&AOUn1=-rLl&l9OqH=-0AvbBFd8iKQ%p
zPnHE8n8ljm;ZiM?sbqbUV@I@RwxCP=uM9Tl5N_KSoJA(Lb)9~6e9Ks?*q^nPDM<8c
z&!Ysd@)p5|aq_0Co;>nP<htPLcJuVVoi_XA-{rEflw=7rC@k_4ZJe@cX;?t$hJ$yH
zORVu%d&Z$#?JikX;5GGDRO01NVMk1-@a(;MT#V((hhKkJuuskU8^O?aY{A{;9}ZpS
z&lujyySN;yH0iW^`7Y1zR@kY1Z{zmvie9inu1D%oUCr+B8-M?O_+I}1>-GEpK26{M
z=jHVI@9g)=zE}P5_xAPofwp(H9?9ST?{EFz`2Y9Ap4$I;Z2$N0{JMXq_3hug{{N-_
z|I^pk+anh0I&D7G)|8uZChbpefbQ)uHC3MMea!Rt)_E(u@>z3)HN5}mv*J@b&&+Fo
zz158Ws=C+xo1cChW~z6o`~2tD^_4&Q!#VYz9qS3ayY0o)D~F6{TJAjb<iOR~0HMn>
zJ7zla@q9|#GQBxC=N;3;J$9QntjTK6$qNx<I5R2Av*G#NBUerxPdN}<BzJq&G@ek!
zsUl~cla&mwrM$OaaK<Wg)z3xIa|GSb8~=VDU#Y4b*=eP|uDqE2@43^5Z?4upTv~nZ
zS<?MC-<QmtEV<6U%lg<qp`=|B6^x162iY}VDs!KZ5}c)W&}>oUvH7PNrpdGW#N0go
z`00^I*WUa8{q*<cHhf4><Kftvy6U9x2kFC_A177*XbL+SdTZ+A+E%t{f=^r<)YA97
zdO5j%)hd6l2^UYyt-qJCrM>^r$z+4vk5AYSW$CnEoT=Mzur{y9EzK@d%fn=US*Jjj
z`K_a?f_JW9Z_u{MvdAcXa9o<n)oN!|Pa(Sr+oH~Ms;VL!fq_{D*Y1X~cWkY0OFq49
z6O)(*)7Oq`%gw_rZIevo6t5&m7H{UWmwmmtZmG+kgN=Uti{Bn)X<uTnD?e?^>zG^a
z6T-Cxubl|4*>KG5yR)sSZMP$f%*RN}fcMc?pSJh>>5`BR_`-0!?Nt$f|GXsa*Nc86
z{d;j@LrJw%+|qrfPu-VAc4nMfE>XhSzp$NUSKjXQ_^rlzNkMEUFP5$KzxQw^8_PpC
zG3|9_{B|sQ6ED9P5Dh+Q%k8i<r}CuvQt=auR!tEyS^sp!_5;$%%lAaq+<Wu++l>=*
zKY5<=`|G30CTjJ;_v`+SKl%Ls@AH>y9bMLxdMYwb<=vTpU1j%vvF)F^vZejHU&4t8
zhaWC^<~8TU`&$d57BDS+yQy9)%4E^C$BIk~xBg&dJz*xn=6ZP+!^B6SS2#r_0yfI-
zSUK~S;-ghMBK;3<G(FQ>p}C`p@q6BiT>g!5ep`(7S1qo6nRIkp?dSIAwpt?V<K7+b
z)#JG?v`oaYW0ixXWbZ54^11u`0@wfBQLi6wH{E=*Y*g#IO%=5gj-AiA7PYWzei3ls
z6=^A!xT^d;>f(a=hfY{8VYd$Qm3Y|WUU8OV)!r2+%Ivpky>!^yF7M~x%AXQ<@5m(4
zx|!i~w{Drd{oq2oem|xq0$sOnpY1aI@%QiU7WM6uoxWcwy8iwB{eI_r@B8EH|9+cp
z|G((Z<@g`d^_SYbI&8`^(R1g=+w1rL-TwdA`G0>u{=Ge4?)&e&AO8P;?*H@c_I1bf
zGr{+LJ}X=}`zvRuV4aud(kVyyOph5oJNDBkYOX2oJe8XntjDa%-nRdl$#7Tb@w}2x
z`cDoyM|ANQ<YzZBG+lE3axDB{tHYhwFDHNN@jss=xM!79^v=3tw~GzzcF*eHqt+ke
zqWsr$j&FKZ?!~T)>h~7wOg=r&w)5zJ<F`R+0;e_o&%LmUSY<49Tg-e{MTh*Ysj0g@
zwN3f8rP3~0TkOZP)k<y8mF63>RWiDKe-L^ibDODi>hFA`X@)ku@`eeMvgNKFjcp4K
zx_e^oSym3WucxLz-t|*Lcuh<HJ!c-1C3<f;rz;h$UC22%Yo+AZ24StNK#wg`4*yBm
zvi0(eOH<#N{Vw}^Rf9`ikN5cco;1Pqg$KPCe~vtG{-=uc+fy0IlHSj6=`Eh2`tRiB
zFG(}ak322r)|@b*;Rfg1;;o%~cC4G9@{@Zn+v%-6itMl2>$YEw`#)E;E#v1QgUwmf
zA4JSqCXxKzNKbLwg~wk^ns}RTPM&G>t-V%LqFb)LW@+x@7n$x^>%IlmP5H5RO}u~^
zukHPL`6<f}XPW&K;^J*{3s=(p_U7uY9mm?fOxPjj>Xi9nGDqmsMR`lTBKzlkQSX)8
zblWp^-ue0jTho=%%1>AEoBgukynC?NC~vKf-YLts8=q@(JiX-OfBn)K?SnOfF`R*)
z8E@Qu9&(ZGL!{n9J}ap_)?J&`q*YQbPTnh3ZIZ4sDgVag8<K*PZEl3I>1GM8>TQ!V
zw9|-D(Ru3^&RNBqDBQ69v1{tQQ)fl*UFSF`RLI=*La?Y@E-+Sb*4)=ebkC<0%03QN
ztn^r1$t4;zMT9>&=aBj2zXChnA4&gc`$w&!xi080PhI8Z`uqxk`qgpi0l#h-e)?LI
z&%62bTJfzcJqiyOm5TN#^>5p^<I2Y483#?W7P;nX-#(iY+-85T|HGCy-#$CfJlFnb
zQ(!>XSD{t>W)7jI2m5%he3!c4Y~mfraWnDLVRd=O<?Y&0thYRF@dO-~n8wrQ^0i=7
zN8Zvsca`K$P8KR(`*oSpljb!^A{>iOH|cY2OfZ=9F>e2V+i!1|rEQabr+4vN^)AzQ
zwV%9v)+T;vekduxugi5dWoGM|){Jk8Tg_Icttp*!{dhHte9#^b&jq15TC;EFEAekI
zsxi<rcqLiql4X(Fcz)IP_gR}(ow=m*;@#EQ1@rA|Pep&R$ck?hTF?~`vxlqi`}+M?
zk3X)c5tm!~XSe;|O}V#k?|XaymGd3Zg%7_}-RaNYU-w^Hr|)?EFZ+L&zvthtGSk)M
zGS!y8&A0PxRkawS`Q^14Qk#AnX7Op;e)7!fFFmmPg=JU?--c^x%g<aYT~~eb{{t15
zniYck)?_M1oPW*9mAx=@Zi4CIu<Nad`KR#sHq|fX<GG(D_s`eS>e&vN_s&*x{MtD3
zc-G9Dbu=#7=ZcBI-Qt^F0do(|6v>^kq%>*onoXZNo9xmi%I7;7$Vaf4C~eTlH1Lhl
zU#n5m7Gu-vecopd=bd;@cXjUHlT2)qD-@ph=R5vjR<B>3P*cRbn=i74=|$-Qlbd{h
zk8JMi7T>L5(QQ=}(;R+;BlRxhx?bTQUZQOx0_q8!R_i$DJiYkY`=fZTk)G7_;@;DO
z&i__fP7c2<wPt4CrKGz8AuJ&q)7UpJd~ocnSysl|tE>0N95>0n;gl(PHc~)cwsB2f
zr*QCo?S*<Qf3BVUBHurA&fRU*4^|mXT4$OY|4PMqy_5;h@~>PQ?LAAb6&1`CU)-o?
zD{+c}Sx9$_#YNSJ8%<QtAGK{S2!H<8P|@<Se$d>-*Fq-${XZ>M%k{#uC&{eVE4U9{
zC~555@IlbcI`3WKlkcxBm3+8&eT%hO(cxAqo#Qt}%}1c)LXOk--Aw%fp_f+ID@?63
zRBF<h`u7m~qc`s-`Yk&BQE|Gdng5B)%fA{u>1lXnP#Jl6N}GtzU5?G0bF+`1nCX9d
zwe-fRwRzX{qL)|ics?gnt<A<&#e9Ei)1mgsv$C6=BRBAew5~EzN#VTu)@^~Bd7Ht`
zADu2mZN{%EWpiHqEUrB$EW)ZFUCA7B<fQusF5A<KUQBxBQ=T!Wqp-_-a@Y22E}X)`
zN=pQ^vOb;lVNv84*3^^Ai_5=aKD}Q3`2VeW|1VGfJ3r@l`=b3<f6M-x#Zz%D*(ad+
zS#ykO3Ae~b8S7y7$W6Hi)Pncx%NQ<m_qu($?Do|MS6?6Q|Lv%Lcar%0D@uWm3P&ae
zt$i|S*Y+jqlQ=A!+GmR??z8JFe;_e&&cdx>Rr6WDCazPsU+niy^q`sFo4Z+TDwSUg
z`VBNr?fWU1X(ik#TeO(t>BB;QOYH@o{7P&O(^7nQw7XoXzH)f`-7PD`G8)e0wsk#u
z9{g#~OwT&o#4R~$BKC)DUo4)VH$8XNJc-=y8O7XOnsIXjEBD*yZ1;8DBi72~XD6j9
zq5QS%hiFT;Y1X~K<#Xg?ewOfbHa=N!+<C?O{^PeFZ>#<J<=Zp&zf$-6ByMSKxoN{6
z#v8uA{dcV3(hSY|d)xkAe)~E9{e5lc7b&JM;>s=u_5XPLe*fRE>+5T6<kHP1&zic&
zEF^qNp2mjUC@-~s_IS;UPp4@8Q~E3_-pAq>cQ*2Q|H|_WvTHRLEmz9@7P9*ERQVHO
z{~8SHS=)SnefBAM;qh61=JV<8Cp<PuB(9gRG=4H`YNKWC#MFuB8GPLwSe6x32y*yD
z9s5+!m^5kiuL7gZcWy1N)?jdVR*LcPyDatn(awTRPkUw8?@`crv@{}STIvp+m=o`w
z)GuXxc-gU9X@9Thgp&AtZOdQVSYsM$y;ol96*(ZK=*iP6Cbp^CgCn%isfKNS+-;Ar
zLzO1`?tBgYusMeF?ZfGAjwe_XF54BaJNM=1BcarVKmYhoz1!my{nPU0{QTQK8f~xU
z_@4N>B<rEP>6Q;u7>vDD`>Micdn?X*HoLav#e{F`s?EKVuG$x`ZE!C>@xZce;caut
zDRtB4Pp#R*a;tX1i>L@&|DTn)y+X;Imgy@`$Lx!3D!RB$w|HLJE{7kx8YGJL)ffq#
zY0T*eSnB`rc92R+Mb^hFaemsq-6}^{6(1?$D(%kIW?A-$;dIi%*wYvHMSi<FKZt3O
zrq$KN%4B}=U3x3R9!eePXvok=jX62R@JPGMmm2Ma9X}ol$~H|Gesn#L?OR2B9N%lX
zc-6Z*uk83H`~6@h^T9*jvr5kG{2nd&EoRLbZ?WSmMbB~@u}#YS87_A0T94RkPS!~b
zTk=-Fy??9Z{*x#7X6u-&d;6Hv^TIRR4fi&Enj*${cIT#TQVVlRT;j@q9{xRbVRP_B
zd%3D5TFOpQk8W8gE?)YxR6go_X#a%|wVRWqnz$<7ss6USde~9n){l>u=joPw_bK<*
zJF#b{@ih+-hOHr&LNgSu1eiQ|{k{6{x3|lApI&}zyd$%3$6L#c2P%gpbT@tb7jZ<X
zY$4ZNjtrCHLlQ?mX1=(p7ioI&Q111hD|h?9KmPddgR;s2j?ODbT&vg2N>PgGd$o{@
zWACwtnyp6`-+CAwSSa2aY@@ME%ZtCoZ-VO<!O-1Frt&YDPD@vLFdCI_>xi&8y!TDm
zf_Kl_H@CHjKU~J~=x;~6&8m(mlbmK*%qj7@zw_uEz9&cXqi^Qjeq6n_pJnIknU1rZ
zza6zoP1-$m@0LHB8CEXO<8)_D$*|-MbrFu6c{RsY=|#Y!6~AxiwH54`C2}^_@$$aZ
zlyC#S{#RD-L_e*`b1mQC6YjU=ZtHgy*-K9Y<@eYA58IY&tQ~#x+o7ob!?V?kCI~DR
zQYgIBAOBx(>5J+5-=6Q^lc^cto%=)ikKmF#jomBu*}uDg|Nh<Ast1EZi(g*6(0MlN
ziNl-OQx_d<Z~y*^LH*cmf%#10T%VTikUwRub23anDf{;Hb+OAo>uj1Y{nc~gV^7|*
z*F9IRcl(`L6D~YALVV4uE0!Mi!H?AW^5nW~uV^jzD&BD~O|$m==6%JjOpGRfQr~Y<
z(vXUL`_)@E{aJCpz)5*4u^F~v!Z#N(p3S~DgVp!L;j7Z8kGgN^+5AE2L+OJ(m8b0O
zYOhrsyT10okI4?!PnK@@*PnAe*vUe@m51q6=|`iJ)3nz--V`4-hxavKlHxT}X4T`a
z_kMiSI$t+gen0a*mNV<ugy?ukI!ImKl&@Q`>Zj%EY{_@G`o7Poyf^*H+^vgWE>J(n
z9}p5#!*2gWT+P0$&)~wrd2el1a;$coisk57UM5of?VRQ%r{x^SEAlUXFY>l;-S<!a
z!=umJJ}3(@#2<Y=Zx8>irfcbSOxLgS-@bnF(&K`{gx(KR4L7qtm7IUJb*AxM6B!Mk
zg-r=6HoH7uytp{SQutoAukMNc*99V$ALQBh=+T@U?SE=@jW($Vdh@5Oy6Kk2nUhfd
zICFB6ameL|Cp)=fJw+lE!hH+pt1)R!<(hrf?#M$2PxZVn5~sIZV~w6rd^_^r1<QLk
z!&atycLtwX`ns#8fBLl$9j;3UIw$r_dne5_SFv8gXjSnuMt_UHd_Hy8rp7KwbZRY2
z4B>yD`S|n0SJ^D8Ms-ufo7*;KrDe_0VXwW(qT#w&q&3s-$fiBnap^}zJ~m%ilCPk2
zNAupoS#zwOsmyq?!?s@G($%$Gy9{|&IjpE!H`jFPRYmTW&;^#7i`~EY%v+%-^6{P3
zZi~;KC;Bkzgk-#Ww2<p^zWYz9J=Xayf>~x}{~ouPrP=xZdytkO(3B+i=-^(9fX~}Q
z)^x3F4Zk3!9I!R%_w6)SO~I>st7KAG6J{Q~@~1fZWJ?rhzyj8&HB+35jf>x^9gs3V
z$mZ}s>-fW<ZBL#U9C47-J9}QnY1@0LW|4WUo6m6wp4;+X#*!t{?DnE1KbzUj51*ZK
zM7d&mySC72i=tyo*k>AAGVmV1r8RGpXsDo-=R(6;@r!YxM;`P#oXs)5EfDzELPO)X
zn4X{&OR9*A#qYnJ^Lmw3LPJ6YwK^6D-L)}05E9B=c0zgAq6th1#wl6T+vTUWcTX0)
zyS&-+om#U=aF6~j)$8Zy?&UbN*v;mXTF~CR_d3eIf0@vo#a1T7v2B8P<(>WYf200A
z&CmaxzSfTA{-3h{|L*_)w)&?3BDMMX_un7C{{DWtJc~huPw3YQ8KtCEl|^Z{c|A1K
zmI>Z;na9RhEY`JJCS7;l2BucEvpFx`O;1ypeEV07!!F^3*!-lqQ;K{4KenjUuXqxy
z!}Uby#?4iWJ58snUjA|@?`NWwpZD#wFH72%R&{&2cFgoxFJbAboV9kg)s3>{uU{#@
zuKd(lZ(bz5v+w*u=DJH3^^0PauE?D6?mM7q-KKmoa-HFJ*?4vC?Z1`SGVWY1o^mnh
z+uWm%^>;jzKc@9Gy(QT0)t3cdSAGll#<%+MudRL-kELG!&UvKL=GVOIgPfaLTJ%en
zrOTT?M8>w9O#RQ{^7)A1l&AY7cRiYTNMmxDMsI*x?5&%B1a7`svB9$}dQGF&qMgi7
zjI#HOxT+ui+<!yiJaf^S`L84N13GGiw%5Be=EiP#@~Pf}=Vq?ZzI!DxHF?~M?a4X*
zL7}S5ho+TrYMz%nd8}e@VLX4^gI#|aR(du@Uv6v+wU{y6gY{Pi4>v<au5#L<qbHq<
zzrJ0c?SAg6faX@W+c%ypxuh=tagtI)sPdg6{(1N6-nA5lKH0u8C;Y^kZT}Kf^7_uW
zFJoEh*mU87fLTDu^6I{I5!W-WsqKv0>Y;lk*x8vmWSh&}FTGRhq_wBlOg!Ctn0xX=
zWmVP5x<Yr=wqDuxb30>D^qdnlerH&e&Mj$ZW9->z*SGJDtYrC&(mfn(H`{8%U4Lr)
z&DZ-Y`Z?jPjQ-o}t5<g%P+B7A>B;Gr!Mjegc~Qoq35s2hKQMgRIPF=+eX|?OxPCNW
zRW6SBZE}RQ;oz1f&eL0#D;&SM`;u^x*TOWjZ4#4a?oeYeJ*433c*6FNqSmx+aj%P+
z%-*_KnZ2sHu&vFa?Pn*qQ%IFdi1xNm({)!C6$F;-J~SgvYuT0Do3;f%Z$C}EwnjVU
z+Kgoi0j;4)wROo1Y3;f`nX{+W-nM_6x}76E#D0$7!qn^9IW``QP8`B#i~U@=PsMGS
zF)MoFrr8=EIm@b#rm1x$b6ja=T%<AMk=+GL(Ou4o?{ZgYJZ73_c<bPYM9t!I5mQ4>
z{(|7US7Qna-h^59*wkKhEUVw3m9x3_zUz`}vy8MBxb1kbtSjR-%QwXb4HKUIp59SV
zzRM$U>m<EPA3l62yZ60J;IiJ_f9)r#=3iyK_~5&Mc&TZilgKUc<q8gW&*m84d%|J2
z=brq>6({3-&pU`c+L@O%W7eFdlaxyQpB5Z{8`yt1Gmzo-w%)@1*AvdFCLL~2h$?jU
zy72n(_5J@>xxV{Rw_`WQl<$u}p0EF#-Fy4@;<|fZ>i^9D|8@KQee0WVbC@YCb5A#2
z=-%#Xc=t5hf-2r)JgN2bver~~G+jT}Tw1hW;{*HWOcCL^J+rgK-ZC;BT-h<}Ql;A(
z&L?v$g<>ih=7^-ti|^J6h^l#_yTigrf9i{ZcUvQvUALRZgjZ(<NHy$H>xkFjT4k%e
zByqC^*A~r&T3O2}pF%uuJ>Xd&<9ebbr`P@JnYRZn&(8WPG5bzpLWCRN@yYg~VO>8Z
zqSjT5#?(C5FG{jIrx2Aeae32!k)6z~EZ1Hac1)BCnO77fyG8d@)`kb2+AW6s*A`BH
zE4?OSkxk{lL%;W>Ju}(zs*ICWqA9+w#>7W~dzt$7^}ZEx#dAF-&UBhQee$;(&uX4+
zJKT3sPpCuR@S}brLs6RTLuSEOpMzSP&)olRJnQz7g-f~B1PlMJvAL!E$HcOpOZS4@
zeeaKqtEBX{dvfGHKly3R^Ve*$*=KJ$&h%Y!Z<$K>xq0Erq5IEGuX?%Wz_FF)FDsu3
zXPjHzoXnWLRDW6Yx7L>Gj*JQ=*OrOdDR%k$#n@U)THl}6?>oH9=<lJ}Z|ky8g$lQJ
zs~PU)R#oC?^Ip->(sn88;k*?IoT=A0J+M^vm=?Bd`7*AHcWPEi#a@U|`f=*X7n9g{
zQJdPLO=~j3IwG&vO=MQpsdb&?#=g$_ai;9UJ119oESB6}vB&c9{O9*O7R|mlCF25r
z-IlwDPcoWao0M&F++g~1#zhYwPTlJE;j7UWedDe2MvtEyO2|8LqUy~|j@~#SLAf5W
zPm?aT1}t+Hzx3U!ci;JW=|^Vrbf^VCzSRA-xYz5QQf|@eqLKptES;@4wSSx0T(wYl
zU(d7pFT(~W*Rwub6O(2zW}W>$U4Hw--K$nswV40^dBA&4M|QdfTfwaD0Zp$qa*ITX
z_D%U%T6|9Cc7OoWjb$^FL+^ddz0Xj%{_Dek2_YP!u{QT~)C4*&ELs|Ilgnr8idml0
zEI||2$C|zH^$nH%B(TGyh2zDeRa&fUpBJZooU@VRbg6~LW#ivQqH4-hmA9-AO%CkH
zQJNhYS{uSCsOx$*Dy#WW*VCC({W(mpF1o#T%I$aWz6u0y_1!ulQ#s5uBH+;9q^w>6
zUX89?>5T91`+u$2=4>jhp%BR;;jfsHrEIY0kmbK6E!`o`D^5JD>91NZxcA(ddgIxx
zT!tPybH8m{`66!bKKa7+H5GTV-3~w7FDrCbcFr-LE|$>aD?&ARz8`-qxGd<@Hpw*+
zH=q1fY<9iTr;<B0*kiB!{e5+-ZIb#8#4jf;IllgW{qFkz`QO*Sx8I-pfByeZ=kM=3
z_sg#TWYV%#f_5B>a_uz@oNsz**}7<)a{XDi_mcgG+i^)X&ssO1`}Fh<L&3Itk$0ZQ
zIJ!HUxJ-C3VVce^(YUAIjg_CpJD+&|HSpg@mkhu2&mPHHot*r5ae%h&{FF_HlD^FR
zDZMiCoI+}8q~5xd)4LxB_w@Nmo@A<g{xg49<3HBGCB7Sq7q5%$Y-(j{yl&IA*~K}n
zqImnly@%o-g<P9I(^Y)2)%tsHyG5V;Ilpnvzwp#^Zy2X7J;3EJd-$N*i)kGJB@E{l
zO<W#xD8s+5r{@0MpQp8X+QL8Wo-^;+Uf)BDcq`xOy8piR-0<#2?d5O(S*$5k+I5Vx
zUH$pRzw&>RTm2?h?(lnX=4rLEDm&NDe<4eCJ@uG=viff6+xh(M5%F(Nf)(y~`h0(N
zEY{#z?ZKvpeHqb{^=zkiD?XjN=1tCt>BdcyEvEjPQ~EY@d+D$D7R$fx49#8aR_15@
z*!6AS(Os6iq94BAyk`0*<BVL^mp$?CR~?uU_<gPD#f?_1hg)}*Xh<n5CHNIr#F;zz
zUE9&2uwqdp+wzy&=V(Z#wBMg^vhd~|?FY6Sv(;)p=rGuxKCvkzU5RVf-Gw`6h)bV)
zAZyC+-Org3?Z{>-C^7e9S=rojO~3Qks@Sf~uh=UvCsSx`s=U<yZwj5?;*A((OVVA!
zwQ3I>p7B%Jb0^n2&yt*^9V>ryeH2!&YGPWId1`+v&-5w-nI0DDWqTLg-Lz`vRt3R(
zORLTtGBH1^bw|_s%1N$kEeG_JCz(Cu^x`h|ThX`2hdbN;{*^ALsiv9pCLYTzlb4VX
zyQ9cuxybHF)Vb<Owj1r-D`aDoSbhFFug;sg`@n+GCyrL<oc8?Fcj6Sjo3i`nzn5Wo
z(kyj5u6KB?|DqMxdXGh}*lFfs?ND9aPM!Ud{jch-ym}}wYe#zCAHk1$4;NMDcDKy<
zCaI_D;JQ5V`=SIbHNU3I%W4Y0FRs${3R;x0G3ZPWPq#r=u#7<K)Sz!`->5D+bY&Zp
zi^X0yv(jx--@aYO`gO<kQ-41%->z@&CeyyWpyTtucj4aMMblP1(pox$QLFGs^YM*$
zyH;^7P&93d-7$4;to<3W(>uBxuQ)&KsJQau?ca)W%NVEXSH9hgJGBHkZnEs2Hhbp1
zQ-U=WHTHe$-&fhcdYn*N;>Le;*Pj=&jCFgeWR!vu<7{?_Z{B`;w*8;qzpKAYxKh`u
z<-DLrs{eQ3(a6r&oclpL!&bZ3+T{HcsQLT$_TRtvB!k{E=-qA;u_)c_w>hzUMb^f|
zSdN36QsYW_y(Jz_?vmQkr;_S>VqVmYvJ=N=%t|UuvAdqm?N=4l?$65QpRs9%+p$pn
zLMNBUtw!Ht6l((>OjZ`yTg$WRh0iR#=69PfO)N4Ie>5xL)|vh@n)}K&o?LaMWzJE9
z$+{=yv^M4a-<>G?Jga=B?j-%Fw$fl<hKZ{GE=@Dd>Ua<;dye1b-1H0P=WG;CWop&W
z`dy#Uzi(6cblDT4MH|edrhA0wxJ93sS#ms7MLNN*XNSn^WS_IeL065o%swY@=D_F4
zZ(54Z<viNZz_UAZz1ovx+uZ(5vd;I-_C;t+E-rVByeRvru;=amZ~Sw9Xm~qze_eZC
zG4bC&EkUEnwdpQLH`o6C!uyR^=e*V~%eD2pBCc_FKDc`9&zd&(<gl0<W)EUsByM$e
zh}-$jFi~guwUnZUjXb{7%9@xv^>{vZaJXzzS~1i7MiN((znI#W$16^svN(S+TexTC
z*6VD(F@-;LKkd5fq}yZd_##<GOrr0~w^y@!jSh&Xz1@7b(K3JXvF*Wc9V}Tcot4ok
zj=wyK`Dsp3^sJ>vjn>GTFMhG@MV)I|<-#&$V~-TIf@v(tQFGRwspwn!#qe#1=I#?o
zN|)uWPk*<`k2=SqBV-xZ&(qV9KIiNA*EjE&H~Mawscz5wP$*I=ZTg|~Xx&*kHEWzL
z7o=UfvNrk6q{%=3`0j1pyW$+{l2t3!vLX*D^L=!W`a4t1O^567L=ldwFZagB#~g9f
zTqR<0G&QuZFMr9?ZUYVZk8i5@S7@yY*t%?D?C15*6PxDT*M1=A{@B0hW18;eiDx7;
zGA^=N&-=u$Ze!NmCDXIJdg~W<f3@)YXS)>=(w0uCH8*{4Jv~!BP4=Lr!ztH{ZBzHI
zm>KY@@JUnB?I7!pC*fQt9~%S)mv(Tw@J309Dy6JoV)eN5=U`(~4*$GAE$$MJ5Bd77
zNVVnLy;|(Ri}n?cQzz_LnWM#C$?i0H;{NT^v@V&>p0nPeaptU<0>|$j&UAJ8^7gcN
zhNWEl?(2(x9e!-V(ztPsps#M-H_k3*ru9=86nlKmHc1p5;abppLT;0-QunG{SFR<q
zTEz-EEt}rE*X{_J?yw?qcg$xuZkLLw)mqDUcD_y9ZhNn*_U7NX*T47flmD7AubV^c
zr6Ds*v!vI>`IjHMmwtS_puPU*pUVAr7ad!C%_3|0!HvSsE)wsq^80E{4O-}Qw<R)-
z?Z{p?+jl(gyZ)QG7@U2QpY?f(cT2T}V%9Fv(>C1uRv7FE5b)b}@U?P$%F@Jje|~e8
zzn>9rmS?7)=3FWF-^_9Cx=S||PuLsh`uXpFCq47aB))hX={rC3R~K3Yr(NeT;OP$8
z9a9`S^ZfZaN5A~q*L^)a<g8I{)Rec}H>N&slJ-9H)?t2O&U*2vjWKTbb46@|pGwb4
zeY>fO_t&kUu42<|8G-Y5I`7C?Fn_C6(ah=gx^s?qiA21d5V5JVBYRffezDI#7kdXO
z8*29aT_In_ANxS!y8HX4tfv|$!>=bZW&RFr+$Qtl^2~Gr!D;DdjONKX2RuxwnzO#B
z<^LDkGurt^A*B`D|NZ}TJVJ8Mql&VSKT<5S{};Jz?#@t7PrRh}`I2<ZN`~_{PgX1y
zd-77A*Uy$=&xJoWuX&a$_RM&9cVdET<b-XPzkFhxUoQ0ULsqrHJM);_|1QfLz8sA5
z-_{lO{6tB>yxiBxyUv>C{&~qHk>C@X{{5#!Kn8bf&e4CruBv_7a$3J=|J81*Z;eGp
zhl4tW7GHdr^;%K??thUL-3Cum?&x1~mZ{>&*Iv?Y;J0v5$@}9D)4u)O&lGU++1}?;
z;U&i+zA|aE{}jA+%FAz8ZF-Ta_NMI>T{Q=bRgNAkkaCQ-4SK!G<%s9^uR-p|-W7XQ
zTs-k+rk-KI1HE~Zj~$ymzyDhLovW69%$XK>?J6z~+8VoOZTlH_IC+;kUl4EBDy`mL
zIU&6-Z$3S$^t(``WW}Qxqrh}|&CQ?}TO7M*MO^#tt>o3RAn?QWj=t$hHx0D{y!Y5&
z5;mXZyv0EG<=4G$RxRUrsOar>OQL)2Vewe=jyc8*{5w{=2Zmm4-@*K`N7pNhZNajz
zGutM;Dn7re&vv5K^Ru3XnKFr*OQ+mk$7R&_`|w_;gL_YeEr{~^$XjUhA~gH%cH3MA
z#?QCxc7-f-YmqDo7q0pmHl=iahMIGsjbEtdms=Cf9jksk7BFnCTKml4S6<QPmWz)W
zI1VjHJM_wZZM(5WSE|t#5#483Uq5C%{PA6X^xgYrUIGhQrOkqcz4nHyecuu;*Hch#
zzvSz6rA0I6%{W)Y<$9S<PS;YbspG}Gk0q;KmKm=-{;pIw+=F-JlHD%6>@ywruj0~O
zq^r6h^N3{r#=GA(<!|cFbG?7YF8}u8Uk`<*C_e~ca$LXFPGW1HU^4fvEjpk7T04hs
zpTBLp|NMLJ`PV;uXS>L|>YbcKP=C9T?Uk(A9)%rTv5IU)tFHe(@c!A~wOpGg#g&Q3
z?s@n@>*jL}kw<5K+8!_Zv}83yTo`BeocQpYyid*A<nvCg*esB}c#g%)2(iVtxK15s
z`ZeXWOZwSck^T*#2A5^TW7clpn?CRTyYkp=duGL6KUsMDwAo2M<67tEn>1=AZh4+J
zI`04a^r5A@A5EQBE42E|ioMfPRGW8h*D+KOikZyNwyx$xy=zIpamk&jwW-VB=^i>M
z-tcnLK95y}ZbyqhO+R4YHm~xZoSy3Ur}qy$nz3k2@%7-fb`>tRt_;sp1>-(_51aiy
zMq_^kL!#^QEk>ya7B1ZJW{RKiP5CI1G=Upm(ro^}*DPOfeVP0%LF*+?G@Tx8HBI8#
zzJA7?o7GDqv;M3QbDK9k!DGwGRZQLDl`ZU_wj>=Ec+MlFy3s1n&hODBIg$U$%Fi_W
ztF<<!xSTyHT*SU<k;3`qPmD}F_DiqZb}x8de7WDY+Xi8(_EAAzXP53eby_9Q@3BnS
zd3WbuPw#Bxf7L!YY~jm_Hxt8WR$g_zaN2y;+YNj|xl`q)To9PSySDE4yy_L@1{>0X
zl1uwNN;EGn;L`tU>s`RdTrtzw)JQnQL6%i=1yg?C<aa-%_AWd3_vRi|!<dzVo-r47
z_}DM3eOY-?=I!RgK?aK~({k>l&5S!)xHNvJr$qEzRfla?C)Th{h<$wW_`3cH0c=V^
z2M+0coIgkDh4RKQ&u@E5oK~<IXS;1wN;Nm#V&)rYTl9O51dr)OB`(>AJ9TfFS$wK}
z#>Blmw*JCJYvv9o-#d&su4mV{*_`0Ipkf{BG3mqZi8hZm8QO8*_Gp>;a#`x_3rFN4
z->sZ;vRf%>(&L<p4PHM~&*j*9#JyeJzK!Rc(x#b?1spOTN_DIM1#T17W;@%mNa|hB
zo4!Snfzmu$_iaLtZVUR>`|kK-3pw_e+iphOi_}cLUR7>i<K3)fa$waL)`u4k@kvH&
zZd#bLf`2;SBa4&s&o>C_Jlp)vy*=R1iUs>6@0l?dO!%I-b4Fa}sRXMH3l~L(cU6?y
z%jm^L=JD%R+4OzSzkg=Zq9vf)7M9-Yf4t)1#8V4*{)o&9;CQuo+boN%0<Mgy9ILrG
z&re*{_vzTSrljo`^ly}H<`UMe?9bv*4lq&LqHsqrr0M$yix($P^&~`O?mYKv*{^N6
zdlW;orV8+fegEye-~OF|oSfXuAHG%zy#F5j`}c2O{_VJLxwp@}lk*b_DU<h*)^_dI
z2n)K+chyl}?P$m1s=dhph1H3Bb7%8Bb57q=Q7U?~OEmOSv*BU&@IFJa%gXbV_deTs
zY1Plm6Lw!V*`W0C;?n1}lW+d9VQP>|X0bn1-ZrO5#a%~kRi}_0?_aTX33;D(%?n!Q
zG%Mut*318QY*}IxDL?&1-ch4-ZBgH&7WfuL&$nT}c=~L|(jzZStBy=QBX&MT;EZ8)
zbrSD}U8l1T=53m-_xPxdi2Ec?*NrT9`DJr2FP)#dJSDvK@tyL(xP@Q;*Z*kN|ChFi
z?RM$cRli$K^vsA|X14Ky^!gx)B;KmnRsXh>*6}=Q>-eD}#PGaV{QRG}pTp*<n~DWn
zH*8+{Q%gX~X#SPM=NTGIR_xlI8B!CmN4wyv(cU=$=JGMW*NDphiO%+VVcN4mK6T0q
zqef4;hn+8O-v76h>DA=Mo62jOKbKDUTbTHNamIGT)0Zue%5&Gm9J%~y%k4QcKDSQb
zIeSj`+-Xm<*e@k#H!w##tEhipbot%QJy-M>AO8?kWYJx9R3-V<q01+8-w8ZeuX}2R
zt#gP<(W&e14|mp?KITaJ^ugnpt>{6?X}XsmUGl8De7&}hJt5PpNwrO@J~eB9l$-L+
z%MU~*C0zctXU5sm<Hx%v3BF`pRyN7+bWgJJRHd}})7I3oxJ+!`D&p*^bj`nd>&?>-
zHo0f}2kSNWvIaMBPLJ;LJ;(DlW6!l^1}6($4Aj4Se3F}=ed*(Lt(}V-ophTQO)6Y}
zR(RzVrD#LbM30I!g(AV``)AC!R4MsTVmte_+3RhWDRdMBW-^p7S$2ovW{A0#f?n?z
zJBvI`+4y@`+08##bcSp_&~-Cy%@>tYp-t=Jev1by%vNd%ec~Z$loR>Lc4y8sH`Vqc
z-Y+K&HaV;a3a#Q>_pN$gaKF#hEmO{(67yT%zrwUQ>$B$4ER9D(9SvKj#dWXLR4^6&
z(9RuUX7Iw@mE+2`y&r_-uXZ0{6ZBxs-fwlWYxRQQP|uA)PnJpkFlmc7OtYV%Sf9%~
ziGOm&S%qeXy=|QN%i`_!KYY+!_+_2iA<2mo{>jYj?l_pEy-G|j&bIyCmlfX+-`}=p
zLEq!Vv{grWo~8u|PJUcxaO>9c_mV9sYGqqm=Wp+0^4Kw9T2k1R?4>Hp7o0Yql62$y
zC)VxyA8K!Fb-guMue#*fE@hU^|12*Z#XUtA?bz<Yek|_F6tR#{V-bgt`)g)+uDbQk
z{qMg6x4-?}{=Tq2<xmP=;H0HGjO)L?zP|oz*l|UJ6WNcGOkD%t94%>nw`}RoIG<Ew
zO)WY19~)O$N-$;IH2bcQxh%42_jMVg+@(23ZERlHe(pQms*@d~S@$6%(r|{cc3@iF
zBEC6=OvORZugz0ByYa}j%~t+To}ca8Ec0ZgguSZM+_X);YK&K>x_^4|vD()5nbJ%N
zty`V)v5w6vzg{^v?cI%EHks*xPA?e^?<($QvoGX*Ww}-RG#gLE$A^0pI@p9>3NPqu
zcP`H6zjd%+;@0%Ef>WWAOY}}nww!UGr#_a;!Y=iPPyX#G341p<?+N+W^PjKc-R2F)
zgkm>{c%L>2;y<4%e{E~!{k4y?u0FZc@X%<L?N8S5*}Cyh=bXRFI{nIQ%U#D(`Hi>_
zr=HxutHOVx$nptiMa}Hpm!HY~E%Wey9LpVd>8V$%cCq{{O%$9i^&rUpyLFs`kcYrc
z7Pa_F$6^P2wO^}k(tb{STvfC2<ipASzg4~4!@J)c=w(O`l>Trj{nMtyuM<xgC0Bk@
z&Z<5%!zOiGibu(eLzm{2<on3q`}pMWfu9WLR+@b{7(QKR$K;)5vTE}qP8@LkBktAd
za##K21n0tQ@?wkmyUy)%NNKs+uvArJ*VR{(%Vs6NiM)T_x<~%Tnd=b`uADL3Y4baN
zj^4f=+gbcarg^Q1T{Kxi_Q~X;iL=aZIRs4Uo}?`*;CpEMuWh^T<zyZz4Ah+K7PD!?
zAvYcY>8TvK2QJ)Se@>+BLILOb0+xa<|BZg3x8AR1V%FJTeNp7gv*s0=nN^nOW;ljC
zRyo<b*<|62@NX5>Y<m~q7fZ~Q)@I5|UaQP$^D=!+m&igVzOKc+o1$2964r#p%@01j
zi)-qWpDQu~8B7&_uI^m6-!;HNtR*wxh^DDN=gEsJ=d3$WwrR^BGe$=5H%?*GgaQ@P
z1Wqu9b<Q(2JgK|L;PNKXjKqu`^52#Fl$~6<X081u81Y&nv~+iFnr23(!QrxNRUzrw
zTenO(o3efOOJ7%>_78&ZQts7x`iiY;X%lw4yGAE4WA3gC&vsnoaGO=WB<+#wuJf@8
zPAy#qivseNUdaC1ck(4CPr#J5Cv>bbjxzW<?%I-@d&Rx?RjR&uWpz#c`Nu1NO|;%v
zq`htGuMZZB_J#Ye-N<Ln8M5`X*z9BriGc0q!Iu>RBs(~^t4%2onX-0|<Gm%TUO9#c
zicGz*CCk8N%0t7ZB%|q#<^D&j{#kF6<XyN<^-@NTTTXzfz>V!i!S_PSuDn;3GvyOk
z+iep#qo?C>1xJQP)`p~?shTbe+?)cs_P)J+`}VfIqP52L$)X`nowb|K2JHP+IAI~%
z_hsSRa`m^}?f<?fQF@7elVj-3Z9!eE{QSp{9MRo!#Q6KOX5ISLR_#nnk3QRKu~~A}
zra4z{%{qR~c#%Ax@=?x1;>9~uxeBIDRo=hnZ2b-&(JHMfwf#Odmy9kQEzy6(&NNZX
zL{=tDYUYwzd7i9WxrKgRUVb>Dk^5Tl)U#!Q&kX|1blY`;&qls1EMdHR@c6Q5?vtO3
zCJBb^PBkk%xLHqlEx+q-(e}WCcP=HG4jXseiTe<45%>7l$GEk-Dn17Cy!u@bQ~vB3
z*X$UUb$fn<rT1T9EaZ(}w$~)+<;j<GGv=`@pHXKr@6n3$wtV8V?{8Set^Y~vT%epg
zLvH>Op(U#F3*t7}KNMKDzQiMPlkkP$aKHWPde&+WZhrcpRT^<_@9Cg<A5tDL>xC?e
zWJwn7<Jsx>{L}YybN8;jx|ykY(}z>%o=xIc|LELee!T4bhw3=-cgxql-x+^udb@h<
zl>pbdTS6oF73MwEFxIR*8+3V{<Ylpw76tVSV@2*wm+bv5xqSMro{h&-9-2fRRK3up
zT>ncmt>9u>|NnO)IhX#qyE=tBg-A}^>7v`sJb^*kcCFH*PDAmk7djvH`4-G%y}bXn
zg#NNQ$-V}^mr5?Mb4+Hq;9Pp}UYgL|+qbT=U93D4!a4E8k$^Ir0O>`gC!c+FvOI6;
zmfj_tmHG7Pd^ZkXnLV3o^u?bXy=|!W<LsNwrw+^v(0aC}Z_mBW&YP~p1%(~BwrHtO
zL&u89)9fY<JfD1ShVleT-k!uVOKFL~`iJbw&h^tAPWwHRJf3!7SNhWSm8pyq*qnIv
zD>q~+$rh{4`xz##c&qzNM;+7A6R*9Zm28h%HXkuM%KcNQ`@}Mi1x|f37V902ewyui
zILBMbIrz4#!2u3s^ApRQJX)^_`t^LA&&{*aPjRW8Qr0Ddlk2ME4`eP>)ehgxqnPD2
zZ-Mg)gGH5Dhx-{~wg((2yZvp`R3}&7Yfm+0mIO^%;I7(g5UQINxK~^(>sr$N=FRgD
zpXi)%ZT<(hbxW=POgJlJ<5D9vK{b<M&YA-%Qjt4cI<h(!9KX13->dEC=QDg=T&3!~
zxN5E5l(%VFVP%Wk1P-z*rx?Az@L;X7>?{F(`$retZ+7LnD#S6*EM*lobUORSEssa>
z{k!+>Yjv%arbXUz*vBEUpsV|LV~5m|V7uH+l3kO|2}FDrPG<Jm*ttUN_J-u@H(&o&
z)YMlPxhOxrG08z7j$dQ#gJs8N-kRrDcK7?=&(F8d-&b?%9+%nP*B|f3GOfD(-2eB!
zsriA2jo&8p6?Th;t~PC%Tr&ClVYZy5E25TKuU25oe3&8>#oZE_`S8z`7>jJ*9)4fJ
z1UXUe-wV=1pYhq5Yg%r)uq|nB7psluuJbMXPp^pEFzFQA#z5;&g=<`vYfV!5@sQzj
za_Hd|y6Q>HzP$N|PhRTLtulJ_=*~JG7vXRrcc$ErYZzX*7Hn_kbJ*0Aav`VGB4f|l
z9oH8(i3I)LTa%bq@WM6y=MJW<b&HDUIL~*9WB9ehC15SjFXO@wf8InF9P<7+OPu}L
zv^2@y{?(s0J&rtKJ2{k>QT12qx$q1xpK~Xt?o`U&Xjpvv!V9G-k`X%(bfu{LWv%ux
z4P(`CTXJ6No$(dt!(4aG4ULovr|)~bM@Z3HBiHFq{F>d%<=(aSq<=Xwfx9#HQ_%cJ
z8f?!4rcd%To>Ex$?%>TMaTE457Ze7rwcnl8c6;Oc>jsZAe=z+u=|1){>PeRPekNZ(
zy{~<1y3!@it2%z)bYCXt>w_5%3)@|bY=UMl+@>yFqGf-6cW3`Ioz;7n-?w{w!@53G
z{^jJbt)ammJ5tuJ(`qoh(Qas*cg~1e&vkLi>3zJByPt$L&ec1_`rl<rarm)?|0BdU
z$-Mhy80J3x(V6`(9+{o_V)X6&uD1cl$`6S1Z?2ehFtP5q?xm@J|0XZ=UK4x2hU4Lv
zndh$he?F<Z_m$R_hqF#zO5-T#e_Zfgeu3fgvb8sLie5bv;=gwI?%bfQ1@{*!uDYTa
z|5iVE>q^hrF?-9xvc<EKleCvD;PMqz$r4bgdg8HrlhT!1eXfb@d%J!fI(k%dOWn=p
z0-oA1^_U4<UKXxZ8;^#qKljcmm`mBrBar2ZfLtZ}sn!#&qBB;mtgMh?+Lf!wS}f8v
z(K;o#<57l@?-q;YXFAS)-K*(oZ{wX}<>D32&c7qL!$Krg|Mpwog%9T)-D~jCJoJFb
z%FBEA?f$)R`>xz=E8lF--7KVJD*C$Xr^WgFV<$VjCa!(z6R4DBupn)2)`jhvx%WCx
zq@9-Z(>)|$+2s0S;nl!7S~_Z)Tg4{?$#1pmduG<+6zHWHZF%VO)k|V^3NuUQvQ7Nn
zT+-Hk{SV)&X(lHQZ3|NR#J=T9Yf0^v<t~?v+}NtM7v8wvmAm~PV_NN!0NrKQx!mFh
ztG`y4isv4(&zc;jn~-R1tR2@ZV%PNLaPj$OrqkuRivL!u)Y*PECs=dQvK!lWNanT8
zzWQC6umAAR4`x=JfgYLx!HP;w3#ajJWLWk4_P?i}m!G$<-M9Dcjf8*i`rE5TZq(g2
zW8Q1Ds&9umBg1CjhN#*dyurm9PNBL>8CJM24e{c6sa4UsCC2QKbHpWuBfECE{Z?Ag
z|6#5Dub?g|P5n16Eqio}t0r9R;C(vNf10uI=I0;2_Ljwq@cVt<Rg=F?W!s)7Jy+&-
zO0ReMR=?-SahrRpZC2scyh|3FoR?z1Y-ORzm)v6R@PBoh=z*)AT~|NddjIU_%_o22
zwwzrp^z*=1jXI;>j#5nVRUQGm?^|*#mQ~(#$v8geod5hkFAIwg)t@P}wo92mx6#Vh
zc1i282{D;V=IR<-G*8!BVR`urlU2;0`Zs3#Uk1v=%<5mInRM;4vQCa#B(K>E)7Ic4
zQAyu_Wmq4KUdO5zb$0ug{O+pCDRRX+Ci@Pqo6^tvsD9@m@sFpqVrw5If0W6(KIMUy
zy2(2qk0}dRKfL5uT;zEDP|X}~!A0Eysc$AOuzyhFAOEPh|Klw`>my>~doOR;_9{om
zU-y;Mal3x;@E3nIL&DO|@m;rk!}V(B*H<r1w|;(Vv*iE8H~$t%#N}k3p0e-ivg=FP
z&NW_KeSF1P&07~VRO`Ja+8h@=TbwH5aV&M!Di4?LnwdK{W<NIcR6i{qK2Ok~NBCkw
zmC7yoLf->X`%lXp^K844RybdHsp9F2W&hmLw=|x=J@2B}*@zy&kn0t)wyGSr=KIzz
zK4ST5y53`n##b_02QA+mEb-A_bz+Za(=4aY3bRjjELWKIP4jiYo8piaYgWAfa_i2{
zWhNafBN$_^xX21N`Z}&yHPf(#=j58L3mmRYv0bIMzVE|{e%6K=H_~=#@-T(Y<nFN9
zY+fYnaKyDQL-X6=B4g{NJM8+|Z!YjWtQ4E<y5ilE%?EXF@EoX(aGKzCUsUbU6z2zP
zm&~@55j=C)Cv@wKmLr16sdHL432;OmNn5l`H0XHe`HfE%LlS2#F8tm+@qUX_NQq7J
zu7@6)Qrq%xuk}#4)5^lrapsp$)${|6Q|nr|mtG6{a<;@cWTLyr3g70OxB#sMZP%sR
zJKp41M-?i!%)6(a&9!#l<mnODvM#+}dum#q*rq_P1+zupRP6ZT$g6ya*-<l1<Z99H
zFKn9M-?Q&+n|$y4J`u$|RTUMzzY2CuOn4IWXM*y3o~@fAZ(cdl|LWb}->1?OQ?d_D
ze911_Se?Po*SqE$Ppa|i#7C>VGExJ$S2&!#sO>6o&$7SQD<PoiRnXnT&N1)(<fb^p
zDMm@<n+qRNy3!T3QA{-bXxZlIkM3*V^Xqpo+b|tmlzQ7tbO*bPYwrF0X+hh+oSYG~
z@OE6DnBM)nQv<I4-Ilx5VNt7%I!}YZ#9VEsj|HK=e{0*01!ts18}eDCc}pz#(5mrW
z=KZbUE$6qb`Nwfmb?&$Nbvc1IDi=hz1-N_v&26p;?R6+(Ja%wK^#jfQ4-GcONPkE(
zj5+?N`?AiBL*_SPw{aRI==k5Q@0KiK`C25ZIJ3z%zPNAJ)1JdGe{p|%ap2gruDq7C
zGW+D07h}7Euid?TCpATe$$RVZ(|j3oCST7FQe#%+T;}byJI3vB#a)g-yG^=!B0nep
zh+EJ4ccsc~9nLi&MP8d<8$`P-@ohUTT)t^m-Teo*8F$GrOgddM(R<e^Z^`oW`)Xf#
zP0R0muC(#0@aBZ)nk>_Wzg-Emd>mPP?A7vlm3v;c&3egu_k^mf)Q`BLJ<r_dop6g)
zxEjzX6F8ySPA}K9&a3pVgKP8YeS4RG_n4!w{;*eH;qLV_<_F*Wb1CPp#CnGhQ<pYu
zdf~KAbLwiYPI>EM1EwDro?0o0T@$?)sJxC*Kq#_KV|Vq`i@9%~ProwdTfjW&mlG3<
z=G}a(rSfM&V~c&>{s`s`$7>hNdb8cqRgp0?r6|MeamcI-+e5h&6<0|KdVlP*yO@9J
zMAPzbkG6DH?~Rf2WBqnz`b>VA&-%BDRQ@OZ$qjy4l2!Y=rnK#sS!TTW+HHk9_~bdo
z6!-R}tTql#I^b+QgY&d-!reLN{L^G=WJF{bJzTaFyx4fJimhm)su$m^$-9H!eb`-X
zadd0J{ZnCEa=5&uI+qA<ckbAdeKGpQ+ZQaMzeUXCEv|{NMs<m1F#62sta>4lq;|y3
z?0`axj8dq?^OXl~KI_|Bn06+j$0z=B?&*w$AN&2z|JnJRFI6Ow@m{n01|!|atG#kV
zIH$bwX*{mr$G)`tz3C#66<!SNs+w+=bKFk77g>9XMM283b?fpiOt)-qaYYz1hNSKm
zooLbbt75-=`ScCja;BT{$*UN#%!oMtOT)z2@<xcb!PS692VN=8GS)50&WF2ZNwlt6
z!tQeWvaao#*bl4<vsjn(%Q`*ku-G#}?LZuhtJ16!mN)K8U74bkwQ5<M<hyyCM{>lc
zh^AgxEw0_*YI|rR+x#yUC6!x$3!XL0R*gTjB|b~~<{^U~cb<=jW3&7n1q6@wZHnF*
z{-j63>vq7F1sqOWjChYY<yI=JT-d3#_2O&|j$@A3eii%*e7*5~{Jn4f+ly|mefary
z3YX;N^Z<vR*-qve6Bl>fKglaJvuMFh-NPYg@7>E=Dk93e(735aCQeTDXvig}x`>T`
zb8oL*p>*iB!6N+=-WwBkTxWCfF7&ftnLTr6(4NfqUv@FFe>igd@hqWx&s}$(J~c(;
ztx8dDSAAgYJhxh9k+tj@zt=y!`$lrQ=8D6QnxC#@dorV7PPfaaR}oY8$@D82J*a#s
zH}6LCb#^(u>Xcf?Tp3Tny>}NftuHySxTe{MDKy@!eNE>ZNfCWp-_(oBbJ9BQ%}JbN
z_(jIVGET|nc<20S&mLZ|+FE$lbI#6>O;am=88vXGTi@?~_5W4z-{zgny{xh^Ym<L2
z>74Ooj^UyB6{^oKD%i~`FMGx3S5sId>K{Aj+2wsZ*GzPmy6d%c%2(k`6NkB<{)a9#
zSsAcd^HA5b__=F+XEwXeJJ&zKAg1l}rK$fb0*xG7<X2qz5q03B&&8^L1>zjfl;o1<
z{}NtY^oGgI<Re$u@=yKirt!U=T-sEBP%PG;zpqzd+4RRYGwapnC%t+vu6w<x`>U>O
zYUjtiRSUnhX2whE-d}WLvPMU-=b_4f32!RTtA7#xyKJRXQQ|%BZU3fqzVuUL?J&GC
z&;53=bi+P3XXRG{izdEt&HBgSD!^s3Gqpowk*B)J{#P&V9^&Z9ninp}dg}7A*4UqI
z+3!B-JhB!{Z<9X9acA4D?mLm*79L-gRQjyG%+-B<LuilHzW=R#yY}<0EjYI>LVCl^
z-%on4m-sB6E6E=8U8CgZwH_DAH%~rF3h?akPI;F1Vddf4ef77WGp2{M1s`eDaLvnK
zddG)%>Q=W3lZ>t{-&Z?*Iik(MaG2$4rvLwIHRl-B!V-3`{C30keJ|t8s~v4`41#_8
zdX-BbWn8)3+Vt1RbeH6%<poxIc{>H3Jjsm9<V_Sjx}bquOY&}$6??2!<=LvWYhB-R
zszuK8>W?&?c-Mzf+|H%OYlVUQKUrh8)>}zYSx!6~7o{5OGIfaQrdq#ycf9ZpzrDsm
zjxBu#{(_=`j<@1smhP$u6~65fvf6-=D{IzMeXbzZ#2}X#q4xC})rl7hOFq622wkw#
zG-&w}E5CJRKSQpc^5r-<_uqvHYo*s8mE00k^iAj1f$AQK@WtQCbC*tdcy;@x+}m~T
zpViB^2BpU{F8Oh>zG!30Esw+V`{u^|?l?4AS$XZUGbfgtOjFn+S6IKpFo2KI+wgYB
ztSbvY2uxiR<|WwqVL^58Gl#r<m6(M~b{ntyefaUi+v@uH(Vu)AFDr619^hCgQ6j9(
z)V(WkP9U?Q``Rx89)a4w*DcMtJ?~~(f0c}2VfjRM)?LQEbGEH~%jNa;m7#Sw*TS@;
zhq7ICXKxkQ7NsmBl(u1!$ojf7wohDz_D*vvmR+07)xA&grT@FWr>9P`OD9%d74YA}
z!g`av!!TWvgLCdR?Tp7N#W}3S76!HLGMlo`ZJBLp61#flf~fRVJ?<B(_ujj{6S7z9
zPdNGM4d(~mhh=>MbDh7k&5382KV^#d!kf#a(nL1TlYKGI_2!<MdAE9}PkiPxbERPn
z(}ra~++1z=<9hzfDs_HaklsA|?E05e_dJfVE<CZ~=R<~%QC77g*RA$RTl%hPpW(RS
zpRRvcj*eTxpNRE7>*^*gwU(UbyynD$K+f|A5)Vcy-FdygJLFC0%}FeKGIkwr72P4x
zp?~Gt(u6Zt4)|?nS$=$ax@eu*{Ng;V^M)(L+Yh>HnV!h`JgurIs?0={F?`mUVsTx4
zujgSId@F*t^XN3Zmya>KzUTSUtFsSHVDwkfmrGi+N6GH)M`nv%J?de>r*1iK=%20^
z|EDH+v)EIwz`31ir;l-Ht2szS-YI{#?2*yA_?COz(HSY6&+=uKOycNbV>U7J=$&KV
zfA~*k*L>!in#|dog?LqK!pzvJE`QGW&%bfXmN%S%IYD>owsD^}UL(MLyk_Nt@b!P^
z+%I46eSUVvJV%4rtrwfDZ=U!lGe_TtBha{ufk}1B2E9c8$jWIyw1O|TTt1Pcdp=#Z
zYF~YI<DFwhtENk^#@KT4PUG)CcARH=>#J_{DiN_x>FVoZ0c@|!E^pWW{VntIS-xX|
zk1dv1-JNW|K67<yNBfZvi9I*{+OC>DP?K_ZpE>#WvfMc4C(+s7ym?B~L>I5>-u^t;
z;mD<!)2a*)O&S&akNy6~r>w;Mr(z=S`CBtwlgvX`be}6&{nX~*A)yZixdvJ;E^SO(
z#d>rOE(x-FBCzAjHwRATkR0t@k;<1=O57^n{$RCKMTkOF$krC)+{K5r*d^4TP0bCN
zb)!5z#69G!Z)#woV$!l`W0ffWyXrSf*1wACx|O!gv+t_9t(BoE6Gx5C&5IlMy<_B;
z3G`9<u^>cr<F((WQRmWjiJEp4-8Nsr6>FugUES|?CZ(cOa7wK<YoSH6diyVr66QTs
zE;AzaYAvevEVuQqC|MJBT2@G5kDc88)7$U&y)Uo7?cBGXeP6rGZa#_4vzD$ny6sZX
zy;YwrjwlzEUR`TA>&sI1?7hFgGymTHX<P30+y8!^-M02lmza0qEU7gs6cnV*x36_s
z`!Mc4|9*MX=**6&i?`E$Z<`w1_=@rS2M+!Pht5PX$RxBS**sd|&RscUQ~2F&cP0Jq
zZ{1?H__b|!B=>sW_z5wq?+aGUdm8`6;;qXp_WKeoJ(<U|ZpFI)$lZP8+g&vw0hURR
zmn@ueR787~^6gR!CjWD*A5NZKXwmJmAXtgD@^k6@#X3*Fm#UmB_PCtIH237K;<<)m
zd&E_rR~*}Z@z;}3KBtorn&vwU{Hh+dDiykN#=39qJ$O*fO}g}kO7Iqyiw-?kjS6Ec
znJ)+J+m`OU{9{(M-G(;%S=;}ut~s+=<n`li>%!+P4E)QxrLM*Q@^NF$j`ZM@TIVfX
zH8-rkRywb8^}n>4n|e3CGje*o`MSj9{e~y{kGSWt)&2MwxTy2U$x>^RsJS;k?74Gd
z@1Iw{=18nx%;u+Xbh!l2Jci=zpz9nWGH3sm%;MMiVKDC_*Kupn7yf&X8$GdF+qrIj
z{bOh0$Kofw^|$PD{jiwdo;4@E!thBEKd0E?Lm}CPGamjHcUh!)b{F5A(@QH1*L>lU
z<eHOoIDTKqI^mXi<=j`*)=kYQT+eth;f3e9w*FZM9{x>?zH4<Xt?jX=K#Nn8fhgZ%
zkNtK^9bev<o%i1?Ct4KwMQ2)T$(ta*vagvAQd1be)y3}0wd6lMW9E79@~s^|U2e6=
z#XelR=v3e}XU=Tr)jxYqpImb8t6mtF-G`EedZ(XtwqN25t&N*~=JM`bxvZ(L_J&JJ
zoA2AvqkcU&(Y18?g4HK(r&s^Zyc{dL#<Z{bos`N_zTf%AZOe{Tz7x9L<H;SO`h3qS
z35UvqrxmNLC+Q>wujt^GQ=I4SDiAa&{*%`7Sw_y8mHz^QIwc-9_NhO9@_F0oVAZmM
zaOMXtQ(Y=_15aH~UUx91bwYzq+xII!bCj409a&dgJSFcfR(<i3P*Os}i#dK@e%T4J
z-hZ^f)TXcctNYi(mNuzh?&WSvpE_-eiNm7fFQNn!H6lc!R1O-=@fHYDu{ySOx9rv_
zf6vO*U8&kI(Xo=#H0ne}=k4aU8O6aGZ`Qr)+Oligfn6D|M6&9NX1&~8>uI~^F;9O&
zNM{s(XawVg%&^>$i>LaImJ7+Hh|HMkv^1pZyx;`BA2Q~%R&CWba@E>ucj)x@^%hOP
zzx}QI^Y(82VmqbH37SHY_oD-6Oikhu3OzYN%g(uB)=w>$JFYDS|LZ>N{v9X3|DNgX
z*+HjnZ`+&u`+4vD?|;`ixURji&(q~f)al0y-}ks55$rISc(&yBf@{Cpk4>5smFXUE
z`FY+VuF(Fd?5`gh7cS|Tnfp6TIckIWtXGDMTBCE99g=$ZHzxK;M#Mt39V<Ru^kkCI
z+dXgU_J6I59(pg|v7z^=lB_Q`>*l}vwQl_CohG=0Poc0XXMTY8<BY`VuKu1n;&Xk{
zmHl=d7hV3h&~}%K%ejX<ZGEX7hCdI!`PH*4KkAU4z}#ggg1OGI_kTEjX42jnr#e4a
z$n3heZ13YcqRu8)Jo2-mJ_OxKZmZGXs$P)V_91fmIr+?u8;|V%Kkq){>Znu(<7c~;
zIltZ28l~5I@o98)`iY1|#+3_Y`;L0fpQU?khi9g>s>(NhPMg0oPRlW8Hy2J4vQ00Z
zTO5Aw|DOFj)b;K?c-JG<QNLi#{9E5x)kW$TxE?m%o2R*2Q~NG|_RZ>Bc3)@eP5b|3
z)vrmakNnRW&3S1#FTZJdSWlBe+pO-~z#S?@^SZ15EZCnF{`OJza|T7do8R=ORL1zN
z-O}T8?(eFHLUB^;cIs{qrnKzR3H58tn#*&M<<QKhLV~$lSDZ3C!QbVie0A;N+8@>C
z&r=lES*U*d#nyXw%PIz!MF;(EwefK;<DSy=xp4Cx?m2h1PWwK4@{hP*+Ydea+O<4z
zMVoI@VAxm18ttdIJF*her<`yy)BN=L`t;7Lcm5<bK6*UWBWJ}%3zn8H$thRoW&WC!
z#u#+B_`*Hy-^)W<@)#@|Pb6$uw6sB*<L14tcd9OnKCR%?6znWBdH!xjb5vE4JL|5M
z8>bw}%X+0M6_cKHap_Xuvuwg%hCxe0ZiPQ@T=ewq^6lH>?dtm;|M>Oq>)%5EpoQ(#
z9=!tjy+4cYEEkErttorg^jiNK1$X9EO)be{p$vXjKb6a_81=t7d|ZylYf{poB+0y1
zF425THG>&5bC0c)4ZJFIDbaviI^9@fiL+N?M2P*p)$;D$IZa)S_n2qRo*if!vUWuv
zM|0kpvkdY|aUxM$Dt5T=3UbPK9XR^-z0j#WlS+ekY+81?Fw*e2$pTZknM$IJ(i>NI
zMVS^y<Q-_3kZ{H9<+cFf(5)+i<V#h!w)bD2`|#&oyL<2B>;8Rux!j+p{?1ENwhx^b
z&tG~Gw=sbwaBpmZ)f_PfmGhg!+FnYSG|R<lhb+0qJ3Fhge*OLX{p?@l0#9u@dRgb`
zLwAdl%30x@j$1FTpUi%(r|xc0m-36Pb?X-i1P4xC^mJS9{Oz~>=iB}NbhAu*`=-0!
zHoe@|eRFY6{_Su7{=L3_{IB8>=RXcPY66o5L^Y1s-nrX6JLmU4*_sKjcr5wWALr;i
z6Ks`f|GxjbU3qq1!x<}G&e+t7xS$7_ZuYOkw=@Tw`!)ApOm>-8pyWnLWrf+9((f;G
ze%Y8D^su;&<+ZA}&PDrN#ktNi+srd8AKXf4vD8<1_9R7gJwpsfm7|skvzNrdFIL|l
zhp(6XEFZoidv8aYZG_|PJq!kx&oi6<PFS?#-WHy}8&_@ep7%uA@OOXwf`rI|1fvb^
zJuAC*ZSPsjnH0l){Gi3lYYWvD%}oiM*!pFIR`7}IOW*!nywJz~_4|GECq}pZEKom|
zU&A;5X+(Jf*RgevUq{X^dZpKLYe9SQ8Jqj(uAH53CYsU7c<De=FZ=2KTf4>C7%W#B
z>Ae*`f0i-6=BCZ>%gb&&RLx5b`IH{Yx9-543Ck1IPuyCtKTGJD#>D6A`7OL3`7ca2
zdKOlcW<Kfs(pl?n7U^FtDe7AmeP-p^H8O95n~s<ye`?;b>iUCOt;UK)k=5UBuKaTM
zu|TK6`DaUIUrs)j5$tqgYH`=OM^BO}tausR<}FBSJzu2hoEo@N;KM)9bI+3veljmQ
z&|h_My6HuY@0Tx2vMgGDZ=>(~<F9jzcj+8nvi$J`r6djcKXTntd3@^r=Jr1qf7#|L
z&fD^!q-Fc@pvjL?X02Z<RP$2#kJ`G7`K$b*jl=F~Elsc2(fc@Ur<0}e1_RS6%DHtr
zF7N7H6#L4{F+lmj#KO<Z!&1}cef=<XO(UbAu&zPlUP;qZks!&d(SKgtSi9xKZEp4L
z%BwFLpD{ijytcJuk{5r<q?LRxj_guD8nLqDLgy;iFW>zC|EvF3|5yKifBpC8_CG)V
zSpWa;{(oQJ*YBMv_2cd9>;FGq-~adP`*_#GA+15#wKl6fF7V&pIz?#aw?}_3g&p)r
zn>ATui_2P>T}5xi&mBI%_Qw8Xm(?82sPhLj9;5~NI6qbVD3E&l$r9_}%e^^iH?yDp
zv3vM!=DR5qw`8TI2I~g?-EqCebxYt1#fE9Mnhk=>ay8SUyeyS`o=l#4S4ZQ0oIGn%
zRGIC))h5-M&dJ~RI(}Y#bnmQ-CEDASCOrOoqHMP(x0Bkz*_XF+1f5jUS!DWSZc%_u
zYhJIVc45VF;njyE-d!!$yIq@Z)L!_cu>QpSx?f*DryFf~5@J=9D)J>_(Z8bF*X~QR
zZkjn<5y>(87xMm#bA#Z-lgwFChdLTwsQuXUcEj6*S_|#fmsN$LCJXKfypy$Vp+sh#
zYUV1hrDk>R{+;=HeheIH#}!+`OjP~|_P_u7@$JVSzkdAN{{Hszx8I(=eY@O$e%-H<
z-zB&2m+d_I_U*>~d++YwYrjvv{rL9^k({M(xFU3`LMl|R+PbW{5w}8V#nx-Te}CLz
z93ysW(JaPCTb<aBPLi8<cxuwL@Kl3iQ#T|@>bw8UlsTbzB+#VPCHtTCf@QKl9=P7J
z4|7NeSa@-nIO8gXpiT2nW=-%3*~k%pYRR{S>Q>KKbi8joW|}2^<@lZx3pZ@uwZ2{c
ztLwkh96wUc<}PVFQ*?xTyM~_DzlB|L7Uz4up6K7Gxu>E|Xl{ea6!lksv~1_f%$V1u
zxuB%xqn1}oN7s&aC+4JOmKPe9r**z&c^uQ{8zz^^_;~+3_U%Ib(|MWashz7|lNhNX
z88K;|ox|+Yn-4L_KXjbdT^YK$LFhrleS7_Y1v6S+ZA)TuG8QiMRr`IntZ;@@<s`}Y
zZ$F<$J`0()Uh$y%Zl*f>7k`hoE#~yqG+H=I-~RUJJjMODHXe^pm_O%ma7^dam_Lfy
zKEl@nEie1+Qk!nO_VW4pUrtS*>S*(3O0nMIm39(SuG^QrbZsczUOw%ry``AKocWKh
zJS>rMK6B&VzJPp<CFf?@ude&GOW2}zYDR5lx}W+ihx0F^|0?^l=WaQ<e8KV^RV|u9
z3orkAmNBLJ=k<RVPp2Q33HrW@^>N9mMHXzm?!lMt%dLFso_%UcR(pD}z{J+fFB|`|
zCmr1Ck-FuG<J&IXmnnPHjxJpG%r5)a6$|x<WmCWAwwV?+_)nDME$ig0d&|$km;9N*
z@zkMj?oCrRJn?_*DY9Hh%CX#S&gWgb^(uAxXDRzxt-CnIEwiMI<ALD2hjKBp#<M>3
zU)gJ*_`*q)Q_1Rnj7!Gf!za)CZdxpyBJ-+=N4D4b+3t%XM_oh|Z~NE(YXARX{~!DM
zpUdn2&vw6KcXIxl>H7b_$p8DaT)*vm{QqzJ{~!PV<MjRdzn^~Q8vp(F@9+Bhzth*>
ze>Z!_uid8$C*04nu@OqsD+vfuoTc}KgVV#a^UUm5%iJ7AEuAth&)&A>MEK9QH`Xi-
z4x7WZOhDO(-y=eLLEoCSFH+Ytq_BpVp4EN#wP5|(dr4VgEk!nRUjy6cEU<PD@4q%_
z?QMUb3D%J=o_pl~*&YdYiJKz$^_|Sc?#kGwn-2w-sNPsJW$l92yaQ7eAN2-qPBYo`
zk=J=~9*?NC&Awo{cW)B5U(7H%q@8>FQ1Y|1?64M-%YW{EzV-dpR=G0mU5hVVIXgFS
zv0YY6azw^~(o1i)_9Y&B^&;xyh2D9ZN?KuDbMjMvr_5dvGJ)-*K_i=^K;aMP(8aT|
zmh)Z}aMfJeD=)erLan^~=K_-x3HLWn2(*oKS@fW~_tfL)y{l_>*WR_iZ}<P_^7;QB
zP6u5`zrXg@_uKzoeqKJm@AviJANM*H%&_4y*H-<uHf;UZhwipp>{h+i(<-@kxIh2?
zz5Dl-W7I05PK7Oa_3+kVDJ7|MVp>Zz6x!vdPdWAT;J&owx}9H+nA|AzoR;{!@B94r
zcZQQCU*0_@ma7t$yF8{|%<Ox{{7uX2LQ4MBWG3EJx8SXCQ~iG6ywBSWDXC2d<b~Tm
zS59f&d2+VvPvfW*+2!8@cc>g~oN;@3YMsLR7be__uXDEAc)knwe_OL@uH=Wc%LC3P
z&P-wRjowo<>G<@j@^5m%A<Fd|%+&3Sg5zG^p8jT@_vS4ft15K`JSWQktc#zh>n9m2
zdGOedo^JQ9l*f_#Cs?l653#;~$C7c%PW_(9&v8~<mNPG;oXnA!)VJEB?X~-c1moz$
zd&ec3mplL57?-tk#*at5^A;5gKmX#s!LrD2;bDRI|Na%_bg{HPmO5l`i|_lVH?HS4
z%nA12VQ}>FWs3!x7jtWJAAb~F|JZ%{=Si1aK67dQZNI){b=h7g6Q^05wmSzL$on*3
zYce-C`{_B`wY$%sx@Wk~S6(Fg!!k#e`7yaF7Z>&Kj+%RthwtT{bIaD|^mnTl-rKzC
z*UG>%k#)*v`&Tdgxb<S{FWY6R)fOEm9^BWr-+Xwp#oeu&v|hHeW#0W}8m@Hvth)!p
znVXmYPv2snKS{-VlC@*v#N}D<<jUhpOBYHTD=-<boJlI*|NnTzqMaP&i+AnPUT<;d
ztj~Oo_ww&Uj6Zj*a0?6!OAA@jF1yPAX~Mt47KvBJ3-T2d8btS5hj1=Vn)GN2lizEH
z9d^>Mb2eq}QDZvl@G#_nS?aUfk8e2&oo!T_I=Mk_yW~w}kqa+(mbEN)dm2}1p%<{0
za}%3km)LdnSy@q0N1}ZDnHDUw)!4GpZLh`$-=jY2E%}CwuC}Kaox4=664`q&bFrI7
zOyM+viBCSM-Icxf_ruxK+n4+M*Z*2BZ$Ia5bN>B(#d2q+g|EN#zOH%u{b%O+aUDN?
ze}Dg8E^hhLcW+Ny_dY%zU;jtG{?}*wKVKBRr!C<>UFGp?<J|4KhrFe(9BXjW><E}J
zfydJA$-ZfOXYKC^+xg;|C+nh_tlSA{roL&6JGz=(JU>bCb6km-@X>g+z2lKHUO6+C
zvaXs}{Pyaru*HkavTJ=7R)ijRY1v(^crduNJHb%+pyu(5wcbf>Stc*vE-@*~e`&uf
zOn;6E!?uWfMLWM}2^YJCXq+*5!4!DRaU(<IhZ#?E4B!3Ba!{Qc>wI5#vniX0ugamh
zfm<3vE`5I1`7ZjN*`+ICp~6a=cK@CucS(H*>kjKbf=2A;gf(>6wLBNP%OlDw7a}~f
zd3lGynoL3C4p#&Bb%{c)$yr)FRSp6Voi7IdWmA4xsFk`(c%hH{R{z;^SXO+{o!4h6
z|M_R3b^YeO|NhP1Eq;2}y_K?;J{8!_QL=81T%lC9U1#63IomI9%U#|tKU3vVUSv+{
zhnFuatYnurE_X<n=8|&Th+*|X?uVA#hkb&LbbkgVv)cGxyumhgle~uc^l<C@B{|mC
zYnHaiYhQfYxwJ`tBa4O*L#S-@?5OItmY^1?pbtuKCtP=%c(XA1l)BvCu36`-&T)Oq
zyk~QimCN&v!}gwucl7RT$n;sh{N*0Esk0+z`%dea6nV|*hL@X>`y`1n!-J1^p7Sm(
z*3aD8{6F-1472o<d%>$uZ8>eB)!)0-TTAlYtrrsK4UZK>2As<|c3b$->4@96H?+^r
z?w4M@_*vVVq&B<ghc6rtc=sJyIwj$MY?iosBKtCtl^e?x)TO`sMRlK?X`ZuV(FcXJ
z)=O8l53iXy*YnkG{zma_SGy0^h3l|t_DoxUxz~Tw{QIZFnLP8~zM1FQbv%7mYqwnP
zp3CWJn`iu$So?f$*qJwHJ7wo~ORttL3i`8Q{hf=P%Xl6x@s<vgjj4~(UAW57y2{`p
zi~RF<8y?i9ntp1FUUP85>z?kN`eI5NR~{y<ST{9k;mwna+jfaJ%??t%=9B8NGD_>#
z;xA%S90H72i;e}E2&@uVFMRW_d)>~PEA0K#$}>LS6>@WXFI3yq<H@VzyufYliYCFA
zY3*CvAD@XZm}$JoeQN)Vx?o2>K_R9r?_Y1+78(j(VcM!O`K9J<$JY{Dms?!DBYt&S
zP}4K7<E>Y67tDXMv*C+WvC-q8-=9R8F9n}r7San1R5tzlC`o0*e4d3j*j!wvFnV42
z@Z^}_r6Osc6ZLO-g_qxrGBOR-E{f{s&~lyJaM5MU=KU_~Zd?$UxTfgKiF4xFzvkYv
zzq8kGp?%%2FJB%$?%!Wm@~Zp#{j6R7nSmKm@4}03RNb0)?%dOY|KHx0>&M;ioD;vV
zV&O)+x&KtEYJR>oumAI~o#o%t<@;-@{%l$I`tPPWa`u1!d^~(|GZXv$AiJHPGzur3
z__5~7nhUSp^{T=J#rSo_OpSRJzVBqP`qb?!vV2vPo<uZ{g7OuGEG4$qS%wmp<?myA
zOFA>QJquu6<91|2W~F4@Bvw8bq23PT$30FlA19eFUo|l{ZP7Hd?$FwWJdzHp%UTxL
zwWmmUZFPI@)aa_hnEvQRYWSwrR$1zoOT5-J|FRO%O6n=xFiE8F`y?l^jg|Kvgs5;O
zc5zhw{9k5w{2SNJx%VE=EEeR7wBwra@YOtlnRgg$b;>gXg9}smQw|tMv$SoRuvtZ7
zo_U~lzWZb|mX|U&EJWt#-o6+#ZFbfDF4NS>y7QHlH|;om`1bGL;osYLA1;b`AJo-R
zb8km#xz@thN#2isZkfCN^3ORAliQze==%KQM~T(kSq6(VE}3L1sou%n64o6gv~cCJ
z5Dg<%9W7@gb2*vV$nKRf4&L3bXZk5$N&PPKpkw>Wm0fQNv~z{0tdQC;jbX*oxF8nS
z&PVgI%I-x;Oy94scDG~Qvw6wq)0H@8g>7CLzcoCf$~5rZ{Y#~9?Q_-$^T&Ow?h|{x
zE&b|sN6)nfc1;aSKXJ@PdTrES>61M*-{f!a*mkk9O<;e$N(R40S)kpkaKYTCR~Bcd
zo2UPu(3R74eP80kxbwZY)sp?Jx2~(d5L;O9I%Va7cYb>$UdG+othK(#Qr+{q#Ky!~
zlise#Ud!^TyQtua@HF4xk15;V&kc@gI1{<%f$^ppdoS#3jNJM7<!SLZSN@utovw_m
zIk|5BotL|Q%$e~j((~}Tt~qPhZf`KINPDMmE|O|K)lryvoB72(43|%*+?xBv=uY<U
zgOz=^I^N4v><#>Azu)CRZ(sb9B<Tx9)vvUVd}%G&V4t#F!u|OKV~;?wiCpEsUMGHO
z<N2q?UKpyTHPdIxrNnpl-7oanPsp9p_3nbk$@fvuei}XG7kxOFclYB73orG!y4@&y
z>$hlu->l-j{&Tj5+p*>TXpqQX)Vj4TAmsA%JNJ9<mZmz**VFl;@IYhD->id&W;k8>
zD%Q3&=fXXa-kHvfSAV(3g<AbKNSN!a^XsaGtwU9QG>7qybFZA@-M890D<1Ei#5z}W
z?TW>Bc5V81XJ3)tU8&pim$*-e68mv6OzY|F6;EeIp6t*JJ*Xkby~Lnp_L0a>&phV6
zQ{Y-*-10?j*Zw7Jonfw(ac|T+?RWn7dpo;jx=?3ikj*{Gd2ddBojbF2{wAFz!v4>F
z#Y9ymny#$6X8-@oj~V^<@5=Js|NHM@;IudOpO4pnJUsh)omJkXxIJg>YyKZzz5DC*
z{ePZ*ZJ+P?eB00d{}taqF4w(z@iFI_*zhxR%NOPz2w7<O{a@%G$JPh~&5**dlSWO9
zr!H$p%#qGY%Tj*5>g*Lp;{zeRhd#(WyyPjU`0${W*SpJECGG21P5n1n=+Qn0#gd}W
z1ug08WhT4M?%!Tgej!9Pe5>P{ub0!NNeM1na#>QT&1z@LuGP1aN_4tazo)zs^q1be
zEa<nG>eh-(^(#S}rkq(L!!cQX>47Z)rO952Kb{Gur8OAc`Mk{Qz!{%Y#~;jDd}pcp
z61R<a`rKW2@0xu@Fl@2y$rH~l=9We}3(UIq)$jc~MULfp(xxWcKi1DQV_x-h&r_>v
z(F044onrg`UC*;nKa;goRm676?j@Gt?<-?-WsA@GA738-{@wjO6+h3u4$r?Y^X|Qv
z!qTJu={N7Zj8U1LUuT!Eetr{Yn74ks_me$y{ohsUmuc)xYWcls$`1X`ZL@=2gLHR^
zFS@Zh;;n6{sIjxnmd-oURt;QQOM2LMDQ_v5n=(g7bi-oN7v`^iMBSb?Z~B)^hsbl6
z8?GB}F>@DF6V90*mwk0+p#|S^ORn$E51&b|UELXZZ(DiBH~I3wok?~3;w(R`;#;`j
z*V4(eF3NxSd?(h>TKV8kqaVKnR=nIgG3C7C&a?eDga2vkDdnA+R+e~`S;F~Da6wc2
zU#a85Yr{8HxbfDnUixaYV$uGk@@X3sJ6>$Q{LDeu>)qT&;T;#&l^lQbgmu<_%cMz`
zXL~)Cs&6zlt*X)K{Cj8DOs1)8ZD;)9*<m6Y*ZfS)`P~%VxDQdbT*4wIbKTk5-f=0~
zHBWq@VQ};DV>S<Ob?4uF_YCXa7~gx8{X(rwX3omSH`@JDd@386WolpU+i`+<TBe;H
zgPXYNqj#-GTU%$?KHH@yw_G=De{OV0ta>M}XT;8o#X0|^i`Sb!EN*fAT=YGpjn8CV
z@y5($za{J4rR^43{w(eIX{vOpca2NH0@I2Gyb8M>EKlV>>t%E^amD)HJFOl13op0Y
z>%C7}esyBt9lP0|Hh7x%@sw$W{69XA$s($F(f%3J7AYyJn6wzyUkVic^rq*uUvF@(
zQt;$`i>CG+I?}Oms${ZWb;rlPPOYz(mc|)&?lAtl;rH?1iFfuemg+29a%rj0h4#JA
zcZJG2yb{bZs!>145%Q<rNBdaN{SFO*$xk_3cLdd4ZB2i+xVp@xtxK<)rTgZM^!KON
z*{w5Qy!^|=KkuLSJpNezvEs*%*Q<ZLGta+oXS3(WWAo?z>G$^5N#8E}a`ueKuDR!)
zzy0#<?rn4ZdwH3`vHw2(`}q9v<x^ii6@35j>TcrR|FiG^zpM38@&BGXdv`@&dibzc
zciOvoaVz&${VZTVRsR0TbNk<qkJtbD`Mh{X`v$Kyzb4IBJ|tNC*}PKXb<YGZDGmdL
z$QjC3jC(|$W-TkdoLGAGd7O%>Qc{C%_k?{<Ywj(|No?ZzAvW>m0rewq7B{Yt6bml)
zRTE@ecl13g=fwn(;#IoW_MKF*40!m?l=aHAWz&VGeUX;XQLLVNdE>!Z?nMDBFNwuY
zk`q}br6m1QE+Xh<Y{!d=5^qoM!uxw!SYJjyV=Y}CuJ3W)bYGUO%!{8jT^|fW8&}K}
zSaSJs(IQigmdPFt(OX4SFR$n6buBvba!UX4!+&?XH>K@bbGphPI;7BU{uj@4&o`%+
zmvaPprEzVX@}<Tuoxf^<gU1D~Yo{CqHwiSplDHZoy?)KBqu#c$dZxlxbtennl=r{=
z^Wz@b^S3jrf4)0=`0(Gv$V+>TDvwouKC2tE$-U1ZXZF(x56W!jo?jljM7Dc<w5@eF
z{}QGMZiV<sN~+8+T`#fwJ$zq!f4#vwlg|uW<$6O}u5Nhy>AKjO*w3u5>Q8qCM<*Ky
zs#!m2JIpM_qo2sS#_3W}V|75|q&X|1GS9Wnz8RUkb=$tT(+<CzocTZBWrgIi3svq_
z`#SmeJ3V+icZG}Ls<!WvT8AF5I<T{+N!E+GZezf`z2{z*Ey;4Vy!zj{+|ed{YRqKU
z#8nrw9v-xv)B8EDt}E)#>wllaH#MZqPWbNSl=i7s)`|E0cM;}mN;==0+Gd~Te{hJu
zhfny?49@#n-Rj1g47_f;-&t;`vv!U>%Q>s8#(K@O^f%1cn?G=?#|bl+@4Ij`Ei<pG
z@pe4lu99N&8PA``2m5nRy{+&%I#S?wdCN|QSA0ra9?jb!BK6RF-J8gTSIV{TF#agd
zZoIqut?|9Mb%k05+`qXO&p0?Q?pA$BGs`vEf-QSvWGxo{6xwxLq>Hg9{~eEH#F=Fp
z7hj(|VqwIH9b>y?s<gNyv0fRGnQIYS~jq7qa8T59fnlj*Btc;NG$gN(nJ#pmqO
z*uwK}#esE--C4a}Q+8-CyIYV`A9`Nrgq%uP+P%ravmFxWxdc9IU`*{z$(VOPZKm3<
zgt`*bz}MgOPE2%MGV#;QtyTLKo4B6L-1PeEEN>s~MJ#iyJkK2e+c4>vfVpE;SJ-SJ
z=~RKZ`TNDQRf>g0rnbw>ZuM1Cn_$4<))>RJHK$7HM&y-JeNWB))Ojz}zh9miI&q2b
z20w4N#3#?x_~##sueF@HZOMO^+FPIO=h^T3^|^ol-um+IyYDO8DoEa4__82E{BU;X
z;k$Exo?ia;i$KwOO|dzu_8(eW@2~eP-ZSmX+1dH`_EfyRefi<Tqu%1SGG7YnPc2`5
zLHXU}eHA}nJ$$?R`maat{@VY2dHncs3%jZflb$TGJhy7*hRY!l-rG+;@Y5{hjbm76
z`JgLk+ivz%n|R;uULEe-^8EA8n&p|k+f=;E?BupzwzTnHe6_thU-jI9#Sav>-fNuY
zGCA`s)9S1`X78<YJ;j}PlD?*BGT;4muFLJ#RTtjW)d87Wa)NwWu@Rg~!V_M+>r!Zb
z7g@9<bn-_%P0_?xQgX3Zj>;uRO}(~t-imX-4%rD!jN<C<o221UWGnMx(_EQDA5Qq0
zhH8KDV)_#EE;hHM*VbFnt8o3c8Ot8!iwRz8<+FM?ks<grkGhQu=jN2`DBrnytByb3
z_-VI}ET85~t_rL9{r>9xn`RX`9eVxn?(;KoP8wdhxwpjDPqh91=8e#OZdH{n4Qk3i
zO6=0^+sXRB@Bet${Pgzx>4%%Hig4VYCf@$_<Aa;$&nP~e?cH~_|9HRuMCL26o~d+;
z>Fnk|q3==En``FFCi!sdDy>y>tF>2|***+)S!!|o)Rg^a-$y(7ZxnZMF<J5~+IjWz
z9l@EmuCdkf$ChSCRo#5_bNvPBsFrS~oc&@)70*^ajO%;K^!6!>-JC_mY|l4b^3~ET
zT6L?dh)tHEI$Pt?4105p>zkzcw;e5vEaAKB+O6{Q^+%EOBH!mrr<(;lbG?&R&a$ZP
zb&hbGTz}sVv-7=TD;(R?BJPH-ms)ovKzue=O#Pd+Mz2qGWvZRkUe2&y=v0UP^fQ|?
zD{UO4W9EmhyPKuO6JL2d|2x0q>l>RV#Hin~Uf$r{_tCmFHPv#xzM9mfJ^6|X!72yW
z?O5k(Y#5gNm+`!};^C*0o;0;|cE3?+XkFEG`j5ezn|xc-YRkTg9IUl2yqU*(Q~Pw>
z>sam?mAilMQ_OfJbn#nm!FKnXE6w!p_REA^(yGW|JCtxgyzj4@f0)#v18Hp<Q*B$a
z48C_2I$k}oQasa{H8VtnS2BE+$eU}Lo1Lne#KSjAcsXne@Tv)wob9?=?z98Tg-_yZ
zzi#4M!S&_#mR%LE-phUZl(2>0EN}gTwv+oGJ)6ayeT8ua_W`XhKGzRbpLxm1d-?Ii
z(-#b;L?yERowHnF36Ht9?B1^T&(}Pvx=?<l_Oq~eY3j8#$I|9CW;s4wxcGguc*pF8
zNx8-kPtNbtu9Z!D{bkALYg#`Y!hbGIn(AS2;J}>tefv4p+QRHrow^>#T~+Qe)XUV(
z|JmYl#CErP@R3!scz4u?Dx_%E@rFi7zIWseoV04q?duVD>NiKvVVX6sfyd_A27deh
z&;P&vf5HB5|CTkYi!JZym)F--{QY$G@ZIQ#iJqquZSUOIJ}~+Arh9ky{&;*h>Dcyj
z{q_Gmi}w^&{`z?R_~rJJ4b8gy|NZ*-cm4bQm6iWD$QYgU=xf%=`o6y4e;i-=I`xZb
zDpxOsAG489^kn-q>y2sp3Du*ICYm@-a5nSOV4m<sDe%hkhkYlzPhQh-bj|fVeafvn
zKfKedJ1g})$BJc{k(UxfMI#tioV#VVEAlEITWF5utFLc4MDNQiD-Ft1VwxdxaH3Sg
z2G>x<3kBybRTM;|_EfHXo?g6nw<J&aucT#ZTaqmL_!L8(4+$L=j4t+MRJA=3U}~Cr
zVY}0=F0af5E$$XqFBPqr^0!D}%9*Ip%||aK-9Gtwr@&$X!RZMP?&)e@nlz)rZeFyU
z<dLIu*2~8|n`3@C_*{C9^gO?tmn}n?RZ8|&pV%h3VcqLZ5kV^)CwIQ6yuGL9i;mXp
zSxRObMNS#m^Mozc+HmN1=#>1$attfaUFYmrHL=6^bNlidi%+Qm&Xbp=?yUdnw6?7*
zP|jT6|LBQIYx(m!daTd7)PjSzzVU0zygZ@Jy~SzT(I8coDog(m=_hift*b61{ElAP
zrnEBBGs>iKtNZm&yjMN!S+dp@N%m}cFk@3~)@(kW%6ieohZ`oC-JGPcdO}jx^rnnZ
z-3@7rmhx@+=x;xfRk&@&Zl@K04wShBXRB3DFWkbn&GNtpRlU=pf;u|W7hO4cHmBrq
zs?wx48vdU|L#uahTg*1;O*q4jb+apvT0fKs4|g=y*Gk%ZX8xi7F5X4S8>&1Lp3Zr4
z?yrT=<~dtV2fc3Fxnx#B_}qA&zDDU5{*}*oeRhVvF;nq5e1Dxyo^?f!=c<xO=e<ut
zvNr}U<IiZ~J(YUl$)CNhzd0w=_I-+T->;nbX_M+93H@uD0_7(?4~SQ7&=Y&y%j-US
z-QqyYf~Dc_kDNR$^p@K*a=JHX(Gr{K?;Trbyi(ob&!lW5B0hQU^S#__&&uuG_Bs6Q
z^QS*@c5*FESU=tW>d)9a*A81;U+=HGdilH`mzc~1HH0j;F_&?9?s}1RVxo#^p^U<I
z;R%u)Q=2?iEY@SR*|lSyE9Vk5*7)NW9<2*>EfI=sn0!HAhPzKKWkzQ2=baZrzv(X6
zCaJS)-T74(8Xk;6x*cT?82lD(oOQENtapW?;-XaHz$>~cN~eMugMH&0oSdIt-IinI
ze#xu*#LQE9eHzC<>c*{k_bV<z?&5u?PminK7=+Z8hFrbupHvpXaLQrgwU8uRjm3ZG
z&ib+`DersA4u_lDUS1Pv-TXVQo7XC!<iXA}Kf0~gezj$_aEW|%CH(pl&t_fIb4Oej
zhHekte4e9o;_<-vWi!=lOGNW};(xw&w|c<k%M|_Y+TmS$1T1yhf45J6oftbkrLW}s
zgnRqyYV-3C{rr4hzy4?D-zTT<o}PTSeUV0z*|GD-`(Gb?*KmCKaoyuDAKsh(wcvYI
z?bj`PeqFW8n>AlWt@_G~uU#xV9$df4T06N>PbfJsv7mU~L5;REyG)rhCpyny7g+z6
z;qs<cx+Tv(Kbw8o`*P?V!w#8;O9Dhr?|QK)GjhQKmcy#TvtGQjY~S6cb3jp&NAQuJ
zqg&smd6!$K1?^>8^CFR@_d~Ei)G7ntrZ=-4u1<0(YY4i_bm`FE?sl03or6nNo;$^?
z7Tnnv5W3~Z3qglSiEVFh>fLr+_1)ZQ)}>gN=77svX7u#h2r6}Eai)cRI$IRdo|i4F
zf4wsG<&_@~u5yTY^hw3)6a@13ztxemnfA_W($XzD?Ww+LX<e_}QUjd2Wu=5p%naQg
zmMr*7Inr}iwVCh9R|gknIRuIPsxdE{!pze)Wx=c!&t}@>-9DZyv}8KF>DJ%OlN=IH
zFPdgodnd*teNJ(HQPe)>Gm})U=blfkUu3$|Oi4ybbEfLj9LM&+t>;dj{vN$XZNfj-
zZHbpZgiqb-DY<>&UDLgvF0Q(i`2C8LhUt%;Ug60SCvG_RUvPNqcyZ;ops)|M8YW&|
zy_*86jyQ|TUA%kXYI;>@tjKQf;NM0{SG){d-|qj`vsd!<brYS6G`>4Ozx8~wNxi<3
zuca-fh5xP3$vqS1eCA$ys#V{i?9DRI<%hN9H=MbBT)X<i){B#6t*>Mq-Ql$Nuc>#(
zugG_+L>^S_;CXrYcTo5H+|R`v3~Hek`I7eezb3gG1Sjv<a`CLd@uDTiPdnV(@@QY>
zlUEtyD_(!g5bxGfx^<?gtL#-H^KQ-hzu5`rljQC-bjBEPJ1x8G`gC&45ALMt2g<xR
z?e4C<X_}%ex^VBxO2zV}wvSD!>$e@MeD?ok`2~X?*Z<CX;8iHT*L+^S0?*UGDIfK>
zr#n1(RcfpJTE38fuYB&eSyF#4DGNQ@vE?K0LeFViR+TK`xn%fIW-D)<Xx5Sfm#PPk
zJYG1shn~sb(x0?`dh(o}T|AmS7m9Q?PyWwx(2HFsJ#RCM*LGR4{yC3?79aVzcsBpv
zq8-=5nq<T-+f`p=;62JOma^*}>lypSvUi+<!%j72DSmVB=rV74&C#}E)uVT7^Iv6$
z9Iu`Kh+o(9`ucwU9oZ~fJ?@wFXfLQ|b>3F9^2y`k9IZ3?f)~nqM2}=l-NEGgac1$9
zhef-0ZoIA&IwyYP7N(^M%N@>Sy;i#L(EHtt*Il{d{Tc>Wt#l>`L<p{Ra8R9=(#_xy
zq;<RXu6@v2q1dZR^MW#57S_DI`;5crgpA{^2DM1r4XKt+b}bGGr?c9krs;ftdGz)4
z_AgG4&ptl>`|-_9>ozz?eYtla*SGwB-RHN`wZFx_#Mk{gR(xaEykz6=yQjZ4-&6f(
z%b#DK^ZZs6t-R@fy=<9iUis8l?t<T4l{h`-IUBwD5bGi(9^}$;PbxB$!Eu@D($vNW
zH8xK_Ka1{r6Wn6DaAD_?6)%gXU)r?eblE+R<vFkSc80YpDof-|NLiqyU38I+b*6K`
zA@y4ur4`jyt%^#!swdKDy1v`tXqYIg^EQpemp7eXu`0}TqwC}&dv9t<H9i)r6kOvu
zd0M3d3wLg;%7T_Q4#)6h<y{&lr%Wpl6H0g7yn3be`Qy4XS46IUx3%S->ona>s!Y?a
zsjQSb$i1`FMQK~;qUsZFt><i8CuutIhLn2mdXX4uWKzB3dG2Wez1NB^(^3UD<d`V5
zxCD50$wX{<X6RbtFLS&t`s<vl3j#QqJUriDOZDt(ifjr{*}Z+sD*=`StxrEb2KQBz
zR(<AsFBf-bD!-BL>A$b5jW*0XukO3r&UV^|TT(m1=2^QIzn{lzx^vO_iHwogSXaeu
zQc%-#FkX8tYR{W2X-mh`nKx3xt|@*GzT?l$muT&LQ=uoLxu}mfPyXzd`IlGrUNz8S
zI?xm;Rk7A+;%(-AfBGhAn`oLEzDbzO+u7?J)^wsPVeiqx$f>IwmiD_pOFx>jV(q3k
zdnY-(D)vfhN#=>XCF0x?-4_%W?2%{thV9MHg3{+pq9>)@Se_L4>`<wH$DQ68t6Uh8
zC9X)=Oy7I3L-D`1%z?#xM|4>NUOO0+2kcF_t9v-#KiYCV>kpmowCC$n&quIqY|k-W
z{+9Q}_YH50)0eNkz-Z*s{cKs&+1b6*E<dT2Yu@(M=6FosEvwgY28Jo-$M$P=Ep--~
zy;Hn4HaMQS?8}+;85hcOkEP6-FTF|n**V!YZ*G+ri9S31_Z`En^_y%DK6Pp{Xx&zJ
zOkw%yp2Ml93z^?V^?Q4Wu|L|rJ6IuD=y@w=@(#WWrJgH%P8+faW-XWPofG9}7_jPW
ziM8VjwJDc1jxC#TYog}AISB=mHneW&Xlq}{@^t2lRZ<fkO$~NfoxN^dmeb;0xwnfS
zCEeVyxb<OK<GNL$9rq5ewOC`f^s(ppdm999eE2eraiWuw0@vC*`Xw_QQx9BuwnqQ2
z+li{cIn#FjIijDxE?#!hq_*c1^ro+rThO<6(rJdbY<C`)Z2b`FcXbP2#)~c~u4_je
zw!SU+zGBZi>FSMYlSE}zwOtNy>|$9q)Ag{WFZ)E6NX3Q>hplO!)~%D1UCr23dg=O)
zg=Z&5DQo^#Z-16J=^f|9U9(o4T6n6AY4Muhhn1PQO#6AaS~ldYo+oe5@XFHGc3rgH
zikFYoe?MLRG|%_-r%w;h&c43>{ktH$`b+WkmM%??znuPkyh8AF?T$L0IZfwo%(gBQ
zn|!j@%<$9c%m@1#y0utmA8<Fjd2i7*{zUcA1(UA1p0#;iWA(f^P0zyn%b8hE7rbjx
zi#~HjV&eHq>)i@2vIgE=1;G|9Yc_Q<m@kc9yLzeM!_^(Gt5%0Dj&lqBD6`~R@rj_7
z(-Ky<c84>vE_q-YnzJ$0^I2hjRYTV;%R4JWkLPf8?I<sEm?<)4RY0ii^xWB;Gw-fG
zzieN&=&7<>IxFw#r!8B>IKA!bWDUDOf9_S;J0o<>vn(B>N<0=;8SkDl*(}RT++(TE
z63rD-noCXh#_&J?u>7&%eB1fwRW|?L_H>evs?yPuHQU#@rrlDyrxLN_kD%)8DH_5%
z;yTQf6i);w=)QdSZ^I0=4pZ?rmif=$EI(UZF-han_T|Sv|L&h}S8L^UVBg<kVOK&X
z9eo#nUs?8?e^JC`;Z4<fAJ$vtZ+;}xJ4-mm^hb}7Mw({ls^=V?LOJbXw#VNc6x!10
zI=fhet7VJz90^l_StmCB&VGJ)Dl_+mz}~H`bA7WyBzGU*;ZvN`9$!68JTZVL*<E(l
z`(qauGaS0HC}^3)H?=k5f-~N=>(9yhIrrA%MMvH*`#s6+<X@4xD`nvurbMS5f1bkj
z_M)K{_q%k*$$vI`>qLh(CF(zZdLjLgaZ=bMo70gK_Y1xMd?){1Pqbg~lUg&a-C>n_
zx7N2`NLEw*z2oEaIIq_QT8k&evgmeXmKUEj+Ldwg!u+`cE|P-5r?<*5OWfQsZ|#x%
zV;lwPi7PgU6y30U?)uRt_ZjQAw(4o;T^=0_nN`+tza#J;mqqYu)kg2o3oG2e7fILM
zlhN7!SkP@@$GX^Awg0Q1)^RV&kY`%H%yZd_-RE}5uszOXZ_?&I-dfjm!*N~5N!CEE
z`zsf_@jg)#&@xHn<eaJbGq+C2Xw|o6T6;P;^&|t<&Y!c7>E_7~sr&aSM>Z5)yfjJ5
zH}gc-Y~{|tw})KyuWUHprD1n_#jc4N3H(Q8Olm~M3KkyDSKr<iWDz*&0_#s!@wvyE
z3Ji8ed*7ZV*(N?~{)E(m_zB!k4{IgtUMA%f{ilFyRd$%imswR-kyhd*Lf^|CZ3rp%
zHV)tF(Z~1o(VcDI8^5+X?fZV<?ar0UeAj64nw<T{@s=Sg*`=l8O3A9%)>D|jM4mO8
z>!cv7E5pki*u8QiW71(plZiov_p;i<H`I%`ZjP_z<6;a^eX+E?H?rso_q`i8w^%%G
z?lM)+a%in!?Pt*le861t{YHbx6yfduQWHKLpa1oX=VYhcNcm}7W}KWmUw+2>uYT9t
zFIUxnKfC?4zW(;N*W>Gdo<6=j*)gEVA^r5C#T_53ovd~VEbi|XN)`3ox-UFm>wV!t
zvGP8?c|tBhQ3@Z<elF8HC3F7Zf@>F5O*^H6nwBn^mGt=M%RO`N6dzd8-QnTY$rSA}
zRjN|-&dQyKM12I>Qydj#7A{ifVzxH(>d;-JHcKe0M%qHn)u}}<YodpE*K;-XHv<39
zsJ81)Y>j-;vxn6`E0Zy#SMIFdI))=Yp{7g@71KU+2;BDymr||UdLm+GvDuMTvvT%E
zpFF$0N`Nsz!6wG2dD9Z5Gs%1Rw%$p7F1ojAck+i>S6qTxxMFv3Zdh}=>iO+X!OGh<
z&wstonYTSePL|73hnvN7>y&fziuGOuopWJ4cluFM_Pf<;fe}&HcDI{m2HC7B_x7^P
zkG?l=-aNayDw}!cCwCsW|98vW?I(`cOy~Q4`r*;j)uDn{r-+olKKNwMhkKW*IbN>e
zi(2_wZ0jRd{X|x-sRg?x_?(<(+IezYD%aI3lgzHlU0GnjIb&H-@O1r+b=_N8uUP$g
z!GC?4(UtI?w}Cg*%X#0Ht>jc+J~80%gp9K%w7+vnYfKBB>{~TI^&^wGc?Y-fzBRsy
zRXW)}7nK}eaQ||xhDMoyAIF86J2(rtmQ9?+7GJ&Or=i&u+3i6yqJA8Cu9fIF!F;F2
z+2_Y=ymzhN-Ct;3zBKsz9^XwD*QLBWBDv+_vs95^t<A6Jf1cl2aH~~rw?SiAKOd9(
z<xM9pFfR5Eh|qo&R^s^MC*P9I65oGHDyfzWI}`|IC$9g#^DTc~Y@YM^FB3Zc>1~{S
zga6mxeNpe9YJQ8Bc@X*b*A<gb-z@g7?Cvggce87+3C+Bpdiv?7izk~KmetLBH<R;S
zW%2RD=V#3QZEIxA;Qc%N<e|MguX1?@zS-LsYX8l$+bTVAuMfvES&Qx+6E2<QUgx>?
za_0}7*)8+<LhRbQeq1u!bUEqPyVRXxKl3bODqkz#R!==W!z_Ni>9PM}Gba0Z^-S|I
z@4P(a!HXp32CqWaDQ{K=y$ql4@Vde|zotGzXp^RN$`ZyaOn<$m#MN!;&@f3nzx~$f
zbvG72o%K>eX-(3OLp&?J?`wTnzcMa?x!CN>>TPVk(KjAxJlnYZ&HFn~T*EHu?mBx=
zWb2Xsdyyyh?RYMC_UOZNue)b(t}yd@cX!I-3;A3D)$i7P`x~PB?rV{z&z8D65zp8Q
zcQO|1T#v6mUMyoEDcHlHx*$m-qd>&WTXAQJLHd^EN!8y1T%M&>*G<^SsC9KpM$ciX
zcQ%4ykKf#p5Mbu6mRd6BuB+4LA0O8!Ev(yiqSMVYJ@(6SdwqWW*8Ye7KTEP+Red|}
z#Vx<@!u{RVY8H<>JOUpr(3&EU(y?8qy*qi5^W1<(EILIk`IgU51l~S7>!t9NO+6F8
z{?n74c4Nl&Ip^hO3Q3#@7d72H<xEU#=SGJ;DLE^{X1DQtVQ+Mr#&X5jbNdnzsgM<|
zufuO}KYVA|z_M9t`O%Yeg%9Z#6xpUbnP29qGPwM>hHv@Ot=0!mA6>I*X{*4*GaueO
z{Lp(lGy7wt$dsNqw!bAdy5%03zS+0VMP>v(JlMk2cS2QY$~<H1@SZ!z--wFYM)N6J
zzxu{ED`b1e@z38>G!vd*UfK9WJB3fMWO+w>e##E*f_jT7sy*ra->(IhcwFZ=YS_GL
z=FFumD>wXp<LDU0|GoY2>h|f~H!H0TAHAJzzW>js*T4DY`)=P<o;=T^xcvF$<NojK
z|4x{6_wHKGT`MAu)K1J*>$h)a)w#SWc`*xLfXeedI^EwgUMhIBs$E`Co#N;@Z{4Jy
zSqqd!!rtu536$&oyK=@2wNSnLYDuvyJXxk`rAcZ3|Gr2Pcqd}Y>JlopE$9;e?F$Rj
z{I=*cXElC3=)WfN;ElD{+Cm-_J%~CdXS#M7ALB`}t=A^jUMgI7ebr3O7w?k}E65p6
zF^Ok-c1LPryrA%IJ{j>Ag?k6yZ&!Dna7u6Au>)oW>V=hjCo3=JeL1)Dtk6&U;79M@
z?AiU{UfR6L3nxBgJyEp%TIQZ<bH8=`K5|ETa#D%a(vVs|UuDgS>5J#B-)VMdsd(X>
z7uCN%@>T1VPJDUs=9JyyW+!G|Z<2msT$mR8(xB7NAvdcn>sh#4#q=QYCtJ1d{9M!f
zWM<u(&G+3_8%%YXvsC14+`;ufKb8Mp|7=EPn~l#y?n~PRjDk7?UZ<a%mHG1X%A0R?
z@YiYGH~Q}OynB5j*SEv2$2+uV*3D1i3FH55w5U^Pb<FaW<!!$>UAD@UO8dsYjd{_i
z&tbnk@5bG_72?+}EpmDmI<e^9EXD?wt+h9!zCH-_ovM@cSkZOX9pk5*PFG?##yK7P
z(IdD##`_${vRJW--N(zK7slSdX8+i(S1<6+)<^mKBe-YXI>ErAy<>gW?TyYEXRR)+
zI&o{_Pm44;jl&PpPPmlb|F|q>?efBcNlQ$%ou2x7nX+Zo#<so=o^0y8$WLf7=Y;ir
z+qAlm9{GPud!^H@h^naXE$qxzFRUgX*~;gh8dc@YDD1l-<?DR|VZp}m@A8JVfqVGA
zty-SvI78&;`fDn8rb)---V|&+*u?Ab;%mv;y#<ZcuC+TX?@XP&ql2a8IsfxND_*%u
z#Xme{{q`>h;~L(hd*n4cKPY)cnu^}5ZmZ~TzWA^6qR;M)QKb_n^-0a!F7xi~<I4r<
z!gp3WoedFow%PrB!UiGxYj1;}-fmkd6?Q_&B(Wujv42*0cyZ^}wbdrB_mrwTQ}s^G
zoE4e7^=NCybB&A3mzKQg-|}iFSH!VRo~PDnZ{}LysmsprS?183YpS~5%eq}EcC^oO
z40U2<D$Sc`zWzv5?(8EIq})Oan6<KeyTc-^1dSyB)SPC>@yuNmbT9915X+(s)1qvj
zy;^;9b!x26)877{KVD~kUcMaE>^-q_$(#jor#rqiecCqntdFkdv?(SkJ@4in_B_<K
zd*d%}sS9DN%VM_a+S}UM%JJ#T#VvcPx582&NporG#_|hOzC3=s{Bz~$XupQQb4Pyu
z_~CE=@ABocs=wdP-WJ#AFSCef>e6$YCwPVNRno;1Ga@tVL{!&v$q5QB(3v(#tL0tf
zt$W9OM75IccC$2u1gxD`s<kjEcE_z34$WLmS|1K7FnTmUUCmV<%=)I}h{Viyfg-=J
z?mhJBhZy%Wvoa04HxiB0-ZtLx@P9G&5U-=hSN<)5Ww~a%0u~)zn{evyx`;h<-@ZDu
z!1B0LPjbqGW4oW{yQQ3%ckOs)SHECd$;_pvrJb8!D(Lb{y<L>sBi1hbJ|)Mqy!xBI
zc#ZCn>^${<Hz)t8bg^RpKE>qju9d5~FG$917Vg|q#&PiyOM7&l`JXGx9Cu47@-Gba
z-adD-9gBGBRnc$hFTNa1lGmL3vEs<0Xh-$%!#vk=UN0yM{Pvd3a`Ef^Ek;hw9MkSU
zd+RUtF~xbQskGj_zlFaIv~MnNNleL^IHSw(gh}S#9^b|8?y*KJ>7jn<-%p5ywe7Xb
z+F!8a(eb4HZL{74?%7aa()9kz`u@&5=KKOFZ+|Z-*|@`<I>EkPiU}9ZrzNt4m>Bdf
z`pzUBv#KCZ*YTTU%%3->oT^N&S4<U}ZoSBBr567_2L%Qbp45z<7w<9^Jp*%D&g|=w
z-EGJz6xFt*`u+MX|9V7}0$1qnEuD0{w?@tF`ZS;S(XVG-*Pmu~NIteKF?A7tb4gKV
z@|w<BY^&lVc#CxQrauUnzGc<ERE95)>elV<)~eP2)>D?^viqIk(}kioX@_mESQhK1
zMmqbs@4Ei{l7hvjzge55?#?u6Jao*sVX9zAY=$wHlwh2=nyRSi!eyp&mMwX5MP^PK
z-+KQ@$uDzk6}!W||Mb=6o6P1aeCZu@Z0UydEq5ojZ(6ID$kF-EXKlszhnpPuYtlac
z3j1Q4o^Cx`=U~g~nX~4(7Tx~nr_&v<xviN;aD}Skw$IO=%j{HgkM@h$s_(pUO?H&d
zoS!w{-t?$_UK7<Npp^eE{X2sqSLE4{2UB!as(mMBO}p9T623s-<>E=QpC$UdHt+U6
zx|T!bMTXvsw3lI~8$~wl(h2mvxoFp#>@thR3`dofbUS_C?NloZ?aDSiG37;k)=YtP
zv(iG-ZPWKITdA8W_n=fV+Nj@q`%}N=D_p)!?6Ye&ebu{K)opi;%*jiK9pmSo?^Zqk
zNO1OX%iS%z^L_b(#4n|FbrkKj@LoLi@-DsCyY#eKZf~1=?)i!Zo)f;D&7Kr;uukjF
z-*+3<o!)h0TCtt&+)qE>{5&Vfl;Opsc<Jt8Zo#Na%O78US#d~n^(`TbDw+HH?QLcI
zk3atTl})W9^3dT;EX6r-mzUkPUF>=-{ty4!HGLn9&PPvLnj&B%^I($BQZKhKPuFLS
z0t?OjCkA-2t?kPAGV}YJzw?EB8!Vq}7To>oPwt9|f<g>Zxyv}GoIc-sYuC5E88c^0
z>E57P`bD==cFA#*gG&0-7B9Bbt-c#~?R7?<OWV}mxwaSO!oo{x{G@Ma`ju8$Pk*uN
zQOeFEPkT-!7Kw5#Ync6)MbLcSONm{d7qT3U`>FUjd8h2=KQb==jqY}AH~3aR_3XEw
z@lRB2SE#&ScJ=1r{kt~`*lsAP`DnW?<%-V?!%4eY?(c0$*pPK>E2|INm(w1<EcX_D
zI1ze~VTnePh-m!b=l0#N%y<4;^SdK+`O!z8+q^QThR)?%Wj3QqF=Oi)g*daGia0M*
z>%-h(AGOjG%)A!=R%`c8S=Cmkdqn=+%x6ayoX|Nkqj$~GU*b%a$DTzmI<~60@%N>q
zRbTTH{i;q{>Q=hFv}e58?zL8VUe>A9`V$LIDxEqPe9_-*rJh>Mtn1~EkN%X}eJbqt
zr?=&kBP$m&9d~_Xz-_#FcV@PbnDau80O#^M+99>d&Q4AYSLf*%?KS_l@xX~AO4SF-
z*M7KsPwjkUPGrHO343eyh?X;0FQ1~jKlx4Zv2`o0({}$~FmGb9PHF$KY`xxV>}G+<
z`rDSyV%vR3IQWrX`UfY?Nvkcn?;42aDu%8Rwb~iEMET+={?dNF<@a;jo?ls0#Or?c
z?h!vRlPyaQO$@mvKS7m$Wu3L{KQoI(*H0T=I_1a6nz%&tq)_ZLVZo)B-);{$Q}Mv?
z-x1Lb*>ko!L<{w9-ogJ(W0Ut1X7+i`C3=59^G-g`aOT?a)E&RY-llJRyo^_8>Aiy+
zZz-*cREd|rR68l+glo`4*Q@&_t~O{jHf;AaF7e!^!=M#@+}Vkp&zE6F$J62;H@(@u
zPI|LoR>*dl<+^i(TuQ`3zH@P3U2`|$pn`XRh>!OL(W>7qL89D|72e5fMK5qz|5nn{
zJN~)Mq<_nm+H!+K6GKE_C*C#Fzq*s7=-Y`_mKZ$_cApUCs181N1Gjsds@oiWXURwl
zuUV$@#7Se~;`3{d?To3*xG1_UCbMJ4E3K;iYnGj~HJSe7;WEW=kBlpGLezOBl0Fx$
zZ@r+i_VlDHm$q@LRExFD>^i~rr1;#!r(VV7-dCo??XA!}c=_X$D?TBMmpccVbn10Q
zOgpT&X^OT!hv?#2&Brf4UhXfiv`zSKq0PR!j|KDJOqhS}`10g)J4FuLRyv2;E?Shm
zYp;&XBhk9}Q-6(iUY~Sp`7O<QQ+any4knXJRS9O=ie6PR@uu9ot+n+6FWyKo@BXxX
znc23T=kz+3&3j;&s#?_I7FcF;@MX$A*#(t`CvRo{zp1LbN5iFMmD#_oY%k}>RvSIc
zIkutJN~i30>Aea4Z`dSC?|I&tcd~XF*Zu6v>3LE6XO<~0+x^q_X^@7}`Kf=;w*A_w
z_51ezGqV>z>MZT3-EsA++%Ef{&sM*G6?~WddeU{}g7;IOrUt!#A*^=V@%oDgZo-E4
z*;_dmY1u`wef_gdsVjWi>yukQ&ODYav?W06<%U)ETht`QzOpwbdwJZ8I1y9#UwP~1
zj1^WVJ8qcnnzMW1zr<~CS!YlA(sc8j=IgcnNhfygy{%Za>(~;`^iu_j-=s>u9Tk7B
z!TM>{tGTwq!o5Z1*+tt!5144*xu3XVYq0){t+}VSO!>{RR^b1%O)-5FGaDmv1r4W{
zuMK>E@=oxlDCHeB&lb#HzoPN_u99;fydGcQuy*RSV9DKQ%cMVaPU*^=&&lcFm=!M^
zn<nMc{C-A?&fX;7$@y#F&(_;wC38+8<fYuVYubuyH(LGL((>YBmvVGi)XVdo{s9Kd
zF0;4U?R;)h^Ufz=8_(fw+j}F0jW0O8X_^z)x#_s0X?3`7tLyCh*8|la`rW@XSMW*k
zq*L6Sf%8?*_HS7g=qS3V=jysm=Ynge?+L9<x%gdfRnV7Rtvmf@uiNnHDBJq?yZ)W*
zEtn|1{Oc5!otss%lP4U@D1M$?5@f@7a><)@OC#@J%el7HQ1e`;gt@xZ*A4TT&UhxV
zy39E3_vMnLdsPejtQnI#*n&913@RcWrB6<3-Xaqg+Tvc}arLRYsj#W=Vu6WA&8=cx
z`O?^h1uawSPrYL}TE^eJK%7xHD<I0Hq{OAMXq6hHYIXm`X@1A&{C4rmxp_yt;mPHn
zKT@`>&A#bB-OV`0bD1jZWb4B(rET-~O>Ft`Byidc&f8D3J`~%@AHI|sJDsT{RlV^{
zRCC??_(uO@7R%CNd_vVz1A~pjyG~|#x)x8<`sTKy-B?3zp7)`$&F76JZW@bt3Qby9
zCNTB#%NO%bFFx!OyHwTEbK~}^X}TNMwf{c+cT!H}4*&A<%vcQ(-YK@NEw2`txJF&G
zUOqE;#rGzap3jxPzf{;9kN11NoL~Olo?2n2%3oi;ynG>d_u736<D)I70`E*O?wsf&
z9&EIkVdll_$x}0KM(jK`i<f)KjGnd??OvSIUgd@HH2doJNwGe-b<W&&h0?4HyQ~lC
zwvAp>-0v^iEUUTq*ZS+}H=RDFt^DFM_s*{qX@x!C)K(vQ!S?QPag4;Nd%hksxo56-
zI_oou@kDA0pZ>{*%xkw#-*n36&#T)8GnIVT33ceHKR4GddY9&USNzu-0l^=u&ikay
z*l_dZD~ZkLn{uQ)4ob^EYh^Y{Ibo_lyGd&5x1JSC?X7?Ph;+Mqd;3!v@hxw1HDn7e
zJ-V~S&hFQqyrSC=gBF^xt=u8BtbSRfY_@!o+_&wkR=1opQD;1&(q#QLH+Vvfcj+vC
z-(``~d4dy4R<XBzkm=oiQDH6fbwgK{{&V}jm;GJ3L2ZrMuGKG=gf>svbEf-NgZX#C
z<f!{q_qbCJ{CSqNUA{TyM}+!L$EVWXf$rb-hwWIfO-o+l!_-X=uX-u17q!z4D|In?
z%T>rAp5D@P{*~l^qjD97HlJ&<_hNpZnRh))Fyq#WfQ*g>YfZPbDLMtLSzCK2xM-=@
zG{4G>cjlaj&szS;%59pH@?z&Pw&#b0<C|Zs>M?KIwKA`EQH*x1w0U-x6w|60$GtaX
z7A#BLKdqac=lTBW3y*7BJc)|pEsn@%e|97-Y{^D1Z}Yre4jVicNnZ?e|L5>h?XYsb
zY1}u-y|>?ec_uKE;pnVoo3nkQ_R8#j&cFCRZ^m}7`<>es+C@kB?=N4xB*&}yc<i>6
z$#qi{7f0W<Woy%|JT&uFK;_g`JU3Jiyr0As_32`swd8cQWtqC6YnrEp%!%vD5!vA+
z+84BBLd>Lsi%C1DL<OBaba;~XleiUXfmdD!><W)w67nG@D&$B)fU$;6db#^mQ_%}5
zOLwl&Ql1&4GyA0L;pwTOoyAi)3$|{nDvLhgv{-zaOpNi`MRUq*J^d~8PfyR<`R>5_
zjO}rq`^7o~%A;L)7fPKyc=dP5{UW>kEY%LCeU*043;JcAKHy8=(SCf&*)^-g-m!jr
zF}u3Va>|p4GtYK$T$9+ajismHWnsR|CJVtU%cWj_a%?@5C!*oRIN5Y<chtQc<{}HF
z5GPZoIle~eQL55{8=vwVeaRELclYky#-a~DO3ZcOZ#|?lbCnZM((P&WKc5~hsxSXv
zpntk(k<E;oyNqm(KT}l9Iz7o}vQxj>GN#*R(;p<AKD<fc(c#A*pG4RF`C|Bc+uX}Z
zWp;A?$BQ<)&J_{ctWaEbs^Zo47pa@n!l%C4AF<M9%UTTv)}>7EIYn2jy5LkD=3N=n
zJ}u_JjQ<nL(w)BFm^1IhP0pvKVV92d-d)x*VVCBr^G{d!|0uHGn#nmUahVzWXNl^#
z;$o>~?^zfR#~+!rJ$hl0Y*)Kt-_C8yo6--IEv=tF;Y?B4r}domNA0G3h`jlWK`b^(
z^k=Bqy}!<LC-;86dE(x|Es5pMb**vp<I^|Jao^2)EGm1koPO|fzunI=Cpn)FV>{7W
zlex$E%u$BIuM2%+_N{x6)qm?;&r7@WvNp>VuP?h0`|@p3wraX)N5TKPS^c}N-R_$G
zO44tC_>*PLvp2uXSn+c!D~qV)u@D)zfUl93b`9wtZ6Cxvy>uyZw%|7I+Va!ZB`-~u
zH+2Oode3wXKKsP@-8r>AX;*`smd(!p)55g$<Bi4>hR@%X&7015`SFq`Yt`-2*PYXd
zn#H)%@AYX>ZPtqTr{>op9-TdXVNFo(_Q(ak9J<^yVwalDTC|N}roYO&^SgfjR$srV
zu~WMBL2Jo|BPX^Tbbq)Y`{7OBpJ5#r-HdKU%sTp2_+8R6(KD4_KQDh-{`b?Yg7BaT
zE5sk=-ks`mY=+~eHCN{GeY^f*(u7|#IwtL^+ql{x-{yGhnO(W7r&>0}*7z&F3f5|4
z+ApNO+Pa~-+)+QfIk(QzNc!~JOJBD9&6>UX%eKvqJ1VBx?ccxm_nqD_tAbs=l}(!^
zUDMX_yZN+)^#tX&>`rTq{q1;c${!)QxvLa~c+_9s6^TB-cZK(!%H_p#^@57j)cqOU
zJgoJV{<hs~D>AHoAaZ$u(et2fhRv&#JZ1N5m`L6<dU$$y;Fdd2`3lrJI9(?y^4(M|
z-QKvMnt_LHF_ZRci{&5gzf$gbIpI>0(w8~Pdse)a%qgr@@^Ub}$`LMVUA*}6`Pq5W
zlUi4H?+V=W=Z{m+2ev(nmkB-4nRUhIWYZc=yVesYnB4dz{Ty}0n`d2%k`Zt=n7wP}
zQq#5Vsfh<{s$1VoPd0phc8|}#DT<cQclBj_7MrFK?Bo&>_=fSV;i`v<SElf7oS|gB
zw{w=nP47)F-xOzj+q7v$)~DSQU#vO$^Fih9zU5gpL5^Oh6Sq$Jl*C|~Z@K&6ji0P3
z6I|a-@~m3sbx+}-sp|Pijdyp79FCmPnc2=!92Ml{=~OFh|2)|4<i|fHxA=}<K782u
z#iVPyk6!w@CH211CcT$ePltbB|8Bbf>V4sicjOm-d-Zko6fsXV`@ehU6)m&a{44U&
zo?L&+`Kg<JP3d*ie8Cv8$>=oydBK|snwmZ~bDTm$14RvWoupRhcwPMUebJ&@F-|YK
zjwLm6OzK*HInsY>;gr~D<E;W$pDN{6#K&BF+!?SnW=iM(9sNHw*h^B|!@nG9c)9(!
zO4<e9#qBQ~PCePq%Vl_?=<fsFi^*ovUr#X3oqf#g$@C4D@hdtia}Q6I>}sfEsXsf-
z@!RvCYxcjiXs+;E(&%YYelgj``^5iS<+tWPR-Umm{esW`y|$C4uM2*{>Mk_p{KHy<
z(C5WJ?WZ+={jRAOyddgH&fPCQE{o-k_He!KSd=pDE<?5%$7iQ3=5u|1JdE0wj-S3W
zuRq8?sjSXvD${Yfn2XC(xjMA8Yo^V=yzb=DDQ~7N;ZbT`Wu7Y>^*)4uqW4vqlo>C7
z^CY-0&Nq|zlB@E3VVz`&VuZu}JIeO!`WBquw(o(?hM<z!)@8+i4`u}QT#L0#4cRC;
z^}@n>-2>8lePv7Q{Ux+nB)pbNhjJWvs-11}`R4WQ-5cj#`KJ5MHqR{U{EU3{qJ+<)
zEGrh77HA0c?pO9|6Dw|HS{TY2zi3m)(Y<^>^!xwX_I0aDZ#`<gyknAc+`5bxB4yWS
z)S5oq8+vxK@m~IkZZRhlwiYRDc3%(@{=mhwszf_?_BHOT_kra{a-~8_&rUY0{Wtq=
z&`N)|#lNH6w}+OV_Z8nHE&Ar)<8yOf&YW+&I{9DM`c}QY=C#k#+?Z!N-uAs+xWjqg
z3ad9C?uMwVzLv`|;^~wSk3YMj!6}>hc1)gm^ZLcpKc${q@zo{&W19H<3v)U<y64{E
zm2f;8>iT_Y-_MI1L>CI?FFmz4?DA2Ei99Et?76ynv2SY>)0A@^D}L??j5=+U@NLnN
z?87fV?3v21ddGEz>6hsj4ygrsXRY=OYwb+GdFYO>${DAvCe^K{MIRn|F@N)y88TNA
zE6-{yGqABq&<NapSmE%>3`R{o4#vrfx?0>xzDrg+Hmy3P_VByz@3gdKi{u2OyM6qY
zr8X{*vb1^r?EV3ho2mh8E@nnvnRc|eqKvb}{Em)Ol-vff3*77HKVf3!YSa-p_x9xr
z|K$O_U*%5k(#!VK4!L?M(RNPX@y8#Z_z5mi)jjQW>}O3?Rh7+rbGIq0V;Ws~kNm7S
zXK61x-*35b!<LSjiw=n2ZoX>!{7vQO6Z7u+FMqu9kDwrbyJ`OI%;H+BS}WT*KYyL=
zF0HKiDe&c&>dn91;?t&0pXYx&GuB_Os7*HR-rma3A5P9cb!o-+$2tFQO-gDz7TH<g
zw&dtEQ_dF;l_u#d-7-UnNjq=ms%^RpZcb8Mx^?^evZZa?LmXrBx}=}g$vKwwvM=Yj
zHep-4pxfN397nj1ZZ1eWy=X<^qE&NOMhWh&stF70cr!)x^Bjh)(mK<=Z)*9hA+tm4
z81u;so6bz=s=gh%@u0WkkA}>`Roe?y*ZqG~QO&SlIwGazuifz@GgwWMt7~U_7A3CV
zJ<a3Pn#DKHTkFR()Vr?!6fTt~t=k?fbT*)jhm||(wQJm!Q$L(0Z{Du=?$jZ^3pd_3
z8t&oUtXESMQ6|su{4SH;Hr_k6iPi^e&u6VOt(LCc9G5?R`oXj8e?KgG%lM?2t=Qz?
z(iIs#cG{Kaj(q5e+V*gso6`)Q+xIGR&fLGf=DcRRr_aS=b1(mQJWN}UNckpa{yZL+
zvi;=3H}1d0&6RK4WoSs2@ja|ueq`MN_9~6i*{8xCHY}R<UNv#kjMavR;$2+VWX-&B
z)#z(?mt4%%eM_zt#>XtYk@}^sb?3KNkM_U$TrF)7ap`SIxROcBg`gAWHOqx~*Yyir
zT;biFF_~{|l=sBWkC*@Y7Op()t~o9EN)dOpg09uO@|1?cj1RXp_HU2d<G@{R>-Bzj
z*N;6lJ&GM#33FaoKQ!5?U;5+1t0`?umcK2Dd)n8aKi4%zSi&*E&p`g)&%E>Rp1+QB
zQWyN9(Q*9z+`JW=H0So6uGDUSDZS6Gwt7a`S#3U!kkv=GdVSquAa`14;dTSA<d1u<
z7du+=C{%rH-Eg?ITIc?`KXKKG?+!hz>^wYs#k#dt`?p<+`}*heq;i?iP|t{?Jhxu`
z&bpS?v}T!SVOXKybhnKcxOS}<bK4ws*}D1miA|{+H8&q&bvV2`h4ogsXcpIj&E1`P
z%Q&~)DDseF(wh6Mh$GN(2CGo>gbNO(H9Z;2^JF(a+oSt1L-ek&WY(_R65ClbnsX)<
z9CE(nbM(km_sKB^6Zkl}^R=z{?bB^dP6aD$66;$c71Ar%r<3Xz;CD#lvZ1J9;*-tu
z`=wdZmZi<xUXZ>ZsqWS3Zarqnf)80!UmnN{`@AstT;JyCMI0)+o?%yAUQBxPu;6+5
z?RI{7Ie+!8JNNJ3y>CCE(8_Mo&pooPOczpKK799Y!nKrV5B+af-rlsEUDaL1J8NpN
z<N1drRxb}t_^`k|A|>6!Ztgq&_9d^BBA@(y{P_6gm$$d;-`}^xd)JzBeZKPVnyL=l
z-vsQfwz%Q6%4SXb^!$5!em{Hq^Rd26L+dIhtzUCE^tC6J&0<s4xw%Wt;f^y)+Q|g9
zMH&3+8yB6?l(k(cY$Z7-z$i7$l>fx@t3M=8ykp~vUAB4&%W1KN^S;I%Tr|hW;pp1f
z&ATI<C3f#xa4yW?wD+y-LvN1X*4%$w^{}0uxMuezs~Nh^uRB&n3%t{~5$qIP&HE$f
zVX0!)j!(x5)pw>}SLN6Jas6gw_obrp1M%89l`q9-xV3~zT3tV7y+o<N^vU1VO*#2C
zQEDOy>lg0cB)fg>2~*#V&ue;n7Ojs|{}46zuIqB^D8*-0>$gfJFS}pvB=InGEn`CW
z4k^pITN^I#(mDR`_Kc*4C7ym#m)QR%9q#SrO*8tpPE~NG3+GZZ-G%dQr+F?Ew0l|C
z;mNz{@NFO0e0AP!hpc`XZ(_UgT{WbZJ3wQ3<?DCF`$g-IXxB<dvVFVzyUR?xyY|V8
zCGkr0Zs$FmWGfom+&a_d=De@IthZKL1cl8Kb-Z|`M|O$wqR9aZy5Ea3%`g#f`=&Wd
zYFE-~9?z+vmiA%VXW4HoOgum3>yF45$$q@Iw>pada`pSwx6ACp0f`mt>Kc{0DNSeX
z17-)l<zDg3QnFC}$&-B<PIHfFES|bgO>W!5oq2K#ZtOeFpmzG0#>*RNdRrw9nkeNh
z&$&C-gfn64$?G#$^DL^3HT#|t{BF_0Mbj4+v!4BEHS5`a@g+0tD`U1Se{*PGR({`#
zM|0A9w$9i-cgLp^UPUGK=(cm`Q%x^2352b6x_{|+c5+BrqQ<GM0mUm?e4m^<cs1Pl
zs~o3ZZup(on#aRZ|4&(R=GJTdT-|+P|8B9%wlYO^y|;RJ!SM6?X1P`epA8PY1s-n3
zfv+uiMOF1hT{(s2VjqWYf5Z_Q=$$5ZFInb7QSsuPQ5?-{r(IjZd3u)4K|vnIL`Cf@
z4V#Z|GZ6CW)tjezchQxdt8R&%X?}e_VQZ>ptJc(&>}9f%Tei*OIX}_(4r|#W8@BUW
zi##V^@SYquS8`tLS)H3-o{Bj;2`=;E)YzyxQ}WD>GxMB;4;0JD%ZE-qlVuRh*`c)Q
z^^@g$RGsJiGuP<&<ea)V*Joz;%#PGh^{xr5nV#kR<=-DYxoE4Tv$lQrZlxzT&waoB
zQ^w}<rA_O~%fEL^HVZ7<vSQvvoiiJRf+8j<I~Y|99yXYxFhla-<_Wi7?$LWai$!e8
z^5g#g{qy_$*U!7Rr>?rbe*eAuYcmaP=lWUB=eW0bkAG5xFJJrNhkUE$&M%+yS*KE<
zBTDPIuhHt0SJtHQ8+)FN@{r*<IYA^Ou!Ack<M1y1)-HxrV>{DO7uR14bIsKMEiya%
z&LZ-b&fGPx*6eyRCr$pTg>L1`(2!Pzu8G^z``&+G+~X;uQ?otJrF84LjK*7)@sf<J
z6K+O!t0YTl1g*PU`gy*Yb^A|K;q1D)OJ^NLr`#6qkhbdlrD=YTzr5kYN{8oDC3igk
zTy&~<D)*UxXD%u4*EslV)vNzmhZl-(l36KOJo!L!od}=y^E)x0iUTI9{oVcM#xtP|
zDS`8nS6&pHaVws|ab`}gurou$!Ptkc=^OQ(zq3Y~fA!lvZLawzp(z#K-6p%1)SVUg
zjPgiS=I&TE@!G77Rhy?jdZ*cY*Q~wzPKJETccJ<hyQLPc=q^@Uc~axpQr?p1i;5Ly
zFPrkhVSmDo=&Ny_^XKhNUH0Eqe!=5KD`)4vc7D82$aJRTg1a}vZ$F;pev9YPBBd~i
zgKNr`+dbBZ+ZU8_jiFzI@!L_wP_yO1C*Hl0Kd|{BQ_GzhpNikR?_9WFKE6;gm;2Yv
z=*hX=TES6y1$Rp}CSSJVFIC{Nh_#e>^wn=+j%r2Mg07aXg}!sk7f<m#a`0vd^Ugb2
z4s0!s7XKY@_%_crSf%k`-Sh7qbMNhY<*EMYZ^$F_spo7;->K`C-OYKeGCL!7m;HaE
z_iLrjX~|5UU(5Hvswc%~=j~e4eLZRhYa%!`wr-MLtfw8-QFdQzTb}Wjr#(};cNE-k
zoF=m6h1K2{73*jG@fVIOv*OdcsWHQ2&68gChxX3mw<c#a1#R#MJbg3iZ_eahvGO^J
zrxz{bI1uV|V?xUBt>z~S%nHgsuASx(nDHWM+YN)f)n>a3_iM+vlo+mN^WofOeR%1T
z2Gxn7F03<cs?G@+T<&6@If?1iyrWwzB^PCgE;$#Tl_a?!_U0vVom_>Gv$G2An3pOZ
znEd%kN|5Q5UmZ^6jXMqmWt_Fh-tbA|*{wTHoeQ$EJj)twODkSSPYY?<E7P?lA|)uf
zqr>gfHPPjZW0xQOk{{v{k-!;SdedYs>++76;E4~iUMIExZh!i*;@-L1B^hbo`M)bI
zvCY*>^E}IO(`K^I=}kJ`_qK7YI>0r%<BXhaad>dPg5&b;71qU1rkLM7m$q%jGTn-r
zDl57E{`vFH-LLyTyF8!2eXGhUR^_|8nyQ;yyVI@)%t@}cvU@hiy&$>JXZF<drN{QT
zS+t~iczSm)ZE~G_@I=hg^FA+jaRl~tyi;p`TJeUp^xEo}W!=di;yXAq4#;(jm+D+P
zceZ0!i@JnR>4f<0D+Bv(ml?#1f8XrTQ2p}9gf~b0Z0}zcD&-0;U%X2B);gugvaSC@
zIzDtinru5IkA1`bi<8eu&CgUeTqw8P_GC+|_`4%EGMup@c9*~SH9WIy<~9#C?iKQ_
z`FmnsyzVK<JJY{emz*i-c<6ce<SBKrB=yC$jdz3RCT)*<!g_^Gw<lOnac=TrYr8(q
zEUT6cH=5h^)^~IN`r)(xz&@qhy5I6wq)OR5+mw9p-)imZ(r-NGJ!`5?<*tw|FjYKk
z&|$L5EqcM%n%OTm?dGsPdSrH1I!jsDKJJ^M>FQ3`7jR03Y)R4N2|n?>YR}!^MT-_a
zpL(3Z^6LJHcYC8(-v~OmJ#klGNAx$f9V~M*m--!k^)f0zOxcf<L-PlF7fa~wjT?$?
zCPx^kR7KqoY1qcSd+(G7vFh!0NAI2}oU_4gvcG9r$bZ>fAy%KMwbmzG4RaHZHD#<Y
zzMZpCE!Qw~lc1xQhe5&xg~H9zl0K_WHpDCc{dOm1rr4rOEZ=1oE<I~)ef7KOJYmD@
zA1;RcpX|2BGESZI-@1MKCv<)7_fcOhT$cZ$>XYE4;uh~C^Mtp@H+&P}-`2D4WWeR%
zDe9r!eW6n`Y+vt^TjX%GGi~F_;=PHASvD7>q5|tDIkBpmaq2d0`e%3bgOFdtsXzUO
z8vApss;8XLnYU)9iELBfy6{Kg{U7vi8AvkDEVgjpe^7%bbw-EgidQR+^s~)KIBB?M
zY0vR@(ie~AUlXzp-PYTCS^RCT)*{vmtE4pdwTe8vF{?>UOUvZbf<;==kM~>@UDz_S
zvqN0L?<;?(?{xOv*Bq@jhecNud^MiFl-FVFr9;~o+u842PZ78)^W69Ros8L$S%;_G
z`mLoi=iw0_DgKXZ51tEYUDEB%ad>--UUKN#nkiZrrxozK_kUB}YqI~6PE@$?rG3Uh
zjE*x@SInL>J!O?xQ}B_(ZsCMwjx`llL41=g-M!nqa;4IXr*C{RD=R8C3d+wv=bHBP
ziQ#<P-C2U$lp?*BmA4<A#8zoD_uTU}tGvD5Ro>ooIZ}Co_iNF4FD0|1R%q_dIr#Zy
ziQQby9nt*y@8qu>kxV?;TyW9gw#v2>2fT`IdzD43uUW|!n#$536ta4P=AEoa9mA$~
z%OZS4J63fXooIUY?EO|bmW|t&oYl?q|Nn&V)edIgb5b#G28V2aT{2a&*|X#`YjkA4
z(lJfFFA_&KtzIr_@H_9q#BWFIix?ATlynNlU9n&dbTACuy6gLSkya_Ub-K@YOsm|s
z>$d4Pe>UgJTfvvsi+Astai`yDIrq`$iki!68C@1`?&kUZLiOAUp1zf@j>=VT|M@bv
z`O!~Sb*^JuW{A4&`}<k_O{+tOzxUk#r94T`D$35Yw7xcK6`vtq7g=Bbmt$`6R73u%
z4UgB>q_(!azO%8;ZtJqmeMhCXB!9B$i4?79oVI@ln|^`DoD;&;r(M|p`Y?ox$$CWg
zdER;9roaED?SjRnylaZBZe~CDaec+MTVE1F_|C5oICiJx$ExEy+h>N?b2i;6ZIfu@
zi`f(ZrC{Mdei`HAQjhPdf4Q`3%D=@ow5kIIm0c58uw4k;X>z`~%X5OO<vpY3u2<Gt
zJ2t+U*&ICWn$L{mTT~8;JSu#<>7n?kU)}F}eD?<|SD6tKz5U_SR6prcT$gk7izfK1
z`$R2XP-<ZtY|(vS_k%eKkCteqtWS{Md^PF}-?H$^<hda?KbWb0-CD7|YO+bl57)M!
z%iie~=l1Q%|6KiYzh+g^Tf4JazmCt7zhP@=^*UsCb?FkW@6LO|4xaEkUo!o}>VUF$
zExpB@-P_6^dSo73vE1>}Rw?C+iGqTRlINI0O7p+ob<#Ms>5yjFyaJ~ZiG8P<mcN{}
zrSsQeNk@&Xep@eoX3qP}|7!noU->f0E4R12-|;qYc3t%A+Um_BrH5j?vKp73)NY+w
z8lcL$I>A)5k$LCtHxXM;)hY_SGd&u9@Ld7>7K_x$y_T(hPqr&fFxL)_WK^nl;g}(o
zoT2P}=|#6wqkpA!;N~cinw_hhRNdL9aUR`%=h?KgO4owf7x;RvN}eC3w9MAaboqn=
zwNDJQmc6O^aw0)tcTni;##OxzcMor^yT$7hc*C((A)2*X?w#fQ?Z2jN4Jfglvxy;#
z=i?mD6<k|BXsyn_ard^E=x$EmvtEiUY2KHXawzbhymCqG<BZQQdp3JDrS$ux-Isg!
z?p^#IJDGD&Dr3Vn^>t$=&s3cld~e(1%ab+wmM1@a-m<%6&FLbGHTrQf*G!)ZaNUtV
zJ8g+)<D9+r;<H8FZmF3a*_`6#q1ODz^rk=m_x!lMKI`A@%;=b-nm@1kp_1PkPn+kt
z)~DyGBxj|Geoo3ddQg6ztCQmENs}aPou6DieE7DwKFh|7%D3)q5Wce|tD)84qR<5A
zuFh$NlSAM5p8j*&wq)0>zHsI(T4x0v8GnhH1zynpxW#g|+Vsl|lJDJ??tcHX+xSWM
zM=wbxx84*pgHx^^i)_Nu{+Z}6wXi;a+wN%Ae!q~bdtY+L`}WQ`KG*uxq1Ljr`Xg_T
zE6S!+YOl|F-!+e=zk2OC-*+ckk31^=bmFx9R&Jd+kvhB|0>sUE3a9fa)}{%a<PLF)
zwb;J@Udp}qK1=s*pShNOlX3H-W65774NrG$-MNwR;nhE}tcK6#Tc6<hX8h%RoTakH
zDr?5{`6kbw-`&C%a5Z7F&-+io?Aub<UVKpUEdO5i<9d`~UKUSU;qkN4$-5*jY^Y%=
zd$%{+|AT?)b}3P-Y2W_7Vo!H=5Egg+ow7pkhGomEwV`z`GncLiU~JCVWTKkdyL#iZ
z$s8<6N3Zy2UAf?Q^)=rf?|as!3(73a|Jv<&cz6GqPkT&mCCOJ>pXEHYIcoLk(Ag&z
zP2yYSu(JF4@B3%IW%)h))Y}r2ym!_G)t&4K?@XLI-LINyKi%^2$(5|Bp3J+p)wL*_
zMO9A`n(w?c*yon2-?p8MUQMSMwp~5(C2A?_ZHK%|&wK9gR=$4rYi$qbxeFDpviHmw
zzj^e|ydro(Z_ULM^J2I8^t;X6t*8|~QF~_XX0<r2+pj}Jn*_AOr4My~@Y>^MzrAP4
zv{koPO`XfK{S<>ps)U+sn)k&;cjcr0uX5Y(S91UEDcwgASG6AYiJSdhoh>!}7GveR
zi=q2p?uyf}?shVmp!sj^l0&D$R~&RzapcUI5@Ta_H)m;vV9A~a1NRn&tPM*tV_jMw
znl3)-`l3hd=GuVO^6~fXn5VbQ*pZ<!BT{g#ibvCgO@cRPDmNu%%~}xjLGyAzbF<EY
z)gqm{=BGG#?QH&)q-E%7lU^#v!oK3rwllg%G;cCXZHutf)$RzEn0NQ+gjXk{@2aJ?
z3$(5ak++%i<jB)QS6y~JXIPOYC6E=HF@-%O`_eCmH;Z%@cW9jv^X+yCI=0Gy@r>E`
z%hPIuC7jI^&+kh;uk`KJ!>14PVok4=p4^vP*<Iiobt&@X<DYNlna<oj{Yyfqx99cL
zz5DHLZ0<1j=uT%+zT>>*XThzPN>P)RY+ZK2rPEHQeqsLc<NWr2=fv%=i-@nw=l48!
zdYXUw%~L#_9;;^cTh2FUD_x}>dNstB$$0ZNhvbIT$PmSzt8cd7ul?VCf1mYSyS?@Q
zFF&5CsG4@ig~!-zU-PT9*U~Ao9-QZ#8-BX>+d=Kt51)cw3CzFvZ(&>h$&w37zZBx7
zOc>4Gj=n8CW6Qs0`I0w6v8i^3xB7$Fj!MkCy5;KhFEXuqR_}x6pPVAQj^&HMce{g?
zZks+$4VoTT$2VR3+dq|t`I?*e`vq-2{CvtsmihW0_Zw}0J+m!4Ug?^X)+!^PW?kVa
zx&e79tH0-J*(<xxJ+}G$v;EWNcbz^YJ->I#tH--*FB_$|7dl*3KQ4LEYl7s93fYZ6
zV%QBAU$Afg_h#w&^aWdaKgNWvOEx>>X{BWRwN4_!@X4p(qPHg&{Ca)r=ezxTey$O}
zGs8jn^1SBY3FmZ<?+{tRaOze2-JNCCZ;Ws36;d|YzAJt4<5^M((`BpUqkpg+TUirw
zAY_eJb@sFGIX?4TGeb`<P+E9FD%U%-*J;se>AdXQA!mMF)eK(}eftBe_0BtapMHkl
zJ7W4~+2jsIAtBvsx)(OB%YNfi+cf3R)WYp0yLDA7S@v+bo)Hi<npGU9`gvn||Cbw|
z>%N}Xy6|kG?xux?t>P;`ZCR0^XmV3yHQQo0tz#Y!v(`1v?616e-v2%KoTtUp1E1yg
zudKd*{AszkVrBQ7*O!BqUHDkbQ&ca+b6Hw@?YtTNXHN;g@svvq;B7PTdN!5mYOIl&
z>yvvzCU328lsbkkWe!@vdzMAFRVPhX&i?x2)vI0_h<tvv(elOC-$yH=T@yZ8#oYFr
z^_%f}_nTduo%1#x481c!qe|`klo=(?J2n_^Zj)yYVmLXg>s^#5SICRn8}jFjyxbkm
zPSSGD;&)%j9w5e)q_%3(0T$r|;k$0ZL8W1(BGMlv=X^|EeL2aUEoAE-kK~gJ&88l`
za<bTkkJBq>L4`%Gp|?QDgj5d&cP|gk)bjOn&&18S6Sim7t)h*;I~9ID30nEO^Wuxy
zxuO^L9Z&0>uRm?pEtQuWmVD)#{qot#4IN!8#k-wf`Y3-YyXRMDH=pxpQN>m}8M!di
z!yh#d$6PLoh;Ol}t+JB!e?Dg;<1(g*O{KOn*S>-dELn2(Q<+vIgW$WF@21W+4x4}W
zhooF;+xCrF(py(N-EJ{g{=R&DrN!Fv(}}_7C(VsaeEQGCclqPxmp`ghhsIs35`DDp
zoAi_=7ks9d8g8~I-uU%*`Tf5Sf6Jdc-fz!jU-#on#kbA;%N3r<%<a4puy4J@t)xei
zXG_Dkl>`T!uj>Awk<jqz;FTFKG86h=UYPgL!q${M{OI5CDQBL{`QH?~-`ioUi@`Uy
zCPCHJIW{iy7sn=*SM{7)c>HtB$KMLoSKoy1C@$2V7haaj|J|!!-!j&Em8)98m+Mo0
zoL?PM5PbWeXhE}>+6DI|x+VR#33K<z*uE=D)H@ux-)**M+#7*??`p&TKDV2(Gqvnn
z?F+x_#>XD89=ED<zwP?|4d3+W{#wu4#1l8R_s$7Qe=@CZg@%7qpi*AB&}RL;rF_P0
z`s<IMH`tXgEb!ybRm-Evv05cJm%Ueh{YA?n>CBas{If42&u@N|(U6z+S1>B{^4Dd@
zE*Q0IoK4?)JM7tvZGv@{?Z0dLA6`4~PG$B!o|O@CWj<bCnK!0#hcw3C-d=w4_Z)eH
zrTXXFZ;O8o{C}s{`}F@8j5h@r{{7h7onJX4sC!?s$hnjxg9mRH=}BL?ruewH>eab5
z;+YfLJkMSC-DX<2?U&YR_L!d7wJCjR+jIN3Yc{1a$XGk2yFH89qjko$hTppF+~?-1
zlw__ayC2n0eB@d$KegG2#mPLX>E)kQUF)kZbxiBbEtwr!ZE<zV@(&Z9O`poQqi<HN
ziQAzS2hFG7eRyNF$LGeW3Ue1$y{>t?>O_{&2kv>P%2OK6ug?AKTB<0q=TGk1j{Zv1
zOV*k;<^dN!x?h%9ePq!qMfZMThm42EPlp<K&9!$}khF|xiK*!0%wE&{+p29I3SO65
z&35iF-cxPj=EpTfKrNxM>w!;R?v+T3Tcv;I-#!1;`t_ru&*d*&_PjFn07FPlAcydu
z4Y!gC64$-mJ<T#><6?2=)mfi(r+Xg`^4jKNSH(H&uJ)qH?!X(9R#mLrSbJ!(sn)I7
zH(RgiOn!YRxA&S$T;S9NN!gorH%o4qJ3UlfD}?#Vly{fzs)^0!ZtnR!ZNA)mrCJ;3
zbBCs%ezg407T1<LOP?Nn{q)Dnk1s2}JT{yYC>XTMsl;=e-Y%UG!K)n~<|zN3R@-s>
z=G$iFSFK;~EmEAQR5BsWq<h1k7xT|8UoNx!^EZ#e%gw7gn-rHXfBdr|%W%WjvfzxL
z7o<|Q@yo1O)vd~LXQ$rj)#B^-SANvfe|;(-x~}T~%fpwaEq>2%Tw?806Q1p{(wr>M
zT;II;{OCkg(wF_3x8{7vF#B8f;n<wFXRY??F4s2*^V|G!+2^<VVO;S#=bjnQRPlOM
zrdYUW`y<^2+h6>@c!S$kocZ_Sls>(Qnjb`JSua&Snern0`}w`kZ1=eA^P6_%tibg`
z`<gJB_}>%W1-udZw_fjK`N!qflOCG}7uzp(^{$kBv*+F2q}fl4!}{+F#5}6p{os13
z+Vrggd7jf0r|<cueQsq*`@AzfUxn63rVE+QToUbWbt*~dX2W#Ve<u6)X>2I_f92OZ
zK7sv0Vbc}ky~De>8Aa#Us>SS;DVQ$*=gjfm=vcY(x`tcoF=ys|xVbK*Y+1*{)wwEp
zahm7VR4>`=%ilWV-TgTKC9l2|8}t|+kD6+HqJ?XT(%JRV8ZQ&f11GN*c~G_2f5E(M
z*E5$+FWavB{7KBW#ON!f0!~`1=Uj0T)LnO;b61J*Ep9%6-|6KwcP?_xziH4XD^R)d
z^v$I^PnA6DUu~u0%ED>f5uwUadef@>a`(m8<_5P+oVr$(zuNdBw>EbAjh<D<G9Uj4
z`m|l%Q1JTfE%P>CbWVHm#$?Cy&_1co?#HXtl#ZM^Fu!}}#EuK+n*`3E@$0|XUX#bv
z=x2~Aou99{Kx=aPp0jHg-`ug7xAoMvnCSg)E?X!^TzHmSXTN^E^!HVz`McjSZ&~tz
zLvX2>eW}Ii)^4q$2PK+@cTS&wzBIDx*q6GeA2P-5_jV^sM480doOgA4p;j26cKJ<V
zQ%=K@khu3hI!=GgWHn355*5krUKuGH(`LYRV8eV_--zf_TUVT(`Kmm0<C0k}z9z~7
z-AXIlPE7pLHABX{vBQBk%IZ$~{Nm3qR`>qjTF=FmVdR@NL8U5U`q2rzaVihBdYb$e
z34G~F)KY#Kl{Pywh<7UA)%;i8j~*E4Y`wvq*kYma<xN0f^9-})XHp{dzqij!*MBm<
z&a%(rj8IhAlB?YBu7B<0bdbmhP%BOKSXCoyIlu3=zrCFOp8qBF-#-;B@9{t0ugG|L
z`uFl3_B!9oPu8A}%epF4qrz>F9{Xw56$PdW(HZV~jcluSOx&vWPLO9sR|<#g9Ixo>
z?bly^aM~<${<uuxw-c*WS#CDgOuPKJ-T!CFv<DKQPP(d$MfX;<ct*v|2snFblFNgG
zsXOgu;%*d8=UFwOWcnrjZ>N9r_dmZ}rspTswY5uDcJ5)@?j7n&ryq<=SiXDBu_c}x
zBm@KXei|Js=YQ$KtQi-QBy(=>hwQ5bv-#id`kuK%?@vzd^*j1cTit(~HY}+=`ma07
zIP>byo5f$1+1}=xuW<TqbXB;xVZr>F^S0ituKn;SjnVuX^K`9@9?37Ny%XcZAKtt0
zvZmJIZGzuF(?>ir4^I2q`<iFogNirXoaQTQzdd&Hzo>qBpU4aWmkgD6(qC1}AJx=_
zZND((=AD}wr#@`GrnO3Es&c^NebI5X+Nw;u&oP^e^PSgQk@oYv&RSufMQZMsc*?E9
z&dlfkW;9n?>fOnh{r7jj7x|LQ(Y04bX3i6z?bbKW@4L&n!kFdpj=#kb!q20VYNy}r
z^V-Y*%R>0^tXpwb_v)TRZ)9N-(_h&0<!N_C${Cjln)w;^SsVNdM5As+bIA0)V|><7
zo$)xVe>!jHinxC-4f~(h_QW2!u=~91zmikSZq)viogDjl&9mT~z7w<BI=H?>u_&E;
z5w$odZP}c?7v|p5=;B&o`sl=m;J(a4#+wzqX6Xhjew<z16&=A{>sN<<d-_p%UvUNR
z0YCe^q)itKjJCb0`eAAPwCtH;o7<e=sh01ho-@nrc)`>vQogdr=l!1@vgzzQe&!@i
zI5usW;+xKQLM@&y3+@S+3PpL(ni?|I<Nbd|k<zwl%)6$3dUBvhRE|$-o#5sf+kM~p
zcHLO?!Q{C0G|PA;t{oZ6_<94L+8)XK(;a-zbp3Kp=1rBl-yWr8yvU4v8`WU2a?O;Z
zCAa^m+&S~>6MwU~u|=wDWJue)Ss|himu})Ods?@}NBgSN$%G5Vdg6O$i8;(x+w6Mw
z!EBM9olJThYhP-aJ~;2X<l`REfK*lWy1U=(oSyB>jSeuhR=Tt)r)`0gX0*t!z~s9d
zZhB<pZk~JI_T2h?ZOc5QbQRh<@?7?^D92n8%GsWkIr*jB-fP>oUG-QT#j<zTlA|sg
zFYfHjKjqPt8kzV|==tZeTPjAO6AycS{Jibw$0fghILc2q)jU^M_G|fZfBiU{A7$s}
zg@y{MY9E|wn{@hLiH-j2qWTE=cYLL9VsgK|VRRDTJSnQmdn&tAX6N_y*Uazlt@`>-
zF4H<!z03Ku?DB_|*11pA#OB|tzvFj%&!M2u%_8qC^G*5OZn1l4O_-)KfkW^0-G?8T
zcCC2i_jRsr17p^&hkuVBfBbCco}E$c4dRm?YG;Rct4MBZzZetbRU_(B^zFr(IUJ1_
zmj4i(dHKjq?gk}Snf`*3%lp6FbXGRyR(~5%e>U*&vEv`j#b@#C>UcFltyEp9d8*)s
z+xN~lY+mUwODXqM|H`#-mlLO450Fo(NsH-9e-&aAe&o@vqP?b!M~$cKTv+`ox-~ZP
zWH;mS|2dufJ|~X*an4%u_AHM%-(xvb;~gdD55C6VJio=1=X-Ws%!12>sz$r69lgKC
z!umq-`pxrOcFYi66dPf`j_YU4qBsAlpS-JTJhR;Ae*gQ8TCz?z-)z2HU^o59l$V-i
z-8+oDZ2PmeywT>H==gEQyF=Q|){65_mjCODy*STrm-oqAu^XE&nA}<S_e{>^!;zO(
z=h|G_^7x{dQE^f3@BGDy@3skKOntfZFt0dc*42w~GiSc74)o$&BR~71Wj<r;%b>|y
zy39fkYB=xA*vy@=_1@hZE``@+{GFd{<k&M|O<{upyIxKb-`A~Yj@~-4JX~O^rGD;m
zZKX*?=T@yc6~Mbn%O`ZDX-{m&uD2?uEw;?zd{mnjZ;|aN&T=%HF@EXy%g)RD=Pp{e
zP~EWhy?>P3uL_N$d#rApw06yV#Qp8=JG~{6yH`KlroQFLQrC{^iX9yGw{O0`rfu5d
z?XFh#B!D&W*khjWlbl2Yi{2T;u&=p3x8=*fGMj0g>EaeaH!XBi<_QEEo4WG$Xgzcn
zP+xH1LbvnQ{+}90s}4oC%eB9*|79<Au(<s8`&+@e+x~K{4!2!)cw+Or?~5)RShA^W
z)~%U!d?z<QD>Q#|Bh{U!-Fa=oqOaV+ua|4w+<r-F)nCV}t0D!ow#`cA3{;6-=)Fv<
zvq4Ms(85){m4R>BTZ498&pB1va3JTdrfdImj&j@FXNe+OHk|=HQkUY?U2SY`l`m9M
zJjxm6G{yS=+O=Q1*v_nu*xBf)x+-}g(}mRg4I97SS|$7H`qRSNc`38_UcPIXDs3xs
zQ@p=r(!t&{7SG;RsGWD&=w5wf+F@sgz}j1WS|=vW{aAA3^5exTSM4?{TkmwbF4g+^
z9Od-yx~sERSsyF=I&HG=md{+@CvRYIF_~sv{q@_wx8K*t@7cLseAm(7WzG{#X0F;j
zUv7S1zhu#J?sLnF6%U9shP<4=)kuD2?BXbP-=waTFuSFtmO8H;a}ou1>2&X@5)@I}
zy5!gE<Bu=j++L&~vM_wFl*psYOKRS~nKSRc?yJVsPXUW=X}y&b%=s|KP2^o(ch>5;
zuU4--#HP2a>gL4iE$g|mC9a9yYTiEOVd-WD3BC5eN1wDte%sE^Qy13KcQ;|DcBobi
zlj`=UCqL)RI?p@r_v4B-ro=P9E7wlAGqp`*=ZmTO>MGMum)5QI{2pX<vwh7A)1@Kr
zuIu!_YdflzFh5^ls%^=;V+z$h`LFNo<%x8!`MAd5!M!&Yf?@fK4BA&!Pq+CRV|}rB
z`ih2rw@oM14U*SveY-s5`NY4i>OAIOCR@8)+VbMziMng^7z>rpWPh*PI!V9Go4w}e
zYdx+2zogc`wT9MVOM@5BK3OzP_3za~@1v^(KXKn`^**CM>z8w?*p#WJamBG_YyFxf
zwmhsoztKrFI<$VRU)>YK33IPx&p7$<#(If>a|VmzEKM$RR`+YpSnqGOB}q%oP<G3*
z$JcHwl$-K1##KXikJRO#jr%0tEpru{%VxoD`6Y~hcXjjUznjZbU5}=gnO1Zi+`xJ1
z%oe$Sj;!pV8<KUi&pR+KKJ)S+({c+zCD(T0rE?41t{8P4G~2JPmheHZb{qe)vZ#O3
z*97Ydoo;nWUfaTY!Y`>y%=~iAy~|zlf;`jT<ZyJ|omd*Z$I||mlS^Wh#~Vc@L6((a
z(_~ZY;&*?@u%2io#S!%4>8WUufE6yG84Io**!)6kkwb#lgS^h8W~^JES%g+B;VgQm
zc=65VdHb{`GDhvJc=Ex_Bw>lAU*GoACi5J3-fJ$pZLlPahqqrRE6Lz=Wgx4`WNDQP
zw{PxMTzPSgBv0XtDFRDoehZql=G5L+4j0}mv76r&3m*4<oRhggfcr_pysvp%rzd`Y
z(7wRmdB@AnOQ-f`3*TX#o<AwVLnL$JHJJr--tJjiz!JNoagxc*ww!&wf&5pe$HhEq
zowhmB=<1bE4bE9sR<CZYX>pjjNJr1X?Va`>t-`=dd*{#SUEVS!h^6@6v9<192PTB+
zXp2n@_@(tv;W_IKrTKUK@9eqv?@5?+XF>k+>0TabFT*q@oV~kHZQ)iAO~?JuTvJp}
zM3<d^z3Yx>F>9BX=eLPmFW7dKt!|&G^Znbe+0*a;eK$KiKR)4M<fiAm6H>aF=6!zo
z^2^J^hVxS+#iU+uZIqfEH%sQ~soYiHWZPImH$+6~N-oj(c`em*8_U%aO@Xyr?(D7W
zERDRrMd#bc>HPEU=57lv>-r@2A!6Mk?dl8;cbnXgUElw#+NGEqCHvmw%r^tAT~F_+
zO`59c*0^GplT>KVcac|*FFZeWAyf8D?1bv+rr}LH&Y7w#KBqr#;==b!YxhiH+0y*y
z5Oe#f)B~xVf0wQ_FN`gVUHQTC(%K)Yvc~h)&fJ%l^xtypWAQeh8@!?4Uw4}Ak$<*R
zLe0EP<i<b#!|&ex`n3Gi+@v!0z8b#uVO*c=)@gsOm{9TTyWiaC<GOEB`LC8-pM7lB
zJe9erb)}}_KDuQmO~v2JKXaHIwyyV#bX=_6bZxGfbNd_4bMLBHYB`7JSWa0@T!~JG
z(Zl*}+b0LBsTEw@^8UNj*F!6qzph*BAek@Tk;}sPcH6nQYv$Xc=P&U8nf+$riQHFi
z8zMfH&yqP>{4o3a4~4b$)3W;Q9JVN}oPK{*#+0%|+pQ64i<VB}nc3*A`^n61cF9#H
z|7U3_$Fvf(GQ9ZL{>{k!`7Jkfcd;FtJLk=Lw+!Q5R?d1Sain9W?o?YT|ERVT%Vjtv
zrI^w+Uwd;bVp)3id+IKK^+g=VvbEB<H$Pn?Cta_9c~!x!(2uJg%WdvYpJ|Y>eRWbn
z-mk4Iu9jH;E&p}AW9N?lGR0|6*xq03V13*&jY)R;bZJ9vS?P$)JZH0ZMR7XMoix3*
znQ4E1*|YxDs)ySqKV150ePjWbY3k{TvT1YfZkxZo`hxA5)$Ltdl4`p;cNT75_~BPU
zV#`d?;?h@2S{juloNiuXg$_QJjIO)ZrmIKr9P(LMlqmk%ICIPFnknml3!KsVoE^0_
zVMWd<;lF;LPK&HRQg`-aNU&kau8?-WCq_!K%O(}uD@x5i_$}pP$Kh%3Lnl9&^LL`<
z@)lLq8MA(fP28(&>fOV@BcNr{q_i-rYtz-Ka`!Hl=s0l(iZmDgm*{@GK*juGvYo@J
z194|X7A+K1Qk7gRvUS&z4O13w5pA!2=xwXo8EjX3>iOotGnWp=96B%e+-LcO24<~C
zimLMuM*ryQT9sWHK1*k2<icMojdxcYD9!Ubp7MRsvSpLhrAkW<UX%IY9vm57`StB#
z^ZoU|KfJTkaoW_rnkkFzv#$JnzvIg%=POR~`Zn$Nvx(d+JLFgPR{BqAQB&0L(z&oZ
zHL&TOz#Z9lXMz^qIqc#huKV!BG#BwTt99+`D*pd_S#j##wF`H{wkO{{ub%9)=i#Me
z<q47cKfMpxy!=|^ve_bZY4(-g)h<izHBbFmFsF68x=6}@o1L-~i(|k0zka##lK$K!
zS3?gi2>nv9f;;haZ&SgF)W&l{FSwRf*674P<azg{Ho>j`@yu0oh3`tmr-tsiVpCFV
z`cG1|Zfl#J@qg9tzm!Tgep*tQq4fLY!fSGWFNMZz-?Yx|dUnmXwGXsgy0+^t7ksem
zeofLV<Mtza|5mM3`#zIPRO3c1d+Hvs!1}MVmftkyx^-dB47Eo~73cQOJ}YzYkBYcP
z>p!OXg=-(QdA(V*G-p@O!t4VkPj64`UaXe&vLLbcZfMn3v&^$+Bz`}9rT2FG8G+;F
zwXt{4PZhLiarLohHjHM8oSnlp>%qRW@4a;{Pu(pu`N;pD3>JO<a))olsWAme>b4e%
zpY8rMLwKrY<H{qFp<X?0Z7oUf8;`j#-Tf=2&u4I_VQTytCTpSii;QP;tfg*V<aPgf
z?Dd<qht6kC+Ud4x$+Wx9ok6ljuk@VxFD+d2)z(X=C)nx1(tY}m7X@7vygVbJ!(IQ2
zrLkC&RnGdRRDHWCvjs26$}ZO0?bD}qA?uaiwOR9y8byYSme$QT{~qM)X!tDdI=3y4
z<WrYV`b;<0&DE^;P+V|j!=<vOtFL)dR|`IM&-C28g=;_i8fUJX-y=7t$!ZjtbG>5Q
z`sbN~YKZGq9Z3Pd_4hp*SF$N5t>3bLkC)!jm8FK98QnGwK@rbp*9u;qeJJvQ%emjT
zHPf5|J4{w>FrNJCpo!8srJb%3F@-Nno||S>wy9PvJk{hqLr3t-qD$_+TY4u|S+gFT
zvOH$tD}_@$AwQRVoLD3Hoc#%VG~dDCW#R#8d-sR#awy|a&0jOuik)-ue9h^Zm5F>E
zj*}#hZ2BxS@%+J~SJ!knuVOj9`taTmcUI2!PcN7zIZZw4d^h%ADyPKF#8tNpXXHFv
z8ua?*j@ep_or?E@8ofI_LRYWfCRY+Wqiesj@zqE*Gu7#eGh5V5OBbu3l&U*7@AdI@
znw~5zw_a>yIP9#nz#v%h?qZJg*X7>JTb}DyH+86PK5OIN_&;Vv<SmX9m-p`Z`S0Of
z`FnPIcBgl}v6Oh4#whSbcmDRbZ=%Iiu5QukcB;9}HEB&R$AVcQ!p)~VRHrPNWpJwN
z@EL)JTAP+lnG|+^m6iibBD>(d&wh#1&P>v(`}_3r^5AnK$>&X_vVOW9x_fl-mz4Jz
zZ<=aDSlv%YZH&HPc8Fh7aGUOdEgNRs`n)-O*Y8uM$3>oBKa;P&rZIHOHGl1MYOZxV
zFTCAx;9E(E$paO=S8pUI-wawOJ-=H1n(ZX>C6U}^=acKwR#on`UDi>v?}2)%Q}gyK
zzt5}17IJy+$XK#&>yPk>&Xdz*=FVxE7aO1VSK{+;w|g3!Y`?wyV4T4p_iC0$-PCta
zyq~J-?>})m{g=jj#>5_d?nRe`6}9#AoUD2xmw#Kou&;e*`|j(f<aX~^wRc~-%KSpE
zg?l@LeoC3V-aJnrF3$0*P|wF%w~y8}vg9{z5D&lb^*~-Y_qE-7r%jRV+x<EIjbK(r
zMco<G?-S}YU!9pJpu2c$M)L1v|7JIQzfxIefA@NANOv6n+hh0QKPs+%^L}SnK#stC
z?!WRZTQY9Vc37JlxMI0h$8#Ztg3{>eYcAC#F1Ak6Z`l0#c?|Oj(^HmrH$~|`o~PFF
zD0h$hgq7U)SNjHchzTry@z+E0=d{F{2@AQhJOZCh*(9gfVPqooQOf)3d_Cdyd++8Z
zvmW33Xv&k)+vl~T&)(a<@&2N+{0AOgtM@P4@TlT-&I7TjjnCvS7u?@Ar`YA`#qi^g
zHn+#x`!ogp$@SA(5cF@w$p?IFiB=a=GP$BG_iS0U-1qhS%s(raT}gd>J0wzfO~$R3
z!0oYn4{55sNZRVqlyNgRu#}50d++tQ<v-4txwxHq`_6N7xL=t_TF5p2WfOnJb(*Do
z<Z7PPxUlQ!mjtc{Gir@{SIeAqx}QC3*CF#jW8tptXTAS+-BcA_bn=YOotH~5xE$6v
zDYNO`qv@@O)O-04<ty*s_@}9YK~B}i!Qt$Zq6<@mQbVFNyQZ>wtngc_JzFPdivKL1
ztE--}dN>3L2QIHtnA12RL_gYB%Td3mX4+{X9<OD_2`nyWu9z%R>G1Kn-^P>G_kGbR
z$(ct23X~kKY;1LZ*>P&e_7G0Zn~P4b%AU03%7P=){_gQCThfvm8N4e#A>@$F4FBi3
zjq^?B|E+OK$@;rxg^}?Nhb{}vhdOWTzqCeF@;{V&lC&%<O6Zu?$%tbu&MH@S$b5M9
zyFLH^v$qe=Dh90+Q_xISEIfbv=bpLS_f$?dT<xfINLF{+rj`%72BDwdsx0w5Y12Q)
zyleKQkTV>|l6KhZbZ2gTo2A<8bmUT0$nz|ZWy=Ddu9u(RH``j#AZ*5@9~_b^l@4#y
zxo3U3+j-Y%ovQO@F)q2m!EM}UBeioUhN@akH{LMUa?Rym1}i?rKiw=761B2n=Peul
z@A572&XZOyoo3cF<z0y6(TJecJ3Ch1%(wVo+ama#De<Kz-}$}zewy=2{r~OB`?xdF
z)V=vihev$HV;Ppe-C6%ESL?2mYqehS=L>_Xt%a5Ax926=;-~(<FrKQo?@OrdY|THe
zp>NY)CkP1`vn)NkWEoRU+ln(HJB!5mlB*7Hsl4A-Y@V@q;-c>Ci|#YuganBEj`)AN
ztgA8PK=JHPcjdWf_9w?Fc>YQ~!JS$6^?Xfd_U`BJ&S_iwJ&4=1>1EpfnA);e1vMgJ
z(Q9pQc>TGUvVhTY3s2drDSWpsh)1oJ{T=Ei9(X_OU;R7bJv*$Q-PthnO?zc(mX=b~
z4{wnx+H09~j(yT7dGowH@x=5;)iWy<Vq;9ss{Uqp@af&kDXu<#-8U|tZnnO4=<NR`
zh4GVL6wf)O_HI^}hOmOkx2DZo7?r$MUf7)AE3<C*gB81`Xq-R%P<72u-W`t|VgugZ
z{8%_;y=aJ;1fSc~`)QM(t`BPN42kkT`R<HHV|F9Q6WvYi+tpv3wb#y)V2-NPZO#)`
z)#wR3!hfk^(Zzt$Se2MPElgRG7J)7kvX}+ratZ@oQ><?VTvJ?q`E;kDh5eI%bF|LA
zs;Xb5<#Xq?<8{TQ(_I$qOmUxWKD~KuMR3)hRi)O>fBj#|8^nDOVPp&3tQ4)a^4QYb
z*Oz2z$qB4FaD-)-Yfhw%`ND}=yZu9!c{Ki7wyN2(YF<Ttt7+u^^-7EGY&rVGTz8B2
z%6P$x4@FLKe%gG>eEHk&S@&1hefrZmuhBqunVo*|mxw8zs!By3j^<*=xuXvCsA|XZ
zTX$LAnKX4xB;(`@Jd9QkSY(&vMC<+Cwo=j5OJDl%exZD47ukuEMAL1yTD_RPp=(yi
zF*!-4RccuZu}88x-{%<eu&xPz@bv2|jqBNm79}~Qp8VhKP_R%X(BP?orbC66j^PuH
zlA9S3*HpYeCA;Zu@M`IBU+EH=ohas!{n>tFnep8i_N}RVxy8?K{Mp&N^zs{j#Yc<k
zDmL!>eRlTt{q?K)tHh?q=($deO8-=2HFtjB?P<!~8#<~?m$d($W_kR-#}w|m+uM|K
z&%aA}@|LH{t0-cl)b`7lCp~%EUFy3*ib1A<&H2a?mJ6!6nm0Gy*mPr($;0LI@7Gt|
z`XuJ)$<(rFq26(ieOEu;t&_68tB`(@lbhesrOEJTuEmd{4kh!jR`(f_CGzSme>X3G
zk`R7-rrl?qn~WyAix%x<TK&m5>s;52M1ky&s;VB<IxDuxN~VZ>k9~gr7t6kDD^Jz^
zXgJ3`)oRt6=s&BgI_BzrI4>Dkta9mbq`30FN6QnOet%0p$hy2j-sNigt{q0qS5#)*
zh>yyxt)IGt?a}%bAJU)NEzj<}>b`UJ_I=ad2le)REez^=*WfYJ#)U`u?z_xZ&im&k
ze!6~Q6|cmVean9t-rtpV%P=6@=3F%QP6599&{?<U>}=-tQhKJDuG&}peq}G8)Sjls
z_r1dAdM{VMnI~;l!j-R+zs~kWa@qY-xu+{b?#<;BQCCl$tFS6;dF9v0mj%IMH}0(9
zP5!#nIH@aX>gjb3zs0R{_jL(;>*_u_SF|jpS>&*hN{ETd7K6_b<qzH-?tMManN31q
z`n)5Ov(0W>=cV|B1e=%3^~fb0?d<<^hf9NZ(lLv<jEYk(E456%F;`V}%l4Acxh<Wa
zE+ub?+S#eZEq6+H!XBZfntu!S=lq>NQ}4?9^|v2B(+gM-u(q^D`jxjjLs;G99mS^w
z&l(jJXN9iaZF_y)ZKkyhJkrOPotc!<&}~qbdv{7?%N4EOhWLq=m*v=-RyZCIx%)I{
z>iV6bS2y{y=BM!`o?FMXFzl|biqp0ZqnoX-Vy^k8FZJv1`(@c$z4C=*W89bRFV@}q
zUBxr&$N}pT)|%>V?~mHI?wsW^#b*BYhF!+<@A~9;er~<L<H=WN^UZ9nftvaasyn!p
zLauThDM{Z^9cJo1W5IIUs2x#}FM0$xYxDmszx?gDaNOP7?*;o$Dab!m6k|y+ShMTT
z!}9%qXW##;AY}jl%VYb$KlN`{Mo&&XfBf^u9kP2>%qlrFSTk2mc_gkCFT;GU)yIKh
zc}L9E;72zTLnfyzJITV-`$0l__oaf%t}i5ashRe6e3MzhJt^^7;M9b>(>JlqPn&To
zX=Zdc`%3M$g6Y3JZ*N-T8I<G{anA9p=~C6Dhd14M`t$&6)AG;X?2fl@ZjJCd9rSBz
z!nZYPRnuKWXP%sH@mx3k<kXoG&3Cx8InG%g^;K+$jJkHXs=ohL)Q1wg>x`*4*$Otd
z>fgJ!_xIb|=KCub>rV@wv-$ZuOU-+l=YM|rQDK)qv02A{)&!P`C+3}>9U|_-$+?O_
ztKUcQ?$M(H+-9Fj7~ba?%O(Ch{QZ9I|GT$;w>w9LnWko%ug+P=l9T%J?B|<jf;JuA
zb!V;pzo*l`&-eQ`<@2*Ijb6FS&&0`D@h#aWFsI{$>%m8Bl~<j+^-d*Y+m9*}C!e*u
zR~kM(m-_s$rl637^f|^uyFM&-YORfIG2D8wmL)5e+offLnnz5h(65KLG-PbAb(j~W
zto}Dcx$FD2DLdAmy7)>w^q_05f%~TGt@rM2zP<09_u~U^gEFQ1z8U2<_0}!RJg`Ue
zfp2S;{2a4ai(dTWy#IXJrJJXG<!?WYd!w~}-TrjWo{Mwueo^!L`%NxsO-`kwgYK8v
zks+Z|>zy@@zSa&<W6_gLySh$Rk+(eN_zL67O>d7Fw7$ObJn?9M=G#ZJ+p<>uS=N<p
zRNyT+#WnCh*E$E&9W&U%eqGaz`8Dqr%QlfMn)ck2FJ#pjGIL#*i4<>ouX&5BShD~1
z>CZ}mbqlWFa9-1Lu!t*;XBB(tx_?hRlZ{Um<@#s^x>_iny4JsyKR)$R<f2^mH1<bw
z4wvtx{AJ$K9q{e@iFbdq<}Z2Advng+8j%2}?6vYyw*)y`*_>Uw(;WhvE;fg}Gx4rA
zQVabmwa&V=_X_i;s*XD)UvC7TUfwfnh1cJ|zwf@C5yU>{jPw0nYLdJsmKHAX2v~h-
z<81M&gVIVGx3#|niB2#pJ7o9i&1O@(XMgr)E-XA9dtX*=hOgV=!!CynUrsn+-pOVy
zZk2NT&RpRi|BiiHU|;_{JZV)_;S{g7CN@@qj43b8RA(*ni`aED=#q%$SJu3BC-&bc
z(4DvVTt?*9tG<^k<-F&FH3l)*DG4&H^)Q~c#PazX(?V(S=%3kdFIKoQxr$wKiM|+G
zeQBxPil1IwoK90OAI?c>NJ#k^lAEzCXQGT6PiKtMDjS_OLN*Kc7GGTz6K<-_m23ap
zsJ#E%S<wXnKe=9fnWG%zo1o#^&J$d;*>uYC-sagXZ+1L5Kl}go&3{gd>~F3Na`_kh
z$Z!XzhxfM+$M^q?|M$KAzx=y*@0|Z!Ew{hF|406M{?p&}<^B5K|9>d|@8ACakLRB|
z*e;&W^zY-xzb{Kxo!!4umT%@X+oZ*F{`PHJk@}o-;>jeZrO}?h&s@FdHN(k^ee3i0
zY<D-a<`}Y^T2N-TNL*0dg;lpnLrJCmWmSM+D&tx=?)dB}E}R=vJSV$OTEk-Q5_igv
zpMP2Q)PJkf^P=r?Ez@V@>A&aKOf0m`{i%|!!+vMw&7yYE=$zx1KRyn&OWpK@!#%UC
z+r`%A&fW-{MGv}71ADV}>An6Nn{xi4%3Ws_mUWu@E6V@BmA|+5&pXlgUt<jHcHZ8t
zm~yY{m0kE$DbK3XwBU2gi|=i_)hv2)YDCKaFE0<5*-2kfSe<gZpWV`a&jJ4X`+x1;
z|Mww({f~!lcbD(~eOP|~-*@Kizo#d8f9@`p(y>-jD%xSc$0l$2^6>TXckVAe`bECJ
zO5Q(OWKNBvu{-;0m-5oE<#y`tuj^dMWGbK1v`_L+_rh(>^;I>BZ0ju*KA(6O71pO0
z5#)NrWz+iAa-HY?&6_vXT=Bl|sjO6&Am;RMv!-S(d|SD3hk(Dp_JYY1q|@$x+;WxE
z$Y|TMH=p-($DB&L#5c34{kC^cecy?e-ww<5wS(rB-##V5lsui!?(EtncQ4MB4rIB&
z+<x3{^L(ebfpT|>y#jXYIjwuVGb+-@Y|+&<$61a!$K2X2-!Buhvft~$7N73lo=Xl2
z39U>0*nM!J{Hlv{<F_sdN{N|uF=oT60{-CMlcv`{>hJln;kbzWbl+FT@uKU0*qawC
zet-G#P`BhC-DV56Ww}d!Y^(6?yXdpJQ-bMlw*HmOg3e3l5~MVXzNm`^uV}r@pDA}U
z?Cl1dyc<`FcqMQ7g>Q-8`TX%W*W9ZK0S(dea%~rj*q7b>?WQ+p(&diW{7Wpa*7-0u
zzYa+k<5+n0@gyOIsv=>%XOB0>t^4jcV{=_n&i1J53z-d-b-mNgqt>mw|C`|kXRP(o
z+H%K=B%ja!)LJzk-!-bzGuL%1a%=oiQtuSKKh7j#j&OAOU*?}{zI`xCe00<EU+;-S
zD~s>5DxTff$r#Jz=Ej=-hOKFm@pd2n50>9W@@6N$n!3)lEzM!Z)HBA@=B<qF*==(D
z(UNX0*&FKeKDk!uH$Ht0S{5RB@%qFU(XY+^iMvGZa^aX<`Z?+5lERXQ5{=yAFUsET
zUzuw7Ma4f~RixXXa931&*tWe2TMSd}Z=Vu<Zdp*qyNYR<I)9)CkHC=)37ds3=-%u&
zpYr<o>sO`6->*(u_^3M|L~B7t;(6|f2ir<-JWz@}I%S!B>Cf{wuLu8o{P)-It3l6V
z?fy5<uU|TE<Gz~D&*J}d@87!R&;0-2>ubJT*s|oyyQk&%_f>7k-}gJQvLaP~e{IFr
zS8s3U`_=tgU;jVee$W38ub!SRy?FWM<I6Aq#@E&U*^zpG6B~b8QrRMlRyik+ke-Hs
z7nUo04e!g%Zj3Vvl$e?4(LGbr_RivkiAzHpnrzSS_!N;680;Ok#q(g&-K&~Q0#$pY
zH#iD7q!o1cED^k_&KkV+>4ujFW0jO;&b1$Ayj|9pT;Y<k#=G&{8vRSV?%tf$CqMtw
zw2i5Gaw4IPp`x;!QNAMl87$|P^H!}&%FdXn91wG*^yuN;?fLcJ-yJR2uXN~JYrm^|
zy}6Q#%JZ|^eY%ublddc_2-;rUcW3TLQ=`fIej4`toKT_u^6cGTe{XN!|Nm|I|4;FM
z{(<uKZTWq*U%!2Q_)k&xovYXjCgYWD(|37`Y)jab^{em1^6Q|SeQ!_A*JnrP|NFAs
z{{Q3t_x5#G&wNe7ZDyV8EC}}FuwT4X*UGLTaG`1AHWTHD6a2Av&194Abnh<Dtlz4u
zA-GU(Z^`PJZ`WRV(yFp|=2@$_4Y$)B-t>KX_J1aO;Ei_X+GBD@vX}n;cV^yF1>c<~
zKVE%vDJ`qZHQv7X%HPd7nenbCpHF`AJ*lDcSnutsU*`}1HeB&3Cv3&uxSe+H%L)Qs
zs!f|zFLL;@+1}uDKTiDm8Nc7gZ}XD8ycO=h7#8!~-}Xh~!0JZ9yqO#)*fz`$lXxoL
zs#>(-=|rwp!yQ}qK4)=LI1o}arG#aRW_+z?)E6tOS-WLhUNd=GtXRwZ?Q^JL!i;M%
z&W9Vl3tV?k)3cRIcI&g8yo_nVY#F~_YW$b)XxY7<wc}vp+kYuFf!FTsTQSe_>hW*R
zs}Fy>?sjs@$~Euvm+Q%t&NB;|H*s6}OYbW$IiK-lo#=fsv-|4dx`~Ghr;BfKvX|SM
zrarxq*-CAx$MPvw8t&HJuJZlnjBFdaZRTmMRpe9dcAm`2eOI}q`i*wFaCZ6KBBu3k
ztC~#yXXieSQf8d(KGm3M<IMS`MN{tu)E6%QH|?HWyy(*6bI+bGZA*`O*tcqK{?uKr
z_s{mGF^1>LR~Otm*Kfq;lHhS}V{XqYCXpD{?I%i7!)`yB@@7?(%Kt?ip}Ve~^<Hpx
z>8s1#lhqr2zoseAS+LXmmymab=%tuG+c$T9<U~*3^<d>0_lZWcR3+yft&Z-$nb;Ox
zb+9c_vuElei|1Elw%#;n6ghujsdA>#{mto-SN#J+eVA0u&d$+J2w0x;G<Ej{SEqo`
z_kjz;4|2GhB<`(VId@j_-XDjK>#a$u5;VBFWYsatI$u@+Q*ZC2GSP|^3V}NMKh*PH
zeo6VV|LLdgp8I`ITi>WZ`6KXpe*FIG-=B_NUd~_tbGiNhpVyT))_wn*|Nn#e`o2jU
zqpJSi&940U>Z-Q$ob9ab*YE#%cDw%P_WIu&mc9M)_;>t2?_cZx|4{!w)BD%o?f3r+
z?%}Shs<}1KPpyo-bZPke-7Kn`ydwn{dgPhhaC*3kHB?FI`5I9(gVsIUyk~CezP0w&
z<PTC3fqzb&lIn~}SZlCZ)W^wpR+uZVP!|XP{XGWHS46yz-@{q>{rK_6+n%yk&FA~h
zf6}S$@0W*}rx~9&MyJG{@d@86KKb(Fljpe3&U5xTS)ka$b-=-~gH2@7@yD5`BNuY~
z+PUIIjP5k+>c3~ZuiNjb`gzw|CMS7a!;g-6TcmE<?tZSbnKRxx=#s*#+3mkyB)U#|
z$+WI8%x<sWx9$6D|Nj3~{{NT#pWXF;XUp%a{`>1`Rk^zNq%B>#jVwABQo^ERx7e@f
zv#(ywl6B|h!-wxaJbHS1`u%;?UuWmb{kvTM^L_pQ<?{7*c`jU@c2_)iFPPA%^Sp2Q
zP3MQ&A9-~iZ@M(y{rA*Wy`dq2PkXn%V0C|Ob$Ov}Pk>vQkc_hSxs^*_xc*aU;8T%4
z-m;2!%eo_$(~Wt~@a_CL;bqLW2YL+2Oh31;^F2N#dY4SDd@{$(%Coi^ivuK{y?CqX
z^;)UoyLRgGADfO!>`B~xB!r=N&9Q}r2jkQX+di5~Dq7t5u*o_8joclH&67_@{%*;B
z8G5g!Q)cpwNs>Q(=9;pL`bGp_mHo48>CuomF=u%cSBU5y)lF;4GM%{nXYJNx!zCQS
zD>-IZGEWiGxgHXHe$r$2E@QP@=OjK9^DLZi&VDLK;pN>lY3KK9K{vkNjy1dK_wkw8
z%9%4{dOeqRZFMWL`=Gz+$^4_;{>%)IgmPLGR~$At;3dB>jDMa}kM-TVIZ1h2He~I7
z?>Xhh&i?OVv!ayxcemLJJdSGA-_2II{OSCsAI0zVaW$@6uDz&Vlih9GCy|3fO+uGM
zS*xE0l}ewhe>B07Z)w&0h3%!2QZjs$TO~E@FP_=x{&nK?mlr)Q3q~EzWN&Gc5UE-F
zD*MaDKRo^V5B{4;&1acq{qaSQ=FSaE;%6PTliuGpH>`L+o96bGZL61@d#TQ|q33U)
z=IoOCjGf)-h2r09buJsLf1L1t3Ns5=>#WF$+r@G;x<1WKQnp-_9}*z`{^?`Qm8MB)
zL2@p4f5x!!?siqNyxE!6Ef^H>*l1(W;l(r5y8as9kz@=O5lFxPmdER}f#7NrF;-V2
zMner(j^%y+LVJ(ziSbHOcX>O#pkNLg(~f6vMJj{Ky}LV7z0xMQ>G#*0?B2zs6u6Y_
z+^O$9i?n7sFPN1WAKE>6&Fki)FXvhNEbIAv^~@PAE2dRC4vFfGxt{-<Pq**+`&6R-
z_oIK-)_=S=%b)r8rtJC0cRzkS{PObZx5vL9UzYr?|KaZH-CzIxi}U^T@z08A&sjfS
zPVe9U@7I@SZ~vb6|7U;C{@(ge?{@G1nO0rV)t<la=d=5uUjE0oAD_>^_ka2SKac8u
zo8PY!KmF<PdHcQpzg|y~nrCloE4Tggu{GI?dbgFjoD<|ZDZOx_SoVq2LYua^&;BYc
z=Q?$bs-%X^i=an07ih&9?Fh+j)w^K2w*B|v)zh=RQsxwY+kE_|-Sg%hP8r8`?{0rO
zL3!HOA0;-dN8Y`E&&MtkHtXba|8<*Q-A&xve9$(S_4{hZq}eMKceC7WmJ?i4@vZ1c
zhQqcS63^Qf_j2TI`}FVV-PQSVd;YvTdiXZGZNxO0XuX4bRd0muvOKbQ$+GL*+gdkH
z+iY!Q+wKw?!F9AnyT5#*{Inl$%h&(gUjKRhzx)3m-v6Kf|K0t%-+#?-Zg$Az&e^!7
zOHKDAZ`9KTYs=QK^O?LgJ1y4ed$}s&a#4B3*I!R}m*21Ze)j(V`Tsxe|MPP>|8Wle
zt=`O*?%J;we?M+yU>f{_w~8_F*@6(k0L3dsze2NZ?99(R_RW`VOW2;jsM<_HJ7|&m
z-81vP+H!6Fu3<c{OgzA)thj(Vsf**_d5O&tS4@n8)?Wx}-~D*`|A>POsi!tLeXNSK
zHt7Crx5Mpc&^{~m@9SBQJunL8*=#Yh_=5j>^&76&C+nn7d*HqPMe356soGw@{u?hU
z`Qfu@Wz5Oqd3RSP^oRZHQCc$X<zlB-qAar%v;wXyRC<_rRBZ8X9i0{5reAUTa4W#-
zR43m7@7-bV&IIUcPTTX=<>|g7_ZRx>{bk$p>i9Y<mb=#v6+INnc@|b=&NnSKM_KfR
zV2i6veATwP!f#W5o(nEGe7xiQOVt~d<(vFxyh(5>5iQy6d}>_)<Jx<rewP-j{mv5h
z{$O=vMa+&W=^kT-IhQAFPu=<Elt;{z&KY;l&RATxVY~7fVG)lFJR;Wi`^wfuwOB3K
z)}YCgp0>TI$75e+ZAIX3KFzwb+M3bTy|XNBRg6m?*hH8(-u(J@y5M!66|?_w?U0N(
z8?=g@>#@JLftvGgwY=|Vw`LwJE;rEk@ZRKq@`u&?H&fTAGZ>UtPds<{Vp`A3c~P!^
zzqSRu4v*O^&@%mz+(TZ*E1busY8u@=D^YRu*tw~hW)brYU7O@X4n6o1c|)$0uW_60
z$_#ID^UIZj7fmy**^k!swy(PIQjLA(w;fum)QSz-^W#*kBSI8*yM0sA*tR)YQ%#E}
zIMi6*Ua+Ms{{7l>>30l@yKgtWK0B+7W#7*HnYWkS47TFXk=XfsX{FBM4L97^Opgo+
zIp=kAeeXK{uqTqdvx86Wt2}MAvGvlVGd`ipO!wySb$#k-UZrxj>c`E(PoIDH%l|j~
z(th~w>yq;6CWiYsE5AQ8{r6+{{@-%`m7e+cK0ZGF{~N#k-iohI*S}sq?k{g&^K16~
zf6v<U>pwiMtgX1Q?JcNp&wE__|EuzsA6~snt^0oXzD?)fRllC~|9>5QXYcOa^&j8O
zKK^#jzt8pm-u=G+@7wJBbN&7G|G(G&|M>E<{ohZQkJof@nY(n`eVFK_`|w%V1jnct
z{m%=M*4&-NP!;uh;={nX?P*$`N4lkZTMoP_*uJN#@c*ZOcB(g(K3V4b>(~E#c{Eu^
zE`77_+0ccVJEwbW3#_oXaeBHwUv}8pX+=M}!`H{}sa){s@bd5SpO-gy1nrFv-ei%U
zHhuE$=SK1`dpoNog1$a-O*yaR!yUh;?#DCp{P?=xhva;E7Cy}VEFh@*eD%Gq49~q&
zd0sm{IqP~w$U#BHEL=mi*(7n&Tfa}=*Yp4X7yp0t|M&l&-~WHR{`dO&-@EUhoOI4{
zN%PXsTv5ZVTW9#Js@gZTO(uJ9ZAi6mpQ!kfuLpf}U*=!iy*(dvD&DvM-|PR_|Nna4
zey{!0q^Dg8EuWm4FWpr4^|+p%vUbPvvi!ia^*N=FZ|wV2x&MjE!@K8Op1EdM2Ce$m
zQ$F#=!q^*O&v#3#{Cj`9aOrWYIj4SA-nQEPX}<Wn{ts_%UaJ(|a`C9?;y;>m7su?r
zR(4qPOV`VKvA_Rf8vpOnEI#(0r;I0L^*fh#u0^#?$woJ_e{6j1zBuLmsyl6s4@`MV
zw0<ippDwNc*z;z?#A99S|8EHBo?o|S^P&5UjZxB~ZeO?cZi-}=SR^oCBgu5mD*N==
z!3}p0Y}lV?IlZ&?#XZ60n{Sjnzy5<~$|iNrWTAyV@fqu1X8T`?`%_VR_H=N2W>(bo
zCRX8eU0LDaNIQ?|^}f<oirz}g*VQf8&z$`vNJGr&<R0sw)@?f&W<G7ry?U*E_Kuf6
z0e2RiESn~AynTmw?&oE-wOiO%)jT`x^!ezuZQIrOLgxuu-_mfJy*VKu^>pCk87;@7
z%G%DG?6-dIIs1XsNrweOp^KLJ_v$k5THlvDy@2_&w9i(%)wjd^8lJA2KS^P0$Gr~C
z?I{hlX$QQ{=dR+A&5SuVca4C|E$=Nqiv9Zct+!#Y_!(sIbr<i~pk_zct^Sj<rk-DR
zD4+AB?i=BW%K18h3op-k^U7aNd)vZ>oXKCg&OH2J(|K**TqT|h&LIK`XW#35oAU5O
zi_f98nLCbZDa^?`e93Lqs)Vwpg_}KQ$1U%$VbSxN@;8UG^M2^#->WwW{9!dPY+jY=
z^-Rceeqi;|ZD;!SRXmru6vq2cuvjqi*2yNeP(#kP-7Ve~O5vu0w$){(Re#?d>YP-v
z*L_>)!%3S@Yo%PW<DAl?b+=Ps@*;&vYDdHmxLoDw`k=w{d)ImowaEbyKB`L>KiWV4
z^LqFH-zt8|)y$p7{%6m>r;p3)zaOvvTK>P<>h$k;`Tmy+R(<!b+NsiIv+Sw9{lA)R
zU;FES<W_y|>hOQgU-ZXx?T*c@7P(FS>FmK??d`j_-~X$>zBIP}=ZBB_`|EyeSvoy`
zfAz<2?fLgUT|J$D{`Tenf9L<3|Nr&=f5+#TZ~Skc{r{TE$*HO7P1SFUubjDKyN*Gq
z(qGJYcZ5o`$G0;au4%s#=RNyx@7LW^&=zzyDLifhgOY&YyR@oraep_Q_q}?C`NX4!
zNQH)u0(0}=tDE(+Ygf0O4pLm>aNc0n&e=1UuF=%KyVi$OsyHZ1<*wJ)f>lln)%?sm
zX6(*l5)Bb}WU@|m?enjH4!d3}DX%GHkKFLAwZC)zhZd**72o_HO>cfwF=0+ig_lNI
z*i`>W`N>u@PB52Ps>F)d9m>i!Nk4OCO7O`prX=0e4?C4uZeMGWDdczX?B4g|(Yw>}
zH6K59i&r(Rj{M4(b9~-ikB+Rhi#c@PFnkG$-J)>hj`zaqFh>2(>H6<J+y7Y||9AQS
z5AXjyuK)9W|F5(8@8#kiR1|5LWLr5DdvSBVxb(>BM*8*LK`Yna+!Yom>ac9}a+N9p
z8NXeB{>96`_t%&Ece?)X|9{)-e|@`rym;aEGjf@UsiLbI=B=}kxuUyvn&`4iQzk_T
z=UV$R6zfTt2cOVWlxgP3J2X>Ls`TaQPnN1$UoBm21WUyt%mOd6tk1A$44jp_O#D;w
z5wX0i4+~@GF+FkTzHOk8yTmZD^#G@8yWCc7nLRcpVKQo)zN%RL(EPc622=S{W3kPd
zVij*K8gibzxvUVX`09?^=Ig&)cHErr^D#{D*2~}*&r5ec7s>5OeUrCdmZji%`?Ke-
zzNgO>*|c@RM)z}We>84sF1MYh7-n>9os#S=#V;qe-*~{h=w$XW&-*1m<|vt^_VRgT
z1h;LFUw5N6uEuSSsnRUB;?)y9l<&UkTlnRXTj(aw@Vwa66&@}sOWpc}Hq9#gG5PTP
zWm9FJ9B-PsUTE_!Pg}E!RZcEV3j$&pRvDaqx`*e-(HU<ih2ES}RdkW<`$4(w#-GFt
z&E+PC$%a2WTA6iW9s|>>8Pkfs@9As`5e;I#_Odipo$=`OTUJ&pC-37;ey3Bt>-0PU
zWuupRtWgWEe|$D$w~}iNZ(HRu)0taWX0$wb$33n0bJz(R8{KBx&1dCi-8x{_`SPLk
zo3~eE9!<7h{C#SRZ1hsT@2OAy-@U&W@9b|bEzJ6IR<-DotfyjcJ5L3#)Ki(fBA}~N
z^6rr+t`k?YvwrEz?$i{GVDOz45}GQ;#AY6<-4ws+qQ=%IO9HZ%WNxYO+jL3&rSiOP
z8HLW1#ur%(=BQ^aT2jrsFY9A$SBU-O46C_(yNm@YXM9O~v8C?X!C6YGz8~vPct)hW
z3B29xrtVzt6J51#eX;x8)t9ICoi*&5=r=>%i%nyXMYzfT#ojMO%{s3#@->7Ub(JpP
zxIg)0&9{dEogCW@%RD9paqV9_HEHP+=X2gp^*2r(C~}xGiS^|4Klk3oys_5){`&Xf
z-$j4)YySR?e;2>MZo#k1tN8ij>%KmnzCPY5;@a2k>*N1FdKl{0E*|pg&{5$x=KAsf
z-ke>%zN_x{yPW%fJ}i{E_wU#B_4n&4t@btrOsh%%^yz6;S%ug8?!C1;>VG`)uK(_P
z&hq)1r8{=~^a_cO;5}w+CA(zx^C_=GR4cc~nLP_CnG(0YwtV{{i<YTJoUWX|P{p`p
zs+0Mqjwf^Yr+j+(XUXa1sTX&*gm0PC;$0O~JH4W9wnanepUY1-)oF=b%H11t?zwu`
zocafgW2ZGoS+38HydHE-R6ylw>P1!M9b(Vft_XH0H0C|qCnWNt=u*4+_x$>gzn+Hk
zznsB!{i@zQEu-%S4w-)nQg!o=-j(`4!)aBw_3JXto)do2`U!#cRu4X{p1=1G|Nn3N
z|G)pgUH|8=eBJ*~Utd4{a-gJO$(A__W-eMKIgzu+&;CJ0)pncBap{FBGH%NP_S{qP
zXj;>~tfWD%Zer0cYq|H+|G(Y;XL|m={|<qh8djz)Z-28nHN4cT^11xA=X$%!YLX`A
z1c=Do+BxsL$#1RowXJVl9ZawLrT#M6cQZk>`Vcp}&xXjZV|=oo8*0oJo>l0)G51^%
zW8P&G%lh<FX$dCJpFD_q^*_e)@Y$}NwSUW|CH0y8mC;R#iY&0XP+t@58{>JsF|z*B
z&V5(w4@MT>m*6}+(f!-oQ0>omzcYqEOg?b+iO6}0TKnL|=e0QAJTS06{KYtBVcbd9
zBbmQ?a}B0Og`{?8K2dm8vdhEx$+f@`-jGjx{6S&Qz8~2nzxfQu<=Ji(XZ7q``ekq1
zNOD#CUh`YW8DYulcI?T-87F2G$+q6Rl$30G{m*kH0TtIXuWLTvT|dXvoYms`^|$`3
z4n|qtR=XUve8t0Fw_+EwytELR42ib_{$IYdipiL4e(&yZAw54d!8>NAyyK>&pZvl^
z=k73Lo@MU6AZgk3EqQ9vnio7nMFsAC4Bi-#R6eJFgM`DAw4gcO%dNJm?%GzZ71jMf
zr6o~-cLP^k^v{iq`%9+Q{>T>n5LOUb%MjMZk@okCoB7V4lP;%joGY2r@_w(^u2wy<
zhtKE8-H6Ut*SYp%dPlIrs#gn~+(kGfwI9h}XkK&t$wFz52YlLFl%5GmN4=5VX55-6
zcamd)=F`;3O=_7Yzh~J=)=s<5WdEf(x%ipFH|Z_kCRUZt+bA9I#A<tvNo=WiV6LH8
z_pSAY^%qMv39Jfq?p)10(X*{ynq|dR_ti?8e-1s0<8gbtd)tl=yWVZxw=?eKntO8V
z&nj*y+`HwDUa|Qh*Un}I&X|an>?bpWGK9TW+;7O(eXgn|gF9;7iSnzbDwaoXD0@{a
zdVZb=qrX-|Y>w^vCCk@Lb!qXnsOjnPyf9g#;=^(0N}=EPxc^M_ulM@@xAx!Tmro|$
zxm;B7wc!7^e|ds36W;af-`n@gXntJHpMP_0Ya{mU_<eP@e%<eHud9uJew#i2Z$V_`
zC-;-@=RHZZ{XBW+`cwJ$YTH|`uaB?&e3<|J_2sH}_u9$$*ZjPCe*dr2?fci=pYFZW
z;;8FwM)nChC5B54CT8<qSo{C-H_g^%viIa?I4`+%N^O_>{v|%IxR#5TKWFvV)IB=M
zIYz#}J;#0P@#QYtm-HWh+TLtaxpR*GwslL_Jpa5jCd~MCvV;24DaDq{RSr36Z=1Ed
z^{~4`u)B~PqgGeRQjysY*dA3Lyt{h$_4xfYAD`~McW}eH*HL$*%=N<@Czc;8+BxOx
zn|A_E%1_u7J+FW8att-iPBYvc^Wb%?R{i(W`v2tr|Fr)T|6l+A!|?t8p8h>;enNdo
z^D|}>&P}g^g6mzQ+eP?!%oHyqm~Yqp{6u1{!0R7JU0pKsm?HTuOqpVD|Ec8A%L)HL
zLkz#p*Vq347&C|Oq8W2?c4%}Ef3&Kx@2ZH{4YH}nHIH@{U$uJCP|J8n=8N;3x}%E%
zcsJQu-<N&dviADLuby{=E$-eH5SER7bZO1+!#x$*^;MhABwzpPov_T-{Pv4Q=`OiV
zX=e7C&prQeoVe@3aAIZigieQzx|1q&BkufcHeWn5eQ)1nm5tXOUo4vFs6TU6&D&^J
z#U0zz^8Xh`ES`0A5%;3DspZ$aO^x{TraxBRB5owjGyk{b>kp|Py4G8FJ&SZVUU&Xw
z#>Ug(GX<oLtxYVnqarW2Ot{sxDJr*!W8JN5vl~Bl@;-B$wC+k)dilc#M^CK%a_?R3
zJWZ?57WGDH{W7d?)~&Ey7nWqg>|@@f<>4uOG$lEIr`VnPbJuOx^FBMj<JuSAFM-}&
zyUdy{Enjis!!$PWMc<yS6#TaLAX|upO6$wbZ^U`OsqLG+_DRC_$3ImsUI{tdxT9Fw
zpvLv{rcI41cV`NmPPgD}f6Xv0aQE}jw1oQ4dhgyYdH1(;>xU(tZ=N2?DsSz+y3@qv
zXs=%IIh$?DeS+_<+rN{spUNbcr;_i-_in|avoS*UO<xq&Tw{6rG9y{rz<$E=*W$C!
zY?n0rx<(@_{onx=lh=u%x*w94nflf*ShzW=@cs^y+&AvpCf$dBb(w9EH9ljq?1zcd
zk-3wO%@jDb`NNNct5ucl><e2qZ@qo{_~$Kiou{sUb-823M7hS923Ox12sU01mSb6X
z;TP|Ul8Pe&*EyW?SDv2b^6;&2(E;axwvK|1Iq|)3^g;#o<FdA8Y}LMcaG{xC;)(4^
z4waAd9v=&EFOK&0IWkj&{ph4Aho<T*TDW!zx9^JyCii&09F?$ozUE@X<Eu`G*H}t8
zynV${x=D?9XW!ImOm}LztrXc0vfa$Sqa2#Js9~1IqkUq}kLvgP8Gcf#|ECc-aUtid
z4R_?<`Mq4C6#4b(^!S>uU&GhG<6Hju_m{u&Zx{dhb@X<7eBIwiPfxS|YPbLE$!ISp
zr}y;eU8m)H{}-@rlDl;HW1XdL{QpOXx7XX%f1m!{-~V~iy2ww9Dm$ygXC)n&do)Hi
zZ$YMe!-{JvDS?cu^bCyVssD5AO<eJQNrqF?W|8g7PtTI8^mJRN<Wm&0;fJN?&neT(
z!|vJs{ZR7p>zk)V+k)<#NtQJeo5iFtS&ZXI)oriU9nuS26uqZb#U1q$*L%bj&-e1*
z)0bDL>&xG}_mDwcJFaW((KV<1Rgan`ubz7KRMxgh?*cAf`NHhGM9%2i9v#iq@p?ga
zwo6X$|ND3Szw7`1?ElmL|Hu3KkE`S7y?g(;s7bG7)v+!6UhaH(kL%qk?yj`p;#D=@
zAI_2&o8>j@tgew8qstPLy&gg`EC*(<=legwZ^E8G)Bpc^{{CKV#V(t<-L4W`OjY5_
zfAD^?yZR#G!veFy_b-y~N@<_-t+nT0m3;M_q<V4otLy!~=k^;s`EtIKvEf6KZ`t{2
zT#F{WeAqDi_5J5TQ{4)#v<SX(oyzC((1v^dOgW>aOq<o$xaS<Xk&|;#ZH{8W9!=F-
z-3{BNJA8Y8M^^rNV|sW-8DFFOV%h6en`U$D@b#H7)#c3({`=0~%<7U3etyIor>SJc
znQF!;JbPE}zGV@6|E-OfckolO%-a~f;)F@NFD)__XJ2dD#n^vpnQ*GsiAd#V-kjwv
zGmW-L=yI-m7IG-^^SjwxL9b<e73{vH-~P@xTa@>ERPVoeeKrwcYwB0Dy)G>`^<3-q
z@|&IJv+VL3wv3Fj6KYCH36f9KidN)g@Vq@UuPWxm*61xSC*MptDVuqB+8=?4+FQvQ
z-g$Q(UAMJ1+qf;4>Eq0472dNCsokxdwdVA@E}5K*wx?J>RBW)cbuB%b^={g`y>rZs
za@Q{S5p&$`+4sv@Tm0V6jS3H9$utQqUD&n#!ouF%jJio>`lW6byEH#UY*-*^R&~f!
z>8j-QdlG!Y{$IZ>*tO_}bi=QVjJTQeYJKw7oRc|lRX&w7KW(*a?@bHQxw@7sRyUgj
zJ(ro2=4m@u@88Xyr4`0OZ}d%)4l8$;wmS)jy#Bn<`~2!__jKFx_&9uKXL(9jOflTz
z6nysDdWpHRtE1Am?ys)gQp=+8H@*G(+Ros~OsjmHbdN^d3ESwfD%$kP4$CcXKPU4%
zKjgG<g~B$0qrNPb&ksLjaF{eBbV5zG#9q7ft;_a>x}>ds^YX~E&r&fQ3|g=9OxjmY
z=qzFoO4(XusDCNNrZsNQy+>WAn3?bFEex1t)abIOR_f{W@R$2@zSl}H2$Wx2^KeJ@
zt2oyz7R}sl=KM9=E>yWb2r;W#I9r2hQJT`yYWB*X|1P_4)v)~X;o;qn|9>nG)?5Ge
zWBSWE*LSnNmwWT^(+8u>e_#GNd!Bx|egE&Hv%}e~>udi1UElia(bk<0UMh8dU$o!u
zZtSFe?R6scCh<!}f<or%JW~6pDIT9}HPwXWi(`XG;jEjjk0!0)%=Daf;r@iP)ki9S
z^{?5rYx?rdjL)74Y~>az7CZT4^|b&0K3*>8+_p-3&x{@25vmimajPX|)#Mtxe{3^;
z`zW+KE3zu5Mpsg{v)xrbZtlIBufHCCwdH%IQ6gskJ?ZERrWH&jACsmp_ueV-Izv_}
zvgpm{eG<J_)LWW#`q^Xt-M#<+*Z-gU|2_Zz^Zwu2+wFf>S=zlQx>O_J`Eg0k<C&-C
zh;{RrUdx|z==8)x4I)pkq$s{kJ9=&6aa)GQu4S$!lQlDqI40cRbGH7+-qeGq?f-qA
z|L@n|_<es56kArC{8drAd%I$Wqv&RbWNvM_g*SC~sd!2DiXJ(3^Jk72XUpcLUd#Rj
zZrSawu|3h;P^gcwZmVZe*Os(^JdtLHeX}j=%0BG4JpIwFjq1^^|N0)TRt%rXUEjB~
zo_&6}&~=4-zE^&nXW8?nE|Guo>|@u$Pxt%AJ(T!+Eq(R3sQJ>?F~u!D^|^l*7aQ#D
z=~3wnP;oLVoH)NRenY>tdqdRKoPtTKlI)k?eltIIc6MZ&#G5vcgplL4pX2(S*@HF*
zCtEYP-}b(4FBn^Nw&mk4hK$Xdf3d$>u=ek*HB2+lu-#QOc=P$jgS;&L@U4};>WiFZ
zji&E7e9}s$tY)3too^j${cq&GkT@ruu<XSLshjq*J+~Q7-)**~H9XJv-rA{BE91|;
zXx7u7HG9rG;U6`f=O&i3=CQi`*l2sC$7k+;*;Si5AIR>17{>DT1SjKa*07K%d+jf+
zw79)*?v8Uy@BVshsJdb6&ZXDf1+Ox;c6V<U>oJYIt9op!?V?nh<7U0VeyjXETowrw
zTt8JLxpo83DgLN^CiA0Pzx*w!zpy$aG9p_23UBjn|CbV%O)@{##c$6zlK1dt-psY^
z37gJuTh-L1`#jx7=7PuiwiWNse&5J%z28OnPk7miWA^@)dl=UI_~N*2qnG*Q${*9B
z53ju%yQoWKIr~Kig@;au&*-n0>(26BXL;AO+D=_eOJrGDp>_1m9aE+5g)@rtN^;~M
z?Q1_174+a-vEKz3^M%WnSnd?nP7CwWR0~`0`(9K0;I|UrSLZnTaxd);^V!TK_-L)S
zW@qPAgJ~;vseEMhY}L+PDx%WGICb;8XR}^6T#0^uq39~Ryza73xznCU^={6c9&2yl
zY3cajS==72*@k`2#m!E}YeJSj-LmiVyw^vA<}Wh6dVOuD%5EPomzKyGucsCS&g*@j
zQ@j1gZdT3HzYg>NtpA<A|K#!d^8X)}{Pdsx>C0bv{`K+qt1q7ZRbU~fFaGRsw|@No
zJ@$`YPKu3__4hya=-<gx?WMI}zB*Y3&&%*Xr2Z-CgQ>DC3%822qe@`wk`|wfkJ7sw
z73FSAHZHS>xy7BIEuH1&bg3yw&u>@Yo!);_&drxgXm9`Bz2tpOZFtVvleU~w(|SHI
z%`Du$R=#IvTd;$6+q4Z@BD)f2%v>@v$Y+ZV`<?xE`+mMU+HGF`a)G$^JHL*pL0${n
zcQL1}7TrJRdXeTfzr7j`!c3+gA~<%WO$fWDzTf8F!#}U@|Nr#=>-_(h@Bjb%|I4$t
zw=e%?Ry`V}qW?<x>zTWp?Ft;Xm}2?7Peld`9k{c$^rFPHC9%FQML&q{x-x}9i)EIk
zPrLZK?$eJ~>;H+jobJ5KTBfh^=VSBx`|G13-#qdw4g4{Izl>STeXn_XhgY~m$b&W8
z6}UoWFD;7O9@p(wdu&g?(;{z;<wE}@m6+W1o-L7mVd=Yj;oUnk6Vj(lK0oo6+0NJ-
z=l53@Z#?jjtHFAGs;SZKkIQmI?#ZMjKkE0t)F<h_q)D%S=0n}uRkzc(d^U3~T^zsj
zYh8lR^JU)Wq^*;s?w)@+S*6@+>AT!Jz8?)QzgMfRyx)7oKD+r+PmcUM?NdK}uT>@Q
za4zNdo~z8=VB5Rg=yDa?<Bse%F{X+cYK4!hlr@B7_prSD5#hdtDNg+6$5iJ-0re{%
zhN)Nfo-SUvdGfsQ?cEbPjm$Z@*tS0}KE2^?f&Ap-ZN9$@&0k%<Ft0-Iz1z2&xz~Sf
zHM~AgYFV1gx^3-lX06|A6KCvZQ`s;5=h#<0=Q|OrXG%?}o!fFuaNb$ZGj`&ygDc)F
zIlpqp{D%#9`ML{lFE9}4bTGE>tP<LC^>Wx$gCODc`RC5+ZNA(3Th#4{_qFf3AwAC&
zf43d)E)(YO*>0q@+aae<%W(M;LyKkByI9h9RBv|-kXStH`O&7&>ox?h%Q|oH%O;E|
zaP!`dn2k*9_Os@1FTdfqd&f*GhZR>h-zsa{yrRcw(W$#f4|%?LqqgDst|RxF%4_+g
zg6Dj`yfgLAmFGo1U$b~pbbch71S+jgX+5yF?bAz+uA?b$)(JOS>3ZBcVtF?DRnqF|
zk0NUShUBc(_)w8@{1tn^%fr?EEsHtMUYIgH)$+*}jfAH{<;`0I&Ufy6a+X72-Qp_i
zosr+Su^kePa-T2Cv@}Z8WYN)U8m5o_dLH9ZJ9@ORFnx17$5Fjh+xQ7extYzf6SkEl
zdGmWmE_`$}JOAzEWnPR++t!9H3H-c#g6MP!_UucEP0G(XHpGV8G3=iqcUIYcO&t5j
zPAeDF%pl7bRrB>*vo{_-?yu9DzF((3U*G@#@A~@f|6Udd-CJ5RUEKF{k;T3r&z{cy
zeR<`Ym%Hud^`1AMiuKax|9SfIK^L8AG7~o6J{~kL=3JALNczX3WidOZEn3#8r8Q5<
zaE+SL6}C`@WhtwV8ZMqS=~>U36A4SU9c+CxFUNN3@_5}Jxlz;B*mu<U?{0X+QoHDj
zPu+L6%`?&qwyvM2&7G7|d!=aOimIq5|GvIFdp>UOpGU5CD>}^=toMGq!=Z7<?N`&6
zm)eR;9Xi;0^ZqT??U&a@o_#m9{>Rnp|6hLJ|J(fkFZ1Q?zn@mHpVfRiW%H_8(cLF{
zF0@E|UX|tdbHX&$r7Q<x=R9Q22<$bVm&Dub{eGW_xp3mb?f<+Kxs5Jw-1F;`cGuD=
z@7}$8|L)!YpR?cZtNefX_vd8Jox8S)*3^e2-V(h2Nc)LzaZ|;Ws;!gCq&Lq$p}_im
z)k*osbA0~Y`nSGV<kgQ!Q`RO2cx;zie&b|AbddTN^+M4~@5}?f6JGIY?msxS;FVxq
zO7od$JC{5WsT&o)%Zl30cNQ4P?}!$As1tT@>fa3~=l+VD`=TuBJ<tBkmh*+or_LO8
zXa36TR9PKdrhJOmZvE-iW%lP&Hs3o@#J+yB)aB5q^RL%+WLlL@F0@Q}{$k6{hS|BT
zDb15)VwZMX1RVcX!Y|p^wY%l=8TYiD$64Ds?W3RR-}+Uf+%hqJR@Av`pHD5ym^$&B
zWZh*x2I(oj9@aIJ?(!dcq4w9LVEZM`%;p(4q6(b0M6TYmy+qn}o=yIr1L^r&_WkAD
zQos5VduRQDMy2TB(4(%RCgGxCpYlz)U%DtyId$)9V&mO=+2M`Zwr3{`W}Un{p{`nx
zX+~?_rrtBRA638fUBBY*-!;*1`?Q`MTli__bJ1B-%e0P03-DX5IoavbvaET@%;h3Z
z(jl?zx6ZH%i5LC9Q2in5+7boRsI+{uUmv@kZ?U`P@L(nT&TSX?H#5xgPZhb8!+A4s
zg6}3blNSDYqIpGS)xO_8XL6qX=^3$onupRY>$AVa_!bEL`p$P|(ahPbMPH)6Z*#Sn
zTpA}gb5*C|jooc*jJ2f)0)Fm3s;qWQG|Vve-6JIyp*`O+a@zz9&umTUnW<QtvwO*F
z*~~|)Z{FN{V4hxG+4f%-W*?5zXxSVU)cUMs^)kjF&6_S~@49+!PSToWw5zw^kf!hX
zZmm=KiR`{hIZWQ#y-aiV68D`VyhzU{g-I;5V~y#ZRj=ndl*gada}Az-@c8bBwYq(M
zubi9GTDxL0mU?EjaETWBYCcs-i98!t-Ynp7DP$*y$*zlQO|C~>|J);AoEN`*$H(Z;
zj=69Bw^VtlWQBQ}sPZYf`**JvKV-LXkN>w77O#Iz44HDId)AA^I#1WLPjuE?{`qnD
zrg;Y@Kk&O8_}THvif;=07&yD;^lD6*`ts3^6*jx-Oa8xjExd8p97R?O^_V$Fk0guA
zdTup*7!Y=FRZ~Fl($id#SI@@2sfm}XXa9Y@{qT8__l@?VzqPL~-;m?e{CXzq#Y5XR
zL>8Vecw6Q>(J3mpaz@8eo$vbn`TJ^qetO$o<FiOayz|}q)}|F*;?t(SzPxK*c=TM`
zs7aUqY!tPWs}o_I^PO{l_2*~xKM&vk`_Fv;p8Agy`uQjIUs$8LeeL4lBYW1Kc5;~4
zFy&I>#gY#ZzCZXZdMkI|ex$4HqobFQYv(G<Yae^W;ry(x1<N`0GqVkaH1*pbE9!gR
zfB5(3N12y<Zn>$yVczg^qxCoT+Xi>4p1k%6xy>`>`Agkpn=`bhvz9f~JS=T{{pNDd
z`lk!dU0iRM@TAbpf2Qf<vw!YvtYj|PHuuhbjRQM6v;P`yd77*Fw)DWSuO2UEww`#q
ziL?49OKQcE?}s;MF1z>Od+AAucCWedJoh&g+{oDe(AP-Jck&!|VX^l=OJ?6R(<}Zn
z(NAyZ+D($z4WG-{8B4?-bx%tZKF-JJlP{Zg@6kEK<5Jyx>}La)<_fr*ZkWL_wdA41
z?5}^mSL}0<-`nex_wUikHapvl8<`<*Stgvje*A`S;-?MwlKQ{w40_*VZn<gBQIXoF
z9=oUv?*Av<)U@{QIxMo{>lwaR#*4n?y>##Hl@3}u&GUL;M8=_K`&M!umtVzt)l#$B
zAXOrCYu!`sqt~zhY?IA6sMc}wUg-|g+4uh**7f`MFYdjSF=v}|bA^Ru>}3zJx-^9X
z-??wMsB<0k=y^X;xU}u1TwRJ|Q}Dw%Ru(JROq}l|+bw!kYsWvqQg!L&i%;&&nO-_Y
zc1bFSj>Nu4W*hE!epz*>*o1Adti-QBmAfuJK5;Pp1xL8`;%7e?mbc}bWUdm(UGm6p
z*MWvD8}*J>-4NACOFMce?Sw#nU&NN_>w1^G*tpYhwuF9c#d6=`XK8XiM&;b)aesFo
zxfd|yR8JQVW3*50hg*wvJD<;C4~|bRd)wkLGa@5hzyBHQ)10K+tY0_G*eImDxWw?%
zs;;{%+JPaSN}&c7C$=wQEO}>E{q^Lv%YipGMmkt9MlEt*k~LZJtEu7VWo*SA>IH!u
zPn|S_51BDICJFAYh?)^}Xp7L>{@|oH!G|tKZp`HD6@D1_HzP<tZ*_H{{_K?RRlh9H
zb}hSex#?oF<@&pc((?bVo@m}&lO*i+WXqaGGrcG22u!^CmPx9`^N8tzg#ztGOXn-^
zF;Fy2{d_bva%PI}p~W{E%UJv(-%s0;5oN-<?_*2J@|U_BW{LMFiglfs{9}1{tx&a4
zeN$vuxbCT|wO>DMk2`WFfTi>3lRa{J{oaxBvuo#GTHoLPyQU)i$9=h<{j2v1Z(Jf0
zcvs?>x!$_y><zky_H;gzYKc9t=fS_HpPqG#>)*elfBZ<`iXU>{+;`24To_#%+~E}S
zEX%s!*vD_X5AKkt`}ymkeBIyE_y0b6t{=b8V$Qqik7G|st~`5q74NE->s?x|ObIa+
zbBuFu%HeEY{NUy1xnlA5JhOBs@0}-Ys&|q1(L1+Ug5Gu=uRgNI?Oen6igDMKqhHHc
z&zY~cLQ5mhMb7j_@~T-?Uvd__e!gO7?8-JrW62A%?WF$1Jb1eA+Vr(;`!icEl(6sB
zeK}*gRq`Igb<fiRx2-J7+x5v}d)R#a$X~NBvv}O8vi>dpb*F-K%lw_KHP1{RzN(mW
zJZ1js#YykCpDmyHpIiOv_AM%DMGK05HQuwHwQ<4wt#-?u&8wb^EmMeoRvS_Hn6vw3
ztP^t{gYcI`ogL55C)Mxb|5^}wYKN11+bOl%4KuWRj}@xs$1xdxz4-3J`trGfbAmj|
zJ7YxLHp<_ct+%+f%us0WwPU^-cmM2--W7QD!vV`BzjkcmWip>;8T|ar-uJ)CejoqK
zEGnHVwT=5?*Xsjw+*Izp(7j=|T0PyuzW8)Z_Jm!N(|n}%Omqwly5tcQB<d?wQ$F)+
z^zYL12{X<xOjpdz$Z8X_3HzVu)PDcKrgNMB#}@WKH=8<n%AU|tQ7`dbbMtI}`K+$r
zZ5w9tYe5+|&nm?pPQ&%$ZiW3P<O;*?ZVvxjU2U<^@AxapnZZHsE)9I?rOB=BDMz>G
zyD-mmlrHd956aW6nk}^U{vF|snTH#*e6N3T`O8^&|Az4Q=P%vP<X>D;v7+H~++Y8&
zxJ-vsvo&(gmw(5EeSNmuPkL)c%$k=8+sc0*YrPh38Miv7OtRl$NypC1E~ZsSC0-u)
zPg~U`vMJ-OeNnK2XWOd7-8We4%Jdr++fB5HZaU__*ke@`yV#bBt^3wZf3(P}QH|q8
z<k<<Elsb3%YEPGIh*<sIYQM|gnAtP*rWp8H-F^7z*@DXspI^@Ts8e)7MM<13;CTB&
z<^_kj|32E+a$@=YJehy#9lH;|iWEyZrcwP$&qVR*l2u|Sgry_R^iuB4do5Eo@wv$a
z`%<mz^Fk+nXw331Yt-@di{uY!6uFcX6Hva=$!YV7u7k0nnud*xi}ee`gO6PG+O&y-
z|CxKF|0X%j@^$4$I(K-V4!q1*rC;TB+E71ovWmoCrLtwUo4zi-8To{>g5{j&e3x@i
zj$VJ{78v_B;c4sq{SH5`7FJH)+)?v+$G`90|HCKmchX<9$t!4LYvMWSPdvYF_>}d$
z4&yLfRN}0{*_&{9%ZbCQFR#{r7q_?8Vos0^<K9&HS+2JEX|2!Racs{%y!6`Y)f-iA
z9bU_KD8_Nh*VDVh|9{S}|Jxp4^Y_`)!ygNL+JocxF5I*6w$S^PVYg2H;t~%5L*sKM
zyDK_oX8$!75?Nsu^!jT3l4UOUU-)hpeh_&*tX))C;q}9R@3!t+Xcf9j^!|y`pB?_&
z>h{FED9N%{dd=I~cP`=cuJgBgULQ1Gw?i+uOtjpnC4Hi}?Uz-yO0U<HwfP)l*zf9g
zRq;W?iITz-*SgD|emnem)7jj!pKZ>xGg}?xjIwJ#b)9X^e&^eX{{$YLf6a8xs^-s?
z=C_fCCDXStouA`zb&lqheUkHL%qV8L#j$Tm4y&5X=8ku61^q|g`-N;@9Gk<J;*+T_
znzeJK<?KTxue6rCOxN;w;Tx>F?A0-2+mg`jTa%Tq&3xyS>pVN`@a`)Y_XOV2<a=4s
zmbw4Mr5hJzes^Ew-nvv$E%x)0UUu`FADjj3-hV$P^ULaVkNJYY-xZm8r+%H>f8v_Z
z?lVeX4QdzNe8u`$Y{#aH^=7$Kq7%;RedYOlcnKfx0T%u#%Y(P_EkEoP_4ts%p>_Eo
zw{M%QPJ7aS*89rT8>KP39sey5eYou-!}+cM{yJyvk<Q)K+v_~-_nj4<d0+0_zA?MC
zZNt0h4z>?(mDLP9?1Vz5Ju~Q>n$9nhCE1daXD7BJe1HFTxdq>oWn>fU+g9@I;bTz?
z;943~BC*P{@z;`&do9fzZIvmfuFB{=kbWJKW8&cZDg5_xt2=q$PFBS)J7u{tbM<Ea
zp6IU&IF6q2P7HbZTjb1LX3@!0&3c;TRNU83+5RE3SHrz*qno)kZ)d$~xt5RZsrU{j
zsk-yb&mWykT;wTxZq>7LL7mEzU5oV!cK+ZJUK%!WZ-_vy&HIu`Q!*|cPW1FVd*`L#
z^{lj#T_UqO4s~^Kh;L;v_2ANX{(87@GJmY{J}t4}_CAw6R~+r9?C9nE9+tPkx8nrs
z2`)j+CZ#-`pi1`^20fiES1)lX@XOv{(Oa=Mc;mhmz3z=loHAO9%lTG#UuG5JT7G%1
z-4ib1omy`H*Cq<vxNkaqRyn<eYpZDP_y0i*o{{p?p2~HG`AiZtm0f71#L`zK@0wKF
zx|3_~v<{IDokc%6m%W;`BjZ%dx+~pFyHoe;G_h`CQ1>m@wADHNchdW5Q$mHFO!?$|
z%0sE3-8fV6e4c-{X5JFTLcx3Te&wqkeY*E_es6u$_585^`yMp;sIZzkuf28bwX;NW
z(~XP+SG&v_vNtcgy62JPm5FcOJ$-mKTtB|9uzb(yzsBXCgP8V5p6QD2+}|qY82V+#
zoxpReC*-|g{@2jo>JswsP|@^D`+wif|NmG1--mB!S3iDPvB#QKHzd@w?`@RI4oglM
z|MK?3j-huWFK;{}Rjt2_<3ULH<!BK;(Hk+cxB6MumK+jebHDJ|b(doZtET+B_}YnQ
zBDi8dOl9!Uulw=u@6*TT@9*!guCfvTeDh?F<yQgU4F@^<Pdx9?n0@t%9na_EN51d2
z+V)c7=FyqaoYnpp1tzhDwsr-Zm>!P#`#AiAWXaPXYfqlfJ^DwYX;QSS>(`Hk5rVVM
zWt{r@V%~h|cGIdY?#taia~c2K9{SChL8ATGlIR0^1#NFG&6*~u^1<-r>!|3QXWynY
z9-MC1B(2Hhob3B%^+~%o(vzMwE?HBfal(iDPWIxo5cOYWjDG$N+?QKwUW#^le`&q3
z!@}a?n%D9izjt3&{>&PB$*R<jQOGTv>B{m~Gi*bIoj-p#r|Y#|*-*HDhpVmcQCI$3
zk`fX=iKk`GfBm@Tk<U)WXHu_z&ON_IaBb3;TL}lFJ!XVEo_O|er;x(eQ>E{om%pvO
z`-SJVE_-qAJJ*Q;smDY-6{f!9c-GRnc;RDp!}Hrtq#eIqkha9kJJkD9-fQ=VIT!EU
zZO(0Iu(bCqtE?-Y`Mgwp@v3+0VwLqxXN%lm`I&dq^ZcRWU9#PK-%95#{kU)SqAcen
z$F^SSC|dhMDz(J->!Z@#ecWDQS7%MW`k}#7K&5M)&@BtC2f-;NcTU9p{k1^hsFOac
z_?$H|ihL@5%WX9tHA!!1I(PYg;pNU)h1ofux4({F6?Hjl^@h+XSIt*!KUI=5Nz9o`
z;8exp`g;{Z{(&LeGonqjv+f*i+1eWz8v8nU;UtUJqX|-ytkH`5wwdp=S*^2V_W775
zw&0oVS&uZCyL4ESCM?>*_4B?C>+fG-uh^$dzteCp%lD~#N@%XXDZl2$S0#qRYK~z`
zIJ0_QYniQ+N-e+1=6_`Fv$ux=Z|<;Ob>uCBjwa*Ej-Mjz%6Hds{)#%~Dqwg}*Lu-M
z!`T7H_p*L4yA;URD^Pr{DP`#q-`SgXUD?qPsnjjvwcAU3wauA{?=D8({d{?j^j($m
z6>H*{vyQ0j*e(81N5t}JO*_w4slQR|&i5WYSyXi8fz{E5p0%YWZ4Z|(eY32^#eb)Q
zb8AwX_Fff1!{+Emv+nRenfr*t(y34Dc+YR=y29;q_^NzPNG_lAj=@rAW5>s&I3Jh!
zdat)X|2kj)>Hmg(+BHHFlLCZay`4TWG;q4964%K{O`WAJY&xqVi`{-P&3(o<x4JfH
z2EWZ-n>~MjuD%|>*Tz<*%iW<>G~RpXobH*srSG2Y64zJ#@qLlz(kTfA#!|vpyw*S3
zv$aL!mgAH9Usu=vyL|uu?D(3WPw&2*!p|MlVzY(+fR{v~^R-F*$LDNvx|=DMEfMrW
zLF(5&(Xx`LyG>TdLu&6emYbKg%2oUbU&P}mT{U$9i|n+g75_iIoxap%3fq(&)^p?H
z?dI_vf4uws{=YvSu8d%mUTe(j_|E6_I_U}LHI|t@{<*qPqVCWuF8fcbZ|CNwt_(D}
z&hxBlzlN3JhiliL_1CXH%fDm#oiCT>?!CI~%@NDoKRV{mpY8ssR;JIt`|sV-xxW_+
z|F!!s&Gccdl-TB0i%Y%EYa^TcWPc~GzUp1tu)+WM#0_3Mj8$*8B(%n7|7P2w)Me_*
zZ2gjP+0oO+_EwRsM+`NK+r79SuA8{|Zu`d04|87JHb1lWYUTabio&znb3cmA-F59+
z_+jSad#ftNSLT-SiaY9WNuRwTe=m!&Txsy31esZ%)w=F1F*vO=dDfY_C&s5ISMF}r
z-gQpnyQb1b)7IkY(xFe%ULR5HoVEUjW&-z(XNK*^_Ae}StG!=$My2<byW$ijuGmde
zHyW%8p6>BvcI1X+zi?@ewM`T4&Q0H^`bO%|hH|5=z7I-s_uj33Bl>gllXbJDAF-}l
z<(gZ>z06Yb%Man}zoqZ-{d%y+|G?3`*B+!~#BZ(5H2tX3)p0e|qnu@Nqx+fnTldNG
zZacB$`mgWTzP(`B$Hm$G;fG_4Sdg%1`RsWv%#ya8(N&%M3t!y-pe`7_pm6>lQyc%)
zFSYN@i<MY^UMAk~+4FVJ);|m97q!h>X5W*!WH*O~TxGy2rWIPRpD0O4ToACh8pUho
zw!Kugv+n!32P>skI~5kL;1WEex9ytp2J@DaN*b$EjGiCKVO!|;XSME}CB4m$UWzWM
zo#h_s@8#7g5bCSRd35cC*cLb8e!JasFMeIL{@IeOv)nTrFEu(VpI!5;)zr7EWd9Bp
zHrJ2|Cscd{#MkNRothTepthpda@X2<d~$BLBh33Eq;s-56x!L=Ic#n7**$%eRO}-2
z?#jgZp$aRzJTlMh>$)7oS9<uX=&dKc%#&j0zx?|6(uGBCWjZcLt_f*xo5E?le?~@<
zU)=WP56`W4{v|P`;<uru4`*p?xGq~^RHf>)EblcsC5v|oykZLH?O5L393nWQnQvoE
zMZs?2{pPl*GK%G#?h{icadcQcP5O87hvyUaM|+>BKQX=NDl#jg{?Htg=W~L+G^cJ7
zi`J}*;;RhY{NUWKNX?`At3&q$?Omsysq4dER5SDbpGQ|;uaB2|FRqea@LNAMc5Te5
zr<-<}9$2@LQ}3~`QBX*5+@_R;%%?U@3wyk)<AQh5zgI_J|NpW6|A*Q7`>bUBPk&re
zc`;<M&WmdseFQu&?9rK3WV!zN^6tr=#dn3>EF#*Izh1oVx9(iuI*yf%;r%~yc-O==
zg+*0QRo}63ijqhB<-4=>>)pil_r>X{|9V(rqtm|oX@Ql@{+}QIwSV>5wOU}?{ioO8
zKltnyS~ti0aQ(z}Ps$$J9(6jJy^?vuhu2po>r})%t`#VEza-}qcslmp1+lx=HWbGf
z_AgxVewxJGZ{{49Gqx4oxcTYV&il9jrKdRm_^31Qx#$n)-2U0`b4~qv4Gk9cK5g0Z
z{F}yW%M))Oa^76Ae(BuE!-u3V_PpduzJKb?YQazb7j(kB6SY6O#;~thbJw@@RCC;m
zD-%wcP5o||dMrTRGP`H@=FKJBe$=lNc)qGGVF~9|&!t9e)0w&hCiAfzp8BpXOt`kp
z(|E;P^Lt8Hnx`1|q#ReP{3f)sFfc}UdQFMPgl&Ct)7|E?K49w&Qk%a0&GFApmu5Ac
zy8UkT)E^sny^ae~<?t0;@#=)8M@C-s@xs#&XJ`Cfu_)~6)Xxo<|7~d3JzKGN)wTt1
zqV?~5x_i#vY}-}g<KegOOm#l4k+glM%bXCmU&*ff%i{a9Tz)VJl&h#diVn$Sv;F73
z>bQN+ZaZ`Kb>Duhet2Q;yj?y1+g%rb=jpX_|Dfsbb&;{4L)7{IspKwuBX;Rnr?OKA
zau40in|EY~dg_X8ui^x(ZwjY)HP8EC?W^u_O!$cJE#vgbH}xj(Fx9jCer{)X;noh1
zgilMXsw^f(C~xd?{5ya0f!Pz;Yo)I(^*3_ezSWxJu!XCTiSfEGZ?ZSJ&UO1Nx%?N;
zL=Mjh3%R4YmR&7d8ofA#aaqQM<X3@RJG8eo+zDHDdFm-PwnSm|BlD+R?A&_1>cZqK
z<!LV#onu|(P`H1>(dSD(X5M`vuw48*ORwPNkc-z{rtUj>?Mktl(#GqH?;G?lx<A{x
zxVT2kNjV~zn<13NzKow$@-dgop}?AB7yUin6uX$*^Xr{qEGlT;tzf9#7W5~`M!(F$
z;2Td`LP|!<-`2~^wxqUxxe}N0H+HSg!5N$zjv8)r@4NQq*R2OnMf%guM)YUeK0Dr{
z>KUddGDS&yX~@$u#q%kiD;>D6%=Bn?Vya;~>8<;A`r(Hq7PF>){<&wG_pwCJpBp^R
zc{ZM|s;jb+_YOTL&Yit`gYz3Pxp$Wj&whM&HM3E;b$j&FqMa^<`C_{pt(Io=m2cY>
z^CIqH*do3_Au%@LV_6Zs>-o$7f0%v$@9OwJuillMvdlhpL1$4(;jv1W#e!#6F-$t~
z^y8l&6>)2qsLJ=1uZq09=;P@d9(R|XJz?wh^BI4V_nwY>`aJg&>p3r^OUqPjz8_`N
zacb#>uN!pQ4?lc(^z`S;moNWx(om?hTv~JH`0kn$zy9t1?mvHCjcCKFt?zR>R=#u2
zuQUC_q4<6$qy75s=h+`l8+U#7Y&*ZKAdicqD)@i#qV>#DN>(!;xct6;Xus>fP35B9
zcT%qL>}6NK`(;9Vna365#<<;G-`&FwRXjVN=XdbzEs@U`Pp{ke)??#3=bIjf*&UW|
zXL)PSw>KoX<@(}dHy6}N>^StY#QAiqJm-Wvli2<;FdUZmba4!soS2}m)qnn4_4|v`
zYuSDTPW5z+^S53rFf}YJMAnVJjM-_bap6(bSvMEF=;-?sC%LXgV)usVOBdPCZ@tfv
zqj2l?o|pG6yo>W#KHJ4VeLt(w^X7$JA~UBt?5X_x_Wf#4k=@G=CY}4W(wS{N$Ip4u
zg~{Fx-!`6?ukyE^>$dmd=i@5gP7Qh12WG9({I#*-c#xaQ?C9#p*%GOxj^_LsQy+y~
zTy8yEVewP3(Afgt-t#P%^7=7(_N#0Ai+@Q8D?5BWCY~LAqwm1{_dBzlCfaZ3c)I6(
zmg*X_E$%CnmRLoLr15FJ@Un_!zIy1M$#bv0?;amtb>#n=m8B&{x%*gBZ4FG8_4S?b
z4_!XlchyE?*<7YI5^r8L3fy;VGL?VFae2qPxZ69TqNa(gv)`e6bM129aLvuqchv7a
zzIrm$oH-@=KtjQ?q$!bW_m-5%)=#!KT;(G@dwt%QXC=zT#kYD=J3U-hW@Z0VkV~o%
z6DbZiRCUR?r|)(~_M(pLm6k0Tc28XN1p=R2T~(X-z<JUv6HPJSNRv*{LU-=99L#l_
z1-Dx>H?ps)Ieo#`_w-(|Dr?>8i@%x635h)V;(g~uqTjO*&)sXa{Pd5hDxO?2L!=>K
zE!!f6SJ}MJmh4`^v{K}Ak}1!EUF)m-h0}xt``15y`}p$S+N{SvcA9d!wl3QGFUi^a
z#W@uRN5{e$!f|KxIj6BWBzeSiL|XRrPF?vZF^SzU`QPoGp@HEATM`b|6-sJyo)9?O
zqOf|-B>79T61jX^4s_3CyWskIcBuFtEw&jg_t>Uf`N-pWnD=pt*Xf51s`4SGAAT;s
zeD`I^o+)2{d|4r&{rd6a$IJcu`^$D-xYL!U$8ULmY0dhdERTPF%*?$g6x4N7YSWyy
zhhG{k)+~3Jqf+WWg~iI)>b2a(zYkuT=huDvJ^%l&vxg5qEb!3O7E&pH6m!M3P<%&2
zUbg1+<wccIzA{UmFi*(-R&4#F%1Xs@{+BsXn<KS0AH8@*{@CozyPijGOWQZaY1y3q
z^5vHgpMH9Iserdv`{TXKQcs?qG{N|b_5W{|`Nj41r1X9sbd@L(FiX7s)<~x2TgI+S
z#cy8!Y?PSEHqU(R+8g}GwuqlQd(HA`;$5d_J}*RjmK>UIBYFR2LiGL8<ax)YHTLE4
zAD=R7W2|Dqb<w{rW%hNCwC8@gvG)4%FCpwaU&~Aw?(O(~C#2v_&`ZzW`+kZOf)AX~
zmEP8{d5VlF%P!q>_LIG>D%pOkTP({gnQQl2*7T^W!_l5o9JS%{kGE&$Ync3A8ys3T
zx8%0m#do5YT%-EeWR>+#cpY=xq0soj!Vhij($_ysUAI>5-$~z1fnR5uJSyH^d48$V
zF1AO;+ju{@rQW|DoN-Pg>G5>=W$ATpuGgev^M1Ly1ugS&|El7if39m9i;74V*S5gB
z`PY9`tewo8moWK_@ShWhq+1^!&@DSH6Roc$E%W1k!Sj#B2DjJUeVA?@eE!DFS!)(l
zXV`C+l|5O$x9Za=<z7Q~mdjnrlLBI6=RR7oQCx45sgeAasQ%QljM{Dc-hYa3YI-92
zBe3=TN8jo-9#_2VYxQ>?6ncN8msk0mZ<U|>ca~3_n_UxMu`jc?m=o}3W=-Ro8a<&|
zyKk?$){*gQg;#cG4(s<5F;7J%-8uG4`C*AM+xt`Uce8C`Zxj|ySks{5t8#XAty0PA
zk4z~C7HUYZcWY%R$(xuF#TOam74b5ttMJ3`jAtU}Crl}d+oUzsHDh&jzrb?qpo|}-
z8;<T(TbbC^zq0s=)%KT>?77~48j>Gmgr#nr?_zuUE>LyFjN0PJOLsK4c~yz+|2l`U
zMQDNIpWE}UF3)+Hcp&ZgykqB+w`(`~PI?^j<p}ewpi^%Jn0hBgEjza3kj4}j9i{-i
zqw{;R{nl{Hzt?#n=i7X?*6@tnrq&79jh2SZ_xdu`Xll|z+0(1KyPC8-JZ2r>NOAR<
zdN=c+THa6Nhh1#Szgu&y<<ytV*lj&sHSkH%&Xlv7&$p-u#EEmW`z}9HVzX?{)wfPv
z;@guLAKBko-oCy3rJmvJv}r|Z3nPCYea&<BZ-u>^?B#|V$s!)_SL_Po*}3Du)7Rqb
z|3BPb|NXQ6J{wv8(?N^Vcc*SR*>^XSX@|z$Ysas0DSFJBtj01!d)B#mHrqw3+c+Xb
zZ`bs>xqlV7nQgD~xNvV~!|Mk}m9x^fPMsHe`DKaCJ=yThOT7Iqlz3DM`YzIJ33@-z
z&h}Q+o#=P&;(G3{T=|0@9=lu`tX=<E<Z!Xx?YNZzW=V5=4_^p9sG|I1{k4y#jGOrl
zm+eg3e^v2E=jLB$6O!d$9X{K<J?ic5vm1RKvk#m5P0{$5bc_3?%Eh;r#q1yMczplI
z{ro9^!i6@RD_tR&_V)O}y!eaD@4cPb+Bf5EeyOqcw!0kLE-%}5BV_HSng0$P<1yb8
zH-{%W_rCMuCf@9ir#CLRQL#z;Pu8M9(=U?N3BhgCXB^n_J?Fo#dWHEyr}+UTi?3wN
zlKD_p_}_<B{PvHZ`+ZH0{j(}j*n9Dy&DS=^BT3sE-uudk)=p~nVY@7S%Hj>vwk5_B
z43rHmx!>g9Sg?KCt+|Ky9I}74_qDHsoVUf@TXjsU!UFX_3iZetZ)J$E)?iz4L56wO
zJ^2r!$$9sZeD+qpHz;5&ylW+WHX&=#p<UJ`u7RNir=~oT*p$bnzgy7ST>hQ!^Y%L`
zLcU>F1Wi8n1Qofz>yL77^x)2#^`XUTKl93vCZ>KH?S?a40oIJ-;YqfYX`KuX){B?B
zt=$^X-RJX0WzRf^2KmUihpU|LMOaiW_MZ2|VbP^WyH~RZ?&A8tIZxlo_3Y6zcb^Fy
z%kI7KZP{DL5{EO_&*&_b+1HeCtb}!fxoBu~E1%Qpf2Z$d%+5Z^*ccQVI=M^1Gs@d=
zv#+$ZiCy&B`*~5KA4Gj4ryf(4ocnH_X6t+1bkprqicbaJ-)((0*0r4DZB4|aRbMyO
zitb!csiQvevP`N~?<9p2Qx>o4n0{cYucb%BrpZ$epA39yXg70t^w}#m;_t*RP2^bL
zq}9u%dd1UxdA06Eg;^$xgl8|G&SkoC+tgpZbvBIbU*(!If_%NYB0^s_=r--$tgWFi
z`GgYJ!J|K(loifel_uKqYyRPe2cP$}p8s}C_F0&GT-4?xufA$Ag@@WMS@AwQJT8|_
zC_%Ax$HvgBYfi??<*huX{Cd$x6_LWlyPwnq9JuCf*uC2*lS|!Iv%+e(^@$@_PiKYA
zD7Kr&7ZI~bPVUpktBc%qAGNq{mtOEzGku@f{gessx72xEcyVsFp_<jVOGZny4C7<?
z7VZE0DSiLnSFhW5?|%Gn!t^;oa>t8um+qZ1Rmt*c#_z>}Cn_(hE}s)sGjH4N+;E4+
zxzn~3pUzz}|7*;WiZ3rFmn~78wq&JV#GHLqo|z}xYj*9A=XMrl)d_m|n4!e;^x@^@
z_v<SDAO86==jgMaG5T}Nmf!F9DaqsA^JC}7hcZi#XKP>Xv2NUMJ2z|YT<aN8OMI@M
zI~tQ`eB!>XLg7)1+5RsrGZL8+!#cuc^IxjX`c)ikQ}*wq$cz(rRbIp=#46PH>^S^4
zr^e6hb<_#V1IH>$7ZmN;ZOmR@XS{rZc9MgsnRxNlgaxly8l@Q(4y5JYw(dW6KSyTk
zx9;%xh4UWR>Npu_ExEDX+WO9#u#Xe3+;jB^K78$L@(RwLrirCDWoj<9T1{B#Enzh0
z)|FUw=fr(E<;;$LR}I;MOFuZw-kPia{5t1c%kSH7Oi?<Y!>=%Vj^mp*5B^O2`R!Qb
z#<jb5m|hQCvM=EH)k~+}@B5Y%JF#)$8otZhRygoXoqvsE|J*&7j+_-f9m96fP)+LH
z)^hG$heLZ<mreZNR&?Z_<`<4jnf3>z%MJ5azDk|HX<KEmrS9ru*Rn2qoN(LP+Pkpe
zSLQ7l?u5lNI<hjHR}@ZQxqnDC`}(^+)=y`b3oXlAvq`l|%}6EZxx}ydmBpzYK|O2b
z-*$6;JM&3VY>n`gvd13JTi?&m+tho!>$B*KqM)mSC9Vgzb|*OrZrJ$IarxB~D<iVs
z?R$Bz^{z=k#>XvZH|vWD|4<8CxT@I5_JmsOpHnUFSzd>a#`+50o3_rxTq9;hmzw>(
z_%`VuvQAU=JSRIj8Eu_)u{QR)cbJ-lr^mUSr}TqX*|4-~1^ERiY_;v^clHT($~tW)
zXuLfs_lK=V%Boc!TfH8hdnF&IZq2~1ad7Ip3-|g@d+4PU&*PYz=jJcWw)hr%_P$?h
z+*Un1{~&5rmeSqGCTFfCQ$k7#I+s4Q-oh)8H|y#Asb7;AinTRYRGj{^idj93Gdk!8
z<Bsz`7yNAQo;jnrd&UfhpldB6wbwtyo&7U8^62wq&4-NhS5)(@fBcrkSL!t5*;U+1
zhcwr@e9xN4creLr+G(z34*m{P?rr9bmeOaHi<@u7$eCFaCuT6cH1_({xtlVg!~|zp
z>p#<b_pU7Ry#MsmKMNk*J^ffS-Lh&Qv$#~aV79bI$>BNe{<~}cd&pf|QnIvi>Y1$q
zT32PFMXF9%&wZx!B`D5z&!4m6@q72}wwCSJR1I8P>>P4?uEpK1wX7a~PLZ2WhN`Sw
z)^Z}buV{hVitk1bzgd0%#`-wND`|PepA{c=Jt^LD-t^folaB!|iZ6@m|8JPqk$&#w
zjOh<$P985Qk9z0-^UK}#^82;FJ`~yp2fiwd++4KYFtvVm>9>4=!#4Iy^X@1tiI6Z_
zy2Lg?SUG8*c-Z{ilIE9QNad6~7P+nQbi@8q*2xv8>L2khX4qa`F>|@_d52r=36F1@
zb)EN_HE-g(GRBm(y;T{%R^0!6!nG`rt@cwd=cfYGyv5g7YTo+Z%x0B!p{T7!Y<t?h
z*{5=Y{`;?L@ttu_BQ=!GZpC&3(*!dP^IEU?<NFjZXRqaRd3|ZU-TdSQ{9OxJy_iG{
zJ5BGiy*u~Ze22Ybx@u_qrDY2W_k>j2PTIwLYRR@!jLj4EV|U64PX8M>qhI&ny5!@<
z%%{G^Z*BH(n7uMIdb@Jl-t1pCu}<Nq+jJ#WnDgh~T96w0bDz^>32mvq(9rc_H+}i0
z`+ZsKa*($vV?i;Ky4fAY+E*c=X|sPcyk=+m<*LDaEc`ylUuoNq@9L_vpU%0)aC=Ld
zpRClg)4QJs<(p}Iyg$k5am9kw%d{pWDQuQdHd&E!(<QISA!z$qb@f!$<NI@$eZ6Po
zvugVOxn~13u4cZQ<29wSLP3PZVqg0k-_MB!KFWdq`@Vg?-rJS+;rHWroPlQo?vyS3
zyTvotxwdT0e#x~P(wk#sC1!7DahPz${AW6^z1buA+scLSZm;6dHK@6C{HO3;H&dmk
zv$t4w-E*kjbLPY)qw@Lte%btgnd|$<AoFD8W~NS&<yo3xZ~lDo(6&4{ccXZJre{#7
zO0SH3V20Ns&y_`n$*vzbS!S8IDdfqmcQb4_QhM;<uC>lvlj7bsc}(z93uRK?v9kKs
zE$4+(4U{xzF?q{OZ}j-C+Elzi{k3t=JCjST&mJxEm@8?kyVhMHuV&AemlOU33dT2^
zaUALXuC*$;L{nt?Ee7r*GJd6n6V)d_nlR;5)Dcz98I2}~)T{V>)_=OVE%am2<9&sn
zXU!6+cR6}j)p1h6#2sHuvQ2V6_H?X1a^_Xs)vjp%_F4LOPc%-R^?Aj~HAjkUm+Ks;
z+FA4KM`vA((|S{-v|c9dEsvV!h$wj_8+t1p|J{6V>7sOR*CSbzHZyRXoY2wmELC?K
z*8<J#bF+d~mZ&I+?waj>tN6t6JueGyKFVh7_~<y@{?DWE{l7k~-hEh6zx&x8jqh9S
z+jmS2y0&!<>#LHhE8+vRs~dx+gg9`@hA_ShPJBJdv1a}IIcEx{Zqjqx_MoAy`IY74
z$B{3WTsTtVS$VSk@W+ec3Y`kF@09OxcsMjY+94z7r!Qx>@AtpU{O|2*D`NDI9r)+Z
zw(jneZ@d0H`>gu!^P!8CAJ5bpT>G(4*6HFMv#DR!G|Xi0TEmu*IrUG=FR>qmp}&n;
zxVP-tZ1?ckjh)~3`xl1HzZ?8zcHM=EDVz7*&)2U1r7idPuC7Ysn=J<;>`IFG?i`g;
zjCOJEl9;@3mU&R|H09-~6L_*SPR5-2Z869BtMTt3bM1G6B`sm+?@Co{^JSX*tT)#3
z!Jn;*S0B^MfAUk-uBcAlCagR3ede)WF1g%^>%Sf`@S0Km-mCeTfX&K<UQWB~nq9@1
z<}I8s-{e)W{igq?+uZDa$iM4ZtRmnLXlAnXrA(~#v7eg^pM^%W_HgGI=rT@}DwkY&
z_zvHTse8(vB;8K**WUg7{Ep5w=N091E>^}Jm*(5yCX#LOwd5Js2@~#BuTxg-2oC<c
zV`ZtdahSeHalW94Rdl@5ddAHiLj8Z`L|(lrD7+~5KFaaazSj{uRGDs+WgV1m47bxR
zvzWazTlvykecPpV5qDGh3#I(_aAjW5X1?<8a5nQht;c>}!&I-l6kuKZ@!uw`S54L5
zY$nz=*_k=Y{N~Bn{x<nSjN-(l3KN&S*e(}sdTW{a{bb#SZ9(@%M9&6lC`au){d($*
zkg6!dxZITsr|dj6f6-*+qagwgRUzC=v-mf!{daNRzuktRt`5r%Zz_3N`?jGoFpjym
z$5M0OvKuKkk1X1{T6|JaSA$Q`kywY;Ya46V?rCP!Qay3w_WJW>ai$3xE>js?_m=LT
zRp516O88B8yw`!ES8Ely#90iA8}%wPty`4d+`sGUT(ffPI<v)<vX#g0M%*u&tH7~V
z{P>n*>*nuE^^Kg|z3$7_Q*sx59cQqt3utwkvMT6g%n=TIK3(=zf{XNpZ`D3eGmTjj
z`6ux3l8`%Vr$v<h5!LPy%lWtG$E34I4RvFWZ}7_M*|<%zSixL-i&n7G)$G!pGolj?
zyqM&)ts*0{Huz4O^h0Bwq$5xMygzLyy7WoFr(g$_1jZ)D<#w}|#H^ZDyXx4&Sy^j*
z+!G@;{;YaqC&%i=8@<=z&fA(gFHipEmkTYAy!SkHVOqz+OTob%X$+#_j}KOwOg9aB
zyinQ7DaxQ#^6rsdCVx&T&hxFka|1U2Ke9aWiGe~H*VO-yy4U~z7ry`Jqo=1IKb#O8
z<eU5Zev75a#-!IvW4Jb)m^8zsSZKwvJ8a*$d_z}VN!&Q2bl=08a>4zRM9&?1|BtD;
z`q2vO6?(rC-Zn_?S?a-Pa3T2I@yiD}%nK*#xZ5t9@cyo&i|NzHg*&S)?(N;V`~SC!
z@9#c7=6}BYc+k1P%O3VVWhYIgPriIUC-aZbt6gV;%TDBV?Ts`o<Epb~Z9nHE5mLH*
zxz>VJMwgcLSFtsIy!mSW-YJr|8|0%e_)gPX;90wS`i{M#lMMx~w>>&9{&UY%Q-cuJ
z=>K)kubf=5O}c!on7!RR(b|grC$Im>d@cUw&EFfb5|>Z&<zCqH<wCDcWR>OJ`NubZ
z+<r>bsy$s;%V>$EOmp5jLoM4?yHtCGlx23SG38k2@cw3BuIn|U|52+jXSDRkDHD(F
zbbY$@vvltAyZ7hY&CLm3YgX?hq0anGKX_^A>GIY72h*dkM|mf*M@w<+)Bb$aIQaDZ
zBFisV`g{+(Rr5(u4$Pdfi1U4Ldu_lBrWB6SCX?e!R&31a)Y<qZceOgl9@UHCf_XjV
zf-c{>Ji@cqimy%=w}0tACp74D=;_yw>z1FHsa2I{q<m<a)U7QRn>KcyUDw)}ar};O
z^0Ln75leRjZE5jY#C6YP%fpnsTOSJ^b$wrR|K0AHF|IQ<J)Ql9b!|I$-@f8&XEw6M
zA5F+SJXPZ4H0Bwm=9WgDKi%KOO~3PCR>RHKDvAE7yh~Tx{@5B9ZTO~(+sP#(X({K4
z%Gk^2n--kd9^#vs!EpPlpi^$wj-O&jj33S_s4WgV=+CuV>w)RfkdTW^r<U;kTb;bM
zUC8gq+O_M&3{$79jbZm(sC04<ORtSiuBPX@XgP;XTFtXPXYD)@wIfQr{Y&<d))Mpg
z)khe<9bd;4@bTKMur&|&w5;;yDLr|CW$A&W3j5ZK1<Om1SVe@22C3Slm>T*gD+zd-
zE6-1xRGhbT=iRWluQylzyS76%uyudMH2Xc)^Qs-0ueNjvC2iifu`c!VvO9&l(v@C6
z>hn+1e&r~lZBTBmttdA+E#}^l8UC5OHzuVi$6v27*YfQ3_Fu}D_|<ja_q17umU({u
zCeeTW&QnD{vGO**RQ`?y7Aw5HBxfzOeEzsd;L;g}88c(U_>W8z)7iNxsNvA2S^5*E
zERyVMy}mBHce~5nHouwtD`puw<=oAAP~uiM=g6aI;g+KLeEWajyuQEU_tU!%MQncm
z(pbAnKWzHmsYQ0vy*)i+ifZOndd>=ZGgUP3h5w8L+0D}0qJ4Yh?!8x;csS?9JLe~N
zZa%KPwNNTT&-`?^On<kfp1+$o(=64;FOmvhbZW?0>MlS0_we1>;q&kP|Jn{(5pekT
z=jG~AZ*KiP#vr-on(@QW743GH_6B;M_bPumLoUay{_>FwkDA#`$M3G*vc*W=x!dA*
z`0@)!ZcEAD5#l`^Hj6p$nyyjB>o3ojeN#SHwro<e(G$N5&tr34>-HV_-y`+BYC^<J
z?Y*m3JY^1$mHp~*Xo=nPvt~Tj#s$2>&vm&Yj_==K9jX>1@Zn8RQuNh)g~HiKE@!09
zPO5Lde&?XBg(d&3UyF)gXwH3ps5h!+*ZO16*rIlZDxT(i9QUHHd4H=kW6)yeZ*vdT
zvREhHvv2X+I#FqPhCxtC$J44n@f|`zK`uHOXU=|G^L=BCEW>I|g$&`|s=Z3T&WN5^
z8Tl+_*Sr?Nx4+LevMxO4uwA!1K<L+Z_tr<>64H!<jyePwvv^(T@zM(Re)e+XVTmo9
z>e|@tI0MSBl&#&hZtKDOh8(ZXM<lObF|AnnyYi|#xuNgpWNL1ma%0-pOZk`H@%IYZ
zZ!HycOVN08yjqy&>)cDRO*$82Qk8fc```H<_xtDfj?FkwhutH+&LgDl=$z0C7L(rY
zKXkkGeX+&oZ#-`%{}WW-XUT2wsYNP%=`8<Wf(psJ_m+BZe9P<L>~h(q_PyHDh>cbU
zRr2q%KW6JI5zG8{b63a1Yi@VjI7L&sqQ(7xb1o`cTjN`9o^LqK^hQs~N4eC9m3OW`
ziTz^gl+JX0!txb8i_IA|bvAA_h<9!EIVY35WL1@fns>*4-y>Y7Q}^zev1Gz+)({h>
z<~@~XR;*IGWps3^+>M_n)<5kP$q-a6PEqfcSbF7chImxv?G<eX`)vO=v#i=^*yLl_
z7ij%Dqx0rQ@fkZAUj(u-Pl@D=wOfD3zF6t~j-apiT+bh!qE{X&6?x~5=q#qpa2-qG
z1SS7R(*<W)L@t<paB-N(xtl7_83SG=GMyEU+5RKb<$u_#l5&CTAJ)9Mu&QCJ{g&*#
z@0HvN5460CbJBBT4Z37E)g@K<hvM_cA1kcn_)bqMF8l1JC*FRM>w(rXUBM*X>7jN@
zL&YPD{GNrXzkX{}xLz{5<V*JPJJ+T*?CzWy(J6DABjK`K`Z>Ro;*+jCir-W7b+!Kf
zy3+djhl>ubZP^xfEAREvIrDQ$V?!=Ybj#Xdm8DWvGBaqp*zL`el_qXcc+ysT(6?~U
z@|PPf&q}N1`5m;Q&v8#(N!*>&llo6jy5_y5xbXV3Gs&z+CHL-_BWj^j&R>3fy8gTP
z{Xc%(mEZsSa=E<SytCZ-T|1V)KeONQPf_e+b7y_4g;C3%Se}bJ=V!}rswT+N)610P
zU~aA%8LiK+9v1okf7%`e#mhwzkM7MknPh&wBzVbVomz?d`<MQhy_)ydqJID78+jGx
zr*FKz$MQn^%z;dkPd+w0H7xUswq#xYk!0Tb=JY3y_s4o4rTyBdc(-b+a`cvy-9Dwc
ze|^lg=4{*dwQ}JFh4?wz?`KBZE#I4W^@MSjLY&RnnoEnePp-7-GUSo{5LO@6d)Fa5
zDy#Oqo%f!i|GCqJ?g$uEpJ(6H<jy9tWn<iKIn9_v5m(b${sPKc0UIB7sU5!i;&Pf*
z@%{>{=&I)?&v`8c*KN-`&(p*ChCS_ZV%xt<9u^#j45nr+taRF*aaq~bTtqW*W-y~H
z=c~}trE8DqueE*i$7Y+)_cRM(Q30tPCQW|RRZgjd_AcZ6y5>r(%D0Sn`S#bAExjIh
z%&^n<#^-0%r<y_@+!olDYyQD@?`c8i0-sDP-C30(FJ>$|6E4=FCK6zFhDq@B?{_Jy
z7RgQ8UXWSi)*umFd}*1GyyEfvyj0QeoSS1rrwTjm+pPO*QjJtizhs`WmYI)d$42L_
zOM-8FC-T3$*Ic&Qd;xC*i$_Q3YmtJlVkv)r8uvWgw(EjQ=7}qlnMySurOk0>d**s5
z?1-tg`U213a}zDpRGs3hvz%vtPx5*Be8!rOGnY(xWGxbX`<k%l>nc@NHo*@H6+t1v
zUM1#M5qI`xob!<S>-x}DrQ`RTbGM}8MJH)}UdD0D)7>pMDBt1!w}KgEMk;$!I+|Dw
z8hN{KNo-l1Z+yF0`ACSzDyEaVKZB-hiF^39^mxi_XYtOYO;Z*pC5t64Jbd=pkLKGK
zx$J_!Er{#8Hq-Hz`?J5b(lUFKJXa(z>TdXQsNhUT1c%=Z{{W3k&mv9qr@h|q(jM_w
z>+U@x&CsaD_AW}kcUCHQEjKbgv_c`Q(bZIl;oaK9InFDsPM-7Lw|lqsJU>nS3oI8|
z&UzmGTM=V0bIF-CYnaS;io_KyQga9rnsxkx;t7ePYmaPjW_FYA@BRA7l<%UWxVc+d
z{M*Y;V(;U4<(EwV+VW-h`nrFcujdz?Wfzo<+bGp5bJ<k;*}d1^><cm-y{`TV?5@sC
z+%h5da@XG_^Jb^Zr+2EZV!z<3l(4|NW%aY3Ih^MWrMt_QY4BO<&f;16s#CGZLTRO%
zdQ8TnwxXRK4@!1f-`!vH@zdM85C2X0_v88d|Gx_NhTPt|VAkr?!{-+N;g;QE93pRZ
zP~*nos;G_jo<V10J9n%(!XCAVt%?0}pQ-60v49&L%^S~7lU09__H3{5mA(UO1wx&+
z9lLB8yYJoso}RBi&iY3GRSw=SX<m5yT*PVdoI4Yf@81Y3t>wDItoBg1VH5wM^|9-A
zPyDwvuKh_}!rg?zuZCxKK5u-J)4cm<TK{e7Kv|<n`_(u2o_>FG$K7RNtLkfJ>@V1H
zNke$!7QfO}XD8)Oy`pH^=NWZ*9%tg&6|)~*yp_B3f{c8fZ`Q*j+h%^rShMBxL!G6Y
zBmcW?@Q&$JjCw!osZ|sA>-?o{ZVz`w&r003s(1$LtAtde>{IFLH;M(l-yF?5D}I}A
z-OK3B@+ZDd3-e24`s`e2bEm3ASaa4CH-<?L;(>*Y6}{E%s}67!Zr;U~e@XXh)GGI5
zeUpO(?@tPTb#<G9Z=l)ypDz!tJ}P_iuSJp67yoEY$rs`8KhAp;>$zL>{`N2jhQO)Q
z;y=zgzJC6kyPrC93*VO*ux^}i@%Q1G(M31cym@i<!{%$N)VO(bc+VVOCnd(crQpGg
z{^eSN-x#;PEnX`)=lDb6`}4JuYnpw14nKR;#^9ILAF?9iX<N*Sj-Ezg_ZQZO`rS+m
z^-HGzxZj_B!Na&qWKy@@$8+kThMQ;Xo^|)s_8HMh_l)j2s~F4;<KNa7DHiCv@+G5z
zZdmj5Lk)Yfn>;eAb=dYM227tN{x~sl^)wzu)um5j?npN+nGrU#ty^hngu2Au84<k^
z9-lv?8~1yAtWBF!w(_D+N$qW!AIu#AFBxBIKJkj$yD4fbXP?Ku4NXh8m9rg+S#Qcx
zQ8m3><n}t9RiF8L#l@2Sx3yo_nXn>gey>#Lrs$cysv0eaPhY*XOik7?cR^^8^<E7F
zf8NV#-eKjL4qvRwK3d%qOc1YU`!Z?n=bD248!v(v-~F)KNivCbQ7m(8zJ~aOnAgIS
z<od)?ljDv`=-KdD>T5p#RT)wm*%J};N4!zTQhu8ADM^OCbLP)m6Z4M0z2~xKw%es2
zk6L!_WYuL4TNKIdp1>>2@gv9UrmRU-w!PG=4ZCWRM4cz4X?%Nn+r0k!=lFfQc6(o(
zv^p&R$D73BV&h%Qr(8<({HZa^MExx1LI1VWcv590GORDo=2Tx-l&;_Z?9sd@p?2ol
zyN<t1n|t6w|FKsU^HR5VuvZyI-<CTsx{@#5CG>NV&CW{ARLwHIyb_VfzIO5D&3mRT
z|NQiEyZpK7#ZyB3!ldKQv)1Z;uQHLd-*{pEV{^7WyX|K#pPIva_mRo#u7)(GJh?5A
zIV*K;*he4K)y<u)y}k49i}f29rk&XKYIc}_K;cnc-@j{|e`VI_{w(Z`4~<{4<m<M(
ze(zHjTl05|&7N%&*Zhm4;1=Kg|El#Lvo4*EynjQ(DkJUx>QK>bvZ0@q&;EOzS#zlG
z@P)vBKdlXXwc!UY7XG;1>-D#amnqKr=icz3gZH+Dnm)Sm;q;otbLGD+Hae7BzbZsN
zp!SZ{#4;zT3#!LVc1K&qh3@y&TB%{SpKI1~5z#GXOoHX=7xvYEnfKl2X4R#ilV^Q7
zd!BDa4$JY~(d!p4Qk&p+;6=M|v#Er#$u|Y(PVvis<a(wuEL_H5Tcy!{*8BS8ZIAu$
zFuN^E(<^0f`0y*}WZCi~b#f<m2tK_tO>WP#$ULp0oS;({#>KyM_D%gJ)Aq?it$lC$
zudQ`V)=G<(_5b|7@@n9s5KHH^EN(6x6ZqCo3_0nn`dB3K-pz%twyjt_HNEk@Zn#bM
zUC#V@f2EdcuJU-s6xbS7`h25))vLa9-+w7S+!H5bYOAgop?$lwO8C*E3zE{OY_oRx
zGQPUM^;OBE(^6-Tc>Qhov+due|381RC!6Iy3z!}G<8qV8`8gG(M=oc_=GL$I9<}OK
zir311vq@@8OxXSfWON9exn6MnvW|}Sn_bQfmd~dY-HzH4>Az>HXUmC6U7Tk<I)AZg
za{3l|ZG2f+Zv6SuT#?z{O|oj5U(@7Xhv)R{->$c$!d~3h^Xh|T0`k%ay((PqNA{lD
z$M;FYWT6n}k|k@5v_iibW^Smoe!O+HMcE>Q15N!#8Tp@UZ3B~*|5K5<w^Q_TTx)3H
z=8_{t`e_VHW?h}z=&CXC)l?PPX(}hrE_uHGIQzA?=DQg>>b7oVPvAVsmDIiK;%()H
z>W5E=Y`$e!SQ{R&_@hVi+hwob%@Q`0xZJ~kVT+Et;@RBT?yOHol$L2a26zdInC>_c
zSQ;tjacRxzN%8wC4sOx;K50tp(HNfu*Hf?Z-l>(aXdFKCa_f_>t-QW2TYg!cx#M!X
zXz`_ri)Repx%=*~`3%~B*?)SIPWQ4YA+NporS9tAx!X0HQ&s<Tk;TiRB`Td?v_n^B
z%-UeQLf}FA$L!x3B}s=0ulKuXU0vhGS|oLC(aUP)T~{{g96Il!y-4-Kc|o`3awkO>
zXqYTNoM>o0OZ@xkr<<lt?eAlmR&wCs(T91l=l$dK&Y7khe_>;}<!G?|v>3K#nPA1g
zKL!7ti$A??Q})J;+nPpNr|)zxJ<-R}=QX(^x8U2ZrRI;?wr^9fE#|wD%layWy?)uQ
zxoS-F9p(R36#h5QaawgsP?wuy?rX~(=ap`8J-#)6gYc1gfAgfPWb2=pZtXtrJnvlQ
zH_MZmyX=-O)%RW0t6|7qQ7rV(f73ImP4`x8FMYTAbEPellHu7)4|6qiwG7l(^zP_g
z8|>1<x1v(SzOSt3#gn<Is<E%1te^4Y!{qq-)m85PwNFKshrXQV;49Mjq^IEN(mBUe
zk}AYEG_TCIzT1>HZ;~5F^&0z<9<^;2i&tvB-O*Z*dgjy5=nr3SFSSvNe|y9F@ix}o
zy=+WVpX}|(X5Py1CggZF>#3J4e|xrDMm38Zv*~?P60u|nn|p+IsP_^%-nUhWqRd^_
zll^UHF<k9<)AMuts=mNo=MSBj+m&WqS9`2p@$!d#cA;tvYeK9zzkm4s^5u&|BCUT5
zI!l&T?w+$catrH;%*%7RmEXnfIsN6tiM^-hI+U#DIr=C0{i{t?m)dH>LRQ%=*OG5|
zaGZ6K*01!tWgek^;g9<*ilxoiW!6uUHv4^pZHLT_>V)PU`6s^Uy<tc`y^eEMgNl#G
z^P<kBHT^&4nNAR1AhK8C=o5q9!rkUyZ>jBlbgd)py5jY{-F;dDp}qQ1M-5&ucmA8M
z)_UN`o#qL72V4DDEy=zla<5^^^v8vkI-05+Yem{VE%{y`(3>@RqxA}<M^@!)zm<I5
zI3+Wxbmn2J0OLJ2JnJW{UBz3?$ZYBSR$6YAn}FhV#fmr0S$;`-wJiHL{$}ZyHhfX1
zd^bBXL1LZ3>@y}>H*d7|XsE8VyM1z>im+4J8)L`Tj_Qp;k0!7BaKZAfZrnn%*9J3P
z`2`rmCWcD5ESLOZ_TJ&fru^l$JI<Ji7Ycn{t^CDIWU1T!i_MQ$XJ7PH>IpLu*<mQP
z-f7K(jxuTX3+5O9Y7{NcTE!g6y5RNF8I>l=)4%$B-=kxhdUlu8&n-bkhL^lGk523>
z=YLtEU%vX`o_W_dPmNkFUdTTuE9657_wC%*-E%|2cxGG;4f$TOHP&VK{+N4r?*ISP
zy*^Hk-_nh5)uOJQtm~~*yIrR_xC#e}d3s1{wqDT)`kbVGEUmsa?s38DL${9P7dQD-
zRL`o|>tRuF?e9iI!>=AuT{D=a&aRbT8gOWWi7a3D?TfxiOC#R=*)s3^bJtI^+s)hO
z%s=(?@UJib{#1y~au;vRePaFm(8sj8zq-2msmd!HvO}JyXK(f_Y4Pi^Q?Gmy@Ps=n
zCRO_2%P9{3YHasP_HTPwyZyS?)+Jp(c=unGQ>t^dT7GBB>(9%@zRh5~68yB!Q*g)g
zx}HVcwG-?+x96q3Ke4UgCnqo4_b0P86|(--?7X~aVsMy_`i6bmJ7S(MX}fO_#~`_F
zfA75=#ofoM59LmY_!BGo?!k-~$yWJ+M-R-FUl95%<Kw)^PYP>xMY|}PZ+dvwv$<C)
ztLN?V2*r-w%a{22+WlGm_vo9ifw`Adq!{lUyf{nE^lo~O-V;fc{k&UR!XIsoRAW4N
zM&;&}Epr@Nsvn7MPkLdN`>MU~y0d`||KCsZrb;ec+0(P?S#7}8DXU#gmxe5MRJy2o
z!K5i<|8}caJ6dmLz0<G{=GIyBTjs6v_tn3Xl;_>_dVf6b{WSLNmQ}Uh7habBd9v}D
zT55jF)}!AxtljZIil@NjdSd9kHjd3FpDYu0chz>eve-D};vKctQ)k;&#&IZn{L(tU
zx@!09rElKcvUIbZ^&*U0Q>}cprQX`AK31oki+3woWe)9|_#!cUhE-TkkasnsT&~6J
z$PJt~US=4}GjJ|)xU}Pk%<H2$ruuo$qR$;(>%MjJhX+QjUiUmp`+Gen?09*^-%npg
z^~#0NYfsk5UGVARe7KCul}kxPGcD|lPTRW`OcPQg86u1NqEaknK5crkChut1#IT!Y
z(u?AGjsy!ZA5wjgx9UXZMb+iqA0D}`v0SveYU0{eA69;n`TE^$pM`F4yS!N*kKuL$
ziOk;@R(*(AIBWWxVAh+v53g5c^7Gt$@cwH1#kErf7hRK6pTWnewC+V`w)DGe8dk?F
zn7^(xO^UeHDSdPCwrNHCcI<w(RQ%Ig(b6leVf>30&(hZneDbDDOjFM%GH9;3!Jn1Q
z=TF+6+|ab<qSslz7cMvK0}uajm^fwfGL<Derw6^5`gyU(#N?;<B&@D>tUgsI-o9{`
zT9nDMw;R;N%qJbyQC;rgwQ|bUjH4?0krO=PxqnuKtz8lz<k6jGRcU8$H?>If=$s|H
zOe?z%``)oNaSBS_W-@1U)1f?zb&a<=H0zvMI(MYJn64jJ|Mk_^yOV-1gig$Q_~P0v
zxs~qj8@EabIUUmLt-PQ8t4c}mM$!_?r>}b^On=$w_wcI4hO?h+WnL|1z3CZjdG1%U
z)GeXoYbL*279?<L-c&zklV@T%dAGOdoGw~0^TPZ$o_9i@=-ro%*L@=0yw|?|$MXFD
zU(Oyc_j`Rwlw*~L)RAj%H(ssF37>TPty#+Q%PT}{zMn0ARdCEO`fYzf8{hq@r7ix~
z9W`%+*hedJ=BIPLms!kVcR=LKslT7Ieha_cw=q*Rg1cDGlyO`CzVxo&i3c)VC;3g0
z>pS)Q)R(B!jG4lpZkbgrzglQ~bw&mMW${n3-dyJrKEJkQGi<Nt)j2lp`Q5$JZ-sAY
zOi}-PDC~pwT}6|RS*tJATd1Toy$sc{k68QNkZE0i)_moz+YM!0j;~(J^f`w2O#Ah6
zf2UU)k6Y2}WxpoRPc}7aIXn67t+}3#vx_Wjne3RJcSiQx>xVyMm53H-v2w4qi2u6B
zYfiSt<Yi7P_lVt`YH?uCZzuog>5_ksd-7XtwYU+sQ{YhJg{$Gg{dqEvR(yGy756GC
zSKeU7vZ<?_Z!US|WBJ1<D=&1#W7B%Ar6C8Eb59(<VRbcJphaor=ca>FQd?$AH(m3T
zG2JG<`=o94v5NH*<}%N|)cvn8G{$<?F-F-V>P%ugq@9*rGr3~NId$GT??+N0LHX9a
zD+0>id)Va2UAvR!b22CBm9f#wmntjt1CLj&SU1t|_utn>yS`4b*3nK~l5x`Pcy{}V
ztg9O|lrl{Plyj!19{%LKEXUL2bMlYJK`Pgm-8dtCH{+Sz3fuFp+?QT%**Jswm1yre
z=lM+x^{3(w-pvkg4%T|TZ0}`0K^5NWRE|Y!gR+;17{8v`!sL3c(o(MI(vp>3u6Mdc
zCq9wm+1R=N>Yv>~oHMufmQQ??*2+}q>3vS=N>O8EZ5!XO6H^$>y?fMtyc5{hopUrb
z^l@m&d7aie0n173KZFyR8nr`0LR43GRk%!W+`DXdMal=oSNko_nIxWhQFiY5Dy79G
z7bI>^;#(A0eXU9Uv)Q(Z>n?^)GBsb%Yd%-+=$o9n%yqx6e2Uv5ym{+1dBI;k{?E84
zsfZ+%nhUF~%Hk2{{E;=a^11c=yKAa*%-OSI3|B-3XX#k;i!NR^$LOy9HnCzcheLm*
z4!E!HyQjs~xnwif+Aq91d!DkX<sNUn9rfzN&7McIN+PFuBt~BkJ-NwENhjoMMa-e6
zYo3|J|1Em>bknq?cYfxRGRyKpIv0mO)=|E&bLY3$R@R)ZCW<*BvI0tRewu&Ze7(J$
zUw`_J%dZ|U3fi#XV8H6!mPp@Qs~=bTuCny{Gi~oep~B-`vU+T9Pkwt`{%^)x(;0yg
zQb{LFre!-nzqaYlMaQ1N6Cv-Kc8f3XNIQGt;C|K_pAU+4|JpZY(}xA86?cgSdVSjc
znSc7D=bycPmn60{haOEh%D?qU(z|0N=e47ssmwVhH}}WKzk7B%6@T}2^^n@bs#n!7
z)?1&o!>9K(qrI8IeWP$TheckCES4tb{nh#1RQD(?;fLwT{eex&=d*T7<*;6eE$n}G
zce>v)kpwA*+G8f)-<<yxJjLq#scB1&9`Y+xf3oG*lupA()%V{X3o$xc{)j87;Vh5n
zrpyPc64o!DbN$}MWshgwZrUODwngjcURR$5=NJCA?ml^bOC#g9Q*}<_XROzY?Y>)W
zwt-vE?N%E5p&S+7+ILq(&%QV@Vb@imT`TzB9@d%i-c|kCmlj6fnV%jn&u`}JD_;HL
z;9G;I`^s;>5DMUy^x%zA397z#?3dycSzmz@eI7D;UAr3AoniJ>SQvSAa@2%(o|kUq
ze806tDP%`>itW)yw-hzQ&Q7(q<qS4c{!^t`cqlrrtn@l#;+%sPQyHD#`<Cm9&6UkF
zE!-c)elyHfu7G1{OHH8YM9<ZCzRF39K2rO;?rvx3{PQt)Zy0r-l+vy7U-@*Npy53A
zIRTEFV&g&|HSYd;GB@YScJp|d(CS-9?!;V?$y^cAko7mndCT<#aow+xv)t9SQdwG8
zDH(WF+<CIljYFs>;7HBu#$@I6!W#!TnzMX=zLa$-IMq5ev8b3eV5;=WVAU+m`0onu
z-pvw<`YMo~<)xX~rgrG;8Ly8)P9HO_ZR6yq)($a!zDK6Gpz+%0s>trO(<jJ;TZwdp
zWna_c74W*|b>IB^=ch$+dqwPGyWCmVN;Q`7IbC(SWS&0zdt_g6(789i4*xauIS}+K
z<@NC(xlf;rp8kF`pF^41r-f0WSd`s&ZjYZ`)1(mA4_6mHI(OnBm%6x<S!jObl?qLt
z?NN)}xBu<mW+eV1{lCpBdCNCaTS^KWFL!$yu9I^rme48`x_sC>TF&c(!;#5_fjbgJ
zXRX*-X_z6ZlX>Zbol9r@Mb7K&O+8Bc!WdXKq?~P(YTo7XCxUC$lt<zVUH37{?t9X)
z@sdG)MR3B(Lu<DEYD}B;Ge)4r`c&6e1q06OA)2daP7UgkEQsXe7C08U`0}3_L4KB}
zkG6>W_s{E^q`9eVZFXqW^6T?=1@(XWcVKPB-GI#P!uzAnJ~m!DX}$cr`hU;Pc0Zo$
zwnm~#uVSyNa^Z|TlkF4yvL?)!tR$!@GT(^TUv2y68T#MEY&F%2PW%x_y)EwgHGp}o
zmQ#AwGok3&i^5Wj+)gaN(35tMbHPq&)>qHMcsyHI&5Esw<a08P5B9rRyeKQ`(xliI
zK^dREJ$$>oUta&&Qk&i%cYblQEDATTnlwL2jo0+chHt&*2idP?n%tOObIriTD)r4r
zfz#=(8k;><@-00$=l+zQ8}pBBir3gSr^9yo&&W>SuWo#Cq24j`+t)8P`cZT9&K&I-
zO{b+Zw*5+fm3r;-&0SYE?oLxGvq@RE_fbFRRDZAUxBuU^P<}nJ-P6d>>#m%a^`)D~
zvliG(o6i*8w^IC9P}boot2yQ!?2u++iYcy4(_+}RVAZdT4CB9gf7{Od=6)y3cX&Tb
zR`^pE9hQA}op^3_27LSSZ<<~F?ek(m>tY=OGbh~Lqq1_F>*Lth`k9-`ODB8Q-pUKG
zt+h9jK6HGgfwhErdfto;_Lmo&FZio%{!^Dz$$fu7_&Sc%*r&B@U)3$79A^8ph={D*
zt9?)4m9%5<iQ;6r1zf%!mVqx;Up4uD<LZv1QIevM1N&bbS#>l&x_g01!X%Eq@5UFT
zCQrJT{yJ7w(??4uUs5V`g3VFCzJ!@vC*Can%iF7UX>Wta$Jv)|yqepwT7dTy@0`nZ
zPM5Clof&#*)r|Sg8A{h@UfS8eaIT!tn}E~X`Z^9+xY=J(Q@vVzaOIcx{YRv>#_s=>
z@-%D9?3u>`N?)x?e0Mcb@5;s;*U+$%(z1B{BIQks8wwV$4GlgVYhazfQvarq$gf@b
zvwRQywS02(Sk1n<A2&M&zpB}|mf7^_6s5bN$@d@S6>O-Ik3QS!#^D;>yS;Vg%(Zb|
znx0<^PFSjow)AiC+BS(zaL>K^62@RBvz=?cRqT`aJ7Lv{S??xm?YMbtc}wv(_I=+2
z4$WL*@GvGy%~<sPng1nr|6|YPe|{)#BK+Tv)nmz_Ept|SSN;8Tb+veT)2EM*m;2AR
zn;Us)66=l`Yg<FK6y4&xwI%$z*Jx^Lhsix!5z{p5?d&^i_nuHQoYTQKcc!ru-|{`e
zR*^0}Cg}?$5*ympBL$P3gpTkgZs^`IXF|V++9sn#YZJ0Bt|<w7m*us@L+OQWQd(W^
zlNr;?cJWVe2}=DN=pvM?7jVSWJT&cUNdCEbIik{S9usnJ_GmJmnpX1OFh=5PPr0&!
zqJ@s+gi_nf-Yedmf4;e+V@}^`#g%KOCxvZcVCah3v8m?J$2Gg2`J|fe-aUKv1%YFe
z#MUZ^<jc&~NZJ$0An4MwEP6$Opo7?{+rQh*_2u|1kJhS8{1LX>Zu+{Sa^vW`d)=-+
zQ<>EvW1CW45Y*o9=H9BvTO6CbieuYXr{ID!jP;kMMVUTdIFIF?Mc$41H_oLUnKZ8@
zHO|GQK*T&`p1I;Hwd~D2iYvQRF01NK<By9tlj(GJmxivYrvCeTH9tSRZ2$W4<>i}M
zl@e~M3)@fRPGddo@@9|V#O9L)GN-r4F8clC^|x~ZJ$>7S8|Uh}urfR>J!sb+l0Rk6
zj^4iG9o-M}xPDu{^x>Vel1VT~i+|hHzBjL~>bCCX{j6|ZvU7VbmyCCVmaZ(nt4mRS
zXn&{E6yL2&b1zAm9a<r`+dggD^3xsui*qJgP0ZFjtYj*4URv_l5`kyu1O%FY9@yn!
z-4Oj?XO2c?jog3poe}G#*Q^aJTph5AGlTQiUCDs<>p8cySR~?JWr{AksNgx{%#BlQ
zZ==pk3%{W?`I-EIH-Yntw$+>{zcFJ*uGNmCI>$6L7tM}}J+9XKV2VRW_NtdJZZ10#
zICJx&%4C_-zP*Z*g{!umj9F~HU4GKNTRwRPzRDd{A*W1V#jV*V{PtMhbHCr!(hZ)W
zHpef=xt@@+oixK`%`ThJu>R%2a~M9HVthYg9hX-jlV%zFi=QikH=cRP$9eR%T~dwI
z=TkoK<oY-LEH|4Nmb_$lkAv)+t`{=|+wO!Mt*tn{^4+>dMh2M+immxE32zy%O^Fn7
z?y|f)^YO0Z--?~>mF$gIX0EtV6@K~F($=_ii{Hi@mGjiOZd7b|%+~gLm9>nv3Xejk
zT0jJIgO-9waF3|eqiu}4EG-LeD(urUnI?2>*Zf^;=bXB^@zCvw>kDmLGCqcy?dDM1
z;U*dvE?gq9^y+L5F-=uprqrn1*l;N+Z~3WH*e5+(#wXa%<Ra?T;%1+I_PDcO=H4}N
zYoz^SCI&GDco}vlp9y76D%-eOQhdn~595vteOhuw?*f9G6b~zI|G8_A;6ld(A#%b0
zuKj)97ViD;Xk(lTv%gZh=j!x}U#{xcf93!Gj6Xkaf6UuIXXpR_b3K2brR?X!cMEOj
z@BjHaet+%%=F7!?M{~D&Z**FiwKQRywDL-hyBbWM9uG77WcijG=E%n1-SO?V^O{w!
z7B%!eJJ@W+t2jGib@U_k{|Vs?Ggg0$yUbYCIHjtZd)L9mj4i8YTB~mAS+`2m{_)LA
zUmu8y-~N1l_tp#^J`vAbQ;y#C(tP;jU4&3*X?du_#KukQmWmW^F`Zl96TW74$iz*{
zrnEc?(@W*;uMZM5YMfB8VfT_Xn#=bv<i)(4eq@nXY=WMFSJU)qCtfOd6uHc1JbL!-
zg&%!e+47VZ@A4Gr`tY#e*U#JL>)*W#O7U5;WY(;63%l;_(W<<2=4|JZS(}trKl?R%
zx9!f{ux$qGe*WYC-*5gr<AIfVGV_Ud83mgrs4?%0<G#6TioB<?t=y`mA1>bY)@;4{
zx+iYIF6&IrNhZcO9~PukOn;k`r?D)+!^=YE^4-(z*VpUW*F?ylPSsr#Tkkz#=G&&j
zjuz&Mza>2$+@6)S=&o<v^bCnZUv*dKS>?4}m=%4;bc^Hlz?JNaCtERk<mCw5GATW`
zS2|&bXOC5XY}+B1b+xR)(;m!-cq4k*KIqQpQ^#*_?dkn_#p~V<1_OsFhqJow?YPr*
zKa<gLcICZ1M*ci8Y5OF`d2@u!(|4?KeRt(3ub9XDoJa0b^ZsrY6Mmeu$-$R>8N;i|
ztUG2Ozonqy)Kns{!qTxm>Xq>8#jIiFMmIuauSYzbX=Q)I=XOY13FG8*fgQDilWrMt
z^Jdxkp3uCVfB5CSWo=4Z(*3Sh{y2B{VCSK;=l4FYWpRA7!S4FMH<21=Z*IGk`tXv`
z+a>&m7DrxE`D!Ry{VnT3*0N=t`ZpB?N)whY;`C!@?ese7q4wbrXTfo```OlA8`75M
zS<Wzecl*38(?d)DFWW_{nK=T@7RLQ;H2r(+S968%*0RQ~_*zecbr)tV5;zyoa-_{U
zmPt1-)n`f4sl(lpyZrV|;QW5k`sbNwg%kgtJW4;>lygh|eb#TyYkiXwmxWo_C1^1h
z$((pS+tqYCn~a~)^>xBK8oB1m^M0J`e$V8F(bMKdhdX==qBD=~6)gF4?Xl!}+x3m-
zzp7a))_vH)SaQaWVe_eWp&ikkDV`pFk`0zDOvb*dOI4Ro`6RSNS1{Z1Mxu6@Mo-+D
zg0Bne0#Z(PFAF(!E6=u~_2P;Py?P4^{X{xNI5Qb%@QLl1F|~X3vs?eN+D~ja@@1L!
z8*_P<bPk3pWfoU;`)kijE}bs=k??eO@lNlT>Fg)j=bW(LxkorArLyAh)BOMc?%%8Z
zc{Sdy#v<BI{@$E@`+o0Ue}7+OLjQB~<L&$J)RynDzH@i3oc?tC|6gY>|9biECY8_X
zyK8i&^9L<)GGlCYy%*NynaeD@%ORU5?kL}{w?5^W*VoQiaP9=>d|~gbQ!Az|6L5Q8
zdgV6Lvz<2%syd&G+~cA8<5(1f)6&`OE}mJNm-DoH?DpE4yZwf4@ztv5^X|9`E9z-|
zxOLz__ta9;?^CjTIwt2m)D`9p&WrJrl8s+$;ZZmvz-P{}Z#Jb}ZC%cS>$fm{cFgem
zzMOe(q-VUh=CN0AmTP4`YYlSX4sU7j@rdMOi}YTo5}3jFFs)_R@-<9rUROoh2HfJh
zaNB0;N6#xguaZy2@2U8>`TF|zMPVUwa;~l=9fi?NnLqM97>y=ytex~ePf$5U+;jJ3
zH@B&m@2VvybWU4lFeOKUamS3GUh&GyR(m#Uw{72bEbCma+4P|QQdc$opI32Ow<@c4
ztWq(*)T>z|&dzaYUGc`!S|k1EI+rfp|FXqq&#wBv|NcA-I`^eGq9IED<iWM_QQv)^
zUQj-is<LKLwzt)*ZBFakRaCbW#NKA_cd6u<t0mhmW?fe}(IoAoi@NN#b6Japp5-a^
zcq(397S}6v=~DBKm51`CY}ljbac=X|wU0}*&17G0-V^ooTDbfTZnIy{LxoK~Se~6|
z_<4H6j%8Y1oo|z7zj}Z2<+IQe*VM0@tL&JObZ*<5zfxV-j~rySX>RnCxiUBX**BS;
z-r>vFxC>f4o%@wDJGU>gv`8lES*Pg6mmSaCzZb+^H@kgXd{)@swH2x-Caz>Y_{M2k
zV)cZ%lMh;k>WFd+UoO1PmDA*;*_WuF@_FmMC)H;@z1`?tcCP1Aq*~n4oWFZ#<{df7
zVq^OJ?Yoz9?UqZ^!haRIvnr)*;^jEn@`}agoxgC?yGN^53My~9;IMp22mAU-RqH?Q
zw!3#R?3*LQdCPN-Dl4)Tglyur&tBP)af`9j*+co>w)0<k-ba2mQY`2YJhE-JSi)>e
zJI~AJ+x=p<u-mtlx>sgbvI@4lXKRKmG@s;tK=0u_*DJk$^R<dUOcc0v_O8Uk4W;v@
z%wGFhYiZ4bi645RGuc9Nxqfj<-q1g&sC?LLUrTRxzvYL{!}=D?vr}4jZu`9c-))72
zMTg(5Oz!gk(&NjUnLGaxcc#wjnZ`A%_Dp=p<4~Wn>*IFIDQ4aBW~aPrllRn}-&HRz
zy!D3cvJ}mgT2o@Y4a0Wl_=;XywOQ-vq9-ORyiT?6d7}I`qHE$lvFx&whWcX0VGln>
z<Oj-hb}tfHJUf=9Ys%{f=1bl(dObf=<Xp|Rszc$6{-Ma+0D0rXDvvZ~-P`>?IP&Y6
zIehE;e)iSne>&b>tNc<t|6a}44<8=h-v8&Z{lEA3ZAF!8KVA*buG*m@6Th$ODSPGL
z&*tmnw0=DNm~6Svqy6yp_&pcDetLJ;{QjS>`t^VQ&0hX>(x-?2Dq4O%zI-_~_H1mI
zXT#^Cb3@c)64qVU>HT?l^Qk#)?D^r*n|6LbeQ0jXT&)Gi`qsufd0f3Z<&3e4rp-a4
zqZ({Gj3R9G3>#NX>DOdvc{}gxy-5Gx*H`df@!a96H#IL=zn1AutFXId)!Ikb)c&lx
zY&tb)QtP1@ua~zTi+wcF+HcUrt~+JPvMo>MEO?i_G06AFA#cr|ZVAsBxl_ODm`0qr
zbZE(kq$961pB^r{v_$dB#U<^ktCNmaC1|C+j&hkWiH&o$m2)VoCilC~pPqJa&wm%E
zr+M68Wc^Odr%FyuXHRO&ZRYaz;5PAYxx66to8z(%`<}ngxubQ;DK~Qainzlo?s~lb
znwzfrHum)##?Cp78t?cFHD2V^2~M1{DS#zx@1}dZTos;{*vOsf&-0)D`6$;!u?Ek}
zn`GkV%gMcuvzcqZWBs%n4{zSx&YAO|XNCyN;?jKKqkUS^?w%Z<+LfxrR)4j)bT-Lm
znOw_`P0zk<bEvj5{S~)1UBKdF*8Jddhj}+e%V#GpFAe(UcYCkp^xmee{~MpUW#nY5
zn=PzP&apo*K}Pyjp#H*gJ>d!=^Bp~hdM~8;M5KR_aW5*BcRrW&)$#A618eTCVm=Ye
zcT47VW$UvWb5=h7&sm_&5^nL>arwG}<?FJRSLmz1VmbAEMe%~&hoq91EZ#qBvFZ)s
zT74@U_P>W+j5#;m_I$23J(+FgW%j#&gr@p5d3~R1;D7AxN~4?J^<H^jW~lP|zyJ2?
zp2@5m%Z>6cu9;EXw(IS#S?4s*xBj#-p14$Ck>C0?r{z2z872l_a{lO;)tYE_(0ut?
zA$8db3D)Rj`L4R4mu~!1>bRHnv;Xcnm-{pFaiLg(p5&fGjen#oB;!0!Y}ncTZ1<ex
z&k@4>S%06@b{0Nf!r@X}Q6C%Q+FT&pQ?Nth#i@t`tGlY5n@#k8)u@%6-4VX3>&Z5y
zhq{40<?WK?vu0|ZUTgby)?r60^EGSJXHQ;osJ6D?Xv$)9X+F(7%L7wR7GBzVsri+z
zZ<1>Am%u=gMCqLQI~QH{aoQmDwO01t=P<$l`hEX*TORr!6Dy#<;k}b~@vMEvUT;cK
zTEyA7>G_ijPC-|{shF4tgv72`ApXpu(do>b)y$I?@hs(@eXUqV<LfE*876z&gBEQr
zD`n^L5LP*R+rTeNfHBI;P}+-OMozFqY=P^CojaBlKl*-XOB`SSx7{`0!=BD~GUalt
zRc&o~{rjhf`S1U3FPFc!_RR9_@qe}C<DWNQy4$B_Q9J3(>7VWD)t|n6`ZDeK^%%YU
zzU7w>dplSCKg^%M|KG3e_Wvujp3dH1_xbJa_o<#gC+wMHZ(CbqWxqD<nV&YNtX5rf
zvq<2|cRsVW%sO&pdBk3W1v@8(E;gGxH`G3KqUK3m?U0w9(+)cMYKWda&GETU<55}J
z#&?x5WtERZxAc0<m@J}NYgk_HIOz_XWq_~uvi58K3QCdM1xjbuAB-sazGT%2o}M*M
z(^J<?-qUe8*vs<yFQ>C}Clz_k65)tnGx_u+ah2HfZu)V0Ve|C-S4Uc@gtqEreQ<xi
z{865_*Y|ejJ3`$T`KxLy>~v0FjIgWuqv1AXi59!sRp(_(r)U(}$OSd+jJ|SW*23e>
zC!D?FZT8ez$ShuWeNK?qO0Op_TeW6Nb1<(t5Gb1BmF0XjY>Cj`S8qSshH78Dr8U#H
zC|BZ2`KNbs4tGz@T~ql??2h6`t%lyP1ItruLwM{eug>0h#xpv+`O$<;?tFKDZo1^D
z<~u1w@L18}Xs?$gHc^Jr`eEnX!hN+?E#hx~EdE{J&gSm?(~~`4PINBKv_CA&`edH0
z$rI1g`B$Ggn}(K4tDOERpXK;;&YGW>rdX;q9C)$P^=}n#z!tAqvCu6=@ynh#++VUI
z|L@j2xBh+KnxXr?Myey}g^PK^<h1wC98|6By)Vy9x6W`}yD@|BX|43e7ODEYxiwL4
zZ|apguFITMWX#LowEg8jjo)UG^Hw~5WjH-I`KeyPUZu~I3YPvVv)QM5;NH9mN4~Pm
zyfGp8tE6RFY0(p{g7?Zte2Z_q7GvvK<7;!MT~1^NuhZJ>tp*Pj7MX~A{PC;!1j~v$
zH<nvW@12~)A^q{xnW+aB2%Cj>I=nt`boQk~cNMKl82+}#SnplUZMA66vpq#Iz6;GJ
z2pnXPdLOQ#Jd^vdn~v@Jq*<qGmz{e4b*)*Kd{lFu(IkW52=3c^w6>Wl-#$G1@KS4S
z>3KStn}ZqrrcQB@$(#5;SpWZwS=U=7ezAwxzhb-BA>37(Xpr>2eA3>zefiZ3&+FaI
zQ~I<|X2s75MTejAI-KF`eevPIt?MT7{OKNRWMud(R6TPHn;Z0%zvPx&zWO*TbpOkr
z#tJR5RUGTDKA+7Wb-USagTPexvwhFAUlu?1Jb2T|Xo=M3ImK~JX6K}u>y_7h^uDz-
zY_8<>@;GUeeDlz4oc7AxiqFa}wSE8JJY{qJLyP)*>)xsqPu&_gTVpW?_e!Vj6T@Oc
zrKEgXfAGj$@VMr>bB?}mfB5n_JGmx4j#+t;^YF&oSA1H!f`!Evxpd^VWE@p9TxZeX
zyR@`)hTHsA%sED?J=<m<e&im|s{fPuvi-E8e;>ZQzWnLig81csw0?8P$In*GuB`m-
z7C-0fkE0J~$j!H#`**{OUF+W4*Tv|a?*2V_h1`Y%U;68R{+s=N-_J*HyY=taRaQkm
zdG}Jh{(HIpJOB2tH8y*9)_--BlZ&&P^Ui4B|7ZR3`~I(<{^@Xg`Fp#62}<I+`|Q>&
zyz;)cdrM-~+x5b;F6KVz4;Qr1obsb4Ykza#$t%sLrm}fD37_@2u%|=#-NL|*LkGKk
zt=i=tS>$GyCI|_f6w}M}6iPXHX;O)zfak0U6M~mr_F8#;UeZZ--J=0_PDyPHioGfS
zwdmB=Wj%9No{($T)71ajAa&V;<Ekc?Z)B*K=D8`;AHOV^aGYP<IP`m|ZmOtnRFT)L
zO>%`ozn*>y(R<gvry%l6MHFATrS9oO%XucMGoGs~U6bc9amne!A5~rTUb9YkTG4Q#
zYTumeo4H?FKE3Su_35TPcD8daEt={v)x|`_^_J1G18)0QS^YeaojF&Odk1gp%Uyk(
zo*PdV@kKB#^Hg(N8ebc}Zr#Vei4WD6MSCarMSfDVUtZRFZd<|E1;S6aOo+SYtvRp%
zw4#2Rs+{1`HPb&&Ry%8%sdV?%UEY(Dl@qtdoZ6-LlOwc4P39f{_x11N_x%4-XWH$|
z#TDXn*CVm(@-*qNvu69(`NnJsGJQSsl&)2c(?yQvy$>c|yP=vVBq6%;R{K%Y8&)g|
zo?B->-YzjQwx)7*i^XRlL$z|z+nzTM{Qh0r+xO=AJ;`Ms3r|1zQ>evQ;VL<0@0$57
zi_^-Eow(&1eB{KJvwBTo{hfC<YrZv)nsP1AWRJ+xYkrxBH_ZQQ9qJ+Wq;q3ee)O+>
z4gJ&4u54SUow!qWt?9A1(ihLNF`0GBun36zzj(EBu7HekYxm@UC6^9uy{FxK{ou2P
zwSP5ktKM9nUG~oYwMJTw+pepPvhD`4M{a!2-Tk%BTmMkV-c3g%*|w^#JoCcs-i9Q8
zgV!$%YyQtGh;Av{$e$_}dMC?*{dDJ@C%*Z6Vx?EtUN1=8takX!tIV`LJN(Xs&t0Q0
zSSK~L;79q{%B$@@jUO{^zi*J*ePqRvutO#e*)v?qOBFBN4?bg{@^M<)Po7s3q!Vu}
zNt<<&DLCh#+op*5UrpW8cT_B&EwVPmjh*Am?~9&`xhGfHb{*;w-nOiRb5fk|_JjWN
z6J1_U%J+NM9%%F^(QbF|%#!v~Y04d%MY9<g92h3;xp+O~Va}%wXFqmC7R1YI3Rk%<
zytrtqMrNUrLYL{6hzrJjTh`qan6&I&l}zJ)5rL&G7fU7^*97-A&)>3im*Tsh@@xM|
zU)~tE&ge|WTP3ke+^nG{K2!8=UM!T|pvJr+OYq6G^89ym-*sBm^1tmpmf<LR#xqiB
z@979Hhr`0vJvSo6O--VY@7cEV)(cs)wTZ^s&!({Kxp1E|XYZWazrQ}cbp6z<{`-fs
z*1y^FYyVyik6%0e&%cZMnV!?X9=_cE`0&dNHF<gapZxqk)8qG6l=@HIQuF!n<KOxF
zV?*xktK2*%Zib9pxb6R^f5n&o`SkGZclp2b|NZ>E`}gnr|5xAN`{&V*50A8OJU#sT
z{k|VR{s!;-^KpH>rTx^+v)cVnTW<`^l9Ar+<<l;A>o)Urk%gU$J&a0y&j$2e_|Cl|
z@8sJJI!s$Tl+SBco|xNnk7w4zJB_}pUTjNQQFiQSM8|_ER*_{=QccT}Dz`jR_7ZfA
z6KnN4^>LGULyDK@g7(9X%nzcLU*06sSE3oC+BjtnzqegRka4%QVcxMNjS>Dzk9;n;
zXsr%ExJP99he-=(O%J)YMdy|3?w6^tdfs+>cGy3g6&|c}VxH#{1r?Q6-)|M=^Na1~
zpKmlbG|sS+^?O#fGv?aYBEfwXKR;c)dsy-LoM~M?Uki+mcBLI@^WZmg3z&T*Hd<Ls
zdfh*pN8bW_JzXv{c3y3{aIV-*V*TvvB5{vjshwkJ+7<afBlO{yhM8{h>vdCWzcpUW
zp0ZRt`uz0Ilc!%TzI6HS#kFM_tug1H7fsxgF*$H&Wal?K^_^2n_X_M{;JP@+kH5WL
z{CNBF<3$_0GrXH~a@#|WzEV4$B-CayGwkYf6Ps?IE$!RxeJ%S_em*-|*ym~wdz1K4
zC4q%^bJiX${kY=W<gEHzVTs3W_B_9*P(5|Sl`vkv63^{FRGMD;@3@)wbo-T$o3DJz
z`epU>(xT)?uN+0MTeGp$OmYyEF4x}uuJlrWM*H5LtfsvMc|l*bP1gl9@7;0!+p4tF
zUU#O}X+GHeYa*lkoS+Ze9Jl-stjwMh+&FWiR_ezE;dwjsm5(IsS-)!Kw`ilJ=Z`mg
zT<DtHJ9moz=UZ{VzZX_8A39t3xj4S{+dq@o^XWZ@F6)QY@;!RCG&odf>Tg$}Z5h>m
zT(g$_eQ@vOM&YSvCb~-8JMr4~*0z}YTT>d&m_A(d&58ZTnXc~{#(Ug<l`yT^?KmYb
zXz@znm%XZdRT~yOFmBwJcR^aim3dCY<?NeclDk%{+``EgeVDiFQex?1V^*!=r0k-_
z0sHwH<>C%j*6o~oyx8erU&8jdyXWo~#i>qAcu_Pf&1^;ftfo)lXHB?5cLYxfEnD{J
zi>B22d_jSI_bT{5r_8x!;bwU&wJyxzUg@4|rsh|bLXP_FF8629NL*;Z!ReuMP-ITd
zZbNffp}AHQ^JgWly}8b)Np)-9snv^TUFDEUK9+ECTZB%D`}*$Pw~wBBm1eU4=Z*h`
z2XCf#{hoh~rAF^&o!F#_zI+#!Kh9nF;#jAXwEdE6i?=;$T2kD&aF)r`rfJ;QL=I_k
z*m_<RoxfAaqBS7PZEaAO>kg?&3%D*=_HCVY&pK1&@ft7RrI*%i-cf4e=qm2Z5Gr)}
zlj3v#uz$Ozg+KimdhpZn`hV$>_x}8O`|_#njvWsfY#rnFSAJF~(){${)mMIh|MG};
z3*AE>r+)pc@87;Y&hF0cJ=IqBwN}sB>r4NC`uDY4RrmR)v+?_X|A>nJ^K|k$ag{rJ
zcU6D<^n86>UB$=g`TwVU-OcsBp)1Aw<!rNE$M>G_TmH%J)9k!=b`_IFR8AQ(@kFWr
z^|R^R@#5x_oQL+J;vQ0x(eEw`Y_I(h%Xjg_?o;c!re&zj2rf!rS+Xv}eon*g_S|hj
zD^r~|F<LIyC=Nd5a7we)@Z{WvWm7^m+%Ha9Q<0N4>*{RHmCJs)pV3uU4ZQa0p#8k_
z&x6h_wP0zVvR0(9JeqUwja_jCo}3{|n!W9A7hQT>82M*YoZj?^MHZ1<u9FnAPqF^0
zIOw!gW-eP-_w&P=*-Sq^9^U=w@OP1Yd#vxsPFv!!YSyF`d2JlGi?wyEG*hp-PPio<
zt@%|T)b~yQyF}O3#pmY5ybvx9(CVLgR7U8N47=^A+Dm7>C8~EXY2}r=c>Ssx-&>8j
zt2{ehHGTKYnCg^lsIB$R)wJ6q$X2dMvt;|%qQYHFv$EdqT#{0`@#?whQ@<R3xFYjp
z)voBcJ$tHuCf4fIaW*h7?@C`id7hwr@Xf2&V+%^GcU_MZTV}-Q8y?n{w3RVlxz1?Y
z0X-)7HTNg)3O03-ZgO)k<vOcv>#Jtl%HvRR{YsJFt$W4{hBK87C10KB)swouNbcOG
zJH3ZrWvvKYTbro#T23Wv)fTsxnl5wrZ<^gI^!QNp_}8kiydK#@aX!Y8hj-3>a5_}z
zmCXz3B>DMqC*}4OnSOfguroi?afU40*Xy@W=b1Y9d$o4Hh=}R4+Ia4|dPeY?TLv{f
zyr=I>=ofyy!sz4*g}ZA-SKi8P?2YVNr{H$quV-#1`^Qr*ItPo__HW;`%xWG(`TMqM
z2A!+?Hb2@cB~`t)_sza??V0P&+&+Kp^C9VorEixq^u(=azRY4=Q@w|~S*dNF-=@!x
zwyPMGl$VrEQ#<DDt2A%Thd1&O=j|6e1oes(goG@vy!OOv*UpWJg4(TaRWDu&oa*<G
z`L;D>zhcGezUG+SuTvLIYU?}EzHI*4nBDsCs@bjeU5^}nl*rzCU1y$w#_=h(jd^lg
zWn{Qy^`|_NJ>!$PxcK~buWJ)Kzwh*#dbiL%MZ&oB#j?zO`M<3`xoYfQXtC?rUR$;c
z;ex&2ZUh8fkXqfmDoS;U^83|sVXn8{XYOCMD8nS!FiLlAj`__Qhu!~O+iz#T@W*ZS
zU$@_~nO_!)dn(h{^Texw#mq8QU`OZG!#m!_P0r$(@M)9I>7cWdH6u4(_1dcu?6oK^
zZ0WBLGm@W#T}hr@@aFpTDUMpQ)&XlzarW+0I^y)`Om^h`kDomLJUGWM|D*r^{y)#3
zR+OLlIpNYKnfLK~sy_bux?0>%)n7kuPwihL`Azfn*(1M{SjhI<|Nr#$bZTYI|9_9m
z@2~Y=y(IYA>%)g__2j=^Zl8buxqiItx%>Y<&);8Nzh}39eErWyXS)wK%i8@bu$p63
zG^v$ITS;(X)MVK<iED>do-C>4E}U8Ug=tBaW9Xr%jVyvjqWdl#R@80#<5I~rxqRjJ
z)@34HtZScd;+JGJ*lc|#rg#q5t!FZg={)^8hi`Q6s7z&&K0L8gDpOSRL1gI5)xjc~
zOPBf`a=M}yV(_$Qj#%resIv0K7o2`X?A*e!h+E`n?%o?ei=GJwb2-foo)P+<ul@9{
zX^)Pc?PC6r<P^G8m#J0mQkBWZ6>Y8W($dlxuGUQTIJq@d({kO+B}=R#Ga?(O6no{r
z@6V67mEnt=oaJ}n*OL|Y8%u8UsT(iOiV8Y)!DMIuwwF)dXgKtkK6c@gUm%pn=lfY=
zSMfKON$+$I?wOfy)tENj@1$;Px09iEzr21O=WgG=i8Fs|vBV~ypSeI_@0zlmYnC{w
zs&Z_262j-L`F#23o0`jat3H2PVR2XX{rgWJSK7oyY-7L8CwVh<vTa~Xqme|@>4g;n
zUk)(doIYdDt4+0uPu?mnN?e~5vGT>FEzitcKBQcg%F8LZZmb#e{LK6N_w?>aDNbB_
z=-7lcZycu_zTaVaxktL;N$%_<*>%^x2);D^uai}JQ&NT9qV<U`$AK{ZbBzT{-)HZ*
zEuDR0|AP<GwZik2eUo?Psy(@@^EPp}_$sgV1aF5An&LUd`x5q>-mkf4{9@DD2U|Pm
zcLwgNir$yFq1E6`WAU<t@5db0@jRX_-IgIFyRz(7S;)0T_jhMLIj-Kg_3!(~4IhNQ
zdVa15Uza8RMp5g=!>H}`i!xuv%u;KLX8QB?4AZ$jzAxV<Cd&yun^7jH5!2PNN$T$w
z_nd^Tcduf@T<aFzUy!lS%%FK$*hb~A&Yur{I(CKY<mN)2=(kVLKIbeGnwwh?t^Z<Y
ztJcS9tBYssmP(uuTBnhwrxcRgm?C%ScZ7<yjIjU8WPv?QB`;#<JHOj*v;FFi*|E>R
z_?b$qxV}_dt-E)-I&amrl;AC)471GwzSy~NU2;BrJ31!v!PczswZhZS>GAJ9;_y_(
zAyg<L)avU+!PP$vlxI%4dOa!a&PkTvQmk_q{4(cxG5@FVe`mkDw}U5NJbL!vvKtyV
zzHLwVUhUt<?ry7lHzJI?&Z})!ct+@&t6SXnv#7?eHus4#m=@-6dtO+eDAU!vZmmu?
zFD}9G435Su-&am&UoTpn;M%lgYs{a+%5ttIoesfq|K{*p-W7Jgdin0*)#2;?PJ8+W
zMZMcs@%h==yWMm6-rswuU;p#yasK#wdn5W!%YOM$Vp(7H{m}e-`>gc-z1;u*=lSXH
z&-s_Hf6rgO{{1_?pYKj@w}1Zg<;%lIua(Q6%Z}_0Uz>LIy_1HPN&A7w?%(rH-Msc#
zPDHkRnV#5(iS9*7RZ~@#9~S<2aYxxy*}1w;owB@6PtXg$dHb1~i2k|@5pTYFZ#`7L
zH#%s7NbiZ$K{G<8?^!a3amgIT0&{JV?Nf^4#C&&7xf<5Jvf*G?v{ykz4CCb(ZMC4r
zpd>HN3#%U;yk>s-gr%lt0c*cyx^Md46HyUtI<s|L6LgNAUVeB|u~pk9mpu^+D;+Md
zWNWshEsse#t0_B;)wBEN&mzB9kv7Z!d^@|^T>l+^<mzB{A%>c@nT>^A6;mB&P7S)_
zx^hB`2m6Bt;jc_v&faUQif6yMWNG@f`6q7IE&sQkbC1l|K+o?E;gQn+mrChfoqm69
zUBUf=2{tkJp7qa{JMZtWRB4%Er!#Bo$LRL6oSOO~v+U>UFP-ywQj+WTCn@bUk*A~d
zR(iCl+1`<T_pYy8UT=PwabCA;-l}Okz2>{Eyrb3s+F0E5%z}CRwQqW=N>Aq<ySm1@
zDPsPN@U7tn6Bwi%f_Qn)UapzFyYJ1hDV^cRZ>cPo6L|hHp;z>2%4+WK&)&?Dmb<!b
z?;Q`dFFPeKgtSd+o~>*&v0W;phUM8duH0LPcx64L6T+wcn3Ax+cb<CwuYPm+kD(&(
z|F{<>|GT=Q`S5I;R~u>%S_ZS1*Uk-ye9!RZe@kEKEX5VOSsK=zzF)IN*FTN>Y|n*$
zuI3D{73bPA1m`gKERqfj+%tLC<4ZYu>oyj=_&PytNATlD$*KF>;y9e6Ynm6IntL%}
z4V#(U(Vg#i%-c|ur}@2RIq$7qFPmmPFT1X=<tWGV1-mQ74xZe2SaMU;>+@k3Em`zD
z7VZ_P+8-X3amj4Xy5~3jWfVfEzPcCKSn)CKhJ*3#q<5+Amish6SqpRSHvf|L`Sfh3
zHGNA{tM7eQdtWjkb5ZnVvkBkLzFpU{UmkqbHTaTPVEo6OT{nfFiN23q&9~ohfBvTB
zt0fQDoi=uj_{9<ux|sE{<Fh|mer~H~PHC1k^xn2_ZJ>(3_O`rzE7sg^n-`QC|8RxZ
zpX~q5iur4foRN}tUwDA$b=J}=C#B|}R1<&UKl#a@wU_4num5MNKf&CS=el>vJ%QFT
z#zi}Kvn}4AX~`C=$Mx~s%UN3_@&rYFxtuhmeu@^pzcOpGsVTqEq*Y5eTe{Rj-|tdp
zTOqi>BT<NT%QBX9ha-u76O>}lxo$XRy^!m}m)g4T%Uh<*47vQ!pI`qSKYu6xwq^eE
zo0nWK=jUJkxc1!BzrQ~{ynB1f`{_rHm;d_mE<vxRBKF$l@A7{RDF1nV`DM|GO?FTH
z{rgw1)zc2}=>G26I>{#VoZeQh(pKp+ZChTLtV{fsVdwh!@XtT0f1i7cohW}M-(l+T
zMnuZhbK{NVkei#%`~I+Sz2Ll{)lezi=vuH?VUXyo=7?(5*a<D~G^Pc4ot{+8)o9D2
zw&a;dzO7FC6!F$8S6!YIOix;Bp!50Y;gxGc(<PTgcF#K=skAfXrPj2s6HK^$TRbLx
z+O%Yj*R|v7g8C10r&~=lJG^s>w^Lk^<-BQg1ovld)e9(iK8c^z?RnRc%O5w$+`abo
z<K>&D7X4E?d3yKb?#GiRFPk$VM9VUGc5r?2OueNxwzi!*FBj+qX<mJ_Lf&EW9A4IA
z9=V%mGMZI1UtAd~>$g~I$Cvc_24$t7)v4DGSuz!QE@{wqSRuRrZpPodqere@E2^`}
zE)rjOD%L9Vl;fGg?PBvEX)fKPbMxBiqxN+*r!-}!#e|!Co;~&R<;EG$T;KWg>-+J)
zm%nd6O+9bkrQ0h6WBi@BIG$QpXgKkj-0H)-*k&tzP<g|7o&Um*kJ{-P0Zbilw$FO<
zTI%_Brk=VPDkg82E4(?e{Wkx{$W_1A#1#E`q^ADb^6aw^>xS!-e{VY@(qmIqz_HtU
z!petAQuVQETmF82yx`z*ala%!pU$mkCzdY_eH8OJIb_=A=|$n+&E?<UtGw`C;dWuZ
z`ilbV`)|^HS(dsVILDIv=E_tz{-5@@tGBCpZhGu-`O2FLm$0v_yaLTL@7CV^|9^Iv
zTZ-#9Baw3)7h@&g9ryWu@Z4EjrPaF_n^-JwD;#+DYSoz@j_;4l0{@1rb8Eg=r+aC|
z)TTB&nR7?hPAO53)3~&C<^!)dj|ZK{TTIqG6!?9_C;Rb-XY-~=+&!y(H<-Jmg^4wx
zLS@O0ooxH#!b~DRpJ5mMbn@NCu(&-_RFt>B`<3qJHj9&M|C##pT9aR%lUrEByzbQM
zseAsOQU11adf7F$UlNVyW|Zx{Daz3@?ath7smbE+y|cDRHavG!nCE%d*=-Bk1-|6J
ztV*Tdgw~Y)Hx|xLEzteh8vAEoPSO6S>z}Mxa$A|fHqLFq!mPIo4;<O29JK6m(*)P3
zX<_w_f|@IPOY7`^{hQITWO-bw@0tmzPIvyEm|}O0!>wJaMBr-AsR%>X<o$}=`rgw*
zw`x4!;!&zrxZrA7>`dbur)Br8+<WVk(Tc6i0v%<BCRtAlTjUEX=DcB=KKs#irG))=
zS#8*N{L7tTqi%KnX?j)Qi}g0rT`lHIt}|@^R&2NW^Sb52hhARVY_~OH;uMMWhm+p<
zyKXw3p3gBcBSyBL=a=BF=jpjwt9Io^Uz;{nq|o&3>Q5nJn$cA`(|F@vmNZT1s`#_>
z4}a7f^Iy7u)@{E%zh3YE_5Xjvw(UNl_V4alyZuwm>-WgU|GR$PzV_EkVS&>6<+<~_
z>hibmxV!E5U;g*U&rgk4satv6?_G~`M5srxzqO*0Nv-}Z_OCNrrBs+6hVh4s&S-F4
zAJ#vA)&G?@`Y+UP`gC8pj?Z$Ijltf$$C|gM^}8@km*_Q1_BpVa#XIowqAN{F8>Fr-
zpUkPUHrY2}+e|U-n@Jll?`XMMveL;l&_`LQe&(wFst|rb#=F@vdbdQ1<n%bWLK4$f
zY4Kc=o;{7}!M5C0vw!<>Zr*hF{r1~w4lUkqthL#s6a<!7?0xmGLO*Sl!HNoRKDGlK
zAGIF6tm~`#kz2n0_dnHFjz8X(-~a#N?(*MfjYPFYT_oiXnI?KVNvib!t*C!<JFMYR
z<tDab&dXbbc>+wn?D-b|JaSXp^t2AgYu8Sm<SgIy{7Te;g7gqh{`H<VqJp8(Yi=va
zF~7F)J7TkB@2+Z>uX~Ql{dVsA8(7YjzkPP<3GNv`<C%EN1p_lQpIUr7{`jv&z#0uN
z!?maM``h1q`nGb)>^i-C>8>ubgzzn!H>(@Qv~`D<KAk0GDbdzjJl$lM#GIS=-q*wg
zreAG6xLr^&bD8crGg-Atlcy6rw<c@qtz<Oz=rG<iSw7-)+)*B0z2BUA8~?oj)c;4V
zr}yk<F2C!V#RaxJ3~vw1PmP%$aP+FT!QZCWi@*7tep%MRH#u$w`=_Sr%MNdaKUQ&F
zj3~W2`Ki$kuI9a4_i^v4GI?GhyIl67#P(o@3b(%=4(?{BH}O3?lO?;XG4lMJFYJFm
z-Y?T#lqIP6@Uj?_0h2b%*Ixpniy9B5P3YEf(dE1NKj;JxXI)3eYj5{Eku4FsW~e>>
z`Z}C<nt8p`_xZ(_+uSO1KVIAZn&(7(!}`sQ$5mH7Z;ji}YxQaKyn^@ZWb9to@3LRA
z_(;u>sZ3qdLi=Lwr+fV3?AUi=Tl@39s_WDFf)-qwS>}>^BjgM#ub=AcMOPT7%;evm
zcgvvEPQQIyW^(i$rsV8vn%Ad<f0`5g^p40^L0c8EPT9*0Av`M0*RK6$x-RyxNzm;6
zv2T-_*DpLj(P&=Llk2O4Yz`{j@}0BcT-YU@NsCRb@->8}2c9aqV)Dytd7bX8BE96^
znTv1!(Y$o^Tatp`p{sp+^BYahhFRCME-LPQ)O-8xw4*sKd-gkN|KED*U;OHI_O`W(
zB6YdTH@rIe;m_;Go|o3h3En+AM}mKbqJ;YozVP;jd6%6hF^0D7sJM6du>PcPM;mLr
zoVRfMT-}y?o9poF!Yi)|RZiSJ(lo33-^U{}*hSk_EqHr*41Y5HRBBWBoBnfo`IUz7
zsTIbxyQiL)vR_s6E5?4m{O#NJ`~Lp8{Fi_K;{Eyc|CYbM*MI)@UHj9!_r7<p-C?`P
z?#}*}_lNY?yq}`~W$q?ncE`|*y~`F#Jt@2rrh01O1eX&YGEXc1YyPR<{<ePS-Id?&
zYyRi>C-kVPXtP)Tr`;JhuNc{A?OOKki@?W>B+1shZ}z=B8{p%ud3#~|=jTz`EUoLV
zEN1Qr{aSW=+uN`+RUx_zZ>m$>iaK|=UwtJf@}kVxczyL<d9}`|x)&?&*0OqXDP0j1
zS#tf_kp)#UQo4%{hIIXY=osRE`RMK2(bsCWM#>nZdkZd+vD@)4_xyxQQx#4c%`^Hv
zaa}6=-`vah|9^P7{Jp*C<RG2mxy8b)8BvGYA9|noAQjiTC1}kQw-xU;o0!BMepKjg
z%=GDMt?#-PK_idvf_wQAJ=8KS7q^AGt-5ur@~>6l;{CdY6;gr^<&<iwt9N%^Nno)5
z|LODZ-_!l`=fBG@5B7fUWLGb;DYvyqJ-L2*ZT<4!xx%^6W$(Uz?7cU+`u4l|@8|zN
z{`kCUMD$d#14~xxTK@^oS-Z!8n`hqPlhwDja@@B$uP~t_z_t4???QvKom##Zrkq^c
zY`<zlcjd3eQm*?9J1n~|7xL|yWww_8QRK<~Wlrnov96!A<eI>0^@V@f*SCI}di}&}
zwMVXuO<Rj!C%?RJF|}%5EZ@o3n>QENx^7&+Qpsjj6!@r*-+YF~_Wt>WM=nKum{ZIU
z`gWr`bF6}vl4p0^7wK9ddzBv>^Kz`!OKw&bgl<XLrh7Q@=KuIz?mH^@-Cll+xubtg
zJoAxKPs)p@McGfJ=3W;N<9?~}Fx`jo*hS&#?_`Vmx+@C(*V>%z{J;NS+Pd)4+u4^s
z_rKWEZF#t==37nRVw=cZuiPr*b&^YG28eR3)SB=i<3p^Y`m%K|^FGh`_xkka$$LA}
zBY(Fy|6Z1Hoa-)=k*)3F5`lnAQ#_q{m^{_H3jZarP2JX^ow#AmOY8O0y~`!PP55eB
zU%;0r_0FhchOTS#>9vmB5&I>73EJFXv3S3;de(zmkCOx6`JClDFeQ^`a`fhB=ZZ5`
z-W;<1l-K{^Ra|z(*)uGkly4u*zxeEr^vPnoUxxLj!X5Fp%circPYdj0(eTUPzTmvi
zvATBm9~&GML)LzOozimSkHY2fg=qmRR%$5CIsP#2?(7uHr-qv%W~qH#_r4)$$?X`1
zT%jxu*FgIt<%&Po^(_|rz^l`qZ~y;u?(W?Ge=q-)x4$2`{qgDV`|9obcJJPhHSzbh
z+C#@U#M?R!^F2KLH}LiD(}x#3X)g(qJ9qc-ijC>9?uPwqS1inXR3Gr?<h%M^_haAg
zcl&ew*N^-TcZpMM99_TO37@d(|5VvxDpUTy#OG|u?UdcG*G}2J(lK|#a;3e(+#8v?
zZs%^^H1lTe?z2fo8WvLP{Y4wnpUu8-=1iByySon_Z*99a<!9yL3##?z3oSODWOKcl
zv%7ZN-EXzuXMGH2G+i(F?}Wzu-+$x0D(h|b{rU8?@X9}*p3R?Tdx|9NSS7VaB-oTu
zbWzq)k)S<c!8g0=zHj^c@w<J^_n*3na|1$kJ5NiM?1?mDc`0Xl>CeUam(IREvh+fx
zm)}&!K$*21#~!VBw@D~eadlaCvd_^}`GwzikB_zAgjPNamuzdCm!o>|Q|#?lxk_)D
zucxOU*N>O`?YFh)|Ch|&ca{IF`1<#w#j5*xx5K<NRrf4g=-aqf<X)cShGHAJ_y1HU
zdM}IWzmeVQy7Ojv{``A&JNIv2Z5aQaJN#_tzjl5;iMTB{gICSG#wS#x`6}|?!nx<y
zcE-1y6)|?1-RHHh+=kQWS5wucMc1BOfBvdQ?^XCjeHZp!S!wm<7t6vuW3ql9+d6T1
z#l>_+oiw*+MQ0NYqx)Z31fAQmheJ<^bKdJe;Y+yJoO|z=K3`_7S)%O16-G*DTOB2?
zWqn(G+}(Wr_p&YQJjEwe&+S@p)kx=X_LQ6n9;QsY%d^`AOTxVQXGXMmim&{uB>cYk
z<~PCbFJ6ZnUz{oXsj!$)@My;8sA|E>_pA?OUcaVh)xPiei?Zwt{X!kX1-IOOc<1C-
zXHH4C_tfS(F5YkPRX6y`QSb5(7fmaquITS>%C<3%=lpOhfsJ{Q;gzS|&S4ukF1Js$
zOmC4hoF#SmK~NXprK+IAw_Gh!CSSTe<6muvqf^&5*{m}!i%P!V+7(@+6l<Hl?qu=n
z9XXrBKEC;|bDMQW9>=v8AD5qFKU;Li{^PVnHkZlE6_k7~T3o%*R~5U%=~j<&?+0}u
z9UEP)wm)%kLF>al`;_;(I@fKRx6s&j)@6=jrUS)R-#8vJR7BnQ_~!D17rmdKS)UjC
zwBzDLxryakEw?5yN^4GeFv&kL=X8={<lX+g-+s=j(K+ycZK(Cr`&Z+C<o?g%dAs7U
z*{)@s4>z^F+hxQWYvOtDmLbQj+Jy%LL_^AAW4GKoy;(OmYD(W?tF3z$|L}aBUmvkZ
ze+OrwWyYc1%fy?N_#fVL``@qBZ++k1{@(p}N9{iEcfB#)`QOX>7mq81_jcC=RX6X`
zytieKqtBHiF1@P?x7$8aSbf6z$At>_+6tK_<s+-^1@1Hc8~6CTQu)QXKkN_x=Raa^
zBp1Nw)qT+S7}xyg#dCiiTll2a<6@Ea_St*y26Ww1P}QhwIsUQ2rtk5JtGib$nk(n(
z8)$ad$2(YebF15mnF0?(Ot&fTuAD0?cQ7eh)2*;WM()g(8{ZmugAS>xyr`VO#B`@?
zD(_Y~cTw@s87XDc+*vwjNA0}p^1$?~+C;w=MnlQ@xgLIB6c@}|*u8n{im5^~nO-h>
zbpNjX|HJ+Ed+u&)%Q{uucJ}?Pr+aFpWWLOb`E`f4vTAG5Ob5Y_Tf)o!y_ognT?F5f
z*8F>?3f)o!<=<`;xz{EqH|cEPzdI976zAk^ntfqM@_r2-muosNn^*iUtoixr>Gb*c
zZ9}$LRr2rKySKLP_P56?-t~X~9{Y9M)TGS&S&TcEsdnY3c1L~x`*16lnKAQ~!}9T~
zSX&kuutZsR8c+SD!t5b>;X$^JS&Qf1BU%SInj|$Xmf5R6f04;%`Fe>!sGEbU^5X4l
zFIqTu3g|}7%8Xj_a>;Sq7FXlqWgKyAA0(bJeOq$<+j47pck2%Ye-GTW|16htCh$@D
z)p<_mWP_&vF1I_M(&xeNHcz#Bo#T@CkJ_JqUNZkBr?K*}Y+IX(OQ++OY=0XaGlTat
zf8WQSi*~4<yPw=uK7FR)v{k9CA@gRwoxDnO@@iflgK+74XEJx{Bu==McCIII%W26n
zUBl`6#rqFxI?HGEs@|xRVBdYPa>*aPb@3%L4_>yZuB_Sg-$zM6HoC(t=gW&ppAJ5Y
z{N!AztG6>^ariOAA5&H?%YG*sHuGZYoa(%`@WV#Sm5cuP?pZa-%BbD@-C3uz`Mg5M
zKiEyWa&LR}1cp})%^fQ|@=PaeS{gI+LQ2igL<i^V=1RMn#X}QM&C_v`sr<HfUQ`^9
zeRJyOEsZbgnjV%^Yu<gG{3he~;V|L(7JH_@`_EavWr`n1koXIpCf|*V4~mH|NV1Jv
zV0cbbdDFuEM<y9_&O7`0#rKmUr%PpH&5u2I7t-~desP81iG=eJ=UXH>3io}wVA;hT
z@^PifF$>ijOT%9;->7x+*NldUf`m<BW`A#;a}Hv1N?*HmnW*8TRxx3HfsN^ba{uJi
zf5gAG`k;SP<V)*yEx!14mWY44E&`S<OBn<$Grq4|=_Rev$J`(n&ALG7ouOL7S<{PA
zkGF*Chb8IFY7CiT&KO}N=6Y*Gz!7oJ!mJ+$12(LvX}Z%j*?sNth4$ee=emh4>bQU8
z{Q<W9uM3;^&D@t+slAEQE3mo7=<l3q4EvvQPMh~{Q>G^Chq)j9760r1{O|Hd`H6vO
z#H+irr}^CG>#5oFAZ11QElHNh`}=d}zq`F$X<5<|m$h5v;^p2SPhS7E;Ctpb(ZEpS
zX3mEfqElid_eN+g-E;EqgSeMH4zt7Ec3gjag7aY6eTCGX#5;56rpbnsua8r{%luR5
z(B18eq*A<eLc9;X@LcJ~U;k#Ks8+zXpgy5}*6QyViWlCo_$PSiuxp>es?LbO3<k&6
z+*L*VKVSZSU-#!-@ySYdo(<Pr_~s>Sk(7<<NmMJ4Xz0lNI&a%2ndgm4QXAPW6i+=m
zPiD^CRDp-{emk-~6Ixa({rHCGqU!4h=7`UIldSZlM@?dl+`DrfE8h7@)UVuU&$s^H
z&(G!C--dZzSuFSa@$}=#yQi+rS+yu@-ui$IxzTw{uYSw%1d51?+uge#C~4tU#IbA9
z=DTlVgLqF!?wFKatP^ag!@PFq@3Zk=#3HJFU;W(0+gq#blopdXjhjnIE%(mUf`HlC
zub=DcoHO|yUeUe$^Sp>8!==v)6HSX_<bJI_SNWntpMS<4AD-ejyO;4K1iYACU3%=&
zmu;CDX4ej;)Ng+rbVhf^fuBq5B5tI2=bw%)nN|N$s-)CS{;bJryGwD7)Bc<guz7R9
z(C5tZ16Rzx%5D5!JJn$NdqXy#^x!8t(WO&Y_N@LKlBF^6W>oSl?|t{4KW1es(G)6t
zbjzfjg<;|I5Y||M^QwycPcBTFbEkhs-qDvFJ~_-O4;Jrwn(8vW-(^~N-8*LH7e9~f
zh%kve_u*60O!H2jl{wdQd~BXNGzD#LdDG7Dc|lzBwy1>20vpb#fCk@4yMjk-+tQN*
zB$HMNurKcW8tfi(I_mDDPY35Ncw4jg<&Gz>=e;XeY?ga|WXJS(tuH;~E&X+O^3QF!
zb@+pngjS$zhw6X6zd46gSUNl=zFd(Ivp_5R&O4#gH?ma9IQ%~q-_ZK4W;rd7G4H;<
z*yT&Rc1@YQ|8ZuhmF1D;UD8JEvJq#TmtC2bX(rrga4gaJ_{8QE!DsV!S6%6N(0<_G
z^xSv*4}Y}%e{^@B=jk8QW_&x;|2F#dv-`d*j3Gx&Di!OlOskuAvx~Fg<Fwh6zqd~7
zSrL7;YbDq9nP+^r&EfFe*KM+{vgeL7mr+MJ^CtJNu}hBqTly~kPxsx|X@|rt7dmwt
zT<X*KB3;>B6ZUgSMey#Gdsg1-zI&!CQ*dV{Tl2)aH-h_H{tJKTZ>kUZD`}CSw8$rM
zCxe*CT3?O}3ub#?U(M9ID&yd?EUlzfuK8Y}rZ3n0iQ-@XcG0}zMw!y@3!W$2FG#)3
zwfU}A(e1pXCG2ziYxysksVeVrYgy^o8ka4t9lX$Ws~d0EZi9%`>KEe{2=HFldUy49
zOX$KZhE?qqp<9HmtKRuuv+kbRWR3pCOt*p-MfF~Ocdlhq)2E6z4qI=RxrBUAG8epa
z>f6uD<^Kzg*Z=za-aa=vaBC3DG>5Bl!73YP{1v<ym$a*StH2AHBi|m#&i<lTFMnsJ
zuzIr0B_1sep56t<S)%;klP9hfUSgh@k$+d|_4CG~aZ3v3T-g*eedqO@bs|xF<#xZw
zZI-hArq3Uc|9x%F)4JWcOJ$DNy?JkM|1D@uj;iOXmt`iKf4`M`_x?S9x!SU@)~_oR
zMFnpO&#vCZ`|Mpw>n{GvJ2M#0{j^m(wAS9y=$xXI{ktHEdIiZ7hK+n$zVmgRBo?ur
zIKtZ{aXDz-?Kh=|jS8n-E@F#(?&jY(Wz#bby*=(+s>|-QWFL6r?*5Lw;i=h#SD$m&
zSpCrpXP4%)OIjG@?Y?SrF~j;xO-mQrJdHM$_3LrUKVu|b>Xf|yn^Q{Ss}Gj@f3zC0
zGgmhnpYUCGi#@Wf;coQheYO{WcX;X?>X@FL@%-h}9>W8NqvY#%w7u8v_!AjzDYb-2
zw#oWV+r@Ah89%!%sR=#PEuz<yT3wv_b7G|0gp+sPWiHyewTExxgmhka_xt{=H@TV4
zB^fiEINjRo8r1K-bl;=~|GvrBt=LWp|ITP(cG(@Adg1hW$v_z%4TD9FMd?miyBHXr
zojHH8P-EKK`B|@<SmxC1D}2A@dajnj-p8vZMx9XOuv&Y(a+-c@LimLKGwR0AO7o6g
zd$ZhBB+J5EFWk=WuR(h9<v6ZiH-(rs2)<@g^f_kutNyuzj&mzdEBDj+8~->w=j-EM
zyd$z%`h#`*ca^=b3z;|;h5r<aO&7hC`c205Do5$;`+_!SKUP~5=9^U+ysbE~Nz*3k
zUGcAs(qjq95r+?k{jZo@7<ec$TjuKTJ`FntmAO-E>c2Dnsn2cwpZ{y}TMe797p!hs
z=dV`!9`fn(Z^6eqS6?k#wqj<*weOtT@11iESkL<I<l#`+6JK(6?=1`Ax7PwZGGuN<
ze%EimvcNs$m*AQdlUqTPA4%=G$*1v1?T$jLLHjk04@F1V?uSe@S=hMf1JnICu^+o1
z8dzWHQ*e=1j8)z}TaDTFWq_s4{bfsElwG%0y3fI(y6R1V*xfT4ott?SqKYDlUl_ee
z3+r+`_#iNQ&;IJ1+i6*+623;A*|zq|<AfiDGI6E5Z-#|z&53rbmTp#b_2Ff^*u4Aw
znwbo5|DO3gK}z9>$eF7BIm=v?7s<&5h9)jMFzwt19ZS)!wJRQ3WQkVUm${^uxy4&f
zQd;A_<9coQZI-8(-~Ri1yZ?Uu_q+V|HTCy-Y$xqJ?=a=cw>fz$x1_YlfBk52XYZZ7
zxsnQJch5=8R_yJ%%VhR}NxE)c?zIAwo3*{K1h<B@AK1dLUZnq4*MfHfgHMmIK=ZQ&
z0)f1OCT2V@op!zdJ#os(z1LoSExd9_<F>Ea=Z@t2odxT6-wONk?^~GJ@2Ai0_wTp=
zP!Vx4_j1swk1GThw!PgZdpz#D@`**K-ip0%cYnL<$Itt5JX@X1KHSoO-Se}KXNL2t
z!k`_huLQs7eV@T9@bl{AHYb<KiQ9Y<4=FA*5zm$P>HC#&<w8K{tIH{+Qzbq<;&}J#
zmswMk>z&HvRUTgi4Z_WiditE>j*s&Qe7aK2?qy2V<t4$D_nvff3oPqD)Bbhs|BJ2}
z*01a1CY2s<G`c+7{p&IBtrn~^o&<S+74-?;WiES9IE!QThKP+Wt$Syb>VIN1Ni~ZW
zk-8<eZSmUka}PvH-DNty`2hb*!|#`RB!4KJ&B*<vW@;?kvg*_Xhx<QH9g*mhUckI9
zC1kmH=fSAZ@BS*ElHUF@YzS$bF=3h+^W5u?PsEDGI68NSY;IMb_iB5Fcf(43sZ9$N
zYn~LB-DB16p6IEc9(BdD*N`jH-^1y5T_dMnZ1-=M`>PdPZYsFl3zxq5@vD}=#sJ}k
zT}m<`8~<#Y6=-td=HlLqTewzmaVW7mUt53d!#cw*zjK!E%JCxB>x+)J9b3PBVMMCM
z*{x>z+h=SGjqK&k-MVQVr<KsNtn&A>pKbJW7HT`@mggf^HQg)dRMGMEo?Ez|t(yAf
z?T)U%``?B3e}2!Lv0%MgMaJKVFS#d5wKgT4sL1>~eFdM7_|sW>OmF?C-Q!euTHacy
zBxSbz=-MrZg^KUX_1*}Z>)v}RbjCKRfV5TL_NRva`5rj?zjtk8p4IE@d4?Qs1*e}*
zD}DL<Gs}l>*>S727p%P&kXmHCb?Ux`L&Z6Fb|_z5*7@@M(Sj6%MMtgNT-Ub0pXw*7
z5xS_9k>})=>t$63l$;U_WSSNWUY_l$cCq%Mp5@}uiN2!8syVMt`KPX~zImd=!_Zl8
zjq0V&%=t3!{BqW(dD~~-4LHcc>1wlzyNT}(uaKZ3N3d&P`!4rBQ!X#fqiX_G?%Y)2
z(^%#b!YQM)hxKsR)TDQe3AeZ9YKNVw-97UlzkS~<&jLpMMc>!Hss5?-zR+gQ!sK<*
zQw@c;UonYXp^}&}ce)tEyS!E3s~KN^tO#Fke)C>wy$nB_ZSU5bLhBMLB7`;>PtjFc
zP-F1#=X~Y58}DXk`rioHYBwi%>!EdfPDaFZPRLk!wn5dhU9;Zz6@RGz<l|yZ1yaiT
zpPJU4>Rq5MdG+w-F8+cuCbONlo&I?Hut89W*pj~W%Wi(vS;N|PeZ{=puWRqi_uIxC
z+GGFi_hDA0L&x{Yf8CLL`}W_-0`L3v_wDYO*>%Wda=hsG-*=V2C7(aJJj!sIn53v+
zivQjRbEdpHGgnb;^79Fu+u2>8DVBJKc$BI;UOe&g!if_<Pjwn<OD~C26=SJfp6>l*
zg~BYkXh!v`{^}O`atFgDeyd)O5TBU0p4n)VxK-4gvMF1ec@L`Z`NYn5C9QY1@soXq
zLJYh7{MbeId0M-f%HLfgDWAch=a_P~`cUMgkIjc>doOvJ`s7&1yFE4h`nugZaknm?
z%MM#~dPVVyX*(TmZ<=}W_3WE17hl|&By;N6<)xFCUET4_Y43`U)(*xErCXdP+Pu7a
z*!pRf>*ju?{FnuM3$AZwR$^zn%YR{tckD-b$I}N(ikNmPsVwVCzId>F=^Z03sirUL
zQ!U~cUE<CuSKf8}_=Nev8$tgkm(NMhxDXWCb1-Hb3m4~>3K7pj%URkY&a+y1wl7uw
zv>`BSMaJqoHj8HZHrGDd-BbH}uO^3~ai+x$yZPTc)X$0h5SP^3`DX5?7tK0LPMzkx
zIahH`;J%;iB{^XRTdUqq-MZE~MWR$AEK06=x9F{<W(Czu9+4)we;;&v{ync5Z0+I}
z7pVW{d6uO+8^;TyEspQM8?J5qv`Re6bED1C)eC*ue?41xaog>u{og)h?@zilY1)O)
zyrlu{Y5519KH;irVYhYr^@9D4<e$@14*uA!@$TdI3uX_LFGR2VGA)w#$E&~2`@?qc
zIKTAkQ(eA;R;i33u~u6TCRzSub`)G+`O<l@LYtazwUeCB)m^FmJ>QarLY7!GBrR4<
zoUp6#wEQHUmJCP3t07wxC&syO+%fJfd$;Uja;xjqup@c9?tWP|@zu8!ZjZGSi<7id
zy$xzyF3H_IlF@SD%!TQH4{|IwJ1u`jXZD=Ea#~5Q`QM|B((l>bT|2|!ZQ1QEvBgu_
zbl1j(rW*hLvhv=xvkme4HdGz3J`ms(tt%n8@w&tr$+S6nnhymRU45!|`)wI}NPqj~
z{7YNv*OYvlqSse3f1iMCxydSr{@VUS{^h&#7wixX)qT-$=a}zX_BjRuDQn|wZEo`^
zZ*8<lVNsu#6dFFS(E4QRy2uONLK7w(TJ|Aob&L7}-6^L6r)@p6+ULwkm1Y)spC>AN
zQx+covigbC{Px{Wr^}jWb?mS%57fQAjZsypFL`;v?z`LH9{X_oae(ONx0j{AJ>Do^
ztr-;(Y{;6S`Lt{|>)x>Nt+&hS(?h>Fup7Jx-pse=<l5IcW{aLO@-q}I@!iR5FF5nu
zox&gCCx3?XPyHv@q_jxrTnR&~zL<Nw$1)|aE!=-!EtR?=9K$xDc)@8!+apGM-!SgY
znak#)Tyyt>zVV;U2gJi`zfIb<`fa*|(5}*Q``B%_<c@rPt*@tauJy;vH<s)bH+9Xo
zbGWT}FjM(T{Xd>6Yt481^IC(aR@bdre0}|;+cTEW>wWz#Vb-MSOe{igDp+<t*=AFJ
zaL)bB+k1_&4!FKP^ISgF;E?*EN$2huFaIi#<dP9~|0m1AS67aGI9UGnT#N^!_af~@
zi(8^5TTL<2tXuFz(IGlcKP=GqnajCX{kk!=AwM@;MoAgh?9F?+Q+&sh6uZMSKb&$l
zO#M({_~>|vm(ur(FFVg(39InCcp|Ok%(COI7kk_f1<dM@l6bk%KfG3I%|h>3>$UO;
zr&;n{&dOZ<=^-^|eY?Er<3AY{`%RYCs^6ApyLjqR!~=%2Pw(i@Hug#Tcy&n#pYmhA
z<i5Q<Tcn@F{@!}c_P~LvDW$c2Uq0DRTeN`x<;`P(hF`U9<@O5iynbPIp1*bE`Y<8E
z<H^57m<&y)z5d#C@$mcymbPoRJ(YGa-fXN{ta;IKv*>o=RnEK$k~5}W^srSscKz)u
zc5^1XSHJ(;YL<L3|1I^yA@bz?56oXwPIsz5XRj}KBGlKMWBJRu>DGF_Ekd<r2V}KO
zOL}4^un1h-r!4Qq%duH1%i^H~!@Z+3j!wLG)<vpZRKUeWOY>lIYxTFQ0TU7`E~yG1
zu$1Etn8DFCW%u28W%sviD3jgXG9fN|?Yf{B(Gn)_*}QCn7V#QuFU`5_d)7x+Xy(lp
zMZqP3Hk*<khWT7wlft&5Q&c?lz=~783L)Z`JLdW^mF>=Vy}FGfpy0~m+WXn1p82c2
zmR;E4{>I&J*%h9wB%yU%tGL4NeZTwW>^AfM4rPV3ur0fiR;0GQ*b#lo@3*&+T-4v+
zx0lb~xBq^(qwCpU$rUGg&i_BTwvB!6*A-i>d}h1qWV{i3?&Ej%?>|l9<cl+wUsJD=
z(`66Qx}&|i`bwOCAhVu`*;l{M`V*Hd)7p1dd-3Cq%oF!TyBQlB_U)BB9+<-%**`1%
z{yzDZS-Go@`ZNb+i-vM6%sOhMc;if7)nUh~wKgsh+jL)-8s9#p`$tfq<LSNRFFJ-2
z$DAhow3!;oxP517+|<Vx%|m2#t!-y{TuWxUa_;@hrDY9!c=}k_lDwT)oZ8dIE?l_g
zfXX6UiKk1wTI9b7J$ZWHG{8Hq`Z3p=P3&*On0+>Vv()gI)}R?#UwdR9&#B8_KQ<XE
z$;q|NTYg!h=&;xA*pi6!uA->(`i7qxIVVM%+A(X+J;gM&qr|W`((q%}1@6kuKgTpx
zI=BUur4!GsZvCA9i8=Yvl<R7<oi%NpPqpQ#Smd8o`+A^bRqUxpuZ=TLGFz}p%`I4(
zvuESH%~kI@PJHd$l;C=uY5!b}uFzFYKI|ec+8-1hFSo9#_nLk5&Aj&Zgm>SU?cZtM
zP~r6S!_%(xy1&7CGGT%LtETRGb9AFufu8QM8QldZ1N<0{)$}{<Dr;qUIcvA{lNYS_
zoQhc{O|){!;9!|0KPPFSep#V#h{CKTuDlbZq-G?B1%D}VY(7zPea^CpF)Bfe3*=Pp
zCr+1LxJ6xcPt3c?Pgl9<mEE$bUcltKg8M~0@8$M8w>9T#b%cGG;`ZgG$((!FOoXre
zIL=p@aM=BL$i{OQvOe&9n46{(ceJS6p6hvW!)Yh6<)xX^AE>6Ox?Wz@yEbFyM6Rsg
zyt!oyeAnGSe&B$XL@WDhcV8Ea{R=m$&%3_k|HIaA`|oD#{xS2%Ril54tK@%NJ<YI>
z>yhm11CGik)#BVgJSo*aep&L~gZQ^QnbSV4%RK2U$z&K2Jlo>h&#PC~syeHScY6sv
zRC4P!5cO?)?XfrS$F-S1t}7l=2nk7)-gb7|?z>&Nf^kc&7C(<@yCA%QbA{O6k7Dya
zEe+7!{`+m&{=0ekvvZX!`W9E&N*SGcy>EBrHl?H6qIZgF`_ARK`CV#};;Tnx9pU*8
zk6pdHxFgczV(#w8m%saU>c5ZAI=3n@jmxzn_w=TYD1MF{?KH+u9}3P_NWJ;Ja8=ls
z6@_o!#VH@zGK1@ZoayA^&lf9re=THCd^r1ueEpv<?eFdHcHPN4+GV==>J0rQ(>H5Q
z=S*8rP_aQ`!K|!B8*R?4O!QeC^E|7s=Ipo5>d#%<n9ts=zI5rpJGI>gUw89du$`lP
zFqCym#wy{G2RYMzumqiO?f4P6T7Ih)2j@-s-~Vp7`h*^S{IO!i)ZLLgr*0Ary;5M%
zHEUL9XA0N)Q-vq~{af)>gEw~R$uO;@Z;lJ*e#rP4BD+4Z=a}_{=v}YHoLA^(I|T{{
zeV@ZvR3N(Si_F=kk20J0-aGgC4TIWy7UeBlZ>orRq}nZVb<FP8X7QVpQ8`byw)lGI
zRQ79;_xDaep!9T}6o1^jjJs2Bx-Q-{hbi~woiCl-3G=!)6c$-Vzc9C`oV2(vmVIYO
z`W*TB%Yx5)+?y~*X|to{&J6X9$I45UFXxoE#g|4-u`v%>zrmPStYxCFwBoYWJCdDR
z6l#)+Y+SkcrT)#*DBZ=eqbT#_!?{N<^LYAf<y<t+tSa{5zD4hDuh)%VQ2YADEEz^t
zDaVc#x*Jcp1mx6iRlmqPEiGu)=UvGS$B!0>mF?fRuF~=FC#&lH2MQ`zU5>ls6;p4_
zxh>?<WT(^*Hlk800%{puO9j;gHSeF?t7xLm;<ZV{TXyf#X_sc*mg1dqD^$mFrT4a!
zlG5c3iAyrn^X6D)%C8jEdw=5OQm=_u<E_tZsTDnA`swErBe@fIgB#wo*g9G2{<tL2
z+<D1yaaT!`e$`#;SJjT0(U17r-{hW<c+cDWm!aw#Z{YduQWKrMuQ{A@OMZIw(Wje}
zKXE-gB`9axw_R=7IsGM`?*nFZF|uhK33)SX?WL0IyL`g+Z$8TPzIW2n=#h@yubBAN
zAAbJ7{%t*rxqtsM{`*^>A34@&{`>T&CkEAqW&Kg*+m2l4t;v?QKU>PB%~rC${8a6M
zxZ54eo~Zp-=C!y~qkeX(Q($k&?Do}N3XEH>aXefZ(yzB+iD^%l)`po=XWz}cU3>eU
z&y~Xw3hx|mw%o~?ej-3Vupv$D_FFy;<*v+|W}7FK-HEdgVAN%GsMxSfU#ycS)HHEQ
z($-(^jz1K+IOUD}@4&w&wqI9pKXp@e`M+q**^d&r#6oqWbR@3!uh3b#q9f>J$167P
zlkH5p(@p;T{oC!~5h7@K*C9NZE9>myCmhd&6;5QUX}GBT`JDFu=Xd+RAMQR+ntfB(
zlwY%dlU;RAUs&6>$?Kgz7k$3*`Y6Z6c{5BL#F-~=kdkKq5+U($?T+I9sEh1f7Dr7|
zE|p(Uwq4_E+{hX>r!aW$qmCH{zUQ~(3g+*5{NvL<zE^)Qm(Lbm`lg?qC2Q69Ag_}>
zoA-vXZp&>J6LcxPnpF0tzvBDhH9X~OR?WP2c(Q$><?{X=XN(2pvJT&2)zf-q`R(zR
z+-HmRY&Dgn&MElyALma_IKOLJu={-5LS^SCqJc+q-BLQD*mat6G^K6MzmK2y%4X?-
z0=ub}TYg6R87SRwGP|U9=;8csVXLH;rMy49-4>{Q(%oG;w|P(Kk9qG4Or}g+^Jy<%
z)LDggojkW~eL8ucgvC-N4<yuDoZ}aiR5SV6k-OmDwRorOPZK8c85mEsy3O{WTP>H@
zOzuhF%6;>*o;jYJz0)MO&BA!|G)uFp{0(K?7f-eEeZCUXvf;ab#mZwRGz+$L?_jNI
zSR|)7Ia+ISr~Qq)ImsO#P8`|Q`EEYf^mkQr_kEdGu%wHp`OIe<7ZbJ=&9w?QWn<QC
zi8Ov`;&MLn%dU*dyiFUPy<gYOweQNc6F1NOQQ2sxx!$0LBfXn@)|Vy`S8dhbhfRuj
z?r&bPH&_3XGVe+^?_S?mT#j8cb_Csi5Pq5Cy7TQN^Lac1=1%b9Uige%^vjA^KKaix
z`-)AAZeHz?4V=DwySc4VfL+?E&{Huk^E6x@ER}j^aw6^aix{3Ym!zMZVO4H*-KCrL
zr)?`=T4whziCMoSPW|e5;_fEzVHfH+gT435p(W}w3O_a$>{CxZ-{N}AWbdAcaO?D!
zM`x^lx#E-J%FWmM-Z(3#ZFyA3c4e-bLG0X#UuRrDGx3YW>VwngzFYrssm^utfAeIw
z)eC?9dwQkB{;zyBB31vwf5`o6beWUIS1EQZuKZs*-?ayfE#(4^pRR0;jbk|0rW4wt
zbk1ts;&<JN)6bnW@ZENN=4QtIP0y@eR$MA$ciMbAtU&vA?d^AIvjdOL5t;GS`*vP*
zrHz~SRKJFz-BWM#>IS+iEwcNzO0??(!;+k^oaPmm_o{V;tPT8X-Q=ROr->=l&9?9H
z>EG2mypsyg1ikzJui{Prg@W%ItLEQ(Cpg=I^JclfRrRtJN=KY;nF}!_NlKn}EB4=d
zaBa@_?Z4ytqq6;tS_1Cu?Y3tL&t!P`@wolJfByUb-_6xdHC|D8$-YJD9be#dJ)JXE
zmXpdBOlf(s*zjP?l!l`ZCi`+e(rLT&Qu1U)Zu6hMrzZ=vX9^$8%ulk|JL%pfPM2F6
zT1&Y7qt{>9$v5#}!NHD>#Z`PE_wL{S{@3E$-R}a2?CRs+xh{P7@!!7o6MyUOZ(Ezg
zy6RnF#f+z(Syuu!=W4$%U4Pt>Q{FP>ujj>zHNu&9gYNj3hwAxnI^4yws^^wd^xK1Z
z%%WS=A~r1e{v>t719rW&-3FVgR~D_dZ7VtZZeF-U+1{=r8TvmwdmcO$|IB)3scXE+
ziwV3!m;Y|_TfHMl!@cTsrogSX+nI~5X(kFS?^IJ1>}Fnkcuvp*k&SEY+_o{aM|rp{
za+*Ecy@Wl%ZbtL@$+y?7-IZjjezq#1Q~9CZ%Ik@XSlTu|S2^R@yyLUZl{qu{)RP~*
znPO}#oGo(QWk*TtWyx13UP{RYRLyhyxmZ%bV+*Hk_0#fsW-C)7EB}crFVX#dG-ItF
zkNeArRXq7ugLe4$md;~YIH}4e;Kf4S#Zp2y)i(us&D*V;%C_2N-uf{9RgY{n>UtMP
zE2ZdXcR!8XQF2Ztp6A)|^ct}{J;4kIHq7K%cYdaKLGFTf;U$ZIaf+Uu`(p*;;mh@k
z6JFfism$OzZ@r-2Ec4Z-Q+KQUvstQVTe`Y6&e_%1BjWRU1rgh2Q*#w>*3Hqrk$p0L
z^49%cj@9!sTE4K{%xjkun!EXYLFCMpuMVA>s(R4){>eGFzB|QlS-Wxh?8p*99{<0!
zX&#r_#kTWq*ZcgNV_sAL(<eO77JT{U$)|3V@%M*+Fy}mv!_)uOeZ19gx<M&Z)AC~c
zvtPUmjlym(W?*O(Km0xNL}klv|HsDq*~?xtJi0#p&7blmC;!iW@hd(ur|z}&q4?{O
zd$xTvQ9PEq-k5u{y6^kdrOBr?`O8&uf?Pvq&sP_{6{f|^67??HxV2nkwa&3^^N!kf
zF{iq-O-Rww`^@Avzf<YTHs44EHbb?oiv^Ng880pn-~381Q}^=<Q<sFJ3AGodSih6|
zurwg}wpZiI*08G_QJVSRx98qwP`z56$#(L1|EiWtp(~e)7+?NWw*UUNv#;g$bzcZM
z^_Oig`{BFy<@tAcyByme{Yaj5X@q7gbA-EA0+;Y<->jXvQC>3cX{nu#iLQ*-qq28=
zU$gt^%BP=}-LC)H{{PEm{{6SN>BcVS6`RJHF1JW?s@J_D`BqLX$#)9eSDlqyBm)He
zwtwN*nI1l8Su*E=my7C7zPUTcPg#2{ivjNj6-`0YmbDjOs5yBtAKJDe&NNgb#P+dO
zjO)P#)h*jMp5FI<YnYosO?|z{?wOKjTwZ>y*z;XNch=M(FK6}2Hhzw^Ir;Nhn!bOJ
zfA#Ka_M*E|p|frTaPb6mO8z>nwl^Xs{PpCznIe45i`>Kh^Rzp>bY9rZdG$!}`EMU{
z`CcpydvP@C^jbcFsYx#u3JD2HbnU%7rBISx$C-Uw==+vc4|z68E_yNf);!B?`5S|i
z@9ni|x~0#{by{R@x<kHKdC0U2Ixi*XU$XeFygo?G)xE?0h|iWtbs5=BCld=FHB{@J
zWaa&9r@ArcR5cs(!VTM;vbTL;ta~!~*Ye{w{u%0elP1h7zVd#>#ABwi+hpdqd`tYn
z^&zD^c<sYQw-)<`d1xz0)?Pca^2nC(`A>gHoJ&8M+~h5N{f~H0-|q!{{;K{g^X9!&
zO>YdkouVl0b#yY<6^8#E)2?31zis&CymG>tFoWW`J9$>`4w5PkH%>mFH|eISUF8y;
z{Sn&|R=Sj)-B<f-x3}JxO0A+zS2vhST`bt3TND@lihC-T!kl?WYn8M%iRT4wDG^9u
zP)a(q;Bbi2;S`Rq%Pzlrf9Q&l>Dg>M*5Z}RUDj-4l?<NQeQL*k(ZaTtwmEB*1-%ze
zocBYm;pYyvSL_k@s}j9VTFa`5e%s^celx_{S#W#I8P#2v^{kZCn=Z*#_}1TF)#S&>
z9G4;VTV0^}{$`)Jikl6NVoSu9FDkz0xj6WwytV4~lSd7GbNs?seOmac*k_wc{NnO3
z+#UACU)=UB$Jy8mi$ty`97@&v?Pc}D-udLeX!alSaXafz%Kv#)@ZT!OI{Ul7^3Ug<
z?<a9-`kSBr{MNYq<XX#qHOHJOE?-Y1vvhyqHu~PX>6AcLi^7)b#G?OJK?2E@OO~gw
zdx<yY1==bkYMHiYDDfGGvTT#SviF+u@2ju;CQ2>TTAFk8)ZfXPk$1Cg9agMTnm&ak
zVd;#HRa0}dZ~GeMH0(_0;p{Iv=CZ6Nch%A<Nw;&gqYYg<m#}xL*0nFW{%UI5;VVZY
zzt1|wBiOhwN6TeVHVf0{ppa1SjWxn@zc;@BUH4|KOIwGBVe^8niqe+`hpw$Nto!xv
z`2Ro0?d!gmg||i>b13Yb5L4^&_~6k->lJ?7c_IS28RyyZcdXbV9W~|dUduWz^}|b-
z{@a@R_J?qHu+fPH>vv8$q_%f{F>l)vW}l;z+*U2TD=n+=x4=HeNrXe=WbOT}5^EfM
zuSz_8cRaATEU;^)=&DVp>fWr{H1pQmyk(NYZy#P<?_j#wpm}CgwaFu|gL95NP0ib4
z^yStc3uWP1KZFI9Wq#PS{BN7{J~{Wzk`pB^u_oHdv#qr@#b&OQuDW?6lRZAJkMF6|
z`$G~wiHsZ9JHD)Svbhptym{{F8~e_Gb)UtXw(Y>5=AD9>r`FE+BF!Mgn6`tbdg7J3
z^Bex1Jkk5U_|@y8UB|WK?>*jJoA=1Lg3B!1WwOHCSr5)#ZV=nduy>Y*o+@K_qldfG
zwndSOlY3X5?XR+%Tf9ifch~$iH&@H;FSoh-al#L#$WLcK{dQYlJnw`;_pb!OM!kqT
z&y)`Gd}m#kzriPF;iAwFXE)7REo~HIwA99F!AjkjSwS-yZ)N-5d$yw~pe<SO;QkM@
zY;w1RFMC${w5U$uQF8H_pf9%UDZOTO_crb2OnCor&(a|GlzZ|fmsTIx_}=?+h}->b
zd{U2EESWR9BfQwW`z~0n2?^}b$~d&FojWYZ<l+xyv!6%%>dQjanQc2&8MZY>`}N4K
z+qPdk!L`X*xL{Ia$!eWVhu>D3b@?r9UgR&m+<oW&9Y<}aHP-%{`~1HD0>2Yd%GImh
z&sH~>n{n~m!UY)#hHuT4?9$g<Ez<w|r+lvDX-D44%i^*X>n&GafA+U&WyZX_xBRkM
zpQbFHd(@UeYS)9Qj}E@N9CN_fI(uLIVXH5^fnM(we|(Sm`ajpi>BoXy|DW;5@Bey#
zZ-<2)H`n~z{`-=q?hAUkCqS}RB5$I2%kiG27M>?N!gsv(DA}5O*Z5FcU*@8)%_2UR
zFV9$YuHfUH2fJHmh4w#QaneNRDAy5&6`WZznkL>x;T_t7Y1=p56&3pJyhm$6SkaW)
z5cXqxy!h5XTbDJ(qVMm<{lPIRtiKBQ*YECFv0~-LKNft7zDKUy;$1SSW=_z%RkO1$
z+W-GK{rh+2K8_h`_tjUYzuzQgnWf9t)oH9K^n2g?-+|dvA4aNq-xLYBYGAa8->LOr
z@2BPVe;)SR{|)Kd+V8P-_7@Gsc}(A3H7`E;p%Bpf;9=upug=+DFKPX9<o9djNIls5
z_1C_I7nQzR>dtciRkDA6qwlmivfitzgt|BrFYFTBW6!|Ms3CY!|A@{F?Yqr!^2a~k
zcx-V<*m9*{p-lTLKSxDH)_1BtD+D*nFWl-C9G1P-y(Rx>*Uh(e_I-yX1p^mzMLkP?
zF09Y$AZ(@MednU&_HSSIoL2MXP4g-EwX5Zd>i5Dso<E*BG<=f!a5_wbg;_Jeym7hI
zy@!jgS?4WOsPZ~hv|pd`#_7d<7W{l-<#nsg4URku=051RIdlF`tIJbvO7vt+?3KMI
zytRF)w9^qOTk9Wlw3QVke07vwx74lFyRcCrv-a4ju7{e*-qQrWf7>{rL*T07a>cff
z+cWz*+x|!xu2uiU@^fS2Wd8~CgYRAuxhA+!!qRhP{Ppa5cb%-Rl@}vJ4?VbfkkQ4X
zsL^8;U$RTV#cv<qxS!jk6?J^xL+!tto?EWoT*+==##TFP=8bP`Wv}I`SX?KYEns;l
z7;x6IJFVy?&zz^MI(k!Dtu83d_FJ;lb;mZodB^X3+bOxjj{C7x$?t{J@|2<*S)FS4
z?)TFwS(Tx1`_KvVP{o-IM<2fRX;I^x&LAk*up=#;dEpL^XLnY5@C&`z))9J}U4i#(
zRx8s~t$D_KBJOTp9xnAMC;sKk^w<Qkv+tbd<*)m^<dLNAs>f}+O#&9DA8E>Y*LG{^
zg`>vBGOBm>-p(xEDgSdv$Fh^1qTZ$VyiDRU@4c9He-cM{fQpOr5m%<W&AZ*I&L&6d
zojUzzRfL<IWplWo=tjT4CIPC}a%OL8_i<kEyfc^Sgdb<F$Da=|2V7gKUEW=tZ}R2(
zjc4MmkGq!(epnwD{r~IKANSwQewgiC@n_o2KdZk>zRt<L#%|#s^OP$_Q1D=WpYCDH
za}|jlwr@EqrsXH;Jv=nGXThq*OHUhqT7?Lhn6m7>X3?Z1#kERv;nxe!3*_o#*j=|5
z?asZOR;GP4q-sej&!aepzEhhzGa9>2*oD4#$ci<p@p4g4s^7EvLD_EOSIPf3z3<P}
zKH62iwsig14^<rtW=vF-X?L&Kn>$<6qmaL&Q(<#ve*XMGGehCae=XcjF3AcwyEyGh
zZ3xGU;}`F3s<PPYWx)_ULF%fijqywkt9iGVxL<j1xBu^S`@av{-`lTt-0Hx);f}w{
zg{y+Qw6;E5bI7M#an<pS^6xdIx1F7}>BhrkJtN7&-byjuo}6>Nx0U4;J1@^%x8z^J
zDHq-I|9QXHlr2oH_f=irHf!sNUv`HTHIE1O9}m3yI`DPfj_OmThg3~`76z10uU+24
z$!Hl8GCMydBr0oJ;OmXM<+pEQtlE8laaF|uTjq|Jd|jsd41_)vTdh_%+v6D<_2KuN
zW6mry&(A)8Y=*|%QkSOm)Mu`NJMTFu|9f}q{pQv`&(5qbRdMz4P!L*h(RNX+eWQ2&
zJyof-O0T7wW8z%?3vT(Z;(0LNG@Z@6WcqUjzj;2Xrwe)+GwweVKfBO4`q*2`-ldXm
zMt>LNx7`p|&V6Lha^1LBPQ10WI_iAOwT))-AC@ad#Hr8h?VA;HLN%=VkM+qD>z6H$
zoHil+=<CgYCN7WiXsfP`OVC=LdOYNDawQv&MAG{?k5bq>zIGb?SmpHa%p|V9{*H9b
zX*~TOXK7!ktywwC<M=<NhGQJJul-X?*1RuuXzBSTwKw)vcLXP%F}Ui}__S)TzCo3x
zVZ<l9aL#!rRy_V1|6OYNgqAWx{`yn1UjA|6u32?guRmR{kooPWSL@}y3moPb%>6EN
zVxn&C55HtdB^5SzrY?b~T|rT{8gc;~&0cL<=RQW|#+*48X(G8-$u(M0XHUYnbKJZ!
zW)lv&&O0I*peSSed57xHDyNH6B5(Q2=^Xws@y^xLK~KKvnlec+r>;}8@8ak#PtN?Z
z-{!KWv}???>bIx(<PIt*T~QDe2-18dr1;1qbW_2D<?6GzpDX1aJD$|L<5gv-nt_S0
zpYZdJsHFGRn;QH0mamW9dRiqq_WtkdoQDd|w@3M0yrK2}ad>y&A6C~t^$~{r|JCZd
z?q`L>=?W+=xA0lx#PRRx{g0Yg-iyu_?%takxXIKdyZDW(>In-YAMM36LPLs_MYX;p
zq+Sv55*2?JDr>t}Ny<|8;lsGv-G&i7Fa4W4iaA0<7dT!yadpZ}sgh5HR~M{GQ(f`S
z^iKcdg<5i=kCyGozr8K@tHU={wm{?H84G7@%ZOlD^-?YMtFmw#@9k}(Ti^csHnY&W
zFY2}6IkWrbs%N9NXir&bajxLntj6GrF?n7Nq4&GYH`^;Fr8%uEX`6LQzV6RM|N0+y
zw`J~=Yu&nlF(S{qF}NbmF56dhLecJ>3Vl&~A5~0mpDMjjEQGasneXx4K9?$%vz`)g
ze}45@ZJ3Dja(nj&j{O|vE9V9J=xz{8+~W49p+z;IljChe{KpS}H~tPxZ}N@)-7`VJ
z^O&l_L&X)%OV}3V%y#<y#MWg&*28}*`gw}(zcWt{`Myg&aIWC0tgL0X5|p0oIeM;@
zc{bygixS}r;#Z~Gq@6VhHcS8Vt6QRby1Cj%r=I(1>vGkZyIO*nSq>Y&u?jxyGevi6
z=FZmHld8o`Bj&vipOSa|N_p++6}#D3mlQilKQUbP+Q98z(*Nz5la-Gu`#6Yp+Lb&}
z`f91z>E`Bp`%X#YapA?uy_SnUtWliDq!jl!lf5#?t;l7`yu@#Mx|7=&U%ZH`^wK}z
z-Ff_ofdAPGleTP@+Ac1z<OFBV&W!);ifcLVetLY8AvsR1>#?a{&k6%o#kEVOx#p>H
z$IO2oCzQOVKk>prP2SWrA@$pJKWwIlAAEM?h489%68$_on-d!iOZipf6J4MCtgJr8
zt|(ME(f$0Jqz_%5vfi`5ap{TP_dN0Ck=v1zKiK9-25buZo_>=1f~wSvTif|!!cTqK
zY@Dmr;G%LNF12%RM)rmiNs>%$W>RNgvge(3cD8W}{=Ys*S)?TLP=4I1{sW!Sd5(AA
z9(nK9VKPl8`@o)k>;CjfKA)pIzi8t2M20ZU!z<R^>R-Q3<Z#mD0Kq`ldCsXBg+Z0`
z8CpMB<eq(YKxn#T>apqF9o`*69A{UEvj`j#cspy((lzh3j$BRUei`w2=0R=Al&f6x
z<swrXUat6ek)d_*zVMf_vrFFEiWPjcx^zB0*Guxm^U^c%QAZxdZ{zq^{!#o#{-;MD
z4(jWKG>4X$PL!&#En<k{emAe}yL{~&`H7R;w3pA#y(iyz&nS*XWNKGdLC}jO?G7UC
zyR(~bZ<Bb%p+2>wd~!kOsa$3c4-V%`ChNkC7IhpC%v=?sz5R(#q@$+At3ca5a`!$T
zbT`b}wlYF<dytuLaiJUY^q{=kYo`?LUg&w(bglV{x;ML*b1;{^VGnt|QU3U1TSnoP
z8n67=L|9+3+@1TdNlEEp;Tvns&c*(_V%3T{P5PAm*VynBZTkMQ?*ILNUyjeO`(v~4
z@!d0rgP&aB)IGCh{j8}$32v43^>5<q?D}@yx37)Q%U`By@}1o>Uf%WZgI&oi7r&_{
zHnTHb+Suv4d(ov$#mX<<Ii(BCjJOomKkK$@+989Oj-*#|kxnwD^>5yLt#D*MdP=YV
zm8jApj<)L`bo0`d<~T0Du(#{c3N8MQ18X8~O}~`0Mzx{#{_DaSS}aSa+|K=6b;UW`
zfo0llfs0zDJbuwvAFfuH<<`&hVQ;>aSr|FH%`DGZe&2}$vOI798{15Op8wf!xyA49
z1z8>$+OJiOH5ityatfLH=%W0TDcuq73bU4Y_JrmB&GeX{tL0wwdCQXP6PyDIO7&Lp
zJvl0AaOu}KAM;d`UM;sZIY-W%UwcohqeJ^Xo9)817je(DHk*rDetcEET-}3Dzd$U=
zQ}ODO@D>*nCW+Y-cdU1>ytZ-QCCSe+%S}$$WzTr=YvuQz`#g1C4tvTb3Hr}gI(EgM
zB|Q5@j{L@14wgaHmYMbs)<o=Zk>J((zqumGjKL;xf4DlE*mBP0>?sAF&q|xbgnbo^
z=IJSLtS~FR8hpPqwA|>{9x*0G-{blbQ`|Ru?q;znTyXAZR?g9Fu0LP=H1Br3demok
z$O4Jj`1<%|(-j%F-L%pUHBox&F2*}Wuy^t8`x`m}&RQH`{2?XC-n))b$>gSv_dC(5
z&#Pn$j)*+Xj(aZ`oc;M?#75DRmxKk>JIt=hNzJ&v^MTUO*}{1x-%Hh#zwydT&rLIV
zebH#oh7_HiI*TYVBTc2H;@?;Pn0xcUCmlH}^<}&<e&@F>sL(jns#%k}T1{wmpQFXe
zz&#@E%0m09+dIwHp4V92T6W*7Cgn)z^7Dx%S+TRO+*>0Va&Y5^(@R~yzPeoeU2EsG
z{iVPDO?CSIKB4Nr`RYR-*MEC7ZS9-wTOFsQ%KtV0r|}`X;qT*Tv-md7+vBv{VU<j-
zfQC{}mCS@Ex6M0e_ATKEpYgV&&0U78C4{a2aRGl<<cYGw53|-OskZM-OJj}J6tOC_
zsjIvHJ&IZ2@}dhGOb*=_bv)8nteX1v+c(eL=2_ExuiUY{UV4UUzpa&%gk?hE#9j7v
zKcCL`)DJt;b0{kN-2`Lah>)X>jxG~I*RBxDnA-7n+tZnMrcGTnb0v3wjM@_RjU4s=
z|MJ)WYPa8aH*faM4=ROcA`dP*GT(Wf*``dxtp}s_%6+Y<xye6WfB&BUpPs(GT)us~
zf9melQ+pH}W<3b8@@?`7`;dF<;^9YILv0o?NIAOs-frwSSmPCVanXV@7v7Ci_VHQ^
z2?h2aUwlngakhl&s_RkTk5AV>{_(~(bH-KS)~_~(YDe$PWpP%q(aN%u$}<n#tNJ0t
zrMPc$@tVgL43icvOVz#|#usM!YQ?_`tBWRPI~u%Npz^f1WYgPcr<-QTd3$^@_I}H`
zQ}@%XP1e#JdyF2X^siPrC}w|WojaRLsH)1O{nuF4m7fKLEpqz*^{AnbPSCVzSsN5G
z#2$S<V84F%VX?C-n{OOGcC0aAVtL?kou4sV^&hU8TrZ!U@Y1L!`SjJJs>vU|xXo+I
zsXlqT)Z3ZWG&Rv%DvxVH%j+*Gypy6_uj>2n7e1=L^{exf-RIu3^IM-*&DAdFk@+{1
zRX96blef-x$La^3tG4XPpSk<azF#}uR&en@E1I~zs`ux={@N))N{5$KpNrn@mduhp
z_rj|se(!dAEek3CQ9C_OmAi-Wl|nPyzF!gYvEnIh`*_j|bW9cJKb8$={pK0k&p#!s
zwp(cKJ00z+Qcvc6Q)*wx=BO8&F7*2x<g!~+Pp-qo)+J-vsV)Cv3;SwZ#n)zT=R3kN
z?XZOI`Wj`XRbo86EIbN}-lqA#XZKgQSmt$p%XL%MyvXM|8^ye@-<^IadGS4c4at}4
za}BTUdc7c`yx_D$=34Kym1{TY>=c@lR)6BqlX^#Xx%ra27mDRFNOH6Zh$Un!4}Rcd
zVG%z?=J21Li<feR9NG0?pZG5mlT8!7x{9M_H72<vB&soe(cItTW&fL3q4@L><yGI8
z+`GQipe|eX|M#`~|9{sLh`Jsrzi0oSZ`J=Erk8LoiraC2|GI+@*XMjaa5-4q_T(n^
zn1?(@412$|&l8wc)HH!1o40#K`|JrK>@D~Fnk^(Q70lo~8)!P?qR$!)mN45V7B-g)
z&W3FF-OkIT-Xf=DI%C?M=#95+`F&Sj?XT)^@G@MPmUj0abH_}sqD6a*Wz?cV4y=fK
z{ZLLRDbiI;SVF-@LT=TftfSiuX5}vO$hFjPUATQwK&#b;>&ln41HLSmumAIL_xli^
ztFu(y-8%1IJ7cqy&usS0fIkJ_t8C=@zkh%H_u<w_kNWS*|E{awzqfYN?b%b$b}@2Y
zm8`s=*DsM2^DVHZSlMajjzg-<lZ6=FW`8!nVCg)|o}q<{>1U*a>EnKtMb{o%eEX2K
zN>MPbziN+t-Orz==hy$-RNr5;JD<sR)#ty8tMb34%@*|tIK*6FZl6{9ebdxjZ+T|d
zs{t)**zfYUO*Xlj6y`Ja!eT{^_PLp>^rjs@Eb)|&?~BZOE9<#)U)Jw+n|1Ez^YuI|
z_2(AMWj!jXbLrXn+82CnEu1k&f<&i&6%{^rEc9zHV_4*ltYWi!@v$j^eT}ak96HCC
zX3W8(^fX2_dFK7gtV_k$-<@gcA`<&Wd7{q;&i(Aq&1F|>f11R;$BT6r!{3zd$-(<X
z#d~}^UgY!}a(szcAkzFs@AI+hop)p_E%ypUU0OTq^Q6`Ht-KBA?!J1tdXL7@W=+4&
z4Cgy-8y{&Xt>4Kwx5593gvG;|-DxovKO!G^x7q6-dc(3~z0r#}PxfnUW^KvVRkNI$
zpPO){WISqU@!~gh^=K+?3u(K)_0#1$C7L?wM@xNcBHl3V*i`UrPE6XPH#=GDf{Gu!
zl=sh*tlN=$zHF0foS@br2eqT#EE6uR6c7wN6EZuaOUFeZ_b|(x8~&QtGg~CfzOWuy
z@_Uu=q*<1M1)iTH_HS;F&JeiwTez|*>Dk}wy>C7>8uWTEx|8(q_@mqfOYZqT<C3yX
zPdcEc6t!;Z=KY&070+>Xw&tGQ$mX*!GWglY64U*~eYuO?Co}o%j?r*w_E?%6#c1U4
zO!Ikz`rYdx_sWa5av7UAYR=N|;cRuZIAUq_>~E1!LF|U-&92sq{=ay*wXFHp<=#Tu
z#q*{_)L%UC>;1w#|7L%Y*dH~ocvk!$*Zb`8E149gM@(X^ss5RJzG`;(d%hp9OMb6U
zVg7TheS=DvK11`Ig=gk>PF|&W_QSj6C;FM0>$gsqS5#V-7FK1YZOc0&>%x<}&5PcJ
zbh-zH6m8_?J@`-{J&HjiX_aS5_|Y&Qj`ejiUsss+{)k%a<g-w%(!KPan2kqUu4+W~
zUD3>~JtiC>>le9hIoc)M#$>w0OG83VZ;DjUhnk`(Yj1>YTU%jR`C!G@75&>c{XP7?
z?nlY~e$My_T9fCs&D6Z<mZZ`eryUR~<Q4q=MHlO{3fmW3<&N*WE1$Z1Wp0$$v7JsA
zrik(SmHG9)l<Ii#xiy5@+e&+ulJ}z0INMa?Tg=L)%O!VZEeVL>aI?*_?Mqmppk%Y^
z`rd2Jhm!5(woSi%JB-V<Z`Whls0}C6mSzNgJjlko{7QRu#LX?16PXNx+kQX1H{(z4
zM5bWvWz*iTJ$9#_zxLG*-NnJXXL{ONdgbZ-XcUUAeO~LVZ*x`k&#{R|(*<SJucTe9
zSYEg6?8TewlY9O8`5ObBj>NrTz7UiBJh5<gYw65lABn$<9!2O_i0rg+SR6R%*1U&D
zs#Ij>J~$!M!B<$hz2NfRmrcp<cnhKw)_6@)^-JeG?bP2Z>L<Trjq*N2rnjXvV*1kx
z^xnEkGRwR-^Go@bzFx@RZCc)q7uR{xUvB>M;g-VI%{CdTx2^`dO6Gff;a6(=J=3`2
zty5OIzn|ZOg!Q{hrZEb<{QEN4zw+QYAL)ln&dvAxc(ur8)2^_q34XtV&HI;m9z5`e
z`J~pIzaKo48oxe2C8#TAeBeQmX|C5KzV<mjzY1QO<iET6U8;(;PUqmG(D|kYZs|_f
zYKot^Ea1J{8FRq!;KvCE41{|BJUOGr?_{!gzH<oAF*gVM!Z`85z<Tq=_2x4fx9YZ7
zwd>^O%Up7P=;!iw;w2epp3CN{Pf8D(6dPTemd~S7@@$u$UFJ8h{ncFVIxa2@v(B4b
zedjS-^TFkbL2jGpTruvOdVt5u&h%{T<IOVCnin%;^0$?E^w!O0U3dAav3BF?6hHTH
z=G?p}lS8$?;~heEo?1EO^55)}&fYNhr+jT-YqNmG%d6StZ|181*wLe<x7Fs~rC0O*
zRBOEZuV#B}eT}_XChyO#ti#inw0=%W*!u3zvUjZio$j8EpWl4Id%~|p?iGR03U21F
zQ|Dw5j6W1u9Z|VST+r#1UiHoXUn}@lL}_l1-ndBVLE!iAk3XK?C@;`BRjNiN@ZR<(
z{3{;S-R_#{l@Ydd>i(Lao9xTH+6q&r2!t-=)LVT~dBLKC>W!~>>~H;ZPz>>Zm1b*m
zUPvG%Fxa>&J-w(xbmEcPZH~(r%%=oxJDYGQeqYDy_xt|8<G26!Ztavi0cZH+J2+A&
zEcMk@a=A3?fTHio_GJ!hr|hxocQ12EQB^*)H_!T1+P28d-;7VpwbPbL<UG{v>Zn-k
z|Jp&&&3c{Vx3g9X0_iC-j?4>9%mZgfW-c~%udwZV_3&q{#KhW}9NwC#+U>tL%0DgJ
zzDZN^%kjr^pQ!2FIK!nV*Vh%)Gv)j7Joa1Z+KVnQh_Hp&3l;O28gE~lvy8Q6*|f|^
z+qr4er#yTA&T~bt*mK{Kv!!`fxz!cB8b9O;i`dU%Id|aIy3)+^Q)={7n<sEg^jYJO
zel5HH(&Lt^uRPq&6)alWY~QG|_$zbFMiq6FUlPapS8E(MDYWol7T!D6XkNrTZ{5y~
z(bJ}1YV3ZP6FQAU|Lfj4t&($+zHTmG)L&Gpu|bGSnl06UAvz_pdD65UhnC+FI<shV
zy6cH4clG)2MO1J9cWwLc_}!cGjwS2v|9r)1E$0zI=ZTA+$MXB_mY?Jlx@E?yi04mM
zzb;FZ{<v!YEUvy*xi42lmxwNIyuG`5;cmkcw&~A2XT9W|Sp2uLf6o&A(^G!wa~1NW
zoYB0sApFK-rSp~_tr(tp%zgawrf20!zE?|CEFb&1SN5N7S<m_I_%yb^9X^wHep~S%
zrhQ9Nnbzss{Gw+JIPwq1onqlXdFbe))uHmq3z;3KJdzb&(hyLb;>#j9@$(|V0K?7O
zE5p{V&+GrZ_5VTPSg{xlxAmH5_X_Mhmyq9h>y2DiijvDgPyN@SQ_~{En*$9xf{phn
zU90|OH6z06$Ujr&SxR#<oKBau_RTZYPfX(wn06<(Bys+nf^}LKR5H!)=W>-A`n0Vw
z$P7w(d%^p}s|UyCTunZtcils@(BC4>IefmF#{sQLzjB4<tbg(Tqy2ij=aJ#f;f9;`
zzsvskc-zCswWss<75(8o{qMT((Ub1|@t0fnRC&#*RerovCg;wvIHNX}`Z^h<q@@nJ
z>TFtyHh(OX?p#^mce6SEiq|venTh-JzuykKawa6Z*5=#%`hQE`@f+XXmXxMss^n#5
zt{7#aq~Igi=din8zK6|3IB;UiTI*F#w^x~{9M#q>?#rncS=G??UMbKtq}8FppzhYG
z`d=@X->>~{8El}UyVLUTNrB0R*@^F@RjgQc*rsfCE-k&hh*ir#u6kL6L6$|<4mtKS
zA->(Ih5VZzsdigvtJ&t6F<fI2ud+KZJEX%yzFyB(dEs;A?Z#(gxXy~~chOxIZLvu1
zt3}_3hNs+~&PInbx7~f8zxC7Kj~i~k{rx-EcXhk9{BPyT-rK%;+h@-(ths;JE6}xc
z`h#ten#n9z&V}bQ1(}+P8n1YEs^(zz{CS!8xRy<xvMc$zs`1(<oUFB#_w-pVt-qga
z(|-1x@&ac0Hhx)e6P4Q%)1)S?u6er0EX}jF&a5~qb((H<cWeZ6?q;3KXEuq<Gdo%w
zc#-+0^O0T8Ubb-Xn+e|x@YSykR0~{p`$$J%^L{VGM^d^roIxA8s?KFa&bxd^Ws=L3
zHHBGS2c~jdQV^Ude7VtS&o`mx=k?ZIKJ-#5<J*nAat6K7<XQGR-dz>{a!+Sja>&$(
z#|6cfMp|9Q{L34S7)|_Z7B{XFQdrkEd3vaHoTp)1(OmJ67gn!wJk&}A>J~rHm~n}n
zQ%glk@LBfe^=gbU`BRs2+MoaR{ItXQFS`!q&a!!PDnEYO>EHyjU9mAK>rQ*wu9ddg
znH#;dU#)0%hGMSRyFLC%JoVRioN!YS-Z<yc;;XM(7}SDS)YiRQr}q5fi>66Ck9a-d
zuovVD$*R~q<=Esa95t!Zk9NM^eeRv#8b8a|zr#xc^UXY${B63J!?IBF>TOSt%LbSG
zIoK8#%ssBNT|qzp#0>Wh**cajDKZ5DZw1ZYyxXSAyiVUMTqBpCKks^B7>nhl4BK|0
zl_xW`YE-_ho4ZeU!uIJOl$jEa{tXCPufyGK!OHM%s~Yc@Dyz5)PxrfBvi+~~Pww>V
z`eJ5>E|IvaSO1)~SNXV=!SDoYnptn~dGkk0?fz|ue!PColX~y{$N7&>z8|r9&%Z>m
z2VW)~z9asiGX7du+XbW0Ts_~8jzED6LW(^dT#YSR6<>GdFO}$d^6|#^@A_B!g<Kt9
zqy(%;x@{I5vVQBej86*X-wVDgpGeuZK>ehf9J@+rsDj+p;5Cz-cXg~WzkT9TYtYjL
zyOlF%J)Hl3yZxUZhjX=c{d;-@baQWLZ1`3o;;U8ICvn$y{`{A_iaOW5VaeIN`<}_f
z|B^q#S!cCWwjR1I$Fyo+uH4pb(>NQlG*3>>DLSI?QgX(nMY+uZ!b>ZT?M!Q!+I?a3
zzXpW|m&0`CUHbjk<B34i@4_pujz5gQ_x<?dsGBT7tL^39Z{PlV`KQX;Cg)!rj?EGc
z)m=IxO7mUemB-ue8eKae@4j;B#i%x(u!r*klnoslB^M~!Dex;zpIF20yWn;4jm^ia
z3^uRfI#aCJl6|;fk$QaB?>~ML!j7M8f^!n}r-Z3DD+^>@$dNcRKP359Rk7EuYu+&v
zJufab>z$@Dcj85+bg?$Bw(XO+Riu5R@5wGx+~R1|GUww3o5#nhf34rLcgC86d6L0L
z<>nm|wn|%bZ=;0hb^EKoHU{##FOKDu$_q%}DB<fZ-PzwO_s{UmhM2;a;-c>k{5(JN
zVbjqmPv@#nQ`38BDYd|B0^8h+Ox%1a`}94Tljn!_rE=?YF|AQc%w%NWtMO9B`_Y<k
z$=|hgu72D2+~+^r?h>u7v%k_P(`52TCeP!(dWtRwr&#(wG<$l;B<FPNL`T6x(``SS
z22az{6|5|%^S?cBDr4*Nr7Y9l6^TvR(NVK2=#$R9`TV;@h2~G+V|qn+arUL%tL)E7
zADMH_q=tL1q@I*Sv5biGl%ikz=A4?V=b7!eBCA6{V8fZ*@7dpf2W{8&?+iTj`5i;H
z+vW$0{>z?S`0VJ6YdRfSNlX{aD$jUUcpq|(XI5Bzv3N<)wMXpU3yd^E4peiv^zWLq
z+-1wfq}Mm@8?imv%FeAXYp^>#bK|2g6SiNuy!~3{gklBBtCM~fZRp#3=E6NgmP1bW
z=iHE)xt=TFrEG*tNdFuqy|%~2PSW;K?hp0X-2bn~Fth$If8+gsuJO-fJ~kixV!qE#
zPwMy0Nw(3^AGGUTY8LFTk6irt@S#89aY9ebO+)^4+aFmS71{L2_@V9mYZEtYR9n+C
zYkT$~`3oE}t5iHb9~RMCa{cgM57W|Kt$96O+?}TatT_4k6@6NHzH@wk_3q%6qtRZ^
z79Bk`!RdWK^Aom=n_TQ$wmr=hUR6+a#raUzT^^yY-|hc>{J#HptfB9g6axp}tqUUk
ztwe(Myf@n|Z}zjw@i{~8lj_9*9byi?wj1sCYLra(ITaKdx>;uF>QKRyo$JC%ZlBtg
zyM3*Qw}9aT!JlC&E=Q$b%s3I^$g$^nisj~cryLr;uw8k)LU7{l)34ljTwnZ4u<nC}
zjWEwA!L9aXn>XFvzA3k%s)fTQqF>8FGs<gW^8J3EU%a1AKkjzgp46MDzi3tDEH2m1
z-MO=GEne8LY*O_psU2ZG$!o4!d`Rv(|I|!&PIo7(O}PJh{UW}HPalcLyjc}Huea3r
z0#o8j&D)u~yXF*Sx_PB0PD~eB{-Y|?g6B_a^$yqjmgghI^q=xwe{rsK@11RzYS-}1
zn=Y`kz(Uoi)#d(V6RszfU1x6AdW2tJZ+5}6Dr$>#w%R)ZyTYkowmesBRa}_l+c&>%
z*T1>RN)M{DBHEeaUOn7gS$?uDR_4s!Aj!(}e{J916%?O;Z&OnQ*N@OgqN~?Gcw+zd
z!>6Suo_(0&7yt4M=WEkfnrz~Qs%dd@EWUl0?`uYCrNyak))kx^5-7+&D}%#z<wc?O
zKkodn)6JVTZBF-t)%ClDPcG|DH%ZN|?NmIg+&{fv<x^Gnyrr8oYod4W4rSYz68+HA
z;EvXD%NyytkAL~1)|jk5!THfVJ~z&ZRvEt}<vRtZZGG{(_{#$I{!dmy&WCRw{1xdS
zIwADd<40>OcKukV-duZQ)BH&$ZEEt@`Ad2ZwoejlJ~{cO?8F>4g<B%YCv)DN`12<7
z^vvWdB8%4QpXq6`FulF{<=*-l7j@~CX99R4uV$R?H4#tGR*Ly;|Ca5r+V1m3^Ch-_
zw`z;`Q+p9{gu#F6)kkUa1+5%m-+8ukt+V8RJ?Y6;m9NpRe~vTX{vYr$tisad|AHO=
z3$C1de*5w4_jfj!`bu93&N~u+=jn&*SI+3WI`0o`{J&NH%lg*`*R}mf?PBoQeogmR
z%(sN|3H{$0A1o9UYL~NBS|ZAK;+w>jth5OZ$L`DhezE0Zo&Vae3*24a#$>K|^<b|%
z|Fe*jj61T@3N&whu>bts&2f^O^>>cCdwKs~^6&ruaJPTz=E4IeHR0iNc2rB>cKPo#
zC+x7z%Ef=%ldE`3ViwKf)6e8;3v%S&Xwb59XVkJMRYt#A9W=w%<jhu8)VObL+F9UL
zp%QMgGv4yrcaDh-E4miEH&RwQX}MMIpn)Sx_Se4_MY@yT@~@0C(^b98+J9X6{>$HQ
z%WiLzF_w4lt=YSDhGtjj`b)vBbI#ojR-E|p)TN_ykDgkav%Ta`(McD}!pqCA)hyFz
z{oP!C=1Bo#*8UaurpKJ-&*z?#@>G1TL-g-glWzFEl=FHyqwmW<wwTc7kdtZ4&-x`N
z#^{TDc@X6u=lL$VKvs5>g`u^?*6QjLeSeFqJG7_IStk5iW$uAY%jcp(`(OB^`PTnR
zJa%HguwD3sXFq;KF<%Q84mMll-ESPbXJv@NRFA1e+%sOvT7Q_sS`pRO@qy(_%<;XO
zKJcphPdp*MB4Z}o>U(8p=Xcee+3+`H<F~Ze(jCUHlFvQaDd8afZ+3+CsgLS$bAIgA
z?5b;gZTZx}>c$J68@a!_&Ta3s;hwcY_pHsu^DoXka;R^gCe!--=FKVHHiw<vf^^ky
zb3Xsg{rD3@oP9#U(rq@j2QQR|KaPr@nx}rzXj$E}CvI;O?!9@q_<7HoX&h{_2LlYN
zer#gSkzkyAx0PqB6#Igei>Lg`?qpG463`x?yz->cESHAQ67O!-*{;-EKIOsdHFZBv
zhCMbpXIbLelQBcJF{F_7;4*_)2HDD+lLb>>Z9U+5DQDZmHAj{oOLMU-ux!29>LbZ@
zDbin->q7F0V|JMnwz`B}|G&cS0!xR?9mBKJ)^2Uhc(lYU;n0iL#>7L_iq-KCnG|(*
zUds;qe(`+bJfo^Y%ksL^s~z_Z?moZ2y6?I9^<&pJzq@jOUG5Xn`G@yE=&mnK>07^!
z<HtgV|4aS;+i3r~<|Dg**WJhQVc$=iAOE-hKJUE!|03^PIJ~ituaD*E(a(`qA_bqb
zm&x6|wae$MPVqX0MXQ#12>Jfo-n^~2qCY9xf5pd+3!c7wY4fItWIWlpaMJTNP7@qu
z6;%I}+t>U*KEJN~zL&H7WWh6xxh5N3x71Gz)R>!k=4h`Bce_iFlKjP^E4`OVD7l?k
zwq)XtRja2inkUVi9~yhXdBugKsBO8sr=E2?)VM77+F@k{#lPvV{H53w{gyT_X?k~j
z<NL1$6I|Brv;S5lV>vG)@-dH_k!P#YO_7CREAEAE&lSG->fKdg^Sq@q92c@2{;bP%
z_;9l$f7|8a+X7CB7mKvbn-{LM7rJ#a^oUR2seQ8}rG=z)KX19e;qT;svrOJzm$m=#
z;NI~==V$Jp`*(TXsvrXw)20mxCc@L%tS49B<2)E@yf00{H|)%!FL95OQywJkYBUJ$
zV&OWrNjvYr(z0tuue|V8OPltd_s8|;Gt$=`o3krX>CW@3c3E2=>OMJjCbRYchk0&)
z{fi%RJX%p1k|9bilV#U0n#r&*-d625Q;YDc|1!pX%bR<6O{ZkBEIK;fuKRua1Pco$
ztKXL5uP?nkdg5#4^V6p7X4-v{Yj=5h&ktR;Je2V%YjB6U%H=6tPrfeNdx^7W(hhYi
z4W((7l6D-@86oa33)3fP?{%LxZ~e;iYRAiu+3CD$E6w@8=iReO%DZ|JmivhCvIw4j
z<?!plw(pN7+I-1yHD~MDRvx`v^Xr?Bi+%<O&7L;x_P^<?(pn0xAKs#G_K2OGvvghk
z1n!2k5NYR<g$JG=)6uhO&`$4<uQw~7{=vZP`!ogTr_<hPChj}uleEyo>!Ys8gIl?_
zn_7-cQaQb5P5S2Rg0~MYoE3Fw^Smu*P6x-QA4&7kIv>z=wz%AE>RcHfLE((Yv0Ysn
ztX!g6m-)}i`Ak?LpD{hMPcBcWm9=<E|I!~$ZMRGe*v?<B*1Khp@N~yt?^g4PpX8tD
z+mr^JSN-XJRqfnX^Z8ztbs8)Gm&OX%ugI@I`~H7K=jXGp{$;a%TQBnY&(!ei>(}+J
zTmOo6TZWURTf_Mk(ox@n7RbGwVBlTm8}%@Bd(tw6nM&W%Cb<iry0dNe)S&Om%$q|$
zT~`*`F@5X)j;Yc<TPzN5*5+9M`=8v#`#k?YT<*VLU$=SL#vHyijFJ-PSFY2_Ja*UB
zbyA_?#S=3Yujsb>GreTn!LH5~QN{^wF1-C2iT87_2sm05iO9#EUtKk2ZBBHuCfB>~
zzi%%$Z*@~%_r=g-)!f^gd3?T3X}|vXqlFR^+cl}WD~~z+pZeYHKmL#}qtl}=bzff#
zTZ{YJsZ-vT?Z5xtz4ZGo`Gs<~brV;v%GxTIv3YetRh+xK?H;+CBAOq=-kEF1*Tjd)
zEdN#4v+FsB<Z{c8oA&*$s;`pWP*cMC_*c5W-^x~r#zL9inej>Ur`YO>X?DFxx^~hi
zX37@vkW*P^d9BXcj~C8Vs{C5&%W*{OjKlN!^Ve)K{%9__a^s$jt3ApuJr~>e#Avbw
z?~mi}V^&{JmHwRc%t){)_wPlO2g&)9*X_T2<=3OWy{Fu#DO$}}+pe7yBeunM<Bdo2
zZqAtg(bXxS!_Ro`60<*Di>I%5UGjI=6;8&d;m<Tq7k5wJa5McZTlLDTmsZbCTek5-
z>?UQOU|u(S#>mfe^qGGdulry6`i$bX2<DFTUsrd%e6YPOkN3}H=Gdc23Xx2G7EkZr
z?LKsHV*9rWv#F_F`rZ$Y$&@9>8FgROasOF4(dfm7eJ}cZc%$1HazDE0$+9Iqm?fpV
z`TeFV2h1|2eV97$=<=W+cb=cLoLFgA^x4Sm`NwZJSihWK=HI5Yf;05g?bY)wYfJV?
g%>8V}-}R?{nRxdqqeAET3=9kmp00i_>zopr08GNTg8%>k

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/fishbrain_logo.png b/tensorflow/contrib/lite/g3doc/images/landing-page/fishbrain_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..bc1bf6e1e719adb41b08c967d5adc2b7839d9453
GIT binary patch
literal 7764
zcmeAS@N?(olHy`uVBq!ia0y~yU{q&dVASAXV_;yA;xV7Xz`(#+;1OBOz`!jG!i)^F
z=14Fwuzt)8i71Ki^|4CM&(%vz$xlkvtH>>200A5Oih{)C?9>v4q}24xJX@vryZ0+8
zWTx0Eg`4^s_!c;)W@LI)6{QAO`Gq7`WhYyvDB0U7*i=|m<QC+nmLw`v<mTiRTUFR9
zfz7qbE4Bg&>nkaMm6T-LDmj8IREY2mP;kyKN>wn`Gt*5rG%-*xx70H<GcYkV)KM@p
zFf`XUFx59O(={-(GO(~RGE{&9B|8P1qLehNAQv~NT}3Hrwn`Z#B?VUc`sL;2dgaD?
z`9<mahL)C=`UXb&Mn<|tDQUXJm3bwJ6}oxF$}kgLQj3#|G7CyF^YauyCMG83mzLNn
zDM5{`$Sr^yn^z1CrsVuw{ffi_eM3D1ke9%IO-Y8UD=00>0b8Dute={bnwy$eQmk*N
zX9!nYkda@KU!0MT!)UlPiUk$91-`y;JFvPXHy3V2afxeL34YbZA(aKG`a!A1`K3k4
zsjg+Ic_qpqZx>iO7o{ea<QIkH=jXs8DgfqW1!rdk4d2A%%sh}6G?6r<x+IpQ+A0|t
z7+UBWSn3)YhZq`I8CqJITId>>TNxNYl=|kUWTsVOQ)*;vWo(I}G_@o#C9x!tpyHHd
ztF-*0+{6-FrOe#K^i=(VymTc6craQy=jY@X`R1pj+A0|%i+egd2NdO}W#**XDisI0
zJ1Ka&I-40<nwjYu=^28Z>+5UfnO9trn3tUD>0+x?kz1gbnVDjhlxSp@lA4;LYi5y}
zq-&C9Y^0lLm~5eIW|*34lx${hXp)i&)9;d>Tnb78ApMZskdmLAnPP=-o<X94af-2_
zxo)C`k*RL7sY#NqrBRZRu7OFKMVgs~p_!!_+%^71ndzB%i8;uof?So7nPO#-lw@L>
zW@xHwVxF9$Yhr0&u4|E+YNBglX_A^~nU-Q^m}CY@_TZFc<&jxjl3!E_%}Bwig-}*b
zrUE$eSS2QdGKH;DGAK2qrYJ#$GqHpRQ8q*MmuIBr;Y}qF+l%wlO3D+9QXSJ%^GXP&
zLVaA0FHS7O?{wm#65=LoPDe?#5M`t}9U3gj8Hsu6sVTNf`br?r=!424NTlea#}`-*
zsob(tumKANxwwIt;35`O+}bH1%JI~^6kDYtC40O5DHrn?7$i77T^vIyZoQq$Ib&{U
z<#FMZiM5&{tXuh)ZF9Y%w4h0Bq27t$jZT^(OFEWjY@M=bLF+2jkDOV7D?C=~{PH-d
z<1;PI|AgJi6lG4&w0C<q-#K|E<=f89Y2Rw!z5HIMXxKROWbD4DbKd{{JOBCj76BvU
zIq&EEw|)Ni-sgSBQ-X{c7)qKQ85quRGAJk)pNe3;7-?Lzxc}S?@sqPB3Tw_WT(6S%
z_g4DfRSXIbWa3_bJe+<16T?TQk1ae?_gX%6*Pl00dS>?D07E$e1|O$0-#9*U`Jc&j
zd>Lt6v}p59Nk)fL*4iI=KJr~Zqp4oD`)jWdj|D?YLJ;R|XBkuVrD^+iMtd2ZV3^SA
zsG6%U=zsFo95ItQd9?>yd2|>oGJ?2s^H1KG^F~(q7z=}*<P{a`6<5w^E>GcW=3say
zIpwKg=IqN4ADA1eF%(#`mDWtCsVd%dnYBWjok4EzQERoc301|DGOU`}80=);FEdj=
z`Ap@Y5W^n7D>cTGYqE;1YE)YfGBM-}zMpw*&JJyMhPt~)XJ4B0M9q+yg`qC7^1_p6
zDbsBe8yF6(UmW}8;>oj~SsM);7#>JZ+;j5m%Yua#Y#a=Ks<Oi^j-Feoa*lzqq5sJ8
zTb2{=voP#?X6?JRIH`c)K)8WA!+|*r3>XwBybaVr!LCp%cmMVoiv&N0s>080SyO$~
zuKpIAYA82DxIr$)#C};6n|rF@3hQlGjMP9vI;-D@xqm6ilumB4Wc+d7yxeR>^47Im
z=NBHF!|>;+|MajGjT<{3pUJ%N>+ea+9^Pjx4}_OTux;6%?BKg>by%2@+YI3bdmGW*
z(BPNR>&*>7-0ydGZ1eI}nH)UpvUX@}kMJ{=2fEM2kA&3z@0jH$de^x}6eRMQUoJ>}
zl1lQt?^#nm9-70j=i$EGuUesZx9po`cz6!Oo#%hMPlvvLQE}?omm~kgj`0>V6-+OG
zGG&Eq?q4n0wOMZx-x#Pfl+O&F6Yz3XZAQ;Vke<i8uCibG;S!X(DDqn11j8e97-XKm
z`LklxJK3<AujZ{S?NNTl^1x=2&&=L=+WF}+hUyIIJBz*7o(kK2i(SI|4CevMdz)AI
zl=Akc=K3i=7Jjwm4o7mAC1c8YPq9|r+EbdbyMHk}1`8xt&z?HDs-j#N%x%AtD0S8#
zH$m&^^?-#8#Y~GQMBP69y86oB+ppiUG1mRJNq^!RQu%#CQPiZa1D}u0IiSGRxAU(4
z)`{CxeqAa5X0r0M*ze0xFIy|xi@dF7=s#<*bnME>csIAmaC!;vt#G$>F@oQpr2o9M
zOzP>fyqgh+jgQYcAo8u8Z`qa&*}vVcY);L6)n*#{jWuR@eP6k*Mrf{c^14}<?@VGj
zFH_7EoMGW;dh+BILz`uBb{AIe+w*dPm975oKMQAytvCIqwSDWlmDAJvEE_fVY+V|(
zYtsJxJJv<lmPXIrwLoF>jC1?n<%Ptps7|?K<}}ax8Oto?{9|*j+;9oX&3PBh|28Y+
zee{$lhriCJwXa<G(elY_y8hY+H<SGqSij%ObHvxvey9Aq{EfUBo8Ie(d#Sv5w{+jO
zz4OxhEgRp}l(^n84qov}c3bf(yRWwxe`j^o{EwfqRdfB<nmx@BC3&%1KRK^$PA;np
zHJbV5PxbrPGEcyUmy2FqwZZdIQu>oXP;jrUEC^rBwRl~8@X^o5=QyONiEnSYG-uy$
zHc>7E^$Sv(@zQR(tKL5Qne4Z~ILJ+<XVRLAdG^m(q&9C`8QvN?T}LOCFTrnt_9PX_
zu)ARu6D%7n8`pU(ZQSr`WrlV!LorkAOZmARYb|sAz8JC_s9zA=d0i~6({Oj-bS?w+
z6O+8fr?uUhlhodJW#@XgncOpkUmSCvcHBABt2tS?Y-vmI4bw!w1?ux7!$qx<Ctc}`
z>^@!jZ-(%TGqTZ#ieH)=TC93^GwOKp@i_-r_I<5#e&Flrwdv2YP0n-U9lSNpaP~Mp
z(sSgm@J_UzXZnn#?c*B$P>sD@75@|D8S~F@_NXqms+eA<`rz+B9_D-3*V-+J>nUF`
z-zDnm_Hy%^hO5r6UmsS#Pift`Q|eC{ikZ%TSmZy4lkxrMxZYL&HS_0hOWE)zGWLIm
zZQA)M+gASGetmVeuJPe52I>Wkci%pX|79!m!A}2hidSXw%_Pa_Ws@fD-@T^rPvn}=
zzgOF&O8GqJ&JbSGZZc_4PT|a2;d;M9C*75=zikPhBi&WTyV5^R;={DX^QEni7oJ}8
zt%T=BY+Bx?z7}=i?}z3*`0nxXU;p}5A@TPwCdh|x+xBhsyorTO_r#LK?pH6ie64qU
zrPr-(-ZHN@zh1TTc)Mlm`?@(Y>i=rvH{Z=#xv1L1_s502lT#YXk343Zciih%;@?$^
zVr#2>uU_0&J45(cip}z8Zf~}`ea~GT{y$Y#a>>)}HCpYOhkw6YbcX3((avzGOf^%Z
zx62v>ckeuMl-tWtU2KPCjEQ}k`R<w$|H-q$@1|~lEjQO=m4O$}Q&oB9dwJO@HHYes
zU0<{2$Q%cG6}G~km$-IseCoe=ZB}V=f0mp?=!@@?_hz!~QT?(1*oI#(Zb$!qt8Dd*
zWuB5dcjl8H7uLCby?x5>cW>Xz{L^=MD(de~XxJ{T<@JU2+b!N_OBjVdOgYx;{d)J+
zg96uI`h{n2;;1;Sclvz8_RCQ>FC-tAG*mB`!>`}JdRAPI=GBJp;UM|w4Ji!wgHFv3
z-^pB=R>xAzl)kf|&PR0Hmxp$Gtb5*Hv3c-z$`bE$;@85H{T}EDSx*eV`J&?GjE8G?
zKRG+^<kpo2>b>2&ds|IZ51efHzW;6Rw*77qstcR6d3#+G{iKQ?-&z%*>Tf0a;hXj0
zX!GX%*AHwuJY(`R557K`%tK-y&Y2vJeiXQW-3yMC1J=>D;rH^cJWV=tl3@wogb+3k
z=L6At`-DD}`5X27to+q!DH(DsCC{^|;k(H0(*|sNjIVzBCHMQE>NA!;rQOYq46GHZ
za!R2>92M28Lwjw@x+~9c$~;{)b?T~p_y4r`a#oyPr+%3yKQ_5A_Hgp6r_6I-T|3nL
z%75CF+sBI>|3z~BI61#G^TQ(Nz6xVQ^#Uo)`HIu+eM7t*nC}(+50sLdGu!Em&p+v(
z`G5BrYwGSdP%l`TclrGq|A!k`_WbvI+))1bK!)2=pC|84enrGOt=n^F&qAqUrt%5b
zTprI-&p9RNQ`B#C_QSQEOFCzChe)1X=CpD?o2vU6pZ@DJgy#roPN?@deCz&AmOW43
z+bpX*5Z#yRCsIFo{ln*i{;Q-C{2oZjM+B`qwfiflP<r&GBP~yYJEbkZ{&|x9Q}b?L
z`HWx&?yH_tefBfo3)*OWxnX;r>7M1*d~;tXytn)rRk!cxWPY=`LMDdG8^qTCu3U3=
zdc$|_NR~ZEF9@-vR(N|gTt5FUb$)z-&5qTA#Z2EPo}PDD?x^9DNx1>c_kuk?&6rb{
zHi`Y|@+D%{&sgSt=)5|0ote{wgs+dQo25U@^P3d%^2_H*>tAlS+qO!mnCbnj%y)V6
z1vNsaYqu$+RV>#!`K#>lr0w@786BG;Tyf00SXpcRGVkt&@7x~~rkE5So^$TnfjKG~
z>z%Dj&S<8b5Y)LW_9T(JqWx=x`mU(vV{>N&bL>n_?tSHHoM_y`_}+l+NWVv(Y=Ykd
zZuyw7g{!LCgZV1v&q+M+`y<126V4>9gMyyM;b%B&9yc}3xOL%<_kr0eXSQc%XWKtx
z`KM&9v{Lu)wTA5>3>?)5tVLO0{yMdB(!KH-!Wjpdt)<?yoPEw&v3%)*m_KuK!aFP*
z%~G<w9C<62YhCtU=A-s<W`||tMi$!#bJsb}jL6B;@|;_BhO<Y7hw;5nU&XSiFX!rA
z6f0(W+*hZ6EHkQdQN`I1UcS7mD-Iqkne<6>hOk7liK@qiEk+_yVfTA28~=G6sLmDk
z)jqsL-d$*h@C(@w(|+A&*>iZgkA7LQ-vg^~oz)w-`xk!jeAHRLL!r&Gah}MwFB`p_
zS6#o&w&FQURng&DT45O-OTEPt&T!s4_KaosvJGBYwW+txaTha%Pkc6~LxkxJ=ayvu
z;|$s+M}EE8m34;m%j0in*(2vFh7~?v5cXNHxM^|eT&?Q1$c1la8K__2e)*;A&}!Av
zySL7Ip5c5`=9252yyn7$tdtt3vzMOm%06Q`{xM|BR8grU+l8KS!XWN(?^AF6{Y3mk
zRex>CSkP&C@b-i~Vj_N?zAL}$X<aVMOY&2RSv28QeoO{em!;#iIn$&SOBIexvRM4C
zSfrRsQ;5a&VMv_s^jVn)WRA{p>eyo@r^F%W1!@#5kJ|lAa)$7W6H;F9+JaPLn_tPW
z-(H^V_dw%&-rD4;Z2_rwonPIJH2HUA&VkS+U$UMX%zTh|dEYt5(uaR$6n+*h<`U%2
z;@=qLyexO^Urp1WN9G(b&(pR!;l(@in)&xt#-Ejsor^r9sdC&>?^~PqCT}}k^Q13(
zj?US!sEKKrx-zHorx^yzCwxB8#}=0Dp>o#${mm%x89TczStVXBx@c~7ZWV81R+ame
zsoIYkXYHR6>@YFx(Ipu{*7(&@)mMJ9Zai={@r>pIo8Vi=R`Kpw!gpP{dKQyl{GmBc
zhjgX0BX%-Q51F!~Dm8V%bHisM21_a=A|D*Q_W2{<;?L6!mJ29tKU==#>*gxvRc-$S
zcRM6RJTrbKq7b+0O#&->z{!-LzsIV!EsJwLILGOndHJOCA}&X(+ph9vuX3_nZn&JG
zV6SPVyZWRxPZ%;QYMjpMdP>x@vKDhyGVCdxz)&$^%dU;b^KOcM+u3U=*>gleDYkjd
zvxXN5_d9lle(uwlA-rS?qu$pA6Mw#vlnn2GpBwg`=c6EBF_(bl)z~#zhfVD!hUg!$
zelyAJOy(I)fj<_L5>lntE&Y1;RadI$ne;Q77B+i!cTUaSV5a`-O6h8TgXxCL6F&Ms
zF_K(uE6e=q8Sm9I$!9bxYI3Xh&eiNWb4hxd=1JF_X9mwiJl^T83Ko`_T)6DZQr{Uu
z&qNa5X{`!PPN|n(yvRlVxcM1PneR_;7|v8&>GpHwb&ir`Kg9>Xe@8`pY<rQuG3=Sn
zGm)CKprq>_XZ7+D|0Vey)D!#eB|7u|GOta0(~bHKmoF&%*c7yN?^La6%U+kWTV3@8
zxmKg5#5LyX_x=r=n3Zn+-PC+&PS5i`-K|*(IZdLn1}FczKQn)pl2h|Clj(lrwZFGt
z&wM>&a^#<zm$<gBpR82qIDLxWzG>e3(q>*x?LK|we*W6v{W{s(y(hiPc^xi#*LhW8
zLdeyx{VQJGOZ@%)=ryK$S1v6VyIOX8u4|oCZs`9_t1qq-t2leg>+8GLrN`#J-M?{j
zWAdD1S@KfB@pmE?pIZC#dCQa60WRkligz6`5<a=4G|2g}@TxPj7__CTz^!$~(|hc;
zuZ!B7?YJ_GS(o+bv@4~Xf0<tU^(^i;^Lr1S3!1i~s-D5s#gn4U;=HWx=WX2ge7&~S
zmF8)gp51e~pI1kh2E=Lq|NOf5)rAultK%O(zW)7QhFzYx-v5|47cNF?#nq)$${fCU
zbIvZ4%*k^lWVYI?{=fXoW%uP@EmQo&^JMt#x2?LgZ*Je0(A4uAWR+&T`RN(TsD7E-
zK&JPsrxeSpRe9Ozn-3c4RI>hg>dW4r_ulv+KUY$jeYM`(S8J#9m#o_SY5Ds_yI0?x
z$uIuW*?nz6@7b50hqL0nSI@iiO7C~Xx5VsdJ7d;hq2kxFPdS!vPI>-k`|RxCS)1p_
z$G^St{cr!@JNqwRo3n&5<^PJE`KzqfB)@#LEAMyjL-xMRDvf1dCVQ-Yb$Ck84c)o%
z0-sgGq^wO^UcQ*)_2Dgp=nMM<+2H@%R|y9jIhgKM{-&of+4A?t(5ZcYc!JV?HZk>;
z-I_Id&)%rF1y+ns!Jd!m_G@GZr$0-dp1ozkw+YJj=hef)lWeTi6X$)-x^nN4(vxK>
zGjw;$sXtq@=b~)sf|yV7`HSxE_;x)U)D;Z<%D1^`!s9F{iO~4FVb3e7PMlA^dB1hZ
z%SOfW*9IC-mOR;Z;LaqMYwkN%T3Fk!nx*^rwA=i+;MR{biav&a>fFK7z3J+o=JqS{
zf8%Gq{v7(QB~5vKzD9x{Uyah*%vY(c3%7iYN?xB_xHRPCm%0^u?j9}S(V1CP<kxxk
zDAU4_E!Tb->^`@=NTsOJcJl5?lepVoO2nM^JhiZ-*i+0`ai{Q4Pu^dL@6JrS%5<`D
z*47;|mksycn6qb_t{Pu!U*wG-q38N0*-@I}$Aho#$<q7BwoK^R%SyrBlUcdet9<^q
z^HbUH!(PXaDsvtU;`rz?NjrMFj?U47rm$t<U3H$NP9kzATVK0>Gg(<2-<!3+;oijH
zr2E%r`^9UnI-JuK<s4JBOLy(^u9OL;W`#dqs2RFPepptT`R};w*HxtjZ*r~GX34ZH
zIe2hy^y?KGk_J;tcyH}q(k(ut{>{W&0p|HDwyn<k*PQD0#H4l2?^8>!-Hf$MoVYV<
zbKlFL_p!4&H*qKytX-J5dE5Vzlcw%PzAtB9GW-3jE%Uh9fjO7l-UXaYJ+qPX)WcUl
ze%1RQZ}@d^pV-%hPepaiIMRcCH5W!dDNGmtQs%k-p!j3iSML_>T9dVbnPuJM^(k+!
zee2EGTs$HC#HGzS<-Z*i%FB7LZT!W0y62tNs-ry&#a4pLpSoQszn}4GFYnZt%@*ID
zTPf==%8Q9k-G6eT^cSIKPpNxzT_4ryC>@w{$wlbLGZFO~<CP(=Lc0@V_innR*P|92
z&82gBQf%1q4gWls+bTcaa+B?hru33y{nu~ZyEXsodEH~G;;v8aW~Q9<K0Y1PJC0}A
z6LK^0`j%;zWM9v!3QhE$e=Dul-_VTH>Gb#M_3Gx|r(gemBV1?6(Wx=4Gh^>0)$4Y0
z-%XKR@kAnZQGDo@%?%92r`R$b-)$|~y`V}-cuHIU%U8{(E12151c%wYDb&{LRG(?)
zCO!Ld^<~Yw#Su0QmZ_azrhfKnP8OawrK?oR^3~GC(kq&IX9Rz9*|ldi_tzWu(v*+C
zFn%3m|0~b)%f<qR;!|Rkv!8_4O`P%IWLw;63)|iulRZD09GJ7^>8z<!=Sf`Nk=n4V
zyy)&6r?$N@JJK#PJbS{x|0d(ZkvS^dcfM?#bmZzwlccw6O4fRC&X`>Ir@V6a>ti8R
z87Izpnb)paGVfA}dqQT|$tK1#Cl&AP<FU!oO*{F4?^K2E%GrBvtxzeO=QLCJz??aY
zFF(yzShjEO?5@+(!*1yAy&omf_gE&ukFV(BpBbx<sdRR$vlckj6egd&xVyHOdGVDb
zrZXoq*Oh;~)%8YAkGG&xVCN+z)sm;-Vw?$ne4k6F-@D>@>p=WERc9%N;!_irUwUzD
zR^PtO%R4)n8!S`(;(Vt+J~k!Z**lZr%*p1S=OrO`EIoMG7>ZBLW41lqCpmeEO3bz$
zJ)9uL=MUUyNoM>oJ4j<EH^Yp}oHHwb)SnToO-^SPW@RWYY6~t_?vuFOzr&xc!P4^N
z&7T+7^m@+XFmPlrP<P+CKw<Z4?e!t0te>*D7@nnk*q9e*dW@yPa;L}l6BW0PeYt(g
zuZNSNxM=#^+`pNUmxX^v&k$gkG5NPm?5$}<*2{lyyJ{@SV4$AAL#y=}+a<%NTvpea
z68z$n4VRp}Vr{kVnTYzEYk@vY3};RjFX_KCd1A|6=Sd7duU)IN-d6JPj|9IwLopM0
zP!UEQn!|uD1XX}j24g%DI!K8amIMu1LU?C55o`vA>1Q<6FCBlt(2yS?XL9wNCVb#i
z)X0M2!Q*o)3%ce8tMoH6G5qg%;+yBU^P;<?48w=Q=U+}dc@`ot_b3Bn14!#m%g$$0
zk4UI6d}y2zapB6dDbh1fD=;=Z@A&SM?f3Iy_sMc5hV`Fk=YR%hT`Vu&<dI;gNcWre
ze9y|4XF=o3(<ZA*m@s@uxISfrt-5OQrgHy{x7Y5UlJbG!fNQeXt+?VIzZ^fA_Y4OP
zKaQz-w&aS>>6>Ck9Sjc+t;<~a)=+ZhWZC*G3)yBShU2}Sds2_3R4)oS`|<_fsuVs$
z4TckT$5JX!s#q^ObLQm_1<U-83<{gK7v*!-S*QECU*=h~+4BlZ!xUki)a@lcyBe&Y
zmAFhgTlr%Zclk$#gipJR9tq~B)h%E3=IqM|PgCaIcf3^IpKU5FEDK{$yd^0c_qlZ&
tdxv<3dddpVvz!bL;u0+2=nQ)LpRu~8UcqTzVlimm*VEO{Wt~$(697T6&7=ST

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/fishbrain_logo_big.png b/tensorflow/contrib/lite/g3doc/images/landing-page/fishbrain_logo_big.png
new file mode 100644
index 0000000000000000000000000000000000000000..d76fca86a92d4b77d529e3572acaa6198a986b86
GIT binary patch
literal 16308
zcmeAS@N?(olHy`uVBq!ia0y~yU}9ikU}WcDV_;yoG4<bl1_lPUByV>YhW{YAVDIwD
z3=9mM1s;*b3=De8Ak0{?)V_>?fq}im)7O>#5vPzCi~e@MQ<oVSxPDfJM3e+2mMat#
z<)>xlq$-qD7NjbqR%E6zFhsPzoe^24?A_<`zgBcF%lrq2i)t9oy_CK3>9>-H=G2*|
zu7`eI^?#O45=*n7-2oo9=B9s-_v`)<Uq30DC-hgw=B+1IpR@Y>DCNlY*QGui?dHp`
z|FiGc<*x$2f14ER$ee!NzHEJRRLpdF{x84w?W^A>_p8$1_#SiZnLo44#pd5UfB$>$
zcL&w|>z052ZgBtn+Fz;G@Auc-JG_tm|JUlbERVS!PnFp;EktSl)pX4@3wkzL_}N*W
zuXCNJJ@tIlukRbV=P%#Sl0W6V?mv;;JMNWFneQx^RDJ(KUFXH$Rg>b^)$_a%{IzS5
zh<)Jim+e;{^~J}>C#9FQ&)iqGJxe)AcaQm3!9&k`q$QV3d%p45?$7FTzCX9E<lk1|
z@TTe6vs$NFn!>r>rgyhYQmyS%mz@@|`Pn(c=aWwt=T4cIZpW{2_5R5s_8HO#G^R6W
zwVwW2tow3ix!vcvCNqAjy*tx+P{{w!H@^F^f6DL8XP#+U8vH4t@6(a4^Ka&)N%qaD
zFA{H1sXiVu?}y#{52}*(aeO(9j+KXOZanDvZF*$dm*(iF2X_|CKO*%=+caqZdqxhy
zV(xBFR+gmZ8!MOID1N6Yc%0F$d+PI`&uon<lk&OP%$AxQP?VlAJH4vv-PuX;(<h2G
zOt|F5E)~SSAxref<<||)-d$G|lzeyb%-j@}xz#g!t@-s8SH0KAR=wWaZ}(d&^@`%+
zUb|y6lF!<33#n~BEB9<h@w%f+x)+N3>%WW0ejZzPchl{6)o*rQZ?s!3XZiMv@j08<
z&!Y8S*57{nu+RGar6mELtGmqJT9v#q+L3m2Rj27|vz%MMT;BG+I=^CR{Dj}xS1w7L
zy05YO{#AZO;m+^<ijz)vTBIpV%Vzrcj>F-_n;AzZW}WSulPs35rm4~#CB*(*&7Fs3
z^)j7lb{l^;-id!Cw&b7pj;HpD;xfDbvlJa%SgrisI`961vc2!-{y61!-l;!Rr)@)F
zbKlhCpJsiz?taYf=9F0pM&++(U(YczIa*+T<7;7Ayk1~Y^7ChF&BZV8TVJ;On%tN5
zJl(DHkF#;_Zt0JHlJ@fUp$zsTRk1(kEh~?j<$V6-0zZQlp~crD%Vwv2{Bq~4YRb)Z
zhtDsIdOp1_{KNXToPU+Fv1wDwe6_r4j$O#AI-O~gcwD^fPf)^fmSQ#e4Kb>_vZDXY
ze)jE*RSRdW((B(d45d#?6~9!=bG^RisG8;_Nw%YDil>fW`xW)S`s@3x$#QxV3jbVw
zG=Ht_>*Y;m=B&o$SHkA>iThev>2Pkb6VqRs!92UPEq0NG(A?EoaaByr!ZG~Ywp|tZ
z;&c6_-ueHRRvpq__C8cFlu4<wMA`AQx#QWhZueLFUSe@}WP5w3Ytoh~v9zNrF5Eq}
zsp$Kyy%Qx2-o|NH2MUERw>!rif9&n!TM24wMZRWlE4g*n+CciN@O`G@6QAaWtbF=d
zM_5kYNh|v9uBgc&s-0e&qW*5_y1k}9`N!8o_T_J~Z+Xm4Jt-mcjqB)*iRXOs{&)W0
z-q7EBD2U6(nt%6q(MvmRSG)WbQgaeiKlN)?$2G?<9XEEoz2lwpS|vf$lWG0MH!4iq
z*YH~Gxg}>TywmpNH3<&4OVPRF?_A0x3a#50OSjGZQ75<JyXxH9BOWIjxLVG8&1*|;
z(BWFSHr2T<bX$t}QEAJgDPd{*o^U>7_cHN`^k_MDRj=%2%-65U6Q0bO=5+tEjKQVi
zw^=i!%Qma0m8_|H;HuOVn-FOiHuvAwsWD%rj&*M3c*gssaI0t)Q)bh1j&l7cZf}Dh
zd})welW!a%dFfg}&b?{3#2;<zymKq)QQCu9)e<T<#imR-y{uS^)#;be#yYQ$0nf{3
zB|iRkF8$$U-{O0VW1lr8n49XRJDxC%=z1lmwP|X@{f#LTCNs^N?Qym&;FGsc_w&a-
z;=AHKKX*-AH*?LL%$A;YhnJuGxAr4@vQXX>p@{+klcRGy{kvS`mQ?vFiB+nt^~=*a
zb^dVRHa4Gx4*_QXdXp>sCK=V`^m8rtx*7a2@c7}R*Fh^J*4H1u7xuB#uFLA-{^g5`
zO*2YT0=TYA#_Z@6IdxRq`JtzCjLFZiRckkWjtvi<{rZilVuaDF$NN~Sw{P-m*6r{9
zC}Y!VbgCihyxh8rTZ2~xEPugs*6ZP|5a!sBBgYah$iFz$w)I8zKB?Cku8)}9?(0t2
zl2q<B^Mkag$_+1FwM?nf`ny^eYVRxWE_dE+Zn0=TvzlIyx=8v)ant4xGp&UeJBDrC
zxU*e}oBQ~dX)78xZQI<}b!o+xTLI!%TetLHR*!Z)c5zBoz?sVvgCD7|a$U)+&R%XK
z@UJYOp?XE<1%+)6C68>Eh%S+hkgi|}xOCxa#nynzY+V~O<87_yZz$GF?XYl@WnFu#
zK=<2$HFJ2bW#}B)%e%^t+gQ&~tfpy|)P`w4)Hf+<zNq-&cVfHfsiG!v(G@GCH0J)E
z9eutqdeV2nuuqdCIsf;}{(1T0lA2S+KUST8!LBFx(DAK>Y09$b;|@X_{w(@8`-g<k
za=Rn@C7M#Am?Zg*uAKWKH>X_1Vv*27o+A-2ZmqcWSTo|ZhpdN&xLlc6)Qh7^&jLgi
zzDT?|XZf_x8OC;t?ERLU_xiJO@{_7^#=}~ZI4o{jJMb>_o^av<;}x!08N(|pgM_C_
zPF&#R@^wqr^sjms+_t<vxbb?$JH^dzC1>xpOBqjBIp84ge=w~j=>1yhgYOQoxbX<2
z9x?RZ)B0%M%uL(TC0jWwZL_Yg{(b1ky2`?%){Hp|bRMdn{Bh~z{YSs1-M!8ACc&RK
zvmrF}A)kpXOLY1jU8lY?R;EcE1<QrlTsD;j-|N^p=hU7b3~cR<4c-^;7bhC6v{-V!
zB<x<UaLhrKw_(DWrw&X}j@iHc>Y;hnc@@f&E_UnbTG`}3*_VF9!I6O{E9l>z6G``C
zXO{J`2+N7CI(l2v?arDr8@)dr2yj`Sw<Yiequw+5?gpI`i{^Xwn$62=%ix=mU2?I&
zL{WPNW1i+01(7RlViVJ5XT{xl{9?}!*(@Ow!_UXBE^j-2MgOAl-dQdEMW2?=3Rv#3
z_ToaX#yn3&z0R;7QE@jLRV4nsIcd$aT>kL!llu#o%={TBa_D{N!#nXdA(b{ik{87s
z){9wuK=pm%kC(etl>V`n$=YQIRU|zrlb&Rm`RsJz?pJ~3MN?~Dh8$b+^3d8{M~}Fu
zlm@rGUeX%z_@lydrE@LMC#?vW`^nX;Qo1U-U5491WM}I_6*J9CE+yA&`WgZcF|+K5
zh~bvxs`p);{z5M{!vEfllH}#JJLlwux7u!)ICaf}w{PMWmoIZ)vU_#oF@eB>%}pB*
zN`-vq$nf3%Q|rXNopn_g*Kd26eqG?-k|X!#lm)e{Yc2VG&HPV_^r8D-`8_YTbR-@(
zN;EjBaAtBKL$#PfNYt)9*P}|RnFU_vH>kMZkvVGEd-~VL<14s!dI~SnKA@DedwyMu
zL;Ma!uYZ%uOwXHYom0H%*;9I_EAX2*n}D)A4~zYs`06bW7S|d_)Smr(*QCVbth6H!
zZx)N{^<`b``9C)0o_YNI!F$0!_Ce<*_V@n1tN*Y#C+k$v^LHzo5*KWDT&35L-lg_5
zC1`un*;jI>g4zDRdiUb-^bcqKO?JGpn||zd<k$J)e^YNitncjj|LOOldYzq9B2?!G
zH$VT%vZCj}3C8M_$t{dCdUmL-TA}e_&Q6KfG5^;yWO|>vw(O#z%~Z907rGT%x9qP?
zC>QA3mFJwEk*hgl@sIkF6AO07OS$Y*TysCF@q@X|k^lT(>J26={9yctU(s~eA_a41
zJq8978&4O<kcwMx=d#a;3H{lAK2J(T(Sd2AoQo9;<4PshRZN!xmuP5kbp^RN2xvAn
zX^2$`9yzm3B*@c4nbVVlbIIE?IqOg6ZA-7tzyH5xbNV0s(`Vk@DL#E?=Fh$7Kig`k
z7#F{{{XXaWmiHWt40u6<?UJ7@9v{`ZL*B<7dTi}qv8TL4a%TUD3b$jMIa*$RsF*+L
z|DUZh9_|fX@@t>k|Njy{&o6Pf$;`m;z(8)lfMn#!O&9Im!cNpr5-r)+6DApJ^eaVx
zDKYTN=@%0(Dkg95S-R;=ta?z5u<I=O_eu;57HkvWeEqF<!s~}(q~!EfTvEKEhfcnj
z@HBs^$c6lln>RfxG#MEV<ZXJD@_4?fYuLW`r@VGZ^9m}R3KWvy^*Ha(*P+S4z##n~
z@&AFC`m|MRDylF3P>2x<4Pw$0)r{+5VPI&`UGmHQWY@RCFMkBrarbX`sHi#K6&YFK
z&&Y5fsN(3><K<mza<VTzP>9(XaAES#T^=9%4xZ*>U<jM^NLIT#GC%2{{Gp>4RZhRm
ztcYY}IG|;;<NJKe(69|pGg1Wf`0myHYSLLB!Og(HaP7-K=96{nLaR4kyr6JL=5<Qt
zU1@z?28M{n<Jw1f-f#B!(A?2@L-F+MgB6*K3<tQ<Qi}Gp|EzcMS;IVIVkJvl6a&Ks
zKL)Xdg^ouY3hKI&KU`vBXeedcAkoXM<aUm?0TgX442<CTV{~u;aUB>01VFSv!;YRn
zDC<a<^W~Z^KXwIqwSlA>7*yPPzNcJzJ^Qrg{&l^5XFfQCgdG@!6!YRH`J2AI8*=-e
zv$@`xR*-N5!;$Jac9;DlP1hdz^6mTO$*a2*K;jAvA7!eSSDbvjp3CHq<GnL$mKZ{<
z`@s|U=KGCTNm;MoEj^gCCS#eV90y1TheOP<?f1_wJJiM}<>g#$E<0O-n^6%Y!|1Wb
z!se>`YVTK>7f$^DpYitH(%I9+p#FKJ>2|u}-Q~GgF5Y%u_~Pa3<=4%oI)ZdLFg()C
z`>W7jw!QRnfzG+rOFzo#vw$R66yzSY@Yv_B%2-w>HSg?I-I9`7{Gk3R1Czk#=zl5w
zc2}c+w(8bD%y{!=VgC73O(0DT3`dGoPVPu|4P21e&l1O8@ViS1;_L=tL)UMY*3X*n
zef<0vcB$OecHf&UZ-!jD;5YHo_51$$dFxvs23QEDpZ|3E+ATA!mHEqmR&FhM{>jP8
z`_zFr&0pM~_@>mCJ37WL2sA%*?US37^nIA$3*2MO)VsB=a@FRotN#6K<%*5X(@wMf
zIa#{&OYhCd#=s^IcXqdGqnAH<tygHDnYRC-#ss0FOFs4xuO4`}BQeixE05<{=_!nP
zM_ya+%FOg+-n(Jnlyxx^q$MOJ{Mgn_tY5ZgX?Mwr_iq-?7JuRda;yWxn?CuIZ03^|
zR>hr+zkJPi-zu@Hf18VTCD!rD&V2W~iAP5J|NrN^XRVn3<;CRsj~A~Vowm*un&1SL
z-o->Yuq}VZT)%JS>gn6UOoMl=@5+mr<bQnm&lg|U?q0FmfBR4Q2lxNIa(Lzd4aDBU
z_p^i!GZ~b8txNZhe=uSGHi2MHw~tr1TvFkOdgWS0Qu0hq#tl+_k~XzvVo=d@DYpM_
z#d+#w{b%MDs^7Lo01}f4%NdqDzqZSZp<?4h>Di|ql`1NLG$=4!>`#$g?Ij(f%5Y}S
zo~T(O^En&A36`;=u4mH3*sD%?%A2?K_W40m_YI-yWfhs}wYy)lEZDVn_P&V^n?V88
zz;Hv5ecA67Yc?J><+Xn<aAH;F()s$@3pnN0K(gBgRlgZ?U3bs2UVQJvXQ`H3-}0(5
zi!FW6ABY5LS76w7tnzcwid9>+SM}Dr=gY6vT)F(U_|r=k{&(&dgM!3?VUhgxQ*P6%
z?_F##W}9wSa^d~;sh1umiv98ZQ1P;hb!W-Up5XiEmpL2%Vf2((y3GI_Z%hn~7cAKy
z9InZF^Qv^u$NXvcEsb}#=}qdb3Yzh@^ZvZau9Bb5U-5KI^jw+ucG0P4ey3LFF17qw
z`sH|UYRUIYOuItrjvdV07a_<5al@S6i<_R?m>!k3*z!E)=8k_Use)%utz5Xy?)fH$
zFDKsLio5FceT~MF&cwFg{GMMgU4C62Q0Q#k`aAQlmPKa3PLbxdqGwIach7nk7b<hZ
zcEk4SVh(V&WlD%I+rQE1hr`^*X{i@|y+6Iwf9<7dy18lEG?RcjXZF&P??L7H^$QqF
zx#F@v9J|IfYo6S$?RIX*zKcJ-^yyL6t2b{i&kA~wy(;77qRO3Hzx;TWWaSw5UjpLl
zM~25vwEbJbV;^g$-(L4O(av^nqI&9+t3h$G(GL=2?Y3P%8TT*XR`supw|#883KR1Z
zM3k<WemGTsk8#89y|*{5X_5U@tsQNpsk(bsUgQ+_;5!o|I6%otfFarNnz4aRT~u5D
zK5gmW)z_w6sNWO1W$BuUMXAa0iq$tR=DcwJdcs@E`bGb1ua^f5dfz`0(Z9QOje>#h
zh2+cm&;8fFf3c9)TKeU4?XDLaUUV@)5~bjo+n+c5m{e1h8Z4<h!S-hV=~*kb?3}!-
z>i3Fpvl+E{-jCKlXl>;#%6@t0W@Mx2?}cJNYm5EN&Q7|1Wch!~so4$gtk<TPu?X?L
zo%;x!a9IxQZU1^__bQ7EyZc$ILe%W-&XpI<VEeWpMBesiA?MqSHw7iUhZ3TeE;w_J
zcRx2H*mn&t<ZnDaxuiO!f9r;Yb7O8j<o~m7*0#K=++xeQB5ITG&--MxE^dPN>Qw$Y
z)sOOKEYg@N^y$X&rCa6~FJ=T4?hOnQ(wsX=>iBZ+T@~3HS8P#n^k3A?74f?@`oDc>
zxi_^SWLN*aJDa*!-QE?WZn*Yt?UZ+`7cIVA3(>p7*l*s;<*KZujbC;KzW=(~wXT1?
z_EP0TI%{UX_;_jkG|>xU(&9fO8P_k5*t@t<Z`Nd1P)_4$ux`HmY~Pm3>2G#^b6b1x
ziBDZ|QQP?^$4|CdTnq?K_xD*7pJ4xU&7zxmoRIR#!QP_#-r{Vp6`QW!R4koWcjMU1
zKVf2QLcFcNa@oBdAFi!u{rYy=`=AH!^>be>Np;<wxp04%SZes?TbtanrcAn+e{sdJ
z=Tpy?{ClGVNp5#`TYR|2|4C!@wzGnq4}9wKbJs3dpZ>%7+po&gkG^uV8~rT3Y2bRW
z`e*#jP2K0_y<A=!w5RdDf!9;F9chkwYqeHxSK()>KO&?lQL_)49@g>7wLRbaw(RW2
zau%bXrk8rV+s;4v{^?n4<6MTaKV0Voe#{naZCum-@ZDjPn+fMXg+~_2TP*z`J^OU#
z9(HhCGKe3)eu-Ube;N1o>xI@)cWyY=N9|WOSR-#VdH!+lay{(@Yp0%VY%M9fplknq
z{`{%zMv8qq?40hk`=6Sv^E1w_aYDZmxH-Ub;Oas3zk7GA^!YaJtp2XYpP9v;UA?gV
zS<Cq+<=^Jy%$gf2>#-{Nxd_X1dkecmx>Yv12b5WJ@77LR!yXa0#kwFKQiT_k`b^mS
zWOpia#JwvEuWEe$uyg&gsqdn~Jc19I)Yv@#*WT~Vw3xY;?}>`X$8h#Z9IjRT%g&!#
zWK{V#Y_0W<L`ObIM%h>>^P!Zxh<Wu_<=h3ouTJ<L9bLxy?6Y&Np8C(7L3Qt>+Kqqi
z-B`GMVaeWVk3!9r*_(9^KU{rVi%D4ylFE*_%rU*}-_2d#zeA%h{LSaWz1MY4ao@cB
z?AY|pPxfxzX|qB2z{?W5{2VP~6$ar5hO+IE-~Jx`wE5fScPx&O_<qx`e5vNkwl@dc
ztP1sKoP6(j?umHXY3b+EJFHL5{9{y<#k61fQnp=gnb=hG(x`8Lx8~k0Ir%3BqB%+X
z*lD?s^4F&Yg+^ar@aH@8%s+Z_q^%~)|KFW|vTe$LpX4dVKc||OUs`GWSJ1!SX4|S8
zmqJeJ`0tO412@Du9Nrw2kSy2HU2tcY*)6T=&->#dU4B<N@n_5Hv_4!}x^znXeZM@5
z@(y>odw1g`Ey5F)ytZ-da!M=hetKJAesnMoIM+D5nWs^)D@VcK=X>j$ub<2IXNy0(
z)HUhX0nYlort@+?Hcy$vc{t~!wc=s_(_F7~>~$yFzl;C08xoftyDdD;=IPhU&2_Dt
zD1ZC5_sg56C1>}@SZSOU_h7a;_n6n@Pr1IVhuf9p!xLEDKouU7K>5MT4EB*>8QE&~
ztE(TIzP-D}_uswqWhNJ%TT5%K-Zu4C&Yfu|9u>-811V=wh$~{9bNKkzv{3f8>v!*e
zapynRU(i{4vH@CG$KBbtrJ+lD&dDd@ucutwe({RQ6NtZhjwfdBJ21h1*1r=*jQd`%
zcT;m<U}#`C^6$d`{BY5mR_Sv7AHJMeEC1x74g*-C%xCi6|G8rCC-<kDO*6lwxY7<9
zQP%tiieG!jZa;p1?T1G9?{y(5UyC6n$bp*{s)zk8OLH@BD$n^HAGT#SpU+1~{VTBh
zpl5O1?M>Qh-+t|G?3%XDA<RX39xEjAo;y1AbJ2=DJB4+tTb%!&U!%>t#0`?i6w;FO
z`=^UNy`FeGp5=ojQw`IjD<SJ+&oe{va{9Kvceiiqh}u<kaaTXX`Mxz;xeF#=sx7k<
zhPd@j|4YfbTXA>0uGuyntB(%lo@;G=`dvB`I7k}y8s3}i`Ty*i4Sy=`ROI~Ge(Ko&
zO*?P;*@#0m_s$l7&d#@q@v!N?zh5#dh3BT%pO=SN{QL0|9{bcV#mDyxzMNS1|J0?b
zU(a;*Pd?8Ek*-c!blf{Xe?5;$fx@4#{UY_Evgz-e>>=jL#C+Yq$e^Hum%}$S+Wc?n
zVZT|E9`znG1~;c!4*atC|2OB<-QJS@55N6Lzq;juZujTRHR@UCp*3yx<JFmVQNcm|
zE%u5T@82!mvL?&w?#-Wd5X0Zh`!0XtYyaeb`}Ul&-@UF^E-KXJcg3E(iZDnvk0`m&
z-_w45io)&t0#a608DV9nb1k*CH<%q#J<knseD<UE?oTT=E#=nTw`leD`DS4z7ya$e
zEZ=+Q;_G&B8a!}oM@^pD(<@sO@5SqW($2|PzCZVd=jBT^d9`T}TV`|{r=?z8zI&GN
zzK_peug`e7=(~~jQ`?W1Gs^AFArAfW_y~`^uKqfHog(iiZTt5vZclr5<-+%8PcKOL
z``B1R6+Mc$`9oj#&fE>X_UaZnZ{952xzn`dGJEBgcJ*cR<RPi&*y9r=^17$id!INq
zfBlYSduNM1zkVsSyC^gMyY2^YqZAaVmCNQ&J29_5pXu3;{MYXm_so&nl~%{+>-F*2
zr($qFghk=gk85vqqBbu!Ol<%AGp9M-qJIBz>DT9lz`@cmQ91HhDR)t3ajNlG^&dt5
zr@f2&_5Ar$lbg+gpin8)VR&pav*i3Ir&~9#G}iuWUdHtAU!>*Rw~Zj#4rfLSrL*q(
zIa=IHHnYah<NNV;)+7h66o@x@xDP}~E<f|-@>}h$7w_j!4_>u>(zEpN%QE$%OHcg&
zyZ-F%7vJZ<SunpQ#AUkfDQ=aSmUdUY+m}=@Yo<-kJ@vZ&;GLYP*r53V?7w+ruHN?A
ztt>LHYntD#ygF#*aE$fAMAM^NRxj<|mEwH+@^$%$gyY<AEO$6Qs=uz={J$F1vGBMl
zdv@-Yyl~^QCs!{mH=BNW@+aeUzZK^9&7CT0F|mL1g_m+R#SJ3e%dbrfNOX30I{SV5
z$=_Di%iH`WH+_xQjN2<1WAneW-%C2<e+alcslaed?0W9Kt0`6SRqa!n+s?_Y%6+@2
zX_wiosqyaqc{e=Eb<Zv4ex7o<F>{|_Yr-0l_SfRiF3pnVy>f4|#ooGo^{GKC<YI%J
zey?6zD}Mg;!d2JSbsT+oZ>lRKiMhlH7B99r9G~m$Kihs%`}@i*2g0v9d4I}&xv<pw
z#iN~;C2K^QY_BqH<NP#VeBXRu<ISs9uF~D;_xbVf%~9VTtdE-TyLF|AoMUBnwd;}d
zsh)|lZ^ff489~LUz+26R_h<X}Y|r%KNJ&0+ne(mf-YJo{y^1$yznEFN6x2AAy7@u<
z(!IKUmx?sq>i?Cf`*IuC)L)#VbL-+2!Hdre(+(IL|AD$wZjST6qrIttl4Z~33Kpq;
zd93)Gzq{<RXz}Lu*n{W)zg>FvsbH$jn`z%acDYtU^hj{et9~Z>zjMo@dzlrAOV=-(
zTDN`G3^jL)nEFX?uKv!7R{r%ydK#po&$M9UFT1PWnfq3?L^V7=QLz8bf$3(Y7f#Qg
ze!1|ao%3zApXcrNElW9)en^>N`q}fXpw!6XpwBnO_VRS|>Fi-&m_PH{uU}i??<J~V
zB(rqp*8Ej9YBfJD7I>?8&9Zu#qA3n>QwIC`Q~Td9R??c>>t$c`K1=oU%G}q>{-=ap
zdiuGDN&DAXyUE_q(t-<}t0OMXc)Fg?2Aa=(<o6Xn{UZ|<e@!CLNy?$|Xcg<GiSst@
zZ8|r9_UHY%ePJy5*Z<ty<g{v1lHnAqpYPYjP3X1(Ig)Y1x<z)1cirEgU|YLxv9#_V
z1IwSaS9QNUQMZ2aWoDgJZm@q1zvTC|I*TS9>-+z9PR7H>jrKP)PiinjiiLQq-uKJp
z#U|`uX1bwYo+l^%iOnD5O{-o$_x_YQ%lY$r8>8Zrzi)n7&TIX0>f5Pbx|gzUDen?b
z2UQI$2j*R7sO0!{gvWmG-nC7yZtWI+7jZ8^NcmjW4vqbN^WvhNj-S5wZ-<8If|FX&
z2|V+^fSMo;5dsYfJ<n_Yb{D;z%yQ!T`QEy<Q|7*z6&5um+}l1j&}Dh*kBs>@-V}m|
zjhGVbmM#7H;-RVPl>NJPC$t>X`t<773)y<pr>E;bJ=!+yUEJ|tmd|^v<kP;Kc$KuP
zu8vPOv*5iQ6B7diL!maq!OX^&#(%$DT4tp6Y;m=(*&drC|6bg><iK^uQ%-;G$2zmS
zoA*qJyT4=dr6nS3Zd`P*{QCCFotwfI(`NLmzk4NXv1*4q>-z`^nS=H6Hs^L|PpGQ+
z5x;Bcp3Y?_;_8H?G9l&Cq2m&g=DO$1_c|0*hjP!ANHdh3AO1n~riT37S4)GR&bKVj
zGmHJ5$oMxW)X;WM9=ol+>XWBucfUBhR`kpKw^P5o&lUahYFg}<N9j+$yg6pHE57UK
zTJ5eEvs8ah3B7CdCTySklliX9yr3ZGaA5hkAg86;mxE!w`UAys^{Tsb7O3CxKf5k_
z@#N&!A+`_~EmCJFF3Q^C|Ml_yZCfg~CqKV=f7>2|CmF2%!h60%-rS)6MG=&P85jc$
z1I_N->r#7r&;I%8{dJCg$@{<0HvStTtF$=Iv9>xPTV5(1Vx&^X`tzr_Oy*T_mi~IV
zv-jM$U6r~AyC(h8|I^nu&#vmxW1%(QAoZTYr5%NJd{@i9WW3!RAAQRq-n;mp$nNFW
zrm;!Ywxu(NiCn+DaOI8r{1E>f`FP+G(^}?cuh~{FFI8FwW;XCmy0mdiLAe9dwh)uc
zmc^ScXh2hfiueI}^EW1UerzxMdMSBz_u^d--WA+@yg#XqZ|;<L&drdfX~V<^HDBJR
zgi2@qsoeTE`?gj_6T9;}Q0`<f)MTh}dHYG0SJ3>Q&Ha6wcFgPZb6zja#K6eF&@e-^
zVXw{1#y@{29%6qz$5#6DTMm%Vk8ELhqs+Qs+fw62$M&z;IXUl#0>9-qL6CVJrHl`p
zi(f_Ua?AOn9liU-x~Hu-()&SeJcmUb4xmQMv&D}NEOgBH0Uk8%U}l{0<m8g3yAQ83
zyqQ!~@Za6lgpXGaoK!z5Gu&D2e||0R##{U2yBw|WZc~vz{14)Q*$f4Lo*ZUc_5KCt
zoL9#=8KZYsXC}twD(sdM`vZ<ff#pmGE;I12(OY))=x$Bh-EDbsbNtztD4gU6n_Z~O
zz`+8^>^NzVPZ>BE!0H*88bE$vXkbtPClUn)usdMV0t_r*Wt7qv1shy5Vgw8Y_JYE4
zLm0!o;Jw-=MjYnulY5lGL(!rYdx9ADoIlmIm7kaA(9@01)2=sv1dUOPF8PsCv3}yz
zY<?+;mea{=MH)OIBiu$y7%GgGpE|XbhnMHj(Syn%=TCo{)iPa-fk8)_^+Vr8+tub9
z_!u5O%9!%>OURxz3=9dXnKoPxtRurVnPy}Nth@8?`Nw@)K{d%AHZd_YTwU_Z-IKE-
zBy3x3MuEUOu6}d>X=js<tkK!CoPi<1QqJD{fJ@}fa{<5CUi_eNM_{Vb(dvmFA0NxI
zGBDi!aNUdZ#4H_~nfa%?woCW&E1e2dY5kHlao&8Ljt_NA3=L}wTxUO;t6;N_J%u%<
zr?s&|!)15QkLj$}mY(^ReAON_QQ^;i=8V@Q(GP)}l<K0wPFNbvy84Sjg~usk?*0Ah
z7haq{v4(q&_rs5Iq7yEZ@cd?Ga47NMR^jv#%e>ba#CC3?=;z!`P5*j(>`zoMH5$mu
zy-)r$zr0)WUbgG1_p3ks@1OSHhii&0s16|tI~+0&JLV!8oH|GCv6>|RiHe|O-A4DO
zO;k)?*JC32{zQfAvG~+KfyeSwYT}PY8u3lsxV~p^${WpOkvx3{1{J;blNBa@k6H|_
zSIlO<|JPdbcjO0~Ng<EV^#0XP-J|{3Skl*MSL&A&FQ#5xz9^oJx2sO{;QxE8|MC65
z>nAzYXxpg>mB;c%?<N+`?8!9xG3Rs2SLvRDEzIA)D@w*+-LqWBc7M?W^EmS-miOK+
zDt|1f+847dZ|cWb$>pbJ=p=8cdY88IZ0hyu{XOv-nfn5c88#oeuDkQ^^3xSJ4*vc$
zVb$lk56aqK-L1X)eQxT*=l{ZvIhx&_p2=tPS>Eke@;T{C4M&RZ$UKv}ZvElzofmTH
zivoJW>gQ{()isg|J)HafeVSv&)Ijaz**)Q@cNS<Zt5sg3vA*m<d6`z=#fKV_Px<~F
zvuxfsS^5go^u6+qif<-eTy`pAYEQ4xyvZMBuUfq5<5^q&@awk~n=UIQZ|_l(G*0~!
zajeseW6{a}tDg&A&HJJ)xh=Y3QpoY6o#{18ew}OA{dZ^j6^=DmYobd}yxF6#t;n!Z
zkzsLY@^5*`-A-R#EDkN)B#;xXEO7mlMZhr&4hJ9Z10Dyb1}*FfkxV}=A^(`k;L`6X
z<>b~?=HEmwN{SXlF<6AHylBU9;KRYi>%<)x6d1}l4=@`2Tcfq8PCR_d-c`#h*jW_T
zu_k;l&Yy1Re{82$&|_Z)Huip@8m-6EG<rB(oEctS-6G49@Z`slMJr=D7+4-~_a2VS
zda>2KO{!I0fq_$zp?SWgxtRZ|txO-a7@AK<%+k{LEYxAR^WzRHV@Cv^{FaPf9u@{B
zff@08y?DPLk4k1>^w43Hh~d-|P}c8?l4rku>Zy%;Z;i_Fl?(+-_>GscGyX{WaOmY^
zMFs(e%}f*4JiGilX-iTIgUT`n7VTT2S+}(2u_{QUCM5)>1kK@M*v}uV$RK#VcC*Q~
zBOo8fKR6@t=BG1*0z<QQ^gGLUQyG{JY;@+)Y4hoFW_%Dh_q8H}p!@VFWhTZ3h73*y
zeU(V<JG*c2GchoBY*E@{edfcDUlx1rtv$5w?}LjX40ZE^bLKqjFlT&lVp5?63j<R^
z2*Z!mAA39IvepYQa2zPuXzb%E(a)6dyzWB=7lZpYi5vI089VmyJ=tVnb&q9%y~~W9
zo?-2gj1kWdn116`W{8OtyBQ$S+!^b1^np*~Up|h8so%}7*uI-kxbhiy`I1?ex9;ZQ
z5pAfEe%CGCbo#pSqtl$t*N(sZ`l;?iaZZ*-{gNkUQs37)U;SV9aVGQonD+{d6(<^h
zpGbWuH2M2eCe_-ozt;lh3vXm7c>khT@#D0fm|poql26lGe%Kfr-agRFsm{RP`XlDt
z!ux9~58F@qnAUOL#-NAuzmw^)up4<Zqk6vX>5*R{{^NC$!dcC9Gwaa5yI)@kcpGeV
zZ}LXBw(zg_qD;7M7cdnR%`n;$yV)S0>tkijzJniU>E8({-QQOC%qH}w)Zbpy#CiW?
zo#*{FR{ZA^B~o1z#x{Marr@4*n=^OB9$!C|TC}gnHYY=+()z<{#do*&r;1n?<koL3
z>^;0VaouXy&)e$+9__7;{qsNRnD^l;pIgQ58_zp+_xh1qOSLn`-!t}Te@%R~i2aO>
z#W|s`eaCt=Hz#^z?rL24c{Xp&I}2g`8&lc#wYT$`M_l+Yy{mE0U-xW2$HM&FO`B5J
zl~~wY$*YBAHZrvQ-nTy^?)BnBk`;~N&ByZpoN-xt^0}l*mYI`%@YFdIA7l#K@w}>2
zy*yRS*R^!@!ZQ;eFs~>P-)Fa|aIegDEy?7}GaEe|UQNxgK58d5cSFCal~hf=npH(?
z^$Nd^bH+PdHtU|=kjV1JQ2Ltxhp5S1&HIn$AGhke$f>}PEGyLT*i?GvL0+ZV@2~5a
zmHu1vF>3_}lYlf+LDR|SwqB+u{>ZG>eIaU}znLK><x|zpq$g+U!aEuduRn8cx1I7F
zZ-J7H2k+lr<o0)!N!yn5!si6}nFQ{*S>9!3da!x1j}yZh;fC3!?K7JXI)DFt`-joG
z1_qTD2A<meI}3FUBRT7I-0UngqL>V(cpA+!GyEg7CP&>OKyUI@gLM-#cjbg`v%8=X
z-p`djN$%)mhA*o%k9-unC(Yq7htpwFmPx62d4Y4`!n69yED9f46e7&@#ecm{Xgb`l
ze71F$D?`)!Kf-EmOg~Hx2Fq$p*ZFgscUo`A*K>azlcf@RkMc$;aX1tRKGhX7J0Q?t
zA<UpxlRsM{VLo>?zj`Bs$~p#?`;FUuIJEZLUrzF1knl^|67gbv^^TJZi*Dw9n7gy^
z!j;C0ru;TqMLH}F+7)w;Efiq*&GMkYa6)@cAWPxC(<}*dP9FNcY(x1uL8b{em=4TQ
zWGj3BV%ce1tz#?>D*{dLu54JIBd7Pg=xfu#`ysmj>>~yHPe`1tJSe+{U6J8d_Lexo
zYfm~G7+NY7kF7dZvc%w1l9eLEo$7dvJG)<SFflOR2<|m`ImJKkMt`Rx;|=c@>MMWt
zXNA0Jx~}y^;*YO9kJKUgr8ghw3Z9c`mC0uki1DaoWH}(mxPf~)OTxa+rmx$WnKp#*
zJYkjBtoZS{ki+2)=Yfo?%@;N2t0q0UvLa7};hE~$mHkf-&8uN>)C=J4nt1Q`;^tGz
z|3CBfX5U_VN{z)qzhd?GI#9`TL9@X`xK+#hE61Cr`NsLOyX>2G`18o5u`qV%Gd>73
zzALiI=Ks&B5~2;#t#L)`dd<E~`59@Gx#8opm$Nsw)toawa-hj|@zdA81sH<cwQW_-
z`0_h4Jn~`KA^4}g)9EO8W1i5tx3=4aI2>{~7>cK-{k$XMn15p}Q^B?i-W^Xwzpb6|
z{Jg1^>}26b-`}j3Vq1Lu+rAC;)(LyK913)godmfs;hpd^qxGU1i~CJE8yJqXF?=Zf
z6ZQG~9>IgpcZV|cU8>PH&J}66+`94Vf0=sm_;W`Ooc|Kro5_$jgMaJKx!viE4}PD$
zX;xb7>cH?QfI&~>*XlLw{;W4@^(<Kw3YRm?yHw*hkw+3_)Mb%|t%v5BGQ`ZACwgp|
zs#W{S?sdY)mRnTB{|*&kxO_bIJ{w~L1ItbKV|?6PV#|L#VU|+^)evI)**CY|V>-~B
z6v1%rx(LI)JezZE_cw2!#J$+|TLJg?9qRFm3v0t<%4eTo5|}g1Qih3vk>lHjf0}Ee
zGd`{CiJr=#virwBCx#?vh7y*k^{1niHoYy#6K#;^-0Rx#p0j!z<ENR5Zkvttq?c`c
z##|T1wO8}_UZEI<U#&a8yp35GcTGcGpus|f;h9e2JBI}UN*f}1)=1d7N}O+G0J(Rk
zuY~=)8UDwtZ4aF<U(BGgvMnZE`hl|9-t2yfKem6;>n#tN{&Ovsdhm!voM9jPo{HON
zb_(hgPj_NaU~n?MP~1{?>gq!&Z@rAwSHB!EKYBp&ZC->n17pE{hJwh*<sWPQr6tU`
z(aUt;_tw2q``e}4vU7ge?pa$f`QZISJDJ?P4wdZub=K<q^>uu%Ivfo8NB+!{+WFQv
z(0;w}v1K`m424<@XHGih`<?S(KFI6cr}beD&y$Q$26y*||6Xx9d`OrqEyH1VIXb;D
z&4z<veadsuCvMLVZeQVM8+1pF;m@6Q*JU29om#kvcmGR~uQ%Q5POE4AN>e#_+45VZ
zGs}G0_e=#ddk&{OnYm)}&%0GIJvB*cEDDS)4ewvRZ~19_ve!gHH)~bHc9y>($0ojF
zElJwt5YEuF|L)2QKWjJYFr0iPTkE`7@X1!Dbg4gf+c^(}ES%3)(Y&FKoniXoYMyT<
zzeC@fST-GdU)%dSNA%m%IUTBu57LVl8|*VJE+}ZO3tGhCFt_J+YRp>)8Oda(13n?G
z7td^%l-GP~ZA(M@(s$E8mL{HM6f5IsxUC$;WDpzr%PqMn>&tmT=JtJ0GN(A!h3{Kj
zcsEaoq3-`b-7jJPKmXl-yY1Ap`;Lpdzgk`Wuurx~a8KCoWe<})8Jd(+jeSeRe!M>`
zKJh7AM<wHf?zcg#SI#|P|1w)bq~U>cobsA@{)Ccw>%&5A%vZ-vyxPjal(3e8W&6FS
znJyDol}MXPG3V)Q=lW|Sk|)rhVmfEGQjdOx%pC3i8q!inl1)!B&;PbPzh!=XUs~nn
z<Soi#tPU$yUNI8>%DVpW4&A2j>d#mYoMd~mP(qX;ZB|yz@0bGpO80|eSA%EdBqla7
zMCdd4^-GzqO3z4((wAbIu;kSS+pGEi*Gfcuxt_mhU#07f&41t5edUw-X)DLPzxDo0
zrrzvzLCSst4UK_Ys(nu=yYG{*Jgvl{;K<^zqD#})V!N7xkl+KaPkyE$T#Ozuj1O|_
z)}OKnoe}Sl<;D;fu3YQ(nA6mKpO3Hiw^@7S?<B}3Iwzkve45+z-eqf^Dfa)0SI#PZ
z^VZBnZ=(hS&vUnk#7$?be0k=(a4}7AU=o-!`N^rQ?cMKm{WsQVu!*fxabyq*Vt5s@
zbB^Ab`8DZo40i<XX0aqr;bl5-S;(!N?P}Kv@u{pkJ&V62H*Tn74_w2=P`u1w`{eHJ
zIq@8f4GcVcuRrf>G+8kBknQ39OOq064`gsS9P(qlvdZm&&%*r)Cev6K_<x<J&Q-l5
zI9aM-v%$QsA0CM{vl9c%_?j7N7Otzi_M}5dUVX1T3j?UeUFGy)cS^2<!OauRv!>d!
zC~RX**uY`3`r!{}VGo91p{cbUzP<k=OfRK<DYso2`0dA>#;;QMj~3svZ(^un%-dr=
zQzP7_^|QweK~)Y1CT@p<eR->@PHt}bc+|1XWwsQPEH9&n2cyKFb03oe<3i`jJZCy^
zeb=pJmH!p?EI#n!)F-|~8-C{vag~w9QV(Ji-<Ca$5N}Wk{Xg4N_{?Pvbx@gjc<+Wi
z9v|4VvKDx{l;2kP&QPDT;ZZxok?9OPe&4>vGGCX^W;$@WU~|h}&a;!Q?9MIGnCZ^G
zf6>pm*E#mG9%ntw6REz4f#r5zxKGcOqS$|R9V=Bq-3D(_*8K+JCw@$pxY2)CaH%F!
zoN&P+W~K}4xxUJ<CrwRTDzVnZA%?APsh~~M=XBHgu_|YB)49@EZ0rwJ3urSP=rw%q
zvm@4J+Log{fAU_)96ZDOJbRUSzDZifx!CU+W_v#BFdY1Dao?qq+i$&!BZC6N#4xFx
zp7f5!R*mc<4)gMtr!+__#0hL$ewOLL^}|0mAGZ|P^TSAxkJ&5eeDmo)Dxr1Lxr)2}
zwx6`H;A3fcfAG1{ys+z!k2prPbL^G*dohap<FeN8s&On0?~m&q`jP1ET+A#Z=g)K?
zY;)}1fHF4yLpyX2vLs|YyzPB9FmCy?m!CqC)1HSou9o_@HJYJ7YQf)jM&A%+W?wZG
zn@Ps!@9Hqz+5NP2yWji;m8%QR_s#cEx_l_Jp+b-$ZI9H+jammo&Tu>I+|{Adt`x|s
zFh!ssE5MHLhn?w`yE<po9XLflyQV~*P+)i`*6^UZZhz;o4q>+5EgEOfG9CCnCH#?p
z*@xJtRnKh&qhH%q9K0PV&@f@nf{PjavGHH$RtPf8VSB+R`QYK($43qnZod-|IWu=c
zb<ac<rACGw@eEC(MY}pbedv;~^Xyy4@Zp`X-KVmz92-4nL|?wJdfAmZRz}tJh8x<H
zu8GzEJGiJ#?H)_Qlt~%JsUL(@7YZ<NG(0sr!1_AtYDVP7wHw@fjT{&rIWx@o-1_ev
z%Q?yV8N5srv>#_@t;*i<AoSyBAHQ1Dl`Boumocsres|;E>dns%cO7ng<jWv&Zu5tY
zo}8%%twfj@7&mllo(jp<UBMG{<V1C|BEv>yh8WHKt2*3wKJUEB#&|+(Vt(ag+pnx@
zr%M+0B<JYm|M6SW^&;=(RbIW#%bgk8)_R}0vUX~g76ZtvtxUhx8Q80zc5n4-VmQ*v
z@L}tp*cay)|IeL~$)L{CQ2*aI>(+rYRSo<h`RDZb!jz>q&iLP0y^=rT!i^)V8G6FL
z&Yc+>IVWSaB*-vfzh|GFCr?$~e?WOHR~y5TJ_ZBx*)L{(T3Ic;NQ2?v_n)ehMGg4R
z+Uvft{5E}O#E$1*{O3mKoRQ}fUC*QtGIz((hw0#M=-w7>Uy)sH99rod4mq3$TGdz@
zE{DArPGuBW_3mcQ!GP~m%|ce`+{@4UBX@6MU~=J>nHBH(H!pbC8P52?sJPc)x|#%&
zKsuAbF$Wdviof<|fe*{wKFm>M*r>{|e6|MvpL_mgjSL2J?wovMC4O#Y;@2vMce2wm
zY>Mxt7VbFuvE`Wur$6%_876~A-}mayiGI$=Sg?`d!@Zh&tG4JhJqTTrZdvhHo%62^
zW6lnC2Zl#p40=X^nIHE&{ii3$ux_`U@UyU6pKE96yTv_EYB_CMbx-G3`i&23`ppfw
z4E$$U%V^wVZFqQBJM9vSGs7bnh94>WJ)ZyBVfb%d^Y!{U7v}e`3A@h5*dfpOV<9JF
zhWs)c3ta{$(>vWh-wyq_Tj3QkS-!#kv1{-B!<Qw?biAItoxIEaR?*zb6`k+>SQsDd
zddQdf?+h;o1Iqz3J%d2UPt#Uw3N-8xWY{-t|Ev`)uUvn)F1#qf(8j;cXm-JJM+xhn
z1v}Wx-5*!}59OJk;yFEVmXpw$<?9+4dN`JI&U<|zgn@;DX+h{i`|L#pMXMfmg3M~z
z_-fXfIU+BonKCq={*im=PTuqfRSk=M?%n5Z{aA83_t352A8m&?J4^UCRi1bG=`X^V
z;3vrZ`!{F=Bb@WVzg10FbxYoNP2BF~$dKgDa4u+Xg>j|gx1yO0EYh>fTXwE(tht}g
z{r4-wo(<pQr8do#Z%mopc==q|%o%JmvL4M;>0<b>xjx{=?4N>642%YUoc<hiS?Hqm
z;D-={o%y5E$<9+VUlrd`V>tN!e%hu<PmTWXxDd^MAUJh?cFl8cGv5FFb@t*J_xSBZ
zzgP=2bUc{2@hd2!O0zVq|2h2?t66c?)s+GbI|LZsExfZz%jWJUftol^JyC{#vD3N!
zZsu#+7;VbsHve~~)c5VazS1{1<B!i(JM#YgtfomBOb6zyKE)5J*m7J?y^Pqe`q;y8
zL#x5fLtXQp8{RjFyjfhMrDQF_!t@}I@j?2D;wPqRr=4GO^oDL>Rfw2xn|d%nNB@cS
zp#<gMbKI0?XD4;s<ybx=W5%}b4>#o%8RGx()(bydwkSuKVciOKUQW4fGZubKE?e1_
z_Tu?l#YLxo2<-0+*PWhq?~Al^&AlZ8yxRmBC2DqXb&D*Y60^kTx7gL!b`G5fRZ4x6
zL!X*?)s}Di_1{M3^yhY)>;H2N4btP4&aC%%8rWs+B(me8;=7}EM*mi7GWfUt*t56g
z-?TL;rS@%g&s=Bq8rsj^>s)j1g2~E-`-*o-Sk#`&WRzHXiq(F4{;3(tXY?m<r!mg4
zI8$A==0Y@ws10wyl4={7YQA69m#6HRwKGg__U4eghMG+l4xty^=5JQ_`6;zN@}_#L
zX7`@C=hs*2@0e&~e<YnRZo7fvk^1LWD`P%eujdQaSK)tkh4=4!8`jVJj%|&Vem3*f
zGlvJ8CcJcGv~b-k(VV<E?fe4o&^~AT_umig{JhQiV_L`kBfk@@P4lbrI?g|C4QP*6
zU|6U%H|}cn!?wi^w)2m!TJ#|9ulFf~_>k+T)cb1JXx)=byc;$p=IUbUQ<aIQCcInd
zZ0%el7k_@{tn>4=ceN_oU0#1vJ16wI`Ipc0q=R0b<NKJtjdL%H&)-*(_KOAUg_yZ7
z?ue-f-MM{{Pr=PUZx?X=SSxUSx9f|y?a#a_iaX9%n;lx1zVE+2&(Gb~x|1H{D%M?J
zo;YvoxvkS%e(t&68mIT~@1ZG2duJI*>GZcM+I?<zJpcTz!Pj-~{{47k@HKCP-!9WL
z->+s)c|FJA`~x*%p<~wiQFqiBo^$>d(~yWX%~O9ZyTh$cv2U{@!yTc!S?3Hw5_jKf
zeE;6WP^WxKXI%Tl8OAAB7ApA7tKFdyw6{e4{^pr%ED3dU?!MpZl%IaUk#E{juB$h_
zg`RDjCY8CSKjiw)4b1n;_jj_DuaSy;-_GE`kfhn<zO?N&7b~NO4da7}liAyRszHql
zfrg2%7cp@#uqaGhq;O=bBZE*l!>_9%SC(k(Nf%(?aM0mq_<wh|PL8+&g8)Nx^Yf4T
zatkLoG6;DyRJ=W67+7^)fkB`lLX4qJ++A-&2Bc=vJr%Op`cjz-hr=UKZ|j%a3@>&j
zP(RRxO@4nYD`Nx04#OWyABo;bca7Mg!0=I@Vb8C*=No3oJ^juD8ogv^eDLd`;KF(p
zYZe7JmId0!v?T8>J9PVOkIYX_#*TQ#2SsOZudt9mDk2VXqYVec=J~Aec~}k{Wjf%g
zb?M56%;pXMGdzqB{$6)|g<IL2gTJ{MJM0-BY>HZY>itQjO*V1@44`sWV$O=SoJ<1q
znFQ8!-1u@%fcf`}|EuGUsU^lbi5VZ>)tlvb>buJi;k#fr{>|Fbb(&fI{X-Ueb<SkY
zJ)i*LXfWIn`=|Yazi>_z!@1DUSIUmv4isxJ<%>}cpLg6XK(p}a(UlYZr+jpl^grzp
zUi@xrQbK*{dM$y!W+9*c+?@0Ox3l4d;`zpoeWsBe>xv%y`*fw{-Om>>j#Df?NT>4&
zHpsB=yPUDX<71=WoR^dP=iFUzr1gC61C#UWD+Cqh7Him*J@~#{QlQ~`Y{mbxN3?{Q
z9)v{RvGkC#FI{27J~`0##ewg4wb$6!JQuh+;q{++Z`11e!~e10kg2|1aJk3eW@EOE
z!;7WXenFz|?q!{2_`a)7f$frd@vHN#r~Yg|bt2d{a^0TSp|1CqJyF)+iP5kM@n7dO
z(_Y^F)bDMUp>LliJvgxJqnO>~LcTk<*Jt?nvOHu<u4k`%Z0EeMyuSU^+>0|-)!j}p
zeYLrL!?On~H3N4XKfH0h_xt^MKG$l}C(G(|2g~a5v-1cY;^eEj&NBO!#@$W3<}(zY
zw+MCM3p;#MT_f-Tb5n&i=lW~*?mTl;Dz?qa`>{*n#_F@(Kh-#2`#~1SV@?eTsQ%*@
W6YY;!_vdZ|NY>NU&t;ucLK6V&%!alA

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/gboard_logo.png b/tensorflow/contrib/lite/g3doc/images/landing-page/gboard_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..f1a93ab76307168eff28fdb08d4780b2e2cf5ff8
GIT binary patch
literal 20159
zcmeAS@N?(olHy`uVBq!ia0y~yVE75b9Bd2>3{U+hPheo+T9@hU9N_8ftPoI?pO%@E
z%D~{!IdvL)$mEk^$DjYcB&}PbxzPNjZo?O;T_qa?CowE+nIPbzqoJg=L(FA~ruHt8
zzyjTdHLQY14=F_)*r(9lwMkFR#r0|J0p&OQ%I{S_|9gJso%=g?7C%2%WBfdZ;Y9z;
znmR5WRtGJ8XMsm=-MYK_7CxNE$T5LIP=iTZ&HF?-FW)!zj%RQ0f88kG=~i;QLhbtg
z|NrAv&pzXbWN<jhaeQOznvfQM<;BfM#Zwp>PC5OFT5bHpIHq&M8U{^)hr-5l435t=
zmN7s6Jw5r5(;Y!MW(NN>BUVm^33iHgQEJ+ZC!NDwZuA^~_DSHI<BiU#CsLI6@IE}c
za%HA*&lQF*kN-_lf0%Fi_=x4qGc1PZ-=>CD@UNRN|4_{5db#8O!?L<E>UeciCJ3|I
zDRxYqVY&T@iM`?HM@-GnW}cqcJO8Q6ME@T)n_J!)em;{{^`rCoGL~QGi*2rVE1sKt
zkYjebV^oKQz55GEtMfhw4@E6{QV{-emrR>QdRbD$6PxrRWv1tmGh|+E7kI4qjoU@>
z(X7X;agS`5asA(<ujec?f1<=Q<$KD-3(firykeMVKYpnB?en3dhJSd>3+w8<HdxG?
z{7Cm3x3TllV&e&?w;7m6)Yy6Y+<#EXW~U%=!GUK3OIicB1Dm>b^0I>OCjX=u%@qxs
zpLOk#;p;t^JX2>`<cyh@8D~_s7tjCs_wUW`tS29f*QAOuRD>z`eQWCf$I_5|V(zq;
z@gFkF869S_JUCQ6&0~V)bLaDa5++>N<xFW53UGcm(awNP>;RhugZKl^8b<*R=8Flc
z35_ljxL6WQBiPLvYXUgRnrj!xgft#rz$e45?#OMyuGhHwf!Yh!?+26(INT0o8}QjR
zX>v3b2(T`4@|s|^gjv#2OhC4%$wskWgQcr2X(3Aod*_1E3u!9^w3<#WbPllnVq7A&
zizT-G*8=tn0w!F0oA({in89=U(5el3Ib4?$!*B5OHHRPIEU=ox`~0Bw!=N2fc5LQ{
zr$3ne!L~x6j@N$yLs!cr_sj)7fhAL{wKznZyWBYgqyt!22!)ug61>{>%IU5n@1kP?
z^Djo2i26E9FP6Pv_oBdrdv4#ohI@^A?$;O2zBu=V^$W!>7Qd|9ngS0>B)&1^@exsD
z+$=S@U-RIVgmVe&65b`c8OU#pSYs2$pWXfSpj-0f4cQy5H?VK;y-}7UUnXJQc=+Lz
z0yhiy7=gad(+8eDK2>nMK)i5!q4Ey@3ZXdJ{KoHxjz6gU5mUqYPvalcKeK<VbzKUs
z7Zh&@H*w?~k~*Y!NYF|8p;%&zf$J2tSAw#giAPi(sU>M{oLJHNW5$npE=rk`-gtO>
z#$0j>(tOGCQYy3K%S;_-GnL(wr%!f$GWSVTk@Zi;pSG<|7nkk`NYap;GIvVVsnk=!
zr_?!DwREyLH|b7P4ph9TQyBfISH&^N{ZeSr(v)o}oli7RMo&r(V$qD%EZ1Cig>5C*
z%FRK{LBf|pGK*g-ybO6Md&zz2dhh*H4xW5*s^Zj!)K4jVr(>o`E?cxBDx_Sid)LgW
zma0}OrS7R$C#RlJJ1KTbZu;AavePF|+&rm!!s$t?Cs^y7YxGaLJ~jQs@)PeT*=yd{
z|L^J;dN8zc@rKxife*JGTJlKARot~NsWEBtBip0;UM8ykOH-EGdg*%kPq}!i?UeV_
zaBcaJeT!_id6Z2&taNQrldHP8ZAfD%PpDJKwN)}<FT-XAga&^Nd%Aks%D6Du;K|{|
zf!;ydVY?TYt?~;k4|={ze06+Cz3b$~J1+%gEZyRFYe!pGTYsC!;|Xs4$EGZsm$oBq
zUE1Vl!n%vZVzuK|N33?vj){)7cBr0P*8I&`wuvuRJurR0-}G}!*3HXX@-FPI*xl(>
zT~$1LbM~_BJ^o9iHvMnO->kp8*&>*CIc_@0*c|74<H9Y2iyIDom?oH$l6=I}D&fj@
ziOo0i4D$=WK4MJXv~hLevcl$%6FT>F?)O~h*?m$vIQO!|W!q)3%kH0Db2jKq-wa!$
z>4x7;&zt5O*QY0D9Ne^G!;WmvjbApM*_gAnCOb9d=tirI()+WrWE1vY-F5ia?B2D#
z2bNt-yApQW?Ao?VWes}X{L9`ih~Iqp#?G5hZ_M5NJ10MReq#NlfBx*r?J4bx`Shp1
z(0!<5xhdsF;N^)gD^3(%=zJ#f`pe?W3pTeIU*37^=gjVD+_#q=d)m6XF?-T$!|UdG
zX?aZBjq@hI`}Xej-P^VSa`)s|<!t8}&3icS={*0vCVPeUNbNbcckSNmRoj0j)cmRW
z_JhYDL{i7iHu=u=Yvp_Mm&@POzq0<$`<eR}?mw_!mx1pC?+xh>76IxL#BT_(G)XvC
zH_u~v&AOcZ|0a>$CKZ`SFFjX$ruf=vm($9HtDXAW>UfH|Z;O7@h_R0mxhJS7xzT2k
zU6Roy<xLtz2B%E2YOkElc$$${lFg&1rnF2gEotWSO|_@prbX+YkH5FyDc|*4*K6&m
z;mg+8?Dr}XExx)k!Y3!?&BYt*&nTps7Ay4$hD(~CD@uQxzBg_6hVGk-W!(J)pS$i!
zt_l2en&o)QUe3jx=Q>q8lVi&BEWhVY@(&8Vlu{H>l>4c~E5$2V%g)s7tB!x$^fRx0
zg1t|By?!kH@$=8-m)~CgzI2?mu1QK!B}Osl>;zY-d3h7}Wprm~T-MyI^!f6uj;qTz
zE!kWVc{iKqdQSXjrQ6!Smv4Qsb<I}U*Cy9;qs+J5EB(8W_d?qaURld1%SuaS>AmLH
za+UJD?tRH#7nv9MuKn!9*|$H1d^UT=oB#3Nr3v~I^-sv(c(q|`mUNYLdGz_R({EF6
z?-$>|ohLF&bec%r-GjFZAG3d!_o|<9Pjdg}^yz2UHSgZIyK?vM?-uV@YERNTr6+Yy
zdc*deGs0p5{v_UCReRlgjn}Gg!r!~U@lX3TecQCm>2LLZN6m|<TYqTnqLo2Qi>jYK
zVf@y3n?07#ENo?%XjtvfiN7EJb-vd=Yjx>rZ@y&ybh)}Zl}8@e{lCnAl|3b!D|_G7
zPg(4*onBeJ&VBv5f0fMLs|@BHQf~da_SdTKJg&%IT=;mQ+RtT&gdeoLII?<va{W8|
zm*txti~J3FoH_UM)bthUT(cHFGd=rQw^O%y?ZvgV>9yy}wpebN`}63N>9>ldo@<>u
zJ@<ZTV};`TLtl=*=@x$b=i5ea&gpH|VWl>^WJ-VLJ->8ro_1aMy4bI~s&<~<`udUS
z@$Bd8ZpYhKH`gtGy6o)PXL+yR8Sjt!U$yM`vgJRgy<K}PZuh-y`&4Vi^Q`ahD>?r7
z+|}JZyUpLT-h2I;|NVaZ4K)cL?0)|{&92FR&+bf(#n+6-$!8unsR#Lc&0A%&_TSkD
z+m|`cKKR*L&iU?nr}?&0X}tUY#s2txed3<yQ=Y5pUx~l6e@jJW-j(bv->+^8S6=^f
z{qkd<{!jR~_*3t{*_Z2g{kdr?XKME;>_geFyM5oi-e=oK)V`R(F|Xp6;n&AkQVyyw
z^)H?Oa{lgpE_IcLTYqW4%YJFzoS%FD?>_r^eskwNGiE(=GmN2?$HGpop?s^yd;wmD
zt>O%iHtn>l47(7MZ}0uU{$q^3t+q6Sl%VhvriM~$hUEgQn<Kc4ngscZ88<9lTJb0N
zP=$B*&X^@)OH4Iq&a3PH(r&@bu#eZr-+!x*fBkC5u38R<-^X|Ut8tt4*h4zr%(=YC
zFPG*2qI!n#elBCnKN5Ki42*L#Ln2Bde0{8v^K<nQQ}UBi^(t};7(l?L!m1*-AUCxn
zQK2F?C$HG5!d3~a!YZ%W3M8zrqySb@l5MLL;TxdfoL`ixV5(=LXP{)qrJ$f-Q<Rcs
z73AUu)n1g6W~-D@Qc_^0uU}qXu2*iXmtT~wZ)j<0sc&GUZ)Bufl#-@fT$xvrSfQI&
ztPC*(W{yj0adJ^+K}lwQo&w0+#H9Sv5?duDkVOhG0C8t(MJB?gyke-glJj%*K{EP=
zdItJ%vq8p#M6CicD^hbJTrzW0^NKU`^X!aFEN#H*F=P-n+8}gB;?#*G1J!Ax5Apz#
zhajN^77cQ7v*WVS2Zt*tjP1D6xPD$_U~p3Qba4!+xb=4L%W|3UvIjqR>nkZ6Hy-g~
zigEQ?G%d(eYwD6or?&2h(c69J-N%yO&fmBH{QT_lmzkEfcInIB=2gF0w|C1_m8{2p
zUECf??(ZfFetcKcae~)p%}fWih8@Rs91NzNsd)eW-KTd-t5z+U<fU~y%;y(Jkf&;^
z&D4+}Pep&#sUblpf<0BIihK&eB8;T0uVu$e<r{NXXl1f}$ms4+P<q+yp?F{V#<g2w
zcdp&~vasicRVT->BW)`U)mZIXYpOS16LJmY{@|xOFYEKgtB>a1ig78BVLx8zXY)q$
z$!rrR5m(&<){6d5UmU!*<;biJ%O@;v*ltoG(WZIFy+U@j{Oq(E;XjkPs<?it@!q@g
zXr+w&gTn?j$Kw4J?He1|9WRKcct5RJn_rx3Jo8NF<rnI5McKNE65DD%g!HMuID7cE
zMDe37J3YnYga7|#SzDg&VA}epCF7%Cu-??kjFlp;norbEhFfQH&)b!*{yxF};)LbP
zew}3ftK#@>-ji0p^{4%wJ-iq5KV<TJhWk2KAE@6px7Q52)5*7IsVmRRUCU?L#J&jW
z`}k}5gyoaGv^1ZrTr$avt5R#_5~CE))ff8KUzV(2aQXY4YL!UFOVJ%7pF-T2Us=ph
zSrKK@bzwmxuawJGslcs0UtecDSe$s{_w&0vYO{|%IaYA&n88h(W1kJyajU)DuxP=o
zXJShxEH8Q?6}?t<RTziaHO}kPIm5#r^-E{FJXP8j=Gts!#-Y$v<^0Z~WBSnw%Q=56
zmKk(f7ju<AEBRCSOC&E*NPWKhkp+eyUrK=tN=R@Q4BYCoVUNe-lgdp;S*1?<bB2p|
znqBMAGQ7jWxPtYrC8NiYBm3{G9O&<T?Bny$XRcCl*1R(>jVsdiAI#m^VO`CoK0ABw
zJdq!QclD~6F7FGOYQV#J_(O*1ijD=>l3te{`kMNumEA<{Ua!609rargM}L``WHk70
z+4^RE*{-iXLYHIa@H|SL_($U1%F0dR^Nw5BcfMIFe)3%Wfw^;B?)dQh>9F?~JXjhV
zbKO?3S1$NOKg$!QEY8)UO-q+wk-xLF?zz-g>+2lz?OdAKq*`ko_gcRY{v5+%$Tvy;
z`0hVOKd0~W{?{6QB5p^EwQcvw3e6`VxVueSb=6glY4d~j7Wcl|y81`?I>EJi$E2ct
zvLDNRGww-#-*r;F;Cx)t8|(YYZ>;Ytf4{@MV{&lE<7E?;Pgp)7|F_lQ2?fc$r`G0c
z)c<mSWxoH&I{o8O(QeCIC$3^$JtIc$`=8Wf_p9a~nr*L8{mydEpNQM({)!-&-~ILV
zm3McS{kWLFGVX6f!BayQ!6ctW58XfRj0$l5^f)EHrngi6A8YIN`)bC;R+V=_&eeRv
z;PO;STK@06NBRHRd)v2PXuH3Q*<Q(Cg7wbK%47Tfr~CZ<GQB5uACodu6;~zKPlIy#
zf0vKz{D1u4^XudIWN+z@Y&MH#2ds59lH4O6m-;OKpYY`Tz3nH{E-IS8@=)|wls}i<
z<#ft?{pMf%|E>ScU4L<bqp{)F4*Q$}nL9tZeg1sOpZW9jepO?;z9?Tse?@!63Ecvr
zr$p?3Pw1`x$u9Lh|Io&4&!q`6b52SgyZ2ji`M+=W6KBqom5Nd4{j`NE_k_^8^_%{+
z*C)UI|JOe0K=sl&a<#Ecfi7G2`MmA!&sd|Um|1pbq35h?RxX)qbOQgpFrQNQx4*?-
z{;1TSh%8Z7#`ISr!u8*{i_*`@U;aMRX`WYmr`#Hyz+XSgr_}wn=a^@clR8P(`96nB
zLE7_~KThh;{Pj}5kbCk=IS)nqdkyW<ucz4kpYZ7Y|JDa5|32pb@p7}zpC9XI{yG`|
zaPBN`?#Q4M{pJq>m-;+??XOw;!Jbp@u5F2M?Q<;?Ntp-79-N!I^uxdNGyk28e|&bX
zFQY%m$m|OXs(woy<*#e4|6V1NV(oDt`sY)XdiO^)Mb}+>nERMlWr!RN%?^~1w^}>@
z_H)@<;SWDQi`@Gy8EpSEd(Tgi4<er)>#h3HFBkCdhxwKKI!@mI4;b>x@(N>A9xMJV
zoO4LUFHvZ+{P8EBB=ouC#MT`BWyivsv(_m@S8%G?hN(u^);mh5q^{o0vTy0Z^II9t
z&)Yj;T|L)N&JTyLU)QMp#l9-Owrj1(<7YhV1rt_qJbGHdHF?MK^Al!TyQlrLnQ1J3
z;>@{+B5DTn+IQ$Zs#+i>&5|LpsBZ3t9e&9l4G%tIdUz?h>FDv6Z0{RUtcMIvo}KuX
z(Y|)tq2Kivc`H^O4L;GoeWqN%zh_!g&)X}+#LCz_kb2(beN5T7SnB;F?v(X=+}`~R
zKQVWAhqY<%$&_0QRPW#aYBX`iU6*M0uTlqJ8GUX4zT*C`wqJ9zS^VE6Yfrtg=Kh+i
zlNDxHzv5|?|7XK$)A~onQ18lon<alfuIH4y-#^)bEo<5FTbDI`Ce=Q)7P<G6GkN{K
z#_}q;$sZpD2}+xn-TAjYBiuOraq7vr^?h%4c8l!$UHgjvec--dhF4e13)<}LXRC9+
zmHVgp+#8uz`TwH9_W!gueSRKvAozrTn^^xV)2{h{=KY$RAF$sp;DBiF*6%yLMe6=|
zFT3|&^WGQDBNA7lu6&zld!6S5<GC9at>O0->VI-wHNThoIQ+lI*KYoeQ+p)}L+`x2
z?DOZ#eZ#-c_aCvYRco^bdE|z19?#bN_W^(3@Zad&+IVf&3azv~4*z=PjXs;#pZ_s6
z{B{AK)py&O|1$UJ2;Z-4{&UoO&A)G|UH|{Yb556kEV69okJJB6*=o%8vimFA2S)Aj
zdbv1WP)CpFgPjfAxw&%}e|Wq<@tb(Pd)l`z3mccNvUn^!=ilo6hiA`C|NH;<e#z?3
zw#(|jOixMkTi&pI!h99OjIYu@U!r@$l~&FE%VSV^?Y~%a`A4J4_P?W-)%_^G)A@2?
zgX+5DPj0`TxciIEyCW)U=F{{lxqfmAM>~7}cY1hK^~kjqF`qMZQWYlE&VIY<LF|q*
zSK|N3PJaL2^4$>;W(&)2wGTRGADnGJy|2Mf(Y~;(s>k82*|sm1e(TQt5IriJ9k4I&
zqkQF!T{n$itP=bFrr77ti}f=qz8~j&u=v5$B_Kz-Tx5v;#j#euDQeD~3DtgvJ12NK
zuDpL*K<ut9<GV9wf-Al+_u2D1+351-m&Sgotu-BGR#IyOrrNpX?p?aU*X_i-Kp(Rs
zvf-<?PdT=UdsnksNO#KTC;NZbs@?k+81tl%W!aP^lbq^md`o#R{@R}Lw5<HQFsq?c
znww!<%Sr94x4Zryajo6l`<#2{X???w-}+C)*2sBwd4bwOH5O%7S+zM&wK=@LaCLQZ
zylXM~w&<v2_Jf|ttnbD*x-L6+M$cI(d2Ii8yPw+k4?Zc<e6VuK1b*qFMM~R>M2{Z(
z`M`F+Z-LE;_<%Vz9VfN7-j+PHh<Tf2ZU62$Y3ui#yRrKI;kk4C<ny&OpBzrNagy%N
zc&oRz>do7pYWEM%4xDVaNS^k}SO42(<<7}|%hDvO|H|&!EFNTRJGIvP*4okq|0)`;
zy>||{6V+}1J9r1{jEb{HKlA3^`N=kEYr~`1x|tir<Bv*eFKJjlDeKe3hC9D*{PNv!
za9;hjl!P~%9*Na-&5!=$Zh0)Oa_*UxmPZWP*>>7|KKY{3;pN9^PIgL37c-duRCn6%
zD_8b(IdP8L*gh*D*24SY*Xf1Fj`N3fzc!w~WRlaoQ)0giS~jp8jM|^h`fi57-+SIW
z(*2L``@!gWwp~5j>JoD_i^S=JceXrOaxRtYXTe#o?l#e!koVEswG<wQADFAcH~Ynb
zgPkH`VN?Ep(q3We_I;xjTb#w}KY#mMXENQm^2kF`Uh1*bqKi^4Q}@ZlupT+9yz_L&
z6Fxs%Efc52r5P)KEq*yk`{S0!HDMap6P8VQw^`&<qSREW1rNDeuG%#Q9+~ZKlzwmr
z`~OYW2jv}4g)V%hEo!gZa&!HXxN`^0QdqV%-P%z9&FJz2KbcdXI2KDs<csaNDjkr$
zq^yeT=ZCjncPyWkC4VF1&*XP3N^$KMt0v9YHep)9{<7zY$S1*@6aKz;cAV4MY<{(W
zf{PiO?OAp&MSDB%B(5b7C*@fm12qBc!=$6{2!5N=zWi+PvEUQ^{qHuJDtI~itG3oW
zabaVr3}NB*EWO?!pUYLL_4ln4qoH+K@vTg4ZK>tQ`5tJ!v*>)i<l<M6Pl6RwlJ=y`
zNSrTaq0_i_)9hJaCNDa+ISy3)Uoc<PJwwwV?!Mc$Ht&obQtCIBG(46%VexXYhobx{
zjSlX2Iu?&>Gm9Jp>SbT+FOZVCwp#ur@2s;9=O&xDY>W?AuVra2IvIOpuG<}^TM;k)
zOEX!%-Y(qOeNtZI&a9e6?B8O3U4LTfFL8QVwN7h|NJ-yp_VP9U8dEMDTKtv$%-Jat
z<^G9mZ+p~S(s@LVv0Rs2^IE>&xJ{wOZAGo>Mve*R56zf<Og-pCd)_+fBQxFh7#}cy
zqn)AVb&{)oRnNz-Zms)g#6H`4hqcJhY^9OB%wsl-dai%l;|)G66I2RV$sK<u^k0c_
zTdn=`DaZP!ew<@8&)4>}$frPqn{y3S%2_Agf2W(F))gATCG#&}=A*FMS&KsR*4}yc
z=nHqsaX<M3-K`sS_EhA3*td?uCboI~duNHS>6@(C)Q%}k_l^A|@@e9R8@4{*xF$|^
zJ$&k&)wZrn8$RSJA6DNgnRiwF<J$Jn)7!JRN9gQ77?LM-K80=7+1UoW*i#s%XP=7F
zwA<q&Uw^nXp(^G?W#+po`MwmbX5%Jfk9SOyY<Fu1#|D}y9ZuhBSQk6*v0Lq|S-tBY
zxU@{YB%G%kzdRz|-s&1pq2bBLTO($FFK+WFvbVGMy><WXimamCzvmVI-(}`jl|FGs
zDfxcmsk^z39D&oErmuhfxF&XXilM~uB&!7iQi*e;Pv0-7e8#G_X>tnto7wUrPhY1#
zeP|@I`tQtFZVM)eKU>b9cQ$19@wbU~iz>YOWZX}zZ8ZpzO3XQTRVpF$rLp*y>lZKH
z6S)<0V8`?TnUt;>Qo+ZHtsdxTInF8cdC2?bRQ-LUMWNvp5*0O(vmZ>eKUp;+dt3L7
zB_~^sUP@l4TX*#3$I5g5aW}qRXj}L}_X5Yme?m4fO$KlHi>`P6-ZgjDlt!}xkp=JE
z*XXi`WS``oE>_1rFG`(hUbe(}>0`%bHc2+W_KMFtdqI(3dBv3}TQ{X{U6K=`^E!2D
zP`~Ac#w}49vky#&xxHdh*v2eJ-D_Hb5{Vo3d|Y(+w0P_%=C@O4%y{!NT%nrnmH)1g
z35%wzc^IdBT70YLyC1?ouHF6?+q&@l+o#!Q#LoqNP@8eew|9$D{JnrYyQZaE|E&mN
z)n|U5mS|(1(Cxn<g6qeZ*Ieb|QW{N$Y=X@lB@Xuw)x}$fnAg7fGI4K@L1e*<xkZ+y
zQxogV4!@aly5b(spHp8P6t|n@9r|8!w&;E5)7#4hyI)TWx+)Zz#B2A#`oS`_>rdt%
zjp~l-b`uN~N$jw&eOmZ)jiUXF4J{Hq<&42U<gUdt@n(q4TjR!e{dMKO6}}q<L^!VA
zEd1Odac#AC&DST!%RU*`l>g)XlvnrUOqt@l$90bd??iU2-BLF#Oy%A7e|Ku;T(Wu*
zcIIl;iM_2-@pslNl;|l;I?=20l|486Rl=P^H@d#q9DcLp^wG-x>3UBqp9W3aIg=;y
zJP%JVbF)oX!qzo+Y9u~By*pvIzs}Jv-vimQ%(q`X-Mh~-_ur#so%0;laXvBY+7Q^1
ztR&SaH(|c|1HT78`?}ZFH{2>^O0X>18KwOpG`DMO0ngp$hig~aMD0KRoLk=bo#+mu
z-$Lt78hB0jnt151>eJKJ8@3oW^u9c>^5hS_{WqPeCa_Nm{UALp>+CUe1~!4C5{wtG
zJ$v02!_M&i*=4uL$nAG3B|co0oqs+4%*T7utZ}<$EH!v`WPP@XV!u+voeecr#*ZGq
z7VOSeoOso&rTW9_?@YU1ZMNH~wYJ%H@)uv*3mx%uSd<^XcYLV&=j-Y_>kqqM^?dg$
z_{uiZ^V{!DbG-84+N0|!J?`gD78d>z>g#bk5h*%vou+&Z^LM$|`zFU*)yLa$Pk+`r
zvq|Q`)tOtqHT!;>b@d(Jwq5QAzVlcbieEc)S@iTNw-e3F75D$y%yhrHvS($Ju)NJ;
zmYtyyw>JFwxOzhW`t~)`7TF%y`<40j*=2&6b^pJ)|35fgN9)nqgHO)y*th4v`%{M|
zAG4TfRh#2}kU#rQ#2=ySCqD@Pv=m>am(a$ui(}2S2jAnh<L|4>T~YY^|J=&L9W`u4
za>};HGB>m@6=D%Ie}8?&PeI28dw$<7T&8~KU!Er0UnlcZr;cA&w%aMWF=|sS<37o6
zXa8^ODd}nI*K#uW7`*P2o_~|1)7$-FK?19Kh2nK;j_kklEB{XP{_aJ!KYRan2;R^*
zyuU8(=Labf?dHhmIc$1!#O7Joia5Ug|2tHdf4<p|=lrK?DtG%Wy>?&IJm7@%!lJK!
zN0x7Q3hNeJ_3eOs#`XOZqjFAH<n@K0|2yO3n<EGO6dXS^OI@E|ckch^um6ue@3&X(
zIplox#YW5QialC?4$DumuVY^N>VZ!4P0os!Ha_AhEL(Q)xmK@;+E*L%JbeC?9bY&2
z3B1a1_?K|;)78Bbo_^)9UUIVJ?CQ<inD711++FXRUh(!zwvG93p?TIjT*MqZ7ENf~
zy8F*%^Z!%pcm2E{u}bII2iXN-`Tzd4@&2fJX!ieUxpz&K_sZW!2I3lz4;6nt<o&!q
zZrW{yEfv#WXHT`}e;u<oFf}vx|Jws>&nB}A-l@rmjM!*$=;55Y+jEbuy<XfQ+8w65
z`Df(5@^`Uer~E5-eH1v#_lDKQENSbfDg5vMO7mZP_u*UX_sY_D);>>{6&~qi?yFtb
ztkA)+%FA}m=76XNpO4Qk`1{dZC;U0bne%~_j8|S)pACKZMPyG6W170^*--bD*Ob@Y
zU;X*iQk&14XYSCch?rm>xA9J;%>BmfT~@idc|7{_t6JS(f7kr&>h*PNW7^u4TA};T
z?0x!rO{mz;jbC{6=hjU<effNGRZZWV+1a)C^BVHgYL2d)?j*q~^4e9btAEGts_xy|
z<`Fu&0r??{zeVzwxdi+(-1d!4%6G&4-+EUq{QtHVADa64<J})0wqJZ_dT7DcisfeY
zyMzi}JI)o?tC(XsRg&+?st+llk$SiG)wCy9imf)9IQK#I^LuGae@&6@j=#k5w{iLM
zHnW7Sn_O<{Z`@EfeP`$AIWsNJ-|YN6M{`c9mc8%2vyW$;*|w}S!}(RO)Kj;aPhWBQ
zpYD&;(cKVIbtWsdGb=!GQ(}?C4~h4DGdr~=ZgvZ)WtTp~d~Tkjcq8}nmiI9&!NP|#
zzN*;1-5)r?>+YrvG8;At^74OjTVN9zysA`o<?Ft{jT$Tu|2$#6XY*a}=q)z>>+e+@
zJtbe<XZUbf>hx2q*IPs1t-qJ1WPa4=pGfdeNv@lo$M!P)JR>ph@x;PS4M+K<Sppm6
z!&b70o~rBIwEv)6Zg1}|tFH^31k5^2yqfZ4*GD_`Y-d>3XCd7(RX#y_y5r_Is+B3r
z81C=o(VsUpqMJXm`MqxB%Y@qU?5S6KJC<&p;I^Pb(z516_P$Mf<kBbhmSpZP(OWEY
zOFLBL{Ktx)S5~jN9=4KAH0+c=-x~);jzesYFM>asM`-Cju#%HJez<s3g`e2<!pUna
zy#-ob%$5k6a?JXl)2zW2ZLV_3<YBd4NO$4%i68wBXbOkkJNQd~@x>?WnsfEzMdEg=
z`1H!BP4U@&Q;A)8&&~dOfeRuyBX4qiloreHUz^Ro_@wjFkkCUfGG@Hmbyi7U<;CQk
zoN@NQ#iqK=JTG@u^IF(MX04a~i{DIQdEs<OV~Jm>__7rxfB9}+y&5x#Da*@Zr@mmY
zN|9MdR{nyAG9Oj@@-mmj&l114OKFb1sQZ<#0@vc%Q(nqW+Zgd(_i2{L2L5AgOqELn
zqK@`yJl+(Ut?jJ&L7?U5$I5$F|Mj*mI2GP+zwp)y8K<8wo@wh@?%1FF%Hrnjj$pSr
zLV;g@-*`}S{>5&l;@}VGPJei`(}MHag*Do5Wm9f<|Fg?5uHAf6&h1oPkpG>7I=SWh
z-e+XJe;2;I`qWbSIc$YT9v*s9yr=kK`;WxA(%ta^{8`Ge>ieGGRS;C$+|Uy)m_B#I
zFXk<EtXE^Mx6PMnFOL(tR}nWu#`@0G3{kI_d11b7%MP*^>dBv$%ind>IBVI~)yEDV
z%HVj{x+taR^wXKoJ55wBbwqc^U*vdQFvo%YNbGUR-z<gUoM-zte!ZS#Z>#B0`I>9b
z_f-X1Mb^##63jdVTv!&~y?+1B^mtnr?d!5C5muWHY=56Oo3n6fl*`@xdoNEfY@S!T
zDQ0G^rDs%Qw0FFqjP`}u&vy4u-rZQ(#d6Ln@u_W5THE`*3V+`C9-f(5!1wdmnN>k+
z+(KSYyY|Vxc)bR5ac*1f$;WaGfyxV9ciFzPI2;-!nl-U?>Y}9)y_fT4pMTiAdY0gK
z@$`75*J*`wd9Kf|X_LQFCi%w7D=OA-Caclp|1QV0KRx^N|Iq&G-3MmQ+;QS*!0*+Z
zERCs^whJOUH;DBpyR+T++aYQ4;C5K2?5}@WV*j$EH>+H1zPf1B7O|l3Z!hqvyB7AI
zF1|ed=lAvh*zfK9@;t@3vTr%Z&pDsIm1qBHw+}vd?%2Tti8jBIR3DryYA(8(_rAbm
z`!lPov|kSt??2*|`#0N%=gYF6s&6+WeD*%ZF;&YZh9^>=-8pyZ6`$QR<mW%XzyDYH
zpGQ~!pZM}4=967to#Ixt_lqaYezx~{{lWY7-|dT68CdSQzId9uf3{84tp#!S7fpQL
zkf-G4`g(W!=LG)ZZ6)RJO4j>iF-i&Esg(Kj=rP;o$$CF?(pKDSV@jS``0vlp`Ni7`
z`tINPZT@b3%yPjiyWj2U7keOf{iB6>Y3FzSzdhT}?LAtt%wUU;3uBk){Os~<n@GM+
zoepKsMdv>nU*;G6be2C;!j>=TazK=$m|$Wh@4EZ>1@E>TUTVztn5Uj&+w+u)(s!oI
ziZ9pv`jNb^>XYQ^r|K?&6)rWUxsmF@KZExe{bze0Q!)Rv@%hJ=_X5@~GvL|zHMccC
z@49B_B*$!7X%)YJk~|NZ?M1%TU*Lbr_4Y!eIAgDPfJ=ZNi?i~EJ2eTGwxP=>h^H+t
zk}$5h5c{|&?~~2roAZk^Usc-G|KThT&kI<!c43x#*yhzy0*bRGA3yrSy{Gb>Y~0M>
ze4BsXRB0`%NLoChbtzZ<o~VZxgFI4HU!Kvr?Q+dseO-x)$<GT7{!;so&MBXf=6UK_
z)XAc+VJ9Nio%<f&xUy5`+y%#QnZg<8d?(MZ?%jFF=DXkCLtmHXSL~>356ZP&$099w
zV>Y|Lo%O++E1!c(!969fM9=xmN!Bs8+wv{Mw{y{irBk`~|Gmt1|IWviS2L$y{-pE3
zNGocO_PUZ5#ry9b^5o>J%-VC;(B5wEQI0?ro47!`pK1p>K0Wh36{6T~{bJ$W-&_%n
z=gt=9J(3CDsd>&f?#aPlw(r*7dv@`d?X}gbTg(<z-qbF#i1M%wI_kpncxUE0-?#@q
zZ%(hM`^J^7e$IH}+ykQJI~;vOn+y_4*B)DMulea?c<D9ot|WovTWVF+-aUOY?G7{-
zE-}8x#=7^ywMQP8w%EN~n|nl{<<#Hhf{9laJk!pP+j!>uvlA&s&exr0EeY*Q=Zcvp
zmz-BK@3gW0ryXA=xWDF)(6CEb`AKAdpz<chRe?-@6t?c!Hp{O=Lgx6<Qk!L;jOWa+
z=;OXTeTTtMnc`MspEUJ?-rIAMjcq>PiF&o~eDm>8yFZK0sK;NAxXaLea7I}$kCnY$
zvD>Kv2KhDKb$>2JCBL@mTWK#GnV<FHh`u{d(xXL}-B#FCWUag^Jw;3Ke*Mv+mpOdr
zKO3a|VAOXtOwvf<?x}t}XST)hsln?vMbsUdd`WqlSNEEb=^EF<SDLM237mVvEkb%L
zv-j>>JKi1?=|~XhIXR)wrf8-i*G<o~<9>y!435`4w0X|;e5UMd)#uL)9xZU5_g3h;
zLBB?5O<ryEvu6EMF?$~duWOK4ZmQ^7#dhnK%F7Mrh5o1989p)V3%)58Uv_t$_w~})
z^*P<oH2Yp&F#FhAC!y&dK6RFS(8_BWd-8VH#7Um7R2HlZzonaLtA6^<51Hl1{XSn>
z86L5I?dG89#HE{#MCHy*%I)10zeFr#WqawL4SoWq9JvlxSn4NR98;al@OEcpQC?q+
z+W8&k2VL@Jz5JtN{4>(8cWzo*&a{nZBJQY$%{Kh1xV<VsmB;y-*{hh{!O!^TPv2R#
z-ElADu}(&FHO?KXj^>7!61SC~ak#B@yRv)T0rt#g-?RI7y0d;*t21Nc9)o$pG457v
z+9f&LQ<}Ke-ppuPm7shu%5kkgtJB$bFVUL#NPW%uar~Osr^<)uu8-U1Beuz+&$#6C
zUhxl+>ZfORZl0q#N6|twfd6%fl&DwTM6bn8H}!+^L{_qizDn#>$oleU?!3vW&!3%4
zKc-nB+>@>@@qN<yy@j9h=JibWT=!{<X?~tbjCJ>Yj>wpmMPI9C_U~U)@Y~TLgEd)Z
z)4W8T_YYF%RP0Dx?v~$l@9M{8->ujigFZh#xo2*06`xPDd2FX$mvi5mo$DO#3D28d
zY*Bq`!slJpRa>4fyvVpCDRg7l<DD7Tgj}a77_6Dz;@-VM<jCKU)x4*#&KAr*YPEIO
z)2-q<n=f1L>9ICiP|hs>zM)4r`%&i`m2~xU#~#n>RNo!7RpX=ai<h5NpY2RAn5&k1
zy5;JV3o>hcY_gNhUthe;RsU<%5&8WqK9*Q^-S0P)NCx%8jbBeVZ)M>pQ(-K7Yma0@
zaMf9dZ11b#^VYbm$%$wxXvk0UKHzTfVHvZDaoI0t;mX9kTLCA&YZ*-JW9<0&D3V)E
zvOVvpz|9>FhYSsrz3zsGb#a}1v~p|B?;DTIYqU&H&(@DVH|NykuXiSXxV+sc(p>&$
z(}Rv??cF;KZ^YbLtZeSMHCQQoWk}6CJFU+b#Z~S8?mBj=EoqKRuynJ<p*vd+SDZU&
zVR*;)fbZ*ddy0>L^Xn3PWE-w8aVm7FAWz1=zh(`mHJt8o|7dQyTUwTN;QqrctGBJ6
z;bX&iL}Trnzdf%fd=xPhOnRgr{mAHqy7sjCzdH_AMOMDwEhv7z&v4pjH?c2ouRq>V
zayj&n#-fO>zwhtYMm>3b`=jiR!by>jk7Z^S>p$MIrJTp?B+KQy)2Cz|n)i51{hIvX
zTJ;0b8yf}BS6G|8-Pg>nFYCW{>j}=M$1XmZV*X&u`m4Dj`dJ&3e!flT?V8#lzc+Jd
zk)eO0O}jA@<Ic$%i4vQ(#XY{9?OOM_aHciCbi~vO+gVaEDnD<u-ByWz&sH-(tZ4sQ
z4!($H{<G@W8QuBHtJ>DS-qP2(T&c@2Av#wid2L=#?_`w_q4Oq&zjIqA&uE)}ze6MR
z)t@K2PrqtQrJu0O^SZl`>0UzS)>HX*Hy`lN4~>ZF3^4U{&~2OL(tP<(6#IkR&o(zF
z$FgeezVO0o!P%X{4{j|y*#F7)r{nA|xjSYb6!AUaKW}B{-=gM&670z`SE8;=P&w`5
zd2o3`N4v$n%9NE?Uab*RzE`W8RA*){)M<P?HbnIP_n&18kL<78aPaDDJL%NfDf+Xv
z@!wl`^6?Y(%e(Bde?7RB<W&_>&7K?bF08pf<3VnKRNaOL+b<{0T3{|7Vmhy0R{q!7
z;Q3|qH4Z3b2$zXoiJG@As;+*k<I1ec_ob(w7FTYXSAJ&KuAaUK>A>~BYf4w2*k2p*
z<k?=ft5q@~6Zt00XWUa7do8d>Xt{&jBHx|-2TF_C?g_4Tvd+1@uQJJe@!Ppldtc68
zt)-N^RzO(xvc9LlIfg9O*&p_Y1^p}0T@teP(_(X7{rF8!Hr>49FJIE2+oLfxyXKqk
z#QNVWjx--<>}}xFy}Ner?1tIr{*+|a?b~`x%ra$1Id{eU^`~v4PT&2z^W9HNd+w^>
z)7^@OPbY<hGj1*1pY6No?W~YZiW}#zGn4yJ@%J`|aw*TLrJtuAs<^XRcj~{hpA|SF
zV%L8<`&Q`xFUOhr^6f2w+mGe_eCW&Dw}l~J=+dwAzR!N1V6K~U=Ih^}`<8t*%b&0Q
zwdSQ<cELf8*`H?meyaaiV{Y0M!6>ZWb0aK(?@!y)lbfC%+s;^*Q2B39YE@9--7Eh0
z_gChv{LMR0FLX!U@*`*SYsJDBFX7vB*(JZZZwBAwn0dL3+gv}MJi)Lpbz;w)gcCO-
z=YDUCkDB#p=H$J_fkJEZCi%Kw?lE6`?$O%Aia|Ft4!>nr+!z16^|{XSWp2w4Jk`j|
z&(@u~>ilAL(RFwAit7(954wEqYWU`mEtkdJeWu^PYqe(SW+i{V>n`^h|D-d7Z?o<A
z7VTMc=zX2l>nkazh4;^tSsWG}%#ifi_T(btrpHIL16R4-sTB$0*U^vLaAtncCrke0
zhBE8ty<%sskZ8-9^6E}~$iriIxw!qMe>5Jk<mpp6{iNi_ec8J5q8VK!*RGvjw`R{m
zwwq<4&$N_FTFWk!Co#u0KeMgcJ>O+_=!Px#f)?p+xO<RcwP#420pE^w0<Ho4I{b3c
zQoU?-&X(r+lJch?^#wGZ)?m7q!LnxE^&P(#oH<)<Di+S9q`f2WvdH$kQMYG=#a3u9
zIu};?{B6+fi~H52-*DSl_}WQj=2f%otzMM=$L!Iv7OjuIm3I;?zMV~e!TR{ftUHCb
zYo8nv5Syk`ci{m46z+7L;MSa~`S*kiyw}QY*b?*b+IG>r`|CG`^)NKQKIUlh_C!?h
zlGR;nc5^1X@5~79elHLzpLxJ~_w}AF{&tTJbT?$UUkl!>v~uRRJ|(RcjDgJp{HHe`
zQT(B@;Wn$K$DQb$vzIuS|Fk{bxp~@5P1V<_LH2u<4}`7#sbjOU;Bnr}YW`ZU(C)`G
z!e$i;7a5uvUN7hT@yy_~AWO{_)1QIH&!acaEbEGqslCs#i$lUSXAWC-?j65V7DrTj
zPCnYv(BUl-Y33zpF0EJ^U-@rOQPr_Y@)O-eoUhz>nYc7$-RFdbXUw;2+f-dTkbmU-
z#ll&cs|ue+$Vgx0+u+Oo;qJez?<=w_C#)@sXgEEwWzY07c|qO#@9y6;^w%xCaJ#PR
z)Ba;eavxpTwsW4zI)zO~etvqJJ!h4C;qyS3yX!wE2^6OJNuEwG^iOc`3+-Ig@il3x
z_5Eo-e}%Okoob+GP`KN4hv72Tot~e>Gu*Ds&YN2D>yE6sR{KjaS(69H(@H+d9o){d
ztRP6}`>m+d&2|^oIrs^GKH`5|MOvTlj%DA=PuopTF3A4ZYp|(M-g3X=-i0>h*McO@
zKiu`MlgWmI>6-bfn$pEb%*$7acn985xRiU1e@58Rh|T|^Gs|x*J#BN}q`Z#%K<M_&
zms_5HpYSJpuKsU^J;l?$>6uj62RFOdm*-Bt9kovLXxF6kTygU6k7V8Xn7nsQN$lKg
z_7}eO>o3>%U#VsZk#;&Nu6A5T#=T<^hcu^~PG6&j&>;h*u9l^y-S;^@NT2#Ft`jV5
zd8eoR&DTm9$M2Cb>k7U`b-pgoHeo8bb1qMpO}%k<q|z#-DXY9}Z}3#~uf6oxC{lOL
zG@Y3}&CkDG|GNENfY;U~i7xh1HQO7uUwYf0`R-TQ1@3^An%l*d!n~%f+5O+yYE!_f
z<9qL}$>X|`vHasc&M!w2w~Jjned?_HsV5O@UOyHK{^;&&7v=qDt4HOAjGezNo}4;5
zL4J>1VO`uuqk9{B_e5u}y!z_SuJ}jwbpcNvCePn6b8p&~RKe1^ydGbn-j*QVrUi2!
zTq;{tX7*d8Vv9)NTJ5+r;pw}~c_Y6sj0pTwu*LVwV&0!>n^%Tx4axeuJ3j1j{idn6
z7ku)$vSI646*~>HQyWtSzfb8l^_cVj-XEJhnatcfK0?`*o1`}Eko&lKyK>wf3pcyz
z)@c*JFZ&<+__Va^zn|Y-<!m@Nn)w+yy;vdCeSGp!`K^cVUwda+W_w#Le?McBz_e>`
zHve|{{>g7v=9TD-tIUixWuae|INf8hwc66>Hrw$!--F58CA0rbZ}K&3_7(GtZmzj4
zy)0&PO{w~#nAM*bo9pK9T~qkz%mnovJeU60?TUH2Sl{f&;pd`aV#;gOO79kimxoOL
z5~8wmQIEZu^TN5h+jcXAm(6{>^43S=FPDELcAwci*XU*mzvUaFh|qc4Ol~HB2-&L4
zas8F-bd}Q?cS_aHb@n$Gt$JHm*wh<*f9c|st4;SCcK7RCjhdSkTlsrx$IO>9GnHc=
z2&wNdsn`(kuz9ts-M>|*PPMsCEaTsH?pFR~%MS;>?D=2k*8JXhX7&As_uFr{yp;bC
z_=>04Md<FYroGvdO&r}FW)*(-wYoQB$;Xxim#Bv0nIAST<5xPpcxgv`tx&o3&x7T<
z&!2yCH!4)?dL$NU{%-2e4_PzU=U<J~zy0Jv_Olu0(pDv91rv3JVyxC3|8*zQUFd^d
zq}7zXwa0mXtqZxKXF5M-gQxn7vt85ParAp;UB3PG<;Io!cclG%aGLGutWW>Sb*G<u
zs@YTNzC^H9s6}!0>(YJuLLVL0KXvENHo@aQJqJ6sSglaH`}f8?);~9GN@TajZpl8d
zRR6&3b*BUku78y{xNqlepV|MmY2KfzwpuLI`Eu&<ZKvgDKii|ocVg{SCA+<HLF=zq
zJU2ak`h4Uvb@%KhZ++3598B|c!himWn(4o9RYAVuf&2}bNuGB?)_<*ZFZ8Tl@+GW6
ze$%&me%<f4&&rW+=sz=K$=*YCHCujdUeH|@x6}0ft;b=)$7WcZ%F~%!?9sy!#-d}v
zHFeh0(<%z#>y8P>JWJbLp&aobY3<SP(s$}(o=#rxRa3V1)Z?W)n!Tk9md83?TkTUK
zd}A-;^hBKx_ZZUmH{^f*{@Z7teAI)t7BfGcmHB?IA;x&OmvwT9ufC1p-QD+!gQnkm
zWAV7-0efNg%B;0={somMi}L(VJ~vXnY;Y;cd*`F#=Z>BH>!<zv<JNnXclMWv+gDt#
zmX!G4z4ZT`V1jfT|GxMG+q;&$=lJ30``j&M)k}qi_F3!>=Nn`cq<+7cJ9W)kxeb#n
zo}VpWDK`1Xm#|4I_O!NpPpcC6{qp^utequ#$0P0?p7HS4%RrGJo!>FLW1oDVKXqr>
z_2ofV`_}2o?!94k%Q#m&L3+tIIj#rQe4E~~efV46Dyey*yqe(z$L!zdS)N2at7iK3
z?0T-xoW|s&P`z0uouRk8_IOVUyv}oC>Z)n-W;*&YA!^5yFYw>4>9^kDvL-L&=Oed?
z@%16k?k#Rm@cNVg(&S?FO<m3(B`%j=&OBJQgfU!F<H4niJ)b;S?y+(H;Sk&S)!@37
z_RTGaW#0ZMa~7H~*`jsZ%*#2qY!CS|IqgjQ`KHQaW6V?ctsU3nP8i$HJYFjMWWv4L
z&CRDDdA0KI*Z4Hs|CG);mGAeL-M%k&`KIGm=kD2k55=l|0&P7T%4;)IR$VAD;cK|a
zAZr%BX{(fbSIN@@of>TxzvRA8+U)sErgNJ3ED83BMUNYohO7==7kTe*Uuv3G|MZhG
zKMcj$Z!pQ}Py2cGxb&ZQm${yHZF`W`Ep~FRw9SRN2e-Aq**ckNM^DV<j>kqi#S<2&
zck9oU+PhPD_Uyfx^QA6Zo?lrycbV4Okci0*Ny?`eAMN=6uX_H}uO0g<ju(9^kvo4{
zPU6M2s8v_3w!feLWBL8HpKqGiWgl4Z<<f%__n2~+Z(L~Ss(8M2`n#j-(`Env`&MvT
zK(T${nT^*N|DUY;cI(=D*Lgqdv-&UZzbUZrF5k2;?iUhO-bp_endzPTdHCvt-lCcN
z>+Bz0vh>!o{=qa|gDX=1+K)$W6YKvbPmKP~Eb>x|f2r^X-&@j47N^hKa^S}5_jj{%
z%Xa_UyKlWL$JDclQ}il!$1S&sK5FK1fZyrPLB@OEZn9-&<*2(Ql$|Q5IyK?+x4<l^
zqe~Zczn(U0|NeuY=4sEFXEbNtG~M)N*`Muvt^1CoShZy3>|&qzsYEp?mHCi?;ZM_Z
zz1z1<wV(b$|BAF={GBe1X{-M{vOQfsuV16*IeXB9OF9OTG3!2E443}%%|CUXj%(p&
z9`{LA@B2KegdFqLcBI%Q?n;=UCDC(vw^~$L;-YrJH8~lRq;~ITuG7&!<IipBZx(B9
zfBRR&wt!n*0V}n$U6%%3@3U%azVcvpzD!BQgzvW$*dNB1=zVZGSn#sv@Jr2ug}=D3
z`1oraYpm{HZ2Nyta?4E5!&ABWTK|}-7k~2E_3p~#Ba*q(maj6JXl`<-BBD80j_d5a
zqP4C0{zaTjJ3f1?G<}sbMJIo6(#*N<@-MB*vN`zVpHr)S-M)kF@nMm%+t(gsTPA7y
z{>#DW1>EbDef%wA<`kbuPZd71ciHq4+p0C^hyHO`X<l->(=xiSr0`rp)};ql(dkQ1
ziajW=t5pk%IWDv=;?=*0jCN9XKjjw6v;3&!=c+t-`pZuPkz+-NT<qp`B$ck8aJYhx
zD>*v*<GBYtc~%o<U(fSBv0S~S#%xhYV92dV<1FFqeV1o>>QrUxa(7R=Uw6-;(R`i8
z^0cCq?}i7D-s;G@`uyNyqxxUF&ZJ+@TC?WnH37$$d(^TS@4c<J?o*sEdaOuop7XY|
z8yIdDR<<0PG2yJsmA`v;XDAsMrk&VvY1hgdr`Aqd%h&KEd!2I6bC&ahuU5T%HlNx2
z;%?I)6W6N0XQ{V8Af5Sq(wEn>yEo)#J1qGdRrXeSo7kq^cCjjU8cD`C%Fd^HYu(sl
z*{ypsp>SjOK{r8d>1om5m1TmTA3b1XpDlU1+Tz^1X;wVWT}vwt9PHg4`pzyRV5OFp
z-=!6oY$m_CH1nvNd;RAf{?UK#?_xFCZR#-Fyz6)Dv$Xv_qBr^8uQTW@nd5Y8qyKH?
z8Ep#>KFx5foAK=L3D)~dqr1)T7ynjUnLN$xh(W1rm%_Hlg?=_=mY%Cu|2lM7eVRee
zgWuDNKcDqn{chU%t(J>2D%YNRTb-I$8=YFIdn7+&_4QdV4Njj``@h@t<dd0|w>B*e
zyAyxmIm5L@?VcAuXGez|k)6Vp|9@$+)rWwuDR)jDy|vPK%Vm?)-3v3n)%%AiyCwJL
z%`$)IsnlC2a$Hbq;in4Abv)TDr&^6qJ#l%@Bp!P7yR=PgAG^$*+JN-E62I^FNq?Wx
zFIv`qEbnSdx%iBdb2DzexykqX&NbcUoND$xa~riw*4;Un@!_CB!ItVhy+;zS{MXKW
zzIS`|F;C+_7pq?fR_8XayA``J>+zO|WSze3f*94H)1P*!awz|Co7<hPyL_4G`o)5+
zTc<_Y&QqxR>o+rB{%6RljsM^7U=vn#=Jh!qa4?N6TK_=xj=NSa2X-HrYRj;1YUs|Z
zcMjeSnf&&omuyVpw(2FE*O?W|-cM`$wqIZ_t6psBi@Coyq;A@N=<V8>FF%G#cL?7%
z*^wu6qT4BPZwlw38(;XGpMF`RI{j-vs8;Cw-`uJDc7{H@c-d|BzuQ}u&Df$oy@XHt
zPK?s6n|C&?-r0QO!sXtj?f3U6KiDKX!M4cf`<(85mo+zvJ}5e*VDE5tkAAqLq`-?$
zJ;&Aex`ih@JhGd&=xxKKLXqR5GUgZGC2gD-urhjIZBk|nuhhb)Uwm97qW0BBJbWE3
zI&a+(*$`Rvmj|!SigUW7bkn5x+-~C=FADTiwx_*TeEq?8=8M}5|K8MZULW)2FN5Ck
zg;z|bp0iby?Nq*7eADWz>(ymX^O6?yHXDjPmWW}$!jaVK@u=T%iH?qTokY;OC@s<X
z>+?e+^~+{F@AS;`37*_Mqio9l%`27^FE8J@M@u<Kr=+0a{i!(l4Q%qAT3bwNYa6OJ
zPY=DpT4i&4rRaq0bMM8}=Q%u5+u-`~e5B}=W6pBZ%ojv#o1!f!J=Nm>9?PZ^Dic3P
zbRQ2-n0uJjdi{xA4E1f3(x18W$riA&{!q;NAUAnIkZ;|ej{AHilUbgaT-)YpU;Nl=
zN1n|ZZ6QY`Y4^vz)(_P}*!ABVthB4!dvO+9@+@9))t|9`!IQJuAKYbCTbw%YJChF^
zTSa<l`EK*~-|Fk1d;GfA!0!}vqCaBGMq`fqktdgBP0H5u^m%Jxx|MV3D$k(w+bg#y
z>CJoo@HcPPRH?v}$-6(W6ex4vxm1{2a)n{}XSYrD!h25(-tX_+chz)ePGU)kU;flu
z+h-!5E~e}(x1Muu;i<Xt+4q*J&kcC|Ao9)i=*2fYHe}sy{HfBqZRJGU^QyDY2CTlq
z*|cHn$|aAr{Hs`RSa3DGzodVk^?~)Tr`2C>?_Te>oz3RzljEFk-p+U_bLY~UfcHEl
zTc359GN->)c62f5RCC{b>RNQv&9gfy;@YCN_PsaXu+1`lukGZV&$S;lzB#=AYHFEs
zpnUPca^4+Z_^Y{oYB=oWmyZp(_4W7KX~Dj4Z@0umUoY|v-di49`loo^<B2DCTv=6`
zWhyUylP`DOV$K~o`DX90h}Y^jlutR%@6h~u9fN$!!Y6XvZf*<Sh6IV-i_5qDadxld
zmmT&o)&**d3WYtdE;-y&wu9-`Q-AYBvp!+=cSd?&XZ2r^emi$3`~T>#0@G5jFYZoJ
zX!zc9;O*iYPYis%JYIT|J#dFY^F{BX8|;2eR<d<f++h>fty-Jbw<l|o&E?ANx_9l$
z)lQZb8}p^c8~Dj~l<|IV)Y<q({KLKb_rxFAZvC9soU>_b>9*UqZck>Z6mU}R**fLr
z=Yu~cJ~A(f*Il!wH{M;q$?Wp~->b~07+l&L+0-d282O?3N6~S^OP8e9u2Z^|H|NI9
zfQ`97k42v?jB(+axh5gTwTJ&kEaSa7XXV^J+F!qUf8Sf1{mlWKi)7DT(Df+^UBT{k
zZSuUWZ=+o=R#!5GDRyL--@3E3bKg_#`>kx(CpP_=ur*?3TIDXa2v+^g|CujDX<VDK
zly|qjuSjI-;zc%boipaWcz<A{*fevUsk`o8ZoGU)#c_qsy2zOJGIz1ND+?!VI~_gw
zyy((<4PRGzt61)tBfx5B)>U@p-r2*;A~x!*J$&TGKdz8jajqGOi<fw+PuqK*J-qek
z)r#EBap^S+f9~4Se(L(X3D>1Jgmvw{(!uj>R?cqTHOdz!Me>Umd79ii<mIw4>*S8w
zE%lqcwlY3__3vIos7AQ%_jy{ai(ammOespZQgioj<KnyRM=tF;%0DG%ne->ypF6nf
zIHt}LdidyR%LR_c_sNS3+&|mYsd<S+e%fh~o4&tFX;Z+;<A2|OyKELII?a4aRl>)Z
zmp;Y`#nq`d54L}9{;{>?VVvpxe~j<1FPEwDe&#F}5VuzQ>aP#NS;=RYif9M^?++@U
zd1^)UZbNP9TSn_s&o|`inF;nhew4;~MDatmOX7~l4+SGNu1_iQefjw9mc2LkGTqB7
z_7^x56=)u*JNsT~e@N)vlzp$~{rM2fv@hE`S7h~tzRPPK9sc5y^8a1H^Ltm*+1THG
z@!mH7b!t!Z?B3TWnyY#>ILcx-UjP5)!C%IEug{ijYj}0nE_U1fbALr<zY+bgE^B`v
z-yYfJg3At1_$#pK<gV<^8r60QX4(4JPbc*(3|uY9##{1S<H`g7y>SiM>z%VMY(6QW
z^I+5b%XK1(_g^oYD)PvAGvBwLlX_+!lr|19cHx!JetU4wL_wyS*JmpYq-p{l$#web
zYhGFW<5JJ#39|Ca1u{<i+j<ur{PyfN?}J;X&r1BM`OrA~MUh2AzuuGPCMQ3=HSJ%;
zTzF>2@ENu(h&$YNFu~zIXTjZAnZyFAn&oFr7ijzn(_Q>@hjU$%q77HSo6Cum%@WGG
z_IuY~*}j))pX`SAOX7bY_>@>0HEs{_JF@e+O5LuHvP}2gb4!*m%5P&6_w@D+mNZm$
z>*S6x+F6sd@A__`^s=;c=>uEU9obhLtggKu_SlEtt+{`W|GFjDK6-u&uv-7(h{cXL
zz9Ry^zRvyo<!zU5$@8L%hs`g3XZUfz<i`rN!%K?Z9OjGOsD11~m0VKG1@T30i_Kr$
zc3s=f@Z+4?uZtHSu(R!acWJU@W`Rj-jlua-4|c?D`qgIe;o^z1e6F{9qpMWre)};~
z?%CG7UzRr)F8r8hvLmZsUcQ#M?!~N_evTh!UoKm0kRRy&MdjvgMU9jg{@H~b(tM5a
z`Qe*Nf9>5RJYjFv?&|yf-oLM_CeOX8{@^2b^){|Q28{BKzWc84HTGIy&FAxkH<<Nc
zX{yLisejxz(z|qb+&H0{v*Sv3;o%d@e%{+$oaH|uMf_Ry_dA;vg3L~N<$jL&96RsP
zdAW@C)J3NcG?csN^79I>-8kprPGi2}%T2FOoNW8;aBoU;R$JBHn+8S>Yu{eGciTt&
zt>u=2_9qh+nwI9+D6I<R+JE|`Ml7#>JJ%1vYfpaPyu;o$??6PDg>mwU)XQA6m`gf7
z7+HRbY%kjBc9=iOKDO+_EnDlYC;Va;d@xE#la<rcnt0W>ZqJ()d%v^nkYRb?enQ>e
zhgbdgkGW?%53JM9s1mO@FoS#E^FR%WtYvO{?<u@`VR89g^zqFf&EB#tDK05dH<8^N
ze_}`B?pNu|rOYffM|{30^Zq<Id)HzyO^bcqk1F+!NglRnt2(RBvv{6)NVIwEm3tT7
za5-5rN!1vfSpNOVN}jJ*N@LqUezfUJUJ?45rFX^Z?XmY8-d}2$FFkNMWkyKfI?tsE
zJ&N<sKeXVx>Sc4#HFP^e%=Wf4Zr*)cGw(ev$_erPVi)rnv<vf)zmVXjNPWqfIUioz
zUCVp7e;ezA+dM{7_$-z#=Gc3^p*faWubq8|$N$aUXBDo8Pc_LX6*?%F@X$%^v60=p
z^c<W1$5zo<kM1(u>z@B@#R@@&m~zG+3{M<Pe$ES;5^Px*Rejv|rPY(;MV<5C39}sB
z$56j*(wg$f7Z(_&?`5cCnUuIqC&lBT|G{^PeTKT9_lAi|^)9&We_#1QuJ&gQCcCCt
zHdlXX-f0#{=9xF$lg&`-PgT&#ZQ2K_4Xu?Nx79M#y}4z-^uhd`9p?GJe2yKJI4IHl
ze!Fmb*`)`Y&fc>u;a#^WIezEb>eJrw*@Y>KUS~SIPjT2Q`eNPdN#|#?l+4%YxmCSL
zxZrN2CJ$@DH|~o6-jm}NE@eOI!F=!Oa_hDjy*!DRzk7YB=4|<tuzk~%)Z1yo)pnKN
z*mk@T{=le`ux!HeTfaRz4L{cIlIT2cYI>-TQ*STB^gfk}yP^-y_E_I^R-ehtT*ENQ
zX<ul83TstKxCe{=<bxdtJNy?5CMoa!8uc!)`SiP-zsigiT$QVq_8Yz2A!&Z?#ia!o
zIfO3k-uiXn>=(CQ7f8xI(0nrch*pc>j%OX83ws`3OaD_cJ?7p63(qHejTNSQNNimF
zPNg>c12a?1i}&wrTKDX<<1&qpyxB2hw%H89y_+vjUa<809~YM6XD0+6yv3F%acpiJ
zYYxk4>1h}GEFSDSd*4re1&^Os=;^P&Pkq{@`1<6DcQ2>BUpuc?z)g?$e$ruv)ep=Q
zcCL_JJD)9QQNHEH)cuxkZvTHP@~Q4@DfiP^LF-%>ANkT*%Kq(d`WkN$tw4eF6PlNb
z9ZvVDeMn?D7sufL`egso6DthXy1&>f-8}iDg9_W-R}V~ngx!sH2kiw6>02AEAoTlX
zhk|3nTZw%;@_Fr84lJE{ZC0P2Uur_m>%uDeCPTePk}5Cq#CED5kW_on<92NJvdetI
zE8M@>9{s#m_C!U(p^8(l<X+8dbqt&QmiYtY2?qu7&fAj9ef@$rxPPsce5`VS+w7H3
ztF2T+H^;FP?@q_F^Hv@cJ@%RHI>*6l!Eb^*8|7YAix^6%GjmK+dr_rgQemdt^OWbE
zU5Bhk|Mrb*3aU;$NDT^|$7FZwy<wii!$&(LdmfuC*Xv<<aJxpibx&fM(}{P@8yg+w
zojov9@<jUOXMBq{eeAb>@ORH~wHL?ECdi0qHmy6%TC@6oBFi$aLvN-WEN*AnX?SR7
z<YAwK6HIRSq;g2xwO03UkXCQAWvtzxJDZK!!r`3oxpQ30)xVnuRVV1qJU`zo#wuCj
z{87OjXQlOXuiHdS@V{kjyW+dnF@?@B@nko?f{78$_Vw3N%ZloE@bP?dimQ0K=<uB_
z2Mw!luUWa|{EH0_ubq8Xz3A#ui?{o-oY`a;&kN79e_iyl$2o8QSCO;{{O^`>^R169
zn>n|{!0p$qE5~b#=a-zd@!3(z&2Y4^Fh)H2%(9Ixo3s<yd3Pl9Jon$d=Vwye=FPv|
zOBYDy3nw4iy?)1<sShtp2dr~lD;M)u=i5DI?ZY>xtn7JbeCO+WGd?#S7tcpN=l`m%
zJ?zBCQ6PBuOiJ7L_|qR=skYaYwG>^~KPOrDX_xcXw4aaqE^*}EU$FeZ(gJylJF^#<
zc;4IXHd`c=$>KLlOxY$T76E6w?27-U?cd`{XU=_b{(+b+15>j=o_*Jkv*L>3?_EN&
z9xiO2=f(Bip{{@H4Fi{}+a64*o%JGm`6|2B2kSqzo%Y{9{o$E2-wf9+%qVK;;nt5U
z_+XP*6!YWDvQ2yB5~J(Fxc+pu{chNPK%suCM026>3f_&^-d(o2|LWE1#ff_wvRk;9
z$JYG{Yb%fITm14I!=g0yMJKIIe|cRjdL4K~yqxKsUdx&smzK40Hkxl%-Yyd1>)rA0
z+AWQ1FLu=$f6;9h{g$Dnsq|zM-{;MI>usKY{H85;etzi;iOG!&bq^*myNJEow<&1L
zo%%};z8z-s4`&K9cQQMrn7BmqRr9@%o<AG<_?p;%z5XiTFSOb$&uO8pm2+P3H(7n}
zPqs==Yz}{EIsI}b&-3Rtar@>xeYEGekK+d;qYDo{Hag!E$zJ#@+U%6zQLiJf+&oqa
z3Aw#Z`?iut?2~ZnKlw-Um-l6@PHx!D`NVj#UNCF>$|mP)#_XRa@$-H<d%)buTjCh=
z!{8&qmXYq>IbFNgDL&>nCi6yHmJ77!|J1`yPkBeJl}pf%0hr`8&kKt%$;wV$ocv$D
YSCm8XunO}>1_lNOPgg&ebxsLQ0I6T4#{d8T

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/gmail_logo.png b/tensorflow/contrib/lite/g3doc/images/landing-page/gmail_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..21aa2c84ea56a1c627eace2be610e3df69468450
GIT binary patch
literal 35371
zcmeAS@N?(olHy`uVBq!ia0y~yV4MKL9Bd2>3=R9u&M+{DU8xF*C<#g|S12gTPs_|n
zRVb+}NL5I!$V_8ksJL}@R{A6v>$2AW$)ZtAdm{OAj=XU=eZ71_Xzq!+w0jy(uhb%s
zuJTbSIBR&q<oEvnrDy6tPOjagwk)Xl^vzGg(K-7*FT3^pd1a~Dz54h5)%%O@+ndYB
zSHv-|fA~}B{qw7(cK-9#r~LR;Q}h4Ml%IP(el@-DnwxE(y<_;b-`5Mjng)D0|6H%`
z+>3u-exzEi-><vhu3G=~^#8}548oH3Jm|fDkoENbz;Mnp3Rf;v=KeWdz25&(?fdu7
z-#xz}`J4UV=6V6^+wt?;-!C-yW_$OKcTe2>-Cs^WfB4ww-|e0|&!_IMow!K*+w1ew
zZ_M5N%JO$)Z2wG;)Xz~@sx`mMvhz7fzRR3>|Iqy-zl-Xn_fJ*zGun3I*@JSa>vPm^
z9e3RxQ-1vIZ>zg0T^IA?E56qKDzIO@@W-?Cil77Qt@llsk$Pex|Kv5ScHiWS3%~Dr
z8<)T9)YR#ZJ>E<B{QUj)`kyZk?SKB2cp)=&UGcWMBVCo1S4}PlJo}>WK2e}N&d}F<
z-}mb^E2V#{ZfxXPCjIOb({9=G0?Iqj-{LYq-}rb^?=icz4^LI`I&|zPOES`%z&JbB
zL(e*TkN!Eq<6d=E#@`ixIkLrg)mavw@a35K<N|NZ?BcH7$A3MU-`^-EG|`E#^~wb+
z742g#f2TyOTsY6EOKs_tU{BMnSBg&<E}v6;^3aMnr%u(WF<vW|PK|T!HN6_+y>{!>
zdo|n+5m`G$FK1=1E!VpxxvRMBdB?qW`NSg=-Q-fQTy&d1<$RCvS)0>qHlN-1hpYQr
ze5L5-S6Qp`w`(qyEx%PcJ+}Pz-+*lOITe>qE}v6%>!o~Tb?VpZYj^Z#{{FmXXZW=#
z8!P&?=SMz%9CXjw;-F{Hxf3(``+CGv8=AhInz>fbZT%U|^lg>1lqxqpn>1<j93wZj
z$7Oc1-zV-qf97xIJ)f@sVfX&Xr=HU&{VzS!*z2x#@&4y;+t;kElT^LKvXWagSeoy^
zF~;3x@w+Wv%I}}`zog+mJM-6>T8?vfFE^4om^*ocU$M=~Z<e=q9^828P|Mwuf9EM~
z3p7$%{Ci_i=j$s$RX%HP?@{QV=6}vOIA_UKX{I=D1^L@cuNv!g+Xz0B_h*&c9medC
zdGw6^?f%Jk@>c%xES)biO;<Pf{UXlX^^eQ-i}ly-%PLN7E6bU2{HLYPb<0B0X_Fnd
z&dlZgD05)jvOC*mP3W_@_+s(l(n}e;Tc%9UK7RIz#=HgZm&t#;;lg+`uu3z1?!C*|
z3qGbUs5S9nk-8I6U-E0`a(3PQn+s$&W=p-6+s)tP@@vJ8-Ak-vZ->w56J8v@`|izY
zcYnT1mc4e5^UmV?#uu2*ENni1oMp}5T}?mC*>-q8sQFym@cZ3wt$W!OVISfgnjhIM
z<NsK3m}}a-O-<jXFTcL~Mn3Z$4w<7b+8$g_*;o8Cl{H@Z!|7k8+kSRm{$Iv-_Pffz
z&oZ;_7kzN%yRUiT%j~<!Q4{zoO`LXy#+xtUyYejN`PG{n=f6A5H(Bql&&{28R@}8)
z#qRbvWx|!rNgMp{)bbqPo@<!%(t5@OkLI(z7ptEX%e(7cXnk9H=}q4}yY=(tMSouK
zQC-2HW_IC)MQ7HDMx-b`|5cEBKxN(ZEsI|)u-L~Mcwp|e97e5%4*}(8YYi+T49?7r
z*0<d4a$G)}Z|@wof(hSdn$ErO{gG&&Yhl@m-VIw_TY@rUvR!N@tX^>Vj_<)$dd~V=
zI?f-lU1M@PZ?9wG>mxH3KAU0l{M;s|*NZn7Ei!A^+H~labd=57$$??**PLCxOnJ>T
z)8Xu$zmdgnZm!zv-r9V}R;XpojibTUSsfZbwlr<9Th<Wu_UN%G{hhUE{0fv}-Y5iX
zsZTR>R|$|(X)#@R`mLXf^Le{ZkNl;s+$j2#G&{Rj@{ERdzoJn71le`0$I2!hJ+m;l
zQ%_OI;eMIZiqb`AAKbcV@>{u$C9KmaD=n}5{;A5p)$8X?4-F9X$~2kT^zz{Hu$gMw
z8pf;Cgk9&b+cmFpZdb7MyyzG><#PBEIc4>E*?Tf1vaVfeTf+G=N6g%6O0cWTLcUwC
zdSW$7Te@tEwXC0Pd=t2h|M8kfmzQ|7yF26_4m_o{wfo<U&0p^*ELPdyW2ID3y8Kv4
z^6RN)i%l>2buzKv653b5$H)5atX^&3?c-;&-o9e7T5upKk^hU>pN<cUwwK3=Y){RS
zjTU{epwP+oNmnIr%Grf)YnQ3|D@`?7a${>?ukF<t;*(YctW|aOdEJxwv2WL`RBMj1
z9CPu*oNi$O2U$6Ug~~tAQR`@27T}|t=D+`#!EKAuiUsVUu7bBa&$^|k#&T(WKbD&{
zVUeHKE}5B=S&w)KX9Qk<>%6PuSlipq^thd7KdM*AC`-mii+XJ^ZW3^fjr90-T3yp;
z%k6a?`)>%Ynj+8Yx%Id2m(CiwkhhA*62))zMai0~H*>u4VVc;{6SGgj?Ao&b-=_No
zeY_F0{d=>V@-y4&ev_?hr@2lzSz)wPWr{0%%bpDTEmrBvzgjg+vYOGf$g=nI^zN<x
z*8_xjnO)f}lN>H?QZP9CXX~a#%&VSzMM_I-{%LeYp1VU%NA$?L-lelHZVh`ieIDbk
zO$+2tm4xhjBJklzP=JHyS|LxXr0H)Ayt7i4?|f$SV5!D#i%m999!jX27&+w^FAMqm
zF2N{nv1?XU;}K=1nLVo8R!_Rts-?49-?*jg`1jDpmutE-r<9yOoOd+$O!K<=4Y}5Z
z0^TtmOJZ{8i^?yI3;2A(Y0J?GA?sa#m|yB<O;uW@8pvhHAIW;?zjA}L%yD&fAMb*N
z{l?Fq?mZCS!<8i1Z@JRAUgOwd#m18zp5`2O;#DHg)EdrgIJcGast@zk6FCCgm(S{X
z)WMl2m(IT`pg`-?_Sc<<*)N}xwExT0z%Mn$sp&wg;FMZZ=A@@<L{?-?3%?>6dqchK
zU_|h8pQYEfMBHvzdTGnvUQ@Tb+=2%WD7SAoJK;>MboBET7j9gSVVQcX*=Lie&EfYI
zXIK|3eY&B#y3*?2+RCQ)0#7_8mCU9Xwl-~UQWM(Bu%z|sqXjNys}s7S#Vj{3YRF3D
zUMYRkdy$j`-<dLt{er&Tr7eALG@?phGwRBkcPVb)dXeGtH69ZSmVgbGb9C9Ku`=>B
zRy9sODV4i(P0}2zOLt#4ykr#dThjfKzr}@1M~i8?-180&EnnHro&^VfO1T!U`*K)z
z(FRUW*0@x$ehUG!b7c>Fb1ZM)I}^!vB*o+=zsGH5y#?1RTBOx}&08~r<=)o6x8_?t
zp1!PAJhpj)7K^Z&+p`I)&Q4{D?a*A?Ut+vtdRJA(5x<nj8E-p1TQ_gJ^LF0uL+iL@
z+&VNaa&ql|)VMNLwA(NHW<Yx#;~Yts&lfk(+A&=@u2b~gh6Euc?-yQ43q9Pk1em%?
z^yW6d?%jJ>>gbY|#6w}s*&+uTS6J=xE%LKzo#N8^?v4G-Ra$q?OjT)q*wFO3V^dCs
z`wCVLe&I8%N=;P~ht(f+NH(&hzw2DZeEA)p@x$$jYb2^Bx0aufIep2gAbG;Iml<0f
zjHHD2h}|-jtC@W`b(720)B^{d^^cTTTRP4x$@+FJ@>QGh%1_HyeGl2|ZfbJlaxm*W
zrbuolkqODchO0PAeszDI#LD@(C09*JATo0To5{6>VQjTMF3KfatSmdsJ}|AiqRJR%
zBpkRjh*vz}NX=VT<)^&L{f(6gDHB!Ompv1*Z{%1KZRaWe@UF(TT;9b@t6X2XbLnJs
z2NXQJ&Up4hLvqIaszAT8_Q@9Rk0u|v@BF^8SnY_$D(;Dm4oz2W)aFIaPB%NgJ^269
zS(#g&CIoYd&UC0Pxg}%E92VGIQfYNshfC<T;$DuMiruy)_Yzk)b0)oQl<q#k=&W4f
z9_-4XmQwSxOgdNcK-iQwF4s0V)$mTT&DK2QTH`Y}qJ%rathJQ+NRCJx*W8F)v8V#Z
z@baTPJ-P?vtsjckuL}R&Q0H&h^Gd?dHa6<vBBra`_E<<8g(Q3sy*hoaa#(+nSB0R$
zk{f2br=~YPNYGnWnQ%he$xH3{tv5|u)g|O!UXXYgHO0`E_4X1IE2q~y1zVJ_G^C4%
z_4n{>_{_et(PXKz;@8>U<;SHDt>jB_ys8}`)$piK?T$`%D#z!iD?;utubCd8Idz>|
zR_Ld~@*NUK@1DH4M3_f4+tOuLdx_MH2we}(xI0W!9n*5;r}E@nESx;;(}Rztdy?+?
z>{rv<H%WZkVg)9pgHz@O`mjG!{2H;%;oz0V-W?5#Y!_8T2J~1+b+4$Y*^-!Z`9#J$
z$9J1c*$&ti<;@Y&d-jIu)r9|yJDNNlH<owRdRYES+5AN12DhHF=>+v$`709?eXT26
zBw41Y+-hC<!|F@@vq=F{xDWCDk^0wU%HN{Ryz0Y+1DT5x*n}=Rl(+bO7ddk03wNTI
zmQN0|D^s=YvD)m@f!icQXCKsWm}Bsojpu-G9j}q0*y_5&Qq3K*GnOU<Y@WAExl&~3
ztOMPld{27v8}9@bowMha+iH4&*O_N!$S<1&6+^broc#ft6HJ3<oU7x}X^h!DVTJD7
zRURk#oz<3P$~W=edc$696EIuDVbxQP-kg67Hv9=PEtf?-xZg9X?Bw>;SRPd{fj8yn
zjSUTf884jAy}f&=TH*cDIR&fS&8zl3npNT=JUuU2K1F(e(^QjL)5E0g58imMktQeM
z%(#Q)SK!v5u2|_EI*v;^du{51crvD(s@VB`i`ctu4Esyd@BJ*BB=|l-#$9%%$%AX0
z+|Ey$l=PZRel4`v#8Q=TWa*&|8&ZxPcUT?yu-ajj<jkCSE90kgdJZs32M0M$^R`wN
z-tNH8zDL_ZJ<Iz6-`mX0=wHRT0qpukCGibYnldfU=cYz!T&UO}o8V`gHNT^zM}P0N
z+_@R~>rM;h?#g+$cV&N5r@*YT>yE_@B46@Ez4>}pmENgZ#uVGVH(I#KNFeO^&9*lO
zugskH<mxGh^myL6TQ9pj^<*pkX|wRw3X5jDa83iO&W&0v*`7-RY^Up_WyG^?bw8cB
zsqfqL#&nj#$mv#vn%>SwrQVCk-rE!Z-GAQ;T@l5(yC<F0aCs}eIIvG3O>-+ph~2v_
zno4QWcX<|Cug`3l+n;9|6ueC^W|K?o)`SKBcF%4zm=ho66YaD@Fe3Zgho#5&?0WMp
zqPptM-K1xWj|Y?syvVn06H*gA(lU=NWa6{9YOf2=$~zObtd3Y0Ki#k7`ugm;r(3gq
z!+&t8YiI2^{&<s#48x5r!K{t9n}j0{E(lUdJn%+)#s(YrN1mqyo^1TL(JWj!`0rDJ
z04_#Hwzzq-xH<YV8Qd0h{K$DCa!N3CMoI9pJ-1U=JPyg6Hcw&SZRK6-<d>HP2H)6b
zobe?0fQA$2l4zNGjgMDf&r{KE@yknOwq9VSB{tE&(Bx3u@!ZX|t9)*Dl}ruP*?9O?
z(1%@(_X{mghJLBOAHBQzVB!s3)2@17o|;DK^0{rd=II<-zJt3tw)d8M%FhmqyK#PN
zrm}unv-@tuM7uR+j8+0}-@h%pD<|{fQW;}jR`Uc)%NR559no$b)}M-Pw%jcZd{m&H
zRk*!pNwg5d>s-EE8~qp4<9uY^Wymj{cu?5JM!9(Ig5B%S1VyK_6gToNG7(t9_G{6T
zNsh}sMIW@7J2dFr_~Em7(t<B4tgpUJpIx@&o{o}+-OJ3)7Ma#ZM0Z`v+|8#xtzh#c
z@fXVZOJ8mKHerkV+!ad=zPxbVcXz_CP`*uer8Y&TSC$9ZNAZ4@>DsyWn|rU=e$!ds
zJQ@C~WnQ}V<DTEMtNthdDqM<Ou(~ONVc7}2x~wni5BQUPJ4<GLyZAD0(*HfPH+&7}
zcZ>YE`qE+b*+;Vl4({jIk~Vm|zQ?tH!dox(lc)aNYs~0hSUUNHdeqXy)V@gl+noU}
z{r9{>ogz2SYFilp_n65E=V^Hp`@49$@Ap<L4Bsxf#K`4J8ehkObxzrnzE8Z=#d-f|
z=;zJse(@|_;U+h<N-R1wPx1OnK1{j%{0MLI?5=IhMPZBIt-7nu#`KQu6N_yv(}fFc
zS{vjSM5($6`@HQ<J9W;C+2YKr$wJjG2Hw)EFRF$3=Uq%X7c=L;%3Z>yzq4DXe)rq7
zaK<_Rty_LrPn;fS?zN>}F=4UZ#+$-fld2M`JZFfN$Y1_Zu-!~5X~Mp1bI*R1(0k4C
zuyR{}f%T;$;lKVYSgvq5`3dL3{25tlCnHV@rCcd{UH-=Hg;H79o^>Hp_5Oy1o@0=?
zweH)cxxsEm2S03A@N0OUxSgly{?niNJ7TxA<>m)uEm2r`UnjkP9h2vgok1Hnlm{i`
zE#{qgUN<ds>95rN%D>k9JN|37WtUcIIJZsdTf?7KYVpd)gy%l*e^;1MUl;sn-6!@$
z*^{briXH-LZzcE_OYbgG$Y{0<P>Mf!wYn{=s$EaMWLFRKO1{6L|Gx*=DehlZ<NZO&
zV*;<-3^TXoSH3E<CAzkItTpd?F4(`UQKFx{@>0M5PsP8#&tKZl7m<7~;d8Aw!-Z8#
zYZ}gSXU_SS>M-m2u}42PsTc5kx>mORqv3q(?n$d=sr4{yG;dp_w<qwaaB$l;-<7@D
zDn4&b_Z_PZZIoj^7+AOb+ky#(;pL(8Puy*)<A{;r&6y|7qG=koDvxQ=>WQfZ?U!$T
z>6l}-Psfny@5STaswSw%=ihOv>37<~!+Pn_(>~4Kwr#u>=4T2I{V#rT<*=)k%<T=6
z{1yM1$y_$wzf{7Zdh7mg^Xk_0&6PjxVtvufJwn>&LbAU6yq9aAzbo!P_38dLA2z-n
z>UD*_GrwKavwD>q5wd6Vz36A$sho=~4zbSRd&;{nq13hc+8mxzXVw>87b_M%4NO_;
z_54Ai`lOVJt9zmuGCJqhJ33#<&3HM3UFuB#4FP$bUt10Sw746D-+J&TNFmZD^PXss
z;+&_;$}}=$t5sSrItH3A2x<JmZell&(|yj@lCz!io|jkuoOSejHrrV*WhDa<mIq5@
zHt!MLv|eRG)OY^&mgTj1OFw4a%P&#q5j$~y(GxQP=D$t0rlsZ2=T2UE;_Eh-Tfdn(
z=ZXDfV>{IJdUjVH_d-jlQ$e$huQXj=JL|zL;WHoink@Y`b;6;Hh{-$VY@g)Mn9si2
zQ*TRp^tq?qdkys+wzTvvP?)`f*>;tXa#`xv*N2@t*_fw4m$XxE44Zd_duK^TM)~*W
zmd#$8FPb>MPwTjF@mcPU=+)QOUwvQ8qP}cxoZh{i(GGL7K1R(GVOSTdB`6*H_3plG
zzDB3A=yIjEQ!X)YSy&b9@@_(|pFq@UXY-W;@@M>1E^H27rB+*Nezntrwb7dMNbakf
z%&P<@9hCXK@VEA@X3Z%Ey$=j8A5IkAdh+Vl<<lCsE6tI0XsMU`T(HKM|MH!-<@-No
zw}>!xp5H9Sc(eNGWS!!8&)p)ktgdwZT44S2#J1;|ojeQY{gu(>{#cc@q+eIzZCJ<Y
z)@>84rbezml;jn@JF<3(r>nYRg?xWvkk84M^O}3rOO`KAx1A)HR#erZeD?XziPu}~
zmIu$f`-UsE-p9?Kx&Q0DK9^VPg{%DC4Yg)n*e|@eUvsK`cfk5*`bs5n!bSCR3ui>m
z%FAutz2!sAf6K&@_pQ~Y)0f@jU~xPuUDv&I-hD$ko9(y0ZrauUDRtA8@cm*_R=&)f
zRjvK@xQ87}CVRlM*~_B$`A>iOW&X0y-ZJkWyX<%qzwEQu^Y1V3XxIt=vXXV(`0}8M
z>5P|g{q~b?Sxs2TTp|1|E?7(d)q$<m-1;xy`SiP*dv82zoTS>9*Lhf0RxxSLW#371
zvon}14orOB_(iIsF(xKoFL?>GjoIe2PwJiiupUU;_#yBV-<`{IlMhHfn2_VX!+BAx
zdG+%Zk&ma9X1roZiL~YJIOaaL<a>p<?bpjCo9o~A&wliAzMb`_pIQGdGyYr8wn$5X
z?X&$B4=+ivx#9a67#I?;j&Elclq7@3w{1HQ@-_r;IKTIK$l);iM7;Zsiq8-J33_hf
z)jd1;^RD1+g0TW6deb)jJ$))jwt7Ef`yqk3uiq-(eDdPxzK4&#`i53A81;(WsdLeb
zVPN2o$P9@niSYHYO3u&KOH9d6O4X~#Enolv8~cia#N_PM5{0DH^vpb4rT4q{D=B2A
z*eZpa`WpBaIHzW0dQ=sq23ProBv)l8Tc#-4+i}@cSXJZ}<ffJ+Dpcg=<P}?0*eZd|
zwaP2D0txFYDS(xfWZNo5_y#CA=NF|anCO}48R)uJWR@8z*>Ne@6s4qD1-ZCEjVMY<
zvsKC{DJihh*Do(G*DE*H%P&gTH?*|0)Hg8FH!{*KN=ef#uFNY*tkBIXRt6aXF~cRb
zIJqdZpd>RtPXT0RVp4u-iLH_nmx6)<)NoL^x>l6HEdcSs#ut~Q=zA8FB%4&^7PwZF
z=o=XrSm+uU=^9z;BkQloE%5b)>&`2NhGKGlu6{*gfxe-hfqrf-ijLwE*D{2CSalSK
zR2HP_2c;J0mlh?bx|XHpl_(=yT98v(jBICVaVkg%?CzYDH2sQPh|S1)+&z5*;Cev8
zn4Xzi0M-FgjjS>y8LqCNv?vFxCM8)vH77MUHLs*t-%!sG#X}Xj1#k^my#qEJ98gv+
z`N^dqQ#@U4l|T-#O36>oOtAtpEiElfED|lvb&V_xlXXpuQ;c*iOw3Jm4GmKbEKH2d
zO-(IRk&N=pD=taQOHKtDRgqhumzkMjWtM1QmXu_irfZOxY^rOLVq&gqVUV1pYiy91
zWMX8Lnq+EWfn<b#QD%B(USbZit3XDjWTsf9nHVRVCRrHhrY0Jh=$e=sC+jAem|N;5
znwVM|q!^@HT9`wO0tKv<V}Pfvl98SP*l@5&Ku%&wT7FTkt&(qIvVwmwLM9|LH#N8<
z5fo~MrpD&R2BwBamPSSvMkdAxMPaE$#hLkeAVUoe^i04KDalrD`9-;jCALbLxryni
z`UQFEV2O&{0xRdD)WnkfqLBRj99t!j3lxm>3=P0}LBR%;s$DBetbFp56G6EFtT;8r
z4xACd+07|2T_4P~(Z{D4!=&Jh{PH}oMo5;!V-7+o+!V*6lJfkbZ2uzvq^#8B68z?1
zQw%pLC^bE^xTL7klYpnt)Y(9irAKCQNq$i!lKsJ{g%BRZTREBFpi)o(=U%JCWKbyD
zDitS|rKW(RKmjI|oROH9o|<B-1kEEb@k}hqH6_^~#lj@TLf6#Lz(Ci;$izT5$;iY=
z*EHF}%-Aw1(K0Q`9M$yV{Irtt#G+Kk^whi(TP63*+yZbsD`-GcnI@|8@{Cka5;8Ef
z&^55sHM9sZG_f)?vNEvLH88g_Fi?U-xq^*8sC0yR*#=a8`1)EQ2Z{}-e6jKbrMlqK
zf`XjP)FMdmfD=z}Y9YiV8+{z=ko1GoUOqwX$jTtPo%0JSi!#$QO8m?7@LG>WA;biq
z%;eO(V!U=ED}e+HQYB)?1r4eo7dJaD8+~w12CCH{VF9XDXo;aokd{^`D2!S{QuvMr
z*JyB&6apkE9!*`N!9`LCkfeAtbx|$2xDegt)Vvg1rE(>EJ2A%lnG6gJY)RhkE)4%c
zaKYZ?lNlHoI14-?iy0Wig+Q1weg35&1_lQ95>H=O_D7sTqC6ZoZhOCEU{GN2ba4!+
zxb<dlW`T0}G`(+kXLlZP5_oV<%X6#8*43**s#h+V78R8F`dy*_4dHhj=Ur_6Z#Zs$
zO#h8|-N%2{G7s;UwH*GabgZuL`@J`pZ}(Jf3CqmiTDJ0AZQ|Nhdp#yZ6z%^0!<{KX
zX(oe^iU-q^ziO-8SsGO)oUpn7_uS6s)(uUgV338tp*@YIx1_B;KCka$ktk6<wk3rr
z#(2Tn8kSq~e>t}tRDADi%yw+q{asTwJ(D;%pRu9%-mOl~KfM|mU-MfO-|Q`a+IsX^
zS*D<1N5)0_<1zb#zgmh!?b_b?;NEWT=i&9t9}n|Rc<nGltnlHX!~@Ir|DWFX%jZ|p
z{>IDo-_#kHyCtozPxhSKvxD)tWWt<YgAH~DDvB*^x2Gv2CG<>f7XH~h_5Y)J*&ja1
zE{=XLQM!C}@ydPu=i)X5$1dCE(<Cy-P_46g^6OtL{V%`wMC3d=RsTEw_O{59+}l%K
z7QD@uiJHgtq<#K>nJ};CMst0#Pd?c&B}xDK7WvH1BWC9+?HN7UBochu*EO%dZ6f;r
z<7vIuug@F!UY@;n+N}*rc|V;(wM|$>E2bapoU{IS3`cmM{frHE73ON^=bk%M%F)1(
zz^pNG0{_y~c@v!eG@87vj?}uFywge1E^f-|eLs?33UmB@B)((fr37CQ33)&M*DJpS
zv;10_Cb8bYH=0+$Ge}C|q=NROnG8%b)_H$B@ha)f%;()3)vtZuRAF=QmZGWE-$g+I
z0THjBI9}43I;UG)ZLYuln~B$tS1H8A#r>@6jz7k5PWAB}wtz`aF_Wj&b}+uTI(mq+
zGXGUw&4%gM0+`-}?~kftDSEg6r?P}w(@Qra-@?bNV&?y}mu0RuJm>DoF*j%S#Mccu
z>`$y3UN<da@nD)1U_0~Xze$B3)Q>*d$Gn7>kFUXSonAI8tLTB<LP1;~AN*^+G=0AD
z=eg!p8GjtPRQ6stbNb1`%!FNhV%?3Bg&St@i5Yif&hierH_d5}`?k;L&up_7Ok0@{
zt$ORzwdLOJ4r_8}7rZ`@7hAN?^4jEo?`N$0Z<w5;YuTsft?IFv<p!U}4u>h+M+{o_
z<$e$ObaeXM8*ROrV#Y!*4)8K^adCAyys8s9u6gtMIiG2K`?D?X*UxOt%{;4Y_W8sS
zg9AdIA40fv#Do>3Cii`|-X8JezemSshC96fe}sJ(KCv`@Pt%P98C-^sMdx|^jV{>6
zzi#%VpEfBQ8H!>P%&hw@)Xhw?r1ke&O}taCurhVCZ9+`&_8Co0N)!103pn}o_|J_0
zvoJ%hD%;`SPRaHuYmX<LJMcxK!k>RjiT;Auu9H4#C(ioczF{uEe9XRBn-5pR?|Lev
z*MAf)aP0W{{rk(8_w185-uTb7=y>YNB`5!=oY=Pf>am!0oi0HyZkP3_$Nij{wkA?K
zePe{z-83$)E(fbmf35uv?z9Ti+V}JPCEH(b6{d1+j`<R_R?CnlW2W7Q5HEq`>8-i{
z=jYk|FF$w0<=jQdvVDJV?x~qq8NKnpf3%sy`_p_cXYSko!0pZbi3Q=OWBN|7lzFi0
z?QiFGr=Px+pLA|1w}S8aY-Z-B<BttX0vcl-9TRUF&li}y^W>%H^F_V(|GB-T`d5U!
z%<kH+(TWOZg*-UU@g_GMPpaNBgYn?aO!IiHzfYgOzU!<oz3wB|mp1|P=bik=dVOoX
z^E6YXW5*?DF3m9QyS87(EqLdQ9VO=q<om2^3MwXj|03aY*&-y@#nCa*aOL4bneWH8
zR{y;J&ucsXU+=Jc`{%L<&Tik&Qj~LGo8;x>%sh$Prnb)}9qa$U`b)!C`Mi5?YM3@|
z%X7)Rd~C|JS3kpJuK$~Ba#qmN@Z5=Q&;GA2lJUK_ORiim_DaH$<CXsvp0_PrIX8-n
zi>rH~Rf=7_!Tx_qdp_{j-_`f4nctRruKKgtPQ4SGBrbmy>6i8|ik;uF{PFUCNvDO+
z?YFPpSj_q4()@oiKaXF3ai(Mcx#VO2wRE$$p6N9%n!`4~tnNxj^}iA)5pUIN*W(|&
z+;VV-)&^}24IP1&4Rc>#mlL~RIr+@}zpLiIyX$vmRe)Wz2jix-T|zfKT*Q~Wj`=5l
z-B#`Y-=ErZ-4(uc$1BXgAdz%TO!w;N{fD=epSZ&kAMlA|g<#>=phJ7>Vgt@-WI7*i
zdbw+1mNFL?*XI>hDv{5Y{CpeUdT9NByLUBzBkgmF3w0JRVYzAcxb5Qdqr&}@3ie+s
zsr>){*U{8J`SRrwjemabTYezPP5-^;-)N6t$rGjf^*wHJF<ebnQgRdKfAsDsx8cn5
z;y*8JI2o)NyHb#qwRPKZMMasy-`6T0o;8p8y!QT=le5Ev6?X364%2j4ce+x^@DPu)
zm*&5M|Mwn$`(OJhC|&Tx{<z;3mwy?rH|qT_Z8%k0b@LUSSNbwK!NS*%3B8+YC_Z!F
zq-iI;#JZ0z;CjQw#g+SjFH*-Za=HIA?)u;Iyw7i+{~dVZwXAUb5(br{6-I_i2haQ6
zo3Z1=|G2%^ELlDsK3~sgI7#l}q|SQZrI)&m=07hjkJi0rmZxcJTJ&y<UhKr{zu0Bw
zd1WT}fIV>YnCS7n#np>`98_HW<@Ns8lg-zEap>r~y~X(u>*b_KvHsah9-jVhH&gSE
z{r0@GKX>GwJbTmD>Uyy4?USYP(_5R`Sm$Iqt~C%6)G*JBc)!>1L0_zvYW&ZW%*ltB
z)y?ye@aomj(AXkWb;88=_n(>UN6YV>*`>b!{Gp9s`)aobIf;Fe>OUIizwB~-KEH@P
zlfiQb+viumzqJ1?{KEER+_4+;50}L)3)=WO{7X>Lu6p%nF*fqkr`ay#?f07Ik>Qj3
zJv1PoV}aF?!|jt>55M=B`tRp-=SP<!^V0&4@6TskcYG34jf~;n?*ECOt6uA$kC|~<
z@wtqFB=h5sM-14jTf10g?<Sc#9C`6i_Hfnr-i3ec|D<m^H&;&mw$Z|8Y8o0ESGuL9
zg>E`Gr^?`K_<zQ0()(qmmggONzErcI?w)Om;`-M->s5j}6SpY7HcCG!+I@V-mbJD#
zIlMdH9XcnT>At2s=dA6g(rpEgqb}BdyqfgsWqNm%+U@Jper^H9YfpXmj%|~Jx=x;C
zUus`-YR&iiUZ-!HU)~(2qyOl*!u7;Eg<-pc-d#PhmnlEt>LbApIsX?E9$5xzs9ap@
zmhQ4(_x7M`@9R=`Bv0o$*}VR~xm>-?>7=$*y&4)i<@btYPD}1KiK}`4wx#;X>Nu@u
zU-kZ(OgLIIX?fMWZuQ{*HsMS&gYUgNGXKA3(yVEZ68;;DcE1)2Iu*D(<BosTJj3KW
zI}7?wSJ<5Y{fo6<eTw1YrbC~W@M#tc@4R$9UaYJB|9qoIcRKHR7+YKYD{g$fa$Xbf
zudDNI{>}Is{`{Djd*c@azGoK90!a)mH4p9n+i1yZO5EK!)2ROE^}_F$j(&X7t^d5|
zYW8HG>%y=4f&wDc1QtDsk<}KtU+-@bUuR`gefw*liW}c)nGH)NUOb#J>-55BE($)E
zC6`D{JgVm#Z(4Oe&9iv_kBS8ak7udIRhdq|vGw{>TU*W4Gb<KsI1=iv7;muuciJhN
zKV@M(bN!u8EiFAixy9h@k`C#`KK-$WWmXv#D+weqFmIZWqQm*s(7D$`Z+)E4w7O#d
za=q9YKKp(JTbu1tI$Lr#pnO)-A)&{7*SFrW-mc!A|G#fe{(gx?Ckq!Zbxk*~4O4u?
z@OpN3S)CutBP+cbQ#g5-=(<d75X=0{<oh(~+;jQw-O|^+I>YPD=H1w4d$zi2QbUk3
z7ne6fkk}{J*GD#<ymWj2YtiKARS$M~|GuZTlJj<Fw`JRgoq`@KIoQ5ryL?=y+#eP$
zbm(c(8z=vl@Bb}+_wPk((5Ii`+`mqAIWEgQ#pUX_Fj}$Y^}U_7>wkP?FZ?~(nb~Bf
zw@!PQfu~5hPPN0Cj~2yB0xxeQI80+mwiD$jJtOY5phhom|Nk<d7+Gz#xT>#pHMjfX
z<oeY?wavoWVGmT6rcPb+*u7}!;rC09-W1N<!542QFW>fh$IYF5;`4<YErOaDMV{B(
zQ~I-K!tt2TGY&m{bm!!}M{A|qem|aGd@_CB_AL)5#vR+a?wt>7D<A*7Agf3B?s|Uy
zKHuV5`+J^CN0=99?>c!~B3xIf;JF0%bCC}$f)Wpe1Q(uND|F?h(WH)xN1P8={hr@`
z$6H)&()zzSmu~Dh`ru;)f5_*BO-|`W?U~QkZccx5$vWR*j-?&b@wjO-nLSlxHDkoP
zJrtzU6C5rw#;o-XEWX#-eCtlb3oD;FcGtpp6rV1eXKyjJ=9y)<w8WcG=hY%2R+>p(
z5AAl?NB(&ueEr{#g0^>eqsmq#TODIEJGiyet9zA0$+E_VMVhVB={bHZn-sc|_icMn
zd90`K_1pSOcfFr;pPc{iBXjcG$C7gXN2W>z21JxxO^i&QUc9vaXKv6O%V~{~QP;Yy
znF1y}xiyKa+e=}Np@-5#4^7tO2fE8PikbLs75KefH&|G`|BG1O-w6eL`^@IuDOXtf
zonPhP3{XBk^YYu$n|n@_9+wyMj<1W`!ngmbe|f&-gruALFJhELS`{2so+b%#?0IUM
z>0;pN6t5>eQEG09rsUn-sS*Fb@HantH8tte(dqIM{p(+cE0>6hh<)HIl{vnv<l(}f
z4<9c1d^X*A!<lmurjFhJpL<VA@VY2g{&?G(9#c01RmGDGy`@i{ZuwAud0WZ%)&AAo
z=IdU)`^;Y4$$b9U){<T9K3CmDM4s^mahcCQKi%&C-qiY<72$F6-n!SL++Vy@RM1!P
zQM`31^=*K{1BN9k5pyQIGI<;ob%f1DNi9Fdb7uaZ#Si0TXM64Y^Q$7~>MEYgpzzE1
zJ@=i}w%WJ1Qa)a|J?r<|=gb+~-&&RW+26_u{B$XNg3AK4mId49B=9*fFVs+d)Z_eG
zL`d;f$K$|_3#{YoZmF;9eJZ^Ezm0r-miqNeFV}493=haSUF(^YrIjvct#oGlKTXB_
zI8WylQSJgw4VsDp0(>fhw=Py)G;6zdnkDEFk8SatiCedXaf`myn6amD>GmJ||4)|c
z*-!WW_hV`M#^YgMjeLWT^>A@*-g~xn!`s*4%e~}l8NPq+Hx8N6vvRZ0y2vgSLrs<#
z&D&)$du8)`4z8{}!ZAZgsPLFT^qHfkfzRbO8!WY+FZW73TyN&d>;GRQ*1Z14<NsAd
zTZFas+B<hmuGXNJM>cxCo@>6T=#3N8q0sLEvKB59<*Yu}TPMWnB<H+Kov^cXkvC_<
zfzv8U4GF>%goXaj+GfG}HE%<7S*+gibIZHxtB-G(U-!=O)zNU7+7j2Gm4d9T8LJNO
ztX&=P=Yj3%zkent&YE>$de_-sH4Xv>dMcls4opk^eZ=hOvu$ECB^oL$r-~-??3Fs+
zq{sPeePBUcpXX%ZU7sKBKK%3b_3mQ_!;3rD-o6}VG&5kUAZzP|>b513TGRYw&d+YY
z|8in_fBKYH89bZ{>Z(Of2SoHzRHuY`DHQjZ<f>1Q;9zU;R9wjT;^L1#*PVA|eif`)
zf9UC>HycY{o;y3E_(7%d&qI!<C8XYkntkNrGF^4JRQ}F<jk<~%*7ZL$y!qd$P0Q)Y
zP_Fdh>?{ml^ki2-3ybs8Hx3FO`C19;0w=yme-7zjNsOsDI_pY}mf$O)_iNIQOYN60
zzhAFCZGX{CpFQ6;cOF@$o2fg|V2V^=#Jf9+XD4OOvz)8F_p`q3qdT2)H>L&23Z78b
zSCQlBoK%p&xm{}EU&huAC+5B|RXMar;v}Prg9bx|hKfpq+_~HF!Y|c6bub-&R`RW4
zRmtzRXL5g5&b;w`OJ;@H$5(BE5p8D_TdV)yb9s6C{K=~K<+UdN6!<1}Z(i_9iB*|n
z#v~#BR+qpX8awq=1hsZ9@OnF^@$AmSMSd)Yv^FWb^ky(jQV`*&c)?{MrMURxw>RgM
zqIQY%2parTc=zYsbi?nTT5BFH4ObVQZKiH=xP#fNS3^hm-G;Z5O23tC{PM4QR`r+H
zIx<VGv}P?7%u#yM;^OBK5wzfmm4M3XU5mZsa=BECl_y*{9uX-fSYohbhP%>ar$cHB
z{2zA|o9&(wX8znTy7qnF*_SV_?_Dat&idSEegCgEpOwSkzxLOfUD3L9oz;;e6$^j-
zOjrGV`1__$-+GS*%f_;B9+(n$bAf_t&O`P7TM=h#{)<g@Xj<|@T;<Gnb}f~lDMEaL
zo21<N(=ROE^uS5jeYt?B;NiLQJm1gW|Kc0F&tU4Gll2qL!`Ay|e5{DMYsBTP$8js{
z<i4t{5jCIZKbU7>)Dq;VJ5A|;>Y<RCHaU|vv@|@ey8Dc0MWFneHFgpXah?m6yIx#o
z+&<~rC*e*$g;2lrn;(Lr)SjQnkl~D5o?UAg>@l~@XY-1ai{sO|x98pZcJMm)O55Ky
zri*6ri%x5KqwvtrN2Ke1UFxm&d#|S2$9OwvT@_S)HEqfCO{cs$+f{t7x9YMqSUm}J
zmMrF%|7@~+(IQulHGk?21%i4es5kMlMTRNwm2#hDoH4ghvHYFk(v%&gw(tIYdtXua
zW|FA=k6RT<UuWs`AH8MD6Xclq@V&+H!nN7q@>BTsPcGT_)7ieOH6r}Z_2TIC^O8()
zkqYW6I-i2VroBFtIcXEupS(iri`n<tuI@1nQxp8Z*pMNU!E(|D)#fKWdV)IVCS0_5
z)ctbd)ss&iy04TFj;~uaJul|!-RW`C{%>zizjkSF;Ziq|U4ErcmY=y~9`763e*fgD
z*VBbhF&IXz*xF$b7%1D6z$rehL{XJ7$LijuB9V2Tu@mbP?aR;PDkY{C+Dx{uDHU8*
z$HL~s<IraKq$7c|=>boZM2=F5#r*ki_ubv()VaouW%27|&fnqni@USKxjUc7Z+Wyj
zZ28W0t53Fb&6Q_~y}m6!X0}e<AI5UK-x&f&YkVRnnzVbjG)<W?L1>wZ#nX31;a&oQ
z63z;_ri<S_R2I)a^*HAIuRSa>ZAHz>zMD^ex&7Tk<+P-l%E3uOO~xK4A5Lm<y;l~W
zyzq*L`OWz({qbjBKbrjAsCD&pu6GCLi%haE&#YRt+}%`Ot7OG1g+Lv@seLuiXSCll
zda|^5?^*XVT1hKc^vw~NAr$xOzEIp+=Ly%>+|{ZySYRW(M3`gAB}WgIV>A4FDx#~h
zTR5357~N*N6g3412{j3_JWn-cN-^ydNq&$Mr~l$zxJi&vjNSZg@9r1h(Vot|Qk?%;
zk7)mrnaeay7hSM)4B$Avvo?Fvhl7ec=GRYJc&EZ=qH{3^Q<sZ`IMcEz8(Ld5=d60g
z@cFu#)a3U4>_?<oPFQ&~hIptbxJrb2EV<-%-l6A!mdc_+2ahQo0h5H9KEGtLe*WUB
zP4<s%pH9@U{Wv=5(d5U5A3s*l{Bz@I(4+MKPg80xFKf9s+0RMzhN#$VYp&L&(tkc1
ze;ivs`O~LXIg4$puDb@v@(C6-8K`RCE{=^@#~rvXxn**D{`OFf$|?1F;bIZ$Gd07Q
zG(Jn8z3!QMRawxRV@HqTK}!`uoum_4GM@L;?AI=xlU*w7H~swA8TDVM7yjPayz@$b
zoleh1QO{#LgOs_r<yn@N%(=atd+B+*ZEyPdcmv)oyf^jp$=ia)Q5G#M0TWv0OwsV<
zjP{ywsP*x#(D$oOc|Nhb+Q|0A(AS71tncDm1CK24gq1}uk*xtgdBe_0e_pyN;wQ_`
znMV`Ww7JA6O;K?)c)wH6^Gkuk;>xq_IUDtw@)XpS0`g+S-~U~wUtfBrYyX#9E}u@S
zS8v^6lok@<@Xo;Zv`ul=9h)y@w`@yOIQkaPVp1;X@ot&(C8S+dHRoYf_p7c~4qK*#
zu8`84F1bUT&13bEH+?}0qGmeh@2nNMqP<clBwOjsbv-9Wp^scPVjSyI1m+wTWil&F
zIeoTWcjEIR#r`PMMHg2pdw%}rd)M~E<CRyq^(8m{Yh1FbSL2L+F~@aT`SarTRrfd5
z|G&s^cX#TUxKf?@Qk^Z=MJ(b1WuHd12TR&}i)b_%$A_^i9{rSgHE^XeJ7=5KGJX}8
z)*tfRlREZCFPU_TOIc8<f1+w=S(fj0w@Xni8+USxUa>aSbvsh<zwhC`?{5FEeAR9}
zvf6&G&zjp87OM)eh2**{yn81xa$4!fKPfNwZh!Rd<NLORnNjR_m}To9&DkXCxo*z7
zO)chCVNF><VF&;5nXE2Ps9Jb-#Wvx&TF=|8PM)*%P*VIlAt7+?bk8NLTnu7`4NoSg
zd9rN2W2|J^tj!@-8K!aa=r*U9ZL_*^3$4ES=D&QlbyCI4Qsa+bjF(3IcW}FTM0-p6
zPo1U4hIV;%KP@-&?Z4_=p8s{hrlPQn73;euO_<EY>RGU)MOEg8Q^jQFTGvlrZ@y?7
zvo&1zj%@tN_3UzM%gbAy)nD$)NOMZQDvEIE46*;#QzACoE;uz%)J=)ufk%^q!a0V{
zDD5e$3>Rm4aH(4;J-GLF$=_r8yx&hPO?vboTewsG+VY)=N2W*x&e-~@Am{e362ow-
zKkF3xYNobDUYdSI*TSE*WO{?AreWmT(hA>3r&BL2A}6m~bYfNW)WhqSTiqzR)x{L1
zwuU(<V$D1*&Pg5XSJ<6h_QX3gwBv|(=Fx|d`=7HcbO})v^t^Xo&q9S+=2MU4Z<B2w
z!ZhaAEpORtzk12Z{QVx&*TrtUbNye2*3NAInD?*!HMK=r#n$USe5S^A^!+}~oA3YV
zId9KbTQ-~Ns?xf35r?;kdMYL~Xq>K<{Wm4Fpe{k>Q-iqkUoDn>B4Gxq+g@m{c@VR*
zN+oFavsr2rA6(4Z{=>-NrRa<$?{99KBG984x3<?H#lo5Y`GuQJcN}7CM7iCz=c~Ml
z|F`vFoYj19xtg^i@q5kAB^!n&glE+K+|idDzcXo?UB&gH{r~n9=U%sdBUj3K!n-r(
zXqQXXR=)5qt5t`m&-$c$q>`62p!Ze7`9pIz793h(x1Cqy(m$oD=8|dk;+Y0>LoEd_
z`71H-e6qU6SmD*OjQg`w)zPwp7AnsRIuj<jTYX;TwyV)*(H{5BC!UJW|0Y_$<E54U
z-nJ*_VlPx1XKi2aG}pUJVVPQ>Rawi;_IsVDH-3KkEKG$<BtUkXN`CuPfqlyy#CF+=
z|J&s{H#Y3D=IWV87+JP`3=4`ra!Th?<LwG(&ZUc7vbIlbz2dU$wC%n3oEwcg4Lmib
zc^>)y)<Q*bMNTUd=WK~ha~8Tke!IOhKFd^0Q=)fn|INqz3di)6DnFD;Xooc%PkMDO
ze8xT>8TM~;Z)@!RuXm(8{^gek@0eQdzH48xf=@9~sB&xHs#hy~-~W~^`pcRhw%8_3
zi2X$T`wv?KuPoVhV)`krBM<x(C$z0x!_fJIH6(Ywhwke`?n*PegeEcyF)DN<crkMF
zCCTV9a0^MEyOn(AhVK!>+25@`GCcY&zo6hV+nnj~?w?NPUbvLG)NRU}hS}#2+tdnO
zd$?Uue-5Ke_05`ZJ6H1Mr?sayXe8{6TX?7V;9A}f4l{Or5OnaB@94Lw$<*1OpOEXU
z=oEjt!r+3QNn^-vW$phFDMBlkJjnX(vS)Jm?_WmujhX@#1(|$&mT&%Ze^#hjnn{PV
zP_yrv9O3h6`K_C-oLtOwoiWye_4Pa54*9(;Ka9^SZTuu?aJ71e%}!^Ii0yR^KaZvt
z*j36)>|bAI=@uxf5j~M(z2=6b-S0R8Z(l$7=gLF<HukP3A!m>OSmXVt^;6WQ8J<!h
zcN>p37gk>U?U3<||E%YdKgT#t-|aneN_}#`-MOxfYxl7QTFzyia%%tjcOkm+$5opq
zFfqwYvXGgy;gsvQX}yxuZ}U2{_}Dc3z9%hkef@#jcdjbi?OAOn&e{C7(dl~(^C6SO
zc{XgHFZV0-*NaD%1|0YKAN}L|LrH~2Hg7r;x;HT{Dp;0#JzM0^nY<#F6V2^8=hupz
z`fEC|sMoS)x@dEJj!(JGnZGi+5xgmG=g!{G;b{2Nxc7|OliR;z777})NhqgCFtaQS
znW(4u{Lt-{t`mADPne^lbnm$3q`=%qyL)YJ{j7^+FL`7gVZPq+Pl3QmX8R^X8>VHN
z-iw#6vzk*~9{B51v4y-WlkJY?)_krv`&?%$<WD}|@_dnFpTL?Yy9$<azf|TAi{l7?
zxj5)B$Ev;sN}L*<RZjw)g+wG$qoqH0FwV6MShenxL$jUWl~o+}oj#{`XP!(q__XZ@
zm(xV8c&@8Q0-D05H3fHmTqrns$%|4oxkb|}FY_n+tn6z`abdZ$pZWXRdZ)TKLPGlc
z`+l_bUYM*|S~o4})zXlr(uX!)@_V=||5P8)S~*!Lu+)dE)064Ri^T;dXLVxsiU?o2
zn$0T^xnNnuwnC@c?=kuxf*-%#**L3cL*|6WtP{<OfA(AVIY>?PD4qK-Sn0#uo>Rh-
z5tg@IP2H0cIt&#SGlt}usrdZ9JNbRSM?vEH+ZmrGita6WWl>~)EMd3ara5O#(%yu9
znsNSn?(7B0_Ydr;6+YJ=?^yJ{&c9bfeaccnmZdGl|K+^y*F0U>7j4ckY4Z-2>DLc=
zFG_NC(m49)f%G<oecv;0nCIVk^edHD!+J9NiR89T92=D%iY|3B$#p#xuwC-P`iMWQ
zmG8LQa^zU<HHEmp&XwdgPT<M=`gQr1Lw!QYJdtOAoB4VwnAxtmo~Hp@3(&gv(4k3<
z6FeCEZP;d>WD<7|u&L(yc7>&?bHYO&sa-eDFTQJ@cjIG+!}k0`wV$tR#Oz}DzIDFR
z#&4V%oh@Al?#i(E3O~DQIOF|3&caU|0Vni%zuKH%b$GLqqK}XQo14Jxo#MZ)Ek9r+
z`QYJUhgY3H%jN}TtX({5)$@Z<?d!KQYy6HnTOpfe<zC@&O3Op;VBEw$wFw@Yy8EY3
z_3!-=9OZI(v+DzGJ;^h#e<yr+)){cLW6iHwpQbO(bWr2onX8^rAW`zmLM<<@_35q?
zSv;J_4qQEZN#ZEelZopS?(JgyzBGK|oz3za|1mCg+Z9l{<MCsS+W)3+zTaoPyNAu=
zqN3-*b9>jT>}h?~q9BrN_@Q*R!2QoAKUQ5$aI<mXS);c6$gCB7ZMSx@t&gytTF<v^
z^`9e;!(|VOgn!twtyup|)BE#1A>tneJMEVqjYxfc^9<AOgAF{-8>VRd)L3@v&Key>
zN#;j|7hY|hFq7xSB@M20zXO^+3#T*gouk_$>SZoGNo0Zix`RL8+B?MTZu{|BU*YEu
ztAfJ;6L(67wC5g{+0FC%yZphwuf98MT=1$+@PW{&ZLe9(Z}Zolzub{~kIT&B$;u|K
z!XIT3tf2>blD6BYimC|5EAH9%ii2lbl5w_T-BrWjNhOkvCQ5<|HPgdN4L~89tt{&M
zNK=r{qsenq)&Y)V2P7BI`o;Bp!9T|f-%M^iFjyn({_x&T#^{cp+nru6yArBAg=@n6
z+J;MKvjyy{7&~j(1zx`m%~~=!)V+@D+4cAXzuqKYSi1Qh-|8}nb9#wPO8Z}3NIvk`
z|G<s!I$y4?ZgeS%5)JWa`FW!Gl`#9MBZ1i!5=*ZMDlDBaC1k2i&F0IuedW5{6ux?-
zuuWvqxs@Fhb+!4*s)-S*b5$#MZiovsdV2kLudF2V(Z>r+vmI+|6Vm+^>p$weIW*;Z
z%?YcB-jXwQ)to(Z+iTyO-FY28vEreEQ2*MntR<I2+LI6OtCL;oZ`bhSm+^&=b<Cme
z&nHeuZ+NkiPyIH3?QOpo)@6<SJWMg%ioF8tF=5M3%WRE(b~(Bx-RgxOf0I&u(VBg)
zKDb_yy8g}V_-5B!!@MWQ4+ZQ`XKB3UQLsC|r?Ra}pdoT=)B3-YR@&^AWmxdi;lIL@
zcgt-}i@u4--P^_3ZKFM<FIf3=;p6j9XSf^);rq0iKVXj)+xM&U6*rbz_vD0`%9kD3
z`pC!ckX5P7wcYtnaeFy-^jYZ(t~jIQ<R|dShk0HEd-8L+jMwIkM;~9-Tzz2w%LV?W
zq3)?c2@$~xS}j5=N*8$98UMEZ^Tg4`_;R|4pIpzzbCQ|X@-9Kw#qOEEG_YC9dc@&G
zQ3H3|qW>=JOi`gz4cNR?6t3s8NXszIwQN1fot_Y>nqqk&_}<E`A<aJ3)3Wsp%H*!y
zzu)-#O}B^LXPHN<!<t{8;z|wN7k0h1_OX>$d=1<4%ikMUcFw&2HOM=D>%+YqN>5ss
zTfLcQvgPe=!P^_9YceD*Y?--)F}`qivB5FtSE44xH-tBSkPTaS<Z#**t26<hgyUgn
ze<nOMEcX=fSiB|7S%_1}DRp<gWvM9hh0}ZW%G(W-=3GCgrnx`tmvwHbtb`2P+nfA6
z{fDe7S@`Adw7&F8*=i<ftk-%aba8@_CF93~w>9E+vOIq}Utw#h^cK0)Gdm_+@6Zql
zu&QOc_OM(*e@>hoTSsw-Y@bRGcPG<zhkjeO<oCuGuH`bke#yDE^vC+lbCsq$K1<rl
zAM|W&{1WW3>y*8X=9<|y9&(DWA6KgO_bE?sQwg$lKX`F#o#~_W+eJ~Qv&~XMu9SMt
z2|sx#p8w&T>+2RtFfXlAxSr4Q`aQd!{P8O{TyLatM)oRijmTPZIVimM!Ho&e{SNJ_
zmVVdxd_vVfRf)ygQ=}p}t`>gh^O|4B^8K^F1ONL~vSEQC><x=1NQeuStrEXAE!Uvz
zJ-fUdW69n&or=oI9vTZ7R`7hE6R7e1=DNk}-u@~HRuD8*?{_<5c>c-7i7Y|=E>f!m
zHPhGS)t+rPJf<=G-NY~>&dVm*ujEB;XvKV<v98%I%;DRTgKlAmY$|!)9p1g6^Fs7C
zr^pSarQ#Eg_k^$se)u4F<l**!9fh*zyw@x2tknJz;&s*SnRDrsE`#&)+I}8XJQ3_Z
zYjt1WBhjcSuV*scaG5csMUyGxsMe8hw#>DE*$nO%Y^_>#`2P))jVwW7D|o6G&19aR
zCgA?}?M{b^je8U;FZ>a3cNadvbM;~U9-$wvV{F#+z3dkBSD1LV$*?VG<-X(o8xEY4
zU|w3SXr9OTTE3<Fmqk&y-l3wL6bt7reTUD9;%gM7pB<Ey{`8~VApd?##k1ocHs6^Z
zIUhbXV`J>|IF1|q?+*X@d%VEzKTFAPwJR~7o=srj3Q|(%NVDP0{cLum>}~V5BGm=Q
zoyx*D?rI78T(NbR;1;#aqy~<7#yv8B(_4%W@r2*~_2gp39rlSX$Nf}tq$dh&-FA0-
zpD0&y56@u-v)QZ-QmT*m>l|H!{#uGLuZ`fmIh&EUe?i4FmNy^SUrcT3Ikw<@cy7wR
z6UhfXI~#Uyf3W8_+mWrS3;6eSJvnxEo@iH)^5%bhOH&Wo)bl-KfA2UaqJsHI-kbj)
zcmz&Le`8^@+$6Ly_V>d4dtBFUTPL?qop11JqWgpLGxoL<&MR~^d|GkNY1Lal_q|o_
zBA+tccui~#l0R}RF$(B<X1?NV+V1>NTZXIp6OG%rI<EvP{kwI7$8=HVf_rfnt}So)
z=eQ%?FY(bU*NAn^YmQCK*)H{R((KJk6_RF7IKI#A<+IQo`u{~v-mRW$_&DcW<xz%@
zyYnB))CrfoFKdsOShDYx#>q`@t91DGRZ2gyE^AyL#r$Gp+p9&Ntj|>@J84==6uHd4
zc~E1HO4bSGRid6#wp2+!_WG?Rr01|<_SLfZ`{MCG5Ahy$P@BxiFvV->s#6@w{Q*aK
zF03+j*mgfyd3pcAnP(PG=h*vnVW_OHT(_@JP*v6w^LqBeuSP=W_jVj<^S$t>Q1qda
z!ws=DpPwiE`<L92|Ifhs>czm@<>Jf!Jrt-6Q%PlZsr_DeWLpmF=`_Y;()s6e7g+si
zF|y^zimE&y9^d3*xcJ>e?`15rnP#_5WmW6{wqyeb_q;q_<w<SUTWjt+Jgxj!64vVS
z=cV?nhlh`@>KB~2+-+0jg=@<loSh5$<qsa=zrH$4<>E@^lIOb%?s4S^e>&|SxT{X|
zU4Oh=(fi9CU%zXcUUDeiA^dpW+h51q74v&^4{ccfE<`phv{WJ?+gxGaf5{K-^32`p
zQF>F%S2q_N>-;_SSkJ`GjcfNRJ_#_stbC}d<XG`5i>&R^v3r*r72W4J>l17kZ?)P<
zsdvAES@L%=31)8<hyJyVC%!e$x9oh>YFx0a=!Agttm>_TOKqf93+}RG``sIVXm1JQ
z$?J7pKUQ76@aUA<zGo(pVr!(Y2UNY+?MS}Q_;Zexrr-l1MSV_Tmjd>-h55G|Z}izF
z%+wL^aQ%65;lx)NA^#ujyBqPa^%Ga=mAN7ow(qL9J=xgsz3zJcss1bGhh9x~njy6E
z`UkPJRwtK+WsEoNF089va8Y!Fzue)&v%?d)I(5EZ@lcU^Y4j;0o6*#s^XJ)gjeENo
zpFeo*@%9Gq-u?AA&E<szj~ub^I^K7n>c8xTt*09b#BGzW9P_u(zg~8gA<#=?C!ftm
z#$)rAFI*qTts+!f9`5QHZTjnYZCpf++bOL(T`DWAR(PLC_7w2gJ7wDckDK_zUhq8N
z>iqKcE8~Z<^q<z3oy$|M2C93gIc8sPG^(8-er{pGGmAMvjC+q<IBjXN?8IWl7KYfp
zEWfYUGgm$}dZaJol5|z-LqtyHr1r-L_7w77J1&3d=P%=gqD9Gz>eb)x6Jqi4?DXWk
zu{ZLC^*6`++hl7BcDR+A+)fPDuND3F`@s8%)D^-r_It$b*SsKqU=dGs(pJX2i<|ho
zSLyCJxNM78>Ygl%uznXcHMO)QoByzu)=#{<-euMKizXK?7--C8YimoGYr*vW9RG%x
z#x=(T`Gb|aY_beZIlo{2u9z3o{QGgU$GcnHd$*V0Tp_5`bmDoS)hFFI{&o#}DweIW
zy0tMX{?K#YhR}7TA||pCo5hapW4>^09kajM^5sn10yMra-}dUNy;FJW!TXu#R<)L1
z*&55QSEamemh|1v+zy|-&xI?UI`^!~dX1J!(9K8EZv;74O?;4S>aonhKZoJhFU527
z-2VN^yphIO`@q9+zSPT2wO_cmeG<`o@klbFyslwKC3B~|ZU2wO{f)1^qkBEBH`abu
zJ7ZtN^?ftHqbRdWs{5soQW1|NCxJKf;?6S^{`UJYQLrJ~bg%5%6@6*9%MOQde&2b=
zOlJyvS;R(;@8Q-{-Jc}usxz&ee(4kcR{nR}xF&f-Y)&-(#L2zI=X&GAK!v%Cyu634
zN_oD&zvm>yBzZ3J%yv<)?d)?m87{8=;VZJP?_ljmQI&1)9o8&Qe`zW&bWm3;LC&7-
z_2a`6(*4iuy>?M_R^mHuso5NsiWAdiwq)t{-2J_9{e2dd$ya4-1Ir?=-kx>3b~jUV
zi(rWA<%?{k+)op47e^kMacy10!2=ii&h~lVy4mNt!{;!^!bL7cJMZ+Ah_QYvIlYO0
z`ur(P$Cu2LyKqeRzy*UNCl}AyQzADl(rKFU5)(W7V?B3g?8;i=c22In@;UpC>+ues
zZax=`-*`#+#Zm#c)`r^OVqWwA-wT%Ie_v|yv)t>iv%o?Hm7+-rORE;--itGHkXg4;
ziF1;h_Nv8l7q0}XhB|31-&oNc1*#1yCnd0Kz2$elCpED*YVMQJ^J1zkE=e0BF6-`d
zF}Zxwyx@$K_LPT#4-R}@a8Pi@a{mLB1?(@2a#gPRX-`?|BKPn7mk)Ax4(BV?|75x|
z|Bp;3uk^y@yCQl897VVd^yjqyJojEfe;#i>SI+gF@k)^!x<ozwJbVP1Z8x2g{<fu3
zIzme{vb1HQ%U-9-8J|9^?%N>6ksqXye~fdpQa{J3#d9On?%g^Y8-FM$DpAF#S;Fax
zXTZvk{*#s(SDAC}^6~UK%(3e`>HR#>I_#?2yzPn_x|tDbFV1jId3UGj^P|-sZ|xb9
zQ(Ui08cz}_cz4I~<!k*1H6<Kh9;`E$+ws^kzi{_?)rLi1PAnGjceuSp_Sn0<546LY
zjwW68xP4&xs)siGaqTsK`q}3Pf39gU<x|?Sc#f&X_PY*p-m@KEl`b@2naFarW4?=)
z>BLh<AI;Yd>ThYfrZG>-u)$sabIn$_umd*#Z13E0jtE**u9uZE@5JKA+j1tc$lYoE
zt-s&x->cLk@fDIMd!u(9o-vUFG+JZN{rd5D#lCv(h-Zg%ZtspM-7z(aUE;_Ejilf6
zTKCl{M{LW#cK1mlm-3Gp^An^*?`t1<^xkQGx~ozCsm1H&KlpP;+`HdFigmY@X;)g7
z%k{5Xoe|SQRr|btt%|Gn2x$FrIhLg%^|yn69y>4JVatEAMc0od>@@rJLuXCEH15hD
ziP018*D-!>pIyLj)BmJc|G<m)?;}i|oDR&CaDTbk|3S^i>I-MJ&+gyOXZ62(sht*E
zhoxqqk<bJG9H*N5j2}e%8QLyxdRw*kZb<Ju`KJ<FSGBCz)oT7IK>ga6#&dyk-rWvq
zg^BJreV>N&imDn}T1H;Zu~7CEt;n5_&}6H6Mq~9vAGS|_xTc(&(_s8@!<`z5i!%x>
zC$+!deEIC^f_wYe3m;1fvB$eU>ijxS>ZO9m<rd2_&S&lI3Vn6VQ@8>$`nnh&Em7$>
zD41dWU1?kD?Jch|4wmf|^fXcK-yk<3;HdKx?pqg6D3@&Z7oXt%UC3iLQ}o`od%n&S
z?moIoZ{O6CFo{HyO->(|Z_!e5nbVxQX@P9DjcL&<7PWP;O>cLdn3DS_QhZH-_*W|p
zQCsaP($^oz)$pFQ{VniXKB6YsP4l5o&*4Asq$A|_aqp@$-NpFGNN--SYbi(PB01*U
zJC15kSs%%Lq^FHbK}R=qeeoXAtAYBiV%tt@&)AgZC3hvsJdtIy%QlA3LC^gZau}Nq
z+bFM+VhnIrnwEN1w)ggm<h^-brwtlR`<s9La&$hdF~^e6+P*XH<b~C(62|8qTfVCJ
zD){Z+5|+HU#=o=ox9+H7{rEiI;Zx_=2W=ZI?mWD)K`*}P=kNZ9b$=xT&XgO5>&N^$
zrFv+Jf{)O_vwRNFQJ3GR{La4J5WC*l<ZDp$)a&L-y7t*7%KT>2D{e8KTCB_B<P-Mo
z__ewfX_?dcc>Dsdcs#8X6`DC`PD}3Xg?kmgdaRn~wenxw<paOi8}3#&K1{Sw?@RjA
zp1q;PVg50}?`I_<wmrGXbCl_byq(L-Q?5JK{}T+dE^|8nO?QTr;-rFN%NfV}4%mGW
zdvaF0;q|tv38~BNI2t@tZ<p$_t%+>E@vip5H5<lh9eKO&l{^S$SsLiSux06nrjPYI
zC%DfJir{{1eLpj6iIY-raIop_&402|-1aW+6I{%*#Ds(45$A)CUmgA{yg1`2a&B%@
z><p)A#$O7|@^>E3TUl+=T3|c-NBy7k1;_Q2zCLd5SRZBkKFh_5@6GG*z+X?Ccf{AS
ziKtEG^<>%MqT%7>Cp5=LeoOA|1@>FH*VHrkTu#$yTd_*Ht37mr$gH>9R(Z?i9D8<m
z=DzUHu5zYZr?)n*)4O|f<wUN|FW<gZm2qv8U8Q%(LxpQ&NbV!{_6FPEjTfhG=&$Q$
zFV8!$M#6YoC7-qKRtNVdH;>d8akRb;f4Jw1R*?VOrfDa)+@CR#<J}b%ujBoP{(TU5
zvUIcev91)m_1#^JNq$};JJ{AlcHh{Sejt1;qxaV<x;5;f$8%mw1y1#vU?8PAA?#%H
zq7z5VwEN3q|8_Z~d8$l3-sH7p=EEfI%yn9dORF6G*RcQkt>`az^h)dI4PLA%md>+G
zH+IHn&G0o4W-57TvgUXB!?=ANPYzxeh~Ky=*X-k^!i4)fdoQuScdYroG~IR6h4~w`
zFNJT-_YjyceV%CN<;jiPO0B=ltv<-sW^+|gfu-IyY1=C0Q?I2~_EkMO_{zsIw<_DM
zu>NhprvMhC_AOxr<ze~x`VF@ZRqg$_<KXNCak)#D{aE+<zypH~|J@fnoVvkZ?qKuo
z?FkcIrWs2VhbV86xuo;>yP}<C&!OJzg1DcYLH>E|)BMiI^$Hx+eUdoOiu3i~a+}+w
z$HUBhSfoDk2yj*&YdC(bYHMGqbVG>$;k=JtU7B2hKjgEQ>Iu0iOgwS;_rhhudm~kr
zn|)#2TJ&{W_1cvjDSRrA>i5WX{+KU3ZQrWp0Zq|$og5EjJiao_p3m~N+U(=x=l%z`
z`JSIK@%*_N8?#R=mVJMDqa&#AUMu{mHC(X%SEPzu<(8+<oQ0Uat(8xx`)N7HpRZ9(
zZp-t(r89nIe1E6SxIja-s3}1;=)vvsO6xUYc1#6p7jmtb;6CM0`GkZh-{{t5%cTUm
z>>W0*>ijLET<Lvk@m`n394U;!QU7%J^?InRaC?>eo>z#SwS0#m+nO2?oy+V}a*dUL
zsxA26FADcaId!%@=OD9-_=NWV5s&#F?D;9(u{yb6e!bwyT<ZnD+xSdFWYajGbw1u;
z_gmuAkMf4qA?_l-3QlBm%wcMd`N;9bmVLq6`yAIMuUocyQQrC75a~`U*9{Z9R!-Ml
z;VyIaxkA%yzuhOB#oBE|8qYLKWx6OinhJ@XIsD7b>7eBVKjzuno7mbF`s5woopPPA
zt#jWdsfWJnFQ)FYUh!#XEc>Sy<_7sOt-t@A_OSgd(=&16uB?`phT`{nUGX(+-!Jn!
ze$U^2ouROE6+fp_VAbJyYg|rcbKTi1Uz5Tr(0x_4)y-?s)G491p%+g~sttSc@U=_4
z#nX)+GPPY5WF0^b`TG8<gz2I?H9I~92B>jM$+cEKV|i!0r=92IDp9W2WuG#(|61`g
zlIeTi;o8^RPxQySJlZ;6abq6ymXKA?G8cukRDNVS<DBn2&ysJ3YN@cxNeQ9EAP*m=
z3CjIW_cu!4ntb~})upRdju8s^Q{u8*c+MU>&*r&I^ihVF+}n;Pu`Y654$lmiUhQ#_
zS~)Rn`){LjjV1aUYSlARL>?Ux^oiNYak_NEQ=XFzQjy!9T;vGj{Pe<H<L)lb=S#0o
zxU*aS*ar@V1N+LwpOwcS{`o=R#W{YvtB!ZSTZ((sR<LzkGIr=Wdhpux(Br3~n2)4o
zuCCJCnYeOASa8_aEZM+N_tp(*(kIjBJia->eQ{Vb>kdDW#xueb4UQ#99P3Ft?6y94
z3#-MN74QDDGjtqpJRZh;QzxTDwM)JK&^cQdea{rj_G`JzJl^uH@-5%vapA3W!Mz{u
zC%nZU$kbd_p3u+dJm-g;hS;t7>RG=nqyKDwAT(jJP%ul!Kf#RM-<<C65!IQ^`Zy}%
z06*Kiun_kvNtFh<S2L_@m5VaJ{3r^PJKgOd_F~y8^P3wMxxAX_A>r6E(Ia5tl}I7R
zn{^LX#vXWJaObhRL18jaS!A!u4X5v`${u#@iD>xot$0SV_@3>b9>^&iz4T{IZVO97
zSkCoiwhd*>J+{9O%15(Gd|YO|Y*uNL`74Kfo1|_3R1O*hbvVgQ<Xqx>vU#hfj-D{n
zLe|}$uYJwdxGVK`y$rnK@!|O@CBsJtW*u+Lk$-W>OT<sE(blw+M}0|ThvDO*9c)ou
zZA>Na%bx3fe*Q7{tXl2Uu$=3s**bo$`+LDGmr+_`PGa4SEcPEcvzerqu9ceTKIQot
z(@EjyKPpXZ4^q&L`O__^by!WB&2!1Tg-H>)OXf+f{Qg}cS>aeiqhrEzxr7XfcN^au
z)_vB}(VXz`MU3gD3oBPI{SYbsQhAbpsfej;&RHvo>iKQkver%y%1u}tr~g;PP2}qd
z<yUk1Se5%f{SwS!eB&AQwyHf#CFgWjrlF@w@VCUQFyWbNr8sNlJ5@gXs0#Sc@Zt&Q
zlk;<%LT9*4OOET6oSvqYlbK(e_q48vQ^TW^Q)$QE+6&v?u8~r3x~#N(*{sNkEb+J3
z%oEhvWUf`>e)G_6jc5U>PqS_MmC{_SA}dRD8%v_!I~g2!y{oUpN_N({tad5RY&VuY
ze@vt$nC8`8+uUhiD#KsDgKhPl8j1HJrkmze1@8)}6<}Ds$JYG3>G`tS<9eXM>edx~
zRcal9&kI~UBU$H*`_!IwE!{F{TT<??d$aHL{>wS6G|~MxM+%?$p`wg5ZnpmsI$^4U
z|9ifssR)&=(dZA&Q&VCR;!Jofw&7sng_puJtjdMTR(9~He>roBEA{7+byI!`u!x*6
zcGwb}nQ^<^@%}#Unv9CJS1Yc!2d+Q4e%_)P-IMc-gEGF&nRY_iOZlNtpj@u1KvbUg
zaWjocVIC?vOGQJZ61A4z6H8q7dcnlN^^<E$RM#wyfBbN-^@f)8GZN7}rk6fm=hxkQ
z-um-pYnK@lgjkn3+I`YU`aQpO+c(bjnj0K9@K^7-So7&-&x+i%rX31T1Rq-G&&qqJ
z^y$SZk!wN96B9NkPMH$+VG(bzY_HW*?UbWcml9^1HM6xV^w;*U^nRW&)AdWC!coZz
z^Z8Gn23X`PuQ%n&5pr;>dXl*O(gW%3O8e6HFSjb$`|MV?7f<h$6EQKxvrGHU<=IT@
zpIj_Fb(mYD%VF0{mp`V0r#DI6yU@$^rtZ*+J4X^*Op?W{yBmAgG1&f2zR)#wLtlOW
z@9+7_x+_wX-KOq}O48J=*!DU&D2a<_%O}GZyXUv<%VUn%wLNd`;<yL>PI`g6FV6Pm
zj=A*YqS|MdLw9aVi!|yObuv8E+xK<X?Ze`N#-2+8y|kb7GVI-ZVDjyQ?ClEva?bA#
zMc(M^<az#iwOh!mRmxY}U2c}e3i&8A?n|}qk==bT+|J6m)Z~_{=+pAEL4kqWeq2-T
zc6(xYUGMX{P`TVQxtdHb1(Yt9?tK`rDowmV#9vo4_3_*N8zpWi_ct2VE_fI?#ZRX3
z?+3XmRYlOssN7KH&yI4Zmx{T6E|1urwVSWwp=ih3Tl~47C&#T-Uex<5=igQ3r5dGu
zJO6xln&@89e@kg1`_^_Bk;W2%PLo+VH&&QEs`uLv_Q>ANV5?|U%5@Wk$&A%m;x`@`
zJUO}eM64bE>5Uhb7pf@TIHE86DdJaa@&D3LLx(vNE(Oa<&vU6cp7-H+81vJQ`_DF2
zD3=PBRwZ0Hp5`nOD4tTR-mg(uBlg5F{PPjB=Zi%fOC+>Zf-=p%#BVw4u_SPnR>W_P
zeReanCU2D3P{mtOZ=Z1_Soz!$=M)d6*mb=+xl=dnDT}oX)fRH#=~390Ex$$g_JJy6
z-cWZ5$1`rFE}43Bu6EpXRp9TMp!Vk3mZ?e;^b`c^6Zg(LnY371ea1d6>DAHsH$uNw
z&SE?DjnV%tn|vhW$G_$h`|a2+mtI(2cqBbodggkEU2CKSZZ8LQ_Jo5D^++aUUsvAu
zUh;#x9P6V_Nfp7=Pw`hu!Xym0SS4+{d_p-&X1;3Wm1u`;RSV4SX>~bR#dNZGF1b2;
zVPLMxoAmm<OS)W`BDS)vSug+TNukL3c@2}L-P<HC?O{zmwob7>b;2&D2TUipJ7pO2
z?hAa$wO;W42Jf_!*H-W?*jK%-z9HC{X~pwq(Qm~wKIeU~>bFtdz`JC_izgQoPdN*E
zh%bms+&fQ-Q^;v?+Mz1v)mr`Rmt%ddJJ$Za@XeMtYU3f}v^{og-W(|w?oH-9O{aW%
zA(8i)M__N1`^47?+1INN&kI}6>V0Ijpn8|+$(T7si_g58@iuRjpHhDoTVm(LcBKh=
zN)r=&mk3T|nY}CP*n@2rZre8%ulDllRN0WHToG^oWas4*^DUTmPguW7uy>=LdjHbB
zxz|;l;)K+ew#+@}aC<BN#<RB%w6&#QvHEj8!1k%(bdgZwOPOw5foWA5PYw4^->x)4
zPjTXgSsovp`h%o9iw^$~?VR7T>UM)Adz;&WrPYe<?hkAJ^UB@Z$9P%v#bpbg{Zf(V
zo?J{kbX4oQ6;}*%*!tFu?<GH&`ZuTw`Oaqgn-UZ#m)LUYg~VAYR%y-lvZZTwO|ywI
z@sH8aS8`Q|^-$WD+g_D^b@oQNJ(7!7O;FJMc{!@bFu;g)U!L-ce1=a?3_<lp?3u;=
z5<zp$&NocnKEZv<ji)UP&~pLWSMX2Yv+O`;*Q*=5n16kn`@mT_l-b4J(M59_dvpEj
z=)%K6VS(xlWx|zPKJC>lxYbp$WT9%8!?ZKIHaRK%tdZN6+x~6|mv{8l-4~nwDxCFD
zvFP6WXW0Qax5o5&%)LE_><UG`=f}F=ntCFG$8CGhg|}Rjvj6Vd-Z?>7DEPsFRq6t_
zce3AV^S#$IgJad=$WvYJ89F_)c4d|AJy&|<P19s|Z>?0vlg_<cV&8c!aZ;+=)RDAI
zqU`a{&L#6qy_BCiF;uK#-%_c4;{wN%kIFM_>ba&xI)x>_$PEAdu*f&|?4!4wQ$pJn
zB2J2I(Y?L!{jF#-hn}-L*rIMWS<mKdI`~>|MarRd>(icGJk@ct)jzuOzjsD+zGz_e
ziRN4L?*_4Ka`8~{;#zgnS6HQgLo(}C4__@^F=n@Lhu;^uCoEpPp-=YU-|Fpq1b0cz
z-)*?qE7rAYS8$+gPji@E*Nt_1FPP;qdVgi$wL1RS`g7W4i5op8hc1Qnax9Gfd{TUh
zP0e+SttY>1xO-~x+Knr2r|wdk7$wA+&DT?8c>Q%;{uk^2W%pkdoAOTaYtFyRXlBAy
z`KS7c-kjFd*(b7iG>;vOTpp9{eY)FQAVA5#bjC-PEoX0o7GBgG?UK4GxQyvgQ?+*9
zuCJFu4jym*EBiI1a7x^AKi_EAST$EA{zczpI;;wp=uT9c$fq#TX<@+HPLo;8m$*7z
zGR@{B)(eFw_uH}Q&TWhN$l>L_uhF<pKGP%R`QaIbnoW!B&ng~T@_|R-=?9IZYJP|4
z?H9ATat!Y3>l`pIzM8wuTieHxrTcBK$HG|UG;y2Ur?Wk7=;)N~{uH3OM%6{h*4CCG
zz*%YSs);M=qI%1!&Su!9@LajhzwfPPLV?7i7v?8=W1HSaoXFxixnir}QoC1&j2%>b
zI68xD829A~_t^S3uaDt8(j#@Kf78=#aeI}yezI^bh*eJ8-DabtJ!Pq*T$jT%gHDqm
z*<LltYr%SJZ!A1hai*hreH81ovrc_>O_xqC-f;ZAvh>jzg*zv?w;T@T5h$Ld(C#L1
zd*g9qhc7Gps{V@g26=Cud-GzCLz=VPW=X@{DJK?RG`-4IdFJshH^CD^DuVx04zF01
zY{n9NevV<uZvE>Ae*HT5?M<_cRGUR955NBWcGWwpM7iEKh}N1<a6igmsIJNt#Zz-x
zOv3*yQ~c*2Jh#7AHSUk&cQdY@RCPRiiN!Se$}{)AY>1xHt~4QaiOVZN&&^EL5#`l0
zK4rMM{c{XUIdX7b1izgHhow!|p^wTNmKB{4SUA1-Mb5sHi+5+2=`l_YN}9xAe);{)
zb@n@Db#9AixeBTJUOf|Po^v{P@3I31O_|fWvaT;!?x(=Ns`#az>!$#gO=mooT<pE`
zrZO>e<A(dKvf4N9oG6jqR4-8Ty>7$9z#Z=U4*od1dPB_h&7J9=OPNdZUv-^YmF(mp
z@>Sx-EOqdjnOCO1xz}GWUDnj^UU(&UiPbYNxnj;A-s_$jMvHOT<<7hA-Q_TCp%5qc
zr?o34ZrC@`t?Y9mZ|9ui03*(-B-<MY6poxs)|g|-m|etFdneoO%57`=?c70$UqjvF
zynH;GX1{IUSIYiHcK3l_Y^zo&`{-_2WcTXrzAP81UJey6tt?H={%w=o#Wna{6;4la
zIkc;Ov5=?jqZQ%oE8RnaPQAQ-G-<)Y#0`JXJFJP|{Qf)t!-MwKai0~Rca(oByI;BP
zq5#9{J0dgEr8ayklmMmUt!y4kr6#%9J{7E~3kkogu}Zn?Qtr3=cYWmUZS~V-EfRaW
zYnE=M&VgB;f+viWCTeJ|3cD0_aJG7v)W`m7YdyQaE;(qH)AmNb#imdsINk4n*3*fJ
zyg84i7rgRnm>_<SPkZ-r@n7HOUfBMgNm^!=lmeTm^Iiq}+s_VYuxy$U%w!#0xnbu-
z_vv8o^tebQ`(`Vxo#0_x#4*d1?WC$j()ME)0<Rx*cRM(*R!$<8^Y@MI6&0`9bQC9E
z{h`w<xqJ2f!}$#}(q$sHvK@Q(7qrkpN<v}VBCAC!`m0v1HekKfwJYM*6G7Wq?i+L8
zmw*yLkB3UAU*(xLmsd--MD5&o=$A`c$@;}Pt(PXVRToKTBrrVs;^{Njj_tON;%UpE
zC~dQku}*R!fwB&c6}OAmrf~iG7Tb|q%Ow%ZnUyg6o%`aqJJ;@dY33bms@$5EoH1Rf
z(#KymNA8liOQlZ0Nq@l(rHLM^l({ClZ0yVrv0rH!r^8lNA{r5If8>P58Rh#2c9u(L
zDo%JB*dw{S-9~Fw@}{hY9}+#_Q45ACyaJL@n`85*Ok3ry)_cWGf^kc5Y02(SE9}<K
zjJ+V_5v}XY6j&F#c9MtfrWvc3hZ&a!O@4RDBp{#d#xB{M3YJH|^9?Fr%IK_au{$?m
zRd%aWY;mL)OU`w3yUrW){vP<ivEb#^DWQ84{ihy(RoeGEUi|2Ft(-uCRJr3ZA;%|q
zL~m7n5F9@7_$BoTUJHW%Tr}!5QJUnoa-G2?j?JC@PH~~f<CXcook&iYsq^FoSA=^1
zqkn&`Ux*4x%9*6S=@Z=Layd+8GRq0kex+^6=3jJg?|pk#FiZH~l`An*-IXuRcxx8p
zD)(9V!|e0?fh?DvNyiwT$njL+N;FYwOf&nKcy8aqS1rXK?(y6>XPdBCTd}XUy|X(#
z(b8<w9OaXe71JlQt1O7vrlk3jZHsTT;QhV)HAj^v?0Pp>+{9e@%k4gsqJ6nxI{eE0
zX{Efba&Oi6vmS30X*_fL>}rE*q0UuquUKbqxaDD7DzwEZ@gvKRSz906wCm{d{d2_m
z%cM0?3rn)@b1ckpRbpAzaqz8t+s5yiFJ{kU`I=^PmC?md*H!k1NZOQ$_tjmW%4f6Q
zdg>%M@zPG&RY_LO4qDb3drk=0?CuhnCeg`~KT9KXWm=X>#hne$S#uv9H2Lt4al_o|
zgP_*XcTu%<ai6YqJX<+&+ufYpU2)oM9m4BeP8uALzu)m>A9FO`b$dBfWoGC9gh
znM-V53Dzu5^zX8Xs?K|&yfK4G<<mzoi#@M$cOE>HXXGm4J<Da$iYW&B*KYWK|HmgT
z`IMs)%-$*uTm2QLophRG+xqC^`-JDmCaSiTSg#4VRl-{xrV<yt;I=(S?e#i|=xt5E
zem$69cGoNac+UT=tE`%qT*wjJFy}<`v#MGb6TY`k4rXUbuRpZra^%?%_sz!_-|28X
zG}BRKPBTAW<J(%Pq6(2mFSb`yd=>I}s1&`ebN*G{8*HCeNGLp0S$Lv!!`Ih>_dlC#
zxti@@xHxU2L|REie@I%gzmV9A#ZeQtB%R)OXR7<|&w*>Ma{CE(IH^ze5D}W;`#CYf
z-$SDEifx<`>%KbW9}fiX2)hSbl!|-{;ftKrYM3m2VsTP~g(jo=W%k<NdJ@)U&6^@U
zjztH_t}U6+e$}Sw<pBesnZ2(f&OI?y|9WsyZ1!9~xh@BhD5Z&E%nl*5f4D_Ayw>dB
zFn{6e2hN8(Y(LvPIl*x!+5N+VUlM1EJ}1sOzu!=qal@%qiw~G?P`{?QF<bwKu(ZR*
z8$4GH#pPG09Wjbu<*eDy7c8gMy7a<J6{FjFkJ8w>zqW<Az6|466=cwv$Z&fPZ%&5D
zyH4d9R;4^;hW$K-OHA+G;7aAw_GS#>6_`6O?uGx`wvEZ+7wkDsMy)R0V)P<&8{e*C
z-$>o?Pcyduib#60ki$h$sB_C>wb|bHJ&aR@mNCs(*WAC(Va;>P4Wj*r{(a-N*;6b$
zP5YI!@i`*{A87?4E+1ywUmB0t=QVG8R;VzuiD%|<p>1=^^jJSVkeOg$)aCSL^TxFB
zQ_0iT2y9cB^_F!7hs(lX&m)C%ro~jw*6i1)RZcwLIqTjk&*HCX3-arPY6>_Wowz>3
zs-CTEWy6|hSKWTdoOEW=@Ho_>aAT#(mb1}<_kWxGcwmt7;IFBXa!=H24&{YASM+IY
zKAA4<F<q(hj90CW<5Q47j5<ZSkM2@2ntkP3Oj@st;-UxHl{#$Ol2mVeW>*ZeV|)E#
z`;PkOYC4)1PFwawn#hNR9G{}#)7U5%*?ME%UxRD;OjVaAC?4K=D^erBY>xrcOy0^f
zYN<=7xU+<DXno1+71Z+4Iy22BijyHQ=TZ7(rOYqiv#xj?<x2g1?v>x#J2xk9&=HR8
zRXiFgn|$oos^lO6ua7KW)XNq2C7W+?muvlEYU@^bzBh^|f7+(3?XNn1-6&G(pLX@Q
zlSaR5-u7UYMF!eJnWhXTn>+KLN+izx5SX7Q*4?-^ius;BLwSknnfLqKK0naAbV)ku
z>Itr;Nv@NGg@O|%C~FJcFJ?IQY^{SozsjMtky55_&$?E&teg=R6!^cs`uR2Go26Gj
z{=fG;>T2w~k4Gj-stb6FaSC-bJpM7wBSy)2lDXU@|B2#9#2;|C{7_WV5VHBup!!F^
z!DHjbPBzsv8$-ThW?cs#9}!%jIP2q?FBQvPmz|5V37P+><f~=ryHdTc|2=d4%fsgf
zZ{J+E_1nGd+*6BN&m=4V4yf(t<>_D2<8msdC-hsQ|7ioW@WQ8&YnSS9?|3hBWx0H7
z-Gjv(_bO!F!|xwVGCW+F9a^(OcvIg@!8=^MY3ul32k-Ctm~O}YI(a|G#y3|j&C62m
zm@NtUx|*{xWbS{7O*Td8@{zx<T+!_3Gx@#1!6UKLXZhtI;n$vrPVC;K^>&BrriH;u
zj0<AKuj$*?Y|6aq>T|zFwtM#V!ucF(Q3ow8-|c$4i}4II|DozHg**0t%Y1RNcJ-Ra
zOVnfTl_^%PKEC~=<I*?6lfs`&ad)2S?q|GW;SrWL*Psxk#TPR~BfS0AxX;O)6rpIb
z<m+{>Q(=$hYfMdboWEcG)y3lr>K-aJU5`Jyc7EJZorp(wJP)1-)R<wZ_eI^l^<(h-
z_RklNE3V^C_Ibrpe@}6>lBD~!fQWf(l;5ULVb@BR_gWdV`e#;Ui)^N;g->J7zK%Dq
zcNlk0nJL|=cWIH)$!OzAQ|`Kaiu!-%dt&XQ#1)^PC*G@LKA!(y`q}pUb$#Uz87uaG
zys>K0pF2NGuT2bn5MlQD#u=SWZHGQ}HGj(ZIY+7aq)nURggcDwY&Tn7PO)_vRc}z2
z)qZQ2!rQy{|G7;Gck3B{eO$g!u7=~`Z2P8dxiU51Ja_QhGwpbnRhgGlKWPiklD;#G
z&#h_xq)~ob>+VWv>20q9EVAPja(>%MbcXEB_$;Qf?C;xW0-^Wz-dybe``C=Dc4q%L
z&Z*b4F7B_D>3e-(*M|cuck%7r7-Fuw`Ig$`CD&ALFPZ$MHm-09Z+xKKZtLZ*lI94n
z%u(Kb&)QjIVv>0DJ<&*4<4I?uCaj5zE;Njo#(MtqgSZ_Gf{p?#JFA+W#wv!Yz4oxO
zdwOw=O_$z_7gKE*9L~(+Pn>wqV@Ze2sxB4(-`}_79NLy!F^~Q9ri7lzR8{Hi-m{ZE
zf7j2_m=v}8`1|mAYn1zK%49#xtTZvm3H|(G>Xi}&t`-x$iAu$Cu6hrYPIqiB^9=Di
zSQ=X~Z@b*NuG7j^$F@~YTk<%+OU0Wf(>dDpsAFZw?%bQIuX;NdufA^UET0*q6#8^x
zXi7x8%OXYX<Av9p!;Z~3vPSv*-}94OQ+O*wb65IRZYeo;){m`}FTPX<l!O-xaW*a3
zb|(4ugs`mW?>sv-mOilHtGVBPP3o9LOJarEkL9|-ep%6zO8V9$9e8oywVUblo0pE2
zBKGMM-R7U%*?w#O>$!^kCW;d)j#kAU4)avHne%*~uEde^%eS_@|6LF(&LGgHc*ZT)
z{od60NJqPs%anDUPYOAQT?<S<us)ir+W7gxZ^|HtBu7kRz5kt2#eYrs-VNs8nQs1x
zy!E8oB=FsDJDKB=>9T6q`c&%HEaCcQ<D7Tfs86TSWM*lA+|kYbPW^_ze%nS)@oJpl
zW;}68*H^11&r2K4^y@rta9;SlC)hd6^R0*3?Tz`;*EBXMX?tq8R!0QN$xd?@)^t9e
z>e4vjnDN9#N84PVZnV5JWlHUl&3sF*%>3DRcGm>s-)o-F5I(tk&bAj^tG76WrJU&c
zl<{s3=l0hrC#JK#6JvEM)0$Wj7&q0wBXjS%Nt^htmp=S$mvh_BG-c|Qe(jetT$xU9
zd)9i}X!4q5k)O6t1K;kwdMx=(@;ArE33I(utmaOe<Z*BB)yIzZv)Vt}@JGGgYBA}V
zv6*qt+5h>%USZ27n!mVSDt&74)-#JYdpN5ta`1SrI!U$v?Bk5J*Pj>3urr)HvG~Ng
z>dJ3ACwCuro4TuSS<{_;rx$yfzqCt{zmy>5+&JN!mx{miWR=Pt>$)#yu$<l`babLg
z<~)<ad1dF8R8H8`xGUqDW96Gpo2o<iI9k@q{Z^f5^r@q5P0juGpjSOMX3TY$E`&_;
zc*V6QHF|Zh+}nA@>7QPB9=Nrfcj=1>eO@a2thD_@D<=ua9FOFWR4u*Wo1H8qtABo7
zN$NBCN85F+bKELTiq9=Ned_m&N{_>d>;E$)O8>m)&EoJ(?saC-#i+xdZF<}m8*h~@
zjr$Y2>RI=xhEF#hpIMwPU-~vmE^o5qoF<7qE;aYtJ95?vdroo>xh+_Ff%mY%R#wlb
zgNYOGMY%Eu%RX9Q^UX)j_LSW!ZWooXUFRMAPw^_JY_;{|`V^S!ZjvH;QrVy3#I5X<
z=+f$&w)ao7X`EVoH(_6K<rkfvB2lr-P`TMRT}<jHPAhfs6_|W$q6jC$?Xc8PP6gLn
zT>)}a+~;rR=V~c&k`tW0db64SvlriI%a(MrG?-{E{kv{0gHPp~g*H_I-yIt#oa<En
z_gUlcjI(L4AFf%k?eO>7h-s{XH)oZKOmx3>YL(h6=35+3rW~(3nS4D!VM%DEif_j8
z=Po^luRA}j;7qy}C!6{t&`sxdU#J;JpxoKH#^)b#h;arfEIIxuV!vUo^yHIIuC#pL
z*7?cm>BhHaq0?>z$;ISeE%~Ns?U$+{u&KB(<)m`lZAB#uJG)~wcKNY#wVXv$j^BJd
zrSiQ1SIZX<nZRd)zEj-addM7ilxT~7?V`bdYVq_>FA71)@!SdJRc|GF-8SU@4zZrJ
z==Ni=*h}7(JmGS?t>Rx_kMZ98L4+eSMlLeaecr7;lg<FSDaW@xzOvC|H~&rdXs~Zw
zd<8yVDLbONDN^03GC*z$JFC+-7a1?jih0}bWtqL#TJ52<`R1D`AX}5;zki+}mg~|u
zVeV<=RT}-*%Th$y@+NaUb*#K%IpyZ^H_lODFBw<nyt~bqvR(P4@<bcq*HhS^9$dOh
z#KY5=d;8>D3oRz?JHI>MU9ywmw)!>A{=XC5FF#NIb}_gyq<&Mz1hY+j8W;MHKMvk1
zo4Tn_r&GmuO|aZ=)&5Oe?U(R2&S7c1+I@`S>>B0zT=%(4KyqhntW^6OQ{_!l6{Z~D
zcT&0Er|aGA!09f}K38TPwR&opQ+Z`?ui?#32acnKvy`^VmU^q)x>pvg1aj4zW+p}1
zY3?Dn4W)Xo{Vsd5@@<Bb(#N^Hn*E8V7q9#rC?~?PQlo!Qq`OJWli&Y#ZByns9QLlh
z#$xrm&eBtht3PRM{wcoD!GryY;MKAp`}nVyUXMJk*S6+UiV{QC?1<$y8vPGb<iE;v
zGbt|DJCb|YclV07H@1R8axwp>6+e^gD=%pX1WmT->R|S`w_W+QMJzZhzdVzAyP{V^
zAWgRQ$;6P`7h4rqFA`d4^Gmy5?c8>Eaa&fWS?8@LJz2Ol&fM|MqY0sMof=Dv{MQ7_
z-JRn8rS7(PrAPbgHyQ$6%Hh`p<u@^1__;>eSLx#4^;&|bnm?WB_|-g}oz<!E$t3sN
zTV+8uUVLY1nG<To5umVShsC5L6IA;&ZiL<3p4=g{&_<*GuBu__I>x+bvz9ur>Gc_X
zx^ZXwVt(PLyC-lwz4*`4bGyVohi_}HCyN#`q-R_IvP%h_4)S30&EpqXm0If!Jqo8x
zbvFr=)2)<^x)vA)3igZ9l{{9=iQ+%zb}PR$%iJpKe8#HFtL4)In<_a4D?Ncc_Dj1=
zR-H(`{=4jpCs@PlH_7bR4b(4w50UX&8616<>9=9Z)NrLrYtCS~Deme|H!5pPnCG>4
zl}%g7?ZE9WHs=o~ZCsI>H22d2n<y?B5ssySf6HcXm(0p_pPR8ZdnPz$6w~EJJouLe
zs=h9eVSoH_hq<r7Zg2>_*kV)mBXI)L%ZV1B?$q7Sww^T&l)_7F78o!uG<kK>sv$Jj
zeak7X2QHPOolhW!g>CMab#5#P5Gy^A4AL>-MAF6^VAF0af78tLa)-qy8TGZCNhT>r
zJ3rm^0QoiQtX<TuZJHDMI2W(7S)|sn=IxESppaV}TDgUT^^#zuh^<++^)5wFq`h!(
zSmf|6QY!R4ceeGdi6@G7=74;c@a-T^%O;JQb6eJ?9e?~W;rrVcn_Og$Gd9fiTLTK1
z@J;+&OO$WA-c;+D_xvEJSUb9AakJ1un-deBZj@*0+*ZPBB^k5&&FzP~^ri%zv9i(-
zJk|V$`LN)LZ4NqjH<X4}F1cD3Aa{2X$5Y2|ioaLr3EaF`a^XN*>Z=U*LyD{Oyi`tc
zX!0ACRECJ1u&OZZxT@5*<@Sbci|vIr=2rie3Y1HEQ6RMUa8qLL@ee9al?J~hyuLX$
zmRNk=pp$)<W#U%Z!xO+E>m|pu;L&NX#w|MiTWYP{77J!uvnfrqinl)1d_`)umQ^QH
zrsiXh$?fld&(V;d;nJkA!c}RCdwvlAg3mYIZI3T@h!W}PSfd|sdXs@%|Mji1Q;rv?
zJhI@Mb~;tHGeE9d*1)RThAX9gru(1oAM1CotIk?|Q-MRHk5Q1<d)icvr;fY0ST`|O
z9x?7a>%V;QMUH*%i%)b3Ewu4izp4KeQ{o(NardZgg0Tw%t%c5iwg~j=43JBC&KQ>T
zKi5@ZVU(@M+38l(Ef%e5-YPrAW15Qw|D`1pSOVM@ta6*{#bkJ)K`2l_<HWqZVnGpd
z+j2K=YhGh~VawIXDeN~7e(386a?=uP3K2S?#IvyBK+}xBm9eLJBQ5(5*IIil0~ys}
zb58Cui(Atw!4;Dn(vBTqdNwbvH!V=^<?k{V=dd$Y9$#+dd$oL0u%5)uV!&<qdBa&J
zr?pcu9`rTpe3=*X<#ye+pS3lM=KL<3u;hDy%0#92yr-H&L?<zd1^n_0O%4@0A;HLV
z<!kGTnM*}>{Vo%kvbQ|7eNDt|$4`6u7k%$eja<!b?W{C~eRKb-ivkfG%ghQ6hv}Yc
zobgjA`1a1VCV9WSBwJ;5PH25r@varG4Z9|o-*n;@-z3$3C2-kc)u-7IDX~-2>+ZLb
ztrKEju9x5a#fm3|apHESWHAB9$ML#xUuH+T|M{J<=+B1_0*^mRL}j-mo>1L2#eLy{
zyhyQ<L$1D+6Lz(TaEW=n+|H8q^XTdS_vSe;9AIP;{F9(I<CKHmk44@ki)*dbCUeGI
zkL8an+}Hd`L->KYDk!<FcwNDipf;oH1i#G01+P|WZ8y5ecxYy8z3GATOb08MpAJ?u
zO_7})F>Py}`=MKWL9TMMg;rL{oJ!_@I?<8igiytOhkF}B`}R(sea?a5!1Z5N4b#(<
z8hKtcoi`4&o9Sa9)f=YI*S<!AKgj6DCZUzeAA;qoRr-69&+#xZ#%-Efy0>rI=c%lp
zo-Ebgz*L%__~Fo*6aV$9C2YD+IcM|U|86+b=Ya)Zk73Lr@2vk10@__P_=Pr#B=GGN
zbBO!!=E1$`>tk}`6&M80^;lkbwN~}Rqm2_Ue0kBNuf>0RTVX*#Lvu5;-;~M6o&<t)
zrpXJk9oXoxLu8LdQ`PO#XIoodMzMVQ^7QnM`+WH~4&3))wVx~fvL=0vN#T-{%66dE
zIYYssIakb5LbroTo#emT48@Ggn&xMmC|8@d^P~0jd+VQW*?KkijzM3<tzYUIf8NSY
ze7y9FzQB^&^;?fs*c^NKP%-Cr9Dk(Y>jgGtu7aSZh5H<a1F;7;HvQVYapL>);)%KC
zi_98htjh)Up4{_zqCc%kyS#^Ux7Ff#p*#}rLyz3zyW|G05{}P1soczPK(^Vy;L|Gh
zpT+(wtIz*Aef3sf{A`DPzy5sw;G@9wXF2!J$vYqHRldA2HPTY=&kmcK`^O(XRJ1wY
zY}n=;w&W{6-)$8qP@`^}%y$<#Bk{`fp+9!<?#bRV&Fa4D)@^c?U*zjM-0%PNe4zch
z+U&~a;ECDp$4Xq3PJTA>ES8&=x>o(vVz=@$RvLZl_Wbh%CEul+KMBn2jM?0nA0aiJ
zf#rP6&%(9k3%}h9W&d9+%b!$Zv+c=xlP9x}D_zf*ROWVEE&RF?)DV!7lbg5dZNfFj
z%97q4E*kx|XY4Nhj!D!!@HXJdjR(7gYCb!>e6AN!^yjJkzTn??GnV{(sI0%gXQPoj
z`=WihjOWksth>w8>(hIn#6Y8iW6L#(Uboj}Dr?><#7;fF&8Ou}qfL+j!!o8jCQ8N!
zO}Q(sU4M3){lLZN@jv_B&Sjc^O=#NL>a73Aq5Ie6YPRjul<F%kpHNP|c+Exo2G`t|
zm4|ArJ?0<Ts5x=PYvcBa?=Et)yXzQOnC^%k3=!b^FmLsTUgo9qtsCsDu6=%%lpf6f
z@66KE3=QS`zZ?nV__H+iO8r@u`jYT7!kYtn`L`bOV4N0oL6^1tN7&{&+nc}m%1t>@
z=dCGpJcQ*qlMd@~sZXnxe<+P!?JcZrlc)8$Bg3Zb|EKcx4w5pv4|EHkxGu9IpH(S5
zzg2CI9M`)RHe*4St>-76c)7l}+1*zl^0wonM=xH#miKHq9LapnE@i8L27`g{gFeBU
z1QvI;M1_j$fj_P_2E~7#@~2sKSz)Egti~fh@0?!$$UVNoW=8h!<9b)*vxV#OIsZ5+
ze5&5gDz2-%ZS}$!4GXV>K@F`}r>{y};hngZO`D7F;)JQMcQHf+`WejN=kr<_*Ii&B
z?-KBrdE=al6AT<1e@&3>NYFgsq@XB}(V9K&P`B9=>tyBo->2+KpQ2j2ep^<k!hx*L
z_wn^lwTnK!xOKN(=I6V$4*Sc(Kg<dHbd>eOnJXvMmra<+7PEHBHTA~$qbdRaS~<3U
zKInV<|IhQhyMy8x7?_@b?g&V{ZB}#t_}$&*ib_7eD>w3g`uL&t?!C|TpPQwBl_iL1
ziPgB*=casRyus1KD8{f%?8@(XhrAa(X*IlZTi84AjJR9f_pPg~=SFWU|M{fu>(A%c
z&!4ro|9a(X=W;z(v$*Do_Qg|womKs?bLNTl>`LMcKNxHrA{!!7Ws5#3o#`<w_D@+f
zC8+zT(;)*BOFg5j6BU+h`gVm)p?22^kI&~f8ZLYPIP%i*@Ap3@6_)VaX*>|Nk$vGO
z+hs4lFS>TJ{S2ee)<b6}Udq>+7|!#AH$3vXt@V!QR>q&BO}}n?wZ-9)P3=!3h8f}Y
zzdQM5A2{c(oG+<b|DNm4#N@f$F102CbNG2{l#e-g{*>x`wEFewC(81c7B)6}R!q6X
za7cmU(3+6{DG}bOxmv1|V(!empyYbAetP*Ht5Z{qSPuFBee%V?>-_VQS#^0`i?&W%
z^Lr`7jejpZV*dBd^nIH0BZi@|sVOLez47Y)&@bghUPf$g+Bys~ZywN%et1mi)izcg
zS%m;Co<@fK|4&^Fi&x+M*8J4u>FZqn^DwrA@B8rM&ZSp}R|fu<T{rLXr~E@6N?wl9
zVq1b%D<2C>oLZS2zE`9B-`@#LdQTlc?em{C$z$>RgDW<CezI?VY{BXxwzdENhcQ25
r{IJ#Ss_hw0#TEf4j*&^&KKjpaZKD-mx0wDd(7lwNu6{1-oD!M<B-xmb

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/loseit_logo.png b/tensorflow/contrib/lite/g3doc/images/landing-page/loseit_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..b6b3d14df994748c37953fc67ebc6bc6d62e2607
GIT binary patch
literal 12002
zcmeAS@N?(olHy`uVBq!ia0y~yU|7Pyz%ZYKje&uooTEsefq{Xuz$3Dlfq`2Xgc%uT
z&5>YWU@OcFi71Ki^|4CM&(%vz$xlkvtH>>200A5Oih{)C?9>v4q}24xJX@vryZ0+8
zWTx0Eg`4^s_!c;)W@LI)6{QAO`Gq7`WhYyvDB0U7*i=|m<QC+nmLw`v<mTiRTUFR9
zfz7qbE4Bg&>nkaMm6T-LDmj8IREY2mP;kyKN>wn`Gt*5rG%-*xx70H<GcYkV)KM@p
zFf`XUFx59O(={-(GO(~RGE{&9B|8P1qLehNAQv~NT}3Hrwn`Z#B?VUc`sL;2dgaD?
z`9<mahL)C=`UXb&Mn<|tDQUXJm3bwJ6}oxF$}kgLQj3#|G7CyF^YauyCMG83mzLNn
zDM5`*NrszRP+F7&HZvtzKQ$*cH#M)MSl>|35acU}cXJEiit~!0L7JSOt6z~=pl_&W
z09RO$kzbNuoRN=1Ib0e=Q$=opuP=V{b93QV6qmS`mEc!h98y`3svneEoL^d$oa$PZ
znpdK%q>z$qm6l(Wn^<D2l$o2Do~mDvmktVy0xRdD)WnkfqLBRj99Z-Pz&xVh><o%7
zQw0s*#N^C8knc2+bfvl^mZaJ$85tN_=o(n+8XAWf8dw=xTA3K?8kk!d7(kTz=BH$)
zRbo?WWMpM*j-oWRBrzqiB$1$EcvxCF=jY@X`R1pj+A0|<DIml>ot*=U^3yVNQf-xr
zgWR1IJYAj53@y#fbdB^3!4C8Fwerj>E=kNwPW5!LRjSA>(96tBu}U^JNU}(@G}TQs
zGBeXPNi|Q^O*Bn2)iq2uGBip~G%!m_vV`e($xki?B^Z!?NR~*+PtHuSLO8=LHQC(Q
z#5hIQ!Z0;WH#x;5QP;xEBt<tZ*~B!}Fx4d0)W{TOntxGddS+f?4zj5rSEXd8SQ#5y
znp-9%r|KpprKRecm?xU)TBN0<=q6hvr=?k1q$MR8CWF#`O0rc+W=T#eG$T3Ylt5WI
znV_U^m6Mp4ZmU$Go06KASejF!WS3N0oSBzeT%4R(l%i{4nwwi;YMPs?n_;65$~HNf
zb_x&{#3X$r^T8?C$|JM5B)_N<YD;ixA(WMqsQ^x_R*A`=OlYfAoLH8cq68Jp#1imC
zc@e6=JR>y^J>j9GCWzg|`DrEPiAAZ7>8W`oSW`Dj!iH!{&PdElPff8^(pS<a+cU(a
zB8Y`_^b9ysA(5ny9)Dmtq;eA$^Fc0dASSqo2Nm~r3W$0nH7~_hsYuD*PQ%UrECYjv
zxTlL_NX4zUb1S!`hQ~g)ueNx8E`OC}=*&(YM@~;37ADUZ4hmh&+m^0f)S&DgX`&Y;
z9WCy9**k<sqqm9o#iqd78{ZnR8!{eN2vY3yRGk{KOe$#otG(f0k6F*%{Qcio`S1H4
z?km3^@^w`l@AKKEMj>%`t;*-W|M{=&@9q8bg#;O<1g#M-dn@$!TzL{3JG*lB_Lm<&
zK90&>`s>%PR-Wv(jT-|+f4TfAm@sc%Z(WJV_dU-aP6<l1eV;w6nIX%TV`qB*5nF4u
zQ|rX4nkIYMZL>9f&epTY{6V*uWOmO*$yPtcJ^YIbg`5s+8DHagIpb^DnIlzNpQ;w^
zT5D9<8g#aJ-h}FJZ#uhY>&bDPO__Z5wzt4KlPLT1?EiH*A2Ny5cAQd9ay(O|^=&7Q
zU;vjq%la)@D-?uJt#C|!`&j8yhsC!gJJgjXOBYUVO`aaNO6s|YVtZTLDc_>oApz&Z
z+NQHkoX$G&xz^I(J%$||iY!hXvgXqsUUAmE5Vd%#qoymzya#`$7=8;>eQ53b{@uq9
z6VARqJpJ4E{<&9M?iibd=<Phz+V`Z*Y0CbymEVi9&oVR=_X#*1>oA_QgezOuD|=6I
zFxS`oM^CxW&Di~_)4t+q>)zw%wVdAFR4zQfth4*lg1nx!zappqn(kgGvOIIv#1A?u
z=`zQcWNO`dQ62gq_~0u2r<Xomj^4iNm8AcP<NZE)VJ7RkbN=~0dTM%YR?Q@p?iop}
ztNxz9mfN`Hf90~Dv9jA$z3)H!a(&jn-;%Gn*4O-vUM2Z(ukhNh+im_OKicv8Qf@8#
zbtR|xVyCW-+N_jMMcSXZn|_IApSkwVCFHHTipIx|a*dA*SPm}gaSXW<H|MM3GLP9s
zOpXd24J=FoHLuf~TF%|hu3b9KpKXDOm!r1-f(u>?w>bH(-#lyS!Ce-84P1Pje6AUV
z0nske0cmB5&n5<Ix{4fUdB19b-P`6rp^fv{{lj#xPnl5tjc01H^q0~~+5NT$vROWy
zD^*>@#Wyu9@a*FozZ7=awRouo2u)bgqn~qhs{Rr2?M){eTQ0jf2!42bK&wN^*Y;p_
z(~obt9QUd-CZA*NxA6aa!MKB8WP85zb8d-T2W};<wWeIXmlC!di|RJiP0Ct(@$L7|
zTT(@)xGgR9`OC3vmwd3S(@&{IJd<84i<mv;issnKsG`-h>2Bvr(?XGlsx~h!KYJVW
zJ$~n#O}g&a=T2zn^Ud+-{U4Zo=Dl?Cys0L$*Ydyrq0`ExxvH`!<BwsH@^_*BJN@%)
z8vN7)6mw5^s~)l4aqoG+?k8JKw%xrGH6_^NkU*5+zGBAbHg+7DlZq-nNSftKDaapR
za@Oja#;2VfRcpQM#3!FW9<qE{MxXzN@B8<+8`vG(T>m}yNO+82{!XV^RjanLwFXH(
zZ*_Zh;;5@zU;m<>#0B*XOGPIdN-Q>v+S;+|il)kPC)48^HpS{knd1-j*f*91E682w
z=;PQK=kzpamh^hHn-#p7=bw74;}6_6dCJ>#8CO9iBRQuZ+)_1PCVBswXBFvmOZab<
zKQC{tZ*%oxxBu&!?zt^C%x-gW$aJw;R@n372eSy@Jx`lIwk5SYz8c?ZxU_b4D#yjM
zkB_+TbFBGOcdV|U>sG3LbCmP^O-t)^RZH|HEZeuVMDana@M@;a(n;U{U$n0LeY>uE
z`ko(FYU;sjF6KzQNY9B8K5l3`l}mZ?v|TD+WF(L9^KUl1^u+wjrj`5WyX`&qM1<$d
zSEJC?Mury`<RAESk^ir$v{~NM=W+kLLtgu4`L=Ep^vYgy(dAk~uiNWs4l7Jmlpkr>
zf8?Jc|5M`!TgL5a>6gnE2Km%-1k0*e{gSOo<UToVdeHrkT9^K5f81$zWlR30x?`S`
z5_(>`{;&-V5Py5y{^z{?|7YL+{_fV+*Db!cIp4Ouu{Ee^^go-Qom$Sh%G~p^<n#Td
znI6kK+}|yJe$ak-`FHD+@;{VzRm%9>*)?V1<;hw4eJ7o*T+iS8-?~=LWy{>pJJrir
z1?Py*(hJC4;&W9ft5?l4$aRuuRdJv1(gO~5AMTvqZdg)bU;Xbdqgmda1IHTeigwEE
zjOj~S=#itmW~)fCVWCKHpHkK088ImuB1K0g9*uI|8L}wg%*6AqITA0_(|O~6J`&e>
z&NMxD^@fE48ndL9o@iWsImcv$=`5eC!cv{bcQ%MyzI?CeU?66_tNqgAdhSWvuE(E`
ztT#>nn|wiVnW4|amFF+){=Gl&-fzW67i~N8mAs6vcT~UU6uNNhkJFUz+xI^VzyFI#
z@Z$2`3I4pkxxH)t=@-iUE#t8JcJJ)xcL%TV1=oBE=BauAUDM`o{l%9lr`6|_&6vJ-
zHHUHOjEW!CC%o#<D=oXpu2|R@WxO=Z-(#|x-ONSi&u<1@<Pk|c{wz-K#-`31yR^g0
zdhgU+_Pta3{rbm)6VE@_d{Qj2SVNTi3Ag2zrlOrL8z(GYBBHwV#LbR-^W=KMo)jBC
zEn4~3TuRF}_1oL(p5){GpI1f4Im$$S+L(0wS)Jakm}UC!Uz}Ykl6zn|@2g(5ugjD@
zqt+ccnJ*#n$$fvP-|?`2JX07aA2xB<wEgz|%iDj<UhMBOB6si^ZP)Riq~c;$Wje9y
zf8)W4;_q&iCM}7%nqT$h;$dm$TNTkFNAo|Po87l((IthxA3vDC*!_sBlK)j)^7X>2
zjvE^ln%453Gkm$)*;~~1>$1i7JSx9TbeFLFWHP(jgn8$bwSOzO<;pDxIk8xvXlF;#
zK2eXmzgC48q-4x^^}`}iCQx&_gF=_`Ba1xSq~sH>hN;P#3={KIr!X$E$ooGhKufHX
zJ5{dcgQDK<CrR`E&XIR;QrI-DXk~|K+Z@w3H!dCT>t(H4tfu<9;?>Do8A+){^D1U#
z9;mild8BzQUy;+s+Gx|aKkTaB{THtKUel0~qyON!t@Of+84126$rmoZ-Ty~=arnF-
zr+1Q4TDOb-O;?uwpZ-3EU6QreYng`3FRj||`?~)$uHRyRcixq1-Az^eK`sS$7jmjr
zeVt<Bsd4PX&+~us?S7ZbCwte=<YoSA@w`LHduNOaOaHZ5Uithj2fkk4-?9DAv)O@B
zrOW20n8~VgL}`1u1<yG5r$gt5<x-KBDct(|I__7Uo~<e3-EiS@@Ig1R_p(oe=GEA>
zhdKFcF3S<N`?36e+I{EmLYb~>=LKXhTXIggX^mCrZLUXtmz1vxYAq3&_eZDpe!ck5
zgZYiC^%n$ezNoP2Vz1n1_x`u`RW~&ipS-)fdvQ(h-x*t<$=bEtsJ7qy%u!@l<(x~C
z=P!7_&;CfC?X{qzTMJH3o#pAd?g^J;f!>7-D=E*eA5)5^6ntF9w0-X@H@ojU^*^lt
z|0}lY*RQk_!CGP`#kmxkE=sxF*i>n}|J&>RUQ2@_&E%z+Sw+%TemHHNSNT-C;_~b|
z&ONhFK38;-ai5|sy5-uEOsDvG{rUgS*l&7&aqpfVubyvM>MrOhJAIk1@w}=Ujur*&
zxal|lyg#|T*TC@N3Fa@g|6`}j+s8cBPspYGPvu!R=CxmQkKMArVQ#S_^DO7h&5tLo
z_7&M1t$Ee@wJ}%IA(i7VD!(lL^5&<4*31mcquIOMH*M1PP$-=jHR+O%kk`fy$p->V
z<Xj(=uCJCg-}9#G-;;0li!ZLYeET-DlE%^~mpS^H>vrl$icVlI()KK|<}d!d`R#eL
zV`uhov<A)m`<ws6!To;>@@r4$e`pfl$FN%FFYEbbU2dLUTQ4q|Xwk%>lqj0LrOm)_
zmz~S`=Iy~%kK_&V7Og#S=~?T87%vgm_2zcJw%%>$7r)*!!QZa3r2Ffu{n6R_-jkLb
z_K`OCOnrY`<L&PX?asBluNegueVKcYD8^~L+9JB)%e0p-HG@}QJ@NGOp2y|)&Yd`6
z$R}^pa`x#pmFMYGl$<t-9i6??sZ+o1rQf!^yJw{De{pk~bRoy=hJnF>Y5$mJpW=Ew
zTjktV56OuAT$;Io`y9_7mS6Vei28+v7axkX2z<CFE$f;uUvN=ZY15*kahFcs-ku+S
z&i&ijof=i4i!aW(`zeG=S97Vlo#)Ta`i$8v!6E+I-oM=xmgrsZkv&+w?c32OB|fuE
zF0OJ}8k3Y%G-<+>oxfW3A1K#<bu4?cgJDnnYM%SIUiSO%{B|pQ$FEtdU&>1;tn>}k
z5YZ6*tg`a(gbk$%Nt-x@zfQAwqTGG$l2^_AcYi%n9b{~)?VV2W2JHP(+MzLpV?~&k
zhS@g%q~Gh><?MH`S3FuGVsu2h&Z6s0zTa{=7Y7lIr6+monolj3&(>UgX?X;r_~+i1
zD_cKxOS$?_xey-m<@?u!H4E>pSlqjC`|{;eGp@~&-}S0pL!|rlw(xx=SqDCElFol{
zY<3)*o?@4h)>9)5*Tr$_eM)A$Up6je+35A*?VVR8H9LP;%}om0C1uP1pY`L1y=!?V
zT7JKjV`g|OE>vUM{i+WS4j$qZF1o+3*7jD*{j0}va(?z+-q&%^M{dFQhx}V>+)7`a
z*>WcEVBYla4{9Eqco0~VJA-#`<*Tn&TrXa~&6J$5ZT^Z7>!h-GXH*{dtbdtSb2f9j
zlgovu%eQZP7syRldvhZq<<pHsH@Citzb<GoMW)R4votJiStq;n`$PW3SI2KLnf`vh
zJhRL27jJOppH0HyCn_FXz2>&vOuDx+aQ)&xNA4#c|F*7>VYAh2Sr(z^7jAvHoBvN(
z{^teu6{fExvR9f*Nn~qIJw1mzz+q?5u?**}aZ5wi8Q9q!bN~P8uT9DS?*dDsTjkXs
zeP5HfV6yZ-y`z8U&wYBT$Nr_^t&p6Yd;8ws*f=j#_|v}S^IioX+aH=OT3RRRUGv7y
zi`{N}9wSfjwl#CU9|_y>?PvIhv-$P2?d#6Z4_bTO(Cl4D%6ZN$i(e(JJn~_qVA<Z+
z^&6f)Y~T6mQ|<$yAoYs<`|gKd2$}as<>C=x?$49YvK_lMJM`-Qw5u&Tvo5W@_9SkK
z$?VOCR*U>ln|j>7F{^KWQ1ITEN5_9(ySIfyWX0!K*Y6y9`@U8`{{J!CnpeW})#A10
zM5oLtyw3BrYI6Dh-?lcJ%Jt>{9QeKC;jiosL5F`H@TvQ8MXh#P!sLS?{J~YfF2A)8
zFTK+oc8l#wKHvMh(UFFV`)fZWI4-V_luUiLOSMT=?r-@g=l@JrNq2AD(3|x&?#0RM
zq{l1!`ahovzk8_q{}29~`;x7H6n|OKa*2nR&&b5&$o&5|fA4tbYJcX%3yaxRpGA}u
zdDl)a{VTz_NcU)S{82xfoA><Y?QJ*7Oj`WnsPf0Q*<n5>>g3~>uC2?w<YTo*LbdVH
zzw?VHtqhh6)qncmNBojjORue=G~2J}XS)NRUfsU8YyQuh^XKWlwY&a6c6xlFW&P)k
z{KbF0<6M%nCV4G&iO+BK@)O)M-!7r>SM3SQ!0jvL7gqdO&5?F+^X6kNr%vd<y|>po
zYT5*)MQg6vi@FAGOcr<0_W3Gsbn*8&U;cf074x$-NRq=tV2xnWw$RGE*RS8!%9$a&
zaNcTzsLh$5)e8E8R=R8qQ1F$tGP!VTdw=+xw!5>Is<>o!wg2OsEHm43lTodz=j0PJ
zW}m*Tw=XBs$*%5Cg=)v24)4q7s;p8qd1|GD?Tc>J96zd4_L`$ZDV?d;EmzI9D9%)D
z#`8%gQl+1E&hXMxs^B<L75T<&wa&*UkB=Oea|lVdUCqxZ?aF-W_w8xnCkpGQUVK*c
z{Lh^H#Rt@%^xxTRS9oa8)Vcdwy$sznTZ`ZM*xf3R3eXaJxpGQ}Pr}75Z(sU_{A@Y&
zVE>jYRsW1d7!9{~cEnDaaPP#~LcKn#Z|+moR`Ck;+iiKba^B3@VGE924qhqXdeKrc
za6Ma~%=U9Es=i+&Kjn38deV2d+wYioi&o~WrXc-2?^iD0rtjq;crofq_jCDY{I>*p
zZJToW=7$yA>h|B=aqU)#V9SQS=bq=+$f!mIt`pfU6qI6nW}D(FIhA=8a}4^;9hoLx
zWp?e-kC-yS-_A0MbH(#zdb#hTPxst;)^~ru-Ne;ClZ(Cuty=m!IXU^Q?VX;98m*rj
zA1)MZ+Onx$_Pju(_~x}Sy%}E?erw!s`CWu1d9Bdmb&*ph@LKP5UcJHf&>rI*`#f#F
zacq#g`!Qtu`p&Rf{xz>Q_w@G8{r>N<iQ@i0OV@vxxc}envfSr0FP#jSD1W;-Y2gf3
zeTl$uJueU0Q`TEH{_LBacWS}p!bPtXQ*8enFI(L3bKlNOZSoc0icfA`z3zOuQP%<U
z{XccRo^tJsxuZ96?)J3T8~hgWlzqzd$mTA~-NVxK=+f@@`*tgD*tq$=_T9u>#_8vt
zeRR<-sb!hlGxu3a#qW7{TvqGV#@!ZDEV!(lxA0Q3u~XLqkw5pW>)NecrOxRZoLAy|
zs^-7Hda=ne!xqnDg{``>dGjm$ITC}PF6#C=@Pnz9$C2&I9u>8WoW~lsuKWL<E@<-c
zh_FUDv+4FJUKbp;KJ=KY&g{YQ<!k;C*2`gsET;#}NSTw7_1{xg(=PC<*-MpGl{_=k
zUpu{a)e3!duPA)-+9`UvBGRq7a@nEb>yLJRy&7{%sNdGh<fn;%Zb#`H@nx?ftM|WT
zow;iHjtvEm1E#3T)wX05)$cy^yTs1D^^<Q-apJ4?uma)ik3ZM7xTq{To&IRm8vkya
zUeCT@t3UoMSC96G7+mDBkUv*#re<ewk;C}OoZZVCEl!=Uk#s7qH4;*Kklpd>i=uNh
zYwRt_IcH|&h?>NHbGT|Hrp3|r>7c%_zunzeM@(nG%Aa~%z47BL`}KEE2()aRrS7@Z
z_t;K#LEW7fZAIAM$F<9RlCieFd^GRp+QpHrJfGwBukCMKvF*mI%U^81dVcVHUABDt
zybMvno~t%rzx?^4>e`Z}Y!EpA-d?`qd9&SiYT5e#iCw*XlF{qh&7XH%{TtJ|jJy1L
zsdUM~*QqOHb~Hb_VEeW_{;Xy8<xi^Z>N1_y_kFfs@MxX5>iBOl#pSc3wwzfUVE08-
ztGRscF|M_)v3Ep^z1P3Ed~N0gt+@#gCh|{|u9wL)x0+=+NzZAqq=nhbg1B?iM%RQB
zGi;X5WU<;FA=;#5UnKvxzm8?*Hl6eYg=3pq=BTeO-v4A*oRz!DM+s@UpYvU=I=@X>
z;&kZCyln<?-+O$PEqPusL0Rd_tj32Qr&I|wd^5Z;Q^Redbp0G&{(6RB`#bC2#h-s}
ze%$K(bhpS!6O5J%9=j9sUy?~S*rJMGTJ9&e&s&2<%c|}DrW>;!%bGWl)7{wCoO!N-
z)7F=g^9rsAnLE49<V&3*;A?mAnC*LY^Ir@ve(>ygeNIgK^Z%xE3lqycB}LEaM7`>`
z^w4_a*TXXP*Q8hE?qj^zrogDmwUp_x>5Aoz_mUO_860sd+$NsT!SPkaAnv=*)!S#6
zrEKf%(r7N@J>|a7?fzqV^?5aqcodV5f9QJmzxdwbjhkOKN3O0_+cjyk{v1{FUjoe4
z_C2|MMqA&C)m*uJOU`*Ar%CY9kDiL2tzHKMI-+a$oOe>Y^kuC_)uwluDvb)cMt-YX
zH{E!~G1WPK*{3h*9og)+Z4aKGaNM-rMmBZZ+uQT)LoVv6%spfLC|yc9`sc|zN9=yd
z{nl7!cH&8^y;IaK4#9($gS^#eeXLr2Gu?8%<};q39*fVjC3Q_;S$K~pWOifL<;hiz
z0vyIJIjrR$kHl5+)y(_!(0b$9$GS%IJl~y?(Oxa~_}U`1-M9JIdY3-u+P8d@%h$zU
zY`=OgdgZ8A{)gjhufB^9r=b6V#Z8OmX$i@%P<1j~uKCQwa<SpFGs<ZKieGg<$K<nR
zb*dLGloj!j_YW|h+5YH5`Af?mu3X3e@xGmG>?IkVmYkaDbnblf{FjA}r^{Cb+Wj$I
z^ontn@c+h@Yt1K3(^E_cIJh_@`(4j>C*{x(={Bb|8b{hJf?T53uFmS>(A#pS+fB<@
zcaHFNqqAptGJ`tIG(1oA+IwYwcs-^6kMYMVjbV9VGmU3;UoyK~T$)$eI!C{)$ZW!<
zsTwEx->R;XRWtu)5FDMqXy3!AONY`N7VcGFaWj+WYlxt^P37v7N84^_DXrcRmH1_)
zY>J8PtRt;q&$lhPyLau(s!3(mQ!Z@tT03Q$(-b$sj#ccMeD%{WO<W%=QOhv*TGZw3
z=lZT_a<|Rb6}>V?cGiv~->-^I_6xN8#QUhT*2Ju4{;OxxFTD8g7*nUr*vlsu*u>&`
z*V*{%i)HFYx=zQ!gr$2;xv~#zxf*;~?xj(>+MHSMj=zYWTR7Ec`|F%)@3()yT`)Up
z$8pc`R-X0TA7SyQuW+ApR@hShZ`t=HqKkCAWD4I(emX9{z~WCX*YSP+A@X^>&n==)
z?8sP`raAepSH@ZHlx3N<WiBf&XGvb4r?^U9(fk)f@4wY6@^`ek-T&;t9nGoMuC$;@
zKxD;(Lcfb1j;pW5IIR}Fwk^swn)jGSPmqT1>^W<H7WwI%yn0<hJiaaD*9Ttqb*AEh
zOg}P}etp}p?1Itxd)ucPoiV#Gf9A#;a%xL!=I;2#^Yf#vi2FNL`(N)}wjS0^z4+{n
zP`7039Al@viOe;dGv{x<&>nMYJIe{*ck1ip78SgSo3;1+!i)cFr1qXy)Qs)v|0R%{
zZl*X@e2<VwmxuzRZF)jl_KlshX8Ln9^mE#OuR3D+rR}Z3G>6+N>=VEFU3w$2d#B*C
zGqU?unKU`Ii(HmbTK7)f{2#~9i|r@+-ybz=i{cNMx%vA{ooCK!r<c{9V`a9>J?0y2
z$S-((-ok<$bI+Galjfyge6zD@NeyS_L#CQL?K5PQcKKdtSX3&Id2q+WN>8)Wlc{e{
z=E_N}JH6|y*1?{jEx~49OjcJeo(){|E3s&@|HRoh-1Ca$1HUS$&7D(u@Z8Sc<)W;n
z+=t{NP2x+;w3hiE+j#c3X3yJ$$7@xVzFl&Z+tXWo;*_!p&$d5!%vf{B`NGe#WER`}
z+>~=2nT-?9U2U-Jw$)vFdB2!~#g_HSmpOAjOKhv#@^w?lci~Wep{s0$FC$j7h0Zj#
zE{c0=%rTe!tc&jS_Bom%FAn)WoKxYPT6JSN$Fj;Er{o!bKC%l&A9>1Fu=DJWKWdi_
zsnz5O+r2w}bT3P^Th7FUPaS<xKkZTlt{V1j+WO#8z@rO_u0PpMT{U<qq9%FN#C_vQ
zj=)-u+Y-mj+6~VJPFcVGdE4vFIuVg_#>`7Y)VE*h@n1PD@I<Bjwj&FEZoZRr#&1#R
zI=3l)g4_9>I=cj;kGQKgt$ncANKs2TNAO~0)5)!##_<PjZY^JN!DT1gvG9wTqHonM
z=5bB-TNop&>Y45K<IEQ>)7e+-u3xL1dOzJSKv+y8#Uj6tZA+Pb+EhJ9Ro{}LJ)%Bm
zo>WDxx)Bs<d-rncxg3Ga{l6B5ZqvW4di3r7^D_!;CktB0E4=M`#maS)PbvJR^Su5U
zyi1?g|DP85t>Jatb@f%vPo6J$G1ov-{KteuRY%{p&<c0Su&Li79od!}ub8PmVbb1v
zlGn5j8^8Ct9+RJ5*cvlI^8Ae%iMq<O^X^0~N~)>57Wk}cqNKU;oDDX5J?>YO1iR)X
zZxp|~Do|3=c#eu@>dG6H9~Zct+xz!0&uhI2MrRM?eNFG4KEE~3vgaKC*$EF@iUf<2
z+$DCo-jozHUNiINJE2r_aobW2?od<iO~z|d6E+v<S6j+&ioT_uoZ4pkN!i+G+nO&|
z9$NLEI$f~NsA!|JskcxOr(W8UGX;zGDXVWPy)kcs`b>i~3s<*l)y|K4)i*yo7tefo
za)#pJtcA})E}nlnE$HoH*|SF%y~&%Tp-_B!rBRB-!G^TPlW~#tCtU4xo!f#BUU|AW
zXA{$1p5<yACT`f^5I#*Y<@*A))!$BTF!Q>hyCU$k_LEajO`Y39ttZXxiMi9xQ`~p(
z+}t}8CK?`&`e3-B;MSQ-ne$aw)f%NlWOv4FyQRLN;PtPbS>cl(u4$Qdcy-=YyY5+U
zS_R9F9pcHBx?Hkw)|vOFS9A4uFAjAOOHGepb}QSw`7Y<ua9P>Z6C1>AV$BTY#J(#U
zr&vUO(tYKuclA<bov)kQF`n%7ovDfJu?cm@gRFl`J53i<n6%O8@U5Sdir2nZTFy89
ztD>=8T7<Klo^;*qb%I}Ol;;T=ALew@6*{<P&&`nIj&nq7oZ5I!d#0Gn7gZTadb60u
zel=B2Y70)>Ev~sPqU?1+Sh;bE1*e_fwZ8Q`r#PLPe&kH~nGJ3!F#$1(U3t1CUoU-&
z+_Y?|<h27)r@QKM8=cDT=1ys~tv5=ED4e3(y?esl4O^VN!XhRn@;2o}Ug^_cI3ckr
zpkn%3=c%ryl711&xBpD8e7@=GrL`eW=CN;oobQ<;=@+Cbo+GQ5a`n`y^F}EanQrSI
zT#wl1VD8+uHR;J8V^iJE96hU(+9j6Lb&Ge+NV7RxAQTfLkb2rv=F_PO50`A+c-rsm
zrn%Zi;YNp7c5Vzmr+NFM@?EYHW399Z@0D}5-rR6Uu#MHx_t;UM<ytEQxBg-Ho)_Id
zi8Hlr@{>nNYiko+?NY?9rdY5t?TkpP33T!^pZ)4V^08w)*(n>uU2dNKF(L8jHGAi_
zVDZ$~IVX+g6lL$_Dw*--O-xx^@L{i)kDR==C*S=1{7Q*+=?p`o!%{ycaQn$_Yo4`a
zXUvqnT+Q<p4@=#Cb8c_WrpyhJeoMX1ah8RJ)Lnb~!N=_sPxhf>=l1k#w4T@$EForD
zI^(0!Vb70+Pepf?IT~Ck-4k={1dn>=>fHs|FXbLzvW+@F;bF-!v*M;%8x&0D-%m1f
zZoBGyR?su@+7$i0!NNvHX%X6;Z{&2$r}uBNJ~3DE@U5G#ul7tko%E6?ohfH|>%97q
z!h3fP^GNzxTzGeEt!ldLqbVyYV~bhW7q3k^b*3iWLeWgi#g+S#otGwulHR9<yHE0k
z>@K{0d)}FuX9^zGTwzP^UQsE!oy%eMvUOgU=1!RPIPqRZ5{JoSyQV&)|3-&bEmRTk
znW*wE+Tuj0ri;_!cavAHYx$B{b;R2Id?k;{PF{=Xs4bTr+IYMx`!nx!Z?0YOz)Ip~
zo`^xw&0`gtv=vp2e%BV9ykoMAtI_0@`pNvnkIsCOMmmcoRAjYsdoDiIq&UY*Qh+a}
z`t8=t>kF1$(2?=VsdbU8ZtR$!SYg1r>{NJwTs_N0v*@Qh+U^3&w89w#jr3X9ZFTCY
z%J`Q*Gw{UY<MJ}Xi{-ao)8V*zyn4QSfLinep4CRz<HTpYsnt{BUc36UT<^bsi(2mN
zEMBhO@?LAHb#85e&sUpvm3tiz#4VQJoEm#~y~ExL#;cE(zNq}SIIL^Jvd5j?@1v(o
zVDP&VtC#!ho6Ux4Yo|L*x_!ju{pX7Z)Aj4xL><|WNBlSavBKwhCCfpHZtjN%pM4N!
zpQe(sIdK1V_R1jX%Yh3YPJMP`+3RS|b3Bq^3G3$S7S}xPjykLUY~}JxTx*|pTbIA{
znWpl@|9;iq42O4HmKj$B9yGl8yJnfz^hHNfU(Zc^`65bY^4@Tx(48_z^f}TdsmOl5
zSGRBVrm~s|K}o`YJ)d9NHP5)xQE|tL_J_XfPkeeTIW5#(@zINE(HvUW*R{==d~Ktp
z%c0obiO0X`dV4gS40;<keOJDu^x1it0*st8w@YldUQ*R~Z?xWa_pZ28%X_}Zo~*bZ
zs+)c5pkixrrc}%?^SftC!=2vMob-O>^rL`7ajNCjeI4qZ=0-dInq8RXqh(wYd_3Wk
z%*}6$cdXzLvic!)R80PkEpPC@UyBX~T<`DIjxW{wJKf?PPm)MqvUJ?3jQ89MOJ*vh
zgg^gR;vsqSOVBa4bxvnL%TB$-B0o#$3wznUnx|J*ar}I2Zj&y=`Tq04>mtRUCOH-a
z&P><))xV_hf0Mn*;YyhkEbfbU%su$l-tmHA%Q?TLg)$HK{I7BD{rh>+%SEpL{ri9I
z@;|J;=*GEfZYE*d^G`qY&7U*Vbkf6`{#DEekI3vQP``TVsDPDuz!t+LE2Tx&|4Vs(
z*4%%tfMR_0+pVVglW$+X@^*7s-p|O@A&*~73pd*IQ*Da>X7!?Zzk3yU^8N?9ne8iS
zbkJnYkae2%?`FdM{q;Njqd#3qEP8Ta@u}VA>@~5Pwx=sOf2=rhu)Ex7*F&o*{zmMt
z_UvJKDtmZ#e$s~~dFLM1mv^Ra4D>1ZS1j@M`hL6ho6@g>Q>c9IFYAyG@|&)0Idv{Q
zuE#-ne=GC!e?rO3*B3c0oAP}6%)F`UeK|WPGK>FLzqF$K;6ZKaqb%>`m&|-q_4e%b
zzBNyZr@vkBn|u4-ibIjL)vx}@UOOh6sp_;K>dv{~>3bU|p4E5n%oqQ(>Wx*qzG{=k
z*^AtHssE4K`x|c;oOt0NZ?dw=569^{%CD$@v){tC$ouv+{o1@|+~sy3YKlIEADb~<
z<?}2buCAKm7G3vuDYqZ4Pl(Z7J=Z3E@yjVH@6ro?XG`8HGS0km{LiD@WS%A|;rI8$
zE(*`z@$=biY456q#ZyX;+8!&fe=aXCcIfup@84YuZS?ImO^Rn*?D?x+@hNh@*VY=(
z-6^p~=C5N;8hSafo_Bfwx#Ck-{PPprqCFSgYfafAHf0viqyG*?U(f5g9}^DqXxROJ
z-)+B^d($IU-(S9UpPJ_+R<D)LPAkhA&bq%(I{aaK$0bjfNS9P~@$*sffij+X%+Aqq
z=T1D6Uio~x@RXOE{Bs%IgrDE*G?V{-a(2AMwnY}PPmXX{`kpF`-KD4Wz1?fIbnLA;
z&rfG52rQi(qV{c5=8xB3`s_MNO<g2iQk&(^O}n#5$X_Z{;`RPn56|i+ull4jXYCo!
z?`M>{lA8F>vpktA_x0w~NgtPRa(-Cp-`V+V-}mOibN{XjTsiM>pnT5vFb*zd%|Iis
z)K7+NZvT4kb+&g-HjENgR0}?9Jh7#M#n*WI#Et)+8(S5~20uOR@z{>3EU)yi&pgAf
zB&QGG+ZU91#_WB4Jby=GQRvJwlT}lGJ74*~`}MukJ<r0o%rMtpU3FO{HsO$CqRPQ#
zlSQ)FUrN@u%=_rBagASaX|To}o<_%X72Dor5{GyWHg$O{mRohSd&Y}ux!|ek!TLGs
zy;>}(xBY{~SoAeSMQXxW>wiy}Rr8>C^%u$OJrgePc%1!zm+O{YS6{qX8})Lv^M}t@
zil)tw>Mtx^R5GU~xx?wLK<h2XPhS^5xfIwl|IiPW)0y*}I+y|l4?Zc}=3*?hAn=CU
z-Yv~hiR@?ps9jnizCJlcP|0ugqVEr93jAqj{xhHb-Ogt#V|QQSoI5ox*oo;*<#Xnx
zZCN(qH(!Vg*4=YUJFs(#z!Ggk#|w`FCneMfzs{0~SM^$*5q`8y%`0e1O8UX;(|Xz#
zDD64<*hf%&>c?#z=B|5A3YPRFq|MRWC1iNWGE-~ueu1XnE8NP~o4l;O#bKFyXn(<V
zX7P&0>o=d;W4EJVb+6AxX+!IiH~;AiUfIH#`M=uiPjT;A6E4T?w`Q!kw%^<RS^h$)
zbD=KJ_k?@Zx~tU0h?M89Kk+X+e1q%`y_Px0rtjOL|NQO7?jtkAlh68h@|#CyS37f5
zyr2H!_4QIsBeAt>-O3{G9-ZHA$d=4=ZYl5Dv*mld-v?Ozn{_0ZJxqGX8fnGlAAjE6
z-7RB&TUdDBeB)o!1pa6{afIKnNO$_c^4>a6<*?*}1zrocI!gC?2Tcj{&tCif<I4Y1
zr{4If*lhkh^?Kj>pV>uL9G0tR#Gks#%{;qHYrF2phgxfYBsQ*YKCx8ENZ=;Nl`m`F
z<=l9lx#hmb(x~&X2X=}qy|N>yYJU^E%c1Wv5A0IUE<Bl%@k2a+@7AhQCBI9Airxv;
zyy?rIrhT-g-fyMd`@62UR-B%4O+5ReI7>=*&=;18ObITVE}mTe>SOlWIZmbxo~l_K
zy6w%=nGb$eDXfrMwDHQ%i|zZrww{dLUDg_0@BZ5P1mBx`d#$UArY!m>vN6cu#~!6k
z2bWLWdaUBP5&I{R0}<M1-HbDYmoh%}St5PwbyY`*?p>MdWpS0MMZe-F8aGThEj{H-
k+5AmQZ4Zl;>nZ-Xm++o$w6p4dD`*wEr>mdKI;Vst0L9=nD*ylh

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/loseit_logo_big.png b/tensorflow/contrib/lite/g3doc/images/landing-page/loseit_logo_big.png
new file mode 100644
index 0000000000000000000000000000000000000000..b3e46d4bd8c1bc3d2c5165c182c04e18db659c68
GIT binary patch
literal 25868
zcmeAS@N?(olHy`uVBq!ia0y~yU}|7sV3^Or#=yXkc`3t%fq{XsILO_JVcj{ImkbO{
z!I>cuB@w<pR>}FfdWk9dNvV1jxdjX$U{hgLkz0_PT9T+xk(-lOY*k^a1Xf{{S8N3m
z)>l#hD=EpgRf_NpP;kyKN>wn`Gto0pvg1-vP_QXVNwW%aaf50vN=dU-$|xx*u+rBr
zFE7_CH`dE9O4m2Ew6xSWFw!?N(k)6!(=D#dD@m--%_~-h7y>iLCAB!YD6^m>Ge1uO
zWNu<oerbuVk`l-w1sH(1GbI^rPeEx>4%n8IWc}2f)ZEm(l45;BJwttL`YLh@;2QIa
zp`nqSpQ~SySfFpHXP^&v5y(*~$yOzqB{`{JufiMv@v~!2392YaJ;>2HnV_(+%1O*i
zw^gdpO-W5lEX^rVvP&v0&df_KE>2D?O3}42&CRVaHO<Y{&9Kpj=t4COB8qB>jXoCl
z*i_^eSUDG^CYIzEh2-bw*g1kCSRo)Iza+mnBfmhw*;&EJz|d4d!#6QGGY=%FX`_#>
z2^zu>Talsx>?nw6kc*ogmyJF+HGoov9hbe_ZZ8Igq;yXg$B>F!Z|AbtNFU$$U-Ncn
z@Euh_hO4PZ{dCM{DiyMy+9sr4ILD%{vT(|D3#|jP4&PP$-c0%L9rr|Q`i;W(d!zNV
zLc~>r7Fkv6C$LI$Y%ni9Tc5_y!WlZrVuFI{{`2WICaiMb%D(6AH@CKy78GQQ+9Pp}
zp<(^it5<7JKW;3Zb8gw^Q>SK~&(CW5eB+Dps^IkC%ayLK_ljS?dJz5d=ZDfY-;I?b
zbi}%kCT(=mW$J&mZCkh2tLSCZLe1j?Z(opmz9#oyWo7K=KbNfXYa(ku$4;HMt@?&j
zLd14a-m@Q{e(mLseK#%I>y^#*sp01FkykHR?OnI7c8>6aoOcT$rj|7A`?Eg0!m5|0
zCiLF7`Qeqf{)b(@@w0B~@?7_&yO(a-v~cOxOLupz-?Z<kfD>29&S;S7`%7$EKJ4AU
zY2ntjtM)Cu^)&Nt@a;Q43w?u++pm_ks*AjLp(wO8`Fl$j*AKSuF<{q6Fi$)F*mC);
z%ircomH$4w_1-u256@*odDrUd`CDHxD`j+2xNx4Mz9Ha{Sxei4-qQPOw;FCOx^>&H
z;6~u%w;rp%ZrK|UcmGRvdhO*@QC(f}0{^s)pnxg$daC>*^jc}n`Yr2D2EKk974|eQ
zdzI=1ht*0qR%fSiJ1g@mGA&?m3U`inFLnN{79M-M_9^e&@TcM1V~^O*KAN;KLg(7X
z2kswh=WUaG`mSv2vW>~fskN``nm9jwn5vwxp;ty(f8qMKeLMENUCGhRA7Wa1reycS
z+jlNUf&-9q>Mm=g(|7L8j_s9=4>jHLUbRA@!@@4%!TXo@Uq#nmTl80t$8kl^nr+8t
zEts<A8B5#$Fg2gwH9wf*{=c4oYs0PED;pOqw_SSZa?;WBeh)>DfDN5{%t^pv^_|;w
zD^FkkRxow>-hW3PTz}Nw+qKJ0V||%(>uNErRoPqSzur@ukTN5If8kp84_>QgFF0|P
z!!=Y+!{M~{b)DsBcyBJPJ@ftlm)|x<e|+j%-T%Y}P8Ik1Amb;mz-1ug_hI$*1M3%w
zz23D;?)AQ~?B-c)f4(n^Q?TLV37UQF!j%on*9tp+n9Ef@`^Y^9a18o6KYd<(PP5ef
z--2V8_qB-?D=^Fx_@k92y>Qbeu2o&e3(c~5ue`4jzY{&Lz3oZ*!M%TdSDcMt4OP7v
z;AaqY-)hML_lB@1pPUq(qU)B5Z<>>LLSt3y7ANr~&nAZ+TsNoa#?)xO>I-El)``_s
zf-387ut@K}#c{j*`N4B<jQ(u*x-mJParb8}#jOWeGg(b1#Dy?Ns+KbGa0_y<iiCY<
zNW3g?NPb)6YPpz@|B|N<&i%A2?0a-v!>?F*hum*fYyRu3SW)!wxwJn|)4MQTw+$PW
zmoM5^7+6`^-1wda;yvN3SGM%8U6CKz^5D5hoIO)Za>FuKtt^{>$gqwJ8=0ic|HyBt
zui-A)S7utW-$M4Aojqr+(wo{ut&+WEs#|J*Wiu*^hMFq2@0*l$cDH-n?lq61JoMKz
z#@-bDaCKMi2Hm|3mx{a-)1&m3q&;)Yb2NP8dAecF9UG0pX%1;=Y;U~$+REz-l=sN$
z9{#q}`(yFTNB2K*bA8-Ab3*ismNR>VZp@5iko-AygQBXi*}D^h_a`^rpC0|Ht%-|Y
zL|I_kHIp_8wvDn{S_k&N4GsJsyZ+1(tNDwr{{1|8qpn`tIrDXm-~97g^B*ndc=39{
zraLPux%%52;w@M2mEF=b`_gxtsK}dEz3(}|A>p=D%SJA_b@$Krq3^C;%ktmNreDju
zaAA*4UFy+*>AOUt_64y%cw2jI$;^g*C#70AX0Q4CE|U38?N6Z<{*|uX3$1_cag303
znzot$ORCsaF8#an_ITZx8OpQrX6uAa$^0?a-`OHICP`LEXD!^bMIc7>=!L7#8l$EQ
z?|G+n<Nj5it)@={W^Lr&x9g49kD5K4UDInBL)Qhpc$d+X=Fb~4RouyaGyAJ-tpz(2
z*ro267F@Uee#l<jo~t$W>V#?GY`cq3DXv{0b!nr_&2v?Z%_gyL+8yumyxw?B`#|1%
z?#ne_?N|E$^?w)q|406LSI6hCH!r9>Y8x<npOE8*GvV_Nd|Pn;;-h(|eub^b%J_XC
z@@nDDM@PFY^(KJIi8U8jC`~tCwPoAOldIak{hj*G?Ea$meb<<thF%SryH{}5^!u!_
z>udNA9(vKdPjk5$%Y=32X`L@0Z2I+hZheBAgKI))z{^G*mbF_BN!^$n-^viiRK4lb
z!W)rXOV2rJXrF68)|R#7%OuGUHx73*^1W~V_N*#n#p)L4-?tVVjpoknEiI_1xBeHy
zyzUOqje<~<oQ_#cQ&&%GJRYrU=U#vL%7agTeR)lGtzNiwquet8*=+tgl@i~|w>!Dc
z6fXMwdBKH9#+Q{#H>huCIvvU@dDJBIj>4WMKY0$e+Q0A0bh3>~_Lq46zwK~ffuL3V
zZYHVgclfVXMlU=Q%Ad;ghWY&ZN@nYYn--Slrskg3iIX`5E;Uzt{!=P-`qiv|k3Vg@
zoG0Xduwk3mwo5bl-z{0b@U`#f0|(akzy00M_-gz9&JUL)S$<?*-cx%}-*mrFS-QAm
zVWwqBn8J^#Pn^SNi|@KEy>Qzp&Vm&EJA(P`^8XU_WUOv*vhRv!xp44cnZ<>xJim9(
zV&L0y_~2gM3lX#UZ?B)ndt2S7^;hZYg#Z7{KIXH3FuwKNYQf^Aa*gX*gUa(7KcBak
zHwxbVc&F9EeY-*!RBOv4HK#3K!ooEtbi%A~y*-m6RliP%_q%*pZohlM2ZkH0^NzSD
z+}}KrIj>!AeZukG*SlXn?%E_I&tfKj`I}N|&vMh9tIkRL{<i*n7vg!1fWpGNJ2u@q
zrXF*zFZp5rf(6HzukM_^@TvA{v!y@ov@|li^D`)|sW7+WS;~92xk>KRyo(|FYtJp!
zymme;udUSc_JUQnSkEnr$;e(M9K7@E)zk}jpEVxl$k*l7Yd9Rc;Z1k{!%wHvouZR<
z4=nU!EIq_mmeFP}@A%pw;Y{B&1!l$#g@>~&dg`msJm=tgsIcenFK>g_tF#yHKHR-O
zyZzWNFN=<Z9s4&+W@*h$>e{*YD6{I()_{N@zSGxoTYYzbV%hQR_wqJ&{)V&K?~YlS
z^6aubq3vxr`>ghR#_)rFHtWCdxaSb18uKvEcU_rz{K0Y_PzxkNIcCdd_sHDO@v*#b
zfA_cT51RF9?V7e-vGQF06F>Z7J@TEGkHJ=2j`2AI<Agn_NA>idOf}{B;H_QYm{_TM
zCO(6;J*&BvBSvJgd+-*fM^Ud9ZeAM4aC>3x+D9+e={8@Fs}#I7`}H4%@-4D<7uaI2
zE)9^%ZggTfpvH2*xi)d*df9hvy|0BItTm4pv=<hcG=0K3-w!umtrcUqb@LX(yIDpB
zQ^gPR+W!&U^Xpmlg!pG|SDK_4n_8#J2_B9xa}X%GR_=JYtGbEV;UVuFl|7Ab<^Qo{
zU7qGVbG4t(cX{T<-YGV_a!Wl!50~sK2DL1XN-gyA_Wm6ex%K<DRc+g@b^QusjI{0S
zpKE$HATExLclP|Icj0=M#ROTHC2JWA9xr}SQa<0}<)=V3u?hv1)ZfdhFMrZrc*2w0
zc-F(m{P!6T|M)mz&JHm?*%vKt<x?3PnhY81Y8!hOyg%_z_)vU2Ly00onBNSxwbIuP
z9290$xH(a8%ew>b4?JGDSGVp%j-$Yx>o+<4?RT=M`OjjBl-#}W;%$b=pMH#+MC510
z-(yha*io|f@4D%~@-m_-^FJ`$NWO6_cKvy#@7KZw{>0v3k7C-_&v5nWO9hFV<B{?9
z^WG~!%K8OfcY2?Bm1j#n{dxI;zcKH>ITv<(d%rL9Q0Fg(HhBhlM}{|CKh6n9dTtfx
z(vnhGyd+HL!-A>X-u=odm@0nwUZ2~I>v!3<|NA8N<E5{;Q~K`kBOhcx9(}`mjDumf
zAcM33!)tZF=54`@3HCYh<=i6A(+_M}87#mg&)QIX;Z_89=u_8#&{*b~uAv*)9SxRm
ziL0=jd-Y!3)!q*buQ(ePtyOpU?rppv@$zGi#?VFjU;g?<Ni_X^8c^x``2Vu2S8Kc9
zD?z+5LE~h8W$xu)vyPohe#p;W_kPy?pG7BqFY)WKaWt?vJ&`>9;pwN-+B&Ui{%a!+
zI5N0i-uvXD_wB8sNy%x>o^@#p%=I`Ja!igpK0fmD!a`<-NA(Y8E}zz*bJSsT{?<b0
zT*;DcRSEIw!dI^BP*A?kndy3Ug0DctReKH|PKIwf0+oqcP7j2WtRil|UGlwJGi1T0
zg=?4IdAgTXKac0WFSt$D%V0V8+~&DgeY;K1&(AMde{Gwo_UgO?jsHF4|5?;N)MsFL
zz1xqePU;Wm`S^`u@56(iv#r0)%4_<xAR}K}=H7L!WnnKp<o3VmiM??5TEV54cN-ZJ
zCeLH2Si!wbEw|yx+1G*r-w*yf=cdai_}^D2Vx?y5u3KIjdh5lkCx2;m$n~n|VBEmY
z6hDjc>+O3V-meI{wOq$UH0EL6$up(9^qyIFfonLy1&>xevz+?QOkcJ1TEgnUHS3nT
z%9~bCbNYUII-l0j(+-Rm{@*=5BV5J*T*)=(utM1>7T!mlxVly@me|jC{}CUPz2RN$
zq~E)m+xAajT+ku<=3BM;-{30|_skxiT+`Idv31I7gWU%-lbDK6)mSq7N4P7p9hkSL
zcYQqXx;4BH*5q0~`_0MZojrU0`gQC6sof9N0mW|X_V4d)TK2L`-+uXd;-^So?huhw
z=i|HYU(Y>wypv%ABjbi8>$E@o{v8pxRJ_RR?uN2#u_ftg&Xbq3?fE+O&~5i_?IW-5
zB{b%&VEkaA7N%`}oqNOG`xjNhxXdfF5>NM;+}N|*!CU6-r+FLZt>BXmZxLXq<#xVh
z_b*>VUsv=YW9|O=MJ!=EqHlAbmaaQf4sy}Ph=)84p}zaq6>OJ%@o`J$hvk3Y#Qt^g
zc&IPHVBODc%3`xcLTUXS?r!;Nu6G^b3wPb(JbO>-MNTRElSk4o8E)*(+QB@_R_{f|
zSBIU(@?vNH%i8?ERXrivabCc8PlgS~Ul=@czr~2`Ft=UEnBbf2AbEnjWB=_3IayZy
z@AbeAI215bR8)M*wTf-wmACpo{oCpkdX4QxooY$&{=cjaO!ImkNof>TCR{!BwBYB{
z`_0wwcFC@a^}BKVrcBTeX)%T`zMSu0XElA=#1vuie|y1M)6EINkJwI>W>s^FciJ=_
zW^dTSwdZwj_JU0cUnTSvB6sfony~WpXZxCVaHmVOP3xHBu1)*iPH1SCQ||W*Wj3i_
z_(RCnUs8xc$E`y1Wi+4R*Vlsg_J3I<#j;OpW9sc+TUS5$G;{jF%l>w34AstDNrzb*
zwrG2LC(GVg!>y3^P3Oi&UC$iL@Q6nup6frKe&yPCVDanK6)PA5w#*jXll`yh@6yPe
z!`HXVOyzwWvTyCZMT?4m>Lr65qusM*@vdL9{lXu!efq~a<3r@e5C3vo-c3($NxAyI
zkg<T7DIquA!SY1L)%>(VSqtsg3l?sR(V1XTR3X16cB_ot-ygOcXO)F8J$RU>an84W
z{@*WdjP)%0-pwl7sIg?hneEXXH_Y@l>|4Rt&c$%j_|Df`R|DcEEOShlU&NuGQ2V;^
zP|^GIKmDLi35mF=F#W1&dElG)_YZvM%}M*P?#JH=75|wSHz*6Ft=cmo+HG}R_QIt*
z19iS-<P~H_ul^e8XsyLKp^We5b*_28xC-ZPIk9xLZ}iug*kEo+S5v0Ge%1qh3z;3}
zi(i=gLt>xF7TI92E>@jYpK2U;pZ;+77^o!{CebnJt5xj1`rUhEmA0MBpYnaZhI`I`
z(N{P2$8{KdVLWhelSr7UL*Hh;PhQyzcJIA7|B>p9SX(zChOBu94!mDD*DotJbwX$m
zYvt1^(iemS(vNO^%hB+iN0K2ZYR0$n{rpRxKD}j9Fn?1(-hoZCmMvQp^?qs1aZuoQ
zi?S^eoyEc5x}`VVu(A1kSJv$Xd9PP*cyD?60z(Tw!(EOBSB{3-(?Jmz6~t^Tv}2N7
z-vl(rM=(4Hbn9F<TWZz9HLLg@?uc_JO8Cud$gs~q^4nem&X|>_U0yic-!viAbj#z5
zwet51A>}}$*D2fUQ^U+<FD#3)wfpQO_^<x+rTeWmUr!e@ZdkV1Khh?tYcj)yyAQ-3
zC@-sX-t)-(4&#JZ4-(3acVuO`+!YBkW4RI;cs77}!L_=3Q@=X~mU=rxp3u6Rdu6>D
z(}76m3v17JbVRI~3>p&XxHzMuqcWZC|DDCfE-!xknO(l@;VIVp*AA<l`rMXS%(!Oq
z%+LGQb!V1J%W192`XPQagO%roUhc2$pSP}V%lY*^gkeT>6hH5~sEHpcEgH1%Fic>m
z$loolb?UUs-4<qsceQy;TlN>rPMH##Bq_yw%K78cqw9_x>-qFab738WRI2H<jIe$A
zCi*<*lEh;C`DNsSBXq8L1YCJzt#2B){_tl0^NAB4ufMQ5Mm!>KcG4D&Yaa`@xjIVD
zJ1mm^UzO=*q;ur@iS`jC8xJP^*LR*%<Q>rVyrU;{9ar}`CWds8@TzNB_h#NZxNE~Q
z(U{$K&9&#<d1Y;WFn&nXSYs!~@cGoMgR$|<?cW0T*|RwavfsGL5tqN$gSFU%K`d;0
zbNLF!3Ew*E3Z+8Q)SMQ+*3e&Yn)zwzS@*6}cel4}@^^PXUVm0$<?WtLcXOv)`E_vi
zUb{<cLT0P{{$3H0xG_T6apI<5Q#AGVF~&E&ZZ>Su*?uzVZ@~Vd$oj-BH~1w|RV^-`
z3pBZ+|BUVAzuzmeA4hM@YcjH8pZEIbh8H@isb`;yhgE53vbQU-bjWG1$!l#ht(Q-#
z_`vw!S3Bz-mMP0G^)f`TyS7#qEZAKcv8?{^Mdlk9+Y08)=ia$A!aTygG4}@By{F%K
zyrr+@)rHm6-~TiraMr>@tJ>I|Z!S+iH|Kll;z&)is7H+XI~30FaZF<8-Sm}9E({d<
z6AmU-9~LRMme9=#P&=onon6Z8aPpV;<fDR5jAx#5d%j^RuPH-Ptj&V(y;3T-r4uhk
zep<aMO)T8>&C(zH7qh>f)?eS)yfagR;lspah6!sH+~Mt(uMw~*eX3;nDU-k9$6jyy
zO8IxcKmT2nrx93I!LUGZ*1jT#z|zl#xe<Z)*;J>_(|6))m=xr}(0*LO=csh3rGVMH
zDzWaPR-z22PGv8%sQMY&WE3pZIBk<xJi`|DSiQv-?Q2V3Fb3?6pI_CxyzuGQ4KH+B
zMSab!vS%%ru#Bg5Iq%w4f?i%;3qJSWbe8)6oc%y4KPSVN>Brx*emQh}zFzCLV^ZH3
z9(|R!yurDD|L?LN$M?&%Y`XQV;g{AI#)QY4cDKtgX!31%yI0;Zu#$J$Vc~#fmv)Jr
zVxKW@$`p}JKX}<dBS#D;7)~%$aEXTQy2@2P+hg;kWY%?R8fWh@-qk;QA!%2e1cP+g
zU4GW9jj2L%4qL4Jjb$X4RqQU9!&G*CileLVUbYXluU0RYooyGdVN>MH)o|WlsJnfR
z>(582i}Gh?D0(=y-kus3R%XYn(0w$?kim$7Z!*`Z*YXSYMsaK3snagl>hSbk>W23}
zIi0!rQqIp>@bvNjV?|fGckJKyREeWxm(7O#yIUCrdW#g_z0MLYRkB%9`>M+(|LD@E
z7yh05dPv!P%Ucg_$7PSbkA!ImFx-e<mo$C)t*t9}>?&je+c;N^L1Xp)1O}ennh)BJ
zioRRSxIr{)+dj_U|B_qwp7)nbn#~fcAJ4Yb(ppetR$r6U+3qV>t{h?TVA!#qS4vvC
zdU?d}ZHb}AAxqXB`!%P_tn$R|LV2h3r}J0>`xqFOh~9nrRn{;3@KK%y9kK4xDh35R
z@BEAW45I4H3*;Cg-qlTE*wC=xVERv%hS~F-8BBPX8M3-LUR}3k`5EfV`1DmL>zWmx
z4;|dQcDW_Pr0?rlPi5QG8GO4QsyfqMhe6PDPy15d#S4m_r{9<!#dJGh$(O=%(P@?X
zi3}+lBXrmfI94!7GJFlFbG<7RH8V0G@V&XzmTckql4cBde!pRPQQ^#%9%t3$vv_~Z
zEOF1~1IL!BS#0|Mh1J2U?d>-+r$GNb@$<hew$eWB)3h!;?a;mFYJAcRVQ+sNVQ<)*
znhdpL0YgAgm-?yiyf5<<77H_+=3t1<j5?S9GnA#lpI@HAZv9avGnGHAT&G^S2)&rM
zN%WV)hAp?4W9RBFS7iu#6JMQeJf*{S(%nUVf8M$~L`1RP)4!7V`5!AoLTItWeJzof
zHLoNYKtr{R6Br9DrnWOg^fAovfAD;>4a<R957`bpS9ALAJ-sXQh)v6^KSkR$X0tWk
zYMOA$O8?FKJlnGj0{i!G7Cn`{H)>71YyR%1m%rFXm43?J@V+%uX<m^9;|3u{1AfL0
z`S%x1z4H?sMMoJ<Fi0$8*ikrx@xpV~ocd}#i_P1VS*ofOFFa>m^78zyHaCkM+uxcn
zI`IC}V*O*vaXvnO_fA%g=}XtOp1Ypbo%!Wq&Te6*1Xf7ms$*F3Zw{k@4C97%u5VOt
zC-0nU%aHM9t&*&p^Ms6lPs{`6OmBS9@FC!tSn8XPESIJv?OiU&$8&MfgojKHv%bB3
zmC?9r--qY<#@km&Gd;cbJx{3A?9N*^hKP-Sh2MCeWvumPNRWis<j0V)Wre-55ks3a
z!!<okXND!UWz{`wZ#OVHFg6M=_*g93^~p%F-zJtrs4}%SiAmekUtw(*@7F^U9x@y_
z%uz4<_N?9c_`UM~T%s=uzFw$O&Az4P%jyjc_Ph-03Ji5UwJS9j<X;pql@nyhmH@@b
z>k>wX&0m-EG(6;J*y_%h;A$MA%<zs^oI#~JPif1(df_SWfAD<r%2o_-NQjlsPhg&M
zwPL}>ot)pe^Am1&upf|HA3t}wpx3IZ<u!lby2xFeq_yJKFR?9qZN*<_1pU|&&d(4g
z!_MH1i2aDEkFPKoY~Ql(`Y(nAbG~N}lUOX)=GV)|c&>MH4Aguwt<t5kdQan&^gf5q
zJ&m$&bqg}$Z=Yasxb<-HZqZq@OxMRx%ojC@P0yRTzW2i|Zf@?D<=fu!Mn;<m`{lYa
z?C@niu&=f|N%Z55sYe-3ByEh~X=rA2*c@aMIPW;a3U-ERTnuZzM)UFAs$;0h%Zj*p
zFJ0Z~clPFmWncF%&`9TR5@2L)3|#xcKy*rWVWX(2+y^r=F8*7IvW^Bz7&;!f-#dNn
zTG|TN4&6f24DITpA?w%Iaxi({4Vbf2^i{YGgY5bk%~vXKZoRY@ULeD0;Lebce}B=-
zEH+RHoW#I$h<S#6>AJ7dJPo@T5<YKa*zxfGhVL^!Fgx%vsXJcpzR!9-i1k!i4QHk5
zQw}#?yZi**C)2NYPCaxqG;hVK&rRD`dOqQ?S#mh*VF1H}#QDe9vmWf<Gkxh*-hW^8
zN^GV6T<y$0qg~`wd8+-wkB|8aU$=I<#<rwN|76_o(Sjl3G&nex3p03~^5tnr<Y@3?
za!6l$f74+Z22+kXJa;p6-~SQFx~AK>XpL;dH>(qxrQO*lgf|4LFZ1JM$<*3<@HWTW
zTwSJZY_CPHa5HG`>krc_-?rT$IoLSldiBC9SFNvVKaz2+eqbti$7=OL6<f}%@W0%*
z?#+3;`=92++jcAsn)1o<Sv)VIXR%LaEI7jP`qLlnyxj$B_7ohdsQkxsIOpr9!e_1*
ze;D+bdYEm#nA9t&zj|{|+U?Y~h~HZZFMjqFpIKiXcjV5_^tszg#bp0)<zD>wyuA2w
zmrkqhZE=DD>4p*MhSydfKO3fJ8hJ&N?R3+L6)%3|ySnJuJ>H@=eO^^}pSi1>{;nGm
zn=_tSoGkNoh@ZXooRR0Z_mZlwLspsRT5qUKH~Q9U&oC?HwZ8<zVL^uMS&aR4r}grg
z!e=li=x6w}oSUx4{C+w20;Qk28Yj-Sy}4AiMNZ(0uARW{fE^3h>|)Fk+{UqsgYA3S
z)-IbSmXw7o4p#O@RfKASYLt$Czxb%H>lPd13Rc^0>l2sNZ!PqTapwD1pXetQl3T?o
zWj~8ORzFT@o#mg~N7);m_A=IXGYItaG(23f@>+gO$~J!$hlNYla+I2_Y2Gh&_S9rG
z&stgbd;A|B{<ggK+H3Kfh~z1cuS@CnU3Kf;#Z>=&=Ea=)^D3I_A3T>c_1yMb>Fl<j
z6E|*?1~sG}>)iekXcaKuP_gV`1XJ>^u*s3J4PVRbzs*1HyXmdz+H+U*nO^(Erf#yB
zmUy?WX@0fz>tB^NdVO0~lt27<-R)-C--^v&tvMz<@4I)-?7dIRy~2slTEpE=^+^{d
z#CUv7n>1BbZS&jE$!{v|OFWOzde!@D#`^U$b-q1awBGYuIq#%jOnj&KAKkx~+Y|b{
z*Ic`pXCwErOoOuw5_t>~lIi~^&SS7BfA>hS_N2^Fh8reHj2n(idtCdri-kA-UhjjC
zf+A0za#XxxbTs_?qWNIcCo7Fl2N$fqr~K_oZr-(q4ZAnHzR@cZNbKFy!o>4?$u!HR
zZQI!x3%0TJ9y@tqXPv2x5W|gU?6$wGe^qxqfBi12k<TW-*xR_`Lv_}jxD_ih8-y5Z
z^Byi0x$yk+kIV<x6>cpwP?uf!EjQ!G9lp2g<C+6ZHP`H|JI?gL{l@e?#~BYuu^liF
ztc*D!-P-(qJx|)xe2%Jo3EhvY3mE*4v*)T_b6o8i_C2vy=AiP74SxP9KN^-;S@-bo
z+*^@zWWWB=;<pFNeq2(xJ+F6BV^mO{l>TaCrL;TM-%hOEdNf@?LptSan@8kc+4WyD
z8IP5$72C?Qa6RYitoIDB-YC><6Yl@|<3B5h)$ETl@=_|dm4EDZT_*9>>XTYu1%G16
zpX)q^r%G6onfC2o=^t?a!0Wr6$BWkVr%k?A&i8%wy_BZ{*P_Mi?)4VT?SJlZWIiX)
ze@6x1?{9z1{CqBvK}Lu{rkJa{W6P2M5&8^Ao)?|`k^Stvf`7q=vWY*-g<rb9`@(YI
z(JR$+uhdSIUHcLhsl4NwFvBc2mYgrWyi1Z_&n`aj_o*$1|9gkTYk4O$*EQ*|9L!<S
zWmH<a*TLgs)0DWSGa4>jWe6>mYuVyfP%Y*9Mxjyh=(DX`vl?#PYcvRt<^1}w-1+X>
z?TzKf%Q9Z)i2nDRzxe#+`+_0Y9#zIv>?>ljn;w08SFcU&(!Ced8)m<{!@k<`sA#~9
zjl5cu;+#KpiwE3(%X~-hsu)K@DoaD^y@q|Rt7mNXmSAYFjxzV!RyUo^Qzk=RI8L-%
znZG{o;)16V4w4ajI}chGFkDwS-p5?}N3Tix&;R`j9RFOHXHQi8q$8OuW60KRdoLyM
zzCol+^v3?Hk4)t`)}5<*<!$>*CbnYDfo(TajBdwun?%1q^nP!jO}a9dhfcYRkj`wC
z*YgV)YWy3F4z64BDn9?w`>SP4nZa}ZYQHJ&?%nw6yhckSqd^J7inpH%tM?qtvES#}
z<;+}XTEQUlcI}pg-TV9hKA5}U%%cU5FAM(l&@|R!G~j03(891`$+7%MWf6wzhtA9g
zw_Q*DP?N@dy>0ozl<!$5PM>amleEW9{m_EVa%=SkuKhgd^F-7|+bKcWphQVx()I}s
zei5m%Q`Fuwr9U#$&|W9<ll|rWm&GP?z3amx<T$Du5448`X`gaPb>wLOGe@W3T@2&8
z(*EY>lYd_L_bNyJhp*Ja+y#sYVvGs%_o?pL#M02p)8MSzb#`&45zBP@hX;PF3@j6Q
zz`WABfFaB&WiRtqe!kf!KPMZlUVmwe=F!sD*CK{XXA4~J-t?>7kXQ3|c9mnLK!Xj#
zgm=tl?Hlc{Pb?QW{juldnmbaWdkcyzzMDTxoOrd;<fv{!<fiKC^_d;7e8a3aocs2m
ze7}>v&6Ay}3_Oesr(S(pVfk_E52MIpCF7_o#~wV?7hq{N<BnY%rTS=^e>j7V9T$VK
zy9L9yuN;$}uX+4bd%?@SvlrIglwVkM*_(;&bFq=)@)b@F3Hk!x{6e4o2w!)N^}s^=
zWg9Lr{|uG8-_$F2;B|KGvMaR*raxug_eSW?_eDl0&ige_iq&42^|<&@^36~0dX?q{
zYJXocG4{JUlY=_*g9-c`;d(!|U0hRRe=4W?LdL>Oqutx}ds_K^`Pq6r{y1TV*0eYC
zC3U?1L@+ize6lx4-#uU%`+ql`d~f{;ytQ+G+Qv@WbU&l>ysEy+(S5G7-nMP1y4bPN
z*XYQty6*4it=%l`_8-4&u2Z`<-~O<;T+PLb);*zrB|k_qus>o9**5dpk>9^wd^}^H
zD<7w}Y)P)@-_Gq<MHx2s9^z)`Uj27kXLH%rU(4NW7}$<*bhQd!UhsHo<b^N)&OZ3}
z$Ztj5ZkAKq>KI(B%^d2&q@IXOaR{5pz3I<}XT~gMZ_91NivP@iY5e78!~Uc9MME9^
z>ICbK%W2%(D|Twq{nl5e{SJqp^JUb1SLav}o_KRvhtWG(MZ>zktOtCSn=i=EG&sXx
z5wbB$?en6HJ#GKno8yfto!dlz`Ei(io9DP_md)gelRoV(+}u{4HG_@i)22lEABR*L
zEa$xPefyOwN~V%6vUZwSAJ6ww*<I?Q!Hp~m(|#<H)6YC%Q`@}KIMOy_lf|WstQlhJ
zh77Nm8{E1lJ^KCUgpsb=#+pBOcAt~<VmM;=)2o6ZA@S=ZrEp~hj$gWqudp}x&tAaI
z{y8xDyr1ahRd+d}VmYGb>eul{-S22V_Wi%~9q-2rtsd4dEIPcIBjdf}$G)clJJbca
zjLID1%GjPfd*<X9r`Qz3nIdi>C^q+zM%}UdPoIcBNH0>bx0d~%X8rH2o5sz3Jf|kz
zYdcly?|k;ue&@sKS|)qTyJyULSMzZ9BAyU2t;;J`rOPu+%lj)}<Za7X>s{gGy-Je7
znb+XrXWt~z^*R18KE||4|B&TpX+3`Ik;XChr`;AF=a;rJA2tZJyJ&uQ_GRH@v2&$r
zVlPkhsZ3sC(WY%TK}^WCQ*>uv$)|0+)2DCw{~;$|b;t52M-E>+?si{f!=_%w0tJQ@
z`((B;%<yE`@lNxF8beLlD~)ZM7hFF!**g5VQ@-Hw($x#TmU5fyuT`$F-ytex7t8WB
z{tjnp{9Sfa?SE|(e(-OpmrecOaO-LP(er-<UroQqIL}{9A>^OX`H#vr(|expVLETi
zy#Fx!{2y~4?RGD`TV{Wl!NNz%+43@v*ze*W1=`Y_56WX!OuX`!DJv;=yY#vk0m~GH
z<ZX5A4<A&nvCjYDbf$Be<zsD)UaqGXq*wh~)M_|!(=YCp0I%%JVs>)-|LKM}ow%QC
zu=>=GndNg2gN8b8F($YrUY&GEOFu>E$4{NRmAe=od~7Hv(iah0!5J~Ti@}`NYX1HR
zhotA9r~l}?f3c6F{MLTo&(5jhmEZnsFt5DAeE%cEg`KR77j6n%*uJSdTXFM|qYM&E
zfk{_9PX6-V^KR=CQ7J=)VxtucLrgY(Qdt|tXZfddi_odtYBw+13cdDgcBtO%7<PY=
z$|>guUyWy}JoQ)WRz7`&IYfEwISvM$zDpjPtL#;6(iu`1Y-|}foOrsuWI4N*zc<4h
zbrremJM#M28zfbE8Edsy?`Z4eX|U>M)30&3n><DKrt-zt<v}YZzL{I`=SIRdFU3|#
zDT~zK`zFSeD@_a9^xpk<Mh?fq%9uBYyftds?j?U!{^h!Od4Hr#sha2C<E-J)`Uwmv
z5O|r9;c`FI0UtJpyRNGpHtzYq$W=tGyEdfZe&Mp$d=t|rhZ(<)YkXokHJAU<&)sgU
z7Tv!#$uF2wz0gC{d3X7PpZ7mE{(Mn+af5|a&!)F^UcbR1U&WA6cXbjI%Yh!J>Gv;j
zFkClc*u=`-pvuy4lo!^ht*T5~^n5l?(rv@)#~G46{?opQ-!{1^ko~)HUTKDr{;pD!
zicL&soC>FWb2}BkLi^9dy$z*bEw>$Qy!a@UFDBseiiLN>m40nBW?@!fe(-|ffEUvN
zt^G@<niw!_V&!i*DgT3E!}k1CvC!WP8+sTGelYwgbnE`b5|{q^Mce+xVFgidqLr4q
z7Vf(8a8B#wj|cZDZ~0mt@+s1j_pK8rZ;I54e3!GG*UvIrghhR@oxG%GVP*1F#g>1H
ztG3^>IMq}*C9IwCLkZ&nqr?60|9@M_c;VlH6)X;3S#$cDi~E-<F@&wD4Prj<-zPO$
zdPAxGUFL>&4f|XfI^5X~BqU!;yI0y5xb5%!laIVV8ohd`FTiy1nq=DQ>Fklc=`ORT
z)<04x@Ce^<!Dg!M)3-<df1japHFeUFy6sbc{H|(VvP~+Z+U9?HsO-b@ah=Sc?q_hD
z?foUKTE48dtasDngp3F4x66H)XPq+PYT4v9CSR`2j%p0+WzG<O8+W;Q?Lv+27x8JO
ze_IpxUG|b-d?4}X+*@UFhO0qQXX{Q?E!}&`U7BH;AVZi8Yr|0~hIfC>nH8c17-oE7
zTq4WbAla(S&5$`i{8XV>nE#K;&*yrkOtCI7C~KGdSHoO<{y|O{zx3Htbu}69MAcUQ
zW;wyq&@c7N$g}S-2bcV%N4<{DTh?4!b-(EDtAJ&`uQ*@1{F+<JcJkM<i#gR76mHh_
zJ1mw^y{+_T&Q+IO^~nEOlXA|xoa|fr<7UX3-D>-fYg_y{-Q5$KKR<C_`1ZMa8P|Dq
ze((R7n!Ntq0o7Oc%8safW83z=XVTmGiZ!;`i=I5)_{w(g*JXNLlR~VoJ*~a*u0Lt-
zzTT%d8?Hz9mDZhD@w`^y(%~EYk1FNmIxU&n<ru{0H-6oAIz{p+gU0QDc3zAd7%y?J
z`lH5BzzYh3HiifvhKLaP3)@*3#6tQ67(_M{8hCAr753w=2wwBnZSorpPR&1G=gdCf
zvhDJzM_Vlv>KgV}b25JLbYAmTu=(aUgQs(iXXc)N^#0yKBi+T0bHo<^aM0NDd#}<2
z1&;sPNymKC`L*WxhP^kBl5sC#Wb?c$uey4-Puf$<$*!>p9h<#{&wT4t+Lm!Iuv$hx
zuI5<J?02@-S$TgflIJ8ZySV6|j?uMW4F7BoEm*wh{ORKjry2})ax?xlpTg&He0#5P
zZ`xNqnLD#f-saExo~M6MpXu)GN7gCFdpXLcORnGAJ744e_p`~bbd{3M+}YWEUTMcs
zt9eZh67C5eo`*N;FG-w#Sy%6(xoAvwSK#_&h7<;wqYM!bnGbmFUn;hnm7z?Tq3lt@
zxnpJlml;0PdWbUIxV`-ncPvlCL7%^;KBr%B>X>kFetkmOlFL)#mqh;D?78Xxqz1K1
zLiL7~cht`BeCu>etM@@#Rmq>9CJC1tY!ds5rh10eJ6Q)k_;^#}<~~nm57QQH^S4{K
z{Jqh@?zz!^Q*Qc&uPcr&zxw$6`O91R%Wh3=xFFGNu;T5#Pm=Ra-+lj2d$s$A&7D6!
z`yOvxwjgDScuarl#gD1Z2R<o2{JKP(^JZD`mC|jVpPMwkPsx7l%XYKS*y)M)6TvIO
zOdnaFfBtCveC}a}8}bZ2=?pTR3^UppW=xBJx!snd;VDl;P8CB!Vy(k{C59u(*Yns8
zG<whb?pwoQ+kL&%qyD_qyz^`1#2%<WKA@wyU1F*AvY=SLJqrxl95@z!*mWY}DdVhF
zmy;g8S=f1f(iD-)I;E!?9ITF2ZPmBazfp0-ZOwXi4yP}fDKb81x1Humd)i;|E;nia
zec^3ex!AnEE8X1RbE$Ic`t^N1KV(;bouhQTSn<ZHe8C^S9kCW+=Rf&%1ux17=a1j}
z(p2=0-3;-b%X|!HPw}xFXl9GF-55E=`PaeOyner8|C)9`Uf>zj?H|o>XL9&PmF;^N
zg9;1YZM^Hs(~x7u@F0zs;rF+-Uzo&YB^lU1Cz#*<e_G?x-W7@wrd`aNQ|f24?dIHA
z{@Tpoa*2xX)rW`w2ma}E`n$I=X~MF`rwbb^_HW?J7Y`0Qs2Xc0lo=K`C-L9i%Bt;K
zSD0`9>z;HgHtcr)O7q<QM}B+0ZC8(wQS1mj>S%lIn|RLVzq_`~oUv$I`fQckwO^71
zS5>zx>u}wB@|4?~^H!=#+t;e~8!|uejQf%D@z5vv$!lb;3v8Qr@Yf#~`Sr4QKX<Xm
z|Cwif@%z{NZ~hy4Z2RB)&V9}E7pw=2m=195e|yYvcg<H}o`xJp#sop82Xc%Jduz3I
z_jfTCY`7;8d2`Y4E9++*DtU5YT3nk%YSoHU_JLvghuB}__nPSH+~j|4FS=$|k73`8
ze}_%@COI-L`0&p(fG1w#cI_dV<Em56vF<<pnB9=~#_SKR5A3&0P&HY|#-0A~`?Qyr
z?jL!llGWw8>0i*1Z@i5sl)YZo_Eh<NjFyq*Ffcb@m=__!<)q)IzO6NxchV>4x0S*v
zJ9Wwqq<iwU&YSbc`gi)|=XX4#8Tu@H887gztQ4*L%={o2<O_xeYAgqub~Wr%WiVLZ
z#8|L^zx3p{Y8KPl$?v)JCnfJbx#rHT`I-|Juo|>2Yb{jx;q_$FWl^TW4Mw-`_01^E
zuNP~XlQ<>k<fDAHz3pq5Pn;GpEPofNm|uOm$)kD6KW272{aSt9O@_0s{$H$lenQQ|
zp4sQjj@-J(v8=ADHzD-yB1VxO$y3$GG9teiuUWk+;l1Rlc8|@U_;foG8F-f2t}(1)
zRFE@1;cdI+Yn2pZLL@(fo$=J(+ymFXzUK_+VA%0ngTW`2L1u%`=DgWW{O4>Y-&?7d
z^1Dx?`%`k%RI4f1g;iH?=GpXDdt-&;aod8AHN7TxYhG14y*6J_&k{U8>cH-=M;ER#
z{uCr}=2fyqMM_frpS|a^1!w&eOW%Jjqp+fx&-8-L)R$H7?-)&Z`M@Lc=km@Xakl&0
zvv{xHTyDs~_EjkR+C}CWmRB^Z+%y@c>7LYF&CcSu|7{XO%4JrD`_2q&cwa=bG*o{&
zCCR}4>6wl;zeMWs#M<y*w*)oT+Dn?PFxBren{`#!Eq0Gj?Vg5BZ`TG_K3#ThYg|KV
zlypzJM|FpGKY!}niO2aL{uEdIdhOBP<^?|;FWj?#%ewel#<ss%7Vl-fjym64v|v7~
z*m1$&HE+w<4t!v6$eH}5<3$GBfdz~P%X2)<m>#^lQ?&oB&KhTi8kt@5x|;Z8&fL7g
zmKFPd-U`Nqx?G3*Lft(@f()A_7=A0?dM$o>;_7rMzR4-yyZ-vM-}qd<Md|FR{;$5b
z6@z+q^cr^iNlj1RQRKKJaFgjvhKBVnJw}Ohk4#cGt$08G!!F%rU(62I`l{u+eif8;
z;tA_-IdUP<n$;%yi|$?qgItY6`fj!i@01vHY`GY?Uo9xPDs0Cv=h!i`gx4+%It~nX
zN<|pnolItsx!`;`XS<~CLEl7Hzt|uy$)_(BJAYcYYJ8WjG%qZ;yZrIji_YKXtF!y>
zX0(@<VNeo~P)WU=Fgc)o<*|UE_X*+dlf$p8Xx~2hTTIMBWU<fjZR@zTOnc0D-xzvb
zTglVV%<?fJb(`Gk21$mmY!1%@8D{h|%s9YsLX3&wOe({h?P+(aw;f!w#{8wJuhz1<
zskRGa-daCs*Luo5WlFDGxAT6{gOeuJUGxmv?6V{BVZ@&v^&Pp_Gt`B)9@V(LPG0wm
zdDy<>Cb{Ux5AVNsJ~2<;OuS8Bifwc5PQHX{iTD$BqVL{<rcW&j7|v*ZzU?QoWiC%c
z&OxSxN7WmQ*IO;fe|v820cGZddK1O4om~6(hwj%5axYI=v&rBMn~s;2?rrNYw~vS3
z&F--@oY6bq`n}BJNKJD#eyb@Rm6a)97InKbGt4^kiizdpgGoKz(?v_om;U8kojrL?
zJ=Zr^ujt0y>S>^9vk=CFX`A^zyx1<w#87M4;P0OdYF^r1<!-3`Eyk!IC&}>J_^<ZO
ze@T~48oSAvHAmi*Idye$vuLiS`^V5-m5jO{r<A{&C|oPAT5Ec0Tgc+0>zt)zt$pM7
z%WB@%`Rn5-ylhtaGS8rW98!~xILSP1EZA+exbormt+FLeeav^Pt~@vhnpCS|IHUJ@
zwmif96owRTCI)jQhG${yUvwE8Zt^rN7jf)bqV~$-Z*_x>%|ojU-w9LVrH^h)vs%kG
zEk{#O{pK+~`|mS7o-XpA^!RVmZtKXs_eB=RiHpkG=Ph{{AJUUM>EfCz?7M$3KU{oP
zA$mQBwMN%g|HF0{#oj0>GanT?x2YG@S&|WD*rB{S$1}}S_sx-mJCARBuE?+s<QhJP
z&w>owl^MRhjow|X^&#REL&d80r)zF6o1t=a^?9?H5dJL2)N}mN3_YTSl|S9?mdy*8
z<tMhG|Le!_yL(p8QV)oZ%`MC^yC4>vcDp9I$JFip-#lmYzq?;tTM*29@jy;kP^R?m
zg5CSL>n+~b=q%RW@lW;F)w;<y<<2r#oMF(=Udep(*F?q-EetQdFea?LzwP-|1BOj6
z85-UzG3;Xcb#2|8BDMqnY;uy;*t@T=SaW{L3WmA$?e%Xa@o5KCuZ!FyZx=bwIe5<3
zC>f7$tfKGB_SAN2>IDSxyPW(Mq;oiHa!NHDdy4467r&pha<yN*etd29#$V=kFY4T)
z-?XMa7m_Pgjc;ew6L`n%=w{ClwDZc{J;EJ)x7rwH1jcY%Z}R%PjhAr)3*!c1#tpLU
z#b(<Yr5F?LwUsAE_h~Q47i4%ek?})_q4%Us5!*GrBJcKp_}JWGaJ^#l-{o%Ie<#Eh
zHn?rB`p;?Wbn)X8!BZD%^Pe7YIl;sx$P)X*Hd=t`zO%xg`x=pZd&_hTBd=&aWN0bb
zpRjWqyL7YWt2>MS%>4G7*L-oSJ)7rAJ}HKeQp$%7j(z;X?2szXydaJtB1D*@VJ9d@
zMIPs5C`dPBT(DWYmGOf}aM39vTZVwo5gMt_&+@Dfoq1gNZAs#UoV>|=m+ys?TRhlb
z8FZpA^ncQn$NcRq4E@Z_`y_Uju^dX8{N_eS?Q7Y|Hy($_I=oC-^z4YgT;GwCkJt5@
zcW$vbSmnI6EIMn_l>hDvq~n(TJM6G~KTBorzUohvYz}M(0@)h;_UE21Rb<e+$nfF*
z<r_EHa%BC581^+YM2KG4uE95f@xv1(jsuP+-u<6`FALd!;Bp`5Z-*^Ai_ben++B41
zkoenOxwDm(9<*G~?T9t-{cbyrou?&hcVO8RH_yJaZkylceu{GQe06^Dqbc4ei%mtY
z%BkMgiRf##ijAJWO~56S-`ZHaz4W7w_o;NA2A+n+;ta(C4B-k4b_W?wEYoLrcOr?w
z=4taWr|79G^A@LEKQ7$Hzpg`2smkD$5c`U)wFg&<sa*KCjw2zq>S+{@7{|xwYAg>*
znwQi$%`>t2cSqx{q3M*{-z1ZByM3(QowS+d+g-dR7c?FqBg-)3Va%ks2<F?ybL+ev
z8FbPZQn-Z}im&fy*1Q=C>$<C7jIjK-`^VkiC0_rFT7-1_MHozGuC2eI{%zu;_fuvS
z`)}m>ZWFopgvv^JiKB5Fvm_kc_<2?-E$up)<bU}7eqmMZS1z0Xo_Znh#x>jIw%nDa
z=lr=DL=;7>+p6Vr*&Ns$?lKtgG8ROqb>81*#IWflOGET#ZRTIoIt)b_B2F+=Fu8hl
zKl-q{{{@rqq@o0;p0BS2nHwhko$jcf@oo8=fZ3Y+@2-@8b4~lhXHRE4(L4DPA`cw7
zJ@*+!UD+*gL+|qT{hIF$82A#ti*KGKDJ%Qe6cp!z{0*9tN4+m`txAk`>%711S%~Au
zA~pvzSq6jEMho(D)fvucBr@2%oHFN^@npT9|BtYJJ1ai3-n{UL599f5=Z;!kXRkZ!
zb-f~T?nQh38;9O2SLB&ymWtJVxEQQ=Gp_yk(!fc7>%W{b>}b9eb+<^VuT1!N=Iu{u
zEg#qEFK1=&;%QjOeBh0iMiN5`_b~y6;y(6<nLG{4`<kP_WXC3L+R}33>cegNuTLhr
z^qP8YJ8V^H|NT-@SDpQXWU;#q_xH2Cb@)-q{5wv3>9;}=sg$o9Ij1S?-Q6udzw!C)
z%uP2W-~E!0j<N`x_uZH6Kq8Zaj(FT-J{wi$gbxf69Skq5xEQnr8RVP$jxS!o3oaRE
zF&rq(muGIcEy3Xbcfpnw++U(N5+~#=eXJLHPS(<}p?lHBH|(WM2WkU1ecjE>8ql0m
ze>P*|bxsZ*_xuI-g;U=<aKxld$~EUJ3vZD$&a=9$plRtglX1iS%&98Ti5Bn5ie@oB
z_|~C#F2}%%(O_+XrxsI!m)pks+fu$V9$3C?<3`>ax8`49DDVz*b-1s@;giT<b5vx*
zrhxUW0ulc%6&5P9GxUBs6tS4^rBw~X4O7LkhYi0aRWr;^zLM|Rq|7OQ<L*Q5rd8+P
zSo@|uy|%MhoWCOf%8L2N>$(4Lo55J%P-*&El0lNeouA>EGQ%?Q-@XhFj39BnS!qE&
zCxcHqgU!$8V^y>3RtDsRipS*cO*q1^AoAzFxR3p8o|%i^US|uPAZb3gH^KE|hra^H
zyF<y-Z@f3xdCa_W?%SIaH@?#MoA&)f#iHrVHtv?p3I7=<6qUHOB$%3k#;PV{b2B}d
zXCt2DtNksyh##D(#bg-N(nDCj=n5uHXk*whoAHCn<ptYcZ50wbYi)WgOUBW#;q#ZS
z`Xh_2FXfoC1j~5|H$UHbG9^&nF!Is$<eOO={P@2gS|DX6(2?q8el%)*Dg%!yL)X5Y
z>Pkrk!S8q)lqDE4-Dg^scZ4`NO3qb|`F7?V>j9<xs_9`)ip&bp6BuUfoyM@iW<mba
zCBh71h79vuCT{GL{_4T>cJp55-eze(*Hf?l{;IIgJi2DBPSU$Q#a}e8w_f~wu766d
zn9lvTYB#^lx+t`t^>?^#-FZh|w!;atZYZ7FFm2g}-?l4acRsi+qJE0OLV>}D^Le#K
z4&#NK{$rnIB@fCSxbJp$XYty=vNfE?UdXU*{?%^4bYRtDh7C+y6<jlPJs8eJ{k~?%
zu)}ok48{*v@@J*}$&3BA!qoGXpmR^vVGl-!=E%y}`L+|PZA`0{cSY13i>Q3a;=JZ9
zJ4^Eo`#1-QnA9y6pFTWyW7*(*n6stgF1znD*ZzyP7uKJinf<rzV$S7EYa7w3b+uQb
z?z}Z&J!9R-;}p2biu-(d#S*3mJ0iD)SMHfP@BG0?8@u~!gAcPYlq)mj9Xz=6_#_U7
z{Z%}-1R0{|gImJc^)bu`R9HakP8dI2k<vduKhZ>cZWTidgW~3*?;Z{Ix1JtS(7gR+
z#y>UvMVeueVvm&OD{gzcIU-qLQC;iTKhHfpYdI4Q81H}C?90;f@8u&!Ca3sy4m$2J
z|Me2zMOv(Q$60r4?gRgu-Ph-}y`63qrK|O>FQhpq{t$!39flQo-4dL?XSeS*VOYb%
z;vlnyl|e6&Aw_!5RzZe!mV69ZW{Vgm1Wo37YHL|}Zm#&F^4%qj4&F91RBlTvKa4Dy
zc+*Z${)3$DqgN*Si{5rFRK4+IeQ*CpspB)>UYt1T>vWbD&{TfQ#jiJ}`RRmzpKxG-
ztHb}_#_4+Rri))l*p|ytb!)Hsv!Jkg$IdhEt0hy8pWRZ-!~Ea_(}Igh-icM0nHj!6
z(Z4x0n)Bq!*1Fqwm>c^26J{hzt1$QpG3>D1s{_t(x-yOo9sD}o%4r_?r*;;fH=32Z
zyCjlFaZl#9FP)!i@AG_57knF;GwDtH*QNy@H*Rx1{Q6SQe<@#S*JD~mOP2gOr?RqN
zLid&U4YkU>tGnJV^yF98?)}Ru%5s3eseiW!!wmsZS^GZsP__dTm>1;SKBc?1c@hVx
zR{bvWtCzvxK7YgIn>W~USQ=)sG-RfNrd^o68!*iCvRJTh_3dvln{E{*&ZySSusWU*
z7I!7xKT5EB+j)`YXZZB_pT9q69C77z@5XDDQas1jJ3Uw{Xua(PPt%uA4G(@g)SpV7
zl+*uWi};6sA2(l(v{_}uV8qau!r<f1@MjX^1YHk?m|7W7kNC}e^E2P7=dz_TGPB+M
z)m~7?cX8jgEr*-ra-Vr$i>~`O$6BA~`s<~kM~_Kl_ju~41lk=my8eQzQOvMn{cHcC
zj6Zuq`=|59_wBj=am6Wy6ATuQ7&aWp=V{3K#qeM@N5fLaT5U;2f&SP|o`&TW5mq`J
z?7vG-8ZSTElH1qS_djfhUAxlRQ)_R;v6pyXRn(sE-qWG*u`<1H_S%>%lhx&tx>>90
z1T+%PxF*z|+BN-(#h*WpwvY69H@)4;!FLYaCRb$Gp<RFS+#D{CLkD*rH(NT@@t=6V
z({CY`1M15*gUa)13JmM2gntKRI^4fx)5z1XT&QxrhW+e32Zhydm+fd&<&@L%o$|c)
zvc}CYiLbLhZL{ce@!DqXex-Rvn8R_aJU5OrSJ$3ByY279wBJ*g|9Yi)@v|?RXCeDu
z1J}<CVtyTat9AO<fLcq+4zmRq%2UD*JiE(U=k3C9CYb?Try2_|2nTyJ|C+{d;4V+Y
z-wP7XYd$TED{R>JdC%SVe+rzf?ymiFLE~ohSFh$KhO0S~-rSdN?0;++xM`78s{3?1
z&4!Q;qv-mQQ~66>-8bn*&X<jN!>;CWYJFu&Dg%!uLtCZMyFkW<T#g1^iy50Y@?P;{
zY48+DKE-h3Jo~b*W-X0}tJd<k8hq+8*DgNv@~83N_K(jWH*EFMe?NcP_Y2$flz1Gs
z+wU^4`d72qMf<?pU006&h$xM#-*-JFO#ZU!vnz|P{&#&=eg5&qsf-32`gOaGiT6Gz
zXS(<(x}2xv{_BN;Ju}Vo@68nK{_5u-Z^UpWdXw^M8MF6VAEJ|@G?^YKgBsV&59VEJ
z*ynorKt&3}oA!+3oHZ9hUzw?{u(hzCZ*{%O@di&=+Ulf9Z{}L*)mgMKFR<FgUNBqF
zK&{9jD(Ca>ubUF%z9j3i1+$-5-j<#+Lyv8ttKq)2g6_F<l6EjHFgzx>SXGO^$UdQ4
zmS@r=*Urc7dp8Mw>$xNw%q#Yre^JU;vzp4?lebkK{&Y8ci$>U<>G{j1{%@1l|4}xB
zIbq-Z6WNCuPKens>}zCr!Sf>e3G1CL+VdGV)LSv{|1Wy?(!no0=M1-5AAZO?by*9~
zrdeyR+ikH6DC^t%s*m&Fl<p)Ub^ga(Nm3pqkyE=FBr?OqzdW}--@CV3=b!IJJ$>n0
zmfu{CYEEyHm(ksIP{Y!$)9Cj9>|;IE=hGNutQbV1V_vZ|a73-+Zu{ygc1+^Q4{e{K
zKX2W-fAyVAx}UUZg=$6Z*B}4mY^pw#PhDnFEp_d&c!mF;;-)qC4C>y+K5q$4S7f-;
z$I#Kd>Asl~!!scUyEcX&KT6EDF-E^mo~$6tSfI?5aDU%X28#oklOuj_b-Q_S+3j<J
zQJ)?vEl!zhz2<|tdY#d<=f;csR{DQv^sbh-7g|%U?zQc&*`+mqieu&!y)&p}$iJ4#
z714a+-?7KNhBK!=|8m+k(>}*w_4>Bpq!*lGudljot`)xLU&5avapv2PlZkV*GPW=#
zEU~%2h@YV=IsM9;hm{9EJpKHK_uE4K-okHD_EVy+Jf89Xdy?Qh<)bP$|Cz6e>#1Ti
zIG->5U4fzQ9K#7Q6$Ui{hG{HcbZ4<00}XB@s{DF>|B(6fsk@$aOwQywI?qFH$HN`V
zJT~7=m~bV1gUs(Q-Zeg77w-Zs**nO%-Dvi*=>lqd+aiwdJv`CsxWMig+jr@lm=GeN
zA6<Xx{(D*Z^u1#2Yxrv8%#O@E^7h8L|1;!d>;HXlp7@nZ=el@BuKTY&9HEcScYjRH
z&|-cdz_4J8hF`Oy>W@v4Z`_r1yYpmU?J|wamd>B>w*K&Y{V&dO^$Sh=5-!}!nQ*mV
zNw-Tzj$wy#{>g1;j6Ckle7>ExdJa!Rjw5q|6w`yYyQ++}-OC!y-^xTXSj5TQyUV*s
z<@I?%C+m{C_F+6NucoVQuJ!nCIWJ+G|B=S>&dxovCaqfj{e6dFSgnmx(ud&Ni*hEt
z^}d*I;;`b-<GaQmdh0zHB^SDg`_J+D9r1mCEiY)rhi8!4s~J@tuP)>oXxA@pEZ*@>
zUdLVMR*=jNe!1h)_nK-yUe!2R#><@0vAp4InWs6!m38lrE?g7dHMeil)OMf3vi=Wm
zcu!Sp#`W;cPDz#Z?c}dczgfY=aK@1#$CV-H*ugoyxeLma7?xx{I3mc_pw|7!;F#up
z^N1Y#3#T>Ymh7L`{?Yio)$x>Vy4P#(M?~7DOt>Q27V(>3V@9RM9s@Sd%6lrSb*m1o
zSs@;&D<ytpiU4b7@=pPux4-+DTC{4C|NnNe+_B?O<CMc0VcS^tYDHaHe@jO6^K4J%
z&vsFo{|e^k6|8eiV0PS}|M6%5-;MhQ$8OeVO}cXR%60!YD?`3Z>%8jH*!<W1bK3(u
zi&(}28K-SC0xvQ$)G~y*Z02+hT)w5^M8dXzjPWMYrHv7}Srt<g`;6u?PRP9ydHMyT
zgP?xPHrMd(-N~y@UfR!4`!Qg7Or_ZF@N$kRSCm8KZ1jvG-gWS}RtG#VKDsrrx~A{i
z+U3vvnR#RmoING4%qIPNd&Nh|Bx{qC?`+w8OgX+>usiz1lk4jv$;pvBMfb0Fp8oc~
zit22!Ej&+tpPziSdqZu|7hcN?Nee!tK32|IJ)h@jm&f7KZ<$+!gx4qg|Nk|c;p*Eh
zRhDm<zPfWVxE(#a|B8xs?5-n>TpdL+CZ4BcogYn|o+}nSXK&4gdHWyxC(9q&Rgv{<
zPohuc)<S8f1UF5UcPBpIZnIULuz3ok*8gdhb0bvf!ge9eV<pE=Fgd(!Ouc?}(W7?^
z&5?hr_v>dJ^G%mlz0Lh&yL$=iihHlMO+8;-=CHbdJ)2>UW&X$OzJ;f9xN43aG`;%1
z@Z!cdv%dRBrf<-ZFxlfabE;*<+n?(1^qCUADTb^)_&&-b_^9H{Ra+vGH~7CXIK>w$
z){wBb-ekqE3Ay`^?s{>_^ohfk-G2+Fyt&cZ{O$kZI>X0mf5RV6a@X(*t8cvjlP}Ks
zZo@oArf;&XZkzv}f4u1VYzc<ac}&N|eQf4O+cT)>&zj_z8Rir@f8X3LyRS<xUOK<U
zRf+eomFS+r=yQ`qSPt}B7yaE<XU_QGFen2uCTKA|_!WDUv9^0pL&X~}pOaFS3=&M2
zKkd2s&uGGHDe2t5>pa7ZeHQM}JL|n!oc%z`;+x;<1I3;mH+kzBEYJLPnPAVu+n=_w
zdG6&;R6D4!tmdo!<`wMSi?f`1stzw;Un-Gm75DvC-PHcFYZGLb{&-Ml=@pbGDW7cm
z$~07R>D#tR58{_BI9yh7BRo&ur&6zHUim)HuxYK!=6AmQmA2-1=#Nbw(<i&`)w?nE
zknq#r`y^G5*G~_#nXU4>SncG*rM{-CnqyVpyCu|CwY*ak-p1f@^5)0L37K7+UvmVB
z_^1EBA&@@(*Jmfkoy_V-H3Nm$?fu*KZ{0Cj@0Zy+-@^7j67OG^+?-RbmbBwj&^!?a
zc~^!Qqq(5@kxv@T3+^$z=w7^=SH7ZmSwO7xT7hL#ST`kG^Qy3Q7wK1qWL%Zv-4tlJ
z&GSjB;Od7Tdgmz=I<0vuG)vC)!Ea07lL1zOHs{q?1p4*GbX^X-da;O=ZOhb~3BNes
zIyF77x>xb#QA2ReddE%jM}B#_Csfb7<S6l2#+d!F>C*iRMW0Qa^htg4(TPr%KkoAF
z_Rb2p{ZKMF=lbIX&$)c<gRA2#q*l&<@P6Jh&5Ff2yX`Is)qiw}+}p9>hWTAPH;Drh
zg6*YGoAQMXE<MkP;=BE*{>0*rw;wd8ZDsiVpmD+b>a{ts90%nmmsh+~_$|)-pwLN1
z=T!T?&##`i2EV8*Wms6Gd)Hm*3d?)V8oOAt#J<NuAJ0t6xqg9L?C$Ca^Lmk;q0jHk
zGm`rqv;Q6I&2RO)ru}@e<l3f5_A#2vm98l><he53X=pC)Uwiu~!--}54C|a3-rU}v
zAvJ^1fP>)(<Ag_UK4QP?YL~z0d%V){c3%fW-N}iq$4f1C)MZVWV*c@d{G|h)l@}Fm
z-aEj4<2Z-(>Ww~`5zDOp+;Lp{&N<Z1YJIbf&3ox|-n>#x-dAp$|K>%pTCFJR3bE2^
zOjy{NBz<-pyKma>npsZf$8=_SF0cVDsjk`}{Nk*@&chrtu1W^l6$sWle|^TDy4Pm$
z7TMXQp#iraCjOPkcy%x7zVxoTW#6tmW?a+UrEzkjWc@z|_B+cz-2c~AT4{c0ZnMvu
z4GK5k9lrlpINso&b8i3jopBEH*KGMZ$NTvvS+l4|!n|p#k1(tVzNvEi{n5?-;T^9c
zV^^CfaV&F-lv%y#$g2FO$yJ62#J7klvCA*bJ9z#=-I2iJcMHtyzJ8wms5v!t?nxmn
z29e|IR=ijkt02nJ@RFn9cJUuIh7ZPU4%-wM3s{*RoLkheZ|lS)pY6{L82E%3TzkU4
z96CAW(9!9qRF9;-75;zYb1X+2+vcJfed>RFG+z77cJooWAXT$whEw$XsjnXC3SD1Q
z_GsrElU6xZ>uE|S_BhVdot#sBCSjXx&ZMcT$0BZXvsiG{N`H{z`gq^E^!tQK-_Jj~
z(a!w#Kg-?<^<!Bs9{+5X=u5dDzuEO~>6_zKSx2q14;<VWW;`|N_^b<7N4VYImlSpU
zj;#6H5^<Yfg5j&6@A6qXuj1}LV(9N(Y3&#EY;oOlZ)IW4$p2U8|FExN)Vx;w@%YAN
zOsQ=WZVV-ItPMveyk$P12AN+w%h6CBwx>*n;Y=z6j}U{W$dbP1&(&()KjM-zUiq%c
z`}?YCb-i)vhsy!4N;zVmx}B6`I-jC{LGsM2#FnYfQ|kR>&YqgM*j{+nuZ^`Lh6k?h
zSAUzn>94Wvc4e80*tTuw{I;y-iQ8@+wCTT*hQAi`gNuyo?;Av1nfvP)BcD<Kf+c@`
zha6Sax2xNyC-Sl0eQC?jn;RG(Ofyk`a50~W!M;`^O3sm?CdW$pcEj4Af-DD~ax_?X
zG5xyM_I77U3dq4no9)UDn#`(``E{`J#{>S`B~N$kux6k8K7Y@ea|WKT%<hUsw@jWG
zel%yj@l5fTv+m6Pq&-XJ_p!^%=XXvq&}G{kx^QlV+Z`o=&2O)6V0B<zv2FkBz<moC
zFD%+ua*8!jL|tcfs3NPwp8VsU-*z5mZqPi=^xzg~ChQl(gDAEG6Brjr%vm2*(fMp4
z=i8DS1<k$r^Y7~)&*GM5+ndza^y_2dJ;8Ie$rt2|yB`azUDMmq_kY&8?N?@e{B$TX
zBK_oIStj3ocO84gUdJ95n<2mC!_H4d7R(B7A9jCVpuQ#GsyzRKw|9f5mwk*>J2@?n
zD>CwBs`JY^8Qbd8Bdku}nzBaqWTxtdyy$KBuf5S}(y?cVJ9Eq|A-gWc=#3jgOf$oc
z?fGk$3pD56<Y|af_`+NjlMkACE4)w@%(AxMjAKgmHor~#J@4)AP`IobQ?T`GlZWR|
zan*h~-_onbU#s)p9NM^W=W4O}j;l3Y|14^7C>5#t_a<eY$IUY9uai3jR~4{3L?xt5
z%017wTzta5+X~wAzve&e`c$^!l-9CCs?mN&3Myyyb2r?sW++fP%oAO}uw$=*0K@i7
zh8?pQ1zNVdsmB_;-lnOx`K{sPH!mb~3mZ#h12^rxs<5&@#=v{-qT39Xr_JOTpK)%q
zy`j<IaR17WWrZ92yq&D0bnG54+Tm)v@txhrrCyS$4_UJ$-?9{46nOu)<`$#E*_bnh
zF-e<NxE>be)>!AeY3-Zv+9<Jqr!=hj+VixcDr;_7bu%0YVL31*XTKrCrk6|&r)wB)
zEC&sOCNiinn0TI=|7B0!sq^jmTUU8*6VK$gC_Unr-ZfVzrTX0ui(8BG8s9}<=@ebJ
z>96NR<^q=Hp7nEsYYOX~C;F7}sGeW8x3+iE-qIufoY||FX)aGGniBcbaA{<h#6u}g
z1_?pK_h$-|j!SOMwOH2l@|Mr0+~j(%s=POMw+N_2e~)QUx)zdqDE;Eq!#oW~c^Yzf
z8kA)ij?VtN^wiED347yimxtS0T-wuk%Vf^3>k2pR7r5+yoPLh`=Dsyat*ejjt8l6E
zt$wT&p8Y}mmTgx*`-O>OziU_y`po~Hlj8UDR95<wCHrJD<0?`)lGG&`{I9itxVNmP
zU+MZ20p|rb8l8i-ef_*NvnOy<ROb6c+soH~+<I{`F*enYVTa{b4)&)y``5ZLoN-~u
zabw7-`H}?c=m|4D&|^7p^)@%tFEL-{<oU@AJc10rnsUwwt&V-1^ZlaS_I-_cEG+lC
zuTEb%apTAB(n)VGw=re$gzI&f%$Qm+IYnPiQ(^H|^|x1FS0t|SP_W+e;rjA-`*M2<
zk0?0oZF!n0r@66FP2NWN&+>`K|16TYy`&>>$wa1xS>?G8BW?-H{`R{a;K!c*cKtHf
zGqcOn>XX|)-u$pGF3w^8^SaNi#gEkav(pbA{PA#!?a>Wo+0*Wa<!iTWS?cff`S4_k
zv>R;f4gCI%b5Gwtwl75B!uD$ywLa@c&ebkkaz259N0i~4-4>o88`Zd4DaQ+<eF`s{
zB^4MBf2-m+Sb8nsc*4c+y_031nAY;ke_H#HtI763)7Pnb2af*P-I;hmI$6)?wz`z=
zw)ns<IpMeJve&+<J8`cn_)t6N*R37i`IhEMQ!-T;7sxYZ9vAP9QrcInv8OSK+blY+
z{u}?d_8Sq8^pDl;HM(JxRiA&~`F6S+&vm&2bqseVechhSxPhUp{;L+#gOoPOZH~7}
zJR-kpuDLl){K9sZSqrDUS=Q2<Ia!jynY+Q3`OA(M6_XDp{b%HBI3KiN_kCugn0vG4
z?a=nD_*29?>G4C4n~93~cRtv1KA)R8WleDJq{r9YZhoBU88(YWhbP^Zb)DbRR=K5H
z7N4-#Y&&b&ufttd%cAxh>XkiIb=!PaO!c<#hB<Gga#kh^_`h1e-|N$?OM4xHZydJ!
zVZ`vQE%}J%bE%iAIeou6PcABHmtkpT573-#v7zN<@-Z*&eeJccWOZ&gGFIzoKI8w-
zmaF`Bt>5`MiSKo?gEu`gf2-GIYGLvHMwZN$?T;SZHJ)I6W&>}|uX3phx&1O`xe@BW
zg6{9zT34+nXPiBK-R(j_<_8lP7UYLjyo>xXp|@+&6$_RP|D8YF%3H+yzFI;33iH-G
z)&j~hTQXj{OtDX#nU>vc)0QsvS>j=XLuM(@#=hCD&qI#vSoync&Ah4wt$P;DQh&?v
zZ%*UauN|7#s&sX~aHbb5+@Z(VH0e|J#91ej^k2tbS<IJPv*g>{>fTAU+XMURPl#-O
zv1{X}L!2T?f`(nDLW~>KOIpPwdZ*`fZ@7Ccpw4Dvgz~v{@;tv3-<19O?{#Z=^@62G
zo*xtmp7Y85O?>37pMGqs_wAEVpC`{-5qn<Z?Dlvuu@CN1TP?p%<Go$m{6*|+*zzY1
zkB;>|sC(pPIK%kL23{V9<xX#n3pdn#Ue{3Px-F5N>A|!!Z?<gNvc6S>;jEYB-6yvW
z$kvI8mZ$$WQ@$Rb?&5m9-}p?JzWI?`b<(Q0<Ckc<)<l&4VE46uv7(u6#=hna`ZqI$
z?JR;bUY!+Ke#a<mTitiiUPXhPM7G}-c0_uKti8}ZWom1Pf5bPFE!X7u(>{DH$%)P6
zww{vLbyvlo*|*pAm{xe^197zpch+yb^Q)J8vAEwdalV}2_F3JNGTx^?ZTN3@Ayq&A
zYsLfSi_<syyGOp!cb}E}IkU0-+ay7?=Dd#9zt6v}uq@BIAlTx1b|>@iMY)Fyj!b*h
z{ctJU=3mm2-&C;v_BGq5Q#PY!!&T<iWetDpn9n|8-+S+T@_vh)EgEt)nl~j)-D^)K
z1lAkhn9j(xe&O_Ro%{ED#oUd)eRQ+=FF3#bJpbAszw_Qq{&_{bH|@9o%b7x9b5#{p
zAFtY~8TtEq-q|IMH`60Gm|611od3(g{#ld9G>6&f7VD9pr}7g-*J_AAxPE)P**l#E
z(~~nf7k(?7nWDXPWrZ|*`}RYZbhhsomdxH=xMOdu$bQ#`M*bVoCyg_v$a<fA-O9h2
zO=?Yb;&S_CzSFZ97GHUAe)k66d!|v3{^yuP*Jt$<o)HPoSI*53m6_mTJM(S0+sSM3
z3OxUM!!#$SpH#V-{Xud6_VQJQ3Z+Ybyy`l)aM_~+Uo@|MZ)SP+IBDul6IIz$p%<LL
z9&&6})%BVsu=>Q~vt_db`|6Fn!ooXQy?!J${C#ubz^};pzb0?}<?L~zx%9_G&#wRR
zGBx#{wX<&V#Fy<8Eu3{SVcS17ExmuNYc4<NIj4B@z09@e!9BS`ds_l~H1C$*c$c4G
zz5ZyKzx#nj6FQ`ttUfkrYM)iR*`~1U+Tzd!sf$&mbiVO_d@pyj`fZ=a=ePDoC$4F)
z$bT{6Q##L4eW|nCPWu#Jng8H#^>RbL`3uFq7Ivi{tNW$?sYO_$tJUq{k2~HzIx@S}
zEEkvMdhx9}#<=Tv*wQD?2cyd!KRXLA{5Y}u=8a#uSJI8_|EPak*wkR>x-GL@agE=}
zD4|a?glaAet@{<3m=&>YgO`Q%<9nM=8hc!x|K&ih(e3({<`;)FA}bsAlttXTZt5V|
z`0oe9w)MizJJ#qeZ#4JXwBKiA4adED1CMS04Nq-*{nF7}K-s2tvC2_4+as<Qs@^nZ
zKKk`6;`yb9)v??E3PuTLPY`f9^U7##-J91d_HDQ))FSQ^bl)|hvd@A4|6jSl@Cv?%
zOS=M>1y6d_(Rfs~SEX0*-rN^I)o#UC%&83AQeo(zair?Ew(9d^zp8lq=Pz3O;$?bD
zMEvTSuiyK^>igIit2~Wc`|r)aU)JWowm()%yHiqjBvXeek4a|MJHBHtWES3zOnY#C
z`RA0cN}seQmrr1*Uz1{dd2e!;<BzV*|E?(9&U1UP>!gDIF2_AN4~l!X^)0EMYV#r~
zd;d<wS^thdm$p6s^54?wF`q2DpWh34ZGLFiW}VnwUwmXHnCIV_DQ^&<uE6l_&m-52
znhGvw-rEd0Ru%F;`Eq=JNC<6vCd=ILy8H>_zMM1N39Uax?%mmZqS}eyuxi1@=IIY6
zJld`Cl|z#E(c`n%-d{MNeQ(9bh3eZb2=VU~G>Mg*@@(!(-|}Pg{jcl4QoLr@`>M6<
z*@x?**KULMK{F*u#bhqnVNiANXH@K5x!}p_QWl(CnFa4(rYrIFyyE{I?(jF(xSnmV
zMd06yO^Si_#r~gvS-*AtF7G6}H{siiYx2hm?tTegy*)+8^G+zo>dD4`w`F;o<=mOT
zQR?sVxWP{JZIaoxx*H({w#Bw<?rstO9T?^?Ur4)gd&;(T9$c|p65yk)BK}_KdU8g7
zt@lYQl|3dWCGC{^ea%Gn?&6pwH|t2k;cNQYRT)Z8_Iy^4*lN1nKmJmWn6#?O(&eD=
zdtGyVBk1Tp(=&^v$Z*uFW^aFbKucytb&e|EqucvyM5W~Cohvwejej;DZ-1@$Bw@Se
z7Tw0$P9JV#kKfYpQ-&B4d=5?P5zlB%yU%Fdxa_UacE_ii%{w9-*kra%j(mK0^1?4m
zqc^@^(z);QcKx+Cz2u&*nKpIy=Cn_;$HAvD&B%CrY>j1h@TT3Gq3#SUc_$mM^vks#
zYnNcS>G$sMXS*81od1tRBX+8Omw11_P4>~#N8a=PrGgx;-Ls|ScwfF+j^F(qb=7if
zyu7|gI~9qoxxGO$#y_6Fm)}pY*+%;I#cMwvsA$wa6pNU=Blz>buP>cfPclk8d+iOg
zws86VGv!j?!x|HpO`RQ_A3J+ev4WyzbJN$q+l(yP%j$oznAq9yvj2X`W0T{4v*d%#
zC)pd{j=tVd<Xz@)Uvcr<`$Cg0o9p_XvTuKH1dfkGKHA$v=XNufEt1&qo9~gx;jM+z
z7W4nJR>;p`k(2py^F~$AFPDSaVG&y^`(J3+{ywKQ@t<n-{ZnUW{y$nS0ddNzma{AE
zV<%6#>Uz11bM|GBQ@AHhujeTBUdQs@IasphM$HFv)>ZQ-C0t(hbW#78x5g>bWdX0I
z_ne=urn>Qa|Iwt45o<QE9@w%aUp41kMfaWtZO%V`rYZJi%KzcE;rBg!u-=OK!b`sT
z8~jpoKl$GL`63z7sn65+S9NaK-$_^3`kd)`XbpDC8iVVf*6!Aeo;oLTQ(873-;>O}
z%~e5cYwm5}<%-+dSZ?UgI{%(oY(>-Ndpr4sgvz`3T@?S9@vFSl>)gYZBUkqCnKtd4
z>AuyVRCbiBxbttL`BJ@zt)`o;9eLhl?rr<OzwT|6M&-jWUMBfnGv017ZrOiy@<-PL
zT-n#v6X$#IzfsP}-X18E`D|0mk}ZWb?x0xjHg)>)a8~T|wSIopRk>Z;R&9R&U>}pB
zLE%-e0=xhBnOGz`{Nz8|d3-Pz<%*AFcztc%_f|HRL-%J*k(u@DtY^gb-&*@7gB>K5
z7`0JXIV&~j*r%<UJ2xZ+M=c0!_`gat;K!$$mZp_VJU0Ai6Qqw`yL_y2{e8aI*VYKv
zK6p{Lu`1i_P<-0muHH<0Wq+gluHe(QdL7nA&9D8r>dBh7$D0p4oqb7=<(l)e+x1M}
zm8Wx_7ihku(YpU=@Pcm}S1Z2VTYqR)>FkYvjr(5mPx+*(Xt_G|>9UgSprA|UKi(yR
z{FvR6_3_csbQP8K6BZlNFI>52IM0El@#^dChx+{gGf(|LZ?)rtW^rd`C+6EH%=>42
zn#-2@e<x><T|H}9?hUmA|0RChp5%IMpOWU^d7`(s%{^Vt2aZkU7*9{nbj263+pE08
zZryt9pul+WRIhd7<*xGz-@o2o`0nEA*5e(H1uD(=kIuTDf6{cq&u{XHk5{E`Fe*G>
z`0)JncS|d8F56N)T{SiC0Q8X4#T-o2^KT|c?p3v(Y161}zWmXH``0fWow$rWc-lT5
zSN*?IujGHu<7ZiJ9MY@Ne_w?0BdflCi*EcL#?{x=zcTJSP!_-0boRGjYa?e)S!nbJ
z<oj+@uB|1Sb9Uw&p0_$Y|Ktyo#xgEXulA>%D{FOlr>RadFA!f7yH{*m?JpUTJwL2h
z)aFc^;U;qVaJPP28+-bO)`^v3*%zK4G+PxX#JqF;t%TQ?!V}KU`no}>aC=5@*6n?{
zZ`=1j>G8Sstao{BBq;WzjxseEo;F(=yT?c~_vEUxGuN)(o+iuMxRQg%R$+&*ViL1T
z?MJaC`%CRD=KqwuVplb_!GVd7mt(o12%qMgSe;jMUd_AJP#6CBhH1c`H~I=+@BU}^
zjNjLuJB>rzEbGDcHyR(57vGt{d2(iZxN5py>|V>WfB5u&B!E2o`pbo`Be88yXP4{y
zc%1WCF0mnbZ{uEt9ZH4#YofNZUQ=JwqC0;-XZ5{ko}KGgaI}~=zwWeHQTs-#!2XXd
z+ZKUYzZXgd^q2bFyRl0Bxx>l&yv8}<`R#Y*ZF(C1@vVv8$|ZDd`GHT98{+$~>nmz*
zPCL46OZ|oJ1CNdtu-)ecc|K{bThxBL-?z2R-pt!uRDA5Swt1?FpiF?$1K~ym*^`^M
z6ghLPiJPs_U{Lr|orx>hu9B~;?u*KsUmpS)RdasbT*oipEGuuzsyTljgXjGHtkTo#
z+YU@`$lbtsJ4t%mMY;IjGS*jbbJs=Q*f#UG(6*HNjY^t-)%P4`2Petp8#BBt5`(X`
zJU#jH|K2R`YvCMCvFc0*9VEU~&NN$=6|kdF{Ll$Mj~g3e9XMy#ExY>CFYvg?<>gzQ
zs}ihbmana{spI(3E;Pv~I4sYv-~GqziN~scZvtm;rL~cdZLAXyuY14d&XipVPp9=f
z)izt+`Qt##6P}2DnxPdl`|??o-J_*uuM>;dFZE#V4qJ_5=h;72hpVRN#ZBDtoL}2K
z{p^!gIdCX#>|5OeE>BPVIX8XwrT@|oAF8%=II(m)ujbIGG1%ebW4WodJ8<?Imlx4b
zqc1dnh--ZC@8751mE6<S(_*%jB;WdZXEQjpCCv@WzP85l_vt0E!q?T(kFB{e<(<ck
zIsbm0I{Py7wE0rgiQ-wIn{HS(g-z!A5wUY-u7^+b!PRf?Onk$<&o^`JkNlgQvyJb?
z@6>#IZ@t;Fvys!LSbqKc_4WR<wlhHmzs|J;lV@v1Z=Z|ZWtjV^>&O@F{ZadWKYQ9)
zUAI4}_||34*+zDb7I)q-AFkQ5D77<FHD}?5D@kiU*Q|M6vL$8B@1>lcFVC;@@cDgt
z)7Q-X5!1Fj&uX&!^zGue`a8K0_rFw&a&VaN@#!v;%hNkoUY@T0ZdcENf}6i$FLKTf
zN}j&{erDR``z!MHN9@i07XG>{{_D|Qw#J&Rr@MG11GiKkzx3zN`r`ATBeTT1O=F~l
n1sQV`4Oc~jYtM+~oB!GSMt9B1kb7?py0^sB)z4*}Q$iB}D)D-i

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/nest_logo.png b/tensorflow/contrib/lite/g3doc/images/landing-page/nest_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..35bfd97373279a3a5a1f8f622d3358ecbfce10f2
GIT binary patch
literal 7839
zcmeAS@N?(olHy`uVBq!ia0y~yU~~Xs4mJh`hO-lvu3}(dU`z6LcVYMsf(!O8pUl9(
zz**oCS<Jv7E(F4i>GLlIF)%Q&mw5WRvOnS!5-}F5K4I3#z#ymN>EaktaqI0|&KWVG
zQ;%1l_T%6@vcV&Q#l;~&P%t&(YKOMo!Nv!6hkJGi2EX6Sw<vyc$<^2IGCWqrsV{J1
zn5cS4LqlQRjt8fdni>T~(;Wm|9C#EKT34N~UGC&@>F(9LrT2rr{;s|MvqNats`tCU
z|37E@{Z4UI6vHq93-T(K7Ojq7b!Nrpt%jQzCa^Q!xyf<OzK8eP$$4hm%$2zrDg_Vl
z9{U%mUd_hH!oVWHz`)4D;K0DhAmG42HkZ+bVe|KdUt!x=5|<t|Pm>Gs4lt_Mn-={i
zDCJ*drskL0Hpik}21?f7_Q`&KWxx7ye4gcM>!b<W9379HJafHb&7;cMsl}=dkN?K`
zx?c#M_;1N=$=S&{`pN7P)~9aV?wOnOX790ksc+UEi%ZR^Pp+1jotyJ&=H1f~waH61
z?+7|pH>+=H&&#@FafxqwkKN0BbN5<T*`NDs9{*QLu736T*16Z3=BfG-`aP?X!{=_-
zH!G_ty>lk%pTo4XHOGy6c&`L9EHzCtIQQOk{X*9dXC85V)KHV0y#4&eV_nDgWxuKK
zwN1#`%lD*!S(pE9VRF66wsSX1kDW6qn-O~=(S$Mcwv@5iRq?ZV$NUVgtxm4r-uLwJ
zFR}mWypR8vC)ZzlUN(8-rjJth6LZ%6{QSkq|H`d`$Kg8LXCAc-ZarsQws+(1l6!WE
zImxm|mqfm0+kcVeLj1NUvHKxkWp?%cy>la8)@O!e>=b*0XRD_h{rvK;cy`bSvD#;Q
zrYZE?IxBI;=o?$IxYX~hTys}*{#nBHO7z&hyc~6Vo$b>4_VeT7$|K{XZcA6s-KgDr
zH#&ViXSNF?Pn8N^P$qv;{^Q_dd6&<9^yFK%XKH=Uo5N*_V%okNL$zC;u3D}heQmwO
z?_jTcoi)#zkL^o)<Fa4G*IX`1&&GMb#MYI|b>cqRR+MujtQB@|$#2>?U3&MO8_ALq
zS9)Jb&RcG{?%TP7Q0LBjf3{}7<vxDn-La~kziP?dWdZvqx7d`KPCGe$qWpdvmmf-Z
zmnwL1B=E+$7p9gn%B{=)!z(G9@R3X3?A`;$W73%%{TKLqw+C%ucNUkZHhC-@<MK06
z+E^#_8Jpxsj;}{`6K6H;(-Bl=DF`|EwYDdA(jl|*HENE3iuWXK*<AVh`IXz-9_y)Z
zpMUwjy1(#Vt(Nraq74%s8BfUF`BRQv^0#7iEK7mOJa_K^E&q$#)8w{u8{K@rIa4mt
z<Y4?RC3Srn$?qOXHFLF;D(8KWHa+%@FIkp*_cV3u`p3>no|~&!J~}dF+dt7*H);E`
zgR__nXMA>?9=vyT&L5pK?;c2B%zmkJQ)228{=2sl?sYA>A0u+)W#Bvi^1~};^=(yg
z-z~;sAbjv!($d2<C;811CPeHxXwH_n^z7*y&J&~!t2XU+c|4=n;-~9{`t&z9nmB4V
zK05RH^W(XvwSuAq4g}`xJ!Mq6Y_|T5_~b)5a=u4Y1KukbN5}PU{I`N}&)cZSo8@^Q
z<uqCC_jtlJqjuY0U!!Rp33K_ZQXZZ7_T;v&fQH!m2Q{7zyj7RHr$^q{yDPP@aA!{C
z^Gv;XV_lZYgJDlnwiT59ebnT*QO9uq=8t~_4g?;Z*YsoE<Mp#%O!28ZpD%IS@P+5U
zPy2Z<i04LK%(S$5xHkRWgQ?GsS})%4=FYA?N51C$v=@+KDG>QM>D0ce=bLZJ9dp|o
zW^U~%_@(-{s;Tp~`dF41Z;f}HJ9Aie&%S?#{VyLy{P~vO5M0b+aB$5JvAzD);gj_b
zNcWzNiScq{*}EY=DWsNHCSCTl@s4Y6oR*cBmiAu#J$p%a_eU3oY@1S^+?n&^KLxyD
znsQtBvQWp?uIJl(B(9pxKe#mLPSO{Bk=K7~Cph!76qwxnc=^le36lTcFxi-EOr1OR
z)~82b7H$6cr>&P$kJ~HoxW;DjvI|;zJ&H^bhZnkMh(Gc=l0SR-VeY+2JKuc|w|<<R
zE3fn{{PN2W^=wP0uHCvfB<JF#hMheO*DCg3@tg7eaVyW<#m}ZLdi-jp#@nMCuL;lk
zv88;AnrK_!iqwS?{k5~=wkF>ZI1uGmqpa!vUw}U;VzThZxvvbL1#P+VXN`D<>$5ev
zKiRLomc3w~ezJR)^D1Nh`JdC1>;L<`;b&Z98fFyfZ?k#B54J5!uCd(}%L`avqM*$E
z(djX3S<mUu-RrmKu6K>T+_%;<?~d%TeM_#tO|h=)lK;M4AbP?7`AeR~q`VQYyR$Dr
z;i%;&OXVV2=jwX1DWAWx?7z%bJF(VVTY2vM!rfcW>9bd=Wgq`J*KT4LLzV5{wZ9%V
zYx<@|Hf%rP*c{XRX=zLK#-bH_nZ(?xs|po^U;b3=ohbIRu=0DUR#zs+qs!l{+b&id
z;?0dJv{;*UwtnMQ!DM~Ut|?Ln4eo2M*`lYH8#^!S;isoXlbxP<&i=dm=qI1MyY$1?
zS13$PH#pKHsBgRQS4WZhKbdIm%^8<-Z@sk;)1GPend#ou_4VO9YLETgJLfK={G@g3
zOpDI?zB*+)!^z>dn(lJdpKZdag{*u3D$kvE%+=xT{q51;{9o%%T_XO#N?Gi7zu|R%
zu@{A!B4!d-#LvDw_ISSNiHiQmz142lErrZ7O&=RX9yNHGwBz-jAd_ScMw^o#PrE*w
z{l#wPYwwS3?`p12U84VCbM$?_uPl@9XvHMIxvG@Zy6@`aJ@+FsO&3|X?mhfoU87~w
z4!cN=-gV5g;!e(*>D087W80RUa~@fHyyAPbh*!4C;p5zvdCfM>c{dN*r1$L3-F@D)
z?|DJEK$sfa(i*WVt_*KQ6|FP*Jx>VVT(NO+vdY8l_f(hLZB<#5_UF9+hgb7NG;frK
zB+Gq&q^q{$YwY^yx3YU!c5P-Y%qgC9FWK*rgYULa)w#;^{uZ}?-XSA$T2|MRjj`t2
zvxBCbQ)fo4GJShT@4&o&S%04YY1x)s+q%I_{{nyb$#Uxx{_7&Am?&8VRoz-<us!PQ
zzqe;O|IABc56-*)X0z(gpN#JFOPBxoGi}c0FH9QWwa$v$88E*0_d6ChyF^&&Zr@4K
z75gp!{Y_c;_L5{@O{9ItVcvM(6Sw~@(^owc^k)0QVxKs->sH??{)=_zhS)ORJ9|+q
zrmp?_@<2rn#x=it`-^7FUElr9f$u|b*W`+g;zes7ubrX8_5O;to~+}aoHy@Zu$(f?
zy;3sm?PjCvCU>7}_ql!$S}(Ho<#XAWFE*#_w%xsyF{6s{%9ZwZneO{`o@Towohn>d
z;+Iaoe)WCTd5<Pxk=z{*Z*z9sKk6xxCvmuVLhj3Q*?;f)Z!P(>sMqma`4`EeCOcNf
zGdmexc|LKM$>-*enln+@D7W@Um90R*)Jw%@0zRHqh?{cUYTs+^N9CnLh1WKo-<e^4
zJpA&Z1;@|5y)5(h&-<T`eHz>uj5q%6eKO^pnfv4?D=j=Nq<z<K=Qve=C$Z{S)cz~?
zt&W84iQV3yx7WUM-sZhL<ySvFTGO0d(zipR`S;oP_R*7H#{T&u&$2XwiD^bMW61k_
zd-v(@k8;19*z(Nh-@C%4Pq+4owW~}zfAd$!uRU3-E(Z5)uV=4bs=>~E{+`OqVv)s@
zUEXg$+IcVUox<-#6JEwM`x)xaTx8jKwPO11^3u7}&)&DaCwJELTe`bBbKm#v7K$O~
zi)TKTHeK$y#+aXby|K;mSseeQ-5)ES{P4c`%VzoM!u2c$$C)&0^JQZXOlPb(T#;X<
zvPJaYjK$UGFHZYt{@CzL!Hmgi+8HcMKP_3e?%y4qeFx%Ld+t5fkoh>{*!I}D7UF+&
z%-_2)xQjHz%G~|^*Da3eh~u66H(J?0RqwpFJ#)?D!~K(Gj>)R_6nZS2v44+={hRrZ
zRy=LuxSuY|^*~-qQFNIm>xK)t%@_2`3!I(mehN3u*(162=8u<rdmKDZ+)dv+y;r8L
z=H|lh;;f7JbkEV%zaVzM^kL`v;yDlMJJ<dZa^pyt$8tg6%DaB&W$wkDJ3l#iP4sDS
zmpQ^1|N8qe{iX#GGoOCAb$ju)KLLH)lb^L(p1zoua-s0l#fH7{vgyGJ4Lb!7{9t%u
z#2(G}a`%N<>p2rn|Bru>Q0TGuP?qbX$uid>VkU15S!{SL`Lt!;rIJRO7c7<{2bYN)
zSfHL&F7SKO>RA2LcUfLUYI1+j<$F{-{W$mSYdU)lTzpi0>4tan9QMb1A1!;`SeeVF
zs{jA?(^s*I@0K?+q*b35JX<n>sVIJ7#LOATSzjD)O^{#zx#G^cdA&LP@5-k4oBfj8
zDJgpJW#6SqleT=F#rUD7Q(wF*mBnB&cU?Yv>LLBpx4(VAIpf8tGWM;^^4nb94`<l7
z-~IZ>hW)|zjlBOBIrHm!<_EmHRy|4Jy|}Wcr89%E*-gH(0;^{~E!TFv&iH7Xb|ka!
z%lgtS45`0=EA>8qc5jRAfohqas|&=|_$Ei|s6IIN<?*JNwQ>0^3~4C`_isA)QEX<)
z=cl{E{a6EX3hx$wK9mu!d*aoq9dBH3IjjnNljYv-8vgs`w;j`j--%xOeC&1SErA1H
z#Qy}A^8Gr^AUFU1kIB9vc2^r6Jl8$-fAM+NF3;#Q@@WsMZ5HKf@PF`{%Gt^zGtFc*
zgIxIYwQX}4<R^wR#_Xus+H-QYOio~bR>Ofh<r{l%eJfT_zVH6*SZSSR)zUIY7Y1Rm
z1J^AyYQJ{PVSgAO|9pB=&&l0JTVBk%c|`iYt#aIhY1Yf7x32#<rFFipas#LCx9e5{
z2M!AySbHob(~I}~#<k@d`q!meBF~u0T=DCC|MgHtmg2#~E-V6!C7KP>l^bRnL{3rv
zRmgiNRI*M@sb&8)E!WG<EK-q*4IB%+8TUwjJbA{qm%sGZBk_wlqBGaj-@kvVeA~_?
ze6~&?BUBqUUERh1@nnzg*Y2rdnv!Y%YATv+XC8kocyE>HG%GRd6<>N8lp5Bt6eu*t
z9f^4Rtu?Eg&3?=GQh&b%rkS?!%fxTByt2Bl<;WoD5YDvYVj`<VPR_{-xwCEaJK5(w
zT%fk>bneIh($$;8*KE1I`lTpS2g5EQ2c2j&O@p`5HlNFMqo*y`GTC&^>)5iN-r))j
z912VwS0gObxf$>Hu=p2Wk-eCj^C2qv{N$?1ASs5VtzR-<N<6x`{pG9^mlsYwbM^L{
zzmE!+oCfQ>A@l7(!P#9_OQn1>P9M2DWAlx-cR%gjJCB3Wg+b6E=f<y|!)4kcf*-x6
zuDo^Y;YFFhdHegiH-rf|um~_7Y0Hr_P1=3UzvR33WHrypH}Bv0YUwp~=gAv9ECP%!
z3>z0lP4!z5DRF3%$(C6QvY$SaS#mn(%C8B+f(|SO+Dsh_o`o=&pY6UNc6zSP&+=n+
z(>_j-^xXIVL&h!^r3Q`#+Kf|9{@Ag&Q}6L!Iq9i?S5DLYaN)D^q(eJ}OW)p^@Nn(N
z_+HtF6(4WOKmzEa$h&frlm8EMYg;ES-Xy!%Dn^-m*J<6ljdvbS)pDQpyr5Og{Ol}_
zy2CeASOge-f*Iytve6Y6-m{rCC^fKL#n9=bd`;)Z^Q*VdpE}|A0^MWL&e49L=+<nA
zFWaJ_r#D3@M1%QE{O)bDJoZlCXqVeKfmQL()R^hc41xztST{V$F#K~yzt46aYu~eg
zh)mP-sqXQ=Opn(tGhbGtRddSYYO;U>%Y~&3dIcq`AMhVbQ?itt>wQ-{x;e<?Y`-<{
zy(i1BPx`#Ps0oyGGC3EN-2dIZIQW6Mg6U1Q4WcWwC;$0z;E9xNuivh|2i>6X(`B5}
z`M=nG@^9ukwqK{5c%5z}7js&l`BqVHYgwn|D7Z$XYqfv_i-9PkP0Z%cu|98JFFj)Y
zX_CCy^ivxJj{RM9T~(#wljUv2295+NrivLGFUztX{-2R!wrSGoooQ=q+_xWHr+waI
z-lI4U1*RF(8021PaM)$cPxi3#(R&_UnA*MeRm`fGSqCydu7oJ?yC&AIr}phSckSQY
z6P|v_&4ul|ULM=`I)dT4LIX#F7L&vg&rR_qCL84^o}A{woLjI{cAMuFehZNU(R-4v
zG%_ePXmTvzt<f)#c>VHIcTV%?9`R|>rj@s@hW#i%w#}VE@W2sPgLx08&Xoyk6Fi?N
zzJ9s9>dEhaX0OcEk3Bol66A<3hOFls*IVuRR5ESmiM5vU^&gwmvL1b?$a%Cwx91ku
z-qn-7GCdAB`CPWms|;jd2+M{G+b?}Ji%|PvSLxfs=Tt3G(DH1}U1Pq<zqglPJ!m)S
zOGH^|=}Cz_moLgpynB2e#FAU{<=r+Ntvr#q;5XOG;OV=6Oh4jrIHUUW-};FQtE$qp
zqwXoTTJD%Gc4_VR_n*HnEMk=9c)-Ba!H{RFb}J*j^{}h<<R2;e>2VJh?W?=_tJW;}
zEpv+C{5f01S5!9YY596vH`s3bIkWKVtg7$UH{}^!80Nob{Pyj1y3DH`Q+_oci+k~P
z?Wxl<PJ2JREBSVF;pXt<(5F`13%a>32dVx4d7H<6hv41HnS93;8cuRQNNZQTZW*`3
z-q7syo}0$<-;KN9XMB#8Kb?`!`?2euluE^AHI1-m7PpwZ?Wa8FHGHpbxgw%u+x9Iz
z3`z~roC&3GVz))y$<PjL|CDzAnF`ONLW_{q>ghWt-_c(>!6E2g&(ArJWo4UqX0mI{
z{nT?e^7;ASt>Im23Jn|&5~KD1ZSvV6uNuJn!%6U<LUOd=-EVU1;ykZKXLd={2JI_$
zdVJ}zhFDd!K;G`A9q+u>l?oK!(cipVfW^R;@yp?&ufG3Fn3Q&yc~4#ZcSq-i**&&%
z7S|mrYO0#@TD-GrR=L$LS-r_7vvce6GFr=bHlL5t-5Vs}z)~PRKXvbc*TPW;>^Hp>
zO;mmRaq1$0bg62kcHgV@`pZ|$TWo!O(viHnf0=xrUHs)=Z8rV?;V++EbP<32tdj;j
zOf%Lo9NU?1WH~3c|MsON_8)Jn-kba<u|4)qap`Q+3C<r^-&%XDjQ3T|`>4&#-%2I;
zryhC2+&=&JQ*rBC6Yof!XpFh$wN0_%CD($#d-Ak-=6f3af17z%=>0?Wg4}n>+9|8!
z=Plc=^>0e@!`qv#KH+{XDtzVeg1xeNwMYN?Z(?fuFZZ;wD&Ub4&u7uyd(Q{i2r&A@
zGn|jQrOmW)a@<3&B0>8#hV$>LFuc23*)8<tMs|I0&tBjBavrmeAGvW64(m+)WVF_q
zeTtd9x_;u6WA={Le|G-a7jJxi$J#dL87C5TenmZIeYdB;Q<Bkz!Tdf~^u~(VL%v03
z`|WEwHzxNA_-~y4vHEye;IaPqpH5{y@_Y61?xag)=9(L)=03gN)8TpWHs{=<x5T!^
z_e7b#_j_mi-`|Nn@w2;7mi{-F=azZ*HNS81pCax)VVijeLz*)~t5Zg6vTIlPLAER0
zw)byT?+x10FBjnx5STr6U0PgxefRcHO=9|SM{;l7N#7*6|L+-l`;<3YqF1-n@t<59
zGwp+7H^+_dJ++5!C>~pu<vNpdcIfn(GVQWj`4``p7dO>5z5jmr;r9LK^O|$zA6E;{
z$To_5zwzX+j}}`!i`PH2x~|y3tJ5$`#@1QfP3+UXc^4c0^qZz>yyV$?Y}zW%JD-wt
zAJ^^w_jk+tkEfnb-&^szHrH#u{etaL@51<hl+|vuS(QA+P2&M~T<=PaUA+HNoX!^o
zT<*@hwDXIm{mJZhj)XFn0DhMzpTD0!c(!BBE#9JjGmc!3hXS!NXYJJddzNW9rf2vs
z{_gWKYtbH)Wy%e{S`D)+oAOP!%{ywnE1K_;TuI{hS6R1ubI$KSup>n8`{93qQ{6v%
zotv~s(1B&cvMn5ud(tiTY2Weq7QHzmenaZKnOisv%HoClPv1YYV^78v^&dyOo-BVi
z_mHnFivZ)DDN}AOG~DjUk{7Z2y3RG`ZK3}!Xosy5-7)*4>!yox+ghF)#yWrY`Zht^
zqvD@p1IL5necemErU)KPJhDE-GUC(AHc4m8#64R$k3aMKEHu~fTIq$WrSmEpjZI!k
zGrBNbmK5H+L$!9%q|dLD_1@@3XD(@b9Xe}zg}ZQXugpK0x8ldA7wCLHcJ_Q_YE!!>
zhXRwu67?J2J_VPry<7e*uCTXNR_wO4_|)kZ8b4;bP8MG(e&yw?WP$K^oI+KM{ay@T
z4-`yRY~a;yNPVek<H+weFC*#W<bVUI$I2vf&kMgj`so>)eeCqg>UAIgs9me{J;D9s
z6iX4ayNHAD<%u@u7x#S4cKT}GE7~iY^32tJ;SM3j92Lej^Ukm?<n@_<LuKBXX==7c
z=eVA@xt{;D(JoqY|M#UOPYpOf{w|8~->|PdH?$-zb9U<$mJ8e4qrSCW`o6XP<ovBx
z=Vhys{<iE>NWb<(P^sak-~rt`|C+xX%+C?}I5}k6^=j9XZ?*f&|3qzlcXu+cT3z@n
zUnTX|PQl4#mupU!uxtpqdG$vApB-!8Xdb(Ld!xX#r4!tDFSB9QYMLZqU;6LXR*r<l
zoC$x_wwSv;(Jk+LzioHSt#3VI+%uxjd!5m0`n)xzTKUKyJN-iscdrlARkFDLu=K1V
zYry-(>o1C5zgYOO@6`3bJ9gd(Exf829r=8}X;|GX^Gl49Of&o$dp>W``V^LT{nDrJ
zCNs=;1ZJ~)tv<s!uQ}^bT}<uItDG@^%4^T&ZWKTMVp5d6kiTgD>)U+K?i4WXSR8jg
zda<j8SccD!-@91r4lQ%Jsg|&9-J@NCuT7O2eu^H@-F9U2mnlC!vI*>8p7XHP;MUpF
z|Gevu&*M8E>Yt>LuFtVjzM|>+?p3BQy-pX}&;C@RJK>{`6H~>Wx@($N&&6LDTl-vJ
zQ&Ib#>yhyPy0zCk7|eDt<W1Y0D%PaJdOPC6Wy`zoyy~YYFDW{CXx-D!N|U$@?@-+<
zUAmF->XqBXC2rsJ-+1}{%%h@@ILlrvyyVQu@xW87=<}EQaN7g>vm{>VKXcvCet&z^
zzq?5+8&0xZs4{W={cW=6Pp#ze=Pn%cFTTC9=f^d-PqoTVt~D<^`DVHL%1D{*j%VxV
zq{j)ojf~X4xA}3Iuk4-|4_Q9!@RdvWXepQAy8T$V<oor$-MiMgG0YBNSS|91&02QW
zpWSZvCrKM`YmHu?RJ<iF{PdyC*9*1f*Du<msC=lr`<V0cl4*<1tevS)_wkf*P5U)_
znGy-+Jq4R39vH_-><!VrZnNaA?6%84?#FT@Oy+v9?Ecd5U%f6x0Y<lP9N+rkNwDVp
zh<{$(tKJEA`+gHEN#C)`b+yI++4G#17e?)gb7soFc;x<@^>gEHKMmkhw6i!?#^KDJ
zAdy?iRkOginsc$`v-!KFKljQm_#6`#^WQXS)1IH+>m3=S!x_w<U-n*<<+h*0e|g#k
z`xy=CKbAb}TWeCbV&;_K-s2Z$weL)*e%f|z@;z7a8~!r#CVykM?+lWx^gn-*vqDLK
z!iBDHhQF997aXfJTf&fa|Iyjc4B{+HPx(mida+TQ<;d^fTh48lULyR^{q$2o@!#`i
z*+l3^t4vmX&a|U^aldEy^_y39yK}VrOt!gr-@d>0_S21diDntUPUS{5x^BxozunfT
z>#2=@_cW*Pr(SWph&^0+Y+XS9w}pns_OX3oE!ll9E@DA<#ivN`-%(PHPVZNV>3_RZ
z7UKWSd)6kSkn=MVg%_IcQ#W_ayVA@r`8)pAwC{qxtvioAJ0Yg6%dnNZeonB3tzGOE
z-afwzZ(aJ2#qL`r`OWC(KH+lb)ioC*=5OSGotLD)c8}50%@uyl{oyLh-(C9iqOyuZ
zfqTz_r>EClc>DVMm37;v&U{?I-Q&H=`_=#7UQzjFoZjfVw@lo~*MCF&GW8P+&nbIm
z?DII5e4{Xa*P<%%A0Br@YrOy8RxRVsvH7-O`I#orOzSY2uO!<W9+T=B?Kgbv=Ipl*
P1sUS$>gTe~DWM4f^%O+E

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/photos_logo.png b/tensorflow/contrib/lite/g3doc/images/landing-page/photos_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..4333426dfe008e399786c19c4a312693230860a4
GIT binary patch
literal 27152
zcmeAS@N?(olHy`uVBq!ia0y~yV4MKL9Bd2>3=R9u&M+{Ds8oeSlmsP~D-;yvr)B1(
zDwI?fq$;FVWTr7NRNUG+E3@p5RdL(@WYI{bXNi^?3kv+oCOmzvxj{8@)6>_l^*82h
zJj=+EUBSL#?!WW(^55hCXqr||_kLY^<<-y0?sA2n7XE&IelxG#{rlg~SNs;Sdtd&Z
zUFr4_Gh6mmcDD^~#@p#-{M;IE_r5&WRz9ho_lwnm1K<5OS>Jwsz4EVVK-c+y6YSGU
z{-61C`0<{9y}xdK{}vJdZ@%=w8?7IMN&<sD=H0*fZqf|LSDBk+|2!++V*9hs?~mTf
zKUQVu>V<9o`@Z>8^VWa9^TR*4OI}Zx*!!*hQ)+$He&0VPi$4@T_h;X-xAo)m8*?}R
zw)|yt_smp}(C<|%w!80~ZxP6mx@&r9oz#D>`M3W{{wa|wEL`nt);j;`A&!X|^CukU
zGoG`ceD|TGF3YT(oa7k8yS27kEgpIKbN%2{Hxt?{YuS}KaqE$HKkTL#ytjV$H|)io
zO7?R-jR%>+|9p8|xAk*=-92`z<6h=0_wOek-LvSb$>j}aZswm;Vtu$LDJrk#egE%;
zd4KpjeN#5vF({WSsH$w6AY_-Neb+|)oVK0ye-+Cuf2A4(3f~_}aZ+O5*B0W$yYJ`?
zCxyPbe|*y9g8Q7<oHPsB%TM@vO?`5K_s;C>s2$Ib3C+Ljz}=~_a+zCJhU}!N#S81A
z-d(w{B!83k(kqv|^cSC*m9=i^wOgwaujH(J8Wxqi_N&>gH!GjVRlQyN-R_r6p^Czq
z9;;c>wLOOF=5u*$cUBc0n<J#^IZY{aR*9sM_|wgwv{tX%_Dd@J-Hyx4{+8c8`*z>)
zc~Vn?F0FB0ZT8lxL@w*?lF9y-U$<O7Z}HhSd;OZab5TDg?Yy6^yJu6H<mvBwzD+bO
z+y2eW#lS*ynz4~svBKlCMh=edr)R!be4a6L^PHc8nO#CBqco<?k=02~G05(C+VcHQ
zZOXgtmBp(6SMNFc-srhb>3{K=#nX0$KdU=;TYuBG=c!Xlgi~TO48Oi)$eFXC_LyDo
z<bPGKXZW-ApSxp|+&eMyifgE6_s!$F%CF<p^&c266Mpu}xc{cglpCMF>`Goa@8O-E
z<RcTf>wDk+t9t$K&5=t{>n7e=H#h9M<kn>+9kb_ezH#u`Im5&a<@xE`x6b-2KVgQ$
zVF}&SFO}y6otu$)UU#Z{aF^CP!HPSY#$T_#WN^=PNo)vhYYQot$bK!iuWw4Z^65;^
zzL_628`-SaELoJ?9CGBfLDcjaE8kdfZRTV9-2QBm>Y1%?u9yUiO)=_qdFsLx*Xr~(
zvwhP=r%Pgw{pW3&H1C~|<qD-Ic|ZNX&t0Cea?{LRq3_tZoo3c%##bM;y8cb<fWzV|
z_Wa-C)8DE+nsYvKgUp_yFU4JVqWk!Kf2(>roxj{wx83gcwR?=!C$>m_<P+hKy=r>&
zLZ@rNmarhFm+c`;1<dJ%*`8TaQE44tA}8wJmVc_I{`$+_28GzhNrH{bY>qBmnJ3%u
z#?@<1NavgxyI2^Fvsji$1*mk|Em~S~s(N9DpKlhQ@q3r=-REA}-z_!&71z}lDyVyA
ze#F_i+NnX3OgmorZ$CF>&DkH_c2&PW%+IrD6IvzP@;)@&N+kbj{p^GfJI*V<x0nBN
zkMFo+V$jib2l)P;sTAYi%kF>X#EPZL4Av5YKht&OPjSq0<1j6sR~gKFjj!|IGtY+>
zNjE=>DSZ&!x$$AU@OJr^Ydl_SetmTC_)Ud*^4SXIT-;AXHamx0XgCnHyz!HrN$G3p
z(CHHvIwkuCI0W)1cvP+zzSVlgWm(MeOSK7t@^yPwpURtXQqv%UvA!re_j)GR>~FHc
z7pk`Z@|O6!O=?TU_IGa&+vm(KWewyrXgj92H>W~Ng6qkxhvMZk?n}+9oRV^8qrbb9
zb3yCPrIMfSRhFsBZ783on`QXvl7U}->Z_N*pCk>|t_wX8Ft?W5M>9e%B<zARZ?jQY
zcBXpPUXKk^svXi7ruuJNvTbpV)Rf-UcYcIClIvyry(PnS(!<{j64H9v7tQZpvAlg&
z!gku*=o_oIn0|gK+cNj+#AP#5oeq9ZX`B``bq#w9>)om9&9i=9P;M>k{_w)!#{JuB
zKg?{ke%H3OJPcd!TPU@3dvNIE$VFb4`dmI)oeOi06tdNb=9gJ`>g~+~$`56Vgx2uq
za6Lb+xks&G`i<S5_ulAsR83*<(Q3<H5E12m*K_ra&ixWe`}qTR{kUxG$#mw;!`be<
zXNt6~x4%-|#d=YRLn=ZrRod>!WDb>`=gUrSP`M>gdTT+iS+&A^j~8l+%H01AOa(Pu
zQ$HHIKc2b$*qv)LWStJpn3HqId`9)+t!b|Hg|h>=H>Fp~DOS52S(j<Ja;0y<Qfa2H
z$u|xsiny|gJUDZ&AtCKzfcS65f0=@s-38jd@gfUF6|{9$FLLARbkx@M5tZ2OcUSdk
zm*J*%#hu>2%4?tO%hh?5zhQS>&e}isqU=kj{cgVY>hT^nQCAk-3k;bYoDX!2exBq#
zR@5M~<NkraN*Xa5{}krWn|5%!M02?Y`^rF@h|t`!h7)FHs{E_3#QNW0>Rw~RbIil2
z#9jMl<+Hn8C;2w$>O2eJjJj#%w6HfsZRh;}?WdA{oBRspek@_~56W!_b72ej(caYl
zZ^ngyRjGd2wy#>Hn0PM;g$Q3bv($uN^1SgbyWTZA7fh;HFT71&zew$jO>W7{hRSPh
z`3DYjr`zy-Y`fyXHEUhdvqyD-dd(-wRc|@nzNT|V(d)i?$YSl9L!qUw@|jab`t&q@
zZ&#nQkozEi=Vte1b3Vm|X7GBw^*@rau!WPS!OKJ;aGA#@3x~qlu7Q!;I3_etb6OTV
z`_haf7cTaG-u!J6$A6z?JRg4)>Rb@F|1Bl**O>FENhY__!6XhYuET3XI^>?Fxh)Lw
z*`2s$mJla{{)E->7t624UzxNysp@Lbk9E^-<!5nD4rc%IT%)9A=121riSvbXqXI=a
zR`eO!y<rN<YEIU6Ipf;)`^Uk^=~LFMKUD1h)RU2Mg$Gwo0K0_8qPeHe?0fa3V)+pf
z6`#Kg&RN-c8+UB{J|Sf1)DZRzr8-XkBypBw*Z7o}8f~uFxb+GtT>YkUMDUH|ibme_
z&nkB&$a#6NU*Gm~<J0}qR7*PwojxAS=H<S=GQVRqPh(-Xn#FRp0&IA{N+(yGHJ
zS|%>|))3?2wDfk@6ixQd`*PAh8#XMsW@>tYB}n^c)l@~k<SF;PY~MP3o#_!J`a#lI
z>!#+u^;>Pv2wgrg<%rJQDwTxSCp5Zu8J;>6{J3bgxoz9Mg)<g86-!R9^1fOdzxw~8
z5NG*!fm|g*vdOPE%&1OKd31KlgR-tGSH5w}HX1l3WY-rnxU(OzJ#+ij)yRX_^jW<x
zP3UQKa!eDl*dm%`^DSMu`^5h(w;cr)SkCpnl&|`~RrBtBqx!txw_J8Di`x6xVBTl0
zxo<x!l<>JX*Bz{$nzMFZ0`DhZa~G}r1M42Tcc>=au>Z2O$kZ`Xh{52@y`3+YTX}bv
z7)xE%t_^JvITEr)g7b+*yO46jtbm)hF0sFAFn+MgWWuh6SKgJ)w4QLm)ALPollCDO
z9<Rp_<y@C6>bli*Q0S=bb1n{37v2QcstdLb8gb{|*4^m0F=zQNdS6~{1=A6gTbH{F
z(v5q?S0oGQf4}#47H^A-#Dlor;t7d1x(dd}PTt@)4)rw5S#ho5fstxokNPo2x8#!C
zi_`Y)eA&lm;Xm_{f=~KmUcLv1L=sY0cX4jYom{fzpX6!xJ>eb)4)TNrT&O$P=fxqS
zGVcvbvP^>F?S(e)-XASqTea(h#kL0-@2*aBQ}0`QwTI>CDbd5%I3FbbnPGQQA+JFF
zY}+kaAFEs3d}rAwB-gA~Puo51&_#{bWdiX_YmZLbHamCD&UGC!`<&DySdaR-OwQzb
z>+`ipYSPn)>#VU}?ebE;er-4A$@cHQJ9GYyFVEK+1}c@D7F}*<>%2hdov$zF{&y1V
zvNl^MofGj~{np~_0ha(%7N`6orUmWFmN%mlZ?gVV-Q$xpd+Fb&Z|__`=`?l9+wPu;
z>iJKim#-|bIka)*#mtX8Pwp}2ygbw6c#KImhvHH%QH{zkrAK9^9W)Vm8KQext?}r!
z{FbbrHA{cjti0JJc=q{&x|H<U<u#Gqp9PF_yWTr|XOUoWH|x8mb-C33?Dv_uGw%C-
zotcq0|2gOP`72sDukn3oy(N^Qy2R$-frGg<p}i{t@>k1j6^v`WxFSy{TdP`A#o$~}
z;ps20Dw*!)=zX5X7?ONRc7ETzb($sug3qU)I>0k8R`KR!<Ax&^#=KhAH?6E>d>(2A
ze6dpL)ssu>J<xJeaP8bn-Ik9gN}o;GTh_Cn#V;aZU+|0a8;86)&c+>^VRF30J-We!
z`}ouoN;{l+7Hwd9@zzQ_Vd~ngPS0Pay}S3bBhmE)bJLBwGm;nh6053jxQKj+yU8J~
z$#?$o+rT|Z**@3Jn{sX!2vqh&vxu#{`t;(TU9%TX)_l5B^ueT;-)E%G>|JXj(-C;!
z!5X$N9wBEK^z^s96*60Uh-K}nl{}7n3bg%{9%=`B&0R1t=#r<ko5aLJ=D}Ykt(biN
z)~TXxIW4A#`7cUeIp+B0mh_s5AF6IRZ%7vuG8H`;Y`p%=V*#~-g^nx>R-7|>@NZh5
zZn%Vau=|G1LA__h=9w=y)?LgIyyvl)%9(vGz3y%|_WG^6vV7Ok=6&)~NjthUS_~&w
zh`2EK-{+fK-O&*=tFdC~hdDiK7~Bl1R>leSzQ3$^X0yYe0;W40Z<Xga9521E&oDiE
zaYFbOmRDOAdr$IS7MH8bsK|O<k#EJ3JTLKef;p3ChV|&4P4+H0d#%;r_30_6%uXEY
zJMd3oiM!*i=-HvsujjTc<T-ch;2P&x%?noAi6_^5DiE7yBDC`4G~LE<Ei+!p)4Z#;
z2Q=N?GKKMAHmljP_b$=1(ipd$bYM;V66MBRFCuiUaoMANe;?g_9{h&q#<ZqG6O84z
zm>uzFyU?w1e)Sn|wycf8AO0^<*r?h3$ZGAA5;4WS)`i!ZkAApl*5s7qP`x-Y>q|oD
z`O^n4F)g^^{mscZYb$HFSRTKE!iP(}o1O&C%g_uk6kt2C`t8JM)$Xu@Eh2?7W~F`e
z919jM&Rux%%Fea_eNTw~Qpr$Oi(0@`Vxij4J?H0I(G)g@ng;jtf-faQ@_&5mvhUYC
z%(IP;??mv^6C%;y9cC;q*ev)ysWig+;?m=%i?)22BPrf|!oOdcDdWC&-rn##)dx>j
zA3Vv=`uyL{M<;k6^fT&rOpZFTeRq|^jyLyQb5bR_8$H}uo{6{{wD!&1a7Un_@Ol&@
zqe<-H;FUj%H)munS{@j>)Sg?_!&hYM@4rcP+S|5OowU6DXufOwJ0{*&o96GXd;EUw
z-2-WNm0Zg5g6%IFg)Use7$CHmYoj7h`Gz9v?VFCBQkT^BX^GjJAFyrjxgUF?U+mG6
zkUOyErIYspD@nN{4F?y^WZtmv$MQ`KxhaeGp53%{gG<}{S<Ow(Wrd&LTk0K-yYx3e
zwyj+Be7DkNm!At&Hnv?A49-z6;rqg)#r9yE>W>6AVIJns2O1+a9__1LFz?l4SB;M|
z_0mK?PoC(`{kT+Qnx6d1rC)N+sw;6PAHSQkI7#4{r%q<>LdSm5*6dB^QWu!twl+Um
zd@=fXSc_xrgS(GXFa2h|&mb-}nfuVo^m;A9@9L(X)Zf?6x$0GUmB}roR+se;uen=Z
z@2YKfPVe_0>~wk?F_~+%Qg+|o4~-Wm>Tf&1ywG`>_w=<#CN9~%M8Z{&b>mD?hx-~O
z1@Vd%Dy4phC+C$$yfIvm<Ein(vS%-Q8`I*hdwrS0792)pzm~8oD6THeTkoNEp;2I=
z^1H>6YfV0ff0(`3K7t`bKX&4;DQf8lxH=~t+|{RZp?cd?wdffeTIHiZTB=oaJwCkQ
z&@Szb+O|?#Z)6B(sj_%*wV15C!eNt}Z)f)Q`qTBR<v;x2VRQfCp0y_T{7+uXez_$s
z|Ms!;{JBySwz_Z7DK-e}GihJk_}xC{-ug$ZN9EuCo^`1Be?Z=8@4~apM^2P35zadu
z*1q0q<N7so8pPVlv#U$J{@&8O#{Dc#i|^7sg*gi4g)uWe@BO_+@nVNFmy`ZG$D=R)
ziuoDZidxOMnPz&hHPx?!u~gQUb93j(BbS)>_Bm+%o<8?c{W}p>kx5${l$l%Ym>st5
zwP@L!Dg5YQ{GK1qmnS{?wOstoyXDME9~ZC$N?hnsdirg{RbJz+18lMR2Ge^tHm|i`
zo_tZ~m5)r|ylKKqwjH_KJym<N!p?19uIzqk;+sF$Ff$~p*6hx++TCqQM?-guGTrI^
zz>-jNXT{mX^*5AeS!|#5PTTC2K<KS2Cq&;1zREl)Ha8_rE&u(diFL*aQyz9O#Fx)8
z`<A|DiyBAkV?TzK`93bkyvs~xP5FKGmxs(_wr1uKUAFuCUR^MG|6KI%0&}&vSB>3Y
ztr!dc&G}^2*!MEG^5(&~`R37`6??UK|N7tCT)X@4+it5ly}$f+1{HKLe(`YHEzy5a
z{fx4c%a`Q}|F{44c#!{DJn!GL?>~#*b57ZRCVoTO)R%TbFNC?$*U0^Kypibj!AbU(
z>$h9KEPil{IQW?_;o2cvqV8<J=&<YD?Sb5Dl3q2O+Z0|}biU)I<)5Dl@01_R^iM0l
zp_PAM`rwg&|LgOYyt#OwC)9t_)3l~0c0Tj6C4!!29kNl6E7H5Ceozy&Ia$0-G(h<z
zqo>lJ#~VZgGd_v<&k%PLdhIi5{sFP*B`M-p-#u7T=g-Cz@2b2#qp9|K+D+@UB`bAf
z7cM&<xaCiszGRwXB+so|_trbse5_Ml7#aCOxBIuKkZXSZlib;xj(yaRFfX0g_~Ln1
zyy8KnIWN{7Z!dqn?r$ag%-oOGcNagcTzx*#<N=@dO81X9!(~4le*HD<cKn6n{|~SC
z-P^uj-a~5wcg^GN3&nRZ1n=6sR{Xu|riz{*&v{<`EYgqr!}CN!ba!kDe8VkzL_+zC
ze#61PpBxklZpeJ!uhz6~-G-Rk{tOHZ30TJqGYd+RLF0wCo;!IP9C%(Evns1>^nY?q
zG4qASjrf+y2d3Fao_p<gQ|v|4g5ph6&dP^Pdv#+sn+%upmh<M<C;ye%dxMFst)ezK
zRL<B`<oU~)7Uc{K{1KTU5hW46K32*3xq68y`AMmI6}bfrAYfx(QIMFNom!%hl$xHI
zXRGvn_kJaX%oJOta8q9c-vZ~<j7*QJqSW9jzmVjr>}1OnC3`zAn+mIn+=ATHl0=1y
z+?>2(s|s5su(?)w#a19;eI*63l9Fs&r3l{u1?T*tR0R_~6Fmc6*NV(CBPBa71)HLj
zG^-#NH>eRsDQUJ!86_nJR{Hwo<>h+i#(Mch>H3D2mX`VkM*2oZx<x5zy2X`wC5aWf
zdBw^gBOqqDq!uR^WfqiV=I1GZ%uGzmFD<cEQsPojP=Fc^3Rl;P61W8*KG^u;k`#T<
zf|6vDirfO%iV}Sz0|N_P10!7{OMPVh6}bhzzHr@n#n4bp&d=4aNG#Ad)HBe}%|+2s
zT;f`Wun((_;*iRMRQ;gT;{4L0<W$$P)Vva9WJ?QjN{f;0EG<q234z_6lai)ikqfaI
zS&zG?Zvb2mC>YZ-a|^&aK&p{drX<7F6_gg`fYqcV>!;?V=BDPA6zd!68KQWoBDVmp
z0jqbwhJypj$|XO!6l990i>(sK0ahvb$(bouV5V`hX`-P)N{X&Qa#FIciMhFvu7!D`
zv93v~u|blhQKCsok~xx5o_WP3iFwJXAfqaB3-mHGQ>;vkOcE`OjSO|ok`t44O-v0_
zbS;w%lXQ&?(<}_rOpQ&GO-+%E@Gr_t&&*5AL3S0$sFchUs}$o@<3vj{6J1ji<78bE
z6O%OEL?h!wT|*<|B*R1lbBjb%Gq6#hfVFZA@U&Gj(lbDa1mq-^q~#ao+A8@bCM);{
zBV<A{b5ny$5<#J6XliV3Y+!0+Y+-C}X=H4KP!yJ0RGgWg2Qt*qK+gm$k&<lXmS2>c
zSYoS`nVXoNs$Y<o4wk6MEwFMfN=+=uFAB-e&#_ekxk<rD&(Hvz7ZhwjsoJ%o#L6c>
zIT4f_z=~5-?7$fToZXxf)Ahkz8-09=F-!{1$S=<WYlLJuJmw&j!cB23Dk;w|%Jwhv
zPs&P7F2QdOHpOt0f>P5ni%W_sJqdUUO`Q!SS$bp^m*f{!BH16DS_t7myp@v)4k`r&
zaPGBAOa_Ibtx|DfS!xP63KU>c$r*`x>8UBUO3*w46VJqwT#Zsp(<}@u5_OZ3Ez@*O
z%uS7TlZ=v6bS(|jOpJ}p%#BPe3{g!l&QB{TPb^AxOi#@#u~l-<%q;-Nvw{XBm1&|X
zFV9E?B_RVt3ta<CT|<iyLlY}QV=Ge&T?2C~0|O;Ulq=ZigGxu3mu*1hhp(>{a-i6N
z$`>n7P^t?qEhxyzOf7;04><7zrxrp?veCz(4oN>a?d22Hj;su#+d03WvM4h>qr|^F
z53luD6hch!$xKeoE5>U#vJyzJAXOrET+pBja&fccve5_EWT09N5*DCZg_ana1Zio7
zg2JdJB!%y2aE%5RNg+Uz;?dMK8eAlW07;5RQy0~Oiwn_RPR&cPRVr7qw<~mBFU!Ed
zz?S6g?!xdN1Q+aGKAC}mfwRCPvY3HETnL02)8}6bVqjokFY)wsWq-seB+6^WDXy)-
zz@Wh3>EaktaqI2g>J2i_EARc>V_U4pQ2DHkL1>DL=d?B42fLSUlJRD8Jt@S=v?Y7{
z9N!p0&kOguQgSD89p0fO9lc5B)H>hXpq01oYW?5nd9yIL^TE;7(6vqsecPXC1x?Yh
znj#XHa$3maBO8;&f{JHF=NQkOnfbE(|BKTl-@lysat1`&*k7B-*HB!QR<&$@y#4*}
zd+)OgIB_VpP@70$e5k3AV)4B0b-g9WRe$Fc(Pcm1+_ta#{rOY&{$)1CEli3nEt@{B
ze{cTn+qZw|2mX`?guKa46F)HD>1$*8pZWh+p4#l9A>h=}&;9>f`Gub^pT#olTefWH
zs@=OAR!(@Ddg7AMf?x5BHQ(MY-G24SLH5c+Ck{<~uBo6h=UlWY^MSu}-@d(i&i{T(
znbo`fPq|#5Xum$LQ1q$y*3W8&D;391EK2+!B5;X)^JH`V_<t3@FSYs01-LGcJA7~U
zbcU;s8RRU#AIJ}@IK;u~#IaGxBbj?{R4%KOgtqwV=hyGtJ9KaMbe2`e8EU@XVcsA4
zBvM&~Ly=|f+4+a=>=pXgU)O2j`6N(H(5X?tX~DA2d0z}|I_KDYKaek0@eO2NXN!90
zos%j~8Ujuy3S63m1b17%Rypnhl9CKn@)N530W$R&r;-SVV#*bjB5_r)QjdyBCkz?*
zH(GZGD6~xQ&{R<n^!um^Qlrz^q25{0rN7XDBhp0j$OM-@#)=-#mH>s80HukXnr2UC
za4dA-Sm?6ouipugc!<N?{FKUXdM;UV#~Y;1pw;E)Tn8b`pKfn!I@M=@buRWe;;GcX
z;l>1zbtfRSL2J)nYaKHc5e~&ECzg1&9Bl(dV~^fkhjOvWAWIUtB(*yW?7@C_o6PrJ
zSW$#SQRL7Pp^eLboI3nm?gvvhFDI)LhpJF-o{(X`=9eeZTe+bI`kB0}SpGxn%fw>-
znnvc=Ad@v!s)RE4S9-ot`E(T_dP&k#sbQ9n5?Dpb6_rhIRVF^>s(o>XIX+>d;z9=w
zMP<(;k`eV&z+s@1#U-h{wc?mdpY|6-FgMI9*ZT4hWG2K9nkg^zStg%|EfZP@i3!V2
z3+C5v&VeGqM={y61>ztc6TcZQbCR4PA<sS4N$tpF7mK6QK_O<5?CD~m297I*$1a{q
z{KlOETrCq^Jwy#lt6P0OKCJ`0xJN~B`Hcy`!E!tO6&K6aOi?-1RSHTi4|}G%ylMP;
zzBpknD9{U4n%vLXeyHSIBMdUfN3^lIUrBJ|w&O>gpUeHQfrO-_VrR@QmKjw(jqWqS
zE*JJL>3oy3@b9y>10|C=!0GD5BCo_XDxaRUO?Hv9`+nfLB{&V~teAUjYR7>qyG1N2
z-!-1!4324ojV{*P)H`LuA3TxXY95i}p&{TT=Es#RG=XFJ4<W7=0i#VWN92TmYO!DA
zWOX``bW|FWo@CU0l7qGGi^Pjb3xOOv^$-g@d8lo25VAaJ2u>8oJeA5ffo(cdzsAE!
zDQx4yM=Ib1o_31!vaIn-HNnq2w6250om;o{qe{`qi6<66|Hlfpk3-T|iRrS<G0lCT
zxcAIxng3OFVlp@-r2JbmAx!8YIE+))O>*L5nRMc?4=DOl8kZV4f@DNE6qju86`$9V
z;rYZb^qo2=$WQF>Jo4&X(ndvACyqoeNz2ZHgt<(LEfY9Z7FDz`zXoNmf6CW5?4>=G
zzKO)wzWDmv{&<ozSBrp?o1fBg2Af~M{?1Pp0;RbWl}?T;Hhd2jUVr~z<~XS6;^@?x
znENJn7t4xDo&$&1-~VrU?%W~=4#nWaB{vgp&ah0&%TIiNNwslSIitl-kU1b%oLIE=
z?xZV?;tHKPmYz>68YVwCJHPS91dy9Hs(j*LzoseR#331~l+@o@GZpN>7K6?Jg%*KH
zSwe-&er$p$oj94Jn-`>Vrbg$SCo?!$oj4R9t4y@+y?V7>|KIHYQ$16To%k$rhVR#*
z`|D@)p4)F^J}bYO@yX{^^Xji2=<oh!Kj}aF<I1wV>~cRITfN%&`0q_c|8T~P^z#*O
zp9IBO|9PXTIq}@Tze?GykMm<Q?(sixoF~fFazwvhXv^0LF}=x~wqLrCmA&?I<HXaM
z_uCA_412CzNPp;Zv{<6>U;FJwm3EGvzrPdTy`7nF6TLR>PqfS<&xH;icgi!*|9@q*
zr8MZOP1oL4Prj93I}>Yi#{7HNTp5+={x9<CQYuwS-z<r{zoA~-Vf|uPp+h(J%SXgr
z^cMdfYF?K!ZQ-50-AsxfPd#qkA1{(?%VPT8<ulWgSXPF^5zc|@7i8_bsinxSyj^7L
z!!u6G^F<qZeny^U+|eh0Naac4)zAZ<x+-5D+IO}){qt*ssk80RJ^Fnjc*4^sAGKdE
zT6>)7(}u_PM|QY*A3Ioc_sxMfZ_Zx0I-Rw0!b^oqQ;#35z1p4rSyV$=j@9YWH21)U
z$<u7kb#F6&%XaBEt8#fjS5d*rw6X=ps)Czl@b+EKEls@t?5O<n(*fHYML0U6<wP0f
zT4%XmwBavV^*8?!tJ^!NDNkqB<W>khl(jnP@@d1awHp01+mjU}oxVE0;J&ZR`C-Fc
zn+CtF`ThL|q@J@X>|_pl^CIYOP4Rk`4i|ahm%mefv`vVZ+^)0!#j)1f+0t*eB{3;-
zay)5$Kdt%Ghf_%jxAhE)*aIgqdz)V7@m3N!w4>?Dl9X99mKcADtDbbc%{F13kDLfc
zr}*yMuLM_A?up+X>Tj%Q`?mE7-y)6I-d{_OyJ#%e;neRw`S{Y&+N-^re;Q3@XLWis
z-91o*zg4y0@K(WLiw+?^otw!@$tEA(>{BW<(JTA3qayKsw0QlT^#+vz3MzG#Yq&p1
zB*;m-${GETS1o67U88SmWNzJE6vc65R&HwCt{v;EI+Nu@I6NKX^#6D{F7W#_q3)B!
z(^JLMD}*GDG3qXw^hM;pN~^=oGs|q&F-;3!YuGRU{506*wv~TA?OJimwJD#+M{eTm
zz_&Xui$89dvSrhij+h*a&76&!JZ%K|eg_Ih%V|Gb+PylodfQaP{qrrI8hunN|7;3Y
zsaP}H#$=h>hH9B=M~9QL6HKF(nkS!F6mueH!z53+-**^4pXC+f3{p^;_#{y9sPf-#
zmgAzH|L+GMFyQkDJguxUQRUsQ^bHS}WiOhsc(#q1-n-j(EM`9rxy#zv+OpErh2N?E
zgHNQ%o^#0+k98XBc{6XS?D5_HKdOg$Q(Dr>#IWyOwny7$W^3LoIv@4v#*Q89t6t{x
z1((ir@OX3fA^$wh&RyBjy@sLN(`DCu=&lS9)+rOx)cq-*aHUY#KP>O5<%IbYpB!Ad
zON66SdiU*Dfz@Y{*I4ymcvSp0nXg(|CA%_*w@rLvspgEOOLen-THc?ZqV)2PO20QK
z>^wc>&i^e6Y*-jv+J5H+?;m5OHBn0c?wj|NikwN>m$&V>q_F3%kKP}CST&zMoc_VG
zdeZS|us%1r*G#n{w<N7AdG=0l{^zvl?<9%WA5<o;5RzQn_{ign%u<E-A$M!HdZ!ob
z2rY5&`15H)H_Jnxt9R%4h&;;Qb=Q8iJD1|gBe#2>zWcoPRZYHxsK}Y1TiNMnJ)#tC
zm){Id+_$sPES$?*ZcqFIhZ89$7b_XaGdD3mn{}nsR%Kd6UD#E9Y38Igi!;<G{`fBJ
zSrlJ7N$!!qoOIZygx`*h6J|UO6nSy@?i0cAxyKu_tyNk*{J9Jj1MdImtf)J+;z8Et
z83(s+PTIQmQ)SO@$<HF3aaOan1LaznI^*S18||3B-1jV)Wc_wS-kN<251WfEV!T|E
zs(kl7Z?ddww9>~n>XRydMKrwJeR*0hM~_N3%gy_)jRNy+`rPN|n(kSr+~soUxRcFs
zvHlzv=FL0)Ogyq8;j9iD`?bo6=RJ;e?QguXU4GM^Q!971FnI>Zxi-#wBl}t6>m&L7
z)u(0efABaw-Kj=s<Noq`w>`;9*A7%2^i(P?N<V3R*KqRjRXYn4rv{gDvQFx;dAEwA
z<jG%~MW<cZmk7MR@A~F{`(1C<e<qi-1-~wz8rRvevCnk#jQdjAJ}SMZ%9bs1Y0Y|b
zn?K^@;%y(5zJFE_@en$>I6<}l+zr>J$DB*Ubq=muJMYw&2j7#pm(O=`;niH6Aa-fS
zquma#3(h(Sy;AG1&5yO2t-X3>jyeacQ1Pb-@frP&)?wR2tBtQ7FK-bvle?53HtjXr
zVM&|!#w*_Gu;)AUbXjj$ck`xyisQByzP~H0cdcFX#IS0jLN%ABfRRn5$X%s_*H|5Q
zTTNZ<zGQzym5S2d_j`<)z5i_9Xj9L)+FCnH$aCrIE|pTbxp}L5xqqAK7p>A;=pf`N
zC))Va!@M%2TKb}h>!r)3Pc!`vsXQv~WaPN=%aQZUjZa=KHJ&PaZWwI6zQ(xj56>fi
zxvRb6#jjVMXfAAJik$J=x>n#>arso1Uj|of-g(Of@3Z)Iwf%<*(-%#XkbajxE00X=
zxFMvOe0JVk(aSSBmVG>y`qFvI$>$IKuI^3H^*_W|?9_Oq_N?vy`=_?q^eqydWb-R_
zg{G;Lv#Qcwp^tvwOY4`XuUhY~)E4Nt(fa6h$&SQdGdyorEPoYuXM5$xqZ5=Im$5iC
z`n1`GeiziPoHO0y(~L<Ti#%QKMn>30-tcr`@^oqPT+(E^IYC@{+e|y7gxMeU>ij1@
zaN}`pD&b{ys(PBpJ7aZa`jaPysuMfD+_ye5sU!W~wFyp#=N)<U-)KpbQ@U>a`z?uO
zw=&<T8$Dhdye#T^-lrWIzE@KxDlBw(H2wI;t^-@XeV!2_m#E~qBJkpY<WAd%*QK4<
zyj65MJrp`uNQjDT*|Ftnrt@NNm8ES{Rx^Cx|Kj`m^5w7MstUb+R~oDfP$-^f^G3<>
z^Ng#xTW8*B6<O|MbLfNI<=v}7O*#+AM!fe``gUL1U`bDt*tA!RJyQ-|a&MCgPo5vG
zvZHe2`8v6rC!QgnY*=e2A3u37Z}q%q=9hWhKiuTnb+r9a?zE+^xn09GviRq59Qj=P
z-6QL6Q%6WZNMu&YtLp3qiJg0nX*5~hXm;7WA!|d#$>hwV(JC2dj(Ds1C>T$%>H7C%
z;-5M>Pen!6md9#>6DNG#Jm)=c$CQoBT9>;0S2-k>wo3e0UABFH@4JxLxZf>Io(h#V
zOs0psS<<&ShWKa+e${7wFDd-d<H!WAuBe!-FMMmVea_b~=Cvl*|C{&MM(@q-I~9h{
z{@R>yI;^J?;o9`;PyBS&Dxs`CgM@XBYL<sRRW8W6zdE-`+IZrIcWdq?@APz;E;aX9
zoS?wTr)R%BnRxX0%%bn1=9O>O=gl!)<bU<*(YSy~28}NNZS4PFUwZMWVRfcTk<iBf
zVTV*iOgkf(U#G3Q{diO2tI0Os^5bd@cdp!J^H$`<p`_C2m&%V%AHHpq{@bfas!~>C
zqMdY5tB1-ChQ;|lOBS)}KHFxMtXn-P?0V?2OYS|o8z<Ym*}C?W!PL`}1-Mk=DtEXx
z9ls#6_)v|TYtsdlMFCe2N(U|cFB1RJ)8(z^zMu<E`|iE$ndDMoZT+(HlS=<~y?JLh
zdrSyW;8f`^-R`lWQtn00u{#y1;VMEg{HHI+Ud?~IM#IzPP+EHEk_AU=j;I(t`#GyU
z^+~d){N23Oh2KASSaU9|%5iF3GR21L?vp}Col2Ff8&U#Q{8FpluWMWE&9bj|MUxSy
z%0xz?nZ}hq3m0zDIum}zTl`yooXza_Vw`F}VqP$FrCglUzS-vF+`CUEx-l;fp62=f
zar?E-9-WB?3hy2jZ%$SUx{|cdFsfXvQ_Z3N(+-K^i3*A$9Cr-)v+eumJ(-wRbYV)1
z&|AYp+m7|>2$o-nt^F@{Yi{T?)6Em|e@s5X@;o8#((#t<AKx|nZ2dc1I{ouxQO-@j
zy8;vzo?M(G&MEhHzRj{L5591yu1YxeckjI`ipi>iUqjch3-^|Ht-o<M(#^Fis$dd_
z^x~5re6BvNlWSq}43hKvWTAiY>BMC&#{_*2$tp9e3MJYL_7^;Oe=f?HyE0q!e#q|4
zK^|<K%lUad<$kY{PXGLLOZe5LvrpXNK2r0p;$ob{#~T?ZlOx>E%iKHfCMEnbSp8DE
zy`W>o#)W~72X&qu*EYMtddGiB{HGlo{;%!UKV}j--AzHHBV6vd7Sm@#Z@GLQh5n;L
zo)tnH({g!bgkNqioAy0@cd1fR$dUvxcORu$SO2<P@|K^cbnsws=}vi@g$~F3<XFqq
z9k<`sI(MwTPQT-V)x;Idum6^ru_(+t`e3)G%N!M>7^PL*|KdD1cx<p)tK|5r;mlfx
zGtc|hMxR`KdHHQEzs@tSr<yp<^>{I(Lw#yTg{qR9Ygd}#eF+W0vp>4_X`b0r^x8&E
zQ)r1G2dj}mrO0n_PPNi-tG818Dj%ZM)(B5bJUvfAW8w?X6hZwf2SV;HJlT9VyoE{9
zvhv37N**mCpIzC>VXl|{SN5A(YikI)?*Hq;rXu+Nz%Qlc{DS6{IWc#__s(eiYx9En
zk%9Q{#v8vYr|jCiR<9@G`NSi#`8Q^5kDTJR#B9y;<8y7DWI9*;@L#&sb{*Tc<Gerb
z{r)}8<(khE=@r|%8J7J2wnK8F`=#=V-`||4O)q_X=-=5Bcf*2XSZ5eCo|)$`@4sJU
zg58^UDi&dLkAKlVblF$R>RP^=x02Nszaz6<j`%6vOFDc(WzrHGr}cfS6}(Sh&RlbJ
z|KtZpBky`nC|cyOspsdNxuUB*HWbU8HBZ^Ja@XY*R`cf`?~YS%TDW<0Ra#c%|2KNo
zn@o<?DSg!KPTKd_e8ZoQ=Y5tIZkg1+dA@s-Ym@T6HkWX##A=_$B|7}?>*O5L#lC+I
zP}x4Eb6L?s{}^>4A0>t(Gap4qtWw{{kUT$Z<L7h!Nne%aeRppBrtAOd+R@F*?^G;K
zm;dq67V$WBa`Eo@?z^RmXU|;9UtCorq1w6P`OjW!Yi*OxifOhR)6#WSchC2@F)=K^
zsq(?!h`R6932WjfM<q{x5ps9S?Q*V0rjsjdxZcS<JIlS-n)8^JU>r-}`SU75AFp@*
z_%3XCDo@BW*Iw}R1AWKup%b*lHkN#D56-N$_1*+3+rop2kG!7T{Je#!bBc{z@$y&S
zZnv3T%WhWUw3hF@-uYv)$3-2zuQ&Zw{ylwgw*1aByUwfqf08!wu5WqsJW=mPo!k^j
z)=5n^UCfi+7YQx&__g=n$yY9a?(^T0yS{<#n)5f|&YcHOO~0`7o7?w(?f2F8DW#=<
z+VW1kuV6c4ZXS20cl&(zLob`}hNmoJO;Yc7Z&T|(qPnR=N%BwCV{aviX)ZkVLY~jA
zmfia%ePL?T@8@?y!;_z$NPkqX@N)C~<BPWLox;mH=}!7<#~J5kb#J>i9amQEzjEt$
zrTEE=_4#k+^0KQ6e(_dvzIIM(_d%hOb^Cb*&M!@ri1_<6UgG^)xepQlR_@yQarrBw
zS^=&l=hHv_+^^MHwd~b4^%<*X-*{dncm0sm->ojUZVD)?2_ByBvL?y7bkd0zzdLy{
z@05H^o56nX`seSb?LW&&u&?V}uK3nh?(WK6In|u3Lj2E)-L)6}>exG3;X+@ceg6^t
z3z1Vi_nfc1FvH_PTKyA4!SVxlZ{6Im;ONp9zdK!a7@P6mW4jyv^5^dxX~{=t+B|!h
zH$StL$<ss5)x2`cx$0eOlYSgndTn3&;ymTI&#xM*E&6AFibLpQf9DGQ&JITBHC7WJ
zda5Kjr^fx)Y*`;GW+a{6({?dj<ww#24}+<#oHuG+pT#JYecI5)5}kNA%&^uYIRE-`
z)hRD+*0J4NweRyXB~?LQ6+st&r9kPeOSdm9{?NAX|DI2q-#5o>HA_=|HGkFZU=@ka
zKe!aRH3dHJT+AA1%Q@rt1u2sRxyMJ1x4sUT_$NrsYU^~h7kg*Q>vv|nXsDX6vgolM
zYrfns_6t{??$@7>-P`sweoyHh?zPd0d7_+t$%#jJ)>gYR1}S)|^>+l9Cg&&^+vZGM
zyVS?cw!f>O^F=`Cim5g>|Jxc|m`=|V*}CIO?9uWiY^NN4|GLd^lJ|!o^KZLP-FuB$
z*-X0<U7H*$&v^e@AfPcp^VH%D_5R1rlij&8Tdq5<d9J&~FJ*<xmA}Umv_7f{a{7Fn
z+O=lpG?y)x`|QltYPToG6(7DZKf3Py`9mvYdhRsv=?eM0-aKvVM*dT6Or3#pse$L~
z<dS`__Woq{oOq;eo|3v?x_9h)3#O)zwfBQ_I^NYO392T=eb#)DRr%ulQ!ZDXzjw~_
z+b^hJmAA*5bJIsv5s!YGcc0F0liF@NJC512kS8}-S!E*Q?@kvlrCWDz7PP2b+W9Yd
zV*K%a^Z!>nOMg8OayJN+wRf%CWcJNbW5SFlfg+}JSMDmCdwj|Kdo58lT`Jo?3(Rv7
zSta;QSaOETmh_v#rOSTsP3X|uFa7MGw37Y*r{DMSyDgNt_lQqRXwIqMOK(>5NOduF
zI?9O#ehyqwxu<+po{ap47P}>%LrUzHUcWQj<;r{)w+SBQHqp-0Z9n>ce5)+@`PP5N
zT~R%M{uF0tM--XO;_{RO4+Vv1Gk3EXTNRw!Ha8_s;aW)SXT|JQdHeHL?@*caL2k}x
zEn(lsUwk*r@9<Z8^}qW5CUt|9yZ8K6LX_CfY`_01{&&`k(By=#hvhk=3?|rodb;nV
z@?z%aKJqG`nKn*7p1f}^i?F|7W{gL%!sL*6XP)1k6;dkk4;OxazV^R(^4+<QUQa*q
zsrsU2$>S5}>%RV#ZxHc*H~F34RbPclgIWQuBeI`60#s&9aS7Vxl(rxyx$6Ap?+1_F
z_jjq*y7t@a$h&hz-MqJ%94=ax*j#$EdLH-kfFq~QN8gS3$N2eA#qY`^cQ+Z=PN)>>
zm}}#9|Ge@y7Q@ysTW-^a;CjBl@+Esz7Cn3<p)yg%<=Wf7E^m?!pR>-B{w=z;?uoeV
z-LTbL7cxJS%(K6_ZF1oW_5TNJ1b+S5?!9Q)*2xL454Dua30*ht{um*=XU+vN)5c^!
z+gCmR`Z{8L-g&qj&3k!-SK->k+{hJeHShn-`^%8zu%yW}xR`g#z2G-8KMvfGZ+iND
zkNB~J&Q%}X!?VghO}KG?cBQ9$3ulT>+>bKP5P!cg6$|&wlsj7{TvT2E$Y%fH$sK)L
zG(!zKJCeni+hnTZFYfs{KjzrJPdC=<g=;LTeX?%u*=tpn6IbriJn3w_gUcY1snM?7
zU2>USPA=P5whJqdO;LK}$6vU7wrl#KV@`kCH%#hao5Z1{DtN<QKg;3sdp0ij#C7-C
zbn6^81ntUNRIMQ>^K0tj@ag$~C9>@6M5XUniOs%W!T8p^j3GE(L9*}Wy5cv@D!Kx;
zzt6MYw*RxgVS&vfAqSyeJJ(6QDKDmWRH-ygR9&?5^}_7^Oq+{X(ta9j2<iKtDb;k<
zi?Q9V%9yYAOWKxyAI0i^ZP$Nz_WhoQSM{}w)z7yZ&$Z(?nZ|VU=fn1fbM`KuYaHYX
zmv^WfGis_{Kk3Smht`|iB!oG=jxbES$G`X2<QG%&o5k`P^hDiPf1c6s^pn)RvM0_v
z-X(8e@U?$?;NRWh8$Mpyyu$9Q^P9bQKB!ODJl*`VIVDWt?Xnt;_0p>!ewxABX)x8t
zS?b<n?-P6>wW^bLi#@xweo^o~-n56!7iLU8_TuD`1715DoaJ}3w%e6)&zgVV>8rbb
z^5d218-M=zobmsQk-b|B)63f|G52n~oiTe>bR$n*_2VfXlXz`|J-w>e@%?S-h~S={
z@vhlm{XZ6~r~Zdxb~Db>=ec)2xogdo>jf_sZ+~zreh25;^Y0Trx9dN?6nuWsx3&A7
zUoVaNo@!hxz{O&4JN#de^{$m&rO7AXIZiMUa*^sxoS3p`W`)dlT_Y}oZuJYAcey^l
z`tCgawDDItk8=mSGI_7pz7dQ0elhyOpBUz=o7X?Ow0-_@fB9OrxogwRo;rHmcy77z
znS<!r{T08Tv%Ixu*&y_HqRO#!)z2kf&wV76ZQ}khUpTaU0ec>^SNlTm)+@(ME**L^
z<3{!ykQ);puY4X*f67mddy1XNr}=eXd)L=Bo;6}+Vz}}9*m;Sx>hBf{{>vEH^ChU>
z+B_+;YUWD2b)7MBrthv5M{QriEO-3||H?bOGJjOIe6UMcYIHr*e=qMktyx!fubkW0
z)thhI?;T$)w(EA^^PKcjej$ckTfVs0fAjx;*xvh*@a&RTQa9Ig#xIgxkehY-PyPSY
zP05X&@e`%v_j9(^@*XxQUtB)D^UNb}-=`b<_Z5BV*JD->`n|+o^S;j0X$9A<>loed
z{Y~8S|ABSjy?6<;8D{oIj2CwB?5X0c_?9YszczH^^-j;<vret=c`h@tPxI132ep5#
zv8?-8AMh6+h`0H6AXM#lGzY_j*#F=2|8#QIDPNkK!zO*Ku;P9e=l0lYX1kB)<{y6O
z{N6F~tCqy)YpqKUoYVVoiS@zFaMAi|!4>cP-t(2ciNBiZyLHB+VyVYJW}U0v&i!Ei
z-<!<gf77{I7#QwZx+r{B>6_Qgv0+;DvAfs*Gv(Dj*R8mJu>9u96t~H3TCc1RoR&WD
zJub-RE7u0;#NV;9i;rHKa&J#aTewi2?93Y8#ebR@ESV!ta!)Jzv9jylhpqL6U;QWl
z;Cb$twxQiQ{b}d1^7`cZ-B&Xo+VA_M`{#K0zSi&Srxm<5l|Q0>|7+r%g16@ra=&Ul
zF`DP&!@6xfL;C#yo6kZY{!~6Vds;Dlt-txZ9h}Ci@5at@n?5V;wW-R#KcX*Y9EgkF
zSkL;pvc!)0fgxARjp(;$D{n~@U4Qps@m;C9FSjoLcpN_enCa=?AMNA57ymf2+pdK_
z?zgeV`{>hGDl=o);%wGeJX_PbJf3-5RGlXWQ|RPX8+%#PD%U)@m)Zwg3y^Z{>BOz#
zYZ~8|>upaBoweH2)|M;f8B3E7mrHO3Lu=dqGDf|v`j7kLe)IkN{M!HV-qWmi%3c`c
zRX=b4a4`FR@1s{&SN6}G!Icvmwx=ZS;~8(Yd9|`P?y!cis#QHr(7pX=&(4QJ7dI|D
zmi8kpneSDg$>F9;e(ZC@UZ3LY`R~^#a9w_uVDpmgTg&%93aEEn-nD3P*XFfW=k|S*
z{qr&V{=wVR*B^^*&pUK{{eIT=^}i#hgirQW>pk5iKmYZE-}<vEo|(Gt)tn}CgyBe4
z-@8@)uimeFr9E+r>xKm{ldru$wZ&b|*Wqhmq}2}_R;M?|+$PU_YIE$l<<zM9-d|O_
zvv+hq@0^$a_CbGKy;S|5Uzc~({nWZs_(H7a(OmgM?0oyUm)5dM2+#8gU9d0tk-E6`
zk9Rlu_Who<XxnR@ORCp`K7Gi^dbFf}fd`w1%kh{MiT9TMO!Ezh^p%&Zoa4r~|Lc+F
zld&sBPOSOqeB_>F^1n%L=ND(~=ehP=^47C}?2S4lvr}HH$K~ce?vMYi{pY0rzTWcZ
z+*SYE?e-SGIhGvWA05Al%jRE5>FY1|bz}Ed-n8sUwD+#Q9Gl-H;9mS_V$)wM+sq@C
z^=sz`&zFDx@L6<em|e56M3hdYMu@-Sa`k;bf49H#w3~M@*e;umF~MU|ywne;sQ9G2
z8>7wN|E=RwjK6#T{kpe&=QqyLZ`>m4cHHFtzW-})t&UD#cPrEESe<rOballWee-qu
z3KmAOCRFVf+kHzYoIP*-l*^niJDd+iU!N!4+97ST%rw>fGH>;<OO8^_|K~j^WUI9O
zcy@hEwEr)YV1>=%jwYS_-?x+>ySjm`IObrNvcOl)BN^H~eXovdr9EzAu0E>$c2-Qr
z9-D8@d%ye=-LsSV=Gy$}Wv{pH($8Cw&f_dATXMN&6{p4BP^pMH!B_vUE*3F<u3DS2
zmiydf8##AB|C-O*?XNPwlm;top5=7x&#7BsuU_qq7WerfIw{FzO<I+IV#$?16-$Gy
zYZBILFflKx__+3c@83J~|IL<7$mBfnv?pzEVAj>F*{hb_l)V+O<G-cZ2R)CajG12F
z?_as{kM~7`xnFSI`x$o2`d><ZzOd)s@3Z&qLwsLvv}7~d%qICcX+loS!&xULU%GJY
zTE^^YHmBYmd&s<YQAcrp+JmXT>W@F&%l1uQ)U9gCuQK-fuZv@|Z>UaZ4oTe}r>`9l
zuNl_0k5&KZ|1(GKR$t%EzrJdZ(hJ!b`MPPF?v+kGy6B+ECHG0|4^Fo6Q*Qq~@7Jk?
zZnMITgirJ=R9dL1#GR>>CA{8K)_jIX!s){ywm~ni<XLP8-{9`OrY%N9cXMO$)y>NG
z_wMiA(|mUBJE_u1=beiz7vH?0XZHH)s>_U5-<~Zxma%K)k=uK(zWp@mU0HYIwX6Uk
z&tSRFQwq{7YggD;-uZj>{{C&Vz7;QU5L@v(YgYWViWaq!Hy>^nFV0oE9{AZ~X{>e4
zhDfubtOEtsop;!NEq%55+YFZ@dd$KV&+^L?elbmPm^MwZE%D7Y>8W?Cwk|7kpYnU@
zyd#Zy;?u34C06bF%yfEp`pSc+-)y<d>Lq{c<$}%ELt1CsY+Lp*a)CqIS@}g)+V?)B
zEmrB*S3GY1&F9IjRr31fuguqM?@ID-7ZU7HY>>!{{ulN5_l`Z=Wq;acyxH<`t-Pt?
zxlKlX|0gj`I_+F!nR!k6=-sNV&-xy(cDf?Pz0qaSJeNl%J_iGf&-3WMy?<=S@#rV(
zR<nA^Z?j>z8a!*Z-=_r*Gn|Bcr2jd-`LX}PTq)^Ksc$rka%ae|FUc~O%a~S_bd@{m
zwwAVwNRq~p4U;bBE${o>J6&o1zs>7o9lvhktaxR*%UY9LWk1W4Pi~v{1>aj6zIxTN
zmnGqrMjus$)`$FB#?L#oLoe#}&Id0KEB9YMk@+fFPUJ+)4dw3Z>zC)-AJlKn6u;hD
zFIg=sAsgHua_j5isbL$Rx9rqi|0C~1{GC5_Pp<#^!e3^?yDPSk@BNnO#)<LUCV3?9
zzOv@}>sKWaHy6E76FUE){quW=1itmZy7{VaNq_v~SJbNHBR|ta?p(;->iK_8Exfg5
zzA&qk+Aa>$jfo#89G41`NVEtp^`86cq1Wb=4%>ux_kLDiQxo}mP3F;^YoB-QG5?dc
zZr|Nw#_8p2`-}zWn>5MhxYeDMi@2|4`g*I@l@fQ$2kX=?{N1eI%M$GV;QgONe*cas
zT|PZk$Klrpm44$B>CSQ@Cw$lhPP%-b!j`2!>DQ;TdeR??{U$z`ShT$Eabub%%Ofdg
zCi7SKs>^?uzA0N}asS835ARlomv7vdEFZpfcivQ{L~qsGLK`P}ERHMJ$*8*A5zeBd
z7#sDEVU?|?%VPD3J>s7XYk7R+mQOg`sV+Y!Z@Va~(PgHKSH#oi9y3XdFF&_||Kq&S
z=Y>lyt-F_S@-T<j;g?LC!j6f?Uz~U2{{AgL?>27U=p+2&9DmuJvc1&_>59ich+Obh
zy1g`Q@jY4Lk4yN=@9L#b)cL9+{AJ=vg-)NHPkq+D_S*b2Drsu_W`m@-_jRANFH0Nn
zH@CJJt4~Oge!KF1Zl(5C<vn4mZ=W*B*WLcwe9i5XhkvhTn*VP}bl=0sn>V6<eb{=;
zc*myi**Esf#D(lA-Px;Nzr*^_Y$dzS84V`Vw^c*0Oy_P@O5lCByZeii!lHeP9Ji|+
zsXM22H?-aMbnedg^Yc~X-xqz<?5p0eO=Vtu!Q^(^(C;VNmm7yp=S-Q`k!cfkSO5K$
zAfLD1SL602w>(eWBI0)Yit<vww{2dlH5x9yzp=Nr;M|*)dH-WpZ#FJ4yVf7Sv+0<q
z|HlR8Gr!IIa#33}yDaQnMqaLXbZ-^!s#kAU1y$^SU*>9AfB)?+DZ|c;UA=3UeU13M
zDZaYCl3~^I3D>JPO|#LM!1}#=|H9e#rf*+eKjpshHxak!v&v8X-hQasoU`__cG>24
zXL>eGbN;^Vt<WB6@sGVRdqV!k&2Mx0I9EQVjNO;zSVZB4x94u%(%dffRm$h`7w;qM
zBtJfL3|*aZQN7>6OS;@AC2V(fNuS=0x|4@zt@`~sQtI;phi5SY#-FW1j^A2z_0=|`
z#4VxyQ*LiP{4~vQhj-2XnTd~H>I=O*b=Y<@)13Q18Ox%+&U+qp`_+SLebeguy%)X+
z{+q|DqI7oc+@$Q)h9;dQ>A9@Oos?7rr|!75^~jTnR~tWP_SAk+>vz7Y6MtNUW2R)`
zj!zrd&+v!ON);?;SFaCF<S+TM?d>h&&eH;`R|L3hxLwCBeS7_ndHM`{O7g_TxB2*A
zzp7BllfR|bGi7yP>#CP)HOeP>Y*-QC?x`d-$;GIw*xo-`XL4UHOBaj3&CmQv_s=&o
zMVh1)%If5wJ;ZW+WBApp%1_Jw7A%c4;;D9;8vW$qMCL2El27G5=5?;w+}9i%o@cCm
zz<<^K_LZ{R8O?kj?EicAso=c@AD&!vSGsiPmgd%NQl;vh7Oj(3-Mal(#I*B8!@D&s
zx6<ZrZv4FExSdlM%j@9VyJs;eex6|Fv}^4d<)_=CRbJTjWt^|wUoIB5f1UQnl331N
zvWe?W_VwP~EBig-cDU7#Zt-<4KJtrAHRh=VWtpx~VG6xpryyb4sSzYA9C+@-tQVy$
zWkP3sCMvKxEmPy|-Pl<4q;U0In>h=)vpK(Oe7bPrlK?|*?xR1~y5D`Q`*KP&CNuNN
zy48X4J7>FWd#jSPiZl4H_qH!4zY;fZ5;3?s@B3=y>zyfeca#rz+xyM2nXp&Mk*j5i
z7LV_PQ=j=}+z-f%3NGDyH*ud@U`_7M@?)X<9gFwd?3$guC+7FQyS*$nf6b>0Pj)xx
zd}CJDb#`sl*;!Sh7R&#&Zg$$W+2#>Pto7=8p^KG&a+ay|7aq6U6sID=Dkd~(<C2M|
z?)Iuz-U-N@b~MWR$aS_~PZz$kIJV+$SkpQQmIJMG<rC7^H(q-e`s1GWT9>B}<y5z`
z1WKG;dpgx2OFLY=%OG(D<Msy|qf0g9bUSwcpHiyu^~}Ca``?G>bbL8(D^~Sjk9p;s
zlUWlLggIt*P7&0t<e9`?^XWz0O)HDa1?H6&_0>y1i|OU=Z<;?R`%%*3$vbT3Tz)n8
z(d|2YYJ&cUIMgO4SbaQu?$vx3lNmWTN~i89WC|`#Hv%ninWR&~bvfIH`B~Y-1yTps
z{l2g(KU_yhi=R`C|Dy{3`=GFI|Nk%fR{Q7c{Wm;Q+-1I+9}K=D8?iqoFYju0z-1XN
z!4>DF_io<oCNF%l#b#5@+Mf9~cmCR><(_F~Qk*%-)qnDybf;VILSpMby|{d@SEv8M
zLX#b3)4ta0U;ig{zvAI1hf1G+a$j~D+*K*sdE-vr6?Rp@ubwJZLWavDHty?qscaDX
zeO~0OT-U}k{;TabiTtWf<;~ogEO+(r>$8(Oq!KqeH~qI>Wux+;dSPrbSMcqOQj?9l
zUnb@^s&`JA6<wO$wccH+Plv^^lgD4lDotqS@pY?B=0zo}de^Bgf2MAt0;|#Te=}4y
zU3Lmw^^<dRzdCo7bWHQ&^gGV#liMfT{5p{w@Z_SzG?%_#VU?3SPWmdXGN|p=Q*u%I
zJNd+p-<@Z&uZLcm-mA;6UOA`Y)4`h>pcNy}jxsVFGnv%RGqL?!^|_ZP7n^s!nKCQ-
z>#A9ao+>l!4o&Xpvs9b3%f{$Vsp+Zp8OxbBTZewXw`P5=YvY77-iNnuSg?0n=&O^=
z&QBB*qgVQ?^?SBS%u(62yL9K3J6Ts4cPHId7UWbD{HiV}n^-pOJLht_oaakkpPJUb
zYU|_&XZU9XE2s!xYOt?-vG!@%+k&N$XM8?Q*zk3Y+VtZ~JoNHaC%&DVv^jA7!7u!4
zzN-lKi*0#-EAr#nb4k}j?>{lT99*g?FiA4=$CE@={_Iun94dX(D>FWv^x3$u=-76i
zaHZ*|eX})x%zL*cLFi%~w}G37(yGU<%h#00s{Xt(=gGsFt9F~Nzi!3q6g1h*LGJDo
z!wt8!{yupaZE4~Bso?y4Rl&@>+o$JcOi^h%Aou$4WRG(7<5Q%*^t``mm-NY^b<e%O
zN9Oc`Qe|0UPm!*auJFz^s{H-2Cfn{z?6F}I+Bn%Gf8)ZotDQd%9e<#b^j2q1(p-<O
z<}aSSbAInyTU2#lxY**EW8;L7R>5gDHLgvO9&#`49xa#@IjwN(=EKWU%O`il=DhG!
z`d9nTRKTPpK`798kF|EPqEc|O&b++U$0uH{o<E}*RA3xY>bB4eIX}a_W#ukSRsP+#
zwRZe{yW~^H@@~~zjdxTYX_{S`XDIl3^>*vk=A9Yy->uQ`Oc6U~JIO;H)Wi9CbRs*e
zQ{a+K>vtIjO-%4Q$$U*|YvknP7OV^BsU-P+d^>Y)tKHp5t4lLZEKo68|5#4&>B0cp
z);aScTJp5HQjSUgEnG68P2-nZzoKiCQR@@IZ@-RDb&rY-b5>1y(i6M;hQJGB>($Gx
zSI>4?vu?HFD;J|O-X(W(pQ`llls8!J@XW?CCHG$K6(z^-{8H0w1hPK`DE!b-o8~?%
zHmuoBc+z*3iEfMK_LYfM9e25-Tk=~~uy>A&*wWXh>_f%X=goKwT9M`XzgcvvN5Jm8
z+DeHhogc<Jm*07k=<Cg!FZ9e&?p@{i>Mq{xRo{g<y_LEWH*XgBU8l79PN}GlP)(9w
zH_PG!vYehqp9&@&-?evg!U@q^vTr#iv%AHInycJgVLrQcMv_eB5zEP@ew7ufNlb3V
zx9{C7IJ<xILMDN&=XSp=ZMoqjeqO%DT3X5A?#=z16EA%_;dJPmO22WpQRsBemL<CU
z^%E03{?E!kvpD`!hZaBY<aQ<l+wv>jiv12k6CFK|Y!}({eTLwqj-)eI6Q?{CO#k&k
zl=In_d9E^_7dUuGom^bCBk8@0M3L#QwCJM~ZB%^Zntcsd**sdausiwXUk}m$mHrzO
z5AUDf(YEf+>boU|EBsVe8cw`9;do~DwyQO7o;x;9@KMkez7%L1lyyB{!`!=L)2h5P
z$#PP2f&`pDUFccb7bqQ~EI;EBcPOvV$FJ+>F16h)^XrvMTTds?mbX3cGNpr5EX;pb
z1}LQL5*HV_^y)yy>}PA2+Lo_cx8D1tdFb}|Pak})PUSf1Jm+6cYnQFXo9v}3ZBG_%
zG^@%y4K7W}q!X>B7oFGIcT0O>Vfe?n7uT(J)dQ71N{2rDd%p7X3i)+gyqIJ!xn<W^
z_DryO5dERxM_PdL$-_IF&jn1sy2Sj0n;pL)PiADP(${-df;>0<U5?-LPdWB9QJnM9
zkwu^~Bt<Eb#rxQw;L>Q<_)iW(Cl7l`Ukg#>iIphKsXS82muxlfSOG(N`r&KS_RjD4
zwlaeMu7AoI6(i<P2Od1x_yJUm7)??+lydR&!Ix_Nw;p<}mQm~9G0A;<pYowwVUN}N
zS;A(Won*7i^T^bWy<P7=s2F{#Q<`;1Yai#F`wkNy)OJUycr;FEY?3G!?3(aIG4az3
zlgbsrsTRllnkGJ-IKkr4t4&?Oa*UlG*{ko@DZN-5;XYq;-``0cefHMbvtq;d{;o8r
z<Y{Vc*{sGAcvbV>SxH^to!e%)pL+Z2;n%R8>1UKb<*fej<l()8Z0XUes{K3e&tJ2o
zPm$g5leYom;;mbQ*#e>?wZDl-J1wpLwAGq>_3|4RDmRDJPJX|plXqT{^VNm<XH0gl
z6DzdmR}+}D%BE-A=K1ccEh6L=tMZF}l}*26dq(-B^5K=ryp=q@O3PlpUl-)+&mJ1H
zdGj(2e%|-ns_y6sJ;~ALYB{3AUmbckY~}qK?n|C1-b-Q<){;6c_ssd}#DMskIgdQN
z<rq6Hl0gf*cifG%72=#Z_o=jSr_#g;-xFtP|N6D;)h3S#Oo}H}`u7EwDy#8#Tt56T
zb57EY!!;{xghbq*1s#`Q|D+<A-L-xd-xQDiDvKVg3eJ6D+o4+-@w;*ZNGqo*f41kh
ziIdw+mpzof-jS?+aN#G9f^YtF9tX^7Y?k<`sxaMn*1o?kZ7LHF2z`{;?x*r}-RcWZ
zCO*FvCc@$AC0F*T;fK^qyOW2l=Pg{U<1bVQHXuUgvl{<%0Y9P4yxpEEQRUP4Zr|MS
z|5oH>sqY*&&o?ta6XEb&aQjqb==B%s{HtFY>~5{wXR_#G7t3Om{sZ?~Z9jSJxTla;
znBpnd_)cf>&Zn_Q%X{9f;rQLDacQma#d}W*r7HjGEPoZGpfdAGpmj{<G}i~GoMSy#
z?AM%85_(tTq%w=Ar_4-mS<VxS-IZ?NOFCT8xgy<p!^7$|$2WJqSIKX+uhi&f(G-~E
z^XbOxch4VAk-hcOiOKhZ%kRpbxi%t`KDg~`E;F1{^~%Eol*sn0JnE_K**xFl!?)xq
zh7&z^t*h#0u>@5$5%1Q8+OB>z?faU)HZHs^%bMPmpH%H<)a}rkG*5l8VYjDo*@BGU
zofVTj7MFLeFW(yXKj_2X$sK+ZT#_cSce5n7Ont$v(BV-oRDEyduCtSt|9yXXga3|I
z>z1F`ng6LEEt)mXdD3Pb(?*xcU-ece3K=qH=DnZ70P6J#`3SOXKRz|@?#|!WEdO5)
z-uUj1!}mG-t0VdyLEZS@5h@;yI_m^{diPG;{4}w*a*g@vro>MkY3h!g*)M}8xi33v
zs9MRQlJq9;?G%v}>(+>REv#N+EyNl5%|-dmM!ni^5&fE=RANxMfxG*N9{*pLow55j
zUee*6c6`B8Pq!JXtTWCy`ySxQH=H!#xJtLzvT0T&zs|cCy_?aodvjtze<^2Q=ZeiD
zHTy$rCg#lb*x)hY(*g$%rIUy6d0zNu<6&RPGc#8-RsG<GClB*%uGDYbyW{D^B^GIg
zdy*f#&o=Mu2+p~Y(efr|h3)Uo6@J^wuI>0|@b@Gn<v7XBU$N!2`MTT5a*Zeb4nH+?
z{IucsF4xC*57$IJI*~Lzbk3&_W$CPwIwW;Fd-l!m=-WBJ<7TwV2~fq>x%c*}OY7v;
znRMrU`tahAHvdG<$?eMA`O23Tb9tIns_6FVltmuZ?Qd{dlXTckImKRZv-NKM3v)$H
zJ7*-nQ%@>VKN#{*;TO{><#g5lTMxgcxwgeDytKJCC&BYu-O0oIgnR97b9J6@7G5iS
z@)_e}i!&Rf>(UeUzI>-1RPbH+rC#1+o@L$9A0+q9=h(IRZOE>>@yhSiA9>29TY%~|
zl_@rPYqw;#Uyok;?~}m)(k%TZ(~50VtClRdm>#V<!A9+F;lxiJKPo?oN=JFRR3CFO
zIdrb=E_<l#dcPxI#5kYrFi>K367rPW9(wDl^xBoX+)g~XlUK!Y;_%J~N7uTQyCmNF
z^kU{7(<O&u)NdcQd78J%W!LH?yM$SvI6RJI<h`9zUd?uGuI9dL5!0h{BqR#Tn^%-y
z{_QBl`A+qbzR;68&@y+W%9^CTvAYZRMjEkP+U?`9J^Pe?|2tRxbCcfuJ!WHY>Q>lC
z)&7|x?q>|sq&|J{JhFlPnso0jTZT*aof!c<w`y~)UQoaC`JYwOtC9;Z->FpSfB)<u
z-q)zpW^=}W-S*Y({5O*2-kDv0m0edQU6cRmzy!no!0!h0bt*Hws&53!T&#=#<Smfx
zvxFt1bZLN&<J=_Y!t`{;M#G60_!~YmZawZ=)x|4Yx#eP}DI+T=iwl`wG6Ie9rYw!!
z{n26bn`w;f?Axk&1m0|Wb;{YMHhAHu2^`$YehU1)wV#7eDZhHZ`}Dk=SUtv|cukS6
z00}O4rE@cO9-8?l_wEr7^Ow6$+g0aVG|7iG&dOD<oa5$ak>J-j!Qy?#+|W|{o%hz=
ztGFQdS>WX|zK`2q-u1p*&&WRcxI<;a44Ypae$Qg047-)Pp<yk;xOS~bS8ByKnd0fT
zh4(_MzfIj`uKUVg>0EC2-|t+kK9~LEs-!AEzf+g1k$!EXAs{5~c3R5d|CPqFx5t=w
zi+<l0|JK^^>W3qOJ3e=tSF(f}N5~Z`_Vz66Tk_<h_C#>cGP`w=l)(|@iFr$RR@qg*
zij7(QJ#=+c@|S%pcTHhubxNArelhIt6-M*Qj^c$?s+absemZxc=*dDk&z{LPDnXxS
zXlJIFl&XL-!x1l~6>CMfQq`j~HgI2!3%NLR<I`oyAM>kMrm@f6tL9S3Re9(4k>=|g
zZ+9^%ZdB{<o3+1WLG13Av(i}oO50YYUx@j&#pPh*mjkEN{-#AIZpvG|b28gg!AYPM
z08R^6d|0!6*}aW|{udJsIyDxQZJ)YgURz`QavP&d&pwEp3IFtCDf8P`zSrNdg<btB
z#+G${`sR?%%c)QO<(QYf>f1Sa!PYn1&n>q}nzv2#`NK<7+QXInHa^T-92oq*C-%IM
zxvg;7-p!R~`K;E)ie5dQ_+qdBpM#~~)%|x)KEBL;d$xJ}<uwu6(XDIEpZ+@d{pnwK
zPP5<5liZKko~fPs@UxYxedEu4g<Z)~(5Ra-`)ifxvRU_>oLTa%Y`AmEmcM#6fAvE3
zc|ZQX-XST%>h#D@Zob`xb$4qn?3bLfzm{MB&gWSwJ4`&++n5A>`tYvu?cVKkPXAih
zvMu$z*Q*ZIoeqjW_e^!!R_=Ay?XIrb8@(-^c}sWQU7LE^IdRtVgcp1L{~f#?p)$kJ
zg3ajnCj<Lr)&9~iYYyaz#_hB|{4Mk7-$fVag_iF=xwyPnr?vQFzxnN~XHO?SbWU@X
zyQ?y({FsXu!z%5iNuCOARd&^RD-)LU7O!VzNS|=IdfuPr@`5wXoGmH}{kP6u{}Fyw
zG+rlQzQV8ipU17MCb!pTa&0|s*w$ZpW}%JIJgZF_{Oeur{B?QNYZc;~&(%2D=35k3
z@Og(XcBh(|6b-i-`E8#%*T(DIjQ0Ad-1RO^m-A{L6$E=1m_}!F=DD_nmO9VSu2j+J
z)9Ec&QSEPV=}Al8y(4;k_+I|>&>7nqg3lkgQ2*u0|8IT1abIq>#zt&T`;&HDAoR2G
z^)0K-<1fdQmxoNa7v}Nir|v7Wz`IRrA3UoUnsM*;+jEm1W=DTblzXx;js4ceH|l*e
z_#}=Onp{eEUeK^HdUg1fH&?>AjxKWG*?c)H^l$Ns%KPU3-|hz0kP|EfFJJMO%?Z!n
zwJfr&WV`V8s1o6Mo9z8J2=u!M>?`q%*u)|EIbz9^g#k0Fs|{|uF&*#w7%^+^;}<9T
zR~Dsi4l{{f`zvc>2vY%D*1gYH_Sc(FPg(BZaYttNvY*GkW>-(`)0}j^R66_WJM(h~
zcA8H;vEb)s+oX;OHf$m@&+oFi^G^Kb49mlR>aCR0Jh(L%reB)<&g9RM>@Cx*E=69k
z=+3-S9Id$cf7Xd9F}&BkzCUP7T#;F}Wb0mL<>}VjORm29|MANHdi|${+U8qqST`U2
z|F8G|$}2|J@vSfP*XwjY;XQwM@sIWKvL)FXwRS9t+F#pFIzQ8|Y}s;Dmmzz9>66#G
zb)Pt1wl=>oK9+y~-_~12dgb~d|MuJr+T!bf_^mhVtzZF(H9}u>C%stvk;Cuytm4Wu
zxuG_SA{;L#o=D#p7H?FfzMyE=%^=?C9sApyon(`|5AUw&<nhobR_QnS=BwVqq&V|Q
zdEWn9&%E^N%|p(a%>JkHNUu`$!ym`Lo`*NKK40Io^6-3{61^vmnF3CY0u29u8P{Kn
zFe>`a_)q1Lw{l+Ahnh_PA1^#*>nhLGeOh3_vCzTeze>?9Yc-~RnLUd*;$1eHm!4_1
zx#8=|!RoX~O>nAlo{;3V9j65fc_y@bY<mhecf!R<9;G*DyI4u=`61fYk=$T2!;C{w
z<isA;iT?LP;?0WG7i?g=W_{7gV}(VUgWME0MG=lp&WY?FZ*Q+(t9GF}v~bnUkhdjm
zYxYSy%0y0LU+T!g>Lm2!wEKVWs{wo2SEZNDR=YL#UT)6bgUL4*9NPl5)kL-E&3==j
zwmA)_UfzwpT4w!%zf+|z^JMa?O9f#oL%g~LxKbE@UXqPpulMvrq{F#4W{ul4C$62a
z&BElv{V$LBz}7G@>@WU(eR~m`AA`l-+aa=68|EdwI^lC&<TA*WFXy{_Vtd#2=u6)G
zn~|9v%Ep?_nau)REiTWy%WL*dG%uJRuB3Fb)7kmoX&w=FMG+3kNTsWL8mk#??&a{l
zKR1gzl4Yr5p@384gpWVw-rgT~n%i&1qceFeNrw{`o%nJk%B%UhOzj^A2G&+j7sn6*
zu9hS193PHa?~B{6T69V>;(DZ*t9_w>lZJqhef*xsudmLT@qNXsnc8I=ANq*DHp~SX
z{N=|b*?x1Li^2hO^2{Df3okU`Sm<zq>BH`Fo4B)*zaqD)G@bEoxunp-q$ug7wB3GR
zYRA3hkv>Xsi9U`}S42QjF~NoB;_kPhHOpta7)>ym@(H9?*>lN5Po<ZhDW^1*HF&!~
zp6*efI7_i=y_TH+8f$IkLkYJ)>P~t-IdjBQMWefir_LDcpeHJq3|#yztfk$id9bA(
z+yaWsOVd3}(_h@RJw2~wX%~<G+ElRK-~;m>RXc_Hzc=&E-JZT!av8|!9&VF7;tIBF
z`Muw^DLTgklx`>Js7<t%%YVZ1{ie`KpN)#(h)YxHe8Td5#iJ`GlQ|#)hMjl*RotEI
z5;U<RNU9}3A?2Ik##b&y=jxO=OVa}-Kq7u0OVa0dDtD%6ry6_)h29d?OB*JiIKlk-
zX3io_9w!aZ9@>-AKi(`*Su}Is@^o;p1wZ!QZoluT+QenA;?l)trFww^BSpPaLHT#5
z0ocou-b(KNrk^g~@P`C+vEsz2xG(cuzC&Z9=F?GYzpJ0G1${`r<gElw|1&FH8uVpL
zRg1KCsyKl?x$}v)yMO50cRK&;ls0lSgRTCoH_<Kb3(K0uuY!{~T0tQxX4k1B`RnGx
z&6_nq&UIp{`Pdg6Jx_Drt#>JV?e_kbR|G}0hf>>&^4Y7Fml;PugOKO*)5XiBEv-9K
z9H9=-?YvU7==-WiAMa|4N<#xowMcEP${}!+J#$y;ofDfgBSZ+2qW*X-nPvXv-Hu5e
zZ2y!&VcGZduC-tOP0uG^EIeBdfzm?vJQtVueMZ*ZnxGI8s2BR^|FIP6ZjU7@lQxSb
z*lfLRvhn&t1JFqUowHotS;U$r^mh7`raOne?gmB3O$U}Ad#(Mi^FFw@@=cBhIPcti
z_Whkr?CD4$&wV#8Ok_I>3WsOS`u8h$B{HZ_IC8EH5~9CVg3fZj>q}*H1ILg~gpg%b
z)oF%z(%I?GU=f4PCk?x=z5<QY?>V#pl-M{J?-)+}UA62-5GZ-HxOga;GVeGZVzckI
z$=18z<jB!E%cXDLy5|kTtBTULPu=^rkaM8}$4L#pjSF=r2Jc?`@=cBh2dmSGo=?@~
z^VU6M{cg|+Drt?3;_a$-C8|%nT&ODewMNiML%`_2=abFbT#orc!awqi%BHEz@1#v3
zX-MFarrDI>=1GvW>@mYdYMx3{*>Qayu*)Yn3axy4<d%QR_LG|XK*u%oC{0|vJiF3c
zDSg?l&5EES7B(NB-ajw%3GbW>D&K@bi6G^jYEbN{qg&>4fOy?U-`^`t)6RdeFR<J!
zWZgS&kfW#f&s(V{xg-5WosyZXaK_t@djy>t1(+U`uQi+)f1Sfx`rX@?@9xP7bAT+E
zrve(;+`H}n{m{m{%NE_ewp=0Oa}P+vC10iTIV*2(-t1>SyX$ng$Zp@GQ)7j#!+qXt
zEe&t9zmxuErcE@+)jK*E;_a%I%`+}o@AQ93@~f$Pw=KHcxayHF!+(?fNB@7nwcdAd
zva84b*B#3nbgb%sE&nfIA8~shXA?hjhSY@pH6L!w++LAJgQH3WoH%B({;SVWeynER
Stq(dYmBG{1&t;ucLK6UqZfpnu

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/shazam_logo.png b/tensorflow/contrib/lite/g3doc/images/landing-page/shazam_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ec412c75c51d0e6f7cfffa011d250907731c12e
GIT binary patch
literal 17783
zcmeAS@N?(olHy`uVBq!ia0y~yV0g>G!0?8Hje&t-hM=Mr0|VE(OlRi+PiJR^fTH}g
z%$!sP29M6E)7V2MpA<X({P!hk-4e}(<}Y;{zDVsV*&sNHVPVSz0T&$&C9NG|E=x4E
zcZmcR=r*ij6+C)KDdNCBh3>9RdSWiFPiqe-zu8xQulo7l^E2<<-?_8+`MDb7=P?W?
z`e)YEap|x+Xz4o(JbLTa-PO17;XFo;2@HZ7OxkMRC(3#GzOi>adwc)uM)^*+lH(O>
z*Z2SbAFq1$8Al|8!$FSY8(Y_ewD>D8ZaylW!q9Na=}**Z;~&N`og3CLXbL<OHlAZ}
ze6F#K`SI`R$%mZo2+A=t_@^1MaxzS?Q>=?p(`G#B9OiPP=lHWv0^b~ObWS~yqP&Oq
z;n9^VGmU$$FnoFZZ<6}Me9Om2EN7l!F+Be^HLQYv-GupvVm{Z)9seJe)s<1ltD7=G
znAJ|PW9kgc?N3bX4L?6(YJN8J^t|5rPhBSZ|FGHI^3L${nY5}OozIuC{5oH3bG=*f
z+~k8Cv(p`;IxOtnUr1V=_c?edYSEK|@Q=G>+APw`k|Lhiq!%eOJ&&9r^J=@mW4&+O
zE{cz4J!Xx2WV?*(|0aDsXPNmEC7vnYQ!ZX;)@R@q!#w-(L(OlW4;?l9!((1pSLe0C
zV&3FOy5G2sosSk9PdL5Jz&xVH&eP}qgGx3#1&Ip|JR4Zj8n_+U)U}hB6?`}OC&g&4
zXxRL$YmW?H@4@7mI?Ezw%)HDvqq4nt{?EUEZ+>Sz`B=OrRg9q`Ou_G4Q~y7fhU61-
zr@f5-kXg>?Fq7rMq3UTK6D*%QpZ}9E;kqtoN~2JK^Sg<525e#n*en>tA8^(<3UDxA
zOi)c|beX`#l3*IaZq`^6z){v*yFezS@$dpZ8FqC?ZVPt3#?=qhUa)>Yplrb5b|Bk;
z&#p<6qp3iEb&-?T1gj;?l8#~mvPDfciuD>SU2RDVSvuG|7nEK|TOpv;bZVh<fbAFK
z60uz@vF*PWuwM``;o950?|{Y(p3{d`ZP3f%x||q(gP*TC`~YWx)g0dE2dy6l?U1r#
zGe122!R!yV6#{j<{tFnoS|+(?F6ap?nPRQQA==#K&KV#bz`8;x#B`P5)wWkocO7{b
z9SfL$F~UUD*I9b8>;=0Q1t#2c`|dT|Yt(bUzHs)%xi73=D1Nc{W!=^kcu*qojUkVZ
zh#KQ&smcAC2d^ZYOIVliF44_Eeq+QMn=t<D?ym>kk|%G--e|pneS`0fvK;v`3G2qg
z52qBkS-8gt^mU#-@bvMig5w3^h1(02clcKb#mVM3em`{lLFJE_8qR+j|Cs)n{bQ}`
zQgFSXctf~}Bj=FRA-zL_PSOv>5?c&hr>MOWl<iDBqVh;BNps`Giq;=9e#~=G%AEAZ
z!`n0Fl3S4GOO}^XnH^td>NuOJ?4CS*vg?z%Poj#fe=`2GZFRc1bVopvhUApFQ>sp-
zo(evt&bg|klf}77ccOBj;zgap=tsROjzR91LW`EBY)k2UqIoiUQgRTBW~^qp=CUhn
zE4fx~4q^@xz7&#K{8Hg%$V=Hv?n~Eu@1Jt;<cm`kr#7U1O5r;lGfi^Yq7_jg<yzgl
zW>&RSwOT24PrW)h^@Q3<u~Tx>-%gaBK6&EiN!=4pPg*^}THjowf711-=_i(-ct6Qr
z^S=InSI5wUp^b|-#4Zecxb4uAM@p{Zu6;?3Ns}Mh9@Y0UQT1P%veec~*UNv(#Zzsk
zyr+h1%ZKb+WV6kqY~o?1Ym1s()x~W?8bf(PokFgyk_memHZveJ_-okH)zenSg~<j_
z4lfS$4$=<Wy})dhUub#I^Ht)j<3s9QCokT4DJWy<7Qb6N+Pd2M+dLjmaO*!dW!b#6
z9ck;*CO;F_T_hH(9k)7SwR3h%bgZ>Q_1v=NZ_ct!e6i|*>HGbrpIfqSUfz;-VRyyu
zPOs{!;@O+Cmu>IyUm~^Xe@p&m{oTzL!Mw|H(?Q1OIOiJ|ZW&zMaOlG{!JL%jBc@gf
zSGG%RzL95`U-<PAWAdhrs|%MEHh-MZxu<i#=Q_{slhVPtmnAOSE{k1u|LmHxL1+4A
z*cweY{BC;QG~c*BJu&0prWG4@WP5J>vgypmoUJw4sVPS{T4j{ppOqz>u=nb&!@p+t
zuI)Xr>|)xLu-j(Wwp}V~(DUYB_I^S9=EFC3-h6sv?&jY)`N{JW>o5KDXHRZVX<y8z
zKmCR7LmkUaDK`QyPkdQ%qVPiJGm+O{7GGYlxy|_U&Raicc2DEJz4X}A*42&KlU^HM
zH_uDUW7=+<H~HPScdzf>whfTGC&wyhJI`p|!+B5V`R_H^E3`*y&#}F0_g=5s{yU-O
zPtCU<JO&|>I&QYfcdlP6-<!W&{-*ww^>^OS+`n-Df&ID+d>?pkNPn;hP@f=vLx`nG
z!m+w}9?NUi<?R1AiR?D1$UJ)Kx#Ba$*G{{fRxVua)ZbReQ_Ou^^qWSEeT>LGK}E@p
zHjC_%j4ml}(kL=GWs+5U<!r{&jJ%R;9z8XsWol_jGoNp&J?%CvTK|0fz5P!4uGhL=
zYflYdw#H_^SCMG()s+!GIVo>0-dKM|A<eW{sZTIm()?Ue`rGuqX}dRc-&`!??kD)%
zbx(3l;Gfeh$6NMtF77<nsoI$wQ=Vt}J$I6SQ0S$UqJW~@PbFR{Ucp*+re<Gt{M)9V
zdF>PIecJ2wW9g5de>T7T_VV|o<E(W}Qi>`uiaBQ|xJu2-o47BdJ455L=4PeOmtS>U
zUA}3_=8DL>**w>C;y)|h*8aVG>x->xw#vRXxt1GczU5x&--Wyv+IH~DT1Ht`S}IHL
zHNTdtl;?HtOZK|Ryuf$uXD80S{VC+L*)!h!kM}N3(4VM(LjK094O_FMtE9`L&zGHk
zn|gb{_y+Dgky)bCMC$GyyjA#^{j<DR{fv8(`!}agKfA7Z_r~3oyMKSTc)wD6lHMsj
zse95Jw(pz~78CF%@&2mX>)va;R(%ux-u;b#+OO%`re#iltM@x<UPRscLu(hU3|d-L
z{qzarx5nG-v3zD>E5k&?YJX1r{rIo*z4lqFOILgICG)4t)zzsy^0@B*W&W$|DcM}v
z`>uY<Vt?)Q%IbCQ>(~9OWbR&NFz=9Z>({lvR(<DjMfT#t#|zbdE;}UrpykDp)$^0<
z-`T$`-}G4IZ^+}!xtFJ=uSn;bweXqg*~hw_y3K1ZuB}b4JzuuPa?9MGN1sf;RV?*f
z>)h$N_e&cq6yG2Ea`a8N@Y_G%HhObTZ?g_7wb><8`ZMqOrE~MN>%!N?e%)2I^Yqr&
zk4%qeKVNq{-oCoIZt>G)XU{&%d;QLMf7Jh~WyhB-|2ggL+G}yU?`_+sS}UGseScrc
z@yF+`?(W%b{+{*T>(~76_uFr%N%&y*``>AHP5yg!XKF0IW;{+l^SDVp$lq(;Dx0<c
z&OX?_%yIU?&(3nrch5V`x0Ooc-TyE4$M5SC_dK8STvh){{FVJ%Dk}4?WN-O?byK+V
z`k(8UAN%xw!oS6zdjHM7T)*qjO<OrryH8;s%6{GL`|kBV+diW9#SD&l6~7F>KE9H2
zP<^R?>HL@Tckgqlt2EsDOZ#2+OY`RZ-1~p`+0XNvJMWn>>zSKj46QsCc5)5nTRr9r
z@G@)_XLz(}r(I>(g_wMM?+5lDWAtscr5U6Ig`Y4rlv*<^7g*gK!DZAW$XCp`Vd>I}
zKf#A8yt{YCED>8`syTCBUH_MM3ucCWygvT^TYdcNS37pqaya}xzVly=+pNbP((z`_
z<wbtEEdLkPGlchZ8C(95$YWq&oSPXEQ4-<nW0jnrtCyIPpOmUskz2q30yY&^6}bhu
zsU?XD6}dTi#a0!zN?;XMdBs*BVSOb9u#%E&Tcrr!00rm#qErP_Jrg|xB|9z!1qGX;
zlr*a#7dNQ(qLegSrHqo20xNy}^73-Ma$~*xqI7*jOG`_A10#JSBi*8uG~MFLypqHU
z-MnIDh#@d@TvCgZi!uvJGV}8kK;|YU<(HP&Dk*_1Qh))7J5wt%5jN!&L%o%ppQ{g&
z(KpmH(1)81G9Dyq6_8nxniJuYnVXtdoSC0zXJl+{16GeAgRs#Cp)(R$rx~(NBpIkq
z8-0)mkURtlEwE^ii<=#njXpSBL1Api<*#&n4+De6ZBG}+kcwMxW814m%rAex@4C-?
z>IJ5j3*Ehpvy-}f`4@^izmPDH$mv?S?EAgyh6~v*_Le&}Y?$=y@|$}{UvJ%ZH+T2u
zFE*d=yj^!H!^A$*?NXMp1Dj-u$I1zct_saOLR*YZC^WOI<uN*;;HZt2Um+*K9H4O^
zo%O-|dd7RTOcnpko7wXvV(QO5c$CbxT;9N<Zu-HCw|V&PCKWtTYyRtDz9oLovEM&r
z9w*lLw7)c*FK%OhM*Q>fq761W;cT2yk~&OJ6@E_7I8gal@WI7s-Vlxrvf-LKR*s3U
z7`YB_7na!2>G1w5OVv8n3ESHou7|Q%6`zXrR7huS^qXV#ievppJG<i*vEkV-tJXdK
zne*Uhu3FZ<8UG(m7AgDm>D-5+{12Zd_M6L_?%OBO>o2L2k^St&NyZN!xmPq)e3H8|
zG5>ER*W~Yk5+@w5WY1-DV~IY_#lFSqd$!7M6Va=WlXgbfeQNxp5HGD4u;Wk5!QgZ6
zZ}&_6kKpf2=DfAVw9vlr^5^&G8T#)tzS(!mLV)AT?Gx^MuWr1!QR!Dpf?M71;(g2C
zsBd_#&*pzG^Xo4a-)zoXA?D>5=lnV1zIeJk!?)PwF();79-O*YnVQ9LMKiIMt!Q$s
z*82O6(jswx7hjE@by;YOrs1MVH<nzy8_p9pf2l#0dTZY5%iK%KT9iLZ#Jx9tu5qXO
zY;CFQ<ZhWp)eL3RRf{4!9AZnw)Vo($y%Y|A#CQ7WlJJthTd)5AvGv7uo%+YNoB#Bt
zc6<^&y0vTm>9a2dZznT6`{ZbO@?v<C*_98oW;gBL8E;<29x8f;XTc;B2JugV73WQJ
z-J;kyZyjLj6APbbFr&I>_CAZl*~W{!R#_=6-t%e4oo9a?A8Jiko^{U1<3iQ@^-X5Z
zUD6_L`N|hsXYZe@5-?ebMgLUclQO3IJBc42ZjP~=wBYNB_}Z_G3BN_zYRxmet{E;X
zxp~^8>;Lutujapg+<D?hj7a$W{gE+({Aqh;92K3Ke6+tqJ<u-ft9#`mo)UY#`*Xu*
zzAEzAAKrAW)M4^l-tc*)&o#b8ou10|_iAd=(|)B+2H~)qd6yQ>3A`tL-nf*xon<}C
z+Qkz^tAClMue|knxm3jE#(;Yo<#!G=?mlwQ<Ac`)uG+9=xySBT%{=r#S7=Qj)B6c)
zS;}G-e)u;z=JnJCy{vb5>fe|Jo3oU}N;1E`G~<(w>7VQOqgMrQTH3TD?B$tDQkQi$
z@612Cdh(`eRwpiN&6S+5w8<-bBKtP>ZKosd*&TmxP<nndkEzI(9cTIOr_3zvSmiAs
z`*uoj(2AC~m*;K|J}JnxJ<ee3UzLpP37S2>8_i=9ChQMuGQGOuauE~jp1Wr8DX-4d
z`I^nVEVt#xwjKIE4k@!O-WI&|<KhJ^Z>NX+HLcE@Zg;V0@=4{3pZ2Wq=F8u2Xsr=b
zb>>Q0s?e6qZ7ssl`D-oCE;!9~?}fJstLKzWr4HT}{Y%fiv#fo_Ztr(VC3;<S-KvJ^
z@#Rn7?);T=Hs-#s_k~S2It9w(7<umbOlDiXBtwX);)NJjd3@&Evnm<E<!8Sh-hYd&
zGf#VI(sh<okKgY3$P^mRl67ds?63t#Wqm6a9csBNV)f$enxlqMNk6tOR#p4I_L1eu
z`p!o2$Mx#x-<&Es<*f8o=&#k(mC}*BPl(^Hz9h$W{=W0A{XG-R791_^_Pd*GkWf46
zl(Od1%m-{t&*d`SfAG5CX7NDh(J^kGU11(er@DmL-Y5#39XoA7Z^D6_hbJ8tvC_LL
zrIQ_)`uJhJZKUo0^%un>wOcg&mlwLGO8>vT)co}2E^%wgh>r<pj)&x#Jze>qHFM6i
zmtk^qci2slU7GpxQLR_=w};Yfi?<inr*(d2n;KHg@iplV->P|!bsVJ|d4;dfGkURm
zn)#Wem2)QPG@Sm*{M<6nCOLDn%9Nr7M>YR$=9efrepZO%M$u$8?edE|p6>ebRP5)J
z+fNe9b5%CmoR<CYvgLXa`zOuq9r1aB_d9Lgyn5j_uRG?`(%t=4m$F40yZa-ebU2ra
zWE5)sKVZsLDY{4TOoITMuGhsq*#ZmqEx0GMcjm9>!Kq7x61K-Jxlme}<K(QhNM(9U
z)#cvT=g(=IENz@EwBYFVjo0Qay^uOn?a!IkP|Z63`wqXdSO5R@<yW?H=R4`kU7rM>
zzI+lDqA%EAX0&0|WXb($=hvn@a5<ND-u3B>ts7_Boc$qk)ZO-0#k^&2qVkqFcE4kv
z7t-HwHZtk{hDj=x3w8W<dUNJHIN*Eqd$I8%o`@Kkcy&hYlj=@;Zyjm;y<W-j^-aCy
zy5h+Vu?s>(cf7de5%$rgHzT}l@{h30l08R#xBPp}@n_%U184q<c<}|zI9Pu?PyIr9
zubI${S-oMUC8xTDkD8sjJmcPvGo1Qixyq|%U*5F#i(^RM8}YB%$5W>T9XQ*emB1g9
ze4%nBr$X?QO=4OX7CSX+R&)HR^sg2V`Fl@$?H@CNeJejbVm|ON+K07d`_rghC6k=5
ztoR(n%6C8E)$3ZJEu4N<b9bI!aQf1hpq{>N5s8hhE1m~ieq8+M9b=tj-32x#SuxLo
zpPsQh_dR^6w(`MaH?A!fmmZYI?a~ciaQDxumL}8P*SO>BbEk#XpO?7)W?rOSTIH+H
zFOM*W*UecapHTMpw(;A`D#70aPv~u5H{FA+%*J|UxROypwSJ)7oDYvp@~`ZkeSY!(
zwty6e?$>D=8=9sbDSYfY*T}LoYR!RL#UlGuR&rRpdeki#$vtJuM1PT$=GXV{)?d80
zSW9TZ(Th($MmH!*CpFBSuGa8W`3)QYb>@%WuUl_-t`5}9oRwZYH>a-i*vCya%)GZf
z9=zQBV%`r4&I|i~a~PzGhaamnRJ?t$rv0PS5*fQgdHXyXt1_|{^4V4#zH-r8G-bl7
zma2#?(^tfsPgo$zGAAqk4}0r3?n}!r3T}wj3%u{MMpgZ?cU+*#l>DP7GxGLvh;CVN
zWw)O3iO7)HjNH?!CcT*VZ)Rvzpxnp*X&-xkcCK?xEstDuVB6_`?{fG4p5=JF^;(Vf
z%E>c*-aN^6_p3B~Y(258WEJ<KZ^B#E|GDqI^Ig1mPQA2N#>crg;zaNC>l)`T2vK5r
zvu^dj_nAle5)W7<t-LX5lKPd?e?(c<*(VEaktj$$_~7d-`PqEDccwCbd(ivf%*v(e
zJ34LRnNH_B{gv4_am9Vzbr%=7+HcF?Z_NqW6~ug$H7v{N*Yi}dLa*EvXB_`6s%R>S
z-N9e|rEG(B7GG@og~Z0gzouHQ?|xi0edm<z@*&e4?yyw2+Rm+2u&SQI%&zOVSbpcp
zzc)Tjkh+!ZbYR{N=|A_bPA<=N&^{S_>gm6?A8%i@*rZl)!LH`)g5#TJ$Y%@q%-%Uw
zWWU*O)9!4s^IG=G{CN!B)g6alH2UAU=#af;^^5(1`|@68ik@F*vw53o_GGQ^KDREf
z%L$ZBD=j>1{wjO@(_6oCdzp0e+T^$QI$X0Ue7sbs^s(KWle-`8mEH83>wD(&rSt9t
z%?!v_f1|Ll{(+Ujg+-Bjescx?W!~R<`_^KMv?<#|0{1J)Ka;xmrdh=3w914(S;?FC
zT-bH9zPEK(w3fkzd-X?mY!WUF$+f*MyS1?3u3N$Wtw;a5RK93iUwD_R{G#l4#qUv$
z3w&2@{<U@Egt^i?)s@ZnpDvs=U$CX@WzYA3`ipxH8*jfXwPnQv*J=~{ZC~$PQOU81
zxn@~o{AY9P2T`M#cb=h<%d8g4iNEJw|M;8imL0QM((f7xGrp1GzjVg%&gJ;?hIfoT
zS2LAuWODz(;q5ff?g(G<t9dhy8XZrZ`<yF9q2v0oN0CmO<~&}&``jYuwWVIkp`Lb2
zwVLRwmzV5&aev+}>Asn7v@<r^)z>q<*xnVQ<<ceiX>Rno10Q$meA#Zdf!U_k{u<-C
zi6YzLcw(F%9De@5KJBe`#>dZ-wf!zl$(u2!Rc1r^q4@cS`5r%IZ;(4($-d(8#?a1M
z+cl3YJm-7FBwH=d?ENUDdwDI7kS6=r<hL&$oDVqnA^7CHy~z_cF9^~IRH{v$Y5RVQ
zf>gs%L9zY++TKgM2Tn7cS+uin&kySe?K9>V9`U4qymuwUh()JwL%mB%%hB3f&Z-H@
z-o?!K9?f`o{Y1fQ$>wiM_XKT@4d^^us`SL{yb$jO_H~y}%s9e)ebSSs0;XD?7mZjd
z)23?{e0MEayjFj~-wx~f+m<X_q?qtMZt+5YtHh7}_ZzFy0&g*z@n&8sTO`LmWzvG9
zf!~f9H|W@%+BRpYV3^jw)#`tXS7-08ajBg3H2X~Of1bbZWd(m&2$xBGk2BPWa9psJ
z_587_sFR0s<?=4wda9Y_yxr#JQRB(Yu~Pa!4*Z^3EV|>b3{z#|z2Xax3g;i*Vfoy3
zF}tVibM05+yoc=KSQx9Ht2Y06Dzib=>(t4OYL{p9y4;PQAGF|T+_PK4Y>~ybVjhJ(
zbB+{GPJdHxww=pt{_<B(B5!OyQZb{lf9W~5h{W3N*b@tHIOL_ySn+1=68*9n`#atz
ziT@Oxy~NjHdI8__bKL*kqx~JbxzD!l*`BEy)RDxk>iK0SdyDLuW5p&0^+&iin8zEu
zkYvv(WXT9~`Qahxv{!z6XaBbO!7N*J%ubbQe2tUPP5Y(%<cuZP8cW~M$!)DXH&*-a
z<lo)nFuQ?4diu)1ZNIYjKbe<d{kT}&#`3B4y!lgk?mX-LRsVYDe&dt#8pYCkyuW=6
zyrvMdH{p+(`O~dW|0Vr2T=eU~Lgs1ZWj`$ro%34fH|dai&iYG#LykH>aqND*Y3Ju)
z=M5Zvk$aje|6AS(e)3p3M_>4W)gsAzn$n*dlxuejo#k>o^#8`ehkwI1F&Db*{4DbI
zveHDhx8ke`rtb{(wgs$^WSW)v!~SyWAIYXK#y;<k&s+G&_r|iha`ILQKRIsK7V_|@
z{jpm0m`A$uc%x-7v;3@r%5IS#rM!(NtvSwEdn6v^T=nalwO;VoHSu?1gw$<RlV7uP
zW%~z4)J!?lbC<;}S0!M>9EWo@ym$UH-s+k^Q|ztf;e|YFE~$DMPd;;bf>_~(k6Y@s
z1?M$<yJ_99?~eTEkO_7dKYV4mSJx82zu?#=eY>O!sfyv>X8yY>>Ue8f|2&R0@!^6q
z_I0nav)QNl<%#Zr+x}DUA1}Ssm?-n-4ClE`i=!KNZ_O*<&i)?CmfEh-bI;+*Ifd-L
zh03cMuk@E^=-+b}*|MVH_d0_UPt-!XUvinuUuT&6$G6Mp(}q*Ke|~v&zWQ_D)0@9P
zFjjXmZ~M%|8*<3XNBpz?s?8d4(RQ=S9{%i)a8J<O-tZy2<-J|wdj0pwdkdWS`R#81
z77up+z?AoG`~1aMFQ3>wcb>%S)biY^?w?;iDw<<sJh{HO+sx_IliY$Qv+U#VdrNJ(
z5ioh%ge;~mAw>?WX__Cd2a8!HthY&fbwWL8RSJjun>RbVvOelq`aQfTy-Y8;H8l2(
z!iR=@8@+Og?Rgi@X9urwiSplbZReu5N2YVGT;f>W%&fRP<aj|~&ob$EszE=M*uTp>
z=~a8wclsfd*mk!^=>Z!)2QWsb1kP7#FV*g9oZ6q(d|hbAhY6Xd!v%kT<M~yV@$ihT
zOGxa&ZyVOEkCxRcS}5(RS8~2*z7q4y4-=!?qNO9ZICLwl_|fe2dS+9XOG{K?!<k#&
z%8U^*iF;Rtd&jNU$!qGp<8mc7WWH!e&4O3a-acPfTR$qa6uc&_&+&TuH~;sg`kEIW
z1sWX=n|y+c*LTVuSC`sG8=nLBstzvD{%$ziclDB#S69=PnQe-zJT&Rwp&E_ft1TaP
zKj!Wa+WI)t^6s8>eyO7U%crlGw(P#7?mgrDimkWe8csBPJ5<Xi>$OTNd5T8yw6Y&z
zGnBhIHgpP=IqeYJ_g;(l%WlUywm(XDIo^HV<~BVss-#pi@YRXf>E8SiCJx<om-p0$
zyj5MSp<^Py{GHOmnMXC`V-num*dMg=F}>1$y7HAD(|S7tkL?Y1@1K44`FghYV*AQn
z_g8uz*_f%kQ0`M%bj^+1-!}`h>F$#;SkV*`W~3z|toE(o{JqT=7^-LYZ`kqX(w%0j
zg+}l9$^5I{R~N1NV)p-ClN`D;y}o?W;*{FuWxds~<U-u@e<8;wWpJ(3{r)p-F4yxf
zofZkZryf6J%bf4KJmbj{Io%!cSJF8H-QSe3t&!WYBRE%W`-EF(3e=R<H<@%l-Xgo^
zQ|Tmk^?NVx)toxS{&rEbNR<Ejlsg{Bx;p2*uAi)!EN3g8bz9^xgU-GK_YO1MaCvpJ
z>vY)@E2j5$ZM#E6+jt6VE1z<%I_Igfn%i+w<js!BUCAaf7an~&wBV^q2;<Da=zGrV
z?S=mc@r$+3|ELqOW@h=xD~#?lHE$=gCtg=L@GAA^Oy=_QqHJ&PGL;v7DcK~s{NWv!
zZ&{IgiWm0%kzSCkd66Y$iDuW2eZsRQrPSHvy+3;E-lGM~d2$jFg3NE89O1N{`?Ebm
zL;bqD?t)4G3nx2%%W1DNIO-hh6S94INB!>e!4Iz9S8V!rgqQ7YT7`}P{|eLZj30HU
zEck05Q!KNs&Ed2P<Lr~Qg@2R|x1U#V)_G8QNIk#!!MUz^`*!Bu7B-KxSm0Wm`zp+C
zQNiUO(>;@%)XvVcR85xqn7w+>?sLko=G|6cez#tBuOD-c($}tFvHUAvmuD!jH!oEx
z=)Zf#Cw2L*N3DES3%3_K?GbtZC9UX+X~xIUb?-iVWH=crf0166r`(wL{P4+jofZ2&
zZ+TL2{z=&BWyb&Jl}+F~aJO#Ws&~#)zMSKoQ9ZqS@%(j;zrW}<^DUCgU-Tf#f3nX%
z|1BqyC;3|L`jWd`yWmAsL7CyK4XV{0S(CV5t3Rqt`f8<+@Ub<MGlJbtPu^gf%)$#r
ztDNV~eqnUZh27+&V~ud_S>3=ZKNlQutbQfdyv>zQTS7tfL);l{_xqmf_qY9?m%B{$
zxbdtbxAj`;g;KQ6Y9&86n;C8HtM2O0{&t0|e1zV;^n=aYa?D%BYF6YMEu1v-NB!z}
zW24iD-|T-OF1;b8+(6#ZY5o(H_d9M$Sm~{{dcFSf$E$+hbbLFv7cbh@*S(r~hgZUs
z)p7e4nHf~4O+Tk4zxw$xqo^YByoYvDIvIsw+ukePJAISEFeB^e*`<4@HyieT<e1pE
z`K`6G_94NlyDJm+Z<*%jXdD09FxY)vyqb%*_*#y_%k`X|Ri%eTGwy$sTld$8``EG9
z8BUWvx|~1WeYyOR!l7Q?I(Ct7Id+T8XQv#TyrS>%>Tlb$`+~n3*ho$?o_hVi<mO*K
zDY0+=RdkybPb%B9lP~Jn^}zNd*V?6~1{dyCo~z!Tc}|k2%Wa*aC-<Sw$g-z`&0@hq
z&o{HO|K7CtVZFY_+_Wj}>v|&V9tSrY?Tr5VL6n<si+a96MsC2Z`SsUNRIJ@EzCXWa
zUBCLS^y&3~{%txeA~L_RVey+CU+?rAT{*g{@yq$y#RsOzcm11frE|?ObRoy*>d!OH
z3~wEAn(y)}Yw2dcl5=nN-8Fsm{LWkk$K751LKaaS4dP-8SMK`H^|f}&-p@^rn>#r_
zow$7VrnK#WO*85{jxSQm-?60W{p)+xC$ISLiFsW2*f#F{_5T^iZzjm}cq_9#SsJ51
z+ucY1EXT*q4_SEmwmkmnae2z^_xAml4UNt{s+#lgp2g(ni)vgdIaj<jdGF6=z$SCl
z`+LAzbNS~D`nCx>_bkg%oM_y+QTfo_Wb1RmGdA|F;Qnusl=%DA>(2idcO6J)eaa>B
zaaWG5`_=c)FLjhSS<JqZIc@8_kjvV}e@!lDs(+Myv3eE%Y!8D4NAGR@9TOB==)6qk
zUPOQGy<HuzSHwQs@qDdj__v4JfB$#QnmtjpyzuQVhr|a#OuA}WHvc~DezYPsl;Qun
zrH-!^>lqt)>w~_od2A3be`L}3xJ4JTinf$o_G#ODYLa!}QNs-TnzU8IcPn)CYjWOP
zKM;`o`0vKHAG_{nWgZJVAN_x||8o81{PK$T{yl#GiD}=5`$rDsi_SaNm3m=iM5Flo
z$dm)SK8m!ynHiP;|MYvNn|-2N+CBG93}BpH$gurk-L06sPMa!Oxr&X+ZvwskUA9iH
z?Wt0~676hdEi1aruygtTJo{MBEcQ9c7wn!r3s~*>=$LoQ8Igm!y|Ml^wc2cFZ>74g
z$lkZ%`u*yOZU308Kh^(KY+iRKD8ufc^-gI{_xnEYR^?ba*Ump_Zv34;X4!?RP4-_u
zPueiKdRj~A3yCe^m4~-KQ44&0DB@u&-%OFC`XWp8P44KIOf&yAYkut=wvI~ib!tg+
zy04n|{%iMt$RuXH`4e+UsL&RXRUhItG8S(XJI^f|E&0HsN$vXtu0?hCHD8GT*tpr@
z^5rSlouqAxXRW)Sw6A%G#gDr+LJS%HG3ghQnwZ&`mrb=>U%g0jvi<xWk2rmgRk9zl
z<@|J&{oc(2ZIvFg?5?`oHcaO8SgJ$%sw(W(6}SZWe{E^LfAcnDL)mr9Wm?K&w@w^z
zQ5JqC6=`F<EL+7RT_ri@&)uS1_m_&uti8Z*@;LBF{qB`qF|{K1JSX_?cxlSxe@aF_
zuis59_v(pdyk}ApYQAyH-`ssxvEXUqh66{pU$8o_7W!jTwzaWs-V+DC-rN+GNy57{
z+4b2NZcV+j{;=GZz{D*X*DLG)9{Vou_|^aJhbxt37Qz7<?Z5RUdE(qc<c;5K%M@aM
z_cmR}AXu{8)A!B-Uk_K`@W(B=n;%+ymEW?|dgZNZGbv&1nw|#5i^i)f($25V@VPqg
z^@Y@W(MP=I#)WS;N%2=1eVA(BzvB0nX0|u6Hs%ud>wex{<l9ptZnY=>npm>Vqn?Wm
zw<rIedHx>T^-kHJ-K**r2b@1P<B#~&^d8q;PSTQnhWp#!ZqCSOSTOg<-^FUlx>{mv
zg^V(LL>MxT?|!`cyN%Eb9xWy@_sw<Oo~g&B6thlMN^Mlyk(~Q+Z`Pa%U9)5!wOGvB
zIP(Ypt%crOPD<!(d6?0A$7r^Pl=ZFJbM>EhJXI+#eHkfr$D_OS_}aFgsa+d_w@$A7
zvr}W<zj={%$1KuzUF_-Cd+pD%d8+XJ1+#XBUJl<}_3~%QqUnB5Grsxw+ZXS13uj@t
zweNGsgvJe-r|19kcS#iu%}ua=WfI)@{C$K%L+zhgj&YsKI8L4tdfk57jAO&I$EJq@
zFRk7a?dy}b^04oX%6Y5)xckOWlq!zo(c)*=QfZ;w-rCnCYNy3{%-yoW?vz^Y8*%j$
zf8stJoOoF<-_Yr)P4)5kr!LbO{vNrLB)dZ9)ESTc1q-%4vw8mM9NYbrsPajN_I2}o
zi&?PYkw)vn)0`=f%?s5txYB2>?~R$@Z!)?0M*H6fs?B_^YF6f_obI2wb$#94vzNF}
z9pYQFz=YvKQnQ)V(qt{AkX@($pA}uaW6{osD?JtSxW1of>R{e>JBP2uu=6w@;|;BC
zjeoBNFDnZ;lj7oD|K(3C8?(4zMxSu(Vqxvgn(q}p?2y~f{K+~+vUAS9RSTZBT$}iQ
zL9YGMtb_KwYkp<BAN%oRwSV@v`+DaMSO2|#Y{w6V(h&8(wQrvMxh34I{!6r=oYhf$
z`r@fIuAh!<VZFnDj8}lGRK5RjxMWIEZHCRQ$l0&|E8k$(SKaq}+mEzsO}1OSewJ0K
zX1S|p_Ix!twQrV;#Zw=ChAp}3VW}EgEvH_8=VqS%ZgUoIlDows9lIBARU&I1|8H=e
zwR+NuGaHu6@9$eNIj3_|`|E=*%v27sN3qx2eyV?4c3g7RtS5n2ZogT{*s!fv`a&Pa
zx=$~*+tj_yf4unSjeRF<^EHfPx;|<>xX#{o%x9m=)7H%6YZd3o-2SEVEI9medW$2w
zTgdZ;DeeD_Pd?vac|WVh#PPNr$AxY47G(Eu2%4RG$`IG2|Hv-OdHSA(l2gK+Y;MYa
z?PM;~VUIqnl5(}<v+<nfd#9T(79Pp5yr-(a@!Yq~=bzg?P7&$L2)q(qV*b~>U$$x6
z!Ghj9Hka=k|Jz>Gwm&25?fw_~85fTnbzUajS90!my@TY^nIHOvZFDCcz1CG&;&LJK
zWSG9Oa^kIc@l3Ui*Eeb2^<8*UqVV^ut>&kM`Zs?p?*03fy&<mH)3nw|dEz=N)j+A$
zWhdteU2vIlyNkt9V?yek@{k&{tp!ZW{xeMqXkOHm<{x~;eCZt-hxWqt#Yf*6x--`=
zGIBZ~Hs$AcR<<+df`20lkKAYeSrC4!<EhG|s@cIJ3>h0I%-?Vz(Ts=Jc&FU64Ni-G
zm}PEEY_f3rdLo`js(;gp#Yas~t&3b}R$SFO`?JMkU8&SZ^Y<N4o#83Xu|;Be<G#k=
zo6=9Nca$65P0O5Z`)Ae<$NO6fL@%Z&KRViSzHoKtdiHIL@7pDFuhbJ)-=*4RekRez
zxK=J~C3odf#)fUTd4f1^bp*1%+ip|!^#3jSzHR$;ul+gYQt2w7C}3SQ&pkvSqO|AG
zKJF0HkJsgvfA}56G<UghM(go|Jzjo#5n<Dl>KUc^EB54l-uUi^_6+ZuzkPyv=|_5h
z23`(768P^8gIT!9lo`$2{sk1XJwD&FzpUkBPoe8|58u%JW-}Hueq`z2Q55wt{n+Y%
zl1-~lYV_7GQCqyV+O1OZ^VZ4cLeDNIPrEjg^`qVE%jP_{{I})hE5FYO`u%gO!xn~X
zGE3KeIqQ*@tG;Ub<%*8Qjj4<c-?q2PY!REvJvo4*cpH0hk-gTTE@zQCIZ5A0g@vjO
zi*M|GesFI?U-r|v%XdE(*5$MBe{yhFzRf{LX~$aGohA$y{+;vP*;aUguXEx&$-lOb
z*K5oRt=gR3{o`khXW=#j4OZDRXH#U(3Qal~AZ>Z;Y34+^%2%hWKU(`Y+fJ7-uYDpq
z`PH9UFB$$Hm3;o)ZXa)}-od`hYlOBa-CMJ+{nm>o0gf@NzxDjhKjCOy>dUY|_omCj
ziv`Y4m0WbWL@nxccZje3SgkX+(D~Pf7bh5Qr$1m?w2w#ri{qpB74Fwc*$R*9?_#cN
zE=!l$;=JgpANMx)4?;CZ{~d9N&fl>A{Nt8o)fd@>Zd`v6pt#OH=~c@me@5nT72%xO
zj(1uQ^0Fq%XtYm`y*c^r$NNcZ{y+P;?D3WIo6_%ZzRXF^<6Ubn#E?<`=j4Kq1-%Pb
z1my*8I6iAZP|S|@15f|`yYS&v@7tzgKL7hH^*Na~F+0zPWFP0fA32$6Mu(M;^77^O
z#TVY$iyt_ccYM9U)(tH)|LVFe`rY|Nb;tdd&+VIA9p!KTynfg#_~w-B_hU{RX_cAP
z6O!<x=-v7MvT_N5Y@4?Um2N4&T=rAdPXDJs?#bg@^VBYd?Vow%ukV`W>3J*+w@#PO
zjJi?J<2vCL|FvuTo9t9#{>~8nUif>e;E&23$J+0g<{mhmVdm9Xv~EhGeAmB!%G=G<
zcCSCYL-b$%x*0oXy7>9e{?zyF*I@>S-!E93<9QaWwO(Q6{IU91`j2>#>pgECb)V`G
z*822N+hj^0d-~MyqT)5I`<`1Jne#q{^Xx;Jci!<&WHWEXKRz5NcK+q1@)sY!ZN9|6
zWGZ7$vdq4gkjKtPW(UR#=UQ9ub#;=h{bj{)q3*|#kkmEiE3y_>IDXBUw^MVoow(om
zNS9h|wVsB$D|gd*y3B;OPR_PivP)_A%J@H)eQS7k-oI0}hw1hI1-Eu@5d7hxlea3v
z`rNL*LuHS??)-V>xj5U7(+$!-b6(9~q5Dd8Mf+Esf7_q!t2c@dD*3wd^c_u>SFg1m
zs9*gXy7))^-`b1o=Uw<?QPJ7<>tV8#@l0*LfYQs1P1_IVOG&=YT$4ZN)$vE>&-N9~
z4`SUm_ff)Mk-6tB7e87#JC7}T`{EaM=S+@<Uyg{KR((>TxMpdN>#pQu^XJ-0U6!0b
z^QhyW5^b&*!P6z&bd|FD^HX%PT9{7FThj9BtJ&s@;_(a-<voXfOl2uro+<3{m35QL
zPczxG(q(FmC$l+n{tDYGo>VeiC8b`ew#Ls#KA`aV`!D}hEB?g!ZJv1XlJA=b$Mm>g
zEWRW$?cq#o=QT4|8E=^6a{i6hQvdbmk87=c5t*Flwa&&xdGQktt2eQMffxIoKmR>y
z&9>cg#`=5Dzcd__%?sOKbN))Y1pDKpSsf2UpJua}|G&t!AzL^wdeO}OXq~_BI=0Q<
zbS-@Ey@;1<4f}*^O&7cEdAFMX!UKoj$5WRzIUdQl!ohlFj`g1%)*7oCUmjS~WZcAH
z%J#;`t2{t-L+2LmzD*X{Z@zDm`DB@IZOR%JaiwLqRr<Qk4;-pr3pU5MwMV>Ud?%Wc
zQCYBi$tBwb{JRp&Lw)zS+<uXM@8$om1`Wy~_Dnpp{O7#9!q9ESW3ci%L-(%ky5uW4
zEll6%vvS-ruQ$%H+oUBNnz7)hvAM~G-G8JCS#MWO@;su-eM6tY`&&@xJkG688FGVi
zzljN*Snl|)n0xixi2g4ZowH7^-CyoGPwo}h7L7OSf;p}fb1(a6cYOPTU30FB#ROF3
za8JJBtMPy_N1Hz=!{|z)H0Qj9=TiQzTTxddka&B(h!nqP(EMN4EAs2_ZvJ<z$U*Xn
zulK&orwV2#Tw}|V<Go}!n<->Ew@$rS=b2k+TC8(KqK`7(E{Q1m^uKkveW~({KH;-J
zG*|z8V5AULbz{QgWhKR*(%P~w%sXIv!ugEk?~U`_er2nF{G;~fsoMHWX0aZ3thnXv
znb%mlgdW&Ab^E^D!1yrvW&75jK6bdfoMG-x*B4Xltz$)B<|^&#o_6Y{v`)nZyE@Jb
zNiLz!f?F>*MV$J(seZqQ!>!i(k_KP5XT3Y*t&5pGv)Cux-tb)})VgcM%lO$BxoqmW
z4>pUn&bRIRF7r1?c*l*~Z>?lw*3Vzn5L@kG+&Ouo>8$3*+8yilcXj`HAK2ZQHObxc
zy@38)^~%kAubF0CRF~Y`;qbR{C1=zUF=bJU!l|AQxR~}CZb`D2Pi$AqzuKdt{^8mC
zgrD^vnjCj;ym;C2$my1+M}+6wDNl51*Du;%R`9ARj5Y7gTsO`;r#_g=3!W}l%a1bq
z5!&du)a9#gGmnLK^fuqw8ohH`kEd4sXfofd$=Uerr}TqwmW{V+MX$GrdYJ9IZF6SD
z&jQwedyZx8=|7#+;Cf%U_CiWZmvG(QG@*)bYCo#>{A9RL*kr$J(wcSYVp<;#DbG}4
ziQ00yT6AIn>myOc0{%JatMo&J)2CjHaX!!&>2yU<OEU7>+w~6Dm0W9gt7Sw!+IGU!
zeZ?#H@W*TYcUZMqytMDG_;zq(k4yElozgqwtrgS+1ix<GS?1KaWaDY?3F^P{SMvwu
zTCQMI6c_#bwVLg(ch0R#j1OJB&$+nPPPgC@e55hEbKjcjR*wZuA8d-<admfYf@G)K
zo~nMw&xsd>0~UP|{(gAb`m1SHnY;|^e(idY%oeMb-Qew3#xZ40A=jNsp>ykub&7As
zt=g6-Qo5RT`?Ga-(;qf}`)ki&yIk<c!tI~r6`RZcyIv1e4B5BuPS4J%&!Xi7H{ABm
z^0qp;jdfZ6@xNPl`yZZs^Km%OEZ)!Y_F3vP)_vz$v`b-OvzYs0$<>-yUP}8is4Z@>
z*u3W{fBkuVuIxiEpIWmePZwHo`@D4JR@ucF@x~k<RZbM1-m|*@`T@6n&+@)}Yq`*q
zBJ{Yr(YKvrhW-YH*g3b3{V2;6UdZyvW#6ms;SaVO<zDe^Z<wsG+dnYFc2cxRt<Z#S
z1-o+{_v(cHT<7`n@J1>dFZ-_r=4QKIPnpFV`rwameyN7$g?*3D)z=Afx+N$E$gaC%
z&wt78;fGJLSxw^iRri&&J0A2|tGw!M;PD%2OV}cWloCEZ*tW*${i`SJU$xa%X-i68
zNNPTN-)Ui9JmcY?P5*zYO+9^mV&tA0N1N4~&RpuQIAvZRH#^w&PS3$wwL9I?&lWz&
za}TqLE<a(z)LVDeN+H<t`Rx-P`K=;H)WYOixFd9`lUOgl3Qf4YF0b@t7ne>-T1DTE
zh*yUrBP36z75p@7e&#F_Vs><;*>bj;C0R#Bw@oPzIe)LE{@I2MG12XckNno#9i25b
zV0(YN(SuVrYF4mCNEs!V|94zicR%B`54X+I$Lqe^)?8Zf_k&=<ks76xU%%s~rhKyr
zY_#yq4qf+R=Er&eqUN(rWj**io3rt8_{?4V&CYmg|B*X)Se!@f-GU31Oo0cZmT$L~
zl(_Bnzx#_adyaQp`kzZKGrW^HZg|gWU)FpyS0&4S^*6K0&7oBnx32hesEFzRj{>EP
zOAC(5mA^LR;{6pW>g2mX-qJv-__A@iP2<PY9B=HsAHK|fYa3!ODsA1dUEV?Vqge6h
z)%M?KZxCL2fBi0_D#xwK4SOrBV-74@XIptzA-(2HDf7n-&;4I@yH0xOs`jHV_qdbn
zW!qe>tCt<;#PL1;;mA3qTRCtsBUehVqQR0WD=z=3_6sik_VcOr*_@-(h3Ck1b}iFu
z61HYw(Kz+viYeRbZ^2)4C&~qLnjCgM^joUo*5$=j-`1=VFXIr;YtI&1y?gr(*UQ^y
z%qaJ<waxoHuk-Zg%Mnce@jsQh{T;fc--+ig{gAwG-J-hS^$e%I8rVa=&ELV_^RMn5
z3v1nM?}S%eOx!!>2vkJxC~fjK`2FKW%@d|p=I1~D8;fk4yI{jVgHUyYwDrqZb+-QA
zbGz?}otE5@KD#W3(&JVsz0Z{2%q?K-mX#=rIlNO^Y(<}{;`Xk)|75pKco^#TqQLyO
z?k^_Z*#ZSCDoU%f_Z2ex*?+x!Mstr_z_o*sx8fdZ$GO`@Jt(_s;G4vvqo?clP*(2O
z%as?7o&9L-Y9}ZBS~BC}!+9HK%+{Op@_kjvrSHdH%Z08#`>1xh!=CrmyG~y|TfDvU
zw`1Pv5~2Gm3M%-mMOofV<zNuIZgJX7c<W^EE$2>do9e-T=9A0c`FnbP88JS!%m_R%
zJ<qW83P;pTUDkWbQGa$keEVDARik(G&E!M!iC3p`uKVG=V|KuP|Ew*NcG)Ms{=a2g
z|2;9iVy5J`uD@vlYm@G7^^y>MzrXr!@{<4e_Zr=;KlCC`z^~j<Zqc5S!qv9s_g)9?
z<<b7<l3J55dcMW}vPqhe;FZHh{OmR_FE94&vwL&a{UNhh{N$LO{j1(J2e5@*Z~A6>
zRa`tb;rh0SV+}L!?UQ+6ol&@n$^CZK-q^5+pcf&tXP3^D*<dc$#vgI#?A9v#Q1`VV
zlM<Qp1n=y4|9?qr+jYrD8PNv4;Y#{pl7H8~*x&Ifhb3j2o&N{>^Gf$^7~|8<&;5{c
z+<9K}u4k1B%+@!hPyJfRKI;hAoxTIwojby-?kaNivRho3n36If{nD{YzAp;?F=6#~
zr|LK!Jn=m5*uD46)Z09-o<(!G1f{(A{m$a$k^_5}KV%jwx6ZY3T6prnmC4C_na)n<
zP0G|c-0xHPEQjHH?p~2o+wY3Re=44}UPJEQX7BeZ^_%znoyHjS?md@H?O7*#d2Z{T
zx9S>yY!2*=S^G#$*?70vh1VAz+;q9iA?)&jb<2(0e@febZ~HlwS3$GtO0rR4d3LMq
zp`?#Hs@D|$Uo|&zRlwh-8TzM#B({9Kx!>?}CyTqw$^{(<R)2puzrFRSyp8+9!24HN
zbT0jHy*GD@gjPnNq?~Q~g%$kkBf^<WjsMKDEiXEs8KZt)sB<<4f9I5m9Q;ic_Z1VC
zXR7~qXWLL~vQOsv#^_0Jv!*SZ{d<qp=X1|qo=$kqZ1m@hOgfYI_2BP^raa+RG%#}4
zm}t<c$abSmX2bn*Z{JF#`%j~r?7h!~Usxr*_oK&zhc0WMn8!}h+P|p!hqU_MhMw<h
zAI*HK+ogQ!ewV*--I^08Hs3T3{HJ8+QnX<;f6&}{UB8wE+-Nu|`0s@)m;Lw6>o#}1
zDRETMUcGHYR`}|oeUGopx@EebWYIm}qac;>-ji+X?%CWbk-yB`?q3P`b?t-pBKaNr
zE&6`PCC)eKJZ;skACO^nK}scXxqP)o!3!S?*#tL%)&-O8_OxWAO<;cdd5Tr_{CCmp
zANPyieg8(W-tW{k+tsJtZ*p%xB%5?G_Ez;Qr_b@Mf6B{lSeBNlxgY=JzkqiGvrMd;
z({HvAw*c0=H3t@W+BZMgt7QMlNBLLZY4z+SLN9oh8~%}I*6G`@Qt$M#zoI)nYwY1O
zFu1Tta$^~PtJPiAj;3X|YEouww6fVPVcp&QT*5?l!mmwhPM>|`%bvO7Gn1sFSle%-
z-#%&|oXt6xED+7T?kOdez(4=`nl&NQm0#ulUwQD2ejiVU;eyD<XGhB~E#d8XRapP+
ziKrM?LVrxctEe=q9TK`bGXH;w{E?m7*c^5@xoV2j-?)qUE7lh`d(O|X+_vE9CcD4l
z+%g+7gsfR}78!Lc{~X=MyMw1(zW-SJO`}5^BHJzaX0DuhQsYg?;vJK2+}*tEy}G{J
zs>5d=H>WpWHk%WAx7h18@AFjKLyK<3_`9E1{qQrs!{r3W>IdoTB}$}RuiShR!jh*Y
z=;tyupZnPIqp5Rc)wlEtzf`cka(-PaOWdBmbK4V)4(a@nF~2#h@c%byk$}F2FHBeN
z8&^+D7QFv1YD##YYs|&`KJU1qm?_g=2k0^Vn%19WtERbc|Laa^b2GVR7k2bJM_Nss
zf3tMcrqT;xE2f`i*C_ZGwZ-n9h*m~m=e0M>0=8FAVw#@f{k2-_!-Jjmr`oOOUXx#P
zVMF<^6KmwIpXU9Yxgy=poo~yl7S`{=UDE=8tlR9EG1sT%=E>Fv)~Y{kCK??+ao$nV
zzT~Oer?C1h{M%POn(1c|DeMxf^WM7i_DPj>@1(vKe)E!g)i&isli1<c-x->h3IA|Y
zxf8NTrvLLN1N$kt%643KRVQua6T=I#SH|%#-Sl^nR<QUcsVJW-(_<G&C^1I8S^3^J
z?LsDtr|p7e0=2VO%XT+KbM5_<s9l!%?yR}xOXcNv0$Jp2QYZf9D|8EyP>f;JyPtGn
z&wjNX|4Y~W>HTkh&-|Y7(ZWwYBK}j>%XU^=ex5&l{?%e@NsF`MXZOuqJ^B8dzWdK&
z_sn&gYs0%C%;x3m_hD>lnRlP1Ph0=7o_k&W_4lp655Aegm|L^+!I@At`>$*6MXdBt
zTH4L@_kwHV$L`Lmrv0}swj8~7bNPCu56R}~r9G=;dah{1H+q}TT-6@PCmEQ`A^YEX
zx#pGk^W+ZnYCSv>vb~UH-QDy)_2__^ABz=j)dfudUe8qNXS<_rU)R|BZC9*Uv)R7n
ztjBflv<ol4u9r;`5iejrY`Z#L*7woO*zQm75(1Zn&gwJ!ZguO(=GDPQMjd|_MgA_%
zXKg#9RPaTqSv-I1`-h9xdzIclUi)*}%K49FqMRN~x3h^=mzK16c-ZS$*{9OaXZ+g!
z)Z4C0kLJ5{;!TB@=Y>^j65-R9{y2MEPmEXlr0_4X>xDl(F4svkHoCGLJEv~2an7p!
z%*%!A;-x$PF=tC%?7x!~u~WH}K|1onsspJ~1<%(m4v1zlFZgxq{GZLrOcCqCyUj8i
zQWYFu%NFc(IDJhh^gz6{v`w^w(oz9i@ukMy`c=<V^4P_nMqE|C?R0bPr8Mga(=Trc
zD=G*&&Zl?!?6!&9=jugl5q}jZYj;TR&Vt-|9;yf2<hFcln9Th7T9N$r)qFc9si-=v
zu>bnu#pPa;Pn(Zu@<x>^&gZUpDaQ5nvr56`ng7=B-QSRVA@+0PVY{iD+#1aGrRIHh
zT*W0P%^)3d|HQ}tzkYxGf2=y}yi3Z|^`SS*3cv2LetJ~m{3^e)QqN~IFMoRT$@tWP
zb-Z&=9G;=acVn)5<`Q$iC9)Y;b^Bu;xSun~xUpEPK-JS|asG~Zf9`lU3X4ZPxp!lw
z;DTQNfQP$x=>8Jg>bN9n8Dr|t$>OivUo*1k#q__6dnCnptKz_U?lMV9=Dca^4$cXx
zZvVbVZTnL-;r)Br|7`ufu!n);>!NDK#%TUaENfpfMDgg~GmtQyePFxnqLmht4lT^L
z)-ov5YYfl7%)KF#F)BysPS*AW*;cE$OE-lrJToiu{jOM%h&`RFW*1-L-;~L4Ye#=~
zb{_lxB^NfW_e)%RLhW4mvHyObOth6xtJO{`zPHb4_w=a_dpEB+{o|CQOrlhBW#@@=
zzPDej2(@SJn|t-%6;JmA%Xl;XN1s-`b^6QC#~d|x??t;#6v_S7w!QrQM;1BTWX-*7
z(T^RZ<4@OqH+Og`#q@QTN9sDx5dQ5d>z04`$Gqg|B)?wf+&eMHY)^;h8J_qP+W5cl
z^PAOWQlZ9MZ-xFfy=VOI`eY6HWUI{<X{ph+<wE!S+yiUhoDw=JIyF)3`kH@Xn+gw^
zZ_ECcz`E{U+O1{vY^F*nONyU<;FzthFL5Tu@8NNojX~<(X$hTLOQrUwm9Ch)`oNq}
zk$HPlH1{9&Yzi}Y_2|u~jm11x{OTWk%U)^c`Y!$d_=|4-{&mWoRi|34tj{bs*4i^G
zC@?2Rru6yxr57Z=>mB!fwk=*LT!lUJ|9Q_{DHR@VkDmV0Vb;I&@JP~QIj$-B3qE{N
zT|F^jZQ+7m_L{TrAC<BlP7_!$`#0;q=|5eA6_$&HTdaRv<*QU+@$_t8eqdc%rA2SL
z)BnSr%g%B2Xs8wZOv{?{P{r|e@Q>f}4R75wN@N#0y!DI!(eXporonTS+YPxJdX<Ys
zZj|=4{!>inyu~r|%=-I{yQ-JnWnis)p>a@}bwiSM3SUUV&oHk1d(ORPt*RNi*UIAe
zFwWTB<#4%%b>I7jwAovWt`@Cm*&1_az3PtqJ?g*WPb>Ow6?$-zH@`DLS2iN>-Ps*~
zPG1YC_{o-aXUztd+$L|A^YM&R)-AhW;NSQ@>A~8o*XHza#hi9~>H0Ez=YsV@HSe~`
z=wGUTs-SyH{AtgpdHeegbzhINKB3_Fy6VYM$x~gqBD20+al7~bw21cns~j3Zx7O=C
zx>_`2-?Zk7=lM#)QmrH}W=BSF$6HpkeRA3P?~=Fmzo$R5!<5U6Juk35_`I+7e8O!<
zF4?-r%K6{Ke^?pU{y84y_<Qx7edjJ6xRu$#beli;>*<n9=~EYcRecbxyj;&v!m`_8
zu1xMq5Bto})xB=N{(sC?Ot=;D*VN9~&iuFN)5K|UBImozHy647ylv}TdG%~)LJ4Q1
zarmYB;QcJQN0??6X>SjHVG|SR9=CUs*DA03A0~=UzZ&-bz)4lhC0-ZWGRzkLZ(aO;
z>u#Q|>46#c9)Y4EOrgwLyM#AuulRM-V~X%8?^y?hwoK$O`Nm}TF8|+aGo~3U{z>KW
ztnhUBJMp@Cx(3tK1Eu#SF0={R;I+W(f}7NVklSiM&%5V-t9)$7#rl}T<n7m^H6nRy
zf9K9sQpjC<b$RAL@fXJP4lMgCGH>s-SG`}SZ$2QjMKjy!vB&50>T?>|c6<)_+*~ax
zR4Tc$%stm^t5inx>^sK0#g=ZZ$`g+-;Q#4TSO2g^yhn9Im3r&${uxpFYvxyN>6Qsp
z&A2Rh#DeSLy!1N`1>X&u?au|4e)oTWpL5HLwGPdCTs8C8G8Z3cd7AX|guC}S{=P*&
zc$W%`uc+$b|Ma)<^Z%#aEGt=Zd3kqqGW*9kC_MICbDLjc&hAqQ|2f%2qcwe^92W4i
zoA?!(o)2G;XwLfTdC~ug{Z;q3K3B_nCjMjBT()iNZ-uCr#%`ILcB<}voOk&C&*G-?
zXY^-3zdfC~_><G3Je8aElUBALP?kR*pIxI^f9cND1yc_meIhom`pYZX73|aQ?l}J7
zkg3c<(}ed{URQhNrN0M7>~BA`D^}#%n=5r6rQYY;I$W)gkiVQ*b>{iS;${1E8h@Vl
zROXD5Oqt-=6vif*GGW0~N6=<Ia`^?q|Kvq}ac-NK`o)offq}u()z4*}Q$iB}>$7Yo

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/landing-page/vsco_logo.png b/tensorflow/contrib/lite/g3doc/images/landing-page/vsco_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..f408f9024b3036df362a9c792b5af1dc99ae939b
GIT binary patch
literal 17249
zcmeAS@N?(olHy`uVBq!ia0y~yVA#dLz_6Bsje&uI{ch?d1_rKmna<7up3cq+0Y&*~
znK`Kp3?7|Rr?H1jJ}Gwm`R_~8x+R(m&0p#^e39B!vO#bX!@`yc0xmilN?JR_T$X5R
z?-B_t&}~@5DtPpeQpACM3f*0s^u%0TpVl5wezULqUiI_8=V#uzzjJ5t^K&)E&tn))
z^v|rR<I-Vu(9(Anc=Xn-yQ^>E!+DGx6Bq<Fn6%ZrPn7fWePi!<_V)hQjq;stCC4k&
zuJ8Z<KVJ3hGmc0Ghl3o)H@2<`Y4KNH+<a6#g`we;)1Ro-#y^Z>IybCg&=hzmY&^%{
z_*`Qd^W)#slMgxF5tL(Q@J};h<z$#(r&t%Irp<WLIn3oo&+%uU1im@m=$v{YMR^bJ
z!=o!#W*YZgVfga+-z4>i`Ie85Sk64dVtD>-YFGvTx(V|S#eA-pJN`c`t1F|9S2tyX
zFsq$n$J7~?+n<=&8-9Mo)ckDb>3O~LpSn!+|6#Ma<(=W@Gig;nI-f6N`E|b7=6bi{
zxyc7PW~Vzwby(QDzmT*#?{n}_)S@Q^;U9O&v{|H=B}F{3NiR}ndLB7L=GAtA$9mtm
zT@)Y9ddwR4$aWdm|4sUO&NA~SN<34(r(C?ytk1wJhI#hmhnn9$A3AFIhsV6IuFh+N
z#k|RnbiZ*MJ0C4Jo^X1bfq6uYou|+J2bFAg3KAC_cs8)4HE=tyscR=MEBJ2mPm0l8
z(Xjbh*B%+Z-h;_Ab(TfWn0c9TMrC{P{GWgS-u%vb^09bLsu)8>n1bK8rv85{4aq0w
zPJ0>uA+wy(VJ6FiL)FtfCRjdqKL00S!gXEFlt!Td=XVqB4A{gDuvsvOKj5r!6yRXK
zn4p@_=rVzeCBZa;-K?=DfTOIrc7aSt<KYE-GVJP(+!pM5jjJE1y<q)*K-qx9?Lf8x
zpIwtCM^k|S>mnzw306y(B^|{CWQ&??6zeruy4sQ!vUIR_E-1Z_wn9Lw>C{5!0NXFd
zC1Sf+V%vW$V80+>!nL<~-vNyoJf{z>+Mt)ibvZHo20veO_yNuWt2w;S4_ZGA+974f
zW`213gV`T!D+KCz{TDEFwM=r)T+kC(GR0bpL$tZeoiji>fOUmXi0LZ9t8K5G?mF@=
zIu<bhVuXpPue0=G*$Z|r3QV}?_T6i^*Qn=yec|kjb6;4$Q2b)?%et*8@SsHE8$%u+
z5jDomQj_~N4_-+)m#{A3U80+T{KkkiHevkP-Cqy7B~RXvz0rCD`v%_|WjXR?64s4}
zA5JN7vv7|Q=<7Uv;OXO21;-1-3%3_4@9?h>ij&Q6{C?>8gUTN<HJtx6{xSVC`^Q?>
zrQmu&@rG~{N6sOsLwbh<ounU%CAJv2PEmU$DBGEMMCFlMlIF&V6|FyJ{FvvWlsV~*
zhqq_UCAT2Wmn<)(GCRJ^)NwXb**$ssWY;HipF|Z||784W+v;?2>5hOT4aq5Ur&OIv
zJr#UPopV)7CyR5F?nLE4#fv(H(T{pn9E031g%&MM*_P7zMDt|yq~stL%~;KH&1F~E
zR&uS}9K;+Xd?_Te_@%<jke9NT+?THR-aqBw$rq<8PHjm2l)`s9W}4)(MJu90%C)+8
z&8%vvYPC}8o_ck1>It=zVyEP$znv&Mee%T3le#CIp0s*`wZ6GV|D@|v(@!iv@qUuM
z=6(JDu8yGxLmL-wh+P=?aND6JkCa@+UHg(6lO{j1J*w|zqUygiWvQ)~u9yFmi>KO7
zc~1@3mJivt$Yz^I*~G(2*A_Los*Br(G=}noI)z+YB@^~CY-T`c@Yk@XtEa7u3zH3=
z99|sg9i$z$dx6<1ztHla=c~k5$A{FrPF}q8Qc%XyEq=Flv~{)hw|P9C;MRX^%CdQB
zJJQyrO@1b<yGSfnJ8pHvYUk{j=vZrq>bYgj-<)Ne_+r%q)A##LKeuGvyu2mv!tRRQ
zonF;d#j`hOFWcVZzeH-&|CapC`n#Jgf_azYrh|;lan3g`+%mYh;n0U^f;lP4M@+2}
zu56dsd?U{=zwqlL#^g;KR~IfTZ2mZ*b5G}f&vl;NC#8dPFH2muT^76S{@FEWgU<BL
zur-=)_}%opX})oNdSb@GO)EC+$oAa$Wz(6BIa_P8Q&Wy^w8|*GKPyW%Vei#lhkwoP
zUE6zL*~PRgVYkh$ZM#&~py$oM?EQlH&4+L7y!rIT+|9pp@{{K$)?fPP&z{_#(!Q8a
zfBFmEhdP#<Qf>rZp7^rjMB#<bXCkk^EWW&8bDQzyowt6@?4HJbd+D*Kt*aZeC%rbj
zZl0Hx$F$uzZ}Piu?_S@%Z5tqWPmWd2cAn9^hx4A!^WST-S7?vao@0C0?!8{M{dYpm
zpPFw!cnm@$b=+)|?_9rDzBhll{7wBU>+iguxqspQ1N(Iu_&)I7kp5s1pguwTh7e1W
zgkyE{JeJq2%h~^L64`B1k$LpebH!(hubp-|tz5X;slTm`r<nV;=r@fR`xudXf{KzG
zZ5G)j8C_D|q)}vW$|S4y%Gr#k8F?kyJbG$M%hb}6W<K9kd)jSUwEp?{d;6X8U9WY$
z)}9)^Y>mx+uOiXnt1BaXa#G%0ys`d_LYir@QlDVBr1`m`^tb7I({^v@zPVV&-B0kj
z>z?GAz(1#1j<@XPT-<rCQ?)ZWraaH`d+sFvpwLSxMFB;*pGv$^yn?mtOwGRP__s|z
z^V%oa`?S~V$I>4^|7?Ew?d9)F$64!|q!d+R6m!l_aFv>uH*sG^cZSAg&CN=mFTd)z
zx_r};%@vV%vw5!P#D7-0t^IrX))!mXY?Xa&axFK?e9OJkzYBRUwC&)PwT!Z?v{aVf
zYkn<PDbMTPm+W<sd4ccR&rY0u`%}ngvuC{dAMah7pg&Rng#3+H8@6UiS4o#gpD#Q8
zHud&?@eSO0BC|xNiPYUac&qR+`)7Ht`Wg2m_is+0es*2+?v1-EcmMuw@qVTDB)wC5
zQum}cY~MK}EGFPj;{8>%*S*(xt@<YXz55&gv|rP=P0O7AR_}MzyokE>ht@7y8ML&h
z`sowKZ;iLvWBJU&R)&d&)&89L`|)4rd+oDUm#+5aOXg3PtE*Fa<Z<2q%luc_Q?j|T
z_g(#z#s1pqmDTIq*RT6m$=tolVBR6+)~{=St@_U6itNROj~A-_Ty{wKLCcFHtLG=z
zzq5Z?zUi^Z-;l?db1zR#Uy;r=YvD7~vyXK<b(_~-Tw9x7d%kRo<(9cWk3N}xt61u}
z*16Mj@0T`KD84`R<>;Gk;kSRjZS>}x-ew(EYO_nG^k?4lOXuck*M+Z({kp4a=jpAl
zADJG{e!lK@ynS_Z-QuUq&Ypdi_xhdj{;2;|%Z@Kw{&U*fwb$Zy-`loNwN^aO`u@I>
z<B!i>-QBa>{5|Wv*RT2C@3-Gjlkmar_rKHZn*8_d&eT|Z&3K%A=5dpHkiXZwRW@t?
zoqe!<nd9t(pPl8L@1A#>Z!49?yZ>M8kKflP?s-1txvKt^_$&LjR8;0&$=>q)>ZWkz
z^*`4yKlbVWgnx@a_5PcExqjE5o3?VMcAvsNl>NHf_ucD#wtYnHiy0jADt;M$eS9V5
zp!!n(()lmv@80K9S82HQm-f5tm*&m+x%dC>v!CZTciuB&)-yN57+QHO?Bp8Cw|dMM
z;APk<&hTi{PP@vm3o-fj-Vf|Q#^~E>OEX9b3O`|LD79u-F0i^eg3G8$kgu3=!_uV{
ze}WHHcz5rNSt7Q?RCDIMy8bWi7R(I$czyi+xBB?kuXgOJ<#70YeCNL!w^@%pq~pz;
z%ZvPSS^h7oX9(}-GPe99k;lNmI5#sSq9nrC$0|8LS1&OoKPgqOBDa761Z*m-Dsl^Q
zQ%e#RDspr3imfVamB1>j@`|lM!um=IU?nBlwn`Dc0SeCfMX3s=dM0`XN_Jcd3JNww
zDQQ+gE^bimMJZ{vN*N_31y=g{<>lpi<;HsXMd|v6mX?<K21fcuM!H2QX}ZOgc_oPz
zx_QOQ5JO<*xTF>*7iAWdWaj57fXq!y$}cUkRZ;?3qyPgDccxZkB5cYlhI%VGKUW_l
zqi?8Zpbs}2WIRaJDj>5WH7CL)GdDG_I5R)b&d|cl2CN=K24SNOLT4nBPIF^qok%iJ
zoi_R)4<LC65?Wx<AQv|~E*pJtxPrpij>}g0l0O53*-=jy$B>F!Z||~CDGvQ=_Tl>e
zz2EOuzi&!<uhGTD)x~-=PEf?fH88}5mHWnq?i<|S;sg$M#fnsmSHJ&ScmMsad22q$
zO`bW&XVS@(cd^>p_sZ3FdY-F1XIE|i`{wVQ%wMbbn;qP`RKDrotHyirf3Gs{-T!Mf
zdsY3{aQ-j<UagmV@%L)H?S<c~_g5|W9$x>|;XO#m^yzE+|3Cddudn~R`sZ`||2O|U
zmj9pe@89?R=JmgC-@kP{{{OG*&-CkmdjJ0MdH(-BSC#jFmpT8{ivN4T{`&vF@4wyq
zdo}y7T@6<J4lHN#>wn)q%WnVU;4@YIO3%+n)a^D_K46x=ap$RCd1hYm?As>Sci;Ou
zOFRDOsk2M<N>@Hh65kzBe6c%jOYM*Esd~jTpS=p-Use2A`u>icci-k+zw`H9dA{7g
z5AEju)weFsTHI%~>gSK+_V?!0d^#E3XxIPmRioWx?hAEaSI3*i|9urc`}>~fwr3y9
zec$nHnfcz#;&a9GHe0^U^uIml*OldY^2@ERuACJ-@6(jGkN^LB|DUZtc+MjcXZfGi
zagV>sy(^t{dCoG+yD$B&>-AXQ``CAO^SrNXo+Zowj@Z57--mYlFOx4Z*IFHT7WU@>
zyM0F7k3-^S)^9dA7q@E1EVO)IdH(jCe_xj8H*(B4uq*ZSv~PkBeo23*z9)IU;@C5v
zbgSUsA9gp$b)2txX1smB{b9*-7np9nZ`iXZ{oU)mf9`(PuX)IA9=`k5)mhbdU&o!j
z=q`Wv&!_48b7JS;yY$^b{<P{t>Hp8n_h;Juy4Y{#U;pd!?6=$R@3Z`U^Sm8HM#J;_
z4(toWcU@?mmAmccvQpc3#q)n}nf7nz`P#hCyRPqD_wz!#-IbbOm*?mC*?c_mEc*Vh
zu;Lqu?Yv?qYTsAi|5hIV>(cZn@uPYlPlfN>WclTS^X%GhH<y<t@3G(hJm$Go_-=tc
z+y2PBd;DE|=Ly$YyYIfuTl@OUQT@7&u?z0Zm3#4bYp8&x#GkM0>+j0c{djm*Rj;s9
z%x2@Nx?^)W4m7-%JYV7V`Kf;W&YyqZ*WYKGCe|67uvGTZ3FZDA@!smUd%xe)t}~n8
zJ@wz^`Twry?c=ZiariCMjxS5~ZGYq}jTU)u+i_j-+}9@WtMC7wW&iJEe=g6)OV3|5
z?t8*u{?p;hGxPmBzt*3xeOH{%`mUkl=)KzSccT-Yzqfw7C3v?*R_sEl1^v7DET2sH
z_SH_>;r^#-+jGtpP1~F!%AO*~QO0^~{?9Y%QEnN=JI!n_{Qv)TvHaf^l{ZhvU5k0?
z75+;;URprE`sQh~?>mm$My<{;+;FYQ{@>2$g{pPmHqSpR`{F>?*^iPn@5J~2IQq8f
zbL5}j&&>B%7B_OoBs{n`W#8Ym`L(Z~{e53wzuWqYL;NFdj~$Y~-|w$~J&!|#nZrKv
z=-sm0Yb~G6NS-zS&l7dsh8_GHoBs8uevDbiJHP6l!}b^JO4nxdiaiP6_h~BQ+ZVro
z{x0A5dG52X>+5T!&8OTKd{BElJMQzWxA%X3JpJZU&dnZ%<8wGZ{n)OP7$fX(qwIEW
z^t<Q)jx&qZ=C9xVd|owQ#sPKPm!6^P4wy9jdo2Hd$6eo=GMhc;O-}7+uM+#i$Y0@5
z{INT}X7&5rg`55{{NH{5Z=NRebzjCWEHj%nmhb<4SC>C`k+2WbZB_lslhY&_I|><I
z*R8Mnx|;p_&D80!w_Hv97wmr(x?XC}y!U%=J?~rk|EPZ5MVoIM`*UOU^L&mS+`qp5
z?`wXspGH4FM4K_6@LhL=YxU7z)8=eB{x-k<_q6JDd*6es5zBa2ICXW*(pZ(xELQfn
zzi$-Y`+4sBHB4!C|C8lD&-gDnxp>ZF8C~`K=h0#}4_;#a+iw3SF?;WYoR9vDr@zTg
z*pzes&olGY@6WE{PI!Eo`J7DyU&a2vZ}V?||MK&E{lCxtUw%gB{;6fCyS8~=>47T@
z_H8HErK-Pa+;<@C&yD2%+|ARsJ@Hx4!tdsEYtA~AMoC*vv!!MWYOXAnGxa!c|J?HY
zOzsPNzX{L(;qp5A_n%FR8se)xTOxIi9rfSxc<=k#_huibe*Bd0xIN{X+qb*Z`Ch0!
zes2H2lG|aj$iLhBBiFC5eVeVjK$m6aDV64k_4zfArLS_o+WWpXzdFCWwcc8<c%#^Y
z-+lU37rSpAeY*SYuGj0X_HdeapU|n2dSrM|NBjS}^1Z8fp7hN#G7$LZ*V5LMde?lB
z_T~@=_5>yEe?>xSYu0))W!3AepFHAqHGPwzkZIW6z5D)sUB5l1bX#3_j)cIZDP1+!
ztM@+=T`%z>lHu(8zqO{G>&|CfteV@(^kwVy<#u0JHm-I`7S1y27P#xN??AkN+`($j
zs?M8cvr6No7BF+>vd?v&r0no-Lc7h9pZC7+GrwfC=+2|n!tS;If8SqU6(6&w_32lg
z-3zv}tf}|D+b#McW`iArUz|g~P^ogJ-o_i*@xO0{J@Q`MBzy8*upYzr-G83y+jsEp
z5H+2CeR1I4^@c(d`l3$6ECH!w-cbEx|M~3`5A1u`m}gY5pIxTlK-fOj1H!Hw=V<3{
zRqGZq43oVb@R)7K_t^JU*BLWv`nK=;y7p}F{9jkh9v-Q;yM6PV*@QP`sk>ucljc1>
zc#GR)rwf18h2~k$?f?Hg%Wn6f(X3N+ZSNNQT;JZ?&2GGFHC~@~TK^=euy{w<jkzjU
zK77xwI4o_(bHm)^f-&cv{vVSWN_(?o7N;s6yINdef3A4mXPzegupHA3zpF33b&v?>
zlu)|e%eGW|^V&+4mBJ4uKaH~Zxw}=f*Ip~qq$hLdL*9Ig3H+yd*0Y5rpO`9mC%U4u
z`i+{{x2$7J*ndgIPMz#$wQ}b{-aLaFVT~d$8rG*h@U6Sk5%D$g03*k>&p#gb=Nm6f
z*?c7aq|vlZvHO`T>I_zM#+2{6JU8lo)%v~PvgH20a5ul5yZtW1kv0G8e!snL^Cpo$
z`(5Sv>TTWAyA>XD-f4Q4c&p@r`_!tx+;Jb8$}Sf9w&{lCT>up(2WxBetme-$d|t;H
z@_BBp`#Z&J=@V61qRq5Uq`uDx@oIh8$SxPLwRgJs{K-qtJmJiGEX=%D$V`v(tVhJO
zpl#e8+lm)BAOERSlY3BeS(9tY{2#aLCJ8**%B3c>hDl|^FTEs&s;TCCU;4iNoEd#Y
z$WD90^nIJFH*Sj2JgBy^kg0%wmMr)2=WJJ|@Bi~O^yZz2@{P<>Q;r=tKeznePTyU+
zS~tvPF7T?{{r*(1eCOJf6?L1M9xwZ_TU=m~O0j)ph#upqk9FU7$7>6|SjE8D?Rr1C
z-!^J)$5E!3$6g!;x{SMAP2MT7-|(;dG&%Q!mUY&{(z`0%Qxm%9Saf;+Z2s}B#O|H3
zX6x@K>h_h|KN~)7IBdw#{o+{8%?yR`TH*RzM>ge6a@d%$?XRa%r5bxcWU8?G<gRqL
zy;B)J<tqe!{Jd@Z-nCnIO%8eC&-JC|+N5<XdJFcy&CR!#m|f)fY4!Y1Q<iTr$#yu|
zx^nBY%KTZiTatH9PRS{|Q+T{g{B`3#w(?fh>FfBD8zUbc@<>tn_;@)(_M}5h)7~HC
zulo?@n4mWKtCRAi$D3}&bO&v-?hKtH^5XD;=f9&$9tpc%J|KHA@=-&NwAk_DZ2R5c
zAM`EVaQN?0!5a!9xhEK`Ilre()cHNFm#L_sZ+Fs!i)HF_3Y@ql{=Ii(m%S48P{qvW
z+YX<W>Kn&YJtqCplxI-#c%tv$?KfNP!tMpnmpyo!U)UYZlUn%3qo?`N#KPKFEdN(c
z*;H`n{-IBGMa*T_bQnUTx}R*;x4FP8qNdoGen(V6pIxx{kW_oxlcE(IQJila4tB|x
z-zj{n#5I}W#<sMm86pasV<tx&tYo&m?>$N9q%n8=gr2>ga~|4mbjY}Im8YptY_-Jq
zOSO|%oT~U?m(;Lh;<13Q!jCs_OsieknAh;*w#Jc$J1tXRzWS8pc%fycPr_1H$+XtJ
z_f$AkHVZGAH9!B~&GU8JzSia*-uP-t&Ey|jKeO$smM=Ua$Q<VK(M(V}?f8$@xSnIr
zXP&Ql=An1z{f?{`7mXypx9ZnCtbcak#sZ7UncwVYDCNcRe-U-)o4r(&zqRe*1LeLV
z`8{kF7prHAFiO0>&&rhD@2+?H`MRpBtGCH+bUHcngn{V0qp^>w^{g(`|GGSXn^2?9
zh3)(QzSZTZm2Z3evA_P$f_tV1RbHRVDnFRbr}XdBozHXM*R0LUxFREA9r>`~Zr_hL
z>GsukjaP7Umk2Ar4%?;F#T~}Y;42b!!A1S5->TE;r5~Tq|InhIv((@HsAu8k53${E
zOK0@#U=TZDqLw?&?$<$fd7FZrZ`$@1A3NiJEm~6Ur^7MxH$5p4V!P&7U4CMD?Bw$p
z%?tiwqN=lxCcevPxOTZU(qn;i0@K<ZA{9xuU+>UWZJ4_)DnW8z^m*s~kIkR$tMk|U
z&{w`EdCC8`n=i~f{_EzU<rn|uhB++%{V=apQ);S+kz0(=W2yaCQewAizE9lCdm^<W
zaK_^$zms#`)jYT67n|cEdM#(~fqexI><p2fbqO!DE?u~_Pm}G-=lTC@iVt$f6)ZSw
z5V_Il;mM^3!jstU99QDF%`n9!FnjKXNQS%VlXiHi8+z(>7cD)Xr6{eQ7ae1=Ui@{7
z(U+eFL60{@{M6A}_hy-)S%egKg6)hMtxUTPPR#Lm+VA)+-|qR69^RYTGY_6VqMB^+
zj!${1yh4C=pL9ZL>5EL&+=q*F7Hdv#`S|-pk+P_Sx!}|p#}4()emL)O-{c!@am!ZU
z{+OS4cgb#vs2k^`zMt5e)qCv{<J~9kcE8`pyn1!i{v{jKA}zk$x4M(|h$WaI;BoqH
z{`kAw-W6Y*S;BlLkelVKdsz3zht|o85#B3*zll1@J;6X=iX`(pp<3><r&c8Oz2iIR
zdvZ~l!VWdl`xnn0jdgTR+rzRpyU<ccNzZ>$_@A%+HBXd_`TfMXUi`?>bJgkOnddn#
zCE(F64wc8dA}qFD_SA3^7OQP<J;ASMAKqlns4L4Qs;aQuI4{a`dexV8j|`)0vI6!V
z%f9Oz7A7U{o1y+H?|<2h*Kyx>y{+AIt}pbpl1ji-pHK6C#)R*ABzk>G;=}jRH)e*n
zE#Gz^RkI<FVQZMjRUftcOMc6qTA#L?*<ods0)Ius4p3t|dy`5?OVg?y_8DgLxj8;u
z)3DlSvwflVO4+M+R%;j8{yBF)$lYp9a{jLDMAHjxcSPqu?9H!veBkW{jcZQLyXN0L
z@@~4$i{8AF#jmQRvfEf01$G&J_PS;5UsrotsbNm*Qh(2l`r?Yyc3iP3mJeGje%E`G
zy5kB*)!e5}YfKwT@=KyiUe6M{`HE5LjmwVg|I;@x)tJcr4`(+>(z^P6Qk>mn*QQs=
zE8jY3br|^F5nsm>;66ESTj#l?%?&#(CpmH6Q_a0L>&7N)&YY)un>;+H9QA&@LGMS^
zBJ(M!l26<wKl!~N>-o_`YpgFYhClVvu($ttwdTPjP7l>xErHul?378cvn@FKowa3F
zo%U*91<R(B{5N*-{`XySh=uhulcB>rlOLbznjCK1WQqLmKl&o-PtdwEx*I<GFgv_U
zOfNZqhVk{DONz|TF6%kpoq5S^{^fH^kMCJD`HfA?qC&BAK@4|YF4Y~c+B}1C)0BOU
zo-ZRN@vr%BpvI=~;m%E^pHJ>g&E!>)%>OlSij(I5=goHSE6-=!J6G<zU{b#C#{~!d
z-6s!CTl-b$&Bv#2g)T?kDU*n)j?pMK>e<Lt6m};`r?)|-t?u2sqprRJa^JG$N*?cG
zy8G=^mHT;~3l?AU#Lg=$4KuuLY;O|k@KHPTS_=CEvjX;OQg@H9(wght=)6WG?p}4#
z%7~qnUuC|}bP7x|*x?pgBPq|MV|c{mmF3>)!PbXfFlflzt#!O3Z?DJjefyco=Ue=q
z97~Z~l-|Gf`Jo$)X}sS`S$AEXdr6PM$(i@{Y9@z2k*|)ZIR7}fLjU&8wL<4*kEu23
z?ORpyWr_F%2BGdLPddMg+GiBI-!NHaS3GZ>{4(|xtcMwAwSRO-_3B!CG=2MmjB}@h
z?T!RSwf>O)zHEwcM8+c}yB{0Ume#o+o>IBR`<#`*-A(hZ-O1$rczwyE@T@PAulpJ`
zZ`-PSa0wV!T-<QtMdtH>+lQD`-7e3wy0pW0RRzm#)5t@WtX~tod)ytSf3Y^W)^yUg
zcdl!$&grBn9@BRVF|i$6$QiRdF!|lSro3x5C5!KtPh(5I+i;F6kpI}M*9VRIu6llI
z+ps^ysz_c*O;|wuaB*JQ=Gj|o{#Y=-v=qCuHm`Iwqw|BfCu<HEWq$l>qi>?Xny4AW
z?54D{@V4V}7yh*oVf-g^3%Yl?A74^8EiYpZYwGf=f{N?S?`=LaW1aK`*5HMwRjji1
zs~mV)7W&O%qxOwux=ML|k*7QA4!>Jd)-df}kLR8v@=x3So9E5h?ybD-q0^N6uDADI
z<=U}VjCGdZLCzh$n#u=K6L%WFdltA>FMLZ{Pfw|Y&ivllJ4_tR1rDrWP+;VC41T-q
z=r8FRB~yI+KYVx-(>;CPm!-{0al&bV73&l0c3)p;a#(u#^hI0#y4h?tZkOi}+8KRw
zZo{M*xgqjm$;suu&DY+r^>{HwC+mOkDeyYIXvsRW5B!Z2W<2`$egFS6F^6_C-+ka^
z_}oTz&i0clSSRI9J<8dk$+$bO)lqoK(gR91wpD9FzRpxQfB5qAO11NcB2D6Jg1Ewc
zbN}QR_B@(m^6UIj`Q;+M+L_6TZ{G0SE!?2}LVV`$lUJ^>{k!RS)J}8F!6msB3znPo
zNl!Fxmhm_8ROrs_eC;e?x@zvm=Uz$=UkM*r7Ncjo|Klabot~Q;Sms>TnjPJ8dfk*+
z=Th=h;|`x-kO@tDBxnEg>?fwhU*055yU@aPN@Vr*py{V1C+ALcpLul0dC?ba4|Ve^
zPcszUX!NYwu))UDYQ}`CS(dsCb=IrnFHPQ2*=zK8-IMZ7^G{beNpJYP{%{$C*oJK5
z_L-gBi9K>ZPq?pJm70F=Ekj|U0OQt!y9&3>R^vagbnVx#;(RaI*rF%>Q`#cGHU31B
zXVQt<h-Za&xvw$H_tfiuz4nyzg2TQu(?2f0=${^w9Kp4&r`PAjj%kr?Rv*O{8=vIM
zEmWEPs(9f9bDay5FX`{j@htFI&E0eCXHuB`Yhn3IWu6?N#u}ENRJD!-Ex(t$aAQw+
zQ%GUZO6%_nM5eN=E|itvXVc!49FmtSo&8a4ogJHiZA|r<bB7r2vM*=VG!@9~>SVh7
zJ)nQ$)N|i_w#e6?duPD;Jxj@C|9O_=<PZBV%iVF^*K^SB_Z#C4&z?13DZjKNC`snv
zz9m<)j<uaWbzEJ@*<*^O;QY)v*Y0w2mW16F=X$TDp|xM;LDxRfE0>%CWTw3=wbyNt
zaX+*^?57du>t<IEGyb=3EL%DDx<9LByzOWG*5riBW&V>H*LLw9n-XRGiCHh<<_?W1
zOTG*L$gV%Qqtt&>pKCj7TmG!J^gM^@4IFz}KWy{(CDPFS#$>^E0fxv($pf2j=}c%j
z8f6(Ste`kWw$-o3HcLYMXz#_-s-Du1vzD)x4gbE;VTG&m6`_P@Yt+2l&rY7e|MGT(
z*y0EUG09uzrkr=CZ+$W48taR&Jz`uR&t@tzt>gHzv^jWk<gZ>fC#hR3Etwk)Hn2~Q
zIrVVTrDIe5y?5`O*c_ZZ^MQ!KPLatmmR&}>N@Fi=FiMsGz3H@m)QpYXx|(g5cPh*b
zy7Fwxvot-2?MA`IFSsW?o3`n!$$s{#H64r^{N=n~#l06OEZ(MdHz}AS@RYNx@s62}
z^FBV`c2hC<jN)^H&SmapCZhj0UVG}7%NJU=(8ScFYm4Q(%&IHtjvPhHpYA)u`obk*
zWza`gch&Morvv%CZhV-_*^$(^`7UEdZqUJoiOp@(riiV#yi_w&-{G;+tw7;`Q~Ewm
z7E_$oHr)RErKHf`(`xt66?HRy-FLrsaM$5OZH&7dRO^1c+x^~do7cqyoJpyU&hZmp
zT$?v3C*UZH=ad-%Q-A&4`n6&;yGMh^T1&s#z3*%VI4>Sx*bx=4YyIZc;|&^jrhfPo
z*Kc=Ez~TGTxlI##X5Ujb%}i=Oyh&Y4e8Dv%m2HW~`%=71HLe+DL@cZQIFYqA)q3}>
zDR!L&e!K@4UHx_QnU-&oq#Nt4%LV%Bd?k<Omz1Rju6-v}C$b{bz{xR4{u<xDB_SzG
zEk0*PcBbwLG7|q;vexm%ubV55++q<qn(@f+`>eKasmmO;3;El8JmPq2?tHs<iD64y
zCv5FG9at^4xAF0>Bl&kqug99JrrW(!J#oDG`2M(+zTa0^UO%P$&YPv&)Y`(__|CpQ
zuFDG~)A>q5MEC0-Z@8Z4>@HXN<iLLQck8!4`{VO3twf}0VXFV<^-q&*ryf^T6WVg!
z$RI{xcg3Ha18Y-X8l_wO%lxurzUBo+F4rky*}tl;=uP?bqo|5w&-TWImEp{5u3s*3
z+4v&o%)MFb>k8lgxUQo$t*1~TeVv5j<L<PS&fpghYZl*%wfKC_;%2Cr+>4B*H3?G^
zc~6L@Jo&JOXI4wwB+IwzA0BPH^=ZQ`p+9Ur1v=}mUd)<l@@(_2p5?bLT-X_Pf9m6?
zXLbRt8Bt+#SfU>*g;{*!<Ci=?YwishPk~v+{Kb1OIG77qI<=&(c<92rOjo~^Npee`
zz4Na^-|mndORU8%%$6`&;=DsiWYN@{Z#Wkf>MdY-*Ry$t+Ls0UCq5H8_;N!*yY-!g
zZ+BeHeHC(U^4u-k%9j=GV>#M$HR^e8n#1{%8Ju1n6<Ia5%~=M)yHBLwF1ww3d;Z<J
z=c(#Z^~`me8S8`^eoM10{C@WM{)m?o7&cw0a??6?;nDirk~^|%Z0yg5{q0qmb!<i2
zJI?o$EM~R6OFgZ#w&~H+Ek29GFYZ1pZq{-4%li%2nYAA5Vt;*b>coSW)81`3%$MD*
zXJB~u-PVT*#+>gK?%2Nl+2eSf4MDpkCZ0LGfp33Ca;q?(*@EbxY0F*u=g&K{Vs4p?
z{CshSE00<{I__4lWqf(2f$zww56mS8KDTmij!63ykmd4kdX(Eg*}u(>LT5hgEuGm_
zzy5WwrN+B_Bb)25j*EDF{H(jvo73U&-0N=tX8&cfnA>!Enqp@3+GS<W=a%RF&A96I
ztmk9bX$fc1iPLu)9Pg19FfJ4F<e65vHQK=CT{@rY_dV_!ZZjO6bW|?ziU>?+p33$6
zUFI=C21b+D3^QJ=EIDbk!>>#(y-?C)-_C=lm6WAtgk3&+_Q`VX1t%o5)+h*WSj_Zw
zMc=WdzZ-KqU$0mzFHo!)I&)@lUS{|r+c$~VkDXng>ON2M!rHv64uP_-KEL%oc3iLZ
z+f$_%Y;)xP?3LQO$Z?zXk>i)eH;5NpJsz>;RcHH|*xva$d|T#f|4y7%7`oEutnv(o
z8a92e;O`53qdXSeUdGb$@qNS%8MmX?pLrF8|9B|IQhz())+QY`Z9C?}vMbj*Z&~X0
zb;Xl)-_NTXhXu|Rin=Nx{dny@^EHujJ^a5W8VUO9lpTI$@uR_Iwpm=`t{kn{U_Rr=
zzx7ov{5*eP_CH-Mzf+DJp{zgHrfhYvKb7+7{)Om|7a6vyT?{reFb$~L|8Ld)Gl920
z8`?ZtcuQ8XUGTSO9Pg1k^6OSa1$Q66qsAXJFT?4~^c3Y!iHVKt&bxm(8&=-yx^2_r
z`_Bs+q!!pe66}|VV16t8E8A$n<X8#!V_pY~crP4vT@hEDG5y$)D-EWRxe3Q#=%`$X
z%$VxXaPGd+yH$*<mbq+vv2~_t_o6ups<D4fXU?COwt?Zf!oykDH{D>!<73?SO~dyc
zS2#Q01{-e961%M)wzWqKUzj>PU(fm?rhirn|ANyuD)%-lOEZ7De(k|isSm7F!~=^z
zZVqgIHX%Ol^c<DUBN1Jf#l&7@3bDMn)pM00R`?&o+%wyrd4Kxe@qO}|8KI9)on<IF
zDqbkh@g?i-^*I?$N-hpp&L}Ta74>Jj-pX3F>Km((sf}58&5MQYd=2Z0_KUqpt=aG<
z_gNZeiS9F|5}}5uYibVjm#mKBEV+4+m$8KP(qyLCM=Ql%WUOZRyJWK8t3Yek1=lx)
z-<XogIJY1tv2mW#^(%bWcIed{W{7nNS)D%3O)fnBV*KJpy;6Dk*!+1>JQ-Oc*Ihl6
z&)O_gW>_$}S4ZcylhSNCEsLvC3#1<~%uQQlCNZ_e!$8aXR{UhfyOs+a>^Ob3^%uo@
zR+P+|A+=%AjKjNDWJg3Sc>YHw>}r<WcQ5&`46&zMZZSote_bHXJEi*T^%RHc{T{b1
zIOos0?)7h_^NEl8Tc10;WER%#Y3ghBmB>`&d9miz$K&$xtR;J;CL4TG&0cj;muu6#
zl&e2}{|&9b={lEv*4&M4dzrl6Y5DH9Ou3oe?_BPeqj@<lVy*X%H<zR&Y#e%0gs*EQ
zyKQ=TM&b72EBc*zDcf${yc)Jb1~lCCQNpsI-j}T;r@it(^JR^z46)mj9W$0(@?brv
zTX$zsVBPV4^X+91jsCt@6877)YTLV#Jcsng(x*%&^3y83131oPnVZhP^4Oo*OH}iY
zH1{Iycjq^DnZ9!7_WVD+W7m)J+O-*bZ#%u4^K3Cg{AvZ;cRL=lJ=$X;mRh59{93lV
zHBXe9SoF_1*FQXcnRw}vl%9s7xX_C*jeDk1$?<x(o+VB?8?<Vs+y9BLKfQOmI(r(^
z#y28CXLo$xGymY4jc$Lx#xKY+6P>wE$SQ1yNPOivjSovjHrW*J(mpE}I{Uzji99aL
zP77QMnq17h>)-?F>Ccas%QtkCEizv|>9X??&eRt!t><(qYyKpPEc`C;AfsKu_Uv?Z
zYme|7d-(Ud_OdM8|Id0`%@c|4De9B1OQkQL9<b!o;r5pk{`g&;-ukF?%9<O-&;1wH
zT~%KoSN!FX0ds`hZe{UJ6CA#~em#5I;i9Vd>EMMQ4+qvQnYHcsm5HBjMyBj;obNZW
zn<-H1*8S37n|eR){`h5O$jOGXyYpnWoz{P2`m%K0@~#C2@v02LEqQk$qthq8+xz|A
z+c~!?9~VE5a9zh9ZuI$-zWD5!<<`2dX6oGD_%2K0&4HR<d(6LYQ+sF;zToz*FNG(s
zE%%=8sPTLKnlhj5vo23b`Y6a*os-G7F1l_i<JJXl`1osIIN!EVm{s!seWRVEZ4TcA
z=2H)UTz$Chbkdh8PPc4DU0-jXr!3o8rn+ES>qRl9#pP!VkMnGBW{tk>SefV0@3WaF
zEvx#&OQi{Yfp;$p@iZ}}T=nhPr&kfao25I^FtNDE#mesb`|1t-=HHGP{@Xe$+Ir<j
z<7H)8rc8JE6Sl2xdnom+?Zt&?XEg@f|8^-%mdB&`S!QH1E6YbrEDkHpZROf;_55|p
z=_l*oZaS^EmjC?&^;I6xPYoEPW3JtO?aT6g>9+F6X?uch%qzTK|Nn3MGed^>9+%0B
zxYtkCIT925><nX$X{A8%ly&>W`k&m7T0e99lH$0)J6u<ibj8pA_iohTGgH6HSSF{f
zJI(g{o#KK^lJ-T$vKuX4sxFq4wOs92Y?L+UbftlQPxtfU_(N;1tnxoAxZU`pq~!@G
zX4~Z&Dgkp+Qn{=yH}iJ~-_G@JGc}p&YJ7I`xoK1F!zYSvKiH?UIP!DV#)y<l@02gB
z;cY2>&UMv-H}Kw>#=O$!OVUk}*1Co+=UB1LpgDZ@4?*!4XJ^#i=j(kt*<JhnBA%d6
z(i#V)wjZ7oE#bku-ktA?$lZ#)6$)FF&s^EEIof=|>4H>g7uTy?)eNBuv)vZ{`6jh`
z-Zk-smUq1;+=)M|UsD<tV-VmFWqnVlqABR+*{GjvJ3VS@N*<d;)t>*8A<o#nW|sar
zqin}8?VX`7<|It{9kg3}hRz$GUtJ})6~14u*SDTKL+fUy`kAYJrq?HIQEtqsyz*>E
z9(TE^^(mEiuXZuqbxq5D5OKAc&uYc3QiT_uD)|E0yYpu1Fb1yS34Lm^?QrVuB|nr`
zrwY9A%W^z6|5><nw)M}G>hoj1R&lD76wW&Ax4QXoOoLwNoJksW-z>tKL_aJwH1&-6
z@hRh!XCG@yX37mAg_lMbieE+dZ~szL;?`2IHDrye<A&)y%L=ziul|=l=a^*v*S(ol
zN3K5ol_#;F`w*+>Le7`UZIN8l<~g0ynpXGB|M{QAMu{d}*JE>|m$iEqSDJQgy_Po9
zQP1^!m-^0U7g}z08*YrAc2cR^&EeF6mr4oOIb3bt6ldhj`nm1KvZIsEvc7PbDcst4
zW@63DW3NtgT4YWSoOnBD^VtKx+BFRsw;fE0Wc4<kShHrAtW#zDlEWqDY_-PG-{$ZL
zS)Jc;LBe-Z{)1NrDesh(&l(+n>eIve<p~Ru-=>D5Kdfv0wrliHnJMOMCcfaelHc@h
zJ69vd7dh$f^0g(u>m|3Yd7@I0Q+r2Dc2NY=<cW-vCFiPmO!PIqF_F*uWrcCNH)F4!
z>OuL^M;5xf+9fxCxx|=v)Hp`Ws@UlDI+uozP68eGJ9gFZ_4r4%o^H7Q;=VZ33%#VO
zq~8%KQJhj;>&y-%2z*?jcQH|0+lrk##qb7~dM4}HAjvyCR?}B)Q-5F-S<G!~t(vW)
zw#roD*t-^CKZy|GpC=7elTWQU8NcDlni|XJ|9HQgvkba0Z?e~v55AvS4(?>UyUis(
zj%%~m_ZHXkDJM1_m$RNwS?2H5t)jrVG&X6{U02R<zK@?G--yhf7@h7o`Nl_q9=}Jz
zw*_}D<4oBpf7owh@>kVnmNiD}3~LTL#K>4P>HaYht}bTYrLW~Z>xeIx7K{F_Wm!&H
z6T*v>gSclUN57ahH#hk9osM66xe`i09!<|vt893)lv(JM!+h7Tzi+V!28J8W6g%hT
zs`so*FgfRh@tZq040fDoI=ro5{<58?^JYGO@a)H1mZg&zg}ff~<wOZ``)m$~_*xh@
zU&>?4B8Qbc6E@X8x;0()%`vVS<#!5?Gw#^5S&F;$MbEh%*RK3o`O|0R9B0{MsYjTq
z=BMv|yG{BG`}VS1oO^dN-DOyz+*|*~rP*^)weHg7tC0fN61RRX@#zrSAoC^5VQSIE
zjp>JSm)<m*elgvgPt53Mj@S;@^KVb=kO&qFbG~r(Sfgq9=}5VQkN$66r}B8}F2;iz
zwU@UXuRpzqJxXQX;qZ@JP1nBis*i7xzu)(#Y>Mc~gL^J6eQx?E@K*)5g793cFzd5M
zT>B&Z-gzE7cqr^z#+yE?S1ZDP&vy{a*vcAna_W<~8w-VB%TzpQyxMm9Z*AKORng)z
zO4)l;?=|x}?0&oLwnD)|>veaR*~B*~uS~O>d+PtY+CAd?rfu!e&ngOx7xSLie|-Dj
zW1D`jblAO2DNDH_rm?8&YVX_|E6$`R98-CD>T~4VT})rswVG~Ym#_Jdkp97nb4QeS
zj@%Q!w7DDSSzoB-boBTUR5E!>{@dxvohLmXXInqf-j(^?$@bO1uCt!itn0WQlzYGW
z6CH56w=;G7>jj6ip0ahnSodM4q4(16wpDNYm^W~%NG|GO{;=|UY5%2N#es)z>{!2O
zMpSm*#wn~{GE&!^(OIYa+VDqH&^nhad)CISp3==(8;W1eJXn;#AV0<0#YtfQWq+P#
zJJrQ8lU^)fb^2&|H+Zj7U&Z2|ZyEafSXJ10mM`yWJs&Dj>X&hhk$dWqsIJRXm3A#Z
zn*Z>~r)+N}zg2Uc-v_fT+*~5Lea&^37e^;OieA@xP<pzpp=U_Z)0c<jzFgLsdjI$(
zW=r2M7VbH_x33M^n{rFEdPc$RnwRf%KC$XJED4VKqJ7NMr-e)Ax%G$JvNLBcn!e?~
z+xt^x&whOB*(lRtDRW&RLd0Og?;YQx{M+L^7A!YUwkUn85hnaX+M&*PW&W)2ve|d1
z{5R{Us=6N3cF$d{fA!?wPY-C=Dv3+JDP(6U{rTqDbca*v>t)$_IHoIo_liuunX3EP
z*X}^8<nc|>BA*m0*Q#97d0ly(_XXR;40W;g^?siXPD#yh__vrPI7aBf;>8E}HiSKF
z{`;l=tx=fuscO}3XQfm2z6{U*zH!pIwdf?<rqd_w+N*l){Z1>j8=PeNDz&@$_q5pS
zJ*?Zj66CWC7?(1XEp6$qe)UN7;B;Au%SNX^-q|#7@?opZlQ!~v;k_%oXtC<`e-4W@
zZm(e6s+_>(vnj#z^BkTXEhlzJy_kBSuTVi@bMusU0(1M;%wwpybku}5IZ@nYzSr*G
zJ&BGD9U_sg*EJ$@&RPf@o;oe2hxN=m-ESA~tVmhAXZvH}oep<5F055#?8wi{OTRle
zF}5-9!^I{wjhmid=idAkQTRyVP5b_%QOsL{Uwd6sSe~*WA(EqMwr5NH;UuQm<6kB_
zrcR3twPr74pL|5?tkjodJQXUD3@2}V$QNGyW97;Vb-&+k&r-_0^Im_S<NjkOUgT%S
z8TrpsR#$8|dNjS}h#9*~fy3_KPj`y7%{`D*VDG5gY@Y4Bah=0?asPhTX6I?MlQ)-U
z8x^Ro?=DVtZFhaXMtZ)@wD#GIf3~_xY1f8rdX)1p)!WQbx*@WG;o0V%+jaS>6Q-`c
z^(Kc)>4@=+DLV?Drhlx~yJvEP`+dIN4&6fIo|SUlhu^*b9rfW#_e_8L(pO0rYS`=h
z_1Z3e_+R;^?KR(*h@O%!Mhi}@U-Qdolevb&o>}_G#a#uD#UH=^`j1s_=lPo+iOCV$
zf_g;$i@u1|+GFF{c-hWrf9Y&}-s+0J?@p6fZJRV>;x_+_9^0R8+vvINW?xwGt`lCL
zlgyvwr^~%rHEZuWk({czM+DD$A5-&93zb+{xw!k_b^qY5-4`6rmz}9P{`Q1JiZ+|n
zoSzm~LjQ#2xvk++NMOFkRQ7#qEO*`J=mZs(3k+w|`4t`|E#+u@$|@u_zvQ6Pv-_{*
zv^A`sOmMyxbI9)Zo6Ag6+jT6;_nXweGW+VXG|5?q$y(+D%a1SLvKa$(cAl5qP-Wk4
z*>iZsk-NoZQyz8gFj{I7E$iUheP}7euI~N13$&Q!qGhx$x?EFHE((~^$9S{IIPh@O
z;zDud>56wNze@I`-Lw6<Q0L{o-|ucW&1==wjaqrw-@Qd9dq#0p(fho~e|F^ZG`cPj
zxp+ui<2R3(0iTr~qu7#X1zQr6-Sq_icDi=Ou8g&@6Q92H%$BxqTaC{I`)@IDD8KpX
z(9Nm^Lar|+|0=j6adHs@-;Wf3#wBg7A7ZZAICSMCN_aHH8Cqobe@l6*8eF5_xVriV
zuNRkrp0vZy=v^$<(ZZWuwk&(f-ljRXseNZ?>PP1680pzR=9Ou@muwEsYF4|u_A2vl
zznd@LM19UP-xT2BpBcWRv|0DZ>?u`Vrh!(iuElGA3;!>^6U&tF?N;{s#;r#kdbk%w
z)VxnG(oGAQJ8A0|^)I$_pTBHie&x5v^t$5*OTF&N#?6y*kNLdpGJ9w7^ozjmz&mc=
z1^@iEnJgvEt>QbeCVb-!gNZkmF;%g-T@Z@*6HpGdIeB1*_^-8dc0`EU7<e1^ozG8-
zcq|g~c-i%;$doT1#ro&6{kUGms6AbIBjelu7WdD;K2dU1{-&>Sn4NrBj!^eI>q+n2
ztW(}(S(oeTUI_NlPMy0p)@t{*y{y?QQcLcLHJ|C#WRA;g;aWOXZDRbX_w%+)e_+%2
zZjy!fdzYX)Q>#MT{(t-H6xybKJ;ua(^?uGd*)>`3Gp{?bOsV@<_41E6-wVdZFV<^4
zo6Ki!FMREs`~5}-OR$>E{ByDk4tTejCWuG8GBi4K>BCvge@Zq%du(~vo?g>1PwC4R
z9>s8pXTN^VeGnOchNWws|Js!qvsO<HUL5>W*-zs6?%#POrYU<0&r0ZRJY98CO-$&;
zMvik$&v&^vFZ5uJ2$#|k_HBzw^T>U8a>6E!l<iDME;OpmxOd2Mdg?Sj2?NuGnlIz7
zRZTm$rt#h8(=(QFioA~bb>p76y}pLO(YenNachLQ=Qrmb6=}M~a*l^hs$wfs*}l44
z9J}n5x894rC?@uT(Yi9H#%6(lNRfA2qio05N|VgMTb^>ISGyl}Zj@A8f93p(rSsaX
zEPpCC8cw{i<<IFzM%$3JcAACT56$|q*GFY);=axATl<bw@^jbd3bij$71+KzvA%7m
z!n4y(o{TkRO!ss5vKDajerX7I5Z=Wt@pu2}mr@=I{^k+QXR_9pL@{qDOf}%%cuTG(
z=Bl^m95${4Gp9{act3a2p-kqx5gflR&EQ~e-X8Wa{+Z^V=x4&$_g*^c#9V#My=|Jv
z!sw+(D+Dg9Zc}nE*mXy##3{OBf%{&Lii<NoXr>B#Kai~G+PUQU(pllf#YwF@Or@qX
zxokfeq_g+g^viP$zIO#=-Et^j82a;m>NGQ-Y42OE%cmY~@14jp^-SKo_Q(8=0#etV
zx(<3Q*stOGjGO-p&lU%^|FL1aH4?U|p62q}uq38u_rbps;xjzIaM)aqU2t2if2q;U
z%d3o-8X_6)mTxO~s(k8?<ImU~IW^B_eR4Sebkh<)lbeahjwH9smPr_x-*L1)QFU^<
zxy}XVvMtFsXS3co>A+i2;}Mn~G=D<<%vIB`Y(D1n(_WI%tvTTIlI5FksJu7TUC_?X
zo~P4g<ghNni&1#B%c<9jpDjDZj{UYcZOYR4@nPLk4dqKQzvc$ICw+5}FAg$fT(K&k
zy-;81#}zr*;3%heo--t_ESn~k!tpNpRhW-fn0t|1IAbBh>a=hP@gI|!?v{U!ayZ1W
z&~@wWXWOQ}-F7&HQ|_pg3g^S?dzb(Bf7Df3zjaCC)>G`>e#>mmMx+HZ*@_1w2Jc!L
zt5y><UmznO>0@G~YP7xsqvF=YRWH3ShW_GxG+%Ya4jz`cIBt)L&&r<ev|Ija6+`Uv
z2kS!2`|q@iK8riasZsN-@~zR*AKHH`<zp0$x9fE~=P7bK@)&0(hbA|@(ltJ3$3CxC
z>f{!=7p<w=L%n^Me>%Z@W64+bo${^;euo|Ur;4O;DZEj$sLJuX^i=i0q&rETCj&CC
z?-csAJGrGnGGL>GPT{4mIveLSvMNnp`7PKvEUF}yH$`*%{kq>}j;*X-Yl7Yx-J7iK
zXl$HQuIR$8{nS=HQ*iEO!&Ab0B^iX8TtA(h&h&y&K5oX-Ug^noU6x$6cO3bhCzfoN
zx|nF~tg~^6P{6E$(skQDe)9ORPVVmW&ockC8yMEK3NcQ5{&VkrP3<RnCs+$u_n19+
z_iYmYi5KVJT2<7Gm0l@l|0feQZP)oiZ{Ee9r)T<FE<FFl`lOhC)&1GL++TR?v=_b9
zc$qWvsqw<cC9g{Pb1%H%iCh2wm&L1H5tWnu<z{z^b7Z|x=43vR>To;NA*^aj9><2_
z{0s9%`j<|*>1U!RJf-i@%4erOK3=_7tm7@)uh40yf=-(HTye87OmSx3%QN9Z=cON<
zQjAAjJlc9D{hU=X^-a^+Eeoe*to#~YcuG*Oko|~28RtnM>vM9|Z!Xq}2}P`#aPQ5U
za=$Bl%}3ULFb`Sz{*Y_Yxk+yf92i&LW%)c&_Q>AsCi?|bn@?DLyc)P&JF+mxZ>I0o
zhKrrjD`W2mG1!VLMnwLbYFx>(@-W}iO$?JtzsA_r`6dT`Uo7pZ#9C0>7C8C+-yc62
zlx`m4^+=t#s<!(a(??0S$dfARJ528dPuuj(`1VGf6(Wl{ly*m+lX>L&U&zz7r}60S
zi&7i(L@x+05lZ?P%Ul+{B{y7QMy{1=@=MlarpQybK6^**j!4Tpzr%FuyyKgeP3hw?
z>p7hz&b@Al-T~#H)w1gE;&uskb9=qk5eR2iST3MF&&|1Rzu@oV`l1)S_sbSHwra#m
z+HCS;S~F31=9+}1&paRR2|pmUI_7X(Nn_mOx`32r-=<`&{`P9#g)`T;H+U>eU2vLV
zwzr$G^MkE{{vV>P7ED(#F^myz%(Be<lRBqJ!|0#kulz}NZ<p*|Blmj6$%{4(@3u}d
zn|S`xj4RWmrR4Y38?A^@V63T}H=$KU@af)bx4y1n|J9?<aA0lr%!!BNPWNan2!7Vf
zbs{X<<L1AY`?Aj+jp4qaYY_S`P@U<q#oeae^Y52GW16bt|H5y}LWk+|_whX6pl5r*
zc%{P${iBKHFYRQky5lAv`fkJHo3ehAz3Cfw@fR_xW_8QYow9GP=JTpPY55HoGC~hl
zo&5XTj?dy+T-CL|0@Vx7PiRXy`o)VUSSXwAkIC~ddXK;8urRIhJd*NCFMHcJy$k&I
ze?Gi=FKlZq?sopv)qD4=CmDY}e);R_Gw(j#oO~>P+u>7Yq8E}cM#Oo!UiISKwJG&;
z<VJy->hqGROD|5<G-5EZJ(;~u@50xHm3uFhN#EG$wb*Y1LyXw{LlcZUt|*>zJE^;S
zZft&}|C8G#4*F}Y1wNNv_2)WtYT4X>x{ZsgBaQ@muHS875;u8T`PSv8KFlvP4wfiA
zo3wOyLCWqF#w}(h-x%V|+@IcG{q)P-tgSC?FPOhbowQTi;o%y&Jr$8r$6s-tE13}_
zqr=_3ucwN?JGgq)F_Q+q8=PvjZZg}?t2(M3*cLtYk^cFGix~^no%8BkDfhzr-L(@^
zTt1CbZ*RP9eR2PV*EJiveXL9+FPm??owYk`!SqY9>p#og({6u!%xn46*q7z*5yw7t
z`+9J?iBA9SaKEL2UF&5{qjW60x5cY9Ds@H+r!MKbR>Tx#6#cH}LAivzz21dJsnQjT
zI<<ns6GB6`yfHCcs<!`+4@2LlkcqBpbIxzP$#9Rk*2muNstuE=tEg7r4zr1D<x|@x
zK4sef#b(~zrB9PGQhlwPjLYY6X3OO6tY_YB6_L)zS+ev2YuCGj%gfFOZn|so@_Wsn
z?T^FW7&SffJpH$7L4H!a+zU}ZjjjN*W!o3G@J?6cIbm?OVtf29_m}p|-AvamKE3Mi
zx^s7nFO^RJH9;<RTJYq+YxWP`t(SWdy!nTN>I&fnUQ<_Z2<KfAuQE~mqQrJ}ZpE{I
zch_G0Twnj4Atk4;!ojdpVW!NC(@m;DtGoXB<ZQU9TgLyTG>YK>e}~uOElqA;HSZiN
zTkyprcBg?AJ9paOXqyEkef{gTXZ&leDYoVa%P?k}XnVmtAYu14ckre!#O^ToJ}`vP
nuhr~0HjIJykS*QyPyVoT(D}+&Pudt57#KWV{an^LB{Ts5KvRLN

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/index.md b/tensorflow/contrib/lite/g3doc/tfmobile/index.md
index d003bb2f38..49ad35d4e6 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/index.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/index.md
@@ -4,7 +4,7 @@
 TensorFlow was designed to be a good deep learning solution for mobile
 platforms. Currently we have two solutions for deploying machine learning
 applications on mobile and embedded devices: TensorFlow for Mobile and
-<a href="../index.md">TensorFlow Lite</a>.
+<a href="../../lite">TensorFlow Lite</a>.
 
 ## TensorFlow Lite versus TensorFlow Mobile
 
-- 
GitLab


From 62e41201e291b241bfad0b902ab6aa785ee06059 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Wed, 19 Sep 2018 21:23:29 -0700
Subject: [PATCH 0422/1357] Internal change.

PiperOrigin-RevId: 213749129
---
 tensorflow/core/platform/default/cord.h | 5 +----
 tensorflow/core/platform/file_system.h  | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/platform/default/cord.h b/tensorflow/core/platform/default/cord.h
index 1ab682182c..5823374d1a 100644
--- a/tensorflow/core/platform/default/cord.h
+++ b/tensorflow/core/platform/default/cord.h
@@ -16,9 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_
 #define TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_
 
-class Cord;
-namespace absl {
-using ::Cord;
-}  // namespace absl
+// TODO(ebrevdo): Fill this in.
 
 #endif  // TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_
diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h
index 30059dc02e..156af6cdea 100644
--- a/tensorflow/core/platform/file_system.h
+++ b/tensorflow/core/platform/file_system.h
@@ -255,10 +255,13 @@ class WritableFile {
   /// \brief Append 'data' to the file.
   virtual Status Append(StringPiece data) = 0;
 
+  // TODO(ebrevdo): Remove this ifdef when absl is updated.
+#if defined(PLATFORM_GOOGLE)
   // \brief Append 'data' to the file.
   virtual Status Append(const absl::Cord& cord) {
     return errors::Unimplemented("Append(absl::Cord) is not implemented");
   }
+#endif
 
   /// \brief Close the file.
   ///
-- 
GitLab


From d5d8a1bd06751b3ad166380a0a0ca00a3412145b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Thu, 20 Sep 2018 12:49:22 +0800
Subject: [PATCH 0423/1357] CLN: revise comment

---
 tensorflow/python/framework/test_util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index b34330aa2a..2e3ec149f5 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1340,7 +1340,7 @@ class TensorFlowTestCase(googletest.TestCase):
 
     msgs = [msg]
     if not np.allclose(a, b, rtol=rtol, atol=atol):
-      # Add more details than np.testing.assert_allclose.
+      # Adds more details to np.testing.assert_allclose.
       #
       # NOTE: numpy.allclose (and numpy.testing.assert_allclose)
       # checks whether two arrays are element-wise equal within a
@@ -1551,7 +1551,7 @@ class TensorFlowTestCase(googletest.TestCase):
       same = np.logical_or(same, np.logical_and(np.isnan(a), np.isnan(b)))
     msgs = [msg]
     if not np.all(same):
-      # Add more details than np.testing.assert_array_equal.
+      # Adds more details to np.testing.assert_array_equal.
       diff = np.logical_not(same)
       if a.ndim:
         x = a[np.where(diff)]
-- 
GitLab


From 01bf92f9ee303aa2b34312a90bc6af6960b08830 Mon Sep 17 00:00:00 2001
From: Cheng CHEN <cncng@microsoft.com>
Date: Thu, 20 Sep 2018 13:02:09 +0800
Subject: [PATCH 0424/1357] Fix typo error in grapper remapper optimizer.

---
 tensorflow/core/grappler/optimizers/remapper.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc
index 03e36a7b9c..008a289cfd 100644
--- a/tensorflow/core/grappler/optimizers/remapper.cc
+++ b/tensorflow/core/grappler/optimizers/remapper.cc
@@ -218,7 +218,7 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item,
 void Remapper::Feedback(Cluster* /*cluster*/, const GrapplerItem& /*item*/,
                         const GraphDef& /*optimized_graph*/,
                         double /*result*/) {
-  // Nothing to do for ArithmeticOptimizer.
+  // Nothing to do for RemapperOptimizer.
 }
 
 }  // namespace grappler
-- 
GitLab


From fcfc5ad738b1521aa70aaad323079eb72493dcad Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Sep 2018 22:30:06 -0700
Subject: [PATCH 0425/1357] Speeds up _random_flip for batched images.

PiperOrigin-RevId: 213753728
---
 tensorflow/python/ops/image_ops_impl.py | 40 ++++++++++++-------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index de260f3140..325418d5f7 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -29,7 +29,6 @@ from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gen_image_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
@@ -301,21 +300,21 @@ def random_flip_left_right(image, seed=None):
 
 def _random_flip(image, flip_index, seed, scope_name):
   """Randomly (50% chance) flip an image along axis `flip_index`.
-    Args:
-      image: 4-D Tensor of shape `[batch, height, width, channels]` or
-             3-D Tensor of shape `[height, width, channels]`.
-      flip_index: The dimension along which to flip the image.
-                  Vertical: 0, Horizontal: 1
-      seed: A Python integer. Used to create a random seed. See
-        `tf.set_random_seed`
-        for behavior.
-      scope_name: Name of the scope in which the ops are added.
 
-    Returns:
-      A tensor of the same type and shape as `image`.
+  Args:
+    image: 4-D Tensor of shape `[batch, height, width, channels]` or
+           3-D Tensor of shape `[height, width, channels]`.
+    flip_index: Dimension along which to flip image. Vertical: 0, Horizontal: 1
+    seed: A Python integer. Used to create a random seed. See
+      `tf.set_random_seed`
+      for behavior.
+    scope_name: Name of the scope in which the ops are added.
 
-    Raises:
-      ValueError: if the shape of `image` not supported.
+  Returns:
+    A tensor of the same type and shape as `image`.
+
+  Raises:
+    ValueError: if the shape of `image` not supported.
   """
   with ops.name_scope(None, scope_name, [image]) as scope:
     image = ops.convert_to_tensor(image, name='image')
@@ -334,15 +333,16 @@ def _random_flip(image, flip_index, seed, scope_name):
         result = result[0]  # TODO(b/111124878) remove this logic (CondV2).
       return fix_image_flip_shape(image, result)
     elif shape.ndims == 4:
+      batch_size = array_ops.shape(image)[0]
       uniform_random = random_ops.random_uniform(
-          [array_ops.shape(image)[0]], 0, 1.0, seed=seed
+          [batch_size], 0, 1.0, seed=seed
       )
-      mirror_cond = math_ops.less(uniform_random, .5)
-      return array_ops.where(
-          mirror_cond,
-          image,
-          functional_ops.map_fn(lambda x: array_ops.reverse(x, [flip_index]), image, dtype=image.dtype)
+      flips = math_ops.round(
+          array_ops.reshape(uniform_random, [batch_size, 1, 1, 1])
       )
+      flips = math_ops.cast(flips, image.dtype)
+      flipped_input = array_ops.reverse(image, [flip_index + 1])
+      return flips * flipped_input + (1 - flips) * image
     else:
       raise ValueError('\'image\' must have either 3 or 4 dimensions.')
 
-- 
GitLab


From 9fd4798cc2a4b1cc20d5577f944c6423a8aaabef Mon Sep 17 00:00:00 2001
From: Koan-Sin Tan <koansin.tan@gmail.com>
Date: Thu, 20 Sep 2018 14:27:34 +0800
Subject: [PATCH 0426/1357] fix style

> clang-format -i --style Google tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
---
 .../contrib/lite/examples/label_image/bitmap_helpers_impl.h    | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
index 7e09d4bc79..21ad39a6bf 100644
--- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
+++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
@@ -80,8 +80,7 @@ void resize(T* out, uint8_t* in, int image_height, int image_width,
   interpreter->Invoke();
 
   auto output = interpreter->typed_tensor<float>(2);
-  auto output_number_of_pixels =
-      wanted_height * wanted_width * wanted_channels;
+  auto output_number_of_pixels = wanted_height * wanted_width * wanted_channels;
 
   for (int i = 0; i < output_number_of_pixels; i++) {
     if (s->input_floating)
-- 
GitLab


From 2ea398b12ed18b6c51e09f363021c6aa306c5179 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Thu, 20 Sep 2018 00:22:51 -0700
Subject: [PATCH 0427/1357] Add feature_group_count parameter of Convolution op
 to xla_client.py.

This parameter has been added to HLO to support depthwise convolution.

PiperOrigin-RevId: 213761790
---
 .../xla/python/local_computation_builder.cc   |  6 +++--
 .../xla/python/local_computation_builder.h    |  3 ++-
 tensorflow/compiler/xla/python/xla_client.py  | 19 ++++++++++-----
 .../compiler/xla/python/xla_client_test.py    | 24 +++++++++++++++++++
 4 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index 9da5dc0d2d..cd5fd33029 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -469,9 +469,11 @@ LocalOp LocalComputationBuilder::ConvGeneralDilated(
     absl::Span<const int64> window_strides,
     absl::Span<const std::pair<int64, int64>> padding,
     absl::Span<const int64> lhs_dilation, absl::Span<const int64> rhs_dilation,
-    const ConvolutionDimensionNumbers& dimension_numbers) {
+    const ConvolutionDimensionNumbers& dimension_numbers,
+    int64 feature_group_count) {
   return xla::ConvGeneralDilated(lhs.op(), rhs.op(), window_strides, padding,
-                                 lhs_dilation, rhs_dilation, dimension_numbers);
+                                 lhs_dilation, rhs_dilation, dimension_numbers,
+                                 feature_group_count);
 }
 
 LocalOp LocalComputationBuilder::ConvertElementType(
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
index 1d5dfe5911..2166bb6721 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.h
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -248,7 +248,8 @@ class LocalComputationBuilder {
       absl::Span<const std::pair<int64, int64> > padding,
       absl::Span<const int64> lhs_dilation,
       absl::Span<const int64> rhs_dilation,
-      const ConvolutionDimensionNumbers& dimension_numbers);
+      const ConvolutionDimensionNumbers& dimension_numbers,
+      int64 feature_group_count);
 
   LocalOp ConvertElementType(const LocalOp& operand,
                              PrimitiveType new_element_type);
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
index fa4366ff07..bb303c5678 100644
--- a/tensorflow/compiler/xla/python/xla_client.py
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -1109,7 +1109,7 @@ class ComputationBuilder(object):
       dimension_numbers = GetDotDimensionsFromLists(dimension_numbers)
     return self._client.DotGeneral(lhs, rhs, dimension_numbers)
 
-  def Conv(self, lhs, rhs, window_strides, padding):
+  def Conv(self, lhs, rhs, window_strides, padding, feature_group_count=1):
     """Enqueues a Conv operation onto the computation.
 
     Args:
@@ -1117,6 +1117,7 @@ class ComputationBuilder(object):
       rhs: LocalOp for the rank N+2 array of kernel weights.
       window_strides: length-N array-like of integer kernel strides.
       padding: PaddingType representing either 'SAME' or 'VALID' padding.
+      feature_group_count: number of feature groups for grouped convolution.
 
     Returns: a LocalOp representing the Conv operation.
     """
@@ -1125,10 +1126,11 @@ class ComputationBuilder(object):
         self.GetShape(rhs).dimensions()[2:], window_strides)
     dimension_numbers = self._GetConvDimensionNumbers(len(window_strides))
     return self._client.ConvGeneralDilated(lhs, rhs, window_strides, pads, (),
-                                           (), dimension_numbers)
+                                           (), dimension_numbers,
+                                           feature_group_count)
 
   def ConvWithGeneralPadding(self, lhs, rhs, window_strides, padding,
-                             lhs_dilation, rhs_dilation):
+                             lhs_dilation, rhs_dilation, feature_group_count=1):
     """Enqueues a ConvWithGeneralPadding operation onto the computation.
 
     Args:
@@ -1138,6 +1140,7 @@ class ComputationBuilder(object):
       padding: length-N array-like of pairs of integers of (low, high) padding.
       lhs_dilation: length-N array-like of dilation factors.
       rhs_dilation: length-N array-like of dilation factors.
+      feature_group_count: number of feature groups for grouped convolution.
 
     Returns:
       A ComputationdataHandle representing the added ConvWithGeneralPadding op.
@@ -1145,7 +1148,8 @@ class ComputationBuilder(object):
     dimension_numbers = self._GetConvDimensionNumbers(len(window_strides))
     return self._client.ConvGeneralDilated(lhs, rhs, window_strides, padding,
                                            lhs_dilation, rhs_dilation,
-                                           dimension_numbers)
+                                           dimension_numbers,
+                                           feature_group_count)
 
   def _GetConvDimensionNumbers(self, num_spatial_dims):
     """Create ConvolutionDimensionNumbers proto for convolutions."""
@@ -1163,7 +1167,8 @@ class ComputationBuilder(object):
     return dimension_numbers
 
   def ConvGeneralDilated(self, lhs, rhs, window_strides, padding, lhs_dilation,
-                         rhs_dilation, dimension_numbers):
+                         rhs_dilation, dimension_numbers,
+                         feature_group_count=1):
     """Enqueues a ConvGeneralDilated operation onto the computation.
 
     Args:
@@ -1190,6 +1195,7 @@ class ComputationBuilder(object):
         labels appear in the rhs_spec string, so that window_strides[0] is
         matched with the dimension corresponding to the first character
         appearing in rhs_spec that is not 'I' or 'O'.
+      feature_group_count: number of feature groups for grouped convolution.
 
     Returns: a LocalOp representing the ConvGenralDilated operation.
     """
@@ -1215,7 +1221,8 @@ class ComputationBuilder(object):
                  key=lambda i: rhs_spec.index(out_spec[i])))
     return self._client.ConvGeneralDilated(lhs, rhs, window_strides, padding,
                                            lhs_dilation, rhs_dilation,
-                                           dimension_numbers)
+                                           dimension_numbers,
+                                           feature_group_count)
 
   def Sort(self, operand, dimension=-1):
     """Enqueues a sort operation onto the computation."""
diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py
index fd98e19457..82103f0313 100644
--- a/tensorflow/compiler/xla/python/xla_client_test.py
+++ b/tensorflow/compiler/xla/python/xla_client_test.py
@@ -661,6 +661,30 @@ class SingleOpTest(LocalComputationTest):
                          [40., 50., 0.]]]])
     self._ExecuteAndCompareClose(c, expected=np.transpose(result, (1, 3, 0, 2)))
 
+  def testConvGeneralDilatedGroupedConvolutionF32(self):
+    c = self._NewComputation()
+    a = lambda *dims: np.arange(np.prod(dims)).reshape(dims).astype("float32")
+    lhs = a(1, 2, 2, 3)
+    rhs = a(2, 1, 1, 2) * 10
+    strides = [1, 1]
+    pads = [(1, 0), (0, 1)]
+    lhs_dilation = (2, 1)
+    rhs_dilation = (1, 1)
+    dimension_numbers = ("NCHW", "OIHW", "NCHW")
+    feature_group_count = 2
+    c.ConvGeneralDilated(c.Constant(lhs), c.Constant(rhs),
+                         strides, pads, lhs_dilation, rhs_dilation,
+                         dimension_numbers, feature_group_count)
+    result = np.array([[[[0., 0., 0.],
+                         [10., 20., 0.],
+                         [0., 0., 0.],
+                         [40., 50., 0.]],
+                        [[0., 0., 0.],
+                         [330., 380., 160.],
+                         [0., 0., 0.],
+                         [480., 530., 220.]]]])
+    self._ExecuteAndCompareClose(c, expected=result)
+
   def testBooleanNot(self):
     c = self._NewComputation()
     arr = NumpyArrayBool([True, False, True])
-- 
GitLab


From 31c0857f6b5d79f4a7b16ee4af85f0bde8b5f5da Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Thu, 20 Sep 2018 00:41:30 -0700
Subject: [PATCH 0428/1357] Add AOT test case for XlaSort.

The only tensorflow op that uses XlaSort is nn.top_k, so we add a test case
using nn.top_k.

PiperOrigin-RevId: 213763591
---
 tensorflow/compiler/aot/tests/BUILD           | 15 +++++++++++
 .../compiler/aot/tests/make_test_graphs.py    |  8 ++++++
 .../aot/tests/test_graph_tftop_k.config.pbtxt | 13 ++++++++++
 .../compiler/aot/tests/tfcompile_test.cc      | 25 +++++++++++++++++++
 tensorflow/compiler/aot/tfcompile.bzl         |  1 +
 5 files changed, 62 insertions(+)
 create mode 100644 tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt

diff --git a/tensorflow/compiler/aot/tests/BUILD b/tensorflow/compiler/aot/tests/BUILD
index 7a0932d44d..10fa33ab5e 100644
--- a/tensorflow/compiler/aot/tests/BUILD
+++ b/tensorflow/compiler/aot/tests/BUILD
@@ -25,6 +25,7 @@ test_suite(
         ":test_graph_tfmatmul_test",
         ":test_graph_tfmatmulandadd_test",
         ":test_graph_tfsplits_test",
+        ":test_graph_tftop_k_test",
         ":tfcompile_test",
     ],
 )
@@ -42,6 +43,7 @@ py_binary(
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python:nn_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:session",
         "//tensorflow/python:training",
@@ -66,6 +68,7 @@ genrule(
         "test_graph_tfmatmul.pb",
         "test_graph_tfmatmulandadd.pb",
         "test_graph_tfsplits.pb",
+        "test_graph_tftop_k.pb",
     ],
     # Set CUDA_VISIBLE_DEVICES='' to prevent the code we launch from using any
     # GPUs which might be present.  This is important because builds may run
@@ -208,6 +211,17 @@ tf_library(
     ],
 )
 
+tf_library(
+    name = "test_graph_tftop_k",
+    testonly = 1,
+    config = "test_graph_tftop_k.config.pbtxt",
+    cpp_class = "TopKComp",
+    graph = "test_graph_tftop_k.pb",
+    tags = [
+        "manual",
+    ],
+)
+
 tf_cc_test(
     name = "tfcompile_test",
     srcs = ["tfcompile_test.cc"],
@@ -226,6 +240,7 @@ tf_cc_test(
         ":test_graph_tfmatmulandadd",
         ":test_graph_tfmatmulandadd_with_profiling",
         ":test_graph_tfsplits",
+        ":test_graph_tftop_k",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:xla_data_proto",
diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py
index 9ec7df163b..de135d7a23 100644
--- a/tensorflow/compiler/aot/tests/make_test_graphs.py
+++ b/tensorflow/compiler/aot/tests/make_test_graphs.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import app
 from tensorflow.python.training import saver as saver_lib
@@ -142,6 +143,12 @@ def tfsplits(_):
   array_ops.identity(y, name='result')
 
 
+def tftop_k(_):
+  x = array_ops.placeholder(dtypes.int32, shape=[5], name='x')
+  output = nn_ops.top_k(x, 2, name='values')
+  array_ops.identity(output[1], name='indices')
+
+
 def write_graph(build_graph, out_dir):
   """Build a graph using build_graph and write it out."""
   g = ops.Graph()
@@ -163,6 +170,7 @@ def main(_):
   write_graph(tfmatmul, FLAGS.out_dir)
   write_graph(tfmatmulandadd, FLAGS.out_dir)
   write_graph(tfsplits, FLAGS.out_dir)
+  write_graph(tftop_k, FLAGS.out_dir)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt b/tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt
new file mode 100644
index 0000000000..6b4ac2d7cb
--- /dev/null
+++ b/tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt
@@ -0,0 +1,13 @@
+# Text form of tensorflow.tf2xla.Config proto.
+feed {
+  id { node_name: "x" }
+  shape {
+    dim { size: 5 }
+  }
+}
+fetch {
+  id { node_name: "values" }
+}
+fetch {
+  id { node_name: "indices" }
+}
diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc
index 7ac90fb8a9..f10852c785 100644
--- a/tensorflow/compiler/aot/tests/tfcompile_test.cc
+++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/compiler/aot/tests/test_graph_tfmatmulandadd.h"
 #include "tensorflow/compiler/aot/tests/test_graph_tfmatmulandadd_with_profiling.h"
 #include "tensorflow/compiler/aot/tests/test_graph_tfsplits.h"
+#include "tensorflow/compiler/aot/tests/test_graph_tftop_k.h"
 #include "tensorflow/compiler/xla/service/hlo_profile_printer.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
@@ -448,6 +449,30 @@ TEST(TFCompileTest, Splits) {
   EXPECT_NEAR(expected[3], fn.result0(1, 1), 1e4);
 }
 
+TEST(TFCompileTest, TopK) {
+  Eigen::ThreadPool tp(1);
+  Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
+
+  TopKComp fn;
+
+  fn.set_thread_pool(&device);
+  // x = [4, 1, 4, 4, 3]
+  fn.arg0(0) = 4;
+  fn.arg0(1) = 1;
+  fn.arg0(2) = 4;
+  fn.arg0(3) = 4;
+  fn.arg0(4) = 3;
+
+  EXPECT_TRUE(fn.Run());
+  EXPECT_EQ(fn.error_msg(), "");
+  const int32 expected_values[] = {4, 4};
+  const int32 expected_indices[] = {0, 2};
+  EXPECT_EQ(expected_values[0], fn.result0(0));
+  EXPECT_EQ(expected_values[1], fn.result0(1));
+  EXPECT_EQ(expected_indices[0], fn.result1(0));
+  EXPECT_EQ(expected_indices[1], fn.result1(1));
+}
+
 TEST(TFCompileTest, AssertEqAndReturnDiff) {
   // Assert is converted into a no-op in XLA, so there is no failure even if the
   // two args are different.
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 792b7fe14a..859c84bb91 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -273,6 +273,7 @@ def tf_library(
             "//tensorflow/compiler/tf2xla/kernels:index_ops_kernel_argmax_float_1d",
             "//tensorflow/compiler/tf2xla/kernels:index_ops_kernel_argmax_float_2d",
             "//tensorflow/compiler/xla/service/cpu:runtime_conv2d",
+            "//tensorflow/compiler/xla/service/cpu:runtime_key_value_sort",
             "//tensorflow/compiler/xla/service/cpu:runtime_matmul",
             "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_conv2d",
             "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_matmul",
-- 
GitLab


From da3357ecbdd6772413e8bbceeab8238971be11ce Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 00:53:31 -0700
Subject: [PATCH 0429/1357] Automated rollback of commit
 31c0857f6b5d79f4a7b16ee4af85f0bde8b5f5da

PiperOrigin-RevId: 213764810
---
 tensorflow/compiler/aot/tests/BUILD           | 15 -----------
 .../compiler/aot/tests/make_test_graphs.py    |  8 ------
 .../aot/tests/test_graph_tftop_k.config.pbtxt | 13 ----------
 .../compiler/aot/tests/tfcompile_test.cc      | 25 -------------------
 tensorflow/compiler/aot/tfcompile.bzl         |  1 -
 5 files changed, 62 deletions(-)
 delete mode 100644 tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt

diff --git a/tensorflow/compiler/aot/tests/BUILD b/tensorflow/compiler/aot/tests/BUILD
index 10fa33ab5e..7a0932d44d 100644
--- a/tensorflow/compiler/aot/tests/BUILD
+++ b/tensorflow/compiler/aot/tests/BUILD
@@ -25,7 +25,6 @@ test_suite(
         ":test_graph_tfmatmul_test",
         ":test_graph_tfmatmulandadd_test",
         ":test_graph_tfsplits_test",
-        ":test_graph_tftop_k_test",
         ":tfcompile_test",
     ],
 )
@@ -43,7 +42,6 @@ py_binary(
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:math_ops",
-        "//tensorflow/python:nn_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:session",
         "//tensorflow/python:training",
@@ -68,7 +66,6 @@ genrule(
         "test_graph_tfmatmul.pb",
         "test_graph_tfmatmulandadd.pb",
         "test_graph_tfsplits.pb",
-        "test_graph_tftop_k.pb",
     ],
     # Set CUDA_VISIBLE_DEVICES='' to prevent the code we launch from using any
     # GPUs which might be present.  This is important because builds may run
@@ -211,17 +208,6 @@ tf_library(
     ],
 )
 
-tf_library(
-    name = "test_graph_tftop_k",
-    testonly = 1,
-    config = "test_graph_tftop_k.config.pbtxt",
-    cpp_class = "TopKComp",
-    graph = "test_graph_tftop_k.pb",
-    tags = [
-        "manual",
-    ],
-)
-
 tf_cc_test(
     name = "tfcompile_test",
     srcs = ["tfcompile_test.cc"],
@@ -240,7 +226,6 @@ tf_cc_test(
         ":test_graph_tfmatmulandadd",
         ":test_graph_tfmatmulandadd_with_profiling",
         ":test_graph_tfsplits",
-        ":test_graph_tftop_k",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:xla_data_proto",
diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py
index de135d7a23..9ec7df163b 100644
--- a/tensorflow/compiler/aot/tests/make_test_graphs.py
+++ b/tensorflow/compiler/aot/tests/make_test_graphs.py
@@ -31,7 +31,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import app
 from tensorflow.python.training import saver as saver_lib
@@ -143,12 +142,6 @@ def tfsplits(_):
   array_ops.identity(y, name='result')
 
 
-def tftop_k(_):
-  x = array_ops.placeholder(dtypes.int32, shape=[5], name='x')
-  output = nn_ops.top_k(x, 2, name='values')
-  array_ops.identity(output[1], name='indices')
-
-
 def write_graph(build_graph, out_dir):
   """Build a graph using build_graph and write it out."""
   g = ops.Graph()
@@ -170,7 +163,6 @@ def main(_):
   write_graph(tfmatmul, FLAGS.out_dir)
   write_graph(tfmatmulandadd, FLAGS.out_dir)
   write_graph(tfsplits, FLAGS.out_dir)
-  write_graph(tftop_k, FLAGS.out_dir)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt b/tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt
deleted file mode 100644
index 6b4ac2d7cb..0000000000
--- a/tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt
+++ /dev/null
@@ -1,13 +0,0 @@
-# Text form of tensorflow.tf2xla.Config proto.
-feed {
-  id { node_name: "x" }
-  shape {
-    dim { size: 5 }
-  }
-}
-fetch {
-  id { node_name: "values" }
-}
-fetch {
-  id { node_name: "indices" }
-}
diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc
index f10852c785..7ac90fb8a9 100644
--- a/tensorflow/compiler/aot/tests/tfcompile_test.cc
+++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc
@@ -29,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/aot/tests/test_graph_tfmatmulandadd.h"
 #include "tensorflow/compiler/aot/tests/test_graph_tfmatmulandadd_with_profiling.h"
 #include "tensorflow/compiler/aot/tests/test_graph_tfsplits.h"
-#include "tensorflow/compiler/aot/tests/test_graph_tftop_k.h"
 #include "tensorflow/compiler/xla/service/hlo_profile_printer.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
@@ -449,30 +448,6 @@ TEST(TFCompileTest, Splits) {
   EXPECT_NEAR(expected[3], fn.result0(1, 1), 1e4);
 }
 
-TEST(TFCompileTest, TopK) {
-  Eigen::ThreadPool tp(1);
-  Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
-
-  TopKComp fn;
-
-  fn.set_thread_pool(&device);
-  // x = [4, 1, 4, 4, 3]
-  fn.arg0(0) = 4;
-  fn.arg0(1) = 1;
-  fn.arg0(2) = 4;
-  fn.arg0(3) = 4;
-  fn.arg0(4) = 3;
-
-  EXPECT_TRUE(fn.Run());
-  EXPECT_EQ(fn.error_msg(), "");
-  const int32 expected_values[] = {4, 4};
-  const int32 expected_indices[] = {0, 2};
-  EXPECT_EQ(expected_values[0], fn.result0(0));
-  EXPECT_EQ(expected_values[1], fn.result0(1));
-  EXPECT_EQ(expected_indices[0], fn.result1(0));
-  EXPECT_EQ(expected_indices[1], fn.result1(1));
-}
-
 TEST(TFCompileTest, AssertEqAndReturnDiff) {
   // Assert is converted into a no-op in XLA, so there is no failure even if the
   // two args are different.
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 859c84bb91..792b7fe14a 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -273,7 +273,6 @@ def tf_library(
             "//tensorflow/compiler/tf2xla/kernels:index_ops_kernel_argmax_float_1d",
             "//tensorflow/compiler/tf2xla/kernels:index_ops_kernel_argmax_float_2d",
             "//tensorflow/compiler/xla/service/cpu:runtime_conv2d",
-            "//tensorflow/compiler/xla/service/cpu:runtime_key_value_sort",
             "//tensorflow/compiler/xla/service/cpu:runtime_matmul",
             "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_conv2d",
             "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_matmul",
-- 
GitLab


From a54310b1faa39df94dcef9ad1b5aaa0acc691e35 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 01:43:05 -0700
Subject: [PATCH 0430/1357] Internal change.

PiperOrigin-RevId: 213770000
---
 tensorflow/compiler/jit/xla_device.cc         | 10 ++++
 tensorflow/compiler/jit/xla_device.h          | 12 ++++-
 .../compiler/xla/service/stream_pool.cc       | 10 +++-
 .../compiler/xla/service/stream_pool_test.cc  | 34 ++++++++++++
 tensorflow/core/common_runtime/device.h       |  4 ++
 tensorflow/core/common_runtime/executor.cc    |  6 ++-
 tensorflow/core/framework/cancellation.cc     | 10 ++++
 tensorflow/core/framework/cancellation.h      |  9 ++++
 .../core/framework/cancellation_test.cc       | 52 +++++++++++++++++++
 9 files changed, 142 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc
index 51797def04..32fce2bf94 100644
--- a/tensorflow/compiler/jit/xla_device.cc
+++ b/tensorflow/compiler/jit/xla_device.cc
@@ -434,6 +434,16 @@ Status XlaDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
   return status;
 }
 
+void XlaDevice::SetRequiresSyncOnCompletion(bool sync_on_completion) {
+  mutex_lock lock(mu_);
+  sync_on_completion_ = sync_on_completion;
+}
+
+bool XlaDevice::RequiresSyncOnCompletion() const {
+  mutex_lock lock(mu_);
+  return sync_on_completion_;
+}
+
 XlaDeviceOpRegistrations* RegisterXlaDeviceKernels(const char* device,
                                                    const char* jit_device) {
   // Any op assigned to the device that isn't rewritten by the graph rewriter
diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h
index 92891ffa8c..0f06b3fc80 100644
--- a/tensorflow/compiler/jit/xla_device.h
+++ b/tensorflow/compiler/jit/xla_device.h
@@ -151,6 +151,12 @@ class XlaDevice : public LocalDevice {
   // information for GPU and TPU devices.
   Status UseGpuDeviceInfo() LOCKS_EXCLUDED(mu_);
 
+  // Instructs this XlaDevice to return 'sync_on_completion' for
+  // RequiresSyncOnCompletion().
+  void SetRequiresSyncOnCompletion(bool sync_on_completion) LOCKS_EXCLUDED(mu_);
+
+  bool RequiresSyncOnCompletion() const override LOCKS_EXCLUDED(mu_);
+
  private:
   xla::LocalClient* client() const;
   Allocator* GetAllocatorLocked(AllocatorAttributes attr)
@@ -165,7 +171,7 @@ class XlaDevice : public LocalDevice {
   static Status GetMetadataFromDevice(DeviceBase* device,
                                       const XlaDevice::Metadata** metadata);
 
-  mutex mu_;
+  mutable mutex mu_;
   // The metadata of this XlaDevice.
   const Metadata xla_metadata_;
   // Which hardware device in the client's platform this XlaDevice controls.
@@ -207,6 +213,10 @@ class XlaDevice : public LocalDevice {
 
   // Thread pool used for running closures
   std::unique_ptr<thread::ThreadPool> thread_pool_;
+
+  // True if the device requires XlaDevice::Sync to be called on completion
+  // regardless of status.
+  bool sync_on_completion_ GUARDED_BY(mu_) = false;
 };
 
 // Builds OpKernel registrations on 'device' for the JIT operators
diff --git a/tensorflow/compiler/xla/service/stream_pool.cc b/tensorflow/compiler/xla/service/stream_pool.cc
index 5d1cd1c442..ec09dff924 100644
--- a/tensorflow/compiler/xla/service/stream_pool.cc
+++ b/tensorflow/compiler/xla/service/stream_pool.cc
@@ -28,8 +28,14 @@ StreamPool::Ptr StreamPool::BorrowStream(se::StreamExecutor* executor) {
       // Re-use an existing stream from the pool.
       stream = std::move(streams_.back());
       streams_.pop_back();
-      VLOG(1) << stream->DebugStreamPointers()
-              << " StreamPool reusing existing stream";
+      if (stream->ok()) {
+        VLOG(1) << stream->DebugStreamPointers()
+                << " StreamPool reusing existing stream";
+      } else {
+        VLOG(1) << stream->DebugStreamPointers()
+                << " stream was not ok, StreamPool deleting";
+        stream = nullptr;
+      }
     }
   }
 
diff --git a/tensorflow/compiler/xla/service/stream_pool_test.cc b/tensorflow/compiler/xla/service/stream_pool_test.cc
index aaf5c37b0d..92f47579d3 100644
--- a/tensorflow/compiler/xla/service/stream_pool_test.cc
+++ b/tensorflow/compiler/xla/service/stream_pool_test.cc
@@ -132,5 +132,39 @@ TEST_F(StreamPoolTest, BadStreamDiscarded) {
   EXPECT_EQ(stream2_ptr, stream3_ptr);
 }
 
+TEST_F(StreamPoolTest, BadStreamAfterReturnDiscarded) {
+  std::unique_ptr<se::StreamExecutor> executor = NewStreamExecutor();
+  StreamPool pool;
+
+  // Borrow a stream.
+  StreamPool::Ptr stream1 = pool.BorrowStream(executor.get());
+  EXPECT_TRUE(stream1->ok());
+
+  // Return the stream, but hold a handle to it.
+  se::Stream* stream1_ptr = stream1.get();
+  stream1 = nullptr;
+
+  // Now stream1 is back in the pool, force an error on the stream. Here we call
+  // a method that requires DNN support, which we know the Host platform doesn't
+  // support.
+  stream1_ptr->ThenDepthConcatenate({}, {}, nullptr);
+  EXPECT_FALSE(stream1_ptr->ok());
+
+  // Borrow stream2.
+  StreamPool::Ptr stream2 = pool.BorrowStream(executor.get());
+  EXPECT_TRUE(stream2->ok());
+
+  // The underlying streams should be different. They would have been
+  // the same, but since we forced an error on stream1, it cannot be
+  // put back into the pool. Sadly we can't just check:
+  //    EXPECT_NE(stream1_ptr, stream2_ptr);
+  //
+  // The above should hold logically, but it may fail if the new
+  // stream instance allocated for stream2 happens to reside in the
+  // same memory address as stream1, which has been deleted.
+  //
+  // The check that stream2->ok() serves as a good-enough check.
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/core/common_runtime/device.h b/tensorflow/core/common_runtime/device.h
index 81d68e3be4..fb76d6ac29 100644
--- a/tensorflow/core/common_runtime/device.h
+++ b/tensorflow/core/common_runtime/device.h
@@ -106,6 +106,10 @@ class Device : public DeviceBase {
   // at completion.
   virtual Status Sync() = 0;
 
+  // Override this to return true for devices that require a Sync() call before
+  // session completion.
+  virtual bool RequiresSyncOnCompletion() const { return false; }
+
   // Optionally modify the device's GraphDef before execution.
   //
   // This method should be considered experimental and is supplied to enable
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index d0a0767d6b..98719542c0 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -2301,13 +2301,15 @@ void ExecutorState::Finish() {
   auto done_cb = std::move(done_cb_);
   auto runner = std::move(runner_);
   mu_.unlock();
-  if (sync_on_finish_ && status.ok()) {
+  Device* device = impl_->params_.device;
+  if ((sync_on_finish_ && status.ok()) || device->RequiresSyncOnCompletion()) {
     // Block until the device has finished all queued operations. For
     // devices like GPUs that continue to execute Ops after their Compute
     // methods have completed, this ensures that control is not returned to
     // the user until the step (and its side-effects) has actually completed.
-    status = impl_->params_.device->Sync();
+    status.Update(device->Sync());
   }
+
   delete this;
   CHECK(done_cb != nullptr);
   runner([=]() { done_cb(status); });
diff --git a/tensorflow/core/framework/cancellation.cc b/tensorflow/core/framework/cancellation.cc
index 1258e40c93..af59500aee 100644
--- a/tensorflow/core/framework/cancellation.cc
+++ b/tensorflow/core/framework/cancellation.cc
@@ -89,6 +89,16 @@ bool CancellationManager::DeregisterCallback(CancellationToken token) {
   }
 }
 
+bool CancellationManager::TryDeregisterCallback(CancellationToken token) {
+  mutex_lock lock(mu_);
+  if (is_cancelled_ || is_cancelling_) {
+    return false;
+  } else {
+    callbacks_.erase(token);
+    return true;
+  }
+}
+
 CancellationManager::~CancellationManager() {
   if (!callbacks_.empty()) {
     StartCancel();
diff --git a/tensorflow/core/framework/cancellation.h b/tensorflow/core/framework/cancellation.h
index acdaaf6a90..7a5d942486 100644
--- a/tensorflow/core/framework/cancellation.h
+++ b/tensorflow/core/framework/cancellation.h
@@ -122,6 +122,15 @@ class CancellationManager {
   // cancellation manager.
   bool DeregisterCallback(CancellationToken token);
 
+  // Deregister the callback that, when registered, was associated
+  // with the given cancellation token. Returns true iff the callback
+  // was deregistered and will not be invoked; otherwise returns false
+  // immediately, with no guarantee that the callback has completed.
+  //
+  // This method is guaranteed to return true if StartCancel has not been
+  // called.
+  bool TryDeregisterCallback(CancellationToken token);
+
  private:
   bool is_cancelling_;
   std::atomic_bool is_cancelled_;
diff --git a/tensorflow/core/framework/cancellation_test.cc b/tensorflow/core/framework/cancellation_test.cc
index e3f18240b5..bf7593bc5f 100644
--- a/tensorflow/core/framework/cancellation_test.cc
+++ b/tensorflow/core/framework/cancellation_test.cc
@@ -115,4 +115,56 @@ TEST(Cancellation, IsCancelled) {
   delete cm;
 }
 
+TEST(Cancellation, TryDeregisterWithoutCancel) {
+  bool is_cancelled = false;
+  CancellationManager* manager = new CancellationManager();
+  auto token = manager->get_cancellation_token();
+  bool registered = manager->RegisterCallback(
+      token, [&is_cancelled]() { is_cancelled = true; });
+  EXPECT_TRUE(registered);
+  bool deregistered = manager->TryDeregisterCallback(token);
+  EXPECT_TRUE(deregistered);
+  delete manager;
+  EXPECT_FALSE(is_cancelled);
+}
+
+TEST(Cancellation, TryDeregisterAfterCancel) {
+  bool is_cancelled = false;
+  CancellationManager* manager = new CancellationManager();
+  auto token = manager->get_cancellation_token();
+  bool registered = manager->RegisterCallback(
+      token, [&is_cancelled]() { is_cancelled = true; });
+  EXPECT_TRUE(registered);
+  manager->StartCancel();
+  EXPECT_TRUE(is_cancelled);
+  bool deregistered = manager->TryDeregisterCallback(token);
+  EXPECT_FALSE(deregistered);
+  delete manager;
+}
+
+TEST(Cancellation, TryDeregisterDuringCancel) {
+  Notification cancel_started, finish_callback, cancel_complete;
+  CancellationManager* manager = new CancellationManager();
+  auto token = manager->get_cancellation_token();
+  bool registered = manager->RegisterCallback(token, [&]() {
+    cancel_started.Notify();
+    finish_callback.WaitForNotification();
+  });
+  EXPECT_TRUE(registered);
+
+  thread::ThreadPool w(Env::Default(), "test", 1);
+  w.Schedule([&]() {
+    manager->StartCancel();
+    cancel_complete.Notify();
+  });
+  cancel_started.WaitForNotification();
+
+  bool deregistered = manager->TryDeregisterCallback(token);
+  EXPECT_FALSE(deregistered);
+
+  finish_callback.Notify();
+  cancel_complete.WaitForNotification();
+  delete manager;
+}
+
 }  // namespace tensorflow
-- 
GitLab


From e514555a9572e00243083a8ec6e58c8deed5a501 Mon Sep 17 00:00:00 2001
From: manipopopo <pwmutantbread@gmail.com>
Date: Sun, 10 Jun 2018 16:30:40 +0000
Subject: [PATCH 0431/1357] Fix routing of quantized tensors

The original tensor was not replaced with the quantized one when it had
already been quantized.
---
 .../contrib/quantize/python/quantize.py       | 80 ++++++++++---------
 .../quantize/python/quantize_graph_test.py    | 22 +++++
 2 files changed, 64 insertions(+), 38 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index e88db0acd5..6f34308fdb 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -454,8 +454,8 @@ class _LayerMatch(object):
     return self._bias_add_op
 
 
-def _FollowedByFakeQuant(tensor):
-  """Returns True if the tensor is followed by a FakeQuant."""
+def _GetFollowingFakeQuantOp(tensor):
+  """Returns the following FakeQuant op if it exists else None."""
   fake_quant_ops = set([
       'FakeQuantWithMinMaxVars', 'FakeQuantWithMinMaxArgs',
       'FakeQuantWithMinMaxVarsPerChannel'
@@ -465,11 +465,11 @@ def _FollowedByFakeQuant(tensor):
   while consumers:
     c = consumers.pop()
     if c.type in fake_quant_ops:
-      return True
+      return c
     elif c.type in pass_through_ops:
       for output in c.outputs:
         consumers.extend(output.consumers())
-  return False
+  return None
 
 
 def _InsertQuantOp(context,
@@ -552,43 +552,47 @@ def _InsertQuantOp(context,
   # Prevent ops from being quantized multiple times. Bypass ops can sometimes
   # overlap between multiple matches, so we need to ensure that we don't
   # add duplicate FakeQuant operations.
-  if _FollowedByFakeQuant(inputs):
+  fake_quant_op = _GetFollowingFakeQuantOp(inputs)
+  if fake_quant_op is not None and name == 'act_quant':
     return
 
-  if moving_avg:
-    quant = (
-        quant_ops.MovingAvgQuantize(
-            inputs,
-            init_min=init_min,
-            init_max=init_max,
-            ema_decay=ema_decay,
-            is_training=is_training,
-            num_bits=bits,
-            narrow_range=narrow_range,
-            vars_collection=vars_collection,
-            name_prefix=name_prefix))
+  if fake_quant_op is None:
+    if moving_avg:
+      quant = (
+          quant_ops.MovingAvgQuantize(
+              inputs,
+              init_min=init_min,
+              init_max=init_max,
+              ema_decay=ema_decay,
+              is_training=is_training,
+              num_bits=bits,
+              narrow_range=narrow_range,
+              vars_collection=vars_collection,
+              name_prefix=name_prefix))
+    else:
+      quant = (
+          quant_ops.LastValueQuantize(
+              inputs,
+              init_min=init_min,
+              init_max=init_max,
+              is_training=is_training,
+              num_bits=bits,
+              narrow_range=narrow_range,
+              vars_collection=vars_collection,
+              name_prefix=name_prefix))
+
+    if quant_delay and quant_delay > 0:
+      activate_quant = math_ops.greater_equal(
+          common.CreateOrGetQuantizationStep(),
+          quant_delay,
+          name=name_prefix + '/activate_quant')
+      quant = control_flow_ops.cond(
+          activate_quant,
+          lambda: quant,
+          lambda: inputs,
+          name=name_prefix + '/delayed_quant')
   else:
-    quant = (
-        quant_ops.LastValueQuantize(
-            inputs,
-            init_min=init_min,
-            init_max=init_max,
-            is_training=is_training,
-            num_bits=bits,
-            narrow_range=narrow_range,
-            vars_collection=vars_collection,
-            name_prefix=name_prefix))
-
-  if quant_delay and quant_delay > 0:
-    activate_quant = math_ops.greater_equal(
-        common.CreateOrGetQuantizationStep(),
-        quant_delay,
-        name=name_prefix + '/activate_quant')
-    quant = control_flow_ops.cond(
-        activate_quant,
-        lambda: quant,
-        lambda: inputs,
-        name=name_prefix + '/delayed_quant')
+    quant = fake_quant_op.outputs[0]
 
   if consumers:
     tensors_modified_count = common.RerouteTensor(
diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py
index e80d2183a6..d3e7264ba4 100644
--- a/tensorflow/contrib/quantize/python/quantize_graph_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import template
 from tensorflow.python.platform import googletest
 
 
@@ -306,6 +307,27 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
     # No ops should be inserted or removed.
     self.assertEqual(op_names_before_rewrite, op_names_after_rewrite)
 
+  def testWithSharedWeights(self):
+    self._RunTestOverAllRewrites(self._TestWithSharedWeights)
+
+  def _TestWithSharedWeights(self, rewrite_fn):
+    with ops.Graph().as_default() as g:
+      conv = template.make_template('shared_weights_conv', self._ConvLayer)
+      conv()
+      conv()
+      rewrite_fn()
+
+    conv_ops = [op for op in g.get_operations() if op.type == 'Conv2D']
+    weights_quants = [
+        op for op in g.get_operations()
+        if 'weights_quant' in op.name and op.type == 'FakeQuantWithMinMaxVars'
+    ]
+    # Check that the shared weights variable is not quantized multiple times
+    self.assertTrue(len(weights_quants) == 1)
+    # Check that the Conv2D operations get the quantized weights
+    weights_quant_tensor = weights_quants[0].outputs[0]
+    self.assertTrue(all(weights_quant_tensor in op.inputs for op in conv_ops))
+
   def _ConvLayer(
       self, input_tensor=None, scope='test', pre_activation_bypass=False,
       post_activation_bypass=False):
-- 
GitLab


From f44805f8333aaf76d392bb565fe2381be07ccf2a Mon Sep 17 00:00:00 2001
From: manipopopo <pwmutantbread@gmail.com>
Date: Mon, 11 Jun 2018 14:19:48 +0000
Subject: [PATCH 0432/1357] Fix routing of delayed quantized tensors

---
 .../contrib/quantize/python/quantize.py       |  4 ++++
 .../quantize/python/quantize_graph_test.py    | 19 ++++++++++++++++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index 6f34308fdb..ccf58c7a8a 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -593,6 +593,10 @@ def _InsertQuantOp(context,
           name=name_prefix + '/delayed_quant')
   else:
     quant = fake_quant_op.outputs[0]
+    if quant_delay and quant_delay > 0:
+      name_prefix = '/'.join(quant.name.split('/')[:-1])
+      quant = quant.graph.get_tensor_by_name(
+          name_prefix + '/delayed_quant/Merge:0')
 
   if consumers:
     tensors_modified_count = common.RerouteTensor(
diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py
index d3e7264ba4..36d87039a5 100644
--- a/tensorflow/contrib/quantize/python/quantize_graph_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py
@@ -309,13 +309,19 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
 
   def testWithSharedWeights(self):
     self._RunTestOverAllRewrites(self._TestWithSharedWeights)
+    self._RunTestOverTrainingRewrites(
+        lambda rewrite_fn: self._TestWithSharedWeights(rewrite_fn,
+                                                       quant_delay=1))
 
-  def _TestWithSharedWeights(self, rewrite_fn):
+  def _TestWithSharedWeights(self, rewrite_fn, quant_delay=None):
     with ops.Graph().as_default() as g:
       conv = template.make_template('shared_weights_conv', self._ConvLayer)
       conv()
       conv()
-      rewrite_fn()
+      if quant_delay is None:
+        rewrite_fn()
+      else:
+        rewrite_fn(quant_delay=quant_delay)
 
     conv_ops = [op for op in g.get_operations() if op.type == 'Conv2D']
     weights_quants = [
@@ -324,8 +330,15 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
     ]
     # Check that the shared weights variable is not quantized multiple times
     self.assertTrue(len(weights_quants) == 1)
-    # Check that the Conv2D operations get the quantized weights
     weights_quant_tensor = weights_quants[0].outputs[0]
+    if quant_delay:
+      delayed_weights_quants = [
+          op for op in g.get_operations()
+          if 'weights_quant' in op.name and op.type == 'Merge'
+      ]
+      self.assertTrue(len(delayed_weights_quants) == 1)
+      weights_quant_tensor = delayed_weights_quants[0].outputs[0]
+    # Check that the Conv2D operations get the quantized weights
     self.assertTrue(all(weights_quant_tensor in op.inputs for op in conv_ops))
 
   def _ConvLayer(
-- 
GitLab


From 562ae317d485842159a253ad974d13f7a5f94de2 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Thu, 20 Sep 2018 02:00:19 -0700
Subject: [PATCH 0433/1357] Automated rollback of commit
 da3357ecbdd6772413e8bbceeab8238971be11ce

PiperOrigin-RevId: 213771631
---
 tensorflow/compiler/aot/tests/BUILD           | 15 +++++++++++
 .../compiler/aot/tests/make_test_graphs.py    |  8 ++++++
 .../aot/tests/test_graph_tftop_k.config.pbtxt | 13 ++++++++++
 .../compiler/aot/tests/tfcompile_test.cc      | 25 +++++++++++++++++++
 tensorflow/compiler/aot/tfcompile.bzl         |  1 +
 tensorflow/compiler/xla/service/cpu/BUILD     |  2 +-
 .../xla/service/cpu/runtime_key_value_sort.cc |  1 -
 7 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt

diff --git a/tensorflow/compiler/aot/tests/BUILD b/tensorflow/compiler/aot/tests/BUILD
index 7a0932d44d..10fa33ab5e 100644
--- a/tensorflow/compiler/aot/tests/BUILD
+++ b/tensorflow/compiler/aot/tests/BUILD
@@ -25,6 +25,7 @@ test_suite(
         ":test_graph_tfmatmul_test",
         ":test_graph_tfmatmulandadd_test",
         ":test_graph_tfsplits_test",
+        ":test_graph_tftop_k_test",
         ":tfcompile_test",
     ],
 )
@@ -42,6 +43,7 @@ py_binary(
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python:nn_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:session",
         "//tensorflow/python:training",
@@ -66,6 +68,7 @@ genrule(
         "test_graph_tfmatmul.pb",
         "test_graph_tfmatmulandadd.pb",
         "test_graph_tfsplits.pb",
+        "test_graph_tftop_k.pb",
     ],
     # Set CUDA_VISIBLE_DEVICES='' to prevent the code we launch from using any
     # GPUs which might be present.  This is important because builds may run
@@ -208,6 +211,17 @@ tf_library(
     ],
 )
 
+tf_library(
+    name = "test_graph_tftop_k",
+    testonly = 1,
+    config = "test_graph_tftop_k.config.pbtxt",
+    cpp_class = "TopKComp",
+    graph = "test_graph_tftop_k.pb",
+    tags = [
+        "manual",
+    ],
+)
+
 tf_cc_test(
     name = "tfcompile_test",
     srcs = ["tfcompile_test.cc"],
@@ -226,6 +240,7 @@ tf_cc_test(
         ":test_graph_tfmatmulandadd",
         ":test_graph_tfmatmulandadd_with_profiling",
         ":test_graph_tfsplits",
+        ":test_graph_tftop_k",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:xla_data_proto",
diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py
index 9ec7df163b..de135d7a23 100644
--- a/tensorflow/compiler/aot/tests/make_test_graphs.py
+++ b/tensorflow/compiler/aot/tests/make_test_graphs.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import app
 from tensorflow.python.training import saver as saver_lib
@@ -142,6 +143,12 @@ def tfsplits(_):
   array_ops.identity(y, name='result')
 
 
+def tftop_k(_):
+  x = array_ops.placeholder(dtypes.int32, shape=[5], name='x')
+  output = nn_ops.top_k(x, 2, name='values')
+  array_ops.identity(output[1], name='indices')
+
+
 def write_graph(build_graph, out_dir):
   """Build a graph using build_graph and write it out."""
   g = ops.Graph()
@@ -163,6 +170,7 @@ def main(_):
   write_graph(tfmatmul, FLAGS.out_dir)
   write_graph(tfmatmulandadd, FLAGS.out_dir)
   write_graph(tfsplits, FLAGS.out_dir)
+  write_graph(tftop_k, FLAGS.out_dir)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt b/tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt
new file mode 100644
index 0000000000..6b4ac2d7cb
--- /dev/null
+++ b/tensorflow/compiler/aot/tests/test_graph_tftop_k.config.pbtxt
@@ -0,0 +1,13 @@
+# Text form of tensorflow.tf2xla.Config proto.
+feed {
+  id { node_name: "x" }
+  shape {
+    dim { size: 5 }
+  }
+}
+fetch {
+  id { node_name: "values" }
+}
+fetch {
+  id { node_name: "indices" }
+}
diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc
index 7ac90fb8a9..f10852c785 100644
--- a/tensorflow/compiler/aot/tests/tfcompile_test.cc
+++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/compiler/aot/tests/test_graph_tfmatmulandadd.h"
 #include "tensorflow/compiler/aot/tests/test_graph_tfmatmulandadd_with_profiling.h"
 #include "tensorflow/compiler/aot/tests/test_graph_tfsplits.h"
+#include "tensorflow/compiler/aot/tests/test_graph_tftop_k.h"
 #include "tensorflow/compiler/xla/service/hlo_profile_printer.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
@@ -448,6 +449,30 @@ TEST(TFCompileTest, Splits) {
   EXPECT_NEAR(expected[3], fn.result0(1, 1), 1e4);
 }
 
+TEST(TFCompileTest, TopK) {
+  Eigen::ThreadPool tp(1);
+  Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
+
+  TopKComp fn;
+
+  fn.set_thread_pool(&device);
+  // x = [4, 1, 4, 4, 3]
+  fn.arg0(0) = 4;
+  fn.arg0(1) = 1;
+  fn.arg0(2) = 4;
+  fn.arg0(3) = 4;
+  fn.arg0(4) = 3;
+
+  EXPECT_TRUE(fn.Run());
+  EXPECT_EQ(fn.error_msg(), "");
+  const int32 expected_values[] = {4, 4};
+  const int32 expected_indices[] = {0, 2};
+  EXPECT_EQ(expected_values[0], fn.result0(0));
+  EXPECT_EQ(expected_values[1], fn.result0(1));
+  EXPECT_EQ(expected_indices[0], fn.result1(0));
+  EXPECT_EQ(expected_indices[1], fn.result1(1));
+}
+
 TEST(TFCompileTest, AssertEqAndReturnDiff) {
   // Assert is converted into a no-op in XLA, so there is no failure even if the
   // two args are different.
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 792b7fe14a..859c84bb91 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -273,6 +273,7 @@ def tf_library(
             "//tensorflow/compiler/tf2xla/kernels:index_ops_kernel_argmax_float_1d",
             "//tensorflow/compiler/tf2xla/kernels:index_ops_kernel_argmax_float_2d",
             "//tensorflow/compiler/xla/service/cpu:runtime_conv2d",
+            "//tensorflow/compiler/xla/service/cpu:runtime_key_value_sort",
             "//tensorflow/compiler/xla/service/cpu:runtime_matmul",
             "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_conv2d",
             "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_matmul",
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index b3e4fab727..bf627986a5 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -631,7 +631,7 @@ cc_library(
     copts = runtime_copts(),
     visibility = ["//visibility:public"],
     deps = [
-        "//tensorflow/core:lib",
+        "//tensorflow/core:framework_lite",
         "//third_party/eigen3",
     ],
 )
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
index cef5420f00..e0e7deb98e 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
@@ -23,7 +23,6 @@ limitations under the License.
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/platform/dynamic_annotations.h"
-#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
-- 
GitLab


From 9604413da7a27f5718bb88d407d13476dbef5b82 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 02:15:37 -0700
Subject: [PATCH 0434/1357] compat: Update forward compatibility horizon to
 2018-09-20

PiperOrigin-RevId: 213773990
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 8edd6419d3..419c376b45 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 19)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 20)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 90d084e0c42232043c186e66093b67800fb30fba Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Thu, 20 Sep 2018 03:14:04 -0700
Subject: [PATCH 0435/1357] [XLA:TF] Whitelist quantized types for CPU/GPU

These have the same behavior as unquantized types so we can just pass them
through to XLA (which converts them to unquantized types). They're supposed to
be used with special ops, none of which are currently implemented by XLA.
Casting (without quantization) and basic math works fine though.

These do not have a corresponding numpy type, so only tests using TF types will
see them.

PiperOrigin-RevId: 213781650
---
 tensorflow/compiler/jit/xla_cpu_device.cc     |  6 +--
 tensorflow/compiler/jit/xla_gpu_device.cc     |  6 +--
 tensorflow/compiler/tests/BUILD               | 15 +++++-
 tensorflow/compiler/tests/build_defs.bzl      |  4 +-
 tensorflow/compiler/tests/gather_test.py      | 14 ++++--
 .../compiler/tests/quantized_ops_test.py      | 48 +++++++++++++++++++
 tensorflow/compiler/tests/xla_test.py         | 13 +++--
 tensorflow/compiler/tf2xla/xla_op_registry.h  | 15 +++---
 8 files changed, 97 insertions(+), 24 deletions(-)
 create mode 100644 tensorflow/compiler/tests/quantized_ops_test.py

diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc
index 1afc305abe..e26fa27b31 100644
--- a/tensorflow/compiler/jit/xla_cpu_device.cc
+++ b/tensorflow/compiler/jit/xla_cpu_device.cc
@@ -65,9 +65,9 @@ REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_CPU, XlaCpuDeviceFactory);
 
 // Kernel registrations
 
-constexpr std::array<DataType, 9> kAllXlaCpuTypes = {
-    {DT_UINT8, DT_INT8, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE,
-     DT_COMPLEX64, DT_BOOL}};
+constexpr std::array<DataType, 12> kAllXlaCpuTypes = {
+    {DT_UINT8, DT_QUINT8, DT_INT8, DT_QINT8, DT_INT32, DT_QINT32, DT_INT64,
+     DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}};
 
 REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_CPU, XlaLocalLaunchOp, kAllXlaCpuTypes);
 REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_CPU, kAllXlaCpuTypes);
diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index 4cf556524d..c386984930 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -74,9 +74,9 @@ REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_GPU, XlaGpuDeviceFactory);
 
 // Kernel registrations
 
-constexpr std::array<DataType, 10> kAllXlaGpuTypes = {
-    {DT_UINT8, DT_INT8, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE,
-     DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}};
+constexpr std::array<DataType, 13> kAllXlaGpuTypes = {
+    {DT_UINT8, DT_QUINT8, DT_INT8, DT_QINT8, DT_INT32, DT_QINT32, DT_INT64,
+     DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}};
 
 REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_GPU, XlaLocalLaunchOp, kAllXlaGpuTypes);
 REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_GPU, kAllXlaGpuTypes);
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 97ed554171..3cf74fa788 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -978,7 +978,7 @@ tf_xla_py_test(
     name = "gather_test",
     size = "medium",
     srcs = ["gather_test.py"],
-    tags = ["noasan"],  # times out, http://b/78599043
+    tags = ["optonly"],
     deps = [
         ":xla_test",
         "//tensorflow/python:array_ops",
@@ -1197,6 +1197,19 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "quantized_ops_test",
+    size = "small",
+    srcs = ["quantized_ops_test.py"],
+    deps = [
+        ":xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 tf_xla_py_test(
     name = "xla_ops_test",
     size = "medium",
diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl
index b8583c9bdb..1d3979b21b 100644
--- a/tensorflow/compiler/tests/build_defs.bzl
+++ b/tensorflow/compiler/tests/build_defs.bzl
@@ -62,12 +62,12 @@ def tf_xla_py_test(
         if backend == "cpu":
             backend_args += [
                 "--test_device=XLA_CPU",
-                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_UINT8,DT_INT8,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64",
+                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_UINT8,DT_QUINT8,DT_INT8,DT_QINT8,DT_INT32,DT_QINT32,DT_INT64,DT_BOOL,DT_COMPLEX64",
             ]
         elif backend == "gpu":
             backend_args += [
                 "--test_device=XLA_GPU",
-                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_UINT8,DT_INT8,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16",
+                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_UINT8,DT_QUINT8,DT_INT8,DT_QINT8,DT_INT32,DT_QINT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16",
             ]
             backend_tags += tf_cuda_tests_tags()
         elif backend in plugins:
diff --git a/tensorflow/compiler/tests/gather_test.py b/tensorflow/compiler/tests/gather_test.py
index 089d95daab..a38e1edafe 100644
--- a/tensorflow/compiler/tests/gather_test.py
+++ b/tensorflow/compiler/tests/gather_test.py
@@ -51,7 +51,7 @@ class GatherTest(xla_test.XLATestCase):
           indices_tf = constant_op.constant(indices)
           gather_t = array_ops.gather(params, indices_tf)
           gather_val = session.run(gather_t, feed_dict={params: params_np})
-          np_val = params_np[indices]
+          np_val = constant_op.constant(params_np[indices])
           self.assertAllEqual(np_val, gather_val)
 
   def testScalar2D(self):
@@ -65,7 +65,8 @@ class GatherTest(xla_test.XLATestCase):
           indices = constant_op.constant(2)
           gather_t = array_ops.gather(params, indices, axis=axis)
           gather_val = session.run(gather_t, feed_dict={params: params_np})
-          expected = np.take(params_np, 2, axis=axis)
+          expected = constant_op.constant(
+              np.take(params_np, 2, axis=axis), dtype)
           self.assertAllEqual(expected, gather_val)
 
   def testSimpleTwoD32(self):
@@ -80,7 +81,8 @@ class GatherTest(xla_test.XLATestCase):
           indices = constant_op.constant([0, 1, 0, 2])
           gather_t = array_ops.gather(params, indices, axis=axis)
           gather_val = session.run(gather_t, feed_dict={params: params_np})
-          expected = np.take(params_np, [0, 1, 0, 2], axis=axis)
+          expected = constant_op.constant(
+              np.take(params_np, [0, 1, 0, 2], axis=axis), dtype)
           self.assertAllEqual(expected, gather_val)
 
   def testSimpleTwoD32_Int64Indices(self):
@@ -103,7 +105,8 @@ class GatherTest(xla_test.XLATestCase):
                   params: params_np,
                   indices: indices_np
               })
-          expected = np.take(params_np, [0, 1, 0, 2], axis=axis)
+          expected = constant_op.constant(
+              np.take(params_np, [0, 1, 0, 2], axis=axis), dtype)
           self.assertAllEqual(expected, gather_val)
 
   def testHigherRank(self):
@@ -119,7 +122,8 @@ class GatherTest(xla_test.XLATestCase):
             tf_indices = constant_op.constant(indices, dtype=dtypes.int32)
             gather = array_ops.gather(tf_params, tf_indices, axis=axis)
             gather_value = sess.run(gather, feed_dict={tf_params: params})
-            gather_np = np.take(params, indices, axis=axis)
+            gather_np = constant_op.constant(
+                np.take(params, indices, axis=axis), dtype)
             self.assertAllEqual(gather_np, gather_value)
 
   def testIndicesWithDifferentDimensions(self):
diff --git a/tensorflow/compiler/tests/quantized_ops_test.py b/tensorflow/compiler/tests/quantized_ops_test.py
new file mode 100644
index 0000000000..80c338513b
--- /dev/null
+++ b/tensorflow/compiler/tests/quantized_ops_test.py
@@ -0,0 +1,48 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for quantized operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.compiler.tests import xla_test
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import googletest
+
+
+class QuantizedOpsTest(xla_test.XLATestCase):
+
+  # Verify that quantized types can be clustered by XLA.
+  def testQuantizedTypeRoundtrip(self):
+    with self.cached_session() as session:
+      for dtype in self.quantized_tf_types:
+        in_values = np.array([1, 2, 3, 4, 5, 6])
+        expected = [[1, 2], [3, 4], [5, 6]]
+        with self.test_scope():
+          p = array_ops.placeholder(dtype=dtypes.int32)
+          x = math_ops.cast(p, dtype)
+          x = array_ops.reshape(x, [3, 2])
+
+        value = session.run(x, {p: in_values})
+        self.assertAllEqual(value, expected)
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/tensorflow/compiler/tests/xla_test.py b/tensorflow/compiler/tests/xla_test.py
index df5c81243a..98a41981cf 100644
--- a/tensorflow/compiler/tests/xla_test.py
+++ b/tensorflow/compiler/tests/xla_test.py
@@ -97,9 +97,16 @@ class XLATestCase(test.TestCase):
     ])
     self._numeric_tf_types = set(
         self.int_tf_types | self._float_tf_types | self.complex_tf_types)
-
-    self._all_types = set(
-        [dtype.as_numpy_dtype for dtype in self._all_tf_types])
+    self.quantized_tf_types = set(
+        dtype for dtype in self._all_tf_types if dtype.is_quantized)
+
+    # Quantized types don't have a numpy equivalent, include them in
+    # all_tf_types but not in all_types.
+    # TODO(b/115960798): Parametrize tests on TF types instead of numpy types
+    # and remove all_types.
+    self._all_types = set(dtype.as_numpy_dtype
+                          for dtype in self._all_tf_types
+                          if not dtype.is_quantized)
     self._int_types = set([dtype.as_numpy_dtype for dtype in self.int_tf_types])
     self.signed_int_types = set(dtype.as_numpy_dtype
                                 for dtype in self.int_tf_types
diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h
index a4b624820a..4b2c2bacd6 100644
--- a/tensorflow/compiler/tf2xla/xla_op_registry.h
+++ b/tensorflow/compiler/tf2xla/xla_op_registry.h
@@ -51,13 +51,14 @@ constexpr std::array<DataType, 11> kNumericTypes = {
     {DT_UINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_INT32, DT_INT64, DT_HALF,
      DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BFLOAT16}};
 
-constexpr std::array<DataType, 11> kCpuAllTypes = {
-    {DT_UINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_INT32, DT_INT64, DT_HALF,
-     DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}};
-
-constexpr std::array<DataType, 12> kGpuAllTypes = {
-    {DT_UINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_INT32, DT_INT64, DT_HALF,
-     DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}};
+constexpr std::array<DataType, 14> kCpuAllTypes = {
+    {DT_UINT8, DT_QUINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_QINT8, DT_INT32,
+     DT_QINT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}};
+
+constexpr std::array<DataType, 15> kGpuAllTypes = {
+    {DT_UINT8, DT_QUINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_QINT8, DT_INT32,
+     DT_QINT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL,
+     DT_BFLOAT16}};
 
 // Class that manages registrations of operators and devices for the XLA JIT.
 // Not thread-safe.
-- 
GitLab


From 32047f490d0892056ae4e0214d2f049887fdcf35 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 06:34:45 -0700
Subject: [PATCH 0436/1357] Fix typo in _EnforceShapeInvariant.

PiperOrigin-RevId: 213801006
---
 tensorflow/python/ops/control_flow_ops.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 0e20fadb2b..87f8bd85a5 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -610,9 +610,10 @@ def _EnforceShapeInvariant(merge_var, next_var):
           "less-specific shape." %
           (input_t.name, input_t.shape, n_shape))
   else:
-    if not isinstance(var, (ops.IndexedSlices, sparse_tensor.SparseTensor)):
-      raise TypeError("Type %s not supported" % type(var))
-    if isinstance(var, ops.IndexedSlices):
+    if not isinstance(merge_var,
+                      (ops.IndexedSlices, sparse_tensor.SparseTensor)):
+      raise TypeError("Type %s not supported" % type(merge_var))
+    if isinstance(merge_var, ops.IndexedSlices):
       m_values_shape = merge_var.values.get_shape()
       m_indices_shape = merge_var.indices.get_shape()
       m_shape_shape = tensor_shape.TensorShape(None)
-- 
GitLab


From 78e205d35b31aa49e8dac357d827900a165f0a21 Mon Sep 17 00:00:00 2001
From: Erik Smistad <ersmistad@gmail.com>
Date: Thu, 20 Sep 2018 15:56:34 +0200
Subject: [PATCH 0437/1357] Added warning message if cmake version is below 3.8
 or host toolset is not set to x64 on windows

---
 tensorflow/contrib/cmake/CMakeLists.txt | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 225c5e6227..a7a66472df 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -1,8 +1,14 @@
 # Minimum CMake required
+cmake_minimum_required(VERSION 3.5)
+
 if(WIN32)
-  cmake_minimum_required(VERSION 3.8)
-else()
-  cmake_minimum_required(VERSION 3.5)
+	if(${CMAKE_VERSION} VERSION_LESS "3.8")
+		message(WARNING "Your current cmake version is ${CMAKE_VERSION} which does not support setting the toolset architecture to x64. This may cause \"compiler out of heap space\" errors when building. Consider upgrading your cmake to > 3.8 and using the flag -Thost=x64 when running cmake.")
+	else()
+		if(NOT CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE OR NOT "${CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE}" STREQUAL "x64")
+			message(WARNING "Your current cmake generator is set to use 32 bit toolset architecture. This may cause \"compiler out of heap space\" errors when building. Consider using the flag -Thost=x64 when running cmake.")
+		endif()
+	endif()
 endif()
 
 # Project
-- 
GitLab


From ec6407526c28353c9a0aca5c471681e7d8d4f981 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 20 Sep 2018 14:14:02 +0000
Subject: [PATCH 0438/1357] Fix build failure in verbs

While looking into 22372, the verbs build fails with the following error:
```
tensorflow/contrib/verbs/verbs_server_lib.cc:80:6: error: 'once_call' in namespace 'std' does not name a type
 std::once_call reg_mem_visitors_call;
      ^
tensorflow/contrib/verbs/verbs_server_lib.cc: In member function 'tensorflow::Status tensorflow::VerbsServer::Init(tensorflow::ServiceInitFunction, tensorflow::RendezvousMgrCreationFunction)':
tensorflow/contrib/verbs/verbs_server_lib.cc:85:18: error: 'reg_mem_visitors_call' was not declared in this scope
   std::call_once(reg_mem_visitors_call, []() { RdmaMgr::RegMemVisitors(); });
```

This fix fixes the build failures with `once_call` -> `once_flag`.

This fix fixes 22372.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/verbs/verbs_server_lib.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc
index 61469686e4..d6425bf409 100644
--- a/tensorflow/contrib/verbs/verbs_server_lib.cc
+++ b/tensorflow/contrib/verbs/verbs_server_lib.cc
@@ -15,6 +15,8 @@ limitations under the License.
 
 #ifdef TENSORFLOW_USE_VERBS
 
+#include <mutex>
+
 #include "tensorflow/contrib/verbs/verbs_server_lib.h"
 
 #include "grpc/support/alloc.h"
@@ -77,7 +79,7 @@ Status VerbsServer::ChannelCacheFactory(const ServerDef& server_def,
 }
 
 namespace {
-std::once_call reg_mem_visitors_call;
+std::once_flag reg_mem_visitors_call;
 }  // namespace
 
 Status VerbsServer::Init(ServiceInitFunction service_func,
-- 
GitLab


From e5b72e3107188e4e8286459704c076a8a84b2a96 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 20 Sep 2018 14:16:17 +0000
Subject: [PATCH 0439/1357] Remove unneeded header include

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/verbs/verbs_server_lib.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc
index d6425bf409..5b72b1604a 100644
--- a/tensorflow/contrib/verbs/verbs_server_lib.cc
+++ b/tensorflow/contrib/verbs/verbs_server_lib.cc
@@ -15,8 +15,6 @@ limitations under the License.
 
 #ifdef TENSORFLOW_USE_VERBS
 
-#include <mutex>
-
 #include "tensorflow/contrib/verbs/verbs_server_lib.h"
 
 #include "grpc/support/alloc.h"
-- 
GitLab


From 06ad4ad47bef99d4a8f6856bbb121387e8edcfa5 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Thu, 20 Sep 2018 09:39:49 -0700
Subject: [PATCH 0440/1357] Callbacks should count the steps correctly in the
 multi step case

PiperOrigin-RevId: 213829360
---
 tensorflow/python/keras/callbacks.py             | 16 +++++++++++-----
 .../python/keras/engine/training_distributed.py  | 10 +---------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index befe82f4ec..6dfbbf3694 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -360,7 +360,10 @@ class BaseLogger(Callback):
   def on_batch_end(self, batch, logs=None):
     logs = logs or {}
     batch_size = logs.get('size', 0)
-    self.seen += batch_size
+    # In case of distribution strategy we can potentially run multiple steps
+    # at the same time, we should account for that in the `seen` calculation.
+    num_steps = logs.get('num_steps', 1)
+    self.seen += batch_size * num_steps
 
     for k, v in logs.items():
       if k in self.stateful_metrics:
@@ -448,10 +451,13 @@ class ProgbarLogger(Callback):
   def on_batch_end(self, batch, logs=None):
     logs = logs or {}
     batch_size = logs.get('size', 0)
+    # In case of distribution strategy we can potentially run multiple steps
+    # at the same time, we should account for that in the `seen` calculation.
+    num_steps = logs.get('num_steps', 1)
     if self.use_steps:
-      self.seen += 1
+      self.seen += num_steps
     else:
-      self.seen += batch_size
+      self.seen += batch_size * num_steps
 
     for k in self.params['metrics']:
       if k in logs:
@@ -1068,7 +1074,7 @@ class TensorBoard(Callback):
     logs = logs or {}
     batch_logs = {('batch_' + k): v
                   for k, v in logs.items()
-                  if k not in ['batch', 'size']}
+                  if k not in ['batch', 'size', 'num_steps']}
     self._write_custom_summaries(self._total_batches_seen, batch_logs)
     self._total_batches_seen += 1
 
@@ -1092,7 +1098,7 @@ class TensorBoard(Callback):
     # batch number as Tensorboard summaries
     logs = {('epoch_' + k): v
             for k, v in logs.items()
-            if k not in ['batch', 'size']}
+            if k not in ['batch', 'size', 'num_steps']}
     self._write_custom_summaries(epoch, logs)
 
     # pop the histogram summary op after each epoch
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 05b40c66e3..26c5ec4efc 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -314,12 +314,6 @@ def _experimental_fit_loop(
     distributed_model = current_strategy.unwrap(model._grouped_model)[0]
     distributed_training_utils.set_weights(
         current_strategy, distributed_model, orig_model_weights)
-
-  # TODO(sourabhbajaj): Convert this into a proper validation function
-  if callbacks:
-    raise NotImplementedError(
-        'Callbacks are not supported with TPUStrategy right now.')
-
   callbacks = cbks.configure_callbacks(
       callbacks,
       model,
@@ -345,9 +339,7 @@ def _experimental_fit_loop(
     step_index = 0
     prev_step_count = None
     for step_count in steps_to_run:
-      # TODO(sourabhbajaj): Replace size with a combination of steps_per_run
-      # and batch_size
-      batch_logs = {'batch': step_index, 'size': 1}
+      batch_logs = {'batch': step_index, 'size': 1, 'num_steps': step_count}
       callbacks.on_batch_begin(step_index, batch_logs)
       if prev_step_count is None or step_count != prev_step_count:
         steps_per_run_var.load(step_count, K.get_session())
-- 
GitLab


From 13fac9da3820d0dda504eac43a0bd59876742262 Mon Sep 17 00:00:00 2001
From: Guangda Lai <31743510+aaroey@users.noreply.github.com>
Date: Thu, 20 Sep 2018 09:56:07 -0700
Subject: [PATCH 0441/1357] Set back the ITensor name, but conditionally.

---
 .../contrib/tensorrt/convert/convert_nodes.cc    | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 6283bd2300..0ce891782e 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -693,10 +693,16 @@ class Converter {
       // TODO(jie): tf protobuf seems to be omitting the :0 suffix
       string output_name = node_def.name();
       if (i != 0) output_name = StrCat(output_name, ":", i);
-      // We should not call output.tensor()->setName(), since the name may have
-      // already been set before (e.g. for Identity op where the output is the
-      // input, if its input is one of the engine input, setting the name here
-      // will overwrite engine input bindings which will cause runtime error).
+      // We need to check the name before setting it. For Identity op where the
+      // output is the input, if its input is one of the engine input, setting
+      // the name here will overwrite engine input bindings which will cause
+      // runtime error.
+      if (output.is_tensor()) {
+        const char* tensor_name = output.tensor()->getName();
+        if (tensor_name == nullptr || std::strlen(tensor_name) == 0) {
+          output.tensor()->setName(output_name.c_str());
+        }
+      }
       VLOG(2) << "Adding out tensor " << output_name << ": "
               << output.DebugString();
       if (!trt_tensors_.insert({output_name, output}).second) {
@@ -1301,6 +1307,7 @@ tensorflow::Status ConvertConv2DHelper(
 
   layer->setStride(stride);
   layer->setPadding({padding[0].first, padding[1].first});
+  layer->setName(node_def.name().c_str());
   layer->setNbGroups(num_groups);
   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
   VLOG(2) << "TENSOR out: " << DebugString(output_tensor->getDimensions());
@@ -1546,6 +1553,7 @@ tensorflow::Status ConvertPool(Converter& ctx,
 
   layer->setStride(stride);
   layer->setPadding({padding[0].first, padding[1].first});
+  layer->setName(node_def.name().c_str());
   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
 
   if (data_format == "NHWC") {
-- 
GitLab


From 72e085ca1701e275acec381885b519fa6b06522c Mon Sep 17 00:00:00 2001
From: Guangda Lai <31743510+aaroey@users.noreply.github.com>
Date: Thu, 20 Sep 2018 10:01:59 -0700
Subject: [PATCH 0442/1357] Add comments about why we use uint64_t type for
 alignment.

---
 tensorflow/contrib/tensorrt/resources/trt_allocator.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
index f6b4b4dcab..a9425864dd 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
@@ -27,6 +27,10 @@ namespace tensorflow {
 namespace tensorrt {
 
 // std::align is not supported, so this method mimic its behavior.
+//
+// NOTE(aaroey): according to the TensorRT API,
+// nvinfer1::IGpuAllocator::allocate() uses uint64_t type for size and alignment
+// parameters, so here we use the same type to make it compatible.
 void* Align(uint64_t alignment, uint64_t size, void*& ptr, uint64_t& space) {
   QCHECK_GT(alignment, 0ul) << "alignment must be greater than 0.";
   QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
-- 
GitLab


From 07c9ba4bba274d424404eedab14b3fab3f072350 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 20 Sep 2018 10:41:29 -0700
Subject: [PATCH 0443/1357] [tf.data] Use
 vectorization_utils::VectorizeMapDefun in MapVectorization optimization

PiperOrigin-RevId: 213840320
---
 .../optimization/map_vectorization_test.py    | 40 ++++++++++---------
 .../core/grappler/optimizers/data/BUILD       |  1 +
 .../optimizers/data/map_vectorization.cc      | 31 ++++++++++++--
 .../optimizers/data/vectorization_utils.cc    |  5 +++
 4 files changed, 56 insertions(+), 21 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
index e2c9bc82df..5b493f44c9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
@@ -173,16 +173,6 @@ class MapVectorizationBenchmark(test.Benchmark):
     self.report_benchmark(iters=num_iters, wall_time=median_time, name=name)
     return median_time
 
-  def benchmark_CheapFns(self):
-
-    input_sizes = [(10, 10, 3), (10, 100, 300)]
-    batch_size = 1000
-    for input_size in input_sizes:
-      input_dataset = dataset_ops.Dataset.from_tensor_slices(
-          (np.random.rand(*input_size), np.random.rand(*input_size))).repeat()
-      for map_fn, str_id in self._get_known_cheap_fns():
-        self._compare(input_dataset, map_fn, batch_size, input_size, str_id)
-
   def _compare(self, input_dataset, map_fn, batch_size, input_size, str_id):
     num_elems = np.prod(input_size)
     name_template = "{}__batch_size_{}_input_size_{}_{}"
@@ -205,14 +195,28 @@ class MapVectorizationBenchmark(test.Benchmark):
           "Speedup: {}\n".format(batch_size, input_size, str_id,
                                  (unoptimized_time / optimized_time)))
 
-  def _get_known_cheap_fns(self):
-    return [
-        (lambda *args: [array_ops.identity(x) for x in args], "identity"),
-        (lambda *args: [x + 1 for x in args], "add_const"),
-        (lambda *args: args[0], "select"),
-        (lambda *args: [math_ops.cast(x, dtypes.float64) for x in args],
-         "cast"),
-    ]
+  # Known cheap functions
+  def benchmarkIdentity(self):
+    self._benchmark_helper(lambda *args: [array_ops.identity(x) for x in args],
+                           "identity")
+
+  def benchmarkAddConst(self):
+    self._benchmark_helper(lambda *args: [x + 1 for x in args], "add_const")
+
+  def benchmarkSelect(self):
+    self._benchmark_helper(lambda *args: args[0], "select")
+
+  def benchmarkCast(self):
+    self._benchmark_helper(
+        lambda *args: [math_ops.cast(x, dtypes.float64) for x in args], "cast")
+
+  def _benchmark_helper(self, map_fn, str_id):
+    input_sizes = [(10, 10, 3), (10, 100, 300)]
+    batch_size = 1000
+    for input_size in input_sizes:
+      input_dataset = dataset_ops.Dataset.from_tensor_slices(
+          (np.random.rand(*input_size), np.random.rand(*input_size))).repeat()
+      self._compare(input_dataset, map_fn, batch_size, input_size, str_id)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 7128a50be0..79d5fe87b6 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -175,6 +175,7 @@ cc_library(
     deps = [
         ":function_utils",
         ":graph_utils",
+        ":vectorization_utils",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core/grappler:grappler_item",
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index 07766aa7b3..ad6722a3ae 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/map_vectorization.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
 
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -38,11 +39,11 @@ void CopyAttribute(const string& attr_name, const NodeDef& from, NodeDef* to) {
   (*to->mutable_attr())[attr_name] = from.attr().at(attr_name);
 }
 
-FunctionDef* AddVectorizedFunction(const NodeDef& map_node,
+// Returns a FunctionDef containing a MapDefun op that wraps the original
+// function.
+FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
                                    const FunctionDef& orig_func,
                                    FunctionDefLibrary* library) {
-  // If we decide to use a different method of vectorization, we can just
-  // swap out this part.
   FunctionDef* vectorized_func = library->add_function();
   // Function inputs and outputs are the same as original, just
   // with different shapes.
@@ -82,6 +83,30 @@ FunctionDef* AddVectorizedFunction(const NodeDef& map_node,
   return vectorized_func;
 }
 
+FunctionDef* AddVectorizedFunction(const NodeDef& map_node,
+                                   const FunctionDef& orig_func,
+                                   FunctionDefLibrary* library) {
+  // Vectorizes orig_func naively by wrapping in a MapDefun op, then tries to
+  // do true vectorization with Vectorize.
+  FunctionDef* vectorized_func =
+      CreateMapDefunWrapper(map_node, orig_func, library);
+  NodeDef* map_defun_node = vectorized_func->mutable_node_def()->Mutable(0);
+  DCHECK_EQ(map_defun_node->op(), "MapDefun");
+
+  // Create a copy of the original function so that we can mutate it, and
+  // attach that to the map defun node.
+  FunctionDef* map_defun_fn = library->add_function();
+  *map_defun_fn = orig_func;
+  graph_utils::SetUniqueGraphFunctionName(orig_func.signature().name(), library,
+                                          map_defun_fn);
+  (*map_defun_node->mutable_attr())["f"].mutable_func()->set_name(
+      map_defun_fn->signature().name());
+
+  vectorization_utils::VectorizeMapDefun(vectorized_func, map_defun_fn,
+                                         map_defun_node);
+  return vectorized_func;
+}
+
 bool IsOutputShapesFullyDefined(const NodeDef& node) {
   auto* shapes_attr = gtl::FindOrNull(node.attr(), "output_shapes");
   if (shapes_attr == nullptr) return false;
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index 6a59eb0d32..5dd9d00511 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -333,6 +333,11 @@ void Vectorization::Vectorize() {
 
 void VectorizeMapDefun(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
                        NodeDef* map_defun_node) {
+  if (map_defun_node->attr().at("f").func().name() !=
+      map_defun_fn->signature().name()) {
+    LOG(ERROR) << "`map_defun_fn` and `map_defun_node` do not match";
+    return;
+  }
   Vectorization(outer_scope, map_defun_fn, map_defun_node).Vectorize();
 }
 
-- 
GitLab


From dcd63fab37f686a069b54a7653254bbb15a2bf20 Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Thu, 20 Sep 2018 11:04:25 -0700
Subject: [PATCH 0444/1357] Fix for failing eager:function_test

---
 tensorflow/core/kernels/partitioned_function_ops.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index 42f99a73e6..7a5a2ff8fa 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -100,9 +100,9 @@ class PartitionedCallOp : public AsyncOpKernel {
         // We need to pass global op_registry as default_registry when creating
         // graph. So that graph optimization passes can lookup all possible ops
         // by name.
-        FunctionLibraryDefinition func_lib_def(
-            OpRegistry::Global(), fbody->graph->flib_def().ToProto());
-        auto graph = tensorflow::MakeUnique<Graph>(func_lib_def);
+        auto graph = tensorflow::MakeUnique<Graph>(fbody->graph->flib_def());
+        FunctionLibraryDefinition global_flib(OpRegistry::Global(), {});
+        graph.get()->AddFunctionLibrary(global_flib.ToProto());
         CopyGraph(*fbody->graph, graph.get());
         OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done);
 
@@ -255,10 +255,10 @@ class PartitionedCallOp : public AsyncOpKernel {
     VLOG(3) << "Partitioned function '" << func_.name() << "', yielding "
             << partitions.size() << " shards.";
 
-    FunctionLibraryDefinition func_lib_def(OpRegistry::Global(),
-                                           graph->flib_def().ToProto());
     for (const auto& partition : partitions) {
-      std::unique_ptr<Graph> subgraph(new Graph(func_lib_def));
+      std::unique_ptr<Graph> subgraph(new Graph(graph->flib_def()));
+      FunctionLibraryDefinition global_flib(OpRegistry::Global(), {});
+      subgraph.get()->AddFunctionLibrary(global_flib.ToProto());
       GraphConstructorOptions opts;
       opts.allow_internal_ops = true;
       opts.expect_device_spec = true;
-- 
GitLab


From b874ada5731ca2315600f97a2703561a30b82b89 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Thu, 20 Sep 2018 11:06:21 -0700
Subject: [PATCH 0445/1357] [SE] Use absl instead of TF classes where an absl
 version exists

With the exception of StrCat all of these are using absl already, this change
just removes one layer of indirection.

PiperOrigin-RevId: 213846036
---
 tensorflow/stream_executor/lib/array_slice.h    | 8 +++++---
 tensorflow/stream_executor/lib/inlined_vector.h | 4 ++--
 tensorflow/stream_executor/lib/strcat.h         | 6 +++---
 tensorflow/stream_executor/lib/stringpiece.h    | 5 ++---
 4 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/tensorflow/stream_executor/lib/array_slice.h b/tensorflow/stream_executor/lib/array_slice.h
index 8e3c4ca047..5f4e586762 100644
--- a/tensorflow/stream_executor/lib/array_slice.h
+++ b/tensorflow/stream_executor/lib/array_slice.h
@@ -16,13 +16,15 @@ limitations under the License.
 #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_ARRAY_SLICE_H_
 #define TENSORFLOW_STREAM_EXECUTOR_LIB_ARRAY_SLICE_H_
 
-#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "absl/types/span.h"
 
 namespace stream_executor {
 namespace port {
 
-using tensorflow::gtl::ArraySlice;
-using tensorflow::gtl::MutableArraySlice;
+template <typename T>
+using ArraySlice = absl::Span<const T>;
+template <typename T>
+using MutableArraySlice = absl::Span<T>;
 
 }  // namespace port
 }  // namespace stream_executor
diff --git a/tensorflow/stream_executor/lib/inlined_vector.h b/tensorflow/stream_executor/lib/inlined_vector.h
index 40bdddb180..0198947e5b 100644
--- a/tensorflow/stream_executor/lib/inlined_vector.h
+++ b/tensorflow/stream_executor/lib/inlined_vector.h
@@ -16,12 +16,12 @@ limitations under the License.
 #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_INLINED_VECTOR_H_
 #define TENSORFLOW_STREAM_EXECUTOR_LIB_INLINED_VECTOR_H_
 
-#include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "absl/container/inlined_vector.h"
 
 namespace stream_executor {
 namespace port {
 
-using tensorflow::gtl::InlinedVector;
+using absl::InlinedVector;
 
 }  // namespace port
 }  // namespace stream_executor
diff --git a/tensorflow/stream_executor/lib/strcat.h b/tensorflow/stream_executor/lib/strcat.h
index c959e4df5b..3688d7b4eb 100644
--- a/tensorflow/stream_executor/lib/strcat.h
+++ b/tensorflow/stream_executor/lib/strcat.h
@@ -18,13 +18,13 @@ limitations under the License.
 #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_STRCAT_H_
 #define TENSORFLOW_STREAM_EXECUTOR_LIB_STRCAT_H_
 
-#include "tensorflow/core/lib/strings/strcat.h"
+#include "absl/strings/str_cat.h"
 
 namespace stream_executor {
 namespace port {
 
-using tensorflow::strings::StrCat;
-using tensorflow::strings::StrAppend;
+using absl::StrAppend;
+using absl::StrCat;
 
 }  // namespace port
 }  // namespace stream_executor
diff --git a/tensorflow/stream_executor/lib/stringpiece.h b/tensorflow/stream_executor/lib/stringpiece.h
index b80de5df30..7624910129 100644
--- a/tensorflow/stream_executor/lib/stringpiece.h
+++ b/tensorflow/stream_executor/lib/stringpiece.h
@@ -16,13 +16,12 @@ limitations under the License.
 #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_STRINGPIECE_H_
 #define TENSORFLOW_STREAM_EXECUTOR_LIB_STRINGPIECE_H_
 
-#include "tensorflow/core/lib/core/stringpiece.h"
-#include "tensorflow/stream_executor/platform/port.h"
+#include "absl/strings/string_view.h"
 
 namespace stream_executor {
 namespace port {
 
-using tensorflow::StringPiece;
+using StringPiece = absl::string_view;
 
 }  // namespace port
 }  // namespace stream_executor
-- 
GitLab


From ae59f459cd1e6bd2f2bdeb3b49cfedf0cdaf51a1 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Thu, 20 Sep 2018 11:09:47 -0700
Subject: [PATCH 0446/1357] [data-stats] Adds number of filtered elements as
 scalar summary, also adds number of filtered elements to monitoring counter.

PiperOrigin-RevId: 213846793
---
 .../kernel_tests/stats_dataset_ops_test.py    | 26 ++++++++++
 .../kernel_tests/stats_dataset_test_base.py   |  9 ++++
 .../core/kernels/data/filter_dataset_op.cc    | 48 ++++++++++++++++++-
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
index 719ce2e3fe..14c5cffdf4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
@@ -25,6 +25,7 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
@@ -100,6 +101,31 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
       self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                   100)
 
+  def testFilteredElementsStats(self):
+    stats_aggregator = stats_ops.StatsAggregator()
+    dataset = dataset_ops.Dataset.range(101).filter(
+        lambda x: math_ops.equal(math_ops.mod(x, 3), 0)).apply(
+            stats_ops.set_stats_aggregator(stats_aggregator))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run(iterator.initializer)
+      for i in range(34):
+        self.assertEqual(i * 3, sess.run(next_element))
+        if i is not 0:
+          self._assertSummaryHasScalarValue(
+              sess.run(summary_t), "Filter::dropped_elements", float(i * 2))
+        self._assertSummaryHasScalarValue(
+            sess.run(summary_t), "Filter::filtered_elements", float(i + 1))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      self._assertSummaryHasScalarValue(
+          sess.run(summary_t), "Filter::dropped_elements", 67.0)
+      self._assertSummaryHasScalarValue(
+          sess.run(summary_t), "Filter::filtered_elements", 34.0)
+
   def testReinitialize(self):
     stats_aggregator = stats_ops.StatsAggregator()
     dataset = dataset_ops.Dataset.range(100).apply(
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
index 2f5a44408f..6951564091 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
@@ -52,3 +52,12 @@ class StatsDatasetTestBase(test.TestCase):
         self.assertEqual(expected_value, value.histo.sum)
         return
     self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto))
+
+  def _assertSummaryHasScalarValue(self, summary_str, tag, expected_value):
+    summary_proto = summary_pb2.Summary()
+    summary_proto.ParseFromString(summary_str)
+    for value in summary_proto.value:
+      if tag == value.tag:
+        self.assertEqual(expected_value, value.simple_value)
+        return
+    self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto))
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index 19c35f94a6..00884314a9 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -14,11 +14,13 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -139,7 +141,13 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
     class Iterator : public DatasetIterator<FilterDatasetBase> {
      public:
       explicit Iterator(const Params& params)
-          : DatasetIterator<FilterDatasetBase>(params) {}
+          : DatasetIterator<FilterDatasetBase>(params),
+            filtered_elements_(0),
+            dropped_elements_(0) {
+        std::vector<string> components =
+            str_util::Split(params.prefix, "::", str_util::SkipEmpty());
+        prefix_end_ = components.back();
+      }
 
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
@@ -154,6 +162,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
         // `input_impl_` and `f` are thread-safe. However, if multiple
         // threads enter this method, outputs may be observed in a
         // non-deterministic order.
+        auto stats_aggregator = ctx->stats_aggregator();
         bool matched;
         do {
           {
@@ -176,8 +185,34 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
           if (!matched) {
             // Clear the output tensor list since it didn't match.
             out_tensors->clear();
+            if (stats_aggregator) {
+              mutex_lock l(mu_);
+              dropped_elements_++;
+              stats_aggregator->AddScalar(
+                  strings::StrCat(prefix_end_, "::dropped_elements"),
+                  static_cast<float>((dropped_elements_)));
+              // TODO(shivaniagrawal): multiple pipelines would collect
+              // aggregated number of dropped elements for all the pipelines,
+              // exploit tagged_context here.
+              stats_aggregator->IncrementCounter(
+                  prefix_end_, "dropped_elements", static_cast<float>(1));
+            }
           }
         } while (!matched);
+        // TODO(shivaniagrawal): add ratio of dropped_elements and
+        // filtered_elements as a histogram.
+        if (stats_aggregator) {
+          mutex_lock l(mu_);
+          filtered_elements_++;
+          stats_aggregator->AddScalar(
+              strings::StrCat(prefix_end_, "::filtered_elements"),
+              static_cast<float>((filtered_elements_)));
+          // TODO(shivaniagrawal): multiple pipelines would collect aggregated
+          // number of filtered elements for all the pipelines, exploit
+          // tagged_context here.
+          stats_aggregator->IncrementCounter(prefix_end_, "filtered_elements",
+                                             static_cast<float>(1));
+        }
         *end_of_sequence = false;
         return Status::OK();
       }
@@ -190,6 +225,10 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
         else
           TF_RETURN_IF_ERROR(
               writer->WriteScalar(full_name("input_impls_empty"), ""));
+        TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("filtered_elements"),
+                                               filtered_elements_));
+        TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("dropped_elements"),
+                                               dropped_elements_));
         return Status::OK();
       }
 
@@ -200,12 +239,19 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
           input_impl_.reset();
         else
           TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("filtered_elements"),
+                                              &filtered_elements_));
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("dropped_elements"),
+                                              &dropped_elements_));
         return Status::OK();
       }
 
      private:
       mutex mu_;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
+      int64 filtered_elements_ GUARDED_BY(mu_);
+      int64 dropped_elements_ GUARDED_BY(mu_);
+      string prefix_end_;
     };
 
     const DatasetBase* const input_;
-- 
GitLab


From 30756301bee0de2b1c16a74a710bd7bf29be468d Mon Sep 17 00:00:00 2001
From: Daryl Ng <darylng@google.com>
Date: Thu, 20 Sep 2018 11:23:28 -0700
Subject: [PATCH 0447/1357] Moving tpu_embedding_config.proto to
 tpu_embedding_configuration.proto, refactoring it, adding several new fields
 and an EmbeddingOutputLayout message to provide experimental support for
 controlling the embedding output.

PiperOrigin-RevId: 213849572
---
 tensorflow/contrib/tpu/BUILD                  |  4 +-
 .../contrib/tpu/ops/tpu_embedding_ops.cc      | 22 ++---
 tensorflow/contrib/tpu/proto/BUILD            | 18 +++-
 .../tpu/proto/tpu_embedding_config.proto      | 66 -------------
 .../proto/tpu_embedding_configuration.proto   | 95 +++++++++++++++++++
 .../proto/tpu_embedding_output_layout.proto   | 75 +++++++++++++++
 tensorflow/contrib/tpu/python/tpu/tpu.py      |  2 +-
 7 files changed, 199 insertions(+), 83 deletions(-)
 delete mode 100644 tensorflow/contrib/tpu/proto/tpu_embedding_config.proto
 create mode 100644 tensorflow/contrib/tpu/proto/tpu_embedding_configuration.proto
 create mode 100644 tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.proto

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 298ffc1ded..87d00aca05 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -80,7 +80,7 @@ tf_gen_op_libs(
         "tpu_embedding_ops",
     ],
     deps = [
-        "//tensorflow/contrib/tpu/proto:tpu_embedding_config_proto_cc",
+        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
         "//tensorflow/core:lib_proto_parsing",
         "//tensorflow/core:protos_all_cc",
     ],
@@ -99,7 +99,7 @@ tf_custom_op_library(
         "ops/tpu_embedding_ops.cc",
     ],
     deps = [
-        "//tensorflow/contrib/tpu/proto:tpu_embedding_config_proto_cc",
+        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
         "//tensorflow/core:lib_proto_parsing",
     ],
 )
diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index 72d37f774c..18b98939b8 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tpu/proto/tpu_embedding_config.pb.h"
+#include "tensorflow/contrib/tpu/proto/tpu_embedding_configuration.pb.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/shape_inference.h"
@@ -88,12 +88,12 @@ Status GradientDescentShapes(shape_inference::InferenceContext *c) {
 
   int table_id;
   TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
-  int64 num_tables = config.table_config_size();
+  int64 num_tables = config.table_descriptor_size();
   if (table_id >= num_tables) {
     return errors::InvalidArgument("Table id >= num_tables");
   }
-  int64 width = config.table_config(table_id).width();
-  int64 num_rows = config.table_config(table_id).num_rows();
+  int64 width = config.table_descriptor(table_id).dimension();
+  int64 num_rows = config.table_descriptor(table_id).vocabulary_size();
 
   TF_RETURN_IF_ERROR(c->set_output("parameters", {c->Matrix(num_rows, width)}));
   return Status::OK();
@@ -160,12 +160,12 @@ Status AdagradShapes(shape_inference::InferenceContext *c) {
 
   int table_id;
   TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
-  int64 num_tables = config.table_config_size();
+  int64 num_tables = config.table_descriptor_size();
   if (table_id >= num_tables) {
     return errors::InvalidArgument("Table id >= num_tables");
   }
-  int64 width = config.table_config(table_id).width();
-  int64 num_rows = config.table_config(table_id).num_rows();
+  int64 width = config.table_descriptor(table_id).dimension();
+  int64 num_rows = config.table_descriptor(table_id).vocabulary_size();
 
   TF_RETURN_IF_ERROR(c->set_output("parameters", {c->Matrix(num_rows, width)}));
   TF_RETURN_IF_ERROR(
@@ -244,11 +244,11 @@ Status ActivationShapes(shape_inference::InferenceContext *c) {
   if (!config.ParseFromString(config_string)) {
     return errors::InvalidArgument("Malformed tpu_embedding_config.");
   }
-  int64 batch_size = config.batch_size();
-  int64 num_tables = config.table_config_size();
+  int64 batch_size = config.batch_size_per_tensor_core();
+  int64 num_tables = config.table_descriptor_size();
   for (int table_id = 0; table_id < num_tables; ++table_id) {
-    int64 width = config.table_config(table_id).width();
-    int64 num_features = config.table_config(table_id).num_features();
+    int64 width = config.table_descriptor(table_id).dimension();
+    int64 num_features = config.table_descriptor(table_id).vocabulary_size();
     c->set_output(table_id, c->Matrix(batch_size * num_features, width));
   }
   return Status::OK();
diff --git a/tensorflow/contrib/tpu/proto/BUILD b/tensorflow/contrib/tpu/proto/BUILD
index 598b73b438..c20cab844c 100644
--- a/tensorflow/contrib/tpu/proto/BUILD
+++ b/tensorflow/contrib/tpu/proto/BUILD
@@ -10,12 +10,15 @@ load(
 )
 
 tf_proto_library(
-    name = "tpu_embedding_config_proto",
+    name = "tpu_embedding_configuration_proto",
     srcs = [
-        "tpu_embedding_config.proto",
+        "tpu_embedding_configuration.proto",
     ],
     cc_api_version = 2,
-    protodeps = [":optimization_parameters_proto"],
+    protodeps = [
+        ":tpu_embedding_output_layout_proto",
+        ":optimization_parameters_proto",
+    ],
     visibility = ["//visibility:public"],
 )
 
@@ -28,6 +31,15 @@ tf_proto_library(
     visibility = ["//visibility:public"],
 )
 
+tf_proto_library(
+    name = "tpu_embedding_output_layout_proto",
+    srcs = [
+        "tpu_embedding_output_layout.proto",
+    ],
+    cc_api_version = 2,
+    visibility = ["//visibility:public"],
+)
+
 tf_proto_library(
     name = "topology_proto",
     srcs = [
diff --git a/tensorflow/contrib/tpu/proto/tpu_embedding_config.proto b/tensorflow/contrib/tpu/proto/tpu_embedding_config.proto
deleted file mode 100644
index 3476cc8953..0000000000
--- a/tensorflow/contrib/tpu/proto/tpu_embedding_config.proto
+++ /dev/null
@@ -1,66 +0,0 @@
-syntax = "proto3";
-
-package tensorflow.tpu;
-
-import "tensorflow/contrib/tpu/proto/optimization_parameters.proto";
-
-// The TPUEmbeddingConfiguration contains specification of TPU Embedding lookups
-// and gradient updates separate from the TF Graph.
-message TPUEmbeddingConfiguration {
-  // model_mode specifies whether the model is to be run in training or
-  // inference. In inference mode, gradient updates to embedding tables are not
-  // performed.
-  enum ModelMode {
-    INVALID = 0;
-    TRAINING = 1;
-    INFERENCE = 2;
-  }
-
-  ModelMode model_mode = 1;
-
-  // num_hosts is the number of host CPU systems in the training/inference job.
-  // Each embedding table must be sharded into num_hosts separate Variables,
-  // placed separately on the num_hosts CPU devices in the cluster. Sharding
-  // will be performed equivalently to the 'div' sharding_strategy option of
-  // embedding_lookup() and embedding_lookup_sparse().
-  int32 num_hosts = 2;
-
-  // The total number of TensorNodes. This is equal to num_hosts times the
-  // number of TensorNodes attached to each host.
-  int32 num_tensornodes = 3;
-
-  // The number of training examples per TensorNode.
-  int32 batch_size = 4;
-
-  // Each Embedding
-  message TPUEmbeddingTable {
-    // Name of the embedding table. This will be used to name Variables in the
-    // Tensorflow Graph.
-    string name = 1;
-
-    // Number of rows of the embedding table. The Variable created to hold the
-    // learned embedding table values will have shape (num_rows, width).
-    int32 num_rows = 3;
-
-    // Width of the embedding table. The Variable created to hold the
-    // learned embedding table values will have shape (num_rows, width).
-    int32 width = 4;
-
-    // Number of distinct embedding activation vectors per training example
-    // produced by lookups into this table during model evaluation. For each
-    // table, the Graph will receive an activations Tensor of shape
-    //   (batch_size * table.num_features, table.width).
-    // For example, num_features = 1 produces equivalent behavior to a single
-    // tf.nn.embedding_lookup() call. In the case of 'multivalent' embeddings,
-    // (i.e. tf.nn.embedding_lookup_sparse()) which compute weighted averages of
-    // embedding table rows, num_features is the number of vectors produced
-    // after averaging. In sequence models num_features is typically equal
-    // to the sequence length, since each sequence element must be represented
-    // separately to the convolutional or recurrent network.
-    int32 num_features = 5;
-
-    OptimizationParameters optimization_parameters = 6;
-  }
-
-  repeated TPUEmbeddingTable table_config = 5;
-}
diff --git a/tensorflow/contrib/tpu/proto/tpu_embedding_configuration.proto b/tensorflow/contrib/tpu/proto/tpu_embedding_configuration.proto
new file mode 100644
index 0000000000..da19b135d7
--- /dev/null
+++ b/tensorflow/contrib/tpu/proto/tpu_embedding_configuration.proto
@@ -0,0 +1,95 @@
+syntax = "proto3";
+
+package tensorflow.tpu;
+
+import "tensorflow/contrib/tpu/proto/optimization_parameters.proto";
+import "tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.proto";
+
+message TPUEmbeddingConfiguration {
+  // Description of the various embedding tables.
+  message TableDescriptor {
+    // Name of the table.
+    string name = 1;
+    // Size of the vocabulary (i.e., number of rows) in the table.
+    int32 vocabulary_size = 2;
+    // The embedding dimension (i.e., the width of the embedding table).
+    int32 dimension = 3;
+    // Number of features mapped to this table.
+    int32 num_features = 4;
+    // Details of the learning algorithm used to update the embedding
+    // parameters.
+    OptimizationParameters optimization_parameters = 5;
+  }
+  repeated TableDescriptor table_descriptor = 1;
+
+  // Mode. Should the embedding layer program be run for inference (just forward
+  // pass), training (both forward and backward pass) or just the backward_pass.
+  enum Mode {
+    UNSPECIFIED = 0;
+    INFERENCE = 1;
+    TRAINING = 2;
+    BACKWARD_PASS_ONLY = 3;
+  }
+  Mode mode = 2;
+
+  // Number of samples in each batch of embedding layer activations sent to
+  // the TensorCore.
+  int32 batch_size_per_tensor_core = 3;
+
+  // Number of TPU hosts used for inference/training.
+  int32 num_hosts = 4;
+
+  // Number of TensorCore used for inference/training.
+  int32 num_tensor_cores = 5;
+
+  // Sharding strategy of the embedding tables among the hosts.
+  // If the sharding_strategy is "mod", each id is assigned to host
+  // "id % num_hosts". For instance, 13 ids are split across 5 hosts as:
+  // [[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]].
+  // If the sharding_strategy is "div", ids are assigned to hosts in a
+  // contiguous manner. In this case, 13 ids are split across 5 hosts as:
+  // [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]].
+  // In both the strategies, if the id space does not evenly divide the number
+  // of hosts, each of the first "table_descriptor.num_ids % num_hosts" hosts
+  // will be assigned one more id.
+  // This partitioning strategy exactly follows that in the embedding_lookup
+  // TensorFlow function at tensorflow/python/ops/embedding_ops.py.
+  enum ShardingStrategy {
+    DIV_DEFAULT = 0;
+    MOD = 1;
+  }
+  ShardingStrategy sharding_strategy = 6;
+
+  // This parameter determines if the execution of the sparse core will be
+  // pipelined with that of the TensorCore. This parameter only affects results
+  // when mode=TRAINING. If mode=INFERENCE or BACKWARD_PASS_ONLY, this parameter
+  // does not affect execution and hence, is a don't care value.
+  //
+  // false: The execution of the sparse core is not pipelined with that of the
+  // TensorCore. The forward pass of every step on the sparse core is executed
+  // only after the backward pass of the previous step is complete. And the
+  // backward pass on the sparse core is executed only after the embedding
+  // gradients have been computed on the TensorCore on every step. This ensures
+  // that the activations on every step observe the gradient updates from the
+  // previous step on both the sparse core and the TensorCore.
+  //
+  // true: The execution of the sparse core is pipelined with that of the
+  // TensorCore. The forward pass of every step on the sparse core can be
+  // executed after the forward pass of the previous step is complete without
+  // waiting for the backward pass. This improves the utilization of the sparse
+  // core allowing it to process step N+1 while the embedding gradients for step
+  // N are computed on the TensorCore. The backward pass of every step on the
+  // sparse core is executed directly after the forward pass for the next step
+  // is complete. The drawback is that embedding activations for step N+1 do not
+  // observe the embedding gradient updates from step N. This could affect model
+  // quality if step N and N+1 involve the same set of embedding IDs. However,
+  // since the embedding updates are sparse, this is generally not considered a
+  // problem.
+  bool pipeline_execution_with_tensor_core = 7;
+
+  // Extended output layout information; if not provided, a compatibility mode
+  // will use defaults that match the old layout. Providing a value for this
+  // field is EXPERIMENTAL and most ways of filling it will probably break. Do
+  // not set it unless you know what you are doing.
+  TPUEmbeddingOutputLayout output_layout = 8;
+}
diff --git a/tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.proto b/tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.proto
new file mode 100644
index 0000000000..aed30b2f22
--- /dev/null
+++ b/tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.proto
@@ -0,0 +1,75 @@
+syntax = "proto3";
+
+package tensorflow.tpu;
+
+// In the comments here, "layout" refers to the top-level EmbeddingOutputLayout
+// proto contained in the TPUEmbeddingConfiguration.
+
+// The embedding output consists of a list of tensors, each specified by an
+// EmbeddingOutputTensor proto within the EmbeddingOutputLayout (the "output"
+// field). Each table and feature lookup is then placed into some number of
+// particular positions within some output tensor (identified by "tensor_index"
+// within OutputLocation). The tree of table lookups, feature lookups, and
+// output locations is specified by the
+// "table(table_id).feature(feature_id).output_location" repeated fields within
+// EmbeddingOutputLayout.
+
+message TPUEmbeddingOutputLayout {
+  // Location of one copy of the feature's data.
+  message OutputLocation {
+    // Which output tensor this copy of the feature will go into. Must be
+    // between 0 and layout.output_size().
+    int32 tensor_index = 1;
+
+    // Offset in dimension 0 for this feature copy. Must be between 0 and
+    // layout.output(tensor_index).dim0_size_per_sample().
+    int32 dim0_offset = 2;
+
+    // Offset in dimension 1 for this feature copy. Must be between 0 and
+    // layout.output(tensor_index).dim1_size() - table width; repeated or
+    // partially/fully overlapping values are allowed and results in the same
+    // range will be summed (with the gradients replicated in the backward
+    // pass).
+    int32 dim1_offset = 3;
+  }
+
+  // Description of the output placement for one feature.
+  message FeatureDescriptor {
+    // Typically, only one copy of each feature is used, but multiple are
+    // allowed and the same data will be copied to all of them (with the
+    // gradients summed in the backward pass).
+    repeated OutputLocation output_location = 1;
+  }
+
+  // Description of the output placement for features of one table.
+  message TableDescriptor {
+    // Output locations for each feature loaded from this table.
+    repeated FeatureDescriptor feature = 1;
+  }
+  // Output locations for each feature of each table.
+  repeated TableDescriptor table = 1;
+
+  // Data layout and shape computation information for a single output tensor.
+  // Any unused locations in the tensor will be filled with zeros, and
+  // corresponding gradients will be ignored.
+
+  // Size and layout information for 2-D tensors.
+  message TwoDOutputTensor {
+    // Multiplier for output dimension 0 size; used to match legacy format that
+    // stacks features within a sample in dimension 0.
+    int32 dim0_size_per_sample = 2;
+
+    // The size (in dimension 1) of this output tensor.
+    int32 dim1_size = 1;
+  }
+
+  // Format information for a single output tensor.
+  message EmbeddingOutputTensor {
+    oneof output_format {
+      TwoDOutputTensor two_d = 4;
+    }
+  }
+
+  // Shape and layout information for each tensor.
+  repeated EmbeddingOutputTensor output = 2;
+}
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 593f1d909e..7815d81a5b 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -76,7 +76,7 @@ def initialize_system(embedding_config=None, job=None):
   """Initializes a distributed TPU system for use with TensorFlow.
 
   Args:
-    embedding_config: If not None, an `EmbeddingLayerConfiguration` proto
+    embedding_config: If not None, a `TPUEmbeddingConfiguration` proto
       describing the desired configuration of the hardware embedding lookup
       tables. If embedding_config is None, no hardware embeddings can be used.
     job: The job (the XXX in TensorFlow device specification /job:XXX) that
-- 
GitLab


From a755420354d4e74e66b17d7a8c14fa421e4a7bae Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 20 Sep 2018 12:31:15 -0700
Subject: [PATCH 0448/1357] Replace the OrderedDict with a basic list/dict
 solution. OrderedDict is problematic to use in eager because of the circular
 references it creates.

PiperOrigin-RevId: 213862402
---
 tensorflow/python/autograph/core/converter.py | 8 +++-----
 tensorflow/python/autograph/impl/api.py       | 9 +++++----
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index 7b3905fdee..80928ae7f4 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -63,10 +63,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
 from enum import Enum
 
-
 from tensorflow.python.autograph.core import config
 from tensorflow.python.autograph.core import naming
 from tensorflow.python.autograph.pyct import anno
@@ -129,9 +127,8 @@ class ProgramContext(object):
     self.autograph_module = autograph_module
     self.uncompiled_modules = uncompiled_modules
 
-    # Required to output dependencies in discovery order, which should match
-    # the reverse dependency order.
-    self.dependency_cache = collections.OrderedDict()
+    self.conversion_order = []
+    self.dependency_cache = {}
     self.additional_imports = set()
     self.name_map = {}
 
@@ -177,6 +174,7 @@ class ProgramContext(object):
         self.name_map[o] = name
 
   def add_to_cache(self, original_entity, converted_ast):
+    self.conversion_order.append(original_entity)
     self.dependency_cache[original_entity] = converted_ast
 
 
diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index ee2467e0dc..1dc97d2331 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -302,8 +302,9 @@ def to_graph(e,
                                                   arg_types)
 
   nodes = []
-  for dep in reversed(tuple(program_ctx.dependency_cache.values())):
-    nodes.extend(dep)
+  for dep in reversed(program_ctx.conversion_order):
+    nodes.extend(program_ctx.dependency_cache[dep])
+
   compiled_module, compiled_src = compiler.ast_to_object(
       nodes,
       source_prefix=program_ctx.required_imports,
@@ -371,7 +372,7 @@ def to_code(e,
   conversion.entity_to_graph(e, program_ctx, arg_values, arg_types)
 
   code = '\n'.join(
-      compiler.ast_to_source(dep, indentation)
-      for dep in reversed(tuple(program_ctx.dependency_cache.values())))
+      compiler.ast_to_source(program_ctx.dependency_cache[dep], indentation)
+      for dep in reversed(program_ctx.conversion_order))
 
   return program_ctx.required_imports + '\n\n' + code
-- 
GitLab


From 350effcc2fd95c723c92267cf13fcd38777a2a98 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Thu, 20 Sep 2018 12:33:49 -0700
Subject: [PATCH 0449/1357] Fix _handle_data of variant and resource type
 outputs of While op in while_v2.

https://github.com/tensorflow/community/pull/13

PiperOrigin-RevId: 213862844
---
 tensorflow/python/kernel_tests/BUILD          |  1 -
 .../python/kernel_tests/while_v2_test.py      | 56 +++++++++++++------
 tensorflow/python/ops/while_v2.py             |  7 +++
 3 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 9dc6df77f1..5f93682de7 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -3235,7 +3235,6 @@ tf_py_test(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:gradients_impl",
         "//tensorflow/python:list_ops",
-        "//tensorflow/python:tensor_shape",
         "//tensorflow/python:tf_optimizer",
         "//tensorflow/python:while_v2",
     ],
diff --git a/tensorflow/python/kernel_tests/while_v2_test.py b/tensorflow/python/kernel_tests/while_v2_test.py
index d00e39d482..0c3b72408e 100644
--- a/tensorflow/python/kernel_tests/while_v2_test.py
+++ b/tensorflow/python/kernel_tests/while_v2_test.py
@@ -25,7 +25,6 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import meta_graph
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
 from tensorflow.python.grappler import tf_optimizer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients_impl
@@ -218,30 +217,55 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
       self.assertSequenceEqual(sess.run(grad), [32.])
 
   @parameterized.named_parameters(
-      ("Unknown shape", None),
-      ("Partially defined shape", [None]),
-      ("Fully defined shape", [1, 2]),
+      ("UnknownShape", None),
+      ("PartiallyDefinedShape", [None, 2]),
+      ("FullyDefinedShape", [1, 2]),
   )
   def testTensorListOutputElementShape(self, shape):
-    self.skipTest("b/115982901")
+
+    def MatchShape(actual_tensor_shape):
+      # Compare the shapes, treating None dimensions as equal. We do not
+      # directly check actual_tensor_shape and tf.TensorShape(shape) for
+      # equality because tf.Dimension.__eq__ returns None if either dimension is
+      # None.
+      if shape is None:
+        self.assertIsNone(actual_tensor_shape.dims)
+      else:
+        self.assertListEqual(actual_tensor_shape.as_list(), shape)
+
+    def GetAccumulatorForInputAtIndex(while_op, idx):
+      body_graph = while_v2._get_body_graph(while_op)
+      y_input_t = body_graph.inputs[idx]
+      push_back_node = [c for c in y_input_t.consumers()
+                        if c.type == "TensorListPushBack"][0]
+      output_idx = body_graph.outputs.index(push_back_node.outputs[0])
+      return while_op.outputs[output_idx]
+
     x = constant_op.constant(2.)
     y = array_ops.placeholder(dtype=dtypes.float32, shape=shape)
-    ret = while_loop_v2(lambda v, u: v < 8., lambda v, u: (v * v, u), [x, y])
 
+    # Forward pass.
+    ret = while_loop_v2(lambda v, u: v < 8., lambda v, u: (v * v, u), [x, y])
+    while_op = ret[0].op
     # Get the TensorList output of While op containing the accumulated values
     # of y.
-    while_op = ret[0].op
-    body_graph = while_v2._get_body_graph(while_op)
-    # body_graph.inputs: [counter_arg, x_arg, y_arg, *accumulators]
-    y_input_t = body_graph.inputs[2]
-    push_back_node = [c for c in y_input_t.consumers()
-                      if c.type == "TensorListPushBack"][0]
-    output_idx = body_graph.outputs.index(push_back_node.outputs[0])
-    output = while_op.outputs[output_idx]
-
+    # while_op.inputs: [counter_arg, x_arg, y_arg, *accumulators]
+    output = GetAccumulatorForInputAtIndex(while_op, 2)
     _, val = list_ops.tensor_list_pop_back(output,
                                            element_dtype=dtypes.float32)
-    self.assertEqual(val.shape, tensor_shape.TensorShape(shape))
+    MatchShape(val.shape)
+
+    # Gradient pass.
+    grad = gradients_impl.gradients(ret[1], y)
+    grad_while_op = grad[0].op
+    # Get the TensorList output of gradient While op containing the accumulated
+    # values of grad_y.
+    # grad_while_op.inputs:
+    # [counter_arg, total_iters_arg, grad_x_arg, grad_y_arg, *other_args]
+    grad_output = GetAccumulatorForInputAtIndex(grad_while_op, 4)
+    _, val = list_ops.tensor_list_pop_back(grad_output,
+                                           element_dtype=dtypes.float32)
+    MatchShape(val.shape)
 
 
 def ScalarShape():
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index 801217fe66..875be31602 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -151,6 +151,7 @@ def while_loop(cond, body, loop_vars, name=None):
         cond_v2._create_new_tf_function(body_graph),
         name=scope)
 
+    _copy_handle_data(body_graph.outputs, outputs)
     _maybe_set_lowering_attr(outputs[0].op)
 
   # First var is loop counter.
@@ -213,6 +214,7 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
       cond_v2._create_new_tf_function(body_grad_graph),
       name=_get_unique_name("%s_grad" % op.name))
 
+  _copy_handle_data(body_grad_graph.outputs, outputs)
   _maybe_set_lowering_attr(outputs[0].op)
 
   # outputs[0] is the loop counter.
@@ -529,6 +531,11 @@ class _WhileBodyGradFuncGraph(function.FuncGraph):
     return captured_tensor
 
 
+def _copy_handle_data(src_tensors, tgt_tensors):
+  for src_t, tgt_t in zip(src_tensors, tgt_tensors):
+    function._copy_handle_data(src_t, tgt_t)
+
+
 # TODO(srbs): Move to common utils for cond_v2 and while_v2.
 def _maybe_set_lowering_attr(op):
   """Sets the flag to enable lowering on the `While` op if necessary.
-- 
GitLab


From 4aa639c0cbb47f4707f735e0cc80f4c39506d928 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 12:37:30 -0700
Subject: [PATCH 0450/1357] Add searchsorted (ie lower/upper bound) op.

PiperOrigin-RevId: 213863392
---
 tensorflow/core/BUILD                         |   1 +
 .../api_def/base_api/api_def_LowerBound.pbtxt |  45 ++++
 .../api_def/base_api/api_def_UpperBound.pbtxt |  45 ++++
 tensorflow/core/kernels/BUILD                 |   7 +
 tensorflow/core/kernels/searchsorted_op.cc    | 249 ++++++++++++++++++
 tensorflow/core/kernels/searchsorted_op.h     |  52 ++++
 .../core/kernels/searchsorted_op_gpu.cu.cc    | 126 +++++++++
 tensorflow/core/ops/array_ops.cc              |  28 ++
 tensorflow/core/util/cuda_kernel_helper.h     |  31 ++-
 .../python/kernel_tests/array_ops_test.py     | 198 ++++++++++++++
 tensorflow/python/ops/array_ops.py            |  61 +++++
 .../tools/api/golden/v1/tensorflow.pbtxt      |   4 +
 .../tools/api/golden/v2/tensorflow.pbtxt      |   4 +
 13 files changed, 846 insertions(+), 5 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_LowerBound.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_UpperBound.pbtxt
 create mode 100644 tensorflow/core/kernels/searchsorted_op.cc
 create mode 100644 tensorflow/core/kernels/searchsorted_op.h
 create mode 100644 tensorflow/core/kernels/searchsorted_op_gpu.cu.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index e82dd13b31..ed1818f834 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1336,6 +1336,7 @@ cc_library(
         "//tensorflow/core/kernels:rpc_op",
         "//tensorflow/core/kernels:scoped_allocator_ops",
         "//tensorflow/core/kernels:sdca_ops",
+        "//tensorflow/core/kernels:searchsorted_op",
         "//tensorflow/core/kernels:set_kernels",
         "//tensorflow/core/kernels:sparse",
         "//tensorflow/core/kernels:state",
diff --git a/tensorflow/core/api_def/base_api/api_def_LowerBound.pbtxt b/tensorflow/core/api_def/base_api/api_def_LowerBound.pbtxt
new file mode 100644
index 0000000000..5ce825ae04
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LowerBound.pbtxt
@@ -0,0 +1,45 @@
+op {
+  graph_op_name: "LowerBound"
+  visibility: HIDDEN
+  in_arg {
+    name: "sorted_inputs"
+    description: <<END
+2-D Tensor where each row is ordered.
+END
+  }
+  in_arg {
+    name: "values"
+    description: <<END
+2-D Tensor with the same numbers of rows as `sorted_search_values`. Contains
+the values that will be searched for in `sorted_search_values`.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+A `Tensor` with the same shape as `values`.  It contains the first scalar index
+into the last dimension where values can be inserted without changing the
+ordered property.
+END
+  }
+  summary: "Applies lower_bound(sorted_search_values, values) along each row."
+  description: <<END
+Each set of rows with the same index in (sorted_inputs, values) is treated
+independently.  The resulting row is the equivalent of calling
+`np.searchsorted(sorted_inputs, values, side='left')`.
+
+The result is not a global index to the entire 
+`Tensor`, but rather just the index in the last dimension.
+
+A 2-D example:
+  sorted_sequence = [[0, 3, 9, 9, 10],
+                     [1, 2, 3, 4, 5]]
+  values = [[2, 4, 9],
+            [0, 2, 6]]
+
+  result = LowerBound(sorted_sequence, values)
+
+  result == [[1, 2, 2],
+             [0, 1, 5]]
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_UpperBound.pbtxt b/tensorflow/core/api_def/base_api/api_def_UpperBound.pbtxt
new file mode 100644
index 0000000000..0630f6e82a
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_UpperBound.pbtxt
@@ -0,0 +1,45 @@
+op {
+  graph_op_name: "UpperBound"
+  visibility: HIDDEN
+  in_arg {
+    name: "sorted_inputs"
+    description: <<END
+2-D Tensor where each row is ordered.
+END
+  }
+  in_arg {
+    name: "values"
+    description: <<END
+2-D Tensor with the same numbers of rows as `sorted_search_values`. Contains
+the values that will be searched for in `sorted_search_values`.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+A `Tensor` with the same shape as `values`.  It contains the last scalar index
+into the last dimension where values can be inserted without changing the
+ordered property.
+END
+  }
+  summary: "Applies upper_bound(sorted_search_values, values) along each row."
+  description: <<END
+Each set of rows with the same index in (sorted_inputs, values) is treated
+independently.  The resulting row is the equivalent of calling
+`np.searchsorted(sorted_inputs, values, side='right')`.
+
+The result is not a global index to the entire 
+`Tensor`, but rather just the index in the last dimension.
+
+A 2-D example:
+  sorted_sequence = [[0, 3, 9, 9, 10],
+                     [1, 2, 3, 4, 5]]
+  values = [[2, 4, 9],
+            [0, 2, 6]]
+
+  result = UpperBound(sorted_sequence, values)
+
+  result == [[1, 2, 4],
+             [0, 2, 5]]
+END
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b0d04a7213..08245e6ea0 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -641,6 +641,7 @@ cc_library(
         ":reshape_op",
         ":reverse_op",
         ":reverse_sequence_op",
+        ":searchsorted_op",
         ":shape_ops",
         ":slice_op",
         ":snapshot_op",
@@ -873,6 +874,12 @@ tf_kernel_library(
     deps = ARRAY_DEPS + [":split_lib"],
 )
 
+tf_kernel_library(
+    name = "searchsorted_op",
+    prefix = "searchsorted_op",
+    deps = ARRAY_DEPS,
+)
+
 tf_kernel_library(
     name = "inplace_ops",
     prefix = "inplace_ops",
diff --git a/tensorflow/core/kernels/searchsorted_op.cc b/tensorflow/core/kernels/searchsorted_op.cc
new file mode 100644
index 0000000000..dc627ac77a
--- /dev/null
+++ b/tensorflow/core/kernels/searchsorted_op.cc
@@ -0,0 +1,249 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/searchsorted_op.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+template <typename T, typename OutType>
+struct UpperBoundFunctor<CPUDevice, T, OutType> {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& sorted_inputs,
+                        const typename TTypes<T, 1>::ConstTensor& values,
+                        int batch_size, int num_inputs, int num_values,
+                        typename TTypes<OutType, 1>::Tensor* output) {
+    // TODO(eriche): If anyone ever needs this to be faster, we can multithread.
+    for (int b = 0; b < batch_size; ++b) {
+      const T* sorted_inputs_ptr = sorted_inputs.data() + b * num_inputs;
+      OutType* output_ptr = output->data() + b * num_values;
+      for (int i = 0; i < num_values; ++i) {
+        output_ptr[i] =
+            std::upper_bound(sorted_inputs_ptr, sorted_inputs_ptr + num_inputs,
+                             values(i + b * num_values)) -
+            sorted_inputs_ptr;
+      }
+    }
+
+    return Status::OK();
+  }
+};
+
+template <typename T, typename OutType>
+struct LowerBoundFunctor<CPUDevice, T, OutType> {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& sorted_inputs,
+                        const typename TTypes<T, 1>::ConstTensor& values,
+                        int batch_size, int num_inputs, int num_values,
+                        typename TTypes<OutType, 1>::Tensor* output) {
+    // TODO(eriche): If anyone ever needs this to be faster, we can multithread.
+    for (int b = 0; b < batch_size; ++b) {
+      const T* sorted_inputs_ptr = sorted_inputs.data() + b * num_inputs;
+      OutType* output_ptr = output->data() + b * num_values;
+      for (int i = 0; i < num_values; ++i) {
+        output_ptr[i] =
+            std::lower_bound(sorted_inputs_ptr, sorted_inputs_ptr + num_inputs,
+                             values(i + b * num_values)) -
+            sorted_inputs_ptr;
+      }
+    }
+
+    return Status::OK();
+  }
+};
+}  // namespace functor
+
+template <typename Device, typename T, typename OutType>
+class UpperBoundOp : public OpKernel {
+ public:
+  explicit UpperBoundOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& sorted_inputs_t = ctx->input(0);
+    const Tensor& values_t = ctx->input(1);
+
+    // must have same batch dim_size for both
+    OP_REQUIRES(ctx, sorted_inputs_t.dim_size(0) == values_t.dim_size(0),
+                Status(error::INVALID_ARGUMENT,
+                       "Leading dim_size of both tensors must match."));
+
+    // this is required because we do indexing in int32 on the GPU
+    OP_REQUIRES(ctx, values_t.NumElements() < std::numeric_limits<int>::max(),
+                Status(error::INVALID_ARGUMENT,
+                       "values tensor size must less than INT_MAX"));
+
+    Tensor* output_t;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, values_t.shape(), &output_t));
+
+    if (output_t->dtype() == DT_INT32) {
+      OP_REQUIRES(ctx,
+                  FastBoundsCheck(sorted_inputs_t.dim_size(1),
+                                  std::numeric_limits<int>::max()),
+                  errors::InvalidArgument("trailing dim_size must less than "
+                                          "INT_MAX for int32 output type, was ",
+                                          sorted_inputs_t.dim_size(1)));
+    }
+
+    auto output = output_t->template flat<OutType>();
+    const auto sorted_inputs = sorted_inputs_t.template flat<T>();
+    const auto values = values_t.template flat<T>();
+    OP_REQUIRES_OK(
+        ctx, functor::UpperBoundFunctor<Device, T, OutType>::Compute(
+                 ctx, sorted_inputs, values, sorted_inputs_t.dim_size(0),
+                 sorted_inputs_t.dim_size(1), values_t.dim_size(1), &output));
+  }
+};
+
+template <typename Device, typename T, typename OutType>
+class LowerBoundOp : public OpKernel {
+ public:
+  explicit LowerBoundOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& sorted_inputs_t = ctx->input(0);
+    const Tensor& values_t = ctx->input(1);
+
+    // must have same batch dim_size for both
+    OP_REQUIRES(ctx, sorted_inputs_t.dim_size(0) == values_t.dim_size(0),
+                Status(error::INVALID_ARGUMENT,
+                       "Leading dim_size of both tensors must match."));
+
+    // this is required because we do indexing in int32 on the GPU
+    OP_REQUIRES(ctx, values_t.NumElements() < std::numeric_limits<int>::max(),
+                Status(error::INVALID_ARGUMENT,
+                       "values tensor size must less than INT_MAX"));
+
+    Tensor* output_t;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, values_t.shape(), &output_t));
+
+    if (output_t->dtype() == DT_INT32) {
+      OP_REQUIRES(ctx,
+                  FastBoundsCheck(sorted_inputs_t.dim_size(1),
+                                  std::numeric_limits<int>::max()),
+                  errors::InvalidArgument("trailing dim_size must less than "
+                                          "INT_MAX for int32 output type, was ",
+                                          sorted_inputs_t.dim_size(1)));
+    }
+
+    auto output = output_t->template flat<OutType>();
+    const auto sorted_inputs = sorted_inputs_t.template flat<T>();
+    const auto values = values_t.template flat<T>();
+    OP_REQUIRES_OK(
+        ctx, functor::LowerBoundFunctor<Device, T, OutType>::Compute(
+                 ctx, sorted_inputs, values, sorted_inputs_t.dim_size(0),
+                 sorted_inputs_t.dim_size(1), values_t.dim_size(1), &output));
+  }
+};
+
+#define REGISTER_KERNELS(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("UpperBound")                      \
+                              .Device(DEVICE_CPU)                 \
+                              .TypeConstraint<type>("T")          \
+                              .TypeConstraint<int32>("out_type"), \
+                          UpperBoundOp<CPUDevice, type, int32>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#define REGISTER_KERNELS(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("UpperBound")                      \
+                              .Device(DEVICE_CPU)                 \
+                              .TypeConstraint<type>("T")          \
+                              .TypeConstraint<int64>("out_type"), \
+                          UpperBoundOp<CPUDevice, type, int64>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#if GOOGLE_CUDA
+
+#define REGISTER_KERNELS(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("UpperBound")                      \
+                              .Device(DEVICE_GPU)                 \
+                              .TypeConstraint<type>("T")          \
+                              .TypeConstraint<int32>("out_type"), \
+                          UpperBoundOp<GPUDevice, type, int32>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#define REGISTER_KERNELS(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("UpperBound")                      \
+                              .Device(DEVICE_GPU)                 \
+                              .TypeConstraint<type>("T")          \
+                              .TypeConstraint<int64>("out_type"), \
+                          UpperBoundOp<GPUDevice, type, int64>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#endif  // GOOGLE_CUDA
+
+#define REGISTER_KERNELS(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("LowerBound")                      \
+                              .Device(DEVICE_CPU)                 \
+                              .TypeConstraint<type>("T")          \
+                              .TypeConstraint<int32>("out_type"), \
+                          LowerBoundOp<CPUDevice, type, int32>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#define REGISTER_KERNELS(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("LowerBound")                      \
+                              .Device(DEVICE_CPU)                 \
+                              .TypeConstraint<type>("T")          \
+                              .TypeConstraint<int64>("out_type"), \
+                          LowerBoundOp<CPUDevice, type, int64>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#if GOOGLE_CUDA
+
+#define REGISTER_KERNELS(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("LowerBound")                      \
+                              .Device(DEVICE_GPU)                 \
+                              .TypeConstraint<type>("T")          \
+                              .TypeConstraint<int32>("out_type"), \
+                          LowerBoundOp<GPUDevice, type, int32>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#define REGISTER_KERNELS(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("LowerBound")                      \
+                              .Device(DEVICE_GPU)                 \
+                              .TypeConstraint<type>("T")          \
+                              .TypeConstraint<int64>("out_type"), \
+                          LowerBoundOp<GPUDevice, type, int64>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#endif  // GOOGLE_CUDA
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/searchsorted_op.h b/tensorflow/core/kernels/searchsorted_op.h
new file mode 100644
index 0000000000..f075bf0fa2
--- /dev/null
+++ b/tensorflow/core/kernels/searchsorted_op.h
@@ -0,0 +1,52 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_KERNELS_SEARCHSORTED_OP_H_
+#define TENSORFLOW_CORE_KERNELS_SEARCHSORTED_OP_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace functor {
+
+template <typename Device, typename T, typename OutType>
+struct UpperBoundFunctor {
+  // Searches for values in sorted_inputs and returns the greatest possible
+  // index where they maintain sorted order.
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& sorted_inputs,
+                        const typename TTypes<T, 1>::ConstTensor& values,
+                        int batch_size, int num_inputs, int num_values,
+                        typename TTypes<OutType, 1>::Tensor* output);
+};
+
+template <typename Device, typename T, typename OutType>
+struct LowerBoundFunctor {
+  // Searches for values in sorted_inputs and returns the lowest possible
+  // index where they maintain sorted order.
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& sorted_inputs,
+                        const typename TTypes<T, 1>::ConstTensor& values,
+                        int batch_size, int num_inputs, int num_values,
+                        typename TTypes<OutType, 1>::Tensor* output);
+};
+}  // namespace functor
+
+}  // end namespace tensorflow
+#endif  // TENSORFLOW_CORE_KERNELS_SEARCHSORTED_OP_H_
diff --git a/tensorflow/core/kernels/searchsorted_op_gpu.cu.cc b/tensorflow/core/kernels/searchsorted_op_gpu.cu.cc
new file mode 100644
index 0000000000..263b5bf298
--- /dev/null
+++ b/tensorflow/core/kernels/searchsorted_op_gpu.cu.cc
@@ -0,0 +1,126 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/searchsorted_op.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace {
+template <typename T, typename OutType>
+__global__ void UpperBoundKernel(const T* sorted_inputs, int batch_size,
+                                 int sorted_inputs_size, int values_size,
+                                 const T* values, OutType* outputs) {
+  CUDA_1D_KERNEL_LOOP(work_unit_id, values_size * batch_size) {
+    int bid = work_unit_id / values_size;
+    T value = values[work_unit_id];
+    outputs[work_unit_id] = cuda_helper::upper_bound<T, OutType>(
+        sorted_inputs + bid * sorted_inputs_size, sorted_inputs_size, value);
+  }
+}
+
+template <typename T, typename OutType>
+__global__ void LowerBoundKernel(const T* sorted_inputs, int batch_size,
+                                 int sorted_inputs_size, int values_size,
+                                 const T* values, OutType* outputs) {
+  CUDA_1D_KERNEL_LOOP(work_unit_id, values_size * batch_size) {
+    int bid = work_unit_id / values_size;
+    T value = values[work_unit_id];
+    outputs[work_unit_id] = cuda_helper::lower_bound<T, OutType>(
+        sorted_inputs + bid * sorted_inputs_size, sorted_inputs_size, value);
+  }
+}
+}  // namespace
+
+namespace functor {
+template <typename T, typename OutType>
+struct UpperBoundFunctor<GPUDevice, T, OutType> {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& sorted_inputs,
+                        const typename TTypes<T, 1>::ConstTensor& values,
+                        int batch_size, int num_inputs, int num_values,
+                        typename TTypes<OutType, 1>::Tensor* output) {
+    const cudaStream_t& stream = GetCudaStream(context);
+    CudaLaunchConfig config =
+        GetCudaLaunchConfig(values.size(), context->eigen_gpu_device());
+
+    UpperBoundKernel<T>
+        <<<config.block_count, config.thread_per_block, 0, stream>>>(
+            sorted_inputs.data(), batch_size, num_inputs, num_values,
+            values.data(), output->data());
+
+    return Status::OK();
+  }
+};
+
+template <typename T, typename OutType>
+struct LowerBoundFunctor<GPUDevice, T, OutType> {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& sorted_inputs,
+                        const typename TTypes<T, 1>::ConstTensor& values,
+                        int batch_size, int num_inputs, int num_values,
+                        typename TTypes<OutType, 1>::Tensor* output) {
+    const cudaStream_t& stream = GetCudaStream(context);
+    CudaLaunchConfig config =
+        GetCudaLaunchConfig(values.size(), context->eigen_gpu_device());
+
+    LowerBoundKernel<T>
+        <<<config.block_count, config.thread_per_block, 0, stream>>>(
+            sorted_inputs.data(), batch_size, num_inputs, num_values,
+            values.data(), output->data());
+
+    return Status::OK();
+  }
+};
+}  // namespace functor
+
+#define REGISTER_GPU_SPEC(type) \
+  template struct functor::UpperBoundFunctor<GPUDevice, type, int32>;
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_GPU_SPEC);
+#undef REGISTER_GPU_SPEC
+
+#define REGISTER_GPU_SPEC(type) \
+  template struct functor::UpperBoundFunctor<GPUDevice, type, int64>;
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_GPU_SPEC);
+#undef REGISTER_GPU_SPEC
+
+#define REGISTER_GPU_SPEC(type) \
+  template struct functor::LowerBoundFunctor<GPUDevice, type, int32>;
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_GPU_SPEC);
+#undef REGISTER_GPU_SPEC
+
+#define REGISTER_GPU_SPEC(type) \
+  template struct functor::LowerBoundFunctor<GPUDevice, type, int64>;
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_GPU_SPEC);
+#undef REGISTER_GPU_SPEC
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 7dbb18aa5d..c24950643f 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -2916,6 +2916,34 @@ Status ScatterNdShape(InferenceContext* c) {
 
 }  // namespace
 
+REGISTER_OP("UpperBound")
+    .Input("sorted_inputs: T")
+    .Input("values: T")
+    .Output("output: out_type")
+    .Attr("T: type")
+    .Attr("out_type: {int32, int64} = DT_INT32")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused_shape;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused_shape));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &unused_shape));
+      c->set_output(0, c->input(1));
+      return Status::OK();
+    });
+
+REGISTER_OP("LowerBound")
+    .Input("sorted_inputs: T")
+    .Input("values: T")
+    .Output("output: out_type")
+    .Attr("T: type")
+    .Attr("out_type: {int32, int64} = DT_INT32")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused_shape;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused_shape));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &unused_shape));
+      c->set_output(0, c->input(1));
+      return Status::OK();
+    });
+
 REGISTER_OP("ScatterNd")
     .Input("indices: Tindices")
     .Input("updates: T")
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index 540adb58d4..f6f0408ccc 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -93,11 +93,11 @@ __device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXorSync(
 }
 
 namespace cuda_helper {
-template <typename IntType>
-__device__ IntType upper_bound(IntType* first, IntType count, IntType val) {
-  IntType* orig = first;
-  IntType* it = nullptr;
-  IntType step = 0;
+template <typename T, typename OutType = int32>
+__device__ OutType upper_bound(const T* first, OutType count, T val) {
+  const T* orig = first;
+  const T* it = nullptr;
+  OutType step = 0;
   while (count > 0) {
     it = first;
     step = count / 2;
@@ -112,6 +112,27 @@ __device__ IntType upper_bound(IntType* first, IntType count, IntType val) {
 
   return first - orig;
 }
+
+template <typename T, typename OutType = int32>
+__device__ OutType lower_bound(const T* first, OutType count, T val) {
+  const T* orig = first;
+  const T* it = nullptr;
+  OutType step = 0;
+  while (count > 0) {
+    it = first;
+    step = count / 2;
+    it += step;
+    if (*it < val) {
+      first = ++it;
+      count -= step + 1;
+    } else {
+      count = step;
+    }
+  }
+
+  return first - orig;
+}
+
 }  // namespace cuda_helper
 }  // namespace tensorflow
 
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 573bb8614f..2fe85839d0 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -1276,5 +1276,203 @@ class SnapshotOpTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(y.eval(), [0, 1, 2, 3])
 
 
+@test_util.run_all_in_graph_and_eager_modes
+class SortedSearchTest(test_util.TensorFlowTestCase):
+
+  def testUpperBoundFloatHandCoded(self):
+    cdf = np.array([0, .2, .5, .6, .8, 1.], dtype=np.float32)
+    arr = np.array([.04, .99, .53, .58, .31, .01, .79, .8, .21],
+                   dtype=np.float32)
+    result = np.searchsorted(cdf, arr, side="right")
+    tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="right"))
+    self.assertAllEqual(result, tf_result)
+
+  def testUpperBoundFloatRandomNd(self):
+    dim_size = 7
+    for d in range(1, 5):
+      shape = [dim_size] * d
+      cdf = np.cumsum(
+          np.random.uniform(size=shape).astype(np.float32), axis=(d - 1))
+      arr = np.random.uniform(size=shape).astype(np.float32) * dim_size
+
+      tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="right"))
+
+      cdf = cdf.reshape([-1, dim_size])
+      arr = arr.reshape([-1, dim_size])
+      result = np.zeros(arr.shape, dtype=np.int32)
+      for i in range(dim_size**(d - 1)):
+        result[i, :] = np.searchsorted(cdf[i, :], arr[i, :], side="right")
+
+      result = result.reshape(shape)
+
+      self.assertAllEqual(result, tf_result)
+
+  def testUpperBoundFloatUneven(self):
+    batch_size = 7
+    size_search_array = 1000
+    size_values = 47
+    cdf = np.cumsum(
+        np.random.uniform(size=[batch_size, size_search_array]).astype(
+            np.float32),
+        axis=1)
+    arr = np.random.uniform(size=[batch_size, size_values]).astype(
+        np.float32) * size_search_array
+
+    tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="right"))
+
+    result = np.zeros(arr.shape, dtype=np.int32)
+    for i in range(batch_size):
+      result[i, :] = np.searchsorted(cdf[i, :], arr[i, :], side="right")
+
+    self.assertAllEqual(result, tf_result)
+
+  def testLowerBoundFloatHandCoded(self):
+    cdf = np.array([0, .2, .5, .6, .8, 1.], dtype=np.float32)
+    arr = np.array([.04, .99, .53, .58, .31, .01, .79, .8, .21],
+                   dtype=np.float32)
+    result = np.searchsorted(cdf, arr, side="left")
+    tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="left"))
+    self.assertAllEqual(result, tf_result)
+
+  def testLowerBoundFloatRandomNd(self):
+    dim_size = 7
+    for d in range(1, 5):
+      shape = [dim_size] * d
+      cdf = np.cumsum(
+          np.random.uniform(size=shape).astype(np.float32), axis=(d - 1))
+      arr = np.random.uniform(size=shape).astype(np.float32) * dim_size
+
+      tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="left"))
+
+      cdf = cdf.reshape([-1, dim_size])
+      arr = arr.reshape([-1, dim_size])
+      result = np.zeros(arr.shape, dtype=np.int32)
+      for i in range(dim_size**(d - 1)):
+        result[i, :] = np.searchsorted(cdf[i, :], arr[i, :], side="left")
+
+      result = result.reshape(shape)
+
+      self.assertAllEqual(result, tf_result)
+
+  def testLowerBoundFloatUneven(self):
+    batch_size = 7
+    size_search_array = 1000
+    size_values = 47
+    cdf = np.cumsum(
+        np.random.uniform(size=[batch_size, size_search_array]).astype(
+            np.float32),
+        axis=1)
+    arr = np.random.uniform(size=[batch_size, size_values]).astype(
+        np.float32) * size_search_array
+
+    tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="left"))
+
+    result = np.zeros(arr.shape, dtype=np.int32)
+    for i in range(batch_size):
+      result[i, :] = np.searchsorted(cdf[i, :], arr[i, :], side="left")
+
+    self.assertAllEqual(result, tf_result)
+
+  def testUpperBoundIntHandCoded(self):
+    cdf = np.array([0, 20, 50, 60, 80, 100], dtype=np.int64)
+    arr = np.array([4, 99, 53, 58, 31, 1, 79, 8, 21], dtype=np.int64)
+    result = np.searchsorted(cdf, arr, side="right")
+    tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="right"))
+    self.assertAllEqual(result, tf_result)
+
+  def testUpperBoundIntRandomNd(self):
+    dim_size = 7
+    for d in range(1, 5):
+      shape = [dim_size] * d
+      cdf = np.cumsum(
+          np.random.randint(low=0, high=10, size=shape).astype(np.int64),
+          axis=(d - 1))
+      arr = np.random.randint(
+          low=0, high=10 * dim_size, size=shape).astype(np.int64)
+
+      tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="right"))
+
+      cdf = cdf.reshape([-1, dim_size])
+      arr = arr.reshape([-1, dim_size])
+      result = np.zeros(arr.shape, dtype=np.int32)
+      for i in range(dim_size**(d - 1)):
+        result[i, :] = np.searchsorted(cdf[i, :], arr[i, :], side="right")
+
+      result = result.reshape(shape)
+
+      self.assertAllEqual(result, tf_result)
+
+  def testUpperBoundIntUneven(self):
+    batch_size = 7
+    size_search_array = 1000
+    size_values = 47
+    cdf = np.cumsum(
+        np.random.randint(low=0, high=10,
+                          size=[batch_size,
+                                size_search_array]).astype(np.int64),
+        axis=1)
+    arr = np.random.randint(
+        low=0, high=10 * size_search_array, size=[batch_size,
+                                                  size_values]).astype(np.int64)
+
+    tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="right"))
+
+    result = np.zeros(arr.shape, dtype=np.int32)
+    for i in range(batch_size):
+      result[i, :] = np.searchsorted(cdf[i, :], arr[i, :], side="right")
+
+    self.assertAllEqual(result, tf_result)
+
+  def testLowerBoundIntHandCoded(self):
+    cdf = np.array([0, 20, 50, 60, 80, 100], dtype=np.int64)
+    arr = np.array([4, 99, 53, 58, 31, 1, 79, 8, 21], dtype=np.int64)
+    result = np.searchsorted(cdf, arr, side="left")
+    tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="left"))
+    self.assertAllEqual(result, tf_result)
+
+  def testLowerBoundIntRandomNd(self):
+    dim_size = 7
+    for d in range(1, 5):
+      shape = [dim_size] * d
+      cdf = np.cumsum(
+          np.random.randint(low=0, high=10, size=shape).astype(np.int64),
+          axis=(d - 1))
+      arr = np.random.randint(
+          low=0, high=10 * dim_size, size=shape).astype(np.int64)
+
+      tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="left"))
+
+      cdf = cdf.reshape([-1, dim_size])
+      arr = arr.reshape([-1, dim_size])
+      result = np.zeros(arr.shape, dtype=np.int32)
+      for i in range(dim_size**(d - 1)):
+        result[i, :] = np.searchsorted(cdf[i, :], arr[i, :], side="left")
+
+      result = result.reshape(shape)
+
+      self.assertAllEqual(result, tf_result)
+
+  def testLowerBoundIntUneven(self):
+    batch_size = 7
+    size_search_array = 1000
+    size_values = 47
+    cdf = np.cumsum(
+        np.random.randint(low=0, high=10,
+                          size=[batch_size,
+                                size_search_array]).astype(np.int64),
+        axis=1)
+    arr = np.random.randint(
+        low=0, high=10 * size_search_array, size=[batch_size,
+                                                  size_values]).astype(np.int64)
+
+    tf_result = self.evaluate(array_ops.searchsorted(cdf, arr, side="left"))
+
+    result = np.zeros(arr.shape, dtype=np.int32)
+    for i in range(batch_size):
+      result[i, :] = np.searchsorted(cdf[i, :], arr[i, :], side="left")
+
+    self.assertAllEqual(result, tf_result)
+
+
 if __name__ == "__main__":
   test_lib.main()
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index c8b883350d..a7f57e94e3 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -2787,4 +2787,65 @@ def quantize(input,  # pylint: disable=redefined-builtin
       name=name)
 
 
+@tf_export("searchsorted")
+def searchsorted(sorted_sequence,
+                 values,
+                 side="left",
+                 out_type=dtypes.int32,
+                 name=None):
+  """Searches input tensor for values on the innermost dimension.
+
+  A 2-D example:
+
+  ```
+    sorted_sequence = [[0, 3, 9, 9, 10],
+                       [1, 2, 3, 4, 5]]
+    values = [[2, 4, 9],
+              [0, 2, 6]]
+
+    result = searchsorted(sorted_sequence, values, side="left")
+
+    result == [[1, 2, 2],
+               [0, 1, 5]]
+
+    result = searchsorted(sorted_sequence, values, side="right")
+
+    result == [[1, 2, 4],
+               [0, 2, 5]]
+  ```
+
+  Args:
+    sorted_sequence: N-D `Tensor` containing a sorted sequence.
+    values: N-D `Tensor` containing the search values.
+    side: 'left' or 'right'; 'left' corresponds to lower_bound and 'right' to
+      upper_bound.
+    out_type: The output type (`int32` or `int64`).  Default is `tf.int32`.
+    name: Optional name for the operation.
+
+  Returns:
+    An N-D `Tensor` the size of values containing the result of applying either
+    lower_bound or upper_bound (depending on side) to each value.  The result
+    is not a global index to the entire `Tensor`, but the index in the last
+    dimension.
+
+  Raises:
+    ValueError: If the last dimension of `sorted_sequence >= 2^31-1` elements.
+                If the total size of values exceeds `2^31 - 1` elements.
+                If the first `N-1` dimensions of the two tensors don't match.
+  """
+  sequence_size = shape_internal(sorted_sequence)[-1]
+  values_size = shape_internal(values)[-1]
+  sorted_sequence_2d = reshape(sorted_sequence, [-1, sequence_size])
+  values_2d = reshape(values, [-1, values_size])
+  if side == "right":
+    output = gen_array_ops.upper_bound(sorted_sequence_2d, values_2d, out_type,
+                                       name)
+  elif side == "left":
+    output = gen_array_ops.lower_bound(sorted_sequence_2d, values_2d, out_type,
+                                       name)
+  else:
+    raise ValueError("side must be either 'right' or 'left'.  Saw: %s." % side)
+  return reshape(output, shape_internal(values))
+
+
 quantize.__doc__ = gen_array_ops.quantize_v2.__doc__
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 6ff4343e9e..fbc58e5933 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1804,6 +1804,10 @@ tf_module {
     name: "scatter_update"
     argspec: "args=[\'ref\', \'indices\', \'updates\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
   }
+  member_method {
+    name: "searchsorted"
+    argspec: "args=[\'sorted_sequence\', \'values\', \'side\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'left\', \"<dtype: \'int32\'>\", \'None\'], "
+  }
   member_method {
     name: "segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index db90c007d4..7eca26be06 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -1724,6 +1724,10 @@ tf_module {
     name: "scatter_nd"
     argspec: "args=[\'indices\', \'updates\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "searchsorted"
+    argspec: "args=[\'sorted_sequence\', \'values\', \'side\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'left\', \"<dtype: \'int32\'>\", \'None\'], "
+  }
   member_method {
     name: "segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-- 
GitLab


From 07bb219ee9a6f11139396ac73d4138522300f86b Mon Sep 17 00:00:00 2001
From: Brian Patton <bjp@google.com>
Date: Thu, 20 Sep 2018 12:57:56 -0700
Subject: [PATCH 0451/1357] Modify docs under contrib/distributions to point to
 tfp.

PiperOrigin-RevId: 213866466
---
 .../bayesflow/python/ops/monte_carlo_impl.py  | 29 ++++++++-----
 .../python/ops/autoregressive.py              |  7 ++--
 .../distributions/python/ops/batch_reshape.py |  3 +-
 .../ops/bijectors/masked_autoregressive.py    |  9 ++--
 .../python/ops/bijectors/permute.py           |  5 ++-
 .../python/ops/bijectors/real_nvp.py          |  5 ++-
 .../python/ops/bijectors/reshape.py           |  5 ++-
 .../python/ops/bijectors/scale_tril.py        |  5 ++-
 .../distributions/python/ops/cauchy.py        |  3 +-
 .../distributions/python/ops/deterministic.py | 10 +++--
 .../distributions/python/ops/gumbel.py        |  3 +-
 .../distributions/python/ops/half_normal.py   |  7 +++-
 .../distributions/python/ops/independent.py   |  3 +-
 .../distributions/python/ops/inverse_gamma.py |  4 +-
 .../distributions/python/ops/logistic.py      |  3 +-
 .../distributions/python/ops/mixture.py       |  4 +-
 .../python/ops/mixture_same_family.py         |  7 ++--
 .../distributions/python/ops/mvn_diag.py      |  3 +-
 .../python/ops/mvn_diag_plus_low_rank.py      |  3 +-
 .../python/ops/mvn_full_covariance.py         |  3 +-
 .../python/ops/mvn_linear_operator.py         |  3 +-
 .../distributions/python/ops/mvn_tril.py      |  7 ++--
 .../python/ops/poisson_lognormal.py           |  3 +-
 .../python/ops/quantized_distribution.py      |  5 ++-
 .../distributions/python/ops/sinh_arcsinh.py  |  2 +-
 .../python/ops/statistical_testing.py         | 42 +++++++++----------
 .../python/ops/vector_diffeomixture.py        |  3 +-
 .../python/ops/vector_exponential_diag.py     |  3 +-
 .../ops/vector_exponential_linear_operator.py |  3 +-
 .../python/ops/vector_laplace_diag.py         |  3 +-
 .../ops/vector_laplace_linear_operator.py     |  3 +-
 .../python/ops/vector_sinh_arcsinh_diag.py    |  2 +-
 .../python/ops/vector_student_t.py            |  3 +-
 .../distributions/python/ops/wishart.py       | 18 +++++---
 34 files changed, 136 insertions(+), 85 deletions(-)

diff --git a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py
index 9afe3df585..18d40fc1df 100644
--- a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py
+++ b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
+from tensorflow.python.util import deprecation
 
 __all__ = [
     'expectation',
@@ -66,7 +67,7 @@ def expectation_importance_sampler(f,
       shape broadcastable to `q.batch_shape`.
       For example, `log_p` works "just like" `sampling_dist_q.log_prob`.
     sampling_dist_q:  The sampling distribution.
-      `tf.contrib.distributions.Distribution`.
+      `tfp.distributions.Distribution`.
       `float64` `dtype` recommended.
       `log_p` and `q` should be supported on the same set.
     z:  `Tensor` of samples from `q`, produced by `q.sample` for some `n`.
@@ -141,7 +142,7 @@ def expectation_importance_sampler_logspace(
       shape broadcastable to `q.batch_shape`.
       For example, `log_p` works "just like" `q.log_prob`.
     sampling_dist_q:  The sampling distribution.
-      `tf.contrib.distributions.Distribution`.
+      `tfp.distributions.Distribution`.
       `float64` `dtype` recommended.
       `log_p` and `q` should be supported on the same set.
     z:  `Tensor` of samples from `q`, produced by `q.sample` for some `n`.
@@ -188,6 +189,12 @@ def _logspace_mean(log_values):
   return log_mean_of_values
 
 
+@deprecation.deprecated(
+    '2018-10-01',
+    'The tf.contrib.bayesflow library has moved to '
+    'TensorFlow Probability (https://github.com/tensorflow/probability). '
+    'Use `tfp.monte_carlo.expectation` instead.',
+    warn_once=True)
 def expectation(f, samples, log_prob=None, use_reparametrization=True,
                 axis=0, keep_dims=False, name=None):
   r"""Computes the Monte-Carlo approximation of \\(E_p[f(X)]\\).
@@ -236,17 +243,17 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True,
   Example Use:
 
   ```python
-  bf = tf.contrib.bayesflow
-  ds = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Monte-Carlo approximation of a reparameterized distribution, e.g., Normal.
 
   num_draws = int(1e5)
-  p = ds.Normal(loc=0., scale=1.)
-  q = ds.Normal(loc=1., scale=2.)
-  exact_kl_normal_normal = ds.kl_divergence(p, q)
+  p = tfd.Normal(loc=0., scale=1.)
+  q = tfd.Normal(loc=1., scale=2.)
+  exact_kl_normal_normal = tfd.kl_divergence(p, q)
   # ==> 0.44314718
-  approx_kl_normal_normal = bf.expectation(
+  approx_kl_normal_normal = tfp.monte_carlo.expectation(
       f=lambda x: p.log_prob(x) - q.log_prob(x),
       samples=p.sample(num_draws, seed=42),
       log_prob=p.log_prob,
@@ -260,9 +267,9 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True,
   num_draws = int(1e5)
   p = ds.Gamma(concentration=1., rate=1.)
   q = ds.Gamma(concentration=2., rate=3.)
-  exact_kl_gamma_gamma = ds.kl_divergence(p, q)
+  exact_kl_gamma_gamma = tfd.kl_divergence(p, q)
   # ==> 0.37999129
-  approx_kl_gamma_gamma = bf.expectation(
+  approx_kl_gamma_gamma = tfp.monte_carlo.expectation(
       f=lambda x: p.log_prob(x) - q.log_prob(x),
       samples=p.sample(num_draws, seed=42),
       log_prob=p.log_prob,
@@ -278,7 +285,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True,
   KL-divergence, the following is preferred:
 
   ```python
-  approx_kl_p_q = bf.monte_carlo_csiszar_f_divergence(
+  approx_kl_p_q = tfp.vi.monte_carlo_csiszar_f_divergence(
       f=bf.kl_reverse,
       p_log_prob=q.log_prob,
       q=p,
diff --git a/tensorflow/contrib/distributions/python/ops/autoregressive.py b/tensorflow/contrib/distributions/python/ops/autoregressive.py
index bb9b8043b2..3ba1c3a665 100644
--- a/tensorflow/contrib/distributions/python/ops/autoregressive.py
+++ b/tensorflow/contrib/distributions/python/ops/autoregressive.py
@@ -65,13 +65,14 @@ class Autoregressive(distribution_lib.Distribution):
   ```
 
   where the ellipses (`...`) represent `n-2` composed calls to `fn`, `fn`
-  constructs a `tf.distributions.Distribution`-like instance, and `x0` is a
+  constructs a `tfp.distributions.Distribution`-like instance, and `x0` is a
   fixed initializing `Tensor`.
 
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   def normal_fn(self, event_size):
     n = event_size * (event_size + 1) / 2
@@ -127,7 +128,7 @@ class Autoregressive(distribution_lib.Distribution):
 
     Args:
       distribution_fn: Python `callable` which constructs a
-        `tf.distributions.Distribution`-like instance from a `Tensor` (e.g.,
+        `tfp.distributions.Distribution`-like instance from a `Tensor` (e.g.,
         `sample0`). The function must respect the "autoregressive property",
         i.e., there exists a permutation of event such that each coordinate is a
         diffeomorphic function of on preceding coordinates.
diff --git a/tensorflow/contrib/distributions/python/ops/batch_reshape.py b/tensorflow/contrib/distributions/python/ops/batch_reshape.py
index 519077bc9a..612376efb7 100644
--- a/tensorflow/contrib/distributions/python/ops/batch_reshape.py
+++ b/tensorflow/contrib/distributions/python/ops/batch_reshape.py
@@ -45,7 +45,8 @@ class BatchReshape(distribution_lib.Distribution):
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   dtype = np.float32
   dims = 2
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py
index 296e66f2b2..3b3d8ee6f2 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py
@@ -61,8 +61,8 @@ class MaskedAutoregressiveFlow(bijector.Bijector):
   `shift_and_log_scale_fn`, `masked_autoregressive_default_template`, achieves
   this property by zeroing out weights in its `masked_dense` layers.
 
-  In the `tf.distributions` framework, a "normalizing flow" is implemented as a
-  `tf.contrib.distributions.bijectors.Bijector`. The `forward` "autoregression"
+  In the `tfp` framework, a "normalizing flow" is implemented as a
+  `tfp.bijectors.Bijector`. The `forward` "autoregression"
   is implemented using a `tf.while_loop` and a deep neural network (DNN) with
   masked weights such that the autoregressive property is automatically met in
   the `inverse`.
@@ -126,8 +126,9 @@ class MaskedAutoregressiveFlow(bijector.Bijector):
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
-  tfb = tfd.bijectors
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+  tfb = tfp.bijectors
 
   dims = 5
 
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py
index f182a1adcb..178c3c94bf 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py
@@ -41,9 +41,10 @@ class Permute(bijector.Bijector):
   """Permutes the rightmost dimension of a `Tensor`.
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfb = tfp.bijectors
 
-  reverse = tfd.bijectors.Permute(permutation=[2, 1, 0])
+  reverse = tfb.Permute(permutation=[2, 1, 0])
 
   reverse.forward([-1., 0., 1.])
   # ==> [1., 0., -1]
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py b/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py
index 773ae24461..0bcb08cdea 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py
@@ -90,8 +90,9 @@ class RealNVP(bijector.Bijector):
   #### Example Use
 
   ```python
-  tfd = tf.contrib.distributions
-  tfb = tfd.bijectors
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+  tfb = tfp.bijectors
 
   # A common choice for a normalizing flow is to use a Gaussian for the base
   # distribution. (However, any continuous distribution would work.) E.g.,
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py
index c8282229a3..71ac29038f 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py
@@ -80,9 +80,10 @@ class Reshape(bijector.Bijector):
   Example usage:
   ```python
 
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfb = tfp.bijectors
 
-  r = tfd.bijectors.Reshape(event_shape_out=[1, -1])
+  r = tfb.Reshape(event_shape_out=[1, -1])
 
   r.forward([3., 4.])    # shape [2]
   # ==> [[3., 4.]]       # shape [1, 2]
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/scale_tril.py b/tensorflow/contrib/distributions/python/ops/bijectors/scale_tril.py
index 6fbe866578..0a6d690b65 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/scale_tril.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/scale_tril.py
@@ -42,7 +42,10 @@ class ScaleTriL(chain.Chain):
   #### Examples
 
   ```python
-  tfb = tf.contrib.distributions.bijectors
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+  tfb = tfp.bijectors
+
   b = tfb.ScaleTriL(
        diag_bijector=tfb.Exp(),
        diag_shift=None)
diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py
index cb5223b055..c461833b9a 100644
--- a/tensorflow/contrib/distributions/python/ops/cauchy.py
+++ b/tensorflow/contrib/distributions/python/ops/cauchy.py
@@ -63,7 +63,8 @@ class Cauchy(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Define a single scalar Cauchy distribution.
   dist = tfd.Cauchy(loc=0., scale=3.)
diff --git a/tensorflow/contrib/distributions/python/ops/deterministic.py b/tensorflow/contrib/distributions/python/ops/deterministic.py
index affc64a14f..507c5d3679 100644
--- a/tensorflow/contrib/distributions/python/ops/deterministic.py
+++ b/tensorflow/contrib/distributions/python/ops/deterministic.py
@@ -198,8 +198,11 @@ class Deterministic(_BaseDeterministic):
   #### Examples
 
   ```python
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   # Initialize a single Deterministic supported at zero.
-  constant = tf.contrib.distributions.Deterministic(0.)
+  constant = tfd.Deterministic(0.)
   constant.prob(0.)
   ==> 1.
   constant.prob(2.)
@@ -208,7 +211,7 @@ class Deterministic(_BaseDeterministic):
   # Initialize a [2, 2] batch of scalar constants.
   loc = [[0., 1.], [2., 3.]]
   x = [[0., 1.1], [1.99, 3.]]
-  constant = tf.contrib.distributions.Deterministic(loc)
+  constant = tfd.Deterministic(loc)
   constant.prob(x)
   ==> [[1., 0.], [0., 1.]]
   ```
@@ -310,7 +313,8 @@ class VectorDeterministic(_BaseDeterministic):
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single VectorDeterministic supported at [0., 2.] in R^2.
   constant = tfd.Deterministic([0., 2.])
diff --git a/tensorflow/contrib/distributions/python/ops/gumbel.py b/tensorflow/contrib/distributions/python/ops/gumbel.py
index acdea4d61d..4b50df5b48 100644
--- a/tensorflow/contrib/distributions/python/ops/gumbel.py
+++ b/tensorflow/contrib/distributions/python/ops/gumbel.py
@@ -63,7 +63,8 @@ class _Gumbel(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Define a single scalar Gumbel distribution.
   dist = tfd.Gumbel(loc=0., scale=3.)
diff --git a/tensorflow/contrib/distributions/python/ops/half_normal.py b/tensorflow/contrib/distributions/python/ops/half_normal.py
index b02c403106..f121637086 100644
--- a/tensorflow/contrib/distributions/python/ops/half_normal.py
+++ b/tensorflow/contrib/distributions/python/ops/half_normal.py
@@ -66,15 +66,18 @@ class HalfNormal(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   # Define a single scalar HalfNormal distribution.
-  dist = tf.contrib.distributions.HalfNormal(scale=3.0)
+  dist = tfd.HalfNormal(scale=3.0)
 
   # Evaluate the cdf at 1, returning a scalar.
   dist.cdf(1.)
 
   # Define a batch of two scalar valued HalfNormals.
   # The first has scale 11.0, the second 22.0
-  dist = tf.contrib.distributions.HalfNormal(scale=[11.0, 22.0])
+  dist = tfd.HalfNormal(scale=[11.0, 22.0])
 
   # Evaluate the pdf of the first distribution on 1.0, and the second on 1.5,
   # returning a length two tensor.
diff --git a/tensorflow/contrib/distributions/python/ops/independent.py b/tensorflow/contrib/distributions/python/ops/independent.py
index 0672702b96..e1cfff3c66 100644
--- a/tensorflow/contrib/distributions/python/ops/independent.py
+++ b/tensorflow/contrib/distributions/python/ops/independent.py
@@ -70,7 +70,8 @@ class Independent(distribution_lib.Distribution):
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Make independent distribution from a 2-batch Normal.
   ind = tfd.Independent(
diff --git a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py
index 70d050d7a6..452628257e 100644
--- a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py
+++ b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py
@@ -89,7 +89,9 @@ class InverseGamma(distribution.Distribution):
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   dist = tfd.InverseGamma(concentration=3.0, rate=2.0)
   dist2 = tfd.InverseGamma(concentration=[3.0, 4.0], rate=[2.0, 3.0])
   ```
diff --git a/tensorflow/contrib/distributions/python/ops/logistic.py b/tensorflow/contrib/distributions/python/ops/logistic.py
index 02e3bad51e..21c9b5a354 100644
--- a/tensorflow/contrib/distributions/python/ops/logistic.py
+++ b/tensorflow/contrib/distributions/python/ops/logistic.py
@@ -61,7 +61,8 @@ class Logistic(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Define a single scalar Logistic distribution.
   dist = tfd.Logistic(loc=0., scale=3.)
diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py
index 3b7114ef06..52b67f2c54 100644
--- a/tensorflow/contrib/distributions/python/ops/mixture.py
+++ b/tensorflow/contrib/distributions/python/ops/mixture.py
@@ -50,7 +50,9 @@ class Mixture(distribution.Distribution):
 
   ```python
   # Create a mixture of two Gaussians:
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   mix = 0.3
   bimix_gauss = tfd.Mixture(
     cat=tfd.Categorical(probs=[mix, 1.-mix]),
diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
index 8ffee940d0..f4d394ff29 100644
--- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
+++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
@@ -44,7 +44,8 @@ class MixtureSameFamily(distribution.Distribution):
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   ### Create a mixture of two scalar Gaussians:
 
@@ -113,12 +114,12 @@ class MixtureSameFamily(distribution.Distribution):
     """Construct a `MixtureSameFamily` distribution.
 
     Args:
-      mixture_distribution: `tf.distributions.Categorical`-like instance.
+      mixture_distribution: `tfp.distributions.Categorical`-like instance.
         Manages the probability of selecting components. The number of
         categories must match the rightmost batch dimension of the
         `components_distribution`. Must have either scalar `batch_shape` or
         `batch_shape` matching `components_distribution.batch_shape[:-1]`.
-      components_distribution: `tf.distributions.Distribution`-like instance.
+      components_distribution: `tfp.distributions.Distribution`-like instance.
         Right-most batch dimension indexes components.
       validate_args: Python `bool`, default `False`. When `True` distribution
         parameters are checked for validity despite possibly degrading runtime
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag.py b/tensorflow/contrib/distributions/python/ops/mvn_diag.py
index cd0c282ba6..0b5b76be92 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_diag.py
@@ -85,7 +85,8 @@ class MultivariateNormalDiag(
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 2-variate Gaussian.
   mvn = tfd.MultivariateNormalDiag(
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py
index 74d9d04fc7..80546083d3 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py
@@ -87,7 +87,8 @@ class MultivariateNormalDiagPlusLowRank(
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 3-variate Gaussian with covariance `cov = S @ S.T`,
   # `S = diag(d) + U @ diag(m) @ U.T`. The perturbation, `U @ diag(m) @ U.T`, is
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
index dbc4c1b3dc..bcb4937980 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
@@ -73,7 +73,8 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL):
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 3-variate Gaussian.
   mu = [1., 2, 3]
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py
index efe5a6d0d9..8fdc99824b 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py
@@ -91,7 +91,8 @@ class MultivariateNormalLinearOperator(
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 3-variate Gaussian.
   mu = [1., 2, 3]
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_tril.py b/tensorflow/contrib/distributions/python/ops/mvn_tril.py
index c6a23e4336..c21f70fc3b 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_tril.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_tril.py
@@ -77,13 +77,14 @@ class MultivariateNormalTriL(
   ```
 
   Trainable (batch) lower-triangular matrices can be created with
-  `tf.contrib.distributions.matrix_diag_transform()` and/or
-  `tf.contrib.distributions.fill_triangular()`
+  `tfp.distributions.matrix_diag_transform()` and/or
+  `tfp.distributions.fill_triangular()`
 
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 3-variate Gaussian.
   mu = [1., 2, 3]
diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
index 7a7ad1be35..85683e3233 100644
--- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
+++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
@@ -220,7 +220,8 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Create two batches of PoissonLogNormalQuadratureCompounds, one with
   # prior `loc = 0.` and another with `loc = 1.` In both cases `scale = 1.`
diff --git a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py
index 18a0f754e6..134658deab 100644
--- a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py
+++ b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py
@@ -196,8 +196,9 @@ class QuantizedDistribution(distributions.Distribution):
   parameter determining the unnormalized probability of that component.
 
   ```python
-  tfd = tf.contrib.distributions
-  tfb = tfd.bijectors
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+  tfb = tfp.bijectors
 
   net = wavenet(inputs)
   loc, unconstrained_scale, logits = tf.split(net,
diff --git a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py
index a9d0fb4ccf..4b520b912e 100644
--- a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py
+++ b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py
@@ -124,7 +124,7 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution):
       tailweight:  Tailweight parameter. Default is `1.0` (unchanged tailweight)
       distribution: `tf.Distribution`-like instance. Distribution that is
         transformed to produce this distribution.
-        Default is `tf.distributions.Normal(0., 1.)`.
+        Default is `tfp.distributions.Normal(0., 1.)`.
         Must be a scalar-batch, scalar-event distribution.  Typically
         `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
         a function of non-trainable parameters. WARNING: If you backprop through
diff --git a/tensorflow/contrib/distributions/python/ops/statistical_testing.py b/tensorflow/contrib/distributions/python/ops/statistical_testing.py
index c25e8c51d7..af22f4843a 100644
--- a/tensorflow/contrib/distributions/python/ops/statistical_testing.py
+++ b/tensorflow/contrib/distributions/python/ops/statistical_testing.py
@@ -30,27 +30,27 @@ is some expected constant.  Suppose the support of P is the interval
 `[0, 1]`.  Then you might do this:
 
 ```python
-tfd = tf.contrib.distributions
-
-expected_mean = ...
-num_samples = 5000
-samples = ... draw 5000 samples from P
-
-# Check that the mean looks right
-check1 = tfd.assert_true_mean_equal_by_dkwm(
-    samples, low=0., high=1., expected=expected_mean,
-    false_fail_rate=1e-6)
-
-# Check that the difference in means detectable with 5000 samples is
-# small enough
-check2 = tf.assert_less(
-    tfd.min_discrepancy_of_true_means_detectable_by_dkwm(
-        num_samples, low=0., high=1.0,
-        false_fail_rate=1e-6, false_pass_rate=1e-6),
-    0.01)
-
-# Be sure to execute both assertion ops
-sess.run([check1, check2])
+  from tensorflow_probability.python.distributions.internal import statistical_testing
+
+  expected_mean = ...
+  num_samples = 5000
+  samples = ... draw 5000 samples from P
+
+  # Check that the mean looks right
+  check1 = statistical_testing.assert_true_mean_equal_by_dkwm(
+      samples, low=0., high=1., expected=expected_mean,
+      false_fail_rate=1e-6)
+
+  # Check that the difference in means detectable with 5000 samples is
+  # small enough
+  check2 = tf.assert_less(
+      statistical_testing.min_discrepancy_of_true_means_detectable_by_dkwm(
+          num_samples, low=0., high=1.0,
+          false_fail_rate=1e-6, false_pass_rate=1e-6),
+      0.01)
+
+  # Be sure to execute both assertion ops
+  sess.run([check1, check2])
 ```
 
 The second assertion is an instance of experiment design.  It's a
diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
index 3c8aae2797..a3d178357b 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
@@ -300,7 +300,8 @@ class VectorDiffeomixture(distribution_lib.Distribution):
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Create two batches of VectorDiffeomixtures, one with mix_loc=[0.],
   # another with mix_loc=[1]. In both cases, `K=2` and the affine
diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py
index 73356a3625..36cbd71f8b 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py
@@ -90,7 +90,8 @@ class VectorExponentialDiag(
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 2-variate VectorExponential, supported on
   # {(x, y) in R^2 : x > 0, y > 0}.
diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py
index 9a47b48557..fd5bf9ecc7 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py
@@ -108,7 +108,8 @@ class VectorExponentialLinearOperator(
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 2-variate VectorExponential, supported on
   # {(x, y) in R^2 : x > 0, y > 0}.
diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py
index e68ddc569c..8cd4e128c7 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py
@@ -102,7 +102,8 @@ class VectorLaplaceDiag(
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 2-variate VectorLaplace.
   vla = tfd.VectorLaplaceDiag(
diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py
index 3923161a33..67d2ccd28d 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py
@@ -110,7 +110,8 @@ class VectorLaplaceLinearOperator(
   #### Examples
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 3-variate VectorLaplace with some desired covariance.
   mu = [1., 2, 3]
diff --git a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py
index 49ffff24ca..da57d0cb55 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py
@@ -152,7 +152,7 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution):
         broadcastable with `event_shape`.
       distribution: `tf.Distribution`-like instance. Distribution from which `k`
         iid samples are used as input to transformation `F`.  Default is
-        `tf.distributions.Normal(loc=0., scale=1.)`.
+        `tfp.distributions.Normal(loc=0., scale=1.)`.
         Must be a scalar-batch, scalar-event distribution.  Typically
         `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
         a function of non-trainable parameters. WARNING: If you backprop through
diff --git a/tensorflow/contrib/distributions/python/ops/vector_student_t.py b/tensorflow/contrib/distributions/python/ops/vector_student_t.py
index f289b39e51..bad91a0844 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_student_t.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_student_t.py
@@ -92,7 +92,8 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution):
   Extra leading dimensions, if provided, allow for batches.
 
   ```python
-  tfd = tf.contrib.distributions
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
 
   # Initialize a single 3-variate vector Student's t-distribution.
   mu = [1., 2, 3]
diff --git a/tensorflow/contrib/distributions/python/ops/wishart.py b/tensorflow/contrib/distributions/python/ops/wishart.py
index 49b9de0ab5..ee2fc58864 100644
--- a/tensorflow/contrib/distributions/python/ops/wishart.py
+++ b/tensorflow/contrib/distributions/python/ops/wishart.py
@@ -480,11 +480,14 @@ class WishartCholesky(_WishartLinearOperator):
   #### Examples
 
   ```python
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   # Initialize a single 3x3 Wishart with Cholesky factored scale matrix and 5
   # degrees-of-freedom.(*)
   df = 5
   chol_scale = tf.cholesky(...)  # Shape is [3, 3].
-  dist = tf.contrib.distributions.WishartCholesky(df=df, scale=chol_scale)
+  dist = tfd.WishartCholesky(df=df, scale=chol_scale)
 
   # Evaluate this on an observation in R^3, returning a scalar.
   x = ...  # A 3x3 positive definite matrix.
@@ -498,14 +501,14 @@ class WishartCholesky(_WishartLinearOperator):
   # Initialize two 3x3 Wisharts with Cholesky factored scale matrices.
   df = [5, 4]
   chol_scale = tf.cholesky(...)  # Shape is [2, 3, 3].
-  dist = tf.contrib.distributions.WishartCholesky(df=df, scale=chol_scale)
+  dist = tfd.WishartCholesky(df=df, scale=chol_scale)
 
   # Evaluate this on four observations.
   x = [[x0, x1], [x2, x3]]  # Shape is [2, 2, 3, 3].
   dist.prob(x)  # Shape is [2, 2].
 
   # (*) - To efficiently create a trainable covariance matrix, see the example
-  #   in tf.contrib.distributions.matrix_diag_transform.
+  #   in tfp.distributions.matrix_diag_transform.
   ```
 
   """
@@ -604,11 +607,14 @@ class WishartFull(_WishartLinearOperator):
   #### Examples
 
   ```python
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   # Initialize a single 3x3 Wishart with Full factored scale matrix and 5
   # degrees-of-freedom.(*)
   df = 5
   scale = ...  # Shape is [3, 3]; positive definite.
-  dist = tf.contrib.distributions.WishartFull(df=df, scale=scale)
+  dist = tfd.WishartFull(df=df, scale=scale)
 
   # Evaluate this on an observation in R^3, returning a scalar.
   x = ...  # A 3x3 positive definite matrix.
@@ -622,14 +628,14 @@ class WishartFull(_WishartLinearOperator):
   # Initialize two 3x3 Wisharts with Full factored scale matrices.
   df = [5, 4]
   scale = ...  # Shape is [2, 3, 3].
-  dist = tf.contrib.distributions.WishartFull(df=df, scale=scale)
+  dist = tfd.WishartFull(df=df, scale=scale)
 
   # Evaluate this on four observations.
   x = [[x0, x1], [x2, x3]]  # Shape is [2, 2, 3, 3]; xi is positive definite.
   dist.prob(x)  # Shape is [2, 2].
 
   # (*) - To efficiently create a trainable covariance matrix, see the example
-  #   in tf.contrib.distributions.matrix_diag_transform.
+  #   in tfd.matrix_diag_transform.
   ```
 
   """
-- 
GitLab


From 14986a41bc281d847d109fefed6fee85bde07c75 Mon Sep 17 00:00:00 2001
From: Brian Patton <bjp@google.com>
Date: Thu, 20 Sep 2018 13:04:40 -0700
Subject: [PATCH 0452/1357] Updating doc references to tf.distributions to
 point to tfp.distributions.

PiperOrigin-RevId: 213867606
---
 .../python/kernel_tests/distribution_test.py  | 20 +++++++++----------
 tensorflow/python/ops/distributions/beta.py   |  9 ++++++---
 .../python/ops/distributions/dirichlet.py     |  9 ++++++---
 .../python/ops/distributions/distribution.py  |  8 ++++----
 tensorflow/python/ops/distributions/gamma.py  |  9 ++++++---
 .../ops/distributions/kullback_leibler.py     |  4 ++--
 tensorflow/python/ops/distributions/normal.py |  9 ++++++---
 .../python/ops/distributions/student_t.py     | 14 ++++++-------
 8 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py
index f073f51a69..9b9b3ce2dd 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py
@@ -212,7 +212,7 @@ class DistributionTest(test.TestCase):
   def testStrWorksCorrectlyScalar(self):
     normal = tfd.Normal(loc=np.float16(0), scale=np.float16(1))
     self.assertEqual(
-        ("tf.distributions.Normal("
+        ("tfp.distributions.Normal("
          "\"Normal/\", "
          "batch_shape=(), "
          "event_shape=(), "
@@ -221,7 +221,7 @@ class DistributionTest(test.TestCase):
 
     chi2 = tfd.Chi2(df=np.float32([1., 2.]), name="silly")
     self.assertEqual(
-        ("tf.distributions.Chi2("
+        ("tfp.distributions.Chi2("
          "\"silly/\", "  # What a silly name that is!
          "batch_shape=(2,), "
          "event_shape=(), "
@@ -230,7 +230,7 @@ class DistributionTest(test.TestCase):
 
     exp = tfd.Exponential(rate=array_ops.placeholder(dtype=dtypes.float32))
     self.assertEqual(
-        ("tf.distributions.Exponential(\"Exponential/\", "
+        ("tfp.distributions.Exponential(\"Exponential/\", "
          # No batch shape.
          "event_shape=(), "
          "dtype=float32)"),
@@ -240,7 +240,7 @@ class DistributionTest(test.TestCase):
     mvn_static = tfd.MultivariateNormalDiag(
         loc=np.zeros([2, 2]), name="MVN")
     self.assertEqual(
-        ("tf.distributions.MultivariateNormalDiag("
+        ("tfp.distributions.MultivariateNormalDiag("
          "\"MVN/\", "
          "batch_shape=(2,), "
          "event_shape=(2,), "
@@ -251,7 +251,7 @@ class DistributionTest(test.TestCase):
         loc=array_ops.placeholder(shape=[None, 3], dtype=dtypes.float32),
         name="MVN2")
     self.assertEqual(
-        ("tf.distributions.MultivariateNormalDiag("
+        ("tfp.distributions.MultivariateNormalDiag("
          "\"MVN2/\", "
          "batch_shape=(?,), "  # Partially known.
          "event_shape=(3,), "
@@ -261,7 +261,7 @@ class DistributionTest(test.TestCase):
   def testReprWorksCorrectlyScalar(self):
     normal = tfd.Normal(loc=np.float16(0), scale=np.float16(1))
     self.assertEqual(
-        ("<tf.distributions.Normal"
+        ("<tfp.distributions.Normal"
          " 'Normal/'"
          " batch_shape=()"
          " event_shape=()"
@@ -270,7 +270,7 @@ class DistributionTest(test.TestCase):
 
     chi2 = tfd.Chi2(df=np.float32([1., 2.]), name="silly")
     self.assertEqual(
-        ("<tf.distributions.Chi2"
+        ("<tfp.distributions.Chi2"
          " 'silly/'"  # What a silly name that is!
          " batch_shape=(2,)"
          " event_shape=()"
@@ -279,7 +279,7 @@ class DistributionTest(test.TestCase):
 
     exp = tfd.Exponential(rate=array_ops.placeholder(dtype=dtypes.float32))
     self.assertEqual(
-        ("<tf.distributions.Exponential"
+        ("<tfp.distributions.Exponential"
          " 'Exponential/'"
          " batch_shape=<unknown>"
          " event_shape=()"
@@ -290,7 +290,7 @@ class DistributionTest(test.TestCase):
     mvn_static = tfd.MultivariateNormalDiag(
         loc=np.zeros([2, 2]), name="MVN")
     self.assertEqual(
-        ("<tf.distributions.MultivariateNormalDiag"
+        ("<tfp.distributions.MultivariateNormalDiag"
          " 'MVN/'"
          " batch_shape=(2,)"
          " event_shape=(2,)"
@@ -301,7 +301,7 @@ class DistributionTest(test.TestCase):
         loc=array_ops.placeholder(shape=[None, 3], dtype=dtypes.float32),
         name="MVN2")
     self.assertEqual(
-        ("<tf.distributions.MultivariateNormalDiag"
+        ("<tfp.distributions.MultivariateNormalDiag"
          " 'MVN2/'"
          " batch_shape=(?,)"  # Partially known.
          " event_shape=(3,)"
diff --git a/tensorflow/python/ops/distributions/beta.py b/tensorflow/python/ops/distributions/beta.py
index 99d30b0bd1..2ba1ea6744 100644
--- a/tensorflow/python/ops/distributions/beta.py
+++ b/tensorflow/python/ops/distributions/beta.py
@@ -98,10 +98,13 @@ class Beta(distribution.Distribution):
   #### Examples
 
   ```python
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   # Create a batch of three Beta distributions.
   alpha = [1, 2, 3]
   beta = [1, 2, 3]
-  dist = tf.distributions.Beta(alpha, beta)
+  dist = tfd.Beta(alpha, beta)
 
   dist.sample([4, 5])  # Shape [4, 5, 3]
 
@@ -117,7 +120,7 @@ class Beta(distribution.Distribution):
   # Create batch_shape=[2, 3] via parameter broadcast:
   alpha = [[1.], [2]]      # Shape [2, 1]
   beta = [3., 4, 5]        # Shape [3]
-  dist = tf.distributions.Beta(alpha, beta)
+  dist = tfd.Beta(alpha, beta)
 
   # alpha broadcast as: [[1., 1, 1,],
   #                      [2, 2, 2]]
@@ -138,7 +141,7 @@ class Beta(distribution.Distribution):
   ```python
   alpha = tf.constant(1.0)
   beta = tf.constant(2.0)
-  dist = tf.distributions.Beta(alpha, beta)
+  dist = tfd.Beta(alpha, beta)
   samples = dist.sample(5)  # Shape [5]
   loss = tf.reduce_mean(tf.square(samples))  # Arbitrary loss function
   # Unbiased stochastic gradients of the loss function
diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py
index 9104a1d071..415249a958 100644
--- a/tensorflow/python/ops/distributions/dirichlet.py
+++ b/tensorflow/python/ops/distributions/dirichlet.py
@@ -104,10 +104,13 @@ class Dirichlet(distribution.Distribution):
   #### Examples
 
   ```python
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   # Create a single trivariate Dirichlet, with the 3rd class being three times
   # more frequent than the first. I.e., batch_shape=[], event_shape=[3].
   alpha = [1., 2, 3]
-  dist = tf.distributions.Dirichlet(alpha)
+  dist = tfd.Dirichlet(alpha)
 
   dist.sample([4, 5])  # shape: [4, 5, 3]
 
@@ -129,7 +132,7 @@ class Dirichlet(distribution.Distribution):
   # Create batch_shape=[2], event_shape=[3]:
   alpha = [[1., 2, 3],
            [4, 5, 6]]   # shape: [2, 3]
-  dist = tf.distributions.Dirichlet(alpha)
+  dist = tfd.Dirichlet(alpha)
 
   dist.sample([4, 5])  # shape: [4, 5, 2, 3]
 
@@ -144,7 +147,7 @@ class Dirichlet(distribution.Distribution):
 
   ```python
   alpha = tf.constant([1.0, 2.0, 3.0])
-  dist = tf.distributions.Dirichlet(alpha)
+  dist = tfd.Dirichlet(alpha)
   samples = dist.sample(5)  # Shape [5, 3]
   loss = tf.reduce_mean(tf.square(samples))  # Arbitrary loss function
   # Unbiased stochastic gradients of the loss function
diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py
index d6435d3bee..76d980679e 100644
--- a/tensorflow/python/ops/distributions/distribution.py
+++ b/tensorflow/python/ops/distributions/distribution.py
@@ -1121,7 +1121,7 @@ class Distribution(_BaseDistribution):
     where `F` denotes the support of the random variable `X ~ P`.
 
     Args:
-      other: `tf.distributions.Distribution` instance.
+      other: `tfp.distributions.Distribution` instance.
       name: Python `str` prepended to names of ops created by this function.
 
     Returns:
@@ -1152,7 +1152,7 @@ class Distribution(_BaseDistribution):
     denotes (Shanon) cross entropy, and `H[.]` denotes (Shanon) entropy.
 
     Args:
-      other: `tf.distributions.Distribution` instance.
+      other: `tfp.distributions.Distribution` instance.
       name: Python `str` prepended to names of ops created by this function.
 
     Returns:
@@ -1164,7 +1164,7 @@ class Distribution(_BaseDistribution):
       return self._kl_divergence(other)
 
   def __str__(self):
-    return ("tf.distributions.{type_name}("
+    return ("tfp.distributions.{type_name}("
             "\"{self_name}\""
             "{maybe_batch_shape}"
             "{maybe_event_shape}"
@@ -1180,7 +1180,7 @@ class Distribution(_BaseDistribution):
                 dtype=self.dtype.name))
 
   def __repr__(self):
-    return ("<tf.distributions.{type_name} "
+    return ("<tfp.distributions.{type_name} "
             "'{self_name}'"
             " batch_shape={batch_shape}"
             " event_shape={event_shape}"
diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py
index b631f0247c..3293cda874 100644
--- a/tensorflow/python/ops/distributions/gamma.py
+++ b/tensorflow/python/ops/distributions/gamma.py
@@ -100,8 +100,11 @@ class Gamma(distribution.Distribution):
   #### Examples
 
   ```python
-  dist = tf.distributions.Gamma(concentration=3.0, rate=2.0)
-  dist2 = tf.distributions.Gamma(concentration=[3.0, 4.0], rate=[2.0, 3.0])
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
+  dist = tfd.Gamma(concentration=3.0, rate=2.0)
+  dist2 = tfd.Gamma(concentration=[3.0, 4.0], rate=[2.0, 3.0])
   ```
 
   Compute the gradients of samples w.r.t. the parameters:
@@ -109,7 +112,7 @@ class Gamma(distribution.Distribution):
   ```python
   concentration = tf.constant(3.0)
   rate = tf.constant(2.0)
-  dist = tf.distributions.Gamma(concentration, rate)
+  dist = tfd.Gamma(concentration, rate)
   samples = dist.sample(5)  # Shape [5]
   loss = tf.reduce_mean(tf.square(samples))  # Arbitrary loss function
   # Unbiased stochastic gradients of the loss function
diff --git a/tensorflow/python/ops/distributions/kullback_leibler.py b/tensorflow/python/ops/distributions/kullback_leibler.py
index e3c6f3e789..fdeb97bf64 100644
--- a/tensorflow/python/ops/distributions/kullback_leibler.py
+++ b/tensorflow/python/ops/distributions/kullback_leibler.py
@@ -127,8 +127,8 @@ def cross_entropy(ref, other,
   where `F` denotes the support of the random variable `X ~ P`.
 
   Args:
-    ref: `tf.distributions.Distribution` instance.
-    other: `tf.distributions.Distribution` instance.
+    ref: `tfd.Distribution` instance.
+    other: `tfd.Distribution` instance.
     allow_nan_stats: Python `bool`, default `True`. When `True`,
       statistics (e.g., mean, mode, variance) use the value "`NaN`" to
       indicate the result is undefined. When `False`, an exception is raised
diff --git a/tensorflow/python/ops/distributions/normal.py b/tensorflow/python/ops/distributions/normal.py
index d0a987ba7c..2feaf806c0 100644
--- a/tensorflow/python/ops/distributions/normal.py
+++ b/tensorflow/python/ops/distributions/normal.py
@@ -71,15 +71,18 @@ class Normal(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   # Define a single scalar Normal distribution.
-  dist = tf.distributions.Normal(loc=0., scale=3.)
+  dist = tfd.Normal(loc=0., scale=3.)
 
   # Evaluate the cdf at 1, returning a scalar.
   dist.cdf(1.)
 
   # Define a batch of two scalar valued Normals.
   # The first has mean 1 and standard deviation 11, the second 2 and 22.
-  dist = tf.distributions.Normal(loc=[1, 2.], scale=[11, 22.])
+  dist = tfd.Normal(loc=[1, 2.], scale=[11, 22.])
 
   # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
   # returning a length two tensor.
@@ -94,7 +97,7 @@ class Normal(distribution.Distribution):
   ```python
   # Define a batch of two scalar valued Normals.
   # Both have mean 1, but different standard deviations.
-  dist = tf.distributions.Normal(loc=1., scale=[11, 22.])
+  dist = tfd.Normal(loc=1., scale=[11, 22.])
 
   # Evaluate the pdf of both distributions on the same point, 3.0,
   # returning a length 2 tensor.
diff --git a/tensorflow/python/ops/distributions/student_t.py b/tensorflow/python/ops/distributions/student_t.py
index e0cf6f86f1..e8d214bbe0 100644
--- a/tensorflow/python/ops/distributions/student_t.py
+++ b/tensorflow/python/ops/distributions/student_t.py
@@ -91,8 +91,11 @@ class StudentT(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
+  import tensorflow_probability as tfp
+  tfd = tfp.distributions
+
   # Define a single scalar Student t distribution.
-  single_dist = tf.distributions.StudentT(df=3)
+  single_dist = tfd.StudentT(df=3)
 
   # Evaluate the pdf at 1, returning a scalar Tensor.
   single_dist.prob(1.)
@@ -100,9 +103,7 @@ class StudentT(distribution.Distribution):
   # Define a batch of two scalar valued Student t's.
   # The first has degrees of freedom 2, mean 1, and scale 11.
   # The second 3, 2 and 22.
-  multi_dist = tf.distributions.StudentT(df=[2, 3],
-                                                 loc=[1, 2.],
-                                                 scale=[11, 22.])
+  multi_dist = tfd.StudentT(df=[2, 3], loc=[1, 2.], scale=[11, 22.])
 
   # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
   # returning a length two tensor.
@@ -117,7 +118,7 @@ class StudentT(distribution.Distribution):
   ```python
   # Define a batch of two Student's t distributions.
   # Both have df 2 and mean 1, but different scales.
-  dist = tf.distributions.StudentT(df=2, loc=1, scale=[11, 22.])
+  dist = tfd.StudentT(df=2, loc=1, scale=[11, 22.])
 
   # Evaluate the pdf of both distributions on the same point, 3.0,
   # returning a length 2 tensor.
@@ -130,7 +131,7 @@ class StudentT(distribution.Distribution):
   df = tf.constant(2.0)
   loc = tf.constant(2.0)
   scale = tf.constant(11.0)
-  dist = tf.distributions.StudentT(df=df, loc=loc, scale=scale)
+  dist = tfd.StudentT(df=df, loc=loc, scale=scale)
   samples = dist.sample(5)  # Shape [5]
   loss = tf.reduce_mean(tf.square(samples))  # Arbitrary loss function
   # Unbiased stochastic gradients of the loss function
@@ -138,7 +139,6 @@ class StudentT(distribution.Distribution):
   ```
 
   """
-  # pylint: enable=line-too-long
 
   def __init__(self,
                df,
-- 
GitLab


From 05fe0cf733977572a546bf1e6117d491ca1dc221 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 20 Sep 2018 13:32:12 -0700
Subject: [PATCH 0453/1357] Simplifies the ResourceVariable constructor.

PiperOrigin-RevId: 213872127
---
 .../python/ops/resource_variable_ops.py       | 72 ++++++-------------
 1 file changed, 22 insertions(+), 50 deletions(-)

diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 9e477ab8af..4a126e9d7a 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -55,7 +55,7 @@ def get_resource_handle_data(graph_op):
       compat.as_bytes(handle_data))
 
 
-def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode):
+def eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode):
   """Creates a variable handle with information to do shape inference."""
   container = ops.get_default_graph()._container  # pylint: disable=protected-access
   if container is None:
@@ -397,61 +397,33 @@ class ResourceVariable(variables.RefVariable):
           # When in eager mode use a uid for the shared_name, to prevent
           # accidental sharing.
           shared_name = "%s_%d" % (handle_name, ops.uid())
-        if init_from_fn:
-          # Use attr_scope and device(None) to simulate the behavior of
-          # colocate_with when the variable we want to colocate with doesn't
-          # yet exist.
-          if self._in_graph_mode:
-            attr = attr_value_pb2.AttrValue(
-                list=attr_value_pb2.AttrValue.ListValue(
-                    s=[compat.as_bytes("loc:@%s" % handle_name)]))
-            with ops.get_default_graph()._attr_scope({"_class": attr}):
-              with ops.name_scope("Initializer"), ops.device(None):
-                initial_value = ops.convert_to_tensor(
-                    initial_value(), name="initial_value", dtype=dtype)
-              self._handle = _eager_safe_variable_handle(
-                  shape=initial_value.get_shape(),
-                  dtype=initial_value.dtype.base_dtype,
-                  shared_name=shared_name,
-                  name=name,
-                  graph_mode=self._in_graph_mode)
-              self._shape = initial_value.get_shape()
-          else:
-            initial_value = initial_value()
-            with ops.name_scope("Initializer"):
-              initial_value = ops.convert_to_tensor(
-                  initial_value, name="initial_value", dtype=dtype)
-            self._handle = _eager_safe_variable_handle(
-                shape=initial_value.get_shape(),
-                dtype=initial_value.dtype.base_dtype,
-                shared_name=shared_name,
-                name=name,
-                graph_mode=False)
-            self._shape = initial_value.get_shape()
-        # pylint: enable=protected-access
-
-        # Or get the initial value from a Tensor or Python object.
-        else:
-          with ops.name_scope("Initializer"):
+        # Use attr_scope and device(None) to simulate the behavior of
+        # colocate_with when the variable we want to colocate with doesn't
+        # yet exist.
+        attr = attr_value_pb2.AttrValue(
+            list=attr_value_pb2.AttrValue.ListValue(
+                s=[compat.as_bytes("loc:@%s" % handle_name)]))
+        with ops.get_default_graph()._attr_scope({"_class": attr}):
+          with ops.name_scope("Initializer"), ops.device(None):
             initial_value = ops.convert_to_tensor(
-                initial_value, name="initial_value", dtype=dtype)
-          # pylint: disable=protected-access
-          if (self._in_graph_mode and initial_value is not None and
-              initial_value.op._get_control_flow_context() is not None):
-            raise ValueError(
-                "Initializer for variable %s is from inside a control-flow "
-                "construct, such as a loop or conditional. When creating a "
-                "variable inside a loop or conditional, use a lambda as the "
-                "initializer." % name)
-          # pylint: enable=protected-access
-          self._handle = _eager_safe_variable_handle(
+                initial_value() if init_from_fn else initial_value,
+                name="initial_value", dtype=dtype)
+          self._handle = eager_safe_variable_handle(
               shape=initial_value.get_shape(),
               dtype=initial_value.dtype.base_dtype,
               shared_name=shared_name,
               name=name,
               graph_mode=self._in_graph_mode)
-          self._shape = initial_value.get_shape()
-
+        self._shape = initial_value.shape
+        # pylint: disable=protected-access
+        if (self._in_graph_mode and initial_value is not None and
+            initial_value.op._get_control_flow_context() is not None):
+          raise ValueError(
+              "Initializer for variable %s is from inside a control-flow "
+              "construct, such as a loop or conditional. When creating a "
+              "variable inside a loop or conditional, use a lambda as the "
+              "initializer." % name)
+        # pylint: enable=protected-access
         self._unique_id = shared_name
         self._initial_value = initial_value if self._in_graph_mode else None
         self._handle_name = handle_name + ":0"
-- 
GitLab


From c367ba02acc1d292738e3213173acbc0fe04335e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 13:32:27 -0700
Subject: [PATCH 0454/1357] This CL adds a Keras-based mobilenet_v2 feature
 extractor for object detection models.

As part of this CL, we use the Keras mobilenet_v2 application's keyword argument layer injection API to allow the generated network to support the object detection hyperparameters.

PiperOrigin-RevId: 213872175
---
 tensorflow/python/keras/applications/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/applications/__init__.py b/tensorflow/python/keras/applications/__init__.py
index a8b6d55e41..c35cdb15a4 100644
--- a/tensorflow/python/keras/applications/__init__.py
+++ b/tensorflow/python/keras/applications/__init__.py
@@ -63,7 +63,8 @@ def keras_modules_injection(base_fun):
   def wrapper(*args, **kwargs):
     if hasattr(keras_applications, 'get_submodules_from_kwargs'):
       kwargs['backend'] = backend
-      kwargs['layers'] = layers
+      if 'layers' not in kwargs:
+        kwargs['layers'] = layers
       kwargs['models'] = models
       kwargs['utils'] = utils
     return base_fun(*args, **kwargs)
-- 
GitLab


From 88cfc00ad2a33ef1440d8474fa830bce44c13056 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 20 Sep 2018 13:36:02 -0700
Subject: [PATCH 0455/1357] [tf.data] Fixes for two recently introduced
 use-after-free bugs.

1. In ParallelMapIterator, do not call `cond_var_.notify_all()` without holding
   the associated mutex. In some cases, the iterator may have been deleted
   between releasing the lock and notifying the condition variable, which
   leads to a use-after-free. This change applies this style to all use of
   condition variables in tensorflow/core/kernels/data/.

2. In CapturedFunction::RunAsync(), do not use `shared_ptr` to manage
   the lifetime of objects that (potentially) borrow from runtime
   objects. The present code runs the destructor after the `done()`
   callback is called, but the `done()` callback may be the last
   action in a session, and thus trigger destruction of those borrowed
   objects. In that case, the `shared_ptr` destructor may use the
   borrowed objects after they are freed.

PiperOrigin-RevId: 213872829
---
 .../core/kernels/data/captured_function.cc    |  37 +++--
 .../kernels/data/map_and_batch_dataset_op.cc  |   6 +-
 .../data/parallel_interleave_dataset_op.cc    | 130 +++++++++---------
 .../kernels/data/parallel_map_iterator.cc     |   8 +-
 4 files changed, 87 insertions(+), 94 deletions(-)

diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc
index 8a5d30a27c..b5f4072e89 100644
--- a/tensorflow/core/kernels/data/captured_function.cc
+++ b/tensorflow/core/kernels/data/captured_function.cc
@@ -427,17 +427,17 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
     done(s);
     return;
   }
-  std::shared_ptr<OwnedArgsCallFrame> frame(
-      new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_));
+  OwnedArgsCallFrame* frame =
+      new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_);
 
   FunctionLibraryRuntime::Options f_opts;
   f_opts.step_id = CapturedFunction::generate_step_id();
   ResourceMgr* resource_mgr = ctx->lib()->device()->resource_manager();
-  std::shared_ptr<ScopedStepContainer> step_container(new ScopedStepContainer(
+  ScopedStepContainer* step_container = new ScopedStepContainer(
       f_opts.step_id, [resource_mgr](const string& name) {
         resource_mgr->Cleanup(name).IgnoreError();
-      }));
-  f_opts.step_container = step_container.get();
+      });
+  f_opts.step_container = step_container;
   f_opts.runner = ctx->runner();
   if (ctx->lib()->device()->device_type() != DEVICE_CPU) {
     f_opts.create_rendezvous = true;
@@ -448,8 +448,8 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
   // (such as queue kernels) that depend on the non-nullness of
   // `OpKernelContext::cancellation_manager()`, but additional effort
   // will be required to plumb it through the `IteratorContext`.
-  std::shared_ptr<CancellationManager> c_mgr(new CancellationManager);
-  f_opts.cancellation_manager = c_mgr.get();
+  CancellationManager* c_mgr = new CancellationManager;
+  f_opts.cancellation_manager = c_mgr;
   std::shared_ptr<SimpleStepStatsCollector> stats_collector;
   std::shared_ptr<model::Node> node;
   if (ctx->model()) {
@@ -460,19 +460,19 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
   }
   f_opts.stats_collector = stats_collector.get();
 
-  OwnedArgsCallFrame* raw_frame = frame.get();
   auto callback = std::bind(
-      [rets](const std::shared_ptr<CancellationManager>& c_mgr,
-             const FunctionLibraryRuntime::DoneCallback& done,
-             const std::shared_ptr<OwnedArgsCallFrame>& frame,
-             const std::shared_ptr<model::Node>& node,
-             const std::shared_ptr<SimpleStepStatsCollector>& stats_collector,
-             const std::shared_ptr<ScopedStepContainer>& step_container,
-             // Begin unbound arguments.
-             Status s) {
+      [rets, step_container, c_mgr, frame](
+          const FunctionLibraryRuntime::DoneCallback& done,
+          const std::shared_ptr<model::Node>& node,
+          const std::shared_ptr<SimpleStepStatsCollector>& stats_collector,
+          // Begin unbound arguments.
+          Status s) {
+        delete step_container;
+        delete c_mgr;
         if (s.ok()) {
           s = frame->ConsumeRetvals(rets);
         }
+        delete frame;
         if (node) {
           node->add_processing_time(stats_collector->processing_time());
           node->start_work();
@@ -482,11 +482,10 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
           node->stop_work();
         }
       },
-      std::move(c_mgr), std::move(done), std::move(frame), std::move(node),
-      std::move(stats_collector), std::move(step_container),
+      std::move(done), std::move(node), std::move(stats_collector),
       std::placeholders::_1);
 
-  ctx->lib()->Run(f_opts, handle, raw_frame, std::move(callback));
+  ctx->lib()->Run(f_opts, handle, frame, std::move(callback));
 }
 
 CapturedFunction::CapturedFunction(const NameAttrList& func,
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 83896219a3..fb022ddf12 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -206,9 +206,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
             {
               mutex_lock l(mu_);
               num_parallel_calls_ = value;
+              cond_var_.notify_all();
             }
             VLOG(2) << "setting parallelism knob to " << value;
-            cond_var_.notify_all();
           };
           AddTunableParameter(
               ctx, "parallelism", num_parallel_calls_ /* value */, 1 /* min */,
@@ -236,8 +236,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           }
           std::swap(result, batch_results_.front());
           batch_results_.pop_front();
+          cond_var_.notify_all();
         }
-        cond_var_.notify_all();
         return ProcessResult(ctx, result, out_tensors, end_of_sequence);
       }
 
@@ -340,11 +340,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
           LOCKS_EXCLUDED(mu_) {
-        {
           mutex_lock l(mu_);
           num_calls_--;
           result->num_calls--;
-        }
         cond_var_.notify_all();
       }
 
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 9cd46bf5dd..3dac7902f0 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -1241,9 +1241,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
             {
               mutex_lock l(mu_);
               num_parallel_calls_ = value;
+              cond_var_.notify_all();
             }
             VLOG(2) << "setting parallelism knob to " << value;
-            cond_var_.notify_all();
           };
           AddTunableParameter(
               ctx, "parallelism", num_parallel_calls_ /* value */, 1 /* min */,
@@ -1278,8 +1278,8 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
               *end_of_sequence = true;
               return Status::OK();
             }
+            cond_var_.notify_all();
           }
-          cond_var_.notify_all();
           StopWork(ctx);
           result->notification.WaitForNotification();
           StartWork(ctx);
@@ -1425,17 +1425,15 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
         // Release the ownership of the cycle element iterator, closing the
         // iterator if end of input was encountered.
-        {
-          if (end_of_input) {
-            current_elements_[cycle_index].reset();
-          }
-          mutex_lock l(mu_);
-          element_in_use_[cycle_index] = false;
-          num_calls_--;
-          if (end_of_input) {
-            args_list_[cycle_index].clear();
-            num_open_--;
-          }
+        if (end_of_input) {
+          current_elements_[cycle_index].reset();
+        }
+        mutex_lock l(mu_);
+        element_in_use_[cycle_index] = false;
+        num_calls_--;
+        if (end_of_input) {
+          args_list_[cycle_index].clear();
+          num_open_--;
         }
         cond_var_.notify_all();
       }
@@ -1453,32 +1451,44 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         StartWork(ctx.get());
         auto cleanup = gtl::MakeCleanup([this, ctx] { StopWork(ctx.get()); });
         while (true) {
-          {
-            mutex_lock l(mu_);
-            // Wait until this thread is cancelled, the end of input has been
-            // reached, or the cycle element at the `cycle_index_` position is
-            // not in use and there is space in the `invocation_results_` queue.
-            while (!cancelled_ && (!end_of_input_ || num_open_ > 0) &&
-                   (element_in_use_[cycle_index_] ||
-                    num_calls_ >= num_parallel_calls_ ||
-                    invocation_results_.size() >= MaxInvocationResults())) {
-              StopWork(ctx.get());
-              cond_var_.wait(l);
-              StartWork(ctx.get());
-            }
+          mutex_lock l(mu_);
+          // Wait until this thread is cancelled, the end of input has been
+          // reached, or the cycle element at the `cycle_index_` position is
+          // not in use and there is space in the `invocation_results_` queue.
+          while (!cancelled_ && (!end_of_input_ || num_open_ > 0) &&
+                 (element_in_use_[cycle_index_] ||
+                  num_calls_ >= num_parallel_calls_ ||
+                  invocation_results_.size() >= MaxInvocationResults())) {
+            StopWork(ctx.get());
+            cond_var_.wait(l);
+            StartWork(ctx.get());
+          }
 
-            if (cancelled_ || (end_of_input_ && num_open_ == 0)) {
-              return;
-            }
+          if (cancelled_ || (end_of_input_ && num_open_ == 0)) {
+            return;
+          }
 
-            while (!element_in_use_[cycle_index_] &&
-                   (!end_of_input_ || num_open_ > 0) &&
-                   num_calls_ < num_parallel_calls_ &&
-                   invocation_results_.size() < MaxInvocationResults()) {
-              if (!current_elements_[cycle_index_]) {
-                // Try to create a new iterator from the next input element.
-                Status status = input_impl_->GetNext(
-                    ctx.get(), &args_list_[cycle_index_], &end_of_input_);
+          while (!element_in_use_[cycle_index_] &&
+                 (!end_of_input_ || num_open_ > 0) &&
+                 num_calls_ < num_parallel_calls_ &&
+                 invocation_results_.size() < MaxInvocationResults()) {
+            if (!current_elements_[cycle_index_]) {
+              // Try to create a new iterator from the next input element.
+              Status status = input_impl_->GetNext(
+                  ctx.get(), &args_list_[cycle_index_], &end_of_input_);
+              if (!status.ok()) {
+                invocation_results_.emplace_back(new InvocationResult());
+                std::shared_ptr<InvocationResult>& result =
+                    invocation_results_.back();
+                result->status.Update(status);
+                result->notification.Notify();
+                break;
+              }
+              if (!end_of_input_) {
+                Status status = MakeIteratorFromInputElement(
+                    ctx.get(), args_list_[cycle_index_], cycle_index_,
+                    dataset()->captured_func_.get(), prefix(),
+                    &current_elements_[cycle_index_]);
                 if (!status.ok()) {
                   invocation_results_.emplace_back(new InvocationResult());
                   std::shared_ptr<InvocationResult>& result =
@@ -1487,39 +1497,25 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
                   result->notification.Notify();
                   break;
                 }
-                if (!end_of_input_) {
-                  Status status = MakeIteratorFromInputElement(
-                      ctx.get(), args_list_[cycle_index_], cycle_index_,
-                      dataset()->captured_func_.get(), prefix(),
-                      &current_elements_[cycle_index_]);
-                  if (!status.ok()) {
-                    invocation_results_.emplace_back(new InvocationResult());
-                    std::shared_ptr<InvocationResult>& result =
-                        invocation_results_.back();
-                    result->status.Update(status);
-                    result->notification.Notify();
-                    break;
-                  }
-                  ++num_open_;
-                }
+                ++num_open_;
               }
-              if (current_elements_[cycle_index_]) {
-                // Pre-allocate invocation results for outputs to be fetched
-                // and then fetch the outputs asynchronously.
-                std::vector<std::shared_ptr<InvocationResult>> results;
-                results.reserve(dataset()->block_length_);
-                for (int i = 0; i < dataset()->block_length_; ++i) {
-                  invocation_results_.emplace_back(new InvocationResult());
-                  results.push_back(invocation_results_.back());
-                }
-                num_calls_++;
-                element_in_use_[cycle_index_] = true;
-                thread_pool_->Schedule(std::bind(&Iterator::FetchOutputs, this,
-                                                 ctx, cycle_index_,
-                                                 std::move(results)));
+            }
+            if (current_elements_[cycle_index_]) {
+              // Pre-allocate invocation results for outputs to be fetched
+              // and then fetch the outputs asynchronously.
+              std::vector<std::shared_ptr<InvocationResult>> results;
+              results.reserve(dataset()->block_length_);
+              for (int i = 0; i < dataset()->block_length_; ++i) {
+                invocation_results_.emplace_back(new InvocationResult());
+                results.push_back(invocation_results_.back());
               }
-              cycle_index_ = (cycle_index_ + 1) % dataset()->cycle_length_;
+              num_calls_++;
+              element_in_use_[cycle_index_] = true;
+              thread_pool_->Schedule(std::bind(&Iterator::FetchOutputs, this,
+                                               ctx, cycle_index_,
+                                               std::move(results)));
             }
+            cycle_index_ = (cycle_index_ + 1) % dataset()->cycle_length_;
           }
           cond_var_.notify_all();
         }
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 5f6052ce83..20ac518f37 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -63,9 +63,9 @@ class ParallelMapIterator : public DatasetBaseIterator {
         {
           mutex_lock l(mu_);
           num_parallel_calls_ = value;
+          cond_var_.notify_all();
         }
         VLOG(2) << "setting parallelism knob to " << value;
-        cond_var_.notify_all();
       };
       // TODO(jsimsa): Surface the number of threads used by `ctx->runner()` and
       // use it here for the maximum.
@@ -96,8 +96,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
       }
       std::swap(result, invocation_results_.front());
       invocation_results_.pop_front();
+      cond_var_.notify_all();
     }
-    cond_var_.notify_all();
     StopWork(ctx);
     result->notification.WaitForNotification();
     StartWork(ctx);
@@ -201,9 +201,9 @@ class ParallelMapIterator : public DatasetBaseIterator {
     {
       mutex_lock l(mu_);
       num_calls_--;
+      cond_var_.notify_all();
     }
     result->notification.Notify();
-    cond_var_.notify_all();
   }
 
   void CallFunction(const std::shared_ptr<IteratorContext>& ctx,
@@ -275,8 +275,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
           new_calls.push_back(invocation_results_.back());
           num_calls_++;
         }
+        cond_var_.notify_all();
       }
-      cond_var_.notify_all();
       for (const auto& call : new_calls) {
         CallFunction(ctx, call);
       }
-- 
GitLab


From 1f1e5ac6154583d5f87c846d1d7c9c59a77d6e0c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 13:39:28 -0700
Subject: [PATCH 0456/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 213873471
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 64 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 64 +++++++++++++++++++
 2 files changed, 128 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 2360432d96..e30a111096 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -29387,6 +29387,38 @@ op {
     type: DT_BOOL
   }
 }
+op {
+  name: "LowerBound"
+  input_arg {
+    name: "sorted_inputs"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "MakeIterator"
   input_arg {
@@ -75327,6 +75359,38 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "UpperBound"
+  input_arg {
+    name: "sorted_inputs"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "VarHandleOp"
   output_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 29e327753b..594edfd7f0 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -14535,6 +14535,38 @@ op {
     type: DT_BOOL
   }
 }
+op {
+  name: "LowerBound"
+  input_arg {
+    name: "sorted_inputs"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "MakeIterator"
   input_arg {
@@ -36014,6 +36046,38 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "UpperBound"
+  input_arg {
+    name: "sorted_inputs"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "VarHandleOp"
   output_arg {
-- 
GitLab


From d388770922ad1afa95e55597a33836fe74035c75 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Thu, 20 Sep 2018 13:48:43 -0700
Subject: [PATCH 0457/1357] Implement TF graph capture.

PiperOrigin-RevId: 213875284
---
 tensorflow/core/BUILD                         |  16 +-
 tensorflow/core/common_runtime/session_ref.cc | 170 ------
 tensorflow/core/protobuf/replay_log.proto     |  47 ++
 tensorflow/python/BUILD                       |  15 +-
 tensorflow/python/client/session_ref.cc       | 515 ++++++++++++++++++
 .../client}/session_ref.h                     |  15 +-
 tensorflow/python/client/tf_session_helper.cc |   2 +-
 7 files changed, 597 insertions(+), 183 deletions(-)
 delete mode 100644 tensorflow/core/common_runtime/session_ref.cc
 create mode 100644 tensorflow/core/protobuf/replay_log.proto
 create mode 100644 tensorflow/python/client/session_ref.cc
 rename tensorflow/{core/common_runtime => python/client}/session_ref.h (91%)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index ed1818f834..85b6d4ff68 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2988,12 +2988,16 @@ tf_cuda_library(
     ] + tf_additional_device_tracer_deps(),
 )
 
-cc_library(
-    name = "session_ref",
-    srcs = ["common_runtime/session_ref.cc"],
-    hdrs = ["common_runtime/session_ref.h"],
-    copts = tf_copts(),
-    deps = [":core_cpu_base"],
+tf_proto_library_cc(
+    name = "replay_log_proto",
+    srcs = ["protobuf/replay_log.proto"],
+    cc_api_version = 2,
+    protodeps = [
+        ":master_proto",
+    ] + tf_additional_all_protos(),
+    visibility = [
+        "//tensorflow:internal",
+    ],
 )
 
 cc_library(
diff --git a/tensorflow/core/common_runtime/session_ref.cc b/tensorflow/core/common_runtime/session_ref.cc
deleted file mode 100644
index b931ef4229..0000000000
--- a/tensorflow/core/common_runtime/session_ref.cc
+++ /dev/null
@@ -1,170 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/core/common_runtime/session_ref.h"
-
-#include <utility>
-
-namespace tensorflow {
-
-namespace {
-
-// Scope helper to track active calls and manage session lifetime.
-struct RunCounter {
-  std::shared_ptr<Session> session;
-  uint64* value;
-  mutex* m;
-  condition_variable* cv;
-
-  explicit RunCounter(std::shared_ptr<Session> s, uint64* v, mutex* m,
-                      condition_variable* cv)
-      : session(std::move(s)), value(v), m(m), cv(cv) {
-    mutex_lock l(*m);
-    ++*value;
-  }
-
-  ~RunCounter() {
-    mutex_lock l(*m);
-    if (--*value == 0) {
-      cv->notify_all();
-    }
-  }
-};
-
-}  // namespace
-
-Status SessionRef::CheckNotClosed() {
-  mutex_lock l(run_lock_);
-  if (session_ == nullptr) return errors::Cancelled("Session has been closed.");
-  return ::tensorflow::Status::OK();
-}
-
-Status SessionRef::Run(const RunOptions& run_options,
-                       const std::vector<std::pair<string, Tensor> >& inputs,
-                       const std::vector<string>& output_tensor_names,
-                       const std::vector<string>& target_node_names,
-                       std::vector<Tensor>* outputs,
-                       RunMetadata* run_metadata) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->Run(run_options, inputs, output_tensor_names,
-                         target_node_names, outputs, run_metadata);
-}
-
-Status SessionRef::Create(const GraphDef& graph) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->Create(graph);
-}
-
-Status SessionRef::Create(const RunOptions& run_options,
-                          const GraphDef& graph) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->Create(run_options, graph);
-}
-
-Status SessionRef::Extend(const RunOptions& run_options,
-                          const GraphDef& graph) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->Extend(run_options, graph);
-}
-
-Status SessionRef::Extend(const GraphDef& graph) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->Extend(graph);
-}
-
-Status SessionRef::Close(const RunOptions& run_options) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  mutex_lock l(run_lock_);
-  Status status = session_->Close(run_options);
-  session_.reset();
-  while (run_count_ > 0) {
-    run_finished_.wait(l);
-  }
-  return status;
-}
-
-Status SessionRef::Close() {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  mutex_lock l(run_lock_);
-  Status status = session_->Close();
-  session_.reset();
-  while (run_count_ > 0) {
-    run_finished_.wait(l);
-  }
-  return status;
-}
-
-Status SessionRef::Run(const std::vector<std::pair<string, Tensor> >& inputs,
-                       const std::vector<string>& output_tensor_names,
-                       const std::vector<string>& target_node_names,
-                       std::vector<Tensor>* outputs) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->Run(inputs, output_tensor_names, target_node_names,
-                         outputs);
-}
-
-Status SessionRef::ListDevices(std::vector<DeviceAttributes>* response) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->ListDevices(response);
-}
-
-Status SessionRef::PRunSetup(const std::vector<string>& input_names,
-                             const std::vector<string>& output_names,
-                             const std::vector<string>& target_nodes,
-                             string* handle) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->PRunSetup(input_names, output_names, target_nodes, handle);
-}
-
-Status SessionRef::PRun(const string& handle,
-                        const std::vector<std::pair<string, Tensor> >& inputs,
-                        const std::vector<string>& output_names,
-                        std::vector<Tensor>* outputs) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->PRun(handle, inputs, output_names, outputs);
-}
-
-Status SessionRef::MakeCallable(const CallableOptions& callable_options,
-                                CallableHandle* out_handle) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->MakeCallable(callable_options, out_handle);
-}
-
-Status SessionRef::RunCallable(CallableHandle handle,
-                               const std::vector<Tensor>& feed_tensors,
-                               std::vector<Tensor>* fetch_tensors,
-                               RunMetadata* run_metadata) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->RunCallable(handle, feed_tensors, fetch_tensors,
-                                 run_metadata);
-}
-
-Status SessionRef::ReleaseCallable(CallableHandle handle) {
-  TF_RETURN_IF_ERROR(CheckNotClosed());
-  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_);
-  return rc.session->ReleaseCallable(handle);
-}
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/protobuf/replay_log.proto b/tensorflow/core/protobuf/replay_log.proto
new file mode 100644
index 0000000000..7644314fc9
--- /dev/null
+++ b/tensorflow/core/protobuf/replay_log.proto
@@ -0,0 +1,47 @@
+syntax = "proto3";
+
+option cc_enable_arenas = true;
+package tensorflow;
+
+import "tensorflow/core/framework/graph.proto";
+import "tensorflow/core/protobuf/cluster.proto";
+import "tensorflow/core/protobuf/master.proto";
+
+// Records the creation of a new replay session.  We record the device listing
+// here to capture the state of the cluster.
+message NewReplaySession {
+  ListDevicesResponse devices = 1;
+  string session_handle = 2;
+}
+
+message ReplayOp {
+  double start_time_us = 31;
+  double end_time_us = 32;
+
+  oneof op {
+    CreateSessionRequest create_session = 1;
+    ExtendSessionRequest extend_session = 2;
+    PartialRunSetupRequest partial_run_setup = 3;
+    RunStepRequest run_step = 4;
+    CloseSessionRequest close_session = 5;
+    ListDevicesRequest list_devices = 6;
+    ResetRequest reset_request = 7;
+    MakeCallableRequest make_callable = 8;
+    RunCallableRequest run_callable = 9;
+    ReleaseCallableRequest release_callable = 10;
+    NewReplaySession new_replay_session = 11;
+  }
+
+  oneof response {
+    CreateSessionResponse create_session_response = 21;
+    ExtendSessionResponse extend_session_response = 22;
+    PartialRunSetupResponse partial_run_setup_response = 23;
+    RunStepResponse run_step_response = 24;
+    CloseSessionResponse close_session_response = 25;
+    ListDevicesResponse list_devices_response = 26;
+    ResetResponse reset_request_response = 27;
+    MakeCallableResponse make_callable_response = 28;
+    RunCallableResponse run_callable_response = 29;
+    ReleaseCallableResponse release_callable_response = 30;
+  }
+}
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 9730e9933a..79f14466e6 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3763,6 +3763,19 @@ cuda_py_tests(
     ],
 )
 
+cc_library(
+    name = "session_ref",
+    srcs = ["client/session_ref.cc"],
+    hdrs = ["client/session_ref.h"],
+    deps = [
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:master_proto_cc",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:replay_log_proto_cc",
+    ],
+)
+
 tf_cuda_library(
     name = "tf_session_helper",
     srcs = ["client/tf_session_helper.cc"],
@@ -3773,6 +3786,7 @@ tf_cuda_library(
         ":ndarray_tensor_bridge",
         ":numpy_lib",
         ":safe_ptr",
+        ":session_ref",
         ":test_ops_kernels",
         "//tensorflow/c:c_api",
         "//tensorflow/c:c_api_internal",
@@ -3785,7 +3799,6 @@ tf_cuda_library(
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:session_ref",
         "//third_party/py/numpy:headers",
         "//third_party/python_runtime:headers",
     ],
diff --git a/tensorflow/python/client/session_ref.cc b/tensorflow/python/client/session_ref.cc
new file mode 100644
index 0000000000..b2300df0b6
--- /dev/null
+++ b/tensorflow/python/client/session_ref.cc
@@ -0,0 +1,515 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/python/client/session_ref.h"
+
+#include <stdlib.h>
+#include <memory>
+#include <utility>
+
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/io/record_writer.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/protobuf/master.pb.h"
+#include "tensorflow/core/protobuf/named_tensor.pb.h"
+#include "tensorflow/core/protobuf/replay_log.pb.h"
+
+namespace tensorflow {
+
+namespace {
+
+// Scope helper to track active calls and manage session lifetime.
+// SessionRef blocks closing until all active calls complete or are cancelled.
+struct RunCounter {
+  std::shared_ptr<Session> session;
+  uint64* value;
+  mutex* m;
+  condition_variable* cv;
+
+  explicit RunCounter(std::shared_ptr<Session> s, uint64* v, mutex* m,
+                      condition_variable* cv)
+      : session(std::move(s)), value(v), m(m), cv(cv) {
+    mutex_lock l(*m);
+    ++*value;
+  }
+
+  ~RunCounter() {
+    mutex_lock l(*m);
+    if (--*value == 0) {
+      cv->notify_all();
+    }
+  }
+};
+
+std::string SessionToHandle(Session* session) {
+  return strings::Printf("%llu", reinterpret_cast<uint64>(session));
+}
+
+// The Session interface has many methods of the form:
+//
+// X(a, b);
+// X(RunOptions, a, b);
+//
+// Not all sessions support the second case (with an empty RunOptions()).
+// We use this variable as a sentinel to dispatch to the correct call.
+RunOptions* kEmptyRunOptions() {
+  static RunOptions* options = new RunOptions();
+  return options;
+}
+
+}  // namespace
+
+// Run the given session operation, recording start and end timestamps.
+// If the operation returns a bad status, return after flushing the current
+// log request.  This should be run _after_ all request information has been
+// added to the current op.
+#define RUN_WITH_TIMESTAMP(OpName, ...)              \
+  op.set_start_time_us(Env::Default()->NowMicros()); \
+  Status status = session->OpName(__VA_ARGS__);      \
+  op.set_end_time_us(Env::Default()->NowMicros());   \
+  if (!status.ok()) {                                \
+    Flush(op).IgnoreError();                         \
+    return status;                                   \
+  }
+
+// Records requests (and optionally responses) performed against a session.
+// The resulting replay log can be used with the `tf_replay` tool to replicate
+// the operations against a simulated environment, without requiring the
+// original code or cluster setup.
+//
+// Session logging by setting the TF_REPLAY_LOG_FILE environment variable.
+class SessionLogger {
+ public:
+  SessionLogger() {
+    std::string log_name = getenv("TF_REPLAY_LOG_FILE");
+    TF_CHECK_OK(
+        Env::Default()->RecursivelyCreateDir(string(io::Dirname(log_name))));
+    Env::Default()->DeleteFile(log_name).IgnoreError();
+    TF_CHECK_OK(Env::Default()->NewWritableFile(log_name, &log_file_));
+
+    log_writer_ = absl::make_unique<io::RecordWriter>(log_file_.get());
+  }
+
+  Status RecordCreateSession(Session* session) {
+    LOG(INFO) << "Capturing devices for session.";
+    ReplayOp op;
+    NewReplaySession* req = op.mutable_new_replay_session();
+
+    std::vector<DeviceAttributes> devices;
+    TF_CHECK_OK(session->ListDevices(&devices));
+    for (const DeviceAttributes& dev : devices) {
+      *req->mutable_devices()->add_local_device() = dev;
+    }
+
+    req->set_session_handle(SessionToHandle(session));
+    return Flush(op);
+  }
+
+  Status RecordRun(Session* session,
+                   const std::vector<std::pair<string, Tensor> >& inputs,
+                   const std::vector<string>& output_tensor_names,
+                   const std::vector<string>& target_node_names,
+                   std::vector<Tensor>* outputs) {
+    return RecordRun(session, *kEmptyRunOptions(), inputs, output_tensor_names,
+                     target_node_names, outputs, nullptr);
+  }
+
+  Status RecordRun(Session* session, const RunOptions& run_options,
+                   const std::vector<std::pair<string, Tensor> >& inputs,
+                   const std::vector<string>& output_tensor_names,
+                   const std::vector<string>& target_node_names,
+                   std::vector<Tensor>* outputs, RunMetadata* run_metadata) {
+    ReplayOp op;
+    RunStepRequest* req = op.mutable_run_step();
+    RunStepResponse* resp = op.mutable_run_step_response();
+
+    req->set_session_handle(SessionToHandle(session));
+    *req->mutable_options() = run_options;
+
+    for (const auto& it : inputs) {
+      NamedTensorProto* feed = req->add_feed();
+      feed->set_name(it.first);
+      it.second.AsProtoField(feed->mutable_tensor());
+    }
+
+    // Build an index from fetch tensor name to first index in
+    // output_tensor_names.
+    std::unordered_map<string, int> output_name_to_offset;
+    for (int i = 0; i < output_tensor_names.size(); ++i) {
+      const string& name = output_tensor_names[i];
+      if (output_name_to_offset.insert(std::make_pair(name, i)).second) {
+        req->add_fetch(name);
+      }
+    }
+    for (const string& target : target_node_names) {
+      req->add_target(target);
+    }
+
+    if (&run_options == kEmptyRunOptions()) {
+      RUN_WITH_TIMESTAMP(Run, inputs, output_tensor_names, target_node_names,
+                         outputs);
+    } else {
+      RUN_WITH_TIMESTAMP(Run, run_options, inputs, output_tensor_names,
+                         target_node_names, outputs, run_metadata);
+    }
+
+    for (size_t i = 0; i < outputs->size(); ++i) {
+      const Tensor& tensor = (*outputs)[i];
+      NamedTensorProto* tproto = resp->add_tensor();
+      tensor.AsProtoField(tproto->mutable_tensor());
+      tproto->set_name(output_tensor_names[i]);
+    }
+
+    if (run_metadata) {
+      *resp->mutable_metadata() = *run_metadata;
+    }
+
+    return Flush(op);
+  }
+
+  Status RecordCreate(Session* session, const GraphDef& graph) {
+    return RecordCreate(session, *kEmptyRunOptions(), graph);
+  }
+
+  // N.B. RunOptions is not stored (it has no entry in CreateRequest)
+  Status RecordCreate(Session* session, const RunOptions& run_options,
+                      const GraphDef& graph) {
+    ReplayOp op;
+    CreateSessionRequest* req = op.mutable_create_session();
+    *req->mutable_graph_def() = graph;
+
+    CreateSessionResponse* resp = op.mutable_create_session_response();
+    if (&run_options == kEmptyRunOptions()) {
+      RUN_WITH_TIMESTAMP(Create, graph);
+    } else {
+      RUN_WITH_TIMESTAMP(Create, run_options, graph);
+    }
+    resp->set_session_handle(SessionToHandle(session));
+    return Flush(op);
+  }
+
+  Status RecordExtend(Session* session, const GraphDef& graph) {
+    return RecordExtend(session, *kEmptyRunOptions(), graph);
+  }
+
+  // N.B. RunOptions is not stored (it has no entry in ExtendRequest)
+  Status RecordExtend(Session* session, const RunOptions& run_options,
+                      const GraphDef& graph) {
+    ReplayOp op;
+    ExtendSessionRequest* req = op.mutable_extend_session();
+    op.mutable_extend_session_response();
+    req->set_session_handle(SessionToHandle(session));
+    *req->mutable_graph_def() = graph;
+    if (&run_options == kEmptyRunOptions()) {
+      RUN_WITH_TIMESTAMP(Extend, graph);
+    } else {
+      RUN_WITH_TIMESTAMP(Extend, run_options, graph);
+    }
+
+    return Flush(op);
+  }
+
+  Status RecordClose(Session* session) {
+    return RecordClose(session, *kEmptyRunOptions());
+  }
+
+  // N.B. RunOptions is not stored (it has no entry in CloseRequest)
+  Status RecordClose(Session* session, const RunOptions& run_options) {
+    mutex_lock l(log_mutex_);
+    ReplayOp op;
+    CloseSessionRequest* req = op.mutable_close_session();
+    req->set_session_handle(SessionToHandle(session));
+    op.mutable_close_session_response();
+    if (&run_options == kEmptyRunOptions()) {
+      RUN_WITH_TIMESTAMP(Close);
+    } else {
+      RUN_WITH_TIMESTAMP(Close, run_options);
+    }
+    return Flush(op);
+  }
+
+  Status RecordListDevices(Session* session,
+                           std::vector<DeviceAttributes>* response) {
+    mutex_lock l(log_mutex_);
+    ReplayOp op;
+    ListDevicesRequest* req = op.mutable_list_devices();
+    ListDevicesResponse* resp = op.mutable_list_devices_response();
+    req->set_session_handle(SessionToHandle(session));
+    RUN_WITH_TIMESTAMP(ListDevices, response);
+
+    // TODO(power) -- local vs remote device distinction is lost here!
+    *resp->mutable_local_device() = {response->begin(), response->end()};
+    return Flush(op);
+  }
+
+  Status RecordPRunSetup(Session* session,
+                         const std::vector<string>& input_names,
+                         const std::vector<string>& output_names,
+                         const std::vector<string>& target_nodes,
+                         string* handle) {
+    mutex_lock l(log_mutex_);
+    ReplayOp op;
+    PartialRunSetupRequest* req = op.mutable_partial_run_setup();
+    req->set_session_handle(SessionToHandle(session));
+    for (auto& input : input_names) {
+      req->add_feed(input);
+    }
+    for (auto& output : output_names) {
+      req->add_fetch(output);
+    }
+    for (auto& target : target_nodes) {
+      req->add_target(target);
+    }
+    RUN_WITH_TIMESTAMP(PRunSetup, input_names, output_names, target_nodes,
+                       handle);
+    op.mutable_partial_run_setup_response()->set_partial_run_handle(*handle);
+    return Flush(op);
+  }
+
+  Status RecordPRun(Session* session, const string& handle,
+                    const std::vector<std::pair<string, Tensor> >& inputs,
+                    const std::vector<string>& output_names,
+                    std::vector<Tensor>* outputs) {
+    ReplayOp op;
+    RunStepRequest* req = op.mutable_run_step();
+    RunStepResponse* resp = op.mutable_run_step_response();
+    req->set_session_handle(SessionToHandle(session));
+
+    // Mark this step as a partial run for replay.
+    req->set_partial_run_handle(handle);
+    for (auto& input : inputs) {
+      auto* feed = req->add_feed();
+      feed->set_name(input.first);
+      input.second.AsProtoField(feed->mutable_tensor());
+    }
+
+    for (auto& output : output_names) {
+      req->add_fetch(output);
+    }
+
+    RUN_WITH_TIMESTAMP(PRun, handle, inputs, output_names, outputs);
+
+    for (size_t i = 0; i < outputs->size(); ++i) {
+      const Tensor& tensor = (*outputs)[i];
+      NamedTensorProto* tproto = resp->add_tensor();
+      tensor.AsProtoField(tproto->mutable_tensor());
+      tproto->set_name(output_names[i]);
+    }
+
+    return Flush(op);
+  }
+
+  Status RecordMakeCallable(Session* session,
+                            const CallableOptions& callable_options,
+                            Session::CallableHandle* handle) {
+    ReplayOp op;
+    MakeCallableRequest* req = op.mutable_make_callable();
+    req->set_session_handle(SessionToHandle(session));
+    *req->mutable_options() = callable_options;
+
+    RUN_WITH_TIMESTAMP(MakeCallable, callable_options, handle);
+
+    MakeCallableResponse* resp = op.mutable_make_callable_response();
+    resp->set_handle(*handle);
+
+    return Flush(op);
+  }
+
+  Status RecordRunCallable(Session* session, Session::CallableHandle handle,
+                           const std::vector<Tensor>& feed_tensors,
+                           std::vector<Tensor>* fetch_tensors,
+                           RunMetadata* run_metadata) {
+    ReplayOp op;
+    RunCallableRequest* req = op.mutable_run_callable();
+    req->set_session_handle(SessionToHandle(session));
+    req->set_handle(handle);
+    for (auto& tensor : feed_tensors) {
+      tensor.AsProtoField(req->add_feed());
+    }
+    RUN_WITH_TIMESTAMP(RunCallable, handle, feed_tensors, fetch_tensors,
+                       run_metadata);
+
+    RunCallableResponse* resp = op.mutable_run_callable_response();
+    if (run_metadata) {
+      *resp->mutable_metadata() = *run_metadata;
+    }
+    for (const Tensor& tensor : *fetch_tensors) {
+      tensor.AsProtoTensorContent(resp->add_fetch());
+    }
+    return Flush(op);
+  }
+
+  Status RecordReleaseCallable(Session* session,
+                               Session::CallableHandle handle) {
+    ReplayOp op;
+    ReleaseCallableRequest* req = op.mutable_release_callable();
+    req->set_session_handle(SessionToHandle(session));
+    req->set_handle(handle);
+    RUN_WITH_TIMESTAMP(ReleaseCallable, handle);
+    return Flush(op);
+  }
+
+ private:
+  Status Flush(const ReplayOp& op) {
+    string buf;
+    op.SerializeToString(&buf);
+    TF_RETURN_IF_ERROR(log_writer_->WriteRecord(buf));
+
+    // Flushing the RecordWriter _does not_ flush the underlying file.
+    TF_RETURN_IF_ERROR(log_writer_->Flush());
+    return log_file_->Flush();
+  }
+
+  mutex log_mutex_;
+  std::unique_ptr<io::RecordWriter> log_writer_;
+  std::unique_ptr<WritableFile> log_file_;
+};
+
+static SessionLogger* global_session_logger() {
+  static SessionLogger* logger = new SessionLogger();
+  return logger;
+}
+
+SessionRef::SessionRef(Session* session) : session_(session) {
+  if (getenv("TF_REPLAY_LOG_FILE") != nullptr) {
+    logger_ = global_session_logger();
+    logger_->RecordCreateSession(this->session_.get()).IgnoreError();
+  } else {
+    logger_ = nullptr;
+  }
+}
+
+SessionRef::~SessionRef() = default;
+
+Status SessionRef::CheckNotClosed() {
+  mutex_lock l(run_lock_);
+  if (session_ == nullptr) return errors::Cancelled("Session has been closed.");
+  return ::tensorflow::Status::OK();
+}
+
+// If logging is active, log the start and end time of the operation along with
+// the request and response.
+#define LOG_AND_RUN_OPERATION(OpName, ...)                          \
+  TF_RETURN_IF_ERROR(CheckNotClosed());                             \
+  RunCounter rc(session_, &run_count_, &run_lock_, &run_finished_); \
+  if (!logger_) {                                                   \
+    return rc.session->OpName(__VA_ARGS__);                         \
+  }                                                                 \
+  return logger_->Record##OpName(rc.session.get(), __VA_ARGS__);
+
+Status SessionRef::Run(const RunOptions& run_options,
+                       const std::vector<std::pair<string, Tensor> >& inputs,
+                       const std::vector<string>& output_tensor_names,
+                       const std::vector<string>& target_node_names,
+                       std::vector<Tensor>* outputs,
+                       RunMetadata* run_metadata) {
+  LOG_AND_RUN_OPERATION(Run, run_options, inputs, output_tensor_names,
+                        target_node_names, outputs, run_metadata);
+}
+
+Status SessionRef::Run(const std::vector<std::pair<string, Tensor> >& inputs,
+                       const std::vector<string>& output_tensor_names,
+                       const std::vector<string>& target_node_names,
+                       std::vector<Tensor>* outputs) {
+  LOG_AND_RUN_OPERATION(Run, inputs, output_tensor_names, target_node_names,
+                        outputs);
+}
+
+Status SessionRef::Create(const GraphDef& graph) {
+  LOG_AND_RUN_OPERATION(Create, graph);
+}
+
+Status SessionRef::Create(const RunOptions& run_options,
+                          const GraphDef& graph) {
+  LOG_AND_RUN_OPERATION(Create, run_options, graph);
+}
+
+Status SessionRef::Extend(const RunOptions& run_options,
+                          const GraphDef& graph) {
+  LOG_AND_RUN_OPERATION(Extend, run_options, graph);
+}
+
+Status SessionRef::Extend(const GraphDef& graph) {
+  LOG_AND_RUN_OPERATION(Extend, graph);
+}
+
+Status SessionRef::ListDevices(std::vector<DeviceAttributes>* response) {
+  LOG_AND_RUN_OPERATION(ListDevices, response);
+}
+
+Status SessionRef::PRunSetup(const std::vector<string>& input_names,
+                             const std::vector<string>& output_names,
+                             const std::vector<string>& target_nodes,
+                             string* handle) {
+  LOG_AND_RUN_OPERATION(PRunSetup, input_names, output_names, target_nodes,
+                        handle);
+}
+
+Status SessionRef::PRun(const string& handle,
+                        const std::vector<std::pair<string, Tensor> >& inputs,
+                        const std::vector<string>& output_names,
+                        std::vector<Tensor>* outputs) {
+  LOG_AND_RUN_OPERATION(PRun, handle, inputs, output_names, outputs);
+}
+
+Status SessionRef::MakeCallable(const CallableOptions& callable_options,
+                                CallableHandle* out_handle) {
+  LOG_AND_RUN_OPERATION(MakeCallable, callable_options, out_handle);
+}
+
+Status SessionRef::RunCallable(CallableHandle handle,
+                               const std::vector<Tensor>& feed_tensors,
+                               std::vector<Tensor>* fetch_tensors,
+                               RunMetadata* run_metadata) {
+  LOG_AND_RUN_OPERATION(RunCallable, handle, feed_tensors, fetch_tensors,
+                        run_metadata);
+}
+
+Status SessionRef::ReleaseCallable(CallableHandle handle) {
+  LOG_AND_RUN_OPERATION(ReleaseCallable, handle);
+}
+
+Status SessionRef::Close(const RunOptions& run_options) {
+  TF_RETURN_IF_ERROR(CheckNotClosed());
+  mutex_lock l(run_lock_);
+  Status status;
+  if (logger_) {
+    status = logger_->RecordClose(session_.get(), run_options);
+  } else {
+    status = session_->Close(run_options);
+  }
+  session_.reset();
+  while (run_count_ > 0) {
+    run_finished_.wait(l);
+  }
+  return status;
+}
+
+Status SessionRef::Close() {
+  TF_RETURN_IF_ERROR(CheckNotClosed());
+  mutex_lock l(run_lock_);
+  Status status;
+  if (logger_) {
+    status = logger_->RecordClose(session_.get());
+  } else {
+    status = session_->Close();
+  }
+  session_.reset();
+  while (run_count_ > 0) {
+    run_finished_.wait(l);
+  }
+  return status;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/session_ref.h b/tensorflow/python/client/session_ref.h
similarity index 91%
rename from tensorflow/core/common_runtime/session_ref.h
rename to tensorflow/python/client/session_ref.h
index 9459e7edbe..b0fb12b189 100644
--- a/tensorflow/core/common_runtime/session_ref.h
+++ b/tensorflow/python/client/session_ref.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SESSION_REF_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_SESSION_REF_H_
+#ifndef TENSORFLOW_PYTHON_CLIENT_SESSION_REF_H_
+#define TENSORFLOW_PYTHON_CLIENT_SESSION_REF_H_
 
 #include <memory>
 
@@ -22,6 +22,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+class SessionLogger;
+
 // A `SessionRef` manages the lifetime of a wrapped `Session` pointer.
 //
 // SessionRef blocks the return of Close() until all pending operations have
@@ -29,8 +31,8 @@ namespace tensorflow {
 // subsequent operations on the SessionRef object will return errors::Cancelled.
 class SessionRef : public Session {
  public:
-  SessionRef(Session* session) : session_(session) {}
-  virtual ~SessionRef() {}
+  explicit SessionRef(Session* session);
+  ~SessionRef() override;
 
   Status Create(const GraphDef& graph) override;
   Status Extend(const GraphDef& graph) override;
@@ -78,9 +80,12 @@ class SessionRef : public Session {
   uint64 run_count_ GUARDED_BY(run_lock_) = {0};
   std::shared_ptr<Session> session_;
 
+  // Borrowed reference to global session logger.
+  SessionLogger* logger_;
+
   Status CheckNotClosed();
 };
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_SESSION_REF_H_
+#endif  // TENSORFLOW_PYTHON_CLIENT_SESSION_REF_H_
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index bcd4af2912..dc0c10bab7 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/c/tf_status_helper.h"
-#include "tensorflow/core/common_runtime/session_ref.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
@@ -31,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/equal_graph_def.h"
+#include "tensorflow/python/client/session_ref.h"
 #include "tensorflow/python/lib/core/ndarray_tensor.h"
 #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h"
 #include "tensorflow/python/lib/core/safe_ptr.h"
-- 
GitLab


From 17dbe77f5ad47e8fd71924f12b3bc53c05afbacf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 13:56:49 -0700
Subject: [PATCH 0458/1357] Fix bug in Pow optimizer rule when broadcasting is
 involved. Minor cleanup by moving the helper function ShapesEqual to
 GraphProperties and adding unit tests for it.

PiperOrigin-RevId: 213876779
---
 tensorflow/core/grappler/optimizers/BUILD     | 35 +----------
 .../optimizers/arithmetic_optimizer.cc        | 61 ++++++++++---------
 .../optimizers/arithmetic_optimizer_test.cc   | 19 ++++--
 .../grappler/optimizers/constant_folding.cc   | 27 ++------
 .../grappler/optimizers/shape_optimizer.cc    |  3 +-
 tensorflow/core/grappler/utils/BUILD          | 29 +++++++++
 .../{optimizers => utils}/symbolic_shapes.cc  |  2 +-
 .../{optimizers => utils}/symbolic_shapes.h   |  6 +-
 .../symbolic_shapes_test.cc                   |  2 +-
 9 files changed, 89 insertions(+), 95 deletions(-)
 rename tensorflow/core/grappler/{optimizers => utils}/symbolic_shapes.cc (99%)
 rename tensorflow/core/grappler/{optimizers => utils}/symbolic_shapes.h (94%)
 rename tensorflow/core/grappler/{optimizers => utils}/symbolic_shapes_test.cc (98%)

diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 029205248b..261dee4382 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -7,10 +7,6 @@ load("//tensorflow:tensorflow.bzl", "tf_cuda_only_cc_test")
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
 
 # Platform specific build config
-load(
-    "//tensorflow/core:platform/default/build_config.bzl",
-    "tf_protos_grappler",
-)
 load(
     "//tensorflow/core:platform/default/build_config_root.bzl",
     "if_static",
@@ -97,7 +93,6 @@ cc_library(
     deps = [
         ":evaluation_utils",
         ":graph_optimizer",
-        ":symbolic_shapes",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
@@ -107,6 +102,7 @@ cc_library(
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/clusters:cluster",
         "//tensorflow/core/grappler/costs:graph_properties",
+        "//tensorflow/core/grappler/utils:symbolic_shapes",
     ],
 )
 
@@ -261,7 +257,6 @@ cc_library(
         ":constant_folding",
         ":graph_optimizer",
         ":graph_optimizer_stage",
-        ":symbolic_shapes",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
@@ -270,6 +265,7 @@ cc_library(
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
+        "//tensorflow/core/grappler/utils:symbolic_shapes",
         "//tensorflow/core/grappler/utils:topological_sort",
     ],
 )
@@ -648,7 +644,6 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_optimizer",
-        ":symbolic_shapes",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
@@ -658,6 +653,7 @@ cc_library(
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:frame",
+        "//tensorflow/core/grappler/utils:symbolic_shapes",
     ],
 )
 
@@ -714,31 +710,6 @@ tf_cuda_cc_test(
     ],
 )
 
-cc_library(
-    name = "symbolic_shapes",
-    srcs = ["symbolic_shapes.cc"],
-    hdrs = ["symbolic_shapes.h"],
-    visibility = ["//visibility:public"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:protos_all_cc",
-    ] + tf_protos_grappler(),
-)
-
-tf_cc_test(
-    name = "symbolic_shapes_test",
-    srcs = ["symbolic_shapes_test.cc"],
-    deps = [
-        ":symbolic_shapes",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-    ],
-)
-
 cc_library(
     name = "debug_stripper",
     srcs = ["debug_stripper.cc"],
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 992e85d2c6..76a9dca73b 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -35,8 +35,8 @@ limitations under the License.
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer_stage.h"
-#include "tensorflow/core/grappler/optimizers/symbolic_shapes.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/grappler/utils/symbolic_shapes.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
@@ -2367,26 +2367,24 @@ class ConvertPowStage : public ArithmeticOptimizerStage {
   }
 
   Status TrySimplify(NodeDef* node, string* simplified_node_name) override {
-    const auto& p = ctx().graph_properties->GetInputProperties(node->name())[1];
-    for (int i = 0; i < p.shape().dim_size(); ++i) {
-      if (p.shape().dim(i).size() < 0) {
+    const auto& pow_props =
+        ctx().graph_properties->GetInputProperties(node->name())[1];
+    for (int i = 0; i < pow_props.shape().dim_size(); ++i) {
+      if (pow_props.shape().dim(i).size() < 0) {
         // skip if p is is not fully defined.
         return Status::OK();
       }
     }
-    if (TensorShape::IsValid(p.shape()) && p.has_value()) {
-      Tensor pow(p.dtype(), p.shape());
-      if (!pow.FromProto(p.value())) {
+    if (TensorShape::IsValid(pow_props.shape()) && pow_props.has_value()) {
+      Tensor pow(pow_props.dtype(), pow_props.shape());
+      if (!pow.FromProto(pow_props.value())) {
         return errors::InvalidArgument("Cannot parse tensor from proto: ",
-                                       p.value().DebugString());
+                                       pow_props.value().DebugString());
       }
 
       complex128 prev, curr;
       for (int i = 0; i < pow.NumElements(); ++i) {
-        if (!GetElementUnexhaustive(pow, i,
-                                    {DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE,
-                                     DT_COMPLEX64, DT_COMPLEX128},
-                                    &curr)) {
+        if (!GetElementUnexhaustive(pow, i, {pow_props.dtype()}, &curr)) {
           // input data type is not supported by Pow. Skip.
           return Status::OK();
         }
@@ -2399,12 +2397,19 @@ class ConvertPowStage : public ArithmeticOptimizerStage {
       NodeDef *x, *y;
       TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &x));
       TF_RETURN_IF_ERROR(GetInputNode(node->input(1), &y));
+      const auto& value_props =
+          ctx().graph_properties->GetInputProperties(node->name())[0];
+      const TensorShapeProto& output_shape =
+          ctx().graph_properties->GetOutputProperties(node->name())[0].shape();
       if (curr == complex128(2, 0)) {
         node->set_op("Square");
         node->set_input(1, AsControlDependency(y->name()));
         AddToOptimizationQueue(node);
         AddToOptimizationQueue(y);
-      } else if (curr == complex128(1, 0)) {
+      } else if (curr == complex128(1, 0) &&
+                 ShapesSymbolicallyEqual(value_props.shape(), output_shape)) {
+        // Pow could be used to broadcast, so make sure the shapes of the two
+        // arguments are identical before replacing Pow with Identity.
         node->set_op("Identity");
         node->set_input(1, AsControlDependency(y->name()));
         AddToOptimizationQueue(node);
@@ -2414,20 +2419,20 @@ class ConvertPowStage : public ArithmeticOptimizerStage {
         node->set_input(1, AsControlDependency(y->name()));
         AddToOptimizationQueue(node);
         AddToOptimizationQueue(y);
-      } else if (curr == complex128(0, 0)) {
-        const auto& b =
-            ctx().graph_properties->GetInputProperties(node->name())[0];
-        for (int i = 0; i < b.shape().dim_size(); ++i) {
-          if (b.shape().dim(i).size() < 0) {
+      } else if (curr == complex128(0, 0) &&
+                 ShapesSymbolicallyEqual(value_props.shape(), output_shape)) {
+        for (int i = 0; i < value_props.shape().dim_size(); ++i) {
+          if (value_props.shape().dim(i).size() < 0) {
             // skip if b is is not fully defined.
             return Status::OK();
           }
         }
-        if (TensorShape::IsValid(b.shape()) && b.has_value()) {
-          Tensor base(b.dtype(), b.shape());
-          if (!base.FromProto(b.value())) {
+        if (TensorShape::IsValid(value_props.shape()) &&
+            value_props.has_value()) {
+          Tensor base(value_props.dtype(), value_props.shape());
+          if (!base.FromProto(value_props.value())) {
             return errors::InvalidArgument("Cannot parse tensor from proto: ",
-                                           b.value().DebugString());
+                                           value_props.value().DebugString());
           }
           node->set_op("Const");
           Tensor c(base.dtype(), base.shape());
@@ -2585,12 +2590,10 @@ class ConvertExpm1Stage : public ArithmeticOptimizerStage {
   ~ConvertExpm1Stage() override = default;
 
   bool IsSupported(const NodeDef* node) const override {
-    if (!IsSub(*node))
-      return false;
+    if (!IsSub(*node)) return false;
 
     NodeDef* input;
-    if (!GetInputNode(node->input(0), &input).ok())
-      return false;
+    if (!GetInputNode(node->input(0), &input).ok()) return false;
 
     return IsExp(*input);
   }
@@ -2610,10 +2613,8 @@ class ConvertExpm1Stage : public ArithmeticOptimizerStage {
       return Status::OK();
     }
 
-    const auto& t =
-        ctx().graph_properties->GetInputProperties(exp->name())[0];
-    const auto& c =
-        ctx().graph_properties->GetInputProperties(node->name())[1];
+    const auto& t = ctx().graph_properties->GetInputProperties(exp->name())[0];
+    const auto& c = ctx().graph_properties->GetInputProperties(node->name())[1];
     for (int k = 0; k < c.shape().dim_size(); ++k) {
       // Skip if c shape is not fully determined.
       if (c.shape().dim(k).size() < 0) {
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 88839d944c..77f3c64c65 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -2474,6 +2474,9 @@ TEST_F(ArithmeticOptimizerTest, ConvertPow) {
   auto y_Point5 = ops::Const(s.WithOpName("y_.5"), {-0.5f, -0.5f}, {1, 2});
   auto y_1 = ops::Const(s.WithOpName("y_1"), {-1.0f, -1.0f}, {1, 2});
   auto y = ops::Const(s.WithOpName("y"), {3.0f, 4.0f}, {1, 2});
+  auto z = ops::Const(s.WithOpName("z"), {42.0f}, {});
+  auto ones = ops::Const(s.WithOpName("ones"), {1.0f, 1.0f, 1.0f}, {1, 3});
+  auto zeros = ops::Const(s.WithOpName("zeros"), {0.0f, 0.0f, 0.0f}, {1, 3});
   Output out2 = ops::Pow(s.WithOpName("out2"), x, y2);
   Output out1 = ops::Pow(s.WithOpName("out1"), x, y1);
   Output outPoint5 = ops::Pow(s.WithOpName("out.5"), x, yPoint5);
@@ -2481,21 +2484,24 @@ TEST_F(ArithmeticOptimizerTest, ConvertPow) {
   Output out_Point5 = ops::Pow(s.WithOpName("out_.5"), x, y_Point5);
   Output out_1 = ops::Pow(s.WithOpName("out_1"), x, y_1);
   Output out = ops::Pow(s.WithOpName("out"), x, y);
+  Output out_bcast1 = ops::Pow(s.WithOpName("out_bcast1"), z, ones);
+  Output out_bcast2 = ops::Pow(s.WithOpName("out_bcast2"), z, zeros);
 
   GrapplerItem item;
-  item.fetch = {"out2", "out1", "out.5", "out0", "out_.5", "out_1", "out"};
+  item.fetch = {"out2",  "out1", "out.5",      "out0",      "out_.5",
+                "out_1", "out",  "out_bcast1", "out_bcast2"};
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
-  EXPECT_EQ(7, tensors_expected.size());
+  EXPECT_EQ(9, tensors_expected.size());
 
   GraphDef got;
   ArithmeticOptimizer optimizer;
   EnableOnlyConvertPow(&optimizer);
   OptimizeAndPrune(&optimizer, &item, &got);
   auto tensors = EvaluateNodes(got, item.fetch);
-  EXPECT_EQ(7, tensors.size());
+  EXPECT_EQ(9, tensors.size());
 
-  for (int i = 0; i < 7; ++i) {
+  for (int i = 0; i < tensors.size(); ++i) {
     EXPECT_EQ(tensors[i].NumElements(), tensors_expected[i].NumElements());
     test::ExpectTensorNear<float>(tensors[i], tensors_expected[i], 1e-6);
   }
@@ -2509,6 +2515,9 @@ TEST_F(ArithmeticOptimizerTest, ConvertPow) {
   AddNode("y_.5", "Const", {}, {}, &want);
   AddNode("y_1", "Const", {}, {}, &want);
   AddNode("y", "Const", {}, {}, &want);
+  AddNode("z", "Const", {}, {}, &want);
+  AddNode("ones", "Const", {}, {}, &want);
+  AddNode("zeros", "Const", {}, {}, &want);
   AddNode("out2", "Square", {"x", AsControlDependency("y2")}, {}, &want);
   AddNode("out1", "Identity", {"x", AsControlDependency("y1")}, {}, &want);
   AddNode("out.5", "Sqrt", {"x", AsControlDependency("y.5")}, {}, &want);
@@ -2517,6 +2526,8 @@ TEST_F(ArithmeticOptimizerTest, ConvertPow) {
   AddNode("out_.5", "Rsqrt", {"x", AsControlDependency("y_.5")}, {}, &want);
   AddNode("out_1", "Reciprocal", {"x", AsControlDependency("y_1")}, {}, &want);
   AddNode("out", "Pow", {"x", "y"}, {}, &want);
+  AddNode("out_bcast1", "Pow", {"z", "ones"}, {}, &want);
+  AddNode("out_bcast2", "Pow", {"z", "zeros"}, {}, &want);
 
   CompareGraphs(want, got);
 }
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 99737a71eb..cfbd298f11 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -32,8 +32,8 @@ limitations under the License.
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/optimizers/evaluation_utils.h"
-#include "tensorflow/core/grappler/optimizers/symbolic_shapes.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/grappler/utils/symbolic_shapes.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -437,25 +437,6 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
 }
 
 namespace {
-bool ShapesEqual(const TensorShapeProto& shape1,
-                 const TensorShapeProto& shape2) {
-  if (shape1.unknown_rank() || shape2.unknown_rank()) {
-    return false;
-  }
-  if (shape1.dim_size() != shape2.dim_size()) {
-    return false;
-  }
-  for (int i = 0; i < shape1.dim_size(); ++i) {
-    if (shape1.dim(i).size() != shape2.dim(i).size()) {
-      return false;
-    }
-    if (shape1.dim(i).size() == -1 || shape2.dim(i).size() == -1) {
-      return false;
-    }
-  }
-  return true;
-}
-
 bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties,
                   BCast::Vec* shape, int64* min_id) {
   if (shape_node.op() == "Shape") {
@@ -2348,7 +2329,8 @@ Status ConstantFolding::SimplifyArithmeticOperations(
         properties.GetInputProperties(node->name())[1].shape();
     const bool x_is_zero = IsZeros(*x);
     const bool x_is_one = x_is_zero ? false : IsOnes(*x);
-    const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape);
+    const bool y_matches_output_shape =
+        ShapesSymbolicallyEqual(output_shape, y_shape);
     if (y_matches_output_shape &&
         ((is_mul && x_is_one) || (is_add && x_is_zero))) {
       // 1 * y = y or 0 + y = y.
@@ -2378,7 +2360,8 @@ Status ConstantFolding::SimplifyArithmeticOperations(
         properties.GetInputProperties(node->name())[0].shape();
     const bool y_is_zero = IsZeros(*y);
     const bool y_is_one = y_is_zero ? false : IsOnes(*y);
-    const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
+    const bool x_matches_output_shape =
+        ShapesSymbolicallyEqual(output_shape, x_shape);
     if (x_matches_output_shape && (((is_mul || is_any_div) && y_is_one) ||
                                    ((is_add || is_sub) && y_is_zero))) {
       // x * 1 = x or x / 1 = x or x +/- 0 = x
diff --git a/tensorflow/core/grappler/optimizers/shape_optimizer.cc b/tensorflow/core/grappler/optimizers/shape_optimizer.cc
index caa0b7b0cb..4542d17ccc 100644
--- a/tensorflow/core/grappler/optimizers/shape_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/shape_optimizer.cc
@@ -20,10 +20,9 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/grappler/graph_view.h"
 #include "tensorflow/core/grappler/grappler_item.h"
-#include "tensorflow/core/grappler/optimizers/symbolic_shapes.h"
-
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/grappler/utils/symbolic_shapes.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD
index e540cc0476..bdbb8836e1 100644
--- a/tensorflow/core/grappler/utils/BUILD
+++ b/tensorflow/core/grappler/utils/BUILD
@@ -1,6 +1,10 @@
 licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load(
+    "//tensorflow/core:platform/default/build_config.bzl",
+    "tf_protos_grappler",
+)
 
 cc_library(
     name = "scc",
@@ -210,3 +214,28 @@ tf_cc_test(
         "//tensorflow/core:testlib",
     ],
 )
+
+cc_library(
+    name = "symbolic_shapes",
+    srcs = ["symbolic_shapes.cc"],
+    hdrs = ["symbolic_shapes.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ] + tf_protos_grappler(),
+)
+
+tf_cc_test(
+    name = "symbolic_shapes_test",
+    srcs = ["symbolic_shapes_test.cc"],
+    deps = [
+        ":symbolic_shapes",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
diff --git a/tensorflow/core/grappler/optimizers/symbolic_shapes.cc b/tensorflow/core/grappler/utils/symbolic_shapes.cc
similarity index 99%
rename from tensorflow/core/grappler/optimizers/symbolic_shapes.cc
rename to tensorflow/core/grappler/utils/symbolic_shapes.cc
index 155843a744..1666de4b80 100644
--- a/tensorflow/core/grappler/optimizers/symbolic_shapes.cc
+++ b/tensorflow/core/grappler/utils/symbolic_shapes.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/grappler/optimizers/symbolic_shapes.h"
+#include "tensorflow/core/grappler/utils/symbolic_shapes.h"
 #include "tensorflow/core/util/bcast.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/grappler/optimizers/symbolic_shapes.h b/tensorflow/core/grappler/utils/symbolic_shapes.h
similarity index 94%
rename from tensorflow/core/grappler/optimizers/symbolic_shapes.h
rename to tensorflow/core/grappler/utils/symbolic_shapes.h
index ace7bd1fe7..0a7d8ac82b 100644
--- a/tensorflow/core/grappler/optimizers/symbolic_shapes.h
+++ b/tensorflow/core/grappler/utils/symbolic_shapes.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_SYMBOLIC_SHAPES_H_
-#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_SYMBOLIC_SHAPES_H_
+#ifndef TENSORFLOW_CORE_GRAPPLER_UTILS_SYMBOLIC_SHAPES_H_
+#define TENSORFLOW_CORE_GRAPPLER_UTILS_SYMBOLIC_SHAPES_H_
 
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/grappler/costs/op_performance_data.pb.h"
@@ -74,4 +74,4 @@ int64 ComputeSizeRatio(const TensorShapeProto& numerator,
 }  // namespace grappler
 }  // end namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_SYMBOLIC_SHAPES_H_
+#endif  // TENSORFLOW_CORE_GRAPPLER_UTILS_SYMBOLIC_SHAPES_H_
diff --git a/tensorflow/core/grappler/optimizers/symbolic_shapes_test.cc b/tensorflow/core/grappler/utils/symbolic_shapes_test.cc
similarity index 98%
rename from tensorflow/core/grappler/optimizers/symbolic_shapes_test.cc
rename to tensorflow/core/grappler/utils/symbolic_shapes_test.cc
index 7ce995d1c5..6ac644cdb1 100644
--- a/tensorflow/core/grappler/optimizers/symbolic_shapes_test.cc
+++ b/tensorflow/core/grappler/utils/symbolic_shapes_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/grappler/optimizers/symbolic_shapes.h"
+#include "tensorflow/core/grappler/utils/symbolic_shapes.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/platform/test.h"
 
-- 
GitLab


From c07dc66e441c66a7cb1b136b4239e4dfdf84d221 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 20 Sep 2018 14:11:39 -0700
Subject: [PATCH 0459/1357] Include the print function in the list of special
 functions - its name is not found in the namespace in Python 3.

PiperOrigin-RevId: 213879813
---
 .../python/autograph/pyct/static_analysis/live_values.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/static_analysis/live_values.py b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
index 48b442f3bd..3963772dad 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/live_values.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
@@ -29,10 +29,11 @@ from tensorflow.python.autograph.pyct import anno
 from tensorflow.python.autograph.pyct import transformer
 from tensorflow.python.autograph.pyct.static_analysis.annos import NodeAnno
 
+
 # TODO(aqj): Do we need this? Do other builtins fail in similar ways
 # See b/114389775 for a related bug in pyct
 # These symbols are legal in Python, but don't appear in the namespace.
-_special_symbols = {'range': range}
+_SPECIAL_SYMBOLS = {'range': range, 'print': print}
 
 
 class LiveValueResolver(transformer.Base):
@@ -71,8 +72,10 @@ class LiveValueResolver(transformer.Base):
             # If the symbol value is for example a primitive, then it will not
             # have a name.
             pass
-        elif node.id in _special_symbols:
-          anno.setanno(node, 'live_val', _special_symbols[node.id])
+        elif node.id in _SPECIAL_SYMBOLS:
+          # Note: if the user redefined any of these symbols, then they would
+          # be visible in the namespace and we would never reach this branch.
+          anno.setanno(node, 'live_val', _SPECIAL_SYMBOLS[node.id])
         else:
           pass
           # TODO(mdan): Should we raise an error here?
-- 
GitLab


From 61ba909b0a82fa3745964f0eb2a7949b2249982e Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Thu, 20 Sep 2018 14:25:34 -0700
Subject: [PATCH 0460/1357] [Java]: Release 1.11.0-rc1

PiperOrigin-RevId: 213882538
---
 tensorflow/java/maven/libtensorflow/pom.xml              | 2 +-
 tensorflow/java/maven/libtensorflow_jni/pom.xml          | 2 +-
 tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml      | 2 +-
 tensorflow/java/maven/pom.xml                            | 2 +-
 tensorflow/java/maven/proto/pom.xml                      | 2 +-
 tensorflow/java/maven/spark-tensorflow-connector/pom.xml | 2 +-
 tensorflow/java/maven/tensorflow-hadoop/pom.xml          | 2 +-
 tensorflow/java/maven/tensorflow/pom.xml                 | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml
index cf6a64daeb..6c82301eff 100644
--- a/tensorflow/java/maven/libtensorflow/pom.xml
+++ b/tensorflow/java/maven/libtensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc0</version>
+    <version>1.11.0-rc1</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml
index 978c3cbf6d..f7634795d6 100644
--- a/tensorflow/java/maven/libtensorflow_jni/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc0</version>
+    <version>1.11.0-rc1</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
index d1378b5d56..7fcc6ff8f9 100644
--- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc0</version>
+    <version>1.11.0-rc1</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni_gpu</artifactId>
diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml
index 1342b0e9bb..689902e9cd 100644
--- a/tensorflow/java/maven/pom.xml
+++ b/tensorflow/java/maven/pom.xml
@@ -6,7 +6,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.tensorflow</groupId>
   <artifactId>parentpom</artifactId>
-  <version>1.11.0-rc0</version>
+  <version>1.11.0-rc1</version>
   <packaging>pom</packaging>
 
   <url>https://www.tensorflow.org</url>
diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml
index 19ff65a095..ea1462a9ae 100644
--- a/tensorflow/java/maven/proto/pom.xml
+++ b/tensorflow/java/maven/proto/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc0</version>
+    <version>1.11.0-rc1</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>proto</artifactId>
diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
index ba7e9f4c69..ce1ebfa15b 100644
--- a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
+++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
@@ -6,7 +6,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>spark-tensorflow-connector_2.11</artifactId>
     <packaging>jar</packaging>
-    <version>1.11.0-rc0</version>
+    <version>1.11.0-rc1</version>
     <name>spark-tensorflow-connector</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord connector for Apache Spark DataFrames</description>
diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
index f913faffa2..56346fd045 100644
--- a/tensorflow/java/maven/tensorflow-hadoop/pom.xml
+++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
@@ -5,7 +5,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>tensorflow-hadoop</artifactId>
     <packaging>jar</packaging>
-    <version>1.11.0-rc0</version>
+    <version>1.11.0-rc1</version>
     <name>tensorflow-hadoop</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop</description>
diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml
index f6cb595885..93decea0a0 100644
--- a/tensorflow/java/maven/tensorflow/pom.xml
+++ b/tensorflow/java/maven/tensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc0</version>
+    <version>1.11.0-rc1</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>tensorflow</artifactId>
-- 
GitLab


From bf5324fd55a894ac00d10b7cfb2d26f3d9f7f5c9 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Thu, 20 Sep 2018 14:29:09 -0700
Subject: [PATCH 0461/1357] [XLA] Don't create mixed precision operations
 accidentally

The reshape we created change the element type unintentionally.

PiperOrigin-RevId: 213883142
---
 .../xla/service/algebraic_simplifier.cc         |  3 ++-
 .../xla/service/algebraic_simplifier_test.cc    | 17 +++++++++--------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 4ef1dffa73..75dae7a714 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -754,11 +754,12 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
   };
 
   auto reshape_if_necessary = [&](HloInstruction* hlo) {
+    hlo = as_type(hlo, dot->shape().element_type());
     if (!ShapeUtil::SameDimensions(hlo->shape(), dot->shape())) {
       hlo = computation_->AddInstruction(
           HloInstruction::CreateReshape(dot->shape(), hlo));
     }
-    return as_type(hlo, dot->shape().element_type());
+    return hlo;
   };
 
   auto add_reduce_in_f32 = [&](HloInstruction* hlo, const int64 dim) {
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 3fc1ba2427..2047f894b4 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -3233,17 +3233,18 @@ INSTANTIATE_TEST_CASE_P(
 class DotStrengthReductionTest
     : public AlgebraicSimplifierTest,
       public ::testing::WithParamInterface<
-          ::testing::tuple<int, int, int, bool, bool>> {};
+          ::testing::tuple<int, int, int, bool, bool, PrimitiveType>> {};
 TEST_P(DotStrengthReductionTest, DotStrengthReduction) {
   int m, k, n;
   bool transpose_lhs, transpose_rhs;
-  std::tie(m, k, n, transpose_lhs, transpose_rhs) = GetParam();
+  PrimitiveType element_type;
+  std::tie(m, k, n, transpose_lhs, transpose_rhs, element_type) = GetParam();
 
-  Shape dot_shape = ShapeUtil::MakeShape(F32, {m, n});
-  Shape lhs_shape = ShapeUtil::MakeShape(F32, {m, k});
-  Shape transposed_lhs_shape = ShapeUtil::MakeShape(F32, {k, m});
-  Shape rhs_shape = ShapeUtil::MakeShape(F32, {k, n});
-  Shape transposed_rhs_shape = ShapeUtil::MakeShape(F32, {n, k});
+  Shape dot_shape = ShapeUtil::MakeShape(element_type, {m, n});
+  Shape lhs_shape = ShapeUtil::MakeShape(element_type, {m, k});
+  Shape transposed_lhs_shape = ShapeUtil::MakeShape(element_type, {k, m});
+  Shape rhs_shape = ShapeUtil::MakeShape(element_type, {k, n});
+  Shape transposed_rhs_shape = ShapeUtil::MakeShape(element_type, {n, k});
   HloComputation::Builder builder(TestName());
 
   auto lhs = builder.AddInstruction(HloInstruction::CreateParameter(
@@ -3285,7 +3286,7 @@ INSTANTIATE_TEST_CASE_P(
     DotStrengthReductionTestInstantiation, DotStrengthReductionTest,
     ::testing::Combine(::testing::Values(1, 2), ::testing::Values(1, 2),
                        ::testing::Values(1, 2), ::testing::Bool(),
-                       ::testing::Bool()));
+                       ::testing::Bool(), ::testing::Values(F32, BF16)));
 
 struct DotOfConcatTestSpec {
   int64 m;
-- 
GitLab


From f2d30a68169fc00ea444e5bffb2134f8fce92562 Mon Sep 17 00:00:00 2001
From: Raghuraman Krishnamoorthi <raghuramank@google.com>
Date: Thu, 20 Sep 2018 14:31:25 -0700
Subject: [PATCH 0462/1357]  Remove restriction on scope for bypass operators.
 Previously, the scope had to be of the form 'scope/<arbitrary_text>'. Relax
 restriction to handle empty scopes. Enable this change to work for both fused
 and unfused batch norm layers

PiperOrigin-RevId: 213883621
---
 tensorflow/contrib/quantize/BUILD             |   4 +
 tensorflow/contrib/quantize/python/common.py  |   4 +-
 .../contrib/quantize/python/common_test.py    |  59 +++-
 .../quantize/python/fold_batch_norms.py       |  94 +++---
 .../contrib/quantize/python/quantize.py       |  15 +-
 .../python/quantize_parameterized_test.py     | 282 +++++++++++-------
 6 files changed, 308 insertions(+), 150 deletions(-)

diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD
index c59f667f6a..23e3a25d71 100644
--- a/tensorflow/contrib/quantize/BUILD
+++ b/tensorflow/contrib/quantize/BUILD
@@ -20,9 +20,13 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":common",
+        "//tensorflow/contrib/layers:layers_py",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:init_ops",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python:nn_ops",
         "//tensorflow/python:platform_test",
         "//tensorflow/python:session",
         "//tensorflow/python:variable_scope",
diff --git a/tensorflow/contrib/quantize/python/common.py b/tensorflow/contrib/quantize/python/common.py
index b27117dd48..e6c04bcf55 100644
--- a/tensorflow/contrib/quantize/python/common.py
+++ b/tensorflow/contrib/quantize/python/common.py
@@ -34,10 +34,10 @@ SKIPPED_PREFIXES = (
     'ScalarSummary')
 
 # Valid activation ops for quantization end points.
-_ACTIVATION_OP_SUFFIXES = ['/Relu6', '/Relu', '/Identity']
+_ACTIVATION_OP_SUFFIXES = ['Relu6', 'Relu', 'Identity']
 
 # Regular expression for recognizing nodes that are part of batch norm group.
-_BATCHNORM_RE = re.compile(r'^(.*)/BatchNorm/batchnorm')
+_BATCHNORM_RE = re.compile(r'^(.*)BatchNorm/batchnorm')
 
 
 def BatchNormGroups(graph):
diff --git a/tensorflow/contrib/quantize/python/common_test.py b/tensorflow/contrib/quantize/python/common_test.py
index 2b26302f8a..a3ce041cea 100644
--- a/tensorflow/contrib/quantize/python/common_test.py
+++ b/tensorflow/contrib/quantize/python/common_test.py
@@ -13,21 +13,26 @@
 # limitations under the License.
 # ==============================================================================
 """Tests for common utilities in this package."""
-
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-
+from tensorflow.contrib.layers.python.layers import layers
 from tensorflow.contrib.quantize.python import common
 from tensorflow.python.client import session
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 
+batch_norm = layers.batch_norm
+conv2d = layers.conv2d
+
 
 class CommonTest(test_util.TensorFlowTestCase):
 
@@ -87,6 +92,56 @@ class CommonTest(test_util.TensorFlowTestCase):
     for i in inputs:
       self.assertIn(i, op.inputs)
 
+  def testBatchNormScope(self):
+    batch_size, height, width, depth = 5, 128, 128, 3
+    g = ops.Graph()
+    with g.as_default():
+      inputs = array_ops.zeros((batch_size, height, width, depth))
+      stride = 1
+      out_depth = 32
+      scope = ''
+      node = conv2d(
+          inputs,
+          out_depth, [2, 2],
+          stride=stride,
+          padding='SAME',
+          weights_initializer=self._WeightInit(0.09),
+          activation_fn=None,
+          normalizer_fn=batch_norm,
+          normalizer_params=self._BatchNormParams(False),
+          scope=scope)
+
+      node = nn_ops.relu(node, name='Relu6')
+    bn_list = common.BatchNormGroups(g)
+    with open('/tmp/common_test.pbtxt', 'w') as f:
+      f.write(str(g.as_graph_def()))
+
+  # Exactly one batch norm layer with empty scope should be found
+    self.assertEqual(len(bn_list), 1)
+    self.assertEqual(bn_list[0], '')
+
+  def _BatchNormParams(self, fused=False, force_updates=False):
+    params = {
+        'center': True,
+        'scale': True,
+        'decay': 1.0 - 0.003,
+        'fused': fused
+    }
+    return params
+
+  def _WeightInit(self, stddev):
+    """Returns a truncated normal variable initializer.
+
+    Function is defined purely to shorten the name so that it stops wrapping.
+
+    Args:
+      stddev: Standard deviation of normal variable.
+
+    Returns:
+      An initializer that initializes with a truncated normal variable.
+    """
+    return init_ops.truncated_normal_initializer(stddev=stddev, seed=1234)
+
 
 if __name__ == '__main__':
   googletest.main()
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index 2971b28f45..e5790a6e13 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -95,8 +95,7 @@ def _FoldFusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
               _ComputeBatchNormCorrections(
                   context='',
                   match=match,
-                  freeze_batch_norm_delay=freeze_batch_norm_delay,
-                  fused_batch_norm=True))
+                  freeze_batch_norm_delay=freeze_batch_norm_delay))
         # The shape of depthwise weights is different, so we need to reshape the
         # multiplier_tensor to ensure that the scaled_weight_tensor has the
         # expected shape.
@@ -296,8 +295,7 @@ def _FindFusedBatchNorms(graph):
         batch_to_space_op=batch_to_space_op)
 
 
-def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay,
-                                 fused_batch_norm):
+def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay):
   """Computes batch norm correction params.
 
      Before batch normalization is frozen:
@@ -327,14 +325,14 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay,
       computation.
     freeze_batch_norm_delay: Delay in steps at which computation switches
       from regular batch norm to frozen mean and variance.
-    fused_batch_norm: Bool, true if fused batch norm is used.
+
 
   Returns:
     A tuple of correction_scale, correction_recip, correction_offset
   """
 
   g = ops.get_default_graph()
-  prefix = '' if not context else context + '/'
+  prefix = '' if not context else context
   with g.name_scope(prefix + 'batch_norm_correction'):
     recip_sigma_mv = math_ops.rsqrt(
         match.moving_variance_tensor + match.batch_epsilon)
@@ -495,8 +493,23 @@ def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
 
     # Treat consumer ops in bypass modules differently since they have Add
     # operations instead of Relu* above.
-    add_bypass_ctx = re.search(r'^(.*)/([^/]+)', bn).group(1)
-    add_bypass = graph.get_operation_by_name(add_bypass_ctx + '/Add')
+    # Changes to make sure that the correct scope is selected for the bypass add
+    # The rule here is that if the scope is of the form: str1/str2 for the
+    # batch norm,
+    # the bypass add is at scope str1. If bn is of scope just str1, then the
+    # bypass add is at scope ''.
+    # If there is no batch norm, then there is no bypass add.
+    add_bypass_ctx = ''
+    if bn:
+      try:
+        add_bypass_ctx = re.search(r'^(.*)/([^/]+)', bn).group(1)
+      except AttributeError:
+        add_bypass_ctx = ''
+
+    if add_bypass_ctx:
+      add_bypass_ctx = add_bypass_ctx + '/'
+
+    add_bypass = graph.get_operation_by_name(add_bypass_ctx + 'Add')
     nodes_modified_count = common.RerouteTensor(
         folded_op.outputs[0], original_op.outputs[0], can_modify=[add_bypass])
     if nodes_modified_count != 1:
@@ -505,8 +518,8 @@ def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
 
 def _IsValidUnfusedBatchNorm(graph, context):
   """Checks that the output of the unfused batch norm has consumers."""
-  add_shift = graph.get_operation_by_name(
-      context + '/BatchNorm/batchnorm_1/add_1')
+  add_shift = graph.get_operation_by_name(context +
+                                          'BatchNorm/batchnorm_1/add_1')
   # Ensure that the output tensor of batch norm has consumers, otherwise this
   # is a dangling node and not a match.
   return bool(add_shift.outputs[0].consumers())
@@ -538,7 +551,8 @@ def _FindMatchingTensor(graph, match_pattern, scope):
     if op.name.endswith(match_pattern):
       split_name = op.name.split('/')
       num_matches = len(set(split_name) & split_context)
-      if num_matches > 0:
+
+      if num_matches > 0 or not scope:
         match_dict[op.name] = num_matches
   # match_dict contains matching op names from graph with values being
   # number of matches to scope. We pick the key with the most matches
@@ -597,21 +611,21 @@ def _GetBatchNormParams(graph, context, has_scaling):
   # op.name =  MobilenetV2/expanded_conv_3/depthwise/BatchNorm/moving_mean/read
   # will have 2 matches,scope with a different conv layer will have one match.
 
-  op_suffix_mean = '/BatchNorm/moments/Squeeze'
-  op_suffix_variance = '/BatchNorm/moments/Squeeze_1'
-  op_suffix_epsilon = '/BatchNorm/batchnorm_1/add/y'
-  op_suffix_bn_decay_mean = '/BatchNorm/AssignMovingAvg/decay'
-  op_suffix_bn_decay_var = '/BatchNorm/AssignMovingAvg_1/decay'
+  op_suffix_mean = 'BatchNorm/moments/Squeeze'
+  op_suffix_variance = 'BatchNorm/moments/Squeeze_1'
+  op_suffix_epsilon = 'BatchNorm/batchnorm_1/add/y'
+  op_suffix_bn_decay_mean = 'BatchNorm/AssignMovingAvg/decay'
+  op_suffix_bn_decay_var = 'BatchNorm/AssignMovingAvg_1/decay'
 
   if variable_scope.get_variable_scope().use_resource:
-    op_suffix_gamma = '/BatchNorm/gamma/Read/ReadVariableOp'
+    op_suffix_gamma = 'BatchNorm/gamma/Read/ReadVariableOp'
     op_suffix_moving_variance = (
-        '/BatchNorm/moving_variance/Read/ReadVariableOp')
-    op_suffix_moving_mean = ('/BatchNorm/moving_mean/Read/ReadVariableOp')
+        'BatchNorm/moving_variance/Read/ReadVariableOp')
+    op_suffix_moving_mean = ('BatchNorm/moving_mean/Read/ReadVariableOp')
   else:
-    op_suffix_gamma = '/BatchNorm/gamma'
-    op_suffix_moving_variance = '/BatchNorm/moving_variance/read'
-    op_suffix_moving_mean = '/BatchNorm/moving_mean/read'
+    op_suffix_gamma = 'BatchNorm/gamma'
+    op_suffix_moving_variance = 'BatchNorm/moving_variance/read'
+    op_suffix_moving_mean = 'BatchNorm/moving_mean/read'
   # Parse through list of ops to find relevant ops
 
   batch_mean_tensor = _FindMatchingTensor(graph, op_suffix_mean, context)
@@ -679,8 +693,7 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay,
       the folded graph (add_fold).
   """
   mul_scale_name = 'mul_1' if has_scaling else 'mul'
-  mul_scale = graph.get_operation_by_name(context +
-                                          '/BatchNorm/batchnorm_1/' +
+  mul_scale = graph.get_operation_by_name(context + 'BatchNorm/batchnorm_1/' +
                                           mul_scale_name)
   op_below = mul_scale.inputs[0].op
   # Skip over the BatchToSpace operation in the case of atrous convolutions.
@@ -697,8 +710,7 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay,
         _ComputeBatchNormCorrections(
             context=context,
             match=match,
-            freeze_batch_norm_delay=freeze_batch_norm_delay,
-            fused_batch_norm=False))
+            freeze_batch_norm_delay=freeze_batch_norm_delay))
   # Special handling for weights of depthwise convolution.
   if op_below.type == 'DepthwiseConv2dNative':
     new_shape = [
@@ -706,27 +718,27 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay,
         weights.get_shape().as_list()[3]
     ]
     scale_name = 'mul' if has_scaling else 'Rsqrt'
-    scale = graph.get_operation_by_name(
-        context + '/BatchNorm/batchnorm_1/' + scale_name)
+    scale = graph.get_operation_by_name(context + 'BatchNorm/batchnorm_1/' +
+                                        scale_name)
     scale = array_ops.reshape(scale.outputs[0], new_shape,
-                              context + '/scale_reshape')
+                              context + 'scale_reshape')
 
     if correction_scale is not None:
       correction_scale = array_ops.reshape(correction_scale, new_shape,
-                                           context + '/correction_reshape')
+                                           context + 'correction_reshape')
       with ops.device(mul_scale.device):
         weights = math_ops.multiply(correction_scale, weights,
-                                    context + '/correction_mult')
+                                    context + 'correction_mult')
 
-    mul_fold = _CloneOp(mul_scale, context + '/mul_fold', [(0, weights),
-                                                           (1, scale)])
+    mul_fold = _CloneOp(mul_scale, context + 'mul_fold', [(0, weights),
+                                                          (1, scale)])
   elif op_below.type in ['Conv2D', 'MatMul']:
 
     if correction_scale is not None:
       with ops.device(mul_scale.device):
         weights = math_ops.multiply(correction_scale, weights,
-                                    context + '/correction_mult')
-    mul_fold = _CloneOp(mul_scale, context + '/mul_fold', [(0, weights)])
+                                    context + 'correction_mult')
+    mul_fold = _CloneOp(mul_scale, context + 'mul_fold', [(0, weights)])
   else:
     raise ValueError('Cannot handle operation of type: %s' % op_below.type)
   _AssertShapesMatch('mul_fold', mul_fold.inputs[0], mul_fold.outputs[0])
@@ -734,8 +746,8 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay,
   conv_or_fc_folded = _CloneOp(op_below, op_below.name + '_Fold',
                                [(1, mul_fold.outputs[0])])
 
-  add_shift = graph.get_operation_by_name(
-      context + '/BatchNorm/batchnorm_1/add_1')
+  add_shift = graph.get_operation_by_name(context +
+                                          'BatchNorm/batchnorm_1/add_1')
 
   corrected_output = conv_or_fc_folded.outputs[0]
   # Copy the batch to space operation if we have a atrous convolution.
@@ -748,10 +760,10 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay,
   if correction_offset is not None:
     with ops.device(conv_or_fc_folded.device):
       corrected_output = math_ops.multiply(correction_recip, corrected_output,
-                                           context + '/post_conv_mul')
+                                           context + 'post_conv_mul')
       corrected_output = math_ops.add(corrected_output, (correction_offset),
-                                      context + '/correction_add')
-  add_fold = _CloneOp(add_shift, context + '/add_fold', [(0, corrected_output)])
+                                      context + 'correction_add')
+  add_fold = _CloneOp(add_shift, context + 'add_fold', [(0, corrected_output)])
   _AssertShapesMatch('add_fold', add_fold.inputs[0], add_fold.outputs[0])
   return add_shift, add_fold
 
@@ -930,7 +942,7 @@ def _HasScaling(graph, input_to_ops_map, bn):
   Returns:
     A boolean indicating whether this batch norm layer has scaling enabled.
   """
-  rsqrt_op = graph.get_operation_by_name(bn + '/BatchNorm/batchnorm_1/Rsqrt')
+  rsqrt_op = graph.get_operation_by_name(bn + 'BatchNorm/batchnorm_1/Rsqrt')
   rsqrt_consumers = input_to_ops_map.ConsumerOperations(rsqrt_op)
 
   return sum(1 for op in rsqrt_consumers if op.type == 'Mul') == 1
diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index e88db0acd5..5e63d33db8 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -97,8 +97,11 @@ def Quantize(graph,
         layer_match.activation_op)
     add_context = context
     if layer_match.bypass_op:
-      add_context = re.search(r'^(.*)/([^/]+)', context).group(1)
-
+      pattern_match_result = re.search(r'^(.*)/([^/]+)', context)
+      if pattern_match_result is not None:
+        add_context = pattern_match_result.group(1)
+      else:
+        add_context = ''
     # If `scope` is given, only quantize it if the producer of weights
     # (usually it's the layer op) is in the right scope.
     _InsertQuantOp(
@@ -156,8 +159,12 @@ def Quantize(graph,
 
     # Quantize bypass ops that occur after the activation.
     if layer_match.post_activation_bypass_op is not None:
-      post_activation_bypass_context = re.search(
-          r'^(.*)/([^/]+)', layer_match.post_activation_bypass_op.name).group(1)
+      pattern_match_result = re.search(
+          r'^(.*)/([^/]+)', layer_match.post_activation_bypass_op.name)
+      if pattern_match_result is not None:
+        post_activation_bypass_context = pattern_match_result.group(1)
+      else:
+        post_activation_bypass_context = ''
       # If `scope` is given, only quantize it if the producer is in the right
       # scope.
       # Make sure the op following this isn't an activation. In which case, we
diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py
index 31a2955ddb..f6bf57a789 100644
--- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py
@@ -58,85 +58,102 @@ class QuantizeTest(test_util.TensorFlowTestCase):
     ]
     for params in parameters_list:
       # Test everything with resource variables and normal variables.
-      test_fn(params[0], params[1], params[2], params[3], False)
-      test_fn(params[0], params[1], params[2], params[3], True)
+      test_fn(params[0], params[1], params[2], params[3], False, None)
+      test_fn(params[0], params[1], params[2], params[3], True, None)
+      # Test with both empty scope and an example scope
+      test_fn(params[0], params[1], params[2], params[3], False, 'test')
+      test_fn(params[0], params[1], params[2], params[3], True, 'test')
 
   def _AssertCorrectQuantizedGraphWithoutBatchNorm(
       self, graph, scope, layer, activation_op_name, with_bypass, delay,
       use_resource):
     quantization_node_name = 'FakeQuantWithMinMaxVars'
-    weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' +
-                                                quantization_node_name)
+    conv_scope = self._GetConvScope(scope, with_bypass)
+    delim = '/' if conv_scope else ''
+
+    if scope:
+      scope = scope + '/'
+    weights_quant = graph.get_operation_by_name(
+        conv_scope + delim + 'weights_quant/' + quantization_node_name)
     self.assertEqual(weights_quant.type, quantization_node_name)
 
     # Assemble the expected inputs.
     if use_resource:
       expected_inputs = [
-          scope + '/weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
-          scope + '/weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
+          conv_scope + delim +
+          'weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
+          conv_scope + delim +
+          'weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
       ]
       if layer == 'DepthwiseConv2dNative':
-        expected_inputs.append(scope + '/depthwise/ReadVariableOp')
+        expected_inputs.append(conv_scope + delim + 'depthwise/ReadVariableOp')
       else:
-        expected_inputs.append(scope + '/' + layer + '/ReadVariableOp')
+        expected_inputs.append(conv_scope + delim + layer + '/ReadVariableOp')
     else:
       expected_inputs = [
-          scope + '/weights_quant/AssignMinLast',
-          scope + '/weights_quant/AssignMaxLast',
+          conv_scope + delim + 'weights_quant/AssignMinLast',
+          conv_scope + delim + 'weights_quant/AssignMaxLast',
       ]
       if layer == 'DepthwiseConv2dNative':
-        expected_inputs.append(scope + '/depthwise_weights/read')
+        expected_inputs.append(conv_scope + delim + 'depthwise_weights/read')
       else:
-        expected_inputs.append(scope + '/weights/read')
+        expected_inputs.append(conv_scope + delim + 'weights/read')
 
     self._AssertInputOpsAre(weights_quant, expected_inputs)
     if delay and delay > 0:
-      output_op_name = scope + '/weights_quant/delayed_quant/Switch_1'
+      output_op_name = (
+          conv_scope + delim + 'weights_quant/delayed_quant/Switch_1')
     else:
       if layer == 'DepthwiseConv2dNative':
-        output_op_name = scope + '/depthwise'
+        output_op_name = conv_scope + delim + 'depthwise'
       else:
-        output_op_name = scope + '/' + layer
+        output_op_name = conv_scope + delim + layer
 
     self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name])
 
     if with_bypass:
-      conv_quant = graph.get_operation_by_name(scope + '/conv_quant/' +
-                                               quantization_node_name)
+      conv_quant = graph.get_operation_by_name(
+          conv_scope + delim + 'conv_quant/' + quantization_node_name)
       self.assertEqual(conv_quant.type, quantization_node_name)
       if use_resource:
         expected_inputs = [
-            scope + '/conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
-            scope + '/conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
-            scope + '/BiasAdd',
+            conv_scope + delim +
+            'conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
+            conv_scope + delim +
+            'conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
+            conv_scope + delim + 'BiasAdd',
         ]
       else:
         expected_inputs = [
-            scope + '/conv_quant/AssignMinEma',
-            scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd'
+            conv_scope + delim + 'conv_quant/AssignMinEma',
+            conv_scope + delim + 'conv_quant/AssignMaxEma',
+            conv_scope + delim + 'BiasAdd'
         ]
       self._AssertInputOpsAre(conv_quant, expected_inputs)
-      output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1'
-                        if delay else 'test/Add')
+
+      output_op_name = (
+          conv_scope + delim + 'conv_quant/delayed_quant/Switch_1'
+          if delay else scope + 'Add')
       self._AssertOutputGoesToOps(conv_quant, graph, [output_op_name])
 
-    act_quant = graph.get_operation_by_name('test/act_quant/' +
+    act_quant = graph.get_operation_by_name(scope + 'act_quant/' +
                                             quantization_node_name)
     self.assertEqual(act_quant.type, quantization_node_name)
     if use_resource:
       expected_inputs = [
-          'test/act_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
-          'test/act_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
-          'test/' + activation_op_name,
+          scope + 'act_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
+          scope + 'act_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
+          scope + activation_op_name,
       ]
     else:
       expected_inputs = [
-          'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma',
-          'test/' + activation_op_name
+          scope + 'act_quant/AssignMinEma', scope + 'act_quant/AssignMaxEma',
+          scope + activation_op_name
       ]
     self._AssertInputOpsAre(act_quant, expected_inputs)
-    output_op_name = ('test/act_quant/delayed_quant/Switch_1'
-                      if delay else 'control_dependency')
+    output_op_name = (
+        scope + 'act_quant/delayed_quant/Switch_1'
+        if delay else 'control_dependency')
     self._AssertOutputGoesToOps(act_quant, graph, [output_op_name])
     self._AssertIdempotent(graph)
 
@@ -145,7 +162,8 @@ class QuantizeTest(test_util.TensorFlowTestCase):
         self._TestQuantize_Conv2dWithoutBatchNorm)
 
   def _TestQuantize_Conv2dWithoutBatchNorm(self, activation, activation_op_name,
-                                           with_bypass, delay, use_resource):
+                                           with_bypass, delay, use_resource,
+                                           scope):
     """Tests quantization: inputs -> Conv2d no batch norm -> Activation.
 
     Args:
@@ -156,6 +174,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
         inputs to just before Activation.
       delay: Int (optional), delay in number of steps until quantization starts.
       use_resource: Bool, when true uses resource variables.
+      scope: String, specifies top level scope for the graph
     """
     graph = ops.Graph()
     with graph.as_default():
@@ -165,7 +184,9 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       stride = 1 if with_bypass else 2
       out_depth = 3 if with_bypass else 32
       activation_fn = None if with_bypass else activation
-      scope = 'test/test2' if with_bypass else 'test'
+      conv_scope = self._GetConvScope(scope, with_bypass)
+      scope = '' if scope is None else scope
+      delim = '/' if scope else ''
       node = conv2d(
           inputs,
           out_depth, [5, 5],
@@ -173,16 +194,19 @@ class QuantizeTest(test_util.TensorFlowTestCase):
           padding='SAME',
           weights_initializer=self._WeightInit(0.09),
           activation_fn=activation_fn,
-          scope=scope)
+          scope=conv_scope)
       if with_bypass:
-        node = math_ops.add(inputs, node, name='test/Add')
-        node = activation(node, name='test/' + activation_op_name)
+        node = math_ops.add(inputs, node, name=scope + delim + 'Add')
+        node = activation(node, name=scope + delim + activation_op_name)
       update_barrier = control_flow_ops.no_op(name='update_barrier')
       with ops.control_dependencies([update_barrier]):
         array_ops.identity(node, name='control_dependency')
 
       quantize.Quantize(graph, True, quant_delay=delay)
 
+    if conv_scope is None:
+      conv_scope = ''
+
     self._AssertCorrectQuantizedGraphWithoutBatchNorm(
         graph, scope, 'Conv2D', activation_op_name, with_bypass, delay,
         use_resource)
@@ -192,7 +216,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
         self._TestQuantize_FCWithoutBatchNorm)
 
   def _TestQuantize_FCWithoutBatchNorm(self, activation, activation_op_name,
-                                       with_bypass, delay, use_resource):
+                                       with_bypass, delay, use_resource, scope):
     """Tests quantization: inputs -> FC no batch norm -> Activation.
 
     Args:
@@ -203,6 +227,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
         inputs to just before Activation.
       delay: Int (optional), delay in number of steps until quantization starts.
       use_resource: Bool, when true uses resource variables.
+      scope: String, specifies top level scope for the graph
     """
     graph = ops.Graph()
     with graph.as_default():
@@ -211,16 +236,18 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       inputs = array_ops.zeros((batch_size, depth))
       out_depth = 256 if with_bypass else 128
       activation_fn = None if with_bypass else activation
-      scope = 'test/test2' if with_bypass else 'test'
+      fc_scope = self._GetConvScope(scope, with_bypass)
+      scope = '' if scope is None else scope
+      delim = '/' if scope else ''
       node = fully_connected(
           inputs,
           out_depth,
           weights_initializer=self._WeightInit(0.03),
           activation_fn=activation_fn,
-          scope=scope)
+          scope=fc_scope)
       if with_bypass:
-        node = math_ops.add(inputs, node, name='test/Add')
-        node = activation(node, name='test/' + activation_op_name)
+        node = math_ops.add(inputs, node, name=scope + delim + 'Add')
+        node = activation(node, name=scope + delim + activation_op_name)
       update_barrier = control_flow_ops.no_op(name='update_barrier')
       with ops.control_dependencies([update_barrier]):
         array_ops.identity(node, name='control_dependency')
@@ -235,7 +262,8 @@ class QuantizeTest(test_util.TensorFlowTestCase):
         self._TestQuantize_DepthwiseConv2dWithoutBatchNorm)
 
   def _TestQuantize_DepthwiseConv2dWithoutBatchNorm(
-      self, activation, activation_op_name, with_bypass, delay, use_resource):
+      self, activation, activation_op_name, with_bypass, delay, use_resource,
+      scope):
     """Tests quantization: inputs -> DWConv2d no batch norm -> Activation.
 
     Args:
@@ -246,6 +274,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
         inputs to just before Activation.
       delay: Int (optional), delay in number of steps until quantization starts.
       use_resource: Bool, when true uses resource variables.
+      scope: String, specifies top level scope for the graph
     """
     graph = ops.Graph()
     with graph.as_default():
@@ -254,7 +283,10 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       inputs = array_ops.zeros((batch_size, height, width, depth))
       stride = 1 if with_bypass else 2
       activation_fn = None if with_bypass else activation
-      scope = 'test/test2' if with_bypass else 'test'
+      conv_scope = self._GetConvScope(scope, with_bypass)
+      scope = '' if scope is None else scope
+      delim = '/' if scope else ''
+
       node = separable_conv2d(
           inputs,
           None, [5, 5],
@@ -263,10 +295,10 @@ class QuantizeTest(test_util.TensorFlowTestCase):
           padding='SAME',
           weights_initializer=self._WeightInit(0.09),
           activation_fn=activation_fn,
-          scope=scope)
+          scope=conv_scope)
       if with_bypass:
-        node = math_ops.add(inputs, node, name='test/Add')
-        node = activation(node, name='test/' + activation_op_name)
+        node = math_ops.add(inputs, node, name=scope + delim + 'Add')
+        node = activation(node, name=scope + delim + activation_op_name)
       update_barrier = control_flow_ops.no_op(name='update_barrier')
       with ops.control_dependencies([update_barrier]):
         array_ops.identity(node, name='control_dependency')
@@ -280,8 +312,9 @@ class QuantizeTest(test_util.TensorFlowTestCase):
     self._RunWithoutBatchNormTestOverParameters(
         self._TestQuantize_AtrousConvWithoutBatchNorm)
 
-  def _TestQuantize_AtrousConvWithoutBatchNorm(
-      self, activation, activation_op_name, with_bypass, delay, use_resource):
+  def _TestQuantize_AtrousConvWithoutBatchNorm(self, activation,
+                                               activation_op_name, with_bypass,
+                                               delay, use_resource, scope):
     """Tests quantization: inputs -> atrous conv no batch norm -> Activation.
 
     Args:
@@ -292,6 +325,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
         inputs to just before Activation.
       delay: Int (optional), delay in number of steps until quantization starts.
       use_resource: Bool, when true uses resource variables.
+      scope: String, specifies top level scope for the graph
     """
     graph = ops.Graph()
     with graph.as_default():
@@ -300,7 +334,10 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       inputs = array_ops.zeros((batch_size, height, width, depth))
       dilation_rate = 2
       activation_fn = None if with_bypass else activation
-      scope = 'test/test2' if with_bypass else 'test'
+      conv_scope = self._GetConvScope(scope, with_bypass)
+      scope = '' if scope is None else scope
+      delim = '/' if scope else ''
+
       node = separable_conv2d(
           inputs,
           None, [3, 3],
@@ -309,10 +346,10 @@ class QuantizeTest(test_util.TensorFlowTestCase):
           padding='SAME',
           weights_initializer=self._WeightInit(0.09),
           activation_fn=activation_fn,
-          scope=scope)
+          scope=conv_scope)
       if with_bypass:
-        node = math_ops.add(inputs, node, name='test/Add')
-        node = activation(node, name='test/' + activation_op_name)
+        node = math_ops.add(inputs, node, name=scope + delim + 'Add')
+        node = activation(node, name=scope + delim + activation_op_name)
       update_barrier = control_flow_ops.no_op(name='update_barrier')
       with ops.control_dependencies([update_barrier]):
         array_ops.identity(node, name='control_dependency')
@@ -353,78 +390,96 @@ class QuantizeTest(test_util.TensorFlowTestCase):
     ]
     for params in parameters_list:
       # Test everything with resource variables and normal variables.
-      test_fn(params[0], params[1], params[2], params[3], params[4], False)
-      test_fn(params[0], params[1], params[2], params[3], params[4], True)
+      test_fn(params[0], params[1], params[2], params[3], params[4], False,
+              None)
+      test_fn(params[0], params[1], params[2], params[3], params[4], True, None)
+      test_fn(params[0], params[1], params[2], params[3], params[4], False,
+              'test')
+      test_fn(params[0], params[1], params[2], params[3], params[4], True,
+              'test')
 
   def _AssertCorrectQuantizedGraphWithBatchNorm(self, graph, scope, layer,
                                                 activation_op_name, with_bypass,
                                                 delay, use_resource):
     quantization_node_name = 'FakeQuantWithMinMaxVars'
+    conv_scope = self._GetConvScope(scope, with_bypass)
+    delim = '/' if conv_scope else ''
+
+    if scope:
+      scope = scope + '/'
+
     weights_quant = graph.get_operation_by_name(
-        scope + '/weights_quant/' + quantization_node_name)
+        conv_scope + delim + 'weights_quant/' + quantization_node_name)
+
     self.assertEqual(weights_quant.type, quantization_node_name)
     if use_resource:
       expected_inputs = [
-          scope + '/weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
-          scope + '/weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
+          conv_scope + delim +
+          'weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
+          conv_scope + delim +
+          'weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
       ]
     else:
       expected_inputs = [
-          scope + '/weights_quant/' + 'AssignMinLast',
-          scope + '/weights_quant/' + 'AssignMaxLast'
+          conv_scope + delim + 'weights_quant/' + 'AssignMinLast',
+          conv_scope + delim + 'weights_quant/' + 'AssignMaxLast'
       ]
-    expected_inputs.append(scope + '/mul_fold')
+    expected_inputs.append(conv_scope + delim + 'mul_fold')
 
     self._AssertInputOpsAre(weights_quant, expected_inputs)
     if layer == 'DepthwiseConv2dNative':
-      output_op_name = scope + ('/weights_quant/delayed_quant/Switch_1'
-                                if delay else '/depthwise_Fold')
+      output_op_name = conv_scope + delim + (
+          'weights_quant/delayed_quant/Switch_1' if delay else 'depthwise_Fold')
     else:
-      output_op_name = scope + ('/weights_quant/delayed_quant/Switch_1'
-                                if delay else '/' + layer + '_Fold')
+      output_op_name = conv_scope + delim + (
+          'weights_quant/delayed_quant/Switch_1' if delay else layer + '_Fold')
     self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name])
 
     if with_bypass:
       conv_quant = graph.get_operation_by_name(
-          scope + '/conv_quant/' + quantization_node_name)
+          conv_scope + delim + 'conv_quant/' + quantization_node_name)
       self.assertEqual(conv_quant.type, quantization_node_name)
 
       if use_resource:
         expected_inputs = [
-            scope + '/conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
-            scope + '/conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
+            conv_scope + delim +
+            'conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
+            conv_scope + delim +
+            'conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
         ]
       else:
         expected_inputs = [
-            scope + '/conv_quant/AssignMinEma',
-            scope + '/conv_quant/AssignMaxEma',
+            conv_scope + delim + 'conv_quant/AssignMinEma',
+            conv_scope + delim + 'conv_quant/AssignMaxEma',
         ]
-      expected_inputs.append(scope + '/add_fold')
+      expected_inputs.append(conv_scope + delim + 'add_fold')
 
       self._AssertInputOpsAre(conv_quant, expected_inputs)
       output_op_name = (
-          scope + '/conv_quant/delayed_quant/Switch_1' if delay else 'test/Add')
+          conv_scope + delim + 'conv_quant/delayed_quant/Switch_1'
+          if delay else scope + 'Add')
       self._AssertOutputGoesToOps(conv_quant, graph, [output_op_name])
 
-    act_quant = graph.get_operation_by_name(
-        'test/act_quant/' + quantization_node_name)
+    act_quant = graph.get_operation_by_name(scope + 'act_quant/' +
+                                            quantization_node_name)
     self.assertEqual(act_quant.type, quantization_node_name)
 
     if use_resource:
       expected_inputs = [
-          'test/act_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
-          'test/act_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
+          scope + 'act_quant/FakeQuantWithMinMaxVars/ReadVariableOp',
+          scope + 'act_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1',
       ]
     else:
       expected_inputs = [
-          'test/act_quant/AssignMinEma',
-          'test/act_quant/AssignMaxEma',
+          scope + 'act_quant/AssignMinEma',
+          scope + 'act_quant/AssignMaxEma',
       ]
-    expected_inputs.append('test/' + activation_op_name)
+    expected_inputs.append(scope + activation_op_name)
 
     self._AssertInputOpsAre(act_quant, expected_inputs)
-    output_op_name = ('test/act_quant/delayed_quant/Switch_1'
-                      if delay else 'control_dependency')
+    output_op_name = (
+        scope + 'act_quant/delayed_quant/Switch_1'
+        if delay else 'control_dependency')
     self._AssertOutputGoesToOps(act_quant, graph, [output_op_name])
     self._AssertIdempotent(graph)
 
@@ -433,7 +488,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
 
   def _TestQuantize_Conv2dWithBatchNorm(self, activation, activation_op_name,
                                         with_bypass, delay, fused_batch_norm,
-                                        use_resource):
+                                        use_resource, scope):
     """Tests quantization: inputs -> Conv2d with batch norm -> Activation.
 
     Args:
@@ -445,6 +500,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       delay: Int (optional), delay in number of steps until quantization starts.
       fused_batch_norm: Bool, when true use FusedBatchNorm.
       use_resource: Bool, when true uses resource variables.
+      scope: String, specifies top level scope for the graph
     """
     graph = ops.Graph()
     with graph.as_default():
@@ -453,7 +509,9 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       inputs = array_ops.zeros((batch_size, height, width, depth))
       stride = 1 if with_bypass else 2
       out_depth = 3 if with_bypass else 32
-      scope = 'test/test2' if with_bypass else 'test'
+      conv_scope = self._GetConvScope(scope, with_bypass)
+      scope = '' if scope is None else scope
+      delim = '/' if scope else ''
       node = conv2d(
           inputs,
           out_depth, [5, 5],
@@ -463,13 +521,13 @@ class QuantizeTest(test_util.TensorFlowTestCase):
           activation_fn=None,
           normalizer_fn=batch_norm,
           normalizer_params=self._BatchNormParams(fused_batch_norm),
-          scope=scope)
+          scope=conv_scope)
 
       # Manually add a bypass (optional) and an activation.
       if with_bypass:
-        node = math_ops.add(inputs, node, name='test/Add')
+        node = math_ops.add(inputs, node, name=scope + delim + 'Add')
 
-      node = activation(node, name='test/' + activation_op_name)
+      node = activation(node, name=scope + delim + activation_op_name)
 
       update_barrier = control_flow_ops.no_op(name='update_barrier')
       with ops.control_dependencies([update_barrier]):
@@ -487,7 +545,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
 
   def _TestQuantize_FCWithBatchNorm(self, activation, activation_op_name,
                                     with_bypass, delay, fused_batch_norm,
-                                    use_resource):
+                                    use_resource, scope):
     """Tests quantization: inputs -> FC with batch norm -> Activation.
 
     Args:
@@ -499,6 +557,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       delay: Int (optional), delay in number of steps until quantization starts.
       fused_batch_norm: Bool, when true use FusedBatchNorm.
       use_resource: Bool, when true uses resource variables.
+      scope: String, specifies top level scope for the graph
     """
     graph = ops.Graph()
     with graph.as_default():
@@ -506,7 +565,9 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       batch_size, depth = 5, 256
       inputs = array_ops.zeros((batch_size, depth))
       out_depth = 256 if with_bypass else 128
-      scope = 'test/test2' if with_bypass else 'test'
+      conv_scope = self._GetConvScope(scope, with_bypass)
+      scope = '' if scope is None else scope
+      delim = '/' if scope else ''
       node = fully_connected(
           inputs,
           out_depth,
@@ -514,13 +575,13 @@ class QuantizeTest(test_util.TensorFlowTestCase):
           activation_fn=None,
           normalizer_fn=batch_norm,
           normalizer_params=self._BatchNormParams(fused_batch_norm),
-          scope=scope)
+          scope=conv_scope)
 
       # Manually add a bypass (optional) and an activation.
       if with_bypass:
-        node = math_ops.add(inputs, node, name='test/Add')
+        node = math_ops.add(inputs, node, name=scope + delim + 'Add')
 
-      node = activation(node, name='test/' + activation_op_name)
+      node = activation(node, name=scope + delim + activation_op_name)
 
       update_barrier = control_flow_ops.no_op(name='update_barrier')
       with ops.control_dependencies([update_barrier]):
@@ -540,7 +601,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
 
   def _TestQuantize_DepthwiseConv2dWithBatchNorm(
       self, activation, activation_op_name, with_bypass, delay,
-      fused_batch_norm, use_resource):
+      fused_batch_norm, use_resource, scope):
     """Tests quantization: inputs -> DWConv2d with batch norm -> Activation.
 
     Args:
@@ -552,6 +613,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       delay: Int (optional), delay in number of steps until quantization starts.
       fused_batch_norm: Bool, when true use FusedBatchNorm.
       use_resource: Bool, when true uses resource variables.
+      scope: String, specifies top level scope for the graph
     """
     graph = ops.Graph()
     with graph.as_default():
@@ -559,7 +621,9 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       batch_size, height, width, depth = 5, 128, 128, 3
       inputs = array_ops.zeros((batch_size, height, width, depth))
       stride = 1 if with_bypass else 2
-      scope = 'test/test2' if with_bypass else 'test'
+      conv_scope = self._GetConvScope(scope, with_bypass)
+      scope = '' if scope is None else scope
+      delim = '/' if scope else ''
       node = separable_conv2d(
           inputs,
           None, [5, 5],
@@ -570,13 +634,13 @@ class QuantizeTest(test_util.TensorFlowTestCase):
           activation_fn=None,
           normalizer_fn=batch_norm,
           normalizer_params=self._BatchNormParams(fused_batch_norm),
-          scope=scope)
+          scope=conv_scope)
 
       # Manually add a bypass (optional) and an activation.
       if with_bypass:
-        node = math_ops.add(inputs, node, name='test/Add')
+        node = math_ops.add(inputs, node, name=scope + delim + 'Add')
 
-      node = activation(node, name='test/' + activation_op_name)
+      node = activation(node, name=scope + delim + activation_op_name)
 
       update_barrier = control_flow_ops.no_op(name='update_barrier')
       with ops.control_dependencies([update_barrier]):
@@ -595,7 +659,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
 
   def _TestQuantize_AtrousConvWithBatchNorm(
       self, activation, activation_op_name, with_bypass, delay,
-      fused_batch_norm, use_resource):
+      fused_batch_norm, use_resource, scope):
     """Tests quantization: inputs -> atrous conv with batch norm -> Activation.
 
     Args:
@@ -607,6 +671,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       delay: Int (optional), delay in number of steps until quantization starts.
       fused_batch_norm: Bool, when true use FusedBatchNorm.
       use_resource: Bool, when true uses resource variables.
+      scope: String, specifies top level scope for the graph
     """
     graph = ops.Graph()
     with graph.as_default():
@@ -614,7 +679,10 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       batch_size, height, width, depth = 5, 128, 128, 3
       inputs = array_ops.zeros((batch_size, height, width, depth))
       dilation_rate = 2
-      scope = 'test/test2' if with_bypass else 'test'
+      conv_scope = self._GetConvScope(scope, with_bypass)
+      scope = '' if scope is None else scope
+      delim = '/' if scope else ''
+
       node = separable_conv2d(
           inputs,
           None, [3, 3],
@@ -625,13 +693,13 @@ class QuantizeTest(test_util.TensorFlowTestCase):
           activation_fn=None,
           normalizer_fn=batch_norm,
           normalizer_params=self._BatchNormParams(fused_batch_norm),
-          scope=scope)
+          scope=conv_scope)
 
       # Manually add a bypass (optional) and an activation.
       if with_bypass:
-        node = math_ops.add(inputs, node, name='test/Add')
+        node = math_ops.add(inputs, node, name=scope + delim + 'Add')
 
-      node = activation(node, name='test/' + activation_op_name)
+      node = activation(node, name=scope + delim + activation_op_name)
 
       update_barrier = control_flow_ops.no_op(name='update_barrier')
       with ops.control_dependencies([update_barrier]):
@@ -718,6 +786,18 @@ class QuantizeTest(test_util.TensorFlowTestCase):
     with open('/tmp/bn_quant_test.pbtxt', 'w') as f:
       f.write(str(graph.as_graph_def()))
 
+  def _GetConvScope(self, scope, with_bypass):
+    if scope is None:
+      scope = ''
+    delim = '/' if scope else ''
+
+    if with_bypass:
+      conv_scope = scope + delim + 'test2'
+    else:
+      conv_scope = scope
+
+    return conv_scope
+
   def _BatchNormParams(self, fused=False, force_updates=False):
     params = {
         'center': True,
-- 
GitLab


From 800cc654de0bb99c5753fc4ab26a9293547ee0b3 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Thu, 20 Sep 2018 14:41:38 -0700
Subject: [PATCH 0463/1357] Fix missing TODO.

PiperOrigin-RevId: 213885561
---
 tensorflow/contrib/lite/toco/export_tensorflow.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc
index 3d1eb3978c..61e9106783 100644
--- a/tensorflow/contrib/lite/toco/export_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc
@@ -470,9 +470,9 @@ void ConvertDepthwiseConvOperator(const Model& model,
   strides.mutable_list()->add_i(src_op.stride_height);
   strides.mutable_list()->add_i(src_op.stride_width);
   strides.mutable_list()->add_i(1);
-  // TODO(b/): To return a working TF GraphDef, we should be returning the
-  // correct SpaceToBatchNd and BatchToSpaceND operation before and after the
-  // conv since TF doesn't support dilations.
+  // TODO(b/116063589): To return a working TF GraphDef, we should be returning
+  // the correct SpaceToBatchNd and BatchToSpaceND operation before and after
+  // the conv since TF doesn't support dilations.
   if ((src_op.dilation_width_factor != 1) ||
       (src_op.dilation_height_factor != 1)) {
     auto& dilations = (*dc2d_op->mutable_attr())["dilations"];
-- 
GitLab


From 424f0556ad8acde8f912a67e46421957a71dcef2 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 20 Sep 2018 14:49:14 -0700
Subject: [PATCH 0464/1357] [tf.data] Some vectorization cleanup

PiperOrigin-RevId: 213886813
---
 tensorflow/core/framework/node_def_util.cc    | 12 ++-
 tensorflow/core/framework/node_def_util.h     |  4 +
 .../core/framework/node_def_util_test.cc      | 42 ++++++++++
 .../optimizers/data/map_vectorization.cc      |  4 +-
 .../optimizers/data/vectorization_utils.cc    | 82 +++++++++----------
 5 files changed, 98 insertions(+), 46 deletions(-)

diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc
index 42ec315a32..43ac1d0ada 100644
--- a/tensorflow/core/framework/node_def_util.cc
+++ b/tensorflow/core/framework/node_def_util.cc
@@ -372,6 +372,14 @@ Status InputTypeForNode(const NodeDef& node_def, const OpDef& op_def,
                                  node_def.name());
 }
 
+Status InputTypesForNode(const NodeDef& node_def, const OpDef& op_def,
+                         DataTypeVector* inputs) {
+  for (const auto& arg : op_def.input_arg()) {
+    TF_RETURN_IF_ERROR(AddArgToSig(node_def, arg, inputs));
+  }
+  return Status::OK();
+}
+
 Status OutputTypeForNode(const NodeDef& node_def, const OpDef& op_def,
                          int output_port, DataType* output_type) {
   DataTypeVector output_types;
@@ -397,9 +405,7 @@ Status OutputTypesForNode(const NodeDef& node_def, const OpDef& op_def,
 
 Status InOutTypesForNode(const NodeDef& node_def, const OpDef& op_def,
                          DataTypeVector* inputs, DataTypeVector* outputs) {
-  for (const auto& arg : op_def.input_arg()) {
-    TF_RETURN_IF_ERROR(AddArgToSig(node_def, arg, inputs));
-  }
+  TF_RETURN_IF_ERROR(InputTypesForNode(node_def, op_def, inputs));
   return OutputTypesForNode(node_def, op_def, outputs);
 }
 
diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h
index 7528d3d306..187bfa2c88 100644
--- a/tensorflow/core/framework/node_def_util.h
+++ b/tensorflow/core/framework/node_def_util.h
@@ -249,6 +249,10 @@ const string& GetNodeAttrString(const AttrSlice& attrs, StringPiece attr_name);
 // REQUIRES: ValidateOpDef(op_def).ok()
 Status InputTypeForNode(const NodeDef& node_def, const OpDef& op_def,
                         int input_port, DataType* input_type);
+// Computes the input types for a specific node.
+// REQUIRES: ValidateOpDef(op_def).ok()
+Status InputTypesForNode(const NodeDef& node_def, const OpDef& op_def,
+                         DataTypeVector* inputs);
 // Computes the output type for a specific node output.
 // REQUIRES: ValidateOpDef(op_def).ok()
 Status OutputTypeForNode(const NodeDef& node_def, const OpDef& op_def,
diff --git a/tensorflow/core/framework/node_def_util_test.cc b/tensorflow/core/framework/node_def_util_test.cc
index 74cc594863..d9d437024a 100644
--- a/tensorflow/core/framework/node_def_util_test.cc
+++ b/tensorflow/core/framework/node_def_util_test.cc
@@ -370,6 +370,48 @@ TEST(NodeDefUtilTest, ValidSyntax) {
                       "Illegal op input name 'a:00");
 }
 
+TEST(InputTypesForNode, Simple) {
+  const OpDef op_def = ToOpDef(OpDefBuilder("Simple")
+                                   .Input("a: float")
+                                   .Input("b: int32")
+                                   .Output("c: string")
+                                   .Output("d: bool"));
+  const NodeDef node_def = ToNodeDef(
+      NodeDefBuilder("simple", &op_def).Input(FakeInput()).Input(FakeInput()));
+  DataTypeVector types;
+  EXPECT_TRUE(InputTypesForNode(node_def, op_def, &types).ok());
+  EXPECT_EQ(types[0], DT_FLOAT);
+  EXPECT_EQ(types[1], DT_INT32);
+
+  DataType type;
+  EXPECT_TRUE(InputTypeForNode(node_def, op_def, 0, &type).ok());
+  EXPECT_EQ(type, DT_FLOAT);
+  EXPECT_TRUE(InputTypeForNode(node_def, op_def, 1, &type).ok());
+  EXPECT_EQ(type, DT_INT32);
+  EXPECT_FALSE(InputTypeForNode(node_def, op_def, 2, &type).ok());
+}
+
+TEST(OutputTypesForNode, Simple) {
+  const OpDef op_def = ToOpDef(OpDefBuilder("Simple")
+                                   .Input("a: float")
+                                   .Input("b: int32")
+                                   .Output("c: string")
+                                   .Output("d: bool"));
+  const NodeDef node_def = ToNodeDef(
+      NodeDefBuilder("simple", &op_def).Input(FakeInput()).Input(FakeInput()));
+  DataTypeVector types;
+  EXPECT_TRUE(OutputTypesForNode(node_def, op_def, &types).ok());
+  EXPECT_EQ(types[0], DT_STRING);
+  EXPECT_EQ(types[1], DT_BOOL);
+
+  DataType type;
+  EXPECT_TRUE(OutputTypeForNode(node_def, op_def, 0, &type).ok());
+  EXPECT_EQ(type, DT_STRING);
+  EXPECT_TRUE(OutputTypeForNode(node_def, op_def, 1, &type).ok());
+  EXPECT_EQ(type, DT_BOOL);
+  EXPECT_FALSE(OutputTypeForNode(node_def, op_def, 2, &type).ok());
+}
+
 TEST(NameRangesForNodeTest, Simple) {
   const OpDef op_def = ToOpDef(OpDefBuilder("Simple")
                                    .Input("a: float")
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index ad6722a3ae..7a2f1910da 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -86,8 +86,8 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
 FunctionDef* AddVectorizedFunction(const NodeDef& map_node,
                                    const FunctionDef& orig_func,
                                    FunctionDefLibrary* library) {
-  // Vectorizes orig_func naively by wrapping in a MapDefun op, then tries to
-  // do true vectorization with Vectorize.
+  // Vectorizes orig_func naively by wrapping in a MapDefun op, then performing
+  // efficient vectorization with VectorizeMapDefun.
   FunctionDef* vectorized_func =
       CreateMapDefunWrapper(map_node, orig_func, library);
   NodeDef* map_defun_node = vectorized_func->mutable_node_def()->Mutable(0);
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index 5dd9d00511..bfca63b820 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/grappler/mutable_graph_view.h"
@@ -89,20 +90,13 @@ void RemoveMapDefunOutput(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
       ->ExtractSubrange(output_position, 1, nullptr);
 }
 
-Status ConvertCastOp(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
-                     NodeDef* map_defun_node, const NodeDef& cast_node,
-                     const FunctionDefTensorDesc& output_desc,
+Status ConvertCastOp(FunctionDef* outer_scope, gtl::ArraySlice<string> inputs,
+                     const NodeDef& cast_node,
                      std::map<string, string>* conversion_map) {
-  if (output_desc.node_output != "y" || output_desc.position != 0) {
-    // We expect the Cast node to have only one output, with the name "y".
-    return errors::Internal("Cannot convert Cast op output.");
+  if (inputs.size() != 1) {
+    return errors::Internal("Cast op should only have one input.");
   }
 
-  // Promote Cast inputs to outputs of MapDefun
-  DCHECK_EQ(cast_node.input_size(), 1);
-  AddMapDefunOutput(map_defun_fn, map_defun_node, cast_node.input(0),
-                    cast_node.attr().at("SrcT").type());
-
   // Add new Cast node
   NodeDef* new_cast_node = outer_scope->add_node_def();
   *new_cast_node = cast_node;
@@ -110,29 +104,22 @@ Status ConvertCastOp(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
   function_utils::SetUniqueFunctionNodeName(
       strings::StrCat("vectorized/", cast_node.name()), outer_scope,
       new_cast_node);
-  new_cast_node->set_input(
-      0, strings::StrCat(map_defun_node->name(), ":output:",
-                         map_defun_fn->signature().output_arg_size() - 1));
+  new_cast_node->set_input(0, inputs[0]);
 
   // Add the output mapping to conversion map
-  (*conversion_map)[strings::StrCat(output_desc.node_name, ":y:0")] =
+  (*conversion_map)[strings::StrCat(cast_node.name(), ":y:0")] =
       strings::StrCat(new_cast_node->name(), ":y:0");
 
   return Status::OK();
 }
 
-Status ConvertUnpackOp(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
-                       NodeDef* map_defun_node, const NodeDef& unpack_node,
-                       const FunctionDefTensorDesc& output_desc,
+Status ConvertUnpackOp(FunctionDef* outer_scope, gtl::ArraySlice<string> inputs,
+                       const NodeDef& unpack_node,
                        std::map<string, string>* conversion_map) {
-  if (output_desc.node_output != "output") {
-    return errors::Internal("Cannot convert Unpack op output.");
+  if (inputs.size() != 1) {
+    return errors::Internal("Unpack op should only have one input.");
   }
 
-  // Promote Unpack inputs to outputs of MapDefun
-  AddMapDefunOutput(map_defun_fn, map_defun_node, unpack_node.input(0),
-                    unpack_node.attr().at("T").type());
-
   // Add new Unpack node
   NodeDef* new_unpack_node = outer_scope->add_node_def();
   *new_unpack_node = unpack_node;
@@ -144,14 +131,12 @@ Status ConvertUnpackOp(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
   // Increment "axis" attr by 1:
   (*new_unpack_node->mutable_attr())["axis"].set_i(
       unpack_node.attr().at("axis").i() + 1);
-  new_unpack_node->set_input(
-      0, strings::StrCat(map_defun_node->name(), ":output:",
-                         map_defun_fn->signature().output_arg_size() - 1));
+  new_unpack_node->set_input(0, inputs[0]);
 
   // Add the output mappings to conversion map
   int num = new_unpack_node->attr().at("num").i();
   for (int i = 0; i < num; ++i) {
-    (*conversion_map)[strings::StrCat(output_desc.node_name, ":output:", i)] =
+    (*conversion_map)[strings::StrCat(unpack_node.name(), ":output:", i)] =
         strings::StrCat(new_unpack_node->name(), ":output:", i);
   }
 
@@ -241,17 +226,37 @@ Status Vectorization::AddConversionMappingFromOp(
   // TODO(rachelim): Have some mechanism for registering converters and some
   // uniform, simpler way to represent them.
 
-  // TODO(rachelim): Do step (1) outside of the individual op converters, when
-  // we know how to find out the type of the input.
+  DataTypeVector types;
+  const OpDef* op_def = nullptr;
+  TF_RETURN_IF_ERROR(OpRegistry::Global()->LookUpOpDef(node.op(), &op_def));
+  TF_RETURN_IF_ERROR(InputTypesForNode(node, *op_def, &types));
+
+  std::vector<string> promoted_inputs;
+  promoted_inputs.reserve(node.input_size());
+  for (int i = 0; i < node.input_size(); ++i) {
+    promoted_inputs.push_back(strings::StrCat(
+        map_defun_node_->name(),
+        ":output:", map_defun_fn_->signature().output_arg_size() + i));
+  }
+
   if (node.op() == "Cast") {
-    return ConvertCastOp(outer_scope_, map_defun_fn_, map_defun_node_, node,
-                         output_desc, &conversion_map_);
+    TF_RETURN_IF_ERROR(
+        ConvertCastOp(outer_scope_, promoted_inputs, node, &conversion_map_));
   } else if (node.op() == "Unpack") {
-    return ConvertUnpackOp(outer_scope_, map_defun_fn_, map_defun_node_, node,
-                           output_desc, &conversion_map_);
+    TF_RETURN_IF_ERROR(
+        ConvertUnpackOp(outer_scope_, promoted_inputs, node, &conversion_map_));
+  } else {
+    return errors::Unimplemented("Op converter for \"", node.op(),
+                                 "\" not implemented yet");
   }
-  return errors::Unimplemented("Op converter for \"", node.op(),
-                               "\" not implemented yet");
+
+  // If we get here, the conversion was successful, so we promote the inputs
+  // of the ops to MapDefun outputs.
+  for (int i = 0; i < types.size(); ++i) {
+    AddMapDefunOutput(map_defun_fn_, map_defun_node_, node.input(i), types[i]);
+  }
+
+  return Status::OK();
 }
 
 Status Vectorization::AddConversionMappingFromInput(
@@ -333,11 +338,6 @@ void Vectorization::Vectorize() {
 
 void VectorizeMapDefun(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
                        NodeDef* map_defun_node) {
-  if (map_defun_node->attr().at("f").func().name() !=
-      map_defun_fn->signature().name()) {
-    LOG(ERROR) << "`map_defun_fn` and `map_defun_node` do not match";
-    return;
-  }
   Vectorization(outer_scope, map_defun_fn, map_defun_node).Vectorize();
 }
 
-- 
GitLab


From 1d1ec99bd3b322ea35a2d3d0eb754589ec2fd512 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Thu, 20 Sep 2018 15:08:59 -0700
Subject: [PATCH 0465/1357] Add more specific ReLU implementation tests.

PiperOrigin-RevId: 213890403
---
 tensorflow/python/keras/layers/advanced_activations.py    | 4 +++-
 .../python/keras/layers/advanced_activations_test.py      | 8 ++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 4ab786a184..a2385dfdbb 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -314,7 +314,9 @@ class ReLU(Layer):
                        'cannot be negative value: ' + str(negative_slope))
 
     self.support_masking = True
-    self.max_value = K.cast_to_floatx(max_value)
+    if max_value is not None:
+      max_value = K.cast_to_floatx(max_value)
+    self.max_value = max_value
     self.negative_slope = K.cast_to_floatx(negative_slope)
     self.threshold = K.cast_to_floatx(threshold)
 
diff --git a/tensorflow/python/keras/layers/advanced_activations_test.py b/tensorflow/python/keras/layers/advanced_activations_test.py
index b020b6e730..c41087be0a 100644
--- a/tensorflow/python/keras/layers/advanced_activations_test.py
+++ b/tensorflow/python/keras/layers/advanced_activations_test.py
@@ -67,6 +67,14 @@ class AdvancedActivationsTest(test.TestCase):
       testing_utils.layer_test(keras.layers.ReLU,
                                kwargs={'max_value': 10},
                                input_shape=(2, 3, 4))
+      x = keras.backend.ones((3, 4))
+      # Test that we use `leaky_relu` when appropriate in graph mode.
+      self.assertTrue(
+          'LeakyRelu' in keras.layers.ReLU(negative_slope=0.2)(x).name)
+      # Test that we use `relu` when appropriate in graph mode.
+      self.assertTrue('Relu' in keras.layers.ReLU()(x).name)
+      # Test that we use `relu6` when appropriate in graph mode.
+      self.assertTrue('Relu6' in keras.layers.ReLU(max_value=6)(x).name)
 
   def test_relu_with_invalid_arg(self):
     with self.assertRaisesRegexp(
-- 
GitLab


From d78b3484d4b98790c2d3a7c0d861487e2fcdefdf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 15:37:03 -0700
Subject: [PATCH 0466/1357] This CL moves the tf.print logging level tests that
 are sensitive to OS & environment configurations to a separate test target,
 and disables running them on Windows.

PiperOrigin-RevId: 213895372
---
 tensorflow/python/kernel_tests/BUILD          | 15 ++++
 .../logging_ops_logging_level_test.py         | 70 +++++++++++++++++++
 .../python/kernel_tests/logging_ops_test.py   |  3 -
 3 files changed, 85 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/python/kernel_tests/logging_ops_logging_level_test.py

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 5f93682de7..17831fa5cb 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -537,6 +537,21 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "logging_ops_logging_level_test",
+    size = "small",
+    srcs = ["logging_ops_logging_level_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:logging_ops",
+    ],
+    tags = [
+        "no_windows",
+    ],
+)
+
 tf_py_test(
     name = "logging_ops_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/logging_ops_logging_level_test.py b/tensorflow/python/kernel_tests/logging_ops_logging_level_test.py
new file mode 100644
index 0000000000..252090b7bd
--- /dev/null
+++ b/tensorflow/python/kernel_tests/logging_ops_logging_level_test.py
@@ -0,0 +1,70 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tensorflow.kernels.logging_ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import logging_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+
+
+class PrintV2LoggingLevelTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneTensorLogInfo(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(
+            tensor, output_stream=tf_logging.info)
+        self.evaluate(print_op)
+      self.assertTrue("I" in printed.contents())
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue(expected in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneTensorLogWarning(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(
+            tensor, output_stream=tf_logging.warning)
+        self.evaluate(print_op)
+      self.assertTrue("W" in printed.contents())
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue(expected in printed.contents())
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintOneTensorLogError(self):
+    with self.test_session():
+      tensor = math_ops.range(10)
+      with self.captureWritesToStream(sys.stderr) as printed:
+        print_op = logging_ops.print_v2(
+            tensor, output_stream=tf_logging.error)
+        self.evaluate(print_op)
+      self.assertTrue("E" in printed.contents())
+      expected = "[0 1 2 ... 7 8 9]"
+      self.assertTrue(expected in printed.contents())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py
index 79fe9de62f..cf0beba3c3 100644
--- a/tensorflow/python/kernel_tests/logging_ops_test.py
+++ b/tensorflow/python/kernel_tests/logging_ops_test.py
@@ -280,7 +280,6 @@ class PrintV2Test(test.TestCase):
         print_op = logging_ops.print_v2(
             tensor, output_stream=tf_logging.info)
         self.evaluate(print_op)
-      self.assertTrue("I" in printed.contents())
       expected = "[0 1 2 ... 7 8 9]"
       self.assertTrue(expected in printed.contents())
 
@@ -292,7 +291,6 @@ class PrintV2Test(test.TestCase):
         print_op = logging_ops.print_v2(
             tensor, output_stream=tf_logging.warning)
         self.evaluate(print_op)
-      self.assertTrue("W" in printed.contents())
       expected = "[0 1 2 ... 7 8 9]"
       self.assertTrue(expected in printed.contents())
 
@@ -304,7 +302,6 @@ class PrintV2Test(test.TestCase):
         print_op = logging_ops.print_v2(
             tensor, output_stream=tf_logging.error)
         self.evaluate(print_op)
-      self.assertTrue("E" in printed.contents())
       expected = "[0 1 2 ... 7 8 9]"
       self.assertTrue(expected in printed.contents())
 
-- 
GitLab


From 4d39844c1dafb6b74ad49b231bc949a2e026f5ea Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 20 Sep 2018 15:37:17 -0700
Subject: [PATCH 0467/1357] Split XlaLaunch into XlaCompile and XlaRun; NFC

This CL splits the functionality in XlaLaunch into two separate operations:

 - XlaCompile, responsible for compiling a TF function into a LocalExecutable
 - XlaRun, responsible for executing a LocalExecutable created by XlaCompile

This CL is a stepping stone towards implementing lazy compilation for TF/XLA.
The XlaCompile op is spec'ed to return a boolean indicating whether the
compilation was successful.  Right now that boolean is always set to true by
XlaCompile and its value is otherwise ignored, but in the future it will be used
to indicate whether the TF function was compiled or not, and thus whether we
should execute XlaRun or just directly call the TF function.

XlaLaunch still exists, and will be created by create_xla_launch_op.cc.  In the
future we may consider removing it altogether.  build_xla_launch_ops.cc, now
renamed to build_xla_ops.cc, creates a XlaCompile/XlaRun pair instead of
XlaLaunch.

This CL is organized as follows:

 - jit/ops/xla_ops.cc gets two new XLA-specific operations, XlaCompile and
   XlaRun, described above.  XlaRun redundantly takes the must-be-constant
   inputs to the TensorFlow cluster to keep the implementation simple (simple in
   the sense of similar to XlaLaunch), but I will remove this in a subsequent
   cleanup CL.

 - jit/kernels/xla_ops.cc implements XlaCompile and XlaRun in a fairly
   straightforward manner.  XlaCompile compiles the TF function, puts it in a
   process-global storage, XlaExecutableClosureStore, and produces a int64 key.
   XlaRun uses the key to read out the LocalExecutable and execute it.  I'm not
   sure if XlaExecutableClosureStore should be a resource like
   XlaCompilationCache; I did not immediately see any reason to make it so.

 - There are changes to the various _device files to register XlaCompile and
   XlaRun for the XLA_* devices.

 - Finally, I had to fix some tests that were expecting XlaLaunch in the
   execution timeline.

PiperOrigin-RevId: 213895405
---
 tensorflow/compiler/jit/BUILD                 |  24 +-
 .../compiler/jit/build_xla_launch_ops_pass.cc | 142 -----
 tensorflow/compiler/jit/build_xla_ops_pass.cc | 187 +++++++
 ...launch_ops_pass.h => build_xla_ops_pass.h} |  10 +-
 .../compiler/jit/create_xla_launch_op.cc      |   2 +-
 .../jit/jit_compilation_pass_registration.cc  |   4 +-
 tensorflow/compiler/jit/kernels/BUILD         |   7 +-
 .../compiler/jit/kernels/xla_launch_op.cc     | 276 ----------
 .../compiler/jit/kernels/xla_launch_op.h      |  87 ----
 tensorflow/compiler/jit/kernels/xla_ops.cc    | 488 ++++++++++++++++++
 tensorflow/compiler/jit/kernels/xla_ops.h     | 168 ++++++
 tensorflow/compiler/jit/ops/xla_ops.cc        |  43 ++
 tensorflow/compiler/jit/xla_cpu_device.cc     |   5 +-
 tensorflow/compiler/jit/xla_device_ops.h      |  11 +
 tensorflow/compiler/jit/xla_gpu_device.cc     |   5 +-
 .../compiler/jit/xla_interpreter_device.cc    |   6 +-
 tensorflow/compiler/jit/xla_launch_util.cc    |   2 +-
 tensorflow/compiler/jit/xla_launch_util.h     |   3 +-
 tensorflow/compiler/tests/dense_layer_test.py |  25 +-
 tensorflow/compiler/tests/jit_test.py         |  48 +-
 20 files changed, 980 insertions(+), 563 deletions(-)
 delete mode 100644 tensorflow/compiler/jit/build_xla_launch_ops_pass.cc
 create mode 100644 tensorflow/compiler/jit/build_xla_ops_pass.cc
 rename tensorflow/compiler/jit/{build_xla_launch_ops_pass.h => build_xla_ops_pass.h} (71%)
 delete mode 100644 tensorflow/compiler/jit/kernels/xla_launch_op.cc
 delete mode 100644 tensorflow/compiler/jit/kernels/xla_launch_op.h
 create mode 100644 tensorflow/compiler/jit/kernels/xla_ops.cc
 create mode 100644 tensorflow/compiler/jit/kernels/xla_ops.h

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 9544c365b7..4e184729ef 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -51,7 +51,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":jit_compilation_passes",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/compiler/xla/service:cpu_plugin",
     ],
@@ -63,7 +63,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = if_cuda([
         ":jit_compilation_passes",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/compiler/xla/service:gpu_plugin",
     ]),
@@ -77,7 +77,7 @@ cc_library(
     deps = [
         ":jit_compilation_passes",
         ":xla_device",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/jit/legacy_flags:xla_device_flags",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
@@ -95,7 +95,7 @@ cc_library(
     deps = [
         ":jit_compilation_passes",
         ":xla_device",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/compiler/xla/service:gpu_plugin",  # buildcleaner: keep
@@ -112,7 +112,7 @@ cc_library(
     deps = [
         ":jit_compilation_passes",
         ":xla_device",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/compiler/xla/service:interpreter_plugin",  # buildcleaner: keep
@@ -281,7 +281,7 @@ cc_library(
     deps = [
         ":common",
         ":compilation_passes",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
@@ -342,7 +342,7 @@ tf_cc_test(
         "//tensorflow/cc:ops",
         "//tensorflow/cc:resource_variable_ops",
         "//tensorflow/cc:sendrecv_ops",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/core:core_cpu",
@@ -360,7 +360,7 @@ tf_cc_test(
 cc_library(
     name = "compilation_passes",
     srcs = [
-        "build_xla_launch_ops_pass.cc",
+        "build_xla_ops_pass.cc",
         "deadness_analysis.cc",
         "deadness_analysis_internal.h",
         "encapsulate_subgraphs_pass.cc",
@@ -370,7 +370,7 @@ cc_library(
         "partially_decluster_pass.cc",
     ],
     hdrs = [
-        "build_xla_launch_ops_pass.h",
+        "build_xla_ops_pass.h",
         "deadness_analysis.h",
         "encapsulate_subgraphs_pass.h",
         "encapsulate_xla_computations_pass.h",
@@ -460,7 +460,7 @@ tf_cc_test(
         "//tensorflow/cc:function_ops",
         "//tensorflow/cc:ops",
         "//tensorflow/cc:sendrecv_ops",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/core:core_cpu",
@@ -494,7 +494,7 @@ tf_cc_test(
         "//tensorflow/cc:ops",
         "//tensorflow/cc:resource_variable_ops",
         "//tensorflow/cc:sendrecv_ops",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/tf2xla:test_util",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/cc:xla_jit_ops",
@@ -525,7 +525,7 @@ tf_cc_test(
         "//tensorflow/cc:cc_ops_internal",
         "//tensorflow/cc:function_ops",
         "//tensorflow/cc:ops",
-        "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/jit/kernels:xla_ops",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/core:core_cpu",
diff --git a/tensorflow/compiler/jit/build_xla_launch_ops_pass.cc b/tensorflow/compiler/jit/build_xla_launch_ops_pass.cc
deleted file mode 100644
index b17ff589e2..0000000000
--- a/tensorflow/compiler/jit/build_xla_launch_ops_pass.cc
+++ /dev/null
@@ -1,142 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/jit/build_xla_launch_ops_pass.h"
-#include "tensorflow/compiler/jit/defs.h"
-#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
-#include "tensorflow/compiler/tf2xla/dump_graph.h"
-#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
-#include "tensorflow/core/common_runtime/function.h"
-#include "tensorflow/core/common_runtime/optimization_registry.h"
-#include "tensorflow/core/framework/graph_def_util.h"
-#include "tensorflow/core/framework/node_def_builder.h"
-#include "tensorflow/core/framework/node_def_util.h"
-#include "tensorflow/core/graph/algorithm.h"
-#include "tensorflow/core/graph/graph.h"
-#include "tensorflow/core/graph/graph_constructor.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/hash/hash.h"
-#include "tensorflow/core/public/version.h"
-
-namespace tensorflow {
-
-static Status BuildLaunchNode(
-    const string& nodename, const string& function_name,
-    const AttrValueMap& function_attr, const string& device_name,
-    const DataTypeVector& constant_dtypes, int num_resources,
-    const DataTypeVector& arg_dtypes, const DataTypeVector& result_dtypes,
-    Graph* graph, Node** node) {
-  NodeDef def;
-  def.set_name(graph->NewName(nodename));
-  def.set_op("XlaLaunch");
-  def.set_device(device_name);
-  AddNodeAttr("Tconstants", constant_dtypes, &def);
-  AddNodeAttr("Targs", arg_dtypes, &def);
-  AddNodeAttr("Nresources", num_resources, &def);
-  AddNodeAttr("Tresults", result_dtypes, &def);
-  NameAttrList function;
-  function.set_name(function_name);
-  *function.mutable_attr() = function_attr;
-  AddNodeAttr("function", function, &def);
-
-  Status status;
-  *node = graph->AddNode(def, &status);
-  return status;
-}
-
-static Status ReplaceNodeWithXlaLaunch(Graph* graph, Node* node) {
-  VLOG(2) << "Replacing " << node->name() << " with XlaLaunch";
-
-  int num_constant_args, num_resource_args;
-  TF_RETURN_IF_ERROR(
-      GetNodeAttr(node->attrs(), kXlaNumConstantArgsAttr, &num_constant_args));
-  TF_RETURN_IF_ERROR(
-      GetNodeAttr(node->attrs(), kXlaNumResourceArgsAttr, &num_resource_args));
-
-  if (num_constant_args < 0 || num_resource_args < 0 ||
-      num_constant_args + num_resource_args > node->num_inputs()) {
-    return errors::InvalidArgument(
-        "Invalid number of constant/resource arguments to XLA kernel.");
-  }
-  const int num_nonconst_args =
-      node->num_inputs() - num_constant_args - num_resource_args;
-
-  DataTypeVector const_dtypes(node->input_types().begin(),
-                              node->input_types().begin() + num_constant_args);
-  DataTypeVector arg_dtypes(
-      node->input_types().begin() + num_constant_args,
-      node->input_types().begin() + num_constant_args + num_nonconst_args);
-
-  // Build a XlaLaunch operator to execute the function body.
-  Node* launch_node;
-  TF_RETURN_IF_ERROR(BuildLaunchNode(
-      graph->NewName(node->name()), node->type_string(), node->def().attr(),
-      node->requested_device(), const_dtypes, num_resource_args, arg_dtypes,
-      node->output_types(), graph, &launch_node));
-  launch_node->set_assigned_device_name(node->assigned_device_name());
-
-  // Copy incoming edges to the launch node.
-  for (const Edge* edge : node->in_edges()) {
-    if (edge->IsControlEdge()) {
-      graph->AddControlEdge(edge->src(), launch_node);
-    } else {
-      graph->AddEdge(edge->src(), edge->src_output(), launch_node,
-                     edge->dst_input());
-    }
-  }
-
-  // Copy outgoing edges to the launch node.
-  std::vector<const Edge*> out_edges(node->out_edges().begin(),
-                                     node->out_edges().end());
-  for (const Edge* edge : out_edges) {
-    Node* dst = edge->dst();
-    int src_output = edge->src_output();
-    int dst_input = edge->dst_input();
-    graph->RemoveEdge(edge);
-
-    if (edge->IsControlEdge()) {
-      graph->AddControlEdge(launch_node, dst);
-    } else {
-      graph->AddEdge(launch_node, src_output, dst, dst_input);
-    }
-  }
-  graph->RemoveNode(node);
-
-  return Status::OK();
-}
-
-Status BuildXlaLaunchOpsPass::Run(const GraphOptimizationPassOptions& options) {
-  Graph* graph = options.graph->get();
-
-  for (Node* n : graph->op_nodes()) {
-    // In all cases, only try to compile computational nodes.
-    if (n->IsSend() || n->IsRecv() || n->IsControlFlow()) {
-      continue;
-    }
-
-    // Only compile nodes that are marked for compilation by the
-    // compilation-marking pass (via 'attr_name').
-    if (IsXlaCompiledKernel(*n)) {
-      TF_RETURN_IF_ERROR(ReplaceNodeWithXlaLaunch(graph, n));
-    }
-  }
-
-  if (VLOG_IS_ON(1)) {
-    dump_graph::DumpGraphToFile("build_xla_launch_ops", *graph,
-                                options.flib_def);
-  }
-  return Status::OK();
-}
-}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc
new file mode 100644
index 0000000000..a6086f30a1
--- /dev/null
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc
@@ -0,0 +1,187 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/build_xla_ops_pass.h"
+#include "tensorflow/compiler/jit/defs.h"
+#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/tf2xla/dump_graph.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/common_runtime/optimization_registry.h"
+#include "tensorflow/core/framework/graph_def_util.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/public/version.h"
+
+namespace tensorflow {
+
+static Status BuildXlaCompileNode(
+    const string& nodename, const string& function_name,
+    const AttrValueMap& function_attr, const string& device_name,
+    const DataTypeVector& constant_dtypes, int num_resources,
+    const DataTypeVector& arg_dtypes, Graph* graph, Node** node) {
+  NodeDef def;
+  def.set_name(graph->NewName(nodename));
+  def.set_op("_XlaCompile");
+  def.set_device(device_name);
+  AddNodeAttr("Tconstants", constant_dtypes, &def);
+  AddNodeAttr("Targs", arg_dtypes, &def);
+  AddNodeAttr("Nresources", num_resources, &def);
+  NameAttrList function;
+  function.set_name(function_name);
+  *function.mutable_attr() = function_attr;
+  AddNodeAttr("function", function, &def);
+
+  Status status;
+  *node = graph->AddNode(def, &status);
+  return status;
+}
+
+static Status BuildXlaRunNode(const string& nodename, const string& device_name,
+                              const DataTypeVector& constant_dtypes,
+                              const DataTypeVector& arg_dtypes,
+                              const DataTypeVector& result_dtypes, Graph* graph,
+                              Node** node) {
+  NodeDef def;
+  def.set_name(graph->NewName(nodename));
+  def.set_op("_XlaRun");
+  def.set_device(device_name);
+  AddNodeAttr("Tconstants", constant_dtypes, &def);
+  AddNodeAttr("Targs", arg_dtypes, &def);
+  AddNodeAttr("Tresults", result_dtypes, &def);
+
+  Status status;
+  *node = graph->AddNode(def, &status);
+  return status;
+}
+
+static Status GetXlaAttrs(Node* node, int* num_constant_args,
+                          int* num_resource_args, DataTypeVector* const_dtypes,
+                          DataTypeVector* arg_dtypes) {
+  TF_RETURN_IF_ERROR(
+      GetNodeAttr(node->attrs(), kXlaNumConstantArgsAttr, num_constant_args));
+  TF_RETURN_IF_ERROR(
+      GetNodeAttr(node->attrs(), kXlaNumResourceArgsAttr, num_resource_args));
+
+  if (*num_constant_args < 0 || *num_resource_args < 0 ||
+      *num_constant_args + *num_resource_args > node->num_inputs()) {
+    return errors::InvalidArgument(
+        "Invalid number of constant/resource arguments to XLA kernel.");
+  }
+
+  const int num_nonconst_args =
+      node->num_inputs() - *num_constant_args - *num_resource_args;
+
+  const DataTypeVector& input_types = node->input_types();
+  std::copy(input_types.begin(), input_types.begin() + *num_constant_args,
+            std::back_inserter(*const_dtypes));
+  std::copy(input_types.begin() + *num_constant_args,
+            input_types.begin() + *num_constant_args + num_nonconst_args,
+            std::back_inserter(*arg_dtypes));
+  return Status::OK();
+}
+
+static void CopyIncomingEdges(Graph* g, Node* old_node, Node* new_node) {
+  for (const Edge* edge : old_node->in_edges()) {
+    if (edge->IsControlEdge()) {
+      g->AddControlEdge(edge->src(), new_node);
+    } else {
+      g->AddEdge(edge->src(), edge->src_output(), new_node, edge->dst_input());
+    }
+  }
+}
+
+static void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) {
+  std::vector<const Edge*> out_edges(old_node->out_edges().begin(),
+                                     old_node->out_edges().end());
+  for (const Edge* edge : out_edges) {
+    Node* dst = edge->dst();
+    int src_output = edge->src_output();
+    int dst_input = edge->dst_input();
+    g->RemoveEdge(edge);
+
+    if (edge->IsControlEdge()) {
+      g->AddControlEdge(new_node, dst);
+    } else {
+      g->AddEdge(new_node, src_output, dst, dst_input);
+    }
+  }
+}
+
+static Status ReplaceNodeWithXlaCompileAndRun(Graph* g, Node* n) {
+  int num_constant_args, num_resource_args;
+  DataTypeVector const_dtypes;
+  DataTypeVector arg_dtypes;
+
+  TF_RETURN_IF_ERROR(GetXlaAttrs(n, &num_constant_args, &num_resource_args,
+                                 &const_dtypes, &arg_dtypes));
+
+  Node *compile_node, *run_node;
+
+  TF_RETURN_IF_ERROR(BuildXlaCompileNode(
+      n->name(), n->type_string(), n->def().attr(), n->requested_device(),
+      const_dtypes, num_resource_args, arg_dtypes, g, &compile_node));
+
+  DataTypeVector arg_dtypes_with_resources = arg_dtypes;
+  for (int i = 0; i < num_resource_args; i++) {
+    arg_dtypes_with_resources.push_back(DT_RESOURCE);
+  }
+
+  TF_RETURN_IF_ERROR(BuildXlaRunNode(n->name(), n->requested_device(),
+                                     const_dtypes, arg_dtypes_with_resources,
+                                     n->output_types(), g, &run_node));
+
+  compile_node->set_assigned_device_name(n->assigned_device_name());
+  run_node->set_assigned_device_name(n->assigned_device_name());
+
+  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/compile_node);
+  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/run_node);
+
+  // The compilation_key output.
+  g->AddEdge(compile_node, 0, run_node, n->num_inputs());
+
+  MoveOutgoingEdges(g, /*old_node=*/n, /*new_node=*/run_node);
+  g->RemoveNode(n);
+
+  return Status::OK();
+}
+
+Status BuildXlaOpsPass::Run(const GraphOptimizationPassOptions& options) {
+  Graph* graph = options.graph->get();
+
+  for (Node* n : graph->op_nodes()) {
+    // In all cases, only try to compile computational nodes.
+    if (n->IsSend() || n->IsRecv() || n->IsControlFlow()) {
+      continue;
+    }
+
+    // Only compile nodes that are marked for compilation by the
+    // compilation-marking pass (via 'attr_name').
+    if (IsXlaCompiledKernel(*n)) {
+      TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndRun(graph, n));
+    }
+  }
+
+  if (VLOG_IS_ON(1)) {
+    dump_graph::DumpGraphToFile("build_xla_ops", *graph, options.flib_def);
+  }
+  return Status::OK();
+}
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/build_xla_launch_ops_pass.h b/tensorflow/compiler/jit/build_xla_ops_pass.h
similarity index 71%
rename from tensorflow/compiler/jit/build_xla_launch_ops_pass.h
rename to tensorflow/compiler/jit/build_xla_ops_pass.h
index 1dfea93f02..1dd38fa951 100644
--- a/tensorflow/compiler/jit/build_xla_launch_ops_pass.h
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.h
@@ -13,19 +13,21 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_JIT_BUILD_XLA_LAUNCH_OPS_PASS_H_
-#define TENSORFLOW_COMPILER_JIT_BUILD_XLA_LAUNCH_OPS_PASS_H_
+#ifndef TENSORFLOW_COMPILER_JIT_BUILD_XLA_OPS_PASS_H_
+#define TENSORFLOW_COMPILER_JIT_BUILD_XLA_OPS_PASS_H_
 
 #include "tensorflow/core/common_runtime/optimization_registry.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 
-class BuildXlaLaunchOpsPass : public GraphOptimizationPass {
+// Adds _XlaCompile and _XlaRun operations to the TF graph that compiles and
+// executes (using XLA) TF function calls marked with "_XlaCompiledKernel".
+class BuildXlaOpsPass : public GraphOptimizationPass {
  public:
   Status Run(const GraphOptimizationPassOptions& options) override;
 };
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_COMPILER_JIT_BUILD_XLA_LAUNCH_OPS_PASS_H_
+#endif  // TENSORFLOW_COMPILER_JIT_BUILD_XLA_OPS_PASS_H_
diff --git a/tensorflow/compiler/jit/create_xla_launch_op.cc b/tensorflow/compiler/jit/create_xla_launch_op.cc
index 56b034a30b..6f1ff85f24 100644
--- a/tensorflow/compiler/jit/create_xla_launch_op.cc
+++ b/tensorflow/compiler/jit/create_xla_launch_op.cc
@@ -16,7 +16,7 @@ limitations under the License.
 
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/jit/defs.h"
-#include "tensorflow/compiler/jit/kernels/xla_launch_op.h"
+#include "tensorflow/compiler/jit/kernels/xla_ops.h"
 #include "tensorflow/compiler/jit/mark_for_compilation_pass.h"
 #include "tensorflow/compiler/tf2xla/const_analysis.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
diff --git a/tensorflow/compiler/jit/jit_compilation_pass_registration.cc b/tensorflow/compiler/jit/jit_compilation_pass_registration.cc
index 3770eea6d0..085c0e5adb 100644
--- a/tensorflow/compiler/jit/jit_compilation_pass_registration.cc
+++ b/tensorflow/compiler/jit/jit_compilation_pass_registration.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/jit/build_xla_launch_ops_pass.h"
+#include "tensorflow/compiler/jit/build_xla_ops_pass.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 #include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
 #include "tensorflow/compiler/jit/mark_for_compilation_pass.h"
@@ -55,6 +55,6 @@ REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 30,
 
 // Must run after EncapsulateSubgraphsPass.
 REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 40,
-                      BuildXlaLaunchOpsPass);
+                      BuildXlaOpsPass);
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD
index 253a5d2547..0839f1cb3d 100644
--- a/tensorflow/compiler/jit/kernels/BUILD
+++ b/tensorflow/compiler/jit/kernels/BUILD
@@ -7,9 +7,9 @@ package(
 )
 
 cc_library(
-    name = "xla_launch_op",
-    srcs = ["xla_launch_op.cc"],
-    hdrs = ["xla_launch_op.h"],
+    name = "xla_ops",
+    srcs = ["xla_ops.cc"],
+    hdrs = ["xla_ops.h"],
     deps = [
         "//tensorflow/compiler/jit:common",
         "//tensorflow/compiler/jit:xla_compilation_cache",
@@ -26,6 +26,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/core/kernels:variable_ops",
+        "@com_google_absl//absl/memory",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc
deleted file mode 100644
index b6f2f632f7..0000000000
--- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc
+++ /dev/null
@@ -1,276 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/jit/kernels/xla_launch_op.h"
-
-#include "tensorflow/compiler/jit/defs.h"
-#include "tensorflow/compiler/jit/xla_launch_util.h"
-#include "tensorflow/compiler/tf2xla/shape_util.h"
-#include "tensorflow/compiler/tf2xla/tf2xla_util.h"
-#include "tensorflow/compiler/tf2xla/xla_compiler.h"
-#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
-#include "tensorflow/compiler/xla/client/client_library.h"
-#include "tensorflow/compiler/xla/client/local_client.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/core/common_runtime/dma_helper.h"
-#include "tensorflow/core/common_runtime/function.h"
-#include "tensorflow/core/framework/allocator.h"
-#include "tensorflow/core/framework/node_def_util.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/variable_ops.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/stream_executor_no_cuda.h"
-#include "tensorflow/core/util/stream_executor_util.h"
-
-namespace tensorflow {
-
-XlaLocalLaunchBase::XlaLocalLaunchBase(OpKernelConstruction* ctx,
-                                       const std::vector<int>& constants,
-                                       const std::vector<int>& resources,
-                                       const NameAttrList& function)
-    : OpKernel(ctx),
-      constants_(constants),
-      resources_(resources),
-      device_type_(ctx->device_type()),
-      function_(function) {
-  if (device_type_ == DeviceType(DEVICE_CPU)) {
-    platform_id_ = se::host::kHostPlatformId;
-  } else if (device_type_ == DeviceType(DEVICE_GPU)) {
-    platform_id_ = ctx->device()
-                       ->tensorflow_gpu_device_info()
-                       ->stream->parent()
-                       ->platform()
-                       ->id();
-  } else if (XlaDevice::GetMetadata(ctx, &xla_device_metadata_).ok()) {
-    use_multiple_streams_ = xla_device_metadata_->UseMultipleStreams();
-    platform_id_ = xla_device_metadata_->platform()->id();
-  }
-}
-
-Status XlaLocalLaunchBase::BuildCompilationCache(OpKernelContext* ctx,
-                                                 XlaCompilationCache** cache) {
-  if (xla_device_metadata_) {
-    *cache = new XlaCompilationCache(xla_device_metadata_->client(),
-                                     xla_device_metadata_->jit_device_type());
-    return Status::OK();
-  }
-
-  auto platform = se::MultiPlatformManager::PlatformWithId(platform_id_);
-  if (!platform.ok()) {
-    return platform.status();
-  }
-  xla::LocalClientOptions client_options;
-  client_options.set_platform(platform.ValueOrDie());
-  client_options.set_intra_op_parallelism_threads(
-      ctx->device()->tensorflow_cpu_worker_threads()->num_threads);
-  auto client = xla::ClientLibrary::GetOrCreateLocalClient(client_options);
-  if (!client.ok()) {
-    return client.status();
-  }
-  const XlaOpRegistry::DeviceRegistration* registration;
-  if (!XlaOpRegistry::GetCompilationDevice(device_type_.type(),
-                                           &registration)) {
-    return errors::InvalidArgument("No JIT device registered for ",
-                                   device_type_.type());
-  }
-  *cache = new XlaCompilationCache(
-      client.ValueOrDie(), DeviceType(registration->compilation_device_name));
-  return Status::OK();
-}
-
-void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
-  VLOG(1) << "XlaLocalLaunchOpBase::Compute "
-          << Canonicalize(function_.name(), AttrSlice(&function_.attr()));
-  // We store information about the JIT-compiled XLA computation
-  // in the ResourceMgr.
-  ResourceMgr* rm = ctx->resource_manager();
-  OP_REQUIRES(ctx, rm, errors::Internal("No resource manager."));
-
-  se::Stream* stream =
-      ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
-
-  XlaCompilationCache* cache;
-  OP_REQUIRES_OK(ctx, rm->LookupOrCreate<XlaCompilationCache>(
-                          rm->default_container(), "xla_cache", &cache,
-                          [this, ctx](XlaCompilationCache** cache) {
-                            return BuildCompilationCache(ctx, cache);
-                          }));
-  // Hold the reference to the JIT during evaluation. (We could probably
-  // free it sooner because the ResourceMgr will retain a reference, but
-  // this is more obviously correct.)
-  core::ScopedUnref cache_ref(cache);
-
-  std::map<int, OptionalTensor> variables =
-      SnapshotResourceVariables(ctx, resources_);
-
-  xla::LocalClient* client = static_cast<xla::LocalClient*>(cache->client());
-
-  XlaAllocator local_xla_allocator(client->backend().platform(),
-                                   ctx->device()->GetAllocator({}));
-  xla::DeviceMemoryAllocator* xla_allocator;
-  // If we are on an XlaDevice, use the underlying XLA platform's allocator
-  // directly. We could use the StreamExecutor's allocator which may
-  // theoretically be more correct, but XLA returns a nice OOM message in a
-  // Status and StreamExecutor does not.
-  //
-  // Importantly we can't use ctx->device()->GetAllocator() as the allocator
-  // (which local_xla_allocator above uses) as on an XlaDevice, this is a
-  // dummy allocator that returns XlaTensor objects. The XlaCompiler needs a
-  // real allocator to allocate real buffers.
-  if (xla_device_metadata_) {
-    xla_allocator = client->backend().memory_allocator();
-  } else {
-    xla_allocator = &local_xla_allocator;
-  }
-
-  XlaCompiler::Options options;
-  options.client = client;
-  if (ctx->op_device_context() != nullptr) {
-    options.device_ordinal =
-        ctx->op_device_context()->stream()->parent()->device_ordinal();
-  }
-  options.device_type = cache->device_type();
-  options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition();
-  options.graph_def_version = ctx->function_library()->graph_def_version();
-  options.allow_cpu_custom_calls = (platform_id_ == se::host::kHostPlatformId);
-  options.device_allocator = xla_allocator;
-  if (xla_device_metadata_) {
-    options.shape_representation_fn =
-        xla_device_metadata_->shape_representation_fn();
-  }
-
-  const XlaCompiler::CompilationResult* kernel;
-  xla::LocalExecutable* executable;
-
-  std::map<int, Tensor> constant_args;
-  for (int i : constants_) {
-    constant_args.insert({i, ctx->input(i)});
-  }
-  XlaCompiler::CompileOptions compile_options;
-  compile_options.is_entry_computation = true;
-  // If we resolve constants we never emit them on the device, meaning that if
-  // they are needed by a following computation the host has to transfer
-  // them. Not resolving constants is expected to be faster than resolving
-  // constants.
-  compile_options.resolve_compile_time_constants = true;
-  // Optimization: where possible, have the computation return a naked array
-  // rather than a one-element tuple.
-  compile_options.always_return_tuple = false;
-
-  OP_REQUIRES_OK(
-      ctx, cache->Compile(options, function_, constant_args, variables, ctx,
-                          &kernel, &executable, compile_options));
-
-  VLOG(1) << "Executing XLA Computation...";
-
-  XlaComputationLaunchContext launch_context(
-      client, xla_allocator,
-      /*allocate_xla_tensors=*/xla_device_metadata_ != nullptr,
-      use_multiple_streams_);
-  launch_context.PopulateInputs(ctx, kernel, variables);
-
-  // Execute the computation.
-  VLOG(2) << "Executing computation.";
-  xla::ExecutableRunOptions run_options;
-  run_options.set_stream(stream);
-  run_options.set_allocator(xla_allocator);
-  run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device());
-  run_options.set_rng_seed(GetXLARandomSeed());
-  Env* env = Env::Default();
-  auto start_time = env->NowMicros();
-
-  auto run_result = executable->Run(launch_context.arguments(), run_options);
-  OP_REQUIRES(ctx, run_result.ok(), run_result.status());
-
-  auto elapsed = env->NowMicros() - start_time;
-  VLOG(2) << "Elapsed time: " << elapsed << "us";
-
-  OP_REQUIRES_OK(ctx, launch_context.PopulateOutputs(
-                          ctx, kernel, run_result.ConsumeValueOrDie()));
-  VLOG(1) << "Done";
-}
-
-namespace {
-
-// OP_REQUIRES_OK_RETURN is the same as OP_REQUIRES_OK except that
-// in error case, it returns RET instead of void.
-#define OP_REQUIRES_OK_RETURN(CTX, RET, ...)                \
-  do {                                                      \
-    ::tensorflow::Status _s(__VA_ARGS__);                   \
-    if (!TF_PREDICT_TRUE(_s.ok())) {                        \
-      (CTX)->CtxFailureWithWarning(__FILE__, __LINE__, _s); \
-      return RET;                                           \
-    }                                                       \
-  } while (0)
-
-// Helper static functions to construct parameters for
-// XlaLocalLaunchBase constructor from OpKernelConstruction.
-std::vector<int> ConstantsVector(OpKernelConstruction* ctx) {
-  DataTypeVector constant_types;
-  OP_REQUIRES_OK_RETURN(ctx, std::vector<int>(),
-                        ctx->GetAttr("Tconstants", &constant_types));
-  std::vector<int> constants(constant_types.size());
-  std::iota(constants.begin(), constants.end(), 0);
-  return constants;
-}
-
-std::vector<int> ResourcesVector(OpKernelConstruction* ctx) {
-  DataTypeVector constant_types;
-  OP_REQUIRES_OK_RETURN(ctx, std::vector<int>(),
-                        ctx->GetAttr("Tconstants", &constant_types));
-
-  DataTypeVector arg_types;
-  OP_REQUIRES_OK_RETURN(ctx, std::vector<int>(),
-                        ctx->GetAttr("Targs", &arg_types));
-
-  int num_resources;
-  OP_REQUIRES_OK_RETURN(ctx, std::vector<int>(),
-                        ctx->GetAttr("Nresources", &num_resources));
-
-  std::vector<int> resources(num_resources);
-  std::iota(resources.begin(), resources.end(),
-            constant_types.size() + arg_types.size());
-  return resources;
-}
-
-NameAttrList FunctionAttr(OpKernelConstruction* ctx) {
-  const NameAttrList* func;
-  OP_REQUIRES_OK_RETURN(ctx, NameAttrList(), ctx->GetAttr("function", &func));
-  return *func;
-}
-
-#undef OP_REQUIRES_OK_RETURN
-}  // namespace
-
-XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx)
-    : XlaLocalLaunchBase(ctx, ConstantsVector(ctx), ResourcesVector(ctx),
-                         FunctionAttr(ctx)) {}
-
-XlaLocalLaunchOp::~XlaLocalLaunchOp() {
-  VLOG(1) << "XlaLocalLaunchOp destroyed";
-}
-
-REGISTER_KERNEL_BUILDER(Name("XlaLaunch").Device(DEVICE_CPU), XlaLocalLaunchOp);
-
-REGISTER_KERNEL_BUILDER(Name("XlaLaunch")
-                            .Device(DEVICE_GPU)
-                            .HostMemory("constants")
-                            .HostMemory("resources"),
-                        XlaLocalLaunchOp);
-
-}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.h b/tensorflow/compiler/jit/kernels/xla_launch_op.h
deleted file mode 100644
index e0f10e9817..0000000000
--- a/tensorflow/compiler/jit/kernels/xla_launch_op.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_JIT_KERNELS_XLA_LAUNCH_OP_H_
-#define TENSORFLOW_COMPILER_JIT_KERNELS_XLA_LAUNCH_OP_H_
-
-#include "tensorflow/compiler/jit/xla_compilation_cache.h"
-#include "tensorflow/compiler/jit/xla_device.h"
-#include "tensorflow/core/framework/allocator.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/util/stream_executor_util.h"
-
-namespace tensorflow {
-
-// XlaLocalLaunchBase is almost the same as XlaLocalLaunchOp.
-// The only difference is that it does not require arguments to follow
-// the "constants, then regular args, then resources" order.
-// It takes vectors of constant and resource arguments explicitly.
-// It does not have corresponding OpDef because it is never present
-// in the GraphDef.
-// Currently, it is used by eager runtime. FunctionLibraryRuntime creates
-// this kernel when asked to create a kernel for an XLA-compiled function.
-class XlaLocalLaunchBase : public OpKernel {
- public:
-  XlaLocalLaunchBase(OpKernelConstruction* ctx,
-                     const std::vector<int>& constants,
-                     const std::vector<int>& resources,
-                     const NameAttrList& function);
-  XlaLocalLaunchBase(const XlaLocalLaunchBase&) = delete;
-  XlaLocalLaunchBase& operator=(const XlaLocalLaunchBase&) = delete;
-  ~XlaLocalLaunchBase() override = default;
-
-  void Compute(OpKernelContext* ctx) override;
-
- protected:
-  // Builds a XlaCompilationCache class suitable for the current device.
-  Status BuildCompilationCache(OpKernelContext* ctx,
-                               XlaCompilationCache** cache);
-
-  // Indexes of compile-time constant inputs
-  std::vector<int> constants_;
-  // Indexes of resource inputs
-  std::vector<int> resources_;
-
-  DeviceType device_type_;
-  NameAttrList function_;
-  se::Platform::Id platform_id_ = nullptr;
-  bool use_multiple_streams_ = false;
-  const XlaDevice::Metadata* xla_device_metadata_ = nullptr;
-};
-
-// XlaLocalLaunchOp is used to replace a region of the TensorFlow graph
-// which will be compiled and executed using XLA.  The XlaLocalLaunchOp is
-// responsible for handling interactions with the TensorFlow executor.
-// Once all inputs are present, and their shapes are known, the op can
-// use a 'XlaCompilationCache' to compile and execute code which is specific
-// to the shapes of input Tensors.
-// XlaLocalLaunchOp uses xla::LocalClient::Compile() and
-// xla::LocalExecutable::Run(), and passes arguments into/out of XLA in device
-// memory.
-class XlaLocalLaunchOp : public XlaLocalLaunchBase {
- public:
-  explicit XlaLocalLaunchOp(OpKernelConstruction* ctx);
-  ~XlaLocalLaunchOp() override;
-
- private:
-  TF_DISALLOW_COPY_AND_ASSIGN(XlaLocalLaunchOp);
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_COMPILER_JIT_KERNELS_XLA_LAUNCH_OP_H_
diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc
new file mode 100644
index 0000000000..c483841a7c
--- /dev/null
+++ b/tensorflow/compiler/jit/kernels/xla_ops.cc
@@ -0,0 +1,488 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/kernels/xla_ops.h"
+
+#include "absl/memory/memory.h"
+#include "tensorflow/compiler/jit/defs.h"
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/tf2xla_util.h"
+#include "tensorflow/compiler/tf2xla/xla_compiler.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/client_library.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/core/common_runtime/dma_helper.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/variable_ops.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "tensorflow/core/util/stream_executor_util.h"
+
+namespace tensorflow {
+
+namespace {
+
+Status PlatformInfoFromContext(OpKernelConstruction* ctx,
+                               XlaPlatformInfo* result) {
+  DeviceType device_type = ctx->device_type();
+  se::Platform::Id platform_id = nullptr;
+  const XlaDevice::Metadata* xla_device_metadata = nullptr;
+  std::unique_ptr<XlaAllocator> xla_allocator;
+  xla::DeviceMemoryAllocator* device_allocator = nullptr;
+
+  if (ctx->device_type() == DeviceType(DEVICE_CPU)) {
+    platform_id = se::host::kHostPlatformId;
+  } else if (ctx->device_type() == DeviceType(DEVICE_GPU)) {
+    platform_id = ctx->device()
+                      ->tensorflow_gpu_device_info()
+                      ->stream->parent()
+                      ->platform()
+                      ->id();
+  } else if (XlaDevice::GetMetadata(ctx, &xla_device_metadata).ok()) {
+    // If we are on an XlaDevice, use the underlying XLA platform's allocator
+    // directly. We could use the StreamExecutor's allocator which may
+    // theoretically be more correct, but XLA returns a nice OOM message in a
+    // Status and StreamExecutor does not.
+    //
+    // Importantly we can't use ctx->device()->GetAllocator() as the allocator
+    // (which xla_allocator above uses) as on an XlaDevice, this is a dummy
+    // allocator that returns XlaTensor objects. The XlaCompiler needs a real
+    // allocator to allocate real buffers.
+
+    platform_id = xla_device_metadata->platform()->id();
+    device_allocator =
+        xla_device_metadata->client()->backend().memory_allocator();
+  }
+
+  if (!device_allocator) {
+    TF_ASSIGN_OR_RETURN(se::Platform* const platform,
+                        se::MultiPlatformManager::PlatformWithId(platform_id));
+    xla_allocator = absl::make_unique<XlaAllocator>(
+        platform, ctx->device()->GetAllocator({}));
+  }
+
+  *result = XlaPlatformInfo(device_type, platform_id, xla_device_metadata,
+                            std::move(xla_allocator), device_allocator);
+
+  return Status::OK();
+}
+
+// A closure describing how to run a compiled version of a TensorFlow function.
+//
+// It may seem unusual to stick the resource variable snapshots in this class.
+// This is necessary: we need to use the snapshots observed by the compiler as
+// the initial values for the resource variables (and cannot snapshot them again
+// during execution) because otherwise we risk observing a different snapshot
+// with shapes different from what we compiled for.
+class XlaExecutableClosure {
+ public:
+  explicit XlaExecutableClosure(
+      xla::LocalClient* client, xla::LocalExecutable* executable,
+      const XlaCompiler::CompilationResult* compilation_result,
+      std::map<int, OptionalTensor> resource_var_snapshots)
+      : client_(client),
+        executable_(executable),
+        compilation_result_(compilation_result),
+        resource_var_snapshots_(std::move(resource_var_snapshots)) {}
+
+  XlaExecutableClosure(XlaExecutableClosure&&) = default;
+  XlaExecutableClosure& operator=(XlaExecutableClosure&&) = default;
+
+  xla::LocalClient* client() const { return client_; }
+  xla::LocalExecutable* executable() const { return executable_; }
+  const XlaCompiler::CompilationResult* compilation_result() const {
+    return compilation_result_;
+  }
+  const std::map<int, OptionalTensor>& resource_var_snapshots() const {
+    return resource_var_snapshots_;
+  }
+
+ private:
+  xla::LocalClient* client_;
+  xla::LocalExecutable* executable_;
+  const XlaCompiler::CompilationResult* compilation_result_;
+  std::map<int, OptionalTensor> resource_var_snapshots_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(XlaExecutableClosure);
+};
+
+// This maintains a mapping from a globally unique ID to XlaExecutableClosure
+// instances.
+class XlaExecutableClosureStore {
+ public:
+  XlaExecutableClosureStore() : key_counter_(0) {}
+
+  using KeyT = string;
+
+  KeyT Produce(XlaExecutableClosure result) {
+    mutex_lock l(mutex_);
+    KeyT key = absl::StrCat(key_counter_++);
+    bool insert_successful = closures_.emplace(key, std::move(result)).second;
+    DCHECK(insert_successful);
+    (void)insert_successful;
+    return key;
+  }
+
+  XlaExecutableClosure Consume(const KeyT& key) {
+    mutex_lock l(mutex_);
+    auto it = closures_.find(key);
+    DCHECK(it != closures_.end());
+    XlaExecutableClosure value = std::move(it->second);
+    closures_.erase(it);
+    return value;
+  }
+
+  static XlaExecutableClosureStore* Global() {
+    static XlaExecutableClosureStore* instance = new XlaExecutableClosureStore;
+    return instance;
+  }
+
+ private:
+  mutex mutex_;
+  int64 key_counter_ GUARDED_BY(mutex_);
+  gtl::FlatMap<KeyT, XlaExecutableClosure> closures_ GUARDED_BY(mutex_);
+
+  TF_DISALLOW_COPY_AND_ASSIGN(XlaExecutableClosureStore);
+};
+
+}  // namespace
+
+XlaLocalLaunchBase::XlaLocalLaunchBase(OpKernelConstruction* ctx,
+                                       const std::vector<int>& constants,
+                                       const std::vector<int>& resources,
+                                       const NameAttrList& function)
+    : OpKernel(ctx),
+      constants_(constants),
+      resources_(resources),
+      function_(function) {
+  OP_REQUIRES_OK(ctx, PlatformInfoFromContext(ctx, &platform_info_));
+}
+
+static Status BuildCompilationCache(OpKernelContext* ctx,
+                                    const XlaPlatformInfo& platform_info,
+                                    XlaCompilationCache** cache) {
+  if (platform_info.xla_device_metadata()) {
+    *cache = new XlaCompilationCache(
+        platform_info.xla_device_metadata()->client(),
+        platform_info.xla_device_metadata()->jit_device_type());
+    return Status::OK();
+  }
+
+  auto platform =
+      se::MultiPlatformManager::PlatformWithId(platform_info.platform_id());
+  if (!platform.ok()) {
+    return platform.status();
+  }
+  xla::LocalClientOptions client_options;
+  client_options.set_platform(platform.ValueOrDie());
+  client_options.set_intra_op_parallelism_threads(
+      ctx->device()->tensorflow_cpu_worker_threads()->num_threads);
+  auto client = xla::ClientLibrary::GetOrCreateLocalClient(client_options);
+  if (!client.ok()) {
+    return client.status();
+  }
+  const XlaOpRegistry::DeviceRegistration* registration;
+  if (!XlaOpRegistry::GetCompilationDevice(platform_info.device_type().type(),
+                                           &registration)) {
+    return errors::InvalidArgument("No JIT device registered for ",
+                                   platform_info.device_type().type());
+  }
+  *cache = new XlaCompilationCache(
+      client.ValueOrDie(), DeviceType(registration->compilation_device_name));
+  return Status::OK();
+}
+
+static Status CompileToLocalExecutable(
+    OpKernelContext* ctx, const NameAttrList& function,
+    const XlaPlatformInfo& platform_info, absl::Span<const int> resources,
+    absl::Span<const int> constants, xla::LocalClient** client,
+    std::map<int, OptionalTensor>* variables,
+    const XlaCompiler::CompilationResult** kernel,
+    xla::LocalExecutable** executable) {
+  // We store information about the JIT-compiled XLA computation
+  // in the ResourceMgr.
+  ResourceMgr* rm = ctx->resource_manager();
+  if (!rm) {
+    return errors::Internal("No resource manager.");
+  }
+
+  XlaCompilationCache* cache;
+  TF_RETURN_IF_ERROR(rm->LookupOrCreate<XlaCompilationCache>(
+      rm->default_container(), "xla_cache", &cache,
+      [&](XlaCompilationCache** cache) {
+        return BuildCompilationCache(ctx, platform_info, cache);
+      }));
+  // Hold the reference to the JIT during evaluation. (We could probably
+  // free it sooner because the ResourceMgr will retain a reference, but
+  // this is more obviously correct.)
+  core::ScopedUnref cache_ref(cache);
+
+  *variables = SnapshotResourceVariables(ctx, resources);
+  *client = static_cast<xla::LocalClient*>(cache->client());
+
+  XlaCompiler::Options options;
+  options.client = *client;
+  if (ctx->op_device_context() != nullptr) {
+    options.device_ordinal =
+        ctx->op_device_context()->stream()->parent()->device_ordinal();
+  }
+  options.device_type = cache->device_type();
+  options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition();
+  options.graph_def_version = ctx->function_library()->graph_def_version();
+  options.allow_cpu_custom_calls =
+      (platform_info.platform_id() == se::host::kHostPlatformId);
+  options.device_allocator = platform_info.allocator();
+  if (platform_info.xla_device_metadata()) {
+    options.shape_representation_fn =
+        platform_info.xla_device_metadata()->shape_representation_fn();
+  }
+
+  std::map<int, Tensor> constant_args;
+  for (int i : constants) {
+    constant_args.insert({i, ctx->input(i)});
+  }
+  XlaCompiler::CompileOptions compile_options;
+  compile_options.is_entry_computation = true;
+  // If we resolve constants we never emit them on the device, meaning that if
+  // they are needed by a following computation the host has to transfer
+  // them. Not resolving constants is expected to be faster than resolving
+  // constants.
+  compile_options.resolve_compile_time_constants = true;
+  // Optimization: where possible, have the computation return a naked array
+  // rather than a one-element tuple.
+  compile_options.always_return_tuple = false;
+
+  return cache->Compile(options, function, constant_args, *variables, ctx,
+                        kernel, executable, compile_options);
+}
+
+void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
+  VLOG(1) << "XlaLocalLaunchOpBase::Compute "
+          << Canonicalize(function_.name(), AttrSlice(&function_.attr()));
+
+  xla::LocalClient* client;
+  const XlaCompiler::CompilationResult* kernel;
+  xla::LocalExecutable* executable;
+  std::map<int, OptionalTensor> variables;
+
+  OP_REQUIRES_OK(
+      ctx, CompileToLocalExecutable(ctx, function_, platform_info_, resources_,
+                                    constants_, &client, &variables, &kernel,
+                                    &executable));
+
+  se::Stream* stream =
+      ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
+
+  VLOG(1) << "Executing XLA Computation...";
+
+  XlaComputationLaunchContext launch_context(
+      client, platform_info_.allocator(),
+      /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(),
+      platform_info_.UseMultipleStreams());
+  launch_context.PopulateInputs(ctx, kernel, variables);
+
+  // Execute the computation.
+  VLOG(2) << "Executing computation.";
+  xla::ExecutableRunOptions run_options;
+  run_options.set_stream(stream);
+  run_options.set_allocator(platform_info_.allocator());
+  run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device());
+  run_options.set_rng_seed(GetXLARandomSeed());
+  Env* env = Env::Default();
+  auto start_time = env->NowMicros();
+
+  auto run_result = executable->Run(launch_context.arguments(), run_options);
+  OP_REQUIRES(ctx, run_result.ok(), run_result.status());
+
+  auto elapsed = env->NowMicros() - start_time;
+  VLOG(2) << "Elapsed time: " << elapsed << "us";
+
+  OP_REQUIRES_OK(ctx, launch_context.PopulateOutputs(
+                          ctx, kernel, run_result.ConsumeValueOrDie()));
+  VLOG(1) << "Done";
+}
+
+namespace {
+
+// OP_REQUIRES_OK_RETURN is the same as OP_REQUIRES_OK except that
+// in error case, it returns RET instead of void.
+#define OP_REQUIRES_OK_RETURN(CTX, RET, ...)                \
+  do {                                                      \
+    ::tensorflow::Status _s(__VA_ARGS__);                   \
+    if (!TF_PREDICT_TRUE(_s.ok())) {                        \
+      (CTX)->CtxFailureWithWarning(__FILE__, __LINE__, _s); \
+      return RET;                                           \
+    }                                                       \
+  } while (0)
+
+// Helper static functions to construct parameters for
+// XlaLocalLaunchBase constructor from OpKernelConstruction.
+std::vector<int> ConstantsVector(OpKernelConstruction* ctx) {
+  DataTypeVector constant_types;
+  OP_REQUIRES_OK_RETURN(ctx, std::vector<int>(),
+                        ctx->GetAttr("Tconstants", &constant_types));
+  std::vector<int> constants(constant_types.size());
+  std::iota(constants.begin(), constants.end(), 0);
+  return constants;
+}
+
+std::vector<int> ResourcesVector(OpKernelConstruction* ctx) {
+  DataTypeVector constant_types;
+  OP_REQUIRES_OK_RETURN(ctx, std::vector<int>(),
+                        ctx->GetAttr("Tconstants", &constant_types));
+
+  DataTypeVector arg_types;
+  OP_REQUIRES_OK_RETURN(ctx, std::vector<int>(),
+                        ctx->GetAttr("Targs", &arg_types));
+
+  int num_resources;
+  OP_REQUIRES_OK_RETURN(ctx, std::vector<int>(),
+                        ctx->GetAttr("Nresources", &num_resources));
+
+  std::vector<int> resources(num_resources);
+  std::iota(resources.begin(), resources.end(),
+            constant_types.size() + arg_types.size());
+  return resources;
+}
+
+NameAttrList FunctionAttr(OpKernelConstruction* ctx) {
+  const NameAttrList* func;
+  OP_REQUIRES_OK_RETURN(ctx, NameAttrList(), ctx->GetAttr("function", &func));
+  return *func;
+}
+
+#undef OP_REQUIRES_OK_RETURN
+}  // namespace
+
+XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx)
+    : XlaLocalLaunchBase(ctx, ConstantsVector(ctx), ResourcesVector(ctx),
+                         FunctionAttr(ctx)) {}
+
+XlaLocalLaunchOp::~XlaLocalLaunchOp() {
+  VLOG(1) << "XlaLocalLaunchOp destroyed";
+}
+
+XlaCompileOp::XlaCompileOp(OpKernelConstruction* ctx)
+    : OpKernel(ctx),
+      constants_(ConstantsVector(ctx)),
+      resources_(ResourcesVector(ctx)),
+      function_(FunctionAttr(ctx)) {
+  OP_REQUIRES_OK(ctx, PlatformInfoFromContext(ctx, &platform_info_));
+}
+
+void XlaCompileOp::Compute(OpKernelContext* ctx) {
+  xla::LocalClient* client;
+  const XlaCompiler::CompilationResult* kernel;
+  xla::LocalExecutable* executable;
+  std::map<int, OptionalTensor> variables;
+
+  OP_REQUIRES_OK(
+      ctx, CompileToLocalExecutable(ctx, function_, platform_info_, resources_,
+                                    constants_, &client, &variables, &kernel,
+                                    &executable));
+
+  // Each execution of an XlaCompile op creates a new XlaExecutableClosure, even
+  // if it didn't have to compile the cluster because of a compilation-cache
+  // hit.  This is because we at least need new snapshots of the resource
+  // variables.
+  XlaExecutableClosureStore::KeyT key =
+      XlaExecutableClosureStore::Global()->Produce(XlaExecutableClosure(
+          client, executable, kernel, std::move(variables)));
+
+  Allocator* cpu_allocator = [&] {
+    AllocatorAttributes host_alloc_attrs;
+    host_alloc_attrs.set_gpu_compatible(true);
+    host_alloc_attrs.set_on_host(true);
+    return ctx->device()->GetAllocator(host_alloc_attrs);
+  }();
+
+  Tensor compilation_key(cpu_allocator, DT_STRING, TensorShape({}));
+  compilation_key.flat<string>()(0) = key;
+
+  Tensor compilation_successful(cpu_allocator, DT_BOOL, TensorShape({}));
+  compilation_successful.flat<bool>()(0) = true;
+
+  ctx->set_output(0, compilation_key);
+  ctx->set_output(1, compilation_successful);
+}
+
+XlaRunOp::XlaRunOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+  OP_REQUIRES_OK(ctx, PlatformInfoFromContext(ctx, &platform_info_));
+}
+
+void XlaRunOp::Compute(OpKernelContext* ctx) {
+  Tensor key_tensor = ctx->input(ctx->num_inputs() - 1);
+  const XlaExecutableClosureStore::KeyT& key = key_tensor.flat<string>()(0);
+
+  XlaExecutableClosure closure =
+      XlaExecutableClosureStore::Global()->Consume(key);
+
+  XlaComputationLaunchContext launch_context(
+      closure.client(), platform_info_.allocator(),
+      /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(),
+      /*use_multiple_streams=*/platform_info_.UseMultipleStreams());
+  launch_context.PopulateInputs(ctx, closure.compilation_result(),
+                                closure.resource_var_snapshots());
+
+  se::Stream* stream =
+      ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
+  xla::ExecutableRunOptions run_options;
+  run_options.set_stream(stream);
+  run_options.set_allocator(platform_info_.allocator());
+  run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device());
+  run_options.set_rng_seed(GetXLARandomSeed());
+  Env* env = Env::Default();
+  auto start_time = env->NowMicros();
+
+  auto run_result =
+      closure.executable()->Run(launch_context.arguments(), run_options);
+  OP_REQUIRES(ctx, run_result.ok(), run_result.status());
+
+  auto elapsed = env->NowMicros() - start_time;
+  VLOG(2) << "Elapsed time in computation: " << elapsed << "us";
+
+  OP_REQUIRES_OK(
+      ctx, launch_context.PopulateOutputs(ctx, closure.compilation_result(),
+                                          run_result.ConsumeValueOrDie()));
+}
+
+REGISTER_KERNEL_BUILDER(Name("XlaLaunch").Device(DEVICE_CPU), XlaLocalLaunchOp);
+
+REGISTER_KERNEL_BUILDER(Name("XlaLaunch")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("constants")
+                            .HostMemory("resources"),
+                        XlaLocalLaunchOp);
+
+REGISTER_KERNEL_BUILDER(Name("_XlaCompile").Device(DEVICE_CPU), XlaCompileOp);
+REGISTER_KERNEL_BUILDER(Name("_XlaCompile")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("constants")
+                            .HostMemory("resources"),
+                        XlaCompileOp);
+
+REGISTER_KERNEL_BUILDER(Name("_XlaRun").Device(DEVICE_CPU), XlaRunOp);
+
+REGISTER_KERNEL_BUILDER(
+    Name("_XlaRun").Device(DEVICE_GPU).HostMemory("constants"), XlaRunOp);
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/kernels/xla_ops.h b/tensorflow/compiler/jit/kernels/xla_ops.h
new file mode 100644
index 0000000000..489d26eb30
--- /dev/null
+++ b/tensorflow/compiler/jit/kernels/xla_ops.h
@@ -0,0 +1,168 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_JIT_KERNELS_XLA_OPS_H_
+#define TENSORFLOW_COMPILER_JIT_KERNELS_XLA_OPS_H_
+
+#include "tensorflow/compiler/jit/xla_compilation_cache.h"
+#include "tensorflow/compiler/jit/xla_device.h"
+#include "tensorflow/compiler/jit/xla_launch_util.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/util/stream_executor_util.h"
+
+namespace tensorflow {
+
+// Holds some information about the platform on which an
+// XlaLaunch/_XlaCompile/_XlaRun op must run on.
+class XlaPlatformInfo {
+ public:
+  XlaPlatformInfo() : device_type_("") {}
+  explicit XlaPlatformInfo(const DeviceType device_type,
+                           se::Platform::Id platform_id,
+                           const XlaDevice::Metadata* xla_device_metadata,
+                           std::unique_ptr<XlaAllocator> xla_allocator,
+                           xla::DeviceMemoryAllocator* device_allocator)
+      : device_type_(device_type),
+        platform_id_(platform_id),
+        xla_device_metadata_(xla_device_metadata),
+        xla_allocator_(std::move(xla_allocator)),
+        device_allocator_(device_allocator) {
+    CHECK((device_allocator_ != nullptr) ^ (xla_allocator_.get() != nullptr));
+  }
+
+  XlaPlatformInfo& operator=(XlaPlatformInfo&& other) = default;
+
+  bool UseMultipleStreams() const {
+    return xla_device_metadata_ && xla_device_metadata_->UseMultipleStreams();
+  }
+
+  xla::DeviceMemoryAllocator* allocator() const {
+    return device_allocator_ ? device_allocator_ : xla_allocator_.get();
+  }
+  DeviceType device_type() const { return device_type_; }
+
+  // This is equal to xla_device_metadata()->platform()->id() if
+  // xla_device_metadata() is not nullptr.
+  se::Platform::Id platform_id() const { return platform_id_; }
+
+  // This may be null if the op this XlaPlatformInfo is for was not placed on an
+  // XLA device.
+  const XlaDevice::Metadata* xla_device_metadata() const {
+    return xla_device_metadata_;
+  }
+  bool is_on_xla_device() const { return xla_device_metadata() != nullptr; }
+
+ private:
+  DeviceType device_type_;
+  se::Platform::Id platform_id_;
+
+  // xla_device_metadata_ lives in the tensorflow::DeviceBase in which the
+  // XlaLaunch/_XlaCompile/_XlaRun op is placed and thus does not die before the
+  // XlaLaunch/_XlaCompile/_XlaRun OpKernel.
+  const XlaDevice::Metadata* xla_device_metadata_;
+
+  // If the op associated with this XlaPlatformInfo is placed on an XLA device
+  // then device_allocator_ is the xla::Backend's memory allocator and
+  // xla_allocator_ is null.  If the op is placed on a regular CPU or GPU device
+  // then device_allocator_ is null and xla_allocator_ points to an appropriate
+  // XlaAllocator instance.
+  std::unique_ptr<XlaAllocator> xla_allocator_;
+  xla::DeviceMemoryAllocator* device_allocator_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(XlaPlatformInfo);
+};
+
+// XlaLocalLaunchBase is almost the same as XlaLocalLaunchOp.
+// The only difference is that it does not require arguments to follow
+// the "constants, then regular args, then resources" order.
+// It takes vectors of constant and resource arguments explicitly.
+// It does not have corresponding OpDef because it is never present
+// in the GraphDef.
+// Currently, it is used by eager runtime. FunctionLibraryRuntime creates
+// this kernel when asked to create a kernel for an XLA-compiled function.
+class XlaLocalLaunchBase : public OpKernel {
+ public:
+  XlaLocalLaunchBase(OpKernelConstruction* ctx,
+                     const std::vector<int>& constants,
+                     const std::vector<int>& resources,
+                     const NameAttrList& function);
+  XlaLocalLaunchBase(const XlaLocalLaunchBase&) = delete;
+  XlaLocalLaunchBase& operator=(const XlaLocalLaunchBase&) = delete;
+  ~XlaLocalLaunchBase() override = default;
+
+  void Compute(OpKernelContext* ctx) override;
+
+ protected:
+  // Indexes of compile-time constant inputs
+  std::vector<int> constants_;
+  // Indexes of resource inputs
+  std::vector<int> resources_;
+
+  NameAttrList function_;
+  XlaPlatformInfo platform_info_;
+};
+
+// XlaLocalLaunchOp is used to replace a region of the TensorFlow graph
+// which will be compiled and executed using XLA.  The XlaLocalLaunchOp is
+// responsible for handling interactions with the TensorFlow executor.
+// Once all inputs are present, and their shapes are known, the op can
+// use a 'XlaCompilationCache' to compile and execute code which is specific
+// to the shapes of input Tensors.
+// XlaLocalLaunchOp uses xla::LocalClient::Compile() and
+// xla::LocalExecutable::Run(), and passes arguments into/out of XLA in device
+// memory.
+class XlaLocalLaunchOp : public XlaLocalLaunchBase {
+ public:
+  explicit XlaLocalLaunchOp(OpKernelConstruction* ctx);
+  ~XlaLocalLaunchOp() override;
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(XlaLocalLaunchOp);
+};
+
+class XlaCompileOp : public OpKernel {
+ public:
+  explicit XlaCompileOp(OpKernelConstruction* ctx);
+
+  void Compute(OpKernelContext* ctx) override;
+
+ private:
+  // Indexes of compile-time constant inputs
+  std::vector<int> constants_;
+  // Indexes of resource inputs
+  std::vector<int> resources_;
+
+  NameAttrList function_;
+
+  XlaPlatformInfo platform_info_;
+};
+
+class XlaRunOp : public OpKernel {
+ public:
+  explicit XlaRunOp(OpKernelConstruction* ctx);
+
+  void Compute(OpKernelContext* ctx) override;
+
+ private:
+  XlaPlatformInfo platform_info_;
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_JIT_KERNELS_XLA_LAUNCH_OP_H_
diff --git a/tensorflow/compiler/jit/ops/xla_ops.cc b/tensorflow/compiler/jit/ops/xla_ops.cc
index 1a29c3caab..6b4cdaa1c1 100644
--- a/tensorflow/compiler/jit/ops/xla_ops.cc
+++ b/tensorflow/compiler/jit/ops/xla_ops.cc
@@ -51,4 +51,47 @@ REGISTER_OP("XlaClusterOutput")
         "Operator that connects the output of an XLA computation to other "
         "consumer graph nodes.");
 
+REGISTER_OP("_XlaCompile")
+    .Input("constants: Tconstants")
+    .Attr("Tconstants: list(type) >= 0")
+    .Input("args: Targs")
+    .Attr("Targs: list(type) >= 0")
+    .Input("resources: Nresources * resource")
+    .Attr("Nresources: int >= 0")
+    .Output("key: string")
+    .Output("compilation_successful: bool")
+    .Attr("function: func")
+    // The compilation cache is stateful.
+    .SetIsStateful()
+    .Doc(R"(XLA Compile Op. For use by the XLA JIT only.
+
+Compiles a TensorFlow function into an XLA LocalExecutable and returns a key
+that _XlaRun can use to look up the LocalExecutable and execute it.
+
+key: A key that can be used to look up the local executable compiled by the
+   node and associated metadata.
+
+compilation_successful: True iff the compilation was successful.  Always true
+for now.
+)");
+
+REGISTER_OP("_XlaRun")
+    // TODO(sanjoy): We don't need constants and Tconstants and they should be
+    // removed.
+    .Input("constants: Tconstants")
+    .Attr("Tconstants: list(type) >= 0")
+    .Input("args: Targs")
+    .Attr("Targs: list(type) >= 0")
+    .Output("results: Tresults")
+    .Attr("Tresults: list(type) >= 0")
+    .Input("key: string")
+    // XLA random-number generation ops are stateful.
+    // TODO(phawkins): create stateful and non-stateful variants of _XlaRun.
+    .SetIsStateful()
+    .Doc(R"(XLA Run Op. For use by the XLA JIT only.
+
+Executes a TensorFlow function previously compiled into a LocalExecutable by an
+_XlaCompile op.
+)");
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc
index e26fa27b31..003c1d8081 100644
--- a/tensorflow/compiler/jit/xla_cpu_device.cc
+++ b/tensorflow/compiler/jit/xla_cpu_device.cc
@@ -16,7 +16,7 @@ limitations under the License.
 // Registers the XLA_CPU device, which is an XlaDevice instantiation that runs
 // operators using XLA via the XLA "Host" (CPU) backend.
 
-#include "tensorflow/compiler/jit/kernels/xla_launch_op.h"
+#include "tensorflow/compiler/jit/kernels/xla_ops.h"
 #include "tensorflow/compiler/jit/legacy_flags/xla_device_flags.h"
 #include "tensorflow/compiler/jit/xla_compile_on_demand_op.h"
 #include "tensorflow/compiler/jit/xla_device.h"
@@ -70,6 +70,9 @@ constexpr std::array<DataType, 12> kAllXlaCpuTypes = {
      DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}};
 
 REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_CPU, XlaLocalLaunchOp, kAllXlaCpuTypes);
+REGISTER_XLA_COMPILE_KERNEL(DEVICE_XLA_CPU, XlaCompileOp, kAllXlaCpuTypes);
+REGISTER_XLA_RUN_KERNEL(DEVICE_XLA_CPU, XlaRunOp, kAllXlaCpuTypes);
+
 REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_CPU, kAllXlaCpuTypes);
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h
index 49c8582682..639243973c 100644
--- a/tensorflow/compiler/jit/xla_device_ops.h
+++ b/tensorflow/compiler/jit/xla_device_ops.h
@@ -65,6 +65,17 @@ class XlaAssignVariableOp : public AsyncOpKernel {
                               .HostMemory("resources"),   \
                           KERNEL);
 
+#define REGISTER_XLA_COMPILE_KERNEL(DEVICE, KERNEL, TYPES) \
+  REGISTER_KERNEL_BUILDER(Name("_XlaCompile")              \
+                              .Device(DEVICE)              \
+                              .HostMemory("constants")     \
+                              .HostMemory("resources"),    \
+                          KERNEL);
+
+#define REGISTER_XLA_RUN_KERNEL(DEVICE, KERNEL, TYPES) \
+  REGISTER_KERNEL_BUILDER(                             \
+      Name("_XlaRun").Device(DEVICE).HostMemory("constants"), KERNEL);
+
 #define REGISTER_XLA_DEVICE_KERNELS(DEVICE, TYPES)                             \
   REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE), SendOp);               \
   REGISTER_KERNEL_BUILDER(Name("_Recv").Device(DEVICE), RecvOp);               \
diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index c386984930..60979556a3 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -16,7 +16,7 @@ limitations under the License.
 // Registers the XLA_GPU device, which is an XlaDevice instantiation that runs
 // operators using XLA via the XLA "CUDA" (GPU) backend.
 
-#include "tensorflow/compiler/jit/kernels/xla_launch_op.h"
+#include "tensorflow/compiler/jit/kernels/xla_ops.h"
 #include "tensorflow/compiler/jit/xla_device.h"
 #include "tensorflow/compiler/jit/xla_device_ops.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -79,6 +79,9 @@ constexpr std::array<DataType, 13> kAllXlaGpuTypes = {
      DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}};
 
 REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_GPU, XlaLocalLaunchOp, kAllXlaGpuTypes);
+REGISTER_XLA_COMPILE_KERNEL(DEVICE_XLA_GPU, XlaCompileOp, kAllXlaGpuTypes);
+REGISTER_XLA_RUN_KERNEL(DEVICE_XLA_GPU, XlaRunOp, kAllXlaGpuTypes);
+
 REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_GPU, kAllXlaGpuTypes);
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/xla_interpreter_device.cc b/tensorflow/compiler/jit/xla_interpreter_device.cc
index 4574559674..19e681af0c 100644
--- a/tensorflow/compiler/jit/xla_interpreter_device.cc
+++ b/tensorflow/compiler/jit/xla_interpreter_device.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 // Registers the XLA_INTERPRETER device which exposes the XLA Interpreter.
 
-#include "tensorflow/compiler/jit/kernels/xla_launch_op.h"
+#include "tensorflow/compiler/jit/kernels/xla_ops.h"
 #include "tensorflow/compiler/jit/xla_device.h"
 #include "tensorflow/compiler/jit/xla_device_ops.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -72,6 +72,10 @@ static bool OpFilter(KernelDef* kdef) { return true; }
 
 REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_INTERPRETER, XlaLocalLaunchOp,
                            kExecAllTypes);
+REGISTER_XLA_COMPILE_KERNEL(DEVICE_XLA_INTERPRETER, XlaCompileOp,
+                            kExecAllTypes);
+REGISTER_XLA_RUN_KERNEL(DEVICE_XLA_INTERPRETER, XlaRunOp, kExecAllTypes);
+
 REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_INTERPRETER, kExecAllTypes);
 REGISTER_XLA_BACKEND(DEVICE_INTERPRETER_XLA_JIT, kExecAllTypes, OpFilter);
 
diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc
index 5f2f6801e7..07a93e9c39 100644
--- a/tensorflow/compiler/jit/xla_launch_util.cc
+++ b/tensorflow/compiler/jit/xla_launch_util.cc
@@ -42,7 +42,7 @@ using xla::ShapedBuffer;
 }  // anonymous namespace
 
 std::map<int, OptionalTensor> SnapshotResourceVariables(
-    OpKernelContext* ctx, const std::vector<int>& variables) {
+    OpKernelContext* ctx, absl::Span<const int> variables) {
   std::map<int, OptionalTensor> snapshot;
   for (int i : variables) {
     Var* variable = nullptr;
diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h
index 7ac275fab8..fa7a5e5f89 100644
--- a/tensorflow/compiler/jit/xla_launch_util.h
+++ b/tensorflow/compiler/jit/xla_launch_util.h
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/variable_ops.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
 class XlaAllocator;
@@ -43,7 +44,7 @@ class XlaAllocator;
 // resource variable is not initialized, the corresponding OptionalTensor
 // will have its `present` field set to false.
 std::map<int, OptionalTensor> SnapshotResourceVariables(
-    OpKernelContext* ctx, const std::vector<int>& variables);
+    OpKernelContext* ctx, absl::Span<const int> variables);
 
 // Adapter class that wraps a Tensorflow allocator as an XLA allocator.
 // Assumes that the Tensorflow allocator permits asynchronous deallocation:
diff --git a/tensorflow/compiler/tests/dense_layer_test.py b/tensorflow/compiler/tests/dense_layer_test.py
index 0af74c2d8f..9390870e07 100644
--- a/tensorflow/compiler/tests/dense_layer_test.py
+++ b/tensorflow/compiler/tests/dense_layer_test.py
@@ -45,17 +45,21 @@ def InLabels(labels, substr):
   return any([substr in x for x in labels])
 
 
-def XlaLaunchOpCount(labels):
-  """Count how many XlaLaunch labels are present."""
-  return sum("XlaLaunch(" in x for x in labels)
+class DenseLayerTest(test.TestCase):
 
+  def countXlaOps(self, labels):
+    """Count how many XlaCompile/XlaRun labels are present."""
+    xla_compile_count = sum("XlaCompile(" in x for x in labels)
+    xla_run_count = sum("XlaRun(" in x for x in labels)
+    self.assertEqual(xla_compile_count, xla_run_count)
+    return xla_run_count
 
-class DenseLayerTest(test.TestCase):
 
   def testDenseLayerAutoJit(self):
     """Tests dense layer compilation in auto-jit mode.
 
-    Dense layer should be compiled into a single XlaLaunch op in auto-jit mode.
+    Dense layer should be compiled into a single XlaCompile/XlaRun op pair in
+    auto-jit mode.
     """
 
     os.environ["TF_XLA_FLAGS"] = (
@@ -77,14 +81,14 @@ class DenseLayerTest(test.TestCase):
               trace_level=config_pb2.RunOptions.FULL_TRACE))
 
     labels = GetRunMetadataLabels(run_metadata)
-    self.assertEqual(1, XlaLaunchOpCount(labels))
+    self.assertEqual(1, self.countXlaOps(labels))
     self.assertFalse(InLabels(labels, "MatMult"))
 
   def testDenseLayerJitScopeDefinedShape(self):
     """Tests that the dense layer node is properly compiled in jit scope.
 
     Dense layer with static shape input tensor should be compiled into a single
-    XlaLaunch op by XLA.
+    XlaCompile/XlaRun op pair by XLA.
     """
 
     with self.cached_session() as sess:
@@ -101,7 +105,7 @@ class DenseLayerTest(test.TestCase):
               trace_level=config_pb2.RunOptions.FULL_TRACE))
 
     labels = GetRunMetadataLabels(run_metadata)
-    self.assertEqual(1, XlaLaunchOpCount(labels))
+    self.assertEqual(1, self.countXlaOps(labels))
     # No need to check whether ListDiff is compiled or not because ListDiff op
     # is not used when input tensor shape is fully defined.
 
@@ -111,7 +115,8 @@ class DenseLayerTest(test.TestCase):
     Dense layer uses shape op to get shape of input tensor if its shape is not
     fully defined. XLA does not cluster shape op with other operators. But in
     experimental_jit_scope, XLA is forced to compile shape op into its own
-    cluster, causing dense layer to be split into TWO XlaLaunch ops.
+    cluster, causing dense layer to be split into TWO XlaCompile/XlaRun op
+    pairs.
     """
 
     with self.cached_session() as sess:
@@ -128,7 +133,7 @@ class DenseLayerTest(test.TestCase):
               trace_level=config_pb2.RunOptions.FULL_TRACE))
 
     labels = GetRunMetadataLabels(run_metadata)
-    self.assertEqual(2, XlaLaunchOpCount(labels))
+    self.assertEqual(2, self.countXlaOps(labels))
     self.assertFalse(InLabels(labels, "MatMult"))
 
 
diff --git a/tensorflow/compiler/tests/jit_test.py b/tensorflow/compiler/tests/jit_test.py
index 0839fb123e..de68ff0e32 100644
--- a/tensorflow/compiler/tests/jit_test.py
+++ b/tensorflow/compiler/tests/jit_test.py
@@ -77,11 +77,11 @@ def InLabels(labels, substr):
   return any([substr in x for x in labels])
 
 
-def MetadataHasXlaLaunch(run_metadata):
-  """Returns true if there is a XlaLaunch kernel in run_metadata's timeline."""
+def MetadataHasXlaOp(run_metadata):
+  """Returns true if there are XlaRun kernels in run_metadata's timeline."""
 
   # TODO(phawkins): find a less hacky way to test whether a kernel ran.
-  return InLabels(RunMetadataLabels(run_metadata), "XlaLaunch")
+  return InLabels(RunMetadataLabels(run_metadata), "XlaRun")
 
 
 class JitLaunchTest(test.TestCase):
@@ -90,9 +90,10 @@ class JitLaunchTest(test.TestCase):
   # Verifies that the outputs match and that XLA was invoked. 'fn' must take
   # the same number of tensors as arguments that are in 'args', and must return
   # a tuple of output tensors.
-  # If 'require_kernel_launch' is True, then we verify that a XlaLaunch node
-  # actually ran. However, it is sometimes possible for XlaLaunch ops to be
-  # constant-folded away, so the check is optional.
+  #
+  # If 'require_kernel_launch' is True, then we verify that an XlaCompile/XlaRun
+  # node actually ran. However, it is sometimes possible for XlaCompile/XlaRun
+  # ops to be constant-folded away, so the check is optional.
   def _compare(self, fn, args, require_kernel_launch=True, noinline=None):
     with session_lib.Session(config=NoRewriteSessionConfig()) as sess:
       placeholders = []
@@ -115,7 +116,7 @@ class JitLaunchTest(test.TestCase):
       print("Compiled Result {}".format(compiled))
 
       if require_kernel_launch:
-        self.assert_(MetadataHasXlaLaunch(run_metadata))
+        self.assert_(MetadataHasXlaOp(run_metadata))
 
         direct = sess.run(direct_op, feeds)
         print("Direct Result {}".format(direct))
@@ -149,10 +150,10 @@ class JitLaunchTest(test.TestCase):
       y = math_ops.add(x, x)
       return y, y
 
-    # Exercises compling a function (say, Foo) which calls another
-    # function (say, Bar) which is not inlined. When the compiler compiles
-    # Foo, it needs to symbolic execute Bar correctly regardless whether
-    # Bar is inlined or not.
+    # Exercises compiling a function (say, Foo) which calls another function
+    # (say, Bar) which is not inlined. When the compiler compiles Foo, it needs
+    # to symbolically execute Bar correctly regardless of whether Bar is inlined
+    # or not.
 
     # TODO(b/36139787): Re-enable this test when noinline works again.
     # Tests compiled=True and noinline=True.
@@ -259,7 +260,7 @@ class JitLaunchTest(test.TestCase):
         # TODO(phawkins): really we would like to test that there were exactly
         # two kernel launches. However, we have no reliable way to determine
         # that.
-        self.assert_(MetadataHasXlaLaunch(run_metadata))
+        self.assert_(MetadataHasXlaOp(run_metadata))
 
         expected = np.square(np.dot(dx, dw) + db)
         self.assertAllClose(expected, output, rtol=1e-1)
@@ -289,7 +290,7 @@ class XlaCompilationTest(test.TestCase):
                      run_metadata=run_metadata,
                      options=config_pb2.RunOptions(
                          trace_level=config_pb2.RunOptions.FULL_TRACE))
-      self.assert_(MetadataHasXlaLaunch(run_metadata))
+      self.assert_(MetadataHasXlaOp(run_metadata))
       self.assertAllClose(np.array([[1, 2, 3], [4, 5, 6]], np.float32), out)
 
   def testIgnoredArguments(self):
@@ -313,7 +314,7 @@ class XlaCompilationTest(test.TestCase):
                      run_metadata=run_metadata,
                      options=config_pb2.RunOptions(
                          trace_level=config_pb2.RunOptions.FULL_TRACE))
-      self.assert_(MetadataHasXlaLaunch(run_metadata))
+      self.assert_(MetadataHasXlaOp(run_metadata))
       self.assertAllClose(28, out)
 
   def testLoops(self):
@@ -331,7 +332,7 @@ class XlaCompilationTest(test.TestCase):
                            run_metadata=run_metadata,
                            options=config_pb2.RunOptions(
                                trace_level=config_pb2.RunOptions.FULL_TRACE))
-      self.assert_(MetadataHasXlaLaunch(run_metadata))
+      self.assert_(MetadataHasXlaOp(run_metadata))
       self.assertAllClose(result, np.float32(95), rtol=1e-1)
 
   def testCond(self):
@@ -356,7 +357,7 @@ class XlaCompilationTest(test.TestCase):
                            run_metadata=run_metadata,
                            options=config_pb2.RunOptions(
                                trace_level=config_pb2.RunOptions.FULL_TRACE))
-      self.assert_(MetadataHasXlaLaunch(run_metadata))
+      self.assert_(MetadataHasXlaOp(run_metadata))
       self.assertAllClose(result, np.float32(6), rtol=1e-1)
 
   def testNestedFunction(self):
@@ -441,14 +442,16 @@ class XlaCompilationTest(test.TestCase):
     self.assertFalse(InLabels(labels, "Log"))
     self.assertTrue(InLabels(labels, "Reciprocal"))
     self.assertTrue(InLabels(labels, "Mul"))
-    self.assertFalse(InLabels(labels, "XlaLaunch"))
+    self.assertFalse(InLabels(labels, "XlaCompile"))
+    self.assertFalse(InLabels(labels, "XlaRun"))
 
-    # Compile the backprop. One XlaLaunch.
+    # Compile the backprop. One XlaCompile/XlaRun pair.
     labels = _Run(compiled=True)
     self.assertFalse(InLabels(labels, "Log"))
     self.assertFalse(InLabels(labels, "Reciprocal"))
     self.assertFalse(InLabels(labels, "Mul"))
-    self.assertTrue(InLabels(labels, "XlaLaunch"))
+    self.assertTrue(InLabels(labels, "XlaCompile"))
+    self.assertTrue(InLabels(labels, "XlaRun"))
 
 
 class ElementWiseFusionTest(test.TestCase):
@@ -482,9 +485,12 @@ class ElementWiseFusionTest(test.TestCase):
               trace_level=config_pb2.RunOptions.FULL_TRACE))
 
       labels = RunMetadataLabels(run_metadata)
-      count = sum("XlaLaunch(" in x for x in labels)
 
-      return output, count
+      xla_compile_count = sum("XlaCompile(" in x for x in labels)
+      xla_run_count = sum("XlaRun(" in x for x in labels)
+      self.assertEqual(xla_compile_count, xla_run_count)
+
+      return output, xla_run_count
 
   def testElementWiseClustering(self):
     arg0 = np.random.rand(2, 2).astype(np.float32)
-- 
GitLab


From 039ddaa6c0af4be4291383564db5a964d0035c1d Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Thu, 20 Sep 2018 15:49:40 -0700
Subject: [PATCH 0468/1357] Fix bad indentation

---
 tensorflow/python/keras/metrics_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py
index 43ac5b7ead..5f5565d4d5 100644
--- a/tensorflow/python/keras/metrics_test.py
+++ b/tensorflow/python/keras/metrics_test.py
@@ -108,13 +108,13 @@ class KerasMetricsTest(test.TestCase):
       y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
       y_true = K.variable(np.array([1, 0]))
       result = K.eval(
-        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
       self.assertEqual(result, 1)
       result = K.eval(
-        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
       self.assertEqual(result, 0.5)
       result = K.eval(
-        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
       self.assertEqual(result, 0.)
 
   def test_top_k_categorical_accuracy(self):
-- 
GitLab


From 1797aacbd8b910fb8c15577f66257b35af97cc1a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 15:40:53 -0700
Subject: [PATCH 0469/1357] Change all YAML booleans from True/False to
 true/false.

PiperOrigin-RevId: 213896057
---
 tensorflow/contrib/lite/g3doc/_book.yaml    | 4 ++--
 tensorflow/contrib/lite/g3doc/_project.yaml | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/g3doc/_book.yaml b/tensorflow/contrib/lite/g3doc/_book.yaml
index 6f56e3139f..beaa5c479a 100644
--- a/tensorflow/contrib/lite/g3doc/_book.yaml
+++ b/tensorflow/contrib/lite/g3doc/_book.yaml
@@ -5,7 +5,7 @@ upper_tabs:
 # Dropdown menu
 - name: Ecosystem
   path: /ecosystem
-  is_default: True
+  is_default: true
   menu:
   - include: /ecosystem/_menu_toc.yaml
   lower_tabs:
@@ -23,7 +23,7 @@ upper_tabs:
         path: /lite/demo_ios
       - title: Performance
         path: /lite/performance
-      - break: True
+      - break: true
       - title: TensorFlow Lite APIs
         path: /lite/apis
       - title: Custom operators
diff --git a/tensorflow/contrib/lite/g3doc/_project.yaml b/tensorflow/contrib/lite/g3doc/_project.yaml
index d48d07be04..3ce6986396 100644
--- a/tensorflow/contrib/lite/g3doc/_project.yaml
+++ b/tensorflow/contrib/lite/g3doc/_project.yaml
@@ -4,7 +4,7 @@ home_url: /lite/
 parent_project_metadata_path: /_project.yaml
 description: >
   TensorFlow Lite is a lightweight solution for mobile and embedded devices.
-use_site_branding: True
-hide_from_products_list: True
+use_site_branding: true
+hide_from_products_list: true
 content_license: cc3-apache2
 buganizer_id: 316308
-- 
GitLab


From 23a88ec5e913ba7086a9aef57875447ccf96e4b5 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Thu, 20 Sep 2018 15:42:23 -0700
Subject: [PATCH 0470/1357] It is more computationally efficient to represent
 resize bilinear as a depthwise convolution instead of a full convolution now
 that it exists in XLA.

PiperOrigin-RevId: 213896333
---
 .../tf2xla/kernels/image_resize_ops.cc        | 76 +++++++++----------
 .../compiler/xla/service/shape_inference.cc   |  5 +-
 2 files changed, 37 insertions(+), 44 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
index d9a0257b70..7b2bb4a7c5 100644
--- a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/compiler/xla/array4d.h"
+#include "tensorflow/compiler/xla/client/lib/constants.h"
 #include "tensorflow/compiler/xla/client/lib/numeric.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
@@ -132,14 +133,14 @@ int64 CalculateUpperPadding(int64 in_size, int64 out_size, int64 kernel_size,
 // If the 2D kernel would be very large, the 1D kernel can be applied once in
 // each dimension due to the symmetry of the kernel along all axis to reduce the
 // computational intensity.
-std::vector<float> Make1DKernel(int64 n) {
+xla::XlaOp Make1DKernel(xla::XlaBuilder* builder, int64 n) {
   std::vector<float> kernel(n * 2 - 1);
   for (int64 i = 0; i < n; ++i) {
     float v = (i + 1.0f) / n;
     kernel[i] = v;
     kernel[n * 2 - 2 - i] = v;
   }
-  return kernel;
+  return xla::ConstantR1<float>(builder, kernel);
 }
 
 // Kernels with more than 16 spatial elements are considered intense and the
@@ -149,41 +150,26 @@ const int64 kMax2DKernelSize = 16;
 xla::XlaOp MakeBilinearResizeKernel(xla::XlaBuilder* builder,
                                     absl::Span<const int64> kernel_size,
                                     int64 channels) {
-  xla::XlaOp channels_iota = xla::Iota(builder, xla::S32, channels);
+  auto depthwise_kernel = xla::Broadcast(
+      xla::Zero(builder, xla::F32),
+      {(2 * kernel_size[0] - 1), (2 * kernel_size[1] - 1), channels, 1});
 
-  auto diag = xla::ConvertElementType(
-      xla::Eq(xla::Broadcast(channels_iota, {2 * kernel_size[0] - 1,
-                                             2 * kernel_size[1] - 1, channels}),
-              channels_iota, /*broadcast_dimensions=*/{2}),
-      xla::PrimitiveType::F32);
   return xla::Mul(
-      xla::Mul(diag,
-               xla::ConstantR1<float>(builder, Make1DKernel(kernel_size[1])),
+      xla::Add(depthwise_kernel, Make1DKernel(builder, kernel_size[1]),
                /*broadcast_dimensions=*/{1}),
-      xla::ConstantR1<float>(builder, Make1DKernel(kernel_size[0])),
+      Make1DKernel(builder, kernel_size[0]),
       /*broadcast_dimensions=*/{0});
 }
 
 xla::XlaOp MakeBilinearResizeKernelInDim(xla::XlaBuilder* builder,
                                          absl::Span<const int64> kernel_size,
                                          int64 channels, int64 dim) {
-  xla::XlaOp channels_iota = xla::Iota(builder, xla::S32, channels);
-
-  auto diag = xla::ConvertElementType(
-      xla::Eq(
-          xla::Broadcast(channels_iota,
-                         {dim == 0 ? (2 * kernel_size[0] - 1) : 1,
-                          dim == 1 ? (2 * kernel_size[1] - 1) : 1, channels}),
-          channels_iota, /*broadcast_dimensions=*/{2}),
-      xla::PrimitiveType::F32);
-  if (dim == 1) {
-    return xla::Mul(
-        diag, xla::ConstantR1<float>(builder, Make1DKernel(kernel_size[1])),
-        /*broadcast_dimensions=*/{1});
-  }
-  return xla::Mul(diag,
-                  xla::ConstantR1<float>(builder, Make1DKernel(kernel_size[0])),
-                  /*broadcast_dimensions=*/{0});
+  auto depthwise_kernel =
+      xla::Broadcast(xla::Zero(builder, xla::F32),
+                     {dim == 0 ? (2 * kernel_size[0] - 1) : 1,
+                      dim == 1 ? (2 * kernel_size[1] - 1) : 1, channels, 1});
+  return xla::Add(depthwise_kernel, Make1DKernel(builder, kernel_size[dim]),
+                  /*broadcast_dimensions=*/{dim});
 }
 
 xla::XlaOp ResizeUsingDilationAndConvolution(xla::XlaBuilder* builder,
@@ -206,8 +192,8 @@ xla::XlaOp ResizeUsingDilationAndConvolution(xla::XlaBuilder* builder,
   xla::ConvolutionDimensionNumbers dimension_numbers;
   dimension_numbers.set_input_batch_dimension(0);
   dimension_numbers.set_output_batch_dimension(0);
-  dimension_numbers.set_input_feature_dimension(3);
-  dimension_numbers.set_output_feature_dimension(3);
+  dimension_numbers.set_input_feature_dimension(num_spatial_dims + 1);
+  dimension_numbers.set_output_feature_dimension(num_spatial_dims + 1);
   for (int i = 0; i < num_spatial_dims; ++i) {
     dimension_numbers.add_input_spatial_dimensions(1 + i);
     dimension_numbers.add_output_spatial_dimensions(1 + i);
@@ -285,7 +271,8 @@ xla::XlaOp ResizeUsingDilationAndConvolution(xla::XlaBuilder* builder,
                                 {{dims.kernel_size[0] - 1, upper_padding[0]},
                                  {dims.kernel_size[1] - 1, upper_padding[1]}},
                                 /*lhs_dilation=*/dims.kernel_size,
-                                /*rhs_dilation=*/{1, 1}, dimension_numbers);
+                                /*rhs_dilation=*/{1, 1}, dimension_numbers,
+                                /*feature_group_count=*/channels);
   } else {
     xla::XlaOp kernel0 =
         MakeBilinearResizeKernelInDim(builder, dims.kernel_size, channels, 0);
@@ -294,7 +281,8 @@ xla::XlaOp ResizeUsingDilationAndConvolution(xla::XlaBuilder* builder,
         /*padding=*/
         {{dims.kernel_size[0] - 1, upper_padding[0]}, {0, 0}},
         /*lhs_dilation=*/{dims.kernel_size[0], 1},
-        /*rhs_dilation=*/{1, 1}, dimension_numbers);
+        /*rhs_dilation=*/{1, 1}, dimension_numbers,
+        /*feature_group_count=*/channels);
     xla::XlaOp kernel1 =
         MakeBilinearResizeKernelInDim(builder, dims.kernel_size, channels, 1);
     output = xla::ConvGeneralDilated(
@@ -302,7 +290,8 @@ xla::XlaOp ResizeUsingDilationAndConvolution(xla::XlaBuilder* builder,
         /*padding=*/
         {{0, 0}, {dims.kernel_size[1] - 1, upper_padding[1]}},
         /*lhs_dilation=*/{1, dims.kernel_size[1]},
-        /*rhs_dilation=*/{1, 1}, dimension_numbers);
+        /*rhs_dilation=*/{1, 1}, dimension_numbers,
+        /*feature_group_count=*/channels);
   }
 
   // Add broadcasts to handle expanding from a size == 1 dimension to a
@@ -331,15 +320,15 @@ xla::XlaOp ResizeUsingDilationAndConvolutionGradOp(xla::XlaBuilder* builder,
   xla::ConvolutionDimensionNumbers dimension_numbers;
   dimension_numbers.set_input_batch_dimension(0);
   dimension_numbers.set_output_batch_dimension(0);
-  dimension_numbers.set_input_feature_dimension(3);
-  dimension_numbers.set_output_feature_dimension(3);
+  dimension_numbers.set_input_feature_dimension(num_spatial_dims + 1);
+  dimension_numbers.set_output_feature_dimension(num_spatial_dims + 1);
   for (int i = 0; i < num_spatial_dims; ++i) {
-    dimension_numbers.add_input_spatial_dimensions(1 + i);
-    dimension_numbers.add_output_spatial_dimensions(1 + i);
+    dimension_numbers.add_input_spatial_dimensions(i + 1);
+    dimension_numbers.add_output_spatial_dimensions(i + 1);
     dimension_numbers.add_kernel_spatial_dimensions(i);
   }
-  dimension_numbers.set_kernel_input_feature_dimension(num_spatial_dims);
-  dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims + 1);
+  dimension_numbers.set_kernel_input_feature_dimension(num_spatial_dims + 1);
+  dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims);
   xla::XlaOp output;
   if (dims.kernel_size[0] * dims.kernel_size[1] < kMax2DKernelSize) {
     xla::XlaOp kernel =
@@ -362,7 +351,8 @@ xla::XlaOp ResizeUsingDilationAndConvolutionGradOp(xla::XlaBuilder* builder,
         {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1},
          {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}},
         /*lhs_dilation=*/dims.stride,
-        /*rhs_dilation=*/{1, 1}, dimension_numbers);
+        /*rhs_dilation=*/{1, 1}, dimension_numbers,
+        /*feature_group_count=*/channels);
   } else {
     xla::XlaOp kernel0 =
         MakeBilinearResizeKernelInDim(builder, dims.kernel_size, channels, 0);
@@ -388,14 +378,16 @@ xla::XlaOp ResizeUsingDilationAndConvolutionGradOp(xla::XlaBuilder* builder,
         /*padding=*/
         {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1}, {0, 0}},
         /*lhs_dilation=*/{dims.stride[0], 1},
-        /*rhs_dilation=*/{1, 1}, dimension_numbers);
+        /*rhs_dilation=*/{1, 1}, dimension_numbers,
+        /*feature_group_count=*/channels);
 
     output = xla::ConvGeneralDilated(
         output, kernel1, /*window_strides=*/{1, dims.kernel_size[1]},
         /*padding=*/
         {{0, 0}, {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}},
         /*lhs_dilation=*/{1, dims.stride[1]},
-        /*rhs_dilation=*/{1, 1}, dimension_numbers);
+        /*rhs_dilation=*/{1, 1}, dimension_numbers,
+        /*feature_group_count=*/channels);
   }
 
   // If in_size[i] > 1 and grad_size[i] == 1, pad the output in dimension i.
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 74bdf2a2e3..7194b2cafd 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -1665,10 +1665,11 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
   if (input_features != kernel_input_features * feature_group_count) {
     return InvalidArgument(
         "Expected LHS feature dimension (value %d) to match RHS "
-        "input feature dimension * feature_group_count (value %d); "
+        "input feature dimension * feature_group_count (value %d * %d = %d); "
         "got <conv>(%s, %s)\n"
         "Dimension numbers: {%s}.",
-        input_features, kernel_input_features * feature_group_count,
+        input_features, kernel_input_features, feature_group_count,
+        kernel_input_features * feature_group_count,
         ShapeUtil::HumanString(lhs), ShapeUtil::HumanString(rhs),
         dnums.DebugString());
   }
-- 
GitLab


From 0e1efc3d9129c740a16081fdc53bdc482f8f0c11 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 20 Sep 2018 16:48:51 -0700
Subject: [PATCH 0471/1357] [tf.data] Moving auto-tuning optimizations into a
 background thread, refactoring the API for exposing tunable parameters, and
 removing `model::Node` from the public API.

PiperOrigin-RevId: 213907565
---
 tensorflow/core/framework/dataset.h           |  87 +--
 tensorflow/core/framework/model.cc            | 204 +++---
 tensorflow/core/framework/model.h             | 613 +++++++++---------
 .../core/kernels/data/captured_function.cc    |  20 +-
 .../kernels/data/map_and_batch_dataset_op.cc  |  66 +-
 .../core/kernels/data/model_dataset_op.cc     |  71 +-
 .../data/parallel_interleave_dataset_op.cc    |  83 +--
 .../kernels/data/parallel_map_iterator.cc     |  58 +-
 .../core/kernels/data/prefetch_dataset_op.cc  |  12 +-
 9 files changed, 628 insertions(+), 586 deletions(-)

diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 91b1e61d3c..697e0604bf 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -529,25 +529,11 @@ class DatasetBase : public core::RefCounted {
                       std::unique_ptr<IteratorBase>* iterator) const {
     *iterator = MakeIteratorInternal(prefix);
     if (ctx->model()) {
-      // The prefix might contain an index. We need to strip it to make it
-      // possible for the model to successfully identify the output node.
-      string sanitized_prefix = prefix;
-      if (str_util::EndsWith(prefix, "]")) {
-        sanitized_prefix = prefix.substr(0, prefix.rfind('['));
-      }
-      std::shared_ptr<model::Node> node =
-          ctx->model()->AddNode((*iterator)->prefix(), sanitized_prefix);
-      std::vector<string> tokens =
-          str_util::Split((*iterator)->prefix(), ':', str_util::SkipEmpty());
-      node->set_name(tokens[tokens.size() - 1]);
+      ctx->model()->AddNode((*iterator)->prefix(), prefix);
       std::shared_ptr<model::Model> model = ctx->model();
       const string& prefix = (*iterator)->prefix();
-      (*iterator)->AddCleanupFunction([model, node, prefix]() {
-        if (node->output()) {
-          node->output()->remove_input(node);
-        }
-        model->RemoveNode(prefix);
-      });
+      (*iterator)->AddCleanupFunction(
+          [model, prefix]() { model->RemoveNode(prefix); });
     }
     return (*iterator)->Initialize(ctx);
   }
@@ -629,23 +615,10 @@ class DatasetBaseIterator : public IteratorBase {
   Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
                  bool* end_of_sequence) final {
     tracing::ScopedActivity activity(params_.prefix);
-    Status s;
-    if (ctx->model()) {
-      std::shared_ptr<model::Node> node =
-          ctx->model()->LookupNode(params_.prefix);
-      if (node->output()) {
-        node->output()->stop_work();
-      }
-      node->start_work();
-      s = GetNextInternal(ctx, out_tensors, end_of_sequence);
-      node->stop_work();
-      node->add_element();
-      if (node->output()) {
-        node->output()->start_work();
-      }
-    } else {
-      s = GetNextInternal(ctx, out_tensors, end_of_sequence);
-    }
+    RecordStart(ctx, true /* stop_output */);
+    Status s = GetNextInternal(ctx, out_tensors, end_of_sequence);
+    if (s.ok() && !*end_of_sequence) RecordElement(ctx);
+    RecordStop(ctx, true /* start_output */);
     if (TF_PREDICT_FALSE(errors::IsOutOfRange(s) && !*end_of_sequence)) {
       s = errors::Internal(
           "Iterator \"", params_.prefix,
@@ -677,52 +650,46 @@ class DatasetBaseIterator : public IteratorBase {
   void AddConstantParameter(IteratorContext* ctx, const string& name,
                             int64 value) {
     if (ctx->model()) {
-      std::shared_ptr<model::Node> node = ctx->model()->LookupNode(prefix());
-      if (node) {
-        node->add_constant_param(name, value);
-      }
+      ctx->model()->AddConstantParameter(prefix(), name, value);
     }
   }
 
   // When performance modeling is enabled, this method adds a tunable parameter
   // to the model node corresponding to this iterator.
   //
-  // The `set_fn` function should set the tunable parameter to the value of
-  // its input argument. The function should be thread-safe; in particular, the
-  // state it updates should be protected by a lock as the function can be
-  // invoked asynchronously. It is guaranteed that this function will not be
-  // invoked after the iterator is deleted because the model node that owns
-  // the function is deleted when the iterator is deleted.
+  // The performance modeling logic may use `value` to set the value of the
+  // tunable parameter at any point during the lifetime of this iterator. When
+  // it does, it notifies `cond_var`.
   void AddTunableParameter(IteratorContext* ctx, const string& name,
-                           int64 value, int64 min, int64 max,
-                           std::function<void(int64)>&& set_fn) {
+                           std::atomic<int64>* value, int64 min, int64 max,
+                           condition_variable* cond_var) {
     if (ctx->model()) {
-      std::shared_ptr<model::Node> node = ctx->model()->LookupNode(prefix());
-      if (node) {
-        node->add_tunable_param(name, value, min, max, std::move(set_fn));
-      }
+      ctx->model()->AddTunableParameter(prefix(), name, value, min, max,
+                                        cond_var);
+    }
+  }
+
+  // When performance modeling is enabled, this method records the fact that
+  // this iterator has produced an element.
+  void RecordElement(IteratorContext* ctx) {
+    if (ctx->model()) {
+      ctx->model()->RecordElement(prefix());
     }
   }
 
   // When performance modeling is enabled, this method records the fact that
   // a thread of this iterator has started work.
-  void StartWork(IteratorContext* ctx) {
+  void RecordStart(IteratorContext* ctx, bool stop_output = false) {
     if (ctx->model()) {
-      std::shared_ptr<model::Node> node = ctx->model()->LookupNode(prefix());
-      if (node) {
-        node->start_work();
-      }
+      ctx->model()->RecordStart(prefix(), stop_output);
     }
   }
 
   // When performance modeling is enabled, this method records the fact that
   // a thread of this iterator has stopped work.
-  void StopWork(IteratorContext* ctx) {
+  void RecordStop(IteratorContext* ctx, bool start_output = false) {
     if (ctx->model()) {
-      std::shared_ptr<model::Node> node = ctx->model()->LookupNode(prefix());
-      if (node) {
-        node->stop_work();
-      }
+      ctx->model()->RecordStop(prefix(), start_output);
     }
   }
 
diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index 112298c344..b0330ec990 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,16 +17,14 @@ limitations under the License.
 
 #include <memory>
 
-#include "tensorflow/core/lib/gtl/map_util.h"
-
 namespace tensorflow {
 namespace data {
 namespace model {
 
 // TODO(jsimsa): Use `Node` subclassing instead of types and node statements.
-void Node::CollectTunables(
+void Model::Node::CollectTunables(
     std::vector<std::shared_ptr<Node::Tunable>>* tunables) {
-  mutex_lock l(mu_);
+  tf_shared_lock l(mu_);
   for (auto input : inputs_) {
     input->CollectTunables(tunables);
   }
@@ -45,14 +43,14 @@ void Node::CollectTunables(
   }
 }
 
-int64 Node::GetParameterValue(const string& name) {
+int64 Model::Node::GetParameterValue(const string& name) {
   if (auto* tunable_param = gtl::FindOrNull(tunable_params_, name)) {
     return (*tunable_param)->value;
   }
   return constant_params_[name];
 }
 
-int64 Node::ProcessingTimeLocked() {
+int64 Model::Node::ProcessingTimeLocked() {
   switch (type_) {
     case Type::BATCH:
     case Type::MAP_AND_BATCH:
@@ -101,7 +99,7 @@ int64 Node::ProcessingTimeLocked() {
   }
 }
 
-int64 Node::OutputTimeLocked(std::vector<int64>* input_times) {
+int64 Model::Node::OutputTimeLocked(std::vector<int64>* input_times) {
   switch (type_) {
     case Type::BATCH:
     case Type::PADDED_BATCH: {
@@ -251,15 +249,34 @@ int64 Node::OutputTimeLocked(std::vector<int64>* input_times) {
   }
 }
 
-std::shared_ptr<Node> Model::AddNode(const string& name,
-                                     const string& output_name) {
-  mutex_lock l(mu_);
+void Model::AddConstantParameter(const string& node_name,
+                                 const string& parameter_name, int64 value) {
+  tf_shared_lock l(mu_);
+  auto node = gtl::FindOrNull(lookup_table_, node_name);
+  if (node) {
+    (*node)->add_constant_param(parameter_name, value);
+  }
+}
+
+void Model::AddNode(const string& name, const string& output_name) {
+  // The name captures the sequence of iterators joined by `::`. We use the full
+  // sequence as the key in the lookup table, but only the last element of the
+  // sequence as the name node.
+  std::vector<string> tokens =
+      str_util::Split(name, ':', str_util::SkipEmpty());
+  // The output name might contain an index. We need to strip it to make it
+  // possible for the model to successfully identify the output node.
+  string sanitized_output_name = output_name;
+  if (str_util::EndsWith(output_name, "]")) {
+    sanitized_output_name = output_name.substr(0, output_name.rfind('['));
+  }
   std::shared_ptr<Node> output;
-  auto it = lookup_table_.find(output_name);
+  mutex_lock l(mu_);
+  auto it = lookup_table_.find(sanitized_output_name);
   if (it != lookup_table_.end()) {
     output = it->second;
   }
-  std::shared_ptr<Node> node(new Node(id_counter_++, output));
+  std::shared_ptr<Node> node(new Node(id_counter_++, tokens.back(), output));
   if (!output_) {
     output_ = node;
   }
@@ -267,88 +284,125 @@ std::shared_ptr<Node> Model::AddNode(const string& name,
     output->add_input(node);
   }
   lookup_table_.insert(std::make_pair(name, node));
-  return node;
 }
 
-std::shared_ptr<Node> Model::LookupNode(const string& name) {
+void Model::AddProcessingTime(const string& name, int64 delta) {
   tf_shared_lock l(mu_);
-  std::shared_ptr<Node> result;
-  auto it = lookup_table_.find(name);
-  if (it != lookup_table_.end()) {
-    result = it->second;
+  auto node = gtl::FindOrNull(lookup_table_, name);
+  if (node) {
+    (*node)->add_processing_time(delta);
   }
-  return result;
+}
+
+void Model::AddTunableParameter(const string& node_name,
+                                const string& parameter_name,
+                                std::atomic<int64>* value, int64 min, int64 max,
+                                condition_variable* cond_var) {
+  tf_shared_lock l(mu_);
+  auto node = *gtl::FindOrNull(lookup_table_, node_name);
+  DCHECK(node);
+  node->add_tunable_param(parameter_name, value, min, max, cond_var);
 }
 
 // The optimization algorithm starts by setting all tunable parallelism
-// parameters to 1. It then repeatedly identifies the parameter that whose
-// increase in parallelism decreases the output time the most. This process is
-// repeated until all parameters reach their maximum values or the
-// projected output time is less than or equal to the processing time needed to
-// produce an element divided by CPU budget.
+// parameters to 1. It then repeatedly identifies the parameter whose increase
+// in parallelism decreases the output time the most. This process is repeated
+// until all parameters reach their maximum values or the projected output time
+// is less than or equal to the processing time needed to produce an element
+// divided by CPU budget.
 void Model::Optimize(int64 cpu_budget) {
-  mutex_lock l(optimization_mu_);
-  std::vector<std::shared_ptr<Node::Tunable>> tunables;
-  {
-    mutex_lock l2(mu_);
-    const int64 processing_time = ProcessingTime();
-    tunables = CollectTunables();
-    for (auto tunable : tunables) {
-      tunable->value = 1;
-    }
-    while (true) {
-      const int64 output_time = OutputTime();
-      bool all_tunables = true;
-      for (auto& tunable : tunables) {
-        if (tunable->value < tunable->max) {
-          all_tunables = false;
-          break;
-        }
-      }
-      if (output_time < processing_time / cpu_budget || all_tunables) {
+  tf_shared_lock lock(mu_);
+  std::vector<std::shared_ptr<Model::Node::Tunable>> tunables;
+  const int64 processing_time = ProcessingTime();
+  tunables = CollectTunables();
+  for (auto tunable : tunables) {
+    tunable->value = 1;
+  }
+  while (true) {
+    const int64 output_time = OutputTime();
+    bool all_tunables = true;
+    for (auto& tunable : tunables) {
+      if (tunable->value < tunable->max) {
+        all_tunables = false;
         break;
       }
-      int64 best_delta = -1;
-      Node::Tunable* best_tunable = nullptr;
-      for (auto& tunable : tunables) {
-        if (tunable->value == tunable->max) {
-          continue;
-        }
-        tunable->value++;
-        int64 delta = output_time - OutputTime();
-        if (delta > best_delta) {
-          best_delta = delta;
-          best_tunable = tunable.get();
-        }
-        tunable->value--;
+    }
+    if (output_time < processing_time / cpu_budget || all_tunables) {
+      break;
+    }
+    int64 best_delta = -1;
+    Model::Node::Tunable* best_tunable = nullptr;
+    for (auto& tunable : tunables) {
+      if (tunable->value == tunable->max) {
+        continue;
       }
-      if (!best_tunable) {
-        // NOTE: This can happen because we are performing the optimization
-        // while the model data is changing. If this becomes an issue, we should
-        // look into performing the optimization using a model snapshot.
-        break;
+      tunable->value++;
+      int64 delta = output_time - OutputTime();
+      if (delta > best_delta) {
+        best_delta = delta;
+        best_tunable = tunable.get();
       }
-      best_tunable->value++;
+      tunable->value--;
+    }
+    if (!best_tunable) {
+      // NOTE: This can happen because we are performing the optimization
+      // while the model data is changing. If this becomes an issue, we should
+      // look into performing the optimization using a model snapshot.
+      break;
     }
+    best_tunable->value++;
   }
-  // The `set_fn` functions should be invoked without holding a lock to avoid a
-  // potential deadlock.
+  VLOG(2) << "Number of knobs: " << tunables.size();
   for (auto& tunable : tunables) {
-    tunable->set_fn(tunable->value);
+    VLOG(2) << "Setting tunable parameter: " << tunable->value;
+    tunable->value_ptr->store(tunable->value);
+    if (tunable->cond_var) {
+      tunable->cond_var->notify_all();
+    }
+  }
+}
+
+void Model::RecordElement(const string& name) {
+  tf_shared_lock l(mu_);
+  auto node = gtl::FindOrNull(lookup_table_, name);
+  if (node) {
+    (*node)->record_element();
   }
 }
 
-void Model::RemoveNode(const string& prefix) {
-  // Nodes are not allowed to be removed when optimization is in progress to
-  // prevent the optimization from trying to access an iterator that was
-  // concurrently deleted.
-  mutex_lock l(optimization_mu_);
-  mutex_lock l2(mu_);
-  lookup_table_.erase(prefix);
+void Model::RecordStart(const string& name, bool stop_output) {
+  tf_shared_lock l(mu_);
+  auto node = gtl::FindOrNull(lookup_table_, name);
+  if (node) {
+    if (stop_output && (*node)->output()) {
+      (*node)->output()->record_stop();
+    }
+    (*node)->record_start();
+  }
+}
+
+void Model::RecordStop(const string& name, bool start_output) {
+  tf_shared_lock l(mu_);
+  auto node = gtl::FindOrNull(lookup_table_, name);
+  if (node) {
+    (*node)->record_stop();
+    if (start_output && (*node)->output()) {
+      (*node)->output()->record_start();
+    }
+  }
+}
+
+void Model::RemoveNode(const string& name) {
+  mutex_lock l(mu_);
+  auto node = gtl::FindOrNull(lookup_table_, name);
+  if (node && (*node)->output()) {
+    (*node)->output()->remove_input(*node);
+  }
+  lookup_table_.erase(name);
 }
 
-std::vector<std::shared_ptr<Node::Tunable>> Model::CollectTunables() {
-  std::vector<std::shared_ptr<Node::Tunable>> tunables;
+std::vector<std::shared_ptr<Model::Node::Tunable>> Model::CollectTunables() {
+  std::vector<std::shared_ptr<Model::Node::Tunable>> tunables;
   output_->CollectTunables(&tunables);
   return tunables;
 }
diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h
index f88ec06ef3..26402f5cd3 100644
--- a/tensorflow/core/framework/model.h
+++ b/tensorflow/core/framework/model.h
@@ -24,6 +24,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/env.h"
@@ -32,341 +33,365 @@ namespace tensorflow {
 namespace data {
 namespace model {
 
-class Model;
-class Node;
-
-// Abstract representation of a TensorFlow input pipeline node. It collects
-// information about inputs to this node, processing time spent executing the
-// node logic, number of elements produced by the node, various other
-// information (e.g. batch size or execution parallelism).
+// Abstract representation of a TensorFlow input pipeline that can be used
+// for collecting runtime information and optimizing performance. It collects
+// runtime information about execution of the input pipeline that is used to
+// create a performance model, which is in turn used to identify optimal values
+// of tunable parameters.
 //
 // Developers of tf.data transformations are not expected to interact with this
 // class directly. Boiler plate code for creating the abstract representation of
-// the input pipeline and collecting common information has been added to the
+// the input pipeline and collecting runtime information has been added to the
 // implementation of `DatasetBase` and `DatasetBaseIterator` respectively.
-//
-// In addition, `DatasetBaseIterator` provides wrappers that can be used for
-// transformation-specific information collection. The `SetMetadata` wrapper can
-// be used to pass arbitrary metadata to the modeling framework, while the
-// `StartWork` and `StopWork` wrappers should be used to correctly account for
-// processing time of multi-threaded transformation that yield the CPU; such
-// transformations should invoke `StartWork()` when a transformation thread
-// starts executing (e.g. when created or woken up) and `StopWork()` when a
-// transformation thread stops executing (e.g. when returning or waiting).
-//
-// TODO(jsimsa): Create an API to capture the abstract semantics of each
-// tf.data transformation and replace switch-case blocks with inheritance.
-class Node {
+class Model {
  public:
-  Node(int64 id, std::shared_ptr<Node> output) : id_(id), output_(output) {}
-
-  // Adds a constant parameter.
-  void add_constant_param(const string& name, int64 value) LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    constant_params_[name] = value;
-  }
-
-  // Records that the node produced an element.
-  void add_element() LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    num_elements_++;
-  }
-
-  // Adds an input.
-  void add_input(std::shared_ptr<Node> node) LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    inputs_.push_back(node);
-  }
-
-  // Increments the aggregate processing time by the given delta.
-  void add_processing_time(int64 delta) LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    processing_time_ += delta;
-  }
-
-  // Adds a tunable parameter.
-  void add_tunable_param(const string& name, int64 value, int64 min, int64 max,
-                         std::function<void(int64)>&& set_fn)
-      LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    tunable_params_[name] =
-        std::make_shared<Tunable>(value, min, max, std::move(set_fn));
-  }
-
-  // Returns the unique node ID.
-  int64 id() LOCKS_EXCLUDED(mu_) { return id_; }
-
-  // Returns the node inputs.
-  std::list<std::shared_ptr<Node>> inputs() LOCKS_EXCLUDED(mu_) {
-    tf_shared_lock l(mu_);
-    return inputs_;
-  }
-
-  // Returns the node name.
-  const string& name() LOCKS_EXCLUDED(mu_) {
-    tf_shared_lock l(mu_);
-    return name_;
-  }
-
-  // Returns the number of elements produced by the node.
-  int64 num_elements() LOCKS_EXCLUDED(mu_) {
-    tf_shared_lock l(mu_);
-    return num_elements_;
-  }
-
-  // Returns the node output.
-  std::shared_ptr<Node> output() LOCKS_EXCLUDED(mu_) {
-    tf_shared_lock l(mu_);
-    return output_;
-  }
-
-  // Removes an input.
-  void remove_input(std::shared_ptr<Node> input) LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    inputs_.remove(input);
-  }
-
-  // Sets the node name.
-  void set_name(const string& name) LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    name_ = name;
-    type_ = TypeFromName(name);
-  }
-
-  // Set the node output.
-  void set_output(std::shared_ptr<Node> output) LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    output_ = output;
-  }
-
-  // Records that a node thread has started work.
-  void start_work() LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    work_start_[std::this_thread::get_id()] = Env::Default()->NowNanos();
-  }
-
-  // Records that a node thread has stopped work.
-  void stop_work() LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    auto iter = work_start_.find(std::this_thread::get_id());
-    CHECK(work_start_.end() != iter)
-        << "Encountered a stop event that was not preceded by a start event.";
-    processing_time_ += Env::Default()->NowNanos() - iter->second;
-    work_start_.erase(iter);
-  }
-
- private:
-  // Represents a tunable parameter.
-  struct Tunable {
-    Tunable(int64 value, int64 min, int64 max,
-            std::function<void(int64)> set_fn)
-        : value(value), min(min), max(max), set_fn(std::move(set_fn)) {}
-
-    int64 value;
-    int64 min;
-    int64 max;
-    std::function<void(int64)> set_fn;
-  };
+  Model() = default;
 
-  enum class Type {
-    BATCH = 0,
-    CACHE,
-    CONCATENATE,
-    FILTER,
-    FLAT_MAP,
-    INTERLEAVE,
-    MAP,
-    MAP_AND_BATCH,
-    PADDED_BATCH,
-    PARALLEL_INTERLEAVE,
-    PARALLEL_INTERLEAVE_V2,
-    PARALLEL_MAP,
-    PREFETCH,
-    REPEAT,
-    SHUFFLE,
-    SKIP,
-    TAKE,
-    ZIP,
-    UNKNOWN,
-  };
+  // Adds a constant parameter for the given node.
+  void AddConstantParameter(const string& node_name,
+                            const string& parameter_name, int64 value)
+      LOCKS_EXCLUDED(mu_);
 
-  // Collects tunable parameters in the subtree rooted in this node.
-  void CollectTunables(std::vector<std::shared_ptr<Node::Tunable>>* tunables)
+  // Adds a node with the given name and given output (identified by name).
+  void AddNode(const string& name, const string& output_name)
       LOCKS_EXCLUDED(mu_);
 
-  // Gets a value of the given parameter (tunable or constant).
-  int64 GetParameterValue(const string& name) EXCLUSIVE_LOCKS_REQUIRED(mu_);
+  // Increments the processing time for the given node..
+  void AddProcessingTime(const string& name, int64 delta) LOCKS_EXCLUDED(mu_);
 
-  // Returns the per-element processing time spent in this node.
-  int64 NanosPerElement() LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    return NanosPerElementLocked();
-  }
+  // Adds a tunable parameter for the given node.
+  void AddTunableParameter(const string& node_name,
+                           const string& parameter_name,
+                           std::atomic<int64>* value, int64 min, int64 max,
+                           condition_variable* cond_var) LOCKS_EXCLUDED(mu_);
 
-  int64 NanosPerElementLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    if (num_elements_ == 0) {
-      return 0;
-    }
-    return (int64)((double)processing_time_ / (double)num_elements_);
-  }
-
-  // Returns the per-element output time for this node.
-  int64 OutputTime(std::vector<int64>* input_times) LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    return OutputTimeLocked(input_times);
-  }
-
-  int64 OutputTimeLocked(std::vector<int64>* input_times)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_);
-
-  int64 OutputTimeForInputs(std::vector<int64>* input_times)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    int64 sum = 0;
-    for (auto input : inputs_) {
-      sum += input->OutputTime(input_times);
-    }
-    return sum;
-  }
-
-  // Returns the per-element processing time spent in the subtree rooted in this
-  // node.
-  int64 ProcessingTime() LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    return ProcessingTimeLocked();
-  }
-
-  int64 ProcessingTimeLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_);
-
-  // Returns the per-element processing time spent in the inputs of this node.
-  int64 ProcessingTimeForInputs() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    int64 sum = 0;
-    for (auto input : inputs_) {
-      sum += input->ProcessingTimeLocked();
-    }
-    return sum;
-  }
+  // Runs optimization.
+  void Optimize(int64 cpu_budget) LOCKS_EXCLUDED(mu_);
 
-  Type TypeFromName(const string& name) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    if (name_ == "Batch") {
-      return Type::BATCH;
-    }
-    if (str_util::EndsWith(name_, "Cache")) {
-      return Type::CACHE;
-    }
-    if (name_ == "Concatenate") {
-      return Type::CONCATENATE;
-    }
-    if (name_ == "Filter") {
-      return Type::FILTER;
+  // Records that a node has produced an element.
+  void RecordElement(const string& name) LOCKS_EXCLUDED(mu_);
+
+  // Records that the given node has started work. If `stop_output` is set, it
+  // also records that the output of the given node has stopped work.
+  void RecordStart(const string& name, bool stop_output) LOCKS_EXCLUDED(mu_);
+
+  // Records that the given node has stopped work. If `stop_output` is set, it
+  // also records that the output of the given node has started work.
+  void RecordStop(const string& name, bool start_output) LOCKS_EXCLUDED(mu_);
+
+  // Removes the given node.
+  void RemoveNode(const string& name) LOCKS_EXCLUDED(mu_);
+
+ private:
+  // Abstract representation of a TensorFlow input pipeline node. It collects
+  // information about inputs to this node, processing time spent executing the
+  // node logic, number of elements produced by the node, various other
+  // information (e.g. batch size or execution parallelism).
+  //
+  // Developers of tf.data transformations are not expected to interact with
+  // this class directly. Boiler plate code for creating the abstract
+  // representation of the input pipeline and collecting common information has
+  // been added to the implementation of `DatasetBase` and `DatasetBaseIterator`
+  // respectively.
+  //
+  // In addition, `DatasetBaseIterator` provides wrappers that can be used for
+  // transformation-specific information collection. The `SetMetadata` wrapper
+  // can be used to pass arbitrary metadata to the modeling framework, while the
+  // `StartWork` and `StopWork` wrappers should be used to correctly account for
+  // processing time of multi-threaded transformation that yield the CPU; such
+  // transformations should invoke `StartWork()` when a transformation thread
+  // starts executing (e.g. when created or woken up) and `StopWork()` when a
+  // transformation thread stops executing (e.g. when returning or waiting).
+  //
+  // TODO(jsimsa): Create an API to capture the abstract semantics of each
+  // tf.data transformation and replace switch-case blocks with inheritance.
+  class Node {
+   public:
+    // Represents a tunable parameter.
+    struct Tunable {
+      Tunable(std::atomic<int64>* value, int64 min, int64 max,
+              condition_variable* cond_var)
+          : value(*value),
+            min(min),
+            max(max),
+            value_ptr(value),
+            cond_var(cond_var) {}
+
+      // Identifies the model value of the parameter. This can be different from
+      // the actual value (e.g. during optimization search).
+      int64 value;
+
+      // Identifies the minimum value of the parameter.
+      int64 min;
+
+      // Identifies the maximum value of the parameter.
+      int64 max;
+
+      // Points to the actual value of the parameter. Not owned.
+      std::atomic<int64>* value_ptr;
+
+      // If non-null, this condition variable is notified when the model updates
+      // the actual value of the parameter (via `value_ptr`). Not owned.
+      condition_variable* cond_var;
+    };
+
+    Node(int64 id, const string& name, std::shared_ptr<Node> output)
+        : id_(id), name_(name), type_(TypeFromName(name)), output_(output) {}
+
+    // Adds a constant parameter.
+    void add_constant_param(const string& name, int64 value)
+        LOCKS_EXCLUDED(mu_) {
+      mutex_lock l(mu_);
+      constant_params_[name] = value;
     }
-    if (name_ == "FlatMap") {
-      return Type::FLAT_MAP;
+
+    // Adds an input.
+    void add_input(std::shared_ptr<Node> node) LOCKS_EXCLUDED(mu_) {
+      mutex_lock l(mu_);
+      inputs_.push_back(node);
     }
-    if (name_ == "Interleave") {
-      return Type::INTERLEAVE;
+
+    // Increments the aggregate processing time by the given delta.
+    void add_processing_time(int64 delta) LOCKS_EXCLUDED(mu_) {
+      mutex_lock l(mu_);
+      processing_time_ += delta;
     }
-    if (name_ == "Map") {
-      return Type::MAP;
+
+    // Adds a tunable parameter.
+    void add_tunable_param(const string& name, std::atomic<int64>* value,
+                           int64 min, int64 max, condition_variable* cond_var)
+        LOCKS_EXCLUDED(mu_) {
+      mutex_lock l(mu_);
+      tunable_params_[name] =
+          std::make_shared<Tunable>(value, min, max, cond_var);
     }
-    if (name_ == "MapAndBatch") {
-      return Type::MAP_AND_BATCH;
+
+    // Returns the unique node ID.
+    int64 id() LOCKS_EXCLUDED(mu_) { return id_; }
+
+    // Returns the node inputs.
+    std::list<std::shared_ptr<Node>> inputs() LOCKS_EXCLUDED(mu_) {
+      tf_shared_lock l(mu_);
+      return inputs_;
     }
-    if (name_ == "PaddedBatch") {
-      return Type::PADDED_BATCH;
+
+    // Returns the node name.
+    const string& name() LOCKS_EXCLUDED(mu_) {
+      tf_shared_lock l(mu_);
+      return name_;
     }
-    if (name_ == "ParallelInterleave") {
-      return Type::PARALLEL_INTERLEAVE;
+
+    // Returns the number of elements produced by the node.
+    int64 num_elements() LOCKS_EXCLUDED(mu_) {
+      tf_shared_lock l(mu_);
+      return num_elements_;
     }
-    if (name_ == "ParallelInterleaveV2") {
-      return Type::PARALLEL_INTERLEAVE_V2;
+
+    // Returns the node output.
+    std::shared_ptr<Node> output() LOCKS_EXCLUDED(mu_) {
+      tf_shared_lock l(mu_);
+      return output_;
     }
-    if (name_ == "ParallelMap") {
-      return Type::PARALLEL_MAP;
+
+    // Records that the node produced an element.
+    void record_element() LOCKS_EXCLUDED(mu_) {
+      mutex_lock l(mu_);
+      num_elements_++;
     }
-    if (name_ == "Prefetch") {
-      return Type::PREFETCH;
+
+    // Records that a node thread has started executing.
+    void record_start() LOCKS_EXCLUDED(mu_) {
+      mutex_lock l(mu_);
+      work_start_[std::this_thread::get_id()] = Env::Default()->NowNanos();
     }
-    if (str_util::EndsWith(name_, "Repeat")) {
-      return Type::REPEAT;
+
+    // Records that a node thread has stopped executing.
+    void record_stop() LOCKS_EXCLUDED(mu_) {
+      mutex_lock l(mu_);
+      std::thread::id tid = std::this_thread::get_id();
+      auto start_time = gtl::FindOrNull(work_start_, tid);
+      DCHECK(start_time)
+          << "Encountered a stop event that was not preceded by a start event.";
+      if (start_time) {
+        processing_time_ += Env::Default()->NowNanos() - *start_time;
+        work_start_.erase(tid);
+      }
     }
-    if (name_ == "Shuffle") {
-      return Type::SHUFFLE;
+
+    // Removes an input.
+    void remove_input(std::shared_ptr<Node> input) LOCKS_EXCLUDED(mu_) {
+      mutex_lock l(mu_);
+      inputs_.remove(input);
     }
-    if (str_util::EndsWith(name_, "Skip")) {
-      return Type::SKIP;
+
+    // Set the node output.
+    void set_output(std::shared_ptr<Node> output) LOCKS_EXCLUDED(mu_) {
+      mutex_lock l(mu_);
+      output_ = output;
     }
-    if (str_util::EndsWith(name_, "Take")) {
-      return Type::TAKE;
+
+    // Collects tunable parameters in the subtree rooted in this node.
+    void CollectTunables(std::vector<std::shared_ptr<Tunable>>* tunables)
+        LOCKS_EXCLUDED(mu_);
+
+    // Returns the per-element output time for this node.
+    int64 OutputTime(std::vector<int64>* input_times) LOCKS_EXCLUDED(mu_) {
+      tf_shared_lock l(mu_);
+      return OutputTimeLocked(input_times);
     }
-    if (name_ == "Zip") {
-      return Type::ZIP;
+
+    // Returns the per-element processing time spent in the subtree rooted in
+    // this node.
+    int64 ProcessingTime() LOCKS_EXCLUDED(mu_) {
+      tf_shared_lock l(mu_);
+      return ProcessingTimeLocked();
     }
-    return Type::UNKNOWN;
-  }
 
-  mutex mu_;
-  const int64 id_;
-  Type type_ GUARDED_BY(mu_);
-  string name_ GUARDED_BY(mu_);
-  int64 processing_time_ GUARDED_BY(mu_) = 0;
-  int64 num_elements_ GUARDED_BY(mu_) = 0;
-  std::map<std::thread::id, int64> work_start_ GUARDED_BY(mu_);
-  std::map<string, int64> constant_params_ GUARDED_BY(mu_);
-  // Tunables are shared with the model during optimization.
-  std::map<string, std::shared_ptr<Tunable>> tunable_params_ GUARDED_BY(mu_);
-  std::list<std::shared_ptr<Node>> inputs_ GUARDED_BY(mu_);
-  std::shared_ptr<Node> output_ GUARDED_BY(mu_);
+   private:
+    enum class Type {
+      BATCH = 0,
+      CACHE,
+      CONCATENATE,
+      FILTER,
+      FLAT_MAP,
+      INTERLEAVE,
+      MAP,
+      MAP_AND_BATCH,
+      PADDED_BATCH,
+      PARALLEL_INTERLEAVE,
+      PARALLEL_INTERLEAVE_V2,
+      PARALLEL_MAP,
+      PREFETCH,
+      REPEAT,
+      SHUFFLE,
+      SKIP,
+      TAKE,
+      ZIP,
+      UNKNOWN,
+    };
+
+    // Gets a value of the given parameter (tunable or constant).
+    int64 GetParameterValue(const string& name) SHARED_LOCKS_REQUIRED(mu_);
+
+    // Returns the per-element processing time spent in this node.
+    int64 NanosPerElement() LOCKS_EXCLUDED(mu_) {
+      tf_shared_lock l(mu_);
+      return NanosPerElementLocked();
+    }
 
-  friend class Model;
-};
+    int64 NanosPerElementLocked() SHARED_LOCKS_REQUIRED(mu_) {
+      if (num_elements_ == 0) {
+        return 0;
+      }
+      return (int64)((double)processing_time_ / (double)num_elements_);
+    }
 
-// Abstract representation of a TensorFlow input pipeline that can be used
-// for collecting runtime information and optimizing performance. It collects
-// runtime information about execution of the input pipeline that is used to
-// create a performance model, which is in turn used to identify optimal values
-// of tunable parameters.
-//
-// Developers of tf.data transformations are not expected to interact with this
-// class directly. Boiler plate code for creating the abstract representation of
-// the input pipeline and collecting runtime information has been added to the
-// implementation of `DatasetBase` and `DatasetBaseIterator` respectively.
-class Model {
- public:
-  Model() = default;
+    int64 OutputTimeLocked(std::vector<int64>* input_times)
+        SHARED_LOCKS_REQUIRED(mu_);
 
-  // Returns the model output node.
-  std::shared_ptr<Node> output() LOCKS_EXCLUDED(mu_) {
-    tf_shared_lock l(mu_);
-    return output_;
-  }
+    int64 OutputTimeForInputs(std::vector<int64>* input_times)
+        SHARED_LOCKS_REQUIRED(mu_) {
+      int64 sum = 0;
+      for (auto input : inputs_) {
+        sum += input->OutputTime(input_times);
+      }
+      return sum;
+    }
 
-  // Adds a node with the given name and given output (identified by name).
-  std::shared_ptr<Node> AddNode(const string& name, const string& output_name)
-      LOCKS_EXCLUDED(mu_);
+    int64 ProcessingTimeLocked() SHARED_LOCKS_REQUIRED(mu_);
 
-  // Looks up the node using the given name.
-  std::shared_ptr<Node> LookupNode(const string& name) LOCKS_EXCLUDED(mu_);
+    // Returns the per-element processing time spent in the inputs of this node.
+    int64 ProcessingTimeForInputs() SHARED_LOCKS_REQUIRED(mu_) {
+      int64 sum = 0;
+      for (auto input : inputs_) {
+        sum += input->ProcessingTime();
+      }
+      return sum;
+    }
 
-  // Runs optimization.
-  void Optimize(int64 cpu_budget) LOCKS_EXCLUDED(mu_);
+    Type TypeFromName(const string& name) SHARED_LOCKS_REQUIRED(mu_) {
+      if (name_ == "Batch") {
+        return Type::BATCH;
+      }
+      if (str_util::EndsWith(name_, "Cache")) {
+        return Type::CACHE;
+      }
+      if (name_ == "Concatenate") {
+        return Type::CONCATENATE;
+      }
+      if (name_ == "Filter") {
+        return Type::FILTER;
+      }
+      if (name_ == "FlatMap") {
+        return Type::FLAT_MAP;
+      }
+      if (name_ == "Interleave") {
+        return Type::INTERLEAVE;
+      }
+      if (name_ == "Map") {
+        return Type::MAP;
+      }
+      if (name_ == "MapAndBatch") {
+        return Type::MAP_AND_BATCH;
+      }
+      if (name_ == "PaddedBatch") {
+        return Type::PADDED_BATCH;
+      }
+      if (name_ == "ParallelInterleave") {
+        return Type::PARALLEL_INTERLEAVE;
+      }
+      if (name_ == "ParallelInterleaveV2") {
+        return Type::PARALLEL_INTERLEAVE_V2;
+      }
+      if (name_ == "ParallelMap") {
+        return Type::PARALLEL_MAP;
+      }
+      if (name_ == "Prefetch") {
+        return Type::PREFETCH;
+      }
+      if (str_util::EndsWith(name_, "Repeat")) {
+        return Type::REPEAT;
+      }
+      if (name_ == "Shuffle") {
+        return Type::SHUFFLE;
+      }
+      if (str_util::EndsWith(name_, "Skip")) {
+        return Type::SKIP;
+      }
+      if (str_util::EndsWith(name_, "Take")) {
+        return Type::TAKE;
+      }
+      if (name_ == "Zip") {
+        return Type::ZIP;
+      }
+      return Type::UNKNOWN;
+    }
 
-  // Removes the node identified by the given name.
-  void RemoveNode(const string& prefix) LOCKS_EXCLUDED(mu_);
+    mutex mu_;
+    const int64 id_;
+    const string name_;
+    const Type type_;
+    int64 processing_time_ GUARDED_BY(mu_) = 0;
+    int64 num_elements_ GUARDED_BY(mu_) = 0;
+    std::map<std::thread::id, int64> work_start_ GUARDED_BY(mu_);
+    std::map<string, int64> constant_params_ GUARDED_BY(mu_);
+    // Tunables are shared with the model during optimization.
+    std::map<string, std::shared_ptr<Tunable>> tunable_params_ GUARDED_BY(mu_);
+    std::list<std::shared_ptr<Node>> inputs_ GUARDED_BY(mu_);
+    std::shared_ptr<Node> output_ GUARDED_BY(mu_);
+  };
 
- private:
   std::vector<std::shared_ptr<Node::Tunable>> CollectTunables()
-      EXCLUSIVE_LOCKS_REQUIRED(mu_);
+      SHARED_LOCKS_REQUIRED(mu_);
 
-  int64 OutputTime() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+  int64 OutputTime() SHARED_LOCKS_REQUIRED(mu_);
 
-  int64 ProcessingTime() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+  int64 ProcessingTime() SHARED_LOCKS_REQUIRED(mu_);
 
-  // Used for coordination between different input pipeline threads.
+  // Used for coordination between different input pipeline threads. Exclusive
+  // access is required only when adding or removing nodes. Concurrent access to
+  // existing nodes is protected by a node mutex.
   mutex mu_;
-  // Used for preventing iterator deletion when optimization is in progress
-  // because the optimization may try to update the values of tunable
-  // parameters.
-  mutex optimization_mu_ ACQUIRED_BEFORE(mu_);
   int64 id_counter_ GUARDED_BY(mu_) = 1;
   std::shared_ptr<Node> output_ GUARDED_BY(mu_);
   std::map<string, std::shared_ptr<Node>> lookup_table_ GUARDED_BY(mu_);
diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc
index b5f4072e89..0bb929b3ce 100644
--- a/tensorflow/core/kernels/data/captured_function.cc
+++ b/tensorflow/core/kernels/data/captured_function.cc
@@ -451,19 +451,15 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
   CancellationManager* c_mgr = new CancellationManager;
   f_opts.cancellation_manager = c_mgr;
   std::shared_ptr<SimpleStepStatsCollector> stats_collector;
-  std::shared_ptr<model::Node> node;
   if (ctx->model()) {
-    node = ctx->model()->LookupNode(prefix);
-    if (node) {
-      stats_collector = MakeUnique<SimpleStepStatsCollector>();
-    }
+    stats_collector = MakeUnique<SimpleStepStatsCollector>();
   }
   f_opts.stats_collector = stats_collector.get();
 
   auto callback = std::bind(
       [rets, step_container, c_mgr, frame](
           const FunctionLibraryRuntime::DoneCallback& done,
-          const std::shared_ptr<model::Node>& node,
+          const std::shared_ptr<model::Model>& model, const string& prefix,
           const std::shared_ptr<SimpleStepStatsCollector>& stats_collector,
           // Begin unbound arguments.
           Status s) {
@@ -473,16 +469,16 @@ void CapturedFunction::RunAsync(IteratorContext* ctx,
           s = frame->ConsumeRetvals(rets);
         }
         delete frame;
-        if (node) {
-          node->add_processing_time(stats_collector->processing_time());
-          node->start_work();
+        if (model) {
+          model->AddProcessingTime(prefix, stats_collector->processing_time());
+          model->RecordStart(prefix, false /* stop_output */);
         }
         done(s);
-        if (node) {
-          node->stop_work();
+        if (model) {
+          model->RecordStop(prefix, false /* start_output */);
         }
       },
-      std::move(done), std::move(node), std::move(stats_collector),
+      std::move(done), ctx->model(), prefix, std::move(stats_collector),
       std::placeholders::_1);
 
   ctx->lib()->Run(f_opts, handle, frame, std::move(callback));
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index fb022ddf12..2bbf4af664 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #define EIGEN_USE_THREADS
 
+#include <atomic>
 #include <utility>
 
 #include "tensorflow/core/common_runtime/function.h"
@@ -202,17 +203,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         AddConstantParameter(ctx, "batch_size", dataset()->batch_size_);
         if (num_parallel_calls_ == kAutoTune) {
           num_parallel_calls_ = 1;
-          std::function<void(int64)> set_fn = [this](int64 value) {
-            {
-              mutex_lock l(mu_);
-              num_parallel_calls_ = value;
-              cond_var_.notify_all();
-            }
-            VLOG(2) << "setting parallelism knob to " << value;
-          };
-          AddTunableParameter(
-              ctx, "parallelism", num_parallel_calls_ /* value */, 1 /* min */,
-              port::NumSchedulableCPUs() /* max */, std::move(set_fn));
+          AddTunableParameter(ctx, "parallelism",
+                              &num_parallel_calls_ /* value */, 1 /* min */,
+                              port::NumSchedulableCPUs() /* max */, &cond_var_);
         } else {
           AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
         }
@@ -230,9 +223,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           EnsureRunnerThreadStarted(ctx);
           while (batch_results_.empty() ||
                  batch_results_.front()->num_calls > 0) {
-            StopWork(ctx);
+            RecordStop(ctx);
             cond_var_.wait(l);
-            StartWork(ctx);
+            RecordStart(ctx);
           }
           std::swap(result, batch_results_.front());
           batch_results_.pop_front();
@@ -340,9 +333,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
           LOCKS_EXCLUDED(mu_) {
-          mutex_lock l(mu_);
-          num_calls_--;
-          result->num_calls--;
+        mutex_lock l(mu_);
+        num_calls_--;
+        result->num_calls--;
         cond_var_.notify_all();
       }
 
@@ -435,11 +428,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         result->output_allocated = true;
       }
 
-      int MaxBatchResults() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        return (num_parallel_calls_ + dataset()->batch_size_ - 1) /
-               dataset()->batch_size_;
-      }
-
       Status ProcessResult(IteratorContext* ctx,
                            const std::shared_ptr<BatchResult>& result,
                            std::vector<Tensor>* out_tensors,
@@ -488,34 +476,34 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void RunnerThread(const std::shared_ptr<IteratorContext>& ctx)
           LOCKS_EXCLUDED(mu_) {
         std::vector<std::pair<std::shared_ptr<BatchResult>, int64>> new_calls;
-        StartWork(ctx.get());
+        RecordStart(ctx.get());
         auto stop_cleanup =
-            gtl::MakeCleanup([this, &ctx]() { StopWork(ctx.get()); });
-        {
-          tf_shared_lock l(mu_);
-          new_calls.reserve(num_parallel_calls_);
-        }
+            gtl::MakeCleanup([this, &ctx]() { RecordStop(ctx.get()); });
+        new_calls.reserve(num_parallel_calls_);
+        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(mu_) -> bool {
+          int64 num_parallel_calls = num_parallel_calls_;
+          int64 max_batch_results =
+              (num_parallel_calls + dataset()->batch_size_ - 1) /
+              dataset()->batch_size_;
+          return num_calls_ >= num_parallel_calls ||
+                 (batch_results_.size() > max_batch_results ||
+                  (batch_results_.size() == max_batch_results &&
+                   call_counter_ % dataset()->batch_size_ == 0));
+        };
         while (true) {
           {
             mutex_lock l(mu_);
-            while (!cancelled_ &&
-                   (num_calls_ >= num_parallel_calls_ ||
-                    batch_results_.size() > MaxBatchResults() ||
-                    (batch_results_.size() == MaxBatchResults() &&
-                     call_counter_ % dataset()->batch_size_ == 0))) {
-              StopWork(ctx.get());
+            while (!cancelled_ && busy()) {
+              RecordStop(ctx.get());
               cond_var_.wait(l);
-              StartWork(ctx.get());
+              RecordStart(ctx.get());
             }
 
             if (cancelled_) {
               return;
             }
 
-            while (num_calls_ < num_parallel_calls_ &&
-                   (batch_results_.size() < MaxBatchResults() ||
-                    (batch_results_.size() == MaxBatchResults() &&
-                     call_counter_ % dataset()->batch_size_ != 0))) {
+            while (!busy()) {
               if (call_counter_ % dataset()->batch_size_ == 0) {
                 batch_results_.emplace_back(
                     new BatchResult(dataset()->batch_size_));
@@ -660,7 +648,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       // the `batch_results_` buffer.
       condition_variable cond_var_;
       // Identifies the maximum number of parallel calls.
-      int64 num_parallel_calls_ GUARDED_BY(mu_) = 0;
+      std::atomic<int64> num_parallel_calls_;
       // Counts the number of outstanding calls for this batch.
       int64 num_calls_ GUARDED_BY(mu_) = 0;
       // Counts the total number of calls.
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index 63025d3371..9aa505f4f1 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -77,6 +77,14 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
           : DatasetIterator<Dataset>(params),
             model_(std::make_shared<model::Model>()) {}
 
+      ~Iterator() override {
+        // Signal the optimize thread to terminate it. We will then join that
+        // thread when we delete `this->optimize_thread_`.
+        mutex_lock l(mu_);
+        cancelled_ = true;
+        cond_var_.notify_all();
+      }
+
       Status Initialize(IteratorContext* ctx) override {
         IteratorContext ctx_with_model(CreateParams(ctx));
         return dataset()->input_->MakeIterator(&ctx_with_model, prefix(),
@@ -87,21 +95,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
         mutex_lock l(mu_);
-        int64 now = ctx->env()->NowMicros() / EnvTime::kMillisToMicros;
-        if (last_optimization_ms_ + optimization_period_ms_ < now) {
-          model_->Optimize(port::NumSchedulableCPUs());
-          // Exponentially increase the period of running the optimization until
-          // a threshold is reached.
-          if (optimization_period_ms_ < kOptimizationPeriodThresholdMs) {
-            if (optimization_period_ms_ << 1 < kOptimizationPeriodThresholdMs) {
-              optimization_period_ms_ <<= 1;
-            } else {
-              optimization_period_ms_ = kOptimizationPeriodThresholdMs;
-            }
-          }
-          last_optimization_ms_ =
-              ctx->env()->NowMicros() / EnvTime::kMillisToMicros;
-        }
+        TF_RETURN_IF_ERROR(EnsureOptimizeThreadStarted(ctx));
         IteratorContext ctx_with_model(CreateParams(ctx));
         return input_impl_->GetNext(&ctx_with_model, out_tensors,
                                     end_of_sequence);
@@ -128,10 +122,53 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
       }
 
      private:
+      Status EnsureOptimizeThreadStarted(IteratorContext* ctx)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        if (!optimize_thread_) {
+          std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
+          optimize_thread_.reset(ctx->env()->StartThread(
+              {}, "optimize_thread",
+              [this, new_ctx]() { OptimizeThread(new_ctx); }));
+        }
+        return Status::OK();
+      }
+
+      void OptimizeThread(const std::shared_ptr<IteratorContext>& ctx) {
+        int64 last_optimization_ms = 0;
+        int64 optimization_period_ms = 10;
+        while (true) {
+          {
+            mutex_lock l(mu_);
+            while (!cancelled_ &&
+                   last_optimization_ms + optimization_period_ms >=
+                       ctx->env()->NowMicros() / EnvTime::kMillisToMicros) {
+              cond_var_.wait_for(
+                  l, std::chrono::milliseconds(
+                         last_optimization_ms + optimization_period_ms -
+                         ctx->env()->NowMicros() / EnvTime::kMillisToMicros));
+            }
+            if (cancelled_) return;
+          }
+          model_->Optimize(port::NumSchedulableCPUs());
+          // Exponentially increase the period of running the optimization
+          // until a threshold is reached.
+          if (optimization_period_ms < kOptimizationPeriodThresholdMs) {
+            if (optimization_period_ms << 1 < kOptimizationPeriodThresholdMs) {
+              optimization_period_ms <<= 1;
+            } else {
+              optimization_period_ms = kOptimizationPeriodThresholdMs;
+            }
+          }
+          last_optimization_ms =
+              ctx->env()->NowMicros() / EnvTime::kMillisToMicros;
+        }
+      }
+
       mutex mu_;
+      condition_variable cond_var_;
       std::shared_ptr<model::Model> model_;
-      int64 last_optimization_ms_ GUARDED_BY(mu_) = 0;
-      int64 optimization_period_ms_ GUARDED_BY(mu_) = 10;
+      std::unique_ptr<Thread> optimize_thread_ GUARDED_BY(mu_);
+      bool cancelled_ GUARDED_BY(mu_) = false;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
 
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 3dac7902f0..2e6e0465f7 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include <atomic>
 #include <deque>
 #include <utility>
 
@@ -344,13 +345,13 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
 
           if (must_wait_for_input) {
             // Wait for elements to become available.
-            StopWork(ctx);
+            RecordStop(ctx);
             if (dataset()->sloppy_) {
               sloppy_cond_var_.wait(l);
             } else {
               workers_[interleave_indices_[next_index_]].cond_var.wait(l);
             }
-            StartWork(ctx);
+            RecordStart(ctx);
           }
         }
         return errors::Cancelled(
@@ -618,11 +619,11 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
 
         // std::function arguments are copy-constructable, so we pass raw
         // pointers, and then immediately wrap them to ensure correct ownership.
-        StartWork(ctx.get());
+        RecordStart(ctx.get());
         auto cleanup = gtl::MakeCleanup([this, thread_index, ctx] {
           mutex_lock l(mu_);
           workers_[thread_index].cond_var.notify_all();
-          StopWork(ctx.get());
+          RecordStop(ctx.get());
         });
         bool make_new_iterator;
         {
@@ -660,9 +661,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             if (read_new_input) {
               mutex_lock l(mu_);
               while (!cancelled_ && !workers_[thread_index].is_producing) {
-                StopWork(ctx.get());
+                RecordStop(ctx.get());
                 workers_[thread_index].cond_var.wait(l);
-                StartWork(ctx.get());
+                RecordStart(ctx.get());
               }
               if (cancelled_) return;
               // Copy the input tensors so that we do not need to block on `mu_`
@@ -712,9 +713,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             // Wait for space in the prefetch queue.
             while (!cancelled_ && workers_[thread_index].outputs.size() ==
                                       dataset()->buffer_output_elements_) {
-              StopWork(ctx.get());
+              RecordStop(ctx.get());
               workers_[thread_index].cond_var.wait(l);
-              StartWork(ctx.get());
+              RecordStart(ctx.get());
             }
             if (cancelled_) return;
             tf_shared_lock ckpt_l(ckpt_mu_);
@@ -763,9 +764,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
                 // Wait for space in the prefetch queue.
                 while (!cancelled_ && workers_[thread_index].outputs.size() ==
                                           dataset()->buffer_output_elements_) {
-                  StopWork(ctx.get());
+                  RecordStop(ctx.get());
                   workers_[thread_index].cond_var.wait(l);
-                  StartWork(ctx.get());
+                  RecordStart(ctx.get());
                 }
                 if (cancelled_) return;
 
@@ -1213,10 +1214,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
      public:
       explicit Iterator(const Params& params)
           : DatasetIterator<Dataset>(params),
+            num_parallel_calls_(params.dataset->num_parallel_calls_),
             args_list_(params.dataset->cycle_length_),
             current_elements_(params.dataset->cycle_length_),
             element_in_use_(params.dataset->cycle_length_, false),
-            num_parallel_calls_(params.dataset->num_parallel_calls_),
             thread_pool_(new thread::ThreadPool(
                 Env::Default(), ThreadOptions(), "parallel_interleave",
                 dataset()->cycle_length_ /* num_threads */,
@@ -1237,17 +1238,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         mutex_lock l(mu_);
         if (num_parallel_calls_ == kAutoTune) {
           num_parallel_calls_ = 1;
-          auto set_fn = [this](int64 value) {
-            {
-              mutex_lock l(mu_);
-              num_parallel_calls_ = value;
-              cond_var_.notify_all();
-            }
-            VLOG(2) << "setting parallelism knob to " << value;
-          };
-          AddTunableParameter(
-              ctx, "parallelism", num_parallel_calls_ /* value */, 1 /* min */,
-              dataset()->cycle_length_ /* max */, std::move(set_fn));
+          AddTunableParameter(ctx, "parallelism",
+                              &num_parallel_calls_ /* value */, 1 /* min */,
+                              dataset()->cycle_length_ /* max */, &cond_var_);
         } else {
           AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
         }
@@ -1267,9 +1260,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
             EnsureRunnerThreadStarted(ctx);
             while (invocation_results_.empty() &&
                    (!end_of_input_ || num_open_ > 0)) {
-              StopWork(ctx);
+              RecordStop(ctx);
               cond_var_.wait(l);
-              StartWork(ctx);
+              RecordStart(ctx);
             }
             if (!invocation_results_.empty()) {
               std::swap(result, invocation_results_.front());
@@ -1280,9 +1273,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
             }
             cond_var_.notify_all();
           }
-          StopWork(ctx);
+          RecordStop(ctx);
           result->notification.WaitForNotification();
-          StartWork(ctx);
+          RecordStart(ctx);
         } while (result->skip);
 
         if (result->status.ok()) {
@@ -1406,8 +1399,8 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           const std::shared_ptr<IteratorContext>& ctx, int64 cycle_index,
           const std::vector<std::shared_ptr<InvocationResult>>& results)
           LOCKS_EXCLUDED(mu_) {
-        StartWork(ctx.get());
-        auto cleanup = gtl::MakeCleanup([this, ctx] { StopWork(ctx.get()); });
+        RecordStart(ctx.get());
+        auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
         bool end_of_input = false;
         for (auto& result : results) {
           if (!end_of_input) {
@@ -1438,40 +1431,36 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         cond_var_.notify_all();
       }
 
-      int64 MaxInvocationResults() {
-        return dataset()->cycle_length_ * dataset()->block_length_;
-      }
-
       // Method responsible for 1) creating iterators out of input elements, 2)
       // determining the order in which elements are fetched from the iterators,
       // and 3) scheduling the fetching of the elements to a threadpool.
       //
       // This method runs in the `runner_thread` background thread.
       void RunnerThread(const std::shared_ptr<IteratorContext>& ctx) {
-        StartWork(ctx.get());
-        auto cleanup = gtl::MakeCleanup([this, ctx] { StopWork(ctx.get()); });
+        RecordStart(ctx.get());
+        auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
+        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(mu_) -> bool {
+          return element_in_use_[cycle_index_] ||
+                 num_calls_ >= num_parallel_calls_ ||
+                 invocation_results_.size() >=
+                     dataset()->cycle_length_ * dataset()->block_length_;
+        };
         while (true) {
           mutex_lock l(mu_);
           // Wait until this thread is cancelled, the end of input has been
           // reached, or the cycle element at the `cycle_index_` position is
           // not in use and there is space in the `invocation_results_` queue.
-          while (!cancelled_ && (!end_of_input_ || num_open_ > 0) &&
-                 (element_in_use_[cycle_index_] ||
-                  num_calls_ >= num_parallel_calls_ ||
-                  invocation_results_.size() >= MaxInvocationResults())) {
-            StopWork(ctx.get());
+          while (!cancelled_ && (!end_of_input_ || num_open_ > 0) && busy()) {
+            RecordStop(ctx.get());
             cond_var_.wait(l);
-            StartWork(ctx.get());
+            RecordStart(ctx.get());
           }
 
           if (cancelled_ || (end_of_input_ && num_open_ == 0)) {
             return;
           }
 
-          while (!element_in_use_[cycle_index_] &&
-                 (!end_of_input_ || num_open_ > 0) &&
-                 num_calls_ < num_parallel_calls_ &&
-                 invocation_results_.size() < MaxInvocationResults()) {
+          while ((!end_of_input_ || num_open_ > 0) && !busy()) {
             if (!current_elements_[cycle_index_]) {
               // Try to create a new iterator from the next input element.
               Status status = input_impl_->GetNext(
@@ -1618,6 +1607,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       // and there are elements left to be fetched.
       condition_variable cond_var_;
 
+      // Identifies the maximum number of parallel calls.
+      std::atomic<int64> num_parallel_calls_;
+
       // Iterator for input elements.
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
 
@@ -1644,9 +1636,6 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       // Identifies the number of open iterators.
       int64 num_open_ GUARDED_BY(mu_) = 0;
 
-      // Identifies the maximum number of parallel calls.
-      int64 num_parallel_calls_ GUARDED_BY(mu_) = 0;
-
       // Identifies the number of outstanding calls.
       int64 num_calls_ GUARDED_BY(mu_) = 0;
 
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 20ac518f37..ee20249bfe 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/kernels/data/parallel_map_iterator.h"
 
+#include <atomic>
 #include <deque>
 #include <functional>
 #include <utility>
@@ -40,11 +41,6 @@ class ParallelMapIterator : public DatasetBaseIterator {
         num_parallel_calls_(num_parallel_calls) {}
 
   ~ParallelMapIterator() override {
-    // TODO(mrry): Replace this cancellation logic with a
-    // CancellationManager. The syntax would be more heavyweight,
-    // but it would be possible to thread a cancellation manager
-    // through the IteratorContext to upstream,
-    // potentially-blocking iterators, when we add these.
     mutex_lock l(mu_);
     // Cancel the runner thread.
     cancelled_ = true;
@@ -59,19 +55,11 @@ class ParallelMapIterator : public DatasetBaseIterator {
     mutex_lock l(mu_);
     if (num_parallel_calls_ == kAutoTune) {
       num_parallel_calls_ = 1;
-      auto set_fn = [this](int64 value) {
-        {
-          mutex_lock l(mu_);
-          num_parallel_calls_ = value;
-          cond_var_.notify_all();
-        }
-        VLOG(2) << "setting parallelism knob to " << value;
-      };
       // TODO(jsimsa): Surface the number of threads used by `ctx->runner()` and
       // use it here for the maximum.
-      AddTunableParameter(ctx, "parallelism", num_parallel_calls_ /* value */,
+      AddTunableParameter(ctx, "parallelism", &num_parallel_calls_ /* value */,
                           1 /* min */, port::NumSchedulableCPUs() /* max */,
-                          std::move(set_fn));
+                          &cond_var_);
     } else {
       AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
     }
@@ -90,17 +78,17 @@ class ParallelMapIterator : public DatasetBaseIterator {
       mutex_lock l(mu_);
       EnsureRunnerThreadStarted(ctx);
       while (invocation_results_.empty()) {
-        StopWork(ctx);
+        RecordStop(ctx);
         cond_var_.wait(l);
-        StartWork(ctx);
+        RecordStart(ctx);
       }
       std::swap(result, invocation_results_.front());
       invocation_results_.pop_front();
       cond_var_.notify_all();
     }
-    StopWork(ctx);
+    RecordStop(ctx);
     result->notification.WaitForNotification();
-    StartWork(ctx);
+    RecordStart(ctx);
     return ProcessResult(result, out_tensors, end_of_sequence);
   }
 
@@ -218,9 +206,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
       return;
     }
 
-    // Call `func_(input_element)`, store the result in
-    // `result->return_values`, and notify `result->notification` to unblock
-    // a consumer.
+    // Call `func_(input_element)`, store the result in `result->return_values`,
+    // and notify `result->notification` to unblock a consumer.
     auto done = [this, result](Status status) {
       result->status.Update(status);
       CallCompleted(result);
@@ -249,28 +236,27 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   void RunnerThread(const std::shared_ptr<IteratorContext>& ctx) {
-    StartWork(ctx.get());
-    auto cleanup = gtl::MakeCleanup([this, ctx] { StopWork(ctx.get()); });
+    RecordStart(ctx.get());
+    auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
     std::vector<std::shared_ptr<InvocationResult>> new_calls;
-    {
-      tf_shared_lock l(mu_);
-      new_calls.reserve(num_parallel_calls_);
-    }
+    new_calls.reserve(num_parallel_calls_);
+    auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(mu_) -> bool {
+      int64 num_parallel_calls = num_parallel_calls_;
+      return num_calls_ >= num_parallel_calls ||
+             invocation_results_.size() >= num_parallel_calls;
+    };
     while (true) {
       {
         mutex_lock l(mu_);
-        while (!cancelled_ &&
-               (num_calls_ >= num_parallel_calls_ ||
-                invocation_results_.size() >= num_parallel_calls_)) {
-          StopWork(ctx.get());
+        while (!cancelled_ && busy()) {
+          RecordStop(ctx.get());
           cond_var_.wait(l);
-          StartWork(ctx.get());
+          RecordStart(ctx.get());
         }
         if (cancelled_) {
           return;
         }
-        while (num_calls_ < num_parallel_calls_ &&
-               invocation_results_.size() < num_parallel_calls_) {
+        while (!busy()) {
           invocation_results_.emplace_back(new InvocationResult());
           new_calls.push_back(invocation_results_.back());
           num_calls_++;
@@ -334,7 +320,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // buffer.
   condition_variable cond_var_;
   // Identifies the maximum number of parallel calls.
-  int64 num_parallel_calls_ GUARDED_BY(mu_) = 0;
+  std::atomic<int64> num_parallel_calls_;
   // Counts the number of outstanding calls.
   int64 num_calls_ GUARDED_BY(mu_) = 0;
   std::unique_ptr<IteratorBase> input_impl_;
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index 52c421caee..2a1e9c85f1 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -112,9 +112,9 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
         while (!cancelled_ && buffer_.empty() && !prefetch_thread_finished_ &&
                auto_tuner_.buffer_limit() != 0) {
           auto_tuner_.RecordEmpty();
-          StopWork(ctx);
+          RecordStop(ctx);
           cond_var_.wait(l);
-          StartWork(ctx);
+          RecordStart(ctx);
         }
 
         if (cancelled_) {
@@ -255,8 +255,8 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     //
     // It owns the iterator context passed to it.
     void PrefetchThread(const std::shared_ptr<IteratorContext>& ctx) {
-      StartWork(ctx.get());
-      auto cleanup = gtl::MakeCleanup([this, ctx] { StopWork(ctx.get()); });
+      RecordStart(ctx.get());
+      auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
       while (true) {
         std::vector<Tensor> value;
 
@@ -264,9 +264,9 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
         {
           mutex_lock l(mu_);
           while (!cancelled_ && buffer_.size() >= auto_tuner_.buffer_limit()) {
-            StopWork(ctx.get());
+            RecordStop(ctx.get());
             cond_var_.wait(l);
-            StartWork(ctx.get());
+            RecordStart(ctx.get());
           }
 
           if (cancelled_) {
-- 
GitLab


From e03bb6fe224e16e6ea9bcfcdf47166681e5f725d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 17:22:47 -0700
Subject: [PATCH 0472/1357] Fixes regression to tf.Print that removed square
 braces around printed tensors.

PiperOrigin-RevId: 213912507
---
 tensorflow/core/kernels/logging_ops.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc
index 8bafd5739d..1ded012f3c 100644
--- a/tensorflow/core/kernels/logging_ops.cc
+++ b/tensorflow/core/kernels/logging_ops.cc
@@ -75,7 +75,8 @@ class PrintOp : public OpKernel {
     string msg;
     strings::StrAppend(&msg, message_);
     for (int i = 1; i < ctx->num_inputs(); ++i) {
-      strings::StrAppend(&msg, ctx->input(i).SummarizeValue(summarize_));
+      strings::StrAppend(&msg, "[", ctx->input(i).SummarizeValue(summarize_),
+                         "]");
     }
     std::cerr << msg << std::endl;
   }
-- 
GitLab


From 97c64ea8501634866aaa9e8a5c6a861b04293c1b Mon Sep 17 00:00:00 2001
From: Youlong Cheng <ylc@google.com>
Date: Thu, 20 Sep 2018 17:26:56 -0700
Subject: [PATCH 0473/1357]  Support 16 ways model parallelism.

PiperOrigin-RevId: 213913013
---
 tensorflow/contrib/tpu/python/tpu/tpu_config.py   |  7 +++----
 .../contrib/tpu/python/tpu/tpu_config_test.py     |  2 +-
 tensorflow/contrib/tpu/python/tpu/tpu_context.py  | 15 ++++++++++++++-
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py
index 18e0abdda2..9f8d147068 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py
@@ -32,7 +32,6 @@ from tensorflow.python.platform import tf_logging as logging
 _TF_CONFIG_ENV = run_config_lib._TF_CONFIG_ENV
 _SERVICE_KEY = run_config_lib._SERVICE_KEY
 _TPU_WORKER_JOB_NAME = 'tpu_worker_job_name'
-_NUM_CORES_PER_HOST = 8
 # pylint: enable=protected-access
 
 
@@ -103,7 +102,7 @@ class TPUConfig(
       input mode.
 
     Raises:
-      ValueError: If `num_cores_per_replica` is not 1, 2, 4 or 8.
+      ValueError: If `num_cores_per_replica` is not 1, 2, 4, 8 or 16.
   """
 
   def __new__(cls,
@@ -139,9 +138,9 @@ class TPUConfig(
 
     # Check num_cores_per_replica
     if num_cores_per_replica is not None:
-      if num_cores_per_replica not in [1, 2, 4, 8]:
+      if num_cores_per_replica not in [1, 2, 4, 8, 16]:
         raise ValueError(
-            'num_cores_per_replica must be 1, 2, 4, or 8; got {}'.format(
+            'num_cores_per_replica must be 1, 2, 4, 8, or 16; got {}'.format(
                 str(num_cores_per_replica)))
 
     # per_host_input_for_training may be True, False, or integer in [1..3].
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py
index 2326fe97a8..b2fe0a6888 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py
@@ -86,7 +86,7 @@ class TPURunConfigTest(test.TestCase):
 
   def test_fail_with_invalid_num_cores_per_replica(self):
     with self.assertRaisesRegexp(
-        ValueError, 'num_cores_per_replica must be 1, 2, 4, or 8;'
+        ValueError, 'num_cores_per_replica must be 1, 2, 4, 8, or 16;'
         ' got 7'):
       tpu_config_lib.TPUConfig(num_cores_per_replica=7)
 
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
index ac76712aeb..3b45bbe75a 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
@@ -35,7 +35,8 @@ _NUM_CORES_TO_COMPUTATION_SHAPE = {
     1: [1, 1, 1],
     2: [1, 1, 2],
     4: [1, 2, 2],
-    8: [2, 2, 2]
+    8: [2, 2, 2],
+    16: [4, 2, 2],
 }
 
 
@@ -298,6 +299,7 @@ class _InternalTPUContext(object):
 
   @property
   def num_of_replicas_per_host(self):
+    """Return the number of replicas per host."""
     if self.model_parallelism_enabled:
       return self.num_replicas // self.num_hosts
     else:
@@ -580,6 +582,17 @@ class _InternalTPUContext(object):
 
         raise ValueError(message)
 
+    if self._config.tpu_config.num_cores_per_replica:
+      num_cores_per_replica = self._config.tpu_config.num_cores_per_replica
+      num_cores_per_host = self._get_tpu_system_metadata().num_of_cores_per_host
+      if num_cores_per_replica > num_cores_per_host:
+        raise ValueError(
+            'The num of cores required by the model parallelism, specified by '
+            'TPUConfig.num_cores_per_replica, is larger than the '
+            'num_cores_per_host. num_cores_per_replica: {}, '
+            'num_cores_per_host: {}'.format(num_cores_per_replica,
+                                            num_cores_per_host))
+
     if mode == model_fn_lib.ModeKeys.TRAIN:
       if (self._train_batch_size % num_replicas != 0 and
           not self.is_input_broadcast_with_iterators()):
-- 
GitLab


From 9c81bb7308b0cbec609aa0d9c7a4bed7c4545111 Mon Sep 17 00:00:00 2001
From: Brian Patton <bjp@google.com>
Date: Thu, 20 Sep 2018 17:48:09 -0700
Subject: [PATCH 0474/1357] Updating doc references to tf.distributions to
 tfp.distributions.

PiperOrigin-RevId: 213915666
---
 tensorflow/contrib/gan/python/losses/python/losses_impl.py | 6 +++---
 tensorflow/contrib/gan/python/namedtuples.py               | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl.py b/tensorflow/contrib/gan/python/losses/python/losses_impl.py
index d389748374..8bc4db8424 100644
--- a/tensorflow/contrib/gan/python/losses/python/losses_impl.py
+++ b/tensorflow/contrib/gan/python/losses/python/losses_impl.py
@@ -773,9 +773,9 @@ def mutual_information_penalty(
     structured_generator_inputs: A list of Tensors representing the random noise
       that must  have high mutual information with the generator output. List
       length should match `predicted_distributions`.
-    predicted_distributions: A list of tf.Distributions. Predicted by the
-      recognizer, and used to evaluate the likelihood of the structured noise.
-      List length should match `structured_generator_inputs`.
+    predicted_distributions: A list of `tfp.distributions.Distribution`s.
+      Predicted by the recognizer, and used to evaluate the likelihood of the
+      structured noise. List length should match `structured_generator_inputs`.
     weights: Optional `Tensor` whose rank is either 0, or the same dimensions as
       `structured_generator_inputs`.
     scope: The scope for the operations performed in computing the loss.
diff --git a/tensorflow/contrib/gan/python/namedtuples.py b/tensorflow/contrib/gan/python/namedtuples.py
index a462b68e28..b9ac1bf151 100644
--- a/tensorflow/contrib/gan/python/namedtuples.py
+++ b/tensorflow/contrib/gan/python/namedtuples.py
@@ -91,9 +91,9 @@ class InfoGANModel(
     structured_generator_inputs: A list of Tensors representing the random noise
       that must  have high mutual information with the generator output. List
       length should match `predicted_distributions`.
-    predicted_distributions: A list of tf.Distributions. Predicted by the
-      recognizer, and used to evaluate the likelihood of the structured noise.
-      List length should match `structured_generator_inputs`.
+    predicted_distributions: A list of `tfp.distributions.Distribution`s.
+      Predicted by the recognizer, and used to evaluate the likelihood of the
+      structured noise. List length should match `structured_generator_inputs`.
     discriminator_and_aux_fn: The original discriminator function that returns
       a tuple of (logits, `predicted_distributions`).
   """
-- 
GitLab


From 7d18cafbe79aecb0f5d703270771b9207866d886 Mon Sep 17 00:00:00 2001
From: Billy Lamberta <blamb@google.com>
Date: Thu, 20 Sep 2018 18:07:08 -0700
Subject: [PATCH 0475/1357] Update links to tf lite site.

PiperOrigin-RevId: 213917881
---
 tensorflow/contrib/lite/README.md                           | 4 ++--
 tensorflow/contrib/lite/examples/android/app/README.md      | 2 +-
 tensorflow/contrib/lite/java/ovic/README.md                 | 2 +-
 tensorflow/contrib/lite/tutorials/post_training_quant.ipynb | 2 +-
 tensorflow/contrib/quantization/README.md                   | 2 +-
 tensorflow/contrib/quantize/README.md                       | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index a676b705f1..a4b3d83efe 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -4,5 +4,5 @@ TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded
 devices. It enables low-latency inference of on-device machine learning models
 with a small binary size and fast performance supporting hardware acceleration.
 
-See the documentation: https://www.tensorflow.org/mobile/tflite/
-Documentation edits can be made here: [tensorflow/docs_src/mobile/tflite](../../docs_src/mobile/tflite)
+See the documentation: https://www.tensorflow.org/lite/
+Documentation edits can be made here: [tensorflow/contrib/lite/g3doc](./g3doc/)
diff --git a/tensorflow/contrib/lite/examples/android/app/README.md b/tensorflow/contrib/lite/examples/android/app/README.md
index cbdeeac879..dc31171672 100644
--- a/tensorflow/contrib/lite/examples/android/app/README.md
+++ b/tensorflow/contrib/lite/examples/android/app/README.md
@@ -2,7 +2,7 @@
 
 ## Building from Source with Bazel
 
-1. Install [Bazel](https://docs.bazel.build/versions/master/install.html), the Android NDK and SDK. The recommended versions are specified on this [webpage](https://www.tensorflow.org/mobile/tflite/demo_android#build_tensorflow_lite_and_the_demo_app_from_source).
+1. Install [Bazel](https://docs.bazel.build/versions/master/install.html), the Android NDK and SDK. The recommended versions are specified on this [webpage](https://www.tensorflow.org/lite/demo_android).
 
 2. Build this demo app with Bazel. The demo needs C++11. We configure the fat_apk_cpu flag to package support for 4 hardware variants. You may replace it with --config=android_arm64 on a 64-bit device and --config=android_arm for 32-bit device:
 
diff --git a/tensorflow/contrib/lite/java/ovic/README.md b/tensorflow/contrib/lite/java/ovic/README.md
index 26349347fa..df77bfaab3 100644
--- a/tensorflow/contrib/lite/java/ovic/README.md
+++ b/tensorflow/contrib/lite/java/ovic/README.md
@@ -4,7 +4,7 @@ This folder contains building code for track one of the [Low Power ImageNet Reco
 
 ## Pre-requisite
 
-Follow the steps [here](https://www.tensorflow.org/mobile/tflite/demo_android) to install Tensorflow, Bazel, and the Android NDK and SDK.
+Follow the steps [here](https://www.tensorflow.org/lite/demo_android) to install Tensorflow, Bazel, and the Android NDK and SDK.
 
 ## Test the benchmarker:
 
diff --git a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
index 82abbc1532..80cdb2f080 100644
--- a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
+++ b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
@@ -36,7 +36,7 @@
       "source": [
         "## Overview\n",
         "\n",
-        "[TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) now supports\n",
+        "[TensorFlow Lite](https://www.tensorflow.org/lite/) now supports\n",
         "converting weights to 8 bit precision as part of model conversion from\n",
         "tensorflow graphdefs to TFLite's flat buffer format. Weight quantization\n",
         "achieves a 4x reduction in the model size. In addition, TFLite supports on the\n",
diff --git a/tensorflow/contrib/quantization/README.md b/tensorflow/contrib/quantization/README.md
index 359950aaf3..826e8db2d3 100644
--- a/tensorflow/contrib/quantization/README.md
+++ b/tensorflow/contrib/quantization/README.md
@@ -2,6 +2,6 @@ The contrib/quantization package exposes a few TensorFlow quantization operation
 
 If you are looking for quantized training rewrites that allow for training
 quantized models that work with
-[TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/), you should look at
+[TensorFlow Lite](https://www.tensorflow.org/lite/), you should look at
 the [contrib/quantize](https://www.tensorflow.org/api_docs/python/tf/contrib/quantize)
 package.
diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md
index 3f1e7d2792..0ab19c91bb 100644
--- a/tensorflow/contrib/quantize/README.md
+++ b/tensorflow/contrib/quantize/README.md
@@ -105,7 +105,7 @@ toco \
   --std_value=127.5 --mean_value=127.5
 ```
 
-See the documentation for `tf.contrib.quantize` and [TensorFlow Lite](../mobile/tflite/).
+See the documentation for `tf.contrib.quantize` and [TensorFlow Lite](../lite/).
 
 
 ## Quantized accuracy results
-- 
GitLab


From 684b3e02e098cb6fda5569fb7f7990ff57248e5a Mon Sep 17 00:00:00 2001
From: Billy Lamberta <blamb@google.com>
Date: Thu, 20 Sep 2018 18:07:30 -0700
Subject: [PATCH 0476/1357] Update links to install pages.

PiperOrigin-RevId: 213917946
---
 RELEASE.md                                              | 2 +-
 tensorflow/contrib/cmake/README.md                      | 2 +-
 tensorflow/contrib/lite/g3doc/tfmobile/android_build.md | 2 +-
 tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md  | 2 +-
 tensorflow/contrib/lite/toco/g3doc/python_api.md        | 2 +-
 tensorflow/contrib/tensorrt/README.md                   | 2 +-
 tensorflow/go/README.md                                 | 6 ++----
 tensorflow/java/README.md                               | 7 +++----
 tensorflow/python/pywrap_tensorflow.py                  | 2 +-
 tensorflow/tools/ci_build/README.md                     | 2 +-
 tensorflow/tools/dist_test/README.md                    | 2 +-
 11 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index 2f26623373..20e1d9217b 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -94,7 +94,7 @@ Ricardo Perez-Lopez, 张天启, 张晓飞
 
 ## Breaking Changes
 
-* Prebuilt binaries are now (as of TensorFlow 1.10) built against NCCL 2.2 and no longer include NCCL in the binary install. TensorFlow usage with multiple GPUs and NCCL requires upgrade to [NCCL 2.2](https://developer.nvidia.com/nccl). See updated install guides: [Installing TensorFlow on Ubuntu](https://www.tensorflow.org/install/install_linux#tensorflow_gpu_support) and [Install TensorFlow from Sources](https://www.tensorflow.org/install/install_sources#optional_install_tensorflow_for_gpu_prerequisites).
+* Prebuilt binaries are now (as of TensorFlow 1.10) built against NCCL 2.2 and no longer include NCCL in the binary install. TensorFlow usage with multiple GPUs and NCCL requires upgrade to [NCCL 2.2](https://developer.nvidia.com/nccl). See updated install guides: [TensorFlow GPU support](https://www.tensorflow.org/install/gpu) and [Build TensorFlow from source](https://www.tensorflow.org/install/source).
 * Starting from TensorFlow 1.11, Windows builds will use Bazel. Therefore, we will drop official support for cmake.
 
 ## Bug Fixes and Other Changes
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 789dab81ed..77242b34fd 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -17,7 +17,7 @@ Linux.
 Current Status
 --------------
 
-CMake can be used to build TensorFlow on Windows. See the [getting started documentation](https://www.tensorflow.org/install/install_windows)
+CMake can be used to build TensorFlow on Windows. See the [getting started documentation](https://www.tensorflow.org/install/source_windows)
 for instructions on how to install a pre-built TensorFlow package on Windows.
 
 ### Current known limitations
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md
index c7cdee07de..b0f32a8d6c 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md
@@ -93,7 +93,7 @@ requires some knowledge of build systems and Android developer tools, but we'll
 guide you through the basics here.
 
 - First, follow our instructions for
-  <a href="http://www.tensorflow.org/install/install_sources">installing from sources</a>.
+  <a href="http://www.tensorflow.org/install/source">installing from sources</a>.
   This will also guide you through installing Bazel and cloning the
   TensorFlow code.
 
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
index 84680b968e..aba7536cbd 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
+++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
@@ -38,7 +38,7 @@ There are two approaches to running TOCO via command line.
     examples below use `tflite_convert` for simplicity.
     *   Example: `tflite_convert --output_file=...`
 *   `bazel`: In order to run the latest version of TOCO, [clone the TensorFlow
-    repository](https://www.tensorflow.org/install/install_sources#clone_the_tensorflow_repository)
+    repository](https://www.tensorflow.org/install/source)
     and use `bazel`. This is the recommended approach for converting models that
     utilize new features that were not supported by TOCO in TensorFlow 1.9.
     *   Example: `bazel run
diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md
index 51f808d4f0..910fa4c8de 100644
--- a/tensorflow/contrib/lite/toco/g3doc/python_api.md
+++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md
@@ -260,7 +260,7 @@ interpreter.allocate_tensors()
 In order to run the latest version of the TOCO Python API, clone the TensorFlow
 repository, configure the installation, and build and install the pip package.
 Detailed instructions are available
-[here](https://www.tensorflow.org/install/install_sources).
+[here](https://www.tensorflow.org/install/source).
 
 ### Converting models prior to TensorFlow 1.9. <a name="pre-tensorflow-1.9"></a>
 
diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md
index 687dee07e1..caf8b6db0d 100644
--- a/tensorflow/contrib/tensorrt/README.md
+++ b/tensorflow/contrib/tensorrt/README.md
@@ -26,4 +26,4 @@ available. An example use can be found in test/test_tftrt.py script
 In order to make use of TensorRT integration, you will need a local installation
 of TensorRT 3.0.4 from the [NVIDIA Developer website](https://developer.nvidia.com/tensorrt).
 Installation instructions for compatibility with TensorFlow are provided on the
-[TensorFlow Installation page](https://www.tensorflow.org/install/install_linux#nvidia_requirements_to_run_tensorflow_with_gpu_support).
+[TensorFlow GPU support](https://www.tensorflow.org/install/gpu) guide.
diff --git a/tensorflow/go/README.md b/tensorflow/go/README.md
index 288a32530a..3989f9b25a 100644
--- a/tensorflow/go/README.md
+++ b/tensorflow/go/README.md
@@ -10,7 +10,7 @@ Construct and execute TensorFlow graphs in Go.
 
 ## Quickstart
 
-Refer to [Installing TensorFlow for Go](https://www.tensorflow.org/install/install_go)
+Refer to [Installing TensorFlow for Go](https://www.tensorflow.org/install/lang_go)
 
 ## Building the TensorFlow C library from source
 
@@ -23,9 +23,7 @@ from source.
 
 -   [bazel](https://www.bazel.build/versions/master/docs/install.html)
 -   Environment to build TensorFlow from source code
-    ([Linux](https://www.tensorflow.org/install/install_sources#PrepareLinux)
-    or [OS
-    X](https://www.tensorflow.org/install/install_sources#PrepareMac)).
+    ([Linux of macOS](https://www.tensorflow.org/install/source)).
     If you don't need GPU support, then try the following:
 
     ```sh
diff --git a/tensorflow/java/README.md b/tensorflow/java/README.md
index c7382ff231..7ef862ae79 100644
--- a/tensorflow/java/README.md
+++ b/tensorflow/java/README.md
@@ -10,7 +10,7 @@
 
 ## Quickstart
 
--   Refer to [Installing TensorFlow for Java](https://www.tensorflow.org/install/install_java)
+-   Refer to [Installing TensorFlow for Java](https://www.tensorflow.org/install/lang_java)
 -   [Javadoc](https://www.tensorflow.org/api_docs/java/reference/org/tensorflow/package-summary)
 -   [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.tensorflow/tensorflow/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.tensorflow/tensorflow)
 
@@ -22,8 +22,7 @@ native libraries will need to be built from source.
 1.  Install [bazel](https://www.bazel.build/versions/master/docs/install.html)
 
 2.  Setup the environment to build TensorFlow from source code
-    ([Linux](https://www.tensorflow.org/install/install_sources#PrepareLinux)
-    or [macOS](https://www.tensorflow.org/install/install_sources#PrepareMac)).
+    ([Linux or macOS](https://www.tensorflow.org/install/source)).
     If you'd like to skip reading those details and do not care about GPU
     support, try the following:
 
@@ -35,7 +34,7 @@ native libraries will need to be built from source.
     brew install swig
     ```
 
-3.  [Configure](https://www.tensorflow.org/install/install_sources#configure_the_installation)
+3.  [Configure](https://www.tensorflow.org/install/source)
     (e.g., enable GPU support) and build:
 
     ```sh
diff --git a/tensorflow/python/pywrap_tensorflow.py b/tensorflow/python/pywrap_tensorflow.py
index 5c0c5783dc..f0724277d3 100644
--- a/tensorflow/python/pywrap_tensorflow.py
+++ b/tensorflow/python/pywrap_tensorflow.py
@@ -68,7 +68,7 @@ try:
     sys.setdlopenflags(_default_dlopen_flags)
 except ImportError:
   msg = """%s\n\nFailed to load the native TensorFlow runtime.\n
-See https://www.tensorflow.org/install/install_sources#common_installation_problems\n
+See https://www.tensorflow.org/install/errors\n
 for some common reasons and solutions.  Include the entire stack trace
 above this error message when asking for help.""" % traceback.format_exc()
   raise ImportError(msg)
diff --git a/tensorflow/tools/ci_build/README.md b/tensorflow/tools/ci_build/README.md
index f2161b700a..e2fd977f50 100644
--- a/tensorflow/tools/ci_build/README.md
+++ b/tensorflow/tools/ci_build/README.md
@@ -24,7 +24,7 @@ natively on your system.
 
 ### Run TensorFlow CI Scripts Natively on your Machine
 
-1.  Follow the instructions at https://www.tensorflow.org/install/install_sources,
+1.  Follow the instructions at https://www.tensorflow.org/install/source,
     but stop when you get to the section "Configure the installation". You do not
     need to configure the installation to run the CI scripts.
 
diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md
index 228d5ee35d..f8ed74aaf7 100644
--- a/tensorflow/tools/dist_test/README.md
+++ b/tensorflow/tools/dist_test/README.md
@@ -23,7 +23,7 @@ You can test specify version of TensorFlow:
 ./local_test.sh ${whl_file_url}
 ```
 
-For example, you can find these TensorFlow python package URLs from [here](https://www.tensorflow.org/install/install_linux#the_url_of_the_tensorflow_python_package) for Ubuntu.
+For example, you can find these TensorFlow python package URLs from [here](https://www.tensorflow.org/install/pip) for Ubuntu.
 
 **2) Launch a remote k8s cluster on Google Kubernetes Engine (GKE) and run the
 test suite on it**
-- 
GitLab


From a3321411e4f7b5ba3f1773de8a557947a4851671 Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Fri, 21 Sep 2018 09:24:42 +0800
Subject: [PATCH 0477/1357] More changes.

---
 tensorflow/core/framework/common_shape_fns.cc | 5 ++++-
 tensorflow/core/framework/common_shape_fns.h  | 2 +-
 tensorflow/core/ops/array_ops.cc              | 8 ++------
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index 20922d7884..5f34a75121 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -1306,6 +1306,8 @@ Status RandomShape(shape_inference::InferenceContext* c) {
   return Status::OK();
 }
 
+namespace {
+
 // This SliceHelper processes the output shape of the `slice`
 // when the tensor of `sizes` is available.
 template <typename T>
@@ -1333,6 +1335,7 @@ Status SliceHelper(InferenceContext* c, ShapeHandle begin_value,
 
   return Status::OK();
 }
+} // namespace
 
 Status SliceShape(InferenceContext* c) {
   ShapeHandle input = c->input(0);
@@ -1356,7 +1359,7 @@ Status SliceShape(InferenceContext* c) {
 
   // We check the tensor value here and will only use
   // `MakeShapeFromShapeTensor` when `sizes_value` is null.
-  // The reason is that `sizes`might contain -1, which can't
+  // The reason is that `sizes` might contain -1, which can't
   // be represented (-1 in the ShapeHandle would mean "unknown").
   const Tensor* sizes_value = c->input_tensor(2);
 
diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h
index 478f796516..3a496e06ae 100644
--- a/tensorflow/core/framework/common_shape_fns.h
+++ b/tensorflow/core/framework/common_shape_fns.h
@@ -293,7 +293,7 @@ inline Status BroadcastBinaryOpShapeFn(InferenceContext* c) {
 // Shape function for random operations.
 Status RandomShape(shape_inference::InferenceContext* c);
 
-// Shape function for Slice operator.
+// Shape function for Slice opertaions.
 Status SliceShape(shape_inference::InferenceContext* c);
 
 // Validates the 3 component tensors of a sparse tensor have the proper
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 325690eded..15cafaddf4 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -1539,9 +1539,7 @@ REGISTER_OP("Slice")
     .Output("output: T")
     .Attr("T: type")
     .Attr("Index: {int32,int64}")
-    .SetShapeFn([](InferenceContext* c) {
-      return shape_inference::SliceShape(c);
-    });
+    .SetShapeFn(shape_inference::SliceShape);
 
 #ifdef INTEL_MKL
 REGISTER_OP("_MklSlice")
@@ -1555,9 +1553,7 @@ REGISTER_OP("_MklSlice")
     .Output("mkl_output: uint8")
     .Attr("T: type")
     .Attr("Index: {int32,int64}")
-    .SetShapeFn([](InferenceContext* c) {
-      return shape_inference::SliceShape(c);
-    });
+    .SetShapeFn(shape_inference::SliceShape);
 #endif
 
 REGISTER_OP("StridedSlice")
-- 
GitLab


From f283f3ac5d7b6de8cadc9c1cee6886b187319afd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 18:35:09 -0700
Subject: [PATCH 0478/1357] Add an API which gives explicit control over shard
 sizes and introspection into the number of shards used. This is a variant of
 threadpool::parallelFor

PiperOrigin-RevId: 213920649
---
 tensorflow/core/lib/core/threadpool.cc      | 49 +++++++++++++++++
 tensorflow/core/lib/core/threadpool.h       | 14 +++++
 tensorflow/core/lib/core/threadpool_test.cc | 61 +++++++++++++++++++++
 tensorflow/core/util/work_sharder.cc        |  2 +
 tensorflow/core/util/work_sharder.h         |  3 +
 5 files changed, 129 insertions(+)

diff --git a/tensorflow/core/lib/core/threadpool.cc b/tensorflow/core/lib/core/threadpool.cc
index 99684ae47b..9ccd911b0e 100644
--- a/tensorflow/core/lib/core/threadpool.cc
+++ b/tensorflow/core/lib/core/threadpool.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/platform/context.h"
 #include "tensorflow/core/platform/denormal.h"
 #include "tensorflow/core/platform/logging.h"
@@ -120,6 +121,54 @@ void ThreadPool::Schedule(std::function<void()> fn) {
   impl_->Schedule(std::move(fn));
 }
 
+int ThreadPool::NumShardsUsedByTransformRangeConcurrently(
+    const int64 block_size, const int64 total) {
+  if (block_size <= 0 || total <= 1 || total <= block_size ||
+      NumThreads() == 1) {
+    return 1;
+  }
+  return (total + block_size - 1) / block_size;
+}
+
+// This functionality is similar to parallelFor, except that reasoning about
+// the number of shards used is significantly easier.
+void ThreadPool::TransformRangeConcurrently(
+    const int64 block_size, const int64 total,
+    const std::function<void(int64, int64)>& fn) {
+  const int num_shards_used =
+      NumShardsUsedByTransformRangeConcurrently(block_size, total);
+  if (num_shards_used == 1) {
+    fn(0, total);
+    return;
+  }
+
+  // Adapted from Eigen's parallelFor implementation.
+  BlockingCounter counter(num_shards_used);
+  std::function<void(int64, int64)> handle_range =
+      [=, &handle_range, &counter, &fn](int64 first, int64 last) {
+        while (last - first > block_size) {
+          // Find something near the midpoint which is a multiple of block size.
+          const int64 mid = first + ((last - first) / 2 + block_size - 1) /
+                                        block_size * block_size;
+          Schedule([=, &handle_range]() { handle_range(mid, last); });
+          last = mid;
+        }
+        // Single block or less, execute directly.
+        fn(first, last);
+        counter.DecrementCount();  // The shard is done.
+      };
+  if (num_shards_used <= NumThreads()) {
+    // Avoid a thread hop by running the root of the tree and one block on the
+    // main thread.
+    handle_range(0, total);
+  } else {
+    // Execute the root in the thread pool to avoid running work on more than
+    // numThreads() threads.
+    Schedule([=, &handle_range]() { handle_range(0, total); });
+  }
+  counter.Wait();
+}
+
 void ThreadPool::ParallelFor(int64 total, int64 cost_per_unit,
                              std::function<void(int64, int64)> fn) {
   impl_->ParallelFor(total, cost_per_unit, std::move(fn));
diff --git a/tensorflow/core/lib/core/threadpool.h b/tensorflow/core/lib/core/threadpool.h
index 74df7c84a4..e14ad7ac64 100644
--- a/tensorflow/core/lib/core/threadpool.h
+++ b/tensorflow/core/lib/core/threadpool.h
@@ -59,6 +59,20 @@ class ThreadPool {
   // Schedules fn() for execution in the pool of threads.
   void Schedule(std::function<void()> fn);
 
+  // Requires 0 < block_size <= total.
+  // Spawns k threads and calls fn(i*block_size, (i+1)*block_size) from the
+  // ith thread (i>=0). When (i+1)*block_size > total, fn(i*block_size, total)
+  // is called instead. k = NumShardsUsedByTransformRangeConcurrently(...).
+  // Note that when there aren't enough threads in the pool to achieve full
+  // parallelism, function calls will be automatically queued.
+  void TransformRangeConcurrently(const int64 block_size, const int64 total,
+                                  const std::function<void(int64, int64)>& fn);
+
+  // Returns the number of threads spawned by calling TransformRangeConcurrently
+  // with these parameters.
+  int NumShardsUsedByTransformRangeConcurrently(const int64 block_size,
+                                                const int64 total);
+
   // ParallelFor shards the "total" units of work assuming each unit of work
   // having roughly "cost_per_unit" cost, in cycles. Each unit of work is
   // indexed 0, 1, ..., total - 1. Each shard contains 1 or more units of work
diff --git a/tensorflow/core/lib/core/threadpool_test.cc b/tensorflow/core/lib/core/threadpool_test.cc
index 320f3ebb83..db996b783f 100644
--- a/tensorflow/core/lib/core/threadpool_test.cc
+++ b/tensorflow/core/lib/core/threadpool_test.cc
@@ -61,6 +61,67 @@ TEST(ThreadPool, DoWork) {
   }
 }
 
+void RunSharding(int64 block_size, int64 total, ThreadPool* threads) {
+  mutex mu;
+  int64 num_shards = 0;
+  int64 num_done_work = 0;
+  std::vector<bool> work(total, false);
+  threads->TransformRangeConcurrently(
+      block_size, total,
+      [=, &mu, &num_shards, &num_done_work, &work](int64 start, int64 end) {
+        VLOG(1) << "Shard [" << start << "," << end << ")";
+        EXPECT_GE(start, 0);
+        EXPECT_LE(end, total);
+        mutex_lock l(mu);
+        ++num_shards;
+        for (; start < end; ++start) {
+          EXPECT_FALSE(work[start]);  // No duplicate
+          ++num_done_work;
+          work[start] = true;
+        }
+      });
+  LOG(INFO) << block_size << " " << total;
+  const int64 num_workers = (total + block_size - 1) / block_size;
+  EXPECT_EQ(num_done_work, total);
+  if (num_workers < threads->NumThreads()) {
+    // If the intention is to limit the parallelism explicitly, we'd
+    // better honor it. Ideally, even if per_thread_max_parallelism >
+    // num_workers, we should expect that Shard() implementation do
+    // not over-shard. Unfortunately, ThreadPoolDevice::parallelFor
+    // tends to over-shard.
+    EXPECT_LE(num_shards, 1 + num_workers);
+  }
+}
+
+// Adapted from work_sharder_test.cc
+TEST(SparseUtilsTest, TransformRangeConcurrently) {
+  ThreadPool threads(Env::Default(), "test", 16);
+  for (auto block_size : {1, 7, 10, 64, 100, 256, 1000, 9999}) {
+    for (auto diff : {0, 1, 11, 102, 1003, 10005, 1000007}) {
+      const int64 total = block_size + diff;
+      RunSharding(block_size, total, &threads);
+    }
+  }
+}
+
+TEST(SparseUtilsTest, NumShardsUsedByTransformRangeConcurrently) {
+  ThreadPool threads(Env::Default(), "test", 16);
+  EXPECT_EQ(1, threads.NumShardsUsedByTransformRangeConcurrently(
+                   3 /* block_size */, 3 /* total */));
+  EXPECT_EQ(2, threads.NumShardsUsedByTransformRangeConcurrently(
+                   3 /* block_size */, 4 /* total */));
+  EXPECT_EQ(2, threads.NumShardsUsedByTransformRangeConcurrently(
+                   3 /* block_size */, 5 /* total */));
+  EXPECT_EQ(2, threads.NumShardsUsedByTransformRangeConcurrently(
+                   3 /* block_size */, 6 /* total */));
+  EXPECT_EQ(3, threads.NumShardsUsedByTransformRangeConcurrently(
+                   3 /* block_size */, 7 /* total */));
+  EXPECT_EQ(7, threads.NumShardsUsedByTransformRangeConcurrently(
+                   1 /* block_size */, 7 /* total */));
+  EXPECT_EQ(1, threads.NumShardsUsedByTransformRangeConcurrently(
+                   0 /* block_size */, 7 /* total */));
+}
+
 TEST(ThreadPool, ParallelFor) {
   Context outer_context(ContextKind::kThread);
   // Make ParallelFor use as many threads as possible.
diff --git a/tensorflow/core/util/work_sharder.cc b/tensorflow/core/util/work_sharder.cc
index f4bd2950e9..74f0713a61 100644
--- a/tensorflow/core/util/work_sharder.cc
+++ b/tensorflow/core/util/work_sharder.cc
@@ -50,6 +50,8 @@ void Shard(int max_parallelism, thread::ThreadPool* workers, int64 total,
               max_parallelism);
 }
 
+// DEPRECATED: Prefer threadpool->TransformRangeConcurrently, which allows you
+// to directly specify the shard size.
 void Sharder::Do(int64 total, int64 cost_per_unit, const Work& work,
                  const Runner& runner, int max_parallelism) {
   cost_per_unit = std::max(int64{1}, cost_per_unit);
diff --git a/tensorflow/core/util/work_sharder.h b/tensorflow/core/util/work_sharder.h
index b12c31c1ae..9db85a54c6 100644
--- a/tensorflow/core/util/work_sharder.h
+++ b/tensorflow/core/util/work_sharder.h
@@ -23,6 +23,9 @@ limitations under the License.
 
 namespace tensorflow {
 
+// DEPRECATED: Prefer threadpool->TransformRangeConcurrently, which allows you
+// to directly specify the shard size. Use this function only if you want to
+// manually cap parallelism.
 // Shards the "total" unit of work assuming each unit of work having
 // roughly "cost_per_unit". Each unit of work is indexed 0, 1, ...,
 // total - 1. Each shard contains 1 or more units of work and the
-- 
GitLab


From a1fd584736c7dea06d411f79d43d6f32412b96ef Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Fri, 21 Sep 2018 09:48:18 +0800
Subject: [PATCH 0479/1357] Fix clang-format styles.

---
 tensorflow/core/framework/common_shape_fns.cc | 2 +-
 tensorflow/core/graph/mkl_layout_pass_test.cc | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index 5f34a75121..50403b4004 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -1335,7 +1335,7 @@ Status SliceHelper(InferenceContext* c, ShapeHandle begin_value,
 
   return Status::OK();
 }
-} // namespace
+}  // namespace
 
 Status SliceShape(InferenceContext* c) {
   ShapeHandle input = c->input(0);
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index cccef5a03a..7f96a18023 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -3523,8 +3523,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Slice_DeviceTest) {
       " input: ['A', 'D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Int32Input);C(Int32Input);"
-            "D(_MklSlice);DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A->E;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;A:control->DMT/_2:control;"
+            "D(_MklSlice);DMT/_0(Const);DMT/_1(Const);DMT/"
+            "_2(Const);E(Zeta)|A->D;A->E;"
+            "A:control->DMT/_0:control;A:control->DMT/"
+            "_1:control;A:control->DMT/_2:control;"
             "B->D:1;C->D:2;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
 }
 
-- 
GitLab


From f10b00558de87020554c9c0512537dab96dba918 Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Thu, 20 Sep 2018 20:25:19 -0700
Subject: [PATCH 0480/1357] Make threading.local not an instance member of
 collective ops because in python3 threading.local cannot be pickled.

PiperOrigin-RevId: 213928766
---
 .../distribute/python/cross_tower_utils.py        | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/cross_tower_utils.py b/tensorflow/contrib/distribute/python/cross_tower_utils.py
index 24cb08fb48..9fc1b88955 100644
--- a/tensorflow/contrib/distribute/python/cross_tower_utils.py
+++ b/tensorflow/contrib/distribute/python/cross_tower_utils.py
@@ -221,9 +221,12 @@ def split_grads_by_size(threshold_size, device_grads):
   return small_grads, large_grads
 
 
-# threading.Lock() cannot be pickled and therefore cannot be a field of
-# CollectiveKeys.
+# threading.Lock() and threading.local() cannot be pickled and therefore cannot
+# be a field of CollectiveKeys. Right now _thread_local is not necessary to be
+# an instance member of CollectiveKeys since we always create a new thread for
+# each tower.
 _lock = threading.Lock()
+_thread_local = threading.local()
 
 
 # TODO(yuefengz): use random key starts to avoid reusing keys?
@@ -266,14 +269,12 @@ class CollectiveKeys(object):
     # For instance keys without ids
     self._instance_key_start = instance_key_start
 
-    self._thread_local = threading.local()
-
   def _get_thread_local_object(self):
     # We make instance key without key ids thread local so that it will work
     # with MirroredStrategy and distribute coordinator.
-    if not hasattr(self._thread_local, 'instance_key'):
-      self._thread_local.instance_key = self._instance_key_start
-    return self._thread_local
+    if not hasattr(_thread_local, 'instance_key'):
+      _thread_local.instance_key = self._instance_key_start
+    return _thread_local
 
   def get_group_key(self, devices):
     """Returns a group key for the set of devices.
-- 
GitLab


From 23552a8b2f2a92a31710b9339e6ade514ac25996 Mon Sep 17 00:00:00 2001
From: Abhijit Karmarkar <awk@google.com>
Date: Thu, 20 Sep 2018 22:18:35 -0700
Subject: [PATCH 0481/1357] Return model format from
 LoadSessionBundleOrSavedModelBundle(), allowing callers to know if we
 up-converted a SessionBundle to SavedModel format.

PiperOrigin-RevId: 213937542
---
 tensorflow/contrib/session_bundle/bundle_shim.cc   |  9 ++++++++-
 tensorflow/contrib/session_bundle/bundle_shim.h    |  6 ++++--
 .../contrib/session_bundle/bundle_shim_test.cc     | 14 ++++++++++----
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/session_bundle/bundle_shim.cc b/tensorflow/contrib/session_bundle/bundle_shim.cc
index 4fc36d85ed..c669ced997 100644
--- a/tensorflow/contrib/session_bundle/bundle_shim.cc
+++ b/tensorflow/contrib/session_bundle/bundle_shim.cc
@@ -355,11 +355,15 @@ Status LoadSessionBundleOrSavedModelBundle(
     const SessionOptions& session_options, const RunOptions& run_options,
     const string& export_dir,
     const std::unordered_set<string>& saved_model_tags,
-    SavedModelBundle* saved_model_bundle) {
+    SavedModelBundle* saved_model_bundle, bool* is_session_bundle) {
+  if (is_session_bundle != nullptr) {
+    *is_session_bundle = false;
+  }
   if (MaybeSavedModelDirectory(export_dir)) {
     LOG(INFO)
         << "Attempting to load native SavedModelBundle in bundle-shim from: "
         << export_dir;
+
     return LoadSavedModel(session_options, run_options, export_dir,
                           saved_model_tags, saved_model_bundle);
   } else if (IsPossibleExportDirectory(export_dir)) {
@@ -368,6 +372,9 @@ Status LoadSessionBundleOrSavedModelBundle(
     LOG(INFO) << "Attempting to up-convert SessionBundle to SavedModelBundle "
                  "in bundle-shim from: "
               << export_dir;
+    if (is_session_bundle != nullptr) {
+      *is_session_bundle = true;
+    }
     return LoadSavedModelFromLegacySessionBundlePath(
         session_options, run_options, export_dir, saved_model_bundle);
   }
diff --git a/tensorflow/contrib/session_bundle/bundle_shim.h b/tensorflow/contrib/session_bundle/bundle_shim.h
index 4628b6ab1b..7f0f9958d7 100644
--- a/tensorflow/contrib/session_bundle/bundle_shim.h
+++ b/tensorflow/contrib/session_bundle/bundle_shim.h
@@ -59,11 +59,13 @@ Status ConvertSessionBundleToSavedModelBundle(
 }  // namespace internal
 
 // Loads a SavedModel from either a session-bundle path or a SavedModel bundle
-// path.
+// path. If `is_session_bundle` is not a nullptr, sets it to `true` iff
+// SavedModel was up-converted and loaded from a SessionBundle.
+// `is_session_bundle` value should not be used if error is returned.
 Status LoadSessionBundleOrSavedModelBundle(
     const SessionOptions& session_options, const RunOptions& run_options,
     const string& export_dir, const std::unordered_set<string>& tags,
-    SavedModelBundle* bundle);
+    SavedModelBundle* bundle, bool* is_session_bundle = nullptr);
 
 }  // namespace serving
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/session_bundle/bundle_shim_test.cc b/tensorflow/contrib/session_bundle/bundle_shim_test.cc
index 9a1dd9303f..815beb73a0 100644
--- a/tensorflow/contrib/session_bundle/bundle_shim_test.cc
+++ b/tensorflow/contrib/session_bundle/bundle_shim_test.cc
@@ -63,12 +63,16 @@ void ValidateHalfPlusTwo(const SavedModelBundle& saved_model_bundle,
 
 void LoadAndValidateSavedModelBundle(const string& export_dir,
                                      const std::unordered_set<string>& tags,
-                                     const string& signature_def_key) {
+                                     const string& signature_def_key,
+                                     bool expect_session_bundle) {
   SessionOptions session_options;
   RunOptions run_options;
   SavedModelBundle saved_model_bundle;
+  bool is_session_bundle = false;
   TF_ASSERT_OK(LoadSessionBundleOrSavedModelBundle(
-      session_options, run_options, export_dir, tags, &saved_model_bundle));
+      session_options, run_options, export_dir, tags, &saved_model_bundle,
+      &is_session_bundle));
+  EXPECT_EQ(expect_session_bundle, is_session_bundle);
   const MetaGraphDef meta_graph_def = saved_model_bundle.meta_graph_def;
   const auto& signature_def_map = meta_graph_def.signature_def();
 
@@ -512,7 +516,8 @@ TEST(BundleShimTest, BasicExportSessionBundle) {
   const string session_bundle_export_dir =
       test_util::TestSrcDirPath(kSessionBundlePath);
   LoadAndValidateSavedModelBundle(session_bundle_export_dir, tags,
-                                  kDefaultServingSignatureDefKey);
+                                  kDefaultServingSignatureDefKey,
+                                  /*expect_session_bundle=*/true);
 
   // Verify that the named signature is also present.
   SessionOptions session_options;
@@ -558,7 +563,8 @@ TEST(BundleShimTest, BasicExportSavedModel) {
   const string saved_model_bundle_export_dir =
       io::JoinPath(testing::TensorFlowSrcRoot(), kSavedModelBundlePath);
   LoadAndValidateSavedModelBundle(saved_model_bundle_export_dir,
-                                  {kSavedModelTagServe}, "regress_x_to_y");
+                                  {kSavedModelTagServe}, "regress_x_to_y",
+                                  /*expect_session_bundle=*/false);
 }
 
 // Checks a basic load fails with an invalid export path.
-- 
GitLab


From cf047f7755f3400ee128db2571042091fe9f8314 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 20 Sep 2018 23:31:26 -0700
Subject: [PATCH 0482/1357] Fix cub include path so that TensorFlow compiles
 when used as a bazel dependency.

PiperOrigin-RevId: 213942340
---
 tensorflow/core/kernels/bincount_op_gpu.cu.cc          |  2 +-
 tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc    |  2 +-
 tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc |  8 ++++----
 tensorflow/core/kernels/histogram_op_gpu.cu.cc         |  2 +-
 tensorflow/core/kernels/reduction_gpu_kernels.cu.h     | 10 +++++-----
 tensorflow/core/kernels/topk_op_gpu.cu.cc              |  6 +++---
 tensorflow/core/kernels/where_op_gpu.cu.h              |  8 ++++----
 third_party/cub.BUILD                                  |  1 +
 8 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
index 6074b3e1f6..7d09e9b820 100644
--- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 #define EIGEN_USE_GPU
 
-#include "external/cub_archive/cub/device/device_histogram.cuh"
+#include "third_party/cub/device/device_histogram.cuh"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index 2a25459194..76afd6f18c 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "external/cub_archive/cub/util_ptx.cuh"
+#include "third_party/cub/util_ptx.cuh"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/kernels/depthwise_conv_op.h"
 #include "tensorflow/core/platform/types.h"
diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
index 862a97723f..e7882acc80 100644
--- a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
@@ -35,10 +35,10 @@ limitations under the License.
 
 #define EIGEN_USE_GPU
 
-#include "external/cub_archive/cub/device/device_radix_sort.cuh"
-#include "external/cub_archive/cub/device/device_reduce.cuh"
-#include "external/cub_archive/cub/iterator/constant_input_iterator.cuh"
-#include "external/cub_archive/cub/thread/thread_operators.cuh"
+#include "third_party/cub/device/device_radix_sort.cuh"
+#include "third_party/cub/device/device_reduce.cuh"
+#include "third_party/cub/iterator/constant_input_iterator.cuh"
+#include "third_party/cub/thread/thread_operators.cuh"
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
diff --git a/tensorflow/core/kernels/histogram_op_gpu.cu.cc b/tensorflow/core/kernels/histogram_op_gpu.cu.cc
index a88e9b0ddc..374a05850e 100644
--- a/tensorflow/core/kernels/histogram_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/histogram_op_gpu.cu.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "external/cub_archive/cub/device/device_histogram.cuh"
+#include "third_party/cub/device/device_histogram.cuh"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h
index 88b3c2ac76..bb8254eaac 100644
--- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h
+++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h
@@ -21,11 +21,11 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "external/cub_archive/cub/device/device_reduce.cuh"
-#include "external/cub_archive/cub/device/device_segmented_reduce.cuh"
-#include "external/cub_archive/cub/iterator/counting_input_iterator.cuh"
-#include "external/cub_archive/cub/iterator/transform_input_iterator.cuh"
-#include "external/cub_archive/cub/warp/warp_reduce.cuh"
+#include "third_party/cub/device/device_reduce.cuh"
+#include "third_party/cub/device/device_segmented_reduce.cuh"
+#include "third_party/cub/iterator/counting_input_iterator.cuh"
+#include "third_party/cub/iterator/transform_input_iterator.cuh"
+#include "third_party/cub/warp/warp_reduce.cuh"
 #include "cuda/include/cuComplex.h"
 #include "tensorflow/core/kernels/reduction_ops.h"
 #include "tensorflow/core/lib/core/bits.h"
diff --git a/tensorflow/core/kernels/topk_op_gpu.cu.cc b/tensorflow/core/kernels/topk_op_gpu.cu.cc
index ca296d5aa0..2fbe1fe7cb 100644
--- a/tensorflow/core/kernels/topk_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/topk_op_gpu.cu.cc
@@ -20,9 +20,9 @@ limitations under the License.
 #include <cmath>
 #include <vector>
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "external/cub_archive/cub/device/device_segmented_radix_sort.cuh"
-#include "external/cub_archive/cub/iterator/counting_input_iterator.cuh"
-#include "external/cub_archive/cub/iterator/transform_input_iterator.cuh"
+#include "third_party/cub/device/device_segmented_radix_sort.cuh"
+#include "third_party/cub/iterator/counting_input_iterator.cuh"
+#include "third_party/cub/iterator/transform_input_iterator.cuh"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
diff --git a/tensorflow/core/kernels/where_op_gpu.cu.h b/tensorflow/core/kernels/where_op_gpu.cu.h
index 8879d9dd4c..2255597651 100644
--- a/tensorflow/core/kernels/where_op_gpu.cu.h
+++ b/tensorflow/core/kernels/where_op_gpu.cu.h
@@ -21,10 +21,10 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "external/cub_archive/cub/device/device_reduce.cuh"
-#include "external/cub_archive/cub/device/device_select.cuh"
-#include "external/cub_archive/cub/iterator/counting_input_iterator.cuh"
-#include "external/cub_archive/cub/iterator/transform_input_iterator.cuh"
+#include "third_party/cub/device/device_reduce.cuh"
+#include "third_party/cub/device/device_select.cuh"
+#include "third_party/cub/iterator/counting_input_iterator.cuh"
+#include "third_party/cub/iterator/transform_input_iterator.cuh"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/kernels/bounds_check.h"
diff --git a/third_party/cub.BUILD b/third_party/cub.BUILD
index 29159c9dad..a04347b21e 100644
--- a/third_party/cub.BUILD
+++ b/third_party/cub.BUILD
@@ -20,6 +20,7 @@ filegroup(
 cc_library(
     name = "cub",
     hdrs = if_cuda([":cub_header_files"]),
+    include_prefix = "third_party",
     deps = [
         "@local_config_cuda//cuda:cuda_headers",
     ],
-- 
GitLab


From 2952f5134905af795ba90ae1eb97e39091ba9843 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 00:02:49 -0700
Subject: [PATCH 0483/1357] Move from deprecated self.test_session() to
 self.cached_session().

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about:
* the fact that the session may be reused.
* the session is not closed even when doing a "with self.test_session()" statement.

PiperOrigin-RevId: 213944355
---
 .../batching/python/ops/batch_ops_test.py     | 29 +++++++-------
 tensorflow/contrib/compiler/jit_test.py       |  2 +-
 .../kernel_tests/bijectors/softsign_test.py   |  4 +-
 tensorflow/contrib/gan/python/train_test.py   |  4 +-
 .../python/kernel_tests/grid_rnn_test.py      | 28 +++++++-------
 .../python/ops/input_pipeline_ops_test.py     |  8 ++--
 .../kernel_methods/python/losses_test.py      | 38 +++++++++----------
 .../mappers/random_fourier_features_test.py   | 12 +++---
 .../python/stat_summarizer_test.py            |  2 +-
 .../kernel_tests/scatter_add_ndim_op_test.py  | 10 ++---
 .../python/tensor_forest_test.py              |  2 +-
 .../text/python/ops/skip_gram_ops_test.py     | 32 ++++++++--------
 .../python/estimator/export/export_test.py    |  2 +-
 .../keras/utils/multi_gpu_utils_test.py       |  8 ++--
 .../python/profiler/pprof_profiler_test.py    |  2 +-
 .../tools/optimize_for_inference_test.py      | 16 ++++----
 16 files changed, 100 insertions(+), 99 deletions(-)

diff --git a/tensorflow/contrib/batching/python/ops/batch_ops_test.py b/tensorflow/contrib/batching/python/ops/batch_ops_test.py
index 7846814546..01ee8703a9 100644
--- a/tensorflow/contrib/batching/python/ops/batch_ops_test.py
+++ b/tensorflow/contrib/batching/python/ops/batch_ops_test.py
@@ -43,7 +43,7 @@ class BatchOpsTest(test.TestCase):
 
   def testBasicBatch(self):
     """Tests that a single batched tensor executes together and only once."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
       batched, index, _ = batch_ops.batch(
           [inp], num_batch_threads=1, max_batch_size=2,
@@ -83,7 +83,7 @@ class BatchOpsTest(test.TestCase):
 
   def testBatchWithPadding(self):
     """Test that batching with padding up to an allowed batch size works."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inp = array_ops.placeholder(dtype=dtypes.int32, shape=[2])
       batched, index, _ = batch_ops.batch(
           [inp], num_batch_threads=1, max_batch_size=10,
@@ -113,7 +113,7 @@ class BatchOpsTest(test.TestCase):
 
   def testMultipleBatch(self):
     """Tests that multiple batched tensors execute together."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inp0 = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
       inp1 = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
       batched, _, _ = batch_ops.batch(
@@ -152,7 +152,7 @@ class BatchOpsTest(test.TestCase):
 
   def testIllegalBatchDifferentDim0Sizes(self):
     """Tests illegally feeding tensors with different dim0 sizes."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inp0 = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
       inp1 = array_ops.placeholder(dtype=dtypes.int32, shape=[2])
       batched, index, _ = batch_ops.batch(
@@ -166,7 +166,7 @@ class BatchOpsTest(test.TestCase):
 
   def testBasicUnbatch(self):
     """Tests that batch and unbatch work together."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
       batched, index, id_t = batch_ops.batch(
           [inp], num_batch_threads=1, max_batch_size=10,
@@ -190,7 +190,8 @@ class BatchOpsTest(test.TestCase):
 
   def testBasicUnbatchV1Decorated(self):
     """Tests that the batch_function_v1 decorator works."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
+
       @batch_ops.batch_function_v1(1, 10, 100000)
       def computation(in_t):
         return in_t + 1
@@ -211,7 +212,7 @@ class BatchOpsTest(test.TestCase):
 
   def testBasicUnbatchDecorated(self):
     """Tests that the batch_function decorator works."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # TODO(apassos): Removing this line causes test flakiness! Ideally should
       # be investigated.
       default_inp = array_ops.placeholder_with_default(2, shape=[])  # pylint: disable=unused-variable
@@ -236,7 +237,7 @@ class BatchOpsTest(test.TestCase):
 
   def testBatchDecoratedWithCapturedInput(self):
     """Tests that the batch_function decorator works."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       captured_inp0 = array_ops.placeholder_with_default(2, shape=[])
       captured_inp1 = array_ops.placeholder_with_default(1, shape=[])
 
@@ -260,7 +261,7 @@ class BatchOpsTest(test.TestCase):
 
   def testBatchFunctionOp(self):
     """Tests that the batch_function op works."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
 
       @function.Defun(dtypes.int32)
       def computation(in_t):
@@ -289,7 +290,7 @@ class BatchOpsTest(test.TestCase):
 
   def testBatchFunctionOpWithCapturedInput(self):
     """Tests that batch_function op works with captured input."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       captured_inp0 = array_ops.placeholder_with_default(2, shape=[])
       captured_inp1 = array_ops.placeholder_with_default(1, shape=[])
       inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
@@ -323,7 +324,7 @@ class BatchOpsTest(test.TestCase):
 
   def testBatchFunctionOpWithInputError(self):
     """Tests that batch_function op works with error in the inputs."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
 
       @function.Defun(dtypes.int32, dtypes.int32)
@@ -346,7 +347,7 @@ class BatchOpsTest(test.TestCase):
 
   def testBasicUnbatchDecoratedWithReshape(self):
     """Tests that the batch_function decorator works."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
 
       @batch_ops.batch_function(1, 10, 100000)
       def computation(in_t):
@@ -368,7 +369,7 @@ class BatchOpsTest(test.TestCase):
 
   def testUnbatchTimeout(self):
     """Tests that the unbatch timeout works."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
       batched, index, id_t = batch_ops.batch(
           [inp], num_batch_threads=1, max_batch_size=2,
@@ -410,7 +411,7 @@ class BatchOpsTest(test.TestCase):
 
   def testUnbatchGrad(self):
     """Tests that batch and unbatch are differentiable."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inp = array_ops.placeholder(dtype=dtypes.float32, shape=[1])
       batched, index, id_t = batch_ops.batch(
           [inp], num_batch_threads=1, max_batch_size=2,
diff --git a/tensorflow/contrib/compiler/jit_test.py b/tensorflow/contrib/compiler/jit_test.py
index 42b3b9f026..3e631b5909 100644
--- a/tensorflow/contrib/compiler/jit_test.py
+++ b/tensorflow/contrib/compiler/jit_test.py
@@ -173,7 +173,7 @@ class JITTest(test.TestCase):
 class CompilationEnabledInGradientTest(test.TestCase):
 
   def testCompilationInGradient(self):
-    with self.test_session():
+    with self.cached_session():
       x = constant_op.constant([[3.]])
       y_nc = math_ops.matmul(x, x, name="not_compiled")
       with jit.experimental_jit_scope():
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softsign_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softsign_test.py
index 8dad80aa64..c32ea9ade7 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softsign_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softsign_test.py
@@ -93,12 +93,12 @@ class SoftsignBijectorTest(test.TestCase):
             bijector.inverse_log_det_jacobian(y, event_ndims=1)))
 
   def testScalarCongruency(self):
-    with self.test_session():
+    with self.cached_session():
       bijector = Softsign(validate_args=True)
       assert_scalar_congruency(bijector, lower_x=-20., upper_x=20.)
 
   def testBijectiveAndFinite(self):
-    with self.test_session():
+    with self.cached_session():
       bijector = Softsign(validate_args=True)
       x = np.linspace(-20., 20., 100).astype(np.float32)
       y = np.linspace(-0.99, 0.99, 100).astype(np.float32)
diff --git a/tensorflow/contrib/gan/python/train_test.py b/tensorflow/contrib/gan/python/train_test.py
index 58f348034f..64d6706199 100644
--- a/tensorflow/contrib/gan/python/train_test.py
+++ b/tensorflow/contrib/gan/python/train_test.py
@@ -399,7 +399,7 @@ class StarGANModelTest(test.TestCase):
     target_tensor = train._generate_stargan_random_domain_target(
         batch_size, domain_numbers)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       targets = sess.run(target_tensor)
       self.assertTupleEqual((batch_size, domain_numbers), targets.shape)
       for target in targets:
@@ -676,7 +676,7 @@ class GANLossTest(test.TestCase, parameterized.TestCase):
 
     self.assertIsInstance(model_loss, namedtuples.GANLoss)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
 
       sess.run(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/grid_rnn/python/kernel_tests/grid_rnn_test.py b/tensorflow/contrib/grid_rnn/python/kernel_tests/grid_rnn_test.py
index fed8a771cc..27aed091c2 100644
--- a/tensorflow/contrib/grid_rnn/python/kernel_tests/grid_rnn_test.py
+++ b/tensorflow/contrib/grid_rnn/python/kernel_tests/grid_rnn_test.py
@@ -233,7 +233,7 @@ class GridRNNCellTest(test.TestCase):
                     ([[1.38917875, 1.49043763]], [[0.83884692, 0.86036491]])))
 
   def testGrid2LSTMCellWithRelu(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 3])
@@ -261,7 +261,7 @@ class GridRNNCellTest(test.TestCase):
   """
 
   def testGrid2BasicRNNCell(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([2, 2])
@@ -292,7 +292,7 @@ class GridRNNCellTest(test.TestCase):
                     [[0.80049908, 0.80049908], [0.97574311, 0.97574311]]))
 
   def testGrid2BasicRNNCellTied(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([2, 2])
@@ -323,7 +323,7 @@ class GridRNNCellTest(test.TestCase):
                     [[0.80049908, 0.80049908], [0.97574311, 0.97574311]]))
 
   def testGrid2BasicRNNCellWithRelu(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 2])
@@ -348,7 +348,7 @@ class GridRNNCellTest(test.TestCase):
   """
 
   def testGrid1LSTMCell(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)) as root_scope:
         x = array_ops.zeros([1, 3])
@@ -410,7 +410,7 @@ class GridRNNCellTest(test.TestCase):
   """
 
   def testGrid3LSTMCell(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 3])
@@ -455,7 +455,7 @@ class GridRNNCellTest(test.TestCase):
   """
 
   def testGridRNNEdgeCasesLikeRelu(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([3, 2])
@@ -481,7 +481,7 @@ class GridRNNCellTest(test.TestCase):
         self.assertAllClose(res_g, ([[0, 0], [0, 0], [0.5, 0.5]],))
 
   def testGridRNNEdgeCasesNoOutput(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 2])
@@ -541,7 +541,7 @@ class GridRNNCellTest(test.TestCase):
       self.assertEqual(out[0].get_shape()[1], num_units)
       self.assertEqual(out[0].dtype, inp.dtype)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.global_variables_initializer())
 
       input_value = np.ones((batch_size, input_size))
@@ -581,7 +581,7 @@ class GridRNNCellTest(test.TestCase):
       self.assertEqual(out[0].get_shape()[1], num_units)
       self.assertEqual(out[0].dtype, inp.dtype)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.global_variables_initializer())
 
       input_value = np.ones((batch_size, input_size))
@@ -623,7 +623,7 @@ class GridRNNCellTest(test.TestCase):
       self.assertEqual(out[0].get_shape()[1], num_units)
       self.assertEqual(out[0].dtype, inp.dtype)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.global_variables_initializer())
 
       input_value = np.ones((batch_size, input_size))
@@ -663,7 +663,7 @@ class GridRNNCellTest(test.TestCase):
       self.assertEqual(out[0].get_shape(), (3, num_units))
       self.assertEqual(out[0].dtype, inp.dtype)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.global_variables_initializer())
 
       input_value = np.ones((batch_size, input_size))
@@ -700,7 +700,7 @@ class GridRNNCellTest(test.TestCase):
       self.assertEqual(out[0].get_shape()[1], num_units)
       self.assertEqual(out[0].dtype, inp.dtype)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.global_variables_initializer())
 
       input_value = np.ones((3, input_size))
@@ -715,7 +715,7 @@ class GridRNNCellTest(test.TestCase):
 
   def testGrid2LSTMCellLegacy(self):
     """Test for legacy case (when state_is_tuple=False)."""
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 3])
diff --git a/tensorflow/contrib/input_pipeline/python/ops/input_pipeline_ops_test.py b/tensorflow/contrib/input_pipeline/python/ops/input_pipeline_ops_test.py
index 9ed017592a..f44edaa14c 100644
--- a/tensorflow/contrib/input_pipeline/python/ops/input_pipeline_ops_test.py
+++ b/tensorflow/contrib/input_pipeline/python/ops/input_pipeline_ops_test.py
@@ -29,7 +29,7 @@ from tensorflow.python.platform import test
 class InputPipelineOpsTest(test.TestCase):
 
   def testObtainNext(self):
-    with self.test_session():
+    with self.cached_session():
       var = state_ops.variable_op([], dtypes.int64)
       state_ops.assign(var, -1).op.run()
       c = constant_op.constant(["a", "b"])
@@ -45,7 +45,7 @@ class InputPipelineOpsTest(test.TestCase):
 
   def testSeekNext(self):
     string_list = ["a", "b", "c"]
-    with self.test_session() as session:
+    with self.cached_session() as session:
       elem = input_pipeline_ops.seek_next(string_list)
       session.run([variables.global_variables_initializer()])
       self.assertEqual(b"a", session.run(elem))
@@ -65,7 +65,7 @@ class InputPipelineOpsTest(test.TestCase):
 
   def testSeekNextLimitEpochs(self):
     string_list = ["a", "b", "c"]
-    with self.test_session() as session:
+    with self.cached_session() as session:
       elem = input_pipeline_ops.seek_next(string_list, num_epochs=1)
       session.run([
           variables.local_variables_initializer(),
@@ -75,7 +75,7 @@ class InputPipelineOpsTest(test.TestCase):
 
   def testSeekNextLimitEpochsThree(self):
     string_list = ["a", "b", "c"]
-    with self.test_session() as session:
+    with self.cached_session() as session:
       elem = input_pipeline_ops.seek_next(string_list, num_epochs=3)
       session.run([
           variables.local_variables_initializer(),
diff --git a/tensorflow/contrib/kernel_methods/python/losses_test.py b/tensorflow/contrib/kernel_methods/python/losses_test.py
index 72507539f8..4d5cc24ce0 100644
--- a/tensorflow/contrib/kernel_methods/python/losses_test.py
+++ b/tensorflow/contrib/kernel_methods/python/losses_test.py
@@ -32,7 +32,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testInvalidLogitsShape(self):
     """An error is raised when logits have invalid shape."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([-1.0, 2.1], shape=(2,))
       labels = constant_op.constant([0, 1])
       with self.assertRaises(ValueError):
@@ -40,7 +40,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testInvalidLabelsShape(self):
     """An error is raised when labels have invalid shape."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([-1.0, 2.1], shape=(2, 1))
       labels = constant_op.constant([1, 0], shape=(1, 1, 2))
       with self.assertRaises(ValueError):
@@ -48,7 +48,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testInvalidWeightsShape(self):
     """An error is raised when weights have invalid shape."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([-1.0, 2.1], shape=(2, 1))
       labels = constant_op.constant([1, 0], shape=(2,))
       weights = constant_op.constant([1.5, 0.2], shape=(2, 1, 1))
@@ -57,7 +57,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testInvalidLabelsDtype(self):
     """An error is raised when labels have invalid shape."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([-1.0, 2.1], shape=(2, 1))
       labels = constant_op.constant([1, 0], dtype=dtypes.float32)
       with self.assertRaises(ValueError):
@@ -65,7 +65,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testNoneWeightRaisesValueError(self):
     """An error is raised when weights are None."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([-1.0, 2.1], shape=(2, 1))
       labels = constant_op.constant([1, 0])
       with self.assertRaises(ValueError):
@@ -73,7 +73,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testInconsistentLabelsAndWeightsShapesSameRank(self):
     """Error raised when weights and labels have same ranks, different sizes."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([-1.0, 2.1, 4.1], shape=(3, 1))
       labels = constant_op.constant([1, 0, 2], shape=(3, 1))
       weights = constant_op.constant([1.1, 2.0], shape=(2, 1))
@@ -82,7 +82,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testInconsistentLabelsAndWeightsShapesDifferentRank(self):
     """Error raised when weights and labels have different ranks and sizes."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([-1.0, 2.1], shape=(2, 1))
       labels = constant_op.constant([1, 0], shape=(2, 1))
       weights = constant_op.constant([1.1, 2.0, 2.8], shape=(3,))
@@ -91,7 +91,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testOutOfRangeLabels(self):
     """An error is raised when labels are not in [0, num_classes)."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[1.2, -1.4, -1.0], [1.4, 1.8, 4.0],
                                      [0.5, 1.8, -1.0]])
       labels = constant_op.constant([1, 0, 4])
@@ -101,7 +101,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testZeroLossInt32Labels(self):
     """Loss is 0 if true class logits sufficiently higher than other classes."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[1.2, -1.4, -1.0], [1.4, 1.8, 4.0],
                                      [0.5, 1.8, -1.0]])
       labels = constant_op.constant([0, 2, 1], dtype=dtypes.int32)
@@ -110,7 +110,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testZeroLossInt64Labels(self):
     """Loss is 0 if true class logits sufficiently higher than other classes."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[2.1, -0.4, -1.0], [1.4, 2.8, 4.0],
                                      [-0.5, 0.8, -1.0]])
       labels = constant_op.constant([0, 2, 1], dtype=dtypes.int64)
@@ -130,7 +130,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
     ]
 
     for batch_size, num_classes in logits_shapes:
-      with self.test_session():
+      with self.cached_session():
         logits = array_ops.placeholder(
             dtypes.float32, shape=(batch_size, num_classes))
         labels = array_ops.placeholder(dtypes.int32, shape=(batch_size,))
@@ -140,7 +140,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testCorrectPredictionsSomeClassesInsideMargin(self):
     """Loss is > 0 even if true class logits are higher than other classes."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[1.2, -1.4, 0.8], [1.4, 1.8, 4.0],
                                      [1.5, 1.8, -1.0]])
       labels = constant_op.constant([0, 2, 1])
@@ -150,7 +150,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testIncorrectPredictions(self):
     """Loss is >0 when an incorrect class has higher logits than true class."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[2.6, 0.4, 0.8], [1.4, 0.8, -1.0],
                                      [0.5, -1.8, 2.0]])
       labels = constant_op.constant([1, 0, 2])
@@ -162,7 +162,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testIncorrectPredictionsColumnLabels(self):
     """Same as above but labels is a rank-2 tensor."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[1.6, -0.4, 0.8], [1.5, 0.8, -1.0],
                                      [0.2, -1.8, 4.0]])
       labels = constant_op.constant([1, 0, 2], shape=(3, 1))
@@ -174,7 +174,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testIncorrectPredictionsZeroWeights(self):
     """Loss is 0 when all weights are missing even if predictions are wrong."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[1.6, -0.4, 0.8], [1.5, 0.8, -1.0],
                                      [0.2, -1.8, 4.0]])
       labels = constant_op.constant([1, 0, 2], shape=(3, 1))
@@ -185,7 +185,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testNonZeroLossWithPythonScalarWeights(self):
     """Weighted loss is correctly computed when weights is a python scalar."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[1.6, -0.4, 0.8], [1.5, 0.8, -1.0],
                                      [0.2, -1.8, 4.0]])
       labels = constant_op.constant([1, 0, 2], shape=(3, 1))
@@ -195,7 +195,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testNonZeroLossWithScalarTensorWeights(self):
     """Weighted loss is correctly computed when weights is a rank-0 tensor."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[1.6, -0.4, 0.8], [1.5, 0.8, -1.0],
                                      [0.2, -1.8, 4.0]])
       labels = constant_op.constant([1, 0, 2], shape=(3, 1))
@@ -205,7 +205,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testNonZeroLossWith1DTensorWeightsColumnLabels(self):
     """Weighted loss is correctly computed when weights is a rank-0 tensor."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[1.6, -0.4, 0.8], [1.5, 0.8, -1.0],
                                      [0.2, -1.8, 4.0]])
       labels = constant_op.constant([1, 0, 2], shape=(3, 1))
@@ -216,7 +216,7 @@ class SparseMulticlassHingeLossTest(test.TestCase):
 
   def testNonZeroLossWith2DTensorWeights1DLabelsSomeWeightsMissing(self):
     """Weighted loss is correctly computed when weights is a rank-0 tensor."""
-    with self.test_session():
+    with self.cached_session():
       logits = constant_op.constant([[1.6, -0.4, 0.8], [1.5, 0.8, -1.0],
                                      [0.2, -1.8, 4.0], [1.6, 1.8, -4.0]])
       labels = constant_op.constant([1, 0, 2, 1])
diff --git a/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py b/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py
index 2ff4d41d75..bad0a596a7 100644
--- a/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py
+++ b/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py
@@ -58,7 +58,7 @@ class RandomFourierFeatureMapperTest(TensorFlowTestCase):
   def testInvalidInputShape(self):
     x = constant_op.constant([[2.0, 1.0]])
 
-    with self.test_session():
+    with self.cached_session():
       rffm = RandomFourierFeatureMapper(3, 10)
       with self.assertRaisesWithPredicateMatch(
           dense_kernel_mapper.InvalidShapeError,
@@ -70,7 +70,7 @@ class RandomFourierFeatureMapperTest(TensorFlowTestCase):
     x2 = constant_op.constant([[1.0, -1.0, 2.0], [-1.0, 10.0, 1.0],
                                [4.0, -2.0, -1.0]])
 
-    with self.test_session():
+    with self.cached_session():
       rffm = RandomFourierFeatureMapper(3, 10, 1.0)
       mapped_x1 = rffm.map(x1)
       mapped_x2 = rffm.map(x2)
@@ -80,7 +80,7 @@ class RandomFourierFeatureMapperTest(TensorFlowTestCase):
   def testSameOmegaReused(self):
     x = constant_op.constant([[2.0, 1.0, 0.0]])
 
-    with self.test_session():
+    with self.cached_session():
       rffm = RandomFourierFeatureMapper(3, 100)
       mapped_x = rffm.map(x)
       mapped_x_copy = rffm.map(x)
@@ -93,7 +93,7 @@ class RandomFourierFeatureMapperTest(TensorFlowTestCase):
     y = constant_op.constant([[1.0, -1.0, 2.0]])
     stddev = 3.0
 
-    with self.test_session():
+    with self.cached_session():
       # The mapped dimension is fairly small, so the kernel approximation is
       # very rough.
       rffm1 = RandomFourierFeatureMapper(3, 100, stddev)
@@ -113,7 +113,7 @@ class RandomFourierFeatureMapperTest(TensorFlowTestCase):
     y = constant_op.constant([[1.0, -1.0, 2.0]])
     stddev = 3.0
 
-    with self.test_session():
+    with self.cached_session():
       # The mapped dimension is fairly small, so the kernel approximation is
       # very rough.
       rffm = RandomFourierFeatureMapper(3, 100, stddev, seed=0)
@@ -139,7 +139,7 @@ class RandomFourierFeatureMapperTest(TensorFlowTestCase):
 
     normalized_points = [nn.l2_normalize(point, dim=1) for point in points]
     total_absolute_error = 0.0
-    with self.test_session():
+    with self.cached_session():
       rffm = RandomFourierFeatureMapper(input_dim, mapped_dim, stddev, seed=0)
       # Cache mappings so that they are not computed multiple times.
       cached_mappings = dict((point, rffm.map(point))
diff --git a/tensorflow/contrib/stat_summarizer/python/stat_summarizer_test.py b/tensorflow/contrib/stat_summarizer/python/stat_summarizer_test.py
index e4db5f2e3c..e6a0b30567 100644
--- a/tensorflow/contrib/stat_summarizer/python/stat_summarizer_test.py
+++ b/tensorflow/contrib/stat_summarizer/python/stat_summarizer_test.py
@@ -38,7 +38,7 @@ class StatSummarizerTest(test.TestCase):
       graph_def = graph.as_graph_def()
       ss = pywrap_tensorflow.NewStatSummarizer(graph_def.SerializeToString())
 
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         sess.run(variables.global_variables_initializer())
 
         for _ in range(20):
diff --git a/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py b/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py
index e429d12e96..1c4e18dbda 100644
--- a/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py
+++ b/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py
@@ -32,7 +32,7 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase):
     indices = [[1], [10]]
     updates = [100., 200.]
 
-    with self.test_session():
+    with self.cached_session():
       variables.global_variables_initializer().run()
       tensor_forest_ops.scatter_add_ndim(input_data, indices, updates).run()
       self.assertAllEqual(
@@ -45,7 +45,7 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase):
     indices = [[0, 0, 1], [1, 1, 2]]
     updates = [100., 200.]
 
-    with self.test_session():
+    with self.cached_session():
       variables.global_variables_initializer().run()
       tensor_forest_ops.scatter_add_ndim(input_data, indices, updates).run()
       self.assertAllEqual([[[1., 102., 3.], [4., 5., 6.]],
@@ -57,7 +57,7 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase):
     indices = []
     updates = []
 
-    with self.test_session():
+    with self.cached_session():
       variables.global_variables_initializer().run()
       tensor_forest_ops.scatter_add_ndim(input_data, indices, updates).run()
       self.assertAllEqual(init_val, input_data.eval())
@@ -67,7 +67,7 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase):
     input_data = variables.Variable(init_val)
     indices = [[0, 0, 1], [1, 1, 2]]
     updates = [100.]
-    with self.test_session():
+    with self.cached_session():
       variables.global_variables_initializer().run()
       with self.assertRaisesOpError(
           'Number of updates should be same as number of indices.'):
@@ -80,7 +80,7 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase):
     indices = [[0, 0], [1, 1]]
     updates = [[100., 200., 300.], [400., 500., 600.]]
 
-    with self.test_session():
+    with self.cached_session():
       variables.global_variables_initializer().run()
       tensor_forest_ops.scatter_add_ndim(input_data, indices, updates).run()
       self.assertAllEqual([[[101., 202., 303.], [4., 5., 6.]],
diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py
index 1c9c81827e..e0f0c0d4ff 100644
--- a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py
+++ b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py
@@ -149,7 +149,7 @@ class TensorForestTest(test_util.TensorFlowTestCase):
     self.assertTrue(isinstance(probs, ops.Tensor))
     self.assertTrue(isinstance(paths, ops.Tensor))
     self.assertTrue(isinstance(var, ops.Tensor))
-    with self.test_session():
+    with self.cached_session():
       variables.global_variables_initializer().run()
       resources.initialize_resources(resources.shared_resources()).run()
       self.assertEquals(probs.eval().shape, (4, 2))
diff --git a/tensorflow/contrib/text/python/ops/skip_gram_ops_test.py b/tensorflow/contrib/text/python/ops/skip_gram_ops_test.py
index 84e36146d5..832d34d60d 100644
--- a/tensorflow/contrib/text/python/ops/skip_gram_ops_test.py
+++ b/tensorflow/contrib/text/python/ops/skip_gram_ops_test.py
@@ -63,7 +63,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"jumps", b"brown"),
         (b"jumps", b"fox"),
     ])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_tokens, tokens.eval())
       self.assertAllEqual(expected_labels, labels.eval())
 
@@ -94,7 +94,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"jumps", b"fox"),
         (b"jumps", b"jumps"),
     ])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_tokens, tokens.eval())
       self.assertAllEqual(expected_labels, labels.eval())
 
@@ -105,7 +105,7 @@ class SkipGramOpsTest(test.TestCase):
     # If emit_self_as_target is False (default), output will be empty.
     tokens, labels = text.skip_gram_sample(
         input_tensor, min_skips=0, max_skips=0, emit_self_as_target=False)
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(0, tokens.eval().size)
       self.assertEqual(0, labels.eval().size)
 
@@ -117,7 +117,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"quick", b"quick"),
         (b"brown", b"brown"),
     ])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_tokens, tokens.eval())
       self.assertAllEqual(expected_labels, labels.eval())
 
@@ -134,7 +134,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"brown", b"the"),
         (b"brown", b"quick"),
     ])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_tokens, tokens.eval())
       self.assertAllEqual(expected_labels, labels.eval())
 
@@ -150,7 +150,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"quick", b"brown"),
         (b"brown", b"quick"),
     ])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_tokens, tokens.eval())
       self.assertAllEqual(expected_labels, labels.eval())
 
@@ -165,7 +165,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"quick", b"brown"),
         (b"brown", b"quick"),
     ])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_tokens, tokens.eval())
       self.assertAllEqual(expected_labels, labels.eval())
 
@@ -196,7 +196,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"over", b"fox"),
         (b"over", b"jumps"),
     ])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens_eval, labels_eval = sess.run([tokens, labels])
       self.assertAllEqual(expected_tokens, tokens_eval)
       self.assertAllEqual(expected_labels, labels_eval)
@@ -222,7 +222,7 @@ class SkipGramOpsTest(test.TestCase):
     tokens_2, labels_2 = text.skip_gram_sample(
         input_tensor, min_skips=1, max_skips=5)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tokens_1_eval, labels_1_eval, tokens_2_eval, labels_2_eval = sess.run(
           [tokens_1, labels_1, tokens_2, labels_2])
 
@@ -244,7 +244,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"brown", b"fox"),
         (b"fox", b"brown"),
     ])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
 
@@ -269,7 +269,7 @@ class SkipGramOpsTest(test.TestCase):
         (2, 3),
         (3, 2),
     ])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(expected_tokens, tokens.eval())
       self.assertAllEqual(expected_labels, labels.eval())
 
@@ -286,7 +286,7 @@ class SkipGramOpsTest(test.TestCase):
     for min_skips, max_skips in invalid_skips:
       tokens, labels = text.skip_gram_sample(
           input_tensor, min_skips=min_skips, max_skips=max_skips)
-      with self.test_session() as sess, self.assertRaises(
+      with self.cached_session() as sess, self.assertRaises(
           errors.InvalidArgumentError):
         sess.run([tokens, labels])
 
@@ -338,7 +338,7 @@ class SkipGramOpsTest(test.TestCase):
     vocab_freq_table = lookup.HashTable(
         lookup.KeyValueTensorInitializer(keys, values), -1)
 
-    with self.test_session():
+    with self.cached_session():
       vocab_freq_table.init.run()
 
       # No vocab_freq_table specified - output should be the same as input.
@@ -395,7 +395,7 @@ class SkipGramOpsTest(test.TestCase):
     vocab_freq_table = lookup.HashTable(
         lookup.KeyValueTensorInitializer(keys, values), -1)
 
-    with self.test_session():
+    with self.cached_session():
       vocab_freq_table.init.run()
       output = skip_gram_ops._filter_input(
           input_tensor=input_tensor,
@@ -464,7 +464,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"life", b"and"),
         (b"and", b"life"),
     ])
-    with self.test_session():
+    with self.cached_session():
       lookup_ops.tables_initializer().run()
       self.assertAllEqual(expected_tokens, tokens.eval())
       self.assertAllEqual(expected_labels, labels.eval())
@@ -510,7 +510,7 @@ class SkipGramOpsTest(test.TestCase):
         (b"to", b"life"),
         (b"life", b"to"),
     ])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       lookup_ops.tables_initializer().run()
       tokens_eval, labels_eval = sess.run([tokens, labels])
       self.assertAllEqual(expected_tokens, tokens_eval)
diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py
index 3eed1ab163..ed3219c49b 100644
--- a/tensorflow/python/estimator/export/export_test.py
+++ b/tensorflow/python/estimator/export/export_test.py
@@ -376,7 +376,7 @@ class ExportTest(test_util.TensorFlowTestCase):
                         "  } "
                         "} ", example)
 
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         sparse_result = sess.run(
             serving_input_receiver.features,
             feed_dict={
diff --git a/tensorflow/python/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/utils/multi_gpu_utils_test.py
index c7e94998b4..d6016ed711 100644
--- a/tensorflow/python/keras/utils/multi_gpu_utils_test.py
+++ b/tensorflow/python/keras/utils/multi_gpu_utils_test.py
@@ -48,7 +48,7 @@ class TestMultiGPUModel(test.TestCase):
     if not check_if_compatible_devices(gpus=gpus):
       return
 
-    with self.test_session():
+    with self.cached_session():
       model = keras.models.Sequential()
       model.add(keras.layers.Dense(hidden_dim,
                                    input_shape=(input_dim,)))
@@ -78,7 +78,7 @@ class TestMultiGPUModel(test.TestCase):
     if not check_if_compatible_devices(gpus=gpus):
       return
 
-    with self.test_session():
+    with self.cached_session():
       input_a = keras.Input((input_dim_a,))
       input_b = keras.Input((input_dim_b,))
       a = keras.layers.Dense(hidden_dim)(input_a)
@@ -105,7 +105,7 @@ class TestMultiGPUModel(test.TestCase):
     if not check_if_compatible_devices(gpus=2):
       return
 
-    with self.test_session():
+    with self.cached_session():
       input_shape = (1000, 10)
       model = keras.models.Sequential()
       model.add(keras.layers.Dense(10,
@@ -144,7 +144,7 @@ class TestMultiGPUModel(test.TestCase):
     if not check_if_compatible_devices(gpus=gpus):
       return
 
-    with self.test_session():
+    with self.cached_session():
       input_shape = (num_samples,) + shape
       x_train = np.random.randint(0, 255, input_shape)
       y_train = np.random.randint(0, num_classes, (input_shape[0],))
diff --git a/tensorflow/python/profiler/pprof_profiler_test.py b/tensorflow/python/profiler/pprof_profiler_test.py
index c2469f012d..11a3487360 100644
--- a/tensorflow/python/profiler/pprof_profiler_test.py
+++ b/tensorflow/python/profiler/pprof_profiler_test.py
@@ -141,7 +141,7 @@ comment: 9
     run_metadata = config_pb2.RunMetadata()
 
     num_iters = 5
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       i = constant_op.constant(0)
       c = lambda i: math_ops.less(i, num_iters)
       b = lambda i: math_ops.add(i, 1)
diff --git a/tensorflow/python/tools/optimize_for_inference_test.py b/tensorflow/python/tools/optimize_for_inference_test.py
index fcb3ceac82..a39c046761 100644
--- a/tensorflow/python/tools/optimize_for_inference_test.py
+++ b/tensorflow/python/tools/optimize_for_inference_test.py
@@ -129,7 +129,7 @@ class OptimizeForInferenceTest(test.TestCase):
     self.assertProtoEquals(expected_output, output)
 
   def testFoldBatchNorms(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
       input_op = constant_op.constant(
           np.array(inputs), shape=[1, 1, 6, 2], dtype=dtypes.float32)
@@ -161,7 +161,7 @@ class OptimizeForInferenceTest(test.TestCase):
     optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
         original_graph_def)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       _ = importer.import_graph_def(
           optimized_graph_def, input_map={}, name="optimized")
       optimized_result = sess.run(["optimized/output:0"])
@@ -224,7 +224,7 @@ class OptimizeForInferenceTest(test.TestCase):
         self.assertNotEqual("FusedBatchNorm", node.op)
 
   def testFuseResizePadAndConv(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
       input_op = constant_op.constant(
           np.array(inputs), shape=[1, 2, 3, 2], dtype=dtypes.float32)
@@ -242,7 +242,7 @@ class OptimizeForInferenceTest(test.TestCase):
     optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
         original_graph_def, ["output"])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       _ = importer.import_graph_def(
           optimized_graph_def, input_map={}, name="optimized")
       optimized_result = sess.run(["optimized/output:0"])
@@ -255,7 +255,7 @@ class OptimizeForInferenceTest(test.TestCase):
       self.assertNotEqual("ResizeBilinear", node.op)
 
   def testFuseResizeAndConv(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
       input_op = constant_op.constant(
           np.array(inputs), shape=[1, 2, 3, 2], dtype=dtypes.float32)
@@ -271,7 +271,7 @@ class OptimizeForInferenceTest(test.TestCase):
     optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
         original_graph_def, ["output"])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       _ = importer.import_graph_def(
           optimized_graph_def, input_map={}, name="optimized")
       optimized_result = sess.run(["optimized/output:0"])
@@ -284,7 +284,7 @@ class OptimizeForInferenceTest(test.TestCase):
 
 
   def testFusePadAndConv(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
       input_op = constant_op.constant(
           np.array(inputs), shape=[1, 2, 3, 2], dtype=dtypes.float32)
@@ -300,7 +300,7 @@ class OptimizeForInferenceTest(test.TestCase):
     optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
         original_graph_def, ["output"])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       _ = importer.import_graph_def(
           optimized_graph_def, input_map={}, name="optimized")
       optimized_result = sess.run(["optimized/output:0"])
-- 
GitLab


From b19d6657070bbf1df5706195a0bf3a92cbf371fc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 00:07:20 -0700
Subject: [PATCH 0484/1357] Move from deprecated self.test_session() to
 self.cached_session().

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about:
* the fact that the session may be reused.
* the session is not closed even when doing a "with self.test_session()" statement.

PiperOrigin-RevId: 213944932
---
 .../all_reduce/python/all_reduce_test.py      |  2 +-
 .../python/kernel_tests/bigtable_ops_test.py  | 18 ++++-----
 .../boosted_trees/python/utils/losses_test.py |  4 +-
 .../coder/python/ops/coder_ops_test.py        |  2 +-
 .../distribute/python/cross_tower_ops_test.py |  2 +-
 .../fused_conv2d_bias_activation_op_test.py   |  6 +--
 .../python/kernel_tests/kinesis_test.py       |  4 +-
 .../python/kernel_tests/seq2seq_test.py       | 38 +++++++++----------
 tensorflow/contrib/rate/rate_test.py          |  4 +-
 .../python/ops/resampler_ops_test.py          |  8 ++--
 .../python/kernel_tests/rpc_op_test_base.py   | 32 ++++++++--------
 .../contrib/summary/summary_ops_graph_test.py | 28 +++++++-------
 .../debug/lib/session_debug_grpc_test.py      |  2 +-
 .../keras/wrappers/scikit_learn_test.py       | 12 +++---
 tensorflow/python/ops/losses/util_test.py     |  6 +--
 .../python/summary/writer/writer_test.py      |  4 +-
 16 files changed, 86 insertions(+), 86 deletions(-)

diff --git a/tensorflow/contrib/all_reduce/python/all_reduce_test.py b/tensorflow/contrib/all_reduce/python/all_reduce_test.py
index b3f5d92259..9a8f62b986 100644
--- a/tensorflow/contrib/all_reduce/python/all_reduce_test.py
+++ b/tensorflow/contrib/all_reduce/python/all_reduce_test.py
@@ -149,7 +149,7 @@ class AllReduceTest(test_util.TensorFlowTestCase):
     num_devices = num_workers * num_gpus
     dev_list = ["/replica:0/task:0/device:CPU:0"
                 for _ in range(num_devices)]
-    with self.test_session():
+    with self.cached_session():
       input_tensors = self._buildInitialVars(shape, dev_list)
       un_op = lambda x: math_ops.div(
           x, constant_op.constant(num_devices, dtype=types_pb2.DT_FLOAT))
diff --git a/tensorflow/contrib/bigtable/python/kernel_tests/bigtable_ops_test.py b/tensorflow/contrib/bigtable/python/kernel_tests/bigtable_ops_test.py
index e36f7f32c6..316da9ebe1 100644
--- a/tensorflow/contrib/bigtable/python/kernel_tests/bigtable_ops_test.py
+++ b/tensorflow/contrib/bigtable/python/kernel_tests/bigtable_ops_test.py
@@ -61,7 +61,7 @@ class BigtableOpsTest(test.TestCase):
     n = itr.get_next()
     expected = list(self.COMMON_ROW_KEYS)
     expected.reverse()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._writeCommonValues(sess)
       sess.run(itr.initializer)
       for i in range(3):
@@ -84,7 +84,7 @@ class BigtableOpsTest(test.TestCase):
     expected_keys.reverse()
     expected_values = list(self.COMMON_VALUES)
     expected_values.reverse()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._writeCommonValues(sess)
       sess.run(itr.initializer)
       for i in range(3):
@@ -125,7 +125,7 @@ class BigtableOpsTest(test.TestCase):
     expected_keys = list(self.COMMON_ROW_KEYS)
     expected_values = list(self.COMMON_VALUES)
     expected_tuples = zip(expected_keys, expected_values)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._writeCommonValues(sess)
       sess.run(itr.initializer)
       for i, elem in enumerate(expected_tuples):
@@ -144,7 +144,7 @@ class BigtableOpsTest(test.TestCase):
     itr = ds.make_initializable_iterator()
     n = itr.get_next()
     expected_key = self.COMMON_ROW_KEYS[0]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._writeCommonValues(sess)
       sess.run(itr.initializer)
       output = sess.run(n)
@@ -163,7 +163,7 @@ class BigtableOpsTest(test.TestCase):
   def runSampleKeyPairsTest(self, ds, expected_key_pairs):
     itr = ds.make_initializable_iterator()
     n = itr.get_next()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._writeCommonValues(sess)
       sess.run(itr.initializer)
       for i, elems in enumerate(expected_key_pairs):
@@ -219,7 +219,7 @@ class BigtableOpsTest(test.TestCase):
     ds = bigtable_api._BigtableSampleKeyPairsDataset(
         self._table, prefix="r", start="r1", end="")
     itr = ds.make_initializable_iterator()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(itr.initializer)
 
@@ -227,7 +227,7 @@ class BigtableOpsTest(test.TestCase):
     ds = bigtable_api._BigtableSampleKeyPairsDataset(
         self._table, prefix="r", start="", end="r3")
     itr = ds.make_initializable_iterator()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(itr.initializer)
 
@@ -235,7 +235,7 @@ class BigtableOpsTest(test.TestCase):
     ds = self._table.parallel_scan_prefix(prefix="r", cf1="c1")
     itr = ds.make_initializable_iterator()
     n = itr.get_next()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._writeCommonValues(sess)
       sess.run(itr.initializer)
       expected_values = list(zip(self.COMMON_ROW_KEYS, self.COMMON_VALUES))
@@ -253,7 +253,7 @@ class BigtableOpsTest(test.TestCase):
     ds = self._table.parallel_scan_range(start="r1", end="r4", cf1="c1")
     itr = ds.make_initializable_iterator()
     n = itr.get_next()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._writeCommonValues(sess)
       sess.run(itr.initializer)
       expected_values = list(zip(self.COMMON_ROW_KEYS, self.COMMON_VALUES))
diff --git a/tensorflow/contrib/boosted_trees/python/utils/losses_test.py b/tensorflow/contrib/boosted_trees/python/utils/losses_test.py
index ccb8509c03..cc22504c8f 100644
--- a/tensorflow/contrib/boosted_trees/python/utils/losses_test.py
+++ b/tensorflow/contrib/boosted_trees/python/utils/losses_test.py
@@ -45,7 +45,7 @@ class LossesTest(test_util.TensorFlowTestCase):
 
     eps = 0.2
 
-    with self.test_session():
+    with self.cached_session():
       predictions_tensor = constant_op.constant(
           prediction_logits, dtype=dtypes.float32)
       loss_for_positives, _ = losses.per_example_exp_loss(
@@ -84,7 +84,7 @@ class LossesTest(test_util.TensorFlowTestCase):
     predictions = np.array(
         [[0.123], [23.2], [233], [52], [3]], dtype=np.float32)
 
-    with self.test_session():
+    with self.cached_session():
       loss_tensor, _ = losses.per_example_squared_loss(labels, weights,
                                                        predictions)
 
diff --git a/tensorflow/contrib/coder/python/ops/coder_ops_test.py b/tensorflow/contrib/coder/python/ops/coder_ops_test.py
index d5e14e7a64..f5431ca1ff 100644
--- a/tensorflow/contrib/coder/python/ops/coder_ops_test.py
+++ b/tensorflow/contrib/coder/python/ops/coder_ops_test.py
@@ -45,7 +45,7 @@ class CoderOpsTest(test.TestCase):
     decoded = coder_ops.range_decode(
         encoded, array_ops.shape(data), cdf, precision=14)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertAllEqual(*sess.run((data, decoded)))
 
 
diff --git a/tensorflow/contrib/distribute/python/cross_tower_ops_test.py b/tensorflow/contrib/distribute/python/cross_tower_ops_test.py
index 490371477a..a3e1b96a68 100644
--- a/tensorflow/contrib/distribute/python/cross_tower_ops_test.py
+++ b/tensorflow/contrib/distribute/python/cross_tower_ops_test.py
@@ -114,7 +114,7 @@ class CrossTowerOpsTestBase(test.TestCase, parameterized.TestCase):
         self.assertEqual([v.numpy() for v in left._index.values()],
                          list(right._index.values()))
       else:
-        with self.test_session() as sess:
+        with self.cached_session() as sess:
           self.assertEqual(
               sess.run(list(left._index.values())), list(right._index.values()))
 
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
index 0185ef662c..e47342bc7d 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
@@ -265,7 +265,7 @@ class FusedConv2DBiasActivationTest(test.TestCase):
     tensors = []
     for (data_format, use_gpu) in GetTestConfigs():
       tensors.append(_SetupVal(data_format, use_gpu))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       values = sess.run(tensors)
       for i in range(1, len(values)):
         self.assertAllClose(values[0], values[i], rtol=1e-5, atol=1e-5)
@@ -282,7 +282,7 @@ class FusedConv2DBiasActivationTest(test.TestCase):
               data_format, filter_format, dtype)
         tensors.append(result)
         ref_tensors.append(expected)
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         values = sess.run(tensors)
         ref_values = sess.run(ref_tensors)
         for i in range(len(tensors)):
@@ -493,7 +493,7 @@ class FusedConv2DBiasActivationTest(test.TestCase):
     if gpu_only and not test.is_gpu_available():
       tf_logging.info("Skipping OpEdgeCases tests.")
       return
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Illegal strides.
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "Convolutional strides are not supported in "
diff --git a/tensorflow/contrib/kinesis/python/kernel_tests/kinesis_test.py b/tensorflow/contrib/kinesis/python/kernel_tests/kinesis_test.py
index 7289b45c50..bf89922318 100644
--- a/tensorflow/contrib/kinesis/python/kernel_tests/kinesis_test.py
+++ b/tensorflow/contrib/kinesis/python/kernel_tests/kinesis_test.py
@@ -64,7 +64,7 @@ class KinesisDatasetTest(test.TestCase):
     init_batch_op = iterator.make_initializer(batch_dataset)
     get_next = iterator.get_next()
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Basic test: read from shard 0 of stream 1.
       sess.run(init_op, feed_dict={stream: stream_name, num_epochs: 1})
       for i in range(10):
@@ -108,7 +108,7 @@ class KinesisDatasetTest(test.TestCase):
     get_next = iterator.get_next()
 
     data = list()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Basic test: read from shard 0 of stream 2.
       sess.run(
           init_op, feed_dict={
diff --git a/tensorflow/contrib/legacy_seq2seq/python/kernel_tests/seq2seq_test.py b/tensorflow/contrib/legacy_seq2seq/python/kernel_tests/seq2seq_test.py
index 2f33a2b74d..0e5ea6b9f7 100644
--- a/tensorflow/contrib/legacy_seq2seq/python/kernel_tests/seq2seq_test.py
+++ b/tensorflow/contrib/legacy_seq2seq/python/kernel_tests/seq2seq_test.py
@@ -47,7 +47,7 @@ from tensorflow.python.training import adam
 class Seq2SeqTest(test.TestCase):
 
   def testRNNDecoder(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
@@ -65,7 +65,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0].shape)
 
   def testBasicRNNSeq2Seq(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
@@ -81,7 +81,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0].shape)
 
   def testTiedRNNSeq2Seq(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
@@ -98,7 +98,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0].shape)
 
   def testEmbeddingRNNDecoder(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
@@ -124,7 +124,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0].h.shape)
 
   def testEmbeddingRNNSeq2Seq(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         enc_inp = [
@@ -228,7 +228,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertAllClose(res1, res3)
 
   def testEmbeddingTiedRNNSeq2Seq(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         enc_inp = [
@@ -316,7 +316,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertAllClose(res1, res3)
 
   def testAttentionDecoder1(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         cell_fn = lambda: rnn_cell.GRUCell(2)
@@ -341,7 +341,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0].shape)
 
   def testAttentionDecoder2(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         cell_fn = lambda: rnn_cell.GRUCell(2)
@@ -367,7 +367,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0].shape)
 
   def testDynamicAttentionDecoder1(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         cell_fn = lambda: rnn_cell.GRUCell(2)
@@ -391,7 +391,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0].shape)
 
   def testDynamicAttentionDecoder2(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         cell_fn = lambda: rnn_cell.GRUCell(2)
@@ -416,7 +416,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0].shape)
 
   def testAttentionDecoderStateIsTuple(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         single_cell = lambda: rnn_cell.BasicLSTMCell(  # pylint: disable=g-long-lambda
@@ -448,7 +448,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0][1].h.shape)
 
   def testDynamicAttentionDecoderStateIsTuple(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         cell_fn = lambda: rnn_cell.MultiRNNCell(  # pylint: disable=g-long-lambda
@@ -479,7 +479,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0][1].h.shape)
 
   def testEmbeddingAttentionDecoder(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
@@ -513,7 +513,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertEqual((2, 2), res[0].shape)
 
   def testEmbeddingAttentionSeq2Seq(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         enc_inp = [
@@ -622,7 +622,7 @@ class Seq2SeqTest(test.TestCase):
         # self.assertAllClose(res1, res3)
 
   def testOne2ManyRNNSeq2Seq(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         enc_inp = [
@@ -712,7 +712,7 @@ class Seq2SeqTest(test.TestCase):
         self.assertAllClose(res1, res3)
 
   def testSequenceLoss(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       logits = [constant_op.constant(i + 0.5, shape=[2, 5]) for i in range(3)]
       targets = [
           constant_op.constant(
@@ -748,7 +748,7 @@ class Seq2SeqTest(test.TestCase):
       self.assertAllClose(9.656628, res)
 
   def testSequenceLossByExample(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       output_classes = 5
       logits = [
           constant_op.constant(
@@ -778,7 +778,7 @@ class Seq2SeqTest(test.TestCase):
   #   classes = 10
   #   buckets = [(4, 4), (8, 8)]
 
-  #   with self.test_session():
+  #   with self.cached_session():
   #     # Here comes a sample Seq2Seq model using GRU cells.
   #     def SampleGRUSeq2Seq(enc_inp, dec_inp, weights, per_example_loss):
   #       """Example sequence-to-sequence model that uses GRU cells."""
@@ -839,7 +839,7 @@ class Seq2SeqTest(test.TestCase):
     random.seed(111)
     np.random.seed(111)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # We use sampled softmax so we keep output projection separate.
       w = variable_scope.get_variable("proj_w", [24, classes])
       w_t = array_ops.transpose(w)
diff --git a/tensorflow/contrib/rate/rate_test.py b/tensorflow/contrib/rate/rate_test.py
index 08908104f4..3dee163881 100644
--- a/tensorflow/contrib/rate/rate_test.py
+++ b/tensorflow/contrib/rate/rate_test.py
@@ -46,7 +46,7 @@ class RateTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testBasic(self):
-    with self.test_session():
+    with self.cached_session():
       r_ = rate.Rate()
       a = r_(array_ops.ones([1]), denominator=array_ops.ones([1]))
       self.evaluate(variables.global_variables_initializer())
@@ -67,7 +67,7 @@ class RateTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testWhileLoop(self):
-    with self.test_session():
+    with self.cached_session():
       r_ = rate.Rate()
 
       def body(value, denom, i, ret_rate):
diff --git a/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py b/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py
index 6253f96315..e30e7255fa 100644
--- a/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py
+++ b/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py
@@ -210,7 +210,7 @@ class ResamplerTest(test.TestCase):
 
     # Input data shape is not defined over a 2D grid, i.e. its shape is not like
     # (batch_size, data_height, data_width, data_channels).
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       data_shape = (batch_size, data_height, data_width, data_depth,
                     data_channels)
       data = np.zeros(data_shape)
@@ -225,7 +225,7 @@ class ResamplerTest(test.TestCase):
         sess.run(outputs)
 
     # Warp tensor must be at least a matrix, with shape [batch_size, 2].
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       data_shape = (batch_size, data_height, data_width, data_channels)
       data = np.zeros(data_shape)
       warp_shape = (batch_size,)
@@ -238,7 +238,7 @@ class ResamplerTest(test.TestCase):
         sess.run(outputs)
 
     # The batch size of the data and warp tensors must be the same.
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       data_shape = (batch_size, data_height, data_width, data_channels)
       data = np.zeros(data_shape)
       warp_shape = (batch_size+1, warp_height, warp_width, 2)
@@ -252,7 +252,7 @@ class ResamplerTest(test.TestCase):
 
     # The warp tensor must contain 2D coordinates, i.e. its shape last dimension
     # must be 2.
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       data_shape = (batch_size, data_height, data_width, data_channels)
       data = np.zeros(data_shape)
       warp_shape = (batch_size, warp_height, warp_width, 3)
diff --git a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py
index 1c23c28860..0d615923e0 100644
--- a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py
+++ b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py
@@ -49,7 +49,7 @@ class RpcOpTestBase(object):
     return rpc_op.try_rpc(*args, protocol=self._protocol, **kwargs)
 
   def testScalarHostPortRpc(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       request_tensors = (
           test_example_pb2.TestCase(values=[1, 2, 3]).SerializeToString())
       response_tensors = self.rpc(
@@ -63,7 +63,7 @@ class RpcOpTestBase(object):
     self.assertAllEqual([2, 3, 4], response_message.values)
 
   def testScalarHostPortTryRpc(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       request_tensors = (
           test_example_pb2.TestCase(values=[1, 2, 3]).SerializeToString())
       response_tensors, status_code, status_message = self.try_rpc(
@@ -83,7 +83,7 @@ class RpcOpTestBase(object):
     self.assertEqual(b'', status_message_values)
 
   def testEmptyHostPortRpc(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       request_tensors = []
       response_tensors = self.rpc(
           method=self.get_method_name('Increment'),
@@ -98,7 +98,7 @@ class RpcOpTestBase(object):
         '/InvalidService.Increment',
         self.get_method_name('InvalidMethodName')
     ]:
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         with self.assertRaisesOpError(self.invalid_method_string):
           sess.run(self.rpc(method=method, address=self._address, request=''))
 
@@ -111,7 +111,7 @@ class RpcOpTestBase(object):
   def testInvalidAddress(self):
     # This covers the case of address='' and address='localhost:293874293874'
     address = 'unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@'
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       with self.assertRaises(errors.UnavailableError):
         sess.run(
             self.rpc(
@@ -128,7 +128,7 @@ class RpcOpTestBase(object):
           self.connect_failed_string in status_message_value.decode('ascii'))
 
   def testAlwaysFailingMethod(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       response_tensors = self.rpc(
           method=self.get_method_name('AlwaysFailWithInvalidArgument'),
           address=self._address,
@@ -150,7 +150,7 @@ class RpcOpTestBase(object):
       self.assertTrue(I_WARNED_YOU in status_message_value.decode('ascii'))
 
   def testSometimesFailingMethodWithManyRequests(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Fail hard by default.
       response_tensors = self.rpc(
           method=self.get_method_name('SometimesFailWithInvalidArgument'),
@@ -179,7 +179,7 @@ class RpcOpTestBase(object):
       self.assertAllEqual(expected_message_values, status_message_values)
 
   def testVecHostPortRpc(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       request_tensors = [
           test_example_pb2.TestCase(
               values=[i, i + 1, i + 2]).SerializeToString() for i in range(20)
@@ -197,7 +197,7 @@ class RpcOpTestBase(object):
       self.assertAllEqual([i + 1, i + 2, i + 3], response_message.values)
 
   def testVecHostPortManyParallelRpcs(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       request_tensors = [
           test_example_pb2.TestCase(
               values=[i, i + 1, i + 2]).SerializeToString() for i in range(20)
@@ -219,7 +219,7 @@ class RpcOpTestBase(object):
         self.assertAllEqual([i + 1, i + 2, i + 3], response_message.values)
 
   def testVecHostPortRpcUsingEncodeAndDecodeProto(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       request_tensors = encode_proto_op.encode_proto(
           message_type='tensorflow.contrib.rpc.TestCase',
           field_names=['values'],
@@ -241,7 +241,7 @@ class RpcOpTestBase(object):
                          for i in range(20)], response_shape_values)
 
   def testVecHostPortRpcCancelsUponSessionTimeOutWhenSleepingForever(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       request_tensors = [''] * 25  # This will launch 25 RPC requests.
       response_tensors = self.rpc(
           method=self.get_method_name('SleepForever'),
@@ -254,7 +254,7 @@ class RpcOpTestBase(object):
           sess.run(response_tensors, options=options)
 
   def testVecHostPortRpcCancelsUponConfiguredTimeOutWhenSleepingForever(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       request_tensors = [''] * 25  # This will launch 25 RPC requests.
       response_tensors = self.rpc(
           method=self.get_method_name('SleepForever'),
@@ -265,7 +265,7 @@ class RpcOpTestBase(object):
         sess.run(response_tensors)
 
   def testTryRpcPropagatesDeadlineErrorWithSometimesTimingOutRequests(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       response_tensors, status_code, status_message = self.try_rpc(
           method=self.get_method_name('SometimesSleepForever'),
           timeout_in_ms=1000,
@@ -281,7 +281,7 @@ class RpcOpTestBase(object):
 
   def testTryRpcWithMultipleAddressesSingleRequest(self):
     flatten = lambda x: list(itertools.chain.from_iterable(x))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       addresses = flatten([[
           self._address, 'unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@'
       ] for _ in range(10)])
@@ -301,7 +301,7 @@ class RpcOpTestBase(object):
 
   def testTryRpcWithMultipleMethodsSingleRequest(self):
     flatten = lambda x: list(itertools.chain.from_iterable(x))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       methods = flatten(
           [[self.get_method_name('Increment'), 'InvalidMethodName']
            for _ in range(10)])
@@ -319,7 +319,7 @@ class RpcOpTestBase(object):
 
   def testTryRpcWithMultipleAddressesAndRequests(self):
     flatten = lambda x: list(itertools.chain.from_iterable(x))
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       addresses = flatten([[
           self._address, 'unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@'
       ] for _ in range(10)])
diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index ae8336daaf..807741e05f 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -52,7 +52,7 @@ class GraphFileTest(test_util.TensorFlowTestCase):
       summary_ops.histogram('histogram', [1.0], step=1)
       summary_ops.image('image', [[[[1.0]]]], step=1)
       summary_ops.audio('audio', [[1.0]], 1.0, 1, step=1)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(summary_ops.summary_writer_initializer_op())
       sess.run(summary_ops.all_summary_ops())
     # The working condition of the ops is tested in the C++ test so we just
@@ -64,7 +64,7 @@ class GraphFileTest(test_util.TensorFlowTestCase):
     writer = summary_ops.create_file_writer(logdir, max_queue=0)
     with writer.as_default(), summary_ops.always_record_summaries():
       summary_ops.scalar('scalar', 2.0, step=1)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(summary_ops.summary_writer_initializer_op())
       sess.run(summary_ops.all_summary_ops())
     events = summary_test_util.events_from_logdir(logdir)
@@ -77,7 +77,7 @@ class GraphFileTest(test_util.TensorFlowTestCase):
     with writer.as_default(), summary_ops.always_record_summaries():
       with ops.name_scope('scope'):
         summary_ops.scalar('scalar', 2.0, step=1)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(summary_ops.summary_writer_initializer_op())
       sess.run(summary_ops.all_summary_ops())
     events = summary_test_util.events_from_logdir(logdir)
@@ -90,7 +90,7 @@ class GraphFileTest(test_util.TensorFlowTestCase):
     writer = summary_ops.create_file_writer(logdir, max_queue=0)
     with writer.as_default(), summary_ops.always_record_summaries():
       summary_ops.scalar('scalar', 2.0)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(variables.global_variables_initializer())
       sess.run(summary_ops.summary_writer_initializer_op())
       step, _ = sess.run(
@@ -105,7 +105,7 @@ class GraphFileTest(test_util.TensorFlowTestCase):
         logdir, max_queue=1, flush_millis=999999)
     with writer.as_default(), summary_ops.always_record_summaries():
       summary_ops.scalar('scalar', 2.0, step=1)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(summary_ops.summary_writer_initializer_op())
       get_total = lambda: len(summary_test_util.events_from_logdir(logdir))
       # Note: First tf.Event is always file_version.
@@ -123,7 +123,7 @@ class GraphFileTest(test_util.TensorFlowTestCase):
     with writer.as_default(), summary_ops.always_record_summaries():
       summary_ops.scalar('scalar', 2.0, step=1)
       flush_op = summary_ops.flush()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(summary_ops.summary_writer_initializer_op())
       get_total = lambda: len(summary_test_util.events_from_logdir(logdir))
       # Note: First tf.Event is always file_version.
@@ -157,7 +157,7 @@ class GraphFileTest(test_util.TensorFlowTestCase):
       with writer3.as_default():
         summary_ops.scalar('three', 3.0, step=3)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # Run init ops across writers sequentially to avoid race condition.
       # TODO(nickfelt): fix race condition in resource manager lookup or create
       sess.run(writer1.init())
@@ -191,7 +191,7 @@ class GraphFileTest(test_util.TensorFlowTestCase):
           logdir, max_queue=100, flush_millis=1000000)
       with writer.as_default():
         summary_ops.scalar('one', 1.0, step=1)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(summary_ops.summary_writer_initializer_op())
       get_total = lambda: len(summary_test_util.events_from_logdir(logdir))
       self.assertEqual(1, get_total())  # file_version Event
@@ -219,7 +219,7 @@ class GraphFileTest(test_util.TensorFlowTestCase):
           logdir, max_queue=100, flush_millis=1000000)
       with writer.as_default():
         summary_ops.scalar('one', 1.0, step=1)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       sess.run(summary_ops.summary_writer_initializer_op())
       get_total = lambda: len(summary_test_util.events_from_logdir(logdir))
       self.assertEqual(1, get_total())  # file_version Event
@@ -241,7 +241,7 @@ class GraphDbTest(summary_test_util.SummaryDbTest):
     training_util.get_or_create_global_step()
     name = 'hi'
     graph = graph_pb2.GraphDef(node=(node_def_pb2.NodeDef(name=name),))
-    with self.test_session():
+    with self.cached_session():
       with self.create_db_writer().as_default():
         summary_ops.initialize(graph=graph)
     six.assertCountEqual(self, [name],
@@ -249,7 +249,7 @@ class GraphDbTest(summary_test_util.SummaryDbTest):
 
   def testScalarSummary(self):
     """Test record_summaries_every_n_global_steps and all_summaries()."""
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       global_step = training_util.get_or_create_global_step()
       global_step.initializer.run()
       with ops.device('/cpu:0'):
@@ -280,7 +280,7 @@ class GraphDbTest(summary_test_util.SummaryDbTest):
 
   def testScalarSummaryNameScope(self):
     """Test record_summaries_every_n_global_steps and all_summaries()."""
-    with ops.Graph().as_default(), self.test_session() as sess:
+    with ops.Graph().as_default(), self.cached_session() as sess:
       global_step = training_util.get_or_create_global_step()
       global_step.initializer.run()
       with ops.device('/cpu:0'):
@@ -311,7 +311,7 @@ class GraphDbTest(summary_test_util.SummaryDbTest):
           self.assertEqual(events[1].summary.value[0].tag, 'scope/my_scalar')
 
   def testSummaryGraphModeCond(self):
-    with ops.Graph().as_default(), self.test_session():
+    with ops.Graph().as_default(), self.cached_session():
       training_util.get_or_create_global_step()
       logdir = tempfile.mkdtemp()
       with summary_ops.create_file_writer(
@@ -332,7 +332,7 @@ class GraphDbTest(summary_test_util.SummaryDbTest):
       self.assertEqual(events[1].summary.value[0].tag, 'cond/scalar')
 
   def testSummaryGraphModeWhile(self):
-    with ops.Graph().as_default(), self.test_session():
+    with ops.Graph().as_default(), self.cached_session():
       training_util.get_or_create_global_step()
       logdir = tempfile.mkdtemp()
       with summary_ops.create_file_writer(
diff --git a/tensorflow/python/debug/lib/session_debug_grpc_test.py b/tensorflow/python/debug/lib/session_debug_grpc_test.py
index ff49b69547..91f21cb1f3 100644
--- a/tensorflow/python/debug/lib/session_debug_grpc_test.py
+++ b/tensorflow/python/debug/lib/session_debug_grpc_test.py
@@ -741,7 +741,7 @@ class DelayedDebugServerTest(test_util.TensorFlowTestCase):
      debug_server) = grpc_debug_test_server.start_server_on_separate_thread(
          server_start_delay_sec=2.0, dump_to_filesystem=False)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       a_init = constant_op.constant(42.0, name="a_init")
       a = variables.Variable(a_init, name="a")
 
diff --git a/tensorflow/python/keras/wrappers/scikit_learn_test.py b/tensorflow/python/keras/wrappers/scikit_learn_test.py
index c322efdedf..f904290803 100644
--- a/tensorflow/python/keras/wrappers/scikit_learn_test.py
+++ b/tensorflow/python/keras/wrappers/scikit_learn_test.py
@@ -102,7 +102,7 @@ def assert_regression_works(reg):
 class ScikitLearnAPIWrapperTest(test.TestCase):
 
   def test_classify_build_fn(self):
-    with self.test_session():
+    with self.cached_session():
       clf = keras.wrappers.scikit_learn.KerasClassifier(
           build_fn=build_fn_clf,
           hidden_dim=HIDDEN_DIM,
@@ -118,7 +118,7 @@ class ScikitLearnAPIWrapperTest(test.TestCase):
       def __call__(self, hidden_dim):
         return build_fn_clf(hidden_dim)
 
-    with self.test_session():
+    with self.cached_session():
       clf = keras.wrappers.scikit_learn.KerasClassifier(
           build_fn=ClassBuildFnClf(),
           hidden_dim=HIDDEN_DIM,
@@ -134,7 +134,7 @@ class ScikitLearnAPIWrapperTest(test.TestCase):
       def __call__(self, hidden_dim):
         return build_fn_clf(hidden_dim)
 
-    with self.test_session():
+    with self.cached_session():
       clf = InheritClassBuildFnClf(
           build_fn=None,
           hidden_dim=HIDDEN_DIM,
@@ -144,7 +144,7 @@ class ScikitLearnAPIWrapperTest(test.TestCase):
       assert_classification_works(clf)
 
   def test_regression_build_fn(self):
-    with self.test_session():
+    with self.cached_session():
       reg = keras.wrappers.scikit_learn.KerasRegressor(
           build_fn=build_fn_reg,
           hidden_dim=HIDDEN_DIM,
@@ -160,7 +160,7 @@ class ScikitLearnAPIWrapperTest(test.TestCase):
       def __call__(self, hidden_dim):
         return build_fn_reg(hidden_dim)
 
-    with self.test_session():
+    with self.cached_session():
       reg = keras.wrappers.scikit_learn.KerasRegressor(
           build_fn=ClassBuildFnReg(),
           hidden_dim=HIDDEN_DIM,
@@ -176,7 +176,7 @@ class ScikitLearnAPIWrapperTest(test.TestCase):
       def __call__(self, hidden_dim):
         return build_fn_reg(hidden_dim)
 
-    with self.test_session():
+    with self.cached_session():
       reg = InheritClassBuildFnReg(
           build_fn=None,
           hidden_dim=HIDDEN_DIM,
diff --git a/tensorflow/python/ops/losses/util_test.py b/tensorflow/python/ops/losses/util_test.py
index 7fa7a41fca..df2e60e2e4 100644
--- a/tensorflow/python/ops/losses/util_test.py
+++ b/tensorflow/python/ops/losses/util_test.py
@@ -28,7 +28,7 @@ class LossesUtilTest(test.TestCase):
 
   def testGetRegularizationLoss(self):
     # Empty regularization collection should evaluate to 0.0.
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(0.0, util.get_regularization_loss().eval())
 
     # Loss should sum.
@@ -36,14 +36,14 @@ class LossesUtilTest(test.TestCase):
         ops.GraphKeys.REGULARIZATION_LOSSES, constant_op.constant(2.0))
     ops.add_to_collection(
         ops.GraphKeys.REGULARIZATION_LOSSES, constant_op.constant(3.0))
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(5.0, util.get_regularization_loss().eval())
 
     # Check scope capture mechanism.
     with ops.name_scope('scope1'):
       ops.add_to_collection(
           ops.GraphKeys.REGULARIZATION_LOSSES, constant_op.constant(-1.0))
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(-1.0, util.get_regularization_loss('scope1').eval())
 
 
diff --git a/tensorflow/python/summary/writer/writer_test.py b/tensorflow/python/summary/writer/writer_test.py
index dc990c2602..670230e917 100644
--- a/tensorflow/python/summary/writer/writer_test.py
+++ b/tensorflow/python/summary/writer/writer_test.py
@@ -286,7 +286,7 @@ class FileWriterTestCase(test.TestCase):
   def testAddingSummariesFromSessionRunCalls(self):
     test_dir = self._CleanTestDir("global_step")
     sw = self._FileWriter(test_dir)
-    with self.test_session():
+    with self.cached_session():
       i = constant_op.constant(1, dtype=dtypes.int32, shape=[])
       l = constant_op.constant(2, dtype=dtypes.int64, shape=[])
       # Test the summary can be passed serialized.
@@ -437,7 +437,7 @@ class SessionBasedFileWriterTestCase(FileWriterTestCase):
       # Pass in test_session() as the session. It will be cached during this
       # test method invocation so that any other use of test_session() with no
       # graph should result in re-using the same underlying Session.
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         kwargs["session"] = sess
         return writer.FileWriter(*args, **kwargs)
     return writer.FileWriter(*args, **kwargs)
-- 
GitLab


From 347201aa1e866f5899d000f40251c4090bcf3c73 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Fri, 21 Sep 2018 00:46:12 -0700
Subject: [PATCH 0485/1357] keras/training.py: Improve error message.

Inspired by:
https://stackoverflow.com/questions/52428939/eager-mode-optimizers/

PiperOrigin-RevId: 213948133
---
 tensorflow/python/keras/engine/training.py | 38 ++++++++++++----------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 7df72d45b4..154c219dcc 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -383,27 +383,31 @@ class Model(Network):
     """
     # Validate that arguments passed by the user to `compile` are supported by
     # DistributionStrategy.
-    if distribute and not isinstance(
-        optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
-      raise NotImplementedError('Only TF native optimizers are supported with '
-                                'DistributionStrategy.')
-    if distribute and context.executing_eagerly():
-      raise NotImplementedError('DistributionStrategy is not supported in '
-                                'Eager mode.')
-    if distribute and sample_weight_mode:
-      raise NotImplementedError('sample_weight_mode is not supported with '
-                                'DistributionStrategy.')
-    if distribute and weighted_metrics:
-      raise NotImplementedError('weighted_metrics is not supported with '
-                                'DistributionStrategy.')
-    if distribute and target_tensors:
-      raise ValueError('target_tensors is not supported with '
-                       'DistributionStrategy.')
+    if distribute:
+      if not isinstance(
+          optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
+        raise NotImplementedError(
+            'optimizer must be an instance of '
+            'tf.train.Optimizer, not a %s' % type(optimizer))
+      if context.executing_eagerly():
+        raise NotImplementedError('DistributionStrategy is not supported '
+                                  'when eager execution is enabled.')
+      if sample_weight_mode:
+        raise NotImplementedError('sample_weight_mode is not supported with '
+                                  'DistributionStrategy.')
+      if weighted_metrics:
+        raise NotImplementedError('weighted_metrics is not supported with '
+                                  'DistributionStrategy.')
+      if target_tensors:
+        raise ValueError('target_tensors is not supported with '
+                         'DistributionStrategy.')
 
     loss = loss or {}
     if context.executing_eagerly() and not isinstance(
         optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
-      raise ValueError('Only TF native optimizers are supported in Eager mode.')
+      raise ValueError(
+          'optimizer must be an instance of tf.train.Optimizer, not '
+          'a %s' % type(optimizer))
 
     self.optimizer = optimizers.get(optimizer)
     # We've disabled automatic dependency tracking for this method, but do want
-- 
GitLab


From 04d86e99547f21947ca7a88f32fa2a0449ba4a5e Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 21 Sep 2018 00:50:20 -0700
Subject: [PATCH 0486/1357] Internal change.

PiperOrigin-RevId: 213948394
---
 tensorflow/tools/ci_build/builds/run_pip_tests.sh             | 2 +-
 tensorflow/tools/ci_build/ci_parameterized_build.sh           | 2 +-
 tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh   | 4 ++--
 tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/tools/ci_build/builds/run_pip_tests.sh b/tensorflow/tools/ci_build/builds/run_pip_tests.sh
index 4b762bf258..17198a6560 100755
--- a/tensorflow/tools/ci_build/builds/run_pip_tests.sh
+++ b/tensorflow/tools/ci_build/builds/run_pip_tests.sh
@@ -64,7 +64,7 @@ while true; do
   fi
 done
 
-TF_GPU_COUNT=${TF_GPU_COUNT:-8}
+TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 
 # PIP tests should have a "different" path. Different than the one we place
 # virtualenv, because we are deleting and recreating it here.
diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index cc09784c1d..49a9048c03 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -147,7 +147,7 @@ PIP_INTEGRATION_TESTS_FLAG="--integration_tests"
 ANDROID_CMD="${CI_BUILD_DIR}/builds/android.sh"
 ANDROID_FULL_CMD="${CI_BUILD_DIR}/builds/android_full.sh"
 
-TF_GPU_COUNT=${TF_GPU_COUNT:-8}
+TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 PARALLEL_GPU_TEST_CMD='//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute'
 
 BENCHMARK_CMD="${CI_BUILD_DIR}/builds/benchmark.sh"
diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index 03a2a07fb1..cd7206baf8 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -21,8 +21,8 @@
 # Required environment variables:
 #     TF_GPU_COUNT = Number of GPUs available.
 
-TF_GPU_COUNT=${TF_GPU_COUNT:-8}
-TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU:-4}
+TF_GPU_COUNT=${TF_GPU_COUNT:-4}
+TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU:-8}
 # We want to allow running one of the following configs:
 #  - 4 tests per GPU on k80
 #  - 8 tests per GPU on p100
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 28d5565b98..34847e637a 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -122,7 +122,7 @@ fi
 PIP_NAME=$(ls ${PY_TEST_DIR}/tensorflow_gpu-*.whl)
 reinstall_tensorflow_pip ${PIP_NAME}
 
-TF_GPU_COUNT=${TF_GPU_COUNT:-8}
+TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
-- 
GitLab


From de0e25fd221a341b1be48673e83c7437b34210f9 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Fri, 21 Sep 2018 01:37:52 -0700
Subject: [PATCH 0487/1357] [TF:XLA] Bump open source llvm revision to r342644

PiperOrigin-RevId: 213952786
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index d2e6f8def5..d0531f8193 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -491,11 +491,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "llvm",
         urls = [
-            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/ad72545325c087661feb3512efa54ebe5f888736.tar.gz",
-            "https://github.com/llvm-mirror/llvm/archive/ad72545325c087661feb3512efa54ebe5f888736.tar.gz",
+            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/db98902adc6431c9cc4ddec50fe174cfc9e626d6.tar.gz",
+            "https://github.com/llvm-mirror/llvm/archive/db98902adc6431c9cc4ddec50fe174cfc9e626d6.tar.gz",
         ],
-        sha256 = "66ed69443af00fbf9b912edbb6bc0fa796a12766b5e9ad504eb6b20f813dc163",
-        strip_prefix = "llvm-ad72545325c087661feb3512efa54ebe5f888736",
+        sha256 = "8c02d312b3d417cf9bc7e58ff53c2528bf77a5d839ce4a23b95bd04b9e5da023",
+        strip_prefix = "llvm-db98902adc6431c9cc4ddec50fe174cfc9e626d6",
         build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
     )
 
-- 
GitLab


From 287c73d3ff1df0b064d9028ad0fa59242a18a077 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Fri, 21 Sep 2018 01:50:01 -0700
Subject: [PATCH 0488/1357] [XLA:CPU] Re-enable half float tests for unary ops

This was blocked by an LLVM bug, which was fixed in r342542.

PiperOrigin-RevId: 213953743
---
 tensorflow/compiler/tests/unary_ops_test.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index 04ea004fe7..77f6eee0cf 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -158,9 +158,6 @@ class UnaryOpsTest(xla_test.XLATestCase):
 
   def testFloatOps(self):
     for dtype in self.float_types:
-      # TODO(b/77694432): Half test failed on CPU, last ran on 04-06-2018.
-      if dtype == np.float16 and self.device == "XLA_CPU":
-        continue
       x = np.arange(-0.90, 0.90, 0.25)
       self._assertOpOutputMatchesExpected(
           math_ops.acos, x.astype(dtype), expected=np.arccos(x).astype(dtype))
-- 
GitLab


From d1e9a1ed54cae9b0b10ab89c06d6d7f9b53af3a1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 02:08:33 -0700
Subject: [PATCH 0489/1357] compat: Update forward compatibility horizon to
 2018-09-21

PiperOrigin-RevId: 213955428
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 419c376b45..5e8f5d6e8e 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 20)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 21)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 200b89761a4665e3de6d0efc4e3e10ab287ad81b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 03:54:43 -0700
Subject: [PATCH 0490/1357] Added fetch support for attrs classes.

Given a class

@attr.s()
class SampleAttr(object):
  field_1 = attr.ib()
  field_2 = attr.ib()

we will be able to run

obj = SampleAttr(tensor_1, tensor_2)
session.run(obj) # equivalent with session.run([obj.field_1, obj.field_2])

Please note, this does not need nest flatten support (which is only relevant to the feed_dict argument).

Also, the information in __attrs_attrs__ is provided for extensions (as per the docs: http://www.attrs.org/en/stable/extending.html#extending-metadata) like this and is not an "implementation detail".

PiperOrigin-RevId: 213963978
---
 tensorflow/python/client/session.py      | 46 ++++++++++++-
 tensorflow/python/client/session_test.py | 82 ++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index ae0ad27f15..c963cfd334 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -178,16 +178,30 @@ def register_session_run_conversion_functions(
     feed_function_for_partial_run: A callable for specifying tensor values to
       feed when setting up a partial run, which takes a `tensor_type` type
       object as input, and returns a list of Tensors.
+
+  Raises:
+    ValueError: If `tensor_type` has already been registered.
   """
   for conversion_function in _REGISTERED_EXPANSIONS:
     if issubclass(conversion_function[0], tensor_type):
-      raise ValueError('%s has already been registered so ignore it.',
+      raise ValueError('%s has already been registered so ignore it.' %
                        tensor_type)
-      return
+
   _REGISTERED_EXPANSIONS.insert(0, (tensor_type, fetch_function, feed_function,
                                     feed_function_for_partial_run))
 
 
+def _is_attrs_instance(obj):
+  """Returns True if the given obj is an instance of attrs-decorated class."""
+  return getattr(obj.__class__, '__attrs_attrs__', None) is not None
+
+
+def _get_attrs_values(obj):
+  """Returns the list of values from an attrs instance."""
+  attrs = getattr(obj.__class__, '__attrs_attrs__')
+  return [getattr(obj, a.name) for a in attrs]
+
+
 class _FetchMapper(object):
   """Definition of the interface provided by fetch mappers.
 
@@ -247,6 +261,8 @@ class _FetchMapper(object):
       return _ListFetchMapper(fetch)
     elif isinstance(fetch, collections.Mapping):
       return _DictFetchMapper(fetch)
+    elif _is_attrs_instance(fetch):
+      return _AttrsFetchMapper(fetch)
     else:
       # Look for a handler in the registered expansions.
       for tensor_type, fetch_fn, _, _ in _REGISTERED_EXPANSIONS:
@@ -398,6 +414,32 @@ class _DictFetchMapper(_FetchMapper):
     return results
 
 
+class _AttrsFetchMapper(_FetchMapper):
+  """Fetch mapper for attrs decorated classes."""
+
+  def __init__(self, fetches):
+    """Creates a _AttrsFetchMapper.
+
+    Args:
+      fetches: An instance of an attrs decorated class.
+    """
+    values = _get_attrs_values(fetches)
+    self._fetch_type = type(fetches)
+    self._mappers = [
+        _FetchMapper.for_fetch(fetch) for fetch in values
+    ]
+    self._unique_fetches, self._value_indices = _uniquify_fetches(self._mappers)
+
+  def unique_fetches(self):
+    return self._unique_fetches
+
+  def build_results(self, values):
+    results = []
+    for m, vi in zip(self._mappers, self._value_indices):
+      results.append(m.build_results([values[j] for j in vi]))
+    return self._fetch_type(*results)
+
+
 class _FetchHandler(object):
   """Handler for structured fetches.
 
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index 4afc6399d5..f576435136 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -61,6 +61,12 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
 
+try:
+  import attr  # pylint:disable=g-import-not-at-top
+except ImportError:
+  attr = None
+
+
 # NOTE(mrry): Dummy shape registration for ops used in the tests, since they
 # don't have C++ op registrations on which to attach C++ shape fns.
 ops.RegisterShape('ConstructionFails')(common_shapes.unknown_shape)
@@ -300,6 +306,82 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertEqual(None, res[2])
       self.assertEqual(44.0, res[1])
 
+  def testFetchAttrs(self):
+    if attr is None:
+      self.skipTest('attr module is unavailable.')
+
+    @attr.s
+    class SampleAttr(object):
+      field1 = attr.ib()
+      field2 = attr.ib()
+
+    val1 = np.array([1.2, 3.4, 5.6])
+    val2 = np.array([[1, 2], [4, 3]])
+    val3 = np.array([10, 20, 30])
+
+    t1 = constant_op.constant(val1)
+    t2 = constant_op.constant(val2)
+
+    sample = SampleAttr(t1, t2)
+    with session.Session() as sess:
+      result = sess.run(sample)
+      self.assertIsInstance(result, SampleAttr)
+      self.assertAllEqual(val1, result.field1)
+      self.assertAllEqual(val2, result.field2)
+
+      result = sess.run(sample, feed_dict={sample.field1: val3})
+      self.assertIsInstance(result, SampleAttr)
+      self.assertAllEqual(val3, result.field1)
+      self.assertAllEqual(val2, result.field2)
+
+  def testFetchNestedAttrs(self):
+    if attr is None:
+      self.skipTest('attr module is unavailable.')
+
+    @attr.s
+    class SampleAttr(object):
+      field0 = attr.ib()
+      field1 = attr.ib()
+
+    v1 = 10
+    v2 = 20
+    v3 = np.float32(1.2)
+    v4 = np.float32(3.4)
+    v5 = np.float64(100.001)
+    v6 = np.float64(-23.451)
+    arr1 = np.array([1.2, 6.7, 3.4])
+    arr2 = np.array([7, 11, 3])
+    sample = SampleAttr(
+        SampleAttr(
+            SampleAttr(constant_op.constant(v1), constant_op.constant(v2)),
+            SampleAttr(constant_op.constant(arr1), constant_op.constant(arr2))),
+        {'A': SampleAttr(constant_op.constant(v3), constant_op.constant(v4)),
+         'B': [SampleAttr(constant_op.constant(v5), constant_op.constant(v6))]})
+
+    with session.Session() as sess:
+      result = sess.run(sample)
+      self.assertIsInstance(result, SampleAttr)
+      self.assertIsInstance(result.field0, SampleAttr)
+      self.assertIsInstance(result.field0.field0, SampleAttr)
+      self.assertIsInstance(result.field0.field1, SampleAttr)
+      self.assertIsInstance(result.field0.field1.field0, np.ndarray)
+      self.assertAllEqual(arr1, result.field0.field1.field0)
+      self.assertIsInstance(result.field0.field1.field1, np.ndarray)
+      self.assertAllEqual(arr2, result.field0.field1.field1)
+      self.assertIsInstance(result.field1, dict)
+      self.assertIn('A', result.field1)
+      self.assertIn('B', result.field1)
+      self.assertIsInstance(result.field1['A'], SampleAttr)
+      self.assertAllEqual(
+          [v3, v4],
+          [result.field1['A'].field0, result.field1['A'].field1])
+      self.assertIsInstance(result.field1['B'], list)
+      self.assertEqual(1, len(result.field1['B']))
+      self.assertIsInstance(result.field1['B'][0], SampleAttr)
+      self.assertAllEqual(
+          [v5, v6],
+          [result.field1['B'][0].field0, result.field1['B'][0].field1])
+
   def testFetchNestingEmptyOneLevel(self):
     with session.Session() as sess:
       a_val = 11.0
-- 
GitLab


From 035a84769de2921667677b5530011bbd558ddf0c Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Fri, 21 Sep 2018 07:19:09 -0700
Subject: [PATCH 0491/1357] Use weakrefs where absolutely safe to do so, in
 order to reduce the number of circular references. Replace unnecessary
 OrderedDict with a regular dict.

PiperOrigin-RevId: 213982097
---
 tensorflow/python/autograph/pyct/cfg.py             | 13 ++++++++++---
 .../autograph/pyct/static_analysis/activity.py      |  6 +++++-
 .../autograph/pyct/static_analysis/live_values.py   |  3 ++-
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/cfg.py b/tensorflow/python/autograph/pyct/cfg.py
index 1433f9ac83..fca0eb62e4 100644
--- a/tensorflow/python/autograph/pyct/cfg.py
+++ b/tensorflow/python/autograph/pyct/cfg.py
@@ -27,6 +27,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import weakref
 from enum import Enum
 
 # pylint:disable=g-bad-import-order
@@ -61,7 +62,10 @@ class Node(object):
 
   def freeze(self):
     self.next = frozenset(self.next)
-    self.prev = frozenset(self.prev)
+    # Assumption: All CFG nodes have identical life spans, because the graph
+    # owns them. Nodes should never be used outside the context of an existing
+    # graph.
+    self.prev = weakref.WeakSet(self.prev)
 
   def __repr__(self):
     if isinstance(self.ast_node, gast.FunctionDef):
@@ -256,7 +260,7 @@ class GraphBuilder(object):
     """Resets the state of this factory."""
     self.head = None
     self.errors = set()
-    self.node_index = collections.OrderedDict()
+    self.node_index = {}
 
     # TODO(mdan): Too many primitives. Use classes.
     self.leaves = set()
@@ -309,7 +313,10 @@ class GraphBuilder(object):
     """Grows the graph by adding a CFG node following the current leaves."""
     if ast_node is self.node_index:
       raise ValueError('%s added twice' % ast_node)
-    node = Node(next_=set(), prev=set(), ast_node=ast_node)
+    # Assumption: All CFG nodes have identical life spans, because the graph
+    # owns them. Nodes should never be used outside the context of an existing
+    # graph.
+    node = Node(next_=set(), prev=weakref.WeakSet(), ast_node=ast_node)
     self.node_index[ast_node] = node
     self.owners[node] = frozenset(self.active_stmts)
 
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py
index 9cb5991322..086eda7574 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py
@@ -22,6 +22,7 @@ from __future__ import division
 from __future__ import print_function
 
 import copy
+import weakref
 
 import gast
 
@@ -126,7 +127,10 @@ class Scope(object):
       self.parent.mark_read(name)
 
   def mark_param(self, name, owner):
-    self.params[name] = owner
+    # Assumption: all AST nodes have the same life span. This lets us use
+    # a weak reference to mark the connection between a symbol node and the
+    # function node whose argument that symbol is.
+    self.params[name] = weakref.ref(owner)
 
   def mark_creation(self, name, writes_create_symbol=False):
     """Mark a qualified name as created."""
diff --git a/tensorflow/python/autograph/pyct/static_analysis/live_values.py b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
index 3963772dad..36b9e7074d 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/live_values.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
@@ -89,7 +89,8 @@ class LiveValueResolver(transformer.Base):
 
       if has_single_def:
         def_, = defs
-        if def_.param_of is self.enclosing_entities[0]:
+        # Note: param_of is a weakref.
+        if def_.param_of and def_.param_of() is self.enclosing_entities[0]:
           if node.id in self.entity_info.arg_values:
             obj = self.entity_info.arg_values[node.id]
             anno.setanno(node, 'live_val', obj)
-- 
GitLab


From df31f7a0d6570055eebf0a19449aecbecdb748fa Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Fri, 21 Sep 2018 07:45:30 -0700
Subject: [PATCH 0492/1357] [TPU] Change the TPU DeviceAssignment class to use
 a flatter (replica, logical core) indexing scheme for cores.

Previously the DeviceAssignment class mixed both a general concept (a mapping from (replica, logical core) to physical TPU core) and a specific instantiation of that concept, by imposing a particular 3D grid structure on the logical core numbers. This was excessive ? while the physical core numbers have a particular structure, there is no need to impose any particular structure on the logical core numbers.

This change simplifies the DeviceAssignment scheme, changing it so logical cores within a replica are numbered sequentially without any particular semantics.

PiperOrigin-RevId: 213984629
---
 tensorflow/contrib/tpu/BUILD                  |   2 +-
 .../tpu/python/tpu/device_assignment.py       | 158 ++++++++----------
 tensorflow/contrib/tpu/python/tpu/topology.py |  15 ++
 tensorflow/contrib/tpu/python/tpu/tpu.py      |   9 +-
 .../contrib/tpu/python/tpu/tpu_context.py     |   4 +-
 tensorflow/contrib/tpu/python/tpu/tpu_feed.py |  22 +--
 6 files changed, 97 insertions(+), 113 deletions(-)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 87d00aca05..4e0b61227e 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -351,7 +351,7 @@ tf_py_test(
 
 tf_py_test(
     name = "topology_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/tpu/topology_test.py"],
     additional_deps = [
         ":tpu",
diff --git a/tensorflow/contrib/tpu/python/tpu/device_assignment.py b/tensorflow/contrib/tpu/python/tpu/device_assignment.py
index 471b1fa46c..b9e2a4287a 100644
--- a/tensorflow/contrib/tpu/python/tpu/device_assignment.py
+++ b/tensorflow/contrib/tpu/python/tpu/device_assignment.py
@@ -72,13 +72,12 @@ class DeviceAssignment(object):
         self._invert_topology(topology))
 
     topology_rank = self._topology_tasks.ndim
-    if core_assignment.ndim != topology_rank + 2:
-      raise ValueError("core_assignment must be a rank {} numpy array".format(
-          topology_rank + 2))
+    if core_assignment.ndim != 3:
+      raise ValueError("core_assignment must be a rank 3 numpy array, "
+                       "got shape {}".format(core_assignment.shape))
 
     self._num_replicas = core_assignment.shape[0]
-    self._computation_shape = np.array(
-        core_assignment.shape[1:-1], dtype=np.int32)
+    self._num_cores_per_replica = core_assignment.shape[1]
 
     if core_assignment.shape[-1] != topology_rank:
       raise ValueError(
@@ -107,18 +106,15 @@ class DeviceAssignment(object):
     """Computes a nested dict which maps task and logical core to replicas."""
     task_and_cores_to_replicas = {}
     for replica in xrange(core_assignment.shape[0]):
-      for dx in xrange(core_assignment.shape[1]):
-        for dy in xrange(core_assignment.shape[2]):
-          for dz in xrange(core_assignment.shape[3]):
-            x, y, z = core_assignment[replica, dx, dy, dz, :]
-            task_id = topology_tasks[x, y, z]
-            if task_id not in task_and_cores_to_replicas:
-              task_and_cores_to_replicas[task_id] = {}
-            logical_core = (dx, dy, dz)
-            if logical_core not in task_and_cores_to_replicas[task_id]:
-              task_and_cores_to_replicas[task_id][logical_core] = set()
-
-            task_and_cores_to_replicas[task_id][logical_core].add(replica)
+      for logical_core in xrange(core_assignment.shape[1]):
+        x, y, z = core_assignment[replica, logical_core, :]
+        task_id = topology_tasks[x, y, z]
+        if task_id not in task_and_cores_to_replicas:
+          task_and_cores_to_replicas[task_id] = {}
+        if logical_core not in task_and_cores_to_replicas[task_id]:
+          task_and_cores_to_replicas[task_id][logical_core] = set()
+
+        task_and_cores_to_replicas[task_id][logical_core].add(replica)
 
     task_to_sorted_replica_id = {}
 
@@ -135,24 +131,10 @@ class DeviceAssignment(object):
     """A `Topology` that describes the TPU topology."""
     return self._topology
 
-  @property
-  def computation_shape(self):
-    """The computation shape.
-
-    Returns:
-      A rank-1 int32 numpy array with size equal to the TPU topology rank.
-      Describes the logical shape in numbers of core of each replica of the
-      computation in the TPU topology.
-
-    Returns:
-      The computation shape.
-    """
-    return self._computation_shape
-
   @property
   def num_cores_per_replica(self):
     """The number of cores per replica."""
-    return np.prod(self.computation_shape)
+    return self._num_cores_per_replica
 
   @property
   def num_replicas(self):
@@ -164,33 +146,22 @@ class DeviceAssignment(object):
     """The logical to physical core mapping.
 
     Returns:
-      A numpy array of rank `topology_rank + 2`, with shape
-      `[num_replicas] + computation_shape + [topology_rank]`. Maps
-      (replica, logical core coordinates) pairs to physical topology
-      coordinates.
+      An integer numpy array of rank 3, with shape
+      `[num_replicas, num_cores_per_replica, topology_rank]`. Maps
+      (replica, logical core) pairs to physical topology coordinates.
     """
     return self._core_assignment
 
   def _coordinates(self, replica, logical_core):
     """Returns the physical topology coordinates of a logical core."""
-    if logical_core is None:
-      logical_core = np.array([0, 0, 0], np.int32)
-    else:
-      logical_core = np.asarray(logical_core)
-
-    if any(logical_core < 0) or any(logical_core >= self.computation_shape):
-      raise ValueError("Invalid core {}; computation shape is {}".format(
-          logical_core, self.computation_shape))
-
-    logical_offset = tuple([replica] + logical_core.tolist() + [slice(3)])
-    return tuple(self.core_assignment[logical_offset])
+    return tuple(self.core_assignment[replica, logical_core, :])
 
   def lookup_replicas(self, task_id, logical_core):
     """Lookup replica ids by task number and logical core.
 
     Args:
       task_id: TensorFlow task number.
-      logical_core: A tuple of three integers which represents a logical core.
+      logical_core: An integer, identifying a logical core.
     Returns:
       A sorted list of the replicas that are attached to that task and
       logical_core.
@@ -205,17 +176,17 @@ class DeviceAssignment(object):
           "Can not find any replica in task: {} contains logical_core: {} ".
           format(task_id, logical_core))
 
-  def tpu_ordinal(self, replica=0, logical_core=None):
+  def tpu_ordinal(self, replica=0, logical_core=0):
     """Returns the ordinal of the TPU device assigned to a logical core."""
     coordinates = self._coordinates(replica, logical_core)
     return self._topology_devices[coordinates]
 
-  def host_device(self, replica=0, logical_core=None, job=None):
+  def host_device(self, replica=0, logical_core=0, job=None):
     """Returns the CPU device attached to a logical core."""
     coordinates = self._coordinates(replica, logical_core)
     return _tpu_host_device_name(job, self._topology_tasks[coordinates])
 
-  def tpu_device(self, replica=0, logical_core=None, job=None):
+  def tpu_device(self, replica=0, logical_core=0, job=None):
     """Returns the name of the TPU device assigned to a logical core."""
     coordinates = self._coordinates(replica, logical_core)
     return _tpu_device_name(job, self._topology_tasks[coordinates],
@@ -228,6 +199,8 @@ def device_assignment(topology,
                       num_replicas=1):
   """Computes a device_assignment of a computation across a TPU topology.
 
+  Attempts to choose a compact grid of cores for locality.
+
   Returns a `DeviceAssignment` that describes the cores in the topology assigned
   to each core of each replica.
 
@@ -240,12 +213,12 @@ def device_assignment(topology,
       `initialize_system` using `Session.run`. Either a serialized
       `TopologyProto` or a `Topology` object may be passed. Note: you must
       evaluate the `Tensor` first; you cannot pass an unevaluated `Tensor` here.
-    computation_shape: A rank 1 int32 numpy array of size 3, describing the
-      shape of the computation's block of cores. If None, the
-      `computation_shape` is `[1, 1, 1]`.
-    computation_stride: A rank 1 int32 numpy array of size 3, describing the
-      inter-core spacing of the `computation_shape` cores in the TPU topology.
-      If None, the `computation_stride` is `[1, 1, 1]`.
+    computation_shape: A rank 1 int32 numpy array with size equal to the
+      topology rank, describing the shape of the computation's block of cores.
+      If None, the `computation_shape` is `[1] * topology_rank`.
+    computation_stride: A rank 1 int32 numpy array of size `topology_rank`,
+      describing the inter-core spacing of the `computation_shape` cores in the
+      TPU topology. If None, the `computation_stride` is `[1] * topology_rank`.
     num_replicas: The number of computation replicas to run. The replicas will
       be packed into the free spaces of the topology.
 
@@ -271,21 +244,21 @@ def device_assignment(topology,
   topology_rank = len(topology.mesh_shape)
   mesh_shape = topology.mesh_shape
   if computation_shape is None:
-    computation_shape = np.array([1, 1, 1], dtype=np.int32)
+    computation_shape = np.array([1] * topology_rank, dtype=np.int32)
   else:
     computation_shape = np.asarray(computation_shape, dtype=np.int32)
 
   if computation_stride is None:
-    computation_stride = np.array([1, 1, 1], dtype=np.int32)
+    computation_stride = np.array([1] * topology_rank, dtype=np.int32)
   else:
     computation_stride = np.asarray(computation_stride, dtype=np.int32)
 
-  if computation_shape.shape != (3,):
-    raise ValueError("computation_shape must have shape [3]; got {}".format(
-        computation_shape.shape))
-  if computation_stride.shape != (3,):
-    raise ValueError("computation_stride must have shape [3]; got {}".format(
-        computation_stride.shape))
+  if computation_shape.shape != (topology_rank,):
+    raise ValueError("computation_shape must have shape [{}]; got {}".format(
+        topology_rank, computation_shape.shape))
+  if computation_stride.shape != (topology_rank,):
+    raise ValueError("computation_stride must have shape [{}]; got {}".format(
+        topology_rank, computation_stride.shape))
 
   if any(computation_shape < 1):
     raise ValueError(
@@ -315,28 +288,41 @@ def device_assignment(topology,
             num_replicas, max_replicas, computation_shape, computation_stride,
             mesh_shape))
 
-  # Choose a compact layout for the cores. Choose the smaller dimension in the
-  # topology to be close to the square root of the number of replicas.
-  num_chips = int(math.ceil(num_replicas / replica_counts[2]))
-  target_size = int(math.ceil(math.sqrt(num_chips)))
-
-  # Prefer an even size, if possible. Odd numbered rows head back towards the
-  # first column, so it's best if the last row has an odd index.
-  if target_size % 2 != 0:
-    target_size -= 1
-  y_size = min(replica_counts[1], target_size)
-  if y_size * replica_counts[0] < num_chips:
-    y_size = replica_counts[1]
+  def ceil_of_ratio(n, m):
+    return (n + m - 1) // m
+
+  replica_shape = [0] * topology_rank
+  if num_replicas > 0:
+    remaining_replicas = num_replicas
+    remaining_dims = topology_rank
+
+    # Choose dimensions as close to an equal cube as possible, in order of
+    # increasing dimension size. By visiting dimensions in increasing size, we
+    # assign the most constrained dimension first, so we won't make infeasible
+    # choices.
+    #
+    # As a secondary sort order, visit the dimensions in reverse order. This
+    # means we try to use both cores on the same chip in preference to two cores
+    # on different chips.
+    for x, ni in sorted(((x, -i) for (i, x) in enumerate(replica_counts))):
+      i = -ni
+      target_size = int(math.ceil(remaining_replicas**(1.0 / remaining_dims)))
+      replica_shape[i] = min(target_size, x)
+      remaining_replicas = ceil_of_ratio(remaining_replicas, replica_shape[i])
+      remaining_dims -= 1
+
+    assert remaining_replicas == 1 and remaining_dims == 0
 
   # Assigns an offset to each replica such that no two replicas overlap.
-  replica_offsets = np.full([num_replicas, 3], -1, dtype=np.int32)
+  replica_offsets = np.full([num_replicas, topology_rank], -1, dtype=np.int32)
   for replica in xrange(num_replicas):
-    # Chooses a replica number in X/Y/Z axes.
-    z = replica % replica_counts[2]
-    t = replica // replica_counts[2]
-    y = t % y_size
-    x = t // y_size
-    replica_pos = np.array([x, y, z], dtype=np.int32)
+    # Chooses a replica number in each axis.
+    t = replica
+    pos = []
+    for dim in replica_shape[::-1]:
+      pos.append(t % dim)
+      t //= dim
+    replica_pos = np.array(pos[::-1], dtype=np.int32)
 
     # Determines where that replica starts in each axis.
     outer = replica_pos // computation_stride
@@ -351,6 +337,6 @@ def device_assignment(topology,
   indices = np.concatenate(
       [i[..., np.newaxis] for i in np.meshgrid(*indices, indexing="ij")],
       axis=-1)
-  assignment = (
-      indices + replica_offsets[:, np.newaxis, np.newaxis, np.newaxis, :])
+  indices = indices.reshape((-1, topology_rank))
+  assignment = indices + replica_offsets[:, np.newaxis, :]
   return DeviceAssignment(topology, core_assignment=assignment)
diff --git a/tensorflow/contrib/tpu/python/tpu/topology.py b/tensorflow/contrib/tpu/python/tpu/topology.py
index 1fb26e701a..ab89c6aa8c 100644
--- a/tensorflow/contrib/tpu/python/tpu/topology.py
+++ b/tensorflow/contrib/tpu/python/tpu/topology.py
@@ -111,6 +111,11 @@ class Topology(object):
     """A rank 1 int32 array describing the shape of the TPU topology."""
     return self._mesh_shape
 
+  @property
+  def mesh_rank(self):
+    """Returns the number of dimensions in the mesh."""
+    return len(self._mesh_shape)
+
   @property
   def device_coordinates(self):
     """Describes the mapping from TPU devices to topology coordinates.
@@ -125,6 +130,16 @@ class Topology(object):
     """
     return self._device_coordinates
 
+  @property
+  def num_tasks(self):
+    """Returns the number of TensorFlow tasks in the TPU slice."""
+    return self._device_coordinates.shape[0]
+
+  @property
+  def num_tpus_per_task(self):
+    """Returns the number of TPU devices per task in the TPU slice."""
+    return self._device_coordinates.shape[1]
+
   def serialized(self):
     """Returns the serialized form of the topology."""
     if self._serialized is None:
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 7815d81a5b..712b02ff0d 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -562,13 +562,14 @@ def split_compile_and_replicate(computation,
             device_assignment.core_assignment.flatten().tolist()
     }
     # TODO(phawkins): remove this case after the forward compatibility window
-    # expires on 2018-10-6.
-    if api_compat.forward_compatible(2018, 10, 6):
+    # expires on 2018-10-5.
+    if api_compat.forward_compatible(2018, 10, 5):
       metadata_kwargs["num_cores_per_replica"] = (
           device_assignment.num_cores_per_replica)
     else:
-      metadata_kwargs["computation_shape"] = (
-          device_assignment.computation_shape.tolist())
+      metadata_kwargs["computation_shape"] = [
+          device_assignment.num_cores_per_replica
+      ]
 
   if ((not isinstance(inputs, list)) or
       any(not isinstance(inp, (list, tuple)) for inp in inputs)):
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
index 3b45bbe75a..b1a8a16d1e 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
@@ -540,8 +540,8 @@ class _InternalTPUContext(object):
       """
       if self.model_parallelism_enabled:
         # We put both enqueue/dequeue ops at tpu.core(0) in each replica.
-        replica = self.device_assignment.lookup_replicas(
-            host_id, (0, 0, 0))[shard_index_in_host]
+        replica = self.device_assignment.lookup_replicas(host_id,
+                                                         0)[shard_index_in_host]
         return self.device_assignment.tpu_ordinal(replica=replica)
       else:
         return shard_index_in_host % self.num_of_cores_per_host
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py
index d9c77a3ea1..e75a09492e 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py
@@ -765,9 +765,8 @@ class _PartitionedInfeedQueue(InfeedQueue):
           zip(per_host_sharded_inputs[replica_index], inputs_part_dims_flat)
       ]
 
-      for core_index in xrange(self._device_assignment.num_cores_per_replica):
+      for logical_core in xrange(self._device_assignment.num_cores_per_replica):
         # Places different partitions to different logic cores.
-        logical_core = self._get_logical_core(core_index)
         replica_id = self._device_assignment.lookup_replicas(
             self._host_id, logical_core)[replica_index]
         ordinal = self._device_assignment.tpu_ordinal(
@@ -784,7 +783,7 @@ class _PartitionedInfeedQueue(InfeedQueue):
                   inputs=infeed_inputs,
                   shapes=[x.shape for x in infeed_inputs],
                   name="enqueue/replica_{0}/input_{1}".format(
-                      replica_index, core_index),
+                      replica_index, logical_core),
                   device_ordinal=ordinal))
     return per_host_enqueue_ops
 
@@ -890,20 +889,3 @@ class _PartitionedInfeedQueue(InfeedQueue):
     return nest.map_structure_up_to(
         dequeues, self._tag_sharding_attribute_for_dequeued_tensor, dequeues,
         dims)
-
-  def _get_logical_core(self, core_index):
-    """Maps the core index to the 3D coordinate within replica.
-
-      The lowest dimension number in computation_shape is the slowest varying
-      dimension (most major).
-
-    Args:
-      core_index: An integer represents the core index within replcia.
-
-    Returns:
-      A tuple with three integers which represents the 3D coordinate.
-    """
-    computation_shape = self._device_assignment.computation_shape
-    return (core_index // (computation_shape[1] * computation_shape[2]),
-            core_index % (computation_shape[1] * computation_shape[2]) //
-            computation_shape[2], core_index % computation_shape[2])
-- 
GitLab


From bf574689f1aa631bbbb19801050152690772108d Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Fri, 21 Sep 2018 08:33:14 -0700
Subject: [PATCH 0493/1357] [data-stats] Collects prefetch `buffer_size` and
 `buffer_capacity` as scalar, if stats_aggregator is associated with dataset.

PiperOrigin-RevId: 213989745
---
 .../kernel_tests/stats_dataset_ops_test.py    | 24 +++++++++++++++++++
 .../kernel_tests/stats_dataset_test_base.py   |  8 +++++++
 .../core/kernels/data/prefetch_dataset_op.cc  | 16 ++++++++++++-
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
index 14c5cffdf4..be8ae5e955 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
@@ -93,6 +93,8 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
         summary_str = sess.run(summary_t)
         self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                     float(i + 1))
+        self._assertSummaryContains(summary_str, "Prefetch::buffer_capacity")
+        self._assertSummaryContains(summary_str, "Prefetch::buffer_size")
         self._assertSummaryHasRange(summary_str, "Prefetch::buffer_utilization",
                                     0, 1)
       with self.assertRaises(errors.OutOfRangeError):
@@ -101,6 +103,28 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
       self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                   100)
 
+  def testPrefetchBufferScalars(self):
+    stats_aggregator = stats_ops.StatsAggregator()
+    dataset = dataset_ops.Dataset.range(10).map(
+        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(
+            0).apply(stats_ops.set_stats_aggregator(stats_aggregator))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer)
+      for i in range(10):
+        self.assertAllEqual(
+            np.array([i] * i, dtype=np.int64), sess.run(next_element))
+        summary_str = sess.run(summary_t)
+        self._assertSummaryHasScalarValue(summary_str,
+                                          "Prefetch::buffer_capacity", 0)
+        self._assertSummaryHasScalarValue(summary_str, "Prefetch::buffer_size",
+                                          0)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
   def testFilteredElementsStats(self):
     stats_aggregator = stats_ops.StatsAggregator()
     dataset = dataset_ops.Dataset.range(101).filter(
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
index 6951564091..b1b4c23510 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
@@ -25,6 +25,14 @@ from tensorflow.python.platform import test
 class StatsDatasetTestBase(test.TestCase):
   """Base class for testing statistics gathered in `StatsAggregator`."""
 
+  def _assertSummaryContains(self, summary_str, tag):
+    summary_proto = summary_pb2.Summary()
+    summary_proto.ParseFromString(summary_str)
+    for value in summary_proto.value:
+      if tag == value.tag:
+        return
+    self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto))
+
   def _assertSummaryHasCount(self, summary_str, tag, expected_value):
     summary_proto = summary_pb2.Summary()
     summary_proto.ParseFromString(summary_str)
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index 2a1e9c85f1..754ed772db 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -103,9 +103,9 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     Status GetNextInternal(IteratorContext* ctx,
                            std::vector<Tensor>* out_tensors,
                            bool* end_of_sequence) override {
+      auto stats_aggregator = ctx->stats_aggregator();
       {
         mutex_lock l(mu_);
-        auto stats_aggregator = ctx->stats_aggregator();
         TF_RETURN_IF_ERROR(EnsurePrefetchThreadStarted(ctx));
         // Wait until the next element in the buffer has been
         // produced, or we are shutting down.
@@ -136,6 +136,14 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
 
       mutex_lock parent_l(parent_mu_);
       mutex_lock l(mu_);
+      if (stats_aggregator) {
+        stats_aggregator->AddScalar(
+            strings::StrCat(prefix_end_, "::buffer_size"),
+            static_cast<float>(buffer_.size()));
+        stats_aggregator->AddScalar(
+            strings::StrCat(prefix_end_, "::buffer_capacity"),
+            static_cast<float>(auto_tuner_.buffer_limit()));
+      }
       return input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
     }
 
@@ -219,6 +227,12 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
             strings::StrCat(prefix_end_, "::buffer_utilization"),
             {static_cast<float>(buffer_.size()) /
              static_cast<float>(auto_tuner_.buffer_limit())});
+        stats_aggregator->AddScalar(
+            strings::StrCat(prefix_end_, "::buffer_size"),
+            static_cast<float>(buffer_.size()));
+        stats_aggregator->AddScalar(
+            strings::StrCat(prefix_end_, "::buffer_capacity"),
+            static_cast<float>(auto_tuner_.buffer_limit()));
       }
       // A new element is available. Forward the status from computing it, and
       // (if we successfully got an element) the output values.
-- 
GitLab


From b79b97cd23a7f8c308c9e8e6fcd425d6a8e9c243 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 08:42:59 -0700
Subject: [PATCH 0494/1357] Fix typo.

PiperOrigin-RevId: 213990950
---
 tensorflow/core/framework/tensor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index 5f5d2021a4..e412329498 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -154,7 +154,7 @@ class Tensor {
   /// Returns the estimated memory usage of this tensor.
   size_t TotalBytes() const;
 
-  // Returns the size of sallocated memory for this tensor.
+  // Returns the size of allocated memory for this tensor.
   size_t AllocatedBytes() const;
 
   /// Returns true iff this tensor is aligned.
-- 
GitLab


From 5877baddc72e3f234f6e0a174447becd4cabc493 Mon Sep 17 00:00:00 2001
From: Thomas Joerg <tjoerg@google.com>
Date: Fri, 21 Sep 2018 09:10:17 -0700
Subject: [PATCH 0495/1357] [XLA] Dump the original, unclustered graph with
 --tf_xla_clustering_debug.

So far, just the clustered graph is dumped.

PiperOrigin-RevId: 213994376
---
 tensorflow/compiler/jit/mark_for_compilation_pass.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index 1eaedbfbfb..133d982360 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -982,6 +982,11 @@ Status MarkForCompilationPass::RunImpl(
   // Names for each cluster.
   std::unordered_map<int, string> cluster_names;
 
+  if (flags->tf_xla_clustering_debug) {
+    dump_graph::DumpGraphToFile("before_mark_for_compilation", **options.graph,
+                                options.flib_def);
+  }
+
   // Mark clusters for compilation that:
   // * are placed on a device that requires compilation (an XlaDevice),
   // * are explicitly marked for compilation (_XlaCompile=true), or
-- 
GitLab


From 16a257eb598b7dfd220249babf8d18c984aab103 Mon Sep 17 00:00:00 2001
From: Guozhong Zhuang <guozhong.zhuang@intel.com>
Date: Fri, 21 Sep 2018 09:43:22 -0700
Subject: [PATCH 0496/1357] change back MICRA def - coding styling

---
 tensorflow/core/util/mkl_util.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 387e5ee5a6..f371fd6f95 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
-#define TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
+#ifndef TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
+#define TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
 #ifdef INTEL_MKL
 
 #include <string>
@@ -2222,4 +2222,4 @@ inline bool IsConv1x1StrideNot1(memory::dims filter_dims,
 
 }  // namespace tensorflow
 #endif  // INTEL_MKL
-#endif  // TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
+#endif  // TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
-- 
GitLab


From 233de7fe7efcf7c8fbcd4d3653a1f6d32feff5c8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 09:42:47 -0700
Subject: [PATCH 0497/1357] Update scipy to 1.1.0.

Code uses scipy.signal.stft, which was added in 0.19.

PiperOrigin-RevId: 213998546
---
 tensorflow/tools/ci_build/install/install_pip_packages.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index a9ae715c6a..4ced96f90b 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -68,8 +68,8 @@ else
   pip3 install --upgrade numpy==1.14.5
 fi
 
-pip2 install scipy==0.18.1
-pip3 install scipy==0.18.1
+pip2 install scipy==1.1.0
+pip3 install scipy==1.1.0
 
 pip2 install scikit-learn==0.18.1
 pip3 install scikit-learn==0.18.1
-- 
GitLab


From 29120b605eebe4518c31e774be389f70e5b59520 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 21 Sep 2018 16:50:02 +0000
Subject: [PATCH 0498/1357] Fix pylint issue

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/layers/python/layers/optimizers.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py
index d92de3b58c..2fdcd849b0 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers.py
@@ -21,8 +21,6 @@ from __future__ import print_function
 import six
 
 from tensorflow.contrib import framework as contrib_framework
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
-- 
GitLab


From 59a47b7d330a40971bad89f0e8aa282e79e889f1 Mon Sep 17 00:00:00 2001
From: Guozhong Zhuang <guozhong.zhuang@intel.com>
Date: Fri, 21 Sep 2018 09:56:29 -0700
Subject: [PATCH 0499/1357] refine a comment per Tatiana's suggestions

---
 tensorflow/core/util/mkl_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index f371fd6f95..2f2705de92 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -2098,7 +2098,7 @@ static inline memory::format get_desired_format(int channel,
              (channel % 8) == 0) {
     fmt_desired = is_2d
                       ? memory::format::nChw8c
-                      : memory::format::ncdhw;  // not support avx2 for 3d yet.
+                      : memory::format::ncdhw;  // no avx2 support for 3d yet.
   } else {
     fmt_desired = is_2d ? memory::format::nchw : memory::format::ncdhw;
   }
-- 
GitLab


From 4e252b2f997904769711b242bb37027706b08b7f Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Fri, 21 Sep 2018 09:53:02 -0700
Subject: [PATCH 0500/1357] Set device on resource touching ops before checking
 where to execute.

Thanks @alextp for finding the bug!

PiperOrigin-RevId: 213999971
---
 .../core/common_runtime/eager/execute.cc      | 41 ++++++++++---------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index 1da1326a9a..1bc63616d0 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -251,26 +251,6 @@ Status EagerLocalExecute(EagerOperation* op,
   EagerContext* ctx = op->EagerContext();
   auto status = ctx->GetStatus();
   if (!status.ok()) return status;
-  // Ensure all resource-touching ops run in the device the resource is,
-  // regardless of anything else that has been specified. This is identical to
-  // the graph mode behavior.
-  for (int i = 0; i < op->Inputs().size(); ++i) {
-    Device* input_op_device = nullptr;
-    status = op->Inputs()[i]->OpDevice(&input_op_device);
-    if (!status.ok()) return status;
-    VLOG(2) << "for op " << op->Name() << " input " << i << " "
-            << DataTypeString(op->Inputs()[i]->dtype) << " "
-            << (input_op_device == nullptr ? "cpu" : input_op_device->name())
-            << " " << (op->Device() == nullptr ? "cpu" : op->Device()->name());
-    if (op->Inputs()[i]->dtype == DT_RESOURCE &&
-        (input_op_device != op->Device() || input_op_device == nullptr)) {
-      Device* d = input_op_device == nullptr ? ctx->HostCPU() : input_op_device;
-      VLOG(1) << "Changing device of operation " << op->Name() << " to "
-              << d->name() << " because input #" << i
-              << " is a resource in this device.";
-      op->SetDevice(d);
-    }
-  }
   Device* device = op->Device();
 
   Fprint128 cache_key = op->MutableAttrs()->CacheKey(
@@ -604,6 +584,27 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals,
 Status EagerExecute(EagerOperation* op,
                     gtl::InlinedVector<TensorHandle*, 2>* retvals,
                     int* num_retvals) {
+  // Ensure all resource-touching ops run in the device the resource is,
+  // regardless of anything else that has been specified. This is identical to
+  // the graph mode behavior.
+  EagerContext* ctx = op->EagerContext();
+  for (int i = 0; i < op->Inputs().size(); ++i) {
+    Device* input_op_device = nullptr;
+    auto status = op->Inputs()[i]->OpDevice(&input_op_device);
+    if (!status.ok()) return status;
+    VLOG(2) << "for op " << op->Name() << " input " << i << " "
+            << DataTypeString(op->Inputs()[i]->dtype) << " "
+            << (input_op_device == nullptr ? "cpu" : input_op_device->name())
+            << " " << (op->Device() == nullptr ? "cpu" : op->Device()->name());
+    if (op->Inputs()[i]->dtype == DT_RESOURCE &&
+        (input_op_device != op->Device() || input_op_device == nullptr)) {
+      Device* d = input_op_device == nullptr ? ctx->HostCPU() : input_op_device;
+      VLOG(1) << "Changing device of operation " << op->Name() << " to "
+              << d->name() << " because input #" << i
+              << " is a resource in this device.";
+      op->SetDevice(d);
+    }
+  }
   bool op_is_local = IsLocal(op->EagerContext(), op->Device());
 
   if (op_is_local) {
-- 
GitLab


From 010e8ed731d0e10c82fccbf6c119180ca1a36efd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 10:21:48 -0700
Subject: [PATCH 0501/1357] Kernel signature reworking, misc fixes.

PiperOrigin-RevId: 214004752
---
 .../contrib/lite/kernels/internal/optimized/optimized_ops.h  | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 1a2d45166a..0999738396 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -2256,10 +2256,7 @@ inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr,
   const int output_rows = FlatSizeSkipDim(output_shape, 3);
   TFLITE_DCHECK_EQ(output_cols, filter_rows);
   TFLITE_DCHECK_EQ(output_rows, gemm_input_rows);
-  TFLITE_DCHECK_EQ(bias_shape.Dims(3), output_cols);
-  TFLITE_DCHECK_EQ(bias_shape.Dims(2), 1);
-  TFLITE_DCHECK_EQ(bias_shape.Dims(1), 1);
-  TFLITE_DCHECK_EQ(bias_shape.Dims(0), 1);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_cols);
 
   // MatrixBatchVectorMultiplyAccumulate assumes that each row of the second
   // input matrix has its own scale factor. This code duplicates the scale
-- 
GitLab


From eafd43b7e47508fd0eddfd389ea206be79f5dbe6 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 21 Sep 2018 10:37:23 -0700
Subject: [PATCH 0502/1357] Simple scaffold for parameter-server training with
 eager execution

PiperOrigin-RevId: 214007470
---
 tensorflow/contrib/eager/python/BUILD         |  13 +
 .../contrib/eager/python/parameter_server.py  | 289 ++++++++++++++++++
 .../contrib/eager/python/remote_test.py       |  20 ++
 3 files changed, 322 insertions(+)
 create mode 100644 tensorflow/contrib/eager/python/parameter_server.py

diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index 84517b57c7..9c3676629d 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -97,6 +97,18 @@ py_library(
     ],
 )
 
+py_library(
+    name = "parameter_server",
+    srcs = ["parameter_server.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:framework",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/eager:context",
+    ],
+)
+
 cuda_py_test(
     name = "saver_test",
     srcs = ["saver_test.py"],
@@ -241,6 +253,7 @@ py_test(
     srcs = ["remote_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":parameter_server",
         ":remote",
         "//tensorflow/contrib/eager/python:tfe",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/eager/python/parameter_server.py b/tensorflow/contrib/eager/python/parameter_server.py
new file mode 100644
index 0000000000..3a9e7b027e
--- /dev/null
+++ b/tensorflow/contrib/eager/python/parameter_server.py
@@ -0,0 +1,289 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""EXPERIMENTAL utilities for parameter server training with eager execution.
+
+Note: this should eventually be merged with the distribution strategy for
+ParameterServer.
+"""
+
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+import time
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training.checkpointable import base as checkpointable
+
+
+def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode):
+  """Creates a variable handle with information to do shape inference."""
+  container = ops.get_default_graph()._container  # pylint: disable=protected-access
+  if container is None:
+    container = ""
+  handle = resource_variable_ops.var_handle_op(shape=shape, dtype=dtype,
+                                               shared_name=shared_name,
+                                               name=name,
+                                               container=container)
+  if graph_mode:
+    return handle
+
+  with context.graph_mode(), ops.Graph().as_default() as graph:
+    h = resource_variable_ops.var_handle_op(shape=shape, dtype=dtype,
+                                            shared_name=shared_name,
+                                            name=name,
+                                            container=container)
+
+    # Tensor._handle_data contains information for the shape-inference code to
+    # know the shape and dtype of the variable pointed to by a handle. Since
+    # shape inference doesn't run in eager mode we copy this data here for when
+    # the handle is captured by an eager mode function.
+    # pylint: disable=protected-access
+    if ops._USE_C_SHAPES:
+      handle._handle_data = resource_variable_ops.get_resource_handle_data(h)
+    else:
+      if h._handle_data is None:
+        ops.set_shape_and_handle_data_for_outputs(h.op)
+      handle._handle_data = h._handle_data
+    # pylint: enable=protected-access
+  # Clean up op->graph->op reference cycles.
+  ops.dismantle_graph(graph)
+  return handle
+
+
+class SharedVariable(resource_variable_ops.ResourceVariable):
+  """Experimental Variable designed for parameter server training.
+
+  A SharedVariable has a name and two instances of SharedVariable with the
+  same name will have the same value, even if they are in different Sessions,
+  as long as they are placed on the same device.
+
+  The storage associated with SharedVariables is also not deleted when they go
+  out of scope.
+  """
+
+  def __init__(self,  # pylint: disable=super-init-not-called
+               initial_value=None,
+               trainable=True,
+               name=None,
+               dtype=None,
+               constraint=None,
+               initialize=True,
+               **unused_kwargs):
+    """Creates a variable.
+
+    Args:
+      initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
+        which is the initial value for the Variable. The initial value must have
+        a shape specified unless `validate_shape` is set to False. Can also be a
+        callable with no argument that returns the initial value when called.
+        (Note that initializer functions from init_ops.py must first be bound
+         to a shape before being used here.)
+      trainable: If `True`, automatically watches this variable on GradientTape
+        whenever it's used.
+      name: Optional name for the variable. Defaults to `'Variable'` and gets
+        uniquified automatically.
+      dtype: If set, initial_value will be converted to the given type.
+        If None, either the datatype will be kept (if initial_value is
+        a Tensor) or float32 will be used (if it is a Python object convertible
+        to a Tensor).
+      constraint: An optional projection function to be applied to the variable
+        after being updated by an `Optimizer` (e.g. used to implement norm
+        constraints or value constraints for layer weights). The function must
+        take as input the unprojected Tensor representing the value of the
+        variable and return the Tensor for the projected value
+        (which must have the same shape). Constraints are not safe to
+        use when doing asynchronous distributed training.
+      initialize: if True, runs initialization in eager execution; leaves the
+        variable uninitialized otherwise.
+
+    Raises:
+      ValueError: If the initial value is not specified, or does not have a
+        shape and `validate_shape` is `True`.
+    """
+    if initial_value is None:
+      raise ValueError("initial_value must be specified.")
+    init_from_fn = callable(initial_value)
+
+    if isinstance(initial_value, ops.Tensor) and hasattr(
+        initial_value, "graph") and initial_value.graph.building_function:
+      raise ValueError("Tensor-typed variable initializers must either be "
+                       "wrapped in an init_scope or callable "
+                       "(e.g., `tf.Variable(lambda : "
+                       "tf.truncated_normal([10, 40]))`) when building "
+                       "functions. Please file a feature request if this "
+                       "restriction inconveniences you.")
+
+    if constraint is not None and not callable(constraint):
+      raise ValueError("The `constraint` argument must be a callable.")
+
+    if isinstance(initial_value, checkpointable.CheckpointInitialValue):
+      self._maybe_initialize_checkpointable()
+      self._update_uid = initial_value.checkpoint_position.restore_uid
+      initial_value = initial_value.wrapped_value
+
+    self._trainable = trainable
+    self._save_slice_info = None
+    # Store the graph key so optimizers know how to only retrieve variables from
+    # this graph.
+    self._graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+    with ops.init_scope():
+      self._in_graph_mode = not context.executing_eagerly()
+      with ops.name_scope(name, "Variable", []
+                          if init_from_fn else [initial_value]) as name:
+        # pylint: disable=protected-access
+        handle_name = ops._name_from_scope_name(name)
+        shared_name = handle_name
+        if init_from_fn:
+          # Use attr_scope and device(None) to simulate the behavior of
+          # colocate_with when the variable we want to colocate with doesn't
+          # yet exist.
+          if self._in_graph_mode:
+            with ops.name_scope("Initializer"), ops.device(None):
+              initial_value = ops.convert_to_tensor(
+                  initial_value(), name="initial_value", dtype=dtype)
+            self._handle = _eager_safe_variable_handle(
+                shape=initial_value.get_shape(),
+                dtype=initial_value.dtype.base_dtype,
+                shared_name=shared_name,
+                name=name,
+                graph_mode=self._in_graph_mode)
+            self._shape = initial_value.get_shape()
+          else:
+            initial_value = initial_value()
+            with ops.name_scope("Initializer"):
+              initial_value = ops.convert_to_tensor(
+                  initial_value, name="initial_value", dtype=dtype)
+            self._handle = _eager_safe_variable_handle(
+                shape=initial_value.get_shape(),
+                dtype=initial_value.dtype.base_dtype,
+                shared_name=shared_name,
+                name=name,
+                graph_mode=False)
+            self._shape = initial_value.get_shape()
+        # pylint: enable=protected-access
+
+        # Or get the initial value from a Tensor or Python object.
+        else:
+          with ops.name_scope("Initializer"):
+            initial_value = ops.convert_to_tensor(
+                initial_value, name="initial_value", dtype=dtype)
+          # pylint: disable=protected-access
+          if (self._in_graph_mode and initial_value is not None and
+              initial_value.op._get_control_flow_context() is not None):
+            raise ValueError(
+                "Initializer for variable %s is from inside a control-flow "
+                "construct, such as a loop or conditional. When creating a "
+                "variable inside a loop or conditional, use a lambda as the "
+                "initializer." % name)
+          # pylint: enable=protected-access
+          self._handle = _eager_safe_variable_handle(
+              shape=initial_value.get_shape(),
+              dtype=initial_value.dtype.base_dtype,
+              shared_name=shared_name,
+              name=name,
+              graph_mode=self._in_graph_mode)
+          self._shape = initial_value.get_shape()
+
+        self._unique_id = shared_name
+        self._initial_value = initial_value if self._in_graph_mode else None
+        self._handle_name = handle_name + ":0"
+        self._dtype = initial_value.dtype.base_dtype
+        self._constraint = constraint
+
+        if self._in_graph_mode:
+          with ops.name_scope("IsInitialized"):
+            self._is_initialized_op = (
+                resource_variable_ops.var_is_initialized_op(self._handle))
+          if initial_value is not None:
+            with ops.name_scope("Assign") as n, ops.colocate_with(self._handle):
+              self._initializer_op = (
+                  resource_variable_ops.assign_variable_op(
+                      self._handle,
+                      self._try_guard_against_uninitialized_dependencies(
+                          initial_value),
+                      name=n))
+          with ops.name_scope("Read"), ops.colocate_with(self._handle):
+            # Manually assign reads to the handle's device to avoid log
+            # messages.
+            with ops.device(self._handle.device):
+              value = self._read_variable_op()
+            self._graph_element = value
+            self._cached_value = None
+        else:
+          if initialize:
+            resource_variable_ops.assign_variable_op(self._handle,
+                                                     initial_value)
+          self._is_initialized_op = None
+          self._initializer_op = None
+          self._graph_element = None
+          self._cached_value = None
+
+    self._handle_deleter = None
+    self._cached_shape_as_list = None
+
+
+@contextlib.contextmanager
+def parameter_server_scope(is_chief, ps_job_name, num_ps_tasks):
+  """Strategy to use parameter servers in eager.
+
+  Creates SharedVariable objects for variables created in this scope. These
+  SharedVariable objects will be placed round-robin on the parameter servers
+  specified by the ps_job_name and num_ps_tasks arguments.
+
+  To use parameter servers you need only to wrap your model initialization in
+  this scope:
+
+  ```
+  with tf.contrib.eager.parameter_server_scope(
+      is_chief, ps_job_name, num_ps_tasks):
+    my_model = tf.keras.Sequential([...])  # Or
+    input = tf.keras.Input(...)
+    ....
+    my_model = tf.keras.Model(input, output)
+  my_model.compile(...)
+  # or other usages of the model.
+  ```
+
+  Args:
+    is_chief: Boolean. Whether this worker is responsible for initializing
+      variables.
+    ps_job_name: The name of the ps job in this cluster.
+    num_ps_tasks: The number of ps tasks to use.
+
+  Yields:
+    a context manager.
+  """
+  # Note: capturing in a list to allow assignment.
+  ps_index = [0]
+
+  def variable_creator_scope(unused_next_creator, **kwargs):
+    kwargs["initialize"] = is_chief
+    with ops.device(
+        "/job:%s/task:%s" % (ps_job_name, ps_index[0] % num_ps_tasks)):
+      ps_index[0] += 1
+      v = SharedVariable(**kwargs)
+      if not is_chief:
+        while not resource_variable_ops.var_is_initialized_op(v.handle):
+          time.sleep(10)
+      return v
+
+  with variable_scope.variable_creator_scope(variable_creator_scope):
+    yield
diff --git a/tensorflow/contrib/eager/python/remote_test.py b/tensorflow/contrib/eager/python/remote_test.py
index 13029db975..ba6fe9701d 100644
--- a/tensorflow/contrib/eager/python/remote_test.py
+++ b/tensorflow/contrib/eager/python/remote_test.py
@@ -23,6 +23,7 @@ import os
 
 import numpy as np
 
+from tensorflow.contrib.eager.python import parameter_server
 from tensorflow.contrib.eager.python import remote
 from tensorflow.core.protobuf import cluster_pb2
 from tensorflow.core.protobuf import tensorflow_server_pb2
@@ -33,6 +34,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.training import server_lib
 
@@ -120,6 +122,24 @@ class RemoteExecutionTest(test.TestCase):
       y = math_ops.matmul(x1, x2)
     np.testing.assert_array_equal([[2, 2], [2, 2]], y.numpy())
 
+  def testParameterServer(self):
+    with parameter_server.parameter_server_scope(
+        is_chief=True, ps_job_name=JOB_NAME, num_ps_tasks=3):
+      v0 = variables.Variable([1.0], name="v0")
+      v1 = variables.Variable([2.0], name="v1")
+    v0.assign(v0 * v1)
+    self.assertAllEqual(v0.read_value(), [2.0])
+    self.assertAllEqual(v0.device,
+                        "/job:%s/replica:0/task:0/device:CPU:0" % JOB_NAME)
+    self.assertAllEqual(v1.device,
+                        "/job:%s/replica:0/task:1/device:CPU:0" % JOB_NAME)
+    v1.assign_add(v1)
+    # Simulate aliasing another variable of the same name as v1
+    with ops.device("/job:%s/replica:0/task:1/device:CPU:0" % JOB_NAME):
+      v1_replica = parameter_server.SharedVariable(
+          [1.0], name="v1", initialize=False)
+    self.assertAllEqual(v1_replica.read_value(), [4.0])
+
   @run_sync_and_async
   def testSimpleWeightRead(self):
     """Basic remote eager weight read."""
-- 
GitLab


From 61a9623ac31fd363aff8537df6c3b6073d721425 Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Fri, 21 Sep 2018 11:12:21 -0700
Subject: [PATCH 0503/1357] In standalone client mode, only run hooks on one
 thread.

PiperOrigin-RevId: 214013965
---
 .../python/distribute/estimator_training.py   | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/distribute/estimator_training.py b/tensorflow/python/distribute/estimator_training.py
index e17a598123..8daa34c885 100644
--- a/tensorflow/python/distribute/estimator_training.py
+++ b/tensorflow/python/distribute/estimator_training.py
@@ -182,6 +182,7 @@ def should_run_distribute_coordinator(config):
   # pylint: disable=protected-access
   if (not hasattr(config, '_distribute_coordinator_mode') or
       config._distribute_coordinator_mode is None):
+    logging.info('Not using Distribute Coordinator.')
     return False
   if (not isinstance(config._distribute_coordinator_mode, six.string_types) or
       config._distribute_coordinator_mode not in [
@@ -221,15 +222,28 @@ def train_and_evaluate(estimator, train_spec, eval_spec, executor_cls):
     local_estimator = copy.deepcopy(estimator)
     # pylint: disable=protected-access
     local_estimator._config._train_distribute = strategy
-    _init_run_config_from_worker_context(
-        local_estimator._config, dc_context.get_current_worker_context())
+    context = dc_context.get_current_worker_context()
+    _init_run_config_from_worker_context(local_estimator._config, context)
+    logging.info('Updated config: %s', str(vars(local_estimator._config)))
     local_estimator._train_distribution = strategy
     # pylint: enable=protected-access
 
+    # In the standalone client, we don't need to run hooks on all threads
+    # because logging hooks on all threads may be too much on the screen; also
+    # tensor passed to one hook can only be fetched with the graph where the
+    # tensor is defined. Other hooks such as checkpointing hooks will added by
+    # MonitoredTrainingSession.
+    # TODO(yuefengz): Is there a hook that does need to run on all threads in
+    # standalone client mode?
+    if (run_config._distribute_coordinator_mode ==  # pylint: disable=protected-access
+        dc.CoordinatorMode.INDEPENDENT_WORKER or context.is_chief):
+      hooks = list(train_spec.hooks)
+    else:
+      hooks = []
     local_estimator.train(
         input_fn=train_spec.input_fn,
         max_steps=train_spec.max_steps,
-        hooks=list(train_spec.hooks))
+        hooks=hooks)
 
   def _eval_fn(strategy):
     """Function for evaluator task."""
@@ -238,6 +252,7 @@ def train_and_evaluate(estimator, train_spec, eval_spec, executor_cls):
     local_estimator._config._eval_distribute = strategy
     _init_run_config_from_worker_context(
         local_estimator._config, dc_context.get_current_worker_context())
+    logging.info('Updated config: %s', str(vars(local_estimator._config)))
     local_estimator._eval_distribution = strategy
 
     executor = executor_cls(local_estimator, train_spec, eval_spec)
-- 
GitLab


From 9bce52aa9f75a94e121bbf360248e89e9226fd11 Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Fri, 21 Sep 2018 11:17:52 -0700
Subject: [PATCH 0504/1357] Add possibility to include default optimizers in
 custom optimizer list

---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 4b0cbfaa82..56ffc5a0ec 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -215,6 +215,16 @@ Status MetaOptimizer::InitializeCustomGraphOptimizers(
       TF_RETURN_IF_ERROR(custom_optimizer->Init(&optimizer_config));
       optimizers->push_back(std::move(custom_optimizer));
     } else {
+      // if there are no custom optimizers with given name, try to initalize a
+      // default optimizer. This way custom configurable optimizers can be
+      // mixed with default optimizers in any order.
+      auto optimizer = MakeNewOptimizer(optimizer_config.name());
+      if (optimizer) {
+        VLOG(2) << "Registered default graph optimizer: "
+                << optimizer_config.name();
+        optimizers->push_back(std::move(optimizer));
+        continue;
+      }
       VLOG(2) << "Can't register an optimizer by name: "
               << optimizer_config.name();
     }
-- 
GitLab


From 63f39c0cc2aeb6d375016de599863e91cdefd98c Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Fri, 21 Sep 2018 11:22:12 -0700
Subject: [PATCH 0505/1357] Workaround a Notebook bug in in c.NotebookApp.ip

See https://github.com/jupyter/notebook/issues/3946. I found this by
checking package version differences and hunting for recent issues on
Notebook's github page.
---
 tensorflow/tools/docker/jupyter_notebook_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/docker/jupyter_notebook_config.py b/tensorflow/tools/docker/jupyter_notebook_config.py
index 05dcefb099..4449e3501f 100644
--- a/tensorflow/tools/docker/jupyter_notebook_config.py
+++ b/tensorflow/tools/docker/jupyter_notebook_config.py
@@ -16,7 +16,7 @@ import os
 from IPython.lib import passwd
 
 c = c  # pylint:disable=undefined-variable
-c.NotebookApp.ip = '*'
+c.NotebookApp.ip = '0.0.0.0'  # https://github.com/jupyter/notebook/issues/3946
 c.NotebookApp.port = int(os.getenv('PORT', 8888))
 c.NotebookApp.open_browser = False
 
-- 
GitLab


From 268bf6b118646c8e93162d591263bca907c7db28 Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Fri, 21 Sep 2018 11:39:29 -0700
Subject: [PATCH 0506/1357] Removing dead code. With the addition of mkl slice
 using MKL DNN this code will not longer be executed

---
 tensorflow/core/kernels/slice_op.cc | 198 ----------------------------
 1 file changed, 198 deletions(-)

diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index 77594479cb..83377ffab5 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -228,190 +228,6 @@ class SliceOp : public OpKernel {
   }
 };
 
-#ifdef INTEL_MKL
-template <typename Device, typename T>
-class MklSliceOp : public OpKernel {
- public:
-  explicit MklSliceOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    TensorShape output_shape;
-    gtl::InlinedVector<int64, 4> begin;
-    gtl::InlinedVector<int64, 4> size;
-    Tensor* result = nullptr;
-    bool done = false;
-    SharedSliceCommonCases<T>(context, &output_shape, &begin, &size, &result,
-                              &done);
-    if (!context->status().ok() || done == true) return;
-
-    const Tensor& input = context->input(0);
-    const int input_dims = input.dims();
-
-    if (output_shape.num_elements() > 0) {
-      if (std::is_same<Device, CPUDevice>::value && input_dims == 2 &&
-          DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())) {
-        auto input = context->input(0).tensor<T, 2>();
-        auto output = result->tensor<T, 2>();
-        // TODO(agarwal): Consider multi-threading this loop for cases where
-        // size[0] is very large.
-        for (int i = 0; i < size[0]; ++i) {
-          const int64 row = begin[0] + i;
-          if (i + 1 < size[0]) {
-            port::prefetch<port::PREFETCH_HINT_T0>(&output(i + 1, 0));
-            port::prefetch<port::PREFETCH_HINT_T0>(&input(row + 1, begin[1]));
-          }
-          memcpy(&output(i, 0), &input(row, begin[1]), size[1] * sizeof(T));
-        }
-        return;
-      }
-#define HANDLE_DIM(NDIM)                            \
-  if (input_dims == NDIM) {                         \
-    HandleCase<NDIM>(context, begin, size, result); \
-    return;                                         \
-  }
-
-      HANDLE_DIM(1);
-      HANDLE_DIM(2);
-      HANDLE_DIM(3);
-      HANDLE_DIM(4);
-      HANDLE_DIM(5);
-      HANDLE_DIM(6);
-      HANDLE_DIM(7);
-
-#undef HANDLE_DIM
-
-      OP_REQUIRES(
-          context, false,
-          errors::Unimplemented("SliceOp : Unhandled input dimensions"));
-    }
-  }
-
- private:
-  // Helper function for DoesSliceShapeDifferInOnly1D. Checks if the following
-  // criteria matches for slice_dim: if indices for slice are 0 in all dims
-  // except slice_dim and if sizes of all the dimensions of the slice are same
-  // as the sizes of all the dimensions of the input except slice_dim, then
-  // returns True. Otherwise, returns False.
-  bool DoesSliceShapeDifferInOnly1DHelper(const TensorShape& input_shape,
-                                          const gtl::ArraySlice<int64>& begin,
-                                          const gtl::ArraySlice<int64>& size,
-                                          int slice_dim) {
-    for (int dim = 0; dim < 4; dim++) {
-      if (dim != slice_dim &&
-          (begin[dim] != 0 || size[dim] != input_shape.dim_size(dim))) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  // Is 'input' tensor being sliced over a single dimension out of 4?
-  //
-  // This check is applicable in the context of Slice of a 4-D tensor in
-  // NHWC or NCHW format over channel dimension.
-  //
-  // If indices for slice are 0 in all dims except one dimension and if sizes of
-  // all dimensions of slice are same as sizes of all dimensions of inputs
-  // except that dimension, then we are slicing over a single dimension.
-  //
-  // Returns True if Slicing over a single dimension, and sets slice_dim
-  // to the number of the dimension that satisfies criteria.
-  bool DoesSliceShapeDifferInOnly1D(const TensorShape& input_shape,
-                                    const gtl::ArraySlice<int64>& begin,
-                                    const gtl::ArraySlice<int64>& size,
-                                    int* slice_dim) {
-    for (int dim = 0; dim < 4; dim++) {
-      if (DoesSliceShapeDifferInOnly1DHelper(input_shape, begin, size, dim)) {
-        *slice_dim = dim;
-        return true;
-      }
-    }
-    return false;
-  }
-
-  template <int NDIM>
-  void HandleCase(OpKernelContext* context, const gtl::ArraySlice<int64>& begin,
-                  const gtl::ArraySlice<int64>& size, Tensor* result) {
-    int slice_dim = -1;
-    TensorShape in_shape = context->input(0).shape();
-    // Special case for handling 4-D tensor slice when shape of the slice
-    // differs from the input tensor in only 1 out of 4 dimensions.
-    // This case arises in the context of Slice of 4-D tensor in NHWC or NCHW
-    // format over channel dimension.
-    if (NDIM == 4 &&
-        DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) {
-      size_t in_strides[4] = {
-          (size_t)in_shape.dim_size(1) * in_shape.dim_size(2) *
-              in_shape.dim_size(3),
-          (size_t)in_shape.dim_size(2) * in_shape.dim_size(3),
-          (size_t)in_shape.dim_size(3), (size_t)1};
-
-      size_t out_strides[4] = {(size_t)size[1] * size[2] * size[3],
-                               (size_t)size[2] * size[3], (size_t)size[3],
-                               (size_t)1};
-
-      T* in_buf = const_cast<T*>(
-          const_cast<const T*>(context->input(0).flat<T>().data()));
-      T* op_buf = result->flat<T>().data();
-
-      if (slice_dim == 1) {
-        /* data format = NCHW */
-
-#pragma omp parallel for
-        for (ssize_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) {
-          T* ip = in_buf + (d0 * in_strides[0]);
-          T* op = op_buf + ((d0 - begin[0]) * out_strides[0]);
-#pragma omp parallel for
-          for (ssize_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) {
-            T* ip1 = ip + (d1 * in_strides[1]);
-            T* op1 = op + ((d1 - begin[1]) * out_strides[1]);
-            // For NCHW, H and W will be contiguous. So we can copy
-            // both with one memcpy.
-            memcpy(static_cast<void*>(op1), static_cast<void*>(ip1),
-                   sizeof(T) * in_strides[1]);
-          }
-        }
-        return;
-      } else if (slice_dim == 3) {
-        /* data_format = NHWC */
-
-#pragma omp parallel for
-        for (ssize_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) {
-          T* ip = in_buf + (d0 * in_strides[0]);
-          T* op = op_buf + ((d0 - begin[0]) * out_strides[0]);
-#pragma omp parallel for
-          for (ssize_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) {
-            T* ip1 = ip + (d1 * in_strides[1]);
-            T* op1 = op + ((d1 - begin[1]) * out_strides[1]);
-#pragma omp parallel for
-            for (ssize_t d2 = begin[2]; d2 < begin[2] + size[2]; d2++) {
-              T* ip2 = ip1 + (d2 * in_strides[2]);
-              T* ip3 = ip2 + begin[3];
-              T* op2 = op1 + ((d2 - begin[2]) * out_strides[2]);
-              T* op3 = op2;
-              memcpy(static_cast<void*>(op3), static_cast<void*>(ip3),
-                     sizeof(T) * size[3]);
-            }
-          }
-        }
-        return;
-      }
-      // slice_dim is not 1 or 3, then we fallback to Eigen implementation.
-    }
-
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;
-    for (int i = 0; i < NDIM; ++i) {
-      indices[i] = begin[i];
-      sizes[i] = size[i];
-    }
-
-    functor::Slice<Device, T, NDIM>()(
-        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
-        context->input(0).tensor<T, NDIM>(), indices, sizes);
-  }
-};
-#endif
 
 // Forward declarations of the functor specializations for declared in the
 // sharded source files.
@@ -440,7 +256,6 @@ TF_CALL_ALL_TYPES(DECLARE_FOR_N);
 #undef DECLARE_CPU_SPEC
 }  // namespace functor
 
-#ifndef INTEL_MKL
 #define REGISTER_SLICE(type)                             \
   REGISTER_KERNEL_BUILDER(Name("Slice")                  \
                               .Device(DEVICE_CPU)        \
@@ -452,19 +267,6 @@ TF_CALL_ALL_TYPES(DECLARE_FOR_N);
 TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
 TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
 #undef REGISTER_SLICE
-#else
-#define REGISTER_SLICE(type)                             \
-  REGISTER_KERNEL_BUILDER(Name("Slice")                  \
-                              .Device(DEVICE_CPU)        \
-                              .TypeConstraint<type>("T") \
-                              .HostMemory("begin")       \
-                              .HostMemory("size"),       \
-                          MklSliceOp<CPUDevice, type>)
-
-TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
-TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
-#undef REGISTER_SLICE
-#endif  // INTEL_MKL
 
 #if GOOGLE_CUDA
 // Forward declarations of the functor specializations for GPU.
-- 
GitLab


From 7461ff7837bb9c57f0020d8adf46a73596dfb77d Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 21 Sep 2018 12:22:53 -0700
Subject: [PATCH 0507/1357] Fix broken build.

PiperOrigin-RevId: 214025729
---
 tensorflow/contrib/eager/python/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index 9c3676629d..33a1d572a2 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -14,6 +14,7 @@ py_library(
         ":datasets",
         ":metrics",
         ":network",
+        ":parameter_server",
         ":remote",
         ":saver",
         "//tensorflow/python:framework_ops",
-- 
GitLab


From 0598bbc08098e5ed0ec6f9029f301c725fcd9530 Mon Sep 17 00:00:00 2001
From: drpngx <drpngx@users.noreply.github.com>
Date: Fri, 21 Sep 2018 12:29:30 -0700
Subject: [PATCH 0508/1357] Minor style fix.

---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 56ffc5a0ec..79f8cfe35e 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -215,8 +215,8 @@ Status MetaOptimizer::InitializeCustomGraphOptimizers(
       TF_RETURN_IF_ERROR(custom_optimizer->Init(&optimizer_config));
       optimizers->push_back(std::move(custom_optimizer));
     } else {
-      // if there are no custom optimizers with given name, try to initalize a
-      // default optimizer. This way custom configurable optimizers can be
+      // If there are no custom optimizers with given name, try to initalize a
+      // default optimizer. This way, custom configurable optimizers can be
       // mixed with default optimizers in any order.
       auto optimizer = MakeNewOptimizer(optimizer_config.name());
       if (optimizer) {
-- 
GitLab


From 25c1a4441bbf364c8ed263f75e0bebad30f6599c Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 21 Sep 2018 12:38:07 -0700
Subject: [PATCH 0509/1357] [tf.data] Add a ConverterRegistry for vectorization
 converters

PiperOrigin-RevId: 214027910
---
 .../core/grappler/optimizers/data/BUILD       |  1 +
 .../optimizers/data/vectorization/BUILD       | 69 +++++++++++++++++
 .../data/vectorization/cast_vectorizer.cc     | 54 +++++++++++++
 .../data/vectorization/unpack_vectorizer.cc   | 61 +++++++++++++++
 .../data/vectorization/vectorizer.h           | 49 ++++++++++++
 .../data/vectorization/vectorizer_registry.cc | 47 ++++++++++++
 .../data/vectorization/vectorizer_registry.h  | 75 +++++++++++++++++++
 .../vectorization/vectorizer_registry_test.cc | 50 +++++++++++++
 .../optimizers/data/vectorization_utils.cc    | 70 ++---------------
 9 files changed, 414 insertions(+), 62 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/BUILD
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc

diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 79d5fe87b6..cf305cebe1 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -464,6 +464,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core/grappler:utils",
+        "//tensorflow/core/grappler/optimizers/data/vectorization",
         "//tensorflow/core/grappler/utils:functions",
     ] + tf_protos_all(),
 )
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
new file mode 100644
index 0000000000..1462cb234d
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -0,0 +1,69 @@
+package(
+    default_visibility = ["//visibility:private"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all")
+
+VECTORIZER_DEPS = [
+    ":vectorizer_registry",
+    "//tensorflow/core/grappler/optimizers/data:function_utils",
+] + tf_protos_all()
+
+cc_library(
+    name = "vectorizer",
+    hdrs = ["vectorizer.h"],
+    deps = [
+        "//tensorflow/core:lib",
+    ] + tf_protos_all(),
+)
+
+cc_library(
+    name = "vectorizer_registry",
+    srcs = ["vectorizer_registry.cc"],
+    hdrs = ["vectorizer_registry.h"],
+    deps = [
+        ":vectorizer",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+cc_library(
+    name = "cast_vectorizer",
+    srcs = ["cast_vectorizer.cc"],
+    deps = VECTORIZER_DEPS,
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "unpack_vectorizer",
+    srcs = ["unpack_vectorizer.cc"],
+    deps = VECTORIZER_DEPS,
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "vectorization",
+    hdrs = ["vectorizer_registry.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":cast_vectorizer",
+        ":unpack_vectorizer",
+        ":vectorizer",
+        ":vectorizer_registry",
+    ],
+)
+
+tf_cc_test(
+    name = "vectorizer_registry_test",
+    srcs = ["vectorizer_registry_test.cc"],
+    deps = [
+        ":vectorizer_registry",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ] + tf_protos_all(),
+)
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
new file mode 100644
index 0000000000..c1739737a0
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
@@ -0,0 +1,54 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace vectorization_utils {
+
+class CastVectorizer : public Vectorizer {
+ public:
+  Status Vectorize(const NodeDef& node, gtl::ArraySlice<string> inputs,
+                   FunctionDef* outer_scope,
+                   std::map<string, string>* conversion_map) override {
+    if (inputs.size() != 1) {
+      return errors::Internal("Cast op should only have one input.");
+    }
+
+    // Add new Cast node
+    NodeDef* new_cast_node = outer_scope->add_node_def();
+    *new_cast_node = node;
+    new_cast_node->clear_name();
+    function_utils::SetUniqueFunctionNodeName(
+        strings::StrCat("vectorized/", node.name()), outer_scope,
+        new_cast_node);
+    new_cast_node->set_input(0, inputs[0]);
+
+    // Add the output mapping to conversion map
+    (*conversion_map)[strings::StrCat(node.name(), ":y:0")] =
+        strings::StrCat(new_cast_node->name(), ":y:0");
+
+    return Status::OK();
+  }
+};
+
+REGISTER_VECTORIZER("Cast", CastVectorizer);
+
+}  // namespace vectorization_utils
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
new file mode 100644
index 0000000000..776d3179c5
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
@@ -0,0 +1,61 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace vectorization_utils {
+
+class UnpackVectorizer : public Vectorizer {
+ public:
+  Status Vectorize(const NodeDef& node, gtl::ArraySlice<string> inputs,
+                   FunctionDef* outer_scope,
+                   std::map<string, string>* conversion_map) override {
+    if (inputs.size() != 1) {
+      return errors::Internal("Unpack op should only have one input.");
+    }
+
+    // Add new Unpack node
+    NodeDef* new_unpack_node = outer_scope->add_node_def();
+    *new_unpack_node = node;
+    new_unpack_node->clear_name();
+    function_utils::SetUniqueFunctionNodeName(
+        strings::StrCat("vectorized/", node.name()), outer_scope,
+        new_unpack_node);
+
+    // Increment "axis" attr by 1:
+    (*new_unpack_node->mutable_attr())["axis"].set_i(
+        node.attr().at("axis").i() + 1);
+    new_unpack_node->set_input(0, inputs[0]);
+
+    // Add the output mappings to conversion map
+    int num = new_unpack_node->attr().at("num").i();
+    for (int i = 0; i < num; ++i) {
+      (*conversion_map)[strings::StrCat(node.name(), ":output:", i)] =
+          strings::StrCat(new_unpack_node->name(), ":output:", i);
+    }
+
+    return Status::OK();
+  }
+};
+
+REGISTER_VECTORIZER("Unpack", UnpackVectorizer);
+
+}  // namespace vectorization_utils
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
new file mode 100644
index 0000000000..d341dbba7d
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_H_
+
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace vectorization_utils {
+
+// Interface for vectorization of TensorFlow operations. See `CastVectorizer`
+// for an example.
+class Vectorizer {
+ public:
+  virtual ~Vectorizer() {}
+
+  // Vectorizes an operation, `node`, by adding operation(s) to `outer_scope`
+  // that produce the same vector output(s) as executing `node`'s op
+  // on elements of the vector inputs, and adding mappings to `conversion_map`
+  // from old output tensor names to new (vectorized) output tensor names.
+  // The new node(s) collectively have the same number of inputs and outputs as
+  // the node being converted, and use the tensor names in `inputs` as their
+  // inputs.
+  virtual Status Vectorize(const NodeDef& node, gtl::ArraySlice<string> inputs,
+                           FunctionDef* outer_scope,
+                           std::map<string, string>* conversion_map) = 0;
+};
+
+}  // namespace vectorization_utils
+}  // namespace grappler
+}  // namespace tensorflow
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_H_
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc
new file mode 100644
index 0000000000..a6551e36ac
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc
@@ -0,0 +1,47 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace vectorization_utils {
+
+VectorizerRegistry* VectorizerRegistry::Global() {
+  static VectorizerRegistry* registry = new VectorizerRegistry;
+  return registry;
+}
+
+Vectorizer* VectorizerRegistry::Get(const string& op_type) {
+  auto found = vectorizers_.find(op_type);
+  if (found == vectorizers_.end()) {
+    return nullptr;
+  }
+  return found->second.get();
+}
+
+void VectorizerRegistry::Register(const string& op_type,
+                                  std::unique_ptr<Vectorizer> vectorizer) {
+  auto existing = Get(op_type);
+  CHECK_EQ(existing, nullptr)
+      << "Vectorizer for op type: " << op_type << " already registered";
+  vectorizers_.insert(std::pair<const string&, std::unique_ptr<Vectorizer>>(
+      op_type, std::move(vectorizer)));
+}
+}  // namespace vectorization_utils
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h
new file mode 100644
index 0000000000..16159d47ca
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h
@@ -0,0 +1,75 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_REGISTRY_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_REGISTRY_H_
+
+#include <functional>
+#include <map>
+
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace vectorization_utils {
+
+// A global VectorizerRegistry is used to hold all the vectorizers.
+class VectorizerRegistry {
+ public:
+  // Returns a pointer to a global VectorizerRegistry object.
+  static VectorizerRegistry* Global();
+
+  // Returns a pointer to a vectorizer that can vectorize an op for the op type.
+  Vectorizer* Get(const string& op_type);
+
+  // Registers a vectorizer that can vectorize an op for the given op type.
+  void Register(const string& op_type, std::unique_ptr<Vectorizer> vectorizer);
+
+ private:
+  std::map<string, std::unique_ptr<Vectorizer>> vectorizers_;
+};
+
+namespace vectorizer_registration {
+
+class VectorizerRegistration {
+ public:
+  VectorizerRegistration(const string& op_type,
+                         std::unique_ptr<Vectorizer> vectorizer) {
+    VectorizerRegistry::Global()->Register(op_type, std::move(vectorizer));
+  }
+};
+
+}  // namespace vectorizer_registration
+
+#define REGISTER_VECTORIZER(op_type, vectorizer) \
+  REGISTER_VECTORIZER_UNIQ_HELPER(__COUNTER__, op_type, vectorizer)
+
+#define REGISTER_VECTORIZER_UNIQ_HELPER(ctr, op_type, vectorizer) \
+  REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer)
+
+#define REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer)                  \
+  static ::tensorflow::grappler::vectorization_utils::                      \
+      vectorizer_registration::VectorizerRegistration                       \
+          vectorizer_registration_##ctr(                                    \
+              op_type,                                                      \
+              ::std::unique_ptr<                                            \
+                  ::tensorflow::grappler::vectorization_utils::Vectorizer>( \
+                  new vectorizer()))
+
+}  // namespace vectorization_utils
+}  // namespace grappler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_REGISTRY_H_
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
new file mode 100644
index 0000000000..86e303564b
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
@@ -0,0 +1,50 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace vectorization_utils {
+
+class TestVectorizer : public Vectorizer {
+ public:
+  Status Vectorize(const NodeDef& node, gtl::ArraySlice<string> inputs,
+                   FunctionDef* outer_scope,
+                   std::map<string, string>* conversion_map) override {
+    return Status::OK();
+  }
+};
+
+REGISTER_VECTORIZER("test_op", TestVectorizer);
+
+TEST(TestVectorizer, TestTestVectorizer) {
+  EXPECT_EQ(VectorizerRegistry::Global()->Get("nonexistent"), nullptr);
+
+  auto vectorizer = VectorizerRegistry::Global()->Get("test_op");
+  EXPECT_NE(vectorizer, nullptr);
+
+  FunctionDef function;
+  NodeDef node;
+  std::map<string, string> conversion_map;
+  EXPECT_TRUE(vectorizer->Vectorize(node, {}, &function, &conversion_map).ok());
+}
+
+}  // namespace vectorization_utils
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index bfca63b820..cb56b65985 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
 
 #include "absl/strings/str_join.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
@@ -90,59 +91,6 @@ void RemoveMapDefunOutput(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
       ->ExtractSubrange(output_position, 1, nullptr);
 }
 
-Status ConvertCastOp(FunctionDef* outer_scope, gtl::ArraySlice<string> inputs,
-                     const NodeDef& cast_node,
-                     std::map<string, string>* conversion_map) {
-  if (inputs.size() != 1) {
-    return errors::Internal("Cast op should only have one input.");
-  }
-
-  // Add new Cast node
-  NodeDef* new_cast_node = outer_scope->add_node_def();
-  *new_cast_node = cast_node;
-  new_cast_node->clear_name();
-  function_utils::SetUniqueFunctionNodeName(
-      strings::StrCat("vectorized/", cast_node.name()), outer_scope,
-      new_cast_node);
-  new_cast_node->set_input(0, inputs[0]);
-
-  // Add the output mapping to conversion map
-  (*conversion_map)[strings::StrCat(cast_node.name(), ":y:0")] =
-      strings::StrCat(new_cast_node->name(), ":y:0");
-
-  return Status::OK();
-}
-
-Status ConvertUnpackOp(FunctionDef* outer_scope, gtl::ArraySlice<string> inputs,
-                       const NodeDef& unpack_node,
-                       std::map<string, string>* conversion_map) {
-  if (inputs.size() != 1) {
-    return errors::Internal("Unpack op should only have one input.");
-  }
-
-  // Add new Unpack node
-  NodeDef* new_unpack_node = outer_scope->add_node_def();
-  *new_unpack_node = unpack_node;
-  new_unpack_node->clear_name();
-  function_utils::SetUniqueFunctionNodeName(
-      strings::StrCat("vectorized/", unpack_node.name()), outer_scope,
-      new_unpack_node);
-
-  // Increment "axis" attr by 1:
-  (*new_unpack_node->mutable_attr())["axis"].set_i(
-      unpack_node.attr().at("axis").i() + 1);
-  new_unpack_node->set_input(0, inputs[0]);
-
-  // Add the output mappings to conversion map
-  int num = new_unpack_node->attr().at("num").i();
-  for (int i = 0; i < num; ++i) {
-    (*conversion_map)[strings::StrCat(unpack_node.name(), ":output:", i)] =
-        strings::StrCat(new_unpack_node->name(), ":output:", i);
-  }
-
-  return Status::OK();
-}
-
 int FindOutputToConvert(const FunctionDef& function,
                         const std::set<string>& unconvertible,
                         FunctionDefTensorDesc* f) {
@@ -239,17 +187,15 @@ Status Vectorization::AddConversionMappingFromOp(
         ":output:", map_defun_fn_->signature().output_arg_size() + i));
   }
 
-  if (node.op() == "Cast") {
-    TF_RETURN_IF_ERROR(
-        ConvertCastOp(outer_scope_, promoted_inputs, node, &conversion_map_));
-  } else if (node.op() == "Unpack") {
-    TF_RETURN_IF_ERROR(
-        ConvertUnpackOp(outer_scope_, promoted_inputs, node, &conversion_map_));
-  } else {
-    return errors::Unimplemented("Op converter for \"", node.op(),
-                                 "\" not implemented yet");
+  auto vectorizer = VectorizerRegistry::Global()->Get(node.op());
+  if (vectorizer == nullptr) {
+    return errors::Unimplemented("No vectorizer registered for op: ",
+                                 node.op());
   }
 
+  TF_RETURN_IF_ERROR(vectorizer->Vectorize(node, promoted_inputs, outer_scope_,
+                                           &conversion_map_));
+
   // If we get here, the conversion was successful, so we promote the inputs
   // of the ops to MapDefun outputs.
   for (int i = 0; i < types.size(); ++i) {
-- 
GitLab


From adb5d74f52917d00e9a779a74f0e0a4e5ca22ca4 Mon Sep 17 00:00:00 2001
From: Martin Wicke <577277+martinwicke@users.noreply.github.com>
Date: Fri, 21 Sep 2018 12:45:18 -0700
Subject: [PATCH 0510/1357] Fix lint errors

---
 tensorflow/python/data/ops/dataset_ops.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 1b9ea2ed08..28a36bfb32 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1009,8 +1009,9 @@ class Dataset(object):
   def flat_map(self, map_func):
     """Maps `map_func` across this dataset and flattens the result. 
     
-    Use `flat_map` if you want to make sure, that the order of your dataset stays the same.
-    For example, to flatten a dataset of batches into a dataset of their elements:
+    Use `flat_map` if you want to make sure that the order of your dataset
+    stays the same. For example, to flatten a dataset of batches into a
+    dataset of their elements:
 
     ```python
     # NOTE: The following examples use `{ ... }` to represent the
@@ -1022,7 +1023,8 @@ class Dataset(object):
     ```
     
     `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since 
-    `flat_map` produces the same output as `tf.data.Dataset.interleave(cycle_length=1)`
+    `flat_map` produces the same output as 
+    `tf.data.Dataset.interleave(cycle_length=1)`
     
     Args:
       map_func: A function mapping a nested structure of tensors (having shapes
-- 
GitLab


From 9f2dd5cc693f3dca077392dff76f740969e9fea8 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 21 Sep 2018 12:50:46 -0700
Subject: [PATCH 0511/1357] Print errors if --outfile or --outtree are not
 passed to the tf_upgrade_v2.py script.

PiperOrigin-RevId: 214029776
---
 tensorflow/tools/compatibility/tf_upgrade_v2.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py
index 38216ce9b1..53c546b10c 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py
@@ -120,10 +120,18 @@ Simple usage:
   report_filename = args.report_filename
   files_processed = 0
   if args.input_file:
+    if not args.output_file:
+      raise ValueError(
+          "--outfile=<output file> argument is required when converting a "
+          "single file.")
     files_processed, report_text, errors = upgrade.process_file(
         args.input_file, args.output_file)
     files_processed = 1
   elif args.input_tree:
+    if not args.output_tree:
+      raise ValueError(
+          "--outtree=<output directory> argument is required when converting a "
+          "file tree.")
     files_processed, report_text, errors = upgrade.process_tree(
         args.input_tree, args.output_tree, args.copy_other_files)
   else:
-- 
GitLab


From 457ef66c2d4985000aa1d1a9bc643f66bbddd46d Mon Sep 17 00:00:00 2001
From: Martin Wicke <577277+martinwicke@users.noreply.github.com>
Date: Fri, 21 Sep 2018 12:58:32 -0700
Subject: [PATCH 0512/1357] Fix long lines

---
 tensorflow/python/keras/layers/embeddings.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py
index a0b9393812..76e551a7ce 100644
--- a/tensorflow/python/keras/layers/embeddings.py
+++ b/tensorflow/python/keras/layers/embeddings.py
@@ -142,12 +142,14 @@ class Embedding(Layer):
       else:
         in_lens = [self.input_length]
       if len(in_lens) != len(input_shape) - 1:
-        raise ValueError('"input_length" is %s, but received input has shape %s' %
+        raise ValueError('"input_length" is %s, '
+                         'but received input has shape %s' %
                          (str(self.input_length), str(input_shape)))
       else:
         for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])):
           if s1 is not None and s2 is not None and s1 != s2:
-            raise ValueError('"input_length" is %s, but received input has shape %s' %
+            raise ValueError('"input_length" is %s, '
+                             'but received input has shape %s' %
                              (str(self.input_length), str(input_shape)))
           elif s1 is None:
             in_lens[i] = s2
-- 
GitLab


From 282d6e7c384c83f9b6bf43b7b37eb606ccc64d06 Mon Sep 17 00:00:00 2001
From: Martin Wicke <577277+martinwicke@users.noreply.github.com>
Date: Fri, 21 Sep 2018 12:59:15 -0700
Subject: [PATCH 0513/1357] Fix long lines

---
 tensorflow/python/ops/nn_ops.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 17e10995f2..a68422c315 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -818,12 +818,14 @@ class Convolution(object):
     try:
       input_shape.with_rank(num_spatial_dims + 2)
     except ValueError:
-      raise ValueError("input tensor must have rank %d" % (num_spatial_dims + 2))
+      raise ValueError("input tensor must have rank %d" % 
+                       (num_spatial_dims + 2))
 
     try:
       filter_shape.with_rank(num_spatial_dims + 2)
     except ValueError:
-      raise ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2))
+      raise ValueError("filter tensor must have rank %d" % 
+                       (num_spatial_dims + 2))
 
     if data_format is None or not data_format.startswith("NC"):
       input_channels_dim = input_shape[num_spatial_dims + 1]
-- 
GitLab


From ef630974578b2c1185d4c3848836839d91cb3963 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Fri, 21 Sep 2018 13:01:35 -0700
Subject: [PATCH 0514/1357] Simplify XLA pattern matcher to explicitly
 construct AllOf relations, instead of relying on individual patterns carrying
 around continuations.

PiperOrigin-RevId: 214031269
---
 .../compiler/xla/service/pattern_matcher.h    | 502 ++++++++----------
 .../xla/service/pattern_matcher_test.cc       |  23 +
 2 files changed, 256 insertions(+), 269 deletions(-)

diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h
index 7d4d62ecb9..0bcf67c1be 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher.h
+++ b/tensorflow/compiler/xla/service/pattern_matcher.h
@@ -127,6 +127,51 @@ bool Match(Value* value, const Pattern& pattern) {
 
 namespace match {
 
+namespace detail {
+template <typename Item, typename... Patterns>
+class AllOfPattern {
+ public:
+  explicit AllOfPattern(const Patterns&... patterns) : patterns_(patterns...) {}
+
+  bool Match(const Item* item) const {
+    return MatchImpl(item, std::integral_constant<size_t, 0>());
+  }
+
+  bool Match(Item* item) const {
+    return MatchImpl(item, std::integral_constant<size_t, 0>());
+  }
+
+ private:
+  template <typename ItemType, size_t index>
+  bool MatchImpl(ItemType* item, std::integral_constant<size_t, index>) const {
+    return std::get<index>(patterns_).Match(item) &&
+           MatchImpl(item, std::integral_constant<size_t, index + 1>());
+  }
+
+  template <typename ItemType>
+  bool MatchImpl(ItemType* item,
+                 std::integral_constant<size_t, sizeof...(Patterns)>) const {
+    return true;
+  }
+
+  std::tuple<Patterns...> patterns_;
+};
+
+}  // namespace detail
+
+// Returns a pattern that represents the conjunction of all input patterns. All
+// patterns need to match in order to have the AllOf pattern match.
+//
+// TODO(timshen): Currently AllOf is still nested, e.g. AllOf<AllOf<A>, B> is
+// not AllOf<A, B>. We might want to flatten the AllOf type structure if the
+// C++ compile error message gets annoying.
+template <typename Item, typename... Patterns>
+detail::AllOfPattern<typename std::remove_const<Item>::type, Patterns...> AllOf(
+    const Patterns&... patterns) {
+  return detail::AllOfPattern<typename std::remove_const<Item>::type,
+                              Patterns...>(patterns...);
+}
+
 namespace detail {
 
 template <typename LayoutType, typename Impl>
@@ -141,43 +186,45 @@ class LayoutPatternBaseImpl {
 
 // A LayoutPattern implementation that matches only if the layout equals a
 // Layout proto.
-template <typename Previous>
 class LayoutPatternEqualImpl {
  public:
-  explicit constexpr LayoutPatternEqualImpl(const Previous& previous,
-                                            const ::xla::Layout* layout)
-      : previous_(previous), layout_(layout) {}
+  explicit constexpr LayoutPatternEqualImpl(const ::xla::Layout* layout)
+      : layout_(layout) {}
 
   bool Match(const ::xla::Layout* layout) const {
-    return previous_.Match(layout) && LayoutUtil::Equal(*layout_, *layout);
+    return LayoutUtil::Equal(*layout_, *layout);
   }
 
  private:
-  Previous previous_;
   const ::xla::Layout* layout_;
 };
 
 // A LayoutPattern implementation that matches only if the layout has a given
 // format.
-template <typename Previous>
 class LayoutPatternFormatImpl {
  public:
-  explicit constexpr LayoutPatternFormatImpl(const Previous& previous,
-                                             Format format)
-      : previous_(previous), format_(format) {}
+  explicit constexpr LayoutPatternFormatImpl(Format format) : format_(format) {}
 
   bool Match(const ::xla::Layout* layout) const {
-    return previous_.Match(layout) && layout->format() == format_;
+    return layout->format() == format_;
   }
 
  private:
-  Previous previous_;
   Format format_;
 };
 
 // A pattern that matches Layouts.
 template <typename LayoutType, typename Impl>
 class LayoutPattern {
+ private:
+  template <typename NewImpl>
+  LayoutPattern<LayoutType, AllOfPattern<::xla::Layout, Impl, NewImpl>>
+  AppendImpl(NewImpl new_impl) const {
+    return LayoutPattern<LayoutType,
+                         AllOfPattern<::xla::Layout, Impl, NewImpl>>(
+        AllOf<Layout>(impl_, std::move(new_impl)), matched_layout_);
+  }
+
  public:
   explicit constexpr LayoutPattern(const Impl& impl,
                                    LayoutType** matched_layout)
@@ -207,24 +254,21 @@ class LayoutPattern {
 
   // Modifies the pattern to match only if the layout equals the given proto.
   // The layout must outlive the returned pattern.
-  constexpr LayoutPattern<LayoutType, LayoutPatternEqualImpl<Impl>> EqualTo(
-      const ::xla::Layout* layout) const {
-    return LayoutPattern<LayoutType, LayoutPatternEqualImpl<Impl>>(
-        LayoutPatternEqualImpl<Impl>(impl_, layout), matched_layout_);
+  constexpr auto EqualTo(const ::xla::Layout* layout) const
+      -> decltype(this->AppendImpl(LayoutPatternEqualImpl(layout))) {
+    return AppendImpl(LayoutPatternEqualImpl(layout));
   }
 
   // Modifies the pattern to match only if the layout has a dense format.
-  constexpr LayoutPattern<LayoutType, LayoutPatternFormatImpl<Impl>>
-  WithDenseFormat() const {
-    return LayoutPattern<LayoutType, LayoutPatternFormatImpl<Impl>>(
-        LayoutPatternFormatImpl<Impl>(impl_, DENSE), matched_layout_);
+  constexpr auto WithDenseFormat() const
+      -> decltype(this->AppendImpl(LayoutPatternFormatImpl(DENSE))) {
+    return AppendImpl(LayoutPatternFormatImpl(DENSE));
   }
 
   // Modifies the pattern to match only if the layout has a sparse format.
-  constexpr LayoutPattern<LayoutType, LayoutPatternFormatImpl<Impl>>
-  WithSparseFormat() const {
-    return LayoutPattern<LayoutType, LayoutPatternFormatImpl<Impl>>(
-        LayoutPatternFormatImpl<Impl>(impl_, SPARSE), matched_layout_);
+  constexpr auto WithSparseFormat() const
+      -> decltype(this->AppendImpl(LayoutPatternFormatImpl(SPARSE))) {
+    return AppendImpl(LayoutPatternFormatImpl(SPARSE));
   }
 
  private:
@@ -260,6 +304,7 @@ class AnyOfPattern {
 
   std::tuple<Patterns...> patterns_;
 };
+
 }  // namespace detail
 
 // Returns a pattern that represents the logical disjunction of the input
@@ -305,167 +350,136 @@ class ShapePatternBaseImpl {
 
 // A ShapePattern implementation that matches only if the shape equals a Shape
 // proto.
-template <typename Previous>
 class ShapePatternEqualImpl {
  public:
-  explicit constexpr ShapePatternEqualImpl(const Previous& previous,
-                                           const ::xla::Shape* shape)
-      : previous_(previous), shape_(shape) {}
+  explicit constexpr ShapePatternEqualImpl(const ::xla::Shape* shape)
+      : shape_(shape) {}
 
   bool Match(const ::xla::Shape* shape) const {
-    return previous_.Match(shape) && ShapeUtil::Equal(*shape_, *shape);
+    return ShapeUtil::Equal(*shape_, *shape);
   }
 
  private:
-  Previous previous_;
   const ::xla::Shape* shape_;
 };
 
 // A ShapePattern implementation that matches only if the shape is compatible to
 // a Shape proto.
-template <typename Previous>
 class ShapePatternCompatibleImpl {
  public:
-  explicit constexpr ShapePatternCompatibleImpl(const Previous& previous,
-                                                const ::xla::Shape* shape)
-      : previous_(previous), shape_(shape) {}
+  explicit constexpr ShapePatternCompatibleImpl(const ::xla::Shape* shape)
+      : shape_(shape) {}
 
   bool Match(const ::xla::Shape* shape) const {
-    return previous_.Match(shape) && ShapeUtil::Compatible(*shape_, *shape);
+    return ShapeUtil::Compatible(*shape_, *shape);
   }
 
  private:
-  Previous previous_;
   const ::xla::Shape* shape_;
 };
 
 // A ShapePattern implementation that matches only if the shape has a given
 // element type.
-template <typename Previous>
 class ShapePatternElementTypeImpl {
  public:
-  explicit constexpr ShapePatternElementTypeImpl(const Previous& previous,
-                                                 PrimitiveType element_type)
-      : previous_(previous), element_type_(element_type) {}
+  explicit constexpr ShapePatternElementTypeImpl(PrimitiveType element_type)
+      : element_type_(element_type) {}
 
   bool Match(const ::xla::Shape* shape) const {
-    return previous_.Match(shape) && shape->element_type() == element_type_;
+    return shape->element_type() == element_type_;
   }
 
  private:
-  Previous previous_;
   PrimitiveType element_type_;
 };
 
 // A ShapePattern implementation that matches only if the shape is scalar.
-template <typename Previous>
 class ShapePatternIsScalarImpl {
  public:
-  explicit constexpr ShapePatternIsScalarImpl(const Previous& previous)
-      : previous_(previous) {}
+  explicit constexpr ShapePatternIsScalarImpl() {}
 
   bool Match(const ::xla::Shape* shape) const {
-    return previous_.Match(shape) && ShapeUtil::IsScalar(*shape);
+    return ShapeUtil::IsScalar(*shape);
   }
-
- private:
-  Previous previous_;
 };
 
 // A ShapePattern implementation that matches only if the shape is an array
-template <typename Previous>
 class ShapePatternIsArrayImpl {
  public:
-  explicit constexpr ShapePatternIsArrayImpl(const Previous& previous)
-      : previous_(previous) {}
+  explicit constexpr ShapePatternIsArrayImpl() {}
 
   bool Match(const ::xla::Shape* shape) const {
-    return previous_.Match(shape) && ShapeUtil::IsArray(*shape);
+    return ShapeUtil::IsArray(*shape);
   }
-
- private:
-  Previous previous_;
 };
 
 // A ShapePattern implementation that matches only if the shape is a tuple.
-template <typename Previous>
 class ShapePatternIsTupleImpl {
  public:
-  explicit constexpr ShapePatternIsTupleImpl(const Previous& previous)
-      : previous_(previous) {}
+  explicit constexpr ShapePatternIsTupleImpl() {}
 
   bool Match(const ::xla::Shape* shape) const {
-    return previous_.Match(shape) && ShapeUtil::IsTuple(*shape);
+    return ShapeUtil::IsTuple(*shape);
   }
-
- private:
-  Previous previous_;
 };
 
 // A ShapePattern implementation that matches only if the shape has a given
 // rank.
-template <typename Previous>
 class ShapePatternRankImpl {
  public:
-  explicit constexpr ShapePatternRankImpl(const Previous& previous, int64 rank)
-      : previous_(previous), rank_(rank) {}
+  explicit constexpr ShapePatternRankImpl(int64 rank) : rank_(rank) {}
 
   bool Match(const ::xla::Shape* shape) const {
-    return previous_.Match(shape) && ShapeUtil::Rank(*shape) == rank_;
+    return ShapeUtil::Rank(*shape) == rank_;
   }
 
  private:
-  Previous previous_;
   int64 rank_;
 };
 
 // A ShapePattern implementation that matches only if the shape has a layout
 // that matches a given pattern.
-template <typename Previous, typename LayoutType, typename LayoutImpl>
+template <typename LayoutType, typename LayoutImpl>
 class ShapePatternLayoutImpl {
  public:
   explicit constexpr ShapePatternLayoutImpl(
-      const Previous& previous,
       const LayoutPattern<LayoutType, LayoutImpl>& layout)
-      : previous_(previous), layout_(layout) {}
+      : layout_(layout) {}
 
   bool Match(const ::xla::Shape* shape) const {
-    return previous_.Match(shape) && LayoutUtil::HasLayout(*shape) &&
-           layout_.Match(&shape->layout());
+    return LayoutUtil::HasLayout(*shape) && layout_.Match(&shape->layout());
   }
 
   bool Match(Shape* shape) const {
-    return previous_.Match(shape) && LayoutUtil::HasLayout(*shape) &&
+    return LayoutUtil::HasLayout(*shape) &&
            layout_.Match(shape->mutable_layout());
   }
 
  private:
-  Previous previous_;
   LayoutPattern<LayoutType, LayoutImpl> layout_;
 };
 
 // A ShapePattern implementation that matches only if the shape has a subshape
 // that matches a given pattern.
-template <typename Previous, typename SubshapeType, typename SubshapeImpl>
+template <typename SubshapeType, typename SubshapeImpl>
 class ShapePatternSubshapeImpl {
  public:
   explicit ShapePatternSubshapeImpl(
-      const Previous& previous, ShapeIndexView index,
+      ShapeIndexView index,
       const ShapePattern<SubshapeType, SubshapeImpl>& subshape)
-      : previous_(previous), index_(index), subshape_(subshape) {}
+      : index_(index), subshape_(subshape) {}
 
   bool Match(const ::xla::Shape* shape) const {
-    return previous_.Match(shape) && ShapeUtil::IndexIsValid(*shape, index_) &&
+    return ShapeUtil::IndexIsValid(*shape, index_) &&
            subshape_.Match(&ShapeUtil::GetSubshape(*shape, index_));
   }
 
   bool Match(::xla::Shape* shape) const {
-    return previous_.Match(shape) && ShapeUtil::IndexIsValid(*shape, index_) &&
+    return ShapeUtil::IndexIsValid(*shape, index_) &&
            subshape_.Match(ShapeUtil::GetMutableSubshape(shape, index_));
   }
 
  private:
-  Previous previous_;
   ShapeIndexView index_;
   ShapePattern<SubshapeType, SubshapeImpl> subshape_;
 };
@@ -473,6 +487,14 @@ class ShapePatternSubshapeImpl {
 // A pattern that matches Shapes.
 template <typename ShapeType, typename Impl>
 class ShapePattern {
+ private:
+  template <typename NewImpl>
+  ShapePattern<ShapeType, AllOfPattern<::xla::Shape, Impl, NewImpl>> AppendImpl(
+      NewImpl new_impl) const {
+    return ShapePattern<ShapeType, AllOfPattern<::xla::Shape, Impl, NewImpl>>(
+        AllOf<Shape>(impl_, std::move(new_impl)), matched_shape_);
+  }
+
  public:
   explicit constexpr ShapePattern(const Impl& impl, ShapeType** matched_shape)
       : impl_(impl), matched_shape_(matched_shape) {}
@@ -501,108 +523,90 @@ class ShapePattern {
 
   // Modifies the pattern to match only if the shape equals the given proto.
   // The layout must outlive the returned pattern.
-  constexpr ShapePattern<ShapeType, ShapePatternEqualImpl<Impl>> EqualTo(
-      const ::xla::Shape* shape) const {
-    return ShapePattern<ShapeType, ShapePatternEqualImpl<Impl>>(
-        ShapePatternEqualImpl<Impl>(impl_, shape), matched_shape_);
+  constexpr auto EqualTo(const ::xla::Shape* shape) const
+      -> decltype(this->AppendImpl(ShapePatternEqualImpl(shape))) {
+    return AppendImpl(ShapePatternEqualImpl(shape));
   }
 
   // Modifies the pattern to match only if the shape is compatible to the given
   // proto. The layout must outlive the returned pattern.
-  constexpr ShapePattern<ShapeType, ShapePatternCompatibleImpl<Impl>>
-  CompatibleTo(const ::xla::Shape* shape) const {
-    return ShapePattern<ShapeType, ShapePatternCompatibleImpl<Impl>>(
-        ShapePatternCompatibleImpl<Impl>(impl_, shape), matched_shape_);
+  constexpr auto CompatibleTo(const ::xla::Shape* shape) const
+      -> decltype(this->AppendImpl(ShapePatternCompatibleImpl(shape))) {
+    return AppendImpl(ShapePatternCompatibleImpl(shape));
   }
 
   // Modifies the pattern to match only if the shape has the given element type.
-  constexpr ShapePattern<ShapeType, ShapePatternElementTypeImpl<Impl>>
-  WithElementType(PrimitiveType element_type) const {
-    return ShapePattern<ShapeType, ShapePatternElementTypeImpl<Impl>>(
-        ShapePatternElementTypeImpl<Impl>(impl_, element_type), matched_shape_);
+  constexpr auto WithElementType(PrimitiveType element_type) const
+      -> decltype(this->AppendImpl(ShapePatternElementTypeImpl(element_type))) {
+    return AppendImpl(ShapePatternElementTypeImpl(element_type));
   }
 
   // Modifies the pattern to match only if the shape is scalar.
-  constexpr ShapePattern<ShapeType, ShapePatternIsScalarImpl<Impl>> IsScalar()
-      const {
-    return ShapePattern<ShapeType, ShapePatternIsScalarImpl<Impl>>(
-        ShapePatternIsScalarImpl<Impl>(impl_), matched_shape_);
+  constexpr auto IsScalar() const
+      -> decltype(this->AppendImpl(ShapePatternIsScalarImpl())) {
+    return AppendImpl(ShapePatternIsScalarImpl());
   }
 
   // Modifies the pattern to match only if the shape is an array.
-  constexpr ShapePattern<ShapeType, ShapePatternIsArrayImpl<Impl>> IsArray()
-      const {
-    return ShapePattern<ShapeType, ShapePatternIsArrayImpl<Impl>>(
-        ShapePatternIsArrayImpl<Impl>(impl_), matched_shape_);
+  constexpr auto IsArray() const
+      -> decltype(this->AppendImpl(ShapePatternIsArrayImpl())) {
+    return AppendImpl(ShapePatternIsArrayImpl());
   }
 
   // Modifies the pattern to match only if the shape is a tuple.
-  constexpr ShapePattern<ShapeType, ShapePatternIsTupleImpl<Impl>> IsTuple()
-      const {
-    return ShapePattern<ShapeType, ShapePatternIsTupleImpl<Impl>>(
-        ShapePatternIsTupleImpl<Impl>(impl_), matched_shape_);
+  constexpr auto IsTuple() const
+      -> decltype(this->AppendImpl(ShapePatternIsTupleImpl())) {
+    return AppendImpl(ShapePatternIsTupleImpl());
   }
 
   // Modifies the pattern to match only if the shape has the given rank.
-  constexpr ShapePattern<ShapeType, ShapePatternRankImpl<Impl>> WithRank(
-      int64 rank) const {
-    return ShapePattern<ShapeType, ShapePatternRankImpl<Impl>>(
-        ShapePatternRankImpl<Impl>(impl_, rank), matched_shape_);
+  constexpr auto WithRank(int64 rank) const
+      -> decltype(this->AppendImpl(ShapePatternRankImpl(rank))) {
+    return AppendImpl(ShapePatternRankImpl(rank));
   }
 
   // Modifies the pattern to match only if the shape has a layout that matches
   // the given pattern.
   template <typename LayoutType, typename LayoutImpl>
-  constexpr ShapePattern<ShapeType,
-                         ShapePatternLayoutImpl<Impl, LayoutType, LayoutImpl>>
-  WithLayout(const LayoutPattern<LayoutType, LayoutImpl>& layout) const {
-    return ShapePattern<ShapeType,
-                        ShapePatternLayoutImpl<Impl, LayoutType, LayoutImpl>>(
-        ShapePatternLayoutImpl<Impl, LayoutType, LayoutImpl>(impl_, layout),
-        matched_shape_);
-  }
-
-  constexpr ShapePattern<
-      ShapeType,
-      ShapePatternLayoutImpl<Impl, const ::xla::Layout,
-                             LayoutPatternEqualImpl<LayoutPatternBaseImpl>>>
-  WithLayoutEqualTo(const ::xla::Layout* layout) const {
+  auto WithLayout(const LayoutPattern<LayoutType, LayoutImpl>& layout) const
+      -> decltype(this->AppendImpl(
+          ShapePatternLayoutImpl<LayoutType, LayoutImpl>(layout))) {
+    return AppendImpl(ShapePatternLayoutImpl<LayoutType, LayoutImpl>(layout));
+  }
+
+  constexpr auto WithLayoutEqualTo(const ::xla::Layout* layout) const
+      -> decltype(this->WithLayout(Layout().EqualTo(layout))) {
     return WithLayout(Layout().EqualTo(layout));
   }
 
-  constexpr ShapePattern<
-      ShapeType,
-      ShapePatternLayoutImpl<Impl, const ::xla::Layout,
-                             LayoutPatternFormatImpl<LayoutPatternBaseImpl>>>
-  IsDenseArray() const {
+  constexpr auto IsDenseArray() const
+      -> decltype(this->WithLayout(Layout().WithDenseFormat())) {
     return WithLayout(Layout().WithDenseFormat());
   }
 
-  constexpr ShapePattern<
-      ShapeType,
-      ShapePatternLayoutImpl<Impl, const ::xla::Layout,
-                             LayoutPatternFormatImpl<LayoutPatternBaseImpl>>>
-  IsSparseArray() const {
+  constexpr auto IsSparseArray() const
+      -> decltype(this->WithLayout(Layout().WithSparseFormat())) {
     return WithLayout(Layout().WithSparseFormat());
   }
 
   // Modifies the pattern to match only if the shape has a subshape that matches
   // the given pattern.
   template <typename SubshapeType, typename SubshapeImpl>
+  auto WithSubshape(ShapeIndexView index,
+                    const ShapePattern<SubshapeType, SubshapeImpl>& subshape)
+      const -> decltype(this->AppendImpl(
+          ShapePatternSubshapeImpl<SubshapeType, SubshapeImpl>(index,
+                                                               subshape))) {
+    return AppendImpl(
+        ShapePatternSubshapeImpl<SubshapeType, SubshapeImpl>(index, subshape));
+  }
+
   ShapePattern<ShapeType,
-               ShapePatternSubshapeImpl<Impl, SubshapeType, SubshapeImpl>>
-  WithSubshape(ShapeIndexView index,
-               const ShapePattern<SubshapeType, SubshapeImpl>& subshape) const {
-    return ShapePattern<
-        ShapeType, ShapePatternSubshapeImpl<Impl, SubshapeType, SubshapeImpl>>(
-        ShapePatternSubshapeImpl<Impl, SubshapeType, SubshapeImpl>(impl_, index,
-                                                                   subshape),
-        matched_shape_);
-  }
-
-  ShapePattern<ShapeType, ShapePatternSubshapeImpl<
-                              Impl, const ::xla::Shape,
-                              ShapePatternEqualImpl<ShapePatternBaseImpl>>>
+               AllOfPattern<Shape, Impl,
+                            ShapePatternSubshapeImpl<
+                                const ::xla::Shape,
+                                AllOfPattern<::xla::Shape, ShapePatternBaseImpl,
+                                             ShapePatternEqualImpl>>>>
   WithSubshapeEqualTo(ShapeIndexView index, const ::xla::Shape* shape) const {
     return WithSubshape(index,
                         ShapePattern<const ::xla::Shape, ShapePatternBaseImpl>(
@@ -610,9 +614,12 @@ class ShapePattern {
                             .EqualTo(shape));
   }
 
-  ShapePattern<ShapeType, ShapePatternSubshapeImpl<
-                              Impl, const ::xla::Shape,
-                              ShapePatternCompatibleImpl<ShapePatternBaseImpl>>>
+  ShapePattern<ShapeType,
+               AllOfPattern<Shape, Impl,
+                            ShapePatternSubshapeImpl<
+                                const ::xla::Shape,
+                                AllOfPattern<::xla::Shape, ShapePatternBaseImpl,
+                                             ShapePatternCompatibleImpl>>>>
   WithSubshapeCompatibleTo(ShapeIndexView index,
                            const ::xla::Shape* shape) const {
     return WithSubshape(index,
@@ -660,156 +667,133 @@ class HloInstructionPatternBaseImpl {
 
 // An HloInstructionPattern implementation that matches only if the instruction
 // has a given name.
-template <typename Previous>
 class HloInstructionPatternNameImpl {
  public:
-  explicit HloInstructionPatternNameImpl(const Previous& previous,
-                                         absl::string_view name)
-      : previous_(previous), name_(name) {}
+  explicit HloInstructionPatternNameImpl(absl::string_view name)
+      : name_(name) {}
 
   bool Match(const ::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) && inst->name() == name_;
+    return inst->name() == name_;
   }
 
  private:
-  Previous previous_;
   absl::string_view name_;
 };
 
 // An HloInstructionPattern implementation that matches only if the instruction
 // has a given opcode.
-template <typename Previous>
 class HloInstructionPatternOpcodeImpl {
  public:
-  explicit constexpr HloInstructionPatternOpcodeImpl(const Previous& previous,
-                                                     HloOpcode opcode,
+  explicit constexpr HloInstructionPatternOpcodeImpl(HloOpcode opcode,
                                                      bool invert)
-      : previous_(previous), opcode_(opcode), invert_(invert) {}
+      : opcode_(opcode), invert_(invert) {}
 
   bool Match(const ::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) && (invert_ ^ (inst->opcode() == opcode_));
+    return (invert_ ^ (inst->opcode() == opcode_));
   }
 
  private:
-  Previous previous_;
   HloOpcode opcode_;
   bool invert_;
 };
 
 // An HloInstructionPattern implementation that matches only if the instruction
 // has a shape that matches a given pattern.
-template <typename Previous, typename ShapeType, typename ShapeImpl>
+template <typename ShapeType, typename ShapeImpl>
 class HloInstructionPatternShapeImpl {
  public:
   explicit constexpr HloInstructionPatternShapeImpl(
-      const Previous& previous, const ShapePattern<ShapeType, ShapeImpl>& shape)
-      : previous_(previous), shape_(shape) {}
+      const ShapePattern<ShapeType, ShapeImpl>& shape)
+      : shape_(shape) {}
 
   bool Match(const ::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) && shape_.Match(&inst->shape());
+    return shape_.Match(&inst->shape());
   }
 
   bool Match(::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) && shape_.Match(inst->mutable_shape());
+    return shape_.Match(inst->mutable_shape());
   }
 
  private:
-  Previous previous_;
   ShapePattern<ShapeType, ShapeImpl> shape_;
 };
 
 // An HloInstructionPattern implementation that matches only if the instruction
 // has an operand that matches a given pattern.
-template <typename Previous, typename OperandType, typename OperandImpl>
+template <typename OperandType, typename OperandImpl>
 class HloInstructionPatternOperandImpl {
  public:
   explicit constexpr HloInstructionPatternOperandImpl(
-      const Previous& previous, int64 operand_index,
+      int64 operand_index,
       const HloInstructionPattern<OperandType, OperandImpl>& operand)
-      : previous_(previous), operand_index_(operand_index), operand_(operand) {}
+      : operand_index_(operand_index), operand_(operand) {}
 
   bool Match(const ::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) && operand_index_ < inst->operand_count() &&
+    return operand_index_ < inst->operand_count() &&
            operand_.Match(inst->operand(operand_index_));
   }
 
   bool Match(::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) && operand_index_ < inst->operand_count() &&
+    return operand_index_ < inst->operand_count() &&
            operand_.Match(inst->mutable_operand(operand_index_));
   }
 
  private:
-  Previous previous_;
   int64 operand_index_;
   HloInstructionPattern<OperandType, OperandImpl> operand_;
 };
 
 // An HloInstructionPattern implementation that matches only if the instruction
 // is a fusion node with a particular kind.
-template <typename Previous>
 class HloInstructionPatternFusionKindImpl {
  public:
   explicit constexpr HloInstructionPatternFusionKindImpl(
-      const Previous& previous, ::xla::HloInstruction::FusionKind kind)
-      : previous_(previous), kind_(kind) {}
+      ::xla::HloInstruction::FusionKind kind)
+      : kind_(kind) {}
 
   bool Match(const ::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) && inst->opcode() == HloOpcode::kFusion &&
-           inst->fusion_kind() == kind_;
+    return inst->opcode() == HloOpcode::kFusion && inst->fusion_kind() == kind_;
   }
 
   bool Match(::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) && inst->opcode() == HloOpcode::kFusion &&
-           inst->fusion_kind() == kind_;
+    return inst->opcode() == HloOpcode::kFusion && inst->fusion_kind() == kind_;
   }
 
  private:
-  Previous previous_;
   ::xla::HloInstruction::FusionKind kind_;
 };
 
 // An HloInstructionPattern implementation that matches only if the instruction
 // is a kGetTupleElement with a particular tuple index.
-template <typename Previous>
 class HloInstructionPatternTupleIndexImpl {
  public:
-  explicit constexpr HloInstructionPatternTupleIndexImpl(
-      const Previous& previous, int64 tuple_index)
-      : previous_(previous), tuple_index_(tuple_index) {}
+  explicit constexpr HloInstructionPatternTupleIndexImpl(int64 tuple_index)
+      : tuple_index_(tuple_index) {}
 
   bool Match(const ::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) &&
-           inst->opcode() == HloOpcode::kGetTupleElement &&
+    return inst->opcode() == HloOpcode::kGetTupleElement &&
            inst->tuple_index() == tuple_index_;
   }
 
   bool Match(::xla::HloInstruction* inst) const {
-    return previous_.Match(inst) &&
-           inst->opcode() == HloOpcode::kGetTupleElement &&
+    return inst->opcode() == HloOpcode::kGetTupleElement &&
            inst->tuple_index() == tuple_index_;
   }
 
  private:
-  Previous previous_;
   int64 tuple_index_;
 };
 
-template <typename Previous, typename ItemType, typename Predicate>
+template <typename ItemType, typename Predicate>
 class HloPredicatePatternImpl {
  public:
-  explicit HloPredicatePatternImpl(const Previous& previous, Predicate pred)
-      : previous_(previous), pred_(std::move(pred)) {}
+  explicit HloPredicatePatternImpl(Predicate pred) : pred_(std::move(pred)) {}
 
-  bool Match(const ItemType* item) const {
-    return previous_.Match(item) && pred_(item);
-  }
+  bool Match(const ItemType* item) const { return pred_(item); }
 
-  bool Match(ItemType* item) const {
-    return previous_.Match(item) && pred_(item);
-  }
+  bool Match(ItemType* item) const { return pred_(item); }
 
  private:
-  Previous previous_;
   Predicate pred_;
 };
 
@@ -818,6 +802,16 @@ struct PatternFriend;
 // A pattern that matches HloInstructions.
 template <typename HloInstructionType, typename Impl>
 class HloInstructionPattern {
+ private:
+  template <typename NewImpl>
+  HloInstructionPattern<HloInstructionType,
+                        AllOfPattern<::xla::HloInstruction, Impl, NewImpl>>
+  AppendImpl(NewImpl new_impl) const {
+    return HloInstructionPattern<
+        HloInstructionType, AllOfPattern<::xla::HloInstruction, Impl, NewImpl>>(
+        AllOf<HloInstruction>(impl_, std::move(new_impl)), matched_inst_);
+  }
+
  public:
   explicit constexpr HloInstructionPattern(const Impl& impl,
                                            HloInstructionType** matched_inst)
@@ -846,113 +840,83 @@ class HloInstructionPattern {
   }
 
   // Modifies the pattern to match only if the instruction has the given name.
-  HloInstructionPattern<HloInstructionType, HloInstructionPatternNameImpl<Impl>>
-  WithName(absl::string_view name) const {
-    return HloInstructionPattern<HloInstructionType,
-                                 HloInstructionPatternNameImpl<Impl>>(
-        HloInstructionPatternNameImpl<Impl>(impl_, name), matched_inst_);
+  auto WithName(absl::string_view name) const
+      -> decltype(this->AppendImpl(HloInstructionPatternNameImpl(name))) {
+    return AppendImpl(HloInstructionPatternNameImpl(name));
   }
 
   // Modifies the pattern to match only if the instruction has the given opcode.
-  constexpr HloInstructionPattern<HloInstructionType,
-                                  HloInstructionPatternOpcodeImpl<Impl>>
-  WithOpcode(HloOpcode opcode) const {
-    return HloInstructionPattern<HloInstructionType,
-                                 HloInstructionPatternOpcodeImpl<Impl>>(
-        HloInstructionPatternOpcodeImpl<Impl>(impl_, opcode, false),
-        matched_inst_);
+  auto WithOpcode(HloOpcode opcode) const
+      -> decltype(this->AppendImpl(HloInstructionPatternOpcodeImpl(opcode,
+                                                                   false))) {
+    return AppendImpl(HloInstructionPatternOpcodeImpl(opcode, false));
   }
 
   // Modifies the pattern to match only if the instruction does not have the
   // given opcode.
-  constexpr HloInstructionPattern<HloInstructionType,
-                                  HloInstructionPatternOpcodeImpl<Impl>>
-  WithoutOpcode(HloOpcode opcode) const {
-    return HloInstructionPattern<HloInstructionType,
-                                 HloInstructionPatternOpcodeImpl<Impl>>(
-        HloInstructionPatternOpcodeImpl<Impl>(impl_, opcode, true),
-        matched_inst_);
+  auto WithoutOpcode(HloOpcode opcode) const
+      -> decltype(this->AppendImpl(HloInstructionPatternOpcodeImpl(opcode,
+                                                                   true))) {
+    return AppendImpl(HloInstructionPatternOpcodeImpl(opcode, true));
   }
 
   // Modifies the pattern to match only if the instruction is a constant.
-  constexpr HloInstructionPattern<HloInstructionType,
-                                  HloInstructionPatternOpcodeImpl<Impl>>
-  IsConstant() const {
+  constexpr auto IsConstant() const
+      -> decltype(this->WithOpcode(HloOpcode::kConstant)) {
     return WithOpcode(HloOpcode::kConstant);
   }
 
   // Modifies the pattern to match only if the instruction is not a constant.
-  constexpr HloInstructionPattern<HloInstructionType,
-                                  HloInstructionPatternOpcodeImpl<Impl>>
-  IsNonConstant() const {
+  constexpr auto IsNonConstant() const
+      -> decltype(this->WithoutOpcode(HloOpcode::kConstant)) {
     return WithoutOpcode(HloOpcode::kConstant);
   }
 
   // Modifies the pattern to match only if the instruction has a shape that
   // matches the given pattern.
   template <typename ShapeType, typename ShapeImpl>
-  constexpr HloInstructionPattern<
-      HloInstructionType,
-      HloInstructionPatternShapeImpl<Impl, ShapeType, ShapeImpl>>
-  WithShape(const ShapePattern<ShapeType, ShapeImpl>& shape) const {
-    return HloInstructionPattern<
-        HloInstructionType,
-        HloInstructionPatternShapeImpl<Impl, ShapeType, ShapeImpl>>(
-        HloInstructionPatternShapeImpl<Impl, ShapeType, ShapeImpl>(impl_,
-                                                                   shape),
-        matched_inst_);
+  constexpr auto WithShape(const ShapePattern<ShapeType, ShapeImpl>& shape)
+      const -> decltype(this->AppendImpl(
+          HloInstructionPatternShapeImpl<ShapeType, ShapeImpl>(shape))) {
+    return AppendImpl(
+        HloInstructionPatternShapeImpl<ShapeType, ShapeImpl>(shape));
   }
 
   // Modifies the pattern to match only if the instruction has an operand that
   // matches the given pattern.
   template <typename OperandType, typename OperandImpl>
-  constexpr HloInstructionPattern<
-      HloInstructionType,
-      HloInstructionPatternOperandImpl<Impl, OperandType, OperandImpl>>
-  WithOperand(
+  constexpr auto WithOperand(
       int64 operand_index,
-      const HloInstructionPattern<OperandType, OperandImpl>& operand) const {
-    return HloInstructionPattern<
-        HloInstructionType,
-        HloInstructionPatternOperandImpl<Impl, OperandType, OperandImpl>>(
-        HloInstructionPatternOperandImpl<Impl, OperandType, OperandImpl>(
-            impl_, operand_index, operand),
-        matched_inst_);
+      const HloInstructionPattern<OperandType, OperandImpl>& operand) const
+      -> decltype(this->AppendImpl(
+          HloInstructionPatternOperandImpl<OperandType, OperandImpl>(
+              operand_index, operand))) {
+    return AppendImpl(
+        HloInstructionPatternOperandImpl<OperandType, OperandImpl>(
+            operand_index, operand));
   }
 
   // Modifies the pattern to match only if the instruction is a fusion node with
   // the given kind.
-  constexpr HloInstructionPattern<HloInstructionType,
-                                  HloInstructionPatternFusionKindImpl<Impl>>
-  WithFusionKind(HloInstruction::FusionKind kind) const {
-    return HloInstructionPattern<HloInstructionType,
-                                 HloInstructionPatternFusionKindImpl<Impl>>(
-        HloInstructionPatternFusionKindImpl<Impl>(impl_, kind), matched_inst_);
+  constexpr auto WithFusionKind(HloInstruction::FusionKind kind) const
+      -> decltype(this->AppendImpl(HloInstructionPatternFusionKindImpl(kind))) {
+    return AppendImpl(HloInstructionPatternFusionKindImpl(kind));
   }
 
   // Modifies the pattern to match only if the instruction is a
   // get-tuple-element with the given tuple index.
-  constexpr HloInstructionPattern<HloInstructionType,
-                                  HloInstructionPatternTupleIndexImpl<Impl>>
-  WithTupleIndex(int64 tuple_index) const {
-    return HloInstructionPattern<HloInstructionType,
-                                 HloInstructionPatternTupleIndexImpl<Impl>>(
-        HloInstructionPatternTupleIndexImpl<Impl>(impl_, tuple_index),
-        matched_inst_);
+  constexpr auto WithTupleIndex(int64 tuple_index) const -> decltype(
+      this->AppendImpl(HloInstructionPatternTupleIndexImpl(tuple_index))) {
+    return AppendImpl(HloInstructionPatternTupleIndexImpl(tuple_index));
   }
 
  private:
   template <typename Predicate>
-  constexpr HloInstructionPattern<
-      HloInstructionType,
-      HloPredicatePatternImpl<
-          Impl, typename std::remove_const<HloInstructionType>::type,
-          Predicate>>
-  WithPredicate(Predicate pred) const {
-    using NewImplType = HloPredicatePatternImpl<
-        Impl, typename std::remove_const<HloInstructionType>::type, Predicate>;
-    return HloInstructionPattern<HloInstructionType, NewImplType>(
-        NewImplType(impl_, std::move(pred)), matched_inst_);
+  constexpr auto WithPredicate(Predicate pred) const -> decltype(
+      this->AppendImpl(HloPredicatePatternImpl<HloInstruction, Predicate>(
+          std::move(pred)))) {
+    return AppendImpl(
+        HloPredicatePatternImpl<HloInstruction, Predicate>(std::move(pred)));
   }
 
   friend struct PatternFriend;
diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
index b3a2c954b3..7bd27268aa 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc
+++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
@@ -295,5 +295,28 @@ TEST(PatternMatcherTest, AnyOfShortCircuit) {
   }
 }
 
+TEST(PatternMatcherTest, AllOf) {
+  using match::AllOf;
+  using match::Broadcast;
+  using match::Constant;
+  using match::Op;
+
+  constexpr char kModuleStr[] = R"(
+    HloModule test_module ENTRY test { ROOT constant = f16[] constant(1) })";
+  TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr));
+  auto* root = hlo_module->entry_computation()->root_instruction();
+
+  auto scalar_pattern = Constant().WithShape(match::Shape().IsScalar());
+  auto f16_pattern = Constant().WithShape(match::Shape().WithElementType(F16));
+  ASSERT_TRUE(Match(root, scalar_pattern));
+  ASSERT_TRUE(Match(root, f16_pattern));
+  EXPECT_TRUE(Match(root, AllOf<HloInstruction>(scalar_pattern, f16_pattern)));
+  EXPECT_TRUE(Match(root, AllOf<HloInstruction>(f16_pattern, scalar_pattern)));
+  EXPECT_FALSE(
+      Match(root, AllOf<HloInstruction>(Broadcast(Op()), f16_pattern)));
+  EXPECT_FALSE(
+      Match(root, AllOf<HloInstruction>(Broadcast(Op()), scalar_pattern)));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From f22484881fe1895ee77ec62b6493e015ca40e71a Mon Sep 17 00:00:00 2001
From: Yuanzhong Xu <yuanzx@google.com>
Date: Fri, 21 Sep 2018 13:10:39 -0700
Subject: [PATCH 0515/1357] [XLA] Add a global decreasing size best-fit buffer
 allocation algorithm, which sorts buffers by size regardless of their
 alloc/free time. It uses a interval tree to avoid conflicting allocations.

Also changed to choose the best result from the new algorithm and the old one.

PiperOrigin-RevId: 214032637
---
 .../compiler/xla/service/buffer_assignment.cc |  26 ++-
 .../compiler/xla/service/heap_simulator.cc    | 204 ++++++++++++++++++
 .../compiler/xla/service/heap_simulator.h     |  62 ++++++
 .../xla/service/heap_simulator_test.cc        | 130 +++++++++++
 4 files changed, 414 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 65fa951afe..34a7be0e9c 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -1064,6 +1064,19 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
   // that seems to give the best results is lazy-best-fit, with all runs of
   // alloc / free calls sorted in decreasing size order.
   const HloOrdering& hlo_ordering = assignment->liveness().hlo_ordering();
+
+  // Returns a heap algorithm that chooses the best result from several
+  // algorithms.
+  auto get_heap_algorithm = [&](int64 alignment) {
+    auto algorithms =
+        absl::make_unique<std::vector<std::unique_ptr<HeapAlgorithm>>>();
+    algorithms->push_back(absl::make_unique<DecreasingSizeRunsHeap>(
+        absl::make_unique<LazyBestFitHeap>(alignment)));
+    algorithms->push_back(
+        absl::make_unique<GlobalDecreasingSizeBestFitHeap>(alignment));
+    return absl::make_unique<ChooseBestHeapAlgorithm>(std::move(algorithms));
+  };
+
   if (run_whole_module_heap_simulation) {
     // Run the heap simulation over the whole module. This reduces memory usage,
     // since buffers for kCall, kWhile, and kConditional sub-computations are
@@ -1093,8 +1106,7 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
       options.buffers_to_assign = &buffer_value_set;
       TF_ASSIGN_OR_RETURN(
           const HeapSimulator::Result result,
-          HeapSimulator::Run(absl::make_unique<DecreasingSizeRunsHeap>(
-                                 absl::make_unique<LazyBestFitHeap>(alignment)),
+          HeapSimulator::Run(get_heap_algorithm(alignment),
                              assignment->module(), schedule,
                              assignment->points_to_analysis(),
                              assignment->buffer_size_, options));
@@ -1123,12 +1135,10 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
         options.buffers_to_assign = &buffer_value_set;
         TF_ASSIGN_OR_RETURN(
             const HeapSimulator::Result result,
-            HeapSimulator::Run(
-                absl::make_unique<DecreasingSizeRunsHeap>(
-                    absl::make_unique<LazyBestFitHeap>(alignment)),
-                *computation, HloInstructionSequence(*instruction_sequence),
-                assignment->points_to_analysis(), assignment->buffer_size_,
-                options));
+            HeapSimulator::Run(get_heap_algorithm(alignment), *computation,
+                               HloInstructionSequence(*instruction_sequence),
+                               assignment->points_to_analysis(),
+                               assignment->buffer_size_, options));
         AssignBuffersFromHeapSimulator(result, assignment,
                                        single_colored_set.first);
       }
diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index e0f3a7e0e2..a07eaaf997 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -736,4 +736,208 @@ HeapSimulator::Result LazyBestFitHeap::Finish() {
   return result_;
 }
 
+void GlobalDecreasingSizeBestFitHeap::Alloc(const BufferValue* buffer,
+                                            int64 size) {
+  // Degenerate case: 0-sized buffers are always allocated at offset 0.
+  if (size == 0) {
+    result_.chunk_map.emplace(buffer, Chunk{0, 0});
+    return;
+  }
+  auto emplace_result = buffer_intervals_.emplace(
+      buffer, BufferInterval{buffer, size, current_time_, -1});
+  DCHECK(emplace_result.second);
+  ++current_time_;
+}
+
+void GlobalDecreasingSizeBestFitHeap::Free(const BufferValue* buffer,
+                                           int64 size) {
+  // Degenerate case: 0-sized buffers are always allocated at offset 0.
+  if (size == 0) {
+    return;
+  }
+  BufferInterval& buffer_interval = FindOrDie(buffer_intervals_, buffer);
+  DCHECK_EQ(buffer_interval.buffer, buffer);
+  DCHECK_EQ(buffer_interval.size, size);
+  DCHECK_EQ(buffer_interval.end, -1);
+  buffer_interval.end = current_time_;
+  ++current_time_;
+}
+
+namespace {
+
+// Node in BufferIntervalTree that stores the alloc and free times of a buffer,
+// and the chunk assigned to it.
+struct BufferIntervalTreeNode {
+  // Alloc time.
+  int64 start;
+  // Free time.
+  int64 end;
+  // Maximum free time of all nodes in the subtree where this node is the root.
+  int64 subtree_end;
+  // Allocated chunk for the buffer.
+  HeapSimulator::Chunk chunk;
+  // Left child.
+  BufferIntervalTreeNode* left;
+  // Right child.
+  BufferIntervalTreeNode* right;
+};
+
+// An interval tree that can query buffers overlapping in time.
+class BufferIntervalTree {
+ public:
+  explicit BufferIntervalTree(int capacity) : node_storage_(capacity) {}
+
+  using Chunk = HeapSimulator::Chunk;
+
+  // Adds a buffer to the interval tree, with the time interval and allocated
+  // chunk specified.
+  void Add(int64 start, int64 end, const Chunk& chunk) {
+    int index = node_count_;
+    DCHECK_LT(index, node_storage_.size());
+    ++node_count_;
+
+    node_storage_[index] =
+        BufferIntervalTreeNode{start, end, end, chunk, nullptr, nullptr};
+
+    if (index == 0) {
+      // This is root.
+      return;
+    }
+
+    BufferIntervalTreeNode* parent = &node_storage_[0];
+    while (true) {
+      parent->subtree_end = std::max(parent->subtree_end, end);
+      if (parent->start > start) {
+        if (parent->left == nullptr) {
+          parent->left = &node_storage_[index];
+          return;
+        }
+        parent = parent->left;
+      } else {
+        if (parent->right == nullptr) {
+          parent->right = &node_storage_[index];
+          return;
+        }
+        parent = parent->right;
+      }
+    }
+  }
+
+  // Returns vector of allocated chunks that overlap with the given time
+  // interval.
+  std::vector<Chunk> ChunksOverlappingInTime(int64 start, int64 end) {
+    std::vector<Chunk> result;
+    if (node_count_ > 0) {
+      ChunksOverlappingInTimeHelper(start, end, &node_storage_[0], &result);
+    }
+    return result;
+  }
+
+ private:
+  void ChunksOverlappingInTimeHelper(int64 start, int64 end,
+                                     BufferIntervalTreeNode* visiting_node,
+                                     std::vector<Chunk>* result) {
+    if (start > visiting_node->subtree_end) {
+      return;
+    }
+    if (visiting_node->left != nullptr) {
+      ChunksOverlappingInTimeHelper(start, end, visiting_node->left, result);
+    }
+    if (visiting_node->start <= end && visiting_node->end >= start) {
+      result->push_back(visiting_node->chunk);
+    }
+    if (end < visiting_node->start) {
+      return;
+    }
+    if (visiting_node->right != nullptr) {
+      ChunksOverlappingInTimeHelper(start, end, visiting_node->right, result);
+    }
+  }
+
+  int64 node_count_ = 0;
+  std::vector<BufferIntervalTreeNode> node_storage_;
+};
+
+}  // namespace
+
+HeapSimulator::Result GlobalDecreasingSizeBestFitHeap::Finish() {
+  std::vector<BufferInterval> sorted_buffer_intervals;
+  for (auto& entry : buffer_intervals_) {
+    sorted_buffer_intervals.push_back(entry.second);
+  }
+  std::sort(sorted_buffer_intervals.begin(), sorted_buffer_intervals.end(),
+            [](const BufferInterval& x, const BufferInterval& y) {
+              if (x.size != y.size) {
+                return x.size > y.size;
+              }
+              if (x.end - x.start != y.end - y.start) {
+                return x.end - x.start > y.end - y.start;
+              }
+              return x.buffer->id() < y.buffer->id();
+            });
+
+  BufferIntervalTree interval_tree(sorted_buffer_intervals.size());
+  for (auto& buffer_interval : sorted_buffer_intervals) {
+    auto chunks_overlapping_in_time = interval_tree.ChunksOverlappingInTime(
+        buffer_interval.start, buffer_interval.end);
+    std::sort(
+        chunks_overlapping_in_time.begin(), chunks_overlapping_in_time.end(),
+        [](const Chunk& x, const Chunk& y) { return x.offset < y.offset; });
+
+    // Find the minimum free chunk that can hold this buffer.
+    Chunk min_fit_chunk{-1, INT64_MAX};
+    auto use_free_chunk_if_smaller = [&](int64 free_offset, int64 free_size) {
+      if (free_size < buffer_interval.size) {
+        return;
+      }
+
+      if (free_size < min_fit_chunk.size) {
+        min_fit_chunk = {free_offset, free_size};
+      }
+    };
+
+    int64 offset = 0;
+    for (auto& chunk : chunks_overlapping_in_time) {
+      if (offset < chunk.offset) {
+        use_free_chunk_if_smaller(offset, chunk.offset - offset);
+      }
+      offset =
+          std::max(offset, RoundUpToNearest(chunk.chunk_end(), alignment_));
+    }
+    use_free_chunk_if_smaller(offset, result_.heap_size - offset);
+
+    if (min_fit_chunk.offset == -1) {
+      // Increase the heap size to fit in the last free chunk.
+      result_.heap_size = offset + buffer_interval.size;
+      min_fit_chunk = {offset, buffer_interval.size};
+    }
+
+    min_fit_chunk.size = buffer_interval.size;
+    const auto emplace_result =
+        result_.chunk_map.emplace(buffer_interval.buffer, min_fit_chunk);
+    DCHECK(emplace_result.second);
+
+    interval_tree.Add(buffer_interval.start, buffer_interval.end,
+                      min_fit_chunk);
+  }
+  return result_;
+}
+
+HeapSimulator::Result ChooseBestHeapAlgorithm::Finish() {
+  DCHECK(!algorithms_.empty());
+  std::vector<Result> results(algorithms_.size());
+  int64 min_size = INT64_MAX;
+  int min_size_index = -1;
+  for (int i = 0; i < algorithms_.size(); ++i) {
+    results[i] = algorithms_[i]->Finish();
+    if (results[i].heap_size < min_size) {
+      min_size = results[i].heap_size;
+      min_size_index = i;
+    }
+  }
+
+  DCHECK_GE(min_size_index, 0);
+  return results[min_size_index];
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index ffbf947d5a..7d6dcc0dc9 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -351,6 +351,68 @@ class LazyBestFitHeap : public HeapAlgorithm {
   std::set<Chunk, OrderChunkByIncreasingSize> free_;
 };
 
+// GlobalDecreasingSizeBestFitHeap collects the live intervals of all buffers,
+// then allocates them in decreasing sizes regardless of the alloc/free time. It
+// internally tracks the allocated buffers and their live intervals; when
+// allocating a buffer, it finds the best-fit free chunk during its live
+// interval.
+class GlobalDecreasingSizeBestFitHeap : public HeapAlgorithm {
+ public:
+  GlobalDecreasingSizeBestFitHeap(int64 alignment) : alignment_(alignment) {}
+  ~GlobalDecreasingSizeBestFitHeap() override {}
+
+  void Alloc(const BufferValue* buffer, int64 size) override;
+  void Free(const BufferValue* buffer, int64 size) override;
+  Result Finish() override;
+
+ private:
+  int64 alignment_;
+  Result result_;
+
+  // The current time represented as an integer. It increments by 1 at each
+  // Alloc or Free call.
+  int64 current_time_ = 0;
+
+  // BufferInterval stores a buffer's size and time interval.
+  struct BufferInterval {
+    const BufferValue* buffer;
+    int64 size;
+    // Alloc time of the buffer.
+    int64 start;
+    // Free time of the buffer.
+    int64 end;
+  };
+  tensorflow::gtl::FlatMap<const BufferValue*, BufferInterval>
+      buffer_intervals_;
+};
+
+// A heap algorithm that chooses the best results from other algorithms added to
+// it.
+class ChooseBestHeapAlgorithm : public HeapAlgorithm {
+ public:
+  ChooseBestHeapAlgorithm(
+      std::unique_ptr<std::vector<std::unique_ptr<HeapAlgorithm>>> algorithms)
+      : algorithms_(std::move(*algorithms)) {}
+  ~ChooseBestHeapAlgorithm() override {}
+
+  void Alloc(const BufferValue* buffer, int64 size) override {
+    for (auto& algorithm : algorithms_) {
+      algorithm->Alloc(buffer, size);
+    }
+  }
+
+  void Free(const BufferValue* buffer, int64 size) override {
+    for (auto& algorithm : algorithms_) {
+      algorithm->Free(buffer, size);
+    }
+  }
+
+  Result Finish() override;
+
+ private:
+  std::vector<std::unique_ptr<HeapAlgorithm>> algorithms_;
+};
+
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HEAP_SIMULATOR_H_
diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc
index 957c4a6891..191fbf8194 100644
--- a/tensorflow/compiler/xla/service/heap_simulator_test.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc
@@ -1021,5 +1021,135 @@ TEST_F(LazyBestFitHeapTest, Alignment) {
   EXPECT_EQ(128, result.chunk_map.at(buffer_e_).offset);
 }
 
+class GlobalDecreasingSizeBestFitHeapTest : public HeapAlgorithmTestBase {};
+
+TEST_F(GlobalDecreasingSizeBestFitHeapTest, Empty) {
+  GlobalDecreasingSizeBestFitHeap heap(/*alignment=*/1);
+  const HeapSimulator::Result result = heap.Finish();
+  EXPECT_EQ(0, result.heap_size);
+  EXPECT_EQ(0, result.chunk_map.size());
+}
+
+TEST_F(GlobalDecreasingSizeBestFitHeapTest, DecreasingSize) {
+  // space
+  //   ^
+  //   |  +---a---+
+  //   |      +-------+
+  //   |      +---c---+
+  //   |    +-------+
+  //   |    |   b   |
+  //   |    +-------+
+  //   |         +-------+
+  //   |         |       |
+  //   |         |   d   |
+  //   |         +-------+
+  //   -----------------> time
+  GlobalDecreasingSizeBestFitHeap heap(/*alignment=*/1);
+  heap.Alloc(buffer_a_, 10);
+  heap.Alloc(buffer_b_, 30);
+  heap.Alloc(buffer_c_, 20);
+  heap.Alloc(buffer_d_, 40);
+  heap.Free(buffer_a_, 10);
+  heap.Free(buffer_b_, 30);
+  heap.Free(buffer_c_, 20);
+  heap.Free(buffer_d_, 40);
+
+  const HeapSimulator::Result result = heap.Finish();
+  EXPECT_EQ(100, result.heap_size);
+  EXPECT_EQ(10, result.chunk_map.at(buffer_a_).size);
+  EXPECT_EQ(30, result.chunk_map.at(buffer_b_).size);
+  EXPECT_EQ(20, result.chunk_map.at(buffer_c_).size);
+  EXPECT_EQ(40, result.chunk_map.at(buffer_d_).size);
+
+  EXPECT_EQ(90, result.chunk_map.at(buffer_a_).offset);
+  EXPECT_EQ(40, result.chunk_map.at(buffer_b_).offset);
+  EXPECT_EQ(70, result.chunk_map.at(buffer_c_).offset);
+  EXPECT_EQ(0, result.chunk_map.at(buffer_d_).offset);
+}
+
+TEST_F(GlobalDecreasingSizeBestFitHeapTest, DecreasingSizeWithAlignment) {
+  // space
+  //   ^
+  //   |      +-------+
+  //   |      +---b---+
+  //   |            +-------+
+  //   |            |       |
+  //   |            |   d   |
+  //   |  +---a---+ +-------+
+  //   |
+  //   |         +-------+
+  //   |         |       |
+  //   |         |   c   |
+  //   |         |       |
+  //   |         +-------+
+  //   ---------------------> time
+  GlobalDecreasingSizeBestFitHeap heap(/*alignment=*/20);
+  heap.Alloc(buffer_a_, 10);
+  heap.Alloc(buffer_b_, 20);
+  heap.Alloc(buffer_c_, 50);
+  heap.Free(buffer_a_, 10);
+  heap.Alloc(buffer_d_, 40);
+  heap.Free(buffer_b_, 20);
+  heap.Free(buffer_c_, 50);
+  heap.Free(buffer_d_, 40);
+
+  const HeapSimulator::Result result = heap.Finish();
+  EXPECT_EQ(120, result.heap_size);
+  EXPECT_EQ(10, result.chunk_map.at(buffer_a_).size);
+  EXPECT_EQ(20, result.chunk_map.at(buffer_b_).size);
+  EXPECT_EQ(50, result.chunk_map.at(buffer_c_).size);
+  EXPECT_EQ(40, result.chunk_map.at(buffer_d_).size);
+
+  EXPECT_EQ(60, result.chunk_map.at(buffer_a_).offset);
+  EXPECT_EQ(100, result.chunk_map.at(buffer_b_).offset);
+  EXPECT_EQ(0, result.chunk_map.at(buffer_c_).offset);
+  EXPECT_EQ(60, result.chunk_map.at(buffer_d_).offset);
+}
+
+TEST_F(GlobalDecreasingSizeBestFitHeapTest, BestFit) {
+  // space
+  //   ^
+  //   |    +-------+
+  //   |    +---b---+
+  //   |         +-------+
+  //   |         |   d   |
+  //   | +--a--+ +-------+
+  //   |      +-------+
+  //   |      |       |
+  //   |      |   c   |
+  //   |      +-------+
+  //   |           +-------+
+  //   |           |       |
+  //   |           |   e   |
+  //   |           |       |
+  //   |           +-------+
+  //   ---------------------> time
+  GlobalDecreasingSizeBestFitHeap heap(/*alignment=*/1);
+  heap.Alloc(buffer_a_, 10);
+  heap.Alloc(buffer_b_, 20);
+  heap.Alloc(buffer_c_, 40);
+  heap.Free(buffer_a_, 10);
+  heap.Alloc(buffer_d_, 30);
+  heap.Alloc(buffer_e_, 50);
+  heap.Free(buffer_b_, 20);
+  heap.Free(buffer_c_, 40);
+  heap.Free(buffer_d_, 30);
+  heap.Free(buffer_e_, 50);
+
+  const HeapSimulator::Result result = heap.Finish();
+  EXPECT_EQ(140, result.heap_size);
+  EXPECT_EQ(10, result.chunk_map.at(buffer_a_).size);
+  EXPECT_EQ(20, result.chunk_map.at(buffer_b_).size);
+  EXPECT_EQ(40, result.chunk_map.at(buffer_c_).size);
+  EXPECT_EQ(30, result.chunk_map.at(buffer_d_).size);
+  EXPECT_EQ(50, result.chunk_map.at(buffer_e_).size);
+
+  EXPECT_EQ(90, result.chunk_map.at(buffer_a_).offset);
+  EXPECT_EQ(120, result.chunk_map.at(buffer_b_).offset);
+  EXPECT_EQ(50, result.chunk_map.at(buffer_c_).offset);
+  EXPECT_EQ(90, result.chunk_map.at(buffer_d_).offset);
+  EXPECT_EQ(0, result.chunk_map.at(buffer_e_).offset);
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 86b4d8e65c62ff0be930e8c179f077cb83666aff Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 13:27:50 -0700
Subject: [PATCH 0516/1357] Don't crash on Pack nodes with no axis argument
 set.

PiperOrigin-RevId: 214035048
---
 .../grappler/optimizers/constant_folding.cc   |  3 ++-
 .../optimizers/constant_folding_test.cc       | 23 ++++++++++++++-----
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index cfbd298f11..ca5d3a6dfd 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -2106,7 +2106,8 @@ bool ConstantFolding::SimplifyPack(GraphDef* optimized_graph, NodeDef* node) {
     Tensor axis_t(DT_INT32, TensorShape({}));
     NodeDef* axis_node = optimized_graph->add_node();
     axis_node->set_name(OptimizedNodeName(*node, "_const_axis"));
-    const int axis = node->attr().at("axis").i();
+    const int axis =
+        node->attr().count("axis") == 0 ? 0 : node->attr().at("axis").i();
     if (!SetTensorValue(DT_INT32, axis, &axis_t).ok() ||
         !CreateNodeDef(axis_node->name(), TensorValue(&axis_t), axis_node)
              .ok()) {
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 2a19b3f95a..b09360a2c2 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -3015,37 +3015,48 @@ TEST_F(ConstantFoldingTest, TrivialPack) {
   auto stack =
       ops::Stack(scope.WithOpName("stack").WithControlDependencies({y}), {x},
                  ops::Stack::Axis(1));
+  auto stack_no_axis = ops::Stack(scope.WithOpName("stack_no_axis"), {x});
 
   GrapplerItem item;
   TF_CHECK_OK(scope.ToGraphDef(&item.graph));
-  item.fetch.push_back("stack");
+  item.fetch = {"stack", "stack_no_axis"};
 
   ConstantFolding optimizer(nullptr /* cpu_device */);
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
-  EXPECT_EQ(5, output.node_size());
+  EXPECT_EQ(7, output.node_size());
+  int found = 0;
   for (const auto& node : output.node()) {
     if (node.name() == "stack") {
-      EXPECT_EQ("stack", node.name());
       EXPECT_EQ("ExpandDims", node.op());
       EXPECT_EQ(3, node.input_size());
       EXPECT_EQ("x", node.input(0));
       EXPECT_EQ("ConstantFolding/stack_const_axis", node.input(1));
       EXPECT_EQ("^y", node.input(2));
+      ++found;
+    } else if (node.name() == "stack_no_axis") {
+      EXPECT_EQ("ExpandDims", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("x", node.input(0));
+      EXPECT_EQ("ConstantFolding/stack_no_axis_const_axis", node.input(1));
+      ++found;
     } else if (node.name() == "ConstantFolding/stack_const_axis") {
       EXPECT_EQ("Const", node.op());
       EXPECT_EQ(1, node.input_size());
       EXPECT_EQ("^x", node.input(0));
+      ++found;
     }
   }
+  EXPECT_EQ(found, 3);
 
-  std::vector<string> fetch = {"stack"};
+  std::vector<string> fetch = {"stack", "stack_no_axis"};
   auto tensors_expected = EvaluateNodes(item.graph, fetch);
   auto tensors = EvaluateNodes(output, fetch);
-  EXPECT_EQ(1, tensors_expected.size());
-  EXPECT_EQ(1, tensors.size());
+  EXPECT_EQ(2, tensors_expected.size());
+  EXPECT_EQ(2, tensors.size());
   EXPECT_EQ(tensors_expected[0].shape(), tensors[0].shape());
+  EXPECT_EQ(tensors_expected[1].shape(), tensors[1].shape());
 }
 
 // The test does not evalute the optimized and original graphs to check if their
-- 
GitLab


From 3f40afa0409a2b22ff5a2e735418da7724aca0e8 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Fri, 21 Sep 2018 14:00:57 -0700
Subject: [PATCH 0517/1357] Document custom contraction mappers for
 convolutions.

(1) Fix packed rhs memory layout documentation.
(2) Add documentation for custom contraction mapper in spatial_convolutions.h.

PiperOrigin-RevId: 214039947
---
 .../core/kernels/eigen_cuboid_convolution.h   | 81 +++++++++--------
 .../core/kernels/eigen_spatial_convolutions.h | 90 ++++++++++++++++---
 2 files changed, 122 insertions(+), 49 deletions(-)

diff --git a/tensorflow/core/kernels/eigen_cuboid_convolution.h b/tensorflow/core/kernels/eigen_cuboid_convolution.h
index c41fbc42d3..37414ddca3 100644
--- a/tensorflow/core/kernels/eigen_cuboid_convolution.h
+++ b/tensorflow/core/kernels/eigen_cuboid_convolution.h
@@ -40,8 +40,8 @@ namespace internal {
 // at the given vertical and horizontal offsets.
 //
 // "Virtual matrix" dimensions:
-//   *0: kernelChannels * kernelDepth * kernelRows * kernelCols;
-//    1: out_depth * out_height * out_width; * OTHERS (e.g batches, etc...)
+//   *0: kernelChannels * kernelPlanes * kernelRows * kernelCols
+//    1: out_planes * out_height * out_width * OTHERS (e.g batches, etc...)
 //
 // *) extracted patches are continuous in memory (innermost dimension assuming
 //    col major layout)
@@ -391,14 +391,13 @@ class TensorContractionInputMapper<
     const Index patchOffset = patchId / m_fastDimZero;
 
     const Index colOffset = patchOffset / m_fastColStride;
-    const Index inputCol = colIndex + colOffset;
-
     const Index rowOffset =
         (patchOffset - colOffset * m_colStride) / m_fastRowStride;
-    const Index inputRow = rowIndex + rowOffset;
-
     const Index planeOffset =
         patchOffset - colOffset * m_colStride - rowOffset * m_rowStride;
+
+    const Index inputCol = colIndex + colOffset;
+    const Index inputRow = rowIndex + rowOffset;
     const Index inputPlane = planeIndex + planeOffset;
 
     if (inputCol < 0 || inputCol >= m_inputCols || inputRow < 0 ||
@@ -524,12 +523,13 @@ class TensorContractionInputMapper<
     eigen_assert((patchId + packetSize - 1) / m_fastDimZero == patchOffset);
 
     const Index colOffset = patchOffset / m_fastColStride;
-    const Index inputCol = colIndex + colOffset;
     const Index rowOffset =
         (patchOffset - colOffset * m_colStride) / m_fastRowStride;
-    const Index inputRow = rowIndex + rowOffset;
     const Index planeOffset =
         patchOffset - colOffset * m_colStride - rowOffset * m_rowStride;
+
+    const Index inputCol = colIndex + colOffset;
+    const Index inputRow = rowIndex + rowOffset;
     const Index inputPlane = planeIndex + planeOffset;
 
     if (inputCol < 0 || inputRow < 0 || inputPlane < 0 ||
@@ -564,7 +564,7 @@ class TensorContractionInputMapper<
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void computeBaseIndices(
       Index patchIndex, Index& planeIndex, Index& rowIndex, Index& colIndex,
       Index& otherIndex) const {
-    const int NumInputDims = array_size<
+    const size_t NumInputDims = array_size<
         typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
 
     // Check if patchIndex might contain batch and other dimensions.
@@ -859,24 +859,29 @@ class TensorContractionSubMapper<
 // matrix" constructed from extracted volume patches) in contiguous memory.
 //
 // Given column major input (A0 beside A1 in memory):
-// A0 B0 C0 D0 E0 F0 G0 H0 ...
-// A1 B1 C1 D1 E1 F1 G1 H1 ...
-// A2 B2 C2 D2 E2 F2 G2 H2 ...
-// A3 B3 C3 D3 E3 F3 G3 H3 ...
-// A4 B4 C4 D4 E4 F4 G4 H4 ...
-// A5 B5 C5 D5 E5 F5 G5 H5 ...
-// A6 B6 C6 D6 E6 F6 G6 H6 ...
-// A7 B7 C7 D7 E7 F7 G7 H7 ...
+// A0 B0 C0 D0 E0 F0 G0 H0 ... Z0
+// A1 B1 C1 D1 E1 F1 G1 H1 ... Z1
+// A2 B2 C2 D2 E2 F2 G2 H2 ... Z2
+// A3 B3 C3 D3 E3 F3 G3 H3 ... Z3
+// A4 B4 C4 D4 E4 F4 G4 H4 ... Z4
+// A5 B5 C5 D5 E5 F5 G5 H5 ... Z5
+// A6 B6 C6 D6 E6 F6 G6 H6 ... Z6
+// A7 B7 C7 D7 E7 F7 G7 H7 ... Z7
 // A8 ...
 // ...
 //
-// Packing yields row major output (A0 beside A1 in memory):
-// A0 A1 A2 A3 A4 A5 A6 A7
-// B0 B1 B2 B3 B4 B5 B6 B7
-// C0 ...
+// *) A, B, C, ... - patches extracted from the original input.
+// *) A0, A1, A2 ... - values from the same patch at different offsets.
+//
+// The traversal (packed rhs memory) order (B0 besides A0 in memory):
+// A0 B0 C0 D0 A1 B1 C1 D1 ...
+// E0 F0 G0 H0 E1 F1 G1 H1 ...
 // ...
+// Z0 Z1 Z2 Z3 Z4 Z5 Z6 Z7 ... <- doesn't belong to any block (nr = 4)
+//
+// This traversal order must be the same as in default gemm_pack_rhs defined in
+// GeneralBlockPanelKernel.h.
 //
-// *) A, B, C, ... - patches extracted from the original input.
 // *) nr - number of registers along the 'n' dimension.
 //    See GeneralBlockPanelKernel.h and "Anatomy of High-Performance Matrix
 //    Multiplication" paper.
@@ -1454,7 +1459,7 @@ CuboidConvolution(const Input& input, const Kernel& kernel,
       isColMajor ? kern.dimensions()[1] : kern.dimensions()[3];
 
   // Spatial size of the kernel.
-  const TensorIndex kernelDepth =
+  const TensorIndex kernelPlanes =
       isColMajor ? kern.dimensions()[2] : kern.dimensions()[2];
   const TensorIndex kernelRows =
       isColMajor ? kern.dimensions()[3] : kern.dimensions()[1];
@@ -1474,27 +1479,27 @@ CuboidConvolution(const Input& input, const Kernel& kernel,
   const TensorIndex inputCols =
       isColMajor ? in.dimension(3) : in.dimension(NumDims - 4);
 
-  TensorIndex out_depth;
+  TensorIndex out_planes;
   TensorIndex out_height;
   TensorIndex out_width;
   switch (padding_type) {
     case PADDING_VALID:
-      out_depth = Eigen::divup(inputPlanes - kernelDepth + 1,
-                               static_cast<TensorIndex>(stridePlanes));
+      out_planes = Eigen::divup(inputPlanes - kernelPlanes + 1,
+                                static_cast<TensorIndex>(stridePlanes));
       out_height = Eigen::divup(inputRows - kernelRows + 1,
                                 static_cast<TensorIndex>(strideRows));
       out_width = Eigen::divup(inputCols - kernelCols + 1,
                                static_cast<TensorIndex>(strideCols));
       break;
     case PADDING_SAME:
-      out_depth =
+      out_planes =
           Eigen::divup(inputPlanes, static_cast<TensorIndex>(stridePlanes));
       out_height =
           Eigen::divup(inputRows, static_cast<TensorIndex>(strideRows));
       out_width = Eigen::divup(inputCols, static_cast<TensorIndex>(strideCols));
       break;
     default:
-      out_depth = 0;
+      out_planes = 0;
       out_height = 0;
       out_width = 0;
       eigen_assert(false && "unexpected padding");
@@ -1503,9 +1508,9 @@ CuboidConvolution(const Input& input, const Kernel& kernel,
   DSizes<TensorIndex, 2> kernel_dims;
   if (isColMajor) {
     kernel_dims[0] = kernelFilters;
-    kernel_dims[1] = kernelChannels * kernelDepth * kernelRows * kernelCols;
+    kernel_dims[1] = kernelChannels * kernelPlanes * kernelRows * kernelCols;
   } else {
-    kernel_dims[0] = kernelChannels * kernelDepth * kernelRows * kernelCols;
+    kernel_dims[0] = kernelChannels * kernelPlanes * kernelRows * kernelCols;
     kernel_dims[1] = kernelFilters;
   }
 
@@ -1516,15 +1521,15 @@ CuboidConvolution(const Input& input, const Kernel& kernel,
   DSizes<TensorIndex, 2> pre_contract_dims;
   if (isColMajor) {
     pre_contract_dims[0] =
-        kernelChannels * kernelDepth * kernelRows * kernelCols;
-    pre_contract_dims[1] = out_depth * out_height * out_width;
+        kernelChannels * kernelPlanes * kernelRows * kernelCols;
+    pre_contract_dims[1] = out_planes * out_height * out_width;
     for (int i = 4; i < NumDims; ++i) {
       pre_contract_dims[1] *= in.dimension(i);
     }
   } else {
     pre_contract_dims[1] =
-        kernelChannels * kernelDepth * kernelRows * kernelCols;
-    pre_contract_dims[0] = out_depth * out_height * out_width;
+        kernelChannels * kernelPlanes * kernelRows * kernelCols;
+    pre_contract_dims[0] = out_planes * out_height * out_width;
     for (int i = 0; i < NumDims - 4; ++i) {
       pre_contract_dims[0] *= in.dimension(i);
     }
@@ -1543,7 +1548,7 @@ CuboidConvolution(const Input& input, const Kernel& kernel,
   DSizes<TensorIndex, NumDims> post_contract_dims;
   if (isColMajor) {
     post_contract_dims[0] = kernelFilters;
-    post_contract_dims[1] = out_depth;
+    post_contract_dims[1] = out_planes;
     post_contract_dims[2] = out_height;
     post_contract_dims[3] = out_width;
     for (int i = 4; i < NumDims; ++i) {
@@ -1551,7 +1556,7 @@ CuboidConvolution(const Input& input, const Kernel& kernel,
     }
   } else {
     post_contract_dims[NumDims - 1] = kernelFilters;
-    post_contract_dims[NumDims - 2] = out_depth;
+    post_contract_dims[NumDims - 2] = out_planes;
     post_contract_dims[NumDims - 3] = out_height;
     post_contract_dims[NumDims - 4] = out_width;
     for (int i = 0; i < NumDims - 4; ++i) {
@@ -1564,13 +1569,13 @@ CuboidConvolution(const Input& input, const Kernel& kernel,
       kernel.reshape(kernel_dims)
           .contract(input
                         .extract_volume_patches(
-                            kernelDepth, kernelRows, kernelCols, stridePlanes,
+                            kernelPlanes, kernelRows, kernelCols, stridePlanes,
                             strideRows, strideCols, padding_type)
                         .reshape(pre_contract_dims),
                     contract_dims)
           .reshape(post_contract_dims),
       input
-          .extract_volume_patches(kernelDepth, kernelRows, kernelCols,
+          .extract_volume_patches(kernelPlanes, kernelRows, kernelCols,
                                   stridePlanes, strideRows, strideCols,
                                   padding_type)
           .reshape(pre_contract_dims)
diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h
index a4dff4b91c..8bd362db45 100644
--- a/tensorflow/core/kernels/eigen_spatial_convolutions.h
+++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h
@@ -22,8 +22,36 @@ namespace Eigen {
 
 namespace internal {
 
-// TODO: Consolidate this part of the code with the image patch extraction code
-// since they are both very similar.
+// WARNING: Most of the code here implicitly assumes that the matrix is in
+// ColMajor layout. This is guaranteed by the tensor contraction (see
+// TensorContraction.h).
+//
+// Inside Eigen a tensor contraction is represented by a matrix multiplication.
+// We don't want to actually extract image patches and reshape the result into
+// a matrix (this involves allocating huge extra memory), so the patch
+// extraction and reshape operations are implicit.
+//
+// TensorContractionInputMapper takes a matrix index and returns the coefficient
+// (or the packet) of the "virtual tensor", that would be at that index if we
+// were to actually reshape the result of patch extraction.
+//
+// TensorContractionSubMapper provides a similar view into the "virtual matrix"
+// at the given vertical and horizontal offsets.
+//
+// "Virtual matrix" dimensions:
+//   *0: kernelChannels * kernelRows * kernelCols;
+//    1: out_height * out_width; * OTHERS (e.g batches, etc...)
+//
+// *) extracted patches are continuous in memory (innermost dimension assuming
+//    col major layout)
+//
+// With this dimensions:
+//   row - offset within a single patch (in code: patchId)
+//   col - index of the extracted patch (in code: patchIndex)
+//         patchIndex ∈ [0..num_patches * OTHERS] (batch and other dimensions)
+//
+// TODO(ezhulenev): Consolidate this part of the code with the image patch
+// extraction code since they are both very similar.
 template <typename NewDimension, DenseIndex Rows, DenseIndex Cols,
           typename ArgType, typename Device, typename Scalar_, typename Index,
           typename nocontract_t, typename contract_t, int Side, int packet_size,
@@ -238,6 +266,8 @@ class TensorContractionInputMapper<
       nocontract_t, contract_t, packet_size, inner_dim_contiguous,
       inner_dim_reordered, Alignment>;
 
+  // Load coefficient from a patch specified by the "within patch offset"
+  // (patchId) and the precomputed indices of the first element of the patch.
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE Scalar loadCoeff(Index patchId, Index rowIndex,
                                        Index colIndex, Index otherIndex) const {
@@ -250,6 +280,7 @@ class TensorContractionInputMapper<
         (m_patch_col_inflate_strides == 1)
             ? inputCol
             : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
+
     const Index rowOffset = patchOffset - colOffset * m_colStride;
     const Index inputRow = rowIndex + rowOffset * m_in_row_strides;
     const Index origInputRow =
@@ -268,6 +299,8 @@ class TensorContractionInputMapper<
     return m_impl.coeff(inputIndex);
   }
 
+  // This is the same as loadCoeff(...), but optimized for all `inflate_strides`
+  // and `in_strides` equal to 1 (template specialization without templates).
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE Scalar loadCoeffStandard(Index patchId, Index rowIndex,
                                                Index colIndex,
@@ -276,10 +309,9 @@ class TensorContractionInputMapper<
 
     // Find the offset of the element wrt the location of the first element.
     const Index patchOffset = patchId / m_fastDimZero;
-
     const Index colOffset = patchOffset / m_fastColStride;
-    const Index inputCol = colIndex + colOffset;
     const Index rowOffset = patchOffset - colOffset * m_colStride;
+    const Index inputCol = colIndex + colOffset;
     const Index inputRow = rowIndex + rowOffset;
     if (inputCol < 0 || inputCol >= m_inputCols || inputRow < 0 ||
         inputRow >= m_inputRows) {
@@ -291,6 +323,8 @@ class TensorContractionInputMapper<
     return m_impl.coeff(inputIndex);
   }
 
+  // Load packet from a patch specified by the "within patch offset"
+  // (patchId) and the precomputed indices of the first element of the patch.
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Packet loadPacket(Index patchId, Index rowIndex,
                                         Index colIndex,
@@ -318,12 +352,14 @@ class TensorContractionInputMapper<
     if ((patchDepth() % packetSize) == 0) {
       return loadPacketFast(patchId, rowIndex, colIndex, otherIndex);
     } else {
+      // Offsets and input calculation here are identical to
+      // loadCoeffStandard(...), but repeated twice.
+
       const Index patchOffsets[2] = {
           patchId / m_fastDimZero, (patchId + packetSize - 1) / m_fastDimZero};
 
       const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride,
                                    patchOffsets[1] / m_fastColStride};
-
       const Index inputCols[2] = {colIndex + colOffsets[0],
                                   colIndex + colOffsets[1]};
       if (inputCols[0] >= m_inputCols || inputCols[1] < 0) {
@@ -371,8 +407,8 @@ class TensorContractionInputMapper<
     eigen_assert((patchId + packetSize - 1) / m_fastDimZero == patchOffset);
 
     const Index colOffset = patchOffset / m_fastColStride;
-    const Index inputCol = colIndex + colOffset;
     const Index rowOffset = patchOffset - colOffset * m_colStride;
+    const Index inputCol = colIndex + colOffset;
     const Index inputRow = rowIndex + rowOffset;
     if (inputCol < 0 || inputRow < 0 || inputCol >= m_inputCols ||
         inputRow >= m_inputRows) {
@@ -401,7 +437,7 @@ class TensorContractionInputMapper<
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void computeBaseIndices(
       Index patchIndex, Index& rowIndex, Index& colIndex,
       Index& otherIndex) const {
-    const int NumInputDims = array_size<
+    const size_t NumInputDims = array_size<
         typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
     otherIndex = (NumInputDims == 3) ? 0 : patchIndex / m_fastNumPatches;
     const Index patch2DIndex = (NumInputDims == 3)
@@ -617,12 +653,44 @@ class TensorContractionSubMapper<
   Index m_depth_offset;               // First row in the input matrix
   Index m_col_offset;                 // First col in the input matrix
 
-  Index m_rowIndex;  // precomputed row index corresponding to the col offset
-  Index m_colIndex;  // precomputed col index corresponding to the col offset
-  Index
-      m_otherIndex;  // precomputed other index corresponding to the col offset
+  // Knowing that: col_offset == patchIndex * OTHERS, we keep precomputed base
+  // indices for the first element in a patch specified by col_offset
+  // (see computeBaseIndices(...) for details).
+  Index m_rowIndex;
+  Index m_colIndex;
+  Index m_otherIndex;
 };
 
+// Arrange a block of the right input matrix (in our case it's always a "virtual
+// matrix" constructed from extracted image patches) in contiguous memory.
+//
+// Given column major input (A0 beside A1 in memory):
+// A0 B0 C0 D0 E0 F0 G0 H0 ... Z0
+// A1 B1 C1 D1 E1 F1 G1 H1 ... Z1
+// A2 B2 C2 D2 E2 F2 G2 H2 ... Z2
+// A3 B3 C3 D3 E3 F3 G3 H3 ... Z3
+// A4 B4 C4 D4 E4 F4 G4 H4 ... Z4
+// A5 B5 C5 D5 E5 F5 G5 H5 ... Z5
+// A6 B6 C6 D6 E6 F6 G6 H6 ... Z6
+// A7 B7 C7 D7 E7 F7 G7 H7 ... Z7
+// A8 ...
+// ...
+//
+// *) A, B, C, ... - patches extracted from the original input.
+// *) A0, A1, A2 ... - values from the same patch at different offsets.
+//
+// The traversal (packed rhs memory) order (B0 besides A0 in memory):
+// A0 B0 C0 D0 A1 B1 C1 D1 ...
+// E0 F0 G0 H0 E1 F1 G1 H1 ...
+// ...
+// Z0 Z1 Z2 Z3 Z4 Z5 Z6 Z7 ... <- doesn't belong to any block (nr = 4)
+//
+// This traversal order must be the same as in default gemm_pack_rhs defined in
+// GeneralBlockPanelKernel.h.
+//
+// *) nr - number of registers along the 'n' dimension.
+//    See GeneralBlockPanelKernel.h and "Anatomy of High-Performance Matrix
+//    Multiplication" paper.
 template <typename NewDimension, DenseIndex Rows, DenseIndex Cols,
           typename ArgType, typename Device, typename Scalar, typename Index,
           typename nocontract_t, typename contract_t, int packet_size,
-- 
GitLab


From f4de7ec889311c42b3af4d5f34f7d31f56f73177 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Fri, 21 Sep 2018 14:05:14 -0700
Subject: [PATCH 0518/1357] Fixes a bug for the case when the
 MultiDeviceIterator waits on background thread to finish even if None is
 running.

PiperOrigin-RevId: 214040824
---
 .../contrib/data/kernels/prefetching_kernels.cc     | 13 ++++++++++++-
 .../python/kernel_tests/prefetching_ops_test.py     |  9 +++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc
index 078de717e0..39f23f7b24 100644
--- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc
+++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc
@@ -621,7 +621,13 @@ class MultiDeviceIterator : public ResourceBase {
           incarnation_id_(incarnation_id),
           host_iterator_(std::move(host_iterator)) {}
 
-    ~MultiDeviceBuffer() { Reset(); }
+    ~MultiDeviceBuffer() {
+      {
+        mutex_lock l(mu_);
+        if (!background_thread_started_) return;
+      }
+      Reset();
+    }
 
     void Reset() LOCKS_EXCLUDED(mu_) {
       {
@@ -731,6 +737,10 @@ class MultiDeviceIterator : public ResourceBase {
     }
 
     void BackgroundThread(IteratorContext* ctx) {
+      {
+        mutex_lock l(mu_);
+        background_thread_started_ = true;
+      }
       std::unique_ptr<IteratorContext> cleanup(ctx);
       int shard_to_fetch = 0;
       while (true) {
@@ -799,6 +809,7 @@ class MultiDeviceIterator : public ResourceBase {
     mutex mu_;
     std::unique_ptr<Thread> background_thread_ GUARDED_BY(mu_);
     bool background_thread_finished_ GUARDED_BY(mu_) = false;
+    bool background_thread_started_ GUARDED_BY(mu_) = false;
     bool cancelled_ GUARDED_BY(mu_) = false;
     condition_variable shutdown_cond_var_ GUARDED_BY(mu_);
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
index 0166ba0d44..5b17511e41 100644
--- a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
@@ -946,6 +946,15 @@ class CopyToDeviceTest(test.TestCase):
 
 class MultiDeviceIteratorTest(test.TestCase):
 
+  def testNoGetNext(self):
+    dataset = dataset_ops.Dataset.range(10)
+    multi_device_iterator = prefetching_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/cpu:2"])
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 3})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+
   def testBasic(self):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-- 
GitLab


From d0caa5a700dd36b7ac92be2722deaca9a4e23ef4 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Fri, 21 Sep 2018 14:14:36 -0700
Subject: [PATCH 0519/1357] Ensure that no capture is done unless Match()
 return true. Otherwise the application that relies on such behavior is hard
 to get right.

To implement this, we need to be careful about AllOf, so that no capture
is done unless all sub-patterns succeed. This leads to the solution that
we have to run all patterns twice, first time with no captures, and
second time to capture.

PiperOrigin-RevId: 214042307
---
 .../compiler/xla/service/pattern_matcher.h    | 177 ++++++++++--------
 .../xla/service/pattern_matcher_test.cc       |  38 ++++
 2 files changed, 137 insertions(+), 78 deletions(-)

diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h
index 0bcf67c1be..63b51fc8c9 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher.h
+++ b/tensorflow/compiler/xla/service/pattern_matcher.h
@@ -120,38 +120,50 @@ namespace xla {
 //                                              .WithOperand(1, Op(&c))
 //                                              .WithOperand(2, Op(&d))
 //
+
+struct MatchOption {
+  // If true, actually capture matched item into the user pointer.
+  bool capture;
+};
+
 template <typename Value, typename Pattern>
-bool Match(Value* value, const Pattern& pattern) {
-  return pattern.Match(value);
+bool Match(Value* value, const Pattern& pattern,
+           MatchOption option = {/*.capture=*/true}) {
+  if (option.capture) {
+    auto new_option = option;
+    new_option.capture = false;
+    if (!pattern.Match(value, new_option)) {
+      return false;
+    }
+  }
+  return pattern.Match(value, option);
 }
 
 namespace match {
 
 namespace detail {
+
 template <typename Item, typename... Patterns>
 class AllOfPattern {
  public:
   explicit AllOfPattern(const Patterns&... patterns) : patterns_(patterns...) {}
 
-  bool Match(const Item* item) const {
-    return MatchImpl(item, std::integral_constant<size_t, 0>());
+  bool Match(const Item* item, MatchOption option) const {
+    return MatchImpl(item, option,
+                     absl::make_index_sequence<sizeof...(Patterns)>());
   }
 
-  bool Match(Item* item) const {
-    return MatchImpl(item, std::integral_constant<size_t, 0>());
+  bool Match(Item* item, MatchOption option) const {
+    return MatchImpl(item, option,
+                     absl::make_index_sequence<sizeof...(Patterns)>());
   }
 
  private:
-  template <typename ItemType, size_t index>
-  bool MatchImpl(ItemType* item, std::integral_constant<size_t, index>) const {
-    return std::get<index>(patterns_).Match(item) &&
-           MatchImpl(item, std::integral_constant<size_t, index + 1>());
-  }
-
-  template <typename ItemType>
-  bool MatchImpl(ItemType* item,
-                 std::integral_constant<size_t, sizeof...(Patterns)>) const {
-    return true;
+  template <typename ItemType, size_t... indices>
+  bool MatchImpl(ItemType* item, MatchOption option,
+                 absl::index_sequence<indices...>) const {
+    return std::min<bool>(
+        {std::get<indices>(patterns_).Match(item, option)...});
   }
 
   std::tuple<Patterns...> patterns_;
@@ -181,7 +193,9 @@ class LayoutPattern;
 // nullptr.
 class LayoutPatternBaseImpl {
  public:
-  bool Match(const ::xla::Layout* layout) const { return layout != nullptr; }
+  bool Match(const ::xla::Layout* layout, MatchOption option) const {
+    return layout != nullptr;
+  }
 };
 
 // A LayoutPattern implementation that matches only if the layout equals a
@@ -191,7 +205,7 @@ class LayoutPatternEqualImpl {
   explicit constexpr LayoutPatternEqualImpl(const ::xla::Layout* layout)
       : layout_(layout) {}
 
-  bool Match(const ::xla::Layout* layout) const {
+  bool Match(const ::xla::Layout* layout, MatchOption option) const {
     return LayoutUtil::Equal(*layout_, *layout);
   }
 
@@ -205,7 +219,7 @@ class LayoutPatternFormatImpl {
  public:
   explicit constexpr LayoutPatternFormatImpl(Format format) : format_(format) {}
 
-  bool Match(const ::xla::Layout* layout) const {
+  bool Match(const ::xla::Layout* layout, MatchOption option) const {
     return layout->format() == format_;
   }
 
@@ -231,9 +245,9 @@ class LayoutPattern {
       : impl_(impl), matched_layout_(matched_layout) {}
 
   // Returns true and captures the layout iff it matches the pattern.
-  bool Match(const ::xla::Layout* layout) const {
-    if (impl_.Match(layout)) {
-      if (matched_layout_) {
+  bool Match(const ::xla::Layout* layout, MatchOption option) const {
+    if (impl_.Match(layout, option)) {
+      if (option.capture && matched_layout_) {
         *matched_layout_ = layout;
       }
       return true;
@@ -242,9 +256,9 @@ class LayoutPattern {
   }
 
   // Returns true and captures the layout iff it matches the pattern.
-  bool Match(::xla::Layout* layout) const {
-    if (impl_.Match(layout)) {
-      if (matched_layout_) {
+  bool Match(::xla::Layout* layout, MatchOption option) const {
+    if (impl_.Match(layout, option)) {
+      if (option.capture && matched_layout_) {
         *matched_layout_ = layout;
       }
       return true;
@@ -281,23 +295,24 @@ class AnyOfPattern {
  public:
   explicit AnyOfPattern(const Patterns&... patterns) : patterns_(patterns...) {}
 
-  bool Match(const Item* item) const {
-    return MatchImpl(item, std::integral_constant<size_t, 0>());
+  bool Match(const Item* item, MatchOption option) const {
+    return MatchImpl(item, option, std::integral_constant<size_t, 0>());
   }
 
-  bool Match(Item* item) const {
-    return MatchImpl(item, std::integral_constant<size_t, 0>());
+  bool Match(Item* item, MatchOption option) const {
+    return MatchImpl(item, option, std::integral_constant<size_t, 0>());
   }
 
  private:
   template <typename ItemType, size_t index>
-  bool MatchImpl(ItemType* item, std::integral_constant<size_t, index>) const {
-    return std::get<index>(patterns_).Match(item) ||
-           MatchImpl(item, std::integral_constant<size_t, index + 1>());
+  bool MatchImpl(ItemType* item, MatchOption option,
+                 std::integral_constant<size_t, index>) const {
+    return std::get<index>(patterns_).Match(item, option) ||
+           MatchImpl(item, option, std::integral_constant<size_t, index + 1>());
   }
 
   template <typename ItemType>
-  bool MatchImpl(ItemType* item,
+  bool MatchImpl(ItemType* item, MatchOption option,
                  std::integral_constant<size_t, sizeof...(Patterns)>) const {
     return false;
   }
@@ -345,7 +360,9 @@ class ShapePattern;
 // nullptr.
 class ShapePatternBaseImpl {
  public:
-  bool Match(const ::xla::Shape* shape) const { return shape != nullptr; }
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
+    return shape != nullptr;
+  }
 };
 
 // A ShapePattern implementation that matches only if the shape equals a Shape
@@ -355,7 +372,7 @@ class ShapePatternEqualImpl {
   explicit constexpr ShapePatternEqualImpl(const ::xla::Shape* shape)
       : shape_(shape) {}
 
-  bool Match(const ::xla::Shape* shape) const {
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
     return ShapeUtil::Equal(*shape_, *shape);
   }
 
@@ -370,7 +387,7 @@ class ShapePatternCompatibleImpl {
   explicit constexpr ShapePatternCompatibleImpl(const ::xla::Shape* shape)
       : shape_(shape) {}
 
-  bool Match(const ::xla::Shape* shape) const {
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
     return ShapeUtil::Compatible(*shape_, *shape);
   }
 
@@ -385,7 +402,7 @@ class ShapePatternElementTypeImpl {
   explicit constexpr ShapePatternElementTypeImpl(PrimitiveType element_type)
       : element_type_(element_type) {}
 
-  bool Match(const ::xla::Shape* shape) const {
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
     return shape->element_type() == element_type_;
   }
 
@@ -398,7 +415,7 @@ class ShapePatternIsScalarImpl {
  public:
   explicit constexpr ShapePatternIsScalarImpl() {}
 
-  bool Match(const ::xla::Shape* shape) const {
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
     return ShapeUtil::IsScalar(*shape);
   }
 };
@@ -408,7 +425,7 @@ class ShapePatternIsArrayImpl {
  public:
   explicit constexpr ShapePatternIsArrayImpl() {}
 
-  bool Match(const ::xla::Shape* shape) const {
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
     return ShapeUtil::IsArray(*shape);
   }
 };
@@ -418,7 +435,7 @@ class ShapePatternIsTupleImpl {
  public:
   explicit constexpr ShapePatternIsTupleImpl() {}
 
-  bool Match(const ::xla::Shape* shape) const {
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
     return ShapeUtil::IsTuple(*shape);
   }
 };
@@ -429,7 +446,7 @@ class ShapePatternRankImpl {
  public:
   explicit constexpr ShapePatternRankImpl(int64 rank) : rank_(rank) {}
 
-  bool Match(const ::xla::Shape* shape) const {
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
     return ShapeUtil::Rank(*shape) == rank_;
   }
 
@@ -446,13 +463,14 @@ class ShapePatternLayoutImpl {
       const LayoutPattern<LayoutType, LayoutImpl>& layout)
       : layout_(layout) {}
 
-  bool Match(const ::xla::Shape* shape) const {
-    return LayoutUtil::HasLayout(*shape) && layout_.Match(&shape->layout());
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
+    return LayoutUtil::HasLayout(*shape) &&
+           layout_.Match(&shape->layout(), option);
   }
 
-  bool Match(Shape* shape) const {
+  bool Match(Shape* shape, MatchOption option) const {
     return LayoutUtil::HasLayout(*shape) &&
-           layout_.Match(shape->mutable_layout());
+           layout_.Match(shape->mutable_layout(), option);
   }
 
  private:
@@ -469,14 +487,15 @@ class ShapePatternSubshapeImpl {
       const ShapePattern<SubshapeType, SubshapeImpl>& subshape)
       : index_(index), subshape_(subshape) {}
 
-  bool Match(const ::xla::Shape* shape) const {
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
     return ShapeUtil::IndexIsValid(*shape, index_) &&
-           subshape_.Match(&ShapeUtil::GetSubshape(*shape, index_));
+           subshape_.Match(&ShapeUtil::GetSubshape(*shape, index_), option);
   }
 
-  bool Match(::xla::Shape* shape) const {
+  bool Match(::xla::Shape* shape, MatchOption option) const {
     return ShapeUtil::IndexIsValid(*shape, index_) &&
-           subshape_.Match(ShapeUtil::GetMutableSubshape(shape, index_));
+           subshape_.Match(ShapeUtil::GetMutableSubshape(shape, index_),
+                           option);
   }
 
  private:
@@ -500,9 +519,9 @@ class ShapePattern {
       : impl_(impl), matched_shape_(matched_shape) {}
 
   // Returns true and captures the shape iff it matches the pattern.
-  bool Match(const ::xla::Shape* shape) const {
-    if (impl_.Match(shape)) {
-      if (matched_shape_) {
+  bool Match(const ::xla::Shape* shape, MatchOption option) const {
+    if (impl_.Match(shape, option)) {
+      if (option.capture && matched_shape_) {
         *matched_shape_ = shape;
       }
       return true;
@@ -511,9 +530,9 @@ class ShapePattern {
   }
 
   // Returns true and captures the shape iff it matches the pattern.
-  bool Match(::xla::Shape* shape) const {
-    if (impl_.Match(shape)) {
-      if (matched_shape_) {
+  bool Match(::xla::Shape* shape, MatchOption option) const {
+    if (impl_.Match(shape, option)) {
+      if (option.capture && matched_shape_) {
         *matched_shape_ = shape;
       }
       return true;
@@ -660,7 +679,7 @@ class HloInstructionPattern;
 // instruction is not nullptr.
 class HloInstructionPatternBaseImpl {
  public:
-  bool Match(const ::xla::HloInstruction* inst) const {
+  bool Match(const ::xla::HloInstruction* inst, MatchOption option) const {
     return inst != nullptr;
   }
 };
@@ -672,7 +691,7 @@ class HloInstructionPatternNameImpl {
   explicit HloInstructionPatternNameImpl(absl::string_view name)
       : name_(name) {}
 
-  bool Match(const ::xla::HloInstruction* inst) const {
+  bool Match(const ::xla::HloInstruction* inst, MatchOption option) const {
     return inst->name() == name_;
   }
 
@@ -688,7 +707,7 @@ class HloInstructionPatternOpcodeImpl {
                                                      bool invert)
       : opcode_(opcode), invert_(invert) {}
 
-  bool Match(const ::xla::HloInstruction* inst) const {
+  bool Match(const ::xla::HloInstruction* inst, MatchOption option) const {
     return (invert_ ^ (inst->opcode() == opcode_));
   }
 
@@ -706,12 +725,12 @@ class HloInstructionPatternShapeImpl {
       const ShapePattern<ShapeType, ShapeImpl>& shape)
       : shape_(shape) {}
 
-  bool Match(const ::xla::HloInstruction* inst) const {
-    return shape_.Match(&inst->shape());
+  bool Match(const ::xla::HloInstruction* inst, MatchOption option) const {
+    return shape_.Match(&inst->shape(), option);
   }
 
-  bool Match(::xla::HloInstruction* inst) const {
-    return shape_.Match(inst->mutable_shape());
+  bool Match(::xla::HloInstruction* inst, MatchOption option) const {
+    return shape_.Match(inst->mutable_shape(), option);
   }
 
  private:
@@ -728,14 +747,14 @@ class HloInstructionPatternOperandImpl {
       const HloInstructionPattern<OperandType, OperandImpl>& operand)
       : operand_index_(operand_index), operand_(operand) {}
 
-  bool Match(const ::xla::HloInstruction* inst) const {
+  bool Match(const ::xla::HloInstruction* inst, MatchOption option) const {
     return operand_index_ < inst->operand_count() &&
-           operand_.Match(inst->operand(operand_index_));
+           operand_.Match(inst->operand(operand_index_), option);
   }
 
-  bool Match(::xla::HloInstruction* inst) const {
+  bool Match(::xla::HloInstruction* inst, MatchOption option) const {
     return operand_index_ < inst->operand_count() &&
-           operand_.Match(inst->mutable_operand(operand_index_));
+           operand_.Match(inst->mutable_operand(operand_index_), option);
   }
 
  private:
@@ -751,11 +770,11 @@ class HloInstructionPatternFusionKindImpl {
       ::xla::HloInstruction::FusionKind kind)
       : kind_(kind) {}
 
-  bool Match(const ::xla::HloInstruction* inst) const {
+  bool Match(const ::xla::HloInstruction* inst, MatchOption option) const {
     return inst->opcode() == HloOpcode::kFusion && inst->fusion_kind() == kind_;
   }
 
-  bool Match(::xla::HloInstruction* inst) const {
+  bool Match(::xla::HloInstruction* inst, MatchOption option) const {
     return inst->opcode() == HloOpcode::kFusion && inst->fusion_kind() == kind_;
   }
 
@@ -770,12 +789,12 @@ class HloInstructionPatternTupleIndexImpl {
   explicit constexpr HloInstructionPatternTupleIndexImpl(int64 tuple_index)
       : tuple_index_(tuple_index) {}
 
-  bool Match(const ::xla::HloInstruction* inst) const {
+  bool Match(const ::xla::HloInstruction* inst, MatchOption option) const {
     return inst->opcode() == HloOpcode::kGetTupleElement &&
            inst->tuple_index() == tuple_index_;
   }
 
-  bool Match(::xla::HloInstruction* inst) const {
+  bool Match(::xla::HloInstruction* inst, MatchOption option) const {
     return inst->opcode() == HloOpcode::kGetTupleElement &&
            inst->tuple_index() == tuple_index_;
   }
@@ -789,9 +808,11 @@ class HloPredicatePatternImpl {
  public:
   explicit HloPredicatePatternImpl(Predicate pred) : pred_(std::move(pred)) {}
 
-  bool Match(const ItemType* item) const { return pred_(item); }
+  bool Match(const ItemType* item, MatchOption option) const {
+    return pred_(item);
+  }
 
-  bool Match(ItemType* item) const { return pred_(item); }
+  bool Match(ItemType* item, MatchOption option) const { return pred_(item); }
 
  private:
   Predicate pred_;
@@ -818,9 +839,9 @@ class HloInstructionPattern {
       : impl_(impl), matched_inst_(matched_inst) {}
 
   // Returns true and captures the instruction iff it matches the pattern.
-  bool Match(const ::xla::HloInstruction* inst) const {
-    if (impl_.Match(inst)) {
-      if (matched_inst_) {
+  bool Match(const ::xla::HloInstruction* inst, MatchOption option) const {
+    if (impl_.Match(inst, option)) {
+      if (option.capture && matched_inst_) {
         *matched_inst_ = inst;
       }
       return true;
@@ -829,9 +850,9 @@ class HloInstructionPattern {
   }
 
   // Returns true and captures the instruction iff it matches the pattern.
-  bool Match(::xla::HloInstruction* inst) const {
-    if (impl_.Match(inst)) {
-      if (matched_inst_) {
+  bool Match(::xla::HloInstruction* inst, MatchOption option) const {
+    if (impl_.Match(inst, option)) {
+      if (option.capture && matched_inst_) {
         *matched_inst_ = inst;
       }
       return true;
diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
index 7bd27268aa..d4e128bd70 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc
+++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
@@ -318,5 +318,43 @@ TEST(PatternMatcherTest, AllOf) {
       Match(root, AllOf<HloInstruction>(Broadcast(Op()), scalar_pattern)));
 }
 
+TEST(PatternMatcherTest, AllOfNoCaptureIfNotMatch) {
+  using match::AllOf;
+  using match::Broadcast;
+  using match::Constant;
+  using match::Op;
+
+  constexpr char kModuleStr[] = R"(
+    HloModule test_module
+    ENTRY test {
+      ROOT v = f16[] constant(42)
+    })";
+  TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr));
+  auto* root = hlo_module->entry_computation()->root_instruction();
+
+  const HloInstruction* constant = nullptr;
+  ASSERT_FALSE(
+      Match(root, AllOf<HloInstruction>(Constant(&constant), Broadcast(Op()))));
+  EXPECT_EQ(nullptr, constant);
+  ASSERT_TRUE(Match(root, Constant(&constant)));
+  EXPECT_NE(nullptr, constant);
+}
+
+TEST(PatternMatcherTest, TestNoCapture) {
+  using match::Constant;
+
+  constexpr char kModuleStr[] = R"(
+    HloModule test_module
+    ENTRY test {
+      ROOT v = f16[] constant(42)
+    })";
+  TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr));
+  auto* root = hlo_module->entry_computation()->root_instruction();
+
+  const HloInstruction* constant = nullptr;
+  ASSERT_TRUE(Match(root, Constant(&constant), {/*capture=*/false}));
+  EXPECT_EQ(nullptr, constant);
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 29ed4a334c22f235ad1d1c3f6224cc9810c925f2 Mon Sep 17 00:00:00 2001
From: Elms <elms@freshred.net>
Date: Fri, 21 Sep 2018 14:26:38 -0700
Subject: [PATCH 0520/1357] Fix tflite Makefile so it can find absl includes

---
 tensorflow/contrib/lite/tools/make/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/lite/tools/make/Makefile b/tensorflow/contrib/lite/tools/make/Makefile
index 59bdb10811..16012a3fb1 100644
--- a/tensorflow/contrib/lite/tools/make/Makefile
+++ b/tensorflow/contrib/lite/tools/make/Makefile
@@ -30,6 +30,7 @@ INCLUDES := \
 -I$(MAKEFILE_DIR)/../../../../../../ \
 -I$(MAKEFILE_DIR)/downloads/ \
 -I$(MAKEFILE_DIR)/downloads/eigen \
+-I$(MAKEFILE_DIR)/downloads/absl \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/neon_2_sse \
 -I$(MAKEFILE_DIR)/downloads/farmhash/src \
-- 
GitLab


From e3108ea446b8b07d6a4aaca9667aff6ff5151a51 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 14:43:06 -0700
Subject: [PATCH 0521/1357] Fix bias feature being selected for splitting. The
 previous logic was broken for cases where all the examples in the last
 partition just had missing values. In those cases, the range that was
 selected for the leaf previous to the last included the bias value for the
 last leaf.

PiperOrigin-RevId: 214046965
---
 .../kernels/split_handler_ops.cc              | 23 ++---
 .../batch/categorical_split_handler_test.py   | 86 +++++++++++++++++++
 2 files changed, 98 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
index af7006bff2..8edb5d6c64 100644
--- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
@@ -739,21 +739,22 @@ class BuildCategoricalEqualitySplitsOp : public OpKernel {
 
     // Find the number of unique partitions before we allocate the output.
     std::vector<int32> partition_boundaries;
-    std::vector<int32> non_empty_partitions;
-    for (int i = 0; i < partition_ids.size() - 1; ++i) {
+    partition_boundaries.push_back(0);
+    for (int i = 1; i < partition_ids.size(); ++i) {
       // Make sure the input is sorted by partition_ids;
-      CHECK_LE(partition_ids(i), partition_ids(i + 1));
-      if (i == 0 || partition_ids(i) != partition_ids(i - 1)) {
+      OP_REQUIRES(context, partition_ids(i - 1) <= partition_ids(i),
+                  errors::InvalidArgument("Partition IDs must be sorted."));
+      if (partition_ids(i) != partition_ids(i - 1)) {
         partition_boundaries.push_back(i);
-        // Some partitions might only have bias feature. We don't want to split
-        // those so check that the partition has at least 2 features.
-        if (partition_ids(i) == partition_ids(i + 1)) {
-          non_empty_partitions.push_back(partition_boundaries.size() - 1);
-        }
       }
     }
-    if (partition_ids.size() > 0) {
-      partition_boundaries.push_back(partition_ids.size());
+    std::vector<int32> non_empty_partitions;
+    partition_boundaries.push_back(partition_ids.size());
+    for (int i = 0; i < partition_boundaries.size() - 1; ++i) {
+      // We want to ignore partitions with only the bias term.
+      if (partition_boundaries[i + 1] - partition_boundaries[i] >= 2) {
+        non_empty_partitions.push_back(i);
+      }
     }
     int num_elements = non_empty_partitions.size();
     Tensor* output_partition_ids_t = nullptr;
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler_test.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler_test.py
index 94ea7bc2eb..c050c2ed7f 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler_test.py
+++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler_test.py
@@ -577,6 +577,92 @@ class EqualitySplitHandlerTest(test_util.TensorFlowTestCase):
     self.assertEqual(len(gains), 0)
     self.assertEqual(len(splits), 0)
 
+  def testLastOneEmpty(self):
+    with self.cached_session() as sess:
+      # The data looks like the following:
+      # Example |  Gradients    | Partition | Feature ID     |
+      # i0      |  (0.2, 0.12)  | 0         | 1,2            |
+      # i1      |  (-0.5, 0.07) | 0         |                |
+      # i2      |  (1.2, 0.2)   | 0         | 2              |
+      # i3      |  (4.0, 0.13)  | 1         |                |
+      gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
+      hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
+      partition_ids = [0, 0, 0, 1]
+      indices = [[0, 0], [0, 1], [2, 0]]
+      values = array_ops.constant([1, 2, 2], dtype=dtypes.int64)
+
+      gradient_shape = tensor_shape.scalar()
+      hessian_shape = tensor_shape.scalar()
+      class_id = -1
+
+      split_handler = categorical_split_handler.EqualitySplitHandler(
+          l1_regularization=0.1,
+          l2_regularization=1,
+          tree_complexity_regularization=0,
+          min_node_weight=0,
+          sparse_int_column=sparse_tensor.SparseTensor(indices, values, [4, 1]),
+          feature_column_group_id=0,
+          gradient_shape=gradient_shape,
+          hessian_shape=hessian_shape,
+          multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS,
+          init_stamp_token=0)
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      empty_gradients, empty_hessians = get_empty_tensors(
+          gradient_shape, hessian_shape)
+      example_weights = array_ops.ones([4, 1], dtypes.float32)
+
+      update_1 = split_handler.update_stats_sync(
+          0,
+          partition_ids,
+          gradients,
+          hessians,
+          empty_gradients,
+          empty_hessians,
+          example_weights,
+          is_active=array_ops.constant([True, True]))
+      with ops.control_dependencies([update_1]):
+        are_splits_ready, partitions, gains, splits = (
+            split_handler.make_splits(0, 1, class_id))
+        are_splits_ready, partitions, gains, splits = (
+            sess.run([are_splits_ready, partitions, gains, splits]))
+    self.assertTrue(are_splits_ready)
+    self.assertAllEqual([0], partitions)
+
+    # Check the split on partition 0.
+    # -(0.2 + 1.2 - 0.1) / (0.12 + 0.2 + 1)
+    expected_left_weight = -0.9848484848484846
+
+    # (0.2 + 1.2 - 0.1) ** 2 / (0.12 + 0.2 + 1)
+    expected_left_gain = 1.2803030303030298
+
+    # -(-0.5 + 0.1) / (0.07 + 1)
+    expected_right_weight = 0.37383177570093457
+
+    # (-0.5 + 0.1) ** 2 / (0.07 + 1)
+    expected_right_gain = 0.14953271028037385
+
+    # (0.2 + -0.5 + 1.2 - 0.1) ** 2 / (0.12 + 0.07 + 0.2 + 1)
+    expected_bias_gain = 0.46043165467625885
+
+    split_info = split_info_pb2.SplitInfo()
+    split_info.ParseFromString(splits[0])
+    left_child = split_info.left_child.vector
+    right_child = split_info.right_child.vector
+    split_node = split_info.split_node.categorical_id_binary_split
+
+    self.assertEqual(0, split_node.feature_column)
+
+    self.assertEqual(2, split_node.feature_id)
+
+    self.assertAllClose(
+        expected_left_gain + expected_right_gain - expected_bias_gain, gains[0],
+        0.00001)
+
+    self.assertAllClose([expected_left_weight], left_child.value, 0.00001)
+
+    self.assertAllClose([expected_right_weight], right_child.value, 0.00001)
+
 
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From 75138a1204c7aab340d159f5a6b85a55eb33c1e4 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 21 Sep 2018 14:46:25 -0700
Subject: [PATCH 0522/1357] Executor: Move `GetNodeAttr()` off the critical
 path for loop execution.

In `ExecutorState::PropagateOutputs()`, each time a loop enter node is
processed, the node's attrs are consulted to determine if it is a
"constant" or "non-constant" enter node. This entails a call to the
protobuf library, followed by multiple string comparisons to find the
attribute in the Node's NodeDef's attr map. The value of this property
never changes after the executor is first constructed, so in this
change we move it to a cached field on the `NodeItem` struct, and use
that value.

PiperOrigin-RevId: 214047449
---
 tensorflow/core/common_runtime/executor.cc | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 98719542c0..7cef34ac52 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -143,6 +143,8 @@ struct NodeItem {
   bool kernel_is_async : 1;      // True iff kernel->AsAsync() != nullptr
   bool is_merge : 1;             // True iff IsMerge(node)
   bool is_enter : 1;             // True iff IsEnter(node)
+  bool is_constant_enter : 1;    // True iff IsEnter(node) and
+                                 // node->GetAttr("is_constant") == true.
   bool is_exit : 1;              // True iff IsExit(node)
   bool is_control_trigger : 1;   // True iff IsControlTrigger(node)
   bool is_sink : 1;              // True iff IsSink(node)
@@ -626,6 +628,14 @@ Status ExecutorImpl::Initialize() {
     item->kernel_is_async = (item->kernel->AsAsync() != nullptr);
     item->is_merge = IsMerge(n);
     item->is_enter = IsEnter(n);
+    if (item->is_enter) {
+      bool is_constant_enter;
+      TF_RETURN_IF_ERROR(
+          GetNodeAttr(n->attrs(), "is_constant", &is_constant_enter));
+      item->is_constant_enter = is_constant_enter;
+    } else {
+      item->is_constant_enter = false;
+    }
     item->is_exit = IsExit(n);
     item->is_control_trigger = IsControlTrigger(n);
     item->is_sink = IsSink(n);
@@ -1988,15 +1998,12 @@ void ExecutorState::PropagateOutputs(const TaggedNode& tagged_node,
     is_frame_done = input_frame->DecrementOutstandingOpsLocked(
         &impl_->gview_, input_iter, ready);
   } else if (item->is_enter) {
-    bool is_constant;
-    const Status s = GetNodeAttr(node->attrs(), "is_constant", &is_constant);
-    DCHECK(s.ok()) << s;
     FindOrCreateChildFrame(input_frame, input_iter, node, &output_frame);
     output_iter = 0;
     {
       const NodeItem* item = impl_->gview_.node(node->id());
       mutex_lock l(output_frame->mu);
-      if (is_constant) {
+      if (item->is_constant_enter) {
         // Propagate to all active iterations if this is a loop invariant.
         output_frame->AddLoopInv(item, (*outputs)[0], ready);
       } else {
-- 
GitLab


From 95a87497c7a2fd11b2f66dca4966dfde45d8419c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 14:48:17 -0700
Subject: [PATCH 0523/1357] Allow functional_rnn to run with bfloat16.

PiperOrigin-RevId: 214047718
---
 .../contrib/recurrent/python/ops/functional_rnn.py     | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/recurrent/python/ops/functional_rnn.py b/tensorflow/contrib/recurrent/python/ops/functional_rnn.py
index c3db71359c..efaf63086f 100644
--- a/tensorflow/contrib/recurrent/python/ops/functional_rnn.py
+++ b/tensorflow/contrib/recurrent/python/ops/functional_rnn.py
@@ -22,7 +22,6 @@ from __future__ import print_function
 import copy
 
 from tensorflow.contrib.recurrent.python.ops import recurrent
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
@@ -62,7 +61,7 @@ class _FunctionalRnnCell(object):
     assert initial_state is not None
 
     # TODO(drpng): Dtype needs to be configurable.
-    input_dtypes = [dtypes.float32] + _GetDTypesFromStructure(initial_state)
+    input_dtypes = [seq_inputs.dtype] + _GetDTypesFromStructure(initial_state)
     # See _index.
     like_inputs_t = nest.map_structure(
         lambda x: array_ops.stop_gradient(array_ops.gather(x, 0)), seq_inputs)
@@ -144,7 +143,10 @@ class _FunctionalRnnCell(object):
   @property
   def extended_initial_state(self):
     if self._prepend_output:
-      return [array_ops.zeros(self._output_shape), self._state_template]
+      return [array_ops.zeros(
+          self._output_shape,
+          dtype=_GetDTypesFromStructure(self._state_template)[0]),
+              self._state_template]
     else:
       # The base case, where the output is just the hidden state.
       return self._state_template
@@ -185,7 +187,7 @@ def _ApplyLengthsToBatch(sequence_lengths, tf_output):
   lengths = array_ops.tile(
       array_ops.reshape(sequence_lengths, [-1, 1]), [1, max_time])
   is_less = math_ops.cast(
-      math_ops.less(output_time, lengths), dtype=dtypes.float32)
+      math_ops.less(output_time, lengths), dtype=tf_output.dtype)
   keep_mask = array_ops.tile(
       array_ops.expand_dims(is_less, -1),
       [1, 1, vector_size])
-- 
GitLab


From 2bb5db28f7303fbfb6ee6e99e46333f53901cfba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 14:52:49 -0700
Subject: [PATCH 0524/1357] Copy edits to overview file

PiperOrigin-RevId: 214048409
---
 tensorflow/contrib/lite/g3doc/overview.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/g3doc/overview.md b/tensorflow/contrib/lite/g3doc/overview.md
index 8cf43496df..9d035a6921 100644
--- a/tensorflow/contrib/lite/g3doc/overview.md
+++ b/tensorflow/contrib/lite/g3doc/overview.md
@@ -25,7 +25,7 @@ models.
 
 TensorFlow Lite defines a new model file format, based on
 [FlatBuffers](https://google.github.io/flatbuffers/). FlatBuffers is an
-open-sourced, efficient cross platform serialization library. It is similar to
+efficient open-source cross-platform serialization library. It is similar to
 [protocol buffers](https://developers.google.com/protocol-buffers/?hl=en), but
 the primary difference is that FlatBuffers does not need a parsing/unpacking
 step to a secondary representation before you can access data, often coupled
-- 
GitLab


From 9655bbd9d67a62b4af399100201918f138316dac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 15:17:03 -0700
Subject: [PATCH 0525/1357] This CL disables tf.print tests that change logging
 levels on Windows. These tests will still be run in a different test target
 that does not run on Windows.

PiperOrigin-RevId: 214052241
---
 .../python/kernel_tests/logging_ops_test.py   | 34 -------------------
 1 file changed, 34 deletions(-)

diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py
index cf0beba3c3..b24a0d0f9b 100644
--- a/tensorflow/python/kernel_tests/logging_ops_test.py
+++ b/tensorflow/python/kernel_tests/logging_ops_test.py
@@ -34,7 +34,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import string_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging
 
 
 class LoggingOpsTest(test.TestCase):
@@ -272,39 +271,6 @@ class PrintV2Test(test.TestCase):
       expected = "[0 1 2 ... 7 8 9]"
       self.assertTrue((expected + "\n") in printed.contents())
 
-  @test_util.run_in_graph_and_eager_modes()
-  def testPrintOneTensorLogInfo(self):
-    with self.test_session():
-      tensor = math_ops.range(10)
-      with self.captureWritesToStream(sys.stderr) as printed:
-        print_op = logging_ops.print_v2(
-            tensor, output_stream=tf_logging.info)
-        self.evaluate(print_op)
-      expected = "[0 1 2 ... 7 8 9]"
-      self.assertTrue(expected in printed.contents())
-
-  @test_util.run_in_graph_and_eager_modes()
-  def testPrintOneTensorLogWarning(self):
-    with self.test_session():
-      tensor = math_ops.range(10)
-      with self.captureWritesToStream(sys.stderr) as printed:
-        print_op = logging_ops.print_v2(
-            tensor, output_stream=tf_logging.warning)
-        self.evaluate(print_op)
-      expected = "[0 1 2 ... 7 8 9]"
-      self.assertTrue(expected in printed.contents())
-
-  @test_util.run_in_graph_and_eager_modes()
-  def testPrintOneTensorLogError(self):
-    with self.test_session():
-      tensor = math_ops.range(10)
-      with self.captureWritesToStream(sys.stderr) as printed:
-        print_op = logging_ops.print_v2(
-            tensor, output_stream=tf_logging.error)
-        self.evaluate(print_op)
-      expected = "[0 1 2 ... 7 8 9]"
-      self.assertTrue(expected in printed.contents())
-
   @test_util.run_in_graph_and_eager_modes()
   def testInvalidOutputStreamRaisesError(self):
     with self.test_session():
-- 
GitLab


From ba5d214a6b5d131b693eff277cc3b56298a4721a Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 21 Sep 2018 15:30:56 -0700
Subject: [PATCH 0526/1357] Fix lint errors

---
 tensorflow/contrib/layers/python/layers/embedding_ops.py | 3 ++-
 tensorflow/python/feature_column/feature_column_v2.py    | 3 ++-
 tensorflow/python/ops/embedding_ops.py                   | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py
index 897aed527d..17ee8c0733 100644
--- a/tensorflow/contrib/layers/python/layers/embedding_ops.py
+++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py
@@ -112,7 +112,8 @@ def safe_embedding_lookup_sparse(embedding_weights,
   dtype = sparse_weights.dtype if sparse_weights is not None else None
   if isinstance(embedding_weights, variables.PartitionedVariable):
     embedding_weights = list(embedding_weights)
-  if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable):
+  if not isinstance(embedding_weights[0],
+                    resource_variable_ops.ResourceVariable):
     embedding_weights = [
         ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
     ]
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 220a4f7ed6..1a2213707c 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -3283,7 +3283,8 @@ def _safe_embedding_lookup_sparse(embedding_weights,
     raise ValueError('Missing embedding_weights %s.' % embedding_weights)
 
   dtype = sparse_weights.dtype if sparse_weights is not None else None
-  if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable):
+  if not isinstance(embedding_weights[0],
+                    resource_variable_ops.ResourceVariable):
     embedding_weights = [
         ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
     ]
diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py
index fe422f5095..bcd135eedb 100644
--- a/tensorflow/python/ops/embedding_ops.py
+++ b/tensorflow/python/ops/embedding_ops.py
@@ -545,7 +545,8 @@ def safe_embedding_lookup_sparse(embedding_weights,
     raise ValueError('Missing embedding_weights %s.' % embedding_weights)
 
   dtype = sparse_weights.dtype if sparse_weights is not None else None
-  if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable):
+  if not isinstance(embedding_weights[0],
+                    resource_variable_ops.ResourceVariable):
     embedding_weights = [
         ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
     ]
-- 
GitLab


From 47d8a750bc0a9e3165e8fc61d38df3646bf8f278 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Fri, 21 Sep 2018 15:36:17 -0700
Subject: [PATCH 0527/1357] Fix "from tensorflow._api.v1 import *".

PiperOrigin-RevId: 214055060
---
 tensorflow/api_template.__init__.py               | 15 ++++++++-------
 .../tools/api/generator/create_python_api.py      |  1 -
 tensorflow/tools/test/check_futures_test.py       |  3 +++
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index 53a72b8443..2de740e145 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -14,9 +14,9 @@
 # ==============================================================================
 """Bring in all of the public TensorFlow interface into this module."""
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from __future__ import absolute_import as _absolute_import
+from __future__ import division as _division
+from __future__ import print_function as _print_function
 
 import os as _os
 
@@ -41,6 +41,11 @@ except (ImportError, AttributeError):
 from tensorflow.python.util.lazy_loader import LazyLoader  # pylint: disable=g-import-not-at-top
 contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib')
 del LazyLoader
+# The templated code that replaces the placeholder above sometimes
+# sets the __all__ variable. If it does, we have to be sure to add
+# "contrib".
+if '__all__' in vars():
+  vars()['__all__'].append('contrib')
 
 from tensorflow.python.platform import flags  # pylint: disable=g-import-not-at-top
 app.flags = flags  # pylint: disable=undefined-variable
@@ -51,10 +56,6 @@ _tf_api_dir = _os.path.dirname(_os.path.dirname(app.__file__))  # pylint: disabl
 if _tf_api_dir not in __path__:
   __path__.append(_tf_api_dir)
 
-del absolute_import
-del division
-del print_function
-
 # These symbols appear because we import the python package which
 # in turn imports from tensorflow.core and tensorflow.python. They
 # must come from this module. So python adds these symbols for the
diff --git a/tensorflow/python/tools/api/generator/create_python_api.py b/tensorflow/python/tools/api/generator/create_python_api.py
index 67cfd799ff..ab749f28cd 100644
--- a/tensorflow/python/tools/api/generator/create_python_api.py
+++ b/tensorflow/python/tools/api/generator/create_python_api.py
@@ -181,7 +181,6 @@ class _ModuleInitCodeBuilder(object):
 _names_with_underscore = [%s]
 __all__ = [_s for _s in dir() if not _s.startswith('_')]
 __all__.extend([_s for _s in _names_with_underscore])
-__all__.remove('print_function')
 ''' % underscore_names_str
 
     return module_text_map
diff --git a/tensorflow/tools/test/check_futures_test.py b/tensorflow/tools/test/check_futures_test.py
index 9181c9bd4a..a883ce221f 100644
--- a/tensorflow/tools/test/check_futures_test.py
+++ b/tensorflow/tools/test/check_futures_test.py
@@ -37,6 +37,7 @@ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
 FUTURES_PATTERN = re.compile(r'^from __future__ import (\w+)\s*$')
 FUTURES_PATTERN_2 = re.compile(
     r'^from __future__ import (\w+), (\w+), (\w+)\s*$')
+FUTURES_PATTERN_3 = re.compile(r'^from __future__ import (\w+) as \w+\s*$')
 REQUIRED_FUTURES = frozenset(['absolute_import', 'division', 'print_function'])
 
 WHITELIST = [
@@ -59,6 +60,8 @@ def check_file(path, old_division):
   for line in open(path, encoding='utf-8') if six.PY3 else open(path):
     count += 1
     m = FUTURES_PATTERN.match(line)
+    if not m:
+      m = FUTURES_PATTERN_3.match(line)
     if m:
       futures.add(m.group(1))
     else:
-- 
GitLab


From 5d7d8f9f7500e1b648e62fdd43f6d2999524e833 Mon Sep 17 00:00:00 2001
From: Nick Felt <nickfelt@google.com>
Date: Fri, 21 Sep 2018 15:48:47 -0700
Subject: [PATCH 0528/1357] Remove unused tf.contrib.tensorboard.plugins.trace
 code

PiperOrigin-RevId: 214056834
---
 tensorflow/contrib/cmake/python_modules.txt   |   1 -
 tensorflow/contrib/cmake/python_protos.txt    |   1 -
 tensorflow/contrib/tensorboard/BUILD          |  31 ----
 .../contrib/tensorboard/plugins/__init__.py   |   2 +-
 .../tensorboard/plugins/trace/__init__.py     |  24 ---
 .../tensorboard/plugins/trace/trace.py        | 167 ------------------
 .../plugins/trace/trace_info.proto            |  60 -------
 .../tensorboard/plugins/trace/trace_test.py   |  95 ----------
 8 files changed, 1 insertion(+), 380 deletions(-)
 delete mode 100644 tensorflow/contrib/tensorboard/plugins/trace/__init__.py
 delete mode 100644 tensorflow/contrib/tensorboard/plugins/trace/trace.py
 delete mode 100644 tensorflow/contrib/tensorboard/plugins/trace/trace_info.proto
 delete mode 100644 tensorflow/contrib/tensorboard/plugins/trace/trace_test.py

diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index 1c432b6e0b..c0763f4c0e 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -406,7 +406,6 @@ tensorflow/contrib/summary
 tensorflow/contrib/tensorboard
 tensorflow/contrib/tensorboard/plugins
 tensorflow/contrib/tensorboard/plugins/projector
-tensorflow/contrib/tensorboard/plugins/trace
 # TODO(sami): Add cmake implementations.
 # tensorflow/contrib/tensorrt/python
 # tensorflow/contrib/tensorrt/python/ops
diff --git a/tensorflow/contrib/cmake/python_protos.txt b/tensorflow/contrib/cmake/python_protos.txt
index cf1ee2ad76..42afbd9105 100644
--- a/tensorflow/contrib/cmake/python_protos.txt
+++ b/tensorflow/contrib/cmake/python_protos.txt
@@ -12,7 +12,6 @@ tensorflow/contrib/mpi_collectives
 tensorflow/contrib/session_bundle
 tensorflow/contrib/tensor_forest/proto
 tensorflow/contrib/tensorboard/plugins/projector
-tensorflow/contrib/tensorboard/plugins/trace
 tensorflow/contrib/tpu/proto
 tensorflow/contrib/tpu/profiler
 tensorflow/contrib/training/python/training
diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD
index 2b6a2b2f3c..7f0b3255ed 100644
--- a/tensorflow/contrib/tensorboard/BUILD
+++ b/tensorflow/contrib/tensorboard/BUILD
@@ -32,7 +32,6 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":projector",
-        ":trace",
     ],
 )
 
@@ -60,33 +59,3 @@ py_test(
         "//tensorflow/python:summary",
     ],
 )
-
-# API methods and protos in `tf.contrib.tensorboard.plugins.trace` package.
-py_library(
-    name = "trace",
-    srcs = glob(
-        ["plugins/trace/**/*.py"],
-        exclude = ["**/*test*"],
-    ),
-    srcs_version = "PY2AND3",
-    deps = [
-        ":protos_all_py",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:lib",
-        "//tensorflow/python:platform",
-    ],
-)
-
-py_test(
-    name = "trace_test",
-    size = "small",
-    srcs = ["plugins/trace/trace_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_windows"],
-    deps = [
-        ":trace",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:platform",
-    ],
-)
diff --git a/tensorflow/contrib/tensorboard/plugins/__init__.py b/tensorflow/contrib/tensorboard/plugins/__init__.py
index 41aa77910c..4ba469eb52 100644
--- a/tensorflow/contrib/tensorboard/plugins/__init__.py
+++ b/tensorflow/contrib/tensorboard/plugins/__init__.py
@@ -20,4 +20,4 @@ from __future__ import print_function
 
 # Add projects here, they will show up under tf.contrib.tensorboard.plugins
 from tensorflow.contrib.tensorboard.plugins import projector
-from tensorflow.contrib.tensorboard.plugins import trace
+
diff --git a/tensorflow/contrib/tensorboard/plugins/trace/__init__.py b/tensorflow/contrib/tensorboard/plugins/trace/__init__.py
deleted file mode 100644
index 2c99f4077e..0000000000
--- a/tensorflow/contrib/tensorboard/plugins/trace/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Public API for the Trace plugin."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# pylint: disable=wildcard-import
-from tensorflow.contrib.tensorboard.plugins.trace.trace import *
-from tensorflow.contrib.tensorboard.plugins.trace.trace_info_pb2 import *
-# pylint: enable=wildcard-import
diff --git a/tensorflow/contrib/tensorboard/plugins/trace/trace.py b/tensorflow/contrib/tensorboard/plugins/trace/trace.py
deleted file mode 100644
index 07e5316b8b..0000000000
--- a/tensorflow/contrib/tensorboard/plugins/trace/trace.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Stores debugging information regarding TensorFlow model."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import parser
-import re
-import token
-
-from google.protobuf import json_format
-
-from tensorflow.contrib.tensorboard.plugins.trace.trace_info_pb2 import TraceInfo
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import gfile
-
-# List of regex patterns that match files in the core tensorflow library.
-TF_LIB_REGEX_FPATHS = [os.sep + os.path.join('tensorflow', 'python')]
-
-LEFT_TOKENS = [token.LPAR, token.LSQB, token.LBRACE]
-RIGHT_TOKENS = [token.RPAR, token.RSQB, token.RBRACE]
-TOKENS = LEFT_TOKENS + RIGHT_TOKENS
-
-
-def store_trace_info(output_file_path,
-                     graph=None,
-                     ignore_regex_fpaths=None):
-  """Collects and stores trace information for a TensorFlow model.
-
-  The output proto is stored in json format.
-
-  Args:
-    output_file_path: The path where to store the output proto.
-    graph: Optional. The data flow graph. Defaults to `tf.get_default_graph()`.
-    ignore_regex_fpaths: Optional. Files whose path matches any of the regexes
-        in this list will be ignored. Defaults to patterns that match the core
-        tensorflow python library.
-  """
-  graph = graph or ops.get_default_graph()
-
-  if not ignore_regex_fpaths:
-    ignore_regex_fpaths = TF_LIB_REGEX_FPATHS
-
-  trace_info = TraceInfo()
-  # Extract trace information for every op in the graph.
-  source_fpaths = set()
-  for op in graph.get_operations():
-    op_info = trace_info.ops.add()
-    op_info.name = op.name
-    op_info.op_type = op.type
-    op_info.device = op.device
-    for trace in op.traceback:
-      fname, lineno, _, _ = trace
-      # Ignore traces in specified file paths.
-      if os.path.isabs(fname) and not _ignore_file_path(fname,
-                                                        ignore_regex_fpaths):
-        line_trace = op_info.traceback.add()
-        line_trace.file_path = fname
-        line_trace.line_number = lineno
-        source_fpaths.add(fname)
-    _add_data_from_tensors(op.inputs, op_info.inputs)
-    _add_data_from_tensors(op.outputs, op_info.outputs)
-
-  # Read the source files involved in the graph construction.
-  for fpath in source_fpaths:
-    file_info = trace_info.files.add()
-
-    with gfile.Open(fpath, 'r') as f:
-      source = f.read()
-
-    file_info.file_path = fpath
-    file_info.source_code = source
-
-    line2start = find_multiline_statements(source)
-
-    for key, value in line2start.items():
-      file_info.multiline_statements[key] = value
-
-  # Make sure the directory for the output file exists.
-  output_file_path = os.path.expanduser(output_file_path)
-  output_dir = os.path.dirname(output_file_path)
-  if not gfile.Exists(output_dir):
-    gfile.MakeDirs(output_dir)
-
-  # Store the debug information.
-  with gfile.Open(output_file_path, 'w') as f:
-    f.write(json_format.MessageToJson(trace_info))
-
-
-def find_multiline_statements(source):
-  """Parses the python source and finds multiline statements.
-
-  Based on counting the number of open and closed parenthesis on each line.
-
-  Args:
-    source: The source code string.
-
-  Returns:
-    A dict that maps a line index A to a line index B, where A is the end of a
-    multiline statement and B is the start. Line indexing is 0-based.
-  """
-  # Get the AST.
-  tree = parser.suite(source)
-  line2paren_count = [0] * (source.count('\n') + 1)
-  _count_brackets_braces_parenthesis(tree.totuple(True), line2paren_count)
-
-  line2start = {}
-  for end in range(len(line2paren_count)):
-    if line2paren_count[end] >= 0:
-      # This is not the end of a multiline statement.
-      continue
-    cumulative_paren_count = 0
-    for start in range(end, -1, -1):
-      cumulative_paren_count += line2paren_count[start]
-      if cumulative_paren_count == 0:
-        line2start[end] = start
-        break
-  return line2start
-
-
-def _add_data_from_tensors(tensors, info):
-  for t in tensors:
-    tensor_info = info.add()
-
-    shape = t.get_shape()
-    if shape.ndims:
-      shape = [(-1 if s is None else s) for s in shape.as_list()]
-      tensor_info.shape.extend(shape)
-    tensor_info.dtype = t.dtype.name
-    tensor_info.num_bytes_per_elem = t.dtype.size
-
-    for c in t.consumers():
-      tensor_info.consumers.append(c.name)
-
-
-def _ignore_file_path(fname, ignore_regex_fpaths):
-  for regex_pattern in ignore_regex_fpaths:
-    if re.search(regex_pattern, fname):
-      return True
-  return False
-
-
-def _count_brackets_braces_parenthesis(node, line2par):
-  if isinstance(node[1], tuple):
-    for child in node[1:]:
-      _count_brackets_braces_parenthesis(child, line2par)
-  else:
-    tok = node[0]
-    if tok in TOKENS:
-      lineno = node[2]
-      line2par[lineno - 1] += (1 if tok in LEFT_TOKENS else -1)
-  return line2par
diff --git a/tensorflow/contrib/tensorboard/plugins/trace/trace_info.proto b/tensorflow/contrib/tensorboard/plugins/trace/trace_info.proto
deleted file mode 100644
index 9f20becb0f..0000000000
--- a/tensorflow/contrib/tensorboard/plugins/trace/trace_info.proto
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-syntax = "proto3";
-
-package tensorflow.contrib.tensorboard;
-
-message TraceInfo {
-  repeated OpInfo ops = 1;
-  repeated FileInfo files = 2;
-}
-
-message OpInfo {
-  string name = 1;
-  string op_type = 2;
-  string device = 3;
-  repeated LineTrace traceback = 4;
-  repeated TensorInfo inputs = 5;
-  repeated TensorInfo outputs = 6;
-}
-
-message LineTrace {
-  // Absolute file path.
-  string file_path = 1;
-  // 1-based line number.
-  uint32 line_number = 2;
-}
-
-message TensorInfo {
-  // Size of the tensor for each dimension. Value of -1 denotes "unknown"
-  // size for that dimension.
-  repeated int32 shape = 1;
-  // The data type of the tensor.
-  string dtype = 2;
-  // Number of bytes per element in the tensor.
-  uint32 num_bytes_per_elem = 3;
-  // List of operation names that consume this tensor.
-  repeated string consumers = 4;
-}
-
-message FileInfo {
-  // Absolute file path to the source code.
-  string file_path = 1;
-  string source_code = 2;
-  // Map from end of statement to start of statement. End and start are 0-based
-  // line indexes.
-  map<uint32, uint32> multiline_statements = 3;
-}
diff --git a/tensorflow/contrib/tensorboard/plugins/trace/trace_test.py b/tensorflow/contrib/tensorboard/plugins/trace/trace_test.py
deleted file mode 100644
index d580f04c5f..0000000000
--- a/tensorflow/contrib/tensorboard/plugins/trace/trace_test.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for tensorflow.contrib.tensorboard.plugins.trace package."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tempfile
-
-from google.protobuf import json_format
-
-from tensorflow.contrib.tensorboard.plugins import trace
-from tensorflow.python.framework import constant_op
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-
-
-class TraceTest(test.TestCase):
-
-  def setUp(self):
-    self._temp_dir = tempfile.mkdtemp()
-    self._temp_trace_json = self._temp_dir + 'trace.json'
-
-  def tearDown(self):
-    gfile.DeleteRecursively(self._temp_dir)
-
-  def testEmptyGraph(self):
-    trace_info = self._store_and_read_trace_info()
-    self.assertEqual(len(trace_info.ops), 0)
-
-  def testHasSourceCodeOfThisFile(self):
-    constant_op.constant(0)
-    trace_info = self._store_and_read_trace_info()
-
-    self.assertTrue(trace_info.files)
-    for file_info in trace_info.files:
-      if file_info.file_path.endswith('trace_test.py'):
-        return
-    self.fail('trace_test file not found in the trace info json')
-
-  def testHasTheConstantOp(self):
-    constant_op.constant(0)
-    trace_info = self._store_and_read_trace_info()
-
-    self.assertTrue(trace_info.ops)
-
-    for op in trace_info.ops:
-      if op.op_type == 'Const':
-        return
-    self.fail('Could not find operation of type `Const` in the graph')
-
-  def testMultilineStatements(self):
-    source = """def test():
-      a(4,
-        3,
-        1)
-
-      b(3, 4, 5)
-
-      c((4, 3),
-        (),
-      )
-    """
-    line2start = trace.find_multiline_statements(source)
-
-    self.assertEqual(line2start[3], 1)
-    self.assertEqual(line2start[9], 7)
-    self.assertEqual(len(line2start), 2)
-
-  def _store_and_read_trace_info(self):
-    trace.store_trace_info(self._temp_trace_json)
-    trace_info = trace.TraceInfo()
-
-    with gfile.Open(self._temp_trace_json) as f:
-      text = f.read()
-    json_format.Parse(text, trace_info)
-
-    return trace_info
-
-
-if __name__ == '__main__':
-  test.main()
-- 
GitLab


From e38dc6a60b133ce3af704c61e105f7fe5801b8fa Mon Sep 17 00:00:00 2001
From: Ruoxin Sang <rxsang@google.com>
Date: Fri, 21 Sep 2018 15:48:52 -0700
Subject: [PATCH 0529/1357] Update BuildComputation function comment.

PiperOrigin-RevId: 214056851
---
 tensorflow/compiler/tf2xla/xla_compiler.cc | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index 739e47778a..d5094e8ec5 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -333,10 +333,8 @@ Status ExecuteGraph(XlaContext* xla_context, std::unique_ptr<Graph> graph,
 }
 
 // Builds the XLA computation.
-//
-// `retvals` is the list of retvals produced by _Retval operators, in index
-// order. `variable_map` is a map from variable ID numbers to XlaOpContext
-// variable states, generated by the symbolic evaluation.
+// `args` is the list of input arguments, `retvals` is the list of retvals
+// produced by _Retval operators, in index order.
 // If `return_updated_values_for_all_resources` is true, all resources will be
 // included in `resource_updates`, regardless of whether their value changed.
 // Sets `*num_nonconst_outputs` to the number of outputs of the `computation`.
-- 
GitLab


From 64577d4fe51246710fbf2c9ebc17bed2febe6694 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 15:49:58 -0700
Subject: [PATCH 0530/1357] Rollback change introduced on cross_towers_ops_test
 by previous commit.

PiperOrigin-RevId: 214057023
---
 tensorflow/contrib/distribute/python/cross_tower_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distribute/python/cross_tower_ops_test.py b/tensorflow/contrib/distribute/python/cross_tower_ops_test.py
index a3e1b96a68..490371477a 100644
--- a/tensorflow/contrib/distribute/python/cross_tower_ops_test.py
+++ b/tensorflow/contrib/distribute/python/cross_tower_ops_test.py
@@ -114,7 +114,7 @@ class CrossTowerOpsTestBase(test.TestCase, parameterized.TestCase):
         self.assertEqual([v.numpy() for v in left._index.values()],
                          list(right._index.values()))
       else:
-        with self.cached_session() as sess:
+        with self.test_session() as sess:
           self.assertEqual(
               sess.run(list(left._index.values())), list(right._index.values()))
 
-- 
GitLab


From adb742eba146478c3cee86d7b366e3faf121f6bd Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 21 Sep 2018 15:50:26 -0700
Subject: [PATCH 0531/1357] Exclude cloud contrib modules in s390x and windows.

Fixes #20502

PiperOrigin-RevId: 214057093
---
 tensorflow/contrib/BUILD | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index e1af52cd96..ae5ca32bcf 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -126,11 +126,16 @@ py_library(
     }) + if_not_windows_cuda([
         "//tensorflow/contrib/fused_conv:fused_conv_py",  # unresolved symbols, need to export more symbols
     ]) + if_not_windows([
-        "//tensorflow/contrib/bigtable",  # depends on bigtable
-        "//tensorflow/contrib/cloud:cloud_py",  # doesn't compile on Windows
-        "//tensorflow/contrib/tensorrt:init_py",  # doesn't compile on windows
-        "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
-    ]),
+    ]) + select({
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//conditions:default": [
+            "//tensorflow/contrib/bigtable",
+            "//tensorflow/contrib/cloud:cloud_py",
+            "//tensorflow/contrib/tensorrt:init_py",
+            "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
+        ],
+    }),
 )
 
 cc_library(
-- 
GitLab


From a444f6a29f4340fc673ce0fc70ceac58dbbf43b9 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Fri, 21 Sep 2018 16:24:04 -0700
Subject: [PATCH 0532/1357] Correctly compute loop bounds in pack_rhs_mem for
 convolutions.

(1) Correctly compute maxCols/maxRows/maxPlanes for the fast path.
(2) Use fast divisors in a hot path.

~8-20% improvement for cuboid convolutions (Conv3D).
~1-5% improvement for spatial convolutions (Conv2D).

PiperOrigin-RevId: 214061787
---
 .../core/kernels/eigen_cuboid_convolution.h   | 367 ++++++++++--------
 .../core/kernels/eigen_spatial_convolutions.h | 268 +++++++------
 2 files changed, 362 insertions(+), 273 deletions(-)

diff --git a/tensorflow/core/kernels/eigen_cuboid_convolution.h b/tensorflow/core/kernels/eigen_cuboid_convolution.h
index 37414ddca3..6a9a2accd8 100644
--- a/tensorflow/core/kernels/eigen_cuboid_convolution.h
+++ b/tensorflow/core/kernels/eigen_cuboid_convolution.h
@@ -113,6 +113,11 @@ class TensorContractionInputMapper<
       m_num_patches = tensor.impl().dimensions()[NumDims - 5];
     }
 
+    // Strides for navigating through the single patch.
+    m_patch_plane_stride = m_patch_depth;
+    m_patch_row_stride = m_patch_planes * m_patch_plane_stride;
+    m_patch_col_stride = m_patch_rows * m_patch_row_stride;
+
     // Strides for the output tensor.
     // IMPORTANT: These strides are used to locate an element in a patch at a
     // depth zero (channel), which is not quite the same as "traditional"
@@ -166,6 +171,13 @@ class TensorContractionInputMapper<
 
     m_fastNumPatches = internal::TensorIntDivisor<Index>(m_num_patches);
 
+    m_fastPatchPlaneStride =
+        internal::TensorIntDivisor<Index>(m_patch_plane_stride);
+    m_fastPatchRowStride =
+        internal::TensorIntDivisor<Index>(m_patch_row_stride);
+    m_fastPatchColStride =
+        internal::TensorIntDivisor<Index>(m_patch_col_stride);
+
     m_fastInputPlaneStride =
         internal::TensorIntDivisor<Index>(m_patch_plane_inflate_strides);
     m_fastInputRowStride =
@@ -195,6 +207,10 @@ class TensorContractionInputMapper<
     m_patch_cols = base_mapper.m_patch_cols;
     m_num_patches = base_mapper.m_num_patches;
 
+    m_patch_plane_stride = base_mapper.m_patch_plane_stride;
+    m_patch_row_stride = base_mapper.m_patch_row_stride;
+    m_patch_col_stride = base_mapper.m_patch_col_stride;
+
     m_rowStride = base_mapper.m_rowStride;
     m_colStride = base_mapper.m_colStride;
     m_patchStride = base_mapper.m_patchStride;
@@ -234,6 +250,9 @@ class TensorContractionInputMapper<
     m_outputPlanesRows = base_mapper.m_outputPlanesRows;
 
     m_fastNumPatches = base_mapper.m_fastNumPatches;
+    m_fastPatchPlaneStride = base_mapper.m_fastPatchPlaneStride;
+    m_fastPatchRowStride = base_mapper.m_fastPatchRowStride;
+    m_fastPatchColStride = base_mapper.m_fastPatchColStride;
     m_fastInputPlaneStride = base_mapper.m_fastInputPlaneStride;
     m_fastInputRowStride = base_mapper.m_fastInputRowStride;
     m_fastInputColStride = base_mapper.m_fastInputColStride;
@@ -305,9 +324,9 @@ class TensorContractionInputMapper<
   }
 
   EIGEN_DEVICE_FUNC
-  EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_patch_depth; }
+  EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_planeInputStride; }
   EIGEN_DEVICE_FUNC
-  EIGEN_ALWAYS_INLINE Index patchPlanes() const { return m_patch_planes; }
+  EIGEN_ALWAYS_INLINE Index patchPlanes() const { return m_rowStride; }
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Index patchRows() const { return m_patch_rows; }
   EIGEN_DEVICE_FUNC
@@ -594,7 +613,12 @@ class TensorContractionInputMapper<
   Index m_patch_cols;    // number of columns in the patch
   Index m_num_patches;   // number of patches to extract
 
-  // Strides for the output tensor.
+  // Strides for navigating through the single patch.
+  Index m_patch_plane_stride;
+  Index m_patch_row_stride;
+  Index m_patch_col_stride;
+
+  // Strides for the output tensor (depth is not the part of the stride).
   Index m_rowStride;
   Index m_colStride;
   Index m_patchStride;
@@ -637,6 +661,10 @@ class TensorContractionInputMapper<
   // Fast representation of various divisors.
   internal::TensorIntDivisor<Index> m_fastNumPatches;
 
+  internal::TensorIntDivisor<Index> m_fastPatchPlaneStride;
+  internal::TensorIntDivisor<Index> m_fastPatchRowStride;
+  internal::TensorIntDivisor<Index> m_fastPatchColStride;
+
   internal::TensorIntDivisor<Index> m_fastInputPlaneStride;
   internal::TensorIntDivisor<Index> m_fastInputRowStride;
   internal::TensorIntDivisor<Index> m_fastInputColStride;
@@ -750,13 +778,62 @@ class TensorContractionSubMapper<
     return m_base_mapper.nonStandardPatches();
   }
 
+  // Max(Col|Row|Plane|Depth): compute the upper limit for the column, row,
+  // plane and depth index respectively that fits into the peeled_k elements
+  // starting at m_depth_offset.
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxCol(const Index peeled_k) const {
+    const Index max_col =
+        fastPatchColStride().divide(m_depth_offset + peeled_k);
+    return std::min<Index>(1 + max_col, patchCols());
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxRow(const Index peeled_k,
+                                   const Index col) const {
+    const Index max_row = fastPatchRowStride().divide(
+        m_depth_offset + peeled_k - col * patchColStride());
+    return std::min<Index>(1 + max_row, patchRows());
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxPlane(const Index peeled_k, const Index col,
+                                     const Index row) const {
+    const Index max_plane = fastPatchPlaneStride().divide(
+        m_depth_offset + peeled_k - col * patchColStride() -
+        row * patchRowStride());
+    return std::min<Index>(1 + max_plane, patchPlanes());
+  }
+
+  // MaxDepth uses only the remaining number of elements in the peeled_k.
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxDepth(const Index num_elements,
+                                     const Index start_depth) const {
+    return std::min<Index>(start_depth + num_elements, patchDepth());
+  }
+
+  // Every register matters in this code, so sometimes to prevent register
+  // spilling, instead of the variable that you would expect to see, we use
+  // another one, that is guaranteed to have the same value. E.g. patch depth is
+  // always the same as input depth, and it's also the same as input plane
+  // stride. Bunch of other parameters have similar relations.
+
+  typedef internal::TensorIntDivisor<Index> IndexDivisor;
+
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Index patchDepth() const {
-    return m_base_mapper.m_patch_depth;
+    eigen_assert(m_base_mapper.m_patch_depth ==
+                     m_base_mapper.m_planeInputStride &&
+                 "Patch depth must be equal to plane input stride.");
+    return m_base_mapper.m_planeInputStride;
   }
+
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Index patchPlanes() const {
-    return m_base_mapper.m_patch_planes;
+    eigen_assert(m_base_mapper.m_patch_planes == m_base_mapper.m_rowStride &&
+                 "Patch planes must be equal to row stride.");
+    return m_base_mapper.m_rowStride;
   }
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Index patchRows() const {
@@ -767,6 +844,36 @@ class TensorContractionSubMapper<
     return m_base_mapper.m_patch_cols;
   }
 
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchPlaneStride() const {
+    eigen_assert(patchDepth() == m_base_mapper.m_patch_plane_stride &&
+                 "Patch depth must be equal to patch plane stride.");
+    return patchDepth();
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchRowStride() const {
+    return m_base_mapper.m_patch_row_stride;
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchColStride() const {
+    return m_base_mapper.m_patch_col_stride;
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE IndexDivisor fastPatchPlaneStride() const {
+    eigen_assert(patchDepth() == m_base_mapper.m_patch_plane_stride &&
+                 "Patch depth must be equal to patch plane stride.");
+    return m_base_mapper.m_fastDimZero;  // patch_depth
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE IndexDivisor fastPatchRowStride() const {
+    return m_base_mapper.m_fastPatchRowStride;
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE IndexDivisor fastPatchColStride() const {
+    return m_base_mapper.m_fastPatchColStride;
+  }
+
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth,
                                              const Index baseIndex) const {
@@ -832,8 +939,7 @@ class TensorContractionSubMapper<
 
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Index depthOffset() const {
-    const Index patchOffset = m_depth_offset % m_base_mapper.patchDepth();
-    return patchOffset;
+    return m_depth_offset % patchDepth();
   }
 
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper
@@ -859,14 +965,14 @@ class TensorContractionSubMapper<
 // matrix" constructed from extracted volume patches) in contiguous memory.
 //
 // Given column major input (A0 beside A1 in memory):
-// A0 B0 C0 D0 E0 F0 G0 H0 ... Z0
-// A1 B1 C1 D1 E1 F1 G1 H1 ... Z1
-// A2 B2 C2 D2 E2 F2 G2 H2 ... Z2
-// A3 B3 C3 D3 E3 F3 G3 H3 ... Z3
-// A4 B4 C4 D4 E4 F4 G4 H4 ... Z4
-// A5 B5 C5 D5 E5 F5 G5 H5 ... Z5
-// A6 B6 C6 D6 E6 F6 G6 H6 ... Z6
-// A7 B7 C7 D7 E7 F7 G7 H7 ... Z7
+// A0 B0 C0 D0  E0 F0 G0 H0 ... Z0
+// A1 B1 C1 D1  E1 F1 G1 H1 ... Z1
+// A2 B2 C2 D2  E2 F2 G2 H2 ... Z2
+// A3 B3 C3 D3  E3 F3 G3 H3 ... Z3
+// A4 B4 C4 D4  E4 F4 G4 H4 ... Z4
+// A5 B5 C5 D5  E5 F5 G5 H5 ... Z5
+// A6 B6 C6 D6  E6 F6 G6 H6 ... Z6
+// A7 B7 C7 D7  E7 F7 G7 H7 ... Z7
 // A8 ...
 // ...
 //
@@ -910,7 +1016,11 @@ struct gemm_pack_rhs<
       nocontract_t, contract_t, packet_size, inner_dim_contiguous,
       inner_dim_reordered, Alignment>
       SubMapper;
+
   typedef SubMapper DataMapper;
+  typedef typename packet_traits<Scalar>::type Packet;
+
+  EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
 
   EIGEN_DEVICE_FUNC
   EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs,
@@ -919,9 +1029,6 @@ struct gemm_pack_rhs<
     eigen_assert(stride == 0);
     eigen_assert(offset == 0);
 
-    EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    typedef typename packet_traits<Scalar>::type Packet;
-
     const Index packet_cols4 = (cols / 4) * 4;
     const Index peeled_k = (depth / packet_size) * packet_size;
     const bool non_standard_patches = rhs.nonStandardPatches();
@@ -934,81 +1041,58 @@ struct gemm_pack_rhs<
 
       Index k = 0;
       if ((packet_size % 4) == 0 && !non_standard_patches) {
-        const Index patch_depth = rhs.patchDepth();
-
-        if ((patch_depth % packet_size) == 0) {
-          const Index patch_cols = rhs.patchCols();
-          const Index patch_rows = rhs.patchRows();
-          const Index patch_planes = rhs.patchPlanes();
-
-          const Index startCol = rhs.colOffset();
-          const Index max_cols = std::min<Index>(
-              Eigen::divup(peeled_k, patch_rows * patch_planes * patch_depth) +
-                  startCol,
-              patch_cols);
-
-          for (Index c = startCol; c < max_cols; ++c) {
-            eigen_assert(k < peeled_k);
-
-            const Index startRow = (c == startCol) ? rhs.rowOffset() : 0;
-            const Index max_rows = std::min<Index>(
-                Eigen::divup(
-                    peeled_k - c * patch_rows * patch_planes * patch_depth,
-                    patch_planes * patch_depth) +
-                    startRow,
-                patch_rows);
+        // FAST PATH:
+        // Iterate over patch columns, rows and planes if we know that a single
+        // packet do not span across multiple planes, rows or columns.
+        if ((rhs.patchDepth() % packet_size) == 0) {
+          const Index start_col = rhs.colOffset();
+          const Index max_col = rhs.maxCol(peeled_k);
+
+          for (Index c = start_col; c < max_col; ++c) {
+            eigen_assert(k <= peeled_k);
+
+            const Index start_row = (c == start_col) ? rhs.rowOffset() : 0;
+            const Index max_row = rhs.maxRow(peeled_k, c);
 
             const bool pad_col0 = dm0.padCol(c);
             const bool pad_col1 = dm1.padCol(c);
             const bool pad_col2 = dm2.padCol(c);
             const bool pad_col3 = dm3.padCol(c);
 
-            for (Index r = startRow; r < max_rows; ++r) {
-              eigen_assert(k < peeled_k);
+            for (Index r = start_row; r < max_row; ++r) {
+              eigen_assert(k <= peeled_k);
 
-              const Index startPlane =
-                  ((c == startCol) && (r == startRow)) ? rhs.planeOffset() : 0;
-              const Index max_planes = std::min<Index>(
-                  Eigen::divup(
-                      peeled_k -
-                          c * patch_rows * patch_planes * patch_depth -  // col
-                          r * patch_planes * patch_depth,                // row
-                      patch_depth) +
-                      startPlane,
-                  patch_planes);
+              const Index start_plane = ((c == start_col) && (r == start_row))
+                                            ? rhs.planeOffset()
+                                            : 0;
+              const Index max_plane = rhs.maxPlane(peeled_k, c, r);
 
-              const bool pad_row0 = dm0.padRow(r);
-              const bool pad_row1 = dm1.padRow(r);
-              const bool pad_row2 = dm2.padRow(r);
-              const bool pad_row3 = dm3.padRow(r);
+              const bool pad_row0 = pad_col0 || dm0.padRow(r);
+              const bool pad_row1 = pad_col1 || dm1.padRow(r);
+              const bool pad_row2 = pad_col2 || dm2.padRow(r);
+              const bool pad_row3 = pad_col3 || dm3.padRow(r);
 
-              for (Index p = startPlane; p < max_planes; ++p) {
-                eigen_assert(k < peeled_k);
+              for (Index p = start_plane; p < max_plane; ++p) {
+                eigen_assert(k <= peeled_k);
 
-                const bool pad0 = pad_col0 || pad_row0 || dm0.padPlane(p);
-                const bool pad1 = pad_col1 || pad_row1 || dm1.padPlane(p);
-                const bool pad2 = pad_col2 || pad_row2 || dm2.padPlane(p);
-                const bool pad3 = pad_col3 || pad_row3 || dm3.padPlane(p);
+                const bool pad0 = pad_row0 || dm0.padPlane(p);
+                const bool pad1 = pad_row1 || dm1.padPlane(p);
+                const bool pad2 = pad_row2 || dm2.padPlane(p);
+                const bool pad3 = pad_row3 || dm3.padPlane(p);
 
                 const Index idx0 = dm0.baseIndex(p, r, c);
                 const Index idx1 = dm1.baseIndex(p, r, c);
                 const Index idx2 = dm2.baseIndex(p, r, c);
                 const Index idx3 = dm3.baseIndex(p, r, c);
 
-                const Index startDepth =
-                    ((c == startCol) && (r == startRow) && (p == startPlane))
+                const Index start_depth =
+                    ((c == start_col) && (r == start_row) && (p == start_plane))
                         ? rhs.depthOffset()
                         : 0;
-                const Index max_depth = std::min<Index>(
-                    peeled_k -
-                        c * patch_rows * patch_planes * patch_depth -  // col
-                        r * patch_planes * patch_depth -               // row
-                        p * patch_depth +                              // plane
-                        startDepth,
-                    patch_depth);
-                eigen_assert((max_depth - startDepth) % packet_size == 0);
-
-                for (Index d = startDepth; d < max_depth; d += packet_size) {
+                const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
+                eigen_assert((max_depth - start_depth) % packet_size == 0);
+
+                for (Index d = start_depth; d < max_depth; d += packet_size) {
                   eigen_assert(k < peeled_k);
                   PacketBlock<Packet, 4> kernel;
                   kernel.packet[0] = pad0 ? pset1<Packet>(Scalar(0))
@@ -1031,20 +1115,12 @@ struct gemm_pack_rhs<
             }
           }
 
-          for (; k < peeled_k; k += packet_size) {
-            PacketBlock<Packet, 4> kernel;
-            kernel.packet[0] = dm0.loadPacketFast(k);
-            kernel.packet[1] = dm1.loadPacketFast(k);
-            kernel.packet[2] = dm2.loadPacketFast(k);
-            kernel.packet[3] = dm3.loadPacketFast(k);
-            ptranspose(kernel);
-            pstoreu(block + 0 * packet_size, kernel.packet[0]);
-            pstoreu(block + 1 * packet_size, kernel.packet[1]);
-            pstoreu(block + 2 * packet_size, kernel.packet[2]);
-            pstoreu(block + 3 * packet_size, kernel.packet[3]);
-            block += 4 * packet_size;
-          }
+          // The loop above should fill peeled_k elements.
+          eigen_assert(peeled_k == k);
+
         } else {
+          // Packet can span multiple planes, rows or columns, so we have to go
+          // though the slower "standard" path.
           for (; k < peeled_k; k += packet_size) {
             PacketBlock<Packet, 4> kernel;
             kernel.packet[0] = dm0.loadPacketStandard(k);
@@ -1060,7 +1136,9 @@ struct gemm_pack_rhs<
           }
         }
       }
-      if (!rhs.nonStandardPatches()) {
+
+      // Copy the remaining coefficients of the column block after the peeled_k.
+      if (!non_standard_patches) {
         for (; k < depth; k++) {
           block[0] = dm0.loadCoeffStandard(k);
           block[1] = dm1.loadCoeffStandard(k);
@@ -1079,7 +1157,7 @@ struct gemm_pack_rhs<
       }
     }
 
-    // copy the remaining columns one at a time (nr==1)
+    // Copy the remaining columns one at a time (nr==1).
     for (Index j2 = packet_cols4; j2 < cols; ++j2) {
       const SubMapper dm0 = rhs.getLinearMapper(0, j2);
       for (Index k = 0; k < depth; k++) {
@@ -1118,6 +1196,9 @@ struct gemm_pack_rhs<
       inner_dim_reordered, Alignment>
       SubMapper;
   typedef SubMapper DataMapper;
+  typedef typename packet_traits<Scalar>::type Packet;
+
+  EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
 
   EIGEN_DEVICE_FUNC
   EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs,
@@ -1126,9 +1207,6 @@ struct gemm_pack_rhs<
     eigen_assert(stride == 0);
     eigen_assert(offset == 0);
 
-    EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    typedef typename packet_traits<Scalar>::type Packet;
-
     const int packet_size = 2;
 
     const Index packet_cols4 = (cols / 4) * 4;
@@ -1143,56 +1221,39 @@ struct gemm_pack_rhs<
 
       Index k = 0;
       if (!non_standard_patches) {
-        const Index patch_depth = rhs.patchDepth();
-
-        if ((patch_depth % packet_size) == 0) {
-          const Index patch_cols = rhs.patchCols();
-          const Index patch_rows = rhs.patchRows();
-          const Index patch_planes = rhs.patchPlanes();
-
-          const Index startCol = rhs.colOffset();
-          const Index max_cols = std::min<Index>(
-              Eigen::divup(peeled_k, patch_rows * patch_planes * patch_depth) +
-                  startCol,
-              patch_cols);
-
-          for (Index c = startCol; c < max_cols; ++c) {
-            eigen_assert(k < peeled_k);
-
-            const Index startRow = (c == startCol) ? rhs.rowOffset() : 0;
-            const Index max_rows = std::min<Index>(
-                Eigen::divup(
-                    peeled_k - c * patch_rows * patch_planes * patch_depth,
-                    patch_planes * patch_depth) +
-                    startRow,
-                patch_rows);
+        // FAST PATH:
+        // Iterate over patch columns, rows and planes if we know that a single
+        // packet do not span across multiple planes, rows or columns.
+        if ((rhs.patchDepth() % packet_size) == 0) {
+          const Index start_col = rhs.colOffset();
+          const Index max_col = rhs.maxCol(peeled_k);
+
+          for (Index c = start_col; c < max_col; ++c) {
+            eigen_assert(k <= peeled_k);
+
+            const Index start_row = (c == start_col) ? rhs.rowOffset() : 0;
+            const Index max_row = rhs.maxRow(peeled_k, c);
 
             const bool pad_col0 = dm0.padCol(c);
             const bool pad_col1 = dm1.padCol(c);
             const bool pad_col2 = dm2.padCol(c);
             const bool pad_col3 = dm3.padCol(c);
 
-            for (Index r = startRow; r < max_rows; ++r) {
-              eigen_assert(k < peeled_k);
+            for (Index r = start_row; r < max_row; ++r) {
+              eigen_assert(k <= peeled_k);
 
-              const Index startPlane =
-                  ((c == startCol) && (r == startRow)) ? rhs.planeOffset() : 0;
-              const Index max_planes = std::min<Index>(
-                  Eigen::divup(
-                      peeled_k -
-                          c * patch_rows * patch_planes * patch_depth -  // col
-                          r * patch_planes * patch_depth,                // row
-                      patch_depth) +
-                      startPlane,
-                  patch_planes);
+              const Index start_plane = ((c == start_col) && (r == start_row))
+                                            ? rhs.planeOffset()
+                                            : 0;
+              const Index max_plane = rhs.maxPlane(peeled_k, c, r);
 
               const bool pad_row0 = dm0.padRow(r);
               const bool pad_row1 = dm1.padRow(r);
               const bool pad_row2 = dm2.padRow(r);
               const bool pad_row3 = dm3.padRow(r);
 
-              for (Index p = startPlane; p < max_planes; ++p) {
-                eigen_assert(k < peeled_k);
+              for (Index p = start_plane; p < max_plane; ++p) {
+                eigen_assert(k <= peeled_k);
 
                 const bool pad0 = pad_col0 || pad_row0 || dm0.padPlane(p);
                 const bool pad1 = pad_col1 || pad_row1 || dm1.padPlane(p);
@@ -1204,20 +1265,14 @@ struct gemm_pack_rhs<
                 const Index idx2 = dm2.baseIndex(p, r, c);
                 const Index idx3 = dm3.baseIndex(p, r, c);
 
-                const Index startDepth =
-                    ((c == startCol) && (r == startRow) && (p == startPlane))
+                const Index start_depth =
+                    ((c == start_col) && (r == start_row) && (p == start_plane))
                         ? rhs.depthOffset()
                         : 0;
-                const Index max_depth = std::min<Index>(
-                    peeled_k -
-                        c * patch_rows * patch_planes * patch_depth -  // col
-                        r * patch_planes * patch_depth -               // row
-                        p * patch_depth +                              // plane
-                        startDepth,
-                    patch_depth);
-                eigen_assert((max_depth - startDepth) % packet_size == 0);
-
-                for (Index d = startDepth; d < max_depth; d += packet_size) {
+                const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
+                eigen_assert((max_depth - start_depth) % packet_size == 0);
+
+                for (Index d = start_depth; d < max_depth; d += packet_size) {
                   eigen_assert(k < peeled_k);
                   PacketBlock<Packet, 2> kernel0;
                   PacketBlock<Packet, 2> kernel1;
@@ -1242,21 +1297,9 @@ struct gemm_pack_rhs<
             }
           }
 
-          for (; k < peeled_k; k += packet_size) {
-            PacketBlock<Packet, 2> kernel0;
-            PacketBlock<Packet, 2> kernel1;
-            kernel0.packet[0] = dm0.loadPacketFast(k);
-            kernel0.packet[1] = dm1.loadPacketFast(k);
-            kernel1.packet[0] = dm2.loadPacketFast(k);
-            kernel1.packet[1] = dm3.loadPacketFast(k);
-            ptranspose(kernel0);
-            ptranspose(kernel1);
-            pstoreu(block + 0 * packet_size, kernel0.packet[0]);
-            pstoreu(block + 1 * packet_size, kernel1.packet[0]);
-            pstoreu(block + 2 * packet_size, kernel0.packet[1]);
-            pstoreu(block + 3 * packet_size, kernel1.packet[1]);
-            block += 4 * packet_size;
-          }
+          // The loop above should fill peeled_k elements.
+          eigen_assert(peeled_k == k);
+
         } else {
           for (; k < peeled_k; k += packet_size) {
             PacketBlock<Packet, 2> kernel0;
@@ -1275,6 +1318,8 @@ struct gemm_pack_rhs<
           }
         }
       }
+
+      // Copy the remaining coefficients of the column block after the peeled_k.
       if (!rhs.nonStandardPatches()) {
         for (; k < depth; k++) {
           block[0] = dm0.loadCoeffStandard(k);
@@ -1294,7 +1339,7 @@ struct gemm_pack_rhs<
       }
     }
 
-    // copy the remaining columns one at a time (nr==1)
+    // Copy the remaining columns one at a time (nr==1).
     for (Index j2 = packet_cols4; j2 < cols; ++j2) {
       const SubMapper dm0 = rhs.getLinearMapper(0, j2);
       for (Index k = 0; k < depth; k++) {
@@ -1333,6 +1378,8 @@ struct gemm_pack_rhs<
       SubMapper;
   typedef SubMapper DataMapper;
 
+  EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
   EIGEN_DEVICE_FUNC
   EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs,
                                     Index depth, Index cols, Index stride = 0,
@@ -1340,8 +1387,6 @@ struct gemm_pack_rhs<
     eigen_assert(stride == 0);
     eigen_assert(offset == 0);
 
-    EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
     const Index packet_cols4 = (cols / 4) * 4;
 
     for (Index j2 = 0; j2 < packet_cols4; j2 += 4) {
@@ -1369,7 +1414,7 @@ struct gemm_pack_rhs<
       }
     }
 
-    // copy the remaining columns one at a time (nr==1)
+    // Copy the remaining columns one at a time (nr==1).
     for (Index j2 = packet_cols4; j2 < cols; ++j2) {
       const SubMapper dm0 = rhs.getLinearMapper(0, j2);
       for (Index k = 0; k < depth; k++) {
diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h
index 8bd362db45..e926d73f87 100644
--- a/tensorflow/core/kernels/eigen_spatial_convolutions.h
+++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h
@@ -105,12 +105,17 @@ class TensorContractionInputMapper<
       m_patch_cols = tensor.impl().dimensions()[2];
       m_num_patches = tensor.impl().dimensions()[3];
     } else {
-      const int NumDims = tensor.impl().dimensions().size();
+      const size_t NumDims = tensor.impl().dimensions().size();
       patch_depth = tensor.impl().dimensions()[NumDims - 1];
       patch_rows = tensor.impl().dimensions()[NumDims - 2];
       m_patch_cols = tensor.impl().dimensions()[NumDims - 3];
       m_num_patches = tensor.impl().dimensions()[NumDims - 4];
     }
+
+    // Strides for navigating through the single patch.
+    m_patch_row_stride = patch_depth;
+    m_patch_col_stride = patch_rows * m_patch_row_stride;
+
     m_patch_row_inflate_strides = tensor.impl().rowInflateStride();
     m_patch_col_inflate_strides = tensor.impl().colInflateStride();
 
@@ -139,6 +144,10 @@ class TensorContractionInputMapper<
     m_rowPaddingTop = tensor.impl().rowPaddingTop();
     m_colPaddingLeft = tensor.impl().colPaddingLeft();
 
+    m_fastPatchRowStride =
+        internal::TensorIntDivisor<Index>(m_patch_row_stride);
+    m_fastPatchColStride =
+        internal::TensorIntDivisor<Index>(m_patch_col_stride);
     m_fastInputRowStride =
         internal::TensorIntDivisor<Index>(m_patch_row_inflate_strides);
     m_fastInputColStride =
@@ -154,6 +163,10 @@ class TensorContractionInputMapper<
       : m_impl(base_mapper.m_impl) {
     m_patch_cols = base_mapper.m_patch_cols;
     m_num_patches = base_mapper.m_num_patches;
+
+    m_patch_row_stride = base_mapper.m_patch_row_stride;
+    m_patch_col_stride = base_mapper.m_patch_col_stride;
+
     m_patch_row_inflate_strides = base_mapper.m_patch_row_inflate_strides;
     m_patch_col_inflate_strides = base_mapper.m_patch_col_inflate_strides;
 
@@ -176,6 +189,8 @@ class TensorContractionInputMapper<
     m_rowPaddingTop = base_mapper.m_rowPaddingTop;
     m_colPaddingLeft = base_mapper.m_colPaddingLeft;
 
+    m_fastPatchRowStride = base_mapper.m_fastPatchRowStride;
+    m_fastPatchColStride = base_mapper.m_fastPatchColStride;
     m_fastInputRowStride = base_mapper.m_fastInputRowStride;
     m_fastInputColStride = base_mapper.m_fastInputColStride;
     m_fastNumPatches = base_mapper.m_fastNumPatches;
@@ -450,8 +465,15 @@ class TensorContractionInputMapper<
     rowIndex = rowIndex * m_row_strides - m_rowPaddingTop;
   }
 
-  Index m_patch_cols;                 // number of colums in the patch
-  Index m_num_patches;                // number of patches to extract.
+  Index m_patch_cols;   // number of columns in the patch
+  Index m_num_patches;  // number of patches to extract.
+
+  // Strides for navigating through the single patch.
+  Index m_patch_row_stride;
+  Index m_patch_col_stride;
+  internal::TensorIntDivisor<Index> m_fastPatchRowStride;
+  internal::TensorIntDivisor<Index> m_fastPatchColStride;
+
   Index m_patch_row_inflate_strides;  // the strides for row inflation in the
                                       // image patch
   Index m_patch_col_inflate_strides;  // the strides for col inflation in the
@@ -585,6 +607,40 @@ class TensorContractionSubMapper<
     return m_base_mapper.nonStandardPatches();
   }
 
+  // Max(Col|Row|Depth): compute the upper limit for the column, row and depth
+  // index respectively that fits into the peeled_k elements starting at
+  // m_depth_offset.
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxCol(const Index peeled_k) const {
+    const Index max_col =
+        fastPatchColStride().divide(m_depth_offset + peeled_k);
+    return std::min<Index>(1 + max_col, patchCols());
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxRow(const Index peeled_k,
+                                   const Index col) const {
+    const Index max_row = fastPatchRowStride().divide(
+        m_depth_offset + peeled_k - col * patchColStride());
+    return std::min<Index>(1 + max_row, patchRows());
+  }
+
+  // MaxDepth uses only the remaining number of elements in the peeled_k.
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxDepth(const Index num_elements,
+                                     const Index start_depth) const {
+    return std::min<Index>(start_depth + num_elements, patchDepth());
+  }
+
+  // Every register matters in this code, so sometimes to prevent register
+  // spilling, instead of the variable that you would expect to see, we use
+  // another one, that is guaranteed to have the same value. E.g. patch depth is
+  // always the same as input depth, and it's also the same as input row stride.
+  // Bunch of other parameters have similar relations.
+
+  typedef internal::TensorIntDivisor<Index> IndexDivisor;
+
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Index patchDepth() const {
     return m_base_mapper.m_rowInputStride;
@@ -598,6 +654,28 @@ class TensorContractionSubMapper<
     return m_base_mapper.m_patch_cols;
   }
 
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchRowStride() const {
+    eigen_assert(patchDepth() == m_base_mapper.m_patch_row_stride &&
+                 "Patch depth must be equal to patch row stride.");
+    return patchDepth();
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchColStride() const {
+    return m_base_mapper.m_patch_col_stride;
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE IndexDivisor fastPatchRowStride() const {
+    eigen_assert(patchDepth() == m_base_mapper.m_patch_row_stride &&
+                 "Patch depth must be equal to patch row stride.");
+    return m_base_mapper.m_fastDimZero;  // patch_depth
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE IndexDivisor fastPatchColStride() const {
+    return m_base_mapper.m_fastPatchColStride;
+  }
+
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth,
                                              const Index baseIndex) const {
@@ -639,8 +717,7 @@ class TensorContractionSubMapper<
 
   EIGEN_DEVICE_FUNC
   EIGEN_ALWAYS_INLINE Index depthOffset() const {
-    const Index patchOffset = m_depth_offset % m_base_mapper.patchDepth();
-    return patchOffset;
+    return m_depth_offset % patchDepth();
   }
 
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper
@@ -665,14 +742,14 @@ class TensorContractionSubMapper<
 // matrix" constructed from extracted image patches) in contiguous memory.
 //
 // Given column major input (A0 beside A1 in memory):
-// A0 B0 C0 D0 E0 F0 G0 H0 ... Z0
-// A1 B1 C1 D1 E1 F1 G1 H1 ... Z1
-// A2 B2 C2 D2 E2 F2 G2 H2 ... Z2
-// A3 B3 C3 D3 E3 F3 G3 H3 ... Z3
-// A4 B4 C4 D4 E4 F4 G4 H4 ... Z4
-// A5 B5 C5 D5 E5 F5 G5 H5 ... Z5
-// A6 B6 C6 D6 E6 F6 G6 H6 ... Z6
-// A7 B7 C7 D7 E7 F7 G7 H7 ... Z7
+// A0 B0 C0 D0  E0 F0 G0 H0 ... Z0
+// A1 B1 C1 D1  E1 F1 G1 H1 ... Z1
+// A2 B2 C2 D2  E2 F2 G2 H2 ... Z2
+// A3 B3 C3 D3  E3 F3 G3 H3 ... Z3
+// A4 B4 C4 D4  E4 F4 G4 H4 ... Z4
+// A5 B5 C5 D5  E5 F5 G5 H5 ... Z5
+// A6 B6 C6 D6  E6 F6 G6 H6 ... Z6
+// A7 B7 C7 D7  E7 F7 G7 H7 ... Z7
 // A8 ...
 // ...
 //
@@ -717,9 +794,9 @@ struct gemm_pack_rhs<
       inner_dim_reordered, Alignment>
       SubMapper;
   typedef SubMapper DataMapper;
+  typedef typename packet_traits<Scalar>::type Packet;
 
-  EIGEN_DEVICE_FUNC
-  static inline Index ceil_div(Index a, Index b) { return (a + b - 1) / b; }
+  EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
 
   EIGEN_DEVICE_FUNC
   EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs,
@@ -728,9 +805,6 @@ struct gemm_pack_rhs<
     eigen_assert(stride == 0);
     eigen_assert(offset == 0);
 
-    EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    typedef typename packet_traits<Scalar>::type Packet;
-
     const Index packet_cols4 = (cols / 4) * 4;
     const Index peeled_k = (depth / packet_size) * packet_size;
     const bool non_standard_patches = rhs.nonStandardPatches();
@@ -743,30 +817,27 @@ struct gemm_pack_rhs<
 
       Index k = 0;
       if ((packet_size % 4) == 0 && !non_standard_patches) {
-        const Index patch_depth = rhs.patchDepth();
-        if ((patch_depth % packet_size) == 0) {
-          const Index patch_cols = rhs.patchCols();
-          const Index patch_rows = rhs.patchRows();
-
-          const Index startCol = rhs.colOffset();
-          const Index max_cols = std::min<Index>(
-              ceil_div(peeled_k, patch_rows * patch_depth) + startCol,
-              patch_cols);
-
-          for (Index c = startCol; c < max_cols; ++c) {
-            eigen_assert(k < peeled_k);
-            const Index startRow = (c == startCol) ? rhs.rowOffset() : 0;
-            const Index max_rows = std::min<Index>(
-                ceil_div(peeled_k - c * patch_rows * patch_depth, patch_depth) +
-                    startRow,
-                patch_rows);
+        // FAST PATH:
+        // Iterate over patch columns and rows, if we know that a single
+        // packet do not span across multiple rows or columns.
+        if ((rhs.patchDepth() % packet_size) == 0) {
+          const Index start_col = rhs.colOffset();
+          const Index max_col = rhs.maxCol(peeled_k);
+
+          for (Index c = start_col; c < max_col; ++c) {
+            eigen_assert(k <= peeled_k);
+
+            const Index start_row = (c == start_col) ? rhs.rowOffset() : 0;
+            const Index max_row = rhs.maxRow(peeled_k, c);
 
             const bool pad_col0 = dm0.padCol(c);
             const bool pad_col1 = dm1.padCol(c);
             const bool pad_col2 = dm2.padCol(c);
             const bool pad_col3 = dm3.padCol(c);
-            for (Index r = startRow; r < max_rows; ++r) {
-              eigen_assert(k < peeled_k);
+
+            for (Index r = start_row; r < max_row; ++r) {
+              eigen_assert(k <= peeled_k);
+
               const bool pad0 = pad_col0 || dm0.padRow(r);
               const bool pad1 = pad_col1 || dm1.padRow(r);
               const bool pad2 = pad_col2 || dm2.padRow(r);
@@ -777,14 +848,13 @@ struct gemm_pack_rhs<
               const Index idx2 = dm2.baseIndex(r, c);
               const Index idx3 = dm3.baseIndex(r, c);
 
-              const Index startDepth =
-                  ((c == startCol) && (r == startRow)) ? rhs.depthOffset() : 0;
-              const Index max_depth =
-                  std::min<Index>(peeled_k - c * patch_rows * patch_depth -
-                                      r * patch_depth + startDepth,
-                                  patch_depth);
-              eigen_assert((max_depth - startDepth) % packet_size == 0);
-              for (Index d = startDepth; d < max_depth; d += packet_size) {
+              const Index start_depth = ((c == start_col) && (r == start_row))
+                                            ? rhs.depthOffset()
+                                            : 0;
+              const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
+              eigen_assert((max_depth - start_depth) % packet_size == 0);
+
+              for (Index d = start_depth; d < max_depth; d += packet_size) {
                 eigen_assert(k < peeled_k);
                 PacketBlock<Packet, 4> kernel;
                 kernel.packet[0] = pad0 ? pset1<Packet>(Scalar(0))
@@ -806,19 +876,9 @@ struct gemm_pack_rhs<
             }
           }
 
-          for (; k < peeled_k; k += packet_size) {
-            PacketBlock<Packet, 4> kernel;
-            kernel.packet[0] = dm0.loadPacketFast(k);
-            kernel.packet[1] = dm1.loadPacketFast(k);
-            kernel.packet[2] = dm2.loadPacketFast(k);
-            kernel.packet[3] = dm3.loadPacketFast(k);
-            ptranspose(kernel);
-            pstoreu(block + 0 * packet_size, kernel.packet[0]);
-            pstoreu(block + 1 * packet_size, kernel.packet[1]);
-            pstoreu(block + 2 * packet_size, kernel.packet[2]);
-            pstoreu(block + 3 * packet_size, kernel.packet[3]);
-            block += 4 * packet_size;
-          }
+          // The loop above should fill peeled_k elements.
+          eigen_assert(peeled_k == k);
+
         } else {
           for (; k < peeled_k; k += packet_size) {
             PacketBlock<Packet, 4> kernel;
@@ -835,6 +895,8 @@ struct gemm_pack_rhs<
           }
         }
       }
+
+      // Copy the remaining coefficients of the column block after the peeled_k.
       if (!rhs.nonStandardPatches()) {
         for (; k < depth; k++) {
           block[0] = dm0.loadCoeffStandard(k);
@@ -892,9 +954,9 @@ struct gemm_pack_rhs<
       Alignment>
       SubMapper;
   typedef SubMapper DataMapper;
+  typedef typename packet_traits<Scalar>::type Packet;
 
-  EIGEN_DEVICE_FUNC
-  static inline Index ceil_div(Index a, Index b) { return (a + b - 1) / b; }
+  EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
 
   EIGEN_DEVICE_FUNC
   EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs,
@@ -903,9 +965,6 @@ struct gemm_pack_rhs<
     eigen_assert(stride == 0);
     eigen_assert(offset == 0);
 
-    EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    typedef typename packet_traits<Scalar>::type Packet;
-
     const int packet_size = 2;
     const Index packet_cols4 = (cols / 4) * 4;
     const Index peeled_k = (depth / packet_size) * packet_size;
@@ -919,30 +978,27 @@ struct gemm_pack_rhs<
 
       Index k = 0;
       if (!non_standard_patches) {
-        const Index patch_depth = rhs.patchDepth();
-        if ((patch_depth % packet_size) == 0) {
-          const Index patch_cols = rhs.patchCols();
-          const Index patch_rows = rhs.patchRows();
-
-          const Index startCol = rhs.colOffset();
-          const Index max_cols = std::min<Index>(
-              ceil_div(peeled_k, patch_rows * patch_depth) + startCol,
-              patch_cols);
-
-          for (Index c = startCol; c < max_cols; ++c) {
-            eigen_assert(k < peeled_k);
-            const Index startRow = (c == startCol) ? rhs.rowOffset() : 0;
-            const Index max_rows = std::min<Index>(
-                ceil_div(peeled_k - c * patch_rows * patch_depth, patch_depth) +
-                    startRow,
-                patch_rows);
+        // FAST PATH:
+        // Iterate over patch columns and rows if we know that a single
+        // packet do not span across multiple rows or columns.
+        if ((rhs.patchDepth() % packet_size) == 0) {
+          const Index start_col = rhs.colOffset();
+          const Index max_col = rhs.maxCol(peeled_k);
+
+          for (Index c = start_col; c < max_col; ++c) {
+            eigen_assert(k <= peeled_k);
+
+            const Index start_row = (c == start_col) ? rhs.rowOffset() : 0;
+            const Index max_row = rhs.maxRow(peeled_k, c);
 
             const bool pad_col0 = dm0.padCol(c);
             const bool pad_col1 = dm1.padCol(c);
             const bool pad_col2 = dm2.padCol(c);
             const bool pad_col3 = dm3.padCol(c);
-            for (Index r = startRow; r < max_rows; ++r) {
-              eigen_assert(k < peeled_k);
+
+            for (Index r = start_row; r < max_row; ++r) {
+              eigen_assert(k <= peeled_k);
+
               const bool pad0 = pad_col0 || dm0.padRow(r);
               const bool pad1 = pad_col1 || dm1.padRow(r);
               const bool pad2 = pad_col2 || dm2.padRow(r);
@@ -953,14 +1009,13 @@ struct gemm_pack_rhs<
               const Index idx2 = dm2.baseIndex(r, c);
               const Index idx3 = dm3.baseIndex(r, c);
 
-              const Index startDepth =
-                  ((c == startCol) && (r == startRow)) ? rhs.depthOffset() : 0;
-              const Index max_depth =
-                  std::min<Index>(peeled_k - c * patch_rows * patch_depth -
-                                      r * patch_depth + startDepth,
-                                  patch_depth);
-              eigen_assert((max_depth - startDepth) % packet_size == 0);
-              for (Index d = startDepth; d < max_depth; d += packet_size) {
+              const Index start_depth = ((c == start_col) && (r == start_row))
+                                            ? rhs.depthOffset()
+                                            : 0;
+              const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
+              eigen_assert((max_depth - start_depth) % packet_size == 0);
+
+              for (Index d = start_depth; d < max_depth; d += packet_size) {
                 eigen_assert(k < peeled_k);
                 PacketBlock<Packet, 2> kernel0;
                 PacketBlock<Packet, 2> kernel1;
@@ -984,22 +1039,12 @@ struct gemm_pack_rhs<
             }
           }
 
-          for (; k < peeled_k; k += packet_size) {
-            PacketBlock<Packet, 2> kernel0;
-            PacketBlock<Packet, 2> kernel1;
-            kernel0.packet[0] = dm0.loadPacketFast(k);
-            kernel0.packet[1] = dm1.loadPacketFast(k);
-            kernel1.packet[0] = dm2.loadPacketFast(k);
-            kernel1.packet[1] = dm3.loadPacketFast(k);
-            ptranspose(kernel0);
-            ptranspose(kernel1);
-            pstoreu(block + 0 * packet_size, kernel0.packet[0]);
-            pstoreu(block + 1 * packet_size, kernel1.packet[0]);
-            pstoreu(block + 2 * packet_size, kernel0.packet[1]);
-            pstoreu(block + 3 * packet_size, kernel1.packet[1]);
-            block += 4 * packet_size;
-          }
+          // The loop above should fill peeled_k elements.
+          eigen_assert(peeled_k == k);
+
         } else {
+          // Packet can span multiple rows or columns, so we have to go
+          // though the slower "standard" path.
           for (; k < peeled_k; k += packet_size) {
             PacketBlock<Packet, 2> kernel0;
             PacketBlock<Packet, 2> kernel1;
@@ -1017,7 +1062,9 @@ struct gemm_pack_rhs<
           }
         }
       }
-      if (!rhs.nonStandardPatches()) {
+
+      // Copy the remaining coefficients of the column block after the peeled_k.
+      if (!non_standard_patches) {
         for (; k < depth; k++) {
           block[0] = dm0.loadCoeffStandard(k);
           block[1] = dm1.loadCoeffStandard(k);
@@ -1036,7 +1083,7 @@ struct gemm_pack_rhs<
       }
     }
 
-    // copy the remaining columns one at a time (nr==1)
+    // Copy the remaining columns one at a time (nr==1).
     for (Index j2 = packet_cols4; j2 < cols; ++j2) {
       const SubMapper dm0 = rhs.getLinearMapper(0, j2);
       for (Index k = 0; k < depth; k++) {
@@ -1074,8 +1121,7 @@ struct gemm_pack_rhs<
       SubMapper;
   typedef SubMapper DataMapper;
 
-  EIGEN_DEVICE_FUNC
-  static inline Index ceil_div(Index a, Index b) { return (a + b - 1) / b; }
+  EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
 
   EIGEN_DEVICE_FUNC
   EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs,
@@ -1084,8 +1130,6 @@ struct gemm_pack_rhs<
     eigen_assert(stride == 0);
     eigen_assert(offset == 0);
 
-    EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
     const Index packet_cols4 = (cols / 4) * 4;
 
     for (Index j2 = 0; j2 < packet_cols4; j2 += 4) {
@@ -1113,7 +1157,7 @@ struct gemm_pack_rhs<
       }
     }
 
-    // copy the remaining columns one at a time (nr==1)
+    // Copy the remaining columns one at a time (nr==1).
     for (Index j2 = packet_cols4; j2 < cols; ++j2) {
       const SubMapper dm0 = rhs.getLinearMapper(0, j2);
       for (Index k = 0; k < depth; k++) {
-- 
GitLab


From 514814057e03dcc9389f58e29187898ce7f3a44e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 16:28:01 -0700
Subject: [PATCH 0533/1357] Make 8bit reduce sum op handler rescaling

PiperOrigin-RevId: 214062241
---
 .../internal/reference/reference_ops.h        | 41 ++++++++++-----
 tensorflow/contrib/lite/kernels/reduce.cc     | 52 ++++++++++++++++---
 .../contrib/lite/kernels/reduce_test.cc       | 12 +++++
 3 files changed, 84 insertions(+), 21 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index bb1d30b216..5bfa3bd084 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -4661,12 +4661,15 @@ inline void Mean(const T* input_data, const Dims<4>& input_dims,
 // It does so in two stages, first calculates the sum of elements along the axis
 // then divides it by the number of element in axis for quantized values.
 template <typename T, typename U>
-inline bool Mean(const T* input_data, int32 input_zero_point, float input_scale,
-                 const int* input_dims, const int input_num_dims,
-                 T* output_data, int32 output_zero_point, float output_scale,
-                 const int* output_dims, const int output_num_dims,
-                 const int* axis, const int num_axis_dimensions, bool keep_dims,
-                 int* temp_index, int* resolved_axis, U* temp_sum) {
+inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
+                               float input_scale, const int* input_dims,
+                               const int input_num_dims, T* output_data,
+                               int32 output_zero_point, float output_scale,
+                               const int* output_dims,
+                               const int output_num_dims, const int* axis,
+                               const int num_axis_dimensions, bool keep_dims,
+                               int* temp_index, int* resolved_axis, U* temp_sum,
+                               bool compute_sum) {
   // Reset output data.
   size_t num_outputs = 1;
   for (int idx = 0; idx < output_num_dims; ++idx) {
@@ -4708,14 +4711,24 @@ inline bool Mean(const T* input_data, int32 input_zero_point, float input_scale,
 
   if (num_elements_in_axis > 0) {
     const float scale = input_scale / output_scale;
-    const float bias = -input_zero_point * scale;
-    for (size_t idx = 0; idx < num_outputs; ++idx) {
-      float float_mean = static_cast<float>(temp_sum[idx]) /
-                         static_cast<float>(num_elements_in_axis);
-
-      // Convert to float value.
-      output_data[idx] =
-          static_cast<T>(round(float_mean * scale + bias)) + output_zero_point;
+    if (compute_sum) {
+      // TODO(b/116341117): Eliminate float and do this completely in 8bit.
+      const float bias = -input_zero_point * scale * num_elements_in_axis + 0.5;
+      for (size_t idx = 0; idx < num_outputs; ++idx) {
+        const U value = static_cast<U>(round(temp_sum[idx] * scale + bias)) +
+                        output_zero_point;
+        output_data[idx] = static_cast<T>(value);
+      }
+    } else {
+      const float bias = -input_zero_point * scale + 0.5;
+      for (size_t idx = 0; idx < num_outputs; ++idx) {
+        float float_mean = static_cast<float>(temp_sum[idx]) /
+                           static_cast<float>(num_elements_in_axis);
+
+        // Convert to float value.
+        output_data[idx] = static_cast<T>(round(float_mean * scale + bias)) +
+                           output_zero_point;
+      }
     }
   }
   return true;
diff --git a/tensorflow/contrib/lite/kernels/reduce.cc b/tensorflow/contrib/lite/kernels/reduce.cc
index d94d821e87..4732a37a65 100644
--- a/tensorflow/contrib/lite/kernels/reduce.cc
+++ b/tensorflow/contrib/lite/kernels/reduce.cc
@@ -215,7 +215,7 @@ TfLiteStatus PrepareAny(TfLiteContext* context, TfLiteNode* node) {
   return PrepareSimple(context, node);
 }
 
-TfLiteStatus PrepareMean(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
 
   // reduce_mean requires a buffer to store intermediate sum result.
@@ -274,7 +274,7 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
         } else {
           TF_LITE_ENSURE(
               context,
-              reference_ops::Mean<>(
+              reference_ops::QuantizedMeanOrSum<>(
                   GetTensorData<uint8_t>(op_context.input),
                   op_context.input->params.zero_point,
                   op_context.input->params.scale, op_context.input->dims->data,
@@ -286,7 +286,7 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
                   GetTensorData<int>(op_context.axis), num_axis,
                   op_context.params->keep_dims, GetTensorData<int>(temp_index),
                   GetTensorData<int>(resolved_axis),
-                  GetTensorData<int>(temp_sum)));
+                  GetTensorData<int>(temp_sum), /*compute_sum=*/false));
         }
         break;
       default:
@@ -416,19 +416,57 @@ TfLiteStatus EvalGeneric(TfLiteContext* context, TfLiteNode* node) {
   }
 }
 
+TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) {
+  OpContext op_context(context, node);
+  const auto& input = op_context.input;
+  const auto& output = op_context.output;
+  if (input->type != kTfLiteUInt8 ||
+      (input->params.scale == output->params.scale &&
+       input->params.zero_point == output->params.zero_point)) {
+    return EvalGeneric<kReference, kSum>(context, node);
+  } else {
+    // Rescaling 8bit reduce sum.
+    int num_axis = static_cast<int>(NumElements(op_context.axis));
+    TfLiteTensor* temp_index = GetTemporary(context, node, /*index=*/0);
+    TfLiteTensor* resolved_axis = GetTemporary(context, node, /*index=*/1);
+    TfLiteTensor* temp_sum = GetTemporary(context, node, /*index=*/2);
+    // Resize the output tensor if the output tensor is dynamic.
+    if (IsDynamicTensor(op_context.output)) {
+      TF_LITE_ENSURE_OK(context,
+                        ResizeTempAxis(context, &op_context, resolved_axis));
+      TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context));
+      TF_LITE_ENSURE_OK(context, ResizeTempSum(context, &op_context, temp_sum));
+    }
+
+    TF_LITE_ENSURE(
+        context,
+        reference_ops::QuantizedMeanOrSum<>(
+            GetTensorData<uint8_t>(op_context.input),
+            op_context.input->params.zero_point, op_context.input->params.scale,
+            op_context.input->dims->data, op_context.input->dims->size,
+            GetTensorData<uint8_t>(op_context.output),
+            op_context.output->params.zero_point,
+            op_context.output->params.scale, op_context.output->dims->data,
+            op_context.output->dims->size, GetTensorData<int>(op_context.axis),
+            num_axis, op_context.params->keep_dims,
+            GetTensorData<int>(temp_index), GetTensorData<int>(resolved_axis),
+            GetTensorData<int32>(temp_sum), /*compute_sum=*/true));
+  }
+
+  return kTfLiteOk;
+}
 }  // namespace reduce
 
 TfLiteRegistration* Register_MEAN_REF() {
   static TfLiteRegistration r = {reduce::Init, reduce::Free,
-                                 reduce::PrepareMean,
+                                 reduce::PrepareMeanOrSum,
                                  reduce::EvalMean<reduce::kReference>};
   return &r;
 }
 
 TfLiteRegistration* Register_SUM_REF() {
-  static TfLiteRegistration r = {
-      reduce::Init, reduce::Free, reduce::PrepareSimple,
-      reduce::EvalGeneric<reduce::kReference, reduce::kSum>};
+  static TfLiteRegistration r = {reduce::Init, reduce::Free,
+                                 reduce::PrepareMeanOrSum, reduce::EvalSum};
   return &r;
 }
 
diff --git a/tensorflow/contrib/lite/kernels/reduce_test.cc b/tensorflow/contrib/lite/kernels/reduce_test.cc
index 6d289b14d8..fb2ec58ab2 100644
--- a/tensorflow/contrib/lite/kernels/reduce_test.cc
+++ b/tensorflow/contrib/lite/kernels/reduce_test.cc
@@ -488,6 +488,18 @@ TEST(ConstUint8SumOpTest, NotKeepDims) {
                   ArrayFloatNear({-0.823529, -0.815686}, kQuantizedTolerance)));
 }
 
+TEST(ConstUint8SumOpTest, NotKeepDimsRescaling) {
+  float kQuantizedTolerance = GetTolerance(0.0, 2.0);
+  std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+  SumOpConstModel m({TensorType_UINT8, {1, 3, 2}, 0.0, 1.0},
+                    {TensorType_UINT8, {2}, 0.0, 2.0}, {1}, {1}, false);
+  m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
+  EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear(
+                                            {1.2, 1.2}, kQuantizedTolerance)));
+}
+
 TEST(ConstUint8SumOpTest, KeepDims) {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
   std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
-- 
GitLab


From f32c678543fcee2950e7ac6a84022e929df3acd7 Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Fri, 21 Sep 2018 16:49:19 -0700
Subject: [PATCH 0534/1357] Include xla.compile library in tensorflow pip
 package

PiperOrigin-RevId: 214065176
---
 tensorflow/tools/pip_package/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index f86cb03995..12354a6ab2 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -62,6 +62,7 @@ COMMON_PIP_DEPS = [
     "//tensorflow/contrib/autograph:autograph",
     "//tensorflow/contrib/boosted_trees:boosted_trees_pip",
     "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
+    "//tensorflow/contrib/compiler:xla",
     "//tensorflow/contrib/constrained_optimization:constrained_optimization_pip",
     "//tensorflow/contrib/data/python/kernel_tests/serialization:dataset_serialization_test_base",
     "//tensorflow/contrib/data/python/kernel_tests:stats_dataset_test_base",
-- 
GitLab


From 305a392904e6981e935c2a3514394379ba7083b1 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 21 Sep 2018 16:56:18 -0700
Subject: [PATCH 0535/1357] Prototype for the functions-not-sessions
 implementation.

PiperOrigin-RevId: 214065999
---
 tensorflow/python/eager/BUILD                |  28 +++
 tensorflow/python/eager/def_function.py      | 235 +++++++++++++++++++
 tensorflow/python/eager/def_function_test.py |  87 +++++++
 3 files changed, 350 insertions(+)
 create mode 100644 tensorflow/python/eager/def_function.py
 create mode 100644 tensorflow/python/eager/def_function_test.py

diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index a2686c68a9..f571da308e 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -46,6 +46,7 @@ py_library(
         ":backprop",
         ":context",
         ":core",
+        ":def_function",
         ":execute",
         ":function",
         ":graph_only_ops",
@@ -380,3 +381,30 @@ cuda_py_test(
         "optonly",  # The test is too slow in non-opt mode
     ],
 )
+
+py_library(
+    name = "def_function",
+    srcs = ["def_function.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":context",
+        ":function",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/training/checkpointable:base",
+    ],
+)
+
+py_test(
+    name = "def_function_test",
+    srcs = ["def_function_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":def_function",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:framework_ops",
+    ],
+)
diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
new file mode 100644
index 0000000000..8dcacd5c99
--- /dev/null
+++ b/tensorflow/python/eager/def_function.py
@@ -0,0 +1,235 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# pylint: disable=unidiomatic-typecheck
+"""Prototype decorator for defining graph-mode functions with eager semantics."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training.checkpointable import base as checkpointable
+
+
+class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
+  """Variable which does not lift its initializer out of function context.
+
+  Instances of this variable, when created, build a graph which runs their
+  initializer inside a tf.cond(is_initialized) block.
+
+  This can only be created inside a defun called from (eventually) eager
+  mode. That is, non-function-building graphs are not supported.
+  """
+
+  def __init__(self,  # pylint: disable=super-init-not-called
+               initial_value=None,
+               trainable=True,
+               caching_device=None,
+               name=None,
+               dtype=None,
+               constraint=None,
+               **unused_kwargs):
+    """Creates a variable.
+
+    Args:
+      initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
+        which is the initial value for the Variable. The initial value must have
+        a shape specified unless `validate_shape` is set to False. Can also be a
+        callable with no argument that returns the initial value when called.
+        (Note that initializer functions from init_ops.py must first be bound
+         to a shape before being used here.)
+      trainable: If `True`, GradientTapes automatically watch uses of this
+        Variable.
+      caching_device: Optional device string or function describing where the
+        Variable should be cached for reading.  Defaults to the Variable's
+        device.  If not `None`, caches on another device.  Typical use is to
+        cache on the device where the Ops using the Variable reside, to
+        deduplicate copying through `Switch` and other conditional statements.
+      name: Optional name for the variable. Defaults to `'Variable'` and gets
+        uniquified automatically.
+      dtype: If set, initial_value will be converted to the given type.
+        If None, either the datatype will be kept (if initial_value is
+       a Tensor) or float32 will be used (if it is a Python object convertible
+       to a Tensor).
+      constraint: An optional projection function to be applied to the variable
+        after being updated by an `Optimizer` (e.g. used to implement norm
+        constraints or value constraints for layer weights). The function must
+        take as input the unprojected Tensor representing the value of the
+        variable and return the Tensor for the projected value
+        (which must have the same shape). Constraints are not safe to
+        use when doing asynchronous distributed training.
+
+    Raises:
+      ValueError: If the initial value is not specified, or does not have a
+        shape and `validate_shape` is `True`.
+      RuntimeError: If called outside of a function definition.
+    """
+    if context.executing_eagerly():
+      raise RuntimeError(
+          "UnliftedInitializerVariable should not be created "
+          "outside of functions.")
+    with ops.init_scope():
+      if not context.executing_eagerly():
+        raise RuntimeError(
+            "UnliftedInitializerVariable does not support legacy graph mode.")
+    self._in_graph_mode = False
+    if initial_value is None:
+      raise ValueError("initial_value must be specified.")
+    init_from_fn = callable(initial_value)
+
+    if constraint is not None and not callable(constraint):
+      raise ValueError("The `constraint` argument must be a callable.")
+
+    if isinstance(initial_value, checkpointable.CheckpointInitialValue):
+      self._maybe_initialize_checkpointable()
+      self._update_uid = initial_value.checkpoint_position.restore_uid
+      initial_value = initial_value.wrapped_value
+
+    self._trainable = trainable
+    self._save_slice_info = None
+    self._initial_value = None
+    self._initializer_op = None
+    self._is_initialized_op = None
+    self._graph_element = None
+    self._cached_value = None
+    # Store the graph key so optimizers know how to only retrieve variables from
+    # this graph. Guaranteed to be the same as the eager graph_key.
+    self._graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+    with ops.name_scope(name, "Variable", []
+                        if init_from_fn else [initial_value]) as name:
+      # pylint: disable=protected-access
+      with ops.init_scope():
+        assert context.executing_eagerly()
+        shared_name = ops._name_from_scope_name(name)
+        shared_name = "%s_%d" % (shared_name, ops.uid())
+      # Use attr_scope and device(None) to simulate the behavior of
+      # colocate_with when the variable we want to colocate with doesn't
+      # yet exist.
+      with ops.name_scope("Initializer"), ops.device(None):
+        initial_value = ops.convert_to_tensor(
+            initial_value() if init_from_fn else initial_value,
+            name="initial_value", dtype=dtype)
+      with ops.init_scope():
+        self._handle = resource_variable_ops.eager_safe_variable_handle(
+            shape=initial_value.get_shape(),
+            dtype=initial_value.dtype.base_dtype,
+            shared_name=shared_name,
+            name=name,
+            graph_mode=False)
+      self._shape = initial_value.shape
+      self._unique_id = shared_name
+      self._handle_name = shared_name + ":0"
+      self._dtype = initial_value.dtype.base_dtype
+      self._constraint = constraint
+      assert initial_value is not None
+      def assign_fn():
+        with ops.name_scope("Assign") as n, ops.colocate_with(self._handle):
+          resource_variable_ops.assign_variable_op(
+              self._handle,
+              initial_value,
+              name=n)
+        # Returning values to keep tf.cond happy.
+        return ops.convert_to_tensor(1)
+      def not_assign_fn():
+        return ops.convert_to_tensor(0)
+      # Note: this cond is always guaranteed to run because we're inside a defun
+      # which will insert automatic control dependencies.
+      control_flow_ops.cond(
+          resource_variable_ops.var_is_initialized_op(self._handle),
+          not_assign_fn, assign_fn)
+
+    # After the handle has been created, set up a way to clean it up when
+    # executing eagerly. We'll hold the only reference to the deleter, so that
+    # when this object is garbage collected the deleter will be too. This
+    # means ResourceVariables can be part of reference cycles without those
+    # cycles being uncollectable.
+    self._handle_deleter = resource_variable_ops.EagerResourceDeleter(
+        handle=self._handle, handle_device=self._handle.device)
+    self._cached_shape_as_list = None
+
+
+def _defun_with_scope(scope, fn):
+
+  def wrapped_fn(*args, **kwds):
+    with variable_scope.variable_creator_scope(scope):
+      return fn(*args, **kwds)
+
+  return function.defun(wrapped_fn)
+
+
+def def_function(fn):
+  """Defines a function as per the "functions, not sessions" document."""
+
+  # Wrapping the values in lists to bypass python's lack of way to mutate
+  # symbols from an outer scope.
+  first_call = [True]
+  function_to_call = []
+
+  # TODO(apassos) represent this as an object and not as a closure.
+  def decorated_fn(*args, **kwds):
+    """Graph function for fn."""
+    if not first_call[0]:
+      return function_to_call[0](*args, **kwds)
+
+    first_call[0] = False
+    created_variables = []
+
+    def variable_creator_scope(unused_next_creator, **kwds):
+      """Creates UnliftedInitializerVariables and saves references to them."""
+      v = UnliftedInitializerVariable(**kwds)
+      created_variables.append(v)
+      return v
+
+    first_graph_function = _defun_with_scope(variable_creator_scope, fn)
+
+    # Force the definition of the function for these arguments
+    first_concrete = first_graph_function.get_concrete_function(*args, **kwds)
+
+    def invalid_creator_scope(*unused_args, **unused_kwds):
+      """Disables variable creation."""
+      raise ValueError(
+          "def_function-decorated function tried to create "
+          "variables on second call.")
+
+    second_graph_function = _defun_with_scope(invalid_creator_scope, fn)
+
+    function_to_call.append(second_graph_function)
+    if not created_variables:
+      # Note: this retracing might be unnecessary, but running the function
+      # forever in the scope which disallows variable creation is safer than not
+      # doing so.
+      return second_graph_function(*args, **kwds)
+
+    def fn_with_cond(*inner_args, **inner_kwds):
+      """Conditionally runs initialization if it's needed."""
+      condition = True
+      for variable in created_variables:
+        condition = condition and resource_variable_ops.var_is_initialized_op(
+            variable.handle)
+      # We want to call second_graph_function if possible because it avoids
+      # recomputing potentially expensive initializers.
+      return control_flow_ops.cond(
+          condition,
+          lambda: second_graph_function(*inner_args, **inner_kwds),
+          lambda: first_concrete(*inner_args, **inner_kwds))
+
+    return function.defun(fn_with_cond)(*args, **kwds)
+
+  return decorated_fn
diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py
new file mode 100644
index 0000000000..804436c4bb
--- /dev/null
+++ b/tensorflow/python/eager/def_function_test.py
@@ -0,0 +1,87 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class DefFunctionTest(test.TestCase):
+
+  def testNoVariables(self):
+
+    @def_function.def_function
+    def fn(x):
+      return 2 * x
+
+    self.assertAllEqual(fn(constant_op.constant(4.0)), 8.0)
+
+  def testFailIfVariablesAreCreatedMoreThanOnce(self):
+
+    @def_function.def_function
+    def fn(x):
+      return variables.Variable(1.0) + x
+
+    with self.assertRaises(ValueError):
+      fn(1.0)
+
+  def testFailIfVariablesAreCreatedMoreThanOnceNoWeakRef(self):
+    state = []
+
+    @def_function.def_function
+    def fn(x):
+      state.append(variables.Variable(1.0))
+      return state[-1] + x
+
+    with self.assertRaises(ValueError):
+      fn(1.0)
+
+  def testCorrectVariableCreation(self):
+
+    state = []
+
+    @def_function.def_function
+    def fn(x):
+      if not state:
+        state.append(variables.Variable(2.0))
+      return state[0] * x
+
+    self.assertAllEqual(fn(constant_op.constant(1.0)), 2.0)
+    self.assertAllEqual(fn(constant_op.constant(3.0)), 6.0)
+
+  def testVariableInitializerNotConstant(self):
+
+    state = []
+
+    @def_function.def_function
+    def fn(x):
+      if not state:
+        state.append(variables.Variable(2.0 * x))
+      return state[0] * x
+
+    self.assertAllEqual(fn(constant_op.constant(1.0)), 2.0)
+    self.assertAllEqual(fn(constant_op.constant(3.0)), 6.0)
+
+
+if __name__ == '__main__':
+  ops.enable_eager_execution()
+  test.main()
-- 
GitLab


From 0e220a53b7c4b9ad6dd14a3bfa0ab52f6105b7cf Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Fri, 21 Sep 2018 17:12:04 -0700
Subject: [PATCH 0536/1357] [TF nest] Better error messages showing full
 structures on assert_same_structure

PiperOrigin-RevId: 214067946
---
 tensorflow/python/util/nest.py      | 22 +++++++++++++++++++++-
 tensorflow/python/util/nest_test.py |  6 +++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index 2968ca9c07..653ca525dc 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -118,6 +118,18 @@ flatten = _pywrap_tensorflow.Flatten
 _same_namedtuples = _pywrap_tensorflow.SameNamedtuples
 
 
+class _DotString(object):
+
+  def __str__(self):
+    return "."
+
+  def __repr__(self):
+    return "."
+
+
+_DOT = _DotString()
+
+
 def assert_same_structure(nest1, nest2, check_types=True):
   """Asserts that two structures are nested in the same way.
 
@@ -149,7 +161,15 @@ def assert_same_structure(nest1, nest2, check_types=True):
     TypeError: If the two structures differ in the type of sequence in any of
       their substructures. Only possible if `check_types` is `True`.
   """
-  _pywrap_tensorflow.AssertSameStructure(nest1, nest2, check_types)
+  try:
+    _pywrap_tensorflow.AssertSameStructure(nest1, nest2, check_types)
+  except (ValueError, TypeError) as e:
+    str1 = str(map_structure(lambda _: _DOT, nest1))
+    str2 = str(map_structure(lambda _: _DOT, nest2))
+    raise type(e)("%s\n"
+                  "Entire first structure:\n%s\n"
+                  "Entire second structure:\n%s"
+                  % (str(e), str1, str2))
 
 
 def flatten_dict_items(dictionary):
diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py
index ef503137d1..bfb4c6f910 100644
--- a/tensorflow/python/util/nest_test.py
+++ b/tensorflow/python/util/nest_test.py
@@ -264,7 +264,11 @@ class NestTest(parameterized.TestCase, test.TestCase):
          "Second structure:.*\n\n"
          "More specifically: Substructure "
          r'"type=tuple str=\(\(1, 2\), 3\)" is a sequence, while '
-         'substructure "type=str str=spam" is not')):
+         'substructure "type=str str=spam" is not\n'
+         "Entire first structure:\n"
+         r"\(\(\(\., \.\), \.\), \., \(\., \.\)\)\n"
+         "Entire second structure:\n"
+         r"\(\., \.\)")):
       nest.assert_same_structure(structure1, structure_different_num_elements)
 
     with self.assertRaisesRegexp(
-- 
GitLab


From 8469e314dae2c177c116bd17e38991c9a32bf418 Mon Sep 17 00:00:00 2001
From: Mingxing Tan <tanmingxing@google.com>
Date: Fri, 21 Sep 2018 17:58:20 -0700
Subject: [PATCH 0537/1357] Release MnasNet models.

PiperOrigin-RevId: 214072562
---
 tensorflow/contrib/lite/g3doc/models.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tensorflow/contrib/lite/g3doc/models.md b/tensorflow/contrib/lite/g3doc/models.md
index a4267eee4c..279764ce96 100644
--- a/tensorflow/contrib/lite/g3doc/models.md
+++ b/tensorflow/contrib/lite/g3doc/models.md
@@ -1,6 +1,23 @@
 
 # List of Hosted Models
 
+# AutoML mobile image classification models (Float Models)
+
+Model Name          | Paper_Model_Files | Model_Size | Top-1 Accuracy | Top-5 Accuracy | TF Lite Performance^
+------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | ---------: | -------------: | -------------: | ---------------------:
+MnasNet_0.50_224| [paper](https://arxiv.org/abs/1807.11626), [tflite&pb](https://storage.cloud.google.com/download.tensorflow.org/models/tflite/mnasnet_0.5_224_09_07_2018.tgz) | 8.5 Mb    | 68.03%          | 87.79%          | 37 ms
+MnasNet_0.75_224| [paper](https://arxiv.org/abs/1807.11626), [tflite&pb](https://storage.cloud.google.com/download.tensorflow.org/models/tflite/mnasnet_0.75_224_09_07_2018.tgz) | 12 Mb     | 71.72%          | 90.17%          | 61 ms
+MnasNet_1.0_224| [paper](https://arxiv.org/abs/1807.11626), [tflite&pb](https://storage.cloud.google.com/download.tensorflow.org/models/tflite/mnasnet_1.0_224_09_07_2018.tgz) | 17 Mb     | 74.08%          | 91.75%          | 93 ms
+MnasNet_1.3_224| [paper](https://arxiv.org/abs/1807.11626), [tflite&pb](https://storage.cloud.google.com/download.tensorflow.org/models/tflite/mnasnet_1.3_224_09_07_2018.tgz) | 24 Mb     | 75.24%          | 92.55%          | 152 ms
+MnasNet_1.0_96| [paper](https://arxiv.org/abs/1807.11626), [tflite&pb](https://storage.cloud.google.com/download.tensorflow.org/models/tflite/mnasnet_1.0_96_09_07_2018.tgz) | 17 Mb    | 62.33%          | 83.98%          | 23 ms
+MnasNet_1.0_128| [paper](https://arxiv.org/abs/1807.11626), [tflite&pb](https://storage.cloud.google.com/download.tensorflow.org/models/tflite/mnasnet_1.0_128_09_07_2018.tgz) | 17 Mb    | 67.32%          | 87.70%          | 34 ms
+MnasNet_1.0_160| [paper](https://arxiv.org/abs/1807.11626), [tflite&pb](https://storage.cloud.google.com/download.tensorflow.org/models/tflite/mnasnet_1.0_160_09_07_2018.tgz) | 17 Mb    | 70.63%          | 89.58%          | 51 ms
+MnasNet_1.0_192| [paper](https://arxiv.org/abs/1807.11626), [tflite&pb](https://storage.cloud.google.com/download.tensorflow.org/models/tflite/mnasnet_1.0_192_09_07_2018.tgz) | 17 Mb    | 72.56%          | 90.76%          | 70 ms
+MnasNet_1.0_224| [paper](https://arxiv.org/abs/1807.11626), [tflite&pb](https://storage.cloud.google.com/download.tensorflow.org/models/tflite/mnasnet_1.0_224_09_07_2018.tgz) | 17 Mb    | 74.08%          | 91.75%          | 93 ms
+
+^ Performance numbers are generated on Pixel-1 using single thread large BIG core.
+
+
 ## Image classification (Float Models)
 
 Model Name            | Paper_Model_Files^                                                                                                                                                                        | Model_Size | Top-1 Accuracy | Top-5 Accuracy | TF Lite Performance^^ | Tensorflow Performance
-- 
GitLab


From d125fb8a39bb4fca1be5421130ed66d673ee590f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 18:03:48 -0700
Subject: [PATCH 0538/1357] Always add layer annotations, regardless of mode.

PiperOrigin-RevId: 214073179
---
 .../estimator/dnn_with_layer_annotations.py   | 79 +++++++++----------
 1 file changed, 37 insertions(+), 42 deletions(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index 152431d1b2..3fd9f12c61 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -24,7 +24,6 @@ import pickle
 from google.protobuf.any_pb2 import Any
 
 from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import model_fn
 from tensorflow.python.estimator.canned import dnn
 from tensorflow.python.feature_column import feature_column as feature_column_lib
 from tensorflow.python.framework import ops
@@ -68,7 +67,7 @@ def _to_any_wrapped_tensor_info(tensor):
   return any_buf
 
 
-def make_input_layer_with_layer_annotations(original_input_layer, mode):
+def make_input_layer_with_layer_annotations(original_input_layer):
   """Make an input_layer replacement function that adds layer annotations."""
 
   def input_layer_with_layer_annotations(features,
@@ -137,42 +136,38 @@ def make_input_layer_with_layer_annotations(original_input_layer, mode):
     if cols_to_output_tensors is not None:
       cols_to_output_tensors = local_cols_to_output_tensors
 
-    if mode and mode == model_fn.ModeKeys.PREDICT:
-      # Only annotate in PREDICT mode.
-
-      # Annotate features.
-      # These are the parsed Tensors, before embedding.
-
-      # Only annotate features used by FeatureColumns.
-      # We figure which ones are used by FeatureColumns by creating a parsing
-      # spec and looking at the keys.
-      spec = feature_column_lib.make_parse_example_spec(feature_columns)
-      for key in spec.keys():
-        tensor = features[key]
-        ops.add_to_collection(
-            LayerAnnotationsCollectionNames.keys(
-                LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
-        ops.add_to_collection(
-            LayerAnnotationsCollectionNames.values(
-                LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES),
-            _to_any_wrapped_tensor_info(tensor))
-
-      # Annotate feature columns.
-      for column in feature_columns:
-        # TODO(cyfoo): Find a better way to serialize and deserialize
-        # _FeatureColumn.
-        ops.add_to_collection(LayerAnnotationsCollectionNames.FEATURE_COLUMNS,
-                              serialize_feature_column(column))
-
-      for column, tensor in local_cols_to_output_tensors.items():
-        ops.add_to_collection(
-            LayerAnnotationsCollectionNames.keys(
-                LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
-            column.name)
-        ops.add_to_collection(
-            LayerAnnotationsCollectionNames.values(
-                LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
-            _to_any_wrapped_tensor_info(tensor))
+    # Annotate features.
+    # These are the parsed Tensors, before embedding.
+
+    # Only annotate features used by FeatureColumns.
+    # We figure which ones are used by FeatureColumns by creating a parsing
+    # spec and looking at the keys.
+    spec = feature_column_lib.make_parse_example_spec(feature_columns)
+    for key in spec.keys():
+      tensor = ops.convert_to_tensor(features[key])
+      ops.add_to_collection(
+          LayerAnnotationsCollectionNames.keys(
+              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
+      ops.add_to_collection(
+          LayerAnnotationsCollectionNames.values(
+              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES),
+          _to_any_wrapped_tensor_info(tensor))
+
+    # Annotate feature columns.
+    for column in feature_columns:
+      # TODO(cyfoo): Find a better way to serialize and deserialize
+      # _FeatureColumn.
+      ops.add_to_collection(LayerAnnotationsCollectionNames.FEATURE_COLUMNS,
+                            serialize_feature_column(column))
+
+    for column, tensor in local_cols_to_output_tensors.items():
+      ops.add_to_collection(
+          LayerAnnotationsCollectionNames.keys(
+              LayerAnnotationsCollectionNames.PROCESSED_FEATURES), column.name)
+      ops.add_to_collection(
+          LayerAnnotationsCollectionNames.values(
+              LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
+          _to_any_wrapped_tensor_info(tensor))
 
     return input_layer
 
@@ -302,8 +297,8 @@ def DNNClassifierWithLayerAnnotations(  # pylint: disable=invalid-name
   def _model_fn(features, labels, mode, config):
     with _monkey_patch(
         feature_column_lib, 'input_layer',
-        make_input_layer_with_layer_annotations(feature_column_lib.input_layer,
-                                                mode)):
+        make_input_layer_with_layer_annotations(
+            feature_column_lib.input_layer)):
       return original.model_fn(features, labels, mode, config)
 
   return estimator.Estimator(
@@ -423,8 +418,8 @@ def DNNRegressorWithLayerAnnotations(  # pylint: disable=invalid-name
   def _model_fn(features, labels, mode, config):
     with _monkey_patch(
         feature_column_lib, 'input_layer',
-        make_input_layer_with_layer_annotations(feature_column_lib.input_layer,
-                                                mode)):
+        make_input_layer_with_layer_annotations(
+            feature_column_lib.input_layer)):
       return original.model_fn(features, labels, mode, config)
 
   return estimator.Estimator(
-- 
GitLab


From 086183579a59e07fc9b1ebbfa6516258da0a215b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 18:25:02 -0700
Subject: [PATCH 0539/1357] Create a GRPC service library to enable reuse in
 other parts of the code base.

PiperOrigin-RevId: 214074684
---
 tensorflow/compiler/xla/rpc/BUILD                | 12 ++++++++++--
 tensorflow/compiler/xla/rpc/grpc_service_main.cc | 11 ++++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/rpc/BUILD b/tensorflow/compiler/xla/rpc/BUILD
index 97fcd37f6b..aa8da04489 100644
--- a/tensorflow/compiler/xla/rpc/BUILD
+++ b/tensorflow/compiler/xla/rpc/BUILD
@@ -34,8 +34,8 @@ cc_library(
     ],
 )
 
-tf_cc_binary(
-    name = "grpc_service_main_cpu",
+cc_library(
+    name = "grpc_service_main_library",
     srcs = ["grpc_service_main.cc"],
     deps = [
         ":grpc_service",
@@ -47,6 +47,14 @@ tf_cc_binary(
     ],
 )
 
+tf_cc_binary(
+    name = "grpc_service_main_cpu",
+    deps = [
+        ":grpc_service_main_library",
+        "//tensorflow/compiler/xla/service:cpu_plugin",
+    ],
+)
+
 tf_cc_test(
     name = "grpc_client_test",
     srcs = ["grpc_client_test.cc"],
diff --git a/tensorflow/compiler/xla/rpc/grpc_service_main.cc b/tensorflow/compiler/xla/rpc/grpc_service_main.cc
index d6b5149a24..fb54d39a2a 100644
--- a/tensorflow/compiler/xla/rpc/grpc_service_main.cc
+++ b/tensorflow/compiler/xla/rpc/grpc_service_main.cc
@@ -29,8 +29,12 @@ namespace {
 
 int RealMain(int argc, char** argv) {
   int32 port = 1685;
+  bool any_address = false;
   std::vector<tensorflow::Flag> flag_list = {
-      tensorflow::Flag("port", &port, "port to listen on"),
+      tensorflow::Flag("port", &port, "The TCP port to listen on"),
+      tensorflow::Flag(
+          "any", &any_address,
+          "Whether to listen to any host address or simply localhost"),
   };
   string usage = tensorflow::Flags::Usage(argv[0], flag_list);
   bool parsed_values_ok = tensorflow::Flags::Parse(&argc, argv, flag_list);
@@ -44,15 +48,16 @@ int RealMain(int argc, char** argv) {
       xla::GRPCService::NewService().ConsumeValueOrDie();
 
   ::grpc::ServerBuilder builder;
-  string server_address(absl::StrFormat("localhost:%d", port));
+  string server_address(
+      absl::StrFormat("%s:%d", any_address ? "[::]" : "localhost", port));
 
+  builder.SetMaxReceiveMessageSize(INT_MAX);
   builder.AddListeningPort(server_address, ::grpc::InsecureServerCredentials());
   builder.RegisterService(service.get());
   std::unique_ptr<::grpc::Server> server(builder.BuildAndStart());
 
   LOG(INFO) << "Server listening on " << server_address;
   server->Wait();
-
   return 0;
 }
 
-- 
GitLab


From 1cb8940078f6be9313899734e1307a69fffc4b6f Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Fri, 21 Sep 2018 18:40:52 -0700
Subject: [PATCH 0540/1357] Move winograd algorithm workaround to stream
 executor.

PiperOrigin-RevId: 214075796
---
 .../gpu/cudnn_convolution_algorithm_picker.cc | 48 +++-------------
 tensorflow/stream_executor/cuda/cuda_dnn.cc   | 57 +++++++++++++++++++
 2 files changed, 64 insertions(+), 41 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index f528e62b17..9eee9ebbd7 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -76,54 +76,23 @@ StatusOr<se::DeviceMemory<uint8>> ScratchAllocator::AllocateBytes(
   return se::DeviceMemory<uint8>(buffer_addr);
 }
 
-// Determines whether we can safely perform a winograd non-fused convolution for
-// the given input and output shapes.  This works around b/68264959, an integer
-// overflow in cuDNNv5 and cuDNNv6.
-bool ShouldIncludeWinogradNonfusedAlgo(const Shape& input_shape,
-                                       const Shape& output_shape,
-                                       const ConvolutionDimensionNumbers& dnums,
-                                       se::StreamExecutor* stream_exec) {
-  // Skip this check for cudnn7 and newer.
-  auto version = stream_exec->AsDnn()->GetVersion();
-  if (version.ok() && version.ValueOrDie().major_version() >= 7) {
-    return true;
-  }
-
-  int64 batch = input_shape.dimensions(dnums.input_batch_dimension());
-  int64 in_depths = input_shape.dimensions(dnums.input_feature_dimension());
-  int64 in_rows = input_shape.dimensions(dnums.input_spatial_dimensions(0));
-  int64 in_cols =
-      dnums.input_spatial_dimensions_size() == 1
-          ? 1
-          : input_shape.dimensions(dnums.input_spatial_dimensions(1));
-  int64 out_depths = output_shape.dimensions(dnums.output_feature_dimension());
-
-  int64 total_size = CeilOfRatio(batch, int64{16}) *
-                     std::max(in_depths, out_depths) * in_cols * in_rows *
-                     sizeof(float);
-
-  const int64 threshold = 1L << 31;
-  return total_size < threshold;
-}
-
 std::vector<AlgorithmDesc> GetAlgorithms(CudnnConvKind kind,
-                                         bool with_winograd_nonfused,
                                          se::StreamExecutor* stream_exec) {
   std::vector<AlgorithmDesc> algorithms;
+  bool succ = false;
   switch (kind) {
     case CudnnConvKind::kBackwardFilter:
-      CHECK(stream_exec->GetConvolveBackwardFilterAlgorithms(
-          with_winograd_nonfused, &algorithms));
+      succ =
+          stream_exec->GetConvolveBackwardFilterAlgorithms(true, &algorithms);
       break;
     case CudnnConvKind::kBackwardInput:
-      CHECK(stream_exec->GetConvolveBackwardDataAlgorithms(
-          with_winograd_nonfused, &algorithms));
+      succ = stream_exec->GetConvolveBackwardDataAlgorithms(true, &algorithms);
       break;
     case CudnnConvKind::kForward:
-      CHECK(stream_exec->GetConvolveAlgorithms(with_winograd_nonfused,
-                                               &algorithms));
+      succ = stream_exec->GetConvolveAlgorithms(true, &algorithms);
       break;
   }
+  DCHECK(succ);
 
   return algorithms;
 }
@@ -282,8 +251,6 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
     }
   }();
 
-  const bool use_winograd_nonfused = ShouldIncludeWinogradNonfusedAlgo(
-      input_shape, output_shape, *params.dnums, stream_exec_);
   se::dnn::ProfileResult best_result;
   int64 best_result_bytes_used = 0;
 
@@ -292,8 +259,7 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
   // particular reason to use it, as any algorithm sufficies. It doesn't make
   // this algorithm considered correct, though.
   optional<AlgorithmDesc> first_algorithm;
-  for (const AlgorithmDesc& alg :
-       GetAlgorithms(params.kind, use_winograd_nonfused, stream_exec_)) {
+  for (const AlgorithmDesc& alg : GetAlgorithms(params.kind, stream_exec_)) {
     ScratchAllocator scratch_allocator(device_ordinal, allocator);
     se::dnn::ProfileResult profile_result;
     VLOG(3) << "Trying algorithm " << AlgorithmToString(alg) << " for "
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 3a77ba769c..ca90c383f9 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -35,6 +35,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/env.h"
 #include "tensorflow/stream_executor/lib/error.h"
 #include "tensorflow/stream_executor/lib/initialize.h"
+#include "tensorflow/stream_executor/lib/mathutil.h"
 #include "tensorflow/stream_executor/lib/strcat.h"
 #include "tensorflow/stream_executor/lib/stringpiece.h"
 #include "tensorflow/stream_executor/lib/threadpool.h"
@@ -2406,6 +2407,33 @@ cudnnDataType_t GetRnnComputeType(dnn::DataType data_type) {
   }
 }
 
+// Determines whether we can safely perform a winograd non-fused convolution for
+// the given input and output shapes.  This works around b/68264959, an integer
+// overflow in cuDNNv5 and cuDNNv6.
+#if CUDNN_VERSION >= 7000
+bool ShouldIncludeWinogradNonfusedAlgo(const dnn::BatchDescriptor&,
+                                       const dnn::BatchDescriptor&) {
+  return true;
+}
+#else
+bool ShouldIncludeWinogradNonfusedAlgo(
+    const dnn::BatchDescriptor& input_desc,
+    const dnn::BatchDescriptor& output_desc) {
+  int64 batch = input_desc.count();
+  int64 in_depths = input_desc.feature_map_count();
+  int64 in_rows = input_desc.height();
+  int64 in_cols = input_desc.ndims() == 1 ? 1 : input_desc.width();
+  int64 out_depths = output_desc.feature_map_count();
+
+  int64 total_size = port::MathUtil::CeilOfRatio(batch, int64{16}) *
+                     std::max(in_depths, out_depths) * in_cols * in_rows *
+                     sizeof(float);
+
+  const int64 threshold = 1L << 31;
+  return total_size < threshold;
+}
+#endif
+
 }  // namespace
 
 template <class T>
@@ -2484,6 +2512,13 @@ port::Status CudnnSupport::DoConvolveImpl(
     return port::Status::OK();
   }());
 
+  if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+      !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
+    return port::Status(port::error::FAILED_PRECONDITION,
+                        "This configuration has potential integer overflow in "
+                        "cuDNNv5 and cuDNNv6. See b/68264959.");
+  }
+
   RETURN_IF_CUDNN_ERROR(cudnnConvolutionForward(
       cudnn.handle(),
       /*alpha=*/alpha, /*srcDesc=*/input_nd.handle(),
@@ -2588,6 +2623,14 @@ port::Status CudnnSupport::DoFusedConvolveImpl(
           << "\noutput_nd.handle() = " << output_nd.handle()
           << "\noutput_data->opaque() = " << output_data->opaque();
 
+  if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+      !ShouldIncludeWinogradNonfusedAlgo(conv_input_descriptor,
+                                         output_descriptor)) {
+    return port::Status(port::error::FAILED_PRECONDITION,
+                        "This configuration has potential integer overflow in "
+                        "cuDNNv5 and cuDNNv6. See around b/68264959.");
+  }
+
   RETURN_IF_CUDNN_ERROR(cudnnConvolutionBiasActivationForward(
       cudnn.handle(),
       /*alpha1=*/&conv_input_scale,
@@ -3114,6 +3157,13 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
     }
   }
 
+  if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+      !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
+    return port::Status(port::error::FAILED_PRECONDITION,
+                        "This configuration has potential integer overflow in "
+                        "cuDNNv5 and cuDNNv6. See b/68264959.");
+  }
+
   // Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
   // zero-initialized, nvbugs/2254619.
   if (CUDNN_VERSION >= 7000 &&
@@ -3293,6 +3343,13 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
         "This configuration potentially produces incorrect results.");
   }());
 
+  if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+      !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
+    return port::Status(port::error::FAILED_PRECONDITION,
+                        "This configuration has potential integer overflow in "
+                        "cuDNNv5 and cuDNNv6. See b/68264959.");
+  }
+
   // Zero out the result buffer for strided conv backward filter for NHWC
   // layouts. cuDNN 7.1.4 and 7.2 has non-determinisic bug if the buffer is not
   // zeroed.
-- 
GitLab


From a2fd40adcc714f18167acd9650e5442d4afd6a01 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 18:46:35 -0700
Subject: [PATCH 0541/1357] [tf:xla]Implement DivNoNan.

PiperOrigin-RevId: 214076068
---
 tensorflow/compiler/tests/binary_ops_test.py  |  7 +++++++
 .../compiler/tf2xla/kernels/binary_ops.cc     | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 900e84ab58..e219cf3d88 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -560,6 +560,13 @@ class BinaryOpsTest(xla_test.XLATestCase):
         dtype(2),
         expected=np.array([[5], [2]], dtype=dtype))
 
+    if dtype in [np.float32, np.float64]:
+      nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1)
+      divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24)
+      np_result = np.true_divide(nums, divs)
+      np_result[:, divs[0] == 0] = 0
+      self._testBinary(gen_math_ops.div_no_nan, nums, divs, expected=np_result)
+
     if dtype not in self.complex_types:  # floordiv unsupported for complex.
       self._testBinary(
           gen_math_ops.floor_div,
diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
index 0d9a768a6f..66676452d0 100644
--- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/compiler/xla/client/client_library.h"
+#include "tensorflow/compiler/xla/client/lib/constants.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -55,6 +56,24 @@ XLA_MAKE_BINARY(Div, xla::Div(lhs, rhs, extend_dimensions));
 XLA_MAKE_BINARY(Atan2, xla::Atan2(lhs, rhs, extend_dimensions));
 XLA_MAKE_BINARY(Complex, xla::Complex(lhs, rhs, extend_dimensions));
 
+// Implementation of DivNoNan. Pseudo-code:
+// if (y == 0) {
+//   return 0
+// } else {
+//   return x / y;
+// }
+static xla::XlaOp DivNoNanImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
+                               xla::XlaOp y, const BCast& broadcast_helper) {
+  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  auto zero = XlaHelpers::Zero(b, dtype);
+  auto y_equals_0 = xla::Eq(y, zero);
+  auto zeros = xla::ZerosLike(x);
+  auto result = xla::Select(y_equals_0, zeros, xla::Div(x, y));
+  return result;
+}
+XLA_MAKE_BINARY(DivNoNan,
+                DivNoNanImpl(b, input_type(0), lhs, rhs, broadcast_helper));
+
 // Implementation of FloorDiv. Pseudo-code:
 // if ((x < 0) != (y < 0)) {
 //   T abs_x = std::abs(x);
-- 
GitLab


From 7229d08f0b25e24e6dd4833a94a27f404b27a350 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Fri, 21 Sep 2018 18:56:21 -0700
Subject: [PATCH 0542/1357] Experiment using Bazel's pip_install rule to
 install keras_applications.

PiperOrigin-RevId: 214076591
---
 WORKSPACE                     | 20 ++++++++++++++++++--
 tensorflow/python/keras/BUILD |  2 ++
 tensorflow/requirements.txt   |  2 ++
 3 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/requirements.txt

diff --git a/WORKSPACE b/WORKSPACE
index 17961829a6..11605871f3 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -9,11 +9,27 @@ http_archive(
         "https://github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz",  # 2018-04-13
     ],
 )
-
 load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
-
 closure_repositories()
 
+http_archive(
+    name = "io_bazel_rules_python",
+    strip_prefix = "rules_python-8b5d0683a7d878b28fffe464779c8a53659fc645",
+    urls = [
+        "https://github.com/bazelbuild/rules_python/archive/8b5d0683a7d878b28fffe464779c8a53659fc645.tar.gz",
+    ],
+)
+load("@io_bazel_rules_python//python:pip.bzl", "pip_repositories")
+pip_repositories()
+
+load("@io_bazel_rules_python//python:pip.bzl", "pip_import")
+pip_import(
+    name = "pip_deps",
+    requirements = "//tensorflow:requirements.txt",
+)
+load("@pip_deps//:requirements.bzl", "pip_install")
+pip_install()
+
 # We must check the bazel version before trying to parse any other BUILD
 # files, in case the parsing of those build files depends on the bazel
 # version we require here.
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 4a72c4b3f3..ac011a2940 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -7,6 +7,7 @@ exports_files(["LICENSE"])
 
 package(default_visibility = ["//visibility:public"])
 
+load("@pip_deps//:requirements.bzl", "requirement")
 load("//tensorflow:tensorflow.bzl", "py_test")
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 
@@ -62,6 +63,7 @@ py_library(
         ":backend",
         ":engine",
         ":layers",
+        requirement("keras_applications"),
         "//tensorflow/python/saved_model",
         "//tensorflow/python:training",
     ],
diff --git a/tensorflow/requirements.txt b/tensorflow/requirements.txt
new file mode 100644
index 0000000000..6e111edefc
--- /dev/null
+++ b/tensorflow/requirements.txt
@@ -0,0 +1,2 @@
+keras_applications >= 1.0.5
+keras_preprocessing >= 1.0.3
-- 
GitLab


From 812d5505f5302944f7bdd815a5518bd289418b9d Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Fri, 21 Sep 2018 19:03:15 -0700
Subject: [PATCH 0543/1357] Fix typo and use fully qualified names for
 consistency in LossScaleOptimizer docstring

PiperOrigin-RevId: 214077127
---
 .../contrib/mixed_precision/python/loss_scale_optimizer.py   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/mixed_precision/python/loss_scale_optimizer.py b/tensorflow/contrib/mixed_precision/python/loss_scale_optimizer.py
index fcce52a07a..a5621b44cd 100644
--- a/tensorflow/contrib/mixed_precision/python/loss_scale_optimizer.py
+++ b/tensorflow/contrib/mixed_precision/python/loss_scale_optimizer.py
@@ -66,10 +66,11 @@ class LossScaleOptimizer(optimizer.Optimizer):
 
   # Choose a loss scale manager which decides how to pick the right loss scale
   # throughout the training process.
-  loss_scale_manger = tf.contrib.mixed_precision.FixedLossScaleManager(5000)
+  loss_scale_manager = tf.contrib.mixed_precision.FixedLossScaleManager(5000)
 
   # Wraps the original optimizer in a LossScaleOptimizer.
-  loss_scale_optimizer = LossScaleOptimizer(opt, loss_scale_manager)
+  loss_scale_optimizer =
+      tf.contrib.mixed_precision.LossScaleOptimizer(opt, loss_scale_manager)
 
   # Call minimize() on the loss scale optimizer.
   train_op = loss_scale_optimizer.minimize(loss)
-- 
GitLab


From 174e782ded74187fa81f034bb3cfedf2b100286d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 19:08:11 -0700
Subject: [PATCH 0544/1357] Update error message upon a preemption error to
 highlight a potential gRPC failure and suggest increasing the number of
 parameter servers.

PiperOrigin-RevId: 214077622
---
 .../python/training/monitored_session.py      | 24 +++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index 0e0125a956..82f0e3be52 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -1114,7 +1114,11 @@ class _RecoverableSession(_WrappedSession):
         logging.info('An error was raised while a session was being created. '
                      'This may be due to a preemption of a connected worker '
                      'or parameter server. A new session will be created. '
-                     'Error: %s', e)
+                     'This error may also occur due to a gRPC failure caused '
+                     'by high memory or network bandwidth usage in the '
+                     'parameter servers. If this error occurs repeatedly, try '
+                     'increasing the number of parameter servers assigned to '
+                     'the job. Error: %s', e)
 
   def _check_stop(self):
     try:
@@ -1127,7 +1131,11 @@ class _RecoverableSession(_WrappedSession):
                    'session is complete. This may be due to a preemption in '
                    'a connected worker or parameter server. The current '
                    'session will be closed and a new session will be '
-                   'created. Error: %s', e)
+                   'created. This error may also occur due to a gRPC failure '
+                   'caused by high memory or network bandwidth usage in the '
+                   'parameter servers. If this error occurs repeatedly, try '
+                   'increasing the number of parameter servers assigned to '
+                   'the job. Error: %s', e)
       self.close()
       self._sess = self._create_session()
       # Since we have just recreated the session, the overall computation should
@@ -1150,7 +1158,11 @@ class _RecoverableSession(_WrappedSession):
         logging.info('An error was raised. This may be due to a preemption in '
                      'a connected worker or parameter server. The current '
                      'session will be closed and a new session will be '
-                     'created. Error: %s', e)
+                     'created. This error may also occur due to a gRPC failure '
+                     'caused by high memory or network bandwidth usage in the '
+                     'parameter servers. If this error occurs repeatedly, try '
+                     'increasing the number of parameter servers assigned to '
+                     'the job. Error: %s', e)
         self.close()
         self._sess = None
 
@@ -1166,7 +1178,11 @@ class _RecoverableSession(_WrappedSession):
         logging.info('An error was raised. This may be due to a preemption in '
                      'a connected worker or parameter server. The current '
                      'session will be closed and a new session will be '
-                     'created. Error: %s', e)
+                     'created. This error may also occur due to a gRPC failure '
+                     'caused by high memory or network bandwidth usage in the '
+                     'parameter servers. If this error occurs repeatedly, try '
+                     'increasing the number of parameter servers assigned to '
+                     'the job. Error: %s', e)
         self.close()
         self._sess = None
 
-- 
GitLab


From 0695e9ad8fe6f50942c8c18d648aea982541eeae Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Fri, 21 Sep 2018 19:24:00 -0700
Subject: [PATCH 0545/1357] xla.estimator_model_fn can be used to decorate a
 model_fn written for estimator API in order to compile entire model with XLA.

PiperOrigin-RevId: 214078470
---
 tensorflow/contrib/compiler/BUILD  |  20 +-
 tensorflow/contrib/compiler/xla.py | 293 +++++++++++++++++++++++++++++
 2 files changed, 294 insertions(+), 19 deletions(-)

diff --git a/tensorflow/contrib/compiler/BUILD b/tensorflow/contrib/compiler/BUILD
index 3b0e8f6cda..9c7fbee838 100644
--- a/tensorflow/contrib/compiler/BUILD
+++ b/tensorflow/contrib/compiler/BUILD
@@ -59,27 +59,9 @@ py_library(
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:platform",
+        "//tensorflow/python:summary_op_util",
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
         "//tensorflow/python/estimator:model_fn",
     ],
 )
-
-tf_py_test(
-    name = "xla_test",
-    srcs = ["xla_test.py"],
-    additional_deps = [
-        ":xla",
-        "@six_archive//:six",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:control_flow_util",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:summary",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variable_scope",
-    ],
-    tags = ["no_pip"],
-)
diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py
index 0aae695f92..1e30525159 100644
--- a/tensorflow/contrib/compiler/xla.py
+++ b/tensorflow/contrib/compiler/xla.py
@@ -19,17 +19,22 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import contextlib
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.compiler.jit.ops import xla_ops
 from tensorflow.contrib.tpu.python.tpu import tpu_function
 from tensorflow.core.framework import attr_value_pb2
+from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import summary_op_util
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import compat
+from tensorflow.python.util import function_utils
+from tensorflow.python.util import tf_decorator
 
 _XLA_COMPILE_ATTR = '_xla_compile_id'
 _MAX_WARNING_LINES = 5
@@ -353,3 +358,291 @@ def _compile_internal(computation, inputs=None):
           array_ops.identity(outputs[i], name='output_%d' % i)
           for i in xrange(output_arity)
       ]
+
+
+@contextlib.contextmanager
+def _disable_summary_context():
+  """Enters a context where all summary ops are skipped.
+
+  Summaries are not yet supported in xla.compile(). So we provide this context
+  manager that can skip creating summary ops. This is a temporary workaround due
+  to XLA not supporting summary ops.
+
+  Yields:
+    None.
+  """
+  origional_skip_summary_func = summary_op_util.skip_summary
+  summary_op_util.skip_summary = lambda: True
+
+  try:
+    yield
+  finally:
+    summary_op_util.skip_summary = origional_skip_summary_func
+
+
+class _CapturedObject(object):
+  """A placeholder to capture an object."""
+
+  def __init__(self):
+    self._object = None
+
+  def capture(self, o):
+    if self._object:
+      raise RuntimeError(
+          'InternalError: _CapturedObject can capture only once. Please file '
+          'bug.')
+
+    self._object = o
+
+  def get(self):
+    return self._object
+
+
+def _get_scaffold(captured_scaffold_fn):
+  """Retrieves the Scaffold from `captured_scaffold_fn`."""
+  scaffold_fn = captured_scaffold_fn.get()
+
+  if not scaffold_fn:
+    return None
+
+  scaffold = scaffold_fn()
+  if scaffold is None:
+    raise ValueError(
+        'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed')
+
+  return scaffold
+
+
+class _ModelFnWrapper(object):
+  """_ModelFnWrapper supports executing model_fn with XLA."""
+
+  def __init__(self, function):
+    self._model_fn = function
+
+  def __call__(self, features, labels, mode, params):
+
+    # TPUEstimator compiles model_fn when use_tpu=True. To avoid double
+    # compilation, we use this params['use_tpu'] as a hint. When it is set to
+    # True, model_fn is called without compilation.
+    # Note that this condition isn't accurate for the case of exporting a model.
+    # In that case we should ideally not compile so that user can see detailed
+    # graph. However, we don't have enough information to tell whether model_fn
+    # is being called for export mode or not.
+    # TODO(ycao): Make this condition more accurate when implementing PREDICT
+    # mode.
+    if params.get('use_tpu'):
+      return self._call_model_fn(features, labels, mode, params)
+
+    if mode == model_fn_lib.ModeKeys.TRAIN:
+      train_step, captured_scaffold_fn = self._make_train_step(
+          features, labels, params)
+      with _disable_summary_context():
+        (loss,) = compile(train_step)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=loss,
+          train_op=array_ops.identity(loss),
+          scaffold=_get_scaffold(captured_scaffold_fn))
+    elif mode == model_fn_lib.ModeKeys.EVAL:
+      eval_step, captured_eval_metric_fn, captured_scaffold_fn = (
+          self._make_eval_step(features, labels, params))
+      with _disable_summary_context():
+        outputs = compile(eval_step)
+      loss = outputs[0]
+
+      # Calculate eval_metric_ops if eval_metric_fn is set and captured.
+      eval_metric_fn = captured_eval_metric_fn.get()
+      if eval_metric_fn:
+        eval_metric_fn_tensors = outputs[1:]
+        eval_metric_ops = eval_metric_fn(*eval_metric_fn_tensors)
+      else:
+        eval_metric_ops = None
+
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=loss,
+          eval_metric_ops=eval_metric_ops,
+          scaffold=_get_scaffold(captured_scaffold_fn))
+    else:
+      raise NotImplementedError('%s is not implemented, only TRAIN and EVAL are'
+                                ' supported' % mode)
+
+  def _make_train_step(self, features, labels, params):
+    """Creates a single step of training for xla.compile()."""
+    captured_scaffold_fn = _CapturedObject()
+
+    def train_step():
+      """A single step of training."""
+      estimator_spec = self._call_model_fn(features, labels,
+                                           model_fn_lib.ModeKeys.TRAIN, params)
+
+      try:
+        captured_scaffold_fn.capture(estimator_spec.scaffold_fn)
+      except AttributeError:
+        captured_scaffold_fn.capture(None)
+
+      # train_step will be run by xla.compile(). xla.compile() only supports
+      # tensor output while train_op can be either an operation or a tensor.
+      # Even though xla.compile() automatically adds operation-typed train_op as
+      # control dependency of other tensor outputs, it doesn't do so for
+      # tensor-typed train_op. Thus, we need to set it explicitly here.
+      with ops.control_dependencies([estimator_spec.train_op]):
+        return array_ops.identity(estimator_spec.loss)
+
+    return train_step, captured_scaffold_fn
+
+  def _make_eval_step(self, features, labels, params):
+    """Creates a single step of evaluation for xla.compile()."""
+    captured_eval_metric_fn = _CapturedObject()
+    captured_scaffold_fn = _CapturedObject()
+
+    def eval_step():
+      """A single step of evaluation."""
+      estimator_spec = self._call_model_fn(features, labels,
+                                           model_fn_lib.ModeKeys.EVAL, params)
+
+      try:
+        captured_scaffold_fn.capture(estimator_spec.scaffold_fn)
+      except AttributeError:
+        captured_scaffold_fn.capture(None)
+
+      eval_metric_fn = None
+      eval_metric_fn_tensors = []
+      try:
+        if estimator_spec.eval_metrics:
+          (eval_metric_fn, eval_metric_fn_tensors) = estimator_spec.eval_metrics
+      except AttributeError:
+        pass
+
+      # If a dictionary is provided, we need to convert it into a list sorted
+      # according to order of eval_metric_fn positional arguments.
+      if isinstance(eval_metric_fn_tensors, dict):
+        eval_metric_fn_args = function_utils.fn_args(eval_metric_fn)
+        eval_metric_fn_tensors = [
+            eval_metric_fn_tensors[i] for i in eval_metric_fn_args
+        ]
+
+      captured_eval_metric_fn.capture(eval_metric_fn)
+
+      return tuple([estimator_spec.loss] + eval_metric_fn_tensors)
+
+    return eval_step, captured_eval_metric_fn, captured_scaffold_fn
+
+  def _call_model_fn(self, features, labels, mode, params):
+    """Calls the model_fn with required parameters."""
+    model_fn_args = function_utils.fn_args(self._model_fn)
+    kwargs = {}
+
+    if 'labels' in model_fn_args:
+      kwargs['labels'] = labels
+    elif labels is not None:
+      raise ValueError(
+          'model_fn does not take labels, but input_fn returns labels.')
+    if 'mode' in model_fn_args:
+      kwargs['mode'] = mode
+
+    if 'params' in model_fn_args:
+      kwargs['params'] = params
+
+    return self._verify_estimator_spec(
+        self._model_fn(features=features, **kwargs))
+
+  def _verify_estimator_spec(self, estimator_spec):
+    """Verifies estimator spec contains correct data."""
+    # TODO(ycao): Implement estimator spec verification for other modes.
+
+    try:
+      if estimator_spec.scaffold:
+        logging.warning('EstimatorSpec.scaffold is ignored with XLA compilation'
+                        '. Please use TPUEstimatorSpec.scaffold_fn instead.')
+    except AttributeError:
+      pass
+
+    try:
+      if estimator_spec.eval_metric_ops:
+        raise ValueError('EstimatorSpec.eval_metric_ops is not supported with '
+                         'XLA compilation. Please use '
+                         'TPUEstimatorSpec.eval_metrics instead.')
+    except AttributeError:
+      pass
+
+    if estimator_spec.mode == model_fn_lib.ModeKeys.EVAL:
+      # If estimator_spec is of type TPUEstimatorSpec and contains eval_metrics,
+      # check that eval_metrics contains eval_metric_fn and
+      # eval_metric_fn_tensors with matching arguments.
+      try:
+        eval_metrics = estimator_spec.eval_metrics
+      except AttributeError:
+        eval_metrics = None
+
+      if eval_metrics:
+        (eval_metric_fn, eval_metric_fn_tensors) = eval_metrics
+        eval_metric_fn_args = function_utils.fn_args(eval_metric_fn)
+
+        if isinstance(eval_metric_fn_tensors, dict):
+          missing_tensors = [
+              i for i in eval_metric_fn_args if i not in eval_metric_fn_tensors
+          ]
+          additional_tensors = [
+              i for i in eval_metric_fn_tensors if i not in eval_metric_fn_args
+          ]
+
+          if missing_tensors:
+            raise ValueError('Arguments %s are needed by metric_fn (first '
+                             'element of TPUEstimatorSpec.eval_metrics) but '
+                             'they are not provided by evaluation tensors '
+                             '(second element of TPUEstimatorSpec.eval_metrics)'
+                             '.' % missing_tensors)
+
+          if additional_tensors:
+            raise ValueError('Arguments %s are provided by evaluation tensors '
+                             '(second element of TPUEstimatorSpec.eval_metrics)'
+                             ' but they are not needed by metric_fn (first '
+                             'element of TPUEstimatorSpec.eval_metrics).' %
+                             additional_tensors)
+
+    return estimator_spec
+
+
+def estimator_model_fn(target_model_fn=None):
+  """estimator_model_fn decorates a model_fn to be compiled for execution.
+
+  Currently only it only works with `TPUEstimator`. If you need to use it with
+  base `Estimator`, please add `tf.enable_resource_variables()` at beginning of
+  your program.
+
+  Example 1, decorating model_fn:
+  ```
+  @xla.estimator_model_fn()
+  def model_fn(features, labels, mode, params):
+    ...
+    return EstimatorSpec(...)
+
+
+  est = Estimator(model_fn=model_fn, ...)
+  est.train(...)
+
+  ```
+
+  Example 2, decorator as function:
+  ```
+  def model_fn(features, labels, mode, params):
+    ...
+    return EstimatorSpec(...)
+
+  est = Estimator(model_fn=xla.estimator_model_fn(model_fn), ...)
+  est.train(...)
+  ```
+
+  Args:
+    target_model_fn: model_fn to be decorated. This is only needed when
+      decorator is used in function call form (example 2).
+
+  Returns:
+    Decorated target_model_fn.
+  """
+
+  def decorated(function):
+    return tf_decorator.make_decorator(function, _ModelFnWrapper(function))
+
+  return decorated(target_model_fn) if target_model_fn else decorated
-- 
GitLab


From edbd1a9afc31c9a5127ba769db5e1df11249660a Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Fri, 21 Sep 2018 19:24:18 -0700
Subject: [PATCH 0546/1357] Add a flag that lets users override the number of
 host "devices" as seen by XLA

While overriding it from the default of 1 won't be great for performance, it can
help writing tests.

The CL is organized as follows:

 * xla/legacy_flags/debug_options_flags and xla/xla.proto now has a
   --xla_force_host_platform_device_count flag which defaults to 1.

 * xla/service/platform_util.cc respects this --xla_force_host_platform_device_count
   flag.

 * xla/service/cpu/... has some changes to generalize infeed and outfeed on CPU
   to work with multiple devices.

PiperOrigin-RevId: 214078482
---
 .../xla/legacy_flags/debug_options_flags.cc   |  13 ++
 tensorflow/compiler/xla/service/BUILD         |   1 +
 tensorflow/compiler/xla/service/cpu/BUILD     |   4 +
 .../compiler/xla/service/cpu/cpu_runtime.cc   |  98 ++++++++------
 .../compiler/xla/service/cpu/cpu_runtime.h    |  32 +++--
 .../xla/service/cpu/cpu_transfer_manager.cc   |  10 +-
 .../compiler/xla/service/cpu/ir_emitter.cc    |  25 ++--
 .../xla/service/cpu/xfeed_manager_test.cc     |  18 +--
 .../compiler/xla/service/platform_util.cc     |  10 +-
 tensorflow/compiler/xla/tests/BUILD           |  18 +++
 .../tests/multiple_devices_on_host_test.cc    | 120 ++++++++++++++++++
 tensorflow/compiler/xla/xla.proto             |   9 ++
 12 files changed, 285 insertions(+), 73 deletions(-)
 create mode 100644 tensorflow/compiler/xla/tests/multiple_devices_on_host_test.cc

diff --git a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
index 0d3136b0cc..3ed3afcfce 100644
--- a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
@@ -57,6 +57,8 @@ void SetDebugOptionsDefaults(DebugOptions* flags) {
   // regression.
   flags->set_xla_cpu_enable_fast_math(true);
   flags->set_xla_gpu_enable_fast_math(true);
+
+  flags->set_xla_force_host_platform_device_count(1);
 }
 
 // Allocates flag_values and flag_objects; this function must not be called more
@@ -323,6 +325,17 @@ void AllocateFlags() {
           flag_values->xla_gpu_crash_on_verification_failures(),
           "Crashes the program on extra verification failures, e.g. cuDNN "
           "cross checking failures"),
+      tensorflow::Flag(
+          "xla_force_host_platform_device_count",
+          int32_setter_for(
+              &DebugOptions::set_xla_force_host_platform_device_count),
+          flag_values->xla_force_host_platform_device_count(),
+          "Force the host platform to pretend that there are these many "
+          "host \"devices\". All of these host devices are backed by the same"
+          "threadpool.  Setting this to anything other than 1 can increase "
+          "overhead from context switching but we let the user override this "
+          "behavior to help run tests on the host that run models in parallel "
+          "across multiple devices."),
   });
   ParseFlagsFromEnv(*flag_objects);
 }
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 2bc50c70cf..e800cf470c 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -593,6 +593,7 @@ cc_library(
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/legacy_flags:debug_options_flags",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "@com_google_absl//absl/strings",
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index bf627986a5..b7103118ac 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -50,6 +50,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/cpu:cpu_runtime",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
+        "//tensorflow/stream_executor",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/types:span",
     ],
@@ -462,12 +463,15 @@ cc_library(
     ],
     copts = runtime_copts(),
     deps = [
+        "//tensorflow/compiler/xla:executable_run_options",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
         "//tensorflow/core:lib",
+        "//tensorflow/stream_executor",
+        "@com_google_absl//absl/synchronization",
         "@com_google_absl//absl/types:span",
     ],
 )
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
index 7e1590955a..20cf855735 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
@@ -17,19 +17,29 @@ limitations under the License.
 
 #include <functional>
 
+#include "absl/synchronization/mutex.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 #include "tensorflow/core/platform/dynamic_annotations.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/stream_executor/stream_executor.h"
 
 namespace xla {
 namespace cpu {
 namespace runtime {
 
-XfeedManager* GetXfeedManager() {
-  static XfeedManager* manager = new XfeedManager;
-  return manager;
+XfeedManager* GetXfeedManager(int device_ordinal) {
+  static tensorflow::gtl::FlatMap<int, XfeedManager*>* managers =
+      new tensorflow::gtl::FlatMap<int, XfeedManager*>();
+  static absl::Mutex* mutex = new absl::Mutex();
+
+  absl::MutexLock lock(mutex);
+  auto it = managers->find(device_ordinal);
+  if (it == managers->end()) {
+    it = managers->emplace(device_ordinal, new XfeedManager()).first;
+  }
+  return it->second;
 }
 
 extern const char* const kEigenMatMulF16SymbolName =
@@ -118,14 +128,18 @@ tensorflow::string ShapeString(const void* shape_ptr, xla::int32 shape_length) {
 }  // namespace
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void*
-__xla_cpu_runtime_AcquireInfeedBufferForDequeue(xla::int32 buffer_length,
-                                                const void* shape,
-                                                xla::int32 shape_length) {
-  if (VLOG_IS_ON(2)) {
-    LOG(INFO) << "AcquireInfeedBufferForDequeue: "
-              << ShapeString(shape, shape_length);
-  }
-  xla::cpu::runtime::XfeedManager* xfeed = xla::cpu::runtime::GetXfeedManager();
+__xla_cpu_runtime_AcquireInfeedBufferForDequeue(
+    const xla::ExecutableRunOptions* run_options, xla::int32 buffer_length,
+    const void* shape, xla::int32 shape_length) {
+  int device_ordinal =
+      run_options ? run_options->stream()->parent()->device_ordinal() : 0;
+
+  VLOG(2) << "AcquireInfeedBufferForDequeue: "
+          << ShapeString(shape, shape_length) << " on stream executor "
+          << device_ordinal;
+
+  xla::cpu::runtime::XfeedManager* xfeed =
+      xla::cpu::runtime::GetXfeedManager(device_ordinal);
   // Wait until there's a buffer to dequeue.
   xla::cpu::runtime::XfeedBuffer* buffer =
       xfeed->infeed()->BlockingDequeueBuffer();
@@ -138,15 +152,18 @@ __xla_cpu_runtime_AcquireInfeedBufferForDequeue(xla::int32 buffer_length,
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
-__xla_cpu_runtime_ReleaseInfeedBufferAfterDequeue(xla::int32 buffer_length,
-                                                  void* buffer_ptr,
-                                                  const void* shape_ptr,
-                                                  xla::int32 shape_length) {
-  if (VLOG_IS_ON(2)) {
-    LOG(INFO) << "ReleaseInfeedBufferAfterDeque: "
-              << ShapeString(shape_ptr, shape_length);
-  }
-  xla::cpu::runtime::XfeedManager* xfeed = xla::cpu::runtime::GetXfeedManager();
+__xla_cpu_runtime_ReleaseInfeedBufferAfterDequeue(
+    const xla::ExecutableRunOptions* run_options, xla::int32 buffer_length,
+    void* buffer_ptr, const void* shape_ptr, xla::int32 shape_length) {
+  int device_ordinal =
+      run_options ? run_options->stream()->parent()->device_ordinal() : 0;
+
+  VLOG(2) << "ReleaseInfeedBufferAfterDeque: "
+          << ShapeString(shape_ptr, shape_length) << " on stream executor "
+          << device_ordinal;
+
+  xla::cpu::runtime::XfeedManager* xfeed =
+      xla::cpu::runtime::GetXfeedManager(device_ordinal);
   xla::StatusOr<xla::Shape> shape =
       xla::llvm_ir::DecodeSelfDescribingShapeConstant(shape_ptr, shape_length);
   xfeed->infeed()->ReleaseCurrentBuffer(buffer_length, buffer_ptr,
@@ -154,14 +171,18 @@ __xla_cpu_runtime_ReleaseInfeedBufferAfterDequeue(xla::int32 buffer_length,
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void*
-__xla_cpu_runtime_AcquireOutfeedBufferForPopulation(xla::int32 buffer_length,
-                                                    const void* shape_ptr,
-                                                    xla::int32 shape_length) {
-  if (VLOG_IS_ON(2)) {
-    LOG(INFO) << "AcquireOutfeedBufferForPopulation: "
-              << ShapeString(shape_ptr, shape_length);
-  }
-  xla::cpu::runtime::XfeedManager* xfeed = xla::cpu::runtime::GetXfeedManager();
+__xla_cpu_runtime_AcquireOutfeedBufferForPopulation(
+    const xla::ExecutableRunOptions* run_options, xla::int32 buffer_length,
+    const void* shape_ptr, xla::int32 shape_length) {
+  int device_ordinal =
+      run_options ? run_options->stream()->parent()->device_ordinal() : 0;
+
+  VLOG(2) << "AcquireOutfeedBufferForPopulation: "
+          << ShapeString(shape_ptr, shape_length) << " on stream executor "
+          << device_ordinal;
+
+  xla::cpu::runtime::XfeedManager* xfeed =
+      xla::cpu::runtime::GetXfeedManager(device_ordinal);
   // Wait until there's a buffer to dequeue.
   xla::cpu::runtime::XfeedBuffer* buffer =
       xfeed->outfeed()->BlockingDequeueBuffer();
@@ -174,15 +195,18 @@ __xla_cpu_runtime_AcquireOutfeedBufferForPopulation(xla::int32 buffer_length,
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
-__xla_cpu_runtime_ReleaseOutfeedBufferAfterPopulation(xla::int32 buffer_length,
-                                                      void* buffer_ptr,
-                                                      const void* shape_ptr,
-                                                      xla::int32 shape_length) {
-  if (VLOG_IS_ON(2)) {
-    LOG(INFO) << "ReleaseOutfeedBufferAfterPopulation: "
-              << ShapeString(shape_ptr, shape_length);
-  }
-  xla::cpu::runtime::XfeedManager* xfeed = xla::cpu::runtime::GetXfeedManager();
+__xla_cpu_runtime_ReleaseOutfeedBufferAfterPopulation(
+    const xla::ExecutableRunOptions* run_options, xla::int32 buffer_length,
+    void* buffer_ptr, const void* shape_ptr, xla::int32 shape_length) {
+  int device_ordinal =
+      run_options ? run_options->stream()->parent()->device_ordinal() : 0;
+
+  VLOG(2) << "ReleaseOutfeedBufferAfterPopulation: "
+          << ShapeString(shape_ptr, shape_length) << " on stream executor "
+          << device_ordinal;
+
+  xla::cpu::runtime::XfeedManager* xfeed =
+      xla::cpu::runtime::GetXfeedManager(device_ordinal);
   xla::StatusOr<xla::Shape> shape =
       xla::llvm_ir::DecodeSelfDescribingShapeConstant(shape_ptr, shape_length);
   xfeed->outfeed()->ReleaseCurrentBuffer(buffer_length, buffer_ptr,
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
index e6345e0344..b2e760a224 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
@@ -26,6 +26,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_H_
 
+#include "tensorflow/compiler/xla/executable_run_options.h"
 #include "tensorflow/compiler/xla/service/cpu/xfeed_manager.h"
 #include "tensorflow/compiler/xla/types.h"
 
@@ -80,8 +81,9 @@ extern const char* const kKeyValueSortF64SymbolName;
 // prefix.
 extern const char* const kXlaCpuRuntimeSymbolNamePrefix;
 
-// Returns the infeed manager used by the CPU runtime.
-XfeedManager* GetXfeedManager();
+// Returns the infeed manager used by the CPU runtime for the CPU device
+// `device_ordinal`.  Note the device ordinal does not name a CPU
+XfeedManager* GetXfeedManager(int device_ordinal);
 
 }  // namespace runtime
 }  // namespace cpu
@@ -89,6 +91,18 @@ XfeedManager* GetXfeedManager();
 
 extern "C" {
 
+// Some things common to all of the runtime entry points below:
+//
+//  * The shape pointer and shape_length reflect values that can be deserialized
+//    via llvm_ir::DecodeSelfDescribingShapeConstant. This is the way we pass
+//    reified type information from the generated program to the runtime, which
+//    helps check the type safety and contract for the emitted-code/runtime
+//    communication.
+//
+//  * run_options is used to look up the device ordinal for the stream executor
+//    we're executing under.  If it is null the device ordinal is assumed to be
+//    0 (this behavior helps in writing tests).
+
 // Note: in the runtime entry points below, the shape pointer and shape_length
 // reflect values that can be deserialized via
 // llvm_ir::DecodeSelfDescribingShapeConstant. This is the way we pass reified
@@ -101,7 +115,8 @@ extern "C" {
 // the length would be more exact, but the length check is chosen as a
 // tradeoff between error checking and speed/simplicity.
 extern void* __xla_cpu_runtime_AcquireInfeedBufferForDequeue(
-    xla::int32 buffer_length, const void* shape, xla::int32 shape_length);
+    const xla::ExecutableRunOptions* run_options, xla::int32 buffer_length,
+    const void* shape, xla::int32 shape_length);
 
 // Relinquishes the next infeed buffer that was returned by
 // __xla_cpu_runtime_AcquireInfeedBufferForDequeue. Once this call
@@ -116,13 +131,14 @@ extern void* __xla_cpu_runtime_AcquireInfeedBufferForDequeue(
 // implemented we will add support for multiple outstanding buffers
 // that can be returned out of order.
 extern void __xla_cpu_runtime_ReleaseInfeedBufferAfterDequeue(
-    xla::int32 buffer_length, void* buffer_ptr, const void* shape_ptr,
-    xla::int32 shape_length);
+    const xla::ExecutableRunOptions* run_options, xla::int32 buffer_length,
+    void* buffer_ptr, const void* shape_ptr, xla::int32 shape_length);
 
 // Blocks until the next outfeed buffer is available to be populated, then
 // returns it.
 extern void* __xla_cpu_runtime_AcquireOutfeedBufferForPopulation(
-    xla::int32 buffer_length, const void* shape_ptr, xla::int32 shape_length);
+    const xla::ExecutableRunOptions* run_options, xla::int32 buffer_length,
+    const void* shape_ptr, xla::int32 shape_length);
 
 // Relinquishes the outfeed buffer after it has been populated.
 // buffer_ptr must have been previously returned by
@@ -134,8 +150,8 @@ extern void* __xla_cpu_runtime_AcquireOutfeedBufferForPopulation(
 // acquired, i.e., there may only be one outstanding outfeed buffer in
 // use by the runtime.
 extern void __xla_cpu_runtime_ReleaseOutfeedBufferAfterPopulation(
-    xla::int32 buffer_length, void* buffer_ptr, const void* shape_ptr,
-    xla::int32 shape_length);
+    const xla::ExecutableRunOptions* run_options, xla::int32 buffer_length,
+    void* buffer_ptr, const void* shape_ptr, xla::int32 shape_length);
 
 }  // extern "C"
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc
index 5519a43b2f..1cc2844470 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc
@@ -35,6 +35,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/notification.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "tensorflow/stream_executor/stream_executor.h"
 
 namespace xla {
 
@@ -128,7 +129,8 @@ Status CpuTransferManager::TransferLiteralToInfeed(
     buffers.push_back(buffer);
   }
 
-  cpu::runtime::XfeedManager* xfeed_manager = cpu::runtime::GetXfeedManager();
+  cpu::runtime::XfeedManager* xfeed_manager =
+      cpu::runtime::GetXfeedManager(executor->device_ordinal());
   xfeed_manager->infeed()->EnqueueBuffersAtomically(buffers);
 
   cleanup.release();
@@ -141,7 +143,8 @@ Status CpuTransferManager::TransferBufferToInfeed(se::StreamExecutor* executor,
   TF_ASSIGN_OR_RETURN(cpu::runtime::XfeedBuffer * buffer,
                       TransferBufferToInfeedInternal(executor, size, source));
 
-  cpu::runtime::XfeedManager* xfeed_manager = cpu::runtime::GetXfeedManager();
+  cpu::runtime::XfeedManager* xfeed_manager =
+      cpu::runtime::GetXfeedManager(executor->device_ordinal());
   xfeed_manager->infeed()->EnqueueBuffersAtomically({buffer});
 
   return Status::OK();
@@ -265,7 +268,8 @@ StatusOr<Shape> CpuTransferManager::TransferBuffersFromOutfeedInternal(
     buffer_pointers.push_back(b.get());
   }
 
-  cpu::runtime::XfeedManager* xfeed_manager = cpu::runtime::GetXfeedManager();
+  cpu::runtime::XfeedManager* xfeed_manager =
+      cpu::runtime::GetXfeedManager(executor->device_ordinal());
   xfeed_manager->outfeed()->EnqueueBuffersAtomically(buffer_pointers);
   VLOG(2) << "Waiting for buffer to be notified as populated.";
   std::vector<Shape> outfed_shapes;
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index c32f2533ee..c3e8020783 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -404,13 +404,12 @@ Status IrEmitter::EmitXfeedTransfer(XfeedKind kind, const Shape& shape,
       llvm::Value * shape_ptr,
       llvm_ir::EncodeSelfDescribingShapeConstant(shape, &shape_length, &b_));
 
-  // The signature of the acquire infeed buffer function is:
-  //
-  //   (void*)(int32 length);
   llvm::Type* int32_type = b_.getInt32Ty();
   llvm::Type* i8_ptr_type = llvm::Type::getInt8PtrTy(module_->getContext());
   llvm::FunctionType* acquire_type = llvm::FunctionType::get(
-      i8_ptr_type, {int32_type, i8_ptr_type, int32_type},
+      i8_ptr_type,
+      {/*run_options*/ i8_ptr_type, /*buffer_length*/ int32_type,
+       /*shape_ptr*/ i8_ptr_type, /*shape_length*/ int32_type},
       /*isVarArg=*/false);
 
   llvm::Function* acquire_func;
@@ -423,11 +422,11 @@ Status IrEmitter::EmitXfeedTransfer(XfeedKind kind, const Shape& shape,
   }
   acquire_func->setCallingConv(llvm::CallingConv::C);
 
-  // The signature of the release infeed buffer function is:
-  //
-  //   (void)(int32 length, void* buffer);
   llvm::FunctionType* release_type = llvm::FunctionType::get(
-      b_.getVoidTy(), {int32_type, i8_ptr_type, i8_ptr_type, int32_type},
+      b_.getVoidTy(),
+      {/*run_options*/ i8_ptr_type, /*buffer_length*/ int32_type,
+       /*buffer_ptr*/ i8_ptr_type, /*shape_ptr*/ i8_ptr_type,
+       /*shape_length*/ int32_type},
       /*isVarArg=*/false);
 
   llvm::Function* release_func;
@@ -444,9 +443,9 @@ Status IrEmitter::EmitXfeedTransfer(XfeedKind kind, const Shape& shape,
   // of size exactly 'length_32', and the runtime is responsible for
   // check-failing the process if there is a mismatch, versus passing us back a
   // buffer that we might overrun.
-  llvm::Value* acquired_pointer =
-      Call(acquire_func,
-           {b_.getInt32(length_32), shape_ptr, b_.getInt32(shape_length)});
+  llvm::Value* acquired_pointer = Call(
+      acquire_func, {GetExecutableRunOptionsArgument(), b_.getInt32(length_32),
+                     shape_ptr, b_.getInt32(shape_length)});
 
   if (kind == XfeedKind::kInfeed) {
     // Copy to the program buffer address from the acquired buffer.
@@ -458,8 +457,8 @@ Status IrEmitter::EmitXfeedTransfer(XfeedKind kind, const Shape& shape,
            /*SrcAlign=*/1, length_32);
   }
 
-  Call(release_func, {b_.getInt32(length_32), acquired_pointer, shape_ptr,
-                      b_.getInt32(shape_length)});
+  Call(release_func, {GetExecutableRunOptionsArgument(), b_.getInt32(length_32),
+                      acquired_pointer, shape_ptr, b_.getInt32(shape_length)});
 
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/cpu/xfeed_manager_test.cc b/tensorflow/compiler/xla/service/cpu/xfeed_manager_test.cc
index 8fe65f488a..cc38b81455 100644
--- a/tensorflow/compiler/xla/service/cpu/xfeed_manager_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/xfeed_manager_test.cc
@@ -66,9 +66,9 @@ void ProcessNextBuffer(int32 length) {
   auto shape = ShapeUtil::MakeShape(U8, {length});
   string bytes = shape.SerializeAsString();
   void* buffer = __xla_cpu_runtime_AcquireInfeedBufferForDequeue(
-      length, bytes.data(), bytes.size());
-  __xla_cpu_runtime_ReleaseInfeedBufferAfterDequeue(length, buffer,
-                                                    bytes.data(), bytes.size());
+      /*run_options=*/nullptr, length, bytes.data(), bytes.size());
+  __xla_cpu_runtime_ReleaseInfeedBufferAfterDequeue(
+      /*run_options=*/nullptr, length, buffer, bytes.data(), bytes.size());
 }
 
 // Performs the acquire/release sequence on the outfeed, as the generated CPU
@@ -76,16 +76,16 @@ void ProcessNextBuffer(int32 length) {
 void ProcessNextOutfeedBuffer(int32 length, const Shape& shape) {
   string bytes = shape.SerializeAsString();
   void* buffer = __xla_cpu_runtime_AcquireOutfeedBufferForPopulation(
-      length, bytes.data(), bytes.size());
+      /*run_options=*/nullptr, length, bytes.data(), bytes.size());
   __xla_cpu_runtime_ReleaseOutfeedBufferAfterPopulation(
-      length, buffer, bytes.data(), bytes.size());
+      /*run_options=*/nullptr, length, buffer, bytes.data(), bytes.size());
 }
 
 TEST_F(InfeedManagerTest, SingleThreadedSequential) {
   TestInfeedBuffer* a = new TestInfeedBuffer(64);
   TestInfeedBuffer* b = new TestInfeedBuffer(32);
 
-  cpu::runtime::XfeedManager* xfeed = cpu::runtime::GetXfeedManager();
+  cpu::runtime::XfeedManager* xfeed = cpu::runtime::GetXfeedManager(0);
 
   xfeed->infeed()->EnqueueBuffersAtomically({a});
   xfeed->infeed()->EnqueueBuffersAtomically({b});
@@ -97,7 +97,7 @@ TEST_F(InfeedManagerTest, SingleThreadedInterleaved) {
   TestInfeedBuffer* a = new TestInfeedBuffer(64);
   TestInfeedBuffer* b = new TestInfeedBuffer(32);
 
-  cpu::runtime::XfeedManager* xfeed = cpu::runtime::GetXfeedManager();
+  cpu::runtime::XfeedManager* xfeed = cpu::runtime::GetXfeedManager(0);
 
   xfeed->infeed()->EnqueueBuffersAtomically({a});
   ProcessNextBuffer(a->length());
@@ -108,7 +108,7 @@ TEST_F(InfeedManagerTest, SingleThreadedInterleaved) {
 TEST_F(InfeedManagerTest, MultiThreaded) {
   tensorflow::thread::ThreadPool pool(tensorflow::Env::Default(), "test", 2);
 
-  cpu::runtime::XfeedManager* xfeed = cpu::runtime::GetXfeedManager();
+  cpu::runtime::XfeedManager* xfeed = cpu::runtime::GetXfeedManager(0);
 
   const int32 length = 64;
 
@@ -130,7 +130,7 @@ TEST_F(InfeedManagerTest, MultiThreaded) {
 
 TEST_F(InfeedManagerTest, OutfeedWrongShape) {
   TestInfeedBuffer* b = new TestInfeedBuffer(32, /*expect_shape_match=*/false);
-  cpu::runtime::XfeedManager* xfeed = cpu::runtime::GetXfeedManager();
+  cpu::runtime::XfeedManager* xfeed = cpu::runtime::GetXfeedManager(0);
   xfeed->outfeed()->EnqueueBuffersAtomically({b});
 
   ProcessNextOutfeedBuffer(32, ShapeUtil::MakeShape(U8, {33}));
diff --git a/tensorflow/compiler/xla/service/platform_util.cc b/tensorflow/compiler/xla/service/platform_util.cc
index 178a78ede0..c522e7ae23 100644
--- a/tensorflow/compiler/xla/service/platform_util.cc
+++ b/tensorflow/compiler/xla/service/platform_util.cc
@@ -21,6 +21,7 @@ limitations under the License.
 
 #include "absl/strings/ascii.h"
 #include "absl/strings/str_join.h"
+#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -217,9 +218,12 @@ PlatformUtil::GetStreamExecutors(se::Platform* platform) {
   if (platform->id() == se::host::kHostPlatformId) {
     // On host "devices", StreamExecutor exports a device for each hardware
     // thread. Because we parallelize a single computation across threads, it
-    // doesn't make sense to expose these as separate devices, so fix the number
-    // of devices to one.
-    device_count = 1;
+    // doesn't make sense to expose these as separate devices, so by default we
+    // fix the number of devices to one.  However we do let the user override
+    // this behavior to help run tests on the host that run models in parallel
+    // across multiple devices.
+    device_count = legacy_flags::GetDebugOptionsFromFlags()
+                       .xla_force_host_platform_device_count();
   }
   std::vector<se::StreamExecutor*> stream_executors(device_count, nullptr);
   VLOG(1) << "Initializing devices";
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index fd3e3bfa94..f474ecb18c 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -2168,3 +2168,21 @@ xla_test(
         "//tensorflow/core:lib",
     ],
 )
+
+tf_cc_test(
+    name = "multiple_devices_on_host_test",
+    srcs = ["multiple_devices_on_host_test.cc"],
+    args = ["--xla_force_host_platform_device_count=4"],
+    deps = [
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla/client:client_library",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/service:cpu_plugin",
+        "//tensorflow/compiler/xla/service:platform_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+        "@com_google_absl//absl/synchronization",
+    ],
+)
diff --git a/tensorflow/compiler/xla/tests/multiple_devices_on_host_test.cc b/tensorflow/compiler/xla/tests/multiple_devices_on_host_test.cc
new file mode 100644
index 0000000000..c530591c6e
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/multiple_devices_on_host_test.cc
@@ -0,0 +1,120 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "absl/synchronization/mutex.h"
+#include "tensorflow/compiler/xla/client/client_library.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace {
+StatusOr<XlaComputation> BuildComputation() {
+  XlaBuilder b("computation");
+  Shape scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  XlaOp infeed = InfeedWithToken(CreateToken(&b), scalar_s32);
+  return b.Build(
+      OutfeedWithToken(GetTupleElement(infeed, 0) +
+                           ConstantLiteral(&b, LiteralUtil::CreateR0<int32>(1)),
+                       GetTupleElement(infeed, 1), scalar_s32, ""));
+}
+
+void CompileAndExecute(
+    LocalExecutable* executable, int device_ordinal, LocalClient* client,
+    absl::Mutex* results_mutex,
+    std::vector<std::pair<int, StatusOr<ScopedShapedBuffer>>>* results) {
+  xla::ExecutableRunOptions execute_options;
+  execute_options.set_intra_op_thread_pool(
+      client->backend().eigen_intra_op_thread_pool_device());
+  execute_options.set_device_ordinal(device_ordinal);
+  execute_options.set_allocator(
+      xla::ClientLibrary::GetXlaService(client->platform())
+          ->backend()
+          .memory_allocator());
+  StatusOr<ScopedShapedBuffer> result = executable->Run({}, execute_options);
+  {
+    absl::MutexLock lock(results_mutex);
+    results->emplace_back(device_ordinal, std::move(result));
+  }
+}
+
+void TestWithDeviceCount(const int device_count) {
+  // Run `device_count` copies of the XLA program built by BuildComputation.
+  TF_ASSERT_OK_AND_ASSIGN(
+      se::Platform* const platform,
+      perftools::gputools::MultiPlatformManager::PlatformWithName("Host"));
+  xla::LocalClientOptions client_options;
+  client_options.set_platform(platform);
+  TF_ASSERT_OK_AND_ASSIGN(
+      LocalClient* const client,
+      xla::ClientLibrary::GetOrCreateLocalClient(client_options));
+
+  TF_ASSERT_OK_AND_ASSIGN(XlaComputation xla_computation, BuildComputation());
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<LocalExecutable> executable,
+      client->Compile(xla_computation, {}, xla::ExecutableBuildOptions{}));
+  std::vector<tensorflow::Thread*> threads;
+  absl::Mutex results_mutex;
+  std::vector<std::pair<int, StatusOr<ScopedShapedBuffer>>> results;
+  tensorflow::Env* env = tensorflow::Env::Default();
+  for (int device_ordinal = 0; device_ordinal < device_count;
+       device_ordinal++) {
+    tensorflow::Thread* t = env->StartThread(
+        tensorflow::ThreadOptions{}, absl::StrCat("thread-", device_ordinal),
+        [&executable, device_ordinal, client, &results_mutex, &results] {
+          CompileAndExecute(executable.get(), device_ordinal, client,
+                            &results_mutex, &results);
+        });
+    threads.push_back(t);
+  }
+
+  for (int device_ordinal = 0; device_ordinal < device_count;
+       device_ordinal++) {
+    TF_ASSERT_OK(client->TransferToInfeedLocal(
+        LiteralUtil::CreateR0<int32>(device_ordinal * 100), device_ordinal));
+  }
+
+  for (int device_ordinal = 0; device_ordinal < device_count;
+       device_ordinal++) {
+    TF_ASSERT_OK_AND_ASSIGN(Literal outfeed,
+                            client->TransferFromOutfeedLocal(
+                                ShapeUtil::MakeShape(S32, {}), device_ordinal));
+    EXPECT_EQ(outfeed, LiteralUtil::CreateR0<int32>(device_ordinal * 100 + 1));
+  }
+
+  for (int device_ordinal = 0; device_ordinal < device_count;
+       device_ordinal++) {
+    delete threads[device_ordinal];
+  }
+
+  for (int device_ordinal = 0; device_ordinal < device_count;
+       device_ordinal++) {
+    TF_ASSERT_OK(results[device_ordinal].second.status());
+  }
+}
+
+// NB!  This test requires --xla_force_host_platform_device_count=4
+
+TEST(MultipleDeviceOnHostTest, OneDevice) { TestWithDeviceCount(1); }
+
+TEST(MultipleDeviceOnHostTest, TwoDevices) { TestWithDeviceCount(2); }
+
+TEST(MultipleDeviceOnHostTest, ThreeDevices) { TestWithDeviceCount(3); }
+
+TEST(MultipleDeviceOnHostTest, FourDevices) { TestWithDeviceCount(4); }
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index b53f89d63b..60d25a6407 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -200,6 +200,15 @@ message DebugOptions {
   // among different algorithms.
   bool xla_gpu_crash_on_verification_failures = 101;
 
+  // Force the host platform to pretend that there are these many host
+  // "devices".  All these devices are backed by the same threadpool.  Defaults
+  // to 1.
+  //
+  // Setting this to anything other than 1 can increase overhead from context
+  // switching but we let the user override this behavior to help run tests on
+  // the host that run models in parallel across multiple devices.
+  int32 xla_force_host_platform_device_count = 102;
+
   // Extra options to pass to the compilation backend (e.g. LLVM); specific
   // interpretation of these values is left to the backend.
   map<string, string> xla_backend_extra_options = 500;
-- 
GitLab


From 36c568f1fdc53376052ab354d56f33e0df4a9319 Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Wed, 15 Aug 2018 16:35:48 +0800
Subject: [PATCH 0547/1357] systemlibs: Unbundle double_conversion library

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tensorflow/workspace.bzl                       |  4 +++-
 third_party/systemlibs/double_conversion.BUILD | 12 ++++++++++++
 third_party/systemlibs/syslibs_configure.bzl   |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 third_party/systemlibs/double_conversion.BUILD

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index d0531f8193..a1e7567f4f 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -738,14 +738,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"),
     )
 
-    native.new_http_archive(
+    tf_http_archive(
         name = "double_conversion",
         urls = [
+            "https://mirror.bazel.build/github.com/google/double-conversion/archive/3992066a95b823efc8ccc1baf82a1cfc73f6e9b8.zip",
             "https://github.com/google/double-conversion/archive/3992066a95b823efc8ccc1baf82a1cfc73f6e9b8.zip",
         ],
         sha256 = "2f7fbffac0d98d201ad0586f686034371a6d152ca67508ab611adc2386ad30de",
         strip_prefix = "double-conversion-3992066a95b823efc8ccc1baf82a1cfc73f6e9b8",
         build_file = clean_dep("//third_party:double_conversion.BUILD"),
+        system_build_file = clean_dep("//third_party/systemlibs:double_conversion.BUILD"),
     )
 
     tf_http_archive(
diff --git a/third_party/systemlibs/double_conversion.BUILD b/third_party/systemlibs/double_conversion.BUILD
new file mode 100644
index 0000000000..568460181a
--- /dev/null
+++ b/third_party/systemlibs/double_conversion.BUILD
@@ -0,0 +1,12 @@
+licenses(["notice"])
+
+filegroup(
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "double-conversion",
+    linkopts = ["-ldouble-conversion"],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index 8b09c9ac1f..41b867e770 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -14,6 +14,7 @@ VALID_LIBS = [
     "com_googlesource_code_re2",
     "curl",
     "cython",
+    "double_conversion",
     "flatbuffers",
     "gif_archive",
     "grpc",
-- 
GitLab


From 57b2fbb59804497c3860106e70323405cc1871bf Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Wed, 15 Aug 2018 18:02:06 +0800
Subject: [PATCH 0548/1357] systemlibs: Unbundle BoringSSL dependency

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tensorflow/workspace.bzl                     |  1 +
 third_party/systemlibs/boringssl.BUILD       | 21 ++++++++++++++++++++
 third_party/systemlibs/syslibs_configure.bzl |  1 +
 3 files changed, 23 insertions(+)
 create mode 100644 third_party/systemlibs/boringssl.BUILD

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index a1e7567f4f..ce68a46b96 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -531,6 +531,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
         sha256 = "1188e29000013ed6517168600fc35a010d58c5d321846d6a6dfee74e4c788b45",
         strip_prefix = "boringssl-7f634429a04abc48e2eb041c81c5235816c96514",
+        system_build_file = clean_dep("//third_party/systemlibs:boringssl.BUILD"),
     )
 
     tf_http_archive(
diff --git a/third_party/systemlibs/boringssl.BUILD b/third_party/systemlibs/boringssl.BUILD
new file mode 100644
index 0000000000..bc4c533403
--- /dev/null
+++ b/third_party/systemlibs/boringssl.BUILD
@@ -0,0 +1,21 @@
+licenses(["notice"])
+
+filegroup(
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "crypto",
+    linkopts = ["-lcrypto"],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "ssl",
+    linkopts = ["-lssl"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":crypto",
+    ],
+)
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index 41b867e770..b22da7c251 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -11,6 +11,7 @@ _TF_SYSTEM_LIBS = "TF_SYSTEM_LIBS"
 
 VALID_LIBS = [
     "astor_archive",
+    "boringssl",
     "com_googlesource_code_re2",
     "curl",
     "cython",
-- 
GitLab


From 05b43db45d23a32e51bf064946dae868e98396bd Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Wed, 15 Aug 2018 15:20:56 +0800
Subject: [PATCH 0549/1357] systemlibs: allow building with PREFIX other than
 /usr

Some use-cases want to build packages in a different path than /usr. For
example to have a set of packages independent from other system
binaries. This change allows building with bazel build
--define=PREFIX=/some/other/path to search that path instead. The
default of /usr is set in bazelrc so that building with no options will
work as before and setting PREFIX on the commandline or later in the
bazelrc will override that setting if desired. PREFIX is not used by the
bundled build so should not affect that at all.

This also adds a few other standard Make variables that can be
overridden independently if needed.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 third_party/systemlibs/jsoncpp.BUILD | 2 +-
 tools/bazel.rc                       | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/third_party/systemlibs/jsoncpp.BUILD b/third_party/systemlibs/jsoncpp.BUILD
index cf91917cfb..526fd0c418 100644
--- a/third_party/systemlibs/jsoncpp.BUILD
+++ b/third_party/systemlibs/jsoncpp.BUILD
@@ -23,7 +23,7 @@ genrule(
     cmd = """
       for i in $(OUTS); do
         i=$${i##*/}
-        ln -vsf /usr/include/jsoncpp/json/$$i $(@D)/include/json/$$i
+        ln -sf $(INCLUDEDIR)/jsoncpp/json/$$i $(@D)/include/json/$$i
       done
     """,
 )
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 601e07ffdd..ccf62629d1 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -67,3 +67,8 @@ build -c opt
 
 # Modular TF build options
 build:dynamic_kernels --define=dynamic_loaded_kernels=true
+
+# Default paths for TF_SYSTEM_LIBS
+build --define=PREFIX=/usr
+build --define=LIBDIR=$(PREFIX)/lib
+build --define=INCLUDEDIR=$(PREFIX)/include
-- 
GitLab


From 37f7bfbce884700295e6348506154be3a30f1457 Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Thu, 6 Sep 2018 22:01:10 +0800
Subject: [PATCH 0550/1357] third_party/repo: add system_link_files

third_party_http_archive has a link_files attr, add a similar
system_link_files attr that is only used when the system libraries are
enabled. Also add it to tf_http_archive.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 third_party/repo.bzl | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/third_party/repo.bzl b/third_party/repo.bzl
index 7d1aa5dce9..6e30618d39 100644
--- a/third_party/repo.bzl
+++ b/third_party/repo.bzl
@@ -119,6 +119,10 @@ def _tf_http_archive(ctx):
             "%prefix%": ".." if _repos_are_siblings() else "external",
         }, False)
 
+    if use_syslib:
+        for internal_src, external_dest in ctx.attr.system_link_files.items():
+            ctx.symlink(Label(internal_src), ctx.path(external_dest))
+
 tf_http_archive = repository_rule(
     implementation = _tf_http_archive,
     attrs = {
@@ -130,6 +134,7 @@ tf_http_archive = repository_rule(
         "patch_file": attr.label(),
         "build_file": attr.label(),
         "system_build_file": attr.label(),
+        "system_link_files": attr.string_dict(),
     },
     environ = [
         "TF_SYSTEM_LIBS",
@@ -180,7 +185,16 @@ def _third_party_http_archive(ctx):
             _apply_patch(ctx, ctx.attr.patch_file)
         ctx.symlink(Label(ctx.attr.build_file), buildfile_path)
 
+    link_dict = dict()
+    if use_syslib:
+        link_dict.update(ctx.attr.system_link_files)
+
     for internal_src, external_dest in ctx.attr.link_files.items():
+        # if syslib and link exists in both, use the system one
+        if external_dest not in link_dict.values():
+            link_dict[internal_src] = external_dest
+
+    for internal_src, external_dest in link_dict.items():
         ctx.symlink(Label(internal_src), ctx.path(external_dest))
 
 # Downloads and creates Bazel repos for dependencies.
@@ -201,6 +215,7 @@ third_party_http_archive = repository_rule(
         "system_build_file": attr.string(mandatory = False),
         "patch_file": attr.label(),
         "link_files": attr.string_dict(),
+        "system_link_files": attr.string_dict(),
     },
     environ = [
         "TF_SYSTEM_LIBS",
-- 
GitLab


From 206d46704ef6d93e278a44222e2d65259b9f9bc4 Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Thu, 6 Sep 2018 22:04:03 +0800
Subject: [PATCH 0551/1357] systemlibs: unbundle absl_py dependency

absl_py has rules in many dirs so this uses system_link_files to put
files in the right place.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tensorflow/workspace.bzl                          |  5 +++++
 third_party/systemlibs/absl_py.BUILD              |  1 +
 third_party/systemlibs/absl_py.absl.flags.BUILD   | 11 +++++++++++
 third_party/systemlibs/absl_py.absl.testing.BUILD |  7 +++++++
 third_party/systemlibs/syslibs_configure.bzl      |  1 +
 5 files changed, 25 insertions(+)
 create mode 100644 third_party/systemlibs/absl_py.BUILD
 create mode 100644 third_party/systemlibs/absl_py.absl.flags.BUILD
 create mode 100644 third_party/systemlibs/absl_py.absl.testing.BUILD

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index ce68a46b96..f2e0c7e163 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -341,6 +341,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
         sha256 = "95160f778a62c7a60ddeadc7bf2d83f85a23a27359814aca12cf949e896fa82c",
         strip_prefix = "abseil-py-pypi-v0.2.2",
+        system_build_file = clean_dep("//third_party/systemlibs:absl_py.BUILD"),
+        system_link_files = {
+            "//third_party/systemlibs:absl_py.absl.flags.BUILD": "absl/flags/BUILD",
+            "//third_party/systemlibs:absl_py.absl.testing.BUILD": "absl/testing/BUILD",
+        },
     )
 
     tf_http_archive(
diff --git a/third_party/systemlibs/absl_py.BUILD b/third_party/systemlibs/absl_py.BUILD
new file mode 100644
index 0000000000..fe756e1be2
--- /dev/null
+++ b/third_party/systemlibs/absl_py.BUILD
@@ -0,0 +1 @@
+licenses(["notice"])  # Apache 2.0
diff --git a/third_party/systemlibs/absl_py.absl.flags.BUILD b/third_party/systemlibs/absl_py.absl.flags.BUILD
new file mode 100644
index 0000000000..95ec92b887
--- /dev/null
+++ b/third_party/systemlibs/absl_py.absl.flags.BUILD
@@ -0,0 +1,11 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+    name = "LICENSE",
+)
+
+py_library(
+    name = "flags",
+)
diff --git a/third_party/systemlibs/absl_py.absl.testing.BUILD b/third_party/systemlibs/absl_py.absl.testing.BUILD
new file mode 100644
index 0000000000..c1b794c1e9
--- /dev/null
+++ b/third_party/systemlibs/absl_py.absl.testing.BUILD
@@ -0,0 +1,7 @@
+licenses(["notice"])  # Apache 2.0
+
+py_library(
+    name = "parameterized",
+    testonly = 1,
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index b22da7c251..bdee56c766 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -10,6 +10,7 @@
 _TF_SYSTEM_LIBS = "TF_SYSTEM_LIBS"
 
 VALID_LIBS = [
+    "absl_py",
     "astor_archive",
     "boringssl",
     "com_googlesource_code_re2",
-- 
GitLab


From ed5578687b52d905a40859b08818f8810faadbee Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Fri, 7 Sep 2018 17:04:47 +0800
Subject: [PATCH 0552/1357] systemlibs: unbundle google_cloud_cpp

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tensorflow/workspace.bzl                                   | 4 ++++
 third_party/systemlibs/google_cloud_cpp.BUILD              | 6 ++++++
 .../google_cloud_cpp.google.cloud.bigtable.BUILD           | 7 +++++++
 third_party/systemlibs/syslibs_configure.bzl               | 1 +
 4 files changed, 18 insertions(+)
 create mode 100644 third_party/systemlibs/google_cloud_cpp.BUILD
 create mode 100644 third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index f2e0c7e163..0e73e911c3 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -179,6 +179,10 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
         sha256 = "fdd3b3aecce60987e5525e55bf3a21d68a8695320bd5b980775af6507eec3944",
         strip_prefix = "google-cloud-cpp-14760a86c4ffab9943b476305c4fe927ad95db1c",
+        system_build_file = clean_dep("//third_party/systemlibs:google_cloud_cpp.BUILD"),
+        system_link_files = {
+            "//third_party/systemlibs:google_cloud_cpp.google.cloud.bigtable.BUILD": "google/cloud/bigtable/BUILD",
+        },
     )
 
     tf_http_archive(
diff --git a/third_party/systemlibs/google_cloud_cpp.BUILD b/third_party/systemlibs/google_cloud_cpp.BUILD
new file mode 100644
index 0000000000..cbe6e10ba5
--- /dev/null
+++ b/third_party/systemlibs/google_cloud_cpp.BUILD
@@ -0,0 +1,6 @@
+licenses(["notice"])  # Apache 2.0
+
+filegroup(
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD b/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD
new file mode 100644
index 0000000000..b59d565390
--- /dev/null
+++ b/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD
@@ -0,0 +1,7 @@
+licenses(["notice"])  # Apache 2.0
+
+cc_library(
+    name = "bigtable_client",
+    linkopts = ["-lbigtable_client"],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index bdee56c766..86ee25ebd4 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -13,6 +13,7 @@ VALID_LIBS = [
     "absl_py",
     "astor_archive",
     "boringssl",
+    "com_github_googlecloudplatform_google_cloud_cpp",
     "com_googlesource_code_re2",
     "curl",
     "cython",
-- 
GitLab


From f8c1a3d9806d8c4adf2104be0b0ee395d548a6cb Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Fri, 7 Sep 2018 17:06:01 +0800
Subject: [PATCH 0553/1357] systemlibs: unbundle googleapis

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tensorflow/workspace.bzl                     |  1 +
 third_party/systemlibs/googleapis.BUILD      | 12 ++++++++++++
 third_party/systemlibs/syslibs_configure.bzl |  1 +
 3 files changed, 14 insertions(+)
 create mode 100644 third_party/systemlibs/googleapis.BUILD

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 0e73e911c3..32c20d4087 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -194,6 +194,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         sha256 = "824870d87a176f26bcef663e92051f532fac756d1a06b404055dc078425f4378",
         strip_prefix = "googleapis-f81082ea1e2f85c43649bee26e0d9871d4b41cdb",
         build_file = clean_dep("//third_party:googleapis.BUILD"),
+        system_build_file = clean_dep("//third_party/systemlibs:googleapis.BUILD"),
     )
 
     tf_http_archive(
diff --git a/third_party/systemlibs/googleapis.BUILD b/third_party/systemlibs/googleapis.BUILD
new file mode 100644
index 0000000000..7687745df9
--- /dev/null
+++ b/third_party/systemlibs/googleapis.BUILD
@@ -0,0 +1,12 @@
+licenses(["notice"])  # Apache 2.0
+
+filegroup(
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "bigtable_protos",
+    linkopts = ["-lbigtable_protos"],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index 86ee25ebd4..7812b00d4d 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -13,6 +13,7 @@ VALID_LIBS = [
     "absl_py",
     "astor_archive",
     "boringssl",
+    "com_github_googleapis_googleapis",
     "com_github_googlecloudplatform_google_cloud_cpp",
     "com_googlesource_code_re2",
     "curl",
-- 
GitLab


From 5fc39bd5d63e332e621d5bb8821c8a70ec9aee90 Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Sun, 16 Sep 2018 01:38:55 +0800
Subject: [PATCH 0554/1357] configure.py: enhance systemlibs config

Allow TF_SYSTEM_LIBS env var to split on spaces or commas.
Write PREFIX and related variables to bazelrc as well.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 configure.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index e9d162fbd2..f0b9fada5e 100644
--- a/configure.py
+++ b/configure.py
@@ -1401,10 +1401,20 @@ def set_grpc_build_flags():
 
 def set_system_libs_flag(environ_cp):
   syslibs = environ_cp.get('TF_SYSTEM_LIBS', '')
-  syslibs = ','.join(sorted(syslibs.split(',')))
   if syslibs and syslibs != '':
+    if ',' in syslibs:
+      syslibs = ','.join(sorted(syslibs.split(',')))
+    else:
+      syslibs = ','.join(sorted(syslibs.split()))
     write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs)
 
+  if 'PREFIX' in environ_cp:
+    write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX'])
+  if 'LIBDIR' in environ_cp:
+    write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
+  if 'INCLUDEDIR' in environ_cp:
+    write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
+
 
 def set_windows_build_flags(environ_cp):
   """Set Windows specific build options."""
-- 
GitLab


From f5d29a57caff67d27f7e3ad5c36b4b09d19d50e0 Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Sat, 22 Sep 2018 01:01:51 +0800
Subject: [PATCH 0555/1357] systemlibs: unbundle gast

---
 tensorflow/workspace.bzl                     |  1 +
 third_party/systemlibs/gast.BUILD            | 12 ++++++++++++
 third_party/systemlibs/syslibs_configure.bzl |  1 +
 3 files changed, 14 insertions(+)
 create mode 100644 third_party/systemlibs/gast.BUILD

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 32c20d4087..d47d15315d 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -324,6 +324,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         sha256 = "7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930",
         strip_prefix = "gast-0.2.0",
         build_file = clean_dep("//third_party:gast.BUILD"),
+        system_build_file = clean_dep("//third_party/systemlibs:gast.BUILD"),
     )
 
     tf_http_archive(
diff --git a/third_party/systemlibs/gast.BUILD b/third_party/systemlibs/gast.BUILD
new file mode 100644
index 0000000000..c6e1d0c4e0
--- /dev/null
+++ b/third_party/systemlibs/gast.BUILD
@@ -0,0 +1,12 @@
+licenses(["notice"])  # BSD 3-clause
+
+filegroup(
+    name = "PKG-INFO",
+    visibility = ["//visibility:public"],
+)
+
+py_library(
+    name = "gast",
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index 7812b00d4d..8b0ab39eaf 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -20,6 +20,7 @@ VALID_LIBS = [
     "cython",
     "double_conversion",
     "flatbuffers",
+    "gast_archive",
     "gif_archive",
     "grpc",
     "jemalloc",
-- 
GitLab


From ded4efd3cf8c4f0a1a8e04c72c99c0b60fdb57bd Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Sat, 22 Sep 2018 01:23:27 +0800
Subject: [PATCH 0556/1357] install_headers: skip externals that were unbundled

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tensorflow/BUILD | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index c8e24e3aff..56eedc4ea1 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -628,6 +628,14 @@ genrule(
         continue
       fi
 
+      if [[ $${d} == external* ]]; then
+        extname="$${d#*external/}"
+        extname="$${extname%%/*}"
+        if [[ $${TF_SYSTEM_LIBS:-} == *$${extname}* ]]; then
+          continue
+        fi
+      fi
+
       mkdir -p "$@/$${d}"
       cp "$${f}" "$@/$${d}/"
     done
-- 
GitLab


From 94b5ff16ce1530e09bc30c51709b0596ff61103f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 23:30:59 -0700
Subject: [PATCH 0557/1357] Fully depend on external repositories (instead of
 just headers) in cc_header_only_library. This allows TensorFlow to be build
 from another bazel repo.

PiperOrigin-RevId: 214091199
---
 tensorflow/core/BUILD | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 85b6d4ff68..d914fdb96c 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2501,7 +2501,12 @@ tf_cuda_library(
 
 cc_header_only_library(
     name = "framework_internal_headers_lib",
-    includes = ["../../external/com_google_absl"],
+    # Fully depend on external repositories, because identifying the headers
+    # is fragile.
+    extra_deps = [
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+    ],
     deps = [
         ":lib",
         ":lib_internal",
@@ -2587,11 +2592,12 @@ tf_cuda_library(
 
 cc_header_only_library(
     name = "framework_headers_lib",
+    # Fully depend on external repositories, because identifying the headers
+    # is fragile.
     extra_deps = [
-        # ABSL headers get dropped, so we add them back here.
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
     ],
-    includes = ["../../external/com_google_absl"],
     visibility = ["//visibility:public"],
     deps = [
         ":framework",
@@ -2601,7 +2607,12 @@ cc_header_only_library(
 
 cc_header_only_library(
     name = "stream_executor_headers_lib",
-    includes = ["../../external/com_google_absl"],
+    # Fully depend on external repositories, because identifying the headers
+    # is fragile.
+    extra_deps = [
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+    ],
     visibility = ["//visibility:public"],
     deps = [
         ":stream_executor",
-- 
GitLab


From 6f0bdfd788ebaaa55c2b4022c70c8bad2cc5dd2c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Sep 2018 23:45:44 -0700
Subject: [PATCH 0558/1357] Wrap ARModel and LSTMPredictionModel into an
 LSTMAutoRegressor estimator

PiperOrigin-RevId: 214091820
---
 .../timeseries/python/timeseries/ar_model.py  |  65 ++++++--
 .../python/timeseries/estimators.py           | 157 ++++++++++++++++++
 .../python/timeseries/estimators_test.py      |  35 ++++
 3 files changed, 242 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py
index 1d27fffc62..9bbe87e301 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py
@@ -191,6 +191,43 @@ class ARModel(model.TimeSeriesModel):
 
   Note that this class can also be used to regress against time only by setting
   the input_window_size to zero.
+
+  Each periodicity in the `periodicities` arg is divided by the
+  `num_time_buckets` into time buckets that are represented as features added
+  to the model.
+
+  A good heuristic for picking an appropriate periodicity for a given data set
+  would be the length of cycles in the data. For example, energy usage in a
+  home is typically cyclic each day. If the time feature in a home energy
+  usage dataset is in the unit of hours, then 24 would be an appropriate
+  periodicity. Similarly, a good heuristic for `num_time_buckets` is how often
+  the data is expected to change within the cycle. For the aforementioned home
+  energy usage dataset and periodicity of 24, then 48 would be a reasonable
+  value if usage is expected to change every half hour.
+
+  Each feature's value for a given example with time t is the difference
+  between t and the start of the time bucket it falls under. If it doesn't fall
+  under a feature's associated time bucket, then that feature's value is zero.
+
+  For example: if `periodicities` = (9, 12) and `num_time_buckets` = 3, then 6
+  features would be added to the model, 3 for periodicity 9 and 3 for
+  periodicity 12.
+
+  For an example data point where t = 17:
+  - It's in the 3rd time bucket for periodicity 9 (2nd period is 9-18 and 3rd
+    time bucket is 15-18)
+  - It's in the 2nd time bucket for periodicity 12 (2nd period is 12-24 and
+    2nd time bucket is between 16-20).
+
+  Therefore the 6 added features for this row with t = 17 would be:
+
+  # Feature name (periodicity#_timebucket#), feature value
+  P9_T1, 0 # not in first time bucket
+  P9_T2, 0 # not in second time bucket
+  P9_T3, 2 # 17 - 15 since 15 is the start of the 3rd time bucket
+  P12_T1, 0 # not in first time bucket
+  P12_T2, 1 # 17 - 16 since 16 is the start of the 2nd time bucket
+  P12_T3, 0 # not in third time bucket
   """
   SQUARED_LOSS = "squared_loss"
   NORMAL_LIKELIHOOD_LOSS = "normal_likelihood_loss"
@@ -208,7 +245,9 @@ class ARModel(model.TimeSeriesModel):
 
     Args:
       periodicities: periodicities of the input data, in the same units as the
-        time feature. Note this can be a single value or a list of values for
+        time feature (for example 24 if feeding hourly data with a daily
+        periodicity, or 60 * 24 if feeding minute-level data with daily
+        periodicity). Note this can be a single value or a list of values for
         multiple periodicities.
       input_window_size: Number of past time steps of data to look at when doing
         the regression.
@@ -218,21 +257,18 @@ class ARModel(model.TimeSeriesModel):
       prediction_model_factory: A callable taking arguments `num_features`,
         `input_window_size`, and `output_window_size` and returning a
         `tf.keras.Model`. The `Model`'s `call()` takes two arguments: an input
-        window and an output window, and returns a dictionary of
-        predictions. See `FlatPredictionModel` for an example. Example usage:
+        window and an output window, and returns a dictionary of predictions.
+        See `FlatPredictionModel` for an example. Example usage:
 
-        ```python
-        model = ar_model.ARModel(
-          periodicities=2, num_features=3,
-          prediction_model_factory=functools.partial(
-              FlatPredictionModel,
-              hidden_layer_sizes=[10, 10]))
-        ```
+        ```python model = ar_model.ARModel( periodicities=2, num_features=3,
+        prediction_model_factory=functools.partial( FlatPredictionModel,
+        hidden_layer_sizes=[10, 10])) ```
 
         The default model computes predictions as a linear function of flattened
         input and output windows.
       num_time_buckets: Number of buckets into which to divide (time %
-        periodicity) for generating time based features.
+        periodicity). This value multiplied by the number of periodicities is
+        the number of time features added to the model.
       loss: Loss function to use for training. Currently supported values are
         SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for
         NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For
@@ -240,10 +276,9 @@ class ARModel(model.TimeSeriesModel):
         observations and predictions, while the training loss is computed on
         normalized data (if input statistics are available).
       exogenous_feature_columns: A list of `tf.feature_column`s (for example
-          `tf.feature_column.embedding_column`) corresponding to exogenous
-          features which provide extra information to the model but are not part
-          of the series to be predicted. Passed to
-          `tf.feature_column.input_layer`.
+        `tf.feature_column.embedding_column`) corresponding to
+        features which provide extra information to the model but are not part
+        of the series to be predicted.
     """
     self._model_factory = prediction_model_factory
     self.input_window_size = input_window_size
diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators.py b/tensorflow/contrib/timeseries/python/timeseries/estimators.py
index 0ddc4b4144..af68aa03cf 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/estimators.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/estimators.py
@@ -30,6 +30,7 @@ from tensorflow.contrib.timeseries.python.timeseries.state_space_models import s
 from tensorflow.contrib.timeseries.python.timeseries.state_space_models.filtering_postprocessor import StateInterpolatingAnomalyDetector
 
 from tensorflow.python.estimator import estimator_lib
+from tensorflow.python.estimator.canned import optimizers
 from tensorflow.python.estimator.export import export_lib
 from tensorflow.python.feature_column import feature_column
 from tensorflow.python.framework import dtypes
@@ -386,6 +387,162 @@ class ARRegressor(TimeSeriesRegressor):
         config=config)
 
 
+# TODO(b/113684821): Add detailed documentation on what the input_fn should do.
+# Add an example of making and returning a Dataset object. Determine if
+# endogenous features can be passed in as FeatureColumns. Move ARModel's loss
+# functions into a more general location.
+class LSTMAutoRegressor(TimeSeriesRegressor):
+  """An Estimator for an LSTM autoregressive model.
+
+  LSTMAutoRegressor is a window-based model, inputting fixed windows of length
+  `input_window_size` and outputting fixed windows of length
+  `output_window_size`. These two parameters must add up to the window_size
+  of data returned by the `input_fn`.
+
+  Each periodicity in the `periodicities` arg is divided by the `num_timesteps`
+  into timesteps that are represented as time features added to the model.
+
+  A good heuristic for picking an appropriate periodicity for a given data set
+  would be the length of cycles in the data. For example, energy usage in a
+  home is typically cyclic each day. If the time feature in a home energy
+  usage dataset is in the unit of hours, then 24 would be an appropriate
+  periodicity. Similarly, a good heuristic for `num_timesteps` is how often the
+  data is expected to change within the cycle. For the aforementioned home
+  energy usage dataset and periodicity of 24, then 48 would be a reasonable
+  value if usage is expected to change every half hour.
+
+  Each feature's value for a given example with time t is the difference
+  between t and the start of the timestep it falls under. If it doesn't fall
+  under a feature's associated timestep, then that feature's value is zero.
+
+  For example: if `periodicities` = (9, 12) and `num_timesteps` = 3, then 6
+  features would be added to the model, 3 for periodicity 9 and 3 for
+  periodicity 12.
+
+  For an example data point where t = 17:
+  - It's in the 3rd timestep for periodicity 9 (2nd period is 9-18 and 3rd
+    timestep is 15-18)
+  - It's in the 2nd timestep for periodicity 12 (2nd period is 12-24 and
+    2nd timestep is between 16-20).
+
+  Therefore the 6 added features for this row with t = 17 would be:
+
+  # Feature name (periodicity#_timestep#), feature value
+  P9_T1, 0 # not in first timestep
+  P9_T2, 0 # not in second timestep
+  P9_T3, 2 # 17 - 15 since 15 is the start of the 3rd timestep
+  P12_T1, 0 # not in first timestep
+  P12_T2, 1 # 17 - 16 since 16 is the start of the 2nd timestep
+  P12_T3, 0 # not in third timestep
+
+  Example Code:
+
+  ```python
+  extra_feature_columns = (
+      feature_column.numeric_column("exogenous_variable"),
+  )
+
+  estimator = LSTMAutoRegressor(
+      periodicities=10,
+      input_window_size=10,
+      output_window_size=5,
+      model_dir="/path/to/model/dir",
+      num_features=1,
+      extra_feature_columns=extra_feature_columns,
+      num_timesteps=50,
+      num_units=10,
+      optimizer=tf.train.ProximalAdagradOptimizer(...))
+
+  # Input builders
+  def input_fn_train():
+    return {
+      "times": tf.range(15)[None, :],
+      "values": tf.random_normal(shape=[1, 15, 1])
+    }
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval():
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+
+  def input_fn_predict():
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+  """
+
+  def __init__(self,
+               periodicities,
+               input_window_size,
+               output_window_size,
+               model_dir=None,
+               num_features=1,
+               extra_feature_columns=None,
+               num_timesteps=10,
+               loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS,
+               num_units=128,
+               optimizer="Adam",
+               config=None):
+    """Initialize the Estimator.
+
+    Args:
+      periodicities: periodicities of the input data, in the same units as the
+        time feature (for example 24 if feeding hourly data with a daily
+        periodicity, or 60 * 24 if feeding minute-level data with daily
+        periodicity). Note this can be a single value or a list of values for
+        multiple periodicities.
+      input_window_size: Number of past time steps of data to look at when doing
+        the regression.
+      output_window_size: Number of future time steps to predict. Note that
+        setting this value to > 1 empirically seems to give a better fit.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      num_features: The dimensionality of the time series (default value is
+        one for univariate, more than one for multivariate).
+      extra_feature_columns: A list of `tf.feature_column`s (for example
+        `tf.feature_column.embedding_column`) corresponding to features which
+        provide extra information to the model but are not part of the series to
+        be predicted.
+      num_timesteps: Number of buckets into which to divide (time %
+        periodicity). This value multiplied by the number of periodicities is
+        the number of time features added to the model.
+      loss: Loss function to use for training. Currently supported values are
+        SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for
+        NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For
+        SQUARED_LOSS, the evaluation loss is reported based on un-scaled
+        observations and predictions, while the training loss is computed on
+        normalized data.
+      num_units: The size of the hidden state in the encoder and decoder LSTM
+        cells.
+      optimizer: string, `tf.train.Optimizer` object, or callable that defines
+        the optimizer algorithm to use for training. Defaults to the Adam
+        optimizer with a learning rate of 0.01.
+      config: Optional `estimator.RunConfig` object to configure the runtime
+        settings.
+    """
+    optimizer = optimizers.get_optimizer_instance(
+        optimizer, learning_rate=0.01)
+    model = ar_model.ARModel(
+        periodicities=periodicities,
+        input_window_size=input_window_size,
+        output_window_size=output_window_size,
+        num_features=num_features,
+        exogenous_feature_columns=extra_feature_columns,
+        num_time_buckets=num_timesteps,
+        loss=loss,
+        prediction_model_factory=functools.partial(
+            ar_model.LSTMPredictionModel, num_units=num_units))
+    state_manager = state_management.FilteringOnlyStateManager()
+    super(LSTMAutoRegressor, self).__init__(
+        model=model,
+        state_manager=state_manager,
+        optimizer=optimizer,
+        model_dir=model_dir,
+        config=config,
+        head_type=ts_head_lib.OneShotPredictionHead)
+
+
 class StateSpaceRegressor(TimeSeriesRegressor):
   """An Estimator for general state space models."""
 
diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py
index 83260fc59a..6ec7184c68 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py
@@ -226,5 +226,40 @@ class TimeSeriesRegressorTest(test.TestCase):
                 input_pipeline.NumpyReader(numpy_data)),
             steps=1)
 
+  def test_ar_lstm_regressor(self):
+    dtype = dtypes.float32
+    model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
+    exogenous_feature_columns = (
+        feature_column.numeric_column("exogenous"),
+    )
+    estimator = estimators.LSTMAutoRegressor(
+        periodicities=10,
+        input_window_size=10,
+        output_window_size=6,
+        model_dir=model_dir,
+        num_features=1,
+        extra_feature_columns=exogenous_feature_columns,
+        num_units=10,
+        config=_SeedRunConfig())
+    times = numpy.arange(20, dtype=numpy.int64)
+    values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
+    exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
+    features = {
+        feature_keys.TrainEvalFeatures.TIMES: times,
+        feature_keys.TrainEvalFeatures.VALUES: values,
+        "exogenous": exogenous
+    }
+    train_input_fn = input_pipeline.RandomWindowInputFn(
+        input_pipeline.NumpyReader(features), shuffle_seed=2, num_threads=1,
+        batch_size=16, window_size=16)
+    eval_input_fn = input_pipeline.RandomWindowInputFn(
+        input_pipeline.NumpyReader(features), shuffle_seed=3, num_threads=1,
+        batch_size=16, window_size=16)
+    estimator.train(input_fn=train_input_fn, steps=1)
+    evaluation = estimator.evaluate(
+        input_fn=eval_input_fn, steps=1)
+    self.assertAllEqual(evaluation["loss"], evaluation["average_loss"])
+    self.assertAllEqual([], evaluation["loss"].shape)
+
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 7832d2c3a84c79c0dc76a7ed1f6560707a294f22 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Sep 2018 00:20:00 -0700
Subject: [PATCH 0559/1357] Make C libraries publicly visible so other repos
 can depend on TensorFlow's C API.

PiperOrigin-RevId: 214093482
---
 tensorflow/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index c8e24e3aff..a6f782ba3d 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -564,6 +564,7 @@ tf_cc_shared_object(
             "$(location //tensorflow/c:version_script.lds)",
         ],
     }),
+    visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/c:c_api",
         "//tensorflow/c:c_api_experimental",
@@ -588,6 +589,7 @@ tf_cc_shared_object(
             "$(location //tensorflow:tf_version_script.lds)",
         ],
     }),
+    visibility = ["//visibility:public"],
     deps = [
         "//tensorflow:tf_exported_symbols.lds",
         "//tensorflow:tf_version_script.lds",
-- 
GitLab


From 6bfb26e1a3dc6a381829e6e8759cf2d441260738 Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Sat, 22 Sep 2018 00:34:17 -0700
Subject: [PATCH 0560/1357] Add back xla_test, which was deleted by mistake
 earlier

PiperOrigin-RevId: 214094266
---
 tensorflow/contrib/compiler/BUILD | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tensorflow/contrib/compiler/BUILD b/tensorflow/contrib/compiler/BUILD
index 9c7fbee838..67918316b2 100644
--- a/tensorflow/contrib/compiler/BUILD
+++ b/tensorflow/contrib/compiler/BUILD
@@ -65,3 +65,22 @@ py_library(
         "//tensorflow/python/estimator:model_fn",
     ],
 )
+
+tf_py_test(
+    name = "xla_test",
+    srcs = ["xla_test.py"],
+    additional_deps = [
+        ":xla",
+        "@six_archive//:six",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:control_flow_util",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:summary",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+    ],
+    tags = ["no_pip"],
+)
-- 
GitLab


From c6df472e243d6e084dc000022857e08316430b16 Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Sat, 22 Sep 2018 01:06:17 -0700
Subject: [PATCH 0561/1357] Internal changes

PiperOrigin-RevId: 214096165
---
 tensorflow/contrib/compiler/BUILD | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/compiler/BUILD b/tensorflow/contrib/compiler/BUILD
index 67918316b2..f51bfc1b22 100644
--- a/tensorflow/contrib/compiler/BUILD
+++ b/tensorflow/contrib/compiler/BUILD
@@ -5,7 +5,10 @@ package(default_visibility = [":friends"])
 package_group(
     name = "friends",
     includes = ["//tensorflow/compiler/jit:friends"],
-    packages = ["//tensorflow/..."],
+    packages = [
+        "//tensorflow/...",
+        "//third_party/py/tensor2tensor/...",
+    ],
 )
 
 load("//tensorflow:tensorflow.bzl", "tf_py_test")
-- 
GitLab


From 6ce84a89001df281f6100affa85fa7b8eb56c67f Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Sat, 22 Sep 2018 01:16:14 -0700
Subject: [PATCH 0562/1357] Disable the logging for OutofRangeError in test

---
 tensorflow/python/framework/test_util.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index d63abd7f01..24dea8c336 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -877,7 +877,11 @@ class ErrorLoggingSession(session.Session):
     try:
       return super(ErrorLoggingSession, self).run(*args, **kwargs)
     except Exception as e:  # pylint: disable=broad-except
-      logging.error(str(e))
+      # Note: disable the logging for OutOfRangeError, which makes the output
+      # of tf.data tests hard to read, because OutOfRangeError is used as the
+      # signal completion
+      if not isinstance(e, errors.OutOfRangeError):
+        logging.error(str(e))
       raise
 
 
-- 
GitLab


From e317152dad1aa66bc493abc046a60dbbf650de92 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Sep 2018 02:08:18 -0700
Subject: [PATCH 0563/1357] compat: Update forward compatibility horizon to
 2018-09-22

PiperOrigin-RevId: 214099980
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 5e8f5d6e8e..cafce9af52 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 21)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 22)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From ca552d54ac67be8837aeabdb43269846d9df4eb5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Sep 2018 05:15:18 -0700
Subject: [PATCH 0564/1357] Add PinToHostOptimizer to grappler: force small ops
 to happen on CPU (instead of GPU). This avoids many unnecessary CPU<->GPU
 memcpy and syncs.

PiperOrigin-RevId: 214108484
---
 ...direct_session_with_tracking_alloc_test.cc |   3 +
 tensorflow/core/grappler/clusters/cluster.cc  |   1 +
 tensorflow/core/grappler/graph_view.cc        |  30 +++
 tensorflow/core/grappler/graph_view.h         |  10 +
 tensorflow/core/grappler/graph_view_test.cc   |  54 +++++
 tensorflow/core/grappler/optimizers/BUILD     |  39 ++++
 .../grappler/optimizers/meta_optimizer.cc     |   6 +
 .../optimizers/pin_to_host_optimizer.cc       | 218 ++++++++++++++++++
 .../optimizers/pin_to_host_optimizer.h        |  62 +++++
 .../optimizers/pin_to_host_optimizer_test.cc  | 162 +++++++++++++
 .../core/grappler/utils/grappler_test.cc      |   9 +-
 .../core/protobuf/rewriter_config.proto       |   2 +
 .../python/debug/cli/analyzer_cli_test.py     |   3 +-
 .../lib/debug_graph_reconstruction_test.py    |   3 +-
 tensorflow/python/framework/test_util.py      |   2 +
 15 files changed, 599 insertions(+), 5 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
 create mode 100644 tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
 create mode 100644 tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc

diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
index 0b096a14a3..2ed4f69f90 100644
--- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
@@ -77,6 +77,9 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) {
   options.config.mutable_graph_options()
       ->mutable_rewrite_options()
       ->set_min_graph_nodes(-1);
+  options.config.mutable_graph_options()
+      ->mutable_rewrite_options()
+      ->set_pin_to_host_optimization(RewriterConfig::OFF);
   std::unique_ptr<Session> session(NewSession(options));
   TF_ASSERT_OK(session->Create(def));
   std::vector<std::pair<string, Tensor>> inputs;
diff --git a/tensorflow/core/grappler/clusters/cluster.cc b/tensorflow/core/grappler/clusters/cluster.cc
index 7171ae059b..3b1d7d8347 100644
--- a/tensorflow/core/grappler/clusters/cluster.cc
+++ b/tensorflow/core/grappler/clusters/cluster.cc
@@ -83,6 +83,7 @@ void Cluster::DisableOptimizer(bool disable) {
     rewriter_config->set_memory_optimization(RewriterConfig::NO_MEM_OPT);
     rewriter_config->set_shape_optimization(RewriterConfig::OFF);
     rewriter_config->set_remapping(RewriterConfig::OFF);
+    rewriter_config->set_pin_to_host_optimization(RewriterConfig::OFF);
     rewriter_config->mutable_auto_parallel()->set_enable(false);
     rewriter_config->clear_optimizers();
   } else {
diff --git a/tensorflow/core/grappler/graph_view.cc b/tensorflow/core/grappler/graph_view.cc
index a6b6b6f8b2..b8d8243174 100644
--- a/tensorflow/core/grappler/graph_view.cc
+++ b/tensorflow/core/grappler/graph_view.cc
@@ -14,11 +14,41 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/graph_view.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/grappler/utils.h"
 
 namespace tensorflow {
 namespace grappler {
 
+int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
+  for (int output_arg_id = 0; output_arg_id < op.output_arg_size();
+       ++output_arg_id) {
+    if (port_id < 0) {
+      return -1;
+    } else if (port_id == 0) {
+      return output_arg_id;
+    }
+
+    const auto& output_arg = op.output_arg(output_arg_id);
+    if (!output_arg.number_attr().empty()) {
+      const int n = node.attr().at(output_arg.number_attr()).i();
+      if (n < 0) {
+        // This should never happen.
+        DCHECK_GE(n, 0);
+        return -1;
+      }
+      if (port_id < n) {
+        return output_arg_id;
+      }
+      port_id -= n;
+    } else {
+      --port_id;
+    }
+  }
+
+  return -1;
+}
+
 GraphView::GraphView(GraphDef* graph) : graph_(graph) {
   for (int i = 0; i < graph_->node_size(); i++) {
     auto node = graph_->mutable_node(i);
diff --git a/tensorflow/core/grappler/graph_view.h b/tensorflow/core/grappler/graph_view.h
index ac260f85a0..ec946ca3b5 100644
--- a/tensorflow/core/grappler/graph_view.h
+++ b/tensorflow/core/grappler/graph_view.h
@@ -20,11 +20,21 @@ limitations under the License.
 #include <unordered_set>
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace grappler {
 
+// Map a node/op's output port_id to arg_id.
+//
+// The port_id refers to the n-th tensor of the node, while the arg_id refers to
+// the n-th arg of the op. These two can be different if an op's arg is a list
+// of tensors.
+//
+// We return -1 for any invalid port_id (i.e., no corresponding arg_id).
+int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id);
+
 // A utility class to simplify the traversal of a GraphDef.
 class GraphView {
  public:
diff --git a/tensorflow/core/grappler/graph_view_test.cc b/tensorflow/core/grappler/graph_view_test.cc
index 958eb921fb..30512d9d47 100644
--- a/tensorflow/core/grappler/graph_view_test.cc
+++ b/tensorflow/core/grappler/graph_view_test.cc
@@ -25,6 +25,60 @@ namespace {
 
 class GraphViewTest : public ::testing::Test {};
 
+TEST_F(GraphViewTest, OpOutputPortIdToArgIdShapeN) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a = ops::Const(s.WithOpName("a"), 0.0f, {10, 10});
+  ops::ShapeN b(s.WithOpName("b"), {a, a, a});
+
+  GraphDef graph_def;
+  TF_CHECK_OK(s.ToGraphDef(&graph_def));
+  GraphView graph_view(&graph_def);
+
+  const NodeDef& a_node_def = *graph_view.GetNode("a");
+  const NodeDef& b_node_def = *graph_view.GetNode("b");
+
+  const OpDef* a_op_def = nullptr;
+  const OpDef* b_op_def = nullptr;
+  EXPECT_TRUE(
+      OpRegistry::Global()->LookUpOpDef(a_node_def.op(), &a_op_def).ok());
+  EXPECT_TRUE(
+      OpRegistry::Global()->LookUpOpDef(b_node_def.op(), &b_op_def).ok());
+
+  EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *a_op_def, 0));
+  EXPECT_EQ(-1, OpOutputPortIdToArgId(b_node_def, *a_op_def, 1));
+
+  EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *b_op_def, 0));
+  EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *b_op_def, 1));
+  EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *b_op_def, 2));
+  EXPECT_EQ(-1, OpOutputPortIdToArgId(b_node_def, *b_op_def, 3));
+  EXPECT_EQ(-1, OpOutputPortIdToArgId(b_node_def, *b_op_def, 4));
+}
+
+TEST_F(GraphViewTest, OpOutputPortIdToArgIdSparseSplit) {
+  for (int num_splits : {1, 2}) {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output a = ops::Const<int64>(s.WithOpName("a"), 1, {10, 10});
+    ops::SparseSplit b(s.WithOpName("b"), a, a, a, a, num_splits);
+
+    GraphDef graph_def;
+    TF_CHECK_OK(s.ToGraphDef(&graph_def));
+    GraphView graph_view(&graph_def);
+
+    const NodeDef& b_node_def = *graph_view.GetNode("b");
+    const OpDef* b_op_def = nullptr;
+    EXPECT_TRUE(
+        OpRegistry::Global()->LookUpOpDef(b_node_def.op(), &b_op_def).ok());
+
+    for (int port_id = 0; port_id <= num_splits * 3; ++port_id) {
+      int arg_id = -1;
+      if (port_id < num_splits * 3) {
+        arg_id = port_id / num_splits;
+      }
+      EXPECT_EQ(arg_id, OpOutputPortIdToArgId(b_node_def, *b_op_def, port_id));
+    }
+  }
+}
+
 TEST_F(GraphViewTest, BasicGraph) {
   TrivialTestGraphInputYielder fake_input(4, 2, 2, false, {"/CPU:0", "/GPU:0"});
   GrapplerItem item;
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 261dee4382..960d1addb3 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -518,6 +518,7 @@ cc_library(
         ":loop_optimizer",
         ":memory_optimizer",
         ":model_pruner",
+        ":pin_to_host_optimizer",
         ":remapper",
         ":scoped_allocator_optimizer",
         ":shape_optimizer",
@@ -883,3 +884,41 @@ tf_cc_test(
         "//tensorflow/core/grappler/utils:grappler_test",
     ],
 )
+
+cc_library(
+    name = "pin_to_host_optimizer",
+    srcs = ["pin_to_host_optimizer.cc"],
+    hdrs = [
+        "pin_to_host_optimizer.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":graph_optimizer",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/grappler:graph_view",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:op_types",
+        "//tensorflow/core/grappler:utils",
+        "//tensorflow/core/grappler/costs:graph_properties",
+        "//tensorflow/core/grappler/utils:frame",
+        "//tensorflow/core/grappler/utils:symbolic_shapes",
+        "//tensorflow/core/grappler/utils:topological_sort",
+    ],
+)
+
+tf_cuda_cc_test(
+    name = "pin_to_host_optimizer_test",
+    srcs = ["pin_to_host_optimizer_test.cc"],
+    deps = [
+        ":pin_to_host_optimizer",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler/utils:grappler_test",
+    ],
+)
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 4b0cbfaa82..3da7a72e80 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/loop_optimizer.h"
 #include "tensorflow/core/grappler/optimizers/memory_optimizer.h"
 #include "tensorflow/core/grappler/optimizers/model_pruner.h"
+#include "tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h"
 #include "tensorflow/core/grappler/optimizers/remapper.h"
 #include "tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h"
 #include "tensorflow/core/grappler/optimizers/shape_optimizer.h"
@@ -105,6 +106,7 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
   MK_OPT("scoped_allocator",
          new ScopedAllocatorOptimizer(cfg_.scoped_allocator_optimization(),
                                       cfg_.scoped_allocator_opts()));
+  MK_OPT("small_op", new PinToHostOptimizer(cfg_.pin_to_host_optimization()));
 
   return std::unique_ptr<GraphOptimizer>();
 }
@@ -133,6 +135,9 @@ Status MetaOptimizer::InitializeOptimizers(
   if (cfg_.remapping() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
   }
+  if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) {
+    optimizers->push_back(MakeUnique<PinToHostOptimizer>());
+  }
   if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
     optimizers->push_back(
         MakeUnique<ArithmeticOptimizer>(cfg_.arithmetic_optimization()));
@@ -468,6 +473,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
          cfg.scoped_allocator_optimization() == RewriterConfig::ON ||
+         cfg.pin_to_host_optimization() == RewriterConfig::ON ||
          !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }
 
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
new file mode 100644
index 0000000000..8a65cd3ec3
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -0,0 +1,218 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h"
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/grappler/graph_view.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/utils/symbolic_shapes.h"
+#include "tensorflow/core/grappler/utils/topological_sort.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace internal {
+
+// TODO(williamchan): Change this constant to be something smarter, maybe
+// dynamically determined.
+constexpr int64 kTensorMaxSize = 64;
+
+// Find KernelDef for `node`.
+Status TryFindKernelDef(const NodeDef& node, const KernelDef** kdef) {
+  // Try find KernelDef for node.device, else GPU or CPU.
+  for (const DeviceType& device :
+       {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}) {
+    Status s = FindKernelDef(device, node, kdef, nullptr);
+    if (s.ok()) {
+      return Status::OK();
+    }
+  }
+
+  return errors::NotFound("Could not find KernelDef for op: ", node.op());
+}
+
+// Check if all node's inputs are pinned to CPU memory.
+bool AreAllNodeInputsPinnedToHost(const GraphView& graph, const NodeDef& node) {
+  // Loop through all the inputs excluding the controlling nodes.
+  for (const GraphView::OutputPort& fanin : graph.GetFanins(node, false)) {
+    // Check if (the fanin) op's device is on CPU.
+    if (str_util::StrContains(fanin.node->device(), DEVICE_CPU)) {
+      continue;
+    }
+
+    // Check if (the fanin) op's output port is pinned to HostMemory.
+    const OpDef* fanin_odef = nullptr;
+    Status s = OpRegistry::Global()->LookUpOpDef(fanin.node->op(), &fanin_odef);
+    if (!s.ok()) {
+      LOG(INFO) << "Could not find OpDef for : " << fanin.node->op();
+      return false;
+    }
+
+    const int output_arg_id =
+        OpOutputPortIdToArgId(*fanin.node, *fanin_odef, fanin.port_id);
+    if (output_arg_id < 0) {
+      LOG(WARNING) << "Invalid port: " << fanin.port_id << "!\n"
+                   << node.DebugString() << "\n"
+                   << fanin_odef->DebugString();
+      return false;
+    }
+
+    const KernelDef* fanin_kdef = nullptr;
+    s = TryFindKernelDef(*fanin.node, &fanin_kdef);
+    if (!s.ok()) {
+      LOG(INFO) << "Could not find KernelDef for : " << fanin.node->op();
+      return false;
+    }
+
+    bool fanin_pinned = false;
+    for (const string& host_memory_arg : fanin_kdef->host_memory_arg()) {
+      if (fanin_odef->output_arg(output_arg_id).name() == host_memory_arg) {
+        fanin_pinned = true;
+        break;
+      }
+    }
+
+    if (!fanin_pinned) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool IsTensorIntegerAndSmall(const OpInfo::TensorProperties& prop) {
+  // Check if Tensor is integer and small size.
+
+  // Check type to be int32 or int64.
+  if (prop.dtype() != DataType::DT_INT32 &&
+      prop.dtype() != DataType::DT_INT64) {
+    return false;
+  }
+
+  // Check size known and small.
+  const int64 size = NumCoefficients(prop.shape());
+  if (size < 0 || size > kTensorMaxSize) {
+    return false;
+  }
+
+  return true;
+}
+
+bool AreAllNodeInputsAndOutputsIntsAndSmall(const GraphProperties& properties,
+                                            const NodeDef& node) {
+  for (const auto& prop : properties.GetInputProperties(node.name())) {
+    if (!IsTensorIntegerAndSmall(prop)) {
+      return false;
+    }
+  }
+
+  for (const auto& prop : properties.GetOutputProperties(node.name())) {
+    if (!IsTensorIntegerAndSmall(prop)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+string TryFindHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, const string& device) {
+  // Force this node onto the CPU.
+  if (device.empty() && has_device_cpu) {
+    return "/device:CPU:0";
+  } else if (str_util::StrContains(device, DEVICE_GPU)) {
+    // Sometimes the cluster can have:
+    //   devices = {"/device:CPU:0", "/device:XLA_GPU:0"}
+    // and we need to handle them properly.
+    for (const auto& device_match :
+         {std::pair<string, string>("GPU", "CPU:0"),
+          std::pair<string, string>("/device", "/device:CPU:0")}) {
+      const string device_host =
+          strings::StrCat(device.substr(0, device.rfind(device_match.first)),
+                          device_match.second);
+      if (devices.find(device_host) != devices.end()) {
+        return device_host;
+      }
+    }
+  }
+
+  // We couldn't find an appropriate Host device, return original device.
+  return device;
+}
+}  // end namespace internal
+
+Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
+                                    GraphDef* optimized_graph) {
+  *optimized_graph = item.graph;
+
+  GraphProperties properties(item);
+  bool has_properties = false;
+  GraphView graph(optimized_graph);
+
+  gtl::FlatSet<string> devices;
+  if (cluster) {
+    const std::vector<string> device_names = cluster->GetDeviceNames();
+    devices.insert(device_names.begin(), device_names.end());
+  } else {
+    devices = {"/device:CPU:0"};
+  }
+
+  const bool has_device_cpu = devices.find("/device:CPU:0") != devices.end();
+
+  // Topologically sort the graph, so that we traverse the nodes in order. This
+  // will help us discover producer->consumer chains of Host ops.
+  TF_RETURN_IF_ERROR(TopologicalSort(optimized_graph));
+  for (auto& node : *optimized_graph->mutable_node()) {
+    // Check if node already on CPU.
+    if (str_util::StrContains(node.device(), DEVICE_CPU)) {
+      continue;
+    }
+
+    // Check the node can be run on CPU.
+    Status s = FindKernelDef(DEVICE_CPU, node, nullptr, nullptr);
+    if (!s.ok()) {
+      continue;
+    }
+
+    // Check all input's are pinned to CPU.
+    if (!internal::AreAllNodeInputsPinnedToHost(graph, node)) {
+      continue;
+    }
+
+    if (!has_properties) {
+      // This is an expensive call, call it lazily.
+      TF_RETURN_IF_ERROR(properties.InferStatically(false));
+      has_properties = true;
+    }
+
+    // Check all inputs and outputs are integers and small.
+    if (!internal::AreAllNodeInputsAndOutputsIntsAndSmall(properties, node)) {
+      continue;
+    }
+
+    // Try and swap the device to Host.
+    node.set_device(
+        internal::TryFindHostDevice(devices, has_device_cpu, node.device()));
+  }
+  return Status::OK();
+}
+
+}  // end namespace grappler
+}  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
new file mode 100644
index 0000000000..d557a03463
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
@@ -0,0 +1,62 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_PIN_TO_HOST_OPTIMIZER_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_PIN_TO_HOST_OPTIMIZER_H_
+
+#include <unordered_set>
+#include "tensorflow/core/grappler/costs/graph_properties.h"
+#include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
+#include "tensorflow/core/protobuf/rewriter_config.pb.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace internal {
+// Try and find an appropriate Host device in `devices` given `device`.
+string TryFindHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, const string& device);
+}  // end namespace internal
+
+// Optimize TensorFlow ops that should be swapped into the CPU to avoid
+// excessive cpu<->gpu memcpy/sync.
+//
+// TODO(williamchan): The current heuristic will swap any small integer Const to
+// CPU. This may cause a problem cpu->cpu->gpu wherein the original behaviour of
+// gpu->gpu->gpu may have been better/faster. We should probably fix this.
+class PinToHostOptimizer : public GraphOptimizer {
+ public:
+  PinToHostOptimizer() : opt_level_(RewriterConfig::DEFAULT) {}
+  explicit PinToHostOptimizer(RewriterConfig::Toggle opt_level)
+      : opt_level_(opt_level) {}
+
+  ~PinToHostOptimizer() override {}
+
+  string name() const override { return "pin_to_host_optimizer"; };
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* optimized_graph) override;
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimized_graph, double result) override {}
+
+ private:
+  RewriterConfig::Toggle opt_level_;
+};
+
+}  // end namespace grappler
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_PIN_TO_HOST_OPTIMIZER_H_
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
new file mode 100644
index 0000000000..339ddfd1b5
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
@@ -0,0 +1,162 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/utils/grappler_test.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+class PinToHostOptimizerTest : public GrapplerTest {};
+
+TEST_F(PinToHostOptimizerTest, TryFindHostDevice) {
+  gtl::FlatSet<string> devices = {};
+  EXPECT_EQ("ABC", internal::TryFindHostDevice(devices, false, "ABC"));
+
+  devices = {"/device:CPU:0", "/device:XLA_GPU:0"};
+  EXPECT_EQ(internal::TryFindHostDevice(devices, true, ""), "/device:CPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:0"),
+            "/device:CPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:*"),
+            "/device:CPU:0");
+
+  devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"};
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
+            "/device:XLA_CPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
+            "/device:XLA_CPU:0");
+
+  devices = {"/device:XLA_GPU:0"};
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
+            "/device:XLA_GPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
+            "/device:XLA_GPU:*");
+}
+
+TEST_F(PinToHostOptimizerTest, OptimizeSmallOpsToHost) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a = ops::Const(s.WithOpName("a"), 1, {1024, 1024});
+  Output c = ops::Shape(s.WithOpName("c"), a);
+  Output d = ops::Const(s.WithOpName("d"), 0, {1});
+  Output e = ops::ReduceProd(s.WithOpName("e"), c, d);
+
+  GrapplerItem item;
+  item.fetch = {"a", "c", "d", "e"};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
+
+  GraphDef output;
+  PinToHostOptimizer optimizer(RewriterConfig::ON);
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  auto tensors = EvaluateNodes(item.graph, item.fetch);
+  EXPECT_EQ(tensors_expected.size(), tensors.size());
+  for (int i = 0; i < tensors.size(); ++i) {
+    test::ExpectTensorEqual<int32>(tensors[i], tensors_expected[i]);
+  }
+
+  int found = 0;
+  for (const NodeDef& node : output.node()) {
+    if (node.name() == "a" || node.name() == "c") {
+      EXPECT_TRUE(node.device().empty());
+    } else if (node.name() == "d" || node.name() == "e") {
+      EXPECT_EQ(node.device(), "/device:CPU:0");
+    }
+    ++found;
+  }
+  EXPECT_EQ(found, 4);
+}
+
+TEST_F(PinToHostOptimizerTest, TopologicalSort) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a = ops::Const(s.WithOpName("a"), 1, {1024, 1024});
+  Output c = ops::Shape(s.WithOpName("c"), a);
+  Output d = ops::Const(s.WithOpName("d"), 0, {1});
+  Output e = ops::ReduceProd(s.WithOpName("e"), c, d);
+
+  GrapplerItem item;
+  item.fetch = {"a", "c", "d", "e"};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
+
+  // Reverse the graph, and hence rely on the optimizer to sort it.
+  std::reverse(item.graph.mutable_node()->begin(),
+               item.graph.mutable_node()->end());
+
+  GraphDef output;
+  PinToHostOptimizer optimizer(RewriterConfig::ON);
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  auto tensors = EvaluateNodes(item.graph, item.fetch);
+  EXPECT_EQ(tensors_expected.size(), tensors.size());
+  for (int i = 0; i < tensors.size(); ++i) {
+    test::ExpectTensorEqual<int32>(tensors[i], tensors_expected[i]);
+  }
+
+  int found = 0;
+  for (const NodeDef& node : output.node()) {
+    if (node.name() == "a" || node.name() == "c") {
+      EXPECT_TRUE(node.device().empty());
+    } else if (node.name() == "d" || node.name() == "e") {
+      EXPECT_EQ(node.device(), "/device:CPU:0");
+    }
+    ++found;
+  }
+  EXPECT_EQ(found, 4);
+}
+
+TEST_F(PinToHostOptimizerTest, PortIdToArgId) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a = ops::Const(s.WithOpName("a"), 1, {1, 2, 3});
+  ops::ShapeN b(s.WithOpName("b"), {a, a, a});
+
+  GrapplerItem item;
+  item.fetch = {"a", "b"};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
+
+  GraphDef output;
+  PinToHostOptimizer optimizer(RewriterConfig::ON);
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  auto tensors = EvaluateNodes(item.graph, item.fetch);
+  EXPECT_EQ(tensors_expected.size(), tensors.size());
+  for (int i = 0; i < tensors.size(); ++i) {
+    test::ExpectTensorEqual<int32>(tensors[i], tensors_expected[i]);
+  }
+
+  int found = 0;
+  for (const NodeDef& node : output.node()) {
+    EXPECT_EQ(node.device(), "/device:CPU:0");
+    ++found;
+  }
+  EXPECT_EQ(found, 2);
+}
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc
index 910b0acaef..6266733f3e 100644
--- a/tensorflow/core/grappler/utils/grappler_test.cc
+++ b/tensorflow/core/grappler/utils/grappler_test.cc
@@ -30,13 +30,16 @@ GrapplerTest::GrapplerTest() {
   // optimizations interfering in the comparison.
   RewriterConfig* cfg =
       options_.config.mutable_graph_options()->mutable_rewrite_options();
-  cfg->set_constant_folding(RewriterConfig::OFF);
+  // TODO(rmlarsen): Add utility to generate config w/ all optimizers turned
+  // off.
   cfg->set_arithmetic_optimization(RewriterConfig::OFF);
+  cfg->set_constant_folding(RewriterConfig::OFF);
+  cfg->set_debug_stripper(RewriterConfig::OFF);
   cfg->set_dependency_optimization(RewriterConfig::OFF);
-  cfg->set_loop_optimization(RewriterConfig::OFF);
   cfg->set_function_optimization(RewriterConfig::OFF);
   cfg->set_layout_optimizer(RewriterConfig::OFF);
-  cfg->set_debug_stripper(RewriterConfig::OFF);
+  cfg->set_loop_optimization(RewriterConfig::OFF);
+  cfg->set_pin_to_host_optimization(RewriterConfig::OFF);
 }
 
 std::vector<Tensor> GrapplerTest::EvaluateNodes(
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 07f984ceea..0e780eacc9 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -75,6 +75,8 @@ message RewriterConfig {
   // Try to allocate some independent Op outputs contiguously in order to
   // merge or eliminate downstream Ops (off by default).
   Toggle scoped_allocator_optimization = 15;
+  // Force small ops onto the CPU (default is OFF).
+  Toggle pin_to_host_optimization = 18;
 
   // Controls how many times we run the optimizers in meta optimizer (default
   // is once).
diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py
index 55231954d1..4630bda590 100644
--- a/tensorflow/python/debug/cli/analyzer_cli_test.py
+++ b/tensorflow/python/debug/cli/analyzer_cli_test.py
@@ -57,7 +57,8 @@ def no_rewrite_session_config():
       disable_model_pruning=True,
       constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
       arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
-      dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF)
+      dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+      pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF)
 
   graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
   return config_pb2.ConfigProto(graph_options=graph_options)
diff --git a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
index 676097fde9..1f67f8a0d4 100644
--- a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
+++ b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
@@ -45,6 +45,7 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
   def _no_rewrite_session_config(self):
     rewriter_config = rewriter_config_pb2.RewriterConfig(
         dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+        pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF,
         min_graph_nodes=-1)
     graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
     return config_pb2.ConfigProto(graph_options=graph_options)
@@ -156,7 +157,7 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
           sess, cond, expected_output=21.0)
 
   def testReconstructGraphWithWhileLoop(self):
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       loop_body = lambda i: math_ops.add(i, 2)
       loop_cond = lambda i: math_ops.less(i, 16)
       i = constant_op.constant(10, name="i")
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index c302072aa1..68b7b323d5 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1934,6 +1934,8 @@ class TensorFlowTestCase(googletest.TestCase):
           rewriter_config_pb2.RewriterConfig.OFF)
       config.graph_options.rewrite_options.arithmetic_optimization = (
           rewriter_config_pb2.RewriterConfig.OFF)
+      config.graph_options.rewrite_options.pin_to_host_optimization = (
+          rewriter_config_pb2.RewriterConfig.OFF)
       return config
 
     return ErrorLoggingSession(graph=graph, config=prepare_config(config))
-- 
GitLab


From e692dda4c8b199555e2fa32132a7784e0893c870 Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Sat, 22 Sep 2018 09:11:05 -0700
Subject: [PATCH 0565/1357] Fixed a bug in CollectiveAllReduce that sometimes
 the variable names it sees are not complete and thus not unique, leading to
 same collective keys for different variables.

PiperOrigin-RevId: 214117466
---
 .../python/collective_all_reduce_strategy.py  |  8 +-
 .../collective_all_reduce_strategy_test.py    | 78 +++++++++++++++++++
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
index 77079d0df9..297cacf192 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
@@ -143,8 +143,10 @@ class CollectiveAllReduceStrategy(mirrored_strategy.MirroredStrategy):
     def _real_mirrored_creator(devices, *args, **kwargs):
       """Creates one MirroredVariable on the current worker."""
       index = {}
+      unique_var_name = ops.get_default_graph().unique_name(
+          kwargs["name"], mark_as_used=False).rstrip("/")
       collective_instance_key = self._collective_keys.get_instance_key(
-          key_id=kwargs["name"])
+          key_id=unique_var_name)
       if "initial_value" not in kwargs:
         raise ValueError("Initial value must be specified.")
       initial_value = kwargs["initial_value"]
@@ -188,6 +190,10 @@ class CollectiveAllReduceStrategy(mirrored_strategy.MirroredStrategy):
           with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
             v = next_creator(*args, **kwargs)
 
+          if i == 0:
+            actual_var_name = v.name.split(":")[0]
+            assert unique_var_name == actual_var_name, "%r vs %r" % (
+                unique_var_name, actual_var_name)
           assert not isinstance(v, values.DistributedVariable)
           index[d] = v
       return index
diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
index 36e9761073..33ffbf6abe 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
@@ -26,6 +26,7 @@ from tensorflow.contrib.distribute.python import combinations
 from tensorflow.contrib.distribute.python import cross_tower_utils
 from tensorflow.contrib.distribute.python import multi_worker_test_base
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -34,9 +35,14 @@ from tensorflow.python.layers import core
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients
 from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import test
+from tensorflow.python.training import adam
+from tensorflow.python.training import training_util
 
 
 class CollectiveAllReduceStrategyTestBase(
@@ -146,6 +152,56 @@ class CollectiveAllReduceStrategyTestBase(
       self.assertLess(error_after, error_before)
       return error_after < error_before
 
+  def _test_complex_model(self, task_type, task_id, num_gpus):
+    d, master_target = self._get_test_object(task_type, task_id, num_gpus)
+
+    def model_fn():
+      """Mnist model with synthetic input."""
+      data_format = 'channels_last'
+      input_shape = [28, 28, 1]
+      l = keras.layers
+      max_pool = l.MaxPooling2D((2, 2), (2, 2),
+                                padding='same',
+                                data_format=data_format)
+      model = keras.Sequential([
+          l.Reshape(target_shape=input_shape, input_shape=(28 * 28,)),
+          l.Conv2D(
+              32,
+              5,
+              padding='same',
+              data_format=data_format,
+              activation=nn.relu), max_pool,
+          l.Conv2D(
+              64,
+              5,
+              padding='same',
+              data_format=data_format,
+              activation=nn.relu), max_pool,
+          l.Flatten(),
+          l.Dense(1024, activation=nn.relu),
+          l.Dropout(0.4),
+          l.Dense(10)
+      ])
+      image = random_ops.random_uniform([2, 28, 28])
+      label = random_ops.random_uniform([2, 1], maxval=10, dtype=dtypes.int32)
+      logits = model(image, training=True)
+      loss = losses.sparse_softmax_cross_entropy(labels=label, logits=logits)
+      optimizer = adam.AdamOptimizer(learning_rate=1e-4)
+      train_op = optimizer.minimize(loss,
+                                    training_util.get_or_create_global_step())
+      return train_op
+
+    with ops.Graph().as_default(), \
+         self.test_session(config=self._sess_config,
+                           target=master_target) as sess:
+      with d.scope():
+        train_op = d.call_for_each_tower(model_fn)
+        train_op = d.group(d.unwrap(train_op))
+
+      sess.run(variables.global_variables_initializer())
+      sess.run(train_op)
+      return True
+
   def _test_variable_initialization(self, task_type, task_id, num_gpus):
     distribution, master_target = self._get_test_object(task_type, task_id,
                                                         num_gpus)
@@ -206,6 +262,14 @@ class DistributedCollectiveAllReduceStrategyTest(
         self._cluster_spec,
         num_gpus=num_gpus)
 
+  @combinations.generate(
+      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1))
+  def testComplexModel(self, num_gpus):
+    if context.num_gpus() < num_gpus:
+      return
+    self._run_between_graph_clients(
+        self._test_complex_model, self._cluster_spec, num_gpus=num_gpus)
+
 
 class DistributedCollectiveAllReduceStrategyTestWithChief(
     CollectiveAllReduceStrategyTestBase, parameterized.TestCase):
@@ -236,6 +300,14 @@ class DistributedCollectiveAllReduceStrategyTestWithChief(
         self._cluster_spec,
         num_gpus=num_gpus)
 
+  @combinations.generate(
+      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1))
+  def testComplexModel(self, num_gpus):
+    if context.num_gpus() < num_gpus:
+      return
+    self._run_between_graph_clients(
+        self._test_complex_model, self._cluster_spec, num_gpus=num_gpus)
+
 
 class LocalCollectiveAllReduceStrategy(
     CollectiveAllReduceStrategyTestBase, parameterized.TestCase):
@@ -246,6 +318,12 @@ class LocalCollectiveAllReduceStrategy(
       return
     self._test_minimize_loss_graph(None, None, num_gpus)
 
+  def testComplexModel(self, num_gpus=2):
+    # Collective ops doesn't support strategy with one device.
+    if context.num_gpus() < num_gpus:
+      return
+    self._test_complex_model(None, None, num_gpus)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From adea2433eb49726d248e0ae8e99835250bc6194f Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Sat, 22 Sep 2018 09:57:09 -0700
Subject: [PATCH 0566/1357] Temporarily remove isolate_session_state in
 CollectiveAllReduceStrategy.

PiperOrigin-RevId: 214119090
---
 .../contrib/distribute/python/collective_all_reduce_strategy.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
index 297cacf192..c900b41e14 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
@@ -235,8 +235,6 @@ class CollectiveAllReduceStrategy(mirrored_strategy.MirroredStrategy):
     if not session_config or not self._cluster_spec:
       return
 
-    session_config.isolate_session_state = True
-
     assert self._task_type
     assert self._task_id is not None
 
-- 
GitLab


From 1a8dd7910eedfea6ba1917c8055fcd7fed9b157e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Sep 2018 10:31:37 -0700
Subject: [PATCH 0567/1357] Enable platform selection in GRPC service.

PiperOrigin-RevId: 214120578
---
 tensorflow/compiler/xla/rpc/BUILD                |  1 +
 tensorflow/compiler/xla/rpc/grpc_service_main.cc | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/rpc/BUILD b/tensorflow/compiler/xla/rpc/BUILD
index aa8da04489..3abb3855a4 100644
--- a/tensorflow/compiler/xla/rpc/BUILD
+++ b/tensorflow/compiler/xla/rpc/BUILD
@@ -41,6 +41,7 @@ cc_library(
         ":grpc_service",
         "//tensorflow:grpc++",
         "//tensorflow/compiler/xla/service:cpu_plugin",
+        "//tensorflow/compiler/xla/service:platform_util",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/strings:str_format",
diff --git a/tensorflow/compiler/xla/rpc/grpc_service_main.cc b/tensorflow/compiler/xla/rpc/grpc_service_main.cc
index fb54d39a2a..522ab99fb1 100644
--- a/tensorflow/compiler/xla/rpc/grpc_service_main.cc
+++ b/tensorflow/compiler/xla/rpc/grpc_service_main.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "grpcpp/server_builder.h"
 #include "absl/strings/str_format.h"
 #include "tensorflow/compiler/xla/rpc/grpc_service.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/command_line_flags.h"
@@ -30,7 +31,10 @@ namespace {
 int RealMain(int argc, char** argv) {
   int32 port = 1685;
   bool any_address = false;
+  string platform_str;
   std::vector<tensorflow::Flag> flag_list = {
+      tensorflow::Flag("platform", &platform_str,
+                       "The XLA platform this service should be bound to"),
       tensorflow::Flag("port", &port, "The TCP port to listen on"),
       tensorflow::Flag(
           "any", &any_address,
@@ -44,8 +48,12 @@ int RealMain(int argc, char** argv) {
   }
   tensorflow::port::InitMain(argv[0], &argc, &argv);
 
+  se::Platform* platform = nullptr;
+  if (!platform_str.empty()) {
+    platform = PlatformUtil::GetPlatform(platform_str).ValueOrDie();
+  }
   std::unique_ptr<xla::GRPCService> service =
-      xla::GRPCService::NewService().ConsumeValueOrDie();
+      xla::GRPCService::NewService(platform).ConsumeValueOrDie();
 
   ::grpc::ServerBuilder builder;
   string server_address(
-- 
GitLab


From 647be78acaaa48b7788acea2d26925c3361b5372 Mon Sep 17 00:00:00 2001
From: Roland Fernandez <rfernand@microsoft.com>
Date: Sat, 22 Sep 2018 18:51:11 -0700
Subject: [PATCH 0568/1357] fix doc bug for per_image_standardization (unit
 norm vs. variance)

---
 tensorflow/python/ops/image_ops_impl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 325418d5f7..d945f95716 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1176,7 +1176,7 @@ def resize_image_with_pad(image,
 
 @tf_export('image.per_image_standardization')
 def per_image_standardization(image):
-  """Linearly scales `image` to have zero mean and unit norm.
+  """Linearly scales `image` to have zero mean and unit variance.
 
   This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
   of all values in image, and
-- 
GitLab


From 646b3c237deaddddd087d39ab57130b08375c4c7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Sep 2018 02:01:46 -0700
Subject: [PATCH 0569/1357] compat: Update forward compatibility horizon to
 2018-09-23

PiperOrigin-RevId: 214157821
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index cafce9af52..ec840965a7 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 22)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 23)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 6dd7a09211cc74d11ff1554624b527c432020cbc Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Sun, 23 Sep 2018 20:33:19 +0800
Subject: [PATCH 0570/1357] Enable partitioned variable assignments

---
 .../python/kernel_tests/variables_test.py     | 43 ++++++++++++++++-
 tensorflow/python/ops/variables.py            | 47 +++++++++++++++++--
 2 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 2e7975667c..687784c8b7 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -673,7 +673,7 @@ class PartitionedVariableTest(test.TestCase):
         v0._set_save_slice_info(
             variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
         v1._set_save_slice_info(
-            variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
+            variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1]))
         partitions = [2]
 
         variables.PartitionedVariable(
@@ -696,6 +696,47 @@ class PartitionedVariableTest(test.TestCase):
             variable_list=[v0],
             partitions=partitions)
 
+  def testPartitionedVariableAssignments(self):
+    with ops.Graph().as_default(), self.cached_session() as sess:
+      v0 = variables.Variable(initial_value=[0.0])
+      v1 = variables.Variable(initial_value=[1.0])
+      v0._set_save_slice_info(
+          variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
+      v1._set_save_slice_info(
+          variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
+      partitions = [2]
+
+      # Pass variable_list as [v1, v0] to ensure they are properly
+      # re-sorted to [v0, v1] based on their slice info offsets.
+      partitioned_variable = variables.PartitionedVariable(
+          name="two_vars",
+          shape=[2],
+          dtype=v0.dtype,
+          variable_list=[v0, v1],
+          partitions=partitions)
+      
+      deltas_a = constant_op.constant([1.0, 2.0])
+      deltas_b = constant_op.constant([3.0, 4.0])
+      ones = array_ops.ones([2])
+      plus_delta = partitioned_variable.assign_add(deltas_a)
+      minus_delta = partitioned_variable.assign_sub(deltas_b)
+      assign_ones = partitioned_variable.assign(ones)
+      variables.global_variables_initializer().run()
+
+      self.assertEqual([1.0], plus_delta[0].eval())
+      self.assertEqual([1.0], v0.eval())
+      self.assertEqual([3.0], plus_delta[1].eval())
+      self.assertEqual([3.0], v1.eval())
+      
+      self.assertEqual([-2.0], minus_delta[0].eval())
+      self.assertEqual([-2.0], v0.eval())
+      self.assertEqual([-1.0], minus_delta[1].eval())
+      self.assertEqual([-1.0], v1.eval())
+ 
+      self.assertEqual([1.0], assign_ones[0].eval())
+      self.assertEqual([1.0], v0.eval())
+      self.assertEqual([1.0], assign_ones[1].eval())
+      self.assertEqual([1.0], v1.eval())
 
 class VariableContainerTest(test.TestCase):
 
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 7a46157739..2d6a767fed 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -2395,11 +2395,50 @@ class PartitionedVariable(object):
   def _get_partitions(self):
     return self._partitions
 
-  def assign(self, value, use_locking=False):
-    _ = value, use_locking
-    raise NotImplementedError(
-        "assign() has not been implemented for PartitionedVariable.")
+  def _apply_assign_fn(self,
+                       assign_fn,
+                       value):
+    partition_axes = self._partition_axes()
+    if len(partition_axes) > 1:
+      raise NotImplementedError(
+          "Cannot concatenate along more than one dimension: %s.  "
+          "Multi-axis partition assign_fn is not supported" % str(partition_axes))
+    partition_ix = partition_axes[0]
+    size_splits_list = [
+        var.shape[partition_ix].value for var in self._variable_list]
+    value_list = array_ops.split(
+        value, size_splits_list, axis=partition_ix)
+    op_list = [
+        assign_fn(var, value_list[idx], idx) \
+        for idx, var in enumerate(self._variable_list)]
+    return op_list
 
+  def assign(self, value, use_locking=False, name=None, read_value=True):
+    assign_fn = lambda var, r_value, idx: var.assign(
+        r_value, use_locking=use_locking,
+        name="%s_%d" % (name, idx), read_value=read_value)
+    assign_list = self._apply_assign_fn(assign_fn, value)
+    if read_value:
+      return assign_list
+    return [assign.op for assign in assign_list]
+
+  def assign_add(self, value, use_locking=False, name=None, read_value=True):
+    assign_fn = lambda var, r_value, idx: var.assign_add(
+        r_value, use_locking=use_locking,
+        name="%s_%d" % (name, idx), read_value=read_value)
+    assign_list = self._apply_assign_fn(assign_fn, value)
+    if read_value:
+      return assign_list
+    return [assign.op for assign in assign_list]
+
+  def assign_sub(self, value, use_locking=False, name=None, read_value=True):
+    assign_fn = lambda var, r_value, idx: var.assign_sub(
+        r_value, use_locking=use_locking,
+        name="%s_%d" % (name, idx), read_value=read_value)
+    assign_list = self._apply_assign_fn(assign_fn, value)
+    if read_value:
+      return assign_list
+    return [assign.op for assign in assign_list]
 
 @tf_export("global_variables")
 def global_variables(scope=None):
-- 
GitLab


From f1237459efb3a5578885b03d5b33c3fed350c348 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Sun, 23 Sep 2018 08:53:45 -0700
Subject: [PATCH 0571/1357] Moving MultiDeviceIterator from contrib to core.

PiperOrigin-RevId: 214173896
---
 .../data/kernels/prefetching_kernels.cc       | 650 ------------------
 tensorflow/contrib/data/ops/dataset_ops.cc    |  76 --
 .../kernel_tests/prefetching_ops_test.py      | 160 -----
 .../api_def_MultiDeviceIterator.pbtxt         |  43 ++
 ..._MultiDeviceIteratorFromStringHandle.pbtxt |  29 +
 ..._MultiDeviceIteratorGetNextFromShard.pbtxt |  41 ++
 .../api_def_MultiDeviceIteratorInit.pbtxt     |  30 +
 ...ef_MultiDeviceIteratorToStringHandle.pbtxt |  17 +
 tensorflow/core/kernels/data/BUILD            |  15 +
 tensorflow/core/kernels/data/dataset_utils.cc |  37 +
 tensorflow/core/kernels/data/dataset_utils.h  |  10 +
 tensorflow/core/kernels/data/iterator_ops.cc  |  37 -
 .../kernels/data/multi_device_iterator_ops.cc | 633 +++++++++++++++++
 tensorflow/core/ops/dataset_ops.cc            |  37 +
 tensorflow/python/data/BUILD                  |   1 +
 tensorflow/python/data/kernel_tests/BUILD     |  20 +
 .../multi_device_iterator_test.py             | 190 +++++
 tensorflow/python/data/ops/BUILD              |  18 +
 .../data/ops/multi_device_iterator_ops.py     | 213 ++++++
 19 files changed, 1334 insertions(+), 923 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_MultiDeviceIterator.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorFromStringHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorGetNextFromShard.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorInit.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorToStringHandle.pbtxt
 create mode 100644 tensorflow/core/kernels/data/multi_device_iterator_ops.cc
 create mode 100644 tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
 create mode 100644 tensorflow/python/data/ops/multi_device_iterator_ops.py

diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc
index 39f23f7b24..96f1dd0059 100644
--- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc
+++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc
@@ -476,656 +476,6 @@ class IteratorGetDeviceOp : public OpKernel {
 REGISTER_KERNEL_BUILDER(Name("IteratorGetDevice").Device(DEVICE_CPU),
                         IteratorGetDeviceOp);
 
-Status VerifyTypesMatch(const DataTypeVector& expected,
-                        const DataTypeVector& received) {
-  if (expected.size() != received.size()) {
-    return errors::InvalidArgument(
-        "Number of components does not match: expected ", expected.size(),
-        " types but got ", received.size(), ".");
-  }
-  for (size_t i = 0; i < expected.size(); ++i) {
-    if (expected[i] != received[i]) {
-      return errors::InvalidArgument("Data type mismatch at component ", i,
-                                     ": expected ", DataTypeString(expected[i]),
-                                     " but got ", DataTypeString(received[i]),
-                                     ".");
-    }
-  }
-  return Status::OK();
-}
-
-Status VerifyShapesCompatible(const std::vector<PartialTensorShape>& expected,
-                              const std::vector<PartialTensorShape>& received) {
-  if (expected.size() != received.size()) {
-    return errors::InvalidArgument(
-        "Number of components does not match: expected ", expected.size(),
-        " shapes but got ", received.size(), ".");
-  }
-  for (size_t i = 0; i < expected.size(); ++i) {
-    if (!expected[i].IsCompatibleWith(received[i])) {
-      return errors::InvalidArgument("Incompatible shapes at component ", i,
-                                     ": expected ", expected[i].DebugString(),
-                                     " but got ", received[i].DebugString(),
-                                     ".");
-    }
-  }
-
-  return Status::OK();
-}
-
-string SanitizeThreadSuffix(string suffix) {
-  string clean;
-  for (int i = 0; i < suffix.size(); ++i) {
-    const char ch = suffix[i];
-    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
-        (ch >= '0' && ch <= '9') || ch == '_' || ch == '-') {
-      clean += ch;
-    } else {
-      clean += '_';
-    }
-  }
-  return clean;
-}
-
-struct HostBufferElement {
-  Status status;
-  bool end_of_sequence;
-  std::vector<Tensor> value;
-};
-
-using MultiDeviceIteratorCallback =
-    std::function<void(const HostBufferElement&)>;
-
-class MultiDeviceIterator : public ResourceBase {
- public:
-  MultiDeviceIterator(const DataTypeVector& output_types,
-                      const std::vector<PartialTensorShape>& output_shapes,
-                      const std::vector<string>& devices,
-                      std::unique_ptr<FunctionLibraryDefinition> flib_def,
-                      std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
-                      FunctionLibraryRuntime* lib)
-      : output_types_(output_types),
-        output_shapes_(output_shapes),
-        devices_(devices),
-        flib_def_(std::move(flib_def)),
-        pflr_(std::move(pflr)),
-        lib_(lib) {
-    CHECK_NOTNULL(lib_);
-  }
-
-  string DebugString() override {
-    return strings::StrCat("MultiDeviceIterator for ", devices_.size(),
-                           " devices");
-  }
-
-  Status Init(std::unique_ptr<IteratorBase> iterator, int64 max_buffer_size,
-              int64* incarnation_id) {
-    if (iterator) {
-      TF_RETURN_IF_ERROR(
-          VerifyTypesMatch(output_types_, iterator->output_dtypes()));
-      TF_RETURN_IF_ERROR(
-          VerifyShapesCompatible(output_shapes_, iterator->output_shapes()));
-    }
-
-    mutex_lock l(mu_);
-    if (multi_device_buffer_) {
-      multi_device_buffer_->Reset();
-    }
-
-    ++incarnation_id_;
-    *incarnation_id = incarnation_id_;
-
-    multi_device_buffer_.reset(
-        new MultiDeviceBuffer(devices_.size(), max_buffer_size, incarnation_id_,
-                              std::move(iterator)));
-    return Status::OK();
-  }
-
-  void GetNextFromShard(IteratorContext* ctx, int shard_num,
-                        int64 incarnation_id,
-                        MultiDeviceIteratorCallback callback) {
-    if (lib_ != nullptr) {
-      ctx->set_lib(lib_);
-    }
-    tf_shared_lock l(mu_);
-    multi_device_buffer_->GetNextFromShard(ctx, shard_num, incarnation_id,
-                                           std::move(callback));
-  }
-
-  const DataTypeVector& output_types() const { return output_types_; }
-
-  const std::vector<PartialTensorShape>& output_shapes() const {
-    return output_shapes_;
-  }
-
-  std::shared_ptr<const FunctionLibraryDefinition> function_library() {
-    tf_shared_lock l(mu_);
-    return lib_def_;
-  }
-
-  FunctionLibraryRuntime* const lib() {
-    tf_shared_lock l(mu_);
-    return lib_;
-  }
-
- private:
-  // A private class that uses a background thread to keep a per device buffer
-  // full.
-  class MultiDeviceBuffer {
-   public:
-    MultiDeviceBuffer(size_t size, int64 max_buffer_size, int64 incarnation_id,
-                      std::unique_ptr<IteratorBase> host_iterator)
-        : buffer_(size),
-          size_(size),
-          max_buffer_size_(max_buffer_size),
-          incarnation_id_(incarnation_id),
-          host_iterator_(std::move(host_iterator)) {}
-
-    ~MultiDeviceBuffer() {
-      {
-        mutex_lock l(mu_);
-        if (!background_thread_started_) return;
-      }
-      Reset();
-    }
-
-    void Reset() LOCKS_EXCLUDED(mu_) {
-      {
-        mutex_lock l(mu_);
-        if (background_thread_finished_) {
-          return;
-        }
-
-        cancelled_ = true;
-        // Wake up the background thread.
-        for (int i = 0; i < size_; ++i) {
-          buffer_[i].cond_var.notify_all();
-        }
-
-        // Make sure background thread has finished first.
-        while (!background_thread_finished_) {
-          shutdown_cond_var_.wait(l);
-        }
-      }
-      RunPendingCallbacks();
-    }
-
-    void GetNextFromShard(IteratorContext* ctx, int shard_num,
-                          int64 incarnation_id,
-                          MultiDeviceIteratorCallback callback) {
-      HostBufferElement elem;
-      if (incarnation_id_ != incarnation_id) {
-        elem.status = errors::InvalidArgument("Invalid incarnation id");
-        callback(elem);
-        return;
-      }
-
-      bool produced_output = false;
-      {
-        mutex_lock l(mu_);
-        if (cancelled_) {
-          elem.status = errors::Cancelled("Cancelled Multidevice iterator");
-          callback(elem);
-          return;
-        }
-
-        EnsureBackgroundThreadStarted(ctx);
-
-        if (!buffer_[shard_num].data.empty()) {
-          produced_output = true;
-          std::swap(elem, buffer_[shard_num].data.front());
-          buffer_[shard_num].data.pop_front();
-          // Wake up background thread if it is blocked on this element.
-          if (buffer_[shard_num].data.size() == max_buffer_size_ - 1) {
-            buffer_[shard_num].cond_var.notify_all();
-          }
-        } else {
-          if (background_thread_finished_) {
-            produced_output = true;
-            elem.end_of_sequence = true;
-          } else {
-            buffer_[shard_num].callbacks.push_back(std::move(callback));
-            callback = nullptr;
-          }
-        }
-      }
-
-      if (produced_output) {
-        callback(elem);
-      }
-    }
-
-   private:
-    void EnsureBackgroundThreadStarted(IteratorContext* ctx)
-        EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-      if (!background_thread_) {
-        background_thread_.reset(ctx->env()->StartThread(
-            {}, "multi_device_iterator_background_thread",
-            std::bind(&MultiDeviceIterator::MultiDeviceBuffer::BackgroundThread,
-                      this, new IteratorContext(*ctx))));
-      }
-    }
-
-    void RunPendingCallbacks() LOCKS_EXCLUDED(mu_) {
-      // Run all remaining callbacks.
-      std::vector<MultiDeviceIteratorCallback> cancellation_callbacks;
-      std::vector<HostBufferElement> cancellation_elements;
-      {
-        mutex_lock l(mu_);
-
-        for (int i = 0; i < size_; ++i) {
-          while (!buffer_[i].callbacks.empty()) {
-            if (buffer_[i].data.empty()) {
-              HostBufferElement elem;
-              elem.status =
-                  errors::Cancelled("Cancelled and buffer not filled.");
-              cancellation_elements.push_back(std::move(elem));
-            } else {
-              cancellation_elements.push_back(
-                  std::move(buffer_[i].data.front()));
-              buffer_[i].data.pop_front();
-            }
-            cancellation_callbacks.push_back(
-                std::move(buffer_[i].callbacks.front()));
-            buffer_[i].callbacks.pop_front();
-          }
-        }
-      }
-      for (int i = 0; i < cancellation_callbacks.size(); ++i) {
-        cancellation_callbacks[i](cancellation_elements[i]);
-      }
-    }
-
-    void BackgroundThread(IteratorContext* ctx) {
-      {
-        mutex_lock l(mu_);
-        background_thread_started_ = true;
-      }
-      std::unique_ptr<IteratorContext> cleanup(ctx);
-      int shard_to_fetch = 0;
-      while (true) {
-        HostBufferElement elem;
-        MultiDeviceIteratorCallback callback = nullptr;
-        bool end_of_iterator = false;
-
-        {
-          mutex_lock l(mu_);
-          while (!cancelled_ &&
-                 buffer_[shard_to_fetch].data.size() >= max_buffer_size_) {
-            buffer_[shard_to_fetch].cond_var.wait(l);
-          }
-
-          if (cancelled_) {
-            background_thread_finished_ = true;
-            shutdown_cond_var_.notify_all();
-            return;
-          }
-        }
-
-        elem.status =
-            host_iterator_->GetNext(ctx, &elem.value, &elem.end_of_sequence);
-
-        if (elem.status.ok() && elem.end_of_sequence) {
-          end_of_iterator = true;
-        }
-
-        {
-          mutex_lock l(mu_);
-          // Try to find a callback, else just push stuff into buffer.
-          if (!buffer_[shard_to_fetch].callbacks.empty()) {
-            callback = buffer_[shard_to_fetch].callbacks.front();
-            buffer_[shard_to_fetch].callbacks.pop_front();
-          } else {
-            buffer_[shard_to_fetch].data.push_back(std::move(elem));
-            elem = HostBufferElement();
-          }
-        }
-
-        if (callback) {
-          (*ctx->runner())(std::bind(std::move(callback), std::move(elem)));
-        }
-
-        // Finish off the thread if we reach the end of the iterator. Runs
-        // pending callbacks.
-        if (end_of_iterator) {
-          {
-            mutex_lock l(mu_);
-            background_thread_finished_ = true;
-            shutdown_cond_var_.notify_all();
-          }
-          RunPendingCallbacks();
-          return;
-        }
-        shard_to_fetch = (shard_to_fetch + 1) % size_;
-      }
-    }
-
-    struct HostBuffer {
-      condition_variable cond_var;
-      std::deque<HostBufferElement> data;
-      std::deque<MultiDeviceIteratorCallback> callbacks;
-    };
-
-    mutex mu_;
-    std::unique_ptr<Thread> background_thread_ GUARDED_BY(mu_);
-    bool background_thread_finished_ GUARDED_BY(mu_) = false;
-    bool background_thread_started_ GUARDED_BY(mu_) = false;
-    bool cancelled_ GUARDED_BY(mu_) = false;
-    condition_variable shutdown_cond_var_ GUARDED_BY(mu_);
-
-    std::vector<HostBuffer> buffer_;
-
-    const size_t size_;
-    const int64 max_buffer_size_;
-    const int64 incarnation_id_;
-    const std::unique_ptr<IteratorBase> host_iterator_;
-  };
-
-  mutex mu_;
-  const DataTypeVector output_types_;
-  const std::vector<PartialTensorShape> output_shapes_;
-  const std::vector<string> devices_;
-  const std::unique_ptr<FunctionLibraryDefinition> flib_def_;
-  const std::unique_ptr<ProcessFunctionLibraryRuntime> pflr_;
-  FunctionLibraryRuntime* const lib_ = nullptr;  // not owned.
-  std::shared_ptr<const FunctionLibraryDefinition> lib_def_ GUARDED_BY(mu_);
-
-  int64 incarnation_id_ GUARDED_BY(mu_) = 0;
-  std::unique_ptr<MultiDeviceBuffer> multi_device_buffer_ GUARDED_BY(mu_);
-};
-
-// Just creates a MultiDeviceIterator and returns it.
-class MultiDeviceIteratorHandleOp : public OpKernel {
- public:
-  explicit MultiDeviceIteratorHandleOp(OpKernelConstruction* ctx)
-      : OpKernel(ctx), graph_def_version_(ctx->graph_def_version()) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("shared_name", &name_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("container", &container_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("devices", &devices_));
-  }
-
-  // The resource is deleted from the resource manager only when it is private
-  // to kernel.
-  ~MultiDeviceIteratorHandleOp() override {
-    if (resource_ != nullptr) {
-      resource_->Unref();
-      if (cinfo_.resource_is_private_to_kernel()) {
-        if (!cinfo_.resource_manager()
-                 ->template Delete<MultiDeviceIterator>(cinfo_.container(),
-                                                        cinfo_.name())
-                 .ok()) {
-          // Do nothing; the resource can have been deleted by session resets.
-        }
-      }
-    }
-  }
-
-  void Compute(OpKernelContext* context) override LOCKS_EXCLUDED(mu_) {
-    {
-      mutex_lock l(mu_);
-      if (resource_ == nullptr) {
-        FunctionLibraryRuntime* lib;
-        std::unique_ptr<FunctionLibraryDefinition> flib_def(nullptr);
-        std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(nullptr);
-        OP_REQUIRES_OK(context, context->function_library()->Clone(
-                                    &flib_def, &pflr, &lib));
-        ResourceMgr* mgr = context->resource_manager();
-        OP_REQUIRES_OK(context, cinfo_.Init(mgr, def()));
-
-        MultiDeviceIterator* resource;
-        OP_REQUIRES_OK(
-            context,
-            mgr->LookupOrCreate<MultiDeviceIterator>(
-                cinfo_.container(), cinfo_.name(), &resource,
-                [this, lib, &flib_def, &pflr](MultiDeviceIterator** ret)
-                    EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-                      *ret = new MultiDeviceIterator(
-                          output_types_, output_shapes_, devices_,
-                          std::move(flib_def), std::move(pflr), lib);
-                      return Status::OK();
-                    }));
-
-        Status s = VerifyResource(resource);
-        if (TF_PREDICT_FALSE(!s.ok())) {
-          resource->Unref();
-          context->SetStatus(s);
-          return;
-        }
-
-        resource_ = resource;
-      }
-    }
-    OP_REQUIRES_OK(context, MakeResourceHandleToOutput(
-                                context, 0, cinfo_.container(), cinfo_.name(),
-                                MakeTypeIndex<MultiDeviceIterator>()));
-  }
-
- private:
-  // During the first Compute(), resource is either created or looked up using
-  // shared_name. In the latter case, the resource found should be verified if
-  // it is compatible with this op's configuration. The verification may fail in
-  // cases such as two graphs asking queues of the same shared name to have
-  // inconsistent capacities.
-  Status VerifyResource(MultiDeviceIterator* resource) {
-    TF_RETURN_IF_ERROR(
-        VerifyTypesMatch(output_types_, resource->output_types()));
-    TF_RETURN_IF_ERROR(
-        VerifyShapesCompatible(output_shapes_, resource->output_shapes()));
-    return Status::OK();
-  }
-
-  mutex mu_;
-  ContainerInfo cinfo_;  // Written once under mu_ then constant afterwards.
-  MultiDeviceIterator* resource_ GUARDED_BY(mu_) = nullptr;
-  DataTypeVector output_types_;
-  std::vector<PartialTensorShape> output_shapes_;
-  const int graph_def_version_;
-  string name_;
-  string container_;
-  std::vector<string> devices_;
-};
-
-REGISTER_KERNEL_BUILDER(Name("MultiDeviceIterator").Device(DEVICE_CPU),
-                        MultiDeviceIteratorHandleOp);
-
-// Calls init on the MultiDeviceIterator.
-class MultiDeviceIteratorInitOp : public OpKernel {
- public:
-  explicit MultiDeviceIteratorInitOp(OpKernelConstruction* ctx)
-      : OpKernel(ctx) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor* tensor_max_buffer_size;
-    OP_REQUIRES_OK(ctx, ctx->input("max_buffer_size", &tensor_max_buffer_size));
-    int64 max_buffer_size = tensor_max_buffer_size->scalar<int64>()();
-
-    DatasetBase* dataset;
-    OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset));
-    MultiDeviceIterator* resource;
-    OP_REQUIRES_OK(ctx,
-                   LookupResource(ctx, HandleFromInput(ctx, 1), &resource));
-    core::ScopedUnref unref(resource);
-
-    std::unique_ptr<IteratorBase> iterator;
-    IteratorContext iter_ctx(ctx);
-    iter_ctx.set_lib(resource->lib());
-    OP_REQUIRES_OK(
-        ctx, dataset->MakeIterator(std::move(iter_ctx), "Iterator", &iterator));
-    int64 incarnation_id;
-    OP_REQUIRES_OK(ctx, resource->Init(std::move(iterator), max_buffer_size,
-                                       &incarnation_id));
-    Tensor tensor_incarnation_id(DT_INT64, TensorShape({}));
-    tensor_incarnation_id.scalar<int64>()() = incarnation_id;
-    OP_REQUIRES_OK(ctx,
-                   ctx->set_output("incarnation_id", tensor_incarnation_id));
-  }
-};
-
-REGISTER_KERNEL_BUILDER(Name("MultiDeviceIteratorInit").Device(DEVICE_CPU),
-                        MultiDeviceIteratorInitOp);
-
-// Calls GetNextFromShard(shard) and returns a vector of Tensors as output.
-// TODO(rohanj): Implement using BackgroundWorker that Derek built?
-class MultiDeviceIteratorGetNextFromShardOp : public AsyncOpKernel {
- public:
-  explicit MultiDeviceIteratorGetNextFromShardOp(OpKernelConstruction* ctx)
-      : AsyncOpKernel(ctx),
-        thread_pool_(new thread::ThreadPool(
-            ctx->env(), ThreadOptions(),
-            strings::StrCat("multi_device_iterator_get_next_thread_",
-                            SanitizeThreadSuffix(name())),
-            1 /* num_threads */, false /* low_latency_hint */)) {}
-
-  void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
-    const Tensor* tensor_shard_num;
-    OP_REQUIRES_OK_ASYNC(ctx, ctx->input("shard_num", &tensor_shard_num), done);
-    int32 shard_num = tensor_shard_num->scalar<int32>()();
-
-    const Tensor* tensor_incarnation_id;
-    OP_REQUIRES_OK_ASYNC(
-        ctx, ctx->input("incarnation_id", &tensor_incarnation_id), done);
-    int64 incarnation_id = tensor_incarnation_id->scalar<int64>()();
-
-    MultiDeviceIterator* iterator;
-    OP_REQUIRES_OK_ASYNC(
-        ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator), done);
-    thread_pool_->Schedule(std::bind(
-        [ctx, iterator, shard_num, incarnation_id](DoneCallback done) {
-          IteratorContext::Params params;
-          params.env = ctx->env();
-          params.runner = *(ctx->runner());
-          params.function_library = iterator->function_library();
-          DeviceBase* device = ctx->function_library()->device();
-          params.allocator_getter = [device](AllocatorAttributes attrs) {
-            return device->GetAllocator(attrs);
-          };
-          IteratorContext iter_ctx(std::move(params));
-
-          MultiDeviceIteratorCallback callback = std::bind(
-              [ctx](const HostBufferElement& elem, DoneCallback done) {
-                // iterator->Unref();
-                Status s = elem.status;
-                if (!s.ok()) {
-                  ctx->SetStatus(s);
-                } else if (elem.end_of_sequence) {
-                  ctx->SetStatus(errors::OutOfRange("End of sequence"));
-                } else {
-                  for (int i = 0; i < elem.value.size(); ++i) {
-                    ctx->set_output(i, elem.value[i]);
-                  }
-                }
-                done();
-              },
-              std::placeholders::_1, std::move(done));
-
-          iterator->GetNextFromShard(&iter_ctx, shard_num, incarnation_id,
-                                     callback);
-          iterator->Unref();
-        },
-        std::move(done)));
-  }
-
- private:
-  std::unique_ptr<thread::ThreadPool> thread_pool_;
-};
-
-REGISTER_KERNEL_BUILDER(
-    Name("MultiDeviceIteratorGetNextFromShard").Device(DEVICE_CPU),
-    MultiDeviceIteratorGetNextFromShardOp);
-
-class MultiDeviceIteratorToStringHandleOp : public OpKernel {
- public:
-  explicit MultiDeviceIteratorToStringHandleOp(OpKernelConstruction* ctx)
-      : OpKernel(ctx) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor& resource_handle_t = ctx->input(0);
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(resource_handle_t.shape()),
-                errors::InvalidArgument("resource_handle must be a scalar"));
-
-    // Validate that the handle corresponds to a real resource, and
-    // that it is an MultiDeviceIterator.
-    MultiDeviceIterator* resource;
-    OP_REQUIRES_OK(ctx,
-                   LookupResource(ctx, HandleFromInput(ctx, 0), &resource));
-    resource->Unref();
-
-    Tensor* string_handle_t;
-    OP_REQUIRES_OK(ctx,
-                   ctx->allocate_output(0, TensorShape({}), &string_handle_t));
-    string_handle_t->scalar<string>()() =
-        resource_handle_t.scalar<ResourceHandle>()().SerializeAsString();
-  }
-};
-
-REGISTER_KERNEL_BUILDER(
-    Name("MultiDeviceIteratorToStringHandle").Device(DEVICE_CPU),
-    MultiDeviceIteratorToStringHandleOp);
-
-class MultiDeviceIteratorFromStringHandleOp : public OpKernel {
- public:
-  explicit MultiDeviceIteratorFromStringHandleOp(OpKernelConstruction* ctx)
-      : OpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
-    OP_REQUIRES(
-        ctx,
-        output_types_.empty() || output_shapes_.empty() ||
-            output_types_.size() == output_shapes_.size(),
-        errors::InvalidArgument("If both 'output_types' and 'output_shapes' "
-                                "are set, they must have the same length."));
-  }
-
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor& string_handle_t = ctx->input(0);
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(string_handle_t.shape()),
-                errors::InvalidArgument("string_handle must be a scalar"));
-
-    ResourceHandle resource_handle;
-    OP_REQUIRES(
-        ctx,
-        resource_handle.ParseFromString(string_handle_t.scalar<string>()()),
-        errors::InvalidArgument(
-            "Could not parse string_handle as a valid ResourceHandle"));
-
-    OP_REQUIRES(
-        ctx, resource_handle.device() == ctx->device()->attributes().name(),
-        errors::InvalidArgument("Attempted create an iterator on device \"",
-                                ctx->device()->attributes().name(),
-                                "\" from handle defined on device \"",
-                                resource_handle.device(), "\""));
-
-    // Validate that the handle corresponds to a real resource, and
-    // that it is an MultiDeviceIterator.
-    MultiDeviceIterator* resource;
-    OP_REQUIRES_OK(ctx, LookupResource(ctx, resource_handle, &resource));
-    core::ScopedUnref unref_iterator(resource);
-    if (!output_types_.empty()) {
-      OP_REQUIRES_OK(ctx,
-                     VerifyTypesMatch(output_types_, resource->output_types()));
-    }
-    if (!output_shapes_.empty()) {
-      OP_REQUIRES_OK(ctx, VerifyShapesCompatible(output_shapes_,
-                                                 resource->output_shapes()));
-    }
-
-    Tensor* resource_handle_t;
-    OP_REQUIRES_OK(
-        ctx, ctx->allocate_output(0, TensorShape({}), &resource_handle_t));
-    resource_handle_t->scalar<ResourceHandle>()() = resource_handle;
-  }
-
- private:
-  DataTypeVector output_types_;
-  std::vector<PartialTensorShape> output_shapes_;
-};
-
-REGISTER_KERNEL_BUILDER(
-    Name("MultiDeviceIteratorFromStringHandle").Device(DEVICE_CPU),
-    MultiDeviceIteratorFromStringHandleOp);
-
 }  // namespace
 }  // namespace data
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc b/tensorflow/contrib/data/ops/dataset_ops.cc
index ad410e17fe..d1a771f005 100644
--- a/tensorflow/contrib/data/ops/dataset_ops.cc
+++ b/tensorflow/contrib/data/ops/dataset_ops.cc
@@ -151,82 +151,6 @@ Resets the FunctionBufferingResource.
 function_buffer_resource: The FunctionBufferingResource handle.
 )doc");
 
-REGISTER_OP("MultiDeviceIterator")
-    .Output("handle: resource")
-    .Attr("devices: list(string) >= 1")
-    .Attr("shared_name: string")
-    .Attr("container: string")
-    .Attr("output_types: list(type) >= 1")
-    .Attr("output_shapes: list(shape) >= 1")
-    .Doc(R"doc(
-Creates a MultiDeviceIterator resource.
-
-handle: Handle to the resource created.
-devices: A list of devices the iterator works across.
-shared_name: If non-empty, this resource will be shared under the given name
-  across multiple sessions.
-container: If non-empty, this resource is placed in the given container.
-  Otherwise, a default container is used.
-output_types: The type list for the return values.
-output_shapes: The list of shapes being produced.
-)doc");
-
-REGISTER_OP("MultiDeviceIteratorInit")
-    .Input("dataset: variant")
-    .Input("multi_device_iterator: resource")
-    .Input("max_buffer_size: int64")
-    .Output("incarnation_id: int64")
-    .Doc(R"doc(
-Initializes the multi device iterator with the given dataset.
-max_buffer_size: The maximum size of the host side per device buffer to keep.
-incarnation_id: An int64 indicating which incarnation of the MultiDeviceIterator
-  is running.
-dataset: Dataset to be iterated upon.
-multi_device_iterator: A MultiDeviceIteratorResource.
-)doc");
-
-REGISTER_OP("MultiDeviceIteratorGetNextFromShard")
-    .Input("multi_device_iterator: resource")
-    .Input("shard_num: int32")
-    .Input("incarnation_id: int64")
-    .Output("components: output_types")
-    .Attr("output_types: list(type) >= 1")
-    .Attr("output_shapes: list(shape) >= 1")
-    .Doc(R"doc(
-Gets next element for the provided shard number.
-
-multi_device_iterator: A MultiDeviceIterator resource.
-shard_num: Integer representing which shard to fetch data for.
-incarnation_id: Which incarnation of the MultiDeviceIterator is running.
-components: Result of the get_next on the dataset.
-output_types: The type list for the return values.
-output_shapes: The list of shapes being produced.
-)doc");
-
-REGISTER_OP("MultiDeviceIteratorToStringHandle")
-    .Input("multi_device_iterator: resource")
-    .Output("string_handle: string")
-    .Doc(R"doc(
-Produces a string handle for the given MultiDeviceIterator.
-
-multi_device_iterator: A MultiDeviceIterator resource.
-string_handle: A string representing the resource.
-)doc");
-
-REGISTER_OP("MultiDeviceIteratorFromStringHandle")
-    .Input("string_handle: string")
-    .Output("multi_device_iterator: resource")
-    .Attr("output_types: list(type) >= 0 = []")
-    .Attr("output_shapes: list(shape) >= 0 = []")
-    .Doc(R"doc(
-Generates a MultiDeviceIterator resource from its provided string handle.
-
-string_handle: String representing the resource.
-multi_device_iterator: A MultiDeviceIterator resource.
-output_types: The type list for the return values.
-output_shapes: The list of shapes being produced.
-)doc");
-
 REGISTER_OP("ThreadPoolDataset")
     .Input("input_dataset: variant")
     .Input("thread_pool: resource")
diff --git a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
index 5b17511e41..33a64ea767 100644
--- a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
@@ -31,7 +31,6 @@ from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import test
 
@@ -944,164 +943,5 @@ class CopyToDeviceTest(test.TestCase):
           sess.run(elem_value_t)
 
 
-class MultiDeviceIteratorTest(test.TestCase):
-
-  def testNoGetNext(self):
-    dataset = dataset_ops.Dataset.range(10)
-    multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-        dataset, ["/cpu:1", "/cpu:2"])
-
-    config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
-
-  def testBasic(self):
-    dataset = dataset_ops.Dataset.range(10)
-    multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-        dataset, ["/cpu:1", "/cpu:2"])
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
-
-    config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
-      for i in range(0, 10, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
-
-  def testOneOnSameDevice(self):
-    with ops.device("/cpu:0"):
-      dataset = dataset_ops.Dataset.range(10)
-    multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-        dataset, ["/cpu:0", "/cpu:1"])
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
-
-    config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
-      for i in range(0, 10, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
-
-  def testRepeatDevices(self):
-    with ops.device("/cpu:0"):
-      dataset = dataset_ops.Dataset.range(20)
-    multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-        dataset, ["/cpu:1", "/cpu:2", "/cpu:1", "/cpu:2"])
-    elements = multi_device_iterator.get_next()
-    elem_on_1, elem_on_2, elem_on_3, elem_on_4 = elements
-
-    config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
-      for i in range(0, 20, 4):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
-        self.assertEqual(i + 2, sess.run(elem_on_3))
-        self.assertEqual(i + 3, sess.run(elem_on_4))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
-        sess.run(elem_on_3)
-        sess.run(elem_on_4)
-
-  def testNotFullyDivisible(self):
-    dataset = dataset_ops.Dataset.range(9)
-    multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-        dataset, ["/cpu:1", "/cpu:2"])
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
-
-    config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
-      for i in range(0, 8, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
-      self.assertEqual(8, sess.run(elem_on_1))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
-
-  def testUneven(self):
-    dataset = dataset_ops.Dataset.range(10)
-    multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-        dataset, ["/cpu:1", "/cpu:2"], max_buffer_size=4)
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
-
-    config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
-      for i in range(0, 10, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
-      for i in range(0, 10, 2):
-        self.assertEqual(i + 1, sess.run(elem_on_2))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
-
-  def testMultipleInitializations(self):
-    with ops.device("/cpu:0"):
-      epoch = array_ops.placeholder(dtypes.int64, shape=[])
-      dataset1 = dataset_ops.Dataset.from_tensors(epoch).repeat(1000)
-      dataset2 = dataset_ops.Dataset.range(1000)
-      dataset = dataset_ops.Dataset.zip((dataset1, dataset2))
-    multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-        dataset, ["/cpu:1", "/cpu:2"], prefetch_buffer_size=4)
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
-    init_op = multi_device_iterator.initializer
-
-    config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
-      for i in range(1000):
-        sess.run(init_op, feed_dict={epoch: i})
-        self.assertEqual([(i, 0), (i, 1)], sess.run([elem_on_1, elem_on_2]))
-
-  def testBasicGpu(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    with compat.forward_compatibility_horizon(2018, 8, 4):
-      dataset = dataset_ops.Dataset.range(10)
-      multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-          dataset, ["/cpu:1", "/gpu:0"])
-      elem_on_1, elem_on_2 = multi_device_iterator.get_next()
-
-      config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
-      with self.test_session(config=config) as sess:
-        sess.run(multi_device_iterator.initializer)
-        for i in range(0, 10, 2):
-          self.assertEqual(i, sess.run(elem_on_1))
-          self.assertEqual(i + 1, sess.run(elem_on_2))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(elem_on_1)
-          sess.run(elem_on_2)
-
-  def testUnevenGpu(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    with compat.forward_compatibility_horizon(2018, 8, 4):
-      dataset = dataset_ops.Dataset.range(10)
-      multi_device_iterator = prefetching_ops.MultiDeviceIterator(
-          dataset, ["/cpu:1", "/gpu:0"], max_buffer_size=4)
-      elem_on_1, elem_on_2 = multi_device_iterator.get_next()
-
-      config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
-      with self.test_session(config=config) as sess:
-        sess.run(multi_device_iterator.initializer)
-        for i in range(0, 10, 2):
-          self.assertEqual(i, sess.run(elem_on_1))
-        for i in range(0, 10, 2):
-          self.assertEqual(i + 1, sess.run(elem_on_2))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(elem_on_1)
-          sess.run(elem_on_2)
-
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/core/api_def/base_api/api_def_MultiDeviceIterator.pbtxt b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIterator.pbtxt
new file mode 100644
index 0000000000..4b0a5d8f65
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIterator.pbtxt
@@ -0,0 +1,43 @@
+op {
+  graph_op_name: "MultiDeviceIterator"
+  out_arg {
+    name: "handle"
+    description: <<END
+Handle to the resource created.
+END
+  }
+  attr {
+    name: "devices"
+    description: <<END
+A list of devices the iterator works across.
+END
+  }
+  attr {
+    name: "shared_name"
+    description: <<END
+If non-empty, this resource will be shared under the given name
+across multiple sessions.
+END
+  }
+  attr {
+    name: "container"
+    description: <<END
+If non-empty, this resource is placed in the given container.
+Otherwise, a default container is used.
+END
+  }
+  attr {
+    name: "output_types"
+    description: <<END
+The type list for the return values.
+END
+  }
+  attr {
+    name: "output_shapes"
+    description: <<END
+The list of shapes being produced.
+END
+  }
+  summary: "Creates a MultiDeviceIterator resource."
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorFromStringHandle.pbtxt b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorFromStringHandle.pbtxt
new file mode 100644
index 0000000000..adaacd8ab7
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorFromStringHandle.pbtxt
@@ -0,0 +1,29 @@
+op {
+  graph_op_name: "MultiDeviceIteratorFromStringHandle"
+  in_arg {
+    name: "string_handle"
+    description: <<END
+String representing the resource.
+END
+  }
+  out_arg {
+    name: "multi_device_iterator"
+    description: <<END
+A MultiDeviceIterator resource.
+END
+  }
+  attr {
+    name: "output_types"
+    description: <<END
+The type list for the return values.
+END
+  }
+  attr {
+    name: "output_shapes"
+    description: <<END
+The list of shapes being produced.
+END
+  }
+  summary: "Generates a MultiDeviceIterator resource from its provided string handle."
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorGetNextFromShard.pbtxt b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorGetNextFromShard.pbtxt
new file mode 100644
index 0000000000..f9be9188cc
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorGetNextFromShard.pbtxt
@@ -0,0 +1,41 @@
+op {
+  graph_op_name: "MultiDeviceIteratorGetNextFromShard"
+  in_arg {
+    name: "multi_device_iterator"
+    description: <<END
+A MultiDeviceIterator resource.
+END
+  }
+  in_arg {
+    name: "shard_num"
+    description: <<END
+Integer representing which shard to fetch data for.
+END
+  }
+  in_arg {
+    name: "incarnation_id"
+    description: <<END
+Which incarnation of the MultiDeviceIterator is running.
+END
+  }
+  out_arg {
+    name: "components"
+    description: <<END
+Result of the get_next on the dataset.
+END
+  }
+  attr {
+    name: "output_types"
+    description: <<END
+The type list for the return values.
+END
+  }
+  attr {
+    name: "output_shapes"
+    description: <<END
+The list of shapes being produced.
+END
+  }
+  summary: "Gets next element for the provided shard number."
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorInit.pbtxt b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorInit.pbtxt
new file mode 100644
index 0000000000..6b54fa1307
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorInit.pbtxt
@@ -0,0 +1,30 @@
+op {
+  graph_op_name: "MultiDeviceIteratorInit"
+  in_arg {
+    name: "dataset"
+    description: <<END
+Dataset to be iterated upon.
+END
+  }
+  in_arg {
+    name: "multi_device_iterator"
+    description: <<END
+A MultiDeviceIteratorResource.
+END
+  }
+  in_arg {
+    name: "max_buffer_size"
+    description: <<END
+The maximum size of the host side per device buffer to keep.
+END
+  }
+  out_arg {
+    name: "incarnation_id"
+    description: <<END
+An int64 indicating which incarnation of the MultiDeviceIterator
+is running.
+END
+  }
+  summary: "Initializes the multi device iterator with the given dataset."
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorToStringHandle.pbtxt b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorToStringHandle.pbtxt
new file mode 100644
index 0000000000..1f1fdf99b4
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_MultiDeviceIteratorToStringHandle.pbtxt
@@ -0,0 +1,17 @@
+op {
+  graph_op_name: "MultiDeviceIteratorToStringHandle"
+  in_arg {
+    name: "multi_device_iterator"
+    description: <<END
+A MultiDeviceIterator resource.
+END
+  }
+  out_arg {
+    name: "string_handle"
+    description: <<END
+A string representing the resource.
+END
+  }
+  summary: "Produces a string handle for the given MultiDeviceIterator."
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index b3c359010d..87efdff789 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -627,6 +627,20 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "multi_device_iterator_ops",
+    srcs = ["multi_device_iterator_ops.cc"],
+    deps = [
+        ":dataset",
+        ":dataset_utils",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:ops_util",
+    ],
+)
+
 tf_kernel_library(
     name = "optional_ops",
     srcs = ["optional_ops.cc"],
@@ -722,6 +736,7 @@ tf_kernel_library(
         ":map_dataset_op",
         ":map_defun_op",
         ":model_dataset_op",
+        ":multi_device_iterator_ops",
         ":optimize_dataset_op",
         ":optional_ops",
         ":padded_batch_dataset_op",
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index e7ac368ae3..e10833f525 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -44,5 +44,42 @@ Status MakeIteratorFromInputElement(
       ctx, strings::StrCat(prefix, "[", thread_index, "]"), out_iterator);
 }
 
+Status VerifyTypesMatch(const DataTypeVector& expected,
+                        const DataTypeVector& received) {
+  if (expected.size() != received.size()) {
+    return errors::InvalidArgument(
+        "Number of components does not match: expected ", expected.size(),
+        " types but got ", received.size(), ".");
+  }
+  for (size_t i = 0; i < expected.size(); ++i) {
+    if (expected[i] != received[i]) {
+      return errors::InvalidArgument("Data type mismatch at component ", i,
+                                     ": expected ", DataTypeString(expected[i]),
+                                     " but got ", DataTypeString(received[i]),
+                                     ".");
+    }
+  }
+  return Status::OK();
+}
+
+Status VerifyShapesCompatible(const std::vector<PartialTensorShape>& expected,
+                              const std::vector<PartialTensorShape>& received) {
+  if (expected.size() != received.size()) {
+    return errors::InvalidArgument(
+        "Number of components does not match: expected ", expected.size(),
+        " shapes but got ", received.size(), ".");
+  }
+  for (size_t i = 0; i < expected.size(); ++i) {
+    if (!expected[i].IsCompatibleWith(received[i])) {
+      return errors::InvalidArgument("Incompatible shapes at component ", i,
+                                     ": expected ", expected[i].DebugString(),
+                                     " but got ", received[i].DebugString(),
+                                     ".");
+    }
+  }
+
+  return Status::OK();
+}
+
 }  // namespace data
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index 234856ea39..6ec1350cd4 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -27,6 +27,16 @@ Status MakeIteratorFromInputElement(
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
     std::unique_ptr<IteratorBase>* out_iterator);
 
+// Returns Status::OK() if `expected` and `received` types match,
+// errors::InvalidArgument otherwise.
+Status VerifyTypesMatch(const DataTypeVector& expected,
+                        const DataTypeVector& received);
+
+// Returns Status::OK() if `expected` and `received` shapes are compatible,
+// errors::InvalidArgument otherwise.
+Status VerifyShapesCompatible(const std::vector<PartialTensorShape>& expected,
+                              const std::vector<PartialTensorShape>& received);
+
 }  // namespace data
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 30c6585ba2..c0bc507ec0 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -44,43 +44,6 @@ namespace {
 
 const char kIteratorVariantTypeName[] = "tensorflow::Iterator";
 
-Status VerifyTypesMatch(const DataTypeVector& expected,
-                        const DataTypeVector& received) {
-  if (expected.size() != received.size()) {
-    return errors::InvalidArgument(
-        "Number of components does not match: expected ", expected.size(),
-        " types but got ", received.size(), ".");
-  }
-  for (size_t i = 0; i < expected.size(); ++i) {
-    if (expected[i] != received[i]) {
-      return errors::InvalidArgument("Data type mismatch at component ", i,
-                                     ": expected ", DataTypeString(expected[i]),
-                                     " but got ", DataTypeString(received[i]),
-                                     ".");
-    }
-  }
-  return Status::OK();
-}
-
-Status VerifyShapesCompatible(const std::vector<PartialTensorShape>& expected,
-                              const std::vector<PartialTensorShape>& received) {
-  if (expected.size() != received.size()) {
-    return errors::InvalidArgument(
-        "Number of components does not match: expected ", expected.size(),
-        " shapes but got ", received.size(), ".");
-  }
-  for (size_t i = 0; i < expected.size(); ++i) {
-    if (!expected[i].IsCompatibleWith(received[i])) {
-      return errors::InvalidArgument("Incompatible shapes at component ", i,
-                                     ": expected ", expected[i].DebugString(),
-                                     " but got ", received[i].DebugString(),
-                                     ".");
-    }
-  }
-
-  return Status::OK();
-}
-
 }  // namespace
 
 class IteratorResource : public ResourceBase {
diff --git a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
new file mode 100644
index 0000000000..5f143967d9
--- /dev/null
+++ b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
@@ -0,0 +1,633 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <deque>
+
+#include "tensorflow/core/common_runtime/process_function_library_runtime.h"
+#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/resource_op_kernel.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/device_name_utils.h"
+
+namespace tensorflow {
+namespace data {
+namespace {
+
+struct HostBufferElement {
+  Status status;
+  bool end_of_sequence;
+  std::vector<Tensor> value;
+};
+
+using MultiDeviceIteratorCallback =
+    std::function<void(const HostBufferElement&)>;
+
+class MultiDeviceIterator : public ResourceBase {
+ public:
+  MultiDeviceIterator(const DataTypeVector& output_types,
+                      const std::vector<PartialTensorShape>& output_shapes,
+                      const std::vector<string>& devices,
+                      std::unique_ptr<FunctionLibraryDefinition> flib_def,
+                      std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
+                      FunctionLibraryRuntime* lib)
+      : output_types_(output_types),
+        output_shapes_(output_shapes),
+        devices_(devices),
+        flib_def_(std::move(flib_def)),
+        pflr_(std::move(pflr)),
+        lib_(lib) {
+    DCHECK(lib_ != nullptr);
+  }
+
+  string DebugString() override {
+    return strings::StrCat("MultiDeviceIterator for ", devices_.size(),
+                           " devices");
+  }
+
+  Status Init(std::unique_ptr<IteratorBase> iterator, int64 max_buffer_size,
+              int64* incarnation_id) {
+    if (iterator) {
+      TF_RETURN_IF_ERROR(
+          VerifyTypesMatch(output_types_, iterator->output_dtypes()));
+      TF_RETURN_IF_ERROR(
+          VerifyShapesCompatible(output_shapes_, iterator->output_shapes()));
+    }
+
+    mutex_lock l(mu_);
+    if (multi_device_buffer_) {
+      multi_device_buffer_->Reset();
+    }
+
+    ++incarnation_id_;
+    *incarnation_id = incarnation_id_;
+
+    multi_device_buffer_.reset(
+        new MultiDeviceBuffer(devices_.size(), max_buffer_size, incarnation_id_,
+                              std::move(iterator)));
+    return Status::OK();
+  }
+
+  void GetNextFromShard(IteratorContext* ctx, int shard_num,
+                        int64 incarnation_id,
+                        MultiDeviceIteratorCallback callback) {
+    if (lib_ != nullptr) {
+      ctx->set_lib(lib_);
+    }
+    tf_shared_lock l(mu_);
+    multi_device_buffer_->GetNextFromShard(ctx, shard_num, incarnation_id,
+                                           std::move(callback));
+  }
+
+  const DataTypeVector& output_types() const { return output_types_; }
+
+  const std::vector<PartialTensorShape>& output_shapes() const {
+    return output_shapes_;
+  }
+
+  std::shared_ptr<const FunctionLibraryDefinition> function_library() {
+    tf_shared_lock l(mu_);
+    return lib_def_;
+  }
+
+  FunctionLibraryRuntime* const lib() {
+    tf_shared_lock l(mu_);
+    return lib_;
+  }
+
+ private:
+  // A private class that uses a background thread to keep a per device buffer
+  // full.
+  class MultiDeviceBuffer {
+   public:
+    MultiDeviceBuffer(size_t size, int64 max_buffer_size, int64 incarnation_id,
+                      std::unique_ptr<IteratorBase> host_iterator)
+        : buffer_(size),
+          size_(size),
+          max_buffer_size_(max_buffer_size),
+          incarnation_id_(incarnation_id),
+          host_iterator_(std::move(host_iterator)) {}
+
+    ~MultiDeviceBuffer() {
+      {
+        mutex_lock l(mu_);
+        if (!background_thread_started_) return;
+      }
+      Reset();
+    }
+
+    void Reset() LOCKS_EXCLUDED(mu_) {
+      {
+        mutex_lock l(mu_);
+        if (background_thread_finished_) {
+          return;
+        }
+
+        cancelled_ = true;
+        // Wake up the background thread.
+        for (int i = 0; i < size_; ++i) {
+          buffer_[i].cond_var.notify_all();
+        }
+
+        // Make sure background thread has finished first.
+        while (!background_thread_finished_) {
+          shutdown_cond_var_.wait(l);
+        }
+      }
+      RunPendingCallbacks();
+    }
+
+    void GetNextFromShard(IteratorContext* ctx, int shard_num,
+                          int64 incarnation_id,
+                          MultiDeviceIteratorCallback callback) {
+      HostBufferElement elem;
+      if (incarnation_id_ != incarnation_id) {
+        elem.status = errors::InvalidArgument("Invalid incarnation id");
+        callback(elem);
+        return;
+      }
+
+      bool produced_output = false;
+      {
+        mutex_lock l(mu_);
+        if (cancelled_) {
+          elem.status = errors::Cancelled("Cancelled Multidevice iterator");
+          callback(elem);
+          return;
+        }
+
+        EnsureBackgroundThreadStarted(ctx);
+
+        if (!buffer_[shard_num].data.empty()) {
+          produced_output = true;
+          std::swap(elem, buffer_[shard_num].data.front());
+          buffer_[shard_num].data.pop_front();
+          // Wake up background thread if it is blocked on this element.
+          if (buffer_[shard_num].data.size() == max_buffer_size_ - 1) {
+            buffer_[shard_num].cond_var.notify_all();
+          }
+        } else {
+          if (background_thread_finished_) {
+            produced_output = true;
+            elem.end_of_sequence = true;
+          } else {
+            buffer_[shard_num].callbacks.push_back(std::move(callback));
+            callback = nullptr;
+          }
+        }
+      }
+
+      if (produced_output) {
+        callback(elem);
+      }
+    }
+
+   private:
+    void EnsureBackgroundThreadStarted(IteratorContext* ctx)
+        EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      if (!background_thread_) {
+        background_thread_.reset(ctx->env()->StartThread(
+            {}, "multi_device_iterator_background_thread",
+            std::bind(&MultiDeviceIterator::MultiDeviceBuffer::BackgroundThread,
+                      this, new IteratorContext(*ctx))));
+      }
+    }
+
+    void RunPendingCallbacks() LOCKS_EXCLUDED(mu_) {
+      // Run all remaining callbacks.
+      std::vector<MultiDeviceIteratorCallback> cancellation_callbacks;
+      std::vector<HostBufferElement> cancellation_elements;
+      {
+        mutex_lock l(mu_);
+
+        for (int i = 0; i < size_; ++i) {
+          while (!buffer_[i].callbacks.empty()) {
+            if (buffer_[i].data.empty()) {
+              HostBufferElement elem;
+              elem.status =
+                  errors::Cancelled("Cancelled and buffer not filled.");
+              cancellation_elements.push_back(std::move(elem));
+            } else {
+              cancellation_elements.push_back(
+                  std::move(buffer_[i].data.front()));
+              buffer_[i].data.pop_front();
+            }
+            cancellation_callbacks.push_back(
+                std::move(buffer_[i].callbacks.front()));
+            buffer_[i].callbacks.pop_front();
+          }
+        }
+      }
+      for (int i = 0; i < cancellation_callbacks.size(); ++i) {
+        cancellation_callbacks[i](cancellation_elements[i]);
+      }
+    }
+
+    void BackgroundThread(IteratorContext* ctx) {
+      {
+        mutex_lock l(mu_);
+        background_thread_started_ = true;
+      }
+      std::unique_ptr<IteratorContext> cleanup(ctx);
+      int shard_to_fetch = 0;
+      while (true) {
+        HostBufferElement elem;
+        MultiDeviceIteratorCallback callback = nullptr;
+        bool end_of_iterator = false;
+
+        {
+          mutex_lock l(mu_);
+          while (!cancelled_ &&
+                 buffer_[shard_to_fetch].data.size() >= max_buffer_size_) {
+            buffer_[shard_to_fetch].cond_var.wait(l);
+          }
+
+          if (cancelled_) {
+            background_thread_finished_ = true;
+            shutdown_cond_var_.notify_all();
+            return;
+          }
+        }
+
+        elem.status =
+            host_iterator_->GetNext(ctx, &elem.value, &elem.end_of_sequence);
+
+        if (elem.status.ok() && elem.end_of_sequence) {
+          end_of_iterator = true;
+        }
+
+        {
+          mutex_lock l(mu_);
+          // Try to find a callback, else just push stuff into buffer.
+          if (!buffer_[shard_to_fetch].callbacks.empty()) {
+            callback = buffer_[shard_to_fetch].callbacks.front();
+            buffer_[shard_to_fetch].callbacks.pop_front();
+          } else {
+            buffer_[shard_to_fetch].data.push_back(std::move(elem));
+            elem = HostBufferElement();
+          }
+        }
+
+        if (callback) {
+          (*ctx->runner())(std::bind(std::move(callback), std::move(elem)));
+        }
+
+        // Finish off the thread if we reach the end of the iterator. Runs
+        // pending callbacks.
+        if (end_of_iterator) {
+          {
+            mutex_lock l(mu_);
+            background_thread_finished_ = true;
+            shutdown_cond_var_.notify_all();
+          }
+          RunPendingCallbacks();
+          return;
+        }
+        shard_to_fetch = (shard_to_fetch + 1) % size_;
+      }
+    }
+
+    struct HostBuffer {
+      condition_variable cond_var;
+      std::deque<HostBufferElement> data;
+      std::deque<MultiDeviceIteratorCallback> callbacks;
+    };
+
+    mutex mu_;
+    std::unique_ptr<Thread> background_thread_ GUARDED_BY(mu_);
+    bool background_thread_finished_ GUARDED_BY(mu_) = false;
+    bool background_thread_started_ GUARDED_BY(mu_) = false;
+    bool cancelled_ GUARDED_BY(mu_) = false;
+    condition_variable shutdown_cond_var_ GUARDED_BY(mu_);
+
+    std::vector<HostBuffer> buffer_;
+
+    const size_t size_;
+    const int64 max_buffer_size_;
+    const int64 incarnation_id_;
+    const std::unique_ptr<IteratorBase> host_iterator_;
+  };
+
+  mutex mu_;
+  const DataTypeVector output_types_;
+  const std::vector<PartialTensorShape> output_shapes_;
+  const std::vector<string> devices_;
+  const std::unique_ptr<FunctionLibraryDefinition> flib_def_;
+  const std::unique_ptr<ProcessFunctionLibraryRuntime> pflr_;
+  FunctionLibraryRuntime* const lib_ = nullptr;  // not owned.
+  std::shared_ptr<const FunctionLibraryDefinition> lib_def_ GUARDED_BY(mu_);
+
+  int64 incarnation_id_ GUARDED_BY(mu_) = 0;
+  std::unique_ptr<MultiDeviceBuffer> multi_device_buffer_ GUARDED_BY(mu_);
+};
+
+// Just creates a MultiDeviceIterator and returns it.
+class MultiDeviceIteratorHandleOp : public OpKernel {
+ public:
+  explicit MultiDeviceIteratorHandleOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx), graph_def_version_(ctx->graph_def_version()) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("shared_name", &name_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("container", &container_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("devices", &devices_));
+  }
+
+  // The resource is deleted from the resource manager only when it is private
+  // to kernel.
+  ~MultiDeviceIteratorHandleOp() override {
+    if (resource_ != nullptr) {
+      resource_->Unref();
+      if (cinfo_.resource_is_private_to_kernel()) {
+        if (!cinfo_.resource_manager()
+                 ->template Delete<MultiDeviceIterator>(cinfo_.container(),
+                                                        cinfo_.name())
+                 .ok()) {
+          // Do nothing; the resource can have been deleted by session resets.
+        }
+      }
+    }
+  }
+
+  void Compute(OpKernelContext* context) override LOCKS_EXCLUDED(mu_) {
+    {
+      mutex_lock l(mu_);
+      if (resource_ == nullptr) {
+        FunctionLibraryRuntime* lib;
+        std::unique_ptr<FunctionLibraryDefinition> flib_def(nullptr);
+        std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(nullptr);
+        OP_REQUIRES_OK(context, context->function_library()->Clone(
+                                    &flib_def, &pflr, &lib));
+        ResourceMgr* mgr = context->resource_manager();
+        OP_REQUIRES_OK(context, cinfo_.Init(mgr, def()));
+
+        MultiDeviceIterator* resource;
+        OP_REQUIRES_OK(
+            context,
+            mgr->LookupOrCreate<MultiDeviceIterator>(
+                cinfo_.container(), cinfo_.name(), &resource,
+                [this, lib, &flib_def, &pflr](MultiDeviceIterator** ret)
+                    EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                      *ret = new MultiDeviceIterator(
+                          output_types_, output_shapes_, devices_,
+                          std::move(flib_def), std::move(pflr), lib);
+                      return Status::OK();
+                    }));
+
+        Status s = VerifyResource(resource);
+        if (TF_PREDICT_FALSE(!s.ok())) {
+          resource->Unref();
+          context->SetStatus(s);
+          return;
+        }
+
+        resource_ = resource;
+      }
+    }
+    OP_REQUIRES_OK(context, MakeResourceHandleToOutput(
+                                context, 0, cinfo_.container(), cinfo_.name(),
+                                MakeTypeIndex<MultiDeviceIterator>()));
+  }
+
+ private:
+  // During the first Compute(), resource is either created or looked up using
+  // shared_name. In the latter case, the resource found should be verified if
+  // it is compatible with this op's configuration. The verification may fail in
+  // cases such as two graphs asking queues of the same shared name to have
+  // inconsistent capacities.
+  Status VerifyResource(MultiDeviceIterator* resource) {
+    TF_RETURN_IF_ERROR(
+        VerifyTypesMatch(output_types_, resource->output_types()));
+    TF_RETURN_IF_ERROR(
+        VerifyShapesCompatible(output_shapes_, resource->output_shapes()));
+    return Status::OK();
+  }
+
+  mutex mu_;
+  ContainerInfo cinfo_;  // Written once under mu_ then constant afterwards.
+  MultiDeviceIterator* resource_ GUARDED_BY(mu_) = nullptr;
+  DataTypeVector output_types_;
+  std::vector<PartialTensorShape> output_shapes_;
+  const int graph_def_version_;
+  string name_;
+  string container_;
+  std::vector<string> devices_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("MultiDeviceIterator").Device(DEVICE_CPU),
+                        MultiDeviceIteratorHandleOp);
+
+// Calls init on the MultiDeviceIterator.
+class MultiDeviceIteratorInitOp : public OpKernel {
+ public:
+  explicit MultiDeviceIteratorInitOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor* tensor_max_buffer_size;
+    OP_REQUIRES_OK(ctx, ctx->input("max_buffer_size", &tensor_max_buffer_size));
+    int64 max_buffer_size = tensor_max_buffer_size->scalar<int64>()();
+
+    DatasetBase* dataset;
+    OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset));
+    MultiDeviceIterator* resource;
+    OP_REQUIRES_OK(ctx,
+                   LookupResource(ctx, HandleFromInput(ctx, 1), &resource));
+    core::ScopedUnref unref(resource);
+
+    std::unique_ptr<IteratorBase> iterator;
+    IteratorContext iter_ctx(ctx);
+    iter_ctx.set_lib(resource->lib());
+    OP_REQUIRES_OK(
+        ctx, dataset->MakeIterator(std::move(iter_ctx), "Iterator", &iterator));
+    int64 incarnation_id;
+    OP_REQUIRES_OK(ctx, resource->Init(std::move(iterator), max_buffer_size,
+                                       &incarnation_id));
+    Tensor tensor_incarnation_id(DT_INT64, TensorShape({}));
+    tensor_incarnation_id.scalar<int64>()() = incarnation_id;
+    OP_REQUIRES_OK(ctx,
+                   ctx->set_output("incarnation_id", tensor_incarnation_id));
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("MultiDeviceIteratorInit").Device(DEVICE_CPU),
+                        MultiDeviceIteratorInitOp);
+
+// Calls GetNextFromShard(shard) and returns a vector of Tensors as output.
+// TODO(rohanj): Implement using BackgroundWorker that Derek built?
+class MultiDeviceIteratorGetNextFromShardOp : public AsyncOpKernel {
+ public:
+  explicit MultiDeviceIteratorGetNextFromShardOp(OpKernelConstruction* ctx)
+      : AsyncOpKernel(ctx),
+        thread_pool_(new thread::ThreadPool(
+            ctx->env(), ThreadOptions(),
+            strings::StrCat("multi_device_iterator_get_next_thread_",
+                            SanitizeThreadSuffix(name())),
+            1 /* num_threads */, false /* low_latency_hint */)) {}
+
+  void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
+    const Tensor* tensor_shard_num;
+    OP_REQUIRES_OK_ASYNC(ctx, ctx->input("shard_num", &tensor_shard_num), done);
+    int32 shard_num = tensor_shard_num->scalar<int32>()();
+
+    const Tensor* tensor_incarnation_id;
+    OP_REQUIRES_OK_ASYNC(
+        ctx, ctx->input("incarnation_id", &tensor_incarnation_id), done);
+    int64 incarnation_id = tensor_incarnation_id->scalar<int64>()();
+
+    MultiDeviceIterator* iterator;
+    OP_REQUIRES_OK_ASYNC(
+        ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator), done);
+    thread_pool_->Schedule(std::bind(
+        [ctx, iterator, shard_num, incarnation_id](DoneCallback done) {
+          IteratorContext::Params params;
+          params.env = ctx->env();
+          params.runner = *(ctx->runner());
+          params.function_library = iterator->function_library();
+          DeviceBase* device = ctx->function_library()->device();
+          params.allocator_getter = [device](AllocatorAttributes attrs) {
+            return device->GetAllocator(attrs);
+          };
+          IteratorContext iter_ctx(std::move(params));
+
+          MultiDeviceIteratorCallback callback = std::bind(
+              [ctx](const HostBufferElement& elem, DoneCallback done) {
+                // iterator->Unref();
+                Status s = elem.status;
+                if (!s.ok()) {
+                  ctx->SetStatus(s);
+                } else if (elem.end_of_sequence) {
+                  ctx->SetStatus(errors::OutOfRange("End of sequence"));
+                } else {
+                  for (int i = 0; i < elem.value.size(); ++i) {
+                    ctx->set_output(i, elem.value[i]);
+                  }
+                }
+                done();
+              },
+              std::placeholders::_1, std::move(done));
+
+          iterator->GetNextFromShard(&iter_ctx, shard_num, incarnation_id,
+                                     callback);
+          iterator->Unref();
+        },
+        std::move(done)));
+  }
+
+ private:
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
+};
+
+REGISTER_KERNEL_BUILDER(
+    Name("MultiDeviceIteratorGetNextFromShard").Device(DEVICE_CPU),
+    MultiDeviceIteratorGetNextFromShardOp);
+
+class MultiDeviceIteratorToStringHandleOp : public OpKernel {
+ public:
+  explicit MultiDeviceIteratorToStringHandleOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& resource_handle_t = ctx->input(0);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(resource_handle_t.shape()),
+                errors::InvalidArgument("resource_handle must be a scalar"));
+
+    // Validate that the handle corresponds to a real resource, and
+    // that it is an MultiDeviceIterator.
+    MultiDeviceIterator* resource;
+    OP_REQUIRES_OK(ctx,
+                   LookupResource(ctx, HandleFromInput(ctx, 0), &resource));
+    resource->Unref();
+
+    Tensor* string_handle_t;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(0, TensorShape({}), &string_handle_t));
+    string_handle_t->scalar<string>()() =
+        resource_handle_t.scalar<ResourceHandle>()().SerializeAsString();
+  }
+};
+
+REGISTER_KERNEL_BUILDER(
+    Name("MultiDeviceIteratorToStringHandle").Device(DEVICE_CPU),
+    MultiDeviceIteratorToStringHandleOp);
+
+class MultiDeviceIteratorFromStringHandleOp : public OpKernel {
+ public:
+  explicit MultiDeviceIteratorFromStringHandleOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+    OP_REQUIRES(
+        ctx,
+        output_types_.empty() || output_shapes_.empty() ||
+            output_types_.size() == output_shapes_.size(),
+        errors::InvalidArgument("If both 'output_types' and 'output_shapes' "
+                                "are set, they must have the same length."));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& string_handle_t = ctx->input(0);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(string_handle_t.shape()),
+                errors::InvalidArgument("string_handle must be a scalar"));
+
+    ResourceHandle resource_handle;
+    OP_REQUIRES(
+        ctx,
+        resource_handle.ParseFromString(string_handle_t.scalar<string>()()),
+        errors::InvalidArgument(
+            "Could not parse string_handle as a valid ResourceHandle"));
+
+    OP_REQUIRES(
+        ctx, resource_handle.device() == ctx->device()->attributes().name(),
+        errors::InvalidArgument("Attempted create an iterator on device \"",
+                                ctx->device()->attributes().name(),
+                                "\" from handle defined on device \"",
+                                resource_handle.device(), "\""));
+
+    // Validate that the handle corresponds to a real resource, and
+    // that it is an MultiDeviceIterator.
+    MultiDeviceIterator* resource;
+    OP_REQUIRES_OK(ctx, LookupResource(ctx, resource_handle, &resource));
+    core::ScopedUnref unref_iterator(resource);
+    if (!output_types_.empty()) {
+      OP_REQUIRES_OK(ctx,
+                     VerifyTypesMatch(output_types_, resource->output_types()));
+    }
+    if (!output_shapes_.empty()) {
+      OP_REQUIRES_OK(ctx, VerifyShapesCompatible(output_shapes_,
+                                                 resource->output_shapes()));
+    }
+
+    Tensor* resource_handle_t;
+    OP_REQUIRES_OK(
+        ctx, ctx->allocate_output(0, TensorShape({}), &resource_handle_t));
+    resource_handle_t->scalar<ResourceHandle>()() = resource_handle;
+  }
+
+ private:
+  DataTypeVector output_types_;
+  std::vector<PartialTensorShape> output_shapes_;
+};
+
+REGISTER_KERNEL_BUILDER(
+    Name("MultiDeviceIteratorFromStringHandle").Device(DEVICE_CPU),
+    MultiDeviceIteratorFromStringHandleOp);
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 4d3f272c1b..1ada623cf5 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -932,4 +932,41 @@ REGISTER_OP("MapDefun")
       return Status::OK();
     });
 
+REGISTER_OP("MultiDeviceIterator")
+    .Output("handle: resource")
+    .Attr("devices: list(string) >= 1")
+    .Attr("shared_name: string")
+    .Attr("container: string")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("MultiDeviceIteratorInit")
+    .Input("dataset: variant")
+    .Input("multi_device_iterator: resource")
+    .Input("max_buffer_size: int64")
+    .Output("incarnation_id: int64")
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("MultiDeviceIteratorGetNextFromShard")
+    .Input("multi_device_iterator: resource")
+    .Input("shard_num: int32")
+    .Input("incarnation_id: int64")
+    .Output("components: output_types")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(IteratorGetNextShapeFn);
+
+REGISTER_OP("MultiDeviceIteratorToStringHandle")
+    .Input("multi_device_iterator: resource")
+    .Output("string_handle: string")
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("MultiDeviceIteratorFromStringHandle")
+    .Input("string_handle: string")
+    .Output("multi_device_iterator: resource")
+    .Attr("output_types: list(type) >= 0 = []")
+    .Attr("output_shapes: list(shape) >= 0 = []")
+    .SetShapeFn(shape_inference::ScalarShape);
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/data/BUILD b/tensorflow/python/data/BUILD
index 3e08c1587e..138141f4fc 100644
--- a/tensorflow/python/data/BUILD
+++ b/tensorflow/python/data/BUILD
@@ -12,6 +12,7 @@ py_library(
         "//tensorflow/python:util",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:multi_device_iterator_ops",
         "//tensorflow/python/data/ops:readers",
     ],
 )
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 17d4fec662..f97116cadd 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -408,6 +408,26 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "multi_device_iterator_test",
+    size = "small",
+    srcs = ["multi_device_iterator_test.py"],
+    additional_deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:multi_device_iterator_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_test_lib",
+    ],
+    tags = [
+        "no_windows_gpu",
+    ],
+)
+
 tf_py_test(
     name = "window_dataset_op_test",
     size = "small",
diff --git a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
new file mode 100644
index 0000000000..056664b83b
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
@@ -0,0 +1,190 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""MultiDeviceIterator tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import multi_device_iterator_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class MultiDeviceIteratorTest(test.TestCase):
+
+  def testNoGetNext(self):
+    dataset = dataset_ops.Dataset.range(10)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/cpu:2"])
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 3})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+
+  def testBasic(self):
+    dataset = dataset_ops.Dataset.range(10)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/cpu:2"])
+    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 3})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+      for i in range(0, 10, 2):
+        self.assertEqual(i, sess.run(elem_on_1))
+        self.assertEqual(i + 1, sess.run(elem_on_2))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elem_on_1)
+        sess.run(elem_on_2)
+
+  def testOneOnSameDevice(self):
+    with ops.device("/cpu:0"):
+      dataset = dataset_ops.Dataset.range(10)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:0", "/cpu:1"])
+    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 2})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+      for i in range(0, 10, 2):
+        self.assertEqual(i, sess.run(elem_on_1))
+        self.assertEqual(i + 1, sess.run(elem_on_2))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elem_on_1)
+        sess.run(elem_on_2)
+
+  def testRepeatDevices(self):
+    with ops.device("/cpu:0"):
+      dataset = dataset_ops.Dataset.range(20)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/cpu:2", "/cpu:1", "/cpu:2"])
+    elements = multi_device_iterator.get_next()
+    elem_on_1, elem_on_2, elem_on_3, elem_on_4 = elements
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 3})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+      for i in range(0, 20, 4):
+        self.assertEqual(i, sess.run(elem_on_1))
+        self.assertEqual(i + 1, sess.run(elem_on_2))
+        self.assertEqual(i + 2, sess.run(elem_on_3))
+        self.assertEqual(i + 3, sess.run(elem_on_4))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elem_on_1)
+        sess.run(elem_on_2)
+        sess.run(elem_on_3)
+        sess.run(elem_on_4)
+
+  def testNotFullyDivisible(self):
+    dataset = dataset_ops.Dataset.range(9)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/cpu:2"])
+    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 3})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+      for i in range(0, 8, 2):
+        self.assertEqual(i, sess.run(elem_on_1))
+        self.assertEqual(i + 1, sess.run(elem_on_2))
+      self.assertEqual(8, sess.run(elem_on_1))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elem_on_1)
+        sess.run(elem_on_2)
+
+  def testUneven(self):
+    dataset = dataset_ops.Dataset.range(10)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/cpu:2"], max_buffer_size=4)
+    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 3})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+      for i in range(0, 10, 2):
+        self.assertEqual(i, sess.run(elem_on_1))
+      for i in range(0, 10, 2):
+        self.assertEqual(i + 1, sess.run(elem_on_2))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elem_on_1)
+        sess.run(elem_on_2)
+
+  def testMultipleInitializations(self):
+    with ops.device("/cpu:0"):
+      epoch = array_ops.placeholder(dtypes.int64, shape=[])
+      dataset1 = dataset_ops.Dataset.from_tensors(epoch).repeat(1000)
+      dataset2 = dataset_ops.Dataset.range(1000)
+      dataset = dataset_ops.Dataset.zip((dataset1, dataset2))
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/cpu:2"], prefetch_buffer_size=4)
+    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
+    init_op = multi_device_iterator.initializer
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 3})
+    with self.test_session(config=config) as sess:
+      for i in range(1000):
+        sess.run(init_op, feed_dict={epoch: i})
+        self.assertEqual([(i, 0), (i, 1)], sess.run([elem_on_1, elem_on_2]))
+
+  def testBasicGpu(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    dataset = dataset_ops.Dataset.range(10)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/gpu:0"])
+    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+      for i in range(0, 10, 2):
+        self.assertEqual(i, sess.run(elem_on_1))
+        self.assertEqual(i + 1, sess.run(elem_on_2))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elem_on_1)
+        sess.run(elem_on_2)
+
+  def testUnevenGpu(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    dataset = dataset_ops.Dataset.range(10)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/gpu:0"], max_buffer_size=4)
+    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+      for i in range(0, 10, 2):
+        self.assertEqual(i, sess.run(elem_on_1))
+      for i in range(0, 10, 2):
+        self.assertEqual(i + 1, sess.run(elem_on_2))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elem_on_1)
+        sess.run(elem_on_2)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD
index 57517afae8..9dffc38820 100644
--- a/tensorflow/python/data/ops/BUILD
+++ b/tensorflow/python/data/ops/BUILD
@@ -19,6 +19,7 @@ py_library(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:random_seed",
         "//tensorflow/python:script_ops",
+        "//tensorflow/python:smart_cond",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
         "//tensorflow/python:tensor_shape",
@@ -83,3 +84,20 @@ py_library(
         "//tensorflow/python/data/util:sparse",
     ],
 )
+
+py_library(
+    name = "multi_device_iterator_ops",
+    srcs = ["multi_device_iterator_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":dataset_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
new file mode 100644
index 0000000000..84e8abbd83
--- /dev/null
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -0,0 +1,213 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python wrapper for prefetching_ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import functional_ops
+from tensorflow.python.ops import gen_dataset_ops
+
+
+class _PerDeviceGenerator(dataset_ops.Dataset):
+  """A `dummy` generator dataset."""
+
+  def __init__(self, shard_num, multi_device_iterator_resource, incarnation_id,
+               source_device, target_device, output_shapes, output_types,
+               output_classes):
+    self._target_device = target_device
+    self._output_types = output_types
+    self._output_shapes = output_shapes
+    self._output_classes = output_classes
+    self._flat_output_shapes = nest.flatten(
+        sparse.as_dense_shapes(self._output_shapes, self._output_classes))
+    self._flat_output_types = nest.flatten(
+        sparse.as_dense_types(self._output_types, self._output_classes))
+
+    multi_device_iterator_string_handle = (
+        gen_dataset_ops.multi_device_iterator_to_string_handle(
+            multi_device_iterator_resource))
+
+    @function.Defun()
+    def _init_func():
+      return multi_device_iterator_string_handle
+
+    @function.Defun()
+    def _remote_init_func():
+      return functional_ops.remote_call(
+          target=source_device,
+          args=_init_func.captured_inputs,
+          Tout=[dtypes.string],
+          f=_init_func)
+
+    self._init_func = _remote_init_func
+    self._init_captured_args = _remote_init_func.captured_inputs
+
+    @function.Defun(dtypes.string)
+    def _next_func(string_handle):
+      multi_device_iterator = (
+          gen_dataset_ops.multi_device_iterator_from_string_handle(
+              string_handle=string_handle,
+              output_types=self._flat_output_types,
+              output_shapes=self._flat_output_shapes))
+      return gen_dataset_ops.multi_device_iterator_get_next_from_shard(
+          multi_device_iterator=multi_device_iterator,
+          shard_num=shard_num,
+          incarnation_id=incarnation_id,
+          output_types=self._flat_output_types,
+          output_shapes=self._flat_output_shapes)
+
+    @function.Defun(dtypes.string)
+    def _remote_next_func(string_handle):
+      return functional_ops.remote_call(
+          target=source_device,
+          args=[string_handle] + _next_func.captured_inputs,
+          Tout=self._flat_output_types,
+          f=_next_func)
+
+    self._next_func = _remote_next_func
+    self._next_captured_args = _remote_next_func.captured_inputs
+
+    @function.Defun(dtypes.string)
+    def _finalize_func(unused_string_handle):
+      return array_ops.constant(0, dtypes.int64)
+
+    @function.Defun(dtypes.string)
+    def _remote_finalize_func(string_handle):
+      return functional_ops.remote_call(
+          target=source_device,
+          args=[string_handle] + _finalize_func.captured_inputs,
+          Tout=[dtypes.int64],
+          f=_finalize_func)
+
+    self._finalize_func = _remote_finalize_func
+    self._finalize_captured_args = _remote_finalize_func.captured_inputs
+
+  def _as_variant_tensor(self):
+    with ops.device(self._target_device):
+      return gen_dataset_ops.generator_dataset(
+          self._init_captured_args,
+          self._next_captured_args,
+          self._finalize_captured_args,
+          init_func=self._init_func,
+          next_func=self._next_func,
+          finalize_func=self._finalize_func,
+          output_types=self._flat_output_types,
+          output_shapes=self._flat_output_shapes)
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+
+class MultiDeviceIterator(object):
+  """An iterator over multiple devices."""
+
+  def __init__(self,
+               dataset,
+               devices,
+               max_buffer_size=1,
+               prefetch_buffer_size=1,
+               source_device="/cpu:0"):
+    """Constructs a MultiDeviceIterator.
+
+    Args:
+      dataset: The input dataset to be iterated over.
+      devices: The list of devices to fetch data to.
+      max_buffer_size: Maximum size of the host side per device buffer to keep.
+      prefetch_buffer_size: if > 1, then we setup a buffer on each device
+        to prefetch into.
+      source_device: The host device to place the `dataset` on.
+    """
+    self._dataset = dataset
+    self._devices = devices
+    self._source_device = source_device
+    self._source_device_tensor = ops.convert_to_tensor(source_device)
+
+    self._flat_output_shapes = nest.flatten(
+        sparse.as_dense_shapes(self._dataset.output_shapes,
+                               self._dataset.output_classes))
+    self._flat_output_types = nest.flatten(
+        sparse.as_dense_types(self._dataset.output_types,
+                              self._dataset.output_classes))
+
+    # Create the MultiDeviceIterator.
+    with ops.device(self._source_device):
+      self._multi_device_iterator_resource = (
+          gen_dataset_ops.multi_device_iterator(
+              devices=self._devices,
+              shared_name="",
+              container="",
+              output_types=self._flat_output_types,
+              output_shapes=self._flat_output_shapes))
+
+      # The incarnation ID is used to ensure consistency between the per-device
+      # iterators and the multi-device iterator.
+      self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
+          self._dataset._as_variant_tensor(),  # pylint: disable=protected-access
+          self._multi_device_iterator_resource,
+          max_buffer_size=max_buffer_size)
+
+    # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to
+    # initialize the device side of the pipeline. This would allow the
+    # MultiDeviceIterator to choose, for example, to move some transformations
+    # into the device side from its input. It might be useful in rewriting.
+    # Create the per device iterators.
+    self._device_iterators = []
+    i = 0
+    for device in self._devices:
+      ds = _PerDeviceGenerator(
+          i, self._multi_device_iterator_resource, self._incarnation_id,
+          self._source_device_tensor, device, self._dataset.output_shapes,
+          self._dataset.output_types, self._dataset.output_classes)
+      if prefetch_buffer_size > 0:
+        ds = ds.prefetch(prefetch_buffer_size)
+      with ops.device(device):
+        self._device_iterators.append(ds.make_initializable_iterator())
+      i += 1
+
+    device_iterator_initializers = [
+        iterator.initializer for iterator in self._device_iterators
+    ]
+    self._initializer = control_flow_ops.group(*device_iterator_initializers)
+
+  def get_next(self):
+    result = []
+    i = 0
+    for device in self._devices:
+      with ops.device(device):
+        result.append(self._device_iterators[i].get_next())
+      i += 1
+    return result
+
+  @property
+  def initializer(self):
+    return self._initializer
-- 
GitLab


From fc0150a7730aa06b825a26e65dfca944609726e7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Sep 2018 09:17:49 -0700
Subject: [PATCH 0572/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 214174935
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 128 ++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 128 ++++++++++++++++++
 2 files changed, 256 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index e30a111096..ffef0bca8e 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -35272,6 +35272,134 @@ op {
   }
   is_commutative: true
 }
+op {
+  name: "MultiDeviceIterator"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "devices"
+    type: "list(string)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+  }
+  attr {
+    name: "container"
+    type: "string"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "MultiDeviceIteratorFromStringHandle"
+  input_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "multi_device_iterator"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  is_stateful: true
+}
+op {
+  name: "MultiDeviceIteratorGetNextFromShard"
+  input_arg {
+    name: "multi_device_iterator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "shard_num"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "incarnation_id"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "MultiDeviceIteratorInit"
+  input_arg {
+    name: "dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "multi_device_iterator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "max_buffer_size"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "incarnation_id"
+    type: DT_INT64
+  }
+  is_stateful: true
+}
+op {
+  name: "MultiDeviceIteratorToStringHandle"
+  input_arg {
+    name: "multi_device_iterator"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
 op {
   name: "Multinomial"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 594edfd7f0..fd6f9e3432 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -16811,6 +16811,134 @@ op {
   }
   is_commutative: true
 }
+op {
+  name: "MultiDeviceIterator"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "devices"
+    type: "list(string)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+  }
+  attr {
+    name: "container"
+    type: "string"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "MultiDeviceIteratorFromStringHandle"
+  input_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "multi_device_iterator"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  is_stateful: true
+}
+op {
+  name: "MultiDeviceIteratorGetNextFromShard"
+  input_arg {
+    name: "multi_device_iterator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "shard_num"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "incarnation_id"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "MultiDeviceIteratorInit"
+  input_arg {
+    name: "dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "multi_device_iterator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "max_buffer_size"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "incarnation_id"
+    type: DT_INT64
+  }
+  is_stateful: true
+}
+op {
+  name: "MultiDeviceIteratorToStringHandle"
+  input_arg {
+    name: "multi_device_iterator"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
 op {
   name: "Multinomial"
   input_arg {
-- 
GitLab


From 7d25d2d6c5db2269b6dba4cade6edaf7e8ddf6ba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Sep 2018 09:46:09 -0700
Subject: [PATCH 0573/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 214176089

---
 tensorflow/go/op/wrappers.go | 406 +++++++++++++++++------------------
 1 file changed, 203 insertions(+), 203 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 1d72bcd2b6..e6e07c8437 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -3770,6 +3770,68 @@ func BoostedTreesMakeStatsSummary(scope *Scope, node_ids tf.Output, gradients tf
 	return op.Output(0)
 }
 
+// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics.
+//
+// Arguments:
+//	tree_ensemble_handle: Handle to the tree ensemble.
+//
+// Returns Stamp token of the tree ensemble resource.The number of trees in the tree ensemble resource.The number of trees that were finished successfully.The number of layers we attempted to build (but not necessarily succeeded).Rank size 2 tensor that contains start and end ids of the nodes in the latest
+// layer.
+func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesGetEnsembleStates",
+		Input: []tf.Input{
+			tree_ensemble_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+}
+
+// Creates a tree ensemble model and returns a handle to it.
+//
+// Arguments:
+//	tree_ensemble_handle: Handle to the tree ensemble resource to be created.
+//	stamp_token: Token to use as the initial value of the resource stamp.
+//	tree_ensemble_serialized: Serialized proto of the tree ensemble.
+//
+// Returns the created operation.
+func BoostedTreesCreateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesCreateEnsemble",
+		Input: []tf.Input{
+			tree_ensemble_handle, stamp_token, tree_ensemble_serialized,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Checks whether a tree ensemble has been initialized.
+//
+// Arguments:
+//	tree_ensemble_handle: Handle to the tree ensemble resouce.
+//
+// Returns output boolean on whether it is initialized or not.
+func IsBoostedTreesEnsembleInitialized(scope *Scope, tree_ensemble_handle tf.Output) (is_initialized tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "IsBoostedTreesEnsembleInitialized",
+		Input: []tf.Input{
+			tree_ensemble_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the sum along sparse segments of a tensor.
 //
 // Read
@@ -5755,26 +5817,6 @@ func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
-// Checks whether a tree ensemble has been initialized.
-//
-// Arguments:
-//	tree_ensemble_handle: Handle to the tree ensemble resouce.
-//
-// Returns output boolean on whether it is initialized or not.
-func IsBoostedTreesEnsembleInitialized(scope *Scope, tree_ensemble_handle tf.Output) (is_initialized tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "IsBoostedTreesEnsembleInitialized",
-		Input: []tf.Input{
-			tree_ensemble_handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // CastAttr is an optional argument to Cast.
 type CastAttr func(optionalAttr)
 
@@ -19714,27 +19756,6 @@ func OptimizeDataset(scope *Scope, input_dataset tf.Output, optimizations tf.Out
 	return op.Output(0)
 }
 
-// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics.
-//
-// Arguments:
-//	tree_ensemble_handle: Handle to the tree ensemble.
-//
-// Returns Stamp token of the tree ensemble resource.The number of trees in the tree ensemble resource.The number of trees that were finished successfully.The number of layers we attempted to build (but not necessarily succeeded).Rank size 2 tensor that contains start and end ids of the nodes in the latest
-// layer.
-func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesGetEnsembleStates",
-		Input: []tf.Input{
-			tree_ensemble_handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
-}
-
 // Returns the element-wise min of two SparseTensors.
 //
 // Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
@@ -21078,6 +21099,147 @@ func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_
 	return op.Output(0)
 }
 
+// Deserializes a serialized tree ensemble config and replaces current tree
+//
+// ensemble.
+//
+// Arguments:
+//	tree_ensemble_handle: Handle to the tree ensemble.
+//	stamp_token: Token to use as the new value of the resource stamp.
+//	tree_ensemble_serialized: Serialized proto of the ensemble.
+//
+// Returns the created operation.
+func BoostedTreesDeserializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesDeserializeEnsemble",
+		Input: []tf.Input{
+			tree_ensemble_handle, stamp_token, tree_ensemble_serialized,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Transforms a tf.Example proto (as a string) into typed tensors.
+//
+// Arguments:
+//	serialized: A vector containing a batch of binary serialized Example protos.
+//	dense_defaults: A list of Tensors (some may be empty), whose length matches
+// the length of `dense_keys`. dense_defaults[j] provides default values
+// when the example's feature_map lacks dense_key[j].  If an empty Tensor is
+// provided for dense_defaults[j], then the Feature dense_keys[j] is required.
+// The input type is inferred from dense_defaults[j], even when it's empty.
+// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
+// then the shape of dense_defaults[j] must match that of dense_shapes[j].
+// If dense_shapes[j] has an undefined major dimension (variable strides dense
+// feature), dense_defaults[j] must contain a single element:
+// the padding element.
+//	num_sparse: The number of sparse features to be parsed from the example. This
+// must match the lengths of `sparse_keys` and `sparse_types`.
+//	sparse_keys: A list of `num_sparse` strings.
+// The keys expected in the Examples' features associated with sparse values.
+//	dense_keys: The keys expected in the Examples' features associated with dense
+// values.
+//	sparse_types: A list of `num_sparse` types; the data types of data in each
+// Feature given in sparse_keys.
+// Currently the ParseSingleExample op supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+//	dense_shapes: The shapes of data in each Feature given in dense_keys.
+// The length of this list must match the length of `dense_keys`.  The
+// number of elements in the Feature corresponding to dense_key[j] must
+// always equal dense_shapes[j].NumEntries().  If dense_shapes[j] ==
+// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j]
+// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1,
+// ..., DN), the shape of the output Tensor dense_values[j] will be (M,
+// D1, .., DN), where M is the number of blocks of elements of length
+// D1 * .... * DN, in the input.
+func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes}
+	opspec := tf.OpSpec{
+		Type: "ParseSingleExample",
+		Input: []tf.Input{
+			serialized, tf.OutputList(dense_defaults),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	return sparse_indices, sparse_values, sparse_shapes, dense_values
+}
+
+// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2.
+type WholeFileReaderV2Attr func(optionalAttr)
+
+// WholeFileReaderV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this reader is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// WholeFileReaderV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this reader is named in the given bucket
+// with this shared_name. Otherwise, the node name is used instead.
+// If not specified, defaults to ""
+func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A Reader that outputs the entire contents of a file as a value.
+//
+// To use, enqueue filenames in a Queue.  The output of ReaderRead will
+// be a filename (key) and the contents of that file (value).
+//
+// Returns The handle to reference the Reader.
+func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "WholeFileReaderV2",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Pop the element at the top of the stack.
 //
 // Arguments:
@@ -30734,27 +30896,6 @@ func TensorArrayScatterV2(scope *Scope, handle tf.Output, indices tf.Output, val
 	return op.Output(0)
 }
 
-// Creates a tree ensemble model and returns a handle to it.
-//
-// Arguments:
-//	tree_ensemble_handle: Handle to the tree ensemble resource to be created.
-//	stamp_token: Token to use as the initial value of the resource stamp.
-//	tree_ensemble_serialized: Serialized proto of the tree ensemble.
-//
-// Returns the created operation.
-func BoostedTreesCreateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesCreateEnsemble",
-		Input: []tf.Input{
-			tree_ensemble_handle, stamp_token, tree_ensemble_serialized,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
 // Applies sparse addition to `input` using individual values or slices
 //
 // from `updates` according to indices `indices`.  The updates are non-aliasing:
@@ -32575,144 +32716,3 @@ func FixedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true
 	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1), op.Output(2)
 }
-
-// Transforms a tf.Example proto (as a string) into typed tensors.
-//
-// Arguments:
-//	serialized: A vector containing a batch of binary serialized Example protos.
-//	dense_defaults: A list of Tensors (some may be empty), whose length matches
-// the length of `dense_keys`. dense_defaults[j] provides default values
-// when the example's feature_map lacks dense_key[j].  If an empty Tensor is
-// provided for dense_defaults[j], then the Feature dense_keys[j] is required.
-// The input type is inferred from dense_defaults[j], even when it's empty.
-// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
-// then the shape of dense_defaults[j] must match that of dense_shapes[j].
-// If dense_shapes[j] has an undefined major dimension (variable strides dense
-// feature), dense_defaults[j] must contain a single element:
-// the padding element.
-//	num_sparse: The number of sparse features to be parsed from the example. This
-// must match the lengths of `sparse_keys` and `sparse_types`.
-//	sparse_keys: A list of `num_sparse` strings.
-// The keys expected in the Examples' features associated with sparse values.
-//	dense_keys: The keys expected in the Examples' features associated with dense
-// values.
-//	sparse_types: A list of `num_sparse` types; the data types of data in each
-// Feature given in sparse_keys.
-// Currently the ParseSingleExample op supports DT_FLOAT (FloatList),
-// DT_INT64 (Int64List), and DT_STRING (BytesList).
-//	dense_shapes: The shapes of data in each Feature given in dense_keys.
-// The length of this list must match the length of `dense_keys`.  The
-// number of elements in the Feature corresponding to dense_key[j] must
-// always equal dense_shapes[j].NumEntries().  If dense_shapes[j] ==
-// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j]
-// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1,
-// ..., DN), the shape of the output Tensor dense_values[j] will be (M,
-// D1, .., DN), where M is the number of blocks of elements of length
-// D1 * .... * DN, in the input.
-func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes}
-	opspec := tf.OpSpec{
-		Type: "ParseSingleExample",
-		Input: []tf.Input{
-			serialized, tf.OutputList(dense_defaults),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil {
-		scope.UpdateErr("ParseSingleExample", err)
-		return
-	}
-	if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil {
-		scope.UpdateErr("ParseSingleExample", err)
-		return
-	}
-	if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil {
-		scope.UpdateErr("ParseSingleExample", err)
-		return
-	}
-	if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil {
-		scope.UpdateErr("ParseSingleExample", err)
-		return
-	}
-	return sparse_indices, sparse_values, sparse_shapes, dense_values
-}
-
-// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2.
-type WholeFileReaderV2Attr func(optionalAttr)
-
-// WholeFileReaderV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this reader is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// WholeFileReaderV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this reader is named in the given bucket
-// with this shared_name. Otherwise, the node name is used instead.
-// If not specified, defaults to ""
-func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A Reader that outputs the entire contents of a file as a value.
-//
-// To use, enqueue filenames in a Queue.  The output of ReaderRead will
-// be a filename (key) and the contents of that file (value).
-//
-// Returns The handle to reference the Reader.
-func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "WholeFileReaderV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Deserializes a serialized tree ensemble config and replaces current tree
-//
-// ensemble.
-//
-// Arguments:
-//	tree_ensemble_handle: Handle to the tree ensemble.
-//	stamp_token: Token to use as the new value of the resource stamp.
-//	tree_ensemble_serialized: Serialized proto of the ensemble.
-//
-// Returns the created operation.
-func BoostedTreesDeserializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesDeserializeEnsemble",
-		Input: []tf.Input{
-			tree_ensemble_handle, stamp_token, tree_ensemble_serialized,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-- 
GitLab


From a4eecdb369ecdae3b7fe7c1415d7b3b55bcc7b9e Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 23 Sep 2018 17:14:53 +0000
Subject: [PATCH 0574/1357] Fix GPU build issue on python 3

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/image/kernels/image_ops.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/tensorflow/contrib/image/kernels/image_ops.h b/tensorflow/contrib/image/kernels/image_ops.h
index 6b63eed130..7fac774d07 100644
--- a/tensorflow/contrib/image/kernels/image_ops.h
+++ b/tensorflow/contrib/image/kernels/image_ops.h
@@ -71,14 +71,7 @@ class ProjectiveGenerator {
         (transform[3] * output_x + transform[4] * output_y + transform[5]) /
         projection;
 
-    // TODO(ringwalt): Add a fill value input.
-#if (defined __CUDA_ARCH__) && (CUDART_VERSION < 8000)
-    // On CUDA versions previous to 8.0, only __shared__ variables
-    // could be declared as static in the device code.
     const T fill_value = T(0);
-#else
-    static const T fill_value = T(0);
-#endif
     switch (interpolation_) {
       case INTERPOLATION_NEAREST:
         // Switch the order of x and y again for indexing into the image.
-- 
GitLab


From 5b6a09f81f8088626b5d88ed7fe3f3414d7ae23e Mon Sep 17 00:00:00 2001
From: Vijay Vasudevan <vrv@google.com>
Date: Sun, 23 Sep 2018 10:51:24 -0700
Subject: [PATCH 0575/1357] Add Metadata object for RecordReader and associated
 stats computing / MD fetching method.

TFRecord files do not contain a file-level header that describes the
MD of the file.  To avoid backwards compatibility issues, we add a
lightweight function that computes the statistics over the file once
and caches the result for future calls.

A future implementor could do a better job of computing GetMetadata() by
having the RecordWriter emit these entries during writing so that
GetMetadata() only reads.  Doing so will require additional backwards
compatibility checks to ensure that the function works both for old
TFRecords and the new format.

PiperOrigin-RevId: 214178704
---
 tensorflow/core/lib/io/record_reader.cc       | 53 +++++++++++++++++++
 tensorflow/core/lib/io/record_reader.h        | 25 +++++++++
 .../core/lib/io/record_reader_writer_test.cc  |  7 +++
 3 files changed, 85 insertions(+)

diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc
index f93ebea771..e22adcd569 100644
--- a/tensorflow/core/lib/io/record_reader.cc
+++ b/tensorflow/core/lib/io/record_reader.cc
@@ -108,6 +108,59 @@ Status RecordReader::ReadChecksummed(uint64 offset, size_t n, string* result) {
   return Status::OK();
 }
 
+Status RecordReader::GetMetadata(Metadata* md) {
+  if (!md) {
+    return errors::InvalidArgument(
+        "Metadata object call to GetMetadata() was null");
+  }
+
+  // Compute the metadata of the TFRecord file if not cached.
+  if (!cached_metadata_) {
+    TF_RETURN_IF_ERROR(input_stream_->Reset());
+
+    int64 data_size = 0;
+    int64 entries = 0;
+
+    // Within the loop, we always increment offset positively, so this
+    // loop should be guaranteed to either return after reaching EOF
+    // or encountering an error.
+    uint64 offset = 0;
+    string record;
+    while (true) {
+      // Read header, containing size of data.
+      Status s = ReadChecksummed(offset, sizeof(uint64), &record);
+      if (!s.ok()) {
+        if (errors::IsOutOfRange(s)) {
+          // We should reach out of range when the record file is complete.
+          break;
+        }
+        return s;
+      }
+
+      // Read the length of the data.
+      const uint64 length = core::DecodeFixed64(record.data());
+
+      // Skip reading the actual data since we just want the number
+      // of records and the size of the data.
+      TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(length + kFooterSize));
+      offset += kHeaderSize + length + kFooterSize;
+
+      // Increment running stats.
+      data_size += length;
+      ++entries;
+    }
+
+    cached_metadata_.reset(new Metadata());
+    cached_metadata_->stats.entries = entries;
+    cached_metadata_->stats.data_size = data_size;
+    cached_metadata_->stats.file_size =
+        data_size + (kHeaderSize + kFooterSize) * entries;
+  }
+
+  md->stats = cached_metadata_->stats;
+  return Status::OK();
+}
+
 Status RecordReader::ReadRecord(uint64* offset, string* record) {
   // Position the input stream.
   int64 curr_pos = input_stream_->Tell();
diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h
index 11af1366b0..17444660d4 100644
--- a/tensorflow/core/lib/io/record_reader.h
+++ b/tensorflow/core/lib/io/record_reader.h
@@ -66,6 +66,18 @@ class RecordReader {
   static const size_t kHeaderSize = sizeof(uint64) + sizeof(uint32);
   static const size_t kFooterSize = sizeof(uint32);
 
+  // Statistics (sizes are in units of bytes)
+  struct Stats {
+    int64 file_size = -1;
+    int64 data_size = -1;
+    int64 entries = -1;  // Number of values
+  };
+
+  // Metadata for the TFRecord file.
+  struct Metadata {
+    Stats stats;
+  };
+
   // Create a reader that will return log records from "*file".
   // "*file" must remain live while this Reader is in use.
   explicit RecordReader(
@@ -79,6 +91,17 @@ class RecordReader {
   // OUT_OF_RANGE for end of file, or something else for an error.
   Status ReadRecord(uint64* offset, string* record);
 
+  // Return the metadata of the Record file.
+  //
+  // The current implementation scans the file to completion,
+  // skipping over the data regions, to extract the metadata once
+  // on the first call to GetStats().  An improved implementation
+  // would change RecordWriter to write the metadata into TFRecord
+  // so that GetMetadata() could be a const method.
+  //
+  // 'metadata' must not be nullptr.
+  Status GetMetadata(Metadata* md);
+
  private:
   Status ReadChecksummed(uint64 offset, size_t n, string* result);
 
@@ -86,6 +109,8 @@ class RecordReader {
   std::unique_ptr<InputStreamInterface> input_stream_;
   bool last_read_failed_;
 
+  std::unique_ptr<Metadata> cached_metadata_;
+
   TF_DISALLOW_COPY_AND_ASSIGN(RecordReader);
 };
 
diff --git a/tensorflow/core/lib/io/record_reader_writer_test.cc b/tensorflow/core/lib/io/record_reader_writer_test.cc
index 13bea1f8f1..a88d34d293 100644
--- a/tensorflow/core/lib/io/record_reader_writer_test.cc
+++ b/tensorflow/core/lib/io/record_reader_writer_test.cc
@@ -147,6 +147,13 @@ TEST(RecordReaderWriterTest, TestBasics) {
       EXPECT_EQ("abc", record);
       TF_CHECK_OK(reader.ReadRecord(&offset, &record));
       EXPECT_EQ("defg", record);
+
+      io::RecordReader::Metadata md;
+      TF_ASSERT_OK(reader.GetMetadata(&md));
+      EXPECT_EQ(2, md.stats.entries);
+      EXPECT_EQ(7, md.stats.data_size);
+      // Two entries have 16 bytes of header/footer each.
+      EXPECT_EQ(39, md.stats.file_size);
     }
   }
 }
-- 
GitLab


From 2862f65fd6e6966ebf8af7cb4fa754b319202b0f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Sep 2018 11:17:38 -0700
Subject: [PATCH 0576/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 214179792
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 53 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 53 +++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index ffef0bca8e..b02ea64ac9 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -21901,6 +21901,59 @@ op {
     }
   }
 }
+op {
+  name: "ExtractVolumePatches"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "patches"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksizes"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
 op {
   name: "FFT"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index fd6f9e3432..4c5a472e9f 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -10186,6 +10186,59 @@ op {
     }
   }
 }
+op {
+  name: "ExtractVolumePatches"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "patches"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksizes"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
 op {
   name: "FFT"
   input_arg {
-- 
GitLab


From 1f8db608007ae60f89bf38c4c6af98a0248f214e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Sep 2018 17:21:07 -0700
Subject: [PATCH 0577/1357] Add blacklist ops to PinToHostOptimizer. Fix test.

PiperOrigin-RevId: 214195020
---
 .../optimizers/pin_to_host_optimizer.cc       |  8 ++++
 .../python/profiler/model_analyzer_test.py    | 42 ++++++++++---------
 2 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 8a65cd3ec3..c8f9311b2e 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -156,6 +156,9 @@ string TryFindHostDevice(const gtl::FlatSet<string>& devices,
   // We couldn't find an appropriate Host device, return original device.
   return device;
 }
+
+// All the nodes that should be blacklisted and not swapped.
+bool IsBlacklisted(const NodeDef& node) { return IsCollective(node); }
 }  // end namespace internal
 
 Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
@@ -185,6 +188,11 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       continue;
     }
 
+    // Skip these node types.
+    if (internal::IsBlacklisted(node)) {
+      continue;
+    }
+
     // Check the node can be run on CPU.
     Status s = FindKernelDef(DEVICE_CPU, node, nullptr, nullptr);
     if (!s.ok()) {
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index c0e16ca536..94c685274a 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -52,13 +52,19 @@ builder = option_builder.ProfileOptionBuilder
 
 class PrintModelAnalysisTest(test.TestCase):
 
+  def _no_rewrite_session_config(self):
+    rewriter_config = rewriter_config_pb2.RewriterConfig(
+        pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF)
+    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
+    return config_pb2.ConfigProto(graph_options=graph_options)
+
   def testDumpToFile(self):
     ops.reset_default_graph()
     outfile = os.path.join(test.get_temp_dir(), 'dump')
     opts = builder(builder.trainable_variables_parameter()
                   ).with_file_output(outfile).build()
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       _ = lib.BuildSmallModel()
       model_analyzer.profile(sess.graph, options=opts)
 
@@ -83,7 +89,8 @@ class PrintModelAnalysisTest(test.TestCase):
     with profile_context.ProfileContext(test.get_temp_dir(),
                                         trace_steps=[],
                                         dump_steps=[]) as pctx:
-      with session.Session() as sess, ops.device(dev):
+      with session.Session(
+          config=self._no_rewrite_session_config()) as sess, ops.device(dev):
         x = lib.BuildSmallModel()
 
         sess.run(variables.global_variables_initializer())
@@ -149,11 +156,8 @@ class PrintModelAnalysisTest(test.TestCase):
             .select(['params', 'float_ops', 'occurrence', 'device', 'op_types',
                      'input_shapes']).build())
 
-    rewriter_config = rewriter_config_pb2.RewriterConfig(
-        disable_model_pruning=True)
-    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
-    config = config_pb2.ConfigProto(graph_options=graph_options)
-    with session.Session(config=config) as sess, ops.device('/device:CPU:0'):
+    with session.Session(config=self._no_rewrite_session_config()
+                        ) as sess, ops.device('/device:CPU:0'):
       x = lib.BuildSmallModel()
 
       sess.run(variables.global_variables_initializer())
@@ -179,7 +183,7 @@ class PrintModelAnalysisTest(test.TestCase):
             .select(['bytes', 'params', 'float_ops', 'num_hidden_ops', 'device',
                      'input_shapes']).build())
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildSmallModel()
 
       sess.run(variables.global_variables_initializer())
@@ -213,7 +217,7 @@ class PrintModelAnalysisTest(test.TestCase):
     with profile_context.ProfileContext(test.get_temp_dir(),
                                         trace_steps=[],
                                         dump_steps=[]) as pctx:
-      with session.Session() as sess:
+      with session.Session(config=self._no_rewrite_session_config()) as sess:
         x = lib.BuildFullModel()
 
         sess.run(variables.global_variables_initializer())
@@ -274,7 +278,7 @@ class PrintModelAnalysisTest(test.TestCase):
             .account_displayed_op_only(False)
             .select(['bytes', 'params', 'float_ops', 'device']).build())
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildSmallModel()
 
       sess.run(variables.global_variables_initializer())
@@ -302,7 +306,7 @@ class PrintModelAnalysisTest(test.TestCase):
             .with_timeline_output(outfile)
             .with_accounted_types(['.*']).build())
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildFullModel()
 
       sess.run(variables.global_variables_initializer())
@@ -338,7 +342,7 @@ class PrintModelAnalysisTest(test.TestCase):
                      'peak_bytes', 'residual_bytes',
                      'output_bytes', 'occurrence', 'input_shapes']).build())
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildFullModel()
 
       sess.run(variables.global_variables_initializer())
@@ -384,7 +388,7 @@ class PrintModelAnalysisTest(test.TestCase):
   def testAdvisor(self):
     ops.reset_default_graph()
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildFullModel()
 
       sess.run(variables.global_variables_initializer())
@@ -417,7 +421,7 @@ class PrintModelAnalysisTest(test.TestCase):
             .with_node_names(trim_name_regexes=['ops.py.*'])
             .with_pprof_output(outfile).build())
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildFullModel()
 
       sess.run(variables.global_variables_initializer())
@@ -484,7 +488,7 @@ class PrintModelAnalysisTest(test.TestCase):
           self.assertGreaterEqual(n.output_bytes, mob)
         check_min(n.children, mm, mam, mcm, mb, mpb, mrb, mob)
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildSmallModel()
       sess.run(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
@@ -549,7 +553,7 @@ class PrintModelAnalysisTest(test.TestCase):
         for attr in not_selected:
           self.assertFalse(s.find(attr) > 0, s)
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildSmallModel()
       sess.run(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
@@ -582,7 +586,7 @@ class PrintModelAnalysisTest(test.TestCase):
 
   def _trainLoop(self, train_op, train_steps, time_dir, time_step,
                  memory_dir, memory_step, profile_dir, dump_step):
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       sess.run(variables.global_variables_initializer())
       # start from 1 because variable_initializer took one step.
       for i in range(1, train_steps + 1):
@@ -655,7 +659,7 @@ class PrintModelAnalysisTest(test.TestCase):
       c = a * b
 
     try:
-      with session.Session() as sess:
+      with session.Session(config=self._no_rewrite_session_config()) as sess:
         sess.run(c, options=config_pb2.RunOptions(
             report_tensor_allocations_upon_oom=True))
     except Exception as e:  # pylint: disable=broad-except
@@ -758,7 +762,7 @@ class PrintModelAnalysisTest(test.TestCase):
 
     grad = gradients.gradients(y, [x1])
 
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       run_options = config_pb2.RunOptions(
           trace_level=config_pb2.RunOptions.FULL_TRACE)
       run_metadata = config_pb2.RunMetadata()
-- 
GitLab


From 167272ead245ac9e0183da807d996ba9d6e401b0 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Sun, 23 Sep 2018 18:28:36 -0700
Subject: [PATCH 0578/1357] [tf.data] Add `tf.contrib.data.Optional` support to
 `Structure`.

This change switches `tf.contrib.data.Optional` to use a `Structure` class to represent
the structure of its value, instead of `output_types`, `output_shapes`, and `output_classes` properties. It adds support for nesting `Optional` objects and representing their structure.

This change also makes a modification to the `Structure` class: `Structure.is_compatible_with(x)` now takes another `Structure` as the `x` argument, instead of a value. This makes it easier to work with nested structures (where we might not have a value readily available), and better matches the interface of other `is_compatible_with()` methods (e.g. in `tf.TensorShape` and `tf.DType`).

Finally, in the process of making this change, I observed possible crash-failures when a DT_VARIANT tensor containing another DT_VARIANT tensor is copied between CPU and GPU. This change "fixes" the immediate problem by raising an UnimplementedError, but more work will be necessary to support the full range of use cases.

PiperOrigin-RevId: 214198993
---
 tensorflow/core/common_runtime/copy_tensor.cc |   7 +-
 tensorflow/core/kernels/data/optional_ops.cc  |   8 +
 tensorflow/python/data/kernel_tests/BUILD     |   1 +
 .../data/kernel_tests/optional_ops_test.py    | 176 ++++++++++++------
 tensorflow/python/data/ops/BUILD              |   5 +-
 tensorflow/python/data/ops/iterator_ops.py    |  13 +-
 tensorflow/python/data/ops/optional_ops.py    | 150 ++++++---------
 tensorflow/python/data/util/structure.py      | 131 +++++++++----
 tensorflow/python/data/util/structure_test.py |  36 +++-
 9 files changed, 330 insertions(+), 197 deletions(-)

diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc
index cf3d1f0b79..d800a86199 100644
--- a/tensorflow/core/common_runtime/copy_tensor.cc
+++ b/tensorflow/core/common_runtime/copy_tensor.cc
@@ -347,7 +347,12 @@ namespace {
 static Status WrappedTensorDeviceCopy(
     const Tensor& from, Tensor* to,
     const UnaryVariantOpRegistry::AsyncTensorDeviceCopyFn& copy) {
-  if (DMAHelper::CanUseDMA(&from)) {
+  if (from.dtype() == DT_VARIANT) {
+    // TODO(b/116349787): Implement support for nested variants.
+    return errors::Unimplemented(
+        "Support for copying nested variants to device has not yet been "
+        "implemented.");
+  } else if (DMAHelper::CanUseDMA(&from)) {
     TF_RETURN_IF_ERROR(copy(from, to));
   } else {
     *to = from;
diff --git a/tensorflow/core/kernels/data/optional_ops.cc b/tensorflow/core/kernels/data/optional_ops.cc
index 346e4ceebd..2ab5c83082 100644
--- a/tensorflow/core/kernels/data/optional_ops.cc
+++ b/tensorflow/core/kernels/data/optional_ops.cc
@@ -212,6 +212,14 @@ static Status OptionalDeviceCopy(
     const std::vector<Tensor>& from_values = from.get_values();
     std::vector<Tensor> to_values;
     to_values.reserve(from_values.size());
+    for (const Tensor& t : from_values) {
+      if (t.dtype() == DT_VARIANT) {
+        // TODO(b/116349787): Implement support for nested variants.
+        return errors::Unimplemented(
+            "Support for copying nested variants to device has not yet been "
+            "implemented.");
+      }
+    }
     for (const Tensor& t : from_values) {
       if (DMAHelper::CanUseDMA(&t)) {
         Tensor tmp(t.dtype());
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index f97116cadd..28ee3ebaa6 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -394,6 +394,7 @@ cuda_py_test(
     size = "small",
     srcs = ["optional_ops_test.py"],
     additional_deps = [
+        "@absl_py//absl/testing:parameterized",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
         "//tensorflow/python/data/ops:optional_ops",
diff --git a/tensorflow/python/data/kernel_tests/optional_ops_test.py b/tensorflow/python/data/kernel_tests/optional_ops_test.py
index c344513e71..706a65fe55 100644
--- a/tensorflow/python/data/kernel_tests/optional_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/optional_ops_test.py
@@ -17,11 +17,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import optional_ops
+from tensorflow.python.data.util import structure
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -33,14 +35,11 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class OptionalTest(test.TestCase):
+class OptionalTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testFromValue(self):
     opt = optional_ops.Optional.from_value(constant_op.constant(37.0))
-    self.assertEqual(dtypes.float32, opt.output_types)
-    self.assertEqual([], opt.output_shapes)
-    self.assertEqual(ops.Tensor, opt.output_classes)
     self.assertTrue(self.evaluate(opt.has_value()))
     self.assertEqual(37.0, self.evaluate(opt.get_value()))
 
@@ -50,15 +49,6 @@ class OptionalTest(test.TestCase):
         "a": constant_op.constant(37.0),
         "b": (constant_op.constant(["Foo"]), constant_op.constant("Bar"))
     })
-    self.assertEqual({
-        "a": dtypes.float32,
-        "b": (dtypes.string, dtypes.string)
-    }, opt.output_types)
-    self.assertEqual({"a": [], "b": ([1], [])}, opt.output_shapes)
-    self.assertEqual({
-        "a": ops.Tensor,
-        "b": (ops.Tensor, ops.Tensor)
-    }, opt.output_classes)
     self.assertTrue(self.evaluate(opt.has_value()))
     self.assertEqual({
         "a": 37.0,
@@ -76,46 +66,29 @@ class OptionalTest(test.TestCase):
         values=np.array([-1., 1.], dtype=np.float32),
         dense_shape=np.array([2, 2]))
     opt = optional_ops.Optional.from_value((st_0, st_1))
-    self.assertEqual((dtypes.int64, dtypes.float32), opt.output_types)
-    self.assertEqual(([1], [2, 2]), opt.output_shapes)
-    self.assertEqual((sparse_tensor.SparseTensor, sparse_tensor.SparseTensor),
-                     opt.output_classes)
+    self.assertTrue(self.evaluate(opt.has_value()))
+    val_0, val_1 = opt.get_value()
+    for expected, actual in [(st_0, val_0), (st_1, val_1)]:
+      self.assertAllEqual(expected.indices, self.evaluate(actual.indices))
+      self.assertAllEqual(expected.values, self.evaluate(actual.values))
+      self.assertAllEqual(expected.dense_shape,
+                          self.evaluate(actual.dense_shape))
 
   @test_util.run_in_graph_and_eager_modes
   def testFromNone(self):
-    opt = optional_ops.Optional.none_from_structure(tensor_shape.scalar(),
-                                                    dtypes.float32, ops.Tensor)
-    self.assertEqual(dtypes.float32, opt.output_types)
-    self.assertEqual([], opt.output_shapes)
-    self.assertEqual(ops.Tensor, opt.output_classes)
+    value_structure = structure.TensorStructure(dtypes.float32, [])
+    opt = optional_ops.Optional.none_from_structure(value_structure)
+    self.assertTrue(opt.value_structure.is_compatible_with(value_structure))
+    self.assertFalse(
+        opt.value_structure.is_compatible_with(
+            structure.TensorStructure(dtypes.float32, [1])))
+    self.assertFalse(
+        opt.value_structure.is_compatible_with(
+            structure.TensorStructure(dtypes.int32, [])))
     self.assertFalse(self.evaluate(opt.has_value()))
     with self.assertRaises(errors.InvalidArgumentError):
       self.evaluate(opt.get_value())
 
-  def testStructureMismatchError(self):
-    tuple_output_shapes = (tensor_shape.scalar(), tensor_shape.scalar())
-    tuple_output_types = (dtypes.float32, dtypes.float32)
-    tuple_output_classes = (ops.Tensor, ops.Tensor)
-
-    dict_output_shapes = {
-        "a": tensor_shape.scalar(),
-        "b": tensor_shape.scalar()
-    }
-    dict_output_types = {"a": dtypes.float32, "b": dtypes.float32}
-    dict_output_classes = {"a": ops.Tensor, "b": ops.Tensor}
-
-    with self.assertRaises(TypeError):
-      optional_ops.Optional.none_from_structure(
-          tuple_output_shapes, tuple_output_types, dict_output_classes)
-
-    with self.assertRaises(TypeError):
-      optional_ops.Optional.none_from_structure(
-          tuple_output_shapes, dict_output_types, tuple_output_classes)
-
-    with self.assertRaises(TypeError):
-      optional_ops.Optional.none_from_structure(
-          dict_output_shapes, tuple_output_types, tuple_output_classes)
-
   @test_util.run_in_graph_and_eager_modes
   def testCopyToGPU(self):
     if not test_util.is_gpu_available():
@@ -126,17 +99,15 @@ class OptionalTest(test.TestCase):
           (constant_op.constant(37.0), constant_op.constant("Foo"),
            constant_op.constant(42)))
       optional_none = optional_ops.Optional.none_from_structure(
-          tensor_shape.scalar(), dtypes.float32, ops.Tensor)
+          structure.TensorStructure(dtypes.float32, []))
 
     with ops.device("/gpu:0"):
       gpu_optional_with_value = optional_ops._OptionalImpl(
           array_ops.identity(optional_with_value._variant_tensor),
-          optional_with_value.output_shapes, optional_with_value.output_types,
-          optional_with_value.output_classes)
+          optional_with_value.value_structure)
       gpu_optional_none = optional_ops._OptionalImpl(
           array_ops.identity(optional_none._variant_tensor),
-          optional_none.output_shapes, optional_none.output_types,
-          optional_none.output_classes)
+          optional_none.value_structure)
 
       gpu_optional_with_value_has_value = gpu_optional_with_value.has_value()
       gpu_optional_with_value_values = gpu_optional_with_value.get_value()
@@ -148,14 +119,101 @@ class OptionalTest(test.TestCase):
                      self.evaluate(gpu_optional_with_value_values))
     self.assertFalse(self.evaluate(gpu_optional_none_has_value))
 
-  def testIteratorGetNextAsOptional(self):
-    ds = dataset_ops.Dataset.range(3)
+  def _assertElementValueEqual(self, expected, actual):
+    if isinstance(expected, dict):
+      self.assertItemsEqual(list(expected.keys()), list(actual.keys()))
+      for k in expected.keys():
+        self._assertElementValueEqual(expected[k], actual[k])
+    elif isinstance(expected, sparse_tensor.SparseTensorValue):
+      self.assertAllEqual(expected.indices, actual.indices)
+      self.assertAllEqual(expected.values, actual.values)
+      self.assertAllEqual(expected.dense_shape, actual.dense_shape)
+    else:
+      self.assertAllEqual(expected, actual)
+
+  # pylint: disable=g-long-lambda
+  @parameterized.named_parameters(
+      ("Tensor", lambda: constant_op.constant(37.0),
+       structure.TensorStructure(dtypes.float32, [])),
+      ("SparseTensor", lambda: sparse_tensor.SparseTensor(
+          indices=[[0]], values=constant_op.constant([0], dtype=dtypes.int32),
+          dense_shape=[1]),
+       structure.SparseTensorStructure(dtypes.int32, [1])),
+      ("Nest", lambda: {
+          "a": constant_op.constant(37.0),
+          "b": (constant_op.constant(["Foo"]), constant_op.constant("Bar"))},
+       structure.NestedStructure({
+           "a": structure.TensorStructure(dtypes.float32, []),
+           "b": (structure.TensorStructure(dtypes.string, [1]),
+                 structure.TensorStructure(dtypes.string, []))})),
+      ("Optional", lambda: optional_ops.Optional.from_value(37.0),
+       optional_ops.OptionalStructure(
+           structure.TensorStructure(dtypes.float32, []))),
+  )
+  def testOptionalStructure(self, tf_value_fn, expected_value_structure):
+    tf_value = tf_value_fn()
+    opt = optional_ops.Optional.from_value(tf_value)
+
+    self.assertTrue(
+        expected_value_structure.is_compatible_with(opt.value_structure))
+    self.assertTrue(
+        opt.value_structure.is_compatible_with(expected_value_structure))
+
+    opt_structure = structure.Structure.from_value(opt)
+    self.assertIsInstance(opt_structure, optional_ops.OptionalStructure)
+    self.assertTrue(opt_structure.is_compatible_with(opt_structure))
+    self.assertTrue(opt_structure._value_structure.is_compatible_with(
+        expected_value_structure))
+    self.assertEqual([dtypes.variant], opt_structure._flat_types)
+    self.assertEqual([tensor_shape.scalar()], opt_structure._flat_shapes)
+
+    # All OptionalStructure objects are not compatible with a non-optional
+    # value.
+    non_optional_structure = structure.Structure.from_value(
+        constant_op.constant(42.0))
+    self.assertFalse(opt_structure.is_compatible_with(non_optional_structure))
+
+    # Assert that the optional survives a round-trip via _from_tensor_list()
+    # and _to_tensor_list().
+    round_trip_opt = opt_structure._from_tensor_list(
+        opt_structure._to_tensor_list(opt))
+    if isinstance(tf_value, optional_ops.Optional):
+      self.assertEqual(
+          self.evaluate(tf_value.get_value()),
+          self.evaluate(round_trip_opt.get_value().get_value()))
+    else:
+      self.assertEqual(
+          self.evaluate(tf_value), self.evaluate(round_trip_opt.get_value()))
+
+  @parameterized.named_parameters(
+      ("Tensor", np.array([1, 2, 3], dtype=np.int32),
+       lambda: constant_op.constant([4, 5, 6], dtype=dtypes.int32), True),
+      ("SparseTensor", sparse_tensor.SparseTensorValue(
+          indices=[[0, 0], [1, 1]],
+          values=np.array([-1., 1.], dtype=np.float32), dense_shape=[2, 2]),
+       lambda: sparse_tensor.SparseTensor(
+           indices=[[0, 1], [1, 0]], values=[37.0, 42.0], dense_shape=[2, 2]),
+       False),
+      ("Nest", {"a": np.array([1, 2, 3], dtype=np.int32),
+                "b": sparse_tensor.SparseTensorValue(
+                    indices=[[0, 0], [1, 1]],
+                    values=np.array([-1., 1.], dtype=np.float32),
+                    dense_shape=[2, 2])},
+       lambda: {"a": constant_op.constant([4, 5, 6], dtype=dtypes.int32),
+                "b": sparse_tensor.SparseTensor(
+                    indices=[[0, 1], [1, 0]], values=[37.0, 42.0],
+                    dense_shape=[2, 2])}, False),
+  )
+  def testIteratorGetNextAsOptional(self, np_value, tf_value_fn, works_on_gpu):
+    if not works_on_gpu and test.is_gpu_available():
+      self.skipTest("Test case not yet supported on GPU.")
+    ds = dataset_ops.Dataset.from_tensors(np_value).repeat(3)
     iterator = ds.make_initializable_iterator()
     next_elem = iterator_ops.get_next_as_optional(iterator)
-    self.assertTrue(isinstance(next_elem, optional_ops.Optional))
-    self.assertEqual(ds.output_types, next_elem.output_types)
-    self.assertEqual(ds.output_shapes, next_elem.output_shapes)
-    self.assertEqual(ds.output_classes, next_elem.output_classes)
+    self.assertIsInstance(next_elem, optional_ops.Optional)
+    self.assertTrue(
+        next_elem.value_structure.is_compatible_with(
+            structure.Structure.from_value(tf_value_fn())))
     elem_has_value_t = next_elem.has_value()
     elem_value_t = next_elem.get_value()
     with self.cached_session() as sess:
@@ -169,10 +227,10 @@ class OptionalTest(test.TestCase):
       # For each element of the dataset, assert that the optional evaluates to
       # the expected value.
       sess.run(iterator.initializer)
-      for i in range(3):
+      for _ in range(3):
         elem_has_value, elem_value = sess.run([elem_has_value_t, elem_value_t])
         self.assertTrue(elem_has_value)
-        self.assertEqual(i, elem_value)
+        self._assertElementValueEqual(np_value, elem_value)
 
       # After exhausting the iterator, `next_elem.has_value()` will evaluate to
       # false, and attempting to get the value will fail.
diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD
index 9dffc38820..76bf2470b1 100644
--- a/tensorflow/python/data/ops/BUILD
+++ b/tensorflow/python/data/ops/BUILD
@@ -64,6 +64,7 @@ py_library(
         "//tensorflow/python/compat",
         "//tensorflow/python/data/util:nest",
         "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/util:structure",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/training/checkpointable:base",
     ],
@@ -78,10 +79,8 @@ py_library(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/util:structure",
     ],
 )
 
diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py
index 8f8e026df9..cae00cdbfc 100644
--- a/tensorflow/python/data/ops/iterator_ops.py
+++ b/tensorflow/python/data/ops/iterator_ops.py
@@ -24,6 +24,7 @@ from tensorflow.python.compat import compat
 from tensorflow.python.data.ops import optional_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import sparse
+from tensorflow.python.data.util import structure
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -85,10 +86,10 @@ class Iterator(checkpointable.CheckpointableBase):
       initializer: A `tf.Operation` that should be run to initialize this
         iterator.
       output_types: A nested structure of `tf.DType` objects corresponding to
-        each component of an element of this dataset.
+        each component of an element of this iterator.
       output_shapes: A nested structure of `tf.TensorShape` objects
-        corresponding to each component of an element of this dataset.
-      output_classes: A nested structure of Python `type` object corresponding
+        corresponding to each component of an element of this iterator.
+      output_classes: A nested structure of Python `type` objects corresponding
         to each component of an element of this iterator.
     """
     self._iterator_resource = iterator_resource
@@ -670,6 +671,6 @@ def get_next_as_optional(iterator):
           output_shapes=nest.flatten(
               sparse.as_dense_shapes(iterator.output_shapes,
                                      iterator.output_classes))),
-      output_shapes=iterator.output_shapes,
-      output_types=iterator.output_types,
-      output_classes=iterator.output_classes)
+      structure.Structure._from_legacy_structure(iterator.output_types,
+                                                 iterator.output_shapes,
+                                                 iterator.output_classes))
diff --git a/tensorflow/python/data/ops/optional_ops.py b/tensorflow/python/data/ops/optional_ops.py
index b75b98dc72..3bbebd7878 100644
--- a/tensorflow/python/data/ops/optional_ops.py
+++ b/tensorflow/python/data/ops/optional_ops.py
@@ -19,11 +19,9 @@ from __future__ import print_function
 
 import abc
 
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
+from tensorflow.python.data.util import structure
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import gen_dataset_ops
 
@@ -67,36 +65,14 @@ class Optional(object):
     raise NotImplementedError("Optional.get_value()")
 
   @abc.abstractproperty
-  def output_classes(self):
-    """Returns the class of each component of this optional.
-
-    The expected values are `tf.Tensor` and `tf.SparseTensor`.
-
-    Returns:
-      A nested structure of Python `type` objects corresponding to each
-      component of this optional.
-    """
-    raise NotImplementedError("Optional.output_classes")
-
-  @abc.abstractproperty
-  def output_shapes(self):
-    """Returns the shape of each component of this optional.
-
-    Returns:
-      A nested structure of `tf.TensorShape` objects corresponding to each
-      component of this optional.
-    """
-    raise NotImplementedError("Optional.output_shapes")
-
-  @abc.abstractproperty
-  def output_types(self):
-    """Returns the type of each component of this optional.
+  def value_structure(self):
+    """The structure of the components of this optional.
 
     Returns:
-      A nested structure of `tf.DType` objects corresponding to each component
-      of this optional.
+      A `Structure` object representing the structure of the components of this
+        optional.
     """
-    raise NotImplementedError("Optional.output_types")
+    raise NotImplementedError("Optional.value_structure")
 
   @staticmethod
   def from_value(value):
@@ -108,48 +84,30 @@ class Optional(object):
     Returns:
       An `Optional` that wraps `value`.
     """
-    # TODO(b/110122868): Consolidate this destructuring logic with the
-    # similar code in `Dataset.from_tensors()`.
     with ops.name_scope("optional") as scope:
       with ops.name_scope("value"):
-        value = nest.pack_sequence_as(value, [
-            sparse_tensor_lib.SparseTensor.from_value(t)
-            if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor(
-                t, name="component_%d" % i)
-            for i, t in enumerate(nest.flatten(value))
-        ])
-
-      encoded_value = nest.flatten(sparse.serialize_sparse_tensors(value))
-      output_classes = sparse.get_classes(value)
-      output_shapes = nest.pack_sequence_as(
-          value, [t.get_shape() for t in nest.flatten(value)])
-      output_types = nest.pack_sequence_as(
-          value, [t.dtype for t in nest.flatten(value)])
+        value_structure = structure.Structure.from_value(value)
+        encoded_value = value_structure._to_tensor_list(value)  # pylint: disable=protected-access
 
     return _OptionalImpl(
         gen_dataset_ops.optional_from_value(encoded_value, name=scope),
-        output_shapes, output_types, output_classes)
+        value_structure)
 
   @staticmethod
-  def none_from_structure(output_shapes, output_types, output_classes):
+  def none_from_structure(value_structure):
     """Returns an `Optional` that has no value.
 
-    NOTE: This method takes arguments that define the structure of the value
+    NOTE: This method takes an argument that defines the structure of the value
     that would be contained in the returned `Optional` if it had a value.
 
     Args:
-      output_shapes: A nested structure of `tf.TensorShape` objects
-        corresponding to each component of this optional.
-      output_types: A nested structure of `tf.DType` objects corresponding to
-        each component of this optional.
-      output_classes: A nested structure of Python `type` objects corresponding
-        to each component of this optional.
+      value_structure: A `Structure` object representing the structure of the
+        components of this optional.
 
     Returns:
       An `Optional` that has no value.
     """
-    return _OptionalImpl(gen_dataset_ops.optional_none(), output_shapes,
-                         output_types, output_classes)
+    return _OptionalImpl(gen_dataset_ops.optional_none(), value_structure)
 
 
 class _OptionalImpl(Optional):
@@ -159,20 +117,9 @@ class _OptionalImpl(Optional):
   `Optional.__init__()` in the public API.
   """
 
-  def __init__(self, variant_tensor, output_shapes, output_types,
-               output_classes):
-    # TODO(b/110122868): Consolidate the structure validation logic with the
-    # similar logic in `Iterator.from_structure()` and
-    # `Dataset.from_generator()`.
-    output_types = nest.map_structure(dtypes.as_dtype, output_types)
-    output_shapes = nest.map_structure_up_to(
-        output_types, tensor_shape.as_shape, output_shapes)
-    nest.assert_same_structure(output_types, output_shapes)
-    nest.assert_same_structure(output_types, output_classes)
+  def __init__(self, variant_tensor, value_structure):
     self._variant_tensor = variant_tensor
-    self._output_shapes = output_shapes
-    self._output_types = output_types
-    self._output_classes = output_classes
+    self._value_structure = value_structure
 
   def has_value(self, name=None):
     return gen_dataset_ops.optional_has_value(self._variant_tensor, name=name)
@@ -182,28 +129,55 @@ class _OptionalImpl(Optional):
     # in `Iterator.get_next()` and `StructuredFunctionWrapper`.
     with ops.name_scope(name, "OptionalGetValue",
                         [self._variant_tensor]) as scope:
-      return sparse.deserialize_sparse_tensors(
-          nest.pack_sequence_as(
-              self._output_types,
-              gen_dataset_ops.optional_get_value(
-                  self._variant_tensor,
-                  name=scope,
-                  output_types=nest.flatten(
-                      sparse.as_dense_types(self._output_types,
-                                            self._output_classes)),
-                  output_shapes=nest.flatten(
-                      sparse.as_dense_shapes(self._output_shapes,
-                                             self._output_classes)))),
-          self._output_types, self._output_shapes, self._output_classes)
+      # pylint: disable=protected-access
+      return self._value_structure._from_tensor_list(
+          gen_dataset_ops.optional_get_value(
+              self._variant_tensor,
+              name=scope,
+              output_types=self._value_structure._flat_types,
+              output_shapes=self._value_structure._flat_shapes))
 
   @property
-  def output_classes(self):
-    return self._output_classes
+  def value_structure(self):
+    return self._value_structure
+
+
+class OptionalStructure(structure.Structure):
+  """Represents an optional potentially containing a structured value."""
+
+  def __init__(self, value_structure):
+    self._value_structure = value_structure
 
   @property
-  def output_shapes(self):
-    return self._output_shapes
+  def _flat_shapes(self):
+    return [tensor_shape.scalar()]
 
   @property
-  def output_types(self):
-    return self._output_types
+  def _flat_types(self):
+    return [dtypes.variant]
+
+  def is_compatible_with(self, other):
+    # pylint: disable=protected-access
+    return (isinstance(other, OptionalStructure) and
+            self._value_structure.is_compatible_with(other._value_structure))
+
+  def _to_tensor_list(self, value):
+    return [value._variant_tensor]  # pylint: disable=protected-access
+
+  def _from_tensor_list(self, flat_value):
+    if (len(flat_value) != 1 or flat_value[0].dtype != dtypes.variant or
+        not flat_value[0].shape.is_compatible_with(tensor_shape.scalar())):
+      raise ValueError(
+          "OptionalStructure corresponds to a single tf.variant scalar.")
+    # pylint: disable=protected-access
+    return _OptionalImpl(flat_value[0], self._value_structure)
+
+  @staticmethod
+  def from_value(value):
+    return OptionalStructure(value.value_structure)
+
+
+# pylint: disable=protected-access
+structure.Structure._register_custom_converter(Optional,
+                                               OptionalStructure.from_value)
+# pylint: enable=protected-access
diff --git a/tensorflow/python/data/util/structure.py b/tensorflow/python/data/util/structure.py
index c5764b8dfe..a90ca258c0 100644
--- a/tensorflow/python/data/util/structure.py
+++ b/tensorflow/python/data/util/structure.py
@@ -28,6 +28,9 @@ from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import sparse_ops
 
 
+_STRUCTURE_CONVERSION_FUNCTION_REGISTRY = {}
+
+
 class Structure(object):
   """Represents structural information, such as type and shape, about a value.
 
@@ -64,12 +67,10 @@ class Structure(object):
     raise NotImplementedError("Structure._flat_shapes")
 
   @abc.abstractmethod
-  def is_compatible_with(self, value):
-    """Returns `True` if `value` is compatible with this structure.
+  def is_compatible_with(self, other):
+    """Returns `True` if `other` is compatible with this structure.
 
-    A value `value` is compatible with a structure `s` if
-    `Structure.from_value(value)` would return a structure `t` that is a
-    "subtype" of `s`. A structure `t` is a "subtype" of `s` if:
+    A structure `t` is a "subtype" of `s` if:
 
     * `s` and `t` are instances of the same `Structure` subclass.
     * The nested structures (if any) of `s` and `t` are the same, according to
@@ -83,10 +84,10 @@ class Structure(object):
       `tf.TensorShape.is_compatible_with`.
 
     Args:
-      value: A potentially structured value.
+      other: A `Structure`.
 
     Returns:
-      `True` if `value` matches this structure, otherwise `False`.
+      `True` if `other` is a subtype of this structure, otherwise `False`.
     """
     raise NotImplementedError("Structure.is_compatible_with()")
 
@@ -98,7 +99,7 @@ class Structure(object):
     `self._flat_types` to represent structured values in lower level APIs
     (such as plain TensorFlow operations) that do not understand structure.
 
-    Requires: `self.is_compatible_with(value)`.
+    Requires: `self.is_compatible_with(Structure.from_value(value))`.
 
     Args:
       value: A value with compatible structure.
@@ -137,9 +138,8 @@ class Structure(object):
       TypeError: If a structure cannot be built for `value`, because its type
         or one of its component types is not supported.
     """
-
-    # TODO(b/110122868): Add support for custom types, Dataset, and Optional
-    # to this method.
+    # TODO(b/110122868): Add support for custom types and Dataset to this
+    # method.
     if isinstance(
         value,
         (sparse_tensor_lib.SparseTensor, sparse_tensor_lib.SparseTensorValue)):
@@ -147,12 +147,76 @@ class Structure(object):
     elif isinstance(value, (tuple, dict)):
       return NestedStructure.from_value(value)
     else:
+      for converter_type, converter_fn in (
+          _STRUCTURE_CONVERSION_FUNCTION_REGISTRY.items()):
+        if isinstance(value, converter_type):
+          return converter_fn(value)
       try:
         tensor = ops.convert_to_tensor(value)
       except (ValueError, TypeError):
         raise TypeError("Could not build a structure for %r" % value)
       return TensorStructure.from_value(tensor)
 
+  @staticmethod
+  def _from_legacy_structure(output_types, output_shapes, output_classes):
+    """Returns a `Structure` that represents the given legacy structure.
+
+    This method provides a way to convert from the existing `Dataset` and
+    `Iterator` structure-related properties to a `Structure` object.
+
+    TODO(b/110122868): Remove this method once `Structure` is used throughout
+    `tf.data`.
+
+    Args:
+      output_types: A nested structure of `tf.DType` objects corresponding to
+        each component of a structured value.
+      output_shapes: A nested structure of `tf.TensorShape` objects
+        corresponding to each component a structured value.
+      output_classes: A nested structure of Python `type` objects corresponding
+        to each component of a structured value.
+
+    Returns:
+      A `Structure`.
+
+    Raises:
+      TypeError: If a structure cannot be built the arguments, because one of
+        the component classes in `output_classes` is not supported.
+    """
+    flat_types = nest.flatten(output_types)
+    flat_shapes = nest.flatten(output_shapes)
+    flat_classes = nest.flatten(output_classes)
+    flat_ret = []
+    for flat_type, flat_shape, flat_class in zip(flat_types, flat_shapes,
+                                                 flat_classes):
+      if issubclass(flat_class, sparse_tensor_lib.SparseTensor):
+        flat_ret.append(SparseTensorStructure(flat_type, flat_shape))
+      elif issubclass(flat_class, ops.Tensor):
+        flat_ret.append(TensorStructure(flat_type, flat_shape))
+      else:
+        # NOTE(mrry): Since legacy structures produced by iterators only
+        # comprise Tensors, SparseTensors, and nests, we do not need to support
+        # all structure types here.
+        raise TypeError(
+            "Could not build a structure for output class %r" % flat_type)
+
+    ret = nest.pack_sequence_as(output_classes, flat_ret)
+    if isinstance(ret, Structure):
+      return ret
+    else:
+      return NestedStructure(ret)
+
+  @staticmethod
+  def _register_custom_converter(type_object, converter_fn):
+    """Registers `converter_fn` for converting values of the given type.
+
+    Args:
+      type_object: A Python `type` object representing the type of values
+        accepted by `converter_fn`.
+      converter_fn: A function that takes one argument (an instance of the
+        type represented by `type_object`) and returns a `Structure`.
+    """
+    _STRUCTURE_CONVERSION_FUNCTION_REGISTRY[type_object] = converter_fn
+
 
 # NOTE(mrry): The following classes make extensive use of non-public methods of
 # their base class, so we disable the protected-access lint warning once here.
@@ -179,16 +243,21 @@ class NestedStructure(Structure):
   def _flat_types(self):
     return self._flat_types_list
 
-  def is_compatible_with(self, value):
+  def is_compatible_with(self, other):
+    if not isinstance(other, NestedStructure):
+      return False
     try:
-      nest.assert_shallow_structure(self._nested_structure, value)
+      # pylint: disable=protected-access
+      nest.assert_same_structure(self._nested_structure,
+                                 other._nested_structure)
     except (ValueError, TypeError):
       return False
 
     return all(
-        s.is_compatible_with(v) for s, v in zip(
+        substructure.is_compatible_with(other_substructure)
+        for substructure, other_substructure in zip(
             nest.flatten(self._nested_structure),
-            nest.flatten_up_to(self._nested_structure, value)))
+            nest.flatten(other._nested_structure)))
 
   def _to_tensor_list(self, value):
     ret = []
@@ -201,7 +270,7 @@ class NestedStructure(Structure):
 
     for sub_value, structure in zip(flat_value,
                                     nest.flatten(self._nested_structure)):
-      if not structure.is_compatible_with(sub_value):
+      if not structure.is_compatible_with(Structure.from_value(sub_value)):
         raise ValueError("Component value %r is not compatible with the nested "
                          "structure %r." % (sub_value, structure))
       ret.extend(structure._to_tensor_list(sub_value))
@@ -242,17 +311,13 @@ class TensorStructure(Structure):
   def _flat_types(self):
     return [self._dtype]
 
-  def is_compatible_with(self, value):
-    try:
-      value = ops.convert_to_tensor(value, dtype=self._dtype)
-    except (ValueError, TypeError):
-      return False
-
-    return (self._dtype.is_compatible_with(value.dtype) and
-            self._shape.is_compatible_with(value.shape))
+  def is_compatible_with(self, other):
+    return (isinstance(other, TensorStructure) and
+            self._dtype.is_compatible_with(other._dtype) and
+            self._shape.is_compatible_with(other._shape))
 
   def _to_tensor_list(self, value):
-    if not self.is_compatible_with(value):
+    if not self.is_compatible_with(Structure.from_value(value)):
       raise ValueError("Value %r is not convertible to a tensor with dtype %s "
                        "and shape %s." % (value, self._dtype, self._shape))
     return [value]
@@ -260,7 +325,7 @@ class TensorStructure(Structure):
   def _from_tensor_list(self, flat_value):
     if len(flat_value) != 1:
       raise ValueError("TensorStructure corresponds to a single tf.Tensor.")
-    if not self.is_compatible_with(flat_value[0]):
+    if not self.is_compatible_with(Structure.from_value(flat_value[0])):
       raise ValueError("Cannot convert %r to a tensor with dtype %s and shape "
                        "%s." % (flat_value[0], self._dtype, self._shape))
     return flat_value[0]
@@ -285,16 +350,10 @@ class SparseTensorStructure(Structure):
   def _flat_types(self):
     return [dtypes.variant]
 
-  def is_compatible_with(self, value):
-    try:
-      value = sparse_tensor_lib.SparseTensor.from_value(value)
-    except TypeError:
-      return False
-    return (isinstance(value, (sparse_tensor_lib.SparseTensor,
-                               sparse_tensor_lib.SparseTensorValue)) and
-            self._dtype.is_compatible_with(value.dtype) and
-            self._dense_shape.is_compatible_with(
-                tensor_util.constant_value_as_shape(value.dense_shape)))
+  def is_compatible_with(self, other):
+    return (isinstance(other, SparseTensorStructure) and
+            self._dtype.is_compatible_with(other._dtype) and
+            self._dense_shape.is_compatible_with(other._dense_shape))
 
   def _to_tensor_list(self, value):
     return [sparse_ops.serialize_sparse(value, out_type=dtypes.variant)]
diff --git a/tensorflow/python/data/util/structure_test.py b/tensorflow/python/data/util/structure_test.py
index d0c7df67ae..2982763181 100644
--- a/tensorflow/python/data/util/structure_test.py
+++ b/tensorflow/python/data/util/structure_test.py
@@ -25,7 +25,9 @@ from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import structure
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -106,13 +108,17 @@ class StructureTest(test.TestCase, parameterized.TestCase):
                   indices=[[0], [1], [2]], values=[4, 5, 6], dense_shape=[3])
       }, (constant_op.constant(15.0), constant_op.constant([4, 5, 6]))]),
   )
-  def testIsCompatibleWith(self, original_value, compatible_values,
-                           incompatible_values):
+  def testIsCompatibleWithStructure(self, original_value, compatible_values,
+                                    incompatible_values):
     s = structure.Structure.from_value(original_value)
     for compatible_value in compatible_values:
-      self.assertTrue(s.is_compatible_with(compatible_value))
+      self.assertTrue(
+          s.is_compatible_with(
+              structure.Structure.from_value(compatible_value)))
     for incompatible_value in incompatible_values:
-      self.assertFalse(s.is_compatible_with(incompatible_value))
+      self.assertFalse(
+          s.is_compatible_with(
+              structure.Structure.from_value(incompatible_value)))
 
   # NOTE(mrry): The arguments must be lifted into lambdas because otherwise they
   # will be executed before the (eager- or graph-mode) test environment has been
@@ -322,6 +328,28 @@ class StructureTest(test.TestCase, parameterized.TestCase):
         ValueError, "Expected 3 flat values in NestedStructure but got 2."):
       s_2._from_tensor_list(flat_s_1)
 
+  @parameterized.named_parameters(
+      ("Tensor", dtypes.float32, tensor_shape.scalar(), ops.Tensor,
+       structure.TensorStructure(dtypes.float32, [])),
+      ("SparseTensor", dtypes.int32, tensor_shape.matrix(2, 2),
+       sparse_tensor.SparseTensor,
+       structure.SparseTensorStructure(dtypes.int32, [2, 2])),
+      ("Nest",
+       {"a": dtypes.float32, "b": (dtypes.int32, dtypes.string)},
+       {"a": tensor_shape.scalar(),
+        "b": (tensor_shape.matrix(2, 2), tensor_shape.scalar())},
+       {"a": ops.Tensor, "b": (sparse_tensor.SparseTensor, ops.Tensor)},
+       structure.NestedStructure({
+           "a": structure.TensorStructure(dtypes.float32, []),
+           "b": (structure.SparseTensorStructure(dtypes.int32, [2, 2]),
+                 structure.TensorStructure(dtypes.string, []))})),
+  )
+  def testFromLegacyStructure(self, output_types, output_shapes, output_classes,
+                              expected_structure):
+    actual_structure = structure.Structure._from_legacy_structure(
+        output_types, output_shapes, output_classes)
+    self.assertTrue(expected_structure.is_compatible_with(actual_structure))
+    self.assertTrue(actual_structure.is_compatible_with(expected_structure))
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From fcd7840fbf49802be4bb7f67671465338b7b78a4 Mon Sep 17 00:00:00 2001
From: Piotr Padlewski <prazek@google.com>
Date: Sun, 23 Sep 2018 18:30:38 -0700
Subject: [PATCH 0579/1357] Fix noop elimination optimization.

Fix for b/116169724
Only remove noops if they refer to const nodes.

PiperOrigin-RevId: 214199200
---
 .../python/kernel_tests/optimization/BUILD    | 16 ++++++
 .../optimization/noop_elimination_test.py     | 57 +++++++++++++++++++
 .../grappler/optimizers/data/graph_utils.cc   | 10 ++++
 .../grappler/optimizers/data/graph_utils.h    |  3 +
 .../optimizers/data/noop_elimination.cc       | 16 +++---
 .../optimizers/data/noop_elimination_test.cc  | 43 ++++++++++++++
 6 files changed, 138 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py

diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
index b3187bf61b..a2fc244ced 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
@@ -109,6 +109,22 @@ py_test(
     ],
 )
 
+py_test(
+    name = "noop_elimination_test",
+    size = "small",
+    srcs = ["noop_elimination_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/data/python/ops:batching",
+        "//tensorflow/contrib/data/python/ops:interleave_ops",
+        "//tensorflow/contrib/data/python/ops:optimization",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "optimize_dataset_op_test",
     size = "small",
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py
new file mode 100644
index 0000000000..507feda3ad
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py
@@ -0,0 +1,57 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the MapParallelization optimization."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class NoopEliminationTest(test.TestCase):
+
+  def testNoopElimination(self):
+    a = constant_op.constant(1, dtype=dtypes.int64)
+    b = constant_op.constant(2, dtype=dtypes.int64)
+    some_tensor = math_ops.mul(a, b)
+
+    dataset = dataset_ops.Dataset.range(5)
+    dataset = dataset.apply(
+        optimization.assert_next(
+            ["FiniteRepeat", "FiniteSkip", "Prefetch", "Prefetch"]))
+    dataset = dataset.repeat(some_tensor).skip(5).prefetch(0).take(-1).skip(
+        0).repeat(1).prefetch(0)
+    dataset = dataset.apply(optimization.optimize(["noop_elimination"]))
+
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      for x in range(5):
+        result = sess.run(get_next)
+        self.assertAllEqual(result, x)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index b3f60e34f9..2dd9ee822e 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -88,6 +88,16 @@ NodeDef* AddScalarConstNodeHelper(
 
 }  // namespace
 
+NodeDef* AddScalarPlaceholder(DataType dtype, MutableGraphView* graph) {
+  NodeDef node;
+  node.set_op("Placeholder");
+  SetUniqueGraphNodeName(node.op(), graph->GetGraph(), &node);
+  (*node.mutable_attr())["dtype"].set_type(dtype);
+  TensorShapeProto* shape = (*node.mutable_attr())["shape"].mutable_shape();
+  shape->set_unknown_rank(false);
+  return graph->AddNode(std::move(node));
+}
+
 NodeDef* AddNode(StringPiece name, StringPiece op,
                  const std::vector<string>& inputs,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h
index 1652afcd9e..b117482db2 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h
@@ -37,6 +37,9 @@ NodeDef* AddNode(StringPiece name, StringPiece op,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  MutableGraphView* graph);
 
+// Adds Placeholder node for given type.
+NodeDef* AddScalarPlaceholder(DataType dtype, MutableGraphView* graph);
+
 // Adds a Const node with the given value to the graph.
 template <typename T>
 NodeDef* AddScalarConstNode(T v, MutableGraphView* graph) {
diff --git a/tensorflow/core/grappler/optimizers/data/noop_elimination.cc b/tensorflow/core/grappler/optimizers/data/noop_elimination.cc
index a26f1000a3..cf5a19bab1 100644
--- a/tensorflow/core/grappler/optimizers/data/noop_elimination.cc
+++ b/tensorflow/core/grappler/optimizers/data/noop_elimination.cc
@@ -33,25 +33,27 @@ namespace {
 bool IsTakeAll(const NodeDef& take_node, const GraphView& graph) {
   if (take_node.op() != "TakeDataset") return false;
 
-  const NodeDef& count_node = *graph.GetNode(take_node.input(1));
+  const auto& count_node = *graph.GetNode(take_node.input(1));
+  if (count_node.op() != "Const") return false;
   // We are looking only for 'take' with negative count.
   return count_node.attr().at("value").tensor().int64_val(0) < 0;
 }
 
+bool IsConstNodeWithValue(const NodeDef& node, int value) {
+  if (node.op() != "Const") return false;
+  return node.attr().at("value").tensor().int64_val(0) == value;
+}
+
 bool IsSkipNone(const NodeDef& skip_node, const GraphView& graph) {
   if (skip_node.op() != "SkipDataset") return false;
-
-  const NodeDef& count_node = *graph.GetNode(skip_node.input(1));
   // We are looking only for skip(0) nodes.
-  return count_node.attr().at("value").tensor().int64_val(0) == 0;
+  return IsConstNodeWithValue(*graph.GetNode(skip_node.input(1)), 0);
 }
 
 bool IsRepeatOne(const NodeDef& repeat_node, const GraphView& graph) {
   if (repeat_node.op() != "RepeatDataset") return false;
-
-  const NodeDef& count_node = *graph.GetNode(repeat_node.input(1));
   // We are looking only for repeat(1) nodes.
-  return count_node.attr().at("value").tensor().int64_val(0) == 1;
+  return IsConstNodeWithValue(*graph.GetNode(repeat_node.input(1)), 1);
 }
 
 bool IsNoOp(const NodeDef& node, const GraphView& graph) {
diff --git a/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc b/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc
index f445e75aa7..be1a66df75 100644
--- a/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc
@@ -43,6 +43,14 @@ NodeDef *MakeUnaryNode(StringPiece node_type, int count, string input_node,
                               GetCommonAttributes(), graph);
 }
 
+NodeDef *MakeUnaryNonConstNode(StringPiece node_type, string input_node,
+                               MutableGraphView *graph) {
+  NodeDef *node_count = graph_utils::AddScalarPlaceholder(DT_INT32, graph);
+  return graph_utils::AddNode("", node_type,
+                              {std::move(input_node), node_count->name()},
+                              GetCommonAttributes(), graph);
+}
+
 NodeDef *MakeCacheNode(string input_node, MutableGraphView *graph) {
   NodeDef *node_filename =
       graph_utils::AddScalarConstNode<StringPiece>("", graph);
@@ -205,6 +213,41 @@ INSTANTIATE_TEST_CASE_P(
                        ::testing::Values(*kTakeNode, *kSkipNode,
                                          *kRepeatNode)));
 
+struct NoOpPlaceholdersTest
+    : ::testing::TestWithParam<std::tuple<string, string>> {};
+
+TEST_P(NoOpPlaceholdersTest, NonConstNoOpNode) {
+  GrapplerItem item;
+  MutableGraphView graph(&item.graph);
+
+  static_assert(std::tuple_size<NodesTypes>::value == 2,
+                "Make sure to include everything in the test");
+  const std::vector<string> noop_nodes = {std::get<0>(GetParam()),
+                                          std::get<1>(GetParam())};
+  NodeDef *range_node = MakeRangeNode(&graph);
+  std::vector<string> nodes_to_keep;
+  nodes_to_keep.reserve(noop_nodes.size());
+  NodeDef *previous = range_node;
+
+  for (const auto &noop_node : noop_nodes) {
+    NodeDef *node = MakeUnaryNonConstNode(noop_node, previous->name(), &graph);
+    nodes_to_keep.push_back(node->name());
+    previous = node;
+  }
+
+  NoOpElimination optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+  for (const auto &noop_node_name : nodes_to_keep)
+    EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName(noop_node_name, output));
+}
+
+INSTANTIATE_TEST_CASE_P(
+    DoNotRemovePlaceholders, NoOpPlaceholdersTest,
+    ::testing::Combine(
+        ::testing::Values("TakeDataset", "SkipDataset", "RepeatDataset"),
+        ::testing::Values("TakeDataset", "SkipDataset", "RepeatDataset")));
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From fe4ae644e55ac776b310160f363bcf71a221ee04 Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Sun, 23 Sep 2018 23:59:53 -0700
Subject: [PATCH 0580/1357] Remove dependency on contrib dataset ops.

PiperOrigin-RevId: 214219282
---
 .../distribute/python/input_ops_test.py       | 20 ++-----------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/input_ops_test.py b/tensorflow/contrib/distribute/python/input_ops_test.py
index c5acb7ced4..559de97bb1 100644
--- a/tensorflow/contrib/distribute/python/input_ops_test.py
+++ b/tensorflow/contrib/distribute/python/input_ops_test.py
@@ -20,8 +20,6 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import interleave_ops
 from tensorflow.contrib.distribute.python import input_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
@@ -126,20 +124,6 @@ class AutoShardDatasetTest(test.TestCase):
     # contain records in order of files.
     self._verifySimpleShardingOutput(dataset, self._record)
 
-  def testParallelInterleave(self):
-    dataset = dataset_ops.Dataset.from_tensor_slices(
-        self._createTFRecordFiles())
-    dataset = dataset.apply(interleave_ops.parallel_interleave(
-        readers.TFRecordDataset,
-        cycle_length=4,
-        block_length=self._num_records))
-    dataset = input_ops.auto_shard_dataset(
-        dataset, self._num_shards, self._shard_index)
-
-    # Since block_length == num records in each file, the output will still
-    # contain records in order of files.
-    self._verifySimpleShardingOutput(dataset, self._record)
-
   def testListfiles(self):
     filenames = self._createTFRecordFiles()
     file_pattern = filenames[0].rsplit("/", 1)[0] + "/tf_record.*.txt"
@@ -171,8 +155,8 @@ class AutoShardDatasetTest(test.TestCase):
     dataset = dataset.prefetch(buffer_size=batch_size)
     dataset = dataset.shuffle(2 * self._num_files * self._num_records)
     dataset = dataset.repeat(num_epochs)
-    dataset = dataset.apply(batching.map_and_batch(
-        lambda x: x, batch_size=batch_size))
+    dataset = dataset.map(lambda x: x)
+    dataset = dataset.batch(batch_size)
     dataset = dataset.prefetch(buffer_size=None)
 
     # Auto shard.
-- 
GitLab


From 8f4ded5884684f40b4912d95c717b185340996b8 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Mon, 24 Sep 2018 11:07:21 +0300
Subject: [PATCH 0581/1357] Fix clang styles.

---
 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
index 484cc4d6f5..6753c67701 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -24,9 +24,7 @@ constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__;
 
 class ByteSwapper {
  public:
-  ByteSwapper(bool big_endian) {
-    swap_ = big_endian == kLittleEndian;
-  }
+  ByteSwapper(bool big_endian) { swap_ = big_endian == kLittleEndian; }
 
   inline void SwapIfRequiredInt16(int16_t *x) const {
     if (swap_) {
-- 
GitLab


From cdcc7d31cce91169dc686387522d7015ac57db0e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 02:01:45 -0700
Subject: [PATCH 0582/1357] compat: Update forward compatibility horizon to
 2018-09-24

PiperOrigin-RevId: 214230777
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index ec840965a7..45f40cd183 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 23)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 24)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 90c68770467701a23d23a85c5d769f6f4fa39f0f Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Mon, 24 Sep 2018 12:14:45 +0300
Subject: [PATCH 0583/1357] Fix byte-order issue.

---
 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
index 6753c67701..46df3e39dc 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -17,14 +17,13 @@ limitations under the License.
 #define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_
 
 #include <stdint.h>
+#include "tensorflow/core/platform/byte_order.h"
 
 namespace tensorflow {
 
-constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__;
-
 class ByteSwapper {
  public:
-  ByteSwapper(bool big_endian) { swap_ = big_endian == kLittleEndian; }
+  ByteSwapper(bool big_endian) { swap_ = big_endian == port::kLittleEndian; }
 
   inline void SwapIfRequiredInt16(int16_t *x) const {
     if (swap_) {
-- 
GitLab


From b57bdf414edb27b82a95c5f4e2729fafd4cf2dc7 Mon Sep 17 00:00:00 2001
From: Lasse Espeholt <lespeholt@google.com>
Date: Mon, 24 Sep 2018 02:17:52 -0700
Subject: [PATCH 0584/1357] Clean-up of function.py.

PiperOrigin-RevId: 214232622
---
 tensorflow/c/eager/BUILD            |  5 +-
 tensorflow/python/eager/BUILD       |  5 +-
 tensorflow/python/eager/function.py | 86 +++++++++++++++--------------
 3 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index 37be52f57d..3ee31a6a7a 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -68,7 +68,10 @@ tf_cuda_library(
 tf_cuda_library(
     name = "c_api_internal",
     hdrs = ["c_api_internal.h"],
-    visibility = ["//tensorflow:internal"],
+    visibility = [
+        "//learning/deepmind/courier:__pkg__",
+        "//tensorflow:internal",
+    ],
     deps = [
         ":c_api",
         "//tensorflow/c:c_api",
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index f571da308e..d3d997e6df 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -17,7 +17,10 @@ cc_library(
         "pywrap_tensor.h",
         "pywrap_tfe.h",
     ],
-    visibility = ["//tensorflow:internal"],
+    visibility = [
+        "//learning/deepmind/courier:__pkg__",
+        "//tensorflow:internal",
+    ],
     deps = [
         "//tensorflow/c:c_api",
         "//tensorflow/c:c_api_internal",
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index bcb1881264..1f5d479882 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -850,7 +850,7 @@ def _get_defun_inputs_from_args(args):
 def func_graph_from_py_func(name,
                             python_func,
                             args,
-                            kwds,
+                            kwargs,
                             signature=None,
                             func_graph=None):
   """Returns a `FuncGraph` generated from `python_func`.
@@ -860,11 +860,11 @@ def func_graph_from_py_func(name,
     python_func: the Python function to trace.
     args: the positional args with which the Python function should be called;
       ignored if a signature is provided.
-    kwds: the keyword args with which the Python function should be called;
+    kwargs: the keyword args with which the Python function should be called;
       ignored if a signature is provided.
     signature: a possibly nested sequence of `TensorSpecs` specifying the shapes
       and dtypes of the arguments. When a signature is provided, `args` and
-      `kwds` are ignored, and `python_func` is traced with Tensors conforming
+      `kwargs` are ignored, and `python_func` is traced with Tensors conforming
       to `signature`. If `None`, the shapes and dtypes are inferred from the
       inputs.
     func_graph: Optional. An instance of FuncGraph. If provided, we will use
@@ -885,16 +885,17 @@ def func_graph_from_py_func(name,
 
     if signature is None:
       func_args = _get_defun_inputs_from_args(args)
-      func_kwds = _get_defun_inputs_from_args(kwds)
+      func_kwargs = _get_defun_inputs_from_args(kwargs)
     else:
       func_args = _get_defun_inputs_from_signature(signature)
-      func_kwds = {}
+      func_kwargs = {}
 
     # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
     # Variables to help check whether mutation happens in calling the function
     # Copy the recursive list, tuple and map structure, but not base objects
     func_args_before = nest.pack_sequence_as(func_args, nest.flatten(func_args))
-    func_kwds_before = nest.pack_sequence_as(func_kwds, nest.flatten(func_kwds))
+    func_kwargs_before = nest.pack_sequence_as(
+        func_kwargs, nest.flatten(func_kwargs))
 
     def convert(x):
       """Converts an argument to a Tensor."""
@@ -913,7 +914,7 @@ def func_graph_from_py_func(name,
 
     this_tape = tape.push_new_tape()
     try:
-      func_outputs = python_func(*func_args, **func_kwds)
+      func_outputs = python_func(*func_args, **func_kwargs)
       # invariant: `func_outputs` contains only Tensors and `None`s.
       func_outputs = nest.map_structure(convert, func_outputs)
 
@@ -933,16 +934,16 @@ def func_graph_from_py_func(name,
             raise ValueError(errmsg)
 
       check_mutation(func_args_before, func_args)
-      check_mutation(func_kwds_before, func_kwds)
+      check_mutation(func_kwargs_before, func_kwargs)
     finally:
       tape.pop_tape(this_tape)
 
-    # Variables in `func_args`, `func_kwds` should be explicit inputs
+    # Variables in `func_args`, `func_kwargs` should be explicit inputs
     # to the function, not captured inputs.
     tape_variables = this_tape.watched_variables()
     arg_variables = set()
     inputs = []
-    for arg in nest.flatten(func_args) + nest.flatten(func_kwds):
+    for arg in nest.flatten(func_args) + nest.flatten(func_kwargs):
       if isinstance(arg, resource_variable_ops.ResourceVariable):
         try:
           resource_placeholder = func_graph.captures.pop(arg.handle)
@@ -1073,11 +1074,11 @@ class PolymorphicFunction(object):
     if isinstance(python_function, functools.partial):
       self._python_function = python_function.func
       self._args_to_prepend = python_function.args or tuple()
-      self._kwds_to_include = python_function.keywords or {}
+      self._kwargs_to_include = python_function.keywords or {}
     else:
       self._python_function = python_function
       self._args_to_prepend = tuple()
-      self._kwds_to_include = {}
+      self._kwargs_to_include = {}
     self._name = name
     self._function_cache = collections.OrderedDict()
     self._function_attributes = attributes or {}
@@ -1115,9 +1116,9 @@ class PolymorphicFunction(object):
       self._input_signature = tuple(input_signature)
       self._flat_input_signature = tuple(nest.flatten(input_signature))
 
-  def __call__(self, *args, **kwds):
+  def __call__(self, *args, **kwargs):
     """Calls a graph function specialized to the inputs."""
-    graph_function, inputs = self._maybe_define_function(*args, **kwds)
+    graph_function, inputs = self._maybe_define_function(args, kwargs)
     return graph_function(*inputs)
 
   @property
@@ -1135,7 +1136,7 @@ class PolymorphicFunction(object):
       *args: inputs to specialize on.
       **kwargs: inputs to specialize on.
     """
-    graph_function, _ = self._maybe_define_function(*args, **kwargs)
+    graph_function, _ = self._maybe_define_function(args, kwargs)
     return graph_function
 
   def __get__(self, instance, owner):
@@ -1156,13 +1157,13 @@ class PolymorphicFunction(object):
     # then `instance` will be `foo` (and `owner` will be `Foo`).
     return functools.partial(self.__call__, instance)
 
-  def _cache_key(self, args, kwds, ctx, graph):
+  def _cache_key(self, args, kwargs, ctx, graph):
     """Computes the cache key given inputs and execution context."""
     if self._input_signature is None:
-      inputs = (args, kwds) if kwds else args
+      inputs = (args, kwargs) if kwargs else args
       cache_key = tuple(_encode_arg(arg) for arg in inputs)
     else:
-      del args, kwds
+      del args, kwargs
       cache_key = self._flat_input_signature
 
     # The graph, or whether we're executing eagerly, should be a part of the
@@ -1181,8 +1182,8 @@ class PolymorphicFunction(object):
 
     return cache_key + (execution_context, device_functions, colocation_stack)
 
-  def _canonicalize_function_inputs(self, *args, **kwds):
-    """Canonicalizes `args` and `kwds`.
+  def _canonicalize_function_inputs(self, *args, **kwargs):
+    """Canonicalizes `args` and `kwargs`.
 
     Canonicalize the inputs to the Python function using its fullargspec. In
     particular, we parse the varags and kwargs that this
@@ -1192,28 +1193,28 @@ class PolymorphicFunction(object):
 
     Args:
       *args: The varargs this object was called with.
-      **kwds: The keyword args this function was called with.
+      **kwargs: The keyword args this function was called with.
 
     Returns:
       A canonicalized ordering of the inputs.
 
     Raises:
-      ValueError: If a keyword in `kwds` cannot be matched with a positional
+      ValueError: If a keyword in `kwargs` cannot be matched with a positional
         argument when an input signature is specified, or when the inputs
         do not conform to the input signature.
     """
     args = self._args_to_prepend + args
-    kwds = dict(kwds, **self._kwds_to_include)
+    kwargs = dict(kwargs, **self._kwargs_to_include)
     # Maps from index of arg to its corresponding value, according to `args`
-    # and `kwds`; seeded with the default values for the named args that aren't
-    # in `args`.
+    # and `kwargs`; seeded with the default values for the named args that
+    # aren't in `args`.
     arg_indices_to_values = {
         index: default
         for index, default in six.iteritems(self._arg_indices_to_default_values)
         if index >= len(args)
     }
     consumed_args = []
-    for arg, value in six.iteritems(kwds):
+    for arg, value in six.iteritems(kwargs):
       index = self._args_to_indices.get(arg, None)
       if index is not None:
         arg_indices_to_values[index] = value
@@ -1223,9 +1224,9 @@ class PolymorphicFunction(object):
                          "function with keyword arguments when "
                          "input_signature is provided.")
     for arg in consumed_args:
-      # After this loop, `kwds` will only contain true keyword arguments, as
+      # After this loop, `kwargs` will only contain true keyword arguments, as
       # opposed to named arguments called in a keyword-like fashion.
-      kwds.pop(arg)
+      kwargs.pop(arg)
     inputs = args + _deterministic_dict_values(arg_indices_to_values)
     flat_inputs = nest.flatten(inputs)
 
@@ -1239,9 +1240,9 @@ class PolymorphicFunction(object):
       inputs = nest.pack_sequence_as(structure=inputs,
                                      flat_sequence=flat_inputs)
     if self._input_signature is None:
-      return inputs, kwds
+      return inputs, kwargs
     else:
-      assert not kwds
+      assert not kwargs
       try:
         nest.assert_same_structure(self._input_signature, inputs)
       except (ValueError, TypeError):
@@ -1260,24 +1261,27 @@ class PolymorphicFunction(object):
                          (str(inputs), str(self._input_signature)))
       return inputs, {}
 
-  def _maybe_define_function(self, *args, **kwds):
+  def _maybe_define_function(self, args, kwargs):
     """Gets a function for these inputs, defining it if necessary.
 
+    `args` and `kwargs` can be None if this `PolymorphicFunction` was created
+    with an `input_signature`.
+
     Args:
-      *args: args for the Python function.
-      **kwds: keywords for the Python function.
+      args: The varargs for the Python function.
+      kwargs: The keyword args for the Python function.
 
     Returns:
       A graph function corresponding to the input signature implied by args and
-      kwds, as well as the inputs that the object should be called with.
+      kwargs, as well as the inputs that the object should be called with.
 
     Raises:
       ValueError: If inputs are incompatible with the input signature.
       TypeError: If the function inputs include non-hashable objects
     """
-
-    args, kwds = self._canonicalize_function_inputs(*args, **kwds)
-    cache_key = self._cache_key(args, kwds, context.context(),
+    if self._input_signature is None or args is not None or kwargs is not None:
+      args, kwargs = self._canonicalize_function_inputs(*args, **kwargs)
+    cache_key = self._cache_key(args, kwargs, context.context(),
                                 ops.get_default_graph())
     with self._lock:
       try:
@@ -1289,11 +1293,11 @@ class PolymorphicFunction(object):
       if graph_function is None:
         graph_function = Function(
             func_graph_from_py_func(self._name, self._python_function, args,
-                                    kwds, self._input_signature),
+                                    kwargs, self._input_signature),
             self._function_attributes)
         self._function_cache[cache_key] = graph_function
       return graph_function, [
-          t for t in nest.flatten((args, kwds))
+          t for t in nest.flatten((args, kwargs))
           if isinstance(t, (ops.Tensor, resource_variable_ops.ResourceVariable))
       ]
 
@@ -1933,9 +1937,9 @@ def automatic_control_dependencies(f):
     The wrapped function.
   """
 
-  def wrapper(*args, **kwds):
+  def wrapper(*args, **kwargs):
     with AutomaticControlDependencies() as a:
-      result = f(*args, **kwds)
+      result = f(*args, **kwargs)
       result_flat = [a.mark_as_return(t) for t in nest.flatten(result)]
       return nest.pack_sequence_as(result, result_flat)
 
-- 
GitLab


From 379ca4afe9e31f550cd04451af04150b6bbecf78 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Mon, 24 Sep 2018 03:19:11 -0700
Subject: [PATCH 0585/1357] Generalize sort implementation in the HloEvaluator.

It only worked for ranks 1 or 2, and only if the dimension to sort is the most minor dimension.
Also fix the SafeLess function so that the SortExtremeValues() test passes.

PiperOrigin-RevId: 214239560
---
 .../compiler/xla/service/hlo_evaluator.cc     | 144 +++++++++---------
 .../xla/service/hlo_evaluator_typed_visitor.h | 123 ++++++++-------
 2 files changed, 141 insertions(+), 126 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 06b6d5b559..b91b2406e2 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -1173,80 +1173,85 @@ StatusOr<Literal> EvaluateSortInternal(HloInstruction* sort,
   TF_RET_CHECK(
       ShapeUtil::SameDimensions(keys_literal.shape(), values_literal.shape()))
       << "Sort keys and values must have the same dimensions";
-  TF_RET_CHECK(rank > 0 && rank <= 2)
-      << "Sort is only supported for rank-1 and rank-2 shapes, rank is: "
-      << rank;
   TF_RET_CHECK(sort->operand_count() == 2) << "Expected key-value sort";
-  // We need to sort and array of keys and an array of values, where the
+  // We need to sort an array of keys and an array of values, where the
   // sorted order of the values is determined by the keys. The simplest(?)
   // way to do this is to go to an array-of-pairs representation, sort the
   // array using the keys, and then go back to pair-of-arrays.
   VLOG(3) << "HandleSort keys_literal: " << keys_literal.ToString();
   VLOG(3) << "HandleSort values_literal: " << values_literal.ToString();
 
-  auto sort_r1 = [](const Literal& keys_literal,
-                    const Literal& values_literal) {
-    const auto& keys_data = keys_literal.data<KeyType>();
-    const auto& values_data = values_literal.data<ValueType>();
-
-    using kv_pair = std::pair<KeyType, ValueType>;
-    std::vector<kv_pair> key_value_vector;
-    CHECK_EQ(keys_data.size(), values_data.size());
-    key_value_vector.reserve(keys_data.size());
-    for (int i = 0; i < keys_data.size(); ++i) {
-      key_value_vector.push_back(std::make_pair(keys_data[i], values_data[i]));
-    }
-    std::sort(key_value_vector.begin(), key_value_vector.end(),
-              [](const kv_pair& a, const kv_pair& b) {
-                return SafeLess<KeyType>(a.first, b.first);
-              });
-    std::vector<KeyType> result_keys;
-    std::vector<ValueType> result_values;
-    for (const auto& key_value : key_value_vector) {
-      result_keys.push_back(key_value.first);
-      result_values.push_back(key_value.second);
-    }
-    Literal result_keys_literal(keys_literal.shape());
-    result_keys_literal.PopulateR1(absl::Span<const KeyType>(result_keys));
-    Literal result_values_literal(values_literal.shape());
-    result_values_literal.PopulateR1(
-        absl::Span<const ValueType>(result_values));
-    return std::make_pair(std::move(result_keys_literal),
-                          std::move(result_values_literal));
-  };
-
-  Literal result_tuple;
-  if (rank == 1) {
-    auto result_pair = sort_r1(keys_literal, values_literal);
-    result_tuple =
-        LiteralUtil::MakeTuple({&result_pair.first, &result_pair.second});
-  } else {
-    // For R2 sort, the desired semantics are to sort each matrix row
-    // independently.
-    Literal keys_result_literal(keys_literal.shape());
-    Literal values_result_literal(values_literal.shape());
-    int64 r1_length = keys_literal.shape().dimensions(1);
-    for (int64 row = 0; row < keys_literal.shape().dimensions(0); ++row) {
-      TF_ASSIGN_OR_RETURN(auto keys_r1_slice,
-                          keys_literal.Slice({row, 0}, {row + 1, r1_length})
-                              .Reshape({r1_length}));
-      TF_ASSIGN_OR_RETURN(auto values_r1_slice,
-                          values_literal.Slice({row, 0}, {row + 1, r1_length})
-                              .Reshape({r1_length}));
-      auto r1_result_pair = sort_r1(keys_r1_slice, values_r1_slice);
-      TF_ASSIGN_OR_RETURN(auto sorted_keys,
-                          r1_result_pair.first.Reshape({1, r1_length}));
-      TF_ASSIGN_OR_RETURN(auto sorted_values,
-                          r1_result_pair.second.Reshape({1, r1_length}));
-      TF_RETURN_IF_ERROR(keys_result_literal.CopySliceFrom(
-          sorted_keys, {0, 0}, {row, 0}, {1, r1_length}));
-      TF_RETURN_IF_ERROR(values_result_literal.CopySliceFrom(
-          sorted_values, {0, 0}, {row, 0}, {1, r1_length}));
-    }
-    result_tuple =
-        LiteralUtil::MakeTuple({&keys_result_literal, &values_result_literal});
+  if (rank == 0) {
+    // Nothing to sort.
+    return LiteralUtil::MakeTuple({&keys_literal, &values_literal});
   }
 
+  Literal keys_result_literal(keys_literal.shape());
+  Literal values_result_literal(values_literal.shape());
+  std::vector<int64> zero_base(rank, 0);
+  std::vector<int64> increment(rank, 1);
+  int64 sort_dim = sort->dimensions(0);
+  int64 sort_dim_elements = keys_literal.shape().dimensions(sort_dim);
+  increment[sort_dim] = sort_dim_elements;
+  // Iterate through each dimension except 'sort_dim'.
+  TF_RETURN_IF_ERROR(ShapeUtil::ForEachIndexWithStatus(
+      keys_literal.shape(), zero_base,
+      AsInt64Slice(keys_literal.shape().dimensions()), increment,
+      [&](absl::Span<const int64> indices) -> StatusOr<bool> {
+        // Extract a slice from the keys and values literals that correspond to
+        // exactly the row in dimension 'sort_dim'.
+        std::vector<int64> limit_indices(indices.begin(), indices.end());
+        std::for_each(limit_indices.begin(), limit_indices.end(),
+                      [](int64& index) { ++index; });
+        limit_indices[sort_dim] = sort_dim_elements;
+        TF_ASSIGN_OR_RETURN(auto keys_to_sort,
+                            keys_literal.Slice(indices, limit_indices)
+                                .Reshape({sort_dim_elements}));
+        const auto& keys_data = keys_to_sort.data<KeyType>();
+        TF_ASSIGN_OR_RETURN(auto values_to_sort,
+                            values_literal.Slice(indices, limit_indices)
+                                .Reshape({sort_dim_elements}));
+        const auto& values_data = values_to_sort.data<ValueType>();
+        using kv_pair = std::pair<KeyType, ValueType>;
+        std::vector<kv_pair> key_value_vector;
+        key_value_vector.reserve(keys_data.size());
+        for (int i = 0; i < keys_data.size(); ++i) {
+          key_value_vector.push_back(
+              std::make_pair(keys_data[i], values_data[i]));
+        }
+        std::sort(key_value_vector.begin(), key_value_vector.end(),
+                  [](const kv_pair& a, const kv_pair& b) {
+                    return SafeLess<KeyType>(a.first, b.first);
+                  });
+        std::vector<KeyType> result_keys;
+        std::vector<ValueType> result_values;
+        for (const auto& key_value : key_value_vector) {
+          result_keys.push_back(key_value.first);
+          result_values.push_back(key_value.second);
+        }
+        Literal sorted_keys(ShapeUtil::MakeShape(
+            keys_literal.shape().element_type(), {sort_dim_elements}));
+        sorted_keys.PopulateR1(absl::Span<const KeyType>(result_keys));
+        Literal sorted_values(ShapeUtil::MakeShape(
+            values_literal.shape().element_type(), {sort_dim_elements}));
+        sorted_values.PopulateR1(absl::Span<const ValueType>(result_values));
+        std::vector<int64> slice_dimensions(rank, 1);
+        slice_dimensions[sort_dim] = sort_dim_elements;
+        std::vector<int64> start_indices(rank, 0);
+        TF_ASSIGN_OR_RETURN(auto sorted_keys_reshaped,
+                            sorted_keys.Reshape(slice_dimensions));
+        TF_RETURN_IF_ERROR(keys_result_literal.CopySliceFrom(
+            sorted_keys_reshaped, start_indices, indices, slice_dimensions));
+        TF_ASSIGN_OR_RETURN(auto sorted_values_reshaped,
+                            sorted_values.Reshape(slice_dimensions));
+        TF_RETURN_IF_ERROR(values_result_literal.CopySliceFrom(
+            sorted_values_reshaped, start_indices, indices, slice_dimensions));
+        return true;
+      }));
+
+  Literal result_tuple;
+  result_tuple =
+      LiteralUtil::MakeTuple({&keys_result_literal, &values_result_literal});
   VLOG(3) << "HandleSort result_tuple: " << result_tuple.ToString();
   return std::move(result_tuple);
 }
@@ -1292,15 +1297,6 @@ StatusOr<Literal> EvaluateSort(HloInstruction* sort,
 }  // namespace
 
 Status HloEvaluator::HandleSort(HloInstruction* sort) {
-  const int64 sort_dim = sort->dimensions(0);
-  const int64 rank = ShapeUtil::Rank(sort->operand(0)->shape());
-  if (sort_dim != rank - 1) {
-    return Unimplemented(
-        "Trying to sort along dimension %d, which is not the last "
-        "dimension",
-        sort_dim);
-  }
-
   if (!ShapeUtil::IsTuple(sort->shape())) {
     return DefaultAction(sort);
   } else {
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index 8fb17a0033..35391ecf8a 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -16,6 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EVALUATOR_TYPED_VISITOR_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EVALUATOR_TYPED_VISITOR_H_
 
+#include <cmath>
+
 #include "absl/algorithm/container.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
@@ -41,7 +43,9 @@ template <typename T>
 using is_complex64_t = std::is_same<T, complex64>;
 
 // It's UB to use std::sort with std::less<float>, because of NaNs. Define
-// "safe" less functions which are actually strict weak orders.
+// "safe" less functions which are actually strict weak orders. -NaN and NaN
+// should appear at the beginning and end of the ordering, and -0.0 should
+// appear before 0.0.
 template <
     typename NativeT,
     typename std::enable_if<std::is_integral<NativeT>::value>::type* = nullptr>
@@ -49,26 +53,33 @@ bool SafeLess(const NativeT& a, const NativeT& b) {
   return a < b;
 }
 
-template <typename NativeT,
-          typename std::enable_if<
-              std::is_floating_point<NativeT>::value ||
-              std::is_same<NativeT, bfloat16>::value>::type* = nullptr>
+template <typename NativeT, typename std::enable_if<std::is_floating_point<
+                                NativeT>::value>::type* = nullptr>
 bool SafeLess(const NativeT& a, const NativeT& b) {
-  if (std::isnan(b)) {
-    return !std::isnan(a);
-  } else {
-    return a < b;
+  bool lhs_is_negative = std::signbit(a);
+  bool rhs_is_negative = std::signbit(b);
+  // If the signs are different, we can just compare the signs.
+  if (lhs_is_negative != rhs_is_negative) {
+    return lhs_is_negative && !rhs_is_negative;
+  }
+  bool lhs_nan = std::isnan(a);
+  bool rhs_nan = std::isnan(b);
+  // Exactly one number is nan?
+  if (lhs_nan != rhs_nan) {
+    if (lhs_nan) {
+      return lhs_is_negative;
+    }
+    return !rhs_is_negative;
   }
+  return a < b;
 }
 
-template <typename NativeT, typename std::enable_if<std::is_same<
-                                NativeT, Eigen::half>::value>::type* = nullptr>
+template <typename NativeT,
+          typename std::enable_if<
+              std::is_same<NativeT, bfloat16>::value ||
+              std::is_same<NativeT, Eigen::half>::value>::type* = nullptr>
 bool SafeLess(const NativeT& a, const NativeT& b) {
-  if (Eigen::half_impl::isnan(b)) {
-    return !Eigen::half_impl::isnan(a);
-  } else {
-    return a < b;
-  }
+  return SafeLess(static_cast<float>(a), static_cast<float>(b));
 }
 
 // Templated DfsHloVisitor for use by HloEvaluator.
@@ -1527,47 +1538,55 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
                 !std::is_same<NativeT, bool>::value>::type* = nullptr>
   Status HandleSort(HloInstruction* sort) {
     auto keys = sort->operand(0);
-    auto rank = ShapeUtil::Rank(keys->shape());
-    TF_RET_CHECK(rank > 0 && rank <= 2)
-        << "Sort is only supported for R1 and R2 shapes";
     TF_RET_CHECK(sort->operand_count() == 1)
         << "Typed visitor does not support key-value sort";
 
     const Literal& keys_literal = parent_->GetEvaluatedLiteralFor(keys);
-
-    auto sort_r1 = [this](const Literal& keys_literal) {
-      VLOG(3) << "HandleSort keys_literal: " << keys_literal.ToString();
-      const auto& keys_data = keys_literal.data<ReturnT>();
-
-      std::vector<ReturnT> result_data(keys_data.begin(), keys_data.end());
-      std::sort(result_data.begin(), result_data.end(),
-                [](const ReturnT& a, const ReturnT& b) {
-                  return SafeLess<ReturnT>(a, b);
-                });
-      Literal result_literal(keys_literal.shape());
-      result_literal.PopulateR1(absl::Span<const ReturnT>(result_data));
-      VLOG(3) << "HandleSort result_literal: " << result_literal.ToString();
-      return result_literal;
-    };
-
-    if (rank == 1) {
-      parent_->evaluated_[sort] = std::move(sort_r1(keys_literal));
-    } else {
-      // For R2 sort, the desired semantics are to sort each matrix row
-      // independently.
-      Literal result_literal(keys_literal.shape());
-      int64 r1_length = keys->shape().dimensions(1);
-      for (int64 row = 0; row < keys->shape().dimensions(0); ++row) {
-        TF_ASSIGN_OR_RETURN(auto r1_slice,
-                            keys_literal.Slice({row, 0}, {row + 1, r1_length})
-                                .Reshape({r1_length}));
-        auto r1_result = sort_r1(r1_slice);
-        TF_ASSIGN_OR_RETURN(r1_result, r1_result.Reshape({1, r1_length}));
-        TF_RETURN_IF_ERROR(result_literal.CopySliceFrom(
-            r1_result, {0, 0}, {row, 0}, {1, r1_length}));
-      }
-      parent_->evaluated_[sort] = std::move(result_literal);
+    int64 sort_dim = sort->dimensions(0);
+    int64 sort_dim_elements = keys->shape().dimensions(sort_dim);
+    int64 rank = ShapeUtil::Rank(keys->shape());
+    if (rank == 0) {
+      // Nothing to sort.
+      parent_->evaluated_[sort] = keys_literal.Clone();
+      return Status::OK();
     }
+    Literal result_literal(keys_literal.shape());
+    std::vector<int64> zero_base(rank, 0);
+    std::vector<int64> increment(rank, 1);
+    increment[sort_dim] = sort_dim_elements;
+    // Iterate through each dimension except 'sort_dim'.
+    TF_RETURN_IF_ERROR(ShapeUtil::ForEachIndexWithStatus(
+        keys->shape(), zero_base, AsInt64Slice(keys->shape().dimensions()),
+        increment, [&](absl::Span<const int64> indices) -> StatusOr<bool> {
+          // Extract a slice from the literal that corresponds to exactly the
+          // row in dimension 'sort_dim'.
+          std::vector<int64> limit_indices(indices.begin(), indices.end());
+          std::for_each(limit_indices.begin(), limit_indices.end(),
+                        [](int64& index) { ++index; });
+          limit_indices[sort_dim] = sort_dim_elements;
+          TF_ASSIGN_OR_RETURN(auto row_to_sort,
+                              keys_literal.Slice(indices, limit_indices)
+                                  .Reshape({sort_dim_elements}));
+          const auto& row_data = row_to_sort.data<NativeT>();
+
+          std::vector<NativeT> result_data(row_data.begin(), row_data.end());
+          std::sort(result_data.begin(), result_data.end(),
+                    [](const NativeT& a, const NativeT& b) {
+                      return SafeLess<NativeT>(a, b);
+                    });
+          Literal sorted_row(ShapeUtil::MakeShape(keys->shape().element_type(),
+                                                  {sort_dim_elements}));
+          sorted_row.PopulateR1(absl::Span<const NativeT>(result_data));
+          std::vector<int64> slice_dimensions(rank, 1);
+          slice_dimensions[sort_dim] = sort_dim_elements;
+          TF_ASSIGN_OR_RETURN(auto sorted_row_reshaped,
+                              sorted_row.Reshape(slice_dimensions));
+          std::vector<int64> start_indices(rank, 0);
+          TF_RETURN_IF_ERROR(result_literal.CopySliceFrom(
+              sorted_row_reshaped, start_indices, indices, slice_dimensions));
+          return true;
+        }));
+    parent_->evaluated_[sort] = std::move(result_literal);
     return Status::OK();
   }
 
-- 
GitLab


From 32251dd7793e56130693b33a0c29318b04df8080 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 05:22:37 -0700
Subject: [PATCH 0586/1357] Add support for non-string attributes

PiperOrigin-RevId: 214251264
---
 tensorflow/python/framework/function.py      | 17 +++++++++++-
 tensorflow/python/framework/function_test.py | 27 +++++++++++++++++---
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 68b3170dfe..f287289bd0 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -1096,6 +1096,21 @@ def _from_library(lib):
   return initialized.values()
 
 
+def _get_experimental_kwarg_as_attr(attr_name, value):
+  """Creates an AttrValue for a python object."""
+  if isinstance(value, bool):
+    return attr_value_pb2.AttrValue(b=value)
+  elif isinstance(value, int):
+    return attr_value_pb2.AttrValue(i=value)
+  elif isinstance(value, float):
+    return attr_value_pb2.AttrValue(f=value)
+  elif isinstance(value, str):
+    return attr_value_pb2.AttrValue(s=compat.as_bytes(value))
+  else:
+    raise ValueError("Unsupported attribute type for %s with type %s" %
+                     (attr_name, type(value)))
+
+
 def _parse_kwargs_as_attrs(func_name, **kwargs):
   """Parses **kwargs into a node's attributes."""
   attrs = {}
@@ -1122,7 +1137,7 @@ def _parse_kwargs_as_attrs(func_name, **kwargs):
   kwargs_keys = list(kwargs.keys())
   for key in kwargs_keys:
     if key.startswith("experimental_"):
-      attrs[key] = attr_value_pb2.AttrValue(s=compat.as_bytes(kwargs[key]))
+      attrs[key] = _get_experimental_kwarg_as_attr(key, kwargs[key])
       del kwargs[key]
 
   if kwargs:
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 903768a039..f740e5cfaa 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -1331,12 +1331,33 @@ class FunctionsFromProtos(test.TestCase):
   def testExperimentalAttrs(self):
 
     @function.Defun(dtypes.int32, experimental_tag="tag_value")
-    def FunctionWithAttr(i):
+    def FunctionWithStrAttr(i):
       return array_ops.identity(i)
 
-    self.assertTrue("experimental_tag" in FunctionWithAttr.definition.attr)
-    self.assertEqual(FunctionWithAttr.definition.attr["experimental_tag"].s,
+    @function.Defun(dtypes.int32, experimental_tag=123)
+    def FunctionWithIntAttr(i):
+      return array_ops.identity(i)
+
+    @function.Defun(dtypes.int32, experimental_tag=123.0)
+    def FunctionWithFloatAttr(i):
+      return array_ops.identity(i)
+
+    @function.Defun(dtypes.int32, experimental_tag=True)
+    def FunctionWithBoolAttr(i):
+      return array_ops.identity(i)
+
+    self.assertTrue("experimental_tag" in FunctionWithStrAttr.definition.attr)
+    self.assertEqual(FunctionWithStrAttr.definition.attr["experimental_tag"].s,
                      b"tag_value")
+    self.assertTrue("experimental_tag" in FunctionWithIntAttr.definition.attr)
+    self.assertEqual(FunctionWithIntAttr.definition.attr["experimental_tag"].i,
+                     123)
+    self.assertTrue("experimental_tag" in FunctionWithFloatAttr.definition.attr)
+    self.assertEqual(
+        FunctionWithFloatAttr.definition.attr["experimental_tag"].f, 123.0)
+    self.assertTrue("experimental_tag" in FunctionWithBoolAttr.definition.attr)
+    self.assertEqual(FunctionWithBoolAttr.definition.attr["experimental_tag"].b,
+                     True)
 
 
 @test_util.with_c_shapes
-- 
GitLab


From dfcc5cbe1ca3a27f69a287be9f8ce574dd47bd89 Mon Sep 17 00:00:00 2001
From: Brian Patton <bjp@google.com>
Date: Mon, 24 Sep 2018 06:13:51 -0700
Subject: [PATCH 0587/1357] Move a couple more docs from tf.contrib -> tfp

PiperOrigin-RevId: 214255941
---
 tensorflow/python/ops/distributions/util.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py
index 3e480a79f5..c61efebca0 100644
--- a/tensorflow/python/ops/distributions/util.py
+++ b/tensorflow/python/ops/distributions/util.py
@@ -524,6 +524,8 @@ def matrix_diag_transform(matrix, transform=None, name=None):
   Example of heteroskedastic 2-D linear regression.
 
   ```python
+  tfd = tfp.distributions
+
   # Get a trainable Cholesky factor.
   matrix_values = tf.contrib.layers.fully_connected(activations, 4)
   matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
@@ -533,7 +535,7 @@ def matrix_diag_transform(matrix, transform=None, name=None):
   mu = tf.contrib.layers.fully_connected(activations, 2)
 
   # This is a fully trainable multivariate normal!
-  dist = tf.contrib.distributions.MVNCholesky(mu, chol)
+  dist = tfd.MultivariateNormalTriL(mu, chol)
 
   # Standard log loss. Minimizing this will "train" mu and chol, and then dist
   # will be a distribution predicting labels as multivariate Gaussians.
-- 
GitLab


From 3f36c1abb0b07e35df1c2c44881cfd5cb4207ae2 Mon Sep 17 00:00:00 2001
From: Brian Patton <bjp@google.com>
Date: Mon, 24 Sep 2018 06:32:03 -0700
Subject: [PATCH 0588/1357] Non-injective bijectors should still support the
 fallback from ildj to -fldj and vice versa.

PiperOrigin-RevId: 214257872
---
 .../python/ops/distributions/bijector_impl.py | 39 ++++++++++++++-----
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/ops/distributions/bijector_impl.py b/tensorflow/python/ops/distributions/bijector_impl.py
index 2e7aa30296..9c63385dd0 100644
--- a/tensorflow/python/ops/distributions/bijector_impl.py
+++ b/tensorflow/python/ops/distributions/bijector_impl.py
@@ -825,10 +825,21 @@ class Bijector(object):
           min_event_ndims=self.inverse_min_event_ndims,
           event_ndims=event_ndims)):
         if not self._is_injective:  # No caching for non-injective
-          ildjs = self._inverse_log_det_jacobian(y, **kwargs)
-          return tuple(self._reduce_jacobian_det_over_event(
-              y, ildj, self.inverse_min_event_ndims, event_ndims)
-                       for ildj in ildjs)
+          try:
+            ildjs = self._inverse_log_det_jacobian(y, **kwargs)
+            return tuple(self._reduce_jacobian_det_over_event(
+                y, ildj, self.inverse_min_event_ndims, event_ndims)
+                         for ildj in ildjs)
+          except NotImplementedError as original_exception:
+            try:
+              x = self._inverse(y, **kwargs)
+              fldjs = self._forward_log_det_jacobian(x, **kwargs)
+              return tuple(self._reduce_jacobian_det_over_event(
+                  x, -fldj, self.forward_min_event_ndims, event_ndims)
+                           for fldj in fldjs)
+            except NotImplementedError:
+              raise original_exception
+
         mapping = self._lookup(y=y, kwargs=kwargs)
         if mapping.ildj_map is not None and event_ndims in mapping.ildj_map:
           return mapping.ildj_map[event_ndims]
@@ -917,11 +928,21 @@ class Bijector(object):
           return -1. * self._constant_ildj_map[event_ndims]
         x = ops.convert_to_tensor(x, name="x")
         self._maybe_assert_dtype(x)
-        if not self._is_injective:
-          fldjs = self._forward_log_det_jacobian(x, **kwargs)  # No caching.
-          return tuple(self._reduce_jacobian_det_over_event(
-              x, fldj, self.forward_min_event_ndims, event_ndims)
-                       for fldj in fldjs)
+        if not self._is_injective:  # No caching for non-injective
+          try:
+            fldjs = self._forward_log_det_jacobian(x, **kwargs)  # No caching.
+            return tuple(self._reduce_jacobian_det_over_event(
+                x, fldj, self.forward_min_event_ndims, event_ndims)
+                         for fldj in fldjs)
+          except NotImplementedError as original_exception:
+            try:
+              y = self._forward(x, **kwargs)
+              ildjs = self._inverse_log_det_jacobian(y, **kwargs)
+              return tuple(self._reduce_jacobian_det_over_event(
+                  y, -ildj, self.inverse_min_event_ndims, event_ndims)
+                           for ildj in ildjs)
+            except NotImplementedError:
+              raise original_exception
         mapping = self._lookup(x=x, kwargs=kwargs)
         if mapping.ildj_map is not None and event_ndims in mapping.ildj_map:
           return -mapping.ildj_map[event_ndims]
-- 
GitLab


From 7cf39dde90f83e584d14ce1c371ff17477a1e57e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 07:15:25 -0700
Subject: [PATCH 0589/1357] Add a rounding operation to eliminate some
 truncation errors (see test for example).

PiperOrigin-RevId: 214262549
---
 tensorflow/python/ops/image_ops_impl.py | 8 ++++----
 tensorflow/python/ops/image_ops_test.py | 6 ++++++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 325418d5f7..d680c12ac5 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1029,10 +1029,10 @@ def resize_images(images,
       scale_factor_width = (math_ops.to_float(new_width_const) /
                             math_ops.to_float(current_width))
       scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width)
-      scaled_height_const = math_ops.to_int32(scale_factor *
-                                              math_ops.to_float(current_height))
-      scaled_width_const = math_ops.to_int32(scale_factor *
-                                             math_ops.to_float(current_width))
+      scaled_height_const = math_ops.to_int32(
+          math_ops.round(scale_factor * math_ops.to_float(current_height)))
+      scaled_width_const = math_ops.to_int32(
+          math_ops.round(scale_factor * math_ops.to_float(current_width)))
 
       # NOTE: Reset the size and other constants used later.
       size = ops.convert_to_tensor([scaled_height_const, scaled_width_const],
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 795e6bbc3e..da45f6e3e6 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -2687,6 +2687,12 @@ class ResizeImagesTest(test_util.TensorFlowTestCase):
 
     self._assertResizeCheckShape(x, x_shape, [3840, 2160], [3840, 2160, 3])
 
+  def testPreserveAspectRatioSquare(self):
+    x_shape = [299, 299, 3]
+    x = np.random.uniform(size=x_shape)
+
+    self._assertResizeCheckShape(x, x_shape, [320, 320], [320, 320, 3])
+
 
 class ResizeImageWithPadTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 64498def97852cc359209576703c7b788ba839e9 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Mon, 24 Sep 2018 07:24:03 -0700
Subject: [PATCH 0590/1357] Deprecate do_quantize_training_on_graphdef.

PiperOrigin-RevId: 214263489
---
 tensorflow/python/training/quantize_training.i        | 7 +++++++
 tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt | 4 ----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/training/quantize_training.i b/tensorflow/python/training/quantize_training.i
index 41e62e0252..1ab600bb22 100644
--- a/tensorflow/python/training/quantize_training.i
+++ b/tensorflow/python/training/quantize_training.i
@@ -55,6 +55,13 @@ PyObject* DoQuantizeTrainingOnGraphDefHelper(
 
 
 %insert("python") %{
+from tensorflow.python.util import deprecation
+from tensorflow.python.util.tf_export import tf_export
+
+@deprecation.deprecated(
+    None,
+    "GraphDef quantized training rewriter is deprecated in the long term")
+@tf_export(v1=["train.do_quantize_training_on_graphdef"])
 def do_quantize_training_on_graphdef(input_graph, num_bits):
   """A general quantization scheme is being developed in `tf.contrib.quantize`.
 
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
index b21dabbde7..cb6da5088b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
@@ -264,10 +264,6 @@ tf_module {
     name: "create_global_step"
     argspec: "args=[\'graph\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "do_quantize_training_on_graphdef"
-    argspec: "args=[\'input_graph\', \'num_bits\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "exponential_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'decay_rate\', \'staircase\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-- 
GitLab


From c1e050cc75c6ced7b68a2349a012b2e5a3d04538 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 24 Sep 2018 07:58:00 -0700
Subject: [PATCH 0591/1357] Rename source_map to create_source_map. Reorganize
 the tests to be clearer about the expected functionality.

PiperOrigin-RevId: 214266947
---
 tensorflow/python/autograph/pyct/compiler.py  |  2 +-
 .../python/autograph/pyct/origin_info.py      |  2 +-
 .../python/autograph/pyct/origin_info_test.py | 59 ++++++++-----------
 3 files changed, 28 insertions(+), 35 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/compiler.py b/tensorflow/python/autograph/pyct/compiler.py
index 9e1b6bdbe8..37f3e72f6e 100644
--- a/tensorflow/python/autograph/pyct/compiler.py
+++ b/tensorflow/python/autograph/pyct/compiler.py
@@ -108,7 +108,7 @@ def ast_to_object(nodes,
       indices = (-1,)
 
     if include_source_map:
-      source_map = origin_info.source_map(nodes, source, f.name, indices)
+      source_map = origin_info.create_source_map(nodes, source, f.name, indices)
 
   # TODO(mdan): Try flush() and delete=False instead.
   if delete_on_exit:
diff --git a/tensorflow/python/autograph/pyct/origin_info.py b/tensorflow/python/autograph/pyct/origin_info.py
index 4c7c4165ef..102bd42c91 100644
--- a/tensorflow/python/autograph/pyct/origin_info.py
+++ b/tensorflow/python/autograph/pyct/origin_info.py
@@ -75,7 +75,7 @@ class OriginInfo(
 
 
 # TODO(mdan): This source map should be a class - easier to refer to.
-def source_map(nodes, code, filename, indices_in_code):
+def create_source_map(nodes, code, filename, indices_in_code):
   """Creates a source map between an annotated AST and the code it compiles to.
 
   Args:
diff --git a/tensorflow/python/autograph/pyct/origin_info_test.py b/tensorflow/python/autograph/pyct/origin_info_test.py
index 6b9c30dbd0..3b1d5f2040 100644
--- a/tensorflow/python/autograph/pyct/origin_info_test.py
+++ b/tensorflow/python/autograph/pyct/origin_info_test.py
@@ -27,49 +27,41 @@ from tensorflow.python.platform import test
 
 class OriginInfoTest(test.TestCase):
 
-  def test_source_map(self):
+  def test_create_source_map(self):
 
     def test_fn(x):
-      if x > 0:
-        x += 1
-      return x
-
-    node, source = parser.parse_entity(test_fn)
+      return x + 1
+
+    node, _ = parser.parse_entity(test_fn)
+    fake_origin = origin_info.OriginInfo(
+        loc=origin_info.Location('fake_filename', 3, 7),
+        function_name='fake_function_name',
+        source_code_line='fake source line',
+        comment=None)
     fn_node = node.body[0]
-    origin_info.resolve(fn_node, source)
-
-    # Insert a traced line.
-    new_node = parser.parse_str('x = abs(x)').body[0]
-    anno.copyanno(fn_node.body[0], new_node, anno.Basic.ORIGIN)
-    fn_node.body.insert(0, new_node)
+    anno.setanno(fn_node.body[0], anno.Basic.ORIGIN, fake_origin)
+    converted_code = compiler.ast_to_source(fn_node)
 
-    # Insert an untraced line.
-    fn_node.body.insert(0, parser.parse_str('x = 0').body[0])
+    source_map = origin_info.create_source_map(
+        fn_node, converted_code, 'test_filename', [0])
 
-    modified_source = compiler.ast_to_source(fn_node)
+    loc = origin_info.LineLocation('test_filename', 2)
+    self.assertIn(loc, source_map)
+    self.assertIs(source_map[loc], fake_origin)
 
-    source_map = origin_info.source_map(fn_node, modified_source,
-                                        'test_filename', [0])
+  def test_source_map_no_origin(self):
 
-    loc = origin_info.LineLocation('test_filename', 1)
-    origin = source_map[loc]
-    self.assertEqual(origin.source_code_line, 'def test_fn(x):')
-    self.assertEqual(origin.loc.lineno, 1)
+    def test_fn(x):
+      return x + 1
 
-    # The untraced line, inserted second.
-    loc = origin_info.LineLocation('test_filename', 2)
-    self.assertFalse(loc in source_map)
+    node, _ = parser.parse_entity(test_fn)
+    fn_node = node.body[0]
+    converted_code = compiler.ast_to_source(fn_node)
 
-    # The traced line, inserted first.
-    loc = origin_info.LineLocation('test_filename', 3)
-    origin = source_map[loc]
-    self.assertEqual(origin.source_code_line, '  if x > 0:')
-    self.assertEqual(origin.loc.lineno, 2)
+    source_map = origin_info.create_source_map(
+        fn_node, converted_code, 'test_filename', [0])
 
-    loc = origin_info.LineLocation('test_filename', 4)
-    origin = source_map[loc]
-    self.assertEqual(origin.source_code_line, '  if x > 0:')
-    self.assertEqual(origin.loc.lineno, 2)
+    self.assertEqual(len(source_map), 0)
 
   def test_resolve(self):
 
@@ -79,6 +71,7 @@ class OriginInfoTest(test.TestCase):
 
     node, source = parser.parse_entity(test_fn)
     fn_node = node.body[0]
+
     origin_info.resolve(fn_node, source)
 
     origin = anno.getanno(fn_node, anno.Basic.ORIGIN)
-- 
GitLab


From cb98ceba9cff8c10ee3c7e89dc8925c88b28118e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 09:07:57 -0700
Subject: [PATCH 0592/1357] Turn on PinToHostOptimizer by default.

PiperOrigin-RevId: 214275960
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 4 ++--
 tensorflow/core/protobuf/rewriter_config.proto        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 3992b45c64..c59645e5f2 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -135,7 +135,7 @@ Status MetaOptimizer::InitializeOptimizers(
   if (cfg_.remapping() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
   }
-  if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) {
+  if (cfg_.pin_to_host_optimization() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<PinToHostOptimizer>());
   }
   if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
@@ -483,7 +483,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
          cfg.scoped_allocator_optimization() == RewriterConfig::ON ||
-         cfg.pin_to_host_optimization() == RewriterConfig::ON ||
+         cfg.pin_to_host_optimization() != RewriterConfig::OFF ||
          !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }
 
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 0e780eacc9..bb8f88336d 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -75,7 +75,7 @@ message RewriterConfig {
   // Try to allocate some independent Op outputs contiguously in order to
   // merge or eliminate downstream Ops (off by default).
   Toggle scoped_allocator_optimization = 15;
-  // Force small ops onto the CPU (default is OFF).
+  // Force small ops onto the CPU (default is ON).
   Toggle pin_to_host_optimization = 18;
 
   // Controls how many times we run the optimizers in meta optimizer (default
-- 
GitLab


From e2ce9787d9927e4a6574e6ac4606a47712320170 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 24 Sep 2018 09:21:45 -0700
Subject: [PATCH 0593/1357] Remove unused compatibility code in Softmax
 implementation now that the forward compatibility window has expired.

PiperOrigin-RevId: 214277870
---
 .../python/kernel_tests/softmax_op_test.py    | 21 +++++-------
 tensorflow/python/ops/nn_ops.py               | 34 +++----------------
 2 files changed, 14 insertions(+), 41 deletions(-)

diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py
index e53347c4bc..89f4697e5c 100644
--- a/tensorflow/python/kernel_tests/softmax_op_test.py
+++ b/tensorflow/python/kernel_tests/softmax_op_test.py
@@ -22,7 +22,6 @@ import unittest
 import numpy as np
 
 
-from tensorflow.python.compat import compat
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.ops import array_ops
@@ -163,10 +162,9 @@ class SoftmaxTest(test.TestCase):
     self._testOverflow(use_gpu=False)
 
   def test1DTensorAsInputNoReshape(self):
-    with compat.forward_compatibility_horizon(2018, 8, 27):
-      self._testSoftmax(
-          np.array([3., 2., 3., 9.]).astype(np.float64), use_gpu=False)
-      self._testOverflow(use_gpu=False)
+    self._testSoftmax(
+        np.array([3., 2., 3., 9.]).astype(np.float64), use_gpu=False)
+    self._testOverflow(use_gpu=False)
 
   def test3DTensorAsInput(self):
     self._testSoftmax(
@@ -177,13 +175,12 @@ class SoftmaxTest(test.TestCase):
     self._testOverflow(use_gpu=False)
 
   def test3DTensorAsInputNoReshape(self):
-    with compat.forward_compatibility_horizon(2018, 8, 27):
-      self._testSoftmax(
-          np.array([[[1., 1., 1., 1.], [1., 2., 3., 4.]],
-                    [[2., 3., 4., 5.], [6., 7., 8., 9.]],
-                    [[5., 4., 3., 2.], [1., 2., 3., 4.]]]).astype(np.float32),
-          use_gpu=False)
-      self._testOverflow(use_gpu=False)
+    self._testSoftmax(
+        np.array([[[1., 1., 1., 1.], [1., 2., 3., 4.]],
+                  [[2., 3., 4., 5.], [6., 7., 8., 9.]],
+                  [[5., 4., 3., 2.], [1., 2., 3., 4.]]]).astype(np.float32),
+        use_gpu=False)
+    self._testOverflow(use_gpu=False)
 
   def testAlongFirstDimension(self):
     self._testSoftmax(
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 2526e6fee2..9ef177e97b 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -22,7 +22,6 @@ import numbers
 
 import numpy as np
 
-from tensorflow.python.compat import compat
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_util
@@ -1670,47 +1669,24 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   shape = logits.get_shape()
   is_last_dim = (dim is -1) or (dim == shape.ndims - 1)
 
-  # TODO(phawkins): remove after 2018/8/27 and simplify this code.
-  softmax_accepts_r1_or_greater = compat.forward_compatible(2018, 8, 27)
-  reshape_required = (not softmax_accepts_r1_or_greater) and shape.ndims != 2
   if is_last_dim:
-    if reshape_required:
-      # If dim is the last dimension, simply reshape the logits to a matrix and
-      # apply the internal softmax.
-      input_shape = array_ops.shape(logits)
-      logits = _flatten_outer_dims(logits)
-      output = compute_op(logits)
-      output = array_ops.reshape(output, input_shape, name=name)
-      return output
     return compute_op(logits, name=name)
 
-  # If dim is not the last dimension, we have to do a reshape and transpose so
-  # that we can still perform softmax on its last dimension.
+  # If dim is not the last dimension, we have to do a transpose so that we can
+  # still perform softmax on its last dimension.
 
   # Swap logits' dimension of dim and its last dimension.
   input_rank = array_ops.rank(logits)
   dim_axis = dim % shape.ndims
   logits = _swap_axis(logits, dim_axis, math_ops.subtract(input_rank, 1))
-  shape_after_swap = array_ops.shape(logits)
 
-  if reshape_required:
-    # Reshape logits into a matrix.
-    logits = _flatten_outer_dims(logits)
-
-    # Do the actual softmax on its last dimension.
-    output = compute_op(logits)
-
-    # Transform back the output tensor.
-    output = array_ops.reshape(output, shape_after_swap)
-  else:
-    # Do the actual softmax on its last dimension.
-    output = compute_op(logits)
+  # Do the actual softmax on its last dimension.
+  output = compute_op(logits)
 
   output = _swap_axis(
       output, dim_axis, math_ops.subtract(input_rank, 1), name=name)
 
-  # Make shape inference work since reshape and transpose may erase its static
-  # shape.
+  # Make shape inference work since transpose may erase its static shape.
   output.set_shape(shape)
 
   return output
-- 
GitLab


From 4cf68d9b869c0cb1efe9865e764c997649b7fbc0 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Mon, 24 Sep 2018 09:35:58 -0700
Subject: [PATCH 0594/1357] Check for too large jpeg images before
 decompressing.

PiperOrigin-RevId: 214279868
---
 tensorflow/core/lib/jpeg/jpeg_mem.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.cc b/tensorflow/core/lib/jpeg/jpeg_mem.cc
index 50ed8bdb3b..f7a359eb5b 100644
--- a/tensorflow/core/lib/jpeg/jpeg_mem.cc
+++ b/tensorflow/core/lib/jpeg/jpeg_mem.cc
@@ -152,7 +152,9 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) {
   cinfo.scale_denom = ratio;
   cinfo.dct_method = flags.dct_method;
 
-  jpeg_start_decompress(&cinfo);
+  // Determine the output image size before attempting decompress to prevent
+  // OOM'ing doing the decompress
+  jpeg_calc_output_dimensions(&cinfo);
 
   int64 total_size = static_cast<int64>(cinfo.output_height) *
                      static_cast<int64>(cinfo.output_width);
@@ -170,6 +172,8 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) {
     return nullptr;
   }
 
+  jpeg_start_decompress(&cinfo);
+
   JDIMENSION target_output_width = cinfo.output_width;
   JDIMENSION target_output_height = cinfo.output_height;
   JDIMENSION skipped_scanlines = 0;
-- 
GitLab


From b1ca5f9d1f2def557ec2cea6c1ebccdfb5c6066a Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Mon, 24 Sep 2018 10:11:38 -0700
Subject: [PATCH 0595/1357] Tweak PrintInterpreterState() output

Output names for custom ops and properly insert newlines for
null IntVectors.

PiperOrigin-RevId: 214285696
---
 tensorflow/contrib/lite/optional_debug_tools.cc | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/lite/optional_debug_tools.cc b/tensorflow/contrib/lite/optional_debug_tools.cc
index f1f025f777..64ba2d8baa 100644
--- a/tensorflow/contrib/lite/optional_debug_tools.cc
+++ b/tensorflow/contrib/lite/optional_debug_tools.cc
@@ -25,7 +25,7 @@ void PrintIntVector(const std::vector<int>& v) {
 
 void PrintTfLiteIntVector(const TfLiteIntArray* v) {
   if (!v) {
-    printf(" (null)");
+    printf(" (null)\n");
     return;
   }
   for (int k = 0; k < v->size; k++) {
@@ -99,8 +99,12 @@ void PrintInterpreterState(Interpreter* interpreter) {
         interpreter->node_and_registration(node_index);
     const TfLiteNode& node = node_and_reg->first;
     const TfLiteRegistration& reg = node_and_reg->second;
-    printf("Node %3d Operator Builtin Code %3d\n", node_index,
-           reg.builtin_code);
+    if (reg.custom_name != nullptr) {
+      printf("Node %3d Operator Custom Name %s\n", node_index, reg.custom_name);
+    } else {
+      printf("Node %3d Operator Builtin Code %3d\n", node_index,
+             reg.builtin_code);
+    }
     printf("  Inputs:");
     PrintTfLiteIntVector(node.inputs);
     printf("  Outputs:");
-- 
GitLab


From 77d56a08826826db3350968f19070434fa922995 Mon Sep 17 00:00:00 2001
From: Philip Pham <phillypham@google.com>
Date: Mon, 24 Sep 2018 10:13:17 -0700
Subject: [PATCH 0596/1357] Implement required properties for TPU Strategy

These properties are necessary for the strategy to work with
`tf.estimator.train_and_evaluate`.

PiperOrigin-RevId: 214285957
---
 .../contrib/distribute/python/tpu_strategy.py   | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index 6ba83976fc..ba2cc2e806 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -307,6 +307,22 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
   def num_towers_per_host(self):
     return self._tpu_metadata.num_of_cores_per_host
 
+  @property
+  def between_graph(self):
+    return False
+
+  @property
+  def should_init(self):
+    return True
+
+  @property
+  def should_checkpoint(self):
+    return True
+
+  @property
+  def should_save_summary(self):
+    return True
+
   def get_host_cpu_device(self, host_id):
     if self._tpu_cluster_resolver.get_master() in ('', 'local'):
       return '/replica:0/task:0/device:CPU:0'
@@ -324,4 +340,3 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
       cluster_spec = self._tpu_cluster_resolver.cluster_spec()
       if cluster_spec:
         session_config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
-
-- 
GitLab


From fbd6ea27dbdc7eb00db60e643d3b3ee5f1985c26 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 10:18:04 -0700
Subject: [PATCH 0597/1357] Replace self.test_session(graph=<an object not
 None>) with self.session(graph=...) as it's the same semantic.

PiperOrigin-RevId: 214286845
---
 .../saved_model/keras_saved_model_test.py     |  2 +-
 .../tensorrt/python/trt_convert_test.py       |  4 +--
 .../python/kernel_tests/basic_gpu_test.py     |  2 +-
 .../python/kernel_tests/cond_v2_test.py       | 29 ++++++++--------
 .../kernel_tests/control_flow_ops_py_test.py  |  2 +-
 .../kernel_tests/depthwise_conv_op_test.py    |  4 +--
 .../kernel_tests/functional_ops_test.py       | 10 +++---
 .../python/kernel_tests/init_ops_test.py      | 34 +++++++++----------
 .../python/kernel_tests/numerics_test.py      |  8 ++---
 .../python/kernel_tests/reduction_ops_test.py |  6 ++--
 .../kernel_tests/reduction_ops_test_big.py    | 12 +++----
 tensorflow/python/kernel_tests/rnn_test.py    |  8 ++---
 tensorflow/python/kernel_tests/scalar_test.py |  2 +-
 .../sparse_tensors_map_ops_test.py            |  2 +-
 .../kernel_tests/summary_audio_op_test.py     |  2 +-
 .../kernel_tests/summary_image_op_test.py     |  4 +--
 .../training/warm_starting_util_test.py       |  8 ++---
 17 files changed, 70 insertions(+), 69 deletions(-)

diff --git a/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model_test.py b/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model_test.py
index 12dd72a95b..060c504523 100644
--- a/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model_test.py
+++ b/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model_test.py
@@ -269,7 +269,7 @@ class TestModelSavedModelExport(test.TestCase, parameterized.TestCase):
   def testSaveAndLoadSavedModelExport(
       self, model_builder, uses_learning_phase, optimizer, train_before_export):
     saved_model_path = self._save_model_dir()
-    with self.test_session(graph=ops.Graph()):
+    with self.session(graph=ops.Graph()):
       input_arr = np.random.random((1, 3))
       target_arr = np.random.random((1, 3))
 
diff --git a/tensorflow/contrib/tensorrt/python/trt_convert_test.py b/tensorflow/contrib/tensorrt/python/trt_convert_test.py
index 118a6680fd..f3a1ef0d47 100644
--- a/tensorflow/contrib/tensorrt/python/trt_convert_test.py
+++ b/tensorflow/contrib/tensorrt/python/trt_convert_test.py
@@ -104,7 +104,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase):
   def _GetGraphDef(self):
     """Get the graph def for testing."""
     g, var, _, _ = self._GetGraph()
-    with self.test_session(graph=g, config=self._GetConfigProto()) as sess:
+    with self.session(graph=g, config=self._GetConfigProto()) as sess:
       sess.run(var.initializer)
       graph_def = graph_util.convert_variables_to_constants(
           sess, g.as_graph_def(add_shapes=True), ["output"])
@@ -128,7 +128,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase):
         outputs={"myoutput": utils.build_tensor_info(out)},
         method_name=signature_constants.PREDICT_METHOD_NAME)
     saved_model_builder = builder.SavedModelBuilder(input_saved_model_dir)
-    with self.test_session(graph=g, config=self._GetConfigProto()) as sess:
+    with self.session(graph=g, config=self._GetConfigProto()) as sess:
       sess.run(var.initializer)
       saved_model_builder.add_meta_graph_and_variables(
           sess, [tag_constants.SERVING],
diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py
index e651fa0070..67e8618198 100644
--- a/tensorflow/python/kernel_tests/basic_gpu_test.py
+++ b/tensorflow/python/kernel_tests/basic_gpu_test.py
@@ -260,7 +260,7 @@ class GpuMultiSessionMemoryTest(test_util.TensorFlowTestCase):
     threads = []
     results = []
     for _ in xrange(n_threads):
-      session = self.test_session(graph=ops.Graph(), use_gpu=True)
+      session = self.session(graph=ops.Graph(), use_gpu=True)
       results.append(set())
       args = (session, results[-1])
       threads.append(threading.Thread(target=self._run_session, args=args))
diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index a1efecf28a..5c0e24117f 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -41,7 +41,7 @@ class CondV2Test(test.TestCase):
   def _testCond(self, true_fn, false_fn, train_vals, feed_dict=None):
     if not feed_dict:
       feed_dict = {}
-    with self.test_session(graph=ops.get_default_graph()) as sess:
+    with self.session(graph=ops.get_default_graph()) as sess:
       pred = array_ops.placeholder(dtypes.bool, name="pred")
 
       expected = control_flow_ops.cond(pred, true_fn, false_fn, name="expected")
@@ -382,7 +382,7 @@ class CondV2Test(test.TestCase):
 
     with ops.Graph().as_default():
       grads, pred_outer, pred_inner = build_graph()
-      with self.test_session(graph=ops.get_default_graph()) as sess:
+      with self.session(graph=ops.get_default_graph()) as sess:
         self.assertSequenceEqual(
             sess.run(grads, {
                 pred_outer: True,
@@ -445,7 +445,7 @@ class CondV2Test(test.TestCase):
 
     with ops.Graph().as_default():
       grads, pred_outer, pred_inner = build_graph()
-      with self.test_session(graph=ops.get_default_graph()) as sess:
+      with self.session(graph=ops.get_default_graph()) as sess:
         self.assertSequenceEqual(
             sess.run(grads, {
                 pred_outer: True,
@@ -504,7 +504,7 @@ class CondV2Test(test.TestCase):
 
     with ops.Graph().as_default():
       grads, pred_outer, pred_inner = build_graph()
-      with self.test_session(graph=ops.get_default_graph()) as sess:
+      with self.session(graph=ops.get_default_graph()) as sess:
         self.assertSequenceEqual(
             sess.run(grads, {
                 pred_outer: True,
@@ -574,7 +574,7 @@ class CondV2Test(test.TestCase):
       meta_graph = saver.export_meta_graph()
 
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
+      with self.session(graph=g) as sess:
         saver.import_meta_graph(meta_graph)
         x = ops.get_collection("x")[0]
         pred = ops.get_collection("pred")[0]
@@ -598,7 +598,7 @@ class CondV2Test(test.TestCase):
 
   def testLowering(self):
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
+      with self.session(graph=g) as sess:
         out_cond = self._createCond("cond")
 
         run_options = config_pb2.RunOptions(output_partition_graphs=True)
@@ -624,7 +624,7 @@ class CondV2Test(test.TestCase):
                          "An `If` op was found, but it should be lowered.")
 
   def testLoweringDisabledInXLA(self):
-    with self.test_session(graph=ops.Graph()) as sess:
+    with self.session(graph=ops.Graph()) as sess:
       # Build the cond_v2 in an XLA context
       xla_context = control_flow_ops.XLAControlFlowContext()
       xla_context.Enter()
@@ -661,7 +661,7 @@ class CondV2CollectionTest(test.TestCase):
   def testCollectionIntValueAccessInCond(self):
     """Read values from graph collections inside of cond_v2."""
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g):
+      with self.session(graph=g):
         x = 2
         y = 5
         ops.add_to_collection("x", x)
@@ -677,7 +677,7 @@ class CondV2CollectionTest(test.TestCase):
   def testCollectionTensorValueAccessInCond(self):
     """Read tensors from collections inside of cond_v2 & use them."""
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g):
+      with self.session(graph=g):
         x = constant_op.constant(2)
         y = constant_op.constant(5)
         ops.add_to_collection("x", x)
@@ -694,7 +694,7 @@ class CondV2CollectionTest(test.TestCase):
   def testCollectionIntValueWriteInCond(self):
     """Make sure Int writes to collections work inside of cond_v2."""
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g):
+      with self.session(graph=g):
         x = constant_op.constant(2)
         y = constant_op.constant(5)
         def true_fn():
@@ -725,7 +725,7 @@ class CondV2ContainerTest(test.TestCase):
     """
     self.skipTest("b/113048653")
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g):
+      with self.session(graph=g):
 
         v0 = variables.Variable([0])
         q0 = data_flow_ops.FIFOQueue(1, dtypes.float32)
@@ -802,7 +802,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
   def testColocateWithBeforeCond(self):
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g):
+      with self.session(graph=g):
 
         a = constant_op.constant([2.0], name="a")
         b = constant_op.constant([2.0], name="b")
@@ -826,7 +826,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
   def testColocateWithInAndOutOfCond(self):
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g):
+      with self.session(graph=g):
 
         a = constant_op.constant([2.0], name="a")
         b = constant_op.constant([2.0], name="b")
@@ -873,7 +873,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
   def testDeviceBeforeCond(self):
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g):
+      with self.session(graph=g):
+
         def fn():
           c = constant_op.constant(3.0)
           self.assertEqual("/device:CPU:0", c.op.device)
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index ebeabcfe1a..2996539004 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -1783,7 +1783,7 @@ class ControlFlowTest(test.TestCase):
       else:
         self.assertFalse(gpu_dev_name in dev)
 
-    with self.test_session(graph=graph) as sess:
+    with self.session(graph=graph) as sess:
       self.assertAllClose(1024.0, sess.run(r))
 
   def testWhileGrad_ColocateGradients(self):
diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 5741f2ec64..200da772e5 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -128,7 +128,7 @@ class DepthwiseConv2DTest(test.TestCase):
     x2 = [f * 1.0 / filter_size for f in range(1, filter_size + 1)]
     ops.reset_default_graph()
     graph = ops.get_default_graph()
-    with self.test_session(graph=graph, use_gpu=use_gpu) as sess:
+    with self.session(graph=graph, use_gpu=use_gpu) as sess:
       tolerance = {
           dtypes.float16: 4e-2,
           dtypes.float32: 1e-8,
@@ -366,7 +366,7 @@ class DepthwiseConv2DTest(test.TestCase):
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
     ops.reset_default_graph()
     graph = ops.get_default_graph()
-    with self.test_session(graph=graph, use_gpu=use_gpu) as sess:
+    with self.session(graph=graph, use_gpu=use_gpu) as sess:
       tolerance = {
           dtypes.float16: 4e-0,
           dtypes.float32: 8e-4,
diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py
index e39daf1371..30d11852c7 100644
--- a/tensorflow/python/kernel_tests/functional_ops_test.py
+++ b/tensorflow/python/kernel_tests/functional_ops_test.py
@@ -735,7 +735,7 @@ class FunctionalOpsTest(test.TestCase):
         def Run(sess, n):
           return sess.run(functional_ops.While([n, 0.], Cond, Body))[1]
 
-        with self.test_session(graph=g, use_gpu=use_gpu) as sess:
+        with self.session(graph=g, use_gpu=use_gpu) as sess:
           self.assertAllEqual(Run(sess, 20.), 210.)
           self.assertAllEqual(Run(sess, 100.), 5050.)
 
@@ -765,7 +765,7 @@ class FunctionalOpsTest(test.TestCase):
             fetch = outputs[1]
           else:
             fetch = "my_while:1"
-        with self.test_session(graph=g, use_gpu=use_gpu) as sess:
+        with self.session(graph=g, use_gpu=use_gpu) as sess:
           return sess.run(fetch)
 
     self.assertAllEqual(Run(20., False), 210.)
@@ -793,7 +793,7 @@ class FunctionalOpsTest(test.TestCase):
         def BodyReturnsTooManyArgs(n, x):
           return n - 1, x + n, x
 
-        with self.test_session(graph=g, use_gpu=use_gpu):
+        with self.session(graph=g, use_gpu=use_gpu):
           with self.assertRaisesRegexp(
               errors.InvalidArgumentError,
               "Expected a single scalar.*got 2 tensors."):
@@ -818,7 +818,7 @@ class FunctionalOpsTest(test.TestCase):
         def Body(n, x):
           return n - 1, x + n
 
-        with self.test_session(graph=g, use_gpu=use_gpu) as sess:
+        with self.session(graph=g, use_gpu=use_gpu) as sess:
           n = array_ops.placeholder(dtypes.float32)
           _, result = functional_ops.While([n, 0.], Cond, Body)
           c = constant_op.constant(37.)
@@ -831,7 +831,7 @@ class FunctionalOpsTest(test.TestCase):
 
   def _tfSum(self, use_gpu, rewrite_with_while):
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g, use_gpu=use_gpu) as sess:
+      with self.session(graph=g, use_gpu=use_gpu) as sess:
 
         @function.Defun(dtypes.int32, dtypes.float32)
         def Body(n, x):
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index 79ce965242..292679e4b9 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -522,7 +522,7 @@ class LinSpaceTest(test.TestCase):
   def _LinSpace(self, start, stop, num):
     # NOTE(touts): Needs to pass a graph to get a new session each time.
     with ops.Graph().as_default() as graph:
-      with self.test_session(graph=graph, force_gpu=self.force_gpu):
+      with self.session(graph=graph, force_gpu=self.force_gpu):
         tf_ans = math_ops.linspace(start, stop, num, name="linspace")
         self.assertEqual([num], tf_ans.get_shape())
         return tf_ans.eval()
@@ -606,7 +606,7 @@ class OrthogonalInitializerTest(test.TestCase):
 
   def testInvalidShape(self):
     init1 = init_ops.orthogonal_initializer()
-    with self.test_session(graph=ops.Graph(), use_gpu=True):
+    with self.session(graph=ops.Graph(), use_gpu=True):
       self.assertRaises(ValueError, init1, shape=[5])
 
   def testGain(self):
@@ -614,7 +614,7 @@ class OrthogonalInitializerTest(test.TestCase):
     for dtype in [dtypes.float32, dtypes.float64]:
       init1 = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
       init2 = init_ops.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
-      with self.test_session(graph=ops.Graph(), use_gpu=True):
+      with self.session(graph=ops.Graph(), use_gpu=True):
         t1 = init1(shape).eval()
         t2 = init2(shape).eval()
       return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)
@@ -624,7 +624,7 @@ class OrthogonalInitializerTest(test.TestCase):
       for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]:
         init = init_ops.orthogonal_initializer(dtype=dtype)
         tol = 1e-5 if dtype == dtypes.float32 else 1e-12
-        with self.test_session(graph=ops.Graph(), use_gpu=True):
+        with self.session(graph=ops.Graph(), use_gpu=True):
           # Check the shape
           t = init(shape).eval()
           self.assertAllEqual(shape, t.shape)
@@ -663,7 +663,7 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase):
 
   def testInvalidShape(self):
     init1 = init_ops.convolutional_delta_orthogonal()
-    with self.test_session(graph=ops.Graph(), use_gpu=True):
+    with self.session(graph=ops.Graph(), use_gpu=True):
       self.assertRaises(ValueError, init1, shape=[3, 3, 6, 5])
 
   def testGain(self):
@@ -672,7 +672,7 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase):
       init1 = init_ops.convolutional_delta_orthogonal(seed=1, dtype=dtype)
       init2 = init_ops.convolutional_delta_orthogonal(gain=3.14,
                                                       seed=1, dtype=dtype)
-      with self.test_session(graph=ops.Graph(), use_gpu=True):
+      with self.session(graph=ops.Graph(), use_gpu=True):
         t1 = init1(shape).eval()
         t2 = init2(shape).eval()
       return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)
@@ -763,7 +763,7 @@ class ConvolutionOrthogonal1dInitializerTest(test.TestCase):
 
   def testInvalidShape(self):
     init1 = init_ops.convolutional_orthogonal_1d()
-    with self.test_session(graph=ops.Graph(), use_gpu=True):
+    with self.session(graph=ops.Graph(), use_gpu=True):
       self.assertRaises(ValueError, init1, shape=[3, 6, 5])
 
   def testGain(self):
@@ -772,7 +772,7 @@ class ConvolutionOrthogonal1dInitializerTest(test.TestCase):
       init1 = init_ops.convolutional_orthogonal_1d(seed=1, dtype=dtype)
       init2 = init_ops.convolutional_orthogonal_1d(gain=3.14,
                                                    seed=1, dtype=dtype)
-      with self.test_session(graph=ops.Graph(), use_gpu=True):
+      with self.session(graph=ops.Graph(), use_gpu=True):
         t1 = init1(shape).eval()
         t2 = init2(shape).eval()
       return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)
@@ -877,7 +877,7 @@ class ConvolutionOrthogonal2dInitializerTest(test.TestCase):
 
   def testInvalidShape(self):
     init1 = init_ops.convolutional_orthogonal_2d()
-    with self.test_session(graph=ops.Graph(), use_gpu=True):
+    with self.session(graph=ops.Graph(), use_gpu=True):
       self.assertRaises(ValueError, init1, shape=[3, 3, 6, 5])
 
   def testGain(self):
@@ -886,7 +886,7 @@ class ConvolutionOrthogonal2dInitializerTest(test.TestCase):
       init1 = init_ops.convolutional_orthogonal_2d(seed=1, dtype=dtype)
       init2 = init_ops.convolutional_orthogonal_2d(gain=3.14,
                                                    seed=1, dtype=dtype)
-      with self.test_session(graph=ops.Graph(), use_gpu=True):
+      with self.session(graph=ops.Graph(), use_gpu=True):
         t1 = init1(shape).eval()
         t2 = init2(shape).eval()
       return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)
@@ -972,7 +972,7 @@ class ConvolutionOrthogonal3dInitializerTest(test.TestCase):
 
   def testInvalidShape(self):
     init1 = init_ops.convolutional_orthogonal_3d()
-    with self.test_session(graph=ops.Graph(), use_gpu=True):
+    with self.session(graph=ops.Graph(), use_gpu=True):
       self.assertRaises(ValueError, init1, shape=[3, 3, 3, 6, 5])
 
   def testGain(self):
@@ -981,7 +981,7 @@ class ConvolutionOrthogonal3dInitializerTest(test.TestCase):
       init1 = init_ops.convolutional_orthogonal_3d(seed=1, dtype=dtype)
       init2 = init_ops.convolutional_orthogonal_3d(gain=3.14,
                                                    seed=1, dtype=dtype)
-      with self.test_session(graph=ops.Graph(), use_gpu=True):
+      with self.session(graph=ops.Graph(), use_gpu=True):
         t1 = init1(shape).eval()
         t2 = init2(shape).eval()
       return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)
@@ -1080,7 +1080,7 @@ class IdentityInitializerTest(test.TestCase):
 
   def testInvalidShape(self):
     init = init_ops.identity_initializer()
-    with self.test_session(graph=ops.Graph(), use_gpu=True):
+    with self.session(graph=ops.Graph(), use_gpu=True):
       self.assertRaises(ValueError, init, shape=[5, 7, 7])
       self.assertRaises(ValueError, init, shape=[5])
       self.assertRaises(ValueError, init, shape=[])
@@ -1088,7 +1088,7 @@ class IdentityInitializerTest(test.TestCase):
   def testNonSquare(self):
     init = init_ops.identity_initializer()
     shape = (10, 5)
-    with self.test_session(graph=ops.Graph(), use_gpu=True):
+    with self.session(graph=ops.Graph(), use_gpu=True):
       self.assertAllClose(init(shape).eval(), np.eye(*shape))
 
   def testGain(self):
@@ -1096,16 +1096,16 @@ class IdentityInitializerTest(test.TestCase):
     for dtype in [dtypes.float32, dtypes.float64]:
       init_default = init_ops.identity_initializer(dtype=dtype)
       init_custom = init_ops.identity_initializer(gain=0.9, dtype=dtype)
-      with self.test_session(graph=ops.Graph(), use_gpu=True):
+      with self.session(graph=ops.Graph(), use_gpu=True):
         self.assertAllClose(init_default(shape).eval(), np.eye(*shape))
-      with self.test_session(graph=ops.Graph(), use_gpu=True):
+      with self.session(graph=ops.Graph(), use_gpu=True):
         self.assertAllClose(init_custom(shape).eval(), np.eye(*shape) * 0.9)
 
   def testPartitions(self):
     shape = (10, 10)
     init = init_ops.identity_initializer()
     partitioner = partitioned_variables.variable_axis_size_partitioner(1)
-    with self.test_session(graph=ops.Graph(), use_gpu=True):
+    with self.session(graph=ops.Graph(), use_gpu=True):
       with variable_scope.variable_scope(
           "foo", partitioner=partitioner, initializer=init):
         v = array_ops.identity(variable_scope.get_variable("bar", shape=shape))
diff --git a/tensorflow/python/kernel_tests/numerics_test.py b/tensorflow/python/kernel_tests/numerics_test.py
index 89ada8430e..6cc70f7c89 100644
--- a/tensorflow/python/kernel_tests/numerics_test.py
+++ b/tensorflow/python/kernel_tests/numerics_test.py
@@ -66,7 +66,7 @@ class VerifyTensorAllFiniteTest(test.TestCase):
 class NumericsTest(test.TestCase):
 
   def testInf(self):
-    with self.test_session(graph=ops.Graph()):
+    with self.session(graph=ops.Graph()):
       t1 = constant_op.constant(1.0)
       t2 = constant_op.constant(0.0)
       a = math_ops.div(t1, t2)
@@ -76,7 +76,7 @@ class NumericsTest(test.TestCase):
         a.eval()
 
   def testNaN(self):
-    with self.test_session(graph=ops.Graph()):
+    with self.session(graph=ops.Graph()):
       t1 = constant_op.constant(0.0)
       t2 = constant_op.constant(0.0)
       a = math_ops.div(t1, t2)
@@ -86,7 +86,7 @@ class NumericsTest(test.TestCase):
         a.eval()
 
   def testBoth(self):
-    with self.test_session(graph=ops.Graph()):
+    with self.session(graph=ops.Graph()):
       t1 = constant_op.constant([1.0, 0.0])
       t2 = constant_op.constant([0.0, 0.0])
       a = math_ops.div(t1, t2)
@@ -96,7 +96,7 @@ class NumericsTest(test.TestCase):
         a.eval()
 
   def testPassThrough(self):
-    with self.test_session(graph=ops.Graph()):
+    with self.session(graph=ops.Graph()):
       t1 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3])
       checked = array_ops.check_numerics(t1, message="pass through test")
       value = checked.eval()
diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py
index 496a452a03..248036a82a 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test.py
@@ -212,7 +212,7 @@ class SumReductionTest(BaseReductionTest):
 
     arr = np.ones([68000], dtype=np.float16)
 
-    with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
+    with self.session(graph=ops.Graph(), use_gpu=True) as sess:
       tf_arr = variables.Variable(arr)
       variables.global_variables_initializer().run()
       tf_mean = math_ops.reduce_mean(tf_arr, 0, False)
@@ -235,7 +235,7 @@ class SumReductionTest(BaseReductionTest):
       col_sum = np.sum(arr, axis=0)
       row_sum = np.sum(arr, axis=1)
 
-      with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
+      with self.session(graph=ops.Graph(), use_gpu=True) as sess:
         tf_row_sum = self._tf_reduce(arr, 1, False)
         tf_col_sum = self._tf_reduce(arr, 0, False)
         tf_out_row, tf_out_col = sess.run([tf_row_sum, tf_col_sum])
@@ -249,7 +249,7 @@ class SumReductionTest(BaseReductionTest):
           sum_y = np.sum(arr, axis=1)
           sum_xz = np.sum(arr, axis=(0, 2))
 
-          with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
+          with self.session(graph=ops.Graph(), use_gpu=True) as sess:
             tf_sum_xz = self._tf_reduce(arr, [0, 2], False)
             tf_sum_y = self._tf_reduce(arr, 1, False)
             tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y])
diff --git a/tensorflow/python/kernel_tests/reduction_ops_test_big.py b/tensorflow/python/kernel_tests/reduction_ops_test_big.py
index d70360775a..1e8524f72a 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test_big.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test_big.py
@@ -63,7 +63,7 @@ class BigReductionTest(BaseReductionTest):
         row_sum = np.ones([size_x], dtype=np.float32) * size_y
         full_sum = np.ones([], dtype=np.float32) * size_x * size_y
 
-        with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
+        with self.session(graph=ops.Graph(), use_gpu=True) as sess:
           tf_row_sum = self._tf_reduce_sum(arr, 1, False)
           tf_col_sum = self._tf_reduce_sum(arr, 0, False)
           tf_full_sum = self._tf_reduce_sum(arr, [0, 1], False)
@@ -81,7 +81,7 @@ class BigReductionTest(BaseReductionTest):
           sum_y = np.ones([size_x, size_z], dtype=np.float32)
           sum_xz = np.ones([size_y], dtype=np.float32)
 
-          with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
+          with self.session(graph=ops.Graph(), use_gpu=True) as sess:
             tf_sum_xz = self._tf_reduce_mean(arr, [0, 2], False)
             tf_sum_y = self._tf_reduce_mean(arr, 1, False)
             tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y])
@@ -106,7 +106,7 @@ class BigReductionTest(BaseReductionTest):
         row_max = np.max(arr, axis=1)
         full_max = np.max(col_max)
 
-        with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
+        with self.session(graph=ops.Graph(), use_gpu=True) as sess:
           tf_row_max = self._tf_reduce_max(arr, 1, False)
           tf_col_max = self._tf_reduce_max(arr, 0, False)
           tf_full_max = self._tf_reduce_max(arr, [0, 1], False)
@@ -125,7 +125,7 @@ class BigReductionTest(BaseReductionTest):
           sum_y = np.max(arr, axis=1)
           sum_xz = np.max(arr, axis=(0, 2))
 
-          with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
+          with self.session(graph=ops.Graph(), use_gpu=True) as sess:
             tf_sum_xz = self._tf_reduce_max(arr, [0, 2], False)
             tf_sum_y = self._tf_reduce_max(arr, 1, False)
             tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y])
@@ -149,7 +149,7 @@ class BigReductionTest(BaseReductionTest):
         row_sum = np.ones([size_x], dtype=np.bool)
         full_sum = np.ones([1], dtype=np.bool).reshape([])
 
-        with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
+        with self.session(graph=ops.Graph(), use_gpu=True) as sess:
           tf_row_sum = self._tf_reduce_all(arr, 1, False)
           tf_col_sum = self._tf_reduce_all(arr, 0, False)
           tf_full_sum = self._tf_reduce_all(arr, [0, 1], False)
@@ -167,7 +167,7 @@ class BigReductionTest(BaseReductionTest):
           sum_y = np.ones([size_x, size_z], dtype=np.bool)
           sum_xz = np.ones([size_y], dtype=np.bool)
 
-          with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
+          with self.session(graph=ops.Graph(), use_gpu=True) as sess:
             tf_sum_xz = self._tf_reduce_all(arr, [0, 2], False)
             tf_sum_y = self._tf_reduce_all(arr, 1, False)
             tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y])
diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index a28cdc3b26..05ad9f6336 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -516,7 +516,7 @@ class RNNTest(test.TestCase):
     fix_weights_generator.build((None, input_shape))
     weights = fix_weights_generator.get_weights()
 
-    with self.test_session(graph=ops_lib.Graph()) as sess:
+    with self.session(graph=ops_lib.Graph()) as sess:
       inputs = array_ops.placeholder(
           dtypes.float32, shape=(None, timestep, input_shape))
       cell = keras.layers.SimpleRNNCell(output_shape)
@@ -524,7 +524,7 @@ class RNNTest(test.TestCase):
           cell, inputs, dtype=dtypes.float32)
       cell.set_weights(weights)
       [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train})
-    with self.test_session(graph=ops_lib.Graph()) as sess:
+    with self.session(graph=ops_lib.Graph()) as sess:
       k_input = keras.Input(shape=(timestep, input_shape),
                             dtype=dtypes.float32)
       cell = keras.layers.SimpleRNNCell(output_shape)
@@ -536,7 +536,7 @@ class RNNTest(test.TestCase):
     self.assertAllClose(tf_state, k_state)
 
   def testBasicLSTMCellInterchangeWithLSTMCell(self):
-    with self.test_session(graph=ops_lib.Graph()) as sess:
+    with self.session(graph=ops_lib.Graph()) as sess:
       basic_cell = rnn_cell_impl.BasicLSTMCell(1)
       basic_cell(array_ops.ones([1, 1]),
                  state=basic_cell.get_initial_state(inputs=None,
@@ -548,7 +548,7 @@ class RNNTest(test.TestCase):
       prefix = os.path.join(self.get_temp_dir(), "ckpt")
       save_path = save.save(sess, prefix)
 
-    with self.test_session(graph=ops_lib.Graph()) as sess:
+    with self.session(graph=ops_lib.Graph()) as sess:
       lstm_cell = rnn_cell_impl.LSTMCell(1, name="basic_lstm_cell")
       lstm_cell(array_ops.ones([1, 1]),
                 state=lstm_cell.get_initial_state(inputs=None,
diff --git a/tensorflow/python/kernel_tests/scalar_test.py b/tensorflow/python/kernel_tests/scalar_test.py
index 287919bab7..d15f2c7b50 100644
--- a/tensorflow/python/kernel_tests/scalar_test.py
+++ b/tensorflow/python/kernel_tests/scalar_test.py
@@ -53,7 +53,7 @@ class ScalarTest(test.TestCase):
     for version in strict + lenient:
       with ops.Graph().as_default() as g:
         test_util.set_producer_version(g, version)
-        with self.test_session(graph=g) as sess:
+        with self.session(graph=g) as sess:
           feed = {}
           xs = placeholders(args, feed)
           x = op(*xs)
diff --git a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
index 96793d5af3..31e84341ae 100644
--- a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
@@ -76,7 +76,7 @@ class SparseTensorsMapTest(test.TestCase):
     return sparse_tensor_lib.SparseTensorValue(ind, val, shape)
 
   def testAddTakeMany(self):
-    with self.test_session(graph=ops.Graph(), use_gpu=False) as sess:
+    with self.session(graph=ops.Graph(), use_gpu=False) as sess:
       sp_input0 = self._SparseTensorValue_5x6(np.arange(6))
       sp_input1 = self._SparseTensorValue_3x4(np.arange(6))
       handle0 = add_sparse_to_tensors_map(sp_input0, shared_name="a")
diff --git a/tensorflow/python/kernel_tests/summary_audio_op_test.py b/tensorflow/python/kernel_tests/summary_audio_op_test.py
index eaae671192..e59a2ceef7 100644
--- a/tensorflow/python/kernel_tests/summary_audio_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_audio_op_test.py
@@ -50,7 +50,7 @@ class SummaryAudioOpTest(test.TestCase):
   def testAudioSummary(self):
     np.random.seed(7)
     for channels in (1, 2, 5, 8):
-      with self.test_session(graph=ops.Graph()) as sess:
+      with self.session(graph=ops.Graph()) as sess:
         num_frames = 7
         shape = (4, num_frames, channels)
         # Generate random audio in the range [-1.0, 1.0).
diff --git a/tensorflow/python/kernel_tests/summary_image_op_test.py b/tensorflow/python/kernel_tests/summary_image_op_test.py
index 4718827e88..b650e10404 100644
--- a/tensorflow/python/kernel_tests/summary_image_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_image_op_test.py
@@ -52,7 +52,7 @@ class SummaryImageOpTest(test.TestCase):
   def testImageSummary(self):
     for depth in (1, 3, 4):
       for positive in False, True:
-        with self.test_session(graph=ops.Graph()) as sess:
+        with self.session(graph=ops.Graph()) as sess:
           shape = (4, 5, 7) + (depth,)
           bad_color = [255, 0, 0, 255][:depth]
           # Build a mostly random image with one nan
@@ -87,7 +87,7 @@ class SummaryImageOpTest(test.TestCase):
   def testImageSummaryUint8(self):
     np.random.seed(7)
     for depth in (1, 3, 4):
-      with self.test_session(graph=ops.Graph()) as sess:
+      with self.session(graph=ops.Graph()) as sess:
         shape = (4, 5, 7) + (depth,)
 
         # Build a random uint8 image
diff --git a/tensorflow/python/training/warm_starting_util_test.py b/tensorflow/python/training/warm_starting_util_test.py
index 6c860cd452..3eddf79e34 100644
--- a/tensorflow/python/training/warm_starting_util_test.py
+++ b/tensorflow/python/training/warm_starting_util_test.py
@@ -203,7 +203,7 @@ class WarmStartingUtilTest(test.TestCase):
                                        "new_vocab")
     # New session and new graph.
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
+      with self.session(graph=g) as sess:
         fruit_output_layer = variable_scope.get_variable(
             "fruit_output_layer",
             initializer=[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.],
@@ -279,7 +279,7 @@ class WarmStartingUtilTest(test.TestCase):
                                        "new_vocab")
     # New session and new graph.
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
+      with self.session(graph=g) as sess:
         fruit_output_layer = variable_scope.get_variable(
             "fruit_output_layer",
             initializer=[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.],
@@ -337,7 +337,7 @@ class WarmStartingUtilTest(test.TestCase):
                                        "new_vocab")
     # New session and new graph.
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
+      with self.session(graph=g) as sess:
         fruit_output_layer = variable_scope.get_variable(
             "fruit_output_layer",
             shape=[4, 3],
@@ -403,7 +403,7 @@ class WarmStartingUtilTest(test.TestCase):
                                        "new_vocab")
     # New session and new graph.
     with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
+      with self.session(graph=g) as sess:
         fruit_output_layer = variable_scope.get_variable(
             "fruit_output_layer",
             shape=[4, 3],
-- 
GitLab


From ac84ad4b4a83003e1dbfebc505ca994c8126e625 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 24 Sep 2018 10:18:18 -0700
Subject: [PATCH 0598/1357] Clear the stale README and add reference to the
 updated file.

PiperOrigin-RevId: 214286888
---
 tensorflow/contrib/autograph/README.md | 145 +------------------------
 1 file changed, 2 insertions(+), 143 deletions(-)

diff --git a/tensorflow/contrib/autograph/README.md b/tensorflow/contrib/autograph/README.md
index 6ea2db72c4..8c277b59e8 100644
--- a/tensorflow/contrib/autograph/README.md
+++ b/tensorflow/contrib/autograph/README.md
@@ -4,147 +4,6 @@
 [deprecated](https://github.com/tensorflow/community/pull/18), AutoGraph is
 moving into TensorFlow core.
 
-The new code location is `tensorflow/python/autograph`.
+The new code location is `tensorflow/python/autograph`. Please refer to the
+README.md file in that directory.
 **
-
-IMPORTANT: AutoGraph is beta software, and under active development. Expect rough edges and bugs, but if you try it, we appreciate early feedback! We'd also love contributions ([please see our contributing guidelines](CONTRIBUTING.md) and our [style guide](STYLE_GUIDE.md)).
-
-AutoGraph is a Python to TensorFlow compiler.
-
-With AutoGraph, you can write [Eager style](https://www.tensorflow.org/guide/eager) code in a concise manner, and run it as a TensorFlow graph. AutoGraph uses source code transformation and partial evaluation to generate Python code that builds an equivalent TensorFlow subgraph. The result is code that behaves like ops and can be freely combined with other TensorFlow ops.  [Please see this file for which parts of the Python language we currently support](LIMITATIONS.md).
-
-For example, this Python function:
-
-```
-def f(x):
-  if x < 0:
-    x = -x
-  return x
-```
-
-would be converted to this:
-
-```
-def graph_mode_f(x):
-  with tf.name_scope('f'):
-
-    def if_true():
-      with tf.name_scope('if_true'):
-        x_1, = x,
-        x_1 = tf.negative(x_1)
-        return x_1,
-
-    def if_false():
-      with tf.name_scope('if_false'):
-        x_1, = x,
-        return x_1,
-    x = ag__.utils.run_cond(tf.greater(x, 0), if_true, if_false)
-    return x
-```
-
-so you can use it like an op:
-
-```
-with tf.Graph().as_default():
-  x = tf.constant(-1.0)
-
-  converted_f = autograph.to_graph(f)
-  y = converted_f(x)
-
-  with tf.Session() as sess:
-    print(sess.run(y))
-    # Output: 1
-```
-
-# Getting started
-
-Use AutoGraph in one of the following ways, described below:
-
- 1. Annotations (simpler)
- 2. Functional API (more flexible)
-
-To get started, install the latest nightly TensorFlow build:
-
-```shell
-pip install -U tf-nightly
-```
-
-Then import the `autograph` module from `tf.contrib`:
-
-```
-from tensorflow.contrib import autograph as ag
-```
-
-### Related links
-
-Articles:
-
- * [TensorFlow blog post](https://medium.com/tensorflow/autograph-converts-python-into-tensorflow-graphs-b2a871f87ec7)
-
-Interactive notebooks:
-
- * [Quick guide](https://colab.research.google.com/github/tensorflow/models/blob/master/samples/core/guide/autograph.ipynb)
- * [RNN trained using Keras and Estimators](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/rnn_keras_estimator.ipynb)
- * [Demo from the TF Dev Summit 2018](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb)
- * [Basic control flow speed test](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/ag_vs_eager_collatz_speed_test.ipynb)
- * [MNIST training speed test](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/ag_vs_eager_mnist_speed_test.ipynb)
- * [Basic algorithm samples](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/algorithms.ipynb)
- * [Introductory workshop support notebook](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/workshop.ipynb)
-
-## Using with annotations
-
-Annotating a function or class with `@convert` converts it in place:
-
-```
-@ag.convert()
-def f(x):
-  if x < 0:
-    x = -x
-  return x
-```
-
-... so that it always outputs TensorFlow code:
-
-```
-with tf.Graph().as_default():
-  x = tf.constant(-1)
-
-  y = f(x)
-
-  with tf.Session() as sess:
-    print(sess.run(y))
-    # Output: 1
-```
-
-## Using the functional API
-
-The functional API allows you to convert an existing function, class or object after it was defined:
-
-```
-converted_f = ag.to_graph(f)
-
-print(converted_f(tf.constant(-1)))
-# Output: Tensor
-
-print(f(-1))
-# Output: 1
-```
-
-You can use the functional API to inspect the generated code as well:
-
-```
-print(ag.to_code(f))
-# Output: <Python and TensorFlow code>
-```
-
-## Filing bugs and feature requests
-
-### Reporting a bug
-
- - If AutoGraph-generated code is compiling and running, but producing an incorrect result, send us a minimal reproduction case that includes the original Eager code, the inputs and if possible, the outputs or the error message.
- - If AutoGraph-generated code is compiling, but not running, send us a minimal reproduction case that includes the original Eager code, the inputs and if possible, the outputs or the error message.
- - If AutoGraph-generated code is not compiling, send us two minimal pieces of code. First, the Eager code that you would like to write, and second, the Graph code that you would like AutoGraph to have generated for you.
-
-### Requesting a feature
-
-If you’d like AutoGraph to convert a feature of Python or TF that we currently don’t handle, please let us know by filing a bug. We’ll make it as easy as possible to interact with us through there.
-- 
GitLab


From 594936f7ce57aa6623b78a0345c728f0bef5a4cf Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 24 Sep 2018 10:21:00 -0700
Subject: [PATCH 0599/1357] Remove the pretty formatting of generated code. The
 astor library that did that uses circular references, which can be
 problematic in eager mode.

PiperOrigin-RevId: 214287432
---
 tensorflow/python/autograph/pyct/compiler.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/compiler.py b/tensorflow/python/autograph/pyct/compiler.py
index 37f3e72f6e..21281aeb56 100644
--- a/tensorflow/python/autograph/pyct/compiler.py
+++ b/tensorflow/python/autograph/pyct/compiler.py
@@ -57,8 +57,15 @@ def ast_to_source(node, indentation='  '):
 
   # In some versions of Python, literals may appear as actual values. This
   # ensures everything is string.
-  code = map(str, generator.result)
-  code = astor.source_repr.pretty_source(code).lstrip()
+  code = ''.join(map(str, generator.result))
+
+  # Strip leading blank lines.
+  code_lines = code.split('\n')
+  trimmed_code_lines = []
+  for l in code_lines:
+    if l.rstrip() or trimmed_code_lines:
+      trimmed_code_lines.append(l)
+  code = '\n'.join(trimmed_code_lines)
 
   return code
 
-- 
GitLab


From 770a81b1edcb923086b82252d2c1a0271b0c49c5 Mon Sep 17 00:00:00 2001
From: Yutaka Leon <yleon@google.com>
Date: Mon, 24 Sep 2018 10:59:04 -0700
Subject: [PATCH 0600/1357] Fix documentation in some lookup_ops and add eager
 test.

PiperOrigin-RevId: 214294594
---
 .../python/kernel_tests/lookup_ops_test.py    | 70 ++++++++++++++-----
 tensorflow/python/ops/lookup_ops.py           | 40 +++++------
 2 files changed, 72 insertions(+), 38 deletions(-)

diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py
index 38b14e34cc..6791a03e2e 100644
--- a/tensorflow/python/kernel_tests/lookup_ops_test.py
+++ b/tensorflow/python/kernel_tests/lookup_ops_test.py
@@ -21,6 +21,7 @@ import os
 import numpy as np
 
 from tensorflow.python.client import session
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -29,6 +30,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.training import server_lib
 
@@ -53,6 +55,12 @@ class HashTableOpTest(test.TestCase):
       result = output.eval()
       self.assertAllEqual([0, 1, -1], result)
 
+      exported_keys_tensor, exported_values_tensor = table.export()
+
+      self.assertItemsEqual([b"brain", b"salad", b"surgery"],
+                            exported_keys_tensor.eval())
+      self.assertItemsEqual([0, 1, 2], exported_values_tensor.eval())
+
   def testHashTableFindHighRank(self):
     with self.cached_session():
       default_val = -1
@@ -181,6 +189,11 @@ class HashTableOpTest(test.TestCase):
           lookup_ops.KeyValueTensorInitializer(keys, values), default_val)
       table.init.run()
 
+      # Ref types do not produce a lookup signature mismatch.
+      input_string_ref = variables.Variable("brain")
+      variables.global_variables_initializer().run()
+      self.assertEqual(0, table.lookup(input_string_ref).eval())
+
       input_string = constant_op.constant([1, 2, 3], dtypes.int64)
       with self.assertRaises(TypeError):
         table.lookup(input_string)
@@ -261,6 +274,21 @@ class HashTableOpTest(test.TestCase):
       table.init.run()
       self.assertAllEqual(3, table.size().eval())
 
+  def testHashTableInt32String(self):
+    with self.cached_session():
+      default_val = "n/a"
+      keys = constant_op.constant([0, 1, 2], dtypes.int32)
+      values = constant_op.constant(["brain", "salad", "surgery"])
+      table = lookup_ops.HashTable(
+          lookup_ops.KeyValueTensorInitializer(keys, values), default_val)
+      table.init.run()
+
+      input_tensor = constant_op.constant([0, 1, -1])
+      output = table.lookup(input_tensor)
+
+      result = output.eval()
+      self.assertAllEqual([b"brain", b"salad", b"n/a"], result)
+
 
 class IndexTableFromFile(test.TestCase):
 
@@ -335,6 +363,7 @@ class IndexTableFromFile(test.TestCase):
       ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"]))
 
       self.assertRaises(errors_impl.OpError, ids.eval)
+
       feed_dict = {vocabulary_placeholder.name: vocabulary_file}
       lookup_ops.tables_initializer().run(feed_dict=feed_dict)
       self.assertAllEqual((1, 2, 3), ids.eval())
@@ -531,15 +560,22 @@ class KeyValueTensorInitializerTest(test.TestCase):
 
 class IndexTableFromTensor(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes
   def test_index_table_from_tensor_with_tensor_init(self):
-    with self.cached_session():
+    table = lookup_ops.index_table_from_tensor(
+        vocabulary_list=("brain", "salad", "surgery"), num_oov_buckets=1)
+
+    if not context.executing_eagerly():
+      with self.assertRaises(errors_impl.OpError):
+        self.evaluate(
+            table.lookup(constant_op.constant(("salad", "surgery", "tarkus"))))
+    else:
+      # Reinitializing a table in eager should work.
       table = lookup_ops.index_table_from_tensor(
           vocabulary_list=("brain", "salad", "surgery"), num_oov_buckets=1)
-      ids = table.lookup(constant_op.constant(("salad", "surgery", "tarkus")))
-
-      self.assertRaises(errors_impl.OpError, ids.eval)
-      lookup_ops.tables_initializer().run()
-      self.assertAllEqual((1, 2, 3), ids.eval())
+    self.evaluate(lookup_ops.tables_initializer())
+    ids = table.lookup(constant_op.constant(("salad", "surgery", "tarkus")))
+    self.assertAllEqual((1, 2, 3), self.evaluate(ids))
 
   def test_int32_index_table_from_tensor_with_tensor_init(self):
     with self.cached_session():
@@ -761,22 +797,20 @@ class InitializeTableFromFileOpTest(test.TestCase):
       f.write("\n".join(values) + "\n")
     return vocabulary_file
 
+  @test_util.run_in_graph_and_eager_modes
   def testInitializeStringTable(self):
     vocabulary_file = self._createVocabFile("one_column_1.txt")
+    default_value = -1
+    table = lookup_ops.HashTable(
+        lookup_ops.TextFileInitializer(
+            vocabulary_file, dtypes.string, lookup_ops.TextFileIndex.WHOLE_LINE,
+            dtypes.int64, lookup_ops.TextFileIndex.LINE_NUMBER), default_value)
+    self.evaluate(table.init)
 
-    with self.cached_session():
-      default_value = -1
-      table = lookup_ops.HashTable(
-          lookup_ops.TextFileInitializer(
-              vocabulary_file, dtypes.string,
-              lookup_ops.TextFileIndex.WHOLE_LINE, dtypes.int64,
-              lookup_ops.TextFileIndex.LINE_NUMBER), default_value)
-      table.init.run()
-
-      output = table.lookup(constant_op.constant(["brain", "salad", "tank"]))
+    output = table.lookup(constant_op.constant(["brain", "salad", "tank"]))
 
-      result = output.eval()
-      self.assertAllEqual([0, 1, -1], result)
+    result = self.evaluate(output)
+    self.assertAllEqual([0, 1, -1], result)
 
   def testInitializeInt64Table(self):
     vocabulary_file = self._createVocabFile(
diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index 561a341cf3..5443699ddd 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -422,7 +422,7 @@ class TextFileInitializer(TableInitializerBase):
   * `palmer -> 30`
 
   ```python
-  table = tf.contrib.lookup.HashTable(tf.contrib.lookup.TextFileInitializer(
+  table = tf.lookup.HashTable(tf.lookup.TextFileInitializer(
       "test.txt", tf.string, 0, tf.int64, 1, delimiter=" "), -1)
   ...
   table.init.run()
@@ -435,9 +435,9 @@ class TextFileInitializer(TableInitializerBase):
   * `palmer 30 -> 2`
 
   ```python
-  table = tf.contrib.lookup.HashTable(tf.contrib.lookup.TextFileInitializer(
-      "test.txt", tf.string, tf.contrib.lookup.TextFileIndex.WHOLE_LINE,
-      tf.int64, tf.contrib.lookup.TextFileIndex.LINE_NUMBER, delimiter=" "), -1)
+  table = tf.lookup.HashTable(tf.lookup.TextFileInitializer(
+      "test.txt", tf.string, tf.lookup.TextFileIndex.WHOLE_LINE,
+      tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER, delimiter=" "), -1)
   ...
   table.init.run()
   ```
@@ -953,7 +953,7 @@ def index_table_from_file(vocabulary_file=None,
 
   ```python
   features = tf.constant(["emerson", "lake", "and", "palmer"])
-  table = tf.contrib.lookup.index_table_from_file(
+  table = tf.lookup.index_table_from_file(
       vocabulary_file="test.txt", num_oov_buckets=1)
   ids = table.lookup(features)
   ...
@@ -1054,21 +1054,21 @@ def index_table_from_tensor(vocabulary_list,
 
   Any lookup of an out-of-vocabulary token will return a bucket ID based on its
   hash if `num_oov_buckets` is greater than zero. Otherwise it is assigned the
-  `default_value`.
-  The bucket ID range is `[mapping size, mapping size + num_oov_buckets - 1]`.
+  `default_value`. The bucket ID range is
+  `[vocabulary list size, vocabulary list size + num_oov_buckets - 1]`.
 
   The underlying table must be initialized by calling
   `tf.tables_initializer.run()` or `table.init.run()` once.
 
-  Elements in `mapping` cannot have duplicates, otherwise when executing the
-  table initializer op, it will throw a `FailedPreconditionError`.
+  Elements in `vocabulary_list` cannot have duplicates, otherwise when executing
+  the table initializer op, it will throw a `FailedPreconditionError`.
 
   Sample Usages:
 
   ```python
   vocabulary_list = tf.constant(["emerson", "lake", "palmer"])
-  table = tf.contrib.lookup.index_table_from_tensor(
-      mapping=vocabulary_list, num_oov_buckets=1, default_value=-1)
+  table = tf.lookup.index_table_from_tensor(
+      vocabulary_list=vocabulary_list, num_oov_buckets=1, default_value=-1)
   features = tf.constant(["emerson", "lake", "and", "palmer"])
   ids = table.lookup(features)
   ...
@@ -1093,7 +1093,7 @@ def index_table_from_tensor(vocabulary_list,
     The lookup table to map an input `Tensor` to index `int64` `Tensor`.
 
   Raises:
-    ValueError: If `mapping` is invalid.
+    ValueError: If `vocabulary_list` is invalid.
     ValueError: If `num_oov_buckets` is negative.
   """
   if vocabulary_list is None:
@@ -1185,7 +1185,7 @@ def index_to_string_table_from_file(vocabulary_file,
 
   ```python
   indices = tf.constant([1, 5], tf.int64)
-  table = tf.contrib.lookup.index_to_string_table_from_file(
+  table = tf.lookup.index_to_string_table_from_file(
       vocabulary_file="test.txt", default_value="UNKNOWN")
   values = table.lookup(indices)
   ...
@@ -1250,25 +1250,25 @@ def index_to_string_table_from_tensor(vocabulary_list,
   """Returns a lookup table that maps a `Tensor` of indices into strings.
 
   This operation constructs a lookup table to map int64 indices into string
-  values. The mapping is initialized from a string `mapping` 1-D `Tensor` where
-  each element is a value and the corresponding index within the tensor is the
-  key.
+  values. The mapping is initialized from a string `vocabulary_list` 1-D
+  `Tensor` where each element is a value and the corresponding index within the
+  tensor is the key.
 
-  Any input which does not have a corresponding index in 'mapping'
+  Any input which does not have a corresponding index in 'vocabulary_list'
   (an out-of-vocabulary entry) is assigned the `default_value`
 
   The underlying table must be initialized by calling
   `tf.tables_initializer.run()` or `table.init.run()` once.
 
-  Elements in `mapping` cannot have duplicates, otherwise when executing the
-  table initializer op, it will throw a `FailedPreconditionError`.
+  Elements in `vocabulary_list` cannot have duplicates, otherwise when executing
+  the table initializer op, it will throw a `FailedPreconditionError`.
 
   Sample Usages:
 
   ```python
   vocabulary_list = tf.constant(["emerson", "lake", "palmer"])
   indices = tf.constant([1, 5], tf.int64)
-  table = tf.contrib.lookup.index_to_string_table_from_tensor(
+  table = tf.lookup.index_to_string_table_from_tensor(
       vocabulary_list, default_value="UNKNOWN")
   values = table.lookup(indices)
   ...
-- 
GitLab


From f7017ef769bd603b61f25dfffc772e2153a9f076 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Mon, 24 Sep 2018 11:00:48 -0700
Subject: [PATCH 0601/1357] [data-stats] Exposes `StatsAggregator` and
 `set_stats_aggregator` in tf.contrib.data.

PiperOrigin-RevId: 214294955
---
 tensorflow/contrib/data/__init__.py           |  6 ++++
 .../contrib/data/python/ops/stats_ops.py      | 31 ++++++++-----------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index c378b1ce8d..3cb51279c3 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -44,6 +44,7 @@ See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
 @@group_by_reducer
 @@group_by_window
 @@ignore_errors
+@@latency_stats
 @@make_batched_features_dataset
 @@make_csv_dataset
 @@make_saveable_from_iterator
@@ -57,9 +58,11 @@ See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
 @@reduce_dataset
 @@sample_from_datasets
 @@scan
+@@set_stats_aggregator
 @@shuffle_and_repeat
 @@sliding_window_batch
 @@sloppy_interleave
+@@StatsAggregator
 @@unbatch
 @@unique
 
@@ -111,6 +114,9 @@ from tensorflow.contrib.data.python.ops.resampling import rejection_resample
 from tensorflow.contrib.data.python.ops.scan_ops import scan
 from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat
 from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch
+from tensorflow.contrib.data.python.ops.stats_ops import latency_stats
+from tensorflow.contrib.data.python.ops.stats_ops import set_stats_aggregator
+from tensorflow.contrib.data.python.ops.stats_ops import StatsAggregator
 from tensorflow.contrib.data.python.ops.unique import unique
 from tensorflow.contrib.data.python.ops.writers import TFRecordWriter
 from tensorflow.python.data.ops.iterator_ops import get_next_as_optional
diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/contrib/data/python/ops/stats_ops.py
index 8426228992..7410ee8e05 100644
--- a/tensorflow/contrib/data/python/ops/stats_ops.py
+++ b/tensorflow/contrib/data/python/ops/stats_ops.py
@@ -23,34 +23,31 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_dataset_ops
 
 
-# TODO(b/38416882): Properly export in the `tf.contrib.data` API when stable
-# or make private / remove.
 class StatsAggregator(object):
   """A stateful resource that aggregates statistics from one or more iterators.
 
   To record statistics, use one of the custom transformation functions defined
   in this module when defining your `tf.data.Dataset`. All statistics will be
   aggregated by the `StatsAggregator` that is associated with a particular
-  iterator (see below). For example, to record the total number of bytes
-  produced by iterating over a dataset:
+  iterator (see below). For example, to record the latency of producing each
+  element by iterating over a dataset:
 
   ```python
   dataset = ...
-  dataset = dataset.apply(stats_ops.bytes_produced_stats("total_bytes"))
+  dataset = dataset.apply(stats_ops.latency_stats("total_bytes"))
   ```
 
-  To associate a `StatsAggregator` with a `tf.data.Iterator` object, use
+  To associate a `StatsAggregator` with a `tf.data.Dataset` object, use
   the following pattern:
 
   ```python
-  dataset = ...
-  iterator = dataset.make_one_shot_iterator()
   stats_aggregator = stats_ops.StatsAggregator()
-  set_op = stats_aggregator.subscribe(iterator)
+  dataset = ...
 
-  with tf.Session() as sess:
-    # Running `set_op` will associate `iterator` with `stats_aggregator`.
-    sess.run(set_op)
+  # Apply `set_stats_aggregator` to associate `dataset` with `stats_aggregator`.
+  dataset = dataset.apply(
+      tf.contrib.data.set_stats_aggregator(stats_aggregator))
+  iterator = dataset.make_one_shot_iterator()
   ```
 
   To get a protocol buffer summary of the currently aggregated statistics,
@@ -60,6 +57,7 @@ class StatsAggregator(object):
 
   ```python
   stats_aggregator = stats_ops.StatsAggregator()
+  # ...
   stats_summary = stats_aggregator.get_summary()
   tf.add_to_collection(tf.GraphKeys.SUMMARIES, stats_summary)
   ```
@@ -73,6 +71,7 @@ class StatsAggregator(object):
     """Creates a `StatsAggregator`."""
     self._resource = gen_dataset_ops.stats_aggregator_handle()
 
+  # TODO(b/116314787): Update this/add support for V2 summary API.
   def get_summary(self):
     """Returns a string `tf.Tensor` that summarizes the aggregated statistics.
 
@@ -112,13 +111,11 @@ class _SetStatsAggregatorDataset(dataset_ops.Dataset):
     return self._input_dataset.output_classes
 
 
-# TODO(b/38416882): Properly export in the `tf.contrib.data` API when stable
-# or make private / remove.
 def set_stats_aggregator(stats_aggregator):
-  """Set the given stats_aggregator for aggregating the input dataset stats.
+  """Set the given `stats_aggregator` for aggregating the input dataset stats.
 
   Args:
-    stats_aggregator: A `StatsAggregator` object.
+    stats_aggregator: A `tf.contrib.data.StatsAggregator` object.
 
   Returns:
     A `Dataset` transformation function, which can be passed to
@@ -155,8 +152,6 @@ def bytes_produced_stats(tag):
   return _apply_fn
 
 
-# TODO(b/38416882): Properly export in the `tf.contrib.data` API when stable
-# or make private / remove.
 def latency_stats(tag):
   """Records the latency of producing each element of the input dataset.
 
-- 
GitLab


From 3bb3257a5f9675e6c094b9a6318d96d1bc27fc94 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 11:04:14 -0700
Subject: [PATCH 0602/1357] Add functionality to SubSlice a tensor.

PiperOrigin-RevId: 214295534
---
 tensorflow/core/framework/tensor.cc      | 22 +++++++++++++++
 tensorflow/core/framework/tensor.h       | 19 +++++++++++++
 tensorflow/core/framework/tensor_test.cc | 36 ++++++++++++++++++++++++
 3 files changed, 77 insertions(+)

diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index eb9c79ff2d..3df677675e 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -812,6 +812,28 @@ Tensor Tensor::Slice(int64 start, int64 limit) const {
   return ret;
 }
 
+Tensor Tensor::SubSlice(int64 index) const {
+  CHECK_GE(dims(), 2);  // Crash ok.
+  CHECK_LE(0, index);   // Crash ok.
+  int64 dim0_size = shape_.dim_size(0);
+  CHECK_LE(index, dim0_size);  // Crash ok.
+  Tensor ret;
+  ret.shape_ = shape_;
+  ret.shape_.RemoveDim(0);
+  ret.set_dtype(dtype());
+  ret.buf_ = nullptr;
+  if (dim0_size > 0) {
+    const int64 elems_per_dim0 = NumElements() / dim0_size;
+    const int64 delta = index * elems_per_dim0;
+    const int64 num_elems = elems_per_dim0;
+    if (buf_) {
+      DataType dt = dtype();
+      CASES(dt, ret.buf_ = new SubBuffer<T>(buf_, delta, num_elems));
+    }
+  }
+  return ret;
+}
+
 bool Tensor::FromProto(const TensorProto& proto) {
   return FromProto(cpu_allocator(), proto);
 }
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index e412329498..8a0c70fef2 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -200,10 +200,29 @@ class Tensor {
   /// must check the returned tensor's alignment before calling certain
   /// methods that have alignment requirement (e.g., `flat()`, `tensor()`).
   ///
+  /// NOTE: When fed with an N-dimensional tensor, this method returns a tensor
+  /// also with N dimensions. If you want to select a sub tensor, see SubSlice.
+  ///
   /// REQUIRES: `dims()` >= 1
   /// REQUIRES: `0 <= dim0_start <= dim0_limit <= dim_size(0)`
   Tensor Slice(int64 dim0_start, int64 dim0_limit) const;
 
+  /// \brief Select a subslice from this tensor along the 1st dimension.
+  ///
+  /// When fed with an N-dimensional tensor, this method returns a tensor with
+  /// N-1 dimensions, where the returned tensor is a subslice of the input
+  /// tensor along the first dimension. The N-1 dimensions of the returned
+  /// tensor are the last N-1 dimensions of the input tensor.
+  ///
+  /// NOTE: The returned tensor may not satisfy the same alignment
+  /// requirement as this tensor depending on the shape. The caller
+  /// must check the returned tensor's alignment before calling certain
+  /// methods that have alignment requirement (e.g., `flat()`, `tensor()`).
+  ///
+  /// REQUIRES: `dims()` >= 2
+  /// REQUIRES: `0 <= dim0_start < dim_size(0)`
+  Tensor SubSlice(int64 index) const;
+
   /// \brief Parse `other` and construct the tensor.
 
   /// Returns `true` iff the parsing succeeds. If the parsing fails,
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index fc05c86990..0bfa53e6c5 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -1228,6 +1228,42 @@ TEST(Tensor, Slice_Basic) {
   }
 }
 
+TEST(Tensor, SubSlice_Basic) {
+  {  // General
+    Tensor x(DT_FLOAT, TensorShape({10, 4, 36}));
+    // Fills in known values.
+    for (int i = 0; i < 10; ++i) {
+      x.SubSlice(i).flat<float>().setConstant(i * 1.f);
+    }
+    // A simple sub-slice along dim0.
+    Tensor y = x.SubSlice(5);
+    EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 36})));
+    auto tx = x.tensor<float, 3>();
+    auto ty = y.tensor<float, 2>();
+    for (int j = 0; j < 4; ++j) {
+      for (int k = 0; k < 36; ++k) {
+        EXPECT_EQ(ty(j, k), 5.0);
+        EXPECT_EQ(&tx(5, j, k), &ty(j, k));
+      }
+    }
+  }
+  {
+    // Test unaligned access via a SubSlice.
+    Tensor x(DT_FLOAT, TensorShape({30, 5}));
+    x.flat<float>().setConstant(0.0);
+
+    // Take an unaligned subslice.
+    Tensor y = x.SubSlice(1);
+#if EIGEN_MAX_ALIGN_BYTES > 0
+    EXPECT_FALSE(y.IsAligned());
+#endif
+    y.unaligned_flat<float>().setConstant(1.0);
+    for (int64 i = 0; i < y.NumElements(); ++i) {
+      EXPECT_EQ(1.0, y.unaligned_flat<float>()(i));
+    }
+  }
+}
+
 template <typename T>
 Tensor MkTensor(DataType dt, const TensorShape& shape,
                 std::vector<T> init_values) {
-- 
GitLab


From 525238f1e91c708693fda650e4085103eded12f0 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Mon, 24 Sep 2018 11:09:26 -0700
Subject: [PATCH 0603/1357] Disabling MultiDeviceIterator in Eager mode.
 Support is coming soon.

PiperOrigin-RevId: 214296771
---
 .../python/data/ops/multi_device_iterator_ops.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index 84e8abbd83..c914a43956 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import sparse
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
@@ -129,7 +130,13 @@ class _PerDeviceGenerator(dataset_ops.Dataset):
 
 
 class MultiDeviceIterator(object):
-  """An iterator over multiple devices."""
+  """An iterator over multiple devices.
+
+  @compatibility(eager)
+  MultiDeviceIterator isn't currently supported in Eager mode but support is
+  coming soon.
+  @end_compatibility
+  """
 
   def __init__(self,
                dataset,
@@ -146,7 +153,14 @@ class MultiDeviceIterator(object):
       prefetch_buffer_size: if > 1, then we setup a buffer on each device
         to prefetch into.
       source_device: The host device to place the `dataset` on.
+
+    Raises:
+      RuntimeError: If run in Eager mode.
     """
+    if context.executing_eagerly():
+      # TODO(rohanj): Fix this. Tracking bug: b/116467184
+      raise RuntimeError("MultiDeviceIterator is not currently supported in "
+                         "Eager mode.")
     self._dataset = dataset
     self._devices = devices
     self._source_device = source_device
-- 
GitLab


From 834ad88d20a9dbdbe7552ecd8c2ec7c26b444ef2 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Mon, 24 Sep 2018 11:17:25 -0700
Subject: [PATCH 0604/1357] [Java]: Release 1.11.0-rc2

PiperOrigin-RevId: 214298224
---
 tensorflow/java/maven/libtensorflow/pom.xml              | 2 +-
 tensorflow/java/maven/libtensorflow_jni/pom.xml          | 2 +-
 tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml      | 2 +-
 tensorflow/java/maven/pom.xml                            | 2 +-
 tensorflow/java/maven/proto/pom.xml                      | 2 +-
 tensorflow/java/maven/spark-tensorflow-connector/pom.xml | 2 +-
 tensorflow/java/maven/tensorflow-hadoop/pom.xml          | 2 +-
 tensorflow/java/maven/tensorflow/pom.xml                 | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml
index 6c82301eff..9fc6969c20 100644
--- a/tensorflow/java/maven/libtensorflow/pom.xml
+++ b/tensorflow/java/maven/libtensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc1</version>
+    <version>1.11.0-rc2</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml
index f7634795d6..68712082e1 100644
--- a/tensorflow/java/maven/libtensorflow_jni/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc1</version>
+    <version>1.11.0-rc2</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
index 7fcc6ff8f9..f031173c99 100644
--- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc1</version>
+    <version>1.11.0-rc2</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni_gpu</artifactId>
diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml
index 689902e9cd..2cac27990e 100644
--- a/tensorflow/java/maven/pom.xml
+++ b/tensorflow/java/maven/pom.xml
@@ -6,7 +6,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.tensorflow</groupId>
   <artifactId>parentpom</artifactId>
-  <version>1.11.0-rc1</version>
+  <version>1.11.0-rc2</version>
   <packaging>pom</packaging>
 
   <url>https://www.tensorflow.org</url>
diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml
index ea1462a9ae..8a93091276 100644
--- a/tensorflow/java/maven/proto/pom.xml
+++ b/tensorflow/java/maven/proto/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc1</version>
+    <version>1.11.0-rc2</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>proto</artifactId>
diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
index ce1ebfa15b..014bd8d212 100644
--- a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
+++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
@@ -6,7 +6,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>spark-tensorflow-connector_2.11</artifactId>
     <packaging>jar</packaging>
-    <version>1.11.0-rc1</version>
+    <version>1.11.0-rc2</version>
     <name>spark-tensorflow-connector</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord connector for Apache Spark DataFrames</description>
diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
index 56346fd045..d07c5fcd98 100644
--- a/tensorflow/java/maven/tensorflow-hadoop/pom.xml
+++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
@@ -5,7 +5,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>tensorflow-hadoop</artifactId>
     <packaging>jar</packaging>
-    <version>1.11.0-rc1</version>
+    <version>1.11.0-rc2</version>
     <name>tensorflow-hadoop</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop</description>
diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml
index 93decea0a0..af0c68a4ed 100644
--- a/tensorflow/java/maven/tensorflow/pom.xml
+++ b/tensorflow/java/maven/tensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc1</version>
+    <version>1.11.0-rc2</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>tensorflow</artifactId>
-- 
GitLab


From 5fbb064ba1e78bb28f7adbe92e6583c3b2bdfda7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 11:21:41 -0700
Subject: [PATCH 0605/1357] This CL adds an init_scope to the Keras set & get
 learning phase functions. This allows the Keras learning phase to work inside
 functions and defuns.

Note: There might still be bugs in graph mode if the default placeholder is being fed (instead of using set_learning_phase) and a layer is in a function.
PiperOrigin-RevId: 214299002
---
 tensorflow/python/keras/backend.py | 48 ++++++++++++++++++------------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 60ed8e8c8a..a46f9edb1e 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -367,18 +367,26 @@ def learning_phase():
   Returns:
       Learning phase (scalar integer tensor or Python integer).
   """
-  if context.executing_eagerly():
-    if _DUMMY_EAGER_GRAPH not in _GRAPH_LEARNING_PHASES:
-      # Fallback to inference mode as default.
-      return 0
-    return _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH]
+  with ops.init_scope():
+    # We always check & set the learning phase inside the init_scope,
+    # otherwise the wrong default_graph will be used to look up the learning
+    # phase inside of functions & defuns.
+    #
+    # This is because functions & defuns (both in graph & in eager mode)
+    # will always execute non-eagerly using a function-specific default
+    # subgraph.
+    if context.executing_eagerly():
+      if _DUMMY_EAGER_GRAPH not in _GRAPH_LEARNING_PHASES:
+        # Fallback to inference mode as default.
+        return 0
+      return _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH]
 
-  graph = ops.get_default_graph()
-  if graph not in _GRAPH_LEARNING_PHASES:
-    phase = array_ops.placeholder_with_default(
-        False, shape=(), name='keras_learning_phase')
-    _GRAPH_LEARNING_PHASES[graph] = phase
-  return _GRAPH_LEARNING_PHASES[graph]
+    graph = ops.get_default_graph()
+    if graph not in _GRAPH_LEARNING_PHASES:
+      phase = array_ops.placeholder_with_default(
+          False, shape=(), name='keras_learning_phase')
+      _GRAPH_LEARNING_PHASES[graph] = phase
+    return _GRAPH_LEARNING_PHASES[graph]
 
 
 @tf_export('keras.backend.set_learning_phase')
@@ -394,10 +402,11 @@ def set_learning_phase(value):
   global _GRAPH_LEARNING_PHASES  # pylint: disable=global-variable-not-assigned
   if value not in {0, 1}:
     raise ValueError('Expected learning phase to be 0 or 1.')
-  if context.executing_eagerly():
-    _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH] = value
-  else:
-    _GRAPH_LEARNING_PHASES[ops.get_default_graph()] = value
+  with ops.init_scope():
+    if context.executing_eagerly():
+      _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH] = value
+    else:
+      _GRAPH_LEARNING_PHASES[ops.get_default_graph()] = value
 
 
 @tf_contextlib.contextmanager
@@ -423,10 +432,11 @@ def learning_phase_scope(value):
     yield value
   finally:
     # Restore learning phase to initial value.
-    if context.executing_eagerly():
-      _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH] = previous_value
-    else:
-      _GRAPH_LEARNING_PHASES[ops.get_default_graph()] = previous_value
+    with ops.init_scope():
+      if context.executing_eagerly():
+        _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH] = previous_value
+      else:
+        _GRAPH_LEARNING_PHASES[ops.get_default_graph()] = previous_value
 
 
 @tf_export('keras.backend.get_session')
-- 
GitLab


From 28eeda839f124cf5ba648576e86214b38141e4ab Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 11:28:07 -0700
Subject: [PATCH 0606/1357] Move from deprecated self.test_session() to
 self.cached_session().

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about:
* the fact that the session may be reused.
* the session is not closed even when doing a "with self.test_session()" statement.

PiperOrigin-RevId: 214300210
---
 .../integration_tests/errors_test.py          |  4 +-
 .../python/autograph/core/errors_test.py      |  6 +--
 tensorflow/python/autograph/impl/api_test.py  | 26 ++++-----
 .../autograph/lang/special_functions_test.py  |  4 +-
 .../autograph/operators/py_builtins_test.py   | 16 +++---
 .../python/autograph/operators/slices_test.py |  4 +-
 tensorflow/python/eager/function_test.py      |  6 +--
 .../python/keras/engine/topology_test.py      |  2 +-
 .../keras/utils/multi_gpu_utils_test.py       |  2 +-
 .../boosted_trees/prediction_ops_test.py      |  4 +-
 .../boosted_trees/quantile_ops_test.py        |  4 +-
 .../linalg/linear_operator_addition_test.py   | 24 ++++-----
 .../logging_ops_logging_level_test.py         |  6 +--
 .../python/kernel_tests/logging_ops_test.py   | 40 +++++++-------
 .../kernel_tests/string_format_op_test.py     | 54 +++++++++----------
 .../python/kernel_tests/while_v2_test.py      | 18 +++----
 tensorflow/python/ops/image_ops_test.py       |  6 +--
 tensorflow/python/training/ftrl_test.py       |  4 +-
 .../training/learning_rate_decay_v2_test.py   |  2 +-
 19 files changed, 116 insertions(+), 116 deletions(-)

diff --git a/tensorflow/examples/autograph/integration_tests/errors_test.py b/tensorflow/examples/autograph/integration_tests/errors_test.py
index 69e5936832..9c10dad9aa 100644
--- a/tensorflow/examples/autograph/integration_tests/errors_test.py
+++ b/tensorflow/examples/autograph/integration_tests/errors_test.py
@@ -92,7 +92,7 @@ class ErrorsTest(tf.test.TestCase):
     compiled_fn = ag.to_graph(test_fn)
 
     with self.assertRaises(ag.TfRuntimeError) as error:
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         x = compiled_fn(tf.constant([4, 8]))
         with ag.improved_errors(compiled_fn):
           sess.run(x)
@@ -134,7 +134,7 @@ class ErrorsTest(tf.test.TestCase):
     # frame with "g" as the function name but because we don't yet add
     # try/except blocks to inner functions the name is "tf__g".
     with self.assertRaises(ag.TfRuntimeError) as error:
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         x = compiled_fn(tf.constant([4, 8]))
         with ag.improved_errors(compiled_fn):
           sess.run(x)
diff --git a/tensorflow/python/autograph/core/errors_test.py b/tensorflow/python/autograph/core/errors_test.py
index 0444ed7eab..aa6c293268 100644
--- a/tensorflow/python/autograph/core/errors_test.py
+++ b/tensorflow/python/autograph/core/errors_test.py
@@ -54,7 +54,7 @@ class RuntimeErrorsTest(test.TestCase):
     ops = zero_div_caller()
     with self.assertRaises(errors.TfRuntimeError) as cm:
       with errors.improved_errors(zero_div_caller):
-        with self.test_session() as sess:
+        with self.cached_session() as sess:
           sess.run(ops)
 
     for frame in cm.exception.custom_traceback:
@@ -69,7 +69,7 @@ class RuntimeErrorsTest(test.TestCase):
     ops = zero_div_caller()
     with self.assertRaises(errors.TfRuntimeError) as cm:
       with errors.improved_errors(zero_div_caller):
-        with self.test_session() as sess:
+        with self.cached_session() as sess:
           sess.run(ops)
 
     all_function_names = set()
@@ -86,7 +86,7 @@ class RuntimeErrorsTest(test.TestCase):
     ops = zero_div_caller()
     with self.assertRaises(tf_errors.InvalidArgumentError):
       with errors.improved_errors(zero_div_caller):
-        with self.test_session() as sess:
+        with self.cached_session() as sess:
           sess.run(ops)
 
   def test_improved_errors_validation(self):
diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py
index e0770ef4c6..8ce5022c0a 100644
--- a/tensorflow/python/autograph/impl/api_test.py
+++ b/tensorflow/python/autograph/impl/api_test.py
@@ -55,7 +55,7 @@ class ApiTest(test.TestCase):
         return x
 
     tc = TestClass()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
@@ -75,7 +75,7 @@ class ApiTest(test.TestCase):
         return x
 
     tc = TestClass()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
@@ -96,7 +96,7 @@ class ApiTest(test.TestCase):
         return x
 
     tc = TestClass()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
@@ -122,7 +122,7 @@ class ApiTest(test.TestCase):
         return x
 
     tc = TestClass()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
@@ -145,7 +145,7 @@ class ApiTest(test.TestCase):
         return x
 
     tc = TestClass()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
@@ -185,7 +185,7 @@ class ApiTest(test.TestCase):
         return x
 
     tc = TestClass()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
@@ -202,7 +202,7 @@ class ApiTest(test.TestCase):
         return -x
       return x
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = api.converted_call(test_fn, api.ConversionOptions.new(),
                              constant_op.constant(-1))
       self.assertEqual(1, sess.run(x))
@@ -219,7 +219,7 @@ class ApiTest(test.TestCase):
           return -self.x
         return self.x
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
       x = api.converted_call(tc.test_method, api.ConversionOptions.new(), tc)
       self.assertEqual(1, sess.run(x))
@@ -236,7 +236,7 @@ class ApiTest(test.TestCase):
           return -self.x
         return self.x
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
       x = api.converted_call(
           TestClass.test_method,
@@ -255,7 +255,7 @@ class ApiTest(test.TestCase):
           return -self.x
         return self.x
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
       x = api.converted_call(tc, api.ConversionOptions.new())
       self.assertEqual(1, sess.run(x))
@@ -272,7 +272,7 @@ class ApiTest(test.TestCase):
           return -self.x
         return self.x
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       tc = api.converted_call(TestClass, api.ConversionOptions.new(),
                               constant_op.constant(-1))
       # tc is now a converted object.
@@ -284,7 +284,7 @@ class ApiTest(test.TestCase):
     def f(x):
       return x == 0
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = api.converted_call(f, api.ConversionOptions.new(),
                              constant_op.constant(0))
       self.assertTrue(sess.run(x))
@@ -303,7 +303,7 @@ class ApiTest(test.TestCase):
 
     compiled_fn = api.to_graph(test_fn)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       x = compiled_fn(constant_op.constant([4, 8]), 4)
       self.assertListEqual([1, 2], sess.run(x).tolist())
 
diff --git a/tensorflow/python/autograph/lang/special_functions_test.py b/tensorflow/python/autograph/lang/special_functions_test.py
index 1f1cec18f7..545dd11729 100644
--- a/tensorflow/python/autograph/lang/special_functions_test.py
+++ b/tensorflow/python/autograph/lang/special_functions_test.py
@@ -33,7 +33,7 @@ class SpecialFunctionsTest(test.TestCase):
 
     l = special_functions.tensor_list(elements)
     sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertAllEqual(sess.run(sl), [[1, 2], [3, 4]])
 
   def test_tensor_list_array_from_elements(self):
@@ -41,7 +41,7 @@ class SpecialFunctionsTest(test.TestCase):
 
     l = special_functions.tensor_list(elements, use_tensor_array=True)
     sl = l.stack()
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertAllEqual(sess.run(sl), [[1, 2], [3, 4]])
 
   def test_stack(self):
diff --git a/tensorflow/python/autograph/operators/py_builtins_test.py b/tensorflow/python/autograph/operators/py_builtins_test.py
index a021263ffa..d64d31cc79 100644
--- a/tensorflow/python/autograph/operators/py_builtins_test.py
+++ b/tensorflow/python/autograph/operators/py_builtins_test.py
@@ -36,7 +36,7 @@ class PyBuiltinsTest(test.TestCase):
 
   def test_abs(self):
     self.assertEqual(py_builtins.abs_(-1), 1)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       t = py_builtins.abs_(constant_op.constant(-1))
       self.assertEqual(sess.run(t), 1)
       t = py_builtins.abs_(constant_op.constant([-1, 2, -3]))
@@ -45,7 +45,7 @@ class PyBuiltinsTest(test.TestCase):
   def test_float(self):
     self.assertEqual(py_builtins.float_(10), 10.0)
     self.assertEqual(py_builtins.float_('10.0'), 10.0)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       t = py_builtins.float_(constant_op.constant(1, dtype=dtypes.int64))
       self.assertEqual(sess.run(t), 1.0)
       st = py_builtins.float_(constant_op.constant('1.0'))
@@ -54,7 +54,7 @@ class PyBuiltinsTest(test.TestCase):
   def test_int(self):
     self.assertEqual(py_builtins.int_(10.0), 10)
     self.assertEqual(py_builtins.int_('11', 2), 3)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       t = py_builtins.int_(constant_op.constant(1, dtype=dtypes.float64))
       self.assertEqual(sess.run(t), 1)
       st = py_builtins.int_(constant_op.constant('1'))
@@ -69,7 +69,7 @@ class PyBuiltinsTest(test.TestCase):
 
   def test_len(self):
     self.assertEqual(py_builtins.len_([1, 2, 3]), 3)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       t = py_builtins.len_(constant_op.constant([[1], [2], [3]]))
       self.assertEqual(t, 3)
       ta = py_builtins.len_(tensor_array_ops.TensorArray(dtypes.int32, size=5))
@@ -82,7 +82,7 @@ class PyBuiltinsTest(test.TestCase):
       py_builtins.len_(constant_op.constant(1))
 
   def test_len_dynamic_shape(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       p = array_ops.placeholder(dtype=dtypes.int32, shape=None)
       t = py_builtins.len_(p)
       self.assertEqual(sess.run(t, {p: [1, 2, 3]}), 3)
@@ -95,7 +95,7 @@ class PyBuiltinsTest(test.TestCase):
     try:
       out_capturer = six.StringIO()
       sys.stdout = out_capturer
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         sess.run(py_builtins.print_(constant_op.constant('test message'), 1))
         self.assertEqual(out_capturer.getvalue(), 'test message 1\n')
     finally:
@@ -105,7 +105,7 @@ class PyBuiltinsTest(test.TestCase):
     try:
       out_capturer = six.StringIO()
       sys.stdout = out_capturer
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         sess.run(
             py_builtins.print_(constant_op.constant('test message'), [1, 2]))
         self.assertEqual(out_capturer.getvalue(), 'test message [1, 2]\n')
@@ -118,7 +118,7 @@ class PyBuiltinsTest(test.TestCase):
     self.assertListEqual(list(py_builtins.range_(2, 0, -1)), [2, 1])
 
   def test_range_tensor(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       r = py_builtins.range_(constant_op.constant(3))
       self.assertAllEqual(sess.run(r), [0, 1, 2])
       r = py_builtins.range_(1, constant_op.constant(3))
diff --git a/tensorflow/python/autograph/operators/slices_test.py b/tensorflow/python/autograph/operators/slices_test.py
index d8b8418750..9e4865b3c6 100644
--- a/tensorflow/python/autograph/operators/slices_test.py
+++ b/tensorflow/python/autograph/operators/slices_test.py
@@ -51,14 +51,14 @@ class SlicesTest(test.TestCase):
     t = slices.get_item(initial_str, 1,
                         slices.GetItemOpts(element_dtype=initial_str.dtype))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertEqual(sess.run(t), b'b')
 
     initial_list_str = constant_op.constant(['abcd', 'bcde'])
     t = slices.get_item(initial_list_str, 1,
                         slices.GetItemOpts(element_dtype=initial_str.dtype))
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertEqual(sess.run(t), b'bcde')
 
 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index e4513cc87c..04f42f63d4 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1602,7 +1602,7 @@ class FunctionTest(test.TestCase):
     defun_add = function.defun_with_attributes(
         add, attributes={'experimental_3': True, 'experimental_4': 1.0})
 
-    with context.graph_mode(), self.test_session():
+    with context.graph_mode(), self.cached_session():
       with ops.get_default_graph().as_default():
         t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
         sq = matmul(t, t)
@@ -1636,7 +1636,7 @@ class FunctionTest(test.TestCase):
 
     with self.assertRaisesRegexp(ValueError,
                                  '.*Attribute name is not whitelisted.*'):
-      with context.graph_mode(), self.test_session():
+      with context.graph_mode(), self.cached_session():
         with ops.get_default_graph().as_default():
           t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
           matmul(t, t)
@@ -1647,7 +1647,7 @@ class FunctionTest(test.TestCase):
 
     with self.assertRaisesRegexp(ValueError,
                                  '.*Unsupported attribute type.*'):
-      with context.graph_mode(), self.test_session():
+      with context.graph_mode(), self.cached_session():
         with ops.get_default_graph().as_default():
           t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
           add(t, t)
diff --git a/tensorflow/python/keras/engine/topology_test.py b/tensorflow/python/keras/engine/topology_test.py
index 061db8ee34..a0da96334b 100644
--- a/tensorflow/python/keras/engine/topology_test.py
+++ b/tensorflow/python/keras/engine/topology_test.py
@@ -915,7 +915,7 @@ class TopologyConstructionTest(test.TestCase):
 
   def test_constant_initializer_with_numpy(self):
 
-    with self.test_session():
+    with self.cached_session():
       initializer = keras.initializers.Constant(np.ones((3, 2)))
       model = keras.models.Sequential()
       model.add(keras.layers.Dense(2, input_shape=(3,),
diff --git a/tensorflow/python/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/utils/multi_gpu_utils_test.py
index d6016ed711..3d0351a11f 100644
--- a/tensorflow/python/keras/utils/multi_gpu_utils_test.py
+++ b/tensorflow/python/keras/utils/multi_gpu_utils_test.py
@@ -186,7 +186,7 @@ class TestMultiGPUModel(test.TestCase):
     if not check_if_compatible_devices(gpus=gpus):
       return
 
-    with self.test_session():
+    with self.cached_session():
       inputs = keras.Input((4, 3))
       init_state = keras.Input((3,))
       outputs = keras.layers.SimpleRNN(
diff --git a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
index 3b28d44cf8..467e33ec87 100644
--- a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
+++ b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
@@ -934,7 +934,7 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase):
     For example, this could happen if the final ensemble contains one tree that
     got pruned up to the root.
     """
-    with self.test_session() as session:
+    with self.cached_session() as session:
       tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
       text_format.Merge(
           """
@@ -990,7 +990,7 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase):
 
   def testContribsMultipleTreeWhenFirstTreeIsABiasNode(self):
     """Tests case when, after training, first tree contains only a bias node."""
-    with self.test_session() as session:
+    with self.cached_session() as session:
       tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
       text_format.Merge(
           """
diff --git a/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
index c71b8df4ad..e0d46bae83 100644
--- a/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
+++ b/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
@@ -78,7 +78,7 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
     self.num_quantiles = constant_op.constant(3, dtype=dtypes.int64)
 
   def testBasicQuantileBucketsSingleResource(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       quantile_accumulator_handle = self.create_resource("floats", self.eps,
                                                          self.max_elements, 2)
       resources.initialize_resources(resources.shared_resources()).run()
@@ -102,7 +102,7 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self._feature_1_quantiles, quantiles[1].eval())
 
   def testBasicQuantileBucketsMultipleResources(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       quantile_accumulator_handle_0 = self.create_resource("float_0", self.eps,
                                                            self.max_elements)
       quantile_accumulator_handle_1 = self.create_resource("float_1", self.eps,
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py
index 7c79fedf65..cf56168d63 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py
@@ -76,7 +76,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase):
         [1., 1.], is_positive_definite=True, name="A")
     op_b = linalg.LinearOperatorDiag(
         [2., 2.], is_positive_definite=True, name="B")
-    with self.test_session():
+    with self.cached_session():
       op_sum = add_operators([op_a, op_b])
       self.assertEqual(1, len(op_sum))
       op = op_sum[0]
@@ -98,7 +98,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase):
         [2., 2.], is_positive_definite=True, name="op2")
     op3 = linalg.LinearOperatorDiag(
         [3., 3.], is_positive_definite=True, name="op3")
-    with self.test_session():
+    with self.cached_session():
       op_sum = add_operators([op1, op2, op3])
       self.assertEqual(1, len(op_sum))
       op = op_sum[0]
@@ -121,7 +121,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase):
         name="tril")
     op3 = linalg.LinearOperatorDiag(
         [3., 3.], is_non_singular=True, name="diag_b")
-    with self.test_session():
+    with self.cached_session():
       op_sum = add_operators([op1, op2, op3])
       self.assertEqual(1, len(op_sum))
       op = op_sum[0]
@@ -143,7 +143,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase):
     op2 = linalg.LinearOperatorLowerTriangular(
         [[2., 0.], [1.5, 2.]], name="tril")
     op3 = linalg.LinearOperatorDiag([3., 3.], name="diag_b")
-    with self.test_session():
+    with self.cached_session():
       op_sum = add_operators([op0, op1, op2, op3], operator_name="my_operator")
       self.assertEqual(1, len(op_sum))
       op = op_sum[0]
@@ -233,7 +233,7 @@ class LinearOperatorOrderOfAdditionTest(test.TestCase):
     self.assertEqual(2, len(op_sum))
     found_diag = False
     found_tril = False
-    with self.test_session():
+    with self.cached_session():
       for op in op_sum:
         if isinstance(op, linalg.LinearOperatorDiag):
           found_diag = True
@@ -273,7 +273,7 @@ class AddAndReturnScaledIdentityTest(test.TestCase):
     operator = self._adder.add(id1, id2, "my_operator", hints)
     self.assertIsInstance(operator, linalg.LinearOperatorScaledIdentity)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(2 *
                           linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
                           operator.to_dense().eval())
@@ -291,7 +291,7 @@ class AddAndReturnScaledIdentityTest(test.TestCase):
     operator = self._adder.add(id1, id2, "my_operator", hints)
     self.assertIsInstance(operator, linalg.LinearOperatorScaledIdentity)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(3.2 *
                           linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
                           operator.to_dense().eval())
@@ -310,7 +310,7 @@ class AddAndReturnScaledIdentityTest(test.TestCase):
     operator = self._adder.add(id1, id2, "my_operator", hints)
     self.assertIsInstance(operator, linalg.LinearOperatorScaledIdentity)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(1.2 *
                           linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
                           operator.to_dense().eval())
@@ -334,7 +334,7 @@ class AddAndReturnDiagTest(test.TestCase):
     operator = self._adder.add(id1, id2, "my_operator", hints)
     self.assertIsInstance(operator, linalg.LinearOperatorDiag)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(2 *
                           linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(),
                           operator.to_dense().eval())
@@ -354,7 +354,7 @@ class AddAndReturnDiagTest(test.TestCase):
     operator = self._adder.add(op1, op2, "my_operator", hints)
     self.assertIsInstance(operator, linalg.LinearOperatorDiag)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(
           linalg.LinearOperatorDiag(diag1 + diag2).to_dense().eval(),
           operator.to_dense().eval())
@@ -379,7 +379,7 @@ class AddAndReturnTriLTest(test.TestCase):
     operator = self._adder.add(diag, tril, "my_operator", hints)
     self.assertIsInstance(operator, linalg.LinearOperatorLowerTriangular)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose([[11., 0.], [30., 2.]], operator.to_dense().eval())
     self.assertTrue(operator.is_positive_definite)
     self.assertTrue(operator.is_non_singular)
@@ -401,7 +401,7 @@ class AddAndReturnMatrixTest(test.TestCase):
     operator = self._adder.add(diag1, diag2, "my_operator", hints)
     self.assertIsInstance(operator, linalg.LinearOperatorFullMatrix)
 
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose([[0., 0.], [0., 5.]], operator.to_dense().eval())
     self.assertFalse(operator.is_positive_definite)
     self.assertFalse(operator.is_non_singular)
diff --git a/tensorflow/python/kernel_tests/logging_ops_logging_level_test.py b/tensorflow/python/kernel_tests/logging_ops_logging_level_test.py
index 252090b7bd..0e8197dccb 100644
--- a/tensorflow/python/kernel_tests/logging_ops_logging_level_test.py
+++ b/tensorflow/python/kernel_tests/logging_ops_logging_level_test.py
@@ -31,7 +31,7 @@ class PrintV2LoggingLevelTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintOneTensorLogInfo(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(
@@ -43,7 +43,7 @@ class PrintV2LoggingLevelTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintOneTensorLogWarning(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(
@@ -55,7 +55,7 @@ class PrintV2LoggingLevelTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintOneTensorLogError(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(
diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py
index b24a0d0f9b..4beddd00bb 100644
--- a/tensorflow/python/kernel_tests/logging_ops_test.py
+++ b/tensorflow/python/kernel_tests/logging_ops_test.py
@@ -69,7 +69,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintOneTensor(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(tensor)
@@ -80,7 +80,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintOneTensorVarySummarize(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(tensor, summarize=1)
@@ -89,7 +89,7 @@ class PrintV2Test(test.TestCase):
       expected = "[0 ... 9]"
       self.assertTrue((expected + "\n") in printed.contents())
 
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(tensor, summarize=2)
@@ -98,7 +98,7 @@ class PrintV2Test(test.TestCase):
       expected = "[0 1 ... 8 9]"
       self.assertTrue((expected + "\n") in printed.contents())
 
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(tensor, summarize=3)
@@ -107,7 +107,7 @@ class PrintV2Test(test.TestCase):
       expected = "[0 1 2 ... 7 8 9]"
       self.assertTrue((expected + "\n") in printed.contents())
 
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(tensor, summarize=-1)
@@ -118,7 +118,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintOneVariable(self):
-    with self.test_session():
+    with self.cached_session():
       var = variables.Variable(math_ops.range(10))
       if not context.executing_eagerly():
         variables.global_variables_initializer().run()
@@ -130,7 +130,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintTwoVariablesInStructWithAssignAdd(self):
-    with self.test_session():
+    with self.cached_session():
       var_one = variables.Variable(2.14)
       plus_one = var_one.assign_add(1.0)
       var_two = variables.Variable(math_ops.range(10))
@@ -145,7 +145,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintTwoTensors(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(tensor, tensor * 10)
@@ -155,7 +155,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintPlaceholderGeneration(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2("{}6", {"{}": tensor * 10})
@@ -165,7 +165,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintNoTensors(self):
-    with self.test_session():
+    with self.cached_session():
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(23, [23, 5], {"6": 12})
         self.evaluate(print_op)
@@ -174,7 +174,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintFloatScalar(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = ops.convert_to_tensor(434.43)
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(tensor)
@@ -184,7 +184,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintStringScalar(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = ops.convert_to_tensor("scalar")
       with self.captureWritesToStream(sys.stderr) as printed:
         print_op = logging_ops.print_v2(tensor)
@@ -194,7 +194,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintComplexTensorStruct(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       small_tensor = constant_op.constant([0.3, 12.4, -16.1])
       big_tensor = math_ops.mul(tensor, 10)
@@ -214,7 +214,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintSparseTensor(self):
-    with self.test_session():
+    with self.cached_session():
       ind = [[0, 0], [1, 0], [1, 3], [4, 1], [1, 4], [3, 2], [3, 3]]
       val = [0, 10, 13, 4, 14, 32, 33]
       shape = [5, 6]
@@ -238,7 +238,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintSparseTensorInDataStruct(self):
-    with self.test_session():
+    with self.cached_session():
       ind = [[0, 0], [1, 0], [1, 3], [4, 1], [1, 4], [3, 2], [3, 3]]
       val = [0, 10, 13, 4, 14, 32, 33]
       shape = [5, 6]
@@ -262,7 +262,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testPrintOneTensorStdout(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.captureWritesToStream(sys.stdout) as printed:
         print_op = logging_ops.print_v2(
@@ -273,7 +273,7 @@ class PrintV2Test(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testInvalidOutputStreamRaisesError(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       with self.assertRaises(ValueError):
         print_op = logging_ops.print_v2(
@@ -281,13 +281,13 @@ class PrintV2Test(test.TestCase):
         self.evaluate(print_op)
 
   def testPrintOpName(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       print_op = logging_ops.print_v2(tensor, name="print_name")
       self.assertEqual(print_op.name, "print_name")
 
   def testNoDuplicateFormatOpGraphModeAfterExplicitFormat(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       formatted_string = string_ops.string_format("{}", tensor)
       print_op = logging_ops.print_v2(formatted_string)
@@ -298,7 +298,7 @@ class PrintV2Test(test.TestCase):
       self.assertEqual(len(format_ops), 1)
 
   def testPrintOneTensorEagerOnOpCreate(self):
-    with self.test_session():
+    with self.cached_session():
       with context.eager_mode():
         tensor = math_ops.range(10)
         expected = "[0 1 2 ... 7 8 9]"
diff --git a/tensorflow/python/kernel_tests/string_format_op_test.py b/tensorflow/python/kernel_tests/string_format_op_test.py
index afa71db909..74a5072bab 100644
--- a/tensorflow/python/kernel_tests/string_format_op_test.py
+++ b/tensorflow/python/kernel_tests/string_format_op_test.py
@@ -34,14 +34,14 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorOneDim(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       format_output = string_ops.string_format("{}", tensor)
       out = self.evaluate(format_output)
       expected = "[0 1 2 ... 7 8 9]"
       self.assertEqual(compat.as_text(out), expected)
 
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(10)
       format_output = string_ops.string_format("{}", [tensor])
       out = self.evaluate(format_output)
@@ -50,7 +50,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneVariableScalar(self):
-    with self.test_session():
+    with self.cached_session():
       var = variables.Variable(3.34)
       format_output = string_ops.string_format("{}", [var])
       if not context.executing_eagerly():
@@ -61,7 +61,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneVariableOneDim(self):
-    with self.test_session():
+    with self.cached_session():
       var = variables.Variable(math_ops.range(10))
       format_output = string_ops.string_format("{}", [var])
       if not context.executing_eagerly():
@@ -72,7 +72,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatTwoVariablesWithAssignAdd(self):
-    with self.test_session():
+    with self.cached_session():
       var_one = variables.Variable(2.14)
       plus_one = var_one.assign_add(1.0)
       var_two = variables.Variable(math_ops.range(10))
@@ -86,7 +86,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorOneDimFloat(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = constant_op.constant([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7])
       format_output = string_ops.string_format("{}", tensor)
       out = self.evaluate(format_output)
@@ -95,7 +95,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorOneDimMatchesSummarize(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(6)
       format_output = string_ops.string_format("{}", tensor, summarize=3)
       out = self.evaluate(format_output)
@@ -104,28 +104,28 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorOneDimVarySummarize(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(6)
       format_output = string_ops.string_format("{}", tensor, summarize=-1)
       out = self.evaluate(format_output)
       expected = "[0 1 2 3 4 5]"
       self.assertEqual(compat.as_text(out), expected)
 
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(6)
       format_output = string_ops.string_format("{}", tensor, summarize=1)
       out = self.evaluate(format_output)
       expected = "[0 ... 5]"
       self.assertEqual(compat.as_text(out), expected)
 
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(6)
       format_output = string_ops.string_format("{}", tensor, summarize=2)
       out = self.evaluate(format_output)
       expected = "[0 1 ... 4 5]"
       self.assertEqual(compat.as_text(out), expected)
 
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(6)
       format_output = string_ops.string_format("{}", tensor, summarize=10)
       out = self.evaluate(format_output)
@@ -134,7 +134,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorOneDimAlmostSummarize(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = math_ops.range(5)
       format_output = string_ops.string_format("{}", tensor, summarize=3)
       out = self.evaluate(format_output)
@@ -143,7 +143,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorTwoDimLessThanSummarize(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(4), [2, 2])
       format_output = string_ops.string_format("{}", tensor, summarize=3)
       out = self.evaluate(format_output)
@@ -153,7 +153,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorTwoDim(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(100), [10, 10])
       format_output = string_ops.string_format("{}", tensor)
       out = self.evaluate(format_output)
@@ -168,7 +168,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorTwoDimSummarizeTwo(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(100), [10, 10])
       format_output = string_ops.string_format("{}", tensor, summarize=2)
       out = self.evaluate(format_output)
@@ -181,7 +181,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorThreeDim(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(1000), [10, 10, 10])
       format_output = string_ops.string_format("{}", tensor)
       out = self.evaluate(format_output)
@@ -237,7 +237,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorTemplatePrefix(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(100), [10, 10])
       format_output = string_ops.string_format("tensor summary: {}", tensor)
       out = self.evaluate(format_output)
@@ -252,7 +252,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorTemplatePrefixAndSuffix(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(100), [10, 10])
       format_output = string_ops.string_format("tensor summary: {}, suffix",
                                                tensor)
@@ -268,7 +268,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatOneTensorTemplateSuffix(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(100), [10, 10])
       format_output = string_ops.string_format("{}, suffix", tensor)
       out = self.evaluate(format_output)
@@ -283,7 +283,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatNoTensor(self):
-    with self.test_session():
+    with self.cached_session():
       format_output = string_ops.string_format("No tensor.", ())
       out = self.evaluate(format_output)
       expected = "No tensor."
@@ -291,7 +291,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatMultiTensor(self):
-    with self.test_session():
+    with self.cached_session():
       tensor_one = array_ops.reshape(math_ops.range(100), [10, 10])
       tensor_two = tensor_one * 10
       format_output = string_ops.string_format("One: {},\nTwo: {}",
@@ -315,7 +315,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatSummarizeOne(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(100), [10, 10])
       format_output = string_ops.string_format("tensor summary: {}", tensor,
                                                summarize=1)
@@ -327,7 +327,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatSummarizeTwo(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(100), [10, 10])
       format_output = string_ops.string_format("tensor summary: {}", tensor,
                                                summarize=2)
@@ -341,7 +341,7 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testFormatPlaceholder(self):
-    with self.test_session():
+    with self.cached_session():
       tensor = array_ops.reshape(math_ops.range(100), [10, 10])
       format_output = string_ops.string_format("tensor summary: %t%", tensor,
                                                placeholder="%t%")
@@ -357,21 +357,21 @@ class StringFormatOpTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testTensorCountMustMatchPlaceholderCount(self):
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
           ValueError, r"2 placeholder\(s\) in template does not match 1 "
                       r"tensor\(s\) provided as input"):
         tensor = math_ops.range(10)
         format_output = string_ops.string_format("{} {}", tensor)
         self.evaluate(format_output)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
           ValueError, r"2 placeholder\(s\) in template does not match 1 "
                       r"tensor\(s\) provided as input"):
         tensor = math_ops.range(10)
         format_output = string_ops.string_format("{} {}", [tensor])
         self.evaluate(format_output)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
           ValueError, r"1 placeholder\(s\) in template does not match 2 "
                       r"tensor\(s\) provided as input"):
diff --git a/tensorflow/python/kernel_tests/while_v2_test.py b/tensorflow/python/kernel_tests/while_v2_test.py
index 0c3b72408e..3a070544e8 100644
--- a/tensorflow/python/kernel_tests/while_v2_test.py
+++ b/tensorflow/python/kernel_tests/while_v2_test.py
@@ -41,7 +41,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     x = constant_op.constant(2.)
     ret = while_loop_v2(lambda v: v < 8., lambda v: v * v, [x])
     grad = gradients_impl.gradients(ret, [x])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertEqual(sess.run(ret), 16.)
       self.assertSequenceEqual(sess.run(grad), [32.])
 
@@ -58,7 +58,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
 
     # Note: This is simply d_ret[0]/d_x since d_ret[1]/d_x is 0.
     grad = gradients_impl.gradients(ret, [x])  # [2*x*y]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertSequenceEqual(sess.run(ret), [45., 3.])
       self.assertSequenceEqual(sess.run(grad), [9.])
 
@@ -81,7 +81,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     grady_0 = gradients_impl.gradients(ret[0], [y])  # [2*x*y + x**2]
     grady_1 = gradients_impl.gradients(ret[1], [y])  # [x + 1]
     grady_2 = gradients_impl.gradients(ret, [y])  # [2*x*y + x**2 + x + 1]
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertSequenceEqual(sess.run(ret), [120., 23.])
       self.assertSequenceEqual(sess.run(gradx_0), [39.])
       self.assertSequenceEqual(sess.run(gradx_1), [4.])
@@ -96,7 +96,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     ret2 = while_loop_v2(lambda v: v < 16., lambda v: v * v, ret1)  # x**4
     grad = gradients_impl.gradients(ret2, [x])  # 4x**3
     grad_grad = gradients_impl.gradients(grad, [x])  # 12x**2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertSequenceEqual(sess.run(grad), [32.])
       self.assertSequenceEqual(sess.run(grad_grad), [48.])
 
@@ -105,7 +105,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     ret = while_loop_v2(lambda v: v < 8., lambda v: v**2, [x])  # x**4
     grad = gradients_impl.gradients(ret, [x])  # 4x**3
     grad_grad = gradients_impl.gradients(grad, [x])  # 12x**2
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertEqual(sess.run(ret), 16.)
       self.assertSequenceEqual(sess.run(grad), [32.])
       self.assertSequenceEqual(sess.run(grad_grad), [48.])
@@ -148,7 +148,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     y = constant_op.constant(1.)
     ret = while_loop_v2(lambda v: v + y < 9., lambda v: v * 3., [x])
     grad = gradients_impl.gradients(ret, [x])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertEqual(sess.run(ret), 18.)
       self.assertSequenceEqual(sess.run(grad), [9.])
 
@@ -157,7 +157,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     y = constant_op.constant(3.)
     ret = while_loop_v2(lambda v: v < 8., lambda v: v * y, [x])
     grad = gradients_impl.gradients(ret, [x])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertEqual(sess.run(ret), 18.)
       self.assertSequenceEqual(sess.run(grad), [9.])
 
@@ -178,7 +178,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
 
     ret = while_loop_v2(Cond, Body, [x, tensor_list])
     grad = gradients_impl.gradients(ret[0], x)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertEqual(sess.run(ret[0]), 16.)
       self.assertSequenceEqual(sess.run(grad), [32.])
 
@@ -212,7 +212,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     self.assertEqual(accumulator_count, 1)
 
     grad = gradients_impl.gradients(ret[0], x)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self.assertEqual(sess.run(ret[0]), 16.)
       self.assertSequenceEqual(sess.run(grad), [32.])
 
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index da45f6e3e6..35fdee4fad 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -3673,7 +3673,7 @@ class NonMaxSuppressionTest(test_util.TensorFlowTestCase):
     # Note: There are multiple versions of non_max_suppression v2, v3, v4.
     # gen_image_ops.non_max_suppression_v2:
     for dtype in [np.float16, np.float32]:
-      with self.test_session():
+      with self.cached_session():
         boxes = constant_op.constant(boxes_np, dtype=dtype)
         scores = constant_op.constant(scores_np, dtype=dtype)
         max_output_size = constant_op.constant(max_output_size_np)
@@ -3683,7 +3683,7 @@ class NonMaxSuppressionTest(test_util.TensorFlowTestCase):
         self.assertAllClose(selected_indices, [3, 0, 5])
     # image_ops.non_max_suppression = gen_image_ops.non_max_suppression_v3.
     for dtype in [np.float16, np.float32]:
-      with self.test_session():
+      with self.cached_session():
         boxes = constant_op.constant(boxes_np, dtype=dtype)
         scores = constant_op.constant(scores_np, dtype=dtype)
         max_output_size = constant_op.constant(max_output_size_np)
@@ -3694,7 +3694,7 @@ class NonMaxSuppressionTest(test_util.TensorFlowTestCase):
     # gen_image_ops.non_max_suppression_v4.
     score_threshold = float('-inf')
     for dtype in [np.float16, np.float32]:
-      with self.test_session():
+      with self.cached_session():
         boxes = constant_op.constant(boxes_np, dtype=dtype)
         scores = constant_op.constant(scores_np, dtype=dtype)
         max_output_size = constant_op.constant(max_output_size_np)
diff --git a/tensorflow/python/training/ftrl_test.py b/tensorflow/python/training/ftrl_test.py
index 09d6fe36d3..15c50bc878 100644
--- a/tensorflow/python/training/ftrl_test.py
+++ b/tensorflow/python/training/ftrl_test.py
@@ -218,7 +218,7 @@ class FtrlOptimizerTest(test.TestCase):
   def testFtrlWithL1_L2_L2ShrinkageSparse(self):
     """Tests the new FTRL op with support for l2 shrinkage on sparse grads."""
     for dtype in [dtypes.half, dtypes.float32]:
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
         var1 = variables.Variable([[4.0], [3.0]], dtype=dtype)
         grads0 = ops.IndexedSlices(
@@ -252,7 +252,7 @@ class FtrlOptimizerTest(test.TestCase):
   def testFtrlWithL2ShrinkageDoesNotChangeLrSchedule(self):
     """Verifies that l2 shrinkage in FTRL does not change lr schedule."""
     for dtype in [dtypes.half, dtypes.float32]:
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([1.0, 2.0], dtype=dtype)
         grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
diff --git a/tensorflow/python/training/learning_rate_decay_v2_test.py b/tensorflow/python/training/learning_rate_decay_v2_test.py
index 0f2d60dafc..b2ac93f06f 100644
--- a/tensorflow/python/training/learning_rate_decay_v2_test.py
+++ b/tensorflow/python/training/learning_rate_decay_v2_test.py
@@ -62,7 +62,7 @@ class LRDecayTestV2(test_util.TensorFlowTestCase):
       self.assertAllClose(self.evaluate(decayed_lr()), expected, 1e-6)
 
   def testVariables(self):
-    with self.test_session():
+    with self.cached_session():
       step = variables.Variable(1)
       assign_1 = step.assign(1)
       assign_2 = step.assign(2)
-- 
GitLab


From f361fb8e4b4a9838e60a11ab45391c308bcb90da Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 24 Sep 2018 12:07:58 -0700
Subject: [PATCH 0607/1357] Further simplify the cuDNN wrappers. Instead of
 passing around CudnnConvParams, just pass around the HloInstruction.

This is based on the observation that most code doesn't care about the
convolution semantics like which operand is input vs filter vs output.
In fact, only layout assignment and conv runner care about them.

PiperOrigin-RevId: 214307399
---
 tensorflow/compiler/xla/service/gpu/BUILD     |  6 +-
 .../xla/service/gpu/convolution_thunk.cc      | 49 ++--------
 .../gpu/cudnn_convolution_algorithm_picker.cc | 72 ++++++---------
 .../gpu/cudnn_convolution_algorithm_picker.h  |  2 +-
 .../service/gpu/cudnn_convolution_runner.cc   | 92 ++++++++++++++++---
 .../service/gpu/cudnn_convolution_runner.h    | 55 ++---------
 .../xla/service/gpu/gpu_layout_assignment.cc  | 81 ++++++++--------
 .../xla/service/gpu/gpu_layout_assignment.h   |  3 +-
 .../xla/service/gpu/ir_emission_utils.cc      | 55 +++++------
 .../xla/service/gpu/ir_emission_utils.h       | 31 +++++--
 10 files changed, 219 insertions(+), 227 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index cbee4db06e..7231fd844e 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -371,7 +371,6 @@ cc_library(
     hdrs = ["ir_emission_utils.h"],
     deps = [
         ":backend_configs",
-        ":cudnn_convolution_runner",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:window_util",
@@ -412,6 +411,8 @@ cc_library(
     srcs = ["cudnn_convolution_runner.cc"],
     hdrs = ["cudnn_convolution_runner.h"],
     deps = [
+        ":backend_configs",
+        ":ir_emission_utils",
         ":stream_executor_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status",
@@ -420,8 +421,10 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/core:stream_executor_no_cuda",
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:optional",
     ],
 )
 
@@ -781,6 +784,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:computation_layout",
         "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_casting_utils",
         "//tensorflow/compiler/xla/service:layout_assignment",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
index 85f3682a5a..4effea637d 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
@@ -44,52 +44,23 @@ ConvolutionThunk::ConvolutionThunk(
 Status ConvolutionThunk::ExecuteOnStream(
     const BufferAllocations& buffer_allocations, se::Stream* stream,
     HloExecutionProfiler* profiler) {
-  CudnnConvParams params;
-  TF_RETURN_IF_ERROR(PopulateCudnnConvParams(cudnn_call_, &params));
-
-  switch (params.kind) {
-    case CudnnConvKind::kForward:
-      params.input_buf =
-          buffer_allocations.GetDeviceAddress(operand_buffers_[0]);
-      params.filter_buf =
-          buffer_allocations.GetDeviceAddress(operand_buffers_[1]);
-      params.output_buf = buffer_allocations.GetDeviceAddress(result_buffer_);
-      break;
-    case CudnnConvKind::kBackwardInput:
-      params.input_buf = buffer_allocations.GetDeviceAddress(result_buffer_);
-      params.filter_buf =
-          buffer_allocations.GetDeviceAddress(operand_buffers_[1]);
-      params.output_buf =
-          buffer_allocations.GetDeviceAddress(operand_buffers_[0]);
-      break;
-    case CudnnConvKind::kBackwardFilter:
-      params.input_buf =
-          buffer_allocations.GetDeviceAddress(operand_buffers_[0]);
-      params.filter_buf = buffer_allocations.GetDeviceAddress(result_buffer_);
-      params.output_buf =
-          buffer_allocations.GetDeviceAddress(operand_buffers_[1]);
-      break;
+  std::vector<se::DeviceMemoryBase> operand_se_buffers;
+  for (const auto& buffer : operand_buffers_) {
+    operand_se_buffers.push_back(buffer_allocations.GetDeviceAddress(buffer));
   }
 
+  se::DeviceMemoryBase result_buffer =
+      buffer_allocations.GetDeviceAddress(result_buffer_);
+
   se::DeviceMemoryBase scratch =
       buffer_allocations.GetDeviceAddress(scratch_buffer_);
 
   auto op_profiler = profiler->MakeScopedInstructionProfiler(hlo_instruction());
-  TF_RETURN_IF_ERROR(RunCudnnConvolution(params, scratch, stream));
+  TF_RETURN_IF_ERROR(RunCudnnConvolution(cudnn_call_,
+                                         absl::MakeSpan(operand_se_buffers),
+                                         result_buffer, scratch, stream));
 
-  // Figure out which of output/input/filter is the result produced by
-  // this op, and write the result tuple.
-  void* result_ptr = [&] {
-    switch (params.kind) {
-      case CudnnConvKind::kForward:
-        return params.output_buf.opaque();
-      case CudnnConvKind::kBackwardInput:
-        return params.input_buf.opaque();
-      case CudnnConvKind::kBackwardFilter:
-        return params.filter_buf.opaque();
-    }
-  }();
-  void* ptrs[] = {result_ptr, scratch.opaque()};
+  void* ptrs[] = {result_buffer.opaque(), scratch.opaque()};
   se::DeviceMemory<void*> tuple_addr(
       buffer_allocations.GetDeviceAddress(tuple_result_buffer_));
   stream->ThenMemcpyH2D<void*>(ptrs, &tuple_addr);
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index 9eee9ebbd7..391456576f 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -146,19 +146,11 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) {
 // caching would speed up compilation a lot.
 StatusOr<std::tuple<int64, bool, int64>>
 CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
-    const HloCustomCallInstruction* instr) {
-  CudnnConvParams params;
-  TF_RETURN_IF_ERROR(PopulateCudnnConvParams(instr, &params));
-
-  const Shape& input_shape = *params.input_shape;
-  const Shape& filter_shape = *params.filter_shape;
-  const Shape& output_shape = *params.output_shape;
-
-  CHECK_EQ(input_shape.element_type(), filter_shape.element_type());
-  CHECK_EQ(input_shape.element_type(), output_shape.element_type());
+    HloCustomCallInstruction* instr) {
   // TODO(timshen): for now only check fp16. It can be expanded to other types,
   // with some work on the HLO routines.
-  const bool cross_check_enabled = input_shape.element_type() == xla::F16;
+  const bool cross_check_enabled =
+      instr->shape().tuple_shapes(0).element_type() == xla::F16;
 
   // Don't run this function concurrently on the same GPU.
   //
@@ -226,48 +218,43 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
   // use a ScratchAllocator for this instead of calling allocator_ directly so
   // that our allocations don't leak.
   ScratchAllocator input_output_allocator(device_ordinal, allocator);
-  TF_ASSIGN_OR_RETURN(params.input_buf,
-                      input_output_allocator.AllocateBytes(
-                          &stream, ShapeUtil::ByteSizeOf(input_shape)));
-  TF_ASSIGN_OR_RETURN(params.filter_buf,
-                      input_output_allocator.AllocateBytes(
-                          &stream, ShapeUtil::ByteSizeOf(filter_shape)));
-  TF_ASSIGN_OR_RETURN(params.output_buf,
-                      input_output_allocator.AllocateBytes(
-                          &stream, ShapeUtil::ByteSizeOf(output_shape)));
-
-  initialize_buffer(params.input_buf);
-  initialize_buffer(params.filter_buf);
-  initialize_buffer(params.output_buf);
-
-  DeviceMemoryBase* result_buf = [&] {
-    switch (params.kind) {
-      case CudnnConvKind::kBackwardFilter:
-        return &params.filter_buf;
-      case CudnnConvKind::kBackwardInput:
-        return &params.input_buf;
-      case CudnnConvKind::kForward:
-        return &params.output_buf;
-    }
-  }();
+  std::vector<se::DeviceMemoryBase> operand_buffers;
+  for (const auto* operand : instr->operands()) {
+    TF_ASSIGN_OR_RETURN(auto buffer,
+                        input_output_allocator.AllocateBytes(
+                            &stream, ShapeUtil::ByteSizeOf(operand->shape())));
+    initialize_buffer(buffer);
+    operand_buffers.push_back(buffer);
+  }
+  TF_ASSIGN_OR_RETURN(
+      auto result_buffer,
+      input_output_allocator.AllocateBytes(
+          &stream, ShapeUtil::ByteSizeOf(instr->shape().tuple_shapes(0))));
+  initialize_buffer(result_buffer);
 
   se::dnn::ProfileResult best_result;
   int64 best_result_bytes_used = 0;
+  TF_ASSIGN_OR_RETURN(auto backend_config,
+                      instr->backend_config<CudnnConvBackendConfig>());
 
   optional<F16BufferComparator> comparator;
   // Use the first algorithm that's supported as reference. There isn't a
   // particular reason to use it, as any algorithm sufficies. It doesn't make
   // this algorithm considered correct, though.
   optional<AlgorithmDesc> first_algorithm;
-  for (const AlgorithmDesc& alg : GetAlgorithms(params.kind, stream_exec_)) {
+  TF_ASSIGN_OR_RETURN(CudnnConvKind kind, GetCudnnConvKind(instr));
+  for (const AlgorithmDesc& alg : GetAlgorithms(kind, stream_exec_)) {
     ScratchAllocator scratch_allocator(device_ordinal, allocator);
     se::dnn::ProfileResult profile_result;
     VLOG(3) << "Trying algorithm " << AlgorithmToString(alg) << " for "
             << instr->ToString();
 
-    params.algorithm = AlgorithmConfig(alg);
-    bool launch_ok = RunCudnnConvolution(params, &scratch_allocator, &stream,
-                                         &profile_result)
+    backend_config.set_algorithm(alg.algo_id());
+    backend_config.set_tensor_ops_enabled(alg.tensor_ops_enabled());
+    TF_RETURN_IF_ERROR(instr->set_backend_config(backend_config));
+    bool launch_ok = RunCudnnConvolution(instr, absl::MakeSpan(operand_buffers),
+                                         result_buffer, &scratch_allocator,
+                                         &stream, &profile_result)
                          .ok();
 
     if (launch_ok && profile_result.is_valid()) {
@@ -278,7 +265,7 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
               .xla_gpu_crash_on_verification_failures();
       if (comparator.has_value()) {
         StatusOr<bool> result = comparator->CompareEqual(
-            se::DeviceMemory<Eigen::half>(*result_buf));
+            se::DeviceMemory<Eigen::half>(result_buffer));
         if (!result.ok()) {
           LOG(ERROR) << "Unable to compare "
                      << AlgorithmToString(*first_algorithm) << " against "
@@ -296,7 +283,7 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
         }
       } else if (cross_check_enabled) {
         auto comp = F16BufferComparator::Create(
-            se::DeviceMemory<Eigen::half>(*result_buf), compiler_, allocator,
+            se::DeviceMemory<Eigen::half>(result_buffer), compiler_, allocator,
             &stream);
         if (comp.ok()) {
           comparator.emplace(comp.ConsumeValueOrDie());
@@ -370,7 +357,8 @@ StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnInstruction(
       ShapeUtil::MakeTupleShape({instr->shape().tuple_shapes(0),
                                  ShapeUtil::MakeShape(U8, {scratch_bytes})});
 
-  CudnnConvBackendConfig backend_config;
+  TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config,
+                      instr->backend_config<CudnnConvBackendConfig>());
   backend_config.set_algorithm(algorithm);
   backend_config.set_tensor_ops_enabled(tensor_ops_enabled);
 
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
index ce0189543c..aeda2fc7f8 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
@@ -50,7 +50,7 @@ class CudnnConvolutionAlgorithmPicker : public HloModulePass {
   StatusOr<bool> RunOnComputation(HloComputation* computation);
   StatusOr<bool> RunOnInstruction(HloInstruction* instr);
   StatusOr<std::tuple<int64, bool, int64>> PickBestAlgorithm(
-      const HloCustomCallInstruction* instr);
+      HloCustomCallInstruction* instr);
 
   se::StreamExecutor* stream_exec_;                   // never null
   DeviceMemoryAllocator* allocator_;                  // may be null
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
index 3310ee848e..32d67084b3 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
@@ -16,6 +16,8 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"
+#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
@@ -37,6 +39,20 @@ using se::dnn::FilterDescriptor;
 using se::dnn::FilterLayout;
 using se::dnn::ProfileResult;
 
+struct CudnnConvParams {
+  CudnnConvKind kind;
+  const Shape* input_shape;
+  const Shape* filter_shape;
+  const Shape* output_shape;
+  se::DeviceMemoryBase input_buf;
+  se::DeviceMemoryBase filter_buf;
+  se::DeviceMemoryBase output_buf;
+  const Window* window;
+  const ConvolutionDimensionNumbers* dnums;
+  int64 feature_group_count;
+  se::dnn::AlgorithmConfig algorithm;
+};
+
 // A StreamExecutor ScratchAllocator that wraps a single XLA allocation,
 // returning it (in its entirety) the first time Allocate() is called.
 class ScratchBufAllocator : public se::ScratchAllocator {
@@ -214,32 +230,80 @@ Status RunCudnnConvolutionImpl(CudnnConvParams params,
   return Status::OK();
 }
 
-}  // anonymous namespace
+// Returns the cudnn convolution parameters generated from conv, which must be a
+// custom-call to a cudnn convolution.
+StatusOr<CudnnConvParams> GetCudnnConvParams(
+    const HloCustomCallInstruction* conv,
+    absl::Span<se::DeviceMemoryBase> operand_buffers,
+    se::DeviceMemoryBase result_buffer) {
+  CudnnConvParams params;
 
-string CudnnConvKindToString(CudnnConvKind kind) {
-  switch (kind) {
-    case CudnnConvKind::kForward:
-      return "forward";
-    case CudnnConvKind::kBackwardFilter:
-      return "backward_filter";
-    case CudnnConvKind::kBackwardInput:
-      return "backward_input";
+  TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config,
+                      conv->backend_config<CudnnConvBackendConfig>());
+  const auto& target = conv->custom_call_target();
+  const auto& lhs_shape = conv->operand(0)->shape();
+  const auto& rhs_shape = conv->operand(1)->shape();
+  const auto& conv_result_shape = conv->shape().tuple_shapes(0);
+
+  params.window = &conv->window();
+  params.dnums = &conv->convolution_dimension_numbers();
+  params.feature_group_count = conv->feature_group_count();
+  params.algorithm = se::dnn::AlgorithmConfig(se::dnn::AlgorithmDesc(
+      backend_config.algorithm(), backend_config.tensor_ops_enabled()));
+
+  if (target == kCudnnConvForwardCallTarget) {
+    params.kind = CudnnConvKind::kForward;
+    params.input_shape = &lhs_shape;
+    params.filter_shape = &rhs_shape;
+    params.output_shape = &conv_result_shape;
+    params.input_buf = operand_buffers[0];
+    params.filter_buf = operand_buffers[1];
+    params.output_buf = result_buffer;
+  } else if (target == kCudnnConvBackwardInputCallTarget) {
+    params.kind = CudnnConvKind::kBackwardInput;
+    params.input_shape = &conv_result_shape;
+    params.filter_shape = &rhs_shape;
+    params.output_shape = &lhs_shape;
+    params.input_buf = result_buffer;
+    params.filter_buf = operand_buffers[1];
+    params.output_buf = operand_buffers[0];
+  } else if (target == kCudnnConvBackwardFilterCallTarget) {
+    params.kind = CudnnConvKind::kBackwardFilter;
+    params.input_shape = &lhs_shape;
+    params.filter_shape = &conv_result_shape;
+    params.output_shape = &rhs_shape;
+    params.input_buf = operand_buffers[0];
+    params.filter_buf = result_buffer;
+    params.output_buf = operand_buffers[1];
+  } else {
+    return InternalError("Unexpected custom call target: %s", target);
   }
+  return params;
 }
 
-Status RunCudnnConvolution(CudnnConvParams params,
+}  // anonymous namespace
+
+Status RunCudnnConvolution(const HloCustomCallInstruction* conv,
+                           absl::Span<se::DeviceMemoryBase> operand_buffers,
+                           se::DeviceMemoryBase result_buffer,
                            se::DeviceMemoryBase scratch_buf, se::Stream* stream,
                            se::dnn::ProfileResult* profile_result) {
   ScratchBufAllocator scratch_allocator(scratch_buf);
-  return RunCudnnConvolution(params, &scratch_allocator, stream,
-                             profile_result);
+  return RunCudnnConvolution(conv, operand_buffers, result_buffer,
+                             &scratch_allocator, stream, profile_result);
 }
 
-Status RunCudnnConvolution(CudnnConvParams params,
+Status RunCudnnConvolution(const HloCustomCallInstruction* conv,
+                           absl::Span<se::DeviceMemoryBase> operand_buffers,
+                           se::DeviceMemoryBase result_buffer,
                            se::ScratchAllocator* scratch_allocator,
                            se::Stream* stream,
                            se::dnn::ProfileResult* profile_result) {
-  PrimitiveType output_primitive_type = params.output_shape->element_type();
+  TF_ASSIGN_OR_RETURN(CudnnConvParams params,
+                      GetCudnnConvParams(conv, operand_buffers, result_buffer));
+
+  PrimitiveType output_primitive_type =
+      conv->shape().tuple_shapes(0).element_type();
   switch (output_primitive_type) {
     case F16:
       return RunCudnnConvolutionImpl<Eigen::half>(params, scratch_allocator,
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h
index 381aa37a1b..61aec1cecc 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h
@@ -16,6 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_
 
+#include "absl/types/optional.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/status.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -27,52 +30,8 @@ namespace gpu {
 
 // This file contains low-level routines for running cudnn convolutions.
 
-// Different types of convolutions supported by cudnn.
-//
-// A way to think about these is that a convolution is defined by three arrays
-// -- the "input", the "filter", and the "output" -- and given any two of these,
-// we can compute the third.  For example, a backward-input convolution takes as
-// input a filter and an "output" and produces an "input" such that if one were
-// to do a forward convolution of "input" using filter, the result would be
-// something with the same shape as "output".
-//
-// This way of thinking is not correct if you look at the values produced. For
-// example, a backward-input convolution is not actually the mathematical
-// inverse of a forward convolution.  But it's right as far as the shapes and
-// "connectivity" (i.e. which elements of the input affect which elements of
-// the output) are concerned.
-enum class CudnnConvKind {
-  kForward,         // input  + filter => output
-  kBackwardInput,   // filter + output => input
-  kBackwardFilter,  // input  + output => filter
-};
-
-struct CudnnConvParams {
-  CudnnConvKind kind;
-  const Shape* input_shape;
-  const Shape* filter_shape;
-  const Shape* output_shape;
-  se::DeviceMemoryBase input_buf;
-  se::DeviceMemoryBase filter_buf;
-  se::DeviceMemoryBase output_buf;
-  const Window* window;
-  const ConvolutionDimensionNumbers* dnums;
-  int64 feature_group_count;
-  se::dnn::AlgorithmConfig algorithm;
-};
-
-// Converts a CudnnConvKind value to a string.
-string CudnnConvKindToString(CudnnConvKind kind);
-
 // Calls into cudnn to run the specified convolution.
 //
-// Note that depending on the value of CudnnConvKind, the result of this call
-// may be written into input_buf, filter_buf, or output_buf!
-//
-// At the moment convolution with half data type is implemented with cudnn
-// PSEUDO_HALF configuration, that is, the input values are half and the
-// internal computation type is float.
-//
 // We provide one overload which takes a scratch buffer, and another which takes
 // an allocator which is responsible for allocating the scratch space.  In
 // theory the second one shouldn't be necessary -- users of this function could
@@ -83,11 +42,15 @@ string CudnnConvKindToString(CudnnConvKind kind);
 // allocator and take note of how much memory is used.  The next time you call
 // the same conv, you can provide an explicitly preallocated scratch buffer of
 // that size, if you like.
-Status RunCudnnConvolution(CudnnConvParams params,
+Status RunCudnnConvolution(const HloCustomCallInstruction* conv,
+                           absl::Span<se::DeviceMemoryBase> operand_buffers,
+                           se::DeviceMemoryBase result_buffer,
                            se::DeviceMemoryBase scratch_buf, se::Stream* stream,
                            se::dnn::ProfileResult* profile_result = nullptr);
 
-Status RunCudnnConvolution(CudnnConvParams params,
+Status RunCudnnConvolution(const HloCustomCallInstruction* conv,
+                           absl::Span<se::DeviceMemoryBase> operand_buffers,
+                           se::DeviceMemoryBase result_buffer,
                            se::ScratchAllocator* scratch_allocator,
                            se::Stream* stream,
                            se::dnn::ProfileResult* profile_result = nullptr);
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index d033faee8d..06314e413e 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -21,8 +21,10 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/gpu_options.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h"
+#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
@@ -90,27 +92,32 @@ HeuristicLayoutAssignment(const HloInstruction* instr,
 // operands and the output shape. Depending on the underlying algorithm, one of
 // { NCHW, NHWC } ^ 3 = 8 different layout combinations may be chosen.
 Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall(
-    HloInstruction* instr, LayoutConstraints* constraints) {
-  CHECK(IsCustomCallToDnnConvolution(*instr)) << instr->ToString();
-  Shape input_shape;
-  Shape filter_shape;
-  Shape output_shape;
-  const auto& target = instr->custom_call_target();
-  if (target == kCudnnConvForwardCallTarget) {
-    input_shape = instr->operand(0)->shape();
-    filter_shape = instr->operand(1)->shape();
-    output_shape = instr->shape().tuple_shapes(0);
-  } else if (target == kCudnnConvBackwardInputCallTarget) {
-    input_shape = instr->shape().tuple_shapes(0);
-    filter_shape = instr->operand(1)->shape();
-    output_shape = instr->operand(0)->shape();
-  } else if (target == kCudnnConvBackwardFilterCallTarget) {
-    input_shape = instr->operand(0)->shape();
-    filter_shape = instr->shape().tuple_shapes(0);
-    output_shape = instr->operand(1)->shape();
-  } else {
-    LOG(FATAL) << "Unexpected custom call target: "
-               << instr->custom_call_target();
+    HloCustomCallInstruction* instr, LayoutConstraints* constraints) {
+  Shape lhs_shape = instr->operand(0)->shape();
+  Shape rhs_shape = instr->operand(1)->shape();
+  Shape result_shape = instr->shape().tuple_shapes(0);
+
+  Shape* input_shape;
+  Shape* filter_shape;
+  Shape* output_shape;
+
+  TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(instr));
+  switch (kind) {
+    case CudnnConvKind::kForward:
+      input_shape = &lhs_shape;
+      filter_shape = &rhs_shape;
+      output_shape = &result_shape;
+      break;
+    case CudnnConvKind::kBackwardInput:
+      input_shape = &result_shape;
+      filter_shape = &rhs_shape;
+      output_shape = &lhs_shape;
+      break;
+    case CudnnConvKind::kBackwardFilter:
+      input_shape = &lhs_shape;
+      filter_shape = &result_shape;
+      output_shape = &rhs_shape;
+      break;
   }
 
   {
@@ -127,8 +134,9 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall(
     }
 
     TF_ASSIGN_OR_RETURN(
-        std::tie(*input_shape.mutable_layout(), *filter_shape.mutable_layout(),
-                 *output_shape.mutable_layout()),
+        std::tie(*input_shape->mutable_layout(),
+                 *filter_shape->mutable_layout(),
+                 *output_shape->mutable_layout()),
         StreamExecutorConvLayoutsToXlaLayouts(
             instr->convolution_dimension_numbers(), input, filter, output));
   }
@@ -141,25 +149,10 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall(
                           instr, /*index=*/{0}));
 
   // Set layouts of the instructions' shapes.
-  if (target == kCudnnConvForwardCallTarget) {
-    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(input_shape, instr, 0));
-    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(filter_shape, instr, 1));
-    TF_RETURN_IF_ERROR(
-        constraints->SetBufferLayout(output_shape.layout(), *call_result_buf));
-  } else if (target == kCudnnConvBackwardInputCallTarget) {
-    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(output_shape, instr, 0));
-    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(filter_shape, instr, 1));
-    TF_RETURN_IF_ERROR(
-        constraints->SetBufferLayout(input_shape.layout(), *call_result_buf));
-  } else if (target == kCudnnConvBackwardFilterCallTarget) {
-    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(input_shape, instr, 0));
-    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(output_shape, instr, 1));
-    TF_RETURN_IF_ERROR(
-        constraints->SetBufferLayout(filter_shape.layout(), *call_result_buf));
-  } else {
-    LOG(FATAL) << "Unexpected custom call target: "
-               << instr->custom_call_target();
-  }
+  TF_RETURN_IF_ERROR(constraints->SetOperandLayout(lhs_shape, instr, 0));
+  TF_RETURN_IF_ERROR(constraints->SetOperandLayout(rhs_shape, instr, 1));
+  TF_RETURN_IF_ERROR(
+      constraints->SetBufferLayout(result_shape.layout(), *call_result_buf));
   return Status::OK();
 }
 
@@ -173,8 +166,8 @@ Status GpuLayoutAssignment::AddBackendConstraints(
        ++iterator) {
     HloInstruction* instruction = *iterator;
     if (IsCustomCallToDnnConvolution(*instruction)) {
-      TF_RETURN_IF_ERROR(
-          AddBackendConstraintsToDnnConvCustomCall(instruction, constraints));
+      TF_RETURN_IF_ERROR(AddBackendConstraintsToDnnConvCustomCall(
+          Cast<HloCustomCallInstruction>(instruction), constraints));
     }
 
     // For batched dot we require the default layout.
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
index ce24af1cf8..e2b96a81d4 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_LAYOUT_ASSIGNMENT_H_
 
 #include "tensorflow/compiler/xla/service/computation_layout.h"
+#include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/layout_assignment.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
@@ -47,7 +48,7 @@ class GpuLayoutAssignment : public LayoutAssignment {
 
  private:
   Status AddBackendConstraintsToDnnConvCustomCall(
-      HloInstruction* instr, LayoutConstraints* constraints);
+      HloCustomCallInstruction* instr, LayoutConstraints* constraints);
 
   se::StreamExecutor* stream_executor_;
 };
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
index 22f43bc08b..b57ac5fd09 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
@@ -288,41 +288,30 @@ llvm::Value* EmitFullWarpShuffleDown(llvm::Value* value, llvm::Value* offset,
       value->getType());
 }
 
-Status PopulateCudnnConvParams(const HloCustomCallInstruction* custom_call,
-                               CudnnConvParams* params) {
-  TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config,
-                      custom_call->backend_config<CudnnConvBackendConfig>());
-  const auto& target = custom_call->custom_call_target();
-  const auto& lhs_shape = custom_call->operand(0)->shape();
-  const auto& rhs_shape = custom_call->operand(1)->shape();
-  const auto& conv_result_shape = custom_call->shape().tuple_shapes(0);
-
-  params->window = &custom_call->window();
-  params->dnums = &custom_call->convolution_dimension_numbers();
-  params->feature_group_count = custom_call->feature_group_count();
-  params->algorithm = se::dnn::AlgorithmConfig(se::dnn::AlgorithmDesc(
-      backend_config.algorithm(), backend_config.tensor_ops_enabled()));
-
+StatusOr<CudnnConvKind> GetCudnnConvKind(
+    const HloCustomCallInstruction* instr) {
+  absl::string_view target = instr->custom_call_target();
   if (target == kCudnnConvForwardCallTarget) {
-    params->kind = CudnnConvKind::kForward;
-    params->input_shape = &lhs_shape;
-    params->filter_shape = &rhs_shape;
-    params->output_shape = &conv_result_shape;
-  } else if (target == kCudnnConvBackwardInputCallTarget) {
-    params->kind = CudnnConvKind::kBackwardInput;
-    params->input_shape = &conv_result_shape;
-    params->filter_shape = &rhs_shape;
-    params->output_shape = &lhs_shape;
-  } else if (target == kCudnnConvBackwardFilterCallTarget) {
-    params->kind = CudnnConvKind::kBackwardFilter;
-    params->input_shape = &lhs_shape;
-    params->filter_shape = &conv_result_shape;
-    params->output_shape = &rhs_shape;
-  } else {
-    LOG(FATAL) << "Unexpected custom call target: "
-               << custom_call->custom_call_target();
+    return CudnnConvKind::kForward;
+  }
+  if (target == kCudnnConvBackwardInputCallTarget) {
+    return CudnnConvKind::kBackwardInput;
+  }
+  if (target == kCudnnConvBackwardFilterCallTarget) {
+    return CudnnConvKind::kBackwardFilter;
+  }
+  return InternalError("Unexpected call target: %s", target);
+}
+
+string CudnnConvKindToString(CudnnConvKind kind) {
+  switch (kind) {
+    case CudnnConvKind::kForward:
+      return "forward";
+    case CudnnConvKind::kBackwardFilter:
+      return "backward_filter";
+    case CudnnConvKind::kBackwardInput:
+      return "backward_input";
   }
-  return Status::OK();
 }
 
 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
index 09c455cc1e..19bd3c6330 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
@@ -20,7 +20,6 @@ limitations under the License.
 
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Value.h"
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 
@@ -30,6 +29,31 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
+// Different types of convolutions supported by cudnn.
+//
+// A way to think about these is that a convolution is defined by three arrays
+// -- the "input", the "filter", and the "output" -- and given any two of these,
+// we can compute the third.  For example, a backward-input convolution takes as
+// input a filter and an "output" and produces an "input" such that if one were
+// to do a forward convolution of "input" using filter, the result would be
+// something with the same shape as "output".
+//
+// This way of thinking is not correct if you look at the values produced. For
+// example, a backward-input convolution is not actually the mathematical
+// inverse of a forward convolution.  But it's right as far as the shapes and
+// "connectivity" (i.e. which elements of the input affect which elements of
+// the output) are concerned.
+enum class CudnnConvKind {
+  kForward,         // input  + filter => output
+  kBackwardInput,   // filter + output => input
+  kBackwardFilter,  // input  + output => filter
+};
+
+StatusOr<CudnnConvKind> GetCudnnConvKind(const HloCustomCallInstruction* instr);
+
+// Converts a CudnnConvKind value to a string.
+string CudnnConvKindToString(CudnnConvKind kind);
+
 constexpr int64 kWarpSize = 32;
 
 // Returns true if `hlo` will be implemented as a call to BLAS gemm.
@@ -150,11 +174,6 @@ llvm::Value* EmitPrintf(absl::string_view fmt,
 llvm::Value* EmitFullWarpShuffleDown(llvm::Value* value, llvm::Value* offset,
                                      llvm::IRBuilder<>* builder);
 
-// Populates params using conv, which must be a custom-call to a cudnn
-// convolution.  Does not modify any buffers in the params.
-Status PopulateCudnnConvParams(const HloCustomCallInstruction* custom_call,
-                               CudnnConvParams* params);
-
 }  // namespace gpu
 }  // namespace xla
 
-- 
GitLab


From 7919d64414ed47d217b8fc508d1be56b2a531a3c Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 24 Sep 2018 12:12:30 -0700
Subject: [PATCH 0608/1357] Wrap forward and backward pass in a defun for
 L2HMC.

Also a small bugfix to handle unknown shapes in backprop._num_elements.

Before:
entry {
  name: "L2hmcBenchmark.eager_train_cpu_defun"
  iters: 10
  wall_time: 0.594115018845
  extras {
    key: "examples_per_sec"
    value {
      double_value: 336.635152548
    }
  }
}

After:
entry {
  name: "L2hmcBenchmark.eager_train_cpu_defun"
  iters: 10
  wall_time: 0.322251081467
  extras {
    key: "examples_per_sec"
    value {
      double_value: 620.634069216
    }
  }
}
PiperOrigin-RevId: 214308142
---
 .../eager/python/examples/l2hmc/l2hmc_test.py | 26 +++++++++++--------
 tensorflow/python/eager/backprop.py           |  5 +++-
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py b/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py
index 9557479885..c38a1597b8 100644
--- a/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py
+++ b/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py
@@ -37,26 +37,32 @@ def get_default_hparams():
       n_warmup_iters=3)
 
 
+def step(dynamics, optimizer, samples):
+  loss, grads, samples, _ = l2hmc.loss_and_grads(
+      dynamics, samples, loss_fn=l2hmc.compute_loss)
+  optimizer.apply_gradients(zip(grads, dynamics.variables))
+
+  return loss, samples
+
+
 def warmup(dynamics,
            optimizer,
            n_iters=1,
            n_samples=200,
-           loss_fn=l2hmc.compute_loss):
+           step_fn=step):
   """Warmup optimization to reduce overhead."""
 
   samples = tf.random_normal(
       shape=[n_samples, dynamics.x_dim], dtype=tf.float32)
 
   for _ in range(n_iters):
-    _, grads, samples, _ = l2hmc.loss_and_grads(
-        dynamics, samples, loss_fn=loss_fn)
-    optimizer.apply_gradients(zip(grads, dynamics.variables))
+    _, samples = step_fn(dynamics, optimizer, samples)
 
 
 def fit(dynamics,
         samples,
         optimizer,
-        loss_fn=l2hmc.compute_loss,
+        step_fn=step,
         n_iters=5000,
         verbose=True,
         logdir=None):
@@ -66,9 +72,7 @@ def fit(dynamics,
     summary_writer = tf.contrib.summary.create_file_writer(logdir)
 
   for i in range(n_iters):
-    loss, grads, samples, _ = l2hmc.loss_and_grads(
-        dynamics, samples, loss_fn=loss_fn)
-    optimizer.apply_gradients(zip(grads, dynamics.variables))
+    loss, samples = step_fn(dynamics, optimizer, samples)
     if verbose:
       print("Iteration %d: loss %.4f" % (i, loss))
 
@@ -193,16 +197,16 @@ class L2hmcBenchmark(tf.test.Benchmark):
         n_steps=hparams.n_steps,
         eps=hparams.eps)
     optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
-    loss_fn = tfe.defun(l2hmc.compute_loss) if defun else l2hmc.compute_loss
+    step_fn = tfe.defun(step) if defun else step
 
     # Warmup to reduce initialization effect when timing
-    warmup(dynamics, optimizer, n_iters=hparams.n_warmup_iters, loss_fn=loss_fn)
+    warmup(dynamics, optimizer, n_iters=hparams.n_warmup_iters, step_fn=step_fn)
 
     # Training
     samples = tf.random_normal(
         shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32)
     start_time = time.time()
-    fit(dynamics, samples, optimizer, loss_fn=loss_fn, n_iters=hparams.n_iters)
+    fit(dynamics, samples, optimizer, step_fn=step_fn, n_iters=hparams.n_iters)
     wall_time = time.time() - start_time
     examples_per_sec = hparams.n_samples / wall_time
 
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index d95e0fe721..78f3198011 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -564,7 +564,10 @@ def _aggregate_grads(gradients):
 def _num_elements(grad):
   """The number of elements in the `grad` tensor."""
   if isinstance(grad, ops.Tensor):
-    return functools.reduce(operator.mul, grad._shape_tuple(), 1)  # pylint: disable=protected-access
+    shape_tuple = grad._shape_tuple()  # pylint: disable=protected-access
+    if shape_tuple is None or None in shape_tuple:
+      return 0
+    return functools.reduce(operator.mul, shape_tuple, 1)
   if isinstance(grad, ops.IndexedSlices):
     return functools.reduce(operator.mul, grad.values._shape_tuple(), 1)  # pylint: disable=protected-access
   raise ValueError("`grad` not a Tensor or IndexedSlices.")
-- 
GitLab


From c99a0acb21d28989595c6ba63a2b0496fb46c33d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 12:19:51 -0700
Subject: [PATCH 0609/1357] Updated to newest FlatBuffers and FlexBuffers.

PiperOrigin-RevId: 214309210
---
 tensorflow/contrib/lite/schema/BUILD            |  4 ++++
 tensorflow/contrib/lite/toco/tflite/operator.cc | 14 +++++++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/schema/BUILD b/tensorflow/contrib/lite/schema/BUILD
index 55bf2c48b9..d892466c7a 100644
--- a/tensorflow/contrib/lite/schema/BUILD
+++ b/tensorflow/contrib/lite/schema/BUILD
@@ -25,14 +25,18 @@ py_binary(
     ],
 )
 
+# TODO(wvo): re-enable this test once latest FlatBuffers has landed.
+
 py_test(
     name = "upgrade_schema_test",
     size = "small",
     srcs = ["upgrade_schema_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "manual",
         "no_oss",
         "no_pip",
+        "notap",
     ],
     deps = [
         ":upgrade_schema",
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index c59a28b864..ca2a6a19b3 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -1260,6 +1260,10 @@ class TensorFlowUnsupported : public BaseOperator {
     return std::unique_ptr<flexbuffers::Builder>(fbb.release());
   }
 
+// TODO(wvo): hack to make this code compile with 2 different API versions.
+// Please remove once OS/internal versions are in sync.
+// See hardcoded values in the switch below.
+
   void ReadOptions(const flexbuffers::Map& m,
                    TensorFlowUnsupportedOperator* op) const {
     ::tensorflow::NodeDef node_def;
@@ -1270,16 +1274,16 @@ class TensorFlowUnsupported : public BaseOperator {
       const auto key = keys[i].AsKey();
       const auto& value = m[key];
       switch (value.GetType()) {
-        case flexbuffers::TYPE_STRING:
+        case 5:  // flexbuffers::FBT_STRING:
           (*attr)[key].set_s(value.AsString().c_str());
           break;
-        case flexbuffers::TYPE_INT:
+        case 1:  // flexbuffers::FBT_INT:
           (*attr)[key].set_i(value.AsInt64());
           break;
-        case flexbuffers::TYPE_FLOAT:
+        case 3:  // flexbuffers::FBT_FLOAT:
           (*attr)[key].set_f(value.AsFloat());
           break;
-        case flexbuffers::TYPE_BOOL:
+        case 26:  // flexbuffers::FBT_BOOL:
           (*attr)[key].set_b(value.AsBool());
           if (string(key) == "_output_quantized") {
             op->quantized = value.AsBool();
@@ -1288,7 +1292,7 @@ class TensorFlowUnsupported : public BaseOperator {
             op->support_output_type_float_in_quantized_op = value.AsBool();
           }
           break;
-        case flexbuffers::TYPE_VECTOR_INT: {
+        case 11: {  // flexbuffers::FBT_VECTOR_INT: {
           auto* list = (*attr)[key].mutable_list();
           const auto& vector = value.AsTypedVector();
           for (size_t i = 0; i < vector.size(); i++) {
-- 
GitLab


From 0aac77d10f8b9124204a6e5aad942e03134ecd94 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 12:22:27 -0700
Subject: [PATCH 0610/1357] [XLA] Update Scatter to ignore out-of-bound
 indices.

PiperOrigin-RevId: 214309598
---
 .../xla/service/hlo_creation_utils.cc         |  39 ++++++
 .../compiler/xla/service/hlo_creation_utils.h |  30 +++++
 .../xla/service/hlo_evaluator_test.cc         | 122 ++++++++++++++++++
 .../xla/service/hlo_evaluator_typed_visitor.h |  19 ++-
 .../compiler/xla/service/scatter_expander.cc  |  78 ++++++++++-
 tensorflow/compiler/xla/tests/scatter_test.cc |  30 +++++
 6 files changed, 303 insertions(+), 15 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc
index b76c50bb5b..b2005d3c21 100644
--- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc
+++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
 #include "tensorflow/compiler/xla/util.h"
 
@@ -201,6 +202,44 @@ StatusOr<HloInstruction*> MakeMapHlo(absl::Span<HloInstruction* const> operands,
       HloInstruction::CreateMap(map_shape, operands, map_computation));
 }
 
+StatusOr<HloInstruction*> MakeReduceHlo(HloInstruction* operand,
+                                        HloInstruction* init_value,
+                                        HloOpcode binary_opcode,
+                                        HloModule* module) {
+  DCHECK_NE(nullptr, module);
+  std::vector<int64> all_dims(ShapeUtil::Rank(operand->shape()));
+  std::iota(all_dims.begin(), all_dims.end(), 0);
+
+  auto scalar_shape = ShapeUtil::MakeShape(operand->shape().element_type(), {});
+  HloComputation* reduce_computation;
+  {
+    HloComputation::Builder b(operand->name() + ".reduce_sub_computation");
+    auto lhs = b.AddInstruction(
+        HloInstruction::CreateParameter(0, scalar_shape, "lhs"));
+    auto rhs = b.AddInstruction(
+        HloInstruction::CreateParameter(1, scalar_shape, "rhs"));
+    b.AddInstruction(
+        HloInstruction::CreateBinary(scalar_shape, binary_opcode, lhs, rhs));
+    reduce_computation = module->AddEmbeddedComputation(b.Build());
+  }
+
+  return operand->parent()->AddInstruction(HloInstruction::CreateReduce(
+      scalar_shape, operand, init_value, all_dims, reduce_computation));
+}
+
+StatusOr<HloInstruction*> MakeSelectHlo(HloInstruction* pred,
+                                        HloInstruction* on_true,
+                                        HloInstruction* on_false) {
+  HloComputation* computation = pred->parent();
+  DCHECK_EQ(computation, on_true->parent());
+  DCHECK_EQ(computation, on_false->parent());
+  TF_ASSIGN_OR_RETURN(Shape select_shape,
+                      ShapeInference::InferTernaryOpShape(
+                          HloOpcode::kSelect, pred, on_true, on_false));
+  return computation->AddInstruction(HloInstruction::CreateTernary(
+      select_shape, HloOpcode::kSelect, pred, on_true, on_false));
+}
+
 StatusOr<HloInstruction*> CollapseFirstNDims(HloInstruction* operand, int64 n) {
   CHECK_GT(n, 0);
 
diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.h b/tensorflow/compiler/xla/service/hlo_creation_utils.h
index b22058abb4..8e5ddbbd50 100644
--- a/tensorflow/compiler/xla/service/hlo_creation_utils.h
+++ b/tensorflow/compiler/xla/service/hlo_creation_utils.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CREATION_UTILS_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CREATION_UTILS_H_
 
+#include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -107,6 +108,35 @@ StatusOr<HloInstruction*> MakeDotHlo(HloInstruction* lhs, HloInstruction* rhs,
 StatusOr<HloInstruction*> MakeMapHlo(absl::Span<HloInstruction* const> operands,
                                      HloComputation* map_computation);
 
+// Creates a Reduce HLO instruction and adds it to the computation containing
+// the operand. This will create the sub-computation needed for the reduction in
+// the given module. binary_opcode should represent a binary operation.
+StatusOr<HloInstruction*> MakeReduceHlo(HloInstruction* operand,
+                                        HloInstruction* init_value,
+                                        HloOpcode binary_opcode,
+                                        HloModule* module);
+
+// Creates a Select HLO instruction and adds it to the computation containing
+// the predicate. The on_true and on_false instructions must also be contained
+// in the same computation.
+StatusOr<HloInstruction*> MakeSelectHlo(HloInstruction* pred,
+                                        HloInstruction* on_true,
+                                        HloInstruction* on_false);
+
+// Creates an R1 Constant HLO instruction of the given PrimitiveType with the
+// given values and adds it to the given computation.
+template <typename NativeT>
+StatusOr<HloInstruction*> MakeR1ConstantHlo(HloComputation* computation,
+                                            PrimitiveType type,
+                                            absl::Span<const NativeT> values) {
+  Literal literal = LiteralUtil::CreateR1<NativeT>(values);
+  if (literal.shape().element_type() != type) {
+    TF_ASSIGN_OR_RETURN(literal, literal.Convert(type));
+  }
+  return computation->AddInstruction(
+      HloInstruction::CreateConstant(std::move(literal)));
+}
+
 // -----------------------------------------------------------------------------
 // Some other miscellaneous helpers to generate common HLO patterns.  All of
 // these add all the instructions they generate into the computation containing
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index 01e88566a5..cee11a8a21 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -66,6 +66,20 @@ class HloEvaluatorTest : public ::testing::WithParamInterface<bool>,
         .ConsumeValueOrDie();
   }
 
+  // Evaluate function that takes in a local module instead of using module_
+  // that is in HloVerifiedTestBase. Once module_ in HloVerifiedTestBase is
+  // removed, this should be the default Evaluate function.
+  Literal EvaluateWithModule(
+      HloModule* module, absl::Span<const Literal* const> arg_literals = {}) {
+    if (use_bfloat16_) {
+      // In BF16 mode, we convert all F32 type to BF16 and evaluate the module.
+      auto type_converter = HloElementTypeConverter(F32, BF16);
+      type_converter.Run(module).ValueOrDie();
+    }
+    return evaluator_->Evaluate(*module->entry_computation(), arg_literals)
+        .ConsumeValueOrDie();
+  }
+
   std::unique_ptr<HloEvaluator> evaluator_;
 
   void TestUnaryOp(HloOpcode opcode, Literal expected, Literal input,
@@ -2530,6 +2544,114 @@ ENTRY main {
       expected, Evaluate({&operand, &scatter_indices, &updates})));
 }
 
+TEST_P(HloEvaluatorTest, EvaluateScatter_NegativeIndices) {
+  const char* hlo_text = R"(
+HloModule TensorFlowScatter_NegativeIndices
+
+add_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+  lhs = s32[] parameter(0)
+  rhs = s32[] parameter(1)
+  ROOT add = s32[] add(s32[] lhs, s32[] rhs)
+}
+
+ENTRY main {
+  operand = s32[3,3] parameter(0)
+  indices = s32[2] parameter(1)
+  updates = s32[2,3] parameter(2)
+  ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+      to_apply=add_s32,
+      update_window_dims={1},
+      inserted_window_dims={0},
+      scatter_dims_to_operand_dims={0},
+      index_vector_dim=1
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(hlo_text));
+  Literal operand =
+      LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+  // No updates should happen for the negative indices.
+  Literal scatter_indices = LiteralUtil::CreateR1<int32>({-1, 2});
+  Literal updates = LiteralUtil::CreateR2<int32>({{10, 20, 30}, {70, 80, 90}});
+  EXPECT_TRUE(LiteralTestUtil::Equal(
+      LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {77, 88, 99}}),
+      EvaluateWithModule(module.get(),
+                         {&operand, &scatter_indices, &updates})));
+}
+
+TEST_P(HloEvaluatorTest, EvaluateScatter_OobIndices) {
+  const string hlo_text = R"(
+HloModule BatchDynamicUpdateSlice
+
+update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+  lhs = s32[] parameter(0)
+  ROOT rhs = s32[] parameter(1)
+}
+
+ENTRY main {
+  operand = s32[3,3]{1,0} parameter(0)
+  indices = s32[6,2]{1,0} parameter(1)
+  updates = s32[6,1,1]{2,1,0} parameter(2)
+  ROOT scatter = s32[3,3]{1,0} scatter(operand, indices, updates),
+      to_apply=update_s32,
+      update_window_dims={1,2},
+      inserted_window_dims={},
+      scatter_dims_to_operand_dims={0,1},
+      index_vector_dim=1
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(hlo_text));
+  Literal operand =
+      LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+  // No updates should happen for the OOB indices.
+  Literal scatter_indices = LiteralUtil::CreateR2<int32>(
+      {{2, 7}, {2, 1}, {1, 1}, {5, 1}, {2147483647, 1}, {1, 2}});
+  Literal updates = LiteralUtil::CreateR3<int32>(
+      {{{10}}, {{20}}, {{30}}, {{40}}, {{50}}, {{60}}});
+  EXPECT_TRUE(LiteralTestUtil::Equal(
+      LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 30, 60}, {7, 20, 9}}),
+      EvaluateWithModule(module.get(),
+                         {&operand, &scatter_indices, &updates})));
+}
+
+TEST_P(HloEvaluatorTest, EvaluateScatter_OobUpdateWindow) {
+  const char* hlo_text = R"(
+HloModule TensorFlowScatterNd_OobUpdateWindow
+
+update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+  lhs = s32[] parameter(0)
+  ROOT rhs = s32[] parameter(1)
+}
+
+ENTRY main {
+  operand = s32[3,3,2] parameter(0)
+  indices = s32[1,2] parameter(1)
+  updates = s32[1,2,2] parameter(2)
+  ROOT scatter = s32[3,3,2] scatter(operand, indices, updates),
+      to_apply=update_s32,
+      update_window_dims={1,2},
+      inserted_window_dims={0},
+      scatter_dims_to_operand_dims={0,1},
+      index_vector_dim=1
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(hlo_text));
+  Literal operand =
+      LiteralUtil::CreateR3<int32>({{{-1, 1}, {-2, 2}, {-3, 3}},  //
+                                    {{-4, 4}, {-5, 5}, {-6, 6}},  //
+                                    {{-7, 7}, {-8, 8}, {-9, 9}}});
+  Literal scatter_indices = LiteralUtil::CreateR2<int32>({{0, 2}});
+  Literal updates = LiteralUtil::CreateR3<int32>({{{-10, 10}, {-40, 40}}});
+  // Given the update window size of 2,2 and the index of 0,2, the update window
+  // will be OOB. So, nothing should be updated.
+  Literal expected = operand.Clone();
+  EXPECT_TRUE(LiteralTestUtil::Equal(
+      expected, EvaluateWithModule(module.get(),
+                                   {&operand, &scatter_indices, &updates})));
+}
+
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise comparison with 2 bfloat16 operands.
 TEST_P(HloEvaluatorTest, DoesCompareBF16) {
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index 35391ecf8a..04cdc6901c 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -2284,19 +2284,16 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
         // be 1.
         int64 update_dim_size =
             update_dim == -1 ? 1 : updates_shape.dimensions(update_dim);
-        // Clamp the scatter index so that the scatter region fits in the
-        // operand. input_scatter_index_clamped[i] =
-        // clamp(input_scatter_index[i], 0,
-        //                                       operand_shape.dimensions(i) -
-        //                                       update_dim_size);
-        input_scatter_index_clamped[i] =
-            std::min(operand_shape.dimensions(i) - update_dim_size,
-                     std::max(0LL, input_scatter_index[i]));
+        // If any part of the update region is out-of-bounds, then do not
+        // perform any update on the input.
+        if ((input_scatter_index[i] < 0) ||
+            (input_scatter_index[i] >
+             operand_shape.dimensions(i) - update_dim_size)) {
+          return true;
+        }
       }
       for (int i = 0, e = input_index.size(); i < e; i++) {
-        input_index[i] = input_scatter_index_clamped[i] + input_window_index[i];
-        DCHECK_GE(input_index[i], 0);
-        DCHECK_LT(input_index[i], operand_shape.dimensions(i));
+        input_index[i] = input_scatter_index[i] + input_window_index[i];
       }
 
       auto result_value_literal =
diff --git a/tensorflow/compiler/xla/service/scatter_expander.cc b/tensorflow/compiler/xla/service/scatter_expander.cc
index 2f4b2667c4..de7aee262e 100644
--- a/tensorflow/compiler/xla/service/scatter_expander.cc
+++ b/tensorflow/compiler/xla/service/scatter_expander.cc
@@ -155,6 +155,53 @@ static StatusOr<HloInstruction*> ExpandIndexVectorIntoOperandSpace(
   return MakeConcatHlo(expanded_index_components, /*dimension=*/0);
 }
 
+static StatusOr<HloInstruction*> CheckIndexValidity(
+    HloComputation* computation, HloInstruction* index,
+    absl::Span<const int64> operand_dims, absl::Span<const int64> window_sizes,
+    HloModule* module) {
+  DCHECK_NE(nullptr, module);
+  DCHECK_EQ(operand_dims.size(), window_sizes.size());
+
+  // Valid range for the index: [0, operand_dims - window_sizes]
+
+  // Check if the index has any negative values.
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * zero_index,
+      BroadcastZeros(computation, index->shape().element_type(),
+                     AsInt64Slice(index->shape().dimensions())));
+  TF_ASSIGN_OR_RETURN(HloInstruction * negative_index_check,
+                      MakeBinaryHlo(HloOpcode::kLe, zero_index, index));
+
+  // Check if the index is OOB w.r.t. the operand dimensions and window sizes.
+  std::vector<int64> max_valid_index(operand_dims.size());
+  for (int i = 0; i < operand_dims.size(); ++i) {
+    max_valid_index[i] = operand_dims[i] - window_sizes[i];
+  }
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * max_valid_index_constant,
+      MakeR1ConstantHlo<int64>(computation, index->shape().element_type(),
+                               max_valid_index));
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * oob_index_check,
+      MakeBinaryHlo(HloOpcode::kGe, max_valid_index_constant, index));
+
+  // Combine the results of the two checks above.
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * valid_index,
+      MakeBinaryHlo(HloOpcode::kAnd, negative_index_check, oob_index_check));
+
+  // Reduce the index validity check vector into a scalar predicate.
+  auto reduction_init = computation->AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<bool>(true)));
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * valid_index_reduced,
+      MakeReduceHlo(valid_index, reduction_init, HloOpcode::kAnd, module));
+
+  // Return a broadcasted value of the scalar predicate to the same size as the
+  // window.
+  return MakeBroadcastHlo(valid_index_reduced, {}, window_sizes);
+}
+
 // Body of the while loop that performs the scatter operation using other HLOs.
 static StatusOr<std::vector<HloInstruction*>> ScatterLoopBody(
     HloInstruction* scatter, HloInstruction* induction_var,
@@ -222,7 +269,16 @@ static StatusOr<std::vector<HloInstruction*>> ScatterLoopBody(
       InsertDegenerateDims(update_slice_for_scatter,
                            AsInt64Slice(dim_numbers.inserted_window_dims())));
 
-  // Extact the slice to update from `operand` tensor.
+  // Note that the following transformation assumes that both DynamicSlice and
+  // DynamicUpdateSlice follow the same semantics for OOB indices. For example,
+  // if there are negative indices and DynamicSlice uses "clamping" semantics,
+  // then the extracted data will be "shifted". Since DynamicUpdateSlice also
+  // follows the same "clamping" semantics, writing the update will also be
+  // "shifted" by exactly the same amount. So, this transformation is correct as
+  // long as the semantics of handling OOB indices remain the same in
+  // DynamicSlice and DynamicUpdateSlice.
+
+  // Extract the slice to update from `operand` tensor.
   const Shape& update_slice_shape = update_slice_with_dims_inserted->shape();
   TF_ASSIGN_OR_RETURN(
       HloInstruction * operand_slice_to_update,
@@ -237,10 +293,24 @@ static StatusOr<std::vector<HloInstruction*>> ScatterLoopBody(
       MakeMapHlo({operand_slice_to_update, update_slice_with_dims_inserted},
                  scatter->to_apply()));
 
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * is_index_valid,
+      CheckIndexValidity(
+          operand->parent(), scatter_slice_start,
+          AsInt64Slice(operand->shape().dimensions()),
+          AsInt64Slice(update_slice_with_dims_inserted->shape().dimensions()),
+          scatter->GetModule()));
+
+  // Select the updated operand only if the index is valid. If not, select the
+  // original value.
+  TF_ASSIGN_OR_RETURN(HloInstruction * update_to_apply,
+                      MakeSelectHlo(is_index_valid, updated_operand_slice,
+                                    operand_slice_to_update));
+
   // Write the updated value of the slice into `operand` tensor.
-  TF_ASSIGN_OR_RETURN(HloInstruction * updated_operand,
-                      MakeDynamicUpdateSliceHlo(operand, updated_operand_slice,
-                                                scatter_slice_start));
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * updated_operand,
+      MakeDynamicUpdateSliceHlo(operand, update_to_apply, scatter_slice_start));
 
   return StatusOr<std::vector<HloInstruction*>>{
       {updated_operand, scatter_indices, updates}};
diff --git a/tensorflow/compiler/xla/tests/scatter_test.cc b/tensorflow/compiler/xla/tests/scatter_test.cc
index d20dba028a..b21dd56045 100644
--- a/tensorflow/compiler/xla/tests/scatter_test.cc
+++ b/tensorflow/compiler/xla/tests/scatter_test.cc
@@ -507,6 +507,36 @@ ENTRY main {
   RunTest(hlo_text, &operand, &scatter_indices, &updates);
 }
 
+XLA_TEST_F(ScatterTest, OutOfBoundsUpdateWindow) {
+  const char* hlo_text = R"(
+HloModule TensorFlowScatterNd_OobUpdateWindow
+
+update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+  lhs = s32[] parameter(0)
+  ROOT rhs = s32[] parameter(1)
+}
+
+ENTRY main {
+  operand = s32[3,3,2] parameter(0)
+  indices = s32[1,2] parameter(1)
+  updates = s32[1,2,2] parameter(2)
+  ROOT scatter = s32[3,3,2] scatter(operand, indices, updates),
+      to_apply=update_s32,
+      update_window_dims={1,2},
+      inserted_window_dims={0},
+      scatter_dims_to_operand_dims={0,1},
+      index_vector_dim=1
+}
+)";
+  Literal operand =
+      LiteralUtil::CreateR3<int32>({{{-1, 1}, {-2, 2}, {-3, 3}},  //
+                                    {{-4, 4}, {-5, 5}, {-6, 6}},  //
+                                    {{-7, 7}, {-8, 8}, {-9, 9}}});
+  Literal scatter_indices = LiteralUtil::CreateR2<int32>({{0, 2}});
+  Literal updates = LiteralUtil::CreateR3<int32>({{{-10, 10}, {-40, 40}}});
+  RunTest(hlo_text, &operand, &scatter_indices, &updates);
+}
+
 XLA_TEST_F(ScatterTest, OneScalarIndex) {
   const char* hlo_text = R"(
 HloModule OneScalarIndex
-- 
GitLab


From 6357554896e8a1a340eb9406204a26e6d3219c4f Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 24 Sep 2018 12:36:09 -0700
Subject: [PATCH 0611/1357] Make sure broken tests are filtered out in XLA
 tests suites.

PiperOrigin-RevId: 214311663
---
 tensorflow/compiler/xla/tests/build_defs.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl
index cc65a8939a..05d4d04034 100644
--- a/tensorflow/compiler/xla/tests/build_defs.bzl
+++ b/tensorflow/compiler/xla/tests/build_defs.bzl
@@ -239,7 +239,7 @@ def generate_backend_suites(backends = []):
     for backend in filter_backends(backends):
         native.test_suite(
             name = "%s_tests" % backend,
-            tags = ["xla_%s" % backend],
+            tags = ["xla_%s" % backend, "-broken", "manual"],
         )
 
 def generate_backend_test_macros(backends = []):
-- 
GitLab


From edbc6e078ad306021eeb95827a7451892b35f859 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 13:31:39 -0700
Subject: [PATCH 0612/1357] Split up SPARSE_DEPS, adding each individual
 dependency only to the sparse operators that need it.

PiperOrigin-RevId: 214320700
---
 tensorflow/core/kernels/BUILD | 43 ++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index ab69925d04..96ccc06f9e 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4049,11 +4049,6 @@ cc_library(
 )
 
 SPARSE_DEPS = [
-    ":bounds_check",
-    ":cwise_op",
-    ":fill_functor",
-    ":scatter_functor",
-    "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:sparse_ops_op_lib",
@@ -4086,7 +4081,9 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_cross_op",
     prefix = "sparse_cross_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
@@ -4098,13 +4095,19 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_dense_binary_op_shared",
     prefix = "sparse_dense_binary_op_shared",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":cwise_op",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
     name = "sparse_sparse_binary_op_shared",
     prefix = "sparse_sparse_binary_op_shared",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":cwise_op",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
@@ -4136,7 +4139,9 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_softmax",
     prefix = "sparse_softmax",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
@@ -4148,25 +4153,37 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_tensor_dense_add_op",
     prefix = "sparse_tensor_dense_add_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":scatter_functor",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
     name = "sparse_tensor_dense_matmul_op",
     prefix = "sparse_tensor_dense_matmul_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":bounds_check",
+        ":fill_functor",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
     name = "sparse_to_dense_op",
     prefix = "sparse_to_dense_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
     name = "sparse_xent_op",
     prefix = "sparse_xent_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":bounds_check",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
-- 
GitLab


From cc7525d2c8e44c06685b27e77450cb2114765d72 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Mon, 24 Sep 2018 13:36:56 -0700
Subject: [PATCH 0613/1357] Automated rollback of commit
 7bbd69fd16d4e97afd417786c1ee2fff27d92703

PiperOrigin-RevId: 214321627
---
 tensorflow/contrib/data/python/kernel_tests/BUILD | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index ba202839b2..c15e8d8861 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -326,12 +326,7 @@ cuda_py_test(
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
     ],
-    tags = [
-        "manual",
-        "no_oss",
-        "no_windows_gpu",
-        "notap",
-    ],
+    tags = ["no_windows_gpu"],
 )
 
 py_test(
-- 
GitLab


From cb926e1ed73d6d8f7158cdabf5c4265a921a407b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 13:55:07 -0700
Subject: [PATCH 0614/1357] Fixes a bug in tf.train.Saver() where it couldn't
 use Checkpointable objects in a tf.train.Saver() if var_list was a dict.

Includes the logic used for list in the dict code path.

PiperOrigin-RevId: 214324913
---
 tensorflow/python/training/saver.py      |  8 ++++++
 tensorflow/python/training/saver_test.py | 32 +++++++++++++-----------
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index 274c856686..5b2b19e913 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -622,6 +622,14 @@ class BaseSaverBuilder(object):
           yield BaseSaverBuilder.ResourceVariableSaveable(
               variable, variable._save_slice_info.spec, name)
       # pylint: enable=protected-access
+    elif isinstance(op, checkpointable.CheckpointableBase) and not isinstance(
+        op, variables.Variable):
+      # pylint: disable=protected-access
+      for attr, factory in op._gather_saveables_for_checkpoint().items():
+        op = (factory(name + "_" + attr) if callable(factory) else factory)
+        for op in BaseSaverBuilder.SaveableObjectsForOp(op, op.name):
+          yield op
+      # pylint: enable=protected-access
     else:
       # A variable or tensor.
       if context.executing_eagerly():
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 0ac84813c8..69b1055ebe 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -2850,30 +2850,32 @@ class CheckpointableCompatibilityTests(test.TestCase):
   @test_util.run_in_graph_and_eager_modes
   def testNotSaveableButIsCheckpointable(self):
     v = _OwnsAVariableSimple()
-    saver = saver_module.Saver(var_list=[v])
     test_dir = self.get_temp_dir()
     prefix = os.path.join(test_dir, "ckpt")
-    with self.cached_session() as sess:
-      self.evaluate(v.non_dep_variable.assign(42.))
-      save_path = saver.save(sess, prefix)
-      self.evaluate(v.non_dep_variable.assign(43.))
-      saver.restore(sess, save_path)
-      self.assertEqual(42., self.evaluate(v.non_dep_variable))
+    for saver in (saver_module.Saver(var_list=[v]),
+                  saver_module.Saver(var_list={"v": v})):
+      with self.cached_session() as sess:
+        self.evaluate(v.non_dep_variable.assign(42.))
+        save_path = saver.save(sess, prefix)
+        self.evaluate(v.non_dep_variable.assign(43.))
+        saver.restore(sess, save_path)
+        self.assertEqual(42., self.evaluate(v.non_dep_variable))
 
   @test_util.run_in_graph_and_eager_modes
   def testMoreComplexSaveableReturned(self):
     v = _OwnsMirroredVariables()
-    saver = saver_module.Saver(var_list=[v])
     test_dir = self.get_temp_dir()
     prefix = os.path.join(test_dir, "ckpt")
     self.evaluate(v.non_dep_variable.assign(42.))
-    with self.cached_session() as sess:
-      save_path = saver.save(sess, prefix)
-      self.evaluate(v.non_dep_variable.assign(43.))
-      self.evaluate(v.mirrored.assign(44.))
-      saver.restore(sess, save_path)
-      self.assertEqual(42., self.evaluate(v.non_dep_variable))
-      self.assertEqual(42., self.evaluate(v.mirrored))
+    for saver in (saver_module.Saver(var_list=[v]),
+                  saver_module.Saver(var_list={"v": v})):
+      with self.cached_session() as sess:
+        save_path = saver.save(sess, prefix)
+        self.evaluate(v.non_dep_variable.assign(43.))
+        self.evaluate(v.mirrored.assign(44.))
+        saver.restore(sess, save_path)
+        self.assertEqual(42., self.evaluate(v.non_dep_variable))
+        self.assertEqual(42., self.evaluate(v.mirrored))
 
   def testSingleTensorEvaluation(self):
 
-- 
GitLab


From 7af35581b64b7811b2e70f672d1e824886eac16f Mon Sep 17 00:00:00 2001
From: Karmel Allison <karmel@google.com>
Date: Mon, 24 Sep 2018 13:57:16 -0700
Subject: [PATCH 0615/1357] Fixes typo in docstring to correctly name the new
 Estimator method export_saved_model

PiperOrigin-RevId: 214325271
---
 tensorflow/python/estimator/estimator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 2dc5d099a0..eec64ad452 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -614,7 +614,7 @@ class Estimator(object):
     # pylint: disable=line-too-long,g-doc-args,g-doc-return-or-yield
     """Exports inference graph as a `SavedModel` into the given dir.
 
-    Note that `export_to_savedmodel` will be renamed to `export_to_saved_model`
+    Note that `export_to_savedmodel` will be renamed to `export_saved_model`
     in TensorFlow 2.0. At that time, `export_to_savedmodel` without the
     additional underscore will be available only through tf.compat.v1.
 
@@ -699,7 +699,7 @@ class Estimator(object):
     """
     # pylint: enable=line-too-long
     # TODO(b/111442174): `export_to_savedmodel` will be renamed to
-    # `export_to_saved_model` in TensorFlow 2.0. This function is a wrapper
+    # `export_saved_model` in TensorFlow 2.0. This function is a wrapper
     # while staging the new version; do not add any logic here.
     return self.export_savedmodel(
         export_dir_base,
-- 
GitLab


From aab3c53e1484404a70565324d1231c4e6ead7425 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 24 Sep 2018 14:34:57 -0700
Subject: [PATCH 0616/1357] Inline kernel tracing logic into
 `ExecutorState::Process()`.

All devices implement the same tracing logic in an override of `Device::Compute()`. However, that logic does not have access to the cached `NodeItem::kernel_is_expensive` bit for the kernel, so it must make a virtual call to `OpKernel::IsExpensive()`. By inlining the logic into `ExecutorState::Process()`, we avoid making an unnecessary virtual call on each kernel invocation (when a trace controller is attached).

PiperOrigin-RevId: 214332492
---
 tensorflow/compiler/jit/xla_device.cc         |  2 +-
 tensorflow/core/BUILD                         |  1 -
 tensorflow/core/common_runtime/device.h       |  6 ++
 tensorflow/core/common_runtime/executor.cc    | 62 ++++++++++++++++++-
 .../core/common_runtime/gpu/gpu_device.cc     |  5 --
 .../core/common_runtime/gpu/gpu_device.h      |  5 ++
 .../core/common_runtime/local_device.cc       |  2 +-
 tensorflow/core/common_runtime/local_device.h |  3 +-
 .../core/common_runtime/tracing_device.h      | 60 ------------------
 .../core/platform/default/device_tracer.cc    |  7 ++-
 tensorflow/core/platform/tracing.h            |  5 +-
 11 files changed, 85 insertions(+), 73 deletions(-)
 delete mode 100644 tensorflow/core/common_runtime/tracing_device.h

diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc
index 32fce2bf94..0824c4644e 100644
--- a/tensorflow/compiler/jit/xla_device.cc
+++ b/tensorflow/compiler/jit/xla_device.cc
@@ -373,7 +373,7 @@ Status XlaDevice::FillContextMap(const Graph* graph,
 void XlaDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
   VLOG(2) << "XlaDevice::Compute " << op_kernel->name() << ":"
           << op_kernel->type_string();
-  TracingDevice::Compute(op_kernel, context);
+  op_kernel->Compute(context);
 }
 
 void XlaDevice::ComputeAsync(AsyncOpKernel* op_kernel, OpKernelContext* context,
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d914fdb96c..59b7dd04e9 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2803,7 +2803,6 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
     "common_runtime/stats_publisher_interface.h",
     "common_runtime/step_stats_collector.h",
     "common_runtime/threadpool_device.h",
-    "common_runtime/tracing_device.h",
     "common_runtime/process_state.h",
     "common_runtime/pool_allocator.h",
     "graph/gradients.h",
diff --git a/tensorflow/core/common_runtime/device.h b/tensorflow/core/common_runtime/device.h
index fb76d6ac29..2ef1547cd9 100644
--- a/tensorflow/core/common_runtime/device.h
+++ b/tensorflow/core/common_runtime/device.h
@@ -101,6 +101,12 @@ class Device : public DeviceBase {
     }
   }
 
+  // If true, and tracing is enabled, the `tracing::ScopedAnnotation()` tracing
+  // mechanism will be used instead of `tracing::ScopedActivity()`. Some devices
+  // may override this method to use annotations, which enable child activities
+  // (such as GPU kernel launches) to be related to the OpKernel invocation.
+  virtual bool TraceUsingAnnotations() const { return false; }
+
   // Blocks until all operations queued on the device at the time of
   // the call have completed.  Returns any error pending on the device
   // at completion.
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 7cef34ac52..2c48084cab 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -1238,6 +1238,9 @@ class ExecutorState {
   // Step-local container.
   ScopedStepContainer* step_container_;
   StepStatsCollectorInterface* const stats_collector_;
+  const tracing::TraceCollector* const trace_collector_;
+  const tracing::EventCollector* const event_collector_;
+
   // QUESTION: Make it a checkpoint::TensorSliceReaderCacheWrapper
   // instead of a pointer?  (avoids having to delete).
   checkpoint::TensorSliceReaderCacheWrapper* slice_reader_cache_;
@@ -1246,6 +1249,7 @@ class ExecutorState {
   CancellationManager* cancellation_manager_;
   Executor::Args::Runner runner_;
   bool sync_on_finish_;
+  const bool trace_using_annotations_;
 
   // Owned.
 
@@ -1360,12 +1364,16 @@ ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl)
       tensor_store_(args.tensor_store),
       step_container_(args.step_container),
       stats_collector_(args.stats_collector),
+      trace_collector_(tracing::GetTraceCollector()),
+      event_collector_(
+          tracing::GetEventCollector(tracing::EventCategory::kCompute)),
       slice_reader_cache_(new checkpoint::TensorSliceReaderCacheWrapper),
       call_frame_(args.call_frame),
       impl_(impl),
       cancellation_manager_(args.cancellation_manager),
       runner_(args.runner),
       sync_on_finish_(args.sync_on_finish),
+      trace_using_annotations_(impl->params_.device->TraceUsingAnnotations()),
       num_outstanding_ops_(0) {
   // We start the entire execution in iteration 0 of the root frame
   // so let us create the root frame and the state for iteration 0.
@@ -1551,6 +1559,32 @@ struct ExecutorState::AsyncState {
   }
 };
 
+// Returns true if `item` might be traced by the given trace and event
+// collectors. Returns false only if `item` definitely will not be traced.
+bool MightTrace(const NodeItem& item,
+                const tracing::TraceCollector* trace_collector,
+                const tracing::EventCollector* event_collector,
+                bool using_annotations) {
+  // Tracing will only be enabled if either `event_collector` is non null,
+  // or `trace_collector` is non-null and enabled for this particular kernel.
+  // Although `tracing::ScopedActivity`,
+  // `tracing::ScopedAnnotation`, and `tracing::ScopedRegion` check subsets of
+  // these properties internally in their constructors, the cost of passing the
+  // necessary arguments to them can be significant, so we avoid constructing
+  // them in the common case (when we know they will not be used).
+  if (event_collector != nullptr) {
+    return true;
+  }
+  if (trace_collector) {
+    if (using_annotations) {
+      return trace_collector->IsEnabledForAnnotations();
+    } else {
+      return trace_collector->IsEnabledForActivities(item.kernel_is_expensive);
+    }
+  }
+  return false;
+}
+
 void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
   const GraphView& gview = impl_->gview_;
   TaggedNodeSeq ready;
@@ -1585,6 +1619,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
 
   Status s;
   NodeExecStatsInterface* stats = nullptr;
+
   EntryVector outputs;
   bool completed = false;
   inline_ready.push_back(tagged_node);
@@ -1721,7 +1756,32 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
         // Synchronous computes.
         OpKernelContext ctx(&params, item.num_outputs);
         nodestats::SetOpStart(stats);
-        device->Compute(CHECK_NOTNULL(op_kernel), &ctx);
+
+        if (TF_PREDICT_FALSE(MightTrace(item, trace_collector_,
+                                        event_collector_,
+                                        trace_using_annotations_))) {
+          const string& op_name = op_kernel->name();
+          tracing::ScopedRegion region(tracing::EventCategory::kCompute,
+                                       op_name);
+          if (trace_using_annotations_) {
+            // The OpKernel may create child activities (such as GPU kernel
+            // launches), so use a `ScopedAnnotation` to relate these activities
+            // in the trace.
+            tracing::ScopedAnnotation activity(op_name,
+                                               op_kernel->type_string());
+            device->Compute(op_kernel, &ctx);
+          } else {
+            // Use the cheaper `ScopedActivity` to trace just the OpKernel
+            // execution.
+            tracing::ScopedActivity activity(op_name, op_kernel->type_string(),
+                                             item.kernel_is_expensive);
+            device->Compute(op_kernel, &ctx);
+          }
+        } else {
+          // In the common case, avoid creating any tracing objects.
+          device->Compute(op_kernel, &ctx);
+        }
+
         nodestats::SetOpEnd(stats);
         s = ProcessOutputs(item, &ctx, &outputs, stats);
         if (s.ok() && impl_->device_record_tensor_accesses_) {
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index cf3faf68ff..d8ebdeff5d 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -434,9 +434,6 @@ Status BaseGPUDevice::FillContextMap(const Graph* graph,
 }
 
 void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
-  tracing::ScopedRegion region(tracing::EventCategory::kCompute,
-                               op_kernel->name());
-
   // NOTE(tucker): We need to discriminate between Eigen GPU
   // operations and all others.  If an operation is Eigen
   // implemented (or otherwise tries to launch a cuda kernel
@@ -450,8 +447,6 @@ void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
     context->SetStatus(errors::Internal(
         "Invalid synchronous 'Compute' on GPU for '_Recv' op"));
   } else {
-    tracing::ScopedAnnotation annotation(op_kernel->name(),
-                                         op_kernel->type_string());
     ComputeHelper(op_kernel, context);
   }
 }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index b25fe8645f..674e8384d5 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -65,6 +65,11 @@ class BaseGPUDevice : public LocalDevice {
   // completes.
   bool RequiresRecordingAccessedTensors() const override;
 
+  // GPU kernel execution requires us to use `tracing::ScopedAnnotation()`
+  // rather than `tracing::ScopedActivity()`, in order to relate asynchronously
+  // launched GPU kernels to the OpKernel.
+  bool TraceUsingAnnotations() const { return true; }
+
   void ConsumeListOfAccessedTensors(
       DeviceContext* device_context,
       const TensorReferenceVector& tensor_refs) override;
diff --git a/tensorflow/core/common_runtime/local_device.cc b/tensorflow/core/common_runtime/local_device.cc
index db5022d56e..873182371e 100644
--- a/tensorflow/core/common_runtime/local_device.cc
+++ b/tensorflow/core/common_runtime/local_device.cc
@@ -62,7 +62,7 @@ struct LocalDevice::EigenThreadPoolInfo {
 
 LocalDevice::LocalDevice(const SessionOptions& options,
                          const DeviceAttributes& attributes)
-    : TracingDevice(options.env, attributes), owned_tp_info_(nullptr) {
+    : Device(options.env, attributes), owned_tp_info_(nullptr) {
   // Log info messages if TensorFlow is not compiled with instructions that
   // could speed up performance and are available on the current CPU.
   port::InfoAboutUnusedCPUFeatures();
diff --git a/tensorflow/core/common_runtime/local_device.h b/tensorflow/core/common_runtime/local_device.h
index 9a82fb7204..226f121bf3 100644
--- a/tensorflow/core/common_runtime/local_device.h
+++ b/tensorflow/core/common_runtime/local_device.h
@@ -17,7 +17,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_COMMON_RUNTIME_LOCAL_DEVICE_H_
 
 #include "tensorflow/core/common_runtime/device.h"
-#include "tensorflow/core/common_runtime/tracing_device.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/platform/macros.h"
 
@@ -32,7 +31,7 @@ struct SessionOptions;
 // initializes a shared Eigen compute device used by both.  This
 // should eventually be removed once we refactor ThreadPoolDevice and
 // GPUDevice into more 'process-wide' abstractions.
-class LocalDevice : public TracingDevice {
+class LocalDevice : public Device {
  public:
   LocalDevice(const SessionOptions& options,
               const DeviceAttributes& attributes);
diff --git a/tensorflow/core/common_runtime/tracing_device.h b/tensorflow/core/common_runtime/tracing_device.h
deleted file mode 100644
index e1b163074f..0000000000
--- a/tensorflow/core/common_runtime/tracing_device.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_TRACING_DEVICE_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_TRACING_DEVICE_H_
-
-#include "tensorflow/core/common_runtime/device.h"
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/tracing.h"
-
-namespace tensorflow {
-
-namespace test {
-class Benchmark;
-}
-struct SessionOptions;
-
-// This class implements tracing functionality that is shared by its subclasses
-// (including ThreadPoolDevice and XlaDevice).
-class TracingDevice : public Device {
- public:
-  TracingDevice(Env* env, const DeviceAttributes& attributes)
-      : Device(env, attributes) {}
-
-  void Compute(OpKernel* op_kernel, OpKernelContext* context) override {
-    const tracing::TraceCollector* trace_collector =
-        tracing::GetTraceCollector();
-    if (TF_PREDICT_FALSE(
-            (trace_collector &&
-             trace_collector->IsEnabled(op_kernel->IsExpensive())) ||
-            tracing::GetEventCollector(tracing::EventCategory::kCompute))) {
-      const string& op_name = op_kernel->name();
-      tracing::ScopedActivity activity(op_name, op_kernel->type_string(),
-                                       op_kernel->IsExpensive());
-      tracing::ScopedRegion region(tracing::EventCategory::kCompute, op_name);
-      op_kernel->Compute(context);
-    } else {
-      op_kernel->Compute(context);
-    }
-  }
-
- private:
-  TF_DISALLOW_COPY_AND_ASSIGN(TracingDevice);
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_TRACING_DEVICE_H_
diff --git a/tensorflow/core/platform/default/device_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc
index 0389149469..83c65dbfa9 100644
--- a/tensorflow/core/platform/default/device_tracer.cc
+++ b/tensorflow/core/platform/default/device_tracer.cc
@@ -321,7 +321,12 @@ class DeviceTracerImpl : public DeviceTracer,
     return nullptr;
   }
 
-  bool IsEnabled(bool is_expensive) const override {
+  bool IsEnabledForAnnotations() const override {
+    // We are always enabled for 'Annotations'.
+    return true;
+  }
+
+  bool IsEnabledForActivities(bool is_expensive) const override {
     // We don't do anything with 'Activities' so we are never 'enabled'.
     return false;
   }
diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h
index 9974bbbb4e..aefbe64425 100644
--- a/tensorflow/core/platform/tracing.h
+++ b/tensorflow/core/platform/tracing.h
@@ -155,9 +155,12 @@ class TraceCollector {
       StringPiece name_part1, StringPiece name_part2,
       bool is_expensive) const = 0;
 
+  // Returns true if this annotation tracing is enabled for any op.
+  virtual bool IsEnabledForAnnotations() const = 0;
+
   // Returns true if this activity handle tracking is enabled for an op of the
   // given expensiveness.
-  virtual bool IsEnabled(bool is_expensive) const = 0;
+  virtual bool IsEnabledForActivities(bool is_expensive) const = 0;
 
  protected:
   static string ConcatenateNames(StringPiece first, StringPiece second);
-- 
GitLab


From f44af58facb6a09dc362798c7d473d3120792a99 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Mon, 24 Sep 2018 15:02:01 -0700
Subject: [PATCH 0617/1357] Change indicator used to note Google-OSS header
 inclusions

Accompanies some internal changes related to third_party repo rules.

PiperOrigin-RevId: 214337234
---
 tensorflow/contrib/lite/delegates/eager/kernel.cc               | 2 +-
 tensorflow/contrib/lite/delegates/eager/test_util.cc            | 2 +-
 .../lite/experimental/kernels/ctc_beam_search_decoder.cc        | 2 +-
 .../lite/experimental/kernels/ctc_beam_search_decoder_test.cc   | 2 +-
 .../contrib/lite/experimental/writer/option_writer_generator.cc | 2 +-
 tensorflow/contrib/lite/kernels/audio_spectrogram.cc            | 2 +-
 tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc       | 2 +-
 tensorflow/contrib/lite/kernels/detection_postprocess.cc        | 2 +-
 tensorflow/contrib/lite/kernels/detection_postprocess_test.cc   | 2 +-
 tensorflow/contrib/lite/kernels/layer_norm_lstm.cc              | 2 +-
 tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc         | 2 +-
 tensorflow/contrib/lite/kernels/mfcc.cc                         | 2 +-
 tensorflow/contrib/lite/kernels/mfcc_test.cc                    | 2 +-
 tensorflow/contrib/lite/kernels/relu1_test.cc                   | 2 +-
 tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc | 2 +-
 15 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.cc b/tensorflow/contrib/lite/delegates/eager/kernel.cc
index 274c3c082a..48a2f56baf 100644
--- a/tensorflow/contrib/lite/delegates/eager/kernel.cc
+++ b/tensorflow/contrib/lite/delegates/eager/kernel.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/contrib/lite/delegates/eager/kernel.h"
 
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/builtin_ops.h"
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/context_util.h"
diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.cc b/tensorflow/contrib/lite/delegates/eager/test_util.cc
index 8584999ace..d47be761fb 100644
--- a/tensorflow/contrib/lite/delegates/eager/test_util.cc
+++ b/tensorflow/contrib/lite/delegates/eager/test_util.cc
@@ -16,7 +16,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/delegates/eager/test_util.h"
 
 #include "absl/memory/memory.h"
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/string.h"
 
 namespace tflite {
diff --git a/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder.cc b/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder.cc
index 8442c4d46c..b1ebe4a804 100644
--- a/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder.cc
+++ b/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include <vector>
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/experimental/kernels/ctc_beam_search.h"
 #include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
diff --git a/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder_test.cc b/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder_test.cc
index aa42b495bd..942dbbbeae 100644
--- a/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder_test.cc
+++ b/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder_test.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include <vector>
 
 #include <gtest/gtest.h>
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
diff --git a/tensorflow/contrib/lite/experimental/writer/option_writer_generator.cc b/tensorflow/contrib/lite/experimental/writer/option_writer_generator.cc
index e6d5a776b3..b35c6e0655 100644
--- a/tensorflow/contrib/lite/experimental/writer/option_writer_generator.cc
+++ b/tensorflow/contrib/lite/experimental/writer/option_writer_generator.cc
@@ -16,7 +16,7 @@ limitations under the License.
 #include <iostream>
 #include <unordered_map>
 #include <unordered_set>
-#include "flatbuffers/minireflect.h"  // flatbuffers
+#include "flatbuffers/minireflect.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/schema/reflection/schema_generated.h"
 
 namespace tflite {
diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram.cc
index 44ef587244..0d2d5e775f 100644
--- a/tensorflow/contrib/lite/kernels/audio_spectrogram.cc
+++ b/tensorflow/contrib/lite/kernels/audio_spectrogram.cc
@@ -22,7 +22,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
 #include "tensorflow/contrib/lite/kernels/op_macros.h"
 
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 
 namespace tflite {
 namespace ops {
diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc
index 7346b9fd80..7e4ff6fc16 100644
--- a/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc
+++ b/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include <vector>
 
 #include <gtest/gtest.h>
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
diff --git a/tensorflow/contrib/lite/kernels/detection_postprocess.cc b/tensorflow/contrib/lite/kernels/detection_postprocess.cc
index d2906632d7..e21dc5ced9 100644
--- a/tensorflow/contrib/lite/kernels/detection_postprocess.cc
+++ b/tensorflow/contrib/lite/kernels/detection_postprocess.cc
@@ -15,7 +15,7 @@ limitations under the License.
 #include <string.h>
 #include <numeric>
 #include <vector>
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/c/builtin_op_data.h"
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
diff --git a/tensorflow/contrib/lite/kernels/detection_postprocess_test.cc b/tensorflow/contrib/lite/kernels/detection_postprocess_test.cc
index 94c91a6bd6..1e8caebd82 100644
--- a/tensorflow/contrib/lite/kernels/detection_postprocess_test.cc
+++ b/tensorflow/contrib/lite/kernels/detection_postprocess_test.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include <vector>
 
 #include <gtest/gtest.h>
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
diff --git a/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc b/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc
index 1bbea67b93..9739fd4514 100644
--- a/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc
@@ -16,7 +16,7 @@ limitations under the License.
 // Layer Normalization LSTM op that applies normalization by mean and standard
 // deviation to the activation of the LSTM layers. Please see
 // https://arxiv.org/abs/1607.06450 for details.
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/context.h"
 #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
diff --git a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
index abc229f85a..479f6a7d3c 100644
--- a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
@@ -19,7 +19,7 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
diff --git a/tensorflow/contrib/lite/kernels/mfcc.cc b/tensorflow/contrib/lite/kernels/mfcc.cc
index 66cf147d75..5153ce5634 100644
--- a/tensorflow/contrib/lite/kernels/mfcc.cc
+++ b/tensorflow/contrib/lite/kernels/mfcc.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/contrib/lite/kernels/internal/mfcc.h"
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/c/builtin_op_data.h"
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/kernels/internal/mfcc_dct.h"
diff --git a/tensorflow/contrib/lite/kernels/mfcc_test.cc b/tensorflow/contrib/lite/kernels/mfcc_test.cc
index c9124adcaf..fe69223222 100644
--- a/tensorflow/contrib/lite/kernels/mfcc_test.cc
+++ b/tensorflow/contrib/lite/kernels/mfcc_test.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include <vector>
 
 #include <gtest/gtest.h>
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
diff --git a/tensorflow/contrib/lite/kernels/relu1_test.cc b/tensorflow/contrib/lite/kernels/relu1_test.cc
index c1e0149c20..b1d25a9f50 100644
--- a/tensorflow/contrib/lite/kernels/relu1_test.cc
+++ b/tensorflow/contrib/lite/kernels/relu1_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include <gtest/gtest.h>
-#include "flatbuffers/flexbuffers.h"  // flatbuffers
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
 
diff --git a/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc
index 11057203a8..22b4616ccb 100644
--- a/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc
+++ b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include <fstream>
 #include <gtest/gtest.h>
-#include "flatbuffers/flatc.h"  // flatbuffers
+#include "flatbuffers/flatc.h"  // TF:flatbuffers
 #include "tensorflow/core/platform/platform.h"
 
 #ifdef PLATFORM_GOOGLE
-- 
GitLab


From 46a52ab26ddf6baafba8b702be4cbd7dba71f1ab Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 15:05:59 -0700
Subject: [PATCH 0618/1357] Speed up DedupComputation in arithmetic optimizer.

PiperOrigin-RevId: 214338100
---
 .../optimizers/arithmetic_optimizer.cc        | 46 +++++++++++--------
 tensorflow/core/grappler/utils.cc             | 28 +++++++++++
 tensorflow/core/grappler/utils.h              |  6 +++
 tensorflow/core/grappler/utils_test.cc        | 34 +++++++++++++-
 4 files changed, 92 insertions(+), 22 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 76a9dca73b..ab97dcdb99 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -3042,6 +3042,12 @@ void ArithmeticOptimizer::DedupComputations() {
     return;
   }
   std::set<int> duplicates;
+  // Populate feed_inplace_op;
+  std::unordered_map<string, bool> feeds_inplace_op;
+  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
+    feeds_inplace_op[optimized_graph_->node(i).name()] =
+        FeedsInPlaceOp(graph_view, optimized_graph_->node(i));
+  }
   do {
     stop = true;
     UniqueNodes nodes;
@@ -3050,19 +3056,20 @@ void ArithmeticOptimizer::DedupComputations() {
         continue;
       }
       NodeDef* node = optimized_graph_->mutable_node(i);
-      if (!CanDedup(*node)) {
+      const string& node_name = node->name();
+      if (node_name.empty()) continue;
+      if (feeds_inplace_op[node_name] || !CanDedup(*node)) {
         continue;
       }
       NodeDef* rep = nodes.FindOrAddRepresentative(node);
       if (rep == node) {
         continue;
       }
-      // If either node feeds an inplace op, deduping them may cause data races.
-      // For example: If we dedup nodes initializing two independent inplace
-      // accumulations, they will write to the same buffer, clobbering each
-      // other's results.
-      if (FeedsInPlaceOp(graph_view, *rep) ||
-          FeedsInPlaceOp(graph_view, *node)) {
+      // If either node or rep feeds an inplace op, deduping them may cause data
+      // races. For example: If we dedup nodes initializing two independent
+      // inplace accumulations, they will write to the same buffer, clobbering
+      // each other's results.
+      if (feeds_inplace_op[rep->name()]) {
         continue;
       }
       VLOG(3) << "Remove duplicated node: node=" << node->name()
@@ -3070,20 +3077,19 @@ void ArithmeticOptimizer::DedupComputations() {
       const std::set<NodeDef*>& fanouts = node_map_->GetOutputs(node->name());
       for (NodeDef* fanout : fanouts) {
         for (int i = 0; i < fanout->input_size(); ++i) {
-          string* name = fanout->mutable_input(i);
-          int position;
-          const string nodename = ParseNodeName(*name, &position);
-          if (nodename == node->name()) {
-            // Update name in-place.
-            if (position > 0) {
-              *name = StrCat(rep->name(), ":", position);
-            } else if (position == 0) {
-              *name = rep->name();
-            } else {
-              *name = StrCat("^", rep->name());
-            }
-            node_map_->AddOutput(rep->name(), fanout->name());
+          string* fanout_input = fanout->mutable_input(i);
+          const int position = NodePositionIfSameNode(*fanout_input, node_name);
+          // Update name in-place.
+          if (position < -1) {
+            continue;
+          } else if (position > 0) {
+            *fanout_input = StrCat(rep->name(), ":", position);
+          } else if (position == 0) {
+            *fanout_input = rep->name();
+          } else {
+            *fanout_input = StrCat("^", rep->name());
           }
+          node_map_->AddOutput(rep->name(), fanout->name());
         }
       }
       duplicates.insert(i);
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 153785d3b4..0424c9e8a4 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -165,6 +166,33 @@ int NodePosition(const string& name) {
   return position;
 }
 
+int NodePositionIfSameNode(const string& input_name, const string& node_name) {
+  const bool is_ctrl = input_name[0] == '^';
+  auto input_it = is_ctrl ? input_name.begin() + 1 : input_name.begin();
+  auto node_it = node_name.begin();
+  if (std::distance(input_it, input_name.end()) < node_name.size()) {
+    return -2;
+  }
+  while (node_it != node_name.end()) {
+    if (*input_it++ != *node_it++) {
+      return -2;
+    }
+  }
+  if (input_it == input_name.end()) {
+    return is_ctrl ? -1 : 0;
+  } else if (*input_it++ == ':') {
+    StringPiece remaining(&(*input_it),
+                          std::distance(input_it, input_name.end()));
+    int position;
+    if (!strings::safe_strto32(remaining, &position)) {
+      return -2;
+    }
+    return is_ctrl ? -1 : position;
+  } else {
+    return -2;
+  }
+}
+
 string AddPrefixToNodeName(const string& name, const string& prefix,
                            const string& delimiter) {
   if (!name.empty()) {
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index 20dbeea2cf..296ee1678e 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -107,6 +107,7 @@ bool IsSameInput(const string& name1, const string& name2);
 string NodeName(const string& name);
 
 // Get the trailing position number ":{digits}" (if any) of a node name.
+// Returns -1 for control inputs.
 int NodePosition(const string& name);
 
 inline StringPiece ParseNodeNameAsStringPiece(const string& name,
@@ -142,6 +143,11 @@ inline string ParseNodeName(const string& name, int* position) {
   return string(ParseNodeNameAsStringPiece(name, position));
 }
 
+// Returns NodePosition(input_name) if NodeName(input_name) == node_name.
+// Otherwise returns -2;
+// REQUIRES: inputs_name.size() > 0 && node_name.size() > 0.
+int NodePositionIfSameNode(const string& input_name, const string& node_name);
+
 // Add a prefix to a node name with a custom delimiter.
 string AddPrefixToNodeName(const string& name, const string& prefix,
                            const string& delimiter);
diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc
index c6e035834c..8ff5f20c6d 100644
--- a/tensorflow/core/grappler/utils_test.cc
+++ b/tensorflow/core/grappler/utils_test.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/notification.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -147,6 +148,19 @@ TEST_F(UtilsTest, NodePosition) {
   EXPECT_EQ(0, NodePosition(""));
 }
 
+TEST_F(UtilsTest, NodePositionIfSameNode) {
+  EXPECT_EQ(0, NodePositionIfSameNode("abc", "abc"));
+  EXPECT_EQ(123, NodePositionIfSameNode("abc:123", "abc"));
+  EXPECT_EQ(-1, NodePositionIfSameNode("^abc", "abc"));
+  EXPECT_EQ(-1, NodePositionIfSameNode("^abc:123", "abc"));
+  EXPECT_EQ(-2, NodePositionIfSameNode("abc", "xyz"));
+  EXPECT_EQ(-2, NodePositionIfSameNode("abc", "abc/xyz"));
+  EXPECT_EQ(-2, NodePositionIfSameNode("abc/xyz", "abc"));
+  EXPECT_EQ(-2, NodePositionIfSameNode("abc:123", "xyz"));
+  EXPECT_EQ(-2, NodePositionIfSameNode("^abc", "xyz"));
+  EXPECT_EQ(-2, NodePositionIfSameNode("^abc:123", "xyz"));
+}
+
 TEST_F(UtilsTest, AddNodeNamePrefix) {
   EXPECT_EQ("OPTIMIZED/abc", AddPrefixToNodeName("abc", "OPTIMIZED"));
   EXPECT_EQ("^OPTIMIZED/abc", AddPrefixToNodeName("^abc", "OPTIMIZED"));
@@ -209,7 +223,6 @@ TEST_F(UtilsTest, GetTailOfChain) {
   auto noop = ops::NoOp(s.WithControlDependencies(neg0).WithOpName("noop"));
   GraphDef graph;
   TF_CHECK_OK(s.ToGraphDef(&graph));
-  LOG(INFO) << graph.DebugString();
 
   ASSERT_EQ("c0", graph.node(0).name());
   ASSERT_EQ("c1", graph.node(1).name());
@@ -336,9 +349,26 @@ TEST_F(UtilsTest, NumNonControlOutputs) {
 }
 
 TEST_F(UtilsTest, DeleteNodes) {
-  // TODO(rmlarsen): write forgtten test.
+  // TODO(rmlarsen): write forgotten test.
 }
 
+#define BM_NodePositionIfSameNode(I, N, NAME)               \
+  static void BM_NodePositionIfSameNode_##NAME(int iters) { \
+    string input = I;                                       \
+    string node = N;                                        \
+    for (int i = 0; i < iters; ++i) {                       \
+      const int pos = NodePositionIfSameNode(input, node);  \
+      CHECK_GT(pos, -3);                                    \
+    }                                                       \
+  }                                                         \
+  BENCHMARK(BM_NodePositionIfSameNode_##NAME)
+
+BM_NodePositionIfSameNode("foo/bar/baz:7", "foo/bar/baz", Match_7);
+BM_NodePositionIfSameNode("foo/bar/baz", "foo/bar/baz", Match_0);
+BM_NodePositionIfSameNode("^foo/bar/baz", "foo/bar/baz", Match_Ctrl);
+BM_NodePositionIfSameNode("blah", "foo/bar/baz", NoMatch_0);
+BM_NodePositionIfSameNode("foo/bar/baz/gnu", "foo/bar/baz", NoMatch_end);
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 6995db405617abc90da3331094aa8af5e6b57fd1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 15:06:55 -0700
Subject: [PATCH 0619/1357] Disable PinToHostOptimizer for any TPU graphs.

PiperOrigin-RevId: 214338297
---
 .../grappler/optimizers/pin_to_host_optimizer.cc  | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index c8f9311b2e..98c27300a9 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -157,6 +157,16 @@ string TryFindHostDevice(const gtl::FlatSet<string>& devices,
   return device;
 }
 
+bool IsTPUGraphDef(const GraphDef& def) {
+  for (auto node : def.node()) {
+    if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
+        node.op() == "TPUPartitionedCall") {
+      return true;
+    }
+  }
+  return false;
+}
+
 // All the nodes that should be blacklisted and not swapped.
 bool IsBlacklisted(const NodeDef& node) { return IsCollective(node); }
 }  // end namespace internal
@@ -165,6 +175,11 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                     GraphDef* optimized_graph) {
   *optimized_graph = item.graph;
 
+  // Skip all TPU graphs.
+  if (internal::IsTPUGraphDef(*optimized_graph)) {
+    return Status::OK();
+  }
+
   GraphProperties properties(item);
   bool has_properties = false;
   GraphView graph(optimized_graph);
-- 
GitLab


From 084f84f2ce44b8a1909b59bcc940652a95cd6fc9 Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Mon, 24 Sep 2018 15:47:17 -0700
Subject: [PATCH 0620/1357] PolymorphicFunction cache key is changed to use the
 init graph instead of the default graph in the scope.

PiperOrigin-RevId: 214345046
---
 tensorflow/python/eager/function.py      | 23 ++++++++++--------
 tensorflow/python/eager/function_test.py | 30 ++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 1f5d479882..b28befeb62 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1157,7 +1157,7 @@ class PolymorphicFunction(object):
     # then `instance` will be `foo` (and `owner` will be `Foo`).
     return functools.partial(self.__call__, instance)
 
-  def _cache_key(self, args, kwargs, ctx, graph):
+  def _cache_key(self, args, kwargs):
     """Computes the cache key given inputs and execution context."""
     if self._input_signature is None:
       inputs = (args, kwargs) if kwargs else args
@@ -1166,19 +1166,23 @@ class PolymorphicFunction(object):
       del args, kwargs
       cache_key = self._flat_input_signature
 
-    # The graph, or whether we're executing eagerly, should be a part of the
-    # cache key so we don't improperly capture tensors such as variables.
-    executing_eagerly = ctx.executing_eagerly()
-    execution_context = executing_eagerly or graph
+    with ops.init_scope():
+      init_graph = ops.get_default_graph()
+
+      # The graph, or whether we're executing eagerly, should be a part of the
+      # cache key so we don't improperly capture tensors such as variables.
+      executing_eagerly = context.executing_eagerly()
+      execution_context = executing_eagerly or init_graph
 
+    default_graph = ops.get_default_graph()
     # Putting the device in the cache key ensures that call-site device
     # annotations are respected.
-    device_functions = _get_device_functions(ctx, graph)
+    device_functions = _get_device_functions(context.context(), default_graph)
 
     # `ops.colocate_with` directives translate into `ops.device` directives when
     # eager execution is enabled.
-    colocation_stack = (None if executing_eagerly else
-                        tuple(graph._colocation_stack.peek_objs()))  # pylint: disable=protected-access
+    colocation_stack = (() if executing_eagerly else
+                        tuple(default_graph._colocation_stack.peek_objs()))  # pylint: disable=protected-access
 
     return cache_key + (execution_context, device_functions, colocation_stack)
 
@@ -1281,8 +1285,7 @@ class PolymorphicFunction(object):
     """
     if self._input_signature is None or args is not None or kwargs is not None:
       args, kwargs = self._canonicalize_function_inputs(*args, **kwargs)
-    cache_key = self._cache_key(args, kwargs, context.context(),
-                                ops.get_default_graph())
+    cache_key = self._cache_key(args, kwargs)
     with self._lock:
       try:
         graph_function = self._function_cache.get(cache_key, None)
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 04f42f63d4..59faf967c5 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1812,6 +1812,36 @@ class FunctionTest(test.TestCase):
         # Grappler fallback to use the CPU impl even called with GPU function.
         self.assertEquals(y_value, 3.0)
 
+  def testDefunFunctionSeparateGraphs(self):
+    with context.graph_mode():
+
+      @function.defun
+      def add(x):
+        return x + 5
+
+      @function.defun
+      def maybe_add(x, should_add):
+        if should_add:
+          return add(x)
+        else:
+          return x
+
+      with ops.Graph().as_default():
+        x = constant_op.constant(11)
+        maybe_add(x, True)
+        self.assertEqual(len(maybe_add._function_cache), 1)
+        self.assertEqual(len(add._function_cache), 1)
+
+        maybe_add(x, False)
+        self.assertEqual(len(maybe_add._function_cache), 2)
+        self.assertEqual(len(add._function_cache), 1)
+
+      with ops.Graph().as_default():
+        x = constant_op.constant(11)
+        maybe_add(x, True)
+        self.assertEqual(len(maybe_add._function_cache), 3)
+        self.assertEqual(len(add._function_cache), 2)
+
 
 @test_util.with_c_shapes
 class AutomaticControlDependenciesTest(test.TestCase):
-- 
GitLab


From 9c58005ec86297a1d0a17dc4f7ad7cbae9c47e4b Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 24 Sep 2018 15:54:22 -0700
Subject: [PATCH 0621/1357] Remove the "constants" input group from _XlaRun;
 NFC

It wasn't actually needed.

PiperOrigin-RevId: 214346217
---
 tensorflow/compiler/jit/build_xla_ops_pass.cc | 20 ++++++-----
 tensorflow/compiler/jit/kernels/xla_ops.cc    | 35 ++++++++++++-------
 tensorflow/compiler/jit/ops/xla_ops.cc        |  4 ---
 .../compiler/jit/xla_compile_on_demand_op.cc  |  6 ++--
 tensorflow/compiler/jit/xla_device_ops.h      |  3 +-
 tensorflow/compiler/jit/xla_launch_util.cc    | 13 ++++---
 tensorflow/compiler/jit/xla_launch_util.h     | 14 ++++++--
 7 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc
index a6086f30a1..13a518d0e8 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc
@@ -55,7 +55,6 @@ static Status BuildXlaCompileNode(
 }
 
 static Status BuildXlaRunNode(const string& nodename, const string& device_name,
-                              const DataTypeVector& constant_dtypes,
                               const DataTypeVector& arg_dtypes,
                               const DataTypeVector& result_dtypes, Graph* graph,
                               Node** node) {
@@ -63,7 +62,6 @@ static Status BuildXlaRunNode(const string& nodename, const string& device_name,
   def.set_name(graph->NewName(nodename));
   def.set_op("_XlaRun");
   def.set_device(device_name);
-  AddNodeAttr("Tconstants", constant_dtypes, &def);
   AddNodeAttr("Targs", arg_dtypes, &def);
   AddNodeAttr("Tresults", result_dtypes, &def);
 
@@ -98,12 +96,14 @@ static Status GetXlaAttrs(Node* node, int* num_constant_args,
   return Status::OK();
 }
 
-static void CopyIncomingEdges(Graph* g, Node* old_node, Node* new_node) {
+static void CopyIncomingEdges(Graph* g, Node* old_node, Node* new_node,
+                              int prefix_to_ignore) {
   for (const Edge* edge : old_node->in_edges()) {
     if (edge->IsControlEdge()) {
       g->AddControlEdge(edge->src(), new_node);
-    } else {
-      g->AddEdge(edge->src(), edge->src_output(), new_node, edge->dst_input());
+    } else if (edge->dst_input() >= prefix_to_ignore) {
+      g->AddEdge(edge->src(), edge->src_output(), new_node,
+                 edge->dst_input() - prefix_to_ignore);
     }
   }
 }
@@ -145,17 +145,19 @@ static Status ReplaceNodeWithXlaCompileAndRun(Graph* g, Node* n) {
   }
 
   TF_RETURN_IF_ERROR(BuildXlaRunNode(n->name(), n->requested_device(),
-                                     const_dtypes, arg_dtypes_with_resources,
+                                     arg_dtypes_with_resources,
                                      n->output_types(), g, &run_node));
 
   compile_node->set_assigned_device_name(n->assigned_device_name());
   run_node->set_assigned_device_name(n->assigned_device_name());
 
-  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/compile_node);
-  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/run_node);
+  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/compile_node,
+                    /*prefix_to_ignore=*/0);
+  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/run_node,
+                    /*prefix_to_ignore=*/num_constant_args);
 
   // The compilation_key output.
-  g->AddEdge(compile_node, 0, run_node, n->num_inputs());
+  g->AddEdge(compile_node, 0, run_node, n->num_inputs() - num_constant_args);
 
   MoveOutgoingEdges(g, /*old_node=*/n, /*new_node=*/run_node);
   g->RemoveNode(n);
diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc
index c483841a7c..a85006eb03 100644
--- a/tensorflow/compiler/jit/kernels/xla_ops.cc
+++ b/tensorflow/compiler/jit/kernels/xla_ops.cc
@@ -98,11 +98,13 @@ class XlaExecutableClosure {
   explicit XlaExecutableClosure(
       xla::LocalClient* client, xla::LocalExecutable* executable,
       const XlaCompiler::CompilationResult* compilation_result,
-      std::map<int, OptionalTensor> resource_var_snapshots)
+      std::map<int, OptionalTensor> resource_var_snapshots,
+      int num_constant_args)
       : client_(client),
         executable_(executable),
         compilation_result_(compilation_result),
-        resource_var_snapshots_(std::move(resource_var_snapshots)) {}
+        resource_var_snapshots_(std::move(resource_var_snapshots)),
+        num_constant_args_(num_constant_args) {}
 
   XlaExecutableClosure(XlaExecutableClosure&&) = default;
   XlaExecutableClosure& operator=(XlaExecutableClosure&&) = default;
@@ -115,12 +117,14 @@ class XlaExecutableClosure {
   const std::map<int, OptionalTensor>& resource_var_snapshots() const {
     return resource_var_snapshots_;
   }
+  int num_constant_args() const { return num_constant_args_; }
 
  private:
   xla::LocalClient* client_;
   xla::LocalExecutable* executable_;
   const XlaCompiler::CompilationResult* compilation_result_;
   std::map<int, OptionalTensor> resource_var_snapshots_;
+  int num_constant_args_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(XlaExecutableClosure);
 };
@@ -298,7 +302,8 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
       client, platform_info_.allocator(),
       /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(),
       platform_info_.UseMultipleStreams());
-  launch_context.PopulateInputs(ctx, kernel, variables);
+  launch_context.PopulateInputs(ctx, kernel, variables,
+                                /*missing_ctx_input_prefix=*/0);
 
   // Execute the computation.
   VLOG(2) << "Executing computation.";
@@ -317,7 +322,8 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
   VLOG(2) << "Elapsed time: " << elapsed << "us";
 
   OP_REQUIRES_OK(ctx, launch_context.PopulateOutputs(
-                          ctx, kernel, run_result.ConsumeValueOrDie()));
+                          ctx, kernel, run_result.ConsumeValueOrDie(),
+                          /*missing_ctx_input_prefix=*/0));
   VLOG(1) << "Done";
 }
 
@@ -406,7 +412,7 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) {
   // variables.
   XlaExecutableClosureStore::KeyT key =
       XlaExecutableClosureStore::Global()->Produce(XlaExecutableClosure(
-          client, executable, kernel, std::move(variables)));
+          client, executable, kernel, std::move(variables), constants_.size()));
 
   Allocator* cpu_allocator = [&] {
     AllocatorAttributes host_alloc_attrs;
@@ -440,8 +446,13 @@ void XlaRunOp::Compute(OpKernelContext* ctx) {
       closure.client(), platform_info_.allocator(),
       /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(),
       /*use_multiple_streams=*/platform_info_.UseMultipleStreams());
-  launch_context.PopulateInputs(ctx, closure.compilation_result(),
-                                closure.resource_var_snapshots());
+
+  // We're missing the must-be-constant inputs, tell `PopulateInputs`
+  // about this.  We don't actually need these inputs because they've
+  // already been baked into the compiled kernel.
+  launch_context.PopulateInputs(
+      ctx, closure.compilation_result(), closure.resource_var_snapshots(),
+      /*missing_ctx_input_prefix=*/closure.num_constant_args());
 
   se::Stream* stream =
       ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
@@ -461,8 +472,10 @@ void XlaRunOp::Compute(OpKernelContext* ctx) {
   VLOG(2) << "Elapsed time in computation: " << elapsed << "us";
 
   OP_REQUIRES_OK(
-      ctx, launch_context.PopulateOutputs(ctx, closure.compilation_result(),
-                                          run_result.ConsumeValueOrDie()));
+      ctx,
+      launch_context.PopulateOutputs(
+          ctx, closure.compilation_result(), run_result.ConsumeValueOrDie(),
+          /*missing_ctx_input_prefix=*/closure.num_constant_args()));
 }
 
 REGISTER_KERNEL_BUILDER(Name("XlaLaunch").Device(DEVICE_CPU), XlaLocalLaunchOp);
@@ -481,8 +494,6 @@ REGISTER_KERNEL_BUILDER(Name("_XlaCompile")
                         XlaCompileOp);
 
 REGISTER_KERNEL_BUILDER(Name("_XlaRun").Device(DEVICE_CPU), XlaRunOp);
-
-REGISTER_KERNEL_BUILDER(
-    Name("_XlaRun").Device(DEVICE_GPU).HostMemory("constants"), XlaRunOp);
+REGISTER_KERNEL_BUILDER(Name("_XlaRun").Device(DEVICE_GPU), XlaRunOp);
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/ops/xla_ops.cc b/tensorflow/compiler/jit/ops/xla_ops.cc
index 6b4cdaa1c1..bcd1a29b1f 100644
--- a/tensorflow/compiler/jit/ops/xla_ops.cc
+++ b/tensorflow/compiler/jit/ops/xla_ops.cc
@@ -76,10 +76,6 @@ for now.
 )");
 
 REGISTER_OP("_XlaRun")
-    // TODO(sanjoy): We don't need constants and Tconstants and they should be
-    // removed.
-    .Input("constants: Tconstants")
-    .Attr("Tconstants: list(type) >= 0")
     .Input("args: Targs")
     .Attr("Targs: list(type) >= 0")
     .Output("results: Tresults")
diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
index 3ba48e8c31..3c160aefe5 100644
--- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
+++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
@@ -58,7 +58,8 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx,
       /*allocate_xla_tensors=*/true,
       /*use_multiple_streams=*/metadata.UseMultipleStreams());
 
-  launch_context.PopulateInputs(ctx, result, variables);
+  launch_context.PopulateInputs(ctx, result, variables,
+                                /*missing_ctx_input_prefix=*/0);
 
   se::Stream* stream =
       ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
@@ -79,7 +80,8 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx,
   TF_RETURN_IF_ERROR(run_result.status());
 
   TF_RETURN_IF_ERROR(launch_context.PopulateOutputs(
-      ctx, result, run_result.ConsumeValueOrDie()));
+      ctx, result, run_result.ConsumeValueOrDie(),
+      /*missing_ctx_input_prefix=*/0));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h
index 639243973c..2ccee79761 100644
--- a/tensorflow/compiler/jit/xla_device_ops.h
+++ b/tensorflow/compiler/jit/xla_device_ops.h
@@ -73,8 +73,7 @@ class XlaAssignVariableOp : public AsyncOpKernel {
                           KERNEL);
 
 #define REGISTER_XLA_RUN_KERNEL(DEVICE, KERNEL, TYPES) \
-  REGISTER_KERNEL_BUILDER(                             \
-      Name("_XlaRun").Device(DEVICE).HostMemory("constants"), KERNEL);
+  REGISTER_KERNEL_BUILDER(Name("_XlaRun").Device(DEVICE), KERNEL);
 
 #define REGISTER_XLA_DEVICE_KERNELS(DEVICE, TYPES)                             \
   REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE), SendOp);               \
diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc
index 07a93e9c39..f5c8bdd6ee 100644
--- a/tensorflow/compiler/jit/xla_launch_util.cc
+++ b/tensorflow/compiler/jit/xla_launch_util.cc
@@ -133,7 +133,8 @@ XlaComputationLaunchContext::XlaComputationLaunchContext(
 
 void XlaComputationLaunchContext::PopulateInputs(
     OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel,
-    const std::map<int, OptionalTensor>& variables) {
+    const std::map<int, OptionalTensor>& variables,
+    int missing_ctx_input_prefix) {
   se::Stream* stream =
       ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
   // Build ShapedBuffers that point directly to the Tensor buffers.
@@ -145,12 +146,13 @@ void XlaComputationLaunchContext::PopulateInputs(
   const Tensor* t;
   for (int i = 0; i < kernel->xla_input_shapes.size(); ++i) {
     int arg_num = kernel->input_mapping[i];
+    DCHECK_GE(arg_num, missing_ctx_input_prefix);
     const xla::Shape& shape = kernel->xla_input_shapes[i];
     if (variables.count(arg_num)) {
       t = &(variables.at(arg_num).value);
       CHECK(t);
     } else {
-      t = &(ctx->input(arg_num));
+      t = &(ctx->input(arg_num - missing_ctx_input_prefix));
     }
 
     if (use_multiple_streams_) {
@@ -187,7 +189,7 @@ void XlaComputationLaunchContext::PopulateInputs(
 
 Status XlaComputationLaunchContext::PopulateOutputs(
     OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel,
-    ScopedShapedBuffer output) {
+    ScopedShapedBuffer output, int missing_ctx_input_prefix) {
   se::Stream* stream =
       ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
 
@@ -315,7 +317,8 @@ Status XlaComputationLaunchContext::PopulateOutputs(
   for (int i = 0; i < kernel->resource_updates.size(); ++i) {
     Allocator* allocator = ctx->device()->GetAllocator({});
     const XlaCompiler::ResourceUpdate& write = kernel->resource_updates[i];
-    if (write.input_index < 0 || write.input_index >= ctx->num_inputs()) {
+    int actual_input_index = write.input_index - missing_ctx_input_prefix;
+    if (actual_input_index < 0 || actual_input_index >= ctx->num_inputs()) {
       return errors::Internal("Invalid input index for variable write.");
     }
 
@@ -325,7 +328,7 @@ Status XlaComputationLaunchContext::PopulateOutputs(
     // TODO(b/35625933): tensorflow::Var should contain a PersistentTensor,
     // not a Tensor.
     TF_RETURN_IF_ERROR(LookupOrCreateResource<Var>(
-        ctx, HandleFromInput(ctx, write.input_index), &variable,
+        ctx, HandleFromInput(ctx, actual_input_index), &variable,
         [&write](Var** ptr) {
           *ptr = new Var(write.type);
           return Status::OK();
diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h
index fa7a5e5f89..326d70a027 100644
--- a/tensorflow/compiler/jit/xla_launch_util.h
+++ b/tensorflow/compiler/jit/xla_launch_util.h
@@ -89,14 +89,24 @@ class XlaComputationLaunchContext {
 
   // Add all inputs within `ctx` as XLA arguments (returned by arguments()).
   // `variables` is a map from TensorFlow argument number to resource variable.
+  //
+  // Assumes that the first `missing_ctx_input_prefix` inputs to the kernel are
+  // missing and adjusts input indices accordingly.  All elements in kernel's
+  // input_mapping must be greater than or equal to `missing_ctx_input_prefix`
+  // (in other words, no inputs actually required by the kernel can be missing).
   void PopulateInputs(OpKernelContext* ctx,
                       const XlaCompiler::CompilationResult* kernel,
-                      const std::map<int, OptionalTensor>& variables);
+                      const std::map<int, OptionalTensor>& variables,
+                      int missing_ctx_input_prefix);
 
   // Given the XLA output in `output`, populate all outputs of `ctx`.
+  //
+  // Assumes that the first `missing_ctx_input_prefix` inputs to the kernel are
+  // missing and adjusts input indices accordingly.
   Status PopulateOutputs(OpKernelContext* ctx,
                          const XlaCompiler::CompilationResult* kernel,
-                         xla::ScopedShapedBuffer output);
+                         xla::ScopedShapedBuffer output,
+                         int missing_ctx_input_prefix);
 
   // Return the argument list. Only valid after PopulateInputs() has been
   // called.
-- 
GitLab


From 1ff157d82dac29f5a3a3197b2664208f6ed6ba06 Mon Sep 17 00:00:00 2001
From: Pete Warden <petewarden@google.com>
Date: Mon, 24 Sep 2018 15:54:32 -0700
Subject: [PATCH 0622/1357] Portability preparation for more cross-platform
 prototyping.

PiperOrigin-RevId: 214346240
---
 tensorflow/contrib/lite/c/c_api_internal.c    |  25 +-
 .../lite/core/api/flatbuffer_conversions.cc   |  92 ++-
 .../lite/core/api/flatbuffer_conversions.h    |  22 +-
 .../core/api/flatbuffer_conversions_test.cc   |  26 +-
 .../contrib/lite/kernels/internal/BUILD       |  13 +-
 .../lite/kernels/internal/compatibility.h     |  32 +-
 .../internal/reference/depthwiseconv_uint8.h  |   2 -
 .../internal/reference/fully_connected.h      | 460 ++++++++++++++
 .../internal/reference/reference_ops.h        | 580 +-----------------
 .../lite/kernels/internal/reference/softmax.h | 202 ++++++
 .../contrib/lite/kernels/internal/types.h     |  18 +-
 .../contrib/lite/kernels/kernel_util.cc       |   4 +
 tensorflow/contrib/lite/kernels/op_macros.h   |  46 +-
 tensorflow/contrib/lite/model.cc              |  12 +-
 14 files changed, 868 insertions(+), 666 deletions(-)
 create mode 100644 tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h
 create mode 100644 tensorflow/contrib/lite/kernels/internal/reference/softmax.h

diff --git a/tensorflow/contrib/lite/c/c_api_internal.c b/tensorflow/contrib/lite/c/c_api_internal.c
index 1846bad4b7..8a0c177b19 100644
--- a/tensorflow/contrib/lite/c/c_api_internal.c
+++ b/tensorflow/contrib/lite/c/c_api_internal.c
@@ -14,15 +14,29 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
+#ifndef TF_LITE_STATIC_MEMORY
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#endif  // TF_LITE_STATIC_MEMORY
 
 int TfLiteIntArrayGetSizeInBytes(int size) {
   static TfLiteIntArray dummy;
   return sizeof(dummy) + sizeof(dummy.data[0]) * size;
 }
 
+int TfLiteIntArrayEqual(TfLiteIntArray* a, TfLiteIntArray* b) {
+  if (a == b) return 1;
+  if (a == NULL || b == NULL) return 0;
+  if (a->size != b->size) return 0;
+  int i = 0;
+  for (; i < a->size; i++)
+    if (a->data[i] != b->data[i]) return 0;
+  return 1;
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+
 TfLiteIntArray* TfLiteIntArrayCreate(int size) {
   TfLiteIntArray* ret =
       (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size));
@@ -40,16 +54,6 @@ void TfLiteIntArrayPrint(const char* s, TfLiteIntArray* a) {
   printf("]\n");
 }
 
-int TfLiteIntArrayEqual(TfLiteIntArray* a, TfLiteIntArray* b) {
-  if (a == b) return 1;
-  if (a == NULL || b == NULL) return 0;
-  if (a->size != b->size) return 0;
-  int i = 0;
-  for (; i < a->size; i++)
-    if (a->data[i] != b->data[i]) return 0;
-  return 1;
-}
-
 TfLiteIntArray* TfLiteIntArrayCopy(TfLiteIntArray* src) {
   if (!src) return NULL;
   TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size);
@@ -102,3 +106,4 @@ void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
   }
   tensor->bytes = num_bytes;
 }
+#endif  // TF_LITE_STATIC_MEMORY
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index 03af538073..e6900e0950 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -44,16 +44,6 @@ void FlatBufferIntVectorToArray(int max_size_of_buffer,
   }
 }
 
-// Allocate a structure using malloc, but make sure the structure is a POD
-// structure that doesn't require constructors to run. The reason we do this,
-// is that Interpreter's C extension part will take ownership so destructors
-// will not be run during deallocation.
-template <class T>
-T* MallocPOD() {
-  static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
-  return static_cast<T*>(malloc(sizeof(T)));
-}
-
 }  // namespace
 
 TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
@@ -98,7 +88,8 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
 // need to be released by calling `free`.`
 // If it returns kTfLiteError, `builtin_data` will be `nullptr`.
 TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
-                         ErrorReporter* error_reporter, void** builtin_data) {
+                         ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
   auto parse_padding = [](Padding padding) {
     switch (padding) {
       case Padding_SAME:
@@ -150,7 +141,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
   *builtin_data = nullptr;
   switch (op_type) {
     case BuiltinOperator_CONV_2D: {
-      TfLiteConvParams* params = MallocPOD<TfLiteConvParams>();
+      TfLiteConvParams* params = allocator->AllocatePOD<TfLiteConvParams>();
       if (auto* conv_params = op->builtin_options_as_Conv2DOptions()) {
         params->padding = parse_padding(conv_params->padding());
         params->stride_width = conv_params->stride_w();
@@ -165,7 +156,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_CAST: {
-      TfLiteCastParams* params = MallocPOD<TfLiteCastParams>();
+      TfLiteCastParams* params = allocator->AllocatePOD<TfLiteCastParams>();
       if (auto* schema_params = op->builtin_options_as_CastOptions()) {
         auto in_status =
             ConvertTensorType(schema_params->in_data_type(),
@@ -174,7 +165,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
             ConvertTensorType(schema_params->out_data_type(),
                               &params->out_data_type, error_reporter);
         if (in_status != kTfLiteOk || out_status != kTfLiteOk) {
-          free(params);
+          allocator->Deallocate(params);
           return kTfLiteError;
         }
       }
@@ -183,7 +174,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     }
     case BuiltinOperator_LSH_PROJECTION: {
       TfLiteLSHProjectionParams* params =
-          MallocPOD<TfLiteLSHProjectionParams>();
+          allocator->AllocatePOD<TfLiteLSHProjectionParams>();
       if (auto* lshParams = op->builtin_options_as_LSHProjectionOptions()) {
         params->type = parseLSHProjectionType(lshParams->type());
       }
@@ -193,7 +184,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_AVERAGE_POOL_2D:
     case BuiltinOperator_MAX_POOL_2D:
     case BuiltinOperator_L2_POOL_2D: {
-      TfLitePoolParams* params = MallocPOD<TfLitePoolParams>();
+      TfLitePoolParams* params = allocator->AllocatePOD<TfLitePoolParams>();
       if (auto* pool_params = op->builtin_options_as_Pool2DOptions()) {
         params->padding = parse_padding(pool_params->padding());
         params->stride_width = pool_params->stride_w();
@@ -208,7 +199,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     }
     case BuiltinOperator_DEPTHWISE_CONV_2D: {
       TfLiteDepthwiseConvParams* params =
-          MallocPOD<TfLiteDepthwiseConvParams>();
+          allocator->AllocatePOD<TfLiteDepthwiseConvParams>();
       if (auto* conv_params = op->builtin_options_as_DepthwiseConv2DOptions()) {
         params->padding = parse_padding(conv_params->padding());
         params->stride_width = conv_params->stride_w();
@@ -224,7 +215,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_SVDF: {
-      TfLiteSVDFParams* params = MallocPOD<TfLiteSVDFParams>();
+      TfLiteSVDFParams* params = allocator->AllocatePOD<TfLiteSVDFParams>();
       if (auto* svdf_params = op->builtin_options_as_SVDFOptions()) {
         params->rank = svdf_params->rank();
         params->activation =
@@ -235,7 +226,8 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     }
     case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN:
     case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: {
-      TfLiteSequenceRNNParams* params = MallocPOD<TfLiteSequenceRNNParams>();
+      TfLiteSequenceRNNParams* params =
+          allocator->AllocatePOD<TfLiteSequenceRNNParams>();
       if (auto* sequence_rnn_params =
               op->builtin_options_as_SequenceRNNOptions()) {
         params->activation =
@@ -246,7 +238,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_RNN: {
-      TfLiteRNNParams* params = MallocPOD<TfLiteRNNParams>();
+      TfLiteRNNParams* params = allocator->AllocatePOD<TfLiteRNNParams>();
       if (auto* rnn_params = op->builtin_options_as_RNNOptions()) {
         params->activation =
             parse_activation(rnn_params->fused_activation_function());
@@ -256,7 +248,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     }
     case BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: {
       TfLiteEmbeddingLookupSparseParams* params =
-          MallocPOD<TfLiteEmbeddingLookupSparseParams>();
+          allocator->AllocatePOD<TfLiteEmbeddingLookupSparseParams>();
       if (auto* embedding_params =
               op->builtin_options_as_EmbeddingLookupSparseOptions()) {
         params->combiner = parseCombinerType(embedding_params->combiner());
@@ -266,7 +258,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     }
     case BuiltinOperator_FULLY_CONNECTED: {
       TfLiteFullyConnectedParams* params =
-          MallocPOD<TfLiteFullyConnectedParams>();
+          allocator->AllocatePOD<TfLiteFullyConnectedParams>();
       if (auto* fully_connected_params =
               op->builtin_options_as_FullyConnectedOptions()) {
         params->activation = parse_activation(
@@ -291,7 +283,8 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       // no-op.
       break;
     case BuiltinOperator_SOFTMAX: {
-      TfLiteSoftmaxParams* params = MallocPOD<TfLiteSoftmaxParams>();
+      TfLiteSoftmaxParams* params =
+          allocator->AllocatePOD<TfLiteSoftmaxParams>();
       if (auto* softmax_params = op->builtin_options_as_SoftmaxOptions()) {
         params->beta = softmax_params->beta();
       }
@@ -300,7 +293,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     }
     case BuiltinOperator_CONCATENATION: {
       TfLiteConcatenationParams* params =
-          MallocPOD<TfLiteConcatenationParams>();
+          allocator->AllocatePOD<TfLiteConcatenationParams>();
       if (auto* concatenation_params =
               op->builtin_options_as_ConcatenationOptions()) {
         params->activation =
@@ -311,7 +304,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_MUL: {
-      auto* params = MallocPOD<TfLiteMulParams>();
+      auto* params = allocator->AllocatePOD<TfLiteMulParams>();
       if (auto* schema_params = op->builtin_options_as_MulOptions()) {
         params->activation =
             parse_activation(schema_params->fused_activation_function());
@@ -320,7 +313,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_ADD: {
-      auto* params = MallocPOD<TfLiteAddParams>();
+      auto* params = allocator->AllocatePOD<TfLiteAddParams>();
       if (auto* schema_params = op->builtin_options_as_AddOptions()) {
         params->activation =
             parse_activation(schema_params->fused_activation_function());
@@ -329,7 +322,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_DIV: {
-      auto* params = MallocPOD<TfLiteDivParams>();
+      auto* params = allocator->AllocatePOD<TfLiteDivParams>();
       if (auto* schema_params = op->builtin_options_as_DivOptions()) {
         params->activation =
             parse_activation(schema_params->fused_activation_function());
@@ -338,7 +331,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_SUB: {
-      auto* params = MallocPOD<TfLiteSubParams>();
+      auto* params = allocator->AllocatePOD<TfLiteSubParams>();
       if (auto* schema_params = op->builtin_options_as_SubOptions()) {
         params->activation =
             parse_activation(schema_params->fused_activation_function());
@@ -347,7 +340,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_L2_NORMALIZATION: {
-      auto* params = MallocPOD<TfLiteL2NormParams>();
+      auto* params = allocator->AllocatePOD<TfLiteL2NormParams>();
       if (auto* schema_params = op->builtin_options_as_L2NormOptions()) {
         params->activation =
             parse_activation(schema_params->fused_activation_function());
@@ -356,7 +349,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: {
-      auto* params = MallocPOD<TfLiteLocalResponseNormParams>();
+      auto* params = allocator->AllocatePOD<TfLiteLocalResponseNormParams>();
       if (auto* schema_params =
               op->builtin_options_as_LocalResponseNormalizationOptions()) {
         params->radius = schema_params->radius();
@@ -370,7 +363,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM:
     case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
     case BuiltinOperator_LSTM: {
-      TfLiteLSTMParams* params = MallocPOD<TfLiteLSTMParams>();
+      TfLiteLSTMParams* params = allocator->AllocatePOD<TfLiteLSTMParams>();
       if (auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
         params->activation =
             parse_activation(lstm_params->fused_activation_function());
@@ -389,7 +382,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_RESIZE_BILINEAR: {
-      auto* params = MallocPOD<TfLiteResizeBilinearParams>();
+      auto* params = allocator->AllocatePOD<TfLiteResizeBilinearParams>();
       if (auto* schema_params =
               op->builtin_options_as_ResizeBilinearOptions()) {
         params->align_corners = schema_params->align_corners();
@@ -398,7 +391,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_RESHAPE: {
-      auto* params = MallocPOD<TfLiteReshapeParams>();
+      auto* params = allocator->AllocatePOD<TfLiteReshapeParams>();
       if (auto* schema_params = op->builtin_options_as_ReshapeOptions()) {
         auto* new_shape = schema_params->new_shape();
         FlatBufferIntVectorToArray(sizeof(params->shape), new_shape,
@@ -409,7 +402,8 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_SKIP_GRAM: {
-      TfLiteSkipGramParams* params = MallocPOD<TfLiteSkipGramParams>();
+      TfLiteSkipGramParams* params =
+          allocator->AllocatePOD<TfLiteSkipGramParams>();
       if (auto* skip_gram_params = op->builtin_options_as_SkipGramOptions()) {
         params->ngram_size = skip_gram_params->ngram_size();
         params->max_skip_size = skip_gram_params->max_skip_size();
@@ -419,7 +413,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_SPACE_TO_DEPTH: {
-      auto* params = MallocPOD<TfLiteSpaceToDepthParams>();
+      auto* params = allocator->AllocatePOD<TfLiteSpaceToDepthParams>();
       if (auto* schema_params = op->builtin_options_as_SpaceToDepthOptions()) {
         params->block_size = schema_params->block_size();
       }
@@ -427,7 +421,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_GATHER: {
-      TfLiteGatherParams* params = MallocPOD<TfLiteGatherParams>();
+      TfLiteGatherParams* params = allocator->AllocatePOD<TfLiteGatherParams>();
       params->axis = 0;
       if (auto* gather_params = op->builtin_options_as_GatherOptions()) {
         params->axis = gather_params->axis();
@@ -442,7 +436,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_REDUCE_PROD:
     case BuiltinOperator_REDUCE_ANY:
     case BuiltinOperator_SUM: {
-      auto* params = MallocPOD<TfLiteReducerParams>();
+      auto* params = allocator->AllocatePOD<TfLiteReducerParams>();
       if (auto* schema_params = op->builtin_options_as_ReducerOptions()) {
         params->keep_dims = schema_params->keep_dims();
       }
@@ -450,7 +444,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_SPLIT: {
-      auto* params = MallocPOD<TfLiteSplitParams>();
+      auto* params = allocator->AllocatePOD<TfLiteSplitParams>();
       if (auto* schema_params = op->builtin_options_as_SplitOptions()) {
         params->num_splits = schema_params->num_splits();
       }
@@ -458,7 +452,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_SQUEEZE: {
-      auto* params = MallocPOD<TfLiteSqueezeParams>();
+      auto* params = allocator->AllocatePOD<TfLiteSqueezeParams>();
       if (auto* schema_params = op->builtin_options_as_SqueezeOptions()) {
         const auto& squeeze_dims = schema_params->squeeze_dims();
         FlatBufferIntVectorToArray(sizeof(params->squeeze_dims), squeeze_dims,
@@ -469,7 +463,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_STRIDED_SLICE: {
-      auto* params = MallocPOD<TfLiteStridedSliceParams>();
+      auto* params = allocator->AllocatePOD<TfLiteStridedSliceParams>();
       if (auto* schema_params = op->builtin_options_as_StridedSliceOptions()) {
         params->begin_mask = schema_params->begin_mask();
         params->end_mask = schema_params->end_mask();
@@ -481,7 +475,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_ARG_MAX: {
-      auto* params = MallocPOD<TfLiteArgMaxParams>();
+      auto* params = allocator->AllocatePOD<TfLiteArgMaxParams>();
       if (auto* schema_params = op->builtin_options_as_ArgMaxOptions()) {
         ConvertTensorType(schema_params->output_type(), &params->output_type,
                           error_reporter);
@@ -490,7 +484,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_ARG_MIN: {
-      auto* params = MallocPOD<TfLiteArgMinParams>();
+      auto* params = allocator->AllocatePOD<TfLiteArgMinParams>();
       if (const auto* schema_params = op->builtin_options_as_ArgMinOptions()) {
         ConvertTensorType(schema_params->output_type(), &params->output_type,
                           error_reporter);
@@ -500,7 +494,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     }
     case BuiltinOperator_TRANSPOSE_CONV: {
       TfLiteTransposeConvParams* params =
-          MallocPOD<TfLiteTransposeConvParams>();
+          allocator->AllocatePOD<TfLiteTransposeConvParams>();
       if (auto* transpose_conv_params =
               op->builtin_options_as_TransposeConvOptions()) {
         params->padding = parse_padding(transpose_conv_params->padding());
@@ -512,7 +506,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     }
     case BuiltinOperator_SPARSE_TO_DENSE: {
       TfLiteSparseToDenseParams* params =
-          MallocPOD<TfLiteSparseToDenseParams>();
+          allocator->AllocatePOD<TfLiteSparseToDenseParams>();
       if (auto* sparse_to_dense_params =
               op->builtin_options_as_SparseToDenseOptions()) {
         params->validate_indices = sparse_to_dense_params->validate_indices();
@@ -521,7 +515,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_SHAPE: {
-      auto* params = MallocPOD<TfLiteShapeParams>();
+      auto* params = allocator->AllocatePOD<TfLiteShapeParams>();
       if (auto* schema_params = op->builtin_options_as_ShapeOptions()) {
         ConvertTensorType(schema_params->out_type(), &params->out_type,
                           error_reporter);
@@ -530,7 +524,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_PACK: {
-      TfLitePackParams* params = MallocPOD<TfLitePackParams>();
+      TfLitePackParams* params = allocator->AllocatePOD<TfLitePackParams>();
       if (auto* pack_params = op->builtin_options_as_PackOptions()) {
         params->values_count = pack_params->values_count();
         params->axis = pack_params->axis();
@@ -544,7 +538,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       return kTfLiteError;
     }
     case BuiltinOperator_FAKE_QUANT: {
-      auto* params = MallocPOD<TfLiteFakeQuantParams>();
+      auto* params = allocator->AllocatePOD<TfLiteFakeQuantParams>();
       if (auto* schema_params = op->builtin_options_as_FakeQuantOptions()) {
         params->min = schema_params->min();
         params->max = schema_params->max();
@@ -555,7 +549,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_ONE_HOT: {
-      auto* params = MallocPOD<TfLiteOneHotParams>();
+      auto* params = allocator->AllocatePOD<TfLiteOneHotParams>();
       if (auto* schema_params = op->builtin_options_as_OneHotOptions()) {
         params->axis = schema_params->axis();
       }
@@ -563,7 +557,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       break;
     }
     case BuiltinOperator_UNPACK: {
-      TfLiteUnpackParams* params = MallocPOD<TfLiteUnpackParams>();
+      TfLiteUnpackParams* params = allocator->AllocatePOD<TfLiteUnpackParams>();
       if (auto* unpack_params = op->builtin_options_as_UnpackOptions()) {
         params->num = unpack_params->num();
         params->axis = unpack_params->axis();
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.h b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.h
index 4dec6f9cfc..c770e627fd 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.h
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.h
@@ -26,6 +26,25 @@ limitations under the License.
 
 namespace tflite {
 
+// Interface class for builtin data allocations.
+class BuiltinDataAllocator {
+ public:
+  virtual void* Allocate(size_t size) = 0;
+  virtual void Deallocate(void* data) = 0;
+
+  // Allocate a structure, but make sure it is a POD structure that doesn't
+  // require constructors to run. The reason we do this, is that Interpreter's C
+  // extension part will take ownership so destructors  will not be run during
+  // deallocation.
+  template <typename T>
+  T* AllocatePOD() {
+    static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
+    return static_cast<T*>(this->Allocate(sizeof(T)));
+  }
+
+  virtual ~BuiltinDataAllocator() {}
+};
+
 // Parse the appropriate data out of the op.
 //
 // This handles builtin data explicitly as there are flatbuffer schemas.
@@ -36,7 +55,8 @@ namespace tflite {
 // function's responsibility to free it.
 // If it returns kTfLiteError, `builtin_data` will be `nullptr`.
 TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
-                         ErrorReporter* error_reporter, void** builtin_data);
+                         ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data);
 
 // Converts the tensor data type used in the flat buffer to the representation
 // used by the runtime.
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions_test.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions_test.cc
index b12bdf43b2..8ae94e1d33 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions_test.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions_test.cc
@@ -39,11 +39,31 @@ class MockErrorReporter : public ErrorReporter {
   int buffer_size_;
 };
 
+// Used to determine how the op data parsing function creates its working space.
+class MockDataAllocator : public BuiltinDataAllocator {
+ public:
+  MockDataAllocator() : is_allocated_(false) {}
+  void* Allocate(size_t size) override {
+    EXPECT_FALSE(is_allocated_);
+    const int max_size = kBufferSize;
+    EXPECT_LE(size, max_size);
+    is_allocated_ = true;
+    return buffer_;
+  }
+  void Deallocate(void* data) override { is_allocated_ = false; }
+
+ private:
+  static constexpr int kBufferSize = 1024;
+  char buffer_[kBufferSize];
+  bool is_allocated_;
+};
+
 }  // namespace
 
 TEST(FlatbufferConversions, TestParseOpDataConv) {
   MockErrorReporter mock_reporter;
   ErrorReporter* reporter = &mock_reporter;
+  MockDataAllocator mock_allocator;
 
   flatbuffers::FlatBufferBuilder builder;
   flatbuffers::Offset<void> conv_options =
@@ -58,7 +78,7 @@ TEST(FlatbufferConversions, TestParseOpDataConv) {
   const Operator* conv_op = flatbuffers::GetRoot<Operator>(conv_pointer);
   void* output_data = nullptr;
   EXPECT_EQ(kTfLiteOk, ParseOpData(conv_op, BuiltinOperator_CONV_2D, reporter,
-                                   &output_data));
+                                   &mock_allocator, &output_data));
   EXPECT_NE(nullptr, output_data);
   TfLiteConvParams* params = reinterpret_cast<TfLiteConvParams*>(output_data);
   EXPECT_EQ(kTfLitePaddingSame, params->padding);
@@ -67,12 +87,12 @@ TEST(FlatbufferConversions, TestParseOpDataConv) {
   EXPECT_EQ(kTfLiteActRelu, params->activation);
   EXPECT_EQ(3, params->dilation_width_factor);
   EXPECT_EQ(4, params->dilation_height_factor);
-  free(output_data);
 }
 
 TEST(FlatbufferConversions, TestParseOpDataCustom) {
   MockErrorReporter mock_reporter;
   ErrorReporter* reporter = &mock_reporter;
+  MockDataAllocator mock_allocator;
 
   flatbuffers::FlatBufferBuilder builder;
   flatbuffers::Offset<void> null_options;
@@ -84,7 +104,7 @@ TEST(FlatbufferConversions, TestParseOpDataCustom) {
   const Operator* custom_op = flatbuffers::GetRoot<Operator>(custom_pointer);
   void* output_data = nullptr;
   EXPECT_EQ(kTfLiteOk, ParseOpData(custom_op, BuiltinOperator_CUSTOM, reporter,
-                                   &output_data));
+                                   &mock_allocator, &output_data));
   EXPECT_EQ(nullptr, output_data);
 }
 
diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
index 195474e7fd..afb5ec05df 100644
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -43,7 +43,10 @@ cc_library(
         "compatibility.h",
         "types.h",
     ],
-    deps = ["@com_google_absl//absl/base:core_headers"],
+    deps = [
+        "//tensorflow/contrib/lite/kernels:op_macros",
+        "@com_google_absl//absl/base:core_headers",
+    ],
 )
 
 config_setting(
@@ -260,6 +263,7 @@ cc_library(
     deps = [
         ":round",
         ":types",
+        "//tensorflow/contrib/lite/kernels:op_macros",
     ],
 )
 
@@ -291,7 +295,9 @@ cc_library(
         "common.h",
         "reference/depthwiseconv_float.h",
         "reference/depthwiseconv_uint8.h",
+        "reference/fully_connected.h",
         "reference/reference_ops.h",
+        "reference/softmax.h",
     ],
     deps = [
         ":quantization_util",
@@ -300,6 +306,7 @@ cc_library(
         ":types",
         "@gemmlowp",
         "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/kernels:op_macros",
     ] + select({
         ":haswell": tflite_deps_intel,
         ":ios_x86_64": tflite_deps_intel,
@@ -320,8 +327,10 @@ cc_library(
         "common.h",
         "reference/depthwiseconv_float.h",
         "reference/depthwiseconv_uint8.h",
+        "reference/fully_connected.h",
         "reference/legacy_reference_ops.h",
         "reference/reference_ops.h",
+        "reference/softmax.h",
     ],
     deps = [
         ":quantization_util",
@@ -330,6 +339,7 @@ cc_library(
         ":types",
         "@gemmlowp",
         "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/kernels:op_macros",
     ] + select({
         ":haswell": tflite_deps_intel,
         ":ios_x86_64": tflite_deps_intel,
@@ -462,6 +472,7 @@ cc_library(
         "@com_google_absl//absl/base:core_headers",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "@arm_neon_2_x86_sse",
+        "//tensorflow/contrib/lite/kernels:op_macros",
         "@gemmlowp",
     ] + select({
         ":arm": [
diff --git a/tensorflow/contrib/lite/kernels/internal/compatibility.h b/tensorflow/contrib/lite/kernels/internal/compatibility.h
index 93fc6b6a76..b87cf2b60d 100644
--- a/tensorflow/contrib/lite/kernels/internal/compatibility.h
+++ b/tensorflow/contrib/lite/kernels/internal/compatibility.h
@@ -15,65 +15,65 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
 #define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
 
-#include <cassert>
 #include <cstdint>
-#include <cstdlib>
+
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
 
 #ifndef TFLITE_DCHECK
-#define TFLITE_DCHECK(condition) (condition) ? (void)0 : assert(false)
+#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
 #endif
 
 #ifndef TFLITE_DCHECK_EQ
-#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : assert(false)
+#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
 #endif
 
 #ifndef TFLITE_DCHECK_NE
-#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : assert(false)
+#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
 #endif
 
 #ifndef TFLITE_DCHECK_GE
-#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : assert(false)
+#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
 #endif
 
 #ifndef TFLITE_DCHECK_GT
-#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : assert(false)
+#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
 #endif
 
 #ifndef TFLITE_DCHECK_LE
-#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : assert(false)
+#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
 #endif
 
 #ifndef TFLITE_DCHECK_LT
-#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : assert(false)
+#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
 #endif
 
 // TODO(ahentz): Clean up: We should stick to the DCHECK versions.
 #ifndef TFLITE_CHECK
-#define TFLITE_CHECK(condition) (condition) ? (void)0 : abort()
+#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
 #endif
 
 #ifndef TFLITE_CHECK_EQ
-#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : abort()
+#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
 #endif
 
 #ifndef TFLITE_CHECK_NE
-#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : abort()
+#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
 #endif
 
 #ifndef TFLITE_CHECK_GE
-#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : abort()
+#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
 #endif
 
 #ifndef TFLITE_CHECK_GT
-#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : abort()
+#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
 #endif
 
 #ifndef TFLITE_CHECK_LE
-#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : abort()
+#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
 #endif
 
 #ifndef TFLITE_CHECK_LT
-#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : abort()
+#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
 #endif
 
 // TODO(ahentz): Clean up.
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
index ecc655cf99..e8fc566502 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
@@ -18,7 +18,6 @@ limitations under the License.
 #include <algorithm>
 
 #include "fixedpoint/fixedpoint.h"
-#include "public/gemmlowp.h"
 #include "tensorflow/contrib/lite/kernels/internal/common.h"
 #include "tensorflow/contrib/lite/kernels/internal/compatibility.h"
 #include "tensorflow/contrib/lite/kernels/internal/types.h"
@@ -35,7 +34,6 @@ inline void DepthwiseConv(
     const uint8* filter_data, const RuntimeShape& bias_shape,
     const int32* bias_data, const RuntimeShape& output_shape,
     uint8* output_data) {
-  gemmlowp::ScopedProfilingLabel label("DepthwiseConv/8bit");
   const int stride_width = params.stride_width;
   const int stride_height = params.stride_height;
   const int dilation_width_factor = params.dilation_width_factor;
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h b/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h
new file mode 100644
index 0000000000..23325e8c4c
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h
@@ -0,0 +1,460 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
+#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/round.h"
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+const int kReverseShift = -1;
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& weights_shape,
+    const float* weights_data, const RuntimeShape& bias_shape,
+    const float* bias_data, const RuntimeShape& output_shape,
+    float* output_data) {
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  // TODO(benoitjacob): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dims_count = output_shape.DimensionsCount();
+  const int weights_dims_count = weights_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
+  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
+                                       output_shape, output_dims_count - 1);
+  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      float total = 0.f;
+      for (int d = 0; d < accum_depth; ++d) {
+        total += input_data[b * accum_depth + d] *
+                 weights_data[out_c * accum_depth + d];
+      }
+      float bias_value = 0.0f;
+      if (bias_data) {
+        bias_value = bias_data[out_c];
+      }
+      output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
+          total + bias_value, output_activation_min, output_activation_max);
+    }
+  }
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+                           const float* weights_data,
+                           const Dims<4>& weights_dims, const float* bias_data,
+                           const Dims<4>& bias_dims,
+                           float output_activation_min,
+                           float output_activation_max, float* output_data,
+                           const Dims<4>& output_dims) {
+  tflite::FullyConnectedParams op_params;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(weights_dims), weights_data,
+                 DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+                 output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+                    const float* weights_data, const Dims<4>& weights_dims,
+                    const float* bias_data, const Dims<4>& bias_dims,
+                    float* output_data, const Dims<4>& output_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data,
+                 bias_dims, output_activation_min, output_activation_max,
+                 output_data, output_dims);
+}
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    uint8* output_data, void* gemm_context) {
+  (void)gemm_context;  // only used in optimized code.
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  // TODO(benoitjacob): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      int32 acc = 0;
+      for (int d = 0; d < accum_depth; ++d) {
+        int32 input_val = input_data[b * accum_depth + d];
+        int32 filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
+      }
+      if (bias_data) {
+        acc += bias_data[out_c];
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+      acc += output_offset;
+      acc = std::max(acc, output_activation_min);
+      acc = std::min(acc, output_activation_max);
+      output_data[out_c + output_depth * b] = static_cast<uint8>(acc);
+    }
+  }
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                           int32 input_offset, const uint8* filter_data,
+                           const Dims<4>& filter_dims, int32 filter_offset,
+                           const int32* bias_data, const Dims<4>& bias_dims,
+                           int32 output_offset, int32 output_multiplier,
+                           int output_shift, int32 output_activation_min,
+                           int32 output_activation_max, uint8* output_data,
+                           const Dims<4>& output_dims, void* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    int16* output_data, void* gemm_context) {
+  (void)gemm_context;  // only used in optimized code.
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  TFLITE_DCHECK_EQ(output_offset, 0);
+  // TODO(benoitjacob): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32 accum = bias_data[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < accum_depth; ++d) {
+        int16 input_val = input_data[b * accum_depth + d] + input_offset;
+        int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset;
+        accum += filter_val * input_val;
+      }
+      // Down-scale the final int32 accumulator to the scale used by our
+      // (16-bit, typically 3 integer bits) fixed-point format. The quantized
+      // multiplier and shift here have been pre-computed offline
+      // (e.g. by toco).
+      accum =
+          MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
+      // Saturate, cast to int16, and store to output array.
+      accum = std::max(accum, output_activation_min - output_offset);
+      accum = std::min(accum, output_activation_max - output_offset);
+      accum += output_offset;
+      output_data[out_c + output_depth * b] = accum;
+    }
+  }
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                           int32 input_offset, const uint8* filter_data,
+                           const Dims<4>& filter_dims, int32 filter_offset,
+                           const int32* bias_data, const Dims<4>& bias_dims,
+                           int32 output_offset, int32 output_multiplier,
+                           int output_shift, int32 output_activation_min,
+                           int32 output_activation_max, int16* output_data,
+                           const Dims<4>& output_dims, void* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
+inline void ShuffledFullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& weights_shape,
+    const uint8* shuffled_weights_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    int16* output_data, uint8* shuffled_input_workspace_data,
+    void* gemm_context) {
+  (void)gemm_context;  // only used in optimized code.
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+  // TODO(benoitjacob): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
+  TFLITE_DCHECK((accum_depth % 16) == 0);
+  TFLITE_DCHECK((output_depth % 4) == 0);
+
+  // Shuffling and xoring of input activations into the workspace buffer
+  uint8* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
+  if (batches == 1) {
+    for (int i = 0; i < accum_depth; i++) {
+      shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
+    }
+  } else if (batches == 4) {
+    for (int c = 0; c < accum_depth; c += 16) {
+      for (int b = 0; b < 4; b++) {
+        const uint8* src_data_ptr = input_data + b * accum_depth + c;
+        for (int j = 0; j < 16; j++) {
+          uint8 src_val = *src_data_ptr++;
+          // Flip the sign bit, so that the kernel will only need to
+          // reinterpret these uint8 values as int8, getting for free the
+          // subtraction of the zero_point value 128.
+          uint8 dst_val = src_val ^ 0x80;
+          *shuffled_input_workspace_ptr++ = dst_val;
+        }
+      }
+    }
+  } else {
+    TFLITE_DCHECK(false);
+    return;
+  }
+
+  // Actual computation
+  if (batches == 1) {
+    int16* output_ptr = output_data;
+    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
+    // so that just reinterpreting them as int8 values is equivalent to
+    // subtracting 128 from them, thus implementing for free the subtraction of
+    // the zero_point value 128.
+    const int8* shuffled_weights_ptr =
+        reinterpret_cast<const int8*>(shuffled_weights_data);
+    // Likewise, we preshuffled and pre-xored the input data above.
+    const int8* shuffled_input_data =
+        reinterpret_cast<const int8*>(shuffled_input_workspace_data);
+    for (int c = 0; c < output_depth; c += 4) {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32 accum[4] = {0};
+      // Accumulation loop.
+      for (int d = 0; d < accum_depth; d += 16) {
+        for (int i = 0; i < 4; i++) {
+          for (int j = 0; j < 16; j++) {
+            int8 input_val = shuffled_input_data[d + j];
+            int8 weights_val = *shuffled_weights_ptr++;
+            accum[i] += weights_val * input_val;
+          }
+        }
+      }
+      for (int i = 0; i < 4; i++) {
+        // Add bias value
+        int32 acc = accum[i] + bias_data[c + i];
+        // Down-scale the final int32 accumulator to the scale used by our
+        // (16-bit, typically 3 integer bits) fixed-point format. The quantized
+        // multiplier and shift here have been pre-computed offline
+        // (e.g. by toco).
+        acc =
+            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+        // Saturate, cast to int16, and store to output array.
+        acc = std::max(acc, output_activation_min);
+        acc = std::min(acc, output_activation_max);
+        output_ptr[c + i] = acc;
+      }
+    }
+  } else if (batches == 4) {
+    int16* output_ptr = output_data;
+    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
+    // so that just reinterpreting them as int8 values is equivalent to
+    // subtracting 128 from them, thus implementing for free the subtraction of
+    // the zero_point value 128.
+    const int8* shuffled_weights_ptr =
+        reinterpret_cast<const int8*>(shuffled_weights_data);
+    // Likewise, we preshuffled and pre-xored the input data above.
+    const int8* shuffled_input_data =
+        reinterpret_cast<const int8*>(shuffled_input_workspace_data);
+    for (int c = 0; c < output_depth; c += 4) {
+      const int8* shuffled_input_ptr = shuffled_input_data;
+      // Accumulation loop.
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32 accum[4][4];
+      for (int i = 0; i < 4; i++) {
+        for (int b = 0; b < 4; b++) {
+          accum[i][b] = 0;
+        }
+      }
+      for (int d = 0; d < accum_depth; d += 16) {
+        for (int i = 0; i < 4; i++) {
+          for (int b = 0; b < 4; b++) {
+            for (int j = 0; j < 16; j++) {
+              int8 input_val = shuffled_input_ptr[16 * b + j];
+              int8 weights_val = shuffled_weights_ptr[16 * i + j];
+              accum[i][b] += weights_val * input_val;
+            }
+          }
+        }
+        shuffled_input_ptr += 64;
+        shuffled_weights_ptr += 64;
+      }
+      for (int i = 0; i < 4; i++) {
+        for (int b = 0; b < 4; b++) {
+          // Add bias value
+          int32 acc = accum[i][b] + bias_data[c + i];
+          // Down-scale the final int32 accumulator to the scale used by our
+          // (16-bit, typically 3 integer bits) fixed-point format. The
+          // quantized multiplier and shift here have been pre-computed offline
+          // (e.g. by toco).
+          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
+                                              output_shift);
+          // Saturate, cast to int16, and store to output array.
+          acc = std::max(acc, output_activation_min);
+          acc = std::min(acc, output_activation_max);
+          output_ptr[b * output_depth + c + i] = acc;
+        }
+      }
+    }
+  } else {
+    TFLITE_DCHECK(false);
+    return;
+  }
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void ShuffledFullyConnected(
+    const uint8* input_data, const Dims<4>& input_dims,
+    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
+    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
+    int output_shift, int32 output_activation_min, int32 output_activation_max,
+    int16* output_data, const Dims<4>& output_dims,
+    uint8* shuffled_input_workspace_data, void* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data,
+                         DimsToShape(weights_dims), shuffled_weights_data,
+                         DimsToShape(bias_dims), bias_data,
+                         DimsToShape(output_dims), output_data,
+                         shuffled_input_workspace_data, gemm_context);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                    int32 input_offset, const uint8* filter_data,
+                    const Dims<4>& filter_dims, int32 filter_offset,
+                    const int32* bias_data, const Dims<4>& bias_dims,
+                    int32 output_offset, int32 output_multiplier,
+                    int output_shift, int32 output_activation_min,
+                    int32 output_activation_max, uint8* output_data,
+                    const Dims<4>& output_dims, void* gemm_context) {
+  static_assert(Ac == FusedActivationFunctionType::kNone ||
+                    Ac == FusedActivationFunctionType::kRelu ||
+                    Ac == FusedActivationFunctionType::kRelu6 ||
+                    Ac == FusedActivationFunctionType::kRelu1,
+                "");
+  if (Ac == FusedActivationFunctionType::kNone) {
+    TFLITE_DCHECK_EQ(output_activation_min, 0);
+    TFLITE_DCHECK_EQ(output_activation_max, 255);
+  }
+  FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims,
+                 filter_offset, bias_data, bias_dims, output_offset,
+                 output_multiplier, output_shift, output_activation_min,
+                 output_activation_max, output_data, output_dims, gemm_context);
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 5bfa3bd084..7a5535489a 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -28,6 +28,8 @@ limitations under the License.
 #include "public/gemmlowp.h"
 #include "tensorflow/contrib/lite/kernels/internal/common.h"
 #include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/softmax.h"
 #include "tensorflow/contrib/lite/kernels/internal/round.h"
 #include "tensorflow/contrib/lite/kernels/internal/strided_slice_logic.h"
 #include "tensorflow/contrib/lite/kernels/internal/types.h"
@@ -98,13 +100,6 @@ gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
 
 namespace reference_ops {
 
-// TODO(b/80247582) Remove this constant.
-// This will be phased out as the shifts are revised with more thought. Use of a
-// constant enables us to track progress on this work.
-//
-// Used mainly to convert from old-style shifts (right) to new-style (left).
-static constexpr int kReverseShift = -1;
-
 inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) {
   shape->BuildFrom(
       {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
@@ -181,7 +176,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 
-  (void)im2col_data;  // only used in optimized code.
+  (void)im2col_data;   // only used in optimized code.
   (void)im2col_shape;  // only used in optimized code.
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
   const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
@@ -606,437 +601,6 @@ inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,
   }
 }
 
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const float* input_data, const RuntimeShape& weights_shape,
-    const float* weights_data, const RuntimeShape& bias_shape,
-    const float* bias_data, const RuntimeShape& output_shape,
-    float* output_data) {
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  // TODO(benoitjacob): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dims_count = output_shape.DimensionsCount();
-  const int weights_dims_count = weights_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
-  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
-                                       output_shape, output_dims_count - 1);
-  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      float total = 0.f;
-      for (int d = 0; d < accum_depth; ++d) {
-        total += input_data[b * accum_depth + d] *
-                 weights_data[out_c * accum_depth + d];
-      }
-      float bias_value = 0.0f;
-      if (bias_data) {
-        bias_value = bias_data[out_c];
-      }
-      output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
-          total + bias_value, output_activation_min, output_activation_max);
-    }
-  }
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
-                           const float* weights_data,
-                           const Dims<4>& weights_dims, const float* bias_data,
-                           const Dims<4>& bias_dims,
-                           float output_activation_min,
-                           float output_activation_max, float* output_data,
-                           const Dims<4>& output_dims) {
-  tflite::FullyConnectedParams op_params;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  FullyConnected(op_params, DimsToShape(input_dims), input_data,
-                 DimsToShape(weights_dims), weights_data,
-                 DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
-                 output_data);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void FullyConnected(const float* input_data, const Dims<4>& input_dims,
-                    const float* weights_data, const Dims<4>& weights_dims,
-                    const float* bias_data, const Dims<4>& bias_dims,
-                    float* output_data, const Dims<4>& output_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data,
-                 bias_dims, output_activation_min, output_activation_max,
-                 output_data, output_dims);
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8* input_data, const RuntimeShape& filter_shape,
-    const uint8* filter_data, const RuntimeShape& bias_shape,
-    const int32* bias_data, const RuntimeShape& output_shape,
-    uint8* output_data, gemmlowp::GemmContext* gemm_context) {
-  (void)gemm_context;  // only used in optimized code.
-  const int32 input_offset = params.input_offset;
-  const int32 filter_offset = params.weights_offset;
-  const int32 output_offset = params.output_offset;
-  const int32 output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32 output_activation_min = params.quantized_activation_min;
-  const int32 output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  // TODO(benoitjacob): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32 acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32 input_val = input_data[b * accum_depth + d];
-        int32 filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * (input_val + input_offset);
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<uint8>(acc);
-    }
-  }
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                           int32 input_offset, const uint8* filter_data,
-                           const Dims<4>& filter_dims, int32 filter_offset,
-                           const int32* bias_data, const Dims<4>& bias_dims,
-                           int32 output_offset, int32 output_multiplier,
-                           int output_shift, int32 output_activation_min,
-                           int32 output_activation_max, uint8* output_data,
-                           const Dims<4>& output_dims,
-                           gemmlowp::GemmContext* gemm_context) {
-  tflite::FullyConnectedParams op_params;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  FullyConnected(op_params, DimsToShape(input_dims), input_data,
-                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                 bias_data, DimsToShape(output_dims), output_data,
-                 gemm_context);
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8* input_data, const RuntimeShape& filter_shape,
-    const uint8* filter_data, const RuntimeShape& bias_shape,
-    const int32* bias_data, const RuntimeShape& output_shape,
-    int16* output_data, gemmlowp::GemmContext* gemm_context) {
-  (void)gemm_context;  // only used in optimized code.
-  const int32 input_offset = params.input_offset;
-  const int32 filter_offset = params.weights_offset;
-  const int32 output_offset = params.output_offset;
-  const int32 output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32 output_activation_min = params.quantized_activation_min;
-  const int32 output_activation_max = params.quantized_activation_max;
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(output_offset, 0);
-  // TODO(benoitjacob): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32 accum = bias_data[out_c];
-      // Accumulation loop.
-      for (int d = 0; d < accum_depth; ++d) {
-        int16 input_val = input_data[b * accum_depth + d] + input_offset;
-        int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset;
-        accum += filter_val * input_val;
-      }
-      // Down-scale the final int32 accumulator to the scale used by our
-      // (16-bit, typically 3 integer bits) fixed-point format. The quantized
-      // multiplier and shift here have been pre-computed offline
-      // (e.g. by toco).
-      accum =
-          MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
-      // Saturate, cast to int16, and store to output array.
-      accum = std::max(accum, output_activation_min - output_offset);
-      accum = std::min(accum, output_activation_max - output_offset);
-      accum += output_offset;
-      output_data[out_c + output_depth * b] = accum;
-    }
-  }
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                           int32 input_offset, const uint8* filter_data,
-                           const Dims<4>& filter_dims, int32 filter_offset,
-                           const int32* bias_data, const Dims<4>& bias_dims,
-                           int32 output_offset, int32 output_multiplier,
-                           int output_shift, int32 output_activation_min,
-                           int32 output_activation_max, int16* output_data,
-                           const Dims<4>& output_dims,
-                           gemmlowp::GemmContext* gemm_context) {
-  tflite::FullyConnectedParams op_params;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  FullyConnected(op_params, DimsToShape(input_dims), input_data,
-                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                 bias_data, DimsToShape(output_dims), output_data,
-                 gemm_context);
-}
-
-inline void ShuffledFullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8* input_data, const RuntimeShape& weights_shape,
-    const uint8* shuffled_weights_data, const RuntimeShape& bias_shape,
-    const int32* bias_data, const RuntimeShape& output_shape,
-    int16* output_data, uint8* shuffled_input_workspace_data,
-    gemmlowp::GemmContext* gemm_context) {
-  (void)gemm_context;  // only used in optimized code.
-  const int32 output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32 output_activation_min = params.quantized_activation_min;
-  const int32 output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
-  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-  // TODO(benoitjacob): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int weights_dim_count = weights_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
-  TFLITE_DCHECK((accum_depth % 16) == 0);
-  TFLITE_DCHECK((output_depth % 4) == 0);
-
-  // Shuffling and xoring of input activations into the workspace buffer
-  uint8* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
-  if (batches == 1) {
-    for (int i = 0; i < accum_depth; i++) {
-      shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
-    }
-  } else if (batches == 4) {
-    for (int c = 0; c < accum_depth; c += 16) {
-      for (int b = 0; b < 4; b++) {
-        const uint8* src_data_ptr = input_data + b * accum_depth + c;
-        for (int j = 0; j < 16; j++) {
-          uint8 src_val = *src_data_ptr++;
-          // Flip the sign bit, so that the kernel will only need to
-          // reinterpret these uint8 values as int8, getting for free the
-          // subtraction of the zero_point value 128.
-          uint8 dst_val = src_val ^ 0x80;
-          *shuffled_input_workspace_ptr++ = dst_val;
-        }
-      }
-    }
-  } else {
-    TFLITE_DCHECK(false);
-    return;
-  }
-
-  // Actual computation
-  if (batches == 1) {
-    int16* output_ptr = output_data;
-    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
-    // so that just reinterpreting them as int8 values is equivalent to
-    // subtracting 128 from them, thus implementing for free the subtraction of
-    // the zero_point value 128.
-    const int8* shuffled_weights_ptr =
-        reinterpret_cast<const int8*>(shuffled_weights_data);
-    // Likewise, we preshuffled and pre-xored the input data above.
-    const int8* shuffled_input_data =
-        reinterpret_cast<const int8*>(shuffled_input_workspace_data);
-    for (int c = 0; c < output_depth; c += 4) {
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32 accum[4] = {0};
-      // Accumulation loop.
-      for (int d = 0; d < accum_depth; d += 16) {
-        for (int i = 0; i < 4; i++) {
-          for (int j = 0; j < 16; j++) {
-            int8 input_val = shuffled_input_data[d + j];
-            int8 weights_val = *shuffled_weights_ptr++;
-            accum[i] += weights_val * input_val;
-          }
-        }
-      }
-      for (int i = 0; i < 4; i++) {
-        // Add bias value
-        int acc = accum[i] + bias_data[c + i];
-        // Down-scale the final int32 accumulator to the scale used by our
-        // (16-bit, typically 3 integer bits) fixed-point format. The quantized
-        // multiplier and shift here have been pre-computed offline
-        // (e.g. by toco).
-        acc =
-            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-        // Saturate, cast to int16, and store to output array.
-        acc = std::max(acc, output_activation_min);
-        acc = std::min(acc, output_activation_max);
-        output_ptr[c + i] = acc;
-      }
-    }
-  } else if (batches == 4) {
-    int16* output_ptr = output_data;
-    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
-    // so that just reinterpreting them as int8 values is equivalent to
-    // subtracting 128 from them, thus implementing for free the subtraction of
-    // the zero_point value 128.
-    const int8* shuffled_weights_ptr =
-        reinterpret_cast<const int8*>(shuffled_weights_data);
-    // Likewise, we preshuffled and pre-xored the input data above.
-    const int8* shuffled_input_data =
-        reinterpret_cast<const int8*>(shuffled_input_workspace_data);
-    for (int c = 0; c < output_depth; c += 4) {
-      const int8* shuffled_input_ptr = shuffled_input_data;
-      // Accumulation loop.
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32 accum[4][4];
-      for (int i = 0; i < 4; i++) {
-        for (int b = 0; b < 4; b++) {
-          accum[i][b] = 0;
-        }
-      }
-      for (int d = 0; d < accum_depth; d += 16) {
-        for (int i = 0; i < 4; i++) {
-          for (int b = 0; b < 4; b++) {
-            for (int j = 0; j < 16; j++) {
-              int8 input_val = shuffled_input_ptr[16 * b + j];
-              int8 weights_val = shuffled_weights_ptr[16 * i + j];
-              accum[i][b] += weights_val * input_val;
-            }
-          }
-        }
-        shuffled_input_ptr += 64;
-        shuffled_weights_ptr += 64;
-      }
-      for (int i = 0; i < 4; i++) {
-        for (int b = 0; b < 4; b++) {
-          // Add bias value
-          int acc = accum[i][b] + bias_data[c + i];
-          // Down-scale the final int32 accumulator to the scale used by our
-          // (16-bit, typically 3 integer bits) fixed-point format. The
-          // quantized multiplier and shift here have been pre-computed offline
-          // (e.g. by toco).
-          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              output_shift);
-          // Saturate, cast to int16, and store to output array.
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_ptr[b * output_depth + c + i] = acc;
-        }
-      }
-    }
-  } else {
-    TFLITE_DCHECK(false);
-    return;
-  }
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void ShuffledFullyConnected(
-    const uint8* input_data, const Dims<4>& input_dims,
-    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
-    int output_shift, int32 output_activation_min, int32 output_activation_max,
-    int16* output_data, const Dims<4>& output_dims,
-    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
-  tflite::FullyConnectedParams op_params;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data,
-                         DimsToShape(weights_dims), shuffled_weights_data,
-                         DimsToShape(bias_dims), bias_data,
-                         DimsToShape(output_dims), output_data,
-                         shuffled_input_workspace_data, gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                    int32 input_offset, const uint8* filter_data,
-                    const Dims<4>& filter_dims, int32 filter_offset,
-                    const int32* bias_data, const Dims<4>& bias_dims,
-                    int32 output_offset, int32 output_multiplier,
-                    int output_shift, int32 output_activation_min,
-                    int32 output_activation_max, uint8* output_data,
-                    const Dims<4>& output_dims,
-                    gemmlowp::GemmContext* gemm_context) {
-  static_assert(Ac == FusedActivationFunctionType::kNone ||
-                    Ac == FusedActivationFunctionType::kRelu ||
-                    Ac == FusedActivationFunctionType::kRelu6 ||
-                    Ac == FusedActivationFunctionType::kRelu1,
-                "");
-  if (Ac == FusedActivationFunctionType::kNone) {
-    TFLITE_DCHECK_EQ(output_activation_min, 0);
-    TFLITE_DCHECK_EQ(output_activation_max, 255);
-  }
-  FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims,
-                 filter_offset, bias_data, bias_dims, output_offset,
-                 output_multiplier, output_shift, output_activation_min,
-                 output_activation_max, output_data, output_dims, gemm_context);
-}
-
 inline void Relu(const RuntimeShape& input_shape, const float* input_data,
                  const RuntimeShape& output_shape, float* output_data) {
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
@@ -3238,144 +2802,6 @@ inline void LocalResponseNormalization(
   }
 }
 
-inline void Softmax(const SoftmaxParams& params,
-                    const RuntimeShape& input_shape, const float* input_data,
-                    const RuntimeShape& output_shape, float* output_data) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
-  for (int i = 0; i < outer_size; ++i) {
-    // Find max element value which we'll use to ensure numerical stability
-    // taking advantage of the following equality:
-    // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
-    float max = std::numeric_limits<float>::lowest();
-    for (int c = 0; c < depth; ++c) {
-      max = std::max(max, input_data[i * depth + c]);
-    }
-
-    // Compute sum.
-    float sum = 0.f;
-    for (int c = 0; c < depth; ++c) {
-      sum += std::exp((input_data[i * depth + c] - max) * params.beta);
-    }
-
-    // Compute result.
-    for (int c = 0; c < depth; ++c) {
-      output_data[i * depth + c] =
-          std::exp((input_data[i * depth + c] - max) * params.beta) / sum;
-    }
-  }
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
-                    float beta, float* output_data,
-                    const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  params.beta = beta;
-  Softmax(params, input_shape, input_data, output_shape, output_data);
-}
-
-inline void Softmax(const SoftmaxParams& params,
-                    const RuntimeShape& input_shape, const uint8* input_data,
-                    const RuntimeShape& output_shape, uint8* output_data) {
-  const int32 input_beta_multiplier = params.input_multiplier;
-  const int32 input_beta_left_shift = params.input_left_shift;
-  const int diff_min = params.diff_min;
-  // The representation chosen for the input to the exp() function is Q5.26.
-  // We need to leave extra space since values that we skip might be as large as
-  // -32 before multiplying by input_beta_multiplier, and therefore as large as
-  // -16 afterwards.  Note that exp(-8) is definitely not insignificant to
-  // accumulation, but exp(-16) definitely is.
-  static const int kScaledDiffIntegerBits = 5;
-  static const int kAccumulationIntegerBits = 12;
-  using FixedPointScaledDiff =
-      gemmlowp::FixedPoint<int32, kScaledDiffIntegerBits>;
-  using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
-  using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
-
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
-  for (int i = 0; i < outer_size; ++i) {
-    uint8 max_in_row = 0;
-    for (int c = 0; c < depth; ++c) {
-      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
-    }
-
-    FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
-    for (int c = 0; c < depth; ++c) {
-      int32 input_diff =
-          static_cast<int32>(input_data[i * depth + c]) - max_in_row;
-      if (input_diff >= diff_min) {
-        const int32 input_diff_rescaled =
-            MultiplyByQuantizedMultiplierGreaterThanOne(
-                input_diff, input_beta_multiplier, input_beta_left_shift);
-        const FixedPointScaledDiff scaled_diff_f8 =
-            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
-        sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
-                                        exp_on_negative_values(scaled_diff_f8));
-      }
-    }
-
-    int32 fixed_sum_of_exps = sum_of_exps.raw();
-    int headroom_plus_one =
-        CountLeadingZeros(static_cast<uint32>(fixed_sum_of_exps));
-    // This is the number of bits to the left of the binary point above 1.0.
-    // Consider fixed_sum_of_exps=1.25.  In that case shifted_scale=0.8 and
-    // no later adjustment will be needed.
-    int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one;
-    int32 shifted_sum_minus_one = static_cast<int32>(
-        (static_cast<uint32>(fixed_sum_of_exps) << headroom_plus_one) -
-        (static_cast<uint32>(1) << 31));
-
-    FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1(
-        FixedPoint0::FromRaw(shifted_sum_minus_one));
-
-    for (int c = 0; c < depth; ++c) {
-      int32 input_diff =
-          static_cast<int32>(input_data[i * depth + c]) - max_in_row;
-      if (input_diff >= diff_min) {
-        const int32 input_diff_rescaled =
-            MultiplyByQuantizedMultiplierGreaterThanOne(
-                input_diff, input_beta_multiplier, input_beta_left_shift);
-        const FixedPointScaledDiff scaled_diff_f8 =
-            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
-
-        FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
-        int32 unsat_output = gemmlowp::RoundingDivideByPOT(
-            (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8);
-
-        output_data[i * depth + c] = static_cast<uint8>(
-            std::max(std::min(unsat_output, static_cast<int32>(255)), 0));
-
-      } else {
-        output_data[i * depth + c] = 0;
-      }
-    }
-  }
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy
-inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
-                    int32 input_beta_multiplier, int32 input_beta_left_shift,
-                    int diff_min, uint8* output_data,
-                    const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  params.input_multiplier = input_beta_multiplier;
-  params.input_left_shift = input_beta_left_shift;
-  params.diff_min = diff_min;
-  Softmax(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void LogSoftmax(const SoftmaxParams& params,
                        const RuntimeShape& input_shape, const float* input_data,
                        const RuntimeShape& output_shape, float* output_data) {
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/softmax.h b/tensorflow/contrib/lite/kernels/internal/reference/softmax.h
new file mode 100644
index 0000000000..006174e8db
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/reference/softmax.h
@@ -0,0 +1,202 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
+#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/round.h"
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace reference_ops {
+
+inline void Softmax(const SoftmaxParams& params,
+                    const RuntimeShape& input_shape, const float* input_data,
+                    const RuntimeShape& output_shape, float* output_data) {
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i) {
+    // Find max element value which we'll use to ensure numerical stability
+    // taking advantage of the following equality:
+    // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
+    float max = std::numeric_limits<float>::lowest();
+    for (int c = 0; c < depth; ++c) {
+      max = std::max(max, input_data[i * depth + c]);
+    }
+
+    // Compute sum.
+    float sum = 0.f;
+    for (int c = 0; c < depth; ++c) {
+      sum += std::exp((input_data[i * depth + c] - max) * params.beta);
+    }
+
+    // Compute result.
+    for (int c = 0; c < depth; ++c) {
+      output_data[i * depth + c] =
+          std::exp((input_data[i * depth + c] - max) * params.beta) / sum;
+    }
+  }
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
+                    float beta, float* output_data,
+                    const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.beta = beta;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Softmax(const SoftmaxParams& params,
+                    const RuntimeShape& input_shape, const uint8* input_data,
+                    const RuntimeShape& output_shape, uint8* output_data) {
+  const int32 input_beta_multiplier = params.input_multiplier;
+  const int32 input_beta_left_shift = params.input_left_shift;
+  const int diff_min = params.diff_min;
+  // The representation chosen for the input to the exp() function is Q5.26.
+  // We need to leave extra space since values that we skip might be as large as
+  // -32 before multiplying by input_beta_multiplier, and therefore as large as
+  // -16 afterwards.  Note that exp(-8) is definitely not insignificant to
+  // accumulation, but exp(-16) definitely is.
+  static const int kScaledDiffIntegerBits = 5;
+  static const int kAccumulationIntegerBits = 12;
+  using FixedPointScaledDiff =
+      gemmlowp::FixedPoint<int32, kScaledDiffIntegerBits>;
+  using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
+  using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
+
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i) {
+    uint8 max_in_row = 0;
+    for (int c = 0; c < depth; ++c) {
+      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
+    }
+
+    FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
+    for (int c = 0; c < depth; ++c) {
+      int32 input_diff =
+          static_cast<int32>(input_data[i * depth + c]) - max_in_row;
+      if (input_diff >= diff_min) {
+        const int32 input_diff_rescaled =
+            MultiplyByQuantizedMultiplierGreaterThanOne(
+                input_diff, input_beta_multiplier, input_beta_left_shift);
+        const FixedPointScaledDiff scaled_diff_f8 =
+            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
+        sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
+                                        exp_on_negative_values(scaled_diff_f8));
+      }
+    }
+
+    int32 fixed_sum_of_exps = sum_of_exps.raw();
+    int headroom_plus_one =
+        CountLeadingZeros(static_cast<uint32>(fixed_sum_of_exps));
+    // This is the number of bits to the left of the binary point above 1.0.
+    // Consider fixed_sum_of_exps=1.25.  In that case shifted_scale=0.8 and
+    // no later adjustment will be needed.
+    int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one;
+    int32 shifted_sum_minus_one = static_cast<int32>(
+        (static_cast<uint32>(fixed_sum_of_exps) << headroom_plus_one) -
+        (static_cast<uint32>(1) << 31));
+
+    FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1(
+        FixedPoint0::FromRaw(shifted_sum_minus_one));
+
+    for (int c = 0; c < depth; ++c) {
+      int32 input_diff =
+          static_cast<int32>(input_data[i * depth + c]) - max_in_row;
+      if (input_diff >= diff_min) {
+        const int32 input_diff_rescaled =
+            MultiplyByQuantizedMultiplierGreaterThanOne(
+                input_diff, input_beta_multiplier, input_beta_left_shift);
+        const FixedPointScaledDiff scaled_diff_f8 =
+            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
+
+        FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
+        int32 unsat_output = gemmlowp::RoundingDivideByPOT(
+            (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8);
+
+        output_data[i * depth + c] = static_cast<uint8>(
+            std::max(std::min(unsat_output, static_cast<int32>(255)),
+                     static_cast<int32>(0)));
+
+      } else {
+        output_data[i * depth + c] = 0;
+      }
+    }
+  }
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy
+inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
+                    int32 input_beta_multiplier, int32 input_beta_left_shift,
+                    int diff_min, uint8* output_data,
+                    const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.input_multiplier = input_beta_multiplier;
+  params.input_left_shift = input_beta_left_shift;
+  params.diff_min = diff_min;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Performs softmax along the input of size (input_size * batch_size).
+inline void Softmax(const float* in, const int input_size, const int batch_size,
+                    const float beta, float* out) {
+  //  TF_LITE_ASSERT(input_size > 0);
+
+  // For each batch
+  for (int b = 0; b < batch_size; b++) {
+    // Find the max coeff.
+    float max_coeff = in[0];
+    for (int i = 1; i < input_size; i++) {
+      if (in[i] > max_coeff) max_coeff = in[i];
+    }
+
+    // Compute the normalized sum of exps.
+    float exp_sum = 0.0;
+    for (int i = 0; i < input_size; i++) {
+      out[i] = std::exp((in[i] - max_coeff) * beta);
+      exp_sum += out[i];
+    }
+
+    // Divide by the sum of exps.
+    float reciprocal_sum_exp = 1.f / exp_sum;
+    for (int i = 0; i < input_size; i++) {
+      out[i] *= reciprocal_sum_exp;
+    }
+
+    // Advance in and out pointers for the next batch.
+    in += input_size;
+    out += input_size;
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index 3e0308721e..a3a5994c9c 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -15,8 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TYPES_H_
 #define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TYPES_H_
 
+#include <algorithm>
 #include <cstring>
-#include <iterator>
 
 #include "absl/base/macros.h"
 #include "tensorflow/contrib/lite/kernels/internal/compatibility.h"
@@ -126,7 +126,11 @@ class RuntimeShape {
 
   explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) {
     if (dimensions_count > kMaxSmallSize) {
+#ifdef TF_LITE_STATIC_MEMORY
+      TFLITE_CHECK(false && "No shape resizing supported on this platform");
+#else   // TF_LITE_STATIC_MEMORY
       dims_pointer_ = new int32[dimensions_count];
+#endif  // TF_LITE_STATIC_MEMORY
     }
   }
 
@@ -161,7 +165,11 @@ class RuntimeShape {
 
   ~RuntimeShape() {
     if (size_ > kMaxSmallSize) {
+#ifdef TF_LITE_STATIC_MEMORY
+      TFLITE_CHECK(false && "No shape resizing supported on this platform");
+#else   // TF_LITE_STATIC_MEMORY
       delete[] dims_pointer_;
+#endif  // TF_LITE_STATIC_MEMORY
     }
   }
 
@@ -192,11 +200,19 @@ class RuntimeShape {
 
   inline void Resize(int dimensions_count) {
     if (size_ > kMaxSmallSize) {
+#ifdef TF_LITE_STATIC_MEMORY
+      TFLITE_CHECK(false && "No shape resizing supported on this platform");
+#else   // TF_LITE_STATIC_MEMORY
       delete[] dims_pointer_;
+#endif  // TF_LITE_STATIC_MEMORY
     }
     size_ = dimensions_count;
     if (dimensions_count > kMaxSmallSize) {
+#ifdef TF_LITE_STATIC_MEMORY
+      TFLITE_CHECK(false && "No shape resizing supported on this platform");
+#else   // TF_LITE_STATIC_MEMORY
       dims_pointer_ = new int32[dimensions_count];
+#endif  // TF_LITE_STATIC_MEMORY
     }
   }
 
diff --git a/tensorflow/contrib/lite/kernels/kernel_util.cc b/tensorflow/contrib/lite/kernels/kernel_util.cc
index 08f942c933..503ef28459 100644
--- a/tensorflow/contrib/lite/kernels/kernel_util.cc
+++ b/tensorflow/contrib/lite/kernels/kernel_util.cc
@@ -107,6 +107,9 @@ bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
   return TfLiteIntArrayEqual(input1->dims, input2->dims);
 }
 
+// TODO(petewarden): Having macros around this is ugly, look at other strategies
+// before replicating this approach elsewhere.
+#ifndef TF_LITE_STATIC_MEMORY
 TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
                                         const TfLiteTensor* input1,
                                         const TfLiteTensor* input2,
@@ -125,5 +128,6 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
   *output_shape = shape.release();
   return kTfLiteOk;
 }
+#endif  // TF_LITE_STATIC_MEMORY
 
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/op_macros.h b/tensorflow/contrib/lite/kernels/op_macros.h
index d66364c4d8..11e814daee 100644
--- a/tensorflow/contrib/lite/kernels/op_macros.h
+++ b/tensorflow/contrib/lite/kernels/op_macros.h
@@ -15,17 +15,55 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_OP_MACROS_H_
 #define TENSORFLOW_CONTRIB_LITE_KERNELS_OP_MACROS_H_
 
+// If we're on a platform without standard IO functions, fall back to a
+// non-portable function.
+#ifdef TF_LITE_MCU_DEBUG_LOG
+
+// This header is pulled in from the support library at
+// https://github.com/google/stm32_bare_lib
+#include <debug_log.h>
+
+#define DEBUG_LOG(x) \
+  do {               \
+    DebugLog(x);     \
+  } while (0)
+
+inline void InfiniteLoop() {
+  DEBUG_LOG("HALTED\n");
+  while (1) {
+  }
+}
+#define TFLITE_ASSERT_FALSE InfiniteLoop();
+#define TFLITE_ABORT InfiniteLoop();
+
+#else  // TF_LITE_MCU_DEBUG_LOG
+
+#include <cassert>
 #include <cstdio>
+#include <cstdlib>
 
-#define TF_LITE_FATAL(msg)          \
-  do {                              \
-    fprintf(stderr, "%s\n", (msg)); \
-    exit(1);                        \
+#define DEBUG_LOG(x)            \
+  do {                          \
+    fprintf(stderr, "%s", (x)); \
   } while (0)
+
+#define TFLITE_ASSERT_FALSE assert(false)
+#define TFLITE_ABORT abort()
+
+#endif  // TF_LITE_MCU_DEBUG_LOG
+
+#define TF_LITE_FATAL(msg)  \
+  do {                      \
+    DEBUG_LOG(msg);         \
+    DEBUG_LOG("\nFATAL\n"); \
+    TFLITE_ABORT;           \
+  } while (0)
+
 #define TF_LITE_ASSERT(x)        \
   do {                           \
     if (!(x)) TF_LITE_FATAL(#x); \
   } while (0)
+
 #define TF_LITE_ASSERT_EQ(x, y)                            \
   do {                                                     \
     if ((x) != (y)) TF_LITE_FATAL(#x " didn't equal " #y); \
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index 6311d60b91..ea2817beec 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -189,6 +189,13 @@ std::vector<int> FlatBufferIntArrayToVector(T* flat_array) {
   return ret;
 }
 
+// Used to determine how the op data parsing function creates its working space.
+class MallocDataAllocator : public BuiltinDataAllocator {
+ public:
+  void* Allocate(size_t size) override { return malloc(size); }
+  void Deallocate(void* data) override { free(data); }
+};
+
 }  // namespace
 
 TfLiteStatus InterpreterBuilder::ParseNodes(
@@ -234,8 +241,9 @@ TfLiteStatus InterpreterBuilder::ParseNodes(
           op->custom_options()->size(), nullptr, registration);
     } else {
       void* builtin_data = nullptr;
-      TF_LITE_ENSURE_STATUS(
-          ParseOpData(op, op_type, error_reporter_, &builtin_data));
+      MallocDataAllocator malloc_allocator;
+      TF_LITE_ENSURE_STATUS(ParseOpData(op, op_type, error_reporter_,
+                                        &malloc_allocator, &builtin_data));
       interpreter->AddNodeWithParameters(
           FlatBufferIntArrayToVector(op->inputs()),
           FlatBufferIntArrayToVector(op->outputs()), nullptr, 0, builtin_data,
-- 
GitLab


From 3b8442658321d4f48a96dc1580a646606aa17b8c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 15:57:50 -0700
Subject: [PATCH 0623/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 214346818

---
 tensorflow/go/op/wrappers.go | 409 +++++++++++++++++++++++++++++++++++
 1 file changed, 409 insertions(+)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index e6e07c8437..8b60e6fd25 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -2461,6 +2461,64 @@ func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...Gathe
 	return op.Output(0)
 }
 
+// LowerBoundAttr is an optional argument to LowerBound.
+type LowerBoundAttr func(optionalAttr)
+
+// LowerBoundOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_INT32
+func LowerBoundOutType(value tf.DataType) LowerBoundAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Applies lower_bound(sorted_search_values, values) along each row.
+//
+// Each set of rows with the same index in (sorted_inputs, values) is treated
+// independently.  The resulting row is the equivalent of calling
+// `np.searchsorted(sorted_inputs, values, side='left')`.
+//
+// The result is not a global index to the entire
+// `Tensor`, but rather just the index in the last dimension.
+//
+// A 2-D example:
+//   sorted_sequence = [[0, 3, 9, 9, 10],
+//                      [1, 2, 3, 4, 5]]
+//   values = [[2, 4, 9],
+//             [0, 2, 6]]
+//
+//   result = LowerBound(sorted_sequence, values)
+//
+//   result == [[1, 2, 2],
+//              [0, 1, 5]]
+//
+// Arguments:
+//	sorted_inputs: 2-D Tensor where each row is ordered.
+//	values: 2-D Tensor with the same numbers of rows as `sorted_search_values`. Contains
+// the values that will be searched for in `sorted_search_values`.
+//
+// Returns A `Tensor` with the same shape as `values`.  It contains the first scalar index
+// into the last dimension where values can be inserted without changing the
+// ordered property.
+func LowerBound(scope *Scope, sorted_inputs tf.Output, values tf.Output, optional ...LowerBoundAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LowerBound",
+		Input: []tf.Input{
+			sorted_inputs, values,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Creates a tensor filled with a scalar value.
 //
 // This operation creates a tensor of shape `dims` and fills it with `value`.
@@ -6000,6 +6058,44 @@ func QuantizedAvgPool(scope *Scope, input tf.Output, min_input tf.Output, max_in
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Extract `patches` from `input` and put them in the "depth" output
+// dimension. 3D extension of `extract_image_patches`.
+//
+// Arguments:
+//	input: 5-D Tensor with shape `[batch, in_planes, in_rows, in_cols, depth]`.
+//	ksizes: The size of the sliding window for each dimension of `input`.
+//	strides: 1-D of length 5. How far the centers of two consecutive patches are in
+// `input`. Must be: `[1, stride_planes, stride_rows, stride_cols, 1]`.
+//	padding: The type of padding algorithm to use.
+//
+// We specify the size-related attributes as:
+//
+// ```python
+//       ksizes = [1, ksize_planes, ksize_rows, ksize_cols, 1]
+//       strides = [1, stride_planes, strides_rows, strides_cols, 1]
+// ```
+//
+// Returns 5-D Tensor with shape `[batch, out_planes, out_rows, out_cols,
+// ksize_planes * ksize_rows * ksize_cols * depth]` containing patches
+// with size `ksize_planes x ksize_rows x ksize_cols x depth` vectorized
+// in the "depth" dimension. Note `out_planes`, `out_rows` and `out_cols`
+// are the dimensions of the output patches.
+func ExtractVolumePatches(scope *Scope, input tf.Output, ksizes []int64, strides []int64, padding string) (patches tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "ExtractVolumePatches",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // FractionalAvgPoolAttr is an optional argument to FractionalAvgPool.
 type FractionalAvgPoolAttr func(optionalAttr)
 
@@ -6570,6 +6666,41 @@ func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output,
 	return scope.AddOperation(opspec)
 }
 
+// Gets next element for the provided shard number.
+//
+// Arguments:
+//	multi_device_iterator: A MultiDeviceIterator resource.
+//	shard_num: Integer representing which shard to fetch data for.
+//	incarnation_id: Which incarnation of the MultiDeviceIterator is running.
+//	output_types: The type list for the return values.
+//	output_shapes: The list of shapes being produced.
+//
+// Returns Result of the get_next on the dataset.
+func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf.Output, shard_num tf.Output, incarnation_id tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "MultiDeviceIteratorGetNextFromShard",
+		Input: []tf.Input{
+			multi_device_iterator, shard_num, incarnation_id,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("MultiDeviceIteratorGetNextFromShard", err)
+		return
+	}
+	return components
+}
+
 // Computes rectified linear gradients for a Relu operation.
 //
 // Arguments:
@@ -9792,6 +9923,29 @@ func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPe
 	return op.Output(0)
 }
 
+// Initializes the multi device iterator with the given dataset.
+//
+// Arguments:
+//	dataset: Dataset to be iterated upon.
+//	multi_device_iterator: A MultiDeviceIteratorResource.
+//	max_buffer_size: The maximum size of the host side per device buffer to keep.
+//
+// Returns An int64 indicating which incarnation of the MultiDeviceIterator
+// is running.
+func MultiDeviceIteratorInit(scope *Scope, dataset tf.Output, multi_device_iterator tf.Output, max_buffer_size tf.Output) (incarnation_id tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MultiDeviceIteratorInit",
+		Input: []tf.Input{
+			dataset, multi_device_iterator, max_buffer_size,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the gradient of `igamma(a, x)` wrt `a`.
 func IgammaGradA(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
@@ -12459,6 +12613,26 @@ func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, upd
 	return scope.AddOperation(opspec)
 }
 
+// Produces a string handle for the given MultiDeviceIterator.
+//
+// Arguments:
+//	multi_device_iterator: A MultiDeviceIterator resource.
+//
+// Returns A string representing the resource.
+func MultiDeviceIteratorToStringHandle(scope *Scope, multi_device_iterator tf.Output) (string_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MultiDeviceIteratorToStringHandle",
+		Input: []tf.Input{
+			multi_device_iterator,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Applies softmax to a batched N-D `SparseTensor`.
 //
 // The inputs represent an N-D SparseTensor  with logical shape `[..., B, C]`
@@ -12913,6 +13087,66 @@ func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
 	return op.Output(0)
 }
 
+// StringFormatAttr is an optional argument to StringFormat.
+type StringFormatAttr func(optionalAttr)
+
+// StringFormatTemplate sets the optional template attribute to value.
+//
+// value: A string, the template to format tensor summaries into.
+// If not specified, defaults to "%s"
+func StringFormatTemplate(value string) StringFormatAttr {
+	return func(m optionalAttr) {
+		m["template"] = value
+	}
+}
+
+// StringFormatPlaceholder sets the optional placeholder attribute to value.
+//
+// value: A string, at each placeholder in the template a subsequent tensor summary will be inserted.
+// If not specified, defaults to "%s"
+func StringFormatPlaceholder(value string) StringFormatAttr {
+	return func(m optionalAttr) {
+		m["placeholder"] = value
+	}
+}
+
+// StringFormatSummarize sets the optional summarize attribute to value.
+//
+// value: When formatting the tensor summaries print the first and last summarize entries of each tensor dimension.
+// If not specified, defaults to 3
+func StringFormatSummarize(value int64) StringFormatAttr {
+	return func(m optionalAttr) {
+		m["summarize"] = value
+	}
+}
+
+// Formats a string template using a list of tensors.
+//
+// Formats a string template using a list of tensors, pretty-printing tensor summaries.
+//
+// Arguments:
+//	inputs: The list of tensors to format into the placeholder string.
+//
+// Returns = The resulting string scalar.
+func StringFormat(scope *Scope, inputs []tf.Output, optional ...StringFormatAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StringFormat",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // ShapeAttr is an optional argument to Shape.
 type ShapeAttr func(optionalAttr)
 
@@ -16772,6 +17006,64 @@ func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c
 	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
 
+// UpperBoundAttr is an optional argument to UpperBound.
+type UpperBoundAttr func(optionalAttr)
+
+// UpperBoundOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_INT32
+func UpperBoundOutType(value tf.DataType) UpperBoundAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Applies upper_bound(sorted_search_values, values) along each row.
+//
+// Each set of rows with the same index in (sorted_inputs, values) is treated
+// independently.  The resulting row is the equivalent of calling
+// `np.searchsorted(sorted_inputs, values, side='right')`.
+//
+// The result is not a global index to the entire
+// `Tensor`, but rather just the index in the last dimension.
+//
+// A 2-D example:
+//   sorted_sequence = [[0, 3, 9, 9, 10],
+//                      [1, 2, 3, 4, 5]]
+//   values = [[2, 4, 9],
+//             [0, 2, 6]]
+//
+//   result = UpperBound(sorted_sequence, values)
+//
+//   result == [[1, 2, 4],
+//              [0, 2, 5]]
+//
+// Arguments:
+//	sorted_inputs: 2-D Tensor where each row is ordered.
+//	values: 2-D Tensor with the same numbers of rows as `sorted_search_values`. Contains
+// the values that will be searched for in `sorted_search_values`.
+//
+// Returns A `Tensor` with the same shape as `values`.  It contains the last scalar index
+// into the last dimension where values can be inserted without changing the
+// ordered property.
+func UpperBound(scope *Scope, sorted_inputs tf.Output, values tf.Output, optional ...UpperBoundAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "UpperBound",
+		Input: []tf.Input{
+			sorted_inputs, values,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // FractionalMaxPoolGradAttr is an optional argument to FractionalMaxPoolGrad.
 type FractionalMaxPoolGradAttr func(optionalAttr)
 
@@ -23220,6 +23512,58 @@ func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, o
 	return op.Output(0)
 }
 
+// MultiDeviceIteratorFromStringHandleAttr is an optional argument to MultiDeviceIteratorFromStringHandle.
+type MultiDeviceIteratorFromStringHandleAttr func(optionalAttr)
+
+// MultiDeviceIteratorFromStringHandleOutputTypes sets the optional output_types attribute to value.
+//
+// value: The type list for the return values.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func MultiDeviceIteratorFromStringHandleOutputTypes(value []tf.DataType) MultiDeviceIteratorFromStringHandleAttr {
+	return func(m optionalAttr) {
+		m["output_types"] = value
+	}
+}
+
+// MultiDeviceIteratorFromStringHandleOutputShapes sets the optional output_shapes attribute to value.
+//
+// value: The list of shapes being produced.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func MultiDeviceIteratorFromStringHandleOutputShapes(value []tf.Shape) MultiDeviceIteratorFromStringHandleAttr {
+	return func(m optionalAttr) {
+		m["output_shapes"] = value
+	}
+}
+
+// Generates a MultiDeviceIterator resource from its provided string handle.
+//
+// Arguments:
+//	string_handle: String representing the resource.
+//
+// Returns A MultiDeviceIterator resource.
+func MultiDeviceIteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ...MultiDeviceIteratorFromStringHandleAttr) (multi_device_iterator tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MultiDeviceIteratorFromStringHandle",
+		Input: []tf.Input{
+			string_handle,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // MutableHashTableV2Attr is an optional argument to MutableHashTableV2.
 type MutableHashTableV2Attr func(optionalAttr)
 
@@ -24788,6 +25132,45 @@ func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// PrintV2Attr is an optional argument to PrintV2.
+type PrintV2Attr func(optionalAttr)
+
+// PrintV2OutputStream sets the optional output_stream attribute to value.
+//
+// value: A string specifying the output stream or logging level to print to.
+// If not specified, defaults to "stderr"
+func PrintV2OutputStream(value string) PrintV2Attr {
+	return func(m optionalAttr) {
+		m["output_stream"] = value
+	}
+}
+
+// Prints a string scalar.
+//
+// Prints a string scalar to the desired output_stream.
+//
+// Arguments:
+//	input: The string scalar to print.
+//
+// Returns the created operation.
+func PrintV2(scope *Scope, input tf.Output, optional ...PrintV2Attr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "PrintV2",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
 // QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2.
 type QueueEnqueueManyV2Attr func(optionalAttr)
 
@@ -31083,6 +31466,32 @@ func FractionalMaxPool(scope *Scope, value tf.Output, pooling_ratio []float32, o
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Creates a MultiDeviceIterator resource.
+//
+// Arguments:
+//	devices: A list of devices the iterator works across.
+//	shared_name: If non-empty, this resource will be shared under the given name
+// across multiple sessions.
+//	container: If non-empty, this resource is placed in the given container.
+// Otherwise, a default container is used.
+//	output_types: The type list for the return values.
+//	output_shapes: The list of shapes being produced.
+//
+// Returns Handle to the resource created.
+func MultiDeviceIterator(scope *Scope, devices []string, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"devices": devices, "shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "MultiDeviceIterator",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Deprecated. Use TensorArraySizeV3
 //
 // DEPRECATED at GraphDef version 26: Use TensorArraySizeV3
-- 
GitLab


From 29a67eaedd8d95866011bb1c87a9d1739d448686 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 24 Sep 2018 16:08:41 -0700
Subject: [PATCH 0624/1357] Fix typo in error message.

PiperOrigin-RevId: 214348730
---
 tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index dfdcf1875d..01a18f4f8e 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -402,7 +402,7 @@ void WarnIfBadPtxasVersion(const string& ptxas_path) {
     LOG(WARNING)
         << "*** WARNING *** You are using ptxas " << vmaj << "." << vmin << "."
         << vdot
-        << ", which older than 9.2.88. ptxas 9.x before 9.2.88 is known to "
+        << ", which is older than 9.2.88. ptxas 9.x before 9.2.88 is known to "
            "miscompile XLA code, leading to incorrect results or "
            "invalid-address errors.\n\nYou do not need to update to CUDA "
            "9.2.88; cherry-picking the ptxas binary is sufficient.";
-- 
GitLab


From d25b23d5ec6a0a7828e86fa8868f7a6574f9f827 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 24 Sep 2018 16:09:06 -0700
Subject: [PATCH 0625/1357] Remove the public uses of CreateCudnnConv* in the
 favor of CloneWithNewOperands. CreateCudnnConv* is easy to use wrongly, as it
 doesn't propagate backend_config.

PiperOrigin-RevId: 214348788
---
 .../service/gpu/cudnn_convolution_rewriter.cc | 46 ++++++++++++----
 .../xla/service/gpu/ir_emission_utils.cc      | 53 -------------------
 .../xla/service/gpu/ir_emission_utils.h       | 22 --------
 .../compiler/xla/service/gpu/pad_insertion.cc | 25 ++++-----
 4 files changed, 49 insertions(+), 97 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
index 228379a248..2834d47412 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
@@ -35,6 +35,32 @@ namespace gpu {
 
 namespace {
 
+HloInstruction* CreateCudnnConv(const char* call_target, const Shape& shape,
+                                HloInstruction* lhs, HloInstruction* rhs,
+                                const Window& window,
+                                const ConvolutionDimensionNumbers& dnums,
+                                int64 feature_group_count) {
+  HloComputation* computation = lhs->parent();
+
+  // This call returns a tuple of (conv_result, scratch_memory), where
+  // conv_result is the actual result of the convolution, and scratch_memory is
+  // temporary memory used by cudnn.
+  //
+  // At the moment, we don't know how much scratch memory this conv is going to
+  // use, so we put u8[0] in this place.  Later on another pass will choose
+  // which conv algorithm to use, and at that point we'll modify the shape of
+  // this second tuple element.
+  Shape call_shape =
+      ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U8, {0})});
+
+  HloInstruction* custom_call = computation->AddInstruction(
+      HloInstruction::CreateCustomCall(call_shape, {lhs, rhs}, call_target));
+  custom_call->set_window(window);
+  custom_call->set_convolution_dimension_numbers(dnums);
+  custom_call->set_feature_group_count(feature_group_count);
+  return custom_call;
+}
+
 bool CanImplementAsCudnnForwardConv(HloInstruction* conv) {
   const ConvolutionDimensionNumbers& dnums =
       conv->convolution_dimension_numbers();
@@ -462,24 +488,24 @@ StatusOr<bool> RunOnInstruction(HloInstruction* conv) {
 
     std::tie(match, window, dnums) = MatchBackwardFilter(conv);
     if (match) {
-      return CreateCudnnConvBackwardFilter(
-          conv->shape(), conv->mutable_operand(0), conv->mutable_operand(1),
-          window, dnums, conv->feature_group_count());
+      return CreateCudnnConv(kCudnnConvBackwardFilterCallTarget, conv->shape(),
+                             conv->mutable_operand(0), conv->mutable_operand(1),
+                             window, dnums, conv->feature_group_count());
     }
 
     std::tie(match, window, dnums, rhs) = MatchBackwardInput(conv);
     if (match) {
-      return CreateCudnnConvBackwardInput(conv->shape(),
-                                          conv->mutable_operand(0), rhs, window,
-                                          dnums, conv->feature_group_count());
+      return CreateCudnnConv(kCudnnConvBackwardInputCallTarget, conv->shape(),
+                             conv->mutable_operand(0), rhs, window, dnums,
+                             conv->feature_group_count());
     }
 
     // If all else fails, try a forward convolution.
     if (CanImplementAsCudnnForwardConv(conv)) {
-      return CreateCudnnConvForward(conv->shape(), conv->mutable_operand(0),
-                                    conv->mutable_operand(1), conv->window(),
-                                    conv->convolution_dimension_numbers(),
-                                    conv->feature_group_count());
+      return CreateCudnnConv(
+          kCudnnConvForwardCallTarget, conv->shape(), conv->mutable_operand(0),
+          conv->mutable_operand(1), conv->window(),
+          conv->convolution_dimension_numbers(), conv->feature_group_count());
     }
 
     return nullptr;
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
index b57ac5fd09..76757faf60 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
@@ -145,59 +145,6 @@ bool ImplementedAsLibraryCall(const HloInstruction& hlo) {
          IsCustomCallToDnnConvolution(hlo);
 }
 
-static HloInstruction* CreateCudnnConv(const char* call_target,
-                                       const Shape& shape, HloInstruction* lhs,
-                                       HloInstruction* rhs,
-                                       const Window& window,
-                                       const ConvolutionDimensionNumbers& dnums,
-                                       int64 feature_group_count) {
-  HloComputation* computation = lhs->parent();
-
-  // This call returns a tuple of (conv_result, scratch_memory), where
-  // conv_result is the actual result of the convolution, and scratch_memory is
-  // temporary memory used by cudnn.
-  //
-  // At the moment, we don't know how much scratch memory this conv is going to
-  // use, so we put u8[0] in this place.  Later on another pass will choose
-  // which conv algorithm to use, and at that point we'll modify the shape of
-  // this second tuple element.
-  Shape call_shape =
-      ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U8, {0})});
-
-  HloInstruction* custom_call = computation->AddInstruction(
-      HloInstruction::CreateCustomCall(call_shape, {lhs, rhs}, call_target));
-  custom_call->set_window(window);
-  custom_call->set_convolution_dimension_numbers(dnums);
-  custom_call->set_feature_group_count(feature_group_count);
-  return custom_call;
-}
-
-HloInstruction* CreateCudnnConvForward(const Shape& shape,
-                                       HloInstruction* input,
-                                       HloInstruction* kernel,
-                                       const Window& window,
-                                       const ConvolutionDimensionNumbers& dnums,
-                                       int64 feature_group_count) {
-  return CreateCudnnConv(kCudnnConvForwardCallTarget, shape, input, kernel,
-                         window, dnums, feature_group_count);
-}
-
-HloInstruction* CreateCudnnConvBackwardInput(
-    const Shape& shape, HloInstruction* output, HloInstruction* reverse_filter,
-    const Window& window, const ConvolutionDimensionNumbers& dnums,
-    int64 feature_group_count) {
-  return CreateCudnnConv(kCudnnConvBackwardInputCallTarget, shape, output,
-                         reverse_filter, window, dnums, feature_group_count);
-}
-
-HloInstruction* CreateCudnnConvBackwardFilter(
-    const Shape& shape, HloInstruction* input, HloInstruction* output,
-    const Window& window, const ConvolutionDimensionNumbers& dnums,
-    int64 feature_group_count) {
-  return CreateCudnnConv(kCudnnConvBackwardFilterCallTarget, shape, input,
-                         output, window, dnums, feature_group_count);
-}
-
 bool IsReductionToVector(const HloInstruction& reduce) {
   if (HloOpcode::kReduce != reduce.opcode()) {
     return false;
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
index 19bd3c6330..744346abf3 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
@@ -128,28 +128,6 @@ extern const char* const kCudnnConvBackwardFilterCallTarget;
 // kConvolution opcode.
 bool IsCustomCallToDnnConvolution(const HloInstruction& hlo);
 
-// Creates a CustomCall for a cudnn forward/backward-input/backward-filter conv.
-// Note that these CustomCalls return a tuple (conv_result, scratch_memory).  If
-// you want just the conv result, you'll need to get-tuple-element the value
-// returned by this function.
-//
-// The created cudnn call will use the default cudnn algorithm and no scratch
-// space.
-HloInstruction* CreateCudnnConvForward(const Shape& shape,
-                                       HloInstruction* input,
-                                       HloInstruction* kernel,
-                                       const Window& window,
-                                       const ConvolutionDimensionNumbers& dnums,
-                                       int64 feature_group_count);
-HloInstruction* CreateCudnnConvBackwardInput(
-    const Shape& shape, HloInstruction* output, HloInstruction* reverse_filter,
-    const Window& window, const ConvolutionDimensionNumbers& dnums,
-    int64 feature_group_count);
-HloInstruction* CreateCudnnConvBackwardFilter(
-    const Shape& shape, HloInstruction* input, HloInstruction* output,
-    const Window& window, const ConvolutionDimensionNumbers& dnums,
-    int64 feature_group_count);
-
 // Returns true if `hlo` will be implemented as a library call, e.g. cuBLAS gemm
 // or cuDNN convolution.
 bool ImplementedAsLibraryCall(const HloInstruction& hlo);
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
index 2a6415d0b6..eead408f10 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
@@ -161,12 +161,10 @@ bool PadInsertion::CanonicalizeForwardConvolution(HloInstruction* conv) {
 
   // The conv CustomCall returns a tuple (conv_result, scratch_buffer).  Extract
   // out the shape of conv_result.
-  Shape old_conv_shape = conv->shape().tuple_shapes(0);
-
   VLOG(1) << "Canonicalizing forward conv";
-  auto new_conv = CreateCudnnConvForward(
-      old_conv_shape, new_input, new_kernel, new_conv_window,
-      conv->convolution_dimension_numbers(), conv->feature_group_count());
+  auto new_conv = conv->parent()->AddInstruction(
+      conv->CloneWithNewOperands(conv->shape(), {new_input, new_kernel}));
+  new_conv->set_window(new_conv_window);
   VLOG(1) << "Replacing:\n  " << conv->ToString() << "\nwith:\n  "
           << new_conv->ToString();
   TF_CHECK_OK(conv->parent()->ReplaceInstruction(conv, new_conv));
@@ -242,10 +240,10 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution(
 
   // The shape of the backward_conv CustomCall is a tuple (conv_result,
   // scratch_buffer).  Extract out the shape of conv_result.
-  Shape backward_conv_shape = backward_conv->shape().tuple_shapes(0);
-  HloInstruction* new_backward_conv = CreateCudnnConvBackwardFilter(
-      backward_conv_shape, padded_input, output, new_backward_conv_window,
-      backward_conv_dnums, backward_conv->feature_group_count());
+  HloInstruction* new_backward_conv =
+      computation->AddInstruction(backward_conv->CloneWithNewOperands(
+          backward_conv->shape(), {padded_input, output}));
+  new_backward_conv->set_window(new_backward_conv_window);
 
   VLOG(1) << "Canonicalizing backward filter conv";
   VLOG(1) << "Replacing:\n  " << backward_conv->ToString() << "\nwith:\n  "
@@ -308,9 +306,12 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution(
   HloInstruction* output = backward_conv->mutable_operand(0);
   HloInstruction* filter = backward_conv->mutable_operand(1);
 
-  HloInstruction* new_backward_conv_call = CreateCudnnConvBackwardInput(
-      new_backward_conv_shape, output, filter, new_backward_conv_window,
-      backward_conv_dnums, backward_conv->feature_group_count());
+  HloInstruction* new_backward_conv_call =
+      computation->AddInstruction(backward_conv->CloneWithNewOperands(
+          ShapeUtil::MakeTupleShape(
+              {new_backward_conv_shape, ShapeUtil::MakeShape(U8, {0})}),
+          {output, filter}));
+  new_backward_conv_call->set_window(new_backward_conv_window);
 
   // The CustomCall created above returns a tuple (conv_result, scratch_memory).
   // Extract out the two elements.
-- 
GitLab


From 6c40bc717442d56f0b6a60658b05f0549afd69ee Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 24 Sep 2018 16:13:26 -0700
Subject: [PATCH 0626/1357] BEGIN_PUBLIC Temporary rollback to fix forward
 compatibility. END_PUBLIC

Automated rollback of commit 0c48c703c3c1455cf3b2c0e47e2108e053ff83e2. Revert #21798.

PiperOrigin-RevId: 214349479
---
 .../contrib/losses/python/losses/loss_ops.py  | 37 +++++++++---
 .../contrib/metrics/python/ops/metric_ops.py  | 50 +++++++++-------
 tensorflow/contrib/rate/rate.py               | 11 +++-
 .../python/keras/engine/training_utils.py     |  2 +-
 tensorflow/python/keras/metrics.py            | 19 +++++-
 tensorflow/python/kernel_tests/losses_test.py | 15 +++++
 tensorflow/python/ops/losses/losses_impl.py   | 41 +++++++++----
 tensorflow/python/ops/metrics_impl.py         | 60 +++++++++++--------
 8 files changed, 167 insertions(+), 68 deletions(-)

diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py
index 7e5ab05987..651de4e2f4 100644
--- a/tensorflow/contrib/losses/python/losses/loss_ops.py
+++ b/tensorflow/contrib/losses/python/losses/loss_ops.py
@@ -66,6 +66,32 @@ def _scale_losses(losses, weights):
   return math_ops.reduce_sum(reduced_losses)
 
 
+def _safe_div(numerator, denominator, name="value"):
+  """Computes a safe divide which returns 0 if the denominator is zero.
+
+  Note that the function contains an additional conditional check that is
+  necessary for avoiding situations where the loss is zero causing NaNs to
+  creep into the gradient computation.
+
+  Args:
+    numerator: An arbitrary `Tensor`.
+    denominator: A `Tensor` whose shape matches `numerator` and whose values are
+      assumed to be non-negative.
+    name: An optional name for the returned op.
+
+  Returns:
+    The element-wise value of the numerator divided by the denominator.
+  """
+  return array_ops.where(
+      math_ops.greater(denominator, 0),
+      math_ops.div(numerator,
+                   array_ops.where(
+                       math_ops.equal(denominator, 0),
+                       array_ops.ones_like(denominator), denominator)),
+      array_ops.zeros_like(numerator),
+      name=name)
+
+
 def _safe_mean(losses, num_present):
   """Computes a safe mean of the losses.
 
@@ -78,7 +104,7 @@ def _safe_mean(losses, num_present):
       then zero is returned.
   """
   total_loss = math_ops.reduce_sum(losses)
-  return math_ops.div_no_nan(total_loss, num_present, name="value")
+  return _safe_div(total_loss, num_present)
 
 
 @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.")
@@ -583,14 +609,11 @@ def mean_pairwise_squared_error(predictions,
         math_ops.square(diffs), reduction_indices=reduction_indices)
     num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
-    term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch,
-                                      num_present_per_batch,
-                                      name="value")
+    term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, num_present_per_batch)
 
     sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices)
-    term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff),
-                                      math_ops.square(num_present_per_batch),
-                                      name="value")
+    term2 = 2.0 * _safe_div(
+        math_ops.square(sum_diff), math_ops.square(num_present_per_batch))
 
     loss = _scale_losses(term1 - term2, weights)
 
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index 91939b5bf2..bbf5d3f30c 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -45,6 +45,24 @@ from tensorflow.python.util.deprecation import deprecated
 _EPSILON = 1e-7
 
 
+def _safe_div(numerator, denominator, name):
+  """Divides two values, returning 0 if the denominator is <= 0.
+
+  Args:
+    numerator: A real `Tensor`.
+    denominator: A real `Tensor`, with dtype matching `numerator`.
+    name: Name for the returned op.
+
+  Returns:
+    0 if `denominator` <= 0, else `numerator` / `denominator`
+  """
+  return array_ops.where(
+      math_ops.greater(denominator, 0),
+      math_ops.truediv(numerator, denominator),
+      0,
+      name=name)
+
+
 @deprecated(None, 'Please switch to tf.metrics.true_positives. Note that the '
             'order of the labels and predictions arguments has been switched.')
 def streaming_true_positives(predictions,
@@ -3220,28 +3238,22 @@ def streaming_covariance(predictions,
 
     # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount)
     # batch_mean_prediction is E[x_B] in the update equation
-    batch_mean_prediction = math_ops.div_no_nan(
-        math_ops.reduce_sum(weighted_predictions),
-        batch_count,
-        name='batch_mean_prediction')
-    delta_mean_prediction = math_ops.div_no_nan(
-        (batch_mean_prediction - mean_prediction) * batch_count,
-        update_count,
-        name='delta_mean_prediction')
+    batch_mean_prediction = _safe_div(
+        math_ops.reduce_sum(weighted_predictions), batch_count,
+        'batch_mean_prediction')
+    delta_mean_prediction = _safe_div(
+        (batch_mean_prediction - mean_prediction) * batch_count, update_count,
+        'delta_mean_prediction')
     update_mean_prediction = state_ops.assign_add(mean_prediction,
                                                   delta_mean_prediction)
     # prev_mean_prediction is E[x_A] in the update equation
     prev_mean_prediction = update_mean_prediction - delta_mean_prediction
 
     # batch_mean_label is E[y_B] in the update equation
-    batch_mean_label = math_ops.div_no_nan(
-        math_ops.reduce_sum(weighted_labels),
-        batch_count,
-        name='batch_mean_label')
-    delta_mean_label = math_ops.div_no_nan(
-        (batch_mean_label - mean_label) * batch_count,
-        update_count,
-        name='delta_mean_label')
+    batch_mean_label = _safe_div(
+        math_ops.reduce_sum(weighted_labels), batch_count, 'batch_mean_label')
+    delta_mean_label = _safe_div((batch_mean_label - mean_label) * batch_count,
+                                 update_count, 'delta_mean_label')
     update_mean_label = state_ops.assign_add(mean_label, delta_mean_label)
     # prev_mean_label is E[y_A] in the update equation
     prev_mean_label = update_mean_label - delta_mean_label
@@ -3903,10 +3915,8 @@ def cohen_kappa(labels,
       po_sum = math_ops.reduce_sum(po)
       total = math_ops.reduce_sum(pe_row)
       pe_sum = math_ops.reduce_sum(
-          math_ops.div_no_nan(
-              math_ops.to_double(pe_row * pe_col),
-              math_ops.to_double(total),
-              name=None))
+          metrics_impl._safe_div(  # pylint: disable=protected-access
+              pe_row * pe_col, total, None))
       po_sum, pe_sum, total = (math_ops.to_double(po_sum),
                                math_ops.to_double(pe_sum),
                                math_ops.to_double(total))
diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py
index d948066b36..24d586479a 100644
--- a/tensorflow/contrib/rate/rate.py
+++ b/tensorflow/contrib/rate/rate.py
@@ -108,6 +108,13 @@ class Rate(object):
   def variables(self):
     return self._vars
 
+  def _safe_div(self, numerator, denominator, name):
+    t = math_ops.truediv(numerator, denominator)
+    zero = array_ops.zeros_like(t, dtype=denominator.dtype)
+    condition = math_ops.greater(denominator, zero)
+    zero = math_ops.cast(zero, t.dtype)
+    return array_ops.where(condition, t, zero, name=name)
+
   def _add_variable(self, name, shape=None, dtype=None):
     """Private method for adding variables to the graph."""
     if self._built:
@@ -141,6 +148,4 @@ class Rate(object):
     state_ops.assign(self.prev_values, values)
     state_ops.assign(self.prev_denominator, denominator)
 
-    return math_ops.div_no_nan(self.numer,
-                               math_ops.maximum(self.denom, 0),
-                               name="safe_rate")
+    return self._safe_div(self.numer, self.denom, name="safe_rate")
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 9c736002ec..9c303f4bed 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -634,7 +634,7 @@ def weighted_masked_objective(fn):
       score_array = math_ops.multiply(score_array, weights)
       score_array = math_ops.reduce_sum(score_array)
       weights = math_ops.reduce_sum(weights)
-      score_array = math_ops.div_no_nan(score_array, weights)
+      score_array = metrics_module.safe_div(score_array, weights)
     return K.mean(score_array)
 
   return weighted
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 3df425fd6e..e64241e5cf 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -155,6 +155,23 @@ def weakmethod(method):
   return inner
 
 
+def safe_div(numerator, denominator):
+  """Divides two tensors element-wise, returning 0 if the denominator is <= 0.
+
+  Args:
+    numerator: A `Tensor`.
+    denominator: A `Tensor`, with dtype matching `numerator`.
+
+  Returns:
+    0 if `denominator` <= 0, else `numerator` / `denominator`
+  """
+  t = math_ops.truediv(numerator, denominator)
+  zero = array_ops.zeros_like(t, dtype=denominator.dtype)
+  condition = math_ops.greater(denominator, zero)
+  zero = math_ops.cast(zero, t.dtype)
+  return array_ops.where(condition, t, zero)
+
+
 def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
   """Squeeze or expand last dimension if needed.
 
@@ -486,7 +503,7 @@ class Mean(Metric):
     return control_flow_ops.group(update_total_op, update_count_op)
 
   def result(self):
-    return math_ops.div_no_nan(self.total, self.count)
+    return safe_div(self.total, self.count)
 
 
 class MeanMetricWrapper(Mean):
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index fb0b5f1137..3ce0b74263 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -33,11 +34,25 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.losses import losses
+from tensorflow.python.ops.losses import losses_impl
 from tensorflow.python.ops.losses import util
 from tensorflow.python.platform import test
 from tensorflow.python.training import momentum as momentum_lib
 
 
+safe_div = losses_impl._safe_div  # pylint: disable=protected-access
+
+
+class SafeDivTest(test.TestCase):
+
+  def testEager(self):
+    with context.eager_mode():
+      self.assertAllEqual(safe_div(constant_op.constant(1.0),
+                                   constant_op.constant(0.0)), 0.0)
+      self.assertAllEqual(safe_div(constant_op.constant(1.0),
+                                   0.0), 0.0)
+
+
 class AbsoluteDifferenceLossTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index fe4950a475..806539747e 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -74,6 +74,31 @@ class Reduction(object):
       raise ValueError("Invalid ReductionKey %s." % key)
 
 
+def _safe_div(numerator, denominator, name="value"):
+  """Computes a safe divide which returns 0 if the denominator is zero.
+
+  Note that the function contains an additional conditional check that is
+  necessary for avoiding situations where the loss is zero causing NaNs to
+  creep into the gradient computation.
+
+  Args:
+    numerator: An arbitrary `Tensor`.
+    denominator: `Tensor` whose shape matches `numerator` and whose values are
+      assumed to be non-negative.
+    name: An optional name for the returned op.
+
+  Returns:
+    The element-wise value of the numerator divided by the denominator.
+  """
+  return array_ops.where(
+      math_ops.greater(denominator, 0),
+      math_ops.div(numerator, array_ops.where(
+          math_ops.equal(denominator, 0),
+          array_ops.ones_like(denominator), denominator)),
+      array_ops.zeros_like(numerator),
+      name=name)
+
+
 def _safe_mean(losses, num_present):
   """Computes a safe mean of the losses.
 
@@ -86,7 +111,7 @@ def _safe_mean(losses, num_present):
       then zero is returned.
   """
   total_loss = math_ops.reduce_sum(losses)
-  return math_ops.div_no_nan(total_loss, num_present, name="value")
+  return _safe_div(total_loss, num_present)
 
 
 def _num_present(losses, weights, per_batch=False):
@@ -574,20 +599,14 @@ def mean_pairwise_squared_error(
           keepdims=True)
       num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
-      term1 = 2.0 * math_ops.div_no_nan(
-          sum_squares_diff_per_batch,
-          math_ops.maximum(num_present_per_batch - 1, 0),
-          name="value")
+      term1 = 2.0 * _safe_div(sum_squares_diff_per_batch,
+                              num_present_per_batch - 1)
 
       sum_diff = math_ops.reduce_sum(
           diffs, reduction_indices=reduction_indices, keepdims=True)
-      term2 = 2.0 * math_ops.div_no_nan(
+      term2 = 2.0 * _safe_div(
           math_ops.square(sum_diff),
-          math_ops.maximum(
-              math_ops.multiply(num_present_per_batch,
-                                num_present_per_batch - 1),
-              0),
-          name="value")
+          math_ops.multiply(num_present_per_batch, num_present_per_batch - 1))
 
       weighted_losses = math_ops.multiply(term1 - term2, weights)
       loss = math_ops.reduce_sum(weighted_losses)
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index e449318020..763877c2d2 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -213,6 +213,24 @@ def _maybe_expand_labels(labels, predictions):
         lambda: array_ops.expand_dims(labels, -1, name=scope), lambda: labels)
 
 
+def _safe_div(numerator, denominator, name):
+  """Divides two tensors element-wise, returning 0 if the denominator is <= 0.
+
+  Args:
+    numerator: A real `Tensor`.
+    denominator: A real `Tensor`, with dtype matching `numerator`.
+    name: Name for the returned op.
+
+  Returns:
+    0 if `denominator` <= 0, else `numerator` / `denominator`
+  """
+  t = math_ops.truediv(numerator, denominator)
+  zero = array_ops.zeros_like(t, dtype=denominator.dtype)
+  condition = math_ops.greater(denominator, zero)
+  zero = math_ops.cast(zero, t.dtype)
+  return array_ops.where(condition, t, zero, name=name)
+
+
 def _safe_scalar_div(numerator, denominator, name):
   """Divides two values, returning 0 if the denominator is 0.
 
@@ -226,7 +244,13 @@ def _safe_scalar_div(numerator, denominator, name):
   """
   numerator.get_shape().with_rank_at_most(1)
   denominator.get_shape().with_rank_at_most(1)
-  return math_ops.div_no_nan(numerator, denominator, name=name)
+  return control_flow_ops.cond(
+      math_ops.equal(
+          array_ops.constant(0.0, dtype=dtypes.float64), denominator),
+      lambda: array_ops.constant(0.0, dtype=dtypes.float64),
+      lambda: math_ops.div(numerator, denominator),
+      name=name)
+
 
 def _streaming_confusion_matrix(labels, predictions, num_classes, weights=None):
   """Calculate a streaming confusion matrix.
@@ -378,14 +402,11 @@ def mean(values,
     with ops.control_dependencies([values]):
       update_count_op = state_ops.assign_add(count, num_values)
 
-    compute_mean = lambda _, t, c: math_ops.div_no_nan(
-        t, math_ops.maximum(c, 0), name='value')
+    compute_mean = lambda _, t, c: _safe_div(t, c, 'value')
 
     mean_t = _aggregate_across_towers(
         metrics_collections, compute_mean, total, count)
-    update_op = math_ops.div_no_nan(update_total_op,
-                                    math_ops.maximum(update_count_op, 0),
-                                    name='update_op')
+    update_op = _safe_div(update_total_op, update_count_op, 'update_op')
 
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
@@ -757,21 +778,16 @@ def auc(labels,
       """
       dtp = tp[:num_thresholds - 1] - tp[1:]
       p = tp + fp
-      prec_slope = math_ops.div_no_nan(
-          dtp,
-          math_ops.maximum(p[:num_thresholds - 1] - p[1:], 0),
-          name='prec_slope')
+      prec_slope = _safe_div(dtp, p[:num_thresholds - 1] - p[1:], 'prec_slope')
       intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:])
       safe_p_ratio = array_ops.where(
           math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0),
-          math_ops.div_no_nan(p[:num_thresholds - 1],
-                              math_ops.maximum(p[1:], 0),
-                              name='recall_relative_ratio'),
+          _safe_div(p[:num_thresholds - 1], p[1:], 'recall_relative_ratio'),
           array_ops.ones_like(p[1:]))
       return math_ops.reduce_sum(
-          math_ops.div_no_nan(
+          _safe_div(
               prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)),
-              math_ops.maximum(tp[1:] + fn[1:], 0),
+              tp[1:] + fn[1:],
               name='pr_auc_increment'),
           name='interpolate_pr_auc')
 
@@ -1052,8 +1068,7 @@ def mean_per_class_accuracy(labels,
     update_count_op = state_ops.scatter_add(count, labels, is_correct)
 
     def compute_mean_accuracy(_, count, total):
-      per_class_accuracy = math_ops.div_no_nan(
-          count, math_ops.maximum(total, 0), name=None)
+      per_class_accuracy = _safe_div(count, total, None)
       mean_accuracy_v = math_ops.reduce_mean(
           per_class_accuracy, name='mean_accuracy')
       return mean_accuracy_v
@@ -1061,9 +1076,7 @@ def mean_per_class_accuracy(labels,
     mean_accuracy_v = _aggregate_across_towers(
         metrics_collections, compute_mean_accuracy, count, total)
 
-    update_op = math_ops.div_no_nan(update_count_op,
-                                    math_ops.maximum(update_total_op, 0),
-                                    name='update_op')
+    update_op = _safe_div(update_count_op, update_total_op, name='update_op')
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
 
@@ -1372,15 +1385,12 @@ def mean_tensor(values,
     with ops.control_dependencies([values]):
       update_count_op = state_ops.assign_add(count, num_values)
 
-    compute_mean = lambda _, t, c: math_ops.div_no_nan(
-        t, math_ops.maximum(c, 0), name='value')
+    compute_mean = lambda _, t, c: _safe_div(t, c, 'value')
 
     mean_t = _aggregate_across_towers(
         metrics_collections, compute_mean, total, count)
 
-    update_op = math_ops.div_no_nan(update_total_op,
-                                    math_ops.maximum(update_count_op, 0),
-                                    name='update_op')
+    update_op = _safe_div(update_total_op, update_count_op, 'update_op')
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
 
-- 
GitLab


From 18a09eb548db25f6d82760105cf8e1fbbb1343a1 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 24 Sep 2018 16:24:21 -0700
Subject: [PATCH 0627/1357] Fix Hlo pattern matcher's AnyOf, so that a
 sub-pattern doesn't capture when it's not matched.

Also add invariant checking for AllOf.

PiperOrigin-RevId: 214351269
---
 .../compiler/xla/service/pattern_matcher.h    | 42 ++++++++++++++++---
 .../xla/service/pattern_matcher_test.cc       | 29 +++++++++++++
 2 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h
index 63b51fc8c9..52c6b51993 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher.h
+++ b/tensorflow/compiler/xla/service/pattern_matcher.h
@@ -149,13 +149,19 @@ class AllOfPattern {
   explicit AllOfPattern(const Patterns&... patterns) : patterns_(patterns...) {}
 
   bool Match(const Item* item, MatchOption option) const {
-    return MatchImpl(item, option,
-                     absl::make_index_sequence<sizeof...(Patterns)>());
+    bool matched = MatchImpl(item, option,
+                             absl::make_index_sequence<sizeof...(Patterns)>());
+    // This invariant is guaranteed by the top-level Match and AnyOf.
+    DCHECK(matched || !option.capture);
+    return matched;
   }
 
   bool Match(Item* item, MatchOption option) const {
-    return MatchImpl(item, option,
-                     absl::make_index_sequence<sizeof...(Patterns)>());
+    bool matched = MatchImpl(item, option,
+                             absl::make_index_sequence<sizeof...(Patterns)>());
+    // This invariant is guaranteed by the top-level Match and AnyOf.
+    DCHECK(matched || !option.capture);
+    return matched;
   }
 
  private:
@@ -307,8 +313,32 @@ class AnyOfPattern {
   template <typename ItemType, size_t index>
   bool MatchImpl(ItemType* item, MatchOption option,
                  std::integral_constant<size_t, index>) const {
-    return std::get<index>(patterns_).Match(item, option) ||
-           MatchImpl(item, option, std::integral_constant<size_t, index + 1>());
+    auto new_option = option;
+    new_option.capture = false;
+    // Try to match the sub-pattern without capturing behavior.
+    if (std::get<index>(patterns_).Match(item, new_option)) {
+      // Capture the branch.
+      if (option.capture) {
+        // TODO(timshen): Currently the behavior can be exponential. Optimize it
+        // with memoization or recording the matched sub-pattern index, if it
+        // takes too long to run.
+        //
+        // Specifically, the "memoization" approach is to create an empty
+        // container with the key (pattern, instruction), and value as whether
+        // matched or not.
+        //
+        // Alternatively, we may run the pattern matching with captures off, but
+        // instead record a "trace" somewhere, indicating how exactly the
+        // pattern matches the input. For example, the trace information for
+        // AnyOf will be a runtime number indicate which sub-pattern is matched.
+        // Then we run another pass to do captures only with the help of the
+        // trace.
+        bool ret = std::get<index>(patterns_).Match(item, option);
+        DCHECK(ret);
+      }
+      return true;
+    }
+    return MatchImpl(item, option, std::integral_constant<size_t, index + 1>());
   }
 
   template <typename ItemType>
diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
index d4e128bd70..e770d54fc1 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc
+++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
@@ -356,5 +356,34 @@ TEST(PatternMatcherTest, TestNoCapture) {
   EXPECT_EQ(nullptr, constant);
 }
 
+TEST(PatternMatcherTest, TestCaptureMatchedSubPatternForAnyOf) {
+  using match::Add;
+  using match::AddAnyOrder;
+  using match::AnyOf;
+  using match::Op;
+
+  constexpr char kModuleStr[] = R"(
+    HloModule test_module
+    ENTRY test {
+      u = f16[] parameter(0)
+      v = f16[] parameter(1)
+      ROOT add = f16[] add(u, v)
+    })";
+  TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr));
+  auto* root = hlo_module->entry_computation()->root_instruction();
+
+  const HloInstruction* addend0 = nullptr;
+  const HloInstruction* addend1 = nullptr;
+  const HloInstruction* addend2 = nullptr;
+  auto add2_pattern = Add(Op(&addend0), Op(&addend1));
+  auto add3_pattern = AnyOf<HloInstruction>(
+      AddAnyOrder(add2_pattern, Op(&addend2)), add2_pattern, Op(&addend0));
+
+  ASSERT_TRUE(Match(root, add3_pattern));
+  EXPECT_NE(nullptr, addend0);
+  EXPECT_NE(nullptr, addend1);
+  EXPECT_EQ(nullptr, addend2);
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 5bd5d7f7ba9bb931e52f458f1ab7d0308fcf3264 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 24 Sep 2018 16:26:28 -0700
Subject: [PATCH 0628/1357] [TF:XLA] Bump open source abseil revision to
 e01d95528ea2137a4a27a88d1f57c6cb260aafed

PiperOrigin-RevId: 214351584
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index d47d15315d..b850c5a17f 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -106,11 +106,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/8ff1374008259719b54a8cb128ef951c02da164c.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/8ff1374008259719b54a8cb128ef951c02da164c.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e01d95528ea2137a4a27a88d1f57c6cb260aafed.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/e01d95528ea2137a4a27a88d1f57c6cb260aafed.tar.gz",
         ],
-        sha256 = "006931f9705484041eed65189038f87931a87cff200bb296f94b3d42339c4cd9",
-        strip_prefix = "abseil-cpp-8ff1374008259719b54a8cb128ef951c02da164c",
+        sha256 = "84043ed402d2a2a6ba4cdddb7e85118b1158fd81fe4ac3a14adc343d054c1e2e",
+        strip_prefix = "abseil-cpp-e01d95528ea2137a4a27a88d1f57c6cb260aafed",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
     )
 
-- 
GitLab


From 120620caf23a044b8aa2db6ba5984384ec936009 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 16:40:14 -0700
Subject: [PATCH 0629/1357] Automated rollback of commit
 edbc6e078ad306021eeb95827a7451892b35f859

PiperOrigin-RevId: 214353862
---
 tensorflow/core/kernels/BUILD | 43 +++++++++++------------------------
 1 file changed, 13 insertions(+), 30 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 96ccc06f9e..ab69925d04 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4049,6 +4049,11 @@ cc_library(
 )
 
 SPARSE_DEPS = [
+    ":bounds_check",
+    ":cwise_op",
+    ":fill_functor",
+    ":scatter_functor",
+    "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:sparse_ops_op_lib",
@@ -4081,9 +4086,7 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_cross_op",
     prefix = "sparse_cross_op",
-    deps = SPARSE_DEPS + [
-        "//third_party/eigen3",
-    ],
+    deps = SPARSE_DEPS,
 )
 
 tf_kernel_library(
@@ -4095,19 +4098,13 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_dense_binary_op_shared",
     prefix = "sparse_dense_binary_op_shared",
-    deps = SPARSE_DEPS + [
-        ":cwise_op",
-        "//third_party/eigen3",
-    ],
+    deps = SPARSE_DEPS,
 )
 
 tf_kernel_library(
     name = "sparse_sparse_binary_op_shared",
     prefix = "sparse_sparse_binary_op_shared",
-    deps = SPARSE_DEPS + [
-        ":cwise_op",
-        "//third_party/eigen3",
-    ],
+    deps = SPARSE_DEPS,
 )
 
 tf_kernel_library(
@@ -4139,9 +4136,7 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_softmax",
     prefix = "sparse_softmax",
-    deps = SPARSE_DEPS + [
-        "//third_party/eigen3",
-    ],
+    deps = SPARSE_DEPS,
 )
 
 tf_kernel_library(
@@ -4153,37 +4148,25 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_tensor_dense_add_op",
     prefix = "sparse_tensor_dense_add_op",
-    deps = SPARSE_DEPS + [
-        ":scatter_functor",
-        "//third_party/eigen3",
-    ],
+    deps = SPARSE_DEPS,
 )
 
 tf_kernel_library(
     name = "sparse_tensor_dense_matmul_op",
     prefix = "sparse_tensor_dense_matmul_op",
-    deps = SPARSE_DEPS + [
-        ":bounds_check",
-        ":fill_functor",
-        "//third_party/eigen3",
-    ],
+    deps = SPARSE_DEPS,
 )
 
 tf_kernel_library(
     name = "sparse_to_dense_op",
     prefix = "sparse_to_dense_op",
-    deps = SPARSE_DEPS + [
-        "//third_party/eigen3",
-    ],
+    deps = SPARSE_DEPS,
 )
 
 tf_kernel_library(
     name = "sparse_xent_op",
     prefix = "sparse_xent_op",
-    deps = SPARSE_DEPS + [
-        ":bounds_check",
-        "//third_party/eigen3",
-    ],
+    deps = SPARSE_DEPS,
 )
 
 tf_kernel_library(
-- 
GitLab


From 7a1096f424b1adcb4152db80a01a163ddb1a0173 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 16:42:00 -0700
Subject: [PATCH 0630/1357] Replace usage of base::GetFlag with Abseil Flags
 public API absl::GetFlag.

PiperOrigin-RevId: 214354104
---
 tensorflow/core/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 59b7dd04e9..c8f7fc8cea 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -588,6 +588,7 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:other",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "//tensorflow/core/platform/default/build_config:port",
+        "@com_google_absl//absl/flags:flag",
     ],
 )
 
@@ -2155,6 +2156,7 @@ cc_library(
         ":lib_proto_parsing",
         ":abi",
         ":core_stringpiece",
+        "@com_google_absl//absl/flags:flag",
         "//third_party/eigen3",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "@snappy",
-- 
GitLab


From cc5555d3d3daa64f462cc7f8d31fe915073429f4 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 24 Sep 2018 16:57:45 -0700
Subject: [PATCH 0631/1357] Short-circuit AllOf as well. This fixes a crash in
 ConstantScalar, as it uses Cast internally.

PiperOrigin-RevId: 214356411
---
 .../compiler/xla/service/pattern_matcher.h    | 20 +++++++++++--------
 .../xla/service/pattern_matcher_test.cc       |  9 +++++++++
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h
index 52c6b51993..380cde0e6a 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher.h
+++ b/tensorflow/compiler/xla/service/pattern_matcher.h
@@ -149,27 +149,31 @@ class AllOfPattern {
   explicit AllOfPattern(const Patterns&... patterns) : patterns_(patterns...) {}
 
   bool Match(const Item* item, MatchOption option) const {
-    bool matched = MatchImpl(item, option,
-                             absl::make_index_sequence<sizeof...(Patterns)>());
+    bool matched = MatchImpl(item, option, std::integral_constant<size_t, 0>());
     // This invariant is guaranteed by the top-level Match and AnyOf.
     DCHECK(matched || !option.capture);
     return matched;
   }
 
   bool Match(Item* item, MatchOption option) const {
-    bool matched = MatchImpl(item, option,
-                             absl::make_index_sequence<sizeof...(Patterns)>());
+    bool matched = MatchImpl(item, option, std::integral_constant<size_t, 0>());
     // This invariant is guaranteed by the top-level Match and AnyOf.
     DCHECK(matched || !option.capture);
     return matched;
   }
 
  private:
-  template <typename ItemType, size_t... indices>
+  template <typename ItemType, size_t index>
   bool MatchImpl(ItemType* item, MatchOption option,
-                 absl::index_sequence<indices...>) const {
-    return std::min<bool>(
-        {std::get<indices>(patterns_).Match(item, option)...});
+                 std::integral_constant<size_t, index>) const {
+    return std::get<index>(patterns_).Match(item, option) &&
+           MatchImpl(item, option, std::integral_constant<size_t, index + 1>());
+  }
+
+  template <typename ItemType>
+  bool MatchImpl(ItemType* item, MatchOption option,
+                 std::integral_constant<size_t, sizeof...(Patterns)>) const {
+    return true;
   }
 
   std::tuple<Patterns...> patterns_;
diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
index e770d54fc1..3ab7b7fd71 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc
+++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
@@ -239,6 +239,15 @@ TEST(PatternMatcherTest, ConstantScalar) {
   EXPECT_FALSE(Match(root, match::ConstantScalar(0)));
 }
 
+TEST(PatternMatcherTest, NoMatchConstantScalar) {
+  constexpr char kModuleStr[] = R"(
+    HloModule test_module ENTRY test { ROOT v = f16[] parameter(0) })";
+  TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr));
+  auto* root = hlo_module->entry_computation()->root_instruction();
+
+  EXPECT_FALSE(Match(root, match::ConstantScalar(42)));
+}
+
 TEST(PatternMatcherTest, MultiplyAnyOrder) {
   using match::ConstantScalar;
   using match::MultiplyAnyOrder;
-- 
GitLab


From 73083d29afe770870742a9d19555686886e76f6d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 17:20:07 -0700
Subject: [PATCH 0632/1357] Upgrade cloud tpu profiler to 1.11.

PiperOrigin-RevId: 214359786
---
 .../tpu/profiler/pip_package/cloud_tpu_profiler/main.py    | 7 ++++---
 tensorflow/contrib/tpu/profiler/pip_package/setup.py       | 2 +-
 tensorflow/contrib/tpu/profiler/version.h                  | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
index 438f442848..63641e00c5 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
@@ -116,12 +116,13 @@ def main(unused_argv=None):
     elif tpu_cluster_resolver is not None:
       workers_list = get_workers_list(tpu_cluster_resolver)
 
-  if not FLAGS.logdir:
+  if not FLAGS.logdir and not FLAGS.monitoring_level:
     sys.exit('logdir must be provided.')
   executable_path = os.path.join(os.path.dirname(__file__), EXECUTABLE)
-  logdir = os.path.expandvars(os.path.expanduser(FLAGS.logdir))
   cmd = [executable_path]
-  cmd.append('--logdir=' + logdir)
+  if FLAGS.logdir is not None:
+    logdir = os.path.expandvars(os.path.expanduser(FLAGS.logdir))
+    cmd.append('--logdir=' + logdir)
   cmd.append('--service_addr=' + service_addr)
   cmd.append('--workers_list=' + workers_list)
   cmd.append('--duration_ms=' + str(FLAGS.duration_ms))
diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
index d4ccb0f246..2415c46718 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 
 from setuptools import setup
 
-_VERSION = '1.10.0'
+_VERSION = '1.11.0'
 
 CONSOLE_SCRIPTS = [
     'capture_tpu_profile=cloud_tpu_profiler.main:run_main',
diff --git a/tensorflow/contrib/tpu/profiler/version.h b/tensorflow/contrib/tpu/profiler/version.h
index aee094177b..90d34b5ef1 100644
--- a/tensorflow/contrib/tpu/profiler/version.h
+++ b/tensorflow/contrib/tpu/profiler/version.h
@@ -16,6 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_
 #define TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_
 
-#define TPU_PROFILER_VERSION "1.10.0"
+#define TPU_PROFILER_VERSION "1.11.0"
 
 #endif  // TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_
-- 
GitLab


From 9ab01c6732dae1143e22713375a9cc7758216787 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 17:26:37 -0700
Subject: [PATCH 0633/1357] Update the functional rnn API to add a fast path
 when cell function is noop for pad input.

PiperOrigin-RevId: 214360620
---
 .../recurrent/python/ops/functional_rnn.py    | 96 +++++++++++++------
 .../contrib/recurrent/python/ops/recurrent.py | 37 +++++--
 2 files changed, 93 insertions(+), 40 deletions(-)

diff --git a/tensorflow/contrib/recurrent/python/ops/functional_rnn.py b/tensorflow/contrib/recurrent/python/ops/functional_rnn.py
index efaf63086f..3abf7bd6da 100644
--- a/tensorflow/contrib/recurrent/python/ops/functional_rnn.py
+++ b/tensorflow/contrib/recurrent/python/ops/functional_rnn.py
@@ -219,7 +219,7 @@ def _PickFinalStateFromHistory(acc_state, sequence_length):
 
 
 def _PostProcessOutput(extended_acc_state, extended_final_state, func_cell,
-                       total_time, inputs_lengths):
+                       total_time, inputs_lengths, is_reversed):
   """Post-process output of recurrent.
 
   This function takes the accumulated extended state and extracts the requested
@@ -228,6 +228,8 @@ def _PostProcessOutput(extended_acc_state, extended_final_state, func_cell,
   When `inputs_lengths` has been set, it extracts the output from the
   accumulated state. It also sets outputs past.
 
+  When `is_reversed` is true, the output will be reversed in this function.
+
   It also sets the static shape information.
 
   Args:
@@ -238,11 +240,12 @@ def _PostProcessOutput(extended_acc_state, extended_final_state, func_cell,
     func_cell: The functional wrapper around the cell.
     total_time: A scalar integer tensor.
     inputs_lengths: An integer tensor with one entry per input.
+    is_reversed: A boolean to indicate if the sequence is reversed.
 
   Returns:
     A tuple with the outputs at each time, and the final state.
   """
-  if inputs_lengths is None:
+  if inputs_lengths is None or is_reversed:
     flat_final_state = func_cell.MaybeRemoveOutputFromState(
         nest.flatten(extended_final_state))
     tf_state = nest.pack_sequence_as(func_cell.state_template, flat_final_state)
@@ -256,21 +259,28 @@ def _PostProcessOutput(extended_acc_state, extended_final_state, func_cell,
     tf_state = _PickFinalStateFromHistory(acc_state, inputs_lengths)
 
   output_from_state = func_cell.GetOutputFromState(extended_acc_state)
+  if is_reversed:
+    output_from_state = array_ops.reverse(output_from_state, [0])
   tf_output = array_ops.transpose(output_from_state, [1, 0, 2])
   tf_output.set_shape(
       [func_cell.output_shape[0], total_time, func_cell.output_shape[1]])
   if inputs_lengths is not None:
     # Need set the outputs to zero.
     tf_output = _ApplyLengthsToBatch(inputs_lengths, tf_output)
-    # tf_output = array_ops.zeros([4, 3, 5])
   _SetShapeFromTemplate(tf_state, func_cell.state_template)
   return tf_output, tf_state
 
 
 # pylint: disable=invalid-name
-def functional_rnn(cell, inputs, sequence_length=None,
-                   initial_state=None, dtype=None, time_major=False,
-                   scope=None, use_tpu=False):
+def functional_rnn(cell,
+                   inputs,
+                   sequence_length=None,
+                   initial_state=None,
+                   dtype=None,
+                   time_major=False,
+                   scope=None,
+                   use_tpu=False,
+                   reverse=False):
   """Same interface as `tf.nn.dynamic_rnn`."""
   with variable_scope.variable_scope(scope or 'rnn'):
     if not time_major:
@@ -285,33 +295,41 @@ def functional_rnn(cell, inputs, sequence_length=None,
     max_length = math_ops.reduce_max(sequence_length)
   else:
     max_length = None
+  if reverse:
+    inputs = array_ops.reverse(inputs, [0])
   extended_acc_state, extended_final_state = recurrent.Recurrent(
       theta=func_cell.theta,
       state0=func_cell.extended_initial_state,
       inputs=inputs,
       cell_fn=func_cell.cell_step,
       max_input_length=max_length,
-      use_tpu=use_tpu)
+      use_tpu=use_tpu,
+      aligned_end=reverse)
+
   tf_output, tf_state = _PostProcessOutput(
-      extended_acc_state, extended_final_state, func_cell,
-      inputs_flat[0].shape[0], sequence_length)
+      extended_acc_state,
+      extended_final_state,
+      func_cell,
+      inputs_flat[0].shape[0],
+      sequence_length,
+      is_reversed=reverse)
 
   if time_major:
     tf_output = array_ops.transpose(tf_output, [1, 0, 2])
   return tf_output, tf_state
 
 
-def bidirectional_functional_rnn(
-    cell_fw,
-    cell_bw,
-    inputs,
-    initial_state_fw=None,
-    initial_state_bw=None,
-    dtype=None,
-    sequence_length=None,
-    time_major=False,
-    use_tpu=False,
-    scope=None):
+def bidirectional_functional_rnn(cell_fw,
+                                 cell_bw,
+                                 inputs,
+                                 initial_state_fw=None,
+                                 initial_state_bw=None,
+                                 dtype=None,
+                                 sequence_length=None,
+                                 time_major=False,
+                                 use_tpu=False,
+                                 fast_reverse=False,
+                                 scope=None):
   """Creates a bidirectional recurrent neural network.
 
   Performs fully dynamic unrolling of inputs in both directions. Built to be API
@@ -342,6 +360,10 @@ def bidirectional_functional_rnn(
     use_tpu: Whether to enable TPU-compatible operation. If True, does not truly
       reverse `inputs` in the backwards RNN. Once b/69305369 is fixed, we can
       remove this flag.
+    fast_reverse: Whether to use fast tf.reverse to replace tf.reverse_sequence.
+      This is only possible when either all sequence lengths are the same inside
+      the batch, or when the cell function does not change the state on padded
+      input.
     scope: An optional scope name for the dynamic RNN.
 
   Returns:
@@ -390,17 +412,29 @@ def bidirectional_functional_rnn(
         return array_ops.reverse(input_, axis=[seq_dim])
 
     with variable_scope.variable_scope('bw') as bw_scope:
-      inputs_reverse = _reverse(
-          inputs, seq_lengths=sequence_length,
-          seq_dim=time_dim, batch_dim=batch_dim)
-      tmp, output_state_bw = functional_rnn(
-          cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length,
-          initial_state=initial_state_bw, dtype=dtype,
-          time_major=time_major, scope=bw_scope, use_tpu=use_tpu)
-
-  output_bw = _reverse(
-      tmp, seq_lengths=sequence_length,
-      seq_dim=time_dim, batch_dim=batch_dim)
+      if not fast_reverse:
+        inputs = _reverse(
+            inputs,
+            seq_lengths=sequence_length,
+            seq_dim=time_dim,
+            batch_dim=batch_dim)
+      output_bw, output_state_bw = functional_rnn(
+          cell=cell_bw,
+          inputs=inputs,
+          sequence_length=sequence_length,
+          initial_state=initial_state_bw,
+          dtype=dtype,
+          time_major=time_major,
+          scope=bw_scope,
+          use_tpu=use_tpu,
+          reverse=fast_reverse)
+
+  if not fast_reverse:
+    output_bw = _reverse(
+        output_bw,
+        seq_lengths=sequence_length,
+        seq_dim=time_dim,
+        batch_dim=batch_dim)
 
   outputs = (output_fw, output_bw)
   output_states = (output_state_fw, output_state_bw)
diff --git a/tensorflow/contrib/recurrent/python/ops/recurrent.py b/tensorflow/contrib/recurrent/python/ops/recurrent.py
index 4f289e0c85..f51de755d8 100644
--- a/tensorflow/contrib/recurrent/python/ops/recurrent.py
+++ b/tensorflow/contrib/recurrent/python/ops/recurrent.py
@@ -274,8 +274,16 @@ def _ConvertNoneGradientToZeros(xs, dxs):
 class _Recurrent(object):
   """A helper class to construct a recurrent neural net."""
 
-  def __init__(self, cell_fn, cell_grad, theta, state0, inputs,
-               max_input_length, extras, use_tpu):
+  def __init__(self,
+               cell_fn,
+               cell_grad,
+               theta,
+               state0,
+               inputs,
+               max_input_length,
+               extras,
+               use_tpu,
+               aligned_end=False):
     """RNN helper class.
 
     Args:
@@ -294,6 +302,8 @@ class _Recurrent(object):
         and shapes of this `extras`.
       use_tpu: A boolean indicating whether the computation is mean to
         run on a TPU.
+      aligned_end: A boolean indicating whether the sequence is aligned at
+        the end.
     """
     self._theta = theta
     self._state = state0
@@ -303,6 +313,7 @@ class _Recurrent(object):
     self._cell_fn = cell_fn
     self._cell_grad = cell_grad
     self._extras = extras
+    self._aligned_end = aligned_end
 
     # pylint: disable=unbalanced-tuple-unpacking
 
@@ -417,10 +428,11 @@ class _Recurrent(object):
       acc_state = _EmptyAcc(slen_dim, state0)
       acc_extras = _EmptyAcc(slen_dim, extras)
 
-      dev_t = array_ops.constant(0, dtype=dev_t_type)
+      t = slen_dim - max_input_length if self._aligned_end else 0
+      dev_t = math_ops.to_int32(t) if use_tpu else math_ops.to_int64(t)
       run = functional_ops.For(
-          start=0,
-          limit=max_input_length,
+          start=t,
+          limit=slen_dim if self._aligned_end else max_input_length,
           delta=1,
           inputs=[dev_t] + _Flatten(
               [theta, state0, inputs, acc_state, acc_extras]),
@@ -551,13 +563,16 @@ class _Recurrent(object):
       d_theta = _EmptyLike(theta)
       d_inputs = _EmptyLike(inputs)
 
+      slen_dim = _SeqLenDim(inputs)
+
       # Loop backwards. Note the loop's limit is open-ended, so goes through
       # t=0.
-      t = max_input_length - 1
+      t = slen_dim - 1 if self._aligned_end else max_input_length - 1
       dev_t = math_ops.to_int32(t) if use_tpu else math_ops.to_int64(t)
+      limit = slen_dim - max_input_length - 1 if self._aligned_end else -1
       run = functional_ops.For(
           start=t,
-          limit=-1,
+          limit=limit,
           delta=-1,
           inputs=[dev_t] + _Flatten([
               theta, state0, inputs, acc_state, acc_extras, d_theta, d_state1,
@@ -641,7 +656,8 @@ def Recurrent(theta,
               cell_grad=None,
               extras=None,
               max_input_length=None,
-              use_tpu=False):
+              use_tpu=False,
+              aligned_end=False):
   """Compute a recurrent neural net.
 
   Roughly, Recurrent() computes the following:
@@ -684,6 +700,8 @@ def Recurrent(theta,
       truncate the computation if the inputs have been allocated to a
       larger size. A scalar tensor.
     use_tpu: whether or not we are on TPU.
+    aligned_end: A boolean indicating whether the sequence is aligned at
+      the end.
 
   Returns:
     accumulate_state and the final state.
@@ -717,4 +735,5 @@ def Recurrent(theta,
       inputs=inputs,
       max_input_length=max_input_length,
       extras=extras,
-      use_tpu=use_tpu).Compute()
+      use_tpu=use_tpu,
+      aligned_end=aligned_end).Compute()
-- 
GitLab


From 391cdd80952e9cc546d82a8bf2fe7dd04f46cb2f Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 24 Sep 2018 17:44:58 -0700
Subject: [PATCH 0634/1357] Add cuDNN fused convolution forward support.

The tests are in the next patch.

PiperOrigin-RevId: 214362688
---
 tensorflow/compiler/xla/service/gpu/BUILD     |  6 +-
 .../xla/service/gpu/backend_configs.proto     | 14 +++
 .../gpu/cudnn_convolution_algorithm_picker.cc |  5 +-
 .../service/gpu/cudnn_convolution_rewriter.cc | 10 ++
 .../service/gpu/cudnn_convolution_runner.cc   | 96 +++++++++++++++++++
 .../xla/service/gpu/gpu_layout_assignment.cc  | 15 +++
 .../xla/service/gpu/ir_emission_utils.cc      | 10 +-
 .../xla/service/gpu/ir_emission_utils.h       |  9 +-
 .../xla/service/gpu/pad_for_tensor_cores.cc   |  7 +-
 .../compiler/xla/service/gpu/pad_insertion.cc |  6 +-
 tensorflow/stream_executor/dnn.h              |  4 +-
 11 files changed, 169 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 7231fd844e..2775527e0c 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -433,6 +433,7 @@ cc_library(
     srcs = ["cudnn_convolution_rewriter.cc"],
     hdrs = ["cudnn_convolution_rewriter.h"],
     deps = [
+        ":backend_configs",
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:util",
@@ -597,14 +598,11 @@ cc_library(
     hdrs = ["pad_for_tensor_cores.h"],
     deps = [
         ":ir_emission_utils",
-        "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:window_util",
-        "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/service:hlo_creation_utils",
+        "//tensorflow/compiler/xla/service:hlo_casting_utils",
         "//tensorflow/compiler/xla/service:hlo_pass",
-        "//tensorflow/compiler/xla/service:shape_inference",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/gpu/backend_configs.proto b/tensorflow/compiler/xla/service/gpu/backend_configs.proto
index 640c6392b8..78e14d860e 100644
--- a/tensorflow/compiler/xla/service/gpu/backend_configs.proto
+++ b/tensorflow/compiler/xla/service/gpu/backend_configs.proto
@@ -24,4 +24,18 @@ message CudnnConvBackendConfig {
   // true, cudnn may choose not to use tensor cores, e.g. because the GPU or
   // selected algorithm doesn't support it.
   bool tensor_ops_enabled = 2;
+
+  // The scaling factor multiplied with the convolution result.
+  double conv_result_scale = 4;
+
+  // Below are the fields related to cuDNN's fused convolution. Refer to
+  // CudnnConvParams for their meanings.
+
+  // The requested activation (e.g. relu) after the convolution. It is with type
+  // stream_executor::dnn::ActivationMode.
+  int64 activation_mode = 3;
+
+  // The scaling factor multiplied with the side input. If no side input buffer
+  // is provided, this field must be 0.
+  double side_input_scale = 5;
 }
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index 391456576f..7125673887 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -89,6 +89,7 @@ std::vector<AlgorithmDesc> GetAlgorithms(CudnnConvKind kind,
       succ = stream_exec->GetConvolveBackwardDataAlgorithms(true, &algorithms);
       break;
     case CudnnConvKind::kForward:
+    case CudnnConvKind::kForwardActivation:
       succ = stream_exec->GetConvolveAlgorithms(true, &algorithms);
       break;
   }
@@ -363,8 +364,8 @@ StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnInstruction(
   backend_config.set_tensor_ops_enabled(tensor_ops_enabled);
 
   HloInstruction* new_call = computation->AddInstruction(
-      instr->CloneWithNewOperands(new_call_shape, {instr->mutable_operand(0),
-                                                   instr->mutable_operand(1)}));
+      instr->CloneWithNewOperands(new_call_shape, instr->operands()));
+
   TF_RETURN_IF_ERROR(new_call->set_backend_config(backend_config));
 
   // Repackage new_call so it has the same shape as the original call, namely
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
index 2834d47412..ef29237301 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
@@ -21,6 +21,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
+#include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -476,6 +477,12 @@ MatchBackwardInput(HloInstruction* conv) {
   return std::make_tuple(true, new_window, dnums, rhs);
 }
 
+CudnnConvBackendConfig GetDefaultBackendConfig() {
+  CudnnConvBackendConfig config;
+  config.set_conv_result_scale(1);
+  return config;
+}
+
 // Tries to rewrite a single convolution into a call to cudnn.
 StatusOr<bool> RunOnInstruction(HloInstruction* conv) {
   CHECK_EQ(conv->opcode(), HloOpcode::kConvolution);
@@ -515,6 +522,9 @@ StatusOr<bool> RunOnInstruction(HloInstruction* conv) {
     return false;
   }
 
+  TF_RETURN_IF_ERROR(
+      custom_call->set_backend_config(GetDefaultBackendConfig()));
+
   // The CustomCall returns a tuple (conv_result, scratch_memory).  Extract out
   // the conv result and replace `conv` with it.
   TF_RETURN_IF_ERROR(conv->parent()->ReplaceWithNewInstruction(
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
index 32d67084b3..89dd1bb272 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
@@ -40,6 +40,25 @@ using se::dnn::FilterLayout;
 using se::dnn::ProfileResult;
 
 struct CudnnConvParams {
+  // Here are the fields related to cuDNN's fused convolution. The result thus
+  // is defined as:
+  //   activation(conv_result_scale * conv(x, w) +
+  //       side_input_scale * side_input + broadcast(bias))
+  //
+  // The most common fused conv is conv forward + relu/identity, for example.
+  //
+  // bias_buf is a single-dimensional array, with the length equal to the number
+  // of output features. It'll be broadcasted to the output shape in order to be
+  // added to the final results.
+  //
+  // side_input_buf, if valid, must have the same shape as the output buffer.
+  struct FusionParams {
+    se::dnn::ActivationMode mode;
+    double side_input_scale;
+    se::DeviceMemoryBase bias_buf;
+    se::DeviceMemoryBase side_input_buf;  // nullable
+  };
+
   CudnnConvKind kind;
   const Shape* input_shape;
   const Shape* filter_shape;
@@ -51,6 +70,9 @@ struct CudnnConvParams {
   const ConvolutionDimensionNumbers* dnums;
   int64 feature_group_count;
   se::dnn::AlgorithmConfig algorithm;
+  double conv_result_scale;
+
+  absl::optional<FusionParams> fusion;
 };
 
 // A StreamExecutor ScratchAllocator that wraps a single XLA allocation,
@@ -202,23 +224,73 @@ Status RunCudnnConvolutionImpl(CudnnConvParams params,
 
   switch (kind) {
     case CudnnConvKind::kForward:
+      if (params.conv_result_scale != 1) {
+        return InternalError(
+            "StreamExecutor doesn't support scaled convolution: %lf.",
+            params.conv_result_scale);
+      }
       stream->ThenConvolveWithAlgorithm(
           input_descriptor, input_buf, filter_descriptor, filter_buf,
           convolution_descriptor, output_descriptor, &output_buf,
           scratch_allocator, algorithm, profile_result);
       break;
     case CudnnConvKind::kBackwardInput:
+      if (params.conv_result_scale != 1) {
+        return InternalError(
+            "StreamExecutor doesn't support scaled convolution: %lf.",
+            params.conv_result_scale);
+      }
       stream->ThenConvolveBackwardDataWithAlgorithm(
           filter_descriptor, filter_buf, output_descriptor, output_buf,
           convolution_descriptor, input_descriptor, &input_buf,
           scratch_allocator, algorithm, profile_result);
       break;
     case CudnnConvKind::kBackwardFilter:
+      if (params.conv_result_scale != 1) {
+        return InternalError(
+            "StreamExecutor doesn't support scaled convolution: %lf.",
+            params.conv_result_scale);
+      }
       stream->ThenConvolveBackwardFilterWithAlgorithm(
           input_descriptor, input_buf, output_descriptor, output_buf,
           convolution_descriptor, filter_descriptor, &filter_buf,
           scratch_allocator, algorithm, profile_result);
       break;
+    case CudnnConvKind::kForwardActivation: {
+      BatchDescriptor bias_desc;
+      bias_desc.set_count(1)
+          .set_height(1)
+          .set_width(1)
+          .set_feature_map_count(
+              output_shape.dimensions(dnums.output_feature_dimension()))
+          .set_layout(output_dl);
+
+      se::DeviceMemory<T> side_input(params.fusion->side_input_buf);
+      // If there is no side input, use output as the side input.
+      if (side_input.is_null()) {
+        if (params.fusion->side_input_scale != 0) {
+          return InternalError(
+              "Side input scale is not 0, yet no side input buffer is "
+              "provided");
+        }
+        // Since side-input scale is 0, the values in the side input don't
+        // matter.  The simplest thing to do would be to pass in a null buffer
+        // for the side input, but cudnn doesn't allow this.  cudnn does promise
+        // that if side-input-scale is 0 the side input won't be read, so we
+        // just pass in the output buffer, since it's handy and has the correct
+        // size.
+        side_input = output_buf;
+      }
+
+      stream->ThenFusedConvolveWithAlgorithm(
+          input_descriptor, input_buf, params.conv_result_scale,
+          filter_descriptor, filter_buf, convolution_descriptor, side_input,
+          params.fusion->side_input_scale, bias_desc,
+          DeviceMemory<T>(params.fusion->bias_buf), params.fusion->mode,
+          output_descriptor, &output_buf, scratch_allocator, algorithm,
+          profile_result);
+      break;
+    }
   }
 
   if (!stream->ok()) {
@@ -250,6 +322,7 @@ StatusOr<CudnnConvParams> GetCudnnConvParams(
   params.feature_group_count = conv->feature_group_count();
   params.algorithm = se::dnn::AlgorithmConfig(se::dnn::AlgorithmDesc(
       backend_config.algorithm(), backend_config.tensor_ops_enabled()));
+  params.conv_result_scale = backend_config.conv_result_scale();
 
   if (target == kCudnnConvForwardCallTarget) {
     params.kind = CudnnConvKind::kForward;
@@ -275,6 +348,29 @@ StatusOr<CudnnConvParams> GetCudnnConvParams(
     params.input_buf = operand_buffers[0];
     params.filter_buf = result_buffer;
     params.output_buf = operand_buffers[1];
+  } else if (target == kCudnnConvBiasActivationForwardCallTarget) {
+    params.kind = CudnnConvKind::kForwardActivation;
+    params.input_shape = &lhs_shape;
+    params.filter_shape = &rhs_shape;
+    params.output_shape = &conv_result_shape;
+    params.fusion.emplace();
+    auto& fusion = *params.fusion;
+    if (backend_config.activation_mode() <
+        static_cast<int64>(se::dnn::ActivationMode::kNumActivationModes)) {
+      fusion.mode = static_cast<se::dnn::ActivationMode>(
+          backend_config.activation_mode());
+    } else {
+      return InternalError("Bad activation mode: %s",
+                           backend_config.ShortDebugString());
+    }
+    fusion.side_input_scale = backend_config.side_input_scale();
+    params.input_buf = operand_buffers[0];
+    params.filter_buf = operand_buffers[1];
+    params.output_buf = result_buffer;
+    params.fusion->bias_buf = operand_buffers[2];
+    if (operand_buffers.size() >= 4) {
+      params.fusion->side_input_buf = operand_buffers[3];
+    }
   } else {
     return InternalError("Unexpected custom call target: %s", target);
   }
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index 06314e413e..74352f26aa 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -104,6 +104,7 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall(
   TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(instr));
   switch (kind) {
     case CudnnConvKind::kForward:
+    case CudnnConvKind::kForwardActivation:
       input_shape = &lhs_shape;
       filter_shape = &rhs_shape;
       output_shape = &result_shape;
@@ -153,6 +154,20 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall(
   TF_RETURN_IF_ERROR(constraints->SetOperandLayout(rhs_shape, instr, 1));
   TF_RETURN_IF_ERROR(
       constraints->SetBufferLayout(result_shape.layout(), *call_result_buf));
+  // instr->operand(2), if exists, is the bias buffer. There is no need to
+  // assign layout to it, as it has only one dimension.
+
+  // instr->opernad(3), if exists, is the side input buffer.
+  if (instr->operand_count() == 4) {
+    if (kind != CudnnConvKind::kForwardActivation) {
+      return InternalError(
+          "Invalid convolution. Conv has a side input, but kind is not fused "
+          "conv forward: %s",
+          instr->ToString());
+    }
+    // The side input layout must match the output layout.
+    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(*output_shape, instr, 3));
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
index 76757faf60..ec3d8f9405 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
@@ -129,6 +129,8 @@ const char* const kCudnnConvBackwardInputCallTarget =
     "__cudnn$convBackwardInput";
 const char* const kCudnnConvBackwardFilterCallTarget =
     "__cudnn$convBackwardFilter";
+const char* const kCudnnConvBiasActivationForwardCallTarget =
+    "__cudnn$convBiasActivationForward";
 
 bool IsCustomCallToDnnConvolution(const HloInstruction& hlo) {
   if (hlo.opcode() != HloOpcode::kCustomCall) {
@@ -137,7 +139,8 @@ bool IsCustomCallToDnnConvolution(const HloInstruction& hlo) {
   const auto& target = hlo.custom_call_target();
   return target == kCudnnConvForwardCallTarget ||
          target == kCudnnConvBackwardInputCallTarget ||
-         target == kCudnnConvBackwardFilterCallTarget;
+         target == kCudnnConvBackwardFilterCallTarget ||
+         target == kCudnnConvBiasActivationForwardCallTarget;
 }
 
 bool ImplementedAsLibraryCall(const HloInstruction& hlo) {
@@ -247,6 +250,9 @@ StatusOr<CudnnConvKind> GetCudnnConvKind(
   if (target == kCudnnConvBackwardFilterCallTarget) {
     return CudnnConvKind::kBackwardFilter;
   }
+  if (target == kCudnnConvBiasActivationForwardCallTarget) {
+    return CudnnConvKind::kForwardActivation;
+  }
   return InternalError("Unexpected call target: %s", target);
 }
 
@@ -258,6 +264,8 @@ string CudnnConvKindToString(CudnnConvKind kind) {
       return "backward_filter";
     case CudnnConvKind::kBackwardInput:
       return "backward_input";
+    case CudnnConvKind::kForwardActivation:
+      return "forward with activation";
   }
 }
 
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
index 744346abf3..a64a616ab1 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
@@ -44,9 +44,11 @@ namespace gpu {
 // "connectivity" (i.e. which elements of the input affect which elements of
 // the output) are concerned.
 enum class CudnnConvKind {
-  kForward,         // input  + filter => output
-  kBackwardInput,   // filter + output => input
-  kBackwardFilter,  // input  + output => filter
+  kForward,            // input  + filter => output
+  kBackwardInput,      // filter + output => input
+  kBackwardFilter,     // input  + output => filter
+  kForwardActivation,  // activation(conv(input, filter) + broadcast(bias) +
+                       // (optionally) side_input) => output
 };
 
 StatusOr<CudnnConvKind> GetCudnnConvKind(const HloCustomCallInstruction* instr);
@@ -119,6 +121,7 @@ bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo);
 extern const char* const kCudnnConvForwardCallTarget;
 extern const char* const kCudnnConvBackwardInputCallTarget;
 extern const char* const kCudnnConvBackwardFilterCallTarget;
+extern const char* const kCudnnConvBiasActivationForwardCallTarget;
 
 // Returns true if `hlo` will be implemented as a call to a cuDNN convolution
 // routine.
diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
index b0061fa655..2d270f630b 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/compiler/xla/window_util.h"
 
@@ -209,7 +210,11 @@ static std::vector<HloInstruction*> GetRelevantConvs(HloComputation* comp) {
   std::vector<HloInstruction*> convs;
   for (HloInstruction* instr : comp->instructions()) {
     if (IsCustomCallToDnnConvolution(*instr) &&
-        instr->operand(0)->shape().element_type() == F16) {
+        instr->operand(0)->shape().element_type() == F16 &&
+        // TODO(timshen): Disable for fused conv for now. Implement it if it's
+        // needed.
+        Cast<HloCustomCallInstruction>(instr)->custom_call_target() !=
+            kCudnnConvBiasActivationForwardCallTarget) {
       convs.push_back(instr);
     }
   }
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
index eead408f10..7e77dc9ac6 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
@@ -162,8 +162,12 @@ bool PadInsertion::CanonicalizeForwardConvolution(HloInstruction* conv) {
   // The conv CustomCall returns a tuple (conv_result, scratch_buffer).  Extract
   // out the shape of conv_result.
   VLOG(1) << "Canonicalizing forward conv";
+  std::vector<HloInstruction*> operands(conv->operands().begin(),
+                                        conv->operands().end());
+  operands[0] = new_input;
+  operands[1] = new_kernel;
   auto new_conv = conv->parent()->AddInstruction(
-      conv->CloneWithNewOperands(conv->shape(), {new_input, new_kernel}));
+      conv->CloneWithNewOperands(conv->shape(), operands));
   new_conv->set_window(new_conv_window);
   VLOG(1) << "Replacing:\n  " << conv->ToString() << "\nwith:\n  "
           << new_conv->ToString();
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 9abfa1db6a..621b155240 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -873,7 +873,7 @@ class NormalizeDescriptor {
 
 // Describes a kind of non-linearity (threshold-like mathematical function).
 enum class ActivationMode {
-  kNone,
+  kNone = 0,
   kSigmoid,
   // Rectified linear activation: f(x) = x < 0 ? 0 : x
   kRelu,
@@ -885,6 +885,8 @@ enum class ActivationMode {
   kTanh,
   // Like ReluX, but passes all values in the range [-X,X].
   kBandPass,
+
+  kNumActivationModes,  // Always in the end.
 };
 
 // Returns a string representation of the given activation mode.
-- 
GitLab


From 2e317cf8825994c2c1d77aad8be98f41a6b109d7 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 24 Sep 2018 18:16:57 -0700
Subject: [PATCH 0635/1357] Automated rollback of commit
 7a1096f424b1adcb4152db80a01a163ddb1a0173

PiperOrigin-RevId: 214366272
---
 tensorflow/core/BUILD | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index c8f7fc8cea..59b7dd04e9 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -588,7 +588,6 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:other",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "//tensorflow/core/platform/default/build_config:port",
-        "@com_google_absl//absl/flags:flag",
     ],
 )
 
@@ -2156,7 +2155,6 @@ cc_library(
         ":lib_proto_parsing",
         ":abi",
         ":core_stringpiece",
-        "@com_google_absl//absl/flags:flag",
         "//third_party/eigen3",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "@snappy",
-- 
GitLab


From d5c5f8ecc124ee9a866318f2bd7082df9e38ebf2 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 24 Sep 2018 18:25:31 -0700
Subject: [PATCH 0636/1357] [XLA] Use \n rather than <br/> for linebreaks in
 graphviz tooltips.

<br/> doesn't work in this context, but \n does.

PiperOrigin-RevId: 214367139
---
 tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 287ba84b3b..13a74fd8a1 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1110,7 +1110,7 @@ string HloDotDumper::GetInstructionNodeMetadata(const HloInstruction* instr) {
                               instr->metadata().source_line()));
   }
 
-  return StrJoin(lines, "<br/>");
+  return StrJoin(lines, "\n");
 }
 
 string HloDotDumper::GetInstructionNodeBackendConfig(
-- 
GitLab


From ec2cc9122cca5fdec52d6c1ec42b771b8082d298 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 24 Sep 2018 18:52:45 -0700
Subject: [PATCH 0637/1357] Ensure tf.range has semantics consistent with
 range, which allows start and end indices that would result in an empty
 range. tf.range errors out at graph construction time in that case.

PiperOrigin-RevId: 214369488
---
 tensorflow/python/autograph/operators/py_builtins.py      | 7 +++++++
 tensorflow/python/autograph/operators/py_builtins_test.py | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py
index 1d37ae72d3..91a2a22cc2 100644
--- a/tensorflow/python/autograph/operators/py_builtins.py
+++ b/tensorflow/python/autograph/operators/py_builtins.py
@@ -193,11 +193,18 @@ def range_(start_or_stop, stop=UNDEFINED, step=UNDEFINED):
 
 
 def _tf_range(start_or_stop, stop, step):
+  # Note: for static inputs (e.g. constants), tf.range errors out at graph
+  # construction time, instead of returning an empty tensor. Preventing the
+  # graph construction error aligns the semantics with Python.
+
   # TODO(mdan): We should optimize this when a full tensor is not required.
   if step is not UNDEFINED:
+    # TODO(mdan): Add argument coercion similar to other cases.
     return math_ops.range(start_or_stop, stop, step)
   if stop is not UNDEFINED:
+    stop = math_ops.maximum(start_or_stop, stop)
     return math_ops.range(start_or_stop, stop)
+  start_or_stop = math_ops.maximum(start_or_stop, 0)
   return math_ops.range(start_or_stop)
 
 
diff --git a/tensorflow/python/autograph/operators/py_builtins_test.py b/tensorflow/python/autograph/operators/py_builtins_test.py
index d64d31cc79..c94a918d5a 100644
--- a/tensorflow/python/autograph/operators/py_builtins_test.py
+++ b/tensorflow/python/autograph/operators/py_builtins_test.py
@@ -126,6 +126,13 @@ class PyBuiltinsTest(test.TestCase):
       r = py_builtins.range_(2, 0, constant_op.constant(-1))
       self.assertAllEqual(sess.run(r), [2, 1])
 
+  def test_range_tensor_empty_range(self):
+    with self.test_session() as sess:
+      r = py_builtins.range_(constant_op.constant(-3))
+      self.assertAllEqual(sess.run(r), [])
+      r = py_builtins.range_(5, constant_op.constant(2))
+      self.assertAllEqual(sess.run(r), [])
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From f0886f7269de900d226455d4831722f6fc94a71b Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Tue, 25 Sep 2018 09:59:17 +0800
Subject: [PATCH 0638/1357] Fix build dependencies in tensorflow/cc/BUILD.

---
 tensorflow/cc/BUILD                            | 1 +
 tensorflow/python/kernel_tests/relu_op_test.py | 4 ++--
 tensorflow/python/ops/nn_ops.py                | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index f56521dac0..e99d15f85d 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -410,6 +410,7 @@ tf_cc_test(
     srcs = ["gradients/nn_grad_test.cc"],
     deps = [
         ":cc_ops",
+        ":cc_ops_internal",
         ":grad_op_registry",
         ":grad_testutil",
         ":gradient_checker",
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 86d9c90e83..d97a1613b9 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -351,7 +351,7 @@ class LeakyReluTest(test.TestCase):
     self.assertLess(err, 1e-10)
 
   def testGradGradFloat32(self):
-    with compat.forward_compatibility_horizon(2018, 10, 2):
+    with compat.forward_compatibility_horizon(2018, 11, 2):
       with self.test_session():
         x = constant_op.constant(
             [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
@@ -369,7 +369,7 @@ class LeakyReluTest(test.TestCase):
       self.assertLess(err, 1e-4)
 
   def testGradGradFloat64(self):
-    with compat.forward_compatibility_horizon(2018, 10, 2):
+    with compat.forward_compatibility_horizon(2018, 11, 2):
       with self.test_session():
         x = constant_op.constant(
             [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index d646245ce3..2861f40586 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1601,7 +1601,7 @@ def leaky_relu(features, alpha=0.2, name=None):
     features = ops.convert_to_tensor(features, name="features")
     if features.dtype.is_integer:
       features = math_ops.to_float(features)
-    if compat.forward_compatible(2018, 10, 1):
+    if compat.forward_compatible(2018, 11, 1):
       return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name)
     alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha")
     return math_ops.maximum(alpha * features, features, name=name)
-- 
GitLab


From dee007d9bab96fcbf7673cb7ed3d5235b122f12a Mon Sep 17 00:00:00 2001
From: Brian Patton <bjp@google.com>
Date: Mon, 24 Sep 2018 19:00:02 -0700
Subject: [PATCH 0639/1357] Allow callers to specify a preferred dtype when
 calling convert_to_tensor.

PiperOrigin-RevId: 214370113
---
 tensorflow/python/ops/distributions/util.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py
index c61efebca0..ad848dfee6 100644
--- a/tensorflow/python/ops/distributions/util.py
+++ b/tensorflow/python/ops/distributions/util.py
@@ -155,7 +155,8 @@ def get_logits_and_probs(logits=None,
                          probs=None,
                          multidimensional=False,
                          validate_args=False,
-                         name="get_logits_and_probs"):
+                         name="get_logits_and_probs",
+                         dtype=None):
   """Converts logit to probabilities (or vice-versa), and returns both.
 
   Args:
@@ -169,6 +170,7 @@ def get_logits_and_probs(logits=None,
       `0 <= probs <= 1` (if not `multidimensional`) or that the last dimension
       of `probs` sums to one.
     name: A name for this operation (optional).
+    dtype: `tf.DType` to prefer when converting args to `Tensor`s.
 
   Returns:
     logits, probs: Tuple of `Tensor`s. If `probs` has an entry that is `0` or
@@ -183,7 +185,7 @@ def get_logits_and_probs(logits=None,
       raise ValueError("Must pass probs or logits, but not both.")
 
     if probs is None:
-      logits = ops.convert_to_tensor(logits, name="logits")
+      logits = ops.convert_to_tensor(logits, name="logits", dtype=dtype)
       if not logits.dtype.is_floating:
         raise TypeError("logits must having floating type.")
       # We can early return since we constructed probs and therefore know
@@ -194,7 +196,7 @@ def get_logits_and_probs(logits=None,
         return logits, nn.softmax(logits, name="probs")
       return logits, math_ops.sigmoid(logits, name="probs")
 
-    probs = ops.convert_to_tensor(probs, name="probs")
+    probs = ops.convert_to_tensor(probs, name="probs", dtype=dtype)
     if not probs.dtype.is_floating:
       raise TypeError("probs must having floating type.")
 
-- 
GitLab


From 46bbba88be2ce8c4470c04c6f08171c2b7b857ca Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Tue, 25 Sep 2018 10:16:01 +0800
Subject: [PATCH 0640/1357] Some minor changes.

---
 tensorflow/core/kernels/mkl_slice_op.cc | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index 20c4921390..109fe59ed5 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -77,8 +77,8 @@ static void ValidateMklInputs(OpKernelContext* context, bool* is_identity,
   GetMklShape(context, kInputSizeIndex, &size_mkl_shape);
 
   // Begin and size tensors cannot be in MklDnn layout.
-  CHECK_EQ(begin_mkl_shape.IsMklTensor(), false);
-  CHECK_EQ(size_mkl_shape.IsMklTensor(), false);
+  DCHECK_EQ(begin_mkl_shape.IsMklTensor(), false);
+  DCHECK_EQ(size_mkl_shape.IsMklTensor(), false);
 
   TensorShape input_tf_shape = input_mkl_shape.IsMklTensor()
                                    ? input_mkl_shape.GetTfShape()
@@ -92,9 +92,8 @@ static void ValidateMklInputs(OpKernelContext* context, bool* is_identity,
                    size_tensor.NumElements() == input_dims,
       errors::InvalidArgument(
           "Expected begin and size arguments to be 1-D tensors of size ",
-          input_dims, ", but got shapes ",
-          begin_tensor.shape().DebugString(), " and ",
-          size_tensor.shape().DebugString(), " instead."));
+          input_dims, ", but got shapes ", begin_tensor.shape().DebugString(),
+          " and ", size_tensor.shape().DebugString(), " instead."));
 
   *begin = IntTensorToInt64Vec(begin_tensor);
   *size = IntTensorToInt64Vec(size_tensor);
@@ -173,7 +172,8 @@ class MklDnnSliceOp : public OpKernel {
     CheckCommonCasesForMklInputs<T>(context, &begin, &size, &done);
     if (!context->status().ok() || done == true) return;
 
-    // Though MKL-DNN supports more than 8 dimension and less than 12 dimension tensor.
+    // Though MKL-DNN supports more than 8 dimension and
+    // less than 12 dimension tensor.
     // But we are mimicking functionality of Eigen Slice op for CPU.
     if (begin.size() >= 8) {
       OP_REQUIRES(
@@ -256,13 +256,14 @@ class MklDnnSliceOp : public OpKernel {
         auto input_md = input_mkl_shape.GetMklLayout();
         src.SetUsrMem(input_md, &input_tensor);
       } else {
-        // Initialize input dimensions and strides to be used when input is not in
-        // MklDnn layout.
+        // Initialize input dimensions and strides to be used when input is not
+        // in MklDnn layout.
         memory::dims input_dims, input_strides;
         input_dims = TFShapeToMklDnnDims(input_tensor.shape());
         input_strides = CalculateTFStrides(input_dims);
         // Create input memory descriptor.
-        auto input_md = MklDnnData<T>::CreateBlockedMemDesc(input_dims, input_strides);
+        auto input_md =
+            MklDnnData<T>::CreateBlockedMemDesc(input_dims, input_strides);
         src.SetUsrMem(input_md, &input_tensor);
       }
 
@@ -281,7 +282,7 @@ class MklDnnSliceOp : public OpKernel {
       AllocateOutputTensor(context, input_mkl_shape, &output_pd, size_dims,
                            &output_tensor, &output_mkl_shape);
       CHECK_NOTNULL(output_tensor);
-      CHECK_EQ(input_mkl_shape.IsMklTensor(), output_mkl_shape.IsMklTensor());
+      DCHECK_EQ(input_mkl_shape.IsMklTensor(), output_mkl_shape.IsMklTensor());
       output.SetUsrMem(output_md, output_tensor);
 
       std::vector<primitive> net;
-- 
GitLab


From e78f5226b94e6bdb9c204351791198488d38d403 Mon Sep 17 00:00:00 2001
From: Nick Felt <nickfelt@google.com>
Date: Mon, 24 Sep 2018 19:16:26 -0700
Subject: [PATCH 0641/1357] Update tensorboard dependency to 1.11.x

PiperOrigin-RevId: 214371640
---
 tensorflow/tools/pip_package/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index d40ffb8cd0..1481b53920 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -57,7 +57,7 @@ REQUIRED_PACKAGES = [
     'six >= 1.10.0',
     'protobuf >= 3.6.0',
     'setuptools <= 39.1.0',
-    'tensorboard >= 1.10.0, < 1.11.0',
+    'tensorboard >= 1.11.0, < 1.12.0',
     'termcolor >= 1.1.0',
 ]
 
-- 
GitLab


From 720594142a51c2676d086ba00705d95002474687 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 24 Sep 2018 19:19:06 -0700
Subject: [PATCH 0642/1357] Internal change.

PiperOrigin-RevId: 214371906
---
 tensorflow/tools/docker/parameterized_docker_build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh
index 448a3a7647..570aa8278c 100755
--- a/tensorflow/tools/docker/parameterized_docker_build.sh
+++ b/tensorflow/tools/docker/parameterized_docker_build.sh
@@ -244,7 +244,7 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
 
     if [[ "${TF_DOCKER_BUILD_TYPE}" == "gpu" ]]; then
       export TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS=\
-  "${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS} -e TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2"
+  "${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS} -e TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0"
     fi
 
     pushd "${SCRIPT_DIR}/../../../"
-- 
GitLab


From bb1c131aad55e336d25fd297ecd8582773d6476f Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 24 Sep 2018 19:26:46 -0700
Subject: [PATCH 0643/1357] Fix memory leak of a Var resource in the multiple
 variable-handling kernels.

This change fixes memory leaks in the ScatterNdUpdateOp and StridedSliceAssign kernels, and in training-op kernels that use `GetTrainingVariableMutex()`.

PiperOrigin-RevId: 214372346
---
 tensorflow/core/kernels/scatter_nd_op.cc       | 1 +
 tensorflow/core/kernels/strided_slice_op.cc    | 1 +
 tensorflow/core/kernels/training_op_helpers.cc | 1 +
 3 files changed, 3 insertions(+)

diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index e0194605ce..2f8aede427 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -145,6 +145,7 @@ class ScatterNdUpdateOp : public OpKernel {
     if (dtype_ == DT_RESOURCE) {
       Var* v;
       OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
+      core::ScopedUnref scoped_unref(v);
       mutex_lock m(*v->mu());
       DoCompute(c);
     } else if (use_exclusive_lock_) {
diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 7b537fef5b..f0575de4d9 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -306,6 +306,7 @@ class StridedSliceAssignOp : public OpKernel {
       Var* v;
       OP_REQUIRES_OK(context,
                      LookupResource(context, HandleFromInput(context, 0), &v));
+      core::ScopedUnref scoped_unref(v);
       mutex_lock ml(*v->mu());
       OP_REQUIRES_OK(context,
                      PrepareToUpdateVariable<Device, T>(context, v->tensor()));
diff --git a/tensorflow/core/kernels/training_op_helpers.cc b/tensorflow/core/kernels/training_op_helpers.cc
index d3c4f62071..83b83fcdb9 100644
--- a/tensorflow/core/kernels/training_op_helpers.cc
+++ b/tensorflow/core/kernels/training_op_helpers.cc
@@ -21,6 +21,7 @@ mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input) {
   if (ctx->input_dtype(input) == DT_RESOURCE) {
     Var* var;
     if (LookupResource(ctx, HandleFromInput(ctx, input), &var).ok()) {
+      core::ScopedUnref scoped_unref(var);
       return var->mu();
     } else {
       ctx->CtxFailureWithWarning(
-- 
GitLab


From e9cdf9f412a3aea324a4a1655d3bffb87abaff0d Mon Sep 17 00:00:00 2001
From: Youlong Cheng <ylc@google.com>
Date: Mon, 24 Sep 2018 19:47:26 -0700
Subject: [PATCH 0644/1357] [TF:XLA] Introduce CollectivePermute op.

PiperOrigin-RevId: 214373714
---
 .../contrib/tpu/ops/cross_replica_ops.cc      | 20 ++++++++++++++
 tensorflow/contrib/tpu/python/ops/tpu_ops.py  | 27 ++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc b/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
index ea8e0e00ed..87e3a5946c 100644
--- a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
+++ b/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
@@ -125,4 +125,24 @@ output: The sum of all the distributed inputs.
 T: The type of elements to be summed.
 )doc");
 
+REGISTER_OP("CollectivePermute")
+    .Input("input: T")
+    .Input("source_target_pairs: int32")
+    .Output("output: T")
+    .Attr("T: numbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+An Op to permute tensors across replicated TPU instances. Each instance
+supplies its own input.
+
+For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing
+source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs:
+`[D, A, B, C]`.
+
+input: The local input to be permuted. Currently only supports float and
+  bfloat16.
+source_target_pairs: A tensor with shape [num_pairs, 2].
+output: The permuted input.
+T: The type of elements to be exchanged.
+)doc");
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
index d92a0652bb..a1aee69691 100644
--- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py
+++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
@@ -95,7 +95,7 @@ if platform.system() != "Windows":
     ]
 
   def cross_replica_sum(x, group_assignment=None, name=None):
-    """Sum the input tensor accorss replicas according to group_assignment.
+    """Sum the input tensor across replicas according to group_assignment.
 
     Args:
       x: The local tensor to the sum.
@@ -112,6 +112,31 @@ if platform.system() != "Windows":
 
     return gen_tpu_ops.cross_replica_sum(x, group_assignment, name=name)
 
+  def collective_permute(x, source_target_pairs, name=None):
+    """Permute the input tensor across replicas given source_target_pairs.
+
+    For each source_target_pair <a, b>, we send replica a's input to replica b.
+    Each replica id must only appear once in the source column. Also it must
+    only appear once in the target column.
+    For the replica id not in the target column, this op returns a zero tensor
+    with the same shape and dtype of the input x.
+
+    For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing
+    source_target_pairs=`[[0,1],[1,2],[2,3]]` gets the outputs:
+    `[0, A, B, C]`.
+
+    Args:
+      x: The local tensor to be permuted.
+      source_target_pairs: 2d int lists with shape [num_pairs, 2].
+        source_target_pairs[i][0] represents the source replica id and
+        source_target_pairs[i][1] represents the target replica id.
+      name: Optional op name.
+
+    Returns:
+      A `Tensor` which is permuted.
+    """
+    return gen_tpu_ops.collective_permute(x, source_target_pairs, name=name)
+
   @ops.RegisterGradient("CrossReplicaSum")
   def _cross_replica_sum_grad(op, grad):
     # The gradient of a cross replica sum is also a cross-replica sum.
-- 
GitLab


From d1ab8b71c2115caacfec19d849ddabf7f1f4287b Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Tue, 25 Sep 2018 11:15:06 +0800
Subject: [PATCH 0645/1357] Some changes for CHECK.

---
 tensorflow/core/kernels/mkl_slice_op.cc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index 109fe59ed5..d63e14adf6 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -48,7 +48,8 @@ gtl::InlinedVector<int64, 4> IntTensorToInt64Vec(const Tensor& tensor) {
       out.push_back(tensor.flat<int64>()(i));
     }
   } else {
-    LOG(FATAL) << "tensor must be either int32 or int64";
+    // tensor must be either int32 or int64
+    DCHECK(false);
   }
   return out;
 }
@@ -281,7 +282,7 @@ class MklDnnSliceOp : public OpKernel {
       // layout.
       AllocateOutputTensor(context, input_mkl_shape, &output_pd, size_dims,
                            &output_tensor, &output_mkl_shape);
-      CHECK_NOTNULL(output_tensor);
+      DCHECK(output_tensor);
       DCHECK_EQ(input_mkl_shape.IsMklTensor(), output_mkl_shape.IsMklTensor());
       output.SetUsrMem(output_md, output_tensor);
 
@@ -310,8 +311,8 @@ class MklDnnSliceOp : public OpKernel {
                             const memory::dims& output_dims,
                             Tensor** output_tensor,
                             MklDnnShape* output_mkl_shape) {
-    CHECK_NOTNULL(output_tensor);
-    CHECK_NOTNULL(output_mkl_shape);
+    DCHECK(output_tensor);
+    DCHECK(output_mkl_shape);
 
     TensorShape output_tf_shape;
 
-- 
GitLab


From 4dc77744ff6a6854cf4aa2934eb4501bc22c3465 Mon Sep 17 00:00:00 2001
From: Debidatta Dwibedi <debidatta@google.com>
Date: Mon, 24 Sep 2018 20:22:15 -0700
Subject: [PATCH 0646/1357] Documentation for tf.map_fn in Eager mode.

PiperOrigin-RevId: 214376416
---
 tensorflow/python/ops/functional_ops.py       | 40 +++++++++++++++++--
 .../tools/api/golden/v1/tensorflow.pbtxt      |  2 +-
 .../tools/api/golden/v2/tensorflow.pbtxt      |  2 +-
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py
index a4e7c84ae4..119d9522bd 100644
--- a/tensorflow/python/ops/functional_ops.py
+++ b/tensorflow/python/ops/functional_ops.py
@@ -41,6 +41,7 @@ from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.ops.gen_functional_ops import remote_call
 # pylint: enable=unused-import
 from tensorflow.python.ops.gen_functional_ops import symbolic_gradient
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
@@ -263,7 +264,7 @@ def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
 
 
 @tf_export("map_fn")
-def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
+def map_fn(fn, elems, dtype=None, parallel_iterations=None, back_prop=True,
            swap_memory=False, infer_shape=True, name=None):
   """map on the list of tensors unpacked from `elems` on dimension 0.
 
@@ -305,6 +306,25 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
 
   instead.
 
+  When executing eagerly, map_fn does not execute in parallel even if
+  `parallel_iterations` is set to a value > 1. You can still get the
+  performance benefits of running a function in parallel by using the
+  `tf.contrib.eager.defun` decorator,
+
+  ```python
+  # Assume the function being used in map_fn is fn.
+  # To ensure map_fn calls fn in parallel, use the defun decorator.
+  @tf.contrib.eager.defun
+  def func(tensor):
+    return tf.map_fn(fn, tensor)
+  ```
+
+  Note that if you use the defun decorator, any non-TensorFlow Python code
+  that you may have written in your function won't get executed. See
+  `tf.contrib.eager.defun` for more details. The recommendation would be to
+  debug without defun but switch to defun to get performance benefits of
+  running map_fn in parallel.
+
   Args:
     fn: The callable to be performed.  It accepts one argument, which will
       have the same (possibly nested) structure as `elems`.  Its output
@@ -317,7 +337,8 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
       of Tensors differing from the structure of `elems`, then `dtype` is not
       optional and must have the same structure as the output of `fn`.
     parallel_iterations: (optional) The number of iterations allowed to run
-      in parallel.
+      in parallel. When graph building, the default value is 10. While executing
+      eagerly, the default value is set to 1.
     back_prop: (optional) True enables support for back propagation.
     swap_memory: (optional) True enables GPU-CPU memory swapping.
     infer_shape: (optional) False disables tests for consistent output shapes.
@@ -363,6 +384,20 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
         " SparseTensor(input.indices, map_fn(fn, input.values), "
         "input.dense_shape)")
 
+  in_graph_mode = not context.executing_eagerly()
+  # Set the default number of parallel_iterations depending on graph/eager mode.
+  if in_graph_mode and not parallel_iterations:
+    parallel_iterations = 10
+  elif not in_graph_mode and not parallel_iterations:
+    parallel_iterations = 1
+
+  if not in_graph_mode and parallel_iterations > 1:
+    logging.log_first_n(logging.WARN, "Setting parallel_iterations > 1 has no "
+                        "effect when executing eagerly. Consider calling map_fn"
+                        " with tf.contrib.eager.defun to execute fn in "
+                        "parallel.", 1)
+    parallel_iterations = 1
+
   input_is_sequence = nest.is_sequence(elems)
   input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]
   def input_pack(x):
@@ -381,7 +416,6 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
 
   elems_flat = input_flatten(elems)
 
-  in_graph_mode = not context.executing_eagerly()
   with ops.name_scope(name, "map", elems_flat):
     # TODO(akshayka): Remove the in_graph_mode check once caching devices are
     # supported in Eager
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 18fc5836dc..503e145a91 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1434,7 +1434,7 @@ tf_module {
   }
   member_method {
     name: "map_fn"
-    argspec: "args=[\'fn\', \'elems\', \'dtype\', \'parallel_iterations\', \'back_prop\', \'swap_memory\', \'infer_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\', \'False\', \'True\', \'None\'], "
+    argspec: "args=[\'fn\', \'elems\', \'dtype\', \'parallel_iterations\', \'back_prop\', \'swap_memory\', \'infer_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\', \'False\', \'True\', \'None\'], "
   }
   member_method {
     name: "matching_files"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 61448f887d..96212f5528 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -1378,7 +1378,7 @@ tf_module {
   }
   member_method {
     name: "map_fn"
-    argspec: "args=[\'fn\', \'elems\', \'dtype\', \'parallel_iterations\', \'back_prop\', \'swap_memory\', \'infer_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\', \'False\', \'True\', \'None\'], "
+    argspec: "args=[\'fn\', \'elems\', \'dtype\', \'parallel_iterations\', \'back_prop\', \'swap_memory\', \'infer_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\', \'False\', \'True\', \'None\'], "
   }
   member_method {
     name: "matching_files"
-- 
GitLab


From 6ba60e051409a5346c2aab21160c9c311de1cb03 Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Mon, 24 Sep 2018 20:22:28 -0700
Subject: [PATCH 0647/1357] Add validation that input shapes should be fully
 defined when using TPU strategy with keras.

PiperOrigin-RevId: 214376435
---
 .../contrib/distribute/python/keras_test.py   | 23 +++++++++++++++++++
 .../contrib/distribute/python/tpu_strategy.py |  2 +-
 .../engine/distributed_training_utils.py      | 16 ++++++++++++-
 tensorflow/python/keras/engine/training.py    | 12 ++++++----
 .../keras/engine/training_distributed.py      |  2 --
 5 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 8165a70743..2e6cd43fd4 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -635,6 +635,29 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
                                    'expected input to have shape'):
         model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)
 
+  @combinations.generate(combinations.combine(
+      distribution=[combinations.tpu_strategy_one_step],
+      mode=['graph']))
+  def test_dataset_input_shape_fully_defined(self, distribution):
+    with self.cached_session():
+      x = keras.layers.Input(shape=(3,), name='input')
+      y = keras.layers.Dense(4, name='dense')(x)
+      model = keras.Model(x, y)
+
+      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss, distribute=distribution)
+
+      inputs = np.zeros((10, 3), dtype=np.float32)
+      targets = np.zeros((10, 4), dtype=np.float32)
+      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+      # Input shapes are not fully known. Batch dimension is unknown as we are
+      # not using the drop_remainder argument.
+      dataset = dataset.repeat(100).batch(10)
+
+      with self.assertRaisesRegexp(ValueError, 'requires fully defined shapes'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)
+
   def test_learning_phase_value(self):
     # TODO(anjalisridhar): Modify this test to use Lambdas since we can compare
     # meaningful values. Currently we don't pass the learning phase if the
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index ba2cc2e806..a6762e5e87 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -158,7 +158,7 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
       raise ValueError(
           'TPU currently requires fully defined shapes. Either use '
           'set_shape() on the input tensors or use '
-          'dataset.apply(map_and_batch(..., drop_remainder=True)).')
+          'dataset.batch(..., drop_remainder=True).')
     types = nest.flatten(iterator.output_types)
 
     enqueue_ops = [
diff --git a/tensorflow/python/keras/engine/distributed_training_utils.py b/tensorflow/python/keras/engine/distributed_training_utils.py
index b28df75493..39341a931b 100644
--- a/tensorflow/python/keras/engine/distributed_training_utils.py
+++ b/tensorflow/python/keras/engine/distributed_training_utils.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.client import session as session_module
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras import backend as K
@@ -293,12 +294,14 @@ def configure_and_create_session(distribution_strategy):
   K.set_session(session)
 
 
-def validate_inputs(x, y):
+def validate_inputs(x, y, distribution_strategy):
   """Validate inputs when using DistributionStrategy.
 
   Args:
     x: Model Inputs.
     y: Model Targets.
+    distribution_strategy: The DistributionStrategy with which the model is
+      compiled.
 
   Raises:
     ValueError: if input is not a Dataset or a numpy array.
@@ -319,6 +322,17 @@ def validate_inputs(x, y):
                      'Iterator. You must pass a Dataset object or a numpy '
                      'array as input.')
 
+  if distribution_strategy.__class__.__name__ == 'TPUStrategy':
+    for i in [x, y]:
+      if isinstance(i, dataset_ops.Dataset):
+        shapes = nest.flatten(i.output_shapes)
+        if any([not s.is_fully_defined() for s in shapes]):
+          raise ValueError(
+              'Using TPUs currently requires fully defined shapes. Either use '
+              'set_shape() on the input tensors or use '
+              'dataset.batch(..., drop_remainder=True).'
+              'Found unknown shape {} in input {}.'.format(s, i))
+
 
 def get_input_batch_params(first_x_value, batch_size, current_strategy):
   """Calculate the number of batches and steps/steps_per_epoch.
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 154c219dcc..ade8a4b32d 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -1521,7 +1521,8 @@ class Model(Network):
     if self._distribution_strategy:
       distributed_training_utils.validate_callbacks(callbacks)
 
-      distributed_training_utils.validate_inputs(x, y)
+      distributed_training_utils.validate_inputs(
+          x, y, self._distribution_strategy)
 
       first_x_value = nest.flatten(x)[0]
       if not steps_per_epoch and isinstance(first_x_value, np.ndarray):
@@ -1563,7 +1564,8 @@ class Model(Network):
 
       # Validate and standardize validation data.
       if self._distribution_strategy:
-        distributed_training_utils.validate_inputs(val_x, val_y)
+        distributed_training_utils.validate_inputs(
+            val_x, val_y, self._distribution_strategy)
         first_valx_value = nest.flatten(val_x)[0]
         if not validation_steps and isinstance(first_valx_value, np.ndarray):
           validation_steps = distributed_training_utils.get_input_batch_params(
@@ -1737,7 +1739,8 @@ class Model(Network):
 
     # Validate and standardize user data.
     if self._distribution_strategy:
-      distributed_training_utils.validate_inputs(x, y)
+      distributed_training_utils.validate_inputs(
+          x, y, self._distribution_strategy)
       first_x_value = nest.flatten(x)[0]
       if isinstance(first_x_value, np.ndarray) and not steps:
         steps = distributed_training_utils.get_input_batch_params(
@@ -1852,7 +1855,8 @@ class Model(Network):
       # `MirroredStrategy`.
       if hasattr(self._distribution_strategy, '_prefetch_on_device'):
         self._distribution_strategy._prefetch_on_device = False  # pylint: disable=protected-access
-      distributed_training_utils.validate_inputs(x, None)
+      distributed_training_utils.validate_inputs(
+          x, None, self._distribution_strategy)
       first_x_value = nest.flatten(x)[0]
       if isinstance(first_x_value, np.ndarray) and not steps:
         steps = distributed_training_utils.get_input_batch_params(
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 26c5ec4efc..8b434ca444 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -233,8 +233,6 @@ def _experimental_fit_loop(
   """
   current_strategy = model._distribution_strategy
 
-  # TODO(priyag): Add validation that shapes are fully defined for TPU case.
-
   K.get_session().run(current_strategy.initialize())
 
   def _per_device_train_function(model):
-- 
GitLab


From 626fef2af7d4bc49aeeef7ffd195dc30235bcd1e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 20:39:41 -0700
Subject: [PATCH 0648/1357] Update kernel evals to use new kernel signatures.

PiperOrigin-RevId: 214377809
---
 tensorflow/contrib/lite/kernels/conv.cc       | 141 ++++++++++++------
 .../contrib/lite/kernels/fully_connected.cc   |  66 +++++---
 .../kernels/internal/optimized/cblas_conv.h   |  19 ++-
 .../internal/optimized/multithreaded_conv.h   |   4 +-
 .../contrib/lite/kernels/transpose_conv.cc    |  21 ++-
 5 files changed, 165 insertions(+), 86 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc
index ab6bdaecaa..101b4fc961 100644
--- a/tensorflow/contrib/lite/kernels/conv.cc
+++ b/tensorflow/contrib/lite/kernels/conv.cc
@@ -414,35 +414,57 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
   }
 
   switch (effective_kernel_type) {
-    case kReference:
+    case kReference: {
+      ConvParams op_params;
+      op_params.padding_type = PaddingType::kSame;
+      op_params.padding_values.width = data->padding.width;
+      op_params.padding_values.height = data->padding.height;
+      op_params.stride_width = params->stride_width;
+      op_params.stride_height = params->stride_height;
+      op_params.dilation_width_factor = params->dilation_width_factor;
+      op_params.dilation_height_factor = params->dilation_height_factor;
+      op_params.input_offset = input_offset;
+      op_params.weights_offset = filter_offset;
+      op_params.output_offset = output_offset;
+      op_params.output_multiplier = data->output_multiplier;
+      op_params.output_shift = -data->output_shift;
+      op_params.quantized_activation_min = data->output_activation_min;
+      op_params.quantized_activation_max = data->output_activation_max;
       reference_ops::Conv(
-          GetTensorData<uint8_t>(input), GetTensorDims(input), input_offset,
-          GetTensorData<uint8_t>(filter), GetTensorDims(filter), filter_offset,
-          GetTensorData<int32_t>(bias), GetTensorDims(bias),
-          params->stride_width, params->stride_height,
-          params->dilation_width_factor, params->dilation_height_factor,
-          data->padding.width, data->padding.height, output_offset,
-          data->output_multiplier, data->output_shift,
-          data->output_activation_min, data->output_activation_max,
-          GetTensorData<uint8_t>(output), GetTensorDims(output),
-          GetTensorData<uint8_t>(im2col), GetTensorDims(im2col), gemm_context);
+          op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+          GetTensorShape(filter), GetTensorData<uint8_t>(filter),
+          GetTensorShape(bias), GetTensorData<int32_t>(bias),
+          GetTensorShape(output), GetTensorData<uint8_t>(output),
+          GetTensorShape(im2col), GetTensorData<uint8_t>(im2col), gemm_context);
       break;
+    }
     case kGenericOptimized:
     case kMultithreadOptimized:
-    case kCblasOptimized:
+    case kCblasOptimized: {
       // There is only one optimized implementation for Quantized Conv.
+      ConvParams op_params;
+      op_params.padding_type = PaddingType::kSame;
+      op_params.padding_values.width = data->padding.width;
+      op_params.padding_values.height = data->padding.height;
+      op_params.stride_width = params->stride_width;
+      op_params.stride_height = params->stride_height;
+      op_params.dilation_width_factor = params->dilation_width_factor;
+      op_params.dilation_height_factor = params->dilation_height_factor;
+      op_params.input_offset = input_offset;
+      op_params.weights_offset = filter_offset;
+      op_params.output_offset = output_offset;
+      op_params.output_multiplier = data->output_multiplier;
+      op_params.output_shift = -data->output_shift;
+      op_params.quantized_activation_min = data->output_activation_min;
+      op_params.quantized_activation_max = data->output_activation_max;
       optimized_ops::Conv(
-          GetTensorData<uint8_t>(input), GetTensorDims(input), input_offset,
-          GetTensorData<uint8_t>(filter), GetTensorDims(filter), filter_offset,
-          GetTensorData<int32_t>(bias), GetTensorDims(bias),
-          params->stride_width, params->stride_height,
-          params->dilation_width_factor, params->dilation_height_factor,
-          data->padding.width, data->padding.height, output_offset,
-          data->output_multiplier, data->output_shift,
-          data->output_activation_min, data->output_activation_max,
-          GetTensorData<uint8_t>(output), GetTensorDims(output),
-          GetTensorData<uint8_t>(im2col), GetTensorDims(im2col), gemm_context);
+          op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+          GetTensorShape(filter), GetTensorData<uint8_t>(filter),
+          GetTensorShape(bias), GetTensorData<int32_t>(bias),
+          GetTensorShape(output), GetTensorData<uint8_t>(output),
+          GetTensorShape(im2col), GetTensorData<uint8_t>(im2col), gemm_context);
       break;
+    }
   }
 }
 
@@ -467,27 +489,41 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
   }
   switch (effective_kernel_type) {
     case kReference: {
-      reference_ops::Conv(
-          GetTensorData<float>(input), GetTensorDims(input),
-          GetTensorData<float>(filter), GetTensorDims(filter),
-          GetTensorData<float>(bias), GetTensorDims(bias), params->stride_width,
-          params->stride_height, params->dilation_width_factor,
-          params->dilation_height_factor, data->padding.width,
-          data->padding.height, output_activation_min, output_activation_max,
-          GetTensorData<float>(output), GetTensorDims(output),
-          GetTensorData<float>(im2col), GetTensorDims(im2col));
+      ConvParams op_params;
+      op_params.padding_type = PaddingType::kSame;
+      op_params.padding_values.width = data->padding.width;
+      op_params.padding_values.height = data->padding.height;
+      op_params.stride_width = params->stride_width;
+      op_params.stride_height = params->stride_height;
+      op_params.dilation_width_factor = params->dilation_width_factor;
+      op_params.dilation_height_factor = params->dilation_height_factor;
+      op_params.float_activation_min = output_activation_min;
+      op_params.float_activation_max = output_activation_max;
+      reference_ops::Conv(op_params, GetTensorShape(input),
+                          GetTensorData<float>(input), GetTensorShape(filter),
+                          GetTensorData<float>(filter), GetTensorShape(bias),
+                          GetTensorData<float>(bias), GetTensorShape(output),
+                          GetTensorData<float>(output), GetTensorShape(im2col),
+                          GetTensorData<float>(im2col));
       break;
     }
     case kGenericOptimized: {
-      optimized_ops::Conv(
-          GetTensorData<float>(input), GetTensorDims(input),
-          GetTensorData<float>(filter), GetTensorDims(filter),
-          GetTensorData<float>(bias), GetTensorDims(bias), params->stride_width,
-          params->stride_height, params->dilation_width_factor,
-          params->dilation_height_factor, data->padding.width,
-          data->padding.height, output_activation_min, output_activation_max,
-          GetTensorData<float>(output), GetTensorDims(output),
-          GetTensorData<float>(im2col), GetTensorDims(im2col));
+      ConvParams op_params;
+      op_params.padding_type = PaddingType::kSame;
+      op_params.padding_values.width = data->padding.width;
+      op_params.padding_values.height = data->padding.height;
+      op_params.stride_width = params->stride_width;
+      op_params.stride_height = params->stride_height;
+      op_params.dilation_width_factor = params->dilation_width_factor;
+      op_params.dilation_height_factor = params->dilation_height_factor;
+      op_params.float_activation_min = output_activation_min;
+      op_params.float_activation_max = output_activation_max;
+      optimized_ops::Conv(op_params, GetTensorShape(input),
+                          GetTensorData<float>(input), GetTensorShape(filter),
+                          GetTensorData<float>(filter), GetTensorShape(bias),
+                          GetTensorData<float>(bias), GetTensorShape(output),
+                          GetTensorData<float>(output), GetTensorShape(im2col),
+                          GetTensorData<float>(im2col));
       break;
     }
     case kMultithreadOptimized: {
@@ -561,18 +597,27 @@ void EvalHybrid(TfLiteContext* context, TfLiteNode* node,
     case kReference:
     case kGenericOptimized:
     case kMultithreadOptimized:
-    case kCblasOptimized:
+    case kCblasOptimized: {
       // There is only one implementation for hybrid kernel. Note
       // this does not make use of gemmlowp nor supports multithreading.
+      ConvParams op_params;
+      op_params.padding_type = PaddingType::kSame;
+      op_params.padding_values.width = data->padding.width;
+      op_params.padding_values.height = data->padding.height;
+      op_params.stride_width = params->stride_width;
+      op_params.stride_height = params->stride_height;
+      op_params.dilation_width_factor = 1;
+      op_params.dilation_height_factor = 1;
+      op_params.float_activation_min = output_activation_min;
+      op_params.float_activation_max = output_activation_max;
       optimized_ops::HybridConv(
-          quantized_input_ptr_batch, GetTensorDims(input), filter_ptr,
-          GetTensorDims(filter), GetTensorData<float>(bias),
-          GetTensorDims(bias), params->stride_width, params->stride_height,
-          data->padding.width, data->padding.height, scaling_factors_ptr,
-          output_activation_min, output_activation_max,
-          GetTensorData<float>(output), GetTensorDims(output), im2col_ptr,
-          GetTensorDims(im2col));
+          op_params, scaling_factors_ptr, GetTensorShape(input),
+          quantized_input_ptr_batch, GetTensorShape(filter), filter_ptr,
+          GetTensorShape(bias), GetTensorData<float>(bias),
+          GetTensorShape(output), GetTensorData<float>(output),
+          GetTensorShape(im2col), im2col_ptr);
       break;
+    }
   }
 }
 
diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc
index 7a71fcc219..f6d2f76dbe 100644
--- a/tensorflow/contrib/lite/kernels/fully_connected.cc
+++ b/tensorflow/contrib/lite/kernels/fully_connected.cc
@@ -281,15 +281,23 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
   int32_t input_offset = -input->params.zero_point;
   int32_t filter_offset = -filter->params.zero_point;
   int32_t output_offset = output->params.zero_point;
-#define TF_LITE_FULLY_CONNECTED(type, output_data_type)                     \
-  type::FullyConnected(                                                     \
-      GetTensorData<uint8_t>(input), GetTensorDims(input), input_offset,    \
-      GetTensorData<uint8_t>(filter), GetTensorDims(filter), filter_offset, \
-      GetTensorData<int32_t>(bias), GetTensorDims(bias), output_offset,     \
-      data->output_multiplier, data->output_shift,                          \
-      data->output_activation_min, data->output_activation_max,             \
-      GetTensorData<output_data_type>(output), GetTensorDims(output),       \
-      gemm_context)
+#define TF_LITE_FULLY_CONNECTED(type, output_data_type)                  \
+  {                                                                      \
+    FullyConnectedParams op_params;                                      \
+    op_params.input_offset = input_offset;                               \
+    op_params.weights_offset = filter_offset;                            \
+    op_params.output_offset = output_offset;                             \
+    op_params.output_multiplier = data->output_multiplier;               \
+    op_params.output_shift = -data->output_shift;                        \
+    op_params.quantized_activation_min = data->output_activation_min;    \
+    op_params.quantized_activation_max = data->output_activation_max;    \
+    type::FullyConnected(                                                \
+        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
+        GetTensorShape(filter), GetTensorData<uint8_t>(filter),          \
+        GetTensorShape(bias), GetTensorData<int32_t>(bias),              \
+        GetTensorShape(output), GetTensorData<output_data_type>(output), \
+        gemm_context);                                                   \
+  }
   if (kernel_type == kReference) {
     switch (output->type) {
       case kTfLiteUInt8:
@@ -349,15 +357,20 @@ TfLiteStatus EvalShuffledQuantized(TfLiteContext* context, TfLiteNode* node,
     return kTfLiteError;
   }
 
-#define TF_LITE_SHUFFLED_FULLY_CONNECTED(type)                  \
-  type::ShuffledFullyConnected(                                 \
-      GetTensorData<uint8_t>(input), GetTensorDims(input),      \
-      GetTensorData<uint8_t>(filter), GetTensorDims(filter),    \
-      GetTensorData<int32_t>(bias), GetTensorDims(bias),        \
-      data->output_multiplier, data->output_shift,              \
-      data->output_activation_min, data->output_activation_max, \
-      GetTensorData<int16_t>(output), GetTensorDims(output),    \
-      GetTensorData<uint8_t>(shuffled_input_workspace), gemm_context)
+#define TF_LITE_SHUFFLED_FULLY_CONNECTED(type)                           \
+  {                                                                      \
+    FullyConnectedParams op_params;                                      \
+    op_params.output_multiplier = data->output_multiplier;               \
+    op_params.output_shift = -data->output_shift;                        \
+    op_params.quantized_activation_min = data->output_activation_min;    \
+    op_params.quantized_activation_max = data->output_activation_max;    \
+    type::ShuffledFullyConnected(                                        \
+        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
+        GetTensorShape(filter), GetTensorData<uint8_t>(filter),          \
+        GetTensorShape(bias), GetTensorData<int32_t>(bias),              \
+        GetTensorShape(output), GetTensorData<int16_t>(output),          \
+        GetTensorData<uint8_t>(shuffled_input_workspace), gemm_context); \
+  }
   if (kernel_type == kReference) {
     TF_LITE_SHUFFLED_FULLY_CONNECTED(reference_ops);
   } else {
@@ -376,12 +389,17 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
   float output_activation_min, output_activation_max;
   CalculateActivationRange(params->activation, &output_activation_min,
                            &output_activation_max);
-#define TF_LITE_FULLY_CONNECTED(type)                                       \
-  type::FullyConnected(GetTensorData<float>(input), GetTensorDims(input),   \
-                       GetTensorData<float>(filter), GetTensorDims(filter), \
-                       GetTensorData<float>(bias), GetTensorDims(bias),     \
-                       output_activation_min, output_activation_max,        \
-                       GetTensorData<float>(output), GetTensorDims(output))
+#define TF_LITE_FULLY_CONNECTED(type)                                         \
+  {                                                                           \
+    FullyConnectedParams op_params;                                           \
+    op_params.float_activation_min = output_activation_min;                   \
+    op_params.float_activation_max = output_activation_max;                   \
+    type::FullyConnected(op_params, GetTensorShape(input),                    \
+                         GetTensorData<float>(input), GetTensorShape(filter), \
+                         GetTensorData<float>(filter), GetTensorShape(bias),  \
+                         GetTensorData<float>(bias), GetTensorShape(output),  \
+                         GetTensorData<float>(output));                       \
+  }
   if (kernel_type == kReference) {
     TF_LITE_FULLY_CONNECTED(reference_ops);
   } else if (kernel_type == kPie) {
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h b/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h
index 4a90e7e640..40d42bbae9 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h
@@ -49,9 +49,18 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
                            filter_width != 1 || filter_height != 1;
   if (need_im2col) {
     TFLITE_DCHECK(im2col_data);
-    optimized_ops::Im2col(input_data, input_dims, stride_width, stride_height,
-                          pad_width, pad_height, filter_height, filter_width, 0,
-                          im2col_data, im2col_dims);
+    ConvParams op_params;
+    op_params.padding_type = PaddingType::kSame;
+    op_params.padding_values.width = pad_width;
+    op_params.padding_values.height = pad_height;
+    op_params.stride_width = stride_width;
+    op_params.stride_height = stride_height;
+    op_params.dilation_width_factor = 1;
+    op_params.dilation_height_factor = 1;
+    optimized_ops::Im2col(op_params, filter_height, filter_width, 0,
+                          DimsToShape(input_dims), input_data,
+                          DimsToShape(im2col_dims), im2col_data);
+
     gemm_input_data = im2col_data;
     gemm_input_dims = &im2col_dims;
   } else {
@@ -82,8 +91,8 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
               stride_a, b, stride_b, 0.0f, c, stride_c);
 
   optimized_ops::AddBiasAndEvalActivationFunction(
-      bias_data, bias_dims, output_data, output_dims, output_activation_min,
-      output_activation_max);
+      output_activation_min, output_activation_max, DimsToShape(bias_dims),
+      bias_data, DimsToShape(output_dims), output_data);
 }
 
 }  // namespace cblas_ops
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h b/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h
index 59f0e3c927..b5d001cc9e 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h
@@ -157,8 +157,8 @@ inline void Conv(const Eigen::ThreadPoolDevice& device, const float* input_data,
                output_width);
 
   optimized_ops::AddBiasAndEvalActivationFunction(
-      bias_data, bias_dims, output_data, output_dims, output_activation_min,
-      output_activation_max);
+      output_activation_min, output_activation_max, DimsToShape(bias_dims),
+      bias_data, DimsToShape(output_dims), output_data);
 }
 
 }  // namespace multithreaded_ops
diff --git a/tensorflow/contrib/lite/kernels/transpose_conv.cc b/tensorflow/contrib/lite/kernels/transpose_conv.cc
index 6f2d98ede8..1c4a5ee91d 100644
--- a/tensorflow/contrib/lite/kernels/transpose_conv.cc
+++ b/tensorflow/contrib/lite/kernels/transpose_conv.cc
@@ -69,7 +69,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
   TF_LITE_ENSURE_EQ(context, NumDimensions(weights), 4);
 
-  // Currenlty only supports float32.
+  // Currently only supports float32.
   const TfLiteType data_type = input->type;
   TF_LITE_ENSURE(context, data_type == kTfLiteFloat32);
   TF_LITE_ENSURE_EQ(context, output->type, data_type);
@@ -117,19 +117,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   // Currently only support float32.
   switch (input->type) {
-    case kTfLiteFloat32:
+    case kTfLiteFloat32: {
+      tflite::ConvParams op_params;
+      op_params.padding_type = PaddingType::kSame;
+      op_params.padding_values.width = padding_size.width;
+      op_params.padding_values.height = padding_size.height;
+      op_params.stride_width = stride_width;
+      op_params.stride_height = stride_height;
+
       reference_ops::TransposeConv(
-          GetTensorData<float>(input), GetTensorDims(input),
-          GetTensorData<float>(weights), GetTensorDims(weights), stride_width,
-          stride_height, padding_size.width, padding_size.height,
-          GetTensorData<float>(output), GetTensorDims(output),
+          op_params, GetTensorShape(input), GetTensorData<float>(input),
+          GetTensorShape(weights), GetTensorData<float>(weights),
+          GetTensorShape(output), GetTensorData<float>(output),
           // Last two args specify im2col which reference_ops ignores.
           // (Note this does not lead to a performance regression, as the
           // previous optimized version was just a copy of the reference code.)
           // TODO(b/110208176): Allocate im2col tensors and switch to
           // optimized_ops.
-          GetTensorData<float>(output), GetTensorDims(output));
+          GetTensorShape(output), GetTensorData<float>(output));
       break;
+    }
     default:
       context->ReportError(context, "Type %d, not currently supported.",
                            input->type);
-- 
GitLab


From df90003a68bcb813843e447d6fa2c49deccc48b6 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Mon, 24 Sep 2018 20:54:47 -0700
Subject: [PATCH 0649/1357] Increase the tolerance to deflake fused_conv2d test

For testInceptionFwd I see 8.482029 != 8.48317 when comparing GPU vs. CPU.
testFusedConvInt8 has off-by-one errors. Both occur flakily.

PiperOrigin-RevId: 214378820
---
 .../python/ops/fused_conv2d_bias_activation_op_test.py      | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
index e47342bc7d..4894298694 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
@@ -268,7 +268,7 @@ class FusedConv2DBiasActivationTest(test.TestCase):
     with self.cached_session() as sess:
       values = sess.run(tensors)
       for i in range(1, len(values)):
-        self.assertAllClose(values[0], values[i], rtol=1e-5, atol=1e-5)
+        self.assertAllClose(values[0], values[i], rtol=1e-3, atol=1e-3)
 
   def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, bias, strides,
                     padding):
@@ -873,9 +873,7 @@ class FusedConvInt8Tests(test.TestCase):
 
     with self.test_session(use_gpu=True) as sess:
       actual_y, expected_y = sess.run([actual, expected])
-      tf_logging.info("actual_y = ", actual_y)
-      tf_logging.info("expected_y = ", expected_y)
-      self.assertTrue(np.array_equal(actual_y, expected_y))
+      self.assertAllClose(actual_y, expected_y, rtol=0, atol=1)
 
   def testFusedConvInt8(self):
     if not test.is_gpu_available(
-- 
GitLab


From 9875df75c308d7498e601ae9a4b57db6aad47056 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Mon, 24 Sep 2018 21:24:39 -0700
Subject: [PATCH 0650/1357] Do not assume Node.in_edges() is sorted by
 dst_input.

PiperOrigin-RevId: 214380876
---
 tensorflow/core/common_runtime/constant_folding.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/constant_folding.cc b/tensorflow/core/common_runtime/constant_folding.cc
index 97b6971c5b..99cb9ac6a0 100644
--- a/tensorflow/core/common_runtime/constant_folding.cc
+++ b/tensorflow/core/common_runtime/constant_folding.cc
@@ -61,6 +61,7 @@ bool ReadPartialShapesFromShapeMap(
         shape_map,
     std::vector<PartialTensorShape>* input_shapes) {
   CHECK(shape_map != nullptr);
+  input_shapes->resize(n->num_inputs());
   for (const Edge* in : n->in_edges()) {
     // Don't need to check if incoming control edges have known shapes.
     if (in->IsControlEdge()) continue;
@@ -71,7 +72,9 @@ bool ReadPartialShapesFromShapeMap(
     }
     const auto& known_shape = known_shape_iter->second;
     CHECK_GT(known_shape.size(), in->src_output()) << known_shape_iter->first;
-    input_shapes->push_back(known_shape[in->src_output()]);
+    DCHECK_GE(in->dst_input(), 0);
+    DCHECK_LT(in->dst_input(), input_shapes->size());
+    (*input_shapes)[in->dst_input()] = known_shape[in->src_output()];
   }
   return true;
 }
-- 
GitLab


From c1644948d23cae271b140d67101c1a386e5495fd Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Mon, 24 Sep 2018 21:29:42 -0700
Subject: [PATCH 0651/1357] Unpack output of cond_v2 if it is a singleton to
 match behavior of cond.

PiperOrigin-RevId: 214381126
---
 .../python/kernel_tests/cond_v2_test.py       | 31 +++++++++----------
 .../kernel_tests/control_flow_ops_py_test.py  | 23 +++-----------
 tensorflow/python/ops/cond_v2_impl.py         |  6 +++-
 tensorflow/python/ops/image_ops_impl.py       |  2 --
 4 files changed, 24 insertions(+), 38 deletions(-)

diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 5c0e24117f..377c041675 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -131,7 +131,7 @@ class CondV2Test(test.TestCase):
     def false_fn():
       return x + 1
 
-    return cond_v2.cond_v2(pred, true_fn, false_fn, name=name)[0].op
+    return cond_v2.cond_v2(pred, true_fn, false_fn, name=name).op
 
   def testDefaultName(self):
     with ops.Graph().as_default():
@@ -569,8 +569,7 @@ class CondV2Test(test.TestCase):
 
       ops.add_to_collection("pred", pred)
       cond = cond_v2.cond_v2(pred, true_fn, false_fn, name="cond")
-      for c in cond:
-        ops.add_to_collection("cond", c)
+      ops.add_to_collection("cond", cond)
       meta_graph = saver.export_meta_graph()
 
     with ops.Graph().as_default() as g:
@@ -672,7 +671,7 @@ class CondV2CollectionTest(test.TestCase):
           return math_ops.add(x_const, y_const)
 
         cnd = cond_v2.cond_v2(True, fn, fn)
-        self.assertEquals(cnd[0].eval(), 7)
+        self.assertEquals(cnd.eval(), 7)
 
   def testCollectionTensorValueAccessInCond(self):
     """Read tensors from collections inside of cond_v2 & use them."""
@@ -689,7 +688,7 @@ class CondV2CollectionTest(test.TestCase):
           return math_ops.add(x_read, y_read)
 
         cnd = cond_v2.cond_v2(math_ops.less(x, y), fn, fn)
-        self.assertEquals(cnd[0].eval(), 7)
+        self.assertEquals(cnd.eval(), 7)
 
   def testCollectionIntValueWriteInCond(self):
     """Make sure Int writes to collections work inside of cond_v2."""
@@ -709,7 +708,7 @@ class CondV2CollectionTest(test.TestCase):
         cnd = cond_v2.cond_v2(
             True, true_fn,
             false_fn)
-        self.assertEquals(cnd[0].eval(), 14)
+        self.assertEquals(cnd.eval(), 14)
 
         read_z_collection = ops.get_collection("z")
         self.assertEquals(read_z_collection, [7])
@@ -782,10 +781,10 @@ class CondV2ContainerTest(test.TestCase):
 
         with ops.container("l1"):
           cnd_true = cond_v2.cond_v2(True, true_fn, false_fn)
-          self.assertEquals(cnd_true[0].eval(), 2)
+          self.assertEquals(cnd_true.eval(), 2)
 
           cnd_false = cond_v2.cond_v2(False, true_fn, false_fn)
-          self.assertEquals(cnd_false[0].eval(), 6)
+          self.assertEquals(cnd_false.eval(), 6)
 
           v4 = variables.Variable([3])
           q4 = data_flow_ops.FIFOQueue(1, dtypes.float32)
@@ -813,7 +812,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
           return c
 
         with ops.colocate_with(a.op):
-          self.assertEquals(cond_v2.cond_v2(True, fn, fn)[0].eval(), 3)
+          self.assertEquals(cond_v2.cond_v2(True, fn, fn).eval(), 3)
 
         def fn2():
           c = constant_op.constant(3.0)
@@ -822,7 +821,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
         with ops.colocate_with(a.op):
           with ops.colocate_with(b.op):
-            self.assertEquals(cond_v2.cond_v2(True, fn2, fn2)[0].eval(), 3)
+            self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3)
 
   def testColocateWithInAndOutOfCond(self):
     with ops.Graph().as_default() as g:
@@ -838,7 +837,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
             return c
 
         with ops.colocate_with(a.op):
-          self.assertEquals(cond_v2.cond_v2(True, fn2, fn2)[0].eval(), 3)
+          self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3)
 
           d = constant_op.constant([2.0], name="d")
           self.assertEqual([b"loc:@a"], d.op.colocation_groups())
@@ -859,7 +858,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
           with ops.colocate_with(b.op):
             c = math_ops.add(a, a, name="c")
           return c
-        out_cond_2 = cond_v2.cond_v2(True, fn, fn)[0]
+        out_cond_2 = cond_v2.cond_v2(True, fn, fn)
 
         run_options = config_pb2.RunOptions(output_partition_graphs=True)
         run_metadata = config_pb2.RunMetadata()
@@ -881,7 +880,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
           return c
 
         with ops.device("/device:CPU:0"):
-          self.assertEquals(cond_v2.cond_v2(True, fn, fn)[0].eval(), 3)
+          self.assertEquals(cond_v2.cond_v2(True, fn, fn).eval(), 3)
 
         def fn2():
           c = constant_op.constant(3.0)
@@ -889,7 +888,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
           return c
 
         with ops.device("/device:GPU:0"):
-          self.assertEquals(cond_v2.cond_v2(True, fn2, fn2)[0].eval(), 3)
+          self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3)
 
   def testDeviceInAndOutOfCond(self):
     with ops.Graph().as_default() as g:
@@ -903,7 +902,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
             return c
 
         with ops.device("/device:CPU:0"):
-          self.assertEquals(cond_v2.cond_v2(True, fn2, fn2)[0].eval(), 3)
+          self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3)
 
           d = constant_op.constant(4.0)
           self.assertEqual("/device:CPU:0", d.op.device)
@@ -922,7 +921,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
         with ops.device("/device:CPU:0"):
           a = constant_op.constant([2.0], name="a")
-          out_cond_2 = cond_v2.cond_v2(True, fn, fn)[0]
+          out_cond_2 = cond_v2.cond_v2(True, fn, fn)
 
         run_options = config_pb2.RunOptions(output_partition_graphs=True)
         run_metadata = config_pb2.RunMetadata()
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 2996539004..fc4d2a3809 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -422,8 +422,6 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(r.values.get_shape(), (2,))
 
   def testCondResource(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
 
     with self.cached_session():
       rv = resource_variable_ops.ResourceVariable(True)
@@ -484,15 +482,12 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(11, result)
 
   def testCond_1(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
 
     self._testCond_1(use_gpu=False)
-    self._testCond_1(use_gpu=True)
+    # TODO(b/116526896): Enable GPU tests.
+    # self._testCond_1(use_gpu=True)
 
   def testCond_2(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
 
     with self.cached_session():
       x = constant_op.constant(10)
@@ -503,8 +498,6 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(9, result)
 
   def testCond_3(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
 
     with self.cached_session():
       x = constant_op.constant(10)
@@ -556,8 +549,6 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(4, count.eval())
 
   def testCond_6(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
 
     with self.cached_session():
       v1 = variables.Variable([7])
@@ -583,8 +574,6 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual([11, 12], sess.run(r))
 
   def testCondRef(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
 
     with self.cached_session():
       x = gen_state_ops.variable(
@@ -1444,7 +1433,7 @@ class ControlFlowTest(test.TestCase):
 
   def testCondWhile_1(self):
     if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
+      return unittest.skip("b/113294340 (enable while_v2)")
 
     with self.cached_session():
       n = ops.convert_to_tensor(0, name="n")
@@ -1457,7 +1446,7 @@ class ControlFlowTest(test.TestCase):
 
   def testCondWhile_2(self):
     if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
+      return unittest.skip("b/113294340 (enable while_v2)")
 
     with self.cached_session():
       n = ops.convert_to_tensor(0)
@@ -2633,8 +2622,6 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(5.0, result.eval())
 
   def testOneValueCond(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
 
     with self.cached_session():
       c = array_ops.placeholder(dtypes.int32, shape=[])
@@ -2651,8 +2638,6 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual([2], i.eval(feed_dict={c: 0}))
 
   def testExampleCond(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/111124878 (don't return tuple)")
 
     with self.cached_session():
       x = ops.convert_to_tensor([-2.0, 2.0], name="x")
diff --git a/tensorflow/python/ops/cond_v2_impl.py b/tensorflow/python/ops/cond_v2_impl.py
index c6a6b2a7fa..f8b1ddb140 100644
--- a/tensorflow/python/ops/cond_v2_impl.py
+++ b/tensorflow/python/ops/cond_v2_impl.py
@@ -119,7 +119,11 @@ def cond_v2(pred, true_fn, false_fn, name="cond"):
                       attr_value_pb2.AttrValue(b=True))
       # pylint: enable=protected-access
 
-    return tuple(tensors[:num_cond_outputs])
+    result = tuple(tensors[:num_cond_outputs])
+    if len(result) == 1:
+      return result[0]
+    else:
+      return result
 
 
 @ops.RegisterGradient("If")
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 208b56e909..1c75aab578 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -329,8 +329,6 @@ def _random_flip(image, flip_index, seed, scope_name):
           lambda: image,
           name=scope
       )
-      if isinstance(result, tuple):
-        result = result[0]  # TODO(b/111124878) remove this logic (CondV2).
       return fix_image_flip_shape(image, result)
     elif shape.ndims == 4:
       batch_size = array_ops.shape(image)[0]
-- 
GitLab


From eb14cc419ac3e9ced5f38fc3d08b1ab2e128dafa Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Sep 2018 22:09:00 -0700
Subject: [PATCH 0652/1357] Update kernel evals to use new kernel signatures.

PiperOrigin-RevId: 214384090
---
 .../contrib/lite/kernels/depthwise_conv.cc    |  70 ++++++----
 .../internal/depthwiseconv_float_test.cc      |  74 +++++-----
 .../internal/depthwiseconv_quantized_test.cc  | 132 ++++++++++--------
 .../lite/kernels/internal/test_util.cc        |  40 ++----
 .../contrib/lite/kernels/internal/test_util.h |  12 +-
 5 files changed, 166 insertions(+), 162 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
index 798ee849ec..19958844a1 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
@@ -180,24 +180,31 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
   CalculateActivationRange(params->activation, &output_activation_min,
                            &output_activation_max);
 
-  void (*depthwise_conv)(const float*, const Dims<4>&, const float*,
-                         const Dims<4>&, const float*, const Dims<4>&, int, int,
-                         int, int, int, int, int, float, float, float*,
-                         const Dims<4>&);
+  void (*depthwise_conv)(const DepthwiseParams&, const RuntimeShape&,
+                         const float*, const RuntimeShape&, const float*,
+                         const RuntimeShape&, const float*, const RuntimeShape&,
+                         float*);
   if (kernel_type == kReference) {
     depthwise_conv = &reference_ops::DepthwiseConv;
   } else {
     depthwise_conv = &optimized_ops::DepthwiseConv;
   }
 
-  depthwise_conv(
-      GetTensorData<float>(input), GetTensorDims(input),
-      GetTensorData<float>(filter), GetTensorDims(filter),
-      GetTensorData<float>(bias), GetTensorDims(bias), params->stride_width,
-      params->stride_height, params->dilation_width_factor,
-      params->dilation_height_factor, data->padding.width, data->padding.height,
-      params->depth_multiplier, output_activation_min, output_activation_max,
-      GetTensorData<float>(output), GetTensorDims(output));
+  DepthwiseParams op_params;
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = params->dilation_width_factor;
+  op_params.dilation_height_factor = params->dilation_height_factor;
+  op_params.depth_multiplier = params->depth_multiplier;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+  depthwise_conv(op_params, GetTensorShape(input), GetTensorData<float>(input),
+                 GetTensorShape(filter), GetTensorData<float>(filter),
+                 GetTensorShape(bias), GetTensorData<float>(bias),
+                 GetTensorShape(output), GetTensorData<float>(output));
 }
 
 template <KernelType kernel_type>
@@ -209,10 +216,10 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
   auto filter_offset = -filter->params.zero_point;
   auto output_offset = output->params.zero_point;
 
-  void (*depthwise_conv)(const uint8*, const Dims<4>&, int32, const uint8*,
-                         const Dims<4>&, int32, const int32*, const Dims<4>&,
-                         int, int, int, int, int, int, int, int32, int32, int,
-                         int32, int32, uint8*, const Dims<4>&);
+  void (*depthwise_conv)(const DepthwiseParams&, const RuntimeShape&,
+                         const uint8*, const RuntimeShape&, const uint8*,
+                         const RuntimeShape&, const int32*, const RuntimeShape&,
+                         uint8*);
 
   if (kernel_type == kReference) {
     depthwise_conv = &reference_ops::DepthwiseConv;
@@ -220,16 +227,27 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     depthwise_conv = &optimized_ops::DepthwiseConv;
   }
 
-  depthwise_conv(
-      GetTensorData<uint8_t>(input), GetTensorDims(input), input_offset,
-      GetTensorData<uint8_t>(filter), GetTensorDims(filter), filter_offset,
-      GetTensorData<int32_t>(bias), GetTensorDims(bias), params->stride_width,
-      params->stride_height, params->dilation_width_factor,
-      params->dilation_height_factor, data->padding.width, data->padding.height,
-      params->depth_multiplier, output_offset, data->output_multiplier,
-      data->output_shift, data->output_activation_min,
-      data->output_activation_max, GetTensorData<uint8_t>(output),
-      GetTensorDims(output));
+  DepthwiseParams op_params;
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = params->dilation_width_factor;
+  op_params.dilation_height_factor = params->dilation_height_factor;
+  op_params.depth_multiplier = params->depth_multiplier;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = data->output_multiplier;
+  op_params.output_shift = -data->output_shift;
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+  depthwise_conv(op_params, GetTensorShape(input),
+                 GetTensorData<uint8_t>(input), GetTensorShape(filter),
+                 GetTensorData<uint8_t>(filter), GetTensorShape(bias),
+                 GetTensorData<int32_t>(bias), GetTensorShape(output),
+                 GetTensorData<uint8_t>(output));
 }
 
 template <KernelType kernel_type>
diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc
index 7600b26f5c..41862a21a6 100644
--- a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc
@@ -29,28 +29,20 @@ namespace tflite {
 namespace {
 
 // Runs the DepthwiseConv and compares against the reference implementation.
-void TestOneDepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          const float* bias_data, const Dims<4>& bias_dims,
-                          int stride, int dilation_width_factor,
-                          int dilation_height_factor, int pad_width,
-                          int pad_height, int depth_multiplier,
-                          float output_activation_min,
-                          float output_activation_max,
-                          const Dims<4>& output_dims) {
-  const int output_buffer_size = RequiredBufferSizeForDims(output_dims);
+void TestOneDepthwiseConv(
+    const DepthwiseParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& filter_shape,
+    const float* filter_data, const RuntimeShape& bias_shape,
+    const float* bias_data, const RuntimeShape& output_shape) {
+  const int output_buffer_size = output_shape.FlatSize();
   std::vector<float> output_data(output_buffer_size);
   std::vector<float> reference_output_data(output_buffer_size);
-  reference_ops::DepthwiseConv(
-      input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
-      stride, stride, dilation_width_factor, dilation_height_factor, pad_width,
-      pad_height, depth_multiplier, output_activation_min,
-      output_activation_max, reference_output_data.data(), output_dims);
-  optimized_ops::DepthwiseConv(
-      input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
-      stride, stride, dilation_width_factor, dilation_height_factor, pad_width,
-      pad_height, depth_multiplier, output_activation_min,
-      output_activation_max, output_data.data(), output_dims);
+  reference_ops::DepthwiseConv(params, input_shape, input_data, filter_shape,
+                               filter_data, bias_shape, bias_data, output_shape,
+                               reference_output_data.data());
+  optimized_ops::DepthwiseConv(params, input_shape, input_data, filter_shape,
+                               filter_data, bias_shape, bias_data, output_shape,
+                               output_data.data());
 
   double sum_abs_diff = 0;
   float max_abs_val = 0;
@@ -105,24 +97,23 @@ bool TryTestOneDepthwiseConv() {
   if (output_depth > kMaxSupportedOutputDepth) {
     return false;
   }
-  Dims<4> input_dims_inference =
-      MakeDimsForInference(input_depth, input_width, input_height, batch);
-  Dims<4> output_dims_inference;
+  RuntimeShape input_shape_inference(
+      {batch, input_height, input_width, input_depth});
+  RuntimeShape output_shape_inference;
   int pad_width, pad_height;
   const auto padding_type =
       UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid;
-  if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width,
+  if (!ComputeConvSizes(input_shape_inference, output_depth, filter_width,
                         filter_height, stride, dilation_width_factor,
                         dilation_height_factor, padding_type,
-                        &output_dims_inference, &pad_width, &pad_height)) {
+                        &output_shape_inference, &pad_width, &pad_height)) {
     return false;
   }
-  Dims<4> filter_dims_inference =
-      MakeDimsForInference(output_depth, filter_width, filter_height, 1);
-  Dims<4> bias_dims_inference = MakeDimsForInference(output_depth, 1, 1, 1);
-  const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference);
-  const int filter_buffer_size =
-      RequiredBufferSizeForDims(filter_dims_inference);
+  RuntimeShape filter_shape_inference(
+      {1, filter_height, filter_width, output_depth});
+  RuntimeShape bias_shape_inference({1, 1, 1, output_depth});
+  const int input_buffer_size = input_shape_inference.FlatSize();
+  const int filter_buffer_size = filter_shape_inference.FlatSize();
   std::vector<float> input_data(input_buffer_size);
   std::vector<float> filter_data(filter_buffer_size);
   std::vector<float> bias_data(output_depth);
@@ -133,12 +124,21 @@ bool TryTestOneDepthwiseConv() {
   FillRandom(&input_data, -input_amplitude, input_amplitude);
   FillRandom(&filter_data, -filter_amplitude, filter_amplitude);
   FillRandom(&bias_data, -bias_amplitude, bias_amplitude);
-  TestOneDepthwiseConv(input_data.data(), input_dims_inference,
-                       filter_data.data(), filter_dims_inference,
-                       bias_data.data(), bias_dims_inference, stride,
-                       dilation_width_factor, dilation_height_factor, pad_width,
-                       pad_height, depth_multiplier, output_activation_min,
-                       output_activation_max, output_dims_inference);
+  DepthwiseParams op_params;
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride;
+  op_params.stride_height = stride;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+  TestOneDepthwiseConv(op_params, input_shape_inference, input_data.data(),
+                       filter_shape_inference, filter_data.data(),
+                       bias_shape_inference, bias_data.data(),
+                       output_shape_inference);
   return true;
 }
 
diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc
index 312d048b2d..9414e109c3 100644
--- a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc
@@ -35,29 +35,40 @@ namespace {
 // Runs the DepthwiseConv and compares against the reference implementation.
 template <FusedActivationFunctionType Ac>
 int TestOneDepthwiseConvWithGivenOutputShift(
-    const std::uint8_t* input_data, const Dims<4>& input_dims,
+    const std::uint8_t* input_data, const RuntimeShape& input_shape,
     std::int32_t input_offset, const std::uint8_t* filter_data,
-    const Dims<4>& filter_dims, std::int32_t filter_offset,
-    const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride,
+    const RuntimeShape& filter_shape, std::int32_t filter_offset,
+    const std::int32_t* bias_data, const RuntimeShape& bias_shape, int stride,
     int pad_width, int pad_height, int depth_multiplier,
     std::int32_t output_offset, std::int32_t output_multiplier,
     int output_shift, std::int32_t output_activation_min,
-    std::int32_t output_activation_max, const Dims<4>& output_dims) {
-  const int output_buffer_size = RequiredBufferSizeForDims(output_dims);
+    std::int32_t output_activation_max, const RuntimeShape& output_shape) {
+  const int output_buffer_size = output_shape.FlatSize();
   std::vector<std::uint8_t> output_data(output_buffer_size);
   std::vector<std::uint8_t> reference_output_data(output_buffer_size);
-  reference_ops::DepthwiseConv<Ac>(
-      input_data, input_dims, input_offset, filter_data, filter_dims,
-      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
-      depth_multiplier, output_offset, output_multiplier, output_shift,
-      output_activation_min, output_activation_max,
-      reference_output_data.data(), output_dims);
-  optimized_ops::DepthwiseConv<Ac>(
-      input_data, input_dims, input_offset, filter_data, filter_dims,
-      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
-      depth_multiplier, output_offset, output_multiplier, output_shift,
-      output_activation_min, output_activation_max, output_data.data(),
-      output_dims);
+
+  tflite::DepthwiseParams op_params;
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride;
+  op_params.stride_height = stride;
+  op_params.dilation_width_factor = 1;
+  op_params.dilation_height_factor = 1;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = -output_shift;
+  reference_ops::DepthwiseConv(op_params, input_shape, input_data, filter_shape,
+                               filter_data, bias_shape, bias_data, output_shape,
+                               reference_output_data.data());
+  optimized_ops::DepthwiseConv(op_params, input_shape, input_data, filter_shape,
+                               filter_data, bias_shape, bias_data, output_shape,
+                               output_data.data());
   int saturated_min = 0;
   int saturated_max = 0;
   std::vector<int> diff(output_buffer_size);
@@ -106,25 +117,25 @@ int TestOneDepthwiseConvWithGivenOutputShift(
 // vacuous. So we just bisect our way to reasonable output_shift values.
 template <FusedActivationFunctionType Ac>
 void TestOneDepthwiseConvBisectOutputShift(
-    const std::uint8_t* input_data, const Dims<4>& input_dims,
+    const std::uint8_t* input_data, const RuntimeShape& input_shape,
     std::int32_t input_offset, const std::uint8_t* filter_data,
-    const Dims<4>& filter_dims, std::int32_t filter_offset,
-    const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride,
+    const RuntimeShape& filter_shape, std::int32_t filter_offset,
+    const std::int32_t* bias_data, const RuntimeShape& bias_shape, int stride,
     int pad_width, int pad_height, int depth_multiplier,
     std::int32_t output_offset, std::int32_t output_multiplier,
     int output_activation_bisect_start, int output_activation_bisect_end,
     std::int32_t output_activation_min, std::int32_t output_activation_max,
-    const Dims<4>& output_dims) {
+    const RuntimeShape& output_shape) {
   ASSERT_LT(output_activation_bisect_start, output_activation_bisect_end)
       << "Bisection failed ?!?!";
   int output_shift_bisect_midpoint =
       (output_activation_bisect_start + output_activation_bisect_end) / 2;
   int bisect_result = TestOneDepthwiseConvWithGivenOutputShift<Ac>(
-      input_data, input_dims, input_offset, filter_data, filter_dims,
-      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
+      input_data, input_shape, input_offset, filter_data, filter_shape,
+      filter_offset, bias_data, bias_shape, stride, pad_width, pad_height,
       depth_multiplier, output_offset, output_multiplier,
       output_shift_bisect_midpoint, output_activation_min,
-      output_activation_max, output_dims);
+      output_activation_max, output_shape);
   // At this point we know that the test succeeded (otherwise it would have
   // aborted).
   if (bisect_result == 0) {
@@ -147,47 +158,47 @@ void TestOneDepthwiseConvBisectOutputShift(
                                              ? output_activation_bisect_end
                                              : output_shift_bisect_midpoint;
   TestOneDepthwiseConvBisectOutputShift<Ac>(
-      input_data, input_dims, input_offset, filter_data, filter_dims,
-      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
+      input_data, input_shape, input_offset, filter_data, filter_shape,
+      filter_offset, bias_data, bias_shape, stride, pad_width, pad_height,
       depth_multiplier, output_offset, output_multiplier,
       new_output_activation_bisect_start, new_output_activation_bisect_end,
-      output_activation_min, output_activation_max, output_dims);
+      output_activation_min, output_activation_max, output_shape);
 }
 
 template <FusedActivationFunctionType Ac>
 void TestOneDepthwiseConv(
-    const std::uint8_t* input_data, const Dims<4>& input_dims,
+    const std::uint8_t* input_data, const RuntimeShape& input_shape,
     std::int32_t input_offset, const std::uint8_t* filter_data,
-    const Dims<4>& filter_dims, std::int32_t filter_offset,
-    const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride,
+    const RuntimeShape& filter_shape, std::int32_t filter_offset,
+    const std::int32_t* bias_data, const RuntimeShape& bias_shape, int stride,
     int pad_width, int pad_height, int depth_multiplier,
     std::int32_t output_offset, std::int32_t output_multiplier,
     std::int32_t output_activation_min, std::int32_t output_activation_max,
-    const Dims<4>& output_dims) {
+    const RuntimeShape& output_shape) {
   TestOneDepthwiseConvBisectOutputShift<Ac>(
-      input_data, input_dims, input_offset, filter_data, filter_dims,
-      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
+      input_data, input_shape, input_offset, filter_data, filter_shape,
+      filter_offset, bias_data, bias_shape, stride, pad_width, pad_height,
       depth_multiplier, output_offset, output_multiplier, 0, 32,
-      output_activation_min, output_activation_max, output_dims);
+      output_activation_min, output_activation_max, output_shape);
 }
 
 void TestOneDepthwiseConv(
     FusedActivationFunctionType Ac, const std::uint8_t* input_data,
-    const Dims<4>& input_dims, std::int32_t input_offset,
-    const std::uint8_t* filter_data, const Dims<4>& filter_dims,
+    const RuntimeShape& input_shape, std::int32_t input_offset,
+    const std::uint8_t* filter_data, const RuntimeShape& filter_shape,
     std::int32_t filter_offset, const std::int32_t* bias_data,
-    const Dims<4>& bias_dims, int stride, int pad_width, int pad_height,
+    const RuntimeShape& bias_shape, int stride, int pad_width, int pad_height,
     int depth_multiplier, std::int32_t output_offset,
     std::int32_t output_multiplier, std::int32_t output_activation_min,
-    std::int32_t output_activation_max, const Dims<4>& output_dims) {
-#define TOCO_HANDLE_CASE(AC_TYPE)                                           \
-  if (AC_TYPE == Ac) {                                                      \
-    TestOneDepthwiseConv<AC_TYPE>(                                          \
-        input_data, input_dims, input_offset, filter_data, filter_dims,     \
-        filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, \
-        depth_multiplier, output_offset, output_multiplier,                 \
-        output_activation_min, output_activation_max, output_dims);         \
-    return;                                                                 \
+    std::int32_t output_activation_max, const RuntimeShape& output_shape) {
+#define TOCO_HANDLE_CASE(AC_TYPE)                                            \
+  if (AC_TYPE == Ac) {                                                       \
+    TestOneDepthwiseConv<AC_TYPE>(                                           \
+        input_data, input_shape, input_offset, filter_data, filter_shape,    \
+        filter_offset, bias_data, bias_shape, stride, pad_width, pad_height, \
+        depth_multiplier, output_offset, output_multiplier,                  \
+        output_activation_min, output_activation_max, output_shape);         \
+    return;                                                                  \
   }
   TOCO_HANDLE_CASE(FusedActivationFunctionType::kNone)
   TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu)
@@ -227,34 +238,33 @@ bool TryTestDepthwiseConv(int batch, int input_depth, int input_width,
   const std::int32_t input_offset = UniformRandomInt(-256, 0);
   const std::int32_t filter_offset = UniformRandomInt(-256, 0);
   const std::int32_t output_offset = UniformRandomInt(-256, 0);
-  Dims<4> input_dims_inference =
-      MakeDimsForInference(input_depth, input_width, input_height, batch);
-  Dims<4> output_dims_inference;
+  RuntimeShape input_shape_inference(
+      {batch, input_height, input_width, input_depth});
+  RuntimeShape output_shape_inference;
   int pad_width, pad_height;
-  if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width,
+  if (!ComputeConvSizes(input_shape_inference, output_depth, filter_width,
                         filter_height, stride, dilation_width_factor,
                         dilation_height_factor, padding_type,
-                        &output_dims_inference, &pad_width, &pad_height)) {
+                        &output_shape_inference, &pad_width, &pad_height)) {
     return false;
   }
-  Dims<4> filter_dims_inference =
-      MakeDimsForInference(output_depth, filter_width, filter_height, 1);
-  Dims<4> bias_dims_inference = MakeDimsForInference(output_depth, 1, 1, 1);
-  const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference);
-  const int filter_buffer_size =
-      RequiredBufferSizeForDims(filter_dims_inference);
+  RuntimeShape filter_shape_inference(
+      {1, filter_height, filter_width, output_depth});
+  RuntimeShape bias_shape_inference({1, 1, 1, output_depth});
+  const int input_buffer_size = input_shape_inference.FlatSize();
+  const int filter_buffer_size = filter_shape_inference.FlatSize();
   std::vector<std::uint8_t> input_data(input_buffer_size);
   std::vector<std::uint8_t> filter_data(filter_buffer_size);
   std::vector<std::int32_t> bias_data(output_depth);
   FillRandom(&input_data);
   FillRandom(&filter_data);
   FillRandom(&bias_data, -10000, 10000);
-  TestOneDepthwiseConv(ac, input_data.data(), input_dims_inference,
-                       input_offset, filter_data.data(), filter_dims_inference,
-                       filter_offset, bias_data.data(), bias_dims_inference,
+  TestOneDepthwiseConv(ac, input_data.data(), input_shape_inference,
+                       input_offset, filter_data.data(), filter_shape_inference,
+                       filter_offset, bias_data.data(), bias_shape_inference,
                        stride, pad_width, pad_height, depth_multiplier,
                        output_offset, output_multiplier, output_activation_min,
-                       output_activation_max, output_dims_inference);
+                       output_activation_max, output_shape_inference);
   return true;
 }
 
diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.cc b/tensorflow/contrib/lite/kernels/internal/test_util.cc
index 5ae4b193d0..75d568ae3a 100644
--- a/tensorflow/contrib/lite/kernels/internal/test_util.cc
+++ b/tensorflow/contrib/lite/kernels/internal/test_util.cc
@@ -19,36 +19,15 @@ limitations under the License.
 
 namespace tflite {
 
-Dims<4> MakeDimsForInference(int depth, int width, int height, int batch) {
-  Dims<4> result;
-  int cum_prod = 1;
-
-  result.sizes[0] = depth;
-  result.strides[0] = cum_prod;
-  cum_prod *= result.sizes[0];
-
-  result.sizes[1] = width;
-  result.strides[1] = cum_prod;
-  cum_prod *= result.sizes[1];
-
-  result.sizes[2] = height;
-  result.strides[2] = cum_prod;
-  cum_prod *= result.sizes[2];
-
-  result.sizes[3] = batch;
-  result.strides[3] = cum_prod;
-
-  return result;
-}
-
 // this is a copied from an internal function in propagate_fixed_sizes.cc
-bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width,
-                      int filter_height, int stride, int dilation_width_factor,
-                      int dilation_height_factor, PaddingType padding_type,
-                      Dims<4>* output_dims, int* pad_width, int* pad_height) {
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_height = ArraySize(input_dims, 2);
-  const int batch = ArraySize(input_dims, 3);
+bool ComputeConvSizes(const RuntimeShape& input_shape, int output_depth,
+                      int filter_width, int filter_height, int stride,
+                      int dilation_width_factor, int dilation_height_factor,
+                      PaddingType padding_type, RuntimeShape* output_shape,
+                      int* pad_width, int* pad_height) {
+  const int input_width = input_shape.Dims(2);
+  const int input_height = input_shape.Dims(1);
+  const int batch = input_shape.Dims(0);
 
   int dilated_filter_width = dilation_width_factor * (filter_width - 1) + 1;
   int dilated_filter_height = dilation_height_factor * (filter_height - 1) + 1;
@@ -76,8 +55,7 @@ bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width,
       0,
       ((output_width - 1) * stride + dilated_filter_width - input_width) / 2);
 
-  *output_dims =
-      MakeDimsForInference(output_depth, output_width, output_height, batch);
+  output_shape->BuildFrom({batch, output_height, output_width, output_depth});
   return true;
 }
 
diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.h b/tensorflow/contrib/lite/kernels/internal/test_util.h
index cb6d8b147c..e4a383bedf 100644
--- a/tensorflow/contrib/lite/kernels/internal/test_util.h
+++ b/tensorflow/contrib/lite/kernels/internal/test_util.h
@@ -26,14 +26,12 @@ limitations under the License.
 
 namespace tflite {
 
-// Creates a Dims struct from a set of dimensions.
-Dims<4> MakeDimsForInference(int depth, int width, int height, int batch);
-
 // Computes output and padding dimensions.
-bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width,
-                      int filter_height, int stride, int dilation_width_factor,
-                      int dilation_height_factor, PaddingType padding_type,
-                      Dims<4>* output_dims, int* pad_width, int* pad_height);
+bool ComputeConvSizes(const RuntimeShape& input_shape, int output_depth,
+                      int filter_width, int filter_height, int stride,
+                      int dilation_width_factor, int dilation_height_factor,
+                      PaddingType padding_type, RuntimeShape* output_shape,
+                      int* pad_width, int* pad_height);
 
 // Returns a mt19937 random engine.
 std::mt19937& RandomEngine();
-- 
GitLab


From c12a90e45c5f94b80289f4278f81be4a0348fa19 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Tue, 25 Sep 2018 13:51:36 +0800
Subject: [PATCH 0653/1357] fix pylint

---
 tensorflow/python/ops/variables.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 2d6a767fed..d058478d58 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -2402,7 +2402,8 @@ class PartitionedVariable(object):
     if len(partition_axes) > 1:
       raise NotImplementedError(
           "Cannot concatenate along more than one dimension: %s.  "
-          "Multi-axis partition assign_fn is not supported" % str(partition_axes))
+          "Multi-axis partition assign_fn is not supported "
+          % str(partition_axes))
     partition_ix = partition_axes[0]
     size_splits_list = [
         var.shape[partition_ix].value for var in self._variable_list]
-- 
GitLab


From 3d60d636de59449a8448cbcbcd71af82e2871538 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Tue, 25 Sep 2018 13:53:36 +0800
Subject: [PATCH 0654/1357] fix back variabe name

---
 tensorflow/python/kernel_tests/variables_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 687784c8b7..0b101529fe 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -673,7 +673,7 @@ class PartitionedVariableTest(test.TestCase):
         v0._set_save_slice_info(
             variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
         v1._set_save_slice_info(
-            variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1]))
+            variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
         partitions = [2]
 
         variables.PartitionedVariable(
-- 
GitLab


From 07b3f4618aa0e0d7152feeb1da51a4e3e8696770 Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Mon, 24 Sep 2018 22:57:32 -0700
Subject: [PATCH 0655/1357] Updated the nGraph version.

---
 tensorflow/workspace.bzl           | 16 ++++-----
 third_party/ngraph/ngraph.BUILD    | 43 ++++++++++++++----------
 third_party/ngraph/ngraph_tf.BUILD | 52 ++++++++++++++++--------------
 3 files changed, 62 insertions(+), 49 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b850c5a17f..5f04c45611 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -858,11 +858,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "ngraph",
         urls = [
-            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz",
-            "https://github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz",
+            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.8.0.tar.gz",
+            "https://github.com/NervanaSystems/ngraph/archive/v0.8.0.tar.gz",
         ],
-        sha256 = "34434b6d5993ac5233538c84f498840db7ac91df82e225c379ee7c8f6de644a5",
-        strip_prefix = "ngraph-0.7.0",
+        sha256 = "a8cf3ef2d0e6d31b54eb33f6a9e795f562195ce5c2a857e729ca9c35241cc45c",
+        strip_prefix = "ngraph-0.8.0",
         build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
     )
 
@@ -880,11 +880,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "ngraph_tf",
         urls = [
-            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.5.0.tar.gz",
-            "https://github.com/NervanaSystems/ngraph-tf/archive/v0.5.0.tar.gz",
+            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.6.0.tar.gz",
+            "https://github.com/NervanaSystems/ngraph-tf/archive/v0.6.0.tar.gz",
         ],
-        sha256 = "23b4566d8e40d6f1f236b0ffe3905dd964ae42ca54bacff67f24abcefd443afb",
-        strip_prefix = "ngraph-tf-0.5.0",
+        sha256 = "1f49391c02bef24872e9f85591e60e0e7eef12a337db71390444118049fe451f",
+        strip_prefix = "ngraph-tf-0.6.0",
         build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"),
     )
 
diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD
index 1fd1b8e8e0..71b2187011 100644
--- a/third_party/ngraph/ngraph.BUILD
+++ b/third_party/ngraph/ngraph.BUILD
@@ -11,41 +11,35 @@ cc_library(
 cc_library(
     name = "ngraph_cpu_backend",
     srcs = [
-        "src/ngraph/runtime/cpu/cpu_backend.cpp",
-        "src/ngraph/runtime/cpu/cpu_builder.cpp",
-        "src/ngraph/runtime/cpu/cpu_call_frame.cpp",
-        "src/ngraph/runtime/cpu/cpu_external_function.cpp",
-        "src/ngraph/runtime/cpu/cpu_kernels.cpp",
-        "src/ngraph/runtime/cpu/cpu_layout_descriptor.cpp",
-        "src/ngraph/runtime/cpu/cpu_tensor_view_wrapper.cpp",
-        "src/ngraph/runtime/cpu/cpu_tensor_view.cpp",
-        "src/ngraph/runtime/cpu/cpu_tracing.cpp",
         "src/ngraph/runtime/cpu/builder/add.cpp",
         "src/ngraph/runtime/cpu/builder/allreduce.cpp",
-        "src/ngraph/runtime/cpu/builder/avg_pool.cpp",
-        "src/ngraph/runtime/cpu/builder/argmin.cpp",
         "src/ngraph/runtime/cpu/builder/argmax.cpp",
+        "src/ngraph/runtime/cpu/builder/argmin.cpp",
+        "src/ngraph/runtime/cpu/builder/avg_pool.cpp",
         "src/ngraph/runtime/cpu/builder/batch_norm.cpp",
-        "src/ngraph/runtime/cpu/builder/broadcast.cpp",
         "src/ngraph/runtime/cpu/builder/bounded_relu.cpp",
+        "src/ngraph/runtime/cpu/builder/broadcast.cpp",
         "src/ngraph/runtime/cpu/builder/concat.cpp",
         "src/ngraph/runtime/cpu/builder/convert.cpp",
         "src/ngraph/runtime/cpu/builder/convert_layout.cpp",
         "src/ngraph/runtime/cpu/builder/convolution.cpp",
         "src/ngraph/runtime/cpu/builder/dot.cpp",
         "src/ngraph/runtime/cpu/builder/function_call.cpp",
-        "src/ngraph/runtime/cpu/builder/lstm.cpp",
         "src/ngraph/runtime/cpu/builder/lrn.cpp",
+        "src/ngraph/runtime/cpu/builder/lstm.cpp",
         "src/ngraph/runtime/cpu/builder/matmul_bias.cpp",
         "src/ngraph/runtime/cpu/builder/max.cpp",
         "src/ngraph/runtime/cpu/builder/max_pool.cpp",
         "src/ngraph/runtime/cpu/builder/min.cpp",
         "src/ngraph/runtime/cpu/builder/one_hot.cpp",
-        "src/ngraph/runtime/cpu/builder/relu.cpp",
         "src/ngraph/runtime/cpu/builder/pad.cpp",
         "src/ngraph/runtime/cpu/builder/product.cpp",
+        "src/ngraph/runtime/cpu/builder/quantize.cpp",
+        "src/ngraph/runtime/cpu/builder/quantized_avg_pool.cpp",
+        "src/ngraph/runtime/cpu/builder/quantized_max_pool.cpp",
         "src/ngraph/runtime/cpu/builder/reduce_function.cpp",
         "src/ngraph/runtime/cpu/builder/reduce_function_window.cpp",
+        "src/ngraph/runtime/cpu/builder/relu.cpp",
         "src/ngraph/runtime/cpu/builder/replace_slice.cpp",
         "src/ngraph/runtime/cpu/builder/reshape.cpp",
         "src/ngraph/runtime/cpu/builder/reverse.cpp",
@@ -57,6 +51,16 @@ cc_library(
         "src/ngraph/runtime/cpu/builder/slice.cpp",
         "src/ngraph/runtime/cpu/builder/softmax.cpp",
         "src/ngraph/runtime/cpu/builder/sum.cpp",
+        "src/ngraph/runtime/cpu/builder/topk.cpp",
+        "src/ngraph/runtime/cpu/cpu_backend.cpp",
+        "src/ngraph/runtime/cpu/cpu_builder.cpp",
+        "src/ngraph/runtime/cpu/cpu_call_frame.cpp",
+        "src/ngraph/runtime/cpu/cpu_external_function.cpp",
+        "src/ngraph/runtime/cpu/cpu_kernels.cpp",
+        "src/ngraph/runtime/cpu/cpu_layout_descriptor.cpp",
+        "src/ngraph/runtime/cpu/cpu_tensor_view.cpp",
+        "src/ngraph/runtime/cpu/cpu_tensor_view_wrapper.cpp",
+        "src/ngraph/runtime/cpu/cpu_tracing.cpp",
         "src/ngraph/runtime/cpu/kernel/eigen_thread_pool.cpp",
         "src/ngraph/runtime/cpu/kernel/pad.cpp",
         "src/ngraph/runtime/cpu/kernel/reduce_max.cpp",
@@ -68,14 +72,19 @@ cc_library(
         "src/ngraph/runtime/cpu/op/batch_dot.cpp",
         "src/ngraph/runtime/cpu/op/batch_norm_relu.cpp",
         "src/ngraph/runtime/cpu/op/bounded_relu.cpp",
-        "src/ngraph/runtime/cpu/op/group_conv.cpp",
+        "src/ngraph/runtime/cpu/op/conv_add.cpp",
         "src/ngraph/runtime/cpu/op/conv_bias.cpp",
         "src/ngraph/runtime/cpu/op/conv_relu.cpp",
         "src/ngraph/runtime/cpu/op/convert_layout.cpp",
+        "src/ngraph/runtime/cpu/op/dequantize.cpp",
+        "src/ngraph/runtime/cpu/op/group_conv.cpp",
         "src/ngraph/runtime/cpu/op/loop_kernel.cpp",
         "src/ngraph/runtime/cpu/op/lstm.cpp",
         "src/ngraph/runtime/cpu/op/matmul_bias.cpp",
         "src/ngraph/runtime/cpu/op/max_pool_with_indices.cpp",
+        "src/ngraph/runtime/cpu/op/quantize.cpp",
+        "src/ngraph/runtime/cpu/op/quantized_avg_pool.cpp",
+        "src/ngraph/runtime/cpu/op/quantized_max_pool.cpp",
         "src/ngraph/runtime/cpu/op/rnn.cpp",
         "src/ngraph/runtime/cpu/op/sigmoid_mul.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_assignment.cpp",
@@ -101,7 +110,7 @@ cc_library(
         "-I external/ngraph/src",
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
-        '-D NGRAPH_VERSION=\\"0.7.0\\"',
+        '-D NGRAPH_VERSION=\\"0.8.0\\"',
         "-D NGRAPH_DEX_ONLY",
     ],
     visibility = ["//visibility:public"],
@@ -135,7 +144,7 @@ cc_library(
         "-I external/ngraph/src",
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
-        '-D NGRAPH_VERSION=\\"0.7.0\\"',
+        '-D NGRAPH_VERSION=\\"0.8.0\\"',
     ],
     visibility = ["//visibility:public"],
     alwayslink = 1,
diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index 979318d7c2..baf235d48a 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -10,41 +10,43 @@ load(
 cc_library(
     name = "ngraph_tf",
     srcs = [
-        "src/ngraph_api.h",
         "src/ngraph_api.cc",
-        "src/ngraph_assign_clusters.h",
+        "src/ngraph_api.h",
         "src/ngraph_assign_clusters.cc",
-        "src/ngraph_builder.h",
+        "src/ngraph_assign_clusters.h",
         "src/ngraph_builder.cc",
-        "src/ngraph_capture_variables.h",
+        "src/ngraph_builder.h",
         "src/ngraph_capture_variables.cc",
-        "src/ngraph_conversions.h",
-        "src/ngraph_cluster_manager.h",
+        "src/ngraph_capture_variables.h",
         "src/ngraph_cluster_manager.cc",
-        "src/ngraph_deassign_clusters.h",
+        "src/ngraph_cluster_manager.h",
+        "src/ngraph_conversions.h",
         "src/ngraph_deassign_clusters.cc",
-        "src/ngraph_encapsulate_op.cc",
-        "src/ngraph_encapsulate_clusters.h",
+        "src/ngraph_deassign_clusters.h",
         "src/ngraph_encapsulate_clusters.cc",
-        "src/ngraph_freshness_tracker.h",
+        "src/ngraph_encapsulate_clusters.h",
+        "src/ngraph_encapsulate_op.cc",
         "src/ngraph_freshness_tracker.cc",
-        "src/ngraph_mark_for_clustering.h",
+        "src/ngraph_freshness_tracker.h",
         "src/ngraph_mark_for_clustering.cc",
-        "src/ngraph_rewrite_pass.cc",
-        "src/ngraph_rewrite_for_tracking.h",
+        "src/ngraph_mark_for_clustering.h",
         "src/ngraph_rewrite_for_tracking.cc",
+        "src/ngraph_rewrite_for_tracking.h",
+        "src/ngraph_rewrite_pass.cc",
         "src/ngraph_tracked_variable.cc",
-        "src/ngraph_utils.h",
         "src/ngraph_utils.cc",
+        "src/ngraph_utils.h",
+        "src/ngraph_version_utils.h",
+        "src/tf_deadness_analysis.cc",
+        "src/tf_deadness_analysis.h",
         "src/tf_graphcycles.cc",
+        "src/tf_graphcycles.h",
         "logging/ngraph_log.h",
         "logging/ngraph_log.cc",
         "logging/tf_graph_writer.h",
         "logging/tf_graph_writer.cc",
     ],
-    hdrs = [
-        "src/tf_graphcycles.h",
-    ],
+    hdrs = [],
     deps = [
         "@org_tensorflow//tensorflow/core:protos_all_proto_text",
         "@org_tensorflow//tensorflow/core:framework_headers_lib",
@@ -64,17 +66,19 @@ tf_cc_test(
     name = "ngraph_tf_tests",
     size = "small",
     srcs = [
-        "test/tf_exec.cpp",
         "test/conversions.cpp",
-        "test/padding.cpp",
         "test/graph_rewrites/assign_clusters.cc",
-        "test/test_utilities.h",
-        "test/test_utilities.cpp",
+        "test/graph_rewrites/deadness_test.cc",
+        "test/main.cpp",
+        "test/opexecuter.cpp",
+        "test/opexecuter.h",
+        "test/padding.cpp",
+        "test/test_array_ops.cpp",
         "test/test_math_ops.cpp",
         "test/test_nn_ops.cpp",
-        "test/opexecuter.h",
-        "test/opexecuter.cpp",
-        "test/main.cpp",
+        "test/test_utilities.cpp",
+        "test/test_utilities.h",
+        "test/tf_exec.cpp",
     ],
     deps = [
         ":ngraph_tf",
-- 
GitLab


From ebbf6b3c79ffc0a94b13d95d24aec49fbcef6aee Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 00:11:52 -0700
Subject: [PATCH 0656/1357] Use less memory by only storing pointers to ops
 that feed inplace ops. Handle empty strings in NodePositionIfSameNode.

PiperOrigin-RevId: 214393567
---
 .../grappler/optimizers/arithmetic_optimizer.cc | 17 +++++++++--------
 tensorflow/core/grappler/utils.cc               |  4 +++-
 tensorflow/core/grappler/utils_test.cc          |  4 +++-
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index ab97dcdb99..75ed12635e 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -3043,10 +3043,11 @@ void ArithmeticOptimizer::DedupComputations() {
   }
   std::set<int> duplicates;
   // Populate feed_inplace_op;
-  std::unordered_map<string, bool> feeds_inplace_op;
+  std::unordered_set<NodeDef*> feeds_inplace_op;
   for (int i = 0; i < optimized_graph_->node_size(); ++i) {
-    feeds_inplace_op[optimized_graph_->node(i).name()] =
-        FeedsInPlaceOp(graph_view, optimized_graph_->node(i));
+    if (FeedsInPlaceOp(graph_view, optimized_graph_->node(i))) {
+      feeds_inplace_op.insert(optimized_graph_->mutable_node(i));
+    }
   }
   do {
     stop = true;
@@ -3056,9 +3057,8 @@ void ArithmeticOptimizer::DedupComputations() {
         continue;
       }
       NodeDef* node = optimized_graph_->mutable_node(i);
-      const string& node_name = node->name();
-      if (node_name.empty()) continue;
-      if (feeds_inplace_op[node_name] || !CanDedup(*node)) {
+      if (!CanDedup(*node) ||
+          feeds_inplace_op.find(node) != feeds_inplace_op.end()) {
         continue;
       }
       NodeDef* rep = nodes.FindOrAddRepresentative(node);
@@ -3069,7 +3069,7 @@ void ArithmeticOptimizer::DedupComputations() {
       // races. For example: If we dedup nodes initializing two independent
       // inplace accumulations, they will write to the same buffer, clobbering
       // each other's results.
-      if (feeds_inplace_op[rep->name()]) {
+      if (feeds_inplace_op.find(rep) != feeds_inplace_op.end()) {
         continue;
       }
       VLOG(3) << "Remove duplicated node: node=" << node->name()
@@ -3078,7 +3078,8 @@ void ArithmeticOptimizer::DedupComputations() {
       for (NodeDef* fanout : fanouts) {
         for (int i = 0; i < fanout->input_size(); ++i) {
           string* fanout_input = fanout->mutable_input(i);
-          const int position = NodePositionIfSameNode(*fanout_input, node_name);
+          const int position =
+              NodePositionIfSameNode(*fanout_input, node->name());
           // Update name in-place.
           if (position < -1) {
             continue;
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 0424c9e8a4..db6e4e6852 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/utils.h"
 
+#include <iterator>
 #include <memory>
 #include <queue>
 #include <vector>
@@ -170,7 +171,8 @@ int NodePositionIfSameNode(const string& input_name, const string& node_name) {
   const bool is_ctrl = input_name[0] == '^';
   auto input_it = is_ctrl ? input_name.begin() + 1 : input_name.begin();
   auto node_it = node_name.begin();
-  if (std::distance(input_it, input_name.end()) < node_name.size()) {
+  if (node_name.empty() ||
+      std::distance(input_it, input_name.end()) < node_name.size()) {
     return -2;
   }
   while (node_it != node_name.end()) {
diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc
index 8ff5f20c6d..6b787a6910 100644
--- a/tensorflow/core/grappler/utils_test.cc
+++ b/tensorflow/core/grappler/utils_test.cc
@@ -149,7 +149,9 @@ TEST_F(UtilsTest, NodePosition) {
 }
 
 TEST_F(UtilsTest, NodePositionIfSameNode) {
-  EXPECT_EQ(0, NodePositionIfSameNode("abc", "abc"));
+  EXPECT_EQ(-2, NodePositionIfSameNode(":123", ""));
+  EXPECT_EQ(-2, NodePositionIfSameNode(":", ""));
+  EXPECT_EQ(-2, NodePositionIfSameNode("", ""));
   EXPECT_EQ(123, NodePositionIfSameNode("abc:123", "abc"));
   EXPECT_EQ(-1, NodePositionIfSameNode("^abc", "abc"));
   EXPECT_EQ(-1, NodePositionIfSameNode("^abc:123", "abc"));
-- 
GitLab


From 21d4e8bb30a1753a81edd4912881d95b47ae3d1c Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Tue, 25 Sep 2018 15:50:10 +0800
Subject: [PATCH 0657/1357] remove warning lines

---
 tensorflow/python/ops/variables.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index d058478d58..69f63bc8e6 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -2401,7 +2401,6 @@ class PartitionedVariable(object):
     partition_axes = self._partition_axes()
     if len(partition_axes) > 1:
       raise NotImplementedError(
-          "Cannot concatenate along more than one dimension: %s.  "
           "Multi-axis partition assign_fn is not supported "
           % str(partition_axes))
     partition_ix = partition_axes[0]
-- 
GitLab


From 037f98c596852e3bef56a8c2a6ffeac9e2e5dbbe Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 01:28:12 -0700
Subject: [PATCH 0658/1357] Add fast-paths and reduce onerous precondition
 overhead in optimized mode.

PiperOrigin-RevId: 214401061
---
 tensorflow/compiler/xla/literal.h     | 16 ++++++++++------
 tensorflow/compiler/xla/shape_util.cc |  7 +++++--
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/literal.h b/tensorflow/compiler/xla/literal.h
index 1e0a2ad0dd..3cd3541fe1 100644
--- a/tensorflow/compiler/xla/literal.h
+++ b/tensorflow/compiler/xla/literal.h
@@ -203,6 +203,10 @@ class LiteralBase {
   // Returns the count of the elements in the array at the given shape index in
   // this literal.
   int64 element_count(const ShapeIndex& index = {}) const {
+    if (index.empty()) {
+      // Common case, avoid GetSubshape().
+      return ShapeUtil::ElementsIn(shape());
+    }
     return ShapeUtil::ElementsIn(ShapeUtil::GetSubshape(shape(), index));
   }
 
@@ -852,9 +856,9 @@ class BorrowingLiteral : public LiteralBase {
 
 template <typename NativeT>
 absl::Span<const NativeT> LiteralBase::Piece::data() const {
-  CHECK(ShapeUtil::IsArray(subshape())) << ShapeUtil::HumanString(subshape());
-  CHECK_EQ(subshape().element_type(),
-           primitive_util::NativeToPrimitiveType<NativeT>())
+  DCHECK(ShapeUtil::IsArray(subshape())) << ShapeUtil::HumanString(subshape());
+  DCHECK_EQ(subshape().element_type(),
+            primitive_util::NativeToPrimitiveType<NativeT>())
       << "Attempting to access "
       << PrimitiveType_Name(primitive_util::NativeToPrimitiveType<NativeT>())
       << " type, but literal element type is "
@@ -865,9 +869,9 @@ absl::Span<const NativeT> LiteralBase::Piece::data() const {
 
 template <typename NativeT>
 absl::Span<NativeT> LiteralBase::Piece::data() {
-  CHECK(ShapeUtil::IsArray(subshape())) << ShapeUtil::HumanString(subshape());
-  CHECK_EQ(subshape().element_type(),
-           primitive_util::NativeToPrimitiveType<NativeT>())
+  DCHECK(ShapeUtil::IsArray(subshape())) << ShapeUtil::HumanString(subshape());
+  DCHECK_EQ(subshape().element_type(),
+            primitive_util::NativeToPrimitiveType<NativeT>())
       << "Attempting to access "
       << PrimitiveType_Name(primitive_util::NativeToPrimitiveType<NativeT>())
       << " type, but literal element type is "
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 96c80fd577..020c167ee9 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -422,8 +422,11 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
 }
 
 /* static */ int64 ShapeUtil::ElementsIn(const Shape& shape) {
-  CHECK(IsArray(shape)) << ShapeUtil::HumanString(shape);
-  CHECK_EQ(shape.dimensions_size(), Rank(shape));
+  DCHECK(IsArray(shape)) << ShapeUtil::HumanString(shape);
+  DCHECK_EQ(shape.dimensions_size(), Rank(shape));
+  if (shape.dimensions().size() == 1) {
+    return shape.dimensions()[0];
+  }
   return std::accumulate<decltype(shape.dimensions().begin()), int64>(
       shape.dimensions().begin(), shape.dimensions().end(), 1LL,
       std::multiplies<int64>());
-- 
GitLab


From 1cb8437a46f1da7717ebc41ee29a74c305266ec6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 02:02:09 -0700
Subject: [PATCH 0659/1357] compat: Update forward compatibility horizon to
 2018-09-25

PiperOrigin-RevId: 214404262
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 45f40cd183..74fe1fe35c 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 24)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 25)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From a9d0bf9afc323be9ca52e1a23c52c3238a9b17cf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 03:54:22 -0700
Subject: [PATCH 0660/1357] Swap Const ops back to GPU greedily.

PiperOrigin-RevId: 214415906
---
 .../optimizers/pin_to_host_optimizer.cc       | 25 ++++++++++++++-
 .../optimizers/pin_to_host_optimizer_test.cc  | 32 +++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 98c27300a9..2190d38937 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -71,6 +71,7 @@ bool AreAllNodeInputsPinnedToHost(const GraphView& graph, const NodeDef& node) {
     if (output_arg_id < 0) {
       LOG(WARNING) << "Invalid port: " << fanin.port_id << "!\n"
                    << node.DebugString() << "\n"
+                   << fanin.node->DebugString() << "\n"
                    << fanin_odef->DebugString();
       return false;
     }
@@ -158,7 +159,7 @@ string TryFindHostDevice(const gtl::FlatSet<string>& devices,
 }
 
 bool IsTPUGraphDef(const GraphDef& def) {
-  for (auto node : def.node()) {
+  for (const auto& node : def.node()) {
     if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
         node.op() == "TPUPartitionedCall") {
       return true;
@@ -197,6 +198,10 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   // Topologically sort the graph, so that we traverse the nodes in order. This
   // will help us discover producer->consumer chains of Host ops.
   TF_RETURN_IF_ERROR(TopologicalSort(optimized_graph));
+
+  // All the Const nodes, and their original devices in topological order.
+  std::vector<std::pair<NodeDef*, string>> const_nodes;
+
   for (auto& node : *optimized_graph->mutable_node()) {
     // Check if node already on CPU.
     if (str_util::StrContains(node.device(), DEVICE_CPU)) {
@@ -230,10 +235,28 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       continue;
     }
 
+    if (IsConstant(node)) {
+      const_nodes.emplace_back(&node, node.device());
+    }
     // Try and swap the device to Host.
     node.set_device(
         internal::TryFindHostDevice(devices, has_device_cpu, node.device()));
   }
+
+  // Traverse all `const_nodes`, and map them back to GPU greedily.
+  for (auto& it : const_nodes) {
+    NodeDef* node = it.first;
+    const string& device = it.second;
+
+    // Check all the consumers of this node, if any of them are on the original
+    // device, swap this node back onto the original device.
+    for (const GraphView::InputPort& fanout : graph.GetFanouts(*node, false)) {
+      if (fanout.node->device() == device) {
+        node->set_device(device);
+        break;
+      }
+    }
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
index 339ddfd1b5..173cb3fe3c 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
@@ -128,6 +128,38 @@ TEST_F(PinToHostOptimizerTest, TopologicalSort) {
   EXPECT_EQ(found, 4);
 }
 
+TEST_F(PinToHostOptimizerTest, NoSwap) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  // `b` should be too big to swap, consequently `c` should not be swapped.
+  // PinToHostOptimizer should then detect that `a` should not be swapped.
+  Output a = ops::Const(s.WithOpName("a"), 1, {1, 1});
+  Output b = ops::Const(s.WithOpName("b"), 1, {1, 1024 * 1024});
+  Output c = ops::MatMul(s.WithOpName("c"), a, b);
+
+  GrapplerItem item;
+  item.fetch = {"a", "b", "c"};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
+
+  GraphDef output;
+  PinToHostOptimizer optimizer(RewriterConfig::ON);
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  auto tensors = EvaluateNodes(item.graph, item.fetch);
+  EXPECT_EQ(tensors_expected.size(), tensors.size());
+  for (int i = 0; i < tensors.size(); ++i) {
+    test::ExpectTensorEqual<int32>(tensors[i], tensors_expected[i]);
+  }
+
+  int found = 0;
+  for (const NodeDef& node : output.node()) {
+    EXPECT_TRUE(node.device().empty());
+    ++found;
+  }
+  EXPECT_EQ(found, 3);
+}
+
 TEST_F(PinToHostOptimizerTest, PortIdToArgId) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output a = ops::Const(s.WithOpName("a"), 1, {1, 2, 3});
-- 
GitLab


From 2dff919b48799171c3a95acaea9e790cdcadb0c3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 07:03:40 -0700
Subject: [PATCH 0661/1357] Update kernel evals to use new kernel signatures.

PiperOrigin-RevId: 214432840
---
 tensorflow/contrib/lite/kernels/lstm.cc | 48 ++++++++++++++-----------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc
index aaa3ce966e..5b996d00bc 100644
--- a/tensorflow/contrib/lite/kernels/lstm.cc
+++ b/tensorflow/contrib/lite/kernels/lstm.cc
@@ -893,18 +893,21 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       activation_out->type == kTfLiteFloat32 &&
       concat_temp->type == kTfLiteFloat32 &&
       activation_temp->type == kTfLiteFloat32) {
+    tflite::LstmCellParams op_params;
+    // Float LSTM cell does not need parameters to be set: leave untouched.
     optimized_ops::LstmCell(
+        op_params,
         // Inputs.
-        GetTensorData<float>(input), GetTensorDims(input),
-        GetTensorData<float>(prev_activation), GetTensorDims(prev_activation),
-        GetTensorData<float>(weights), GetTensorDims(weights),
-        GetTensorData<float>(bias), GetTensorDims(bias),
-        GetTensorData<float>(prev_state), GetTensorDims(prev_state),
+        GetTensorShape(input), GetTensorData<float>(input),
+        GetTensorShape(prev_activation), GetTensorData<float>(prev_activation),
+        GetTensorShape(weights), GetTensorData<float>(weights),
+        GetTensorShape(bias), GetTensorData<float>(bias),
+        GetTensorShape(prev_state), GetTensorData<float>(prev_state),
         // Outputs.
-        GetTensorData<float>(state_out), GetTensorDims(state_out),
-        GetTensorData<float>(activation_out), GetTensorDims(activation_out),
-        GetTensorData<float>(concat_temp), GetTensorDims(concat_temp),
-        GetTensorData<float>(activation_temp), GetTensorDims(activation_temp));
+        GetTensorShape(state_out), GetTensorData<float>(state_out),
+        GetTensorShape(activation_out), GetTensorData<float>(activation_out),
+        GetTensorShape(concat_temp), GetTensorData<float>(concat_temp),
+        GetTensorShape(activation_temp), GetTensorData<float>(activation_temp));
   } else if (input->type == kTfLiteUInt8 &&
              prev_activation->type == kTfLiteUInt8 &&
              weights->type == kTfLiteUInt8 && bias->type == kTfLiteInt32 &&
@@ -934,20 +937,25 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     int accum_shift;
     tflite::QuantizeMultiplier(real_accum_multiplier, &accum_multiplier,
                                &accum_shift);
+    tflite::LstmCellParams op_params;
+    op_params.weights_zero_point = weights->params.zero_point;
+    op_params.accum_multiplier = accum_multiplier;
+    op_params.accum_shift = accum_shift;
     optimized_ops::LstmCell<4>(
+        op_params,
         // Inputs.
-        GetTensorData<uint8_t>(input), GetTensorDims(input),
-        GetTensorData<uint8_t>(prev_activation), GetTensorDims(prev_activation),
-        GetTensorData<uint8_t>(weights), GetTensorDims(weights),
-        GetTensorData<int32_t>(bias), GetTensorDims(bias),
-        GetTensorData<int16_t>(prev_state), GetTensorDims(prev_state),
+        GetTensorShape(input), GetTensorData<uint8_t>(input),
+        GetTensorShape(prev_activation),
+        GetTensorData<uint8_t>(prev_activation), GetTensorShape(weights),
+        GetTensorData<uint8_t>(weights), GetTensorShape(bias),
+        GetTensorData<int32_t>(bias), GetTensorShape(prev_state),
+        GetTensorData<int16_t>(prev_state),
         // Outputs.
-        GetTensorData<int16_t>(state_out), GetTensorDims(state_out),
-        GetTensorData<uint8_t>(activation_out), GetTensorDims(activation_out),
-        GetTensorData<uint8_t>(concat_temp), GetTensorDims(concat_temp),
-        GetTensorData<int16_t>(activation_temp), GetTensorDims(activation_temp),
-        weights->params.zero_point, accum_multiplier, accum_shift,
-        gemm_context);
+        GetTensorShape(state_out), GetTensorData<int16_t>(state_out),
+        GetTensorShape(activation_out), GetTensorData<uint8_t>(activation_out),
+        GetTensorShape(concat_temp), GetTensorData<uint8_t>(concat_temp),
+        GetTensorShape(activation_temp),
+        GetTensorData<int16_t>(activation_temp), gemm_context);
   } else {
     context->ReportError(context,
                          "Unsupported combination of data types for LstmCell");
-- 
GitLab


From 76b384d315b56d7bab1854579ee834763a460ca7 Mon Sep 17 00:00:00 2001
From: Josh Gordon <jbgordon@google.com>
Date: Tue, 25 Sep 2018 07:51:36 -0700
Subject: [PATCH 0662/1357] Updating program in README to use eager.

PiperOrigin-RevId: 214437695
---
 README.md | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index e3092e551e..57efb876c9 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ subscribing to
 [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce).
 
 ## Installation
-*See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.*
+*See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions on how to install our release binaries or how to build from source.*
 
 People who are a little more adventurous can also try our nightly binaries:
 
@@ -48,15 +48,12 @@ $ python
 ```
 ```python
 >>> import tensorflow as tf
+>>> tf.enable_eager_execution()
+>>> tf.add(1, 2)
+3
 >>> hello = tf.constant('Hello, TensorFlow!')
->>> sess = tf.Session()
->>> sess.run(hello)
+>>> hello.numpy()
 'Hello, TensorFlow!'
->>> a = tf.constant(10)
->>> b = tf.constant(32)
->>> sess.run(a + b)
-42
->>> sess.close()
 ```
 Learn more examples about how to do specific tasks in TensorFlow at the [tutorials page of tensorflow.org](https://www.tensorflow.org/tutorials/).
 
@@ -106,13 +103,13 @@ The TensorFlow project strives to abide by generally accepted best practices in
 
 
 ## For more information
+* [TensorFlow Website](https://www.tensorflow.org)
+* [TensorFlow Tutorials](https://www.tensorflow.org/tutorials/)
+* [TensorFlow Model Zoo](https://github.com/tensorflow/models)
+* [TensorFlow Twitter](https://twitter.com/tensorflow)
 * [TensorFlow Blog](https://medium.com/tensorflow)
 * [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si)
-* [TensorFlow Model Zoo](https://github.com/tensorflow/models)
-* [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730)
 * [TensorFlow Roadmap](https://www.tensorflow.org/community/roadmap)
-* [TensorFlow Twitter](https://twitter.com/tensorflow)
-* [TensorFlow Website](https://www.tensorflow.org)
 * [TensorFlow White Papers](https://www.tensorflow.org/about/bib)
 * [TensorFlow YouTube Channel](https://www.youtube.com/channel/UC0rqucBdTuFTjJiefW5t-IQ)
 
-- 
GitLab


From 6419058dc6d065c34cccf55b11a7ba02c2ef7b06 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 25 Sep 2018 08:07:53 -0700
Subject: [PATCH 0663/1357] Fix test tag to require a sm70 GPU.

PiperOrigin-RevId: 214439785
---
 tensorflow/contrib/fused_conv/BUILD | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/fused_conv/BUILD b/tensorflow/contrib/fused_conv/BUILD
index 0f0813c07f..9725233e7f 100644
--- a/tensorflow/contrib/fused_conv/BUILD
+++ b/tensorflow/contrib/fused_conv/BUILD
@@ -111,7 +111,6 @@ tf_gen_op_wrapper_py(
 
 cuda_py_test(
     name = "fused_conv2d_bias_activation_op_test",
-    size = "large",
     srcs = ["python/ops/fused_conv2d_bias_activation_op_test.py"],
     additional_deps = [
         ":fused_conv_py",
@@ -130,14 +129,12 @@ cuda_py_test(
         "//tensorflow/python:variables",
     ],
     tags = [
-        "manual",
-        "requires_cudnn6",
+        "requires-gpu-sm70",
     ],
 )
 
 cuda_py_test(
     name = "fused_conv2d_bias_activation_benchmark",
-    size = "large",
     srcs = ["python/ops/fused_conv2d_bias_activation_benchmark.py"],
     additional_deps = [
         ":fused_conv_py",
@@ -155,7 +152,6 @@ cuda_py_test(
     ],
     main = "python/ops/fused_conv2d_bias_activation_benchmark.py",
     tags = [
-        "manual",
-        "requires_cudnn6",
+        "requires-gpu-sm70",
     ],
 )
-- 
GitLab


From 9f300c2712340345570cf388c1a47fd771508ed8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 08:48:32 -0700
Subject: [PATCH 0664/1357] Move from deprecated self.test_session() to
 self.cached_session().

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about:
* the fact that the session may be reused.
* the session is not closed even when doing a "with self.test_session()" statement.

PiperOrigin-RevId: 214444907
---
 .../lib/learner/batch/categorical_split_handler_test.py         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler_test.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler_test.py
index c050c2ed7f..a2f708081a 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler_test.py
+++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler_test.py
@@ -170,7 +170,7 @@ class EqualitySplitHandlerTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, split_node.feature_id)
 
   def testObliviousFeatureSplitGeneration(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # The data looks like the following:
       # Example |  Gradients    | Partition | Feature ID     |
       # i0      |  (0.2, 0.12)  | 1         | 1              |
-- 
GitLab


From 8f021033d644b9538f1e551ecffb5dda4f01d084 Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Tue, 25 Sep 2018 08:55:12 -0700
Subject: [PATCH 0665/1357] Removed the hdrs list as it's not neeed

---
 third_party/ngraph/ngraph_tf.BUILD | 1 -
 1 file changed, 1 deletion(-)

diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index baf235d48a..dbedca0a03 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -46,7 +46,6 @@ cc_library(
         "logging/tf_graph_writer.h",
         "logging/tf_graph_writer.cc",
     ],
-    hdrs = [],
     deps = [
         "@org_tensorflow//tensorflow/core:protos_all_proto_text",
         "@org_tensorflow//tensorflow/core:framework_headers_lib",
-- 
GitLab


From 588787ff7572208285cb471c76f4f8c83ad9d7ec Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 09:08:55 -0700
Subject: [PATCH 0666/1357] Use self.cached_session instead of
 self.test_session in linear_operator_circulant_test.

Also:
* Instead of overwriting self.test_session(), overwrite self._constrain_devices_and_set_default() to remap the kernel operations (this way self.cached_session(), self.test_session() and self.session() are all correct).
* Make linear_operator_test_util use self.session(graph=...) instead of self.test_session(graph=...) (semantically equivalent).
PiperOrigin-RevId: 214448118
---
 .../linalg/linear_operator_circulant_test.py  | 73 ++++++++++---------
 .../ops/linalg/linear_operator_test_util.py   | 16 ++--
 2 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py
index 7261d4bb3b..f1e151ebd8 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py
@@ -37,8 +37,10 @@ class LinearOperatorCirculantBaseTest(object):
   """Common class for circulant tests."""
 
   @contextlib.contextmanager
-  def test_session(self, *args, **kwargs):
-    with test.TestCase.test_session(self, *args, **kwargs) as sess:
+  def _constrain_devices_and_set_default(self, sess, use_gpu, force_gpu):
+    """We overwrite the FFT operation mapping for testing."""
+    with test.TestCase._constrain_devices_and_set_default(
+        self, sess, use_gpu, force_gpu) as sess:
       with spectral_ops_test_util.fft_kernel_label_map():
         yield sess
 
@@ -110,8 +112,7 @@ class LinearOperatorCirculantTestSelfAdjointOperator(
     lin_op_spectrum = spectrum
 
     if use_placeholder:
-      lin_op_spectrum = array_ops.placeholder_with_default(
-          spectrum, shape=None)
+      lin_op_spectrum = array_ops.placeholder_with_default(spectrum, shape=None)
 
     operator = linalg.LinearOperatorCirculant(
         lin_op_spectrum, is_self_adjoint=True, input_output_dtype=dtype)
@@ -121,7 +122,7 @@ class LinearOperatorCirculantTestSelfAdjointOperator(
     return operator, mat
 
   def test_simple_hermitian_spectrum_gives_operator_with_zero_imag_part(self):
-    with self.test_session():
+    with self.cached_session():
       spectrum = math_ops.cast([1., 1j, -1j], dtypes.complex64)
       operator = linalg.LinearOperatorCirculant(
           spectrum, input_output_dtype=dtypes.complex64)
@@ -171,8 +172,7 @@ class LinearOperatorCirculantTestHermitianSpectrum(
     lin_op_spectrum = spectrum
 
     if use_placeholder:
-      lin_op_spectrum = array_ops.placeholder_with_default(
-          spectrum, shape=None)
+      lin_op_spectrum = array_ops.placeholder_with_default(spectrum, shape=None)
 
     operator = linalg.LinearOperatorCirculant(
         lin_op_spectrum, input_output_dtype=dtype)
@@ -182,7 +182,7 @@ class LinearOperatorCirculantTestHermitianSpectrum(
     return operator, mat
 
   def test_simple_hermitian_spectrum_gives_operator_with_zero_imag_part(self):
-    with self.test_session():
+    with self.cached_session():
       spectrum = math_ops.cast([1., 1j, -1j], dtypes.complex64)
       operator = linalg.LinearOperatorCirculant(
           spectrum, input_output_dtype=dtypes.complex64)
@@ -217,8 +217,7 @@ class LinearOperatorCirculantTestNonHermitianSpectrum(
     lin_op_spectrum = spectrum
 
     if use_placeholder:
-      lin_op_spectrum = array_ops.placeholder_with_default(
-          spectrum, shape=None)
+      lin_op_spectrum = array_ops.placeholder_with_default(spectrum, shape=None)
 
     operator = linalg.LinearOperatorCirculant(
         lin_op_spectrum, input_output_dtype=dtype)
@@ -228,7 +227,7 @@ class LinearOperatorCirculantTestNonHermitianSpectrum(
     return operator, mat
 
   def test_simple_hermitian_spectrum_gives_operator_with_zero_imag_part(self):
-    with self.test_session():
+    with self.cached_session():
       spectrum = math_ops.cast([1., 1j, -1j], dtypes.complex64)
       operator = linalg.LinearOperatorCirculant(
           spectrum, input_output_dtype=dtypes.complex64)
@@ -238,7 +237,7 @@ class LinearOperatorCirculantTestNonHermitianSpectrum(
       np.testing.assert_allclose(0, imag_matrix.eval(), rtol=0, atol=eps * 3)
 
   def test_simple_positive_real_spectrum_gives_self_adjoint_pos_def_oper(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       spectrum = math_ops.cast([6., 4, 2], dtypes.complex64)
       operator = linalg.LinearOperatorCirculant(
           spectrum, input_output_dtype=dtypes.complex64)
@@ -250,7 +249,7 @@ class LinearOperatorCirculantTestNonHermitianSpectrum(
       operator.assert_self_adjoint().run()  # Should not fail
 
   def test_defining_operator_using_real_convolution_kernel(self):
-    with self.test_session():
+    with self.cached_session():
       convolution_kernel = [1., 2., 1.]
       spectrum = math_ops.fft(
           math_ops.cast(convolution_kernel, dtypes.complex64))
@@ -266,7 +265,7 @@ class LinearOperatorCirculantTestNonHermitianSpectrum(
       np.testing.assert_allclose(0, np.imag(matrix), atol=1e-6)
 
   def test_hermitian_spectrum_gives_operator_with_zero_imag_part(self):
-    with self.test_session():
+    with self.cached_session():
       # Make spectrum the FFT of a real convolution kernel h.  This ensures that
       # spectrum is Hermitian.
       h = linear_operator_test_util.random_normal(shape=(3, 4))
@@ -281,7 +280,7 @@ class LinearOperatorCirculantTestNonHermitianSpectrum(
 
   def test_convolution_kernel_same_as_first_row_of_to_dense(self):
     spectrum = [[3., 2., 1.], [2., 1.5, 1.]]
-    with self.test_session():
+    with self.cached_session():
       operator = linalg.LinearOperatorCirculant(spectrum)
       h = operator.convolution_kernel()
       c = operator.to_dense()
@@ -293,27 +292,27 @@ class LinearOperatorCirculantTestNonHermitianSpectrum(
   def test_assert_non_singular_fails_for_singular_operator(self):
     spectrum = math_ops.cast([0, 4, 2j + 2], dtypes.complex64)
     operator = linalg.LinearOperatorCirculant(spectrum)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesOpError("Singular operator"):
         operator.assert_non_singular().run()
 
   def test_assert_non_singular_does_not_fail_for_non_singular_operator(self):
     spectrum = math_ops.cast([-3j, 4, 2j + 2], dtypes.complex64)
     operator = linalg.LinearOperatorCirculant(spectrum)
-    with self.test_session():
+    with self.cached_session():
       operator.assert_non_singular().run()  # Should not fail
 
   def test_assert_positive_definite_fails_for_non_positive_definite(self):
     spectrum = math_ops.cast([6., 4, 2j], dtypes.complex64)
     operator = linalg.LinearOperatorCirculant(spectrum)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesOpError("Not positive definite"):
         operator.assert_positive_definite().run()
 
   def test_assert_positive_definite_does_not_fail_when_pos_def(self):
     spectrum = math_ops.cast([6., 4, 2j + 2], dtypes.complex64)
     operator = linalg.LinearOperatorCirculant(spectrum)
-    with self.test_session():
+    with self.cached_session():
       operator.assert_positive_definite().run()  # Should not fail
 
   def test_real_spectrum_and_not_self_adjoint_hint_raises(self):
@@ -331,8 +330,10 @@ class LinearOperatorCirculant2DBaseTest(object):
   """Common class for 2D circulant tests."""
 
   @contextlib.contextmanager
-  def test_session(self, *args, **kwargs):
-    with test.TestCase.test_session(self, *args, **kwargs) as sess:
+  def _constrain_devices_and_set_default(self, sess, use_gpu, force_gpu):
+    """We overwrite the FFT operation mapping for testing."""
+    with test.TestCase._constrain_devices_and_set_default(
+        self, sess, use_gpu, force_gpu) as sess:
       with spectral_ops_test_util.fft_kernel_label_map():
         yield sess
 
@@ -446,8 +447,7 @@ class LinearOperatorCirculant2DTestHermitianSpectrum(
     lin_op_spectrum = spectrum
 
     if use_placeholder:
-      lin_op_spectrum = array_ops.placeholder_with_default(
-          spectrum, shape=None)
+      lin_op_spectrum = array_ops.placeholder_with_default(spectrum, shape=None)
 
     operator = linalg.LinearOperatorCirculant2D(
         lin_op_spectrum, input_output_dtype=dtype)
@@ -482,8 +482,7 @@ class LinearOperatorCirculant2DTestNonHermitianSpectrum(
     lin_op_spectrum = spectrum
 
     if use_placeholder:
-      lin_op_spectrum = array_ops.placeholder_with_default(
-          spectrum, shape=None)
+      lin_op_spectrum = array_ops.placeholder_with_default(spectrum, shape=None)
 
     operator = linalg.LinearOperatorCirculant2D(
         lin_op_spectrum, input_output_dtype=dtype)
@@ -493,7 +492,7 @@ class LinearOperatorCirculant2DTestNonHermitianSpectrum(
     return operator, mat
 
   def test_real_hermitian_spectrum_gives_real_symmetric_operator(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # This is a real and hermitian spectrum.
       spectrum = [[1., 2., 2.], [3., 4., 4.], [3., 4., 4.]]
       operator = linalg.LinearOperatorCirculant(spectrum)
@@ -510,7 +509,7 @@ class LinearOperatorCirculant2DTestNonHermitianSpectrum(
       self.assertAllClose(matrix, matrix_transpose, atol=0)
 
   def test_real_spectrum_gives_self_adjoint_operator(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # This is a real and hermitian spectrum.
       spectrum = linear_operator_test_util.random_normal(
           shape=(3, 3), dtype=dtypes.float32)
@@ -526,27 +525,27 @@ class LinearOperatorCirculant2DTestNonHermitianSpectrum(
   def test_assert_non_singular_fails_for_singular_operator(self):
     spectrum = math_ops.cast([[0, 4], [2j + 2, 3.]], dtypes.complex64)
     operator = linalg.LinearOperatorCirculant2D(spectrum)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesOpError("Singular operator"):
         operator.assert_non_singular().run()
 
   def test_assert_non_singular_does_not_fail_for_non_singular_operator(self):
     spectrum = math_ops.cast([[-3j, 4], [2j + 2, 3.]], dtypes.complex64)
     operator = linalg.LinearOperatorCirculant2D(spectrum)
-    with self.test_session():
+    with self.cached_session():
       operator.assert_non_singular().run()  # Should not fail
 
   def test_assert_positive_definite_fails_for_non_positive_definite(self):
     spectrum = math_ops.cast([[6., 4], [2j, 3.]], dtypes.complex64)
     operator = linalg.LinearOperatorCirculant2D(spectrum)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesOpError("Not positive definite"):
         operator.assert_positive_definite().run()
 
   def test_assert_positive_definite_does_not_fail_when_pos_def(self):
     spectrum = math_ops.cast([[6., 4], [2j + 2, 3.]], dtypes.complex64)
     operator = linalg.LinearOperatorCirculant2D(spectrum)
-    with self.test_session():
+    with self.cached_session():
       operator.assert_positive_definite().run()  # Should not fail
 
   def test_real_spectrum_and_not_self_adjoint_hint_raises(self):
@@ -574,13 +573,15 @@ class LinearOperatorCirculant3DTest(test.TestCase):
   """Simple test of the 3D case.  See also the 1D and 2D tests."""
 
   @contextlib.contextmanager
-  def test_session(self, *args, **kwargs):
-    with test.TestCase.test_session(self, *args, **kwargs) as sess:
+  def _constrain_devices_and_set_default(self, sess, use_gpu, force_gpu):
+    """We overwrite the FFT operation mapping for testing."""
+    with test.TestCase._constrain_devices_and_set_default(
+        self, sess, use_gpu, force_gpu) as sess:
       with spectral_ops_test_util.fft_kernel_label_map():
         yield sess
 
   def test_real_spectrum_gives_self_adjoint_operator(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # This is a real and hermitian spectrum.
       spectrum = linear_operator_test_util.random_normal(
           shape=(2, 2, 3, 5), dtype=dtypes.float32)
@@ -597,7 +598,7 @@ class LinearOperatorCirculant3DTest(test.TestCase):
       self.assertAllClose(matrix, matrix_h)
 
   def test_defining_operator_using_real_convolution_kernel(self):
-    with self.test_session():
+    with self.cached_session():
       convolution_kernel = linear_operator_test_util.random_normal(
           shape=(2, 2, 3, 5), dtype=dtypes.float32)
       # Convolution kernel is real ==> spectrum is Hermitian.
@@ -615,7 +616,7 @@ class LinearOperatorCirculant3DTest(test.TestCase):
       np.testing.assert_allclose(0, np.imag(matrix), atol=1e-6)
 
   def test_defining_spd_operator_by_taking_real_part(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       # S is real and positive.
       s = linear_operator_test_util.random_uniform(
           shape=(10, 2, 3, 4), dtype=dtypes.float32, minval=1., maxval=2.)
diff --git a/tensorflow/python/ops/linalg/linear_operator_test_util.py b/tensorflow/python/ops/linalg/linear_operator_test_util.py
index 78c85db557..76d659f109 100644
--- a/tensorflow/python/ops/linalg/linear_operator_test_util.py
+++ b/tensorflow/python/ops/linalg/linear_operator_test_util.py
@@ -184,7 +184,7 @@ class LinearOperatorDerivedClassTest(test.TestCase):
     for use_placeholder in self._use_placeholder_options:
       for build_info in self._operator_build_infos:
         for dtype in self._dtypes_to_test:
-          with self.test_session(graph=ops.Graph()) as sess:
+          with self.session(graph=ops.Graph()) as sess:
             sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
             operator, mat = self._operator_and_matrix(
                 build_info, dtype, use_placeholder=use_placeholder)
@@ -199,7 +199,7 @@ class LinearOperatorDerivedClassTest(test.TestCase):
     for use_placeholder in self._use_placeholder_options:
       for build_info in self._operator_build_infos:
         for dtype in self._dtypes_to_test:
-          with self.test_session(graph=ops.Graph()) as sess:
+          with self.session(graph=ops.Graph()) as sess:
             sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
             operator, mat = self._operator_and_matrix(
                 build_info, dtype, use_placeholder=use_placeholder)
@@ -215,7 +215,7 @@ class LinearOperatorDerivedClassTest(test.TestCase):
     for use_placeholder in self._use_placeholder_options:
       for build_info in self._operator_build_infos:
         for dtype in self._dtypes_to_test:
-          with self.test_session(graph=ops.Graph()) as sess:
+          with self.session(graph=ops.Graph()) as sess:
             sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
             operator, mat = self._operator_and_matrix(
                 build_info, dtype, use_placeholder=use_placeholder)
@@ -240,7 +240,7 @@ class LinearOperatorDerivedClassTest(test.TestCase):
         for dtype in self._dtypes_to_test:
           for adjoint in self._adjoint_options:
             for adjoint_arg in self._adjoint_arg_options:
-              with self.test_session(graph=ops.Graph()) as sess:
+              with self.session(graph=ops.Graph()) as sess:
                 sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
                 operator, mat = self._operator_and_matrix(
                     build_info, dtype, use_placeholder=use_placeholder)
@@ -283,7 +283,7 @@ class LinearOperatorDerivedClassTest(test.TestCase):
         for dtype in self._dtypes_to_test:
           for adjoint in self._adjoint_options:
             for adjoint_arg in self._adjoint_arg_options:
-              with self.test_session(graph=ops.Graph()) as sess:
+              with self.session(graph=ops.Graph()) as sess:
                 sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
                 operator, mat = self._operator_and_matrix(
                     build_info, dtype, use_placeholder=use_placeholder)
@@ -319,7 +319,7 @@ class LinearOperatorDerivedClassTest(test.TestCase):
     for use_placeholder in self._use_placeholder_options:
       for build_info in self._operator_build_infos:
         for dtype in self._dtypes_to_test:
-          with self.test_session(graph=ops.Graph()) as sess:
+          with self.session(graph=ops.Graph()) as sess:
             sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
             operator, mat = self._operator_and_matrix(
                 build_info, dtype, use_placeholder=use_placeholder)
@@ -335,7 +335,7 @@ class LinearOperatorDerivedClassTest(test.TestCase):
     for use_placeholder in self._use_placeholder_options:
       for build_info in self._operator_build_infos:
         for dtype in self._dtypes_to_test:
-          with self.test_session(graph=ops.Graph()) as sess:
+          with self.session(graph=ops.Graph()) as sess:
             sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
             operator, mat = self._operator_and_matrix(
                 build_info, dtype, use_placeholder=use_placeholder)
@@ -353,7 +353,7 @@ class LinearOperatorDerivedClassTest(test.TestCase):
     for use_placeholder in self._use_placeholder_options:
       for build_info in self._operator_build_infos:
         for dtype in self._dtypes_to_test:
-          with self.test_session(graph=ops.Graph()) as sess:
+          with self.session(graph=ops.Graph()) as sess:
             sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
             operator, mat = self._operator_and_matrix(
                 build_info, dtype, use_placeholder=use_placeholder)
-- 
GitLab


From 32140ae87fd86398ac4fa45cb67bd2f29a93090d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 09:12:47 -0700
Subject: [PATCH 0667/1357] Boosted trees: Adding categorical split support to
 prediction ops.

PiperOrigin-RevId: 214448656
---
 .../kernels/boosted_trees/boosted_trees.proto |  13 ++
 .../core/kernels/boosted_trees/resources.cc   |  26 +++-
 tensorflow/core/ops/boosted_trees_ops.cc      |   2 +
 .../boosted_trees/prediction_ops_test.py      | 134 ++++++++++++++++++
 4 files changed, 168 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/kernels/boosted_trees/boosted_trees.proto b/tensorflow/core/kernels/boosted_trees/boosted_trees.proto
index c9664f0c1c..1ab72af059 100644
--- a/tensorflow/core/kernels/boosted_trees/boosted_trees.proto
+++ b/tensorflow/core/kernels/boosted_trees/boosted_trees.proto
@@ -11,6 +11,7 @@ message Node {
   oneof node {
     Leaf leaf = 1;
     BucketizedSplit bucketized_split = 2;
+    CategoricalSplit categorical_split = 3;
   }
   NodeMetadata metadata = 777;
 }
@@ -57,6 +58,18 @@ message BucketizedSplit {
   int32 right_id = 4;
 }
 
+message CategoricalSplit {
+  // Categorical feature column and split describing the rule feature value ==
+  // value.
+  int32 feature_id = 1;
+  int32 value = 2;
+
+  // Node children indexing into a contiguous
+  // vector of nodes starting from the root.
+  int32 left_id = 3;
+  int32 right_id = 4;
+}
+
 // Tree describes a list of connected nodes.
 // Node 0 must be the root and can carry any payload including a leaf
 // in the case of representing the bias.
diff --git a/tensorflow/core/kernels/boosted_trees/resources.cc b/tensorflow/core/kernels/boosted_trees/resources.cc
index cc90bb2f45..2798722536 100644
--- a/tensorflow/core/kernels/boosted_trees/resources.cc
+++ b/tensorflow/core/kernels/boosted_trees/resources.cc
@@ -60,14 +60,26 @@ int32 BoostedTreesEnsembleResource::next_node(
   DCHECK_LT(tree_id, tree_ensemble_->trees_size());
   DCHECK_LT(node_id, tree_ensemble_->trees(tree_id).nodes_size());
   const auto& node = tree_ensemble_->trees(tree_id).nodes(node_id);
-  DCHECK_EQ(node.node_case(), boosted_trees::Node::kBucketizedSplit);
-  const auto& split = node.bucketized_split();
-  if (bucketized_features[split.feature_id()](index_in_batch) <=
-      split.threshold()) {
-    return split.left_id();
-  } else {
-    return split.right_id();
+
+  switch (node.node_case()) {
+    case boosted_trees::Node::kBucketizedSplit: {
+      const auto& split = node.bucketized_split();
+      return (bucketized_features[split.feature_id()](index_in_batch) <=
+              split.threshold())
+                 ? split.left_id()
+                 : split.right_id();
+    }
+    case boosted_trees::Node::kCategoricalSplit: {
+      const auto& split = node.categorical_split();
+      return (bucketized_features[split.feature_id()](index_in_batch) ==
+              split.value())
+                 ? split.left_id()
+                 : split.right_id();
+    }
+    default:
+      DCHECK(false) << "Node type " << node.node_case() << " not supported.";
   }
+  return -1;
 }
 
 float BoostedTreesEnsembleResource::node_value(const int32 tree_id,
diff --git a/tensorflow/core/ops/boosted_trees_ops.cc b/tensorflow/core/ops/boosted_trees_ops.cc
index 7c4184bff4..b8cf538554 100644
--- a/tensorflow/core/ops/boosted_trees_ops.cc
+++ b/tensorflow/core/ops/boosted_trees_ops.cc
@@ -180,6 +180,8 @@ REGISTER_OP("BoostedTreesMakeStatsSummary")
       return Status::OK();
     });
 
+// TODO(nponomareva): when/if creating the new op for unbucketized data, rename
+// bucketized_features to features.
 REGISTER_OP("BoostedTreesPredict")
     .Input("tree_ensemble_handle: resource")
     .Input("bucketized_features: num_bucketized_features * int32")
diff --git a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
index 467e33ec87..7cdc67f83f 100644
--- a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
+++ b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
@@ -445,6 +445,78 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase):
       #            change= 0.1(1.14+7.0-7.0)
       self.assertAllClose([[1], [0.114]], logits_updates)
 
+  def testCategoricalSplits(self):
+    """Tests the training prediction work for categorical splits."""
+    with self.cached_session() as session:
+      tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
+      text_format.Merge(
+          """
+        trees {
+          nodes {
+            categorical_split {
+              feature_id: 1
+              value: 2
+              left_id: 1
+              right_id: 2
+            }
+          }
+          nodes {
+            categorical_split {
+              feature_id: 0
+              value: 13
+              left_id: 3
+              right_id: 4
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 7.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 5.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 6.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_metadata {
+          is_finalized: true
+        }
+      """, tree_ensemble_config)
+
+      # Create existing ensemble with one root split
+      tree_ensemble = boosted_trees_ops.TreeEnsemble(
+          'ensemble', serialized_proto=tree_ensemble_config.SerializeToString())
+      tree_ensemble_handle = tree_ensemble.resource_handle
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      feature_0_values = [13, 1, 3]
+      feature_1_values = [2, 2, 1]
+
+      # No previous cached values.
+      cached_tree_ids = [0, 0, 0]
+      cached_node_ids = [0, 0, 0]
+
+      # Grow tree ensemble.
+      predict_op = boosted_trees_ops.training_predict(
+          tree_ensemble_handle,
+          cached_tree_ids=cached_tree_ids,
+          cached_node_ids=cached_node_ids,
+          bucketized_features=[feature_0_values, feature_1_values],
+          logits_dimension=1)
+
+      logits_updates, new_tree_ids, new_node_ids = session.run(predict_op)
+
+      self.assertAllClose([0, 0, 0], new_tree_ids)
+      self.assertAllClose([3, 4, 2], new_node_ids)
+      self.assertAllClose([[5.], [6.], [7.]], logits_updates)
+
   def testCachedPredictionFromTheSameTreeWithPostPrunedNodes(self):
     """Tests that prediction based on previous node in the tree works."""
     with self.cached_session() as session:
@@ -924,6 +996,68 @@ class PredictionOpsTest(test_util.TensorFlowTestCase):
       logits = session.run(predict_op)
       self.assertAllClose(expected_logits, logits)
 
+  def testCategoricalSplits(self):
+    """Tests the predictions work for categorical splits."""
+    with self.cached_session() as session:
+      tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
+      text_format.Merge(
+          """
+        trees {
+          nodes {
+            categorical_split {
+              feature_id: 1
+              value: 2
+              left_id: 1
+              right_id: 2
+            }
+          }
+          nodes {
+            categorical_split {
+              feature_id: 0
+              value: 13
+              left_id: 3
+              right_id: 4
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 7.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 5.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 6.0
+            }
+          }
+        }
+        tree_weights: 1.0
+      """, tree_ensemble_config)
+
+      # Create existing ensemble with one root split
+      tree_ensemble = boosted_trees_ops.TreeEnsemble(
+          'ensemble', serialized_proto=tree_ensemble_config.SerializeToString())
+      tree_ensemble_handle = tree_ensemble.resource_handle
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      feature_0_values = [13, 1, 3]
+      feature_1_values = [2, 2, 1]
+
+      expected_logits = [[5.], [6.], [7.]]
+
+      # Prediction should work fine.
+      predict_op = boosted_trees_ops.predict(
+          tree_ensemble_handle,
+          bucketized_features=[feature_0_values, feature_1_values],
+          logits_dimension=1)
+
+      logits = session.run(predict_op)
+      self.assertAllClose(expected_logits, logits)
+
 
 class FeatureContribsOpsTest(test_util.TensorFlowTestCase):
   """Tests feature contribs ops for model understanding."""
-- 
GitLab


From 7cd7a2e3877641da18182424bc7ea114fd7702ba Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 25 Sep 2018 09:13:54 -0700
Subject: [PATCH 0668/1357] Account for cases when the live value of a function
 is not hashable, in the built-in functions converter. Example: d.keys() where
 d is a dict.

PiperOrigin-RevId: 214448772
---
 .../autograph/converters/builtin_functions.py    |  9 +++++++--
 .../converters/builtin_functions_test.py         | 16 +++++++++++++---
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/autograph/converters/builtin_functions.py b/tensorflow/python/autograph/converters/builtin_functions.py
index b8b268d8ce..583c978395 100644
--- a/tensorflow/python/autograph/converters/builtin_functions.py
+++ b/tensorflow/python/autograph/converters/builtin_functions.py
@@ -48,8 +48,13 @@ class BuiltinFunctionTransformer(converter.Base):
     node = self.generic_visit(node)
     if anno.hasanno(node.func, 'live_val'):
       live_val = anno.getanno(node.func, 'live_val')
-      if live_val in py_builtins.SUPPORTED_BUILTINS:
-        node = self._convert_builtin(live_val, node.args, as_expression=True)
+      try:
+        if live_val in py_builtins.SUPPORTED_BUILTINS:
+          node = self._convert_builtin(live_val, node.args, as_expression=True)
+      except TypeError:
+        # Not everything in Python is hashable. If it isn't then it's definitely
+        # not a supported built-in.
+        return node
     return node
 
   def visit_Print(self, node):
diff --git a/tensorflow/python/autograph/converters/builtin_functions_test.py b/tensorflow/python/autograph/converters/builtin_functions_test.py
index c87c304cdb..2ed14c14e7 100644
--- a/tensorflow/python/autograph/converters/builtin_functions_test.py
+++ b/tensorflow/python/autograph/converters/builtin_functions_test.py
@@ -36,7 +36,7 @@ class BuiltinFunctionsTest(converter_testing.TestCase):
       return len(a)
 
     with self.converted(test_fn, builtin_functions, {'len': len}) as result:
-      with self.cached_session() as sess:
+      with self.test_session() as sess:
         p = array_ops.placeholder(dtype=dtypes.int32, shape=None)
         ops = result.test_fn(p)
         self.assertEqual(sess.run(ops, {p: [0, 0, 0]}), 3)
@@ -50,7 +50,7 @@ class BuiltinFunctionsTest(converter_testing.TestCase):
       return print(a)
 
     with self.converted(test_fn, builtin_functions, {'print': print}) as result:
-      with self.cached_session() as sess:
+      with self.test_session() as sess:
         with self.assertPrints('a\n'):
           sess.run(result.test_fn('a'))
 
@@ -63,12 +63,22 @@ class BuiltinFunctionsTest(converter_testing.TestCase):
       return print(a, b, c)
 
     with self.converted(test_fn, builtin_functions, {'print': print}) as result:
-      with self.cached_session() as sess:
+      with self.test_session() as sess:
         with self.assertPrints('a 1 [2, 3]\n'):
           sess.run(
               result.test_fn(
                   constant_op.constant('a'), constant_op.constant(1), [2, 3]))
 
+  def test_conversion_robust_to_unhashable_callables(self):
+
+    def test_fn():
+      return foo()  # pylint:disable=undefined-variable
+
+    with self.converted(test_fn, builtin_functions, {'foo': {
+        'a': 'b'
+    }.keys}) as result:
+      self.assertListEqual(list(result.test_fn()), ['a'])
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From c0b63bef59bd2a94de2d1925259d1499d3ad04ea Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 09:30:04 -0700
Subject: [PATCH 0669/1357] Allow empty arrays to occur as the first input to
 the concat op.

The conversion process fails for graphs that use tf.boolean_mask(..., axis=0) --
this op calls tf.concat with an empty array as the first argument.

PiperOrigin-RevId: 214451470
---
 .../propagate_fixed_sizes.cc                      | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index f943da6d85..d056a8add7 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -659,11 +659,16 @@ void ProcessConcatenationOperator(Model* model, ConcatenationOperator* op) {
     }
   }
   auto& output_array = model->GetArray(op->outputs[0]);
-  // Use 0 input as basis for output dimensions.
-  const auto& first_input_array = model->GetArray(op->inputs[0]);
-  output_array.copy_shape(first_input_array.shape());
-  // Negative axis means the count starts at the back of the dims().
-  if (op->axis < 0) op->axis += first_input_array.shape().dims().size();
+  // Use first non-empty input as basis for output dimensions.
+  for (const auto& input_name : op->inputs) {
+    const auto& input_array = model->GetArray(input_name);
+    if (input_array.shape().dimensions_count() > 0) {
+      output_array.copy_shape(input_array.shape());
+      // Negative axis means the count starts at the back of the dims().
+      if (op->axis < 0) op->axis += input_array.shape().dims().size();
+      break;
+    }
+  }
   // Determine the concat size, and enfore that all inputs have
   // the same dimensions count.
   int concat_size = 0;
-- 
GitLab


From aee2ab023837adbfc61253ffec07f8d2dcd6c2a8 Mon Sep 17 00:00:00 2001
From: Jingyue Wu <jingyue@google.com>
Date: Tue, 25 Sep 2018 09:32:30 -0700
Subject: [PATCH 0670/1357] Fix a bug in debug_stripper.

AsControlDependency accepts a node name not a tensor name.

PiperOrigin-RevId: 214451885
---
 .../grappler/optimizers/debug_stripper.cc     |  4 +--
 .../optimizers/debug_stripper_test.cc         | 29 +++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/debug_stripper.cc b/tensorflow/core/grappler/optimizers/debug_stripper.cc
index 9701a038d0..800160e649 100644
--- a/tensorflow/core/grappler/optimizers/debug_stripper.cc
+++ b/tensorflow/core/grappler/optimizers/debug_stripper.cc
@@ -38,7 +38,7 @@ Status DebugStripper::Optimize(Cluster* cluster, const GrapplerItem& item,
       // be optimized away by dependency optimizer.
       for (string& inp : *node.mutable_input()) {
         if (!IsControlInput(inp)) {
-          inp = AsControlDependency(inp);
+          inp = AsControlDependency(NodeName(inp));
         }
       }
     } else if (IsCheckNumerics(node) || IsPrint(node)) {
@@ -54,7 +54,7 @@ Status DebugStripper::Optimize(Cluster* cluster, const GrapplerItem& item,
       // input.
       for (size_t i = 1; i < node.input_size(); ++i) {
         if (!IsControlInput(node.input(i))) {
-          *node.mutable_input(i) = AsControlDependency(node.input(i));
+          *node.mutable_input(i) = AsControlDependency(NodeName(node.input(i)));
         }
       }
     }
diff --git a/tensorflow/core/grappler/optimizers/debug_stripper_test.cc b/tensorflow/core/grappler/optimizers/debug_stripper_test.cc
index 96ceee791f..affd2d51c2 100644
--- a/tensorflow/core/grappler/optimizers/debug_stripper_test.cc
+++ b/tensorflow/core/grappler/optimizers/debug_stripper_test.cc
@@ -43,6 +43,35 @@ TEST_F(DebugStripperTest, OutputEqualToInput) {
   CompareGraphs(item.graph, output);
 }
 
+TEST_F(DebugStripperTest, StripAssertOnTwoOutputs) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT,
+                                  ops::Placeholder::Shape({6}));
+  auto split =
+      ops::Split(s.WithOpName("split"), /*axis=*/0, input, /*num_split=*/2);
+  Output x = split[0];
+  Output y = split[1];
+  Output ge = ops::GreaterEqual(s.WithOpName("GreaterEqual"), x, y);
+  auto assert = ops::Assert(s.WithOpName("Assert"), ge, {x, y});
+  Output add = ops::Add(
+      s.WithOpName("add").WithControlDependencies({assert.operation}), x, y);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  DebugStripper optimizer;
+  GraphDef output;
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  for (const NodeDef& node : output.node()) {
+    for (const string& input : node.input()) {
+      if (IsControlInput(input)) {
+        EXPECT_EQ(input.find(':'), -1);
+      }
+    }
+  }
+}
+
 TEST_F(DebugStripperTest, StripAssertFromGraph) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT,
-- 
GitLab


From 954d6a0ace9b96cdd54659b99e9378a1138a7266 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 25 Sep 2018 09:32:37 -0700
Subject: [PATCH 0671/1357] Add Interpreter.Options Java API for interpreter
 configuration

PiperOrigin-RevId: 214451901
---
 .../org/tensorflow/ovic/OvicClassifier.java   |  2 +-
 .../java/org/tensorflow/lite/Interpreter.java | 93 +++++++++++++------
 .../lite/NativeInterpreterWrapper.java        | 36 ++++---
 .../org/tensorflow/lite/InterpreterTest.java  | 12 +++
 .../lite/NativeInterpreterWrapperTest.java    |  9 ++
 5 files changed, 103 insertions(+), 49 deletions(-)

diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
index 4cf51bb0fa..fd610b054f 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
@@ -74,7 +74,7 @@ public class OvicClassifier {
     }
     labelList = loadLabelList(labelInputStream);
     // OVIC uses one thread for CPU inference.
-    tflite = new Interpreter(model, 1);
+    tflite = new Interpreter(model, new Interpreter.Options().setNumThreads(1));
     inputDims = TestHelper.getInputDims(tflite, 0);
     if (inputDims.length != 4) {
       throw new RuntimeException("The model's input dimensions must be 4 (BWHC).");
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
index b84720ae8e..ffb04496cb 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
@@ -17,7 +17,6 @@ package org.tensorflow.lite;
 
 import java.io.File;
 import java.nio.ByteBuffer;
-import java.nio.MappedByteBuffer;
 import java.util.HashMap;
 import java.util.Map;
 import org.checkerframework.checker.nullness.qual.NonNull;
@@ -56,16 +55,36 @@ import org.checkerframework.checker.nullness.qual.NonNull;
  */
 public final class Interpreter implements AutoCloseable {
 
+  /** An options class for controlling runtime interpreter behavior. */
+  public static class Options {
+    public Options() {}
+
+    /**
+     * Sets the number of threads to be used for ops that support multi-threading. Defaults to a
+     * platform-dependent value.
+     */
+    public Options setNumThreads(int numThreads) {
+      this.numThreads = numThreads;
+      return this;
+    }
+
+    /** Sets whether to use NN API (if available) for op execution. Defaults to false (disabled). */
+    public Options setUseNNAPI(boolean useNNAPI) {
+      this.useNNAPI = useNNAPI;
+      return this;
+    }
+
+    int numThreads = -1;
+    boolean useNNAPI = false;
+  }
+
   /**
    * Initializes a {@code Interpreter}
    *
    * @param modelFile: a File of a pre-trained TF Lite model.
    */
   public Interpreter(@NonNull File modelFile) {
-    if (modelFile == null) {
-      return;
-    }
-    wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath());
+    this(modelFile, /*options = */ null);
   }
 
   /**
@@ -73,12 +92,22 @@ public final class Interpreter implements AutoCloseable {
    *
    * @param modelFile: a file of a pre-trained TF Lite model
    * @param numThreads: number of threads to use for inference
+   * @deprecated Prefer using the {@link #Interpreter(File,Options)} constructor. This method will
+   *     be removed in a future release.
    */
+  @Deprecated
   public Interpreter(@NonNull File modelFile, int numThreads) {
-    if (modelFile == null) {
-      return;
-    }
-    wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath(), numThreads);
+    this(modelFile, new Options().setNumThreads(numThreads));
+  }
+
+  /**
+   * Initializes a {@code Interpreter} and specifies the number of threads used for inference.
+   *
+   * @param modelFile: a file of a pre-trained TF Lite model
+   * @param options: a set of options for customizing interpreter behavior
+   */
+  public Interpreter(@NonNull File modelFile, Options options) {
+    wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath(), options);
   }
 
   /**
@@ -89,7 +118,7 @@ public final class Interpreter implements AutoCloseable {
    * direct {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
    */
   public Interpreter(@NonNull ByteBuffer byteBuffer) {
-    wrapper = new NativeInterpreterWrapper(byteBuffer);
+    this(byteBuffer, /* options= */ null);
   }
 
   /**
@@ -99,30 +128,25 @@ public final class Interpreter implements AutoCloseable {
    * <p>The ByteBuffer should not be modified after the construction of a {@code Interpreter}. The
    * {@code ByteBuffer} can be either a {@code MappedByteBuffer} that memory-maps a model file, or a
    * direct {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
-   */
-  public Interpreter(@NonNull ByteBuffer byteBuffer, int numThreads) {
-    wrapper = new NativeInterpreterWrapper(byteBuffer, numThreads);
-  }
-
-  /**
-   * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file.
    *
-   * <p>The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code
-   * Interpreter}.
+   * @deprecated Prefer using the {@link #Interpreter(ByteBuffer,Options)} constructor. This method
+   *     will be removed in a future release.
    */
-  public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer) {
-    wrapper = new NativeInterpreterWrapper(mappedByteBuffer);
+  @Deprecated
+  public Interpreter(@NonNull ByteBuffer byteBuffer, int numThreads) {
+    this(byteBuffer, new Options().setNumThreads(numThreads));
   }
 
   /**
-   * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file and
-   * specifies the number of threads used for inference.
+   * Initializes a {@code Interpreter} with a {@code ByteBuffer} of a model file and a set of custom
+   * {@link #Options}.
    *
-   * <p>The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code
-   * Interpreter}.
+   * <p>The ByteBuffer should not be modified after the construction of a {@code Interpreter}. The
+   * {@code ByteBuffer} can be either a {@code MappedByteBuffer} that memory-maps a model file, or a
+   * direct {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
    */
-  public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer, int numThreads) {
-    wrapper = new NativeInterpreterWrapper(mappedByteBuffer, numThreads);
+  public Interpreter(@NonNull ByteBuffer byteBuffer, Options options) {
+    wrapper = new NativeInterpreterWrapper(byteBuffer, options);
   }
 
   /**
@@ -240,12 +264,25 @@ public final class Interpreter implements AutoCloseable {
     return wrapper.getLastNativeInferenceDurationNanoseconds();
   }
 
-  /** Turns on/off Android NNAPI for hardware acceleration when it is available. */
+  /**
+   * Turns on/off Android NNAPI for hardware acceleration when it is available.
+   *
+   * @deprecated Prefer using {@link Options#setUseNNAPI(boolean)} directly for enabling NN API.
+   *     This method will be removed in a future release.
+   */
+  @Deprecated
   public void setUseNNAPI(boolean useNNAPI) {
     checkNotClosed();
     wrapper.setUseNNAPI(useNNAPI);
   }
 
+  /**
+   * Sets the number of threads to be used for ops that support multi-threading.
+   *
+   * @deprecated Prefer using {@link Options#setNumThreads(int)} directly for controlling thread
+   *     multi-threading. This method will be removed in a future release.
+   */
+  @Deprecated
   public void setNumThreads(int numThreads) {
     checkNotClosed();
     wrapper.setNumThreads(numThreads);
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
index fa25082304..6feff9a618 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
@@ -23,7 +23,7 @@ import java.util.HashMap;
 import java.util.Map;
 
 /**
- * A wrapper wraps native interpreter and controls model execution.
+ * An internal wrapper that wraps native interpreter and controls model execution.
  *
  * <p><b>WARNING:</b> Resources consumed by the {@code NativeInterpreterWrapper} object must be
  * explicitly freed by invoking the {@link #close()} method when the {@code
@@ -32,36 +32,29 @@ import java.util.Map;
 final class NativeInterpreterWrapper implements AutoCloseable {
 
   NativeInterpreterWrapper(String modelPath) {
-    this(modelPath, /* numThreads= */ -1);
+    this(modelPath, /* options= */ null);
   }
 
-  NativeInterpreterWrapper(String modelPath, int numThreads) {
+  NativeInterpreterWrapper(String modelPath, Interpreter.Options options) {
+    if (options == null) {
+      options = new Interpreter.Options();
+    }
     errorHandle = createErrorReporter(ERROR_BUFFER_SIZE);
     modelHandle = createModel(modelPath, errorHandle);
-    interpreterHandle = createInterpreter(modelHandle, errorHandle, numThreads);
+    interpreterHandle = createInterpreter(modelHandle, errorHandle, options.numThreads);
     isMemoryAllocated = true;
     inputTensors = new Tensor[getInputCount(interpreterHandle)];
     outputTensors = new Tensor[getOutputCount(interpreterHandle)];
   }
 
-  /**
-   * Initializes a {@code NativeInterpreterWrapper} with a {@code ByteBuffer}. The ByteBuffer should
-   * not be modified after the construction of a {@code NativeInterpreterWrapper}. The {@code
-   * ByteBuffer} can be either a {@code MappedByteBuffer} that memory-maps a model file, or a direct
-   * {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
-   */
   NativeInterpreterWrapper(ByteBuffer byteBuffer) {
-    this(byteBuffer, /* numThreads= */ -1);
+    this(byteBuffer, /* options= */ null);
   }
 
-  /**
-   * Initializes a {@code NativeInterpreterWrapper} with a {@code ByteBuffer} and specifies the
-   * number of inference threads. The ByteBuffer should not be modified after the construction of a
-   * {@code NativeInterpreterWrapper}. The {@code ByteBuffer} can be either a {@code
-   * MappedByteBuffer} that memory-maps a model file, or a direct {@code ByteBuffer} of
-   * nativeOrder() that contains the bytes content of a model.
-   */
-  NativeInterpreterWrapper(ByteBuffer buffer, int numThreads) {
+  NativeInterpreterWrapper(ByteBuffer buffer, Interpreter.Options options) {
+    if (options == null) {
+      options = new Interpreter.Options();
+    }
     if (buffer == null
         || (!(buffer instanceof MappedByteBuffer)
             && (!buffer.isDirect() || buffer.order() != ByteOrder.nativeOrder()))) {
@@ -72,10 +65,13 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     modelByteBuffer = buffer;
     errorHandle = createErrorReporter(ERROR_BUFFER_SIZE);
     modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle);
-    interpreterHandle = createInterpreter(modelHandle, errorHandle, numThreads);
+    interpreterHandle = createInterpreter(modelHandle, errorHandle, options.numThreads);
     isMemoryAllocated = true;
     inputTensors = new Tensor[getInputCount(interpreterHandle)];
     outputTensors = new Tensor[getOutputCount(interpreterHandle)];
+    if (options.useNNAPI) {
+      setUseNNAPI(options.useNNAPI);
+    }
   }
 
   /** Releases resources associated with this {@code NativeInterpreterWrapper}. */
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
index 9070b788b6..fefaa88911 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
@@ -54,6 +54,18 @@ public final class InterpreterTest {
     interpreter.close();
   }
 
+  @Test
+  public void testInterpreterWithOptions() throws Exception {
+    Interpreter interpreter =
+        new Interpreter(MODEL_FILE, new Interpreter.Options().setNumThreads(2).setUseNNAPI(true));
+    assertThat(interpreter).isNotNull();
+    assertThat(interpreter.getInputTensorCount()).isEqualTo(1);
+    assertThat(interpreter.getInputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
+    assertThat(interpreter.getOutputTensorCount()).isEqualTo(1);
+    assertThat(interpreter.getOutputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
+    interpreter.close();
+  }
+
   @Test
   public void testRunWithMappedByteBufferModel() throws Exception {
     Path path = MODEL_FILE.toPath();
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
index 9c4a5acd79..270bd6703a 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
@@ -62,6 +62,15 @@ public final class NativeInterpreterWrapperTest {
     wrapper.close();
   }
 
+  @Test
+  public void testConstructorWithOptions() {
+    NativeInterpreterWrapper wrapper =
+        new NativeInterpreterWrapper(
+            FLOAT_MODEL_PATH, new Interpreter.Options().setNumThreads(2).setUseNNAPI(true));
+    assertThat(wrapper).isNotNull();
+    wrapper.close();
+  }
+
   @Test
   public void testConstructorWithInvalidModel() {
     try {
-- 
GitLab


From faee2023f9764de44a804c3208be6f68dac04917 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Tue, 25 Sep 2018 10:14:53 -0700
Subject: [PATCH 0672/1357] [XLA] Make HloComputation::instruction_count()
 constant-time.

* Use a FlatMap for instruction_iterators_, and actually remove elements from it (which is cheap for a FlatMap).
* Use the size of the map (which is O(1)) rather than the size of the list (which is O(n)) for instruction_count().

PiperOrigin-RevId: 214459259
---
 tensorflow/compiler/xla/service/hlo_computation.cc | 9 +++++----
 tensorflow/compiler/xla/service/hlo_computation.h  | 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index e9e70b2c57..0e5920af7a 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -272,10 +272,11 @@ Status HloComputation::RemoveInstruction(HloInstruction* instruction) {
       << "instruction " << instruction->name()
       << " has control successors and cannot be removed";
 
-  TF_RET_CHECK(instruction_iterators_.count(instruction) != 0);
-  auto inst_it = instruction_iterators_.at(instruction);
-  (*inst_it)->set_parent(nullptr);
-  instructions_.erase(inst_it);
+  auto inst_it = instruction_iterators_.find(instruction);
+  TF_RET_CHECK(inst_it != instruction_iterators_.end());
+  (*inst_it->second)->set_parent(nullptr);
+  instructions_.erase(inst_it->second);
+  instruction_iterators_.erase(inst_it);
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index e7c98aae23..936a53bd7e 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -227,7 +227,7 @@ class HloComputation {
   void UpdateReachabilityThroughInstruction(
       const HloInstruction* instruction, HloReachabilityMap* reachability_map);
 
-  int64 instruction_count() const { return instructions_.size(); }
+  int64 instruction_count() const { return instruction_iterators_.size(); }
 
   // Creates and returns a list of the embedded computations called by this
   // computation. This includes all embedded computations called directly or
@@ -439,7 +439,7 @@ class HloComputation {
   // instruction pointer to location in the list for fast lookup.
   using InstructionList = std::list<std::unique_ptr<HloInstruction>>;
   InstructionList instructions_;
-  std::unordered_map<const HloInstruction*, InstructionList::iterator>
+  tensorflow::gtl::FlatMap<const HloInstruction*, InstructionList::iterator>
       instruction_iterators_;
 
   std::vector<HloInstruction*> param_instructions_;
-- 
GitLab


From 83763d0be3c664f84a776a8c69d49846fbfd1b9e Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 25 Sep 2018 10:27:46 -0700
Subject: [PATCH 0673/1357] Flesh out TFLite Android sample docs

PiperOrigin-RevId: 214461578
---
 .../lite/examples/android/app/README.md       | 37 ++++++++++++++++++-
 tensorflow/contrib/lite/java/demo/README.md   |  4 +-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/examples/android/app/README.md b/tensorflow/contrib/lite/examples/android/app/README.md
index dc31171672..7347147f99 100644
--- a/tensorflow/contrib/lite/examples/android/app/README.md
+++ b/tensorflow/contrib/lite/examples/android/app/README.md
@@ -1,8 +1,43 @@
 # TF Lite Android App Example
 
+A simple Android example that demonstrates image classification and object
+detection using the camera, as well as speech recognition using the microphone.
+
+## Building in Android Studio with TensorFlow Lite AAR from JCenter.
+The build.gradle is configured to use TensorFlow Lite's nightly build.
+
+If you see a build error related to compatibility with Tensorflow Lite's Java
+API (example: method X is undefined for type Interpreter), there has likely been
+a backwards compatible change to the API. You will need to pull new app code
+that's compatible with the nightly build and may need to first wait a few days
+for our external and internal code to merge.
+
 ## Building from Source with Bazel
 
-1. Install [Bazel](https://docs.bazel.build/versions/master/install.html), the Android NDK and SDK. The recommended versions are specified on this [webpage](https://www.tensorflow.org/lite/demo_android).
+1. Follow the [Bazel steps for the TF Demo App](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#bazel):
+
+  1. [Install Bazel and Android Prerequisites](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#install-bazel-and-android-prerequisites).
+     It's easiest with Android Studio.
+
+      - You'll need at least SDK version 23.
+      - Make sure to install the latest version of Bazel. Some distributions
+        ship with Bazel 0.5.4, which is too old.
+      - Bazel requires Android Build Tools `26.0.1` or higher.
+      - You also need to install the Android Support Repository, available
+        through Android Studio under `Android SDK Manager -> SDK Tools ->
+        Android Support Repository`.
+
+  2. [Edit your `WORKSPACE`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#edit-workspace)
+     to add SDK and NDK targets.
+
+     NOTE: As long as you have the SDK and NDK installed, the `./configure`
+     script will create these rules for you. Answer "Yes" when the script asks
+     to automatically configure the `./WORKSPACE`.
+
+      - Make sure the `api_level` in `WORKSPACE` is set to an SDK version that
+        you have installed.
+      - By default, Android Studio will install the SDK to `~/Android/Sdk` and
+        the NDK to `~/Android/Sdk/ndk-bundle`.
 
 2. Build this demo app with Bazel. The demo needs C++11. We configure the fat_apk_cpu flag to package support for 4 hardware variants. You may replace it with --config=android_arm64 on a 64-bit device and --config=android_arm for 32-bit device:
 
diff --git a/tensorflow/contrib/lite/java/demo/README.md b/tensorflow/contrib/lite/java/demo/README.md
index 6a3f0651d0..c04b2a6194 100644
--- a/tensorflow/contrib/lite/java/demo/README.md
+++ b/tensorflow/contrib/lite/java/demo/README.md
@@ -1,4 +1,6 @@
-# TF Lite Android App
+# TF Lite Android Image Classifier App Example
+
+A simple Android example that demonstrates image classification using the camera.
 
 ## Building in Android Studio with TensorFlow Lite AAR from JCenter.
 The build.gradle is configured to use TensorFlow Lite's nightly build.
-- 
GitLab


From 410905d8e8af12e928031aa026683e43b665c8ae Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Tue, 25 Sep 2018 10:30:48 -0700
Subject: [PATCH 0674/1357] Keep only weak references to TensorFlow Optimizer
 objects in tf.keras

I don't think this annoyed anyone else yet, it's just a nit I noticed while making sure variables can be garbage collected when tracked via tf.keras.

PiperOrigin-RevId: 214462105
---
 tensorflow/python/keras/backend.py         |  6 ++----
 tensorflow/python/keras/optimizers_test.py | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index a46f9edb1e..4589c821e5 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -695,10 +695,8 @@ def track_tf_optimizer(tf_optimizer):
   if context.executing_eagerly():
     return
   graph = ops.get_default_graph()
-  if graph not in _GRAPH_TF_OPTIMIZERS:
-    _GRAPH_TF_OPTIMIZERS[graph] = set()
-  _GRAPH_TF_OPTIMIZERS[graph].add(tf_optimizer)
-
+  optimizers = _GRAPH_TF_OPTIMIZERS.setdefault(graph, weakref.WeakSet())
+  optimizers.add(tf_optimizer)
 
 def track_variable(v):
   """Tracks the given variable for initialization."""
diff --git a/tensorflow/python/keras/optimizers_test.py b/tensorflow/python/keras/optimizers_test.py
index 8d7493462e..9664f09fff 100644
--- a/tensorflow/python/keras/optimizers_test.py
+++ b/tensorflow/python/keras/optimizers_test.py
@@ -18,10 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import gc
+import weakref
+
 import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.eager import context
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
@@ -156,6 +160,19 @@ class KerasOptimizersTest(test.TestCase):
     with self.assertRaises(NotImplementedError):
       optimizer.from_config(None)
 
+  def test_optimizer_garbage_collection(self):
+    graph = ops.Graph()
+    with graph.as_default():
+      optimizer = keras.optimizers.TFOptimizer(AdamOptimizer(0.01))
+      keras.backend.track_tf_optimizer(optimizer)
+      optimizer_weak = weakref.ref(optimizer)
+    graph_weak = weakref.ref(graph)
+    del graph, optimizer
+    gc.collect()
+    # Check that the weak references are dead now.
+    self.assertIs(graph_weak(), None)
+    self.assertIs(optimizer_weak(), None)
+
   @test_util.run_in_graph_and_eager_modes
   def test_tfoptimizer_iterations(self):
     with self.cached_session():
-- 
GitLab


From 3faf5e08bbc76b7237994af45510ca74fb0503d3 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 25 Sep 2018 10:37:14 -0700
Subject: [PATCH 0675/1357] Automated rollback of commit
 954d6a0ace9b96cdd54659b99e9378a1138a7266

PiperOrigin-RevId: 214463446
---
 .../org/tensorflow/ovic/OvicClassifier.java   |  2 +-
 .../java/org/tensorflow/lite/Interpreter.java | 93 ++++++-------------
 .../lite/NativeInterpreterWrapper.java        | 36 +++----
 .../org/tensorflow/lite/InterpreterTest.java  | 12 ---
 .../lite/NativeInterpreterWrapperTest.java    |  9 --
 5 files changed, 49 insertions(+), 103 deletions(-)

diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
index fd610b054f..4cf51bb0fa 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
@@ -74,7 +74,7 @@ public class OvicClassifier {
     }
     labelList = loadLabelList(labelInputStream);
     // OVIC uses one thread for CPU inference.
-    tflite = new Interpreter(model, new Interpreter.Options().setNumThreads(1));
+    tflite = new Interpreter(model, 1);
     inputDims = TestHelper.getInputDims(tflite, 0);
     if (inputDims.length != 4) {
       throw new RuntimeException("The model's input dimensions must be 4 (BWHC).");
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
index ffb04496cb..b84720ae8e 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
@@ -17,6 +17,7 @@ package org.tensorflow.lite;
 
 import java.io.File;
 import java.nio.ByteBuffer;
+import java.nio.MappedByteBuffer;
 import java.util.HashMap;
 import java.util.Map;
 import org.checkerframework.checker.nullness.qual.NonNull;
@@ -55,36 +56,16 @@ import org.checkerframework.checker.nullness.qual.NonNull;
  */
 public final class Interpreter implements AutoCloseable {
 
-  /** An options class for controlling runtime interpreter behavior. */
-  public static class Options {
-    public Options() {}
-
-    /**
-     * Sets the number of threads to be used for ops that support multi-threading. Defaults to a
-     * platform-dependent value.
-     */
-    public Options setNumThreads(int numThreads) {
-      this.numThreads = numThreads;
-      return this;
-    }
-
-    /** Sets whether to use NN API (if available) for op execution. Defaults to false (disabled). */
-    public Options setUseNNAPI(boolean useNNAPI) {
-      this.useNNAPI = useNNAPI;
-      return this;
-    }
-
-    int numThreads = -1;
-    boolean useNNAPI = false;
-  }
-
   /**
    * Initializes a {@code Interpreter}
    *
    * @param modelFile: a File of a pre-trained TF Lite model.
    */
   public Interpreter(@NonNull File modelFile) {
-    this(modelFile, /*options = */ null);
+    if (modelFile == null) {
+      return;
+    }
+    wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath());
   }
 
   /**
@@ -92,22 +73,12 @@ public final class Interpreter implements AutoCloseable {
    *
    * @param modelFile: a file of a pre-trained TF Lite model
    * @param numThreads: number of threads to use for inference
-   * @deprecated Prefer using the {@link #Interpreter(File,Options)} constructor. This method will
-   *     be removed in a future release.
    */
-  @Deprecated
   public Interpreter(@NonNull File modelFile, int numThreads) {
-    this(modelFile, new Options().setNumThreads(numThreads));
-  }
-
-  /**
-   * Initializes a {@code Interpreter} and specifies the number of threads used for inference.
-   *
-   * @param modelFile: a file of a pre-trained TF Lite model
-   * @param options: a set of options for customizing interpreter behavior
-   */
-  public Interpreter(@NonNull File modelFile, Options options) {
-    wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath(), options);
+    if (modelFile == null) {
+      return;
+    }
+    wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath(), numThreads);
   }
 
   /**
@@ -118,7 +89,7 @@ public final class Interpreter implements AutoCloseable {
    * direct {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
    */
   public Interpreter(@NonNull ByteBuffer byteBuffer) {
-    this(byteBuffer, /* options= */ null);
+    wrapper = new NativeInterpreterWrapper(byteBuffer);
   }
 
   /**
@@ -128,25 +99,30 @@ public final class Interpreter implements AutoCloseable {
    * <p>The ByteBuffer should not be modified after the construction of a {@code Interpreter}. The
    * {@code ByteBuffer} can be either a {@code MappedByteBuffer} that memory-maps a model file, or a
    * direct {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
-   *
-   * @deprecated Prefer using the {@link #Interpreter(ByteBuffer,Options)} constructor. This method
-   *     will be removed in a future release.
    */
-  @Deprecated
   public Interpreter(@NonNull ByteBuffer byteBuffer, int numThreads) {
-    this(byteBuffer, new Options().setNumThreads(numThreads));
+    wrapper = new NativeInterpreterWrapper(byteBuffer, numThreads);
   }
 
   /**
-   * Initializes a {@code Interpreter} with a {@code ByteBuffer} of a model file and a set of custom
-   * {@link #Options}.
+   * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file.
    *
-   * <p>The ByteBuffer should not be modified after the construction of a {@code Interpreter}. The
-   * {@code ByteBuffer} can be either a {@code MappedByteBuffer} that memory-maps a model file, or a
-   * direct {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
+   * <p>The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code
+   * Interpreter}.
    */
-  public Interpreter(@NonNull ByteBuffer byteBuffer, Options options) {
-    wrapper = new NativeInterpreterWrapper(byteBuffer, options);
+  public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer) {
+    wrapper = new NativeInterpreterWrapper(mappedByteBuffer);
+  }
+
+  /**
+   * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file and
+   * specifies the number of threads used for inference.
+   *
+   * <p>The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code
+   * Interpreter}.
+   */
+  public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer, int numThreads) {
+    wrapper = new NativeInterpreterWrapper(mappedByteBuffer, numThreads);
   }
 
   /**
@@ -264,25 +240,12 @@ public final class Interpreter implements AutoCloseable {
     return wrapper.getLastNativeInferenceDurationNanoseconds();
   }
 
-  /**
-   * Turns on/off Android NNAPI for hardware acceleration when it is available.
-   *
-   * @deprecated Prefer using {@link Options#setUseNNAPI(boolean)} directly for enabling NN API.
-   *     This method will be removed in a future release.
-   */
-  @Deprecated
+  /** Turns on/off Android NNAPI for hardware acceleration when it is available. */
   public void setUseNNAPI(boolean useNNAPI) {
     checkNotClosed();
     wrapper.setUseNNAPI(useNNAPI);
   }
 
-  /**
-   * Sets the number of threads to be used for ops that support multi-threading.
-   *
-   * @deprecated Prefer using {@link Options#setNumThreads(int)} directly for controlling thread
-   *     multi-threading. This method will be removed in a future release.
-   */
-  @Deprecated
   public void setNumThreads(int numThreads) {
     checkNotClosed();
     wrapper.setNumThreads(numThreads);
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
index 6feff9a618..fa25082304 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
@@ -23,7 +23,7 @@ import java.util.HashMap;
 import java.util.Map;
 
 /**
- * An internal wrapper that wraps native interpreter and controls model execution.
+ * A wrapper wraps native interpreter and controls model execution.
  *
  * <p><b>WARNING:</b> Resources consumed by the {@code NativeInterpreterWrapper} object must be
  * explicitly freed by invoking the {@link #close()} method when the {@code
@@ -32,29 +32,36 @@ import java.util.Map;
 final class NativeInterpreterWrapper implements AutoCloseable {
 
   NativeInterpreterWrapper(String modelPath) {
-    this(modelPath, /* options= */ null);
+    this(modelPath, /* numThreads= */ -1);
   }
 
-  NativeInterpreterWrapper(String modelPath, Interpreter.Options options) {
-    if (options == null) {
-      options = new Interpreter.Options();
-    }
+  NativeInterpreterWrapper(String modelPath, int numThreads) {
     errorHandle = createErrorReporter(ERROR_BUFFER_SIZE);
     modelHandle = createModel(modelPath, errorHandle);
-    interpreterHandle = createInterpreter(modelHandle, errorHandle, options.numThreads);
+    interpreterHandle = createInterpreter(modelHandle, errorHandle, numThreads);
     isMemoryAllocated = true;
     inputTensors = new Tensor[getInputCount(interpreterHandle)];
     outputTensors = new Tensor[getOutputCount(interpreterHandle)];
   }
 
+  /**
+   * Initializes a {@code NativeInterpreterWrapper} with a {@code ByteBuffer}. The ByteBuffer should
+   * not be modified after the construction of a {@code NativeInterpreterWrapper}. The {@code
+   * ByteBuffer} can be either a {@code MappedByteBuffer} that memory-maps a model file, or a direct
+   * {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
+   */
   NativeInterpreterWrapper(ByteBuffer byteBuffer) {
-    this(byteBuffer, /* options= */ null);
+    this(byteBuffer, /* numThreads= */ -1);
   }
 
-  NativeInterpreterWrapper(ByteBuffer buffer, Interpreter.Options options) {
-    if (options == null) {
-      options = new Interpreter.Options();
-    }
+  /**
+   * Initializes a {@code NativeInterpreterWrapper} with a {@code ByteBuffer} and specifies the
+   * number of inference threads. The ByteBuffer should not be modified after the construction of a
+   * {@code NativeInterpreterWrapper}. The {@code ByteBuffer} can be either a {@code
+   * MappedByteBuffer} that memory-maps a model file, or a direct {@code ByteBuffer} of
+   * nativeOrder() that contains the bytes content of a model.
+   */
+  NativeInterpreterWrapper(ByteBuffer buffer, int numThreads) {
     if (buffer == null
         || (!(buffer instanceof MappedByteBuffer)
             && (!buffer.isDirect() || buffer.order() != ByteOrder.nativeOrder()))) {
@@ -65,13 +72,10 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     modelByteBuffer = buffer;
     errorHandle = createErrorReporter(ERROR_BUFFER_SIZE);
     modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle);
-    interpreterHandle = createInterpreter(modelHandle, errorHandle, options.numThreads);
+    interpreterHandle = createInterpreter(modelHandle, errorHandle, numThreads);
     isMemoryAllocated = true;
     inputTensors = new Tensor[getInputCount(interpreterHandle)];
     outputTensors = new Tensor[getOutputCount(interpreterHandle)];
-    if (options.useNNAPI) {
-      setUseNNAPI(options.useNNAPI);
-    }
   }
 
   /** Releases resources associated with this {@code NativeInterpreterWrapper}. */
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
index fefaa88911..9070b788b6 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
@@ -54,18 +54,6 @@ public final class InterpreterTest {
     interpreter.close();
   }
 
-  @Test
-  public void testInterpreterWithOptions() throws Exception {
-    Interpreter interpreter =
-        new Interpreter(MODEL_FILE, new Interpreter.Options().setNumThreads(2).setUseNNAPI(true));
-    assertThat(interpreter).isNotNull();
-    assertThat(interpreter.getInputTensorCount()).isEqualTo(1);
-    assertThat(interpreter.getInputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
-    assertThat(interpreter.getOutputTensorCount()).isEqualTo(1);
-    assertThat(interpreter.getOutputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
-    interpreter.close();
-  }
-
   @Test
   public void testRunWithMappedByteBufferModel() throws Exception {
     Path path = MODEL_FILE.toPath();
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
index 270bd6703a..9c4a5acd79 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
@@ -62,15 +62,6 @@ public final class NativeInterpreterWrapperTest {
     wrapper.close();
   }
 
-  @Test
-  public void testConstructorWithOptions() {
-    NativeInterpreterWrapper wrapper =
-        new NativeInterpreterWrapper(
-            FLOAT_MODEL_PATH, new Interpreter.Options().setNumThreads(2).setUseNNAPI(true));
-    assertThat(wrapper).isNotNull();
-    wrapper.close();
-  }
-
   @Test
   public void testConstructorWithInvalidModel() {
     try {
-- 
GitLab


From e5660a6f31dd302a397935867300cf96bfd2f026 Mon Sep 17 00:00:00 2001
From: Billy Lamberta <blamb@google.com>
Date: Tue, 25 Sep 2018 11:31:13 -0700
Subject: [PATCH 0676/1357] Skip translations for API docs: site and subsites
 Add translation.yaml config for site

PiperOrigin-RevId: 214473776
---
 tensorflow/contrib/lite/g3doc/_book.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/lite/g3doc/_book.yaml b/tensorflow/contrib/lite/g3doc/_book.yaml
index beaa5c479a..de6914e536 100644
--- a/tensorflow/contrib/lite/g3doc/_book.yaml
+++ b/tensorflow/contrib/lite/g3doc/_book.yaml
@@ -57,6 +57,7 @@ upper_tabs:
           path: /lite/tfmobile/optimizing
 
     - name: API
+      skip_translation: true
       contents:
       - title: API
         path: /api_docs/python/tf/contrib/lite
-- 
GitLab


From 9dc05f4bfa6d5dd9a6625c385bdd21fa3e32498c Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 25 Sep 2018 11:45:41 -0700
Subject: [PATCH 0677/1357] [TF:XLA] Bump open source llvm revision to r342977

PiperOrigin-RevId: 214476625
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b850c5a17f..0916d4540b 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -502,11 +502,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "llvm",
         urls = [
-            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/db98902adc6431c9cc4ddec50fe174cfc9e626d6.tar.gz",
-            "https://github.com/llvm-mirror/llvm/archive/db98902adc6431c9cc4ddec50fe174cfc9e626d6.tar.gz",
+            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/7167e4d196a50f78abe8af6553c943d50b757a13.tar.gz",
+            "https://github.com/llvm-mirror/llvm/archive/7167e4d196a50f78abe8af6553c943d50b757a13.tar.gz",
         ],
-        sha256 = "8c02d312b3d417cf9bc7e58ff53c2528bf77a5d839ce4a23b95bd04b9e5da023",
-        strip_prefix = "llvm-db98902adc6431c9cc4ddec50fe174cfc9e626d6",
+        sha256 = "11d933232b27531abc83592fc9f03e7f928e504c7d478eeaba51efa929a3d9df",
+        strip_prefix = "llvm-7167e4d196a50f78abe8af6553c943d50b757a13",
         build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
     )
 
-- 
GitLab


From dfafefa3054a37b64d0d47419eb3f7a576e662db Mon Sep 17 00:00:00 2001
From: Ruoxin Sang <rxsang@google.com>
Date: Tue, 25 Sep 2018 11:46:12 -0700
Subject: [PATCH 0678/1357] Fix keras_support.tpu_model example usages.

PiperOrigin-RevId: 214476713
---
 tensorflow/contrib/tpu/python/tpu/keras_support.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index bf445256b6..93ae68d254 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -25,10 +25,9 @@ flattened = tf.keras.layers.Flatten()(c1)
 logits = tf.keras.layers.Dense(10, activation='softmax')(flattened)
 model = tf.keras.Model(inputs=[image], outputs=[logits])
 
-strategy = keras_support.TPUDistributionStrategy(num_cores_per_host=8)
-model = keras_support.tpu_model(model,
-                                strategy=strategy,
-                                tpu_name_or_address=tpu_name)
+resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu=tpu_name)
+strategy = keras_support.TPUDistributionStrategy(resolver)
+model = keras_support.tpu_model(model, strategy=strategy)
 
 # Only TF optimizers are currently supported.
 model.compile(optimizer=tf.train.AdamOptimizer(), ...)
-- 
GitLab


From 3f2d0ad596d5f2cc30f10148ac56ac5680603f5b Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 25 Sep 2018 11:50:05 -0700
Subject: [PATCH 0679/1357] Temporarily disable float16 tests in
 depthwise_conv_op_test. They seem to be failing when running with P100.

PiperOrigin-RevId: 214477405
---
 .../python/kernel_tests/depthwise_conv_op_test.py    | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 200da772e5..6d1ead20be 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -191,7 +191,7 @@ class DepthwiseConv2DTest(test.TestCase):
       tf_logging.info(
           "Testing DepthwiseConv2D, %dth config: %r * %r, stride: %d, padding: "
           "%s", index, input_size, filter_size, stride, padding)
-      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float32, dtypes.float64]:
         tf_logging.info("Testing without grouped_conv")
         self._VerifyValues(
             input_size, filter_size, stride, padding, data_type, use_gpu=True)
@@ -227,7 +227,7 @@ class DepthwiseConv2DTest(test.TestCase):
       tf_logging.info(
           "Testing DepthwiseConv2DFormat, %dth config: %r * %r, stride: %d, "
           "padding: %s", index, input_size, filter_size, stride, padding)
-      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float32, dtypes.float64]:
         self._VerifyValues(
             input_size,
             filter_size,
@@ -434,7 +434,7 @@ class DepthwiseConv2DTest(test.TestCase):
       tf_logging.info(
           "Testing DepthwiseConv2DInputGrad, %dth config: %r * %r, stride: %d, "
           "padding: %s", index, input_size, filter_size, stride, padding)
-      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
             filter_size,
@@ -465,7 +465,7 @@ class DepthwiseConv2DTest(test.TestCase):
           "Testing DepthwiseConv2DInputGradFormat, %dth config: %r * %r, "
           "stride: %d, padding: %s", index, input_size, filter_size, stride,
           padding)
-      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
             filter_size,
@@ -483,7 +483,7 @@ class DepthwiseConv2DTest(test.TestCase):
       tf_logging.info(
           "Testing DepthwiseConv2DFilterGrad, %dth config: %r * %r, stride: "
           "%d, padding: %s", index, input_size, filter_size, stride, padding)
-      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
             filter_size,
@@ -504,7 +504,7 @@ class DepthwiseConv2DTest(test.TestCase):
           "Testing DepthwiseConv2DFilterGradFormat, %dth config: %r * %r, "
           "stride: %d, padding: %s", index, input_size, filter_size, stride,
           padding)
-      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
             filter_size,
-- 
GitLab


From df930015230c1195065e2fd01c61f527b8662efb Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Tue, 25 Sep 2018 11:54:16 -0700
Subject: [PATCH 0680/1357] [TF TensorArray] TensorSetZero supports bool types.

PiperOrigin-RevId: 214478085
---
 tensorflow/core/kernels/tensor_array.cc | 3 ++-
 tensorflow/core/kernels/tensor_array.h  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/tensor_array.cc b/tensorflow/core/kernels/tensor_array.cc
index 765467bc1e..0e6c0ddccc 100644
--- a/tensorflow/core/kernels/tensor_array.cc
+++ b/tensorflow/core/kernels/tensor_array.cc
@@ -62,7 +62,8 @@ TF_CALL_complex128(TENSOR_ARRAY_WRITE_OR_ADD_GPU);
   }
 
 #define TENSOR_ARRAY_SET_ZERO_CPU(T) TENSOR_ARRAY_SET_ZERO(CPUDevice, T)
-TF_CALL_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_CPU)
+TF_CALL_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_CPU);
+TF_CALL_bool(TENSOR_ARRAY_SET_ZERO_CPU);
 #undef TENSOR_ARRAY_SET_ZERO_CPU
 
 #if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/tensor_array.h b/tensorflow/core/kernels/tensor_array.h
index e8dc4fad21..384a63e945 100644
--- a/tensorflow/core/kernels/tensor_array.h
+++ b/tensorflow/core/kernels/tensor_array.h
@@ -81,7 +81,8 @@ Status TensorSetZero(OpKernelContext* ctx, Tensor* value) {
   Status TensorSetZero<Device, T>(OpKernelContext * ctx, Tensor * value);
 
 #define TENSOR_ARRAY_SET_ZERO_CPU(T) TENSOR_ARRAY_SET_ZERO(CPUDevice, T)
-TF_CALL_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_CPU)
+TF_CALL_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_CPU);
+TF_CALL_bool(TENSOR_ARRAY_SET_ZERO_CPU);
 #undef TENSOR_ARRAY_SET_ZERO_CPU
 
 #if GOOGLE_CUDA
-- 
GitLab


From d5c5df164cedcd8ae43fff41256592818bc6c2de Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 11:56:33 -0700
Subject: [PATCH 0681/1357] Add "encoding" attribute to string length op, which
 controls how "string length" is defined:   * BYTE: The number of bytes in
 each string.  (Default)   * UTF8: The number of UTF-8 encoded Unicode code
 points in each string.

RELNOTES: Add option to calculate string length in Unicode characters
PiperOrigin-RevId: 214478470
---
 tensorflow/contrib/makefile/tf_op_files.txt   |  1 +
 .../base_api/api_def_StringLength.pbtxt       | 10 +++
 .../python_api/api_def_StringLength.pbtxt     |  4 +-
 tensorflow/core/kernels/BUILD                 | 10 +++
 tensorflow/core/kernels/string_length_op.cc   | 23 ++++++-
 tensorflow/core/kernels/string_util.cc        | 63 +++++++++++++++++++
 tensorflow/core/kernels/string_util.h         | 45 +++++++++++++
 tensorflow/core/ops/string_ops.cc             |  1 +
 .../kernel_tests/string_length_op_test.py     | 27 ++++++++
 tensorflow/python/ops/string_ops.py           | 13 ++++
 .../api/golden/v1/tensorflow.strings.pbtxt    |  2 +-
 .../api/golden/v2/tensorflow.strings.pbtxt    |  2 +-
 12 files changed, 193 insertions(+), 8 deletions(-)
 create mode 100644 tensorflow/core/kernels/string_util.cc
 create mode 100644 tensorflow/core/kernels/string_util.h

diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 08de54b8e1..f81a90809a 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -253,6 +253,7 @@ tensorflow/core/kernels/strided_slice_op_inst_5.cc
 tensorflow/core/kernels/strided_slice_op_inst_6.cc
 tensorflow/core/kernels/strided_slice_op_inst_7.cc
 tensorflow/core/kernels/string_join_op.cc
+tensorflow/core/kernels/string_util.cc
 tensorflow/core/kernels/tensor_array.cc
 tensorflow/core/kernels/tensor_array_ops.cc
 tensorflow/core/kernels/tile_functor_cpu.cc
diff --git a/tensorflow/core/api_def/base_api/api_def_StringLength.pbtxt b/tensorflow/core/api_def/base_api/api_def_StringLength.pbtxt
index cc21ddc815..7d2fbcd00b 100644
--- a/tensorflow/core/api_def/base_api/api_def_StringLength.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_StringLength.pbtxt
@@ -1,5 +1,15 @@
 op {
   graph_op_name: "StringLength"
+  attr {
+    name: "unit"
+    description: <<END
+The unit that is counted to compute string length.  One of: `"BYTE"` (for
+the number of bytes in each string) or `"UTF8_CHAR"` (for the number of UTF-8
+encoded Unicode code points in each string).  Results are undefined
+if `unit=UTF8_CHAR` and the `input` strings do not contain structurally
+valid UTF-8.
+END
+  }
   in_arg {
     name: "input"
     description: <<END
diff --git a/tensorflow/core/api_def/python_api/api_def_StringLength.pbtxt b/tensorflow/core/api_def/python_api/api_def_StringLength.pbtxt
index 01c02e1f70..df012414e3 100644
--- a/tensorflow/core/api_def/python_api/api_def_StringLength.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_StringLength.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "StringLength"
-  endpoint {
-    name: "strings.length"
-  }
+  visibility: HIDDEN
 }
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index ab69925d04..1a3db2c7cd 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4434,8 +4434,16 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "string_util",
+    srcs = ["string_util.cc"],
+    hdrs = ["string_util.h"],
+    deps = ["//tensorflow/core:lib"],
+)
+
 STRING_DEPS = [
     ":bounds_check",
+    ":string_util",
     "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
@@ -5166,6 +5174,7 @@ filegroup(
         "spacetobatch_functor.h",
         "spacetodepth_op.h",
         "spectrogram.h",
+        "string_util.h",
         "tensor_array.h",
         "tile_functor.h",
         "tile_ops_cpu_impl.h",
@@ -5334,6 +5343,7 @@ filegroup(
         "spectrogram_op.cc",
         "stack_ops.cc",
         "string_join_op.cc",
+        "string_util.cc",
         "summary_op.cc",
         "tensor_array.cc",
         "tensor_array_ops.cc",
diff --git a/tensorflow/core/kernels/string_length_op.cc b/tensorflow/core/kernels/string_length_op.cc
index a6829b29d9..435a7abdca 100644
--- a/tensorflow/core/kernels/string_length_op.cc
+++ b/tensorflow/core/kernels/string_length_op.cc
@@ -14,13 +14,18 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/string_util.h"
 
 namespace tensorflow {
 namespace {
 
 class StringLengthOp : public OpKernel {
  public:
-  using OpKernel::OpKernel;
+  explicit StringLengthOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    string unit;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("unit", &unit));
+    OP_REQUIRES_OK(ctx, ParseCharUnit(unit, &unit_));
+  }
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
@@ -32,10 +37,22 @@ class StringLengthOp : public OpKernel {
     auto src = input.flat<string>();
     auto dst = output->flat<int32>();
 
-    for (int n = 0; n < src.size(); ++n) {
-      dst(n) = src(n).size();
+    switch (unit_) {
+      case CharUnit::BYTE:
+        for (int n = 0; n < src.size(); ++n) {
+          dst(n) = src(n).size();
+        }
+        break;
+      case CharUnit::UTF8_CHAR:
+        for (int n = 0; n < src.size(); ++n) {
+          dst(n) = UTF8StrLen(src(n));
+        }
+        break;
     }
   }
+
+ private:
+  CharUnit unit_ = CharUnit::BYTE;
 };
 
 REGISTER_KERNEL_BUILDER(Name("StringLength").Device(DEVICE_CPU),
diff --git a/tensorflow/core/kernels/string_util.cc b/tensorflow/core/kernels/string_util.cc
new file mode 100644
index 0000000000..3a9803a052
--- /dev/null
+++ b/tensorflow/core/kernels/string_util.cc
@@ -0,0 +1,63 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/kernels/string_util.h"
+
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace {
+inline bool IsTrailByte(char x) { return static_cast<signed char>(x) < -0x40; }
+}  // namespace
+
+namespace tensorflow {
+
+// Sets unit value based on str.
+Status ParseUnicodeEncoding(const string& str, UnicodeEncoding* encoding) {
+  if (str == "UTF8") {
+    *encoding = UnicodeEncoding::UTF8;
+  } else {
+    return errors::InvalidArgument(strings::StrCat(
+        "Invalid encoding \"", str, "\": Should be one of: BYTE"));
+  }
+  return Status::OK();
+}
+
+// Sets unit value based on str.
+Status ParseCharUnit(const string& str, CharUnit* unit) {
+  if (str == "BYTE") {
+    *unit = CharUnit::BYTE;
+  } else if (str == "UTF8_CHAR") {
+    *unit = CharUnit::UTF8_CHAR;
+  } else {
+    return errors::InvalidArgument(strings::StrCat(
+        "Invalid unit \"", str, "\": Should be one of: BYTE, UTF8_CHAR"));
+  }
+  return Status::OK();
+}
+
+// Return the number of Unicode characters in a UTF-8 string.
+// Result may be incorrect if the input string is not valid UTF-8.
+int32 UTF8StrLen(const string& string) {
+  const int32 byte_size = string.size();
+  const char* const end = string.data() + byte_size;
+  const char* ptr = string.data();
+  int32 skipped_count = 0;
+  while (ptr < end) {
+    skipped_count += IsTrailByte(*ptr++) ? 1 : 0;
+  }
+  const int32 result = byte_size - skipped_count;
+  return result;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/string_util.h b/tensorflow/core/kernels/string_util.h
new file mode 100644
index 0000000000..390cf57702
--- /dev/null
+++ b/tensorflow/core/kernels/string_util.h
@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CORE_KERNELS_STRING_UTIL_H_
+#define TENSORFLOW_CORE_KERNELS_STRING_UTIL_H_
+
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+// Enumeration for unicode encodings.  Used by ops such as
+// tf.strings.unicode_encode and tf.strings.unicode_decode.
+// TODO(edloper): Add support for:
+// UTF16, UTF32, UTF16BE, UTF32BE, UTF16LE, UTF32LE
+enum class UnicodeEncoding { UTF8 };
+
+// Enumeration for character units.  Used by string such as
+// tf.strings.length and tf.substr.
+// TODO(edloper): Add support for: UTF32_CHAR, etc.
+enum class CharUnit { BYTE, UTF8_CHAR };
+
+// Sets `encoding` based on `str`.
+Status ParseUnicodeEncoding(const string& str, UnicodeEncoding* encoding);
+
+// Sets `unit` value based on `str`.
+Status ParseCharUnit(const string& str, CharUnit* unit);
+
+// Returns the number of Unicode characters in a UTF-8 string.
+// Result may be incorrect if the input string is not valid UTF-8.
+int32 UTF8StrLen(const string& string);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_KERNELS_STRING_UTIL_H_
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index 99159839d0..da1d2a6432 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -203,6 +203,7 @@ REGISTER_OP("StringStrip")
 REGISTER_OP("StringLength")
     .Input("input: string")
     .Output("output: int32")
+    .Attr("unit: {'BYTE', 'UTF8_CHAR'} = 'BYTE'")
     .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("EncodeBase64")
diff --git a/tensorflow/python/kernel_tests/string_length_op_test.py b/tensorflow/python/kernel_tests/string_length_op_test.py
index 9f013c2c7e..4afe3ad3f4 100644
--- a/tensorflow/python/kernel_tests/string_length_op_test.py
+++ b/tensorflow/python/kernel_tests/string_length_op_test.py
@@ -32,6 +32,33 @@ class StringLengthOpTest(test.TestCase):
       values = sess.run(lengths)
       self.assertAllEqual(values, [[[1, 2], [3, 4], [5, 6]]])
 
+  def testUnit(self):
+    unicode_strings = [u"H\xc3llo", u"\U0001f604"]
+    utf8_strings = [s.encode("utf-8") for s in unicode_strings]
+    expected_utf8_byte_lengths = [6, 4]
+    expected_utf8_char_lengths = [5, 1]
+
+    with self.test_session() as sess:
+      utf8_byte_lengths = string_ops.string_length(utf8_strings, unit="BYTE")
+      utf8_char_lengths = string_ops.string_length(
+          utf8_strings, unit="UTF8_CHAR")
+      self.assertAllEqual(
+          sess.run(utf8_byte_lengths), expected_utf8_byte_lengths)
+      self.assertAllEqual(
+          sess.run(utf8_char_lengths), expected_utf8_char_lengths)
+      with self.assertRaisesRegexp(
+          ValueError, "Attr 'unit' of 'StringLength' Op passed string 'XYZ' "
+          'not in: "BYTE", "UTF8_CHAR"'):
+        string_ops.string_length(utf8_strings, unit="XYZ")
+
+  def testLegacyPositionalName(self):
+    # Code that predates the 'unit' parameter may have used a positional
+    # argument for the 'name' parameter.  Check that we don't break such code.
+    strings = [[["1", "12"], ["123", "1234"], ["12345", "123456"]]]
+    lengths = string_ops.string_length(strings, "some_name")
+    with self.test_session():
+      self.assertAllEqual(lengths.eval(), [[[1, 2], [3, 4], [5, 6]]])
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index 5d949467fd..046a48d192 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -36,10 +36,12 @@ from tensorflow.python.ops import math_ops
 
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
+# pylint: disable=g-bad-import-order
 from tensorflow.python.ops.gen_string_ops import *
 from tensorflow.python.util import compat as util_compat
 from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
+# pylint: enable=g-bad-import-order
 # pylint: enable=wildcard-import
 
 
@@ -328,6 +330,17 @@ def reduce_join(inputs, axis=None,
 reduce_join.__doc__ = deprecation.rewrite_argument_docstring(
     gen_string_ops.reduce_join.__doc__, "reduction_indices", "axis")
 
+
+# This wrapper provides backwards compatibility for code that predates the
+# unit argument and that passed 'name' as a positional argument.
+@tf_export("strings.length")
+def string_length(input, name=None, unit="BYTE"):
+  return gen_string_ops.string_length(input, unit=unit, name=name)
+
+
+string_length.__doc__ = gen_string_ops.string_length.__doc__
+
+
 ops.NotDifferentiable("RegexReplace")
 ops.NotDifferentiable("StringToHashBucket")
 ops.NotDifferentiable("StringToHashBucketFast")
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
index c81c156518..c52581dec1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
@@ -10,7 +10,7 @@ tf_module {
   }
   member_method {
     name: "length"
-    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "regex_full_match"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
index c81c156518..c52581dec1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
@@ -10,7 +10,7 @@ tf_module {
   }
   member_method {
     name: "length"
-    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "regex_full_match"
-- 
GitLab


From 5b86e152402f829f0327ab9d6d2c68ad4300d302 Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Tue, 25 Sep 2018 12:25:36 -0700
Subject: [PATCH 0682/1357] - Add a doc for performance tips. - Move the
 benchmarks doc to a different file.

PiperOrigin-RevId: 214483393
---
 tensorflow/contrib/lite/g3doc/performance.md  | 186 +++---------------
 .../lite/g3doc/performance_benchmarks.md      | 174 ++++++++++++++++
 2 files changed, 199 insertions(+), 161 deletions(-)
 create mode 100644 tensorflow/contrib/lite/g3doc/performance_benchmarks.md

diff --git a/tensorflow/contrib/lite/g3doc/performance.md b/tensorflow/contrib/lite/g3doc/performance.md
index 28cb6aba6e..0ae9400068 100644
--- a/tensorflow/contrib/lite/g3doc/performance.md
+++ b/tensorflow/contrib/lite/g3doc/performance.md
@@ -1,174 +1,38 @@
 
-# Performance
+# Performance best practices
 
-This document lists TensorFlow Lite performance benchmarks when running well
-known models on some Android and iOS devices.
+Mobile and embedded devices have limited computational resources and it is important to keep your application resource efficient. We have compiled a list of best practices and strategies you can use to optimize your model and application when using Tensorflow Lite.
 
-These performance benchmark numbers were generated with the
-[Android TFLite benchmark binary](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark)
-and the [iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios).
+## Choose the most efficient model for the problem
+Some models may be too large to run on embedded devices. Instead of large models it is better to use a slightly less precise but smaller model for embedded devices. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices.
 
-# Android performance benchmarks
+You can retrain the listed models on your own dataset by using transfer learning. Check out our transfer learning tutorial for
+[image classification] (https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and
+ [object detection](https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193).
 
-For Android benchmarks, the CPU affinity is set to use big cores on the device to
-reduce variance (see [details](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark#reducing-variance-between-runs-on-android)).
 
-It assumes that models were download and unzipped to the
-`/data/local/tmp/tflite_models` directory. The benchmark binary is built
-using [these instructions](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark#on-android)
-and assumed in the `/data/local/tmp` directory.
+## Profile your model
+Before starting any optimization, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](../tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time.
 
-To run the benchmark:
+## Profile and optimize operators in the graph
+If a particular operator appears frequently in the model and based on profiling you find the operator consuming the most amount of time, you can look into optimizing the operator.
+ This scenario should be rare as Tensorflow Lite has optimized versions for most ops. However you may be able to write a faster version of a custom op, if you know the constraints in which the operator is executed. Check out our [custom operator documentation](custom_operators.md).
 
-```
-adb shell taskset ${CPU_MASK} /data/local/tmp/benchmark_model \
-  --num_threads=1 \
-  --graph=/data/local/tmp/tflite_models/${GRAPH} \
-  --warmup_runs=1 \
-  --num_runs=50 \
-  --use_nnapi=false
-```
+## Quantize your model
+If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. Fully quantized models can be remarkably power efficient as well.
 
-Here, `${GRAPH}` is the name of model and `${CPU_MASK}` is the CPU affinity
-chosen according to the following table:
+## Tweak the number of threads
+Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](../interpreter.h) threads.
 
-Device | CPU_MASK |
--------| ----------
-Pixel 2 | f0 |
-Pixel xl | 0c |
+## Eliminate redundant copies
+Tensorflow Lite is optimized to reduce redundant copies. The APIs allow user to [mmap a model file](https://github.com/tensorflow/tensorflow/blob/9982fd6c8831cbd2f58954f79ea71f26660393bc/tensorflow/contrib/lite/model.h#L152) and avoid copies. If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151).
 
-<table>
-  <thead>
-    <tr>
-      <th>Model Name</th>
-      <th>Device </th>
-      <th>Mean inference time (std dev)</th>
-    </tr>
-  </thead>
-  <tr>
-    <td rowspan = 2>
-      <a href="http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz">Mobilenet_1.0_224(float)</a>
-    </td>
-    <td>Pixel 2 </td>
-    <td>166.5 ms (2.6 ms)</td>
-  </tr>
-   <tr>
-     <td>Pixel xl </td>
-     <td>122.9 ms (1.8 ms)  </td>
-  </tr>
-  <tr>
-    <td rowspan = 2>
-      <a href="http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz">Mobilenet_1.0_224 (quant)</a>
-    </td>
-    <td>Pixel 2 </td>
-    <td>69.5 ms (0.9 ms)</td>
-  </tr>
-   <tr>
-     <td>Pixel xl </td>
-     <td>78.9 ms (2.2 ms)  </td>
-  </tr>
-  <tr>
-    <td rowspan = 2>
-      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/nasnet_mobile_2018_04_27.tgz">NASNet mobile</a>
-    </td>
-    <td>Pixel 2 </td>
-    <td>273.8 ms (3.5 ms)</td>
-  </tr>
-   <tr>
-     <td>Pixel xl </td>
-     <td>210.8 ms (4.2 ms)</td>
-  </tr>
-  <tr>
-    <td rowspan = 2>
-      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz">SqueezeNet</a>
-    </td>
-    <td>Pixel 2 </td>
-    <td>234.0 ms (2.1 ms)</td>
-  </tr>
-   <tr>
-     <td>Pixel xl </td>
-     <td>158.0 ms (2.1 ms)</td>
-  </tr>
-  <tr>
-    <td rowspan = 2>
-      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_resnet_v2_2018_04_27.tgz">Inception_ResNet_V2</a>
-    </td>
-    <td>Pixel 2 </td>
-    <td>2846.0 ms (15.0 ms)</td>
-  </tr>
-   <tr>
-     <td>Pixel xl </td>
-     <td>1973.0 ms (15.0 ms)  </td>
-  </tr>
-  <tr>
-    <td rowspan = 2>
-      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v4_2018_04_27.tgz">Inception_V4</a>
-    </td>
-    <td>Pixel 2 </td>
-    <td>3180.0 ms (11.7 ms)</td>
-  </tr>
-   <tr>
-     <td>Pixel xl </td>
-     <td>2262.0 ms (21.0 ms)  </td>
-  </tr>
+## Profile your application with platform specific tools
+Platform specific tools like [Android profiler](https://developer.android.com/studio/profile/android-profiler) and [Instruments](https://help.apple.com/instruments/mac/current/) provide a wealth of profiling information that can be used to debug your app. Sometimes the performance bug may be not in the model but in parts of application code that interact with the model. Make sure to familiarize yourself with platform specific profiling tools and best practices for your platform.
 
- </table>
+## Use hardware accelerators available on the device
+Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/) on Android.
+You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable NNAPI call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance.
 
-# iOS benchmarks
-
-To run iOS benchmarks, the [benchmark
-app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios)
-was modified to include the appropriate model and `benchmark_params.json` was
-modified  to set `num_threads` to 1.
-
-<table>
-  <thead>
-    <tr>
-      <th>Model Name</th>
-      <th>Device </th>
-      <th>Mean inference time (std dev)</th>
-    </tr>
-  </thead>
-  <tr>
-    <td>
-      <a href="http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz">Mobilenet_1.0_224(float)</a>
-    </td>
-    <td>iPhone 8 </td>
-    <td>32.2 ms (0.8 ms)</td>
-  </tr>
-  <tr>
-    <td>
-      <a href="http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz)">Mobilenet_1.0_224 (quant)</a>
-    </td>
-    <td>iPhone 8 </td>
-    <td>24.4 ms (0.8 ms)</td>
-  </tr>
-  <tr>
-    <td>
-      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/nasnet_mobile_2018_04_27.tgz">NASNet mobile</a>
-    </td>
-    <td>iPhone 8 </td>
-    <td>60.3 ms (0.6 ms)</td>
-  </tr>
-  <tr>
-    <td>
-      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz">SqueezeNet</a>
-    </td>
-    <td>iPhone 8 </td>
-    <td>44.3 (0.7 ms)</td>
-  </tr>
-  <tr>
-    <td>
-      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_resnet_v2_2018_04_27.tgz">Inception_ResNet_V2</a>
-    </td>
-    <td>iPhone 8</td>
-    <td>562.4 ms (18.2 ms)</td>
-  </tr>
-  <tr>
-    <td>
-      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v4_2018_04_27.tgz">Inception_V4</a>
-    </td>
-    <td>iPhone 8 </td>
-    <td>661.0 ms (29.2 ms)</td>
-  </tr>
- </table>
+## Need more help
+The Tensorflow team is happy to help diagnose and address specific performance issues you may be facing. Please file a bug on [github](https://github.com/tensorflow/tensorflow/issues) with details of the issue.
diff --git a/tensorflow/contrib/lite/g3doc/performance_benchmarks.md b/tensorflow/contrib/lite/g3doc/performance_benchmarks.md
new file mode 100644
index 0000000000..28cb6aba6e
--- /dev/null
+++ b/tensorflow/contrib/lite/g3doc/performance_benchmarks.md
@@ -0,0 +1,174 @@
+
+# Performance
+
+This document lists TensorFlow Lite performance benchmarks when running well
+known models on some Android and iOS devices.
+
+These performance benchmark numbers were generated with the
+[Android TFLite benchmark binary](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark)
+and the [iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios).
+
+# Android performance benchmarks
+
+For Android benchmarks, the CPU affinity is set to use big cores on the device to
+reduce variance (see [details](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark#reducing-variance-between-runs-on-android)).
+
+It assumes that models were download and unzipped to the
+`/data/local/tmp/tflite_models` directory. The benchmark binary is built
+using [these instructions](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark#on-android)
+and assumed in the `/data/local/tmp` directory.
+
+To run the benchmark:
+
+```
+adb shell taskset ${CPU_MASK} /data/local/tmp/benchmark_model \
+  --num_threads=1 \
+  --graph=/data/local/tmp/tflite_models/${GRAPH} \
+  --warmup_runs=1 \
+  --num_runs=50 \
+  --use_nnapi=false
+```
+
+Here, `${GRAPH}` is the name of model and `${CPU_MASK}` is the CPU affinity
+chosen according to the following table:
+
+Device | CPU_MASK |
+-------| ----------
+Pixel 2 | f0 |
+Pixel xl | 0c |
+
+<table>
+  <thead>
+    <tr>
+      <th>Model Name</th>
+      <th>Device </th>
+      <th>Mean inference time (std dev)</th>
+    </tr>
+  </thead>
+  <tr>
+    <td rowspan = 2>
+      <a href="http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz">Mobilenet_1.0_224(float)</a>
+    </td>
+    <td>Pixel 2 </td>
+    <td>166.5 ms (2.6 ms)</td>
+  </tr>
+   <tr>
+     <td>Pixel xl </td>
+     <td>122.9 ms (1.8 ms)  </td>
+  </tr>
+  <tr>
+    <td rowspan = 2>
+      <a href="http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz">Mobilenet_1.0_224 (quant)</a>
+    </td>
+    <td>Pixel 2 </td>
+    <td>69.5 ms (0.9 ms)</td>
+  </tr>
+   <tr>
+     <td>Pixel xl </td>
+     <td>78.9 ms (2.2 ms)  </td>
+  </tr>
+  <tr>
+    <td rowspan = 2>
+      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/nasnet_mobile_2018_04_27.tgz">NASNet mobile</a>
+    </td>
+    <td>Pixel 2 </td>
+    <td>273.8 ms (3.5 ms)</td>
+  </tr>
+   <tr>
+     <td>Pixel xl </td>
+     <td>210.8 ms (4.2 ms)</td>
+  </tr>
+  <tr>
+    <td rowspan = 2>
+      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz">SqueezeNet</a>
+    </td>
+    <td>Pixel 2 </td>
+    <td>234.0 ms (2.1 ms)</td>
+  </tr>
+   <tr>
+     <td>Pixel xl </td>
+     <td>158.0 ms (2.1 ms)</td>
+  </tr>
+  <tr>
+    <td rowspan = 2>
+      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_resnet_v2_2018_04_27.tgz">Inception_ResNet_V2</a>
+    </td>
+    <td>Pixel 2 </td>
+    <td>2846.0 ms (15.0 ms)</td>
+  </tr>
+   <tr>
+     <td>Pixel xl </td>
+     <td>1973.0 ms (15.0 ms)  </td>
+  </tr>
+  <tr>
+    <td rowspan = 2>
+      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v4_2018_04_27.tgz">Inception_V4</a>
+    </td>
+    <td>Pixel 2 </td>
+    <td>3180.0 ms (11.7 ms)</td>
+  </tr>
+   <tr>
+     <td>Pixel xl </td>
+     <td>2262.0 ms (21.0 ms)  </td>
+  </tr>
+
+ </table>
+
+# iOS benchmarks
+
+To run iOS benchmarks, the [benchmark
+app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios)
+was modified to include the appropriate model and `benchmark_params.json` was
+modified  to set `num_threads` to 1.
+
+<table>
+  <thead>
+    <tr>
+      <th>Model Name</th>
+      <th>Device </th>
+      <th>Mean inference time (std dev)</th>
+    </tr>
+  </thead>
+  <tr>
+    <td>
+      <a href="http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz">Mobilenet_1.0_224(float)</a>
+    </td>
+    <td>iPhone 8 </td>
+    <td>32.2 ms (0.8 ms)</td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz)">Mobilenet_1.0_224 (quant)</a>
+    </td>
+    <td>iPhone 8 </td>
+    <td>24.4 ms (0.8 ms)</td>
+  </tr>
+  <tr>
+    <td>
+      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/nasnet_mobile_2018_04_27.tgz">NASNet mobile</a>
+    </td>
+    <td>iPhone 8 </td>
+    <td>60.3 ms (0.6 ms)</td>
+  </tr>
+  <tr>
+    <td>
+      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz">SqueezeNet</a>
+    </td>
+    <td>iPhone 8 </td>
+    <td>44.3 (0.7 ms)</td>
+  </tr>
+  <tr>
+    <td>
+      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_resnet_v2_2018_04_27.tgz">Inception_ResNet_V2</a>
+    </td>
+    <td>iPhone 8</td>
+    <td>562.4 ms (18.2 ms)</td>
+  </tr>
+  <tr>
+    <td>
+      <a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v4_2018_04_27.tgz">Inception_V4</a>
+    </td>
+    <td>iPhone 8 </td>
+    <td>661.0 ms (29.2 ms)</td>
+  </tr>
+ </table>
-- 
GitLab


From b62cadc1513a73c1673094c9e35421c8a6c17645 Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Tue, 25 Sep 2018 12:52:13 -0700
Subject: [PATCH 0683/1357] - Upgrade flatbuffer schema and flatbuffer
 versions. - Update the flatbuffer download URL to use a pinned version. - Had
 to provide a mirror url that doesn't exist, since it is required by the
 validation flow, the flatbuffer version will be added later.

PiperOrigin-RevId: 214487576
---
 .../contrib/lite/schema/schema_generated.h    | 147 ++++++++++--------
 .../lite/tools/make/download_dependencies.sh  |   2 +-
 third_party/flatbuffers/BUILD.bazel           |   3 +
 third_party/flatbuffers/workspace.bzl         |   8 +-
 4 files changed, 89 insertions(+), 71 deletions(-)

diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index c7a59cabc5..23ac8484de 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -264,8 +264,8 @@ enum TensorType {
   TensorType_MAX = TensorType_COMPLEX64
 };
 
-inline TensorType (&EnumValuesTensorType())[9] {
-  static TensorType values[] = {
+inline const TensorType (&EnumValuesTensorType())[9] {
+  static const TensorType values[] = {
     TensorType_FLOAT32,
     TensorType_FLOAT16,
     TensorType_INT32,
@@ -279,8 +279,8 @@ inline TensorType (&EnumValuesTensorType())[9] {
   return values;
 }
 
-inline const char **EnumNamesTensorType() {
-  static const char *names[] = {
+inline const char * const *EnumNamesTensorType() {
+  static const char * const names[] = {
     "FLOAT32",
     "FLOAT16",
     "INT32",
@@ -399,8 +399,8 @@ enum BuiltinOperator {
   BuiltinOperator_MAX = BuiltinOperator_FILL
 };
 
-inline BuiltinOperator (&EnumValuesBuiltinOperator())[94] {
-  static BuiltinOperator values[] = {
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[94] {
+  static const BuiltinOperator values[] = {
     BuiltinOperator_ADD,
     BuiltinOperator_AVERAGE_POOL_2D,
     BuiltinOperator_CONCATENATION,
@@ -499,8 +499,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[94] {
   return values;
 }
 
-inline const char **EnumNamesBuiltinOperator() {
-  static const char *names[] = {
+inline const char * const *EnumNamesBuiltinOperator() {
+  static const char * const names[] = {
     "ADD",
     "AVERAGE_POOL_2D",
     "CONCATENATION",
@@ -680,8 +680,8 @@ enum BuiltinOptions {
   BuiltinOptions_MAX = BuiltinOptions_FillOptions
 };
 
-inline BuiltinOptions (&EnumValuesBuiltinOptions())[69] {
-  static BuiltinOptions values[] = {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[69] {
+  static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
     BuiltinOptions_DepthwiseConv2DOptions,
@@ -755,8 +755,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[69] {
   return values;
 }
 
-inline const char **EnumNamesBuiltinOptions() {
-  static const char *names[] = {
+inline const char * const *EnumNamesBuiltinOptions() {
+  static const char * const names[] = {
     "NONE",
     "Conv2DOptions",
     "DepthwiseConv2DOptions",
@@ -1699,16 +1699,16 @@ enum Padding {
   Padding_MAX = Padding_VALID
 };
 
-inline Padding (&EnumValuesPadding())[2] {
-  static Padding values[] = {
+inline const Padding (&EnumValuesPadding())[2] {
+  static const Padding values[] = {
     Padding_SAME,
     Padding_VALID
   };
   return values;
 }
 
-inline const char **EnumNamesPadding() {
-  static const char *names[] = {
+inline const char * const *EnumNamesPadding() {
+  static const char * const names[] = {
     "SAME",
     "VALID",
     nullptr
@@ -1732,8 +1732,8 @@ enum ActivationFunctionType {
   ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT
 };
 
-inline ActivationFunctionType (&EnumValuesActivationFunctionType())[6] {
-  static ActivationFunctionType values[] = {
+inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] {
+  static const ActivationFunctionType values[] = {
     ActivationFunctionType_NONE,
     ActivationFunctionType_RELU,
     ActivationFunctionType_RELU_N1_TO_1,
@@ -1744,8 +1744,8 @@ inline ActivationFunctionType (&EnumValuesActivationFunctionType())[6] {
   return values;
 }
 
-inline const char **EnumNamesActivationFunctionType() {
-  static const char *names[] = {
+inline const char * const *EnumNamesActivationFunctionType() {
+  static const char * const names[] = {
     "NONE",
     "RELU",
     "RELU_N1_TO_1",
@@ -1770,8 +1770,8 @@ enum LSHProjectionType {
   LSHProjectionType_MAX = LSHProjectionType_DENSE
 };
 
-inline LSHProjectionType (&EnumValuesLSHProjectionType())[3] {
-  static LSHProjectionType values[] = {
+inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3] {
+  static const LSHProjectionType values[] = {
     LSHProjectionType_UNKNOWN,
     LSHProjectionType_SPARSE,
     LSHProjectionType_DENSE
@@ -1779,8 +1779,8 @@ inline LSHProjectionType (&EnumValuesLSHProjectionType())[3] {
   return values;
 }
 
-inline const char **EnumNamesLSHProjectionType() {
-  static const char *names[] = {
+inline const char * const *EnumNamesLSHProjectionType() {
+  static const char * const names[] = {
     "UNKNOWN",
     "SPARSE",
     "DENSE",
@@ -1801,16 +1801,16 @@ enum FullyConnectedOptionsWeightsFormat {
   FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
 };
 
-inline FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] {
-  static FullyConnectedOptionsWeightsFormat values[] = {
+inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] {
+  static const FullyConnectedOptionsWeightsFormat values[] = {
     FullyConnectedOptionsWeightsFormat_DEFAULT,
     FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
   };
   return values;
 }
 
-inline const char **EnumNamesFullyConnectedOptionsWeightsFormat() {
-  static const char *names[] = {
+inline const char * const *EnumNamesFullyConnectedOptionsWeightsFormat() {
+  static const char * const names[] = {
     "DEFAULT",
     "SHUFFLED4x16INT8",
     nullptr
@@ -1830,16 +1830,16 @@ enum LSTMKernelType {
   LSTMKernelType_MAX = LSTMKernelType_BASIC
 };
 
-inline LSTMKernelType (&EnumValuesLSTMKernelType())[2] {
-  static LSTMKernelType values[] = {
+inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2] {
+  static const LSTMKernelType values[] = {
     LSTMKernelType_FULL,
     LSTMKernelType_BASIC
   };
   return values;
 }
 
-inline const char **EnumNamesLSTMKernelType() {
-  static const char *names[] = {
+inline const char * const *EnumNamesLSTMKernelType() {
+  static const char * const names[] = {
     "FULL",
     "BASIC",
     nullptr
@@ -1860,8 +1860,8 @@ enum CombinerType {
   CombinerType_MAX = CombinerType_SQRTN
 };
 
-inline CombinerType (&EnumValuesCombinerType())[3] {
-  static CombinerType values[] = {
+inline const CombinerType (&EnumValuesCombinerType())[3] {
+  static const CombinerType values[] = {
     CombinerType_SUM,
     CombinerType_MEAN,
     CombinerType_SQRTN
@@ -1869,8 +1869,8 @@ inline CombinerType (&EnumValuesCombinerType())[3] {
   return values;
 }
 
-inline const char **EnumNamesCombinerType() {
-  static const char *names[] = {
+inline const char * const *EnumNamesCombinerType() {
+  static const char * const names[] = {
     "SUM",
     "MEAN",
     "SQRTN",
@@ -1890,15 +1890,15 @@ enum CustomOptionsFormat {
   CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS
 };
 
-inline CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] {
-  static CustomOptionsFormat values[] = {
+inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] {
+  static const CustomOptionsFormat values[] = {
     CustomOptionsFormat_FLEXBUFFERS
   };
   return values;
 }
 
-inline const char **EnumNamesCustomOptionsFormat() {
-  static const char *names[] = {
+inline const char * const *EnumNamesCustomOptionsFormat() {
+  static const char * const names[] = {
     "FLEXBUFFERS",
     nullptr
   };
@@ -1943,13 +1943,13 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyOffset(verifier, VT_MIN) &&
-           verifier.Verify(min()) &&
+           verifier.VerifyVector(min()) &&
            VerifyOffset(verifier, VT_MAX) &&
-           verifier.Verify(max()) &&
+           verifier.VerifyVector(max()) &&
            VerifyOffset(verifier, VT_SCALE) &&
-           verifier.Verify(scale()) &&
+           verifier.VerifyVector(scale()) &&
            VerifyOffset(verifier, VT_ZERO_POINT) &&
-           verifier.Verify(zero_point()) &&
+           verifier.VerifyVector(zero_point()) &&
            verifier.EndTable();
   }
   QuantizationParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -2060,11 +2060,11 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyOffset(verifier, VT_SHAPE) &&
-           verifier.Verify(shape()) &&
+           verifier.VerifyVector(shape()) &&
            VerifyField<int8_t>(verifier, VT_TYPE) &&
            VerifyField<uint32_t>(verifier, VT_BUFFER) &&
            VerifyOffset(verifier, VT_NAME) &&
-           verifier.Verify(name()) &&
+           verifier.VerifyString(name()) &&
            VerifyOffset(verifier, VT_QUANTIZATION) &&
            verifier.VerifyTable(quantization()) &&
            VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) &&
@@ -2530,9 +2530,9 @@ struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Ta
     return VerifyTableStart(verifier) &&
            VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) &&
            VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
-           verifier.Verify(num_columns_per_channel()) &&
+           verifier.VerifyVector(num_columns_per_channel()) &&
            VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
-           verifier.Verify(embedding_dim_per_channel()) &&
+           verifier.VerifyVector(embedding_dim_per_channel()) &&
            verifier.EndTable();
   }
   ConcatEmbeddingsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -3630,7 +3630,7 @@ struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyOffset(verifier, VT_NEW_SHAPE) &&
-           verifier.Verify(new_shape()) &&
+           verifier.VerifyVector(new_shape()) &&
            verifier.EndTable();
   }
   ReshapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -4294,7 +4294,7 @@ struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyOffset(verifier, VT_SQUEEZE_DIMS) &&
-           verifier.Verify(squeeze_dims()) &&
+           verifier.VerifyVector(squeeze_dims()) &&
            verifier.EndTable();
   }
   SqueezeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -6041,7 +6041,7 @@ struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) &&
            VerifyOffset(verifier, VT_CUSTOM_CODE) &&
-           verifier.Verify(custom_code()) &&
+           verifier.VerifyString(custom_code()) &&
            VerifyField<int32_t>(verifier, VT_VERSION) &&
            verifier.EndTable();
   }
@@ -6360,17 +6360,17 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     return VerifyTableStart(verifier) &&
            VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) &&
            VerifyOffset(verifier, VT_INPUTS) &&
-           verifier.Verify(inputs()) &&
+           verifier.VerifyVector(inputs()) &&
            VerifyOffset(verifier, VT_OUTPUTS) &&
-           verifier.Verify(outputs()) &&
+           verifier.VerifyVector(outputs()) &&
            VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) &&
            VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
            VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) &&
            VerifyOffset(verifier, VT_CUSTOM_OPTIONS) &&
-           verifier.Verify(custom_options()) &&
+           verifier.VerifyVector(custom_options()) &&
            VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) &&
            VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) &&
-           verifier.Verify(mutating_variable_inputs()) &&
+           verifier.VerifyVector(mutating_variable_inputs()) &&
            verifier.EndTable();
   }
   OperatorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -6773,17 +6773,17 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyOffset(verifier, VT_TENSORS) &&
-           verifier.Verify(tensors()) &&
+           verifier.VerifyVector(tensors()) &&
            verifier.VerifyVectorOfTables(tensors()) &&
            VerifyOffset(verifier, VT_INPUTS) &&
-           verifier.Verify(inputs()) &&
+           verifier.VerifyVector(inputs()) &&
            VerifyOffset(verifier, VT_OUTPUTS) &&
-           verifier.Verify(outputs()) &&
+           verifier.VerifyVector(outputs()) &&
            VerifyOffset(verifier, VT_OPERATORS) &&
-           verifier.Verify(operators()) &&
+           verifier.VerifyVector(operators()) &&
            verifier.VerifyVectorOfTables(operators()) &&
            VerifyOffset(verifier, VT_NAME) &&
-           verifier.Verify(name()) &&
+           verifier.VerifyString(name()) &&
            verifier.EndTable();
   }
   SubGraphT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -6873,7 +6873,7 @@ struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyOffset(verifier, VT_DATA) &&
-           verifier.Verify(data()) &&
+           verifier.VerifyVector(data()) &&
            verifier.EndTable();
   }
   BufferT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -6962,18 +6962,18 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     return VerifyTableStart(verifier) &&
            VerifyField<uint32_t>(verifier, VT_VERSION) &&
            VerifyOffset(verifier, VT_OPERATOR_CODES) &&
-           verifier.Verify(operator_codes()) &&
+           verifier.VerifyVector(operator_codes()) &&
            verifier.VerifyVectorOfTables(operator_codes()) &&
            VerifyOffset(verifier, VT_SUBGRAPHS) &&
-           verifier.Verify(subgraphs()) &&
+           verifier.VerifyVector(subgraphs()) &&
            verifier.VerifyVectorOfTables(subgraphs()) &&
            VerifyOffset(verifier, VT_DESCRIPTION) &&
-           verifier.Verify(description()) &&
+           verifier.VerifyString(description()) &&
            VerifyOffset(verifier, VT_BUFFERS) &&
-           verifier.Verify(buffers()) &&
+           verifier.VerifyVector(buffers()) &&
            verifier.VerifyVectorOfTables(buffers()) &&
            VerifyOffset(verifier, VT_METADATA_BUFFER) &&
-           verifier.Verify(metadata_buffer()) &&
+           verifier.VerifyVector(metadata_buffer()) &&
            verifier.EndTable();
   }
   ModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -10628,6 +10628,10 @@ inline const tflite::Model *GetModel(const void *buf) {
   return flatbuffers::GetRoot<tflite::Model>(buf);
 }
 
+inline const tflite::Model *GetSizePrefixedModel(const void *buf) {
+  return flatbuffers::GetSizePrefixedRoot<tflite::Model>(buf);
+}
+
 inline const char *ModelIdentifier() {
   return "TFL3";
 }
@@ -10642,6 +10646,11 @@ inline bool VerifyModelBuffer(
   return verifier.VerifyBuffer<tflite::Model>(ModelIdentifier());
 }
 
+inline bool VerifySizePrefixedModelBuffer(
+    flatbuffers::Verifier &verifier) {
+  return verifier.VerifySizePrefixedBuffer<tflite::Model>(ModelIdentifier());
+}
+
 inline const char *ModelExtension() {
   return "tflite";
 }
@@ -10652,6 +10661,12 @@ inline void FinishModelBuffer(
   fbb.Finish(root, ModelIdentifier());
 }
 
+inline void FinishSizePrefixedModelBuffer(
+    flatbuffers::FlatBufferBuilder &fbb,
+    flatbuffers::Offset<tflite::Model> root) {
+  fbb.FinishSizePrefixed(root, ModelIdentifier());
+}
+
 inline std::unique_ptr<ModelT> UnPackModel(
     const void *buf,
     const flatbuffers::resolver_function_t *res = nullptr) {
diff --git a/tensorflow/contrib/lite/tools/make/download_dependencies.sh b/tensorflow/contrib/lite/tools/make/download_dependencies.sh
index 29afa45133..3570f9a38d 100755
--- a/tensorflow/contrib/lite/tools/make/download_dependencies.sh
+++ b/tensorflow/contrib/lite/tools/make/download_dependencies.sh
@@ -35,7 +35,7 @@ GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.g
 ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)"
 NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip"
 FARMHASH_URL="https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz"
-FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/v1.8.0.zip"
+FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz"
 FFT2D_URL="https://mirror.bazel.build/www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz"
 
 # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
diff --git a/third_party/flatbuffers/BUILD.bazel b/third_party/flatbuffers/BUILD.bazel
index 934c0d9650..d0be482fda 100644
--- a/third_party/flatbuffers/BUILD.bazel
+++ b/third_party/flatbuffers/BUILD.bazel
@@ -108,11 +108,14 @@ cc_binary(
         "grpc/src/compiler/schema_interface.h",
         "src/flatc_main.cpp",
         "src/idl_gen_cpp.cpp",
+        "src/idl_gen_dart.cpp",
         "src/idl_gen_general.cpp",
         "src/idl_gen_go.cpp",
         "src/idl_gen_grpc.cpp",
         "src/idl_gen_js.cpp",
         "src/idl_gen_json_schema.cpp",
+        "src/idl_gen_lobster.cpp",
+        "src/idl_gen_lua.cpp",
         "src/idl_gen_php.cpp",
         "src/idl_gen_python.cpp",
         "src/idl_gen_text.cpp",
diff --git a/third_party/flatbuffers/workspace.bzl b/third_party/flatbuffers/workspace.bzl
index 3aeef96a72..7613767fc4 100644
--- a/third_party/flatbuffers/workspace.bzl
+++ b/third_party/flatbuffers/workspace.bzl
@@ -5,11 +5,11 @@ load("//third_party:repo.bzl", "third_party_http_archive")
 def repo():
     third_party_http_archive(
         name = "flatbuffers",
-        strip_prefix = "flatbuffers-1.9.0",
-        sha256 = "5ca5491e4260cacae30f1a5786d109230db3f3a6e5a0eb45d0d0608293d247e3",
+        strip_prefix = "flatbuffers-1f5eae5d6a135ff6811724f6c57f911d1f46bb15",
+        sha256 = "b2bb0311ca40b12ebe36671bdda350b10c7728caf0cfe2d432ea3b6e409016f3",
         urls = [
-            "https://mirror.bazel.build/github.com/google/flatbuffers/archive/v1.9.0.tar.gz",
-            "https://github.com/google/flatbuffers/archive/v1.9.0.tar.gz",
+            "https://mirror.bazel.build/github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz",
+            "https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz",
         ],
         build_file = "//third_party/flatbuffers:BUILD.bazel",
         system_build_file = "//third_party/flatbuffers:BUILD.system",
-- 
GitLab


From 471e20a6738a326adeb0eef2d158b61bbfd23d6d Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 25 Sep 2018 12:55:00 -0700
Subject: [PATCH 0684/1357] Fix memory leaks of Var objects in the
 XlaCompileOnDemandOp and SnapshotResourceVariables function.

PiperOrigin-RevId: 214488033
---
 tensorflow/compiler/jit/xla_compile_on_demand_op.cc | 1 +
 tensorflow/compiler/jit/xla_launch_util.cc          | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
index 3c160aefe5..b98c0cb028 100644
--- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
+++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
@@ -34,6 +34,7 @@ std::map<int, OptionalTensor> GetVariables(OpKernelContext* ctx) {
       OptionalTensor& optional = variables[i];
       optional.name = handle.name();
       if (LookupResource(ctx, handle, &variable).ok()) {
+        core::ScopedUnref scoped_unref(variable);
         tf_shared_lock lock(*variable->mu());
         optional.present = true;
         optional.value = *variable->tensor();
diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc
index f5c8bdd6ee..4f6fc4e068 100644
--- a/tensorflow/compiler/jit/xla_launch_util.cc
+++ b/tensorflow/compiler/jit/xla_launch_util.cc
@@ -49,6 +49,7 @@ std::map<int, OptionalTensor> SnapshotResourceVariables(
     ResourceHandle handle = HandleFromInput(ctx, i);
     OptionalTensor& tensor = snapshot[i];
     if (LookupResource(ctx, handle, &variable).ok()) {
+      core::ScopedUnref scoped_unref(variable);
       tf_shared_lock lock(*variable->mu());
       tensor.name = handle.name();
       tensor.present = true;
-- 
GitLab


From a50dff24b6f38fef7ead20e1015509cac905ed29 Mon Sep 17 00:00:00 2001
From: Yuanzhong Xu <yuanzx@google.com>
Date: Tue, 25 Sep 2018 13:04:20 -0700
Subject: [PATCH 0685/1357] [XLA] Avoid recursion in global decreasing size
 best-fit heap.

PiperOrigin-RevId: 214489542
---
 .../compiler/xla/service/heap_simulator.cc    | 45 ++++++++++---------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index a07eaaf997..2bd04259c0 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -827,33 +827,34 @@ class BufferIntervalTree {
   // interval.
   std::vector<Chunk> ChunksOverlappingInTime(int64 start, int64 end) {
     std::vector<Chunk> result;
-    if (node_count_ > 0) {
-      ChunksOverlappingInTimeHelper(start, end, &node_storage_[0], &result);
+    if (node_count_ == 0) {
+      return result;
+    }
+    std::vector<BufferIntervalTreeNode*> visiting_stack;
+    visiting_stack.push_back(&node_storage_[0]);
+    while (!visiting_stack.empty()) {
+      BufferIntervalTreeNode* top = visiting_stack.back();
+      visiting_stack.pop_back();
+      if (start > top->subtree_end) {
+        continue;
+      }
+      if (top->left != nullptr) {
+        visiting_stack.push_back(top->left);
+      }
+      if (top->start <= end && top->end >= start) {
+        result.push_back(top->chunk);
+      }
+      if (end < top->start) {
+        continue;
+      }
+      if (top->right != nullptr) {
+        visiting_stack.push_back(top->right);
+      }
     }
     return result;
   }
 
  private:
-  void ChunksOverlappingInTimeHelper(int64 start, int64 end,
-                                     BufferIntervalTreeNode* visiting_node,
-                                     std::vector<Chunk>* result) {
-    if (start > visiting_node->subtree_end) {
-      return;
-    }
-    if (visiting_node->left != nullptr) {
-      ChunksOverlappingInTimeHelper(start, end, visiting_node->left, result);
-    }
-    if (visiting_node->start <= end && visiting_node->end >= start) {
-      result->push_back(visiting_node->chunk);
-    }
-    if (end < visiting_node->start) {
-      return;
-    }
-    if (visiting_node->right != nullptr) {
-      ChunksOverlappingInTimeHelper(start, end, visiting_node->right, result);
-    }
-  }
-
   int64 node_count_ = 0;
   std::vector<BufferIntervalTreeNode> node_storage_;
 };
-- 
GitLab


From 22c0efbc82d4c721d7a96b7acc1205e161563800 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 13:06:23 -0700
Subject: [PATCH 0686/1357] Optimize Resnet-50 infeed pipeline by adding
 sharding and cache.

PiperOrigin-RevId: 214489904
---
 tensorflow/contrib/tpu/python/tpu/tpu_context.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
index b1a8a16d1e..a5f86ecd5f 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
@@ -117,6 +117,11 @@ class TPUContext(object):
     """The number of hosts for the TPU system."""
     return self._internal_ctx.num_hosts
 
+  @property
+  def current_host(self):
+    """The current host index for the TPU system."""
+    return self._invocation_index
+
   @property
   def num_of_replicas_per_host(self):
     """The number of replicas for each host."""
-- 
GitLab


From e51963ead78b3c1c4ab0077a3e43fb9c0f6ab374 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 13:29:09 -0700
Subject: [PATCH 0687/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 214493515
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 24 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 13 ++++++++++
 2 files changed, 37 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index b02ea64ac9..0ab1558613 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -70497,6 +70497,30 @@ op {
     type: DT_INT32
   }
 }
+op {
+  name: "StringLength"
+  input_arg {
+    name: "input"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT32
+  }
+  attr {
+    name: "unit"
+    type: "string"
+    default_value {
+      s: "BYTE"
+    }
+    allowed_values {
+      list {
+        s: "BYTE"
+        s: "UTF8_CHAR"
+      }
+    }
+  }
+}
 op {
   name: "StringSplit"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 4c5a472e9f..3b89fb76ea 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -33043,6 +33043,19 @@ op {
     name: "output"
     type: DT_INT32
   }
+  attr {
+    name: "unit"
+    type: "string"
+    default_value {
+      s: "BYTE"
+    }
+    allowed_values {
+      list {
+        s: "BYTE"
+        s: "UTF8_CHAR"
+      }
+    }
+  }
 }
 op {
   name: "StringSplit"
-- 
GitLab


From 976fb3105312bb17accebcbca2ebae906bcf99fb Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Tue, 25 Sep 2018 13:36:56 -0700
Subject: [PATCH 0688/1357] Add outputs and target cross replica concat, so
 each core sees the same output and targets and produces the same loss and
 metrics.

PiperOrigin-RevId: 214494877
---
 .../contrib/tpu/python/tpu/keras_support.py   | 155 +++++++++++++++++-
 1 file changed, 146 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 93ae68d254..03e06b8142 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -68,6 +68,7 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import context
 from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -229,6 +230,39 @@ class TPUEmbedding(embeddings.Embedding):
     return math_ops.tensordot(inputs, self.embeddings, 1)
 
 
+def _cross_replica_concat(tensor, core_id, num_cores, name):
+  """Concatenate `tensor` across cores.
+
+  Args:
+    tensor: The tensor to be concatenated. Must be [int32 and float32].
+    core_id: Tensor indicating the current TPU core.
+    num_cores: Python int. The total number of TPU cores in the system.
+    name: The string name to print for debugging.
+
+  Returns:
+    The same concatenated Tensor on each core.
+  """
+
+  input_dtype = tensor.dtype
+  if input_dtype not in [dtypes.float32, dtypes.int32]:
+    raise TypeError('For model replication, only (float32 and int32) is '
+                    'supported for model outputs and targets. Got {} for '
+                    '{}.'.format(input_dtype, name))
+
+  batch_size = tensor.shape[0]
+  mask = math_ops.to_float(math_ops.equal(range(num_cores), core_id))
+  mask = array_ops.reshape(mask, [num_cores] + [1] * tensor.shape.ndims)
+  result = mask * math_ops.to_float(tensor)
+  local_tensor_with_holes = array_ops.reshape(result,
+                                              [-1] + result.shape.as_list()[2:])
+  concat_tensor = tpu_ops.cross_replica_sum(local_tensor_with_holes)
+  concat_tensor.set_shape((num_cores * batch_size,) + tuple(tensor.shape[1:]))
+
+  if concat_tensor != input_dtype:
+    concat_tensor = math_ops.cast(concat_tensor, input_dtype)
+  return concat_tensor
+
+
 class KerasCrossShardOptimizer(keras_optimizers.Optimizer):
   """An optimizer that averages gradients across TPU shards."""
 
@@ -617,7 +651,7 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
       return {}
 
   # pylint: disable=redefined-outer-name
-  def __init__(self, dataset, tpu_assignment, tpu_session):
+  def __init__(self, dataset, tpu_assignment, tpu_session, mode):
     """Constructs a TPUDatasetInfeedManager.
 
     Must be called within a `KerasTPUModel.tpu_session` context!
@@ -627,8 +661,10 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
       tpu_assignment: The `TPUAssignment` used to configure the
         Keras TPU model.
       tpu_session: The `tf.Session` object used for running the TPU model.
+      mode: ModeKeys enum.
     """
     self._verify_dataset_shape(dataset)
+
     self._dataset = dataset
     self._tpu_assignment = tpu_assignment
     dummy_x_shape = dataset.output_shapes[0].as_list()
@@ -668,6 +704,10 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
                                     self._iterator.output_types)
       input_specs.append(spec)
 
+    # Pre-process the inputs and get_next_ops before caching.
+    input_specs, self._get_next_ops = (
+        _inject_tpu_inputs_for_dataset(
+            tpu_assignment, mode, input_specs, self._get_next_ops))
     self._infeed_instance = self.DatasetInfeedInstance(input_specs)
 
   def _verify_dataset_shape(self, dataset):
@@ -735,6 +775,71 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
                        sharded_infeed_tensors=shard_infeed_tensors)
 
 
+def _inject_tpu_inputs_for_dataset(tpu_assignment, mode,
+                                   input_specs, get_next_ops):
+  """Append core information to the set of dataset inputs."""
+  # This is used during compilation to identify the current TPU core and enable
+  # concatenation operations across cores.
+  if mode not in [model_fn_lib.ModeKeys.TRAIN, model_fn_lib.ModeKeys.EVAL]:
+    return input_specs, get_next_ops
+
+  # Dataset inputs operate on per core basis.
+  per_core_batch_size = input_specs[0].shape.as_list()[0]
+
+  # Insert, at head, the tensor for core_id.
+  assert len(get_next_ops) == tpu_assignment.num_towers
+  for i in range(tpu_assignment.num_towers):
+    core_id_constant = constant_op.constant(
+        np.array([i] * per_core_batch_size).astype('int32'),
+        dtype=dtypes.int32,
+        name='cord_id_constant')
+    get_next_ops[i] = [core_id_constant] + list(get_next_ops[i])
+
+  # Insert the input spec at head also.
+  input_specs = [tensor_spec.TensorSpec([per_core_batch_size], dtypes.int32)
+                ] + input_specs
+
+  return input_specs, get_next_ops
+
+
+def _inject_tpu_inputs_for_infeed(tpu_assignment, mode, input_tensors, inputs):
+  """Append core information to the set of inputs."""
+  # This is used during compilation to identify the current TPU core and enable
+  # concatenation operations across cores.
+  if mode not in [model_fn_lib.ModeKeys.TRAIN, model_fn_lib.ModeKeys.EVAL]:
+    return input_tensors, inputs
+
+  # Puts a place holder in input spec.
+  core_id_place_holder = array_ops.placeholder(
+      dtype=dtypes.int32, shape=[1], name='core_id')
+  input_tensors = [core_id_place_holder] + input_tensors
+
+  # Now fill the core id. For `num_cores` = 2, `batch_size` = 8, we fill the
+  # core id inputs as [0, 0, 0, 0, 1, 1, 1, 1], so each core sees its core id
+  # (duplicated).
+  num_cores = tpu_assignment.num_towers
+  per_core_batch_size = inputs[0].shape[0] // num_cores
+  core_ids = np.arange(num_cores).repeat(per_core_batch_size)
+  inputs = [core_ids] + inputs
+  return input_tensors, inputs
+
+
+def _read_tpu_coreid_from_infeed(mode, infeed_tensors):
+  """Popping out the core ids from infeed."""
+  if mode not in [model_fn_lib.ModeKeys.TRAIN, model_fn_lib.ModeKeys.EVAL]:
+    return None, infeed_tensors
+
+  if len(infeed_tensors) <= 1:
+    raise RuntimeError(
+        'The infeed tensors on TPU core has only {} tensors. '
+        'This is not expected. Please report a bug.\nTensors: {}'.format(
+            len(infeed_tensors), infeed_tensors))
+
+  core_id = infeed_tensors[0][0]  # Pop out the scalar version.
+  rest = infeed_tensors[1:]
+  return core_id, rest
+
+
 class TPUFunction(object):
   """K.function compatible interface for invoking a TPU compiled function.
 
@@ -785,6 +890,10 @@ class TPUFunction(object):
           shapes=[spec.shape for spec in input_specs],
           name='infeed-%s' % self.execution_mode)
 
+      core_id, infeed_tensors = (
+          _read_tpu_coreid_from_infeed(
+              mode=self.execution_mode, infeed_tensors=infeed_tensors))
+
       assert len(infeed_tensors) == len(infeed_layers), (
           'Infeed inputs did not match model: %s vs %s' % (infeed_layers,
                                                            infeed_tensors))
@@ -806,6 +915,28 @@ class TPUFunction(object):
               self._tpu_assignment.num_towers):
             self._cloned_model = models.clone_model(self.model)
 
+      # When running on more than one core, concatenate outputs at the end of
+      # processing. In backprop stage, the gradients will be calculdated
+      # according to the local inputs as gradient of cross-replica-concat being
+      # zero for any outputs other than those from mlocal core so the loss
+      # calculation is identical.
+      num_towers = self.model._tpu_assignment.num_towers
+      if num_towers > 1 and (is_training or is_test):
+        new_outputs = [
+            _cross_replica_concat(
+                o, core_id, num_towers, name='model output ({})'.format(o.name))
+            for o in self._cloned_model.outputs
+        ]
+        self._cloned_model.outputs = new_outputs
+        tpu_targets = [
+            _cross_replica_concat(
+                tensor,
+                core_id,
+                num_towers,
+                name='model target ({})'.format(tensor.name))
+            for tensor in tpu_targets
+        ]
+
       # Create a copy of the optimizer for this graph.
       if isinstance(self.model.optimizer, keras_optimizers.TFOptimizer):
         cloned_optimizer = keras_optimizers.TFOptimizer(
@@ -933,6 +1064,7 @@ class TPUFunction(object):
     for x, mgr in self.model._numpy_to_infeed_manager_list:
       if inputs[0] is x:
         return mgr
+
     return TPUNumpyInfeedManager(self.model._tpu_assignment)
 
   def _tpu_model_ops_for_input_specs(self, input_specs, infeed_manager):
@@ -958,8 +1090,10 @@ class TPUFunction(object):
     shape_key = tuple([tuple(spec.shape.as_list()) for spec in input_specs])
     if shape_key not in self._compilation_cache:
       with self.model.tpu_session():
-        logging.info('New input shapes; (re-)compiling: mode=%s, %s',
-                     self.execution_mode, input_specs)
+        logging.info(
+            'New input shapes; (re-)compiling: mode=%s '
+            '(# of cores %d), %s', self.execution_mode,
+            self._tpu_assignment.num_towers, input_specs)
         new_tpu_model_ops = self._specialize_model(input_specs,
                                                    infeed_manager)
         self._compilation_cache[shape_key] = new_tpu_model_ops
@@ -998,6 +1132,9 @@ class TPUFunction(object):
       input_tensors = self.model._feed_inputs
 
     inputs = inputs[:len(input_tensors)]
+    input_tensors, inputs = (
+        _inject_tpu_inputs_for_infeed(
+            self._tpu_assignment, self.execution_mode, input_tensors, inputs))
     return input_tensors, inputs
 
   def _process_outputs(self, outfeed_outputs):
@@ -1272,8 +1409,8 @@ class KerasTPUModel(models.Model):
         if y is not None:
           raise ValueError('When using tf.data as input to a model, y must be '
                            'None')
-        infeed_manager = TPUDatasetInfeedManager(dataset, self._tpu_assignment,
-                                                 sess)
+        infeed_manager = TPUDatasetInfeedManager(
+            dataset, self._tpu_assignment, sess, model_fn_lib.ModeKeys.TRAIN)
         # Use dummy numpy inputs for the rest of Keras' shape checking. We
         # intercept them when building the model.
         x = infeed_manager.dummy_x
@@ -1294,8 +1431,8 @@ class KerasTPUModel(models.Model):
         if validation_steps is None:
           raise ValueError('When using tf.data as validation for a model, you '
                            'should specify the validation_steps argument.')
-        infeed_manager = TPUDatasetInfeedManager(dataset, self._tpu_assignment,
-                                                 sess)
+        infeed_manager = TPUDatasetInfeedManager(
+            dataset, self._tpu_assignment, sess, model_fn_lib.ModeKeys.EVAL)
         # Use dummy numpy inputs for the rest of Keras' shape checking. We
         # intercept them when building the model.
         val_x = infeed_manager.dummy_x
@@ -1372,8 +1509,8 @@ class KerasTPUModel(models.Model):
         if y is not None:
           raise ValueError('When using tf.data as input to a model, y must be '
                            'None')
-        infeed_manager = TPUDatasetInfeedManager(dataset, self._tpu_assignment,
-                                                 sess)
+        infeed_manager = TPUDatasetInfeedManager(
+            dataset, self._tpu_assignment, sess, model_fn_lib.ModeKeys.EVAL)
         # Use dummy numpy inputs for the rest of Keras' shape checking. We
         # intercept them when building the model.
         x = infeed_manager.dummy_x
-- 
GitLab


From 348478f642216cf3cbe1eb67b875252d8e6a6418 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Tue, 25 Sep 2018 13:42:46 -0700
Subject: [PATCH 0689/1357] [tf.data] Adding a private method for (recursively)
 tracking dataset inputs.

PiperOrigin-RevId: 214495925
---
 .../bigtable/python/ops/bigtable_api.py       |   8 +-
 .../contrib/data/python/ops/batching.py       |  12 +-
 .../contrib/data/python/ops/error_ops.py      |   4 +-
 .../contrib/data/python/ops/grouping.py       |  12 +-
 .../data/python/ops/indexed_dataset_ops.py    |   3 +
 .../contrib/data/python/ops/interleave_ops.py |   3 +
 .../contrib/data/python/ops/optimization.py   |  12 +-
 .../contrib/data/python/ops/parsing_ops.py    |   4 +-
 .../data/python/ops/prefetching_ops.py        |  10 +-
 .../contrib/data/python/ops/random_ops.py     |   2 +-
 tensorflow/contrib/data/python/ops/readers.py |   6 +-
 .../contrib/data/python/ops/scan_ops.py       |   4 +-
 .../contrib/data/python/ops/shuffle_ops.py    |  11 +-
 tensorflow/contrib/data/python/ops/sliding.py |   4 +-
 .../contrib/data/python/ops/stats_ops.py      |   8 +-
 .../contrib/data/python/ops/threadpool.py     |   4 +-
 tensorflow/contrib/data/python/ops/unique.py  |   4 +-
 .../distribute/python/prefetching_ops_v2.py   |   3 +-
 .../hadoop/python/ops/hadoop_dataset_ops.py   |   4 +-
 .../kafka/python/ops/kafka_dataset_ops.py     |   6 +-
 .../kinesis/python/ops/kinesis_dataset_ops.py |   6 +-
 .../python/training/tensor_queue_dataset.py   |   4 +-
 tensorflow/python/data/kernel_tests/BUILD     |  13 ++
 .../python/data/kernel_tests/inputs_test.py   | 148 ++++++++++++++++++
 tensorflow/python/data/ops/dataset_ops.py     | 109 ++++++++-----
 .../data/ops/multi_device_iterator_ops.py     |   4 +
 tensorflow/python/data/ops/readers.py         |  12 ++
 27 files changed, 320 insertions(+), 100 deletions(-)
 create mode 100644 tensorflow/python/data/kernel_tests/inputs_test.py

diff --git a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
index 3e1b622867..cf56822ff4 100644
--- a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
+++ b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
@@ -575,7 +575,7 @@ def _normalize_columns(columns, provided_kwargs):
   return normalized
 
 
-class _BigtableKeyDataset(dataset_ops.Dataset):
+class _BigtableKeyDataset(dataset_ops.DatasetSource):
   """_BigtableKeyDataset is an abstract class representing the keys of a table.
   """
 
@@ -645,7 +645,7 @@ class _BigtableSampleKeysDataset(_BigtableKeyDataset):
         table=self._table._resource)  # pylint: disable=protected-access
 
 
-class _BigtableLookupDataset(dataset_ops.Dataset):
+class _BigtableLookupDataset(dataset_ops.DatasetSource):
   """_BigtableLookupDataset represents a dataset that retrieves values for keys.
   """
 
@@ -678,7 +678,7 @@ class _BigtableLookupDataset(dataset_ops.Dataset):
         columns=self._columns)
 
 
-class _BigtableScanDataset(dataset_ops.Dataset):
+class _BigtableScanDataset(dataset_ops.DatasetSource):
   """_BigtableScanDataset represents a dataset that retrieves keys and values.
   """
 
@@ -715,7 +715,7 @@ class _BigtableScanDataset(dataset_ops.Dataset):
         probability=self._probability)
 
 
-class _BigtableSampleKeyPairsDataset(dataset_ops.Dataset):
+class _BigtableSampleKeyPairsDataset(dataset_ops.DatasetSource):
   """_BigtableSampleKeyPairsDataset returns key pairs from a Bigtable table.
   """
 
diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py
index 367c159dc5..7a0f221284 100644
--- a/tensorflow/contrib/data/python/ops/batching.py
+++ b/tensorflow/contrib/data/python/ops/batching.py
@@ -345,12 +345,12 @@ def _padded_batch_sparse_window(dataset, padded_shape):
       dataset.apply(grouping.group_by_reducer(key_fn, reducer)))
 
 
-class _UnbatchDataset(dataset_ops.Dataset):
+class _UnbatchDataset(dataset_ops.UnaryDataset):
   """A dataset that splits the elements of its input into multiple elements."""
 
   def __init__(self, input_dataset):
     """See `unbatch()` for more details."""
-    super(_UnbatchDataset, self).__init__()
+    super(_UnbatchDataset, self).__init__(input_dataset)
     flat_shapes = nest.flatten(input_dataset.output_shapes)
     if any(s.ndims == 0 for s in flat_shapes):
       raise ValueError("Cannot unbatch an input with scalar components.")
@@ -514,12 +514,12 @@ def padded_batch_and_drop_remainder(batch_size,
   return _apply_fn
 
 
-class _DenseToSparseBatchDataset(dataset_ops.Dataset):
+class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that batches ragged dense elements into `tf.SparseTensor`s."""
 
   def __init__(self, input_dataset, batch_size, row_shape):
     """See `Dataset.dense_to_sparse_batch()` for more details."""
-    super(_DenseToSparseBatchDataset, self).__init__()
+    super(_DenseToSparseBatchDataset, self).__init__(input_dataset)
     if not isinstance(input_dataset.output_types, dtypes.DType):
       raise TypeError("DenseToSparseDataset requires an input whose elements "
                       "have a single component, whereas the input has %r." %
@@ -548,7 +548,7 @@ class _DenseToSparseBatchDataset(dataset_ops.Dataset):
     return self._input_dataset.output_types
 
 
-class _RestructuredDataset(dataset_ops.Dataset):
+class _RestructuredDataset(dataset_ops.UnaryDataset):
   """An internal helper for changing the structure and shape of a dataset."""
 
   def __init__(self,
@@ -583,7 +583,7 @@ class _RestructuredDataset(dataset_ops.Dataset):
       ValueError: If either `output_types` or `output_shapes` is not compatible
         with the structure of `dataset`.
     """
-    super(_RestructuredDataset, self).__init__()
+    super(_RestructuredDataset, self).__init__(dataset)
     self._input_dataset = dataset
 
     if not allow_unsafe_cast:
diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py
index b4a7521e08..615dbcabd4 100644
--- a/tensorflow/contrib/data/python/ops/error_ops.py
+++ b/tensorflow/contrib/data/python/ops/error_ops.py
@@ -51,12 +51,12 @@ def ignore_errors():
   return _apply_fn
 
 
-class _IgnoreErrorsDataset(dataset_ops.Dataset):
+class _IgnoreErrorsDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that silently ignores errors when computing its input."""
 
   def __init__(self, input_dataset):
     """See `Dataset.ignore_errors()` for details."""
-    super(_IgnoreErrorsDataset, self).__init__()
+    super(_IgnoreErrorsDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
 
   def _as_variant_tensor(self):
diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py
index 020167e4d1..7cae33beb3 100644
--- a/tensorflow/contrib/data/python/ops/grouping.py
+++ b/tensorflow/contrib/data/python/ops/grouping.py
@@ -282,12 +282,12 @@ def window_dataset(window_size):
   return _apply_fn
 
 
-class _GroupByReducerDataset(dataset_ops.Dataset):
+class _GroupByReducerDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that groups its input and performs a reduction."""
 
   def __init__(self, input_dataset, key_func, reducer):
     """See `group_by_reducer()` for details."""
-    super(_GroupByReducerDataset, self).__init__()
+    super(_GroupByReducerDataset, self).__init__(input_dataset)
 
     self._input_dataset = input_dataset
 
@@ -416,12 +416,12 @@ class _GroupByReducerDataset(dataset_ops.Dataset):
         **dataset_ops.flat_structure(self))
 
 
-class _GroupByWindowDataset(dataset_ops.Dataset):
+class _GroupByWindowDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that groups its input and performs a windowed reduction."""
 
   def __init__(self, input_dataset, key_func, reduce_func, window_size_func):
     """See `group_by_window()` for details."""
-    super(_GroupByWindowDataset, self).__init__()
+    super(_GroupByWindowDataset, self).__init__(input_dataset)
 
     self._input_dataset = input_dataset
 
@@ -525,12 +525,12 @@ class Reducer(object):
     return self._finalize_func
 
 
-class _MapXDataset(dataset_ops.Dataset):
+class _MapXDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that maps a function over elements in its input."""
 
   def __init__(self, input_dataset, map_func):
     """See `map_x_dataset()` for details."""
-    super(_MapXDataset, self).__init__()
+    super(_MapXDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
 
     wrapped_func = dataset_ops.StructuredFunctionWrapper(
diff --git a/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py b/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py
index a0932b4081..cc76ab0850 100644
--- a/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py
@@ -171,3 +171,6 @@ class IdentityIndexedDataset(IndexedDataset):
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.identity_indexed_dataset(self._size)
+
+  def _inputs(self):
+    return []
diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py
index 92d4251a86..bfa3fdf543 100644
--- a/tensorflow/contrib/data/python/ops/interleave_ops.py
+++ b/tensorflow/contrib/data/python/ops/interleave_ops.py
@@ -173,6 +173,9 @@ class _DirectedInterleaveDataset(dataset_ops.Dataset):
         **dataset_ops.flat_structure(self))
     # pylint: enable=protected-access
 
+  def _inputs(self):
+    return [self._selector_input] + self._data_inputs
+
   @property
   def output_classes(self):
     return self._data_inputs[0].output_classes
diff --git a/tensorflow/contrib/data/python/ops/optimization.py b/tensorflow/contrib/data/python/ops/optimization.py
index 73840452df..3eb172acd5 100644
--- a/tensorflow/contrib/data/python/ops/optimization.py
+++ b/tensorflow/contrib/data/python/ops/optimization.py
@@ -84,12 +84,12 @@ def optimize(optimizations=None):
   return _apply_fn
 
 
-class _AssertNextDataset(dataset_ops.Dataset):
+class _AssertNextDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that asserts which transformations happen next."""
 
   def __init__(self, input_dataset, transformations):
     """See `assert_next()` for details."""
-    super(_AssertNextDataset, self).__init__()
+    super(_AssertNextDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if transformations is None:
       raise ValueError("At least one transformation should be specified")
@@ -115,12 +115,12 @@ class _AssertNextDataset(dataset_ops.Dataset):
     return self._input_dataset.output_types
 
 
-class _ModelDataset(dataset_ops.Dataset):
+class _ModelDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that acts as an identity, and models performance."""
 
   def __init__(self, input_dataset):
     """See `optimize()` for details."""
-    super(_ModelDataset, self).__init__()
+    super(_ModelDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
 
   def _as_variant_tensor(self):
@@ -141,12 +141,12 @@ class _ModelDataset(dataset_ops.Dataset):
     return self._input_dataset.output_types
 
 
-class _OptimizeDataset(dataset_ops.Dataset):
+class _OptimizeDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that acts as an identity, and applies optimizations."""
 
   def __init__(self, input_dataset, optimizations):
     """See `optimize()` for details."""
-    super(_OptimizeDataset, self).__init__()
+    super(_OptimizeDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if optimizations is None:
       optimizations = []
diff --git a/tensorflow/contrib/data/python/ops/parsing_ops.py b/tensorflow/contrib/data/python/ops/parsing_ops.py
index 2701605e64..cfbba701b0 100644
--- a/tensorflow/contrib/data/python/ops/parsing_ops.py
+++ b/tensorflow/contrib/data/python/ops/parsing_ops.py
@@ -26,11 +26,11 @@ from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import parsing_ops
 
 
-class _ParseExampleDataset(dataset_ops.Dataset):
+class _ParseExampleDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that parses `example` dataset into a `dict` dataset."""
 
   def __init__(self, input_dataset, features, num_parallel_calls):
-    super(_ParseExampleDataset, self).__init__()
+    super(_ParseExampleDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if not all(types == dtypes.string
                for types in nest.flatten(input_dataset.output_types)):
diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py
index 5222011d04..f994425304 100644
--- a/tensorflow/contrib/data/python/ops/prefetching_ops.py
+++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py
@@ -262,10 +262,11 @@ class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator):
 # pylint: enable=protected-access
 
 
-class _PrefetchToDeviceDataset(dataset_ops.Dataset):
+class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset):
   """A `Dataset` whose iterator prefetches elements to another device."""
 
   def __init__(self, input_dataset, device, buffer_size):
+    super(_PrefetchToDeviceDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._device = device
     self._buffer_size = buffer_size if buffer_size is not None else 1
@@ -374,7 +375,7 @@ def copy_to_device(target_device, source_device="/cpu:0"):
 # TODO(rohanj): Use the _input_hostmem attr on the RemoteCall ops to indicate
 # all inputs to the Op are in host memory, thereby avoiding some unnecessary
 # Sends and Recvs.
-class _CopyToDeviceDataset(dataset_ops.Dataset):
+class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that copies elements to another device."""
 
   def __init__(self, input_dataset, target_device, source_device="/cpu:0"):
@@ -385,6 +386,7 @@ class _CopyToDeviceDataset(dataset_ops.Dataset):
       target_device: The name of the device to which elements would be copied.
       source_device: Device where input_dataset would be placed.
     """
+    super(_CopyToDeviceDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._target_device = target_device
     spec = framework_device.DeviceSpec().from_string(self._target_device)
@@ -612,6 +614,10 @@ class _PerDeviceGenerator(dataset_ops.Dataset):
           output_types=self._flat_output_types,
           output_shapes=self._flat_output_shapes)
 
+  def _inputs(self):
+    # TODO(b/116506223): Determine which datasets should be used as inputs here.
+    return []
+
   @property
   def output_types(self):
     return self._output_types
diff --git a/tensorflow/contrib/data/python/ops/random_ops.py b/tensorflow/contrib/data/python/ops/random_ops.py
index e670c4c835..344a0763c8 100644
--- a/tensorflow/contrib/data/python/ops/random_ops.py
+++ b/tensorflow/contrib/data/python/ops/random_ops.py
@@ -25,7 +25,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import gen_dataset_ops
 
 
-class RandomDataset(dataset_ops.Dataset):
+class RandomDataset(dataset_ops.DatasetSource):
   """A `Dataset` of pseudorandom values."""
 
   def __init__(self, seed=None):
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index 785b395707..d9d06e2703 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -508,7 +508,7 @@ def make_csv_dataset(
 _DEFAULT_READER_BUFFER_SIZE_BYTES = 4 * 1024 * 1024  # 4 MB
 
 
-class CsvDataset(dataset_ops.Dataset):
+class CsvDataset(dataset_ops.DatasetSource):
   """A Dataset comprising lines from one or more CSV files."""
 
   def __init__(self,
@@ -924,7 +924,7 @@ def _get_file_names(file_pattern, shuffle):
   return file_names
 
 
-class SqlDataset(dataset_ops.Dataset):
+class SqlDataset(dataset_ops.DatasetSource):
   """A `Dataset` consisting of the results from a SQL query."""
 
   def __init__(self, driver_name, data_source_name, query, output_types):
@@ -985,7 +985,7 @@ class SqlDataset(dataset_ops.Dataset):
     return self._output_types
 
 
-class LMDBDataset(dataset_ops.Dataset):
+class LMDBDataset(dataset_ops.DatasetSource):
   """A LMDB Dataset that reads the lmdb file."""
 
   def __init__(self, filenames):
diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py
index 6b002b4a53..c52582cd35 100644
--- a/tensorflow/contrib/data/python/ops/scan_ops.py
+++ b/tensorflow/contrib/data/python/ops/scan_ops.py
@@ -27,12 +27,12 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import gen_dataset_ops
 
 
-class _ScanDataset(dataset_ops.Dataset):
+class _ScanDataset(dataset_ops.UnaryDataset):
   """A dataset that scans a function across its input."""
 
   def __init__(self, input_dataset, initial_state, scan_func):
     """See `scan()` for details."""
-    super(_ScanDataset, self).__init__()
+    super(_ScanDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
 
     with ops.name_scope("initial_state"):
diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py
index 4356721704..985d1d87d0 100644
--- a/tensorflow/contrib/data/python/ops/shuffle_ops.py
+++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py
@@ -25,16 +25,11 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_dataset_ops
 
 
-class _ShuffleAndRepeatDataset(dataset_ops.Dataset):
+class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that fuses `shuffle` and `repeat`."""
 
-  def __init__(self,
-               input_dataset,
-               buffer_size,
-               count=None,
-               seed=None):
-    """See `Dataset.map()` for details."""
-    super(_ShuffleAndRepeatDataset, self).__init__()
+  def __init__(self, input_dataset, buffer_size, count=None, seed=None):
+    super(_ShuffleAndRepeatDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py
index b0d6a16c20..bcc383587c 100644
--- a/tensorflow/contrib/data/python/ops/sliding.py
+++ b/tensorflow/contrib/data/python/ops/sliding.py
@@ -26,12 +26,12 @@ from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.util import deprecation
 
 
-class _SlideDataset(dataset_ops.Dataset):
+class _SlideDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that passes a sliding window over its input."""
 
   def __init__(self, input_dataset, window_size, window_shift, window_stride):
     """See `sliding_window_batch` for details."""
-    super(_SlideDataset, self).__init__()
+    super(_SlideDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._window_size = ops.convert_to_tensor(
         window_size, dtype=dtypes.int64, name="window_stride")
diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/contrib/data/python/ops/stats_ops.py
index 7410ee8e05..bc47c5989d 100644
--- a/tensorflow/contrib/data/python/ops/stats_ops.py
+++ b/tensorflow/contrib/data/python/ops/stats_ops.py
@@ -84,11 +84,11 @@ class StatsAggregator(object):
     return gen_dataset_ops.stats_aggregator_summary(self._resource)
 
 
-class _SetStatsAggregatorDataset(dataset_ops.Dataset):
+class _SetStatsAggregatorDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that acts as an identity, and sets given stats_aggregator."""
 
   def __init__(self, input_dataset, stats_aggregator):
-    super(_SetStatsAggregatorDataset, self).__init__()
+    super(_SetStatsAggregatorDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._stats_aggregator = stats_aggregator
 
@@ -173,11 +173,11 @@ def latency_stats(tag):
   return _apply_fn
 
 
-class _StatsDataset(dataset_ops.Dataset):
+class _StatsDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that acts as an identity, and also records statistics."""
 
   def __init__(self, input_dataset, op_function, tag):
-    super(_StatsDataset, self).__init__()
+    super(_StatsDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._op_function = op_function
     self._tag = ops.convert_to_tensor(tag, dtype=dtypes.string)
diff --git a/tensorflow/contrib/data/python/ops/threadpool.py b/tensorflow/contrib/data/python/ops/threadpool.py
index dc67accdcf..9d165ad52a 100644
--- a/tensorflow/contrib/data/python/ops/threadpool.py
+++ b/tensorflow/contrib/data/python/ops/threadpool.py
@@ -61,11 +61,11 @@ class PrivateThreadPool(object):
           display_name=display_name)
 
 
-class _ThreadPoolDataset(dataset_ops.Dataset):
+class _ThreadPoolDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that acts as an identity, and sets a custom threadpool."""
 
   def __init__(self, input_dataset, thread_pool):
-    super(_ThreadPoolDataset, self).__init__()
+    super(_ThreadPoolDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._thread_pool = thread_pool
 
diff --git a/tensorflow/contrib/data/python/ops/unique.py b/tensorflow/contrib/data/python/ops/unique.py
index e0d606311c..bad67a580d 100644
--- a/tensorflow/contrib/data/python/ops/unique.py
+++ b/tensorflow/contrib/data/python/ops/unique.py
@@ -47,12 +47,12 @@ def unique():
   return _apply_fn
 
 
-class _UniqueDataset(dataset_ops.Dataset):
+class _UniqueDataset(dataset_ops.UnaryDataset):
   """A `Dataset` contains the unique elements from its input."""
 
   def __init__(self, input_dataset):
     """See `unique()` for details."""
-    super(_UniqueDataset, self).__init__()
+    super(_UniqueDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if input_dataset.output_types not in (dtypes.int32, dtypes.int64,
                                           dtypes.string):
diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
index 1ff60c0762..492d82f6a1 100644
--- a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
+++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
@@ -155,10 +155,11 @@ class _PrefetchToDeviceIterator(object):
 # pylint: enable=protected-access
 
 
-class _PrefetchToDeviceDataset(dataset_ops.Dataset):
+class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset):
   """A `Dataset` whose iterator prefetches elements to other device(s)."""
 
   def __init__(self, input_dataset, devices, buffer_size):
+    super(_PrefetchToDeviceDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._devices = devices
     self._buffer_size = buffer_size if buffer_size is not None else 1
diff --git a/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py b/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py
index 6e0e628655..bf398b838d 100644
--- a/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py
+++ b/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py
@@ -19,14 +19,14 @@ from __future__ import print_function
 
 from tensorflow.contrib.hadoop.python.ops import gen_dataset_ops
 from tensorflow.contrib.hadoop.python.ops import hadoop_op_loader  # pylint: disable=unused-import
-from tensorflow.python.data.ops.dataset_ops import Dataset
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 
 
-class SequenceFileDataset(Dataset):
+class SequenceFileDataset(dataset_ops.DatasetSource):
   """A Sequence File Dataset that reads the sequence file."""
 
   def __init__(self, filenames):
diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
index a1624614d1..7129f09e8b 100644
--- a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
+++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
@@ -17,15 +17,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.kafka.python.ops import kafka_op_loader  # pylint: disable=unused-import
 from tensorflow.contrib.kafka.python.ops import gen_dataset_ops
-from tensorflow.python.data.ops.dataset_ops import Dataset
+from tensorflow.contrib.kafka.python.ops import kafka_op_loader  # pylint: disable=unused-import
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 
 
-class KafkaDataset(Dataset):
+class KafkaDataset(dataset_ops.DatasetSource):
   """A Kafka Dataset that consumes the message.
   """
 
diff --git a/tensorflow/contrib/kinesis/python/ops/kinesis_dataset_ops.py b/tensorflow/contrib/kinesis/python/ops/kinesis_dataset_ops.py
index ca2df95ba4..75806dbbeb 100644
--- a/tensorflow/contrib/kinesis/python/ops/kinesis_dataset_ops.py
+++ b/tensorflow/contrib/kinesis/python/ops/kinesis_dataset_ops.py
@@ -17,15 +17,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.kinesis.python.ops import kinesis_op_loader  # pylint: disable=unused-import
 from tensorflow.contrib.kinesis.python.ops import gen_dataset_ops
-from tensorflow.python.data.ops.dataset_ops import Dataset
+from tensorflow.contrib.kinesis.python.ops import kinesis_op_loader  # pylint: disable=unused-import
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 
 
-class KinesisDataset(Dataset):
+class KinesisDataset(dataset_ops.DatasetSource):
   """A Kinesis Dataset that consumes the message.
 
   Kinesis is a managed service provided by AWS for data streaming.
diff --git a/tensorflow/contrib/training/python/training/tensor_queue_dataset.py b/tensorflow/contrib/training/python/training/tensor_queue_dataset.py
index f46d03209c..8896a95327 100644
--- a/tensorflow/contrib/training/python/training/tensor_queue_dataset.py
+++ b/tensorflow/contrib/training/python/training/tensor_queue_dataset.py
@@ -29,7 +29,7 @@ from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.util import nest as tf_nest
 
 
-class _PrependFromQueueAndPaddedBatchDataset(dataset_ops.Dataset):
+class _PrependFromQueueAndPaddedBatchDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that prepends a queue to another `Dataset`.
 
   A vector of handles to the queue is returned as the first component of
@@ -39,7 +39,7 @@ class _PrependFromQueueAndPaddedBatchDataset(dataset_ops.Dataset):
 
   def __init__(self, input_dataset, batch_size, padded_shapes, padding_values):
     """Initialize `PrependFromQueueAndPaddedBatchDataset`."""
-    super(_PrependFromQueueAndPaddedBatchDataset, self).__init__()
+    super(_PrependFromQueueAndPaddedBatchDataset, self).__init__(input_dataset)
     if sparse.any_sparse(input_dataset.output_classes):
       raise TypeError(
           "Batching of padded sparse tensors is not currently supported")
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 28ee3ebaa6..7a6f03d4d3 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -445,3 +445,16 @@ tf_py_test(
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
+
+tf_py_test(
+    name = "inputs_test",
+    size = "small",
+    srcs = ["inputs_test.py"],
+    additional_deps = [
+        "@absl_py//absl/testing:parameterized",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
diff --git a/tensorflow/python/data/kernel_tests/inputs_test.py b/tensorflow/python/data/kernel_tests/inputs_test.py
new file mode 100644
index 0000000000..4c9279dd95
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/inputs_test.py
@@ -0,0 +1,148 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import readers
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.platform import test
+
+
+class InputsTest(test.TestCase, parameterized.TestCase):
+
+  @staticmethod
+  def make_apply_fn(dataset):
+
+    def apply_fn(dataset):
+
+      def _apply_fn(dataset):
+        return dataset.cache()
+
+      return dataset.apply(_apply_fn)
+
+    return apply_fn
+
+  @staticmethod
+  def make_gen():
+
+    def gen():
+      yield 42
+
+    return gen
+
+  @staticmethod
+  def make_interleave_fn(dataset, num_parallel_calls=None):
+
+    def interleave_fn(dataset):
+      return dataset.interleave(
+          lambda x: dataset_ops.Dataset.range(0),
+          cycle_length=2,
+          num_parallel_calls=num_parallel_calls)
+
+    return interleave_fn
+
+  @parameterized.named_parameters(
+      ("FixedLengthRecord", readers.FixedLengthRecordDataset("", 42)),
+      ("FromGenerator",
+       dataset_ops.Dataset.from_generator(make_gen.__func__(), dtypes.int32),
+       1),
+      ("FromSparseTensorSlices",
+       dataset_ops.Dataset.from_sparse_tensor_slices(
+           sparse_tensor.SparseTensor(
+               indices=np.array([[0, 0], [1, 0], [2, 0]]),
+               values=np.array([0, 0, 0]),
+               dense_shape=np.array([3, 1])))),
+      ("FromTensors", dataset_ops.Dataset.from_tensors([42])),
+      ("FromTensorSlices", dataset_ops.Dataset.from_tensors([42])),
+      ("Range", dataset_ops.Dataset.range(10)),
+      ("TextLine", readers.TextLineDataset("")),
+      ("TFRecord", readers.TFRecordDataset(""), 1),
+  )
+  def testDatasetSourceInputs(self, dataset, num_inputs=0):
+    self.assertEqual(num_inputs, len(dataset._inputs()))
+
+  @parameterized.named_parameters(
+      ("Apply", make_apply_fn.__func__(dataset_ops.Dataset.range(0)),
+       dataset_ops.Dataset.range(0)),
+      ("Batch", lambda x: x.batch(10), dataset_ops.Dataset.range(0)),
+      ("Cache", lambda x: x.cache(), dataset_ops.Dataset.range(0)),
+      ("Filter", lambda x: x.filter(lambda x: True),
+       dataset_ops.Dataset.range(0)),
+      ("FlatMap", lambda x: x.flat_map(lambda x: dataset_ops.Dataset.range(0)),
+       dataset_ops.Dataset.range(0)),
+      ("Interleave", make_interleave_fn.__func__(dataset_ops.Dataset.range(0)),
+       dataset_ops.Dataset.range(0)),
+      ("Map", lambda x: x.map(lambda x: x), dataset_ops.Dataset.range(0)),
+      ("PaddedBatch", lambda x: x.padded_batch(10, []),
+       dataset_ops.Dataset.range(0)),
+      ("ParallelInterleave",
+       make_interleave_fn.__func__(dataset_ops.Dataset.range(0), 2),
+       dataset_ops.Dataset.range(0)),
+      ("ParallelMap", lambda x: x.map(lambda x: x, num_parallel_calls=2),
+       dataset_ops.Dataset.range(0)),
+      ("Repeat", lambda x: x.repeat(), dataset_ops.Dataset.range(0)),
+      ("Shuffle", lambda x: x.shuffle(10), dataset_ops.Dataset.range(0)),
+      ("Skip", lambda x: x.skip(1), dataset_ops.Dataset.range(0)),
+      ("Take", lambda x: x.take(1), dataset_ops.Dataset.range(0)),
+      ("Window", lambda x: x.window(10), dataset_ops.Dataset.range(0)),
+  )
+  def testUnaryTransformationInputs(self, dataset_fn, input_dataset):
+    self.assertEqual([input_dataset], dataset_fn(input_dataset)._inputs())
+
+  @parameterized.named_parameters(
+      ("Concatenate", lambda x, y: x.concatenate(y),
+       dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1)))
+  def testBinaryTransformationInputs(self, dataset_fn, input1, input2):
+    self.assertEqual([input1, input2], dataset_fn(input1, input2)._inputs())
+
+  @parameterized.named_parameters(
+      ("ZipOne", dataset_ops.Dataset.zip, (dataset_ops.Dataset.range(0))),
+      ("ZipNest", dataset_ops.Dataset.zip,
+       (dataset_ops.Dataset.range(0),
+        (dataset_ops.Dataset.range(1), dataset_ops.Dataset.range(2)))),
+      ("ZipTuple", dataset_ops.Dataset.zip,
+       (dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1))))
+  def testVariadicTransformationInputs(self, dataset_fn, input_datasets):
+    self.assertEqual(
+        nest.flatten(input_datasets),
+        dataset_fn(input_datasets)._inputs())
+
+  def testCollectInputs(self):
+    ds1 = dataset_ops.Dataset.range(0)
+    ds2 = ds1.concatenate(ds1)
+    ds3 = dataset_ops.Dataset.zip((ds2, ds1, ds2))
+
+    inputs = []
+    queue = [ds3]
+    while queue:
+      ds = queue[0]
+      queue = queue[1:]
+      queue.extend(ds._inputs())
+      inputs.append(ds)
+
+    self.assertEqual(5, inputs.count(ds1))
+    self.assertEqual(2, inputs.count(ds2))
+    self.assertEqual(1, inputs.count(ds3))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 7c20c049f5..ac87a451b1 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -80,6 +80,12 @@ class Dataset(object):
     """
     raise NotImplementedError("Dataset._as_variant_tensor")
 
+  @abc.abstractmethod
+  def _inputs(self):
+    """Returns a list of the input datasets of the dataset."""
+
+    raise NotImplementedError("Dataset._inputs")
+
   def make_initializable_iterator(self, shared_name=None):
     """Creates an `Iterator` for enumerating the elements of this dataset.
 
@@ -1007,8 +1013,8 @@ class Dataset(object):
       return ParallelMapDataset(self, map_func, num_parallel_calls)
 
   def flat_map(self, map_func):
-    """Maps `map_func` across this dataset and flattens the result. 
-    
+    """Maps `map_func` across this dataset and flattens the result.
+
     Use `flat_map` if you want to make sure that the order of your dataset
     stays the same. For example, to flatten a dataset of batches into a
     dataset of their elements:
@@ -1017,15 +1023,15 @@ class Dataset(object):
     # NOTE: The following examples use `{ ... }` to represent the
     # contents of a dataset. '[...]' represents a tensor.
     a = {[1,2,3,4,5], [6,7,8,9], [10]}
-    
-    a.flat_map(lambda x: Dataset.from_tensor_slices(x)) == 
+
+    a.flat_map(lambda x: Dataset.from_tensor_slices(x)) ==
       {[1,2,3,4,5,6,7,8,9,10]}
     ```
-    
-    `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since 
-    `flat_map` produces the same output as 
+
+    `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since
+    `flat_map` produces the same output as
     `tf.data.Dataset.interleave(cycle_length=1)`
-    
+
     Args:
       map_func: A function mapping a nested structure of tensors (having shapes
         and types defined by `self.output_shapes` and `self.output_types`) to a
@@ -1157,6 +1163,7 @@ class Dataset(object):
     dataset = transformation_func(self)
     if not isinstance(dataset, Dataset):
       raise TypeError("`transformation_func` must return a Dataset.")
+    dataset._input_datasets = [self]  # pylint: disable=protected-access
     return dataset
 
   def window(self, size, shift=None, stride=1, drop_remainder=False):
@@ -1199,7 +1206,25 @@ class Dataset(object):
     return WindowDataset(self, size, shift, stride, drop_remainder)
 
 
-class TensorDataset(Dataset):
+class DatasetSource(Dataset):
+  """Abstract class representing a dataset with no inputs."""
+
+  def _inputs(self):
+    return []
+
+
+class UnaryDataset(Dataset):
+  """Abstract class representing a dataset with one input."""
+
+  def __init__(self, input_dataset):
+    super(UnaryDataset, self).__init__()
+    self._input_dataset = input_dataset
+
+  def _inputs(self):
+    return [self._input_dataset]
+
+
+class TensorDataset(DatasetSource):
   """A `Dataset` with a single element, viz. a nested structure of tensors."""
 
   def __init__(self, tensors):
@@ -1239,7 +1264,7 @@ class TensorDataset(Dataset):
     return self._output_types
 
 
-class TensorSliceDataset(Dataset):
+class TensorSliceDataset(DatasetSource):
   """A `Dataset` of slices from a nested structure of tensors."""
 
   def __init__(self, tensors):
@@ -1283,7 +1308,7 @@ class TensorSliceDataset(Dataset):
     return self._output_types
 
 
-class SparseTensorSliceDataset(Dataset):
+class SparseTensorSliceDataset(DatasetSource):
   """A `Dataset` that splits a rank-N `tf.SparseTensor` into its rows."""
 
   def __init__(self, sparse_tensor):
@@ -1384,6 +1409,9 @@ class _VariantDataset(Dataset):
   def _as_variant_tensor(self):
     return self._dataset_variant
 
+  def _inputs(self):
+    return []
+
   @property
   def output_classes(self):
     return self._structure.output_classes
@@ -1624,7 +1652,7 @@ def flat_structure(dataset):
   }
 
 
-class _GeneratorDataset(Dataset):
+class _GeneratorDataset(DatasetSource):
   """A `Dataset` that generates elements by invoking a function."""
 
   def __init__(self, init_args, init_func, next_func, finalize_func):
@@ -1725,6 +1753,9 @@ class ZipDataset(Dataset):
         **flat_structure(self))
     # pylint: enable=protected-access
 
+  def _inputs(self):
+    return nest.flatten(self._datasets)
+
   @property
   def output_classes(self):
     return nest.pack_sequence_as(
@@ -1760,6 +1791,7 @@ class ConcatenateDataset(Dataset):
       raise TypeError(
           "Two datasets to concatenate have different classes %s and %s" %
           (input_dataset.output_classes, dataset_to_concatenate.output_classes))
+    self._input_datasets = [input_dataset, dataset_to_concatenate]
 
   def _as_variant_tensor(self):
     # pylint: disable=protected-access
@@ -1769,6 +1801,9 @@ class ConcatenateDataset(Dataset):
         **flat_structure(self))
     # pylint: enable=protected-access
 
+  def _inputs(self):
+    return [self._input_dataset, self._dataset_to_concatenate]
+
   @property
   def output_classes(self):
     return self._input_dataset.output_classes
@@ -1787,12 +1822,12 @@ class ConcatenateDataset(Dataset):
     return self._input_dataset.output_types
 
 
-class RepeatDataset(Dataset):
+class RepeatDataset(UnaryDataset):
   """A `Dataset` that repeats its input several times."""
 
   def __init__(self, input_dataset, count):
     """See `Dataset.repeat()` for details."""
-    super(RepeatDataset, self).__init__()
+    super(RepeatDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if count is None:
       self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")
@@ -1819,7 +1854,7 @@ class RepeatDataset(Dataset):
     return self._input_dataset.output_types
 
 
-class RangeDataset(Dataset):
+class RangeDataset(DatasetSource):
   """A `Dataset` of a step separated range of values."""
 
   def __init__(self, *args):
@@ -1867,12 +1902,12 @@ class RangeDataset(Dataset):
     return dtypes.int64
 
 
-class CacheDataset(Dataset):
+class CacheDataset(UnaryDataset):
   """A `Dataset` that caches elements of its input."""
 
   def __init__(self, input_dataset, filename):
     """See `Dataset.cache()` for details."""
-    super(CacheDataset, self).__init__()
+    super(CacheDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._filename = ops.convert_to_tensor(
         filename, dtype=dtypes.string, name="filename")
@@ -1896,7 +1931,7 @@ class CacheDataset(Dataset):
     return self._input_dataset.output_types
 
 
-class ShuffleDataset(Dataset):
+class ShuffleDataset(UnaryDataset):
   """A `Dataset` that randomly shuffles the elements of its input."""
 
   def __init__(self,
@@ -1924,7 +1959,7 @@ class ShuffleDataset(Dataset):
     Raises:
       ValueError: if invalid arguments are provided.
     """
-    super(ShuffleDataset, self).__init__()
+    super(ShuffleDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
@@ -1956,12 +1991,12 @@ class ShuffleDataset(Dataset):
     return self._input_dataset.output_types
 
 
-class TakeDataset(Dataset):
+class TakeDataset(UnaryDataset):
   """A `Dataset` containing the first `count` elements from its input."""
 
   def __init__(self, input_dataset, count):
     """See `Dataset.take()` for details."""
-    super(TakeDataset, self).__init__()
+    super(TakeDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._count = ops.convert_to_tensor(count, dtype=dtypes.int64, name="count")
 
@@ -1984,12 +2019,12 @@ class TakeDataset(Dataset):
     return self._input_dataset.output_types
 
 
-class SkipDataset(Dataset):
+class SkipDataset(UnaryDataset):
   """A `Dataset` skipping the first `count` elements from its input."""
 
   def __init__(self, input_dataset, count):
     """See `Dataset.skip()` for details."""
-    super(SkipDataset, self).__init__()
+    super(SkipDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._count = ops.convert_to_tensor(count, dtype=dtypes.int64, name="count")
 
@@ -2012,12 +2047,12 @@ class SkipDataset(Dataset):
     return self._input_dataset.output_types
 
 
-class BatchDataset(Dataset):
+class BatchDataset(UnaryDataset):
   """A `Dataset` that batches contiguous elements from its input."""
 
   def __init__(self, input_dataset, batch_size, drop_remainder):
     """See `Dataset.batch()` for details."""
-    super(BatchDataset, self).__init__()
+    super(BatchDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._batch_size = ops.convert_to_tensor(
         batch_size, dtype=dtypes.int64, name="batch_size")
@@ -2166,13 +2201,13 @@ def _default_padding(input_dataset):
   return nest.map_structure(make_zero, input_dataset.output_types)
 
 
-class PaddedBatchDataset(Dataset):
+class PaddedBatchDataset(UnaryDataset):
   """A `Dataset` that batches and pads contiguous elements from its input."""
 
   def __init__(self, input_dataset, batch_size, padded_shapes, padding_values,
                drop_remainder):
     """See `Dataset.batch()` for details."""
-    super(PaddedBatchDataset, self).__init__()
+    super(PaddedBatchDataset, self).__init__(input_dataset)
     if sparse.any_sparse(input_dataset.output_classes):
       # TODO(b/63669786): support batching of sparse tensors
       raise TypeError(
@@ -2272,12 +2307,12 @@ def _warn_if_collections(transformation_name):
                   % transformation_name)
 
 
-class MapDataset(Dataset):
+class MapDataset(UnaryDataset):
   """A `Dataset` that maps a function over elements in its input."""
 
   def __init__(self, input_dataset, map_func, use_inter_op_parallelism=True):
     """See `Dataset.map()` for details."""
-    super(MapDataset, self).__init__()
+    super(MapDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._use_inter_op_parallelism = use_inter_op_parallelism
 
@@ -2338,12 +2373,12 @@ class ParallelMapDataset(MapDataset):
     # pylint: enable=protected-access
 
 
-class FlatMapDataset(Dataset):
+class FlatMapDataset(UnaryDataset):
   """A `Dataset` that maps a function over its input and flattens the result."""
 
   def __init__(self, input_dataset, map_func):
     """See `Dataset.flat_map()` for details."""
-    super(FlatMapDataset, self).__init__()
+    super(FlatMapDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
 
     wrapped_func = StructuredFunctionWrapper(
@@ -2434,12 +2469,12 @@ class ParallelInterleaveDataset(FlatMapDataset):
     return "Dataset.interleave()"
 
 
-class FilterDataset(Dataset):
+class FilterDataset(UnaryDataset):
   """A `Dataset` that filters its input according to a predicate function."""
 
   def __init__(self, input_dataset, predicate):
     """See `Dataset.filter()` for details."""
-    super(FilterDataset, self).__init__()
+    super(FilterDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     wrapped_func = StructuredFunctionWrapper(
         predicate, "Dataset.filter()", input_dataset)
@@ -2469,12 +2504,12 @@ class FilterDataset(Dataset):
     return self._input_dataset.output_types
 
 
-class PrefetchDataset(Dataset):
+class PrefetchDataset(UnaryDataset):
   """A `Dataset` that asynchronously prefetches its input."""
 
   def __init__(self, input_dataset, buffer_size):
     """See `Dataset.prefetch()` for details."""
-    super(PrefetchDataset, self).__init__()
+    super(PrefetchDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if buffer_size is None:
       buffer_size = -1  # This is the sentinel for auto-tuning.
@@ -2500,12 +2535,12 @@ class PrefetchDataset(Dataset):
     return self._input_dataset.output_types
 
 
-class WindowDataset(Dataset):
+class WindowDataset(UnaryDataset):
   """A dataset that creates window datasets from the input elements."""
 
   def __init__(self, input_dataset, size, shift, stride, drop_remainder):
     """See `window_dataset()` for more details."""
-    super(WindowDataset, self).__init__()
+    super(WindowDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._size = ops.convert_to_tensor(size, dtype=dtypes.int64, name="size")
     self._shift = ops.convert_to_tensor(shift, dtype=dtypes.int64, name="shift")
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index c914a43956..b7d3aac206 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -116,6 +116,10 @@ class _PerDeviceGenerator(dataset_ops.Dataset):
           output_types=self._flat_output_types,
           output_shapes=self._flat_output_shapes)
 
+  def _inputs(self):
+    # TODO(b/116506223): Determine which datasets should be used as inputs here.
+    return []
+
   @property
   def output_types(self):
     return self._output_types
diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py
index 066e09969c..b0f26631f9 100644
--- a/tensorflow/python/data/ops/readers.py
+++ b/tensorflow/python/data/ops/readers.py
@@ -61,6 +61,9 @@ class TextLineDataset(dataset_ops.Dataset):
     return gen_dataset_ops.text_line_dataset(
         self._filenames, self._compression_type, self._buffer_size)
 
+  def _inputs(self):
+    return []
+
   @property
   def output_classes(self):
     return ops.Tensor
@@ -105,6 +108,9 @@ class _TFRecordDataset(dataset_ops.Dataset):
     return gen_dataset_ops.tf_record_dataset(
         self._filenames, self._compression_type, self._buffer_size)
 
+  def _inputs(self):
+    return []
+
   @property
   def output_classes(self):
     return ops.Tensor
@@ -224,6 +230,9 @@ class TFRecordDataset(dataset_ops.Dataset):
   def _as_variant_tensor(self):
     return self._impl._as_variant_tensor()  # pylint: disable=protected-access
 
+  def _inputs(self):
+    return self._impl._inputs()  # pylint: disable=protected-access
+
   @property
   def output_classes(self):
     return self._impl.output_classes
@@ -278,6 +287,9 @@ class FixedLengthRecordDataset(dataset_ops.Dataset):
         self._filenames, self._header_bytes, self._record_bytes,
         self._footer_bytes, self._buffer_size)
 
+  def _inputs(self):
+    return []
+
   @property
   def output_classes(self):
     return ops.Tensor
-- 
GitLab


From 937ad7c27f0d289067c935543d282e5ac5a310b1 Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Tue, 25 Sep 2018 14:00:41 -0700
Subject: [PATCH 0690/1357] Adding check around AddFunctionLibrary

---
 tensorflow/core/kernels/partitioned_function_ops.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index 7a5a2ff8fa..fdb4c84c46 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -102,7 +102,8 @@ class PartitionedCallOp : public AsyncOpKernel {
         // by name.
         auto graph = tensorflow::MakeUnique<Graph>(fbody->graph->flib_def());
         FunctionLibraryDefinition global_flib(OpRegistry::Global(), {});
-        graph.get()->AddFunctionLibrary(global_flib.ToProto());
+        TF_CHECK_OK(
+                    graph.get()->AddFunctionLibrary(global_flib.ToProto()));
         CopyGraph(*fbody->graph, graph.get());
         OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done);
 
@@ -258,7 +259,8 @@ class PartitionedCallOp : public AsyncOpKernel {
     for (const auto& partition : partitions) {
       std::unique_ptr<Graph> subgraph(new Graph(graph->flib_def()));
       FunctionLibraryDefinition global_flib(OpRegistry::Global(), {});
-      subgraph.get()->AddFunctionLibrary(global_flib.ToProto());
+      TF_CHECK_OK(
+                subgraph.get()->AddFunctionLibrary(global_flib.ToProto()));
       GraphConstructorOptions opts;
       opts.allow_internal_ops = true;
       opts.expect_device_spec = true;
-- 
GitLab


From b8c6aa794d8c5b6ecda487f81ea28699b0793a81 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 14:00:19 -0700
Subject: [PATCH 0691/1357] Fix documentation

PiperOrigin-RevId: 214499034
---
 tensorflow/contrib/tpu/python/tpu/tpu_context.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
index a5f86ecd5f..7cfb6c38fa 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
@@ -703,7 +703,7 @@ def _get_tpu_context(config, train_batch_size, eval_batch_size,
       config.tpu_config.num_cores_per_replica is None):
     logging.warning(
         'Setting TPUConfig.num_shards==1 is an unsupported behavior. '
-        'Please fix as soon as possible (leaving num_shards as None.')
+        'Please fix as soon as possible (leaving num_shards as None.)')
     return _OneCoreTPUContext(config, train_batch_size, eval_batch_size,
                               predict_batch_size, use_tpu)
 
-- 
GitLab


From 153578f3c90ca423501151adcbaf6b81e05e2440 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 14:01:59 -0700
Subject: [PATCH 0692/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 214499338

---
 tensorflow/go/op/wrappers.go | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 8b60e6fd25..9dd487e73b 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -12211,6 +12211,17 @@ func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...Fix
 	return op.Output(0)
 }
 
+// StringLengthAttr is an optional argument to StringLength.
+type StringLengthAttr func(optionalAttr)
+
+// StringLengthUnit sets the optional unit attribute to value.
+// If not specified, defaults to "BYTE"
+func StringLengthUnit(value string) StringLengthAttr {
+	return func(m optionalAttr) {
+		m["unit"] = value
+	}
+}
+
 // String lengths of `input`.
 //
 // Computes the length of each string given in the input tensor.
@@ -12220,15 +12231,20 @@ func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...Fix
 //
 // Returns Integer tensor that has the same shape as `input`. The output contains the
 // element-wise string lengths of `input`.
-func StringLength(scope *Scope, input tf.Output) (output tf.Output) {
+func StringLength(scope *Scope, input tf.Output, optional ...StringLengthAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
 		Type: "StringLength",
 		Input: []tf.Input{
 			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
-- 
GitLab


From 038d15d8e2037d4a45e60e076429d67ec7d5ace1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 14:35:40 -0700
Subject: [PATCH 0693/1357] Bug fix for OpOutputPortIdToArgId, include
 type_list_attr.

PiperOrigin-RevId: 214505566
---
 tensorflow/core/grappler/graph_view.cc      | 27 ++++++++++---------
 tensorflow/core/grappler/graph_view_test.cc | 29 +++++++++++++++++++++
 2 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/grappler/graph_view.cc b/tensorflow/core/grappler/graph_view.cc
index b8d8243174..2619a9a8f3 100644
--- a/tensorflow/core/grappler/graph_view.cc
+++ b/tensorflow/core/grappler/graph_view.cc
@@ -29,21 +29,24 @@ int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
       return output_arg_id;
     }
 
+    // Default is 1 port per output arg.
+    int n = 1;
+
     const auto& output_arg = op.output_arg(output_arg_id);
     if (!output_arg.number_attr().empty()) {
-      const int n = node.attr().at(output_arg.number_attr()).i();
-      if (n < 0) {
-        // This should never happen.
-        DCHECK_GE(n, 0);
-        return -1;
-      }
-      if (port_id < n) {
-        return output_arg_id;
-      }
-      port_id -= n;
-    } else {
-      --port_id;
+      n = node.attr().at(output_arg.number_attr()).i();
+    } else if (!output_arg.type_list_attr().empty()) {
+      n = node.attr().at(output_arg.type_list_attr()).list().type_size();
+    }
+
+    if (n < 0) {
+      // This should never happen.
+      DCHECK_GE(n, 0);
+      return -1;
+    } else if (port_id < n) {
+      return output_arg_id;
     }
+    port_id -= n;
   }
 
   return -1;
diff --git a/tensorflow/core/grappler/graph_view_test.cc b/tensorflow/core/grappler/graph_view_test.cc
index 30512d9d47..3d7d2faf7c 100644
--- a/tensorflow/core/grappler/graph_view_test.cc
+++ b/tensorflow/core/grappler/graph_view_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/graph_view.h"
+#include "tensorflow/cc/ops/parsing_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
@@ -79,6 +80,34 @@ TEST_F(GraphViewTest, OpOutputPortIdToArgIdSparseSplit) {
   }
 }
 
+TEST_F(GraphViewTest, ParseSingleExample) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a = ops::Const<string>(s.WithOpName("a"), "", {});
+  Output b = ops::Const<int64>(s.WithOpName("b"), 1, {1, 1});
+  ops::ParseSingleExample c(s.WithOpName("c"), a, {b, b}, 2, {"w", "x"},
+                            {"y", "z"}, {DT_INT64, DT_INT64}, {{1}, {1}});
+
+  GraphDef graph_def;
+  TF_CHECK_OK(s.ToGraphDef(&graph_def));
+  GraphView graph_view(&graph_def);
+
+  const NodeDef& c_node_def = *graph_view.GetNode("c");
+
+  const OpDef* c_op_def = nullptr;
+  EXPECT_TRUE(
+      OpRegistry::Global()->LookUpOpDef(c_node_def.op(), &c_op_def).ok());
+
+  EXPECT_EQ(0, OpOutputPortIdToArgId(c_node_def, *c_op_def, 0));
+  EXPECT_EQ(0, OpOutputPortIdToArgId(c_node_def, *c_op_def, 1));
+  EXPECT_EQ(1, OpOutputPortIdToArgId(c_node_def, *c_op_def, 2));
+  EXPECT_EQ(1, OpOutputPortIdToArgId(c_node_def, *c_op_def, 3));
+  EXPECT_EQ(2, OpOutputPortIdToArgId(c_node_def, *c_op_def, 4));
+  EXPECT_EQ(2, OpOutputPortIdToArgId(c_node_def, *c_op_def, 5));
+  EXPECT_EQ(3, OpOutputPortIdToArgId(c_node_def, *c_op_def, 6));
+  EXPECT_EQ(3, OpOutputPortIdToArgId(c_node_def, *c_op_def, 7));
+  EXPECT_EQ(-1, OpOutputPortIdToArgId(c_node_def, *c_op_def, 8));
+}
+
 TEST_F(GraphViewTest, BasicGraph) {
   TrivialTestGraphInputYielder fake_input(4, 2, 2, false, {"/CPU:0", "/GPU:0"});
   GrapplerItem item;
-- 
GitLab


From 20c71535c5f1ed1d918d6cc6e327ffbba49ecbd6 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Tue, 25 Sep 2018 14:46:09 -0700
Subject: [PATCH 0694/1357] Internal change.

PiperOrigin-RevId: 214507546
---
 tensorflow/contrib/lite/build_def.bzl         |  38 +++
 tensorflow/contrib/lite/python/BUILD          |   2 +-
 .../lite/python/convert_saved_model.py        |  12 +-
 tensorflow/contrib/lite/testing/BUILD         |  27 ++
 .../model_coverage/model_coverage_lib.py      | 241 ++++++++++++++++++
 .../model_coverage/model_coverage_lib_test.py | 130 ++++++++++
 6 files changed, 443 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
 create mode 100644 tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index fc4d9b4f17..7f5c6bdc2f 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -391,3 +391,41 @@ def gen_selected_ops(name, model):
               (tool, model, out, tflite_path[2:]),
         tools = [tool],
     )
+
+def gen_full_model_test(conversion_modes, models, data, test_suite_tag):
+    """Generates Python test targets for testing TFLite models.
+
+    Args:
+      conversion_modes: List of conversion modes to test the models on.
+      models: List of models to test.
+      data: List of BUILD targets linking the data.
+      test_suite_tag: Tag identifying the model test suite.
+    """
+    options = [
+        (conversion_mode, model)
+        for model in models
+        for conversion_mode in conversion_modes
+    ]
+
+    for conversion_mode, model_name in options:
+        native.py_test(
+            name = "model_coverage_test_%s_%s" % (model_name, conversion_mode.lower()),
+            srcs = ["model_coverage_test.py"],
+            main = "model_coverage_test.py",
+            args = [
+                "--model_name=%s" % model_name,
+                "--converter_mode=%s" % conversion_mode,
+            ],
+            data = data,
+            srcs_version = "PY2AND3",
+            tags = [
+                "no_oss",
+                "no_windows",
+                "notap",
+            ] + [test_suite_tag],
+            deps = [
+                "//tensorflow/contrib/lite/testing:model_coverage_lib",
+                "//tensorflow/contrib/lite/python:lite",
+                "//tensorflow/python:client_testlib",
+            ],
+        )
diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index 57e1290e07..916788f215 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -144,7 +144,7 @@ py_library(
     name = "convert_saved_model",
     srcs = ["convert_saved_model.py"],
     srcs_version = "PY2AND3",
-    visibility = ["//visibility:public"],
+    visibility = ["//tensorflow/contrib/lite:__subpackages__"],
     deps = [
         ":convert",
         "//tensorflow/contrib/saved_model:saved_model_py",
diff --git a/tensorflow/contrib/lite/python/convert_saved_model.py b/tensorflow/contrib/lite/python/convert_saved_model.py
index 1553464b9f..d18b60d0ea 100644
--- a/tensorflow/contrib/lite/python/convert_saved_model.py
+++ b/tensorflow/contrib/lite/python/convert_saved_model.py
@@ -44,7 +44,7 @@ def _log_tensor_details(tensor_info):
                  dtype)
 
 
-def _get_meta_graph_def(saved_model_dir, tag_set):
+def get_meta_graph_def(saved_model_dir, tag_set):
   """Validate saved_model and extract MetaGraphDef.
 
   Args:
@@ -61,7 +61,7 @@ def _get_meta_graph_def(saved_model_dir, tag_set):
     return loader.load(sess, tag_set, saved_model_dir)
 
 
-def _get_signature_def(meta_graph, signature_key):
+def get_signature_def(meta_graph, signature_key):
   """Get the signature def from meta_graph with given signature_key.
 
   Args:
@@ -86,7 +86,7 @@ def _get_signature_def(meta_graph, signature_key):
   return signature_def_map[signature_key]
 
 
-def _get_inputs_outputs(signature_def):
+def get_inputs_outputs(signature_def):
   """Get inputs and outputs from SignatureDef.
 
   Args:
@@ -236,9 +236,9 @@ def freeze_saved_model(saved_model_dir, input_arrays, input_shapes,
       input_arrays or output_arrays are not valid.
   """
   # Read SignatureDef.
-  meta_graph = _get_meta_graph_def(saved_model_dir, tag_set)
-  signature_def = _get_signature_def(meta_graph, signature_key)
-  inputs, outputs = _get_inputs_outputs(signature_def)
+  meta_graph = get_meta_graph_def(saved_model_dir, tag_set)
+  signature_def = get_signature_def(meta_graph, signature_key)
+  inputs, outputs = get_inputs_outputs(signature_def)
 
   # Check SavedModel for assets directory.
   collection_def = meta_graph.collection_def
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index a4736bfee9..c4a2b03444 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -13,6 +13,7 @@ load("//tensorflow/contrib/lite:special_rules.bzl", "tflite_portable_test_suite"
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
+    "py_test",
 )
 
 [gen_zip_test(
@@ -362,4 +363,30 @@ cc_binary(
     ],
 )
 
+py_binary(
+    name = "model_coverage_lib",
+    srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//tensorflow/contrib/lite:__subpackages__"],
+    deps = [
+        "//tensorflow/contrib/lite/python:lite",
+        "//tensorflow/python:platform",
+    ],
+)
+
+py_test(
+    name = "model_coverage_lib_test",
+    srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_windows",
+        "notap",
+    ],
+    deps = [
+        ":model_coverage_lib",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 tflite_portable_test_suite()
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
new file mode 100644
index 0000000000..f8ab394c60
--- /dev/null
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
@@ -0,0 +1,241 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions to test TFLite models."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.lite.python import convert_saved_model as _convert_saved_model
+from tensorflow.contrib.lite.python import lite as _lite
+from tensorflow.core.framework import graph_pb2 as _graph_pb2
+from tensorflow.python import keras as _keras
+from tensorflow.python.client import session as _session
+from tensorflow.python.framework.importer import import_graph_def as _import_graph_def
+from tensorflow.python.lib.io import file_io as _file_io
+from tensorflow.python.saved_model import signature_constants as _signature_constants
+from tensorflow.python.saved_model import tag_constants as _tag_constants
+
+
+def _convert(converter, **kwargs):
+  """Converts the model.
+
+  Args:
+    converter: TocoConverter object.
+    **kwargs: Additional arguments to be passed into the converter. Supported
+      flags are {"converter_mode", "post_training_quant"}.
+
+  Returns:
+    The converted TFLite model in serialized format.
+  """
+  if "converter_mode" in kwargs:
+    converter.converter_mode = kwargs["converter_mode"]
+  if "post_training_quantize" in kwargs:
+    converter.post_training_quantize = kwargs["post_training_quantize"]
+  return converter.convert()
+
+
+def _generate_random_input_data(tflite_model, seed=None):
+  """Generates input data based on the input tensors in the TFLite model.
+
+  Args:
+    tflite_model: Serialized TensorFlow Lite model.
+    seed: Integer seed for the random generator. (default None)
+
+  Returns:
+    List of np.ndarray.
+  """
+  interpreter = _lite.Interpreter(model_content=tflite_model)
+  interpreter.allocate_tensors()
+  input_details = interpreter.get_input_details()
+
+  if seed:
+    np.random.seed(seed=seed)
+  return [
+      np.array(
+          np.random.random_sample(input_tensor["shape"]),
+          dtype=input_tensor["dtype"]) for input_tensor in input_details
+  ]
+
+
+def _evaluate_tflite_model(tflite_model, input_data):
+  """Returns evaluation of input data on TFLite model.
+
+  Args:
+    tflite_model: Serialized TensorFlow Lite model.
+    input_data: List of np.ndarray.
+
+  Returns:
+    List of np.ndarray.
+  """
+  interpreter = _lite.Interpreter(model_content=tflite_model)
+  interpreter.allocate_tensors()
+
+  input_details = interpreter.get_input_details()
+  output_details = interpreter.get_output_details()
+
+  for input_tensor, tensor_data in zip(input_details, input_data):
+    interpreter.set_tensor(input_tensor["index"], tensor_data)
+
+  interpreter.invoke()
+  output_data = [
+      interpreter.get_tensor(output_tensor["index"])
+      for output_tensor in output_details
+  ]
+  return output_data
+
+
+def evaluate_frozen_graph(filename, input_arrays, output_arrays):
+  """Returns a function that evaluates the frozen graph on input data.
+
+  Args:
+    filename: Full filepath of file containing frozen GraphDef.
+    input_arrays: List of input tensors to freeze graph with.
+    output_arrays: List of output tensors to freeze graph with.
+
+  Returns:
+    Lambda function ([np.ndarray data] : [np.ndarray result]).
+  """
+  with _session.Session().as_default() as sess:
+    with _file_io.FileIO(filename, "rb") as f:
+      file_content = f.read()
+
+    graph_def = _graph_pb2.GraphDef()
+    graph_def.ParseFromString(file_content)
+    _import_graph_def(graph_def, name="")
+
+    inputs = _convert_saved_model.get_tensors_from_tensor_names(
+        sess.graph, input_arrays)
+    outputs = _convert_saved_model.get_tensors_from_tensor_names(
+        sess.graph, output_arrays)
+
+    return lambda input_data: sess.run(outputs, dict(zip(inputs, input_data)))
+
+
+def evaluate_saved_model(directory, tag_set, signature_key):
+  """Returns a function that evaluates the SavedModel on input data.
+
+  Args:
+    directory: SavedModel directory to convert.
+    tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to
+      analyze. All tags in the tag set must be present.
+    signature_key: Key identifying SignatureDef containing inputs and outputs.
+
+  Returns:
+    Lambda function ([np.ndarray data] : [np.ndarray result]).
+  """
+  with _session.Session().as_default() as sess:
+    if tag_set is None:
+      tag_set = set([_tag_constants.SERVING])
+    if signature_key is None:
+      signature_key = _signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
+    meta_graph = _convert_saved_model.get_meta_graph_def(directory, tag_set)
+    signature_def = _convert_saved_model.get_signature_def(
+        meta_graph, signature_key)
+    inputs, outputs = _convert_saved_model.get_inputs_outputs(signature_def)
+
+    return lambda input_data: sess.run(outputs, dict(zip(inputs, input_data)))
+
+
+def evaluate_keras_model(filename):
+  """Returns a function that evaluates the tf.keras model on input data.
+
+  Args:
+    filename: Full filepath of HDF5 file containing the tf.keras model.
+
+  Returns:
+    Lambda function ([np.ndarray data] : [np.ndarray result]).
+  """
+  keras_model = _keras.models.load_model(filename)
+  return lambda input_data: [keras_model.predict(input_data)]
+
+
+# TODO(nupurgarg): Make this function a parameter to test_frozen_graph (and
+# related functions) in order to make it easy to use different data generators.
+def compare_models_random_data(tflite_model, tf_eval_func, tolerance=5):
+  """Compares TensorFlow and TFLite models with random data.
+
+  Args:
+    tflite_model: Serialized TensorFlow Lite model.
+    tf_eval_func: Lambda function that takes in input data and outputs the
+      results of the TensorFlow model ([np.ndarray data] : [np.ndarray result]).
+    tolerance: Decimal place to check accuracy to.
+  """
+  input_data = _generate_random_input_data(tflite_model)
+  tf_results = tf_eval_func(input_data)
+  tflite_results = _evaluate_tflite_model(tflite_model, input_data)
+  for tf_result, tflite_result in zip(tf_results, tflite_results):
+    np.testing.assert_almost_equal(tf_result, tflite_result, tolerance)
+
+
+def test_frozen_graph(filename, input_arrays, output_arrays, **kwargs):
+  """Validates the TensorFlow frozen graph converts to a TFLite model.
+
+  Converts the TensorFlow frozen graph to TFLite and checks the accuracy of the
+  model on random data.
+
+  Args:
+    filename: Full filepath of file containing frozen GraphDef.
+    input_arrays: List of input tensors to freeze graph with.
+    output_arrays: List of output tensors to freeze graph with.
+    **kwargs: Additional arguments to be passed into the converter.
+  """
+  converter = _lite.TocoConverter.from_frozen_graph(filename, input_arrays,
+                                                    output_arrays)
+  tflite_model = _convert(converter, **kwargs)
+
+  tf_eval_func = evaluate_frozen_graph(filename, input_arrays, output_arrays)
+  compare_models_random_data(tflite_model, tf_eval_func)
+
+
+def test_saved_model(directory, tag_set=None, signature_key=None, **kwargs):
+  """Validates the TensorFlow SavedModel converts to a TFLite model.
+
+  Converts the TensorFlow SavedModel to TFLite and checks the accuracy of the
+  model on random data.
+
+  Args:
+    directory: SavedModel directory to convert.
+    tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to
+      analyze. All tags in the tag set must be present.
+    signature_key: Key identifying SignatureDef containing inputs and outputs.
+    **kwargs: Additional arguments to be passed into the converter.
+  """
+  converter = _lite.TocoConverter.from_saved_model(directory, tag_set,
+                                                   signature_key)
+  tflite_model = _convert(converter, **kwargs)
+
+  tf_eval_func = evaluate_saved_model(directory, tag_set, signature_key)
+  compare_models_random_data(tflite_model, tf_eval_func)
+
+
+def test_keras_model(filename, **kwargs):
+  """Validates the tf.keras model converts to a TFLite model.
+
+  Converts the tf.keras model to TFLite and checks the accuracy of the model on
+  random data.
+
+  Args:
+    filename: Full filepath of HDF5 file containing the tf.keras model.
+    **kwargs: Additional arguments to be passed into the converter.
+  """
+  converter = _lite.TocoConverter.from_keras_model_file(filename)
+  tflite_model = _convert(converter, **kwargs)
+
+  tf_eval_func = evaluate_keras_model(filename)
+  compare_models_random_data(tflite_model, tf_eval_func)
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
new file mode 100644
index 0000000000..5f3355e734
--- /dev/null
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
@@ -0,0 +1,130 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for model_coverage_lib.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+
+from tensorflow.contrib.lite.python import lite
+from tensorflow.contrib.lite.testing.model_coverage import model_coverage_lib as model_coverage
+from tensorflow.python import keras
+from tensorflow.python.client import session
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import saved_model
+from tensorflow.python.training.training_util import write_graph
+
+
+class EvaluateFrozenGraph(test.TestCase):
+
+  def _saveFrozenGraph(self, sess):
+    graph_def_file = os.path.join(self.get_temp_dir(), 'model.pb')
+    write_graph(sess.graph_def, '', graph_def_file, False)
+    return graph_def_file
+
+  def testFloat(self):
+    with session.Session().as_default() as sess:
+      in_tensor = array_ops.placeholder(
+          shape=[1, 16, 16, 3], dtype=dtypes.float32)
+      _ = in_tensor + in_tensor
+    filename = self._saveFrozenGraph(sess)
+
+    model_coverage.test_frozen_graph(filename, ['Placeholder'], ['add'])
+
+  def testMultipleOutputs(self):
+    with session.Session().as_default() as sess:
+      in_tensor_1 = array_ops.placeholder(
+          shape=[1, 16], dtype=dtypes.float32, name='inputA')
+      in_tensor_2 = array_ops.placeholder(
+          shape=[1, 16], dtype=dtypes.float32, name='inputB')
+
+      weight = constant_op.constant(-1.0, shape=[16, 16])
+      bias = constant_op.constant(-1.0, shape=[16])
+      layer = math_ops.matmul(in_tensor_1, weight) + bias
+      _ = math_ops.reduce_mean(math_ops.square(layer - in_tensor_2))
+    filename = self._saveFrozenGraph(sess)
+
+    model_coverage.test_frozen_graph(filename, ['inputA', 'inputB'],
+                                     ['add', 'Mean'])
+
+
+class EvaluateSavedModel(test.TestCase):
+
+  def testFloat(self):
+    saved_model_dir = os.path.join(self.get_temp_dir(), 'simple_savedmodel')
+    with session.Session().as_default() as sess:
+      in_tensor_1 = array_ops.placeholder(
+          shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
+      in_tensor_2 = array_ops.placeholder(
+          shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
+      out_tensor = in_tensor_1 + in_tensor_2
+
+      inputs = {'x': in_tensor_1, 'y': in_tensor_2}
+      outputs = {'z': out_tensor}
+      saved_model.simple_save(sess, saved_model_dir, inputs, outputs)
+    model_coverage.test_saved_model(saved_model_dir)
+
+
+class EvaluateKerasModel(test.TestCase):
+
+  def _getSingleInputKerasModel(self):
+    """Returns single input Sequential tf.keras model."""
+    keras.backend.clear_session()
+
+    xs = [-1, 0, 1, 2, 3, 4]
+    ys = [-3, -1, 1, 3, 5, 7]
+
+    model = keras.Sequential([keras.layers.Dense(units=1, input_shape=[1])])
+    model.compile(optimizer='sgd', loss='mean_squared_error')
+    model.train_on_batch(xs, ys)
+    return model
+
+  def _saveKerasModel(self, model):
+    try:
+      fd, keras_file = tempfile.mkstemp('.h5')
+      keras.models.save_model(model, keras_file)
+    finally:
+      os.close(fd)
+    return keras_file
+
+  def testFloat(self):
+    model = self._getSingleInputKerasModel()
+    keras_file = self._saveKerasModel(model)
+
+    model_coverage.test_keras_model(keras_file)
+
+  def testPostTrainingQuantize(self):
+    model = self._getSingleInputKerasModel()
+    keras_file = self._saveKerasModel(model)
+
+    model_coverage.test_keras_model(keras_file, post_training_quantize=True)
+
+  def testConverterMode(self):
+    model = self._getSingleInputKerasModel()
+    keras_file = self._saveKerasModel(model)
+
+    model_coverage.test_keras_model(
+        keras_file, converter_mode=lite.ConverterMode.TOCO_EXTENDED)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From ad27440a79c30a53f9fd2a3171a2c2da6ff37820 Mon Sep 17 00:00:00 2001
From: Alexey Radul <axch@google.com>
Date: Tue, 25 Sep 2018 15:04:25 -0700
Subject: [PATCH 0695/1357] Move the correlation matrix volumes computation for
 testing the LKJ distribution from tf/contrib to tfp.

Relevant to tensorflow/tensorflow#21909

PiperOrigin-RevId: 214511101
---
 .../python/kernel_tests/util/BUILD            |  51 ---
 .../util/correlation_matrix_volumes.py        |  98 ------
 .../util/correlation_matrix_volumes_lib.py    | 323 ------------------
 .../util/correlation_matrix_volumes_test.py   | 150 --------
 4 files changed, 622 deletions(-)
 delete mode 100644 tensorflow/contrib/distributions/python/kernel_tests/util/BUILD
 delete mode 100644 tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py
 delete mode 100644 tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py
 delete mode 100644 tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py

diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD b/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD
deleted file mode 100644
index 42ecea034d..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD
+++ /dev/null
@@ -1,51 +0,0 @@
-# Description:
-#   Internal testing utilities, e.g., computing the correct answer to
-#   put in a unit test.
-
-licenses(["notice"])  # Apache 2.0
-
-py_library(
-    name = "correlation_matrix_volumes_py",
-    srcs = [
-        "correlation_matrix_volumes_lib.py",
-    ],
-    deps = [
-        "//tensorflow/contrib/distributions:distributions_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:math_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_binary(
-    name = "correlation_matrix_volumes",
-    srcs = [
-        "correlation_matrix_volumes.py",
-    ],
-    deps = [
-        ":correlation_matrix_volumes_py",
-    ],
-)
-
-py_test(
-    name = "correlation_matrix_volumes_test",
-    size = "medium",
-    srcs = ["correlation_matrix_volumes_test.py"],
-    tags = [
-        "no_pip",
-        "optonly",
-    ],
-    deps = [
-        ":correlation_matrix_volumes_py",
-        # For statistical testing
-        "//tensorflow/contrib/distributions:distributions_py",
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-    ],
-)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py
deleted file mode 100644
index 2eab51cd30..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Executable to estimate the volume of various sets of correlation matrices.
-
-See correlation_matrix_volumes_lib.py for purpose and methodology.
-
-Invocation example:
-```
-python correlation_matrix_volumes.py --num_samples 1e7
-```
-
-This will compute 10,000,000-sample confidence intervals for the
-volumes of several sets of correlation matrices.  Which sets, and the
-desired statistical significance, are hard-coded in this source file.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pprint
-
-from absl import app
-from absl import flags
-
-from tensorflow.contrib.distributions.python.kernel_tests.util import correlation_matrix_volumes_lib as corr
-
-FLAGS = flags.FLAGS
-
-# Float to support giving the number of samples in scientific notation.
-# The production run used for the LKJ test used 1e7 samples.
-flags.DEFINE_float('num_samples', 1e4, 'Number of samples to use.')
-
-
-def ctv_debatched(det_bounds, dim, num_samples, error_rate=1e-6, seed=42):
-  # This wrapper undoes the batching in compute_true_volumes, because
-  # apparently several 5x5x9x1e7 Tensors of float32 can strain RAM.
-  bounds = {}
-  for db in det_bounds:
-    bounds[db] = corr.compute_true_volumes(
-        [db], dim, num_samples, error_rate=error_rate, seed=seed)[db]
-  return bounds
-
-
-# The particular bounds in all three of these functions were chosen by
-# a somewhat arbitrary walk through an empirical tradeoff, for the
-# purpose of testing the LKJ distribution.  Setting the determinant
-# bound lower
-# - Covers more of the testee's sample space, and
-# - Increases the probability that the rejection sampler will hit, thus
-# - Decreases the relative error (at a fixed sample count) in the
-#   rejection-based volume estimate;
-# but also
-# - Increases the variance of the estimator used in the LKJ test.
-# This latter variance is also affected by the dimension and the
-# tested concentration parameter, and can be compensated for with more
-# compute (expensive) or a looser discrepancy limit (unsatisfying).
-# The values here are the projection of the points in that test design
-# space that ended up getting chosen.
-def compute_3x3_volumes(num_samples):
-  det_bounds = [0.01, 0.25, 0.3, 0.35, 0.4, 0.45]
-  return ctv_debatched(
-      det_bounds, 3, num_samples, error_rate=5e-7, seed=46)
-
-
-def compute_4x4_volumes(num_samples):
-  det_bounds = [0.01, 0.25, 0.3, 0.35, 0.4, 0.45]
-  return ctv_debatched(
-      det_bounds, 4, num_samples, error_rate=5e-7, seed=47)
-
-
-def compute_5x5_volumes(num_samples):
-  det_bounds = [0.01, 0.2, 0.25, 0.3, 0.35, 0.4]
-  return ctv_debatched(
-      det_bounds, 5, num_samples, error_rate=5e-7, seed=48)
-
-
-def main(_):
-  full_bounds = {}
-  full_bounds[3] = compute_3x3_volumes(int(FLAGS.num_samples))
-  full_bounds[4] = compute_4x4_volumes(int(FLAGS.num_samples))
-  full_bounds[5] = compute_5x5_volumes(int(FLAGS.num_samples))
-  pprint.pprint(full_bounds)
-
-if __name__ == '__main__':
-  app.run(main)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py
deleted file mode 100644
index 455e71f00c..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py
+++ /dev/null
@@ -1,323 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Estimating the volume of the correlation matrices with bounded determinant.
-
-Why?  Because lkj_test.py tests the sampler for the LKJ distribution
-by estimating the same volume another way.
-
-How?  Rejection sampling.  Or, more precisely, importance sampling,
-proposing from the uniform distribution on symmetric matrices with
-diagonal 1s and entries in [-1, 1].  Such a matrix is a correlation
-matrix if and only if it is also positive semi-definite.
-
-The samples can then be converted into a confidence interval on the
-volume in question by the [Clopper-Pearson
-method](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval),
-also implemented here.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import importlib
-import sys
-
-import numpy as np
-
-from tensorflow.python.client import session
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import linalg_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import uniform
-from tensorflow.python.ops.distributions import util
-from tensorflow.python.platform import tf_logging
-
-__all__ = [
-    "correlation_matrix_volume_rejection_samples",
-    "compute_true_volumes",
-]
-
-
-def try_import(name):  # pylint: disable=invalid-name
-  module = None
-  try:
-    module = importlib.import_module(name)
-  except ImportError as e:
-    tf_logging.warning("Could not import %s: %s" % (name, str(e)))
-  return module
-
-optimize = try_import("scipy.optimize")
-stats = try_import("scipy.stats")
-
-
-def _psd_mask(x):
-  """Computes whether each square matrix in the input is positive semi-definite.
-
-  Args:
-    x: A floating-point `Tensor` of shape `[B1, ..., Bn, M, M]`.
-
-  Returns:
-    mask: A floating-point `Tensor` of shape `[B1, ... Bn]`.  Each
-      scalar is 1 if the corresponding matrix was PSD, otherwise 0.
-  """
-  # Allegedly
-  # https://scicomp.stackexchange.com/questions/12979/testing-if-a-matrix-is-positive-semi-definite
-  # it is more efficient to test for positive semi-definiteness by
-  # trying to compute the Cholesky decomposition -- the matrix is PSD
-  # if you succeed and not PSD if you fail.  However, TensorFlow's
-  # Cholesky raises an exception if _any_ of the input matrices are
-  # not PSD, from which I don't know how to extract _which ones_, so I
-  # proceed by explicitly computing all the eigenvalues and checking
-  # whether they are all positive or not.
-  #
-  # Also, as was discussed in the answer, it is somewhat dangerous to
-  # treat SPD-ness as binary in floating-point arithmetic. Cholesky
-  # factorization can complete and 'look' like everything is fine
-  # (e.g., O(1) entries and a diagonal of all ones) but the matrix can
-  # have an exponential condition number.
-  eigenvalues, _ = linalg_ops.self_adjoint_eig(x)
-  return math_ops.cast(
-      math_ops.reduce_min(eigenvalues, axis=-1) >= 0, dtype=x.dtype)
-
-
-def _det_large_enough_mask(x, det_bounds):
-  """Returns whether the input matches the given determinant limit.
-
-  Args:
-    x: A floating-point `Tensor` of shape `[B1, ..., Bn, M, M]`.
-    det_bounds: A floating-point `Tensor` that must broadcast to shape
-      `[B1, ..., Bn]`, giving the desired lower bound on the
-      determinants in `x`.
-
-  Returns:
-    mask: A floating-point `Tensor` of shape [B1, ..., Bn].  Each
-      scalar is 1 if the corresponding matrix had determinant above
-      the corresponding bound, otherwise 0.
-  """
-  # For the curious: I wonder whether it is possible and desirable to
-  # use a Cholesky decomposition-based algorithm for this, since the
-  # only matrices whose determinant this code cares about will be PSD.
-  # Didn't figure out how to code that in TensorFlow.
-  #
-  # Expert opinion is that it would be about twice as fast since
-  # Cholesky is roughly half the cost of Gaussian Elimination with
-  # Partial Pivoting. But this is less of an impact than the switch in
-  # _psd_mask.
-  return math_ops.cast(
-      linalg_ops.matrix_determinant(x) > det_bounds, dtype=x.dtype)
-
-
-def _uniform_correlation_like_matrix(num_rows, batch_shape, dtype, seed):
-  """Returns a uniformly random `Tensor` of "correlation-like" matrices.
-
-  A "correlation-like" matrix is a symmetric square matrix with all entries
-  between -1 and 1 (inclusive) and 1s on the main diagonal.  Of these,
-  the ones that are positive semi-definite are exactly the correlation
-  matrices.
-
-  Args:
-    num_rows: Python `int` dimension of the correlation-like matrices.
-    batch_shape: `Tensor` or Python `tuple` of `int` shape of the
-      batch to return.
-    dtype: `dtype` of the `Tensor` to return.
-    seed: Random seed.
-
-  Returns:
-    matrices: A `Tensor` of shape `batch_shape + [num_rows, num_rows]`
-      and dtype `dtype`.  Each entry is in [-1, 1], and each matrix
-      along the bottom two dimensions is symmetric and has 1s on the
-      main diagonal.
-  """
-  num_entries = num_rows * (num_rows + 1) / 2
-  ones = array_ops.ones(shape=[num_entries], dtype=dtype)
-  # It seems wasteful to generate random values for the diagonal since
-  # I am going to throw them away, but `fill_triangular` fills the
-  # diagonal, so I probably need them.
-  # It's not impossible that it would be more efficient to just fill
-  # the whole matrix with random values instead of messing with
-  # `fill_triangular`.  Then would need to filter almost half out with
-  # `matrix_band_part`.
-  unifs = uniform.Uniform(-ones, ones).sample(batch_shape, seed=seed)
-  tril = util.fill_triangular(unifs)
-  symmetric = tril + array_ops.matrix_transpose(tril)
-  diagonal_ones = array_ops.ones(
-      shape=util.pad(batch_shape, axis=0, back=True, value=num_rows),
-      dtype=dtype)
-  return array_ops.matrix_set_diag(symmetric, diagonal_ones)
-
-
-def correlation_matrix_volume_rejection_samples(
-    det_bounds, dim, sample_shape, dtype, seed):
-  """Returns rejection samples from trying to get good correlation matrices.
-
-  The proposal being rejected from is the uniform distribution on
-  "correlation-like" matrices.  We say a matrix is "correlation-like"
-  if it is a symmetric square matrix with all entries between -1 and 1
-  (inclusive) and 1s on the main diagonal.  Of these, the ones that
-  are positive semi-definite are exactly the correlation matrices.
-
-  The rejection algorithm, then, is to sample a `Tensor` of
-  `sample_shape` correlation-like matrices of dimensions `dim` by
-  `dim`, and check each one for (i) being a correlation matrix (i.e.,
-  PSD), and (ii) having determinant at least the corresponding entry
-  of `det_bounds`.
-
-  Args:
-    det_bounds: A `Tensor` of lower bounds on the determinants of
-      acceptable matrices.  The shape must broadcast with `sample_shape`.
-    dim: A Python `int` dimension of correlation matrices to sample.
-    sample_shape: Python `tuple` of `int` shape of the samples to
-      compute, excluding the two matrix dimensions.
-    dtype: The `dtype` in which to do the computation.
-    seed: Random seed.
-
-  Returns:
-    weights: A `Tensor` of shape `sample_shape`.  Each entry is 0 if the
-      corresponding matrix was not a correlation matrix, or had too
-      small of a determinant.  Otherwise, the entry is the
-      multiplicative inverse of the density of proposing that matrix
-      uniformly, i.e., the volume of the set of `dim` by `dim`
-      correlation-like matrices.
-    volume: The volume of the set of `dim` by `dim` correlation-like
-      matrices.
-  """
-  with ops.name_scope("rejection_sampler"):
-    rej_proposals = _uniform_correlation_like_matrix(
-        dim, sample_shape, dtype, seed=seed)
-    rej_proposal_volume = 2. ** (dim * (dim - 1) / 2.)
-    # The density of proposing any given point is 1 / rej_proposal_volume;
-    # The weight of that point should be scaled by
-    # 1 / density = rej_proposal_volume.
-    rej_weights = rej_proposal_volume * _psd_mask(
-        rej_proposals) * _det_large_enough_mask(rej_proposals, det_bounds)
-    return rej_weights, rej_proposal_volume
-
-
-def _clopper_pearson_confidence_interval(samples, error_rate):
-  """Computes a confidence interval for the mean of the given 1-D distribution.
-
-  Assumes (and checks) that the given distribution is Bernoulli, i.e.,
-  takes only two values.  This licenses using the CDF of the binomial
-  distribution for the confidence, which is tighter (for extreme
-  probabilities) than the DKWM inequality.  The method is known as the
-  [Clopper-Pearson method]
-  (https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval).
-
-  Assumes:
-
-  - The given samples were drawn iid from the distribution of interest.
-
-  - The given distribution is a Bernoulli, i.e., supported only on
-    low and high.
-
-  Guarantees:
-
-  - The probability (over the randomness of drawing the given sample)
-    that the true mean is outside the returned interval is no more
-    than the given error_rate.
-
-  Args:
-    samples: `np.ndarray` of samples drawn iid from the distribution
-      of interest.
-    error_rate: Python `float` admissible rate of mistakes.
-
-  Returns:
-    low: Lower bound of confidence interval.
-    high: Upper bound of confidence interval.
-
-  Raises:
-    ValueError: If `samples` has rank other than 1 (batch semantics
-      are not implemented), or if `samples` contains values other than
-      `low` or `high` (as that makes the distribution not Bernoulli).
-  """
-  # TODO(b/78025336) Migrate this confidence interval function
-  # to statistical_testing.py.  In order to do that
-  # - Get the binomial CDF from the Binomial distribution
-  # - Implement scalar root finding in TF.  Batch bisection search
-  #   shouldn't be too hard, and is definitely good enough for this
-  #   problem.  Batching the Brent algorithm (from scipy) that is used
-  #   here may be more involved, but may also not be necessary---it's
-  #   only used here because scipy made it convenient.  In particular,
-  #   robustness is more important than speed here, which may make
-  #   bisection search actively better.
-  # - The rest is just a matter of rewriting in the appropriate style.
-  if optimize is None or stats is None:
-    raise ValueError(
-        "Scipy is required for computing Clopper-Pearson confidence intervals")
-  if len(samples.shape) != 1:
-    raise ValueError("Batch semantics not implemented")
-  n = len(samples)
-  low = np.amin(samples)
-  high = np.amax(samples)
-  successes = np.count_nonzero(samples - low)
-  failures = np.count_nonzero(samples - high)
-  if successes + failures != n:
-    uniques = np.unique(samples)
-    msg = ("Purportedly Bernoulli distribution had distinct samples"
-           " {}, {}, and {}".format(uniques[0], uniques[1], uniques[2]))
-    raise ValueError(msg)
-  def p_small_enough(p):
-    prob = stats.binom.logcdf(successes, n, p)
-    return prob - np.log(error_rate / 2.)
-  def p_big_enough(p):
-    prob = stats.binom.logsf(successes, n, p)
-    return prob - np.log(error_rate / 2.)
-  high_p = optimize.brentq(
-      p_small_enough, float(successes) / n, 1., rtol=1e-9)
-  low_p = optimize.brentq(
-      p_big_enough, 0., float(successes) / n, rtol=1e-9)
-  low_interval = low + (high - low) * low_p
-  high_interval = low + (high - low) * high_p
-  return (low_interval, high_interval)
-
-
-def compute_true_volumes(
-    det_bounds, dim, num_samples, error_rate=1e-6, seed=42):
-  """Returns confidence intervals for the desired correlation matrix volumes.
-
-  The confidence intervals are computed by the [Clopper-Pearson method]
-  (https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval).
-
-  Args:
-    det_bounds: A rank-1 numpy array of lower bounds on the
-      determinants of acceptable matrices.  Entries must be unique.
-    dim: A Python `int` dimension of correlation matrices to sample.
-    num_samples: The number of samples to draw.
-    error_rate: The statistical significance of the returned
-      confidence intervals.  The significance is broadcast: Each
-      returned interval separately may be incorrect with probability
-      (under the sample of correlation-like matrices drawn internally)
-      at most `error_rate`.
-    seed: Random seed.
-
-  Returns:
-    bounds: A Python `dict` mapping each determinant bound to the low, high
-      tuple giving the confidence interval.
-  """
-  bounds = {}
-  with session.Session() as sess:
-    rej_weights, _ = correlation_matrix_volume_rejection_samples(
-        det_bounds, dim, [num_samples, len(det_bounds)], np.float32, seed=seed)
-    rej_weights = sess.run(rej_weights)
-    for rw, det in zip(np.rollaxis(rej_weights, 1), det_bounds):
-      template = ("Estimating volume of {}x{} correlation "
-                  "matrices with determinant >= {}.")
-      print(template.format(dim, dim, det))
-      sys.stdout.flush()
-      bounds[det] = _clopper_pearson_confidence_interval(
-          rw, error_rate=error_rate)
-    return bounds
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py
deleted file mode 100644
index 8f99300e63..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for correlation_matrix_volumes_lib.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.distributions.python.kernel_tests.util import correlation_matrix_volumes_lib as corr
-from tensorflow.contrib.distributions.python.ops import statistical_testing as st
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.platform import test
-
-
-# NxN correlation matrices are determined by the N*(N-1)/2
-# lower-triangular entries.  In addition to being between -1 and 1,
-# they must also obey the constraint that the determinant of the
-# resulting symmetric matrix is non-negative.  In 2x2, we can even
-# analytically compute the volume when the determinant is bounded to >
-# epsilon, as that boils down to the one lower-triangular entry being
-# less than 1 - epsilon in absolute value.
-def two_by_two_volume(det_bound):
-  return 2 * np.sqrt(1.0 - det_bound)
-
-
-# The post
-# https://psychometroscar.com/the-volume-of-a-3-x-3-correlation-matrix/
-# derives (with elementary calculus) that the volume (with respect to
-# Lebesgue^3 measure) of the set of 3x3 correlation matrices is
-# pi^2/2.  The same result is also obtained by [1].
-def three_by_three_volume():
-  return np.pi**2 / 2.
-
-
-# The volume of the unconstrained set of correlation matrices is also
-# the normalization constant of the LKJ distribution from [2].  As
-# part of defining the distribution, that reference a derives general
-# formula for this volume for all dimensions.  A TensorFlow
-# computation thereof gave the below result for 4x4:
-def four_by_four_volume():
-  # This constant computed as math_ops.exp(lkj.log_norm_const(4, [1.0]))
-  return 11.6973076
-
-# [1] Rousseeuw, P. J., & Molenberghs, G. (1994). "The shape of
-# correlation matrices." The American Statistician, 48(4), 276-279.
-
-# [2] Daniel Lewandowski, Dorota Kurowicka, and Harry Joe, "Generating
-# random correlation matrices based on vines and extended onion
-# method," Journal of Multivariate Analysis 100 (2009), pp 1989-2001.
-
-
-class CorrelationMatrixVolumesTest(test.TestCase):
-
-  def testRejection2D(self):
-    num_samples = int(1e5)  # Chosen for a small min detectable discrepancy
-    det_bounds = np.array(
-        [0.01, 0.02, 0.03, 0.04, 0.05, 0.3, 0.35, 0.4, 0.5], dtype=np.float32)
-    exact_volumes = two_by_two_volume(det_bounds)
-    (rej_weights,
-     rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples(
-         det_bounds, 2, [num_samples, 9], dtype=np.float32, seed=43)
-    # shape of rej_weights: [num_samples, 9, 2, 2]
-    chk1 = st.assert_true_mean_equal_by_dkwm(
-        rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes,
-        false_fail_rate=1e-6)
-    chk2 = check_ops.assert_less(
-        st.min_discrepancy_of_true_means_detectable_by_dkwm(
-            num_samples, low=0., high=rej_proposal_volume,
-            # Correct the false fail rate due to different broadcasting
-            false_fail_rate=1.1e-7, false_pass_rate=1e-6),
-        0.036)
-    with ops.control_dependencies([chk1, chk2]):
-      rej_weights = array_ops.identity(rej_weights)
-    self.evaluate(rej_weights)
-
-  def testRejection3D(self):
-    num_samples = int(1e5)  # Chosen for a small min detectable discrepancy
-    det_bounds = np.array([0.0], dtype=np.float32)
-    exact_volumes = np.array([three_by_three_volume()], dtype=np.float32)
-    (rej_weights,
-     rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples(
-         det_bounds, 3, [num_samples, 1], dtype=np.float32, seed=44)
-    # shape of rej_weights: [num_samples, 1, 3, 3]
-    chk1 = st.assert_true_mean_equal_by_dkwm(
-        rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes,
-        false_fail_rate=1e-6)
-    chk2 = check_ops.assert_less(
-        st.min_discrepancy_of_true_means_detectable_by_dkwm(
-            num_samples, low=0., high=rej_proposal_volume,
-            false_fail_rate=1e-6, false_pass_rate=1e-6),
-        # Going for about a 3% relative error
-        0.15)
-    with ops.control_dependencies([chk1, chk2]):
-      rej_weights = array_ops.identity(rej_weights)
-    self.evaluate(rej_weights)
-
-  def testRejection4D(self):
-    num_samples = int(1e5)  # Chosen for a small min detectable discrepancy
-    det_bounds = np.array([0.0], dtype=np.float32)
-    exact_volumes = [four_by_four_volume()]
-    (rej_weights,
-     rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples(
-         det_bounds, 4, [num_samples, 1], dtype=np.float32, seed=45)
-    # shape of rej_weights: [num_samples, 1, 4, 4]
-    chk1 = st.assert_true_mean_equal_by_dkwm(
-        rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes,
-        false_fail_rate=1e-6)
-    chk2 = check_ops.assert_less(
-        st.min_discrepancy_of_true_means_detectable_by_dkwm(
-            num_samples, low=0., high=rej_proposal_volume,
-            false_fail_rate=1e-6, false_pass_rate=1e-6),
-        # Going for about a 10% relative error
-        1.1)
-    with ops.control_dependencies([chk1, chk2]):
-      rej_weights = array_ops.identity(rej_weights)
-    self.evaluate(rej_weights)
-
-  def testVolumeEstimation2D(self):
-    # Test that the confidence intervals produced by
-    # corr.compte_true_volumes are sound, in the sense of containing
-    # the exact volume.
-    num_samples = int(1e5)  # Chosen by symmetry with testRejection2D
-    det_bounds = np.array(
-        [0.01, 0.02, 0.03, 0.04, 0.05, 0.3, 0.35, 0.4, 0.5], dtype=np.float32)
-    volume_bounds = corr.compute_true_volumes(
-        det_bounds, 2, num_samples, error_rate=1e-6, seed=47)
-    exact_volumes = two_by_two_volume(det_bounds)
-    for det, volume in zip(det_bounds, exact_volumes):
-      computed_low, computed_high = volume_bounds[det]
-      self.assertLess(computed_low, volume)
-      self.assertGreater(computed_high, volume)
-
-if __name__ == "__main__":
-  test.main()
-- 
GitLab


From f0f475690cb728d7988328d32a5955c55ab1fb22 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Tue, 25 Sep 2018 15:30:05 -0700
Subject: [PATCH 0696/1357] Optimize function before functionalization.

PiperOrigin-RevId: 214515610
---
 .../tf2xla/functionalize_control_flow.cc      | 43 ++++++++++++++-----
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index f792c52032..98b333a467 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -31,11 +31,13 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/tf2xla_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/common_runtime/graph_optimizer.h"
 #include "tensorflow/core/common_runtime/process_function_library_runtime.h"
 #include "tensorflow/core/framework/graph_to_functiondef.h"
 #include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/control_flow.h"
+#include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
@@ -89,7 +91,6 @@ Status FunctionalizeControlFlowForFunction(
     }
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
-  const FunctionDef& fdef = body->fdef;
 
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
@@ -130,26 +131,46 @@ Status FunctionalizeControlFlowForFunction(
     }
   }
 
+  // Call graph optimizer. The most important optimization we need is constant
+  // folding, which will replace ops like Shape/BroadcastGradientArgs with
+  // constant shape input. Without this optimization, those ops might become
+  // dynamic input for then/else body function and XLA will complain that input
+  // is not compile time constant. We enable function inlining as well, because
+  // otherwise we won't be able to infer shape for any node depending on
+  // function call nodes.
+  if (VLOG_IS_ON(4)) {
+    dump_graph::DumpGraphToFile(
+        absl::StrCat("functionalize_control_flow_before_opt_", func_name),
+        *body->graph, fld);
+  }
+  // Optimizer accepts std::unique_ptr<Graph>* as input and might change
+  // underlying pointer, thus we create a new Graph and copy from body->graph.
+  std::unique_ptr<Graph> optimized_graph(new Graph(fld));
+  CopyGraph(*body->graph, optimized_graph.get());
+  OptimizerOptions opts;
+  opts.set_opt_level(OptimizerOptions::L0);
+  opts.set_do_function_inlining(true);
+  opts.set_do_constant_folding(true);
+  GraphOptimizer optimizer(opts);
+  optimizer.Optimize(flr, flr->env(),
+                     /*device=*/nullptr, &optimized_graph,
+                     /*shape_map=*/nullptr);
+
   // Functionalize the function body.
   if (VLOG_IS_ON(4)) {
     dump_graph::DumpGraphToFile(
         absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-        *body->graph, fld);
+        *optimized_graph, fld);
   }
-  TF_RETURN_IF_ERROR(FunctionalizeControlFlow(body->graph, fld));
+  TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
   if (VLOG_IS_ON(4)) {
     dump_graph::DumpGraphToFile(
         absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-        *body->graph, fld);
+        *optimized_graph, fld);
   }
   FunctionDef functionalized_fdef;
-  TF_RETURN_IF_ERROR(
-      GraphToFunctionDef(*body->graph, new_func_name, &functionalized_fdef));
-
-  // Copy signature and ret from original FunctionDef.
-  *functionalized_fdef.mutable_signature() = fdef.signature();
-  *functionalized_fdef.mutable_ret() = fdef.ret();
-  functionalized_fdef.mutable_signature()->set_name(new_func_name);
+  TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
+                                        &functionalized_fdef));
 
   // Add rewritten FunctionDef into library.
   if (func_name == new_func_name) {
-- 
GitLab


From 7630e9df4804a01f5dd0ab20d4c0bcfb58e45432 Mon Sep 17 00:00:00 2001
From: Richard Yu <yohan.richard.yu@gmail.com>
Date: Tue, 25 Sep 2018 15:50:13 -0700
Subject: [PATCH 0697/1357] Fixing error

---
 tensorflow/contrib/quantize/python/fold_batch_norms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index d882b79892..d9f179bee4 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -628,7 +628,7 @@ def _GetBatchNormParams(graph, context, has_scaling):
   bn_decay_var_tensor = _FindMatchingTensor(graph, op_suffix_bn_decay_var,
                                             context)
   if batch_mean_tensor is None and moving_mean_tensor is None:
-    raise ValueError('Error folding unfused batch norms')
+    ValueError('Error folding unfused batch norms')
   if has_scaling:
     gamma_tensor = _FindMatchingTensor(graph, op_suffix_gamma, context)
 
-- 
GitLab


From c274706224fc977fa4a8b8721c3e0b10814e638e Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 25 Sep 2018 15:56:57 -0700
Subject: [PATCH 0698/1357] Automated rollback of commit
 20c71535c5f1ed1d918d6cc6e327ffbba49ecbd6

PiperOrigin-RevId: 214519671
---
 tensorflow/contrib/lite/build_def.bzl         |  38 ---
 tensorflow/contrib/lite/python/BUILD          |   2 +-
 .../lite/python/convert_saved_model.py        |  12 +-
 tensorflow/contrib/lite/testing/BUILD         |  27 --
 .../model_coverage/model_coverage_lib.py      | 241 ------------------
 .../model_coverage/model_coverage_lib_test.py | 130 ----------
 6 files changed, 7 insertions(+), 443 deletions(-)
 delete mode 100644 tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
 delete mode 100644 tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 7f5c6bdc2f..fc4d9b4f17 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -391,41 +391,3 @@ def gen_selected_ops(name, model):
               (tool, model, out, tflite_path[2:]),
         tools = [tool],
     )
-
-def gen_full_model_test(conversion_modes, models, data, test_suite_tag):
-    """Generates Python test targets for testing TFLite models.
-
-    Args:
-      conversion_modes: List of conversion modes to test the models on.
-      models: List of models to test.
-      data: List of BUILD targets linking the data.
-      test_suite_tag: Tag identifying the model test suite.
-    """
-    options = [
-        (conversion_mode, model)
-        for model in models
-        for conversion_mode in conversion_modes
-    ]
-
-    for conversion_mode, model_name in options:
-        native.py_test(
-            name = "model_coverage_test_%s_%s" % (model_name, conversion_mode.lower()),
-            srcs = ["model_coverage_test.py"],
-            main = "model_coverage_test.py",
-            args = [
-                "--model_name=%s" % model_name,
-                "--converter_mode=%s" % conversion_mode,
-            ],
-            data = data,
-            srcs_version = "PY2AND3",
-            tags = [
-                "no_oss",
-                "no_windows",
-                "notap",
-            ] + [test_suite_tag],
-            deps = [
-                "//tensorflow/contrib/lite/testing:model_coverage_lib",
-                "//tensorflow/contrib/lite/python:lite",
-                "//tensorflow/python:client_testlib",
-            ],
-        )
diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index 916788f215..57e1290e07 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -144,7 +144,7 @@ py_library(
     name = "convert_saved_model",
     srcs = ["convert_saved_model.py"],
     srcs_version = "PY2AND3",
-    visibility = ["//tensorflow/contrib/lite:__subpackages__"],
+    visibility = ["//visibility:public"],
     deps = [
         ":convert",
         "//tensorflow/contrib/saved_model:saved_model_py",
diff --git a/tensorflow/contrib/lite/python/convert_saved_model.py b/tensorflow/contrib/lite/python/convert_saved_model.py
index d18b60d0ea..1553464b9f 100644
--- a/tensorflow/contrib/lite/python/convert_saved_model.py
+++ b/tensorflow/contrib/lite/python/convert_saved_model.py
@@ -44,7 +44,7 @@ def _log_tensor_details(tensor_info):
                  dtype)
 
 
-def get_meta_graph_def(saved_model_dir, tag_set):
+def _get_meta_graph_def(saved_model_dir, tag_set):
   """Validate saved_model and extract MetaGraphDef.
 
   Args:
@@ -61,7 +61,7 @@ def get_meta_graph_def(saved_model_dir, tag_set):
     return loader.load(sess, tag_set, saved_model_dir)
 
 
-def get_signature_def(meta_graph, signature_key):
+def _get_signature_def(meta_graph, signature_key):
   """Get the signature def from meta_graph with given signature_key.
 
   Args:
@@ -86,7 +86,7 @@ def get_signature_def(meta_graph, signature_key):
   return signature_def_map[signature_key]
 
 
-def get_inputs_outputs(signature_def):
+def _get_inputs_outputs(signature_def):
   """Get inputs and outputs from SignatureDef.
 
   Args:
@@ -236,9 +236,9 @@ def freeze_saved_model(saved_model_dir, input_arrays, input_shapes,
       input_arrays or output_arrays are not valid.
   """
   # Read SignatureDef.
-  meta_graph = get_meta_graph_def(saved_model_dir, tag_set)
-  signature_def = get_signature_def(meta_graph, signature_key)
-  inputs, outputs = get_inputs_outputs(signature_def)
+  meta_graph = _get_meta_graph_def(saved_model_dir, tag_set)
+  signature_def = _get_signature_def(meta_graph, signature_key)
+  inputs, outputs = _get_inputs_outputs(signature_def)
 
   # Check SavedModel for assets directory.
   collection_def = meta_graph.collection_def
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index c4a2b03444..a4736bfee9 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -13,7 +13,6 @@ load("//tensorflow/contrib/lite:special_rules.bzl", "tflite_portable_test_suite"
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
-    "py_test",
 )
 
 [gen_zip_test(
@@ -363,30 +362,4 @@ cc_binary(
     ],
 )
 
-py_binary(
-    name = "model_coverage_lib",
-    srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib.py"],
-    srcs_version = "PY2AND3",
-    visibility = ["//tensorflow/contrib/lite:__subpackages__"],
-    deps = [
-        "//tensorflow/contrib/lite/python:lite",
-        "//tensorflow/python:platform",
-    ],
-)
-
-py_test(
-    name = "model_coverage_lib_test",
-    srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_windows",
-        "notap",
-    ],
-    deps = [
-        ":model_coverage_lib",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
 tflite_portable_test_suite()
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
deleted file mode 100644
index f8ab394c60..0000000000
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
+++ /dev/null
@@ -1,241 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Functions to test TFLite models."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.lite.python import convert_saved_model as _convert_saved_model
-from tensorflow.contrib.lite.python import lite as _lite
-from tensorflow.core.framework import graph_pb2 as _graph_pb2
-from tensorflow.python import keras as _keras
-from tensorflow.python.client import session as _session
-from tensorflow.python.framework.importer import import_graph_def as _import_graph_def
-from tensorflow.python.lib.io import file_io as _file_io
-from tensorflow.python.saved_model import signature_constants as _signature_constants
-from tensorflow.python.saved_model import tag_constants as _tag_constants
-
-
-def _convert(converter, **kwargs):
-  """Converts the model.
-
-  Args:
-    converter: TocoConverter object.
-    **kwargs: Additional arguments to be passed into the converter. Supported
-      flags are {"converter_mode", "post_training_quant"}.
-
-  Returns:
-    The converted TFLite model in serialized format.
-  """
-  if "converter_mode" in kwargs:
-    converter.converter_mode = kwargs["converter_mode"]
-  if "post_training_quantize" in kwargs:
-    converter.post_training_quantize = kwargs["post_training_quantize"]
-  return converter.convert()
-
-
-def _generate_random_input_data(tflite_model, seed=None):
-  """Generates input data based on the input tensors in the TFLite model.
-
-  Args:
-    tflite_model: Serialized TensorFlow Lite model.
-    seed: Integer seed for the random generator. (default None)
-
-  Returns:
-    List of np.ndarray.
-  """
-  interpreter = _lite.Interpreter(model_content=tflite_model)
-  interpreter.allocate_tensors()
-  input_details = interpreter.get_input_details()
-
-  if seed:
-    np.random.seed(seed=seed)
-  return [
-      np.array(
-          np.random.random_sample(input_tensor["shape"]),
-          dtype=input_tensor["dtype"]) for input_tensor in input_details
-  ]
-
-
-def _evaluate_tflite_model(tflite_model, input_data):
-  """Returns evaluation of input data on TFLite model.
-
-  Args:
-    tflite_model: Serialized TensorFlow Lite model.
-    input_data: List of np.ndarray.
-
-  Returns:
-    List of np.ndarray.
-  """
-  interpreter = _lite.Interpreter(model_content=tflite_model)
-  interpreter.allocate_tensors()
-
-  input_details = interpreter.get_input_details()
-  output_details = interpreter.get_output_details()
-
-  for input_tensor, tensor_data in zip(input_details, input_data):
-    interpreter.set_tensor(input_tensor["index"], tensor_data)
-
-  interpreter.invoke()
-  output_data = [
-      interpreter.get_tensor(output_tensor["index"])
-      for output_tensor in output_details
-  ]
-  return output_data
-
-
-def evaluate_frozen_graph(filename, input_arrays, output_arrays):
-  """Returns a function that evaluates the frozen graph on input data.
-
-  Args:
-    filename: Full filepath of file containing frozen GraphDef.
-    input_arrays: List of input tensors to freeze graph with.
-    output_arrays: List of output tensors to freeze graph with.
-
-  Returns:
-    Lambda function ([np.ndarray data] : [np.ndarray result]).
-  """
-  with _session.Session().as_default() as sess:
-    with _file_io.FileIO(filename, "rb") as f:
-      file_content = f.read()
-
-    graph_def = _graph_pb2.GraphDef()
-    graph_def.ParseFromString(file_content)
-    _import_graph_def(graph_def, name="")
-
-    inputs = _convert_saved_model.get_tensors_from_tensor_names(
-        sess.graph, input_arrays)
-    outputs = _convert_saved_model.get_tensors_from_tensor_names(
-        sess.graph, output_arrays)
-
-    return lambda input_data: sess.run(outputs, dict(zip(inputs, input_data)))
-
-
-def evaluate_saved_model(directory, tag_set, signature_key):
-  """Returns a function that evaluates the SavedModel on input data.
-
-  Args:
-    directory: SavedModel directory to convert.
-    tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to
-      analyze. All tags in the tag set must be present.
-    signature_key: Key identifying SignatureDef containing inputs and outputs.
-
-  Returns:
-    Lambda function ([np.ndarray data] : [np.ndarray result]).
-  """
-  with _session.Session().as_default() as sess:
-    if tag_set is None:
-      tag_set = set([_tag_constants.SERVING])
-    if signature_key is None:
-      signature_key = _signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-
-    meta_graph = _convert_saved_model.get_meta_graph_def(directory, tag_set)
-    signature_def = _convert_saved_model.get_signature_def(
-        meta_graph, signature_key)
-    inputs, outputs = _convert_saved_model.get_inputs_outputs(signature_def)
-
-    return lambda input_data: sess.run(outputs, dict(zip(inputs, input_data)))
-
-
-def evaluate_keras_model(filename):
-  """Returns a function that evaluates the tf.keras model on input data.
-
-  Args:
-    filename: Full filepath of HDF5 file containing the tf.keras model.
-
-  Returns:
-    Lambda function ([np.ndarray data] : [np.ndarray result]).
-  """
-  keras_model = _keras.models.load_model(filename)
-  return lambda input_data: [keras_model.predict(input_data)]
-
-
-# TODO(nupurgarg): Make this function a parameter to test_frozen_graph (and
-# related functions) in order to make it easy to use different data generators.
-def compare_models_random_data(tflite_model, tf_eval_func, tolerance=5):
-  """Compares TensorFlow and TFLite models with random data.
-
-  Args:
-    tflite_model: Serialized TensorFlow Lite model.
-    tf_eval_func: Lambda function that takes in input data and outputs the
-      results of the TensorFlow model ([np.ndarray data] : [np.ndarray result]).
-    tolerance: Decimal place to check accuracy to.
-  """
-  input_data = _generate_random_input_data(tflite_model)
-  tf_results = tf_eval_func(input_data)
-  tflite_results = _evaluate_tflite_model(tflite_model, input_data)
-  for tf_result, tflite_result in zip(tf_results, tflite_results):
-    np.testing.assert_almost_equal(tf_result, tflite_result, tolerance)
-
-
-def test_frozen_graph(filename, input_arrays, output_arrays, **kwargs):
-  """Validates the TensorFlow frozen graph converts to a TFLite model.
-
-  Converts the TensorFlow frozen graph to TFLite and checks the accuracy of the
-  model on random data.
-
-  Args:
-    filename: Full filepath of file containing frozen GraphDef.
-    input_arrays: List of input tensors to freeze graph with.
-    output_arrays: List of output tensors to freeze graph with.
-    **kwargs: Additional arguments to be passed into the converter.
-  """
-  converter = _lite.TocoConverter.from_frozen_graph(filename, input_arrays,
-                                                    output_arrays)
-  tflite_model = _convert(converter, **kwargs)
-
-  tf_eval_func = evaluate_frozen_graph(filename, input_arrays, output_arrays)
-  compare_models_random_data(tflite_model, tf_eval_func)
-
-
-def test_saved_model(directory, tag_set=None, signature_key=None, **kwargs):
-  """Validates the TensorFlow SavedModel converts to a TFLite model.
-
-  Converts the TensorFlow SavedModel to TFLite and checks the accuracy of the
-  model on random data.
-
-  Args:
-    directory: SavedModel directory to convert.
-    tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to
-      analyze. All tags in the tag set must be present.
-    signature_key: Key identifying SignatureDef containing inputs and outputs.
-    **kwargs: Additional arguments to be passed into the converter.
-  """
-  converter = _lite.TocoConverter.from_saved_model(directory, tag_set,
-                                                   signature_key)
-  tflite_model = _convert(converter, **kwargs)
-
-  tf_eval_func = evaluate_saved_model(directory, tag_set, signature_key)
-  compare_models_random_data(tflite_model, tf_eval_func)
-
-
-def test_keras_model(filename, **kwargs):
-  """Validates the tf.keras model converts to a TFLite model.
-
-  Converts the tf.keras model to TFLite and checks the accuracy of the model on
-  random data.
-
-  Args:
-    filename: Full filepath of HDF5 file containing the tf.keras model.
-    **kwargs: Additional arguments to be passed into the converter.
-  """
-  converter = _lite.TocoConverter.from_keras_model_file(filename)
-  tflite_model = _convert(converter, **kwargs)
-
-  tf_eval_func = evaluate_keras_model(filename)
-  compare_models_random_data(tflite_model, tf_eval_func)
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
deleted file mode 100644
index 5f3355e734..0000000000
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for model_coverage_lib.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import tempfile
-
-from tensorflow.contrib.lite.python import lite
-from tensorflow.contrib.lite.testing.model_coverage import model_coverage_lib as model_coverage
-from tensorflow.python import keras
-from tensorflow.python.client import session
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import saved_model
-from tensorflow.python.training.training_util import write_graph
-
-
-class EvaluateFrozenGraph(test.TestCase):
-
-  def _saveFrozenGraph(self, sess):
-    graph_def_file = os.path.join(self.get_temp_dir(), 'model.pb')
-    write_graph(sess.graph_def, '', graph_def_file, False)
-    return graph_def_file
-
-  def testFloat(self):
-    with session.Session().as_default() as sess:
-      in_tensor = array_ops.placeholder(
-          shape=[1, 16, 16, 3], dtype=dtypes.float32)
-      _ = in_tensor + in_tensor
-    filename = self._saveFrozenGraph(sess)
-
-    model_coverage.test_frozen_graph(filename, ['Placeholder'], ['add'])
-
-  def testMultipleOutputs(self):
-    with session.Session().as_default() as sess:
-      in_tensor_1 = array_ops.placeholder(
-          shape=[1, 16], dtype=dtypes.float32, name='inputA')
-      in_tensor_2 = array_ops.placeholder(
-          shape=[1, 16], dtype=dtypes.float32, name='inputB')
-
-      weight = constant_op.constant(-1.0, shape=[16, 16])
-      bias = constant_op.constant(-1.0, shape=[16])
-      layer = math_ops.matmul(in_tensor_1, weight) + bias
-      _ = math_ops.reduce_mean(math_ops.square(layer - in_tensor_2))
-    filename = self._saveFrozenGraph(sess)
-
-    model_coverage.test_frozen_graph(filename, ['inputA', 'inputB'],
-                                     ['add', 'Mean'])
-
-
-class EvaluateSavedModel(test.TestCase):
-
-  def testFloat(self):
-    saved_model_dir = os.path.join(self.get_temp_dir(), 'simple_savedmodel')
-    with session.Session().as_default() as sess:
-      in_tensor_1 = array_ops.placeholder(
-          shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
-      in_tensor_2 = array_ops.placeholder(
-          shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
-      out_tensor = in_tensor_1 + in_tensor_2
-
-      inputs = {'x': in_tensor_1, 'y': in_tensor_2}
-      outputs = {'z': out_tensor}
-      saved_model.simple_save(sess, saved_model_dir, inputs, outputs)
-    model_coverage.test_saved_model(saved_model_dir)
-
-
-class EvaluateKerasModel(test.TestCase):
-
-  def _getSingleInputKerasModel(self):
-    """Returns single input Sequential tf.keras model."""
-    keras.backend.clear_session()
-
-    xs = [-1, 0, 1, 2, 3, 4]
-    ys = [-3, -1, 1, 3, 5, 7]
-
-    model = keras.Sequential([keras.layers.Dense(units=1, input_shape=[1])])
-    model.compile(optimizer='sgd', loss='mean_squared_error')
-    model.train_on_batch(xs, ys)
-    return model
-
-  def _saveKerasModel(self, model):
-    try:
-      fd, keras_file = tempfile.mkstemp('.h5')
-      keras.models.save_model(model, keras_file)
-    finally:
-      os.close(fd)
-    return keras_file
-
-  def testFloat(self):
-    model = self._getSingleInputKerasModel()
-    keras_file = self._saveKerasModel(model)
-
-    model_coverage.test_keras_model(keras_file)
-
-  def testPostTrainingQuantize(self):
-    model = self._getSingleInputKerasModel()
-    keras_file = self._saveKerasModel(model)
-
-    model_coverage.test_keras_model(keras_file, post_training_quantize=True)
-
-  def testConverterMode(self):
-    model = self._getSingleInputKerasModel()
-    keras_file = self._saveKerasModel(model)
-
-    model_coverage.test_keras_model(
-        keras_file, converter_mode=lite.ConverterMode.TOCO_EXTENDED)
-
-
-if __name__ == '__main__':
-  test.main()
-- 
GitLab


From e62cd643839d264659285a273bcf34df1057136e Mon Sep 17 00:00:00 2001
From: Nick Felt <nickfelt@google.com>
Date: Tue, 25 Sep 2018 16:05:40 -0700
Subject: [PATCH 0699/1357] Update tb-nightly dep to >= 1.12.0a0, < 1.13.0a0

Synchronize tf-nightly dep on current tb-nightly: https://pypi.org/project/tb-nightly/1.12.0a20180925/

PiperOrigin-RevId: 214521033
---
 tensorflow/tools/pip_package/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 1481b53920..b95e1f5c87 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -86,7 +86,7 @@ else:
 if 'tf_nightly' in project_name:
   for i, pkg in enumerate(REQUIRED_PACKAGES):
     if 'tensorboard' in pkg:
-      REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.11.0a0, < 1.12.0a0'
+      REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.12.0a0, < 1.13.0a0'
       break
 
 # weakref.finalize and enum were introduced in Python 3.4
-- 
GitLab


From 22776289fbe30ca7f4b1a80d7e23f5bddca391c2 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 25 Sep 2018 16:05:58 -0700
Subject: [PATCH 0700/1357] Add a new pass after convolution rewriter and pad
 insertion, to pattern match convolution forward + relu.

PiperOrigin-RevId: 214521083
---
 tensorflow/compiler/xla/service/gpu/BUILD     |  17 ++
 .../gpu/cudnn_fused_convolution_rewriter.cc   | 278 +++++++++++++++++
 .../gpu/cudnn_fused_convolution_rewriter.h    |  37 +++
 .../xla/service/gpu/nvptx_compiler.cc         |   2 +
 .../compiler/xla/service/gpu/pad_insertion.cc |   6 +-
 .../compiler/xla/service/gpu/tests/BUILD      |  14 +
 .../cudnn_fused_convolution_rewriter_test.cc  | 283 ++++++++++++++++++
 7 files changed, 635 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
 create mode 100644 tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h
 create mode 100644 tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 2775527e0c..51968d13d4 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -655,6 +655,7 @@ cc_library(
     deps = [
         ":cudnn_convolution_algorithm_picker",
         ":cudnn_convolution_rewriter",
+        ":cudnn_fused_convolution_rewriter",
         ":fusion_merger",
         ":gpu_constants",
         ":gpu_copy_insertion",
@@ -967,3 +968,19 @@ tf_cc_test(
         "@com_google_absl//absl/strings",
     ],
 )
+
+cc_library(
+    name = "cudnn_fused_convolution_rewriter",
+    srcs = ["cudnn_fused_convolution_rewriter.cc"],
+    hdrs = ["cudnn_fused_convolution_rewriter.h"],
+    deps = [
+        ":backend_configs",
+        ":ir_emission_utils",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_casting_utils",
+        "//tensorflow/compiler/xla/service:hlo_pass",
+        "//tensorflow/compiler/xla/service:pattern_matcher",
+        "//tensorflow/core:stream_executor_no_cuda",
+    ],
+)
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
new file mode 100644
index 0000000000..3761c19cfc
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
@@ -0,0 +1,278 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h"
+
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"
+#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
+#include "tensorflow/compiler/xla/service/pattern_matcher.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+
+namespace xla {
+namespace gpu {
+namespace {
+
+// Describes a matched pattern:
+//   max(0, alpha1 * conv(x, w) + alpha2 * side_input + broadcast(bias));
+// Where side_input has the shape of output buffer, and bias is a 1D array with
+// the dimension of number of output features.
+struct ConvWithRelu {
+  HloInstruction* maximum;
+  HloCustomCallInstruction* conv;
+  HloInstruction* bias;
+  HloInstruction* side_input;
+  HloConstantInstruction* alpha_conv;
+  HloConstantInstruction* alpha_side_input;
+};
+
+absl::optional<ConvWithRelu> FindConvWithRelu(HloInstruction* instr) {
+  using match::Add;
+  using match::AddAnyOrder;
+  using match::AnyOf;
+  using match::Broadcast;
+  using match::Constant;
+  using match::GetTupleElement;
+  using match::Maximum;
+  using match::MultiplyAnyOrder;
+  using match::Op;
+
+  // The pattern we want to match:
+  //   max(0, alpha1 * conv(x, w) + alpha2 * side_input + broadcast(bias));
+  //
+  // With its variants involving commute/reassociation of adds, multiplies, and
+  // max, and omission of alpha1, side_input, alpha2, or bias.
+
+  HloInstruction* relu_input;
+
+  // Match max(0, relu_input).
+  auto zero_pattern = Broadcast(match::ConstantScalar(0));
+  if (!Match(instr, Maximum(zero_pattern, Op(&relu_input))) &&
+      !Match(instr, Maximum(Op(&relu_input), zero_pattern))) {
+    return absl::nullopt;
+  }
+  HloInstruction* conv_instr = nullptr;
+  HloInstruction* alpha_conv_instr = nullptr;
+  HloInstruction* alpha_side_input_instr = nullptr;
+  HloInstruction* bias_broadcast_instr = nullptr;
+  HloInstruction* bias = nullptr;
+  HloInstruction* side_input = nullptr;
+
+  // These nodes will not be in the returned value, but we need to check them
+  // for single use.
+  HloInstruction *gte = nullptr, *add1 = nullptr, *add2 = nullptr,
+                 *mul1 = nullptr, *mul2 = nullptr;
+
+  const auto bias_pattern = Broadcast(&bias_broadcast_instr, Op(&bias));
+  const auto conv_pattern = [&] {
+    auto alpha_pattern = Broadcast(Constant(&alpha_conv_instr));
+    auto conv_pattern = GetTupleElement(
+        &gte, Op(&conv_instr).WithOpcode(HloOpcode::kCustomCall), 0);
+    return AnyOf<HloInstruction>(
+        MultiplyAnyOrder(&mul1, alpha_pattern, conv_pattern), conv_pattern);
+  }();
+  const auto side_input_pattern = [&] {
+    auto alpha_pattern = Broadcast(Constant(&alpha_side_input_instr));
+    // If bias is already matched, match arbitrary additional input as side
+    // input. Note this may force a cheap operation (e.g. broadcast) to be
+    // materialized into a large buffer, as large as the output buffer.
+    //
+    // TODO(timshen): If in practice there are significant false positives, we
+    // should fix it.
+    auto side_input_pattern = Op(&side_input);
+    return AnyOf<HloInstruction>(
+        MultiplyAnyOrder(&mul2, alpha_pattern, side_input_pattern),
+        side_input_pattern);
+  }();
+
+  {
+    // Try to match any of the following form of add, in any association:
+    //   addends[0]
+    //   addends[0] + addends[1]
+    //   addends[0] + addends[1] + addends[2]
+    //
+    // Then try to match each addend with one of the three patterns: bias, conv,
+    // or side_input. Notice that side_input matching must go last, as it
+    // also matches a conv or a bias.
+    HloInstruction* addends[3] = {nullptr, nullptr, nullptr};
+    auto add3_pattern = [&] {
+      auto add2_pattern = Add(&add1, Op(&addends[0]), Op(&addends[1]));
+      return AnyOf<HloInstruction>(
+          AddAnyOrder(&add2, add2_pattern, Op(&addends[2])), add2_pattern,
+          Op(&addends[0]));
+    }();
+    CHECK(Match(relu_input, add3_pattern));
+    for (auto addend : addends) {
+      if (addend) {
+        if (bias == nullptr && Match(addend, bias_pattern)) {
+          CHECK(bias);
+        } else if (conv_instr == nullptr && Match(addend, conv_pattern)) {
+          CHECK(conv_instr);
+        } else if (side_input == nullptr && Match(addend, side_input_pattern)) {
+          CHECK(side_input);
+        } else {
+          return absl::nullopt;
+        }
+      }
+    }
+  }
+
+  if (conv_instr == nullptr) {
+    return absl::nullopt;
+  }
+
+  for (HloInstruction* instr :
+       {conv_instr, bias_broadcast_instr, gte, add1, add2, mul1, mul2}) {
+    if (instr && instr->user_count() > 1) {
+      return absl::nullopt;
+    }
+  }
+
+  auto conv = Cast<HloCustomCallInstruction>(conv_instr);
+  auto bias_broadcast =
+      CastOrNull<HloBroadcastInstruction>(bias_broadcast_instr);
+
+  if (conv->custom_call_target() != kCudnnConvForwardCallTarget) {
+    return absl::nullopt;
+  }
+
+  if (bias_broadcast) {
+    // TODO(timshen): handle bias_broadcast_instr->dimensions() == {}.
+    if (bias_broadcast_instr->dimensions().size() != 1) {
+      return absl::nullopt;
+    }
+    if (bias_broadcast_instr->dimensions(0) !=
+        conv->convolution_dimension_numbers().output_feature_dimension()) {
+      return absl::nullopt;
+    }
+  }
+
+  return ConvWithRelu{
+      instr,
+      conv,
+      bias,
+      side_input,
+      CastOrNull<HloConstantInstruction>(alpha_conv_instr),
+      CastOrNull<HloConstantInstruction>(alpha_side_input_instr)};
+}
+
+StatusOr<std::unique_ptr<HloInstruction>> TryRewriteToCudnnForwardRelu(
+    ConvWithRelu match) {
+  auto conv = match.conv;
+
+  HloComputation* computation = conv->parent();
+  PrimitiveType element_type = conv->operand(0)->shape().element_type();
+
+  const auto get_alpha_value =
+      [](HloConstantInstruction* instr) -> StatusOr<double> {
+    TF_ASSIGN_OR_RETURN(
+        auto alpha,
+        Cast<HloConstantInstruction>(instr)->literal().Convert(F64));
+    return alpha.GetFirstElement<double>();
+  };
+
+  double alpha_conv = 1;
+  if (match.alpha_conv) {
+    TF_ASSIGN_OR_RETURN(alpha_conv, get_alpha_value(match.alpha_conv));
+  }
+
+  double alpha_side_input;
+  if (match.side_input) {
+    if (match.alpha_side_input) {
+      TF_ASSIGN_OR_RETURN(alpha_side_input,
+                          get_alpha_value(match.alpha_side_input));
+    } else {
+      alpha_side_input = 1;
+    }
+  } else {
+    CHECK(match.alpha_side_input == nullptr);
+    alpha_side_input = 0;
+  }
+
+  auto bias = match.bias;
+  if (!bias) {
+    auto zero = computation->AddInstruction(
+        HloInstruction::CreateConstant(LiteralUtil::Zero(element_type)));
+
+    int64 num_output_feature = conv->shape().tuple_shapes(0).dimensions(
+        conv->convolution_dimension_numbers().output_feature_dimension());
+    bias = computation->AddInstruction(HloInstruction::CreateBroadcast(
+        ShapeUtil::MakeShapeWithDescendingLayout(element_type,
+                                                 {num_output_feature}),
+        zero, {}));
+  }
+
+  CHECK(bias);
+  std::vector<HloInstruction*> args = {conv->mutable_operand(0),
+                                       conv->mutable_operand(1), bias};
+  if (match.side_input) {
+    args.push_back(match.side_input);
+  }
+  auto new_conv = computation->AddInstruction(HloInstruction::CreateCustomCall(
+      conv->shape(), args, kCudnnConvBiasActivationForwardCallTarget));
+  new_conv->set_window(conv->window());
+  new_conv->set_convolution_dimension_numbers(
+      conv->convolution_dimension_numbers());
+  TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig config,
+                      conv->backend_config<CudnnConvBackendConfig>());
+  config.set_activation_mode(
+      static_cast<int64>(se::dnn::ActivationMode::kRelu));
+  config.set_conv_result_scale(alpha_conv);
+  config.set_side_input_scale(alpha_side_input);
+  TF_RETURN_IF_ERROR(new_conv->set_backend_config(config));
+
+  VLOG(1) << "Rewriting " << conv->name() << " to " << new_conv->name();
+  return HloInstruction::CreateGetTupleElement(conv->shape().tuple_shapes(0),
+                                               new_conv, 0);
+}
+
+}  // namespace
+
+StatusOr<bool> CudnnFusedConvolutionRewriter::Run(HloModule* module) {
+  bool changed = false;
+  for (HloComputation* computation : module->MakeNonfusionComputations()) {
+    std::vector<ConvWithRelu> matches;
+    int num_forward_convs = 0;
+    for (auto instr : computation->instructions()) {
+      auto match = FindConvWithRelu(instr);
+      if (match.has_value()) {
+        matches.push_back(*match);
+      }
+      if (auto call = DynCast<HloCustomCallInstruction>(instr)) {
+        if (call->custom_call_target() == kCudnnConvForwardCallTarget) {
+          num_forward_convs++;
+        }
+      }
+    }
+    VLOG(1) << "Identified cuDNN forward conv + relu: " << matches.size()
+            << " out of " << num_forward_convs << " forward convs.";
+    std::vector<std::pair<HloInstruction*, std::unique_ptr<HloInstruction>>>
+        replacements;
+    for (const ConvWithRelu& match : matches) {
+      TF_ASSIGN_OR_RETURN(auto new_instr, TryRewriteToCudnnForwardRelu(match));
+      replacements.push_back({match.maximum, std::move(new_instr)});
+      changed = true;
+    }
+    for (auto& replacement : replacements) {
+      TF_RETURN_IF_ERROR(computation->ReplaceWithNewInstruction(
+          replacement.first, std::move(replacement.second)));
+    }
+  }
+  return changed;
+}
+
+}  // namespace gpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h
new file mode 100644
index 0000000000..bd12aadded
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h
@@ -0,0 +1,37 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_instructions.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+namespace gpu {
+
+class CudnnFusedConvolutionRewriter : public HloModulePass {
+ public:
+  absl::string_view name() const override {
+    return "cudnn-fused-convolution-rewriter";
+  }
+
+  StatusOr<bool> Run(HloModule* module) override;
+};
+
+}  // namespace gpu
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 01a18f4f8e..0b3b429710 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -40,6 +40,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h"
 #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h"
@@ -208,6 +209,7 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/false,
                                               /*allow_mixed_precision=*/false);
     pipeline.AddPass<CudnnConvolutionRewriter>();
+    pipeline.AddPass<CudnnFusedConvolutionRewriter>();
     pipeline.AddPass<PadInsertion>();
     if (IsVoltaOrLater(*stream_exec)) {
       pipeline.AddPass<PadForTensorCores>();
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
index 7e77dc9ac6..b42a19e3a2 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
@@ -30,7 +30,8 @@ namespace gpu {
 
 namespace {
 bool IsForwardConvolutionCanonical(const HloInstruction& conv) {
-  CHECK_EQ(conv.custom_call_target(), kCudnnConvForwardCallTarget);
+  CHECK(conv.custom_call_target() == kCudnnConvForwardCallTarget ||
+        conv.custom_call_target() == kCudnnConvBiasActivationForwardCallTarget);
   return window_util::HasSymmetricPadding(conv.window()) &&
          !window_util::HasNegativePadding(conv.window()) &&
          !window_util::HasDilation(conv.window());
@@ -385,7 +386,8 @@ StatusOr<bool> PadInsertion::RunOnComputation(HloComputation* computation) {
   }
   for (HloInstruction* instruction : convs) {
     const auto& target = instruction->custom_call_target();
-    if (target == kCudnnConvForwardCallTarget) {
+    if (target == kCudnnConvForwardCallTarget ||
+        target == kCudnnConvBiasActivationForwardCallTarget) {
       changed |= CanonicalizeForwardConvolution(instruction);
     } else if (target == kCudnnConvBackwardFilterCallTarget) {
       changed |= CanonicalizeBackwardFilterConvolution(instruction);
diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD
index 5da6f232d5..a725533567 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD
@@ -209,3 +209,17 @@ tf_cc_test(
         "//tensorflow/core:test_main",
     ],
 )
+
+tf_cc_test(
+    name = "cudnn_fused_convolution_rewriter_test",
+    srcs = ["cudnn_fused_convolution_rewriter_test.cc"],
+    tags = tf_cuda_tests_tags(),
+    deps = [
+        ":gpu_codegen_test",
+        "//tensorflow/compiler/xla/service:hlo_parser",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
+    ],
+)
diff --git a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc
new file mode 100644
index 0000000000..5632cac186
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc
@@ -0,0 +1,283 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "absl/strings/str_replace.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace gpu {
+namespace {
+
+class CudnnFusedConvolutionRewriterTest : public HloTestBase {
+ protected:
+  string GetOptimizedHlo(absl::string_view hlo_string) {
+    return backend()
+        .compiler()
+        ->RunHloPasses(ParseHloString(hlo_string, GetModuleConfigForTest())
+                           .ConsumeValueOrDie(),
+                       backend().default_stream_executor(),
+                       backend().memory_allocator())
+        .ConsumeValueOrDie()
+        ->ToString();
+  }
+
+  void TestMatchWithAllTypes(absl::string_view hlo_string) {
+    for (absl::string_view type : {"f16", "f32", "f64"}) {
+      const string hlo_with_new_type =
+          absl::StrReplaceAll(hlo_string, {{"TYPE", type}});
+      const string optimized_hlo_string = GetOptimizedHlo(hlo_with_new_type);
+      EXPECT_EQ(absl::string_view::npos,
+                optimized_hlo_string.find("__cudnn$convForward"))
+          << optimized_hlo_string;
+      EXPECT_NE(absl::string_view::npos,
+                optimized_hlo_string.find("__cudnn$convBiasActivationForward"))
+          << optimized_hlo_string;
+      EXPECT_TRUE(RunAndCompare(hlo_with_new_type, ErrorSpec{0.01}))
+          << optimized_hlo_string;
+    }
+  }
+
+  void TestNotMatchWithAllTypes(absl::string_view hlo_string) {
+    for (absl::string_view type : {"f16", "f32", "f64"}) {
+      const string hlo_with_new_type =
+          absl::StrReplaceAll(hlo_string, {{"TYPE", type}});
+      string optimized_hlo = GetOptimizedHlo(hlo_with_new_type);
+      EXPECT_NE(absl::string_view::npos,
+                optimized_hlo.find("__cudnn$convForward"))
+          << optimized_hlo;
+      EXPECT_EQ(absl::string_view::npos,
+                optimized_hlo.find("__cudnn$convBiasActivationForward"))
+          << optimized_hlo;
+    }
+  }
+};
+
+TEST_F(CudnnFusedConvolutionRewriterTest, TestConvOnly) {
+  // max(0, conv(x, w));
+  TestMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = TYPE[] constant(0)
+      zeros = TYPE[1,32,9,9] broadcast(zero), dimensions={}
+
+      input = TYPE[1,17,9,9] parameter(0)
+      filter = TYPE[3,3,17,32] parameter(1)
+
+      conv = TYPE[1,32,9,9] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_01io->bf01, feature_group_count=1
+      ROOT relu = TYPE[1,32,9,9] maximum(zeros, conv)
+    })");
+}
+
+TEST_F(CudnnFusedConvolutionRewriterTest, TestBias) {
+  // max(0, conv(x, w) + bias);
+  TestMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = TYPE[] constant(0)
+      zeros = TYPE[1,3,3,64] broadcast(zero), dimensions={}
+
+      input = TYPE[1,3,3,64] parameter(0)
+      filter = TYPE[3,3,64,64] parameter(1)
+      bias = TYPE[64] parameter(2)
+
+      conv = TYPE[1,3,3,64] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01io->b01f, feature_group_count=1
+      broadcasted_bias = TYPE[1,3,3,64] broadcast(bias), dimensions={3}
+      add1 = TYPE[1,3,3,64] add(conv, broadcasted_bias)
+      ROOT relu = TYPE[1,3,3,64] maximum(zeros, add1)
+    })");
+}
+
+TEST_F(CudnnFusedConvolutionRewriterTest, TestSideInputOnly) {
+  // max(0, conv(x, w) + side_input);
+  TestMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = TYPE[] constant(0)
+      zeros = TYPE[1,3,3,64] broadcast(zero), dimensions={}
+
+      input = TYPE[1,3,3,64] parameter(0)
+      filter = TYPE[3,3,64,64] parameter(1)
+      side_input = TYPE[1,3,3,64] parameter(2)
+
+      conv = TYPE[1,3,3,64] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01io->b01f, feature_group_count=1
+      add1 = TYPE[1,3,3,64] add(conv, side_input)
+      ROOT relu = TYPE[1,3,3,64] maximum(zeros, add1)
+    })");
+}
+
+TEST_F(CudnnFusedConvolutionRewriterTest, TestBiasAndSideInput) {
+  // max(0, conv(x, w) + side_input + bias);
+  TestMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = TYPE[] constant(0)
+      zeros = TYPE[1,3,3,64] broadcast(zero), dimensions={}
+
+      input = TYPE[1,3,3,64] parameter(0)
+      filter = TYPE[3,3,64,64] parameter(1)
+      side_input = TYPE[1,3,3,64] parameter(2)
+      bias = TYPE[64] parameter(3)
+
+      conv = TYPE[1,3,3,64] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01io->b01f, feature_group_count=1
+      broadcasted_bias = TYPE[1,3,3,64] broadcast(bias), dimensions={3}
+      add1 = TYPE[1,3,3,64] add(conv, broadcasted_bias)
+      add2 = TYPE[1,3,3,64] add(add1, side_input)
+      ROOT relu = TYPE[1,3,3,64] maximum(zeros, add2)
+    })");
+}
+
+TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConv) {
+  // max(0, 0.999994934 * conv(x, w));
+  TestMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = TYPE[] constant(0)
+      zeros = TYPE[1,32,9,9] broadcast(zero), dimensions={}
+      alpha_conv_scalar = TYPE[] constant(0.999994934)
+
+      input = TYPE[1,17,9,9] parameter(0)
+      filter = TYPE[3,3,17,32] parameter(1)
+
+      conv = TYPE[1,32,9,9] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_01io->bf01, feature_group_count=1
+      alpha_conv = TYPE[1,32,9,9] broadcast(alpha_conv_scalar), dimensions={}
+      scaled_conv = TYPE[1,32,9,9] multiply(conv, alpha_conv)
+      ROOT relu = TYPE[1,32,9,9] maximum(zeros, scaled_conv)
+    })");
+}
+
+TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndSideInput) {
+  // max(0, conv(x, w) + 0.899994934 * side_input);
+  TestMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = TYPE[] constant(0)
+      zeros = TYPE[1,3,3,64] broadcast(zero), dimensions={}
+      alpha_side_input_scalar = TYPE[] constant(0.899994934)
+      alpha_side_input = TYPE[1,3,3,64] broadcast(alpha_side_input_scalar), dimensions={}
+
+      input = TYPE[1,3,3,64] parameter(0)
+      filter = TYPE[3,3,64,64] parameter(1)
+      side_input = TYPE[1,3,3,64] parameter(2)
+
+      conv = TYPE[1,3,3,64] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01io->b01f, feature_group_count=1
+      scaled_side_input = TYPE[1,3,3,64] multiply(side_input, alpha_side_input)
+      add1 = TYPE[1,3,3,64] add(conv, scaled_side_input)
+      ROOT relu = TYPE[1,3,3,64] maximum(zeros, add1)
+    })");
+}
+
+TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndScaledSideInput) {
+  // max(0, 0.999994934 * conv(x, w) + 0.899994934 * side_input);
+  TestMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = TYPE[] constant(0)
+      zeros = TYPE[1,3,3,64] broadcast(zero), dimensions={}
+      alpha_conv_scalar = TYPE[] constant(0.999994934)
+      alpha_conv = TYPE[1,3,3,64] broadcast(alpha_conv_scalar), dimensions={}
+      alpha_side_input_scalar = TYPE[] constant(0.899994934)
+      alpha_side_input = TYPE[1,3,3,64] broadcast(alpha_side_input_scalar), dimensions={}
+
+      input = TYPE[1,3,3,64] parameter(0)
+      filter = TYPE[3,3,64,64] parameter(1)
+      side_input = TYPE[1,3,3,64] parameter(2)
+
+      conv = TYPE[1,3,3,64] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01io->b01f, feature_group_count=1
+      scaled_conv = TYPE[1,3,3,64] multiply(conv, alpha_conv)
+      scaled_side_input = TYPE[1,3,3,64] multiply(side_input, alpha_side_input)
+      add1 = TYPE[1,3,3,64] add(scaled_conv, scaled_side_input)
+      ROOT relu = TYPE[1,3,3,64] maximum(zeros, add1)
+    })");
+}
+
+TEST_F(CudnnFusedConvolutionRewriterTest,
+       TestScaledConvAndScaledSideInputWithBias) {
+  // max(0, 0.999994934 * conv(x, w) + 0.899994934 * side_input + bias);
+  TestMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = TYPE[] constant(0)
+      zeros = TYPE[1,3,3,64] broadcast(zero), dimensions={}
+      alpha_conv_scalar = TYPE[] constant(0.999994934)
+      alpha_conv = TYPE[1,3,3,64] broadcast(alpha_conv_scalar), dimensions={}
+      alpha_side_input_scalar = TYPE[] constant(0.899994934)
+      alpha_side_input = TYPE[1,3,3,64] broadcast(alpha_side_input_scalar), dimensions={}
+
+      input = TYPE[1,3,3,64] parameter(0)
+      filter = TYPE[3,3,64,64] parameter(1)
+      side_input = TYPE[1,3,3,64] parameter(2)
+      bias = TYPE[64] parameter(3)
+
+      conv = TYPE[1,3,3,64] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01io->b01f, feature_group_count=1
+      scaled_conv = TYPE[1,3,3,64] multiply(conv, alpha_conv)
+      scaled_side_input = TYPE[1,3,3,64] multiply(side_input, alpha_side_input)
+      broadcasted_bias = TYPE[1,3,3,64] broadcast(bias), dimensions={3}
+      add1 = TYPE[1,3,3,64] add(scaled_conv, broadcasted_bias)
+      add2 = TYPE[1,3,3,64] add(add1, scaled_side_input)
+      ROOT relu = TYPE[1,3,3,64] maximum(zeros, add2)
+    })");
+}
+
+TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchMaxZeroOnly) {
+  // max(0.1, conv(x, w)) shouldn't match.
+  TestNotMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      point_one = TYPE[] constant(0.1)
+      point_ones = TYPE[1,32,9,9] broadcast(point_one), dimensions={}
+
+      input = TYPE[1,17,9,9] parameter(0)
+      filter = TYPE[3,3,17,32] parameter(1)
+
+      conv = TYPE[1,32,9,9] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_01io->bf01, feature_group_count=1
+      ROOT relu = TYPE[1,32,9,9] maximum(point_ones, conv)
+    })");
+}
+
+TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchBroadcastedBiasOnly) {
+  // max(0, conv(x, w) + side_input1 + side_input2) shouldn't match.
+  TestNotMatchWithAllTypes(R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = TYPE[] constant(0)
+      zeros = TYPE[1,3,3,64] broadcast(zero), dimensions={}
+
+      input = TYPE[1,3,3,64] parameter(0)
+      filter = TYPE[3,3,64,64] parameter(1)
+      side_input1 = TYPE[1,3,3,64] parameter(2)
+      side_input2 = TYPE[1,3,3,64] parameter(3)
+
+      conv = TYPE[1,3,3,64] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01io->b01f, feature_group_count=1
+      add1 = TYPE[1,3,3,64] add(conv, side_input2)
+      add2 = TYPE[1,3,3,64] add(add1, side_input1)
+      ROOT relu = TYPE[1,3,3,64] maximum(zeros, add2)
+    })");
+}
+
+}  // namespace
+}  // namespace gpu
+}  // namespace xla
-- 
GitLab


From 4b780e46dcb29a1fb7a3ab81d95b3f8376101989 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Tue, 25 Sep 2018 16:08:24 -0700
Subject: [PATCH 0701/1357] Remove unneeded locks in session logging.

PiperOrigin-RevId: 214521486
---
 tensorflow/python/client/session_ref.cc | 40 +++++++++++++++----------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/client/session_ref.cc b/tensorflow/python/client/session_ref.cc
index b2300df0b6..4d361612b7 100644
--- a/tensorflow/python/client/session_ref.cc
+++ b/tensorflow/python/client/session_ref.cc
@@ -93,23 +93,35 @@ class SessionLogger {
  public:
   SessionLogger() {
     std::string log_name = getenv("TF_REPLAY_LOG_FILE");
+    LOG(INFO) << "Constructing new session logger for " << log_name;
     TF_CHECK_OK(
         Env::Default()->RecursivelyCreateDir(string(io::Dirname(log_name))));
     Env::Default()->DeleteFile(log_name).IgnoreError();
-    TF_CHECK_OK(Env::Default()->NewWritableFile(log_name, &log_file_));
 
+    TF_CHECK_OK(Env::Default()->NewWritableFile(log_name, &log_file_));
     log_writer_ = absl::make_unique<io::RecordWriter>(log_file_.get());
   }
 
-  Status RecordCreateSession(Session* session) {
-    LOG(INFO) << "Capturing devices for session.";
+  ~SessionLogger() {
+    log_writer_->Close().IgnoreError();
+    log_writer_.release();
+    log_file_->Close().IgnoreError();
+  }
+
+  Status RecordNewSession(Session* session) {
+    LOG(INFO) << "New session discovered.  Capturing devices...";
     ReplayOp op;
     NewReplaySession* req = op.mutable_new_replay_session();
 
     std::vector<DeviceAttributes> devices;
-    TF_CHECK_OK(session->ListDevices(&devices));
-    for (const DeviceAttributes& dev : devices) {
-      *req->mutable_devices()->add_local_device() = dev;
+    Status status = session->ListDevices(&devices);
+    if (status.ok()) {
+      LOG(INFO) << "Found: " << devices.size() << " devices.";
+      for (const DeviceAttributes& dev : devices) {
+        *req->mutable_devices()->add_local_device() = dev;
+      }
+    } else {
+      LOG(WARNING) << "Failed to list devices on session. Continuing.";
     }
 
     req->set_session_handle(SessionToHandle(session));
@@ -226,7 +238,6 @@ class SessionLogger {
 
   // N.B. RunOptions is not stored (it has no entry in CloseRequest)
   Status RecordClose(Session* session, const RunOptions& run_options) {
-    mutex_lock l(log_mutex_);
     ReplayOp op;
     CloseSessionRequest* req = op.mutable_close_session();
     req->set_session_handle(SessionToHandle(session));
@@ -241,7 +252,6 @@ class SessionLogger {
 
   Status RecordListDevices(Session* session,
                            std::vector<DeviceAttributes>* response) {
-    mutex_lock l(log_mutex_);
     ReplayOp op;
     ListDevicesRequest* req = op.mutable_list_devices();
     ListDevicesResponse* resp = op.mutable_list_devices_response();
@@ -258,7 +268,6 @@ class SessionLogger {
                          const std::vector<string>& output_names,
                          const std::vector<string>& target_nodes,
                          string* handle) {
-    mutex_lock l(log_mutex_);
     ReplayOp op;
     PartialRunSetupRequest* req = op.mutable_partial_run_setup();
     req->set_session_handle(SessionToHandle(session));
@@ -362,18 +371,19 @@ class SessionLogger {
 
  private:
   Status Flush(const ReplayOp& op) {
+    mutex_lock l(log_mutex_);
+
     string buf;
     op.SerializeToString(&buf);
     TF_RETURN_IF_ERROR(log_writer_->WriteRecord(buf));
 
-    // Flushing the RecordWriter _does not_ flush the underlying file.
-    TF_RETURN_IF_ERROR(log_writer_->Flush());
-    return log_file_->Flush();
+    // TODO(b/116624106): Not all file-systems respect calls to `Sync()`
+    return log_file_->Sync();
   }
 
-  mutex log_mutex_;
-  std::unique_ptr<io::RecordWriter> log_writer_;
   std::unique_ptr<WritableFile> log_file_;
+  std::unique_ptr<io::RecordWriter> log_writer_;
+  mutex log_mutex_;
 };
 
 static SessionLogger* global_session_logger() {
@@ -384,7 +394,7 @@ static SessionLogger* global_session_logger() {
 SessionRef::SessionRef(Session* session) : session_(session) {
   if (getenv("TF_REPLAY_LOG_FILE") != nullptr) {
     logger_ = global_session_logger();
-    logger_->RecordCreateSession(this->session_.get()).IgnoreError();
+    logger_->RecordNewSession(this->session_.get()).IgnoreError();
   } else {
     logger_ = nullptr;
   }
-- 
GitLab


From 4177bc92c3b9301877521ba9b26377b80fa27601 Mon Sep 17 00:00:00 2001
From: Goutham Bhat <goutham@google.com>
Date: Tue, 25 Sep 2018 16:57:18 -0700
Subject: [PATCH 0702/1357] Add a caveat about using early stopping library in
 distributed mode.

PiperOrigin-RevId: 214528768
---
 .../python/estimator/early_stopping.py        | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tensorflow/contrib/estimator/python/estimator/early_stopping.py b/tensorflow/contrib/estimator/python/estimator/early_stopping.py
index e6e25e319f..cafe8279c7 100644
--- a/tensorflow/contrib/estimator/python/estimator/early_stopping.py
+++ b/tensorflow/contrib/estimator/python/estimator/early_stopping.py
@@ -57,6 +57,13 @@ def make_early_stopping_hook(estimator,
   tf.estimator.train_and_evaluate(estimator, train_spec, ...)
   ```
 
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
   Args:
     estimator: A `tf.estimator.Estimator` instance.
     should_stop_fn: `callable`, function that takes no arguments and returns a
@@ -109,6 +116,13 @@ def stop_if_higher_hook(estimator,
   tf.estimator.train_and_evaluate(estimator, train_spec, ...)
   ```
 
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
   Args:
     estimator: A `tf.estimator.Estimator` instance.
     metric_name: `str`, metric to track. "loss", "accuracy", etc.
@@ -158,6 +172,13 @@ def stop_if_lower_hook(estimator,
   tf.estimator.train_and_evaluate(estimator, train_spec, ...)
   ```
 
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
   Args:
     estimator: A `tf.estimator.Estimator` instance.
     metric_name: `str`, metric to track. "loss", "accuracy", etc.
@@ -207,6 +228,13 @@ def stop_if_no_increase_hook(estimator,
   tf.estimator.train_and_evaluate(estimator, train_spec, ...)
   ```
 
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
   Args:
     estimator: A `tf.estimator.Estimator` instance.
     metric_name: `str`, metric to track. "loss", "accuracy", etc.
@@ -257,6 +285,13 @@ def stop_if_no_decrease_hook(estimator,
   tf.estimator.train_and_evaluate(estimator, train_spec, ...)
   ```
 
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
   Args:
     estimator: A `tf.estimator.Estimator` instance.
     metric_name: `str`, metric to track. "loss", "accuracy", etc.
-- 
GitLab


From 05d103bf25110157c34b9ea6420061a23aa6d4ec Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Tue, 25 Sep 2018 17:20:42 -0700
Subject: [PATCH 0703/1357] [XLA:GPU] Pad convolution features of size 3 up to
 4.

PiperOrigin-RevId: 214532043
---
 .../xla/service/gpu/pad_for_tensor_cores.cc   | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
index 2d270f630b..e3869b5c36 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
@@ -37,15 +37,32 @@ static constexpr int64 kDesiredNumFeaturesFactor = 8;
 // there's additional room for speedups.  Achieving those speedups without also
 // slowing other things down will likely require a more sophisticated heuristic,
 // possibly some form of auto-tuning.
-static constexpr double kMaxBytesTouchedIncrease = 1.2;
+//
+// This value should be >= 4/3, otherwise the "dims of size 3 padded up to 4"
+// special case inside PadShape won't fire.
+static constexpr double kMaxBytesTouchedIncrease = 1.35;
 
 // Pads the given dimensions in the given shape up to a multiple of
 // kDesiredNumFeaturesFactor.
 static Shape PadShape(Shape s, absl::Span<const int64> dims) {
   for (int64 dim : dims) {
     int64 dim_to_pad_size = s.dimensions(dim);
-    int64 new_dim_to_pad_size =
-        RoundUpToNearest(dim_to_pad_size, kDesiredNumFeaturesFactor);
+
+    // Round dim_to_pad_size up to the next multiple of
+    // kDesiredNumFeaturesFactor.
+    //
+    // Special case: dims of size 3 are rounded up to 4, not
+    // kDesiredNumFeaturesFactor.  Empirically (and on the advice of nvidia),
+    // this helps, but as of writing, it's not supported by anything in the
+    // cudnn docs.
+    int64 new_dim_to_pad_size;
+    if (dim_to_pad_size == 3) {
+      new_dim_to_pad_size = 4;
+    } else {
+      new_dim_to_pad_size =
+          RoundUpToNearest(dim_to_pad_size, kDesiredNumFeaturesFactor);
+    }
+
     s.set_dimensions(dim, new_dim_to_pad_size);
   }
   return s;
-- 
GitLab


From f97610daf89572e52912ddc5bf87576cc9e82f66 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 25 Sep 2018 17:25:56 -0700
Subject: [PATCH 0704/1357] Fix dynamic kernels dependencies for python build
 and test rules.

Bazel does not allow python rules to directly depend on c++ rules.
So I have to separately manage static dependencies, unfortunately avoiding
"kernels" option for now.

PiperOrigin-RevId: 214532631
---
 tensorflow/core/BUILD     | 14 ++++++++++++--
 tensorflow/tensorflow.bzl |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 59b7dd04e9..bc0bfb793c 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -144,6 +144,7 @@ load(
 )
 load(
     "//tensorflow/core:platform/default/build_config_root.bzl",
+    "if_dynamic_kernels",
     "if_static",
     "tf_cuda_tests_tags",
 )
@@ -1292,8 +1293,8 @@ cc_library(
 
 # This includes implementations of all kernels built into TensorFlow.
 cc_library(
-    name = "all_kernels",
-    visibility = ["//visibility:public"],
+    name = "all_kernels_statically_linked",
+    visibility = ["//visibility:private"],
     deps = [
         "//tensorflow/core/kernels:array",
         "//tensorflow/core/kernels:audio",
@@ -1372,6 +1373,15 @@ cc_library(
     ]),
 )
 
+cc_library(
+    name = "all_kernels",
+    visibility = ["//visibility:public"],
+    deps = if_dynamic_kernels(
+        [],
+        otherwise = [":all_kernels_statically_linked"],
+    ),
+)
+
 tf_cuda_library(
     name = "tensorflow_opensource",
     copts = tf_copts(),
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 689679c838..7ddaf7806e 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1677,7 +1677,7 @@ def py_test(deps = [], data = [], kernels = [], **kwargs):
         deps = select({
             "//conditions:default": deps,
             clean_dep("//tensorflow:no_tensorflow_py_deps"): [],
-        }) + tf_binary_dynamic_kernel_deps(kernels),
+        }),
         data = data + select({
             "//conditions:default": [],
             clean_dep("//tensorflow:no_tensorflow_py_deps"): ["//tensorflow/tools/pip_package:win_pip_package_marker"],
-- 
GitLab


From a7f14807417ea78aee8ea275536902f0aaa94fd4 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 25 Sep 2018 17:26:23 -0700
Subject: [PATCH 0705/1357] Reland "Add Interpreter.Options Java API for
 interpreter configuration"

The original CL broke the InterpreterTest due to use of a newly
deprecated API. This has been fixed, and deprecated API usage in the
samples has also been updated.

PiperOrigin-RevId: 214532691
---
 .../Camera2BasicFragment.java                 | 26 +++---
 .../tflitecamerademo/ImageClassifier.java     | 29 ++++--
 .../org/tensorflow/ovic/OvicClassifier.java   |  2 +-
 .../java/org/tensorflow/lite/Interpreter.java | 93 +++++++++++++------
 .../lite/NativeInterpreterWrapper.java        | 36 ++++---
 .../org/tensorflow/lite/InterpreterTest.java  | 42 +++------
 .../lite/NativeInterpreterWrapperTest.java    |  9 ++
 .../java/org/tensorflow/lite/TestHelper.java  | 15 ---
 8 files changed, 141 insertions(+), 111 deletions(-)

diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java
index 4f5662bc2d..3596e42011 100644
--- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java
+++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java
@@ -58,9 +58,9 @@ import android.view.View;
 import android.view.ViewGroup;
 import android.widget.CompoundButton;
 import android.widget.NumberPicker;
-import android.widget.ToggleButton;
 import android.widget.TextView;
 import android.widget.Toast;
+import android.widget.ToggleButton;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -305,22 +305,24 @@ public class Camera2BasicFragment extends Fragment
     textView = (TextView) view.findViewById(R.id.text);
     toggle = (ToggleButton) view.findViewById(R.id.button);
 
-    toggle.setOnCheckedChangeListener(new CompoundButton.OnCheckedChangeListener() {
-      public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) {
-        classifier.setUseNNAPI(isChecked);
-      }
-    });
+    toggle.setOnCheckedChangeListener(
+        new CompoundButton.OnCheckedChangeListener() {
+          public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) {
+            backgroundHandler.post(() -> classifier.setUseNNAPI(isChecked));
+          }
+        });
 
     np = (NumberPicker) view.findViewById(R.id.np);
     np.setMinValue(1);
     np.setMaxValue(10);
     np.setWrapSelectorWheel(true);
-    np.setOnValueChangedListener(new NumberPicker.OnValueChangeListener() {
-      @Override
-      public void onValueChange(NumberPicker picker, int oldVal, int newVal){
-        classifier.setNumThreads(newVal);
-      }
-    });
+    np.setOnValueChangedListener(
+        new NumberPicker.OnValueChangeListener() {
+          @Override
+          public void onValueChange(NumberPicker picker, int oldVal, int newVal) {
+            backgroundHandler.post(() -> classifier.setNumThreads(newVal));
+          }
+        });
   }
 
   /** Load the model and labels. */
diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
index 7bb6afd9d8..2d11a57434 100644
--- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
+++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
@@ -59,9 +59,15 @@ public abstract class ImageClassifier {
 
   private static final int DIM_PIXEL_SIZE = 3;
 
-  /* Preallocated buffers for storing image data in. */
+  /** Preallocated buffers for storing image data in. */
   private int[] intValues = new int[getImageSizeX() * getImageSizeY()];
 
+  /** Options for configuring the Interpreter. */
+  private final Interpreter.Options tfliteOptions = new Interpreter.Options();
+
+  /** The loaded TensorFlow Lite model. */
+  private MappedByteBuffer tfliteModel;
+
   /** An instance of the driver class to run model inference with Tensorflow Lite. */
   protected Interpreter tflite;
 
@@ -89,7 +95,8 @@ public abstract class ImageClassifier {
 
   /** Initializes an {@code ImageClassifier}. */
   ImageClassifier(Activity activity) throws IOException {
-    tflite = new Interpreter(loadModelFile(activity));
+    tfliteModel = loadModelFile(activity);
+    tflite = new Interpreter(tfliteModel, tfliteOptions);
     labelList = loadLabelList(activity);
     imgData =
         ByteBuffer.allocateDirect(
@@ -150,20 +157,28 @@ public abstract class ImageClassifier {
     }
   }
 
+  private void recreateInterpreter() {
+    if (tflite != null) {
+      tflite.close();
+      tflite = new Interpreter(tfliteModel, tfliteOptions);
+    }
+  }
+
   public void setUseNNAPI(Boolean nnapi) {
-    if (tflite != null)
-        tflite.setUseNNAPI(nnapi);
+    tfliteOptions.setUseNNAPI(nnapi);
+    recreateInterpreter();
   }
 
-  public void setNumThreads(int num_threads) {
-    if (tflite != null)
-        tflite.setNumThreads(num_threads);
+  public void setNumThreads(int numThreads) {
+    tfliteOptions.setNumThreads(numThreads);
+    recreateInterpreter();
   }
 
   /** Closes tflite to release resources. */
   public void close() {
     tflite.close();
     tflite = null;
+    tfliteModel = null;
   }
 
   /** Reads label list from Assets. */
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
index 4cf51bb0fa..fd610b054f 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
@@ -74,7 +74,7 @@ public class OvicClassifier {
     }
     labelList = loadLabelList(labelInputStream);
     // OVIC uses one thread for CPU inference.
-    tflite = new Interpreter(model, 1);
+    tflite = new Interpreter(model, new Interpreter.Options().setNumThreads(1));
     inputDims = TestHelper.getInputDims(tflite, 0);
     if (inputDims.length != 4) {
       throw new RuntimeException("The model's input dimensions must be 4 (BWHC).");
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
index b84720ae8e..ffb04496cb 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
@@ -17,7 +17,6 @@ package org.tensorflow.lite;
 
 import java.io.File;
 import java.nio.ByteBuffer;
-import java.nio.MappedByteBuffer;
 import java.util.HashMap;
 import java.util.Map;
 import org.checkerframework.checker.nullness.qual.NonNull;
@@ -56,16 +55,36 @@ import org.checkerframework.checker.nullness.qual.NonNull;
  */
 public final class Interpreter implements AutoCloseable {
 
+  /** An options class for controlling runtime interpreter behavior. */
+  public static class Options {
+    public Options() {}
+
+    /**
+     * Sets the number of threads to be used for ops that support multi-threading. Defaults to a
+     * platform-dependent value.
+     */
+    public Options setNumThreads(int numThreads) {
+      this.numThreads = numThreads;
+      return this;
+    }
+
+    /** Sets whether to use NN API (if available) for op execution. Defaults to false (disabled). */
+    public Options setUseNNAPI(boolean useNNAPI) {
+      this.useNNAPI = useNNAPI;
+      return this;
+    }
+
+    int numThreads = -1;
+    boolean useNNAPI = false;
+  }
+
   /**
    * Initializes a {@code Interpreter}
    *
    * @param modelFile: a File of a pre-trained TF Lite model.
    */
   public Interpreter(@NonNull File modelFile) {
-    if (modelFile == null) {
-      return;
-    }
-    wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath());
+    this(modelFile, /*options = */ null);
   }
 
   /**
@@ -73,12 +92,22 @@ public final class Interpreter implements AutoCloseable {
    *
    * @param modelFile: a file of a pre-trained TF Lite model
    * @param numThreads: number of threads to use for inference
+   * @deprecated Prefer using the {@link #Interpreter(File,Options)} constructor. This method will
+   *     be removed in a future release.
    */
+  @Deprecated
   public Interpreter(@NonNull File modelFile, int numThreads) {
-    if (modelFile == null) {
-      return;
-    }
-    wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath(), numThreads);
+    this(modelFile, new Options().setNumThreads(numThreads));
+  }
+
+  /**
+   * Initializes a {@code Interpreter} and specifies the number of threads used for inference.
+   *
+   * @param modelFile: a file of a pre-trained TF Lite model
+   * @param options: a set of options for customizing interpreter behavior
+   */
+  public Interpreter(@NonNull File modelFile, Options options) {
+    wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath(), options);
   }
 
   /**
@@ -89,7 +118,7 @@ public final class Interpreter implements AutoCloseable {
    * direct {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
    */
   public Interpreter(@NonNull ByteBuffer byteBuffer) {
-    wrapper = new NativeInterpreterWrapper(byteBuffer);
+    this(byteBuffer, /* options= */ null);
   }
 
   /**
@@ -99,30 +128,25 @@ public final class Interpreter implements AutoCloseable {
    * <p>The ByteBuffer should not be modified after the construction of a {@code Interpreter}. The
    * {@code ByteBuffer} can be either a {@code MappedByteBuffer} that memory-maps a model file, or a
    * direct {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
-   */
-  public Interpreter(@NonNull ByteBuffer byteBuffer, int numThreads) {
-    wrapper = new NativeInterpreterWrapper(byteBuffer, numThreads);
-  }
-
-  /**
-   * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file.
    *
-   * <p>The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code
-   * Interpreter}.
+   * @deprecated Prefer using the {@link #Interpreter(ByteBuffer,Options)} constructor. This method
+   *     will be removed in a future release.
    */
-  public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer) {
-    wrapper = new NativeInterpreterWrapper(mappedByteBuffer);
+  @Deprecated
+  public Interpreter(@NonNull ByteBuffer byteBuffer, int numThreads) {
+    this(byteBuffer, new Options().setNumThreads(numThreads));
   }
 
   /**
-   * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file and
-   * specifies the number of threads used for inference.
+   * Initializes a {@code Interpreter} with a {@code ByteBuffer} of a model file and a set of custom
+   * {@link #Options}.
    *
-   * <p>The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code
-   * Interpreter}.
+   * <p>The ByteBuffer should not be modified after the construction of a {@code Interpreter}. The
+   * {@code ByteBuffer} can be either a {@code MappedByteBuffer} that memory-maps a model file, or a
+   * direct {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
    */
-  public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer, int numThreads) {
-    wrapper = new NativeInterpreterWrapper(mappedByteBuffer, numThreads);
+  public Interpreter(@NonNull ByteBuffer byteBuffer, Options options) {
+    wrapper = new NativeInterpreterWrapper(byteBuffer, options);
   }
 
   /**
@@ -240,12 +264,25 @@ public final class Interpreter implements AutoCloseable {
     return wrapper.getLastNativeInferenceDurationNanoseconds();
   }
 
-  /** Turns on/off Android NNAPI for hardware acceleration when it is available. */
+  /**
+   * Turns on/off Android NNAPI for hardware acceleration when it is available.
+   *
+   * @deprecated Prefer using {@link Options#setUseNNAPI(boolean)} directly for enabling NN API.
+   *     This method will be removed in a future release.
+   */
+  @Deprecated
   public void setUseNNAPI(boolean useNNAPI) {
     checkNotClosed();
     wrapper.setUseNNAPI(useNNAPI);
   }
 
+  /**
+   * Sets the number of threads to be used for ops that support multi-threading.
+   *
+   * @deprecated Prefer using {@link Options#setNumThreads(int)} directly for controlling thread
+   *     multi-threading. This method will be removed in a future release.
+   */
+  @Deprecated
   public void setNumThreads(int numThreads) {
     checkNotClosed();
     wrapper.setNumThreads(numThreads);
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
index fa25082304..6feff9a618 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
@@ -23,7 +23,7 @@ import java.util.HashMap;
 import java.util.Map;
 
 /**
- * A wrapper wraps native interpreter and controls model execution.
+ * An internal wrapper that wraps native interpreter and controls model execution.
  *
  * <p><b>WARNING:</b> Resources consumed by the {@code NativeInterpreterWrapper} object must be
  * explicitly freed by invoking the {@link #close()} method when the {@code
@@ -32,36 +32,29 @@ import java.util.Map;
 final class NativeInterpreterWrapper implements AutoCloseable {
 
   NativeInterpreterWrapper(String modelPath) {
-    this(modelPath, /* numThreads= */ -1);
+    this(modelPath, /* options= */ null);
   }
 
-  NativeInterpreterWrapper(String modelPath, int numThreads) {
+  NativeInterpreterWrapper(String modelPath, Interpreter.Options options) {
+    if (options == null) {
+      options = new Interpreter.Options();
+    }
     errorHandle = createErrorReporter(ERROR_BUFFER_SIZE);
     modelHandle = createModel(modelPath, errorHandle);
-    interpreterHandle = createInterpreter(modelHandle, errorHandle, numThreads);
+    interpreterHandle = createInterpreter(modelHandle, errorHandle, options.numThreads);
     isMemoryAllocated = true;
     inputTensors = new Tensor[getInputCount(interpreterHandle)];
     outputTensors = new Tensor[getOutputCount(interpreterHandle)];
   }
 
-  /**
-   * Initializes a {@code NativeInterpreterWrapper} with a {@code ByteBuffer}. The ByteBuffer should
-   * not be modified after the construction of a {@code NativeInterpreterWrapper}. The {@code
-   * ByteBuffer} can be either a {@code MappedByteBuffer} that memory-maps a model file, or a direct
-   * {@code ByteBuffer} of nativeOrder() that contains the bytes content of a model.
-   */
   NativeInterpreterWrapper(ByteBuffer byteBuffer) {
-    this(byteBuffer, /* numThreads= */ -1);
+    this(byteBuffer, /* options= */ null);
   }
 
-  /**
-   * Initializes a {@code NativeInterpreterWrapper} with a {@code ByteBuffer} and specifies the
-   * number of inference threads. The ByteBuffer should not be modified after the construction of a
-   * {@code NativeInterpreterWrapper}. The {@code ByteBuffer} can be either a {@code
-   * MappedByteBuffer} that memory-maps a model file, or a direct {@code ByteBuffer} of
-   * nativeOrder() that contains the bytes content of a model.
-   */
-  NativeInterpreterWrapper(ByteBuffer buffer, int numThreads) {
+  NativeInterpreterWrapper(ByteBuffer buffer, Interpreter.Options options) {
+    if (options == null) {
+      options = new Interpreter.Options();
+    }
     if (buffer == null
         || (!(buffer instanceof MappedByteBuffer)
             && (!buffer.isDirect() || buffer.order() != ByteOrder.nativeOrder()))) {
@@ -72,10 +65,13 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     modelByteBuffer = buffer;
     errorHandle = createErrorReporter(ERROR_BUFFER_SIZE);
     modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle);
-    interpreterHandle = createInterpreter(modelHandle, errorHandle, numThreads);
+    interpreterHandle = createInterpreter(modelHandle, errorHandle, options.numThreads);
     isMemoryAllocated = true;
     inputTensors = new Tensor[getInputCount(interpreterHandle)];
     outputTensors = new Tensor[getOutputCount(interpreterHandle)];
+    if (options.useNNAPI) {
+      setUseNNAPI(options.useNNAPI);
+    }
   }
 
   /** Releases resources associated with this {@code NativeInterpreterWrapper}. */
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
index 9070b788b6..dfdd7d22b0 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
@@ -54,6 +54,18 @@ public final class InterpreterTest {
     interpreter.close();
   }
 
+  @Test
+  public void testInterpreterWithOptions() throws Exception {
+    Interpreter interpreter =
+        new Interpreter(MODEL_FILE, new Interpreter.Options().setNumThreads(2).setUseNNAPI(true));
+    assertThat(interpreter).isNotNull();
+    assertThat(interpreter.getInputTensorCount()).isEqualTo(1);
+    assertThat(interpreter.getInputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
+    assertThat(interpreter.getOutputTensorCount()).isEqualTo(1);
+    assertThat(interpreter.getOutputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
+    interpreter.close();
+  }
+
   @Test
   public void testRunWithMappedByteBufferModel() throws Exception {
     Path path = MODEL_FILE.toPath();
@@ -303,32 +315,6 @@ public final class InterpreterTest {
     assertThat(index).isEqualTo(0);
   }
 
-  @Test
-  public void testTurnOffNNAPI() throws Exception {
-    Path path = MODEL_FILE.toPath();
-    FileChannel fileChannel =
-        (FileChannel) Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ));
-    MappedByteBuffer mappedByteBuffer =
-        fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size());
-    Interpreter interpreter = new Interpreter(mappedByteBuffer);
-    interpreter.setUseNNAPI(true);
-    float[] oneD = {1.23f, 6.54f, 7.81f};
-    float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD};
-    float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD};
-    float[][][][] fourD = {threeD, threeD};
-    float[][][][] parsedOutputs = new float[2][8][8][3];
-    interpreter.run(fourD, parsedOutputs);
-    float[] outputOneD = parsedOutputs[0][0][0];
-    float[] expected = {3.69f, 19.62f, 23.43f};
-    assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder();
-    interpreter.setUseNNAPI(false);
-    interpreter.run(fourD, parsedOutputs);
-    outputOneD = parsedOutputs[0][0][0];
-    assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder();
-    interpreter.close();
-    fileChannel.close();
-  }
-
   @Test
   public void testTurnOnNNAPI() throws Exception {
     Path path = MODEL_FILE.toPath();
@@ -336,8 +322,8 @@ public final class InterpreterTest {
         (FileChannel) Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ));
     MappedByteBuffer mappedByteBuffer =
         fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size());
-    Interpreter interpreter = new Interpreter(mappedByteBuffer);
-    interpreter.setUseNNAPI(true);
+    Interpreter interpreter =
+        new Interpreter(mappedByteBuffer, new Interpreter.Options().setUseNNAPI(true));
     float[] oneD = {1.23f, 6.54f, 7.81f};
     float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD};
     float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD};
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
index 9c4a5acd79..270bd6703a 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
@@ -62,6 +62,15 @@ public final class NativeInterpreterWrapperTest {
     wrapper.close();
   }
 
+  @Test
+  public void testConstructorWithOptions() {
+    NativeInterpreterWrapper wrapper =
+        new NativeInterpreterWrapper(
+            FLOAT_MODEL_PATH, new Interpreter.Options().setNumThreads(2).setUseNNAPI(true));
+    assertThat(wrapper).isNotNull();
+    wrapper.close();
+  }
+
   @Test
   public void testConstructorWithInvalidModel() {
     try {
diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java
index 38b740021b..af20e3280b 100644
--- a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java
+++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java
@@ -18,21 +18,6 @@ package org.tensorflow.lite;
 /** A helper class for internal tests. */
 public class TestHelper {
 
-  /**
-   * Turns on/off NNAPI of an {@code Interpreter}.
-   *
-   * @param interpreter an instance of {@code Interpreter}. If it is not initialized, an {@code
-   *     IllegalArgumentException} will be thrown.
-   * @param useNNAPI a boolean value indicating to turn on or off NNAPI.
-   */
-  public static void setUseNNAPI(Interpreter interpreter, boolean useNNAPI) {
-    if (interpreter != null && interpreter.wrapper != null) {
-      interpreter.wrapper.setUseNNAPI(useNNAPI);
-    } else {
-      throw new IllegalArgumentException("Interpreter has not initialized; Failed to setUseNNAPI.");
-    }
-  }
-
   /**
    * Gets the last inference duration in nanoseconds. It returns null if there is no previous
    * inference run or the last inference run failed.
-- 
GitLab


From c1e303ed8fa1bf11aaea16e68b14ba2f5ab5dde0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 17:27:27 -0700
Subject: [PATCH 0706/1357] Support dynamic LR for Keras optimizer by setting
 the global Keras session.

PiperOrigin-RevId: 214532827
---
 .../contrib/tpu/python/tpu/keras_support.py   | 564 +++++++++---------
 1 file changed, 294 insertions(+), 270 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 03e06b8142..f67e0e6aca 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -46,12 +46,12 @@ from __future__ import print_function
 
 import abc
 import collections
-import contextlib
 import re
 import sys
 import time
 
 import numpy as np
+import six
 
 from tensorflow.contrib.cluster_resolver.python.training import tpu_cluster_resolver as tpu_cluster_resolver_lib
 from tensorflow.contrib.framework.python.framework import experimental
@@ -90,34 +90,34 @@ from tensorflow.python.ops import gen_linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 
 
-_SESSIONS = {}
-
-
-def tpu_session(cluster_resolver):
+def setup_tpu_session(cluster_resolver):
   """Construct or return a `tf.Session` connected to the given cluster."""
-  global _SESSIONS
   master = cluster_resolver.master()
-  if master not in _SESSIONS:
-    cluster_spec = cluster_resolver.cluster_spec()
-    config = config_pb2.ConfigProto(isolate_session_state=True)
-    if cluster_spec:
-      config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
 
-    logging.info('Connecting to: %s', master)
-    graph = ops.Graph()
-    session = tf_session.Session(graph=graph, target=master, config=config)
-    with graph.as_default():
-      session.run(tpu.initialize_system())
+  # Use the existing session if we're already connected to this TPU
+  if (K.get_session()._target == master and
+      getattr(K.get_session(), '_tpu_initialized', None)):
+    return
+
+  cluster_spec = cluster_resolver.cluster_spec()
+  config = config_pb2.ConfigProto(isolate_session_state=True)
+  if cluster_spec:
+    config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
 
-    _SESSIONS[master] = session
-  return _SESSIONS[master]
+  logging.info('Initialize')
+  tpu_session = tf_session.Session(target=master, config=config)
+  tpu_session.run(tpu.initialize_system())
+  tpu_session._tpu_initialized = True
 
+  # N.B. We have to call `K.set_session()` AND set our session as the
+  # TF default. `K.get_session()` surprisingly does not return the value
+  # supplied by K.set_session otherwise.
+  K.set_session(tpu_session)
 
-def reset_tpu_sessions():
-  _SESSIONS.clear()
 
 try:
   from scipy.sparse import issparse  # pylint: disable=g-import-not-at-top
@@ -134,9 +134,7 @@ def get_tpu_system_metadata(tpu_cluster_resolver):
   cluster_def = cluster_spec.as_cluster_def() if cluster_spec else None
   tpu_system_metadata = (
       tpu_system_metadata_lib._query_tpu_system_metadata(
-          master,
-          cluster_def=cluster_def,
-          query_topology=False))
+          master, cluster_def=cluster_def, query_topology=False))
 
   return tpu_system_metadata
 
@@ -157,6 +155,8 @@ class TPUDistributionStrategy(object):
         replication, typically using all avaiable TPU cores. If overwrites as
         `True`, force the model replication using single core, i.e., no
         replication.
+    Raises:
+      Exception: No TPU Found on the given worker.
     """
 
     if tpu_cluster_resolver is None:
@@ -172,7 +172,8 @@ class TPUDistributionStrategy(object):
     for device in metadata.devices:
       if 'TPU:0' in device.name:
         self._worker_name = worker_re.search(device.name).group(1)
-        break
+        return
+    raise Exception('No TPU found on given worker.')
 
   def _make_assignment_for_model(self, cpu_model):
     """Makes a `TPUAssignment` for the passed in `cpu_model`."""
@@ -183,8 +184,7 @@ class TPUDistributionStrategy(object):
           'Degrading to a single core.')
       num_cores = 1
 
-    return TPUAssignment(
-        worker_name=self._worker_name, num_cores=num_cores)
+    return TPUAssignment(worker_name=self._worker_name, num_cores=num_cores)
 
 
 class TPUAssignment(object):
@@ -280,9 +280,9 @@ class KerasCrossShardOptimizer(keras_optimizers.Optimizer):
     super(KerasCrossShardOptimizer, self).__init__()
     self._name = name
     self._opt = opt
+    logging.info('KerasCrossShard: %s %s', self._opt, self._opt.weights)
 
   def get_updates(self, loss, params):
-    logging.info('Get updates: %s', loss)
     self._opt.get_gradients = self.get_gradients
     return self._opt.get_updates(loss, params)
 
@@ -291,17 +291,15 @@ class KerasCrossShardOptimizer(keras_optimizers.Optimizer):
     grads = super(KerasCrossShardOptimizer, self).get_gradients(loss, params)
     return [tpu_ops.cross_replica_sum(grad) / num_shards for grad in grads]
 
-  def set_weights(self, weights):
-    # TODO(power): Figure out whether we really need this given there is no
-    # caller for this API yet.
-    self._opt.set_weights()
-
   def get_weights(self):
     return self._opt.get_weights()
 
-  @property
-  def lr(self):
-    return self._opt.lr
+  def get_config(self):
+    return self._opt.get_config()
+
+  # Defer remaining operations to the underlying optimizer
+  def __getattr__(self, key):
+    return getattr(self._opt, key)
 
 
 class TPUModelOp(
@@ -327,14 +325,34 @@ def _replicated_optimizer(opt):
     return KerasCrossShardOptimizer(opt)
 
 
-def clone_metrics(metrics):
+def _clone_metrics(metrics):
   """Returns a copy of metrics. A copy is created for stateful metrics."""
   if metrics is None:
     return None
-  return [
-      m.__class__.from_config(m.get_config())
-      if isinstance(m, metrics_module.Metric) else m for m in metrics
-  ]
+  with variable_scope.variable_scope(
+      'metrics', reuse=variable_scope.AUTO_REUSE):
+    return [
+        m.__class__.from_config(m.get_config()) if isinstance(
+            m, metrics_module.Metric) else m for m in metrics
+    ]
+
+
+def _clone_optimizer(optimizer, config=None):
+  """Returns a cloned optimizer with the provided optimizer.config or config."""
+  if not isinstance(optimizer, keras_optimizers.Optimizer):
+    # In the first call to tpu_model(model), Keras may not have wrapped the TF
+    # optimizer in the TFOptimizer helper, e.g., the given model isn't compiled
+    # or optimizer isn't set, and later generated tpu_model compiles with a TF
+    # optimizer.
+    return optimizer
+
+  if isinstance(optimizer, keras_optimizers.TFOptimizer):
+    return keras_optimizers.TFOptimizer(optimizer.optimizer)
+
+  if config is None:
+    config = optimizer.get_config()
+  logging.info('Cloning %s %s', optimizer.__class__.__name__, config)
+  return optimizer.__class__.from_config(config)
 
 
 class TPURewriteContext(object):
@@ -425,6 +443,7 @@ class TPURewriteContext(object):
         return (r, q)
       else:
         raise ValueError('Invalid shape passed to qr: %s' % input_shape)
+
     gen_linalg_ops.qr = qr
 
     ops.name_scope = _name_scope
@@ -440,9 +459,9 @@ class TPURewriteContext(object):
     gen_linalg_ops.qr = self._default_qr
 
 
-class SizedInfeed(collections.namedtuple('SizedInfeed',
-                                         ['sharded_infeed_tensors',
-                                          'infeed_ops'])):
+class SizedInfeed(
+    collections.namedtuple('SizedInfeed',
+                           ['sharded_infeed_tensors', 'infeed_ops'])):
   """Represents an instantiation of the infeed ops for a concrete input shape.
 
   sharded_infeed_tensors: A data structure of Tensors used to represent the
@@ -628,12 +647,13 @@ class TPUNumpyInfeedManager(TPUInfeedManager):
                 infeed_tensors, [spec.shape for spec in input_specs],
                 name='infeed-enqueue-%s-%d' % (execution_mode, shard_id),
                 device_ordinal=shard_id))
-    return SizedInfeed(infeed_ops=infeed_op,
-                       sharded_infeed_tensors=shard_infeed_tensors)
+    return SizedInfeed(
+        infeed_ops=infeed_op, sharded_infeed_tensors=shard_infeed_tensors)
 
 
 class TPUDatasetInfeedManager(TPUInfeedManager):
   """Manages infeed for a `tf.data.Dataset` into a TPU computation.
+
   """
 
   class DatasetInfeedInstance(TPUInfeedInstance):
@@ -651,16 +671,13 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
       return {}
 
   # pylint: disable=redefined-outer-name
-  def __init__(self, dataset, tpu_assignment, tpu_session, mode):
+  def __init__(self, dataset, tpu_assignment, mode):
     """Constructs a TPUDatasetInfeedManager.
 
-    Must be called within a `KerasTPUModel.tpu_session` context!
-
     Args:
       dataset: A `tf.data.Dataset` to infeed.
       tpu_assignment: The `TPUAssignment` used to configure the
         Keras TPU model.
-      tpu_session: The `tf.Session` object used for running the TPU model.
       mode: ModeKeys enum.
     """
     self._verify_dataset_shape(dataset)
@@ -672,7 +689,7 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
     dummy_y_shape = dataset.output_shapes[1].as_list()
     dummy_y_shape[0] *= tpu_assignment.num_towers
     self._iterator = dataset.make_initializable_iterator()
-    tpu_session.run(self._iterator.initializer)
+    K.get_session().run(self._iterator.initializer)
 
     self._get_next_ops = []
     ctrl_deps = []
@@ -685,10 +702,10 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
 
     # Use dummy numpy inputs for the rest of Keras' shape checking. We
     # intercept them when building the model.
-    self._dummy_x = np.zeros(dummy_x_shape,
-                             dtype=dataset.output_types[0].as_numpy_dtype)
-    self._dummy_y = np.zeros(dummy_y_shape,
-                             dtype=dataset.output_types[1].as_numpy_dtype)
+    self._dummy_x = np.zeros(
+        dummy_x_shape, dtype=dataset.output_types[0].as_numpy_dtype)
+    self._dummy_y = np.zeros(
+        dummy_y_shape, dtype=dataset.output_types[1].as_numpy_dtype)
 
     input_specs = []
     if isinstance(self._iterator.output_shapes, tuple):
@@ -719,9 +736,8 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
       raise ValueError('The dataset must return a tuple of tf.Tensors, '
                        'instead it returns: %s' % dataset.output_classes)
     if len(dataset.output_classes) != 2:
-      raise ValueError(
-          'The dataset must return a 2-element tuple, got '
-          '%s output classes instead.' % (dataset.output_classes,))
+      raise ValueError('The dataset must return a 2-element tuple, got '
+                       '%s output classes instead.' % (dataset.output_classes,))
     for i, cls in enumerate(dataset.output_classes):
       if cls != ops.Tensor:
         raise ValueError('The dataset returned a non-Tensor type (%s) at '
@@ -730,8 +746,7 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
       if not shape:
         raise ValueError('The dataset returns a scalar tensor in '
                          'tuple index %d. Did you forget to batch? '
-                         '(Output shapes: %s).' % (i,
-                                                   dataset.output_shapes))
+                         '(Output shapes: %s).' % (i, dataset.output_shapes))
       for j, dim in enumerate(shape):
         if dim.value is None:
           if j == 0:
@@ -771,8 +786,8 @@ class TPUDatasetInfeedManager(TPUInfeedManager):
                 [spec.shape for spec in input_specs],
                 name='infeed-enqueue-%s-%d' % (execution_mode, shard_id),
                 device_ordinal=shard_id))
-    return SizedInfeed(infeed_ops=infeed_ops,
-                       sharded_infeed_tensors=shard_infeed_tensors)
+    return SizedInfeed(
+        infeed_ops=infeed_ops, sharded_infeed_tensors=shard_infeed_tensors)
 
 
 def _inject_tpu_inputs_for_dataset(tpu_assignment, mode,
@@ -858,12 +873,7 @@ class TPUFunction(object):
     self._tpu_assignment = tpu_assignment
     self._compilation_cache = {}
     self._cloned_model = None
-
-    # Copy optimizer configuration.  This is done prior to `_specialize_model`
-    # as the configuration may require evaluating variables in the CPU session.
-    self._optimizer_config = None
-    if not isinstance(self.model.optimizer, keras_optimizers.TFOptimizer):
-      self._optimizer_config = self.model.optimizer.get_config()
+    self._cloned_optimizer = None
 
   def _specialize_model(self, input_specs, infeed_manager):
     """Specialize `self.model` (a Keras model) for the given input shapes."""
@@ -909,53 +919,51 @@ class TPUFunction(object):
           tpu_targets.append(tensor)
 
       # Clone our CPU model, running within the TPU device context.
+      #
+      # We use the id of the original model as a key to avoid weight collisions
+      # (if a user re-runs the same model multiple times, in e.g. Colab).
       with TPURewriteContext(tpu_input_map):
-        with variable_scope.variable_scope('tpu_model_%s' % id(self.model)):
+        with variable_scope.variable_scope('tpu_%s' % id(self.model)):
           with keras_tpu_variables.replicated_scope(
               self._tpu_assignment.num_towers):
-            self._cloned_model = models.clone_model(self.model)
+            if not self._cloned_optimizer:
+              self._cloned_optimizer = _clone_optimizer(
+                  self.model.cpu_optimizer)
 
-      # When running on more than one core, concatenate outputs at the end of
-      # processing. In backprop stage, the gradients will be calculdated
-      # according to the local inputs as gradient of cross-replica-concat being
-      # zero for any outputs other than those from mlocal core so the loss
-      # calculation is identical.
-      num_towers = self.model._tpu_assignment.num_towers
-      if num_towers > 1 and (is_training or is_test):
-        new_outputs = [
-            _cross_replica_concat(
-                o, core_id, num_towers, name='model output ({})'.format(o.name))
-            for o in self._cloned_model.outputs
-        ]
-        self._cloned_model.outputs = new_outputs
-        tpu_targets = [
-            _cross_replica_concat(
-                tensor,
-                core_id,
-                num_towers,
-                name='model target ({})'.format(tensor.name))
-            for tensor in tpu_targets
-        ]
-
-      # Create a copy of the optimizer for this graph.
-      if isinstance(self.model.optimizer, keras_optimizers.TFOptimizer):
-        cloned_optimizer = keras_optimizers.TFOptimizer(
-            self.model.optimizer.optimizer)
-      else:
-        logging.info('Cloning %s %s', self.model.optimizer.__class__.__name__,
-                     self._optimizer_config)
-        cloned_optimizer = self.model.optimizer.__class__.from_config(
-            self._optimizer_config)
+            self._cloned_model = models.clone_model(self.model)
 
-      if is_training or is_test:
-        self._cloned_model.compile(
-            optimizer=_replicated_optimizer(cloned_optimizer),
-            loss=self.model.loss,
-            loss_weights=self.model.loss_weights,
-            metrics=clone_metrics(self.model.metrics),
-            weighted_metrics=clone_metrics(self.model.weighted_metrics),
-            target_tensors=tpu_targets,
-        )
+            # When running on more than one core, concatenate outputs at the end
+            # of processing. In backprop stage, the gradients will be
+            # calculdated according to the local inputs as gradient of
+            # cross-replica-concat being zero for any outputs other than those
+            # from mlocal core so the loss calculation is identical.
+            num_towers = self.model._tpu_assignment.num_towers
+            if num_towers > 1 and (is_training or is_test):
+              new_outputs = [
+                  _cross_replica_concat(
+                      o, core_id, num_towers,
+                      name='model output ({})'.format(o.name))
+                  for o in self._cloned_model.outputs
+              ]
+              self._cloned_model.outputs = new_outputs
+              tpu_targets = [
+                  _cross_replica_concat(
+                      tensor,
+                      core_id,
+                      num_towers,
+                      name='model target ({})'.format(tensor.name))
+                  for tensor in tpu_targets
+              ]
+
+            if is_training or is_test:
+              self._cloned_model.compile(
+                  optimizer=_replicated_optimizer(self._cloned_optimizer),
+                  loss=self.model.loss,
+                  loss_weights=self.model.loss_weights,
+                  metrics=_clone_metrics(self.model.metrics),
+                  weighted_metrics=_clone_metrics(self.model.weighted_metrics),
+                  target_tensors=tpu_targets,
+              )
 
       # Compute our outfeed depending on the execution mode
       if is_training:
@@ -1089,15 +1097,14 @@ class TPUFunction(object):
     # unique input shape.
     shape_key = tuple([tuple(spec.shape.as_list()) for spec in input_specs])
     if shape_key not in self._compilation_cache:
-      with self.model.tpu_session():
-        logging.info(
-            'New input shapes; (re-)compiling: mode=%s '
-            '(# of cores %d), %s', self.execution_mode,
-            self._tpu_assignment.num_towers, input_specs)
-        new_tpu_model_ops = self._specialize_model(input_specs,
-                                                   infeed_manager)
-        self._compilation_cache[shape_key] = new_tpu_model_ops
-        self._test_model_compiles(new_tpu_model_ops)
+      logging.info(
+          'New input shapes; (re-)compiling: mode=%s '
+          '(# of cores %d), %s', self.execution_mode,
+          self._tpu_assignment.num_towers, input_specs)
+      new_tpu_model_ops = self._specialize_model(input_specs,
+                                                 infeed_manager)
+      self._compilation_cache[shape_key] = new_tpu_model_ops
+      self._test_model_compiles(new_tpu_model_ops)
 
     return self._compilation_cache[shape_key]
 
@@ -1195,11 +1202,10 @@ class TPUFunction(object):
     # Initialize our TPU weights on the first compile.
     self.model._initialize_weights(self._cloned_model)
 
-    with self.model.tpu_session() as session:
-      _, _, outfeed_outputs = session.run([
-          tpu_model_ops.infeed_op, tpu_model_ops.execute_op,
-          tpu_model_ops.outfeed_op
-      ], infeed_dict)
+    _, _, outfeed_outputs = K.get_session().run([
+        tpu_model_ops.infeed_op, tpu_model_ops.execute_op,
+        tpu_model_ops.outfeed_op
+    ], infeed_dict)
     return self._process_outputs(outfeed_outputs)
 
   def pipeline_run(self, cur_step_inputs, next_step_inputs):
@@ -1231,8 +1237,8 @@ class TPUFunction(object):
     next_step_infeed_manager = self._lookup_infeed_manager(next_step_inputs)
     cur_step_infeed_manager = self._lookup_infeed_manager(cur_step_inputs)
 
-    if (next_step_infeed_manager is not None
-        and cur_step_infeed_manager is not None):
+    if (next_step_infeed_manager is not None and
+        cur_step_infeed_manager is not None):
       assert type(next_step_infeed_manager) is type(cur_step_infeed_manager)
 
     next_input_tensors, next_step_inputs = (
@@ -1257,14 +1263,12 @@ class TPUFunction(object):
     infeed_dict = None
 
     if cur_infeed_instance and cur_input_tensors and cur_step_infeed_manager:
-      cur_input_specs = cur_infeed_instance.make_input_specs(
-          cur_input_tensors)
+      cur_input_specs = cur_infeed_instance.make_input_specs(cur_input_tensors)
       cur_tpu_model_ops = self._tpu_model_ops_for_input_specs(
           cur_input_specs, cur_step_infeed_manager)
 
-    if (next_infeed_instance
-        and next_input_tensors
-        and next_step_infeed_manager):
+    if (next_infeed_instance and next_input_tensors and
+        next_step_infeed_manager):
       next_input_specs = next_infeed_instance.make_input_specs(
           next_input_tensors)
       next_tpu_model_ops = self._tpu_model_ops_for_input_specs(
@@ -1275,26 +1279,24 @@ class TPUFunction(object):
     self.model._initialize_weights(self._cloned_model)
 
     if next_tpu_model_ops and cur_tpu_model_ops:
-      with self.model.tpu_session() as session:
-        _, _, outfeed_outputs = session.run([
-            next_tpu_model_ops.infeed_op, cur_tpu_model_ops.execute_op,
-            cur_tpu_model_ops.outfeed_op
-        ], infeed_dict)
+      _, _, outfeed_outputs = K.get_session().run([
+          next_tpu_model_ops.infeed_op, cur_tpu_model_ops.execute_op,
+          cur_tpu_model_ops.outfeed_op
+      ], infeed_dict)
       return self._process_outputs(outfeed_outputs)
+
     if cur_tpu_model_ops:
-      with self.model.tpu_session() as session:
-        _, outfeed_outputs = session.run([
-            cur_tpu_model_ops.execute_op, cur_tpu_model_ops.outfeed_op])
+      _, outfeed_outputs = K.get_session().run(
+          [cur_tpu_model_ops.execute_op, cur_tpu_model_ops.outfeed_op])
       return self._process_outputs(outfeed_outputs)
+
     if next_tpu_model_ops:
-      with self.model.tpu_session() as session:
-        session.run(next_tpu_model_ops.infeed_op, infeed_dict)
+      K.get_session().run(next_tpu_model_ops.infeed_op, infeed_dict)
       return None
     raise RuntimeError('Internal error: both current & next tpu_model_ops '
                        'were None')
 
 
-
 class KerasTPUModel(models.Model):
   """TPU compatible Keras model wrapper."""
 
@@ -1321,8 +1323,6 @@ class KerasTPUModel(models.Model):
     self._tpu_model = None
     self._tpu_weights_initialized = False
 
-    self._session = tpu_session(cluster_resolver)
-
     # If the input CPU model has already been compiled, compile our TPU model
     # immediately.
     if self._cpu_model.optimizer:
@@ -1359,15 +1359,20 @@ class KerasTPUModel(models.Model):
     if target_tensors:
       raise ValueError('target_tensors is not supported for TPU execution.')
 
+    self._cpu_model.compile(
+        _clone_optimizer(optimizer),
+        loss,
+        _clone_metrics(metrics),
+        loss_weights,
+        sample_weight_mode,
+        _clone_metrics(weighted_metrics),
+        target_tensors,
+        **kwargs)
+
     super(KerasTPUModel, self).compile(optimizer, loss, metrics, loss_weights,
                                        sample_weight_mode, weighted_metrics,
                                        target_tensors, **kwargs)
 
-    if not self._cpu_model.optimizer:
-      self._cpu_model.compile(optimizer, loss, metrics, loss_weights,
-                              sample_weight_mode, weighted_metrics,
-                              target_tensors, **kwargs)
-
   def fit(self,
           x=None,
           y=None,
@@ -1400,8 +1405,8 @@ class KerasTPUModel(models.Model):
           'https://github.com/tensorflow/tpu/tree/master/models/experimental'
           '/keras')
     if callable(x):
-      with self.tpu_session() as sess,\
-          ops.device('/job:%s/device:CPU:0' % self._tpu_assignment.worker_name):
+      with ops.device('/job:%s/device:CPU:0' %
+                      self._tpu_assignment.worker_name):
         dataset = x()
         if steps_per_epoch is None:
           raise ValueError('When using tf.data as input to a model, you '
@@ -1410,7 +1415,7 @@ class KerasTPUModel(models.Model):
           raise ValueError('When using tf.data as input to a model, y must be '
                            'None')
         infeed_manager = TPUDatasetInfeedManager(
-            dataset, self._tpu_assignment, sess, model_fn_lib.ModeKeys.TRAIN)
+            dataset, self._tpu_assignment, model_fn_lib.ModeKeys.TRAIN)
         # Use dummy numpy inputs for the rest of Keras' shape checking. We
         # intercept them when building the model.
         x = infeed_manager.dummy_x
@@ -1426,26 +1431,24 @@ class KerasTPUModel(models.Model):
           'https://github.com/tensorflow/tpu/tree/master/models/experimental'
           '/keras')
     if callable(validation_data):
-      with self.tpu_session() as sess:
-        dataset = validation_data()
-        if validation_steps is None:
-          raise ValueError('When using tf.data as validation for a model, you '
-                           'should specify the validation_steps argument.')
-        infeed_manager = TPUDatasetInfeedManager(
-            dataset, self._tpu_assignment, sess, model_fn_lib.ModeKeys.EVAL)
-        # Use dummy numpy inputs for the rest of Keras' shape checking. We
-        # intercept them when building the model.
-        val_x = infeed_manager.dummy_x
-        val_y = infeed_manager.dummy_y
-        infeed_managers.append((val_x, infeed_manager))
-        validation_data = (val_x, val_y)
+      dataset = validation_data()
+      if validation_steps is None:
+        raise ValueError('When using tf.data as validation for a model, you '
+                         'should specify the validation_steps argument.')
+      infeed_manager = TPUDatasetInfeedManager(
+          dataset, self._tpu_assignment, model_fn_lib.ModeKeys.EVAL)
+      # Use dummy numpy inputs for the rest of Keras' shape checking. We
+      # intercept them when building the model.
+      val_x = infeed_manager.dummy_x
+      val_y = infeed_manager.dummy_y
+      infeed_managers.append((val_x, infeed_manager))
+      validation_data = (val_x, val_y)
 
     self._numpy_to_infeed_manager_list = infeed_managers
     try:
       if not kwargs.get('_pipeline', True):
-        logging.info(
-            'Running non-pipelined training loop (`_pipeline=%s`).',
-            kwargs['_pipeline'])
+        logging.info('Running non-pipelined training loop (`_pipeline=%s`).',
+                     kwargs['_pipeline'])
         kwargs.pop('_pipeline')
         return super(KerasTPUModel, self).fit(
             x,
@@ -1501,50 +1504,32 @@ class KerasTPUModel(models.Model):
           'https://github.com/tensorflow/tpu/tree/master/models/experimental'
           '/keras')
     if callable(x):
-      with self.tpu_session() as sess:
-        dataset = x()
-        if steps is None:
-          raise ValueError('When using tf.data as input to a model, you '
-                           'should specify the steps argument.')
-        if y is not None:
-          raise ValueError('When using tf.data as input to a model, y must be '
-                           'None')
-        infeed_manager = TPUDatasetInfeedManager(
-            dataset, self._tpu_assignment, sess, model_fn_lib.ModeKeys.EVAL)
-        # Use dummy numpy inputs for the rest of Keras' shape checking. We
-        # intercept them when building the model.
-        x = infeed_manager.dummy_x
-        y = infeed_manager.dummy_y
-        infeed_managers.append((x, infeed_manager))
+      dataset = x()
+      if steps is None:
+        raise ValueError('When using tf.data as input to a model, you '
+                         'should specify the steps argument.')
+      if y is not None:
+        raise ValueError('When using tf.data as input to a model, y must be '
+                         'None')
+      infeed_manager = TPUDatasetInfeedManager(
+          dataset, self._tpu_assignment, model_fn_lib.ModeKeys.EVAL)
+      # Use dummy numpy inputs for the rest of Keras' shape checking. We
+      # intercept them when building the model.
+      x = infeed_manager.dummy_x
+      y = infeed_manager.dummy_y
+      infeed_managers.append((x, infeed_manager))
 
     self._numpy_to_infeed_manager_list = infeed_managers
     try:
-      return super(KerasTPUModel, self).evaluate(
-          x,
-          y,
-          batch_size,
-          verbose,
-          sample_weight,
-          steps)
+      return super(KerasTPUModel, self).evaluate(x, y, batch_size, verbose,
+                                                 sample_weight, steps)
     finally:
       self._numpy_to_infeed_manager_list = []
 
-  def _pipeline_fit(self,
-                    x,
-                    y,
-                    batch_size,
-                    epochs,
-                    verbose,
-                    callbacks,
-                    validation_split,
-                    validation_data,
-                    shuffle,
-                    class_weight,
-                    sample_weight,
-                    initial_epoch,
-                    steps_per_epoch,
-                    validation_steps,
-                    **kwargs):
+  def _pipeline_fit(self, x, y, batch_size, epochs, verbose, callbacks,
+                    validation_split, validation_data, shuffle, class_weight,
+                    sample_weight, initial_epoch, steps_per_epoch,
+                    validation_steps, **kwargs):
     # Similar to super.fit(...), but modified to support software pipelining.
 
     # Backwards compatibility
@@ -1572,13 +1557,8 @@ class KerasTPUModel(models.Model):
 
     # Prepare validation data
     val_x, val_y, val_sample_weights = self._prepare_validation_data(
-        validation_data,
-        validation_split,
-        validation_steps,
-        x,
-        y,
-        sample_weights,
-        batch_size)
+        validation_data, validation_split, validation_steps, x, y,
+        sample_weights, batch_size)
     return self._pipeline_fit_loop(
         x,
         y,
@@ -1751,8 +1731,8 @@ class KerasTPUModel(models.Model):
       for i in indices_for_conversion_to_dense:
         ins_batch[i] = ins_batch[i].toarray()
 
-      outs = f.pipeline_run(cur_step_inputs=ins_last_batch,
-                            next_step_inputs=ins_batch)
+      outs = f.pipeline_run(
+          cur_step_inputs=ins_last_batch, next_step_inputs=ins_batch)
       ins_last_batch = ins_batch
 
       if batch_index == 0:
@@ -1824,8 +1804,8 @@ class KerasTPUModel(models.Model):
           next_step_inputs = ins
         else:
           next_step_inputs = None
-        outs = f.pipeline_run(cur_step_inputs=ins,
-                              next_step_inputs=next_step_inputs)
+        outs = f.pipeline_run(
+            cur_step_inputs=ins, next_step_inputs=next_step_inputs)
       except errors.OutOfRangeError:
         logging.warning('Your dataset iterator ran out of data; '
                         'interrupting training. Make sure that your '
@@ -1845,25 +1825,21 @@ class KerasTPUModel(models.Model):
         break
 
     if do_validation:
-      val_outs = training_arrays.test_loop(self,
-                                           val_inputs,
-                                           val_targets,
-                                           sample_weights=val_sample_weights,
-                                           steps=validation_steps,
-                                           verbose=0)
+      val_outs = training_arrays.test_loop(
+          self,
+          val_inputs,
+          val_targets,
+          sample_weights=val_sample_weights,
+          steps=validation_steps,
+          verbose=0)
       if not isinstance(val_outs, list):
         val_outs = [val_outs]
       # Same labels assumed.
       for l, o in zip(self.metrics_names, val_outs):
         epoch_logs['val_' + l] = o
 
-  def _prepare_validation_data(self,
-                               validation_data,
-                               validation_split,
-                               validation_steps,
-                               x,
-                               y,
-                               sample_weights,
+  def _prepare_validation_data(self, validation_data, validation_split,
+                               validation_steps, x, y, sample_weights,
                                batch_size):
     """Prepares the validation dataset.
 
@@ -1921,8 +1897,10 @@ class KerasTPUModel(models.Model):
 
       x, val_x = (slice_arrays(x, 0, split_at), slice_arrays(x, split_at))
       y, val_y = (slice_arrays(y, 0, split_at), slice_arrays(y, split_at))
-      sample_weights, val_sample_weights = (slice_arrays(
-          sample_weights, 0, split_at), slice_arrays(sample_weights, split_at))
+      sample_weights, val_sample_weights = (
+          slice_arrays(sample_weights, 0, split_at),
+          slice_arrays(sample_weights, split_at)
+      )
     elif validation_steps:
       val_x = []
       val_y = []
@@ -1934,11 +1912,20 @@ class KerasTPUModel(models.Model):
 
     return val_x, val_y, val_sample_weights
 
+  @property
+  def optimizer(self):
+    if self._tpu_model:
+      return self._tpu_model.optimizer
+    return self._cpu_model.optimizer
+
+  @optimizer.setter
+  def optimizer(self, optimizer):
+    self._optimizer = optimizer
+
   def _make_train_function(self):
     if not self.train_function:
       self.train_function = TPUFunction(
-          self,
-          model_fn_lib.ModeKeys.TRAIN,
+          self, model_fn_lib.ModeKeys.TRAIN,
           tpu_assignment=self._tpu_assignment)
 
     return self.train_function
@@ -1973,18 +1960,48 @@ class KerasTPUModel(models.Model):
     self._tpu_weights_initialized = True
 
     weights = self._cpu_model.get_weights()
-    with self.tpu_session():
-      logging.info('Setting weights on TPU model.')
-      cloned_model.set_weights(weights)
+
+    if isinstance(self.cpu_optimizer, keras_optimizers.TFOptimizer):
+      cpu_optimizer_config = {}
+    else:
+      cpu_optimizer_config = self.cpu_optimizer.get_config()
+
+    logging.info('Setting weights on TPU model.')
+    cloned_model.set_weights(weights)
+    for k, v in six.iteritems(cpu_optimizer_config):
+      opt_var = getattr(self._tpu_model.optimizer, k)
+      if isinstance(opt_var, variables.Variable):
+        logging.info('CPU -> TPU %s: %s {%s}', k, v, K.get_value(opt_var))
+        K.get_session().run(opt_var.assign(v))
+      else:
+        logging.warning('Cannot update non-variable config: %s', k)
+
+  @property
+  def cpu_optimizer(self):
+    return self._cpu_model.optimizer
 
   def sync_to_cpu(self):
     """Copy weights from the CPU, returning a synchronized CPU model."""
-    if self._tpu_weights_initialized:
-      with self.tpu_session():
-        logging.info('Copying TPU weights to the CPU')
-        tpu_weights = self._tpu_model.get_weights()
+    if not self._tpu_weights_initialized:
+      return self._cpu_model
 
-      self._cpu_model.set_weights(tpu_weights)
+    logging.info('Copying TPU weights to the CPU')
+    tpu_weights = self._tpu_model.get_weights()
+
+    # TFOptimizers have no configurable options
+    if isinstance(self.cpu_optimizer, keras_optimizers.TFOptimizer):
+      tpu_optimizer_config = {}
+    else:
+      tpu_optimizer_config = self._tpu_model.optimizer.get_config()
+
+    self._cpu_model.set_weights(tpu_weights)
+    for k, v in six.iteritems(tpu_optimizer_config):
+      logging.info('TPU -> CPU %s: %s', k, v)
+      opt_var = getattr(self.cpu_optimizer, k)
+      if isinstance(opt_var, variables.Variable):
+        K.get_session().run(opt_var.assign(v))
+      else:
+        logging.warning('Cannot update non-variable config: %s', k)
 
     return self._cpu_model
 
@@ -2005,26 +2022,6 @@ class KerasTPUModel(models.Model):
     self._cpu_model.set_weights(weights)
     self._tpu_weights_initialized = False
 
-  @contextlib.contextmanager
-  def tpu_session(self):
-    """Yields a TPU session and sets it as the default Keras session."""
-    with self._session.graph.as_default():
-      default_session = K.get_session()
-      # N.B. We have to call `K.set_session()` AND set our session as the
-      # TF default. `K.get_session()` surprisingly does not return the value
-      # supplied by K.set_session otherwise.
-      K.set_session(self._session)
-      with self._session.as_default():
-        yield self._session
-      K.set_session(default_session)
-
-  def shutdown(self):
-    # TODO(b/111364423): Actually shut down the system.
-    logging.info('Skipping shutting down TPU system.')
-    # with self.tpu_session() as session:
-    #   session.run(tpu.shutdown_system())
-    self._session.close()
-
 
 # pylint: disable=bad-continuation
 def _validate_shapes(model):
@@ -2065,7 +2062,9 @@ Output shape: %(output_shape)s
 
 @experimental
 def tpu_model(model, strategy=None):
-  """Copy `model` along with weights to the TPU.  Returns a TPU model.
+  """Copy `model` along with weights to the TPU.
+
+  Returns a TPU model.
 
   Usage:
   ```
@@ -2080,21 +2079,16 @@ def tpu_model(model, strategy=None):
   model.compile(
       optimizer=tf.train.GradientDescentOptimizer(learning_rate=1.0),
       ...)
-  model.shutdown()
   ```
 
   Args:
-    model: A `KerasTPUModel`.
+    model: A `tf.keras.Model` instance.
     strategy: `TPUDistributionStrategy`.  The strategy to use for replicating
-              model across multiple TPU cores.
+      model across multiple TPU cores.
 
   Returns:
     A new `KerasTPUModel` instance.
   """
-  # Force initialization of the CPU model.
-  model.get_weights()
-  model.reset_states()
-
   _validate_shapes(model)
   # TODO(xiejw): Validate TPU model. TPUModel only?
   # TODO(xiejw): Validate replicas. Full or 1. Shall we allow subset?
@@ -2108,4 +2102,34 @@ def tpu_model(model, strategy=None):
           '`strategy` must have type `tf.contrib.tpu.TPUDistributionStrategy`. '
           'Got: {}'.format(type(strategy)))
 
-  return KerasTPUModel(cpu_model=model, strategy=strategy)
+  # If the model has already been initialized, grab the optimizer configuration
+  # and model weights before entering the TPU session.
+  if model.optimizer:
+    if (isinstance(model.optimizer, keras_optimizers.Optimizer) and not
+        isinstance(model.optimizer, keras_optimizers.TFOptimizer)):
+      optimizer_config = model.optimizer.get_config()
+    else:
+      optimizer_config = None
+    model_weights = model.get_weights()
+  else:
+    model_weights = None
+
+  setup_tpu_session(strategy._tpu_cluster_resolver)
+
+  # Force initialization of the CPU model in the TPU session.
+  cpu_model = models.clone_model(model)
+  if model.optimizer:
+    cpu_model.compile(
+        _clone_optimizer(model.optimizer, optimizer_config),
+        model.loss,
+        _clone_metrics(model.metrics),
+        model.loss_weights,
+        model.sample_weight_mode,
+        _clone_metrics(model.weighted_metrics),
+    )
+
+  if model_weights:
+    cpu_model.set_weights(model_weights)
+    cpu_model.reset_states()
+
+  return KerasTPUModel(cpu_model=cpu_model, strategy=strategy)
-- 
GitLab


From f55e5ef27b3ccf1b75932e219f7358976dbf56c2 Mon Sep 17 00:00:00 2001
From: IMBurbank <bassmanburbank@gmail.com>
Date: Tue, 25 Sep 2018 18:39:11 -0600
Subject: [PATCH 0707/1357] Update to use python 2-3 compatible function
 tf_inspect.getfullargspec.

---
 .../python/losses/python/tuple_losses_impl.py |   2 +-
 .../labeled_tensor/python/ops/_typecheck.py   |   2 +-
 .../layers/python/layers/rev_block_lib.py     |   3 +-
 .../python/learn/estimators/estimator.py      |   4 +-
 .../learn/python/learn/estimators/head.py     |   2 +-
 .../learn/python/learn/experiment_test.py     |   2 +-
 .../learn/python/learn/export_strategy.py     |   2 +-
 .../contrib/learn/python/learn/metric_spec.py |   2 +-
 .../contrib/learn/python/learn/monitors.py    |   2 +-
 .../contrib/tpu/python/tpu/tpu_function.py    |   2 +-
 tensorflow/python/framework/errors_impl.py    |   2 +-
 tensorflow/python/framework/function.py       |   6 +-
 tensorflow/python/keras/backend_test.py       |   2 +-
 tensorflow/python/keras/testing_utils.py      |   2 +-
 .../kernel_tests/variable_scope_test.py       |   4 +-
 tensorflow/python/ops/variable_scope.py       |   4 +-
 tensorflow/python/util/tf_contextlib_test.py  |   2 +-
 tensorflow/python/util/tf_inspect.py          |   7 +-
 tensorflow/python/util/tf_inspect_test.py     | 249 +++++++++++++++++-
 .../api/lib/python_object_to_proto_visitor.py |   2 +-
 20 files changed, 267 insertions(+), 36 deletions(-)

diff --git a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
index 221c70c38b..00a83e5e55 100644
--- a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
+++ b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
@@ -101,7 +101,7 @@ def _args_to_gan_model(loss_fn):
   """
   # Match arguments in `loss_fn` to elements of `namedtuple`.
   # TODO(joelshor): Properly handle `varargs` and `keywords`.
-  argspec = tf_inspect.getargspec(loss_fn)
+  argspec = tf_inspect.getfullargspec(loss_fn)
   defaults = argspec.defaults or []
 
   required_args = set(argspec.args[:-len(defaults)])
diff --git a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
index 80fa17ec1f..0e23039847 100644
--- a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
+++ b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
@@ -230,7 +230,7 @@ def accepts(*types):
 
   def check_accepts(f):
     """Check the types."""
-    spec = tf_inspect.getargspec(f)
+    spec = tf_inspect.getfullargspec(f)
 
     num_function_arguments = len(spec.args)
     if len(types) != num_function_arguments:
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
index 06da32072f..55979cc391 100644
--- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
@@ -576,7 +576,8 @@ def _recomputing_grad_fn(compute_fn,
 
 def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False):
   """See recompute_grad."""
-  has_is_recompute_kwarg = "is_recomputing" in tf_inspect.getargspec(fn).args
+  has_is_recompute_kwarg = (
+      "is_recomputing" in tf_inspect.getfullargspec(fn).args)
   for arg in args:
     if not isinstance(arg, framework_ops.Tensor):
       raise ValueError("All inputs to function must be Tensors")
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index c1de42782e..b88923bca2 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -199,11 +199,11 @@ def _model_fn_args(fn):
   if hasattr(fn, 'func') and hasattr(fn, 'keywords') and hasattr(fn, 'args'):
     # Handle functools.partial and similar objects.
     return tuple([
-        arg for arg in tf_inspect.getargspec(fn.func).args[len(fn.args):]
+        arg for arg in tf_inspect.getfullargspec(fn.func).args[len(fn.args):]
         if arg not in set(fn.keywords.keys())
     ])
   # Handle function.
-  return tuple(tf_inspect.getargspec(fn).args)
+  return tuple(tf_inspect.getfullargspec(fn).args)
 
 
 def _get_replica_device_setter(config):
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index c6f79e00d5..63dd08316b 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -1861,7 +1861,7 @@ def _get_arguments(func):
   _, func = tf_decorator.unwrap(func)
   if hasattr(func, "__code__"):
     # Regular function.
-    return tf_inspect.getargspec(func)
+    return tf_inspect.getfullargspec(func)
   elif hasattr(func, "func"):
     # Partial function.
     return _get_arguments(func.func)
diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py
index fb16c94c29..6926696fb6 100644
--- a/tensorflow/contrib/learn/python/learn/experiment_test.py
+++ b/tensorflow/contrib/learn/python/learn/experiment_test.py
@@ -126,7 +126,7 @@ class TestBaseEstimator(object):
 
 def _check_method_supports_args(method, kwargs):
   """Checks that the given method supports the given args."""
-  supported_args = tuple(tf_inspect.getargspec(method).args)
+  supported_args = tuple(tf_inspect.getfullargspec(method).args)
   for kwarg in kwargs:
     if kwarg not in supported_args:
       raise ValueError(
diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py
index 075cab536e..0d6e0cdc18 100644
--- a/tensorflow/contrib/learn/python/learn/export_strategy.py
+++ b/tensorflow/contrib/learn/python/learn/export_strategy.py
@@ -96,7 +96,7 @@ class ExportStrategy(
     """
     # don't break existing export_fns that don't accept checkpoint_path and
     # eval_result
-    export_fn_args = tf_inspect.getargspec(self.export_fn).args
+    export_fn_args = tf_inspect.getfullargspec(self.export_fn).args
     kwargs = {}
     if 'checkpoint_path' in export_fn_args:
       kwargs['checkpoint_path'] = checkpoint_path
diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py
index 97220365d5..604d6d46b4 100644
--- a/tensorflow/contrib/learn/python/learn/metric_spec.py
+++ b/tensorflow/contrib/learn/python/learn/metric_spec.py
@@ -51,7 +51,7 @@ def _args(fn):
     return tuple(
         [arg for arg in _args(fn.func) if arg not in set(fn.keywords.keys())])
   # Handle function.
-  return tuple(tf_inspect.getargspec(fn).args)
+  return tuple(tf_inspect.getfullargspec(fn).args)
 
 
 _CANONICAL_LABELS_ARG = 'labels'
diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py
index 3d691d4340..5f61e0264f 100644
--- a/tensorflow/contrib/learn/python/learn/monitors.py
+++ b/tensorflow/contrib/learn/python/learn/monitors.py
@@ -1303,7 +1303,7 @@ class RunHookAdapterForMonitors(session_run_hook.SessionRunHook):
   def end(self, session):
     self._last_step = None
     for m in self._monitors:
-      if "session" in tf_inspect.getargspec(m.end).args:
+      if "session" in tf_inspect.getfullargspec(m.end).args:
         m.end(session=session)
       else:
         m.end()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_function.py b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
index 0c7a38dbbb..9c4bd1c4d1 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_function.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
@@ -80,7 +80,7 @@ def check_function_argument_count(func, input_arity, infeed_queue):
   number_of_arguments_needed = input_arity
   if infeed_queue is not None:
     number_of_arguments_needed += infeed_queue.number_of_tuple_elements
-  arg_spec = tf_inspect.getargspec(func)
+  arg_spec = tf_inspect.getfullargspec(func)
   number_of_args = len(arg_spec.args)
   if arg_spec.defaults is None:
     number_of_defaults = 0
diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py
index 5af71f2cfb..c373e75a74 100644
--- a/tensorflow/python/framework/errors_impl.py
+++ b/tensorflow/python/framework/errors_impl.py
@@ -55,7 +55,7 @@ class OpError(Exception):
 
   def __reduce__(self):
     # Allow the subclasses to accept less arguments in their __init__.
-    init_argspec = tf_inspect.getargspec(self.__class__.__init__)
+    init_argspec = tf_inspect.getfullargspec(self.__class__.__init__)
     args = tuple(getattr(self, arg) for arg in init_argspec.args[1:])
     return self.__class__, args
 
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index f287289bd0..3db6f683c9 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -132,9 +132,9 @@ class Defun(object):
       raise ValueError("func %s must be callable" % func)
 
     # Func should not use kwargs and defaults.
-    argspec = tf_inspect.getargspec(func)
-    if argspec.keywords or argspec.defaults:
-      raise ValueError("Functions with argument defaults or keyword "
+    argspec = tf_inspect.getfullargspec(func)
+    if argspec.varkw or argspec.defaults:
+      raise ValueError("Functions with argument defaults or varkw "
                        "arguments are not supported.")
 
     # Computes how many arguments 'func' has.
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index ab71589940..31191d0d35 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -452,7 +452,7 @@ class BackendLinearAlgebraTest(test.TestCase):
         compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5),
                                          keras_kwargs={'axis': -1},
                                          np_kwargs={'axis': -1})
-        if 'keepdims' in tf_inspect.getargspec(keras_op).args:
+        if 'keepdims' in tf_inspect.getfullargspec(keras_op).args:
           compare_single_input_op_to_numpy(keras_op, np_op,
                                            input_shape=(4, 7, 5),
                                            keras_kwargs={'axis': 1,
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index 501b50ba5f..1afaba5653 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -102,7 +102,7 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
   layer.set_weights(weights)
 
   # test and instantiation from weights
-  if 'weights' in tf_inspect.getargspec(layer_cls.__init__):
+  if 'weights' in tf_inspect.getfullargspec(layer_cls.__init__):
     kwargs['weights'] = weights
     layer = layer_cls(**kwargs)
 
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 401e1ae102..1d0b72b17a 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -998,8 +998,8 @@ class VariableScopeTest(test.TestCase):
 
   def testSignatureGetVarVsGetLocalVar(self):
     """get_{local,}variable() must take the same list of args."""
-    arg_names = tf_inspect.getargspec(variable_scope.get_variable)[0]
-    local_arg_names = tf_inspect.getargspec(
+    arg_names = tf_inspect.getfullargspec(variable_scope.get_variable)[0]
+    local_arg_names = tf_inspect.getfullargspec(
         variable_scope.get_local_variable)[0]
     self.assertEqual(arg_names, local_arg_names)
 
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index a43676cd70..3cc1eb916d 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -892,14 +892,14 @@ class _VariableStore(object):
         if shape and shape.is_fully_defined():
           init_val = lambda: initializer(  # pylint: disable=g-long-lambda
               shape.as_list(), dtype=dtype, partition_info=partition_info)
-        elif not tf_inspect.getargspec(initializer).args:
+        elif not tf_inspect.getfullargspec(initializer).args:
           init_val = initializer
         else:
           raise ValueError("You can only pass an initializer function that "
                            "expects no arguments to its callable when the "
                            "shape is not fully defined. The given initializer "
                            "function expects the following args %s" %
-                           tf_inspect.getargspec(initializer).args)
+                           tf_inspect.getfullargspec(initializer).args)
         variable_dtype = dtype.base_dtype
 
     # Create the variable.
diff --git a/tensorflow/python/util/tf_contextlib_test.py b/tensorflow/python/util/tf_contextlib_test.py
index 4a5bf388a6..1e921b5ea3 100644
--- a/tensorflow/python/util/tf_contextlib_test.py
+++ b/tensorflow/python/util/tf_contextlib_test.py
@@ -83,7 +83,7 @@ class TfContextlibTest(test.TestCase):
     self.assertFalse(isinstance(target, tf_decorator.TFDecorator))
 
   def testGetArgSpecReturnsWrappedArgSpec(self):
-    argspec = tf_inspect.getargspec(test_params_and_defaults)
+    argspec = tf_inspect.getfullargspec(test_params_and_defaults)
     self.assertEqual(['a', 'b', 'c', 'd'], argspec.args)
     self.assertEqual((2, True, 'hello'), argspec.defaults)
 
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index 967c872c2a..234850ac3f 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -43,7 +43,12 @@ def currentframe():
 
 
 def getargspec(obj):
-  """TFDecorator-aware replacement for inspect.getargspec.
+  """TFDecorator-aware replacement for `inspect.getargspec`.
+
+  This should not be called from other modules. It is deprecated in python3.
+
+  Use `getfullargspec`. It is a TFDecorator-aware replacement for 
+  `inspect.getfullargspec` compatible with both python2 and python3.
 
   Args:
     obj: A function, partial function, or callable object, possibly
diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py
index d3b7e4b969..55f88f8fc6 100644
--- a/tensorflow/python/util/tf_inspect_test.py
+++ b/tensorflow/python/util/tf_inspect_test.py
@@ -122,18 +122,6 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getargspec(partial_func))
 
-  def testGetFullArgsSpecForPartial(self):
-
-    def func(a, b):
-      del a, b
-
-    partial_function = functools.partial(func, 1)
-    argspec = tf_inspect.FullArgSpec(
-        args=['b'], varargs=None, varkw=None, defaults=None,
-        kwonlyargs=[], kwonlydefaults=None, annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_function))
-
   def testGetArgSpecOnPartialInvalidArgspec(self):
     """Tests getargspec on partial function that doesn't have valid argspec."""
 
@@ -303,6 +291,243 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getargspec(NewClass))
 
+  def testGetFullArgSpecOnDecoratorsThatDontProvideFullArgSpec(self):
+    argspec = tf_inspect.getfullargspec(
+        test_decorated_function_with_defaults)
+    self.assertEqual(['a', 'b', 'c'], argspec.args)
+    self.assertEqual((2, 'Hello'), argspec.defaults)
+
+  def testGetFullArgSpecOnDecoratorThatChangesFullArgSpec(self):
+    argspec = tf_inspect.FullArgSpec(
+        args=['a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    decorator = tf_decorator.TFDecorator('', test_undecorated_function, '',
+                                         argspec)
+    self.assertEqual(argspec, tf_inspect.getfullargspec(decorator))
+
+  def testGetFullArgSpecIgnoresDecoratorsThatDontProvideFullArgSpec(self):
+    argspec = tf_inspect.FullArgSpec(
+        args=['a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    inner_decorator = tf_decorator.TFDecorator('', test_undecorated_function,
+                                               '', argspec)
+    outer_decorator = tf_decorator.TFDecorator('', inner_decorator)
+    self.assertEqual(argspec, tf_inspect.getfullargspec(outer_decorator))
+
+  def testGetFullArgSpecReturnsOutermostDecoratorThatChangesFullArgSpec(self):
+    outer_argspec = tf_inspect.FullArgSpec(
+        args=['a'], varargs=None, varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+    inner_argspec = tf_inspect.FullArgSpec(
+        args=['b'], varargs=None, varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    inner_decorator = tf_decorator.TFDecorator('', test_undecorated_function,
+                                               '', inner_argspec)
+    outer_decorator = tf_decorator.TFDecorator('', inner_decorator, '',
+                                               outer_argspec)
+    self.assertEqual(outer_argspec,
+                     tf_inspect.getfullargspec(outer_decorator))
+
+  def testGetFullArgsSpecForPartial(self):
+
+    def func(a, b):
+      del a, b
+
+    partial_function = functools.partial(func, 1)
+    argspec = tf_inspect.FullArgSpec(
+        args=['b'], varargs=None, varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_function))
+
+  def testGetFullArgSpecOnPartialInvalidFullArgSpec(self):
+    """Tests getfullargspec.
+
+    Tests on partial function that doesn't have valid fullargspec.
+    """
+
+    def func(m, n, l, k=4):
+      return 2 * m + l + n * k
+
+    partial_func = functools.partial(func, n=7)
+
+    exception_message = (r"Some arguments \['l'\] do not have default value, "
+                         "but they are positioned after those with default "
+                         "values. This can not be expressed with ArgSpec.")
+    with self.assertRaisesRegexp(ValueError, exception_message):
+      tf_inspect.getfullargspec(partial_func)
+
+  def testGetFullArgSpecOnPartialValidFullArgSpec(self):
+    """Tests getfullargspec on partial function with valid fullargspec."""
+
+    def func(m, n, l, k=4):
+      return 2 * m + l + n * k
+
+    partial_func = functools.partial(func, n=7, l=2)
+    argspec = tf_inspect.FullArgSpec(
+        args=['m', 'n', 'l', 'k'],
+        varargs=None,
+        varkw=None,
+        defaults=(7, 2, 4),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialNoArgumentsLeft(self):
+    """Tests getfullargspec on partial function that prunes all arguments."""
+
+    def func(m, n):
+      return 2 * m + n
+
+    partial_func = functools.partial(func, 7, 10)
+    argspec = tf_inspect.FullArgSpec(
+        args=[], varargs=None, varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialKeywordArgument(self):
+    """Tests getfullargspec on partial function that prunes some arguments."""
+
+    def func(m, n):
+      return 2 * m + n
+
+    partial_func = functools.partial(func, n=7)
+    argspec = tf_inspect.FullArgSpec(
+        args=['m', 'n'], varargs=None, varkw=None, defaults=(7,),
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialKeywordArgumentWithDefaultValue(self):
+    """Tests getfullargspec.
+    
+    Tests on partial function that prunes argument by keyword.
+    """
+
+    def func(m=1, n=2):
+      return 2 * m + n
+
+    partial_func = functools.partial(func, n=7)
+    argspec = tf_inspect.FullArgSpec(
+        args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7),
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialWithVarargs(self):
+    """Tests getfullargspec on partial function with variable arguments."""
+
+    def func(m, *arg):
+      return m + len(arg)
+
+    partial_func = functools.partial(func, 7, 8)
+    argspec = tf_inspect.FullArgSpec(
+        args=[], varargs='arg', varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialWithVarkwargs(self):
+    """Tests getfullargspec.
+
+    Tests on partial function with variable keyword arguments.
+    """
+
+    def func(m, n, **kwarg):
+      return m * n + len(kwarg)
+
+    partial_func = functools.partial(func, 7)
+    argspec = tf_inspect.FullArgSpec(
+        args=['n'], varargs=None, varkw='kwarg', defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialWithDecorator(self):
+    """Tests getfullargspec on decorated partial function."""
+
+    @test_decorator('decorator')
+    def func(m=1, n=2):
+      return 2 * m + n
+
+    partial_func = functools.partial(func, n=7)
+    argspec = tf_inspect.FullArgSpec(
+        args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7),
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnCallableObject(self):
+
+    class Callable(object):
+
+      def __call__(self, a, b=1, c='hello'):
+        pass
+
+    argspec = tf_inspect.FullArgSpec(
+        args=['self', 'a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    test_obj = Callable()
+    self.assertEqual(argspec, tf_inspect.getfullargspec(test_obj))
+
+  def testGetFullArgSpecOnInitClass(self):
+
+    class InitClass(object):
+
+      def __init__(self, a, b=1, c='hello'):
+        pass
+
+    argspec = tf_inspect.FullArgSpec(
+        args=['self', 'a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(InitClass))
+
+  def testGetFullArgSpecOnNewClass(self):
+
+    class NewClass(object):
+
+      def __new__(cls, a, b=1, c='hello'):
+        pass
+
+    argspec = tf_inspect.FullArgSpec(
+        args=['cls', 'a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(NewClass))
+
   def testGetDoc(self):
     self.assertEqual('Test Decorated Function With Defaults Docstring.',
                      tf_inspect.getdoc(test_decorated_function_with_defaults))
diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
index 3a48cf683c..2a40caf720 100644
--- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
+++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
@@ -47,7 +47,7 @@ def _SanitizedArgSpec(obj):
     string, a string representation of the argspec.
   """
   output_string = ''
-  unsanitized_arg_spec = tf_inspect.getargspec(obj)
+  unsanitized_arg_spec = tf_inspect.getfullargspec(obj)
 
   for clean_attr in ('args', 'varargs', 'keywords'):
     output_string += '%s=%s, ' % (clean_attr,
-- 
GitLab


From d84159b6694773a558c8eeef63ef79b4034b8fa7 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Tue, 25 Sep 2018 18:52:12 -0700
Subject: [PATCH 0708/1357] Update BUILD files so that Estimator code depends
 on TF thru tf_no_contrib.

PiperOrigin-RevId: 214541221
---
 tensorflow/contrib/compiler/BUILD             |  2 +-
 .../contrib/data/python/kernel_tests/BUILD    |  1 -
 tensorflow/contrib/estimator/BUILD            | 46 +++----------------
 tensorflow/contrib/factorization/BUILD        |  1 -
 tensorflow/contrib/predictor/BUILD            |  3 +-
 tensorflow/contrib/saved_model/BUILD          |  6 +--
 tensorflow/contrib/tensor_forest/BUILD        |  2 +-
 tensorflow/python/feature_column/BUILD        |  2 +-
 tensorflow/python/tools/BUILD                 |  1 +
 9 files changed, 13 insertions(+), 51 deletions(-)

diff --git a/tensorflow/contrib/compiler/BUILD b/tensorflow/contrib/compiler/BUILD
index f51bfc1b22..f83386b8a4 100644
--- a/tensorflow/contrib/compiler/BUILD
+++ b/tensorflow/contrib/compiler/BUILD
@@ -65,7 +65,7 @@ py_library(
         "//tensorflow/python:summary_op_util",
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
-        "//tensorflow/python/estimator:model_fn",
+        "//tensorflow/python/estimator:estimator_py",
     ],
 )
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index c15e8d8861..ce52c990ce 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -190,7 +190,6 @@ py_test(
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:estimator_py",
     ],
 )
diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index 6db311d52d..1ea00fb7f3 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -132,21 +132,11 @@ py_library(
     srcs = ["python/estimator/dnn_with_layer_annotations.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:layers",
-        "//tensorflow/python:nn",
-        "//tensorflow/python:partitioned_variables",
-        "//tensorflow/python:summary",
-        "//tensorflow/python:variable_scope",
+        "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:head",
         "//tensorflow/python/estimator:model_fn",
         "//tensorflow/python/estimator:optimizers",
-        "//tensorflow/python/feature_column",
-        "//tensorflow/python/ops/losses",
-        "//tensorflow/python/saved_model:utils",
     ],
 )
 
@@ -162,22 +152,13 @@ py_test(
     ],
     deps = [
         ":dnn_with_layer_annotations",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:data_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:summary",
-        "//tensorflow/python:training",
+        "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:dnn",
         "//tensorflow/python/estimator:dnn_testing_utils",
         "//tensorflow/python/estimator:export_export",
         "//tensorflow/python/estimator:numpy_io",
         "//tensorflow/python/estimator:pandas_io",
         "//tensorflow/python/estimator:prediction_keys",
-        "//tensorflow/python/feature_column",
         "@six_archive//:six",
     ],
 )
@@ -283,9 +264,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:summary",
+        "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:exporter",
     ],
 )
@@ -297,7 +276,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":exporter",
-        "//tensorflow/python:platform",
+        "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:exporter",
     ],
@@ -502,7 +481,6 @@ py_library(
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:head",
         "//tensorflow/python/estimator:optimizers",
-        "//tensorflow/python/ops/losses",
         "@six_archive//:six",
     ],
 )
@@ -557,13 +535,10 @@ py_library(
     srcs = ["python/estimator/saved_model_estimator.py"],
     deps = [
         ":export",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:training",
+        "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:export",
         "//tensorflow/python/estimator:model_fn",
-        "//tensorflow/python/saved_model",
     ],
 )
 
@@ -578,16 +553,7 @@ py_test(
     deps = [
         ":export",
         ":saved_model_estimator",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:metrics",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:export_export",
         "//tensorflow/python/estimator:export_output",
diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD
index 9e1f14f990..e344d7a23b 100644
--- a/tensorflow/contrib/factorization/BUILD
+++ b/tensorflow/contrib/factorization/BUILD
@@ -64,7 +64,6 @@ tf_custom_op_py_library(
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:variables",
-        "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:estimator_py",
         "//tensorflow/python/feature_column:feature_column_py",
         "//third_party/py/numpy",
diff --git a/tensorflow/contrib/predictor/BUILD b/tensorflow/contrib/predictor/BUILD
index 72ea777ca7..d50b52b8ff 100644
--- a/tensorflow/contrib/predictor/BUILD
+++ b/tensorflow/contrib/predictor/BUILD
@@ -27,7 +27,7 @@ py_library(
         ":contrib_estimator_predictor",
         ":core_estimator_predictor",
         ":saved_model_predictor",
-        "//tensorflow/python/estimator",
+        "//tensorflow/python/estimator:estimator_py",
     ],
 )
 
@@ -89,7 +89,6 @@ py_library(
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
-        "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:estimator_py",
         "//tensorflow/python/saved_model:signature_constants",
     ],
diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD
index 4ca5274b2e..291ff83791 100644
--- a/tensorflow/contrib/saved_model/BUILD
+++ b/tensorflow/contrib/saved_model/BUILD
@@ -92,10 +92,7 @@ py_library(
         "//tensorflow/python:platform",
         "//tensorflow/python:saver",
         "//tensorflow/python:util",
-        "//tensorflow/python/estimator",
-        "//tensorflow/python/estimator:export",
-        "//tensorflow/python/estimator:keras",
-        "//tensorflow/python/estimator:model_fn",
+        "//tensorflow/python/estimator:estimator_py",
         "//tensorflow/python/keras:engine",
         "//tensorflow/python/saved_model",
     ],
@@ -111,6 +108,7 @@ py_test(
         ":keras_saved_model",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:training",
+        "//tensorflow/python/estimator:estimator_py",
         "//tensorflow/python/keras",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD
index 00c855daa3..398ac314f4 100644
--- a/tensorflow/contrib/tensor_forest/BUILD
+++ b/tensorflow/contrib/tensor_forest/BUILD
@@ -518,7 +518,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":client_lib",
-        "//tensorflow/contrib/estimator:head",
+        "//tensorflow/contrib/estimator:estimator_py",
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/contrib/learn",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index ac53a84eef..5800b693b4 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -156,7 +156,7 @@ py_test(
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:backprop",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:estimator_py",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD
index 1c1a1a54cd..75824d83e6 100644
--- a/tensorflow/python/tools/BUILD
+++ b/tensorflow/python/tools/BUILD
@@ -44,6 +44,7 @@ py_library(
         "//tensorflow/python:parsing_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:training",
+        "//tensorflow/python/estimator:estimator_py",
         "//tensorflow/python/saved_model:loader",
         "@six_archive//:six",
     ],
-- 
GitLab


From 8adf133448f822fcb866d69ea9d046512836834e Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Tue, 25 Sep 2018 19:03:54 -0700
Subject: [PATCH 0709/1357] Set step_id in Executor Args to the step_id
 generated in MasterSession.

PiperOrigin-RevId: 214542049
---
 tensorflow/core/distributed_runtime/graph_mgr.cc | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index f7a2967d00..3361819e43 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -475,10 +475,7 @@ void GraphMgr::StartParallelExecutors(const string& handle, int64 step_id,
                             delete step_container;
                           });
   Executor::Args args;
-  {
-    mutex_lock l(mu_);
-    args.step_id = ++next_id_;
-  }
+  args.step_id = step_id;
   args.rendezvous = rendezvous;
   args.collective_executor = ce_handle ? ce_handle->get() : nullptr;
   args.cancellation_manager = cancellation_manager;
-- 
GitLab


From 3f4b8c138165cc9deb0ed931c5a6bb3d8ab556f0 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Tue, 25 Sep 2018 19:13:30 -0700
Subject: [PATCH 0710/1357] Remove integer dtypes from Soft{plus,sign} OpDefs.

These ops were never intended to support integer dtypes, and the
OpKernels have already been removed in a previous patch.

PiperOrigin-RevId: 214542750
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 100 ++++++++++++++++++
 tensorflow/core/ops/nn_ops.cc                 |  10 +-
 tensorflow/core/ops/ops.pbtxt                 |  48 ++-------
 .../python/kernel_tests/softplus_op_test.py   |   5 +-
 .../python/kernel_tests/softsign_op_test.py   |   5 +-
 5 files changed, 116 insertions(+), 52 deletions(-)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 0ab1558613..86d4c6b421 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -60084,6 +60084,29 @@ op {
     }
   }
 }
+op {
+  name: "Softplus"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
 op {
   name: "SoftplusGrad"
   input_arg {
@@ -60220,6 +60243,33 @@ op {
     }
   }
 }
+op {
+  name: "SoftplusGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
 op {
   name: "Softsign"
   input_arg {
@@ -60340,6 +60390,29 @@ op {
     }
   }
 }
+op {
+  name: "Softsign"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
 op {
   name: "SoftsignGrad"
   input_arg {
@@ -60476,6 +60549,33 @@ op {
     }
   }
 }
+op {
+  name: "SoftsignGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
 op {
   name: "SpaceToBatch"
   input_arg {
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 2485fa4717..dc39996017 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -1009,32 +1009,30 @@ REGISTER_OP("SeluGrad")
     .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
-// TODO(b/111515541): change T to {half, bfloat16, float, double}
 REGISTER_OP("Softplus")
     .Input("features: T")
     .Output("activations: T")
-    .Attr("T: realnumbertype")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("SoftplusGrad")
     .Input("gradients: T")
     .Input("features: T")
     .Output("backprops: T")
-    .Attr("T: realnumbertype")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
-// TODO(b/111515541): change T to {half, bfloat16, float, double}
 REGISTER_OP("Softsign")
     .Input("features: T")
     .Output("activations: T")
-    .Attr("T: realnumbertype")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("SoftsignGrad")
     .Input("gradients: T")
     .Input("features: T")
     .Output("backprops: T")
-    .Attr("T: realnumbertype")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
 // --------------------------------------------------------------------------
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 3b89fb76ea..bdded2d894 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -28714,18 +28714,10 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_INT64
-        type: DT_BFLOAT16
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -28749,18 +28741,10 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_INT64
-        type: DT_BFLOAT16
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -28780,18 +28764,10 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_INT64
-        type: DT_BFLOAT16
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -28815,18 +28791,10 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_INT64
-        type: DT_BFLOAT16
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
diff --git a/tensorflow/python/kernel_tests/softplus_op_test.py b/tensorflow/python/kernel_tests/softplus_op_test.py
index afe3df6178..e8dc272637 100644
--- a/tensorflow/python/kernel_tests/softplus_op_test.py
+++ b/tensorflow/python/kernel_tests/softplus_op_test.py
@@ -21,7 +21,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import errors
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import nn_ops
@@ -125,8 +124,8 @@ class SoftplusTest(test.TestCase):
   def testNoInts(self):
     with self.cached_session():
       with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          "No OpKernel was registered to support Op 'Softplus'"):
+          TypeError,
+          "'features' has DataType int32 not in list of allowed values"):
         nn_ops.softplus(constant_op.constant(7)).eval()
 
 
diff --git a/tensorflow/python/kernel_tests/softsign_op_test.py b/tensorflow/python/kernel_tests/softsign_op_test.py
index 05a7c53dee..1b4db9fa46 100644
--- a/tensorflow/python/kernel_tests/softsign_op_test.py
+++ b/tensorflow/python/kernel_tests/softsign_op_test.py
@@ -21,7 +21,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import errors
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import nn_ops
 import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
@@ -69,8 +68,8 @@ class SoftsignTest(test.TestCase):
   def testNoInts(self):
     with self.cached_session():
       with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          "No OpKernel was registered to support Op 'Softsign'"):
+          TypeError,
+          "'features' has DataType int32 not in list of allowed values"):
         nn_ops.softsign(constant_op.constant(7)).eval()
 
 
-- 
GitLab


From 7f1d70d97f543d69a9f02cd6df0964f22f9278f3 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 25 Sep 2018 20:16:49 -0700
Subject: [PATCH 0711/1357] Switching Distribution strategies to use
 MultiDeviceIterator. Currently only supported in Graph mode using
 initializable iterators. In a subsequent change, we'll add in support for
 Eager mode as well.

This removes prefetching_ops_v2 code.

PiperOrigin-RevId: 214546754
---
 tensorflow/contrib/distribute/python/BUILD    |  28 +--
 .../distribute/python/metrics_v1_test.py      |   3 +-
 .../distribute/python/minimize_loss_test.py   |  26 +-
 .../distribute/python/mirrored_strategy.py    |   6 +-
 .../python/mirrored_strategy_multigpu_test.py |  12 +-
 .../contrib/distribute/python/monitor.py      |   1 +
 .../distribute/python/optimizer_v2_test.py    |   8 +-
 .../distribute/python/prefetching_ops_v2.py   | 229 ------------------
 .../python/prefetching_ops_v2_test.py         |  90 -------
 .../contrib/distribute/python/step_fn.py      |   7 +-
 .../contrib/distribute/python/step_fn_test.py |   1 +
 .../contrib/distribute/python/values.py       |  50 +++-
 .../contrib/distribute/python/values_test.py  |  22 +-
 13 files changed, 92 insertions(+), 391 deletions(-)
 delete mode 100644 tensorflow/contrib/distribute/python/prefetching_ops_v2.py
 delete mode 100644 tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 48a7593ab4..7eead6e472 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -22,7 +22,6 @@ py_library(
     visibility = ["//tensorflow:internal"],
     deps = [
         ":input_ops",
-        ":prefetching_ops_v2",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:device_util",
@@ -30,6 +29,7 @@ py_library(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:multi_device_iterator_ops",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/training/checkpointable:base",
         "@six_archive//:six",
@@ -647,32 +647,6 @@ cuda_py_test(
     ],
 )
 
-py_library(
-    name = "prefetching_ops_v2",
-    srcs = ["prefetching_ops_v2.py"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:contrib_op_loader",
-        "//tensorflow/contrib/data/python/ops:prefetching_ops",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-    ],
-)
-
-cuda_py_test(
-    name = "prefetching_ops_v2_test",
-    srcs = ["prefetching_ops_v2_test.py"],
-    additional_deps = [
-        ":prefetching_ops_v2",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-)
-
 py_library(
     name = "input_ops",
     srcs = ["input_ops.py"],
diff --git a/tensorflow/contrib/distribute/python/metrics_v1_test.py b/tensorflow/contrib/distribute/python/metrics_v1_test.py
index 8163494c8e..f7773aff4f 100644
--- a/tensorflow/contrib/distribute/python/metrics_v1_test.py
+++ b/tensorflow/contrib/distribute/python/metrics_v1_test.py
@@ -86,10 +86,11 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
   def _test_metric(self, distribution, dataset_fn, metric_fn, expected_fn):
     with ops.Graph().as_default(), distribution.scope():
       iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+          dataset_fn).make_initializable_iterator()
       value, update = distribution.call_for_each_tower(
           metric_fn, iterator.get_next())
       update = distribution.group(update)
+      self.evaluate(iterator.initializer)
       self.evaluate(variables.local_variables_initializer())
       # TODO(josh11b): Once we switch to using a global batch size for input,
       # replace "distribution.num_towers" with "1".
diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py
index ba147e7824..d082d5c419 100644
--- a/tensorflow/contrib/distribute/python/minimize_loss_test.py
+++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py
@@ -41,6 +41,14 @@ from tensorflow.python.ops.losses import losses_impl
 
 class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
 
+  def _get_iterator(self, ds):
+    if context.executing_eagerly():
+      iterator = ds.make_one_shot_iterator()
+    else:
+      iterator = ds.make_initializable_iterator()
+      self.evaluate(iterator.initializer)
+    return iterator
+
   @combinations.generate(
       combinations.times(
           combinations.distributions_and_v1_optimizers(),
@@ -62,8 +70,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             distribution.call_for_each_tower(
                 model_fn, *inputs, run_concurrently=layer.built))
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -99,8 +106,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       model_fn, dataset_fn, layer = minimize_loss_example(
           optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss)
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.group(
@@ -159,8 +165,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             distribution.call_for_each_tower(
                 model_fn, *inputs, run_concurrently=layer.built))
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -244,8 +249,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
           fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS)
         return control_flow_ops.group(fetches)
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -338,8 +342,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             distribution.call_for_each_tower(
                 model_fn, x, y, run_concurrently=False))
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -432,8 +435,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             output=loss)
         return distribution.group(train_op)
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         initial_loss = lambda: constant_op.constant(1e7)
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 0c6805d682..945f450387 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -480,8 +480,10 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
           self._prefetch_on_device)
     else:
       return values.PerDeviceDataset(
-          self._call_dataset_fn(dataset_fn), self._devices,
-          self._prefetch_on_device)
+          self._call_dataset_fn(dataset_fn),
+          self._devices,
+          self._prefetch_on_device,
+          source_device=device_util.resolve("/device:CPU:0"))
 
   # TODO(priyag): Deal with OutOfRange errors once b/111349762 is fixed.
   def _run_steps_on_dataset(self, fn, iterator, iterations,
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index f51e543624..04c712ce1d 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -300,9 +300,15 @@ class MirroredStrategyVariableCreationTest(test.TestCase):
 
     dist = mirrored_strategy.MirroredStrategy(
         ["/device:GPU:0", "/device:CPU:0"])
-    features = dist.distribute_dataset(
-        lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10)
-    ).make_one_shot_iterator().get_next()
+    ds = dist.distribute_dataset(
+        lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10))
+    if context.executing_eagerly():
+      iterator = ds.make_one_shot_iterator()
+    else:
+      iterator = ds.make_initializable_iterator()
+      self.evaluate([iterator.initializer])
+
+    features = iterator.get_next()
 
     with dist.scope():
       result = dist.call_for_each_tower(
diff --git a/tensorflow/contrib/distribute/python/monitor.py b/tensorflow/contrib/distribute/python/monitor.py
index 7644acedc9..17b7ab74f6 100644
--- a/tensorflow/contrib/distribute/python/monitor.py
+++ b/tensorflow/contrib/distribute/python/monitor.py
@@ -51,6 +51,7 @@ class Monitor(object):
     else:
       if session is None:
         raise ValueError("Should provide a `session` in Graph mode.")
+      session.run(step_callable._iterator.initializer)  # pylint: disable=protected-access
       self._run_step = session.make_callable(step_callable())
       session.run(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/optimizer_v2_test.py b/tensorflow/contrib/distribute/python/optimizer_v2_test.py
index 6e9ba37a19..3064433129 100644
--- a/tensorflow/contrib/distribute/python/optimizer_v2_test.py
+++ b/tensorflow/contrib/distribute/python/optimizer_v2_test.py
@@ -42,8 +42,11 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase):
       model_fn, dataset_fn, layer = minimize_loss_example(
           optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss)
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      ds = distribution.distribute_dataset(dataset_fn)
+      if context.executing_eagerly():
+        iterator = ds.make_one_shot_iterator()
+      else:
+        iterator = ds.make_initializable_iterator()
 
       def run_step():
         return control_flow_ops.group(distribution.unwrap(
@@ -52,6 +55,7 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase):
 
       if not context.executing_eagerly():
         with self.cached_session() as sess:
+          sess.run(iterator.initializer)
           run_step = sess.make_callable(run_step())
         self.evaluate(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
deleted file mode 100644
index 492d82f6a1..0000000000
--- a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
+++ /dev/null
@@ -1,229 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Extension of prefetching_ops to support more than one device."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import warnings
-
-from tensorflow.contrib.data.python.ops import contrib_op_loader  # pylint: disable=unused-import
-from tensorflow.contrib.data.python.ops import gen_dataset_ops
-from tensorflow.contrib.data.python.ops import prefetching_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.data.util import nest as data_nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import function
-from tensorflow.python.framework import ops
-from tensorflow.python.util import nest
-
-
-# pylint: disable=protected-access
-class _PrefetchToDeviceIterator(object):
-  """A replacement for `tf.data.Iterator` that prefetches to another device.
-
-  Args:
-    input_dataset: The input dataset.
-    one_shot: If true, we make a one shot iterator that's already initialized.
-    devices: Devices on which to prefetch.
-    buffer_size: Size of the prefetching buffer.
-    shared_name: (Optional.) If non-empty, the returned iterator will be
-        shared under the given name across multiple sessions that share the
-        same devices (e.g. when using a remote server). Only used if one_shot
-        is False.
-
-  Returns:
-    An Iterator type object.
-  """
-
-  def __init__(self,
-               input_dataset,
-               one_shot,
-               devices,
-               buffer_size,
-               shared_name=None):
-    self._input_dataset = input_dataset
-    self._get_next_call_count = 0
-    self._one_shot = one_shot
-    if shared_name is None:
-      shared_name = ""
-    self._devices = devices
-
-    if self._one_shot:
-      self._input_iterator = input_dataset.make_one_shot_iterator()
-    else:
-      self._input_iterator = iterator_ops.Iterator.from_structure(
-          self._input_dataset.output_types, self._input_dataset.output_shapes,
-          shared_name, self._input_dataset.output_classes)
-    input_iterator_handle = self._input_iterator.string_handle()
-
-    @function.Defun(dtypes.string)
-    def _prefetch_fn(handle):
-      """Prefetches one element from `input_iterator`."""
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          handle, self._input_iterator.output_types,
-          self._input_iterator.output_shapes,
-          self._input_iterator.output_classes)
-      ret = remote_iterator.get_next()
-      return nest.flatten(sparse.serialize_sparse_tensors(ret))
-
-    target_device = gen_dataset_ops.iterator_get_device(
-        self._input_iterator._iterator_resource)
-    self._buffering_resources = []
-    for device in nest.flatten(self._devices):
-      with ops.device(device):
-        buffer_resource_handle = prefetching_ops.function_buffering_resource(
-            f=_prefetch_fn,
-            output_types=data_nest.flatten(
-                sparse.as_dense_types(self._input_dataset.output_types,
-                                      self._input_dataset.output_classes)),
-            target_device=target_device,
-            string_arg=input_iterator_handle,
-            buffer_size=buffer_size,
-            shared_name=shared_name)
-        self._buffering_resources.append(buffer_resource_handle)
-
-    if not self._one_shot:
-      reset_ops = []
-      for buffer_resource in self._buffering_resources:
-        reset_ops.append(
-            prefetching_ops.function_buffering_resource_reset(buffer_resource))
-      with ops.control_dependencies(reset_ops):
-        self._initializer = self._input_iterator.make_initializer(
-            self._input_dataset)
-
-  def get_next(self, name=None):
-    """See `tf.data.Iterator.get_next`."""
-    self._get_next_call_count += 1
-    if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD:
-      warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE)
-
-    flat_result = []
-    # TODO(priyag): This will fail if the input size (typically number of
-    # batches) is not divisible by number of devices.
-    # How do we handle that more gracefully / let the user know?
-    for buffer_resource in self._buffering_resources:
-      flat_ret = gen_dataset_ops.function_buffering_resource_get_next(
-          buffer_resource,
-          output_types=data_nest.flatten(sparse.as_dense_types(
-              self.output_types, self.output_classes)), name=name)
-
-      ret = sparse.deserialize_sparse_tensors(
-          data_nest.pack_sequence_as(self.output_types, flat_ret),
-          self.output_types, self.output_shapes, self.output_classes)
-
-      for tensor, shape in zip(
-          data_nest.flatten(ret), data_nest.flatten(self.output_shapes)):
-        if isinstance(tensor, ops.Tensor):
-          tensor.set_shape(shape)
-      flat_result.append(ret)
-
-    return nest.pack_sequence_as(self._devices, flat_result)
-
-  @property
-  def initializer(self):
-    if self._one_shot:
-      raise NotImplementedError("Can't initialize a one_shot_iterator")
-    return self._initializer
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-# pylint: enable=protected-access
-
-
-class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` whose iterator prefetches elements to other device(s)."""
-
-  def __init__(self, input_dataset, devices, buffer_size):
-    super(_PrefetchToDeviceDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._devices = devices
-    self._buffer_size = buffer_size if buffer_size is not None else 1
-
-  def make_one_shot_iterator(self):
-    return _PrefetchToDeviceIterator(
-        self._input_dataset,
-        one_shot=True,
-        devices=self._devices,
-        buffer_size=self._buffer_size)
-
-  def make_initializable_iterator(self, shared_name=None):
-    if context.executing_eagerly():
-      raise RuntimeError(
-          "make_initializable_iterator is not supported when eager "
-          "execution is enabled.")
-
-    return _PrefetchToDeviceIterator(
-        self._input_dataset,
-        one_shot=False,
-        devices=self._devices,
-        buffer_size=self._buffer_size,
-        shared_name=shared_name)
-
-  def _as_variant_tensor(self):
-    # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset
-    # transformation methods is called.
-    # TODO(mrry): Investigate support for chaining further transformations after
-    # the prefetch, including GPU support.
-    raise NotImplementedError("`prefetch_to_devices()` must be the last "
-                              "transformation in a dataset pipeline.")
-
-  # TODO(priyag): Fix the output types, shapes and classes to match the result
-  # of get_next (which has the additional nesting layer of devices now).
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-
-def prefetch_to_devices(devices, buffer_size=None):
-  """A transformation that prefetches dataset values to the given `devices`.
-
-  NOTE: Although the transformation creates a `tf.data.Dataset`, the
-  transformation must be the final `Dataset` in the input pipeline.
-
-  Args:
-    devices: A nested structure of devices on which to prefetch the data. It can
-      be a single device name, or a tuple or list of device names.
-    buffer_size: (Optional.) The number of elements to buffer on each device.
-      Defaults to an automatically chosen value.
-
-  Returns:
-    A `Dataset` transformation function, which can be passed to
-    `tf.data.Dataset.apply`.
-  """
-  def _apply_fn(dataset):
-    return _PrefetchToDeviceDataset(dataset, devices, buffer_size)
-
-  return _apply_fn
diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
deleted file mode 100644
index 16799104e8..0000000000
--- a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for prefetching_ops_v2."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.distribute.python import prefetching_ops_v2
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import test_util
-from tensorflow.python.platform import test
-
-
-class PrefetchingOpsV2Test(test.TestCase):
-
-  def testPrefetchToOneDevice(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops_v2.prefetch_to_devices("/gpu:0"))
-
-    iterator = device_dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToTwoDevicesInAList(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
-
-    iterator = device_dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    output = []
-    # TODO(rohanj): Modify test to go till the end of the dataset when we
-    # switch to MultiDeviceIterator.
-    with self.cached_session() as sess:
-      for _ in range(4):
-        result = sess.run(next_element)
-        self.assertEqual(2, len(result))
-        output.extend(result)
-      self.assertEquals(set(range(8)), set(output))
-
-  def testPrefetchToTwoDevicesWithReinit(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
-
-    iterator = device_dataset.make_initializable_iterator()
-    next_element = iterator.get_next()
-
-    # TODO(rohanj): Modify test to go till the end of the dataset when we
-    # switch to MultiDeviceIterator.
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer)
-      for _ in range(4):
-        sess.run(next_element)
-      sess.run(iterator.initializer)
-      for _ in range(4):
-        sess.run(next_element)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/distribute/python/step_fn.py b/tensorflow/contrib/distribute/python/step_fn.py
index 1b5a4f64e5..23bf36184f 100644
--- a/tensorflow/contrib/distribute/python/step_fn.py
+++ b/tensorflow/contrib/distribute/python/step_fn.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
 from tensorflow.python.training import optimizer as optimizer_lib
 
 
@@ -50,7 +51,11 @@ class StandardInputStep(Step):
   def __init__(self, dataset_fn, distribution):
     super(StandardInputStep, self).__init__(distribution)
     self._distributed_input = distribution.distribute_dataset(dataset_fn)
-    self._iterator = self._distributed_input.make_one_shot_iterator()
+    if context.executing_eagerly():
+      self._iterator = self._distributed_input.make_one_shot_iterator()
+    else:
+      # TODO(priyag): Expose initializer via some initializer property.
+      self._iterator = self._distributed_input.make_initializable_iterator()
 
 
 class StandardSingleLossStep(StandardInputStep):
diff --git a/tensorflow/contrib/distribute/python/step_fn_test.py b/tensorflow/contrib/distribute/python/step_fn_test.py
index f1ada49fa3..1ff9b9ceec 100644
--- a/tensorflow/contrib/distribute/python/step_fn_test.py
+++ b/tensorflow/contrib/distribute/python/step_fn_test.py
@@ -50,6 +50,7 @@ class SingleLossStepTest(test.TestCase, parameterized.TestCase):
         run_step = single_loss_step
       else:
         with self.cached_session() as sess:
+          sess.run(single_loss_step._iterator.initializer)
           run_step = sess.make_callable(single_loss_step())
       self.evaluate(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index fafa6384a1..a0cd029f51 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -26,7 +26,7 @@ import weakref
 import six
 
 from tensorflow.contrib.distribute.python import input_ops
-from tensorflow.contrib.distribute.python import prefetching_ops_v2
+from tensorflow.python.data.ops import multi_device_iterator_ops
 from tensorflow.python.eager import context
 from tensorflow.python.framework import device as tf_device
 from tensorflow.python.framework import ops
@@ -683,7 +683,7 @@ class PerDeviceDataIterator(object):
   def get_next(self, name=None):
     """Scatter the input across devices."""
     if self._prefetch_on_device:
-      data_list = self._iterator.get_next(name=name)
+      data_list = self._iterator.get_next()
       index = dict(zip(self._devices, data_list))
     else:
       batch = self._iterator.get_next(name=name)
@@ -703,21 +703,26 @@ class PerDeviceDataIterator(object):
 class PerDeviceDataset(object):
   """Like `tf.data.Dataset` split devices, producing `PerDevice` data."""
 
-  def __init__(self, dataset, devices, prefetch_on_device=None):
+  def __init__(
+      self,
+      dataset,
+      devices,
+      prefetch_on_device=None,
+      source_device="/cpu:0",
+  ):
     self._devices = devices
+    self._source_device = source_device if source_device is not None else "/cpu:0"
 
     # Default to using prefetching in graph mode, unless specified.
-    # TODO(priyag): Enable prefetching in eager mode.
+    # TODO(rohanj): Enable prefetching in eager mode.
     self._prefetch_on_device = prefetch_on_device
     if self._prefetch_on_device is None:
       self._prefetch_on_device = not context.executing_eagerly()
     assert not (self._prefetch_on_device and context.executing_eagerly()), (
         "Prefetching is only supported in graph mode currently")
 
-    if self._prefetch_on_device:
-      self._dataset = dataset.apply(
-          prefetching_ops_v2.prefetch_to_devices(self._devices))
-    else:
+    self._dataset = dataset
+    if not self._prefetch_on_device:
       # TODO(priyag): If dropping remainder is not appropriate, find another
       # approach to distributing the dataset when not possible to divide evenly.
       # Possibly not an issue when we start using PartitionedDataset.
@@ -725,15 +730,33 @@ class PerDeviceDataset(object):
 
   def make_one_shot_iterator(self):
     """Get a one time use iterator for the distributed PerDeviceDataset."""
+    # Graph mode prefetching with one shot iterator is disabled.
+    if not context.executing_eagerly():
+      raise ValueError("Cannot create a one shot iterator. Please use "
+                       "`make_initializable_iterator()` instead.")
+    # Eager mode prefetching would error out in constructor. Only remaining
+    # cases are non-prefetching eager / graph mode. We delegate to
+    # PerDeviceDataIterator to handle them.
     dataset_iterator = self._dataset.make_one_shot_iterator()
     return PerDeviceDataIterator(
-        dataset_iterator, self._devices, self._prefetch_on_device)
+        dataset_iterator, self._devices, prefetch_on_device=False)
 
   def make_initializable_iterator(self):
     """Get an initializable iterator for the distributed PerDeviceDataset."""
-    dataset_iterator = self._dataset.make_initializable_iterator()
+    # Eager mode generates already initialized iterators. Hence we cannot create
+    # an initializable iterator.
+    if context.executing_eagerly():
+      raise ValueError("Cannot create initializable iterator in Eager mode. "
+                       "Please use `make_one_shot_iterator` instead.")
+    if self._prefetch_on_device:
+      dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+          self._dataset, self._devices, source_device=self._source_device)
+    else:
+      dataset_iterator = self._dataset.make_initializable_iterator()
     return PerDeviceDataIterator(
-        dataset_iterator, self._devices, self._prefetch_on_device)
+        dataset_iterator,
+        self._devices,
+        prefetch_on_device=self._prefetch_on_device)
 
 
 class MultiWorkerDataIterator(object):
@@ -813,7 +836,10 @@ class MultiWorkerDataset(object):
         worker_input = input_ops.auto_shard_dataset(
             worker_input, len(worker_device_map), i)
         self._datasets[worker] = PerDeviceDataset(
-            worker_input, worker_devices, prefetch_on_device=prefetch_on_device)
+            worker_input,
+            worker_devices,
+            source_device=worker,
+            prefetch_on_device=prefetch_on_device)
 
   def make_one_shot_iterator(self):
     iterators = {}
diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py
index 15a85a28f5..002d61f46e 100644
--- a/tensorflow/contrib/distribute/python/values_test.py
+++ b/tensorflow/contrib/distribute/python/values_test.py
@@ -349,7 +349,11 @@ class PerDeviceDatasetTest(test.TestCase):
   def _test_iterator_no_prefetch(self, devices, dataset, expected_values):
     per_device_dataset = values.PerDeviceDataset(
         dataset, devices, prefetch_on_device=False)
-    iterator = per_device_dataset.make_one_shot_iterator()
+    if context.executing_eagerly():
+      iterator = per_device_dataset.make_one_shot_iterator()
+    else:
+      iterator = per_device_dataset.make_initializable_iterator()
+      self.evaluate([iterator.initializer])
 
     for expected_value in expected_values:
       next_element = iterator.get_next()
@@ -366,20 +370,14 @@ class PerDeviceDatasetTest(test.TestCase):
     if not context.executing_eagerly():
       per_device_dataset = values.PerDeviceDataset(
           dataset, devices, prefetch_on_device=True)
-      iterator = per_device_dataset.make_one_shot_iterator()
+      iterator = per_device_dataset.make_initializable_iterator()
+      self.evaluate([iterator.initializer])
 
-      # With prefetching, we cannot guarantee which input ends up on which
-      # device, so we verify that the complete set seen on all devices is
-      # correct, and equal numbers are distributed to each device.
-      combined_actual = []
-      combined_expected = []
       for expected_value in expected_values:
         next_element = iterator.get_next()
-        combined_actual.extend(self.evaluate([
-            values.select_device(d, next_element) for d in devices]))
-        combined_expected.extend(expected_value)
-
-      self.assertEqual(set(combined_expected), set(combined_actual))
+        computed_value = self.evaluate(
+            [values.select_device(d, next_element) for d in devices])
+        self.assertEqual(expected_value, computed_value)
 
       with self.assertRaises(errors.OutOfRangeError):
         next_element = iterator.get_next()
-- 
GitLab


From 6666516f390f125ed70ddbd4e6f89b83d953c408 Mon Sep 17 00:00:00 2001
From: Kay Zhu <kayzhu@google.com>
Date: Tue, 25 Sep 2018 20:35:05 -0700
Subject: [PATCH 0712/1357] [XLA] In HloEvaluator, fix an issue where the
 return type and native type are assumed to be the same for HandleImag and
 HandleReal, when in fact they should be float and complex64 (or float for
 HandleReal's case), respectively.

PiperOrigin-RevId: 214548051
---
 .../compiler/xla/service/hlo_evaluator.cc     | 55 +++++++++++++++++++
 .../compiler/xla/service/hlo_evaluator.h      |  4 ++
 .../xla/service/hlo_evaluator_typed_visitor.h | 18 +-----
 3 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index b91b2406e2..d7c39b2778 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -496,6 +496,61 @@ Status HloEvaluator::HandleIsFinite(HloInstruction* is_finite) {
   return Status::OK();
 }
 
+Status HloEvaluator::HandleReal(HloInstruction* real) {
+  auto operand = real->operand(0);
+  switch (operand->shape().element_type()) {
+    case BF16: {
+      auto result_or = ElementWiseUnaryOpImpl<bfloat16, bfloat16>(
+          real, [](bfloat16 elem_operand) { return elem_operand; },
+          GetEvaluatedLiteralFor(operand));
+      TF_ASSIGN_OR_RETURN(evaluated_[real], std::move(result_or));
+      break;
+    }
+    case C64: {
+      auto result_or = ElementWiseUnaryOpImpl<float, complex64>(
+          real, [](complex64 elem_operand) { return std::real(elem_operand); },
+          GetEvaluatedLiteralFor(operand));
+      TF_ASSIGN_OR_RETURN(evaluated_[real], std::move(result_or));
+      break;
+    }
+    case F16: {
+      auto result_or = ElementWiseUnaryOpImpl<Eigen::half, Eigen::half>(
+          real, [](Eigen::half elem_operand) { return elem_operand; },
+          GetEvaluatedLiteralFor(operand));
+      TF_ASSIGN_OR_RETURN(evaluated_[real], std::move(result_or));
+      break;
+    }
+    case F32: {
+      auto result_or = ElementWiseUnaryOpImpl<float, float>(
+          real, [](float elem_operand) { return elem_operand; },
+          GetEvaluatedLiteralFor(operand));
+      TF_ASSIGN_OR_RETURN(evaluated_[real], std::move(result_or));
+      break;
+    }
+    case F64: {
+      auto result_or = ElementWiseUnaryOpImpl<double, double>(
+          real, [](double elem_operand) { return elem_operand; },
+          GetEvaluatedLiteralFor(operand));
+      TF_ASSIGN_OR_RETURN(evaluated_[real], std::move(result_or));
+      break;
+    }
+    default:
+      LOG(FATAL) << "HandleReal: unknown/unhandled primitive type: "
+                 << PrimitiveType_Name(operand->shape().element_type());
+  }
+
+  return Status::OK();
+}
+
+Status HloEvaluator::HandleImag(HloInstruction* imag) {
+  auto result_or = ElementWiseUnaryOpImpl<float, complex64>(
+      imag, [](complex64 elem_operand) { return std::imag(elem_operand); },
+      GetEvaluatedLiteralFor(imag->operand(0)));
+
+  TF_ASSIGN_OR_RETURN(evaluated_[imag], std::move(result_or));
+  return Status::OK();
+}
+
 Status HloEvaluator::HandleCompare(HloInstruction* compare) {
   HloOpcode opcode = compare->opcode();
   auto lhs = compare->operand(0);
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index 21e676d671..6c2662ebae 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -184,6 +184,10 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
 
   Status HandleSort(HloInstruction* sort) override;
 
+  Status HandleReal(HloInstruction* real) override;
+
+  Status HandleImag(HloInstruction* imag) override;
+
   Status HandleReduce(HloInstruction* reduce) override;
 
   // Returns the already-evaluated literal result for the instruction.
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index 04cdc6901c..b2d12c94b8 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -89,6 +89,8 @@ bool SafeLess(const NativeT& a, const NativeT& b) {
 // to this rule, notably:
 // - HandleCompare and HandleIsFinite: where the resulting literal type is
 //   always boolean.
+// - HandleImag and HandleReal: where the resulting literal type is always float
+//   and the operand is always complex, or real in the case of HandleReal.
 // These operations are handled outside of the parent HloEvaluator handlers
 // instead of from within TypedVisitor.
 //
@@ -329,14 +331,6 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
     return HandleFloor<ReturnT>(floor);
   }
 
-  Status HandleImag(HloInstruction* imag) override {
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[imag],
-                        ElementWiseUnaryOp(imag, [](ElementwiseT elem_operand) {
-                          return std::imag(elem_operand);
-                        }));
-    return Status::OK();
-  }
-
   Status HandleLog(HloInstruction* log) override {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[log],
                         ElementWiseUnaryOp(log, [](ElementwiseT elem_operand) {
@@ -684,14 +678,6 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
-  Status HandleReal(HloInstruction* real) override {
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[real],
-                        ElementWiseUnaryOp(real, [](ElementwiseT elem_operand) {
-                          return std::real(elem_operand);
-                        }));
-    return Status::OK();
-  }
-
   template <typename NativeT, typename std::enable_if<std::is_floating_point<
                                   NativeT>::value>::type* = nullptr>
   Status HandleRemainder(HloInstruction* remainder) {
-- 
GitLab


From 7c2341501a583ca625c976f118090e495cdcbe07 Mon Sep 17 00:00:00 2001
From: Jason Furmanek <furmanek@us.ibm.com>
Date: Wed, 26 Sep 2018 04:44:12 +0000
Subject: [PATCH 0713/1357] Find NCCL2 debians in Tensorflow configure

---
 configure.py                        | 136 +++++++++++++++++++---------
 third_party/nccl/nccl_configure.bzl |  14 ++-
 third_party/nccl/system.BUILD.tpl   |   4 +-
 3 files changed, 105 insertions(+), 49 deletions(-)

diff --git a/configure.py b/configure.py
index f0b9fada5e..9fd2dc2630 100644
--- a/configure.py
+++ b/configure.py
@@ -54,6 +54,12 @@ _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc'
 _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
 _TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE')
 
+NCCL_LIB_PATHS = [
+  "lib64/",
+  "lib/powerpc64le-linux-gnu/",
+  "lib/x86_64-linux-gnu/",
+  ""
+]
 
 class UserInputError(Exception):
   pass
@@ -1085,7 +1091,7 @@ def set_tf_tensorrt_install_path(environ_cp):
 
 
 def set_tf_nccl_install_path(environ_cp):
-  """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION.
+  """Set NCCL_INSTALL_PATH, NCCL_HDR_PATH and TF_NCCL_VERSION.
 
   Args:
     environ_cp: copy of the os.environ.
@@ -1111,46 +1117,98 @@ def set_tf_nccl_install_path(environ_cp):
     if tf_nccl_version == '1':
       break  # No need to get install path, NCCL 1 is a GitHub repo.
 
-    # TODO(csigg): Look with ldconfig first if we can find the library in paths
+    # Look with ldconfig first if we can find the library in paths
     # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding
     # include directory. This is where the NCCL .deb packages install them.
-    # Then ask the user if we should use that. Instead of a single
-    # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to
-    # nccl_configure.bzl
-    default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH')
-    ask_nccl_path = (r'Please specify the location where NCCL %s library is '
+
+    # First check to see if NCCL is in the ldconfig.
+    # If its found, use that location.
+    if is_linux():
+      ldconfig_bin = which('ldconfig') or '/sbin/ldconfig'
+      nccl2_path_from_ldconfig = run_shell([ldconfig_bin, '-p'])
+      nccl2_path_from_ldconfig = re.search('.*libnccl.so .* => (.*)',
+                                           nccl2_path_from_ldconfig)
+    if nccl2_path_from_ldconfig:
+      nccl2_path_from_ldconfig = nccl2_path_from_ldconfig.group(1)
+      if os.path.exists('%s.%s' % (nccl2_path_from_ldconfig, tf_nccl_version)):
+        nccl_install_path = os.path.dirname(nccl2_path_from_ldconfig)
+        print('NCCL libraries found in ' + nccl2_path_from_ldconfig)
+        
+        # Check if this is the main system lib location
+        if re.search('.*linux-gnu', nccl_install_path):
+          trunc_nccl_install_path = "/usr"
+          print("This looks like a system path.")
+        else:
+          trunc_nccl_install_path = nccl_install_path + "/.."
+  
+        # Look for header
+        nccl_hdr_path = trunc_nccl_install_path + "/include"
+        print("Assuming NCCL header path is " + nccl_hdr_path)
+        if os.path.exists(nccl_hdr_path + "/nccl.h"):
+          # Set NCCL_INSTALL_PATH
+          environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path
+          write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path)
+
+          # Set NCCL_HDR_PATH
+          environ_cp['NCCL_HDR_PATH'] = nccl_hdr_path
+          write_action_env_to_bazelrc('NCCL_HDR_PATH', nccl_hdr_path)
+          break
+        else:
+          print('The header for NCCL2 cannot be found. Please install the libnccl-dev package.')
+      else:
+          print('NCCL2 is listed by ldconfig but the library is not found. ' 
+                'Your ldconfig is out of date. Please run sudo ldconfig.')
+    else:
+      # NCCL is not found in ldconfig. Ask the user for the location.
+      default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH')
+      ask_nccl_path = (r'Please specify the location where NCCL %s library is '
                      'installed. Refer to README.md for more details. [Default '
                      'is %s]:') % (tf_nccl_version, default_nccl_path)
-    nccl_install_path = get_from_env_or_user_or_default(
+      nccl_install_path = get_from_env_or_user_or_default(
         environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path)
 
-    # Result returned from "read" will be used unexpanded. That make "~"
-    # unusable. Going through one more level of expansion to handle that.
-    nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path))
-    if is_windows() or is_cygwin():
-      nccl_install_path = cygpath(nccl_install_path)
-
-    if is_windows():
-      nccl_lib_path = 'lib/x64/nccl.lib'
-    elif is_linux():
-      nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version
-    elif is_macos():
-      nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version
-
-    nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path)
-    nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h')
-    if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path):
-      # Set NCCL_INSTALL_PATH
-      environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path
-      write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path)
-      break
-
-    # Reset and Retry
-    print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the '
-          'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path,
+      # Result returned from "read" will be used unexpanded. That make "~"
+      # unusable. Going through one more level of expansion to handle that.
+      nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path))
+      if is_windows() or is_cygwin():
+        nccl_install_path = cygpath(nccl_install_path)
+
+      if is_windows():
+        nccl_lib_path = 'lib/x64/nccl.lib'
+      elif is_linux():
+        nccl_lib_filename = 'libnccl.so.%s' % tf_nccl_version
+        nccl_lpath = '%s/lib/%s' % (nccl_install_path, nccl_lib_filename)
+        if not os.path.exists(nccl_lpath):
+          for relative_path in NCCL_LIB_PATHS:
+            path = '%s/%s%s' % (nccl_install_path, relative_path, nccl_lib_filename)
+            if os.path.exists(path):
+              print("NCCL found at " + path)
+              nccl_lib_path = path
+              break
+        else:
+          nccl_lib_path = nccl_lpath
+      elif is_macos():
+        nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version
+
+      nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path)
+      nccl_hdr_path = os.path.join(os.path.dirname(nccl_lib_path), '../include/nccl.h')
+      print("Assuming NCCL header path is "+nccl_hdr_path)
+      if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path):
+        # Set NCCL_INSTALL_PATH
+        environ_cp['NCCL_INSTALL_PATH'] = os.path.dirname(nccl_lib_path)
+        write_action_env_to_bazelrc('NCCL_INSTALL_PATH', os.path.dirname(nccl_lib_path))
+
+        # Set NCCL_HDR_PATH
+        environ_cp['NCCL_HDR_PATH'] = os.path.dirname(nccl_hdr_path)
+        write_action_env_to_bazelrc('NCCL_HDR_PATH', os.path.dirname(nccl_hdr_path))
+        break
+
+      # Reset and Retry
+      print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the '
+            'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path,
                                               nccl_hdr_path))
 
-    environ_cp['TF_NCCL_VERSION'] = ''
+      environ_cp['TF_NCCL_VERSION'] = ''
   else:
     raise UserInputError('Invalid TF_NCCL setting was provided %d '
                          'times in a row. Assuming to be a scripting mistake.' %
@@ -1401,20 +1459,10 @@ def set_grpc_build_flags():
 
 def set_system_libs_flag(environ_cp):
   syslibs = environ_cp.get('TF_SYSTEM_LIBS', '')
+  syslibs = ','.join(sorted(syslibs.split(',')))
   if syslibs and syslibs != '':
-    if ',' in syslibs:
-      syslibs = ','.join(sorted(syslibs.split(',')))
-    else:
-      syslibs = ','.join(sorted(syslibs.split()))
     write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs)
 
-  if 'PREFIX' in environ_cp:
-    write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX'])
-  if 'LIBDIR' in environ_cp:
-    write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
-  if 'INCLUDEDIR' in environ_cp:
-    write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
-
 
 def set_windows_build_flags(environ_cp):
   """Set Windows specific build options."""
diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index ce9447096e..0713b36724 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -5,6 +5,7 @@
 
   * `TF_NCCL_VERSION`: The NCCL version.
   * `NCCL_INSTALL_PATH`: The installation path of the NCCL library.
+  * `NCCL_HDR_PATH`: The installation path of the NCCL header files.
 """
 
 load(
@@ -15,6 +16,7 @@ load(
 )
 
 _NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_NCCL_HDR_PATH = "NCCL_HDR_PATH"
 _TF_NCCL_VERSION = "TF_NCCL_VERSION"
 _TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO"
 
@@ -68,7 +70,7 @@ def _find_nccl_header(repository_ctx, nccl_install_path):
   return header_path
 
 
-def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version):
+def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version):
   """Checks whether the header file matches the specified version of NCCL.
 
   Args:
@@ -79,7 +81,9 @@ def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version):
   Returns:
     A string containing the library version of NCCL.
   """
-  header_path = _find_nccl_header(repository_ctx, nccl_install_path)
+  header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
+  if not header_path.exists:
+    header_path = _find_nccl_header(repository_ctx, nccl_install_path)
   header_dir = str(header_path.realpath.dirname)
   major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
                                    _DEFINE_NCCL_MAJOR)
@@ -109,6 +113,7 @@ def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version):
   """
   lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path,
                                                            nccl_version))
+
   if not lib_path.exists:
     auto_configure_fail("Cannot find NCCL library %s" % str(lib_path))
   return lib_path
@@ -138,10 +143,12 @@ def _nccl_configure_impl(repository_ctx):
   else:
     # Create target for locally installed NCCL.
     nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
-    _check_nccl_version(repository_ctx, nccl_install_path, nccl_version)
+    nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
+    _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
     repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, {
         "%{version}": nccl_version,
         "%{install_path}": nccl_install_path,
+        "%{hdr_path}": nccl_hdr_path,
     })
 
 
@@ -149,6 +156,7 @@ nccl_configure = repository_rule(
     implementation=_nccl_configure_impl,
     environ=[
         _NCCL_INSTALL_PATH,
+        _NCCL_HDR_PATH,
         _TF_NCCL_VERSION,
     ],
 )
diff --git a/third_party/nccl/system.BUILD.tpl b/third_party/nccl/system.BUILD.tpl
index 7ca835dedf..a07f54955f 100644
--- a/third_party/nccl/system.BUILD.tpl
+++ b/third_party/nccl/system.BUILD.tpl
@@ -20,7 +20,7 @@ genrule(
     "libnccl.so.%{version}",
     "nccl.h",
   ],
-  cmd = """cp "%{install_path}/include/nccl.h" "$(@D)/nccl.h" &&
-           cp "%{install_path}/lib/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
+  cmd = """cp "%{hdr_path}/nccl.h" "$(@D)/nccl.h" &&
+           cp "%{install_path}/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
 )
 
-- 
GitLab


From f2b17b22e12bd743b66945070f338f70b5fa3332 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 21:54:20 -0700
Subject: [PATCH 0714/1357] Allow subslicing Tensors with a single dimension.

PiperOrigin-RevId: 214553359
---
 tensorflow/core/framework/tensor.cc      | 2 +-
 tensorflow/core/framework/tensor.h       | 2 +-
 tensorflow/core/framework/tensor_test.cc | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index 3df677675e..1dea6da911 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -813,7 +813,7 @@ Tensor Tensor::Slice(int64 start, int64 limit) const {
 }
 
 Tensor Tensor::SubSlice(int64 index) const {
-  CHECK_GE(dims(), 2);  // Crash ok.
+  CHECK_GE(dims(), 1);  // Crash ok.
   CHECK_LE(0, index);   // Crash ok.
   int64 dim0_size = shape_.dim_size(0);
   CHECK_LE(index, dim0_size);  // Crash ok.
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index 8a0c70fef2..d0f9eb56e2 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -219,7 +219,7 @@ class Tensor {
   /// must check the returned tensor's alignment before calling certain
   /// methods that have alignment requirement (e.g., `flat()`, `tensor()`).
   ///
-  /// REQUIRES: `dims()` >= 2
+  /// REQUIRES: `dims()` >= 1
   /// REQUIRES: `0 <= dim0_start < dim_size(0)`
   Tensor SubSlice(int64 index) const;
 
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index 0bfa53e6c5..c596604143 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -1246,6 +1246,9 @@ TEST(Tensor, SubSlice_Basic) {
         EXPECT_EQ(&tx(5, j, k), &ty(j, k));
       }
     }
+    Tensor z = y.SubSlice(3).SubSlice(31);
+    auto tz = z.unaligned_flat<float>();
+    EXPECT_EQ(*tz.data(), 5.0);
   }
   {
     // Test unaligned access via a SubSlice.
-- 
GitLab


From 96eec07af06f4dfc75cee57b74ba4b5347619634 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Wed, 26 Sep 2018 13:04:46 +0800
Subject: [PATCH 0715/1357] Re-add compat module for leaky_relu implementation.

---
 tensorflow/python/ops/nn_ops.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 3f64f0af9a..78e000e458 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -22,6 +22,7 @@ import numbers
 
 import numpy as np
 
+from tensorflow.python.compat import compat
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_util
-- 
GitLab


From d59678448469ca134875e062f7f8d6d77942af4e Mon Sep 17 00:00:00 2001
From: Jason Furmanek <furmanek@us.ibm.com>
Date: Wed, 26 Sep 2018 05:19:10 +0000
Subject: [PATCH 0716/1357] fix unintential removal of set_system_libs_flag

---
 configure.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 9fd2dc2630..3791ead3ed 100644
--- a/configure.py
+++ b/configure.py
@@ -1459,10 +1459,20 @@ def set_grpc_build_flags():
 
 def set_system_libs_flag(environ_cp):
   syslibs = environ_cp.get('TF_SYSTEM_LIBS', '')
-  syslibs = ','.join(sorted(syslibs.split(',')))
   if syslibs and syslibs != '':
+    if ',' in syslibs:
+      syslibs = ','.join(sorted(syslibs.split(',')))
+    else:
+      syslibs = ','.join(sorted(syslibs.split()))
     write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs)
 
+  if 'PREFIX' in environ_cp:
+    write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX'])
+  if 'LIBDIR' in environ_cp:
+    write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
+  if 'INCLUDEDIR' in environ_cp:
+write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
+
 
 def set_windows_build_flags(environ_cp):
   """Set Windows specific build options."""
-- 
GitLab


From 1668d28ca3558f3bc4fcf94752799712211f219e Mon Sep 17 00:00:00 2001
From: Jason Furmanek <furmanek@us.ibm.com>
Date: Wed, 26 Sep 2018 05:22:04 +0000
Subject: [PATCH 0717/1357] fix in last line of set_system_lib_flag

---
 configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 3791ead3ed..b1ab55b657 100644
--- a/configure.py
+++ b/configure.py
@@ -1471,7 +1471,7 @@ def set_system_libs_flag(environ_cp):
   if 'LIBDIR' in environ_cp:
     write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
   if 'INCLUDEDIR' in environ_cp:
-write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
+    write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
 
 
 def set_windows_build_flags(environ_cp):
-- 
GitLab


From bd2524f16f3722cce2360ec5f7122c6b6f1ead49 Mon Sep 17 00:00:00 2001
From: Koan-Sin Tan <koansin.tan@gmail.com>
Date: Wed, 26 Sep 2018 13:23:14 +0800
Subject: [PATCH 0718/1357] fix unbalanced delimiter in benchmark_model doc

as reported in https://github.com/tensorflow/tensorflow/issues/22499,
there is unbalanced delimiter `"`
---
 tensorflow/tools/benchmark/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/benchmark/README.md b/tensorflow/tools/benchmark/README.md
index e64af2bfe1..dee1a20f3f 100644
--- a/tensorflow/tools/benchmark/README.md
+++ b/tensorflow/tools/benchmark/README.md
@@ -32,7 +32,7 @@ adb push bazel-bin/tensorflow/tools/benchmark/benchmark_model /data/local/tmp
 
 (4) Run the benchmark. For example:
 ```
-adb shell "/data/local/tmp/benchmark_model \
+adb shell /data/local/tmp/benchmark_model \
   --graph=/data/local/tmp/tensorflow_inception_graph.pb \
   --input_layer="input:0" \
   --input_layer_shape="1,224,224,3" \
-- 
GitLab


From 09bf8eb99cd76c506dcd2a0e8c8e893f7f3916b1 Mon Sep 17 00:00:00 2001
From: Jason Furmanek <furmanek@us.ibm.com>
Date: Wed, 26 Sep 2018 05:26:54 +0000
Subject: [PATCH 0719/1357] white space removal

---
 third_party/nccl/nccl_configure.bzl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index 0713b36724..d78fe8f3aa 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -113,7 +113,6 @@ def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version):
   """
   lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path,
                                                            nccl_version))
-
   if not lib_path.exists:
     auto_configure_fail("Cannot find NCCL library %s" % str(lib_path))
   return lib_path
-- 
GitLab


From c63d21b0bfc534b6377b332e9d2ba2abbdb7e0eb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Sep 2018 22:57:54 -0700
Subject: [PATCH 0720/1357] Adds a build flag to enable MKL (mkl_enabled=true).

PiperOrigin-RevId: 214557082
---
 tensorflow/contrib/cmake/CMakeLists.txt       |  2 +-
 ...direct_session_with_tracking_alloc_test.cc |  8 ++--
 .../common_runtime/mkl_cpu_allocator_test.cc  |  4 +-
 .../core/common_runtime/threadpool_device.cc  |  5 ++-
 tensorflow/core/graph/mkl_layout_pass.cc      |  4 ++
 tensorflow/core/graph/mkl_layout_pass_test.cc |  4 +-
 .../core/graph/mkl_tfconversion_pass.cc       |  2 +
 .../core/graph/mkl_tfconversion_pass_test.cc  |  4 +-
 .../core/kernels/batch_matmul_op_complex.cc   | 10 +++--
 .../core/kernels/batch_matmul_op_real.cc      |  9 +++-
 tensorflow/core/kernels/cwise_ops_common.cc   |  4 +-
 .../core/kernels/gather_nd_op_cpu_impl.h      |  6 +--
 tensorflow/core/kernels/matmul_op.cc          |  8 ++--
 .../core/kernels/mkl_batch_matmul_op.cc       |  2 +
 tensorflow/core/kernels/mkl_matmul_op.cc      |  6 ++-
 tensorflow/core/kernels/slice_op.cc           | 14 +++----
 tensorflow/core/kernels/transpose_op.cc       | 10 ++---
 tensorflow/core/util/port.cc                  |  4 +-
 tensorflow/tensorflow.bzl                     |  3 ++
 third_party/mkl/BUILD                         | 23 +++++++----
 third_party/mkl/build_defs.bzl                | 41 ++++++++++++++-----
 third_party/mkl_dnn/BUILD                     |  6 +--
 third_party/mkl_dnn/build_defs.bzl            |  2 +-
 tools/bazel.rc                                |  5 ++-
 24 files changed, 117 insertions(+), 69 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index ebcabb4223..c6d6f04168 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -353,7 +353,7 @@ endif()
 
 # MKL Support
 if (tensorflow_ENABLE_MKL_SUPPORT)
-  add_definitions(-DINTEL_MKL -DEIGEN_USE_VML)
+  add_definitions(-DINTEL_MKL -DEIGEN_USE_VML -DENABLE_MKL)
   include(mkl)
   list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkl_STATIC_LIBRARIES})
   list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkl_copy_shared_to_destination)
diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
index 2ed4f69f90..efd6185f8b 100644
--- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
@@ -108,7 +108,7 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) {
         EXPECT_EQ(2, shape.dim(0).size());
         EXPECT_EQ(1, shape.dim(1).size());
         if (node->name() == y->name()) {
-#ifdef INTEL_MKL
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
           // if MKL is used, it goes through various additional
           // graph rewrite pass. In TF, everytime a graph pass
           // happens, "constant" nodes are allocated
@@ -120,13 +120,13 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) {
           EXPECT_EQ(29, cm->AllocationId(node, 0));
 #else
           EXPECT_EQ(21, cm->AllocationId(node, 0));
-#endif
+#endif  // INTEL_MKL && ENABLE_MKL
         } else {
-#ifdef INTEL_MKL
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
           EXPECT_EQ(30, cm->AllocationId(node, 0));
 #else
           EXPECT_EQ(22, cm->AllocationId(node, 0));
-#endif
+#endif  // INTEL_MKL && ENABLE_MKL
         }
       }
       EXPECT_LE(0, cm->MaxExecutionTime(node));
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator_test.cc b/tensorflow/core/common_runtime/mkl_cpu_allocator_test.cc
index a67411cd2e..e08ab57638 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator_test.cc
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifdef INTEL_MKL
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
 
 #include "tensorflow/core/common_runtime/mkl_cpu_allocator.h"
 
@@ -50,4 +50,4 @@ TEST(MKLBFCAllocatorTest, TestMaxLimit) {
 
 }  // namespace tensorflow
 
-#endif  // INTEL_MKL
+#endif  // INTEL_MKL && ENABLE_MKL
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 0fbc20b34b..8587d1783a 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -113,8 +113,11 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
   }
 };
 
+#ifdef ENABLE_MKL
 REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocatorFactory);
+#endif  // ENABLE_MKL
+
 }  // namespace
-#endif
+#endif  // INTEL_MKL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index f5b0105862..37b88f1728 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -977,7 +977,9 @@ std::vector<MklLayoutRewritePass::ContextInfo*> MklLayoutRewritePass::cinfo_;
 // nodes. Do not change the ordering of the Mkl passes.
 const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup =
     OptimizationPassRegistry::POST_PARTITIONING;
+#ifdef ENABLE_MKL
 REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass);
+#endif  // ENABLE_MKL
 
 //////////////////////////////////////////////////////////////////////////
 //           Helper functions for creating new node
@@ -3150,7 +3152,9 @@ MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_;
 // nodes. Do not change the ordering of the Mkl passes.
 const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup =
     OptimizationPassRegistry::POST_PARTITIONING;
+#ifdef ENABLE_MKL
 REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass);
+#endif  // ENABLE_MKL
 
 //////////////////////////////////////////////////////////////////////////
 //           Helper functions for creating new node
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index e8bac847e5..f42a4ee98b 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifdef INTEL_MKL
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
 
 #include "tensorflow/core/graph/mkl_layout_pass.h"
 #include "tensorflow/core/graph/mkl_graph_util.h"
@@ -3586,4 +3586,4 @@ BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
 
 }  // namespace tensorflow
 
-#endif /* INTEL_MKL */
+#endif  // INTEL_MKL && ENABLE_MKL
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index b67a321fc1..8c5ffd71a3 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -133,7 +133,9 @@ class MklToTfConversionPass : public GraphOptimizationPass {
 // complete picture of inputs and outputs of the nodes in the graphs.
 const OptimizationPassRegistry::Grouping kMklTfConvPassGroup =
     OptimizationPassRegistry::POST_PARTITIONING;
+#ifdef ENABLE_MKL
 REGISTER_OPTIMIZATION(kMklTfConvPassGroup, 2, MklToTfConversionPass);
+#endif  // ENABLE_MKL
 
 Status MklToTfConversionPass::InsertConversionNodeOnEdge(
     std::unique_ptr<Graph>* g, Edge* e) {
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc
index ebcb6de551..319437a801 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifdef INTEL_MKL
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
 
 #include "tensorflow/core/graph/mkl_tfconversion_pass.h"
 #include "tensorflow/core/graph/mkl_graph_util.h"
@@ -304,4 +304,4 @@ BENCHMARK(BM_RunMklToTfConversionPass)->Arg(1000)->Arg(10000);
 }  // namespace
 }  // namespace tensorflow
 
-#endif /* INTEL_MKL */
+#endif  // INTEL_MKL && ENABLE_MKL
diff --git a/tensorflow/core/kernels/batch_matmul_op_complex.cc b/tensorflow/core/kernels/batch_matmul_op_complex.cc
index 54c45bfe63..f48bd0c318 100644
--- a/tensorflow/core/kernels/batch_matmul_op_complex.cc
+++ b/tensorflow/core/kernels/batch_matmul_op_complex.cc
@@ -17,14 +17,18 @@ limitations under the License.
 
 namespace tensorflow {
 
-#if !defined(INTEL_MKL) || defined(INTEL_MKL_DNN_ONLY)
+// MKL_ML registers its own complex64/128 kernels in mkl_batch_matmul_op.cc
+// if defined(INTEL_MKL) && !defined(INTEL_MKL_DNN_ONLY) && defined(ENABLE_MKL).
+// Anything else (the complement) should register the TF ones.
+// (MKL-DNN doesn't implement these kernels either.)
+#if !defined(INTEL_MKL) || defined(INTEL_MKL_DNN_ONLY) || !defined(ENABLE_MKL)
 TF_CALL_complex64(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_complex128(REGISTER_BATCH_MATMUL_CPU);
-#endif
+#endif  // !INTEL_MKL || INTEL_MKL_DNN_ONLY || !ENABLE_MKL
 
 #if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_BATCH_MATMUL_GPU);
 TF_CALL_complex128(REGISTER_BATCH_MATMUL_GPU);
-#endif
+#endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc
index 584b507c70..25ae795d8e 100644
--- a/tensorflow/core/kernels/batch_matmul_op_real.cc
+++ b/tensorflow/core/kernels/batch_matmul_op_real.cc
@@ -21,10 +21,15 @@ limitations under the License.
 
 namespace tensorflow {
 
-#if !defined(INTEL_MKL) || defined(INTEL_MKL_DNN_ONLY)
+// MKL_ML registers its own float and double kernels in mkl_batch_matmul_op.cc
+// if defined(INTEL_MKL) && !defined(INTEL_MKL_DNN_ONLY) && defined(ENABLE_MKL).
+// Anything else (the complement) should register the TF ones.
+// (MKL-DNN doesn't implement these kernels either.)
+#if !defined(INTEL_MKL) || defined(INTEL_MKL_DNN_ONLY) || !defined(ENABLE_MKL)
 TF_CALL_float(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_double(REGISTER_BATCH_MATMUL_CPU);
-#endif
+#endif  // !INTEL_MKL || INTEL_MKL_DNN_ONLY || !ENABLE_MKL
+
 TF_CALL_half(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_int32(REGISTER_BATCH_MATMUL_CPU);
 
diff --git a/tensorflow/core/kernels/cwise_ops_common.cc b/tensorflow/core/kernels/cwise_ops_common.cc
index 980edffceb..8ad3b4d1fc 100644
--- a/tensorflow/core/kernels/cwise_ops_common.cc
+++ b/tensorflow/core/kernels/cwise_ops_common.cc
@@ -20,9 +20,9 @@ namespace tensorflow {
 BinaryOpShared::BinaryOpShared(OpKernelConstruction* ctx, DataType out,
                                DataType in)
     : OpKernel(ctx) {
-#ifndef INTEL_MKL
+#if !defined(INTEL_MKL) || !defined(ENABLE_MKL)
   OP_REQUIRES_OK(ctx, ctx->MatchSignature({in, in}, {out}));
-#endif
+#endif  // !INTEL_MKL || !ENABLE_MKL
 }
 
 void BinaryOpShared::SetUnimplementedError(OpKernelContext* ctx) {
diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
index 277ee2be02..1c78de253e 100644
--- a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
+++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
@@ -114,7 +114,7 @@ struct GatherNdSlice<CPUDevice, T, Index, IXDIM> {
     generator::GatherNdSliceGenerator<T, Index, IXDIM> gather_nd_generator(
         slice_size, Tindices, Tparams, Tout, &error_loc);
 
-#ifdef INTEL_MKL
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
 // Eigen implementation below is not highly performant. gather_nd_generator
 // does not seem to be called in parallel, leading to very poor performance.
 // Additionally, since it uses scalar (Tscratch) to invoke 'generate', it
@@ -126,12 +126,12 @@ struct GatherNdSlice<CPUDevice, T, Index, IXDIM> {
       const Eigen::array<Eigen::DenseIndex, 1> loc{i};
       gather_nd_generator(loc);
     }
-#else  // INTEL_MKL
+#else   // INTEL_MKL && ENABLE_MKL
     Tscratch.device(d) = Tscratch.reshape(reshape_dims)
                              .broadcast(broadcast_dims)
                              .generate(gather_nd_generator)
                              .sum();
-#endif
+#endif  // INTEL_MKL && ENABLE_MKL
 
     // error_loc() returns -1 if there's no out-of-bounds index,
     // otherwise it returns the location of an OOB index in Tindices.
diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc
index 79967aab38..4ad390a411 100644
--- a/tensorflow/core/kernels/matmul_op.cc
+++ b/tensorflow/core/kernels/matmul_op.cc
@@ -578,7 +578,7 @@ struct MatMulFunctor<SYCLDevice, T> {
                               .Label("cublas"),                    \
                           MatMulOp<GPUDevice, T, true /* cublas */>)
 
-#if defined(INTEL_MKL)
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
 
 // MKL does not support half, bfloat16 and int32 types for
 // matrix-multiplication, so register the kernel to use default Eigen based
@@ -606,9 +606,9 @@ TF_CALL_double(REGISTER_CPU);
 TF_CALL_complex64(REGISTER_CPU_EIGEN);
 TF_CALL_complex128(REGISTER_CPU_EIGEN);
 TF_CALL_double(REGISTER_CPU_EIGEN);
-#endif
+#endif  // INTEL_MKL_DNN_ONLY
 
-#else  // INTEL MKL
+#else   // INTEL_MKL && ENABLE_MKL
 TF_CALL_float(REGISTER_CPU);
 TF_CALL_double(REGISTER_CPU);
 TF_CALL_half(REGISTER_CPU);
@@ -616,7 +616,7 @@ TF_CALL_bfloat16(REGISTER_CPU);
 TF_CALL_int32(REGISTER_CPU);
 TF_CALL_complex64(REGISTER_CPU);
 TF_CALL_complex128(REGISTER_CPU);
-#endif
+#endif  // INTEL_MKL && ENABLE_MKL
 
 #if GOOGLE_CUDA
 TF_CALL_float(REGISTER_GPU);
diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
index 0841395dc3..bc135de11e 100644
--- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc
+++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
@@ -223,10 +223,12 @@ class BatchMatMulMkl : public OpKernel {
       Name("BatchMatMul").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
       BatchMatMulMkl<CPUDevice, TYPE>)
 
+#ifdef ENABLE_MKL
 TF_CALL_float(REGISTER_BATCH_MATMUL_MKL);
 TF_CALL_double(REGISTER_BATCH_MATMUL_MKL);
 TF_CALL_complex64(REGISTER_BATCH_MATMUL_MKL);
 TF_CALL_complex128(REGISTER_BATCH_MATMUL_MKL);
+#endif  // ENABLE_MKL
 
 }  // end namespace tensorflow
 #endif
diff --git a/tensorflow/core/kernels/mkl_matmul_op.cc b/tensorflow/core/kernels/mkl_matmul_op.cc
index 077d62ce32..f4788f4851 100644
--- a/tensorflow/core/kernels/mkl_matmul_op.cc
+++ b/tensorflow/core/kernels/mkl_matmul_op.cc
@@ -217,7 +217,7 @@ class MklMatMulOp : public OpKernel {
                 reinterpret_cast<const MKL_Complex16*>(b), ldb, &beta,
                 reinterpret_cast<MKL_Complex16*>(c), ldc);
   }
-#endif
+#endif  // !INTEL_MKL_DNN_ONLY
 };
 
 #define REGISTER_CPU(T)                                         \
@@ -225,6 +225,7 @@ class MklMatMulOp : public OpKernel {
       Name("MatMul").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       MklMatMulOp<CPUDevice, T, false /* cublas, ignored for CPU */>);
 
+#ifdef ENABLE_MKL
 // TODO(inteltf) Consider template specialization when adding/removing
 // additional types
 TF_CALL_float(REGISTER_CPU);
@@ -233,7 +234,8 @@ TF_CALL_float(REGISTER_CPU);
 TF_CALL_double(REGISTER_CPU);
 TF_CALL_complex64(REGISTER_CPU);
 TF_CALL_complex128(REGISTER_CPU);
-#endif
+#endif  // !INTEL_MKL_DNN_ONLY
+#endif  // ENABLE_MKL
 
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index 77594479cb..97f77e45b6 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -411,7 +411,7 @@ class MklSliceOp : public OpKernel {
         context->input(0).tensor<T, NDIM>(), indices, sizes);
   }
 };
-#endif
+#endif  // INTEL_MKL
 
 // Forward declarations of the functor specializations for declared in the
 // sharded source files.
@@ -440,18 +440,14 @@ TF_CALL_ALL_TYPES(DECLARE_FOR_N);
 #undef DECLARE_CPU_SPEC
 }  // namespace functor
 
-#ifndef INTEL_MKL
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
 #define REGISTER_SLICE(type)                             \
   REGISTER_KERNEL_BUILDER(Name("Slice")                  \
                               .Device(DEVICE_CPU)        \
                               .TypeConstraint<type>("T") \
                               .HostMemory("begin")       \
                               .HostMemory("size"),       \
-                          SliceOp<CPUDevice, type>)
-
-TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
-TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
-#undef REGISTER_SLICE
+                          MklSliceOp<CPUDevice, type>)
 #else
 #define REGISTER_SLICE(type)                             \
   REGISTER_KERNEL_BUILDER(Name("Slice")                  \
@@ -459,12 +455,12 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
                               .TypeConstraint<type>("T") \
                               .HostMemory("begin")       \
                               .HostMemory("size"),       \
-                          MklSliceOp<CPUDevice, type>)
+                          SliceOp<CPUDevice, type>)
+#endif  // INTEL_MKL && ENABLE_MKL
 
 TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
 TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
 #undef REGISTER_SLICE
-#endif  // INTEL_MKL
 
 #if GOOGLE_CUDA
 // Forward declarations of the functor specializations for GPU.
diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc
index 0f0f65c5a3..48e392c070 100644
--- a/tensorflow/core/kernels/transpose_op.cc
+++ b/tensorflow/core/kernels/transpose_op.cc
@@ -218,7 +218,7 @@ Status ConjugateTransposeCpuOp::DoTranspose(OpKernelContext* ctx,
                                             perm, out);
 }
 
-#if defined(INTEL_MKL)
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
 #define REGISTER(T)                                   \
   REGISTER_KERNEL_BUILDER(Name("Transpose")           \
                               .Device(DEVICE_CPU)     \
@@ -230,11 +230,8 @@ Status ConjugateTransposeCpuOp::DoTranspose(OpKernelContext* ctx,
                               .TypeConstraint<T>("T") \
                               .HostMemory("perm"),    \
                           MklConjugateTransposeCpuOp);
-TF_CALL_ALL_TYPES(REGISTER);
-#undef REGISTER
-
-#else  // INTEL_MKL
 
+#else  // INTEL_MKL && ENABLE_MKL
 #define REGISTER(T)                                   \
   REGISTER_KERNEL_BUILDER(Name("Transpose")           \
                               .Device(DEVICE_CPU)     \
@@ -246,9 +243,10 @@ TF_CALL_ALL_TYPES(REGISTER);
                               .TypeConstraint<T>("T") \
                               .HostMemory("perm"),    \
                           ConjugateTransposeCpuOp);
+#endif  // INTEL_MKL && ENABLE_MKL
+
 TF_CALL_ALL_TYPES(REGISTER)
 #undef REGISTER
-#endif  // INTEL_MKL
 
 #if GOOGLE_CUDA
 Status TransposeGpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
diff --git a/tensorflow/core/util/port.cc b/tensorflow/core/util/port.cc
index c081ceae57..e01058dff6 100644
--- a/tensorflow/core/util/port.cc
+++ b/tensorflow/core/util/port.cc
@@ -38,10 +38,10 @@ bool CudaSupportsHalfMatMulAndConv() {
 }
 
 bool IsMklEnabled() {
-#ifdef INTEL_MKL
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
   return true;
 #else
   return false;
-#endif
+#endif  // INTEL_MKL && ENABLE_MKL
 }
 }  // end namespace tensorflow
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 7ddaf7806e..d6c75d675c 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -22,6 +22,7 @@ load(
 )
 load(
     "//third_party/mkl:build_defs.bzl",
+    "if_enable_mkl",
     "if_mkl",
     "if_mkl_lnx_x64",
     "if_mkl_ml",
@@ -237,6 +238,7 @@ def tf_copts(android_optimization_level_override = "-O2", is_external = False):
         if_tensorrt(["-DGOOGLE_TENSORRT=1"]) +
         if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) +
         if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
+        if_enable_mkl(["-DENABLE_MKL"]) +
         if_ngraph(["-DINTEL_NGRAPH=1"]) +
         if_mkl_lnx_x64(["-fopenmp"]) +
         if_android_arm(["-mfpu=neon"]) +
@@ -1082,6 +1084,7 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs)
         ]),
         copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
                  if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
+                 if_enable_mkl(["-DENABLE_MKL"]) +
                  if_tensorrt(["-DGOOGLE_TENSORRT=1"])),
         **kwargs
     )
diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD
index efff7fd51b..15a3e5cfa7 100644
--- a/third_party/mkl/BUILD
+++ b/third_party/mkl/BUILD
@@ -1,26 +1,26 @@
 licenses(["notice"])  # 3-Clause BSD
 
 config_setting(
-    name = "using_mkl",
+    name = "build_with_mkl",
     define_values = {
-        "using_mkl": "true",
+        "build_with_mkl": "true",
     },
     visibility = ["//visibility:public"],
 )
 
 config_setting(
-    name = "using_mkl_ml_only",
+    name = "build_with_mkl_ml_only",
     define_values = {
-        "using_mkl": "true",
-        "using_mkl_ml_only": "true",
+        "build_with_mkl": "true",
+        "build_with_mkl_ml_only": "true",
     },
     visibility = ["//visibility:public"],
 )
 
 config_setting(
-    name = "using_mkl_lnx_x64",
+    name = "build_with_mkl_lnx_x64",
     define_values = {
-        "using_mkl": "true",
+        "build_with_mkl": "true",
     },
     values = {
         "cpu": "k8",
@@ -28,6 +28,15 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "enable_mkl",
+    define_values = {
+        "enable_mkl": "true",
+        "build_with_mkl": "true",
+    },
+    visibility = ["//visibility:public"],
+)
+
 load(
     "//third_party/mkl:build_defs.bzl",
     "if_mkl",
diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl
index b645c0fc5c..bb798e715a 100644
--- a/third_party/mkl/build_defs.bzl
+++ b/third_party/mkl/build_defs.bzl
@@ -1,9 +1,11 @@
 # -*- Python -*-
 """Skylark macros for MKL.
-if_mkl is a conditional to check if MKL is enabled or not.
-if_mkl_ml is a conditional to check if MKL-ML is enabled.
+
+if_mkl is a conditional to check if we are building with MKL.
+if_mkl_ml is a conditional to check if we are building with MKL-ML.
 if_mkl_ml_only is a conditional to check for MKL-ML-only (no MKL-DNN) mode.
 if_mkl_lnx_x64 is a conditional to check for MKL
+if_enable_mkl is a conditional to check if building with MKL and MKL is enabled.
 
 mkl_repository is a repository rule for creating MKL repository rule that can
 be pointed to either a local folder, or download it from the internet.
@@ -24,7 +26,7 @@ def if_mkl(if_true, if_false = []):
       a select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        str(Label("//third_party/mkl:using_mkl")): if_true,
+        str(Label("//third_party/mkl:build_with_mkl")): if_true,
         "//conditions:default": if_false,
     })
 
@@ -40,8 +42,8 @@ def if_mkl_ml(if_true, if_false = []):
       a select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        str(Label("//third_party/mkl_dnn:using_mkl_dnn_only")): if_false,
-        str(Label("//third_party/mkl:using_mkl")): if_true,
+        str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): if_false,
+        str(Label("//third_party/mkl:build_with_mkl")): if_true,
         "//conditions:default": if_false,
     })
 
@@ -56,12 +58,12 @@ def if_mkl_ml_only(if_true, if_false = []):
       a select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        str(Label("//third_party/mkl:using_mkl_ml_only")): if_true,
+        str(Label("//third_party/mkl:build_with_mkl_ml_only")): if_true,
         "//conditions:default": if_false,
     })
 
 def if_mkl_lnx_x64(if_true, if_false = []):
-    """Shorthand to select() on if MKL is on and the target is Linux x86-64.
+    """Shorthand to select() if building with MKL and the target is Linux x86-64.
 
     Args:
       if_true: expression to evaluate if building with MKL is enabled and the
@@ -73,7 +75,24 @@ def if_mkl_lnx_x64(if_true, if_false = []):
       a select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        str(Label("//third_party/mkl:using_mkl_lnx_x64")): if_true,
+        str(Label("//third_party/mkl:build_with_mkl_lnx_x64")): if_true,
+        "//conditions:default": if_false,
+    })
+
+def if_enable_mkl(if_true, if_false = []):
+    """Shorthand to select() if we are building with MKL and MKL is enabled.
+
+    This is only effective when built with MKL.
+
+    Args:
+      if_true: expression to evaluate if building with MKL and MKL is enabled
+      if_false: expression to evaluate if building without MKL or MKL is not enabled.
+
+    Returns:
+      A select evaluating to either if_true or if_false as appropriate.
+    """
+    return select({
+        "//third_party/mkl:enable_mkl": if_true,
         "//conditions:default": if_false,
     })
 
@@ -87,9 +106,9 @@ def mkl_deps():
       inclusion in the deps attribute of rules.
     """
     return select({
-        str(Label("//third_party/mkl_dnn:using_mkl_dnn_only")): ["@mkl_dnn"],
-        str(Label("//third_party/mkl:using_mkl_ml_only")): ["//third_party/mkl:intel_binary_blob"],
-        str(Label("//third_party/mkl:using_mkl")): [
+        str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): ["@mkl_dnn"],
+        str(Label("//third_party/mkl:build_with_mkl_ml_only")): ["//third_party/mkl:intel_binary_blob"],
+        str(Label("//third_party/mkl:build_with_mkl")): [
             "//third_party/mkl:intel_binary_blob",
             "@mkl_dnn",
         ],
diff --git a/third_party/mkl_dnn/BUILD b/third_party/mkl_dnn/BUILD
index 3e567fa9fc..58ecda55e6 100644
--- a/third_party/mkl_dnn/BUILD
+++ b/third_party/mkl_dnn/BUILD
@@ -3,10 +3,10 @@ licenses(["notice"])
 exports_files(["LICENSE"])
 
 config_setting(
-    name = "using_mkl_dnn_only",
+    name = "build_with_mkl_dnn_only",
     define_values = {
-        "using_mkl": "true",
-        "using_mkl_dnn_only": "true",
+        "build_with_mkl": "true",
+        "build_with_mkl_dnn_only": "true",
     },
     visibility = ["//visibility:public"],
 )
diff --git a/third_party/mkl_dnn/build_defs.bzl b/third_party/mkl_dnn/build_defs.bzl
index 7ce2a7d9b0..6388f31971 100644
--- a/third_party/mkl_dnn/build_defs.bzl
+++ b/third_party/mkl_dnn/build_defs.bzl
@@ -8,6 +8,6 @@ def if_mkl_open_source_only(if_true, if_false = []):
 
     """
     return select({
-        str(Label("//third_party/mkl_dnn:using_mkl_dnn_only")): if_true,
+        str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): if_true,
         "//conditions:default": if_false,
     })
diff --git a/tools/bazel.rc b/tools/bazel.rc
index ccf62629d1..6747c7e795 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -24,12 +24,13 @@ build --define framework_shared_object=true
 # Please note that MKL on MacOS or windows is still not supported.
 # If you would like to use a local MKL instead of downloading, please set the
 # environment variable "TF_MKL_ROOT" every time before build.
-build:mkl --define=using_mkl=true
+build:mkl --define=build_with_mkl=true --define=enable_mkl=true
 build:mkl -c opt
 
 # This config option is used to enable MKL-DNN open source library only,
 # without depending on MKL binary version.
-build:mkl_open_source_only --define=using_mkl_dnn_only=true
+build:mkl_open_source_only --define=build_with_mkl_dnn_only=true 
+build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true
 
 build:download_clang --crosstool_top=@local_config_download_clang//:toolchain
 build:download_clang --define=using_clang=true
-- 
GitLab


From ffa90fc521c6051addd50236872a4afaa45e0a49 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Wed, 26 Sep 2018 01:38:55 -0700
Subject: [PATCH 0721/1357] Fixes for building with CUDA on ppc64le.

PiperOrigin-RevId: 214569615
---
 configure.py                        | 188 +++++++++++++++-------------
 third_party/gpus/cuda_configure.bzl |   2 +
 2 files changed, 101 insertions(+), 89 deletions(-)

diff --git a/configure.py b/configure.py
index f0b9fada5e..1064f6a9d4 100644
--- a/configure.py
+++ b/configure.py
@@ -41,7 +41,6 @@ _DEFAULT_CUDA_PATH = '/usr/local/cuda'
 _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
 _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing '
                           'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION)
-_DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/%s-linux-gnu' % platform.machine()
 _TF_OPENCL_VERSION = '1.2'
 _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
 _DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
@@ -54,6 +53,11 @@ _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc'
 _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
 _TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE')
 
+if platform.machine() == 'ppc64le':
+  _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/powerpc64le-linux-gnu/'
+else:
+  _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/%s-linux-gnu' % platform.machine()
+
 
 class UserInputError(Exception):
   pass
@@ -153,14 +157,18 @@ def get_python_path(environ_cp, python_bin_path):
   if environ_cp.get('PYTHONPATH'):
     python_paths = environ_cp.get('PYTHONPATH').split(':')
   try:
-    library_paths = run_shell(
-        [python_bin_path, '-c',
-         'import site; print("\\n".join(site.getsitepackages()))']).split('\n')
+    library_paths = run_shell([
+        python_bin_path, '-c',
+        'import site; print("\\n".join(site.getsitepackages()))'
+    ]).split('\n')
   except subprocess.CalledProcessError:
-    library_paths = [run_shell(
-        [python_bin_path, '-c',
-         'from distutils.sysconfig import get_python_lib;'
-         'print(get_python_lib())'])]
+    library_paths = [
+        run_shell([
+            python_bin_path, '-c',
+            'from distutils.sysconfig import get_python_lib;'
+            'print(get_python_lib())'
+        ])
+    ]
 
   all_paths = set(python_paths + library_paths)
 
@@ -187,8 +195,7 @@ def setup_python(environ_cp):
         environ_cp, 'PYTHON_BIN_PATH', ask_python_bin_path,
         default_python_bin_path)
     # Check if the path is valid
-    if os.path.isfile(python_bin_path) and os.access(
-        python_bin_path, os.X_OK):
+    if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK):
       break
     elif not os.path.exists(python_bin_path):
       print('Invalid python path: %s cannot be found.' % python_bin_path)
@@ -230,8 +237,9 @@ def setup_python(environ_cp):
   environ_cp['PYTHON_BIN_PATH'] = python_bin_path
 
   # Write tools/python_bin_path.sh
-  with open(os.path.join(
-      _TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), 'w') as f:
+  with open(
+      os.path.join(_TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'),
+      'w') as f:
     f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path)
 
 
@@ -250,7 +258,7 @@ def reset_tf_configure_bazelrc(workspace_path):
         continue
       f.write('%s\n' % l)
     if is_windows():
-      tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/")
+      tf_bazelrc_path = _TF_BAZELRC.replace('\\', '/')
     else:
       tf_bazelrc_path = _TF_BAZELRC
     f.write('import %s\n' % tf_bazelrc_path)
@@ -261,8 +269,8 @@ def cleanup_makefile():
 
   These files could interfere with Bazel parsing.
   """
-  makefile_download_dir = os.path.join(
-      _TF_WORKSPACE_ROOT, 'tensorflow', 'contrib', 'makefile', 'downloads')
+  makefile_download_dir = os.path.join(_TF_WORKSPACE_ROOT, 'tensorflow',
+                                       'contrib', 'makefile', 'downloads')
   if os.path.isdir(makefile_download_dir):
     for root, _, filenames in os.walk(makefile_download_dir):
       for f in filenames:
@@ -330,9 +338,8 @@ def get_var(environ_cp,
           'Environment variable %s must be set as a boolean indicator.\n'
           'The following are accepted as TRUE : %s.\n'
           'The following are accepted as FALSE: %s.\n'
-          'Current value is %s.' % (
-              var_name, ', '.join(true_strings), ', '.join(false_strings),
-              var))
+          'Current value is %s.' % (var_name, ', '.join(true_strings),
+                                    ', '.join(false_strings), var))
 
   while var is None:
     user_input_origin = get_input(question)
@@ -355,8 +362,12 @@ def get_var(environ_cp,
   return var
 
 
-def set_build_var(environ_cp, var_name, query_item, option_name,
-                  enabled_by_default, bazel_config_name=None):
+def set_build_var(environ_cp,
+                  var_name,
+                  query_item,
+                  option_name,
+                  enabled_by_default,
+                  bazel_config_name=None):
   """Set if query_item will be enabled for the build.
 
   Ask user if query_item will be enabled. Default is used if no input is given.
@@ -379,8 +390,8 @@ def set_build_var(environ_cp, var_name, query_item, option_name,
   elif bazel_config_name is not None:
     # TODO(mikecase): Migrate all users of configure.py to use --config Bazel
     # options and not to set build configs through environment variables.
-    write_to_bazelrc('build:%s --define %s=true'
-                     % (bazel_config_name, option_name))
+    write_to_bazelrc(
+        'build:%s --define %s=true' % (bazel_config_name, option_name))
 
 
 def set_action_env_var(environ_cp,
@@ -447,7 +458,8 @@ def check_bazel_version(min_version):
   if which('bazel') is None:
     print('Cannot find bazel. Please install bazel.')
     sys.exit(0)
-  curr_version = run_shell(['bazel', '--batch', '--bazelrc=/dev/null', 'version'])
+  curr_version = run_shell(
+      ['bazel', '--batch', '--bazelrc=/dev/null', 'version'])
 
   for line in curr_version.split('\n'):
     if 'Build label: ' in line:
@@ -499,6 +511,7 @@ def set_cc_opt_flags(environ_cp):
     write_to_bazelrc('build:opt --host_copt=-march=native')
   write_to_bazelrc('build:opt --define with_default_optimizations=true')
 
+
 def set_tf_cuda_clang(environ_cp):
   """set TF_CUDA_CLANG action_env.
 
@@ -581,16 +594,14 @@ def set_clang_cuda_compiler_path(environ_cp):
                               clang_cuda_compiler_path)
 
 
-def prompt_loop_or_load_from_env(
-    environ_cp,
-    var_name,
-    var_default,
-    ask_for_var,
-    check_success,
-    error_msg,
-    suppress_default_error=False,
-    n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS
-):
+def prompt_loop_or_load_from_env(environ_cp,
+                                 var_name,
+                                 var_default,
+                                 ask_for_var,
+                                 check_success,
+                                 error_msg,
+                                 suppress_default_error=False,
+                                 n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS):
   """Loop over user prompts for an ENV param until receiving a valid response.
 
   For the env param var_name, read from the environment or verify user input
@@ -629,9 +640,7 @@ def prompt_loop_or_load_from_env(
   )
 
   for _ in range(n_ask_attempts):
-    val = get_from_env_or_user_or_default(environ_cp,
-                                          var_name,
-                                          full_query,
+    val = get_from_env_or_user_or_default(environ_cp, var_name, full_query,
                                           default)
     if check_success(val):
       break
@@ -639,9 +648,9 @@ def prompt_loop_or_load_from_env(
       print(error_msg % val)
     environ_cp[var_name] = ''
   else:
-    raise UserInputError('Invalid %s setting was provided %d times in a row. '
-                         'Assuming to be a scripting mistake.' %
-                         (var_name, n_ask_attempts))
+    raise UserInputError(
+        'Invalid %s setting was provided %d times in a row. '
+        'Assuming to be a scripting mistake.' % (var_name, n_ask_attempts))
 
   environ_cp[var_name] = val
   return val
@@ -650,8 +659,8 @@ def prompt_loop_or_load_from_env(
 def create_android_ndk_rule(environ_cp):
   """Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule."""
   if is_windows() or is_cygwin():
-    default_ndk_path = cygpath('%s/Android/Sdk/ndk-bundle' %
-                               environ_cp['APPDATA'])
+    default_ndk_path = cygpath(
+        '%s/Android/Sdk/ndk-bundle' % environ_cp['APPDATA'])
   elif is_macos():
     default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME']
   else:
@@ -668,8 +677,7 @@ def create_android_ndk_rule(environ_cp):
       ask_for_var='Please specify the home path of the Android NDK to use.',
       check_success=valid_ndk_path,
       error_msg=('The path %s or its child file "source.properties" '
-                 'does not exist.')
-  )
+                 'does not exist.'))
   write_action_env_to_bazelrc('ANDROID_NDK_HOME', android_ndk_home_path)
   write_action_env_to_bazelrc('ANDROID_NDK_API_LEVEL',
                               check_ndk_level(android_ndk_home_path))
@@ -703,9 +711,9 @@ def create_android_sdk_rule(environ_cp):
   api_levels = [x.replace('android-', '') for x in api_levels]
 
   def valid_api_level(api_level):
-    return os.path.exists(os.path.join(android_sdk_home_path,
-                                       'platforms',
-                                       'android-' + api_level))
+    return os.path.exists(
+        os.path.join(android_sdk_home_path, 'platforms',
+                     'android-' + api_level))
 
   android_api_level = prompt_loop_or_load_from_env(
       environ_cp,
@@ -720,9 +728,8 @@ def create_android_sdk_rule(environ_cp):
   versions = sorted(os.listdir(build_tools))
 
   def valid_build_tools(version):
-    return os.path.exists(os.path.join(android_sdk_home_path,
-                                       'build-tools',
-                                       version))
+    return os.path.exists(
+        os.path.join(android_sdk_home_path, 'build-tools', version))
 
   android_build_tools_version = prompt_loop_or_load_from_env(
       environ_cp,
@@ -736,10 +743,8 @@ def create_android_sdk_rule(environ_cp):
 
   write_action_env_to_bazelrc('ANDROID_BUILD_TOOLS_VERSION',
                               android_build_tools_version)
-  write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL',
-                              android_api_level)
-  write_action_env_to_bazelrc('ANDROID_SDK_HOME',
-                              android_sdk_home_path)
+  write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL', android_api_level)
+  write_action_env_to_bazelrc('ANDROID_SDK_HOME', android_sdk_home_path)
 
 
 def check_ndk_level(android_ndk_home_path):
@@ -798,6 +803,7 @@ def reformat_version_sequence(version_str, sequence_count):
   Args:
       version_str: String, the version string.
       sequence_count: int, an integer.
+
   Returns:
       string, reformatted version string.
   """
@@ -841,12 +847,19 @@ def set_tf_cuda_version(environ_cp):
     if is_windows():
       cuda_rt_lib_paths = ['lib/x64/cudart.lib']
     elif is_linux():
-      cuda_rt_lib_paths = ['%s/libcudart.so.%s' % (x, tf_cuda_version)
-                           for x in ['lib64', 'lib/x86_64-linux-gnu']]
+      cuda_rt_lib_paths = [
+          '%s/libcudart.so.%s' % (x, tf_cuda_version) for x in [
+              'lib64',
+              'lib/powerpc64le-linux-gnu',
+              'lib/x86_64-linux-gnu',
+          ]
+      ]
     elif is_macos():
       cuda_rt_lib_paths = ['lib/libcudart.%s.dylib' % tf_cuda_version]
 
-    cuda_toolkit_paths_full = [os.path.join(cuda_toolkit_path, x) for x in cuda_rt_lib_paths]
+    cuda_toolkit_paths_full = [
+        os.path.join(cuda_toolkit_path, x) for x in cuda_rt_lib_paths
+    ]
     if any([os.path.exists(x) for x in cuda_toolkit_paths_full]):
       break
 
@@ -919,8 +932,8 @@ def set_tf_cudnn_version(environ_cp):
                                            cudnn_path_from_ldconfig)
       if cudnn_path_from_ldconfig:
         cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1)
-        if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig,
-                                     tf_cudnn_version)):
+        if os.path.exists(
+            '%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)):
           cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig)
           break
 
@@ -1166,6 +1179,7 @@ def get_native_cuda_compute_capabilities(environ_cp):
 
   Args:
     environ_cp: copy of the os.environ.
+
   Returns:
     string of native cuda compute capabilities, separated by comma.
   """
@@ -1290,8 +1304,7 @@ def set_computecpp_toolkit_path(environ_cp):
     else:
       sycl_rt_lib_path = ''
 
-    sycl_rt_lib_path_full = os.path.join(toolkit_path,
-                                         sycl_rt_lib_path)
+    sycl_rt_lib_path_full = os.path.join(toolkit_path, sycl_rt_lib_path)
     exists = os.path.exists(sycl_rt_lib_path_full)
     if not exists:
       print('Invalid SYCL %s library path. %s cannot be found' %
@@ -1319,8 +1332,8 @@ def set_trisycl_include_dir(environ_cp):
   ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
                              'include directory. (Use --config=sycl_trisycl '
                              'when building with Bazel) '
-                             '[Default is %s]: '
-                            ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+                             '[Default is %s]: ') % (
+                                 _DEFAULT_TRISYCL_INCLUDE_DIR)
 
   while True:
     trisycl_include_dir = get_from_env_or_user_or_default(
@@ -1329,13 +1342,12 @@ def set_trisycl_include_dir(environ_cp):
     if os.path.exists(trisycl_include_dir):
       break
 
-    print('Invalid triSYCL include directory, %s cannot be found'
-          % (trisycl_include_dir))
+    print('Invalid triSYCL include directory, %s cannot be found' %
+          (trisycl_include_dir))
 
   # Set TRISYCL_INCLUDE_DIR
   environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
-                              trisycl_include_dir)
+  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
 
 
 def set_mpi_home(environ_cp):
@@ -1345,8 +1357,9 @@ def set_mpi_home(environ_cp):
   default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home))
 
   def valid_mpi_path(mpi_home):
-    exists = (os.path.exists(os.path.join(mpi_home, 'include')) and
-              os.path.exists(os.path.join(mpi_home, 'lib')))
+    exists = (
+        os.path.exists(os.path.join(mpi_home, 'include')) and
+        os.path.exists(os.path.join(mpi_home, 'lib')))
     if not exists:
       print('Invalid path to the MPI Toolkit. %s or %s cannot be found' %
             (os.path.join(mpi_home, 'include'),
@@ -1434,11 +1447,9 @@ def set_windows_build_flags(environ_cp):
 
   if get_var(
       environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline',
-      True,
-      ('Would you like to override eigen strong inline for some C++ '
-       'compilation to reduce the compilation time?'),
-      'Eigen strong inline overridden.',
-      'Not overriding eigen strong inline, '
+      True, ('Would you like to override eigen strong inline for some C++ '
+             'compilation to reduce the compilation time?'),
+      'Eigen strong inline overridden.', 'Not overriding eigen strong inline, '
       'some compilations could take more than 20 mins.'):
     # Due to a known MSVC compiler issue
     # https://github.com/tensorflow/tensorflow/issues/10521
@@ -1455,10 +1466,11 @@ def config_info_line(name, help_text):
 
 def main():
   parser = argparse.ArgumentParser()
-  parser.add_argument("--workspace",
-                      type=str,
-                      default=_TF_WORKSPACE_ROOT,
-                      help="The absolute path to your active Bazel workspace.")
+  parser.add_argument(
+      '--workspace',
+      type=str,
+      default=_TF_WORKSPACE_ROOT,
+      help='The absolute path to your active Bazel workspace.')
   args = parser.parse_args()
 
   # Make a copy of os.environ to be clear when functions and getting and setting
@@ -1500,7 +1512,7 @@ def main():
   # runtime to allow the Tensorflow testcases which compare numpy
   # results to Tensorflow results to succeed.
   if is_ppc64le():
-    write_action_env_to_bazelrc("OMP_NUM_THREADS", 1)
+    write_action_env_to_bazelrc('OMP_NUM_THREADS', 1)
 
   set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc',
                 'with_jemalloc', True)
@@ -1514,12 +1526,12 @@ def main():
                 'with_kafka_support', True, 'kafka')
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
                 False, 'xla')
-  set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support',
-                False, 'gdr')
+  set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', False,
+                'gdr')
   set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support',
                 False, 'verbs')
-  set_build_var(environ_cp, 'TF_NEED_NGRAPH', 'nGraph',
-                'with_ngraph_support', False, 'ngraph')
+  set_build_var(environ_cp, 'TF_NEED_NGRAPH', 'nGraph', 'with_ngraph_support',
+                False, 'ngraph')
 
   set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
   if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
@@ -1585,13 +1597,10 @@ def main():
   # Add a config option to build TensorFlow 2.0 API.
   write_to_bazelrc('build:v2 --define=tf_api_version=2')
 
-  if get_var(
-      environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace',
-      False,
-      ('Would you like to interactively configure ./WORKSPACE for '
-       'Android builds?'),
-      'Searching for NDK and SDK installations.',
-      'Not configuring the WORKSPACE for Android builds.'):
+  if get_var(environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', False,
+             ('Would you like to interactively configure ./WORKSPACE for '
+              'Android builds?'), 'Searching for NDK and SDK installations.',
+             'Not configuring the WORKSPACE for Android builds.'):
     create_android_ndk_rule(environ_cp)
     create_android_sdk_rule(environ_cp)
 
@@ -1605,5 +1614,6 @@ def main():
     config_info_line('mkl', 'Build with MKL support.')
     config_info_line('monolithic', 'Config for mostly static monolithic build.')
 
+
 if __name__ == '__main__':
   main()
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 5648b1525a..f5fdd3a75e 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -48,6 +48,7 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
 CUDA_LIB_PATHS = [
     "lib64/",
     "lib64/stubs/",
+    "lib/powerpc64le-linux-gnu/",
     "lib/x86_64-linux-gnu/",
     "lib/x64/",
     "lib/",
@@ -70,6 +71,7 @@ CUPTI_HEADER_PATHS = [
 # the other CUDA libraries but rather in a special extras/CUPTI directory.
 CUPTI_LIB_PATHS = [
     "extras/CUPTI/lib64/",
+    "lib/powerpc64le-linux-gnu/",
     "lib/x86_64-linux-gnu/",
     "lib64/",
     "extras/CUPTI/libx64/",
-- 
GitLab


From 2770a03f79f3b544d40cf1b8ab0a5c971dd36f5f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 02:26:08 -0700
Subject: [PATCH 0722/1357] compat: Update forward compatibility horizon to
 2018-09-26

PiperOrigin-RevId: 214574383
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 74fe1fe35c..ce230731b0 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 25)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 26)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From b4ae85234b4f626e4aaee1d2c531a6b534712dbb Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Wed, 26 Sep 2018 02:31:31 -0700
Subject: [PATCH 0723/1357] Automated rollback of commit
 7229d08f0b25e24e6dd4833a94a27f404b27a350

PiperOrigin-RevId: 214575129
---
 WORKSPACE                     | 20 ++------------------
 tensorflow/python/keras/BUILD |  2 --
 tensorflow/requirements.txt   |  2 --
 3 files changed, 2 insertions(+), 22 deletions(-)
 delete mode 100644 tensorflow/requirements.txt

diff --git a/WORKSPACE b/WORKSPACE
index 11605871f3..17961829a6 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -9,26 +9,10 @@ http_archive(
         "https://github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz",  # 2018-04-13
     ],
 )
-load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
-closure_repositories()
 
-http_archive(
-    name = "io_bazel_rules_python",
-    strip_prefix = "rules_python-8b5d0683a7d878b28fffe464779c8a53659fc645",
-    urls = [
-        "https://github.com/bazelbuild/rules_python/archive/8b5d0683a7d878b28fffe464779c8a53659fc645.tar.gz",
-    ],
-)
-load("@io_bazel_rules_python//python:pip.bzl", "pip_repositories")
-pip_repositories()
+load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
 
-load("@io_bazel_rules_python//python:pip.bzl", "pip_import")
-pip_import(
-    name = "pip_deps",
-    requirements = "//tensorflow:requirements.txt",
-)
-load("@pip_deps//:requirements.bzl", "pip_install")
-pip_install()
+closure_repositories()
 
 # We must check the bazel version before trying to parse any other BUILD
 # files, in case the parsing of those build files depends on the bazel
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index ac011a2940..4a72c4b3f3 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -7,7 +7,6 @@ exports_files(["LICENSE"])
 
 package(default_visibility = ["//visibility:public"])
 
-load("@pip_deps//:requirements.bzl", "requirement")
 load("//tensorflow:tensorflow.bzl", "py_test")
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 
@@ -63,7 +62,6 @@ py_library(
         ":backend",
         ":engine",
         ":layers",
-        requirement("keras_applications"),
         "//tensorflow/python/saved_model",
         "//tensorflow/python:training",
     ],
diff --git a/tensorflow/requirements.txt b/tensorflow/requirements.txt
deleted file mode 100644
index 6e111edefc..0000000000
--- a/tensorflow/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-keras_applications >= 1.0.5
-keras_preprocessing >= 1.0.3
-- 
GitLab


From e45f7ee4182d5e831026f329cff5da2596d6733a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 05:01:43 -0700
Subject: [PATCH 0724/1357] Refactoring of nest value getters.

PiperOrigin-RevId: 214587760
---
 tensorflow/python/BUILD        |   1 +
 tensorflow/python/util/util.cc | 305 ++++++++++++++++-----------------
 2 files changed, 147 insertions(+), 159 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 79f14466e6..410b3a553a 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -333,6 +333,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//third_party/python_runtime:headers",
+        "@com_google_absl//absl/memory",
     ],
 )
 
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index 562bbdcfeb..2087957b31 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -15,9 +15,11 @@ limitations under the License.
 #include "tensorflow/python/util/util.h"
 
 #include <functional>
+#include <memory>
 #include <unordered_map>
 #include <vector>
 
+#include "absl/memory/memory.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
@@ -222,93 +224,136 @@ int IsSequenceHelper(PyObject* o) {
   return check_cache->CachedLookup(o);
 }
 
-// Implements the same idea as tensorflow.util.nest._yield_value
-// During construction we check if the iterable is a dictionary.
-// If so, we construct a sequence from its sorted keys that will be used
-// for iteration.
-// If not, we construct a sequence directly from the iterable.
-// At each step, we get the next element from the sequence and use it
-// either as a key or return it directly.
-//
-// 'iterable' must not be modified while ValIterator is used.
-class ValIterator {
+// ValueIterator interface
+class ValueIterator {
+ public:
+  virtual ~ValueIterator() {}
+  virtual Safe_PyObjectPtr next() = 0;
+
+  bool valid() const { return is_valid_; }
+
+ protected:
+  void invalidate() { is_valid_ = false; }
+
+ private:
+  bool is_valid_ = true;
+};
+
+using ValueIteratorPtr = std::unique_ptr<ValueIterator>;
+
+// Iterate through dictionaries in a deterministic order by sorting the
+// keys. Notice this means that we ignore the original order of
+// `OrderedDict` instances. This is intentional, to avoid potential
+// bugs caused by mixing ordered and plain dicts (e.g., flattening
+// a dict but using a corresponding `OrderedDict` to pack it back).
+class DictValueIterator : public ValueIterator {
  public:
-  explicit ValIterator(PyObject* iterable)
-      : dict_(nullptr),
-        mapping_(nullptr),
-        last_mapping_element_(nullptr),
-        seq_(nullptr),
-        index_(0) {
-    if (PyDict_Check(iterable)) {
-      dict_ = iterable;
-      // PyDict_Keys returns a list, which can be used with
-      // PySequence_Fast_GET_ITEM.
-      seq_ = PyDict_Keys(iterable);
-      // Iterate through dictionaries in a deterministic order by sorting the
-      // keys. Notice this means that we ignore the original order of
-      // `OrderedDict` instances. This is intentional, to avoid potential
-      // bugs caused by mixing ordered and plain dicts (e.g., flattening
-      // a dict but using a corresponding `OrderedDict` to pack it back).
-      PyList_Sort(seq_);
-    } else if (IsMappingHelper(iterable)) {
-      mapping_ = iterable;
-      seq_ = MappingKeys(iterable);
-      PyList_Sort(seq_);
+  explicit DictValueIterator(PyObject* dict)
+      : dict_(dict), keys_(PyDict_Keys(dict)) {
+    if (PyList_Sort(keys_.get()) == -1) {
+      invalidate();
     } else {
-      seq_ = PySequence_Fast(iterable, "");
+      iter_.reset(PyObject_GetIter(keys_.get()));
     }
-    size_ = PySequence_Fast_GET_SIZE(seq_);
   }
 
-  ~ValIterator() { Py_DECREF(seq_); }
-
-  // Return a borrowed reference to the next element from iterable.
-  // Return nullptr when iteration is over.
-  PyObject* next() {
-    if (TF_PREDICT_FALSE(seq_ == nullptr)) {
-      return nullptr;
-    }
-    PyObject* element = nullptr;
-    if (index_ < size_) {
-      // Both PySequence_Fast_GET_ITEM and PyDict_GetItem return borrowed
-      // references. For general mappings, ValIterator keeps a reference to the
-      // last retrieved element (and decrefs it before producing the next
-      // element) to abstract away the borrowed/new difference.
-      element = PySequence_Fast_GET_ITEM(seq_, index_);
-      ++index_;
-      if (dict_ != nullptr) {
-        element = PyDict_GetItem(dict_, element);
-        if (element == nullptr) {
-          PyErr_SetString(PyExc_RuntimeError,
-                          "Dictionary was modified during iteration over it");
-          return nullptr;
-        }
-      } else if (mapping_ != nullptr) {
-        element = PyObject_GetItem(mapping_, element);
-        if (element == nullptr) {
-          PyErr_SetString(PyExc_RuntimeError,
-                          "Mapping was modified during iteration over it");
-          return nullptr;
-        }
-        last_mapping_element_.reset(element);
+  Safe_PyObjectPtr next() override {
+    Safe_PyObjectPtr result;
+    Safe_PyObjectPtr key(PyIter_Next(iter_.get()));
+    if (key) {
+      // PyDict_GetItem returns a borrowed reference.
+      PyObject* elem = PyDict_GetItem(dict_, key.get());
+      if (elem) {
+        Py_INCREF(elem);
+        result.reset(elem);
+      } else {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "Dictionary was modified during iteration over it");
       }
     }
-    return element;
+    return result;
   }
 
  private:
-  // Special casing for things that pass PyDict_Check (faster, no Python calls)
   PyObject* dict_;
+  Safe_PyObjectPtr keys_;
+  Safe_PyObjectPtr iter_;
+};
 
-  // General mappings which have custom Python logic
+// Iterate over mapping objects by sorting the keys first
+class MappingValueIterator : public ValueIterator {
+ public:
+  explicit MappingValueIterator(PyObject* mapping)
+      : mapping_(mapping), keys_(MappingKeys(mapping)) {
+    if (!keys_ || PyList_Sort(keys_.get()) == -1) {
+      invalidate();
+    } else {
+      iter_.reset(PyObject_GetIter(keys_.get()));
+    }
+  }
+
+  Safe_PyObjectPtr next() override {
+    Safe_PyObjectPtr result;
+    Safe_PyObjectPtr key(PyIter_Next(iter_.get()));
+    if (key) {
+      // Unlike PyDict_GetItem, PyObject_GetItem returns a new reference.
+      PyObject* elem = PyObject_GetItem(mapping_, key.get());
+      if (elem) {
+        result.reset(elem);
+      } else {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "Mapping was modified during iteration over it");
+      }
+    }
+    return result;
+  }
+
+ private:
   PyObject* mapping_;
-  Safe_PyObjectPtr last_mapping_element_;
+  Safe_PyObjectPtr keys_;
+  Safe_PyObjectPtr iter_;
+};
+
+// Iterate over a sequence, by index.
+class SequenceValueIterator : public ValueIterator {
+ public:
+  explicit SequenceValueIterator(PyObject* iterable)
+      : seq_(PySequence_Fast(iterable, "")),
+        size_(PySequence_Fast_GET_SIZE(seq_.get())),
+        index_(0) {}
 
-  PyObject* seq_;
-  Py_ssize_t size_;
+  Safe_PyObjectPtr next() override {
+    Safe_PyObjectPtr result;
+    if (index_ < size_) {
+      // PySequence_Fast_GET_ITEM returns a borrowed reference.
+      PyObject* elem = PySequence_Fast_GET_ITEM(seq_.get(), index_);
+      ++index_;
+      Py_INCREF(elem);
+      result.reset(elem);
+    }
+
+    return result;
+  }
+
+ private:
+  Safe_PyObjectPtr seq_;
+  const Py_ssize_t size_;
   Py_ssize_t index_;
 };
 
+// Just return itself as a single item.
+class SparseTensorValueIterator : public ValueIterator {
+ public:
+  explicit SparseTensorValueIterator(PyObject* tensor) : tensor_(tensor) {
+    Py_INCREF(tensor);
+  }
+
+  Safe_PyObjectPtr next() override { return std::move(tensor_); }
+
+ private:
+  Safe_PyObjectPtr tensor_;
+};
+
 bool IsSparseTensorValueType(PyObject* o) {
   if (TF_PREDICT_FALSE(SparseTensorValueType == nullptr)) {
     return false;
@@ -322,93 +367,33 @@ int IsSequenceForDataHelper(PyObject* o) {
          !IsSparseTensorValueType(o);
 }
 
-bool GetNextValuesForDict(PyObject* nested,
-                          std::vector<Safe_PyObjectPtr>* next_values) {
-  Safe_PyObjectPtr keys(PyDict_Keys(nested));
-  if (PyList_Sort(keys.get()) == -1) return false;
-  Py_ssize_t size = PyList_Size(keys.get());
-  for (Py_ssize_t i = 0; i < size; ++i) {
-    // We know that key and item will not be deleted because nested owns
-    // a reference to them and callers of flatten must not modify nested
-    // while the method is running.
-    PyObject* key = PyList_GET_ITEM(keys.get(), i);
-    PyObject* item = PyDict_GetItem(nested, key);
-    Py_INCREF(item);
-    next_values->emplace_back(item);
-  }
-  return true;
-}
-
-bool GetNextValuesForMapping(PyObject* nested,
-                             std::vector<Safe_PyObjectPtr>* next_values) {
-  Safe_PyObjectPtr keys(MappingKeys(nested));
-  if (keys.get() == nullptr) {
-    return false;
-  }
-  if (PyList_Sort(keys.get()) == -1) return false;
-  Py_ssize_t size = PyList_Size(keys.get());
-  for (Py_ssize_t i = 0; i < size; ++i) {
-    PyObject* key = PyList_GET_ITEM(keys.get(), i);
-    // Unlike PyDict_GetItem, PyObject_GetItem returns a new reference.
-    PyObject* item = PyObject_GetItem(nested, key);
-    next_values->emplace_back(item);
-  }
-  return true;
-}
-
-bool GetNextValuesForIterable(PyObject* nested,
-                              std::vector<Safe_PyObjectPtr>* next_values) {
-  PyObject* item;
-  PyObject* iterator = PyObject_GetIter(nested);
-  if (iterator == nullptr || PyErr_Occurred()) {
-    return false;
-  }
-  while ((item = PyIter_Next(iterator)) != nullptr) {
-    next_values->emplace_back(item);
-  }
-  Py_DECREF(iterator);
-  return true;
-}
-
-// GetNextValues returns the values that the FlattenHelper function will recurse
-// over next.
-bool GetNextValues(PyObject* nested,
-                   std::vector<Safe_PyObjectPtr>* next_values) {
+ValueIteratorPtr GetValueIterator(PyObject* nested) {
   if (PyDict_Check(nested)) {
-    // if nested is dictionary, sort it by key and recurse on each value
-    return GetNextValuesForDict(nested, next_values);
+    return absl::make_unique<DictValueIterator>(nested);
   } else if (IsMappingHelper(nested)) {
-    // same treatment as dictionaries, but for custom mapping types
-    return GetNextValuesForMapping(nested, next_values);
+    return absl::make_unique<MappingValueIterator>(nested);
+  } else {
+    return absl::make_unique<SequenceValueIterator>(nested);
   }
-  // iterate and recurse
-  return GetNextValuesForIterable(nested, next_values);
 }
 
-// Similar to above, just specialized for the functions in the data pacakage.
-bool GetNextValuesForData(PyObject* nested,
-                          std::vector<Safe_PyObjectPtr>* next_values) {
+// Similar to above, just specialized for the functions in the data package.
+ValueIteratorPtr GetValueIteratorForData(PyObject* nested) {
   if (PyDict_Check(nested)) {
-    // if nested is dictionary, sort it by key and recurse on each value
-    return GetNextValuesForDict(nested, next_values);
+    return absl::make_unique<DictValueIterator>(nested);
   } else if (IsMappingHelper(nested)) {
-    // same treatment as dictionaries, but for custom mapping types
-    return GetNextValuesForMapping(nested, next_values);
+    return absl::make_unique<MappingValueIterator>(nested);
   } else if (IsSparseTensorValueType(nested)) {
-    // if nested is a SparseTensorValue, just return itself as a single item
-    Py_INCREF(nested);
-    next_values->emplace_back(nested);
-    return true;
+    return absl::make_unique<SparseTensorValueIterator>(nested);
+  } else {
+    return absl::make_unique<SequenceValueIterator>(nested);
   }
-  // iterate and recurse
-  return GetNextValuesForIterable(nested, next_values);
 }
 
 bool FlattenHelper(
     PyObject* nested, PyObject* list,
     const std::function<int(PyObject*)>& is_sequence_helper,
-    const std::function<bool(PyObject*, std::vector<Safe_PyObjectPtr>*)>&
-        next_values_getter) {
+    const std::function<ValueIteratorPtr(PyObject*)>& value_iterator_getter) {
   // if nested is not a sequence, append itself and exit
   int is_seq = is_sequence_helper(nested);
   if (is_seq == -1) return false;
@@ -416,16 +401,15 @@ bool FlattenHelper(
     return PyList_Append(list, nested) != -1;
   }
 
-  std::vector<Safe_PyObjectPtr> next_values;
-  // Get the next values to recurse over.
-  if (!next_values_getter(nested, &next_values)) return false;
+  ValueIteratorPtr iter = value_iterator_getter(nested);
+  if (!iter->valid()) return false;
 
-  for (const auto& item : next_values) {
+  for (Safe_PyObjectPtr item = iter->next(); item; item = iter->next()) {
     if (Py_EnterRecursiveCall(" in flatten")) {
       return false;
     }
-    const bool success =
-        FlattenHelper(item.get(), list, is_sequence_helper, next_values_getter);
+    const bool success = FlattenHelper(item.get(), list, is_sequence_helper,
+                                       value_iterator_getter);
     Py_LeaveRecursiveCall();
     if (!success) {
       return false;
@@ -579,22 +563,25 @@ bool AssertSameStructureHelper(
     }
   }
 
-  ValIterator iter1(o1);
-  ValIterator iter2(o2);
+  ValueIteratorPtr iter1 = GetValueIterator(o1);
+  ValueIteratorPtr iter2 = GetValueIterator(o2);
+
+  if (!iter1->valid() || !iter2->valid()) return false;
 
   while (true) {
-    PyObject* v1 = iter1.next();
-    PyObject* v2 = iter2.next();
-    if (v1 != nullptr && v2 != nullptr) {
+    Safe_PyObjectPtr v1 = iter1->next();
+    Safe_PyObjectPtr v2 = iter2->next();
+    if (v1 && v2) {
       if (Py_EnterRecursiveCall(" in assert_same_structure")) {
         return false;
       }
-      bool no_internal_errors = AssertSameStructureHelper(
-          v1, v2, check_types, error_msg, is_type_error, is_sequence_helper);
+      bool no_internal_errors =
+          AssertSameStructureHelper(v1.get(), v2.get(), check_types, error_msg,
+                                    is_type_error, is_sequence_helper);
       Py_LeaveRecursiveCall();
       if (!no_internal_errors) return false;
       if (!error_msg->empty()) return true;
-    } else if (v1 == nullptr && v2 == nullptr) {
+    } else if (!v1 && !v2) {
       // Done with all recursive calls. Structure matched.
       return true;
     } else {
@@ -655,7 +642,7 @@ bool IsMapping(PyObject* o) { return IsMappingHelper(o) == 1; }
 
 PyObject* Flatten(PyObject* nested) {
   PyObject* list = PyList_New(0);
-  if (FlattenHelper(nested, list, IsSequenceHelper, GetNextValues)) {
+  if (FlattenHelper(nested, list, IsSequenceHelper, GetValueIterator)) {
     return list;
   } else {
     Py_DECREF(list);
@@ -668,7 +655,7 @@ bool IsSequenceForData(PyObject* o) { return IsSequenceForDataHelper(o) == 1; }
 PyObject* FlattenForData(PyObject* nested) {
   PyObject* list = PyList_New(0);
   if (FlattenHelper(nested, list, IsSequenceForDataHelper,
-                    GetNextValuesForData)) {
+                    GetValueIteratorForData)) {
     return list;
   } else {
     Py_DECREF(list);
-- 
GitLab


From 5498f24a3385bdd256b8b1e41329c5841996b26d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 08:21:53 -0700
Subject: [PATCH 0725/1357] Changed FusedBatchNorm and FusedBatchNormGrad to
 use allowed_values for data_format attr.

PiperOrigin-RevId: 214608039
---
 .../compiler/tests/fused_batchnorm_test.py    |  40 +--
 .../compiler/tf2xla/kernels/batch_norm_op.cc  |  12 -
 .../core/ops/compat/ops_history.v1.pbtxt      | 338 ++++++++++++++++++
 tensorflow/core/ops/nn_ops.cc                 |   8 +-
 tensorflow/core/ops/ops.pbtxt                 |  24 ++
 5 files changed, 376 insertions(+), 46 deletions(-)

diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py
index 8c018cccb8..374942a0b3 100644
--- a/tensorflow/compiler/tests/fused_batchnorm_test.py
+++ b/tensorflow/compiler/tests/fused_batchnorm_test.py
@@ -29,6 +29,11 @@ from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import nn
 from tensorflow.python.platform import test
 
+DATA_FORMATS = (
+    ("_data_format_NHWC", "NHWC"),
+    ("_data_format_NCHW", "NCHW"),
+)
+
 
 class FusedBatchNormTest(xla_test.XLATestCase, parameterized.TestCase):
 
@@ -65,12 +70,7 @@ class FusedBatchNormTest(xla_test.XLATestCase, parameterized.TestCase):
     grad_offset = np.sum(grad_y, axis=(0, 1, 2))
     return grad_x, grad_scale, grad_offset
 
-  @parameterized.named_parameters(
-      ("_data_format_NHWC", "NHWC"),
-      ("_data_format_NCHW", "NCHW"),
-      ("_data_format_HWNC", "HWNC"),
-      ("_data_format_HWCN", "HWCN"),
-  )
+  @parameterized.named_parameters(*DATA_FORMATS)
   def testInference(self, data_format):
     channel = 3
     x_shape = [2, 2, 6, channel]
@@ -170,30 +170,15 @@ class FusedBatchNormTest(xla_test.XLATestCase, parameterized.TestCase):
       self.assertAllClose(y_val, y_ref_converted, atol=1e-3)
       self.assertAllClose(var_val, var_ref, atol=1e-3)
 
-  @parameterized.named_parameters(
-      ("_data_format_NHWC", "NHWC"),
-      ("_data_format_NCHW", "NCHW"),
-      ("_data_format_HWNC", "HWNC"),
-      ("_data_format_HWCN", "HWCN"),
-  )
+  @parameterized.named_parameters(*DATA_FORMATS)
   def testLearning(self, data_format):
     self._testLearning(False, data_format)
 
-  @parameterized.named_parameters(
-      ("_data_format_NHWC", "NHWC"),
-      ("_data_format_NCHW", "NCHW"),
-      ("_data_format_HWNC", "HWNC"),
-      ("_data_format_HWCN", "HWCN"),
-  )
+  @parameterized.named_parameters(*DATA_FORMATS)
   def testLearningWithGradientChecker(self, data_format):
     self._testLearning(True, data_format)
 
-  @parameterized.named_parameters(
-      ("_data_format_NHWC", "NHWC"),
-      ("_data_format_NCHW", "NCHW"),
-      ("_data_format_HWNC", "HWNC"),
-      ("_data_format_HWCN", "HWCN"),
-  )
+  @parameterized.named_parameters(*DATA_FORMATS)
   def testGradientTraining(self, data_format):
     # TODO(b/64270657): Use gradient_checker here in addition to comparing with
     # this reference implementation.
@@ -241,12 +226,7 @@ class FusedBatchNormTest(xla_test.XLATestCase, parameterized.TestCase):
       self.assertAllClose(grad_scale_val, grad_scale_ref, atol=1e-2)
       self.assertAllClose(grad_offset_val, grad_offset_ref, atol=1e-3)
 
-  @parameterized.named_parameters(
-      ("_data_format_NHWC", "NHWC"),
-      ("_data_format_NCHW", "NCHW"),
-      ("_data_format_HWNC", "HWNC"),
-      ("_data_format_HWCN", "HWCN"),
-  )
+  @parameterized.named_parameters(*DATA_FORMATS)
   def testGradientInference(self, data_format):
     # TODO(b/64270657): Use gradient_checker here in addition to comparing with
     # this reference implementation.
diff --git a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
index b3ad0aea84..a267c0c72f 100644
--- a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
@@ -34,12 +34,6 @@ class FusedBatchNormOp : public XlaOpKernel {
     OP_REQUIRES(
         ctx, FormatFromString(data_format_str, &data_format_),
         errors::InvalidArgument("Invalid data format: ", data_format_str));
-    OP_REQUIRES(ctx,
-                (data_format_ == FORMAT_NHWC || data_format_ == FORMAT_NCHW ||
-                 data_format_ == FORMAT_HWNC || data_format_ == FORMAT_HWCN),
-                errors::InvalidArgument(
-                    "Unsupported data format ", ToString(data_format_),
-                    "; supported formats are NHWC, NCHW, HWNC and HWCN"));
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
@@ -110,12 +104,6 @@ class FusedBatchNormGradOp : public XlaOpKernel {
     OP_REQUIRES(
         ctx, FormatFromString(data_format_str, &data_format_),
         errors::InvalidArgument("Invalid data format: ", data_format_str));
-    OP_REQUIRES(ctx,
-                (data_format_ == FORMAT_NHWC || data_format_ == FORMAT_NCHW ||
-                 data_format_ == FORMAT_HWNC || data_format_ == FORMAT_HWCN),
-                errors::InvalidArgument(
-                    "Unsupported data format ", ToString(data_format_),
-                    "; supported formats are NHWC, NCHW, HWNC and HWCN"));
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 86d4c6b421..0c64408892 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -24104,6 +24104,85 @@ op {
     }
   }
 }
+op {
+  name: "FusedBatchNorm"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "offset"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "mean"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "variance"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "batch_mean"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "batch_variance"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_1"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_2"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
 op {
   name: "FusedBatchNormGrad"
   input_arg {
@@ -24177,6 +24256,168 @@ op {
     }
   }
 }
+op {
+  name: "FusedBatchNormGrad"
+  input_arg {
+    name: "y_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reserve_space_1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reserve_space_2"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "x_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "scale_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "offset_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_3"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_4"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
+op {
+  name: "FusedBatchNormGradV2"
+  input_arg {
+    name: "y_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "reserve_space_2"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "x_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "scale_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "offset_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_3"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_4"
+    type_attr: "U"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "U"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
 op {
   name: "FusedBatchNormGradV2"
   input_arg {
@@ -24225,6 +24466,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -24335,6 +24577,12 @@ op {
     default_value {
       s: "NHWC"
     }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "is_training"
@@ -24511,6 +24759,96 @@ op {
     }
   }
 }
+op {
+  name: "FusedBatchNormV2"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "offset"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "mean"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "variance"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "batch_mean"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "batch_variance"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_2"
+    type_attr: "U"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "U"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
 op {
   name: "FusedPadConv2D"
   input_arg {
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index dc39996017..d1d81b27cc 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -178,7 +178,7 @@ REGISTER_OP("FusedBatchNorm")
     .Output("reserve_space_2: T")
     .Attr("T: {float}")
     .Attr("epsilon: float = 0.0001")
-    .Attr("data_format: string = 'NHWC'")
+    .Attr(GetConvnetDataFormatAttrString())
     .Attr("is_training: bool = true")
     .SetShapeFn(shape_inference::FusedBatchNormShape);
 
@@ -196,7 +196,7 @@ REGISTER_OP("FusedBatchNormV2")
     .Attr("T: {half, bfloat16, float}")
     .Attr("U: {float}")
     .Attr("epsilon: float = 0.0001")
-    .Attr("data_format: string = 'NHWC'")
+    .Attr(GetConvnetDataFormatAttrString())
     .Attr("is_training: bool = true")
     .SetShapeFn(shape_inference::FusedBatchNormShape);
 
@@ -213,7 +213,7 @@ REGISTER_OP("FusedBatchNormGrad")
     .Output("reserve_space_4: T")
     .Attr("T: {float}")
     .Attr("epsilon: float = 0.0001")
-    .Attr("data_format: string = 'NHWC'")
+    .Attr(GetConvnetDataFormatAttrString())
     .Attr("is_training: bool = true")
     .SetShapeFn(shape_inference::FusedBatchNormGradShape);
 
@@ -231,7 +231,7 @@ REGISTER_OP("FusedBatchNormGradV2")
     .Attr("T: {half, bfloat16, float}")
     .Attr("U: {float}")
     .Attr("epsilon: float = 0.0001")
-    .Attr("data_format: string = 'NHWC'")
+    .Attr(GetConvnetDataFormatAttrString())
     .Attr("is_training: bool = true")
     .SetShapeFn(shape_inference::FusedBatchNormGradShape);
 
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index bdded2d894..af7d6cbe42 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -11459,6 +11459,12 @@ op {
     default_value {
       s: "NHWC"
     }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "is_training"
@@ -11532,6 +11538,12 @@ op {
     default_value {
       s: "NHWC"
     }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "is_training"
@@ -11616,6 +11628,12 @@ op {
     default_value {
       s: "NHWC"
     }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "is_training"
@@ -11700,6 +11718,12 @@ op {
     default_value {
       s: "NHWC"
     }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "is_training"
-- 
GitLab


From e9f76594ca1d7ea5317e0535d4a4bfffb269a1f9 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 26 Sep 2018 09:09:31 -0700
Subject: [PATCH 0726/1357] Trivial change to softplus_op_test.py.

PiperOrigin-RevId: 214614405
---
 tensorflow/python/kernel_tests/softplus_op_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/softplus_op_test.py b/tensorflow/python/kernel_tests/softplus_op_test.py
index e8dc272637..636ed4747e 100644
--- a/tensorflow/python/kernel_tests/softplus_op_test.py
+++ b/tensorflow/python/kernel_tests/softplus_op_test.py
@@ -126,7 +126,7 @@ class SoftplusTest(test.TestCase):
       with self.assertRaisesRegexp(
           TypeError,
           "'features' has DataType int32 not in list of allowed values"):
-        nn_ops.softplus(constant_op.constant(7)).eval()
+        nn_ops.softplus(constant_op.constant(42)).eval()
 
 
 if __name__ == "__main__":
-- 
GitLab


From 01512356e10ab87887e3c7b69f9ed3e5a8397f76 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Wed, 26 Sep 2018 09:10:53 -0700
Subject: [PATCH 0727/1357] [XLA] Don't use NumUniqueInstructionIds() as a
 proxy for instruction_count()

It used to be a reasonable proxy, but that's no longer the case. This is because GetUniqueId() in XlaBuilder uses a *global* (rather than a module-global) counter. Since HloModule::CreateFromProto no-longer uniquifies ids coming in from protos, the potentially very high IDs coming from GetUniqueId() become the module's next_unique_id.

There is another case of this in TuplePointsTo, that will be handled separately.

PiperOrigin-RevId: 214614576
---
 tensorflow/compiler/xla/service/hlo_instruction.cc         | 2 +-
 tensorflow/compiler/xla/service/hlo_memory_scheduler.cc    | 2 +-
 tensorflow/compiler/xla/service/logical_buffer_analysis.cc | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index ad58833e4d..f7ec854d80 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2423,7 +2423,7 @@ template <typename Visitor>
 static Status PostOrderDFS(HloInstruction* root, Visitor* visitor,
                            const InternalCompareFunction* operand_order,
                            bool ignore_control_predecessors) {
-  visitor->ReserveVisitStates(root->GetModule()->NumUniqueInstructionIds());
+  visitor->ReserveVisitStates(root->GetModule()->instruction_count());
 
   // dfs_stack holds pairs of <HloInstruction*->unique_id(), HloInstruction*>.
   //
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index c7ec88d450..6a4e766788 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -400,7 +400,7 @@ StatusOr<HloInstructionSequence> DFSMemoryScheduler(
         memory_by_computation) {
   // These variables are a hack to prevent overflows.
   int64 cumulative_total_size = 0;
-  int64 total_hlos = computation.parent()->NumUniqueInstructionIds();
+  int64 total_hlos = computation.parent()->instruction_count();
   tensorflow::gtl::FlatMap<const HloInstruction*, int64> extra_users;
   tensorflow::gtl::FlatMap<const HloInstruction*, int64> total_sizes;
   for (const HloInstruction* hlo : computation.MakeInstructionPostOrder()) {
diff --git a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc
index eaa09591b7..ec52a24d78 100644
--- a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc
+++ b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc
@@ -54,7 +54,7 @@ Status LogicalBufferAnalysis::Analyze() {
   // so reserve 10% more than the number of instructions to avoid frequent
   // resizes.
   logical_buffers_.clear();
-  logical_buffers_.reserve((module_->NumUniqueInstructionIds() * 11) / 10);
+  logical_buffers_.reserve((module_->instruction_count() * 11) / 10);
 
   // We filter out fusion computations, and get to them through fusion
   // instructions. This is because it's possible to have orphaned (unreachable)
-- 
GitLab


From c3203eb8bf0d7ae9dce133f982884622f666c681 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Wed, 26 Sep 2018 09:13:11 -0700
Subject: [PATCH 0728/1357] The GeneratorDataset init function was being run
 during Initialization which is a blocking Op. Moving it to the GetNext call
 which is a non blocking async op.

This means we can now revert back to the default of 2 threads per GPU.

PiperOrigin-RevId: 214614839
---
 tensorflow/core/kernels/data/generator_dataset_op.cc | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/data/generator_dataset_op.cc b/tensorflow/core/kernels/data/generator_dataset_op.cc
index 71a36314a0..b4367d5a11 100644
--- a/tensorflow/core/kernels/data/generator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/generator_dataset_op.cc
@@ -86,8 +86,6 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
       TF_RETURN_IF_ERROR(dataset()->init_func_->Instantiate(ctx));
       TF_RETURN_IF_ERROR(dataset()->next_func_->Instantiate(ctx));
       TF_RETURN_IF_ERROR(dataset()->finalize_func_->Instantiate(ctx));
-      TF_RETURN_IF_ERROR(
-          dataset()->init_func_->RunWithBorrowedArgs(ctx, {}, &state_));
       return Status::OK();
     }
 
@@ -96,6 +94,12 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
                            bool* end_of_sequence) override {
       mutex_lock l(mu_);
 
+      if (!initialized_) {
+        TF_RETURN_IF_ERROR(
+            dataset()->init_func_->RunWithBorrowedArgs(ctx, {}, &state_));
+        initialized_ = true;
+      }
+
       if (finalized_) {
         *end_of_sequence = true;
         return Status::OK();
@@ -123,6 +127,7 @@ class GeneratorDatasetOp::Dataset : public DatasetBase {
 
    private:
     mutex mu_;
+    bool initialized_ GUARDED_BY(mu_) = false;
     bool finalized_ GUARDED_BY(mu_) = false;
     std::vector<Tensor> state_ GUARDED_BY(mu_);
   };
-- 
GitLab


From d7de49e456fc84416fbf3a6de7ad1ed6c12d7a20 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 26 Sep 2018 09:13:54 -0700
Subject: [PATCH 0729/1357] The return value checker should ignore inner
 functions.

PiperOrigin-RevId: 214614921
---
 .../autograph/converters/return_statements.py      | 14 ++++++++++++++
 .../autograph/converters/return_statements_test.py | 12 ++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/tensorflow/python/autograph/converters/return_statements.py b/tensorflow/python/autograph/converters/return_statements.py
index 62da045d6a..496c99e3b5 100644
--- a/tensorflow/python/autograph/converters/return_statements.py
+++ b/tensorflow/python/autograph/converters/return_statements.py
@@ -212,6 +212,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor):
 
   def __init__(self):
     self.cant_return = False
+    self.function_level = 0
     super(DetectReturnInUnsupportedControlFlow, self).__init__()
 
   def visit_While(self, node):
@@ -229,6 +230,12 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor):
     self.generic_visit(node)
     self.cant_return = False
 
+  def visit_FunctionDef(self, node):
+    if not self.function_level:
+      self.function_level += 1
+      self.generic_visit(node)
+      self.function_level -= 1
+
   def visit_Return(self, node):
     if self.cant_return:
       raise ValueError(
@@ -242,6 +249,7 @@ class DetectReturnInConditional(gast.NodeVisitor):
 
   def __init__(self):
     self.cant_return = False
+    self.function_level = 0
     super(DetectReturnInConditional, self).__init__()
 
   def visit_If(self, node):
@@ -249,6 +257,12 @@ class DetectReturnInConditional(gast.NodeVisitor):
     self.generic_visit(node)
     self.cant_return = False
 
+  def visit_FunctionDef(self, node):
+    if not self.function_level:
+      self.function_level += 1
+      self.generic_visit(node)
+      self.function_level -= 1
+
   def visit_Return(self, node):
     if self.cant_return:
       raise ValueError(
diff --git a/tensorflow/python/autograph/converters/return_statements_test.py b/tensorflow/python/autograph/converters/return_statements_test.py
index 01dd03da0b..762fbc6f60 100644
--- a/tensorflow/python/autograph/converters/return_statements_test.py
+++ b/tensorflow/python/autograph/converters/return_statements_test.py
@@ -151,6 +151,18 @@ class SingleReturnTest(converter_testing.TestCase):
     self.assertTransformedEquivalent(test_fn, 2)
     self.assertTransformedEquivalent(test_fn, -2)
 
+  def test_nested_functions_in_control_flow(self):
+
+    def test_fn(x):
+
+      if x:
+        def inner_fn(y):
+          return y
+        inner_fn(x)
+
+    self.assertTransformedEquivalent(test_fn, 2)
+    self.assertTransformedEquivalent(test_fn, -2)
+
   def test_loop(self):
 
     def test_fn(x):
-- 
GitLab


From fa1ecc082519922827bad10f07df438c9453fedb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 09:22:46 -0700
Subject: [PATCH 0730/1357] Derive the number of trainers in tensorforest if
 run config is provided.

PiperOrigin-RevId: 214616123
---
 .../tensor_forest/client/random_forest.py       | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tensorflow/contrib/tensor_forest/client/random_forest.py b/tensorflow/contrib/tensor_forest/client/random_forest.py
index 0042d37acd..d78d12d997 100644
--- a/tensorflow/contrib/tensor_forest/client/random_forest.py
+++ b/tensorflow/contrib/tensor_forest/client/random_forest.py
@@ -446,6 +446,12 @@ class TensorForestEstimator(estimator.Estimator):
     Returns:
       A `TensorForestEstimator` instance.
     """
+    # Override default number of trainers if config is provided.
+    if num_trainers == 1 and config is not None:
+      num_trainers = config.num_worker_replicas
+    if trainer_id == 0 and config is not None:
+      trainer_id = config.global_id_in_cluster
+
     super(TensorForestEstimator, self).__init__(
         model_fn=get_model_fn(
             params.fill(),
@@ -564,6 +570,12 @@ class MultiForestMultiHeadEstimator(estimator.Estimator):
                local_eval=False):
     """See TensorForestEstimator.__init__."""
     model_fns = []
+    # Override default number of trainers if config is provided.
+    if num_trainers == 1 and config is not None:
+      num_trainers = config.num_worker_replicas
+    if trainer_id == 0 and config is not None:
+      trainer_id = config.global_id_in_cluster
+
     for i in range(len(params_list)):
       params = params_list[i].fill()
       model_fns.append(
@@ -709,6 +721,11 @@ class CoreTensorForestEstimator(core_estimator.Estimator):
     Returns:
       A `TensorForestEstimator` instance.
     """
+    # Override default number of trainers if config is provided.
+    if num_trainers == 1 and config is not None:
+      num_trainers = config.num_worker_replicas
+    if trainer_id == 0 and config is not None:
+      trainer_id = config.global_id_in_cluster
 
     super(CoreTensorForestEstimator, self).__init__(
         model_fn=get_model_fn(
-- 
GitLab


From 319da67052b067231d01f46692ce429da7a06f97 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 09:38:12 -0700
Subject: [PATCH 0731/1357] Simplify the logic for running through a sequence
 forwards and backwards.

PiperOrigin-RevId: 214618170
---
 .../kernels/bidirectional_sequence_lstm.cc    | 169 ++++++------------
 1 file changed, 58 insertions(+), 111 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index 541f320138..66b947771c 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -770,51 +770,29 @@ TfLiteStatus EvalFloat(
   }
 
   // Loop through the sequence.
-  if (forward_sequence) {
-    for (int t = 0; t < max_time; t++) {
-      const float* input_ptr = input->data.f + t * n_batch * n_input;
-      float* output_ptr_time = output->data.f + t * n_batch * n_output;
-
-      kernel_utils::LstmStepWithAuxInput(
-          input_ptr, input_to_input_weights_ptr,
-          input_to_forget_weights->data.f, input_to_cell_weights->data.f,
-          input_to_output_weights->data.f, aux_input_ptr,
-          aux_input_to_input_weights_ptr, aux_input_to_forget_weights_ptr,
-          aux_input_to_cell_weights_ptr, aux_input_to_output_weights_ptr,
-          recurrent_to_input_weights_ptr, recurrent_to_forget_weights->data.f,
-          recurrent_to_cell_weights->data.f,
-          recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
-          cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
-          input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
-          output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
-          params, n_batch, n_cell, n_input, aux_input_size, n_output,
-          activation_state->data.f, cell_state->data.f, input_gate_scratch,
-          forget_gate_scratch, cell_scratch, output_gate_scratch,
-          output_ptr_time);
-    }
-  } else {
-    // Loop through the sequence backwards.
-    for (int t = max_time - 1; t >= 0; t--) {
-      const float* input_ptr = input->data.f + t * n_batch * n_input;
-      float* output_ptr_time = output->data.f + t * n_batch * n_output;
-
-      kernel_utils::LstmStepWithAuxInput(
-          input_ptr, input_to_input_weights_ptr,
-          input_to_forget_weights->data.f, input_to_cell_weights->data.f,
-          input_to_output_weights->data.f, aux_input_ptr,
-          aux_input_to_input_weights_ptr, aux_input_to_forget_weights_ptr,
-          aux_input_to_cell_weights_ptr, aux_input_to_output_weights_ptr,
-          recurrent_to_input_weights_ptr, recurrent_to_forget_weights->data.f,
-          recurrent_to_cell_weights->data.f,
-          recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
-          cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
-          input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
-          output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
-          params, n_batch, n_cell, n_input, aux_input_size, n_output,
-          activation_state->data.f, cell_state->data.f, input_gate_scratch,
-          forget_gate_scratch, cell_scratch, output_gate_scratch,
-          output_ptr_time);
-    }
+  const int input_step = n_batch * n_input;
+  const int output_step = n_batch * n_output;
+  for (int t = 0; t < max_time; t++) {
+    // If this is the forward_sequence, step forward, otherwise step backwards.
+    const int t_rel = forward_sequence ? t : max_time - t - 1;
+    const float* input_ptr = input->data.f + t_rel * input_step;
+    float* output_ptr_time = output->data.f + t_rel * output_step;
+
+    kernel_utils::LstmStepWithAuxInput(
+        input_ptr, input_to_input_weights_ptr, input_to_forget_weights->data.f,
+        input_to_cell_weights->data.f, input_to_output_weights->data.f,
+        aux_input_ptr, aux_input_to_input_weights_ptr,
+        aux_input_to_forget_weights_ptr, aux_input_to_cell_weights_ptr,
+        aux_input_to_output_weights_ptr, recurrent_to_input_weights_ptr,
+        recurrent_to_forget_weights->data.f, recurrent_to_cell_weights->data.f,
+        recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
+        cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
+        input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
+        output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
+        params, n_batch, n_cell, n_input, aux_input_size, n_output,
+        activation_state->data.f, cell_state->data.f, input_gate_scratch,
+        forget_gate_scratch, cell_scratch, output_gate_scratch,
+        output_ptr_time);
   }
   return kTfLiteOk;
 }
@@ -991,72 +969,41 @@ TfLiteStatus EvalHybrid(
     aux_input_to_output_weights_scale =
         aux_input_to_output_weights->params.scale;
   }
-  if (forward_sequence) {
-    // Feed the sequence into the LSTM step-by-step.
-    for (int t = 0; t < max_time; t++) {
-      const float* input_ptr = input->data.f + t * n_batch * n_input;
-      float* output_ptr = output->data.f + t * n_batch * n_output;
-
-      kernel_utils::LstmStepWithAuxInput(
-          input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
-          input_to_forget_weights_ptr, input_to_forget_weights_scale,
-          input_to_cell_weights_ptr, input_to_cell_weights_scale,
-          input_to_output_weights_ptr, input_to_output_weights_scale,
-          aux_input_ptr, aux_input_to_input_weights_ptr,
-          aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
-          aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
-          aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
-          aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
-          recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
-          recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
-          recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
-          recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
-          cell_to_input_weights_scale, cell_to_forget_weights_ptr,
-          cell_to_forget_weights_scale, cell_to_output_weights_ptr,
-          cell_to_output_weights_scale, input_gate_bias_ptr,
-          forget_gate_bias_ptr, cell_bias_ptr, output_gate_bias_ptr,
-          projection_weights_ptr, projection_weights_scale, projection_bias_ptr,
-          params, n_batch, n_cell, n_input, aux_input_size, n_output,
-          input_gate_scratch, forget_gate_scratch, cell_scratch,
-          output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
-          recovered_cell_weights_ptr, quantized_input_ptr,
-          quantized_aux_input_ptr, quantized_output_state_ptr,
-          quantized_cell_state_ptr, output_state_ptr, cell_state_ptr,
-          output_ptr);
-    }
-  } else {
-    // Loop through the sequence backwards.
-    for (int t = max_time - 1; t >= 0; t--) {
-      const float* input_ptr = input->data.f + t * n_batch * n_input;
-      float* output_ptr = output->data.f + t * n_batch * n_output;
-
-      kernel_utils::LstmStepWithAuxInput(
-          input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
-          input_to_forget_weights_ptr, input_to_forget_weights_scale,
-          input_to_cell_weights_ptr, input_to_cell_weights_scale,
-          input_to_output_weights_ptr, input_to_output_weights_scale,
-          aux_input_ptr, aux_input_to_input_weights_ptr,
-          aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
-          aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
-          aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
-          aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
-          recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
-          recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
-          recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
-          recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
-          cell_to_input_weights_scale, cell_to_forget_weights_ptr,
-          cell_to_forget_weights_scale, cell_to_output_weights_ptr,
-          cell_to_output_weights_scale, input_gate_bias_ptr,
-          forget_gate_bias_ptr, cell_bias_ptr, output_gate_bias_ptr,
-          projection_weights_ptr, projection_weights_scale, projection_bias_ptr,
-          params, n_batch, n_cell, n_input, aux_input_size, n_output,
-          input_gate_scratch, forget_gate_scratch, cell_scratch,
-          output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
-          recovered_cell_weights_ptr, quantized_input_ptr,
-          quantized_aux_input_ptr, quantized_output_state_ptr,
-          quantized_cell_state_ptr, output_state_ptr, cell_state_ptr,
-          output_ptr);
-    }
+
+  // Feed the sequence into the LSTM step-by-step.
+  const int input_step = n_batch * n_input;
+  const int output_step = n_batch * n_output;
+  for (int t = 0; t < max_time; t++) {
+    // If this is the forward_sequence, step forward, otherwise step backwards.
+    const int t_rel = forward_sequence ? t : max_time - t - 1;
+    const float* input_ptr = input->data.f + t_rel * input_step;
+    float* output_ptr = output->data.f + t_rel * output_step;
+
+    kernel_utils::LstmStepWithAuxInput(
+        input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
+        input_to_forget_weights_ptr, input_to_forget_weights_scale,
+        input_to_cell_weights_ptr, input_to_cell_weights_scale,
+        input_to_output_weights_ptr, input_to_output_weights_scale,
+        aux_input_ptr, aux_input_to_input_weights_ptr,
+        aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
+        aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
+        aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
+        aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
+        recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
+        recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
+        recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
+        recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
+        cell_to_input_weights_scale, cell_to_forget_weights_ptr,
+        cell_to_forget_weights_scale, cell_to_output_weights_ptr,
+        cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr,
+        cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
+        projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell,
+        n_input, aux_input_size, n_output, input_gate_scratch,
+        forget_gate_scratch, cell_scratch, output_gate_scratch,
+        scaling_factors_ptr, prod_scaling_factors_ptr,
+        recovered_cell_weights_ptr, quantized_input_ptr,
+        quantized_aux_input_ptr, quantized_output_state_ptr,
+        quantized_cell_state_ptr, output_state_ptr, cell_state_ptr, output_ptr);
   }
 
   return kTfLiteOk;
-- 
GitLab


From eac28534e883283977ebae4dc4dea00cdd601fbc Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 26 Sep 2018 09:39:54 -0700
Subject: [PATCH 0732/1357] Extend support for Index nodes in template
 expansions.

PiperOrigin-RevId: 214618421
---
 tensorflow/python/autograph/pyct/templates.py      |  2 ++
 tensorflow/python/autograph/pyct/templates_test.py | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/tensorflow/python/autograph/pyct/templates.py b/tensorflow/python/autograph/pyct/templates.py
index 1bf0515745..1af8fca599 100644
--- a/tensorflow/python/autograph/pyct/templates.py
+++ b/tensorflow/python/autograph/pyct/templates.py
@@ -123,6 +123,8 @@ class ReplaceTransformer(gast.NodeTransformer):
         self._check_inner_children_have_context(e)
       for e in node.values:
         self._check_inner_children_have_context(e)
+    elif isinstance(node, gast.Index):
+      self._check_inner_children_have_context(node.value)
     elif isinstance(node, gast.Subscript):
       self._check_inner_children_have_context(node.value)
       self._check_inner_children_have_context(node.slice)
diff --git a/tensorflow/python/autograph/pyct/templates_test.py b/tensorflow/python/autograph/pyct/templates_test.py
index 078d9a149b..3032241846 100644
--- a/tensorflow/python/autograph/pyct/templates_test.py
+++ b/tensorflow/python/autograph/pyct/templates_test.py
@@ -158,6 +158,18 @@ class TemplatesTest(test.TestCase):
     self.assertIsInstance(function_call_arg.elts[0].elts[0].ctx, gast.Load)
     self.assertIsInstance(function_call_arg.elts[0].elts[1].ctx, gast.Load)
 
+  def test_replace_index(self):
+    template = """
+      def test_fn(foo):
+        foo = 0
+    """
+
+    node = templates.replace(
+        template, foo=parser.parse_expression('foo(a[b]).bar'))[0]
+    function_call_arg = node.body[0].targets[0].value.args[0]
+    self.assertIsInstance(function_call_arg.ctx, gast.Load)
+    self.assertIsInstance(function_call_arg.slice.value.ctx, gast.Load)
+
   def test_replace_call_keyword(self):
     template = """
       def test_fn():
-- 
GitLab


From 23a07f2c1444509986eece54e486cdcf0b8e32e4 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 26 Sep 2018 09:47:58 -0700
Subject: [PATCH 0733/1357] [tf.data] Adding serialization support for
 `StatsAggregatorDatasets` to make it possible to apply static optimizations
 to input pipelines whose prefix contains the `set_stats_aggregator`
 transformation.

PiperOrigin-RevId: 214619583
---
 .../optimization/latency_all_edges_test.py         |  4 ++--
 .../kernels/data/stats_aggregator_dataset_op.cc    | 14 +++++++++++---
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py
index db380c02a9..e4f18222fd 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py
@@ -34,8 +34,8 @@ class OptimizeStatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
         optimization.assert_next(
             ["LatencyStats", "Map", "LatencyStats", "Prefetch",
              "LatencyStats"])).map(lambda x: x * x).prefetch(1).apply(
-                 optimization.optimize(["latency_all_edges"])).apply(
-                     stats_ops.set_stats_aggregator(stats_aggregator))
+                 stats_ops.set_stats_aggregator(stats_aggregator)).apply(
+                     optimization.optimize(["latency_all_edges"]))
     iterator = dataset.make_initializable_iterator()
     get_next = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
diff --git a/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc b/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
index f5314f7a75..7e528a71be 100644
--- a/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
@@ -34,16 +34,18 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
                                        &stats_aggregator_resource));
     core::ScopedUnref unref_stats_aggregator(stats_aggregator_resource);
 
-    *output = new Dataset(ctx, input, stats_aggregator_resource);
+    *output = new Dataset(ctx, input, ctx->input(1), stats_aggregator_resource);
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
     explicit Dataset(OpKernelContext* ctx, const DatasetBase* input,
+                     const Tensor& resource_handle,
                      StatsAggregatorResource* stats_aggregator_resource)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
+          resource_handle_(resource_handle),
           stats_aggregator_resource_(stats_aggregator_resource) {
       input_->Ref();
       stats_aggregator_resource_->Ref();
@@ -75,8 +77,13 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      return errors::Unimplemented("%s does not support serialization",
-                                   DebugString());
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
+      Node* resource_handle_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddTensor(resource_handle_, &resource_handle_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this, {input_graph_node, resource_handle_node}, output));
+      return Status::OK();
     }
 
    private:
@@ -129,6 +136,7 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
     };
 
     const DatasetBase* const input_;
+    const Tensor resource_handle_;
     StatsAggregatorResource* stats_aggregator_resource_;
   };
 };
-- 
GitLab


From 00ae12ad8bf5c348e4c31448e3922cbaab54cc03 Mon Sep 17 00:00:00 2001
From: Piotr Padlewski <prazek@google.com>
Date: Wed, 26 Sep 2018 10:09:24 -0700
Subject: [PATCH 0734/1357] Hoisting RandomUniform out of functions

This patch introduces optimization that hoists RandomUniform out of map functions.
By doing it, we make function stateless, which is crucial for parallelization and vectorization.

PiperOrigin-RevId: 214623178
---
 .../python/kernel_tests/optimization/BUILD    |  17 ++
 .../optimization/hoist_random_uniform_test.py | 102 +++++++
 tensorflow/core/framework/function_testlib.cc |  17 +-
 .../core/grappler/optimizers/data/BUILD       |  65 +++-
 .../grappler/optimizers/data/filter_fusion.cc |  13 +-
 .../optimizers/data/filter_fusion_test.cc     |  11 +-
 .../optimizers/data/graph_test_utils.cc       |  49 +++
 .../optimizers/data/graph_test_utils.h        |  36 +++
 .../grappler/optimizers/data/graph_utils.cc   |  15 +
 .../grappler/optimizers/data/graph_utils.h    |  10 +
 .../optimizers/data/hoist_random_uniform.cc   | 289 ++++++++++++++++++
 .../optimizers/data/hoist_random_uniform.h    |  55 ++++
 .../data/hoist_random_uniform_test.cc         |  84 +++++
 .../optimizers/data/map_and_batch_fusion.cc   |   5 +-
 .../optimizers/data/map_and_filter_fusion.cc  |  14 +-
 .../data/map_and_filter_fusion_test.cc        |  21 +-
 .../grappler/optimizers/data/map_fusion.cc    |  30 +-
 .../optimizers/data/map_fusion_test.cc        |  10 +-
 .../optimizers/data/map_parallelization.cc    |   3 -
 .../data/map_parallelization_test.cc          |  13 +-
 .../optimizers/data/map_vectorization.cc      |  13 +-
 .../data/shuffle_and_repeat_fusion.cc         |   2 +-
 22 files changed, 774 insertions(+), 100 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
 create mode 100644 tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/graph_test_utils.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/hoist_random_uniform.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/hoist_random_uniform.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/hoist_random_uniform_test.cc

diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
index a2fc244ced..1ae92bdeff 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
@@ -19,6 +19,23 @@ py_test(
     ],
 )
 
+py_test(
+    name = "hoist_random_uniform_test",
+    size = "small",
+    srcs = ["hoist_random_uniform_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/data/python/ops:optimization",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
 py_test(
     name = "latency_all_edges_test",
     size = "small",
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
new file mode 100644
index 0000000000..9518c2e1ad
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
@@ -0,0 +1,102 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for HostState optimization."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+
+from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.platform import test
+
+
+class HoistRandomUniformTest(test.TestCase, parameterized.TestCase):
+
+  @staticmethod
+  def map_functions():
+    plus_one = lambda x: x + 1
+
+    def random(_):
+      return random_ops.random_uniform([],
+                                       minval=1,
+                                       maxval=10,
+                                       dtype=dtypes.float32,
+                                       seed=42)
+
+    def random_with_assert(x):
+      y = random(x)
+      assert_op = control_flow_ops.Assert(math_ops.greater_equal(y, 1), [y])
+      with ops.control_dependencies([assert_op]):
+        return y
+
+    twice_random = lambda x: (random(x) + random(x)) / 2.
+
+    tests = [("PlusOne", plus_one, False), ("RandomUniform", random, True),
+             ("RandomWithAssert", random_with_assert, True),
+             ("TwiceRandom", twice_random, False)]
+    return tuple(tests)
+
+  @parameterized.named_parameters(*map_functions.__func__())
+  def testHoisting(self, function, will_optimize):
+    dataset = dataset_ops.Dataset.range(5).apply(
+        optimization.assert_next(
+            ["Zip[0]", "Map"] if will_optimize else ["Map"])).map(function)
+
+    dataset = dataset.apply(optimization.optimize(["hoist_random_uniform"]))
+    self._testDataset(dataset)
+
+  def testAdditionalInputs(self):
+    a = constant_op.constant(1, dtype=dtypes.float32)
+    b = constant_op.constant(0, dtype=dtypes.float32)
+    some_tensor = math_ops.mul(a, b)
+
+    def random_with_capture(_):
+      return some_tensor + random_ops.random_uniform(
+          [], minval=1, maxval=10, dtype=dtypes.float32, seed=42)
+
+    dataset = dataset_ops.Dataset.range(5).apply(
+        optimization.assert_next(
+            ["Zip[0]", "Map"])).map(random_with_capture).apply(
+                optimization.optimize(["hoist_random_uniform"]))
+    self._testDataset(dataset)
+
+  def _testDataset(self, dataset):
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+    previous_result = 0
+    with self.cached_session() as sess:
+      for _ in range(5):
+        result = sess.run(get_next)
+        self.assertLessEqual(1, result)
+        self.assertLessEqual(result, 10)
+        # This checks if the result is somehow random by checking if we are not
+        # generating the same values.
+        self.assertNotEqual(previous_result, result)
+        previous_result = result
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/core/framework/function_testlib.cc b/tensorflow/core/framework/function_testlib.cc
index d5c203d276..0445c242e9 100644
--- a/tensorflow/core/framework/function_testlib.cc
+++ b/tensorflow/core/framework/function_testlib.cc
@@ -93,7 +93,6 @@ FunctionDef IsZero() {
 
 FunctionDef RandomUniform() {
   const Tensor kZero = test::AsScalar<int64>(0);
-  const Tensor kTen = test::AsScalar<int64>(10);
 
   return FDH::Define(
       // Name
@@ -108,19 +107,11 @@ FunctionDef RandomUniform() {
         "Const",
         {},
         {{"value", kZero}, {"dtype", DT_INT64}}},
-       {{"random_uniform/min"},
-        "Const",
-        {},
-        {{"value", kZero}, {"dtype", DT_INT64}}},
-       {{"random_uniform/max"},
-        "Const",
-        {},
-        {{"value", kTen}, {"dtype", DT_INT64}}},
        {{"random_uniform"},
-        "RandomUniformInt",
-        {},
-        {{"T", DT_INT64},
-         {"Tout", DT_INT64},
+        "RandomUniform",
+        {"random_uniform/shape"},
+        {{"T", DT_INT32},
+         {"Tout", DT_FLOAT},
          {"seed", 87654321},
          {"seed2", 42}}}});
 }
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index cf305cebe1..d42a560cb2 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -22,6 +22,7 @@ cc_library(
         "//tensorflow/core/grappler/utils:topological_sort",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+        "//tensorflow/core:lib_internal",
     ] + tf_protos_all(),
 )
 
@@ -31,6 +32,7 @@ tf_cc_test(
     visibility = ["//visibility:public"],
     deps = [
         ":filter_fusion",
+        ":graph_test_utils",
         ":graph_utils",
         "//tensorflow/core:framework",
         "//tensorflow/core:test",
@@ -145,6 +147,62 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "graph_test_utils",
+    testonly = 1,
+    srcs = ["graph_test_utils.cc"],
+    hdrs = [
+        "graph_test_utils.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/grappler:mutable_graph_view",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:utils",
+        "//tensorflow/core:testlib",
+    ] + tf_protos_all(),
+)
+
+cc_library(
+    name = "hoist_random_uniform",
+    srcs = ["hoist_random_uniform.cc"],
+    hdrs = [
+        "hoist_random_uniform.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":function_utils",
+        ":graph_utils",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/grappler:mutable_graph_view",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:op_types",
+        "//tensorflow/core/grappler:utils",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+        "//tensorflow/core:lib_internal",
+    ] + tf_protos_all(),
+)
+
+tf_cc_test(
+    name = "hoist_random_uniform_test",
+    srcs = ["hoist_random_uniform_test.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":graph_test_utils",
+        ":graph_utils",
+        ":hoist_random_uniform",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/grappler:grappler_item",
+    ] + tf_protos_all(),
+)
+
 cc_library(
     name = "latency_all_edges",
     srcs = ["latency_all_edges.cc"],
@@ -256,7 +314,7 @@ cc_library(
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/utils:topological_sort",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
-        "//tensorflow/core:ptr_util",
+        "//tensorflow/core:lib_internal",
     ] + tf_protos_all(),
 )
 
@@ -265,6 +323,7 @@ tf_cc_test(
     srcs = ["map_and_filter_fusion_test.cc"],
     visibility = ["//visibility:public"],
     deps = [
+        ":graph_test_utils",
         ":graph_utils",
         ":map_and_filter_fusion",
         "//tensorflow/core:framework",
@@ -294,6 +353,7 @@ cc_library(
         "//tensorflow/core/grappler/utils:topological_sort",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+        "//tensorflow/core:lib_internal",
     ] + tf_protos_all(),
 )
 
@@ -302,6 +362,7 @@ tf_cc_test(
     srcs = ["map_fusion_test.cc"],
     visibility = ["//visibility:public"],
     deps = [
+        ":graph_test_utils",
         ":graph_utils",
         ":map_fusion",
         "//tensorflow/core:framework",
@@ -339,6 +400,7 @@ tf_cc_test(
     srcs = ["map_parallelization_test.cc"],
     visibility = ["//visibility:public"],
     deps = [
+        ":graph_test_utils",
         ":graph_utils",
         ":map_parallelization",
         "//tensorflow/core:framework",
@@ -422,6 +484,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":filter_fusion",
+        ":hoist_random_uniform",
         ":latency_all_edges",
         ":map_and_batch_fusion",
         ":map_and_filter_fusion",
diff --git a/tensorflow/core/grappler/optimizers/data/filter_fusion.cc b/tensorflow/core/grappler/optimizers/data/filter_fusion.cc
index c71aa6e804..1ad495bbad 100644
--- a/tensorflow/core/grappler/optimizers/data/filter_fusion.cc
+++ b/tensorflow/core/grappler/optimizers/data/filter_fusion.cc
@@ -43,19 +43,14 @@ NodeDef MakeFusedFilterNode(const NodeDef& first_filter_node,
   fused_node.set_op("FilterDataset");
   fused_node.add_input(first_filter_node.input(0));
 
-  auto copy_attribute = [](const string& attribute_name, const NodeDef& from,
-                           NodeDef* to) {
-    (*to->mutable_attr())[attribute_name] = from.attr().at(attribute_name);
-  };
-
   auto attr = first_filter_node.attr().at("predicate");
   *attr.mutable_func()->mutable_name() = fused_function.signature().name();
   (*fused_node.mutable_attr())["predicate"] = std::move(attr);
 
-  copy_attribute("Targuments", first_filter_node, &fused_node);
+  graph_utils::CopyAttribute("Targuments", first_filter_node, &fused_node);
 
   for (auto key : {"output_shapes", "output_types"})
-    copy_attribute(key, second_filter_node, &fused_node);
+    graph_utils::CopyAttribute(key, second_filter_node, &fused_node);
 
   return fused_node;
 }
@@ -120,8 +115,8 @@ Status FilterFusion::Optimize(Cluster* cluster, const GrapplerItem& item,
     // functions, or make sure that optimization passes run after filter
     // fusion.
     TF_RETURN_IF_ERROR(function_library.AddFunctionDef(*fused_predicate));
-    // TODO(prazek): we could also remove map functions from library if they
-    // are not used anymore.
+    // TODO(b/116285210): we could also remove map functions from library if
+    // they are not used anymore.
     nodes_to_delete.insert(first_filter_node->name());
     nodes_to_delete.insert(second_filter_node->name());
   }
diff --git a/tensorflow/core/grappler/optimizers/data/filter_fusion_test.cc b/tensorflow/core/grappler/optimizers/data/filter_fusion_test.cc
index 12b1924efd..c8becc5cc0 100644
--- a/tensorflow/core/grappler/optimizers/data/filter_fusion_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/filter_fusion_test.cc
@@ -19,8 +19,8 @@ limitations under the License.
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
-
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -28,14 +28,7 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
-NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name) {
-  return test::function::NDef(
-      name, "FilterDataset", {string(input_node_name)},
-      {{"predicate", FunctionDefHelper::FunctionRef("IsZero")},
-       {"Targuments", {}},
-       {"output_shapes", {}},
-       {"output_types", {}}});
-}
+using graph_tests_utils::MakeFilterNode;
 
 TEST(FilterFusionTest, FuseTwoFilterIntoOne) {
   using test::function::NDef;
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
new file mode 100644
index 0000000000..b2eec7220e
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
+
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace graph_tests_utils {
+
+NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
+                    StringPiece function_name) {
+  return test::function::NDef(
+      name, "MapDataset", {string(input_node_name)},
+      {{"f", FunctionDefHelper::FunctionRef(string(function_name))},
+       {"Targuments", {}},
+       {"output_shapes", gtl::ArraySlice<TensorShape>{}},
+       {"output_types", gtl::ArraySlice<DataType>{}}});
+}
+
+NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
+                       StringPiece function_name) {
+  return test::function::NDef(
+      name, "FilterDataset", {string(input_node_name)},
+      {{"predicate", FunctionDefHelper::FunctionRef(string(function_name))},
+       {"Targuments", {}},
+       {"output_shapes", gtl::ArraySlice<TensorShape>{}},
+       {"output_types", gtl::ArraySlice<TensorShape>{}}});
+}
+
+}  // end namespace graph_tests_utils
+}  // end namespace grappler
+}  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
new file mode 100644
index 0000000000..ca0fde997d
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
@@ -0,0 +1,36 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_TEST_UTILS_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_TEST_UTILS_H_
+
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace graph_tests_utils {
+
+NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
+                    StringPiece function_name = "XTimesTwo");
+
+NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
+                       StringPiece function_name = "IsZero");
+
+}  // end namespace graph_tests_utils
+}  // end namespace grappler
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_TEST_UTILS_H_
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index 2dd9ee822e..48825d0346 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -260,6 +260,21 @@ void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
   }
   function->mutable_signature()->set_name(std::move(name));
 }
+
+void CopyAttribute(const string& attribute_name, const NodeDef& from,
+                   NodeDef* to_node) {
+  (*to_node->mutable_attr())[attribute_name] = from.attr().at(attribute_name);
+}
+
+void ConcatAttributeList(const string& attribute_name, const NodeDef& first,
+                         const NodeDef& second, NodeDef* to_node) {
+  CopyAttribute(attribute_name, first, to_node);
+  (*to_node->mutable_attr())
+      .at(attribute_name)
+      .mutable_list()
+      ->MergeFrom(second.attr().at(attribute_name).list());
+}
+
 }  // end namespace graph_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h
index b117482db2..189a72d255 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h
@@ -106,6 +106,16 @@ void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph, NodeDef* node);
 void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
                                 FunctionDef* function);
 
+// Copies attribute having name `attribute_name` from node `from` to node
+// `to_node`.
+void CopyAttribute(const string& attribute_name, const NodeDef& from,
+                   NodeDef* to_node);
+
+// Concatenates list attribute having name `attribute_name` from `first` and
+// `second` node, setting it to `to_node`.
+void ConcatAttributeList(const string& attribute_name, const NodeDef& first,
+                         const NodeDef& second, NodeDef* to_node);
+
 }  // end namespace graph_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/hoist_random_uniform.cc b/tensorflow/core/grappler/optimizers/data/hoist_random_uniform.cc
new file mode 100644
index 0000000000..ce0b2db039
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/hoist_random_uniform.cc
@@ -0,0 +1,289 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/hoist_random_uniform.h"
+
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+#include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/protobuf.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+NodeDef MakeStatelessMap(const NodeDef& map_node, const NodeDef& zip_node,
+                         const FunctionDef& stateless_function,
+                         MutableGraphView* graph) {
+  NodeDef stateless_map;
+  graph_utils::SetUniqueGraphNodeName("stateless_map", graph->GetGraph(),
+                                      &stateless_map);
+
+  stateless_map.set_op("MapDataset");
+  stateless_map.add_input(zip_node.name());
+  // Add placeholders.
+  for (int i = 1; i < map_node.input_size(); i++)
+    stateless_map.add_input(map_node.input(i));
+
+  auto attr = map_node.attr().at("f");
+  *attr.mutable_func()->mutable_name() = stateless_function.signature().name();
+  *attr.mutable_func()->mutable_attr() = stateless_function.attr();
+  (*stateless_map.mutable_attr())["f"] = std::move(attr);
+
+  graph_utils::CopyAttribute("Targuments", map_node, &stateless_map);
+  for (auto key : {"output_shapes", "output_types"})
+    graph_utils::CopyAttribute(key, map_node, &stateless_map);
+
+  if (const auto* attr =
+          gtl::FindOrNull(map_node.attr(), "use_inter_op_parallelism"))
+    (*stateless_map.mutable_attr())["use_inter_op_parallelism"] = *attr;
+
+  return stateless_map;
+}
+
+NodeDef MakeRandomDataset(const NodeDef& random_uniform_node,
+                          MutableGraphView* graph) {
+  NodeDef random_dataset;
+  random_dataset.set_op("RandomDataset");
+  graph_utils::SetUniqueGraphNodeName("RandomDataset", graph->GetGraph(),
+                                      &random_dataset);
+
+  const auto* seed = graph_utils::AddScalarConstNode<int64>(
+      random_uniform_node.attr().at("seed").i(), graph);
+  const auto* seed2 = graph_utils::AddScalarConstNode<int64>(
+      random_uniform_node.attr().at("seed2").i(), graph);
+
+  random_dataset.add_input(seed->name());
+  random_dataset.add_input(seed2->name());
+
+  (*random_dataset.mutable_attr())["output_shapes"].mutable_list()->add_shape();
+  (*random_dataset.mutable_attr())["output_types"].mutable_list()->add_type(
+      DT_INT64);
+
+  return random_dataset;
+}
+
+NodeDef MakeBatchTwo(const NodeDef& random_dataset, MutableGraphView* graph) {
+  NodeDef batch_dataset;
+  batch_dataset.set_op("BatchDatasetV2");
+  graph_utils::SetUniqueGraphNodeName("pair_of_random", graph->GetGraph(),
+                                      &batch_dataset);
+  const auto* batch_size = graph_utils::AddScalarConstNode<int64>(2, graph);
+  const auto* drop_reminder = graph_utils::AddScalarConstNode(false, graph);
+  batch_dataset.add_input(random_dataset.name());
+  batch_dataset.add_input(batch_size->name());
+  batch_dataset.add_input(drop_reminder->name());
+
+  (*batch_dataset.mutable_attr())["output_shapes"]
+      .mutable_list()
+      ->add_shape()
+      ->mutable_dim()
+      ->Add()
+      ->set_size(-1);
+  (*batch_dataset.mutable_attr())["output_types"].mutable_list()->add_type(
+      DT_INT64);
+
+  return batch_dataset;
+}
+
+NodeDef MakeZipNode(const NodeDef& first_node, const NodeDef& second_node,
+                    MutableGraphView* graph) {
+  NodeDef zip_node;
+  graph_utils::SetUniqueGraphNodeName("zip_with_random", graph->GetGraph(),
+                                      &zip_node);
+
+  zip_node.set_op("ZipDataset");
+  zip_node.add_input(first_node.name());
+  zip_node.add_input(second_node.name());
+
+  for (auto key : {"output_shapes", "output_types"})
+    graph_utils::ConcatAttributeList(key, first_node, second_node, &zip_node);
+
+  (*zip_node.mutable_attr())["N"].set_i(2);
+
+  return zip_node;
+}
+
+// We need to insert our argument before the placeholders, which are the last
+// arguments.
+OpDef_ArgDef* InsertSeedArgument(OpDef* signature, int num_placeholders) {
+  int new_argument_idx = signature->input_arg_size() - num_placeholders;
+  signature->add_input_arg();
+  for (int i = signature->input_arg_size() - 1; i > new_argument_idx; i--) {
+    signature->mutable_input_arg()->SwapElements(i - 1, i);
+  }
+  auto* seed_arg = signature->mutable_input_arg(new_argument_idx);
+  seed_arg->set_name(strings::StrCat("seed_arg", new_argument_idx));
+  seed_arg->set_type(DT_INT64);
+
+  return seed_arg;
+}
+
+// Make function that uses `StatelessRandomUniform` instead of `RandomUniform`
+// to make it less statefull.  The function can still be stateful, but in when
+// other stateful ops are e.g. `Assert`, then it will be parallelizable.
+const FunctionDef* MakeLessStatefulFunction(const FunctionDef& map_function,
+                                            bool is_stateful,
+                                            int num_placeholders,
+                                            FunctionDefLibrary* library) {
+  FunctionDef* stateless_function = library->add_function();
+  *stateless_function = map_function;
+  if (is_stateful)
+    stateless_function->mutable_signature()->set_is_stateful(is_stateful);
+  graph_utils::SetUniqueGraphFunctionName("stateless_function", library,
+                                          stateless_function);
+
+  auto* seed_arg = InsertSeedArgument(stateless_function->mutable_signature(),
+                                      num_placeholders);
+
+  auto* const random_uniform = stateless_function->mutable_node_def(
+      function_utils::FindFunctionNodeWithOp("RandomUniform",
+                                             *stateless_function));
+
+  // Replace RandomUniform node with StatelessRandomUniform.
+  random_uniform->set_op("StatelessRandomUniform");
+  random_uniform->add_input(seed_arg->name());
+  (*random_uniform->mutable_attr())["Tseed"].set_type(DT_INT64);
+  random_uniform->mutable_attr()->erase("seed");
+  random_uniform->mutable_attr()->erase("seed2");
+
+  return stateless_function;
+}
+// This function returns true if function is stateful and has single
+// RandomUniform op and no other stateful ops except Assert.
+// `is_stateful_after_hoisting` is set to true if RandomUniform is the only
+// stateful op and hoisting can be performed.
+bool CanHoistRandomUniform(const FunctionDef& map_function,
+                           const FunctionLibraryDefinition& library,
+                           bool* is_stateful_after_hoisting,
+                           const NodeDef** random_uniform_op) {
+  if (!map_function.signature().is_stateful()) return false;
+  *is_stateful_after_hoisting = true;
+
+  bool have_other_stateful_ops = false;
+
+  for (const auto& node : map_function.node_def()) {
+    const OpDef* op_def;
+    TF_CHECK_OK(library.LookUpOpDef(node.op(), &op_def));
+    // Skip stateless nodes and assert, as it does not actually have a state.
+    if (!op_def->is_stateful()) continue;
+
+    if (op_def->name() == "Assert") {
+      have_other_stateful_ops = true;
+      continue;
+    }
+
+    // TODO(prazek): For now we only handle RandomUniform, we should handle
+    // RandomUniformInt as well.
+    if (op_def->name() != "RandomUniform") return false;
+
+    // TODO(prazek): For now we can only hoist single RandomUniform.
+    if (*random_uniform_op != nullptr) return false;
+
+    *random_uniform_op = &node;
+  }
+
+  if (!have_other_stateful_ops) *is_stateful_after_hoisting = false;
+
+  // Have we found single RandomUniform?
+  return *random_uniform_op != nullptr;
+}
+
+int NumberOfPlaceholders(const NodeDef& map_node) {
+  // First input of MapDataset is the argument to the function.  Rest of the
+  // inputs are placeholders.
+  return map_node.input_size() - 1;
+}
+
+}  // namespace
+
+Status HoistRandomUniform::Optimize(Cluster* cluster, const GrapplerItem& item,
+                                    GraphDef* output) {
+  *output = item.graph;
+
+  MutableGraphView graph(output);
+  std::set<string> nodes_to_delete;
+  FunctionLibraryDefinition function_library(OpRegistry::Global(),
+                                             item.graph.library());
+
+  auto get_map_node = [](const NodeDef& node) -> const NodeDef* {
+    // TODO(prazek): we could also handle ParallelMapDataset and
+    // MapAndBatchDataset.
+    if (node.op() == "MapDataset") return &node;
+    return nullptr;
+  };
+
+  for (const NodeDef& node : item.graph.node()) {
+    const NodeDef* map_node = get_map_node(node);
+    if (!map_node) continue;
+
+    const auto& fun = map_node->attr().at("f");
+    const FunctionDef* func = function_library.Find(fun.func().name());
+
+    const NodeDef* random_uniform_op = nullptr;
+    bool is_stateful_after_hoisting = true;
+    if (!CanHoistRandomUniform(*func, function_library,
+                               &is_stateful_after_hoisting, &random_uniform_op))
+      continue;
+    const auto* random_seed_dataset =
+        graph.AddNode(MakeRandomDataset(*random_uniform_op, &graph));
+
+    const auto* batch_dataset =
+        graph.AddNode(MakeBatchTwo(*random_seed_dataset, &graph));
+
+    const NodeDef& parent_node = *graph_utils::GetInputNode(*map_node, graph);
+
+    const auto* zip_node =
+        graph.AddNode(MakeZipNode(parent_node, *batch_dataset, &graph));
+
+    const auto* stateless_func = MakeLessStatefulFunction(
+        *func, is_stateful_after_hoisting, NumberOfPlaceholders(*map_node),
+        output->mutable_library());
+
+    const auto* stateless_map = graph.AddNode(
+        MakeStatelessMap(*map_node, *zip_node, *stateless_func, &graph));
+
+    graph.ReplaceInput(*map_node, *stateless_map);
+
+    // TODO(b/116285210): we could also remove map functions from library if
+    // they are not used anymore.
+    nodes_to_delete.insert(map_node->name());
+  }
+
+  graph.DeleteNodes(nodes_to_delete);
+  return Status::OK();
+}
+
+void HoistRandomUniform::Feedback(Cluster* cluster, const GrapplerItem& item,
+                                  const GraphDef& optimize_output,
+                                  double result) {
+  // no-op
+}
+
+REGISTER_GRAPH_OPTIMIZER_AS(HoistRandomUniform, "hoist_random_uniform");
+
+}  // end namespace grappler
+}  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/hoist_random_uniform.h b/tensorflow/core/grappler/optimizers/data/hoist_random_uniform.h
new file mode 100644
index 0000000000..d1bcf6782d
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/hoist_random_uniform.h
@@ -0,0 +1,55 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_RANDOM_UNIFORM_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_RANDOM_UNIFORM_H_
+
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+
+namespace tensorflow {
+namespace grappler {
+
+// This optimization hoists instances of `random_uniform` out of a function
+// with the aim of making it stateless.  It creates a new function that takes a
+// random seed as an extra argument and uses `stateless_random_uniform` instead
+// of `random_uniform` to make it stateless.
+// It also creates RandomDataset(seed).batch(2), which is zipped with old input
+// to the map.  The batching in RandomDataset is because we need 2 seeds for
+// `stateless_random_uniform`.
+// TODO(prazek): for now only `RandomUniform` is handled, but we could handle
+// `RandomUniformInt` similarly.
+class HoistRandomUniform : public CustomGraphOptimizer {
+ public:
+  HoistRandomUniform() = default;
+  ~HoistRandomUniform() override = default;
+
+  string name() const override { return "hoist_random_uniform"; };
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* output) override;
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimize_output, double result) override;
+};
+
+}  // end namespace grappler
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_RANDOM_UNIFORM_H_
diff --git a/tensorflow/core/grappler/optimizers/data/hoist_random_uniform_test.cc b/tensorflow/core/grappler/optimizers/data/hoist_random_uniform_test.cc
new file mode 100644
index 0000000000..455459e3f6
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/hoist_random_uniform_test.cc
@@ -0,0 +1,84 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/hoist_random_uniform.h"
+
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+TEST(HoistRandomUniform, SimpleHoisting) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+       NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+       NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       NDef("filename", "Const", {}, {{"value", ""}, {"dtype", DT_STRING}}),
+       NDef("range", "RangeDataset", {"start", "stop", "step"},
+            {{"output_shapes", gtl::ArraySlice<TensorShape>{}},
+             {"output_types", gtl::ArraySlice<DataType>{}}}),
+       graph_tests_utils::MakeMapNode("map1", "range", "RandomUniform"),
+       NDef("cache", "CacheDataset", {"map1", "filename"}, {})},
+      // FunctionLib
+      {
+          test::function::RandomUniform(),
+      });
+
+  HoistRandomUniform optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map1", output));
+  const int new_map_id = graph_utils::FindGraphNodeWithOp("MapDataset", output);
+  const int zip_dataset_id =
+      graph_utils::FindGraphNodeWithOp("ZipDataset", output);
+  const int random_dataset_id =
+      graph_utils::FindGraphNodeWithOp("RandomDataset", output);
+  const int batch_random_id =
+      graph_utils::FindGraphNodeWithOp("BatchDatasetV2", output);
+  ASSERT_NE(random_dataset_id, -1);
+  ASSERT_NE(zip_dataset_id, -1);
+  ASSERT_NE(new_map_id, -1);
+  ASSERT_NE(batch_random_id, -1);
+
+  const auto& new_map = output.node(new_map_id);
+  const auto& zip = output.node(zip_dataset_id);
+  const auto& random = output.node(random_dataset_id);
+  const auto& batch = output.node(batch_random_id);
+
+  ASSERT_EQ(new_map.input_size(), 1);
+  EXPECT_EQ(new_map.input(0), zip.name());
+
+  ASSERT_EQ(zip.input_size(), 2);
+  EXPECT_EQ(zip.input(0), "range");
+  EXPECT_EQ(zip.input(1), batch.name());
+
+  ASSERT_EQ(batch.input_size(), 3);
+  EXPECT_EQ(batch.input(0), random.name());
+}
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc
index 63945b8b9e..e66766eb23 100644
--- a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc
@@ -80,11 +80,12 @@ NodeDef MakeMapAndBatchNode(const NodeDef& map_node, const NodeDef& batch_node,
 
   // Set `f` and `Targuments` attributes.
   for (auto key : {"f", "Targuments"}) {
-    (*new_node.mutable_attr())[key] = map_node.attr().at(key);
+    graph_utils::CopyAttribute(key, map_node, &new_node);
   }
+
   // Set `output_types` and `output_shapes` attributes.
   for (auto key : {"output_shapes", "output_types"}) {
-    (*new_node.mutable_attr())[key] = batch_node.attr().at(key);
+    graph_utils::CopyAttribute(key, batch_node, &new_node);
   }
   return new_node;
 }
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion.cc b/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion.cc
index f1844a141c..c4868eacbb 100644
--- a/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace tensorflow {
@@ -41,19 +42,18 @@ NodeDef MakeFusedNode(const NodeDef& map_node,
   fused_node.set_op("MapDataset");
   fused_node.add_input(map_node.input(0));
 
-  auto copy_attribute = [](const string& attribute_name, const NodeDef& from,
-                           NodeDef* to) {
-    (*to->mutable_attr())[attribute_name] = from.attr().at(attribute_name);
-  };
-
   auto attr = map_node.attr().at("f");
   attr.mutable_func()->set_name(fused_function.signature().name());
   (*fused_node.mutable_attr())["f"] = std::move(attr);
 
-  copy_attribute("Targuments", map_node, &fused_node);
+  graph_utils::CopyAttribute("Targuments", map_node, &fused_node);
 
   for (auto key : {"output_shapes", "output_types"})
-    copy_attribute(key, map_node, &fused_node);
+    graph_utils::CopyAttribute(key, map_node, &fused_node);
+
+  if (const auto* attr =
+          gtl::FindOrNull(map_node.attr(), "use_inter_op_parallelism"))
+    (*fused_node.mutable_attr())["use_inter_op_parallelism"] = *attr;
 
   // Add the predicate output attributes.
   (*fused_node.mutable_attr())["output_types"]
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion_test.cc b/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion_test.cc
index f029a093fa..6e6da37d7c 100644
--- a/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -27,24 +28,8 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 namespace {
-
-NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name) {
-  return test::function::NDef(
-      name, "MapDataset", {string(input_node_name)},
-      {{"f", FunctionDefHelper::FunctionRef("XTimesTwo")},
-       {"Targuments", {}},
-       {"output_shapes", {}},
-       {"output_types", {}}});
-}
-
-NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name) {
-  return test::function::NDef(
-      name, "FilterDataset", {string(input_node_name)},
-      {{"predicate", FunctionDefHelper::FunctionRef("IsZero")},
-       {"Targuments", {}},
-       {"output_shapes", {}},
-       {"output_types", {}}});
-}
+using graph_tests_utils::MakeFilterNode;
+using graph_tests_utils::MakeMapNode;
 
 TEST(MapAndFilterFusionTest, FuseMapAndFilter) {
   using test::function::NDef;
diff --git a/tensorflow/core/grappler/optimizers/data/map_fusion.cc b/tensorflow/core/grappler/optimizers/data/map_fusion.cc
index a78ecb09f7..bd943342e8 100644
--- a/tensorflow/core/grappler/optimizers/data/map_fusion.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_fusion.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace tensorflow {
@@ -40,24 +41,31 @@ NodeDef MakeFusedNode(const NodeDef& parent_map_node, const NodeDef& map_node,
   NodeDef fused_node;
   graph_utils::SetUniqueGraphNodeName("fused_map", graph->GetGraph(),
                                       &fused_node);
-
   fused_node.set_op("MapDataset");
   fused_node.add_input(parent_map_node.input(0));
 
-  auto copy_attribute = [](const string& attribute_name, const NodeDef& from,
-                           NodeDef* to) {
-    (*to->mutable_attr())[attribute_name] = from.attr().at(attribute_name);
-  };
-
   auto attr = parent_map_node.attr().at("f");
   *attr.mutable_func()->mutable_name() = fused_function.signature().name();
   (*fused_node.mutable_attr())["f"] = std::move(attr);
 
-  copy_attribute("Targuments", parent_map_node, &fused_node);
-
+  graph_utils::CopyAttribute("Targuments", parent_map_node, &fused_node);
   for (auto key : {"output_shapes", "output_types"})
-    copy_attribute(key, map_node, &fused_node);
+    graph_utils::CopyAttribute(key, map_node, &fused_node);
 
+  auto value_or_false = [](const AttrValue* attr) {
+    if (!attr) return false;
+    return attr->b();
+  };
+
+  const auto* first_parallelism =
+      gtl::FindOrNull(parent_map_node.attr(), "use_inter_op_parallelism");
+  const auto* second_parallelism =
+      gtl::FindOrNull(map_node.attr(), "use_inter_op_parallelism");
+  // Some graphs cannot execute with use_inter_op_parallelism=False, so we need
+  // to set it to true if one of the ops have it set to true.
+  if (value_or_false(first_parallelism) || value_or_false(second_parallelism)) {
+    (*fused_node.mutable_attr())["use_inter_op_parallelism"].set_b(true);
+  }
   return fused_node;
 }
 
@@ -123,8 +131,8 @@ Status MapFusion::Optimize(Cluster* cluster, const GrapplerItem& item,
     // fusion.
     TF_RETURN_IF_ERROR(function_library.AddFunctionDef(*fused_function));
 
-    // TODO(prazek): we could also remove map functions from library if they
-    // are not used anymore.
+    // TODO(b/116285210): we could also remove map functions from library if
+    // they are not used anymore.
     nodes_to_delete.insert(parent_map_node->name());
     nodes_to_delete.insert(map_node->name());
   }
diff --git a/tensorflow/core/grappler/optimizers/data/map_fusion_test.cc b/tensorflow/core/grappler/optimizers/data/map_fusion_test.cc
index b25dfbd0b8..8889f9dddd 100644
--- a/tensorflow/core/grappler/optimizers/data/map_fusion_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_fusion_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -28,14 +29,7 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
-NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name) {
-  return test::function::NDef(
-      name, "MapDataset", {string(input_node_name)},
-      {{"f", FunctionDefHelper::FunctionRef("XTimesTwo")},
-       {"Targuments", {}},
-       {"output_shapes", {}},
-       {"output_types", {}}});
-}
+using graph_tests_utils::MakeMapNode;
 
 TEST(MapFusionTest, FuseTwoMapNodesIntoOne) {
   using test::function::NDef;
diff --git a/tensorflow/core/grappler/optimizers/data/map_parallelization.cc b/tensorflow/core/grappler/optimizers/data/map_parallelization.cc
index 305325e434..782c9f48b7 100644
--- a/tensorflow/core/grappler/optimizers/data/map_parallelization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_parallelization.cc
@@ -84,9 +84,6 @@ Status MapParallelization::Optimize(Cluster* cluster, const GrapplerItem& item,
 
     auto* parallel_map = graph.AddNode(MakeParallelMap(*map_node, &graph));
     graph.ReplaceInput(*map_node, *parallel_map);
-
-    // TODO(prazek): we could also remove map functions from library if they
-    // are not used anymore.
     nodes_to_delete.insert(map_node->name());
   }
 
diff --git a/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc b/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc
index b2a5d9b6af..9fdfe8af30 100644
--- a/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc
@@ -19,8 +19,8 @@ limitations under the License.
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
-
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -28,16 +28,7 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
-NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
-                    StringPiece function_name) {
-  return test::function::NDef(
-      name, "MapDataset", {string(input_node_name)},
-      {{"f", FunctionDefHelper::FunctionRef(string(function_name))},
-       {"Targuments", {}},
-       {"output_shapes", {}},
-       {"output_types", {}}});
-}
-
+using graph_tests_utils::MakeMapNode;
 const char stateless_fun_name[] = "XTimesTwo";
 const char stateful_fun_name[] = "RandomUniform";
 
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index 7a2f1910da..32ab912619 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -35,10 +35,6 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
-void CopyAttribute(const string& attr_name, const NodeDef& from, NodeDef* to) {
-  (*to->mutable_attr())[attr_name] = from.attr().at(attr_name);
-}
-
 // Returns a FunctionDef containing a MapDefun op that wraps the original
 // function.
 FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
@@ -61,7 +57,7 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
   for (const string& k : {"f", "output_types", "output_shapes"}) {
     // Function, output types and (unbatched) shapes are the same as the
     // original map node.
-    CopyAttribute(k, map_node, map_defun_node);
+    graph_utils::CopyAttribute(k, map_node, map_defun_node);
   }
 
   // Get types of input arguments from original map function
@@ -195,13 +191,16 @@ NodeDef MakeNewMapNode(const NodeDef& old_map_node,
   }
 
   // Set attrs
-  CopyAttribute("Targuments", old_map_node, &map_node);
+  graph_utils::CopyAttribute("Targuments", old_map_node, &map_node);
   auto& func_attr = (*map_node.mutable_attr())["f"];
   func_attr.mutable_func()->set_name(vectorized_func.signature().name());
 
   for (auto key : {"output_shapes", "output_types"}) {
-    CopyAttribute(key, old_batch_node, &map_node);
+    graph_utils::CopyAttribute(key, old_batch_node, &map_node);
   }
+
+  (*map_node.mutable_attr())["use_inter_op_parallelism"].set_b(true);
+
   return map_node;
 }
 
diff --git a/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc
index cb0ff670e8..99c4afa634 100644
--- a/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc
+++ b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc
@@ -64,7 +64,7 @@ Status ShuffleAndRepeatFusion::Optimize(Cluster* cluster,
 
     // Set `output_types` and `output_shapes` attributes.
     for (auto key : {"output_shapes", "output_types"}) {
-      (*new_node.mutable_attr())[key] = repeat_node.attr().at(key);
+      graph_utils::CopyAttribute(key, repeat_node, &new_node);
     }
     return new_node;
   };
-- 
GitLab


From fa76895ad577246a8ab241e668765cad651558fb Mon Sep 17 00:00:00 2001
From: Isaac Burbank <bassmanburbank@gmail.com>
Date: Wed, 26 Sep 2018 11:20:44 -0600
Subject: [PATCH 0735/1357] Update python_object_to_proto_visitor.py

Changed test key for FullArgSpec to check for `varkw`, replacing the old ArgSpec key `keywords`
---
 tensorflow/tools/api/lib/python_object_to_proto_visitor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
index 2a40caf720..a8e69fda4f 100644
--- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
+++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
@@ -49,7 +49,7 @@ def _SanitizedArgSpec(obj):
   output_string = ''
   unsanitized_arg_spec = tf_inspect.getfullargspec(obj)
 
-  for clean_attr in ('args', 'varargs', 'keywords'):
+  for clean_attr in ('args', 'varargs', 'varkw'):
     output_string += '%s=%s, ' % (clean_attr,
                                   getattr(unsanitized_arg_spec, clean_attr))
 
-- 
GitLab


From a8203086b9bd0a4cd874e42aead0758a3365c387 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Wed, 26 Sep 2018 10:26:23 -0700
Subject: [PATCH 0736/1357] Remove quantize_graph script. TF Lite quantization
 is the supported mobile quantization tooling.

PiperOrigin-RevId: 214625933
---
 tensorflow/tools/quantization/BUILD           |   78 -
 tensorflow/tools/quantization/graph_to_dot.py |   68 -
 .../tools/quantization/quantize_graph.py      | 1302 -----------------
 .../tools/quantization/quantize_graph_test.py |  966 ------------
 4 files changed, 2414 deletions(-)
 delete mode 100644 tensorflow/tools/quantization/BUILD
 delete mode 100644 tensorflow/tools/quantization/graph_to_dot.py
 delete mode 100644 tensorflow/tools/quantization/quantize_graph.py
 delete mode 100644 tensorflow/tools/quantization/quantize_graph_test.py

diff --git a/tensorflow/tools/quantization/BUILD b/tensorflow/tools/quantization/BUILD
deleted file mode 100644
index 17443a8617..0000000000
--- a/tensorflow/tools/quantization/BUILD
+++ /dev/null
@@ -1,78 +0,0 @@
-# Description:
-#   Utilities for quantizing TensorFlow graphs to lower bit depths.
-
-package(default_visibility = ["//visibility:public"])
-
-licenses(["notice"])  # Apache 2.0
-
-exports_files(["LICENSE"])
-
-load("//tensorflow:tensorflow.bzl", "py_test")
-
-py_library(
-    name = "quantize_graph_lib",
-    srcs = ["quantize_graph.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:graph_util",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:session",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:tensor_util",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_binary(
-    name = "quantize_graph",
-    srcs = ["quantize_graph.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python",  # TODO(b/34059704): remove when fixed
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:graph_util",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:tensor_util",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "quantize_graph_test",
-    size = "small",
-    srcs = ["quantize_graph_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["nomsan"],  # http://b/32242946
-    deps = [
-        ":quantize_graph",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:client",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:graph_util",
-        "//tensorflow/python:platform",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_binary(
-    name = "graph_to_dot",
-    srcs = ["graph_to_dot.py"],
-    main = "graph_to_dot.py",
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:platform",
-    ],
-)
diff --git a/tensorflow/tools/quantization/graph_to_dot.py b/tensorflow/tools/quantization/graph_to_dot.py
deleted file mode 100644
index 81d6aa62c8..0000000000
--- a/tensorflow/tools/quantization/graph_to_dot.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Converts a GraphDef file into a DOT format suitable for visualization.
-
-This script takes a GraphDef representing a network, and produces a DOT file
-that can then be visualized by GraphViz tools like dot and xdot.
-
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import re
-
-from google.protobuf import text_format
-
-from tensorflow.core.framework import graph_pb2
-from tensorflow.python.platform import app
-from tensorflow.python.platform import flags
-from tensorflow.python.platform import gfile
-
-FLAGS = flags.FLAGS
-
-flags.DEFINE_string("graph", "", """TensorFlow 'GraphDef' file to load.""")
-flags.DEFINE_bool("input_binary", True,
-                  """Whether the input files are in binary format.""")
-flags.DEFINE_string("dot_output", "", """Where to write the DOT output.""")
-
-
-def main(unused_args):
-  if not gfile.Exists(FLAGS.graph):
-    print("Input graph file '" + FLAGS.graph + "' does not exist!")
-    return -1
-
-  graph = graph_pb2.GraphDef()
-  with open(FLAGS.graph, "r") as f:
-    if FLAGS.input_binary:
-      graph.ParseFromString(f.read())
-    else:
-      text_format.Merge(f.read(), graph)
-
-  with open(FLAGS.dot_output, "wb") as f:
-    print("digraph graphname {", file=f)
-    for node in graph.node:
-      output_name = node.name
-      print("  \"" + output_name + "\" [label=\"" + node.op + "\"];", file=f)
-      for input_full_name in node.input:
-        parts = input_full_name.split(":")
-        input_name = re.sub(r"^\^", "", parts[0])
-        print("  \"" + input_name + "\" -> \"" + output_name + "\";", file=f)
-    print("}", file=f)
-  print("Created DOT file '" + FLAGS.dot_output + "'.")
-
-
-if __name__ == "__main__":
-  app.run()
diff --git a/tensorflow/tools/quantization/quantize_graph.py b/tensorflow/tools/quantization/quantize_graph.py
deleted file mode 100644
index 3acb532263..0000000000
--- a/tensorflow/tools/quantization/quantize_graph.py
+++ /dev/null
@@ -1,1302 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Transforms a float-trained graph into an equivalent quantized version.
-
-An example of command-line usage is:
-bazel build tensorflow/tools/quantization:quantize_graph \
-&& bazel-bin/tensorflow/tools/quantization/quantize_graph \
---input=tensorflow_inception_graph.pb
---output_node_names="softmax2" --print_nodes --output=/tmp/quantized_graph.pb \
---mode=eightbit --logtostderr
-
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import re
-import numpy as np
-
-from tensorflow.core.framework import attr_value_pb2
-from tensorflow.core.framework import graph_pb2
-from tensorflow.core.framework import node_def_pb2
-from tensorflow.python.client import session
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import graph_util
-from tensorflow.python.framework import importer
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import app
-from tensorflow.python.platform import flags as flags_lib
-from tensorflow.python.platform import gfile
-
-flags = flags_lib
-FLAGS = flags.FLAGS
-
-flags.DEFINE_boolean("print_nodes", False, """Lists all nodes in the model.""")
-flags.DEFINE_string("input", "", """TensorFlow 'GraphDef' file to load.""")
-flags.DEFINE_string("output_node_names", "",
-                    """Output node names, comma separated.""")
-flags.DEFINE_string("output", "", """File to save the output graph to.""")
-flags.DEFINE_integer("bitdepth", 8,
-                     """How many bits to quantize the graph to.""")
-flags.DEFINE_string("mode", "round",
-                    """What transformation to apply (round, quantize,"""
-                    """ eightbit, weights, or weights_rounded).""")
-flags.DEFINE_string("test_input_dims", "1,224,224,3",
-                    """The size of the input tensor to use when testing a"""
-                    """ graph loaded from a file.""")
-flags.DEFINE_boolean("strip_redundant_quantization", True,
-                     """Removes redundant dequantize/quantize pairs.""")
-flags.DEFINE_boolean("quantized_input", False,
-                     "If true, assume Placeholders are quantized with values "
-                     "covering [--quantized_input_min,--quantized_input_max]. "
-                     "Only supported when --mode=eightbit")
-flags.DEFINE_float("quantized_input_min", 0,
-                   "The minimum of the actual input range when "
-                   "--quantized_input")
-flags.DEFINE_float("quantized_input_max", 1,
-                   "The maximum of the actual input range when "
-                   "--quantized_input")
-flags.DEFINE_float(
-    "quantized_fallback_min", None,
-    "The fallback 'min' value to use for layers which lack min-max "
-    "information. Note: this should be considered a coarse tool just good "
-    "enough for experimentation purposes, since graphs quantized in this way "
-    "would be very inaccurate.")
-flags.DEFINE_float(
-    "quantized_fallback_max", None,
-    "The fallback 'max' value to use for layers which lack min-max "
-    "information. Note: this should be considered a coarse tool just good "
-    "enough for experimentation purposes, since graphs quantized in this way "
-    "would be very inaccurate.")
-
-
-def print_input_nodes(current_node, nodes_map, indent, already_visited):
-  print(" " * indent + current_node.op + ":" + current_node.name)
-  already_visited[current_node.name] = True
-  for input_node_name in current_node.input:
-    if input_node_name in already_visited:
-      continue
-    input_node = nodes_map[input_node_name]
-    print_input_nodes(input_node, nodes_map, indent + 1, already_visited)
-
-
-def create_node(op, name, inputs):
-  new_node = node_def_pb2.NodeDef()
-  new_node.op = op
-  new_node.name = name
-  for input_name in inputs:
-    new_node.input.extend([input_name])
-  return new_node
-
-
-def create_constant_node(name, value, dtype, shape=None):
-  node = create_node("Const", name, [])
-  set_attr_dtype(node, "dtype", dtype)
-  set_attr_tensor(node, "value", value, dtype, shape)
-  return node
-
-
-def copy_attr(node, key, attr_value):
-  try:
-    node.attr[key].CopyFrom(attr_value)
-  except KeyError:
-    pass
-
-
-def set_attr_dtype(node, key, value):
-  try:
-    node.attr[key].CopyFrom(
-        attr_value_pb2.AttrValue(type=value.as_datatype_enum))
-  except KeyError:
-    pass
-
-
-def set_attr_shape(node, key, value):
-  try:
-    node.attr[key].CopyFrom(
-        attr_value_pb2.AttrValue(shape=tensor_shape.as_shape(value).as_proto()))
-  except KeyError:
-    pass
-
-
-def set_attr_tensor(node, key, value, dtype, shape=None):
-  try:
-    node.attr[key].CopyFrom(
-        attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
-            value, dtype=dtype, shape=shape)))
-  except KeyError:
-    pass
-
-
-def set_attr_string(node, key, value):
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(s=value))
-  except KeyError:
-    pass
-
-
-def set_attr_int_list(node, key, value):
-  list_value = attr_value_pb2.AttrValue.ListValue(i=value)
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(list=list_value))
-  except KeyError:
-    pass
-
-
-def set_attr_bool(node, key, value):
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(b=value))
-  except KeyError:
-    pass
-
-
-def set_attr_int(node, key, value):
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(i=value))
-  except KeyError:
-    pass
-
-
-def set_attr_float(node, key, value):
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(f=value))
-  except KeyError:
-    pass
-
-
-def node_name_from_input(node_name):
-  """Strips off ports and other decorations to get the underlying node name."""
-  if node_name.startswith("^"):
-    node_name = node_name[1:]
-  m = re.search(r"(.*):\d+$", node_name)
-  if m:
-    node_name = m.group(1)
-  return node_name
-
-
-def ensure_tensor_name_has_port(node_name):
-  """Makes sure that a tensor name has :0 if no explicit port exists."""
-  m = re.search(r"(.*):\d+$", node_name)
-  if m:
-    name_with_port = node_name
-  else:
-    name_with_port = node_name + ":0"
-  return name_with_port
-
-
-def unique_node_name_from_input(node_name):
-  """Replaces invalid characters in input names to get a unique node name."""
-  return node_name.replace(":", "__port__").replace("^", "__hat__")
-
-
-def quantize_array(arr, num_buckets):
-  """Quantizes a numpy array.
-
-  This function maps each scalar in arr to the center of one of num_buckets
-  buckets. For instance,
-  quantize_array([0, 0.3, 0.6, 1], 2) => [0.25, 0.25, 0.75, 0.75]
-
-  Args:
-    arr: The numpy array to quantize.
-    num_buckets: The number of buckets to map "var" to.
-  Returns:
-    The quantized numpy array.
-  Raises:
-    ValueError: when num_buckets < 1.
-  """
-  if num_buckets < 1:
-    raise ValueError("num_buckets must be >= 1")
-  arr_max = arr.max()
-  arr_min = arr.min()
-  if arr_max == arr_min:
-    return arr
-  bucket_width = (arr_max - arr_min) / num_buckets
-  # Map scalars to bucket indices. Take special care of max(arr).
-  bucket_indices = np.floor((arr - arr_min) / bucket_width)
-  bucket_indices[bucket_indices == num_buckets] = num_buckets - 1
-  # Map each scalar to the center of a bucket.
-  arr = arr_min + bucket_width * (bucket_indices + 0.5)
-  return arr
-
-
-def quantize_weight_rounded(input_node):
-  """Returns a replacement node for input_node containing bucketed floats."""
-  input_tensor = input_node.attr["value"].tensor
-  tensor_value = tensor_util.MakeNdarray(input_tensor)
-  shape = input_tensor.tensor_shape
-  # Currently, the parameter FLAGS.bitdepth is used to compute the
-  # number of buckets as 1 << FLAGS.bitdepth, meaning the number of
-  # buckets can only be a power of 2.
-  # This could be fixed by introducing a new parameter, num_buckets,
-  # which would allow for more flexibility in chosing the right model
-  # size/accuracy tradeoff. But I didn't want to add more parameters
-  # to this script than absolutely necessary.
-  num_buckets = 1 << FLAGS.bitdepth
-  tensor_value_rounded = quantize_array(tensor_value, num_buckets)
-  tensor_shape_list = tensor_util.TensorShapeProtoToList(shape)
-  return [
-      create_constant_node(
-          input_node.name,
-          tensor_value_rounded,
-          dtypes.float32,
-          shape=tensor_shape_list)
-  ]
-
-
-def quantize_weight_eightbit(input_node, quantization_mode):
-  """Returns replacement nodes for input_node using the Dequantize op."""
-  base_name = input_node.name + "_"
-  quint8_const_name = base_name + "quint8_const"
-  min_name = base_name + "min"
-  max_name = base_name + "max"
-  float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
-  min_value = np.min(float_tensor.flatten())
-  max_value = np.max(float_tensor.flatten())
-  # Make sure that the range includes zero.
-  if min_value > 0.0:
-    min_value = 0.0
-  # min_value == max_value is a tricky case. It can occur for general
-  # tensors, and of course for scalars. The quantized ops cannot deal
-  # with this case, so we set max_value to something else.
-  # It's a tricky question what is the numerically best solution to
-  # deal with this degeneracy.
-  # TODO(petewarden): Better use a tolerance than a hard comparison?
-  if min_value == max_value:
-    if abs(min_value) < 0.000001:
-      max_value = min_value + 1.0
-    elif min_value > 0:
-      max_value = 2 * min_value
-    else:
-      max_value = min_value / 2.0
-
-  sess = session.Session()
-  with sess.as_default():
-    quantize_op = array_ops.quantize_v2(
-        float_tensor,
-        min_value,
-        max_value,
-        dtypes.quint8,
-        mode=quantization_mode)
-    quint8_tensor = quantize_op[0].eval()
-  shape = tensor_util.TensorShapeProtoToList(input_node.attr["value"]
-                                             .tensor.tensor_shape)
-  quint8_const_node = create_constant_node(
-      quint8_const_name, quint8_tensor, dtypes.quint8, shape=shape)
-  min_node = create_constant_node(min_name, min_value, dtypes.float32)
-  max_node = create_constant_node(max_name, max_value, dtypes.float32)
-  dequantize_node = create_node("Dequantize", input_node.name,
-                                [quint8_const_name, min_name, max_name])
-  set_attr_dtype(dequantize_node, "T", dtypes.quint8)
-  set_attr_string(dequantize_node, "mode", quantization_mode)
-  return [quint8_const_node, min_node, max_node, dequantize_node]
-
-
-EightbitizeRecursionState = collections.namedtuple(
-    "EightbitizeRecursionState",
-    ["already_visited", "output_node_stack", "merged_with_fake_quant"])
-
-
-class GraphRewriter(object):
-  """Takes a float graph, and rewrites it in quantized form."""
-
-  def __init__(self,
-               input_graph,
-               mode,
-               quantized_input_range,
-               fallback_quantization_range=None):
-    """Sets up the class to rewrite a float graph.
-
-    Args:
-      input_graph: A float graph to transform.
-      mode: A string controlling how quantization is performed -
-        round, quantize, eightbit, or weights.
-      quantized_input_range: if set, assume the input is
-        quantized and represents the range
-        [quantized_input_range[0], quantized_input_range[1]]
-      fallback_quantization_range: if set, then for nodes where the quantization
-        range can't be inferred from the graph, use the range
-        [fallback_quantization_range[0], fallback_quantization_range[1]) instead
-        of using a RequantizationRange node in the graph.
-
-    Raises:
-      ValueError: Two nodes with the same name were found in the graph.
-    """
-    self.input_graph = input_graph
-    self.nodes_map = self.create_nodes_map(input_graph)
-    self.output_graph = None
-    self.mode = mode
-    self.final_node_renames = {}
-    if quantized_input_range:
-      self.input_range = (quantized_input_range[0], quantized_input_range[1])
-      if self.input_range[0] >= self.input_range[1]:
-        raise ValueError("Invalid quantized_input_range: [%s,%s]" %
-                         self.input_range)
-      if self.mode != "eightbit":
-        raise ValueError(
-            "quantized_input_range can only be specified in eightbit mode")
-    else:
-      self.input_range = None
-
-    if fallback_quantization_range:
-      self.fallback_quantization_range = [
-          fallback_quantization_range[0], fallback_quantization_range[1]
-      ]
-      if (self.fallback_quantization_range[0] >=
-          self.fallback_quantization_range[1]):
-        raise ValueError("Invalid fallback_quantization_range: [%s,%s]" %
-                         self.fallback_quantization_range)
-      if self.mode != "eightbit":
-        raise ValueError("fallback_quantization_range can only be "
-                         "specified in eightbit mode")
-    else:
-      self.fallback_quantization_range = None
-
-    # Data that is valid only during the recursive call to rewrite the graph.
-    self.state = None
-
-  def create_nodes_map(self, graph):
-    """Builds a mapping of node names to their defs from the graph."""
-    nodes_map = {}
-    for node in graph.node:
-      if node.name not in nodes_map.keys():
-        nodes_map[node.name] = node
-      else:
-        raise ValueError("Duplicate node names detected.")
-    return nodes_map
-
-  def rewrite(self, output_node_names):
-    """Triggers rewriting of the float graph.
-
-    Args:
-      output_node_names: A list of names of the nodes that produce the final
-        results.
-
-    Returns:
-      A quantized version of the float graph.
-    """
-    self.output_graph = graph_pb2.GraphDef()
-    output_nodes = [
-        self.nodes_map[output_node_name]
-        for output_node_name in output_node_names
-    ]
-    if self.mode == "round":
-      self.already_visited = {}
-      for output_node in output_nodes:
-        self.round_nodes_recursively(output_node)
-    elif self.mode == "quantize":
-      self.already_visited = {}
-      self.already_quantized = {}
-      for output_node in output_nodes:
-        self.quantize_nodes_recursively(output_node)
-    elif self.mode == "eightbit":
-      self.set_input_graph(graph_util.remove_training_nodes(
-          self.input_graph, protected_nodes=output_node_names))
-      output_nodes = [
-          self.nodes_map[output_node_name]
-          for output_node_name in output_node_names
-      ]
-
-      self.state = EightbitizeRecursionState(
-          already_visited={}, output_node_stack=[], merged_with_fake_quant={})
-      for output_node in output_nodes:
-        self.eightbitize_nodes_recursively(output_node)
-      self.state = None
-      if self.input_range:
-        self.add_output_graph_node(
-            create_constant_node("quantized_input_min_value", self.input_range[
-                0], dtypes.float32, []))
-        self.add_output_graph_node(
-            create_constant_node("quantized_input_max_value", self.input_range[
-                1], dtypes.float32, []))
-      if self.fallback_quantization_range:
-        self.add_output_graph_node(
-            create_constant_node("fallback_quantization_min_value",
-                                 self.fallback_quantization_range[0],
-                                 dtypes.float32, []))
-        self.add_output_graph_node(
-            create_constant_node("fallback_quantization_max_value",
-                                 self.fallback_quantization_range[1],
-                                 dtypes.float32, []))
-      if FLAGS.strip_redundant_quantization:
-        self.output_graph = self.remove_redundant_quantization(
-            self.output_graph)
-        self.remove_dead_nodes(output_node_names)
-      self.apply_final_node_renames()
-    elif self.mode == "weights":
-      self.output_graph = self.quantize_weights(self.input_graph,
-                                                b"MIN_COMBINED")
-      self.remove_dead_nodes(output_node_names)
-    elif self.mode == "weights_rounded":
-      self.output_graph = self.quantize_weights(self.input_graph, self.mode)
-      self.remove_dead_nodes(output_node_names)
-    else:
-      print("Bad mode - " + self.mode + ".")
-    return self.output_graph
-
-  def round_nodes_recursively(self, current_node):
-    """The entry point for simple rounding quantization."""
-    if (current_node.name in self.already_visited
-       ) and self.already_visited[current_node.name]:
-      return
-    self.already_visited[current_node.name] = True
-    for input_node_name in current_node.input:
-      input_node_name = node_name_from_input(input_node_name)
-      input_node = self.nodes_map[input_node_name]
-      self.round_nodes_recursively(input_node)
-    nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"]
-    if any(current_node.op in s for s in nodes_to_quantize):
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(current_node)
-      new_node.name = current_node.name + "_original"
-      self.add_output_graph_node(new_node)
-      levels = 1 << FLAGS.bitdepth
-      constant_name = current_node.name + "_round_depth"
-      constant_tensor = constant_op.constant(
-          levels, dtype=dtypes.int32, name=constant_name)
-      constant_node = constant_tensor.op.node_def
-      self.add_output_graph_node(constant_node)
-      quantize_node = node_def_pb2.NodeDef()
-      quantize_node.op = "RoundToSteps"
-      quantize_node.name = current_node.name
-      quantize_node.input.extend([current_node.name + "_original"])
-      quantize_node.input.extend([constant_node.name])
-      self.add_output_graph_node(quantize_node)
-    else:
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(current_node)
-      self.add_output_graph_node(new_node)
-
-  def quantize_nodes_recursively(self, current_node):
-    """The entry point for quantizing nodes to eight bit and back."""
-    if self.already_visited[current_node.name]:
-      return
-    self.already_visited[current_node.name] = True
-    for input_node_name in current_node.input:
-      input_node_name = node_name_from_input(input_node_name)
-      input_node = self.nodes_map[input_node_name]
-      self.quantize_nodes_recursively(input_node)
-    nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"]
-    if any(current_node.op in s for s in nodes_to_quantize):
-      for input_name in current_node.input:
-        input_name = node_name_from_input(input_name)
-        input_node = self.nodes_map[input_name]
-        self.quantize_node(input_node)
-      self.quantize_node(current_node)
-    else:
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(current_node)
-      self.add_output_graph_node(new_node)
-
-  def quantize_node(self, input_node):
-    """Handles quantizing a single node."""
-    input_name = input_node.name
-    if input_name in self.already_quantized:
-      return
-    self.already_quantized[input_name] = True
-    original_input_name = input_name + "_original"
-    reshape_name = input_name + "_reshape"
-    reshape_dims_name = input_name + "_reshape_dims"
-    max_name = input_name + "_max"
-    min_name = input_name + "_min"
-    dims_name = input_name + "_dims"
-    quantize_name = input_name + "_quantize"
-    dequantize_name = input_name
-    original_input_node = node_def_pb2.NodeDef()
-    original_input_node.CopyFrom(input_node)
-    original_input_node.name = original_input_name
-    self.add_output_graph_node(original_input_node)
-    reshape_dims_node = create_constant_node(reshape_dims_name, -1,
-                                             dtypes.int32, [1])
-    self.add_output_graph_node(reshape_dims_node)
-    reshape_node = create_node("Reshape", reshape_name,
-                               [original_input_name, reshape_dims_name])
-    set_attr_dtype(reshape_node, "T", dtypes.float32)
-    self.add_output_graph_node(reshape_node)
-    dims_node = create_constant_node(dims_name, 0, dtypes.int32, [1])
-    self.add_output_graph_node(dims_node)
-    max_node = create_node("Max", max_name, [reshape_name, dims_name])
-    set_attr_dtype(max_node, "T", dtypes.float32)
-    set_attr_bool(max_node, "keep_dims", False)
-    self.add_output_graph_node(max_node)
-    min_node = create_node("Min", min_name, [reshape_name, dims_name])
-    set_attr_dtype(min_node, "T", dtypes.float32)
-    set_attr_bool(min_node, "keep_dims", False)
-    self.add_output_graph_node(min_node)
-    quantize_node = create_node("Quantize", quantize_name,
-                                [original_input_name, min_name, max_name])
-    set_attr_dtype(quantize_node, "T", dtypes.quint8)
-    set_attr_string(quantize_node, "mode", b"MIN_FIRST")
-    self.add_output_graph_node(quantize_node)
-    dequantize_node = create_node("Dequantize", dequantize_name,
-                                  [quantize_name, min_name, max_name])
-    set_attr_dtype(dequantize_node, "T", dtypes.quint8)
-    set_attr_string(dequantize_node, "mode", b"MIN_FIRST")
-    self.add_output_graph_node(dequantize_node)
-
-  def should_merge_with_fake_quant_node(self):
-    """Should the current node merge with self.state.output_node_stack[-1]?"""
-    if not self.state.output_node_stack:
-      return False
-    top = self.state.output_node_stack[-1]
-    return top[1] == 0 and top[0].op in ["FakeQuantWithMinMaxVars"]
-
-  def should_quantize_const(self, node):
-    if not self.state.output_node_stack:
-      return False
-    top = self.state.output_node_stack[-1]
-    if not top[2]:
-      return False
-    dtype = dtypes.as_dtype(node.attr["dtype"].type)
-    assert dtype == dtypes.float32, (
-        "Failed to quantized constant %s of type %s" % (node.name, dtype))
-    return True
-
-  def eightbitize_nodes_recursively(self, current_node):
-    """The entry point for transforming a graph into full eight bit."""
-    if current_node.name in self.state.already_visited:
-      if (self.should_merge_with_fake_quant_node() or
-          current_node.name in self.state.merged_with_fake_quant):
-        raise ValueError("Unsupported graph structure: output of node %s "
-                         "is processed by a FakeQuant* node and should have "
-                         "no other outputs.", current_node.name)
-      return
-    self.state.already_visited[current_node.name] = True
-
-    for i, input_node_name in enumerate(current_node.input):
-      quantize_input = False
-      if current_node.op in ("MatMul", "Conv2D", "BiasAdd", "MaxPool",
-                             "AvgPool", "Relu", "Relu6",
-                             "BatchNormWithGlobalNormalization"):
-        quantize_input = True
-      elif current_node.op == "Concat" and i > 0:
-        quantize_input = (
-            dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32)
-      elif current_node.op == "Reshape" and i == 0:
-        quantize_input = (
-            dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32)
-
-      self.state.output_node_stack.append((current_node, i, quantize_input))
-
-      input_node_name = node_name_from_input(input_node_name)
-      input_node = self.nodes_map[input_node_name]
-      self.eightbitize_nodes_recursively(input_node)
-
-      self.state.output_node_stack.pop()
-
-    if current_node.op == "MatMul":
-      self.eightbitize_mat_mul_node(current_node)
-    elif current_node.op == "Conv2D":
-      self.eightbitize_conv_node(current_node)
-    elif current_node.op == "BiasAdd":
-      self.eightbitize_bias_add_node(current_node)
-    elif current_node.op == "MaxPool" or current_node.op == "AvgPool":
-      self.eightbitize_single_input_tensor_node(current_node,
-                                                self.add_pool_function)
-    elif current_node.op == "Relu" or current_node.op == "Relu6":
-      self.eightbitize_single_input_tensor_node(current_node,
-                                                self.add_relu_function)
-    elif (current_node.op == "Concat" and
-          dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32):
-      self.eightbitize_concat_node(current_node)
-    elif current_node.op == "BatchNormWithGlobalNormalization":
-      self.eightbitize_batch_norm_node(current_node)
-    elif (current_node.op == "Reshape" and
-          dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32):
-      self.eightbitize_reshape_node(current_node)
-    elif (self.input_range and
-          current_node.op in ("Placeholder", "PlaceholderV2")):
-      self.eightbitize_placeholder_node(current_node)
-    elif current_node.op == "FakeQuantWithMinMaxVars":
-      # It will have been merged into the underlying node.
-      pass
-    elif current_node.op == "Const":
-      if self.should_quantize_const(current_node):
-        for n in quantize_weight_eightbit(current_node, b"MIN_FIRST"):
-          self.add_output_graph_node(n)
-      else:
-        new_node = node_def_pb2.NodeDef()
-        new_node.CopyFrom(current_node)
-        self.add_output_graph_node(new_node)
-
-    ###################################################################
-    # Note: if more cases are added here, you may need to update the op
-    # name lists in the loop over children at the start of the function.
-    ###################################################################
-    else:
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(current_node)
-      self.add_output_graph_node(new_node)
-
-    if (self.should_merge_with_fake_quant_node() and
-        current_node.name not in self.state.merged_with_fake_quant):
-      raise ValueError(
-          "FakeQuant* node %s failed to merge with node %s of type %s" %
-          (self.state.output_node_stack[-1][0], current_node.name,
-           current_node.op))
-
-  def add_eightbit_prologue_nodes(self, original_node):
-    """Adds input conversion nodes to handle quantizing the underlying node."""
-    namespace_prefix = original_node.name + "_eightbit"
-    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
-        namespace_prefix)
-    input_names = []
-    min_max_names = []
-    for original_input_name in original_node.input:
-      quantize_input_name, min_input_name, max_input_name = (
-          self.eightbitize_input_to_node(namespace_prefix, original_input_name,
-                                         reshape_dims_name,
-                                         reduction_dims_name))
-      input_names.append(quantize_input_name)
-      min_max_names.append(min_input_name)
-      min_max_names.append(max_input_name)
-    all_input_names = []
-    all_input_names.extend(input_names)
-    all_input_names.extend(min_max_names)
-    return all_input_names
-
-  def add_common_quantization_nodes(self, namespace_prefix):
-    """Builds constant nodes needed for quantization of inputs."""
-    reshape_dims_name = namespace_prefix + "_reshape_dims"
-    reduction_dims_name = namespace_prefix + "_reduction_dims"
-
-    reshape_dims_node = create_constant_node(reshape_dims_name, -1,
-                                             dtypes.int32, [1])
-    self.add_output_graph_node(reshape_dims_node)
-    reduction_dims_node = create_constant_node(reduction_dims_name, 0,
-                                               dtypes.int32, [1])
-    self.add_output_graph_node(reduction_dims_node)
-    return reshape_dims_name, reduction_dims_name
-
-  def eightbitize_input_to_node(self, namespace_prefix, original_input_name,
-                                reshape_dims_name, reduction_dims_name):
-    """Takes one float input to an op, and converts it to quantized form."""
-    unique_input_name = unique_node_name_from_input(original_input_name)
-    reshape_input_name = namespace_prefix + "_reshape_" + unique_input_name
-    min_input_name = namespace_prefix + "_min_" + unique_input_name
-    max_input_name = namespace_prefix + "_max_" + unique_input_name
-    quantize_input_name = namespace_prefix + "_quantize_" + unique_input_name
-    reshape_input_node = create_node("Reshape", reshape_input_name,
-                                     [original_input_name, reshape_dims_name])
-    set_attr_dtype(reshape_input_node, "T", dtypes.float32)
-    self.add_output_graph_node(reshape_input_node)
-    min_input_node = create_node("Min", min_input_name,
-                                 [reshape_input_name, reduction_dims_name])
-    set_attr_dtype(min_input_node, "T", dtypes.float32)
-    set_attr_bool(min_input_node, "keep_dims", False)
-    self.add_output_graph_node(min_input_node)
-    max_input_node = create_node("Max", max_input_name,
-                                 [reshape_input_name, reduction_dims_name])
-    set_attr_dtype(max_input_node, "T", dtypes.float32)
-    set_attr_bool(max_input_node, "keep_dims", False)
-    self.add_output_graph_node(max_input_node)
-    quantize_input_node = create_node(
-        "QuantizeV2", quantize_input_name,
-        [original_input_name, min_input_name, max_input_name])
-    set_attr_dtype(quantize_input_node, "T", dtypes.quint8)
-    set_attr_string(quantize_input_node, "mode", b"MIN_FIRST")
-    self.add_output_graph_node(quantize_input_node)
-    min_output_name = quantize_input_name + ":1"
-    max_output_name = quantize_input_name + ":2"
-    return quantize_input_name, min_output_name, max_output_name
-
-  def add_quantize_down_nodes(self, original_node, quantized_output_name):
-    quantized_outputs = [
-        quantized_output_name, quantized_output_name + ":1",
-        quantized_output_name + ":2"
-    ]
-    min_max_inputs = None
-    if self.should_merge_with_fake_quant_node():
-      # Use the inputs to the FakeQuantWithMinMaxVars node as the inputs to
-      # Requantize.
-      fake_quant_node = self.state.output_node_stack[-1][0]
-      min_max_inputs = [fake_quant_node.input[1], fake_quant_node.input[2]]
-      assert original_node.name not in self.state.merged_with_fake_quant
-      self.state.merged_with_fake_quant[original_node.name] = True
-    elif self.fallback_quantization_range:
-      min_max_inputs = [
-          "fallback_quantization_min_value:0",
-          "fallback_quantization_max_value:0"
-      ]
-    else:
-      # Add a RequantizationRange node for finding the min and max values.
-      requant_range_node = create_node(
-          "RequantizationRange", original_node.name + "_eightbit_requant_range",
-          quantized_outputs)
-      set_attr_dtype(requant_range_node, "Tinput", dtypes.qint32)
-      self.add_output_graph_node(requant_range_node)
-      min_max_inputs = [
-          requant_range_node.name + ":0", requant_range_node.name + ":1"
-      ]
-    requantize_node = create_node("Requantize",
-                                  original_node.name + "_eightbit_requantize",
-                                  quantized_outputs + min_max_inputs)
-    set_attr_dtype(requantize_node, "Tinput", dtypes.qint32)
-    set_attr_dtype(requantize_node, "out_type", dtypes.quint8)
-    self.add_output_graph_node(requantize_node)
-    return requantize_node.name
-
-  def add_dequantize_result_node(self,
-                                 quantized_output_name,
-                                 original_node_name,
-                                 min_tensor_index=1):
-    min_max_inputs = [
-        "%s:%s" % (quantized_output_name, min_tensor_index),
-        "%s:%s" % (quantized_output_name, (min_tensor_index + 1))
-    ]
-    dequantize_name = original_node_name
-    if self.should_merge_with_fake_quant_node():
-      fake_quant_node = self.state.output_node_stack[-1][0]
-      if original_node_name not in self.state.merged_with_fake_quant:
-        min_max_inputs = [fake_quant_node.input[1], fake_quant_node.input[2]]
-        self.state.merged_with_fake_quant[original_node_name] = True
-      dequantize_name = fake_quant_node.name
-
-    dequantize_node = create_node(
-        "Dequantize", dequantize_name,
-        [quantized_output_name, min_max_inputs[0], min_max_inputs[1]])
-    set_attr_dtype(dequantize_node, "T", dtypes.quint8)
-    set_attr_string(dequantize_node, "mode", b"MIN_FIRST")
-    self.add_output_graph_node(dequantize_node)
-
-  def eightbitize_mat_mul_node(self, original_node):
-    """Replaces a MatMul node with the eight bit equivalent sub-graph."""
-    quantized_mat_mul_name = original_node.name + "_eightbit_quantized_mat_mul"
-    all_input_names = self.add_eightbit_prologue_nodes(original_node)
-    quantized_mat_mul_node = create_node("QuantizedMatMul",
-                                         quantized_mat_mul_name,
-                                         all_input_names)
-    set_attr_dtype(quantized_mat_mul_node, "T1", dtypes.quint8)
-    set_attr_dtype(quantized_mat_mul_node, "T2", dtypes.quint8)
-    set_attr_dtype(quantized_mat_mul_node, "Toutput", dtypes.qint32)
-    copy_attr(quantized_mat_mul_node, "transpose_a",
-              original_node.attr["transpose_a"])
-    copy_attr(quantized_mat_mul_node, "transpose_b",
-              original_node.attr["transpose_b"])
-    self.add_output_graph_node(quantized_mat_mul_node)
-    quantize_down_name = self.add_quantize_down_nodes(original_node,
-                                                      quantized_mat_mul_name)
-    self.add_dequantize_result_node(quantize_down_name, original_node.name)
-
-  def eightbitize_conv_node(self, original_node):
-    """Replaces a Conv2D node with the eight bit equivalent sub-graph."""
-    all_input_names = self.add_eightbit_prologue_nodes(original_node)
-    quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
-    quantized_conv_node = create_node("QuantizedConv2D", quantized_conv_name,
-                                      all_input_names)
-    copy_attr(quantized_conv_node, "strides", original_node.attr["strides"])
-    copy_attr(quantized_conv_node, "padding", original_node.attr["padding"])
-    set_attr_dtype(quantized_conv_node, "Tinput", dtypes.quint8)
-    set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.quint8)
-    set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32)
-    self.add_output_graph_node(quantized_conv_node)
-    quantize_down_name = self.add_quantize_down_nodes(original_node,
-                                                      quantized_conv_name)
-    self.add_dequantize_result_node(quantize_down_name, original_node.name)
-
-  def eightbitize_bias_add_node(self, original_node):
-    """Replaces a BiasAdd node with the eight bit equivalent sub-graph."""
-    quantized_bias_add_name = (
-        original_node.name + "_eightbit_quantized_bias_add")
-    all_input_names = self.add_eightbit_prologue_nodes(original_node)
-    quantized_bias_add_node = create_node("QuantizedBiasAdd",
-                                          quantized_bias_add_name,
-                                          all_input_names)
-    set_attr_dtype(quantized_bias_add_node, "T1", dtypes.quint8)
-    set_attr_dtype(quantized_bias_add_node, "T2", dtypes.quint8)
-    set_attr_dtype(quantized_bias_add_node, "out_type", dtypes.qint32)
-    self.add_output_graph_node(quantized_bias_add_node)
-    quantize_down_name = self.add_quantize_down_nodes(original_node,
-                                                      quantized_bias_add_name)
-    self.add_dequantize_result_node(quantize_down_name, original_node.name)
-
-  def eightbitize_single_input_tensor_node(self, original_node,
-                                           add_op_function):
-    """Replaces a single-tensor node with the eight bit equivalent sub-graph.
-
-    Converts a node like this:
-
-       Shape(f)   Input(f)
-         |          |
-         +--------v v
-                Operation
-                    |
-                    v
-                   (f)
-
-     Into a quantized equivalent:
-
-                    Input(f)              ReshapeDims
-                       +------v v-------------+
-                       |    Reshape
-                       |      |
-                       |      |          ReductionDims
-                       |      +-----+         |
-                       |      | +---c---------+
-                       |      v v   v v-------+
-                       |      Min   Max
-                       |  +----+      |
-                       v  v  v--------+
-                      Quantize
-                          |
-                          v
-                   QuantizedOperation
-                      |   |   |
-                      v   v   v
-                      Dequantize
-                          |
-                          v
-                         (f)
-
-
-    Args:
-      original_node: Float node to be converted.
-      add_op_function: Function to create the actual node.
-
-    Returns:
-      Subgraph representing the quantized version of the original node.
-
-    """
-    quantized_op_name = original_node.name + "_eightbit_quantized"
-    quantized_op_type = "Quantized" + original_node.op
-    all_input_names = self.add_eightbit_prologue_nodes(original_node)
-    quantized_op_node = create_node(quantized_op_type, quantized_op_name,
-                                    all_input_names)
-    add_op_function(original_node, quantized_op_node)
-    self.add_output_graph_node(quantized_op_node)
-    self.add_dequantize_result_node(quantized_op_name, original_node.name)
-
-  def add_pool_function(self, original_node, quantized_op_node):
-    set_attr_dtype(quantized_op_node, "T", dtypes.quint8)
-    copy_attr(quantized_op_node, "ksize", original_node.attr["ksize"])
-    copy_attr(quantized_op_node, "strides", original_node.attr["strides"])
-    copy_attr(quantized_op_node, "padding", original_node.attr["padding"])
-
-  def add_relu_function(self, unused_arg_node, quantized_op_node):
-    set_attr_dtype(quantized_op_node, "Tinput", dtypes.quint8)
-
-  def eightbitize_concat_node(self, original_node):
-    """Replaces a Concat node with the eight bit equivalent sub-graph.
-
-    Converts a node like this:
-
-       Shape(f)   Input0(f)   Input1(f)
-         |          |            |
-         +--------v v v----------+
-                  Concat
-                    |
-                    v
-                   (f)
-
-     Into a quantized equivalent:
-
-       Shape(f)     Input0(f)             ReshapeDims                  Input1(f)
-         |             +------v v--------------+------------------v v------+
-         |             |    Reshape                             Reshape    |
-         |             |      |                                     |      |
-         |             |      |           ReductionDims             |      |
-         |             |      +------+         |           +--------+      |
-         |             |      |  +---c---------+-----------c-----+  |      |
-         |             |      +v v   v v-------+---------v v     v v+      |
-         |             |       Min   Max                 Min     Max       |
-         |             |  +----+      |                   |       +-----+  |
-         |             v  v  v--------+                   +----------v  v  v
-         |            Quantize                                       Quantize
-         |                +------------------+   +----------------------+
-         +-------------------------------+   |   |
-                                         v   v   v
-                                      QuantizedConcat
-                                         |   |   |
-                                         v   v   v
-                                        Dequantize
-                                             |
-                                             v
-                                            (f)
-    Args:
-      original_node: Float node to be converted.
-
-    Returns:
-      Subgraph representing the quantized version of the original node.
-
-    """
-    namespace_prefix = original_node.name + "_eightbit"
-    quantized_concat_name = namespace_prefix + "_quantized_concat"
-    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
-        namespace_prefix)
-    shape_input_name = original_node.input[0]
-    original_inputs = original_node.input[1:]
-    input_names = []
-    min_names = []
-    max_names = []
-    for original_input_name in original_inputs:
-      quantize_input_name, min_input_name, max_input_name = (
-          self.eightbitize_input_to_node(namespace_prefix, original_input_name,
-                                         reshape_dims_name,
-                                         reduction_dims_name))
-      input_names.append(quantize_input_name)
-      min_names.append(min_input_name)
-      max_names.append(max_input_name)
-    all_input_names = [shape_input_name]
-    all_input_names.extend(input_names)
-    all_input_names.extend(min_names)
-    all_input_names.extend(max_names)
-    quantized_concat_node = create_node("QuantizedConcat",
-                                        quantized_concat_name, all_input_names)
-    set_attr_int(quantized_concat_node, "N", len(original_inputs))
-    set_attr_dtype(quantized_concat_node, "T", dtypes.quint8)
-    self.add_output_graph_node(quantized_concat_node)
-    self.add_dequantize_result_node(quantized_concat_name, original_node.name)
-
-  def eightbitize_placeholder_node(self, current_node):
-    """Replaces a placeholder node with a quint8 placeholder node+dequantize."""
-    name = current_node.name
-
-    # Convert the placeholder into a quantized type.
-    output_node = node_def_pb2.NodeDef()
-    output_node.CopyFrom(current_node)
-    set_attr_dtype(output_node, "dtype", dtypes.quint8)
-    output_node.name += "_original_input"
-    self.add_output_graph_node(output_node)
-
-    # Add a dequantize to convert back to float.
-    dequantize_node = create_node("Dequantize", name, [
-        output_node.name, "quantized_input_min_value",
-        "quantized_input_max_value"
-    ])
-    set_attr_dtype(dequantize_node, "T", dtypes.quint8)
-    set_attr_string(dequantize_node, "mode", b"MIN_FIRST")
-    self.add_output_graph_node(dequantize_node)
-
-    # For the descent over the graph to work, the dequantize node must be named
-    # current_node.name.  However, for the feeding of the graph to work, the
-    # placeholder must have the name current_node.name; so record a final set
-    # of renames to apply after all processing has been done.
-    self.final_node_renames[output_node.name] = name
-    self.final_node_renames[dequantize_node.name] = name + "_dequantize"
-
-  def eightbitize_reshape_node(self, original_node):
-    """Replaces a Reshape node with the eight bit equivalent sub-graph.
-
-    Args:
-      original_node: Float node to be converted.
-
-    Returns:
-      Subgraph representing the quantized version of the original node.
-
-    """
-    namespace_prefix = original_node.name + "_eightbit"
-    quantized_reshape_name = namespace_prefix + "_quantized_reshape"
-    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
-        namespace_prefix)
-    shape_input_name = original_node.input[1]
-    quantize_input_name, min_input_name, max_input_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_node.input[0],
-                                       reshape_dims_name, reduction_dims_name))
-    quantized_reshape_node = create_node(
-        "QuantizedReshape", quantized_reshape_name,
-        [quantize_input_name, shape_input_name, min_input_name, max_input_name])
-    set_attr_dtype(quantized_reshape_node, "T", dtypes.quint8)
-    self.add_output_graph_node(quantized_reshape_node)
-    self.add_dequantize_result_node(quantized_reshape_name, original_node.name)
-
-  def eightbitize_batch_norm_node(self, original_node):
-    """Replaces a MatMul node with the eight bit equivalent sub-graph."""
-    namespace_prefix = original_node.name + "_eightbit"
-    original_input_name = original_node.input[0]
-    original_mean_name = original_node.input[1]
-    original_variance_name = original_node.input[2]
-    original_beta_name = original_node.input[3]
-    original_gamma_name = original_node.input[4]
-    quantized_batch_norm_name = namespace_prefix + "_quantized_batch_norm"
-
-    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
-        namespace_prefix)
-    quantize_input_name, min_input_name, max_input_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_input_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantize_mean_name, min_mean_name, max_mean_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_mean_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantize_variance_name, min_variance_name, max_variance_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_variance_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantize_beta_name, min_beta_name, max_beta_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_beta_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantize_gamma_name, min_gamma_name, max_gamma_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_gamma_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantized_batch_norm_node = create_node(
-        "QuantizedBatchNormWithGlobalNormalization", quantized_batch_norm_name,
-        [
-            quantize_input_name, min_input_name, max_input_name,
-            quantize_mean_name, min_mean_name, max_mean_name,
-            quantize_variance_name, min_variance_name, max_variance_name,
-            quantize_beta_name, min_beta_name, max_beta_name,
-            quantize_gamma_name, min_gamma_name, max_gamma_name
-        ])
-    set_attr_dtype(quantized_batch_norm_node, "Tinput", dtypes.quint8)
-    set_attr_dtype(quantized_batch_norm_node, "out_type", dtypes.qint32)
-    copy_attr(quantized_batch_norm_node, "scale_after_normalization",
-              original_node.attr["scale_after_normalization"])
-    copy_attr(quantized_batch_norm_node, "variance_epsilon",
-              original_node.attr["variance_epsilon"])
-    self.add_output_graph_node(quantized_batch_norm_node)
-    quantize_down_name = self.add_quantize_down_nodes(original_node,
-                                                      quantized_batch_norm_name)
-    self.add_dequantize_result_node(quantize_down_name, original_node.name)
-
-  def add_output_graph_node(self, output_node):
-    """Inserts one node into the new graph."""
-    self.output_graph.node.extend([output_node])
-
-  def remove_redundant_quantization(self, old_graph):
-    """Removes unneeded pairs of quantize/dequantize ops from the graph.
-
-    This is a bit of a tricky function, because it's attempting to spot the
-    pattern of dequantizing from eight-bit up to float, and then immediately
-    quantizing back down to eight bits again, that's introduced by previous
-    passes that do 'key-hole' conversions of individual nodes but have to
-    convert back to float to match the previous output interface, since they
-    don't know that the next op can handle quantized tensors.
-    It works by:
-     - Looking for Quantize nodes.
-     - Checking to see if their first input is a Dequantize node.
-     - Seeing if their min/max inputs come from Min/Max nodes.
-     - Making sure those Min/Max nodes are being fed from the same Dequantize.
-     - Or that the Min is indirectly being fed from the same Dequantize as Max.
-     - Making sure the Dequantize is going through a Reshape (which we add
-       during the previous pass when we create the quantize sub-graph).
-     - Looking for the dims Const op for the Min/Max dims.
-    If all of these conditions are met, then it's a sub-graph pattern that
-    we know how to optimize out (and is likely the common one we've introduced).
-    We then rewire the graph to skip it entirely, and then rely on the dead node
-    removal pass to get rid of any nodes that are no longer needed.
-
-    Args:
-      old_graph: The model we'll be stripping redundant nodes from.
-
-    Returns:
-      A graph with the unnecessary nodes removed.
-
-    Raises:
-      ValueError: Two nodes with the same name were found in the graph.
-    """
-    old_nodes_map = self.create_nodes_map(old_graph)
-    self.output_graph = graph_pb2.GraphDef()
-    inputs_to_rename = {}
-    # We go through all the nodes, looking for any that match the patterns we
-    # know how to optimize away.
-    for node in old_graph.node:
-      # We always start with a Quantize node, and examine its inputs to see if
-      # they are in a form that can be removed.
-      if node.op not in ["Quantize", "QuantizeV2"]:
-        continue
-      dequantize_node_name = node_name_from_input(node.input[0])
-      if dequantize_node_name not in old_nodes_map:
-        raise ValueError("Input node name '" + dequantize_node_name +
-                         "' not found in node '" + node.name + "'")
-      dequantize_node = old_nodes_map[dequantize_node_name]
-      # Do we have a Dequantize feeding in, with the same type as the Quantize?
-      if dequantize_node.op != "Dequantize":
-        continue
-      if node.attr["T"] != dequantize_node.attr["T"]:
-        continue
-      # Now look at the other inputs, and ensure they're Min/Max nodes.
-      min_node_name = node_name_from_input(node.input[1])
-      max_node_name = node_name_from_input(node.input[2])
-      min_node = old_nodes_map[min_node_name]
-      max_node = old_nodes_map[max_node_name]
-      is_min_right_type = (min_node.op in ["Min", "Dequantize"])
-      is_max_right_type = (max_node.op in ["Max", "Dequantize"])
-      if not is_min_right_type or not is_max_right_type:
-        print("Didn't find expected types on inputs : %s, %s." % (min_node.op,
-                                                                  max_node.op))
-        continue
-      min_node_input_name = node_name_from_input(min_node.input[0])
-      max_node_input_name = node_name_from_input(max_node.input[0])
-      # There are two different patterns for Min nodes we can recognize, one
-      # where the input comes directly from the same one as the Max, and
-      # another where we run it through another Min first, so check for both.
-      is_same_input = False
-      if min_node_input_name == max_node_input_name:
-        is_same_input = True
-      else:
-        first_min_node_input = old_nodes_map[min_node_input_name]
-        if first_min_node_input.op == "Concat":
-          second_min_node_name = node_name_from_input(
-              first_min_node_input.input[1])
-          second_min_node = old_nodes_map[second_min_node_name]
-          if second_min_node.op == "Min":
-            second_min_node_input_name = node_name_from_input(
-                second_min_node.input[0])
-            is_same_input = (second_min_node_input_name == max_node_input_name)
-      if not is_same_input:
-        print("Different min/max inputs: " + min_node_input_name)
-        continue
-      # We recognize this pattern, so mark the graph edges to be rewired to
-      # route around it entirely, since we know it's a no-op.
-      dequantize_source_name = node_name_from_input(dequantize_node.input[0])
-      node_tensor_name = ensure_tensor_name_has_port(node.name)
-      min_tensor_name = node.name + ":1"
-      max_tensor_name = node.name + ":2"
-      inputs_to_rename[node_tensor_name] = dequantize_source_name
-      inputs_to_rename[min_tensor_name] = dequantize_node.input[1]
-      inputs_to_rename[max_tensor_name] = dequantize_node.input[2]
-    # Finally we apply all the rewiring we've marked to the graph.
-    for node in old_graph.node:
-      for index, input_full_name in enumerate(node.input):
-        input_name = ensure_tensor_name_has_port(input_full_name)
-        if input_name in inputs_to_rename:
-          node.input[index] = inputs_to_rename[input_name]
-      self.add_output_graph_node(node)
-    return self.output_graph
-
-  def apply_final_node_renames(self):
-    """Applies node renames in self.final_node_renames to self.output_graph."""
-    old_graph = self.output_graph
-    self.output_graph = graph_pb2.GraphDef()
-    for node in old_graph.node:
-      node.name = self.final_node_renames.get(node.name, node.name)
-      for index, input_name in enumerate(node.input):
-        node_name = node_name_from_input(input_name)
-        input_full_name = ensure_tensor_name_has_port(input_name)
-        if node_name in self.final_node_renames:
-          node.input[index] = "%s%s" % (self.final_node_renames[node_name],
-                                        input_full_name[len(node_name):])
-      self.add_output_graph_node(node)
-    return self.output_graph
-
-  def remove_dead_nodes(self, output_names):
-    """Removes nodes that are no longer needed for inference from the graph."""
-    old_output_graph = self.output_graph
-    self.output_graph = graph_util.extract_sub_graph(old_output_graph,
-                                                     output_names)
-
-  def quantize_weights(self, input_graph, quantization_mode):
-    """Quantize float Const ops.
-
-    There are two modes of operations, both replace float Const ops with
-    quantized values.
-    1. If quantization_mode is "weights_rounded", this function replaces float
-    Const ops with quantized float Const ops - same as the original op, but
-    float values being mapped to the center of one of 1<<FLAGS.bitdepth buckets.
-    This does not change the raw model size, but compression algorithms such as
-    zip (as used for compressing apks) or bzip2 will achieve a very good
-    compression ratio.
-    2. For other quantization modes ("MIN_COMBINED" or "MIN_FIRST"), float
-    Const ops are quantized and replaced by a tuple of four ops to perform
-    the dequantization at runtime:
-    * eight-bit Const (bucket indices, same shape as original float Const op
-    * two float Const ops (min and max value of original float Const op)
-    * Dequantize op to convert the eight-bit consts to float tensors.
-    The quantization mode is important because we see accuracy problems when
-    quantizing weights for different situations depending on the algorithm
-    used. We haven't figured out exactly what the underlying cause is yet,
-    unfortunately.
-
-    Args:
-      input_graph: A GraphDef of the model containing float Const ops.
-      quantization_mode: How to quantize and dequantize the values.
-
-    Returns:
-      A GraphDef of the converted graph.
-
-    Raises:
-      ValueError: If quantization_mode is unsupported.
-    """
-    output_graph = graph_pb2.GraphDef()
-    for input_node in input_graph.node:
-      should_quantize = False
-      if input_node.op == "Const":
-        dtype = dtypes.as_dtype(input_node.attr["dtype"].type)
-        if dtype == dtypes.float32:
-          should_quantize = True
-      if should_quantize:
-        if quantization_mode == "weights_rounded":
-          output_graph.node.extend(quantize_weight_rounded(input_node))
-        elif quantization_mode in (b"MIN_COMBINED", b"MIN_FIRST"):
-          output_graph.node.extend(
-              quantize_weight_eightbit(input_node, quantization_mode))
-        else:
-          raise ValueError("Unsupported quantization mode %s." %
-                           quantization_mode)
-      else:
-        output_node = node_def_pb2.NodeDef()
-        output_node.CopyFrom(input_node)
-        output_graph.node.extend([output_node])
-    return output_graph
-
-  def set_input_graph(self, new_input_graph):
-    self.input_graph = new_input_graph
-    self.nodes_map = self.create_nodes_map(self.input_graph)
-
-
-def main(unused_args):
-  if not gfile.Exists(FLAGS.input):
-    print("Input graph file '" + FLAGS.input + "' does not exist!")
-    return -1
-
-  known_modes = [
-      "round", "quantize", "eightbit", "weights", "test", "weights_rounded"
-  ]
-  if not any(FLAGS.mode in s for s in known_modes):
-    print("mode is '" + FLAGS.mode + "', not in " + ", ".join(known_modes) +
-          ".")
-    return -1
-
-  tf_graph = graph_pb2.GraphDef()
-  with gfile.Open(FLAGS.input, "rb") as f:
-    data = f.read()
-    tf_graph.ParseFromString(data)
-
-  graph = ops.Graph()
-  with graph.as_default():
-    importer.import_graph_def(tf_graph, input_map={}, name="")
-
-  quantized_input_range = None
-  if FLAGS.quantized_input:
-    quantized_input_range = [
-        FLAGS.quantized_input_min, FLAGS.quantized_input_max
-    ]
-
-  fallback_quantization_range = None
-  if (FLAGS.quantized_fallback_min is not None or
-      FLAGS.quantized_fallback_max is not None):
-    assert FLAGS.quantized_fallback_min is not None
-    assert FLAGS.quantized_fallback_max is not None
-    fallback_quantization_range = [
-        FLAGS.quantized_fallback_min, FLAGS.quantized_fallback_max
-    ]
-
-  rewriter = GraphRewriter(tf_graph, FLAGS.mode, quantized_input_range,
-                           fallback_quantization_range)
-
-  output_graph = rewriter.rewrite(FLAGS.output_node_names.split(","))
-
-  f = gfile.FastGFile(FLAGS.output, "wb")
-  f.write(output_graph.SerializeToString())
-
-  return 0
-
-
-if __name__ == "__main__":
-  app.run()
diff --git a/tensorflow/tools/quantization/quantize_graph_test.py b/tensorflow/tools/quantization/quantize_graph_test.py
deleted file mode 100644
index 92bb5127da..0000000000
--- a/tensorflow/tools/quantization/quantize_graph_test.py
+++ /dev/null
@@ -1,966 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests the graph quantization script.
-
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import sys
-import numpy as np
-
-from tensorflow.core.framework import graph_pb2
-from tensorflow.python.client import session
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import graph_util
-from tensorflow.python.framework import importer
-from tensorflow.python.framework import ops as ops_lib
-from tensorflow.python.platform import flags as flags_lib
-from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging
-from tensorflow.tools.quantization import quantize_graph
-
-flags = flags_lib
-FLAGS = flags.FLAGS
-
-
-def run_graph_def(graph_def, input_map, outputs):
-  graph = ops_lib.Graph()
-  with graph.as_default():
-    importer.import_graph_def(graph_def, input_map={}, name="")
-  with session.Session(graph=graph) as sess:
-    results = sess.run(outputs, feed_dict=input_map)
-  return results
-
-
-def test_mat_mul(m, n, k, a, b):
-  """Tests a MatMul replacement."""
-  a_constant_name = "a_constant"
-  b_constant_name = "b_constant"
-  mat_mul_name = "mat_mul"
-
-  float_graph_def = graph_pb2.GraphDef()
-  a_constant = quantize_graph.create_constant_node(
-      a_constant_name, value=a, dtype=dtypes.float32, shape=[m, k])
-  float_graph_def.node.extend([a_constant])
-  b_constant = quantize_graph.create_constant_node(
-      b_constant_name, value=b, dtype=dtypes.float32, shape=[k, n])
-  float_graph_def.node.extend([b_constant])
-  mat_mul_node = quantize_graph.create_node("MatMul", mat_mul_name,
-                                            [a_constant_name, b_constant_name])
-  quantize_graph.set_attr_dtype(mat_mul_node, "T", dtypes.float32)
-  quantize_graph.set_attr_bool(mat_mul_node, "transpose_a", False)
-  quantize_graph.set_attr_bool(mat_mul_node, "transpose_b", False)
-  float_graph_def.node.extend([mat_mul_node])
-
-  test_graph(float_graph_def, {}, [mat_mul_name])
-
-
-def test_conv(depth, image_width, image_height, image_batch_count, filter_size,
-              filter_count, stride, padding, input_values, filter_values):
-  """Tests a Conv replacement."""
-  input_constant_name = "input_constant"
-  filter_constant_name = "filter_constant"
-  conv_name = "conv"
-
-  float_graph_def = graph_pb2.GraphDef()
-  input_constant = quantize_graph.create_constant_node(
-      input_constant_name,
-      value=input_values,
-      dtype=dtypes.float32,
-      shape=[image_batch_count, image_height, image_width, depth])
-  float_graph_def.node.extend([input_constant])
-  filter_constant = quantize_graph.create_constant_node(
-      filter_constant_name,
-      value=filter_values,
-      dtype=dtypes.float32,
-      shape=[filter_size, filter_size, depth, filter_count])
-  float_graph_def.node.extend([filter_constant])
-  conv_node = quantize_graph.create_node(
-      "Conv2D", conv_name, [input_constant_name, filter_constant_name])
-  quantize_graph.set_attr_dtype(conv_node, "T", dtypes.float32)
-  quantize_graph.set_attr_int_list(conv_node, "strides", [1, stride, stride, 1])
-  quantize_graph.set_attr_string(conv_node, "padding", padding)
-  float_graph_def.node.extend([conv_node])
-
-  test_graph(float_graph_def, {}, [conv_name])
-
-
-def are_tensors_near(a, b, tolerance):
-  """Tests whether two tensors are nearly identical.
-
-  This is a specialized comparison function designed to help debug problems with
-  quantization. It prints out information about the differences between tensors
-  on failure, paying special attention to possible biases by looking at the mean
-  and absolute average errors.
-
-  Args:
-    a: First comparison tensor.
-    b: Second comparison tensor.
-    tolerance: Float value indicating how large an error between values is ok.
-
-  Returns:
-    Boolean indicating whether the two inputs were close enough.
-  """
-  flat_a = a.flatten()
-  flat_b = b.flatten()
-  if len(flat_a) != len(flat_b):
-    tf_logging.info("Tensors are different sizes: " + str(len(flat_a)) + " vs "
-                    + str(len(flat_b)))
-    return False
-  value_count = len(flat_a)
-  how_many_different = 0
-  total_difference = 0
-  total_abs_difference = 0
-  for index in range(value_count):
-    a_value = flat_a[index]
-    b_value = flat_b[index]
-    difference = a_value - b_value
-    total_difference += difference
-    total_abs_difference += abs(difference)
-    if abs(difference) > tolerance:
-      how_many_different += 1
-  mean_difference = total_difference / value_count
-  mean_abs_difference = total_abs_difference / value_count
-  proportion_different = (how_many_different * 1.0) / value_count
-  if how_many_different == 0:
-    return True
-  else:
-    tf_logging.info("Tensors have {0} different values ({1}%), with mean"
-                    " difference {2} and mean absolute difference {3}".format(
-                        how_many_different, proportion_different * 100,
-                        mean_difference, mean_abs_difference))
-    return False
-
-
-def get_top_value(input_values):
-  max_value = None
-  max_index = None
-  for index, value in enumerate(input_values.flatten()):
-    if max_value is None or value > max:
-      max_value = value
-      max_index = index
-  return max_index, max_value
-
-
-def test_graph(float_graph_def, input_map, output_names, log_graph=False):
-  """Runs the float graph through the rewriter and tests the results."""
-  float_results = run_graph_def(
-      float_graph_def, input_map,
-      [output_name + ":0" for output_name in output_names])
-  # TODO(petewarden): round test is currently failing because there is no
-  # RoundToSteps op available.
-  # round_rewriter = quantize_graph.GraphRewriter(float_graph_def, "round")
-  # round_graph_def = round_rewriter.rewrite(output_name)
-  # round_results = run_graph_def(round_graph_def, input_map,
-  #                               [output_name + ":0"])
-  # assert are_tensors_near(expected, round_results[0], 1.0)
-  #
-  # TODO(petewarden): Add test for "quantize" mode.
-
-  eightbit_rewriter = quantize_graph.GraphRewriter(
-      float_graph_def, "eightbit", quantized_input_range=None)
-  eightbit_graph_def = eightbit_rewriter.rewrite(output_names)
-  eightbit_results = run_graph_def(
-      eightbit_graph_def, input_map,
-      [output_name + ":0" for output_name in output_names])
-  for expected, result in zip(float_results, eightbit_results):
-    assert are_tensors_near(expected, result, 1.0)
-
-  if log_graph:
-    tf_logging.info("8bit:\n%s", str(eightbit_graph_def))
-
-  # Test the weights_rounded mode. This uses the default bit_depth.
-  weights_rounded_rewriter = quantize_graph.GraphRewriter(
-      float_graph_def, "weights_rounded", quantized_input_range=None)
-  weights_rounded_graph_def = weights_rounded_rewriter.rewrite(output_names)
-  weights_rounded_results = run_graph_def(
-      weights_rounded_graph_def, input_map,
-      [output_name + ":0" for output_name in output_names])
-  for expected, result in zip(float_results, weights_rounded_results):
-    assert are_tensors_near(expected, result, 1.0)
-
-
-class QuantizeGraphTest(test.TestCase):
-
-  def test_negative_const_problem(self):
-    shape_constant_name = "shape_constant"
-    shape_constant = quantize_graph.create_constant_node(
-        shape_constant_name, value=-0.8, dtype=dtypes.float32, shape=[1])
-    quantization_result = quantize_graph.quantize_weight_eightbit(
-        shape_constant, b"MIN_COMBINED")
-    self.assertEqual(4, len(quantization_result))
-
-  def test_odd_padding_problem(self):
-    """Tests one error case we ran into in a real graph."""
-    test_conv(1, 4, 4, 1, 3, 1, 2, b"SAME",
-              [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
-              [1, 2, 3, 4, 5, 6, 7, 8, 9])
-
-  def test_mat_mul_tiny(self):
-    # These tests are added to test the generate case where
-    # min(matrix) == max(matrix), which used to cause problems.
-    test_mat_mul(1, 1, 1, [2], [3])
-    test_mat_mul(1, 2, 1, [1], [2, 3])
-    test_mat_mul(1, 1, 2, [1, 1], [1, 1])
-    test_mat_mul(1, 1, 2, [0, 0], [1, 1])
-    # The general case.
-    test_mat_mul(1, 1, 2, [1, 2], [1, 2])
-
-  def test_mat_mul_small(self):
-    test_mat_mul(2, 4, 3, [1, 2, 3, 4, 5, 6],
-                 [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18])
-
-  def test_conv(self):
-    test_conv(1, 4, 3, 1, 3, 1, 1, b"SAME",
-              [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-              [1, 4, 7, 2, 5, 8, 3, 6, 9])
-
-  def test_reshape(self):
-    """Tests that MatMul->Reshape->MatMul avoids extra quantize/dequantize."""
-
-    def make_matmul(name, a, b):
-      n = quantize_graph.create_node("MatMul", name, [a.name, b.name])
-      quantize_graph.set_attr_dtype(n, "T", dtypes.float32)
-      quantize_graph.set_attr_bool(n, "transpose_a", False)
-      quantize_graph.set_attr_bool(n, "transpose_b", False)
-      return n
-
-    # matmul_1 = input*weight_1
-    input_node = quantize_graph.create_constant_node(
-        "input", value=[0, 1, 2, 3], dtype=dtypes.float32, shape=[4, 1])
-    weight_1_node = quantize_graph.create_constant_node(
-        "weight_1",
-        value=[.5, .6, .7, .8, .9],
-        dtype=dtypes.float32,
-        shape=[1, 5])
-    matmul_1_node = make_matmul("matmul_1", input_node, weight_1_node)
-
-    # Reshape 4x5 to 10x2.
-    new_shape_node = quantize_graph.create_constant_node(
-        "new_shape_node", value=[10, 2], dtype=dtypes.int32, shape=[2])
-    reshape_node = quantize_graph.create_node(
-        "Reshape", "reshape", [matmul_1_node.name, new_shape_node.name])
-    quantize_graph.set_attr_dtype(reshape_node, "T", dtypes.float32)
-
-    # matmul_2_node = reshape*weight_2
-    weight_2_node = quantize_graph.create_constant_node(
-        "weight_2", value=[1.5, 2.5], dtype=dtypes.float32, shape=[2, 1])
-    matmul_2_node = make_matmul("matmul_2", reshape_node, weight_2_node)
-
-    g = graph_pb2.GraphDef()
-    g.node.extend([
-        input_node, weight_1_node, matmul_1_node, new_shape_node, reshape_node,
-        weight_2_node, matmul_2_node
-    ])
-
-    # Test the graph
-    test_graph(g, {}, ["matmul_2"])
-
-    # Verify there is only one Quantize and one Requantize op.
-    eightbit_rewriter = quantize_graph.GraphRewriter(
-        g, "eightbit", quantized_input_range=None)
-    eightbit_graph_def = eightbit_rewriter.rewrite(["matmul_2"])
-
-    ops = [node.op for node in eightbit_graph_def.node]
-    # No quantize since all inputs are const and can be quantized up-front.
-    self.assertEqual(0, ops.count("QuantizeV2") + ops.count("Quantize"))
-    self.assertEqual(1, ops.count("QuantizedReshape"))
-
-    # One dequantize at the end.
-    self.assertEqual(1, ops.count("Dequantize"))
-
-  def test_quantize_array(self):
-    # Test invalid parameters (empty array, or 0 buckets.
-    self.assertRaises(ValueError, quantize_graph.quantize_array, np.array([]),
-                      2)
-    self.assertRaises(ValueError, quantize_graph.quantize_array,
-                      np.array([1, 2]), 0)
-    # Test input array of length 1.
-    arr = np.array([1])
-    qarr = quantize_graph.quantize_array(arr, 1)
-    self.assertEqual(arr, qarr)
-    qarr = quantize_graph.quantize_array(arr, 2)
-    self.assertEqual(arr, qarr)
-    # Test input array with all elements equal.
-    arr = np.array([1, 1, 1])
-    qarr = quantize_graph.quantize_array(arr, 10)
-    self.assertTrue((np.array([1, 1, 1]) == qarr).all())
-    # Test "normal" input arrays.
-    arr = np.array([0, 0.3, 0.6, 1])
-    qarr = quantize_graph.quantize_array(arr, 1)
-    self.assertTrue((np.array([0.5, 0.5, 0.5, 0.5]) == qarr).all())
-    qarr = quantize_graph.quantize_array(arr, 2)
-    self.assertTrue((np.array([0.25, 0.25, 0.75, 0.75]) == qarr).all())
-    qarr = quantize_graph.quantize_array(arr.reshape((2, 2)), 2)
-    self.assertTrue((np.array([[0.25, 0.25], [0.75, 0.75]]) == qarr).all())
-
-  def test_non_float_concat(self):
-    concat_dim = quantize_graph.create_constant_node(
-        "concat_dim", value=0, dtype=dtypes.int32, shape=[])
-    a = quantize_graph.create_constant_node(
-        "a",
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.int32,
-        shape=[2, 2, 3])
-    b = quantize_graph.create_constant_node(
-        "b",
-        value=[13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
-        dtype=dtypes.int32,
-        shape=[2, 2, 3])
-    concat = quantize_graph.create_node("Concat", "concat",
-                                        [concat_dim.name, a.name, b.name])
-    quantize_graph.set_attr_int(concat, "N", 2)
-    quantize_graph.set_attr_dtype(concat, "T", dtypes.int32)
-
-    g = graph_pb2.GraphDef()
-    g.node.extend([concat_dim, a, b, concat])
-    test_graph(g, {}, [concat.name])
-
-  def test_non_float_reshape(self):
-    a = quantize_graph.create_constant_node(
-        "a",
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.int32,
-        shape=[2, 2, 3])
-    shape = quantize_graph.create_constant_node(
-        "shape", value=[12], dtype=dtypes.int32, shape=[1])
-    reshape = quantize_graph.create_node("Reshape", "reshape",
-                                         [a.name, shape.name])
-    quantize_graph.set_attr_dtype(reshape, "T", dtypes.int32)
-
-    g = graph_pb2.GraphDef()
-    g.node.extend([a, shape, reshape])
-    test_graph(g, {}, [reshape.name])
-
-  def test_concat(self):
-    shape_constant_name = "shape_constant"
-    a_constant_name = "a_constant"
-    b_constant_name = "b_constant"
-    concat_name = "concat"
-
-    float_graph_def = graph_pb2.GraphDef()
-    shape_constant = quantize_graph.create_constant_node(
-        shape_constant_name, value=0, dtype=dtypes.int32, shape=[])
-    float_graph_def.node.extend([shape_constant])
-    a_constant = quantize_graph.create_constant_node(
-        a_constant_name,
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.float32,
-        shape=[2, 2, 3])
-    float_graph_def.node.extend([a_constant])
-    b_constant = quantize_graph.create_constant_node(
-        b_constant_name,
-        value=[13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
-        dtype=dtypes.float32,
-        shape=[2, 2, 3])
-    float_graph_def.node.extend([b_constant])
-    concat_node = quantize_graph.create_node(
-        "Concat", concat_name,
-        [shape_constant_name, a_constant_name, b_constant_name])
-    quantize_graph.set_attr_int(concat_node, "N", 2)
-    quantize_graph.set_attr_dtype(concat_node, "T", dtypes.float32)
-    float_graph_def.node.extend([concat_node])
-
-    test_graph(float_graph_def, {}, [concat_name])
-
-    # Verify the concat is quantized.
-    eightbit_rewriter = quantize_graph.GraphRewriter(
-        float_graph_def, "eightbit", quantized_input_range=None)
-    eightbit_graph_def = eightbit_rewriter.rewrite([concat_name])
-
-    ops = [node.op for node in eightbit_graph_def.node]
-    self.assertEqual(1, ops.count("QuantizedConcat"))
-
-  def test_multiple_outputs(self):
-    input_constant_name = "input_constant"
-    split_constant_name = "split_constant"
-    split_name = "split"
-    concat_constant_name = "concat_constant"
-    concat_name = "concat"
-
-    float_graph_def = graph_pb2.GraphDef()
-    input_constant = quantize_graph.create_constant_node(
-        input_constant_name,
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.float32,
-        shape=[2, 6])
-    float_graph_def.node.extend([input_constant])
-    split_constant = quantize_graph.create_constant_node(
-        split_constant_name, value=1, dtype=dtypes.int32, shape=[])
-    float_graph_def.node.extend([split_constant])
-    split_node = quantize_graph.create_node(
-        "Split", split_name, [split_constant_name, input_constant_name])
-    quantize_graph.set_attr_int(split_node, "num_split", 2)
-    quantize_graph.set_attr_dtype(split_node, "T", dtypes.float32)
-    float_graph_def.node.extend([split_node])
-    concat_constant = quantize_graph.create_constant_node(
-        concat_constant_name, value=1, dtype=dtypes.int32, shape=[])
-    float_graph_def.node.extend([concat_constant])
-    concat_node = quantize_graph.create_node(
-        "Concat", concat_name,
-        [concat_constant_name, split_name + ":0", split_name + ":1"])
-    quantize_graph.set_attr_int(concat_node, "N", 2)
-    quantize_graph.set_attr_dtype(concat_node, "T", dtypes.float32)
-    float_graph_def.node.extend([concat_node])
-
-    test_graph(float_graph_def, {}, [concat_name])
-
-  def test_node_name_from_input(self):
-    self.assertEqual("SomeName",
-                     quantize_graph.node_name_from_input("^SomeName:2"))
-
-  def test_unique_node_name_from_input(self):
-    self.assertEqual("__hat__SomeName__port__2",
-                     quantize_graph.unique_node_name_from_input("^SomeName:2"))
-
-  def test_identity(self):
-    input_constant_name = "input_constant"
-    identity_name = "identity"
-    float_graph_def = graph_pb2.GraphDef()
-    input_constant = quantize_graph.create_constant_node(
-        input_constant_name,
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.float32,
-        shape=[2, 6])
-    float_graph_def.node.extend([input_constant])
-    identity_node = quantize_graph.create_node("Identity", identity_name,
-                                               [input_constant_name])
-    quantize_graph.set_attr_dtype(identity_node, "T", dtypes.float32)
-    float_graph_def.node.extend([identity_node])
-
-    mul_name = "mul"
-    mul_node = quantize_graph.create_node("Mul", mul_name,
-                                          [identity_name, identity_name])
-    quantize_graph.set_attr_dtype(mul_node, "T", dtypes.float32)
-    float_graph_def.node.extend([mul_node])
-
-    test_graph(float_graph_def, {}, [mul_name])
-
-  def test_keep_control_edges(self):
-    no_op_name = "no_op"
-    a_constant_name = "a_constant"
-    b_constant_name = "b_constant"
-    a_check_name = "a_check"
-    b_check_name = "b_check"
-    a_identity_name = "a_identity"
-    b_identity_name = "b_identity"
-    add_name = "add"
-    graph_def = graph_pb2.GraphDef()
-    no_op = quantize_graph.create_node("NoOp", no_op_name, [])
-    graph_def.node.extend([no_op])
-    a_constant = quantize_graph.create_constant_node(
-        a_constant_name, value=1, dtype=dtypes.float32, shape=[])
-    graph_def.node.extend([a_constant])
-    a_check_node = quantize_graph.create_node("CheckNumerics", a_check_name,
-                                              [a_constant_name])
-    graph_def.node.extend([a_check_node])
-    a_identity_node = quantize_graph.create_node(
-        "Identity", a_identity_name,
-        [a_constant_name, "^" + a_check_name, "^" + no_op_name])
-    graph_def.node.extend([a_identity_node])
-    b_constant = quantize_graph.create_constant_node(
-        b_constant_name, value=1, dtype=dtypes.float32, shape=[])
-    graph_def.node.extend([b_constant])
-    b_check_node = quantize_graph.create_node("CheckNumerics", b_check_name,
-                                              [b_constant_name])
-    graph_def.node.extend([b_check_node])
-    b_identity_node = quantize_graph.create_node(
-        "Identity", b_identity_name, [b_constant_name, "^" + b_check_name])
-    graph_def.node.extend([b_identity_node])
-    add_node = quantize_graph.create_node("Add", add_name,
-                                          [a_identity_name, b_identity_name])
-    quantize_graph.set_attr_dtype(add_node, "T", dtypes.float32)
-    graph_def.node.extend([add_node])
-
-    expected_output = graph_pb2.GraphDef()
-    no_op = quantize_graph.create_node("NoOp", no_op_name, [])
-    expected_output.node.extend([no_op])
-    a_constant = quantize_graph.create_constant_node(
-        a_constant_name, value=1, dtype=dtypes.float32, shape=[])
-    expected_output.node.extend([a_constant])
-    a_identity_node = quantize_graph.create_node(
-        "Identity", a_identity_name, [a_constant_name, "^" + no_op_name])
-    expected_output.node.extend([a_identity_node])
-    b_constant = quantize_graph.create_constant_node(
-        b_constant_name, value=1, dtype=dtypes.float32, shape=[])
-    expected_output.node.extend([b_constant])
-    add_node = quantize_graph.create_node("Add", add_name,
-                                          [a_identity_name, b_constant_name])
-    quantize_graph.set_attr_dtype(add_node, "T", dtypes.float32)
-    expected_output.node.extend([add_node])
-    expected_output.versions.CopyFrom(graph_def.versions)
-    expected_output.library.CopyFrom(graph_def.library)
-
-    output = graph_util.remove_training_nodes(graph_def)
-    stripped_output = graph_util.extract_sub_graph(output, [add_name])
-    self.assertProtoEquals(expected_output, stripped_output)
-
-  def test_batch_norm(self):
-    input_constant_name = "input_constant"
-    mean_constant_name = "mean_constant"
-    variance_constant_name = "variance_constant"
-    beta_constant_name = "beta_constant"
-    gamma_constant_name = "gamma_constant"
-    batch_norm_name = "batch_norm"
-    float_graph_def = graph_pb2.GraphDef()
-    input_constant = quantize_graph.create_constant_node(
-        input_constant_name,
-        value=[1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6],
-        dtype=dtypes.float32,
-        shape=[1, 1, 6, 2])
-    float_graph_def.node.extend([input_constant])
-    mean_constant = quantize_graph.create_constant_node(
-        mean_constant_name, value=[10, 20], dtype=dtypes.float32, shape=[2])
-    float_graph_def.node.extend([mean_constant])
-    variance_constant = quantize_graph.create_constant_node(
-        variance_constant_name,
-        value=[0.25, 0.5],
-        dtype=dtypes.float32,
-        shape=[2])
-    float_graph_def.node.extend([variance_constant])
-    beta_constant = quantize_graph.create_constant_node(
-        beta_constant_name, value=[0.1, 0.6], dtype=dtypes.float32, shape=[2])
-    float_graph_def.node.extend([beta_constant])
-    gamma_constant = quantize_graph.create_constant_node(
-        gamma_constant_name, value=[0, 0], dtype=dtypes.float32, shape=[2])
-    float_graph_def.node.extend([gamma_constant])
-    batch_norm_node = quantize_graph.create_node(
-        "BatchNormWithGlobalNormalization", batch_norm_name, [
-            input_constant_name, mean_constant_name, variance_constant_name,
-            beta_constant_name, gamma_constant_name
-        ])
-    quantize_graph.set_attr_dtype(batch_norm_node, "T", dtypes.float32)
-    quantize_graph.set_attr_bool(batch_norm_node, "scale_after_normalization",
-                                 False)
-    quantize_graph.set_attr_float(batch_norm_node, "variance_epsilon", 0.001)
-    float_graph_def.node.extend([batch_norm_node])
-    test_graph(float_graph_def, {}, [batch_norm_name])
-
-  def test_max_pool(self):
-    input_constant_name = "input_constant"
-    max_pool_name = "max_pool"
-    float_graph_def = graph_pb2.GraphDef()
-    input_constant = quantize_graph.create_constant_node(
-        input_constant_name,
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.float32,
-        shape=[1, 2, 6, 1])
-    float_graph_def.node.extend([input_constant])
-    max_pool_node = quantize_graph.create_node("MaxPool", max_pool_name,
-                                               [input_constant_name])
-    quantize_graph.set_attr_int_list(max_pool_node, "ksize", [1, 2, 2, 1])
-    quantize_graph.set_attr_int_list(max_pool_node, "strides", [1, 1, 1, 1])
-    quantize_graph.set_attr_string(max_pool_node, "padding", b"SAME")
-    float_graph_def.node.extend([max_pool_node])
-    test_graph(float_graph_def, {}, [max_pool_name])
-
-  def test_avg_pool(self):
-    input_constant_name = "input_constant"
-    avg_pool_name = "avg_pool"
-    float_graph_def = graph_pb2.GraphDef()
-    input_constant = quantize_graph.create_constant_node(
-        input_constant_name,
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.float32,
-        shape=[1, 2, 6, 1])
-    float_graph_def.node.extend([input_constant])
-    avg_pool_node = quantize_graph.create_node("AvgPool", avg_pool_name,
-                                               [input_constant_name])
-    quantize_graph.set_attr_dtype(avg_pool_node, "T", dtypes.float32)
-    quantize_graph.set_attr_int_list(avg_pool_node, "ksize", [1, 2, 2, 1])
-    quantize_graph.set_attr_int_list(avg_pool_node, "strides", [1, 1, 1, 1])
-    quantize_graph.set_attr_string(avg_pool_node, "padding", b"SAME")
-    float_graph_def.node.extend([avg_pool_node])
-    test_graph(float_graph_def, {}, [avg_pool_name])
-
-  def test_relu(self):
-    input_constant_name = "input_constant"
-    relu_name = "relu"
-    float_graph_def = graph_pb2.GraphDef()
-    input_constant = quantize_graph.create_constant_node(
-        input_constant_name,
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.float32,
-        shape=[1, 2, 6, 1])
-    float_graph_def.node.extend([input_constant])
-    relu_node = quantize_graph.create_node("Relu", relu_name,
-                                           [input_constant_name])
-    quantize_graph.set_attr_dtype(relu_node, "T", dtypes.float32)
-    float_graph_def.node.extend([relu_node])
-    test_graph(float_graph_def, {}, [relu_name])
-
-  def test_relu_w_fake_quant_w_min_max_vars(self):
-    input_node = quantize_graph.create_constant_node(
-        "input",
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.float32,
-        shape=[1, 2, 6, 1])
-    relu_node = quantize_graph.create_node("Relu", "relu", [input_node.name])
-    quantize_graph.set_attr_dtype(relu_node, "T", dtypes.float32)
-
-    min_node = quantize_graph.create_constant_node(
-        "min_bias_add", value=0, dtype=dtypes.float32, shape=[])
-    max_node = quantize_graph.create_constant_node(
-        "max_bias_add", value=12, dtype=dtypes.float32, shape=[])
-    fake_quant_node = quantize_graph.create_node(
-        "FakeQuantWithMinMaxVars", "fake_quant",
-        [relu_node.name, min_node.name, max_node.name])
-
-    float_graph_def = graph_pb2.GraphDef()
-    float_graph_def.node.extend(
-        [input_node, relu_node, min_node, max_node, fake_quant_node])
-    test_graph(float_graph_def, {}, [fake_quant_node.name], log_graph=True)
-
-    # Verify there is only one Quantize and one Requantize op.
-    eightbit_rewriter = quantize_graph.GraphRewriter(
-        float_graph_def, "eightbit", quantized_input_range=None)
-    eightbit_graph_def = eightbit_rewriter.rewrite([fake_quant_node.name])
-
-    ops = [node.op for node in eightbit_graph_def.node]
-    # No quantize since all inputs are const and can be quantized up-front.
-    self.assertEqual(0, ops.count("QuantizeV2") + ops.count("Quantize"))
-
-    # One dequantize at the end.
-    self.assertEqual(1, ops.count("Dequantize"))
-
-  def test_relu6(self):
-    input_constant_name = "input_constant"
-    relu6_name = "relu6"
-    float_graph_def = graph_pb2.GraphDef()
-    input_constant = quantize_graph.create_constant_node(
-        input_constant_name,
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.float32,
-        shape=[1, 2, 6, 1])
-    float_graph_def.node.extend([input_constant])
-    relu6_node = quantize_graph.create_node("Relu6", relu6_name,
-                                            [input_constant_name])
-    quantize_graph.set_attr_dtype(relu6_node, "T", dtypes.float32)
-    float_graph_def.node.extend([relu6_node])
-    test_graph(float_graph_def, {}, [relu6_name])
-
-  def test_bias_add(self):
-    input_constant_name = "input_constant"
-    offset_constant_name = "offset_constant"
-    bias_add_name = "bias_add"
-    float_graph_def = graph_pb2.GraphDef()
-    input_constant = quantize_graph.create_constant_node(
-        input_constant_name,
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        dtype=dtypes.float32,
-        shape=[1, 1, 2, 6])
-    float_graph_def.node.extend([input_constant])
-    offset_constant = quantize_graph.create_constant_node(
-        offset_constant_name,
-        value=[1, 2, 3, 4, 5, 6],
-        dtype=dtypes.float32,
-        shape=[6])
-    float_graph_def.node.extend([offset_constant])
-    bias_add_node = quantize_graph.create_node(
-        "BiasAdd", bias_add_name, [input_constant_name, offset_constant_name])
-    quantize_graph.set_attr_dtype(bias_add_node, "T", dtypes.float32)
-    float_graph_def.node.extend([bias_add_node])
-    test_graph(float_graph_def, {}, [bias_add_name])
-
-  def test_quantized_input_range_errors(self):
-    with self.assertRaises(ValueError):
-      # Invalid mode.
-      quantize_graph.GraphRewriter(graph_pb2.GraphDef(), "weights_rounded",
-                                   [0, 1])
-    with self.assertRaises(ValueError):
-      # Invalid range.
-      quantize_graph.GraphRewriter(graph_pb2.GraphDef(), "eightbit", [0, -1])
-
-  def test_quantized_input_range_bias_add(self):
-    input_shape = [1, 1, 2, 6]
-    input_n = quantize_graph.create_node("Placeholder", "input", [])
-    quantize_graph.set_attr_dtype(input_n, "dtype", dtypes.float32)
-    quantize_graph.set_attr_shape(input_n, "shape", input_shape)
-    offset_n = quantize_graph.create_constant_node(
-        "offset", value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6])
-    bias_add_n = quantize_graph.create_node("BiasAdd", "bias_add",
-                                            [input_n.name, offset_n.name])
-    quantize_graph.set_attr_dtype(bias_add_n, "T", dtypes.float32)
-
-    float_graph_def = graph_pb2.GraphDef()
-    float_graph_def.node.extend([input_n, offset_n, bias_add_n])
-
-    input_map = {
-        input_n.name + ":0":
-            np.reshape([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], input_shape)
-    }
-    self._RunTestsForQuantizedInputRange(float_graph_def, input_map,
-                                         [bias_add_n.name], [-1, 20.])
-    self._RunTestsForQuantizedInputRange(float_graph_def, input_map,
-                                         [bias_add_n.name], [0, 12.])
-
-  def test_quantized_input_range_mat_mul(self):
-    shapes = [[3, 2], [2, 4]]
-    inputs = []
-    for i, shape in enumerate(shapes):
-      node = quantize_graph.create_node("Placeholder", "input_%s" % i, [])
-      quantize_graph.set_attr_dtype(node, "dtype", dtypes.float32)
-      quantize_graph.set_attr_shape(node, "shape", shape)
-      inputs.append(node)
-    mat_mul_node = quantize_graph.create_node("MatMul", "mat_mul",
-                                              [n.name for n in inputs])
-    quantize_graph.set_attr_dtype(mat_mul_node, "T", dtypes.float32)
-
-    float_graph_def = graph_pb2.GraphDef()
-    float_graph_def.node.extend(inputs + [mat_mul_node])
-
-    input_map = {
-        inputs[0].name + ":0":
-            np.reshape([1, 2, 3, 4, 5, 6], shapes[0]),
-        inputs[1].name + ":0":
-            np.reshape([.8, .7, .6, .5, .4, .3, .2, .1], shapes[1])
-    }
-    self._RunTestsForQuantizedInputRange(float_graph_def, input_map,
-                                         [mat_mul_node.name], [-1, 20.])
-    self._RunTestsForQuantizedInputRange(float_graph_def, input_map,
-                                         [mat_mul_node.name], [0, 6.])
-
-  def _RunTestsForQuantizedInputRange(self, float_graph_def, input_map,
-                                      output_names, input_range):
-    if sys.version_info[0] == 3:
-      # uint8->quint8 conversion for numpy is not working currently.
-      return
-
-    quantized_input_map = {}
-    for k, v in input_map.items():
-      arr = [
-          int(
-              round((n - input_range[0]) * 255 / (input_range[1] - input_range[
-                  0]))) for n in v.flat
-      ]
-      arr = np.array(arr, np.uint8)
-      arr = arr.reshape(v.shape)
-      arr = arr.astype(dtypes.quint8.as_numpy_dtype)
-      quantized_input_map[k] = arr
-    output_tensors = [output_name + ":0" for output_name in output_names]
-    float_results = run_graph_def(float_graph_def, input_map, output_tensors)
-
-    # Quantize treating the input as quantized in range <input_range>.
-    rewriter = quantize_graph.GraphRewriter(float_graph_def, "eightbit",
-                                            input_range)
-    graph_def = rewriter.rewrite(output_names)
-    results = run_graph_def(graph_def, quantized_input_map, output_tensors)
-    for expected, result in zip(float_results, results):
-      assert are_tensors_near(expected, result, .5)
-    ops = [node.op for node in graph_def.node]
-    self.assertEqual(0, ops.count("QuantizeV2") + ops.count("Quantize"))
-    self.assertEqual(len(output_names), ops.count("Dequantize"))
-
-    # Quantize without treating input as quantized.
-    rewriter = quantize_graph.GraphRewriter(
-        float_graph_def, "eightbit", quantized_input_range=None)
-    graph_def = rewriter.rewrite(output_names)
-    results = run_graph_def(graph_def, input_map, output_tensors)
-    for expected, result in zip(float_results, results):
-      assert are_tensors_near(expected, result, .5)
-    ops = [node.op for node in graph_def.node]
-    self.assertEqual(
-        len(input_map), ops.count("QuantizeV2") + ops.count("Quantize"))
-    self.assertEqual(len(output_names), ops.count("Dequantize"))
-
-  def test_bias_add_w_fake_quant_w_min_max_vars(self):
-    input_node = quantize_graph.create_constant_node(
-        "input",
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
-        dtype=dtypes.float32,
-        shape=[1, 1, 2, 5])
-    offset_node = quantize_graph.create_constant_node(
-        "offset", value=[1, 2, 3, 4, 5], dtype=dtypes.float32, shape=[5])
-    bias_add_node = quantize_graph.create_node(
-        "BiasAdd", "bias_add", [input_node.name, offset_node.name])
-    quantize_graph.set_attr_dtype(bias_add_node, "T", dtypes.float32)
-
-    min_node = quantize_graph.create_constant_node(
-        "min_bias_add", value=-.5, dtype=dtypes.float32, shape=[])
-    max_node = quantize_graph.create_constant_node(
-        "max_bias_add", value=15.5, dtype=dtypes.float32, shape=[])
-    fake_quant_node = quantize_graph.create_node(
-        "FakeQuantWithMinMaxVars", "fake_quant",
-        [bias_add_node.name, min_node.name, max_node.name])
-
-    float_graph_def = graph_pb2.GraphDef()
-    float_graph_def.node.extend([
-        input_node, offset_node, bias_add_node, min_node, max_node,
-        fake_quant_node
-    ])
-    test_graph(float_graph_def, {}, [fake_quant_node.name], log_graph=True)
-
-    # Verify there is only one Quantize and one Requantize op.
-    # Pass in fallback_quantization_range, although it will have no effect
-    # because the FakeQuantWithMinMaxVars are used instead.
-    eightbit_rewriter = quantize_graph.GraphRewriter(
-        float_graph_def,
-        "eightbit",
-        quantized_input_range=None,
-        fallback_quantization_range=[-100, 100])
-    eightbit_graph_def = eightbit_rewriter.rewrite([fake_quant_node.name])
-
-    ops = [node.op for node in eightbit_graph_def.node]
-    node_names = [node.name for node in eightbit_graph_def.node]
-    # No quantize since all inputs are const and can be quantized up-front.
-    self.assertEqual(0, ops.count("QuantizeV2") + ops.count("Quantize"))
-
-    # One dequantize at the end.
-    self.assertEqual(1, ops.count("Dequantize"))
-
-    # The fallback constants are not in the graph.
-    self.assertEqual(0, node_names.count("fallback_quantization_min_value"))
-    self.assertEqual(0, node_names.count("fallback_quantization_max_value"))
-
-  def test_bias_add_w_fallback_min_max_vars(self):
-    input_node = quantize_graph.create_constant_node(
-        "input",
-        value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
-        dtype=dtypes.float32,
-        shape=[1, 1, 2, 5])
-    offset_node = quantize_graph.create_constant_node(
-        "offset", value=[1, 2, 3, 4, 5], dtype=dtypes.float32, shape=[5])
-    bias_add_node = quantize_graph.create_node(
-        "BiasAdd", "bias_add", [input_node.name, offset_node.name])
-    quantize_graph.set_attr_dtype(bias_add_node, "T", dtypes.float32)
-
-    float_graph_def = graph_pb2.GraphDef()
-    float_graph_def.node.extend([input_node, offset_node, bias_add_node])
-    test_graph(float_graph_def, {}, [bias_add_node.name], log_graph=True)
-
-    # Verify there is only one Quantize, one Requantize op, and no
-    # RequantizationRange op.
-    eightbit_rewriter = quantize_graph.GraphRewriter(
-        float_graph_def,
-        "eightbit",
-        quantized_input_range=None,
-        fallback_quantization_range=[-.5, 15.5])
-    eightbit_graph_def = eightbit_rewriter.rewrite([bias_add_node.name])
-
-    ops = [node.op for node in eightbit_graph_def.node]
-    node_names = [node.name for node in eightbit_graph_def.node]
-    # No quantize since all inputs are const and can be quantized up-front.
-    self.assertEqual(0, ops.count("QuantizeV2") + ops.count("Quantize"))
-
-    # One dequantize at the end.
-    self.assertEqual(1, ops.count("Dequantize"))
-
-    # No RequantizationRange
-    self.assertEqual(0, ops.count("RequantizationRange"))
-
-    # The fallback constants are in the graph.
-    self.assertEqual(1, node_names.count("fallback_quantization_min_value"))
-    self.assertEqual(1, node_names.count("fallback_quantization_max_value"))
-
-  def test_remove_redundant_quantization(self):
-    a_constant_name = "a_constant"
-    a_constant_min_name = "a_constant_min"
-    a_constant_max_name = "a_constant_max"
-    a_dequantize_name = "a_dequantize"
-    a_quantize_name = "a_quantize"
-    b_constant_name = "b_constant"
-    b_constant_min_name = "b_constant_min"
-    b_constant_max_name = "b_constant_max"
-    b_dequantize_name = "b_dequantize"
-    b_quantize_name = "b_quantize"
-    mat_mul_name = "mat_mul"
-    graph_def = graph_pb2.GraphDef()
-    a_constant = quantize_graph.create_constant_node(
-        a_constant_name, value=(0,), dtype=dtypes.quint8, shape=[])
-    graph_def.node.extend([a_constant])
-    a_constant_min = quantize_graph.create_constant_node(
-        a_constant_min_name, value=2, dtype=dtypes.float32, shape=[])
-    graph_def.node.extend([a_constant_min])
-    a_constant_max = quantize_graph.create_constant_node(
-        a_constant_max_name, value=2, dtype=dtypes.float32, shape=[])
-    graph_def.node.extend([a_constant_max])
-    a_dequantize_node = quantize_graph.create_node(
-        "Dequantize", a_dequantize_name,
-        [a_constant_name, a_constant_min_name, a_constant_max_name])
-    quantize_graph.set_attr_dtype(a_dequantize_node, "T", dtypes.uint8)
-    graph_def.node.extend([a_dequantize_node])
-    a_quantize_node = quantize_graph.create_node(
-        "QuantizeV2", a_quantize_name,
-        [a_dequantize_name, a_dequantize_name + ":1", a_dequantize_name + ":2"])
-    quantize_graph.set_attr_dtype(a_quantize_node, "T", dtypes.uint8)
-    graph_def.node.extend([a_quantize_node])
-    b_constant = quantize_graph.create_constant_node(
-        b_constant_name, value=(0,), dtype=dtypes.quint8, shape=[])
-    graph_def.node.extend([b_constant])
-    b_constant_min = quantize_graph.create_constant_node(
-        b_constant_min_name, value=3, dtype=dtypes.float32, shape=[])
-    graph_def.node.extend([b_constant_min])
-    b_constant_max = quantize_graph.create_constant_node(
-        b_constant_max_name, value=3, dtype=dtypes.float32, shape=[])
-    graph_def.node.extend([b_constant_max])
-    b_dequantize_node = quantize_graph.create_node(
-        "Dequantize", b_dequantize_name,
-        [b_constant_name, b_constant_min_name, b_constant_max_name])
-    quantize_graph.set_attr_dtype(b_dequantize_node, "T", dtypes.uint8)
-    graph_def.node.extend([b_dequantize_node])
-    b_quantize_node = quantize_graph.create_node(
-        "QuantizeV2", b_quantize_name,
-        [b_dequantize_name, b_dequantize_name + ":1", b_dequantize_name + ":2"])
-    quantize_graph.set_attr_dtype(b_quantize_node, "T", dtypes.uint8)
-    graph_def.node.extend([b_quantize_node])
-    mat_mul_node = quantize_graph.create_node("QuantizedMatMul", mat_mul_name, [
-        a_quantize_name, b_quantize_name, a_quantize_name + ":1",
-        a_quantize_name + ":2", b_quantize_name + ":1", b_quantize_name + ":2"
-    ])
-    quantize_graph.set_attr_dtype(mat_mul_node, "T1", dtypes.uint8)
-    quantize_graph.set_attr_dtype(mat_mul_node, "T2", dtypes.int32)
-    graph_def.node.extend([mat_mul_node])
-
-    expected_output = graph_pb2.GraphDef()
-    a_constant = quantize_graph.create_constant_node(
-        a_constant_name, value=(0,), dtype=dtypes.quint8, shape=[])
-    expected_output.node.extend([a_constant])
-    a_constant_min = quantize_graph.create_constant_node(
-        a_constant_min_name, value=2, dtype=dtypes.float32, shape=[])
-    expected_output.node.extend([a_constant_min])
-    a_constant_max = quantize_graph.create_constant_node(
-        a_constant_max_name, value=2, dtype=dtypes.float32, shape=[])
-    expected_output.node.extend([a_constant_max])
-    b_constant = quantize_graph.create_constant_node(
-        b_constant_name, value=(0,), dtype=dtypes.quint8, shape=[])
-    expected_output.node.extend([b_constant])
-    b_constant_min = quantize_graph.create_constant_node(
-        b_constant_min_name, value=3, dtype=dtypes.float32, shape=[])
-    expected_output.node.extend([b_constant_min])
-    b_constant_max = quantize_graph.create_constant_node(
-        b_constant_max_name, value=3, dtype=dtypes.float32, shape=[])
-    expected_output.node.extend([b_constant_max])
-    mat_mul_node = quantize_graph.create_node("QuantizedMatMul", mat_mul_name, [
-        a_constant_name, b_constant_name, a_constant_min_name,
-        a_constant_max_name, b_constant_min_name, b_constant_max_name
-    ])
-    quantize_graph.set_attr_dtype(mat_mul_node, "T1", dtypes.uint8)
-    quantize_graph.set_attr_dtype(mat_mul_node, "T2", dtypes.int32)
-    expected_output.node.extend([mat_mul_node])
-    expected_output.versions.CopyFrom(graph_def.versions)
-    expected_output.library.CopyFrom(graph_def.library)
-
-    rewriter = quantize_graph.GraphRewriter(
-        graph_def, [mat_mul_name], quantized_input_range=None)
-    output = rewriter.remove_redundant_quantization(graph_def)
-    stripped_output = graph_util.extract_sub_graph(output, [mat_mul_name])
-    self.assertProtoEquals(expected_output, stripped_output)
-
-
-if __name__ == "__main__":
-  test.main()
-- 
GitLab


From 5bbcdb8a58efd97b0f73927218d5896da67f5203 Mon Sep 17 00:00:00 2001
From: Isaac Burbank <bassmanburbank@gmail.com>
Date: Wed, 26 Sep 2018 11:34:38 -0600
Subject: [PATCH 0737/1357] Update tf_inspect_test.py

Remove subsection of added tests that were problematic.
---
 tensorflow/python/util/tf_inspect_test.py | 78 -----------------------
 1 file changed, 78 deletions(-)

diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py
index 55f88f8fc6..ba9430c756 100644
--- a/tensorflow/python/util/tf_inspect_test.py
+++ b/tensorflow/python/util/tf_inspect_test.py
@@ -353,41 +353,6 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getfullargspec(partial_function))
 
-  def testGetFullArgSpecOnPartialInvalidFullArgSpec(self):
-    """Tests getfullargspec.
-
-    Tests on partial function that doesn't have valid fullargspec.
-    """
-
-    def func(m, n, l, k=4):
-      return 2 * m + l + n * k
-
-    partial_func = functools.partial(func, n=7)
-
-    exception_message = (r"Some arguments \['l'\] do not have default value, "
-                         "but they are positioned after those with default "
-                         "values. This can not be expressed with ArgSpec.")
-    with self.assertRaisesRegexp(ValueError, exception_message):
-      tf_inspect.getfullargspec(partial_func)
-
-  def testGetFullArgSpecOnPartialValidFullArgSpec(self):
-    """Tests getfullargspec on partial function with valid fullargspec."""
-
-    def func(m, n, l, k=4):
-      return 2 * m + l + n * k
-
-    partial_func = functools.partial(func, n=7, l=2)
-    argspec = tf_inspect.FullArgSpec(
-        args=['m', 'n', 'l', 'k'],
-        varargs=None,
-        varkw=None,
-        defaults=(7, 2, 4),
-        kwonlyargs=[],
-        kwonlydefaults=None,
-        annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
-
   def testGetFullArgSpecOnPartialNoArgumentsLeft(self):
     """Tests getfullargspec on partial function that prunes all arguments."""
 
@@ -401,35 +366,6 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
 
-  def testGetFullArgSpecOnPartialKeywordArgument(self):
-    """Tests getfullargspec on partial function that prunes some arguments."""
-
-    def func(m, n):
-      return 2 * m + n
-
-    partial_func = functools.partial(func, n=7)
-    argspec = tf_inspect.FullArgSpec(
-        args=['m', 'n'], varargs=None, varkw=None, defaults=(7,),
-        kwonlyargs=[], kwonlydefaults=None, annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
-
-  def testGetFullArgSpecOnPartialKeywordArgumentWithDefaultValue(self):
-    """Tests getfullargspec.
-    
-    Tests on partial function that prunes argument by keyword.
-    """
-
-    def func(m=1, n=2):
-      return 2 * m + n
-
-    partial_func = functools.partial(func, n=7)
-    argspec = tf_inspect.FullArgSpec(
-        args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7),
-        kwonlyargs=[], kwonlydefaults=None, annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
-
   def testGetFullArgSpecOnPartialWithVarargs(self):
     """Tests getfullargspec on partial function with variable arguments."""
 
@@ -459,20 +395,6 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
 
-  def testGetFullArgSpecOnPartialWithDecorator(self):
-    """Tests getfullargspec on decorated partial function."""
-
-    @test_decorator('decorator')
-    def func(m=1, n=2):
-      return 2 * m + n
-
-    partial_func = functools.partial(func, n=7)
-    argspec = tf_inspect.FullArgSpec(
-        args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7),
-        kwonlyargs=[], kwonlydefaults=None, annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
-
   def testGetFullArgSpecOnCallableObject(self):
 
     class Callable(object):
-- 
GitLab


From 5cedb0427bd4db4117182da8bc0680dd555b4f49 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Wed, 26 Sep 2018 10:33:45 -0700
Subject: [PATCH 0738/1357] Add checks for dilation_rate.

PiperOrigin-RevId: 214627202
---
 .../lite/kernels/internal/optimized/depthwiseconv_uint8.h       | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index f892b8f661..a70545599b 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1701,6 +1701,8 @@ inline void DepthwiseConv(
   const int output_shift = params.output_shift;
   const int dilation_width_factor = params.dilation_width_factor;
   const int dilation_height_factor = params.dilation_height_factor;
+  TFLITE_DCHECK_GE(dilation_width_factor, 1);
+  TFLITE_DCHECK_GE(dilation_height_factor, 1);
   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-- 
GitLab


From cccace731712ab192448ff9bc3ce7259fd462e74 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Wed, 26 Sep 2018 11:18:09 -0700
Subject: [PATCH 0739/1357] Internal change.

PiperOrigin-RevId: 214636032
---
 tensorflow/contrib/lite/build_def.bzl         |  38 +++
 tensorflow/contrib/lite/python/BUILD          |   2 +-
 .../lite/python/convert_saved_model.py        |  12 +-
 tensorflow/contrib/lite/testing/BUILD         |  29 +++
 .../model_coverage/model_coverage_lib.py      | 241 ++++++++++++++++++
 .../model_coverage/model_coverage_lib_test.py | 130 ++++++++++
 6 files changed, 445 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
 create mode 100644 tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index fc4d9b4f17..7f5c6bdc2f 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -391,3 +391,41 @@ def gen_selected_ops(name, model):
               (tool, model, out, tflite_path[2:]),
         tools = [tool],
     )
+
+def gen_full_model_test(conversion_modes, models, data, test_suite_tag):
+    """Generates Python test targets for testing TFLite models.
+
+    Args:
+      conversion_modes: List of conversion modes to test the models on.
+      models: List of models to test.
+      data: List of BUILD targets linking the data.
+      test_suite_tag: Tag identifying the model test suite.
+    """
+    options = [
+        (conversion_mode, model)
+        for model in models
+        for conversion_mode in conversion_modes
+    ]
+
+    for conversion_mode, model_name in options:
+        native.py_test(
+            name = "model_coverage_test_%s_%s" % (model_name, conversion_mode.lower()),
+            srcs = ["model_coverage_test.py"],
+            main = "model_coverage_test.py",
+            args = [
+                "--model_name=%s" % model_name,
+                "--converter_mode=%s" % conversion_mode,
+            ],
+            data = data,
+            srcs_version = "PY2AND3",
+            tags = [
+                "no_oss",
+                "no_windows",
+                "notap",
+            ] + [test_suite_tag],
+            deps = [
+                "//tensorflow/contrib/lite/testing:model_coverage_lib",
+                "//tensorflow/contrib/lite/python:lite",
+                "//tensorflow/python:client_testlib",
+            ],
+        )
diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index 57e1290e07..916788f215 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -144,7 +144,7 @@ py_library(
     name = "convert_saved_model",
     srcs = ["convert_saved_model.py"],
     srcs_version = "PY2AND3",
-    visibility = ["//visibility:public"],
+    visibility = ["//tensorflow/contrib/lite:__subpackages__"],
     deps = [
         ":convert",
         "//tensorflow/contrib/saved_model:saved_model_py",
diff --git a/tensorflow/contrib/lite/python/convert_saved_model.py b/tensorflow/contrib/lite/python/convert_saved_model.py
index 1553464b9f..d18b60d0ea 100644
--- a/tensorflow/contrib/lite/python/convert_saved_model.py
+++ b/tensorflow/contrib/lite/python/convert_saved_model.py
@@ -44,7 +44,7 @@ def _log_tensor_details(tensor_info):
                  dtype)
 
 
-def _get_meta_graph_def(saved_model_dir, tag_set):
+def get_meta_graph_def(saved_model_dir, tag_set):
   """Validate saved_model and extract MetaGraphDef.
 
   Args:
@@ -61,7 +61,7 @@ def _get_meta_graph_def(saved_model_dir, tag_set):
     return loader.load(sess, tag_set, saved_model_dir)
 
 
-def _get_signature_def(meta_graph, signature_key):
+def get_signature_def(meta_graph, signature_key):
   """Get the signature def from meta_graph with given signature_key.
 
   Args:
@@ -86,7 +86,7 @@ def _get_signature_def(meta_graph, signature_key):
   return signature_def_map[signature_key]
 
 
-def _get_inputs_outputs(signature_def):
+def get_inputs_outputs(signature_def):
   """Get inputs and outputs from SignatureDef.
 
   Args:
@@ -236,9 +236,9 @@ def freeze_saved_model(saved_model_dir, input_arrays, input_shapes,
       input_arrays or output_arrays are not valid.
   """
   # Read SignatureDef.
-  meta_graph = _get_meta_graph_def(saved_model_dir, tag_set)
-  signature_def = _get_signature_def(meta_graph, signature_key)
-  inputs, outputs = _get_inputs_outputs(signature_def)
+  meta_graph = get_meta_graph_def(saved_model_dir, tag_set)
+  signature_def = get_signature_def(meta_graph, signature_key)
+  inputs, outputs = get_inputs_outputs(signature_def)
 
   # Check SavedModel for assets directory.
   collection_def = meta_graph.collection_def
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index a4736bfee9..55ef1172b2 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -13,6 +13,7 @@ load("//tensorflow/contrib/lite:special_rules.bzl", "tflite_portable_test_suite"
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
+    "py_test",
 )
 
 [gen_zip_test(
@@ -362,4 +363,32 @@ cc_binary(
     ],
 )
 
+py_binary(
+    name = "model_coverage_lib",
+    srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    visibility = ["//tensorflow/contrib/lite:__subpackages__"],
+    deps = [
+        "//tensorflow/contrib/lite/python:lite",
+        "//tensorflow/python:platform",
+    ],
+)
+
+py_test(
+    name = "model_coverage_lib_test",
+    srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+        "notap",
+    ],
+    deps = [
+        ":model_coverage_lib",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 tflite_portable_test_suite()
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
new file mode 100644
index 0000000000..f8ab394c60
--- /dev/null
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
@@ -0,0 +1,241 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions to test TFLite models."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.lite.python import convert_saved_model as _convert_saved_model
+from tensorflow.contrib.lite.python import lite as _lite
+from tensorflow.core.framework import graph_pb2 as _graph_pb2
+from tensorflow.python import keras as _keras
+from tensorflow.python.client import session as _session
+from tensorflow.python.framework.importer import import_graph_def as _import_graph_def
+from tensorflow.python.lib.io import file_io as _file_io
+from tensorflow.python.saved_model import signature_constants as _signature_constants
+from tensorflow.python.saved_model import tag_constants as _tag_constants
+
+
+def _convert(converter, **kwargs):
+  """Converts the model.
+
+  Args:
+    converter: TocoConverter object.
+    **kwargs: Additional arguments to be passed into the converter. Supported
+      flags are {"converter_mode", "post_training_quant"}.
+
+  Returns:
+    The converted TFLite model in serialized format.
+  """
+  if "converter_mode" in kwargs:
+    converter.converter_mode = kwargs["converter_mode"]
+  if "post_training_quantize" in kwargs:
+    converter.post_training_quantize = kwargs["post_training_quantize"]
+  return converter.convert()
+
+
+def _generate_random_input_data(tflite_model, seed=None):
+  """Generates input data based on the input tensors in the TFLite model.
+
+  Args:
+    tflite_model: Serialized TensorFlow Lite model.
+    seed: Integer seed for the random generator. (default None)
+
+  Returns:
+    List of np.ndarray.
+  """
+  interpreter = _lite.Interpreter(model_content=tflite_model)
+  interpreter.allocate_tensors()
+  input_details = interpreter.get_input_details()
+
+  if seed:
+    np.random.seed(seed=seed)
+  return [
+      np.array(
+          np.random.random_sample(input_tensor["shape"]),
+          dtype=input_tensor["dtype"]) for input_tensor in input_details
+  ]
+
+
+def _evaluate_tflite_model(tflite_model, input_data):
+  """Returns evaluation of input data on TFLite model.
+
+  Args:
+    tflite_model: Serialized TensorFlow Lite model.
+    input_data: List of np.ndarray.
+
+  Returns:
+    List of np.ndarray.
+  """
+  interpreter = _lite.Interpreter(model_content=tflite_model)
+  interpreter.allocate_tensors()
+
+  input_details = interpreter.get_input_details()
+  output_details = interpreter.get_output_details()
+
+  for input_tensor, tensor_data in zip(input_details, input_data):
+    interpreter.set_tensor(input_tensor["index"], tensor_data)
+
+  interpreter.invoke()
+  output_data = [
+      interpreter.get_tensor(output_tensor["index"])
+      for output_tensor in output_details
+  ]
+  return output_data
+
+
+def evaluate_frozen_graph(filename, input_arrays, output_arrays):
+  """Returns a function that evaluates the frozen graph on input data.
+
+  Args:
+    filename: Full filepath of file containing frozen GraphDef.
+    input_arrays: List of input tensors to freeze graph with.
+    output_arrays: List of output tensors to freeze graph with.
+
+  Returns:
+    Lambda function ([np.ndarray data] : [np.ndarray result]).
+  """
+  with _session.Session().as_default() as sess:
+    with _file_io.FileIO(filename, "rb") as f:
+      file_content = f.read()
+
+    graph_def = _graph_pb2.GraphDef()
+    graph_def.ParseFromString(file_content)
+    _import_graph_def(graph_def, name="")
+
+    inputs = _convert_saved_model.get_tensors_from_tensor_names(
+        sess.graph, input_arrays)
+    outputs = _convert_saved_model.get_tensors_from_tensor_names(
+        sess.graph, output_arrays)
+
+    return lambda input_data: sess.run(outputs, dict(zip(inputs, input_data)))
+
+
+def evaluate_saved_model(directory, tag_set, signature_key):
+  """Returns a function that evaluates the SavedModel on input data.
+
+  Args:
+    directory: SavedModel directory to convert.
+    tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to
+      analyze. All tags in the tag set must be present.
+    signature_key: Key identifying SignatureDef containing inputs and outputs.
+
+  Returns:
+    Lambda function ([np.ndarray data] : [np.ndarray result]).
+  """
+  with _session.Session().as_default() as sess:
+    if tag_set is None:
+      tag_set = set([_tag_constants.SERVING])
+    if signature_key is None:
+      signature_key = _signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
+    meta_graph = _convert_saved_model.get_meta_graph_def(directory, tag_set)
+    signature_def = _convert_saved_model.get_signature_def(
+        meta_graph, signature_key)
+    inputs, outputs = _convert_saved_model.get_inputs_outputs(signature_def)
+
+    return lambda input_data: sess.run(outputs, dict(zip(inputs, input_data)))
+
+
+def evaluate_keras_model(filename):
+  """Returns a function that evaluates the tf.keras model on input data.
+
+  Args:
+    filename: Full filepath of HDF5 file containing the tf.keras model.
+
+  Returns:
+    Lambda function ([np.ndarray data] : [np.ndarray result]).
+  """
+  keras_model = _keras.models.load_model(filename)
+  return lambda input_data: [keras_model.predict(input_data)]
+
+
+# TODO(nupurgarg): Make this function a parameter to test_frozen_graph (and
+# related functions) in order to make it easy to use different data generators.
+def compare_models_random_data(tflite_model, tf_eval_func, tolerance=5):
+  """Compares TensorFlow and TFLite models with random data.
+
+  Args:
+    tflite_model: Serialized TensorFlow Lite model.
+    tf_eval_func: Lambda function that takes in input data and outputs the
+      results of the TensorFlow model ([np.ndarray data] : [np.ndarray result]).
+    tolerance: Decimal place to check accuracy to.
+  """
+  input_data = _generate_random_input_data(tflite_model)
+  tf_results = tf_eval_func(input_data)
+  tflite_results = _evaluate_tflite_model(tflite_model, input_data)
+  for tf_result, tflite_result in zip(tf_results, tflite_results):
+    np.testing.assert_almost_equal(tf_result, tflite_result, tolerance)
+
+
+def test_frozen_graph(filename, input_arrays, output_arrays, **kwargs):
+  """Validates the TensorFlow frozen graph converts to a TFLite model.
+
+  Converts the TensorFlow frozen graph to TFLite and checks the accuracy of the
+  model on random data.
+
+  Args:
+    filename: Full filepath of file containing frozen GraphDef.
+    input_arrays: List of input tensors to freeze graph with.
+    output_arrays: List of output tensors to freeze graph with.
+    **kwargs: Additional arguments to be passed into the converter.
+  """
+  converter = _lite.TocoConverter.from_frozen_graph(filename, input_arrays,
+                                                    output_arrays)
+  tflite_model = _convert(converter, **kwargs)
+
+  tf_eval_func = evaluate_frozen_graph(filename, input_arrays, output_arrays)
+  compare_models_random_data(tflite_model, tf_eval_func)
+
+
+def test_saved_model(directory, tag_set=None, signature_key=None, **kwargs):
+  """Validates the TensorFlow SavedModel converts to a TFLite model.
+
+  Converts the TensorFlow SavedModel to TFLite and checks the accuracy of the
+  model on random data.
+
+  Args:
+    directory: SavedModel directory to convert.
+    tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to
+      analyze. All tags in the tag set must be present.
+    signature_key: Key identifying SignatureDef containing inputs and outputs.
+    **kwargs: Additional arguments to be passed into the converter.
+  """
+  converter = _lite.TocoConverter.from_saved_model(directory, tag_set,
+                                                   signature_key)
+  tflite_model = _convert(converter, **kwargs)
+
+  tf_eval_func = evaluate_saved_model(directory, tag_set, signature_key)
+  compare_models_random_data(tflite_model, tf_eval_func)
+
+
+def test_keras_model(filename, **kwargs):
+  """Validates the tf.keras model converts to a TFLite model.
+
+  Converts the tf.keras model to TFLite and checks the accuracy of the model on
+  random data.
+
+  Args:
+    filename: Full filepath of HDF5 file containing the tf.keras model.
+    **kwargs: Additional arguments to be passed into the converter.
+  """
+  converter = _lite.TocoConverter.from_keras_model_file(filename)
+  tflite_model = _convert(converter, **kwargs)
+
+  tf_eval_func = evaluate_keras_model(filename)
+  compare_models_random_data(tflite_model, tf_eval_func)
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
new file mode 100644
index 0000000000..5f3355e734
--- /dev/null
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
@@ -0,0 +1,130 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for model_coverage_lib.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+
+from tensorflow.contrib.lite.python import lite
+from tensorflow.contrib.lite.testing.model_coverage import model_coverage_lib as model_coverage
+from tensorflow.python import keras
+from tensorflow.python.client import session
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import saved_model
+from tensorflow.python.training.training_util import write_graph
+
+
+class EvaluateFrozenGraph(test.TestCase):
+
+  def _saveFrozenGraph(self, sess):
+    graph_def_file = os.path.join(self.get_temp_dir(), 'model.pb')
+    write_graph(sess.graph_def, '', graph_def_file, False)
+    return graph_def_file
+
+  def testFloat(self):
+    with session.Session().as_default() as sess:
+      in_tensor = array_ops.placeholder(
+          shape=[1, 16, 16, 3], dtype=dtypes.float32)
+      _ = in_tensor + in_tensor
+    filename = self._saveFrozenGraph(sess)
+
+    model_coverage.test_frozen_graph(filename, ['Placeholder'], ['add'])
+
+  def testMultipleOutputs(self):
+    with session.Session().as_default() as sess:
+      in_tensor_1 = array_ops.placeholder(
+          shape=[1, 16], dtype=dtypes.float32, name='inputA')
+      in_tensor_2 = array_ops.placeholder(
+          shape=[1, 16], dtype=dtypes.float32, name='inputB')
+
+      weight = constant_op.constant(-1.0, shape=[16, 16])
+      bias = constant_op.constant(-1.0, shape=[16])
+      layer = math_ops.matmul(in_tensor_1, weight) + bias
+      _ = math_ops.reduce_mean(math_ops.square(layer - in_tensor_2))
+    filename = self._saveFrozenGraph(sess)
+
+    model_coverage.test_frozen_graph(filename, ['inputA', 'inputB'],
+                                     ['add', 'Mean'])
+
+
+class EvaluateSavedModel(test.TestCase):
+
+  def testFloat(self):
+    saved_model_dir = os.path.join(self.get_temp_dir(), 'simple_savedmodel')
+    with session.Session().as_default() as sess:
+      in_tensor_1 = array_ops.placeholder(
+          shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
+      in_tensor_2 = array_ops.placeholder(
+          shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
+      out_tensor = in_tensor_1 + in_tensor_2
+
+      inputs = {'x': in_tensor_1, 'y': in_tensor_2}
+      outputs = {'z': out_tensor}
+      saved_model.simple_save(sess, saved_model_dir, inputs, outputs)
+    model_coverage.test_saved_model(saved_model_dir)
+
+
+class EvaluateKerasModel(test.TestCase):
+
+  def _getSingleInputKerasModel(self):
+    """Returns single input Sequential tf.keras model."""
+    keras.backend.clear_session()
+
+    xs = [-1, 0, 1, 2, 3, 4]
+    ys = [-3, -1, 1, 3, 5, 7]
+
+    model = keras.Sequential([keras.layers.Dense(units=1, input_shape=[1])])
+    model.compile(optimizer='sgd', loss='mean_squared_error')
+    model.train_on_batch(xs, ys)
+    return model
+
+  def _saveKerasModel(self, model):
+    try:
+      fd, keras_file = tempfile.mkstemp('.h5')
+      keras.models.save_model(model, keras_file)
+    finally:
+      os.close(fd)
+    return keras_file
+
+  def testFloat(self):
+    model = self._getSingleInputKerasModel()
+    keras_file = self._saveKerasModel(model)
+
+    model_coverage.test_keras_model(keras_file)
+
+  def testPostTrainingQuantize(self):
+    model = self._getSingleInputKerasModel()
+    keras_file = self._saveKerasModel(model)
+
+    model_coverage.test_keras_model(keras_file, post_training_quantize=True)
+
+  def testConverterMode(self):
+    model = self._getSingleInputKerasModel()
+    keras_file = self._saveKerasModel(model)
+
+    model_coverage.test_keras_model(
+        keras_file, converter_mode=lite.ConverterMode.TOCO_EXTENDED)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From e0b735092ee1ace1ccb0816d4772ff7f7cc4e8a9 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 26 Sep 2018 11:52:20 -0700
Subject: [PATCH 0740/1357] [TF:XLA] Bump open source llvm revision to r343057

PiperOrigin-RevId: 214642860
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 915fee6a1f..108be8533b 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -502,11 +502,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "llvm",
         urls = [
-            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/7167e4d196a50f78abe8af6553c943d50b757a13.tar.gz",
-            "https://github.com/llvm-mirror/llvm/archive/7167e4d196a50f78abe8af6553c943d50b757a13.tar.gz",
+            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/d3429e96fe1e45b1dc0106463832523f37faf271.tar.gz",
+            "https://github.com/llvm-mirror/llvm/archive/d3429e96fe1e45b1dc0106463832523f37faf271.tar.gz",
         ],
-        sha256 = "11d933232b27531abc83592fc9f03e7f928e504c7d478eeaba51efa929a3d9df",
-        strip_prefix = "llvm-7167e4d196a50f78abe8af6553c943d50b757a13",
+        sha256 = "a4f8bfe7e3e69069934a87e612a1d4d3b8b6af13e0f1213a42a6046e1bcd50d8",
+        strip_prefix = "llvm-d3429e96fe1e45b1dc0106463832523f37faf271",
         build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
     )
 
-- 
GitLab


From 3da0dff642335c0dff027eb791a7ffc795a63dea Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Wed, 26 Sep 2018 11:55:50 -0700
Subject: [PATCH 0741/1357] Remove verbs, gdr and ngraph questions in configure
 script.

PiperOrigin-RevId: 214643700
---
 configure.py   | 18 +++++-------------
 tools/bazel.rc |  8 ++++++++
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/configure.py b/configure.py
index 1064f6a9d4..f42da2cbb1 100644
--- a/configure.py
+++ b/configure.py
@@ -1408,10 +1408,6 @@ def set_other_mpi_vars(environ_cp):
     raise ValueError('Cannot find the MPI library file in %s/lib' % mpi_home)
 
 
-def set_grpc_build_flags():
-  write_to_bazelrc('build --define grpc_no_ares=true')
-
-
 def set_system_libs_flag(environ_cp):
   syslibs = environ_cp.get('TF_SYSTEM_LIBS', '')
   if syslibs and syslibs != '':
@@ -1498,8 +1494,6 @@ def main():
     # Windows.
     environ_cp['TF_DOWNLOAD_CLANG'] = '0'
     environ_cp['TF_ENABLE_XLA'] = '0'
-    environ_cp['TF_NEED_GDR'] = '0'
-    environ_cp['TF_NEED_VERBS'] = '0'
     environ_cp['TF_NEED_MPI'] = '0'
     environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0'
 
@@ -1526,12 +1520,7 @@ def main():
                 'with_kafka_support', True, 'kafka')
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
                 False, 'xla')
-  set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', False,
-                'gdr')
-  set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support',
-                False, 'verbs')
-  set_build_var(environ_cp, 'TF_NEED_NGRAPH', 'nGraph', 'with_ngraph_support',
-                False, 'ngraph')
+
 
   set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
   if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
@@ -1588,7 +1577,6 @@ def main():
     set_mpi_home(environ_cp)
     set_other_mpi_vars(environ_cp)
 
-  set_grpc_build_flags()
   set_cc_opt_flags(environ_cp)
   set_system_libs_flag(environ_cp)
   if is_windows():
@@ -1613,7 +1601,11 @@ def main():
           'more details.')
     config_info_line('mkl', 'Build with MKL support.')
     config_info_line('monolithic', 'Config for mostly static monolithic build.')
+    config_info_line('gdr', 'Build with GDR support.')
+    config_info_line('verbs', 'Build with libverbs support.')
+    config_info_line('ngraph', 'Build with Intel ngraph support.')
 
 
 if __name__ == '__main__':
   main()
+
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 6747c7e795..02e5b84306 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -58,6 +58,11 @@ build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fn
 build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain
 build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true
 
+# Options extracted from configure script
+build:gdr --define=with_gdr_support=true
+build:ngraph --define=with_ngraph_support=true
+build:verbs --define=with_verbs_support=true
+
 build --define=use_fast_cpp_protos=true
 build --define=allow_oversize_protos=true
 build --define=grpc_no_ares=true
@@ -66,6 +71,9 @@ build --spawn_strategy=standalone
 build --genrule_strategy=standalone
 build -c opt
 
+# Other build flags.
+build --define=grpc_no_ares=true
+
 # Modular TF build options
 build:dynamic_kernels --define=dynamic_loaded_kernels=true
 
-- 
GitLab


From e8c18aa0947d253d861f56c99788a8ab94f28164 Mon Sep 17 00:00:00 2001
From: Christopher Olston <olston@google.com>
Date: Wed, 26 Sep 2018 11:57:25 -0700
Subject: [PATCH 0742/1357] Public no-op.

PiperOrigin-RevId: 214643933
---
 tensorflow/core/kernels/batching_util/BUILD | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD
index 792eb74e31..0d53240330 100644
--- a/tensorflow/core/kernels/batching_util/BUILD
+++ b/tensorflow/core/kernels/batching_util/BUILD
@@ -1,7 +1,7 @@
 # Description: Utilities.
 
 package(
-    default_visibility = ["//tensorflow:internal"],
+    default_visibility = ["//visibility:public"],
 )
 
 licenses(["notice"])  # Apache 2.0
@@ -12,7 +12,6 @@ cc_library(
     name = "periodic_function_dynamic",
     srcs = ["periodic_function.cc"],
     hdrs = ["periodic_function.h"],
-    visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:protos_all_cc",
@@ -21,7 +20,6 @@ cc_library(
 
 cc_library(
     name = "periodic_function",
-    visibility = ["//visibility:public"],
     deps = [
         ":periodic_function_dynamic",
         "//tensorflow/core:lib",
@@ -190,7 +188,6 @@ cc_library(
     testonly = 1,
     srcs = ["fake_clock_env.cc"],
     hdrs = ["fake_clock_env.h"],
-    visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/core:lib",
         "//tensorflow/core:tensorflow",
-- 
GitLab


From 9276b19b468b82b7457cf256352e7eac9d90d68e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 12:08:41 -0700
Subject: [PATCH 0743/1357] Account for old run config, more robust num
 trainers

PiperOrigin-RevId: 214646114
---
 .../contrib/tensor_forest/client/random_forest.py      | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/tensor_forest/client/random_forest.py b/tensorflow/contrib/tensor_forest/client/random_forest.py
index d78d12d997..6e3bfbb9bd 100644
--- a/tensorflow/contrib/tensor_forest/client/random_forest.py
+++ b/tensorflow/contrib/tensor_forest/client/random_forest.py
@@ -448,9 +448,7 @@ class TensorForestEstimator(estimator.Estimator):
     """
     # Override default number of trainers if config is provided.
     if num_trainers == 1 and config is not None:
-      num_trainers = config.num_worker_replicas
-    if trainer_id == 0 and config is not None:
-      trainer_id = config.global_id_in_cluster
+      num_trainers = max(1, config.num_worker_replicas)
 
     super(TensorForestEstimator, self).__init__(
         model_fn=get_model_fn(
@@ -572,9 +570,7 @@ class MultiForestMultiHeadEstimator(estimator.Estimator):
     model_fns = []
     # Override default number of trainers if config is provided.
     if num_trainers == 1 and config is not None:
-      num_trainers = config.num_worker_replicas
-    if trainer_id == 0 and config is not None:
-      trainer_id = config.global_id_in_cluster
+      num_trainers = max(1, config.num_worker_replicas)
 
     for i in range(len(params_list)):
       params = params_list[i].fill()
@@ -723,7 +719,7 @@ class CoreTensorForestEstimator(core_estimator.Estimator):
     """
     # Override default number of trainers if config is provided.
     if num_trainers == 1 and config is not None:
-      num_trainers = config.num_worker_replicas
+      num_trainers = max(1, config.num_worker_replicas)
     if trainer_id == 0 and config is not None:
       trainer_id = config.global_id_in_cluster
 
-- 
GitLab


From 6a787235b95dd3040fc5ff7fb7104585e746c66a Mon Sep 17 00:00:00 2001
From: Christopher Olston <olston@google.com>
Date: Wed, 26 Sep 2018 12:22:26 -0700
Subject: [PATCH 0744/1357] Public no-op.

PiperOrigin-RevId: 214648140
---
 tensorflow/core/kernels/batching_util/BUILD | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD
index 0d53240330..039b0db144 100644
--- a/tensorflow/core/kernels/batching_util/BUILD
+++ b/tensorflow/core/kernels/batching_util/BUILD
@@ -12,6 +12,11 @@ cc_library(
     name = "periodic_function_dynamic",
     srcs = ["periodic_function.cc"],
     hdrs = ["periodic_function.h"],
+    visibility = [
+        "//learning/serving:__subpackages__",
+        "//tensorflow:internal",
+        "//tensorflow_serving:__subpackages__",
+    ],
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:protos_all_cc",
@@ -20,6 +25,11 @@ cc_library(
 
 cc_library(
     name = "periodic_function",
+    visibility = [
+        "//learning/serving:__subpackages__",
+        "//tensorflow:internal",
+        "//tensorflow_serving:__subpackages__",
+    ],
     deps = [
         ":periodic_function_dynamic",
         "//tensorflow/core:lib",
@@ -188,6 +198,11 @@ cc_library(
     testonly = 1,
     srcs = ["fake_clock_env.cc"],
     hdrs = ["fake_clock_env.h"],
+    visibility = [
+        "//learning/serving:__subpackages__",
+        "//tensorflow:internal",
+        "//tensorflow_serving:__subpackages__",
+    ],
     deps = [
         "//tensorflow/core:lib",
         "//tensorflow/core:tensorflow",
-- 
GitLab


From b61ca2d62ab9792e1f386c2e598fee4d07b51f1c Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 26 Sep 2018 12:56:29 -0700
Subject: [PATCH 0745/1357] Fix leaks of a BigtableTableResource in various
 Bigtable ops.

PiperOrigin-RevId: 214653279
---
 .../contrib/bigtable/kernels/bigtable_lookup_dataset_op.cc       | 1 +
 .../contrib/bigtable/kernels/bigtable_prefix_key_dataset_op.cc   | 1 +
 .../contrib/bigtable/kernels/bigtable_range_key_dataset_op.cc    | 1 +
 .../bigtable/kernels/bigtable_sample_key_pairs_dataset_op.cc     | 1 +
 .../contrib/bigtable/kernels/bigtable_sample_keys_dataset_op.cc  | 1 +
 tensorflow/contrib/bigtable/kernels/bigtable_scan_dataset_op.cc  | 1 +
 6 files changed, 6 insertions(+)

diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_lookup_dataset_op.cc b/tensorflow/contrib/bigtable/kernels/bigtable_lookup_dataset_op.cc
index 11f530e82a..2c6317157d 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_lookup_dataset_op.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_lookup_dataset_op.cc
@@ -28,6 +28,7 @@ class BigtableLookupDatasetOp : public UnaryDatasetOpKernel {
                    DatasetBase** output) override {
     BigtableTableResource* table;
     OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 1), &table));
+    core::ScopedUnref scoped_unref(table);
 
     std::vector<string> column_families;
     std::vector<string> columns;
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_prefix_key_dataset_op.cc b/tensorflow/contrib/bigtable/kernels/bigtable_prefix_key_dataset_op.cc
index 5cab729d9c..92a3658667 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_prefix_key_dataset_op.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_prefix_key_dataset_op.cc
@@ -31,6 +31,7 @@ class BigtablePrefixKeyDatasetOp : public DatasetOpKernel {
     BigtableTableResource* resource;
     OP_REQUIRES_OK(ctx,
                    LookupResource(ctx, HandleFromInput(ctx, 0), &resource));
+    core::ScopedUnref scoped_unref(resource);
 
     *output = new Dataset(ctx, resource, std::move(prefix));
   }
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_range_key_dataset_op.cc b/tensorflow/contrib/bigtable/kernels/bigtable_range_key_dataset_op.cc
index 4dc4647bd2..bd8805a382 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_range_key_dataset_op.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_range_key_dataset_op.cc
@@ -34,6 +34,7 @@ class BigtableRangeKeyDatasetOp : public DatasetOpKernel {
     BigtableTableResource* resource;
     OP_REQUIRES_OK(ctx,
                    LookupResource(ctx, HandleFromInput(ctx, 0), &resource));
+    core::ScopedUnref scoped_unref(resource);
 
     *output =
         new Dataset(ctx, resource, std::move(start_key), std::move(end_key));
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_sample_key_pairs_dataset_op.cc b/tensorflow/contrib/bigtable/kernels/bigtable_sample_key_pairs_dataset_op.cc
index 736775bdac..01608dc6bc 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_sample_key_pairs_dataset_op.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_sample_key_pairs_dataset_op.cc
@@ -38,6 +38,7 @@ class BigtableSampleKeyPairsDatasetOp : public DatasetOpKernel {
     BigtableTableResource* resource;
     OP_REQUIRES_OK(ctx,
                    LookupResource(ctx, HandleFromInput(ctx, 0), &resource));
+    core::ScopedUnref scoped_unref(resource);
 
     OP_REQUIRES(ctx, prefix.empty() || start_key.empty(),
                 errors::InvalidArgument(
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_sample_keys_dataset_op.cc b/tensorflow/contrib/bigtable/kernels/bigtable_sample_keys_dataset_op.cc
index 208b7b3e08..9b60e0a667 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_sample_keys_dataset_op.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_sample_keys_dataset_op.cc
@@ -28,6 +28,7 @@ class BigtableSampleKeysDatasetOp : public DatasetOpKernel {
     BigtableTableResource* resource;
     OP_REQUIRES_OK(ctx,
                    LookupResource(ctx, HandleFromInput(ctx, 0), &resource));
+    core::ScopedUnref scoped_unref(resource);
     *output = new Dataset(ctx, resource);
   }
 
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_scan_dataset_op.cc b/tensorflow/contrib/bigtable/kernels/bigtable_scan_dataset_op.cc
index 9407855fe8..688289a4e2 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_scan_dataset_op.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_scan_dataset_op.cc
@@ -67,6 +67,7 @@ class BigtableScanDatasetOp : public DatasetOpKernel {
     BigtableTableResource* resource;
     OP_REQUIRES_OK(ctx,
                    LookupResource(ctx, HandleFromInput(ctx, 0), &resource));
+    core::ScopedUnref scoped_unref(resource);
 
     const uint64 num_outputs = columns.size() + 1;
     std::vector<PartialTensorShape> output_shapes;
-- 
GitLab


From d600b1b55fa851648918fed7a67f61eefd554034 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Wed, 26 Sep 2018 13:31:43 -0700
Subject: [PATCH 0746/1357] [tf.data] Small utils cleanup to expose generic
 function

PiperOrigin-RevId: 214659488
---
 .../core/grappler/optimizers/data/BUILD       |  2 +-
 .../optimizers/data/function_utils.cc         | 31 +++----------------
 .../grappler/optimizers/data/graph_utils.cc   |  9 ++----
 .../grappler/optimizers/data/graph_utils.h    | 15 +++++++++
 .../optimizers/data/graph_utils_test.cc       | 12 +++++++
 5 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index d42a560cb2..d198a2a591 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -89,10 +89,10 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        ":graph_utils",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
-        "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
     ] + tf_protos_all(),
 )
diff --git a/tensorflow/core/grappler/optimizers/data/function_utils.cc b/tensorflow/core/grappler/optimizers/data/function_utils.cc
index e95ea1a4c1..e3f6d8e1ea 100644
--- a/tensorflow/core/grappler/optimizers/data/function_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/function_utils.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_def.pb.h"
@@ -22,23 +23,6 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 namespace function_utils {
-namespace {
-
-template <typename Predicate, typename Collection>
-std::vector<int> GetElementIndicesWithPredicate(const Predicate& predicate,
-                                                const Collection& collection) {
-  std::vector<int> indices = {};
-  unsigned idx = 0;
-  for (auto&& element : collection) {
-    if (predicate(element)) {
-      indices.push_back(idx);
-    }
-    idx++;
-  }
-  return indices;
-}
-
-}  // namespace
 
 FunctionDefTensorDesc::FunctionDefTensorDesc(const string& node_name,
                                              const string& output, int position)
@@ -152,32 +136,27 @@ bool ContainsFunctionOutputWithName(StringPiece name,
 }
 
 int FindFunctionInputWithName(StringPiece name, const FunctionDef& function) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
+  return graph_utils::GetFirstElementIndexWithPredicate(
       [&name](const OpDef_ArgDef& arg) { return arg.name() == name; },
       function.signature().input_arg());
-  return indices.empty() ? -1 : indices.front();
 }
 
 int FindFunctionOutputWithName(StringPiece name, const FunctionDef& function) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
+  return graph_utils::GetFirstElementIndexWithPredicate(
       [&name](const OpDef_ArgDef& arg) { return arg.name() == name; },
       function.signature().output_arg());
-  return indices.empty() ? -1 : indices.front();
 }
 
 int FindFunctionNodeWithName(StringPiece name, const FunctionDef& function) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
+  return graph_utils::GetFirstElementIndexWithPredicate(
       [&name](const NodeDef& node) { return node.name() == name; },
       function.node_def());
-  return indices.empty() ? -1 : indices.front();
 }
 
 int FindFunctionNodeWithOp(StringPiece op, const FunctionDef& function) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
+  return graph_utils::GetFirstElementIndexWithPredicate(
       [&op](const NodeDef& node) { return node.op() == op; },
       function.node_def());
-
-  return indices.empty() ? -1 : indices.front();
 }
 
 void SetUniqueFunctionNodeName(StringPiece prefix, FunctionDef* function,
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index 48825d0346..3eaaf8fbef 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -201,25 +201,22 @@ bool ContainsNodeWithOp(StringPiece op, const GraphDef& graph) {
 
 int FindGraphFunctionWithName(StringPiece name,
                               const FunctionDefLibrary& library) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
+  return GetFirstElementIndexWithPredicate(
       [&name](const FunctionDef& function) {
         return function.signature().name() == name;
       },
       library.function());
-  return indices.empty() ? -1 : indices.front();
 }
 
 int FindGraphNodeWithName(StringPiece name, const GraphDef& graph) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
+  return GetFirstElementIndexWithPredicate(
       [&name](const NodeDef& node) { return node.name() == name; },
       graph.node());
-  return indices.empty() ? -1 : indices.front();
 }
 
 int FindGraphNodeWithOp(StringPiece op, const GraphDef& graph) {
-  std::vector<int> indices = GetElementIndicesWithPredicate(
+  return GetFirstElementIndexWithPredicate(
       [&op](const NodeDef& node) { return node.op() == op; }, graph.node());
-  return indices.empty() ? -1 : indices.front();
 }
 
 std::vector<int> FindAllGraphNodesWithOp(const string& op,
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h
index 189a72d255..5dd7819100 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h
@@ -31,6 +31,21 @@ namespace tensorflow {
 namespace grappler {
 namespace graph_utils {
 
+// Returns the index of the first element in collection that fulfills predicate.
+// If no such element exists, returns -1.
+template <typename Predicate, typename Collection>
+int GetFirstElementIndexWithPredicate(const Predicate& predicate,
+                                      const Collection& collection) {
+  unsigned idx = 0;
+  for (auto&& element : collection) {
+    if (predicate(element)) {
+      return idx;
+    }
+    idx++;
+  }
+  return -1;
+}
+
 // Adds a node to the graph.
 NodeDef* AddNode(StringPiece name, StringPiece op,
                  const std::vector<string>& inputs,
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
index 6877c207c4..db986542b2 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
@@ -24,6 +24,18 @@ namespace grappler {
 namespace graph_utils {
 namespace {
 
+TEST(GraphUtilsTest, GetFirstElementIndexWithPredicate) {
+  std::vector<int> vec({1, 2, 3, 4, 5, 6});
+  auto result = GetFirstElementIndexWithPredicate(
+      [](int elem) { return elem % 3 == 0; }, vec);
+
+  EXPECT_EQ(result, 2);
+
+  result = GetFirstElementIndexWithPredicate(
+      [](int elem) { return elem % 7 == 0; }, vec);
+  EXPECT_EQ(result, -1);
+}
+
 TEST(GraphUtilsTest, AddScalarConstNodeBool) {
   GraphDef graph_def;
   MutableGraphView graph(&graph_def);
-- 
GitLab


From 652ce1aaefdadd04a9905a0788ab26c6fff93658 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 13:42:36 -0700
Subject: [PATCH 0747/1357] Kernel signature reworking, misc kernel
 improvements and migrations.

PiperOrigin-RevId: 214661332
---
 .../internal/optimized/optimized_ops.h        |   1 +
 .../internal/reference/reference_ops.h        | 149 ++++++++++++------
 .../contrib/lite/kernels/internal/tensor.h    |  12 ++
 .../contrib/lite/kernels/internal/types.h     |  14 ++
 tensorflow/contrib/lite/kernels/pack.cc       |   9 +-
 tensorflow/contrib/lite/kernels/unpack.cc     |   9 +-
 6 files changed, 139 insertions(+), 55 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 0999738396..732880d9da 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -59,6 +59,7 @@ using reference_ops::BroadcastLessEqual;
 using reference_ops::BroadcastMul4DSlow;
 using reference_ops::BroadcastSub4DSlow;
 using reference_ops::Concatenation;
+using reference_ops::ConcatenationWithScaling;
 using reference_ops::DepthConcatenation;
 using reference_ops::Dequantize;
 using reference_ops::Div;
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 7a5535489a..cd9e1b255d 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -1800,7 +1800,6 @@ inline void Concatenation(int concat_dim, const Scalar* const* input_data,
 // quantized as it takes scale as a floating point value. This should be fixed
 // when optimizng this routine further.
 
-// template <>
 inline void ConcatenationWithScaling(const ConcatenationParams& params,
                                      const RuntimeShape* const* input_shapes,
                                      const uint8* const* input_data,
@@ -1813,15 +1812,13 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params,
   const int32 output_zeropoint = params.output_zeropoint;
   const float output_scale = params.output_scale;
 
-  // The arguments input_zeropoint and input_scale are expected to be an array
-  // that have the quantization parameters for all the inputs to the concat
-  // operator.
-  TFLITE_DCHECK_GT(inputs_count, 1);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int concat_dimensions = output_shape.DimensionsCount();
+  TFLITE_DCHECK_LT(axis, concat_dimensions);
+
   int64_t concat_size = 0;
   for (int i = 0; i < inputs_count; i++) {
-    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), 4);
-    for (int j = 0; j < 4; j++) {
+    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
+    for (int j = 0; j < concat_dimensions; j++) {
       if (j != axis) {
         MatchingDim(*input_shapes[i], j, output_shape, j);
       }
@@ -1836,9 +1833,10 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params,
   // For all input arrays,
   // FlatSize() = outer_size * Dims(axis) * base_inner_size;
   int64_t base_inner_size = 1;
-  for (int i = axis + 1; i < 4; ++i) {
+  for (int i = axis + 1; i < concat_dimensions; ++i) {
     base_inner_size *= output_shape.Dims(i);
   }
+
   const float inverse_output_scale = 1.f / output_scale;
   uint8* output_ptr = output_data;
   for (int k = 0; k < outer_size; k++) {
@@ -1892,37 +1890,51 @@ inline void Concatenation(int concat_dim, const uint8* const* input_data,
 }
 
 template <typename Scalar>
-void Pack(int dim, const Scalar* const* input_data,
-          const Dims<4>* const* input_dims, int inputs_count,
-          Scalar* output_data, const Dims<4>& output_dims) {
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
+void Pack(const PackParams& params, const RuntimeShape* const* input_shapes,
+          const Scalar* const* input_data, const RuntimeShape& output_shape,
+          Scalar* output_data) {
+  const int dimensions = output_shape.DimensionsCount();
+  int axis = params.axis;
+  int inputs_count = params.inputs_count;
+
   int outer_size = 1;
-  for (int i = dim + 1; i < 4; i++) {
-    outer_size *= output_dims.sizes[i];
+  for (int i = 0; i < axis; i++) {
+    outer_size *= output_shape.Dims(i);
   }
-  Scalar* output_ptr = output_data;
-  const int copy_size = FlatSize(**input_dims) / outer_size;
-  for (int k = 0; k < outer_size; k++) {
-    for (int i = 0; i < inputs_count; ++i) {
-      memcpy(output_ptr, input_data[i] + k * copy_size,
-             copy_size * sizeof(Scalar));
-      output_ptr += copy_size;
+  int copy_size = 1;
+  for (int i = params.axis + 1; i < dimensions; i++) {
+    copy_size *= output_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+  for (int i = 0; i < inputs_count; ++i) {
+    for (int k = 0; k < outer_size; k++) {
+      const Scalar* input_ptr = input_data[i] + copy_size * k;
+      int loc = k * inputs_count * copy_size + i * copy_size;
+      memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
     }
   }
 }
 
 template <typename Scalar>
-void Unpack(int axis, const Scalar* input_data, const Dims<4>& input_dims,
-            int dimensions, int outputs_count, Scalar* const* output_datas,
-            const Dims<4>& output_dims) {
+void Unpack(const UnpackParams& params, const RuntimeShape& input_shape,
+            const Scalar* input_data, const RuntimeShape& output_shape,
+            Scalar* const* output_datas) {
+  const int dimensions = input_shape.DimensionsCount();
+  const int outputs_count = params.num_split;
+
   int outer_size = 1;
-  for (int i = dimensions - axis; i < 4; i++) {
-    outer_size *= input_dims.sizes[i];
+  for (int i = 0; i < params.axis; i++) {
+    outer_size *= input_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = params.axis + 1; i < dimensions; i++) {
+    copy_size *= input_shape.Dims(i);
   }
+  TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
 
-  const int copy_size = FlatSize(input_dims) / outer_size / outputs_count;
-  for (int k = 0; k < outer_size; k++) {
-    for (int i = 0; i < outputs_count; ++i) {
+  for (int i = 0; i < outputs_count; ++i) {
+    for (int k = 0; k < outer_size; k++) {
       Scalar* output_ptr = output_datas[i] + copy_size * k;
       int loc = k * outputs_count * copy_size + i * copy_size;
       memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
@@ -1931,18 +1943,29 @@ void Unpack(int axis, const Scalar* input_data, const Dims<4>& input_dims,
 }
 
 template <typename Scalar>
-void Pack(int dim, const Scalar* const* input_data,
-          const Dims<4>* const* input_dims, const int32* input_zeropoint,
-          const float* input_scale, int inputs_count, Scalar* output_data,
-          const Dims<4>& output_dims, const int32 output_zeropoint,
-          const float output_scale) {
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
+void PackWithScaling(const PackParams& params,
+                     const RuntimeShape* const* input_shapes,
+                     const uint8* const* input_data,
+                     const RuntimeShape& output_shape, uint8* output_data) {
+  const int dimensions = output_shape.DimensionsCount();
+  int axis = params.axis;
+  const int32* input_zeropoint = params.input_zeropoint;
+  const float* input_scale = params.input_scale;
+  int inputs_count = params.inputs_count;
+  const int32 output_zeropoint = params.output_zeropoint;
+  const float output_scale = params.output_scale;
+
   int outer_size = 1;
-  for (int i = dim + 1; i < 4; i++) {
-    outer_size *= output_dims.sizes[i];
+  for (int i = 0; i < axis; i++) {
+    outer_size *= output_shape.Dims(i);
   }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; i++) {
+    copy_size *= output_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
   Scalar* output_ptr = output_data;
-  const int copy_size = FlatSize(**input_dims) / outer_size;
   const float inverse_output_scale = 1.f / output_scale;
   for (int k = 0; k < outer_size; k++) {
     for (int i = 0; i < inputs_count; ++i) {
@@ -3374,15 +3397,21 @@ inline void Floor(const RuntimeShape& input_shape, const float* input_data,
 
 template <typename T>
 inline void Gather(const tflite::GatherParams& op_params,
-                   const RuntimeShape& input_shape, const T* input_data,
-                   const RuntimeShape& coords_shape, const int32* coords_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  // Enable these checks when moving legacy ops to legacy_reference_ops.
-  //
-  // TFLITE_DCHECK_EQ(coords_shape.DimensionsCount(), 1);
+                   const RuntimeShape& unextended_input_shape,
+                   const T* input_data, const RuntimeShape& coords_shape,
+                   const int32* coords_data,
+                   const RuntimeShape& unextended_output_shape,
+                   T* output_data) {
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
   const int input_rank = op_params.input_rank;
   const int gather_dimensions = output_shape.DimensionsCount();
-  TFLITE_DCHECK_LE(input_shape.DimensionsCount(), gather_dimensions);
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), gather_dimensions);
   const int axis = gather_dimensions - input_rank;
   TFLITE_DCHECK_LT(axis, gather_dimensions);
   TFLITE_DCHECK_GE(axis, 0);
@@ -4762,22 +4791,44 @@ inline void BroadcastComparison(int left_shift, const T* input1_data,
                          input2_data, output_shape, output_data);              \
   }                                                                            \
   template <typename T>                                                        \
+  inline void name##NoScaling(                                                 \
+      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
+      const T* input1_data, const RuntimeShape& input2_shape,                  \
+      const T* input2_data, const RuntimeShape& output_shape,                  \
+      bool* output_data) {                                                     \
+    gemmlowp::ScopedProfilingLabel label(#name "NoScaling");                   \
+    ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data,          \
+                                input2_shape, input2_data, output_shape,       \
+                                output_data);                                  \
+  }                                                                            \
+  template <typename T>                                                        \
   inline void name##WithScaling(                                               \
       const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
       const T* input1_data, const RuntimeShape& input2_shape,                  \
       const T* input2_data, const RuntimeShape& output_shape,                  \
       bool* output_data) {                                                     \
-    gemmlowp::ScopedProfilingLabel label(#name "/8bit");                       \
+    gemmlowp::ScopedProfilingLabel label(#name "WithScaling/8bit");            \
     ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data,   \
                                        input2_shape, input2_data,              \
                                        output_shape, output_data);             \
   }                                                                            \
+  template <typename T>                                                        \
+  inline void Broadcast4DSlow##name##NoScaling(                                \
+      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
+      const T* input1_data, const RuntimeShape& input2_shape,                  \
+      const T* input2_data, const RuntimeShape& output_shape,                  \
+      bool* output_data) {                                                     \
+    gemmlowp::ScopedProfilingLabel label("Broadcast4DSlow" #name "NoScaling"); \
+    BroadcastComparison4DSlowImpl<T, name##Fn>(                                \
+        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
+        output_shape, output_data);                                            \
+  }                                                                            \
   inline void Broadcast4DSlow##name(                                           \
       const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
       const float* input1_data, const RuntimeShape& input2_shape,              \
       const float* input2_data, const RuntimeShape& output_shape,              \
       bool* output_data) {                                                     \
-    gemmlowp::ScopedProfilingLabel label("Broadcast" #name);                   \
+    gemmlowp::ScopedProfilingLabel label("Broadcast4DSlow" #name);             \
     BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data,  \
                                         input2_shape, input2_data,             \
                                         output_shape, output_data);            \
@@ -4788,7 +4839,7 @@ inline void BroadcastComparison(int left_shift, const T* input1_data,
       const T* input1_data, const RuntimeShape& input2_shape,                  \
       const T* input2_data, const RuntimeShape& output_shape,                  \
       bool* output_data) {                                                     \
-    gemmlowp::ScopedProfilingLabel label("Broadcast" #name "/8bit");           \
+    gemmlowp::ScopedProfilingLabel label("Broadcast4DSlow" #name "/8bit");     \
     BroadcastComparison4DSlowWithScaling<T, name##Fn>(                         \
         op_params, input1_shape, input1_data, input2_shape, input2_data,       \
         output_shape, output_data);                                            \
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h
index 13106456df..f1b08383b0 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor.h
+++ b/tensorflow/contrib/lite/kernels/internal/tensor.h
@@ -58,11 +58,14 @@ class VectorOfTensors {
     all_data_.reserve(num_tensors);
     all_dims_.reserve(num_tensors);
     all_dims_ptr_.reserve(num_tensors);
+    all_shape_.reserve(num_tensors);
+    all_shape_ptr_.reserve(num_tensors);
 
     for (int i = 0; i < num_tensors; ++i) {
       TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
       all_data_.push_back(GetTensorData<T>(t));
       all_dims_.push_back(GetTensorDims(t));
+      all_shape_.push_back(GetTensorShape(t));
     }
 
     // Taking the pointer from inside a std::vector is only OK if the vector is
@@ -70,6 +73,7 @@ class VectorOfTensors {
     // are free to grab iterators here.
     for (int i = 0; i < num_tensors; ++i) {
       all_dims_ptr_.push_back(&all_dims_[i]);
+      all_shape_ptr_.push_back(&all_shape_[i]);
     }
   }
   // Return a pointer to the data pointers of all tensors in the list. For
@@ -84,10 +88,18 @@ class VectorOfTensors {
   //   dims[1] are the dimensions of the second tensor in the list.
   const Dims<4>* const* dims() const { return all_dims_ptr_.data(); }
 
+  // Return a pointer the shape pointers of all tensors in the list. For
+  // example:
+  //   const RuntimeShape* const* d = v.dims();
+  //   dims[1] are the dimensions of the second tensor in the list.
+  const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); }
+
  private:
   std::vector<T*> all_data_;
   std::vector<Dims<4>> all_dims_;
   std::vector<Dims<4>*> all_dims_ptr_;
+  std::vector<RuntimeShape> all_shape_;
+  std::vector<RuntimeShape*> all_shape_ptr_;
 };
 
 // A list of quantized tensors in a format that can be used by kernels like
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index a3a5994c9c..b39347758a 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -875,6 +875,15 @@ struct MeanParams {
   int16 axis[4];
 };
 
+struct PackParams {
+  int8 axis;
+  const int32* input_zeropoint;
+  const float* input_scale;
+  uint16 inputs_count;
+  int32 output_zeropoint;
+  float output_scale;
+};
+
 struct PadParams {
   int8 left_padding_count;
   int32 left_padding[4];
@@ -975,6 +984,11 @@ struct TransposeParams {
   int32 perm[4];
 };
 
+struct UnpackParams {
+  uint16 num_split;
+  int16 axis;
+};
+
 template <typename P>
 inline void SetActivationParams(float min, float max, P* params) {
   params->float_activation_min = min;
diff --git a/tensorflow/contrib/lite/kernels/pack.cc b/tensorflow/contrib/lite/kernels/pack.cc
index 4cb98fdd19..c368582ef7 100644
--- a/tensorflow/contrib/lite/kernels/pack.cc
+++ b/tensorflow/contrib/lite/kernels/pack.cc
@@ -85,9 +85,12 @@ template <typename T>
 void PackImpl(TfLiteContext* context, TfLiteNode* node, TfLiteTensor* output,
               int values_count, int axis) {
   VectorOfTensors<T> all_inputs(*context, *node->inputs);
-  reference_ops::Pack<T>(RemapDim(NumDimensions(output), axis),
-                         all_inputs.data(), all_inputs.dims(), values_count,
-                         GetTensorData<T>(output), GetTensorDims(output));
+  tflite::PackParams op_params;
+  op_params.axis = axis;
+  op_params.inputs_count = values_count;
+
+  reference_ops::Pack<T>(op_params, all_inputs.shapes(), all_inputs.data(),
+                         GetTensorShape(output), GetTensorData<T>(output));
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
diff --git a/tensorflow/contrib/lite/kernels/unpack.cc b/tensorflow/contrib/lite/kernels/unpack.cc
index 9ff06f8331..a7d3a9bc76 100644
--- a/tensorflow/contrib/lite/kernels/unpack.cc
+++ b/tensorflow/contrib/lite/kernels/unpack.cc
@@ -88,10 +88,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 template <typename T>
 void UnpackImpl(TfLiteContext* context, TfLiteNode* node,
                 const TfLiteTensor* input, int output_count, int axis) {
+  tflite::UnpackParams op_params;
+  op_params.axis = axis;
+  op_params.num_split = output_count;
   VectorOfTensors<T> all_outputs(*context, *node->outputs);
-  reference_ops::Unpack<T>(axis, GetTensorData<T>(input), GetTensorDims(input),
-                           NumDimensions(input), output_count,
-                           all_outputs.data(), **all_outputs.dims());
+  reference_ops::Unpack<T>(op_params, GetTensorShape(input),
+                           GetTensorData<T>(input), **all_outputs.shapes(),
+                           all_outputs.data());
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-- 
GitLab


From 1736e0bbbfdeeba178dff37c970b5a0180ee013f Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 26 Sep 2018 13:48:21 -0700
Subject: [PATCH 0748/1357] [TF] Add new internal ops _VarHandlesOp and
 _ReadVariablesOp.

The purpose of these ops is to fix a latency problem observed for an inference benchmark. Often a inference step starts by reading the value of many (hundreds) of weights. For a resource variable, this requires a VarHandleOp and a ReadVariableOp per variable. Running hundreds of trivial ops can add hundreds of microseconds of latency to the critical path of an inference step. The inter-op latency of the executor can be hundreds of nanoseconds, which rapidly adds up.

This change introduces two fused ops _VarHandlesOp and _ReadVariablesOp that allow us to read many variables in a pair of larger ops, rather than many tiny ops.

PiperOrigin-RevId: 214662338
---
 tensorflow/compiler/jit/xla_device_ops.h      |   6 +
 tensorflow/core/BUILD                         |   9 +-
 tensorflow/core/framework/resource_mgr.cc     |   9 ++
 tensorflow/core/framework/resource_mgr.h      | 106 ++++++++++++++++++
 tensorflow/core/kernels/BUILD                 |   2 +-
 .../core/kernels/resource_variable_ops.cc     |  60 +++++++++-
 .../core/kernels/resource_variable_ops.h      |  10 ++
 tensorflow/core/ops/resource_variable_ops.cc  |  72 ++++++++++++
 8 files changed, 270 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h
index 2ccee79761..6967ad1f03 100644
--- a/tensorflow/compiler/jit/xla_device_ops.h
+++ b/tensorflow/compiler/jit/xla_device_ops.h
@@ -99,9 +99,15 @@ class XlaAssignVariableOp : public AsyncOpKernel {
   REGISTER_KERNEL_BUILDER(                                                     \
       Name("VarHandleOp").Device(DEVICE).HostMemory("resource"),               \
       ResourceHandleOp<Var>);                                                  \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("_VarHandlesOp").Device(DEVICE).HostMemory("resources"),            \
+      ResourceHandlesOp<Var>);                                                 \
   REGISTER_KERNEL_BUILDER(                                                     \
       Name("ReadVariableOp").Device(DEVICE).HostMemory("resource"),            \
       ReadVariableOp);                                                         \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("_ReadVariablesOp").Device(DEVICE).HostMemory("resources"),         \
+      ReadVariablesOp);                                                        \
   REGISTER_KERNEL_BUILDER(                                                     \
       Name("DestroyResourceOp").Device(DEVICE).HostMemory("resource"),         \
       DestroyResourceOp);                                                      \
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index bc0bfb793c..d85cb379bb 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1057,7 +1057,6 @@ tf_gen_op_libs(
         "random_grad",
         "random_ops",
         "remote_fused_graph_ops",
-        "resource_variable_ops",
         "rpc_ops",
         "scoped_allocator_ops",
         "sdca_ops",
@@ -1099,6 +1098,14 @@ tf_gen_op_libs(
     deps = ["//tensorflow/core/kernels:debug_ops"],
 )
 
+tf_gen_op_libs(
+    is_external = False,
+    op_lib_names = [
+        "resource_variable_ops",
+    ],
+    deps = [":lib"],
+)
+
 # And one for all user ops
 cc_library(
     name = "user_ops_op_lib",
diff --git a/tensorflow/core/framework/resource_mgr.cc b/tensorflow/core/framework/resource_mgr.cc
index ebdaaec153..508a8d3149 100644
--- a/tensorflow/core/framework/resource_mgr.cc
+++ b/tensorflow/core/framework/resource_mgr.cc
@@ -288,4 +288,13 @@ Status DeleteResource(OpKernelContext* ctx, const ResourceHandle& p) {
   return ctx->resource_manager()->Delete(p);
 }
 
+Status ResourceHandlesShape(shape_inference::InferenceContext* c) {
+  int n;
+  TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
+  for (int i = 0; i < n; ++i) {
+    c->set_output(i, c->Scalar());
+  }
+  return Status::OK();
+}
+
 }  //  end namespace tensorflow
diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h
index d58deaa3fc..abb6635984 100644
--- a/tensorflow/core/framework/resource_mgr.h
+++ b/tensorflow/core/framework/resource_mgr.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_FRAMEWORK_RESOURCE_MGR_H_
 #define TENSORFLOW_CORE_FRAMEWORK_RESOURCE_MGR_H_
 
+#include <memory>
 #include <string>
 #include <typeindex>
 #include <typeinfo>
@@ -127,6 +128,14 @@ class ResourceMgr {
   Status Lookup(const string& container, const string& name,
                 T** resource) const TF_MUST_USE_RESULT;
 
+  // Similar to Lookup, but looks up multiple resources at once, with only a
+  // single lock acquisition.
+  template <typename T>
+  Status LookupMany(absl::Span<std::pair<const string*, const string*> const>
+                        containers_and_names,
+                    std::vector<std::unique_ptr<T, core::RefCountDeleter>>*
+                        resource) const TF_MUST_USE_RESULT;
+
   // If "container" has a resource "name", returns it in
   // "*resource". Otherwise, invokes creator() to create the resource.
   // The caller takes the ownership of one ref on "*resource".
@@ -246,6 +255,12 @@ Status CreateResource(OpKernelContext* ctx, const ResourceHandle& p, T* value);
 template <typename T>
 Status LookupResource(OpKernelContext* ctx, const ResourceHandle& p, T** value);
 
+// Looks up multiple resources pointed by a sequence of resource handles.
+template <typename T>
+Status LookupResources(
+    OpKernelContext* ctx, absl::Span<ResourceHandle const> p,
+    std::vector<std::unique_ptr<T, core::RefCountDeleter>>* values);
+
 // Looks up or creates a resource.
 template <typename T>
 Status LookupOrCreateResource(OpKernelContext* ctx, const ResourceHandle& p,
@@ -358,6 +373,26 @@ class ResourceHandleOp : public OpKernel {
   std::atomic<bool> initialized_{false};
 };
 
+// Utility op kernel to produce a handle to a resource of type T.
+template <typename T>
+class ResourceHandlesOp : public OpKernel {
+ public:
+  explicit ResourceHandlesOp(OpKernelConstruction* context);
+
+  void Compute(OpKernelContext* ctx) override;
+
+  bool IsExpensive() override { return false; }
+
+ private:
+  std::vector<string> containers_;
+  std::vector<string> names_;
+  mutex mutex_;
+  std::vector<Tensor> resources_;
+  std::atomic<bool> initialized_{false};
+};
+
+Status ResourceHandlesShape(shape_inference::InferenceContext* c);
+
 // Registers a kernel for an op which produces a handle to a resource of the
 // specified type.
 #define REGISTER_RESOURCE_HANDLE_KERNEL(Type)                        \
@@ -389,6 +424,24 @@ Status ResourceMgr::Lookup(const string& container, const string& name,
   return LookupInternal(container, name, resource);
 }
 
+template <typename T>
+Status ResourceMgr::LookupMany(
+    absl::Span<std::pair<const string*, const string*> const>
+        containers_and_names,
+    std::vector<std::unique_ptr<T, core::RefCountDeleter>>* resources) const {
+  CheckDeriveFromResourceBase<T>();
+  tf_shared_lock l(mu_);
+  resources->resize(containers_and_names.size());
+  for (size_t i = 0; i < containers_and_names.size(); ++i) {
+    T* resource;
+    TF_RETURN_IF_ERROR(LookupInternal(*containers_and_names[i].first,
+                                      *containers_and_names[i].second,
+                                      &resource));
+    (*resources)[i].reset(resource);
+  }
+  return Status::OK();
+}
+
 template <typename T>
 Status ResourceMgr::LookupInternal(const string& container, const string& name,
                                    T** resource) const {
@@ -498,6 +551,19 @@ Status LookupResource(OpKernelContext* ctx, const ResourceHandle& p,
   return ctx->resource_manager()->Lookup(p.container(), p.name(), value);
 }
 
+template <typename T>
+Status LookupResources(
+    OpKernelContext* ctx, absl::Span<ResourceHandle const* const> p,
+    std::vector<std::unique_ptr<T, core::RefCountDeleter>>* values) {
+  std::vector<std::pair<const string*, const string*>> containers_and_names(
+      p.size());
+  for (size_t i = 0; i < p.size(); ++i) {
+    TF_RETURN_IF_ERROR(internal::ValidateDeviceAndType<T>(ctx, *p[i]));
+    containers_and_names[i] = {&p[i]->container(), &p[i]->name()};
+  }
+  return ctx->resource_manager()->LookupMany(containers_and_names, values);
+}
+
 template <typename T>
 Status LookupOrCreateResource(OpKernelContext* ctx, const ResourceHandle& p,
                               T** value, std::function<Status(T**)> creator) {
@@ -555,6 +621,46 @@ void ResourceHandleOp<T>::Compute(OpKernelContext* ctx) {
   ctx->set_output(0, resource_);
 }
 
+template <typename T>
+ResourceHandlesOp<T>::ResourceHandlesOp(OpKernelConstruction* context)
+    : OpKernel(context) {
+  int n;
+  OP_REQUIRES_OK(context, context->GetAttr("N", &n));
+  OP_REQUIRES_OK(context, context->GetAttr("containers", &containers_));
+  OP_REQUIRES_OK(context, context->GetAttr("shared_names", &names_));
+  OP_REQUIRES(
+      context, containers_.size() == n,
+      errors::InvalidArgument("Number of containers (", containers_.size(),
+                              ") must be equal to N (", n, ")"));
+  OP_REQUIRES(context, names_.size() == n,
+              errors::InvalidArgument("Number of names (", containers_.size(),
+                                      ") must be equal to N (", n, ")"));
+  resources_.resize(n);
+}
+
+template <typename T>
+void ResourceHandlesOp<T>::Compute(OpKernelContext* ctx) {
+  if (!initialized_.load()) {
+    mutex_lock ml(mutex_);
+    // Checking again to see if another thread has initialized the resource.
+    if (!initialized_.load()) {
+      AllocatorAttributes attr;
+      attr.set_on_host(true);
+      for (size_t i = 0; i < resources_.size(); ++i) {
+        OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_RESOURCE, TensorShape({}),
+                                               &resources_[i], attr));
+        ResourceHandle h =
+            MakeResourceHandle<T>(ctx, containers_[i], names_[i]);
+        resources_[i].template scalar<ResourceHandle>()() = h;
+      }
+      initialized_.store(true);
+    }
+  }
+  for (size_t i = 0; i < resources_.size(); ++i) {
+    ctx->set_output(i, resources_[i]);
+  }
+}
+
 }  //  end namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_FRAMEWORK_RESOURCE_MGR_H_
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 1a3db2c7cd..ed5aff58b8 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2028,8 +2028,8 @@ tf_kernel_library(
         ":variable_ops",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:resource_variable_ops_op_lib",
-        "//third_party/eigen3",
     ],
 )
 
diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index 26705a8d34..427044ca67 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -51,7 +51,9 @@ limitations under the License.
 #define EIGEN_USE_GPU
 #endif
 
-#include "tensorflow/core/kernels/resource_variable_ops.h"
+#include <memory>
+#include <vector>
+
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/resource_mgr.h"
@@ -60,10 +62,12 @@ limitations under the License.
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/dense_update_functor.h"
 #include "tensorflow/core/kernels/gather_functor.h"
+#include "tensorflow/core/kernels/resource_variable_ops.h"
 #include "tensorflow/core/kernels/scatter_functor.h"
 #include "tensorflow/core/kernels/training_op_helpers.h"
 #include "tensorflow/core/kernels/variable_ops.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/platform/mem.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
@@ -72,6 +76,8 @@ limitations under the License.
 namespace tensorflow {
 
 REGISTER_RESOURCE_HANDLE_KERNEL(Var);
+REGISTER_KERNEL_BUILDER(Name("_VarHandlesOp").Device(DEVICE_CPU),
+                        ResourceHandlesOp<Var>);
 
 ReadVariableOp::ReadVariableOp(OpKernelConstruction* c) : OpKernel(c) {
   OP_REQUIRES_OK(c, c->GetAttr("dtype", &dtype_));
@@ -101,13 +107,58 @@ void ReadVariableOp::Compute(OpKernelContext* ctx) {
   ctx->set_output(0, t);
 }
 
+ReadVariablesOp::ReadVariablesOp(OpKernelConstruction* c) : OpKernel(c) {
+  int n;
+  OP_REQUIRES_OK(c, c->GetAttr("N", &n));
+  OP_REQUIRES_OK(c, c->GetAttr("dtypes", &dtypes_));
+  OP_REQUIRES(c, n == dtypes_.size(),
+              errors::InvalidArgument(
+                  "Mismatched number of arguments to ReadVariablesOp (", n,
+                  " vs. ", dtypes_.size(), ")"));
+}
+
+void ReadVariablesOp::Compute(OpKernelContext* ctx) {
+  std::vector<std::unique_ptr<Var, core::RefCountDeleter>> variables(
+      dtypes_.size());
+  std::vector<const ResourceHandle*> handles(dtypes_.size());
+  for (size_t i = 0; i < dtypes_.size(); ++i) {
+    handles[i] = &HandleFromInput(ctx, i);
+  }
+  const auto status = LookupResources(ctx, handles, &variables);
+  OP_REQUIRES(ctx, status.ok(),
+              errors::FailedPrecondition(
+                  "Error while reading resource variable. This could mean that "
+                  "the variable was uninitialized. ",
+                  status.ToString()));
+
+  for (size_t i = 0; i < dtypes_.size(); ++i) {
+    // We're acquiring a reference to the underlying buffer while
+    // holding a shared lock to guarantee ordering of reads and
+    // writes.
+    tf_shared_lock ml(*variables[i]->mu());
+    const Tensor& t = *variables[i]->tensor();
+    OP_REQUIRES(ctx, dtypes_[i] == t.dtype(),
+                errors::InvalidArgument(
+                    "Trying to read variable ", handles[i]->name(),
+                    " from Container: ", handles[i]->container(),
+                    " with wrong dtype. Expected ", DataTypeString(dtypes_[i]),
+                    " got ", DataTypeString(t.dtype())));
+    ctx->set_output(i, t);
+  }
+}
+
 REGISTER_KERNEL_BUILDER(Name("ReadVariableOp").Device(DEVICE_CPU),
                         ReadVariableOp);
+REGISTER_KERNEL_BUILDER(Name("_ReadVariablesOp").Device(DEVICE_CPU),
+                        ReadVariablesOp);
 
 #if GOOGLE_CUDA
 REGISTER_KERNEL_BUILDER(
     Name("ReadVariableOp").Device(DEVICE_GPU).HostMemory("resource"),
     ReadVariableOp);
+REGISTER_KERNEL_BUILDER(
+    Name("_ReadVariablesOp").Device(DEVICE_GPU).HostMemory("resources"),
+    ReadVariablesOp);
 
 #define REGISTER_GPU_KERNELS(type)                             \
   namespace functor {                                          \
@@ -121,7 +172,12 @@ REGISTER_KERNEL_BUILDER(
                               .Device(DEVICE_GPU)              \
                               .HostMemory("resource")          \
                               .TypeConstraint<type>("dtype"),  \
-                          ResourceHandleOp<Var>)
+                          ResourceHandleOp<Var>)               \
+  REGISTER_KERNEL_BUILDER(Name("_VarHandlesOp")                \
+                              .Device(DEVICE_GPU)              \
+                              .HostMemory("resources")         \
+                              .TypeConstraint<type>("dtypes"), \
+                          ResourceHandlesOp<Var>)
 
 TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS);
 TF_CALL_int64(REGISTER_GPU_KERNELS);
diff --git a/tensorflow/core/kernels/resource_variable_ops.h b/tensorflow/core/kernels/resource_variable_ops.h
index 9b60106f13..cffb732c38 100644
--- a/tensorflow/core/kernels/resource_variable_ops.h
+++ b/tensorflow/core/kernels/resource_variable_ops.h
@@ -28,6 +28,16 @@ class ReadVariableOp : public OpKernel {
   DataType dtype_;
 };
 
+class ReadVariablesOp : public OpKernel {
+ public:
+  explicit ReadVariablesOp(OpKernelConstruction* c);
+  void Compute(OpKernelContext* ctx) override;
+  bool IsExpensive() override { return false; }
+
+ private:
+  DataTypeVector dtypes_;
+};
+
 class DestroyResourceOp : public OpKernel {
  public:
   explicit DestroyResourceOp(OpKernelConstruction* ctx);
diff --git a/tensorflow/core/ops/resource_variable_ops.cc b/tensorflow/core/ops/resource_variable_ops.cc
index 26499540f1..adc9cd1486 100644
--- a/tensorflow/core/ops/resource_variable_ops.cc
+++ b/tensorflow/core/ops/resource_variable_ops.cc
@@ -19,6 +19,7 @@
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/core/errors.h"
 
 using ::tensorflow::shape_inference::InferenceContext;
 using ::tensorflow::shape_inference::ShapeAndType;
@@ -56,6 +57,36 @@ Status ReadVariableShapeFn(InferenceContext* c) {
   return Status::OK();
 }
 
+Status ReadVariablesShapeFn(InferenceContext* c) {
+  int n;
+  TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
+  DataTypeVector value_dtypes;
+  TF_RETURN_IF_ERROR(c->GetAttr("dtypes", &value_dtypes));
+  if (n != value_dtypes.size()) {
+    return errors::InvalidArgument(
+        "Mismatched number of arguments to ReadVariablesOp");
+  }
+  for (int i = 0; i < n; ++i) {
+    ShapeAndType shape_and_type;
+    auto* handle_data = c->input_handle_shapes_and_types(i);
+    if (handle_data == nullptr || handle_data->empty()) {
+      shape_and_type.shape = c->UnknownShape();
+      shape_and_type.dtype = DT_INVALID;
+    } else {
+      shape_and_type = (*handle_data)[0];
+      if (shape_and_type.dtype != value_dtypes[i]) {
+        return errors::InvalidArgument(
+            "Trying to read variable with wrong dtype. "
+            "Expected ",
+            DataTypeString(shape_and_type.dtype), " got ",
+            DataTypeString(value_dtypes[i]));
+      }
+    }
+    c->set_output(i, shape_and_type.shape);
+  }
+  return Status::OK();
+}
+
 }  // namespace
 
 REGISTER_OP("VarHandleOp")
@@ -79,12 +110,53 @@ REGISTER_OP("VarHandleOp")
       return Status::OK();
     });
 
+REGISTER_OP("_VarHandlesOp")
+    .Attr("containers: list(string)")
+    .Attr("shared_names: list(string)")
+    .Attr("N: int >= 0")
+    .Attr("dtypes: list(type)")
+    .Attr("shapes: list(shape)")
+    .Output("resources: N * resource")
+    .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      int n;
+      TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
+      DataTypeVector dtypes;
+      TF_RETURN_IF_ERROR(c->GetAttr("dtypes", &dtypes));
+      std::vector<PartialTensorShape> shapes;
+      TF_RETURN_IF_ERROR(c->GetAttr("shapes", &shapes));
+      if (dtypes.size() != n) {
+        return errors::InvalidArgument("Mismatched number of dtypes (n=", n,
+                                       ", num dtypes=", dtypes.size(), ")");
+      }
+      if (shapes.size() != n) {
+        return errors::InvalidArgument("Mismatched number of shapes (n=", n,
+                                       ", num shapes=", shapes.size(), ")");
+      }
+      for (int i = 0; i < n; ++i) {
+        c->set_output(i, c->Scalar());
+        ShapeHandle s;
+        TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(shapes[i], &s));
+        c->set_output_handle_shapes_and_types(
+            i, std::vector<ShapeAndType>{{s, dtypes[i]}});
+      }
+
+      return Status::OK();
+    });
+
 REGISTER_OP("ReadVariableOp")
     .Input("resource: resource")
     .Output("value: dtype")
     .Attr("dtype: type")
     .SetShapeFn(ReadVariableShapeFn);
 
+REGISTER_OP("_ReadVariablesOp")
+    .Attr("N: int >= 0")
+    .Input("resources: N * resource")
+    .Output("values: dtypes")
+    .Attr("dtypes: list(type)")
+    .SetShapeFn(ReadVariablesShapeFn);
+
 Status ReadGrad(const AttrSlice& attrs, FunctionDef* g) {
   // clang-format off
   *g = FunctionDefHelper::Define(
-- 
GitLab


From ce58563454de6c33ea3bdea5840234eeefbc835e Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Wed, 26 Sep 2018 13:51:27 -0700
Subject: [PATCH 0749/1357] Quick fix for allowed symbols in tf contrib
 estimator

PiperOrigin-RevId: 214662826
---
 tensorflow/contrib/estimator/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py
index 78914ecaca..419609b1af 100644
--- a/tensorflow/contrib/estimator/__init__.py
+++ b/tensorflow/contrib/estimator/__init__.py
@@ -76,7 +76,7 @@ _allowed_symbols = [
     'stop_if_no_decrease_hook',
     'build_raw_supervised_input_receiver_fn',
     'build_supervised_input_receiver_fn_from_input_fn',
-    'SavedModelEstimator'
+    'SavedModelEstimator',
     'DNNClassifierWithLayerAnnotations',
     'DNNRegressorWithLayerAnnotations',
 ]
-- 
GitLab


From 72b927960625cd2920fea06e242df1ff0d220c77 Mon Sep 17 00:00:00 2001
From: Brian Patton <bjp@google.com>
Date: Wed, 26 Sep 2018 14:10:12 -0700
Subject: [PATCH 0750/1357] Specify a preferred_dtype=self.dtype when
 converting Distribution methods' sample-like args to Tensors.

After this change, you could conceivably write tfd.Normal(0., 1.).log_prob(1)

The tf core distributions can't use tfp dtype_util.common_dtype, so you can't yet write tfd.Normal(0, 1).

Works around an eager bug that loses precision in the presence in tf.convert_to_tensor(0.5, preferred_dtype=tf.int32)

PiperOrigin-RevId: 214666222
---
 .../distributions/bernoulli_test.py           | 12 +++++++
 .../kernel_tests/distributions/normal_test.py |  8 +++++
 .../python/ops/distributions/distribution.py  | 34 +++++++++++++++----
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/kernel_tests/distributions/bernoulli_test.py b/tensorflow/python/kernel_tests/distributions/bernoulli_test.py
index 26d013bccb..37b35ba51a 100644
--- a/tensorflow/python/kernel_tests/distributions/bernoulli_test.py
+++ b/tensorflow/python/kernel_tests/distributions/bernoulli_test.py
@@ -118,7 +118,9 @@ class BernoulliTest(test.TestCase):
     self.assertEqual(dist.probs.dtype, dist.stddev().dtype)
     self.assertEqual(dist.probs.dtype, dist.entropy().dtype)
     self.assertEqual(dist.probs.dtype, dist.prob(0).dtype)
+    self.assertEqual(dist.probs.dtype, dist.prob(0.5).dtype)
     self.assertEqual(dist.probs.dtype, dist.log_prob(0).dtype)
+    self.assertEqual(dist.probs.dtype, dist.log_prob(0.5).dtype)
 
     dist64 = make_bernoulli([], dtypes.int64)
     self.assertEqual(dist64.dtype, dtypes.int64)
@@ -181,6 +183,16 @@ class BernoulliTest(test.TestCase):
       return
     self._testPmf(logits=special.logit(p))
 
+  @test_util.run_in_graph_and_eager_modes
+  def testPmfWithFloatArgReturnsXEntropy(self):
+    p = [[0.2], [0.4], [0.3], [0.6]]
+    samps = [0, 0.1, 0.8]
+    self.assertAllClose(
+        np.float32(samps) * np.log(np.float32(p)) +
+        (1 - np.float32(samps)) * np.log(1 - np.float32(p)),
+        self.evaluate(
+            bernoulli.Bernoulli(probs=p, validate_args=False).log_prob(samps)))
+
   def testBroadcasting(self):
     with self.cached_session():
       p = array_ops.placeholder(dtypes.float32)
diff --git a/tensorflow/python/kernel_tests/distributions/normal_test.py b/tensorflow/python/kernel_tests/distributions/normal_test.py
index de73a40b23..6625a88843 100644
--- a/tensorflow/python/kernel_tests/distributions/normal_test.py
+++ b/tensorflow/python/kernel_tests/distributions/normal_test.py
@@ -77,6 +77,14 @@ class NormalTest(test.TestCase):
     self.assertEqual(expected, mu_shape)
     self.assertEqual(expected, sigma_shape)
 
+  @test_util.run_in_graph_and_eager_modes
+  def testSampleLikeArgsGetDistDType(self):
+    dist = normal_lib.Normal(0., 1.)
+    self.assertEqual(dtypes.float32, dist.dtype)
+    for method in ("log_prob", "prob", "log_cdf", "cdf",
+                   "log_survival_function", "survival_function", "quantile"):
+      self.assertEqual(dtypes.float32, getattr(dist, method)(1).dtype)
+
   @test_util.run_in_graph_and_eager_modes
   def testParamShapes(self):
     sample_shape = [10, 3, 4]
diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py
index 76d980679e..12fd039392 100644
--- a/tensorflow/python/ops/distributions/distribution.py
+++ b/tensorflow/python/ops/distributions/distribution.py
@@ -25,6 +25,7 @@ import types
 import numpy as np
 import six
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -127,6 +128,18 @@ def _update_docstring(old_str, append_str):
     return old_str + "\n\n" + append_str
 
 
+def _convert_to_tensor(value, name=None, preferred_dtype=None):
+  """Converts to tensor avoiding an eager bug that loses float precision."""
+  # TODO(b/116672045): Remove this function.
+  if (context.executing_eagerly() and preferred_dtype is not None and
+      (preferred_dtype.is_integer or preferred_dtype.is_bool)):
+    v = ops.convert_to_tensor(value, name=name)
+    if v.dtype.is_floating:
+      return v
+  return ops.convert_to_tensor(
+      value, name=name, preferred_dtype=preferred_dtype)
+
+
 class _DistributionMeta(abc.ABCMeta):
 
   def __new__(mcs, classname, baseclasses, attrs):
@@ -741,7 +754,8 @@ class Distribution(_BaseDistribution):
 
   def _call_log_prob(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
-      value = ops.convert_to_tensor(value, name="value")
+      value = _convert_to_tensor(
+          value, name="value", preferred_dtype=self.dtype)
       try:
         return self._log_prob(value, **kwargs)
       except NotImplementedError as original_exception:
@@ -769,7 +783,8 @@ class Distribution(_BaseDistribution):
 
   def _call_prob(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
-      value = ops.convert_to_tensor(value, name="value")
+      value = _convert_to_tensor(
+          value, name="value", preferred_dtype=self.dtype)
       try:
         return self._prob(value, **kwargs)
       except NotImplementedError as original_exception:
@@ -797,7 +812,8 @@ class Distribution(_BaseDistribution):
 
   def _call_log_cdf(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
-      value = ops.convert_to_tensor(value, name="value")
+      value = _convert_to_tensor(
+          value, name="value", preferred_dtype=self.dtype)
       try:
         return self._log_cdf(value, **kwargs)
       except NotImplementedError as original_exception:
@@ -835,7 +851,8 @@ class Distribution(_BaseDistribution):
 
   def _call_cdf(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
-      value = ops.convert_to_tensor(value, name="value")
+      value = _convert_to_tensor(
+          value, name="value", preferred_dtype=self.dtype)
       try:
         return self._cdf(value, **kwargs)
       except NotImplementedError as original_exception:
@@ -870,7 +887,8 @@ class Distribution(_BaseDistribution):
 
   def _call_log_survival_function(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
-      value = ops.convert_to_tensor(value, name="value")
+      value = _convert_to_tensor(
+          value, name="value", preferred_dtype=self.dtype)
       try:
         return self._log_survival_function(value, **kwargs)
       except NotImplementedError as original_exception:
@@ -909,7 +927,8 @@ class Distribution(_BaseDistribution):
 
   def _call_survival_function(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
-      value = ops.convert_to_tensor(value, name="value")
+      value = _convert_to_tensor(
+          value, name="value", preferred_dtype=self.dtype)
       try:
         return self._survival_function(value, **kwargs)
       except NotImplementedError as original_exception:
@@ -963,7 +982,8 @@ class Distribution(_BaseDistribution):
 
   def _call_quantile(self, value, name, **kwargs):
     with self._name_scope(name, values=[value]):
-      value = ops.convert_to_tensor(value, name="value")
+      value = _convert_to_tensor(
+          value, name="value", preferred_dtype=self.dtype)
       return self._quantile(value, **kwargs)
 
   def quantile(self, value, name="quantile"):
-- 
GitLab


From 9280b3c8a41150022d3ea508f01959ac954c9f73 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 14:22:38 -0700
Subject: [PATCH 0751/1357] Add an experimental Java API to allow half
 precision for FP32 calculation.

PiperOrigin-RevId: 214668283
---
 .../java/org/tensorflow/lite/Interpreter.java    | 16 ++++++++++++++--
 .../lite/NativeInterpreterWrapper.java           | 12 ++++++++++++
 .../main/native/nativeinterpreterwrapper_jni.cc  | 15 +++++++++++----
 .../main/native/nativeinterpreterwrapper_jni.h   |  9 +++++++++
 .../org/tensorflow/lite/InterpreterTest.java     |  4 +++-
 5 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
index ffb04496cb..eacfa0c827 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
@@ -74,8 +74,19 @@ public final class Interpreter implements AutoCloseable {
       return this;
     }
 
+    /**
+     * Sets whether to allow float16 precision for FP32 calculation when possible. Defaults to false
+     * (disallow).
+     * WARNING: This is an experimental API and subject to change.
+     */
+    public Options setAllowFp16PrecisionForFp32(boolean allow) {
+      this.allowFp16PrecisionForFp32 = allow;
+      return this;
+    }
+
     int numThreads = -1;
     boolean useNNAPI = false;
+    boolean allowFp16PrecisionForFp32 = false;
   }
 
   /**
@@ -256,8 +267,9 @@ public final class Interpreter implements AutoCloseable {
 
   /**
    * Returns native inference timing.
-   * <p>IllegalArgumentException will be thrown if the model is not initialized by the
-   * {@link Interpreter}.
+   *
+   * <p>IllegalArgumentException will be thrown if the model is not initialized by the {@link
+   * Interpreter}.
    */
   public Long getLastNativeInferenceDurationNanoseconds() {
     checkNotClosed();
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
index 6feff9a618..9bc44bf797 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
@@ -45,6 +45,9 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     isMemoryAllocated = true;
     inputTensors = new Tensor[getInputCount(interpreterHandle)];
     outputTensors = new Tensor[getOutputCount(interpreterHandle)];
+    if (options.allowFp16PrecisionForFp32) {
+      setAllowFp16PrecisionForFp32(options.allowFp16PrecisionForFp32);
+    }
   }
 
   NativeInterpreterWrapper(ByteBuffer byteBuffer) {
@@ -72,6 +75,9 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     if (options.useNNAPI) {
       setUseNNAPI(options.useNNAPI);
     }
+    if (options.allowFp16PrecisionForFp32) {
+      setAllowFp16PrecisionForFp32(options.allowFp16PrecisionForFp32);
+    }
   }
 
   /** Releases resources associated with this {@code NativeInterpreterWrapper}. */
@@ -159,6 +165,10 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     useNNAPI(interpreterHandle, useNNAPI);
   }
 
+  void setAllowFp16PrecisionForFp32(boolean allow) {
+    allowFp16PrecisionForFp32(interpreterHandle, allow);
+  }
+
   void setNumThreads(int numThreads) {
     numThreads(interpreterHandle, numThreads);
   }
@@ -323,6 +333,8 @@ final class NativeInterpreterWrapper implements AutoCloseable {
 
   private static native void numThreads(long interpreterHandle, int numThreads);
 
+  private static native void allowFp16PrecisionForFp32(long interpreterHandle, boolean allow);
+
   private static native long createErrorReporter(int size);
 
   private static native long createModel(String modelPathOrBuffer, long errorHandle);
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
index fdcf00a0a0..abb7320bc5 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
@@ -59,7 +59,6 @@ std::vector<int> convertJIntArrayToVector(JNIEnv* env, jintArray inputs) {
   return outputs;
 }
 
-
 int getDataType(TfLiteType data_type) {
   switch (data_type) {
     case kTfLiteFloat32:
@@ -233,11 +232,19 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env,
   interpreter->UseNNAPI(static_cast<bool>(state));
 }
 
+JNIEXPORT void JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_allowFp16PrecisionForFp32(
+    JNIEnv* env, jclass clazz, jlong handle, jboolean allow) {
+  tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle);
+  if (interpreter == nullptr) return;
+  interpreter->SetAllowFp16PrecisionForFp32(static_cast<bool>(allow));
+}
+
 JNIEXPORT void JNICALL
 Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env,
-                                                           jclass clazz,
-                                                           jlong handle,
-                                                           jint num_threads) {
+                                                             jclass clazz,
+                                                             jlong handle,
+                                                             jint num_threads) {
   tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle);
   if (interpreter == nullptr) return;
   interpreter->SetNumThreads(static_cast<int>(num_threads));
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
index 06b35d77c8..aa809dff8a 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
@@ -117,6 +117,15 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env,
                                                            jlong handle,
                                                            jboolean state);
 
+/*
+ *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
+ *  Method:
+ *  Signature: (JZ)V
+ */
+JNIEXPORT void JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_allowFp16PrecisionForFp32(
+    JNIEnv* env, jclass clazz, jlong handle, jboolean allow);
+
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
  *  Method:
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
index dfdd7d22b0..fdd5063156 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
@@ -323,7 +323,9 @@ public final class InterpreterTest {
     MappedByteBuffer mappedByteBuffer =
         fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size());
     Interpreter interpreter =
-        new Interpreter(mappedByteBuffer, new Interpreter.Options().setUseNNAPI(true));
+        new Interpreter(
+            mappedByteBuffer,
+            new Interpreter.Options().setUseNNAPI(true).setAllowFp16PrecisionForFp32(true));
     float[] oneD = {1.23f, 6.54f, 7.81f};
     float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD};
     float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD};
-- 
GitLab


From dd37be0e66934369bb7f5e4b5a88b982351fbff0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 14:24:07 -0700
Subject: [PATCH 0752/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 214668499

---
 tensorflow/go/op/wrappers.go | 562 +++++++++++++++++------------------
 1 file changed, 281 insertions(+), 281 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 9dd487e73b..bb934ca050 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -3890,72 +3890,100 @@ func IsBoostedTreesEnsembleInitialized(scope *Scope, tree_ensemble_handle tf.Out
 	return op.Output(0)
 }
 
-// Computes the sum along sparse segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
-// dimension, selecting a subset of dimension 0, specified by `indices`.
-//
-// For example:
-//
-// ```python
-// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
-//
-// # Select two rows, one segment.
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
-// # => [[0 0 0 0]]
-//
-// # Select two rows, two segment.
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
-// # => [[ 1  2  3  4]
-// #     [-1 -2 -3 -4]]
-//
-// # Select all rows, two segments.
-// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
-// # => [[0 0 0 0]
-// #     [5 6 7 8]]
-//
-// # Which is equivalent to:
-// tf.segment_sum(c, tf.constant([0, 0, 1]))
-// ```
-//
-// Arguments:
-//
-//	indices: A 1-D tensor. Has same rank as `segment_ids`.
-//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
+// BoostedTreesEnsembleResourceHandleOpAttr is an optional argument to BoostedTreesEnsembleResourceHandleOp.
+type BoostedTreesEnsembleResourceHandleOpAttr func(optionalAttr)
+
+// BoostedTreesEnsembleResourceHandleOpContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func BoostedTreesEnsembleResourceHandleOpContainer(value string) BoostedTreesEnsembleResourceHandleOpAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// BoostedTreesEnsembleResourceHandleOpSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func BoostedTreesEnsembleResourceHandleOpSharedName(value string) BoostedTreesEnsembleResourceHandleOpAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Creates a handle to a BoostedTreesEnsembleResource
+func BoostedTreesEnsembleResourceHandleOp(scope *Scope, optional ...BoostedTreesEnsembleResourceHandleOpAttr) (resource tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SparseSegmentSum",
-		Input: []tf.Input{
-			data, indices, segment_ids,
-		},
+		Type: "BoostedTreesEnsembleResourceHandleOp",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes hyperbolic sine of x element-wise.
-func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
+// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits.
+type ComputeAccidentalHitsAttr func(optionalAttr)
+
+// ComputeAccidentalHitsSeed sets the optional seed attribute to value.
+//
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Computes the ids of the positions in sampled_candidates that match true_labels.
+//
+// When doing log-odds NCE, the result of this op should be passed through a
+// SparseToDense op, then added to the logits of the sampled candidates. This has
+// the effect of 'removing' the sampled labels that match the true labels by
+// making the classifier sure that they are sampled labels.
+//
+// Arguments:
+//	true_classes: The true_classes output of UnpackSparseLabels.
+//	sampled_candidates: The sampled_candidates output of CandidateSampler.
+//	num_true: Number of true labels per context.
+//
+// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label
+// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element
+// is -FLOAT_MAX.
+func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_true": num_true}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Sinh",
+		Type: "ComputeAccidentalHits",
 		Input: []tf.Input{
-			x,
+			true_classes, sampled_candidates,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
 // Computes the minimum along segments of a tensor.
@@ -5142,6 +5170,74 @@ func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf
 	return op.Output(0)
 }
 
+// Computes hyperbolic sine of x element-wise.
+func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sinh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the sum along sparse segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
+// dimension, selecting a subset of dimension 0, specified by `indices`.
+//
+// For example:
+//
+// ```python
+// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+//
+// # Select two rows, one segment.
+// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
+// # => [[0 0 0 0]]
+//
+// # Select two rows, two segment.
+// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
+// # => [[ 1  2  3  4]
+// #     [-1 -2 -3 -4]]
+//
+// # Select all rows, two segments.
+// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+// # => [[0 0 0 0]
+// #     [5 6 7 8]]
+//
+// # Which is equivalent to:
+// tf.segment_sum(c, tf.constant([0, 0, 1]))
+// ```
+//
+// Arguments:
+//
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseSegmentSum",
+		Input: []tf.Input{
+			data, indices, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes natural logarithm of (1 + x) element-wise.
 //
 // I.e., \\(y = \log_e (1 + x)\\).
@@ -9491,56 +9587,62 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf
 	return op.Output(0)
 }
 
-// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl.
-type ResourceApplyFtrlAttr func(optionalAttr)
+// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D.
+type FusedResizeAndPadConv2DAttr func(optionalAttr)
 
-// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value.
+// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
+// value: If true, the centers of the 4 corner pixels of the input and output tensors are
+// aligned, preserving the values at the corner pixels. Defaults to false.
 // If not specified, defaults to false
-func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr {
+func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["resize_align_corners"] = value
 	}
 }
 
-// Update '*var' according to the Ftrl-proximal scheme.
+// Performs a resize and padding as a preprocess during a convolution.
 //
-// accum_new = accum + grad * grad
-// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-// accum = accum_new
+// It's often possible to do spatial transformations more efficiently as part of
+// the packing stage of a convolution, so this op allows for an optimized
+// implementation where these stages are fused together. This prevents the need to
+// write out the intermediate results as whole tensors, reducing memory pressure,
+// and we can get some latency gains by merging the transformation calculations.
+// The data_format attribute for Conv2D isn't supported by this op, and defaults to
+// 'NHWC' order.
+// Internally this op uses a single per-graph scratch buffer, which means that it
+// will block if multiple versions are being run in parallel. This is because this
+// operator is primarily an optimization to minimize memory usage.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	linear: Should be from a Variable().
-//	grad: The gradient.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regulariation. Must be a scalar.
-//	l2: L2 regulariation. Must be a scalar.
-//	lr_power: Scaling factor. Must be a scalar.
+//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+//	size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
+//	paddings: A two-column matrix specifying the padding sizes. The number of
+// rows must be the same as the rank of `input`.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.
 //
-// Returns the created operation.
-func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) {
+//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
+// of `input`. Must be in the same order as the dimension specified with format.
+//	padding: The type of padding algorithm to use.
+func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyFtrl",
+		Type: "FusedResizeAndPadConv2D",
 		Input: []tf.Input{
-			var_, accum, linear, grad, lr, l1, l2, lr_power,
+			input, size, paddings, filter,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
 // RandomUniformAttr is an optional argument to RandomUniform.
@@ -9582,19 +9684,71 @@ func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ..
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomUniform",
+		Input: []tf.Input{
+			shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl.
+type ResourceApplyFtrlAttr func(optionalAttr)
+
+// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the Ftrl-proximal scheme.
+//
+// accum_new = accum + grad * grad
+// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+// accum = accum_new
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	linear: Should be from a Variable().
+//	grad: The gradient.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regulariation. Must be a scalar.
+//	l2: L2 regulariation. Must be a scalar.
+//	lr_power: Scaling factor. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomUniform",
+		Type: "ResourceApplyFtrl",
 		Input: []tf.Input{
-			shape,
+			var_, accum, linear, grad, lr, l1, l2, lr_power,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
 // Encode audio data using the WAV file format.
@@ -11664,6 +11818,51 @@ func Conj(scope *Scope, input tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
+// ProdAttr is an optional argument to Prod.
+type ProdAttr func(optionalAttr)
+
+// ProdKeepDims sets the optional keep_dims attribute to value.
+//
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func ProdKeepDims(value bool) ProdAttr {
+	return func(m optionalAttr) {
+		m["keep_dims"] = value
+	}
+}
+
+// Computes the product of elements across dimensions of a tensor.
+//
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
+//
+// Arguments:
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
+//
+// Returns The reduced tensor.
+func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Prod",
+		Input: []tf.Input{
+			input, axis,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // ResizeBilinearAttr is an optional argument to ResizeBilinear.
 type ResizeBilinearAttr func(optionalAttr)
 
@@ -12024,65 +12223,6 @@ func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr)
 	return scope.AddOperation(opspec)
 }
 
-// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits.
-type ComputeAccidentalHitsAttr func(optionalAttr)
-
-// ComputeAccidentalHitsSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value.
-//
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Computes the ids of the positions in sampled_candidates that match true_labels.
-//
-// When doing log-odds NCE, the result of this op should be passed through a
-// SparseToDense op, then added to the logits of the sampled candidates. This has
-// the effect of 'removing' the sampled labels that match the true labels by
-// making the classifier sure that they are sampled labels.
-//
-// Arguments:
-//	true_classes: The true_classes output of UnpackSparseLabels.
-//	sampled_candidates: The sampled_candidates output of CandidateSampler.
-//	num_true: Number of true labels per context.
-//
-// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label
-// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element
-// is -FLOAT_MAX.
-func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_true": num_true}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ComputeAccidentalHits",
-		Input: []tf.Input{
-			true_classes, sampled_candidates,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
 // QuantizedRelu6Attr is an optional argument to QuantizedRelu6.
 type QuantizedRelu6Attr func(optionalAttr)
 
@@ -16044,109 +16184,6 @@ func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
-// ProdAttr is an optional argument to Prod.
-type ProdAttr func(optionalAttr)
-
-// ProdKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func ProdKeepDims(value bool) ProdAttr {
-	return func(m optionalAttr) {
-		m["keep_dims"] = value
-	}
-}
-
-// Computes the product of elements across dimensions of a tensor.
-//
-// Reduces `input` along the dimensions given in `axis`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `axis`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
-//
-// Arguments:
-//	input: The tensor to reduce.
-//	axis: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
-//
-// Returns The reduced tensor.
-func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Prod",
-		Input: []tf.Input{
-			input, axis,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D.
-type FusedResizeAndPadConv2DAttr func(optionalAttr)
-
-// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value.
-//
-// value: If true, the centers of the 4 corner pixels of the input and output tensors are
-// aligned, preserving the values at the corner pixels. Defaults to false.
-// If not specified, defaults to false
-func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr {
-	return func(m optionalAttr) {
-		m["resize_align_corners"] = value
-	}
-}
-
-// Performs a resize and padding as a preprocess during a convolution.
-//
-// It's often possible to do spatial transformations more efficiently as part of
-// the packing stage of a convolution, so this op allows for an optimized
-// implementation where these stages are fused together. This prevents the need to
-// write out the intermediate results as whole tensors, reducing memory pressure,
-// and we can get some latency gains by merging the transformation calculations.
-// The data_format attribute for Conv2D isn't supported by this op, and defaults to
-// 'NHWC' order.
-// Internally this op uses a single per-graph scratch buffer, which means that it
-// will block if multiple versions are being run in parallel. This is because this
-// operator is primarily an optimization to minimize memory usage.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
-//	size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
-//	paddings: A two-column matrix specifying the padding sizes. The number of
-// rows must be the same as the rank of `input`.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.
-//
-//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
-// of `input`. Must be in the same order as the dimension specified with format.
-//	padding: The type of padding algorithm to use.
-func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "FusedResizeAndPadConv2D",
-		Input: []tf.Input{
-			input, size, paddings, filter,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Returns a list of tensors with the same shapes and contents as the input
 //
 // tensors.
@@ -17616,43 +17653,6 @@ func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_
 	return op.Output(0)
 }
 
-// BoostedTreesEnsembleResourceHandleOpAttr is an optional argument to BoostedTreesEnsembleResourceHandleOp.
-type BoostedTreesEnsembleResourceHandleOpAttr func(optionalAttr)
-
-// BoostedTreesEnsembleResourceHandleOpContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func BoostedTreesEnsembleResourceHandleOpContainer(value string) BoostedTreesEnsembleResourceHandleOpAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// BoostedTreesEnsembleResourceHandleOpSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func BoostedTreesEnsembleResourceHandleOpSharedName(value string) BoostedTreesEnsembleResourceHandleOpAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Creates a handle to a BoostedTreesEnsembleResource
-func BoostedTreesEnsembleResourceHandleOp(scope *Scope, optional ...BoostedTreesEnsembleResourceHandleOpAttr) (resource tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesEnsembleResourceHandleOp",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum.
 type ResourceApplyMomentumAttr func(optionalAttr)
 
-- 
GitLab


From c551a7dbd08685160c233ccecd444f774666f98e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 14:25:34 -0700
Subject: [PATCH 0753/1357] Kernel signature reworking, update kernel
 DepthConcatenation.

PiperOrigin-RevId: 214668695
---
 .../internal/reference/reference_ops.h        | 27 +++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index cd9e1b255d..f3f1595035 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -1991,12 +1991,35 @@ void PackWithScaling(const PackParams& params,
   }
 }
 
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams& params,
+                        const RuntimeShape* const* input_shapes,
+                        const Scalar* const* input_data,
+                        const RuntimeShape& output_shape, Scalar* output_data) {
+  auto params_copy = params;
+  params_copy.axis = 3;
+  Concatenation(params_copy, input_shapes, input_data, output_shape,
+                output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 template <FusedActivationFunctionType Ac, typename Scalar>
 void DepthConcatenation(const Scalar* const* input_data,
                         const Dims<4>* const* input_dims, int inputs_count,
                         Scalar* output_data, const Dims<4>& output_dims) {
-  Concatenation<Ac, Scalar>(0, input_data, input_dims, inputs_count,
-                            output_data, output_dims);
+  // For now we don't have a model with a Concatenation with fused activation.
+  TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone);
+  std::vector<RuntimeShape> input_shapes(inputs_count);
+  std::vector<const RuntimeShape*> input_shapes_indirect(inputs_count);
+  for (int i = 0; i < inputs_count; ++i) {
+    ShapeFromDims(*input_dims[i], &input_shapes[i]);
+    input_shapes_indirect[i] = &input_shapes[i];
+  }
+  tflite::ConcatenationParams op_params;
+  op_params.inputs_count = inputs_count;
+
+  DepthConcatenation(op_params, input_shapes_indirect.data(), input_data,
+                     DimsToShape(output_dims), output_data);
 }
 
 inline void LstmCell(
-- 
GitLab


From 2116c6649cfe339ce8a3859eb425806db8ae32b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 14:27:25 -0700
Subject: [PATCH 0754/1357] Misc. micro-optimizations in Grappler optimizers.
 Make shape inference lazy in optimizers that may not trigger.

PiperOrigin-RevId: 214669034
---
 .../direct_session_with_tracking_alloc_test.cc       |  8 ++++----
 tensorflow/core/grappler/graph_view.cc               |  2 +-
 .../core/grappler/optimizers/arithmetic_optimizer.cc | 12 +++++++-----
 tensorflow/core/grappler/optimizers/remapper.cc      |  8 +++++++-
 .../core/grappler/optimizers/shape_optimizer.cc      | 12 +++++++++++-
 5 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
index efd6185f8b..2c63b8704e 100644
--- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
@@ -117,15 +117,15 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) {
           // which increments the value of AllocationId.
           // Thus AllocationId becomes more than TF if MKL
           // is used. Now IDs for MKL are 8 more than TF.
-          EXPECT_EQ(29, cm->AllocationId(node, 0));
-#else
           EXPECT_EQ(21, cm->AllocationId(node, 0));
+#else
+          EXPECT_EQ(13, cm->AllocationId(node, 0));
 #endif  // INTEL_MKL && ENABLE_MKL
         } else {
 #if defined(INTEL_MKL) && defined(ENABLE_MKL)
-          EXPECT_EQ(30, cm->AllocationId(node, 0));
-#else
           EXPECT_EQ(22, cm->AllocationId(node, 0));
+#else
+          EXPECT_EQ(14, cm->AllocationId(node, 0));
 #endif  // INTEL_MKL && ENABLE_MKL
         }
       }
diff --git a/tensorflow/core/grappler/graph_view.cc b/tensorflow/core/grappler/graph_view.cc
index 2619a9a8f3..0b8cb5e919 100644
--- a/tensorflow/core/grappler/graph_view.cc
+++ b/tensorflow/core/grappler/graph_view.cc
@@ -72,7 +72,7 @@ void GraphView::AddUniqueNodeOrDie(NodeDef* node) {
 void GraphView::AddFanouts(NodeDef* node) {
   for (int i = 0; i < node->input_size(); ++i) {
     OutputPort fanin;
-    string fanin_name = ParseNodeName(node->input(i), &fanin.port_id);
+    const string fanin_name = ParseNodeName(node->input(i), &fanin.port_id);
     fanin.node = nodes_[fanin_name];
 
     InputPort input;
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 75ed12635e..3388ee8035 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -276,7 +276,7 @@ class ArithmeticOptimizerStage : public GraphOptimizerStage<string> {
     for (const NodeDef* output : ctx().node_map->GetOutputs(node.name())) {
       for (int i = 0; i < output->input_size(); ++i) {
         auto input = output->input(i);
-        string name = ParseNodeName(input, &position);
+        StringPiece name = ParseNodeNameAsStringPiece(input, &position);
         if (name == node.name() && /*control input*/ position < 0) {
           return true;
         }
@@ -1568,7 +1568,8 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
       for (NodeDef* output : outputs) {
         if (IsControlInput(output->input(0))) continue;
         int port;
-        const string node_name = ParseNodeName(output->input(0), &port);
+        const StringPiece node_name =
+            ParseNodeNameAsStringPiece(output->input(0), &port);
         if (node_name == node.name()) {
           tails->insert(ChainLink(output, port));
         } else {
@@ -1618,7 +1619,8 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
       } else {
         for (NodeDef* new_tail : ctx().node_map->GetOutputs(tail->name())) {
           int port;
-          const string node_name = ParseNodeName(new_tail->input(0), &port);
+          const StringPiece node_name =
+              ParseNodeNameAsStringPiece(new_tail->input(0), &port);
           if (node_name != tail->name()) {
             return Status::OK();
           }
@@ -2929,8 +2931,8 @@ uint64 UniqueNodes::ComputeSignature(const NodeDef& node) const {
 
   for (const auto& input : node.input()) {
     int pos;
-    string node_name = ParseNodeName(input, &pos);
-    h = Hash64CombineUnordered(Hash64(node_name), h);
+    const StringPiece node_name = ParseNodeNameAsStringPiece(input, &pos);
+    h = Hash64CombineUnordered(Hash64(node_name.data(), node_name.size()), h);
     h = Hash64CombineUnordered(std::hash<int>()(pos), h);
   }
   for (const auto& attr : node.attr()) {
diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc
index 008a289cfd..9ada8b7ff9 100644
--- a/tensorflow/core/grappler/optimizers/remapper.cc
+++ b/tensorflow/core/grappler/optimizers/remapper.cc
@@ -168,11 +168,12 @@ void AddBatchNormNodes(GraphDef* optimized_graph, const NodeDef& fused_node) {
 Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item,
                           GraphDef* optimized_graph) {
   GraphProperties properties(item);
-  TF_RETURN_IF_ERROR(properties.InferStatically(false));
+  bool inferred_properties = false;
   GraphView graph(const_cast<GraphDef*>(&item.graph));
 
   // During inference, most of the inputs to FusedBatchNorm are constant, and we
   // can therefore replace the op with a much cheaper set of primitives.
+  optimized_graph->mutable_node()->Reserve(item.graph.node_size());
   for (const NodeDef& node : item.graph.node()) {
     if (node.op() == "FusedBatchNorm" || node.op() == "FusedBatchNormV2") {
       bool optimizable = (node.attr().count("T") == 0 ||
@@ -181,6 +182,11 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item,
                       !node.attr().at("is_training").b());
       if (optimizable) {
         int const_inputs = 0;
+        if (!inferred_properties) {
+          // Infer properties lazily in case they are not needed.
+          TF_RETURN_IF_ERROR(properties.InferStatically(false));
+          inferred_properties = true;
+        }
         const auto& props = properties.GetInputProperties(node.name());
         for (const auto& prop : props) {
           if (prop.has_value()) {
diff --git a/tensorflow/core/grappler/optimizers/shape_optimizer.cc b/tensorflow/core/grappler/optimizers/shape_optimizer.cc
index 4542d17ccc..6ccb1cd783 100644
--- a/tensorflow/core/grappler/optimizers/shape_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/shape_optimizer.cc
@@ -33,7 +33,7 @@ Status ShapeOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   *optimized_graph = item.graph;
 
   GraphProperties properties(item);
-  TF_RETURN_IF_ERROR(properties.InferStatically(false));
+  bool inferred_properties = false;
   GraphView graph(optimized_graph);
 
   // The product of all the dimensions in a tensor shape can be expressed more
@@ -55,6 +55,11 @@ Status ShapeOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       }
       const GraphView::OutputPort reduce_indices =
           graph.GetRegularFanin(GraphView::InputPort(fanout.node, 1));
+      if (!inferred_properties) {
+        // Infer properties lazily in case they are not needed.
+        TF_RETURN_IF_ERROR(properties.InferStatically(false));
+        inferred_properties = true;
+      }
       const auto& prop =
           properties.GetOutputProperties(reduce_indices.node->name());
       if (prop.size() < reduce_indices.port_id) {
@@ -92,6 +97,11 @@ Status ShapeOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       if (!IsSize(*input1.node) || !IsSize(*input2.node)) {
         continue;
       }
+      if (!inferred_properties) {
+        // Infer properties lazily in case they are not needed.
+        TF_RETURN_IF_ERROR(properties.InferStatically(false));
+        inferred_properties = true;
+      }
       const auto& prop1 = properties.GetInputProperties(input1.node->name());
       const auto& prop2 = properties.GetInputProperties(input2.node->name());
       if (prop1.size() != 1 || prop2.size() != 1) {
-- 
GitLab


From a1801ecdbb75b4583d757204611afd9af28b4a49 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Wed, 26 Sep 2018 14:36:59 -0700
Subject: [PATCH 0755/1357] Add experimental asynchronous checkpoint hook.

This triggers checkpoints in a separate thread while allowing training to
continue.  This can effectively parallelize checkpointing and training for
workloads like TPUEstimator, where the weights are only updated after a number
of device iterations.

PiperOrigin-RevId: 214670991
---
 tensorflow/contrib/tpu/BUILD                  |  22 ++
 tensorflow/contrib/tpu/__init__.py            |   1 +
 .../tpu/python/tpu/async_checkpoint.py        | 202 ++++++++++++++++++
 3 files changed, 225 insertions(+)
 create mode 100644 tensorflow/contrib/tpu/python/tpu/async_checkpoint.py

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 4e0b61227e..8355c92a4d 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -35,6 +35,27 @@ cc_library(
     ],
 )
 
+py_library(
+    name = "async_checkpoint",
+    srcs = ["python/tpu/async_checkpoint.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:summary",
+        "//tensorflow/python:summary_ops_v2",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/estimator:estimator_py",
+    ],
+)
+
 py_library(
     name = "tpu_estimator",
     srcs = [
@@ -46,6 +67,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":async_checkpoint",
         ":tpu_lib",
         "//tensorflow/compiler/xla/experimental/xla_sharding",
         "//tensorflow/compiler/xla/python_api:xla_shape",
diff --git a/tensorflow/contrib/tpu/__init__.py b/tensorflow/contrib/tpu/__init__.py
index 3c0456dc2f..766466968a 100644
--- a/tensorflow/contrib/tpu/__init__.py
+++ b/tensorflow/contrib/tpu/__init__.py
@@ -55,6 +55,7 @@
 
 @@TPUDistributionStrategy
 @@keras_to_tpu_model
+@@AsyncCheckpointSaverHook
 """
 
 from __future__ import absolute_import
diff --git a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
new file mode 100644
index 0000000000..e06a720e82
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
@@ -0,0 +1,202 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+
+"""Hook for asynchronous checkpointing.
+
+This hook dispatches checkpoint writing operations in a separate thread to
+allow execution to continue on the main thread.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import threading
+import time
+
+from tensorflow.core.util.event_pb2 import SessionLog
+
+from tensorflow.python.framework import meta_graph
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import training_util
+from tensorflow.python.training.session_run_hook import SessionRunArgs
+from tensorflow.python.training.summary_io import SummaryWriterCache
+
+
+class AsyncCheckpointSaverHook(session_run_hook.SessionRunHook):
+  """Saves checkpoints every N steps or seconds."""
+
+  def __init__(self,
+               checkpoint_dir,
+               save_secs=None,
+               save_steps=None,
+               saver=None,
+               checkpoint_basename="model.ckpt",
+               scaffold=None,
+               listeners=None):
+    """Initializes a `CheckpointSaverHook`.
+
+    Args:
+      checkpoint_dir: `str`, base directory for the checkpoint files.
+      save_secs: `int`, save every N secs.
+      save_steps: `int`, save every N steps.
+      saver: `Saver` object, used for saving.
+      checkpoint_basename: `str`, base name for the checkpoint files.
+      scaffold: `Scaffold`, use to get saver object.
+      listeners: List of `CheckpointSaverListener` subclass instances. Used for
+        callbacks that run immediately before or after this hook saves the
+        checkpoint.
+
+    Raises:
+      ValueError: One of `save_steps` or `save_secs` should be set.
+      ValueError: At most one of `saver` or `scaffold` should be set.
+    """
+    logging.info("Create CheckpointSaverHook.")
+    if saver is not None and scaffold is not None:
+      raise ValueError("You cannot provide both saver and scaffold.")
+    self._saver = saver
+    self._save_thread = None
+    self._checkpoint_dir = checkpoint_dir
+    self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
+    self._scaffold = scaffold
+    self._timer = basic_session_run_hooks.SecondOrStepTimer(
+        every_secs=save_secs, every_steps=save_steps)
+    self._listeners = listeners or []
+    self._steps_per_run = 1
+    self._summary_writer = None
+    self._global_step_tensor = None
+
+  def _set_steps_per_run(self, steps_per_run):
+    self._steps_per_run = steps_per_run
+
+  def begin(self):
+    self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir)
+    self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
+    if self._global_step_tensor is None:
+      raise RuntimeError(
+          "Global step should be created to use CheckpointSaverHook.")
+    for l in self._listeners:
+      l.begin()
+
+  def after_create_session(self, session, coord):
+    global_step = session.run(self._global_step_tensor)
+
+    # We do write graph and saver_def at the first call of before_run.
+    # We cannot do this in begin, since we let other hooks to change graph and
+    # add variables in begin. Graph is finalized after all begin calls.
+    training_util.write_graph(
+        ops.get_default_graph().as_graph_def(add_shapes=True),
+        self._checkpoint_dir, "graph.pbtxt")
+    saver_def = self._get_saver().saver_def if self._get_saver() else None
+    graph = ops.get_default_graph()
+    meta_graph_def = meta_graph.create_meta_graph_def(
+        graph_def=graph.as_graph_def(add_shapes=True), saver_def=saver_def)
+    self._summary_writer.add_graph(graph)
+    self._summary_writer.add_meta_graph(meta_graph_def)
+    # The checkpoint saved here is the state at step "global_step".
+    self._save(session, global_step)
+    self._timer.update_last_triggered_step(global_step)
+
+  def before_run(self, run_context):  # pylint: disable=unused-argument
+    return SessionRunArgs(self._global_step_tensor)
+
+  def after_run(self, run_context, run_values):
+    stale_global_step = run_values.results
+    if self._timer.should_trigger_for_step(stale_global_step +
+                                           self._steps_per_run):
+      # get the real value after train op.
+      global_step = run_context.session.run(self._global_step_tensor)
+      if self._timer.should_trigger_for_step(global_step):
+        self._timer.update_last_triggered_step(global_step)
+        if self._save(run_context.session, global_step):
+          run_context.request_stop()
+
+  def end(self, session):
+    if self._save_thread:
+      logging.info("Waiting for any pending checkpoints to finish.")
+      self._save_thread.join()
+
+    last_step = session.run(self._global_step_tensor)
+
+    # Save the last checkpoint synchronously if needed.
+    if last_step != self._timer.last_triggered_step():
+      self._save(session, last_step, asynchronous=False)
+
+    for l in self._listeners:
+      l.end(session, last_step)
+
+  def _save(self, session, step, asynchronous=True):
+    """Saves the latest checkpoint, returns should_stop."""
+
+    def _save_fn():
+      """Run the saver process."""
+      logging.info("Saving checkpoints for %d into %s.", step, self._save_path)
+
+      start_time = time.time()
+      for l in self._listeners:
+        l.before_save(session, step)
+
+      self._get_saver().save(session, self._save_path, global_step=step)
+      self._summary_writer.add_session_log(
+          SessionLog(
+              status=SessionLog.CHECKPOINT, checkpoint_path=self._save_path),
+          step)
+      end_time = time.time()
+      logging.info("Checkpoint actual writing time: (%.3f sec)",
+                   end_time - start_time)
+      logging.info("Checkpoint finished for %d into %s.", step, self._save_path)
+
+    logging.info("Saving checkpoints for %d into %s.", step, self._save_path)
+    for l in self._listeners:
+      l.before_save(session, step)
+
+    if not asynchronous:
+      _save_fn()
+      return
+
+    if self._save_thread is not None:
+      self._save_thread.join(timeout=0.1)
+      if self._save_thread.is_alive():
+        logging.info("Saver thread still in progress, skipping checkpoint.")
+        return
+
+    self._save_thread = threading.Thread(target=_save_fn)
+    self._save_thread.start()
+
+  def _get_saver(self):
+    if self._saver is not None:
+      return self._saver
+    elif self._scaffold is not None:
+      return self._scaffold.saver
+
+    # Get saver from the SAVERS collection if present.
+    collection_key = ops.GraphKeys.SAVERS
+    savers = ops.get_collection(collection_key)
+    if not savers:
+      raise RuntimeError(
+          "No items in collection {}. Please add a saver to the collection "
+          "or provide a saver or scaffold.".format(collection_key))
+    elif len(savers) > 1:
+      raise RuntimeError(
+          "More than one item in collection {}. "
+          "Please indicate which one to use by passing it to the constructor."
+          .format(collection_key))
+
+    self._saver = savers[0]
+    return savers[0]
-- 
GitLab


From 83df298d2ed014d263570c7283322c982be556cc Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 26 Sep 2018 14:38:52 -0700
Subject: [PATCH 0756/1357] [XLA] Remove use of DeconstructTuple from
 MakeFakeArgumentsOrDie.

DeconstructTuple doesn't support nested tuples yet, so MakeFakeArgumentsOrDie failed if any of the arguments were tuple-shaped. But we don't really need it here anyway, just build the arguments one-by-one.

PiperOrigin-RevId: 214671374
---
 tensorflow/compiler/xla/client/lib/testing.cc | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc
index 25cc37edc4..ff0ec76a7f 100644
--- a/tensorflow/compiler/xla/client/lib/testing.cc
+++ b/tensorflow/compiler/xla/client/lib/testing.cc
@@ -97,13 +97,11 @@ std::vector<std::unique_ptr<GlobalData>> MakeFakeArgumentsOrDie(
       << "Computation should have progran shape.";
   auto program_shape = computation.proto().program_shape();
 
-  // Create and run a program which produces a tuple with one element per
-  // parameter, then return the tuple's constituent buffers.
-  std::vector<Shape> param_shapes(program_shape.parameters().begin(),
-                                  program_shape.parameters().end());
-  auto fake_input_tuple =
-      MakeFakeDataOrDie(ShapeUtil::MakeTupleShape(param_shapes), client);
-  return client->DeconstructTuple(*fake_input_tuple).ValueOrDie();
+  std::vector<std::unique_ptr<GlobalData>> results;
+  for (const Shape& shape : program_shape.parameters()) {
+    results.push_back(MakeFakeDataOrDie(shape, client));
+  }
+  return results;
 }
 
 }  // namespace xla
-- 
GitLab


From 82af048bc8c3c044c98a27b1c4c27bb62d4e4a14 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Wed, 26 Sep 2018 14:57:43 -0700
Subject: [PATCH 0757/1357] Rename TFLite Eager delegate -> Flex delegate

PiperOrigin-RevId: 214674717
---
 .../lite/delegates/{eager => flex}/BUILD      |  0
 .../delegates/{eager => flex}/buffer_map.cc   |  8 ++--
 .../delegates/{eager => flex}/buffer_map.h    | 12 ++---
 .../{eager => flex}/buffer_map_test.cc        |  6 +--
 .../delegates/{eager => flex}/delegate.cc     | 34 +++++++-------
 .../lite/delegates/{eager => flex}/delegate.h | 26 +++++-----
 .../{eager => flex}/delegate_data.cc          |  6 +--
 .../delegates/{eager => flex}/delegate_data.h | 16 +++----
 .../{eager => flex}/delegate_data_test.cc     |  6 +--
 .../{eager => flex}/delegate_test.cc          | 14 +++---
 .../lite/delegates/{eager => flex}/kernel.cc  | 30 ++++++------
 .../lite/delegates/{eager => flex}/kernel.h   | 12 ++---
 .../delegates/{eager => flex}/kernel_test.cc  | 16 +++----
 .../delegates/{eager => flex}/test_util.cc    | 47 +++++++++----------
 .../delegates/{eager => flex}/test_util.h     | 20 ++++----
 .../lite/delegates/{eager => flex}/util.cc    |  6 +--
 .../lite/delegates/{eager => flex}/util.h     | 10 ++--
 .../delegates/{eager => flex}/util_test.cc    |  6 +--
 tensorflow/contrib/lite/kernels/register.cc   |  8 ++--
 tensorflow/contrib/lite/model.cc              |  4 +-
 tensorflow/contrib/lite/python/convert.py     |  6 +--
 tensorflow/contrib/lite/python/lite_test.py   |  2 +-
 tensorflow/contrib/lite/testing/BUILD         |  2 +-
 .../contrib/lite/testing/generate_examples.py |  2 +-
 .../contrib/lite/testing/tflite_diff_flags.h  |  4 +-
 .../contrib/lite/testing/tflite_diff_util.h   |  2 +-
 .../contrib/lite/testing/tflite_driver.cc     |  6 +--
 .../contrib/lite/testing/tflite_driver.h      |  4 +-
 tensorflow/contrib/lite/toco/args.h           |  4 +-
 .../contrib/lite/toco/import_tensorflow.cc    |  4 +-
 .../contrib/lite/toco/import_tensorflow.h     |  2 +-
 tensorflow/contrib/lite/toco/tflite/export.cc | 20 ++++----
 tensorflow/contrib/lite/toco/tflite/export.h  |  4 +-
 .../contrib/lite/toco/tflite/export_test.cc   |  2 +-
 .../contrib/lite/toco/tflite/operator.cc      | 26 +++++-----
 .../contrib/lite/toco/tflite/operator.h       |  6 +--
 .../contrib/lite/toco/toco_cmdline_flags.cc   | 24 +++++-----
 tensorflow/contrib/lite/toco/toco_flags.proto | 16 +++----
 tensorflow/contrib/lite/toco/toco_tooling.cc  |  8 ++--
 tensorflow/contrib/lite/tools/benchmark/BUILD |  8 ++--
 .../tools/benchmark/benchmark_tflite_model.cc |  6 +--
 .../tools/benchmark/benchmark_tflite_model.h  |  4 +-
 tensorflow/contrib/lite/util.cc               |  6 +--
 tensorflow/contrib/lite/util.h                |  8 ++--
 tensorflow/contrib/lite/util_test.cc          | 16 +++----
 45 files changed, 239 insertions(+), 240 deletions(-)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/BUILD (100%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/buffer_map.cc (95%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/buffer_map.h (86%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/buffer_map_test.cc (98%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate.cc (76%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate.h (64%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_data.cc (94%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_data.h (78%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_data_test.cc (93%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_test.cc (95%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/kernel.cc (91%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/kernel.h (79%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/kernel_test.cc (94%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/test_util.cc (76%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/test_util.h (90%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/util.cc (96%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/util.h (89%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/util_test.cc (97%)

diff --git a/tensorflow/contrib/lite/delegates/eager/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD
similarity index 100%
rename from tensorflow/contrib/lite/delegates/eager/BUILD
rename to tensorflow/contrib/lite/delegates/flex/BUILD
diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map.cc b/tensorflow/contrib/lite/delegates/flex/buffer_map.cc
similarity index 95%
rename from tensorflow/contrib/lite/delegates/eager/buffer_map.cc
rename to tensorflow/contrib/lite/delegates/flex/buffer_map.cc
index e5a19c3997..63e39196d9 100644
--- a/tensorflow/contrib/lite/delegates/eager/buffer_map.cc
+++ b/tensorflow/contrib/lite/delegates/flex/buffer_map.cc
@@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
 
 #include "tensorflow/c/c_api_internal.h"
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/log_memory.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 // A tensor buffer that is allocated, deallocated and populated by TF Lite.
 class TfLiteTensorBuffer : public tensorflow::TensorBuffer {
@@ -107,5 +107,5 @@ void BufferMap::SetFromTensorFlow(int tensor_index, tensorflow::Tensor tensor) {
   id_to_tensor_[tensor_index] = std::move(tensor);
 }
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map.h b/tensorflow/contrib/lite/delegates/flex/buffer_map.h
similarity index 86%
rename from tensorflow/contrib/lite/delegates/eager/buffer_map.h
rename to tensorflow/contrib/lite/delegates/flex/buffer_map.h
index aaaa045840..4ce886568a 100644
--- a/tensorflow/contrib/lite/delegates/eager/buffer_map.h
+++ b/tensorflow/contrib/lite/delegates/flex/buffer_map.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_
 
 #include <map>
 
@@ -21,12 +21,12 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
 // Maps a TF Lite tensor index into a TensorFlow tensor.
 //
 // The TF Lite interpreter assigns integer indices to each of its tensors, but
-// the Eager delegate deals in terms of TensorFlow tensors. This class maps
+// the Flex delegate deals in terms of TensorFlow tensors. This class maps
 // from indices to tensors and allows the creation of new tensors to be
 // associated with a given index.
 class BufferMap {
@@ -55,7 +55,7 @@ class BufferMap {
   std::map<int, tensorflow::Tensor> id_to_tensor_;
 };
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc b/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc
similarity index 98%
rename from tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc
rename to tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc
index a046943e56..bb80e25e80 100644
--- a/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
@@ -21,7 +21,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 using ::testing::ElementsAre;
@@ -164,7 +164,7 @@ TEST(BufferMapTest, TensorFlowOverwritesTfLite) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate.cc b/tensorflow/contrib/lite/delegates/flex/delegate.cc
similarity index 76%
rename from tensorflow/contrib/lite/delegates/eager/delegate.cc
rename to tensorflow/contrib/lite/delegates/flex/delegate.cc
index 45fc158157..ba065a8ff5 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate.cc
@@ -12,19 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 
 #include <vector>
 
 #include "tensorflow/contrib/lite/context_util.h"
-#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
-#include "tensorflow/contrib/lite/delegates/eager/kernel.h"
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/flex/kernel.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 #include "tensorflow/contrib/lite/util.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace delegate {
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
@@ -32,7 +32,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
   TfLiteIntArray* plan;
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
 
-  // Add all custom ops starting with "Eager" to list of supported nodes.
+  // Add all custom ops starting with "Flex" to list of supported nodes.
   std::vector<int> supported_nodes;
   for (int node_index : TfLiteIntArrayView(plan)) {
     TfLiteNode* node;
@@ -40,7 +40,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
         context, node_index, &node, &registration));
 
-    if (IsEagerOp(registration->custom_name)) {
+    if (IsFlexOp(registration->custom_name)) {
       supported_nodes.push_back(node_index);
     }
   }
@@ -81,28 +81,28 @@ TfLiteStatus CopyFromBufferHandle(TfLiteContext* context,
 }
 
 }  // namespace delegate
-}  // namespace eager
+}  // namespace flex
 
-std::unique_ptr<EagerDelegate> EagerDelegate::Create() {
-  std::unique_ptr<eager::DelegateData> delegate_data;
-  if (!eager::DelegateData::Create(&delegate_data).ok()) {
+std::unique_ptr<FlexDelegate> FlexDelegate::Create() {
+  std::unique_ptr<flex::DelegateData> delegate_data;
+  if (!flex::DelegateData::Create(&delegate_data).ok()) {
     fprintf(stderr, "Unable to initialize TensorFlow context.\n");
     return nullptr;
   }
 
-  return std::unique_ptr<EagerDelegate>(
-      new EagerDelegate(std::move(delegate_data)));
+  return std::unique_ptr<FlexDelegate>(
+      new FlexDelegate(std::move(delegate_data)));
 }
 
-EagerDelegate::EagerDelegate(std::unique_ptr<eager::DelegateData> delegate_data)
+FlexDelegate::FlexDelegate(std::unique_ptr<flex::DelegateData> delegate_data)
     : TfLiteDelegate{
           /*data_=*/delegate_data.get(),
-          /*nullptr,*/ &eager::delegate::Prepare,
-          /*CopyFromBufferHandle=*/&eager::delegate::CopyFromBufferHandle,
+          /*nullptr,*/ &flex::delegate::Prepare,
+          /*CopyFromBufferHandle=*/&flex::delegate::CopyFromBufferHandle,
           /*CopyToBufferHandle=*/nullptr,
           /*FreeBufferHandle=*/nullptr},
       delegate_data_(std::move(delegate_data)) {}
 
-EagerDelegate::~EagerDelegate() {}
+FlexDelegate::~FlexDelegate() {}
 
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate.h b/tensorflow/contrib/lite/delegates/flex/delegate.h
similarity index 64%
rename from tensorflow/contrib/lite/delegates/eager/delegate.h
rename to tensorflow/contrib/lite/delegates/flex/delegate.h
index 70f3c15af4..1017780dc7 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate.h
+++ b/tensorflow/contrib/lite/delegates/flex/delegate.h
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_
 
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
 
 namespace tflite {
 
@@ -24,12 +24,12 @@ namespace tflite {
 // Delegate that can be used to extract parts of a graph that are designed to be
 // executed by TensorFlow's runtime via Eager.
 //
-// The interpreter must be constructed after the EagerDelegate and destructed
-// before the EagerDelegate. This delegate may be used with multiple
+// The interpreter must be constructed after the FlexDelegate and destructed
+// before the FlexDelegate. This delegate may be used with multiple
 // interpreters, but it is *not* thread-safe.
 //
 // Usage:
-//   auto delegate = EagerDelegate::Create();
+//   auto delegate = FlexDelegate::Create();
 //   ... build interpreter ...
 //
 //   if (delegate) {
@@ -39,21 +39,21 @@ namespace tflite {
 //   ... run inference ...
 //   ... destroy interpreter ...
 //   ... destroy delegate ...
-class EagerDelegate : public TfLiteDelegate {
+class FlexDelegate : public TfLiteDelegate {
  public:
   // Creates a delegate that supports TF ops.
   //
-  // If the underyling TF Eager context creation fails, returns null.
-  static std::unique_ptr<EagerDelegate> Create();
+  // If the underyling TF Flex context creation fails, returns null.
+  static std::unique_ptr<FlexDelegate> Create();
 
-  ~EagerDelegate();
+  ~FlexDelegate();
 
  private:
-  explicit EagerDelegate(std::unique_ptr<eager::DelegateData> delegate_data);
+  explicit FlexDelegate(std::unique_ptr<flex::DelegateData> delegate_data);
 
-  std::unique_ptr<eager::DelegateData> delegate_data_;
+  std::unique_ptr<flex::DelegateData> delegate_data_;
 };
 
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data.cc b/tensorflow/contrib/lite/delegates/flex/delegate_data.cc
similarity index 94%
rename from tensorflow/contrib/lite/delegates/eager/delegate_data.cc
rename to tensorflow/contrib/lite/delegates/flex/delegate_data.cc
index 0fd5c976f8..8f985f770c 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate_data.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate_data.cc
@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
 
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 tensorflow::Status DelegateData::Create(std::unique_ptr<DelegateData>* data) {
   std::vector<tensorflow::Device*> devices;
 
@@ -43,5 +43,5 @@ DelegateData::DelegateData(tensorflow::EagerContext* eager_context)
 
 DelegateData::~DelegateData() {}
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data.h b/tensorflow/contrib/lite/delegates/flex/delegate_data.h
similarity index 78%
rename from tensorflow/contrib/lite/delegates/eager/delegate_data.h
rename to tensorflow/contrib/lite/delegates/flex/delegate_data.h
index 772d26f44e..8d75f0b0ef 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate_data.h
+++ b/tensorflow/contrib/lite/delegates/flex/delegate_data.h
@@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
 
-#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
-// Data kept by the Eager delegate for the lifetime of an Interpreter.
+// Data kept by the Flex delegate for the lifetime of an Interpreter.
 class DelegateData {
  public:
   // Create a new DelegateData, initialized with a newly-created EagerContext.
@@ -29,7 +29,7 @@ class DelegateData {
 
   ~DelegateData();
 
-  // The EagerContext that is required for execution of Eager Ops.
+  // The EagerContext that is required for execution of Flex Ops.
   tensorflow::EagerContext* GetEagerContext() { return eager_context_.get(); }
 
   // Map from TF Lite tensor index to TensorFlow tensor for a given context.
@@ -46,7 +46,7 @@ class DelegateData {
   std::unordered_map<const TfLiteContext*, BufferMap> buffer_map_;
 };
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc b/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc
similarity index 93%
rename from tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc
rename to tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc
index def063309f..30b10f435a 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 TEST(DelegateDataTest, Basic) {
@@ -39,7 +39,7 @@ TEST(DelegateDataTest, Basic) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_test.cc b/tensorflow/contrib/lite/delegates/flex/delegate_test.cc
similarity index 95%
rename from tensorflow/contrib/lite/delegates/eager/delegate_test.cc
rename to tensorflow/contrib/lite/delegates/flex/delegate_test.cc
index 43ec5d53b8..1813952cef 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate_test.cc
@@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include "tensorflow/contrib/lite/delegates/eager/test_util.h"
+#include "tensorflow/contrib/lite/delegates/flex/test_util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 using ::testing::ContainsRegex;
 using ::testing::ElementsAre;
 
-class DelegateTest : public testing::EagerModelTest {
+class DelegateTest : public testing::FlexModelTest {
  public:
   DelegateTest() {
-    delegate_ = EagerDelegate::Create();
+    delegate_ = FlexDelegate::Create();
     interpreter_.reset(new Interpreter(&error_reporter_));
   }
 
@@ -46,7 +46,7 @@ class DelegateTest : public testing::EagerModelTest {
   }
 
  private:
-  std::unique_ptr<EagerDelegate> delegate_;
+  std::unique_ptr<FlexDelegate> delegate_;
 };
 
 TEST_F(DelegateTest, FullGraph) {
@@ -236,7 +236,7 @@ TEST_F(DelegateTest, MultipleInterpretersSameDelegate) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.cc b/tensorflow/contrib/lite/delegates/flex/kernel.cc
similarity index 91%
rename from tensorflow/contrib/lite/delegates/eager/kernel.cc
rename to tensorflow/contrib/lite/delegates/flex/kernel.cc
index 48a2f56baf..e4f1aea990 100644
--- a/tensorflow/contrib/lite/delegates/eager/kernel.cc
+++ b/tensorflow/contrib/lite/delegates/flex/kernel.cc
@@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/kernel.h"
+#include "tensorflow/contrib/lite/delegates/flex/kernel.h"
 
 #include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/builtin_ops.h"
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/context_util.h"
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
 #include "tensorflow/contrib/lite/string.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
@@ -28,10 +28,10 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 
-// Note: this is part of TF Lite's Eager delegation code which is to be
+// Note: this is part of TF Lite's Flex delegation code which is to be
 // completed soon.
 
-// This is the TF Lite op that is created by the eager delegate to handle
+// This is the TF Lite op that is created by the flex delegate to handle
 // execution of a supported subgraph. The usual flow is that the delegate
 // informs the interpreter of supported nodes in a graph, and each supported
 // subgraph is replaced with one instance of this kernel.
@@ -46,7 +46,7 @@ limitations under the License.
 // corresponding TensorFlow/Eager Op.
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace kernel {
 
 // Controls the lifetime of tensor handles in a vector.
@@ -72,11 +72,11 @@ class VectorOfHandles {
 
 // Executes the TensorFlow op given by 'op_name', with the attributes specified
 // in 'nodedef'. Inputs and outputs are given as indices into the 'buffer_map'.
-tensorflow::Status ExecuteEagerOp(tensorflow::EagerContext* eager_context,
-                                  BufferMap* buffer_map, const string& op_name,
-                                  const tensorflow::NodeDef& nodedef,
-                                  const std::vector<int>& inputs,
-                                  const std::vector<int>& outputs) {
+tensorflow::Status ExecuteFlexOp(tensorflow::EagerContext* eager_context,
+                                 BufferMap* buffer_map, const string& op_name,
+                                 const tensorflow::NodeDef& nodedef,
+                                 const std::vector<int>& inputs,
+                                 const std::vector<int>& outputs) {
   const tensorflow::AttrTypeMap* attr_types;
   TF_RETURN_WITH_CONTEXT_IF_ERROR(
       tensorflow::AttrTypeMapForOp(op_name.c_str(), &attr_types),
@@ -258,13 +258,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // Execute the TensorFlow Ops sequentially.
   for (const auto& node_data : op_data->nodes) {
     if (node_data.nodedef.op().empty()) {
-      context->ReportError(context, "Invalid NodeDef in Eager op '%s'",
+      context->ReportError(context, "Invalid NodeDef in Flex op '%s'",
                            node_data.name.c_str());
       return kTfLiteError;
     }
     auto status =
-        ExecuteEagerOp(eager_context, buffer_map, node_data.name,
-                       node_data.nodedef, node_data.inputs, node_data.outputs);
+        ExecuteFlexOp(eager_context, buffer_map, node_data.name,
+                      node_data.nodedef, node_data.inputs, node_data.outputs);
     TF_LITE_ENSURE_OK(context, ConvertStatus(context, status));
   }
 
@@ -295,5 +295,5 @@ TfLiteRegistration GetKernel() {
   return registration;
 }
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.h b/tensorflow/contrib/lite/delegates/flex/kernel.h
similarity index 79%
rename from tensorflow/contrib/lite/delegates/eager/kernel.h
rename to tensorflow/contrib/lite/delegates/flex/kernel.h
index 2478abccaa..ac9313a37b 100644
--- a/tensorflow/contrib/lite/delegates/eager/kernel.h
+++ b/tensorflow/contrib/lite/delegates/flex/kernel.h
@@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_
 
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
 // Return the registration object used to initialize and execute ops that will
 // be delegated to TensorFlow's Eager runtime. This TF Lite op is created by
-// the eager delegate to handle execution of a supported subgraph. The usual
+// the flex delegate to handle execution of a supported subgraph. The usual
 // flow is that the delegate informs the interpreter of supported nodes in a
 // graph, and each supported subgraph is replaced with one instance of this
 // kernel.
 TfLiteRegistration GetKernel();
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/kernel_test.cc b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
similarity index 94%
rename from tensorflow/contrib/lite/delegates/eager/kernel_test.cc
rename to tensorflow/contrib/lite/delegates/flex/kernel_test.cc
index 66f2226626..94a6f8b61a 100644
--- a/tensorflow/contrib/lite/delegates/eager/kernel_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
@@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/kernel.h"
+#include "tensorflow/contrib/lite/delegates/flex/kernel.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
-#include "tensorflow/contrib/lite/delegates/eager/test_util.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/test_util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 using ::testing::ContainsRegex;
@@ -31,12 +31,12 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteDelegate* delegate,
   TfLiteIntArray* size_and_nodes =
       ConvertVectorToTfLiteIntArray(supported_nodes);
   TF_LITE_ENSURE_STATUS(context->ReplaceSubgraphsWithDelegateKernels(
-      context, eager::GetKernel(), size_and_nodes, delegate));
+      context, flex::GetKernel(), size_and_nodes, delegate));
   TfLiteIntArrayFree(size_and_nodes);
   return kTfLiteOk;
 }
 
-class KernelTest : public testing::EagerModelTest {
+class KernelTest : public testing::FlexModelTest {
  public:
   KernelTest() {
     CHECK(DelegateData::Create(&delegate_data_).ok());
@@ -167,7 +167,7 @@ TEST_F(KernelTest, WrongSetOfNodes) {
 
   ASSERT_FALSE(Invoke());
   ASSERT_THAT(error_reporter().error_messages(),
-              ContainsRegex("Invalid NodeDef in Eager op"));
+              ContainsRegex("Invalid NodeDef in Flex op"));
 }
 
 TEST_F(KernelTest, MixedGraph) {
@@ -220,7 +220,7 @@ TEST_F(KernelTest, SplitGraph) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.cc b/tensorflow/contrib/lite/delegates/flex/test_util.cc
similarity index 76%
rename from tensorflow/contrib/lite/delegates/eager/test_util.cc
rename to tensorflow/contrib/lite/delegates/flex/test_util.cc
index d47be761fb..69c336a01a 100644
--- a/tensorflow/contrib/lite/delegates/eager/test_util.cc
+++ b/tensorflow/contrib/lite/delegates/flex/test_util.cc
@@ -13,25 +13,24 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/lite/delegates/eager/test_util.h"
+#include "tensorflow/contrib/lite/delegates/flex/test_util.h"
 
 #include "absl/memory/memory.h"
 #include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/string.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace testing {
 
-bool EagerModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; }
+bool FlexModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; }
 
-void EagerModelTest::SetShape(int tensor_index,
-                              const std::vector<int>& values) {
+void FlexModelTest::SetShape(int tensor_index, const std::vector<int>& values) {
   ASSERT_EQ(interpreter_->ResizeInputTensor(tensor_index, values), kTfLiteOk);
   ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
 }
 
-std::vector<int> EagerModelTest::GetShape(int tensor_index) {
+std::vector<int> FlexModelTest::GetShape(int tensor_index) {
   std::vector<int> result;
   auto* dims = interpreter_->tensor(tensor_index)->dims;
   result.reserve(dims->size);
@@ -41,13 +40,13 @@ std::vector<int> EagerModelTest::GetShape(int tensor_index) {
   return result;
 }
 
-TfLiteType EagerModelTest::GetType(int tensor_index) {
+TfLiteType FlexModelTest::GetType(int tensor_index) {
   return interpreter_->tensor(tensor_index)->type;
 }
 
-void EagerModelTest::AddTensors(int num_tensors, const std::vector<int>& inputs,
-                                const std::vector<int>& outputs,
-                                TfLiteType type, const std::vector<int>& dims) {
+void FlexModelTest::AddTensors(int num_tensors, const std::vector<int>& inputs,
+                               const std::vector<int>& outputs, TfLiteType type,
+                               const std::vector<int>& dims) {
   interpreter_->AddTensors(num_tensors);
   for (int i = 0; i < num_tensors; ++i) {
     TfLiteQuantizationParams quant;
@@ -66,8 +65,8 @@ void EagerModelTest::AddTensors(int num_tensors, const std::vector<int>& inputs,
   CHECK_EQ(interpreter_->SetOutputs(outputs), kTfLiteOk);
 }
 
-void EagerModelTest::AddTfLiteMulOp(const std::vector<int>& inputs,
-                                    const std::vector<int>& outputs) {
+void FlexModelTest::AddTfLiteMulOp(const std::vector<int>& inputs,
+                                   const std::vector<int>& outputs) {
   static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
   reg.builtin_code = BuiltinOperator_MUL;
   reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
@@ -90,8 +89,8 @@ void EagerModelTest::AddTfLiteMulOp(const std::vector<int>& inputs,
            kTfLiteOk);
 }
 
-void EagerModelTest::AddTfOp(TfOpType op, const std::vector<int>& inputs,
-                             const std::vector<int>& outputs) {
+void FlexModelTest::AddTfOp(TfOpType op, const std::vector<int>& inputs,
+                            const std::vector<int>& outputs) {
   auto attr = [](const string& key, const string& value) {
     return " attr{ key: '" + key + "' value {" + value + "}}";
   };
@@ -107,28 +106,28 @@ void EagerModelTest::AddTfOp(TfOpType op, const std::vector<int>& inputs,
   if (op == kUnpack) {
     string attributes =
         type_attribute + attr("num", "i: 2") + attr("axis", "i: 0");
-    AddTfOp("EagerUnpack", "Unpack", attributes, inputs, outputs);
+    AddTfOp("FlexUnpack", "Unpack", attributes, inputs, outputs);
   } else if (op == kIdentity) {
     string attributes = type_attribute;
-    AddTfOp("EagerIdentity", "Identity", attributes, inputs, outputs);
+    AddTfOp("FlexIdentity", "Identity", attributes, inputs, outputs);
   } else if (op == kAdd) {
     string attributes = type_attribute;
-    AddTfOp("EagerAdd", "Add", attributes, inputs, outputs);
+    AddTfOp("FlexAdd", "Add", attributes, inputs, outputs);
   } else if (op == kMul) {
     string attributes = type_attribute;
-    AddTfOp("EagerMul", "Mul", attributes, inputs, outputs);
+    AddTfOp("FlexMul", "Mul", attributes, inputs, outputs);
   } else if (op == kNonExistent) {
     AddTfOp("NonExistentOp", "NonExistentOp", "", inputs, outputs);
   } else if (op == kIncompatibleNodeDef) {
     // "Cast" op is created without attributes - making it incompatible.
-    AddTfOp("EagerCast", "Cast", "", inputs, outputs);
+    AddTfOp("FlexCast", "Cast", "", inputs, outputs);
   }
 }
 
-void EagerModelTest::AddTfOp(const char* tflite_name, const string& tf_name,
-                             const string& nodedef_str,
-                             const std::vector<int>& inputs,
-                             const std::vector<int>& outputs) {
+void FlexModelTest::AddTfOp(const char* tflite_name, const string& tf_name,
+                            const string& nodedef_str,
+                            const std::vector<int>& inputs,
+                            const std::vector<int>& outputs) {
   static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
   reg.builtin_code = BuiltinOperator_CUSTOM;
   reg.custom_name = tflite_name;
@@ -154,5 +153,5 @@ void EagerModelTest::AddTfOp(const char* tflite_name, const string& tf_name,
 }
 
 }  // namespace testing
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.h b/tensorflow/contrib/lite/delegates/flex/test_util.h
similarity index 90%
rename from tensorflow/contrib/lite/delegates/eager/test_util.h
rename to tensorflow/contrib/lite/delegates/flex/test_util.h
index 816db41931..a8c81b90a3 100644
--- a/tensorflow/contrib/lite/delegates/eager/test_util.h
+++ b/tensorflow/contrib/lite/delegates/flex/test_util.h
@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_
 
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace testing {
 
 enum TfOpType {
@@ -35,12 +35,12 @@ enum TfOpType {
 };
 
 // This class creates models with TF and TFLite ops. In order to use this class
-// to test the Eager delegate, implement a function that calls
+// to test the Flex delegate, implement a function that calls
 // interpreter->ModifyGraphWithDelegate.
-class EagerModelTest : public ::testing::Test {
+class FlexModelTest : public ::testing::Test {
  public:
-  EagerModelTest() {}
-  ~EagerModelTest() {}
+  FlexModelTest() {}
+  ~FlexModelTest() {}
 
   bool Invoke();
 
@@ -104,7 +104,7 @@ class EagerModelTest : public ::testing::Test {
 
  private:
   // Helper method to add a TensorFlow op. tflite_names needs to start with
-  // "Eager" in order to work with the Eager delegate.
+  // "Flex" in order to work with the Flex delegate.
   void AddTfOp(const char* tflite_name, const string& tf_name,
                const string& nodedef_str, const std::vector<int>& inputs,
                const std::vector<int>& outputs);
@@ -113,7 +113,7 @@ class EagerModelTest : public ::testing::Test {
 };
 
 }  // namespace testing
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/util.cc b/tensorflow/contrib/lite/delegates/flex/util.cc
similarity index 96%
rename from tensorflow/contrib/lite/delegates/eager/util.cc
rename to tensorflow/contrib/lite/delegates/flex/util.cc
index 051246bf86..829bc388bf 100644
--- a/tensorflow/contrib/lite/delegates/eager/util.cc
+++ b/tensorflow/contrib/lite/delegates/flex/util.cc
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
 TfLiteStatus ConvertStatus(TfLiteContext* context,
                            const tensorflow::Status& status) {
@@ -100,5 +100,5 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) {
   }
 }
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/util.h b/tensorflow/contrib/lite/delegates/flex/util.h
similarity index 89%
rename from tensorflow/contrib/lite/delegates/eager/util.h
rename to tensorflow/contrib/lite/delegates/flex/util.h
index 930cb99cb9..7f910e7316 100644
--- a/tensorflow/contrib/lite/delegates/eager/util.h
+++ b/tensorflow/contrib/lite/delegates/flex/util.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_
 
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
@@ -21,7 +21,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
 // Converts a tensorflow:Status into a TfLiteStatus. If the original status
 // represented an error, reports it using the given 'context'.
@@ -41,7 +41,7 @@ TF_DataType GetTensorFlowDataType(TfLiteType type);
 // Returns the TfLiteType that corresponds to the given TF C API Data type.
 TfLiteType GetTensorFlowLiteType(TF_DataType);
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/util_test.cc b/tensorflow/contrib/lite/delegates/flex/util_test.cc
similarity index 97%
rename from tensorflow/contrib/lite/delegates/eager/util_test.cc
rename to tensorflow/contrib/lite/delegates/flex/util_test.cc
index aebc91149c..5f049e7b0a 100644
--- a/tensorflow/contrib/lite/delegates/eager/util_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/util_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 
 #include <cstdarg>
 
@@ -22,7 +22,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 using tensorflow::DT_FLOAT;
@@ -132,7 +132,7 @@ TEST(UtilTest, TypeConversionsFromTensorFlow) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index 2f4b663a28..9402105fa7 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -125,7 +125,7 @@ TfLiteStatus UnsupportedTensorFlowOp(TfLiteContext* context, TfLiteNode* node) {
   context->ReportError(
       context,
       "Regular TensorFlow ops are not supported by this interpreter. Make sure "
-      "you invoke the Eager delegate before inference.");
+      "you invoke the Flex delegate before inference.");
   return kTfLiteError;
 }
 
@@ -136,13 +136,13 @@ const TfLiteRegistration* BuiltinOpResolver::FindOp(tflite::BuiltinOperator op,
 
 const TfLiteRegistration* BuiltinOpResolver::FindOp(const char* op,
                                                     int version) const {
-  // Return the NULL Op for all ops whose name start with "Eager", allowing
+  // Return the NULL Op for all ops whose name start with "Flex", allowing
   // the interpreter to delegate their execution.
-  if (IsEagerOp(op)) {
+  if (IsFlexOp(op)) {
     static TfLiteRegistration null_op{
         nullptr, nullptr, &UnsupportedTensorFlowOp,
         nullptr, nullptr, BuiltinOperator_CUSTOM,
-        "Eager", 1};
+        "Flex",  1};
     return &null_op;
   }
   return MutableOpResolver::FindOp(op, version);
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index ea2817beec..eff6181a61 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -28,7 +28,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/nnapi_delegate.h"
 #endif
 #if defined(TFLITE_EXTENDED)
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #endif
 #include "tensorflow/contrib/lite/version.h"
 
@@ -451,7 +451,7 @@ TfLiteStatus InterpreterBuilder::operator()(
   (**interpreter).SetVariables(std::move(variables));
 
 #if defined(TFLITE_EXTENDED)
-  if (auto delegate = EagerDelegate::Create()) {
+  if (auto delegate = FlexDelegate::Create()) {
     (**interpreter)
         .ModifyGraphWithDelegate(std::move(delegate),
                                  /*allow_dynamic_tensors=*/true);
diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 1f48a826d4..83d341c0b8 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -241,10 +241,10 @@ def build_toco_convert_protos(input_tensors,
     toco.dump_graphviz_dir = dump_graphviz_dir
   toco.dump_graphviz_include_video = dump_graphviz_video
   if converter_mode == ConverterMode.TOCO_EXTENDED:
-    toco.allow_eager_ops = True
+    toco.allow_flex_ops = True
   elif converter_mode == ConverterMode.TOCO_EXTENDED_ALL:
-    toco.allow_eager_ops = True
-    toco.force_eager_ops = True
+    toco.allow_flex_ops = True
+    toco.force_flex_ops = True
 
   model = _model_flags_pb2.ModelFlags()
   model.change_concat_input_ranges = change_concat_input_ranges
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index f112ed5cdd..89324e8a80 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -421,7 +421,7 @@ class FromSessionTest(test_util.TensorFlowTestCase):
       interpreter.allocate_tensors()
     self.assertIn(
         'Regular TensorFlow ops are not supported by this interpreter. Make '
-        'sure you invoke the Eager delegate before inference.',
+        'sure you invoke the Flex delegate before inference.',
         str(error.exception))
 
 
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 55ef1172b2..f0bfec2338 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -164,7 +164,7 @@ cc_library(
         ":test_runner",
         "//tensorflow/contrib/lite:builtin_op_data",
         "//tensorflow/contrib/lite:framework",
-        "//tensorflow/contrib/lite/delegates/eager:delegate",
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
         "//tensorflow/contrib/lite/kernels:builtin_ops",
     ],
 )
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 014c80b5ef..53bd88d087 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -343,7 +343,7 @@ def toco_convert(graph_def_str, input_tensors, output_tensors,
       opts = ("--input_arrays={0} --output_arrays={1}".format(
           ",".join(input_arrays), ",".join(output_tensors)))
     elif FLAGS.run_with_extended:
-      opts += " --allow_eager_ops --force_eager_ops"
+      opts += " --allow_flex_ops --force_flex_ops"
     cmd = ("%s --input_file=%s --output_file=%s %s > %s 2>&1" %
            (bin_path, graphdef_file.name, output_file.name, opts,
             stdout_file.name))
diff --git a/tensorflow/contrib/lite/testing/tflite_diff_flags.h b/tensorflow/contrib/lite/testing/tflite_diff_flags.h
index 3874bc31d7..ad889a2f19 100644
--- a/tensorflow/contrib/lite/testing/tflite_diff_flags.h
+++ b/tensorflow/contrib/lite/testing/tflite_diff_flags.h
@@ -57,7 +57,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
                        "[optional] Number of full runs in each pass."),
       tensorflow::Flag("delegate", &values.delegate,
                        "[optional] Delegate to use for executing ops. Must be "
-                       "`{\"\", EAGER}`"),
+                       "`{\"\", FLEX}`"),
   };
 
   bool no_inputs = *argc == 1;
@@ -70,7 +70,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
              values.input_layer_shape.empty() || values.output_layer.empty()) {
     fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str());
     return {};
-  } else if (!(values.delegate == "" || values.delegate == "EAGER")) {
+  } else if (!(values.delegate == "" || values.delegate == "FLEX")) {
     fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str());
     return {};
   }
diff --git a/tensorflow/contrib/lite/testing/tflite_diff_util.h b/tensorflow/contrib/lite/testing/tflite_diff_util.h
index f67992139f..28b14bd143 100644
--- a/tensorflow/contrib/lite/testing/tflite_diff_util.h
+++ b/tensorflow/contrib/lite/testing/tflite_diff_util.h
@@ -45,7 +45,7 @@ struct DiffOptions {
   // second pass does multiple inferences back to back.
   int num_runs_per_pass;
   // Path to the delegate library to be loaded in order to execute ops. Must be
-  // `{"", EAGER}`.
+  // `{"", FLEX}`.
   string delegate;
 };
 
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc
index 1836eb53b9..0a6da926be 100644
--- a/tensorflow/contrib/lite/testing/tflite_driver.cc
+++ b/tensorflow/contrib/lite/testing/tflite_driver.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include <iostream>
 
 #include "tensorflow/contrib/lite/builtin_op_data.h"
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #include "tensorflow/contrib/lite/testing/split.h"
 
 namespace tflite {
@@ -138,8 +138,8 @@ class TfLiteDriver::Expectation {
 
 TfLiteDriver::TfLiteDriver(bool use_nnapi, const string& delegate_name)
     : use_nnapi_(use_nnapi) {
-  if (delegate_name == "EAGER") {
-    delegate_ = EagerDelegate::Create();
+  if (delegate_name == "FLEX") {
+    delegate_ = FlexDelegate::Create();
   }
 }
 
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.h b/tensorflow/contrib/lite/testing/tflite_driver.h
index aed35f877d..dc2a4e5877 100644
--- a/tensorflow/contrib/lite/testing/tflite_driver.h
+++ b/tensorflow/contrib/lite/testing/tflite_driver.h
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <map>
 
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
@@ -53,7 +53,7 @@ class TfLiteDriver : public TestRunner {
 
   class Expectation;
 
-  std::unique_ptr<EagerDelegate> delegate_;
+  std::unique_ptr<FlexDelegate> delegate_;
   bool use_nnapi_ = false;
   std::unique_ptr<FlatBufferModel> model_;
   std::unique_ptr<Interpreter> interpreter_;
diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h
index f14dbc258b..2699ac76e1 100644
--- a/tensorflow/contrib/lite/toco/args.h
+++ b/tensorflow/contrib/lite/toco/args.h
@@ -248,9 +248,9 @@ struct ParsedTocoFlags {
   Arg<int64> dedupe_array_min_size_bytes = Arg<int64>(64);
   Arg<bool> split_tflite_lstm_inputs = Arg<bool>(true);
   // WARNING: Experimental interface, subject to change
-  Arg<bool> allow_eager_ops = Arg<bool>(false);
+  Arg<bool> allow_flex_ops = Arg<bool>(false);
   // WARNING: Experimental interface, subject to change
-  Arg<bool> force_eager_ops = Arg<bool>(false);
+  Arg<bool> force_flex_ops = Arg<bool>(false);
 };
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index e02d000e7e..5eaf6e27fc 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -2123,9 +2123,9 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
   Model* model = new Model;
   internal::ConverterMapType converter_map;
 
-  // This is used for the TFLite "Full Eager Mode" conversion. All the ops are
+  // This is used for the TFLite "Full Flex Mode" conversion. All the ops are
   // imported as `TensorFlowUnsupportedOperator`, and later all these ops are
-  // converted to TFLite Eager ops.
+  // converted to TFLite Flex ops.
   if (!tf_import_flags.import_all_ops_as_unsupported) {
     converter_map = internal::GetTensorFlowNodeConverterMap();
   }
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.h b/tensorflow/contrib/lite/toco/import_tensorflow.h
index 7db23f2d44..c5ff96956a 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.h
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.h
@@ -30,7 +30,7 @@ struct TensorFlowImportFlags {
 
   // Do not recognize any op and import all ops as
   // `TensorFlowUnsupportedOperator`. This is used to populated with the
-  // `force_eager_ops` flag.
+  // `force_flex_ops` flag.
   bool import_all_ops_as_unsupported = false;
 };
 
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index fee10b1dff..5cdfd24565 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -50,16 +50,16 @@ namespace {
 details::OperatorKey GetOperatorKey(
     const ::toco::Operator& op,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_eager_ops) {
+    bool allow_flex_ops) {
   string custom_code;
   if (op.type == OperatorType::kUnsupported) {
     const TensorFlowUnsupportedOperator& unsupported_op =
         static_cast<const TensorFlowUnsupportedOperator&>(op);
 
-    // TODO(b/113715895): When `allow_eager_ops` is on, for now there's no way
+    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
     // to populate a regular custom op. We need to find a way to fix this.
-    if (allow_eager_ops) {
-      custom_code = string(::tflite::kEagerCustomCodePrefix) +
+    if (allow_flex_ops) {
+      custom_code = string(::tflite::kFlexCustomCodePrefix) +
                     unsupported_op.tensorflow_op;
     } else {
       custom_code = unsupported_op.tensorflow_op;
@@ -101,11 +101,11 @@ void LoadTensorsMap(const Model& model, TensorsMap* tensors_map) {
 void LoadOperatorsMap(
     const Model& model, OperatorsMap* operators_map,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_eager_ops) {
+    bool allow_flex_ops) {
   // First find a list of unique operator types.
   std::set<OperatorKey> keys;
   for (const auto& op : model.operators) {
-    keys.insert(GetOperatorKey(*op, ops_by_type, allow_eager_ops));
+    keys.insert(GetOperatorKey(*op, ops_by_type, allow_flex_ops));
   }
   // Now assign indices to them and fill in the map.
   int index = 0;
@@ -216,7 +216,7 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
 
   for (const auto& op : model.operators) {
     const details::OperatorKey operator_key =
-        GetOperatorKey(*op, ops_by_type, params.allow_eager_ops);
+        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops);
     int op_index = operators_map.at(operator_key);
     int op_version = operator_key.version;
 
@@ -281,7 +281,7 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
     }
 
     int op_index = operators_map.at(
-        GetOperatorKey(*op, ops_by_type, params.allow_eager_ops));
+        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops));
 
     auto tflite_op_it = ops_by_type.find(op->type);
     BaseOperator* tflite_op = tflite_op_it == ops_by_type.end()
@@ -334,7 +334,7 @@ Offset<Vector<Offset<Buffer>>> ExportBuffers(
 
 void Export(const Model& model, string* output_file_contents,
             const ExportParams& params) {
-  const auto ops_by_type = BuildOperatorByTypeMap(params.allow_eager_ops);
+  const auto ops_by_type = BuildOperatorByTypeMap(params.allow_flex_ops);
   Export(model, output_file_contents, params, ops_by_type);
 }
 
@@ -349,7 +349,7 @@ void Export(
 
   details::OperatorsMap operators_map;
   details::LoadOperatorsMap(model, &operators_map, ops_by_type,
-                            params.allow_eager_ops);
+                            params.allow_flex_ops);
 
   std::vector<const Array*> buffers_to_write;
   Array empty_array;
diff --git a/tensorflow/contrib/lite/toco/tflite/export.h b/tensorflow/contrib/lite/toco/tflite/export.h
index b070a38768..29d6de4049 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.h
+++ b/tensorflow/contrib/lite/toco/tflite/export.h
@@ -26,7 +26,7 @@ namespace tflite {
 // The parameters for exporting a TFLite model.
 struct ExportParams {
   bool allow_custom_ops = false;
-  bool allow_eager_ops = false;
+  bool allow_flex_ops = false;
   bool quantize_weights = false;
 };
 
@@ -121,7 +121,7 @@ void LoadTensorsMap(const Model& model, TensorsMap* tensors_map);
 void LoadOperatorsMap(
     const Model& model, OperatorsMap* operators_map,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_eager_ops);
+    bool allow_flex_ops);
 
 }  // namespace details
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index 8d4d197c46..93882a91a7 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -105,7 +105,7 @@ TEST_F(ExportTest, LoadOperatorsMap) {
 
   details::OperatorsMap operators;
   const auto ops_by_type = BuildOperatorByTypeMap();
-  // TODO(ycling): Add a test for allow_eager_ops.
+  // TODO(ycling): Add a test for allow_flex_ops.
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
   EXPECT_EQ(0, operators[details::OperatorKey(OperatorType::kAdd, "", 1)]);
   EXPECT_EQ(1, operators[details::OperatorKey(OperatorType::kConv, "", 1)]);
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index ca2a6a19b3..9addbb81e7 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -1160,8 +1160,8 @@ class Unpack : public BuiltinOperator<UnpackOperator, ::tflite::UnpackOptions,
 class TensorFlowUnsupported : public BaseOperator {
  public:
   TensorFlowUnsupported(const string& name, OperatorType type,
-                        bool allow_eager_ops)
-      : BaseOperator(name, type), allow_eager_ops_(allow_eager_ops) {}
+                        bool allow_flex_ops)
+      : BaseOperator(name, type), allow_flex_ops_(allow_flex_ops) {}
 
   Options Serialize(const Operator& op,
                     flatbuffers::FlatBufferBuilder* builder) const override {
@@ -1177,9 +1177,9 @@ class TensorFlowUnsupported : public BaseOperator {
   std::unique_ptr<Operator> Deserialize(
       const BuiltinOptions* builtin_options,
       const CustomOptions* custom_options) const override {
-    // Deserializing Eager ops doesn't work now.
+    // Deserializing Flex ops doesn't work now.
     // TODO(ycling): Revisit and decide if we should fix the flow for importing
-    // TFLite models with Eager ops.
+    // TFLite models with Flex ops.
     auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
     if (custom_options) {
       auto flexbuffer_map =
@@ -1200,13 +1200,13 @@ class TensorFlowUnsupported : public BaseOperator {
       return std::unique_ptr<flexbuffers::Builder>();
     }
 
-    if (allow_eager_ops_) {
+    if (allow_flex_ops_) {
       fbb->Vector([&]() {
         fbb->String(node_def.op());
         fbb->String(op.tensorflow_node_def);
       });
       fbb->Finish();
-      LOG(INFO) << "Writing eager op: " << node_def.op();
+      LOG(INFO) << "Writing flex op: " << node_def.op();
       return std::unique_ptr<flexbuffers::Builder>(fbb.release());
     }
 
@@ -1316,13 +1316,13 @@ class TensorFlowUnsupported : public BaseOperator {
   }
 
  private:
-  const bool allow_eager_ops_;
+  const bool allow_flex_ops_;
 };
 
 namespace {
 // Build a vector containing all the known operators.
 std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
-    bool allow_eager_ops = false) {
+    bool allow_flex_ops = false) {
   std::vector<std::unique_ptr<BaseOperator>> ops;
   using tensorflow::MakeUnique;
   // Builtin Operators.
@@ -1434,7 +1434,7 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
   ops.push_back(MakeUnique<CTCBeamSearchDecoder>(
       "CTC_BEAM_SEARCH_DECODER", OperatorType::kCTCBeamSearchDecoder));
   ops.push_back(MakeUnique<TensorFlowUnsupported>(
-      "TENSORFLOW_UNSUPPORTED", OperatorType::kUnsupported, allow_eager_ops));
+      "TENSORFLOW_UNSUPPORTED", OperatorType::kUnsupported, allow_flex_ops));
 
   // There operators are supported by Toco, but not by TF Lite, and has no
   // attributes.
@@ -1512,11 +1512,11 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
 }  // namespace
 
 std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
-    bool allow_eager_ops) {
+    bool allow_flex_ops) {
   std::map<OperatorType, std::unique_ptr<BaseOperator>> result;
 
   std::vector<std::unique_ptr<BaseOperator>> ops =
-      BuildOperatorList(allow_eager_ops);
+      BuildOperatorList(allow_flex_ops);
   for (auto& op : ops) {
     result[op->type()] = std::move(op);
   }
@@ -1525,11 +1525,11 @@ std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
 }
 
 std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
-    bool allow_eager_ops) {
+    bool allow_flex_ops) {
   std::map<string, std::unique_ptr<BaseOperator>> result;
 
   std::vector<std::unique_ptr<BaseOperator>> ops =
-      BuildOperatorList(allow_eager_ops);
+      BuildOperatorList(allow_flex_ops);
   for (auto& op : ops) {
     result[op->name()] = std::move(op);
   }
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h
index 702fb28ea6..13d9f6c49a 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.h
+++ b/tensorflow/contrib/lite/toco/tflite/operator.h
@@ -26,15 +26,15 @@ namespace tflite {
 class BaseOperator;
 
 // Return a map contained all know TF Lite Operators, keyed by their names.
-// TODO(ycling): The pattern to propagate parameters (e.g. allow_eager_ops)
+// TODO(ycling): The pattern to propagate parameters (e.g. allow_flex_ops)
 // is ugly here. Consider refactoring.
 std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
-    bool allow_eager_ops = false);
+    bool allow_flex_ops = false);
 
 // Return a map contained all know TF Lite Operators, keyed by the type of
 // their tf.mini counterparts.
 std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
-    bool allow_eager_ops = false);
+    bool allow_flex_ops = false);
 
 // These are the flatbuffer types for custom and builtin options.
 using CustomOptions = flatbuffers::Vector<uint8_t>;
diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
index b6aebc0470..cff79776bc 100644
--- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
@@ -167,11 +167,11 @@ bool ParseTocoFlagsFromCommandLineFlags(
            "converted float model. Model size will be reduced and there will "
            "be latency improvements (at the cost of accuracy)."),
       // WARNING: Experimental interface, subject to change
-      Flag("allow_eager_ops", parsed_flags.allow_eager_ops.bind(),
-           parsed_flags.allow_eager_ops.default_value(), ""),
+      Flag("allow_flex_ops", parsed_flags.allow_flex_ops.bind(),
+           parsed_flags.allow_flex_ops.default_value(), ""),
       // WARNING: Experimental interface, subject to change
-      Flag("force_eager_ops", parsed_flags.force_eager_ops.bind(),
-           parsed_flags.force_eager_ops.default_value(), "")};
+      Flag("force_flex_ops", parsed_flags.force_flex_ops.bind(),
+           parsed_flags.force_flex_ops.default_value(), "")};
   bool asked_for_help =
       *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help"));
   if (asked_for_help) {
@@ -266,15 +266,15 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags,
   READ_TOCO_FLAG(split_tflite_lstm_inputs, FlagRequirement::kNone);
   READ_TOCO_FLAG(quantize_weights, FlagRequirement::kNone);
   READ_TOCO_FLAG(post_training_quantize, FlagRequirement::kNone);
-  READ_TOCO_FLAG(allow_eager_ops, FlagRequirement::kNone);
-  READ_TOCO_FLAG(force_eager_ops, FlagRequirement::kNone);
+  READ_TOCO_FLAG(allow_flex_ops, FlagRequirement::kNone);
+  READ_TOCO_FLAG(force_flex_ops, FlagRequirement::kNone);
 
-  if (parsed_toco_flags.force_eager_ops.value() &&
-      !parsed_toco_flags.allow_eager_ops.value()) {
-    // TODO(ycling): Consider to enforce `allow_eager_ops` when
-    // `force_eager_ops` is true.
-    LOG(WARNING) << "--force_eager_ops should always be used with "
-                    "--allow_eager_ops.";
+  if (parsed_toco_flags.force_flex_ops.value() &&
+      !parsed_toco_flags.allow_flex_ops.value()) {
+    // TODO(ycling): Consider to enforce `allow_flex_ops` when
+    // `force_flex_ops` is true.
+    LOG(WARNING) << "--force_flex_ops should always be used with "
+                    "--allow_flex_ops.";
   }
 
   // Deprecated flag handling.
diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto
index 53d60fed05..ca3e64485e 100644
--- a/tensorflow/contrib/lite/toco/toco_flags.proto
+++ b/tensorflow/contrib/lite/toco/toco_flags.proto
@@ -190,16 +190,16 @@ message TocoFlags {
   // (at the cost of accuracy).
   optional bool post_training_quantize = 26 [default = false];
 
-  // When enabled, unsupported ops will be converted to TFLite Eager ops.
+  // When enabled, unsupported ops will be converted to TFLite Flex ops.
   // TODO(ycling): Consider to rename the following 2 flags and don't call it
-  // "Eager".
-  // `allow_eager_ops` should always be used with `allow_custom_ops`.
+  // "Flex".
+  // `allow_flex_ops` should always be used with `allow_custom_ops`.
   // WARNING: Experimental interface, subject to change
-  optional bool allow_eager_ops = 27 [default = false];
+  optional bool allow_flex_ops = 27 [default = false];
 
-  // When enabled, all TensorFlow ops will be converted to TFLite Eager
-  // ops directly. This will force `allow_eager_ops` to true.
-  // `force_eager_ops` should always be used with `allow_eager_ops`.
+  // When enabled, all TensorFlow ops will be converted to TFLite Flex
+  // ops directly. This will force `allow_flex_ops` to true.
+  // `force_flex_ops` should always be used with `allow_flex_ops`.
   // WARNING: Experimental interface, subject to change
-  optional bool force_eager_ops = 28 [default = false];
+  optional bool force_flex_ops = 28 [default = false];
 }
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index a08b02485f..106494f354 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -198,7 +198,7 @@ std::unique_ptr<Model> Import(const TocoFlags& toco_flags,
               : (toco_flags.output_format() != TENSORFLOW_GRAPHDEF);
 
       tf_import_flags.import_all_ops_as_unsupported =
-          toco_flags.force_eager_ops();
+          toco_flags.force_flex_ops();
 
       model = ImportTensorFlowGraphDef(model_flags, tf_import_flags,
                                        input_file_contents);
@@ -409,9 +409,9 @@ void Export(const TocoFlags& toco_flags, const Model& model,
     case TFLITE: {
       toco::tflite::ExportParams params;
 
-      // Always allow custom ops when eager ops are allowed.
-      if (toco_flags.force_eager_ops() || toco_flags.allow_eager_ops()) {
-        params.allow_eager_ops = true;
+      // Always allow custom ops when flex ops are allowed.
+      if (toco_flags.force_flex_ops() || toco_flags.allow_flex_ops()) {
+        params.allow_flex_ops = true;
         params.allow_custom_ops = true;
       } else if (allow_custom_ops) {
         params.allow_custom_ops = true;
diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD
index dc97d22401..bc18d40313 100644
--- a/tensorflow/contrib/lite/tools/benchmark/BUILD
+++ b/tensorflow/contrib/lite/tools/benchmark/BUILD
@@ -36,7 +36,7 @@ cc_binary(
 )
 
 cc_binary(
-    name = "benchmark_model_plus_eager",
+    name = "benchmark_model_plus_flex",
     srcs = [
         "benchmark_main.cc",
     ],
@@ -49,7 +49,7 @@ cc_binary(
         "//conditions:default": [],
     }),
     deps = [
-        ":benchmark_tflite_model_plus_eager_lib",
+        ":benchmark_tflite_model_plus_flex_lib",
         ":logging",
     ],
 )
@@ -111,7 +111,7 @@ cc_library(
 )
 
 cc_library(
-    name = "benchmark_tflite_model_plus_eager_lib",
+    name = "benchmark_tflite_model_plus_flex_lib",
     srcs = [
         "benchmark_tflite_model.cc",
         "logging.h",
@@ -123,7 +123,7 @@ cc_library(
         ":logging",
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite:string_util",
-        "//tensorflow/contrib/lite/delegates/eager:delegate",
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
         "//tensorflow/contrib/lite/kernels:builtin_ops",
         "//tensorflow/contrib/lite/profiling:profile_summarizer",
     ],
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
index ef4f0fa80d..d989ee720d 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -24,7 +24,7 @@ limitations under the License.
 #include <vector>
 
 #ifdef TFLITE_EXTENDED
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #endif  // TFLITE_EXTENDED
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
@@ -306,8 +306,8 @@ void BenchmarkTfLiteModel::Init() {
   interpreter->UseNNAPI(use_nnapi);
 
 #ifdef TFLITE_EXTENDED
-  TFLITE_LOG(INFO) << "Instantiating Eager Delegate";
-  delegate_ = EagerDelegate::Create();
+  TFLITE_LOG(INFO) << "Instantiating Flex Delegate";
+  delegate_ = FlexDelegate::Create();
   if (delegate_) {
     interpreter->ModifyGraphWithDelegate(delegate_.get(),
                                          /*allow_dynamic_tensors=*/true);
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
index 8541512bc8..9343824b4a 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include <vector>
 
 #ifdef TFLITE_EXTENDED
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #endif  // TFLITE_EXTENDED
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
@@ -74,7 +74,7 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
 
  private:
 #ifdef TFLITE_EXTENDED
-  std::unique_ptr<EagerDelegate> delegate_;
+  std::unique_ptr<FlexDelegate> delegate_;
 #endif  // TFLITE_EXTENDED
   std::unique_ptr<tflite::FlatBufferModel> model;
   std::unique_ptr<tflite::Interpreter> interpreter;
diff --git a/tensorflow/contrib/lite/util.cc b/tensorflow/contrib/lite/util.cc
index 7950653da9..6aa35b5227 100644
--- a/tensorflow/contrib/lite/util.cc
+++ b/tensorflow/contrib/lite/util.cc
@@ -18,9 +18,9 @@ limitations under the License.
 
 namespace tflite {
 
-bool IsEagerOp(const char* custom_name) {
-  return custom_name && strncmp(custom_name, kEagerCustomCodePrefix,
-                                strlen(kEagerCustomCodePrefix)) == 0;
+bool IsFlexOp(const char* custom_name) {
+  return custom_name && strncmp(custom_name, kFlexCustomCodePrefix,
+                                strlen(kFlexCustomCodePrefix)) == 0;
 }
 
 TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector<int>& input) {
diff --git a/tensorflow/contrib/lite/util.h b/tensorflow/contrib/lite/util.h
index 6d81f844f8..31292a6f81 100644
--- a/tensorflow/contrib/lite/util.h
+++ b/tensorflow/contrib/lite/util.h
@@ -26,15 +26,15 @@ limitations under the License.
 
 namespace tflite {
 
-// The prefix of Eager op custom code.
+// The prefix of Flex op custom code.
 // This will be matched agains the `custom_code` field in `OperatorCode`
 // Flatbuffer Table.
 // WARNING: This is an experimental API and subject to change.
-constexpr char kEagerCustomCodePrefix[] = "Eager";
+constexpr char kFlexCustomCodePrefix[] = "Flex";
 
 // Checks whether the prefix of the custom name indicates the operation is an
-// Eager operation.
-bool IsEagerOp(const char* custom_name);
+// Flex operation.
+bool IsFlexOp(const char* custom_name);
 
 // Converts a `std::vector` to a `TfLiteIntArray`. The caller takes ownership
 // of the returned pointer.
diff --git a/tensorflow/contrib/lite/util_test.cc b/tensorflow/contrib/lite/util_test.cc
index c5c1709f1d..25f3aded71 100644
--- a/tensorflow/contrib/lite/util_test.cc
+++ b/tensorflow/contrib/lite/util_test.cc
@@ -41,14 +41,14 @@ TEST(ConvertVectorToTfLiteIntArray, TestWithEmptyVector) {
   TfLiteIntArrayFree(output);
 }
 
-TEST(UtilTest, IsEagerOp) {
-  EXPECT_TRUE(IsEagerOp("Eager"));
-  EXPECT_TRUE(IsEagerOp("EagerOp"));
-  EXPECT_FALSE(IsEagerOp("eager"));
-  EXPECT_FALSE(IsEagerOp("Eage"));
-  EXPECT_FALSE(IsEagerOp("OpEager"));
-  EXPECT_FALSE(IsEagerOp(nullptr));
-  EXPECT_FALSE(IsEagerOp(""));
+TEST(UtilTest, IsFlexOp) {
+  EXPECT_TRUE(IsFlexOp("Flex"));
+  EXPECT_TRUE(IsFlexOp("FlexOp"));
+  EXPECT_FALSE(IsFlexOp("flex"));
+  EXPECT_FALSE(IsFlexOp("Fle"));
+  EXPECT_FALSE(IsFlexOp("OpFlex"));
+  EXPECT_FALSE(IsFlexOp(nullptr));
+  EXPECT_FALSE(IsFlexOp(""));
 }
 
 }  // namespace
-- 
GitLab


From dc90d6c486f2ec1741766b0989e6f6e842d94437 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 26 Sep 2018 14:59:42 -0700
Subject: [PATCH 0758/1357] [TF:XLA] Fix XLA lowering of TF BroadcastTo
 operator.

PiperOrigin-RevId: 214675055
---
 tensorflow/compiler/tests/binary_ops_test.py          | 7 +++++++
 tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc | 5 ++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index e219cf3d88..1b39d53dc0 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -1445,6 +1445,13 @@ class BinaryOpsTest(xla_test.XLATestCase):
           np.array([4, 0], dtype=np.int32),
           expected=np.zeros([4, 0], dtype=dtype))
 
+      x = np.arange(3).reshape((3, 1, 1, 1)).astype(dtype)
+      self._testBinary(
+          array_ops.broadcast_to,
+          x,
+          np.array((3, 7, 8, 9), dtype=np.int32),
+          expected=np.tile(x, (1, 7, 8, 9)))
+
 
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc b/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc
index 4bd7c74dca..696c1c39be 100644
--- a/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc
@@ -64,10 +64,9 @@ class BroadcastToOp : public XlaOpKernel {
                                     output_shape.DebugString()));
 
         broadcast_dims.push_back(broadcast_shape.size());
-        if (output_dims[i] == input_dims[i] || input_dims[i] == 1) {
+        if (output_dims[i] == input_dims[i]) {
           broadcast_shape.push_back(output_dims[i]);
-        }
-        if (output_dims[i] != input_dims[i]) {
+        } else if (output_dims[i] != input_dims[i]) {
           // Add dimensions [I, O/I], which we will later flatten to just
           // [O]. We must do this in two phases since XLA broadcasting does not
           // support tiling.
-- 
GitLab


From ee9c6c17abce8450d08140750b857ad36b0508e8 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 26 Sep 2018 15:19:14 -0700
Subject: [PATCH 0759/1357] Fix Optimizer "No gradients provided" error
 messages to report variables instead of internal processor objects.

PiperOrigin-RevId: 214678470
---
 tensorflow/python/training/optimizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index 699162b30c..f004f3944a 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -585,7 +585,7 @@ class Optimizer(
     var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
     if not var_list:
       raise ValueError("No gradients provided for any variable: %s." %
-                       ([str(v) for _, _, v in converted_grads_and_vars],))
+                       ([str(v) for _, v, _ in converted_grads_and_vars],))
     with ops.init_scope():
       self._create_slots(var_list)
     update_ops = []
-- 
GitLab


From 844074c2a8e61b744c3de2718e1c9ea7b1d2edc2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 15:19:20 -0700
Subject: [PATCH 0760/1357] Update hooks for distributed jobs with a master
 node, to ensure that summaries are written at the correct interval for jobs
 with long-running evaluations.

PiperOrigin-RevId: 214678483
---
 tensorflow/python/estimator/estimator.py      | 33 ++++++-
 tensorflow/python/estimator/estimator_test.py | 94 +++++++++++++++++++
 2 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index eec64ad452..fd62a79c84 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1394,6 +1394,35 @@ class Estimator(object):
         # It is expected to have one CheckpointSaverHook. If multiple, we pick
         # up the first one to add listener.
         saver_hooks[0]._listeners.extend(saving_listeners)  # pylint: disable=protected-access
+
+    # Add summary hooks to worker 0 if we are running with a master, to ensure
+    # that summaries are written at correct intervals even with long-running
+    # evaluations.
+    save_summary_steps = self._config.save_summary_steps
+    log_step_count_steps = self._config.log_step_count_steps
+    if run_config.TaskType.MASTER in self._config.cluster_spec.jobs:
+      # Update config values to prevent the default hooks from being created on
+      # the master or other workers.
+      save_summary_steps = 0
+      log_step_count_steps = None
+
+      if (self._config.task_type == run_config.TaskType.WORKER and
+          self._config.task_id == 0):
+        if (self._config.save_summary_steps and
+            self._config.save_summary_steps > 0):
+          worker_hooks.append(
+              training.SummarySaverHook(
+                  save_steps=self._config.save_summary_steps,
+                  output_dir=self._config.model_dir,
+                  scaffold=estimator_spec.scaffold))
+
+        if (self._config.log_step_count_steps and
+            self._config.log_step_count_steps > 0):
+          worker_hooks.append(
+              training.StepCounterHook(
+                  every_n_steps=self._config.log_step_count_steps,
+                  output_dir=self._config.model_dir))
+
     with training.MonitoredTrainingSession(
         master=self._config.master,
         is_chief=self._config.is_chief,
@@ -1403,9 +1432,9 @@ class Estimator(object):
         chief_only_hooks=(
             tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)),
         save_checkpoint_secs=0,  # Saving is handled by a hook.
-        save_summaries_steps=self._config.save_summary_steps,
+        save_summaries_steps=save_summary_steps,
         config=self._session_config,
-        log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
+        log_step_count_steps=log_step_count_steps) as mon_sess:
       loss = None
       while not mon_sess.should_stop():
         _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
index 1ed5e30b0e..5962086aad 100644
--- a/tensorflow/python/estimator/estimator_test.py
+++ b/tensorflow/python/estimator/estimator_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import functools
 import glob
+import json
 import os
 import tempfile
 
@@ -969,6 +970,99 @@ class EstimatorTrainTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'train_and_evaluate'):
       est.train(dummy_input_fn, steps=1)
 
+  def test_master_distributed_hooks(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.PS: ['localhost:1234'],
+            run_config.TaskType.WORKER: ['localhost:1235'],
+            run_config.TaskType.MASTER: ['localhost:1236']
+        },
+        'task': {
+            'type': run_config.TaskType.MASTER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig())
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
+
+  def test_master_distributed_hooks_for_worker_0(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.PS: ['localhost:1234'],
+            run_config.TaskType.WORKER: ['localhost:1235'],
+            run_config.TaskType.MASTER: ['localhost:1236']
+        },
+        'task': {
+            'type': run_config.TaskType.WORKER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig())
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertTrue(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertTrue(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
+
+  def test_master_distributed_hooks_for_worker_nonzero(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.PS: ['localhost:1234'],
+            run_config.TaskType.WORKER: ['localhost:1235', 'localhost:1237'],
+            run_config.TaskType.MASTER: ['localhost:1236']
+        },
+        'task': {
+            'type': run_config.TaskType.WORKER,
+            'index': 1
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig())
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
+
 
 def _model_fn_with_eval_metric_ops(features, labels, mode, params):
   _, _ = features, labels
-- 
GitLab


From ffa574283734b7a3c3b33687aa4b55e17e9bd00e Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 26 Sep 2018 15:22:11 -0700
Subject: [PATCH 0761/1357] Fix potential use-after-free in the training ops.

The recent fix to a resource leak introduced a potential use-after-free, because it released a reference on a Var resource before returning a mutex* borrowed from that resource. The mutex* could therefore become garbage if the refcount concurrently dropped to zero (for example, if a concurrent `Session::Reset()` were issued).

This change modifies the mutex accessing utilities to prolong the lifetime of the corresponding Var* beyond the lifetime of the returned mutex*.

PiperOrigin-RevId: 214678937
---
 .../core/kernels/training_op_helpers.cc       | 45 ++++++++++++-------
 tensorflow/core/kernels/training_op_helpers.h | 37 ++++++++++++++-
 tensorflow/core/kernels/training_ops.cc       |  8 +++-
 3 files changed, 69 insertions(+), 21 deletions(-)

diff --git a/tensorflow/core/kernels/training_op_helpers.cc b/tensorflow/core/kernels/training_op_helpers.cc
index 83b83fcdb9..4262a5404b 100644
--- a/tensorflow/core/kernels/training_op_helpers.cc
+++ b/tensorflow/core/kernels/training_op_helpers.cc
@@ -15,14 +15,16 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/training_op_helpers.h"
 
+#include "tensorflow/core/util/ptr_util.h"
+
 namespace tensorflow {
 
-mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input) {
+mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input,
+                                Var** maybe_resource) {
+  *maybe_resource = nullptr;
   if (ctx->input_dtype(input) == DT_RESOURCE) {
-    Var* var;
-    if (LookupResource(ctx, HandleFromInput(ctx, input), &var).ok()) {
-      core::ScopedUnref scoped_unref(var);
-      return var->mu();
+    if (LookupResource(ctx, HandleFromInput(ctx, input), maybe_resource).ok()) {
+      return (*maybe_resource)->mu();
     } else {
       ctx->CtxFailureWithWarning(
           errors::Internal("Invalid variable reference."));
@@ -33,12 +35,13 @@ mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input) {
 }
 
 // MaybeLockVariableInputMutexesInOrder is a helper function to acquire mutexes
-// in address order to mitigate deadlock.  Returns a vector of acquired mutexes.
-// Safe to pass duplicates - will only lock each distinct mutex once.  If
-// do_lock is false, returns immediately.  Note that this silently doesn't lock
-// mutexes for invalid variable references; in all usages this is followed by
-// GetInputTensor which will signal a failure.
-std::vector<mutex_lock> MaybeLockVariableInputMutexesInOrder(
+// in address order to mitigate deadlock.  Returns a structure that, when
+// deleted, will release the acquired mutexes. Safe to pass duplicates - will
+// only lock each distinct mutex once.  If do_lock is false, returns
+// immediately.  Note that this silently doesn't lock mutexes for invalid
+// variable references; in all usages this is followed by GetInputTensor which
+// will signal a failure.
+VariableInputLockHolder MaybeLockVariableInputMutexesInOrder(
     OpKernelContext* ctx, bool do_lock, const std::vector<int>& input_ids) {
   bool any_resource = false;
   for (auto i : input_ids) {
@@ -47,14 +50,16 @@ std::vector<mutex_lock> MaybeLockVariableInputMutexesInOrder(
       break;
     }
   }
-  std::vector<mutex_lock> locks;
   if (!do_lock && !any_resource) {
-    return locks;
+    return VariableInputLockHolder({}, {});
   }
+  std::vector<Var*> vars;
   std::vector<mutex*> mutexes;
   std::vector<int> acquire_order;
   for (auto input : input_ids) {
-    mutex* mutex = GetTrainingVariableMutex(ctx, input);
+    Var* var;
+    mutex* mutex = GetTrainingVariableMutex(ctx, input, &var);
+    if (var) vars.push_back(var);
     // Only lock each mutex once if duplicates exist (n^2 but n is 2 or 3).
     if (std::find(mutexes.begin(), mutexes.end(), mutex) == mutexes.end()) {
       acquire_order.push_back(mutexes.size());
@@ -64,13 +69,19 @@ std::vector<mutex_lock> MaybeLockVariableInputMutexesInOrder(
   std::sort(acquire_order.begin(), acquire_order.end(),
             [&mutexes](int a, int b) { return mutexes[a] < mutexes[b]; });
 
+  std::unique_ptr<std::vector<mutex_lock>> locks =
+      MakeUnique<std::vector<mutex_lock>>();
+  locks->reserve(acquire_order.size());
+
   for (auto input : acquire_order) {
-    mutex* mu = GetTrainingVariableMutex(ctx, input);
+    Var* var;
+    mutex* mu = GetTrainingVariableMutex(ctx, input, &var);
+    core::ScopedUnref scoped_unref(var);
     if (mu != nullptr) {
-      locks.emplace_back(*mu);
+      locks->emplace_back(*mu);
     }
   }
-  return locks;
+  return VariableInputLockHolder(std::move(vars), std::move(locks));
 }
 
 void MaybeForwardRefInputToRefOutput(OpKernelContext* ctx, int input,
diff --git a/tensorflow/core/kernels/training_op_helpers.h b/tensorflow/core/kernels/training_op_helpers.h
index 071cb371a7..9f173a80f7 100644
--- a/tensorflow/core/kernels/training_op_helpers.h
+++ b/tensorflow/core/kernels/training_op_helpers.h
@@ -23,9 +23,42 @@ limitations under the License.
 
 namespace tensorflow {
 
-mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input);
+// Returns a borrowed pointer to the mutex for the variable `input` in `ctx`.
+//
+// If `input` corresponds to a `DT_RESOURCE`-type variable input,
+// `*maybe_resource` will be updated to contain the underlying resource, and the
+// caller will be responsible for calling `Unref()` on that resource.
+mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input,
+                                Var** maybe_resource);
 
-std::vector<mutex_lock> MaybeLockVariableInputMutexesInOrder(
+// Utility structure that releases a sequence of borrowed mutexes when it is
+// deleted.
+struct VariableInputLockHolder {
+ public:
+  VariableInputLockHolder(std::vector<Var*> vars,
+                          std::unique_ptr<std::vector<mutex_lock>> locks)
+      : vars_(std::move(vars)), locks_(std::move(locks)) {}
+
+  VariableInputLockHolder(VariableInputLockHolder&& other)
+      : vars_(std::move(other.vars_)), locks_(std::move(other.locks_)) {}
+
+  ~VariableInputLockHolder() {
+    // Release the locks before unreffing the Vars, because each lock
+    // is potentially borrowed from a Var in vars_.
+    locks_.reset();
+    for (Var* var : vars_) {
+      var->Unref();
+    }
+  }
+
+ private:
+  std::vector<Var*> vars_;
+  // NOTE: Use a `std::unique_ptr` instead of moving in a vector directly,
+  // because a `std::vector<mutex_lock>` is not movable on all platforms.
+  std::unique_ptr<std::vector<mutex_lock>> locks_;
+};
+
+VariableInputLockHolder MaybeLockVariableInputMutexesInOrder(
     OpKernelContext* ctx, bool do_lock, const std::vector<int>& input_ids);
 
 void MaybeForwardRefInputToRefOutput(OpKernelContext* ctx, int input,
diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc
index 9a07ded17d..acf162deec 100644
--- a/tensorflow/core/kernels/training_ops.cc
+++ b/tensorflow/core/kernels/training_ops.cc
@@ -561,7 +561,9 @@ class ApplyAdadeltaOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    mutex* mu = GetTrainingVariableMutex(ctx, 0);
+    Var* resource;
+    mutex* mu = GetTrainingVariableMutex(ctx, 0, &resource);
+    core::ScopedUnref scoped_unref(resource);
     if (use_exclusive_lock_ && mu != nullptr) {
       mutex_lock l1(*mu);
       // Don't try to acquire a lock on the second ref as they share the same
@@ -710,7 +712,9 @@ class SparseApplyAdadeltaOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    mutex* mu = GetTrainingVariableMutex(ctx, 0);
+    Var* var;
+    mutex* mu = GetTrainingVariableMutex(ctx, 0, &var);
+    core::ScopedUnref scoped_unref(var);
     // mu_accum is actually the same mutex as mu_var since currently we use a
     // global mutex.
     //
-- 
GitLab


From adbfda2efb61d7e6c905425155cd92b4343595f8 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Wed, 26 Sep 2018 15:29:31 -0700
Subject: [PATCH 0762/1357] Deprecate tf.manip endpoints instead of endpoints
 under tf.*. This change is according to
 https://github.com/tensorflow/community/pull/16.

PiperOrigin-RevId: 214680285
---
 .../core/api_def/python_api/api_def_BatchToSpaceND.pbtxt      | 4 ++--
 tensorflow/core/api_def/python_api/api_def_GatherNd.pbtxt     | 4 ++--
 tensorflow/core/api_def/python_api/api_def_Reshape.pbtxt      | 4 ++--
 tensorflow/core/api_def/python_api/api_def_ReverseV2.pbtxt    | 4 ++--
 tensorflow/core/api_def/python_api/api_def_ScatterNd.pbtxt    | 4 ++--
 .../core/api_def/python_api/api_def_SpaceToBatchND.pbtxt      | 4 ++--
 tensorflow/core/api_def/python_api/api_def_Tile.pbtxt         | 4 ++--
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/core/api_def/python_api/api_def_BatchToSpaceND.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchToSpaceND.pbtxt
index 9552fc92e3..e395e333bf 100644
--- a/tensorflow/core/api_def/python_api/api_def_BatchToSpaceND.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_BatchToSpaceND.pbtxt
@@ -1,10 +1,10 @@
 op {
   graph_op_name: "BatchToSpaceND"
   endpoint {
-    name: "manip.batch_to_space_nd"
+    name: "batch_to_space_nd"
   }
   endpoint {
-    name: "batch_to_space_nd"
+    name: "manip.batch_to_space_nd"
     deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/python_api/api_def_GatherNd.pbtxt
index 71257c8855..598f23bde3 100644
--- a/tensorflow/core/api_def/python_api/api_def_GatherNd.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_GatherNd.pbtxt
@@ -1,10 +1,10 @@
 op {
   graph_op_name: "GatherNd"
   endpoint {
-    name: "manip.gather_nd"
+    name: "gather_nd"
   }
   endpoint {
-    name: "gather_nd"
+    name: "manip.gather_nd"
     deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Reshape.pbtxt b/tensorflow/core/api_def/python_api/api_def_Reshape.pbtxt
index c469665b66..b3d596de7a 100644
--- a/tensorflow/core/api_def/python_api/api_def_Reshape.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Reshape.pbtxt
@@ -1,10 +1,10 @@
 op {
   graph_op_name: "Reshape"
   endpoint {
-    name: "manip.reshape"
+    name: "reshape"
   }
   endpoint {
-    name: "reshape"
+    name: "manip.reshape"
     deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_ReverseV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_ReverseV2.pbtxt
index 77f595927b..51478b7c34 100644
--- a/tensorflow/core/api_def/python_api/api_def_ReverseV2.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_ReverseV2.pbtxt
@@ -1,10 +1,10 @@
 op {
   graph_op_name: "ReverseV2"
   endpoint {
-    name: "manip.reverse"
+    name: "reverse"
   }
   endpoint {
-    name: "reverse"
+    name: "manip.reverse"
     deprecated: true
   }
   endpoint {
diff --git a/tensorflow/core/api_def/python_api/api_def_ScatterNd.pbtxt b/tensorflow/core/api_def/python_api/api_def_ScatterNd.pbtxt
index a65a19b542..85888da45a 100644
--- a/tensorflow/core/api_def/python_api/api_def_ScatterNd.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_ScatterNd.pbtxt
@@ -1,10 +1,10 @@
 op {
   graph_op_name: "ScatterNd"
   endpoint {
-    name: "manip.scatter_nd"
+    name: "scatter_nd"
   }
   endpoint {
-    name: "scatter_nd"
+    name: "manip.scatter_nd"
     deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_SpaceToBatchND.pbtxt b/tensorflow/core/api_def/python_api/api_def_SpaceToBatchND.pbtxt
index af323a6cf3..146b97f444 100644
--- a/tensorflow/core/api_def/python_api/api_def_SpaceToBatchND.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_SpaceToBatchND.pbtxt
@@ -1,10 +1,10 @@
 op {
   graph_op_name: "SpaceToBatchND"
   endpoint {
-    name: "manip.space_to_batch_nd"
+    name: "space_to_batch_nd"
   }
   endpoint {
-    name: "space_to_batch_nd"
+    name: "manip.space_to_batch_nd"
     deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Tile.pbtxt b/tensorflow/core/api_def/python_api/api_def_Tile.pbtxt
index c34061c941..1d8695f1fd 100644
--- a/tensorflow/core/api_def/python_api/api_def_Tile.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Tile.pbtxt
@@ -1,10 +1,10 @@
 op {
   graph_op_name: "Tile"
   endpoint {
-    name: "manip.tile"
+    name: "tile"
   }
   endpoint {
-    name: "tile"
+    name: "manip.tile"
     deprecated: true
   }
 }
-- 
GitLab


From 2511230c0a9b8e2ec652d00dcedbd75d644e5400 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Wed, 26 Sep 2018 15:33:24 -0700
Subject: [PATCH 0763/1357] Preprocess the protobuff input for parse_tensor_op.

PiperOrigin-RevId: 214680988
---
 .../kernels/fuzzing/parse_tensor_op_fuzz.cc   | 32 +++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
index c90ad2cfeb..ada1235449 100644
--- a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
@@ -31,9 +31,37 @@ class FuzzParseTensor : public FuzzSession {
   }
 
   void FuzzImpl(const uint8_t* data, size_t size) final {
+    // We need to be sure that we don't request too many elements (i.e., we
+    // don't make ASAN OOM). In theory, a tensor shape can have arbitrary large
+    // number of elements, up to the limit of the memory available to the OS.
+    // However, due to the tracing done in ASAN, after 2^32 bytes of requested
+    // memory we would get a crash in the fuzzer (see b/34190148). Hence, let's
+    // try parsing the proto here, check that the size (if valid) is below a
+    // maximum threshold (using 2^20 for convenience), and then run the
+    // remainder of the fuzzer testing. Of course, this duplicates some work
+    // but it's better than repeating the investigation whenever Autofuzz
+    // detects another similar OOM.
+    string as_string = string(reinterpret_cast<const char*>(data), size);
+    TensorProto proto;
+    if (!ParseProtoUnlimited(&proto, as_string)) {
+      LOG(WARNING) << "Unable to parse proto of tensor\n";
+      return;
+    }
+    if (!TensorShape::IsValid(proto.tensor_shape())) {
+      LOG(WARNING) << "Invalid tensor shape\n";
+      return;
+    }
+    TensorShape shape(proto.tensor_shape());
+    const int64 num_elements = shape.num_elements();
+    const int64 max_num_elements = 1 << 20;
+    if (num_elements > max_num_elements) {
+      LOG(WARNING) << "Requiring a tensor with too many elements\n";
+      return;
+    }
+
+    // Now we can do the actual fuzz implementation
     Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
-    input_tensor.scalar<string>()() =
-        string(reinterpret_cast<const char*>(data), size);
+    input_tensor.scalar<string>()() = as_string;
     // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
     RunOneInput(input_tensor).IgnoreError();
   }
-- 
GitLab


From 5dbb021354e0acda667d823e856ec8be88960b35 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 15:34:43 -0700
Subject: [PATCH 0764/1357] Added a C utility to create a ServerDef proto from
 text representation.

PiperOrigin-RevId: 214681193
---
 tensorflow/c/BUILD                      |  1 +
 tensorflow/c/c_api_experimental.cc      | 15 ++++++++
 tensorflow/c/c_api_experimental.h       |  2 ++
 tensorflow/c/c_api_experimental_test.cc | 46 +++++++++++++++++++++++++
 4 files changed, 64 insertions(+)

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 43c279bd80..17e2e292eb 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -246,6 +246,7 @@ tf_cc_test(
         ":c_api_experimental",
         ":c_test_util",
         "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
     ],
diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index 3bcc62cf2d..f316e4ba67 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/platform.h"
 #include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/protobuf/tensorflow_server.pb.h"
 
 using tensorflow::FunctionDef;
 using tensorflow::Node;
@@ -8508,6 +8509,20 @@ void TF_EnqueueNamedTensor(TF_Session* session, int tensor_id,
   VLOG(1) << "Enqueuing is done.";
 }
 
+TF_Buffer* TFE_GetServerDef(const char* text_proto, TF_Status* status) {
+  tensorflow::ServerDef server_def;
+  if (!tensorflow::protobuf::TextFormat::ParseFromString(text_proto,
+                                                         &server_def)) {
+    status->status = tensorflow::errors::Internal(
+        "Invalid text proto for ServerDef: ", text_proto);
+    return nullptr;
+  }
+  status->status = tensorflow::Status();
+  TF_Buffer* ret = TF_NewBuffer();
+  TF_CHECK_OK(MessageToBuffer(server_def, ret));
+  return ret;
+}
+
 TFE_Context* TFE_CreateContextFromSession(TF_Session* session,
                                           TF_Status* status) {
   auto* opts = TFE_NewContextOptions();
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index a3ca847d96..950ad9aeed 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -131,6 +131,8 @@ TF_CAPI_EXPORT extern void TF_EnqueueNamedTensor(TF_Session* session,
                                                  int tensor_id,
                                                  TF_Tensor* tensor,
                                                  TF_Status* status);
+// Create a serialized tensorflow.ServerDef proto.
+TF_Buffer* TFE_GetServerDef(const char* text_proto, TF_Status* status);
 
 // TODO: remove this API in favor of the next one.
 TF_CAPI_EXPORT extern TFE_Context* TFE_NewContextFromSession(
diff --git a/tensorflow/c/c_api_experimental_test.cc b/tensorflow/c/c_api_experimental_test.cc
index 30fcfd401d..c6effd3969 100644
--- a/tensorflow/c/c_api_experimental_test.cc
+++ b/tensorflow/c/c_api_experimental_test.cc
@@ -16,8 +16,10 @@ limitations under the License.
 #include "tensorflow/c/c_api_experimental.h"
 #include "tensorflow/c/c_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/tensorflow_server.pb.h"
 
 namespace tensorflow {
 namespace {
@@ -116,5 +118,49 @@ TEST(CAPI_EXPERIMENTAL, ImagenetIteratorGetNext) {
   TF_DeleteStatus(s);
 }
 
+TEST(CAPI_EXPERIMENTAL, GetServerDefTest) {
+  const string expected_text_proto(R"(cluster {
+  job {
+    name: "worker"
+    tasks {
+      key: 0
+      value: "tpuserver:0"
+    }
+    tasks {
+      key: 1
+      value: "localhost:1"
+    }
+  }
+}
+job_name: "worker"
+task_index: 1
+protocol: "grpc"
+)");
+
+  TF_Status* status = TF_NewStatus();
+  TF_Buffer* result = TFE_GetServerDef(expected_text_proto.c_str(), status);
+  EXPECT_EQ(TF_GetCode(status), TF_OK);
+
+  ServerDef actual;
+  ASSERT_TRUE(actual.ParseFromArray(result->data, result->length));
+  string actual_text_proto;
+  tensorflow::protobuf::TextFormat::PrintToString(actual, &actual_text_proto);
+  EXPECT_EQ(expected_text_proto, actual_text_proto);
+
+  const string malformed_text_proto(R"(cluster {
+  job {
+    name: "worker")");
+  TF_Buffer* null_result =
+      TFE_GetServerDef(malformed_text_proto.c_str(), status);
+  EXPECT_NE(TF_GetCode(status), TF_OK);
+  EXPECT_TRUE(tensorflow::str_util::StrContains(
+      TF_Message(status), "Invalid text proto for ServerDef"));
+  EXPECT_EQ(null_result, nullptr);
+
+  // Cleanup
+  TF_DeleteBuffer(result);
+  TF_DeleteStatus(status);
+}
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From 3ab16ebce6a0a9ce20120c3c2dd1f1a8cf5b2ad8 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Wed, 26 Sep 2018 16:01:23 -0700
Subject: [PATCH 0765/1357] Add densenet to the examples_pip

PiperOrigin-RevId: 214685427
---
 tensorflow/contrib/eager/python/examples/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/eager/python/examples/BUILD b/tensorflow/contrib/eager/python/examples/BUILD
index 6f02c90368..97c299a911 100644
--- a/tensorflow/contrib/eager/python/examples/BUILD
+++ b/tensorflow/contrib/eager/python/examples/BUILD
@@ -6,6 +6,7 @@ package(default_visibility = ["//tensorflow:internal"])
 py_library(
     name = "examples_pip",
     deps = [
+        "//tensorflow/contrib/eager/python/examples/densenet",
         "//tensorflow/contrib/eager/python/examples/gan:mnist",
         "//tensorflow/contrib/eager/python/examples/l2hmc",
         "//tensorflow/contrib/eager/python/examples/l2hmc:neural_nets",
-- 
GitLab


From 3b9c747d71f30c6a59f6529f8475d7f56a86a7c5 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Wed, 26 Sep 2018 16:40:44 -0700
Subject: [PATCH 0766/1357] Extract Conv2D dimensions parsing and validation
 into helper functions.

PiperOrigin-RevId: 214691838
---
 tensorflow/core/kernels/conv_ops.cc | 321 ++++++++++++++++------------
 tensorflow/core/kernels/conv_ops.h  |  44 ++++
 2 files changed, 231 insertions(+), 134 deletions(-)

diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index 717a9f40a9..78856c4a99 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -264,150 +264,198 @@ class LaunchXsmmConvOp<CPUDevice, float> {
 };
 #endif
 
+#define TF_REQUIRES(EXP, STATUS)                \
+  do {                                          \
+    if (!TF_PREDICT_TRUE(EXP)) return (STATUS); \
+  } while (false)
+
+Status InitConv2DParameters(const OpKernelConstruction* context,
+                            Conv2DParameters* params) {
+  TF_RETURN_IF_ERROR(context->GetAttr("dilations", &params->dilations));
+  TF_RETURN_IF_ERROR(context->GetAttr("strides", &params->strides));
+  TF_RETURN_IF_ERROR(context->GetAttr("padding", &params->padding));
+  string data_format_string;
+  TF_RETURN_IF_ERROR(context->GetAttr("data_format", &data_format_string));
+  TF_REQUIRES(FormatFromString(data_format_string, &params->data_format),
+              errors::InvalidArgument("Invalid data format"));
+
+  const auto& strides = params->strides;
+  const auto& dilations = params->dilations;
+  const auto& data_format = params->data_format;
+
+  TF_REQUIRES(dilations.size() == 4,
+              errors::InvalidArgument("Sliding window dilations field must "
+                                      "specify 4 dimensions"));
+  TF_REQUIRES(strides.size() == 4,
+              errors::InvalidArgument("Sliding window strides field must "
+                                      "specify 4 dimensions"));
+  const int64 stride_n = GetTensorDim(strides, data_format, 'N');
+  const int64 stride_c = GetTensorDim(strides, data_format, 'C');
+  const int64 stride_h = GetTensorDim(strides, data_format, 'H');
+  const int64 stride_w = GetTensorDim(strides, data_format, 'W');
+  TF_REQUIRES(
+      stride_n == 1 && stride_c == 1,
+      errors::InvalidArgument("Current implementation does not yet support "
+                              "strides in the batch and depth dimensions."));
+  TF_REQUIRES(stride_h > 0 && stride_w > 0,
+              errors::InvalidArgument(
+                  "Row and column strides should be larger than 0."));
+
+  const int64 dilation_n = GetTensorDim(dilations, data_format, 'N');
+  const int64 dilation_c = GetTensorDim(dilations, data_format, 'C');
+  const int64 dilation_h = GetTensorDim(dilations, data_format, 'H');
+  const int64 dilation_w = GetTensorDim(dilations, data_format, 'W');
+  TF_REQUIRES(
+      dilation_n == 1 && dilation_c == 1,
+      errors::InvalidArgument("Current implementation does not yet support "
+                              "dilations in the batch and depth dimensions."));
+  TF_REQUIRES(
+      dilation_h > 0 && dilation_w > 0,
+      errors::InvalidArgument("Dilated rates should be larger than 0."));
+
+  return Status::OK();
+}
+
+Status ComputeConv2DDimension(const Conv2DParameters& params,
+                              const Tensor& input, const Tensor& filter,
+                              Conv2DDimensions* dimensions) {
+  // Check that 2D convolution input and filter have exactly 4 dimensions.
+  TF_REQUIRES(input.dims() == 4,
+              errors::InvalidArgument("input must be 4-dimensional",
+                                      input.shape().DebugString()));
+  TF_REQUIRES(filter.dims() == 4,
+              errors::InvalidArgument("filter must be 4-dimensional: ",
+                                      filter.shape().DebugString()));
+  for (int i = 0; i < 3; i++) {
+    TF_REQUIRES(
+        FastBoundsCheck(filter.dim_size(i), std::numeric_limits<int>::max()),
+        errors::InvalidArgument("filter too large"));
+  }
+
+  // The last dimension for input is in_depth. Check that it is the same as the
+  // filter's in_depth or it is evenly divisible by filter's in_depth.
+  const int64 in_depth_raw = GetTensorDim(input, params.data_format, 'C');
+  const int64 patch_depth_raw = filter.dim_size(2);
+  TF_REQUIRES(FastBoundsCheck(in_depth_raw, std::numeric_limits<int>::max()),
+              errors::InvalidArgument("Input depth too large"));
+  TF_REQUIRES(FastBoundsCheck(patch_depth_raw, std::numeric_limits<int>::max()),
+              errors::InvalidArgument("Patch depth too large"));
+  const int in_depth = static_cast<int>(in_depth_raw);
+  const int patch_depth = static_cast<int>(patch_depth_raw);
+  TF_REQUIRES(in_depth % patch_depth == 0,
+              errors::InvalidArgument(
+                  "input depth must be evenly divisible by filter depth: ",
+                  in_depth, " vs ", patch_depth));
+
+  // The last dimension for filter is out_depth.
+  const int out_depth = static_cast<int>(filter.dim_size(3));
+
+  // The second dimension for input is rows/height.
+  // The first dimension for filter is rows/height.
+  const int64 input_rows_raw = GetTensorDim(input, params.data_format, 'H');
+  TF_REQUIRES(FastBoundsCheck(input_rows_raw, std::numeric_limits<int>::max()),
+              errors::InvalidArgument("Input rows too large"));
+  const int input_rows = static_cast<int>(input_rows_raw);
+  const int filter_rows = static_cast<int>(filter.dim_size(0));
+
+  // The third dimension for input is columns/width.
+  // The second dimension for filter is columns/width.
+  const int64 input_cols_raw = GetTensorDim(input, params.data_format, 'W');
+  TF_REQUIRES(FastBoundsCheck(input_cols_raw, std::numeric_limits<int>::max()),
+              errors::InvalidArgument("Input cols too large"));
+  const int input_cols = static_cast<int>(input_cols_raw);
+  const int filter_cols = static_cast<int>(filter.dim_size(1));
+
+  // The first dimension for input is batch.
+  const int64 batch_raw = GetTensorDim(input, params.data_format, 'N');
+  TF_REQUIRES(FastBoundsCheck(batch_raw, std::numeric_limits<int>::max()),
+              errors::InvalidArgument("batch is too large"));
+  const int batch = static_cast<int>(batch_raw);
+
+  // Take the stride and dilation from the second and third dimensions only (we
+  // do not support striding or dilation on the batch or depth dimension).
+  const int stride_rows = GetTensorDim(params.strides, params.data_format, 'H');
+  const int stride_cols = GetTensorDim(params.strides, params.data_format, 'W');
+  const int dilation_rows =
+      GetTensorDim(params.dilations, params.data_format, 'H');
+  const int dilation_cols =
+      GetTensorDim(params.dilations, params.data_format, 'W');
+
+  // Compute windowed output sizes for rows and columns.
+  int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeV2(
+      input_rows, filter_rows, dilation_rows, stride_rows, params.padding,
+      &out_rows, &pad_rows));
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeV2(
+      input_cols, filter_cols, dilation_cols, stride_cols, params.padding,
+      &out_cols, &pad_cols));
+
+  dimensions->batch = batch;
+  dimensions->input_rows = input_rows;
+  dimensions->input_cols = input_cols;
+  dimensions->in_depth = in_depth;
+  dimensions->filter_rows = filter_rows;
+  dimensions->filter_cols = filter_cols;
+  dimensions->patch_depth = patch_depth;
+  dimensions->out_depth = out_depth;
+  dimensions->stride_rows = stride_rows;
+  dimensions->stride_cols = stride_cols;
+  dimensions->dilation_rows = dilation_rows;
+  dimensions->dilation_cols = dilation_cols;
+  dimensions->out_rows = out_rows;
+  dimensions->out_cols = out_cols;
+  dimensions->pad_rows = pad_rows;
+  dimensions->pad_cols = pad_cols;
+
+  return Status::OK();
+}
+
+#undef TF_REQUIRES
+
 template <typename Device, typename T>
 class Conv2DOp : public BinaryOp<T> {
  public:
   explicit Conv2DOp(OpKernelConstruction* context) : BinaryOp<T>(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    string data_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
-    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, InitConv2DParameters(context, &params_));
+
     OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_));
     use_cudnn_ &= CanUseCudnn();
     cudnn_use_autotune_ = CudnnUseAutotune();
-    OP_REQUIRES(context, dilations_.size() == 4,
-                errors::InvalidArgument("Sliding window dilations field must "
-                                        "specify 4 dimensions"));
-    OP_REQUIRES(context, strides_.size() == 4,
-                errors::InvalidArgument("Sliding window strides field must "
-                                        "specify 4 dimensions"));
-    const int64 stride_n = GetTensorDim(strides_, data_format_, 'N');
-    const int64 stride_c = GetTensorDim(strides_, data_format_, 'C');
-    const int64 stride_h = GetTensorDim(strides_, data_format_, 'H');
-    const int64 stride_w = GetTensorDim(strides_, data_format_, 'W');
-    OP_REQUIRES(
-        context, stride_n == 1 && stride_c == 1,
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-    OP_REQUIRES(context, stride_h > 0 && stride_w > 0,
-                errors::InvalidArgument(
-                    "Row and column strides should be larger than 0."));
-
-    const int64 dilation_n = GetTensorDim(dilations_, data_format_, 'N');
-    const int64 dilation_c = GetTensorDim(dilations_, data_format_, 'C');
-    const int64 dilation_h = GetTensorDim(dilations_, data_format_, 'H');
-    const int64 dilation_w = GetTensorDim(dilations_, data_format_, 'W');
-    OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1,
-                errors::InvalidArgument(
-                    "Current implementation does not yet support "
-                    "dilations in the batch and depth dimensions."));
-    OP_REQUIRES(
-        context, dilation_h > 0 && dilation_w > 0,
-        errors::InvalidArgument("Dilated rates should be larger than 0."));
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
   }
 
   void Compute(OpKernelContext* context) override {
     // Input tensor is of the following dimensions:
     // [ batch, in_rows, in_cols, in_depth ]
-
     const Tensor& input = context->input(0);
 
     // Input filter is of the following dimensions:
     // [ filter_rows, filter_cols, in_depth, out_depth]
     const Tensor& filter = context->input(1);
 
-    // For 2D convolution, there should be 4 dimensions.
-    OP_REQUIRES(context, input.dims() == 4,
-                errors::InvalidArgument("input must be 4-dimensional",
-                                        input.shape().DebugString()));
-    OP_REQUIRES(context, filter.dims() == 4,
-                errors::InvalidArgument("filter must be 4-dimensional: ",
-                                        filter.shape().DebugString()));
-
-    for (int i = 0; i < 3; i++) {
-      OP_REQUIRES(
-          context,
-          FastBoundsCheck(filter.dim_size(i), std::numeric_limits<int>::max()),
-          errors::InvalidArgument("filter too large"));
-    }
+    Conv2DDimensions dimensions;
+    OP_REQUIRES_OK(context,
+                   ComputeConv2DDimension(params_, input, filter, &dimensions));
 
-    // The last dimension for input is in_depth. It must be the same as the
-    // filter's in_depth or be evenly divisible by filter's in_depth.
-    const int64 in_depth = GetTensorDim(input, data_format_, 'C');
-    const int64 patch_depth = filter.dim_size(2);
-    OP_REQUIRES(context, in_depth % patch_depth == 0,
-                errors::InvalidArgument(
-                    "input depth must be evenly divisible by filter depth: ",
-                    in_depth, " vs ", patch_depth));
-
-    // The last dimension for filter is out_depth.
-    const int out_depth = static_cast<int>(filter.dim_size(3));
-
-    // The second dimension for input is rows/height.
-    // The first dimension for filter is rows/height.
-    const int64 input_rows_raw = GetTensorDim(input, data_format_, 'H');
-    OP_REQUIRES(
-        context,
-        FastBoundsCheck(input_rows_raw, std::numeric_limits<int>::max()),
-        errors::InvalidArgument("Input rows too large"));
-    const int input_rows = static_cast<int>(input_rows_raw);
-    const int filter_rows = static_cast<int>(filter.dim_size(0));
-
-    // The third dimension for input is columns/width.
-    // The second dimension for filter is columns/width.
-    const int64 input_cols_raw = GetTensorDim(input, data_format_, 'W');
-    OP_REQUIRES(
-        context,
-        FastBoundsCheck(input_cols_raw, std::numeric_limits<int>::max()),
-        errors::InvalidArgument("Input cols too large"));
-    const int input_cols = static_cast<int>(input_cols_raw);
-    const int filter_cols = static_cast<int>(filter.dim_size(1));
-
-    // The first dimension for input is batch.
-    const int64 batch_raw = GetTensorDim(input, data_format_, 'N');
-    OP_REQUIRES(context,
-                FastBoundsCheck(batch_raw, std::numeric_limits<int>::max()),
-                errors::InvalidArgument("batch is too large"));
-    const int batch = static_cast<int>(batch_raw);
-
-    // For now we take the stride and dilation from the second and third
-    // dimensions only (we do not support striding or dilation on the batch or
-    // depth dimension).
-    const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
-    const int stride_cols = GetTensorDim(strides_, data_format_, 'W');
-
-    const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H');
-    const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W');
-
-    int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
-    OP_REQUIRES_OK(context, GetWindowedOutputSizeV2(
-                                input_rows, filter_rows, dilation_rows,
-                                stride_rows, padding_, &out_rows, &pad_rows));
-    OP_REQUIRES_OK(context, GetWindowedOutputSizeV2(
-                                input_cols, filter_cols, dilation_cols,
-                                stride_cols, padding_, &out_cols, &pad_cols));
-    TensorShape out_shape =
-        ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth);
+    TensorShape out_shape = ShapeFromFormat(
+        params_.data_format, dimensions.batch, dimensions.out_rows,
+        dimensions.out_cols, dimensions.out_depth);
 
     // Output tensor is of the following dimensions:
     // [ in_batch, out_rows, out_cols, out_depth ]
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
 
-    VLOG(2) << "Conv2D: in_depth = " << in_depth
-            << ", patch_depth = " << patch_depth
-            << ", input_cols = " << input_cols
-            << ", filter_cols = " << filter_cols
-            << ", input_rows = " << input_rows
-            << ", filter_rows = " << filter_rows
-            << ", stride_rows = " << stride_rows
-            << ", stride_cols = " << stride_cols
-            << ", dilation_rows = " << dilation_rows
-            << ", dilation_cols = " << dilation_cols
-            << ", out_depth = " << out_depth;
+    VLOG(2) << "Conv2D: in_depth = " << dimensions.in_depth
+            << ", patch_depth = " << dimensions.patch_depth
+            << ", input_cols = " << dimensions.input_cols
+            << ", filter_cols = " << dimensions.filter_cols
+            << ", input_rows = " << dimensions.input_rows
+            << ", filter_rows = " << dimensions.filter_rows
+            << ", stride_rows = " << dimensions.stride_rows
+            << ", stride_cols = " << dimensions.stride_cols
+            << ", dilation_rows = " << dimensions.dilation_rows
+            << ", dilation_cols = " << dimensions.dilation_cols
+            << ", out_depth = " << dimensions.out_depth;
 
     // If there is nothing to compute, return.
     if (out_shape.num_elements() == 0) {
@@ -416,36 +464,41 @@ class Conv2DOp : public BinaryOp<T> {
 
 #ifdef TENSORFLOW_USE_LIBXSMM_CONVOLUTIONS
     if (LaunchXsmmConvOp<Device, T>::Run(
-            context, input, filter, batch, input_rows, input_cols, in_depth,
-            filter_rows, filter_cols, pad_rows, pad_cols, out_rows, out_cols,
-            out_depth, dilation_rows, dilation_cols, stride_rows, stride_cols,
-            output, data_format_)) {
+            context, input, filter, dimensions.batch, dimensions.input_rows,
+            dimensions.input_cols, dimensions.in_depth, dimensions.filter_rows,
+            dimensions.filter_cols, dimensions.pad_rows, dimensions.pad_cols,
+            dimensions.out_rows, dimensions.out_cols, dimensions.out_depth,
+            dimensions.dilation_rows, dimensions.dilation_cols,
+            dimensions.stride_rows, dimensions.stride_cols, output,
+            params_.data_format)) {
       return;
     }
 #endif
 
     if (LaunchDeepConvOp<Device, T>::Run(
-            context, input, filter, batch, input_rows, input_cols, in_depth,
-            filter_rows, filter_cols, pad_rows, pad_cols, out_rows, out_cols,
-            out_depth, dilation_rows, dilation_cols, stride_rows, stride_cols,
-            output, data_format_)) {
+            context, input, filter, dimensions.batch, dimensions.input_rows,
+            dimensions.input_cols, dimensions.in_depth, dimensions.filter_rows,
+            dimensions.filter_cols, dimensions.pad_rows, dimensions.pad_cols,
+            dimensions.out_rows, dimensions.out_cols, dimensions.out_depth,
+            dimensions.dilation_rows, dimensions.dilation_cols,
+            dimensions.stride_rows, dimensions.stride_cols, output,
+            params_.data_format)) {
       return;
     }
 
     launcher_(context, use_cudnn_, cudnn_use_autotune_, input, filter,
-              dilation_rows, dilation_cols, stride_rows, stride_cols, padding_,
-              output, data_format_);
+              dimensions.dilation_rows, dimensions.dilation_cols,
+              dimensions.stride_rows, dimensions.stride_cols, params_.padding,
+              output, params_.data_format);
   }
 
  private:
-  std::vector<int32> dilations_;
-  std::vector<int32> strides_;
+  Conv2DParameters params_;
   bool use_cudnn_;
-  Padding padding_;
-  TensorFormat data_format_;
-  LaunchConv2DOp<Device, T> launcher_;
   bool cudnn_use_autotune_;
 
+  LaunchConv2DOp<Device, T> launcher_;
+
   TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
 };
 
diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h
index adf4601b43..7ec878e0b2 100644
--- a/tensorflow/core/kernels/conv_ops.h
+++ b/tensorflow/core/kernels/conv_ops.h
@@ -66,6 +66,50 @@ struct Im2ColBufferResource : public ResourceBase {
   string DebugString() { return "Im2ColBufferResource"; }
 };
 
+// Convolution parameters specified by Op attributes.
+struct Conv2DParameters {
+  std::vector<int32> dilations;
+  std::vector<int32> strides;
+  Padding padding;
+  TensorFormat data_format;
+};
+
+// Convolution dimensions inferred from parameters, input and filter tensors.
+struct Conv2DDimensions {
+  int batch;
+  int input_rows;
+  int input_cols;
+  int in_depth;
+
+  int filter_rows;
+  int filter_cols;
+  int patch_depth;
+  int out_depth;
+
+  int stride_rows;
+  int stride_cols;
+
+  int dilation_rows;
+  int dilation_cols;
+
+  int64 out_rows;
+  int64 out_cols;
+  int64 pad_rows;
+  int64 pad_cols;
+};
+
+// Initializes and validates Conv2D parameters configured by OpKernel
+// attributes.
+Status InitConv2DParameters(const OpKernelConstruction* context,
+                            Conv2DParameters* params);
+
+// Computes and validates convolutions dimensions from Conv2D parameters. If
+// parameters are valid, dimensions will be updated with derived convolution
+// dimensions, otherwise error will be returned.
+Status ComputeConv2DDimension(const Conv2DParameters& params,
+                              const Tensor& input, const Tensor& filter,
+                              Conv2DDimensions* dimensions);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_CONV_OPS_H_
-- 
GitLab


From 69650fff2b0f267162c987f35e2747be033a7d80 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 16:49:48 -0700
Subject: [PATCH 0767/1357] Automated rollback of commit
 844074c2a8e61b744c3de2718e1c9ea7b1d2edc2

PiperOrigin-RevId: 214693201
---
 tensorflow/python/estimator/estimator.py      | 33 +------
 tensorflow/python/estimator/estimator_test.py | 94 -------------------
 2 files changed, 2 insertions(+), 125 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index fd62a79c84..eec64ad452 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1394,35 +1394,6 @@ class Estimator(object):
         # It is expected to have one CheckpointSaverHook. If multiple, we pick
         # up the first one to add listener.
         saver_hooks[0]._listeners.extend(saving_listeners)  # pylint: disable=protected-access
-
-    # Add summary hooks to worker 0 if we are running with a master, to ensure
-    # that summaries are written at correct intervals even with long-running
-    # evaluations.
-    save_summary_steps = self._config.save_summary_steps
-    log_step_count_steps = self._config.log_step_count_steps
-    if run_config.TaskType.MASTER in self._config.cluster_spec.jobs:
-      # Update config values to prevent the default hooks from being created on
-      # the master or other workers.
-      save_summary_steps = 0
-      log_step_count_steps = None
-
-      if (self._config.task_type == run_config.TaskType.WORKER and
-          self._config.task_id == 0):
-        if (self._config.save_summary_steps and
-            self._config.save_summary_steps > 0):
-          worker_hooks.append(
-              training.SummarySaverHook(
-                  save_steps=self._config.save_summary_steps,
-                  output_dir=self._config.model_dir,
-                  scaffold=estimator_spec.scaffold))
-
-        if (self._config.log_step_count_steps and
-            self._config.log_step_count_steps > 0):
-          worker_hooks.append(
-              training.StepCounterHook(
-                  every_n_steps=self._config.log_step_count_steps,
-                  output_dir=self._config.model_dir))
-
     with training.MonitoredTrainingSession(
         master=self._config.master,
         is_chief=self._config.is_chief,
@@ -1432,9 +1403,9 @@ class Estimator(object):
         chief_only_hooks=(
             tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)),
         save_checkpoint_secs=0,  # Saving is handled by a hook.
-        save_summaries_steps=save_summary_steps,
+        save_summaries_steps=self._config.save_summary_steps,
         config=self._session_config,
-        log_step_count_steps=log_step_count_steps) as mon_sess:
+        log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
       loss = None
       while not mon_sess.should_stop():
         _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
index 5962086aad..1ed5e30b0e 100644
--- a/tensorflow/python/estimator/estimator_test.py
+++ b/tensorflow/python/estimator/estimator_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 import functools
 import glob
-import json
 import os
 import tempfile
 
@@ -970,99 +969,6 @@ class EstimatorTrainTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'train_and_evaluate'):
       est.train(dummy_input_fn, steps=1)
 
-  def test_master_distributed_hooks(self):
-    tf_config = json.dumps({
-        'cluster': {
-            run_config.TaskType.PS: ['localhost:1234'],
-            run_config.TaskType.WORKER: ['localhost:1235'],
-            run_config.TaskType.MASTER: ['localhost:1236']
-        },
-        'task': {
-            'type': run_config.TaskType.MASTER,
-            'index': 0
-        }
-    })
-    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
-      est = estimator.Estimator(
-          model_fn=model_fn_global_step_incrementer,
-          config=run_config.RunConfig())
-
-    with test.mock.patch.object(training,
-                                'MonitoredTrainingSession') as mock_sess:
-      est.train(dummy_input_fn, steps=1)
-      self.assertFalse(
-          any(
-              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertFalse(
-          any(
-              isinstance(hook, basic_session_run_hooks.StepCounterHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
-      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
-
-  def test_master_distributed_hooks_for_worker_0(self):
-    tf_config = json.dumps({
-        'cluster': {
-            run_config.TaskType.PS: ['localhost:1234'],
-            run_config.TaskType.WORKER: ['localhost:1235'],
-            run_config.TaskType.MASTER: ['localhost:1236']
-        },
-        'task': {
-            'type': run_config.TaskType.WORKER,
-            'index': 0
-        }
-    })
-    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
-      est = estimator.Estimator(
-          model_fn=model_fn_global_step_incrementer,
-          config=run_config.RunConfig())
-
-    with test.mock.patch.object(training,
-                                'MonitoredTrainingSession') as mock_sess:
-      est.train(dummy_input_fn, steps=1)
-      self.assertTrue(
-          any(
-              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertTrue(
-          any(
-              isinstance(hook, basic_session_run_hooks.StepCounterHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
-      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
-
-  def test_master_distributed_hooks_for_worker_nonzero(self):
-    tf_config = json.dumps({
-        'cluster': {
-            run_config.TaskType.PS: ['localhost:1234'],
-            run_config.TaskType.WORKER: ['localhost:1235', 'localhost:1237'],
-            run_config.TaskType.MASTER: ['localhost:1236']
-        },
-        'task': {
-            'type': run_config.TaskType.WORKER,
-            'index': 1
-        }
-    })
-    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
-      est = estimator.Estimator(
-          model_fn=model_fn_global_step_incrementer,
-          config=run_config.RunConfig())
-
-    with test.mock.patch.object(training,
-                                'MonitoredTrainingSession') as mock_sess:
-      est.train(dummy_input_fn, steps=1)
-      self.assertFalse(
-          any(
-              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertFalse(
-          any(
-              isinstance(hook, basic_session_run_hooks.StepCounterHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
-      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
-
 
 def _model_fn_with_eval_metric_ops(features, labels, mode, params):
   _, _ = features, labels
-- 
GitLab


From e8ffb444411f20c435ac72a1beb866e39686fba0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 17:28:43 -0700
Subject: [PATCH 0768/1357] internal change only

PiperOrigin-RevId: 214698827
---
 tensorflow/contrib/tpu/profiler/op_profile.proto | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/profiler/op_profile.proto b/tensorflow/contrib/tpu/profiler/op_profile.proto
index 68cf510e71..b25d06dda8 100644
--- a/tensorflow/contrib/tpu/profiler/op_profile.proto
+++ b/tensorflow/contrib/tpu/profiler/op_profile.proto
@@ -18,13 +18,15 @@ message Profile {
 message Node {
   string name = 1;      // Semantics depend on contents.
   Metrics metrics = 2;  // May be omitted e.g. for fused instructions.
-  repeated Node children = 3;
+  repeated Node children = 3;  // Subjected to pruning.
 
   // Details about what this node represents.
   oneof contents {
     InstructionCategory category = 4;
     XLAInstruction xla = 5;
   }
+
+  int32 num_children = 6;  // Total number of children before pruning.
   // A category of XLA instructions.
   // name is a descriptive string, like "data formatting".
   message InstructionCategory {
-- 
GitLab


From bfda65cc70526c919c57ef8321dd282e463ed8a3 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Wed, 26 Sep 2018 17:33:52 -0700
Subject: [PATCH 0769/1357] Removing _PerDeviceGenerator and
 MultiDeviceIterator from contrib now that they're moved to core. I overlooked
 this in the CL to move to core.

PiperOrigin-RevId: 214699544
---
 .../data/python/ops/prefetching_ops.py        | 189 ------------------
 1 file changed, 189 deletions(-)

diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py
index f994425304..58395879e6 100644
--- a/tensorflow/contrib/data/python/ops/prefetching_ops.py
+++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py
@@ -31,7 +31,6 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gen_dataset_ops as core_gen_dataset_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -526,191 +525,3 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
   @property
   def output_classes(self):
     return self._input_dataset.output_classes
-
-
-class _PerDeviceGenerator(dataset_ops.Dataset):
-  """A `dummy` generator dataset."""
-
-  def __init__(self, shard_num, multi_device_iterator_resource, incarnation_id,
-               source_device, target_device, output_shapes, output_types,
-               output_classes):
-    self._target_device = target_device
-    self._output_types = output_types
-    self._output_shapes = output_shapes
-    self._output_classes = output_classes
-    self._flat_output_shapes = nest.flatten(
-        sparse.as_dense_shapes(self._output_shapes, self._output_classes))
-    self._flat_output_types = nest.flatten(
-        sparse.as_dense_types(self._output_types, self._output_classes))
-
-    multi_device_iterator_string_handle = (
-        gen_dataset_ops.multi_device_iterator_to_string_handle(
-            multi_device_iterator_resource))
-
-    @function.Defun()
-    def _init_func():
-      return multi_device_iterator_string_handle
-
-    @function.Defun()
-    def _remote_init_func():
-      return functional_ops.remote_call(
-          target=source_device,
-          args=_init_func.captured_inputs,
-          Tout=[dtypes.string],
-          f=_init_func)
-
-    self._init_func = _remote_init_func
-    self._init_captured_args = _remote_init_func.captured_inputs
-
-    @function.Defun(dtypes.string)
-    def _next_func(string_handle):
-      multi_device_iterator = (
-          gen_dataset_ops.multi_device_iterator_from_string_handle(
-              string_handle=string_handle,
-              output_types=self._flat_output_types,
-              output_shapes=self._flat_output_shapes))
-      return gen_dataset_ops.multi_device_iterator_get_next_from_shard(
-          multi_device_iterator=multi_device_iterator,
-          shard_num=shard_num,
-          incarnation_id=incarnation_id,
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
-
-    @function.Defun(dtypes.string)
-    def _remote_next_func(string_handle):
-      return functional_ops.remote_call(
-          target=source_device,
-          args=[string_handle] + _next_func.captured_inputs,
-          Tout=self._flat_output_types,
-          f=_next_func)
-
-    self._next_func = _remote_next_func
-    self._next_captured_args = _remote_next_func.captured_inputs
-
-    @function.Defun(dtypes.string)
-    def _finalize_func(unused_string_handle):
-      return array_ops.constant(0, dtypes.int64)
-
-    @function.Defun(dtypes.string)
-    def _remote_finalize_func(string_handle):
-      return functional_ops.remote_call(
-          target=source_device,
-          args=[string_handle] + _finalize_func.captured_inputs,
-          Tout=[dtypes.int64],
-          f=_finalize_func)
-
-    self._finalize_func = _remote_finalize_func
-    self._finalize_captured_args = _remote_finalize_func.captured_inputs
-
-  def _as_variant_tensor(self):
-    with ops.device(self._target_device):
-      return core_gen_dataset_ops.generator_dataset(
-          self._init_captured_args,
-          self._next_captured_args,
-          self._finalize_captured_args,
-          init_func=self._init_func,
-          next_func=self._next_func,
-          finalize_func=self._finalize_func,
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
-
-  def _inputs(self):
-    # TODO(b/116506223): Determine which datasets should be used as inputs here.
-    return []
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-
-class MultiDeviceIterator(object):
-  """An iterator over multiple devices."""
-
-  def __init__(self,
-               dataset,
-               devices,
-               max_buffer_size=1,
-               prefetch_buffer_size=1,
-               source_device="/cpu:0"):
-    """Constructs a MultiDeviceIterator.
-
-    Args:
-      dataset: The input dataset to be iterated over.
-      devices: The list of devices to fetch data to.
-      max_buffer_size: Maximum size of the host side per device buffer to keep.
-      prefetch_buffer_size: if > 1, then we setup a buffer on each device
-        to prefetch into.
-      source_device: The host device to place the `dataset` on.
-    """
-    self._dataset = dataset
-    self._devices = devices
-    self._source_device = source_device
-    self._source_device_tensor = ops.convert_to_tensor(source_device)
-
-    self._flat_output_shapes = nest.flatten(
-        sparse.as_dense_shapes(self._dataset.output_shapes,
-                               self._dataset.output_classes))
-    self._flat_output_types = nest.flatten(
-        sparse.as_dense_types(self._dataset.output_types,
-                              self._dataset.output_classes))
-
-    # Create the MultiDeviceIterator.
-    with ops.device(self._source_device):
-      self._multi_device_iterator_resource = (
-          gen_dataset_ops.multi_device_iterator(
-              devices=self._devices,
-              shared_name="",
-              container="",
-              output_types=self._flat_output_types,
-              output_shapes=self._flat_output_shapes))
-
-      # The incarnation ID is used to ensure consistency between the per-device
-      # iterators and the multi-device iterator.
-      self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
-          self._dataset._as_variant_tensor(),  # pylint: disable=protected-access
-          self._multi_device_iterator_resource,
-          max_buffer_size=max_buffer_size)
-
-    # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to
-    # initialize the device side of the pipeline. This would allow the
-    # MultiDeviceIterator to choose, for example, to move some transformations
-    # into the device side from its input. It might be useful in rewriting.
-    # Create the per device iterators.
-    self._device_iterators = []
-    i = 0
-    for device in self._devices:
-      ds = _PerDeviceGenerator(
-          i, self._multi_device_iterator_resource, self._incarnation_id,
-          self._source_device_tensor, device, self._dataset.output_shapes,
-          self._dataset.output_types, self._dataset.output_classes)
-      if prefetch_buffer_size > 0:
-        ds = ds.prefetch(prefetch_buffer_size)
-      with ops.device(device):
-        self._device_iterators.append(ds.make_initializable_iterator())
-      i += 1
-
-    device_iterator_initializers = [
-        iterator.initializer for iterator in self._device_iterators
-    ]
-    self._initializer = control_flow_ops.group(*device_iterator_initializers)
-
-  def get_next(self):
-    result = []
-    i = 0
-    for device in self._devices:
-      with ops.device(device):
-        result.append(self._device_iterators[i].get_next())
-      i += 1
-    return result
-
-  @property
-  def initializer(self):
-    return self._initializer
-- 
GitLab


From 7b88cabfec45c9e04ab3d9cf1c2411c6dce4c694 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 17:42:47 -0700
Subject: [PATCH 0770/1357] Add xlogy and xdivy op.

PiperOrigin-RevId: 214700693
---
 .../compiler/tf2xla/kernels/binary_ops.cc     | 18 ++++
 tensorflow/contrib/makefile/tf_op_files.txt   |  2 +
 .../core/api_def/base_api/api_def_Xdivy.pbtxt |  4 +
 .../core/api_def/base_api/api_def_Xlogy.pbtxt |  4 +
 .../api_def/python_api/api_def_Xdivy.pbtxt    |  6 ++
 .../api_def/python_api/api_def_Xlogy.pbtxt    |  6 ++
 tensorflow/core/kernels/BUILD                 |  2 +
 .../core/kernels/cwise_op_gpu_xdivy.cu.cc     | 26 ++++++
 .../core/kernels/cwise_op_gpu_xlogy.cu.cc     | 26 ++++++
 tensorflow/core/kernels/cwise_op_xdivy.cc     | 38 ++++++++
 tensorflow/core/kernels/cwise_op_xlogy.cc     | 41 +++++++++
 tensorflow/core/kernels/cwise_ops.h           | 45 ++++++++++
 tensorflow/core/ops/math_grad.cc              | 34 +++++++
 tensorflow/core/ops/math_grad_test.cc         | 40 +++++++++
 tensorflow/core/ops/math_ops.cc               | 14 +++
 tensorflow/python/ops/math_grad.py            | 34 +++++++
 tensorflow/python/ops/math_grad_test.py       | 88 +++++++++++++++++++
 tensorflow/python/ops/math_ops_test.py        | 71 +++++++++++++++
 .../tools/api/golden/v1/tensorflow.math.pbtxt |  8 ++
 .../tools/api/golden/v2/tensorflow.math.pbtxt |  8 ++
 20 files changed, 515 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_Xdivy.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_Xlogy.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_Xdivy.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_Xlogy.pbtxt
 create mode 100644 tensorflow/core/kernels/cwise_op_gpu_xdivy.cu.cc
 create mode 100644 tensorflow/core/kernels/cwise_op_gpu_xlogy.cu.cc
 create mode 100644 tensorflow/core/kernels/cwise_op_xdivy.cc
 create mode 100644 tensorflow/core/kernels/cwise_op_xlogy.cc

diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
index 66676452d0..a988d3c33e 100644
--- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
@@ -103,6 +103,24 @@ static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
 XLA_MAKE_BINARY(FloorDiv,
                 FloorDivImpl(b, input_type(0), lhs, rhs, broadcast_helper));
 
+static xla::XlaOp XlogyImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
+                            xla::XlaOp y, const BCast& broadcast_helper) {
+  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  auto zero = XlaHelpers::Zero(b, dtype);
+  auto is_zero = xla::Eq(x, zero);
+  return xla::Select(is_zero, zero, xla::Mul(x, xla::Log(y)));
+}
+XLA_MAKE_BINARY(Xlogy, XlogyImpl(b, input_type(0), lhs, rhs, broadcast_helper));
+
+static xla::XlaOp XdivyImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
+                            xla::XlaOp y, const BCast& broadcast_helper) {
+  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  auto zero = XlaHelpers::Zero(b, dtype);
+  auto is_zero = xla::Eq(x, zero);
+  return xla::Select(is_zero, zero, xla::Div(x, y));
+}
+XLA_MAKE_BINARY(Xdivy, XdivyImpl(b, input_type(0), lhs, rhs, broadcast_helper));
+
 // Implementation of FloorMod. Pseudo-code:
 // T trunc_mod = std::fmod(x, y);
 // return (x < T(0)) == (y < T(0)) ? trunc_mod : std::fmod(trunc_mod + y, y);
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index f81a90809a..91af933cff 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -91,6 +91,8 @@ tensorflow/core/kernels/cwise_op_square.cc
 tensorflow/core/kernels/cwise_op_squared_difference.cc
 tensorflow/core/kernels/cwise_op_sub.cc
 tensorflow/core/kernels/cwise_op_tanh.cc
+tensorflow/core/kernels/cwise_op_xdivy.cc
+tensorflow/core/kernels/cwise_op_xlogy.cc
 tensorflow/core/kernels/cwise_ops_common.cc
 tensorflow/core/kernels/data_format_ops.cc
 tensorflow/core/kernels/decode_bmp_op.cc
diff --git a/tensorflow/core/api_def/base_api/api_def_Xdivy.pbtxt b/tensorflow/core/api_def/base_api/api_def_Xdivy.pbtxt
new file mode 100644
index 0000000000..ca107abc6b
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_Xdivy.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "Xdivy"
+  summary: "Returns 0 if x == 0, and x / y otherwise, elementwise."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_Xlogy.pbtxt b/tensorflow/core/api_def/base_api/api_def_Xlogy.pbtxt
new file mode 100644
index 0000000000..da625f7836
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_Xlogy.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "Xlogy"
+  summary: "Returns 0 if x == 0, and x * log(y) otherwise, elementwise."
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_Xdivy.pbtxt b/tensorflow/core/api_def/python_api/api_def_Xdivy.pbtxt
new file mode 100644
index 0000000000..984442ba2b
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_Xdivy.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Xdivy"
+  endpoint {
+    name: "math.xdivy"
+  }
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_Xlogy.pbtxt b/tensorflow/core/api_def/python_api/api_def_Xlogy.pbtxt
new file mode 100644
index 0000000000..b4a5299256
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_Xlogy.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Xlogy"
+  endpoint {
+    name: "math.xlogy"
+  }
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index ed5aff58b8..b08562d7d1 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -5254,6 +5254,8 @@ filegroup(
         "cwise_op_squared_difference.cc",
         "cwise_op_sub.cc",
         "cwise_op_tanh.cc",
+        "cwise_op_xlogy.cc",
+        "cwise_op_xdivy.cc",
         "data_format_ops.cc",
         "decode_wav_op.cc",
         "deep_conv2d.cc",
diff --git a/tensorflow/core/kernels/cwise_op_gpu_xdivy.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_xdivy.cu.cc
new file mode 100644
index 0000000000..e4b21a66c6
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_gpu_xdivy.cu.cc
@@ -0,0 +1,26 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
+
+namespace tensorflow {
+namespace functor {
+DEFINE_BINARY5(xdivy, Eigen::half, float, double, complex64, complex128);
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/cwise_op_gpu_xlogy.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_xlogy.cu.cc
new file mode 100644
index 0000000000..1e1b5a426e
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_gpu_xlogy.cu.cc
@@ -0,0 +1,26 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
+
+namespace tensorflow {
+namespace functor {
+DEFINE_BINARY5(xlogy, Eigen::half, float, double, complex64, complex128);
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/cwise_op_xdivy.cc b/tensorflow/core/kernels/cwise_op_xdivy.cc
new file mode 100644
index 0000000000..6a6aec5e86
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_xdivy.cc
@@ -0,0 +1,38 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/cwise_ops_common.h"
+
+namespace tensorflow {
+REGISTER5(BinaryOp, CPU, "Xdivy", functor::xdivy, float, Eigen::half, double,
+          complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                 \
+  REGISTER_KERNEL_BUILDER(                                         \
+      Name("Xdivy").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
+      BinaryOp<SYCLDevice, functor::xdivy<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+REGISTER_SYCL_KERNEL(double);
+#undef REGISTER_SYCL_KERNEL
+
+#endif  // TENSORFLOW_USE_SYCL
+
+#if GOOGLE_CUDA
+REGISTER5(BinaryOp, GPU, "Xdivy", functor::xdivy, float, Eigen::half, double,
+          complex64, complex128);
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_xlogy.cc b/tensorflow/core/kernels/cwise_op_xlogy.cc
new file mode 100644
index 0000000000..e71a9109b2
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_xlogy.cc
@@ -0,0 +1,41 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/cwise_ops_common.h"
+
+namespace tensorflow {
+REGISTER5(BinaryOp, CPU, "Xlogy", functor::xlogy, float, Eigen::half, double,
+          complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                 \
+  REGISTER_KERNEL_BUILDER(                                         \
+      Name("Xlogy").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
+      BinaryOp<SYCLDevice, functor::xlogy<TYPE>>);
+REGISTER_SYCL_KERNEL(Eigen::half);
+REGISTER_SYCL_KERNEL(float);
+REGISTER_SYCL_KERNEL(double);
+REGISTER_SYCL_KERNEL(complex64);
+REGISTER_SYCL_KERNEL(complex128);
+#undef REGISTER_SYCL_KERNEL
+
+#endif  // TENSORFLOW_USE_SYCL
+
+#if GOOGLE_CUDA
+REGISTER5(BinaryOp, GPU, "Xlogy", functor::xlogy, float, Eigen::half, double,
+          complex64, complex128);
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index 22eb66e979..66ba827a90 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -471,6 +471,45 @@ struct functor_traits<bitwise_xor_op<Scalar>> {
   enum { Cost = Eigen::NumTraits<Scalar>::AddCost, PacketAccess = true };
 };
 
+// TODO(srvasude): Add packet versions of this operation.
+template <typename Scalar>
+struct xlogy_op {
+  EIGEN_EMPTY_STRUCT_CTOR(xlogy_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
+  operator()(const Scalar& x, const Scalar& y) const {
+    if (x == Scalar(0.)) {
+      return Scalar(0.);
+    }
+    return x * numext::log(y);
+  }
+};
+
+template <typename Scalar>
+struct functor_traits<xlogy_op<Scalar>> {
+  enum {
+    Cost = (sizeof(Scalar) == 4 ? 40 : 85) + Eigen::NumTraits<Scalar>::MulCost,
+    PacketAccess = false
+  };
+};
+
+template <typename Scalar>
+// TODO(srvasude): Add packet versions of this operation.
+struct xdivy_op {
+  EIGEN_EMPTY_STRUCT_CTOR(xdivy_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
+  operator()(const Scalar& x, const Scalar& y) const {
+    if (x == Scalar(0.)) {
+      return Scalar(0.);
+    }
+    return x / y;
+  }
+};
+
+template <typename Scalar>
+struct functor_traits<xdivy_op<Scalar>> {
+  enum { Cost = Eigen::NumTraits<Scalar>::MulCost, PacketAccess = false };
+};
+
 }  // end namespace internal
 }  // end namespace Eigen
 
@@ -829,6 +868,12 @@ struct squared_difference
                   T, Eigen::internal::scalar_square_op<T>,
                   Eigen::internal::scalar_difference_op<T>>> {};
 
+template <typename T>
+struct xdivy : base<T, Eigen::internal::xdivy_op<T>> {};
+
+template <typename T>
+struct xlogy : base<T, Eigen::internal::xlogy_op<T>> {};
+
 template <typename T>
 struct less : base<T, Eigen::internal::less<T>, bool> {};
 
diff --git a/tensorflow/core/ops/math_grad.cc b/tensorflow/core/ops/math_grad.cc
index 07f876cb90..55dcc50325 100644
--- a/tensorflow/core/ops/math_grad.cc
+++ b/tensorflow/core/ops/math_grad.cc
@@ -549,6 +549,40 @@ Status PowGrad(const AttrSlice& attrs, FunctionDef* g) {
 }
 REGISTER_OP_GRADIENT("Pow", PowGrad);
 
+Status XlogyGrad(const AttrSlice& attrs, FunctionDef* g) {
+  // clang-format off
+  return GradForBinaryCwise(g, {
+      {{"zeros"}, "ZerosLike", {"x"}},
+      {{"is_x_zero"}, "NotEqual", {"x", "zeros"}},
+      {{"is_zero_cast"}, "Cast", {"is_x_zero"},
+        {{"SrcT", DT_BOOL}, {"DstT", "$T"}}},
+      {{"safe_logy"}, "Xlogy", {"is_zero_cast", "y"}},
+      {{"xlogygrad"}, "Xdivy", {"x", "y"}},
+      {{"gx"}, "Mul", {"safe_logy", "dz"}},
+      {{"gy"}, "Mul", {"xlogygrad", "dz"}},
+  });
+  // clang-format on
+}
+REGISTER_OP_GRADIENT("Xlogy", XlogyGrad);
+
+Status XdivyGrad(const AttrSlice& attrs, FunctionDef* g) {
+  // clang-format off
+  return GradForBinaryCwise(g, {
+      {{"zeros"}, "ZerosLike", {"x"}},
+      {{"is_x_zero"}, "NotEqual", {"x", "zeros"}},
+      {{"is_zero_cast"}, "Cast", {"is_x_zero"},
+        {{"SrcT", DT_BOOL}, {"DstT", "$T"}}},
+      {{"safe_divy"}, "Xdivy", {"is_zero_cast", "y"}},
+      {{"y2"}, "Square", {"y"}},
+      {{"negy2"}, "Neg", {"y2"}},
+      {{"xdivygrad"}, "Xdivy", {"x", "negy2"}},
+      {{"gx"}, "Mul", {"safe_divy", "dz"}},
+      {{"gy"}, "Mul", {"xdivygrad", "dz"}},
+  });
+  // clang-format on
+}
+REGISTER_OP_GRADIENT("Xdivy", XdivyGrad);
+
 Status MaximumMinimumGradHelper(const string& comparator,
                                 const AttrSlice& attrs, FunctionDef* g) {
   // clang-format off
diff --git a/tensorflow/core/ops/math_grad_test.cc b/tensorflow/core/ops/math_grad_test.cc
index 5ee79809ac..9fc6b34147 100644
--- a/tensorflow/core/ops/math_grad_test.cc
+++ b/tensorflow/core/ops/math_grad_test.cc
@@ -909,6 +909,46 @@ TEST_F(MathGradTest, ComplexPow) {
 }
 #endif  // TENSORFLOW_USE_SYCL
 
+TEST_F(MathGradTest, Xlogy) {
+  auto x = test::AsTensor<float>({0.f, 0.f, 2.f, 3.f, 4.f, 5.f},
+                                 TensorShape({2, 3}));
+  auto y = test::AsTensor<float>({.5f, 2.f}, TensorShape({2, 1}));
+  Tensor dx;
+  Tensor dy;
+  auto g = [](float x, float y) -> float { return x == 0. ? 0. : std::log(y); };
+  auto h = [](float x, float y) -> float { return x == 0. ? 0. : x / y; };
+  SymGrad("Xlogy", x, y, &dx, &dy);
+  test::ExpectClose(
+      dx, test::AsTensor<float>({g(0.f, .5f), g(0.f, 0.f), g(2.f, .5f),
+                                 g(3.f, 2.f), g(4.f, 2.f), g(5.f, 2.f)},
+                                TensorShape({2, 3})));
+  test::ExpectClose(
+      dy, test::AsTensor<float>({h(0.f, .5f) + h(0.f, 0.f) + h(2.f, .5f),
+                                 h(3.f, 2.f) + h(4.f, 2.f) + h(5.f, 2.f)},
+                                TensorShape({2, 1})));
+}
+
+TEST_F(MathGradTest, Xdivy) {
+  auto x = test::AsTensor<float>({0.f, 0.f, 2.f, 3.f, 4.f, 5.f},
+                                 TensorShape({2, 3}));
+  auto y = test::AsTensor<float>({.5f, 2.f}, TensorShape({2, 1}));
+  Tensor dx;
+  Tensor dy;
+  auto g = [](float x, float y) -> float { return x == 0. ? 0. : 1 / y; };
+  auto h = [](float x, float y) -> float {
+    return x == 0. ? 0. : -x / (y * y);
+  };
+  SymGrad("Xdivy", x, y, &dx, &dy);
+  test::ExpectClose(
+      dx, test::AsTensor<float>({g(0.f, .5f), g(0.f, 0.f), g(2.f, .5f),
+                                 g(3.f, 2.f), g(4.f, 2.f), g(5.f, 2.f)},
+                                TensorShape({2, 3})));
+  test::ExpectClose(
+      dy, test::AsTensor<float>({h(0.f, .5f) + h(0.f, 0.f) + h(2.f, .5f),
+                                 h(3.f, 2.f) + h(4.f, 2.f) + h(5.f, 2.f)},
+                                TensorShape({2, 1})));
+}
+
 TEST_F(MathGradTest, Maximum) {
   auto x = test::AsTensor<float>({-3.f, -2.f, -1.f, 1.f, 2.f, 3.f},
                                  TensorShape({2, 3}));
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 717263a9b0..3eff728f03 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -429,6 +429,20 @@ Returns (x - y)(x - y) element-wise.
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
+REGISTER_OP("Xlogy")
+    .Input("x: T")
+    .Input("y: T")
+    .Output("z: T")
+    .Attr("T: {half, float, double, complex64, complex128}")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+
+REGISTER_OP("Xdivy")
+    .Input("x: T")
+    .Input("y: T")
+    .Output("z: T")
+    .Attr("T: {half, float, double, complex64, complex128}")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+
 #undef BINARY_FEWER
 #undef BINARY_MORE
 
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 8e11c4bce1..35278d9680 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -516,6 +516,40 @@ def _Log1pGrad(op, grad):
     return grad * math_ops.reciprocal(1 + x)
 
 
+@ops.RegisterGradient("Xlogy")
+def _XLogyGrad(op, grad):
+  """Returns gradient of xlogy(x, y) with respect to x and y."""
+  x = op.inputs[0]
+  y = op.inputs[1]
+  sx = array_ops.shape(x)
+  sy = array_ops.shape(y)
+  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
+  with ops.control_dependencies([grad]):
+    not_zero_x = math_ops.cast(
+        math_ops.not_equal(x, math_ops.cast(0., dtype=x.dtype)), dtype=x.dtype)
+    partial_x = gen_math_ops.xlogy(not_zero_x, y)
+    partial_y = gen_math_ops.xdivy(x, y)
+    return (array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx),
+            array_ops.reshape(math_ops.reduce_sum(partial_y * grad, ry), sy))
+
+
+@ops.RegisterGradient("Xdivy")
+def _XDivyGrad(op, grad):
+  """Returns gradient of xdivy(x, y) with respect to x and y."""
+  x = op.inputs[0]
+  y = op.inputs[1]
+  sx = array_ops.shape(x)
+  sy = array_ops.shape(y)
+  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
+  with ops.control_dependencies([grad]):
+    not_zero_x = math_ops.cast(
+        math_ops.not_equal(x, math_ops.cast(0., dtype=x.dtype)), dtype=x.dtype)
+    partial_x = gen_math_ops.xdivy(not_zero_x, y)
+    partial_y = gen_math_ops.xdivy(math_ops.negative(x), y**2)
+    return (array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx),
+            array_ops.reshape(math_ops.reduce_sum(partial_y * grad, ry), sy))
+
+
 @ops.RegisterGradient("Sinh")
 def _SinhGrad(op, grad):
   """Returns grad * cosh(x)."""
diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py
index 7110e0958c..9cfb050942 100644
--- a/tensorflow/python/ops/math_grad_test.py
+++ b/tensorflow/python/ops/math_grad_test.py
@@ -256,5 +256,93 @@ class DivNoNanGradientTest(test.TestCase):
       self.assertAllClose(dy.eval(), np.zeros(y.shape.as_list()))
 
 
+class XlogyTest(test.TestCase):
+
+  def _xlogy_gradients(self, x, y):
+    xlogy_xgrad = self.evaluate(gradients.gradients(math_ops.xlogy(x, y), x)[0])
+    xlogy_ygrad = self.evaluate(gradients.gradients(math_ops.xlogy(x, y), y)[0])
+    return xlogy_xgrad, xlogy_ygrad
+
+  def testNonZeroValuesGrad(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(0.1, dtype=dtype)
+      y = constant_op.constant(3.1, dtype=dtype)
+      xlogy_xgrad, xlogy_ygrad = self._xlogy_gradients(x, y)
+      xlogy_expected_xgrad = self.evaluate(math_ops.log(y))
+      xlogy_expected_ygrad = self.evaluate(x / y)
+      self.assertAllClose(xlogy_expected_xgrad, xlogy_xgrad)
+      self.assertAllClose(xlogy_expected_ygrad, xlogy_ygrad)
+
+  def testZeroXGrad(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(0., dtype=dtype)
+      y = constant_op.constant(3.1, dtype=dtype)
+      xlogy_xgrad, xlogy_ygrad = self._xlogy_gradients(x, y)
+      zero = self.evaluate(x)
+      self.assertAllClose(zero, xlogy_xgrad)
+      self.assertAllClose(zero, xlogy_ygrad)
+
+  def testZeroYGrad(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(0.1, dtype=dtype)
+      y = constant_op.constant(0., dtype=dtype)
+      xlogy_xgrad, xlogy_ygrad = self._xlogy_gradients(x, y)
+      self.assertAllClose(-np.inf, xlogy_xgrad)
+      self.assertAllClose(np.inf, xlogy_ygrad)
+
+  def testZeroXYGrad(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(0., dtype=dtype)
+      y = constant_op.constant(0., dtype=dtype)
+      xlogy_xgrad, xlogy_ygrad = self._xlogy_gradients(x, y)
+      zero = self.evaluate(x)
+      self.assertAllClose(zero, xlogy_xgrad)
+      self.assertAllClose(zero, xlogy_ygrad)
+
+
+class XdivyTest(test.TestCase):
+
+  def _xdivy_gradients(self, x, y):
+    xdivy_xgrad = self.evaluate(gradients.gradients(math_ops.xdivy(x, y), x)[0])
+    xdivy_ygrad = self.evaluate(gradients.gradients(math_ops.xdivy(x, y), y)[0])
+    return xdivy_xgrad, xdivy_ygrad
+
+  def testNonZeroValuesGrad(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(0.1, dtype=dtype)
+      y = constant_op.constant(3.1, dtype=dtype)
+      xdivy_xgrad, xdivy_ygrad = self._xdivy_gradients(x, y)
+      xdivy_expected_xgrad = self.evaluate(1 / y)
+      xdivy_expected_ygrad = self.evaluate(-x / y**2)
+      self.assertAllClose(xdivy_expected_xgrad, xdivy_xgrad)
+      self.assertAllClose(xdivy_expected_ygrad, xdivy_ygrad)
+
+  def testZeroXGrad(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(0., dtype=dtype)
+      y = constant_op.constant(3.1, dtype=dtype)
+      xdivy_xgrad, xdivy_ygrad = self._xdivy_gradients(x, y)
+      zero = self.evaluate(x)
+      self.assertAllClose(zero, xdivy_xgrad)
+      self.assertAllClose(zero, xdivy_ygrad)
+
+  def testZeroYGrad(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(0.1, dtype=dtype)
+      y = constant_op.constant(0., dtype=dtype)
+      xdivy_xgrad, xdivy_ygrad = self._xdivy_gradients(x, y)
+      self.assertAllClose(np.inf, xdivy_xgrad)
+      self.assertAllClose(-np.inf, xdivy_ygrad)
+
+  def testZeroXYGrad(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(0., dtype=dtype)
+      y = constant_op.constant(0., dtype=dtype)
+      xdivy_xgrad, xdivy_ygrad = self._xdivy_gradients(x, y)
+      zero = self.evaluate(x)
+      self.assertAllClose(zero, xdivy_xgrad)
+      self.assertAllClose(zero, xdivy_ygrad)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index 1b01d1d37f..f051850d92 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -21,6 +21,7 @@ import numpy as np
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
@@ -488,5 +489,75 @@ class DivNoNanTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(tf_result, np_result)
 
 
+class XlogyTest(test_util.TensorFlowTestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def testXlogyNoZero(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant([[0.1, 0.2, 3.5], [-2., -5., 30.]], dtype=dtype)
+      y = constant_op.constant([[0.1, 0.2, 3.5], [3.1, 4., 2.]], dtype=dtype)
+      with self.cached_session(use_gpu=True):
+        xlogy = self.evaluate(math_ops.xlogy(x, y))
+        xtimeslogy = self.evaluate(x * math_ops.log(y))
+        self.assertAllClose(xlogy, xtimeslogy)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testXlogyWithZero(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(np.zeros((2, 3)), dtype=dtype)
+      y = constant_op.constant([[0.1, 0.2, 3.5], [0., 1., 2.]], dtype=dtype)
+      with self.cached_session(use_gpu=True):
+        xlogy_tf_np = self.evaluate(math_ops.xlogy(x, y))
+        zeros_np = self.evaluate(array_ops.zeros_like(y))
+        self.assertAllClose(xlogy_tf_np, zeros_np)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testXlogyWithZeroBroadcast(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant([[0.], [1.]], dtype=dtype)
+      y = constant_op.constant([[0.1, 0.2, 3.5], [0., 1., 2.]], dtype=dtype)
+      with self.cached_session(use_gpu=True):
+        xlogy_tf_np = self.evaluate(math_ops.xlogy(x, y))
+        zeros_np = self.evaluate(array_ops.zeros_like(y[0]))
+        xtimes_logy = self.evaluate(math_ops.log(y[1]))
+        self.assertAllClose(zeros_np, xlogy_tf_np[0])
+        self.assertAllClose(xtimes_logy, xlogy_tf_np[1])
+
+
+class XdivyTest(test_util.TensorFlowTestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def testXdivyNoZero(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant([[0.1, 0.2, 3.5], [-2., -5., 30.]], dtype=dtype)
+      y = constant_op.constant([[0.1, 0.2, 3.5], [3.1, 4., 2.]], dtype=dtype)
+      with self.cached_session(use_gpu=True):
+        xdivy = self.evaluate(math_ops.xdivy(x, y))
+        x_over_y = self.evaluate(x / y)
+        self.assertAllClose(xdivy, x_over_y)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testXdivyWithZero(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant(np.zeros((2, 3)), dtype=dtype)
+      y = constant_op.constant([[0.1, 0.2, 3.5], [0., 1., 2.]], dtype=dtype)
+      with self.cached_session(use_gpu=True):
+        xdivy_tf_np = self.evaluate(math_ops.xdivy(x, y))
+        zeros_np = self.evaluate(array_ops.zeros_like(y))
+        self.assertAllClose(xdivy_tf_np, zeros_np)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testXdivyWithZeroBroadcast(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      x = constant_op.constant([[0.], [1.]], dtype=dtype)
+      y = constant_op.constant([[0.1, 0.2, 3.5], [0., 1., 2.]], dtype=dtype)
+      with self.cached_session(use_gpu=True):
+        xdivy_tf_np = self.evaluate(math_ops.xdivy(x, y))
+        zeros_np = self.evaluate(array_ops.zeros_like(y[0]))
+        x_over_y = self.evaluate(1 / y[1])
+        self.assertAllClose(zeros_np, xdivy_tf_np[0])
+        self.assertAllClose(x_over_y, xdivy_tf_np[1])
+
+
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
index a308c76ebc..72856466ec 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
@@ -232,6 +232,14 @@ tf_module {
     name: "unsorted_segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "xdivy"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "xlogy"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "zeta"
     argspec: "args=[\'x\', \'q\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
index a308c76ebc..72856466ec 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
@@ -232,6 +232,14 @@ tf_module {
     name: "unsorted_segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "xdivy"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "xlogy"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "zeta"
     argspec: "args=[\'x\', \'q\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-- 
GitLab


From 595aad6373d8d6be8e7ec20f688a1b458c6bfab0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 17:55:15 -0700
Subject: [PATCH 0771/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 214701926
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 56 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 56 +++++++++++++++++++
 2 files changed, 112 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 0c64408892..cac4259356 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -76620,6 +76620,62 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "Xdivy"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Xlogy"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "ZerosLike"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index af7d6cbe42..e173c2d072 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -36760,6 +36760,62 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "Xdivy"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Xlogy"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "ZerosLike"
   input_arg {
-- 
GitLab


From c3af9dc70ae6c5df811c91c29da432469cb471fc Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 26 Sep 2018 17:57:29 -0700
Subject: [PATCH 0772/1357] [TF:XLA] Bump open source abseil revision to
 e291c279e458761e77a69b09b129d3d1e81f1e80

PiperOrigin-RevId: 214702169
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 108be8533b..9128a1aafe 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -106,11 +106,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e01d95528ea2137a4a27a88d1f57c6cb260aafed.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/e01d95528ea2137a4a27a88d1f57c6cb260aafed.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e291c279e458761e77a69b09b129d3d1e81f1e80.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/e291c279e458761e77a69b09b129d3d1e81f1e80.tar.gz",
         ],
-        sha256 = "84043ed402d2a2a6ba4cdddb7e85118b1158fd81fe4ac3a14adc343d054c1e2e",
-        strip_prefix = "abseil-cpp-e01d95528ea2137a4a27a88d1f57c6cb260aafed",
+        sha256 = "278a1af58b633be886fe81bf7061dca6b5fea99566850d1319fffdaa1a061792",
+        strip_prefix = "abseil-cpp-e291c279e458761e77a69b09b129d3d1e81f1e80",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
     )
 
-- 
GitLab


From 5d61748f4e9998c9d2017bd01864b8fcb6d2127a Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Wed, 26 Sep 2018 17:58:14 -0700
Subject: [PATCH 0773/1357] Fixed the bug which slows the TPU traning.

PiperOrigin-RevId: 214702243
---
 tensorflow/contrib/tpu/python/tpu/keras_support.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index f67e0e6aca..448676c95e 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -817,7 +817,8 @@ def _inject_tpu_inputs_for_dataset(tpu_assignment, mode,
   return input_specs, get_next_ops
 
 
-def _inject_tpu_inputs_for_infeed(tpu_assignment, mode, input_tensors, inputs):
+def _inject_tpu_inputs_for_infeed(tpu_assignment, mode,
+                                  core_id_place_holder, input_tensors, inputs):
   """Append core information to the set of inputs."""
   # This is used during compilation to identify the current TPU core and enable
   # concatenation operations across cores.
@@ -825,8 +826,6 @@ def _inject_tpu_inputs_for_infeed(tpu_assignment, mode, input_tensors, inputs):
     return input_tensors, inputs
 
   # Puts a place holder in input spec.
-  core_id_place_holder = array_ops.placeholder(
-      dtype=dtypes.int32, shape=[1], name='core_id')
   input_tensors = [core_id_place_holder] + input_tensors
 
   # Now fill the core id. For `num_cores` = 2, `batch_size` = 8, we fill the
@@ -874,6 +873,10 @@ class TPUFunction(object):
     self._compilation_cache = {}
     self._cloned_model = None
     self._cloned_optimizer = None
+    # Create a placeholder for the TPU core ID. Cache the placeholder to avoid
+    # modifying the graph for every batch.
+    self._core_id_place_holder = array_ops.placeholder(
+        dtype=dtypes.int32, shape=[1], name='core_id')
 
   def _specialize_model(self, input_specs, infeed_manager):
     """Specialize `self.model` (a Keras model) for the given input shapes."""
@@ -1141,7 +1144,8 @@ class TPUFunction(object):
     inputs = inputs[:len(input_tensors)]
     input_tensors, inputs = (
         _inject_tpu_inputs_for_infeed(
-            self._tpu_assignment, self.execution_mode, input_tensors, inputs))
+            self._tpu_assignment, self.execution_mode,
+            self._core_id_place_holder, input_tensors, inputs))
     return input_tensors, inputs
 
   def _process_outputs(self, outfeed_outputs):
-- 
GitLab


From 0c573ae93b3013b91d8d2493a1daed56d11ccc98 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Wed, 26 Sep 2018 18:03:34 -0700
Subject: [PATCH 0774/1357] Skip SymbolicGradientOp when doing constant folding
 in control flow functionalization.

If we want to evaluate SymbolicGradient op in constant folding, we need to construct Device object and attach it to FunctionLibraryRuntime. In graph rewriting pass, we do not have Device object created yet; it will only be created in XlaCompiler.

PiperOrigin-RevId: 214702943
---
 .../tf2xla/functionalize_control_flow.cc         | 16 +++++++++++++---
 .../core/common_runtime/graph_optimizer.cc       |  4 +++-
 tensorflow/core/common_runtime/graph_optimizer.h |  5 ++++-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 98b333a467..2d45507796 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -109,7 +109,8 @@ Status FunctionalizeControlFlowForFunction(
     auto associated_functions = iter.second;
     for (auto& associated_function : associated_functions) {
       string name = associated_function.func_name();
-      string canonicalized_name = Canonicalize(name, AttrSlice(&attrs));
+      string canonicalized_name =
+          Canonicalize(name, AttrSlice(&associated_function.attrs()));
       auto iter = canonicalized_name_to_new_name->find(canonicalized_name);
       string new_name;
       if (iter != canonicalized_name_to_new_name->end()) {
@@ -119,7 +120,8 @@ Status FunctionalizeControlFlowForFunction(
       } else {
         new_name = fld->UniqueFunctionName(absl::StrCat(name, "_f15n_"));
         TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
-            name, new_name, attrs, fld, flr, canonicalized_name_to_new_name));
+            name, new_name, associated_function.attrs(), fld, flr,
+            canonicalized_name_to_new_name));
         (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
       }
       // Notice that if "n" is a function call, RewriteAssociatedFunction() will
@@ -152,9 +154,17 @@ Status FunctionalizeControlFlowForFunction(
   opts.set_do_function_inlining(true);
   opts.set_do_constant_folding(true);
   GraphOptimizer optimizer(opts);
+  auto cf_consider_fn = [](const Node* n) {
+    // Skip SymbolicGradient op when doing constant folding.
+    // Enabling SymbolicGradient op in constant folding requires
+    // flr->device() to be non-null, and here we have not constructed
+    // proper Device object yet (it will be constructed in XlaCompiler).
+    return n->type_string() != FunctionLibraryDefinition::kGradientOp;
+  };
   optimizer.Optimize(flr, flr->env(),
                      /*device=*/nullptr, &optimized_graph,
-                     /*shape_map=*/nullptr);
+                     /*shape_map=*/nullptr, /*cse_consider_fn=*/nullptr,
+                     cf_consider_fn);
 
   // Functionalize the function body.
   if (VLOG_IS_ON(4)) {
diff --git a/tensorflow/core/common_runtime/graph_optimizer.cc b/tensorflow/core/common_runtime/graph_optimizer.cc
index 96ecfb41d4..37a979a8f1 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.cc
+++ b/tensorflow/core/common_runtime/graph_optimizer.cc
@@ -38,7 +38,8 @@ void GraphOptimizer::Optimize(
     std::unique_ptr<Graph>* graph,
     const std::unordered_map<string, std::vector<PartialTensorShape>>*
         shape_map,
-    const std::function<bool(const Node*)>& cse_consider_fn) {
+    const std::function<bool(const Node*)>& cse_consider_fn,
+    const std::function<bool(const Node*)>& cf_consider_fn) {
   Graph* g = graph->get();
   DumpGraph("Initial", g);
 
@@ -62,6 +63,7 @@ void GraphOptimizer::Optimize(
     if (opts_.do_constant_folding()) {
       ConstantFoldingOptions cf_opts;
       cf_opts.shape_map = shape_map;
+      cf_opts.consider = cf_consider_fn;
       if (opts_.max_folded_constant_in_bytes() > 0) {
         cf_opts.max_constant_size_in_bytes =
             opts_.max_folded_constant_in_bytes();
diff --git a/tensorflow/core/common_runtime/graph_optimizer.h b/tensorflow/core/common_runtime/graph_optimizer.h
index 80246281cd..789cc56942 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.h
+++ b/tensorflow/core/common_runtime/graph_optimizer.h
@@ -45,12 +45,15 @@ class GraphOptimizer {
   //
   // If cse_consider_fn is not null then only nodes for which cse_consider_fn
   // returns true will be considered for CSE.
+  // If cf_consider_fn is not null then only nodes for which cf_consider_fn
+  // returns true will be considered for CF.
   void Optimize(
       FunctionLibraryRuntime* runtime, Env* env, Device* device,
       std::unique_ptr<Graph>* graph,
       const std::unordered_map<string, std::vector<PartialTensorShape>>*
           shape_map,
-      const std::function<bool(const Node*)>& cse_consider_fn = nullptr);
+      const std::function<bool(const Node*)>& cse_consider_fn = nullptr,
+      const std::function<bool(const Node*)>& cf_consider_fn = nullptr);
 
   const OptimizerOptions& options() { return opts_; }
 
-- 
GitLab


From 3fdee6b8f1199042046ea7851898b17fb3a4c0a8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 18:18:38 -0700
Subject: [PATCH 0775/1357] Refactor build deps by making :framework depend on
 :feature_util to not use the same source dependency twice.

PiperOrigin-RevId: 214704620
---
 tensorflow/core/BUILD | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d85cb379bb..8bf53958b6 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -707,14 +707,11 @@ cc_library(
 cc_library(
     name = "feature_util",
     srcs = ["example/feature_util.cc"],
-    hdrs = [
-        "example/feature_util.h",
-        "platform/types.h",
-    ],
+    hdrs = ["example/feature_util.h"],
     visibility = ["//visibility:public"],
     deps = [
         ":core_stringpiece",
-        ":platform_protobuf",
+        ":lib_proto_parsing",
         ":protos_all_cc",
     ],
 )
@@ -2561,6 +2558,7 @@ tf_cuda_library(
             "**/*test*",
             "**/*main.cc",
             "example/example_parser_configuration.*",
+            "example/feature_util.cc",
             "util/reporter.cc",
             "framework/fake_input.*",
             "framework/op_gen_lib.*",
@@ -2590,6 +2588,7 @@ tf_cuda_library(
         ],
     }),
     deps = [
+        ":feature_util",
         ":lib",
         ":lib_internal",
         ":protos_all_proto_text",
-- 
GitLab


From 78ee51bb8c41d616c01674e405784a51e4ec98a5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 18:21:58 -0700
Subject: [PATCH 0776/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 214704902

---
 tensorflow/go/op/wrappers.go | 1628 +++++++++++++++++-----------------
 1 file changed, 814 insertions(+), 814 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index bb934ca050..065c7e3011 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -2562,92 +2562,6 @@ func Fill(scope *Scope, dims tf.Output, value tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
-// EditDistanceAttr is an optional argument to EditDistance.
-type EditDistanceAttr func(optionalAttr)
-
-// EditDistanceNormalize sets the optional normalize attribute to value.
-//
-// value: boolean (if true, edit distances are normalized by length of truth).
-//
-// The output is:
-// If not specified, defaults to true
-func EditDistanceNormalize(value bool) EditDistanceAttr {
-	return func(m optionalAttr) {
-		m["normalize"] = value
-	}
-}
-
-// Computes the (possibly normalized) Levenshtein Edit Distance.
-//
-// The inputs are variable-length sequences provided by SparseTensors
-//   (hypothesis_indices, hypothesis_values, hypothesis_shape)
-// and
-//   (truth_indices, truth_values, truth_shape).
-//
-// The inputs are:
-//
-// Arguments:
-//	hypothesis_indices: The indices of the hypothesis list SparseTensor.
-// This is an N x R int64 matrix.
-//	hypothesis_values: The values of the hypothesis list SparseTensor.
-// This is an N-length vector.
-//	hypothesis_shape: The shape of the hypothesis list SparseTensor.
-// This is an R-length vector.
-//	truth_indices: The indices of the truth list SparseTensor.
-// This is an M x R int64 matrix.
-//	truth_values: The values of the truth list SparseTensor.
-// This is an M-length vector.
-//	truth_shape: truth indices, vector.
-//
-// Returns A dense float tensor with rank R - 1.
-//
-// For the example input:
-//
-//     // hypothesis represents a 2x1 matrix with variable-length values:
-//     //   (0,0) = ["a"]
-//     //   (1,0) = ["b"]
-//     hypothesis_indices = [[0, 0, 0],
-//                           [1, 0, 0]]
-//     hypothesis_values = ["a", "b"]
-//     hypothesis_shape = [2, 1, 1]
-//
-//     // truth represents a 2x2 matrix with variable-length values:
-//     //   (0,0) = []
-//     //   (0,1) = ["a"]
-//     //   (1,0) = ["b", "c"]
-//     //   (1,1) = ["a"]
-//     truth_indices = [[0, 1, 0],
-//                      [1, 0, 0],
-//                      [1, 0, 1],
-//                      [1, 1, 0]]
-//     truth_values = ["a", "b", "c", "a"]
-//     truth_shape = [2, 2, 2]
-//     normalize = true
-//
-// The output will be:
-//
-//     // output is a 2x2 matrix with edit distances normalized by truth lengths.
-//     output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis
-//               [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis
-func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "EditDistance",
-		Input: []tf.Input{
-			hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Reverses specific dimensions of a tensor.
 //
 // Given a `tensor`, and a `bool` tensor `dims` representing the dimensions
@@ -3986,61 +3900,163 @@ func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candida
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes the minimum along segments of a tensor.
+// FixedUnigramCandidateSamplerAttr is an optional argument to FixedUnigramCandidateSampler.
+type FixedUnigramCandidateSamplerAttr func(optionalAttr)
+
+// FixedUnigramCandidateSamplerVocabFile sets the optional vocab_file attribute to value.
 //
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#segmentation)
-// for an explanation of segments.
+// value: Each valid line in this file (which should have a CSV-like format)
+// corresponds to a valid word ID. IDs are in sequential order, starting from
+// num_reserved_ids. The last entry in each line is expected to be a value
+// corresponding to the count or relative probability. Exactly one of vocab_file
+// and unigrams needs to be passed to this op.
+// If not specified, defaults to ""
+func FixedUnigramCandidateSamplerVocabFile(value string) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["vocab_file"] = value
+	}
+}
+
+// FixedUnigramCandidateSamplerDistortion sets the optional distortion attribute to value.
 //
-// This operator is similar to the unsorted segment sum operator found
-// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
-// Instead of computing the sum over segments, it computes the minimum such that:
+// value: The distortion is used to skew the unigram probability distribution.
+// Each weight is first raised to the distortion's power before adding to the
+// internal unigram distribution. As a result, distortion = 1.0 gives regular
+// unigram sampling (as defined by the vocab file), and distortion = 0.0 gives
+// a uniform distribution.
+// If not specified, defaults to 1
+func FixedUnigramCandidateSamplerDistortion(value float32) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["distortion"] = value
+	}
+}
+
+// FixedUnigramCandidateSamplerNumReservedIds sets the optional num_reserved_ids attribute to value.
 //
-// \\(output_i = \min_{j...} data_[j...]\\) where min is over tuples `j...` such
-// that `segment_ids[j...] == i`.
+// value: Optionally some reserved IDs can be added in the range [0,
+// ..., num_reserved_ids) by the users. One use case is that a special unknown
+// word token is used as ID 0. These IDs will have a sampling probability of 0.
+// If not specified, defaults to 0
+func FixedUnigramCandidateSamplerNumReservedIds(value int64) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["num_reserved_ids"] = value
+	}
+}
+
+// FixedUnigramCandidateSamplerNumShards sets the optional num_shards attribute to value.
 //
-// If the minimum is empty for a given segment ID `i`, it outputs the largest
-// possible value for the specific numeric type,
-// `output[i] = numeric_limits<T>::max()`.
+// value: A sampler can be used to sample from a subset of the original range
+// in order to speed up the whole computation through parallelism. This parameter
+// (together with 'shard') indicates the number of partitions that are being
+// used in the overall computation.
+// If not specified, defaults to 1
 //
-// If the given segment ID `i` is negative, then the corresponding value is
-// dropped, and will not be included in the result.
+// REQUIRES: value >= 1
+func FixedUnigramCandidateSamplerNumShards(value int64) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["num_shards"] = value
+	}
+}
+
+// FixedUnigramCandidateSamplerShard sets the optional shard attribute to value.
 //
-// Arguments:
+// value: A sampler can be used to sample from a subset of the original range
+// in order to speed up the whole computation through parallelism. This parameter
+// (together with 'num_shards') indicates the particular partition number of a
+// sampler op, when partitioning is being used.
+// If not specified, defaults to 0
 //
-//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
+// REQUIRES: value >= 0
+func FixedUnigramCandidateSamplerShard(value int64) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["shard"] = value
+	}
+}
+
+// FixedUnigramCandidateSamplerUnigrams sets the optional unigrams attribute to value.
 //
+// value: A list of unigram counts or probabilities, one per ID in sequential
+// order. Exactly one of vocab_file and unigrams should be passed to this op.
+// If not specified, defaults to <>
+func FixedUnigramCandidateSamplerUnigrams(value []float32) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["unigrams"] = value
+	}
+}
+
+// FixedUnigramCandidateSamplerSeed sets the optional seed attribute to value.
 //
-// Returns Has same shape as data, except for the first `segment_ids.rank`
-// dimensions, which are replaced with a single dimension which has size
-// `num_segments`.
-func UnsortedSegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func FixedUnigramCandidateSamplerSeed(value int64) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "UnsortedSegmentMin",
-		Input: []tf.Input{
-			data, segment_ids, num_segments,
-		},
+}
+
+// FixedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func FixedUnigramCandidateSamplerSeed2(value int64) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes rectified linear 6: `min(max(features, 0), 6)`.
-func Relu6(scope *Scope, features tf.Output) (activations tf.Output) {
+// Generates labels for candidate sampling with a learned unigram distribution.
+//
+// A unigram sampler could use a fixed unigram distribution read from a
+// file or passed in as an in-memory array instead of building up the distribution
+// from data on the fly. There is also an option to skew the distribution by
+// applying a distortion power to the weights.
+//
+// The vocabulary file should be in CSV-like format, with the last field
+// being the weight associated with the word.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
+//
+// Arguments:
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to randomly sample.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
+//
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func FixedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...FixedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Relu6",
+		Type: "FixedUnigramCandidateSampler",
 		Input: []tf.Input{
-			features,
+			true_classes,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
 // Computes the sum along segments of a tensor.
@@ -4441,6 +4457,162 @@ func SlideDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output,
 	return op.Output(0)
 }
 
+// EditDistanceAttr is an optional argument to EditDistance.
+type EditDistanceAttr func(optionalAttr)
+
+// EditDistanceNormalize sets the optional normalize attribute to value.
+//
+// value: boolean (if true, edit distances are normalized by length of truth).
+//
+// The output is:
+// If not specified, defaults to true
+func EditDistanceNormalize(value bool) EditDistanceAttr {
+	return func(m optionalAttr) {
+		m["normalize"] = value
+	}
+}
+
+// Computes the (possibly normalized) Levenshtein Edit Distance.
+//
+// The inputs are variable-length sequences provided by SparseTensors
+//   (hypothesis_indices, hypothesis_values, hypothesis_shape)
+// and
+//   (truth_indices, truth_values, truth_shape).
+//
+// The inputs are:
+//
+// Arguments:
+//	hypothesis_indices: The indices of the hypothesis list SparseTensor.
+// This is an N x R int64 matrix.
+//	hypothesis_values: The values of the hypothesis list SparseTensor.
+// This is an N-length vector.
+//	hypothesis_shape: The shape of the hypothesis list SparseTensor.
+// This is an R-length vector.
+//	truth_indices: The indices of the truth list SparseTensor.
+// This is an M x R int64 matrix.
+//	truth_values: The values of the truth list SparseTensor.
+// This is an M-length vector.
+//	truth_shape: truth indices, vector.
+//
+// Returns A dense float tensor with rank R - 1.
+//
+// For the example input:
+//
+//     // hypothesis represents a 2x1 matrix with variable-length values:
+//     //   (0,0) = ["a"]
+//     //   (1,0) = ["b"]
+//     hypothesis_indices = [[0, 0, 0],
+//                           [1, 0, 0]]
+//     hypothesis_values = ["a", "b"]
+//     hypothesis_shape = [2, 1, 1]
+//
+//     // truth represents a 2x2 matrix with variable-length values:
+//     //   (0,0) = []
+//     //   (0,1) = ["a"]
+//     //   (1,0) = ["b", "c"]
+//     //   (1,1) = ["a"]
+//     truth_indices = [[0, 1, 0],
+//                      [1, 0, 0],
+//                      [1, 0, 1],
+//                      [1, 1, 0]]
+//     truth_values = ["a", "b", "c", "a"]
+//     truth_shape = [2, 2, 2]
+//     normalize = true
+//
+// The output will be:
+//
+//     // output is a 2x2 matrix with edit distances normalized by truth lengths.
+//     output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis
+//               [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis
+func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "EditDistance",
+		Input: []tf.Input{
+			hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
+type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
+
+// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, height, width, channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, channels, height, width].
+// If not specified, defaults to "NHWC"
+func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of depthwise convolution with respect to the input.
+//
+// Arguments:
+//	input_sizes: An integer vector representing the shape of `input`, based
+// on `data_format`.  For example, if `data_format` is 'NHWC' then
+//  `input` is a 4-D `[batch, height, width, channels]` tensor.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
+//	out_backprop: 4-D with shape  based on `data_format`.
+// For example, if `data_format` is 'NHWC' then
+// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution.
+//	padding: The type of padding algorithm to use.
+//
+// Returns 4-D with shape according to `data_format`.  For example, if
+// `data_format` is 'NHWC', output shape is `[batch, in_height,
+// in_width, in_channels]`.  Gradient w.r.t. the input of the
+// convolution.
+func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DepthwiseConv2dNativeBackpropInput",
+		Input: []tf.Input{
+			input_sizes, filter, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // ApproximateEqualAttr is an optional argument to ApproximateEqual.
 type ApproximateEqualAttr func(optionalAttr)
 
@@ -4609,33 +4781,90 @@ func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values t
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Returns x + y element-wise.
+// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler.
+type AllCandidateSamplerAttr func(optionalAttr)
+
+// AllCandidateSamplerSeed sets the optional seed attribute to value.
 //
-// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Generates labels for candidate sampling with a learned unigram distribution.
+//
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
+//
+// Arguments:
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to produce.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "AddV2",
+		Type: "AllCandidateSampler",
 		Input: []tf.Input{
-			x, y,
+			true_classes,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes exponential of x element-wise.  \\(y = e^x\\).
-func Exp(scope *Scope, x tf.Output) (y tf.Output) {
+// Returns x + y element-wise.
+//
+// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Exp",
+		Type: "AddV2",
 		Input: []tf.Input{
-			x,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
@@ -4702,180 +4931,67 @@ func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgM
 
 // Convert the quantized 'input' tensor into a lower-precision 'output', using the
 //
-// output range specified with 'requested_output_min' and 'requested_output_max'.
-//
-// [input_min, input_max] are scalar floats that specify the range for the float
-// interpretation of the 'input' data. For example, if input_min is -1.0f and
-// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
-//
-// Arguments:
-//
-//	input_min: The float value that the minimum quantized input value represents.
-//	input_max: The float value that the maximum quantized input value represents.
-//	requested_output_min: The float value that the minimum quantized output value represents.
-//	requested_output_max: The float value that the maximum quantized output value represents.
-//	out_type: The type of the output. Should be a lower bit depth than Tinput.
-//
-// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
-func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"out_type": out_type}
-	opspec := tf.OpSpec{
-		Type: "Requantize",
-		Input: []tf.Input{
-			input, input_min, input_max, requested_output_min, requested_output_max,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// PreventGradientAttr is an optional argument to PreventGradient.
-type PreventGradientAttr func(optionalAttr)
-
-// PreventGradientMessage sets the optional message attribute to value.
-//
-// value: Will be printed in the error when anyone tries to differentiate
-// this operation.
-// If not specified, defaults to ""
-func PreventGradientMessage(value string) PreventGradientAttr {
-	return func(m optionalAttr) {
-		m["message"] = value
-	}
-}
-
-// An identity op that triggers an error if a gradient is requested.
-//
-// When executed in a graph, this op outputs its input tensor as-is.
-//
-// When building ops to compute gradients, the TensorFlow gradient system
-// will return an error when trying to lookup the gradient of this op,
-// because no gradient must ever be registered for this function.  This
-// op exists to prevent subtle bugs from silently returning unimplemented
-// gradients in some corner cases.
-//
-// Arguments:
-//	input: any tensor.
-//
-// Returns the same input tensor.
-func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "PreventGradient",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes asin of x element-wise.
-func Asin(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Asin",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the maximum along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// This operator is similar to the unsorted segment sum operator found
-// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
-// Instead of computing the sum over segments, it computes the maximum such that:
-//
-// \\(output_i = \max_{j...} data[j...]\\) where max is over tuples `j...` such
-// that `segment_ids[j...] == i`.
-//
-// If the maximum is empty for a given segment ID `i`, it outputs the smallest
-// possible value for the specific numeric type,
-// `output[i] = numeric_limits<T>::lowest()`.
-//
-// If the given segment ID `i` is negative, then the corresponding value is
-// dropped, and will not be included in the result.
+// output range specified with 'requested_output_min' and 'requested_output_max'.
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentMax.png" alt>
-// </div>
+// [input_min, input_max] are scalar floats that specify the range for the float
+// interpretation of the 'input' data. For example, if input_min is -1.0f and
+// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
 //
 // Arguments:
 //
-//	segment_ids: A tensor whose shape is a prefix of `data.shape`.END
-//   }
-//   out_arg {
-//     name: "output"
-//     description: <<END
-// Has same shape as data, except for the first `segment_ids.rank`
-// dimensions, which are replaced with a single dimension which has size
-// `num_segments`.
+//	input_min: The float value that the minimum quantized input value represents.
+//	input_max: The float value that the maximum quantized input value represents.
+//	requested_output_min: The float value that the minimum quantized output value represents.
+//	requested_output_max: The float value that the maximum quantized output value represents.
+//	out_type: The type of the output. Should be a lower bit depth than Tinput.
 //
-func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
+func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"out_type": out_type}
 	opspec := tf.OpSpec{
-		Type: "UnsortedSegmentMax",
+		Type: "Requantize",
 		Input: []tf.Input{
-			data, segment_ids, num_segments,
+			input, input_min, input_max, requested_output_min, requested_output_max,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// NthElementAttr is an optional argument to NthElement.
-type NthElementAttr func(optionalAttr)
+// PreventGradientAttr is an optional argument to PreventGradient.
+type PreventGradientAttr func(optionalAttr)
 
-// NthElementReverse sets the optional reverse attribute to value.
+// PreventGradientMessage sets the optional message attribute to value.
 //
-// value: When set to True, find the nth-largest value in the vector and vice
-// versa.
-// If not specified, defaults to false
-func NthElementReverse(value bool) NthElementAttr {
+// value: Will be printed in the error when anyone tries to differentiate
+// this operation.
+// If not specified, defaults to ""
+func PreventGradientMessage(value string) PreventGradientAttr {
 	return func(m optionalAttr) {
-		m["reverse"] = value
+		m["message"] = value
 	}
 }
 
-// Finds values of the `n`-th order statistic for the last dimension.
-//
-// If the input is a vector (rank-1), finds the entries which is the nth-smallest
-// value in the vector and outputs their values as scalar tensor.
+// An identity op that triggers an error if a gradient is requested.
 //
-// For matrices (resp. higher rank input), computes the entries which is the
-// nth-smallest value in each row (resp. vector along the last dimension). Thus,
+// When executed in a graph, this op outputs its input tensor as-is.
 //
-//     values.shape = input.shape[:-1]
+// When building ops to compute gradients, the TensorFlow gradient system
+// will return an error when trying to lookup the gradient of this op,
+// because no gradient must ever be registered for this function.  This
+// op exists to prevent subtle bugs from silently returning unimplemented
+// gradients in some corner cases.
 //
 // Arguments:
-//	input: 1-D or higher with last dimension at least `n+1`.
-//	n: 0-D. Position of sorted vector to select along the last dimension (along
-// each row for matrices). Valid range of n is `[0, input.shape[:-1])`
+//	input: any tensor.
 //
-// Returns The `n`-th order statistic along each last dimensional slice.
-func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
+// Returns the same input tensor.
+func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -4884,9 +5000,9 @@ func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthEleme
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "NthElement",
+		Type: "PreventGradient",
 		Input: []tf.Input{
-			input, n,
+			input,
 		},
 		Attrs: attrs,
 	}
@@ -4894,6 +5010,21 @@ func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthEleme
 	return op.Output(0)
 }
 
+// Computes asin of x element-wise.
+func Asin(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Asin",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the sum along sparse segments of a tensor.
 //
 // Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
@@ -6797,6 +6928,63 @@ func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf.
 	return components
 }
 
+// Computes rectified linear 6: `min(max(features, 0), 6)`.
+func Relu6(scope *Scope, features tf.Output) (activations tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Relu6",
+		Input: []tf.Input{
+			features,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the minimum along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#segmentation)
+// for an explanation of segments.
+//
+// This operator is similar to the unsorted segment sum operator found
+// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
+// Instead of computing the sum over segments, it computes the minimum such that:
+//
+// \\(output_i = \min_{j...} data_[j...]\\) where min is over tuples `j...` such
+// that `segment_ids[j...] == i`.
+//
+// If the minimum is empty for a given segment ID `i`, it outputs the largest
+// possible value for the specific numeric type,
+// `output[i] = numeric_limits<T>::max()`.
+//
+// If the given segment ID `i` is negative, then the corresponding value is
+// dropped, and will not be included in the result.
+//
+// Arguments:
+//
+//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
+//
+//
+// Returns Has same shape as data, except for the first `segment_ids.rank`
+// dimensions, which are replaced with a single dimension which has size
+// `num_segments`.
+func UnsortedSegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "UnsortedSegmentMin",
+		Input: []tf.Input{
+			data, segment_ids, num_segments,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes rectified linear gradients for a Relu operation.
 //
 // Arguments:
@@ -7770,6 +7958,44 @@ func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAt
 	return op.Output(0)
 }
 
+// Bucketizes 'input' based on 'boundaries'.
+//
+// For example, if the inputs are
+//     boundaries = [0, 10, 100]
+//     input = [[-5, 10000]
+//              [150,   10]
+//              [5,    100]]
+//
+// then the output will be
+//     output = [[0, 3]
+//               [3, 2]
+//               [1, 3]]
+//
+// Arguments:
+//	input: Any shape of Tensor contains with int or float type.
+//	boundaries: A sorted list of floats gives the boundary of the buckets.
+//
+// Returns Same shape with 'input', each value of input replaced with bucket index.
+//
+// @compatibility(numpy)
+// Equivalent to np.digitize.
+// @end_compatibility
+func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"boundaries": boundaries}
+	opspec := tf.OpSpec{
+		Type: "Bucketize",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2.
 type FusedBatchNormV2Attr func(optionalAttr)
 
@@ -8851,23 +9077,87 @@ func OneHotAxis(value int64) OneHotAttr {
 //
 // Then output is `[2 x 2 x 3]`:
 //
-//     ```output =
-//       [
-//         [1.0, 0.0, 0.0]  // one_hot(0)
-//         [0.0, 0.0, 1.0]  // one_hot(2)
-//       ][
-//         [0.0, 1.0, 0.0]  // one_hot(1)
-//         [0.0, 0.0, 0.0]  // one_hot(-1)
-//       ]```
+//     ```output =
+//       [
+//         [1.0, 0.0, 0.0]  // one_hot(0)
+//         [0.0, 0.0, 1.0]  // one_hot(2)
+//       ][
+//         [0.0, 1.0, 0.0]  // one_hot(1)
+//         [0.0, 0.0, 0.0]  // one_hot(-1)
+//       ]```
+//
+// Arguments:
+//	indices: A tensor of indices.
+//	depth: A scalar defining the depth of the one hot dimension.
+//	on_value: A scalar defining the value to fill in output when `indices[j] = i`.
+//	off_value: A scalar defining the value to fill in output when `indices[j] != i`.
+//
+// Returns The one-hot tensor.
+func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "OneHot",
+		Input: []tf.Input{
+			indices, depth, on_value, off_value,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes exponential of x element-wise.  \\(y = e^x\\).
+func Exp(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Exp",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// NthElementAttr is an optional argument to NthElement.
+type NthElementAttr func(optionalAttr)
+
+// NthElementReverse sets the optional reverse attribute to value.
+//
+// value: When set to True, find the nth-largest value in the vector and vice
+// versa.
+// If not specified, defaults to false
+func NthElementReverse(value bool) NthElementAttr {
+	return func(m optionalAttr) {
+		m["reverse"] = value
+	}
+}
+
+// Finds values of the `n`-th order statistic for the last dimension.
+//
+// If the input is a vector (rank-1), finds the entries which is the nth-smallest
+// value in the vector and outputs their values as scalar tensor.
+//
+// For matrices (resp. higher rank input), computes the entries which is the
+// nth-smallest value in each row (resp. vector along the last dimension). Thus,
+//
+//     values.shape = input.shape[:-1]
 //
 // Arguments:
-//	indices: A tensor of indices.
-//	depth: A scalar defining the depth of the one hot dimension.
-//	on_value: A scalar defining the value to fill in output when `indices[j] = i`.
-//	off_value: A scalar defining the value to fill in output when `indices[j] != i`.
+//	input: 1-D or higher with last dimension at least `n+1`.
+//	n: 0-D. Position of sorted vector to select along the last dimension (along
+// each row for matrices). Valid range of n is `[0, input.shape[:-1])`
 //
-// Returns The one-hot tensor.
-func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) {
+// Returns The `n`-th order statistic along each last dimensional slice.
+func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -8876,9 +9166,9 @@ func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "OneHot",
+		Type: "NthElement",
 		Input: []tf.Input{
-			indices, depth, on_value, off_value,
+			input, n,
 		},
 		Attrs: attrs,
 	}
@@ -8886,6 +9176,55 @@ func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output
 	return op.Output(0)
 }
 
+// Computes the maximum along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// This operator is similar to the unsorted segment sum operator found
+// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
+// Instead of computing the sum over segments, it computes the maximum such that:
+//
+// \\(output_i = \max_{j...} data[j...]\\) where max is over tuples `j...` such
+// that `segment_ids[j...] == i`.
+//
+// If the maximum is empty for a given segment ID `i`, it outputs the smallest
+// possible value for the specific numeric type,
+// `output[i] = numeric_limits<T>::lowest()`.
+//
+// If the given segment ID `i` is negative, then the corresponding value is
+// dropped, and will not be included in the result.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentMax.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A tensor whose shape is a prefix of `data.shape`.END
+//   }
+//   out_arg {
+//     name: "output"
+//     description: <<END
+// Has same shape as data, except for the first `segment_ids.rank`
+// dimensions, which are replaced with a single dimension which has size
+// `num_segments`.
+//
+func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "UnsortedSegmentMax",
+		Input: []tf.Input{
+			data, segment_ids, num_segments,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Transforms a vector of brain.Example protos (as strings) into typed tensors.
 //
 // Arguments:
@@ -10027,6 +10366,118 @@ func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, li
 	return scope.AddOperation(opspec)
 }
 
+// Calculates gains for each feature and returns the best possible split information for the feature.
+//
+// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature.
+//
+// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split.
+//
+// In this manner, the output is the best split per features and per node, so that it needs to be combined later to produce the best split for each node (among all possible features).
+//
+// The length of output lists are all of the same length, `num_features`.
+// The output shapes are compatible in a way that the first dimension of all tensors of all lists are the same and equal to the number of possible split nodes for each feature.
+//
+// Arguments:
+//	node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive).
+//	stats_summary_list: A list of Rank 3 tensor (#shape=[max_splits, bucket, 2]) for accumulated stats summary (gradient/hessian) per node per buckets for each feature. The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used.
+//	l1: l1 regularization factor on leaf weights, per instance based.
+//	l2: l2 regularization factor on leaf weights, per instance based.
+//	tree_complexity: adjustment to the gain, per leaf based.
+//	min_node_weight: mininum avg of hessians in a node before required for the node to be considered for splitting.
+//	max_splits: the number of nodes that can be split in the whole tree. Used as a dimension of output tensors.
+//
+// Returns An output list of Rank 1 tensors indicating possible split node ids for each feature. The length of the list is num_features, but each tensor has different size as each feature provides different possible nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes.A list of Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. See above for details like shapes and sizes.A list of Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node.
+func BoostedTreesCalculateBestGainsPerFeature(scope *Scope, node_id_range tf.Output, stats_summary_list []tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, max_splits int64) (node_ids_list []tf.Output, gains_list []tf.Output, thresholds_list []tf.Output, left_node_contribs_list []tf.Output, right_node_contribs_list []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"max_splits": max_splits}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesCalculateBestGainsPerFeature",
+		Input: []tf.Input{
+			node_id_range, tf.OutputList(stats_summary_list), l1, l2, tree_complexity, min_node_weight,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if node_ids_list, idx, err = makeOutputList(op, idx, "node_ids_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	if gains_list, idx, err = makeOutputList(op, idx, "gains_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	if thresholds_list, idx, err = makeOutputList(op, idx, "thresholds_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	if left_node_contribs_list, idx, err = makeOutputList(op, idx, "left_node_contribs_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	if right_node_contribs_list, idx, err = makeOutputList(op, idx, "right_node_contribs_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	return node_ids_list, gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list
+}
+
+// EncodePngAttr is an optional argument to EncodePng.
+type EncodePngAttr func(optionalAttr)
+
+// EncodePngCompression sets the optional compression attribute to value.
+//
+// value: Compression level.
+// If not specified, defaults to -1
+func EncodePngCompression(value int64) EncodePngAttr {
+	return func(m optionalAttr) {
+		m["compression"] = value
+	}
+}
+
+// PNG-encode an image.
+//
+// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
+// where `channels` is:
+//
+// *   1: for grayscale.
+// *   2: for grayscale + alpha.
+// *   3: for RGB.
+// *   4: for RGBA.
+//
+// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
+// default or a value from 0 to 9.  9 is the highest compression level, generating
+// the smallest output, but is slower.
+//
+// Arguments:
+//	image: 3-D with shape `[height, width, channels]`.
+//
+// Returns 0-D. PNG-encoded image.
+func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "EncodePng",
+		Input: []tf.Input{
+			image,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute.
 type DataFormatVecPermuteAttr func(optionalAttr)
 
@@ -13751,126 +14202,54 @@ func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr {
 // If not specified, defaults to ""
 func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr {
 	return func(m optionalAttr) {
-		m["dct_method"] = value
-	}
-}
-
-// Decode and Crop a JPEG-encoded image to a uint8 tensor.
-//
-// The attr `channels` indicates the desired number of color channels for the
-// decoded image.
-//
-// Accepted values are:
-//
-// *   0: Use the number of channels in the JPEG-encoded image.
-// *   1: output a grayscale image.
-// *   3: output an RGB image.
-//
-// If needed, the JPEG-encoded image is transformed to match the requested number
-// of color channels.
-//
-// The attr `ratio` allows downscaling the image by an integer factor during
-// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
-// downscaling the image later.
-//
-//
-// It is equivalent to a combination of decode and crop, but much faster by only
-// decoding partial jpeg image.
-//
-// Arguments:
-//	contents: 0-D.  The JPEG-encoded image.
-//	crop_window: 1-D.  The crop window: [crop_y, crop_x, crop_height, crop_width].
-//
-// Returns 3-D with shape `[height, width, channels]`..
-func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DecodeAndCropJpeg",
-		Input: []tf.Input{
-			contents, crop_window,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler.
-type AllCandidateSamplerAttr func(optionalAttr)
-
-// AllCandidateSamplerSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value.
-//
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["dct_method"] = value
 	}
 }
 
-// Generates labels for candidate sampling with a learned unigram distribution.
+// Decode and Crop a JPEG-encoded image to a uint8 tensor.
 //
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
+// The attr `channels` indicates the desired number of color channels for the
+// decoded image.
 //
-// For each batch, this op picks a single set of sampled candidate labels.
+// Accepted values are:
 //
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
+// *   0: Use the number of channels in the JPEG-encoded image.
+// *   1: output a grayscale image.
+// *   3: output an RGB image.
+//
+// If needed, the JPEG-encoded image is transformed to match the requested number
+// of color channels.
+//
+// The attr `ratio` allows downscaling the image by an integer factor during
+// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
+// downscaling the image later.
+//
+//
+// It is equivalent to a combination of decode and crop, but much faster by only
+// decoding partial jpeg image.
 //
 // Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to produce.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
+//	contents: 0-D.  The JPEG-encoded image.
+//	crop_window: 1-D.  The crop window: [crop_y, crop_x, crop_height, crop_width].
 //
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+// Returns 3-D with shape `[height, width, channels]`..
+func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AllCandidateSampler",
+		Type: "DecodeAndCropJpeg",
 		Input: []tf.Input{
-			true_classes,
+			contents, crop_window,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
 // Adds two `SparseTensor` objects to produce another `SparseTensor`.
@@ -20974,76 +21353,6 @@ func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output,
 	return op.Output(0)
 }
 
-// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
-type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
-
-// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value.
-//
-// value: 1-D tensor of length 4.  The dilation factor for each dimension of
-// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
-// element on that dimension. The dimension order is determined by the value of
-// `data_format`, see above for details. Dilations in the batch and depth
-// dimensions must be 1.
-// If not specified, defaults to <i:1 i:1 i:1 i:1 >
-func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["dilations"] = value
-	}
-}
-
-// Computes the gradients of depthwise convolution with respect to the input.
-//
-// Arguments:
-//	input_sizes: An integer vector representing the shape of `input`, based
-// on `data_format`.  For example, if `data_format` is 'NHWC' then
-//  `input` is a 4-D `[batch, height, width, channels]` tensor.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
-//	out_backprop: 4-D with shape  based on `data_format`.
-// For example, if `data_format` is 'NHWC' then
-// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution.
-//	padding: The type of padding algorithm to use.
-//
-// Returns 4-D with shape according to `data_format`.  For example, if
-// `data_format` is 'NHWC', output shape is `[batch, in_height,
-// in_width, in_channels]`.  Gradient w.r.t. the input of the
-// convolution.
-func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DepthwiseConv2dNativeBackpropInput",
-		Input: []tf.Input{
-			input_sizes, filter, out_backprop,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Stops gradient computation.
 //
 // When executed in a graph, this op outputs its input tensor as-is.
@@ -22987,148 +23296,35 @@ func MapPeekMemoryLimit(value int64) MapPeekAttr {
 // MapPeekContainer sets the optional container attribute to value.
 // If not specified, defaults to ""
 func MapPeekContainer(value string) MapPeekAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// MapPeekSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func MapPeekSharedName(value string) MapPeekAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op peeks at the values at the specified key.  If the
-//
-// underlying container does not contain this key
-// this op will block until it does.
-func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MapPeek",
-		Input: []tf.Input{
-			key, indices,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("MapPeek", err)
-		return
-	}
-	return values
-}
-
-// Looks up keys in a table, outputs the corresponding values.
-//
-// The tensor `keys` must of the same type as the keys of the table.
-// The output `values` is of the type of the table values.
-//
-// The scalar `default_value` is the value output for keys not present in the
-// table. It must also be of the same type as the table values.
-//
-// Arguments:
-//	table_handle: Handle to the table.
-//	keys: Any shape.  Keys to look up.
-//
-//
-// Returns Same shape as `keys`.  Values found in the table, or `default_values`
-// for missing keys.
-func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "LookupTableFindV2",
-		Input: []tf.Input{
-			table_handle, keys, default_value,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Bucketizes 'input' based on 'boundaries'.
-//
-// For example, if the inputs are
-//     boundaries = [0, 10, 100]
-//     input = [[-5, 10000]
-//              [150,   10]
-//              [5,    100]]
-//
-// then the output will be
-//     output = [[0, 3]
-//               [3, 2]
-//               [1, 3]]
-//
-// Arguments:
-//	input: Any shape of Tensor contains with int or float type.
-//	boundaries: A sorted list of floats gives the boundary of the buckets.
-//
-// Returns Same shape with 'input', each value of input replaced with bucket index.
-//
-// @compatibility(numpy)
-// Equivalent to np.digitize.
-// @end_compatibility
-func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"boundaries": boundaries}
-	opspec := tf.OpSpec{
-		Type: "Bucketize",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Calculates gains for each feature and returns the best possible split information for the feature.
-//
-// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature.
-//
-// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split.
-//
-// In this manner, the output is the best split per features and per node, so that it needs to be combined later to produce the best split for each node (among all possible features).
-//
-// The length of output lists are all of the same length, `num_features`.
-// The output shapes are compatible in a way that the first dimension of all tensors of all lists are the same and equal to the number of possible split nodes for each feature.
-//
-// Arguments:
-//	node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive).
-//	stats_summary_list: A list of Rank 3 tensor (#shape=[max_splits, bucket, 2]) for accumulated stats summary (gradient/hessian) per node per buckets for each feature. The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used.
-//	l1: l1 regularization factor on leaf weights, per instance based.
-//	l2: l2 regularization factor on leaf weights, per instance based.
-//	tree_complexity: adjustment to the gain, per leaf based.
-//	min_node_weight: mininum avg of hessians in a node before required for the node to be considered for splitting.
-//	max_splits: the number of nodes that can be split in the whole tree. Used as a dimension of output tensors.
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// MapPeekSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func MapPeekSharedName(value string) MapPeekAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op peeks at the values at the specified key.  If the
 //
-// Returns An output list of Rank 1 tensors indicating possible split node ids for each feature. The length of the list is num_features, but each tensor has different size as each feature provides different possible nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes.A list of Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. See above for details like shapes and sizes.A list of Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node.
-func BoostedTreesCalculateBestGainsPerFeature(scope *Scope, node_id_range tf.Output, stats_summary_list []tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, max_splits int64) (node_ids_list []tf.Output, gains_list []tf.Output, thresholds_list []tf.Output, left_node_contribs_list []tf.Output, right_node_contribs_list []tf.Output) {
+// underlying container does not contain this key
+// this op will block until it does.
+func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"max_splits": max_splits}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "BoostedTreesCalculateBestGainsPerFeature",
+		Type: "MapPeek",
 		Input: []tf.Input{
-			node_id_range, tf.OutputList(stats_summary_list), l1, l2, tree_complexity, min_node_weight,
+			key, indices,
 		},
 		Attrs: attrs,
 	}
@@ -23138,74 +23334,37 @@ func BoostedTreesCalculateBestGainsPerFeature(scope *Scope, node_id_range tf.Out
 	}
 	var idx int
 	var err error
-	if node_ids_list, idx, err = makeOutputList(op, idx, "node_ids_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
-		return
-	}
-	if gains_list, idx, err = makeOutputList(op, idx, "gains_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
-		return
-	}
-	if thresholds_list, idx, err = makeOutputList(op, idx, "thresholds_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
-		return
-	}
-	if left_node_contribs_list, idx, err = makeOutputList(op, idx, "left_node_contribs_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
-		return
-	}
-	if right_node_contribs_list, idx, err = makeOutputList(op, idx, "right_node_contribs_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("MapPeek", err)
 		return
 	}
-	return node_ids_list, gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list
-}
-
-// EncodePngAttr is an optional argument to EncodePng.
-type EncodePngAttr func(optionalAttr)
-
-// EncodePngCompression sets the optional compression attribute to value.
-//
-// value: Compression level.
-// If not specified, defaults to -1
-func EncodePngCompression(value int64) EncodePngAttr {
-	return func(m optionalAttr) {
-		m["compression"] = value
-	}
+	return values
 }
 
-// PNG-encode an image.
-//
-// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
-// where `channels` is:
+// Looks up keys in a table, outputs the corresponding values.
 //
-// *   1: for grayscale.
-// *   2: for grayscale + alpha.
-// *   3: for RGB.
-// *   4: for RGBA.
+// The tensor `keys` must of the same type as the keys of the table.
+// The output `values` is of the type of the table values.
 //
-// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
-// default or a value from 0 to 9.  9 is the highest compression level, generating
-// the smallest output, but is slower.
+// The scalar `default_value` is the value output for keys not present in the
+// table. It must also be of the same type as the table values.
 //
 // Arguments:
-//	image: 3-D with shape `[height, width, channels]`.
+//	table_handle: Handle to the table.
+//	keys: Any shape.  Keys to look up.
 //
-// Returns 0-D. PNG-encoded image.
-func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) {
+//
+// Returns Same shape as `keys`.  Values found in the table, or `default_values`
+// for missing keys.
+func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "EncodePng",
+		Type: "LookupTableFindV2",
 		Input: []tf.Input{
-			image,
+			table_handle, keys, default_value,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
@@ -32982,162 +33141,3 @@ func Abort(scope *Scope, optional ...AbortAttr) (o *tf.Operation) {
 	}
 	return scope.AddOperation(opspec)
 }
-
-// FixedUnigramCandidateSamplerAttr is an optional argument to FixedUnigramCandidateSampler.
-type FixedUnigramCandidateSamplerAttr func(optionalAttr)
-
-// FixedUnigramCandidateSamplerVocabFile sets the optional vocab_file attribute to value.
-//
-// value: Each valid line in this file (which should have a CSV-like format)
-// corresponds to a valid word ID. IDs are in sequential order, starting from
-// num_reserved_ids. The last entry in each line is expected to be a value
-// corresponding to the count or relative probability. Exactly one of vocab_file
-// and unigrams needs to be passed to this op.
-// If not specified, defaults to ""
-func FixedUnigramCandidateSamplerVocabFile(value string) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["vocab_file"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerDistortion sets the optional distortion attribute to value.
-//
-// value: The distortion is used to skew the unigram probability distribution.
-// Each weight is first raised to the distortion's power before adding to the
-// internal unigram distribution. As a result, distortion = 1.0 gives regular
-// unigram sampling (as defined by the vocab file), and distortion = 0.0 gives
-// a uniform distribution.
-// If not specified, defaults to 1
-func FixedUnigramCandidateSamplerDistortion(value float32) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["distortion"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerNumReservedIds sets the optional num_reserved_ids attribute to value.
-//
-// value: Optionally some reserved IDs can be added in the range [0,
-// ..., num_reserved_ids) by the users. One use case is that a special unknown
-// word token is used as ID 0. These IDs will have a sampling probability of 0.
-// If not specified, defaults to 0
-func FixedUnigramCandidateSamplerNumReservedIds(value int64) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["num_reserved_ids"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerNumShards sets the optional num_shards attribute to value.
-//
-// value: A sampler can be used to sample from a subset of the original range
-// in order to speed up the whole computation through parallelism. This parameter
-// (together with 'shard') indicates the number of partitions that are being
-// used in the overall computation.
-// If not specified, defaults to 1
-//
-// REQUIRES: value >= 1
-func FixedUnigramCandidateSamplerNumShards(value int64) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["num_shards"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerShard sets the optional shard attribute to value.
-//
-// value: A sampler can be used to sample from a subset of the original range
-// in order to speed up the whole computation through parallelism. This parameter
-// (together with 'num_shards') indicates the particular partition number of a
-// sampler op, when partitioning is being used.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func FixedUnigramCandidateSamplerShard(value int64) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["shard"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerUnigrams sets the optional unigrams attribute to value.
-//
-// value: A list of unigram counts or probabilities, one per ID in sequential
-// order. Exactly one of vocab_file and unigrams should be passed to this op.
-// If not specified, defaults to <>
-func FixedUnigramCandidateSamplerUnigrams(value []float32) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["unigrams"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func FixedUnigramCandidateSamplerSeed(value int64) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
-//
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func FixedUnigramCandidateSamplerSeed2(value int64) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Generates labels for candidate sampling with a learned unigram distribution.
-//
-// A unigram sampler could use a fixed unigram distribution read from a
-// file or passed in as an in-memory array instead of building up the distribution
-// from data on the fly. There is also an option to skew the distribution by
-// applying a distortion power to the weights.
-//
-// The vocabulary file should be in CSV-like format, with the last field
-// being the weight associated with the word.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
-//
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
-//
-// Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
-//
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func FixedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...FixedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "FixedUnigramCandidateSampler",
-		Input: []tf.Input{
-			true_classes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-- 
GitLab


From 51a6118e5bd85935b1d9ec0e68b92f1f98d14982 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Wed, 26 Sep 2018 18:26:51 -0700
Subject: [PATCH 0777/1357] Automated rollback of commit
 82af048bc8c3c044c98a27b1c4c27bb62d4e4a14

PiperOrigin-RevId: 214705311
---
 .../lite/delegates/{flex => eager}/BUILD      |  0
 .../delegates/{flex => eager}/buffer_map.cc   |  8 ++--
 .../delegates/{flex => eager}/buffer_map.h    | 12 ++---
 .../{flex => eager}/buffer_map_test.cc        |  6 +--
 .../delegates/{flex => eager}/delegate.cc     | 34 +++++++-------
 .../lite/delegates/{flex => eager}/delegate.h | 26 +++++-----
 .../{flex => eager}/delegate_data.cc          |  6 +--
 .../delegates/{flex => eager}/delegate_data.h | 16 +++----
 .../{flex => eager}/delegate_data_test.cc     |  6 +--
 .../{flex => eager}/delegate_test.cc          | 14 +++---
 .../lite/delegates/{flex => eager}/kernel.cc  | 30 ++++++------
 .../lite/delegates/{flex => eager}/kernel.h   | 12 ++---
 .../delegates/{flex => eager}/kernel_test.cc  | 16 +++----
 .../delegates/{flex => eager}/test_util.cc    | 47 ++++++++++---------
 .../delegates/{flex => eager}/test_util.h     | 20 ++++----
 .../lite/delegates/{flex => eager}/util.cc    |  6 +--
 .../lite/delegates/{flex => eager}/util.h     | 10 ++--
 .../delegates/{flex => eager}/util_test.cc    |  6 +--
 tensorflow/contrib/lite/kernels/register.cc   |  8 ++--
 tensorflow/contrib/lite/model.cc              |  4 +-
 tensorflow/contrib/lite/python/convert.py     |  6 +--
 tensorflow/contrib/lite/python/lite_test.py   |  2 +-
 tensorflow/contrib/lite/testing/BUILD         |  2 +-
 .../contrib/lite/testing/generate_examples.py |  2 +-
 .../contrib/lite/testing/tflite_diff_flags.h  |  4 +-
 .../contrib/lite/testing/tflite_diff_util.h   |  2 +-
 .../contrib/lite/testing/tflite_driver.cc     |  6 +--
 .../contrib/lite/testing/tflite_driver.h      |  4 +-
 tensorflow/contrib/lite/toco/args.h           |  4 +-
 .../contrib/lite/toco/import_tensorflow.cc    |  4 +-
 .../contrib/lite/toco/import_tensorflow.h     |  2 +-
 tensorflow/contrib/lite/toco/tflite/export.cc | 20 ++++----
 tensorflow/contrib/lite/toco/tflite/export.h  |  4 +-
 .../contrib/lite/toco/tflite/export_test.cc   |  2 +-
 .../contrib/lite/toco/tflite/operator.cc      | 26 +++++-----
 .../contrib/lite/toco/tflite/operator.h       |  6 +--
 .../contrib/lite/toco/toco_cmdline_flags.cc   | 24 +++++-----
 tensorflow/contrib/lite/toco/toco_flags.proto | 16 +++----
 tensorflow/contrib/lite/toco/toco_tooling.cc  |  8 ++--
 tensorflow/contrib/lite/tools/benchmark/BUILD |  8 ++--
 .../tools/benchmark/benchmark_tflite_model.cc |  6 +--
 .../tools/benchmark/benchmark_tflite_model.h  |  4 +-
 tensorflow/contrib/lite/util.cc               |  6 +--
 tensorflow/contrib/lite/util.h                |  8 ++--
 tensorflow/contrib/lite/util_test.cc          | 16 +++----
 45 files changed, 240 insertions(+), 239 deletions(-)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/BUILD (100%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/buffer_map.cc (95%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/buffer_map.h (86%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/buffer_map_test.cc (98%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/delegate.cc (76%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/delegate.h (64%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/delegate_data.cc (94%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/delegate_data.h (78%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/delegate_data_test.cc (93%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/delegate_test.cc (95%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/kernel.cc (91%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/kernel.h (79%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/kernel_test.cc (94%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/test_util.cc (76%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/test_util.h (90%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/util.cc (96%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/util.h (89%)
 rename tensorflow/contrib/lite/delegates/{flex => eager}/util_test.cc (97%)

diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/eager/BUILD
similarity index 100%
rename from tensorflow/contrib/lite/delegates/flex/BUILD
rename to tensorflow/contrib/lite/delegates/eager/BUILD
diff --git a/tensorflow/contrib/lite/delegates/flex/buffer_map.cc b/tensorflow/contrib/lite/delegates/eager/buffer_map.cc
similarity index 95%
rename from tensorflow/contrib/lite/delegates/flex/buffer_map.cc
rename to tensorflow/contrib/lite/delegates/eager/buffer_map.cc
index 63e39196d9..e5a19c3997 100644
--- a/tensorflow/contrib/lite/delegates/flex/buffer_map.cc
+++ b/tensorflow/contrib/lite/delegates/eager/buffer_map.cc
@@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
 
 #include "tensorflow/c/c_api_internal.h"
-#include "tensorflow/contrib/lite/delegates/flex/util.h"
+#include "tensorflow/contrib/lite/delegates/eager/util.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/log_memory.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace {
 // A tensor buffer that is allocated, deallocated and populated by TF Lite.
 class TfLiteTensorBuffer : public tensorflow::TensorBuffer {
@@ -107,5 +107,5 @@ void BufferMap::SetFromTensorFlow(int tensor_index, tensorflow::Tensor tensor) {
   id_to_tensor_[tensor_index] = std::move(tensor);
 }
 
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/flex/buffer_map.h b/tensorflow/contrib/lite/delegates/eager/buffer_map.h
similarity index 86%
rename from tensorflow/contrib/lite/delegates/flex/buffer_map.h
rename to tensorflow/contrib/lite/delegates/eager/buffer_map.h
index 4ce886568a..aaaa045840 100644
--- a/tensorflow/contrib/lite/delegates/flex/buffer_map.h
+++ b/tensorflow/contrib/lite/delegates/eager/buffer_map.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_
 
 #include <map>
 
@@ -21,12 +21,12 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 
 // Maps a TF Lite tensor index into a TensorFlow tensor.
 //
 // The TF Lite interpreter assigns integer indices to each of its tensors, but
-// the Flex delegate deals in terms of TensorFlow tensors. This class maps
+// the Eager delegate deals in terms of TensorFlow tensors. This class maps
 // from indices to tensors and allows the creation of new tensors to be
 // associated with a given index.
 class BufferMap {
@@ -55,7 +55,7 @@ class BufferMap {
   std::map<int, tensorflow::Tensor> id_to_tensor_;
 };
 
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_
diff --git a/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc b/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc
similarity index 98%
rename from tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc
rename to tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc
index bb80e25e80..a046943e56 100644
--- a/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc
+++ b/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
@@ -21,7 +21,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/util.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace {
 
 using ::testing::ElementsAre;
@@ -164,7 +164,7 @@ TEST(BufferMapTest, TensorFlowOverwritesTfLite) {
 }
 
 }  // namespace
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate.cc b/tensorflow/contrib/lite/delegates/eager/delegate.cc
similarity index 76%
rename from tensorflow/contrib/lite/delegates/flex/delegate.cc
rename to tensorflow/contrib/lite/delegates/eager/delegate.cc
index ba065a8ff5..45fc158157 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate.cc
+++ b/tensorflow/contrib/lite/delegates/eager/delegate.cc
@@ -12,19 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
 
 #include <vector>
 
 #include "tensorflow/contrib/lite/context_util.h"
-#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
-#include "tensorflow/contrib/lite/delegates/flex/kernel.h"
-#include "tensorflow/contrib/lite/delegates/flex/util.h"
+#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/eager/kernel.h"
+#include "tensorflow/contrib/lite/delegates/eager/util.h"
 #include "tensorflow/contrib/lite/util.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace delegate {
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
@@ -32,7 +32,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
   TfLiteIntArray* plan;
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
 
-  // Add all custom ops starting with "Flex" to list of supported nodes.
+  // Add all custom ops starting with "Eager" to list of supported nodes.
   std::vector<int> supported_nodes;
   for (int node_index : TfLiteIntArrayView(plan)) {
     TfLiteNode* node;
@@ -40,7 +40,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
         context, node_index, &node, &registration));
 
-    if (IsFlexOp(registration->custom_name)) {
+    if (IsEagerOp(registration->custom_name)) {
       supported_nodes.push_back(node_index);
     }
   }
@@ -81,28 +81,28 @@ TfLiteStatus CopyFromBufferHandle(TfLiteContext* context,
 }
 
 }  // namespace delegate
-}  // namespace flex
+}  // namespace eager
 
-std::unique_ptr<FlexDelegate> FlexDelegate::Create() {
-  std::unique_ptr<flex::DelegateData> delegate_data;
-  if (!flex::DelegateData::Create(&delegate_data).ok()) {
+std::unique_ptr<EagerDelegate> EagerDelegate::Create() {
+  std::unique_ptr<eager::DelegateData> delegate_data;
+  if (!eager::DelegateData::Create(&delegate_data).ok()) {
     fprintf(stderr, "Unable to initialize TensorFlow context.\n");
     return nullptr;
   }
 
-  return std::unique_ptr<FlexDelegate>(
-      new FlexDelegate(std::move(delegate_data)));
+  return std::unique_ptr<EagerDelegate>(
+      new EagerDelegate(std::move(delegate_data)));
 }
 
-FlexDelegate::FlexDelegate(std::unique_ptr<flex::DelegateData> delegate_data)
+EagerDelegate::EagerDelegate(std::unique_ptr<eager::DelegateData> delegate_data)
     : TfLiteDelegate{
           /*data_=*/delegate_data.get(),
-          /*nullptr,*/ &flex::delegate::Prepare,
-          /*CopyFromBufferHandle=*/&flex::delegate::CopyFromBufferHandle,
+          /*nullptr,*/ &eager::delegate::Prepare,
+          /*CopyFromBufferHandle=*/&eager::delegate::CopyFromBufferHandle,
           /*CopyToBufferHandle=*/nullptr,
           /*FreeBufferHandle=*/nullptr},
       delegate_data_(std::move(delegate_data)) {}
 
-FlexDelegate::~FlexDelegate() {}
+EagerDelegate::~EagerDelegate() {}
 
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate.h b/tensorflow/contrib/lite/delegates/eager/delegate.h
similarity index 64%
rename from tensorflow/contrib/lite/delegates/flex/delegate.h
rename to tensorflow/contrib/lite/delegates/eager/delegate.h
index 1017780dc7..70f3c15af4 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate.h
+++ b/tensorflow/contrib/lite/delegates/eager/delegate.h
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_
 
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
-#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
 
 namespace tflite {
 
@@ -24,12 +24,12 @@ namespace tflite {
 // Delegate that can be used to extract parts of a graph that are designed to be
 // executed by TensorFlow's runtime via Eager.
 //
-// The interpreter must be constructed after the FlexDelegate and destructed
-// before the FlexDelegate. This delegate may be used with multiple
+// The interpreter must be constructed after the EagerDelegate and destructed
+// before the EagerDelegate. This delegate may be used with multiple
 // interpreters, but it is *not* thread-safe.
 //
 // Usage:
-//   auto delegate = FlexDelegate::Create();
+//   auto delegate = EagerDelegate::Create();
 //   ... build interpreter ...
 //
 //   if (delegate) {
@@ -39,21 +39,21 @@ namespace tflite {
 //   ... run inference ...
 //   ... destroy interpreter ...
 //   ... destroy delegate ...
-class FlexDelegate : public TfLiteDelegate {
+class EagerDelegate : public TfLiteDelegate {
  public:
   // Creates a delegate that supports TF ops.
   //
-  // If the underyling TF Flex context creation fails, returns null.
-  static std::unique_ptr<FlexDelegate> Create();
+  // If the underyling TF Eager context creation fails, returns null.
+  static std::unique_ptr<EagerDelegate> Create();
 
-  ~FlexDelegate();
+  ~EagerDelegate();
 
  private:
-  explicit FlexDelegate(std::unique_ptr<flex::DelegateData> delegate_data);
+  explicit EagerDelegate(std::unique_ptr<eager::DelegateData> delegate_data);
 
-  std::unique_ptr<flex::DelegateData> delegate_data_;
+  std::unique_ptr<eager::DelegateData> delegate_data_;
 };
 
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate_data.cc b/tensorflow/contrib/lite/delegates/eager/delegate_data.cc
similarity index 94%
rename from tensorflow/contrib/lite/delegates/flex/delegate_data.cc
rename to tensorflow/contrib/lite/delegates/eager/delegate_data.cc
index 8f985f770c..0fd5c976f8 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate_data.cc
+++ b/tensorflow/contrib/lite/delegates/eager/delegate_data.cc
@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
 
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 tensorflow::Status DelegateData::Create(std::unique_ptr<DelegateData>* data) {
   std::vector<tensorflow::Device*> devices;
 
@@ -43,5 +43,5 @@ DelegateData::DelegateData(tensorflow::EagerContext* eager_context)
 
 DelegateData::~DelegateData() {}
 
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate_data.h b/tensorflow/contrib/lite/delegates/eager/delegate_data.h
similarity index 78%
rename from tensorflow/contrib/lite/delegates/flex/delegate_data.h
rename to tensorflow/contrib/lite/delegates/eager/delegate_data.h
index 8d75f0b0ef..772d26f44e 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate_data.h
+++ b/tensorflow/contrib/lite/delegates/eager/delegate_data.h
@@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_
 
-#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 
-// Data kept by the Flex delegate for the lifetime of an Interpreter.
+// Data kept by the Eager delegate for the lifetime of an Interpreter.
 class DelegateData {
  public:
   // Create a new DelegateData, initialized with a newly-created EagerContext.
@@ -29,7 +29,7 @@ class DelegateData {
 
   ~DelegateData();
 
-  // The EagerContext that is required for execution of Flex Ops.
+  // The EagerContext that is required for execution of Eager Ops.
   tensorflow::EagerContext* GetEagerContext() { return eager_context_.get(); }
 
   // Map from TF Lite tensor index to TensorFlow tensor for a given context.
@@ -46,7 +46,7 @@ class DelegateData {
   std::unordered_map<const TfLiteContext*, BufferMap> buffer_map_;
 };
 
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc b/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc
similarity index 93%
rename from tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc
rename to tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc
index 30b10f435a..def063309f 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc
+++ b/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace {
 
 TEST(DelegateDataTest, Basic) {
@@ -39,7 +39,7 @@ TEST(DelegateDataTest, Basic) {
 }
 
 }  // namespace
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate_test.cc b/tensorflow/contrib/lite/delegates/eager/delegate_test.cc
similarity index 95%
rename from tensorflow/contrib/lite/delegates/flex/delegate_test.cc
rename to tensorflow/contrib/lite/delegates/eager/delegate_test.cc
index 1813952cef..43ec5d53b8 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate_test.cc
+++ b/tensorflow/contrib/lite/delegates/eager/delegate_test.cc
@@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include "tensorflow/contrib/lite/delegates/flex/test_util.h"
+#include "tensorflow/contrib/lite/delegates/eager/test_util.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace {
 
 using ::testing::ContainsRegex;
 using ::testing::ElementsAre;
 
-class DelegateTest : public testing::FlexModelTest {
+class DelegateTest : public testing::EagerModelTest {
  public:
   DelegateTest() {
-    delegate_ = FlexDelegate::Create();
+    delegate_ = EagerDelegate::Create();
     interpreter_.reset(new Interpreter(&error_reporter_));
   }
 
@@ -46,7 +46,7 @@ class DelegateTest : public testing::FlexModelTest {
   }
 
  private:
-  std::unique_ptr<FlexDelegate> delegate_;
+  std::unique_ptr<EagerDelegate> delegate_;
 };
 
 TEST_F(DelegateTest, FullGraph) {
@@ -236,7 +236,7 @@ TEST_F(DelegateTest, MultipleInterpretersSameDelegate) {
 }
 
 }  // namespace
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/flex/kernel.cc b/tensorflow/contrib/lite/delegates/eager/kernel.cc
similarity index 91%
rename from tensorflow/contrib/lite/delegates/flex/kernel.cc
rename to tensorflow/contrib/lite/delegates/eager/kernel.cc
index e4f1aea990..48a2f56baf 100644
--- a/tensorflow/contrib/lite/delegates/flex/kernel.cc
+++ b/tensorflow/contrib/lite/delegates/eager/kernel.cc
@@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/kernel.h"
+#include "tensorflow/contrib/lite/delegates/eager/kernel.h"
 
 #include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/builtin_ops.h"
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/context_util.h"
-#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
-#include "tensorflow/contrib/lite/delegates/flex/util.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/eager/util.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
 #include "tensorflow/contrib/lite/string.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
@@ -28,10 +28,10 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 
-// Note: this is part of TF Lite's Flex delegation code which is to be
+// Note: this is part of TF Lite's Eager delegation code which is to be
 // completed soon.
 
-// This is the TF Lite op that is created by the flex delegate to handle
+// This is the TF Lite op that is created by the eager delegate to handle
 // execution of a supported subgraph. The usual flow is that the delegate
 // informs the interpreter of supported nodes in a graph, and each supported
 // subgraph is replaced with one instance of this kernel.
@@ -46,7 +46,7 @@ limitations under the License.
 // corresponding TensorFlow/Eager Op.
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace kernel {
 
 // Controls the lifetime of tensor handles in a vector.
@@ -72,11 +72,11 @@ class VectorOfHandles {
 
 // Executes the TensorFlow op given by 'op_name', with the attributes specified
 // in 'nodedef'. Inputs and outputs are given as indices into the 'buffer_map'.
-tensorflow::Status ExecuteFlexOp(tensorflow::EagerContext* eager_context,
-                                 BufferMap* buffer_map, const string& op_name,
-                                 const tensorflow::NodeDef& nodedef,
-                                 const std::vector<int>& inputs,
-                                 const std::vector<int>& outputs) {
+tensorflow::Status ExecuteEagerOp(tensorflow::EagerContext* eager_context,
+                                  BufferMap* buffer_map, const string& op_name,
+                                  const tensorflow::NodeDef& nodedef,
+                                  const std::vector<int>& inputs,
+                                  const std::vector<int>& outputs) {
   const tensorflow::AttrTypeMap* attr_types;
   TF_RETURN_WITH_CONTEXT_IF_ERROR(
       tensorflow::AttrTypeMapForOp(op_name.c_str(), &attr_types),
@@ -258,13 +258,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // Execute the TensorFlow Ops sequentially.
   for (const auto& node_data : op_data->nodes) {
     if (node_data.nodedef.op().empty()) {
-      context->ReportError(context, "Invalid NodeDef in Flex op '%s'",
+      context->ReportError(context, "Invalid NodeDef in Eager op '%s'",
                            node_data.name.c_str());
       return kTfLiteError;
     }
     auto status =
-        ExecuteFlexOp(eager_context, buffer_map, node_data.name,
-                      node_data.nodedef, node_data.inputs, node_data.outputs);
+        ExecuteEagerOp(eager_context, buffer_map, node_data.name,
+                       node_data.nodedef, node_data.inputs, node_data.outputs);
     TF_LITE_ENSURE_OK(context, ConvertStatus(context, status));
   }
 
@@ -295,5 +295,5 @@ TfLiteRegistration GetKernel() {
   return registration;
 }
 
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/flex/kernel.h b/tensorflow/contrib/lite/delegates/eager/kernel.h
similarity index 79%
rename from tensorflow/contrib/lite/delegates/flex/kernel.h
rename to tensorflow/contrib/lite/delegates/eager/kernel.h
index ac9313a37b..2478abccaa 100644
--- a/tensorflow/contrib/lite/delegates/flex/kernel.h
+++ b/tensorflow/contrib/lite/delegates/eager/kernel.h
@@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_
 
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 
 // Return the registration object used to initialize and execute ops that will
 // be delegated to TensorFlow's Eager runtime. This TF Lite op is created by
-// the flex delegate to handle execution of a supported subgraph. The usual
+// the eager delegate to handle execution of a supported subgraph. The usual
 // flow is that the delegate informs the interpreter of supported nodes in a
 // graph, and each supported subgraph is replaced with one instance of this
 // kernel.
 TfLiteRegistration GetKernel();
 
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_
diff --git a/tensorflow/contrib/lite/delegates/flex/kernel_test.cc b/tensorflow/contrib/lite/delegates/eager/kernel_test.cc
similarity index 94%
rename from tensorflow/contrib/lite/delegates/flex/kernel_test.cc
rename to tensorflow/contrib/lite/delegates/eager/kernel_test.cc
index 94a6f8b61a..66f2226626 100644
--- a/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
+++ b/tensorflow/contrib/lite/delegates/eager/kernel_test.cc
@@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/kernel.h"
+#include "tensorflow/contrib/lite/delegates/eager/kernel.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
-#include "tensorflow/contrib/lite/delegates/flex/test_util.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/eager/test_util.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace {
 
 using ::testing::ContainsRegex;
@@ -31,12 +31,12 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteDelegate* delegate,
   TfLiteIntArray* size_and_nodes =
       ConvertVectorToTfLiteIntArray(supported_nodes);
   TF_LITE_ENSURE_STATUS(context->ReplaceSubgraphsWithDelegateKernels(
-      context, flex::GetKernel(), size_and_nodes, delegate));
+      context, eager::GetKernel(), size_and_nodes, delegate));
   TfLiteIntArrayFree(size_and_nodes);
   return kTfLiteOk;
 }
 
-class KernelTest : public testing::FlexModelTest {
+class KernelTest : public testing::EagerModelTest {
  public:
   KernelTest() {
     CHECK(DelegateData::Create(&delegate_data_).ok());
@@ -167,7 +167,7 @@ TEST_F(KernelTest, WrongSetOfNodes) {
 
   ASSERT_FALSE(Invoke());
   ASSERT_THAT(error_reporter().error_messages(),
-              ContainsRegex("Invalid NodeDef in Flex op"));
+              ContainsRegex("Invalid NodeDef in Eager op"));
 }
 
 TEST_F(KernelTest, MixedGraph) {
@@ -220,7 +220,7 @@ TEST_F(KernelTest, SplitGraph) {
 }
 
 }  // namespace
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/flex/test_util.cc b/tensorflow/contrib/lite/delegates/eager/test_util.cc
similarity index 76%
rename from tensorflow/contrib/lite/delegates/flex/test_util.cc
rename to tensorflow/contrib/lite/delegates/eager/test_util.cc
index 69c336a01a..d47be761fb 100644
--- a/tensorflow/contrib/lite/delegates/flex/test_util.cc
+++ b/tensorflow/contrib/lite/delegates/eager/test_util.cc
@@ -13,24 +13,25 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/lite/delegates/flex/test_util.h"
+#include "tensorflow/contrib/lite/delegates/eager/test_util.h"
 
 #include "absl/memory/memory.h"
 #include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/string.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace testing {
 
-bool FlexModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; }
+bool EagerModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; }
 
-void FlexModelTest::SetShape(int tensor_index, const std::vector<int>& values) {
+void EagerModelTest::SetShape(int tensor_index,
+                              const std::vector<int>& values) {
   ASSERT_EQ(interpreter_->ResizeInputTensor(tensor_index, values), kTfLiteOk);
   ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
 }
 
-std::vector<int> FlexModelTest::GetShape(int tensor_index) {
+std::vector<int> EagerModelTest::GetShape(int tensor_index) {
   std::vector<int> result;
   auto* dims = interpreter_->tensor(tensor_index)->dims;
   result.reserve(dims->size);
@@ -40,13 +41,13 @@ std::vector<int> FlexModelTest::GetShape(int tensor_index) {
   return result;
 }
 
-TfLiteType FlexModelTest::GetType(int tensor_index) {
+TfLiteType EagerModelTest::GetType(int tensor_index) {
   return interpreter_->tensor(tensor_index)->type;
 }
 
-void FlexModelTest::AddTensors(int num_tensors, const std::vector<int>& inputs,
-                               const std::vector<int>& outputs, TfLiteType type,
-                               const std::vector<int>& dims) {
+void EagerModelTest::AddTensors(int num_tensors, const std::vector<int>& inputs,
+                                const std::vector<int>& outputs,
+                                TfLiteType type, const std::vector<int>& dims) {
   interpreter_->AddTensors(num_tensors);
   for (int i = 0; i < num_tensors; ++i) {
     TfLiteQuantizationParams quant;
@@ -65,8 +66,8 @@ void FlexModelTest::AddTensors(int num_tensors, const std::vector<int>& inputs,
   CHECK_EQ(interpreter_->SetOutputs(outputs), kTfLiteOk);
 }
 
-void FlexModelTest::AddTfLiteMulOp(const std::vector<int>& inputs,
-                                   const std::vector<int>& outputs) {
+void EagerModelTest::AddTfLiteMulOp(const std::vector<int>& inputs,
+                                    const std::vector<int>& outputs) {
   static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
   reg.builtin_code = BuiltinOperator_MUL;
   reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
@@ -89,8 +90,8 @@ void FlexModelTest::AddTfLiteMulOp(const std::vector<int>& inputs,
            kTfLiteOk);
 }
 
-void FlexModelTest::AddTfOp(TfOpType op, const std::vector<int>& inputs,
-                            const std::vector<int>& outputs) {
+void EagerModelTest::AddTfOp(TfOpType op, const std::vector<int>& inputs,
+                             const std::vector<int>& outputs) {
   auto attr = [](const string& key, const string& value) {
     return " attr{ key: '" + key + "' value {" + value + "}}";
   };
@@ -106,28 +107,28 @@ void FlexModelTest::AddTfOp(TfOpType op, const std::vector<int>& inputs,
   if (op == kUnpack) {
     string attributes =
         type_attribute + attr("num", "i: 2") + attr("axis", "i: 0");
-    AddTfOp("FlexUnpack", "Unpack", attributes, inputs, outputs);
+    AddTfOp("EagerUnpack", "Unpack", attributes, inputs, outputs);
   } else if (op == kIdentity) {
     string attributes = type_attribute;
-    AddTfOp("FlexIdentity", "Identity", attributes, inputs, outputs);
+    AddTfOp("EagerIdentity", "Identity", attributes, inputs, outputs);
   } else if (op == kAdd) {
     string attributes = type_attribute;
-    AddTfOp("FlexAdd", "Add", attributes, inputs, outputs);
+    AddTfOp("EagerAdd", "Add", attributes, inputs, outputs);
   } else if (op == kMul) {
     string attributes = type_attribute;
-    AddTfOp("FlexMul", "Mul", attributes, inputs, outputs);
+    AddTfOp("EagerMul", "Mul", attributes, inputs, outputs);
   } else if (op == kNonExistent) {
     AddTfOp("NonExistentOp", "NonExistentOp", "", inputs, outputs);
   } else if (op == kIncompatibleNodeDef) {
     // "Cast" op is created without attributes - making it incompatible.
-    AddTfOp("FlexCast", "Cast", "", inputs, outputs);
+    AddTfOp("EagerCast", "Cast", "", inputs, outputs);
   }
 }
 
-void FlexModelTest::AddTfOp(const char* tflite_name, const string& tf_name,
-                            const string& nodedef_str,
-                            const std::vector<int>& inputs,
-                            const std::vector<int>& outputs) {
+void EagerModelTest::AddTfOp(const char* tflite_name, const string& tf_name,
+                             const string& nodedef_str,
+                             const std::vector<int>& inputs,
+                             const std::vector<int>& outputs) {
   static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
   reg.builtin_code = BuiltinOperator_CUSTOM;
   reg.custom_name = tflite_name;
@@ -153,5 +154,5 @@ void FlexModelTest::AddTfOp(const char* tflite_name, const string& tf_name,
 }
 
 }  // namespace testing
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/flex/test_util.h b/tensorflow/contrib/lite/delegates/eager/test_util.h
similarity index 90%
rename from tensorflow/contrib/lite/delegates/flex/test_util.h
rename to tensorflow/contrib/lite/delegates/eager/test_util.h
index a8c81b90a3..816db41931 100644
--- a/tensorflow/contrib/lite/delegates/flex/test_util.h
+++ b/tensorflow/contrib/lite/delegates/eager/test_util.h
@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_
 
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace testing {
 
 enum TfOpType {
@@ -35,12 +35,12 @@ enum TfOpType {
 };
 
 // This class creates models with TF and TFLite ops. In order to use this class
-// to test the Flex delegate, implement a function that calls
+// to test the Eager delegate, implement a function that calls
 // interpreter->ModifyGraphWithDelegate.
-class FlexModelTest : public ::testing::Test {
+class EagerModelTest : public ::testing::Test {
  public:
-  FlexModelTest() {}
-  ~FlexModelTest() {}
+  EagerModelTest() {}
+  ~EagerModelTest() {}
 
   bool Invoke();
 
@@ -104,7 +104,7 @@ class FlexModelTest : public ::testing::Test {
 
  private:
   // Helper method to add a TensorFlow op. tflite_names needs to start with
-  // "Flex" in order to work with the Flex delegate.
+  // "Eager" in order to work with the Eager delegate.
   void AddTfOp(const char* tflite_name, const string& tf_name,
                const string& nodedef_str, const std::vector<int>& inputs,
                const std::vector<int>& outputs);
@@ -113,7 +113,7 @@ class FlexModelTest : public ::testing::Test {
 };
 
 }  // namespace testing
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_
diff --git a/tensorflow/contrib/lite/delegates/flex/util.cc b/tensorflow/contrib/lite/delegates/eager/util.cc
similarity index 96%
rename from tensorflow/contrib/lite/delegates/flex/util.cc
rename to tensorflow/contrib/lite/delegates/eager/util.cc
index 829bc388bf..051246bf86 100644
--- a/tensorflow/contrib/lite/delegates/flex/util.cc
+++ b/tensorflow/contrib/lite/delegates/eager/util.cc
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/util.h"
+#include "tensorflow/contrib/lite/delegates/eager/util.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 
 TfLiteStatus ConvertStatus(TfLiteContext* context,
                            const tensorflow::Status& status) {
@@ -100,5 +100,5 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) {
   }
 }
 
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/flex/util.h b/tensorflow/contrib/lite/delegates/eager/util.h
similarity index 89%
rename from tensorflow/contrib/lite/delegates/flex/util.h
rename to tensorflow/contrib/lite/delegates/eager/util.h
index 7f910e7316..930cb99cb9 100644
--- a/tensorflow/contrib/lite/delegates/flex/util.h
+++ b/tensorflow/contrib/lite/delegates/eager/util.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_
 
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
@@ -21,7 +21,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 
 // Converts a tensorflow:Status into a TfLiteStatus. If the original status
 // represented an error, reports it using the given 'context'.
@@ -41,7 +41,7 @@ TF_DataType GetTensorFlowDataType(TfLiteType type);
 // Returns the TfLiteType that corresponds to the given TF C API Data type.
 TfLiteType GetTensorFlowLiteType(TF_DataType);
 
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_
diff --git a/tensorflow/contrib/lite/delegates/flex/util_test.cc b/tensorflow/contrib/lite/delegates/eager/util_test.cc
similarity index 97%
rename from tensorflow/contrib/lite/delegates/flex/util_test.cc
rename to tensorflow/contrib/lite/delegates/eager/util_test.cc
index 5f049e7b0a..aebc91149c 100644
--- a/tensorflow/contrib/lite/delegates/flex/util_test.cc
+++ b/tensorflow/contrib/lite/delegates/eager/util_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/flex/util.h"
+#include "tensorflow/contrib/lite/delegates/eager/util.h"
 
 #include <cstdarg>
 
@@ -22,7 +22,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
-namespace flex {
+namespace eager {
 namespace {
 
 using tensorflow::DT_FLOAT;
@@ -132,7 +132,7 @@ TEST(UtilTest, TypeConversionsFromTensorFlow) {
 }
 
 }  // namespace
-}  // namespace flex
+}  // namespace eager
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index 9402105fa7..2f4b663a28 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -125,7 +125,7 @@ TfLiteStatus UnsupportedTensorFlowOp(TfLiteContext* context, TfLiteNode* node) {
   context->ReportError(
       context,
       "Regular TensorFlow ops are not supported by this interpreter. Make sure "
-      "you invoke the Flex delegate before inference.");
+      "you invoke the Eager delegate before inference.");
   return kTfLiteError;
 }
 
@@ -136,13 +136,13 @@ const TfLiteRegistration* BuiltinOpResolver::FindOp(tflite::BuiltinOperator op,
 
 const TfLiteRegistration* BuiltinOpResolver::FindOp(const char* op,
                                                     int version) const {
-  // Return the NULL Op for all ops whose name start with "Flex", allowing
+  // Return the NULL Op for all ops whose name start with "Eager", allowing
   // the interpreter to delegate their execution.
-  if (IsFlexOp(op)) {
+  if (IsEagerOp(op)) {
     static TfLiteRegistration null_op{
         nullptr, nullptr, &UnsupportedTensorFlowOp,
         nullptr, nullptr, BuiltinOperator_CUSTOM,
-        "Flex",  1};
+        "Eager", 1};
     return &null_op;
   }
   return MutableOpResolver::FindOp(op, version);
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index eff6181a61..ea2817beec 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -28,7 +28,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/nnapi_delegate.h"
 #endif
 #if defined(TFLITE_EXTENDED)
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
 #endif
 #include "tensorflow/contrib/lite/version.h"
 
@@ -451,7 +451,7 @@ TfLiteStatus InterpreterBuilder::operator()(
   (**interpreter).SetVariables(std::move(variables));
 
 #if defined(TFLITE_EXTENDED)
-  if (auto delegate = FlexDelegate::Create()) {
+  if (auto delegate = EagerDelegate::Create()) {
     (**interpreter)
         .ModifyGraphWithDelegate(std::move(delegate),
                                  /*allow_dynamic_tensors=*/true);
diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 83d341c0b8..1f48a826d4 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -241,10 +241,10 @@ def build_toco_convert_protos(input_tensors,
     toco.dump_graphviz_dir = dump_graphviz_dir
   toco.dump_graphviz_include_video = dump_graphviz_video
   if converter_mode == ConverterMode.TOCO_EXTENDED:
-    toco.allow_flex_ops = True
+    toco.allow_eager_ops = True
   elif converter_mode == ConverterMode.TOCO_EXTENDED_ALL:
-    toco.allow_flex_ops = True
-    toco.force_flex_ops = True
+    toco.allow_eager_ops = True
+    toco.force_eager_ops = True
 
   model = _model_flags_pb2.ModelFlags()
   model.change_concat_input_ranges = change_concat_input_ranges
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index 89324e8a80..f112ed5cdd 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -421,7 +421,7 @@ class FromSessionTest(test_util.TensorFlowTestCase):
       interpreter.allocate_tensors()
     self.assertIn(
         'Regular TensorFlow ops are not supported by this interpreter. Make '
-        'sure you invoke the Flex delegate before inference.',
+        'sure you invoke the Eager delegate before inference.',
         str(error.exception))
 
 
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index f0bfec2338..55ef1172b2 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -164,7 +164,7 @@ cc_library(
         ":test_runner",
         "//tensorflow/contrib/lite:builtin_op_data",
         "//tensorflow/contrib/lite:framework",
-        "//tensorflow/contrib/lite/delegates/flex:delegate",
+        "//tensorflow/contrib/lite/delegates/eager:delegate",
         "//tensorflow/contrib/lite/kernels:builtin_ops",
     ],
 )
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 53bd88d087..014c80b5ef 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -343,7 +343,7 @@ def toco_convert(graph_def_str, input_tensors, output_tensors,
       opts = ("--input_arrays={0} --output_arrays={1}".format(
           ",".join(input_arrays), ",".join(output_tensors)))
     elif FLAGS.run_with_extended:
-      opts += " --allow_flex_ops --force_flex_ops"
+      opts += " --allow_eager_ops --force_eager_ops"
     cmd = ("%s --input_file=%s --output_file=%s %s > %s 2>&1" %
            (bin_path, graphdef_file.name, output_file.name, opts,
             stdout_file.name))
diff --git a/tensorflow/contrib/lite/testing/tflite_diff_flags.h b/tensorflow/contrib/lite/testing/tflite_diff_flags.h
index ad889a2f19..3874bc31d7 100644
--- a/tensorflow/contrib/lite/testing/tflite_diff_flags.h
+++ b/tensorflow/contrib/lite/testing/tflite_diff_flags.h
@@ -57,7 +57,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
                        "[optional] Number of full runs in each pass."),
       tensorflow::Flag("delegate", &values.delegate,
                        "[optional] Delegate to use for executing ops. Must be "
-                       "`{\"\", FLEX}`"),
+                       "`{\"\", EAGER}`"),
   };
 
   bool no_inputs = *argc == 1;
@@ -70,7 +70,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
              values.input_layer_shape.empty() || values.output_layer.empty()) {
     fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str());
     return {};
-  } else if (!(values.delegate == "" || values.delegate == "FLEX")) {
+  } else if (!(values.delegate == "" || values.delegate == "EAGER")) {
     fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str());
     return {};
   }
diff --git a/tensorflow/contrib/lite/testing/tflite_diff_util.h b/tensorflow/contrib/lite/testing/tflite_diff_util.h
index 28b14bd143..f67992139f 100644
--- a/tensorflow/contrib/lite/testing/tflite_diff_util.h
+++ b/tensorflow/contrib/lite/testing/tflite_diff_util.h
@@ -45,7 +45,7 @@ struct DiffOptions {
   // second pass does multiple inferences back to back.
   int num_runs_per_pass;
   // Path to the delegate library to be loaded in order to execute ops. Must be
-  // `{"", FLEX}`.
+  // `{"", EAGER}`.
   string delegate;
 };
 
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc
index 0a6da926be..1836eb53b9 100644
--- a/tensorflow/contrib/lite/testing/tflite_driver.cc
+++ b/tensorflow/contrib/lite/testing/tflite_driver.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include <iostream>
 
 #include "tensorflow/contrib/lite/builtin_op_data.h"
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
 #include "tensorflow/contrib/lite/testing/split.h"
 
 namespace tflite {
@@ -138,8 +138,8 @@ class TfLiteDriver::Expectation {
 
 TfLiteDriver::TfLiteDriver(bool use_nnapi, const string& delegate_name)
     : use_nnapi_(use_nnapi) {
-  if (delegate_name == "FLEX") {
-    delegate_ = FlexDelegate::Create();
+  if (delegate_name == "EAGER") {
+    delegate_ = EagerDelegate::Create();
   }
 }
 
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.h b/tensorflow/contrib/lite/testing/tflite_driver.h
index dc2a4e5877..aed35f877d 100644
--- a/tensorflow/contrib/lite/testing/tflite_driver.h
+++ b/tensorflow/contrib/lite/testing/tflite_driver.h
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <map>
 
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
@@ -53,7 +53,7 @@ class TfLiteDriver : public TestRunner {
 
   class Expectation;
 
-  std::unique_ptr<FlexDelegate> delegate_;
+  std::unique_ptr<EagerDelegate> delegate_;
   bool use_nnapi_ = false;
   std::unique_ptr<FlatBufferModel> model_;
   std::unique_ptr<Interpreter> interpreter_;
diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h
index 2699ac76e1..f14dbc258b 100644
--- a/tensorflow/contrib/lite/toco/args.h
+++ b/tensorflow/contrib/lite/toco/args.h
@@ -248,9 +248,9 @@ struct ParsedTocoFlags {
   Arg<int64> dedupe_array_min_size_bytes = Arg<int64>(64);
   Arg<bool> split_tflite_lstm_inputs = Arg<bool>(true);
   // WARNING: Experimental interface, subject to change
-  Arg<bool> allow_flex_ops = Arg<bool>(false);
+  Arg<bool> allow_eager_ops = Arg<bool>(false);
   // WARNING: Experimental interface, subject to change
-  Arg<bool> force_flex_ops = Arg<bool>(false);
+  Arg<bool> force_eager_ops = Arg<bool>(false);
 };
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 5eaf6e27fc..e02d000e7e 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -2123,9 +2123,9 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
   Model* model = new Model;
   internal::ConverterMapType converter_map;
 
-  // This is used for the TFLite "Full Flex Mode" conversion. All the ops are
+  // This is used for the TFLite "Full Eager Mode" conversion. All the ops are
   // imported as `TensorFlowUnsupportedOperator`, and later all these ops are
-  // converted to TFLite Flex ops.
+  // converted to TFLite Eager ops.
   if (!tf_import_flags.import_all_ops_as_unsupported) {
     converter_map = internal::GetTensorFlowNodeConverterMap();
   }
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.h b/tensorflow/contrib/lite/toco/import_tensorflow.h
index c5ff96956a..7db23f2d44 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.h
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.h
@@ -30,7 +30,7 @@ struct TensorFlowImportFlags {
 
   // Do not recognize any op and import all ops as
   // `TensorFlowUnsupportedOperator`. This is used to populated with the
-  // `force_flex_ops` flag.
+  // `force_eager_ops` flag.
   bool import_all_ops_as_unsupported = false;
 };
 
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index 5cdfd24565..fee10b1dff 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -50,16 +50,16 @@ namespace {
 details::OperatorKey GetOperatorKey(
     const ::toco::Operator& op,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_flex_ops) {
+    bool allow_eager_ops) {
   string custom_code;
   if (op.type == OperatorType::kUnsupported) {
     const TensorFlowUnsupportedOperator& unsupported_op =
         static_cast<const TensorFlowUnsupportedOperator&>(op);
 
-    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
+    // TODO(b/113715895): When `allow_eager_ops` is on, for now there's no way
     // to populate a regular custom op. We need to find a way to fix this.
-    if (allow_flex_ops) {
-      custom_code = string(::tflite::kFlexCustomCodePrefix) +
+    if (allow_eager_ops) {
+      custom_code = string(::tflite::kEagerCustomCodePrefix) +
                     unsupported_op.tensorflow_op;
     } else {
       custom_code = unsupported_op.tensorflow_op;
@@ -101,11 +101,11 @@ void LoadTensorsMap(const Model& model, TensorsMap* tensors_map) {
 void LoadOperatorsMap(
     const Model& model, OperatorsMap* operators_map,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_flex_ops) {
+    bool allow_eager_ops) {
   // First find a list of unique operator types.
   std::set<OperatorKey> keys;
   for (const auto& op : model.operators) {
-    keys.insert(GetOperatorKey(*op, ops_by_type, allow_flex_ops));
+    keys.insert(GetOperatorKey(*op, ops_by_type, allow_eager_ops));
   }
   // Now assign indices to them and fill in the map.
   int index = 0;
@@ -216,7 +216,7 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
 
   for (const auto& op : model.operators) {
     const details::OperatorKey operator_key =
-        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops);
+        GetOperatorKey(*op, ops_by_type, params.allow_eager_ops);
     int op_index = operators_map.at(operator_key);
     int op_version = operator_key.version;
 
@@ -281,7 +281,7 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
     }
 
     int op_index = operators_map.at(
-        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops));
+        GetOperatorKey(*op, ops_by_type, params.allow_eager_ops));
 
     auto tflite_op_it = ops_by_type.find(op->type);
     BaseOperator* tflite_op = tflite_op_it == ops_by_type.end()
@@ -334,7 +334,7 @@ Offset<Vector<Offset<Buffer>>> ExportBuffers(
 
 void Export(const Model& model, string* output_file_contents,
             const ExportParams& params) {
-  const auto ops_by_type = BuildOperatorByTypeMap(params.allow_flex_ops);
+  const auto ops_by_type = BuildOperatorByTypeMap(params.allow_eager_ops);
   Export(model, output_file_contents, params, ops_by_type);
 }
 
@@ -349,7 +349,7 @@ void Export(
 
   details::OperatorsMap operators_map;
   details::LoadOperatorsMap(model, &operators_map, ops_by_type,
-                            params.allow_flex_ops);
+                            params.allow_eager_ops);
 
   std::vector<const Array*> buffers_to_write;
   Array empty_array;
diff --git a/tensorflow/contrib/lite/toco/tflite/export.h b/tensorflow/contrib/lite/toco/tflite/export.h
index 29d6de4049..b070a38768 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.h
+++ b/tensorflow/contrib/lite/toco/tflite/export.h
@@ -26,7 +26,7 @@ namespace tflite {
 // The parameters for exporting a TFLite model.
 struct ExportParams {
   bool allow_custom_ops = false;
-  bool allow_flex_ops = false;
+  bool allow_eager_ops = false;
   bool quantize_weights = false;
 };
 
@@ -121,7 +121,7 @@ void LoadTensorsMap(const Model& model, TensorsMap* tensors_map);
 void LoadOperatorsMap(
     const Model& model, OperatorsMap* operators_map,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_flex_ops);
+    bool allow_eager_ops);
 
 }  // namespace details
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index 93882a91a7..8d4d197c46 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -105,7 +105,7 @@ TEST_F(ExportTest, LoadOperatorsMap) {
 
   details::OperatorsMap operators;
   const auto ops_by_type = BuildOperatorByTypeMap();
-  // TODO(ycling): Add a test for allow_flex_ops.
+  // TODO(ycling): Add a test for allow_eager_ops.
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
   EXPECT_EQ(0, operators[details::OperatorKey(OperatorType::kAdd, "", 1)]);
   EXPECT_EQ(1, operators[details::OperatorKey(OperatorType::kConv, "", 1)]);
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 9addbb81e7..ca2a6a19b3 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -1160,8 +1160,8 @@ class Unpack : public BuiltinOperator<UnpackOperator, ::tflite::UnpackOptions,
 class TensorFlowUnsupported : public BaseOperator {
  public:
   TensorFlowUnsupported(const string& name, OperatorType type,
-                        bool allow_flex_ops)
-      : BaseOperator(name, type), allow_flex_ops_(allow_flex_ops) {}
+                        bool allow_eager_ops)
+      : BaseOperator(name, type), allow_eager_ops_(allow_eager_ops) {}
 
   Options Serialize(const Operator& op,
                     flatbuffers::FlatBufferBuilder* builder) const override {
@@ -1177,9 +1177,9 @@ class TensorFlowUnsupported : public BaseOperator {
   std::unique_ptr<Operator> Deserialize(
       const BuiltinOptions* builtin_options,
       const CustomOptions* custom_options) const override {
-    // Deserializing Flex ops doesn't work now.
+    // Deserializing Eager ops doesn't work now.
     // TODO(ycling): Revisit and decide if we should fix the flow for importing
-    // TFLite models with Flex ops.
+    // TFLite models with Eager ops.
     auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
     if (custom_options) {
       auto flexbuffer_map =
@@ -1200,13 +1200,13 @@ class TensorFlowUnsupported : public BaseOperator {
       return std::unique_ptr<flexbuffers::Builder>();
     }
 
-    if (allow_flex_ops_) {
+    if (allow_eager_ops_) {
       fbb->Vector([&]() {
         fbb->String(node_def.op());
         fbb->String(op.tensorflow_node_def);
       });
       fbb->Finish();
-      LOG(INFO) << "Writing flex op: " << node_def.op();
+      LOG(INFO) << "Writing eager op: " << node_def.op();
       return std::unique_ptr<flexbuffers::Builder>(fbb.release());
     }
 
@@ -1316,13 +1316,13 @@ class TensorFlowUnsupported : public BaseOperator {
   }
 
  private:
-  const bool allow_flex_ops_;
+  const bool allow_eager_ops_;
 };
 
 namespace {
 // Build a vector containing all the known operators.
 std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
-    bool allow_flex_ops = false) {
+    bool allow_eager_ops = false) {
   std::vector<std::unique_ptr<BaseOperator>> ops;
   using tensorflow::MakeUnique;
   // Builtin Operators.
@@ -1434,7 +1434,7 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
   ops.push_back(MakeUnique<CTCBeamSearchDecoder>(
       "CTC_BEAM_SEARCH_DECODER", OperatorType::kCTCBeamSearchDecoder));
   ops.push_back(MakeUnique<TensorFlowUnsupported>(
-      "TENSORFLOW_UNSUPPORTED", OperatorType::kUnsupported, allow_flex_ops));
+      "TENSORFLOW_UNSUPPORTED", OperatorType::kUnsupported, allow_eager_ops));
 
   // There operators are supported by Toco, but not by TF Lite, and has no
   // attributes.
@@ -1512,11 +1512,11 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
 }  // namespace
 
 std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
-    bool allow_flex_ops) {
+    bool allow_eager_ops) {
   std::map<OperatorType, std::unique_ptr<BaseOperator>> result;
 
   std::vector<std::unique_ptr<BaseOperator>> ops =
-      BuildOperatorList(allow_flex_ops);
+      BuildOperatorList(allow_eager_ops);
   for (auto& op : ops) {
     result[op->type()] = std::move(op);
   }
@@ -1525,11 +1525,11 @@ std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
 }
 
 std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
-    bool allow_flex_ops) {
+    bool allow_eager_ops) {
   std::map<string, std::unique_ptr<BaseOperator>> result;
 
   std::vector<std::unique_ptr<BaseOperator>> ops =
-      BuildOperatorList(allow_flex_ops);
+      BuildOperatorList(allow_eager_ops);
   for (auto& op : ops) {
     result[op->name()] = std::move(op);
   }
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h
index 13d9f6c49a..702fb28ea6 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.h
+++ b/tensorflow/contrib/lite/toco/tflite/operator.h
@@ -26,15 +26,15 @@ namespace tflite {
 class BaseOperator;
 
 // Return a map contained all know TF Lite Operators, keyed by their names.
-// TODO(ycling): The pattern to propagate parameters (e.g. allow_flex_ops)
+// TODO(ycling): The pattern to propagate parameters (e.g. allow_eager_ops)
 // is ugly here. Consider refactoring.
 std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
-    bool allow_flex_ops = false);
+    bool allow_eager_ops = false);
 
 // Return a map contained all know TF Lite Operators, keyed by the type of
 // their tf.mini counterparts.
 std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
-    bool allow_flex_ops = false);
+    bool allow_eager_ops = false);
 
 // These are the flatbuffer types for custom and builtin options.
 using CustomOptions = flatbuffers::Vector<uint8_t>;
diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
index cff79776bc..b6aebc0470 100644
--- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
@@ -167,11 +167,11 @@ bool ParseTocoFlagsFromCommandLineFlags(
            "converted float model. Model size will be reduced and there will "
            "be latency improvements (at the cost of accuracy)."),
       // WARNING: Experimental interface, subject to change
-      Flag("allow_flex_ops", parsed_flags.allow_flex_ops.bind(),
-           parsed_flags.allow_flex_ops.default_value(), ""),
+      Flag("allow_eager_ops", parsed_flags.allow_eager_ops.bind(),
+           parsed_flags.allow_eager_ops.default_value(), ""),
       // WARNING: Experimental interface, subject to change
-      Flag("force_flex_ops", parsed_flags.force_flex_ops.bind(),
-           parsed_flags.force_flex_ops.default_value(), "")};
+      Flag("force_eager_ops", parsed_flags.force_eager_ops.bind(),
+           parsed_flags.force_eager_ops.default_value(), "")};
   bool asked_for_help =
       *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help"));
   if (asked_for_help) {
@@ -266,15 +266,15 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags,
   READ_TOCO_FLAG(split_tflite_lstm_inputs, FlagRequirement::kNone);
   READ_TOCO_FLAG(quantize_weights, FlagRequirement::kNone);
   READ_TOCO_FLAG(post_training_quantize, FlagRequirement::kNone);
-  READ_TOCO_FLAG(allow_flex_ops, FlagRequirement::kNone);
-  READ_TOCO_FLAG(force_flex_ops, FlagRequirement::kNone);
+  READ_TOCO_FLAG(allow_eager_ops, FlagRequirement::kNone);
+  READ_TOCO_FLAG(force_eager_ops, FlagRequirement::kNone);
 
-  if (parsed_toco_flags.force_flex_ops.value() &&
-      !parsed_toco_flags.allow_flex_ops.value()) {
-    // TODO(ycling): Consider to enforce `allow_flex_ops` when
-    // `force_flex_ops` is true.
-    LOG(WARNING) << "--force_flex_ops should always be used with "
-                    "--allow_flex_ops.";
+  if (parsed_toco_flags.force_eager_ops.value() &&
+      !parsed_toco_flags.allow_eager_ops.value()) {
+    // TODO(ycling): Consider to enforce `allow_eager_ops` when
+    // `force_eager_ops` is true.
+    LOG(WARNING) << "--force_eager_ops should always be used with "
+                    "--allow_eager_ops.";
   }
 
   // Deprecated flag handling.
diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto
index ca3e64485e..53d60fed05 100644
--- a/tensorflow/contrib/lite/toco/toco_flags.proto
+++ b/tensorflow/contrib/lite/toco/toco_flags.proto
@@ -190,16 +190,16 @@ message TocoFlags {
   // (at the cost of accuracy).
   optional bool post_training_quantize = 26 [default = false];
 
-  // When enabled, unsupported ops will be converted to TFLite Flex ops.
+  // When enabled, unsupported ops will be converted to TFLite Eager ops.
   // TODO(ycling): Consider to rename the following 2 flags and don't call it
-  // "Flex".
-  // `allow_flex_ops` should always be used with `allow_custom_ops`.
+  // "Eager".
+  // `allow_eager_ops` should always be used with `allow_custom_ops`.
   // WARNING: Experimental interface, subject to change
-  optional bool allow_flex_ops = 27 [default = false];
+  optional bool allow_eager_ops = 27 [default = false];
 
-  // When enabled, all TensorFlow ops will be converted to TFLite Flex
-  // ops directly. This will force `allow_flex_ops` to true.
-  // `force_flex_ops` should always be used with `allow_flex_ops`.
+  // When enabled, all TensorFlow ops will be converted to TFLite Eager
+  // ops directly. This will force `allow_eager_ops` to true.
+  // `force_eager_ops` should always be used with `allow_eager_ops`.
   // WARNING: Experimental interface, subject to change
-  optional bool force_flex_ops = 28 [default = false];
+  optional bool force_eager_ops = 28 [default = false];
 }
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index 106494f354..a08b02485f 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -198,7 +198,7 @@ std::unique_ptr<Model> Import(const TocoFlags& toco_flags,
               : (toco_flags.output_format() != TENSORFLOW_GRAPHDEF);
 
       tf_import_flags.import_all_ops_as_unsupported =
-          toco_flags.force_flex_ops();
+          toco_flags.force_eager_ops();
 
       model = ImportTensorFlowGraphDef(model_flags, tf_import_flags,
                                        input_file_contents);
@@ -409,9 +409,9 @@ void Export(const TocoFlags& toco_flags, const Model& model,
     case TFLITE: {
       toco::tflite::ExportParams params;
 
-      // Always allow custom ops when flex ops are allowed.
-      if (toco_flags.force_flex_ops() || toco_flags.allow_flex_ops()) {
-        params.allow_flex_ops = true;
+      // Always allow custom ops when eager ops are allowed.
+      if (toco_flags.force_eager_ops() || toco_flags.allow_eager_ops()) {
+        params.allow_eager_ops = true;
         params.allow_custom_ops = true;
       } else if (allow_custom_ops) {
         params.allow_custom_ops = true;
diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD
index bc18d40313..dc97d22401 100644
--- a/tensorflow/contrib/lite/tools/benchmark/BUILD
+++ b/tensorflow/contrib/lite/tools/benchmark/BUILD
@@ -36,7 +36,7 @@ cc_binary(
 )
 
 cc_binary(
-    name = "benchmark_model_plus_flex",
+    name = "benchmark_model_plus_eager",
     srcs = [
         "benchmark_main.cc",
     ],
@@ -49,7 +49,7 @@ cc_binary(
         "//conditions:default": [],
     }),
     deps = [
-        ":benchmark_tflite_model_plus_flex_lib",
+        ":benchmark_tflite_model_plus_eager_lib",
         ":logging",
     ],
 )
@@ -111,7 +111,7 @@ cc_library(
 )
 
 cc_library(
-    name = "benchmark_tflite_model_plus_flex_lib",
+    name = "benchmark_tflite_model_plus_eager_lib",
     srcs = [
         "benchmark_tflite_model.cc",
         "logging.h",
@@ -123,7 +123,7 @@ cc_library(
         ":logging",
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite:string_util",
-        "//tensorflow/contrib/lite/delegates/flex:delegate",
+        "//tensorflow/contrib/lite/delegates/eager:delegate",
         "//tensorflow/contrib/lite/kernels:builtin_ops",
         "//tensorflow/contrib/lite/profiling:profile_summarizer",
     ],
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
index d989ee720d..ef4f0fa80d 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -24,7 +24,7 @@ limitations under the License.
 #include <vector>
 
 #ifdef TFLITE_EXTENDED
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
 #endif  // TFLITE_EXTENDED
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
@@ -306,8 +306,8 @@ void BenchmarkTfLiteModel::Init() {
   interpreter->UseNNAPI(use_nnapi);
 
 #ifdef TFLITE_EXTENDED
-  TFLITE_LOG(INFO) << "Instantiating Flex Delegate";
-  delegate_ = FlexDelegate::Create();
+  TFLITE_LOG(INFO) << "Instantiating Eager Delegate";
+  delegate_ = EagerDelegate::Create();
   if (delegate_) {
     interpreter->ModifyGraphWithDelegate(delegate_.get(),
                                          /*allow_dynamic_tensors=*/true);
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
index 9343824b4a..8541512bc8 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include <vector>
 
 #ifdef TFLITE_EXTENDED
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
+#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
 #endif  // TFLITE_EXTENDED
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
@@ -74,7 +74,7 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
 
  private:
 #ifdef TFLITE_EXTENDED
-  std::unique_ptr<FlexDelegate> delegate_;
+  std::unique_ptr<EagerDelegate> delegate_;
 #endif  // TFLITE_EXTENDED
   std::unique_ptr<tflite::FlatBufferModel> model;
   std::unique_ptr<tflite::Interpreter> interpreter;
diff --git a/tensorflow/contrib/lite/util.cc b/tensorflow/contrib/lite/util.cc
index 6aa35b5227..7950653da9 100644
--- a/tensorflow/contrib/lite/util.cc
+++ b/tensorflow/contrib/lite/util.cc
@@ -18,9 +18,9 @@ limitations under the License.
 
 namespace tflite {
 
-bool IsFlexOp(const char* custom_name) {
-  return custom_name && strncmp(custom_name, kFlexCustomCodePrefix,
-                                strlen(kFlexCustomCodePrefix)) == 0;
+bool IsEagerOp(const char* custom_name) {
+  return custom_name && strncmp(custom_name, kEagerCustomCodePrefix,
+                                strlen(kEagerCustomCodePrefix)) == 0;
 }
 
 TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector<int>& input) {
diff --git a/tensorflow/contrib/lite/util.h b/tensorflow/contrib/lite/util.h
index 31292a6f81..6d81f844f8 100644
--- a/tensorflow/contrib/lite/util.h
+++ b/tensorflow/contrib/lite/util.h
@@ -26,15 +26,15 @@ limitations under the License.
 
 namespace tflite {
 
-// The prefix of Flex op custom code.
+// The prefix of Eager op custom code.
 // This will be matched agains the `custom_code` field in `OperatorCode`
 // Flatbuffer Table.
 // WARNING: This is an experimental API and subject to change.
-constexpr char kFlexCustomCodePrefix[] = "Flex";
+constexpr char kEagerCustomCodePrefix[] = "Eager";
 
 // Checks whether the prefix of the custom name indicates the operation is an
-// Flex operation.
-bool IsFlexOp(const char* custom_name);
+// Eager operation.
+bool IsEagerOp(const char* custom_name);
 
 // Converts a `std::vector` to a `TfLiteIntArray`. The caller takes ownership
 // of the returned pointer.
diff --git a/tensorflow/contrib/lite/util_test.cc b/tensorflow/contrib/lite/util_test.cc
index 25f3aded71..c5c1709f1d 100644
--- a/tensorflow/contrib/lite/util_test.cc
+++ b/tensorflow/contrib/lite/util_test.cc
@@ -41,14 +41,14 @@ TEST(ConvertVectorToTfLiteIntArray, TestWithEmptyVector) {
   TfLiteIntArrayFree(output);
 }
 
-TEST(UtilTest, IsFlexOp) {
-  EXPECT_TRUE(IsFlexOp("Flex"));
-  EXPECT_TRUE(IsFlexOp("FlexOp"));
-  EXPECT_FALSE(IsFlexOp("flex"));
-  EXPECT_FALSE(IsFlexOp("Fle"));
-  EXPECT_FALSE(IsFlexOp("OpFlex"));
-  EXPECT_FALSE(IsFlexOp(nullptr));
-  EXPECT_FALSE(IsFlexOp(""));
+TEST(UtilTest, IsEagerOp) {
+  EXPECT_TRUE(IsEagerOp("Eager"));
+  EXPECT_TRUE(IsEagerOp("EagerOp"));
+  EXPECT_FALSE(IsEagerOp("eager"));
+  EXPECT_FALSE(IsEagerOp("Eage"));
+  EXPECT_FALSE(IsEagerOp("OpEager"));
+  EXPECT_FALSE(IsEagerOp(nullptr));
+  EXPECT_FALSE(IsEagerOp(""));
 }
 
 }  // namespace
-- 
GitLab


From 5b971c7eae5f2049a4725b16a4a44b688d3506b0 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Wed, 26 Sep 2018 19:16:58 -0700
Subject: [PATCH 0778/1357] Fix the eval hook to run the correct number of
 steps when using TPU strategy

PiperOrigin-RevId: 214709465
---
 tensorflow/python/estimator/estimator.py      | 23 ++++++-
 .../training/basic_session_run_hooks.py       |  5 +-
 tensorflow/python/training/evaluation.py      | 68 ++++++++++++++++++-
 3 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index eec64ad452..827b405e51 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -474,11 +474,31 @@ class Estimator(object):
           return _evaluate()
 
   def _convert_eval_steps_to_hooks(self, steps):
+    """Create hooks to run correct number of steps in evaluation.
+
+    Args:
+      steps: number of steps to run during evaluation.
+
+    Raises:
+      ValueError: if steps is less than or equal to zero.
+
+    Returns:
+      List of hooks to be passed to the estimator.
+    """
     if steps is None:
       return []
 
     if steps <= 0:
       raise ValueError('Must specify steps > 0, given: {}'.format(steps))
+
+    # The hooks are declared as private in evaluation.py discourage the use
+    # by other libraries or open source users. This should be the only usage
+    # of the estimator evaluation hooks.
+    if self._eval_distribution:
+      steps_per_run = getattr(self._eval_distribution, 'steps_per_run', 1)
+      if steps_per_run > 1:
+        return [evaluation._MultiStepStopAfterNEvalsHook(  # pylint: disable=protected-access
+            num_evals=steps, steps_per_run=steps_per_run)]
     return [evaluation._StopAfterNEvalsHook(num_evals=steps)]  # pylint: disable=protected-access
 
   def predict(self,
@@ -1474,6 +1494,7 @@ class Estimator(object):
         self._eval_distribution.__class__.__name__ == 'TPUStrategy')
 
     if is_tpu_strategy:
+      steps_per_run_variable = training.get_or_create_steps_per_run_variable()
       def step_fn(ctx, features, labels=None):
         """Runs one step of the eval computation and captures outputs."""
         estimator_spec = self._eval_distribution.call_for_each_tower(
@@ -1490,7 +1511,7 @@ class Estimator(object):
 
       # TODO(priyag): Fix eval step hook to account for steps_per_run.
       ctx = self._eval_distribution.run_steps_on_dataset(
-          step_fn, iterator, iterations=self._eval_distribution.steps_per_run)
+          step_fn, iterator, iterations=steps_per_run_variable)
       update_op = ctx.run_op
       eval_dict = ctx.non_tensor_outputs['eval_dict']
       grouped_estimator_spec = ctx.non_tensor_outputs['estimator_spec']
diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py
index 3bd4bd75bd..1efabcd854 100644
--- a/tensorflow/python/training/basic_session_run_hooks.py
+++ b/tensorflow/python/training/basic_session_run_hooks.py
@@ -344,7 +344,7 @@ class _MultiStepStopAtStepHook(session_run_hook.SessionRunHook):
       raise ValueError("steps_per_run should be greater than 0")
     self._num_steps = num_steps
     self._last_step = last_step
-    self._steps_per_run = steps_per_run
+    self._steps_per_run_initial_value = steps_per_run
 
   def begin(self):
     self._global_step_tensor = training_util.get_global_step()
@@ -353,7 +353,8 @@ class _MultiStepStopAtStepHook(session_run_hook.SessionRunHook):
     self._steps_per_run_variable = get_or_create_steps_per_run_variable()
 
   def _update_steps_per_run_variable(self, global_step, session):
-    steps = min(self._last_step - global_step, self._steps_per_run)
+    steps = min(self._last_step - global_step,
+                self._steps_per_run_initial_value)
     self._steps_per_run_variable.load(steps, session=session)
 
   def after_create_session(self, session, coord):
diff --git a/tensorflow/python/training/evaluation.py b/tensorflow/python/training/evaluation.py
index b36444a14c..2c4eb02d53 100644
--- a/tensorflow/python/training/evaluation.py
+++ b/tensorflow/python/training/evaluation.py
@@ -18,13 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import time
 import math
+import time
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
@@ -77,6 +78,59 @@ def _get_latest_eval_step_value(update_ops):
     return array_ops.identity(_get_or_create_eval_step().read_value())
 
 
+class _MultiStepStopAfterNEvalsHook(session_run_hook.SessionRunHook):
+  """Run hook used by the evaluation routines to run the `eval_ops` N times."""
+
+  def __init__(self, num_evals, steps_per_run=1):
+    """Constructs the run hook.
+
+    Args:
+      num_evals: The number of evaluations to run for. if set to None, will
+        iterate the dataset until all inputs are exhausted.
+      steps_per_run: Number of steps executed per run call.
+    """
+    self._num_evals = num_evals
+    self._evals_completed = None
+    self._steps_per_run_initial_value = steps_per_run
+
+  def _set_evals_completed_tensor(self, updated_eval_step):
+    self._evals_completed = updated_eval_step
+
+  def begin(self):
+    self._steps_per_run_variable = \
+        basic_session_run_hooks.get_or_create_steps_per_run_variable()
+
+  def after_create_session(self, session, coord):
+    # Update number of steps to run in the first run call
+    if  self._num_evals is None:
+      steps = self._steps_per_run_initial_value
+    else:
+      steps = min(self._steps_per_run_initial_value, self._num_evals)
+    self._steps_per_run_variable.load(steps, session=session)
+
+  def before_run(self, run_context):
+    return session_run_hook.SessionRunArgs({
+        'evals_completed': self._evals_completed
+    })
+
+  def after_run(self, run_context, run_values):
+    evals_completed = run_values.results['evals_completed']
+    # Update number of steps to run in the next iteration
+    if  self._num_evals is None:
+      steps = self._steps_per_run_initial_value
+    else:
+      steps = min(self._num_evals - evals_completed,
+                  self._steps_per_run_initial_value)
+    self._steps_per_run_variable.load(steps, session=run_context.session)
+
+    if self._num_evals is None:
+      logging.info('Evaluation [%d]', evals_completed)
+    else:
+      logging.info('Evaluation [%d/%d]', evals_completed, self._num_evals)
+    if self._num_evals is not None and evals_completed >= self._num_evals:
+      run_context.request_stop()
+
+
 class _StopAfterNEvalsHook(session_run_hook.SessionRunHook):
   """Run hook used by the evaluation routines to run the `eval_ops` N times."""
 
@@ -176,7 +230,15 @@ def _evaluate_once(checkpoint_path,
   hooks = list(hooks or [])
 
   if eval_ops is not None:
-    update_eval_step = state_ops.assign_add(eval_step, 1, use_locking=True)
+    if any([isinstance(h, _MultiStepStopAfterNEvalsHook) for h in hooks]):
+      steps_per_run_variable = \
+          basic_session_run_hooks.get_or_create_steps_per_run_variable()
+      update_eval_step = state_ops.assign_add(
+          eval_step,
+          math_ops.cast(steps_per_run_variable, dtype=eval_step.dtype),
+          use_locking=True)
+    else:
+      update_eval_step = state_ops.assign_add(eval_step, 1, use_locking=True)
 
     if isinstance(eval_ops, dict):
       eval_ops['update_eval_step'] = update_eval_step
@@ -188,7 +250,7 @@ def _evaluate_once(checkpoint_path,
     eval_step_value = _get_latest_eval_step_value(eval_ops)
 
     for h in hooks:
-      if isinstance(h, _StopAfterNEvalsHook):
+      if isinstance(h, (_StopAfterNEvalsHook, _MultiStepStopAfterNEvalsHook)):
         h._set_evals_completed_tensor(eval_step_value)  # pylint: disable=protected-access
 
   logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
-- 
GitLab


From 85258e06edf424492905fd032b02ff4d420b9da1 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Wed, 26 Sep 2018 19:28:14 -0700
Subject: [PATCH 0779/1357] Rename TocoConverter to TFLiteConverter.

PiperOrigin-RevId: 214710175
---
 tensorflow/contrib/lite/python/convert.py     |   7 +-
 tensorflow/contrib/lite/python/lite.py        |  94 ++++++++--
 tensorflow/contrib/lite/python/lite_test.py   | 171 +++++++++++++-----
 .../contrib/lite/python/tflite_convert.py     |  12 +-
 .../contrib/lite/toco/g3doc/python_api.md     |  11 +-
 tensorflow/contrib/lite/toco/tflite/export.cc |   2 +-
 6 files changed, 215 insertions(+), 82 deletions(-)

diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 1f48a826d4..627be8f44f 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -343,13 +343,14 @@ def toco_convert_impl(input_data, input_tensors, output_tensors, *args,
   return data
 
 
-@deprecation.deprecated(None, "Use `lite.TocoConverter` instead.")
+@deprecation.deprecated(None, "Use `lite.TFLiteConverter` instead.")
 def toco_convert(input_data, input_tensors, output_tensors, *args, **kwargs):
-  """"Convert a model using TOCO.
+  """Convert a model using TOCO.
 
   Typically this function is used to convert from TensorFlow GraphDef to TFLite.
   Conversion can be customized by providing arguments that are forwarded to
-  `build_toco_convert_protos` (see documentation for details).
+  `build_toco_convert_protos` (see documentation for details). This function has
+  been deprecated. Please use `lite.TFLiteConverter` instead.
 
   Args:
     input_data: Input data (i.e. often `sess.graph_def`),
diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py
index 2be24455d8..09365f101f 100644
--- a/tensorflow/contrib/lite/python/lite.py
+++ b/tensorflow/contrib/lite/python/lite.py
@@ -17,6 +17,7 @@
 EXPERIMENTAL: APIs here are unstable and likely to change without notice.
 
 @@TocoConverter
+@@TFLiteConverter
 @@toco_convert
 @@toco_convert_protos
 @@Interpreter
@@ -62,9 +63,10 @@ from tensorflow.python.framework.importer import import_graph_def as _import_gra
 from tensorflow.python.lib.io import file_io as _file_io
 from tensorflow.python.saved_model import signature_constants as _signature_constants
 from tensorflow.python.saved_model import tag_constants as _tag_constants
+from tensorflow.python.util import deprecation as _deprecation
 
 
-class TocoConverter(object):
+class TFLiteConverter(object):
   """Convert a TensorFlow model into `output_format` using TOCO.
 
   This is used to convert from a TensorFlow GraphDef or SavedModel into either a
@@ -121,22 +123,22 @@ class TocoConverter(object):
 
     ```python
     # Converting a GraphDef from session.
-    converter = lite.TocoConverter.from_session(sess, in_tensors, out_tensors)
+    converter = lite.TFLiteConverter.from_session(sess, in_tensors, out_tensors)
     tflite_model = converter.convert()
     open("converted_model.tflite", "wb").write(tflite_model)
 
     # Converting a GraphDef from file.
-    converter = lite.TocoConverter.from_frozen_graph(
+    converter = lite.TFLiteConverter.from_frozen_graph(
       graph_def_file, input_arrays, output_arrays)
     tflite_model = converter.convert()
     open("converted_model.tflite", "wb").write(tflite_model)
 
     # Converting a SavedModel.
-    converter = lite.TocoConverter.from_saved_model(saved_model_dir)
+    converter = lite.TFLiteConverter.from_saved_model(saved_model_dir)
     tflite_model = converter.convert()
 
     # Converting a tf.keras model.
-    converter = lite.TocoConverter.from_keras_model_file(keras_model)
+    converter = lite.TFLiteConverter.from_keras_model_file(keras_model)
     tflite_model = converter.convert()
     ```
   """
@@ -147,10 +149,9 @@ class TocoConverter(object):
                output_tensors,
                input_arrays_with_shape=None,
                output_arrays=None):
-    """Constructor for TocoConverter.
+    """Constructor for TFLiteConverter.
 
     Args:
-
       graph_def: Frozen TensorFlow GraphDef.
       input_tensors: List of input tensors. Type and shape are computed using
         `foo.get_shape()` and `foo.dtype`.
@@ -158,8 +159,8 @@ class TocoConverter(object):
       input_arrays_with_shape: Tuple of strings representing input tensor names
         and list of integers representing input shapes
         (e.g., [("foo" : [1, 16, 16, 3])]). Use only when graph cannot be loaded
-        into TensorFlow and when `input_tensors` and `output_tensors` are None.
-        (default None)
+          into TensorFlow and when `input_tensors` and `output_tensors` are
+          None. (default None)
       output_arrays: List of output tensors to freeze graph with. Use only when
         graph cannot be loaded into TensorFlow and when `input_tensors` and
         `output_tensors` are None. (default None)
@@ -195,7 +196,7 @@ class TocoConverter(object):
 
   @classmethod
   def from_session(cls, sess, input_tensors, output_tensors):
-    """Creates a TocoConverter class from a TensorFlow Session.
+    """Creates a TFLiteConverter class from a TensorFlow Session.
 
     Args:
       sess: TensorFlow Session.
@@ -204,7 +205,7 @@ class TocoConverter(object):
       output_tensors: List of output tensors (only .name is used from this).
 
     Returns:
-      TocoConverter class.
+      TFLiteConverter class.
     """
     graph_def = _freeze_graph(sess, output_tensors)
     return cls(graph_def, input_tensors, output_tensors)
@@ -215,7 +216,7 @@ class TocoConverter(object):
                         input_arrays,
                         output_arrays,
                         input_shapes=None):
-    """Creates a TocoConverter class from a file containing a frozen GraphDef.
+    """Creates a TFLiteConverter class from a file containing a frozen GraphDef.
 
     Args:
       graph_def_file: Full filepath of file containing frozen GraphDef.
@@ -224,10 +225,10 @@ class TocoConverter(object):
       input_shapes: Dict of strings representing input tensor names to list of
         integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}).
         Automatically determined when input shapes is None (e.g., {"foo" :
-        None}). (default None)
+          None}). (default None)
 
     Returns:
-      TocoConverter class.
+      TFLiteConverter class.
 
     Raises:
       IOError:
@@ -310,7 +311,7 @@ class TocoConverter(object):
                        output_arrays=None,
                        tag_set=None,
                        signature_key=None):
-    """Creates a TocoConverter class from a SavedModel.
+    """Creates a TFLiteConverter class from a SavedModel.
 
     Args:
       saved_model_dir: SavedModel directory to convert.
@@ -319,7 +320,7 @@ class TocoConverter(object):
       input_shapes: Dict of strings representing input tensor names to list of
         integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}).
         Automatically determined when input shapes is None (e.g., {"foo" :
-        None}). (default None)
+          None}). (default None)
       output_arrays: List of output tensors to freeze graph with. Uses output
         arrays from SignatureDef when none are provided. (default None)
       tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to
@@ -328,7 +329,7 @@ class TocoConverter(object):
         (default DEFAULT_SERVING_SIGNATURE_DEF_KEY)
 
     Returns:
-      TocoConverter class.
+      TFLiteConverter class.
     """
     if tag_set is None:
       tag_set = set([_tag_constants.SERVING])
@@ -346,7 +347,7 @@ class TocoConverter(object):
                             input_arrays=None,
                             input_shapes=None,
                             output_arrays=None):
-    """Creates a TocoConverter class from a tf.keras model file.
+    """Creates a TFLiteConverter class from a tf.keras model file.
 
     Args:
       model_file: Full filepath of HDF5 file containing the tf.keras model.
@@ -355,12 +356,12 @@ class TocoConverter(object):
       input_shapes: Dict of strings representing input tensor names to list of
         integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}).
         Automatically determined when input shapes is None (e.g., {"foo" :
-        None}). (default None)
+          None}). (default None)
       output_arrays: List of output tensors to freeze graph with. Uses output
         arrays from SignatureDef when none are provided. (default None)
 
     Returns:
-      TocoConverter class.
+      TFLiteConverter class.
     """
     _keras.backend.clear_session()
     _keras.backend.set_learning_phase(False)
@@ -502,6 +503,59 @@ class TocoConverter(object):
       tensor.set_shape(shape)
 
 
+class TocoConverter(object):
+  """Convert a TensorFlow model into `output_format` using TOCO.
+
+  This class has been deprecated. Please use `lite.TFLiteConverter` instead.
+  """
+
+  @classmethod
+  @_deprecation.deprecated(None,
+                           "Use `lite.TFLiteConverter.from_session` instead.")
+  def from_session(cls, sess, input_tensors, output_tensors):
+    """Creates a TocoConverter class from a TensorFlow Session."""
+    return TFLiteConverter.from_session(sess, input_tensors, output_tensors)
+
+  @classmethod
+  @_deprecation.deprecated(
+      None, "Use `lite.TFLiteConverter.from_frozen_graph` instead.")
+  def from_frozen_graph(cls,
+                        graph_def_file,
+                        input_arrays,
+                        output_arrays,
+                        input_shapes=None):
+    """Creates a TocoConverter class from a file containing a frozen graph."""
+    return TFLiteConverter.from_frozen_graph(graph_def_file, input_arrays,
+                                             output_arrays, input_shapes)
+
+  @classmethod
+  @_deprecation.deprecated(
+      None, "Use `lite.TFLiteConverter.from_saved_model` instead.")
+  def from_saved_model(cls,
+                       saved_model_dir,
+                       input_arrays=None,
+                       input_shapes=None,
+                       output_arrays=None,
+                       tag_set=None,
+                       signature_key=None):
+    """Creates a TocoConverter class from a SavedModel."""
+    return TFLiteConverter.from_saved_model(saved_model_dir, input_arrays,
+                                            input_shapes, output_arrays,
+                                            tag_set, signature_key)
+
+  @classmethod
+  @_deprecation.deprecated(
+      None, "Use `lite.TFLiteConverter.from_keras_model_file` instead.")
+  def from_keras_model_file(cls,
+                            model_file,
+                            input_arrays=None,
+                            input_shapes=None,
+                            output_arrays=None):
+    """Creates a TocoConverter class from a tf.keras model file."""
+    return TFLiteConverter.from_keras_model_file(model_file, input_arrays,
+                                                 input_shapes, output_arrays)
+
+
 def _is_frozen_graph(sess):
   """Determines if the graph is frozen.
 
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index f112ed5cdd..33f8fc1e8c 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -50,18 +50,18 @@ class FromConstructor(test_util.TensorFlowTestCase):
 
     # `output_arrays` is not defined.
     with self.assertRaises(ValueError) as error:
-      lite.TocoConverter(
+      lite.TFLiteConverter(
           None, None, [], input_arrays_with_shape=[('input', [3, 9])])
     self.assertEqual(message, str(error.exception))
 
     # `input_arrays_with_shape` is not defined.
     with self.assertRaises(ValueError) as error:
-      lite.TocoConverter(None, [], None, output_arrays=['output'])
+      lite.TFLiteConverter(None, [], None, output_arrays=['output'])
     self.assertEqual(message, str(error.exception))
 
   # Tests valid constructors using a dummy value for the GraphDef.
   def testValidConstructor(self):
-    converter = lite.TocoConverter(
+    converter = lite.TFLiteConverter(
         None,
         None,
         None,
@@ -76,7 +76,7 @@ class FromConstructor(test_util.TensorFlowTestCase):
         'The batch size cannot be set for this model. Please use '
         'input_shapes parameter.', str(error.exception))
 
-    converter = lite.TocoConverter(None, ['input_tensor'], ['output_tensor'])
+    converter = lite.TFLiteConverter(None, ['input_tensor'], ['output_tensor'])
     self.assertTrue(converter._has_valid_tensors())
 
 
@@ -89,7 +89,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -121,7 +122,7 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(
+    converter = lite.TFLiteConverter.from_session(
         sess, [in_tensor_1, in_tensor_2], [out_tensor])
     converter.inference_type = lite_constants.QUANTIZED_UINT8
     converter.quantized_input_stats = {
@@ -166,7 +167,7 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(
+    converter = lite.TFLiteConverter.from_session(
         sess, [in_tensor_1, in_tensor_2], [out_tensor])
     converter.inference_type = lite_constants.QUANTIZED_UINT8
     converter.quantized_input_stats = {'inputA': (0., 1.)}  # mean, std_dev
@@ -182,7 +183,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Test invalid shape. None after 1st dimension.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     with self.assertRaises(ValueError) as error:
       converter.convert()
     self.assertEqual('Provide an input shape for input array \'Placeholder\'.',
@@ -195,7 +197,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Test invalid shape. None after 1st dimension.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     with self.assertRaises(ValueError) as error:
       converter.convert()
     self.assertEqual(
@@ -210,7 +213,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -242,7 +246,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess.run(_global_variables_initializer())
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -272,7 +277,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     converter.output_format = lite_constants.GRAPHVIZ_DOT
     graphviz_output = converter.convert()
     self.assertTrue(graphviz_output)
@@ -285,7 +291,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     graphviz_dir = self.get_temp_dir()
     converter.dump_graphviz_dir = graphviz_dir
     tflite_model = converter.convert()
@@ -299,7 +306,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     self.assertTrue(num_items_graphviz)
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     graphviz_dir = self.get_temp_dir()
     converter.dump_graphviz_dir = graphviz_dir
     converter.dump_graphviz_video = True
@@ -317,7 +325,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     converter.inference_input_type = lite_constants.QUANTIZED_UINT8
     converter.quantized_input_stats = {'Placeholder': (0., 1.)}  # mean, std_dev
     tflite_model = converter.convert()
@@ -347,7 +356,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     converter.inference_type = lite_constants.QUANTIZED_UINT8
     converter.quantized_input_stats = {'Placeholder': (0., 1.)}  # mean, std_dev
     converter.default_ranges_stats = (0, 6)  # min, max
@@ -387,13 +397,13 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert float model.
-    float_converter = lite.TocoConverter.from_session(sess, [in_tensor_1],
-                                                      [out_tensor])
+    float_converter = lite.TFLiteConverter.from_session(sess, [in_tensor_1],
+                                                        [out_tensor])
     float_tflite = float_converter.convert()
     self.assertTrue(float_tflite)
 
     # Convert quantized weights model.
-    quantized_converter = lite.TocoConverter.from_session(
+    quantized_converter = lite.TFLiteConverter.from_session(
         sess, [in_tensor_1], [out_tensor])
     quantized_converter.post_training_quantize = True
     quantized_tflite = quantized_converter.convert()
@@ -409,7 +419,8 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     sess = session.Session()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
+                                                  [out_tensor])
     converter.converter_mode = lite.ConverterMode.TOCO_EXTENDED_ALL
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
@@ -424,6 +435,22 @@ class FromSessionTest(test_util.TensorFlowTestCase):
         'sure you invoke the Eager delegate before inference.',
         str(error.exception))
 
+  def testFloatTocoConverter(self):
+    """Tests deprecated test TocoConverter."""
+    in_tensor = array_ops.placeholder(
+        shape=[1, 16, 16, 3], dtype=dtypes.float32)
+    out_tensor = in_tensor + in_tensor
+    sess = session.Session()
+
+    # Convert model and ensure model is not None.
+    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    tflite_model = converter.convert()
+    self.assertTrue(tflite_model)
+
+    # Ensure the interpreter is able to load.
+    interpreter = Interpreter(model_content=tflite_model)
+    interpreter.allocate_tensors()
+
 
 class FromFrozenGraphFile(test_util.TensorFlowTestCase):
 
@@ -439,8 +466,8 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
     sess.close()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_frozen_graph(graph_def_file,
-                                                     ['Placeholder'], ['add'])
+    converter = lite.TFLiteConverter.from_frozen_graph(graph_def_file,
+                                                       ['Placeholder'], ['add'])
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -474,7 +501,7 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
     sess.close()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_frozen_graph(
+    converter = lite.TFLiteConverter.from_frozen_graph(
         graph_def_file, ['Placeholder'], ['add'],
         input_shapes={'Placeholder': [1, 16, 16, 3]})
     tflite_model = converter.convert()
@@ -503,8 +530,8 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
 
     # Ensure the graph with variables cannot be converted.
     with self.assertRaises(ValueError) as error:
-      lite.TocoConverter.from_frozen_graph(graph_def_file, ['Placeholder'],
-                                           ['add'])
+      lite.TFLiteConverter.from_frozen_graph(graph_def_file, ['Placeholder'],
+                                             ['add'])
     self.assertEqual('Please freeze the graph using freeze_graph.py.',
                      str(error.exception))
 
@@ -520,8 +547,8 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
     sess.close()
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_frozen_graph(graph_def_file,
-                                                     ['Placeholder'], ['add'])
+    converter = lite.TFLiteConverter.from_frozen_graph(graph_def_file,
+                                                       ['Placeholder'], ['add'])
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -545,8 +572,8 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
 
   def testInvalidFileNotFound(self):
     with self.assertRaises(IOError) as error:
-      lite.TocoConverter.from_frozen_graph('invalid_file', ['Placeholder'],
-                                           ['add'])
+      lite.TFLiteConverter.from_frozen_graph('invalid_file', ['Placeholder'],
+                                             ['add'])
     self.assertEqual('File \'invalid_file\' does not exist.',
                      str(error.exception))
 
@@ -558,8 +585,8 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
 
     # Attempts to convert the invalid model.
     with self.assertRaises(IOError) as error:
-      lite.TocoConverter.from_frozen_graph(graph_def_file, ['Placeholder'],
-                                           ['add'])
+      lite.TFLiteConverter.from_frozen_graph(graph_def_file, ['Placeholder'],
+                                             ['add'])
     self.assertEqual(
         'Unable to parse input file \'{}\'.'.format(graph_def_file),
         str(error.exception))
@@ -580,7 +607,7 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
     # Tests the object detection model that cannot be loaded in TensorFlow.
     self._initObjectDetectionArgs()
 
-    converter = lite.TocoConverter.from_frozen_graph(
+    converter = lite.TFLiteConverter.from_frozen_graph(
         self._graph_def_file, self._input_arrays, self._output_arrays,
         self._input_shapes)
     converter.allow_custom_ops = True
@@ -621,7 +648,7 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
 
     # Missing `input_shapes`.
     with self.assertRaises(ValueError) as error:
-      lite.TocoConverter.from_frozen_graph(
+      lite.TFLiteConverter.from_frozen_graph(
           self._graph_def_file, self._input_arrays, self._output_arrays)
     self.assertEqual('input_shapes must be defined for this model.',
                      str(error.exception))
@@ -632,7 +659,7 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
 
     # `input_shapes` does not contain the names in `input_arrays`.
     with self.assertRaises(ValueError) as error:
-      lite.TocoConverter.from_frozen_graph(
+      lite.TFLiteConverter.from_frozen_graph(
           self._graph_def_file,
           self._input_arrays,
           self._output_arrays,
@@ -641,6 +668,27 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
         'input_shapes must contain a value for each item in input_array.',
         str(error.exception))
 
+  def testFloatTocoConverter(self):
+    in_tensor = array_ops.placeholder(
+        shape=[1, 16, 16, 3], dtype=dtypes.float32)
+    _ = in_tensor + in_tensor
+    sess = session.Session()
+
+    # Write graph to file.
+    graph_def_file = os.path.join(self.get_temp_dir(), 'model.pb')
+    write_graph(sess.graph_def, '', graph_def_file, False)
+    sess.close()
+
+    # Convert model and ensure model is not None.
+    converter = lite.TocoConverter.from_frozen_graph(graph_def_file,
+                                                     ['Placeholder'], ['add'])
+    tflite_model = converter.convert()
+    self.assertTrue(tflite_model)
+
+    # Ensure the model is able to load.
+    interpreter = Interpreter(model_content=tflite_model)
+    interpreter.allocate_tensors()
+
 
 class FromSavedModelTest(test_util.TensorFlowTestCase):
 
@@ -663,7 +711,7 @@ class FromSavedModelTest(test_util.TensorFlowTestCase):
     saved_model_dir = self._createSavedModel(shape=[1, 16, 16, 3])
 
     # Convert model and ensure model is not None.
-    converter = lite.TocoConverter.from_saved_model(saved_model_dir)
+    converter = lite.TFLiteConverter.from_saved_model(saved_model_dir)
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -693,7 +741,7 @@ class FromSavedModelTest(test_util.TensorFlowTestCase):
     """Test a SavedModel, with None in input tensor's shape."""
     saved_model_dir = self._createSavedModel(shape=[None, 16, 16, 3])
 
-    converter = lite.TocoConverter.from_saved_model(saved_model_dir)
+    converter = lite.TFLiteConverter.from_saved_model(saved_model_dir)
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -724,7 +772,7 @@ class FromSavedModelTest(test_util.TensorFlowTestCase):
     """Test a SavedModel ordering of input arrays."""
     saved_model_dir = self._createSavedModel(shape=[1, 16, 16, 3])
 
-    converter = lite.TocoConverter.from_saved_model(
+    converter = lite.TFLiteConverter.from_saved_model(
         saved_model_dir, input_arrays=['inputB', 'inputA'])
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
@@ -757,7 +805,7 @@ class FromSavedModelTest(test_util.TensorFlowTestCase):
     saved_model_dir = self._createSavedModel(shape=[1, 16, 16, 3])
 
     # Check case where input shape is given.
-    converter = lite.TocoConverter.from_saved_model(
+    converter = lite.TFLiteConverter.from_saved_model(
         saved_model_dir,
         input_arrays=['inputA'],
         input_shapes={'inputA': [1, 16, 16, 3]})
@@ -766,12 +814,25 @@ class FromSavedModelTest(test_util.TensorFlowTestCase):
     self.assertTrue(tflite_model)
 
     # Check case where input shape is None.
-    converter = lite.TocoConverter.from_saved_model(
+    converter = lite.TFLiteConverter.from_saved_model(
         saved_model_dir, input_arrays=['inputA'], input_shapes={'inputA': None})
 
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
+  def testSimpleModelTocoConverter(self):
+    """Test a SavedModel with deprecated TocoConverter."""
+    saved_model_dir = self._createSavedModel(shape=[1, 16, 16, 3])
+
+    # Convert model and ensure model is not None.
+    converter = lite.TocoConverter.from_saved_model(saved_model_dir)
+    tflite_model = converter.convert()
+    self.assertTrue(tflite_model)
+
+    # Ensure the model is able to load.
+    interpreter = Interpreter(model_content=tflite_model)
+    interpreter.allocate_tensors()
+
 
 class FromKerasFile(test_util.TensorFlowTestCase):
 
@@ -805,7 +866,7 @@ class FromKerasFile(test_util.TensorFlowTestCase):
     """Test a Sequential tf.keras model with default inputs."""
     keras_file = self._getSequentialModel()
 
-    converter = lite.TocoConverter.from_keras_model_file(keras_file)
+    converter = lite.TFLiteConverter.from_keras_model_file(keras_file)
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -845,13 +906,13 @@ class FromKerasFile(test_util.TensorFlowTestCase):
 
     # Invalid input array raises error.
     with self.assertRaises(ValueError) as error:
-      lite.TocoConverter.from_keras_model_file(
+      lite.TFLiteConverter.from_keras_model_file(
           keras_file, input_arrays=['invalid-input'])
     self.assertEqual("Invalid tensors 'invalid-input' were found.",
                      str(error.exception))
 
     # Valid input array.
-    converter = lite.TocoConverter.from_keras_model_file(
+    converter = lite.TFLiteConverter.from_keras_model_file(
         keras_file, input_arrays=['dense_input'])
     tflite_model = converter.convert()
     os.remove(keras_file)
@@ -863,13 +924,13 @@ class FromKerasFile(test_util.TensorFlowTestCase):
 
     # Passing in shape of invalid input array has no impact as long as all input
     # arrays have a shape.
-    converter = lite.TocoConverter.from_keras_model_file(
+    converter = lite.TFLiteConverter.from_keras_model_file(
         keras_file, input_shapes={'invalid-input': [2, 3]})
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
     # Passing in shape of valid input array.
-    converter = lite.TocoConverter.from_keras_model_file(
+    converter = lite.TFLiteConverter.from_keras_model_file(
         keras_file, input_shapes={'dense_input': [2, 3]})
     tflite_model = converter.convert()
     os.remove(keras_file)
@@ -890,13 +951,13 @@ class FromKerasFile(test_util.TensorFlowTestCase):
 
     # Invalid output array raises error.
     with self.assertRaises(ValueError) as error:
-      lite.TocoConverter.from_keras_model_file(
+      lite.TFLiteConverter.from_keras_model_file(
           keras_file, output_arrays=['invalid-output'])
     self.assertEqual("Invalid tensors 'invalid-output' were found.",
                      str(error.exception))
 
     # Valid output array.
-    converter = lite.TocoConverter.from_keras_model_file(
+    converter = lite.TFLiteConverter.from_keras_model_file(
         keras_file, output_arrays=['time_distributed/Reshape_1'])
     tflite_model = converter.convert()
     os.remove(keras_file)
@@ -926,7 +987,7 @@ class FromKerasFile(test_util.TensorFlowTestCase):
         os.close(fd)
 
     # Convert to TFLite model.
-    converter = lite.TocoConverter.from_keras_model_file(keras_file)
+    converter = lite.TFLiteConverter.from_keras_model_file(keras_file)
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -991,7 +1052,7 @@ class FromKerasFile(test_util.TensorFlowTestCase):
         os.close(fd)
 
     # Convert to TFLite model.
-    converter = lite.TocoConverter.from_keras_model_file(keras_file)
+    converter = lite.TFLiteConverter.from_keras_model_file(keras_file)
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -1052,7 +1113,7 @@ class FromKerasFile(test_util.TensorFlowTestCase):
         os.close(fd)
 
     # Convert to TFLite model.
-    converter = lite.TocoConverter.from_keras_model_file(keras_file)
+    converter = lite.TFLiteConverter.from_keras_model_file(keras_file)
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
@@ -1086,6 +1147,18 @@ class FromKerasFile(test_util.TensorFlowTestCase):
     np.testing.assert_almost_equal(tflite_result, keras_result, 5)
     os.remove(keras_file)
 
+  def testSequentialModelTocoConverter(self):
+    """Test a Sequential tf.keras model with deprecated TocoConverter."""
+    keras_file = self._getSequentialModel()
+
+    converter = lite.TocoConverter.from_keras_model_file(keras_file)
+    tflite_model = converter.convert()
+    self.assertTrue(tflite_model)
+
+    # Ensure the model is able to load.
+    interpreter = Interpreter(model_content=tflite_model)
+    interpreter.allocate_tensors()
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/lite/python/tflite_convert.py b/tensorflow/contrib/lite/python/tflite_convert.py
index c0ff7f37f9..d6d9052a4e 100644
--- a/tensorflow/contrib/lite/python/tflite_convert.py
+++ b/tensorflow/contrib/lite/python/tflite_convert.py
@@ -40,13 +40,13 @@ def _parse_set(values):
 
 
 def _get_toco_converter(flags):
-  """Makes a TocoConverter object based on the flags provided.
+  """Makes a TFLiteConverter object based on the flags provided.
 
   Args:
     flags: argparse.Namespace object containing TFLite flags.
 
   Returns:
-    TocoConverter object.
+    TFLiteConverter object.
 
   Raises:
     ValueError: Invalid flags.
@@ -68,17 +68,17 @@ def _get_toco_converter(flags):
       "output_arrays": output_arrays
   }
 
-  # Create TocoConverter.
+  # Create TFLiteConverter.
   if flags.graph_def_file:
-    converter_fn = lite.TocoConverter.from_frozen_graph
+    converter_fn = lite.TFLiteConverter.from_frozen_graph
     converter_kwargs["graph_def_file"] = flags.graph_def_file
   elif flags.saved_model_dir:
-    converter_fn = lite.TocoConverter.from_saved_model
+    converter_fn = lite.TFLiteConverter.from_saved_model
     converter_kwargs["saved_model_dir"] = flags.saved_model_dir
     converter_kwargs["tag_set"] = _parse_set(flags.saved_model_tag_set)
     converter_kwargs["signature_key"] = flags.saved_model_signature_key
   elif flags.keras_model_file:
-    converter_fn = lite.TocoConverter.from_keras_model_file
+    converter_fn = lite.TFLiteConverter.from_keras_model_file
     converter_kwargs["model_file"] = flags.keras_model_file
   else:
     raise ValueError("--graph_def_file, --saved_model_dir, or "
diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md
index 910fa4c8de..8c31c3dca8 100644
--- a/tensorflow/contrib/lite/toco/g3doc/python_api.md
+++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md
@@ -39,13 +39,18 @@ The API for converting TensorFlow models to TensorFlow Lite as of TensorFlow 1.9
 is `tf.contrib.lite.TocoConverter`. The API for calling the Python intepreter is
 `tf.contrib.lite.Interpreter`.
 
+**NOTE**: As of TensorFlow 1.12, the API for converting TensorFlow models to
+TFLite will be renamed to `TFLiteConverter`. `TFLiteConverter` is semantically
+identically to `TocoConverter`. The API is available at
+`tf.contrib.lite.TFLiteConverter` as of the Sept 26 `tf-nightly`.
+
 `TocoConverter` provides class methods based on the original format of the
 model. `TocoConverter.from_session()` is available for GraphDefs.
 `TocoConverter.from_saved_model()` is available for SavedModels.
 `TocoConverter.from_keras_model_file()` is available for `tf.Keras` files.
-Example usages for simple float-point models are shown in [Basic
-Examples](#basic). Examples usages for more complex models is shown in [Complex
-Examples](#complex).
+Example usages for simple float-point models are shown in
+[Basic Examples](#basic). Examples usages for more complex models is shown in
+[Complex Examples](#complex).
 
 **NOTE**: Currently, `TocoConverter` will cause a fatal error to the Python
 interpreter when the conversion fails. This will be remedied as soon as
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index fee10b1dff..9f60942f47 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -388,7 +388,7 @@ void Export(
            "the standard TensorFlow Lite runtime. If you have a custom "
            "implementation for them you can disable this error with "
            "--allow_custom_ops, or by setting allow_custom_ops=True "
-           "when calling tf.contrib.lite.TocoConverter(). Here is a list "
+           "when calling tf.contrib.lite.TFLiteConverter(). Here is a list "
            "of operators for which  you will need custom implementations: "
         << absl::StrJoin(error_summary_final, ", ") << ".";
   }
-- 
GitLab


From 0d5c68e30f4637329fa233df506d7b97802a5e9b Mon Sep 17 00:00:00 2001
From: Tayo Oguntebi <tayo@google.com>
Date: Wed, 26 Sep 2018 19:45:39 -0700
Subject: [PATCH 0780/1357]   Fixes bug in tf2xla NMS implementation.

PiperOrigin-RevId: 214711381
---
 tensorflow/compiler/tests/image_ops_test.py   | 43 +++++++++++++++++++
 .../compiler/tf2xla/kernels/image_ops.cc      |  9 +++-
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py
index bbe746e28f..68fdb5caf4 100644
--- a/tensorflow/compiler/tests/image_ops_test.py
+++ b/tensorflow/compiler/tests/image_ops_test.py
@@ -724,6 +724,49 @@ class NonMaxSuppressionTest(xla_test.XLATestCase):
         self.assertEqual(num_valid, 2)
         self.assertAllClose(indices_tf[:num_valid], [3, 0])
 
+  def testNMS3Then1WithScoreMaxThresh(self):
+    # Three boxes are selected based on IOU.
+    # One is filtered out by score threshold.
+    # One is filtered out by max_output_size.
+
+    with compat.forward_compatibility_horizon(2018, 8, 8):
+      boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+                    [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+      boxes_np = np.array(boxes_data, dtype=np.float32)
+
+      scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
+      scores_np = np.array(scores_data, dtype=np.float32)
+      max_output_size = 1
+      iou_threshold_np = np.array(0.5, dtype=np.float32)
+      score_threshold_np = np.array(0.4, dtype=np.float32)
+
+      with self.cached_session() as sess:
+        boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+        scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+        iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                              iou_threshold_np.shape)
+        score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                                score_threshold_np.shape)
+        with self.test_scope():
+          selected_indices = image_ops.non_max_suppression_padded(
+              boxes=boxes,
+              scores=scores,
+              max_output_size=max_output_size,
+              iou_threshold=iou_threshold,
+              score_threshold=score_threshold,
+              pad_to_max_output_size=True)
+        inputs_feed = {
+            boxes: boxes_np,
+            scores: scores_np,
+            iou_threshold: iou_threshold_np,
+            score_threshold: score_threshold_np
+        }
+        (indices_tf, num_valid) = sess.run(
+            selected_indices, feed_dict=inputs_feed)
+
+        self.assertEqual(indices_tf.size, max_output_size)
+        self.assertEqual(num_valid, 1)
+        self.assertAllClose(indices_tf[:num_valid], [3])
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/image_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
index 33a73fe5fd..921b4340c0 100644
--- a/tensorflow/compiler/tf2xla/kernels/image_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
@@ -355,6 +355,9 @@ class NonMaxSuppressionOp : public XlaOpKernel {
     OP_REQUIRES(
         context, output_size >= 0,
         errors::InvalidArgument("Need output_size >= 0, got ", output_size));
+    OP_REQUIRES(context, output_size <= kint32max,
+                errors::InvalidArgument("Need output_size <= kint32Max, got ",
+                                        output_size));
     xla::XlaOp score_thresh = context->Input("score_threshold");
     xla::XlaOp iou_thresh = context->Input("iou_threshold");
 
@@ -439,12 +442,14 @@ class NonMaxSuppressionOp : public XlaOpKernel {
         xla::Broadcast(xla::ConstantR0<int32>(builder, 1), {num_boxes}),
         xla::Broadcast(xla::ConstantR0<int32>(builder, 0), {num_boxes}));
 
-    // num_valid is scalar.
-    xla::XlaOp num_valid = xla::Reduce(
+    // num_valid is scalar. Value should be bound by output_size.
+    xla::XlaOp num_valid_total = xla::Reduce(
         ones_included,
         /*init_value=*/xla::ConstantR0<int>(builder, 0),
         /*computation=*/CreateScalarAddComputation(xla::S32, builder),
         /*dimensions_to_reduce=*/{0});
+    xla::XlaOp num_valid =
+        xla::Min(num_valid_total, xla::ConstantR0<int32>(builder, output_size));
 
     xla::XlaOp output_tuple = TopK(scores_included, output_size);
     xla::XlaOp selected_indices = xla::GetTupleElement(output_tuple, 1);
-- 
GitLab


From de2bcdc7ad149419e270e1443b63581163d75d5d Mon Sep 17 00:00:00 2001
From: Pavithra Vijay <psv@google.com>
Date: Wed, 26 Sep 2018 20:27:46 -0700
Subject: [PATCH 0781/1357] Add Mirrored distribution strategy support for new
 metrics with Keras and Estimator Add support for stateful metrics in model to
 estimator

PiperOrigin-RevId: 214714322
---
 .../examples/simple_estimator_example.py      | 21 ++++--
 .../contrib/distribute/python/keras_test.py   | 66 ++++++++++---------
 .../contrib/tpu/python/tpu/keras_support.py   | 31 +++------
 tensorflow/python/estimator/keras.py          | 39 +++++++----
 tensorflow/python/estimator/keras_test.py     | 28 ++++----
 tensorflow/python/keras/engine/training.py    |  6 --
 .../keras/engine/training_distributed.py      | 51 ++++++++++----
 tensorflow/python/keras/metrics.py            | 16 +++++
 tensorflow/python/keras/models.py             |  9 ++-
 9 files changed, 163 insertions(+), 104 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/examples/simple_estimator_example.py b/tensorflow/contrib/distribute/python/examples/simple_estimator_example.py
index 44a69ed23a..79a9803d75 100644
--- a/tensorflow/contrib/distribute/python/examples/simple_estimator_example.py
+++ b/tensorflow/contrib/distribute/python/examples/simple_estimator_example.py
@@ -22,6 +22,8 @@ from __future__ import print_function
 
 import tensorflow as tf
 
+from tensorflow.python.keras import metrics as metrics_module
+
 
 def build_model_fn_optimizer():
   """Simple model_fn with optimizer."""
@@ -45,7 +47,10 @@ def build_model_fn_optimizer():
       return y * y
 
     if mode == tf.estimator.ModeKeys.EVAL:
-      return tf.estimator.EstimatorSpec(mode, loss=loss_fn())
+      acc_obj = metrics_module.BinaryAccuracy()
+      acc_obj.update_state(labels, labels)
+      return tf.estimator.EstimatorSpec(
+          mode, loss=loss_fn(), eval_metric_ops={"Accuracy": acc_obj})
 
     assert mode == tf.estimator.ModeKeys.TRAIN
 
@@ -61,18 +66,26 @@ def main(_):
       ["/device:GPU:0", "/device:GPU:1"])
   config = tf.estimator.RunConfig(train_distribute=distribution,
                                   eval_distribute=distribution)
+  # Since there are 2 devices and 10 samples, we set steps=5.
+  steps = 5
 
-  def input_fn():
+  def train_input_fn():
     features = tf.data.Dataset.from_tensors([[1.]]).repeat(10)
     labels = tf.data.Dataset.from_tensors([1.]).repeat(10)
     return tf.data.Dataset.zip((features, labels))
 
   estimator = tf.estimator.Estimator(
       model_fn=build_model_fn_optimizer(), config=config)
-  estimator.train(input_fn=input_fn, steps=10)
+  estimator.train(input_fn=train_input_fn, steps=steps)
+
+  def eval_input_fn():
+    features = tf.data.Dataset.from_tensors([[1.]]).repeat(10)
+    labels = tf.data.Dataset.from_tensors([1.]).repeat(10)
+    return tf.data.Dataset.zip((features, labels))
 
-  eval_result = estimator.evaluate(input_fn=input_fn, steps=10)
+  eval_result = estimator.evaluate(input_fn=eval_input_fn, steps=steps)
   print("Eval result: {}".format(eval_result))
+  assert eval_result["Accuracy"] == 1.0
 
   def predict_input_fn():
     predict_features = tf.data.Dataset.from_tensors([[1.]]).repeat(10)
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 2e6cd43fd4..a0b8bde132 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -205,6 +205,7 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase):
     keras_model = simple_functional_model()
     keras_model.compile(
         loss='categorical_crossentropy',
+        metrics=[keras.metrics.CategoricalAccuracy()],
         optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01))
     config = run_config_lib.RunConfig(tf_random_seed=_RANDOM_SEED,
                                       model_dir=self._base_dir,
@@ -229,6 +230,7 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase):
     keras_model = simple_sequential_model()
     keras_model.compile(
         loss='categorical_crossentropy',
+        metrics=[keras.metrics.CategoricalAccuracy()],
         optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01))
     config = run_config_lib.RunConfig(tf_random_seed=_RANDOM_SEED,
                                       model_dir=self._base_dir,
@@ -364,7 +366,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
-      metrics = ['mae']
+      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
       strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
                                                      '/device:GPU:0'])
       model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
@@ -399,7 +401,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
-      metrics = ['mae']
+      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
       model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
 
       inputs = np.zeros((10, 3), dtype=np.float32)
@@ -432,7 +434,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
       loss = 'mse'
-      metrics = ['mae']
+      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
       strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
                                                      '/device:CPU:0'])
       model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
@@ -468,7 +470,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
-      metrics = ['mae']
+      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
       model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
 
       inputs = np.zeros((10, 3), dtype=np.float32)
@@ -484,32 +486,6 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                 validation_data=dataset, validation_steps=2)
 
-  def test_raise_error_for_stateful_metrics(self):
-
-    class ExampleStatefulMetric(keras.layers.Layer):
-
-      def __init__(self, name='true_positives', **kwargs):
-        super(ExampleStatefulMetric, self).__init__(name=name, **kwargs)
-        self.stateful = True
-
-      def __call__(self, y_true, y_pred):
-        return y_pred - y_true
-
-    with self.cached_session():
-      x = keras.layers.Input(shape=(3,), name='input')
-      y = keras.layers.Dense(4, name='dense')(x)
-      model = keras.Model(x, y)
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae', ExampleStatefulMetric()]
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
-                                                     '/device:GPU:0'])
-      with self.assertRaisesRegexp(
-          NotImplementedError, 'Stateful metrics are not supported with '
-                               'DistributionStrategy.'):
-        model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
-
   def test_unsupported_features(self):
     with self.cached_session():
       x = keras.layers.Input(shape=(3,), name='input')
@@ -750,6 +726,36 @@ class NormalizationLayerWithDistributionStrategyTest(
 class CorrectnessWithDistributionStrategyTest(test.TestCase,
                                               parameterized.TestCase):
 
+  @combinations.generate(all_combinations())
+  def test_metric_correctness(self, distribution):
+    with self.cached_session():
+      keras.backend.set_image_data_format('channels_last')
+      num_samples = 10000
+
+      x_train = np.random.randint(0, 2, num_samples)
+      x_train = np.reshape(x_train, (num_samples, 1))
+      y_train = x_train
+      x_train = x_train.astype('float32')
+      y_train = y_train.astype('float32')
+
+      # Create identity model.
+      model = keras.Sequential()
+      model.add(
+          keras.layers.Dense(1, input_shape=(1,), kernel_initializer='ones'))
+      model.compile(
+          loss=keras.losses.mean_squared_error,
+          optimizer=gradient_descent.GradientDescentOptimizer(0.5),
+          metrics=[keras.metrics.BinaryAccuracy()],
+          distribute=distribution)
+
+      batch_size = 64
+      batch_size //= distribution.num_towers
+      train_dataset = dataset_ops.Dataset.from_tensor_slices((x_train, y_train))
+      train_dataset = batch_wrapper(train_dataset, batch_size, distribution)
+
+      history = model.fit(x=train_dataset, epochs=1, steps_per_epoch=10)
+      self.assertEqual(history.history['binary_accuracy'], [1.0])
+
   @combinations.generate(all_combinations())
   def test_correctness(self, distribution):
     with self.cached_session():
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 448676c95e..956d0142a3 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -325,18 +325,6 @@ def _replicated_optimizer(opt):
     return KerasCrossShardOptimizer(opt)
 
 
-def _clone_metrics(metrics):
-  """Returns a copy of metrics. A copy is created for stateful metrics."""
-  if metrics is None:
-    return None
-  with variable_scope.variable_scope(
-      'metrics', reuse=variable_scope.AUTO_REUSE):
-    return [
-        m.__class__.from_config(m.get_config()) if isinstance(
-            m, metrics_module.Metric) else m for m in metrics
-    ]
-
-
 def _clone_optimizer(optimizer, config=None):
   """Returns a cloned optimizer with the provided optimizer.config or config."""
   if not isinstance(optimizer, keras_optimizers.Optimizer):
@@ -963,8 +951,9 @@ class TPUFunction(object):
                   optimizer=_replicated_optimizer(self._cloned_optimizer),
                   loss=self.model.loss,
                   loss_weights=self.model.loss_weights,
-                  metrics=_clone_metrics(self.model.metrics),
-                  weighted_metrics=_clone_metrics(self.model.weighted_metrics),
+                  metrics=metrics_module.clone_metrics(self.model.metrics),
+                  weighted_metrics=metrics_module.clone_metrics(
+                      self.model.weighted_metrics),
                   target_tensors=tpu_targets,
               )
 
@@ -1364,13 +1353,9 @@ class KerasTPUModel(models.Model):
       raise ValueError('target_tensors is not supported for TPU execution.')
 
     self._cpu_model.compile(
-        _clone_optimizer(optimizer),
-        loss,
-        _clone_metrics(metrics),
-        loss_weights,
-        sample_weight_mode,
-        _clone_metrics(weighted_metrics),
-        target_tensors,
+        _clone_optimizer(optimizer), loss,
+        metrics_module.clone_metrics(metrics), loss_weights, sample_weight_mode,
+        metrics_module.clone_metrics(weighted_metrics), target_tensors,
         **kwargs)
 
     super(KerasTPUModel, self).compile(optimizer, loss, metrics, loss_weights,
@@ -2126,10 +2111,10 @@ def tpu_model(model, strategy=None):
     cpu_model.compile(
         _clone_optimizer(model.optimizer, optimizer_config),
         model.loss,
-        _clone_metrics(model.metrics),
+        metrics_module.clone_metrics(model.metrics),
         model.loss_weights,
         model.sample_weight_mode,
-        _clone_metrics(model.weighted_metrics),
+        metrics_module.clone_metrics(model.weighted_metrics),
     )
 
   if model_weights:
diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py
index 6b2765be82..7546771ed3 100644
--- a/tensorflow/python/estimator/keras.py
+++ b/tensorflow/python/estimator/keras.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 
 import os
 import re
+import six
 
 from tensorflow.python.client import session
 from tensorflow.python.estimator import estimator as estimator_lib
@@ -31,6 +32,7 @@ from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras import backend as K
+from tensorflow.python.keras import metrics
 from tensorflow.python.keras import models
 from tensorflow.python.keras import optimizers
 from tensorflow.python.ops import check_ops
@@ -214,25 +216,40 @@ def _convert_keras_metrics_to_estimator(model):
   if not getattr(model, 'metrics', None):
     return None
 
-  # TODO(psv/fchollet): support stateful metrics
   eval_metric_ops = {}
+
+  def get_metric_name(metric):
+    if isinstance(metric, metrics.Metric):
+      return metric.name
+    if callable(metric):
+      return metric.__name__
+    assert isinstance(metric, six.string_types)
+    return metric
+
   # When each metric maps to an output
   if isinstance(model.metrics, dict):
     for i, output_name in enumerate(model.metrics.keys()):
-      metric_name = model.metrics[output_name]
-      if callable(metric_name):
-        metric_name = metric_name.__name__
+      # `metric` is the user given metric value in `compile`. This can be
+      # metric name (`acc`), metric function (binary_accuracy) or a metric
+      # object (BinaryAccuracy()).
+      metric = model.metrics[output_name]
+      metric_name = get_metric_name(metric)
       # When some outputs use the same metric
       if list(model.metrics.values()).count(metric_name) > 1:
         metric_name += '_' + output_name
-      eval_metric_ops[metric_name] = metrics_module.mean(
-          model.metrics_tensors[i - len(model.metrics)])
+      if isinstance(metric, metrics.Metric):
+        eval_metric_ops[metric_name] = metric
+      else:
+        eval_metric_ops[metric_name] = metrics_module.mean(
+            model.metrics_tensors[i - len(model.metrics)])
   else:
-    for i, metric_name in enumerate(model.metrics):
-      if callable(metric_name):
-        metric_name = metric_name.__name__
-      eval_metric_ops[metric_name] = metrics_module.mean(
-          model.metrics_tensors[i])
+    for i, metric in enumerate(model.metrics):
+      metric_name = get_metric_name(metric)
+      if isinstance(metric, metrics.Metric):
+        eval_metric_ops[metric_name] = metric
+      else:
+        eval_metric_ops[metric_name] = metrics_module.mean(
+            model.metrics_tensors[i])
   return eval_metric_ops
 
 
diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py
index 3758243d7b..288f9b8906 100644
--- a/tensorflow/python/estimator/keras_test.py
+++ b/tensorflow/python/estimator/keras_test.py
@@ -257,7 +257,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
       keras_model.compile(
           loss='categorical_crossentropy',
           optimizer='rmsprop',
-          metrics=['mse', keras.metrics.categorical_accuracy])
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
       with self.cached_session():
         est_keras = keras_lib.model_to_estimator(
@@ -281,7 +281,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
       keras_model.compile(
           loss='categorical_crossentropy',
           optimizer=rmsprop.RMSPropOptimizer(1e-3),
-          metrics=['mse', keras.metrics.categorical_accuracy])
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
       my_hook = MyHook()
       with self.cached_session():
@@ -306,7 +306,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     keras_model.compile(
         loss='categorical_crossentropy',
         optimizer=rmsprop.RMSPropOptimizer(1e-3),
-        metrics=['mse', keras.metrics.categorical_accuracy])
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
     my_hook = MyHook()
     with self.cached_session():
       keras_model.fit(x_train, y_train, epochs=1)
@@ -328,7 +328,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
       keras_model.compile(
           loss='categorical_crossentropy',
           optimizer=rmsprop.RMSPropOptimizer(1e-3),
-          metrics=['mse', keras.metrics.categorical_accuracy])
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
       with self.cached_session():
         est_keras = keras_lib.model_to_estimator(
@@ -351,7 +351,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     keras_model.compile(
         loss='categorical_crossentropy',
         optimizer=rmsprop.RMSPropOptimizer(1e-3),
-        metrics=['mse', keras.metrics.categorical_accuracy])
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
     with self.cached_session():
       est_keras = keras_lib.model_to_estimator(
@@ -370,7 +370,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     keras_model.compile(
         loss='categorical_crossentropy',
         optimizer=rmsprop.RMSPropOptimizer(1e-3),
-        metrics=['mse', keras.metrics.categorical_accuracy])
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
     with self.cached_session():
       # Create state
@@ -662,7 +662,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     keras_model.compile(
         loss='categorical_crossentropy',
         optimizer='rmsprop',
-        metrics=['mse', keras.metrics.categorical_accuracy])
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
     tf_config = json.dumps({
         'cluster': {
@@ -687,7 +687,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
       keras_model.compile(
           loss='categorical_crossentropy',
           optimizer='rmsprop',
-          metrics=['mse', keras.metrics.categorical_accuracy])
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
       gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.3)
       sess_config = config_pb2.ConfigProto(gpu_options=gpu_options)
@@ -706,7 +706,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     keras_model.compile(
         loss='categorical_crossentropy',
         optimizer='rmsprop',
-        metrics=['mse', keras.metrics.categorical_accuracy])
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
     with self.cached_session():
       est_keras = keras_lib.model_to_estimator(
@@ -736,7 +736,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     keras_model.compile(
         loss='categorical_crossentropy',
         optimizer='rmsprop',
-        metrics=['mse', keras.metrics.categorical_accuracy])
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
     with self.cached_session():
       with test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR):
@@ -751,7 +751,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     keras_model.compile(
         loss='categorical_crossentropy',
         optimizer='rmsprop',
-        metrics=['mse', keras.metrics.categorical_accuracy])
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
 
     with self.cached_session():
       with self.assertRaisesRegexp(ValueError, '`model_dir` are set both in '
@@ -765,7 +765,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     keras_model.compile(
         loss='categorical_crossentropy',
         optimizer=rmsprop.RMSPropOptimizer(1e-3),
-        metrics=['mse', keras.metrics.categorical_accuracy])
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
     with self.cached_session():
       keras_model.train_on_batch(
           np.random.random((10,) + _INPUT_SIZE),
@@ -776,7 +776,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
       keras_model.compile(
           loss='categorical_crossentropy',
           optimizer=SGD(lr=0.0001, momentum=0.9),
-          metrics=['mse', keras.metrics.categorical_accuracy])
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
       keras_lib.model_to_estimator(
           keras_model=keras_model, config=self._config)
 
@@ -786,7 +786,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     keras_model.compile(
         loss='categorical_crossentropy',
         optimizer=optimizer,
-        metrics=['mse', keras.metrics.categorical_accuracy])
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
     with self.cached_session() as sess:
       keras_model_fn = keras_lib._create_keras_model_fn(keras_model)
       global_step = training_util.create_global_step()
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index ade8a4b32d..46bffd7068 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -647,12 +647,6 @@ class Model(Network):
         skip_target_indices=skip_target_indices,
         sample_weights=self.sample_weights)
 
-    # If using distribution strategy and stateful_metrics, raise an error
-    # since we currently don't support stateful metrics.
-    if self._distribution_strategy is not None and self.stateful_metric_names:
-      raise NotImplementedError('Stateful metrics are not supported with '
-                                'DistributionStrategy.')
-
     # Prepare gradient updates and state updates.
     self.total_loss = total_loss
 
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 8b434ca444..1b64f904d5 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -26,6 +26,7 @@ from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import callbacks as cbks
 from tensorflow.python.keras import optimizers
 from tensorflow.python.keras.engine import distributed_training_utils
+from tensorflow.python.keras import metrics as metrics_module
 from tensorflow.python.keras.utils.generic_utils import Progbar
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import variable_scope
@@ -153,6 +154,9 @@ def fit_loop(
   assert steps_per_epoch is not None
 
   for epoch in range(initial_epoch, epochs):
+    # Reset stateful metrics
+    for m in model.stateful_metric_functions:
+      m.reset_states()
     callbacks.on_epoch_begin(epoch)
     epoch_logs = {}
     for step_index in range(steps_per_epoch):
@@ -171,8 +175,9 @@ def fit_loop(
       if not isinstance(outs, list):
         outs = [outs]
 
-      outs = _aggregate_metrics_across_towers(
-          current_strategy.num_towers, out_labels, outs)
+      outs = _aggregate_metrics_across_towers(current_strategy.num_towers,
+                                              out_labels,
+                                              model.stateful_metric_names, outs)
       for l, o in zip(out_labels, outs):
         batch_logs[l] = o
       callbacks.on_batch_end(step_index, batch_logs)
@@ -437,6 +442,13 @@ def test_loop(model, iterator, verbose=0, steps=None):
   else:
     ins = dataset_inputs + dataset_targets
 
+  for m in model.stateful_metric_functions:
+    m.reset_states()
+  stateful_metric_indices = [
+      i for i, name in enumerate(model.metrics_names)
+      if str(name) in model.stateful_metric_names
+  ]
+
   outs = []
   if verbose == 1:
     progbar = Progbar(target=steps)
@@ -452,12 +464,16 @@ def test_loop(model, iterator, verbose=0, steps=None):
   for step in range(steps):
     batch_outs = distributed_test_function(ins)
     batch_outs = _aggregate_metrics_across_towers(
-        current_strategy.num_towers, model.metrics_names, batch_outs)
+        current_strategy.num_towers, model.metrics_names,
+        model.stateful_metric_names, batch_outs)
     if isinstance(batch_outs, list):
       if step == 0:
         outs = [0.] * len(batch_outs)
       for i, batch_out in enumerate(batch_outs):
-        outs[i] += batch_out
+        if i in stateful_metric_indices:
+          outs[i] = batch_out
+        else:
+          outs[i] += batch_out
     else:
       if step == 0:
         outs.append(0.)
@@ -465,7 +481,8 @@ def test_loop(model, iterator, verbose=0, steps=None):
     if verbose >= 1:
       progbar.update(step + 1)
   for i in range(len(outs)):
-    outs[i] /= steps
+    if i not in stateful_metric_indices:
+      outs[i] /= steps
 
   if len(outs) == 1:
     return outs[0]
@@ -816,10 +833,10 @@ def _clone_and_build_model(model, inputs=None, targets=None):
   cloned_model.compile(
       optimizer,
       model.loss,
-      metrics=model.metrics,
+      metrics=metrics_module.clone_metrics(model.metrics),
       loss_weights=model.loss_weights,
       sample_weight_mode=model.sample_weight_mode,
-      weighted_metrics=model.weighted_metrics,
+      weighted_metrics=metrics_module.clone_metrics(model.weighted_metrics),
       target_tensors=targets)
   return cloned_model
 
@@ -834,8 +851,9 @@ def clone_model_on_towers(
     model._make_callback_model()
 
 
-def _aggregate_metrics_across_towers(num_devices, out_labels, outs):
-  """Aggregate metrics values across all towers.
+def _aggregate_metrics_across_towers(num_devices, out_labels,
+                                     stateful_metric_names, outs):
+  """Aggregates stateless metrics values across towers.
 
   When using `MirroredStrategy`, the number of towers is equal to the
   number of devices over which training is distributed. This may not always be
@@ -844,6 +862,7 @@ def _aggregate_metrics_across_towers(num_devices, out_labels, outs):
   Args:
     num_devices: Number of devices over which the model is being distributed.
     out_labels: The list of metric names passed to `compile`.
+    stateful_metric_names: List of stateful metric names on the model.
     outs: The output from all the towers.
 
   Returns:
@@ -858,10 +877,16 @@ def _aggregate_metrics_across_towers(num_devices, out_labels, outs):
   # Each label in `out_labels` corresponds to one set of metrics. The
   # number of metric values corresponds to the number of devices. We
   # currently take the mean of the values.
-  for _ in out_labels[1:]:
-    m = np.mean(outs[current_index:current_index + num_devices])
-    merged_output.append(m)
-    current_index += num_devices
+  for metric_name in out_labels[1:]:
+    if metric_name in stateful_metric_names:
+      # For stateful metrics, we get one aggregated result value.
+      merged_output.append(outs[current_index])
+      current_index += 1
+    else:
+      m = np.mean(outs[current_index:current_index + num_devices])
+      merged_output.append(m)
+      current_index += num_devices
+
   return merged_output
 
 
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index e64241e5cf..f4e8419eb0 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -71,6 +71,22 @@ def check_is_tensor_or_operation(x, name):
         name, x))
 
 
+def clone_metric(metric):
+  """Returns a clone of the metric if stateful, otherwise returns it as is."""
+  if isinstance(metric, Metric):
+    return metric.__class__.from_config(metric.get_config())
+  return metric
+
+
+def clone_metrics(metrics):
+  """Clones the given metric list/dict."""
+  if metrics is None:
+    return None
+  if isinstance(metrics, dict):
+    return {key: clone_metric(value) for key, value in metrics.items()}
+  return [clone_metric(metric) for metric in metrics]
+
+
 def update_state_wrapper(update_state_fn):
   """Decorator to wrap metric `update_state()` with `add_update()`.
 
diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py
index 41c5e3cccf..b04b4df257 100644
--- a/tensorflow/python/keras/models.py
+++ b/tensorflow/python/keras/models.py
@@ -20,6 +20,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.keras import backend as K
+from tensorflow.python.keras import metrics as metrics_module
 from tensorflow.python.keras import optimizers
 from tensorflow.python.keras.engine import saving
 from tensorflow.python.keras.engine import sequential
@@ -290,7 +291,9 @@ def _in_place_subclassed_model_reset(model):
     if isinstance(value, Layer):
       attributes_cache[name] = value
       assert value in model._layers
-    elif isinstance(value, (list, tuple)) and name not in ('layers', '_layers'):
+    elif isinstance(
+        value, (list, tuple)) and name not in ('layers', '_layers',
+                                               'stateful_metric_functions'):
       # Handle case: list/tuple of layers (also tracked by the Network API).
       if value and all(isinstance(val, Layer) for val in value):
         raise ValueError('We do not support the use of list-of-layers '
@@ -466,10 +469,10 @@ def clone_and_build_model(
     clone.compile(
         optimizer,
         model.loss,
-        metrics=model.metrics,
+        metrics=metrics_module.clone_metrics(model.metrics),
         loss_weights=model.loss_weights,
         sample_weight_mode=model.sample_weight_mode,
-        weighted_metrics=model.weighted_metrics,
+        weighted_metrics=metrics_module.clone_metrics(model.weighted_metrics),
         target_tensors=target_tensors)
 
   return clone
-- 
GitLab


From 40ffbcc12519fa11e1dfb84f2f54a4f5d9b1b1c8 Mon Sep 17 00:00:00 2001
From: Daryl Ng <darylng@google.com>
Date: Wed, 26 Sep 2018 20:36:49 -0700
Subject: [PATCH 0782/1357] Adding per table load and retrieve ops and
 additional enqueue operations. Other additional refactoring.

PiperOrigin-RevId: 214715083
---
 tensorflow/contrib/tpu/BUILD                  |   5 +
 .../contrib/tpu/ops/tpu_embedding_ops.cc      | 626 ++++++++++++------
 tensorflow/contrib/tpu/utils/BUILD            |  30 +
 ...embedding_optimization_parameters_utils.cc | 255 +++++++
 ..._embedding_optimization_parameters_utils.h |  90 +++
 .../tpu_embedding_output_layout_utils.cc      |  98 +++
 .../utils/tpu_embedding_output_layout_utils.h |  38 ++
 7 files changed, 943 insertions(+), 199 deletions(-)
 create mode 100644 tensorflow/contrib/tpu/utils/BUILD
 create mode 100644 tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc
 create mode 100644 tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h
 create mode 100644 tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc
 create mode 100644 tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 8355c92a4d..ac38612603 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -103,6 +103,9 @@ tf_gen_op_libs(
     ],
     deps = [
         "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
+        "//tensorflow/contrib/tpu/utils:tpu_embedding_optimization_parameters_utils",
+        "//tensorflow/contrib/tpu/utils:tpu_embedding_output_layout_utils",
+        "//tensorflow/core:lib",
         "//tensorflow/core:lib_proto_parsing",
         "//tensorflow/core:protos_all_cc",
     ],
@@ -122,6 +125,8 @@ tf_custom_op_library(
     ],
     deps = [
         "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
+        "//tensorflow/contrib/tpu/utils:tpu_embedding_optimization_parameters_utils",
+        "//tensorflow/contrib/tpu/utils:tpu_embedding_output_layout_utils",
         "//tensorflow/core:lib_proto_parsing",
     ],
 )
diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index 18b98939b8..1bd1a31e11 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -14,10 +14,15 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/contrib/tpu/proto/tpu_embedding_configuration.pb.h"
+#include "tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h"
+#include "tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/shape_inference.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 
 namespace tensorflow {
 
@@ -53,215 +58,339 @@ namespace tensorflow {
 // saving a checkpoint, the model must Retrieve the parameters back into the
 // host CPU memory.
 
-REGISTER_OP("TPUEmbeddingLoadGradientDescentParameters")
-    .Input("parameters: float32")
-    .Attr("tpu_embedding_config: string")
-    .Attr("table_id: int >= 0")
-    .Attr("num_hosts: int >= 1")
-    .Attr("host_id: int >= 0")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Load an embedding table shard into TPU memory for use with GradientDescent.
-
-TPU embeddings use dedicated per-optimizer Ops for loading and retrieving 
-trainable variables and optimizer state from TPU memory. This op enables
-functionality equivalent to GradientDescentOptimizer.
-
-parameters: The shard of the embedding table resident on the host executing this
-    op. For single-TPU models, this is the entire embedding table.
-tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
-table_id: The id of the table specified in the tpu_embedding_config.
-num_hosts: The number of CPU hosts in the distributed training job.
-host_id: Which CPU host in the distributed training job will execute this op.
-)doc");
+namespace {
 
-namespace tpu_embedding_config_util {
+void RegisterPerTableLoadAndRetrieveOps();
 
-Status GradientDescentShapes(shape_inference::InferenceContext *c) {
-  string config_string;
-  TF_RETURN_IF_ERROR(c->GetAttr("tpu_embedding_config", &config_string));
-  tpu::TPUEmbeddingConfiguration config;
-  if (!config.ParseFromString(config_string)) {
-    return errors::InvalidArgument("Malformed tpu_embedding_config.");
+class RegisterPerTableLoadAndRetrieveOpsOnConstruction {
+ public:
+  RegisterPerTableLoadAndRetrieveOpsOnConstruction() {
+    RegisterPerTableLoadAndRetrieveOps();
   }
-
-  int table_id;
-  TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
-  int64 num_tables = config.table_descriptor_size();
-  if (table_id >= num_tables) {
-    return errors::InvalidArgument("Table id >= num_tables");
+};
+
+// Object whose constructor does registrations.
+RegisterPerTableLoadAndRetrieveOpsOnConstruction
+    register_per_table_load_and_retrieve_ops_var;
+
+Status RegisterPerTableLoadOpsForAlgorithmBody(
+    tpu::OptimizationAlgorithm alg, bool is_debug_op,
+    OpRegistrationData* op_reg_data) {
+  tpu::GradientAccumulationSupport grad_accum_support;
+  TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
+
+  std::vector<tpu::StateVariableSpecification> state_variable_specs;
+  TF_CHECK_OK(GetOptimizationAlgorithmStateVariables(
+      alg,
+      grad_accum_support == tpu::GradientAccumulationSupport::kSupported &&
+          is_debug_op,
+      &state_variable_specs));
+  auto* op_def = &op_reg_data->op_def;
+  op_def->set_name(
+      strings::StrCat("LoadTPUEmbedding", GetOptimizationAlgorithmName(alg),
+                      "Parameters", (is_debug_op ? "GradAccumDebug" : "")));
+  // It is important for the order of the inputs to the op defined here
+  // to match the order in input_names because the indexes are used in
+  // the combining transformation.
+  for (const auto& parameter : state_variable_specs) {
+    if (parameter.has_user_defined() || is_debug_op) {
+      auto* arg = op_def->add_input_arg();
+      arg->set_name(parameter.name());
+      arg->set_description(
+          strings::StrCat("Value of ", parameter.name(), " used in the ",
+                          GetOptimizationAlgorithmFriendlyName(alg),
+                          " optimization algorithm."));
+      arg->set_type(DT_FLOAT);
+    }
   }
-  int64 width = config.table_descriptor(table_id).dimension();
-  int64 num_rows = config.table_descriptor(table_id).vocabulary_size();
-
-  TF_RETURN_IF_ERROR(c->set_output("parameters", {c->Matrix(num_rows, width)}));
+  {
+    auto* table_id_attr = op_def->add_attr();
+    table_id_attr->set_name("table_id");
+    table_id_attr->set_type("int");
+    table_id_attr->set_has_minimum(true);
+    table_id_attr->set_minimum(-1);
+    table_id_attr->mutable_default_value()->set_i(-1);
+  }
+  {
+    auto* table_name_attr = op_def->add_attr();
+    table_name_attr->set_name("table_name");
+    table_name_attr->set_type("string");
+    table_name_attr->mutable_default_value()->set_s("");
+  }
+  {
+    auto* num_shards_attr = op_def->add_attr();
+    num_shards_attr->set_name("num_shards");
+    num_shards_attr->set_type("int");
+  }
+  {
+    auto* shard_id_attr = op_def->add_attr();
+    shard_id_attr->set_name("shard_id");
+    shard_id_attr->set_type("int");
+  }
+  op_def->set_summary("Load embedding parameters for a single table.");
+  string parameter_descriptions;
+  for (const auto& parameter : state_variable_specs) {
+    if (parameter.has_user_defined() || is_debug_op) {
+      strings::Appendf(&parameter_descriptions,
+                       R"(
+%s: A tensor containing the initial embedding table %s to use in embedding
+lookups using the %s optimization algorithm.)",
+                       parameter.name().c_str(), parameter.name().c_str(),
+                       GetOptimizationAlgorithmFriendlyName(alg).c_str());
+    }
+  }
+  op_def->set_description(strings::Printf(R"doc(
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+%s
+table_name: Name of this table; must match a name in the
+  EmbeddingLayerConfiguration proto (overrides table_id).
+num_shards: Number of shards into which the embedding tables are divided.
+shard_id: Identifier of shard for this operation.
+table_id: Index of this table in the EmbeddingLayerConfiguration proto
+  (deprecated).
+)doc",
+                                          parameter_descriptions.c_str()));
+  op_def->set_is_commutative(false);
+  op_def->set_is_aggregate(false);
+  op_def->set_is_stateful(true);
+  auto shape_inference_function =
+      [state_variable_specs,
+       is_debug_op](shape_inference::InferenceContext* c) -> Status {
+    int table_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
+    string table_name;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
+    // Exactly one must be non-default.
+    if ((table_id >= 0) == (!table_name.empty())) {
+      return errors::InvalidArgument(
+          "exactly one of table_id or table_name must be non-default");
+    }
+    int num_shards;
+    TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
+    int shard_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("shard_id", &shard_id));
+    const int user_param_count =
+        std::count_if(state_variable_specs.begin(), state_variable_specs.end(),
+                      [&](const tpu::StateVariableSpecification& sv) {
+                        return sv.has_user_defined() || is_debug_op;
+                      });
+    std::vector<shape_inference::ShapeHandle> inputs(user_param_count);
+    int input_index = 0;
+    for (int i = 0; i < state_variable_specs.size(); ++i) {
+      if (state_variable_specs[i].has_user_defined() || is_debug_op) {
+        std::vector<shape_inference::ShapeHandle> input_temp;
+        TF_RETURN_IF_ERROR(
+            c->input(state_variable_specs[i].name(), &input_temp));
+        if (input_temp.size() != 1) {
+          return errors::InvalidArgument("each input to be rank 1");
+        }
+        inputs[input_index] = input_temp[0];
+        ++input_index;
+      }
+    }
+    // Verify shapes have rank 2 and are compatible when they are
+    // required to be valid.
+    shape_inference::ShapeHandle parameter_shape;
+    TF_RETURN_IF_ERROR(c->WithRank(inputs[0], 2, &parameter_shape));
+    for (int j = 1; j < user_param_count; ++j) {
+      shape_inference::ShapeHandle accumulator_j_shape;
+      TF_RETURN_IF_ERROR(c->WithRank(inputs[j], 2, &accumulator_j_shape));
+      shape_inference::ShapeHandle merged;
+      TF_RETURN_IF_ERROR(
+          c->Merge(parameter_shape, accumulator_j_shape, &merged));
+    }
+    return Status::OK();
+  };
+  op_reg_data->shape_inference_fn = shape_inference_function;
   return Status::OK();
 }
 
-}  // namespace tpu_embedding_config_util
-
-REGISTER_OP("TPUEmbeddingRetrieveGradientDescentParameters")
-    .Output("parameters: float32")
-    .Attr("tpu_embedding_config: string")
-    .Attr("table_id: int")
-    .Attr("num_hosts: int")
-    .Attr("host_id: int")
-    .SetIsStateful()
-    .SetShapeFn(tpu_embedding_config_util::GradientDescentShapes)
-    .Doc(R"doc(
-Retrieve an embedding table shard from TPU memory.
-
-TPU embeddings use dedicated per-optimizer Ops for loading and retrieving 
-trainable variables and optimizer state from TPU memory. This op enables
-functionality equivalent to GradientDescentOptimizer.
-
-tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
-table_id: The id of the table specified in tpu_embedding_config.
-num_hosts: The number of CPU hosts in the distributed training job.
-host_id: Which CPU host in the distributed training job will execute this op.
-)doc");
-
-REGISTER_OP("TPUEmbeddingLoadAdagradParameters")
-    .Input("parameters: float32")
-    .Input("accumulators: float32")
-    .Attr("tpu_embedding_config: string")
-    .Attr("table_id: int >= 0")
-    .Attr("num_hosts: int >= 1")
-    .Attr("host_id: int >= 0")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Load an embedding table shard into TensorNode memories for use with Adagrad.
-
-TPU embeddings use dedicated per-optimizer Ops for loading and retrieving
-trainable variables and optimizer state from TPU memory. This op enables
-functionality equivalent to AdagradOptimizer.
-
-parameters: The shard of the embedding table resident on the host executing this
-    op. For single-TPU models, this is the entire embedding table.
-accumulators: Shard of the Adagrad accumulators resident on the host executing
-    this op.
-tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
-table_id: The id of the table specified in the embedding_config.
-num_hosts: The number of CPU hosts in the distributed training job.
-host_id: Which CPU host in the distributed training job will execute this op.
-)doc");
-
-namespace tpu_embedding_config_util {
-
-Status AdagradShapes(shape_inference::InferenceContext *c) {
-  string config_string;
-  TF_RETURN_IF_ERROR(c->GetAttr("tpu_embedding_config", &config_string));
-  tpu::TPUEmbeddingConfiguration config;
-  if (!config.ParseFromString(config_string)) {
-    return errors::InvalidArgument("Malformed tpu_embedding_config.");
+Status RegisterPerTableRetrieveOpsForAlgorithmBody(
+    tpu::OptimizationAlgorithm alg, bool is_debug_op,
+    OpRegistrationData* op_reg_data) {
+  tpu::GradientAccumulationSupport grad_accum_support;
+  TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
+
+  std::vector<tpu::StateVariableSpecification> state_variable_specs;
+  TF_CHECK_OK(GetOptimizationAlgorithmStateVariables(
+      alg,
+      grad_accum_support == tpu::GradientAccumulationSupport::kSupported &&
+          is_debug_op,
+      &state_variable_specs));
+
+  auto* op_def = &op_reg_data->op_def;
+  op_def->set_name(strings::StrCat(
+      "RetrieveTPUEmbedding", tpu::GetOptimizationAlgorithmName(alg),
+      "Parameters", (is_debug_op ? "GradAccumDebug" : "")));
+  // It is important for the order of the outputs of the op defined here
+  // to match the order in output_names because the indexes are used in
+  // the combining transformation.
+  for (const auto& parameter : state_variable_specs) {
+    if (parameter.has_user_defined() || is_debug_op) {
+      auto* arg = op_def->add_output_arg();
+      arg->set_name(parameter.name());
+      arg->set_description(
+          strings::StrCat("Parameter ", parameter.name(), " updated by the ",
+                          tpu::GetOptimizationAlgorithmFriendlyName(alg),
+                          " optimization algorithm."));
+      arg->set_type(DT_FLOAT);
+    }
   }
-
-  int table_id;
-  TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
-  int64 num_tables = config.table_descriptor_size();
-  if (table_id >= num_tables) {
-    return errors::InvalidArgument("Table id >= num_tables");
+  {
+    auto* table_id_attr = op_def->add_attr();
+    table_id_attr->set_name("table_id");
+    table_id_attr->set_type("int");
+    table_id_attr->set_has_minimum(true);
+    table_id_attr->set_minimum(-1);
+    table_id_attr->mutable_default_value()->set_i(-1);
   }
-  int64 width = config.table_descriptor(table_id).dimension();
-  int64 num_rows = config.table_descriptor(table_id).vocabulary_size();
-
-  TF_RETURN_IF_ERROR(c->set_output("parameters", {c->Matrix(num_rows, width)}));
-  TF_RETURN_IF_ERROR(
-      c->set_output("accumulators", {c->Matrix(num_rows, width)}));
+  {
+    auto* table_name_attr = op_def->add_attr();
+    table_name_attr->set_name("table_name");
+    table_name_attr->set_type("string");
+    table_name_attr->mutable_default_value()->set_s("");
+  }
+  {
+    auto* num_shards_attr = op_def->add_attr();
+    num_shards_attr->set_name("num_shards");
+    num_shards_attr->set_type("int");
+  }
+  {
+    auto* shard_id_attr = op_def->add_attr();
+    shard_id_attr->set_name("shard_id");
+    shard_id_attr->set_type("int");
+  }
+  op_def->set_summary("Retrieve embedding parameters for a single table.");
+  string parameter_descriptions;
+  for (const auto& param : state_variable_specs) {
+    if (param.has_user_defined() || is_debug_op) {
+      strings::Appendf(&parameter_descriptions,
+                       R"(
+%s: A tensor containing the embedding table %s to store with the
+parameters from embedding updates using the %s optimization algorithm.)",
+                       param.name().c_str(), param.name().c_str(),
+                       tpu::GetOptimizationAlgorithmFriendlyName(alg).c_str());
+    }
+  }
+  op_def->set_description(strings::Printf(R"doc(
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+%s
+table_name: Name of this table; must match a name in the
+  EmbeddingLayerConfiguration proto (overrides table_id).
+num_shards: Number of shards into which the embedding tables are divided.
+shard_id: Identifier of shard for this operation.
+table_id: Index of this table in the EmbeddingLayerConfiguration proto
+  (deprecated).
+)doc",
+                                          parameter_descriptions.c_str()));
+  op_def->set_is_commutative(false);
+  op_def->set_is_aggregate(false);
+  op_def->set_is_stateful(true);
+  auto shape_inference_function =
+      [state_variable_specs,
+       is_debug_op](shape_inference::InferenceContext* c) -> Status {
+    int table_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
+    string table_name;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
+    // Exactly one must be non-default.
+    if ((table_id >= 0) == (!table_name.empty())) {
+      return errors::InvalidArgument(
+          "exactly one of table_id or table_name must be non-default");
+    }
+    int num_shards;
+    TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
+    int shard_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("shard_id", &shard_id));
+    for (int j = 0; j < state_variable_specs.size(); ++j) {
+      if (state_variable_specs[j].has_user_defined() || is_debug_op) {
+        auto shape = c->MakeShape(
+            std::vector<shape_inference::DimensionHandle>(2, c->UnknownDim()));
+        TF_RETURN_IF_ERROR(
+            c->set_output(state_variable_specs[j].name(),
+                          std::vector<shape_inference::ShapeHandle>(1, shape)));
+      }
+    }
+    return Status::OK();
+  };
+  op_reg_data->shape_inference_fn = shape_inference_function;
   return Status::OK();
 }
 
-}  // namespace tpu_embedding_config_util
-
-REGISTER_OP("TPUEmbeddingRetrieveAdagradParameters")
-    .Output("parameters: float32")
-    .Output("accumulators: float32")
-    .Attr("tpu_embedding_config: string")
-    .Attr("table_id: int >= 0")
-    .Attr("num_hosts: int >= 1")
-    .Attr("host_id: int >= 0")
-    .SetIsStateful()
-    .SetShapeFn(tpu_embedding_config_util::AdagradShapes)
-    .Doc(R"doc(
-Retrieve an embedding table shard from TPU memory.
-
-TPU embeddings use dedicated per-optimizer Ops for loading and retrieving 
-trainable variables and optimizer state from TPU memory. This op enables
-functionality equivalent to AdagradOptimizer.
-
-tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
-table_id: The id of the table specified in the embedding_config_json.
-num_hosts: The number of CPU hosts in the distributed training job.
-host_id: Which CPU host in the distributed training job will execute this op.
-)doc");
-
-REGISTER_OP("TPUEmbeddingEnqueueSparseBatch")
-    .Input("sample_indices: num_tables * int32")
-    .Input("embedding_indices: num_tables * int32")
-    .Input("aggregation_weights: num_tables * float32")
-    .Attr("num_tables: int")
-    .Attr("device_ordinal: int = -1")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-An op that feeds a batch of embedding indices and weights to the TPU.
-
-Embedding lookups are equivalent to sparse-dense matrix multiplications: the
-sparse matrix contains nonzeros in column j in order to retrieve row j from the
-embedding table.
-
-The three Tensor list arguments (sample_indices, embedding_indices, and
-aggregation_weights) represent these sparse matrices in COO format. The Tensor
-lists each have one entry for each embedding table specified in the model.
-For the kth embedding table, the three Tensors at position k in the list
-specify a COO-format sparse matrix. For the kth table, the row indices,
-column indices, and nonzero values of the COO sparse matrix are specified by
-sample_indices[k], embedding_indices[k], and aggregation_weights[k],
-respectively. Entries must be sorted by row index, then by column index.
-
-There should be at most one TPUEmbeddingEnqueueSparseBatch op in a signle
-training step per TPU shard.
-
-sample_indices: A list of rank 1 Tensors specifying row indices of the COO
-    sparse matrix representing the embedding lookups for each table.
-embedding_indices: A list of rank 1 Tensors  specifying column indices of the
-    COO sparse matrix representing the embedding lookups for each table.
-aggregation_weights: A list of rank 1 Tensors specifying the nonzero values
-    of the COO sparse matrix representing the embedding lookups for each table.
-device_ordinal: The TPU device to use. This should be -1 when the Op
-    is running on a TPU device, and >= 0 when the Op is running on the CPU
-    device.
-)doc");
-
-namespace tpu_embedding_config_util {
-
-Status ActivationShapes(shape_inference::InferenceContext *c) {
-  string config_string;
-  TF_RETURN_IF_ERROR(c->GetAttr("tpu_embedding_config", &config_string));
-  tpu::TPUEmbeddingConfiguration config;
-  if (!config.ParseFromString(config_string)) {
-    return errors::InvalidArgument("Malformed tpu_embedding_config.");
+void RegisterPerTableLoadAndRetrieveOps() {
+  // Load ops
+  for (tpu::OptimizationAlgorithm alg : tpu::GetOptimizationAlgorithms()) {
+    OpRegistry::Global()->Register(
+        [alg](OpRegistrationData* op_reg_data) -> Status {
+          return RegisterPerTableLoadOpsForAlgorithmBody(alg, false,
+                                                         op_reg_data);
+        });
+    tpu::GradientAccumulationSupport grad_accum_support;
+    TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
+    if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
+      // TODO(gkurian): Condition this on being used internally within Google.
+      OpRegistry::Global()->Register(
+          [alg](OpRegistrationData* op_reg_data) -> Status {
+            return RegisterPerTableLoadOpsForAlgorithmBody(alg, true,
+                                                           op_reg_data);
+          });
+    }
   }
-  int64 batch_size = config.batch_size_per_tensor_core();
-  int64 num_tables = config.table_descriptor_size();
-  for (int table_id = 0; table_id < num_tables; ++table_id) {
-    int64 width = config.table_descriptor(table_id).dimension();
-    int64 num_features = config.table_descriptor(table_id).vocabulary_size();
-    c->set_output(table_id, c->Matrix(batch_size * num_features, width));
+  // Retrieve ops
+  for (tpu::OptimizationAlgorithm alg : tpu::GetOptimizationAlgorithms()) {
+    OpRegistry::Global()->Register(
+        [alg](OpRegistrationData* op_reg_data) -> Status {
+          return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, false,
+                                                             op_reg_data);
+        });
+    tpu::GradientAccumulationSupport grad_accum_support;
+    TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
+    if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
+      // TODO(gkurian): Condition this on being used internally within Google.
+      OpRegistry::Global()->Register(
+          [alg](OpRegistrationData* op_reg_data) -> Status {
+            return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, true,
+                                                               op_reg_data);
+          });
+    }
   }
-  return Status::OK();
 }
 
-}  // namespace tpu_embedding_config_util
+}  // namespace
 
-REGISTER_OP("TPUEmbeddingReceiveActivations")
-    .Output("outputs: num_tables * float")
-    .Attr("num_tables: int >= 1")
-    .Attr("tpu_embedding_config: string")
+REGISTER_OP("RecvTPUEmbeddingActivations")
+    .Output("outputs: num_outputs * float")
+    .Attr("num_outputs: int >= 1")
+    .Attr("config: string")
     .SetIsStateful()
-    .SetShapeFn(tpu_embedding_config_util::ActivationShapes)
+    .SetShapeFn([](shape_inference::InferenceContext* c) -> Status {
+      string config_string;
+      TF_RETURN_IF_ERROR(c->GetAttr("config", &config_string));
+      tpu::TPUEmbeddingConfiguration config;
+      if (!config.ParseFromString(config_string)) {
+        return errors::InvalidArgument("Malformed tpu_embedding_config.");
+      }
+      tpu::AddDefaultEmbeddingOutputLayoutIfNeeded(&config);
+      std::vector<TensorShapeProto> output_shapes;
+      TF_RETURN_IF_ERROR(ComputeOutputTensorShapes(config, &output_shapes));
+      if (c->num_outputs() != output_shapes.size()) {
+        return errors::InvalidArgument("num outputs != size of output shapes");
+      }
+      for (int i = 0; i < c->num_outputs(); ++i) {
+        shape_inference::ShapeHandle output_shape;
+        TF_RETURN_IF_ERROR(
+            c->MakeShapeFromShapeProto(output_shapes[i], &output_shape));
+        c->set_output(i, output_shape);
+      }
+      return Status::OK();
+    })
     .Doc(R"doc(
 An op that receives embedding activations on the TPU.
 
@@ -274,9 +403,9 @@ one ReceieveActivations op in the TPU graph.
 
 outputs: A TensorList of embedding activations containing one Tensor per
     embedding table in the model.
-num_tables: The number of output activation tensors, equal to the number of
+num_outputs: The number of output activation tensors, equal to the number of
     embedding tables in the model.
-tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
+config: Serialized TPUEmbeddingConfiguration proto.
 )doc");
 
 REGISTER_OP("TPUEmbeddingActivations")
@@ -306,10 +435,10 @@ lookup_id: Identifier of the set of embedding indices which produced these
     activations.
 )doc");
 
-REGISTER_OP("TPUEmbeddingSendGradients")
-    .Input("gradients: num_tables * float32")
-    .Attr("num_tables: int >= 1")
-    .Attr("tpu_embedding_config: string")
+REGISTER_OP("SendTPUEmbeddingGradients")
+    .Input("inputs: N * float32")
+    .Attr("N: int >= 1")
+    .Attr("config: string")
     .SetIsStateful()
     .SetShapeFn(shape_inference::UnknownShape)
     .Doc(R"doc(
@@ -321,8 +450,107 @@ with respect to the embedding activations. The embedding tables are updated
 from these gradients via the optimizer specified in the configuration given
 to tpu.initialize_system.
 
-gradients: A TensorList of gradients with which to update embedding tables.
-tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
+inputs: A TensorList of gradients with which to update embedding tables.
+config: Serialized TPUEmbeddingConfiguration proto.
+)doc");
+
+REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch")
+    .Input("batch: N * int32")
+    .Attr("N: int")
+    .Attr("device_ordinal: int = -1")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+An op that enqueues a list of input batch tensors to TPUEmbedding.
+
+batch: A list of 1D tensors, one for each embedding table, containing the
+batch inputs represented as integers.
+device_ordinal: The TPU device to use. This should be -1 when the Op
+is running on a TPU device, and >= 0 when the Op is running on the CPU
+device.
+)doc");
+
+REGISTER_OP("EnqueueTPUEmbeddingSparseBatch")
+    .Input("sample_indices: N * int32")
+    .Input("embedding_indices: N * int32")
+    .Input("aggregation_weights: N * float32")
+    .Attr("N: int")
+    .Attr("device_ordinal: int = -1")
+    .Attr("combiners: list(string) = []")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext* c) -> Status {
+      std::vector<string> combiners;
+      TF_RETURN_IF_ERROR(c->GetAttr("combiners", &combiners));
+      int n;
+      TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
+      if (!combiners.empty() && combiners.size() != n) {
+        return errors::InvalidArgument("Invalid length of combiners. Have ",
+                                       combiners.size(), " but expected 0 or ",
+                                       n);
+      }
+
+      return Status::OK();
+    })
+    .Doc(R"doc(
+An op that enqueues TPUEmbedding input indices from a SparseTensor.
+
+This Op eases the porting of code that uses embedding_lookup_sparse(),
+although some Python preprocessing of the SparseTensor arguments to
+embedding_lookup_sparse() is required to produce the arguments to this Op,
+since only a single EnqueueTPUEmbedding Op is allowed per training step.
+
+The tensors at corresponding positions in the three input lists
+must have the same shape, i.e. rank 1 with dim_size() equal to the total
+number of lookups into the table described by the corresponding table_id.
+
+sample_indices: A list of Rank 1 Tensors specifying the training example and
+    feature to which the corresponding embedding_indices and aggregation_weights
+    values belong. sample_indices[i] must equal b * nf + f, where nf is the
+    number of features from the corresponding table, f is in [0, nf), and
+    b is in [0, training batch size).
+embedding_indices: A list of Rank 1 Tensors, indices into the embedding tables.
+aggregation_weights: A list of Rank 1 Tensors containing per sample -- i.e. per
+    (training example, feature) -- aggregation weights.
+device_ordinal: The TPU device to use. This should be -1 when the Op
+is running on a TPU device, and >= 0 when the Op is running on the CPU
+device.
+combiners: A list of string scalars whose values are 'mean', 'sum', or 'sqrtn'
+to specify how to normalize the embedding activations after weighted summation.
+)doc");
+
+REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
+    .Input("sample_indices: N * int32")
+    .Input("embedding_indices: N * int32")
+    .Input("aggregation_weights: N * float32")
+    .Attr("N: int")
+    .Attr("device_ordinal: int = -1")
+    .Attr("combiners: list(string) = []")
+    .Attr("table_ids: list(int)")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+This Op eases the porting of code that uses tf.nn.embedding_lookup_sparse().
+
+sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond
+to ith feature. table_ids[i] indicates which embedding table to look up ith
+feature.
+
+sample_indices: A list of Rank 1 Tensors, corresponds to sp_ids.indices[:,0] in
+embedding_lookup_sparse().
+embedding_indices: A list of Rank 1 Tensors, corresponds to sp_ids.values
+ in embedding_lookup_sparse().
+aggregation_weights: A list of Rank 1 Tensors, corresponds to sp_weights.values
+ in embedding_lookup_sparse().
+device_ordinal: The TPU device to use. This should be -1 when the Op
+is running on a TPU device, and >= 0 when the Op is running on the CPU
+device.
+combiners: A list of strings, one for each embedding table, specifying the
+reduction operation.  Currently, 'sum', 'mean' and 'sqrtn' are supported. It is
+invalid to have the sum of the weights be 0 for 'mean' or the sum of the squared
+weights be 0 for 'sqrtn'. If combiners isn't passed, the default is to
+use 'sum' for all tables.
+table_ids: A list of int. table_ids[i] indicates which embedding table to look
+up ith feature.
 )doc");
 
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/utils/BUILD b/tensorflow/contrib/tpu/utils/BUILD
new file mode 100644
index 0000000000..c27b737287
--- /dev/null
+++ b/tensorflow/contrib/tpu/utils/BUILD
@@ -0,0 +1,30 @@
+# Description: Utilities for TPU Operations
+
+licenses(["notice"])  # Apache 2.0
+
+cc_library(
+    name = "tpu_embedding_optimization_parameters_utils",
+    srcs = ["tpu_embedding_optimization_parameters_utils.cc"],
+    hdrs = ["tpu_embedding_optimization_parameters_utils.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/contrib/tpu/proto:optimization_parameters_proto_cc",
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:lib_proto_parsing",
+        "@com_google_absl//absl/base",
+    ],
+)
+
+cc_library(
+    name = "tpu_embedding_output_layout_utils",
+    srcs = ["tpu_embedding_output_layout_utils.cc"],
+    hdrs = ["tpu_embedding_output_layout_utils.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
+        "//tensorflow/contrib/tpu/proto:tpu_embedding_output_layout_proto_cc",
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:lib_proto_parsing",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc b/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc
new file mode 100644
index 0000000000..76cb5531cd
--- /dev/null
+++ b/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc
@@ -0,0 +1,255 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tpu {
+
+string GetOptimizationAlgorithmName(OptimizationAlgorithm alg) {
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad:
+      return "Adagrad";
+    case OptimizationAlgorithm::kStochasticGradientDescent:
+      return "StochasticGradientDescent";
+    case OptimizationAlgorithm::kFtrl:
+      return "FTRL";
+    case OptimizationAlgorithm::kAdam:
+      return "ADAM";
+    case OptimizationAlgorithm::kMomentum:
+      return "Momentum";
+    case OptimizationAlgorithm::kRmsProp:
+      return "RMSProp";
+    case OptimizationAlgorithm::kCenteredRmsProp:
+      return "CenteredRMSProp";
+    case OptimizationAlgorithm::kMdlAdagradLight:
+      return "MDLAdagradLight";
+    case OptimizationAlgorithm::kAdadelta:
+      return "Adadelta";
+    case OptimizationAlgorithm::kProximalAdagrad:
+      return "ProximalAdagrad";
+    case OptimizationAlgorithm::PARAMETERS_NOT_SET:
+      return "*** Not set ***";
+  }
+}
+
+string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg) {
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad:
+      return "Adagrad";
+    case OptimizationAlgorithm::kStochasticGradientDescent:
+      return "stochastic gradient descent";
+    case OptimizationAlgorithm::kFtrl:
+      return "FTRL";
+    case OptimizationAlgorithm::kAdam:
+      return "ADAM";
+    case OptimizationAlgorithm::kMomentum:
+      return "Momentum";
+    case OptimizationAlgorithm::kRmsProp:
+      return "RMSProp";
+    case OptimizationAlgorithm::kCenteredRmsProp:
+      return "centered RMSProp";
+    case OptimizationAlgorithm::kMdlAdagradLight:
+      return "MDL Adagrad Light";
+    case OptimizationAlgorithm::kAdadelta:
+      return "Adadelta";
+    case OptimizationAlgorithm::kProximalAdagrad:
+      return "proximal Adagrad";
+    case OptimizationAlgorithm::PARAMETERS_NOT_SET:
+      return "unknown (not specified)";
+  }
+}
+
+// Returns the number of optimization parameter vectors used by the optimization
+// algorithm, excluding the weights themselves and assuming no gradient
+// accumulation.
+Status GetBaseAuxiliaryParameterCount(OptimizationAlgorithm alg, int* count) {
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad:
+      *count = 1;
+      return Status::OK();
+    case OptimizationAlgorithm::kStochasticGradientDescent:
+      *count = 0;
+      return Status::OK();
+    case OptimizationAlgorithm::kFtrl:
+      *count = 2;
+      return Status::OK();
+    case OptimizationAlgorithm::kAdam:
+      *count = 2;
+      return Status::OK();
+    case OptimizationAlgorithm::kMomentum:
+      *count = 1;
+      return Status::OK();
+    case OptimizationAlgorithm::kRmsProp:
+      *count = 2;
+      return Status::OK();
+    case OptimizationAlgorithm::kCenteredRmsProp:
+      *count = 3;
+      return Status::OK();
+    case OptimizationAlgorithm::kMdlAdagradLight:
+      *count = 3;
+      return Status::OK();
+    case OptimizationAlgorithm::kAdadelta:
+      *count = 2;
+      return Status::OK();
+    case OptimizationAlgorithm::kProximalAdagrad:
+      *count = 1;
+      return Status::OK();
+    case OptimizationAlgorithm::PARAMETERS_NOT_SET:
+      return errors::InvalidArgument("No optimization algorithm specified");
+  }
+}
+
+Status GetGradientAccumulationSupport(OptimizationAlgorithm alg,
+                                      GradientAccumulationSupport* support) {
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad:
+      *support = GradientAccumulationSupport::kSupported;
+      return Status::OK();
+    case OptimizationAlgorithm::kStochasticGradientDescent:
+      *support = GradientAccumulationSupport::kUnnecessary;
+      return Status::OK();
+    default: {
+      int auxiliary_parameter_count;
+      TF_RETURN_IF_ERROR(
+          GetBaseAuxiliaryParameterCount(alg, &auxiliary_parameter_count));
+      *support = auxiliary_parameter_count + 1 <= kMaxAuxiliaryParameterCount
+                     ? GradientAccumulationSupport::kSupported
+                     : GradientAccumulationSupport::kNotSupported;
+      return Status::OK();
+    }
+  }
+}
+namespace {
+// Make a normal state variable specification.
+StateVariableSpecification MakeStandardStateVariableSpecification(
+    const string& name) {
+  StateVariableSpecification result;
+  result.set_name(name);
+  result.mutable_user_defined();
+  return result;
+}
+}  // namespace
+
+Status GetOptimizationAlgorithmStateVariables(
+    OptimizationAlgorithm alg, bool use_gradient_accumulation,
+    std::vector<StateVariableSpecification>* state_variables) {
+  // The first parameter set is always the weights themselves.
+  state_variables->push_back(
+      MakeStandardStateVariableSpecification("parameters"));
+  // The order of the returned parameters needs to match the offsets used by
+  // the algorithm implementations in test_util.cc and
+  // address_handler_program_creator.cc.
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators"));
+      break;
+    }
+    case OptimizationAlgorithm::kStochasticGradientDescent: {
+      // None.
+      break;
+    }
+    case OptimizationAlgorithm::kFtrl: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators"));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("linears"));
+      break;
+    }
+    case OptimizationAlgorithm::kAdam: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("momenta"));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("velocities"));
+      break;
+    }
+    case OptimizationAlgorithm::kMomentum: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("momenta"));
+      break;
+    }
+    case OptimizationAlgorithm::kRmsProp: {
+      state_variables->push_back(MakeStandardStateVariableSpecification("ms"));
+      state_variables->push_back(MakeStandardStateVariableSpecification("mom"));
+      break;
+    }
+    case OptimizationAlgorithm::kCenteredRmsProp: {
+      state_variables->push_back(MakeStandardStateVariableSpecification("ms"));
+      state_variables->push_back(MakeStandardStateVariableSpecification("mom"));
+      state_variables->push_back(MakeStandardStateVariableSpecification("mg"));
+      break;
+    }
+    case OptimizationAlgorithm::kMdlAdagradLight: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators"));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("weights"));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("benefits"));
+      break;
+    }
+    case OptimizationAlgorithm::kAdadelta: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators"));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("updates"));
+      break;
+    }
+    case OptimizationAlgorithm::kProximalAdagrad: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators"));
+      break;
+    }
+    case OptimizationAlgorithm::PARAMETERS_NOT_SET: {
+      return errors::InvalidArgument("No optimization algorithm specified");
+    }
+  }
+  // This needs to be last so that the save/restore ops do not need to know
+  // about gradient accumulation.
+  if (use_gradient_accumulation) {
+    StateVariableSpecification gradient_acc;
+    gradient_acc.set_name("gradient_accumulators");
+    gradient_acc.mutable_fill_with_constant()->set_initial_value(
+        kGradientAccumulatorInitialValue);
+    state_variables->push_back(std::move(gradient_acc));
+  }
+  if (state_variables->size() > kMaxAuxiliaryParameterCount + 1) {
+    return errors::InvalidArgument(
+        "Optimization algorithm", GetOptimizationAlgorithmName(alg),
+        "does not support gradient accumulation because it "
+        "already has too many other accumulators");
+  }
+  return Status::OK();
+}  // namespace tpu
+
+std::vector<OptimizationAlgorithm> GetOptimizationAlgorithms() {
+  return {
+      OptimizationAlgorithm::kAdagrad,
+      OptimizationAlgorithm::kStochasticGradientDescent,
+      OptimizationAlgorithm::kFtrl,
+      OptimizationAlgorithm::kAdam,
+      OptimizationAlgorithm::kMomentum,
+      OptimizationAlgorithm::kRmsProp,
+      OptimizationAlgorithm::kCenteredRmsProp,
+      OptimizationAlgorithm::kMdlAdagradLight,
+      OptimizationAlgorithm::kAdadelta,
+      OptimizationAlgorithm::kProximalAdagrad,
+  };
+}
+
+}  // namespace tpu
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h b/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h
new file mode 100644
index 0000000000..81d50264ed
--- /dev/null
+++ b/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h
@@ -0,0 +1,90 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_
+#define TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_
+
+#include <string>
+#include "absl/base/casts.h"
+#include "tensorflow/contrib/tpu/proto/optimization_parameters.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+namespace tpu {
+
+using OptimizationAlgorithm = OptimizationParameters::ParametersCase;
+
+// Returns the name of the optimization algorithm.
+string GetOptimizationAlgorithmName(OptimizationAlgorithm alg);
+
+// Returns a user-friendly name for the optimization algorithm.
+string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg);
+
+// Returns all supported optimization algorithms.
+std::vector<OptimizationAlgorithm> GetOptimizationAlgorithms();
+
+enum class GradientAccumulationSupport {
+  // Accumulation cannot be used with this optimizer.
+  kNotSupported,
+
+  // Accumulation is unnecessary because optimizer application is commutative.
+  kUnnecessary,
+
+  // Accumulation is allowed and changes optimizer behavior.
+  kSupported,
+};
+
+// Returns the number of optimization parameter vectors used by the optimization
+// algorithm, excluding the weights themselves and assuming no gradient
+// accumulation.
+Status GetBaseAuxiliaryParameterCount(OptimizationAlgorithm alg, int *count);
+
+// Returns whether (and how) an optimization algorithm supports gradient
+// accumulation.
+Status GetGradientAccumulationSupport(OptimizationAlgorithm alg,
+                                      GradientAccumulationSupport *support);
+
+// Returns the parameter specifications for the optimization algorithm (the main
+// parameters first, followed by any auxiliary parameters such as Adagrad
+// accumulators).
+Status GetOptimizationAlgorithmStateVariables(
+    OptimizationAlgorithm alg, bool use_gradient_accumulation,
+    std::vector<StateVariableSpecification> *state_variables);
+
+// Maximum value of auxiliar_parameter_count for any optimization algorithm.
+static constexpr int kMaxAuxiliaryParameterCount = 3;
+
+// Fill value for gradient accumulators. This is a denormal so that it will be
+// flushed to zero on the current TPU platforms and needs to continue to have
+// the following properties in the future:
+//
+// 1. Does not have the same bit pattern as a zero and can be distinguished from
+// it using integer operations.
+// 2. Treated as zero by floating-point arithmetic operations (at least addition
+// and subtraction).
+// 3. Cannot be produced by any floating-point arithmetic operation, including
+// those involving itself.
+//
+// It does not need to compare equal or not equal to zero in floating point. We
+// need to use a non-zero value here because some optimization algorithms are
+// not no-ops on zero gradients, so we need to distinguish an accumulated
+// gradient of zero from one that has been cleared after its gradients have
+// already been applied to the parameters and accumulators.
+const float kGradientAccumulatorInitialValue = absl::bit_cast<float, uint32>(1);
+
+}  // namespace tpu
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_
diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc b/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc
new file mode 100644
index 0000000000..8480ec4b8b
--- /dev/null
+++ b/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc
@@ -0,0 +1,98 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h"
+#include "tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tpu {
+
+void AddDefaultEmbeddingOutputLayoutIfNeeded(
+    TPUEmbeddingConfiguration* config) {
+  if (config->has_output_layout()) {
+    // Model or previous step has already filled this in.
+    return;
+  }
+
+  TPUEmbeddingOutputLayout* layout = config->mutable_output_layout();
+  // Create output tensors.
+  for (const auto& table : config->table_descriptor()) {
+    TPUEmbeddingOutputLayout::EmbeddingOutputTensor* output =
+        layout->add_output();
+    TPUEmbeddingOutputLayout::TwoDOutputTensor* two_d = output->mutable_two_d();
+    two_d->set_dim1_size(table.dimension());
+    two_d->set_dim0_size_per_sample(table.num_features());
+  }
+
+  // Create table output locations.
+  for (int table_id = 0; table_id < config->table_descriptor_size();
+       ++table_id) {
+    TPUEmbeddingOutputLayout::TableDescriptor* output_table =
+        layout->add_table();
+    const auto& table = config->table_descriptor(table_id);
+    for (int feature_index = 0; feature_index < table.num_features();
+         ++feature_index) {
+      TPUEmbeddingOutputLayout::FeatureDescriptor* output_feature =
+          output_table->add_feature();
+      TPUEmbeddingOutputLayout::OutputLocation* output_location =
+          output_feature->add_output_location();
+      output_location->set_tensor_index(table_id);
+      output_location->set_dim0_offset(feature_index);
+      output_location->set_dim1_offset(0);
+    }
+  }
+}
+
+Status ComputeOutputTensorShapes(const TPUEmbeddingConfiguration& config,
+                                 std::vector<TensorShapeProto>* shapes) {
+  if (!config.has_output_layout()) {
+    return errors::InvalidArgument(
+        "TPUEmbeddingConfiguration is missing output layout.");
+  }
+  const TPUEmbeddingOutputLayout& layout = config.output_layout();
+  int batch_size = config.batch_size_per_tensor_core();
+
+  for (int i = 0; i < layout.output_size(); ++i) {
+    const auto& output = layout.output(i);
+    TensorShapeProto shape;
+    switch (output.output_format_case()) {
+      case TPUEmbeddingOutputLayout::EmbeddingOutputTensor::OutputFormatCase::
+          kTwoD: {
+        auto* dim0 = shape.add_dim();
+        dim0->set_size(output.two_d().dim0_size_per_sample() * batch_size);
+        auto* dim1 = shape.add_dim();
+        dim1->set_size(output.two_d().dim1_size());
+        break;
+      }
+      case TPUEmbeddingOutputLayout::EmbeddingOutputTensor::OutputFormatCase::
+          OUTPUT_FORMAT_NOT_SET: {
+        return errors::InvalidArgument(
+            "Output layout in TPUEmbeddingConfiguration has unset embedding "
+            "output tensor format.");
+      }
+      default: {
+        return errors::InvalidArgument(
+            "Output layout in TPUEmbeddingConfiguration has invalid or "
+            "unhandled embedding output tensor format.");
+      }
+    }
+    shapes->push_back(shape);
+  }
+  return Status::OK();
+}
+
+}  // namespace tpu
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h b/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h
new file mode 100644
index 0000000000..c10fbeeff2
--- /dev/null
+++ b/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h
@@ -0,0 +1,38 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_
+#define TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_
+
+#include "tensorflow/contrib/tpu/proto/tpu_embedding_configuration.pb.h"
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+namespace tpu {
+
+// Creates a default output layout for compatibility if none was provided by the
+// model.
+void AddDefaultEmbeddingOutputLayoutIfNeeded(TPUEmbeddingConfiguration* config);
+
+// Computes the shape of the output tensors from an output layout.
+Status ComputeOutputTensorShapes(
+    const TPUEmbeddingConfiguration& config,
+    std::vector<tensorflow::TensorShapeProto>* shapes);
+
+}  // namespace tpu
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_
-- 
GitLab


From 941e757a2364bb2e7cf41b8d980d7639849c6c5d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Sep 2018 20:45:43 -0700
Subject: [PATCH 0783/1357] Fix custom getter handling in tpu.rewrite() and
 friends.

It used to save the existing custom getter then overwrites the custom getter. That means the previous custom getter will never be called inside "computation". It now create a new custom getter that calls the previous custom getter.

PiperOrigin-RevId: 214715720
---
 tensorflow/contrib/tpu/python/tpu/tpu.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 712b02ff0d..883e08bf47 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -661,6 +661,10 @@ def split_compile_and_replicate(computation,
       # be less confusing to clients if they knowingly choose to use resource
       # variables.
       # Partitioned variables is not supported (b/112311320).
+      vscope = variable_scope.get_variable_scope()
+      saved_use_resource = vscope.use_resource
+      saved_custom_getter = vscope.custom_getter
+
       def custom_getter(getter, name, *args, **kwargs):
         """Variables on TPU have a few restrictions."""
         partitioner = kwargs["partitioner"]
@@ -671,12 +675,10 @@ def split_compile_and_replicate(computation,
               "`partitioner` that is {} for variable {}. "
               "Setting `partitioner` to `None`."
               .format(partitioner, name))
-        return getter(name, *args, **kwargs)
-
-      vscope = variable_scope.get_variable_scope()
-
-      saved_use_resource = vscope.use_resource
-      saved_custom_getter = vscope.custom_getter
+        if saved_custom_getter is None:
+          return getter(name, *args, **kwargs)
+        else:
+          return saved_custom_getter(getter, name, *args, **kwargs)
 
       vscope.set_use_resource(True)
       vscope.set_custom_getter(custom_getter)
-- 
GitLab


From a40cfd42e20d7e4520c1306666c9dfee97eb0a2e Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Wed, 26 Sep 2018 22:00:22 -0700
Subject: [PATCH 0784/1357] Automated rollback of commit
 e00d7744dbab5c73e4d8ffa8a7d361f7b2dcefff

PiperOrigin-RevId: 214721004
---
 .../estimator/dnn_with_layer_annotations.py   |  19 +-
 tensorflow/python/estimator/BUILD             |   2 +
 tensorflow/python/estimator/canned/dnn.py     | 188 ++++++++++++++----
 .../estimator/canned/dnn_linear_combined.py   |   7 +-
 .../python/estimator/canned/dnn_test.py       | 161 +++++++++++----
 .../estimator/canned/dnn_testing_utils.py     | 116 ++++++-----
 .../python/feature_column/feature_column.py   |  33 ++-
 .../feature_column/feature_column_v2.py       |  14 ++
 8 files changed, 396 insertions(+), 144 deletions(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index 3fd9f12c61..5faf0aacfe 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -75,7 +75,9 @@ def make_input_layer_with_layer_annotations(original_input_layer):
                                          weight_collections=None,
                                          trainable=True,
                                          cols_to_vars=None,
-                                         cols_to_output_tensors=None):
+                                         scope=None,
+                                         cols_to_output_tensors=None,
+                                         from_template=False):
     """Returns a dense `Tensor` as input layer based on given `feature_columns`.
 
     Generally a single example in training data is described with
@@ -111,9 +113,12 @@ def make_input_layer_with_layer_annotations(original_input_layer):
         'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1'
           shape=(5, 10)]} If a column creates no variables, its value will be an
           empty list.
+      scope: A name or variable scope to use
       cols_to_output_tensors: If not `None`, must be a dictionary that will be
         filled with a mapping from '_FeatureColumn' to the associated output
         `Tensor`s.
+      from_template: True if the method is being instantiated from a
+        `make_template`.
 
     Returns:
       A `Tensor` which represents input layer of a model. Its shape
@@ -131,7 +136,9 @@ def make_input_layer_with_layer_annotations(original_input_layer):
         weight_collections=weight_collections,
         trainable=trainable,
         cols_to_vars=cols_to_vars,
-        cols_to_output_tensors=local_cols_to_output_tensors)
+        scope=scope,
+        cols_to_output_tensors=local_cols_to_output_tensors,
+        from_template=from_template)
 
     if cols_to_output_tensors is not None:
       cols_to_output_tensors = local_cols_to_output_tensors
@@ -296,9 +303,9 @@ def DNNClassifierWithLayerAnnotations(  # pylint: disable=invalid-name
 
   def _model_fn(features, labels, mode, config):
     with _monkey_patch(
-        feature_column_lib, 'input_layer',
+        feature_column_lib, '_internal_input_layer',
         make_input_layer_with_layer_annotations(
-            feature_column_lib.input_layer)):
+            feature_column_lib._internal_input_layer)):  # pylint: disable=protected-access
       return original.model_fn(features, labels, mode, config)
 
   return estimator.Estimator(
@@ -417,9 +424,9 @@ def DNNRegressorWithLayerAnnotations(  # pylint: disable=invalid-name
 
   def _model_fn(features, labels, mode, config):
     with _monkey_patch(
-        feature_column_lib, 'input_layer',
+        feature_column_lib, '_internal_input_layer',
         make_input_layer_with_layer_annotations(
-            feature_column_lib.input_layer)):
+            feature_column_lib._internal_input_layer)):  # pylint: disable=protected-access
       return original.model_fn(features, labels, mode, config)
 
   return estimator.Estimator(
diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index 7f2349954d..ba1b7ec2b5 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -281,6 +281,7 @@ py_library(
         ":prediction_keys",
         "//tensorflow:tensorflow_py_no_contrib",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
         "@six_archive//:six",
     ],
 )
@@ -303,6 +304,7 @@ py_test(
         ":pandas_io",
         ":prediction_keys",
         "//tensorflow:tensorflow_py_no_contrib",
+        "@absl_py//absl/testing:parameterized",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py
index 1c0c4581c0..97971f9561 100644
--- a/tensorflow/python/estimator/canned/dnn.py
+++ b/tensorflow/python/estimator/canned/dnn.py
@@ -24,7 +24,10 @@ from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import model_fn
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.framework import ops
+from tensorflow.python.keras.engine import training
 from tensorflow.python.layers import core as core_layers
 from tensorflow.python.layers import normalization
 from tensorflow.python.ops import init_ops
@@ -45,8 +48,14 @@ def _add_hidden_layer_summary(value, tag):
   summary.histogram('%s/activation' % tag, value)
 
 
-def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
-                          dropout, input_layer_partitioner, batch_norm):
+def _dnn_logit_fn_builder(units,
+                          hidden_units,
+                          feature_columns,
+                          activation_fn,
+                          dropout,
+                          input_layer_partitioner,
+                          batch_norm,
+                          shared_state_manager=None):
   """Function builder for a dnn logit_fn.
 
   Args:
@@ -60,6 +69,8 @@ def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
       coordinate.
     input_layer_partitioner: Partitioner for input layer.
     batch_norm: Whether to use batch normalization after each hidden layer.
+    shared_state_manager: A SharedEmbeddingStateManager object to hold the
+      shared state for SharedEmbeddingColumn's.
 
   Returns:
     A logit_fn (see below).
@@ -85,50 +96,132 @@ def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
       A `Tensor` representing the logits, or a list of `Tensor`'s representing
       multiple logits in the MultiHead case.
     """
-    is_training = mode == model_fn.ModeKeys.TRAIN
-    with variable_scope.variable_scope(
-        'input_from_feature_columns',
-        values=tuple(six.itervalues(features)),
-        partitioner=input_layer_partitioner):
-      net = feature_column_lib.input_layer(
-          features=features, feature_columns=feature_columns)
+    dnn_model = _DNNModel(
+        units,
+        hidden_units,
+        feature_columns,
+        activation_fn,
+        dropout,
+        input_layer_partitioner,
+        batch_norm,
+        shared_state_manager,
+        name='dnn')
+    return dnn_model(features, mode)
+
+  return dnn_logit_fn
+
+
+def _get_previous_name_scope():
+  current_name_scope = ops.get_name_scope()
+  return current_name_scope.rsplit('/', 1)[0] + '/'
+
+
+class _DNNModel(training.Model):
+  """A DNN Model."""
+
+  def __init__(self,
+               units,
+               hidden_units,
+               feature_columns,
+               activation_fn,
+               dropout,
+               input_layer_partitioner,
+               batch_norm,
+               shared_state_manager,
+               name=None,
+               **kwargs):
+    super(_DNNModel, self).__init__(name=name, **kwargs)
+    self._is_v2 = False
+    if feature_column_v2.is_feature_column_v2(feature_columns):
+      self._is_v2 = True
+      self._input_layer = feature_column_v2.FeatureLayer(
+          feature_columns=feature_columns,
+          name='input_layer',
+          shared_state_manager=shared_state_manager)
+    else:
+      self._input_layer = feature_column.InputLayer(
+          feature_columns=feature_columns,
+          name='input_layer',
+          create_scope_now=False)
+
+    self._add_layer(self._input_layer, 'input_layer')
+
+    self._dropout = dropout
+    self._batch_norm = batch_norm
+
+    self._hidden_layers = []
+    self._dropout_layers = []
+    self._batch_norm_layers = []
+    self._hidden_layer_scope_names = []
     for layer_id, num_hidden_units in enumerate(hidden_units):
       with variable_scope.variable_scope(
-          'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
-        net = core_layers.dense(
-            net,
+          'hiddenlayer_%d' % layer_id) as hidden_layer_scope:
+        hidden_layer = core_layers.Dense(
             units=num_hidden_units,
             activation=activation_fn,
             kernel_initializer=init_ops.glorot_uniform_initializer(),
-            name=hidden_layer_scope)
-        if dropout is not None and is_training:
-          net = core_layers.dropout(net, rate=dropout, training=True)
-        if batch_norm:
-          # TODO(hjm): In future, if this becomes popular, we can enable
-          # customization of the batch normalization params by accepting a
-          # list of `BatchNormalization` instances as `batch_norm`.
-          net = normalization.batch_normalization(
-              net,
+            name=hidden_layer_scope,
+            _scope=hidden_layer_scope)
+        self._add_layer(hidden_layer, hidden_layer_scope.name)
+        self._hidden_layer_scope_names.append(hidden_layer_scope.name)
+        self._hidden_layers.append(hidden_layer)
+        if self._dropout is not None:
+          dropout_layer = core_layers.Dropout(rate=self._dropout)
+          self._add_layer(dropout_layer, dropout_layer.name)
+          self._dropout_layers.append(dropout_layer)
+        if self._batch_norm:
+          batch_norm_layer = normalization.BatchNormalization(
               # The default momentum 0.99 actually crashes on certain
               # problem, so here we use 0.999, which is the default of
               # tf.contrib.layers.batch_norm.
               momentum=0.999,
-              training=is_training,
-              name='batchnorm_%d' % layer_id)
-      _add_hidden_layer_summary(net, hidden_layer_scope.name)
-
-    with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
-      logits = core_layers.dense(
-          net,
+              trainable=True,
+              name='batchnorm_%d' % layer_id,
+              _scope='batchnorm_%d' % layer_id)
+          self._add_layer(batch_norm_layer, batch_norm_layer.name)
+          self._batch_norm_layers.append(batch_norm_layer)
+
+    with variable_scope.variable_scope('logits') as logits_scope:
+      self._logits_layer = core_layers.Dense(
           units=units,
           activation=None,
           kernel_initializer=init_ops.glorot_uniform_initializer(),
-          name=logits_scope)
-    _add_hidden_layer_summary(logits, logits_scope.name)
-
-    return logits
-
-  return dnn_logit_fn
+          name=logits_scope,
+          _scope=logits_scope)
+      self._add_layer(self._logits_layer, logits_scope.name)
+      self._logits_scope_name = logits_scope.name
+    self._logits_layer._use_resource_variables = False  # pylint: disable=protected-access
+    self._input_layer_partitioner = input_layer_partitioner
+
+  def call(self, features, mode):
+    is_training = mode == model_fn.ModeKeys.TRAIN
+    # The Keras training.Model adds a name_scope with the name of the model
+    # which modifies the constructed graph. Hence we add another name_scope
+    # here which is the one before the training.Model one was applied.
+    # TODO(rohanj): Remove this in TF 2.0 (b/116728605)
+    with ops.name_scope(name=_get_previous_name_scope()):
+      # TODO(rohanj): Remove dependence on variable scope for partitioning.
+      with variable_scope.variable_scope(
+          'input_from_feature_columns',
+          partitioner=self._input_layer_partitioner):
+        net = self._input_layer(features)
+      for i in range(len(self._hidden_layers)):
+        net = self._hidden_layers[i](net)
+        if self._dropout is not None and is_training:
+          net = self._dropout_layers[i](net, training=True)
+        if self._batch_norm:
+          net = self._batch_norm_layers[i](net, training=is_training)
+        _add_hidden_layer_summary(net, self._hidden_layer_scope_names[i])
+
+      logits = self._logits_layer(net)
+      _add_hidden_layer_summary(logits, self._logits_scope_name)
+      return logits
+
+  def _add_layer(self, layer, layer_name):
+    # "Magic" required for keras.Model classes to track all the variables in
+    # a list of layers.Layer objects.
+    # TODO(ashankar): Figure out API so user code doesn't have to do this.
+    setattr(self, layer_name, layer)
 
 
 def _dnn_model_fn(features,
@@ -143,7 +236,8 @@ def _dnn_model_fn(features,
                   input_layer_partitioner=None,
                   config=None,
                   use_tpu=False,
-                  batch_norm=False):
+                  batch_norm=False,
+                  shared_state_manager=None):
   """Deep Neural Net model_fn.
 
   Args:
@@ -167,6 +261,8 @@ def _dnn_model_fn(features,
     use_tpu: Whether to make a DNN model able to run on TPU. Will make function
       return a `_TPUEstimatorSpec` instance and disable variable partitioning.
     batch_norm: Whether to use batch normalization after each hidden layer.
+    shared_state_manager: A SharedEmbeddingStateManager object to hold the
+      shared state for SharedEmbeddingColumn's.
 
   Returns:
     An `EstimatorSpec` instance.
@@ -202,7 +298,8 @@ def _dnn_model_fn(features,
         activation_fn=activation_fn,
         dropout=dropout,
         input_layer_partitioner=input_layer_partitioner,
-        batch_norm=batch_norm)
+        batch_norm=batch_norm,
+        shared_state_manager=shared_state_manager)
     logits = logit_fn(features=features, mode=mode)
 
     if use_tpu:
@@ -370,6 +467,10 @@ class DNNClassifier(estimator.Estimator):
     """
     head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
         n_classes, weight_column, label_vocabulary, loss_reduction)
+
+    shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
+        feature_columns)
+
     def _model_fn(features, labels, mode, config):
       """Call the defined shared _dnn_model_fn."""
       return _dnn_model_fn(
@@ -384,7 +485,8 @@ class DNNClassifier(estimator.Estimator):
           dropout=dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config,
-          batch_norm=batch_norm)
+          batch_norm=batch_norm,
+          shared_state_manager=shared_state_manager)
 
     super(DNNClassifier, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config,
@@ -532,6 +634,10 @@ class DNNRegressor(estimator.Estimator):
       batch_norm: Whether to use batch normalization after each hidden layer.
     """
 
+    shared_state_manager = None
+    if feature_column_v2.is_feature_column_v2(feature_columns):
+      shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
+
     def _model_fn(features, labels, mode, config):
       """Call the defined shared _dnn_model_fn."""
       return _dnn_model_fn(
@@ -539,7 +645,8 @@ class DNNRegressor(estimator.Estimator):
           labels=labels,
           mode=mode,
           head=head_lib._regression_head(  # pylint: disable=protected-access
-              label_dimension=label_dimension, weight_column=weight_column,
+              label_dimension=label_dimension,
+              weight_column=weight_column,
               loss_reduction=loss_reduction),
           hidden_units=hidden_units,
           feature_columns=tuple(feature_columns or []),
@@ -548,7 +655,8 @@ class DNNRegressor(estimator.Estimator):
           dropout=dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config,
-          batch_norm=batch_norm)
+          batch_norm=batch_norm,
+          shared_state_manager=shared_state_manager)
 
     super(DNNRegressor, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config,
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined.py b/tensorflow/python/estimator/canned/dnn_linear_combined.py
index 9799cf9e98..f712244c8d 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined.py
@@ -27,6 +27,7 @@ from tensorflow.python.estimator.canned import dnn
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import linear
 from tensorflow.python.estimator.canned import optimizers
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import nn
@@ -142,6 +143,9 @@ def _dnn_linear_combined_model_fn(features,
           max_partitions=num_ps_replicas,
           min_slice_size=64 << 20))
 
+  shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
+      list(linear_feature_columns) + list(dnn_feature_columns))
+
   # Build DNN Logits.
   dnn_parent_scope = 'dnn'
 
@@ -169,8 +173,9 @@ def _dnn_linear_combined_model_fn(features,
           feature_columns=dnn_feature_columns,
           activation_fn=dnn_activation_fn,
           dropout=dnn_dropout,
+          batch_norm=batch_norm,
           input_layer_partitioner=input_layer_partitioner,
-          batch_norm=batch_norm)
+          shared_state_manager=shared_state_manager)
       dnn_logits = dnn_logit_fn(features=features, mode=mode)
 
   linear_parent_scope = 'linear'
diff --git a/tensorflow/python/estimator/canned/dnn_test.py b/tensorflow/python/estimator/canned/dnn_test.py
index fc90b7c35e..756696cea0 100644
--- a/tensorflow/python/estimator/canned/dnn_test.py
+++ b/tensorflow/python/estimator/canned/dnn_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import shutil
 import tempfile
 
+from absl.testing import parameterized
 import numpy as np
 import six
 
@@ -33,6 +34,7 @@ from tensorflow.python.estimator.export import export
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.estimator.inputs import pandas_io
 from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import data_flow_ops
@@ -62,15 +64,32 @@ class DNNModelFnTest(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
 
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNModelFnTest.__init__(self, dnn._dnn_model_fn)
+    dnn_testing_utils.BaseDNNModelFnTest.__init__(
+        self, dnn._dnn_model_fn, fc_impl=feature_column)
+
+
+class DNNModelFnV2Test(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNModelFnTest.__init__(
+        self, dnn._dnn_model_fn, fc_impl=feature_column_v2)
 
 
 class DNNLogitFnTest(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
 
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNLogitFnTest.__init__(self,
-                                                  dnn._dnn_logit_fn_builder)
+    dnn_testing_utils.BaseDNNLogitFnTest.__init__(
+        self, dnn._dnn_logit_fn_builder, fc_impl=feature_column)
+
+
+class DNNLogitFnV2Test(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNLogitFnTest.__init__(
+        self, dnn._dnn_logit_fn_builder, fc_impl=feature_column_v2)
 
 
 class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
@@ -78,8 +97,17 @@ class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
 
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(self, _dnn_classifier_fn,
-                                                       _dnn_regressor_fn)
+    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
+        self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNWarmStartingV2Test(dnn_testing_utils.BaseDNNWarmStartingTest,
+                            test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
+        self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column_v2)
 
 
 class DNNClassifierEvaluateTest(
@@ -88,7 +116,16 @@ class DNNClassifierEvaluateTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
-        self, _dnn_classifier_fn)
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNClassifierEvaluateV2Test(
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
 
 
 class DNNClassifierPredictTest(
@@ -97,7 +134,16 @@ class DNNClassifierPredictTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
-        self, _dnn_classifier_fn)
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNClassifierPredictV2Test(dnn_testing_utils.BaseDNNClassifierPredictTest,
+                                 test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
 
 
 class DNNClassifierTrainTest(
@@ -106,7 +152,16 @@ class DNNClassifierTrainTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
-        self, _dnn_classifier_fn)
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
+                               test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
 
 
 def _dnn_regressor_fn(*args, **kwargs):
@@ -119,7 +174,16 @@ class DNNRegressorEvaluateTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_regressor_fn)
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNRegressorEvaluateV2Test(dnn_testing_utils.BaseDNNRegressorEvaluateTest,
+                                 test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
 
 
 class DNNRegressorPredictTest(
@@ -128,7 +192,16 @@ class DNNRegressorPredictTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_regressor_fn)
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNRegressorPredictV2Test(dnn_testing_utils.BaseDNNRegressorPredictTest,
+                                test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
 
 
 class DNNRegressorTrainTest(
@@ -137,7 +210,16 @@ class DNNRegressorTrainTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_regressor_fn)
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
+                              test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
 
 
 def _queue_parsed_features(feature_map):
@@ -156,7 +238,8 @@ def _queue_parsed_features(feature_map):
   return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
 
 
-class DNNRegressorIntegrationTest(test.TestCase):
+@parameterized.parameters((feature_column,), (feature_column_v2,))
+class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -166,11 +249,11 @@ class DNNRegressorIntegrationTest(test.TestCase):
       writer_cache.FileWriterCache.clear()
       shutil.rmtree(self._model_dir)
 
-  def _test_complete_flow(
-      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      label_dimension, batch_size):
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, batch_size,
+                          fc_impl):
+    feature_columns = [fc_impl.numeric_column('x', shape=(input_dimension,))]
+
     est = dnn.DNNRegressor(
         hidden_units=(2, 2),
         feature_columns=feature_columns,
@@ -194,14 +277,14 @@ class DNNRegressorIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, label_dimension), predictions.shape)
 
     # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self):
+  def test_numpy_input_fn(self, fc_impl):
     """Tests complete flow with numpy_input_fn."""
     label_dimension = 2
     batch_size = 10
@@ -230,9 +313,10 @@ class DNNRegressorIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
-  def test_pandas_input_fn(self):
+  def test_pandas_input_fn(self, fc_impl):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -263,9 +347,10 @@ class DNNRegressorIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
-  def test_input_fn_from_parse_example(self):
+  def test_input_fn_from_parse_example(self, fc_impl):
     """Tests complete flow with input_fn constructed from parse_example."""
     label_dimension = 2
     batch_size = 10
@@ -313,9 +398,11 @@ class DNNRegressorIntegrationTest(test.TestCase):
         predict_input_fn=_predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
 
+@parameterized.parameters((feature_column,), (feature_column_v2,))
 class DNNClassifierIntegrationTest(test.TestCase):
 
   def setUp(self):
@@ -329,11 +416,10 @@ class DNNClassifierIntegrationTest(test.TestCase):
   def _as_label(self, data_in_float):
     return np.rint(data_in_float).astype(np.int64)
 
-  def _test_complete_flow(
-      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      n_classes, batch_size):
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, n_classes, batch_size, fc_impl):
+    feature_columns = [fc_impl.numeric_column('x', shape=(input_dimension,))]
+
     est = dnn.DNNClassifier(
         hidden_units=(2, 2),
         feature_columns=feature_columns,
@@ -357,14 +443,14 @@ class DNNClassifierIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
 
     # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self):
+  def test_numpy_input_fn(self, fc_impl):
     """Tests complete flow with numpy_input_fn."""
     n_classes = 3
     input_dimension = 2
@@ -396,9 +482,10 @@ class DNNClassifierIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
-  def test_pandas_input_fn(self):
+  def test_pandas_input_fn(self, fc_impl):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -430,9 +517,10 @@ class DNNClassifierIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
-  def test_input_fn_from_parse_example(self):
+  def test_input_fn_from_parse_example(self, fc_impl):
     """Tests complete flow with input_fn constructed from parse_example."""
     input_dimension = 2
     n_classes = 3
@@ -484,7 +572,8 @@ class DNNClassifierIntegrationTest(test.TestCase):
         predict_input_fn=_predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index 11f1e93630..cd66d0a3bd 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -104,6 +104,7 @@ def create_checkpoint(weights_and_biases,
     weights_and_biases: Iterable of tuples of weight and bias values.
     global_step: Initial global step to save in checkpoint.
     model_dir: Directory into which checkpoint is saved.
+    batch_norm_vars: Variables used for batch normalization.
   """
   weights, biases = zip(*weights_and_biases)
   if batch_norm_vars:
@@ -244,8 +245,9 @@ def mock_optimizer(testcase, hidden_units, expected_loss=None):
 class BaseDNNModelFnTest(object):
   """Tests that _dnn_model_fn passes expected logits to mock head."""
 
-  def __init__(self, dnn_model_fn):
+  def __init__(self, dnn_model_fn, fc_impl=feature_column):
     self._dnn_model_fn = dnn_model_fn
+    self._fc_impl = fc_impl
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -272,7 +274,7 @@ class BaseDNNModelFnTest(object):
           head=head,
           hidden_units=hidden_units,
           feature_columns=[
-              feature_column.numeric_column(
+              self._fc_impl.numeric_column(
                   'age', shape=np.array(inputs).shape[1:])
           ],
           optimizer=mock_optimizer(self, hidden_units))
@@ -462,8 +464,8 @@ class BaseDNNModelFnTest(object):
             head=head,
             hidden_units=hidden_units,
             feature_columns=[
-                feature_column.numeric_column('age'),
-                feature_column.numeric_column('height')
+                self._fc_impl.numeric_column('age'),
+                self._fc_impl.numeric_column('height')
             ],
             optimizer=mock_optimizer(self, hidden_units))
         with monitored_session.MonitoredTrainingSession(
@@ -499,7 +501,7 @@ class BaseDNNModelFnTest(object):
             head=head,
             hidden_units=hidden_units,
             feature_columns=[
-                feature_column.numeric_column(
+                self._fc_impl.numeric_column(
                     'age', shape=np.array(inputs).shape[1:])
             ],
             optimizer=mock_optimizer(self, hidden_units))
@@ -508,8 +510,9 @@ class BaseDNNModelFnTest(object):
 class BaseDNNLogitFnTest(object):
   """Tests correctness of logits calculated from _dnn_logit_fn_builder."""
 
-  def __init__(self, dnn_logit_fn_builder):
+  def __init__(self, dnn_logit_fn_builder, fc_impl=feature_column):
     self._dnn_logit_fn_builder = dnn_logit_fn_builder
+    self._fc_impl = fc_impl
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -541,7 +544,7 @@ class BaseDNNLogitFnTest(object):
             units=logits_dimension,
             hidden_units=hidden_units,
             feature_columns=[
-                feature_column.numeric_column(
+                self._fc_impl.numeric_column(
                     'age', shape=np.array(inputs).shape[1:])
             ],
             activation_fn=nn.relu,
@@ -786,8 +789,8 @@ class BaseDNNLogitFnTest(object):
               units=logits_dimension,
               hidden_units=hidden_units,
               feature_columns=[
-                  feature_column.numeric_column('age'),
-                  feature_column.numeric_column('height')
+                  self._fc_impl.numeric_column('age'),
+                  self._fc_impl.numeric_column('height')
               ],
               activation_fn=nn.relu,
               dropout=None,
@@ -806,9 +809,13 @@ class BaseDNNLogitFnTest(object):
 
 class BaseDNNWarmStartingTest(object):
 
-  def __init__(self, _dnn_classifier_fn, _dnn_regressor_fn):
+  def __init__(self,
+               _dnn_classifier_fn,
+               _dnn_regressor_fn,
+               fc_impl=feature_column):
     self._dnn_classifier_fn = _dnn_classifier_fn
     self._dnn_regressor_fn = _dnn_regressor_fn
+    self._fc_impl = fc_impl
 
   def setUp(self):
     # Create a directory to save our old checkpoint and vocabularies to.
@@ -843,8 +850,8 @@ class BaseDNNWarmStartingTest(object):
 
   def test_classifier_basic_warm_starting(self):
     """Tests correctness of DNNClassifier default warm-start."""
-    city = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_list(
+    city = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
 
@@ -875,8 +882,8 @@ class BaseDNNWarmStartingTest(object):
 
   def test_regressor_basic_warm_starting(self):
     """Tests correctness of DNNRegressor default warm-start."""
-    city = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_list(
+    city = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
 
@@ -905,8 +912,8 @@ class BaseDNNWarmStartingTest(object):
 
   def test_warm_starting_selective_variables(self):
     """Tests selecting variables to warm-start."""
-    city = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_list(
+    city = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
 
@@ -958,8 +965,8 @@ class BaseDNNWarmStartingTest(object):
     vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
     with open(vocab_file, 'w') as f:
       f.write('\n'.join(vocab_list))
-    occupation = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_file(
+    occupation = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_file(
             'occupation',
             vocabulary_file=vocab_file,
             vocabulary_size=len(vocab_list)),
@@ -985,8 +992,8 @@ class BaseDNNWarmStartingTest(object):
                                   'new_occupation_vocab')
     with open(new_vocab_file, 'w') as f:
       f.write('\n'.join(new_vocab_list))
-    new_occupation = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_file(
+    new_occupation = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_file(
             'occupation',
             vocabulary_file=new_vocab_file,
             vocabulary_size=len(new_vocab_list)),
@@ -1051,8 +1058,8 @@ class BaseDNNWarmStartingTest(object):
 
   def test_warm_starting_with_naming_change(self):
     """Tests warm-starting with a Tensor name remapping."""
-    locality = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_list(
+    locality = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
             'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
 
@@ -1068,8 +1075,8 @@ class BaseDNNWarmStartingTest(object):
     # Create a second DNNClassifier, warm-started from the first.  Use a
     # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
     # accumulator values that change).
-    city = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_list(
+    city = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
     warm_started_dnn_classifier = self._dnn_classifier_fn(
@@ -1101,8 +1108,9 @@ class BaseDNNWarmStartingTest(object):
 
 class BaseDNNClassifierEvaluateTest(object):
 
-  def __init__(self, dnn_classifier_fn):
+  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
     self._dnn_classifier_fn = dnn_classifier_fn
+    self._fc_impl = fc_impl
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1121,7 +1129,7 @@ class BaseDNNClassifierEvaluateTest(object):
 
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age')],
+        feature_columns=[self._fc_impl.numeric_column('age')],
         model_dir=self._model_dir)
     def _input_fn():
       # batch_size = 2, one false label, and one true.
@@ -1161,7 +1169,7 @@ class BaseDNNClassifierEvaluateTest(object):
 
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age', shape=[2])],
+        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
         n_classes=n_classes,
         model_dir=self._model_dir)
     def _input_fn():
@@ -1192,7 +1200,7 @@ class BaseDNNClassifierEvaluateTest(object):
 
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age')],
+        feature_columns=[self._fc_impl.numeric_column('age')],
         model_dir=self._model_dir)
     def _input_fn():
       # batch_size = 2, one false label, and one true.
@@ -1218,7 +1226,7 @@ class BaseDNNClassifierEvaluateTest(object):
 
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age', shape=[2])],
+        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
         n_classes=n_classes,
         weight_column='w',
         model_dir=self._model_dir)
@@ -1238,8 +1246,9 @@ class BaseDNNClassifierEvaluateTest(object):
 
 class BaseDNNRegressorEvaluateTest(object):
 
-  def __init__(self, dnn_regressor_fn):
+  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
     self._dnn_regressor_fn = dnn_regressor_fn
+    self._fc_impl = fc_impl
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1259,7 +1268,7 @@ class BaseDNNRegressorEvaluateTest(object):
 
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age')],
+        feature_columns=[self._fc_impl.numeric_column('age')],
         model_dir=self._model_dir)
     def _input_fn():
       return {'age': [[10.]]}, [[1.]]
@@ -1289,7 +1298,7 @@ class BaseDNNRegressorEvaluateTest(object):
 
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age', shape=[2])],
+        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
         label_dimension=label_dimension,
         model_dir=self._model_dir)
     def _input_fn():
@@ -1320,7 +1329,7 @@ class BaseDNNRegressorEvaluateTest(object):
 
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=[feature_column.numeric_column('age', shape=[2])],
+        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
         label_dimension=label_dimension,
         weight_column='w',
         model_dir=self._model_dir)
@@ -1339,8 +1348,9 @@ class BaseDNNRegressorEvaluateTest(object):
 
 class BaseDNNClassifierPredictTest(object):
 
-  def __init__(self, dnn_classifier_fn):
+  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
     self._dnn_classifier_fn = dnn_classifier_fn
+    self._fc_impl = fc_impl
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1361,7 +1371,7 @@ class BaseDNNClassifierPredictTest(object):
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
         label_vocabulary=label_vocabulary,
-        feature_columns=(feature_column.numeric_column('x'),),
+        feature_columns=(self._fc_impl.numeric_column('x'),),
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
         x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
@@ -1405,7 +1415,7 @@ class BaseDNNClassifierPredictTest(object):
 
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=(2, 2),
-        feature_columns=(feature_column.numeric_column('x', shape=(2,)),),
+        feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),),
         label_vocabulary=label_vocabulary,
         n_classes=3,
         model_dir=self._model_dir)
@@ -1453,8 +1463,9 @@ class BaseDNNClassifierPredictTest(object):
 
 class BaseDNNRegressorPredictTest(object):
 
-  def __init__(self, dnn_regressor_fn):
+  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
     self._dnn_regressor_fn = dnn_regressor_fn
+    self._fc_impl = fc_impl
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1475,7 +1486,7 @@ class BaseDNNRegressorPredictTest(object):
 
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=(feature_column.numeric_column('x'),),
+        feature_columns=(self._fc_impl.numeric_column('x'),),
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
         x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
@@ -1497,7 +1508,7 @@ class BaseDNNRegressorPredictTest(object):
 
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=(2, 2),
-        feature_columns=(feature_column.numeric_column('x', shape=(2,)),),
+        feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),),
         label_dimension=3,
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
@@ -1594,8 +1605,9 @@ def _assert_simple_summary(testcase, expected_values, actual_summary):
 
 class BaseDNNClassifierTrainTest(object):
 
-  def __init__(self, dnn_classifier_fn):
+  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
     self._dnn_classifier_fn = dnn_classifier_fn
+    self._fc_impl = fc_impl
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1609,7 +1621,7 @@ class BaseDNNClassifierTrainTest(object):
     hidden_units = (2, 2)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(self._fc_impl.numeric_column('age'),),
         model_dir=self._model_dir)
 
     # Train for a few steps, then validate final checkpoint.
@@ -1625,7 +1637,7 @@ class BaseDNNClassifierTrainTest(object):
     n_classes = 3
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(self._fc_impl.numeric_column('age'),),
         n_classes=n_classes,
         model_dir=self._model_dir)
 
@@ -1643,7 +1655,7 @@ class BaseDNNClassifierTrainTest(object):
         self, hidden_units=hidden_units)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(self._fc_impl.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1682,7 +1694,7 @@ class BaseDNNClassifierTrainTest(object):
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(self._fc_impl.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1728,7 +1740,7 @@ class BaseDNNClassifierTrainTest(object):
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_classifier = self._dnn_classifier_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(self._fc_impl.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1759,7 +1771,7 @@ class BaseDNNClassifierTrainTest(object):
     dnn_classifier = self._dnn_classifier_fn(
         n_classes=n_classes,
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(self._fc_impl.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1793,8 +1805,9 @@ class BaseDNNClassifierTrainTest(object):
 
 class BaseDNNRegressorTrainTest(object):
 
-  def __init__(self, dnn_regressor_fn):
+  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
     self._dnn_regressor_fn = dnn_regressor_fn
+    self._fc_impl = fc_impl
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1808,7 +1821,7 @@ class BaseDNNRegressorTrainTest(object):
     hidden_units = (2, 2)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(self._fc_impl.numeric_column('age'),),
         model_dir=self._model_dir)
 
     # Train for a few steps, then validate final checkpoint.
@@ -1824,7 +1837,7 @@ class BaseDNNRegressorTrainTest(object):
     opt = mock_optimizer(self, hidden_units=hidden_units)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(self._fc_impl.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1864,7 +1877,7 @@ class BaseDNNRegressorTrainTest(object):
         self, hidden_units=hidden_units, expected_loss=expected_loss)
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
-        feature_columns=(feature_column.numeric_column('age'),),
+        feature_columns=(self._fc_impl.numeric_column('age'),),
         optimizer=opt,
         model_dir=self._model_dir)
     self.assertEqual(0, opt.minimize.call_count)
@@ -1917,7 +1930,8 @@ class BaseDNNRegressorTrainTest(object):
     dnn_regressor = self._dnn_regressor_fn(
         hidden_units=hidden_units,
         feature_columns=[
-            feature_column.numeric_column('age', shape=[input_dimension])],
+            self._fc_impl.numeric_column('age', shape=[input_dimension])
+        ],
         label_dimension=label_dimension,
         optimizer=opt,
         model_dir=self._model_dir)
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 9984379e9d..226e273660 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -170,7 +170,8 @@ def _internal_input_layer(features,
                           trainable=True,
                           cols_to_vars=None,
                           scope=None,
-                          cols_to_output_tensors=None):
+                          cols_to_output_tensors=None,
+                          from_template=False):
   """See input_layer. `scope` is a name or variable scope to use."""
 
   feature_columns = _normalize_feature_columns(feature_columns)
@@ -186,10 +187,7 @@ def _internal_input_layer(features,
   if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
     weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
 
-  # a non-None `scope` can allow for variable reuse, when, e.g., this function
-  # is wrapped by a `make_template`.
-  with variable_scope.variable_scope(
-      scope, default_name='input_layer', values=features.values()):
+  def _get_logits():  # pylint: disable=missing-docstring
     builder = _LazyBuilder(features)
     output_tensors = []
     ordered_columns = []
@@ -217,6 +215,16 @@ def _internal_input_layer(features,
     _verify_static_batch_size_equality(output_tensors, ordered_columns)
     return array_ops.concat(output_tensors, 1)
 
+  # If we're constructing from the `make_template`, that by default adds a
+  # variable scope with the name of the layer. In that case, we dont want to
+  # add another `variable_scope` as that would break checkpoints.
+  if from_template:
+    return _get_logits()
+  else:
+    with variable_scope.variable_scope(
+        scope, default_name='input_layer', values=features.values()):
+      return _get_logits()
+
 
 @tf_export('feature_column.input_layer')
 def input_layer(features,
@@ -301,17 +309,18 @@ class InputLayer(object):
                feature_columns,
                weight_collections=None,
                trainable=True,
-               cols_to_vars=None):
+               cols_to_vars=None,
+               name='feature_column_input_layer',
+               create_scope_now=True):
     """See `input_layer`."""
 
     self._feature_columns = feature_columns
     self._weight_collections = weight_collections
     self._trainable = trainable
     self._cols_to_vars = cols_to_vars
+    self._name = name
     self._input_layer_template = template.make_template(
-        'feature_column_input_layer',
-        _internal_input_layer,
-        create_scope_now_=True)
+        self._name, _internal_input_layer, create_scope_now_=create_scope_now)
     self._scope = self._input_layer_template.variable_scope
 
   def __call__(self, features):
@@ -321,7 +330,11 @@ class InputLayer(object):
         weight_collections=self._weight_collections,
         trainable=self._trainable,
         cols_to_vars=None,
-        scope=self._scope)
+        from_template=True)
+
+  @property
+  def name(self):
+    return self._name
 
   @property
   def non_trainable_variables(self):
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 57f7af7635..b62c16ea5a 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -2045,6 +2045,14 @@ class DenseColumn(FeatureColumn):
     pass
 
 
+def is_feature_column_v2(feature_columns):
+  """Returns True if all feature columns are V2."""
+  for feature_column in feature_columns:
+    if not isinstance(feature_column, FeatureColumn):
+      return False
+  return True
+
+
 def _create_weighted_sum(column,
                          transformation_cache,
                          state_manager,
@@ -2782,6 +2790,12 @@ class SharedEmbeddingStateManager(Layer):
     return self._var_dict[name]
 
 
+def maybe_create_shared_state_manager(feature_columns):
+  if is_feature_column_v2(feature_columns):
+    return SharedEmbeddingStateManager()
+  return None
+
+
 class SharedEmbeddingColumn(
     DenseColumn, SequenceDenseColumn,
     collections.namedtuple(
-- 
GitLab


From 5df53ab7eb81c67459e2a95e8fbcb71999c703ad Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Wed, 26 Sep 2018 22:43:25 -0700
Subject: [PATCH 0785/1357] Enable constant folding for device memory tensors.

PiperOrigin-RevId: 214723970
---
 .../core/common_runtime/constant_folding.cc   | 34 ++++++++++++++-----
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/common_runtime/constant_folding.cc b/tensorflow/core/common_runtime/constant_folding.cc
index 99cb9ac6a0..419867ff58 100644
--- a/tensorflow/core/common_runtime/constant_folding.cc
+++ b/tensorflow/core/common_runtime/constant_folding.cc
@@ -470,19 +470,19 @@ bool ReplaceTensorWithConstant(
     const ConstantFoldNameGenerator& generate_new_name) {
   // Be conservative when replacing a tensor with a constant, when not
   // running on CPU.
-  // 1) If the destination tensor is not an int32 tensor, and has HOST_MEMORY
+  // 1) Do not replace another constant.
+  // 2) If the destination tensor is not an int32 tensor, and has HOST_MEMORY
   // constraint, do not replace it.
-  // 2) If the destination tensor is an int32 tensor, but has DEVICE_MEMORY
-  // constraint, do not replace it.
-  // 3) If the constant op created does not have a kernel implementation
-  // for the device, do not use it.
-  // 4) If the size of the constant in bytes is too large (>
+  // 3) If the size of the constant in bytes is too large (>
   // max_constant_in_bytes), do not replace it. This prevents the size of the
   // Graph from growing too large.
+  // 4) If the constant op created does not have a kernel implementation
+  // for the device, do not use it.
   // TODO(keveman): Consider adding a new constant op that has a kernel
   // implementation for all types, but with HostMemory constraint on it's
   // output.
-  // 5) Do not replace another constant.
+  // 5) If the constant op for the device has different output memory type
+  // from the original op output memory type, do not replace it.
   if (tensor.first->IsConstant()) {
     return false;
   }
@@ -497,8 +497,7 @@ bool ReplaceTensorWithConstant(
       return false;
     }
     bool is_int32 = tensor.first->output_type(tensor.second) == DT_INT32;
-    if ((memory_type == HOST_MEMORY && !is_int32) ||
-        (memory_type == DEVICE_MEMORY && is_int32)) {
+    if (memory_type == HOST_MEMORY && !is_int32) {
       return false;
     }
   }
@@ -536,6 +535,23 @@ bool ReplaceTensorWithConstant(
   if (!NodeBuilder(builder).Finalize(graph, &constant_node).ok()) {
     return false;
   }
+  if (partition_device && device_type != DEVICE_CPU) {
+    MemoryType original_output_memory_type;
+    if (!MemoryTypeForOutput(device_type, graph, tensor.first, tensor.second,
+                             &original_output_memory_type)
+             .ok()) {
+      return false;
+    }
+    MemoryType const_output_memory_type;
+    if (!MemoryTypeForOutput(device_type, graph, constant_node, 0,
+                             &const_output_memory_type)
+             .ok()) {
+      return false;
+    }
+    if (original_output_memory_type != const_output_memory_type) {
+      return false;
+    }
+  }
   for (auto edge : edges_to_remove) {
     graph->AddEdge(constant_node, 0, edge->dst(), edge->dst_input());
     graph->RemoveEdge(edge);
-- 
GitLab


From c85998ba9ca005774d81f0f15ee8055f19c6a888 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 00:31:05 -0700
Subject: [PATCH 0786/1357] Fix documentation of ready_for_local_init_op in
 tf.Supervisor, which mentions incorrect default value.

PiperOrigin-RevId: 214731772
---
 tensorflow/python/training/supervisor.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/training/supervisor.py b/tensorflow/python/training/supervisor.py
index 0755364bbe..a5e626d320 100644
--- a/tensorflow/python/training/supervisor.py
+++ b/tensorflow/python/training/supervisor.py
@@ -242,10 +242,9 @@ class Supervisor(object):
       ready_for_local_init_op: 1-D string `Tensor`.  This tensor is evaluated by
         supervisors in `prepare_or_wait_for_session()` to check if the model is
         ready to run the local_init_op.
-        The model is considered ready if it returns an empty array.  Defaults to
-        the tensor returned from
-        `tf.report_uninitialized_variables(tf.global_variables())`. If `None`,
-        the model is not checked for readiness before running local_init_op.
+        The model is considered ready if it returns an empty array. Defaults to
+        `None`. If `None`, the model is not checked for readiness before running
+        local_init_op.
       is_chief: If True, create a chief supervisor in charge of initializing
         and restoring the model.  If False, create a supervisor that relies
         on a chief supervisor for inits and restore.
-- 
GitLab


From 4e56ea8f9bc398e4cd8bf66abf58cc872c922067 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 00:34:34 -0700
Subject: [PATCH 0787/1357] Add support for explicit fetches when creating
 grappler items

PiperOrigin-RevId: 214732243
---
 .../core/grappler/grappler_item_builder.cc    |  8 +++++--
 .../core/grappler/grappler_item_builder.h     |  2 ++
 .../grappler/grappler_item_builder_test.cc    | 23 +++++++++++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index 029515ad3c..369046666d 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -192,9 +192,13 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
     const string feed_name = NodeName(feed_node);
     new_item->feed.emplace_back(feed_name, Tensor());
   }
+  for (const auto& fetch_node : cfg.fetch_nodes) {
+    new_item->fetch.emplace_back(NodeName(fetch_node));
+  }
 
-  // Attempt to detect the fetch node(s).
-  if (meta_graph.collection_def().count("train_op") > 0) {
+  // Attempt to detect the fetch node(s) if they were not set explicitly.
+  if (new_item->fetch.empty() &&
+      meta_graph.collection_def().count("train_op") > 0) {
     const CollectionDef& nodes = meta_graph.collection_def().at("train_op");
     if (nodes.has_node_list()) {
       for (const auto& node : nodes.node_list().value()) {
diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h
index aafd2fdcda..1698587f8c 100644
--- a/tensorflow/core/grappler/grappler_item_builder.h
+++ b/tensorflow/core/grappler/grappler_item_builder.h
@@ -49,6 +49,8 @@ struct ItemConfig {
   bool prune_graph = false;
   // Override feed nodes list.
   std::set<string> feed_nodes;
+  // Override fetch nodes list.
+  std::set<string> fetch_nodes;
 };
 
 // Factory method for creating a GrapplerItem from a MetaGraphDef.
diff --git a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc
index 4b90bf3038..d00981f174 100644
--- a/tensorflow/core/grappler/grappler_item_builder_test.cc
+++ b/tensorflow/core/grappler/grappler_item_builder_test.cc
@@ -313,6 +313,29 @@ TEST_F(GrapplerItemBuilderTest, FromGraphWithUnknownDimInSignatureInput) {
   EXPECT_EQ(item2->feed[0].second.NumElements(), 1);
 }
 
+TEST_F(GrapplerItemBuilderTest, ExplicitFeedAndFetch) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto x = ops::Const(s.WithOpName("x"), 0);
+  auto y = ops::Const(s.WithOpName("y"), 1);
+  auto z = ops::Add(s.WithOpName("z"), x, y);
+
+  MetaGraphDef meta_graph;
+  TF_CHECK_OK(s.ToGraphDef(meta_graph.mutable_graph_def()));
+
+  ItemConfig config;
+  config.feed_nodes.insert("x");
+  config.fetch_nodes.insert("z");
+
+  std::unique_ptr<GrapplerItem> item =
+      GrapplerItemFromMetaGraphDef("0", meta_graph, config);
+  ASSERT_TRUE(item != nullptr);
+
+  EXPECT_EQ(item->feed.size(), 1);
+  EXPECT_EQ(item->fetch.size(), 1);
+  EXPECT_EQ(item->feed[0].first, "x");
+  EXPECT_EQ(item->fetch[0], "z");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 19d8963bc0ea64e10ff08ad4e7cc76813a182196 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 02:09:02 -0700
Subject: [PATCH 0788/1357] compat: Update forward compatibility horizon to
 2018-09-27

PiperOrigin-RevId: 214741709
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index ce230731b0..88cad5d6d9 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 26)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 27)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From abf26356209cba1ba895a06d9ce55ad01dad7fc6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 06:12:59 -0700
Subject: [PATCH 0789/1357] Update kernel evals to use new kernel signatures.

PiperOrigin-RevId: 214763814
---
 .../contrib/lite/kernels/concatenation.cc     | 39 +++++++++------
 tensorflow/contrib/lite/kernels/gather.cc     | 14 ++++--
 .../contrib/lite/kernels/internal/tensor.h    | 14 +-----
 .../lite/kernels/internal/tensor_ctypes.h     |  4 --
 tensorflow/contrib/lite/kernels/select.cc     | 12 ++---
 tensorflow/contrib/lite/kernels/split.cc      | 27 +++++------
 .../contrib/lite/kernels/strided_slice.cc     | 48 ++++++++-----------
 tensorflow/contrib/lite/kernels/transpose.cc  | 23 ++++-----
 .../contrib/lite/kernels/transpose_test.cc    | 24 +++++-----
 9 files changed, 93 insertions(+), 112 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/concatenation.cc b/tensorflow/contrib/lite/kernels/concatenation.cc
index 25ea556d5a..7ad3399ffd 100644
--- a/tensorflow/contrib/lite/kernels/concatenation.cc
+++ b/tensorflow/contrib/lite/kernels/concatenation.cc
@@ -100,20 +100,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 // allocate and populate these during Prepare().
 // TODO(ycling): Activation function parameter is ignored. For now we dont have
 // a model with a Concatenation with fused activation function.
-#define TF_LITE_CONCATENATION(type, scalar)                                 \
-  VectorOfTensors<scalar> all_inputs(*context, *node->inputs);              \
-  type::Concatenation<FusedActivationFunctionType::kNone, scalar>(          \
-      RemapDim(NumDimensions(output), axis), all_inputs.data(),             \
-      all_inputs.dims(), node->inputs->size, GetTensorData<scalar>(output), \
-      GetTensorDims(output))
-
-#define TF_LITE_CONCATENATION_QUANTIZED(type)                                  \
-  VectorOfQuantizedTensors all_inputs(*context, *node->inputs);                \
-  type::Concatenation(                                                         \
-      RemapDim(NumDimensions(output), axis), all_inputs.data(),                \
-      all_inputs.dims(), all_inputs.zero_point(), all_inputs.scale(),          \
-      node->inputs->size, GetTensorData<uint8>(output), GetTensorDims(output), \
-      output->params.zero_point, output->params.scale)
+#define TF_LITE_CONCATENATION(type, scalar)                                \
+  {                                                                        \
+    VectorOfTensors<scalar> all_inputs(*context, *node->inputs);           \
+    tflite::ConcatenationParams op_params;                                 \
+    op_params.axis = axis;                                                 \
+    op_params.inputs_count = node->inputs->size;                           \
+    type::Concatenation(op_params, all_inputs.shapes(), all_inputs.data(), \
+                        GetTensorShape(output),                            \
+                        GetTensorData<scalar>(output));                    \
+  }
+
+#define TF_LITE_CONCATENATION_QUANTIZED(type)                                 \
+  {                                                                           \
+    VectorOfQuantizedTensors all_inputs(*context, *node->inputs);             \
+    tflite::ConcatenationParams op_params;                                    \
+    op_params.axis = axis;                                                    \
+    op_params.input_zeropoint = all_inputs.zero_point();                      \
+    op_params.input_scale = all_inputs.scale();                               \
+    op_params.inputs_count = node->inputs->size;                              \
+    op_params.output_zeropoint = output->params.zero_point;                   \
+    op_params.output_scale = output->params.scale;                            \
+    type::ConcatenationWithScaling(op_params, all_inputs.shapes(),            \
+                                   all_inputs.data(), GetTensorShape(output), \
+                                   GetTensorData<uint8>(output));             \
+  }
 
   switch (output->type) {  // Already know in/outtypes are same.
     case kTfLiteFloat32:
diff --git a/tensorflow/contrib/lite/kernels/gather.cc b/tensorflow/contrib/lite/kernels/gather.cc
index badd2de11a..b5afeb1a7b 100644
--- a/tensorflow/contrib/lite/kernels/gather.cc
+++ b/tensorflow/contrib/lite/kernels/gather.cc
@@ -84,11 +84,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* positions = GetInput(context, node, kInputPositions);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
   const int input_rank = NumDimensions(input);
-#define TF_LITE_GATHER(data_type, index_type)                            \
-  optimized_ops::Gather(                                                 \
-      GetTensorData<data_type>(input), GetTensorDims(input), input_rank, \
-      GetTensorData<index_type>(positions), GetTensorDims(positions),    \
-      GetTensorData<data_type>(output), GetTensorDims(output));
+#define TF_LITE_GATHER(data_type, index_type)                              \
+  {                                                                        \
+    tflite::GatherParams op_params;                                        \
+    op_params.input_rank = input_rank;                                     \
+    optimized_ops::Gather(                                                 \
+        op_params, GetTensorShape(input), GetTensorData<data_type>(input), \
+        GetTensorShape(positions), GetTensorData<index_type>(positions),   \
+        GetTensorShape(output), GetTensorData<data_type>(output));         \
+  }
   switch (input->type) {
     case kTfLiteFloat32:
       TF_LITE_GATHER(float, int32_t);
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h
index f1b08383b0..765c3a03ef 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor.h
+++ b/tensorflow/contrib/lite/kernels/internal/tensor.h
@@ -56,23 +56,19 @@ class VectorOfTensors {
     int num_tensors = tensor_list.size;
 
     all_data_.reserve(num_tensors);
-    all_dims_.reserve(num_tensors);
-    all_dims_ptr_.reserve(num_tensors);
     all_shape_.reserve(num_tensors);
     all_shape_ptr_.reserve(num_tensors);
 
     for (int i = 0; i < num_tensors; ++i) {
       TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
       all_data_.push_back(GetTensorData<T>(t));
-      all_dims_.push_back(GetTensorDims(t));
       all_shape_.push_back(GetTensorShape(t));
     }
 
     // Taking the pointer from inside a std::vector is only OK if the vector is
-    // never modified, so we populate all_dims in the previous loop and then we
+    // never modified, so we populate all_shape in the previous loop and then we
     // are free to grab iterators here.
     for (int i = 0; i < num_tensors; ++i) {
-      all_dims_ptr_.push_back(&all_dims_[i]);
       all_shape_ptr_.push_back(&all_shape_[i]);
     }
   }
@@ -82,12 +78,6 @@ class VectorOfTensors {
   //   f[0][1] is the second element of the first tensor.
   T* const* data() const { return all_data_.data(); }
 
-  // Return a pointer the dim pointers of all tensors in the list. For
-  // example:
-  //   const Dims<4>* const* d = v.dims();
-  //   dims[1] are the dimensions of the second tensor in the list.
-  const Dims<4>* const* dims() const { return all_dims_ptr_.data(); }
-
   // Return a pointer the shape pointers of all tensors in the list. For
   // example:
   //   const RuntimeShape* const* d = v.dims();
@@ -96,8 +86,6 @@ class VectorOfTensors {
 
  private:
   std::vector<T*> all_data_;
-  std::vector<Dims<4>> all_dims_;
-  std::vector<Dims<4>*> all_dims_ptr_;
   std::vector<RuntimeShape> all_shape_;
   std::vector<RuntimeShape*> all_shape_ptr_;
 };
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h b/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h
index 77e22a08b4..5e688ce452 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h
+++ b/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h
@@ -86,10 +86,6 @@ inline const bool* GetTensorData(const TfLiteTensor* tensor) {
   return tensor != nullptr ? tensor->data.b : nullptr;
 }
 
-inline int RemapDim(int max_dimensions, int d) {
-  return max_dimensions - d - 1;
-}
-
 // TODO(ahentz): the implementations in kernels/internal/ take a Dims<4> object
 // even if the original tensors were not 4D. We should consider rewriting them
 // to take a more generic 'shape' object.
diff --git a/tensorflow/contrib/lite/kernels/select.cc b/tensorflow/contrib/lite/kernels/select.cc
index 3959502d91..4780a86ee5 100644
--- a/tensorflow/contrib/lite/kernels/select.cc
+++ b/tensorflow/contrib/lite/kernels/select.cc
@@ -70,12 +70,12 @@ TfLiteStatus SelectEval(TfLiteContext* context, TfLiteNode* node) {
 
   bool is_rank_one = !HaveSameShapes(input_condition, input_x);
 
-#define TF_LITE_SELECT(type, op)                                          \
-  reference_ops::op(GetTensorData<bool>(input_condition),                 \
-                    GetTensorDims(input_condition),                       \
-                    GetTensorData<type>(input_x), GetTensorDims(input_x), \
-                    GetTensorData<type>(input_y), GetTensorDims(input_y), \
-                    GetTensorData<type>(output), GetTensorDims(output));
+#define TF_LITE_SELECT(type, op)                                           \
+  reference_ops::op(GetTensorShape(input_condition),                       \
+                    GetTensorData<bool>(input_condition),                  \
+                    GetTensorShape(input_x), GetTensorData<type>(input_x), \
+                    GetTensorShape(input_y), GetTensorData<type>(input_y), \
+                    GetTensorShape(output), GetTensorData<type>(output));
 
 #define TF_LITE_SWITCH(type, op)                                               \
   switch (type) {                                                              \
diff --git a/tensorflow/contrib/lite/kernels/split.cc b/tensorflow/contrib/lite/kernels/split.cc
index 719e2dc606..dab887bf9c 100644
--- a/tensorflow/contrib/lite/kernels/split.cc
+++ b/tensorflow/contrib/lite/kernels/split.cc
@@ -109,25 +109,24 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   if (axis_value < 0) {
     axis_value += NumDimensions(op_context.input);
   }
-  axis_value = RemapDim(NumDimensions(op_context.input), axis_value);
 
   // TODO(ahentz): Our usage of VectorOfTensors could be optimized by
   // calculating it in Prepare, unless we defer shape calculation.
   // TODO(ahentz): We can improve the optimized_ops version to handle other
   // cases too.
-#define TF_LITE_SPLIT(scalar)                                                  \
-  VectorOfTensors<scalar> all_outputs(*context, *node->outputs);               \
-  if (axis_value == NumDimensions(op_context.input)) {                         \
-    optimized_ops::TensorFlowSplit<FusedActivationFunctionType::kNone,         \
-                                   scalar>(                                    \
-        GetTensorData<scalar>(op_context.input),                               \
-        GetTensorDims(op_context.input), NumOutputs(node), all_outputs.data(), \
-        all_outputs.dims());                                                   \
-  } else {                                                                     \
-    reference_ops::TensorFlowSplit<scalar>(                                    \
-        GetTensorData<scalar>(op_context.input),                               \
-        GetTensorDims(op_context.input), axis_value, NumOutputs(node),         \
-        all_outputs.data(), all_outputs.dims());                               \
+#define TF_LITE_SPLIT(scalar)                                         \
+  VectorOfTensors<scalar> all_outputs(*context, *node->outputs);      \
+  tflite::SplitParams op_params;                                      \
+  op_params.num_split = NumOutputs(node);                             \
+  op_params.axis = axis_value;                                        \
+  if (axis_value == 0) {                                              \
+    optimized_ops::Split(op_params, GetTensorShape(op_context.input), \
+                         GetTensorData<scalar>(op_context.input),     \
+                         all_outputs.shapes(), all_outputs.data());   \
+  } else {                                                            \
+    reference_ops::Split(op_params, GetTensorShape(op_context.input), \
+                         GetTensorData<scalar>(op_context.input),     \
+                         all_outputs.shapes(), all_outputs.data());   \
   }
   switch (op_context.input->type) {
     case kTfLiteFloat32: {
diff --git a/tensorflow/contrib/lite/kernels/strided_slice.cc b/tensorflow/contrib/lite/kernels/strided_slice.cc
index 87ffcc4110..06b36dd196 100644
--- a/tensorflow/contrib/lite/kernels/strided_slice.cc
+++ b/tensorflow/contrib/lite/kernels/strided_slice.cc
@@ -57,17 +57,6 @@ struct StridedSliceContext {
   int dims;
 };
 
-// Reverse order of bits in the mask to match the expected order in kernel
-inline int ReverseMaskBits(int mask, int num_dimensions) {
-  int out = 0;
-  for (int dim = 0; dim < num_dimensions; dim++) {
-    out <<= 1;
-    out += (mask & 1);
-    mask >>= 1;
-  }
-  return out;
-}
-
 // This Op only supports 1-4D cases and since we use the reference 4D
 // implementation, the 1-3D tensors are mapped to 4D.
 const int kMaxDim = 4;
@@ -198,30 +187,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   std::vector<int32_t> stops;
   std::vector<int32_t> strides;
 
-  for (int idx = op_context.dims - 1; idx >= 0; --idx) {
-    starts.emplace_back(GetTensorData<int32_t>(op_context.begin)[idx]);
-    stops.emplace_back(GetTensorData<int32_t>(op_context.end)[idx]);
-    strides.emplace_back(GetTensorData<int32_t>(op_context.strides)[idx]);
-  }
-
   for (int i = op_context.dims; i < kMaxDim; i++) {
     starts.emplace_back(0);
     stops.emplace_back(1);
     strides.emplace_back(1);
   }
 
-  int begin_mask =
-      ReverseMaskBits(op_context.params->begin_mask, op_context.dims);
-  int end_mask = ReverseMaskBits(op_context.params->end_mask, op_context.dims);
-  int shrink_axis_mask =
-      ReverseMaskBits(op_context.params->shrink_axis_mask, op_context.dims);
-
-#define TF_LITE_STRIDED_SLICE(kernel_type, data_type)                          \
-  kernel_type::StridedSlice(                                                   \
-      GetTensorData<data_type>(op_context.input),                              \
-      GetTensorDims(op_context.input), begin_mask, end_mask, shrink_axis_mask, \
-      starts, stops, strides, GetTensorData<data_type>(op_context.output),     \
-      GetTensorDims(op_context.output))
+  for (int idx = 0; idx < op_context.dims; ++idx) {
+    starts.emplace_back(GetTensorData<int32_t>(op_context.begin)[idx]);
+    stops.emplace_back(GetTensorData<int32_t>(op_context.end)[idx]);
+    strides.emplace_back(GetTensorData<int32_t>(op_context.strides)[idx]);
+  }
+
+  int begin_mask = op_context.params->begin_mask << (4 - op_context.dims);
+  int end_mask = op_context.params->end_mask << (4 - op_context.dims);
+  int shrink_axis_mask = op_context.params->shrink_axis_mask
+                         << (4 - op_context.dims);
+  TF_LITE_ENSURE_EQ(context, starts.size(), 4);
+  auto op_params = ::tflite::strided_slice::BuildStridedSliceParams(
+      begin_mask, end_mask, shrink_axis_mask, starts, stops, strides);
+
+#define TF_LITE_STRIDED_SLICE(kernel_type, data_type)                    \
+  kernel_type::StridedSlice(op_params, GetTensorShape(op_context.input), \
+                            GetTensorData<data_type>(op_context.input),  \
+                            GetTensorShape(op_context.output),           \
+                            GetTensorData<data_type>(op_context.output))
 
   switch (op_context.input->type) {
     case kTfLiteFloat32:
diff --git a/tensorflow/contrib/lite/kernels/transpose.cc b/tensorflow/contrib/lite/kernels/transpose.cc
index 95359962e0..e42a30420b 100644
--- a/tensorflow/contrib/lite/kernels/transpose.cc
+++ b/tensorflow/contrib/lite/kernels/transpose.cc
@@ -92,26 +92,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context));
   }
 
-  // Reverse the permuted axes and convert to 4D due to the way Dims are
-  // constructed in GetTensorDims.
   const int* perm_data = GetTensorData<int32_t>(op_context.perm);
   const int size = op_context.perm->dims->data[0];
-  const int kOutputDimensionNum = 4;
-  int reversed_perm[kOutputDimensionNum];
-
-  for (int output_k = 0, input_k = size - 1; output_k < size;
-       ++output_k, --input_k) {
-    reversed_perm[output_k] = size - perm_data[input_k] - 1;
-  }
-  for (int k = size; k < kOutputDimensionNum; ++k) {
-    reversed_perm[k] = k;
+  TransposeParams params;
+  params.perm_count = size;
+  for (int i = 0; i < size; ++i) {
+    params.perm[i] = perm_data[i];
   }
 
 #define TF_LITE_TRANSPOSE(type, scalar)                     \
-  type::Transpose(GetTensorData<scalar>(op_context.input),  \
-                  GetTensorDims(op_context.input),          \
-                  GetTensorData<scalar>(op_context.output), \
-                  GetTensorDims(op_context.output), reversed_perm)
+  type::Transpose(params, GetTensorShape(op_context.input), \
+                  GetTensorData<scalar>(op_context.input),  \
+                  GetTensorShape(op_context.output),        \
+                  GetTensorData<scalar>(op_context.output))
 
   switch (op_context.input->type) {
     case kTfLiteFloat32:
diff --git a/tensorflow/contrib/lite/kernels/transpose_test.cc b/tensorflow/contrib/lite/kernels/transpose_test.cc
index 337bc144b9..79ef0a7c56 100644
--- a/tensorflow/contrib/lite/kernels/transpose_test.cc
+++ b/tensorflow/contrib/lite/kernels/transpose_test.cc
@@ -51,21 +51,21 @@ void RunTestPermutation(const std::vector<int>& shape,
     reversed_perms[k] = k;
   }
 
-  // Make input and output dims (i.e. reversed shape and dest_shape).
-  Dims<4> input_dims = GetTensorDims(shape);
-  Dims<4> output_dims;
-  for (int i = 0; i < 4; i++) {
-    output_dims.sizes[i] = input_dims.sizes[reversed_perms[i]];
+  // Make input and output shapes.
+  const RuntimeShape input_shape = GetTensorShape(shape);
+  RuntimeShape output_shape(perms.size());
+  for (int i = 0; i < perms.size(); i++) {
+    output_shape.SetDim(i, input_shape.Dims(perms[i]));
   }
-  output_dims.strides[0] = 1;
-  for (int k = 1; k < 4; k++) {
-    output_dims.strides[k] =
-        output_dims.strides[k - 1] * output_dims.sizes[k - 1];
+
+  TransposeParams params;
+  params.perm_count = perms.size();
+  for (int i = 0; i < perms.size(); ++i) {
+    params.perm[i] = perms[i];
   }
 
-  reference_ops::Transpose<float>(input.data(), input_dims,
-                                  input_transposed->data(), output_dims,
-                                  reversed_perms);
+  reference_ops::Transpose<float>(params, input_shape, input.data(),
+                                  output_shape, input_transposed->data());
 }
 
 TEST(TransposeTest, TestRefOps1D) {
-- 
GitLab


From 77e2686a2958eb76e0164828d5d536b86c72464b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 06:19:58 -0700
Subject: [PATCH 0790/1357] Reduce the size of
 //tensorflow/tools/pip_package:simple_console_windows

This change reduce the size of //tensorflow/tools/pip_package:simple_console_windows's zip file from 1000027677 bytes to 47690474 bytes for a CPU build. For GPU build, it will avoid going over 4GB when multiple CUDA compatibility are specified.

To fix #22390

PiperOrigin-RevId: 214764423
---
 configure.py                                  |  8 +++++++
 .../contrib/eager/python/examples/gan/BUILD   |  1 +
 .../python/examples/linear_regression/BUILD   |  1 +
 .../eager/python/examples/rnn_colorbot/BUILD  |  1 +
 .../eager/python/examples/rnn_ptb/BUILD       |  1 +
 tensorflow/contrib/lite/toco/python/BUILD     |  1 +
 tensorflow/contrib/timeseries/examples/BUILD  |  1 +
 tensorflow/python/debug/BUILD                 |  1 +
 tensorflow/python/tools/BUILD                 |  8 +++++++
 tensorflow/tensorflow.bzl                     | 23 +++++++++++++++++++
 tensorflow/tools/dist_test/server/BUILD       |  1 +
 11 files changed, 47 insertions(+)

diff --git a/configure.py b/configure.py
index f42da2cbb1..3fcaaa9d0e 100644
--- a/configure.py
+++ b/configure.py
@@ -1440,6 +1440,14 @@ def set_windows_build_flags(environ_cp):
   # TODO(pcloudy): Remove this flag when upgrading Bazel to 0.16.0
   # Short object file path will be enabled by default.
   write_to_bazelrc('build --experimental_shortened_obj_file_path=true')
+  # When building zip file for some py_binary and py_test targets, don't
+  # include its dependencies. This is for:
+  #   1. Running python tests against the system installed TF pip package.
+  #   2. Avoiding redundant files in
+  #      //tensorflow/tools/pip_package:simple_console_windows,
+  #      which is a py_binary used during creating TF pip package.
+  #      See https://github.com/tensorflow/tensorflow/issues/22390
+  write_to_bazelrc('build --define=no_tensorflow_py_deps=true')
 
   if get_var(
       environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline',
diff --git a/tensorflow/contrib/eager/python/examples/gan/BUILD b/tensorflow/contrib/eager/python/examples/gan/BUILD
index c61ec2dbae..d64c8eb9ce 100644
--- a/tensorflow/contrib/eager/python/examples/gan/BUILD
+++ b/tensorflow/contrib/eager/python/examples/gan/BUILD
@@ -3,6 +3,7 @@ licenses(["notice"])  # Apache 2.0
 package(default_visibility = ["//tensorflow:internal"])
 
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_binary")
 
 py_binary(
     name = "mnist",
diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD
index 2f6cfdf31e..74ce9e84f0 100644
--- a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD
+++ b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD
@@ -3,6 +3,7 @@ licenses(["notice"])  # Apache 2.0
 package(default_visibility = ["//tensorflow:internal"])
 
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_binary")
 
 py_binary(
     name = "linear_regression",
diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/BUILD b/tensorflow/contrib/eager/python/examples/rnn_colorbot/BUILD
index f83eb5c476..d500b632eb 100644
--- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/BUILD
+++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/BUILD
@@ -3,6 +3,7 @@ licenses(["notice"])  # Apache 2.0
 package(default_visibility = ["//tensorflow:internal"])
 
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_binary")
 
 py_binary(
     name = "rnn_colorbot",
diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/BUILD b/tensorflow/contrib/eager/python/examples/rnn_ptb/BUILD
index 4b4792cd49..2cc2fcbfeb 100644
--- a/tensorflow/contrib/eager/python/examples/rnn_ptb/BUILD
+++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/BUILD
@@ -3,6 +3,7 @@ licenses(["notice"])  # Apache 2.0
 package(default_visibility = ["//tensorflow:internal"])
 
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_binary")
 
 py_binary(
     name = "rnn_ptb",
diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD
index 33c5b16462..cf97ba7084 100644
--- a/tensorflow/contrib/lite/toco/python/BUILD
+++ b/tensorflow/contrib/lite/toco/python/BUILD
@@ -4,6 +4,7 @@ licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
 load("//tensorflow:tensorflow.bzl", "tf_py_test")
+load("//tensorflow:tensorflow.bzl", "py_binary")
 
 cc_library(
     name = "toco_python_api",
diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD
index 21c0c30c19..57797214d1 100644
--- a/tensorflow/contrib/timeseries/examples/BUILD
+++ b/tensorflow/contrib/timeseries/examples/BUILD
@@ -1,4 +1,5 @@
 load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "py_binary")
 
 package(
     default_visibility = ["//tensorflow:internal"],
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 849d165bfa..e84482d2b2 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -18,6 +18,7 @@ exports_files(["LICENSE"])
 
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "py_binary")
 load("//tensorflow:tensorflow.bzl", "if_not_windows")
 
 py_library(
diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD
index 75824d83e6..384c7a82d2 100644
--- a/tensorflow/python/tools/BUILD
+++ b/tensorflow/python/tools/BUILD
@@ -8,6 +8,7 @@ licenses(["notice"])  # Apache 2.0
 exports_files(["LICENSE"])
 
 load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "py_binary")
 
 # Transitive dependencies of this target will be included in the pip package.
 py_library(
@@ -21,6 +22,13 @@ py_library(
         ":saved_model_cli",
         ":saved_model_utils",
         ":strip_unused",
+        # The following py_library are needed because
+        # py_binary may not depend on them when --define=no_tensorflow_py_deps=true
+        # is specified. See https://github.com/tensorflow/tensorflow/issues/22390
+        ":freeze_graph_lib",
+        ":optimize_for_inference_lib",
+        ":selective_registration_header_lib",
+        ":strip_unused_lib",
     ],
 )
 
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index d6c75d675c..8f8bfadf78 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1693,6 +1693,29 @@ register_extension_info(
     label_regex_for_dep = "{extension_name}",
 )
 
+# Similar to py_test above, this macro is used to exclude dependencies for some py_binary
+# targets in order to reduce the size of //tensorflow/tools/pip_package:simple_console_windows.
+# See https://github.com/tensorflow/tensorflow/issues/22390
+def py_binary(name, deps = [], **kwargs):
+    # Add an extra target for dependencies to avoid nested select statement.
+    native.py_library(
+        name = name + "_deps",
+        deps = deps,
+    )
+    native.py_binary(
+        name = name,
+        deps = select({
+            "//conditions:default": [":" + name + "_deps"],
+            clean_dep("//tensorflow:no_tensorflow_py_deps"): [],
+        }),
+        **kwargs
+    )
+
+register_extension_info(
+    extension_name = "py_binary",
+    label_regex_for_dep = "{extension_name}",
+)
+
 def tf_py_test(
         name,
         srcs,
diff --git a/tensorflow/tools/dist_test/server/BUILD b/tensorflow/tools/dist_test/server/BUILD
index 003a19a9ab..3aa53a5615 100644
--- a/tensorflow/tools/dist_test/server/BUILD
+++ b/tensorflow/tools/dist_test/server/BUILD
@@ -8,6 +8,7 @@ licenses(["notice"])  # Apache 2.0
 exports_files(["LICENSE"])
 
 load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "py_binary")
 
 py_binary(
     name = "grpc_tensorflow_server",
-- 
GitLab


From 234229b014cb0cfe4bf8e9466db79d596085faba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 06:50:20 -0700
Subject: [PATCH 0791/1357] Update logic used in get_variable to populate
 custom_getter's kwargs.

The new implementation ensures that the 'constraints' kwarg is propagated by customer getters whose signature includes a keyworded, variable length argument dictionary, as well as those explicitly including the 'constraints' argument.

PiperOrigin-RevId: 214767296
---
 tensorflow/python/ops/variable_scope.py       |  6 +-
 tensorflow/python/util/function_utils.py      | 23 +++++
 tensorflow/python/util/function_utils_test.py | 87 +++++++++++++++++++
 3 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index a43676cd70..562e1ad6cb 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -515,8 +515,10 @@ class _VariableStore(object):
           "synchronization": synchronization,
           "aggregation": aggregation,
       }
-      # `fn_args` can handle functions, `functools.partial`, `lambda`.
-      if "constraint" in function_utils.fn_args(custom_getter):
+      # `fn_args` and `has_kwargs` can handle functions, `functools.partial`,
+      # `lambda`.
+      if ("constraint" in function_utils.fn_args(custom_getter) or
+          function_utils.has_kwargs(custom_getter)):
         custom_getter_kwargs["constraint"] = constraint
       return custom_getter(**custom_getter_kwargs)
     else:
diff --git a/tensorflow/python/util/function_utils.py b/tensorflow/python/util/function_utils.py
index 4e9b07e20a..a56dfbff8e 100644
--- a/tensorflow/python/util/function_utils.py
+++ b/tensorflow/python/util/function_utils.py
@@ -59,6 +59,29 @@ def fn_args(fn):
   return tuple(args)
 
 
+def has_kwargs(fn):
+  """Returns whether the passed callable has **kwargs in its signature.
+
+  Args:
+    fn: Function, or function-like object (e.g., result of `functools.partial`).
+
+  Returns:
+    `bool`: if `fn` has **kwargs in its signature.
+
+  Raises:
+     `TypeError`: If fn is not a Function, or function-like object.
+  """
+  if isinstance(fn, functools.partial):
+    fn = fn.func
+  elif _is_callable_object(fn):
+    fn = fn.__call__
+  elif not callable(fn):
+    raise TypeError(
+        'fn should be a function-like object, but is of type {}.'.format(
+            type(fn)))
+  return tf_inspect.getfullargspec(fn).varkw is not None
+
+
 def get_func_name(func):
   """Returns name of passed callable."""
   _, func = tf_decorator.unwrap(func)
diff --git a/tensorflow/python/util/function_utils_test.py b/tensorflow/python/util/function_utils_test.py
index 1588328c26..ce768637f5 100644
--- a/tensorflow/python/util/function_utils_test.py
+++ b/tensorflow/python/util/function_utils_test.py
@@ -135,6 +135,93 @@ class FnArgsTest(test.TestCase):
     self.assertEqual(3, double_wrapped_fn(a=3))
 
 
+class HasKwargsTest(test.TestCase):
+
+  def test_simple_function(self):
+
+    fn_has_kwargs = lambda **x: x
+    self.assertTrue(function_utils.has_kwargs(fn_has_kwargs))
+
+    fn_has_no_kwargs = lambda x: x
+    self.assertFalse(function_utils.has_kwargs(fn_has_no_kwargs))
+
+  def test_callable(self):
+
+    class FooHasKwargs(object):
+
+      def __call__(self, **x):
+        del x
+    self.assertTrue(function_utils.has_kwargs(FooHasKwargs()))
+
+    class FooHasNoKwargs(object):
+
+      def __call__(self, x):
+        del x
+    self.assertFalse(function_utils.has_kwargs(FooHasNoKwargs()))
+
+  def test_bounded_method(self):
+
+    class FooHasKwargs(object):
+
+      def fn(self, **x):
+        del x
+    self.assertTrue(function_utils.has_kwargs(FooHasKwargs().fn))
+
+    class FooHasNoKwargs(object):
+
+      def fn(self, x):
+        del x
+    self.assertFalse(function_utils.has_kwargs(FooHasNoKwargs().fn))
+
+  def test_partial_function(self):
+    expected_test_arg = 123
+
+    def fn_has_kwargs(test_arg, **x):
+      if test_arg != expected_test_arg:
+        return ValueError('partial fn does not work correctly')
+      return x
+
+    wrapped_fn = functools.partial(fn_has_kwargs, test_arg=123)
+    self.assertTrue(function_utils.has_kwargs(wrapped_fn))
+
+    def fn_has_no_kwargs(x, test_arg):
+      if test_arg != expected_test_arg:
+        return ValueError('partial fn does not work correctly')
+      return x
+
+    wrapped_fn = functools.partial(fn_has_no_kwargs, test_arg=123)
+    self.assertFalse(function_utils.has_kwargs(wrapped_fn))
+
+  def test_double_partial(self):
+    expected_test_arg1 = 123
+    expected_test_arg2 = 456
+
+    def fn_has_kwargs(test_arg1, test_arg2, **x):
+      if test_arg1 != expected_test_arg1 or test_arg2 != expected_test_arg2:
+        return ValueError('partial does not work correctly')
+      return x
+
+    wrapped_fn = functools.partial(fn_has_kwargs, test_arg2=456)
+    double_wrapped_fn = functools.partial(wrapped_fn, test_arg1=123)
+
+    self.assertTrue(function_utils.has_kwargs(double_wrapped_fn))
+
+    def fn_has_no_kwargs(x, test_arg1, test_arg2):
+      if test_arg1 != expected_test_arg1 or test_arg2 != expected_test_arg2:
+        return ValueError('partial does not work correctly')
+      return x
+
+    wrapped_fn = functools.partial(fn_has_no_kwargs, test_arg2=456)
+    double_wrapped_fn = functools.partial(wrapped_fn, test_arg1=123)
+
+    self.assertFalse(function_utils.has_kwargs(double_wrapped_fn))
+
+  def test_raises_type_error(self):
+    with self.assertRaisesRegexp(
+        TypeError, 'fn should be a function-like object'):
+      function_utils.has_kwargs('not a function')
+
+
 class GetFuncNameTest(test.TestCase):
 
   def testWithSimpleFunction(self):
-- 
GitLab


From 3d30dd424c0404ea5349c0d2acdde2acd4e0aa97 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 06:54:10 -0700
Subject: [PATCH 0792/1357] Update kernel evals to use new kernel signatures.

PiperOrigin-RevId: 214767788
---
 .../contrib/lite/kernels/activations.cc       | 113 +++++++++++-------
 .../contrib/lite/kernels/comparisons.cc       |  51 ++++----
 tensorflow/contrib/lite/kernels/dequantize.cc |  14 +--
 tensorflow/contrib/lite/kernels/div.cc        |  27 +++--
 tensorflow/contrib/lite/kernels/fake_quant.cc |  13 +-
 .../internal/logsoftmax_quantized_test.cc     |  32 +++--
 .../internal/softmax_quantized_test.cc        |  28 +++--
 .../contrib/lite/kernels/log_softmax_test.cc  |   5 +-
 .../contrib/lite/kernels/softmax_test.cc      |  12 +-
 .../contrib/lite/kernels/sparse_to_dense.cc   |   5 +-
 10 files changed, 180 insertions(+), 120 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc
index b2d9b84979..cf9441aee3 100644
--- a/tensorflow/contrib/lite/kernels/activations.cc
+++ b/tensorflow/contrib/lite/kernels/activations.cc
@@ -348,18 +348,22 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     } break;
     case kTfLiteInt16: {
-      optimized_ops::Tanh(GetTensorData<int16_t>(input), GetTensorShape(input),
-                          data->input_left_shift,
-                          GetTensorData<int16_t>(output),
-                          GetTensorShape(output));
+      TanhParams params;
+      params.input_left_shift = data->input_left_shift;
+      optimized_ops::Tanh(params, GetTensorShape(input),
+                          GetTensorData<int16_t>(input), GetTensorShape(output),
+                          GetTensorData<int16_t>(output));
       return kTfLiteOk;
     } break;
     case kTfLiteUInt8: {
-      optimized_ops::Tanh(GetTensorData<uint8_t>(input), GetTensorShape(input),
-                          input->params.zero_point, data->input_range_radius,
-                          data->input_multiplier, data->input_left_shift,
-                          GetTensorData<uint8_t>(output),
-                          GetTensorShape(output));
+      TanhParams params;
+      params.input_zero_point = input->params.zero_point;
+      params.input_range_radius = data->input_range_radius;
+      params.input_multiplier = data->input_multiplier;
+      params.input_left_shift = data->input_left_shift;
+      optimized_ops::Tanh(params, GetTensorShape(input),
+                          GetTensorData<uint8_t>(input), GetTensorShape(output),
+                          GetTensorData<uint8_t>(output));
       return kTfLiteOk;
     } break;
     default:
@@ -385,17 +389,21 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
       break;
     }
     case kTfLiteInt16: {
+      LogisticParams params;
       optimized_ops::Logistic(
-          GetTensorData<int16>(input), GetTensorShape(input),
-          GetTensorData<int16_t>(output), GetTensorShape(output));
+          params, GetTensorShape(input), GetTensorData<int16_t>(input),
+          GetTensorShape(output), GetTensorData<int16_t>(output));
       break;
     }
     case kTfLiteUInt8: {
+      LogisticParams params;
+      params.input_zero_point = input->params.zero_point;
+      params.input_range_radius = data->input_range_radius;
+      params.input_multiplier = data->input_multiplier;
+      params.input_left_shift = data->input_left_shift;
       optimized_ops::Logistic(
-          GetTensorData<uint8_t>(input), GetTensorShape(input),
-          input->params.zero_point, data->input_range_radius,
-          data->input_multiplier, data->input_left_shift,
-          GetTensorData<uint8_t>(output), GetTensorShape(output));
+          params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+          GetTensorShape(output), GetTensorData<uint8_t>(output));
       break;
     }
     default:
@@ -459,11 +467,13 @@ void Softmax3DFloat(const TfLiteTensor* input, TfLiteTensor* output,
   const int batch_size = input->dims->data[0];
   const int intermediate_size = input->dims->data[1];
   const int input_size = input->dims->data[2];
+  SoftmaxParams op_params;
+  op_params.beta = params->beta;
   optimized_ops::Softmax(
+      op_params, GetTensorShape({batch_size, intermediate_size, 1, input_size}),
       GetTensorData<float>(input),
       GetTensorShape({batch_size, intermediate_size, 1, input_size}),
-      params->beta, GetTensorData<float>(output),
-      GetTensorShape({batch_size, intermediate_size, 1, input_size}));
+      GetTensorData<float>(output));
 }
 
 void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
@@ -473,10 +483,14 @@ void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
   // tensor is 4D in a special way. We will convert a (Y) shape into a (1,
   // 1, 1, Y) shape.
   const int input_size = input->dims->data[0];
-  optimized_ops::Softmax(
-      GetTensorData<uint8_t>(input), GetTensorShape({1, 1, 1, input_size}),
-      data->input_multiplier, data->input_left_shift, data->diff_min,
-      GetTensorData<uint8_t>(output), GetTensorShape({1, 1, 1, input_size}));
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  optimized_ops::Softmax(op_params, GetTensorShape({1, 1, 1, input_size}),
+                         GetTensorData<uint8_t>(input),
+                         GetTensorShape({1, 1, 1, input_size}),
+                         GetTensorData<uint8_t>(output));
 }
 void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
                         TfLiteSoftmaxParams* params, OpData* data) {
@@ -486,11 +500,15 @@ void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
   // 1, 1, Y) shape.
   const int batch_size = input->dims->data[0];
   const int input_size = input->dims->data[1];
-  optimized_ops::Softmax(GetTensorData<uint8_t>(input),
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  optimized_ops::Softmax(op_params,
+                         GetTensorShape({batch_size, 1, 1, input_size}),
+                         GetTensorData<uint8_t>(input),
                          GetTensorShape({batch_size, 1, 1, input_size}),
-                         data->input_multiplier, data->input_left_shift,
-                         data->diff_min, GetTensorData<uint8_t>(output),
-                         GetTensorShape({batch_size, 1, 1, input_size}));
+                         GetTensorData<uint8_t>(output));
 }
 
 void Softmax3DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
@@ -498,28 +516,36 @@ void Softmax3DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
   const int batch_size = input->dims->data[0];
   const int intermediate_size = input->dims->data[1];
   const int input_size = input->dims->data[2];
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
   optimized_ops::Softmax(
+      op_params, GetTensorShape({batch_size, intermediate_size, 1, input_size}),
       GetTensorData<uint8_t>(input),
       GetTensorShape({batch_size, intermediate_size, 1, input_size}),
-      data->input_multiplier, data->input_left_shift, data->diff_min,
-      GetTensorData<uint8_t>(output),
-      GetTensorShape({batch_size, intermediate_size, 1, input_size}));
+      GetTensorData<uint8_t>(output));
 }
 
 // Takes a 4D tensor and perform softmax along the forth dimension.
 void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
                     TfLiteSoftmaxParams* params) {
-  optimized_ops::Softmax(GetTensorData<float>(input), GetTensorShape(input),
-                         params->beta, GetTensorData<float>(output),
-                         GetTensorShape(output));
+  SoftmaxParams op_params;
+  op_params.beta = params->beta;
+  optimized_ops::Softmax(op_params, GetTensorShape(input),
+                         GetTensorData<float>(input), GetTensorShape(output),
+                         GetTensorData<float>(output));
 }
 
 void Softmax4DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
                         TfLiteSoftmaxParams* params, OpData* data) {
-  optimized_ops::Softmax(GetTensorData<uint8_t>(input), GetTensorShape(input),
-                         data->input_multiplier, data->input_left_shift,
-                         data->diff_min, GetTensorData<uint8_t>(output),
-                         GetTensorShape(output));
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  optimized_ops::Softmax(op_params, GetTensorShape(input),
+                         GetTensorData<uint8_t>(input), GetTensorShape(output),
+                         GetTensorData<uint8_t>(output));
 }
 
 TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
@@ -591,17 +617,20 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteTensor* output = GetOutput(context, node, 0);
   switch (input->type) {
     case kTfLiteFloat32:
+      SoftmaxParams op_params;
       optimized_ops::LogSoftmax(
-          GetTensorData<float>(input), GetTensorShape(input),
-          GetTensorData<float>(output), GetTensorShape(output));
+          op_params, GetTensorShape(input), GetTensorData<float>(input),
+          GetTensorShape(output), GetTensorData<float>(output));
       return kTfLiteOk;
     case kTfLiteUInt8:
+      op_params.input_multiplier = data->input_multiplier;
+      op_params.input_left_shift = data->input_left_shift;
+      op_params.reverse_scaling_divisor = data->reverse_scaling_divisor;
+      op_params.reverse_scaling_right_shift = data->reverse_scaling_right_shift;
+      op_params.diff_min = data->diff_min;
       optimized_ops::LogSoftmax(
-          GetTensorData<uint8_t>(input), GetTensorShape(input),
-          data->input_multiplier, data->input_left_shift,
-          data->reverse_scaling_divisor, data->reverse_scaling_right_shift,
-          data->diff_min, GetTensorData<uint8_t>(output),
-          GetTensorShape(output));
+          op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+          GetTensorShape(output), GetTensorData<uint8_t>(output));
       return kTfLiteOk;
     default:
       context->ReportError(context, "Only float32 supported currently., got %d",
diff --git a/tensorflow/contrib/lite/kernels/comparisons.cc b/tensorflow/contrib/lite/kernels/comparisons.cc
index 4cd96348a2..f765235e04 100644
--- a/tensorflow/contrib/lite/kernels/comparisons.cc
+++ b/tensorflow/contrib/lite/kernels/comparisons.cc
@@ -83,20 +83,24 @@ TfLiteStatus ComparisonPrepare(TfLiteContext* context, TfLiteNode* node) {
       QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier,              \
                                           &input2_multiplier, &input2_shift);  \
                                                                                \
+      ComparisonParams op_params;                                              \
+      op_params.left_shift = left_shift;                                       \
+      op_params.input1_offset = input1_offset;                                 \
+      op_params.input1_multiplier = input1_multiplier;                         \
+      op_params.input1_shift = -input1_shift;                                  \
+      op_params.input2_offset = input2_offset;                                 \
+      op_params.input2_multiplier = input2_multiplier;                         \
+      op_params.input2_shift = -input2_shift;                                  \
       if (requires_broadcast) {                                                \
-        reference_ops::Broadcast##opname(                                      \
-            left_shift, GetTensorData<uint8_t>(input1), GetTensorDims(input1), \
-            input1_offset, input1_multiplier, input1_shift,                    \
-            GetTensorData<uint8_t>(input2), GetTensorDims(input2),             \
-            input2_offset, input2_multiplier, input2_shift,                    \
-            GetTensorData<bool>(output), GetTensorDims(output));               \
+        reference_ops::Broadcast4DSlow##opname##WithScaling(                   \
+            op_params, GetTensorShape(input1), GetTensorData<uint8_t>(input1), \
+            GetTensorShape(input2), GetTensorData<uint8_t>(input2),            \
+            GetTensorShape(output), GetTensorData<bool>(output));              \
       } else {                                                                 \
-        reference_ops::opname(                                                 \
-            left_shift, GetTensorData<uint8_t>(input1), GetTensorDims(input1), \
-            input1_offset, input1_multiplier, input1_shift,                    \
-            GetTensorData<uint8_t>(input2), GetTensorDims(input2),             \
-            input2_offset, input2_multiplier, input2_shift,                    \
-            GetTensorData<bool>(output), GetTensorDims(output));               \
+        reference_ops::opname##WithScaling(                                    \
+            op_params, GetTensorShape(input1), GetTensorData<uint8_t>(input1), \
+            GetTensorShape(input2), GetTensorData<uint8_t>(input2),            \
+            GetTensorShape(output), GetTensorData<bool>(output));              \
       }                                                                        \
     }                                                                          \
   }
@@ -108,16 +112,19 @@ TF_LITE_QUANTIZE_COMPARISON(Less);
 TF_LITE_QUANTIZE_COMPARISON(LessEqual);
 #undef TF_LITE_QUANTIZE_COMPARISON
 
-#define TF_LITE_COMPARISON(type, opname, requires_broadcast)    \
-  requires_broadcast                                            \
-      ? reference_ops::Broadcast##opname(                       \
-            GetTensorData<type>(input1), GetTensorDims(input1), \
-            GetTensorData<type>(input2), GetTensorDims(input2), \
-            GetTensorData<bool>(output), GetTensorDims(output)) \
-      : reference_ops::opname(                                  \
-            GetTensorData<type>(input1), GetTensorDims(input1), \
-            GetTensorData<type>(input2), GetTensorDims(input2), \
-            GetTensorData<bool>(output), GetTensorDims(output));
+#define TF_LITE_COMPARISON(type, opname, requires_broadcast)                  \
+  {                                                                           \
+    ComparisonParams op_params;                                               \
+    requires_broadcast                                                        \
+        ? reference_ops::Broadcast4DSlow##opname##NoScaling(                  \
+              op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
+              GetTensorShape(input2), GetTensorData<type>(input2),            \
+              GetTensorShape(output), GetTensorData<bool>(output))            \
+        : reference_ops::opname##NoScaling(                                   \
+              op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
+              GetTensorShape(input2), GetTensorData<type>(input2),            \
+              GetTensorShape(output), GetTensorData<bool>(output));           \
+  }
 
 TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
diff --git a/tensorflow/contrib/lite/kernels/dequantize.cc b/tensorflow/contrib/lite/kernels/dequantize.cc
index 3a08f48b00..59bf64e0af 100644
--- a/tensorflow/contrib/lite/kernels/dequantize.cc
+++ b/tensorflow/contrib/lite/kernels/dequantize.cc
@@ -77,13 +77,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     return kTfLiteOk;
   }
 
-  auto zero_point = op_context.input->params.zero_point;
-  auto scale = op_context.input->params.scale;
-
-  optimized_ops::Dequantize(GetTensorData<uint8_t>(op_context.input),
-                            GetTensorDims(op_context.input), zero_point, scale,
-                            GetTensorData<float>(op_context.output),
-                            GetTensorDims(op_context.output));
+  tflite::DequantizationParams op_params;
+  op_params.zero_point = op_context.input->params.zero_point;
+  op_params.scale = op_context.input->params.scale;
+  optimized_ops::Dequantize(op_params, GetTensorShape(op_context.input),
+                            GetTensorData<uint8_t>(op_context.input),
+                            GetTensorShape(op_context.output),
+                            GetTensorData<float>(op_context.output));
 
   if (IsConstantTensor(op_context.input)) {
     op_data->float_dequantized_weights_initialized = true;
diff --git a/tensorflow/contrib/lite/kernels/div.cc b/tensorflow/contrib/lite/kernels/div.cc
index 7945c095b1..8d4bb51006 100644
--- a/tensorflow/contrib/lite/kernels/div.cc
+++ b/tensorflow/contrib/lite/kernels/div.cc
@@ -81,24 +81,27 @@ template <KernelType kernel_type>
 void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
              const OpData* data, const TfLiteTensor* input1,
              const TfLiteTensor* input2, TfLiteTensor* output) {
-#define TF_LITE_DIV(type, opname, data_type)                            \
-  data_type output_activation_min, output_activation_max;               \
-  CalculateActivationRange(params->activation, &output_activation_min,  \
-                           &output_activation_max);                     \
-  type::opname(GetTensorData<data_type>(input1), GetTensorDims(input1), \
-               GetTensorData<data_type>(input2), GetTensorDims(input2), \
-               output_activation_min, output_activation_max,            \
-               GetTensorData<data_type>(output), GetTensorDims(output))
+#define TF_LITE_DIV(type, opname, data_type)                             \
+  tflite::ArithmeticParams op_params;                                    \
+  data_type output_activation_min, output_activation_max;                \
+  CalculateActivationRange(params->activation, &output_activation_min,   \
+                           &output_activation_max);                      \
+  SetActivationParams(output_activation_min, output_activation_max,      \
+                      &op_params);                                       \
+  type::opname(op_params, GetTensorShape(input1),                        \
+               GetTensorData<data_type>(input1), GetTensorShape(input2), \
+               GetTensorData<data_type>(input2), GetTensorShape(output), \
+               GetTensorData<data_type>(output))
   if (output->type == kTfLiteInt32) {
     if (kernel_type == kReference) {
       if (data->requires_broadcast) {
-        TF_LITE_DIV(reference_ops, BroadcastDiv, int32_t);
+        TF_LITE_DIV(reference_ops, BroadcastDiv4DSlow, int32_t);
       } else {
         TF_LITE_DIV(reference_ops, Div, int32_t);
       }
     } else {
       if (data->requires_broadcast) {
-        TF_LITE_DIV(optimized_ops, BroadcastDiv, int32_t);
+        TF_LITE_DIV(optimized_ops, BroadcastDiv4DSlow, int32_t);
       } else {
         TF_LITE_DIV(optimized_ops, Div, int32_t);
       }
@@ -106,13 +109,13 @@ void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
   } else if (output->type == kTfLiteFloat32) {
     if (kernel_type == kReference) {
       if (data->requires_broadcast) {
-        TF_LITE_DIV(reference_ops, BroadcastDiv, float);
+        TF_LITE_DIV(reference_ops, BroadcastDiv4DSlow, float);
       } else {
         TF_LITE_DIV(reference_ops, Div, float);
       }
     } else {
       if (data->requires_broadcast) {
-        TF_LITE_DIV(optimized_ops, BroadcastDiv, float);
+        TF_LITE_DIV(optimized_ops, BroadcastDiv4DSlow, float);
       } else {
         TF_LITE_DIV(optimized_ops, Div, float);
       }
diff --git a/tensorflow/contrib/lite/kernels/fake_quant.cc b/tensorflow/contrib/lite/kernels/fake_quant.cc
index f9bc3747cb..b51af72fe6 100644
--- a/tensorflow/contrib/lite/kernels/fake_quant.cc
+++ b/tensorflow/contrib/lite/kernels/fake_quant.cc
@@ -68,11 +68,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto* params =
       reinterpret_cast<TfLiteFakeQuantParams*>(node->builtin_data);
 
-  reference_ops::FakeQuant(GetTensorData<float>(op_context.input),
-                           GetTensorDims(op_context.input), params->min,
-                           params->max, params->num_bits,
-                           GetTensorData<float>(op_context.output),
-                           GetTensorDims(op_context.output));
+  tflite::FakeQuantParams op_params;
+  op_params.num_bits = params->num_bits;
+  op_params.minmax.min = params->min;
+  op_params.minmax.max = params->max;
+  reference_ops::FakeQuant(op_params, GetTensorShape(op_context.input),
+                           GetTensorData<float>(op_context.input),
+                           GetTensorShape(op_context.output),
+                           GetTensorData<float>(op_context.output));
 
   return kTfLiteOk;
 }
diff --git a/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc
index 3624c20ae3..2252ca1bcc 100644
--- a/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc
@@ -43,11 +43,15 @@ void RunLogSoftmaxFloatReference(const uint8* input_data,
 
   // Reference data generated via Dequant of input into float, and then applying
   // float LogSoftmax.
-  reference_ops::Dequantize(
-      input_data, ToRuntimeDims(shape_common), input_offset, input_scale,
-      reference_dequant_data.data(), ToRuntimeDims(shape_common));
-  optimized_ops::LogSoftmax(reference_dequant_data.data(), shape_common,
-                            reference_output_float_data.data(), shape_common);
+  DequantizationParams dq_params;
+  dq_params.zero_point = input_offset;
+  dq_params.scale = input_scale;
+  reference_ops::Dequantize(dq_params, shape_common, input_data, shape_common,
+                            reference_dequant_data.data());
+  SoftmaxParams sm_params;
+  optimized_ops::LogSoftmax(sm_params, shape_common,
+                            reference_dequant_data.data(), shape_common,
+                            reference_output_float_data.data());
   // Work with quantized scaling for LogSoftmax, under which 255 represents 0,
   // and -16 gets nudged up to 0.
   for (int i = 0; i < ref_buffer_size; i++) {
@@ -129,14 +133,16 @@ void RunOneLogSoftmaxTest(const uint8* input_data,
   const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits,
                                                      input_beta_left_shift);
 
-  optimized_ops::LogSoftmax(input_data, shape_common, input_beta_multiplier,
-                            input_beta_left_shift, reverse_scaling_divisor,
-                            reverse_scaling_right_shift, diff_min,
-                            optimized_logsoftmax_output.data(), shape_common);
-  reference_ops::LogSoftmax(
-      input_data, shape_common, input_beta_multiplier, input_beta_left_shift,
-      reverse_scaling_divisor, reverse_scaling_right_shift, diff_min,
-      reference_quant_logsoftmax_output.data(), shape_common);
+  SoftmaxParams params;
+  params.input_multiplier = input_beta_multiplier;
+  params.input_left_shift = input_beta_left_shift;
+  params.reverse_scaling_divisor = reverse_scaling_divisor;
+  params.reverse_scaling_right_shift = reverse_scaling_right_shift;
+  params.diff_min = diff_min;
+  optimized_ops::LogSoftmax(params, shape_common, input_data, shape_common,
+                            optimized_logsoftmax_output.data());
+  reference_ops::LogSoftmax(params, shape_common, input_data, shape_common,
+                            reference_quant_logsoftmax_output.data());
 
   CheckOutputData(optimized_logsoftmax_output.data(),
                   reference_float_logsoftmax_output.data(), shape_common,
diff --git a/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc
index ca94e7740e..831fb3c243 100644
--- a/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc
@@ -43,11 +43,15 @@ void RunSoftmaxFloatReference(const uint8* input_data,
 
   // Reference data generated via Dequant of input into float, and then applying
   // float Softmax.
-  reference_ops::Dequantize(
-      input_data, ToRuntimeDims(shape_common), input_offset, input_scale,
-      reference_dequant_data.data(), ToRuntimeDims(shape_common));
-  optimized_ops::Softmax(reference_dequant_data.data(), shape_common, beta,
-                         reference_output_float_data.data(), shape_common);
+  DequantizationParams dq_params;
+  dq_params.zero_point = input_offset;
+  dq_params.scale = input_scale;
+  reference_ops::Dequantize(dq_params, shape_common, input_data, shape_common,
+                            reference_dequant_data.data());
+  SoftmaxParams sm_params;
+  sm_params.beta = beta;
+  optimized_ops::Softmax(sm_params, shape_common, reference_dequant_data.data(),
+                         shape_common, reference_output_float_data.data());
   // Work with quantized scaling for Softmax, under which 256 represents 1, but
   // we limit this to 255.
   for (int i = 0; i < ref_buffer_size; i++) {
@@ -116,12 +120,14 @@ void RunOneSoftmaxTest(const uint8* input_data,
   const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits,
                                                      input_beta_left_shift);
 
-  optimized_ops::Softmax(input_data, shape_common, input_beta_multiplier,
-                         input_beta_left_shift, diff_min,
-                         optimized_softmax_output.data(), shape_common);
-  reference_ops::Softmax(input_data, shape_common, input_beta_multiplier,
-                         input_beta_left_shift, diff_min,
-                         reference_quant_softmax_output.data(), shape_common);
+  SoftmaxParams params;
+  params.input_multiplier = input_beta_multiplier;
+  params.input_left_shift = input_beta_left_shift;
+  params.diff_min = diff_min;
+  optimized_ops::Softmax(params, shape_common, input_data, shape_common,
+                         optimized_softmax_output.data());
+  reference_ops::Softmax(params, shape_common, input_data, shape_common,
+                         reference_quant_softmax_output.data());
 
   CheckOutputData(optimized_softmax_output.data(),
                   reference_float_softmax_output.data(), shape_common,
diff --git a/tensorflow/contrib/lite/kernels/log_softmax_test.cc b/tensorflow/contrib/lite/kernels/log_softmax_test.cc
index 9a8d35e82c..1acc966cdc 100644
--- a/tensorflow/contrib/lite/kernels/log_softmax_test.cc
+++ b/tensorflow/contrib/lite/kernels/log_softmax_test.cc
@@ -91,8 +91,9 @@ TEST(LogSoftmaxOpTest, CompareWithTFmini) {
 
   std::unique_ptr<float[]> output_buffer(new float[input_size * batch_size]);
   auto input_shape = RuntimeShape({batch_size, 1, 1, input_size});
-  tflite::reference_ops::LogSoftmax(input_buffer, input_shape,
-                                    output_buffer.get(), input_shape);
+  SoftmaxParams params;
+  tflite::reference_ops::LogSoftmax(params, input_shape, input_buffer,
+                                    input_shape, output_buffer.get());
 
   std::vector<float> expected;
   expected.insert(expected.end(), output_buffer.get(),
diff --git a/tensorflow/contrib/lite/kernels/softmax_test.cc b/tensorflow/contrib/lite/kernels/softmax_test.cc
index 727822f6be..bd66980226 100644
--- a/tensorflow/contrib/lite/kernels/softmax_test.cc
+++ b/tensorflow/contrib/lite/kernels/softmax_test.cc
@@ -93,8 +93,10 @@ TEST(SoftmaxOpTest, CompareWithTFminiBetaEq1) {
 
   std::unique_ptr<float[]> output_buffer(new float[input_size * batch_size]);
   auto input_shape = RuntimeShape({batch_size, 1, 1, input_size});
-  tflite::reference_ops::Softmax(input_buffer, input_shape, beta,
-                                 output_buffer.get(), input_shape);
+  SoftmaxParams params;
+  params.beta = beta;
+  tflite::reference_ops::Softmax(params, input_shape, input_buffer, input_shape,
+                                 output_buffer.get());
 
   std::vector<float> expected;
   expected.insert(expected.end(), output_buffer.get(),
@@ -120,8 +122,10 @@ TEST(SoftmaxOpTest, CompareWithTFminiBetaNotEq1) {
 
   std::unique_ptr<float[]> output_buffer(new float[input_size * batch_size]);
   auto input_shape = RuntimeShape({batch_size, 1, 1, input_size});
-  tflite::reference_ops::Softmax(input_buffer, input_shape, beta,
-                                 output_buffer.get(), input_shape);
+  SoftmaxParams params;
+  params.beta = beta;
+  tflite::reference_ops::Softmax(params, input_shape, input_buffer, input_shape,
+                                 output_buffer.get());
 
   std::vector<float> expected;
   expected.insert(expected.end(), output_buffer.get(),
diff --git a/tensorflow/contrib/lite/kernels/sparse_to_dense.cc b/tensorflow/contrib/lite/kernels/sparse_to_dense.cc
index 178568e07c..349fa0bd28 100644
--- a/tensorflow/contrib/lite/kernels/sparse_to_dense.cc
+++ b/tensorflow/contrib/lite/kernels/sparse_to_dense.cc
@@ -210,8 +210,9 @@ TfLiteStatus SparseToDenseImpl(TfLiteContext* context, TfLiteNode* node) {
                                                   &indices_vector));
   reference_ops::SparseToDense(indices_vector, GetTensorData<T>(values),
                                *GetTensorData<T>(default_value),
-                               GetTensorData<T>(output), GetTensorDims(output),
-                               value_is_scalar);
+                               value_is_scalar, GetTensorShape(output),
+                               GetTensorData<T>(output));
+
   return kTfLiteOk;
 }
 
-- 
GitLab


From 40a36ea2bd415f498baf3ec065a274ba8e712e84 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 08:02:42 -0700
Subject: [PATCH 0793/1357] Update HasKwargsTest ensuring that internal checks
 for tests involving functools.partial are triggered.

PiperOrigin-RevId: 214775194
---
 tensorflow/python/util/function_utils_test.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/python/util/function_utils_test.py b/tensorflow/python/util/function_utils_test.py
index ce768637f5..e5b0843e4b 100644
--- a/tensorflow/python/util/function_utils_test.py
+++ b/tensorflow/python/util/function_utils_test.py
@@ -183,6 +183,8 @@ class HasKwargsTest(test.TestCase):
 
     wrapped_fn = functools.partial(fn_has_kwargs, test_arg=123)
     self.assertTrue(function_utils.has_kwargs(wrapped_fn))
+    some_kwargs = dict(x=1, y=2, z=3)
+    self.assertEqual(wrapped_fn(**some_kwargs), some_kwargs)
 
     def fn_has_no_kwargs(x, test_arg):
       if test_arg != expected_test_arg:
@@ -191,6 +193,8 @@ class HasKwargsTest(test.TestCase):
 
     wrapped_fn = functools.partial(fn_has_no_kwargs, test_arg=123)
     self.assertFalse(function_utils.has_kwargs(wrapped_fn))
+    some_arg = 1
+    self.assertEqual(wrapped_fn(some_arg), some_arg)
 
   def test_double_partial(self):
     expected_test_arg1 = 123
@@ -205,6 +209,8 @@ class HasKwargsTest(test.TestCase):
     double_wrapped_fn = functools.partial(wrapped_fn, test_arg1=123)
 
     self.assertTrue(function_utils.has_kwargs(double_wrapped_fn))
+    some_kwargs = dict(x=1, y=2, z=3)
+    self.assertEqual(double_wrapped_fn(**some_kwargs), some_kwargs)
 
     def fn_has_no_kwargs(x, test_arg1, test_arg2):
       if test_arg1 != expected_test_arg1 or test_arg2 != expected_test_arg2:
@@ -215,6 +221,8 @@ class HasKwargsTest(test.TestCase):
     double_wrapped_fn = functools.partial(wrapped_fn, test_arg1=123)
 
     self.assertFalse(function_utils.has_kwargs(double_wrapped_fn))
+    some_arg = 1
+    self.assertEqual(double_wrapped_fn(some_arg), some_arg)
 
   def test_raises_type_error(self):
     with self.assertRaisesRegexp(
-- 
GitLab


From 9a68681c3e9bf7e51423dcdbefd25da9c365d256 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 08:26:48 -0700
Subject: [PATCH 0794/1357] [XLA] Allow the stream to be used for
 host-to-device transfers to be specified separately from the compute stream
 in ServiceRunOptions

PiperOrigin-RevId: 214778267
---
 tensorflow/compiler/xla/executable_run_options.cc | 10 ++++++++++
 tensorflow/compiler/xla/executable_run_options.h  |  8 ++++++++
 2 files changed, 18 insertions(+)

diff --git a/tensorflow/compiler/xla/executable_run_options.cc b/tensorflow/compiler/xla/executable_run_options.cc
index a472747bd1..0f9b591c70 100644
--- a/tensorflow/compiler/xla/executable_run_options.cc
+++ b/tensorflow/compiler/xla/executable_run_options.cc
@@ -45,6 +45,16 @@ stream_executor::Stream* ExecutableRunOptions::stream() const {
   return stream_;
 }
 
+ExecutableRunOptions& ExecutableRunOptions::set_host_to_device_stream(
+    stream_executor::Stream* stream) {
+  host_to_device_stream_ = stream;
+  return *this;
+}
+
+stream_executor::Stream* ExecutableRunOptions::host_to_device_stream() const {
+  return host_to_device_stream_;
+}
+
 ExecutableRunOptions& ExecutableRunOptions::set_intra_op_thread_pool(
     const Eigen::ThreadPoolDevice* intra_op_thread_pool) {
   intra_op_thread_pool_ = intra_op_thread_pool;
diff --git a/tensorflow/compiler/xla/executable_run_options.h b/tensorflow/compiler/xla/executable_run_options.h
index 416131be00..ba3217f31b 100644
--- a/tensorflow/compiler/xla/executable_run_options.h
+++ b/tensorflow/compiler/xla/executable_run_options.h
@@ -65,6 +65,13 @@ class ExecutableRunOptions {
   ExecutableRunOptions& set_stream(stream_executor::Stream* stream);
   stream_executor::Stream* stream() const;
 
+  // If set, this is the stream to perform any pre-computation transfers on.
+  // The platform of the stream must match the platform the executable was
+  // built for.  A value of nullptr indicates the option has not been set.
+  ExecutableRunOptions& set_host_to_device_stream(
+      stream_executor::Stream* stream);
+  stream_executor::Stream* host_to_device_stream() const;
+
   // Sets the thread pool device on which to run Eigen subcomputations.
   // Does not take ownership.
   ExecutableRunOptions& set_intra_op_thread_pool(
@@ -90,6 +97,7 @@ class ExecutableRunOptions {
   const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr;
   ExecutionProfile* execution_profile_ = nullptr;
   int rng_seed_ = 0;
+  stream_executor::Stream* host_to_device_stream_ = nullptr;
 };
 
 }  // namespace xla
-- 
GitLab


From 77244534c0325f61509ac769efc8b462dec00b95 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 27 Sep 2018 08:55:25 -0700
Subject: [PATCH 0795/1357] [tf.data] Minor refactoring of tf.data tests.

PiperOrigin-RevId: 214781794
---
 tensorflow/python/data/kernel_tests/BUILD     | 334 ++++++++++--------
 .../kernel_tests/batch_dataset_op_test.py     |  10 +-
 .../kernel_tests/cache_dataset_op_test.py     |   5 +-
 .../concatenate_dataset_op_test.py            |   3 +-
 .../dataset_constructor_op_test.py            |   8 +-
 .../dataset_from_generator_op_test.py         |   3 +-
 .../data/kernel_tests/dataset_ops_test.py     |   3 +-
 .../kernel_tests/filter_dataset_op_test.py    |   8 +-
 .../kernel_tests/flat_map_dataset_op_test.py  |   3 +-
 .../python/data/kernel_tests/inputs_test.py   |   3 +-
 .../interleave_dataset_op_test.py             |   3 +-
 .../list_files_dataset_op_test.py             |   3 +-
 .../data/kernel_tests/map_dataset_op_test.py  |   8 +-
 .../multi_device_iterator_test.py             |   3 +-
 .../data/kernel_tests/optional_ops_test.py    |   3 +-
 .../kernel_tests/prefetch_dataset_op_test.py  |   3 +-
 .../kernel_tests/range_dataset_op_test.py     |   3 +-
 .../kernel_tests/reader_dataset_ops_test.py   |   7 +-
 .../kernel_tests/sequence_dataset_op_test.py  |   3 +-
 .../kernel_tests/shard_dataset_op_test.py     |   3 +-
 .../kernel_tests/shuffle_dataset_op_test.py   |   3 +-
 .../python/data/kernel_tests/test_base.py     |  29 ++
 .../kernel_tests/window_dataset_op_test.py    |   8 +-
 .../data/kernel_tests/zip_dataset_op_test.py  |   3 +-
 tensorflow/tools/pip_package/BUILD            |   1 +
 25 files changed, 260 insertions(+), 203 deletions(-)
 create mode 100644 tensorflow/python/data/kernel_tests/test_base.py

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 7a6f03d4d3..fdcbfc3684 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -15,6 +15,7 @@ tf_py_test(
     size = "small",
     srcs = ["batch_dataset_op_test.py"],
     additional_deps = [
+        ":test_base",
         "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
@@ -30,11 +31,45 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "cache_dataset_op_test",
+    size = "small",
+    srcs = ["cache_dataset_op_test.py"],
+    additional_deps = [
+        ":test_base",
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+)
+
+tf_py_test(
+    name = "concatenate_dataset_op_test",
+    size = "small",
+    srcs = ["concatenate_dataset_op_test.py"],
+    additional_deps = [
+        ":test_base",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
+
 tf_py_test(
     name = "dataset_constructor_op_test",
     size = "small",
     srcs = ["dataset_constructor_op_test.py"],
     additional_deps = [
+        ":test_base",
         "//third_party/py/numpy",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
@@ -63,6 +98,7 @@ tf_py_test(
     size = "medium",
     srcs = ["dataset_from_generator_op_test.py"],
     additional_deps = [
+        ":test_base",
         "//third_party/py/numpy",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
@@ -78,6 +114,7 @@ tf_py_test(
     size = "small",
     srcs = ["dataset_ops_test.py"],
     additional_deps = [
+        ":test_base",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python/data/ops:dataset_ops",
@@ -89,6 +126,7 @@ tf_py_test(
     size = "small",
     srcs = ["filter_dataset_op_test.py"],
     additional_deps = [
+        ":test_base",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -106,6 +144,7 @@ tf_py_test(
     size = "small",
     srcs = ["flat_map_dataset_op_test.py"],
     additional_deps = [
+        ":test_base",
         "//third_party/py/numpy",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -123,6 +162,7 @@ tf_py_test(
     size = "small",
     srcs = ["list_files_dataset_op_test.py"],
     additional_deps = [
+        ":test_base",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
@@ -133,91 +173,52 @@ tf_py_test(
 )
 
 tf_py_test(
-    name = "interleave_dataset_op_test",
+    name = "inputs_test",
     size = "small",
-    srcs = ["interleave_dataset_op_test.py"],
+    srcs = ["inputs_test.py"],
     additional_deps = [
+        ":test_base",
         "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_ops",
         "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:training",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
 
 tf_py_test(
-    name = "map_dataset_op_test",
+    name = "interleave_dataset_op_test",
     size = "small",
-    srcs = ["map_dataset_op_test.py"],
+    srcs = ["interleave_dataset_op_test.py"],
     additional_deps = [
+        ":test_base",
         "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:data_flow_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:lookup_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:script_ops",
+        "//tensorflow/python:session",
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "prefetch_dataset_op_test",
-    size = "small",
-    srcs = ["prefetch_dataset_op_test.py"],
-    additional_deps = [
-        "@absl_py//absl/testing:parameterized",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
+        "//tensorflow/python:training",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
 
-tf_py_test(
-    name = "range_dataset_op_test",
+cuda_py_test(
+    name = "iterator_ops_test",
     size = "small",
-    srcs = ["range_dataset_op_test.py"],
+    srcs = ["iterator_ops_test.py"],
     additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/core:protos_all_py",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
-    ],
-)
-
-tf_py_test(
-    name = "reader_dataset_ops_test",
-    size = "small",
-    srcs = ["reader_dataset_ops_test.py"],
-    additional_deps = [
+        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/training/checkpointable:util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -225,82 +226,133 @@ tf_py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:gradients",
         "//tensorflow/python:io_ops",
-        "//tensorflow/python:lib",
+        "//tensorflow/python:math_ops",
         "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:training",
+        "//tensorflow/python/compat:compat",
         "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python:variables",
     ],
+    grpc_enabled = True,
 )
 
 tf_py_test(
-    name = "sequence_dataset_op_test",
+    name = "iterator_ops_cluster_test",
     size = "small",
-    srcs = ["sequence_dataset_op_test.py"],
+    srcs = ["iterator_ops_cluster_test.py"],
     additional_deps = [
-        "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:session",
         "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:lookup_ops",
+    ],
+    grpc_enabled = True,
+    tags = [
+        "no_oss",  # Test flaky due to port collisions.
+        "no_windows",
     ],
 )
 
 tf_py_test(
-    name = "shuffle_dataset_op_test",
+    name = "map_dataset_op_test",
     size = "small",
-    srcs = ["shuffle_dataset_op_test.py"],
+    srcs = ["map_dataset_op_test.py"],
     additional_deps = [
+        ":test_base",
+        "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
+        "//tensorflow/python:data_flow_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:lookup_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:variable_scope",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
     ],
 )
 
-tf_py_test(
-    name = "shard_dataset_op_test",
+cuda_py_test(
+    name = "multi_device_iterator_test",
     size = "small",
-    srcs = ["shard_dataset_op_test.py"],
+    srcs = ["multi_device_iterator_test.py"],
     additional_deps = [
+        ":test_base",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:multi_device_iterator_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python:framework_test_lib",
+    ],
+    tags = [
+        "no_windows_gpu",
     ],
 )
 
-tf_py_test(
-    name = "cache_dataset_op_test",
+cuda_py_test(
+    name = "optional_ops_test",
     size = "small",
-    srcs = ["cache_dataset_op_test.py"],
+    srcs = ["optional_ops_test.py"],
     additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
+        ":test_base",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:optional_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:tensor_shape",
     ],
 )
 
 tf_py_test(
-    name = "zip_dataset_op_test",
+    name = "prefetch_dataset_op_test",
     size = "small",
-    srcs = ["zip_dataset_op_test.py"],
+    srcs = ["prefetch_dataset_op_test.py"],
     additional_deps = [
-        "//third_party/py/numpy",
+        ":test_base",
+        "@absl_py//absl/testing:parameterized",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dataset_ops_gen",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python/data/ops:dataset_ops",
@@ -308,32 +360,33 @@ tf_py_test(
 )
 
 tf_py_test(
-    name = "concatenate_dataset_op_test",
+    name = "range_dataset_op_test",
     size = "small",
-    srcs = ["concatenate_dataset_op_test.py"],
+    srcs = ["range_dataset_op_test.py"],
     additional_deps = [
-        "//third_party/py/numpy",
+        ":test_base",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:platform",
         "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:variables",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/ops:iterator_ops",
     ],
 )
 
-cuda_py_test(
-    name = "iterator_ops_test",
+tf_py_test(
+    name = "reader_dataset_ops_test",
     size = "small",
-    srcs = ["iterator_ops_test.py"],
+    srcs = ["reader_dataset_ops_test.py"],
     additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/util:sparse",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/training/checkpointable:util",
+        ":test_base",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -341,91 +394,65 @@ cuda_py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:gradients",
         "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
+        "//tensorflow/python:lib",
         "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:training",
-        "//tensorflow/python/compat:compat",
         "//tensorflow/python:util",
-        "//tensorflow/python:variables",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:readers",
     ],
-    grpc_enabled = True,
 )
 
 tf_py_test(
-    name = "iterator_ops_cluster_test",
+    name = "sequence_dataset_op_test",
     size = "small",
-    srcs = ["iterator_ops_cluster_test.py"],
+    srcs = ["sequence_dataset_op_test.py"],
     additional_deps = [
-        "//tensorflow/core:protos_all_py",
+        ":test_base",
+        "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:session",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:lookup_ops",
-    ],
-    grpc_enabled = True,
-    tags = [
-        "no_oss",  # Test flaky due to port collisions.
-        "no_windows",
     ],
 )
 
-cuda_py_test(
-    name = "optional_ops_test",
+tf_py_test(
+    name = "shard_dataset_op_test",
     size = "small",
-    srcs = ["optional_ops_test.py"],
+    srcs = ["shard_dataset_op_test.py"],
     additional_deps = [
-        "@absl_py//absl/testing:parameterized",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/ops:optional_ops",
+        ":test_base",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
 
-cuda_py_test(
-    name = "multi_device_iterator_test",
+tf_py_test(
+    name = "shuffle_dataset_op_test",
     size = "small",
-    srcs = ["multi_device_iterator_test.py"],
+    srcs = ["shuffle_dataset_op_test.py"],
     additional_deps = [
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:multi_device_iterator_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
+        ":test_base",
+        "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
     ],
-    tags = [
-        "no_windows_gpu",
+)
+
+py_library(
+    name = "test_base",
+    srcs = ["test_base.py"],
+    deps = [
+        "//tensorflow/python:client_testlib",
     ],
 )
 
@@ -434,6 +461,7 @@ tf_py_test(
     size = "small",
     srcs = ["window_dataset_op_test.py"],
     additional_deps = [
+        ":test_base",
         "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
@@ -447,14 +475,16 @@ tf_py_test(
 )
 
 tf_py_test(
-    name = "inputs_test",
+    name = "zip_dataset_op_test",
     size = "small",
-    srcs = ["inputs_test.py"],
+    srcs = ["zip_dataset_op_test.py"],
     additional_deps = [
-        "@absl_py//absl/testing:parameterized",
+        ":test_base",
         "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
diff --git a/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py
index c48708a2b9..9cb4daf284 100644
--- a/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py
@@ -24,6 +24,7 @@ from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -37,7 +38,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class BatchDatasetTest(test.TestCase, parameterized.TestCase):
+class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ('even', 28, 14, False),
@@ -115,11 +116,6 @@ class BatchDatasetTest(test.TestCase, parameterized.TestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(get_next)
 
-  def assertSparseValuesEqual(self, a, b):
-    self.assertAllEqual(a.indices, b.indices)
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.dense_shape, b.dense_shape)
-
   def testBatchSparse(self):
 
     def _sparse(i):
@@ -227,7 +223,7 @@ def _random_seq_lens(count):
   return np.random.randint(20, size=(count,)).astype(np.int32)
 
 
-class PaddedBatchDatasetTest(test.TestCase, parameterized.TestCase):
+class PaddedBatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ('default_padding', _random_seq_lens(32), 4, [-1], False),
diff --git a/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py
index d5f5b2fe05..63625fac03 100644
--- a/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py
@@ -23,6 +23,7 @@ import tempfile
 
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
@@ -34,7 +35,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
-class FileCacheDatasetTest(test.TestCase):
+class FileCacheDatasetTest(test_base.DatasetTestBase):
 
   def setUp(self):
     self.tmp_dir = tempfile.mkdtemp()
@@ -200,7 +201,7 @@ class FileCacheDatasetTest(test.TestCase):
       self.assertAllEqual(elements, elements_itr2)
 
 
-class MemoryCacheDatasetTest(test.TestCase):
+class MemoryCacheDatasetTest(test_base.DatasetTestBase):
 
   def testCacheDatasetPassthrough(self):
     with ops.device("cpu:0"):
diff --git a/tensorflow/python/data/kernel_tests/concatenate_dataset_op_test.py b/tensorflow/python/data/kernel_tests/concatenate_dataset_op_test.py
index 5dfb84f28e..83af31f380 100644
--- a/tensorflow/python/data/kernel_tests/concatenate_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/concatenate_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import errors
@@ -26,7 +27,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.platform import test
 
 
-class ConcatenateDatasetTest(test.TestCase):
+class ConcatenateDatasetTest(test_base.DatasetTestBase):
 
   def testConcatenateDataset(self):
     input_components = (
diff --git a/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py
index e43564a2eb..bc6b36285a 100644
--- a/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py
@@ -23,6 +23,7 @@ import numpy as np
 
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import dtypes
@@ -36,7 +37,7 @@ from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import test
 
 
-class DatasetConstructorTest(test.TestCase):
+class DatasetConstructorTest(test_base.DatasetTestBase):
 
   def testFromTensors(self):
     """Test a dataset that represents a single tuple of tensors."""
@@ -58,11 +59,6 @@ class DatasetConstructorTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def assertSparseValuesEqual(self, a, b):
-    self.assertAllEqual(a.indices, b.indices)
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.dense_shape, b.dense_shape)
-
   def testFromTensorsSparse(self):
     """Test a dataset that represents a single tuple of tensors."""
     components = (sparse_tensor.SparseTensorValue(
diff --git a/tensorflow/python/data/kernel_tests/dataset_from_generator_op_test.py b/tensorflow/python/data/kernel_tests/dataset_from_generator_op_test.py
index cd0c1ddf1e..cb8cb9a77d 100644
--- a/tensorflow/python/data/kernel_tests/dataset_from_generator_op_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_from_generator_op_test.py
@@ -22,6 +22,7 @@ import threading
 import numpy as np
 
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -30,7 +31,7 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
 
 
-class DatasetConstructorTest(test.TestCase):
+class DatasetConstructorTest(test_base.DatasetTestBase):
 
   def _testFromGenerator(self, generator, elem_sequence, num_repeats,
                          output_types=None):
diff --git a/tensorflow/python/data/kernel_tests/dataset_ops_test.py b/tensorflow/python/data/kernel_tests/dataset_ops_test.py
index 239aa85175..f115f9d9c7 100644
--- a/tensorflow/python/data/kernel_tests/dataset_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_ops_test.py
@@ -19,11 +19,12 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.core.framework import graph_pb2
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
 
-class DatasetOpsTest(test.TestCase):
+class DatasetOpsTest(test_base.DatasetTestBase):
 
   def testAsSerializedGraph(self):
     dataset = dataset_ops.Dataset.range(10)
diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
index 19944d389f..6b7afafa5d 100644
--- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
@@ -22,6 +22,7 @@ import time
 import numpy as np
 
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -33,7 +34,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class FilterDatasetTest(test.TestCase):
+class FilterDatasetTest(test_base.DatasetTestBase):
 
   def testFilterDataset(self):
     components = (
@@ -129,11 +130,6 @@ class FilterDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def assertSparseValuesEqual(self, a, b):
-    self.assertAllEqual(a.indices, b.indices)
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.dense_shape, b.dense_shape)
-
   def testSparse(self):
 
     def _map_fn(i):
diff --git a/tensorflow/python/data/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/flat_map_dataset_op_test.py
index 1123cbff62..68038f9cfc 100644
--- a/tensorflow/python/data/kernel_tests/flat_map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/flat_map_dataset_op_test.py
@@ -22,6 +22,7 @@ import random
 import numpy as np
 
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
@@ -30,7 +31,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.training import server_lib
 
 
-class FlatMapDatasetTest(test.TestCase):
+class FlatMapDatasetTest(test_base.DatasetTestBase):
 
   # pylint: disable=g-long-lambda
   def testFlatMapDataset(self):
diff --git a/tensorflow/python/data/kernel_tests/inputs_test.py b/tensorflow/python/data/kernel_tests/inputs_test.py
index 4c9279dd95..d089b49bcc 100644
--- a/tensorflow/python/data/kernel_tests/inputs_test.py
+++ b/tensorflow/python/data/kernel_tests/inputs_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
 from tensorflow.python.data.util import nest
@@ -27,7 +28,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.platform import test
 
 
-class InputsTest(test.TestCase, parameterized.TestCase):
+class InputsTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @staticmethod
   def make_apply_fn(dataset):
diff --git a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
index e7e51df65e..92bb67b6ff 100644
--- a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
@@ -22,6 +22,7 @@ import itertools
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
@@ -30,7 +31,7 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
 
-class InterleaveDatasetTest(test.TestCase, parameterized.TestCase):
+class InterleaveDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _interleave(self, lists, cycle_length, block_length):
     num_open = 0
diff --git a/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py
index c4b338a58f..8eb13815d4 100644
--- a/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py
@@ -22,6 +22,7 @@ from os import path
 import shutil
 import tempfile
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -30,7 +31,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class ListFilesDatasetOpTest(test.TestCase):
+class ListFilesDatasetOpTest(test_base.DatasetTestBase):
 
   def setUp(self):
     self.tmp_dir = tempfile.mkdtemp()
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index ae04995436..230ae3f3fd 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -27,6 +27,7 @@ import numpy as np
 
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -47,7 +48,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import test
 
 
-class MapDatasetTest(test.TestCase, parameterized.TestCase):
+class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _buildMapDataset(self, components, count):
     def _map_fn(x, y, z):
@@ -574,11 +575,6 @@ class MapDatasetTest(test.TestCase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def assertSparseValuesEqual(self, a, b):
-    self.assertAllEqual(a.indices, b.indices)
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.dense_shape, b.dense_shape)
-
   def testSparse(self):
 
     def _sparse(i):
diff --git a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
index 056664b83b..1cf6dd1bea 100644
--- a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
+++ b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import multi_device_iterator_ops
 from tensorflow.python.framework import dtypes
@@ -29,7 +30,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class MultiDeviceIteratorTest(test.TestCase):
+class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
   def testNoGetNext(self):
     dataset = dataset_ops.Dataset.range(10)
diff --git a/tensorflow/python/data/kernel_tests/optional_ops_test.py b/tensorflow/python/data/kernel_tests/optional_ops_test.py
index 706a65fe55..604e3ad88e 100644
--- a/tensorflow/python/data/kernel_tests/optional_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/optional_ops_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import optional_ops
@@ -35,7 +36,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class OptionalTest(test.TestCase, parameterized.TestCase):
+class OptionalTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testFromValue(self):
diff --git a/tensorflow/python/data/kernel_tests/prefetch_dataset_op_test.py b/tensorflow/python/data/kernel_tests/prefetch_dataset_op_test.py
index cc97bac609..76e2697b29 100644
--- a/tensorflow/python/data/kernel_tests/prefetch_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/prefetch_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -26,7 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class PrefetchDatasetTest(test.TestCase, parameterized.TestCase):
+class PrefetchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.parameters((-1), (0), (5))
   def testBufferSize(self, buffer_size):
diff --git a/tensorflow/python/data/kernel_tests/range_dataset_op_test.py b/tensorflow/python/data/kernel_tests/range_dataset_op_test.py
index 51e90785e7..b7e2a5f615 100644
--- a/tensorflow/python/data/kernel_tests/range_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/range_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import os
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import dtypes
@@ -34,7 +35,7 @@ from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 
 
-class RangeDatasetTest(test.TestCase):
+class RangeDatasetTest(test_base.DatasetTestBase):
 
   def tearDown(self):
     # Remove all checkpoint files.
diff --git a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
index aa3636364d..aef2dd1d9c 100644
--- a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
@@ -21,6 +21,7 @@ import gzip
 import os
 import zlib
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import readers
@@ -46,7 +47,7 @@ except ImportError:
   psutil_import_succeeded = False
 
 
-class TextLineDatasetTest(test.TestCase):
+class TextLineDatasetTest(test_base.DatasetTestBase):
 
   def _lineText(self, f, l):
     return compat.as_bytes("%d: %d" % (f, l))
@@ -199,7 +200,7 @@ class TextLineDatasetTest(test.TestCase):
       self.assertNotIn(filename, [open_file.path for open_file in open_files])
 
 
-class FixedLengthRecordReaderTest(test.TestCase):
+class FixedLengthRecordReaderTest(test_base.DatasetTestBase):
 
   def setUp(self):
     super(FixedLengthRecordReaderTest, self).setUp()
@@ -621,7 +622,7 @@ class FixedLengthRecordReaderTest(test.TestCase):
           sess.run(get_next_op)
 
 
-class TFRecordDatasetTest(test.TestCase):
+class TFRecordDatasetTest(test_base.DatasetTestBase):
 
   def setUp(self):
     super(TFRecordDatasetTest, self).setUp()
diff --git a/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py b/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py
index 37e2333560..e86356dee7 100644
--- a/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -26,7 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class SequenceDatasetTest(test.TestCase):
+class SequenceDatasetTest(test_base.DatasetTestBase):
 
   def testRepeatTensorDataset(self):
     """Test a dataset that repeats its input multiple times."""
diff --git a/tensorflow/python/data/kernel_tests/shard_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shard_dataset_op_test.py
index 137f6341ce..b9f3c79da5 100644
--- a/tensorflow/python/data/kernel_tests/shard_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/shard_dataset_op_test.py
@@ -17,12 +17,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
 
 
-class ShardDatasetOpTest(test.TestCase):
+class ShardDatasetOpTest(test_base.DatasetTestBase):
 
   def testSimpleCase(self):
     dataset = dataset_ops.Dataset.range(10).shard(5, 2)
diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
index f294840706..347af18576 100644
--- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
@@ -21,6 +21,7 @@ import collections
 
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
@@ -30,7 +31,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ShuffleDatasetTest(test.TestCase):
+class ShuffleDatasetTest(test_base.DatasetTestBase):
 
   def testShuffleDataset(self):
     components = (
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
new file mode 100644
index 0000000000..b4f64115b7
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -0,0 +1,29 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Test utilities for tf.data functionality."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.platform import test
+
+
+class DatasetTestBase(test.TestCase):
+  """Base class for dataset tests."""
+
+  def assertSparseValuesEqual(self, a, b):
+    self.assertAllEqual(a.indices, b.indices)
+    self.assertAllEqual(a.values, b.values)
+    self.assertAllEqual(a.dense_shape, b.dense_shape)
diff --git a/tensorflow/python/data/kernel_tests/window_dataset_op_test.py b/tensorflow/python/data/kernel_tests/window_dataset_op_test.py
index fd4348426d..9d06781094 100644
--- a/tensorflow/python/data/kernel_tests/window_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/window_dataset_op_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -29,7 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class WindowDatasetTest(test.TestCase, parameterized.TestCase):
+class WindowDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("1", 20, 14, 7, 1),
@@ -150,11 +151,6 @@ class WindowDatasetTest(test.TestCase, parameterized.TestCase):
                 stride_t: stride
             })
 
-  def assertSparseValuesEqual(self, a, b):
-    self.assertAllEqual(a.indices, b.indices)
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.dense_shape, b.dense_shape)
-
   def testWindowSparse(self):
 
     def _sparse(i):
diff --git a/tensorflow/python/data/kernel_tests/zip_dataset_op_test.py b/tensorflow/python/data/kernel_tests/zip_dataset_op_test.py
index 3106effbd3..9d76387a34 100644
--- a/tensorflow/python/data/kernel_tests/zip_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/zip_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -26,7 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ZipDatasetTest(test.TestCase):
+class ZipDatasetTest(test_base.DatasetTestBase):
 
   def testZipDataset(self):
     component_placeholders = [
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 12354a6ab2..9d816f0672 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -108,6 +108,7 @@ COMMON_PIP_DEPS = [
     "//tensorflow/python:meta_graph_testdata",
     "//tensorflow/python:spectral_ops_test_util",
     "//tensorflow/python:util_example_parser_configuration",
+    "//tensorflow/python/data/kernel_tests:test_base",
     "//tensorflow/python/debug:debug_pip",
     "//tensorflow/python/eager:eager_pip",
     "//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files",
-- 
GitLab


From cd1bdeafecf39bc55409b75cf27cecf273237ca2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 08:56:28 -0700
Subject: [PATCH 0796/1357] Added nest support for attr.s decorated classes.

PiperOrigin-RevId: 214781911
---
 tensorflow/python/util/nest.py      | 16 ++++++++-
 tensorflow/python/util/nest_test.py | 34 +++++++++++++++++++
 tensorflow/python/util/util.cc      | 51 +++++++++++++++++++++++++++++
 tensorflow/python/util/util.h       |  9 +++++
 tensorflow/python/util/util.i       | 12 +++++++
 5 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index 653ca525dc..758cba7487 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -19,6 +19,9 @@ This module can perform operations on nested structures. A nested structure is a
 Python sequence, tuple (including `namedtuple`), or dict that can contain
 further sequences, tuples, and dicts.
 
+attr.s decorated classes (http://www.attrs.org) are also supported, in the
+same way as `namedtuple`.
+
 The utilities here assume (and do not check) that the nested structures form a
 'tree', i.e., no references in the structure of the input of these functions
 should be recursive.
@@ -38,6 +41,12 @@ import six as _six
 from tensorflow.python import pywrap_tensorflow as _pywrap_tensorflow
 
 
+def _get_attrs_values(obj):
+  """Returns the list of values from an attrs instance."""
+  attrs = getattr(obj.__class__, "__attrs_attrs__")
+  return [getattr(obj, a.name) for a in attrs]
+
+
 def _sorted(dict_):
   """Returns a sorted list of the dict keys, with error if keys not sortable."""
   try:
@@ -64,6 +73,7 @@ def _is_namedtuple(instance, strict=False):
 
 # See the swig file (util.i) for documentation.
 _is_mapping = _pywrap_tensorflow.IsMapping
+_is_attrs = _pywrap_tensorflow.IsAttrs
 
 
 def _sequence_like(instance, args):
@@ -85,7 +95,7 @@ def _sequence_like(instance, args):
     # corresponding `OrderedDict` to pack it back).
     result = dict(zip(_sorted(instance), args))
     return type(instance)((key, result[key]) for key in _six.iterkeys(instance))
-  elif _is_namedtuple(instance):
+  elif _is_namedtuple(instance) or _is_attrs(instance):
     return type(instance)(*args)
   else:
     # Not a namedtuple
@@ -93,6 +103,7 @@ def _sequence_like(instance, args):
 
 
 def _yield_value(iterable):
+  """Yields the next value from the given iterable."""
   if _is_mapping(iterable):
     # Iterate through dictionaries in a deterministic order by sorting the
     # keys. Notice this means that we ignore the original order of `OrderedDict`
@@ -101,6 +112,9 @@ def _yield_value(iterable):
     # corresponding `OrderedDict` to pack it back).
     for key in _sorted(iterable):
       yield iterable[key]
+  elif _is_attrs(iterable):
+    for value in _get_attrs_values(iterable):
+      yield value
   else:
     for value in iterable:
       yield value
diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py
index bfb4c6f910..e03a8daaa1 100644
--- a/tensorflow/python/util/nest_test.py
+++ b/tensorflow/python/util/nest_test.py
@@ -33,6 +33,11 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 from tensorflow.python.util import nest
 
+try:
+  import attr  # pylint:disable=g-import-not-at-top
+except ImportError:
+  attr = None
+
 
 class _CustomMapping(collections.Mapping):
 
@@ -53,6 +58,35 @@ class NestTest(parameterized.TestCase, test.TestCase):
 
   PointXY = collections.namedtuple("Point", ["x", "y"])  # pylint: disable=invalid-name
 
+  if attr:
+    class BadAttr(object):
+      """Class that has a non-iterable __attrs_attrs__."""
+      __attrs_attrs__ = None
+
+    @attr.s
+    class SampleAttr(object):
+      field1 = attr.ib()
+      field2 = attr.ib()
+
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testAttrsFlattenAndPack(self):
+    if attr is None:
+      self.skipTest("attr module is unavailable.")
+
+    field_values = [1, 2]
+    sample_attr = NestTest.SampleAttr(*field_values)
+    self.assertFalse(nest._is_attrs(field_values))
+    self.assertTrue(nest._is_attrs(sample_attr))
+    flat = nest.flatten(sample_attr)
+    self.assertEqual(field_values, flat)
+    restructured_from_flat = nest.pack_sequence_as(sample_attr, flat)
+    self.assertIsInstance(restructured_from_flat, NestTest.SampleAttr)
+    self.assertEqual(restructured_from_flat, sample_attr)
+
+    # Check that flatten fails if attributes are not iterable
+    with self.assertRaisesRegexp(TypeError, "object is not iterable"):
+      flat = nest.flatten(NestTest.BadAttr())
+
   @test_util.assert_no_new_pyobjects_executing_eagerly
   def testFlattenAndPack(self):
     structure = ((3, 4), 5, (6, 7, (9, 10), 8))
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index 2087957b31..38b8491c66 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -192,6 +192,19 @@ int IsMappingHelper(PyObject* o) {
   return check_cache->CachedLookup(o);
 }
 
+// Returns 1 if `o` is an instance of attrs-decorated class.
+// Returns 0 otherwise.
+int IsAttrsHelper(PyObject* o) {
+  Safe_PyObjectPtr cls(PyObject_GetAttrString(o, "__class__"));
+  if (cls) {
+    return PyObject_HasAttrString(cls.get(), "__attrs_attrs__");
+  } else {
+    // PyObject_GetAttrString returns null on error
+    PyErr_Clear();
+    return 0;
+  }
+}
+
 // Returns 1 if `o` is considered a sequence for the purposes of Flatten().
 // Returns 0 otherwise.
 // Returns -1 if an error occurred.
@@ -206,6 +219,7 @@ int IsSequenceHelper(PyObject* o) {
   });
   // We treat dicts and other mappings as special cases of sequences.
   if (IsMappingHelper(o)) return true;
+  if (IsAttrsHelper(o)) return true;
   if (PySet_Check(o) && !WarnedThatSetIsNotSequence) {
     LOG(WARNING) << "Sets are not currently considered sequences, "
                     "but this may change in the future, "
@@ -354,6 +368,38 @@ class SparseTensorValueIterator : public ValueIterator {
   Safe_PyObjectPtr tensor_;
 };
 
+class AttrsValueIterator : public ValueIterator {
+ public:
+  explicit AttrsValueIterator(PyObject* nested) : nested_(nested) {
+    Py_INCREF(nested);
+    cls_.reset(PyObject_GetAttrString(nested_.get(), "__class__"));
+    if (cls_) {
+      attrs_.reset(PyObject_GetAttrString(cls_.get(), "__attrs_attrs__"));
+      if (attrs_) {
+        iter_.reset(PyObject_GetIter(attrs_.get()));
+      }
+    }
+    if (!iter_ || PyErr_Occurred()) invalidate();
+  }
+
+  Safe_PyObjectPtr next() override {
+    Safe_PyObjectPtr result;
+    Safe_PyObjectPtr item(PyIter_Next(iter_.get()));
+    if (item) {
+      Safe_PyObjectPtr name(PyObject_GetAttrString(item.get(), "name"));
+      result.reset(PyObject_GetAttr(nested_.get(), name.get()));
+    }
+
+    return result;
+  }
+
+ private:
+  Safe_PyObjectPtr nested_;
+  Safe_PyObjectPtr cls_;
+  Safe_PyObjectPtr attrs_;
+  Safe_PyObjectPtr iter_;
+};
+
 bool IsSparseTensorValueType(PyObject* o) {
   if (TF_PREDICT_FALSE(SparseTensorValueType == nullptr)) {
     return false;
@@ -372,6 +418,8 @@ ValueIteratorPtr GetValueIterator(PyObject* nested) {
     return absl::make_unique<DictValueIterator>(nested);
   } else if (IsMappingHelper(nested)) {
     return absl::make_unique<MappingValueIterator>(nested);
+  } else if (IsAttrsHelper(nested)) {
+    return absl::make_unique<AttrsValueIterator>(nested);
   } else {
     return absl::make_unique<SequenceValueIterator>(nested);
   }
@@ -383,6 +431,8 @@ ValueIteratorPtr GetValueIteratorForData(PyObject* nested) {
     return absl::make_unique<DictValueIterator>(nested);
   } else if (IsMappingHelper(nested)) {
     return absl::make_unique<MappingValueIterator>(nested);
+  } else if (IsAttrsHelper(nested)) {
+    return absl::make_unique<AttrsValueIterator>(nested);
   } else if (IsSparseTensorValueType(nested)) {
     return absl::make_unique<SparseTensorValueIterator>(nested);
   } else {
@@ -639,6 +689,7 @@ void RegisterSparseTensorValueClass(PyObject* sparse_tensor_value_class) {
 
 bool IsSequence(PyObject* o) { return IsSequenceHelper(o) == 1; }
 bool IsMapping(PyObject* o) { return IsMappingHelper(o) == 1; }
+bool IsAttrs(PyObject* o) { return IsAttrsHelper(o) == 1; }
 
 PyObject* Flatten(PyObject* nested) {
   PyObject* list = PyList_New(0);
diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h
index 343605285e..01f85ea1dc 100644
--- a/tensorflow/python/util/util.h
+++ b/tensorflow/python/util/util.h
@@ -56,6 +56,15 @@ PyObject* IsNamedtuple(PyObject* o, bool strict);
 //   True if the sequence subclasses mapping.
 bool IsMapping(PyObject* o);
 
+// Returns a true if its input is an instance of an attr.s decorated class.
+//
+// Args:
+//   o: the input to be checked.
+//
+// Returns:
+//   True if the object is an instance of an attr.s decorated class.
+bool IsAttrs(PyObject* o);
+
 // Implements the same interface as tensorflow.util.nest._same_namedtuples
 // Returns Py_True iff the two namedtuples have the same name and fields.
 // Raises RuntimeError if `o1` or `o2` don't look like namedtuples (don't have
diff --git a/tensorflow/python/util/util.i b/tensorflow/python/util/util.i
index 104a615636..32a6e684fa 100644
--- a/tensorflow/python/util/util.i
+++ b/tensorflow/python/util/util.i
@@ -65,6 +65,18 @@ Returns:
 %unignore tensorflow::swig::IsMapping;
 %noexception tensorflow::swig::IsMapping;
 
+%feature("docstring") tensorflow::swig::IsAttrs
+"""Returns True iff `instance` is an instance of an `attr.s` decorated class.
+
+Args:
+  instance: An instance of a Python object.
+
+Returns:
+  True if `instance` is an instance of an `attr.s` decorated class.
+"""
+%unignore tensorflow::swig::IsAttrs;
+%noexception tensorflow::swig::IsAttrs;
+
 %feature("docstring") tensorflow::swig::SameNamedtuples
 "Returns True if the two namedtuples have the same name and fields."
 %unignore tensorflow::swig::SameNamedtuples;
-- 
GitLab


From 941b4e0f226de76f083401842e73bd9efd6db2d0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 10:23:59 -0700
Subject: [PATCH 0797/1357] Fix support for custom optimizers in explicit
 schedule

PiperOrigin-RevId: 214794973
---
 .../grappler/optimizers/meta_optimizer.cc     | 25 ++++++++++++++--
 .../core/grappler/optimizers/meta_optimizer.h |  4 +++
 .../optimizers/meta_optimizer_test.cc         | 30 +++++++++++++++++++
 .../core/protobuf/rewriter_config.proto       |  4 +--
 4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index c59645e5f2..e18a5f21d2 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -172,11 +172,12 @@ Status MetaOptimizer::InitializeOptimizers(
     optimizers->push_back(MakeUnique<ScopedAllocatorOptimizer>(
         cfg_.scoped_allocator_optimization(), cfg_.scoped_allocator_opts()));
   }
-  return InitializeCustomGraphOptimizers(optimizers);
+  return InitializeCustomGraphOptimizers(std::set<string>(), optimizers);
 }
 
 Status MetaOptimizer::InitializeOptimizersByName(
     std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const {
+  std::set<string> initialized_custom_optimizers;
   for (const string& optimizer_name : cfg_.optimizers()) {
     auto optimizer = MakeNewOptimizer(optimizer_name);
     if (optimizer) {
@@ -190,18 +191,26 @@ Status MetaOptimizer::InitializeOptimizersByName(
 
     if (custom_optimizer) {
       VLOG(2) << "Registered custom graph optimizer: " << optimizer_name;
-      TF_RETURN_IF_ERROR(custom_optimizer->Init());
+      TF_RETURN_IF_ERROR(custom_optimizer->Init(
+          GetCustomGraphOptimizerConfig(optimizer_name)));
       optimizers->push_back(std::move(custom_optimizer));
+      initialized_custom_optimizers.insert(optimizer_name);
     } else {
       VLOG(2) << "Can't register an optimizer by name: " << optimizer_name;
     }
   }
-  return InitializeCustomGraphOptimizers(optimizers);
+  return InitializeCustomGraphOptimizers(initialized_custom_optimizers,
+                                         optimizers);
 }
 
 Status MetaOptimizer::InitializeCustomGraphOptimizers(
+    const std::set<string>& pre_initialized_optimizers,
     std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const {
   for (const auto& optimizer_config : cfg_.custom_optimizers()) {
+    if (pre_initialized_optimizers.find(optimizer_config.name()) !=
+        pre_initialized_optimizers.end()) {
+      continue;
+    }
     // Initialize the ExperimentalImplementationSelector here instead of
     // CustomizeOptimizer registry, due the static link issue in TensorRT for
     // double registry.
@@ -237,6 +246,16 @@ Status MetaOptimizer::InitializeCustomGraphOptimizers(
   return Status::OK();
 }
 
+const RewriterConfig::CustomGraphOptimizer*
+MetaOptimizer::GetCustomGraphOptimizerConfig(const string& name) const {
+  for (const auto& config : cfg_.custom_optimizers()) {
+    if (config.name() == name) {
+      return &config;
+    }
+  }
+  return nullptr;
+}
+
 Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
                                     GraphDef* optimized_graph) {
   int min_graph_nodes = cfg_.min_graph_nodes() == 0 ? kDefaultMinGraphNodes
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h
index 831c5e37c0..99a0a33ffa 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h
@@ -54,7 +54,11 @@ class MetaOptimizer : public GraphOptimizer {
       std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const;
   // Initialize active optimizers from RewriterConfig.custom_optimizers.
   Status InitializeCustomGraphOptimizers(
+      const std::set<string>& pre_initialized_optimizers,
       std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const;
+  // Returns the config for a custom graph optimizer. Null if none was found.
+  const RewriterConfig::CustomGraphOptimizer* GetCustomGraphOptimizerConfig(
+      const string& name) const;
 
   // Run optimization pass over a single GrapplerItem. Meta optimizer might run
   // multiple such passes: 1) for the main graph 2) for the function library
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
index e74e0f7501..c477c4d4b1 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
@@ -71,6 +71,17 @@ class TestGraphOptimizer : public TestOptimizer {
 
 REGISTER_GRAPH_OPTIMIZER(TestGraphOptimizer);
 
+class TestOptimizerWithParams : public TestOptimizer {
+ public:
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    CHECK(config != nullptr);
+    return Status::OK();
+  }
+};
+
+REGISTER_GRAPH_OPTIMIZER(TestOptimizerWithParams);
+
 class MetaOptimizerTest : public GrapplerTest {};
 
 TEST_F(MetaOptimizerTest, RunsCustomOptimizer) {
@@ -90,6 +101,25 @@ TEST_F(MetaOptimizerTest, RunsCustomOptimizer) {
   EXPECT_TRUE(TestOptimizer::IsOptimized());
 }
 
+TEST_F(MetaOptimizerTest, RunsCustomOptimizerWithParams) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+
+  TestOptimizer::SetOptimized(false);
+  RewriterConfig rewriter_config;
+  rewriter_config.add_optimizers("TestOptimizerWithParams");
+  auto* custom_config = rewriter_config.add_custom_optimizers();
+  custom_config->set_name("TestOptimizerWithParams");
+  (*custom_config->mutable_parameter_map())["foo"] = AttrValue();
+
+  MetaOptimizer optimizer(nullptr, rewriter_config);
+  GraphDef output;
+  const Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  EXPECT_TRUE(TestOptimizer::IsOptimized());
+}
+
 TEST_F(MetaOptimizerTest, RunsCustomOptimizerAndCustomGraphOptimizer) {
   TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
   GrapplerItem item;
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index bb8f88336d..482178a540 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -143,8 +143,8 @@ message RewriterConfig {
   // not configurable (in contrast to memory optimization passes through the
   // meta-optimizer) and act only on manual op annotations.
   //
-  // Custom registered optimizers will be run after the base optimizers, in
-  // the order that they are specified.
+  // Custom optimizers (see custom_optimizers) that are not part of this
+  // schedule will be run after - in the order that they were specified.
   repeated string optimizers = 100;
 
   // Message to describe custom graph optimizer and its parameters
-- 
GitLab


From 3002b10e29363854c6fc20d788bc65233fd5116f Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 27 Sep 2018 10:25:58 -0700
Subject: [PATCH 0798/1357] Update L2HMC graph benchmark to be more similar to
 eager benchmark.

PiperOrigin-RevId: 214795331
---
 .../eager/python/examples/l2hmc/l2hmc_test.py | 162 ++++++++++--------
 1 file changed, 91 insertions(+), 71 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py b/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py
index c38a1597b8..1c925e455b 100644
--- a/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py
+++ b/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py
@@ -45,6 +45,17 @@ def step(dynamics, optimizer, samples):
   return loss, samples
 
 
+# To be defunnable, the function cannot return an Operation, so the above
+# function is used for defun or eager, and this function is used in graph to be
+# able to run the gradient updates.
+def graph_step(dynamics, optimizer, samples):
+  loss, grads, samples, _ = l2hmc.loss_and_grads(
+      dynamics, samples, loss_fn=l2hmc.compute_loss)
+  train_op = optimizer.apply_gradients(zip(grads, dynamics.variables))
+
+  return train_op, loss, samples
+
+
 def warmup(dynamics,
            optimizer,
            n_iters=1,
@@ -134,51 +145,48 @@ class L2hmcBenchmark(tf.test.Benchmark):
     """Benchmark Graph performance."""
 
     hparams = get_default_hparams()
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-      energy_fn, _, _ = l2hmc.get_scg_energy_fn()
-      dynamics = l2hmc.Dynamics(
-          x_dim=hparams.x_dim,
-          minus_loglikelihood_fn=energy_fn,
-          n_steps=hparams.n_steps,
-          eps=hparams.eps)
-      x = tf.placeholder(tf.float32, shape=[None, hparams.x_dim])
-      loss, x_out, _ = l2hmc.compute_loss(dynamics, x)
-
-      global_step = tf.Variable(0., name="global_step", trainable=False)
-      learning_rate = tf.train.exponential_decay(
-          hparams.learning_rate, global_step, 1000, 0.96, staircase=True)
-      optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
-      train_op = optimizer.minimize(loss, global_step=global_step)
-
-      # Single thread; fairer comparison against eager
-      session_conf = tf.ConfigProto(
-          intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
-
-      with tf.Session(config=session_conf) as sess:
-        sess.run(tf.global_variables_initializer())
-
-        # Warmup to reduce initialization effect when timing
-        samples = npr.normal(size=[hparams.n_samples, hparams.x_dim])
-        for _ in range(hparams.n_warmup_iters):
-          _, _, _, _ = sess.run(
-              [x_out, loss, train_op, learning_rate], feed_dict={x: samples})
-
-        # Training
-        start_time = time.time()
-        for i in range(hparams.n_iters):
-          samples, loss_np, _, _ = sess.run(
-              [x_out, loss, train_op, learning_rate], feed_dict={x: samples})
-          print("Iteration %d: loss %.4f" % (i, loss_np))
-        wall_time = time.time() - start_time
-        examples_per_sec = hparams.n_samples / wall_time
-
-        self.report_benchmark(
-            name="graph_train_%s" % ("gpu"
-                                     if tf.test.is_gpu_available() else "cpu"),
-            iters=hparams.n_iters,
-            extras={"examples_per_sec": examples_per_sec},
-            wall_time=wall_time)
+    tf.enable_resource_variables()
+    for sample_size in [10, 25, 50, 100, 200]:
+      hparams.n_samples = sample_size
+      tf.reset_default_graph()
+      with tf.Graph().as_default():
+        energy_fn, _, _ = l2hmc.get_scg_energy_fn()
+        x = tf.random_normal([hparams.n_samples, hparams.x_dim],
+                             dtype=tf.float32)
+        dynamics = l2hmc.Dynamics(
+            x_dim=hparams.x_dim,
+            minus_loglikelihood_fn=energy_fn,
+            n_steps=hparams.n_steps,
+            eps=hparams.eps)
+        loss, _, _ = l2hmc.compute_loss(dynamics, x)
+
+        optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
+        train_op, loss, _ = graph_step(dynamics, optimizer, x)
+
+        # Single thread; fairer comparison against eager
+        session_conf = tf.ConfigProto(inter_op_parallelism_threads=1)
+
+        with tf.Session(config=session_conf) as sess:
+          sess.run(tf.global_variables_initializer())
+
+          # Warmup to reduce initialization effect when timing
+          for _ in range(hparams.n_warmup_iters):
+            _, _ = sess.run([train_op, loss])
+
+          # Training
+          start_time = time.time()
+          for i in range(hparams.n_iters):
+            _, loss_np = sess.run([train_op, loss])
+            print("Iteration %d: loss %.4f" % (i, loss_np))
+          wall_time = (time.time() - start_time) / hparams.n_iters
+          examples_per_sec = hparams.n_samples / wall_time
+
+          self.report_benchmark(
+              name="graph_train_%s_%d" %
+              ("gpu" if tf.test.is_gpu_available() else "cpu", sample_size),
+              iters=hparams.n_iters,
+              extras={"examples_per_sec": examples_per_sec},
+              wall_time=wall_time)
 
   def benchmark_eager(self):
     self._benchmark_eager()
@@ -190,32 +198,44 @@ class L2hmcBenchmark(tf.test.Benchmark):
     """Benchmark Eager performance."""
 
     hparams = get_default_hparams()
-    energy_fn, _, _ = l2hmc.get_scg_energy_fn()
-    dynamics = l2hmc.Dynamics(
-        x_dim=hparams.x_dim,
-        minus_loglikelihood_fn=energy_fn,
-        n_steps=hparams.n_steps,
-        eps=hparams.eps)
-    optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
-    step_fn = tfe.defun(step) if defun else step
-
-    # Warmup to reduce initialization effect when timing
-    warmup(dynamics, optimizer, n_iters=hparams.n_warmup_iters, step_fn=step_fn)
-
-    # Training
-    samples = tf.random_normal(
-        shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32)
-    start_time = time.time()
-    fit(dynamics, samples, optimizer, step_fn=step_fn, n_iters=hparams.n_iters)
-    wall_time = time.time() - start_time
-    examples_per_sec = hparams.n_samples / wall_time
-
-    self.report_benchmark(
-        name="eager_train_%s%s" % ("gpu" if tf.test.is_gpu_available() else
-                                   "cpu", "_defun" if defun else ""),
-        iters=hparams.n_iters,
-        extras={"examples_per_sec": examples_per_sec},
-        wall_time=wall_time)
+    for sample_size in [10, 25, 50, 100, 200]:
+      hparams.n_samples = sample_size
+      energy_fn, _, _ = l2hmc.get_scg_energy_fn()
+      dynamics = l2hmc.Dynamics(
+          x_dim=hparams.x_dim,
+          minus_loglikelihood_fn=energy_fn,
+          n_steps=hparams.n_steps,
+          eps=hparams.eps)
+      optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
+      step_fn = tfe.defun(step) if defun else step
+
+      # Warmup to reduce initialization effect when timing
+      warmup(
+          dynamics,
+          optimizer,
+          n_iters=hparams.n_warmup_iters,
+          n_samples=hparams.n_samples,
+          step_fn=step_fn)
+
+      # Training
+      samples = tf.random_normal(
+          shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32)
+      start_time = time.time()
+      fit(dynamics,
+          samples,
+          optimizer,
+          step_fn=step_fn,
+          n_iters=hparams.n_iters)
+      wall_time = (time.time() - start_time) / hparams.n_iters
+      examples_per_sec = hparams.n_samples / wall_time
+
+      self.report_benchmark(
+          name="eager_train_%s%s_%d" %
+          ("gpu" if tf.test.is_gpu_available() else "cpu",
+           "_defun" if defun else "", sample_size),
+          iters=hparams.n_iters,
+          extras={"examples_per_sec": examples_per_sec},
+          wall_time=wall_time)
 
     del dynamics
 
-- 
GitLab


From 334244be6864dd1dbec9bc8bb4996cc286a8e3e3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 10:31:36 -0700
Subject: [PATCH 0799/1357] Add tf.strings.unicode_script, which detects the
 script of a unicode codepoint based on standard ranges.

PiperOrigin-RevId: 214796357
---
 .../base_api/api_def_UnicodeScript.pbtxt      | 28 ++++++
 .../python_api/api_def_UnicodeScript.pbtxt    |  6 ++
 tensorflow/core/kernels/BUILD                 | 12 +++
 tensorflow/core/kernels/unicode_script_op.cc  | 53 +++++++++++
 tensorflow/core/ops/string_ops.cc             |  5 ++
 tensorflow/python/kernel_tests/BUILD          | 12 +++
 .../kernel_tests/unicode_script_op_test.py    | 57 ++++++++++++
 .../api/golden/v1/tensorflow.strings.pbtxt    |  4 +
 .../api/golden/v2/tensorflow.strings.pbtxt    |  4 +
 tensorflow/tools/lib_package/BUILD            |  2 +
 tensorflow/tools/pip_package/BUILD            |  1 +
 tensorflow/workspace.bzl                      |  2 +
 third_party/icu/BUILD                         |  1 +
 third_party/icu/BUILD.bazel                   | 88 +++++++++++++++++++
 third_party/icu/workspace.bzl                 | 15 ++++
 15 files changed, 290 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_UnicodeScript.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_UnicodeScript.pbtxt
 create mode 100644 tensorflow/core/kernels/unicode_script_op.cc
 create mode 100644 tensorflow/python/kernel_tests/unicode_script_op_test.py
 create mode 100644 third_party/icu/BUILD
 create mode 100644 third_party/icu/BUILD.bazel
 create mode 100644 third_party/icu/workspace.bzl

diff --git a/tensorflow/core/api_def/base_api/api_def_UnicodeScript.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnicodeScript.pbtxt
new file mode 100644
index 0000000000..7898fe8d6b
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_UnicodeScript.pbtxt
@@ -0,0 +1,28 @@
+op {
+  graph_op_name: "UnicodeScript"
+  endpoint {
+    name: "UnicodeScript"
+  }
+  in_arg {
+    name: "input"
+    description: <<END
+A Tensor of int32 Unicode code points.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+A Tensor of int32 script codes corresponding to each input code point.
+END
+  }
+  summary: <<END
+Determine the script codes of a given tensor of Unicode integer code points.
+END
+  description: <<END
+This operation converts Unicode code points to script codes corresponding to
+each code point. Script codes correspond to International Components for
+Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html.
+Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will
+match input shape.
+END
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_UnicodeScript.pbtxt b/tensorflow/core/api_def/python_api/api_def_UnicodeScript.pbtxt
new file mode 100644
index 0000000000..a884a46143
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_UnicodeScript.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UnicodeScript"
+  endpoint {
+    name: "strings.unicode_script"
+  }
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 0534b1829d..0b8e9ec527 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4431,6 +4431,7 @@ cc_library(
         ":string_strip_op",
         ":string_to_hash_bucket_op",
         ":substr_op",
+        ":unicode_script_op",
     ],
 )
 
@@ -5471,6 +5472,7 @@ filegroup(
             "batch_kernels.*",
             "regex_full_match_op.cc",
             "regex_replace_op.cc",
+            "unicode_script_op.cc",
             # Ops that are inherently incompatible with Android (e.g. tied to x86 platform).
             "mkl_*",
             "xsmm_*",
@@ -6565,6 +6567,16 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "unicode_script_op",
+    srcs = ["unicode_script_op.cc"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:string_ops_op_lib",
+        "@icu//:common",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 # Google-internal targets.  These must be at the end for syncrepo.
 
diff --git a/tensorflow/core/kernels/unicode_script_op.cc b/tensorflow/core/kernels/unicode_script_op.cc
new file mode 100644
index 0000000000..085e397eba
--- /dev/null
+++ b/tensorflow/core/kernels/unicode_script_op.cc
@@ -0,0 +1,53 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "unicode/errorcode.h"  // TF:icu
+#include "unicode/uscript.h"  // TF:icu
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+
+class UnicodeScriptOp : public OpKernel {
+ public:
+  explicit UnicodeScriptOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor* input_tensor;
+    OP_REQUIRES_OK(context, context->input("input", &input_tensor));
+    const auto& input_flat = input_tensor->flat<int32>();
+
+    Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output("output", input_tensor->shape(),
+                                            &output_tensor));
+    auto output_flat = output_tensor->flat<int32>();
+
+    icu::ErrorCode status;
+    for (int i = 0; i < input_flat.size(); i++) {
+      UScriptCode script_code = uscript_getScript(input_flat(i), status);
+      if (status.isSuccess()) {
+        output_flat(i) = script_code;
+      } else {
+        output_flat(i) = -1;
+        status.reset();
+      }
+    }
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("UnicodeScript").Device(DEVICE_CPU),
+                        UnicodeScriptOp);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index da1d2a6432..b4fbde54d9 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -244,4 +244,9 @@ REGISTER_OP("Substr")
       return shape_inference::BroadcastBinaryOpShapeFn(c);
     });
 
+REGISTER_OP("UnicodeScript")
+    .Input("input: int32")
+    .Output("output: int32")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 5183e4d30c..c2e36e5e19 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1097,6 +1097,18 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "unicode_script_op_test",
+    size = "small",
+    srcs = ["unicode_script_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:string_ops",
+    ],
+)
+
 cuda_py_test(
     name = "topk_op_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/unicode_script_op_test.py b/tensorflow/python/kernel_tests/unicode_script_op_test.py
new file mode 100644
index 0000000000..927e5459ed
--- /dev/null
+++ b/tensorflow/python/kernel_tests/unicode_script_op_test.py
@@ -0,0 +1,57 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+"""Functional tests for UnicodeScript op."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import test
+
+
+class UnicodeScriptOpTest(test.TestCase):
+
+  def testValidScripts(self):
+    inputs = [
+        ord("a"),
+        0x0411,  # CYRILLIC CAPITAL LETTER BE
+        0x82b8,  # CJK UNIFIED IDEOGRAPH-82B8
+        ord(",")
+    ]
+    with self.cached_session():
+      input_vector = constant_op.constant(inputs, dtypes.int32)
+      outputs = string_ops.unicode_script(input_vector).eval()
+      self.assertAllEqual(
+          outputs,
+          [
+              25,  # USCRIPT_LATIN (LATN)
+              8,  # USCRIPT_CYRILLIC (CYRL)
+              17,  # USCRIPT_HAN (HANI)
+              0  # USCRIPT_COMMON (ZYYY)
+          ])
+
+  def testInvalidScript(self):
+    inputs = [-100, 0xffffff]
+    with self.cached_session():
+      input_vector = constant_op.constant(inputs, dtypes.int32)
+      outputs = string_ops.unicode_script(input_vector).eval()
+      self.assertAllEqual(outputs, [-1, -1])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
index c52581dec1..312e94b41d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
@@ -48,4 +48,8 @@ tf_module {
     name: "to_number"
     argspec: "args=[\'string_tensor\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "unicode_script"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
index c52581dec1..312e94b41d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
@@ -48,4 +48,8 @@ tf_module {
     name: "to_number"
     argspec: "args=[\'string_tensor\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "unicode_script"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index b450bc42c5..095ac1f4cc 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -125,6 +125,7 @@ genrule(
         "@gemmlowp//:LICENSE",
         "@gif_archive//:COPYING",
         "@highwayhash//:LICENSE",
+        "@icu//:icu4c/LICENSE",
         "@jpeg//:LICENSE.md",
         "@llvm//:LICENSE.TXT",
         "@lmdb//:LICENSE",
@@ -192,6 +193,7 @@ genrule(
         "@gemmlowp//:LICENSE",
         "@gif_archive//:COPYING",
         "@highwayhash//:LICENSE",
+        "@icu//:icu4j/main/shared/licenses/LICENSE",
         "@jpeg//:LICENSE.md",
         "@llvm//:LICENSE.TXT",
         "@lmdb//:LICENSE",
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 9d816f0672..cce60ccea0 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -153,6 +153,7 @@ filegroup(
         "@gemmlowp//:LICENSE",
         "@gif_archive//:COPYING",
         "@highwayhash//:LICENSE",
+        "@icu//:icu4c/LICENSE",
         "@jpeg//:LICENSE.md",
         "@lmdb//:LICENSE",
         "@local_config_sycl//sycl:LICENSE.text",
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 4bf2ff3fb5..e5a0a0b2b7 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -21,9 +21,11 @@ load(
     "def_file_filter_configure",
 )
 load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
+load("//third_party/icu:workspace.bzl", icu = "repo")
 
 def initialize_third_party():
     flatbuffers()
+    icu()
 
 # Sanitize a dependency so that it works correctly from code that includes
 # TensorFlow as a submodule.
diff --git a/third_party/icu/BUILD b/third_party/icu/BUILD
new file mode 100644
index 0000000000..82bab3ffd9
--- /dev/null
+++ b/third_party/icu/BUILD
@@ -0,0 +1 @@
+# This empty BUILD file is required to make Bazel treat this directory as a package.
diff --git a/third_party/icu/BUILD.bazel b/third_party/icu/BUILD.bazel
new file mode 100644
index 0000000000..36d6b9006b
--- /dev/null
+++ b/third_party/icu/BUILD.bazel
@@ -0,0 +1,88 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files([
+    "icu4c/LICENSE",
+    "icu4j/main/shared/licenses/LICENSE",
+])
+
+cc_library(
+    name = "headers",
+    hdrs = glob(["icu4c/source/common/unicode/*.h"]),
+    includes = [
+        "icu4c/source/common",
+    ],
+    deps = [
+    ],
+)
+
+cc_library(
+    name = "common",
+    hdrs = glob(["icu4c/source/common/unicode/*.h"]),
+    includes = [
+        "icu4c/source/common",
+    ],
+    deps = [
+        ":icuuc",
+    ],
+)
+
+cc_library(
+    name = "icuuc",
+    srcs = glob(
+        [
+            "icu4c/source/common/*.c",
+            "icu4c/source/common/*.cpp",
+            "icu4c/source/stubdata/*.cpp",
+        ],
+    ),
+    hdrs = glob([
+        "icu4c/source/common/*.h",
+    ]),
+    copts = [
+        "-DU_COMMON_IMPLEMENTATION",
+        "-DU_HAVE_STD_ATOMICS",
+    ] + select({
+        ":android": [
+            "-fdata-sections",
+            "-DGOOGLE_VENDOR_SRC_BRANCH",
+            "-DU_HAVE_NL_LANGINFO_CODESET=0",
+            "-Wno-deprecated-declarations",
+        ],
+        ":apple": [
+            "-DGOOGLE_VENDOR_SRC_BRANCH",
+            "-Wno-shorten-64-to-32",
+            "-Wno-unused-variable",
+        ],
+        ":windows": [
+            "/utf-8",
+            "/DLOCALE_ALLOW_NEUTRAL_NAMES=0",
+        ],
+        "//conditions:default": [],
+    }),
+    tags = ["requires-rtti"],
+    visibility = [
+        "//visibility:private",
+    ],
+    deps = [
+        ":headers",
+    ],
+)
+
+config_setting(
+    name = "android",
+    values = {"crosstool_top": "//external:android/crosstool"},
+)
+
+config_setting(
+    name = "apple",
+    values = {"cpu": "darwin"},
+)
+
+config_setting(
+    name = "windows",
+    values = {"cpu": "x64_windows"},
+)
diff --git a/third_party/icu/workspace.bzl b/third_party/icu/workspace.bzl
new file mode 100644
index 0000000000..bfebf4219b
--- /dev/null
+++ b/third_party/icu/workspace.bzl
@@ -0,0 +1,15 @@
+"""Loads a lightweight subset of the ICU library for Unicode processing."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "icu",
+        strip_prefix = "icu-release-62-1",
+        sha256 = "e15ffd84606323cbad5515bf9ecdf8061cc3bf80fb883b9e6aa162e485aa9761",
+        urls = [
+            "https://mirror.bazel.build/github.com/unicode-org/icu/archive/release-62-1.tar.gz",
+            "https://github.com/unicode-org/icu/archive/release-62-1.tar.gz",
+        ],
+        build_file = "//third_party/icu:BUILD.bazel",
+    )
-- 
GitLab


From 6d41787c32483b28f8c93973f28d4d078ea0b37e Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 27 Sep 2018 10:53:36 -0700
Subject: [PATCH 0800/1357] Add opaque field to custom call. The intent of this
 field is to enable more information to be encoded in the custom call and
 passed through to the backend.

PiperOrigin-RevId: 214800539
---
 tensorflow/compiler/xla/client/xla_builder.cc |  8 ++++---
 tensorflow/compiler/xla/client/xla_builder.h  | 24 +++++++++++--------
 tensorflow/compiler/xla/service/hlo.proto     |  8 +++++--
 .../compiler/xla/service/hlo_instruction.cc   |  9 +++----
 .../compiler/xla/service/hlo_instruction.h    |  5 ++--
 .../compiler/xla/service/hlo_instructions.cc  | 14 ++++++++---
 .../compiler/xla/service/hlo_instructions.h   |  8 +++++--
 tensorflow/compiler/xla/service/hlo_parser.cc |  7 ++++--
 .../compiler/xla/service/hlo_parser_test.cc   | 12 ++++++++++
 9 files changed, 67 insertions(+), 28 deletions(-)

diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 95ff6432a5..5277de6a85 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -1278,7 +1278,7 @@ XlaOp XlaBuilder::AfterAll(absl::Span<const XlaOp> tokens) {
 
 XlaOp XlaBuilder::CustomCall(const string& call_target_name,
                              absl::Span<const XlaOp> operands,
-                             const Shape& shape) {
+                             const Shape& shape, const string& opaque) {
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     HloInstructionProto instr;
     if (absl::StartsWith(call_target_name, "$")) {
@@ -1289,6 +1289,7 @@ XlaOp XlaBuilder::CustomCall(const string& call_target_name,
     }
     *instr.mutable_shape() = shape;
     instr.set_custom_call_target(call_target_name);
+    instr.set_custom_call_opaque(opaque);
     return AddInstruction(std::move(instr), HloOpcode::kCustomCall, operands);
   });
 }
@@ -2681,8 +2682,9 @@ XlaOp Call(XlaBuilder* builder, const XlaComputation& computation,
 }
 
 XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name,
-                 absl::Span<const XlaOp> operands, const Shape& shape) {
-  return builder->CustomCall(call_target_name, operands, shape);
+                 absl::Span<const XlaOp> operands, const Shape& shape,
+                 const string& opaque) {
+  return builder->CustomCall(call_target_name, operands, shape, opaque);
 }
 
 XlaOp Complex(const XlaOp& real, const XlaOp& imag,
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index d0c59fa6f2..1da6ddd318 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -577,11 +577,9 @@ class XlaBuilder {
              absl::Span<const XlaOp> operands);
 
   // Enqueues a custom call instruction onto the computation.
-  // During code generation, a call instruction is emitted which targets a
-  // symbol with the name |call_target_name|.  The |operands| are passed to the
-  // call instruction.  |shape| is the resultant shape.
   XlaOp CustomCall(const string& call_target_name,
-                   absl::Span<const XlaOp> operands, const Shape& shape);
+                   absl::Span<const XlaOp> operands, const Shape& shape,
+                   const string& opaque);
 
   // The following methods enqueue element-wise binary arithmetic operations
   // onto the computation. The shapes of the operands have to match unless one
@@ -1195,7 +1193,8 @@ class XlaBuilder {
   friend XlaOp Call(XlaBuilder* builder, const XlaComputation& computation,
                     absl::Span<const XlaOp> operands);
   friend XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name,
-                          absl::Span<const XlaOp> operands, const Shape& shape);
+                          absl::Span<const XlaOp> operands, const Shape& shape,
+                          const string& opaque);
   friend XlaOp Complex(const XlaOp& real, const XlaOp& imag,
                        absl::Span<const int64> broadcast_dimensions);
   friend XlaOp Conj(const XlaOp& operand);
@@ -1717,12 +1716,17 @@ XlaOp OutfeedWithToken(const XlaOp& operand, const XlaOp& token,
 XlaOp Call(XlaBuilder* builder, const XlaComputation& computation,
            absl::Span<const XlaOp> operands);
 
-// Enqueues a custom call instruction onto the computation.
-// During code generation, a call instruction is emitted which targets a
-// symbol with the name |call_target_name|.  The |operands| are passed to the
-// call instruction.  |shape| is the resultant shape.
+// Enqueues a custom call instruction onto the computation. A custom call
+// invokes code external to XLA. The |operands| are passed to the external code,
+// and the external code is expected to produce a result of the given
+// |shape|. The exact mechanism is backend-specific. For example, in the CPU
+// backend, a call instruction is emitted which targets a symbol with the name
+// |call_target_name|.  |call_target_name| and |opaque| can arbitrary strings,
+// but |call_target_name| should be short as it may be used in labels. |opaque|
+// can encode arbitrarily large amounts of information.
 XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name,
-                 absl::Span<const XlaOp> operands, const Shape& shape);
+                 absl::Span<const XlaOp> operands, const Shape& shape,
+                 const string& opaque = "");
 
 // The following methods enqueue element-wise binary arithmetic operations
 // onto the computation. The shapes of the operands have to match unless one
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index b19ec12638..caaca16f71 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto";
 option cc_enable_arenas = true;
 
 // Serialization of HloInstruction.
-// Next ID: 53
+// Next ID: 54
 message HloInstructionProto {
   reserved 10;
   reserved "parameter_name";
@@ -124,9 +124,13 @@ message HloInstructionProto {
   // The string representation of the infeed configuration.
   bytes infeed_config = 27;
 
-  // Name of a global symbol to call, only present for kCustomCall.
+  // Name of a external target (eg, global symbol) to call, only present for
+  // kCustomCall.
   string custom_call_target = 28;
 
+  // Opaque string, only present for kCustomCall.
+  string custom_call_opaque = 53;
+
   // Shape of outfeed request.
   xla.Shape outfeed_shape = 29;
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index f7ec854d80..23787dbc8a 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -379,7 +379,8 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       break;
     case HloOpcode::kCustomCall:
       instruction = CreateCustomCall(proto.shape(), all_operands(),
-                                     proto.custom_call_target());
+                                     proto.custom_call_target(),
+                                     proto.custom_call_opaque());
       if (proto.has_window()) {
         static_cast<HloCustomCallInstruction*>(instruction.get())
             ->set_window(proto.window());
@@ -1108,9 +1109,9 @@ bool HloInstruction::HasSideEffect() const {
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateCustomCall(
     const Shape& shape, absl::Span<HloInstruction* const> operands,
-    absl::string_view custom_call_target) {
-  return absl::make_unique<HloCustomCallInstruction>(shape, operands,
-                                                     custom_call_target);
+    absl::string_view custom_call_target, absl::string_view opaque) {
+  return absl::make_unique<HloCustomCallInstruction>(
+      shape, operands, custom_call_target, opaque);
 }
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateTuple(
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index d615df0831..009bd3bab3 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -718,10 +718,11 @@ class HloInstruction {
       HloComputation* computation);
 
   // Creates a custom call instruction that applies the given custom call target
-  // to the given operands. "shape" is the resultant shape.
+  // to the given operands. "opaque" can be an arbitrary string with a
+  // backend-specific interpretation. "shape" is the resultant shape.
   static std::unique_ptr<HloInstruction> CreateCustomCall(
       const Shape& shape, absl::Span<HloInstruction* const> operands,
-      absl::string_view custom_call_target);
+      absl::string_view custom_call_target, absl::string_view opaque = "");
 
   // Creates a tuple instruction with the given elements. This is a convenience
   // wrapper around CreateVariadic.
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index e92882c22a..cd71bc3323 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -1830,9 +1830,10 @@ HloSelectAndScatterInstruction::CloneWithNewOperandsImpl(
 
 HloCustomCallInstruction::HloCustomCallInstruction(
     const Shape& shape, absl::Span<HloInstruction* const> operands,
-    absl::string_view custom_call_target)
+    absl::string_view custom_call_target, absl::string_view opaque)
     : HloInstruction(HloOpcode::kCustomCall, shape),
       custom_call_target_(custom_call_target.begin(), custom_call_target.end()),
+      opaque_(opaque.begin(), opaque.end()),
       feature_group_count_(1) {
   for (auto operand : operands) {
     AppendOperand(operand);
@@ -1849,6 +1850,7 @@ HloInstructionProto HloCustomCallInstruction::ToProto() const {
         *convolution_dimension_numbers_;
   }
   proto.set_custom_call_target(custom_call_target_);
+  proto.set_custom_call_opaque(opaque_);
   proto.set_feature_group_count(feature_group_count_);
   return proto;
 }
@@ -1872,6 +1874,11 @@ std::vector<string> HloCustomCallInstruction::ExtraAttributesToStringImpl(
   // an HloComputation.
   extra.push_back(
       StrCat("custom_call_target=\"", CEscape(custom_call_target_), "\""));
+  // If the opaque string becomes enormous we may want to reconsider printing
+  // this inline and consider other options.
+  if (!opaque_.empty()) {
+    extra.push_back(StrCat("opaque=\"", CEscape(opaque_), "\""));
+  }
   return extra;
 }
 
@@ -1897,7 +1904,8 @@ bool HloCustomCallInstruction::IdenticalSlowPath(
   if (feature_group_count_ != casted_other.feature_group_count_) {
     return false;
   }
-  return custom_call_target_ == casted_other.custom_call_target_;
+  return custom_call_target_ == casted_other.custom_call_target_ &&
+         opaque_ == casted_other.opaque_;
 }
 
 std::unique_ptr<HloInstruction>
@@ -1905,7 +1913,7 @@ HloCustomCallInstruction::CloneWithNewOperandsImpl(
     const Shape& shape, absl::Span<HloInstruction* const> new_operands,
     HloCloneContext* context) const {
   auto cloned = absl::make_unique<HloCustomCallInstruction>(
-      shape, new_operands, custom_call_target());
+      shape, new_operands, custom_call_target(), opaque());
   if (window_ != nullptr) {
     cloned->set_window(*window_);
   }
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 2d7bc83855..9c22f5db7e 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -1070,7 +1070,8 @@ class HloCustomCallInstruction : public HloInstruction {
  public:
   explicit HloCustomCallInstruction(const Shape& shape,
                                     absl::Span<HloInstruction* const> operands,
-                                    absl::string_view custom_call_target);
+                                    absl::string_view custom_call_target,
+                                    absl::string_view opaque);
   const Window& window() const override {
     CHECK(window_ != nullptr);
     return *window_;
@@ -1090,6 +1091,7 @@ class HloCustomCallInstruction : public HloInstruction {
     convolution_dimension_numbers_ =
         absl::make_unique<ConvolutionDimensionNumbers>(dnums);
   }
+  const string& opaque() const { return opaque_; }
   const string& custom_call_target() const { return custom_call_target_; }
   void set_feature_group_count(int64 feature_group_count) {
     feature_group_count_ = feature_group_count;
@@ -1109,8 +1111,10 @@ class HloCustomCallInstruction : public HloInstruction {
   std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl(
       const Shape& shape, absl::Span<HloInstruction* const> new_operands,
       HloCloneContext* context) const override;
-  // Name of a global symbol to call, only present for kCustomCall.
+  // Name of a global symbol to call.
   string custom_call_target_;
+  // Opaque string interpreted by the backend.
+  string opaque_;
   // Describes the window in a windowed operation such as convolution.
   std::unique_ptr<Window> window_;
   // Describes the dimension numbers used for a convolution.
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 37197b273b..25b70740e3 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -1266,11 +1266,13 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
     }
     case HloOpcode::kCustomCall: {
       optional<string> custom_call_target;
+      optional<string> opaque;
       optional<Window> window;
       optional<ConvolutionDimensionNumbers> dnums;
       optional<int64> feature_group_count;
       attrs["custom_call_target"] = {/*required=*/true, AttrTy::kString,
                                      &custom_call_target};
+      attrs["opaque"] = {/*required=*/false, AttrTy::kString, &opaque};
       attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window};
       attrs["dim_labels"] = {/*required=*/false,
                              AttrTy::kConvolutionDimensionNumbers, &dnums};
@@ -1279,8 +1281,9 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
       if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
         return false;
       }
-      instruction = builder->AddInstruction(HloInstruction::CreateCustomCall(
-          shape, operands, *custom_call_target));
+      instruction = builder->AddInstruction(
+          HloInstruction::CreateCustomCall(shape, operands, *custom_call_target,
+                                           opaque.has_value() ? *opaque : ""));
       if (window.has_value()) {
         instruction->set_window(*window);
       }
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index cca50fab54..96db96bdb9 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1002,6 +1002,18 @@ ENTRY CustomCall {
   ROOT custom-call = f32[1,2,3]{0,2,1} custom-call(constant), custom_call_target="foo\"bar"
 }
 
+)"
+},
+// CustomCall with opaque value.
+{
+"CustomCallWithOpaque",
+R"(HloModule custom_call
+
+ENTRY CustomCall {
+  constant = f32[1]{0} constant({12345})
+  ROOT custom-call = f32[1,2,3]{0,2,1} custom-call(constant), custom_call_target="foo\"bar", opaque="this string is opaque"
+}
+
 )"
 },
 // Variables with non-default names
-- 
GitLab


From dcf72802384fdab6744d3c16577091a82bc2cce0 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 27 Sep 2018 11:01:56 -0700
Subject: [PATCH 0801/1357] Clean up unused members in DirectSession and
 Executor.

PiperOrigin-RevId: 214802032
---
 .../core/common_runtime/direct_session.cc     |  8 ++++----
 .../core/common_runtime/direct_session.h      | 20 ++++++++-----------
 tensorflow/core/common_runtime/executor.h     |  6 ------
 3 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index af5d5b17e7..841181f8c3 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -363,7 +363,7 @@ Status DirectSession::MaybeInitializeExecutionState(
 Status DirectSession::Create(const GraphDef& graph) {
   TF_RETURN_IF_ERROR(init_error_);
   if (graph.node_size() > 0) {
-    mutex_lock l(graph_def_lock_);
+    mutex_lock l(graph_state_lock_);
     if (graph_created_) {
       return errors::AlreadyExists(
           "A Graph has already been created for this session.");
@@ -375,7 +375,7 @@ Status DirectSession::Create(const GraphDef& graph) {
 
 Status DirectSession::Extend(const GraphDef& graph) {
   TF_RETURN_IF_ERROR(CheckNotClosed());
-  mutex_lock l(graph_def_lock_);
+  mutex_lock l(graph_state_lock_);
   return ExtendLocked(graph);
 }
 
@@ -1172,7 +1172,7 @@ Status DirectSession::CreateExecutors(
 
   int graph_def_version;
   {
-    mutex_lock l(graph_def_lock_);
+    mutex_lock l(graph_state_lock_);
     graph_def_version =
         execution_state_->original_graph_def().versions().producer();
   }
@@ -1400,7 +1400,7 @@ Status DirectSession::CreateGraphs(
     std::unique_ptr<FunctionLibraryDefinition>* flib_def,
     RunStateArgs* run_state_args, DataTypeVector* input_types,
     DataTypeVector* output_types, int64* collective_graph_key) {
-  mutex_lock l(graph_def_lock_);
+  mutex_lock l(graph_state_lock_);
   std::unique_ptr<ClientGraph> client_graph;
 
   std::unique_ptr<GraphExecutionState> temp_exec_state_holder;
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index c2cf3c7fd7..4a6a921ea7 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -215,7 +215,7 @@ class DirectSession : public Session {
   // if not already initialized.
   Status MaybeInitializeExecutionState(const GraphDef& graph,
                                        bool* out_already_initialized)
-      EXCLUSIVE_LOCKS_REQUIRED(graph_def_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
 
   // Retrieves an already existing set of executors to run 'inputs' and
   // 'outputs', or creates and caches them for future use.
@@ -248,7 +248,7 @@ class DirectSession : public Session {
                                    RunMetadata* run_metadata);
 
   ::tensorflow::Status ExtendLocked(const GraphDef& graph)
-      EXCLUSIVE_LOCKS_REQUIRED(graph_def_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
 
   ::tensorflow::Status ResourceHandleToInputTensor(
       const Tensor& resource_tensor, Tensor* retrieved_tensor);
@@ -289,7 +289,7 @@ class DirectSession : public Session {
   }
 
   ::tensorflow::Status CheckGraphCreated(const char* method) {
-    mutex_lock l(graph_def_lock_);
+    mutex_lock l(graph_state_lock_);
     if (!graph_created_) {
       return errors::InvalidArgument(
           "Session was not created with a graph before ", method, "!");
@@ -313,10 +313,8 @@ class DirectSession : public Session {
   DeviceSet device_set_;
 
   string session_handle_;
-  bool graph_created_ GUARDED_BY(graph_def_lock_) = false;
-
-  mutex graph_def_lock_;
-  GraphDef graph_def_ GUARDED_BY(graph_def_lock_);
+  mutex graph_state_lock_;
+  bool graph_created_ GUARDED_BY(graph_state_lock_) = false;
 
   // The thread-pools to use for running ops, with a bool indicating if the pool
   // is owned.
@@ -367,11 +365,11 @@ class DirectSession : public Session {
   // nodes can not be moved to a different device.  Maps node names to
   // device names.
   std::unordered_map<string, string> stateful_placements_
-      GUARDED_BY(graph_def_lock_);
+      GUARDED_BY(graph_state_lock_);
 
   // Execution_state; used when placing the entire graph.
   std::unique_ptr<GraphExecutionState> execution_state_
-      GUARDED_BY(graph_def_lock_);
+      GUARDED_BY(graph_state_lock_);
 
   // The function library, before any rewrites or optimizations have been
   // performed. In particular, CreateGraphs() may need to modify the function
@@ -386,7 +384,7 @@ class DirectSession : public Session {
   std::atomic<int64> edge_name_counter_ = {0};
   std::atomic<int64> handle_name_counter_ = {0};
 
-  // For generating step ids that are unique across all sessions.
+  // For generating step ids that are unique across this sessions.
   static std::atomic_int_fast64_t step_id_counter_;
 
   // Global timeout for all blocking operations in this session.
@@ -395,8 +393,6 @@ class DirectSession : public Session {
   // Manages all the cost models for the graphs executed in this session.
   CostModelManager cost_model_manager_;
 
-  Executor::Args::NodeOutputsCallback node_outputs_callback_ = nullptr;
-
   // For testing collective graph key generation.
   mutex collective_graph_key_lock_;
   int64 collective_graph_key_ GUARDED_BY(collective_graph_key_lock_) = -1;
diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h
index 6cd4fd22ea..34bf73972f 100644
--- a/tensorflow/core/common_runtime/executor.h
+++ b/tensorflow/core/common_runtime/executor.h
@@ -97,12 +97,6 @@ class Executor {
     typedef std::function<void()> Closure;
     typedef std::function<void(Closure)> Runner;
     Runner runner = nullptr;
-
-    // A callback that is invoked each time a node has finished executing.
-    typedef std::function<Status(const string& node_name, const int output_slot,
-                                 const Tensor* tensor, const bool is_ref,
-                                 OpKernelContext* ctx)>
-        NodeOutputsCallback;
   };
   typedef std::function<void(const Status&)> DoneCallback;
   virtual void RunAsync(const Args& args, DoneCallback done) = 0;
-- 
GitLab


From 50b94fa1d50a916eaf7a5a46d93260e9b0f93554 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Thu, 27 Sep 2018 11:07:09 -0700
Subject: [PATCH 0802/1357] Internal change

PiperOrigin-RevId: 214803223
---
 tensorflow/contrib/fused_conv/BUILD           |  35 +-
 .../fused_conv2d_bias_activation_op.cc        |   4 +-
 .../fused_conv2d_bias_activation_op_test.py   | 891 +----------------
 ...sed_conv2d_bias_activation_op_test_base.py | 945 ++++++++++++++++++
 .../tools/pip_package/pip_smoke_test.py       |   1 +
 5 files changed, 985 insertions(+), 891 deletions(-)
 create mode 100644 tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py

diff --git a/tensorflow/contrib/fused_conv/BUILD b/tensorflow/contrib/fused_conv/BUILD
index 9725233e7f..490da9b33b 100644
--- a/tensorflow/contrib/fused_conv/BUILD
+++ b/tensorflow/contrib/fused_conv/BUILD
@@ -17,11 +17,14 @@ licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_kernel_library",
+    "tf_custom_op_library",
+    "tf_gen_op_libs",
+    "tf_gen_op_wrapper_py",
+)
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
-load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
-load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs")
-load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
 load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 
 tf_custom_op_py_library(
@@ -109,12 +112,13 @@ tf_gen_op_wrapper_py(
     deps = [":fused_conv2d_bias_activation_op_op_lib"],
 )
 
-cuda_py_test(
-    name = "fused_conv2d_bias_activation_op_test",
-    srcs = ["python/ops/fused_conv2d_bias_activation_op_test.py"],
-    additional_deps = [
+py_library(
+    name = "fused_conv2d_bias_activation_op_test_base",
+    testonly = 1,
+    srcs = ["python/ops/fused_conv2d_bias_activation_op_test_base.py"],
+    visibility = ["//tensorflow/compiler/tf2xla:internal"],
+    deps = [
         ":fused_conv_py",
-        "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client",
         "//tensorflow/python:client_testlib",
@@ -127,8 +131,21 @@ cuda_py_test(
         "//tensorflow/python:random_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+cuda_py_test(
+    name = "fused_conv2d_bias_activation_op_test",
+    size = "large",
+    srcs = ["python/ops/fused_conv2d_bias_activation_op_test.py"],
+    additional_deps = [
+        ":fused_conv2d_bias_activation_op_test_base",
+        "//tensorflow/python:client_testlib",
     ],
     tags = [
+        "no_pip",
         "requires-gpu-sm70",
     ],
 )
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index e9e6464d06..93b1aaa85e 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -111,8 +111,8 @@ class FusedConv2DBiasActivationOp : public OpKernel {
         context,
         (GetTensorDim(strides, data_format_, 'N') == 1 &&
          GetTensorDim(strides, data_format_, 'C') == 1),
-        errors::InvalidArgument("Convolutional strides are not supported in "
-                                "the batch or depth dimensions."));
+        errors::Unimplemented("Convolutional strides are not supported in "
+                              "the batch and depth dimensions."));
 
     // Assuming qint8 <--> NCHW_VECT_C, OIHW_VECT_I (int8x4) here.
     constexpr bool is_int8x4 = std::is_same<T, qint8>::value;
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
index 4894298694..e5c8a34fc1 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
@@ -12,896 +12,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Functional tests for fused conv2d bias and activation operation."""
+
+"""Tests for fused convolutions."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
-from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors_impl
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import random_ops
+from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op_test_base as test_base
 from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging
-
-
-def GetShrunkInceptionShapes(shrink=10):
-  """Iterator for smaller versions of convolution shapes in 2015 Inception.
-
-  Relative to inception, each depth value is `depth // shrink`.
-
-  Args:
-    shrink: Factor to shrink each depth value by relative to Inception.
-
-  Yields:
-    Tuple (input_size, filter_size, out_size, stride, padding), the convolution
-    parameters of Inception layers.
-  """
-  input_sizes = [[4, 5, 5, 1248], [4, 8, 8, 384], [4, 8, 8, 384], [
-      4, 8, 8, 2048
-  ], [4, 8, 8, 448], [4, 8, 8, 2048], [4, 8, 8, 2048], [4, 8, 8, 2048], [
-      4, 8, 8, 1760
-  ], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 17, 17, 192], [
-      4, 17, 17, 192
-  ], [4, 17, 17, 1248], [4, 17, 17, 128], [4, 17, 17, 1248], [4, 17, 17, 224], [
-      4, 17, 17, 192
-  ], [4, 17, 17, 192], [4, 17, 17, 1216], [4, 17, 17, 1216], [4, 17, 17, 224], [
-      4, 17, 17, 192
-  ], [4, 17, 17, 192], [4, 17, 17, 1152], [4, 17, 17, 1152], [4, 17, 17, 192], [
-      4, 17, 17, 160
-  ], [4, 17, 17, 1152], [4, 17, 17, 1024], [4, 17, 17, 128], [4, 17, 17, 1024],
-                 [4, 17, 17, 128], [4, 17, 17, 1024], [4, 17, 17, 128], [
-                     4, 17, 17, 768
-                 ], [4, 17, 17, 128], [4, 17, 17, 128], [4, 17, 17, 768],
-                 [4, 17, 17, 768], [4, 35, 35, 96], [4, 35, 35, 288], [
-                     4, 35, 35, 64
-                 ], [4, 35, 35, 288], [4, 35, 35, 256], [4, 35, 35, 48], [
-                     4, 35, 35, 256
-                 ], [4, 35, 35, 96], [4, 35, 35, 192], [4, 35, 35, 192], [
-                     4, 35, 35, 192
-                 ], [4, 73, 73, 64], [4, 73, 73, 64], [4, 147, 147, 24]]
-  filter_sizes = [[1, 1, 1248, 128], [1, 3, 384, 384], [3, 1, 384, 384], [
-      1, 1, 2048, 192
-  ], [3, 3, 448, 384], [1, 1, 2048, 320], [1, 1, 2048, 448], [1, 1, 2048, 384],
-                  [1, 1, 1760, 384], [1, 1, 1760, 192], [1, 1, 1760, 448], [
-                      1, 1, 1760, 320
-                  ], [3, 3, 192, 192], [3, 3, 192, 192], [1, 1, 1248, 192], [
-                      3, 3, 128, 320
-                  ], [1, 1, 1248, 128], [1, 3, 224, 224], [3, 1, 192, 256], [
-                      1, 3, 192, 256
-                  ], [1, 1, 1216, 192], [1, 1, 1216, 96], [3, 1, 224, 224], [
-                      3, 3, 192, 224
-                  ], [1, 3, 192, 192], [1, 1, 1152, 192], [1, 1, 1152, 128], [
-                      3, 1, 192, 192
-                  ], [3, 3, 160, 192], [1, 1, 1152, 160], [1, 1, 1024, 128], [
-                      1, 3, 128, 192
-                  ], [1, 1, 1024, 160], [3, 1, 128, 192], [1, 1, 1024, 256], [
-                      3, 1, 128, 128
-                  ], [1, 1, 768, 192], [1, 3, 128, 128], [3, 3, 128, 128], [
-                      1, 1, 768, 128
-                  ], [1, 1, 768, 320], [3, 3, 96, 96], [3, 3, 288, 384], [
-                      3, 3, 64, 96
-                  ], [1, 1, 288, 64], [1, 1, 256, 64], [5, 5, 48, 64],
-                  [1, 1, 256, 48], [3, 3, 96, 96], [1, 1, 192, 32], [
-                      1, 1, 192, 64
-                  ], [1, 1, 192, 48], [3, 3, 64, 192], [1, 1, 64,
-                                                        64], [1, 1, 24, 64]]
-  out_sizes = [[4, 5, 5, 128], [4, 8, 8, 384], [4, 8, 8, 384], [4, 8, 8, 192], [
-      4, 8, 8, 384
-  ], [4, 8, 8, 320], [4, 8, 8, 448], [4, 8, 8, 384], [4, 8, 8, 384], [
-      4, 8, 8, 192
-  ], [4, 8, 8, 448], [4, 8, 8, 320], [4, 8, 8, 192], [4, 17, 17, 192], [
-      4, 17, 17, 192
-  ], [4, 8, 8, 320], [4, 17, 17, 128], [4, 17, 17, 224], [4, 17, 17, 256], [
-      4, 17, 17, 256
-  ], [4, 17, 17, 192], [4, 17, 17, 96], [4, 17, 17, 224], [4, 17, 17, 224], [
-      4, 17, 17, 192
-  ], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 192], [
-      4, 17, 17, 160
-  ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 160], [4, 17, 17, 192], [
-      4, 17, 17, 256
-  ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 128], [
-      4, 17, 17, 128
-  ], [4, 17, 17, 320], [4, 17, 17, 96], [4, 17, 17, 384], [4, 35, 35, 96], [
-      4, 35, 35, 64
-  ], [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 48], [4, 35, 35, 96],
-               [4, 35, 35, 32], [4, 35, 35, 64], [4, 35, 35, 48],
-               [4, 71, 71, 192], [4, 73, 73, 64], [4, 147, 147, 64]]
-  strides = [
-      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1
-  ]
-  # Shrink sizes to make the test faster
-  for i in input_sizes:
-    i[3] //= shrink
-  for f in filter_sizes:
-    f[2] //= shrink
-    f[3] //= shrink
-  for o in out_sizes:
-    o[3] //= shrink
-  # pylint: disable=invalid-name
-  VALID = "VALID"
-  SAME = "SAME"
-  # pylint: enable=invalid-name
-  paddings = [
-      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
-      VALID, SAME, SAME, VALID, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
-      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
-      SAME, SAME, SAME, SAME, SAME, VALID, VALID, SAME, SAME, SAME, SAME, SAME,
-      SAME, SAME, SAME, SAME, VALID, VALID, VALID
-  ]
-  for i, f, o, s, p in zip(input_sizes, filter_sizes, out_sizes, strides,
-                           paddings):
-    yield i, f, o, s, p
-
-
-def GetTestConfigs():
-  """Get all the valid tests configs to run.
-
-  Returns:
-    all the valid test configs as tuples of data_format and use_gpu.
-  """
-  test_configs = [("NCHW", True), ("NHWC", True)]
-  return test_configs
-
-
-class FusedConv2DBiasActivationTest(test.TestCase):
-
-  def _DtypesToTest(self, use_gpu):
-    return [dtypes.float32]
-
-  def _FilterFormatsToTest(self, use_gpu):
-    return ["HWIO", "OIHW"]
-
-  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias,
-                            strides, padding, activation_mode, data_format,
-                            filter_format, dtype):
-    """Verifies the output values of the convolution function.
-
-    Args:
-      tensor_in_sizes: Input tensor dimensions in
-        [batch, input_rows, input_cols, input_depth].
-      filter_in_sizes: Filter tensor dimensions in
-        [kernel_rows, kernel_cols, input_depth, output_depth].
-      bias: 1-D bias tensor of length output_depth.
-      strides: Stride: [col_stride, row_stride]
-      padding: Padding type.
-      activation_mode: Activation mode.
-      data_format: Format of the data tensors.
-      filter_format: Filter format to use for the fused convolution.
-      dtype: Data type for inputs and outputs.
-    Returns:
-      Symbolic tensor value and reference value that can be used to
-      execute the computation and verify the results.
-    """
-    input_size = np.prod(tensor_in_sizes)
-    filter_size = np.prod(filter_in_sizes)
-    bias_size = filter_in_sizes[-1]  # equals to output depth
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    x1 = [f * 1.0 for f in range(1, input_size + 1)]
-    x2 = [f * 1.0 for f in range(1, filter_size + 1)]
-    # This is to guarantee that there is always negative values after
-    # bias add so that we can test whether relu works correctly.
-    x3 = bias
-    with self.test_session(use_gpu=True):
-      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
-      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
-      fused_t2 = t2
-      if filter_format == "OIHW":
-        fused_t2 = HwioToOihw(t2)
-      t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype)
-      strides = [1] + strides + [1]
-      if data_format == "NCHW":
-        t1 = test_util.NHWCToNCHW(t1)
-        strides = test_util.NHWCToNCHW(strides)
-      output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-          t1,
-          fused_t2,
-          t3,
-          strides=strides,
-          padding=padding,
-          data_format=data_format,
-          filter_format=filter_format,
-          activation_mode=activation_mode)
-      ref_conv_output = nn_ops.conv2d(
-          t1, t2, strides=strides, padding=padding, data_format=data_format)
-      ref_bias_output = nn_ops.bias_add(
-          ref_conv_output, t3, data_format=data_format)
-      ref_output = nn_ops.relu(ref_bias_output)
-      if data_format == "NCHW":
-        output = test_util.NCHWToNHWC(output)
-        ref_output = test_util.NCHWToNHWC(ref_output)
-
-      return output, ref_output
-
-  def _CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides,
-                        padding):
-    """Verifies that CPU and GPU produce the same values.
-
-    Args:
-      tensor_in_sizes: Input tensor dimensions in
-        [batch, input_rows, input_cols, input_depth].
-      filter_in_sizes: Filter tensor dimensions in
-        [kernel_rows, kernel_cols, input_depth, output_depth].
-      conv_strides: [row_stride, col_stride] for the convolution;
-      padding: Padding type.
-    """
-    x1 = np.random.rand(*tensor_in_sizes).astype(np.float32)
-    x2 = np.random.rand(*filter_in_sizes).astype(np.float32)
-    x3 = np.random.rand(*[filter_in_sizes[-1]]).astype(np.float32)
-
-    def _SetupVal(data_format, use_gpu):
-      with self.test_session(use_gpu=use_gpu):
-        t1 = constant_op.constant(x1, shape=tensor_in_sizes)
-        t2 = constant_op.constant(x2, shape=filter_in_sizes)
-        t3 = constant_op.constant(x3, shape=[filter_in_sizes[-1]])
-        strides = [1] + conv_strides + [1]
-        if data_format == "NCHW":
-          t1 = test_util.NHWCToNCHW(t1)
-          strides = test_util.NHWCToNCHW(strides)
-        output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-            t1,
-            t2,
-            t3,
-            strides=strides,
-            padding=padding,
-            data_format=data_format,
-            activation_mode="Relu")
-
-        if data_format == "NCHW":
-          output = test_util.NCHWToNHWC(output)
-        return output
-
-    tensors = []
-    for (data_format, use_gpu) in GetTestConfigs():
-      tensors.append(_SetupVal(data_format, use_gpu))
-    with self.cached_session() as sess:
-      values = sess.run(tensors)
-      for i in range(1, len(values)):
-        self.assertAllClose(values[0], values[i], rtol=1e-3, atol=1e-3)
-
-  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, bias, strides,
-                    padding):
-    tensors = []
-    ref_tensors = []
-    for (data_format, use_gpu) in GetTestConfigs():
-      for dtype in self._DtypesToTest(use_gpu):
-        for filter_format in self._FilterFormatsToTest(use_gpu):
-          result, expected = self._SetupValuesForDevice(
-              tensor_in_sizes, filter_in_sizes, bias, strides, padding, "Relu",
-              data_format, filter_format, dtype)
-        tensors.append(result)
-        ref_tensors.append(expected)
-      with self.cached_session() as sess:
-        values = sess.run(tensors)
-        ref_values = sess.run(ref_tensors)
-        for i in range(len(tensors)):
-          conv = tensors[i]
-          value = values[i]
-          ref_value = ref_values[i]
-          tf_logging.info("expected = ", ref_value)
-          tf_logging.info("actual = ", value)
-          tol = 1e-5
-          if value.dtype == np.float16:
-            tol = 1e-3
-          self.assertAllClose(
-              np.ravel(ref_value), np.ravel(value), atol=tol, rtol=tol)
-          self.assertShapeEqual(value, conv)
-
-  def testConv2D1x1Filter(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2D1x1Filter test.")
-      return
-    # expected_output = [
-    #    0.0, 0.0, 0.0, 21.0, 0.0, 0.0, 57.0, 0.0, 0.0, 93.0, 41.0, 0.0, 129.0,
-    #    86.0, 43.0, 165.0, 131.0, 97.0
-    # ]
-    medians = [-45.0, -130.0, -215.0]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 2, 3, 3],
-        filter_in_sizes=[1, 1, 3, 3],
-        bias=medians,
-        strides=[1, 1],
-        padding="VALID")
-
-  def testConv2DEmpty(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2DEmpty test.")
-      return
-    # expected_output = []
-    self._VerifyValues(
-        tensor_in_sizes=[0, 2, 3, 3],
-        filter_in_sizes=[1, 1, 3, 3],
-        bias=[0.0, 0.0, 0.0],
-        strides=[1, 1],
-        padding="VALID")
-
-  def testConv2D2x2Filter(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2D2x2Filter test.")
-      return
-    # expected_output = [0.0, 0.0, 0.0, 401.0, 533.0, 665.0]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 2, 3, 3],
-        filter_in_sizes=[2, 2, 3, 3],
-        bias=[-2500.0, -2500.0, -2500.0],
-        strides=[1, 1],
-        padding="VALID")
-
-  def testConv2D1x2Filter(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2D1x2Filter test.")
-      return
-    # expected_output = [
-    #    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 190.0, 265.0, 340.0, 343.0, 436.0, 529.0
-    # ]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 2, 3, 3],
-        filter_in_sizes=[1, 2, 3, 3],
-        bias=[-500.0, -500.0, -500.0],
-        strides=[1, 1],
-        padding="VALID")
-
-  def testConv2D2x2FilterStride2(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2D2x2FilterStride2 test.")
-      return
-    # expected_output = [0.0, 67.0, 163.0]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 2, 3, 3],
-        filter_in_sizes=[2, 2, 3, 3],
-        bias=[-2300.0, -2300.0, -2300.0],
-        strides=[2, 2],
-        padding="VALID")
-
-  def testConv2D2x2FilterStride2Same(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2D2x2FilterStride2Same test.")
-      return
-    # expected_output = [0.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 2, 3, 3],
-        filter_in_sizes=[2, 2, 3, 3],
-        bias=[-2300.0, -1000.0, -1000.0],
-        strides=[2, 2],
-        padding="SAME")
-
-  def testConv2D2x2FilterStride1x2(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2D2x2FilterStride1x2 test.")
-      return
-    # expected_output = [0.0, 0.0, 8.0, 28.0, 48.0, 68.0]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 3, 6, 1],
-        filter_in_sizes=[2, 2, 1, 1],
-        bias=[-90.0],
-        strides=[1, 2],
-        padding="VALID")
-
-  def testConv2DKernelSmallerThanStrideValid(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2DKernelSmallerThanStrideValid test.")
-      return
-    # expected_output = [0, 0, 175, 205]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 7, 7, 1],
-        filter_in_sizes=[2, 2, 1, 1],
-        bias=[-100.0],
-        strides=[3, 3],
-        padding="VALID")
-
-  def testConv2DKernelSmallerThanStrideSame(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2DKernelSmallerThanStrideSame test.")
-      return
-    # expected = [0, 0, 2, 4]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 3, 3, 1],
-        filter_in_sizes=[1, 1, 1, 1],
-        bias=[-5.0],
-        strides=[2, 2],
-        padding="SAME")
-
-    # expected = [0, 0, 4, 6]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 4, 4, 1],
-        filter_in_sizes=[1, 1, 1, 1],
-        bias=[-5.0],
-        strides=[2, 2],
-        padding="SAME")
-
-    # expected = [4, 0, 1, 0]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 4, 4, 1],
-        filter_in_sizes=[2, 2, 1, 1],
-        bias=[-40.0],
-        strides=[3, 3],
-        padding="SAME")
-
-  def testConv2DKernelSizeMatchesInputSize(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping Conv2DKernelSizeMatchesInputSize test.")
-      return
-    # expected = [0, 5]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 2, 2, 1],
-        filter_in_sizes=[2, 2, 1, 2],
-        bias=[-50.0, -55.0],
-        strides=[1, 1],
-        padding="VALID")
-
-    # expected = [0, 2, 282, 322]
-    self._VerifyValues(
-        tensor_in_sizes=[1, 8, 8, 1],
-        filter_in_sizes=[2, 2, 1, 1],
-        bias=[-200.0],
-        strides=[4, 4],
-        padding="SAME")
-
-  def testShapeFunctionEdgeCases(self):
-    # All shapes unknown.
-    c1 = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-        array_ops.placeholder(dtypes.float32),
-        array_ops.placeholder(dtypes.float32),
-        array_ops.placeholder(dtypes.float32),
-        strides=[1, 1, 1, 1],
-        padding="SAME",
-        activation_mode="Relu")
-    self.assertEqual([None, None, None, None], c1.get_shape().as_list())
-
-    # Incorrect input shape.
-    with self.assertRaises(ValueError):
-      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-          array_ops.placeholder(dtypes.float32, shape=[1, 3]),
-          array_ops.placeholder(dtypes.float32),
-          array_ops.placeholder(dtypes.float32),
-          strides=[1, 1, 1, 1],
-          padding="SAME",
-          activation_mode="Relu")
-
-    # Incorrect filter shape.
-    with self.assertRaises(ValueError):
-      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-          array_ops.placeholder(dtypes.float32),
-          array_ops.placeholder(dtypes.float32, shape=[1, 3]),
-          array_ops.placeholder(dtypes.float32),
-          strides=[1, 1, 1, 1],
-          padding="SAME",
-          activation_mode="Relu")
-
-    # Depth mismatch.
-    with self.assertRaises(ValueError):
-      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-          array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
-          array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 2]),
-          array_ops.placeholder(dtypes.float32),
-          strides=[1, 1, 1, 1],
-          padding="SAME",
-          activation_mode="Relu")
-
-  def testOpEdgeCases(self, gpu_only=True):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping OpEdgeCases tests.")
-      return
-    with self.cached_session() as sess:
-      # Illegal strides.
-      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
-                                   "Convolutional strides are not supported in "
-                                   "the batch or depth dimensions."):
-        sess.run(
-            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-                array_ops.placeholder(dtypes.float32),
-                array_ops.placeholder(dtypes.float32),
-                array_ops.placeholder(dtypes.float32),
-                strides=[2, 1, 1, 1],
-                padding="SAME",
-                activation_mode="Relu"))
-      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
-                                   "Convolutional strides are not supported in "
-                                   "the batch or depth dimensions."):
-        sess.run(
-            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-                array_ops.placeholder(dtypes.float32),
-                array_ops.placeholder(dtypes.float32),
-                array_ops.placeholder(dtypes.float32),
-                strides=[1, 1, 1, 2],
-                padding="SAME",
-                activation_mode="Relu"))
-
-      # Illegal activation mode.
-      with self.assertRaisesRegexp(ValueError,
-                                   "Op passed string 'Tanh' not in:"):
-        sess.run(
-            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-                array_ops.placeholder(dtypes.float32),
-                array_ops.placeholder(dtypes.float32),
-                array_ops.placeholder(dtypes.float32),
-                strides=[1, 1, 1, 1],
-                padding="SAME",
-                activation_mode="Tanh"))
-
-      # Filter larger than input.
-      with self.assertRaisesRegexp(ValueError, "Negative dimension size"):
-        sess.run(
-            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-                array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
-                array_ops.placeholder(dtypes.float32, shape=[20, 21, 3, 2]),
-                array_ops.placeholder(dtypes.float32, shape=[2]),
-                strides=[1, 1, 1, 1],
-                padding="VALID",
-                activation_mode="Relu"))
-      with self.assertRaisesRegexp(ValueError, "Negative dimension size"):
-        sess.run(
-            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-                array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
-                array_ops.placeholder(dtypes.float32, shape=[21, 20, 3, 2]),
-                array_ops.placeholder(dtypes.float32, shape=[2]),
-                strides=[1, 1, 1, 1],
-                padding="VALID",
-                activation_mode="Relu"))
-
-
-def GetInceptionFwdTest(input_size, filter_size, stride, padding,
-                        gpu_only=True):
-
-  def Test(self):
-    if gpu_only and not test.is_gpu_available():
-      tf_logging.info("Skipping InceptionFwd %s", (input_size, filter_size,
-                                                   stride, padding))
-      return
-    tf_logging.info("Testing InceptionFwd %s", (input_size, filter_size, stride,
-                                                padding))
-    self._CompareFwdValues(input_size, filter_size, [stride, stride], padding)
-
-  return Test
-
-
-def CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type):
-  """Calculates the size of an output dimension of a strided convolution.
-
-  Given the sizes of the corresponding dimension of the input and filter shapes,
-  and the stride and padding_types, calculates the size of the output dimension.
-  This function can be called separately for each input dimension.
-
-  Args:
-    input_dim: An `int` specifying the size of the input dimension.
-    filter_dim: An `int` specifying the size of the filter dimension.
-    stride: An `int` specifying the step size of the convolution along the
-      input dimension.
-    padding_type: either 'VALID' or 'SAME'.
-
-  Returns:
-    The size of the output dimension.
-  """
-  if padding_type == "VALID":
-    return (input_dim - filter_dim + stride) // stride
-  else:  # padding_type == 'SAME'
-    return (input_dim + stride - 1) // stride
-
-
-def NchwVectCToNchw(in_tensor):
-  # [N, C / 4, H, W, 4] => [N, C / 4, 4, H, W] == [N, C, H, W]
-  t = array_ops.transpose(in_tensor, [0, 1, 4, 2, 3])
-  n = in_tensor.shape.dims[0].value
-  c = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value
-  h = in_tensor.shape.dims[2].value
-  w = in_tensor.shape.dims[3].value
-  return array_ops.reshape(t, [n, c, h, w])
-
-
-def OihwVectIToHwio(in_tensor):
-  # [O, I / 4, H, W, 4] => [O, I / 4, 4, H, W] == [O, I, H, W]
-  t = array_ops.transpose(in_tensor, [2, 3, 1, 4, 0])
-  o = in_tensor.shape.dims[0].value
-  i = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value
-  h = in_tensor.shape.dims[2].value
-  w = in_tensor.shape.dims[3].value
-  return array_ops.reshape(t, [h, w, i, o])
-
-
-def NchwToNchwVectC(in_tensor):
-  n, c, h, w = in_tensor.shape.as_list()
-  assert c % 4 == 0
-  t = array_ops.reshape(in_tensor, [n, c // 4, 4, h, w])
-  return array_ops.transpose(t, [0, 1, 3, 4, 2])
-
-
-def HwioToOihw(in_tensor):
-  return array_ops.transpose(in_tensor, [3, 2, 0, 1])
-
-
-def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
-                                          padding, strides, side_input_scale,
-                                          side_input, biases, apply_relu):
-  """Simulates the int8 fused 2-D convolution op using separate float ops.
-
-    The arguments and return values have the same format, meanings and
-    restrictions as the actual op.
-  Args:
-    conv_input_scale: A scalar 'float'.
-    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
-    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
-    padding: A `string` from: `"SAME", "VALID"`.
-    strides: A list of `ints`.
-    side_input_scale: A scalar 'float'.
-    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
-    biases: A `Tensor` of type `float32` in NCHW layout.
-    apply_relu: A boolean to specify whether to apply "Relu" activation function
-      that clips outputs to the range [0, 127], or "None" activation that clips
-      to the range [-128, 127].
-  Returns:
-    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
-  """
-  conv_result = nn_ops.conv2d(
-      NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127)),
-      OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127)),
-      strides=strides,
-      padding=padding,
-      data_format="NCHW") * conv_input_scale
-
-  conv_and_side_inputs = conv_result + side_input_scale * NchwVectCToNchw(
-      gen_array_ops.dequantize(side_input, -128, 127))
-
-  output = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW")
-  if apply_relu:
-    output = nn_ops.relu(output)
-
-  result, _, _ = gen_array_ops.quantize_v2(
-      NchwToNchwVectC(output), -128, 127, dtypes.qint8)
-  return result
-
-
-class FusedConvInt8Tests(test.TestCase):
-  _test_params = [
-      {
-          "batch_size": 1,
-          "input_channels": 4,
-          "output_channels": 4,
-          "input_height": 8,
-          "input_width": 8,
-          "filter_height": 6,
-          "filter_width": 6,
-          "vertical_stride": 2,
-          "horizontal_stride": 2,
-          "conv_input_scale": 0.002,
-          "side_input_scale": 0.0,
-          "bias_scale": 1,
-          "padding_type": "SAME"
-      },
-      {
-          "batch_size": 1,
-          "input_channels": 4,
-          "output_channels": 4,
-          "input_height": 6,
-          "input_width": 6,
-          "filter_height": 6,
-          "filter_width": 6,
-          "vertical_stride": 2,
-          "horizontal_stride": 2,
-          "conv_input_scale": 0.002,
-          "side_input_scale": 0.0,
-          "bias_scale": 1,
-          "padding_type": "SAME"
-      },
-      {
-          "batch_size": 2,
-          "input_channels": 8,
-          "output_channels": 16,
-          "input_height": 8,
-          "input_width": 8,
-          "filter_height": 3,
-          "filter_width": 3,
-          "vertical_stride": 2,
-          "horizontal_stride": 2,
-          "conv_input_scale": 0.002,
-          "side_input_scale": 0.0,
-          "bias_scale": 1,
-          "padding_type": "VALID"
-      },
-      {
-          "batch_size": 2,
-          "input_channels": 8,
-          "output_channels": 16,
-          "input_height": 8,
-          "input_width": 8,
-          "filter_height": 3,
-          "filter_width": 3,
-          "vertical_stride": 2,
-          "horizontal_stride": 2,
-          "conv_input_scale": 0.002,
-          "side_input_scale": 0.0,
-          "bias_scale": 1,
-          "padding_type": "SAME"
-      },
-      {
-          "batch_size": 2,
-          "input_channels": 8,
-          "output_channels": 16,
-          "input_height": 8,
-          "input_width": 8,
-          "filter_height": 3,
-          "filter_width": 3,
-          "vertical_stride": 2,
-          "horizontal_stride": 2,
-          "conv_input_scale": 0.002,
-          "side_input_scale": 0.5,
-          "bias_scale": 1,
-          "padding_type": "VALID"
-      },
-      {
-          "batch_size": 2,
-          "input_channels": 16,
-          "output_channels": 16,
-          "input_height": 9,
-          "input_width": 9,
-          "filter_height": 3,
-          "filter_width": 3,
-          "vertical_stride": 1,
-          "horizontal_stride": 1,
-          "conv_input_scale": 0.001,
-          "side_input_scale": 0.5,
-          "bias_scale": 1,
-          "padding_type": "SAME"
-      },
-      {
-          "batch_size": 3,
-          "input_channels": 8,
-          "output_channels": 8,
-          "input_height": 9,
-          "input_width": 9,
-          "filter_height": 5,
-          "filter_width": 5,
-          "vertical_stride": 1,
-          "horizontal_stride": 1,
-          "conv_input_scale": 0.001,
-          "side_input_scale": 0.5,
-          "bias_scale": 1,
-          "padding_type": "SAME"
-      },
-      {
-          "batch_size": 3,
-          "input_channels": 8,
-          "output_channels": 8,
-          "input_height": 9,
-          "input_width": 9,
-          "filter_height": 7,
-          "filter_width": 1,
-          "vertical_stride": 2,
-          "horizontal_stride": 1,
-          "conv_input_scale": 0.002,
-          "side_input_scale": 0.5,
-          "bias_scale": 1,
-          "padding_type": "SAME"
-      },
-      {
-          "batch_size": 3,
-          "input_channels": 8,
-          "output_channels": 8,
-          "input_height": 9,
-          "input_width": 9,
-          "filter_height": 1,
-          "filter_width": 7,
-          "vertical_stride": 1,
-          "horizontal_stride": 1,
-          "conv_input_scale": 0.002,
-          "side_input_scale": 0.5,
-          "bias_scale": 1,
-          "padding_type": "SAME"
-      },
-  ]
-
-  def runTest(self, test_param, apply_relu):
-    batch_size = test_param["batch_size"]
-    input_channels = test_param["input_channels"]
-    output_channels = test_param["output_channels"]
-    input_height = test_param["input_height"]
-    input_width = test_param["input_width"]
-    filter_height = test_param["filter_height"]
-    filter_width = test_param["filter_width"]
-    vertical_stride = test_param["vertical_stride"]
-    horizontal_stride = test_param["horizontal_stride"]
-    conv_input_scale = test_param["conv_input_scale"]
-    side_input_scale = test_param["side_input_scale"]
-    bias_scale = test_param["bias_scale"]
-    padding_type = test_param["padding_type"]
-
-    conv_input, _, _ = gen_array_ops.quantize_v2(
-        random_ops.random_uniform(
-            [batch_size, input_channels // 4, input_height, input_width, 4],
-            minval=-0.0,
-            maxval=1.0,
-            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)
-
-    kernel, _, _ = gen_array_ops.quantize_v2(
-        random_ops.random_uniform(
-            [
-                output_channels, input_channels // 4, filter_height,
-                filter_width, 4
-            ],
-            minval=-1.0,
-            maxval=1.0,
-            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)
-
-    output_height = CalculateConvolvedOutputDim(input_height, filter_height,
-                                                vertical_stride, padding_type)
-    output_width = CalculateConvolvedOutputDim(input_width, filter_width,
-                                               horizontal_stride, padding_type)
-    tf_logging.info("output_height=", output_height, ", output_width=",
-                    output_width)
-
-    side_input, _, _ = gen_array_ops.quantize_v2(
-        random_ops.random_uniform(
-            [batch_size, output_channels // 4, output_height, output_width, 4],
-            minval=0.0,
-            maxval=1.0,
-            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)
-
-    biases = random_ops.random_uniform(
-        [output_channels],
-        minval=-10 * bias_scale,
-        maxval=20 * bias_scale,
-        dtype=dtypes.float32)
-
-    strides = [1, 1, vertical_stride, horizontal_stride]
-
-    actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-        conv_input,
-        kernel,
-        biases,
-        strides=strides,
-        padding=padding_type,
-        conv_input_scale=conv_input_scale,
-        side_input_scale=side_input_scale,
-        side_input=side_input,
-        activation_mode="Relu" if apply_relu else "None",
-        data_format="NCHW_VECT_C",
-        filter_format="OIHW_VECT_I")
 
-    expected = SimulateFusedConv2dBiasActivationInt8(
-        conv_input_scale, conv_input, kernel, padding_type, strides,
-        side_input_scale, side_input, biases, apply_relu)
 
-    with self.test_session(use_gpu=True) as sess:
-      actual_y, expected_y = sess.run([actual, expected])
-      self.assertAllClose(actual_y, expected_y, rtol=0, atol=1)
+# Instantiate the two test suites from test_base, mixing in test.TestCase as
+# the test framework.
+class FusedConv2DBiasActivationTest(test_base.FusedConv2DBiasActivationTest,
+                                    test.TestCase):
+  pass
 
-  def testFusedConvInt8(self):
-    if not test.is_gpu_available(
-        cuda_only=True, min_cuda_compute_capability=(6, 1)):
-      tf_logging.info("int8 test skipped because not run with --config=cuda or "
-                      "no GPUs with compute capability >= 6.1 are available.")
-      return
-    for apply_relu in [True, False]:
-      for test_param in self._test_params:
-        self.runTest(test_param, apply_relu)
 
+class FusedConvInt8Tests(test_base.FusedConvInt8Tests, test.TestCase):
+  pass
 
-if __name__ == "__main__":
-  for index, (input_size_, filter_size_, output_size_, stride_,
-              padding_) in enumerate(GetShrunkInceptionShapes()):
-    setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_" + str(index),
-            GetInceptionFwdTest(input_size_, filter_size_, stride_, padding_))
 
-  # TODO(b/35359731)
-  # Fwd, BckInput, and BackFilter to test that for certain input parameter
-  # set, winograd nonfused algorithm will be excluded from conv autotune. If
-  # in such case, winograd nonfused algorithm is added as one option of the
-  # conv autotune, and cuDNN version is smaller than 7, the following tests
-  # will fail.
-  ishape = [1, 400, 400, 1]
-  fshape = [1, 1, 1, 256]
-  oshape = [1, 400, 400, 256]
-  setattr(FusedConv2DBiasActivationTest,
-          "testInceptionFwd_No_Winograd_Nonfused",
-          GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True))
+if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py
new file mode 100644
index 0000000000..35fc65e4ba
--- /dev/null
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py
@@ -0,0 +1,945 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Provides test suites that can be run to test fused convolutions.
+
+Each of the two test suites in this module, FusedConv2DBiasActivationTest and
+FusedConvInt8Tests, should be "instantiated" by declaring a class which inherits
+from the FusedConv test and a class that provides the standard test.TestCase
+API.
+
+See e.g. fused_conv2d_bias_activation_op_test.py in this folder.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+import numpy as np
+
+from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+
+
+def _GetShrunkInceptionShapes(shrink=10):
+  """Iterator for smaller versions of convolution shapes in 2015 Inception.
+
+  Relative to inception, each depth value is `depth // shrink`.
+
+  Args:
+    shrink: Factor to shrink each depth value by relative to Inception.
+
+  Yields:
+    Tuple (input_size, filter_size, out_size, stride, padding), the convolution
+    parameters of Inception layers.
+  """
+  input_sizes = [[4, 5, 5, 1248], [4, 8, 8, 384], [4, 8, 8, 384], [
+      4, 8, 8, 2048
+  ], [4, 8, 8, 448], [4, 8, 8, 2048], [4, 8, 8, 2048], [4, 8, 8, 2048], [
+      4, 8, 8, 1760
+  ], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 17, 17, 192], [
+      4, 17, 17, 192
+  ], [4, 17, 17, 1248], [4, 17, 17, 128], [4, 17, 17, 1248], [4, 17, 17, 224], [
+      4, 17, 17, 192
+  ], [4, 17, 17, 192], [4, 17, 17, 1216], [4, 17, 17, 1216], [4, 17, 17, 224], [
+      4, 17, 17, 192
+  ], [4, 17, 17, 192], [4, 17, 17, 1152], [4, 17, 17, 1152], [4, 17, 17, 192], [
+      4, 17, 17, 160
+  ], [4, 17, 17, 1152], [4, 17, 17, 1024], [4, 17, 17, 128], [4, 17, 17, 1024],
+                 [4, 17, 17, 128], [4, 17, 17, 1024], [4, 17, 17, 128], [
+                     4, 17, 17, 768
+                 ], [4, 17, 17, 128], [4, 17, 17, 128], [4, 17, 17, 768],
+                 [4, 17, 17, 768], [4, 35, 35, 96], [4, 35, 35, 288], [
+                     4, 35, 35, 64
+                 ], [4, 35, 35, 288], [4, 35, 35, 256], [4, 35, 35, 48], [
+                     4, 35, 35, 256
+                 ], [4, 35, 35, 96], [4, 35, 35, 192], [4, 35, 35, 192], [
+                     4, 35, 35, 192
+                 ], [4, 73, 73, 64], [4, 73, 73, 64], [4, 147, 147, 24]]
+  filter_sizes = [[1, 1, 1248, 128], [1, 3, 384, 384], [3, 1, 384, 384], [
+      1, 1, 2048, 192
+  ], [3, 3, 448, 384], [1, 1, 2048, 320], [1, 1, 2048, 448], [1, 1, 2048, 384],
+                  [1, 1, 1760, 384], [1, 1, 1760, 192], [1, 1, 1760, 448], [
+                      1, 1, 1760, 320
+                  ], [3, 3, 192, 192], [3, 3, 192, 192], [1, 1, 1248, 192], [
+                      3, 3, 128, 320
+                  ], [1, 1, 1248, 128], [1, 3, 224, 224], [3, 1, 192, 256], [
+                      1, 3, 192, 256
+                  ], [1, 1, 1216, 192], [1, 1, 1216, 96], [3, 1, 224, 224], [
+                      3, 3, 192, 224
+                  ], [1, 3, 192, 192], [1, 1, 1152, 192], [1, 1, 1152, 128], [
+                      3, 1, 192, 192
+                  ], [3, 3, 160, 192], [1, 1, 1152, 160], [1, 1, 1024, 128], [
+                      1, 3, 128, 192
+                  ], [1, 1, 1024, 160], [3, 1, 128, 192], [1, 1, 1024, 256], [
+                      3, 1, 128, 128
+                  ], [1, 1, 768, 192], [1, 3, 128, 128], [3, 3, 128, 128], [
+                      1, 1, 768, 128
+                  ], [1, 1, 768, 320], [3, 3, 96, 96], [3, 3, 288, 384], [
+                      3, 3, 64, 96
+                  ], [1, 1, 288, 64], [1, 1, 256, 64], [5, 5, 48, 64],
+                  [1, 1, 256, 48], [3, 3, 96, 96], [1, 1, 192, 32], [
+                      1, 1, 192, 64
+                  ], [1, 1, 192, 48], [3, 3, 64, 192], [1, 1, 64,
+                                                        64], [1, 1, 24, 64]]
+  out_sizes = [[4, 5, 5, 128], [4, 8, 8, 384], [4, 8, 8, 384], [4, 8, 8, 192], [
+      4, 8, 8, 384
+  ], [4, 8, 8, 320], [4, 8, 8, 448], [4, 8, 8, 384], [4, 8, 8, 384], [
+      4, 8, 8, 192
+  ], [4, 8, 8, 448], [4, 8, 8, 320], [4, 8, 8, 192], [4, 17, 17, 192], [
+      4, 17, 17, 192
+  ], [4, 8, 8, 320], [4, 17, 17, 128], [4, 17, 17, 224], [4, 17, 17, 256], [
+      4, 17, 17, 256
+  ], [4, 17, 17, 192], [4, 17, 17, 96], [4, 17, 17, 224], [4, 17, 17, 224], [
+      4, 17, 17, 192
+  ], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 192], [
+      4, 17, 17, 160
+  ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 160], [4, 17, 17, 192], [
+      4, 17, 17, 256
+  ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 128], [
+      4, 17, 17, 128
+  ], [4, 17, 17, 320], [4, 17, 17, 96], [4, 17, 17, 384], [4, 35, 35, 96], [
+      4, 35, 35, 64
+  ], [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 48], [4, 35, 35, 96],
+               [4, 35, 35, 32], [4, 35, 35, 64], [4, 35, 35, 48],
+               [4, 71, 71, 192], [4, 73, 73, 64], [4, 147, 147, 64]]
+  strides = [
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1
+  ]
+  # Shrink sizes to make the test faster
+  for i in input_sizes:
+    i[3] //= shrink
+  for f in filter_sizes:
+    f[2] //= shrink
+    f[3] //= shrink
+  for o in out_sizes:
+    o[3] //= shrink
+  # pylint: disable=invalid-name
+  VALID = "VALID"
+  SAME = "SAME"
+  # pylint: enable=invalid-name
+  paddings = [
+      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
+      VALID, SAME, SAME, VALID, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
+      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
+      SAME, SAME, SAME, SAME, SAME, VALID, VALID, SAME, SAME, SAME, SAME, SAME,
+      SAME, SAME, SAME, SAME, VALID, VALID, VALID
+  ]
+  for i, f, o, s, p in zip(input_sizes, filter_sizes, out_sizes, strides,
+                           paddings):
+    yield i, f, o, s, p
+
+
+def _GetTestConfigs():
+  """Get all the valid tests configs to run.
+
+  Returns:
+    all the valid test configs as tuples of data_format and use_gpu.
+  """
+  test_configs = [("NCHW", True), ("NHWC", True)]
+  return test_configs
+
+
+def _IotaNdF32Constant(dim_sizes):
+
+  def MakeList(dims):
+    if len(dims) == 1:
+      return [float(1 + f) for f in range(dims[0])]
+    return [MakeList(dims[1:]) for _ in range(dims[0])]
+
+  return constant_op.constant(MakeList(dim_sizes), dtype=dtypes.float32)
+
+
+def _GetInceptionFwdTest(input_size,
+                         filter_size,
+                         stride,
+                         padding,
+                         gpu_only=True):
+
+  def Test(self):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping InceptionFwd %s",
+                      (input_size, filter_size, stride, padding))
+      return
+    tf_logging.info("Testing InceptionFwd %s",
+                    (input_size, filter_size, stride, padding))
+    self.CompareFwdValues(input_size, filter_size, [stride, stride], padding)
+
+  return Test
+
+
+class FusedConv2DBiasActivationTest(object):
+
+  @contextlib.contextmanager
+  def test_scope(self):  # pylint: disable=invalid-name
+    """Can be overridden in base classes to provide a test scope."""
+    yield
+
+  def _DtypesToTest(self, use_gpu):
+    return [dtypes.float32]
+
+  def _FilterFormatsToTest(self, use_gpu):
+    return ["HWIO", "OIHW"]
+
+  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias,
+                            strides, padding, activation_mode, data_format,
+                            filter_format, dtype):
+    """Verifies the output values of the convolution function.
+
+    Args:
+      tensor_in_sizes: Input tensor dimensions in
+        [batch, input_rows, input_cols, input_depth].
+      filter_in_sizes: Filter tensor dimensions in
+        [kernel_rows, kernel_cols, input_depth, output_depth].
+      bias: 1-D bias tensor of length output_depth.
+      strides: Stride: [col_stride, row_stride]
+      padding: Padding type.
+      activation_mode: Activation mode.
+      data_format: Format of the data tensors.
+      filter_format: Filter format to use for the fused convolution.
+      dtype: Data type for inputs and outputs.
+    Returns:
+      Symbolic tensor value and reference value that can be used to
+      execute the computation and verify the results.
+    """
+    input_size = np.prod(tensor_in_sizes)
+    filter_size = np.prod(filter_in_sizes)
+    bias_size = filter_in_sizes[-1]  # equals to output depth
+    # Initializes the input tensor with array containing incrementing
+    # numbers from 1.
+    x1 = [f * 1.0 for f in range(1, input_size + 1)]
+    x2 = [f * 1.0 for f in range(1, filter_size + 1)]
+    # This is to guarantee that there are always negative values after
+    # bias add so that we can test whether relu works correctly.
+    x3 = bias
+    with self.cached_session(use_gpu=True), self.test_scope():
+      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
+      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
+      fused_t2 = t2
+      if filter_format == "OIHW":
+        fused_t2 = _HwioToOihw(t2)
+      t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype)
+      strides = [1] + strides + [1]
+      if data_format == "NCHW":
+        t1 = test_util.NHWCToNCHW(t1)
+        strides = test_util.NHWCToNCHW(strides)
+      output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+          t1,
+          fused_t2,
+          t3,
+          strides=strides,
+          padding=padding,
+          data_format=data_format,
+          filter_format=filter_format,
+          activation_mode=activation_mode)
+      ref_conv_output = nn_ops.conv2d(
+          t1, t2, strides=strides, padding=padding, data_format=data_format)
+      ref_bias_output = nn_ops.bias_add(
+          ref_conv_output, t3, data_format=data_format)
+      ref_output = nn_ops.relu(ref_bias_output)
+      if data_format == "NCHW":
+        output = test_util.NCHWToNHWC(output)
+        ref_output = test_util.NCHWToNHWC(ref_output)
+
+      return output, ref_output
+
+  def CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides,
+                       padding):
+    """Verifies that CPU and GPU produce the same values.
+
+    Args:
+      tensor_in_sizes: Input tensor dimensions in
+        [batch, input_rows, input_cols, input_depth].
+      filter_in_sizes: Filter tensor dimensions in
+        [kernel_rows, kernel_cols, input_depth, output_depth].
+      conv_strides: [row_stride, col_stride] for the convolution;
+      padding: Padding type.
+    """
+    x1 = np.random.rand(*tensor_in_sizes).astype(np.float32)
+    x2 = np.random.rand(*filter_in_sizes).astype(np.float32)
+    x3 = np.random.rand(*[filter_in_sizes[-1]]).astype(np.float32)
+
+    def _SetupVal(data_format, use_gpu):
+      with self.cached_session(use_gpu=use_gpu), self.test_scope():
+        t1 = constant_op.constant(x1, shape=tensor_in_sizes)
+        t2 = constant_op.constant(x2, shape=filter_in_sizes)
+        t3 = constant_op.constant(x3, shape=[filter_in_sizes[-1]])
+        strides = [1] + conv_strides + [1]
+        if data_format == "NCHW":
+          t1 = test_util.NHWCToNCHW(t1)
+          strides = test_util.NHWCToNCHW(strides)
+        output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+            t1,
+            t2,
+            t3,
+            strides=strides,
+            padding=padding,
+            data_format=data_format,
+            activation_mode="Relu")
+
+        if data_format == "NCHW":
+          output = test_util.NCHWToNHWC(output)
+        return output
+
+    tensors = []
+    for (data_format, use_gpu) in _GetTestConfigs():
+      tensors.append(_SetupVal(data_format, use_gpu))
+    with self.cached_session() as sess, self.test_scope():
+      values = sess.run(tensors)
+      for i in range(1, len(values)):
+        self.assertAllClose(values[0], values[i], rtol=1e-3, atol=1e-3)
+
+  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, bias, strides,
+                    padding):
+    tensors = []
+    ref_tensors = []
+    for (data_format, use_gpu) in _GetTestConfigs():
+      for dtype in self._DtypesToTest(use_gpu):
+        for filter_format in self._FilterFormatsToTest(use_gpu):
+          result, expected = self._SetupValuesForDevice(
+              tensor_in_sizes, filter_in_sizes, bias, strides, padding, "Relu",
+              data_format, filter_format, dtype)
+        tensors.append(result)
+        ref_tensors.append(expected)
+      with self.cached_session() as sess, self.test_scope():
+        values = sess.run(tensors)
+        ref_values = sess.run(ref_tensors)
+        for i in range(len(tensors)):
+          conv = tensors[i]
+          value = values[i]
+          ref_value = ref_values[i]
+          tf_logging.info("expected = %s", ref_value)
+          tf_logging.info("actual = %s", value)
+          tol = 1e-5
+          if value.dtype == np.float16:
+            tol = 1e-3
+          self.assertAllClose(
+              np.ravel(ref_value), np.ravel(value), atol=tol, rtol=tol)
+          self.assertShapeEqual(value, conv)
+
+  def testConv2D1x1Filter(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2D1x1Filter test.")
+      return
+    # expected_output = [
+    #    0.0, 0.0, 0.0, 21.0, 0.0, 0.0, 57.0, 0.0, 0.0, 93.0, 41.0, 0.0, 129.0,
+    #    86.0, 43.0, 165.0, 131.0, 97.0
+    # ]
+    medians = [-45.0, -130.0, -215.0]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 2, 3, 3],
+        filter_in_sizes=[1, 1, 3, 3],
+        bias=medians,
+        strides=[1, 1],
+        padding="VALID")
+
+  def testConv2DEmpty(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2DEmpty test.")
+      return
+    # expected_output = []
+    self._VerifyValues(
+        tensor_in_sizes=[0, 2, 3, 3],
+        filter_in_sizes=[1, 1, 3, 3],
+        bias=[0.0, 0.0, 0.0],
+        strides=[1, 1],
+        padding="VALID")
+
+  def testConv2D2x2Filter(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2D2x2Filter test.")
+      return
+    # expected_output = [0.0, 0.0, 0.0, 401.0, 533.0, 665.0]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 2, 3, 3],
+        filter_in_sizes=[2, 2, 3, 3],
+        bias=[-2500.0, -2500.0, -2500.0],
+        strides=[1, 1],
+        padding="VALID")
+
+  def testConv2D1x2Filter(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2D1x2Filter test.")
+      return
+    # expected_output = [
+    #    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 190.0, 265.0, 340.0, 343.0, 436.0, 529.0
+    # ]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 2, 3, 3],
+        filter_in_sizes=[1, 2, 3, 3],
+        bias=[-500.0, -500.0, -500.0],
+        strides=[1, 1],
+        padding="VALID")
+
+  def testConv2D2x2FilterStride2(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2D2x2FilterStride2 test.")
+      return
+    # expected_output = [0.0, 67.0, 163.0]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 2, 3, 3],
+        filter_in_sizes=[2, 2, 3, 3],
+        bias=[-2300.0, -2300.0, -2300.0],
+        strides=[2, 2],
+        padding="VALID")
+
+  def testConv2D2x2FilterStride2Same(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2D2x2FilterStride2Same test.")
+      return
+    # expected_output = [0.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 2, 3, 3],
+        filter_in_sizes=[2, 2, 3, 3],
+        bias=[-2300.0, -1000.0, -1000.0],
+        strides=[2, 2],
+        padding="SAME")
+
+  def testConv2D2x2FilterStride1x2(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2D2x2FilterStride1x2 test.")
+      return
+    # expected_output = [0.0, 0.0, 8.0, 28.0, 48.0, 68.0]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 3, 6, 1],
+        filter_in_sizes=[2, 2, 1, 1],
+        bias=[-90.0],
+        strides=[1, 2],
+        padding="VALID")
+
+  def testConv2DKernelSmallerThanStrideValid(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2DKernelSmallerThanStrideValid test.")
+      return
+    # expected_output = [0, 0, 175, 205]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 7, 7, 1],
+        filter_in_sizes=[2, 2, 1, 1],
+        bias=[-100.0],
+        strides=[3, 3],
+        padding="VALID")
+
+  def testConv2DKernelSmallerThanStrideSame(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2DKernelSmallerThanStrideSame test.")
+      return
+    # expected = [0, 0, 2, 4]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 3, 3, 1],
+        filter_in_sizes=[1, 1, 1, 1],
+        bias=[-5.0],
+        strides=[2, 2],
+        padding="SAME")
+
+    # expected = [0, 0, 4, 6]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 4, 4, 1],
+        filter_in_sizes=[1, 1, 1, 1],
+        bias=[-5.0],
+        strides=[2, 2],
+        padding="SAME")
+
+    # expected = [4, 0, 1, 0]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 4, 4, 1],
+        filter_in_sizes=[2, 2, 1, 1],
+        bias=[-40.0],
+        strides=[3, 3],
+        padding="SAME")
+
+  def testConv2DKernelSizeMatchesInputSize(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping Conv2DKernelSizeMatchesInputSize test.")
+      return
+    # expected = [0, 5]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 2, 2, 1],
+        filter_in_sizes=[2, 2, 1, 2],
+        bias=[-50.0, -55.0],
+        strides=[1, 1],
+        padding="VALID")
+
+    # expected = [0, 2, 282, 322]
+    self._VerifyValues(
+        tensor_in_sizes=[1, 8, 8, 1],
+        filter_in_sizes=[2, 2, 1, 1],
+        bias=[-200.0],
+        strides=[4, 4],
+        padding="SAME")
+
+  def testShapeFunctionEdgeCases(self):
+    # All shapes unknown.
+    c1 = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+        array_ops.placeholder(dtypes.float32),
+        array_ops.placeholder(dtypes.float32),
+        array_ops.placeholder(dtypes.float32),
+        strides=[1, 1, 1, 1],
+        padding="SAME",
+        activation_mode="Relu")
+    self.assertEqual([None, None, None, None], c1.get_shape().as_list())
+
+    # Incorrect input shape.
+    with self.assertRaises(ValueError):
+      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+          array_ops.placeholder(dtypes.float32, shape=[1, 3]),
+          array_ops.placeholder(dtypes.float32),
+          array_ops.placeholder(dtypes.float32),
+          strides=[1, 1, 1, 1],
+          padding="SAME",
+          activation_mode="Relu")
+
+    # Incorrect filter shape.
+    with self.assertRaises(ValueError):
+      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+          array_ops.placeholder(dtypes.float32),
+          array_ops.placeholder(dtypes.float32, shape=[1, 3]),
+          array_ops.placeholder(dtypes.float32),
+          strides=[1, 1, 1, 1],
+          padding="SAME",
+          activation_mode="Relu")
+
+    # Depth mismatch.
+    with self.assertRaises(ValueError):
+      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+          array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
+          array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 2]),
+          array_ops.placeholder(dtypes.float32),
+          strides=[1, 1, 1, 1],
+          padding="SAME",
+          activation_mode="Relu")
+
+  def testOpEdgeCases(self, gpu_only=True):
+    if gpu_only and not test.is_gpu_available():
+      tf_logging.info("Skipping OpEdgeCases tests.")
+      return
+    with self.cached_session() as sess, self.test_scope():
+      # Illegal strides.
+      with self.assertRaisesRegexp(
+          errors_impl.UnimplementedError,
+          ".*strides.*in the batch and depth dimensions"):
+        sess.run(
+            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+                _IotaNdF32Constant([1, 1, 1, 1]),
+                _IotaNdF32Constant([1, 1, 1, 1]),
+                _IotaNdF32Constant([1]),
+                strides=[2, 1, 1, 1],
+                padding="SAME",
+                activation_mode="Relu"))
+      with self.assertRaisesRegexp(
+          errors_impl.UnimplementedError,
+          ".*strides.*in the batch and depth dimensions"):
+        sess.run(
+            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+                _IotaNdF32Constant([1, 1, 1, 1]),
+                _IotaNdF32Constant([1, 1, 1, 1]),
+                _IotaNdF32Constant([1]),
+                strides=[1, 1, 1, 2],
+                padding="SAME",
+                activation_mode="Relu"))
+
+      # Illegal activation mode.
+      with self.assertRaisesRegexp(ValueError,
+                                   "Op passed string 'Tanh' not in:"):
+        sess.run(
+            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+                _IotaNdF32Constant([1, 1, 1, 1]),
+                _IotaNdF32Constant([1, 1, 1, 1]),
+                _IotaNdF32Constant([1]),
+                strides=[1, 1, 1, 1],
+                padding="SAME",
+                activation_mode="Tanh"))
+
+      # Filter larger than input.
+      with self.assertRaisesRegexp(ValueError, "Negative dimension size"):
+        sess.run(
+            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+                _IotaNdF32Constant([32, 20, 20, 3]),
+                _IotaNdF32Constant([20, 21, 3, 2]),
+                _IotaNdF32Constant([2]),
+                strides=[1, 1, 1, 1],
+                padding="VALID",
+                activation_mode="Relu"))
+      with self.assertRaisesRegexp(ValueError, "Negative dimension size"):
+        sess.run(
+            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+                _IotaNdF32Constant([32, 20, 20, 3]),
+                _IotaNdF32Constant([21, 20, 3, 2]),
+                _IotaNdF32Constant([2]),
+                strides=[1, 1, 1, 1],
+                padding="VALID",
+                activation_mode="Relu"))
+
+
+# Add InceptionFwd tests to FusedConv2DBiasActivationTest.
+for index, (input_size_, filter_size_, output_size_, stride_,
+            padding_) in enumerate(_GetShrunkInceptionShapes()):
+  setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_" + str(index),
+          _GetInceptionFwdTest(input_size_, filter_size_, stride_, padding_))
+
+# TODO(b/35359731)
+# Fwd, BckInput, and BackFilter to test that for certain input parameter
+# set, winograd nonfused algorithm will be excluded from conv autotune. If
+# in such case, winograd nonfused algorithm is added as one option of the
+# conv autotune, and cuDNN version is smaller than 7, the following tests
+# will fail.
+ishape = [1, 400, 400, 1]
+fshape = [1, 1, 1, 256]
+oshape = [1, 400, 400, 256]
+setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_No_Winograd_Nonfused",
+        _GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True))
+
+
+def _CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type):
+  """Calculates the size of an output dimension of a strided convolution.
+
+  Given the sizes of the corresponding dimension of the input and filter shapes,
+  and the stride and padding_types, calculates the size of the output dimension.
+  This function can be called separately for each input dimension.
+
+  Args:
+    input_dim: An `int` specifying the size of the input dimension.
+    filter_dim: An `int` specifying the size of the filter dimension.
+    stride: An `int` specifying the step size of the convolution along the
+      input dimension.
+    padding_type: either 'VALID' or 'SAME'.
+
+  Returns:
+    The size of the output dimension.
+  """
+  if padding_type == "VALID":
+    return (input_dim - filter_dim + stride) // stride
+  else:  # padding_type == 'SAME'
+    return (input_dim + stride - 1) // stride
+
+
+def _NchwVectCToNchw(in_tensor):
+  # [N, C / 4, H, W, 4] => [N, C / 4, 4, H, W] == [N, C, H, W]
+  t = array_ops.transpose(in_tensor, [0, 1, 4, 2, 3])
+  n = in_tensor.shape.dims[0].value
+  c = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value
+  h = in_tensor.shape.dims[2].value
+  w = in_tensor.shape.dims[3].value
+  return array_ops.reshape(t, [n, c, h, w])
+
+
+def _OihwVectIToHwio(in_tensor):
+  # [O, I / 4, H, W, 4] => [O, I / 4, 4, H, W] == [O, I, H, W]
+  t = array_ops.transpose(in_tensor, [2, 3, 1, 4, 0])
+  o = in_tensor.shape.dims[0].value
+  i = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value
+  h = in_tensor.shape.dims[2].value
+  w = in_tensor.shape.dims[3].value
+  return array_ops.reshape(t, [h, w, i, o])
+
+
+def _NchwToNchwVectC(in_tensor):
+  n, c, h, w = in_tensor.shape.as_list()
+  assert c % 4 == 0
+  t = array_ops.reshape(in_tensor, [n, c // 4, 4, h, w])
+  return array_ops.transpose(t, [0, 1, 3, 4, 2])
+
+
+def _HwioToOihw(in_tensor):
+  return array_ops.transpose(in_tensor, [3, 2, 0, 1])
+
+
+def _SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
+                                           padding, strides, side_input_scale,
+                                           side_input, biases, apply_relu):
+  """Simulates the int8 fused 2-D convolution op using separate float ops.
+
+    The arguments and return values have the same format, meanings and
+    restrictions as the actual op.
+  Args:
+    conv_input_scale: A scalar 'float'.
+    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
+    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
+    padding: A `string` from: `"SAME", "VALID"`.
+    strides: A list of `ints`.
+    side_input_scale: A scalar 'float'.
+    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
+    biases: A `Tensor` of type `float32` in NCHW layout.
+    apply_relu: A boolean to specify whether to apply "Relu" activation function
+      that clips outputs to the range [0, 127], or "None" activation that clips
+      to the range [-128, 127].
+  Returns:
+    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
+  """
+  conv_result = nn_ops.conv2d(
+      _NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127)),
+      _OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127)),
+      strides=strides,
+      padding=padding,
+      data_format="NCHW") * conv_input_scale
+
+  conv_and_side_inputs = conv_result + side_input_scale * _NchwVectCToNchw(
+      gen_array_ops.dequantize(side_input, -128, 127))
+
+  output = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW")
+  if apply_relu:
+    output = nn_ops.relu(output)
+
+  result, _, _ = gen_array_ops.quantize_v2(
+      _NchwToNchwVectC(output), -128, 127, dtypes.qint8)
+  return result
+
+
+# TODO(b/114580749): XLA:CPU/GPU don't support int8 at the moment, so this test
+# doesn't currently use XLA.
+class FusedConvInt8Tests(object):
+  _test_params = [
+      {
+          "batch_size": 1,
+          "input_channels": 4,
+          "output_channels": 4,
+          "input_height": 8,
+          "input_width": 8,
+          "filter_height": 6,
+          "filter_width": 6,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 1,
+          "input_channels": 4,
+          "output_channels": 4,
+          "input_height": 6,
+          "input_width": 6,
+          "filter_height": 6,
+          "filter_width": 6,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 2,
+          "input_channels": 8,
+          "output_channels": 16,
+          "input_height": 8,
+          "input_width": 8,
+          "filter_height": 3,
+          "filter_width": 3,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "VALID"
+      },
+      {
+          "batch_size": 2,
+          "input_channels": 8,
+          "output_channels": 16,
+          "input_height": 8,
+          "input_width": 8,
+          "filter_height": 3,
+          "filter_width": 3,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 2,
+          "input_channels": 8,
+          "output_channels": 16,
+          "input_height": 8,
+          "input_width": 8,
+          "filter_height": 3,
+          "filter_width": 3,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "VALID"
+      },
+      {
+          "batch_size": 2,
+          "input_channels": 16,
+          "output_channels": 16,
+          "input_height": 9,
+          "input_width": 9,
+          "filter_height": 3,
+          "filter_width": 3,
+          "vertical_stride": 1,
+          "horizontal_stride": 1,
+          "conv_input_scale": 0.001,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 3,
+          "input_channels": 8,
+          "output_channels": 8,
+          "input_height": 9,
+          "input_width": 9,
+          "filter_height": 5,
+          "filter_width": 5,
+          "vertical_stride": 1,
+          "horizontal_stride": 1,
+          "conv_input_scale": 0.001,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 3,
+          "input_channels": 8,
+          "output_channels": 8,
+          "input_height": 9,
+          "input_width": 9,
+          "filter_height": 7,
+          "filter_width": 1,
+          "vertical_stride": 2,
+          "horizontal_stride": 1,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 3,
+          "input_channels": 8,
+          "output_channels": 8,
+          "input_height": 9,
+          "input_width": 9,
+          "filter_height": 1,
+          "filter_width": 7,
+          "vertical_stride": 1,
+          "horizontal_stride": 1,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+  ]
+
+  @contextlib.contextmanager
+  def test_scope(self):  # pylint: disable=invalid-name
+    """Can be overridden in base classes to provide a test scope."""
+    yield
+
+  def runTest(self, test_param, apply_relu):
+    batch_size = test_param["batch_size"]
+    input_channels = test_param["input_channels"]
+    output_channels = test_param["output_channels"]
+    input_height = test_param["input_height"]
+    input_width = test_param["input_width"]
+    filter_height = test_param["filter_height"]
+    filter_width = test_param["filter_width"]
+    vertical_stride = test_param["vertical_stride"]
+    horizontal_stride = test_param["horizontal_stride"]
+    conv_input_scale = test_param["conv_input_scale"]
+    side_input_scale = test_param["side_input_scale"]
+    bias_scale = test_param["bias_scale"]
+    padding_type = test_param["padding_type"]
+
+    with self.cached_session(use_gpu=True) as sess, self.test_scope():
+      conv_input, _, _ = gen_array_ops.quantize_v2(
+          random_ops.random_uniform(
+              [batch_size, input_channels // 4, input_height, input_width, 4],
+              minval=-0.0,
+              maxval=1.0,
+              dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)
+
+      kernel, _, _ = gen_array_ops.quantize_v2(
+          random_ops.random_uniform([
+              output_channels, input_channels // 4, filter_height, filter_width,
+              4
+          ],
+                                    minval=-1.0,
+                                    maxval=1.0,
+                                    dtype=dtypes.float32), -1.0, 1.0,
+          dtypes.qint8)
+
+      output_height = _CalculateConvolvedOutputDim(
+          input_height, filter_height, vertical_stride, padding_type)
+      output_width = _CalculateConvolvedOutputDim(
+          input_width, filter_width, horizontal_stride, padding_type)
+      tf_logging.info("output_height=%s, output_width=%s", output_height,
+                      output_width)
+
+      side_input, _, _ = gen_array_ops.quantize_v2(
+          random_ops.random_uniform([
+              batch_size, output_channels // 4, output_height, output_width, 4
+          ],
+                                    minval=0.0,
+                                    maxval=1.0,
+                                    dtype=dtypes.float32), -1.0, 1.0,
+          dtypes.qint8)
+
+      biases = random_ops.random_uniform([output_channels],
+                                         minval=-10 * bias_scale,
+                                         maxval=20 * bias_scale,
+                                         dtype=dtypes.float32)
+
+      strides = [1, 1, vertical_stride, horizontal_stride]
+
+      actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+          conv_input,
+          kernel,
+          biases,
+          strides=strides,
+          padding=padding_type,
+          conv_input_scale=conv_input_scale,
+          side_input_scale=side_input_scale,
+          side_input=side_input,
+          activation_mode="Relu" if apply_relu else "None",
+          data_format="NCHW_VECT_C",
+          filter_format="OIHW_VECT_I")
+
+      expected = _SimulateFusedConv2dBiasActivationInt8(
+          conv_input_scale, conv_input, kernel, padding_type, strides,
+          side_input_scale, side_input, biases, apply_relu)
+
+      actual_y, expected_y = sess.run([actual, expected])
+      self.assertAllClose(actual_y, expected_y, rtol=0, atol=1)
+
+  def testFusedConvInt8(self):
+    if not test.is_gpu_available(
+        cuda_only=True, min_cuda_compute_capability=(6, 1)):
+      tf_logging.info("int8 test skipped because not run with --config=cuda or "
+                      "no GPUs with compute capability >= 6.1 are available.")
+      return
+    for apply_relu in [True, False]:
+      for test_param in self._test_params:
+        self.runTest(test_param, apply_relu)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index bfc007bc39..c6ef82ccdc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -90,6 +90,7 @@ BLACKLIST = [
     "//tensorflow/contrib/lite/python:interpreter.py",
     "//tensorflow/contrib/lite/python:interpreter_test.py",
     "//tensorflow/contrib/ffmpeg:test_data",
+    "//tensorflow/contrib/fused_conv:fused_conv2d_bias_activation_op_test_base",
     "//tensorflow/contrib/hadoop:test_data",
     "//tensorflow/contrib/factorization/examples:mnist",
     "//tensorflow/contrib/factorization/examples:mnist.py",
-- 
GitLab


From db3e59a545f06780583ad839da9e19d847dfd392 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Thu, 27 Sep 2018 11:11:34 -0700
Subject: [PATCH 0803/1357] Internal change.

PiperOrigin-RevId: 214804105
---
 .../testing/model_coverage/model_coverage_lib.py     | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
index f8ab394c60..5ca57d083d 100644
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
@@ -183,7 +183,11 @@ def compare_models_random_data(tflite_model, tf_eval_func, tolerance=5):
     np.testing.assert_almost_equal(tf_result, tflite_result, tolerance)
 
 
-def test_frozen_graph(filename, input_arrays, output_arrays, **kwargs):
+def test_frozen_graph(filename,
+                      input_arrays,
+                      output_arrays,
+                      input_shapes=None,
+                      **kwargs):
   """Validates the TensorFlow frozen graph converts to a TFLite model.
 
   Converts the TensorFlow frozen graph to TFLite and checks the accuracy of the
@@ -193,10 +197,14 @@ def test_frozen_graph(filename, input_arrays, output_arrays, **kwargs):
     filename: Full filepath of file containing frozen GraphDef.
     input_arrays: List of input tensors to freeze graph with.
     output_arrays: List of output tensors to freeze graph with.
+    input_shapes: Dict of strings representing input tensor names to list of
+      integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}).
+      Automatically determined when input shapes is None (e.g., {"foo" : None}).
+        (default None)
     **kwargs: Additional arguments to be passed into the converter.
   """
   converter = _lite.TocoConverter.from_frozen_graph(filename, input_arrays,
-                                                    output_arrays)
+                                                    output_arrays, input_shapes)
   tflite_model = _convert(converter, **kwargs)
 
   tf_eval_func = evaluate_frozen_graph(filename, input_arrays, output_arrays)
-- 
GitLab


From d2a674a959101c35b8cf65c79a603baa16936805 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 11:28:48 -0700
Subject: [PATCH 0804/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 214807362
---
 tensorflow/core/ops/compat/ops_history.v1.pbtxt | 11 +++++++++++
 tensorflow/core/ops/ops.pbtxt                   | 11 +++++++++++
 2 files changed, 22 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index cac4259356..7625524674 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -74910,6 +74910,17 @@ op {
     type: "type"
   }
 }
+op {
+  name: "UnicodeScript"
+  input_arg {
+    name: "input"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT32
+  }
+}
 op {
   name: "UniformCandidateSampler"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index e173c2d072..83af07431c 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -35648,6 +35648,17 @@ op {
     type: "type"
   }
 }
+op {
+  name: "UnicodeScript"
+  input_arg {
+    name: "input"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT32
+  }
+}
 op {
   name: "UniformCandidateSampler"
   input_arg {
-- 
GitLab


From 9a0a768d4416d157664d864d992a62782beea4a4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 11:54:40 -0700
Subject: [PATCH 0805/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 214812088

---
 tensorflow/go/op/wrappers.go | 650 +++++++++++++++++------------------
 1 file changed, 325 insertions(+), 325 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 065c7e3011..96df1eee30 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -4059,50 +4059,6 @@ func FixedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes the sum along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// Computes a tensor such that
-// \\(output[i] = \sum_{j...} data[j...]\\) where the sum is over tuples `j...` such
-// that `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`
-// need not be sorted and need not cover all values in the full
-// range of valid values.
-//
-// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
-// If the given segment ID `i` is negative, the value is dropped and will not be
-// added to the sum of the segment.
-//
-// `num_segments` should equal the number of distinct segment IDs.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentSum.png" alt>
-// </div>
-//
-// Arguments:
-//
-//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
-//
-//
-// Returns Has same shape as data, except for the first `segment_ids.rank`
-// dimensions, which are replaced with a single dimension which has size
-// `num_segments`.
-func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "UnsortedSegmentSum",
-		Input: []tf.Input{
-			data, segment_ids, num_segments,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
 type ResourceStridedSliceAssignAttr func(optionalAttr)
 
@@ -10714,6 +10670,129 @@ func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value
 	return op.Output(0)
 }
 
+// This op consumes a lock created by `MutexLock`.
+//
+// This op exists to consume a tensor created by `MutexLock` (other than
+// direct control dependencies).  It should be the only that consumes the tensor,
+// and will raise an error if it is not.  Its only purpose is to keep the
+// mutex lock tensor alive until it is consumed by this op.
+//
+// **NOTE**: This operation must run on the same device as its input.  This may
+// be enforced via the `colocate_with` mechanism.
+//
+// Arguments:
+//	mutex_lock: A tensor returned by `MutexLock`.
+//
+// Returns the created operation.
+func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ConsumeMutexLock",
+		Input: []tf.Input{
+			mutex_lock,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// ResourceScatterNdAddAttr is an optional argument to ResourceScatterNdAdd.
+type ResourceScatterNdAddAttr func(optionalAttr)
+
+// ResourceScatterNdAddUseLocking sets the optional use_locking attribute to value.
+//
+// value: An optional bool. Defaults to True. If True, the assignment will
+// be protected by a lock; otherwise the behavior is undefined,
+// but may exhibit less contention.
+// If not specified, defaults to true
+func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Adds sparse `updates` to individual values or slices within a given
+//
+// variable according to `indices`.
+//
+// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+//
+// `indices` must be integer tensor, containing indices into `ref`.
+// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+//
+// The innermost dimension of `indices` (with length `K`) corresponds to
+// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+// dimension of `ref`.
+//
+// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
+//
+// ```
+// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+// ```
+//
+// For example, say we want to update 4 scattered elements to a rank-1 tensor to
+// 8 elements. In Python, that update would look like this:
+//
+// ```python
+//     ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True)
+//     indices = tf.constant([[4], [3], [1] ,[7]])
+//     updates = tf.constant([9, 10, 11, 12])
+//     update = tf.scatter_nd_add(ref, indices, updates)
+//     with tf.Session() as sess:
+//       print sess.run(update)
+// ```
+//
+// The resulting update to ref would look like this:
+//
+//     [1, 12, 3, 14, 14, 6, 7, 20]
+//
+// See `tf.scatter_nd` for more details about how to make updates to
+// slices.
+//
+// Arguments:
+//	ref: A resource handle. Must be from a VarHandleOp.
+//	indices: A Tensor. Must be one of the following types: int32, int64.
+// A tensor of indices into ref.
+//	updates: A Tensor. Must have the same type as ref. A tensor of
+// values to add to ref.
+//
+// Returns the created operation.
+func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdAddAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterNdAdd",
+		Input: []tf.Input{
+			ref, indices, updates,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Mutually reduces multiple tensors of identical type and shape.
+func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, merge_op string, final_op string, subdiv_offsets []int64) (data tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "merge_op": merge_op, "final_op": final_op, "subdiv_offsets": subdiv_offsets}
+	opspec := tf.OpSpec{
+		Type: "CollectiveReduce",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Updates the tree ensemble by either adding a layer to the last tree being grown
 //
 // or by starting a new tree.
@@ -11455,68 +11534,31 @@ func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, upd
 	return scope.AddOperation(opspec)
 }
 
-// ResourceScatterNdAddAttr is an optional argument to ResourceScatterNdAdd.
-type ResourceScatterNdAddAttr func(optionalAttr)
+// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal.
+type StatelessRandomNormalAttr func(optionalAttr)
 
-// ResourceScatterNdAddUseLocking sets the optional use_locking attribute to value.
+// StatelessRandomNormalDtype sets the optional dtype attribute to value.
 //
-// value: An optional bool. Defaults to True. If True, the assignment will
-// be protected by a lock; otherwise the behavior is undefined,
-// but may exhibit less contention.
-// If not specified, defaults to true
-func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr {
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["dtype"] = value
 	}
 }
 
-// Adds sparse `updates` to individual values or slices within a given
-//
-// variable according to `indices`.
-//
-// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
-//
-// `indices` must be integer tensor, containing indices into `ref`.
-// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
-//
-// The innermost dimension of `indices` (with length `K`) corresponds to
-// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
-// dimension of `ref`.
-//
-// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
-//
-// ```
-// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
-// ```
-//
-// For example, say we want to update 4 scattered elements to a rank-1 tensor to
-// 8 elements. In Python, that update would look like this:
-//
-// ```python
-//     ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True)
-//     indices = tf.constant([[4], [3], [1] ,[7]])
-//     updates = tf.constant([9, 10, 11, 12])
-//     update = tf.scatter_nd_add(ref, indices, updates)
-//     with tf.Session() as sess:
-//       print sess.run(update)
-// ```
-//
-// The resulting update to ref would look like this:
+// Outputs deterministic pseudorandom values from a normal distribution.
 //
-//     [1, 12, 3, 14, 14, 6, 7, 20]
+// The generated values will have mean 0 and standard deviation 1.
 //
-// See `tf.scatter_nd` for more details about how to make updates to
-// slices.
+// The outputs are a deterministic function of `shape` and `seed`.
 //
 // Arguments:
-//	ref: A resource handle. Must be from a VarHandleOp.
-//	indices: A Tensor. Must be one of the following types: int32, int64.
-// A tensor of indices into ref.
-//	updates: A Tensor. Must have the same type as ref. A tensor of
-// values to add to ref.
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
 //
-// Returns the created operation.
-func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdAddAttr) (o *tf.Operation) {
+// Returns Random values with specified shape.
+func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -11525,25 +11567,9 @@ func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, update
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceScatterNdAdd",
-		Input: []tf.Input{
-			ref, indices, updates,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Mutually reduces multiple tensors of identical type and shape.
-func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, merge_op string, final_op string, subdiv_offsets []int64) (data tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "merge_op": merge_op, "final_op": final_op, "subdiv_offsets": subdiv_offsets}
-	opspec := tf.OpSpec{
-		Type: "CollectiveReduce",
+		Type: "StatelessRandomNormal",
 		Input: []tf.Input{
-			input,
+			shape, seed,
 		},
 		Attrs: attrs,
 	}
@@ -11551,31 +11577,83 @@ func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key
 	return op.Output(0)
 }
 
-// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal.
-type StatelessRandomNormalAttr func(optionalAttr)
-
-// StatelessRandomNormalDtype sets the optional dtype attribute to value.
+// Creates a sequence of numbers.
 //
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr {
+// This operation creates a sequence of numbers that begins at `start` and
+// extends by increments of `delta` up to but not including `limit`.
+//
+// For example:
+//
+// ```
+// # 'start' is 3
+// # 'limit' is 18
+// # 'delta' is 3
+// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]
+// ```
+//
+// Arguments:
+//	start: 0-D (scalar). First entry in the sequence.
+//	limit: 0-D (scalar). Upper limit of sequence, exclusive.
+//	delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`.
+//
+// Returns 1-D.
+func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Range",
+		Input: []tf.Input{
+			start, limit, delta,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum.
+type ResourceApplyMomentumAttr func(optionalAttr)
+
+// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr {
 	return func(m optionalAttr) {
-		m["dtype"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Outputs deterministic pseudorandom values from a normal distribution.
+// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
 //
-// The generated values will have mean 0 and standard deviation 1.
+// value: If `True`, the tensor passed to compute grad will be
+// var - lr * momentum * accum, so in the end, the var you get is actually
+// var - lr * momentum * accum.
+// If not specified, defaults to false
+func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_nesterov"] = value
+	}
+}
+
+// Update '*var' according to the momentum scheme. Set use_nesterov = True if you
 //
-// The outputs are a deterministic function of `shape` and `seed`.
+// want to use Nesterov momentum.
+//
+// accum = accum * momentum + grad
+// var -= lr * accum
 //
 // Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	grad: The gradient.
+//	momentum: Momentum. Must be a scalar.
 //
-// Returns Random values with specified shape.
-func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -11584,14 +11662,13 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StatelessRandomNormal",
+		Type: "ResourceApplyMomentum",
 		Input: []tf.Input{
-			shape, seed,
+			var_, accum, lr, grad, momentum,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
 // DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter.
@@ -15062,6 +15139,78 @@ func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (o
 	return op.Output(0)
 }
 
+// Returns the last element of the input list as well as a list with all but that element.
+//
+// Fails if the list is empty.
+//
+// input_handle: the input list
+// tensor: the withdrawn last element of the list
+// element_dtype: the type of elements in the list
+// element_shape: the shape of the output tensor
+func TensorListPopBack(scope *Scope, input_handle tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"element_dtype": element_dtype}
+	opspec := tf.OpSpec{
+		Type: "TensorListPopBack",
+		Input: []tf.Input{
+			input_handle,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad.
+type MaxPoolGradGradAttr func(optionalAttr)
+
+// MaxPoolGradGradDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Computes second-order gradients of the maxpooling function.
+//
+// Arguments:
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns Gradients of gradients w.r.t. the input to `max_pool`.
+func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MaxPoolGradGrad",
+		Input: []tf.Input{
+			orig_input, orig_output, grad,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3.
 type TensorArrayGatherV3Attr func(optionalAttr)
 
@@ -15108,33 +15257,6 @@ func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow
 	return op.Output(0)
 }
 
-// This op consumes a lock created by `MutexLock`.
-//
-// This op exists to consume a tensor created by `MutexLock` (other than
-// direct control dependencies).  It should be the only that consumes the tensor,
-// and will raise an error if it is not.  Its only purpose is to keep the
-// mutex lock tensor alive until it is consumed by this op.
-//
-// **NOTE**: This operation must run on the same device as its input.  This may
-// be enforced via the `colocate_with` mechanism.
-//
-// Arguments:
-//	mutex_lock: A tensor returned by `MutexLock`.
-//
-// Returns the created operation.
-func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ConsumeMutexLock",
-		Input: []tf.Input{
-			mutex_lock,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
 // Returns x / y element-wise for integer types.
 //
 // Truncation designates that negative numbers will round fractional quantities
@@ -18032,138 +18154,6 @@ func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_
 	return op.Output(0)
 }
 
-// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum.
-type ResourceApplyMomentumAttr func(optionalAttr)
-
-// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
-//
-// value: If `True`, the tensor passed to compute grad will be
-// var - lr * momentum * accum, so in the end, the var you get is actually
-// var - lr * momentum * accum.
-// If not specified, defaults to false
-func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_nesterov"] = value
-	}
-}
-
-// Update '*var' according to the momentum scheme. Set use_nesterov = True if you
-//
-// want to use Nesterov momentum.
-//
-// accum = accum * momentum + grad
-// var -= lr * accum
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	grad: The gradient.
-//	momentum: Momentum. Must be a scalar.
-//
-// Returns the created operation.
-func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceApplyMomentum",
-		Input: []tf.Input{
-			var_, accum, lr, grad, momentum,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad.
-type MaxPoolGradGradAttr func(optionalAttr)
-
-// MaxPoolGradGradDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes second-order gradients of the maxpooling function.
-//
-// Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
-//
-// Returns Gradients of gradients w.r.t. the input to `max_pool`.
-func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MaxPoolGradGrad",
-		Input: []tf.Input{
-			orig_input, orig_output, grad,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the last element of the input list as well as a list with all but that element.
-//
-// Fails if the list is empty.
-//
-// input_handle: the input list
-// tensor: the withdrawn last element of the list
-// element_dtype: the type of elements in the list
-// element_shape: the shape of the output tensor
-func TensorListPopBack(scope *Scope, input_handle tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"element_dtype": element_dtype}
-	opspec := tf.OpSpec{
-		Type: "TensorListPopBack",
-		Input: []tf.Input{
-			input_handle,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
 // Returns element-wise integer closest to x.
 //
 // If the result is midway between two representable values,
@@ -21645,6 +21635,50 @@ func LoopCond(scope *Scope, input tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
+// Computes the sum along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// Computes a tensor such that
+// \\(output[i] = \sum_{j...} data[j...]\\) where the sum is over tuples `j...` such
+// that `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`
+// need not be sorted and need not cover all values in the full
+// range of valid values.
+//
+// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+// If the given segment ID `i` is negative, the value is dropped and will not be
+// added to the sum of the segment.
+//
+// `num_segments` should equal the number of distinct segment IDs.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentSum.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
+//
+//
+// Returns Has same shape as data, except for the first `segment_ids.rank`
+// dimensions, which are replaced with a single dimension which has size
+// `num_segments`.
+func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "UnsortedSegmentSum",
+		Input: []tf.Input{
+			data, segment_ids, num_segments,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the product along segments of a tensor.
 //
 // Read
@@ -22272,40 +22306,6 @@ func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (ou
 	return op.Output(0)
 }
 
-// Creates a sequence of numbers.
-//
-// This operation creates a sequence of numbers that begins at `start` and
-// extends by increments of `delta` up to but not including `limit`.
-//
-// For example:
-//
-// ```
-// # 'start' is 3
-// # 'limit' is 18
-// # 'delta' is 3
-// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]
-// ```
-//
-// Arguments:
-//	start: 0-D (scalar). First entry in the sequence.
-//	limit: 0-D (scalar). Upper limit of sequence, exclusive.
-//	delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`.
-//
-// Returns 1-D.
-func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Range",
-		Input: []tf.Input{
-			start, limit, delta,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // DestroyResourceOpAttr is an optional argument to DestroyResourceOp.
 type DestroyResourceOpAttr func(optionalAttr)
 
-- 
GitLab


From 561a3c4331ebfaac3e61c524911bf6fe85f4ebc9 Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Thu, 27 Sep 2018 12:20:33 -0700
Subject: [PATCH 0806/1357] Dynamic subdivisions in collective ring reduce.

Before this change, a CollectiveOp user was required to specify subdiv_offsets
for the RingReduce algorithm.  During ring reduction, we created chunks of the
tensor to exchange between devices.  If the chunks were too large, or if the
hardware supported multiple data exchanges in parallel, the user could further
subdivide the chunk by specifying more than 1 subdiv offset.  Each subdiv
offset corresponded to another subdivision of the chunk, so effectively the
total number of tensor chunks is number of devices * number of subdivs.

After this change, we can dynamically infer the number of subdivisions based on
a target chunk size.  In ring_reducer.cc, we start with 1 subdiv, and keep
increasing until chunk size is less than MAX_CHUNK_SIZE.  Currently,
MAX_CHUNK_SIZE is set at 4 MB, although it may make sense to change this based
on specific hardware.

As a part of this change, a user can now provide an empty subdiv_offset list.
If empty, we dynamically add subdivisions based on the above algorithm.  If
non-empty, we take the user-specified subdivions.

PiperOrigin-RevId: 214815959
---
 .../core/common_runtime/ring_reducer.cc       | 75 +++++++++++++++--
 .../core/common_runtime/ring_reducer_test.cc  | 83 +++++++++++++++----
 tensorflow/core/kernels/collective_ops.cc     | 21 +++--
 3 files changed, 147 insertions(+), 32 deletions(-)

diff --git a/tensorflow/core/common_runtime/ring_reducer.cc b/tensorflow/core/common_runtime/ring_reducer.cc
index a81f8650bf..b1fe928ba7 100644
--- a/tensorflow/core/common_runtime/ring_reducer.cc
+++ b/tensorflow/core/common_runtime/ring_reducer.cc
@@ -41,6 +41,16 @@ limitations under the License.
 
 // Set true for greater intelligibility of debug mode log messages.
 #define READABLE_KEYS false
+// RingReduce algorithm exchanges chunks of tensor between devices.  The chunk
+// size depends on the number of subdivisions specified in the algorithm.  If
+// the user does not specify the number of subdivisions, we infer the number
+// dynamically so that the resulting chunk size does not exceed
+// kMaxChunkSizeBytes, empirically set at 4 MiB.
+constexpr size_t kMaxChunkSizeBytes = (4 * 1024 * 1024);
+// kMaxSubdivsPerDev is used to give an upper bound on the number of
+// subdivisions dynamically generated.  A reasonable value would be a small
+// multiple of the number of NICs adjacent to each device.
+constexpr int kMaxSubdivsPerDevice = 2;
 
 namespace tensorflow {
 namespace {
@@ -92,7 +102,62 @@ RingReducer::RingReducer()
 
 RingReducer::~RingReducer() { group_size_tensor_ready_.WaitForNotification(); }
 
+Status GenerateSubdivsInCollectiveParams(CollectiveParams* col_params) {
+  if (col_params->instance.shape.num_elements() == 0) {
+    return errors::Internal("shape in CollectiveParams should be non-empty");
+  }
+  const int kAvgDevPerTask =
+      col_params->group.group_size / col_params->group.num_tasks;
+  const int kMaxNumSubdivs = kMaxSubdivsPerDevice * kAvgDevPerTask;
+  if (kMaxNumSubdivs <= 0) {
+    return errors::Internal("Unexpected kMaxNumSubdivs ", kMaxNumSubdivs,
+                            " in RingReducer");
+  }
+  // NOTE(ayushd): If no subdiv_offsets have been specified, dynamically add
+  // as many offsets as needed so that the size of tensor chunks <=
+  // kMaxChunkSizeBytes.  Empirically, chunks that are too small or too large
+  // lead to worse performance.
+  int num_subdivs = 0;
+  const size_t tensor_size = col_params->instance.shape.num_elements() *
+                             DataTypeSize(col_params->instance.data_type);
+  size_t chunk_size;
+  do {
+    ++num_subdivs;
+    int num_chunks = col_params->group.group_size * num_subdivs;
+    chunk_size = tensor_size / num_chunks;
+    VLOG(2) << "num_subdivs " << num_subdivs << " num_chunks " << num_chunks
+            << " chunk_size " << chunk_size;
+  } while (chunk_size > kMaxChunkSizeBytes && num_subdivs < kMaxNumSubdivs);
+  if (num_subdivs <= 0) {
+    return errors::Internal("Unexpected num_subdivs ", num_subdivs,
+                            " in RingReducer");
+  }
+
+  int subdiv_stride = kAvgDevPerTask / num_subdivs;
+  if (subdiv_stride == 0) subdiv_stride = 1;
+  col_params->instance.impl_details.subdiv_offsets.reserve(num_subdivs);
+  for (int sdi = 0; sdi < num_subdivs; ++sdi) {
+    int subdiv_offset = subdiv_stride * sdi;
+    if (sdi % 2 == 1) subdiv_offset *= -1;
+    col_params->instance.impl_details.subdiv_offsets.push_back(subdiv_offset);
+  }
+
+  if (VLOG_IS_ON(2)) {
+    string subdiv_buf;
+    for (const int subdiv_offset :
+         col_params->instance.impl_details.subdiv_offsets) {
+      strings::StrAppend(&subdiv_buf, " ", subdiv_offset);
+    }
+    VLOG(2) << "Dynamically generated " << num_subdivs
+            << " subdiv_offsets:" << subdiv_buf << " tensor_size "
+            << tensor_size << " chunk_size " << chunk_size;
+  }
+
+  return Status::OK();
+}
+
 Status RingReducer::InitializeCollectiveParams(CollectiveParams* col_params) {
+  // TODO(b/113171733): change CHECKs to return errors.
   CHECK_EQ(col_params->instance.type, REDUCTION_COLLECTIVE);
   CHECK_EQ(col_params->instance.impl_details.collective_name, "RingReduce");
   const string& device_name =
@@ -123,12 +188,11 @@ Status RingReducer::InitializeCollectiveParams(CollectiveParams* col_params) {
   dev_per_task.push_back(dev_count);
   CHECK_EQ(col_params->group.num_tasks, dev_per_task.size());
 
-  // Generate a ring permutation for each requested offset.
   if (col_params->instance.impl_details.subdiv_offsets.empty()) {
-    return errors::Internal(
-        "Subdiv offsets should be non-empty for ring reducer, size=",
-        col_params->instance.impl_details.subdiv_offsets.size());
+    TF_RETURN_IF_ERROR(GenerateSubdivsInCollectiveParams(col_params));
   }
+
+  // Generate a ring permutation for requested offset.
   VLOG(2) << "Setting up perms for col_params " << col_params
           << " subdiv_permutations "
           << &col_params->instance.impl_details.subdiv_permutations;
@@ -646,7 +710,8 @@ bool RingReducer::RunAsyncParts() {
         case RF_SEND:
           --send_pending_count;
           break;
-        default: {}  // Ignore any other actions
+        default: {
+        }  // Ignore any other actions
       }
     }
   }
diff --git a/tensorflow/core/common_runtime/ring_reducer_test.cc b/tensorflow/core/common_runtime/ring_reducer_test.cc
index 28df85399e..75aba43572 100644
--- a/tensorflow/core/common_runtime/ring_reducer_test.cc
+++ b/tensorflow/core/common_runtime/ring_reducer_test.cc
@@ -549,37 +549,38 @@ class RingReducerTest : public ::testing::Test {
   int32 reduce_counter_ GUARDED_BY(mu_) = 0;
 };
 
-TEST_F(RingReducerTest, InitializeParams) {
-  static const int kNumDevsPerTask = 8;
-  static const int kNumTasks = 3;
-  static const int kNumDevs = kNumDevsPerTask * kNumTasks;
+CollectiveParams SetUpCollectiveParams(const int num_devs_per_task,
+                                       const int num_tasks) {
   CollectiveParams cp;
-  std::vector<string> device_names;
-  std::vector<string> task_names;
+  const int kNumDevs = num_devs_per_task * num_tasks;
   cp.group.group_key = 1;
   cp.group.group_size = kNumDevs;
   cp.group.device_type = DeviceType("GPU");
-  cp.group.num_tasks = kNumTasks;
+  cp.group.num_tasks = num_tasks;
   cp.instance.instance_key = 3;
   cp.instance.type = REDUCTION_COLLECTIVE;
   cp.instance.data_type = DataType(DT_FLOAT);
-  cp.instance.shape = TensorShape({5});
+  cp.instance.shape = TensorShape({kNumDevs});
   cp.instance.impl_details.collective_name = "RingReduce";
   cp.instance.impl_details.subdiv_offsets.push_back(0);
   cp.is_source = false;
   for (int i = 0; i < kNumDevs; ++i) {
-    int task_id = i / kNumDevsPerTask;
-    int dev_id = i % kNumDevsPerTask;
+    int task_id = i / num_devs_per_task;
+    int dev_id = i % num_devs_per_task;
     string task_name = strings::StrCat("/job:worker/replica:0/task:", task_id);
-    task_names.push_back(task_name);
     string device_name = strings::StrCat(task_name, "/device:GPU:", dev_id);
-    device_names.push_back(device_name);
     cp.instance.task_names.push_back(task_name);
     cp.instance.device_names.push_back(device_name);
   }
+  return cp;
+}
 
-  int test_rank = 0;
-  cp.default_rank = test_rank;
+TEST_F(RingReducerTest, InitializeParams) {
+  const int kNumDevsPerTask = 8;
+  const int kNumTasks = 3;
+  CollectiveParams cp = SetUpCollectiveParams(kNumDevsPerTask, kNumTasks);
+
+  cp.default_rank = 0;
   cp.instance.impl_details.subdiv_offsets = {0, 4};
   RunSubdivPermsTest(&cp,
                      {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
@@ -588,8 +589,15 @@ TEST_F(RingReducerTest, InitializeParams) {
                        8, 9, 10, 11, 20, 21, 22, 23, 16, 17, 18, 19}},
                      {0, 4});
 
-  test_rank = 3;
-  cp.default_rank = test_rank;
+  cp.instance.impl_details.subdiv_offsets = {0, -4};
+  RunSubdivPermsTest(&cp,
+                     {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+                       12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
+                      {3,  2,  1,  0,  7,  6,  5,  4,  11, 10, 9,  8,
+                       15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21, 20}},
+                     {0, 3});
+
+  cp.default_rank = 3;
   cp.instance.impl_details.subdiv_offsets = {3, -3};
   RunSubdivPermsTest(&cp,
                      {{3,  4, 5, 6,  7,  0,  1,  2,  11, 12, 13, 14,
@@ -599,6 +607,49 @@ TEST_F(RingReducerTest, InitializeParams) {
                      {0, 1});
 }
 
+TEST_F(RingReducerTest, AutomaticSubdivs) {
+  const int kNumDevsPerTask = 8;
+  const int kNumTasks = 3;
+  const int kNumDevs = kNumDevsPerTask * kNumTasks;
+  CollectiveParams cp = SetUpCollectiveParams(kNumDevsPerTask, kNumTasks);
+
+  // Test automatic generation of subdiv offsets.
+  cp.default_rank = 0;
+  cp.instance.impl_details.subdiv_offsets.clear();
+  RunSubdivPermsTest(&cp, {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
+                     {0});
+
+  // Set shape so that with 2 subdivs chunk_size is 3 MiB.  This should cause 2
+  // offsets, {0, -4}, to be generated.
+  {
+    int num_subdivs = 2;
+    int num_chunks = kNumDevs * num_subdivs;
+    size_t chunk_size = 3 * 1048576;  // 3 MB
+    size_t tensor_size = chunk_size * num_chunks;
+    cp.instance.shape =
+        TensorShape({static_cast<int64>(tensor_size / DataTypeSize(DT_FLOAT))});
+  }
+  cp.instance.impl_details.subdiv_offsets.clear();
+  RunSubdivPermsTest(&cp,
+                     {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+                       12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
+                      {3,  2,  1,  0,  7,  6,  5,  4,  11, 10, 9,  8,
+                       15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21, 20}},
+                     {0, 3});
+}
+
+TEST_F(RingReducerTest, AutomaticSubdivUpperBound) {
+  const int kNumDevsPerTask = 1;
+  const int kNumTasks = 4;
+  CollectiveParams cp = SetUpCollectiveParams(kNumDevsPerTask, kNumTasks);
+
+  cp.default_rank = 0;
+  cp.instance.impl_details.subdiv_offsets.clear();
+  cp.instance.shape = TensorShape({104857600 / DataTypeSize(DT_FLOAT)});
+  RunSubdivPermsTest(&cp, {{0, 1, 2, 3}, {0, 1, 2, 3}}, {0, 0});
+}
+
 // TODO(b/113171733): change to use TEST_P.
 #define DEF_TEST(B, T, W, D, S, L, A)                                         \
   TEST_F(RingReducerTest,                                                     \
diff --git a/tensorflow/core/kernels/collective_ops.cc b/tensorflow/core/kernels/collective_ops.cc
index e0da91125b..fa959b5a0e 100644
--- a/tensorflow/core/kernels/collective_ops.cc
+++ b/tensorflow/core/kernels/collective_ops.cc
@@ -132,6 +132,7 @@ class CollectiveReduceOpKernel : public CollectiveOpKernel {
             "Failed to get CollectiveExecutor from OpKernelContext for Op ",
             col_params_.name),
         done);
+    col_params_.instance.shape = c->input(0).shape();
     // Allocate output on the first pass through this function.  This must be
     // done immediately, while we're still in the executor thread.  Otherwise
     // the memory is not guaranteed to be unused by any concurrently executing
@@ -171,7 +172,7 @@ class CollectiveBcastSendOpKernel : public CollectiveOpKernel {
     OP_REQUIRES_OK(
         c, c->GetAttr("instance_key", &col_params_.instance.instance_key));
     OP_REQUIRES_OK(c, c->GetAttr("T", &col_params_.instance.data_type));
-    OP_REQUIRES_OK(c, c->GetAttr("shape", &shape_));
+    OP_REQUIRES_OK(c, c->GetAttr("shape", &col_params_.instance.shape));
     col_params_.is_source = true;
     col_params_.instance.impl_details.subdiv_offsets = {0};
 
@@ -195,13 +196,14 @@ class CollectiveBcastSendOpKernel : public CollectiveOpKernel {
     if (c->mutable_output(0) == nullptr) {
       // Allocate the output tensor, trying to reuse the input.
       Tensor* output = nullptr;
-      OP_REQUIRES_OK_ASYNC(
-          c, c->forward_input_or_allocate_output({0}, 0, shape_, &output),
-          done);
+      OP_REQUIRES_OK_ASYNC(c,
+                           c->forward_input_or_allocate_output(
+                               {0}, 0, col_params_.instance.shape, &output),
+                           done);
     }
     if (!CanProceedWithCompute(c, col_exec, done)) return;
     OP_REQUIRES_ASYNC(
-        c, shape_.IsSameSize(c->input(0).shape()),
+        c, col_params_.instance.shape.IsSameSize(c->input(0).shape()),
         errors::Internal("Declared shape of op ", col_params_.name,
                          " does not match shape of input"),
         done);
@@ -214,8 +216,6 @@ class CollectiveBcastSendOpKernel : public CollectiveOpKernel {
   }
 
  private:
-  TensorShape shape_;
-
   TF_DISALLOW_COPY_AND_ASSIGN(CollectiveBcastSendOpKernel);
 };
 
@@ -234,7 +234,7 @@ class CollectiveBcastRecvOpKernel : public CollectiveOpKernel {
     OP_REQUIRES_OK(
         c, c->GetAttr("instance_key", &col_params_.instance.instance_key));
     OP_REQUIRES_OK(c, c->GetAttr("T", &col_params_.instance.data_type));
-    OP_REQUIRES_OK(c, c->GetAttr("shape", &shape_));
+    OP_REQUIRES_OK(c, c->GetAttr("shape", &col_params_.instance.shape));
     col_params_.is_source = false;
     col_params_.instance.impl_details.subdiv_offsets = {0};
 
@@ -258,7 +258,8 @@ class CollectiveBcastRecvOpKernel : public CollectiveOpKernel {
     if (c->mutable_output(0) == nullptr) {
       // No input, so must allocate output.
       Tensor* output = nullptr;
-      OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, shape_, &output), done);
+      OP_REQUIRES_OK_ASYNC(
+          c, c->allocate_output(0, col_params_.instance.shape, &output), done);
     }
     if (!CanProceedWithCompute(c, col_exec, done)) return;
 
@@ -270,8 +271,6 @@ class CollectiveBcastRecvOpKernel : public CollectiveOpKernel {
   }
 
  private:
-  TensorShape shape_;
-
   TF_DISALLOW_COPY_AND_ASSIGN(CollectiveBcastRecvOpKernel);
 };
 
-- 
GitLab


From 750466c6e6624d279de7f9a43accd682d487509c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 12:37:05 -0700
Subject: [PATCH 0807/1357] Introduce the abstraction of RunHandler which each
 DirectSession can use for the duration of a single RunInternal() call from
 RunHandlerPool. We want to leverage this abstraction for improving the
 cross-session inter-op parallelism for lower latency inference in the future.
 In the case that global pools aren't used, this change should be a no-op.

PiperOrigin-RevId: 214818187
---
 tensorflow/core/BUILD                         |  16 ++
 .../core/common_runtime/direct_session.cc     |  49 +++-
 .../core/common_runtime/direct_session.h      |   3 +
 .../common_runtime/direct_session_test.cc     |  28 ++
 tensorflow/core/framework/run_handler.cc      | 248 ++++++++++++++++++
 tensorflow/core/framework/run_handler.h       |  95 +++++++
 tensorflow/core/framework/run_handler_util.cc |  57 ++++
 tensorflow/core/framework/run_handler_util.h  |  43 +++
 .../core/framework/run_handler_util_test.cc   |  93 +++++++
 tensorflow/core/protobuf/config.proto         |   5 +
 ...ensorflow.-run-options.-experimental.pbtxt |   6 +
 .../golden/v1/tensorflow.-run-options.pbtxt   |   6 +
 ...ensorflow.-run-options.-experimental.pbtxt |   6 +
 .../golden/v2/tensorflow.-run-options.pbtxt   |   6 +
 14 files changed, 655 insertions(+), 6 deletions(-)
 create mode 100644 tensorflow/core/framework/run_handler.cc
 create mode 100644 tensorflow/core/framework/run_handler.h
 create mode 100644 tensorflow/core/framework/run_handler_util.cc
 create mode 100644 tensorflow/core/framework/run_handler_util.h
 create mode 100644 tensorflow/core/framework/run_handler_util_test.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index ca247dc56b..01e2e9f62b 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2484,6 +2484,8 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [
     "framework/op_segment.h",
     "framework/rendezvous.h",  # only needed for tests
     "framework/resource_var.h",
+    "framework/run_handler.h",
+    "framework/run_handler_util.h",
     "framework/tensor_reference.h",
     "framework/tracking_allocator.h",  # only needed for tests
     "framework/unique_tensor_references.h",
@@ -2970,6 +2972,7 @@ tf_cuda_library(
         ":core_cpu_internal",
         ":device_tracer",
         ":framework",
+        ":framework_internal",
         ":graph",
         ":lib",
         ":lib_internal",
@@ -4117,6 +4120,19 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "framework_run_handler_util_test",
+    size = "small",
+    srcs = ["framework/run_handler_util_test.cc"],
+    linkstatic = tf_kernel_tests_linkstatic(),
+    deps = [
+        ":framework_internal",
+        ":lib",
+        ":test",
+        ":test_main",
+    ],
+)
+
 tf_cuda_cc_test(
     name = "common_runtime_direct_session_test",
     size = "small",
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 841181f8c3..458e133b68 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -40,6 +40,7 @@ limitations under the License.
 #include "tensorflow/core/framework/graph_def_util.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/run_handler.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
@@ -244,6 +245,21 @@ void DirectSession::SchedClosure(thread::ThreadPool* pool,
 #endif  // __ANDROID__
 }
 
+static RunHandlerPool* GetOrCreateRunHandlerPool(
+    const SessionOptions& options) {
+  static RunHandlerPool* pool =
+      new RunHandlerPool(NumInterOpThreadsFromSessionOptions(options));
+  return pool;
+}
+
+bool DirectSession::ShouldUseRunHandlerPool() const {
+  if (options_.config.session_inter_op_thread_pool_size() > 0 ||
+      options_.config.use_per_session_threads()) {
+    return false;
+  }
+  return true;
+}
+
 DirectSession::DirectSession(const SessionOptions& options,
                              const DeviceMgr* device_mgr,
                              DirectSessionFactory* const factory)
@@ -582,16 +598,37 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options,
     }
   }
 
-  Executor::Args::Runner default_runner = [this,
-                                           pool](Executor::Args::Closure c) {
-    SchedClosure(pool, std::move(c));
-  };
+  std::unique_ptr<RunHandler> handler;
+  if (ShouldUseRunHandlerPool() &&
+      run_options.experimental().use_run_handler_pool()) {
+    // Non-null only when a global inter-op pool is used.
+    VLOG(1) << "Using RunHandler to scheduler inter-op closures.";
+    handler = GetOrCreateRunHandlerPool(options_)->Get();
+  }
+  auto* handler_ptr = handler.get();
+
+  Executor::Args::Runner default_runner = nullptr;
+
+  if (pool == nullptr) {
+    default_runner = [](Executor::Args::Closure c) { c(); };
+  } else if (handler_ptr != nullptr) {
+    default_runner = [handler_ptr](Executor::Args::Closure c) {
+      handler_ptr->ScheduleInterOpClosure(std::move(c));
+    };
+  } else {
+    default_runner = [this, pool](Executor::Args::Closure c) {
+      SchedClosure(pool, std::move(c));
+    };
+  }
+
   for (const auto& item : executors_and_keys->items) {
-    // TODO(zhengxq): support partial run.
-    // TODO(zhengxq): if the device picks its own threadpool, we need to assign
+    // TODO(azaks): support partial run.
+    // TODO(azaks): if the device picks its own threadpool, we need to assign
     //     less threads to the main compute pool by default.
     thread::ThreadPool* device_thread_pool =
         item.device->tensorflow_device_thread_pool();
+    // TODO(crk): Investigate usage of RunHandlerPool when using device specific
+    // thread pool(s).
     if (!device_thread_pool) {
       args.runner = default_runner;
     } else {
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index 4a6a921ea7..3a168bbe3f 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -247,6 +247,9 @@ class DirectSession : public Session {
                                    ExecutorsAndKeys* executors_and_keys,
                                    RunMetadata* run_metadata);
 
+  // Returns whether inter-op execution uses a global pool.
+  bool ShouldUseRunHandlerPool() const;
+
   ::tensorflow::Status ExtendLocked(const GraphDef& graph)
       EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
 
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index 65e816c202..e3e431f800 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -625,6 +625,34 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetworkWithOpts_Callable) {
   EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 2);
 }
 
+TEST_F(DirectSessionMinusAXTest, UseRunHandlerPool) {
+  Initialize({3, 2, -1, 0});
+  auto session = CreateSession();
+  ASSERT_TRUE(session != nullptr);
+  TF_ASSERT_OK(session->Create(def_));
+  std::vector<std::pair<string, Tensor>> inputs;
+
+  // Request two targets: one fetch output and one non-fetched output.
+  std::vector<string> output_names = {y_ + ":0"};
+  std::vector<string> target_nodes = {y_neg_};
+  std::vector<Tensor> outputs;
+
+  // Prepares RunOptions and RunMetadata
+  RunOptions run_options;
+  run_options.mutable_experimental()->set_use_run_handler_pool(true);
+
+  Status s = session->Run(run_options, inputs, output_names, target_nodes,
+                          &outputs, nullptr);
+  TF_ASSERT_OK(s);
+
+  ASSERT_EQ(1, outputs.size());
+  // The first output should be initialized and have the correct
+  // output.
+  auto mat = outputs[0].matrix<float>();
+  ASSERT_TRUE(outputs[0].IsInitialized());
+  EXPECT_FLOAT_EQ(5.0, mat(0, 0));
+}
+
 TEST(DirectSessionTest, KeepsStateAcrossRunsOfSession) {
   GraphDef def;
   Graph g(OpRegistry::Global());
diff --git a/tensorflow/core/framework/run_handler.cc b/tensorflow/core/framework/run_handler.cc
new file mode 100644
index 0000000000..9c6490a603
--- /dev/null
+++ b/tensorflow/core/framework/run_handler.cc
@@ -0,0 +1,248 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/framework/run_handler.h"
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/run_handler_util.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+
+// Contains the concrete implementation of the RunHandler.
+// Externally visible RunHandler class simply forwards the work to this one.
+class RunHandler::Impl {
+ public:
+  explicit Impl(RunHandlerPool::Impl* pool_impl) : pool_impl_(pool_impl) {
+    Reset();
+  }
+
+  ~Impl() {}
+
+  void set_inter_op_scheduling_range(std::uint_fast32_t start,
+                                     std::uint_fast32_t limit) {
+    inter_op_scheduling_range_.store(EncodePartition(start, limit),
+                                     std::memory_order_release);
+  }
+
+  std::uint_fast32_t inter_op_scheduling_range() const {
+    return inter_op_scheduling_range_.load(std::memory_order_acquire);
+  }
+
+  // Stores now time (in microseconds) since unix epoch when the handler is
+  // requested via RunHandlerPool::Get().
+  uint64 start_time_us() const { return start_time_us_; }
+
+  void ScheduleInterOpClosure(std::function<void()> fn);
+
+  void Reset();
+
+  RunHandlerPool::Impl* pool_impl() { return pool_impl_; }
+
+ private:
+  // Encoding/decoding logic for storing [start, limit) into a single
+  // uint_fast32_t int. We assume that pool_num_threads < (1 << 16).
+  const int kMaxPartitionBits = 16;
+  const int kMaxThreads = 1 << kMaxPartitionBits;
+
+  std::uint_fast32_t EncodePartition(std::uint_fast32_t start,
+                                     std::uint_fast32_t limit) {
+    return (start << kMaxPartitionBits) | limit;
+  }
+
+  void DecodePartition(std::uint_fast32_t val, std::uint_fast32_t* start,
+                       std::uint_fast32_t* limit) {
+    *limit = val & (kMaxThreads - 1);
+    val >>= kMaxPartitionBits;
+    *start = val;
+  }
+
+  std::atomic_uint_fast32_t inter_op_scheduling_range_;
+  RunHandlerPool::Impl* pool_impl_;  // NOT OWNED.
+  uint64 start_time_us_;
+};
+
+// Contains shared state across all run handlers present in the pool. Also
+// responsible for pool management decisions.
+// This class is thread safe.
+class RunHandlerPool::Impl {
+ public:
+  // Maximum number of handlers pre-created during pool construction time. The
+  // number has been chosen expecting each handler might at least want 1
+  // inter-op thread for execution (during compute intensive workloads like
+  // inference).
+  static const int kMaxHandlers = 128;
+
+  explicit Impl(int num_inter_op_threads)
+      : inter_op_thread_pool_(new thread::ThreadPool(
+            Env::Default(), ThreadOptions(), "inter_op", num_inter_op_threads)),
+        iterations_(0) {
+    VLOG(1) << "Creating a RunHandlerPool with max handlers: " << kMaxHandlers;
+    for (int i = 0; i < kMaxHandlers; ++i) {
+      handlers_.emplace_back(new RunHandler::Impl(this));
+      free_handlers_.push_back(handlers_.back().get());
+    }
+  }
+
+  ~Impl() {
+    // Sanity check that all handlers have been returned back to the pool before
+    // destruction.
+    DCHECK_EQ(handlers_.size(), kMaxHandlers);
+    DCHECK_EQ(free_handlers_.size(), handlers_.size());
+    DCHECK_EQ(sorted_active_handlers_.size(), 0);
+  }
+
+  thread::ThreadPool* inter_op_thread_pool() const {
+    return inter_op_thread_pool_.get();
+  }
+
+  std::unique_ptr<RunHandler> Get() LOCKS_EXCLUDED(mu_) {
+    mutex_lock l(mu_);
+    while (free_handlers_.empty()) {
+      one_handler_free_.wait(l);
+    }
+    // Remove the last entry from free_handlers_ and add to the end of
+    // sorted_active_handlers_.
+    auto* handler_impl = free_handlers_.back();
+    handler_impl->Reset();
+    // Sortedness isn't violated if we simply add at the end of the list, since
+    // handlers are expected to be obtained in increasing order of time.
+    sorted_active_handlers_.push_back(handler_impl);
+    DCHECK_LE(sorted_active_handlers_.size(), kMaxHandlers);
+    free_handlers_.pop_back();
+
+    RecomputePoolStatsLocked();
+    return WrapUnique<RunHandler>(new RunHandler(handler_impl));
+  }
+
+  void ReleaseHandler(RunHandler::Impl* handler) LOCKS_EXCLUDED(mu_) {
+    {
+      mutex_lock l(mu_);
+      DCHECK_GT(sorted_active_handlers_.size(), 0);
+
+      uint64 now = tensorflow::Env::Default()->NowMicros();
+      double elapsed = (now - handler->start_time_us()) / 1000.0;
+      time_hist_.Add(elapsed);
+
+      // Erase from and update sorted_active_handlers_. Add it to the end of
+      // free_handlers_.
+      auto iter = std::find(sorted_active_handlers_.begin(),
+                            sorted_active_handlers_.end(), handler);
+      DCHECK(iter != sorted_active_handlers_.end())
+          << "Unexpected handler: " << handler
+          << " is being requested for release";
+
+      // Remove this handler from this list and add it to the list of free
+      // handlers.
+      sorted_active_handlers_.erase(iter);
+      free_handlers_.push_back(handler);
+      DCHECK_LE(free_handlers_.size(), kMaxHandlers);
+
+      RecomputePoolStatsLocked();
+    }
+    one_handler_free_.notify_one();
+  }
+
+ private:
+  void RecomputePoolStatsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  // Thread safe part.
+  const std::unique_ptr<thread::ThreadPool> inter_op_thread_pool_;
+
+  // Thread compatible part used only by lock under RunHandlerPool.
+  // Handlers are sorted by start time.
+  std::vector<RunHandler::Impl*> sorted_active_handlers_ GUARDED_BY(mu_);
+  std::vector<RunHandler::Impl*> free_handlers_ GUARDED_BY(mu_);
+  std::vector<std::unique_ptr<RunHandler::Impl>> handlers_ GUARDED_BY(mu_);
+  // Histogram of elapsed runtime of every handler (in ms).
+  histogram::Histogram time_hist_ GUARDED_BY(mu_);
+  std::vector<std::uint_fast32_t> inter_op_start_ GUARDED_BY(mu_);
+  std::vector<std::uint_fast32_t> inter_op_limit_ GUARDED_BY(mu_);
+  int64 iterations_ GUARDED_BY(mu_);
+  condition_variable one_handler_free_;
+  mutex mu_;
+};
+
+void RunHandlerPool::Impl::RecomputePoolStatsLocked() {
+  int num_active_requests = sorted_active_handlers_.size();
+  if (num_active_requests == 0) return;
+
+  int num_threads = inter_op_thread_pool_->NumThreads();
+
+  inter_op_start_.resize(num_active_requests);
+  inter_op_limit_.resize(num_active_requests);
+
+  const int kMinThreadsPerRequest = 3;
+  ComputeInterOpSchedulingRanges(num_active_requests, num_threads,
+                                 kMinThreadsPerRequest, &inter_op_start_,
+                                 &inter_op_limit_);
+
+  for (int i = 0; i < num_active_requests; ++i) {
+    sorted_active_handlers_[i]->set_inter_op_scheduling_range(
+        inter_op_start_[i], inter_op_limit_[i]);
+  }
+
+  if (iterations_++ % 5000 == 0 && VLOG_IS_ON(1)) {
+    VLOG(1) << "Printing time histogram: " << time_hist_.ToString();
+    VLOG(1) << "Active session runs: " << num_active_requests;
+    uint64 now = tensorflow::Env::Default()->NowMicros();
+    string ranges_str = "";
+    string times_str = "";
+    for (int i = 0; i < num_active_requests; ++i) {
+      if (i > 0) {
+        times_str += " ";
+        ranges_str += " ";
+      }
+
+      times_str += strings::StrCat(
+          (now - sorted_active_handlers_[i]->start_time_us()) / 1000.0, " ms.");
+      ranges_str += strings::StrCat("[", inter_op_start_[i], ", ",
+                                    inter_op_limit_[i], ")");
+    }
+    VLOG(1) << "Elapsed times are: " << times_str;
+    VLOG(1) << "Ranges are: " << ranges_str;
+  }
+}
+
+void RunHandler::Impl::ScheduleInterOpClosure(std::function<void()> fn) {
+  std::uint_fast32_t start = 0, limit = 0;
+  DecodePartition(inter_op_scheduling_range(), &start, &limit);
+  pool_impl_->inter_op_thread_pool()->Schedule(std::move(fn));
+}
+
+void RunHandler::Impl::Reset() {
+  set_inter_op_scheduling_range(
+      0, pool_impl_->inter_op_thread_pool()->NumThreads());
+  start_time_us_ = tensorflow::Env::Default()->NowMicros();
+}
+
+RunHandlerPool::RunHandlerPool(int num_inter_op_threads)
+    : impl_(new Impl(num_inter_op_threads)) {}
+
+RunHandlerPool::~RunHandlerPool() {}
+
+std::unique_ptr<RunHandler> RunHandlerPool::Get() { return impl_->Get(); }
+
+RunHandler::RunHandler(Impl* impl) : impl_(impl) {}
+
+void RunHandler::ScheduleInterOpClosure(std::function<void()> fn) {
+  impl_->ScheduleInterOpClosure(std::move(fn));
+}
+
+RunHandler::~RunHandler() { impl_->pool_impl()->ReleaseHandler(impl_); }
+}  // namespace tensorflow
diff --git a/tensorflow/core/framework/run_handler.h b/tensorflow/core/framework/run_handler.h
new file mode 100644
index 0000000000..72fa6301b4
--- /dev/null
+++ b/tensorflow/core/framework/run_handler.h
@@ -0,0 +1,95 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_
+#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_
+
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/histogram/histogram.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+
+class RunHandler;
+
+// RunHandlerPool is a fixed size pool of pre-allocated RunHandlers
+// that can be used for tracking inter-op work for a given Session::Run().
+// RunHandler(s) in the pool are initially 'inactive'. A RunHandler becomes
+// 'active' when its unique_ptr is returned by Get() and is being used by a
+// client. It becomes 'inactive' once more when its unique_ptr gets destroyed.
+//
+// Expected usage:
+//
+// * Create a single RunHandlerPool (say run_handler_pool_).
+//
+// * When a Session::Run() is invoked, obtain a handler by:
+// auto handler = run_handler_pool_->Get();
+//
+// * Use handler for scheduling all inter-op work by:
+// handler->ScheduleInterOpClosure(closure);
+//
+// This class is thread safe.
+class RunHandlerPool {
+ public:
+  explicit RunHandlerPool(int num_inter_op_threads);
+  ~RunHandlerPool();
+
+  // Returns an inactive RunHandler from the pool.
+  //
+  // RunHandlers in RunHandlerPool are initially 'inactive'.
+  // A RunHandler becomes 'active' when its unique_ptr its returned by Get()
+  // and is being used by a client.  It becomes 'inactive' once more when the
+  // unique_ptr is destroyed.
+  //
+  // Will block unless there is an inactive handler.
+  std::unique_ptr<RunHandler> Get();
+
+ private:
+  class Impl;
+  friend class RunHandler;
+
+  std::unique_ptr<Impl> impl_;
+};
+
+// RunHandler can be used to schedule inter-op closures to run on a global pool
+// shared across all Session::Run(s).
+//
+// It can only be created via RunHandlerPool::Get().
+//
+// This class can be used instead of directly scheduling closures on a global
+// pool since it maintains a global view across all sessions and optimizes pool
+// scheduling to improve (median and tail) latency.
+//
+// This class is thread safe.
+class RunHandler {
+ public:
+  void ScheduleInterOpClosure(std::function<void()> fn);
+
+  ~RunHandler();
+
+ private:
+  class Impl;
+  friend class RunHandlerPool::Impl;
+
+  explicit RunHandler(Impl* impl);
+
+  Impl* impl_;  // NOT OWNED.
+};
+
+}  // end namespace tensorflow.
+
+#endif  // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_
diff --git a/tensorflow/core/framework/run_handler_util.cc b/tensorflow/core/framework/run_handler_util.cc
new file mode 100644
index 0000000000..3087998c69
--- /dev/null
+++ b/tensorflow/core/framework/run_handler_util.cc
@@ -0,0 +1,57 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/run_handler_util.h"
+
+#include <algorithm>
+#include <cmath>
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads,
+                                    int min_threads_per_request,
+                                    std::vector<std::uint_fast32_t>* start_vec,
+                                    std::vector<std::uint_fast32_t>* end_vec) {
+  // Each request is expected to have weight W[i] = num_active_requests - i.
+  // Therefore, total_weight = sum of all request weights.
+  float total_weight = 0.5f * num_active_requests * (num_active_requests + 1);
+  float demand_factor = static_cast<float>(num_threads) / total_weight;
+  float last_cumulative_weight = 0.0;
+  min_threads_per_request = std::max(1, min_threads_per_request);
+  for (int i = 0; i != num_active_requests; i++) {
+    float cumulative_weight =
+        static_cast<float>(i + 1) *
+        (num_active_requests - static_cast<float>(i) * 0.5f);
+    float weight = cumulative_weight - last_cumulative_weight;
+    // Quantize thread_demand by rounding up, and also satisfying
+    // `min_threads_per_request` constraint.
+    // Note: We subtract a small epsilon (0.00001) to prevent ceil(..) from
+    // rounding weights like 4.0 to 5.
+    int demand =
+        std::max(min_threads_per_request,
+                 static_cast<int>(ceil(weight * demand_factor - 0.00001f)));
+    // For the quantized range [start, end); compute the floor of real start,
+    // and expand downwards from there with length `demand` and adjust for
+    // boundary conditions.
+    int start = last_cumulative_weight * demand_factor;
+    int end = std::min(num_threads, start + demand);
+    start = std::max(0, std::min(start, end - demand));
+    start_vec->at(i) = start;
+    end_vec->at(i) = end;
+    last_cumulative_weight = cumulative_weight;
+  }
+}
+}  // namespace tensorflow
diff --git a/tensorflow/core/framework/run_handler_util.h b/tensorflow/core/framework/run_handler_util.h
new file mode 100644
index 0000000000..c0c36aeccb
--- /dev/null
+++ b/tensorflow/core/framework/run_handler_util.h
@@ -0,0 +1,43 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_
+#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_
+
+#include <cstdint>
+#include <vector>
+
+namespace tensorflow {
+
+// Assign thread ranges to requests.
+// Requests are numbered 0...num_active_requests-1, and
+// threads are numbered 0...num_threads-1.
+// On return, the range start_vec->at(i)...end_vec->at(i)-1
+// indicates the subrange of the threads available to request i.
+// The ranges given to different requests may overlap.
+// Lower numbered requests will tend to be assigned more threads.
+// Thus, a client might associate older requests with lower
+// array indices so they receive access to more threads.
+// However, the routine ensures that each request is given access
+// to at least min(min_threads_per_request, num_threads)  threads.
+// Every thread will be assigned to at least one request range,
+// assuming there is at least one request.
+void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads,
+                                    int min_threads_per_request,
+                                    std::vector<std::uint_fast32_t>* start_vec,
+                                    std::vector<std::uint_fast32_t>* end_vec);
+
+}  // end namespace tensorflow
+#endif  // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_
diff --git a/tensorflow/core/framework/run_handler_util_test.cc b/tensorflow/core/framework/run_handler_util_test.cc
new file mode 100644
index 0000000000..a1928c132b
--- /dev/null
+++ b/tensorflow/core/framework/run_handler_util_test.cc
@@ -0,0 +1,93 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/run_handler_util.h"
+
+#include <vector>
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/test.h"
+namespace tensorflow {
+namespace {
+
+void VerifyFunction(int num_active_requests, int num_threads,
+                    int min_threads_per_request, bool print_stats = false) {
+  if (print_stats) {
+    LOG(INFO) << "Test case# num_active_requests: " << num_active_requests
+              << " num_threads: " << num_threads
+              << " min_threads: " << min_threads_per_request;
+  }
+  std::vector<std::uint_fast32_t> start(num_active_requests);
+  std::vector<std::uint_fast32_t> end(num_active_requests);
+
+  ComputeInterOpSchedulingRanges(num_active_requests, num_threads,
+                                 min_threads_per_request, &start, &end);
+  string range_str = "";
+  for (int i = 0; i < num_active_requests; ++i) {
+    if (i > 0) range_str += " ";
+    range_str += strings::StrCat("[", start[i], ", ", end[i], ")");
+
+    ASSERT_GE(start[i], 0) << range_str;
+    ASSERT_LE(end[i], num_threads) << range_str;
+    if (i > 0) {
+      // Due to linearly decreasing demand, #threads(i - 1) >= #threads(i)
+      ASSERT_GE(end[i - 1] - start[i - 1], end[i] - start[i]) << range_str;
+      // No missing threads.
+      ASSERT_GE(end[i - 1], start[i]) << range_str;
+    }
+    // Each interval is at least of size 'min_threads_per_request'.
+    ASSERT_GE((end[i] - start[i]), min_threads_per_request) << range_str;
+    // Verify that assigned (quantized) threads is not overly estimated
+    // from real demand, when the demand is high (>=
+    // min_threads_per_request).
+    float entry_weight = num_active_requests - i;
+    float total_weight = 0.5f * num_active_requests * (num_active_requests + 1);
+    float thread_demand = (entry_weight * num_threads) / total_weight;
+    if (thread_demand > min_threads_per_request) {
+      // We expect some over-estimation of threads due to quantization,
+      // but we hope it's not more than 1 extra thread.
+      ASSERT_NEAR(end[i] - start[i], thread_demand, 1.0)
+          << "Ranges: " << range_str << " thread_demand: " << thread_demand
+          << " i: " << i;
+    }
+  }
+  ASSERT_EQ(end[num_active_requests - 1], num_threads);
+  ASSERT_EQ(start[0], 0);
+  if (print_stats) {
+    LOG(INFO) << "Assigned ranges: " << range_str;
+  }
+}
+
+TEST(RunHandlerUtilTest, TestComputeInterOpSchedulingRanges) {
+  const int kMinThreadsPerRequestBound = 12;
+  const int kMaxActiveRequests = 128;
+  const int kMaxThreads = 128;
+
+  for (int min_threads_per_request = 1;
+       min_threads_per_request <= kMinThreadsPerRequestBound;
+       ++min_threads_per_request) {
+    for (int num_active_requests = 1; num_active_requests <= kMaxActiveRequests;
+         ++num_active_requests) {
+      for (int num_threads = min_threads_per_request;
+           num_threads <= kMaxThreads; ++num_threads) {
+        VerifyFunction(num_active_requests, num_threads,
+                       min_threads_per_request);
+      }
+    }
+  }
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index 85cd02350a..104ab039cb 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -453,6 +453,11 @@ message RunOptions {
     // same group_key value (in a distributed computation where tasks
     // run disjoint graphs).
     int64 collective_graph_key = 1;
+    // If true, then operations (using the inter-op pool) across all
+    // session::run() calls will be centrally scheduled, optimizing for (median
+    // and tail) latency.
+    // Consider using this option for CPU-bound workloads like inference.
+    bool use_run_handler_pool = 2;
   };
 
   Experimental experimental = 8;
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
index 537e73aa89..47b5b56faf 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
@@ -8,5 +8,11 @@ tf_proto {
       label: LABEL_OPTIONAL
       type: TYPE_INT64
     }
+    field {
+      name: "use_run_handler_pool"
+      number: 2
+      label: LABEL_OPTIONAL
+      type: TYPE_BOOL
+    }
   }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
index cec04a2bf0..c0c2e7b9f8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
@@ -55,6 +55,12 @@ tf_proto {
         label: LABEL_OPTIONAL
         type: TYPE_INT64
       }
+      field {
+        name: "use_run_handler_pool"
+        number: 2
+        label: LABEL_OPTIONAL
+        type: TYPE_BOOL
+      }
     }
     enum_type {
       name: "TraceLevel"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt
index 537e73aa89..47b5b56faf 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt
@@ -8,5 +8,11 @@ tf_proto {
       label: LABEL_OPTIONAL
       type: TYPE_INT64
     }
+    field {
+      name: "use_run_handler_pool"
+      number: 2
+      label: LABEL_OPTIONAL
+      type: TYPE_BOOL
+    }
   }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt
index cec04a2bf0..c0c2e7b9f8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt
@@ -55,6 +55,12 @@ tf_proto {
         label: LABEL_OPTIONAL
         type: TYPE_INT64
       }
+      field {
+        name: "use_run_handler_pool"
+        number: 2
+        label: LABEL_OPTIONAL
+        type: TYPE_BOOL
+      }
     }
     enum_type {
       name: "TraceLevel"
-- 
GitLab


From 1084594657a5d139102ac794f84d1427a710e39a Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Thu, 27 Sep 2018 12:51:52 -0700
Subject: [PATCH 0808/1357] TFLite: Rename ResetVariableTensorsToZero ->
 ResetVariableTensors PiperOrigin-RevId: 214820383

---
 .../contrib/lite/experimental/c/c_api_experimental.cc    | 5 ++---
 .../contrib/lite/experimental/c/c_api_experimental.h     | 2 +-
 .../lite/experimental/c/c_api_experimental_test.cc       | 2 +-
 tensorflow/contrib/lite/interpreter.cc                   | 9 ++++-----
 tensorflow/contrib/lite/interpreter.h                    | 7 +++++--
 tensorflow/contrib/lite/kernels/test_util.cc             | 2 +-
 tensorflow/contrib/lite/python/interpreter.py            | 4 ++--
 .../python/interpreter_wrapper/interpreter_wrapper.cc    | 4 ++--
 .../python/interpreter_wrapper/interpreter_wrapper.h     | 2 +-
 tensorflow/contrib/lite/testing/tflite_driver.cc         | 2 +-
 10 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc
index 0f16595811..29f8701f53 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc
+++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc
@@ -21,9 +21,8 @@ limitations under the License.
 extern "C" {
 #endif  // __cplusplus
 
-TFL_Status TFL_InterpreterResetVariableTensorsToZero(
-    TFL_Interpreter* interpreter) {
-  return interpreter->impl->ResetVariableTensorsToZero();
+TFL_Status TFL_InterpreterResetVariableTensors(TFL_Interpreter* interpreter) {
+  return interpreter->impl->ResetVariableTensors();
 }
 
 void TFL_InterpreterOptionsAddBuiltinOp(TFL_InterpreterOptions* options,
diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h
index b8de7b9964..fca5d92f77 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h
+++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h
@@ -25,7 +25,7 @@ extern "C" {
 typedef TfLiteBuiltinOperator TFL_BuiltinOperator;
 
 // Resets all variable tensors to zero.
-TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensorsToZero(
+TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensors(
     TFL_Interpreter* interpreter);
 
 // Adds an op registration for a builtin operator.
diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc
index d86ad00d6d..1b1bedb754 100644
--- a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc
+++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc
@@ -44,7 +44,7 @@ TEST(CApiExperimentalSimple, Smoke) {
   TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options);
   ASSERT_NE(interpreter, nullptr);
   ASSERT_EQ(TFL_InterpreterAllocateTensors(interpreter), kTfLiteOk);
-  EXPECT_EQ(TFL_InterpreterResetVariableTensorsToZero(interpreter), kTfLiteOk);
+  EXPECT_EQ(TFL_InterpreterResetVariableTensors(interpreter), kTfLiteOk);
   EXPECT_EQ(TFL_InterpreterInvoke(interpreter), kTfLiteOk);
 
   TFL_DeleteInterpreter(interpreter);
diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc
index 2657bcd42b..88e41ffc55 100644
--- a/tensorflow/contrib/lite/interpreter.cc
+++ b/tensorflow/contrib/lite/interpreter.cc
@@ -451,16 +451,15 @@ TfLiteStatus Interpreter::AllocateTensors() {
 
   // Reset the variable tensors to zero after (re)allocating the tensors.
   // Developers shouldn't rely on the side effect of this function to reset
-  // variable tesnsors. They should call `ResetVariableTensorsToZero` directly
+  // variable tesnsors. They should call `ResetVariableTensors` directly
   // instead.
-  ResetVariableTensorsToZero();
+  ResetVariableTensors();
 
   return kTfLiteOk;
 }
 
-// TODO(ycling): Consider to provide other functions to initialize variable
-// tensors to non-zero values.
-TfLiteStatus Interpreter::ResetVariableTensorsToZero() {
+// TODO(ycling): Support non-zero default values.
+TfLiteStatus Interpreter::ResetVariableTensors() {
   for (auto& tensor : tensors_) {
     if (!tensor.is_variable) {
       continue;
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index aa2bc4def6..7ef736d01b 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -421,9 +421,12 @@ class Interpreter {
     allow_buffer_handle_output_ = allow_buffer_handle_output;
   }
 
-  // Reset all variable tensors to zero.
+  // Reset all variable tensors to the default value.
+  // If a variable tensor doesn't have a buffer, reset it to zero.
+  // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
+  // to the value of the buffer.
   // WARNING: This is an experimental API and subject to change.
-  TfLiteStatus ResetVariableTensorsToZero();
+  TfLiteStatus ResetVariableTensors();
 
   // Retrieve an operator's description of its work, for profiling purposes.
   const char* OpProfilingString(const TfLiteRegistration& op_reg,
diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc
index 0fdb0a3935..05a7c23ba1 100644
--- a/tensorflow/contrib/lite/kernels/test_util.cc
+++ b/tensorflow/contrib/lite/kernels/test_util.cc
@@ -122,7 +122,7 @@ void SingleOpModel::BuildInterpreter(std::vector<std::vector<int>> input_shapes,
 
   CHECK(interpreter_->AllocateTensors() == kTfLiteOk)
       << "Cannot allocate tensors";
-  interpreter_->ResetVariableTensorsToZero();
+  interpreter_->ResetVariableTensors();
 }
 
 void SingleOpModel::Invoke() { CHECK(interpreter_->Invoke() == kTfLiteOk); }
diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py
index 1be61fe053..5700bf7892 100644
--- a/tensorflow/contrib/lite/python/interpreter.py
+++ b/tensorflow/contrib/lite/python/interpreter.py
@@ -253,5 +253,5 @@ class Interpreter(object):
     self._ensure_safe()
     self._interpreter.Invoke()
 
-  def reset_all_variables_to_zero(self):
-    return self._interpreter.ResetVariableTensorsToZero()
+  def reset_all_variables(self):
+    return self._interpreter.ResetVariableTensors()
diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
index 9ab05f3068..418f19a179 100644
--- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
+++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
@@ -466,9 +466,9 @@ InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromBuffer(
                                   error_msg);
 }
 
-PyObject* InterpreterWrapper::ResetVariableTensorsToZero() {
+PyObject* InterpreterWrapper::ResetVariableTensors() {
   TFLITE_PY_ENSURE_VALID_INTERPRETER();
-  TFLITE_PY_CHECK(interpreter_->ResetVariableTensorsToZero());
+  TFLITE_PY_CHECK(interpreter_->ResetVariableTensors());
   Py_RETURN_NONE;
 }
 
diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
index 641dd93db5..f5ca81e62a 100644
--- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
+++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
@@ -65,7 +65,7 @@ class InterpreterWrapper {
   PyObject* TensorQuantization(int i) const;
   PyObject* SetTensor(int i, PyObject* value);
   PyObject* GetTensor(int i) const;
-  PyObject* ResetVariableTensorsToZero();
+  PyObject* ResetVariableTensors();
 
   // Returns a reference to tensor index i as a numpy array. The base_object
   // should be the interpreter object providing the memory.
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc
index 1836eb53b9..17aa8cb293 100644
--- a/tensorflow/contrib/lite/testing/tflite_driver.cc
+++ b/tensorflow/contrib/lite/testing/tflite_driver.cc
@@ -301,7 +301,7 @@ bool TfLiteDriver::CheckResults() {
 }
 
 void TfLiteDriver::ResetLSTMStateTensors() {
-  interpreter_->ResetVariableTensorsToZero();
+  interpreter_->ResetVariableTensors();
 }
 
 }  // namespace testing
-- 
GitLab


From 4cedc8b6e738b7a188c9c091cf667bacafae44b7 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 27 Sep 2018 13:18:33 -0700
Subject: [PATCH 0809/1357] Updating the V2 variables API.

PiperOrigin-RevId: 214824023
---
 .../compiler/aot/tests/make_test_graphs.py    |   4 +-
 tensorflow/compiler/tests/lstm.py             |   2 +-
 .../dnn_tree_combined_estimator_test.py       |   2 +-
 .../python/external_regret_optimizer.py       |   4 +-
 .../python/swap_regret_optimizer.py           |   9 +-
 .../copy_graph/python/util/copy_elements.py   |   6 +-
 .../copy_graph/python/util/copy_test.py       |   4 +-
 .../python/kernel_tests/iterator_ops_test.py  |   2 +-
 .../python/kernel_tests/moving_stats_test.py  |   6 +-
 .../estimator/python/estimator/hooks_test.py  |   2 +-
 .../framework/python/ops/variables_test.py    |  28 +-
 .../graph_editor/tests/transform_test.py      |   2 +-
 .../layers/python/layers/layers_test.py       |   4 +-
 .../learn/python/learn/graph_actions_test.py  |  12 +-
 .../learn/python/learn/monitors_test.py       |  10 +-
 .../python/kernel_tests/sdca_ops_test.py      |   8 +-
 .../metrics/python/ops/metric_ops_test.py     |  19 +-
 .../contrib/model_pruning/python/pruning.py   |   3 +-
 .../model_pruning/python/pruning_test.py      |  22 +-
 .../opt/python/training/addsign_test.py       |  12 +-
 .../drop_stale_gradient_optimizer_test.py     |   4 +-
 .../training/external_optimizer_test.py       |  22 +-
 .../training/model_average_optimizer_test.py  |   3 +-
 .../opt/python/training/powersign_test.py     |  12 +-
 .../rnn/python/kernel_tests/core_rnn_test.py  |   4 +-
 .../contrib/session_bundle/exporter_test.py   |   6 +-
 .../kernel_tests/scatter_add_ndim_op_test.py  |  14 +-
 .../tensorrt/python/trt_convert_test.py       |   2 +-
 .../python/training/device_setter_test.py     |   8 +-
 tensorflow/python/client/session_test.py      |   2 +-
 .../python/debug/cli/analyzer_cli_test.py     |  20 +-
 .../python/debug/cli/stepper_cli_test.py      |   4 +-
 .../python/debug/lib/debug_utils_test.py      |   4 +-
 .../debug/lib/dist_session_debug_grpc_test.py |   4 +-
 .../python/debug/lib/grpc_large_data_test.py  |  12 +-
 .../debug/lib/session_debug_file_test.py      |   4 +-
 .../debug/lib/session_debug_grpc_test.py      |  46 +--
 .../python/debug/lib/session_debug_testlib.py |  90 ++---
 tensorflow/python/debug/lib/stepper_test.py   |  14 +-
 .../debug/wrappers/dumping_wrapper_test.py    |   2 +-
 .../debug/wrappers/local_cli_wrapper_test.py  |  14 +-
 tensorflow/python/estimator/estimator_test.py |  56 +--
 tensorflow/python/framework/function_test.py  |   2 +-
 .../python/framework/graph_util_test.py       |   8 +-
 tensorflow/python/framework/subscribe_test.py |   4 +-
 tensorflow/python/grappler/item_test.py       |   2 +-
 .../python/grappler/memory_optimizer_test.py  |  10 +-
 .../python/grappler/tf_optimizer_test.py      |   2 +-
 tensorflow/python/keras/engine/base_layer.py  |   4 +-
 .../python/kernel_tests/array_ops_test.py     |   4 +-
 .../kernel_tests/control_flow_ops_py_test.py  |  56 +--
 .../kernel_tests/dense_update_ops_test.py     |   6 +-
 .../kernel_tests/identity_op_py_test.py       |   2 +-
 .../resource_variable_ops_test.py             |   2 +-
 .../kernel_tests/scatter_nd_ops_test.py       |   4 +-
 .../python/kernel_tests/scatter_ops_test.py   |   4 +-
 .../kernel_tests/variable_scope_test.py       |   4 +-
 .../python/kernel_tests/variables_test.py     |  36 +-
 tensorflow/python/ops/gradients_test.py       |   2 +-
 tensorflow/python/ops/matmul_benchmark.py     |   8 +-
 tensorflow/python/ops/variable_scope.py       | 117 ++++++-
 tensorflow/python/ops/variables.py            | 323 +++++++++++++++---
 tensorflow/python/saved_model/loader_test.py  |  14 +-
 .../python/saved_model/saved_model_test.py    |  56 +--
 tensorflow/python/tools/freeze_graph_test.py  |   6 +-
 .../python/training/checkpointable/util.py    |   2 +-
 .../training/learning_rate_decay_test.py      |   4 +-
 .../python/training/monitored_session_test.py |  28 +-
 .../python/training/quantize_training_test.py |   3 +-
 .../python/training/queue_runner_test.py      |  22 +-
 tensorflow/python/training/saver_test.py      | 217 ++++++------
 ...server_lib_same_variables_no_clear_test.py |   4 +-
 tensorflow/python/training/server_lib_test.py |  18 +-
 .../python/training/session_manager_test.py   |  98 +++---
 tensorflow/python/training/supervisor_test.py |  52 +--
 .../training/sync_replicas_optimizer_test.py  |  17 +-
 .../python/training/training_ops_test.py      |  32 +-
 .../python/training/training_util_test.py     |   4 +-
 .../api/golden/v1/tensorflow.-variable.pbtxt  |   1 +
 .../tools/api/golden/v1/tensorflow.pbtxt      |   4 +
 .../v2/tensorflow.-variable-scope.pbtxt       | 105 ------
 ...ensorflow.-variable.-save-slice-info.pbtxt |  17 -
 .../api/golden/v2/tensorflow.-variable.pbtxt  | 130 -------
 .../golden/v2/tensorflow.initializers.pbtxt   |  12 -
 .../tools/api/golden/v2/tensorflow.pbtxt      |  92 -----
 .../golden/v2/tensorflow.variable_scope.pbtxt |   9 -
 86 files changed, 1015 insertions(+), 1040 deletions(-)
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-variable-scope.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.variable_scope.pbtxt

diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py
index de135d7a23..64b861a730 100644
--- a/tensorflow/compiler/aot/tests/make_test_graphs.py
+++ b/tensorflow/compiler/aot/tests/make_test_graphs.py
@@ -47,7 +47,7 @@ def tfadd(_):
 
 def tfadd_with_ckpt(out_dir):
   x = array_ops.placeholder(dtypes.int32, name='x_hold')
-  y = variables.Variable(constant_op.constant([0]), name='y_saved')
+  y = variables.VariableV1(constant_op.constant([0]), name='y_saved')
   math_ops.add(x, y, name='x_y_sum')
 
   init_op = variables.initialize_all_variables()
@@ -62,7 +62,7 @@ def tfadd_with_ckpt(out_dir):
 
 def tfadd_with_ckpt_saver(out_dir):
   x = array_ops.placeholder(dtypes.int32, name='x_hold')
-  y = variables.Variable(constant_op.constant([0]), name='y_saved')
+  y = variables.VariableV1(constant_op.constant([0]), name='y_saved')
   math_ops.add(x, y, name='x_y_sum')
 
   init_op = variables.initialize_all_variables()
diff --git a/tensorflow/compiler/tests/lstm.py b/tensorflow/compiler/tests/lstm.py
index 43c469d032..73b3638e80 100644
--- a/tensorflow/compiler/tests/lstm.py
+++ b/tensorflow/compiler/tests/lstm.py
@@ -117,7 +117,7 @@ def LSTMLayer(cell_name, weights, m, c, x_seq, pad_seq):
 
 def RandomVar(shape, name=None):
   """Returns a variable of the given shape initialized to random values."""
-  return variables.Variable(
+  return variables.VariableV1(
       random_ops.random_uniform(shape), dtype=dtypes.float32, name=name)
 
 
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
index 839eedd3a8..04baa329a0 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
@@ -189,7 +189,7 @@ class CoreDNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase):
     # Train for a few steps.
     est.train(input_fn=_train_input_fn, steps=1000)
     # 10 steps for dnn, 3  for 1 tree of depth 3 + 1 after the tree finished
-    self._assert_checkpoint(est.model_dir, global_step=14)
+    self._assert_checkpoint(est.model_dir, global_step=15)
     res = est.evaluate(input_fn=_eval_input_fn, steps=1)
     self.assertLess(0.5, res["auc"])
     est.predict(input_fn=_eval_input_fn)
diff --git a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py
index d1af15f7e4..67f8ac2b93 100644
--- a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py
+++ b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py
@@ -102,9 +102,9 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
         0.0,
         (radius - standard_ops.reduce_sum(multipliers)) / standard_ops.maximum(
             1.0, standard_ops.reduce_sum(inactive)))
-    multipliers += scale * inactive
+    multipliers = multipliers + (scale * inactive)
     new_inactive = standard_ops.cast(multipliers > 0, multipliers.dtype)
-    multipliers *= new_inactive
+    multipliers = multipliers * new_inactive
     return (iteration, multipliers, new_inactive, inactive)
 
   iteration = standard_ops.constant(0)
diff --git a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py
index 2c673d9347..a6cb1f62f0 100644
--- a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py
+++ b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py
@@ -175,9 +175,9 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
     scale = (1.0 - standard_ops.reduce_sum(
         matrix, axis=0, keepdims=True)) / standard_ops.maximum(
             1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True))
-    matrix += scale * inactive
+    matrix = matrix + (scale * inactive)
     new_inactive = standard_ops.cast(matrix > 0, matrix.dtype)
-    matrix *= new_inactive
+    matrix = matrix * new_inactive
     return (iteration, matrix, new_inactive, inactive)
 
   iteration = standard_ops.constant(0)
@@ -210,8 +210,9 @@ def _project_log_stochastic_matrix_wrt_kl_divergence(log_matrix):
 
   # For numerical reasons, make sure that the largest matrix element is zero
   # before exponentiating.
-  log_matrix -= standard_ops.reduce_max(log_matrix, axis=0, keepdims=True)
-  log_matrix -= standard_ops.log(
+  log_matrix = log_matrix - standard_ops.reduce_max(
+      log_matrix, axis=0, keepdims=True)
+  log_matrix = log_matrix - standard_ops.log(
       standard_ops.reduce_sum(
           standard_ops.exp(log_matrix), axis=0, keepdims=True))
   return log_matrix
diff --git a/tensorflow/contrib/copy_graph/python/util/copy_elements.py b/tensorflow/contrib/copy_graph/python/util/copy_elements.py
index 6c9ab6aeb8..9c5871da34 100644
--- a/tensorflow/contrib/copy_graph/python/util/copy_elements.py
+++ b/tensorflow/contrib/copy_graph/python/util/copy_elements.py
@@ -31,7 +31,7 @@ from __future__ import division
 from __future__ import print_function
 
 from copy import deepcopy
-from tensorflow.python.ops.variables import Variable
+from tensorflow.python.ops.variables import VariableV1
 from tensorflow.python.client.session import Session
 from tensorflow.python.framework import ops
 
@@ -55,7 +55,7 @@ def copy_variable_to_graph(org_instance, to_graph, scope=''):
     TypeError: If `org_instance` is not a `Variable`.
   """
 
-  if not isinstance(org_instance, Variable):
+  if not isinstance(org_instance, VariableV1):
     raise TypeError(str(org_instance) + ' is not a Variable')
 
   #The name of the new variable
@@ -88,7 +88,7 @@ def copy_variable_to_graph(org_instance, to_graph, scope=''):
 
   #Initialize the new variable
   with to_graph.as_default():
-    new_var = Variable(
+    new_var = VariableV1(
         init_value,
         trainable,
         name=new_name,
diff --git a/tensorflow/contrib/copy_graph/python/util/copy_test.py b/tensorflow/contrib/copy_graph/python/util/copy_test.py
index 05744bec4e..ba97c78456 100644
--- a/tensorflow/contrib/copy_graph/python/util/copy_test.py
+++ b/tensorflow/contrib/copy_graph/python/util/copy_test.py
@@ -36,7 +36,7 @@ class CopyVariablesTest(test.TestCase):
 
     with graph1.as_default():
       #Define a Variable in graph1
-      some_var = variables.Variable(2)
+      some_var = variables.VariableV1(2)
       #Initialize session
       sess1 = session_lib.Session()
       #Initialize the Variable
@@ -72,7 +72,7 @@ class CopyOpsTest(test.TestCase):
     with graph1.as_default():
       #Initialize a basic expression y = ax + b
       x = array_ops.placeholder("float")
-      a = variables.Variable(3.0)
+      a = variables.VariableV1(3.0)
       b = constant_op.constant(4.0)
       ax = math_ops.multiply(x, a)
       y = math_ops.add(ax, b)
diff --git a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
index 704c0d1eb2..7e2326bd17 100644
--- a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
@@ -42,7 +42,7 @@ class CheckpointInputPipelineHookTest(test.TestCase):
     del config
     global_step = training_util.get_or_create_global_step()
     update_global_step_op = global_step.assign_add(1)
-    latest_feature = variables.Variable(
+    latest_feature = variables.VariableV1(
         0, name='latest_feature', dtype=dtypes.int64)
     store_latest_feature_op = latest_feature.assign(features)
     ops.add_to_collection('my_vars', global_step)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py b/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py
index 3c988dad8a..be7c756bea 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py
@@ -38,8 +38,8 @@ class MovingReduceMeanVarianceTest(test.TestCase):
     true_stddev = np.array([[1.1, 0.5]])
     with self.cached_session() as sess:
       # Start "x" out with this mean.
-      mean_var = variables.Variable(array_ops.zeros_like(true_mean))
-      variance_var = variables.Variable(array_ops.ones_like(true_stddev))
+      mean_var = variables.VariableV1(array_ops.zeros_like(true_mean))
+      variance_var = variables.VariableV1(array_ops.ones_like(true_stddev))
       x = random_ops.random_normal(shape, dtype=np.float64, seed=0)
       x = true_stddev * x + true_mean
       ema, emv = moving_stats.assign_moving_mean_variance(
@@ -115,7 +115,7 @@ class MovingLogExponentialMovingMeanExpTest(test.TestCase):
       # Start "x" out with this mean.
       x = random_ops.random_normal(shape, dtype=np.float64, seed=0)
       x = true_stddev * x + true_mean
-      log_mean_exp_var = variables.Variable(array_ops.zeros_like(true_mean))
+      log_mean_exp_var = variables.VariableV1(array_ops.zeros_like(true_mean))
       variables.global_variables_initializer().run()
       log_mean_exp = moving_stats.assign_log_moving_mean_exp(
           log_mean_exp_var, x, decay=decay)
diff --git a/tensorflow/contrib/estimator/python/estimator/hooks_test.py b/tensorflow/contrib/estimator/python/estimator/hooks_test.py
index c6c6cad95a..62ffad56da 100644
--- a/tensorflow/contrib/estimator/python/estimator/hooks_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/hooks_test.py
@@ -294,7 +294,7 @@ class InMemoryEvaluatorHookTest(test.TestCase):
 
     def model_fn(features, labels, mode):
       _, _ = features, labels
-      w = variables.Variable(
+      w = variables.VariableV1(
           initial_value=[0.],
           trainable=False,
           collections=[ops.GraphKeys.SAVEABLE_OBJECTS])
diff --git a/tensorflow/contrib/framework/python/ops/variables_test.py b/tensorflow/contrib/framework/python/ops/variables_test.py
index f9b0efd1da..c223df5b6e 100644
--- a/tensorflow/contrib/framework/python/ops/variables_test.py
+++ b/tensorflow/contrib/framework/python/ops/variables_test.py
@@ -192,7 +192,7 @@ class GlobalStepTest(test.TestCase):
   def test_invalid_dtype(self):
     with ops.Graph().as_default() as g:
       self.assertEquals(None, variables_lib2.get_global_step())
-      variables_lib.Variable(
+      variables_lib.VariableV1(
           0.0,
           trainable=False,
           dtype=dtypes.float32,
@@ -205,7 +205,7 @@ class GlobalStepTest(test.TestCase):
   def test_invalid_shape(self):
     with ops.Graph().as_default() as g:
       self.assertEquals(None, variables_lib2.get_global_step())
-      variables_lib.Variable(
+      variables_lib.VariableV1(
           [0],
           trainable=False,
           dtype=dtypes.int32,
@@ -229,7 +229,7 @@ class GlobalStepTest(test.TestCase):
   def test_get_global_step(self):
     with ops.Graph().as_default() as g:
       self.assertEquals(None, variables_lib2.get_global_step())
-      variables_lib.Variable(
+      variables_lib.VariableV1(
           0,
           trainable=False,
           dtype=dtypes.int32,
@@ -607,10 +607,10 @@ class ModelVariablesTest(test.TestCase):
     with self.cached_session():
       with variable_scope.variable_scope('A'):
         variables_lib2.local_variable([5])
-        a = variables_lib.Variable([5])
+        a = variables_lib.VariableV1([5])
       with variable_scope.variable_scope('B'):
         variables_lib2.local_variable([5])
-        b = variables_lib.Variable([5])
+        b = variables_lib.VariableV1([5])
       self.assertEquals([a], variables_lib2.get_trainable_variables('A'))
       self.assertEquals([b], variables_lib2.get_trainable_variables('B'))
 
@@ -953,7 +953,7 @@ class AssignFromCheckpointTest(test.TestCase):
       # Create a set of variables to save in the checkpoint.
       for var_name in var_names_to_values:
         var_value = var_names_to_values[var_name]
-        var_list.append(variables_lib.Variable(var_value, name=var_name))
+        var_list.append(variables_lib.VariableV1(var_value, name=var_name))
       saver = saver_lib.Saver(var_list)
       init_op = variables_lib.variables_initializer(var_list)
       sess.run(init_op)
@@ -1106,7 +1106,7 @@ class AssignFromCheckpointFnTest(test.TestCase):
       # Create a set of variables to save in the checkpoint.
       for var_name in var_names_to_values:
         var_value = var_names_to_values[var_name]
-        var_list.append(variables_lib.Variable(var_value, name=var_name))
+        var_list.append(variables_lib.VariableV1(var_value, name=var_name))
       saver = saver_lib.Saver(var_list)
       init_op = variables_lib.variables_initializer(var_list)
       sess.run(init_op)
@@ -1297,7 +1297,7 @@ class AssignFromCheckpointFnTest(test.TestCase):
 class ZeroInitializerOpTest(test.TestCase):
 
   def _testZeroInitializer(self, shape, initializer, use_init):
-    var = variables_lib.Variable(initializer)
+    var = variables_lib.VariableV1(initializer)
     var_zero = variables_lib2.zero_initializer(var)
     with self.cached_session() as sess:
       with self.assertRaisesOpError('Attempting to use uninitialized value'):
@@ -1350,12 +1350,12 @@ class FilterVariablesTest(test.TestCase):
     g = ops.Graph()
     with g.as_default():
       var_list = []
-      var_list.append(variables_lib.Variable(0, name='conv1/weights'))
-      var_list.append(variables_lib.Variable(0, name='conv1/biases'))
-      var_list.append(variables_lib.Variable(0, name='conv2/weights'))
-      var_list.append(variables_lib.Variable(0, name='conv2/biases'))
-      var_list.append(variables_lib.Variable(0, name='clfs/weights'))
-      var_list.append(variables_lib.Variable(0, name='clfs/biases'))
+      var_list.append(variables_lib.VariableV1(0, name='conv1/weights'))
+      var_list.append(variables_lib.VariableV1(0, name='conv1/biases'))
+      var_list.append(variables_lib.VariableV1(0, name='conv2/weights'))
+      var_list.append(variables_lib.VariableV1(0, name='conv2/biases'))
+      var_list.append(variables_lib.VariableV1(0, name='clfs/weights'))
+      var_list.append(variables_lib.VariableV1(0, name='clfs/biases'))
       self._var_list = var_list
 
   def _test_filter_variables(self,
diff --git a/tensorflow/contrib/graph_editor/tests/transform_test.py b/tensorflow/contrib/graph_editor/tests/transform_test.py
index 97f38c923f..0ebcdc2688 100644
--- a/tensorflow/contrib/graph_editor/tests/transform_test.py
+++ b/tensorflow/contrib/graph_editor/tests/transform_test.py
@@ -214,7 +214,7 @@ class TransformTest(test.TestCase):
 
   def test_graph_replace_gradients(self):
     ops.reset_default_graph()
-    w = variables.Variable(0.0, name="w")
+    w = variables.VariableV1(0.0, name="w")
     y = math_ops.multiply(math_ops.multiply(w, w, name="mul1"), w, name="mul2")
     g = gradients_impl.gradients(y, w, name="grad")[0]
 
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 85af9de4e4..3b7ae72e9c 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -2360,7 +2360,7 @@ class BatchNormTest(test.TestCase):
             batch_size * height * width, expected_var)
       images = constant_op.constant(
           image_values, shape=image_shape, dtype=dtypes.float32)
-      is_training = variables_lib.Variable(True)
+      is_training = variables_lib.VariableV1(True)
       output = _layers.batch_norm(
           images,
           decay=0.1,
@@ -2507,7 +2507,7 @@ class BatchNormTest(test.TestCase):
             batch_size * height * width, expected_var)
       images = constant_op.constant(
           image_values, shape=image_shape, dtype=dtypes.float32)
-      is_training = variables_lib.Variable(True)
+      is_training = variables_lib.VariableV1(True)
       output = _layers.batch_norm(
           images,
           decay=0.1,
diff --git a/tensorflow/contrib/learn/python/learn/graph_actions_test.py b/tensorflow/contrib/learn/python/learn/graph_actions_test.py
index 33180b778a..a160cb54a3 100644
--- a/tensorflow/contrib/learn/python/learn/graph_actions_test.py
+++ b/tensorflow/contrib/learn/python/learn/graph_actions_test.py
@@ -162,9 +162,9 @@ class GraphActionsTest(test.TestCase):
       Tuple of 3 `Tensor` objects, 2 input and 1 output.
     """
     variables_lib.create_global_step()
-    in0 = variables.Variable(1.0)
+    in0 = variables.VariableV1(1.0)
     in1 = variables_lib.local_variable(2.0)
-    fake_table = variables.Variable(
+    fake_table = variables.VariableV1(
         3.0,
         trainable=False,
         collections=['fake_tables'],
@@ -312,8 +312,8 @@ class GraphActionsTest(test.TestCase):
   def test_evaluate_ready_for_local_init(self):
     with ops.Graph().as_default() as g, self.session(g):
       variables_lib.create_global_step()
-      v = variables.Variable(1.0)
-      variables.Variable(
+      v = variables.VariableV1(1.0)
+      variables.VariableV1(
           v + 1, collections=[ops.GraphKeys.LOCAL_VARIABLES], trainable=False)
       ready_for_local_init_op = variables.report_uninitialized_variables(
           variables.global_variables())
@@ -456,9 +456,9 @@ class GraphActionsTrainTest(test.TestCase):
       Tuple of 3 `Tensor` objects, 2 input and 1 output.
     """
     variables_lib.create_global_step()
-    in0 = variables.Variable(1.0)
+    in0 = variables.VariableV1(1.0)
     in1 = variables_lib.local_variable(2.0)
-    fake_table = variables.Variable(
+    fake_table = variables.VariableV1(
         3.0,
         trainable=False,
         collections=['fake_tables'],
diff --git a/tensorflow/contrib/learn/python/learn/monitors_test.py b/tensorflow/contrib/learn/python/learn/monitors_test.py
index 83e48a36e7..d4a7169bb6 100644
--- a/tensorflow/contrib/learn/python/learn/monitors_test.py
+++ b/tensorflow/contrib/learn/python/learn/monitors_test.py
@@ -247,7 +247,7 @@ class MonitorsTest(test.TestCase):
 
   def test_logging_trainable(self):
     with ops.Graph().as_default() as g, self.session(g):
-      var = variables.Variable(constant_op.constant(42.0), name='foo')
+      var = variables.VariableV1(constant_op.constant(42.0), name='foo')
       var.initializer.run()
       cof = constant_op.constant(1.0)
       loss = math_ops.subtract(
@@ -261,7 +261,7 @@ class MonitorsTest(test.TestCase):
     with ops.Graph().as_default() as g, self.session(g):
       log_dir = 'log/dir'
       summary_writer = testing.FakeSummaryWriter(log_dir, g)
-      var = variables.Variable(0.0)
+      var = variables.VariableV1(0.0)
       var.initializer.run()
       tensor = state_ops.assign_add(var, 1.0)
       summary_op = summary.scalar('my_summary', tensor)
@@ -526,8 +526,8 @@ class MonitorsTest(test.TestCase):
     monitor0 = learn.monitors.GraphDump()
     monitor1 = learn.monitors.GraphDump()
     with ops.Graph().as_default() as g, self.session(g):
-      const_var = variables.Variable(42.0, name='my_const')
-      counter_var = variables.Variable(0.0, name='my_counter')
+      const_var = variables.VariableV1(42.0, name='my_const')
+      counter_var = variables.VariableV1(0.0, name='my_counter')
       assign_add = state_ops.assign_add(counter_var, 1.0, name='my_assign_add')
       variables.global_variables_initializer().run()
 
@@ -569,7 +569,7 @@ class MonitorsTest(test.TestCase):
     monitor = learn.monitors.CaptureVariable(
         var_name='my_assign_add:0', every_n=8, first_n=2)
     with ops.Graph().as_default() as g, self.session(g):
-      var = variables.Variable(0.0, name='my_var')
+      var = variables.VariableV1(0.0, name='my_var')
       var.initializer.run()
       state_ops.assign_add(var, 1.0, name='my_assign_add')
       self._run_monitor(monitor, num_epochs=3, num_steps_per_epoch=10)
diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
index 9ecf023e03..8466dc36d1 100644
--- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
@@ -125,7 +125,7 @@ def make_random_examples_and_variables_dicts(num_examples, dim, num_non_zero):
       ],
       example_ids=[str(i) for i in range(num_examples)])
 
-  weights = variables_lib.Variable(
+  weights = variables_lib.VariableV1(
       array_ops.zeros([dim], dtype=dtypes.float32))
   variables_dict = dict(
       sparse_features_weights=[weights],
@@ -184,7 +184,7 @@ def make_dense_examples_and_variables_dicts(dense_features_values, weights,
     dense_tensors.append(dense_tensor)
     # Add variables of shape [feature_column_dimension].
     dense_weights.append(
-        variables_lib.Variable(
+        variables_lib.VariableV1(
             array_ops.zeros(
                 [dense_tensor.get_shape().as_list()[1]], dtype=dtypes.float32)))
 
@@ -341,7 +341,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
         examples = make_example_dict(example_protos, example_weights)
         # Explicitly make age a [1]-shaped Variable (which cannot be
         # partitioned), while making gender a PartitionedVariable.
-        age_weights = variables_lib.Variable(
+        age_weights = variables_lib.VariableV1(
             array_ops.zeros([1], dtype=dtypes.float32))
         with variable_scope.variable_scope(
             name_or_scope=('variables/shard_{}'.format(num_shards)
@@ -801,7 +801,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
           labels=[1.0, 0.0])
       # Replace with a variable of size 1 instead of 2.
       variables['dense_features_weights'] = [
-          variables_lib.Variable(array_ops.zeros(
+          variables_lib.VariableV1(array_ops.zeros(
               [1], dtype=dtypes.float32))
       ]
       options = dict(
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
index 955b83b44d..fc64f343ab 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
@@ -2069,11 +2069,11 @@ class StreamingDynamicAUCTest(test.TestCase):
     num_batches = 100
     labels = np.array([])
     predictions = np.array([])
-    tf_labels = variables.Variable(
+    tf_labels = variables.VariableV1(
         array_ops.ones(batch_size, dtypes_lib.int32),
         collections=[ops.GraphKeys.LOCAL_VARIABLES],
         dtype=dtypes_lib.int32)
-    tf_predictions = variables.Variable(
+    tf_predictions = variables.VariableV1(
         array_ops.ones(batch_size),
         collections=[ops.GraphKeys.LOCAL_VARIABLES],
         dtype=dtypes_lib.float32)
@@ -2133,15 +2133,15 @@ class StreamingDynamicAUCTest(test.TestCase):
     labels = np.array([])
     predictions = np.array([])
     weights = np.array([])
-    tf_labels = variables.Variable(
+    tf_labels = variables.VariableV1(
         array_ops.ones(batch_size, dtypes_lib.int32),
         collections=[ops.GraphKeys.LOCAL_VARIABLES],
         dtype=dtypes_lib.int32)
-    tf_predictions = variables.Variable(
+    tf_predictions = variables.VariableV1(
         array_ops.ones(batch_size),
         collections=[ops.GraphKeys.LOCAL_VARIABLES],
         dtype=dtypes_lib.float32)
-    tf_weights = variables.Variable(
+    tf_weights = variables.VariableV1(
         array_ops.ones(batch_size),
         collections=[ops.GraphKeys.LOCAL_VARIABLES],
         dtype=dtypes_lib.float32)
@@ -2311,10 +2311,11 @@ class AucWithConfidenceIntervalsTest(test.TestCase):
     num_batches = 100
     labels = np.array([])
     predictions = np.array([])
-    tf_labels = variables.Variable(array_ops.ones(batch_size, dtypes_lib.int32),
-                                   collections=[ops.GraphKeys.LOCAL_VARIABLES],
-                                   dtype=dtypes_lib.int32)
-    tf_predictions = variables.Variable(
+    tf_labels = variables.VariableV1(
+        array_ops.ones(batch_size, dtypes_lib.int32),
+        collections=[ops.GraphKeys.LOCAL_VARIABLES],
+        dtype=dtypes_lib.int32)
+    tf_predictions = variables.VariableV1(
         array_ops.ones(batch_size),
         collections=[ops.GraphKeys.LOCAL_VARIABLES],
         dtype=dtypes_lib.float32)
diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py
index a81abac2fa..67e58ff15d 100644
--- a/tensorflow/contrib/model_pruning/python/pruning.py
+++ b/tensorflow/contrib/model_pruning/python/pruning.py
@@ -247,7 +247,8 @@ class Pruning(object):
 
     # Stores the tensorflow sparsity variable.
     # Built using self._setup_sparsity() or provided externally
-    self._sparsity = sparsity if sparsity else self._setup_sparsity()
+    self._sparsity = (sparsity
+                      if sparsity is not None else self._setup_sparsity())
 
     # List of tensorflow assignments ops for new masks and thresholds
     self._assign_ops = []
diff --git a/tensorflow/contrib/model_pruning/python/pruning_test.py b/tensorflow/contrib/model_pruning/python/pruning_test.py
index cd3d8e76bb..1b6da5ce2b 100644
--- a/tensorflow/contrib/model_pruning/python/pruning_test.py
+++ b/tensorflow/contrib/model_pruning/python/pruning_test.py
@@ -45,7 +45,7 @@ class PruningHParamsTest(test.TestCase):
     # Add global step variable to the graph
     self.global_step = training_util.get_or_create_global_step()
     # Add sparsity
-    self.sparsity = variables.Variable(0.5, name="sparsity")
+    self.sparsity = variables.VariableV1(0.5, name="sparsity")
     # Parse hparams
     self.pruning_hparams = pruning.get_pruning_hparams().parse(
         self.TEST_HPARAMS)
@@ -88,7 +88,7 @@ class PruningTest(test.TestCase):
     width = 10
     height = 20
     with self.cached_session():
-      weights = variables.Variable(
+      weights = variables.VariableV1(
           random_ops.random_normal([width, height], stddev=1), name="weights")
       masked_weights = pruning.apply_mask(weights,
                                           variable_scope.get_variable_scope())
@@ -99,10 +99,10 @@ class PruningTest(test.TestCase):
 
   def testUpdateSingleMask(self):
     with self.cached_session() as session:
-      weights = variables.Variable(
+      weights = variables.VariableV1(
           math_ops.linspace(1.0, 100.0, 100), name="weights")
       masked_weights = pruning.apply_mask(weights)
-      sparsity = variables.Variable(0.5, name="sparsity")
+      sparsity = variables.VariableV1(0.5, name="sparsity")
       p = pruning.Pruning(sparsity=sparsity)
       p._spec.threshold_decay = 0.0
       mask_update_op = p.mask_update_op()
@@ -115,8 +115,8 @@ class PruningTest(test.TestCase):
 
   def _blockMasking(self, hparams, weights, expected_mask):
 
-    threshold = variables.Variable(0.0, name="threshold")
-    sparsity = variables.Variable(0.5, name="sparsity")
+    threshold = variables.VariableV1(0.0, name="threshold")
+    sparsity = variables.VariableV1(0.5, name="sparsity")
     test_spec = ",".join(hparams)
     pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
 
@@ -169,7 +169,7 @@ class PruningTest(test.TestCase):
     partitioner = partitioned_variables.variable_axis_size_partitioner(40)
     with self.cached_session() as session:
       with variable_scope.variable_scope("", partitioner=partitioner):
-        sparsity = variables.Variable(0.5, name="Sparsity")
+        sparsity = variables.VariableV1(0.5, name="Sparsity")
         weights = variable_scope.get_variable(
             "weights", initializer=math_ops.linspace(1.0, 100.0, 100))
         masked_weights = pruning.apply_mask(
@@ -190,10 +190,10 @@ class PruningTest(test.TestCase):
     ]
     test_spec = ",".join(param_list)
     pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
-    weights = variables.Variable(
+    weights = variables.VariableV1(
         math_ops.linspace(1.0, 100.0, 100), name="weights")
     masked_weights = pruning.apply_mask(weights)
-    sparsity = variables.Variable(0.00, name="sparsity")
+    sparsity = variables.VariableV1(0.00, name="sparsity")
     # Set up pruning
     p = pruning.Pruning(pruning_hparams, sparsity=sparsity)
     p._spec.threshold_decay = 0.0
@@ -222,11 +222,11 @@ class PruningTest(test.TestCase):
     pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
 
     with variable_scope.variable_scope("layer1"):
-      w1 = variables.Variable(
+      w1 = variables.VariableV1(
           math_ops.linspace(1.0, 100.0, 100), name="weights")
       _ = pruning.apply_mask(w1)
     with variable_scope.variable_scope("layer2"):
-      w2 = variables.Variable(
+      w2 = variables.VariableV1(
           math_ops.linspace(1.0, 100.0, 100), name="weights")
       _ = pruning.apply_mask(w2)
 
diff --git a/tensorflow/contrib/opt/python/training/addsign_test.py b/tensorflow/contrib/opt/python/training/addsign_test.py
index 628a735e72..6150fa117f 100644
--- a/tensorflow/contrib/opt/python/training/addsign_test.py
+++ b/tensorflow/contrib/opt/python/training/addsign_test.py
@@ -80,9 +80,9 @@ class AddSignTest(test.TestCase):
           global_step = resource_variable_ops.ResourceVariable(
               0, trainable=False)
         else:
-          var0 = variables.Variable(var0_np)
-          var1 = variables.Variable(var1_np)
-          global_step = variables.Variable(
+          var0 = variables.VariableV1(var0_np)
+          var1 = variables.VariableV1(var1_np)
+          global_step = variables.VariableV1(
               0, trainable=False)
         grads0 = constant_op.constant(grads0_np)
         grads1 = constant_op.constant(grads1_np)
@@ -183,9 +183,9 @@ class AddSignTest(test.TestCase):
           global_step = resource_variable_ops.ResourceVariable(
               0, trainable=False)
         else:
-          var0 = variables.Variable(var0_np)
-          var1 = variables.Variable(var1_np)
-          global_step = variables.Variable(
+          var0 = variables.VariableV1(var0_np)
+          var1 = variables.VariableV1(var1_np)
+          global_step = variables.VariableV1(
               0, trainable=False)
         grads0_np_indices = np.array([0, 1], dtype=np.int32)
         grads0 = ops.IndexedSlices(
diff --git a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer_test.py b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer_test.py
index 53232082e1..0a69096768 100644
--- a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer_test.py
@@ -61,8 +61,8 @@ def _get_workers(num_workers, staleness):
     graph = ops.Graph()
     with graph.as_default():
       global_step = training_util.create_global_step()
-      var_0 = variables.Variable(0.0, name='v0')
-      var_1 = variables.Variable(1.0, name='v1')
+      var_0 = variables.VariableV1(0.0, name='v0')
+      var_1 = variables.VariableV1(1.0, name='v1')
       compute_gradients_queue = data_flow_ops.FIFOQueue(
           -1, global_step.dtype.base_dtype, shapes=(),
           name='compute_gradients_queue', shared_name='compute_gradients_queue')
diff --git a/tensorflow/contrib/opt/python/training/external_optimizer_test.py b/tensorflow/contrib/opt/python/training/external_optimizer_test.py
index 9997103016..70c5f8ff19 100644
--- a/tensorflow/contrib/opt/python/training/external_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/external_optimizer_test.py
@@ -69,9 +69,9 @@ class TestCase(test.TestCase):
 class ExternalOptimizerInterfaceTest(TestCase):
 
   def test_optimize(self):
-    scalar = variables.Variable(random_ops.random_normal([]), 'scalar')
-    vector = variables.Variable(random_ops.random_normal([2]), 'vector')
-    matrix = variables.Variable(random_ops.random_normal([2, 3]), 'matrix')
+    scalar = variables.VariableV1(random_ops.random_normal([]), 'scalar')
+    vector = variables.VariableV1(random_ops.random_normal([2]), 'vector')
+    matrix = variables.VariableV1(random_ops.random_normal([2, 3]), 'matrix')
 
     minimum_location = constant_op.constant(np.arange(9), dtype=dtypes.float32)
 
@@ -96,7 +96,7 @@ class ExternalOptimizerInterfaceTest(TestCase):
 
   def test_callbacks(self):
     vector_val = np.array([7., -2.], dtype=np.float32)
-    vector = variables.Variable(vector_val, 'vector')
+    vector = variables.VariableV1(vector_val, 'vector')
 
     minimum_location_val = np.arange(2)
     minimum_location = constant_op.constant(
@@ -160,7 +160,7 @@ class ScipyOptimizerInterfaceTest(TestCase):
                                 rtol=1e-5,
                                 atol=1e-5,
                                 dimension=5):
-    x = variables.Variable(array_ops.zeros(dimension))
+    x = variables.VariableV1(array_ops.zeros(dimension))
     optimizer = external_optimizer.ScipyOptimizerInterface(
         self._objective(x), method=method, options=options)
 
@@ -173,7 +173,7 @@ class ScipyOptimizerInterfaceTest(TestCase):
   def test_unconstrained(self):
 
     dimension = 5
-    x = variables.Variable(array_ops.zeros(dimension))
+    x = variables.VariableV1(array_ops.zeros(dimension))
     optimizer = external_optimizer.ScipyOptimizerInterface(self._objective(x))
 
     with self.cached_session() as sess:
@@ -230,7 +230,7 @@ class ScipyOptimizerInterfaceTest(TestCase):
 
   def test_nonlinear_programming(self):
     vector_initial_value = [7., 7.]
-    vector = variables.Variable(vector_initial_value, 'vector')
+    vector = variables.VariableV1(vector_initial_value, 'vector')
 
     # Make norm as small as possible.
     loss = math_ops.reduce_sum(math_ops.square(vector))
@@ -249,7 +249,7 @@ class ScipyOptimizerInterfaceTest(TestCase):
 
   def test_scalar_bounds(self):
     vector_initial_value = [7., 7.]
-    vector = variables.Variable(vector_initial_value, 'vector')
+    vector = variables.VariableV1(vector_initial_value, 'vector')
 
     # Make norm as small as possible.
     loss = math_ops.reduce_sum(math_ops.square(vector))
@@ -267,7 +267,7 @@ class ScipyOptimizerInterfaceTest(TestCase):
 
   def test_vector_bounds(self):
     vector_initial_value = [7., 7.]
-    vector = variables.Variable(vector_initial_value, 'vector')
+    vector = variables.VariableV1(vector_initial_value, 'vector')
 
     # Make norm as small as possible.
     loss = math_ops.reduce_sum(math_ops.square(vector))
@@ -287,7 +287,7 @@ class ScipyOptimizerInterfaceTest(TestCase):
     # after running optimizer.minimize().
     # Bug reference: b/64065260
     vector_initial_value = [7., 7.]
-    vector = variables.Variable(vector_initial_value, 'vector')
+    vector = variables.VariableV1(vector_initial_value, 'vector')
     loss = math_ops.reduce_sum(math_ops.square(vector))
 
     optimizer = external_optimizer.ScipyOptimizerInterface(
@@ -301,7 +301,7 @@ class ScipyOptimizerInterfaceTest(TestCase):
 
   def test_callbacks(self):
     vector_val = np.array([7., -2.], dtype=np.float32)
-    vector = variables.Variable(vector_val, 'vector')
+    vector = variables.VariableV1(vector_val, 'vector')
 
     minimum_location_val = np.arange(2)
     minimum_location = constant_op.constant(
diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py
index b1fc50a21f..a25455e95d 100644
--- a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py
@@ -110,10 +110,11 @@ def _get_workers(num_workers, steps, workers):
 
 
 class ModelAverageOptimizerTest(test.TestCase):
+
   def _run(self, train_op, sess):
     sess.run(train_op)
 
-  def test1Workers2Period(self):
+  def disabled_test1Workers2Period(self):
     num_workers = 2
     steps = 2
     num_ps = 1
diff --git a/tensorflow/contrib/opt/python/training/powersign_test.py b/tensorflow/contrib/opt/python/training/powersign_test.py
index 0bcf5d230a..1cf9901dc0 100644
--- a/tensorflow/contrib/opt/python/training/powersign_test.py
+++ b/tensorflow/contrib/opt/python/training/powersign_test.py
@@ -81,9 +81,9 @@ class PowerSignTest(test.TestCase):
           global_step = resource_variable_ops.ResourceVariable(
               0, trainable=False)
         else:
-          var0 = variables.Variable(var0_np)
-          var1 = variables.Variable(var1_np)
-          global_step = variables.Variable(
+          var0 = variables.VariableV1(var0_np)
+          var1 = variables.VariableV1(var1_np)
+          global_step = variables.VariableV1(
               0, trainable=False)
         grads0 = constant_op.constant(grads0_np)
         grads1 = constant_op.constant(grads1_np)
@@ -188,9 +188,9 @@ class PowerSignTest(test.TestCase):
           global_step = resource_variable_ops.ResourceVariable(
               0, trainable=False)
         else:
-          var0 = variables.Variable(var0_np)
-          var1 = variables.Variable(var1_np)
-          global_step = variables.Variable(
+          var0 = variables.VariableV1(var0_np)
+          var1 = variables.VariableV1(var1_np)
+          global_step = variables.VariableV1(
               0, trainable=False)
         grads0_np_indices = np.array([0, 1], dtype=np.int32)
         grads0 = ops.IndexedSlices(
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
index bf699db3ed..f31ad53d3c 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
@@ -163,8 +163,8 @@ class TestStateSaverWithCounters(TestStateSaver):
 
   def __init__(self, batch_size, state_size):
     super(TestStateSaverWithCounters, self).__init__(batch_size, state_size)
-    self._num_state_calls = variables_lib.Variable(0)
-    self._num_save_state_calls = variables_lib.Variable(0)
+    self._num_state_calls = variables_lib.VariableV1(0)
+    self._num_save_state_calls = variables_lib.VariableV1(0)
 
   def state(self, name):
     with ops_lib.control_dependencies(
diff --git a/tensorflow/contrib/session_bundle/exporter_test.py b/tensorflow/contrib/session_bundle/exporter_test.py
index 86df425da0..68419ffea0 100644
--- a/tensorflow/contrib/session_bundle/exporter_test.py
+++ b/tensorflow/contrib/session_bundle/exporter_test.py
@@ -64,10 +64,10 @@ class SaveRestoreShardedTest(test.TestCase):
       # v2 is an unsaved variable derived from v0 and v1.  It is used to
       # exercise the ability to run an init op when restoring a graph.
       with sess.graph.device("/cpu:0"):
-        v0 = variables.Variable(10, name="v0")
+        v0 = variables.VariableV1(10, name="v0")
       with sess.graph.device("/cpu:1"):
-        v1 = variables.Variable(20, name="v1")
-      v2 = variables.Variable(1, name="v2", trainable=False, collections=[])
+        v1 = variables.VariableV1(20, name="v1")
+      v2 = variables.VariableV1(1, name="v2", trainable=False, collections=[])
       assign_v2 = state_ops.assign(v2, math_ops.add(v0, v1))
       init_op = control_flow_ops.group(assign_v2, name="init_op")
 
diff --git a/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py b/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py
index 1c4e18dbda..0b02bdcb50 100644
--- a/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py
+++ b/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py
@@ -27,7 +27,7 @@ from tensorflow.python.platform import googletest
 class ScatterAddNdimTest(test_util.TensorFlowTestCase):
 
   def test1dim(self):
-    input_data = variables.Variable(
+    input_data = variables.VariableV1(
         [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.])
     indices = [[1], [10]]
     updates = [100., 200.]
@@ -40,8 +40,8 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase):
           input_data.eval())
 
   def test3dim(self):
-    input_data = variables.Variable([[[1., 2., 3.], [4., 5., 6.]],
-                                     [[7., 8., 9.], [10., 11., 12.]]])
+    input_data = variables.VariableV1([[[1., 2., 3.], [4., 5., 6.]],
+                                       [[7., 8., 9.], [10., 11., 12.]]])
     indices = [[0, 0, 1], [1, 1, 2]]
     updates = [100., 200.]
 
@@ -53,7 +53,7 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase):
 
   def testNoUpdates(self):
     init_val = [[[1., 2., 3.], [4., 5., 6.]], [[7., 8., 9.], [10., 11., 12.]]]
-    input_data = variables.Variable(init_val)
+    input_data = variables.VariableV1(init_val)
     indices = []
     updates = []
 
@@ -64,7 +64,7 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase):
 
   def testBadInput(self):
     init_val = [[[1., 2., 3.], [4., 5., 6.]], [[7., 8., 9.], [10., 11., 12.]]]
-    input_data = variables.Variable(init_val)
+    input_data = variables.VariableV1(init_val)
     indices = [[0, 0, 1], [1, 1, 2]]
     updates = [100.]
     with self.cached_session():
@@ -75,8 +75,8 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(init_val, input_data.eval())
 
   def testIncompleteIndices(self):
-    input_data = variables.Variable([[[1., 2., 3.], [4., 5., 6.]],
-                                     [[7., 8., 9.], [10., 11., 12.]]])
+    input_data = variables.VariableV1([[[1., 2., 3.], [4., 5., 6.]],
+                                       [[7., 8., 9.], [10., 11., 12.]]])
     indices = [[0, 0], [1, 1]]
     updates = [[100., 200., 300.], [400., 500., 600.]]
 
diff --git a/tensorflow/contrib/tensorrt/python/trt_convert_test.py b/tensorflow/contrib/tensorrt/python/trt_convert_test.py
index f3a1ef0d47..52cb0bd9f9 100644
--- a/tensorflow/contrib/tensorrt/python/trt_convert_test.py
+++ b/tensorflow/contrib/tensorrt/python/trt_convert_test.py
@@ -94,7 +94,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase):
       with g.device("/GPU:0"):
         inp = array_ops.placeholder(
             dtype=dtypes.float32, shape=[None, 1, 1], name="input")
-        var = variables.Variable([[[1.0]]], dtype=dtypes.float32, name="v1")
+        var = variables.VariableV1([[[1.0]]], dtype=dtypes.float32, name="v1")
         add = inp + var.value()
         mul = inp * add
         add = mul + add
diff --git a/tensorflow/contrib/training/python/training/device_setter_test.py b/tensorflow/contrib/training/python/training/device_setter_test.py
index 20746d911c..3bb2dce83d 100644
--- a/tensorflow/contrib/training/python/training/device_setter_test.py
+++ b/tensorflow/contrib/training/python/training/device_setter_test.py
@@ -98,10 +98,10 @@ class GreedyLoadBalancingStrategyTest(test.TestCase):
             cluster=_CLUSTER_SPEC,
             ps_strategy=device_setter_lib.GreedyLoadBalancingStrategy(
                 2, device_setter_lib.byte_size_load_fn))):
-      u = variables.Variable(array_ops.zeros([2, 2]))
-      v = variables.Variable(array_ops.zeros([2, 1]))
-      w = variables.Variable(array_ops.zeros([2, 2]))
-      x = variables.Variable(array_ops.zeros([1, 3]))
+      u = variables.VariableV1(array_ops.zeros([2, 2]))
+      v = variables.VariableV1(array_ops.zeros([2, 1]))
+      w = variables.VariableV1(array_ops.zeros([2, 2]))
+      x = variables.VariableV1(array_ops.zeros([1, 3]))
       a = v + w
       self.assertDeviceEqual("/job:ps/task:0", u.device)
       self.assertDeviceEqual("/job:ps/task:0", u.initializer.device)
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index f576435136..5c0c405306 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -1022,7 +1022,7 @@ class SessionTest(test_util.TensorFlowTestCase):
     with session.Session():
       a = constant_op.constant(1.0, shape=[1, 2])
       b = constant_op.constant(2.0, shape=[1, 2], name='b')
-      v = variables.Variable(a, a.dtype)
+      v = variables.VariableV1(a, a.dtype)
       assign_a_to_v = state_ops.assign(v, a)
 
       assign_a_to_v.eval()
diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py
index 4630bda590..f197a9e4dc 100644
--- a/tensorflow/python/debug/cli/analyzer_cli_test.py
+++ b/tensorflow/python/debug/cli/analyzer_cli_test.py
@@ -599,11 +599,11 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
       v_name = "simple_mul_add/v"
 
       u_init = constant_op.constant(u_init_val, shape=[2, 2], name="u_init")
-      u = variables.Variable(u_init, name=u_name)
+      u = variables.VariableV1(u_init, name=u_name)
       cls._u_line_number = line_number_above()
 
       v_init = constant_op.constant(v_init_val, shape=[2, 1], name="v_init")
-      v = variables.Variable(v_init, name=v_name)
+      v = variables.VariableV1(v_init, name=v_name)
       cls._v_line_number = line_number_above()
 
       w = math_ops.matmul(u, v, name="simple_mul_add/matmul")
@@ -612,7 +612,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
       x = math_ops.add(w, w, name="simple_mul_add/add")
       cls._x_line_number = line_number_above()
 
-      a = variables.Variable([1, 3, 3, 7], name="a")
+      a = variables.VariableV1([1, 3, 3, 7], name="a")
 
       u.initializer.run()
       v.initializer.run()
@@ -1371,7 +1371,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     # Verify the annotation of the line that creates u.
     index = self._findSourceLine(out, self._u_line_number)
     self.assertEqual(
-        ["L%d         u = variables.Variable(u_init, name=u_name)" %
+        ["L%d         u = variables.VariableV1(u_init, name=u_name)" %
          self._u_line_number,
          "    simple_mul_add/u",
          "    simple_mul_add/u/Assign",
@@ -1388,7 +1388,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     # Verify the annotation of the line that creates v.
     index = self._findSourceLine(out, self._v_line_number)
     self.assertEqual(
-        ["L%d         v = variables.Variable(v_init, name=v_name)" %
+        ["L%d         v = variables.VariableV1(v_init, name=v_name)" %
          self._v_line_number,
          "    simple_mul_add/v"],
         out.lines[index : index + 2])
@@ -1425,7 +1425,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     # Verify the annotation of the line that creates u.
     index = self._findSourceLine(out, self._u_line_number)
     self.assertEqual(
-        ["L%d         u = variables.Variable(u_init, name=u_name)" %
+        ["L%d         u = variables.VariableV1(u_init, name=u_name)" %
          self._u_line_number,
          "    simple_mul_add/u/read:0",
          "    simple_mul_add/u:0"],
@@ -1447,7 +1447,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
 
     index = self._findSourceLine(out, self._u_line_number)
     self.assertEqual(
-        ["L%d         u = variables.Variable(u_init, name=u_name)" %
+        ["L%d         u = variables.VariableV1(u_init, name=u_name)" %
          self._u_line_number,
          "    simple_mul_add/u",
          "    simple_mul_add/u/Assign",
@@ -1470,7 +1470,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
 
     index = self._findSourceLine(out, self._u_line_number)
     self.assertEqual(
-        ["L%d         u = variables.Variable(u_init, name=u_name)" %
+        ["L%d         u = variables.VariableV1(u_init, name=u_name)" %
          self._u_line_number,
          "    simple_mul_add/u",
          "    (... Omitted 2 of 3 op(s) ...) +5"],
@@ -1580,7 +1580,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     """List an input tree containing tensors from non-:0 output slot."""
 
     with session.Session(config=no_rewrite_session_config()) as sess:
-      x = variables.Variable([1, 3, 3, 7], name="x")
+      x = variables.VariableV1([1, 3, 3, 7], name="x")
       _, idx = array_ops.unique(x, name="x_unique")
       idx_times_two = math_ops.multiply(idx, 2, name="idx_times_two")
       sess.run(x.initializer)
@@ -1684,7 +1684,7 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
     with session.Session(config=no_rewrite_session_config()) as sess:
       x_init_val = np.array([5.0, 3.0])
       x_init = constant_op.constant(x_init_val, shape=[2])
-      x = variables.Variable(x_init, name="control_deps/x")
+      x = variables.VariableV1(x_init, name="control_deps/x")
 
       y = math_ops.add(x, x, name="control_deps/y")
       y = control_flow_ops.with_dependencies(
diff --git a/tensorflow/python/debug/cli/stepper_cli_test.py b/tensorflow/python/debug/cli/stepper_cli_test.py
index ee8cabca0d..7b8a42c253 100644
--- a/tensorflow/python/debug/cli/stepper_cli_test.py
+++ b/tensorflow/python/debug/cli/stepper_cli_test.py
@@ -132,8 +132,8 @@ def _parse_updated(lines):
 class NodeStepperSimpleGraphTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
-    self.a = variables.Variable(10.0, name="a")
-    self.b = variables.Variable(20.0, name="b")
+    self.a = variables.VariableV1(10.0, name="a")
+    self.b = variables.VariableV1(20.0, name="b")
 
     self.c = math_ops.add(self.a, self.b, name="c")  # Should be 30.0.
     self.d = math_ops.subtract(self.a, self.c, name="d")  # Should be -20.0.
diff --git a/tensorflow/python/debug/lib/debug_utils_test.py b/tensorflow/python/debug/lib/debug_utils_test.py
index 5b1875e092..23ab98444c 100644
--- a/tensorflow/python/debug/lib/debug_utils_test.py
+++ b/tensorflow/python/debug/lib/debug_utils_test.py
@@ -46,8 +46,8 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
       cls._b_init = constant_op.constant(
           cls._b_init_val, shape=[2, 1], name="b_init")
 
-      cls._a = variables.Variable(cls._a_init, name="a1")
-      cls._b = variables.Variable(cls._b_init, name="b")
+      cls._a = variables.VariableV1(cls._a_init, name="a1")
+      cls._b = variables.VariableV1(cls._b_init, name="b")
       cls._c = constant_op.constant(cls._c_val, shape=[2, 1], name="c")
 
       # Matrix product of a and b.
diff --git a/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py b/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py
index 46a7be5808..74498c8ea3 100644
--- a/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py
+++ b/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py
@@ -118,8 +118,8 @@ class DistributedSessionDebugTest(test_util.TensorFlowTestCase):
     """
     with ops.Graph().as_default() as graph:
       with ops.device("/job:worker/task:0/cpu:0"):
-        self.a = variables.Variable(10.0, name="a")
-        self.b = variables.Variable(100.0, name="b")
+        self.a = variables.VariableV1(10.0, name="a")
+        self.b = variables.VariableV1(100.0, name="b")
         self.inc_a = state_ops.assign_add(self.a, 2.0, name="inc_a")
         self.dec_b = state_ops.assign_add(self.b, -5.0, name="dec_b")
         self.p = math_ops.multiply(self.inc_a, self.dec_b, name="p")
diff --git a/tensorflow/python/debug/lib/grpc_large_data_test.py b/tensorflow/python/debug/lib/grpc_large_data_test.py
index 5bc477a9ba..ccc21bcf94 100644
--- a/tensorflow/python/debug/lib/grpc_large_data_test.py
+++ b/tensorflow/python/debug/lib/grpc_large_data_test.py
@@ -61,7 +61,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
     with self.test_session(
         use_gpu=True,
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
-      u = variables.Variable(42.0, name="original_u")
+      u = variables.VariableV1(42.0, name="original_u")
       for _ in xrange(50 * 1000):
         u = array_ops.identity(u)
       sess.run(variables.global_variables_initializer())
@@ -94,7 +94,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
 
       u_init = constant_op.constant(
           u_init_val_array, dtype=dtypes.float32, name="u_init")
-      u = variables.Variable(u_init, name="u")
+      u = variables.VariableV1(u_init, name="u")
 
       def watch_fn(fetches, feeds):
         del fetches, feeds  # Unused by this watch_fn.
@@ -117,7 +117,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
           b"", b"spam", b"A" * 2500 * 1024, b"B" * 2500 * 1024, b"egg", b""]
       u_init = constant_op.constant(
           u_init_val, dtype=dtypes.string, name="u_init")
-      u = variables.Variable(u_init, name="u")
+      u = variables.VariableV1(u_init, name="u")
 
       def watch_fn(fetches, feeds):
         del fetches, feeds
@@ -146,7 +146,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
 
       u_init = constant_op.constant(
           u_init_val_array, dtype=dtypes.string, name="u_init")
-      u = variables.Variable(u_init, name="u")
+      u = variables.VariableV1(u_init, name="u")
 
       def watch_fn(fetches, feeds):
         del fetches, feeds
@@ -167,7 +167,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
       u_init = constant_op.constant(
           [], dtype=dtypes.float32, shape=[0], name="u_init")
-      u = variables.Variable(u_init, name="u")
+      u = variables.VariableV1(u_init, name="u")
 
       def watch_fn(fetches, feeds):
         del fetches, feeds
@@ -189,7 +189,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
       u_init = constant_op.constant(
           [], dtype=dtypes.string, shape=[0], name="u_init")
-      u = variables.Variable(u_init, name="u")
+      u = variables.VariableV1(u_init, name="u")
 
       def watch_fn(fetches, feeds):
         del fetches, feeds
diff --git a/tensorflow/python/debug/lib/session_debug_file_test.py b/tensorflow/python/debug/lib/session_debug_file_test.py
index ba0f15b4e2..1874160dd6 100644
--- a/tensorflow/python/debug/lib/session_debug_file_test.py
+++ b/tensorflow/python/debug/lib/session_debug_file_test.py
@@ -58,9 +58,9 @@ class SessionDebugFileTest(session_debug_testlib.SessionDebugTestBase):
       v_name = "diff_Watch/v"
 
       u_init = constant_op.constant(u_init_val, shape=[2, 2])
-      u = variables.Variable(u_init, name=u_name)
+      u = variables.VariableV1(u_init, name=u_name)
       v_init = constant_op.constant(v_init_val, shape=[2, 1])
-      v = variables.Variable(v_init, name=v_name)
+      v = variables.VariableV1(v_init, name=v_name)
 
       w = math_ops.matmul(u, v, name="diff_Watch/matmul")
 
diff --git a/tensorflow/python/debug/lib/session_debug_grpc_test.py b/tensorflow/python/debug/lib/session_debug_grpc_test.py
index 91f21cb1f3..bfc9a3a382 100644
--- a/tensorflow/python/debug/lib/session_debug_grpc_test.py
+++ b/tensorflow/python/debug/lib/session_debug_grpc_test.py
@@ -148,8 +148,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase):
           sess, "localhost:%d" % self._server_port, watch_fn="foo")
 
   def testGrpcDebugWrapperSessionWithoutWatchFnWorks(self):
-    u = variables.Variable(2.1, name="u")
-    v = variables.Variable(20.0, name="v")
+    u = variables.VariableV1(2.1, name="u")
+    v = variables.VariableV1(20.0, name="v")
     w = math_ops.multiply(u, v, name="w")
 
     sess = session.Session(
@@ -175,8 +175,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase):
       del feeds, fetch_keys
       return ["DebugIdentity", "DebugNumericSummary"], r".*/read", None
 
-    u = variables.Variable(2.1, name="u")
-    v = variables.Variable(20.0, name="v")
+    u = variables.VariableV1(2.1, name="u")
+    v = variables.VariableV1(20.0, name="v")
     w = math_ops.multiply(u, v, name="w")
 
     sess = session.Session(
@@ -209,8 +209,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase):
           op_type_regex_whitelist=None,
           tolerate_debug_op_creation_failures=True)
 
-    u = variables.Variable(2.1, name="u")
-    v = variables.Variable(20.0, name="v")
+    u = variables.VariableV1(2.1, name="u")
+    v = variables.VariableV1(20.0, name="v")
     w = math_ops.multiply(u, v, name="w")
 
     sess = session.Session(
@@ -241,8 +241,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase):
         14, len(dump.get_tensors("v/read", 0, "DebugNumericSummary")[0]))
 
   def testTensorBoardDebugHookWorks(self):
-    u = variables.Variable(2.1, name="u")
-    v = variables.Variable(20.0, name="v")
+    u = variables.VariableV1(2.1, name="u")
+    v = variables.VariableV1(20.0, name="v")
     w = math_ops.multiply(u, v, name="w")
 
     sess = session.Session(
@@ -286,8 +286,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase):
       self._server.query_source_file_line(__file__, 1)
 
   def testTensorBoardDebugHookDisablingTracebackSourceCodeSendingWorks(self):
-    u = variables.Variable(2.1, name="u")
-    v = variables.Variable(20.0, name="v")
+    u = variables.VariableV1(2.1, name="u")
+    v = variables.VariableV1(20.0, name="v")
     w = math_ops.multiply(u, v, name="w")
 
     sess = session.Session(
@@ -381,8 +381,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
   def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenDebugNodes(self):
     with session.Session(
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
-      v_1 = variables.Variable(50.0, name="v_1")
-      v_2 = variables.Variable(-50.0, name="v_1")
+      v_1 = variables.VariableV1(50.0, name="v_1")
+      v_2 = variables.VariableV1(-50.0, name="v_1")
       delta_1 = constant_op.constant(5.0, name="delta_1")
       delta_2 = constant_op.constant(-5.0, name="delta_2")
       inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
@@ -451,8 +451,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
 
     with session.Session(
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
-      v_1 = variables.Variable(50.0, name="v_1")
-      v_2 = variables.Variable(-50.0, name="v_1")
+      v_1 = variables.VariableV1(50.0, name="v_1")
+      v_2 = variables.VariableV1(-50.0, name="v_1")
       # These two nodes have names that match those in the
       # toggle_watch_on_core_metadata argument used when calling
       # start_server_on_separate_thread().
@@ -491,7 +491,7 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
   def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenServers(self):
     with session.Session(
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
-      v = variables.Variable(50.0, name="v")
+      v = variables.VariableV1(50.0, name="v")
       delta = constant_op.constant(5.0, name="delta")
       inc_v = state_ops.assign_add(v, delta, name="inc_v")
 
@@ -534,8 +534,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
   def testToggleBreakpointsWorks(self):
     with session.Session(
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
-      v_1 = variables.Variable(50.0, name="v_1")
-      v_2 = variables.Variable(-50.0, name="v_2")
+      v_1 = variables.VariableV1(50.0, name="v_1")
+      v_2 = variables.VariableV1(-50.0, name="v_2")
       delta_1 = constant_op.constant(5.0, name="delta_1")
       delta_2 = constant_op.constant(-5.0, name="delta_2")
       inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
@@ -592,8 +592,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
   def testTensorBoardDebuggerWrapperToggleBreakpointsWorks(self):
     with session.Session(
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
-      v_1 = variables.Variable(50.0, name="v_1")
-      v_2 = variables.Variable(-50.0, name="v_2")
+      v_1 = variables.VariableV1(50.0, name="v_1")
+      v_2 = variables.VariableV1(-50.0, name="v_2")
       delta_1 = constant_op.constant(5.0, name="delta_1")
       delta_2 = constant_op.constant(-5.0, name="delta_2")
       inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
@@ -665,8 +665,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
   def testTensorBoardDebuggerWrapperDisablingTracebackSourceSendingWorks(self):
     with session.Session(
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
-      v_1 = variables.Variable(50.0, name="v_1")
-      v_2 = variables.Variable(-50.0, name="v_2")
+      v_1 = variables.VariableV1(50.0, name="v_1")
+      v_2 = variables.VariableV1(-50.0, name="v_2")
       delta_1 = constant_op.constant(5.0, name="delta_1")
       delta_2 = constant_op.constant(-5.0, name="delta_2")
       inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
@@ -699,7 +699,7 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
 
   def testGetGrpcDebugWatchesReturnsCorrectAnswer(self):
     with session.Session() as sess:
-      v = variables.Variable(50.0, name="v")
+      v = variables.VariableV1(50.0, name="v")
       delta = constant_op.constant(5.0, name="delta")
       inc_v = state_ops.assign_add(v, delta, name="inc_v")
 
@@ -743,7 +743,7 @@ class DelayedDebugServerTest(test_util.TensorFlowTestCase):
 
     with self.cached_session() as sess:
       a_init = constant_op.constant(42.0, name="a_init")
-      a = variables.Variable(a_init, name="a")
+      a = variables.VariableV1(a_init, name="a")
 
       def watch_fn(fetches, feeds):
         del fetches, feeds
diff --git a/tensorflow/python/debug/lib/session_debug_testlib.py b/tensorflow/python/debug/lib/session_debug_testlib.py
index 070d9c4cd7..25ef91b575 100644
--- a/tensorflow/python/debug/lib/session_debug_testlib.py
+++ b/tensorflow/python/debug/lib/session_debug_testlib.py
@@ -70,7 +70,7 @@ class _RNNCellForTest(rnn_cell_impl.RNNCell):
   def __init__(self, input_output_size, state_size):
     self._input_output_size = input_output_size
     self._state_size = state_size
-    self._w = variables.Variable(1.0, dtype=dtypes.float32, name="w")
+    self._w = variables.VariableV1(1.0, dtype=dtypes.float32, name="w")
 
   @property
   def output_size(self):
@@ -182,9 +182,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
       w_name = "w"
 
       u_init = constant_op.constant(u_init_val, shape=[2, 2])
-      u = variables.Variable(u_init, name=u_name)
+      u = variables.VariableV1(u_init, name=u_name)
       v_init = constant_op.constant(v_init_val, shape=[2, 1])
-      v = variables.Variable(v_init, name=v_name)
+      v = variables.VariableV1(v_init, name=v_name)
 
       w = math_ops.matmul(u, v, name=w_name)
 
@@ -221,8 +221,8 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testCopyNodesHaveCorrectDebugOpsAndURLsAttributeValues(self):
     with session.Session() as sess:
-      u = variables.Variable(2.1, name="u")
-      v = variables.Variable(20.0, name="v")
+      u = variables.VariableV1(2.1, name="u")
+      v = variables.VariableV1(20.0, name="v")
       w = math_ops.multiply(u, v, name="w")
 
       sess.run(variables.global_variables_initializer())
@@ -324,8 +324,8 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
       str1_name = "str1"
       str2_name = "str2"
-      str1 = variables.Variable(str1_init, name=str1_name)
-      str2 = variables.Variable(str2_init, name=str2_name)
+      str1 = variables.VariableV1(str1_init, name=str1_name)
+      str2 = variables.VariableV1(str2_init, name=str2_name)
       # Concatenate str1 and str2
       str_concat = math_ops.add(str1, str2, name="str_concat")
 
@@ -387,9 +387,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
       s_name = "%s/s" % op_namespace
 
       u_init = constant_op.constant(u_init_val, shape=[2, 2])
-      u = variables.Variable(u_init, name=u_name)
+      u = variables.VariableV1(u_init, name=u_name)
       s_init = constant_op.constant(s_init_val)
-      s = variables.Variable(s_init, name=s_name)
+      s = variables.VariableV1(s_init, name=s_name)
 
       run_options = config_pb2.RunOptions(output_partition_graphs=True)
       debug_urls = self._debug_urls()
@@ -439,7 +439,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
       u_init_val = np.array(11.0)
       u_init = constant_op.constant(u_init_val)
-      u = variables.Variable(u_init, name=u_name)
+      u = variables.VariableV1(u_init, name=u_name)
 
       # "v" is the increment.
       v_name = "testDumpToFileWhileLoop/v"
@@ -447,7 +447,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
       v_init_val = np.array(2.0)
       v_init = constant_op.constant(v_init_val)
-      v = variables.Variable(v_init, name=v_name)
+      v = variables.VariableV1(v_init, name=v_name)
 
       u.initializer.run()
       v.initializer.run()
@@ -605,8 +605,8 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testDebugCondWatchingWholeGraphWorks(self):
     with session.Session() as sess:
-      x = variables.Variable(10.0, name="x")
-      y = variables.Variable(20.0, name="y")
+      x = variables.VariableV1(10.0, name="x")
+      y = variables.VariableV1(20.0, name="y")
       cond = control_flow_ops.cond(
           x > y, lambda: math_ops.add(x, 1), lambda: math_ops.add(y, 1))
 
@@ -628,9 +628,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
       z_name = "testFindNodesWithBadTensorValues/z"
 
       u_init = constant_op.constant([2.0, 4.0])
-      u = variables.Variable(u_init, name=u_name)
+      u = variables.VariableV1(u_init, name=u_name)
       v_init = constant_op.constant([2.0, 1.0])
-      v = variables.Variable(v_init, name=v_name)
+      v = variables.VariableV1(v_init, name=v_name)
 
       # Expected output: [0.0, 3.0]
       w = math_ops.subtract(u, v, name=w_name)
@@ -679,9 +679,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
       z_name = "testFindInfOrNanWithOpNameExclusion/z"
 
       u_init = constant_op.constant([2.0, 4.0])
-      u = variables.Variable(u_init, name=u_name)
+      u = variables.VariableV1(u_init, name=u_name)
       v_init = constant_op.constant([2.0, 1.0])
-      v = variables.Variable(v_init, name=v_name)
+      v = variables.VariableV1(v_init, name=v_name)
 
       # Expected output: [0.0, 3.0]
       w = math_ops.subtract(u, v, name=w_name)
@@ -725,7 +725,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
       w_name = "testDumpGraphStructureLookup/w"
 
       u_init = constant_op.constant([2.0, 4.0])
-      u = variables.Variable(u_init, name=u_name)
+      u = variables.VariableV1(u_init, name=u_name)
       v = math_ops.add(u, u, name=v_name)
       w = math_ops.add(v, v, name=w_name)
 
@@ -859,9 +859,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testGraphPathFindingOnControlEdgesWorks(self):
     with session.Session(config=no_rewrite_session_config()) as sess:
-      v1 = variables.Variable(1.0, name="v1")
-      v2 = variables.Variable(2.0, name="v2")
-      v3 = variables.Variable(3.0, name="v3")
+      v1 = variables.VariableV1(1.0, name="v1")
+      v2 = variables.VariableV1(2.0, name="v2")
+      v3 = variables.VariableV1(3.0, name="v3")
       a = math_ops.add(v1, v2, name="a")
       with ops.control_dependencies([a]):
         c = math_ops.subtract(v3, v3, name="c")
@@ -875,8 +875,8 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testGraphPathFindingReverseRefEdgeWorks(self):
     with session.Session(config=no_rewrite_session_config()) as sess:
-      v = variables.Variable(10.0, name="v")
-      delta = variables.Variable(1.0, name="delta")
+      v = variables.VariableV1(10.0, name="v")
+      delta = variables.VariableV1(1.0, name="delta")
       inc_v = state_ops.assign_add(v, delta, name="inc_v")
 
       sess.run(variables.global_variables_initializer())
@@ -894,7 +894,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
       w_name = "testDumpCausalityCheck/w"
 
       u_init = constant_op.constant([2.0, 4.0])
-      u = variables.Variable(u_init, name=u_name)
+      u = variables.VariableV1(u_init, name=u_name)
       v = math_ops.add(u, u, name=v_name)
       w = math_ops.add(v, v, name=w_name)
 
@@ -980,7 +980,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
       w_name = "oneOfTwoSlots/w"
       y_name = "oneOfTwoSlots/y"
 
-      x = variables.Variable([1, 3, 3, 7], dtype=dtypes.int32, name=x_name)
+      x = variables.VariableV1([1, 3, 3, 7], dtype=dtypes.int32, name=x_name)
       sess.run(x.initializer)
 
       unique_x, indices, _ = array_ops.unique_with_counts(x, name=u_name)
@@ -1039,9 +1039,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
     with session.Session(config=no_rewrite_session_config()) as sess:
       u_init = constant_op.constant(10.0)
-      u = variables.Variable(u_init, name="gdo/u")
+      u = variables.VariableV1(u_init, name="gdo/u")
       v_init = constant_op.constant(20.0)
-      v = variables.Variable(v_init, name="gdo/v")
+      v = variables.VariableV1(v_init, name="gdo/v")
 
       w = math_ops.multiply(u, v, name="gdo/w")
       # gdo stands for GradientDescentOptimizer.
@@ -1085,7 +1085,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
     with session.Session() as sess:
       x_init = constant_op.constant([2, 2, 3, 5, 5])
-      x = variables.Variable(x_init, name="unconnected/x")
+      x = variables.VariableV1(x_init, name="unconnected/x")
 
       # The UniqueOp (tf.unique) has two output slots. Use only slot 0 in the
       # graph. Let the debugger watch the unused slot 1.
@@ -1225,14 +1225,14 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testDebugNumericSummaryOnInitializedTensorGivesCorrectResult(self):
     with session.Session(config=no_rewrite_session_config()) as sess:
-      a = variables.Variable(
+      a = variables.VariableV1(
           [
               np.nan, np.nan, 0.0, 0.0, 0.0, -1.0, -3.0, 3.0, 7.0, -np.inf,
               -np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.nan, np.nan
           ],
           dtype=np.float32,
           name="numeric_summary/a")
-      b = variables.Variable(
+      b = variables.VariableV1(
           [0.0] * 18, dtype=np.float32, name="numeric_summary/b")
       c = math_ops.add(a, b, name="numeric_summary/c")
 
@@ -1249,7 +1249,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testDebugNumericSummaryOnUninitializedTensorGivesCorrectResult(self):
     with session.Session() as sess:
-      a = variables.Variable(
+      a = variables.VariableV1(
           [42], dtype=np.float32, name="numeric_summary_uninit/a")
 
       _, dump = self._debug_run_and_get_dump(
@@ -1275,9 +1275,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testDebugNumericSummaryFailureIsToleratedWhenOrdered(self):
     with session.Session() as sess:
-      a = variables.Variable("1", name="a")
-      b = variables.Variable("3", name="b")
-      c = variables.Variable("2", name="c")
+      a = variables.VariableV1("1", name="a")
+      b = variables.VariableV1("3", name="b")
+      c = variables.VariableV1("2", name="c")
 
       d = math_ops.add(a, b, name="d")
       e = math_ops.add(d, c, name="e")
@@ -1313,9 +1313,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testDebugNumericSummaryInvalidAttributesStringAreCaught(self):
     with session.Session(config=no_rewrite_session_config()) as sess:
-      a = variables.Variable(10.0, name="a")
-      b = variables.Variable(0.0, name="b")
-      c = variables.Variable(0.0, name="c")
+      a = variables.VariableV1(10.0, name="a")
+      b = variables.VariableV1(0.0, name="b")
+      c = variables.VariableV1(0.0, name="c")
 
       x = math_ops.divide(a, b, name="x")
       y = math_ops.multiply(x, c, name="y")
@@ -1361,9 +1361,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testDebugNumericSummaryMuteOnHealthyMutesOnlyHealthyTensorDumps(self):
     with session.Session(config=no_rewrite_session_config()) as sess:
-      a = variables.Variable(10.0, name="a")
-      b = variables.Variable(0.0, name="b")
-      c = variables.Variable(0.0, name="c")
+      a = variables.VariableV1(10.0, name="a")
+      b = variables.VariableV1(0.0, name="b")
+      c = variables.VariableV1(0.0, name="c")
 
       x = math_ops.divide(a, b, name="x")
       y = math_ops.multiply(x, c, name="y")
@@ -1396,8 +1396,8 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
 
   def testDebugNumericSummaryMuteOnHealthyAndCustomBoundsWork(self):
     with session.Session() as sess:
-      a = variables.Variable([10.0, 10.0], name="a")
-      b = variables.Variable([10.0, 2.0], name="b")
+      a = variables.VariableV1([10.0, 10.0], name="a")
+      b = variables.VariableV1([10.0, 2.0], name="b")
 
       x = math_ops.add(a, b, name="x")  # [20.0, 12.0]
       y = math_ops.divide(x, b, name="y")  # [2.0, 6.0]
@@ -1436,9 +1436,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
   def testLookUpNodePythonTracebackWorks(self):
     with session.Session() as sess:
       u_init = constant_op.constant(10.0)
-      u = variables.Variable(u_init, name="traceback/u")
+      u = variables.VariableV1(u_init, name="traceback/u")
       v_init = constant_op.constant(20.0)
-      v = variables.Variable(v_init, name="traceback/v")
+      v = variables.VariableV1(v_init, name="traceback/v")
 
       w = math_ops.multiply(u, v, name="traceback/w")
 
@@ -1487,7 +1487,7 @@ class DebugConcurrentRunCallsTest(test_util.TensorFlowTestCase):
       self.skipTest("No testing concurrent runs on a single GPU.")
 
     with session.Session() as sess:
-      v = variables.Variable(30.0, name="v")
+      v = variables.VariableV1(30.0, name="v")
       constants = []
       for i in xrange(self._num_concurrent_runs):
         constants.append(constant_op.constant(1.0, name="c%d" % i))
diff --git a/tensorflow/python/debug/lib/stepper_test.py b/tensorflow/python/debug/lib/stepper_test.py
index 9a3d0efabf..3839c67198 100644
--- a/tensorflow/python/debug/lib/stepper_test.py
+++ b/tensorflow/python/debug/lib/stepper_test.py
@@ -36,8 +36,8 @@ from tensorflow.python.training import gradient_descent
 class StepperTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
-    self.a = variables.Variable(2.0, name="a")
-    self.b = variables.Variable(3.0, name="b")
+    self.a = variables.VariableV1(2.0, name="a")
+    self.b = variables.VariableV1(3.0, name="b")
 
     self.c = math_ops.multiply(self.a, self.b, name="c")  # Should be 6.0.
     self.d = math_ops.multiply(self.a, self.a, name="d")  # Should be 4.0.
@@ -49,7 +49,7 @@ class StepperTest(test_util.TensorFlowTestCase):
 
     # The there nodes x, y and z form a graph with "cross-links" in. I.e., x
     # and y are both direct inputs to z, but x is also a direct input to y.
-    self.x = variables.Variable(2.0, name="x")  # Should be 2.0
+    self.x = variables.VariableV1(2.0, name="x")  # Should be 2.0
     self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.
 
     self.z = math_ops.multiply(self.x, self.y, name="z")  # Should be -4.0.
@@ -580,7 +580,7 @@ class StepperTestWithPlaceHolders(test_util.TensorFlowTestCase):
 class StepperAssignAddTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
-    self.v = variables.Variable(10.0, name="v")
+    self.v = variables.VariableV1(10.0, name="v")
     self.p = math_ops.add(self.v, self.v, name="p")
     self.q = math_ops.multiply(self.p, self.p, name="q")
     self.delta = constant_op.constant(2.0, name="delta")
@@ -711,9 +711,9 @@ class StepperBackwardRunTest(test_util.TensorFlowTestCase):
     Construct a backward graph using the GradientDescentOptimizer.
     """
 
-    self.a = variables.Variable(1.0, name="a")
-    self.b = variables.Variable(2.0, name="b")
-    self.c = variables.Variable(4.0, name="c")
+    self.a = variables.VariableV1(1.0, name="a")
+    self.b = variables.VariableV1(2.0, name="b")
+    self.c = variables.VariableV1(4.0, name="c")
     self.d = math_ops.multiply(self.a, self.b, name="d")
     self.e = math_ops.multiply(self.b, self.c, name="e")
     self.f = math_ops.multiply(self.d, self.e, name="f")
diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py
index 254201c393..11011a5c13 100644
--- a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py
+++ b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py
@@ -46,7 +46,7 @@ class DumpingDebugWrapperSessionTest(test_util.TensorFlowTestCase):
   def setUp(self):
     self.session_root = tempfile.mkdtemp()
 
-    self.v = variables.Variable(10.0, dtype=dtypes.float32, name="v")
+    self.v = variables.VariableV1(10.0, dtype=dtypes.float32, name="v")
     self.delta = constant_op.constant(1.0, dtype=dtypes.float32, name="delta")
     self.eta = constant_op.constant(-1.4, dtype=dtypes.float32, name="eta")
     self.inc_v = state_ops.assign_add(self.v, self.delta, name="inc_v")
diff --git a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py
index 05c9eaa4d2..149a7497df 100644
--- a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py
+++ b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py
@@ -132,8 +132,8 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
   def setUp(self):
     self._tmp_dir = tempfile.mktemp()
 
-    self.v = variables.Variable(10.0, name="v")
-    self.w = variables.Variable(21.0, name="w")
+    self.v = variables.VariableV1(10.0, name="v")
+    self.w = variables.VariableV1(21.0, name="w")
     self.delta = constant_op.constant(1.0, name="delta")
     self.inc_v = state_ops.assign_add(self.v, self.delta, name="inc_v")
 
@@ -358,7 +358,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
   def testDebuggingMakeCallableTensorRunnerWorks(self):
     wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(
         [["run"], ["run"]], self.sess, dump_root=self._tmp_dir)
-    v = variables.Variable(42)
+    v = variables.VariableV1(42)
     tensor_runner = wrapped_sess.make_callable(v)
     self.sess.run(v.initializer)
 
@@ -382,7 +382,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
   def testDebuggingMakeCallableOperationRunnerWorks(self):
     wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(
         [["run"], ["run"]], self.sess, dump_root=self._tmp_dir)
-    v = variables.Variable(10.0)
+    v = variables.VariableV1(10.0)
     inc_v = state_ops.assign_add(v, 1.0)
     op_runner = wrapped_sess.make_callable(inc_v.op)
     self.sess.run(v.initializer)
@@ -403,7 +403,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, len(wrapped_sess.observers["debug_dumps"]))
 
   def testDebuggingMakeCallableFromOptionsWithZeroFeedWorks(self):
-    variable_1 = variables.Variable(
+    variable_1 = variables.VariableV1(
         10.5, dtype=dtypes.float32, name="variable_1")
     a = math_ops.add(variable_1, variable_1, "callable_a")
     math_ops.add(a, a, "callable_b")
@@ -480,7 +480,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
       self.assertItemsEqual(["callable_a", "callable_b"], node_names)
 
   def testDebugMakeCallableFromOptionsWithCustomOptionsAndMetadataWorks(self):
-    variable_1 = variables.Variable(
+    variable_1 = variables.VariableV1(
         10.5, dtype=dtypes.float32, name="variable_1")
     a = math_ops.add(variable_1, variable_1, "callable_a")
     math_ops.add(a, a, "callable_b")
@@ -528,7 +528,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
   def testRuntimeErrorBeforeGraphExecutionIsRaised(self):
     # Use an impossible device name to cause an error before graph execution.
     with ops.device("/device:GPU:1337"):
-      w = variables.Variable([1.0] * 10, name="w")
+      w = variables.VariableV1([1.0] * 10, name="w")
 
     wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(
         [["run"]], self.sess, dump_root=self._tmp_dir)
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
index 1ed5e30b0e..bc2504ca19 100644
--- a/tensorflow/python/estimator/estimator_test.py
+++ b/tensorflow/python/estimator/estimator_test.py
@@ -1017,7 +1017,7 @@ class EstimatorGetVariablesTest(test.TestCase):
 
     def _model_fn(features, labels, mode):
       _, _ = features, labels
-      variables.Variable(1., name='one')
+      variables.VariableV1(1., name='one')
       return model_fn_lib.EstimatorSpec(
           mode=mode,
           loss=constant_op.constant(0.),
@@ -1033,8 +1033,8 @@ class EstimatorGetVariablesTest(test.TestCase):
 
     def _model_fn(features, labels, mode):
       _, _ = features, labels
-      variables.Variable(1., name='one')
-      variables.Variable(3., name='three')
+      variables.VariableV1(1., name='one')
+      variables.VariableV1(3., name='three')
       return model_fn_lib.EstimatorSpec(
           mode=mode,
           loss=constant_op.constant(0.),
@@ -1178,13 +1178,13 @@ class EstimatorEvaluateTest(test.TestCase):
     def _model_fn(features, labels, mode, params):
       del features, labels, params
       mean = metrics_module.Mean()
-      mean.update_state(variables.Variable(2.) + 1)
+      mean.update_state(variables.VariableV1(2.) + 1)
       return model_fn_lib.EstimatorSpec(
           mode,
           loss=constant_op.constant(1.),
           eval_metric_ops={
               'mean1': mean,
-              'mean2': metrics_lib.mean(variables.Variable(2.) + 1)
+              'mean2': metrics_lib.mean(variables.VariableV1(2.) + 1)
           })
 
     est = estimator.Estimator(model_fn=_model_fn)
@@ -1332,7 +1332,7 @@ class EstimatorEvaluateTest(test.TestCase):
 
     def _model_fn_with_incremental_loss(features, labels, mode):
       _, _ = features, labels
-      local_weight = variables.Variable(
+      local_weight = variables.VariableV1(
           0., name='local_weight', collections=[ops.GraphKeys.LOCAL_VARIABLES])
       # Loss will be 2, 4, 6, ...
       loss = 2 * state_ops.assign_add(local_weight, 1.)
@@ -1385,7 +1385,7 @@ class EstimatorEvaluateTest(test.TestCase):
     def _get_model_fn(val=1):
       def _model_fn(features, labels, mode):
         del features, labels  # unused
-        variables.Variable(val, name='weight')
+        variables.VariableV1(val, name='weight')
         return model_fn_lib.EstimatorSpec(
             mode=mode,
             predictions=constant_op.constant([[1.]]),
@@ -1409,7 +1409,7 @@ class EstimatorEvaluateTest(test.TestCase):
 
     def _model_fn_scaffold(features, labels, mode):
       _, _ = features, labels
-      variables.Variable(1., name='weight')
+      variables.VariableV1(1., name='weight')
       self.mock_saver = get_mock_saver()
       return model_fn_lib.EstimatorSpec(
           mode=mode,
@@ -1603,7 +1603,7 @@ class EstimatorPredictTest(test.TestCase):
   def test_no_checkpoint_uses_init(self):
     def _model_fn(features, labels, mode, params, config):
       del features, labels, params, config
-      x = variables.Variable([[3.]], name='x')
+      x = variables.VariableV1([[3.]], name='x')
       return model_fn_lib.EstimatorSpec(mode, predictions=math_ops.add(x, 1.))
     est = estimator.Estimator(model_fn=_model_fn)
     # Expected prediction value is 1 + the value of the Variable that is newly
@@ -1614,7 +1614,7 @@ class EstimatorPredictTest(test.TestCase):
     def _make_model_fn(x):
       def _variable_creating_and_export_model_fn(features, labels, mode):
         _, _ = features, labels
-        x_var = variables.Variable([[x]], name='x')
+        x_var = variables.VariableV1([[x]], name='x')
         return model_fn_lib.EstimatorSpec(
             mode,
             predictions=math_ops.add(x_var, 1.),
@@ -1936,7 +1936,7 @@ class EstimatorPredictTest(test.TestCase):
 
     def _model_fn(features, labels, mode):
       _, _ = features, labels
-      v = variables.Variable([[16.]], name='weight')
+      v = variables.VariableV1([[16.]], name='weight')
       prediction = v * 2
       return model_fn_lib.EstimatorSpec(
           mode,
@@ -1953,7 +1953,7 @@ class EstimatorPredictTest(test.TestCase):
 
     def _model_fn(features, labels, mode):
       _, _ = features, labels
-      v = variables.Variable([[16.]], name='weight')
+      v = variables.VariableV1([[16.]], name='weight')
       prediction = v * 2
       return model_fn_lib.EstimatorSpec(
           mode,
@@ -1974,7 +1974,7 @@ class EstimatorPredictTest(test.TestCase):
 
     def _model_fn_scaffold(features, labels, mode):
       _, _ = features, labels
-      variables.Variable(1., name='weight')
+      variables.VariableV1(1., name='weight')
       self.mock_saver = get_mock_saver()
       return model_fn_lib.EstimatorSpec(
           mode=mode,
@@ -2029,7 +2029,7 @@ class EstimatorPredictTest(test.TestCase):
 
 def _model_fn_for_export_tests(features, labels, mode):
   _, _ = features, labels
-  variables.Variable(1., name='weight')
+  variables.VariableV1(1., name='weight')
   scores = constant_op.constant([3.])
   classes = constant_op.constant(['wumpus'])
   update_global_step = state_ops.assign_add(training.get_global_step(), 1)
@@ -2052,11 +2052,11 @@ def _x_y_input_fn():
 
 def _model_fn_with_x_y(features, labels, mode):
   _ = labels
-  variables.Variable(1., name='weight')
+  variables.VariableV1(1., name='weight')
   scores = constant_op.constant([3.])
   classes = constant_op.constant(['wumpus'])
   if mode == model_fn_lib.ModeKeys.PREDICT:
-    variables.Variable(36., name='name_collision')
+    variables.VariableV1(36., name='name_collision')
     return model_fn_lib.EstimatorSpec(
         mode,
         predictions=constant_op.constant(10.),
@@ -2076,8 +2076,8 @@ def _model_fn_with_x_y(features, labels, mode):
             metrics_lib.mean(
                 features['x'] - features['y'], name='{}mean'.format(prefix))
     }
-    variables.Variable(1., name='later_var')
-    variables.Variable(3., name='name_collision')
+    variables.VariableV1(1., name='later_var')
+    variables.VariableV1(3., name='name_collision')
     return model_fn_lib.EstimatorSpec(
         mode,
         predictions=multiplied,
@@ -2411,9 +2411,9 @@ class EstimatorExportTest(test.TestCase):
     def _model_fn_with_predict_only_vars(features, labels, mode):
       _, _ = features, labels
       if mode == model_fn_lib.ModeKeys.PREDICT:
-        variables.Variable(1., name='only_in_predict')
+        variables.VariableV1(1., name='only_in_predict')
       else:
-        variables.Variable(1., name='otherwise')
+        variables.VariableV1(1., name='otherwise')
 
       prediction = constant_op.constant(1.)
       return model_fn_lib.EstimatorSpec(
@@ -2684,7 +2684,7 @@ class EstimatorExportTest(test.TestCase):
 
     def _model_fn_scaffold(features, labels, mode):
       _, _ = features, labels
-      variables.Variable(1., name='weight')
+      variables.VariableV1(1., name='weight')
       self.mock_saver = get_mock_saver()
       scores = constant_op.constant([3.])
       return model_fn_lib.EstimatorSpec(
@@ -2717,7 +2717,7 @@ class EstimatorExportTest(test.TestCase):
 
     def _model_fn_scaffold(features, labels, mode):
       _, _ = features, labels
-      variables.Variable(1., name='weight')
+      variables.VariableV1(1., name='weight')
 
       scores = constant_op.constant([3.])
       if mode == model_fn_lib.ModeKeys.PREDICT:
@@ -2762,8 +2762,8 @@ class EstimatorExportTest(test.TestCase):
 
     def _model_fn_scaffold(features, labels, mode):
       _, _ = features, labels
-      my_int = variables.Variable(1, name='my_int',
-                                  collections=[ops.GraphKeys.LOCAL_VARIABLES])
+      my_int = variables.VariableV1(1, name='my_int',
+                                    collections=[ops.GraphKeys.LOCAL_VARIABLES])
       _ = training.get_or_create_steps_per_run_variable()
       scores = constant_op.constant([3.])
       with ops.control_dependencies([
@@ -2808,8 +2808,8 @@ class EstimatorExportTest(test.TestCase):
 
     def _model_fn_scaffold(features, labels, mode):
       _, _ = features, labels
-      my_int = variables.Variable(1, name='my_int',
-                                  collections=[ops.GraphKeys.LOCAL_VARIABLES])
+      my_int = variables.VariableV1(1, name='my_int',
+                                    collections=[ops.GraphKeys.LOCAL_VARIABLES])
       scores = constant_op.constant([3.])
       with ops.control_dependencies([
           variables.local_variables_initializer(),
@@ -3038,7 +3038,7 @@ class EstimatorExportTest(test.TestCase):
 
     def _model_fn(features, labels, mode):
       _, _ = features, labels
-      variables.Variable(1., name='weight')
+      variables.VariableV1(1., name='weight')
       return model_fn_lib.EstimatorSpec(
           mode,
           predictions=constant_op.constant(10.),
@@ -3081,7 +3081,7 @@ class EstimatorHookOrderingTest(test.TestCase):
       """A graph that generates NaN's for testing."""
       del features, labels
 
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, name='global_step')
       inc_global_step = state_ops.assign_add(global_step, 1)
       nan_const = constant_op.constant(np.nan, dtype=dtypes.float32)
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index f740e5cfaa..87f567db0e 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -113,7 +113,7 @@ class FunctionTest(test.TestCase):
       return a
 
     with ops.Graph().as_default():
-      var = variables.Variable([18.0])
+      var = variables.VariableV1([18.0])
       call = MyIdentityFunc(var._ref())  # pylint: disable=protected-access
       self.assertEqual("MyIdentity", call.op.name)
       for cfg in _OptimizerOptions():
diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py
index 2dafb94ba7..563a177dd0 100644
--- a/tensorflow/python/framework/graph_util_test.py
+++ b/tensorflow/python/framework/graph_util_test.py
@@ -104,13 +104,13 @@ class DeviceFunctionsTest(test.TestCase):
 
   def testNestedDeviceFunctions(self):
     with ops.Graph().as_default():
-      var_0 = variables.Variable(0)
+      var_0 = variables.VariableV1(0)
       with ops.device(test_device_func_pin_variable_to_cpu):
-        var_1 = variables.Variable(1)
+        var_1 = variables.VariableV1(1)
         with ops.device(lambda op: "/device:GPU:0"):
-          var_2 = variables.Variable(2)
+          var_2 = variables.VariableV1(2)
         with ops.device("/device:GPU:0"):  # Implicit merging device function.
-          var_3 = variables.Variable(3)
+          var_3 = variables.VariableV1(3)
 
     self.assertDeviceEqual(var_0.device, None)
     self.assertDeviceEqual(var_1.device, "/device:CPU:0")
diff --git a/tensorflow/python/framework/subscribe_test.py b/tensorflow/python/framework/subscribe_test.py
index 1d594e4078..cab426844d 100644
--- a/tensorflow/python/framework/subscribe_test.py
+++ b/tensorflow/python/framework/subscribe_test.py
@@ -212,8 +212,8 @@ class SubscribeTest(test_util.TensorFlowTestCase):
 
   def testSubscribeVariable(self):
     """Confirm that variables can be subscribed."""
-    v1 = variables.Variable(0.0)
-    v2 = variables.Variable(4.0)
+    v1 = variables.VariableV1(0.0)
+    v2 = variables.VariableV1(4.0)
     add = math_ops.add(v1, v2)
     assign_v1 = v1.assign(3.0)
 
diff --git a/tensorflow/python/grappler/item_test.py b/tensorflow/python/grappler/item_test.py
index c40de9da0a..d3d96c646c 100644
--- a/tensorflow/python/grappler/item_test.py
+++ b/tensorflow/python/grappler/item_test.py
@@ -110,7 +110,7 @@ class ItemTest(test.TestCase):
   def testColocationContraints(self):
     with ops.Graph().as_default() as g:
       c = constant_op.constant([10])
-      v = variables.Variable([3], dtype=dtypes.int32)
+      v = variables.VariableV1([3], dtype=dtypes.int32)
       i = gen_array_ops.ref_identity(v)
       a = state_ops.assign(i, c)
       train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py
index b658edff2d..03b42f6453 100644
--- a/tensorflow/python/grappler/memory_optimizer_test.py
+++ b/tensorflow/python/grappler/memory_optimizer_test.py
@@ -39,8 +39,8 @@ class MemoryOptimizerSwapTest(test.TestCase):
 
   def testNoSwapping(self):
     """Make sure the graph is preserved when there is nothing to swap."""
-    a = variables.Variable(10, name='a')
-    b = variables.Variable(20, name='b')
+    a = variables.VariableV1(10, name='a')
+    b = variables.VariableV1(20, name='b')
     c = math_ops.add_n([a, b], name='c')
     d = math_ops.add_n([b, c], name='d')
     train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
@@ -60,8 +60,8 @@ class MemoryOptimizerSwapTest(test.TestCase):
 
   def testSimpleSwap(self):
     """Check that the swap annotations are followed."""
-    a = variables.Variable(10, name='a')
-    b = variables.Variable(20, name='b')
+    a = variables.VariableV1(10, name='a')
+    b = variables.VariableV1(20, name='b')
     c = math_ops.add_n([a, b], name='c')
     d = math_ops.add_n([b, c], name='d')
     train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
@@ -244,7 +244,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase):
         init_op_name=init_op_name,
         train_op_name=train_op_name,
         loss_op_name=loss_op_name)
-    self.assertAllClose(original_loss, memory_optimized_loss, rtol=1e-4)
+    self.assertAllClose(original_loss, memory_optimized_loss, rtol=1e-2)
 
   def _annotated_graph(self):
     graph = ops.Graph()
diff --git a/tensorflow/python/grappler/tf_optimizer_test.py b/tensorflow/python/grappler/tf_optimizer_test.py
index 5a9afe7257..eca0f67982 100644
--- a/tensorflow/python/grappler/tf_optimizer_test.py
+++ b/tensorflow/python/grappler/tf_optimizer_test.py
@@ -57,7 +57,7 @@ class PyWrapOptimizeGraphTest(test.TestCase):
   def testKeepNodes(self):
     g = ops.Graph()
     with g.as_default():
-      a1 = variables.Variable(
+      a1 = variables.VariableV1(
           1.0)  # Must be preserved since it's in the collection 'variables'.
       a2 = constant_op.constant(0, shape=[50, 50], name='keep')
       ops.add_to_collection('a2', a2)  # Explicitly add to collection.
diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index cb19a412a2..e98b131ae6 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -1972,7 +1972,9 @@ def make_variable(name,
   if use_resource is None:
     use_resource = True
 
-  v = tf_variables.Variable(
+  # TODO(apassos,rohanj) figure out how to remove collections from here so we
+  # can remove the V1.
+  v = tf_variables.VariableV1(
       initial_value=init_val,
       name=name,
       trainable=trainable,
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 2fe85839d0..c5547b19be 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -1001,14 +1001,14 @@ class SliceAssignTest(test_util.TensorFlowTestCase):
         errors.FailedPreconditionError,
         "Attempting to use uninitialized value Variable"):
       with self.cached_session() as sess:
-        v = variables.Variable([1, 2])
+        v = variables.VariableV1([1, 2])
         sess.run(v[:].assign([1, 2]))
 
   def testTypeError(self):
     init_val = constant_op.constant([1, 2], dtype=dtypes.int32)
     too_small_val = constant_op.constant([3, 4], dtype=dtypes.int8)
     too_large_val = constant_op.constant([3, 4], dtype=dtypes.int64)
-    v = variables.Variable(init_val)
+    v = variables.VariableV1(init_val)
     with self.assertRaises(TypeError):
       v[:].assign(too_small_val)
     with self.assertRaises(TypeError):
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index fc4d2a3809..083de84775 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -130,7 +130,7 @@ class ControlFlowTest(test.TestCase):
 
   def testRefIdentity(self):
     with self.cached_session():
-      v = variables.Variable(7)
+      v = variables.VariableV1(7)
 
       v = control_flow_ops._Identity(v)
       op = state_ops.assign(v, 9)
@@ -142,7 +142,7 @@ class ControlFlowTest(test.TestCase):
 
   def testRefEnter(self):
     with self.cached_session():
-      v = variables.Variable(7)
+      v = variables.VariableV1(7)
 
       enter_v = control_flow_ops._Enter(v, "foo_1", is_constant=True)
       nine = constant_op.constant(9)
@@ -155,7 +155,7 @@ class ControlFlowTest(test.TestCase):
 
   def testRefSwitch(self):
     with self.cached_session():
-      v = variables.Variable(7)
+      v = variables.VariableV1(7)
 
       p = constant_op.constant(True)
       v1 = control_flow_ops._SwitchRefOrTensor(v._ref(), p)  # pylint: disable=protected-access
@@ -796,7 +796,7 @@ class ControlFlowTest(test.TestCase):
 
   def testWhileWithRefs_1(self):
     with self.cached_session() as sess:
-      x = variables.Variable(0)._ref()  # pylint: disable=protected-access
+      x = variables.VariableV1(0)._ref()  # pylint: disable=protected-access
       i = constant_op.constant(0)
       c = lambda i, x: math_ops.less(i, 100)
 
@@ -2317,7 +2317,7 @@ class ControlFlowTest(test.TestCase):
 
   def testWhileWithRefsWithGradients_1(self):
     with self.cached_session() as sess:
-      x = variables.Variable(0.)._ref()  # pylint: disable=protected-access
+      x = variables.VariableV1(0.)._ref()  # pylint: disable=protected-access
       i = constant_op.constant(0)
       c = lambda i, x: math_ops.less(i, 10)
 
@@ -2329,7 +2329,7 @@ class ControlFlowTest(test.TestCase):
 
       r = control_flow_ops.while_loop(c, body, [i, x], parallel_iterations=5)
 
-      grad_ys = [variables.Variable(73)._ref()]  # pylint: disable=protected-access
+      grad_ys = [variables.VariableV1(73)._ref()]  # pylint: disable=protected-access
       grad = gradients_impl.gradients([r[1]], [x], grad_ys=grad_ys)
 
       variables.global_variables_initializer().run()
@@ -2779,7 +2779,7 @@ class ControlFlowTest(test.TestCase):
 
   def testWithOpsDependencies(self):
     with self.cached_session() as sess:
-      v = variables.Variable(0.0)
+      v = variables.VariableV1(0.0)
       c = constant_op.constant(10)
 
       # Fetching v directly will result in an uninitialized error
@@ -2802,7 +2802,7 @@ class ControlFlowTest(test.TestCase):
 
   def testWithTensorDependencies(self):
     with self.cached_session():
-      v = variables.Variable(0.0)
+      v = variables.VariableV1(0.0)
       c1 = constant_op.constant(10)
       c2 = constant_op.constant(20)
 
@@ -2828,7 +2828,7 @@ class ControlFlowTest(test.TestCase):
 
   def testWithIndexedSlicesDependencies(self):
     with self.cached_session():
-      v = variables.Variable(
+      v = variables.VariableV1(
           np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype(np.float32))
       v_at_1 = ops.IndexedSlices(v, constant_op.constant([1]))
       gather_v_at_1 = array_ops.gather(v_at_1.values, v_at_1.indices)
@@ -2851,18 +2851,18 @@ class ControlFlowTest(test.TestCase):
     with ops.Graph().as_default():
       # device set on tensor => same device on dep.
       with ops.device("/job:ps"):
-        vd = variables.Variable([0.0])
+        vd = variables.VariableV1([0.0])
       with_vd_dep = control_flow_ops.with_dependencies([vd.initializer], vd)
       self.assertTrue("/job:ps" in with_vd_dep.device)
 
       # No device set on tensor => no device on dep.
-      vnod = variables.Variable([0.0])
+      vnod = variables.VariableV1([0.0])
       with_vnod_dep = control_flow_ops.with_dependencies([vnod.initializer],
                                                          vnod)
       self.assertDeviceEqual(None, with_vnod_dep.device)
 
       # device set on tensor, default device on graph => default device on dep.
-      vdef = variables.Variable([0.0], name="vdef")
+      vdef = variables.VariableV1([0.0], name="vdef")
       with ops.device("/job:worker/device:GPU:1"):
         with_vdef_dep = control_flow_ops.with_dependencies([vdef.initializer],
                                                            vdef)
@@ -2872,8 +2872,8 @@ class ControlFlowTest(test.TestCase):
 
   def testGroup(self):
     with self.cached_session() as sess:
-      v1 = variables.Variable([0.0])
-      v2 = variables.Variable([1.0])
+      v1 = variables.VariableV1([0.0])
+      v2 = variables.VariableV1([1.0])
 
       # Group init1 and init2 and run.
       init = control_flow_ops.group(v1.initializer, v2.initializer)
@@ -2955,29 +2955,29 @@ class ControlFlowTest(test.TestCase):
     p1 = array_ops.placeholder(dtypes.float32)
     p2 = array_ops.placeholder(dtypes.float32)
     p3 = array_ops.placeholder(dtypes.float32)
-    v1 = variables.Variable(p1, validate_shape=False)
-    v2 = variables.Variable(p2, validate_shape=False)
-    v3 = variables.Variable(p3, validate_shape=False)
+    v1 = variables.VariableV1(p1, validate_shape=False)
+    v2 = variables.VariableV1(p2, validate_shape=False)
+    v3 = variables.VariableV1(p3, validate_shape=False)
     self.assertIs(None, v1.get_shape().ndims)
     s = control_flow_ops.ref_select(index, [v1, v2, v3])
     self.assertIs(None, s.get_shape().ndims)
 
     # All inputs known but different.
-    v1 = variables.Variable([[1, 2]])
-    v2 = variables.Variable([[2], [1]])
+    v1 = variables.VariableV1([[1, 2]])
+    v2 = variables.VariableV1([[2], [1]])
     s = control_flow_ops.ref_select(index, [v1, v2])
     self.assertIs(None, s.get_shape().ndims)
 
     # All inputs known and same.
-    v1 = variables.Variable([[1, 2]])
-    v2 = variables.Variable([[1, 2]])
+    v1 = variables.VariableV1([[1, 2]])
+    v2 = variables.VariableV1([[1, 2]])
     s = control_flow_ops.ref_select(index, [v1, v2])
     self.assertEqual([1, 2], s.get_shape())
 
     # Possibly the same but not guaranteed.
-    v1 = variables.Variable([[1., 2.]])
+    v1 = variables.VariableV1([[1., 2.]])
     p2 = array_ops.placeholder(dtypes.float32, shape=[None, 2])
-    v2 = variables.Variable(p2, validate_shape=False)
+    v2 = variables.VariableV1(p2, validate_shape=False)
     s = control_flow_ops.ref_select(index, [v1, v2])
     self.assertEqual(None, s.get_shape())
 
@@ -3160,11 +3160,11 @@ class TupleTest(test.TestCase):
   def testTensors(self):
     for v1_first in [True, False]:
       with self.cached_session():
-        v1 = variables.Variable([1.0])
+        v1 = variables.VariableV1([1.0])
         add1 = math_ops.add(
             control_flow_ops.with_dependencies([v1.initializer], v1._ref()),  # pylint: disable=protected-access
             2.0)
-        v2 = variables.Variable([10.0])
+        v2 = variables.VariableV1([10.0])
         add2 = math_ops.add(
             control_flow_ops.with_dependencies([v2.initializer], v2._ref()),  # pylint: disable=protected-access
             20.0)
@@ -3190,14 +3190,14 @@ class TupleTest(test.TestCase):
   def testIndexedSlices(self):
     for v1_first in [True, False]:
       with self.cached_session():
-        v1 = variables.Variable(
+        v1 = variables.VariableV1(
             np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype(
                 np.float32))
         v1_at_1 = ops.IndexedSlices(
             control_flow_ops.with_dependencies([v1.initializer], v1._ref()),  # pylint: disable=protected-access
             constant_op.constant([1]))
 
-        v2 = variables.Variable(
+        v2 = variables.VariableV1(
             np.array([[0.1, 1.1], [10.1, 11.1], [20.1, 21.1]]).astype(
                 np.float32))
         v2_at_1 = ops.IndexedSlices(
@@ -3229,7 +3229,7 @@ class TupleTest(test.TestCase):
 
   def testAcceptTensorsAsControlInputs(self):
     with self.cached_session():
-      var = variables.Variable(0)
+      var = variables.VariableV1(0)
       assign = state_ops.assign(var, 1)
       t, = control_flow_ops.tuple(
           [constant_op.constant(0)], control_inputs=[assign])
diff --git a/tensorflow/python/kernel_tests/dense_update_ops_test.py b/tensorflow/python/kernel_tests/dense_update_ops_test.py
index 06c3271850..120e10314f 100644
--- a/tensorflow/python/kernel_tests/dense_update_ops_test.py
+++ b/tensorflow/python/kernel_tests/dense_update_ops_test.py
@@ -87,7 +87,7 @@ class AssignOpTest(test.TestCase):
   def testAssignNonStrictShapeChecking(self):
     with self.cached_session():
       data = array_ops.fill([1024, 1024], 0)
-      p = variables.Variable([1])
+      p = variables.VariableV1([1])
       a = state_ops.assign(p, data, validate_shape=False)
       a.op.run()
       self.assertAllEqual(p.eval(), data.eval())
@@ -100,14 +100,14 @@ class AssignOpTest(test.TestCase):
 
   def testInitRequiredAssignAdd(self):
     with self.cached_session():
-      p = variables.Variable(array_ops.fill([1024, 1024], 1), dtypes.int32)
+      p = variables.VariableV1(array_ops.fill([1024, 1024], 1), dtypes.int32)
       a = state_ops.assign_add(p, array_ops.fill([1024, 1024], 0))
       with self.assertRaisesOpError("use uninitialized"):
         a.op.run()
 
   def testInitRequiredAssignSub(self):
     with self.cached_session():
-      p = variables.Variable(array_ops.fill([1024, 1024], 1), dtypes.int32)
+      p = variables.VariableV1(array_ops.fill([1024, 1024], 1), dtypes.int32)
       a = state_ops.assign_sub(p, array_ops.fill([1024, 1024], 0))
       with self.assertRaisesOpError("use uninitialized"):
         a.op.run()
diff --git a/tensorflow/python/kernel_tests/identity_op_py_test.py b/tensorflow/python/kernel_tests/identity_op_py_test.py
index 37f9f716f8..88ea10c22a 100644
--- a/tensorflow/python/kernel_tests/identity_op_py_test.py
+++ b/tensorflow/python/kernel_tests/identity_op_py_test.py
@@ -61,7 +61,7 @@ class IdentityOpTest(test.TestCase):
   def testRefIdentityShape(self):
     with self.cached_session():
       shape = [2, 3]
-      tensor = variables.Variable(
+      tensor = variables.VariableV1(
           constant_op.constant(
               [[1, 2, 3], [6, 5, 4]], dtype=dtypes.int32))
       self.assertEquals(shape, tensor.get_shape())
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index f90545f84c..1365d4b240 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -290,7 +290,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
     self.assertEqual(self.evaluate(read), [[2]])
 
   def testUseResource(self):
-    v = variables.Variable(1.0, use_resource=True)
+    v = variables.VariableV1(1.0, use_resource=True)
     self.assertTrue(isinstance(v, resource_variable_ops.ResourceVariable))
 
   def testEagerNoUseResource(self):
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index 86e063cb36..4b92309e4d 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -136,7 +136,7 @@ class StatefulScatterNdTest(test.TestCase):
         new = ref.copy()
         np_scatter(new, indices, updates)
         # Scatter via tensorflow
-        ref_var = variables.Variable(ref)
+        ref_var = variables.VariableV1(ref)
         ref_var.initializer.run()
         tf_scatter(ref_var, indices, updates).eval()
 
@@ -258,7 +258,7 @@ class StatefulScatterNdTest(test.TestCase):
       params = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32)
       updates = np.array([-3, -4, -5]).astype(np.float32)
       with self.test_session(use_gpu=False):
-        ref = variables.Variable(params)
+        ref = variables.VariableV1(params)
         ref.initializer.run()
 
         # Indices all in range, no problem.
diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py
index 1a0fa744ae..527b7daf10 100644
--- a/tensorflow/python/kernel_tests/scatter_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_ops_test.py
@@ -178,7 +178,7 @@ class ScatterTest(test.TestCase):
             np_scatter = _TF_OPS_TO_NUMPY[tf_scatter]
           np_scatter(new, indices, updates)
           # Scatter via tensorflow
-          ref = variables.Variable(old)
+          ref = variables.VariableV1(old)
           ref.initializer.run()
           tf_scatter(ref, indices, updates).eval()
           self.assertAllClose(ref.eval(), new)
@@ -294,7 +294,7 @@ class ScatterTest(test.TestCase):
       updates = np.array([-3, -4, -5]).astype(np.float32)
       if not test.is_gpu_available():
         with self.test_session(use_gpu=False):
-          ref = variables.Variable(params)
+          ref = variables.VariableV1(params)
           ref.initializer.run()
 
           # Indices all in range, no problem.
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 401e1ae102..33f464fb90 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -394,10 +394,10 @@ class VariableScopeTest(test.TestCase):
     old = variable_scope._DEFAULT_USE_RESOURCE
     try:
       variable_scope.enable_resource_variables()
-      self.assertTrue(isinstance(variables_lib.Variable(1.0),
+      self.assertTrue(isinstance(variables_lib.VariableV1(1.0),
                                  resource_variable_ops.ResourceVariable))
       variable_scope.disable_resource_variables()
-      self.assertFalse(isinstance(variables_lib.Variable(1.0),
+      self.assertFalse(isinstance(variables_lib.VariableV1(1.0),
                                   resource_variable_ops.ResourceVariable))
     finally:
       variable_scope._DEFAULT_USE_RESOURCE = old
diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 2e7975667c..942ceedc8b 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -43,14 +43,14 @@ class VariablesTestCase(test.TestCase):
 
   def testInitialization(self):
     with self.cached_session():
-      var0 = variables.Variable(0.0)
+      var0 = variables.VariableV1(0.0)
       self.assertEqual("Variable:0", var0.name)
       self.assertEqual("Variable", var0._shared_name)
       self.assertEqual([], var0.get_shape())
       self.assertEqual([], var0.get_shape())
       self.assertEqual([], var0.shape)
 
-      var1 = variables.Variable(1.1)
+      var1 = variables.VariableV1(1.1)
       self.assertEqual("Variable_1:0", var1.name)
       self.assertEqual("Variable_1", var1._shared_name)
       self.assertEqual([], var1.get_shape())
@@ -143,7 +143,7 @@ class VariablesTestCase(test.TestCase):
 
   def testZeroSizeStringAssign(self):
     with self.cached_session() as sess:
-      array = variables.Variable(
+      array = variables.VariableV1(
           initial_value=array_ops.zeros((0,), dtype=dtypes.string),
           name="foo",
           trainable=False,
@@ -192,7 +192,7 @@ class VariablesTestCase(test.TestCase):
         # d get the control dep.
         d = constant_op.constant(2.0)
         # variables do not.
-        var_x = variables.Variable(2.0)
+        var_x = variables.VariableV1(2.0)
       self.assertEqual([c.op], d.op.control_inputs)
       self.assertEqual([], var_x.initializer.control_inputs)
       self.assertEqual([], var_x.value().op.control_inputs)
@@ -280,10 +280,10 @@ class VariablesTestCase(test.TestCase):
 
   def testCollections(self):
     with self.cached_session():
-      var_x = variables.Variable(2.0)
-      var_y = variables.Variable(2.0, trainable=False)
-      var_z = variables.Variable(2.0, trainable=True)
-      var_t = variables.Variable(
+      var_x = variables.VariableV1(2.0)
+      var_y = variables.VariableV1(2.0, trainable=False)
+      var_z = variables.VariableV1(2.0, trainable=True)
+      var_t = variables.VariableV1(
           2.0,
           trainable=True,
           collections=[
@@ -296,9 +296,9 @@ class VariablesTestCase(test.TestCase):
   def testCollectionsWithScope(self):
     with self.cached_session():
       with ops.name_scope("scope_1"):
-        var_x = variables.Variable(2.0)
+        var_x = variables.VariableV1(2.0)
       with ops.name_scope("scope_2"):
-        var_y = variables.Variable(2.0)
+        var_y = variables.VariableV1(2.0)
 
       self.assertEqual([var_x, var_y], variables.global_variables())
       self.assertEqual([var_x], variables.global_variables("scope_1"))
@@ -399,7 +399,7 @@ class VariablesTestCase(test.TestCase):
 
   def testColocation(self):
     with ops.device("/job:ps"):
-      var = variables.Variable(0, name="v")
+      var = variables.VariableV1(0, name="v")
     with ops.device("/job:worker/task:7"):
       assign_op = var.assign(1)
     self.assertDeviceEqual("/job:ps", assign_op.device)
@@ -522,7 +522,7 @@ class VariablesTestCase(test.TestCase):
       self.assertAllClose(np.ones((5, 5), np.float32), var.eval())
 
   def testRepr(self):
-    var = variables.Variable(np.zeros((5, 5), np.float32), name="noop")
+    var = variables.VariableV1(np.zeros((5, 5), np.float32), name="noop")
     self.assertEqual(
         "<tf.Variable 'noop:0' shape=(5, 5) dtype=float32_ref>",
         repr(var))
@@ -556,8 +556,8 @@ class IsInitializedTest(test.TestCase):
 
   def testVariableList(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
-      v = variables.Variable([1, 2], name="v")
-      w = variables.Variable([3, 4], name="w")
+      v = variables.VariableV1([1, 2], name="v")
+      w = variables.VariableV1([3, 4], name="w")
       uninited = variables.report_uninitialized_variables()
       self.assertAllEqual(np.array([b"v", b"w"]), sess.run(uninited))
       sess.run(w.initializer)
@@ -593,8 +593,8 @@ class ObsoleteIsInitializedTest(test.TestCase):
 
   def testVariables(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
-      v = variables.Variable([1, 2])
-      w = variables.Variable([3, 4])
+      v = variables.VariableV1([1, 2])
+      w = variables.VariableV1([3, 4])
       _ = v, w
       inited = variables.assert_variables_initialized()
       with self.assertRaisesOpError("Attempting to use uninitialized value"):
@@ -604,8 +604,8 @@ class ObsoleteIsInitializedTest(test.TestCase):
 
   def testVariableList(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
-      v = variables.Variable([1, 2])
-      w = variables.Variable([3, 4])
+      v = variables.VariableV1([1, 2])
+      w = variables.VariableV1([3, 4])
       inited = variables.assert_variables_initialized([v])
       with self.assertRaisesOpError("Attempting to use uninitialized value"):
         inited.op.run()
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index 4f6e5dc473..3c9b7a01c7 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -273,7 +273,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
   def testVariableRefGradient(self):
     with ops.Graph().as_default():
       init = constant_op.constant(100.0)
-      var = variables.Variable(init)
+      var = variables.VariableV1(init)
       gradient = gradients.gradients(var._ref(), var)
       self.assertIsNotNone(gradient)
 
diff --git a/tensorflow/python/ops/matmul_benchmark.py b/tensorflow/python/ops/matmul_benchmark.py
index 6e5fe74290..138149e63d 100644
--- a/tensorflow/python/ops/matmul_benchmark.py
+++ b/tensorflow/python/ops/matmul_benchmark.py
@@ -49,13 +49,13 @@ def build_graph(device, n, m, k, transpose_a, transpose_b, dtype):
   """
   with ops.device('%s' % device):
     if not transpose_a:
-      x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype))
+      x = variables.VariableV1(random_ops.random_uniform([n, m], dtype=dtype))
     else:
-      x = variables.Variable(random_ops.random_uniform([m, n], dtype=dtype))
+      x = variables.VariableV1(random_ops.random_uniform([m, n], dtype=dtype))
     if not transpose_b:
-      y = variables.Variable(random_ops.random_uniform([m, k], dtype=dtype))
+      y = variables.VariableV1(random_ops.random_uniform([m, k], dtype=dtype))
     else:
-      y = variables.Variable(random_ops.random_uniform([k, m], dtype=dtype))
+      y = variables.VariableV1(random_ops.random_uniform([k, m], dtype=dtype))
 
     z = math_ops.matmul(x, y, transpose_a=transpose_a, transpose_b=transpose_b)
     return control_flow_ops.group(z)
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 562e1ad6cb..af5c7d4050 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -198,7 +198,7 @@ VariableSynchronization = variables.VariableSynchronization  # pylint: disable=i
 VariableAggregation = variables.VariableAggregation  # pylint: disable=invalid-name
 
 AUTO_REUSE = _ReuseMode.AUTO_REUSE
-tf_export("AUTO_REUSE").export_constant(__name__, "AUTO_REUSE")
+tf_export(v1=["AUTO_REUSE"]).export_constant(__name__, "AUTO_REUSE")
 AUTO_REUSE.__doc__ = """
 When passed in as the value for the `reuse` flag, AUTO_REUSE indicates that
 get_variable() should create the requested variable if it doesn't exist or, if
@@ -908,7 +908,7 @@ class _VariableStore(object):
     if use_resource is None:
       # Set the default value if unspecified.
       use_resource = _DEFAULT_USE_RESOURCE
-    v = variable(
+    v = variables.VariableV1(
         initial_value=init_val,
         name=name,
         trainable=trainable,
@@ -994,7 +994,7 @@ def no_regularizer(_):
 
 
 # TODO(alive): support caching devices and partitioned variables in Eager mode.
-@tf_export("VariableScope")
+@tf_export(v1=["VariableScope"])
 class VariableScope(object):
   """Variable scope object to carry defaults to provide to `get_variable`.
 
@@ -1342,7 +1342,7 @@ def get_variable_scope_store():
   return scope_store
 
 
-@tf_export("get_variable_scope")
+@tf_export(v1=["get_variable_scope"])
 def get_variable_scope():
   """Returns the current variable scope."""
   return get_variable_scope_store().current_scope
@@ -1451,7 +1451,7 @@ class EagerVariableStore(object):
 # The argument list for get_variable must match arguments to get_local_variable.
 # So, if you are updating the arguments, also update arguments to
 # get_local_variable below.
-@tf_export("get_variable")
+@tf_export(v1=["get_variable"])
 def get_variable(name,
                  shape=None,
                  dtype=None,
@@ -1596,7 +1596,7 @@ get_variable.__doc__ = get_variable_or_local_docstring % (
 
 # The argument list for get_local_variable must match arguments to get_variable.
 # So, if you are updating the arguments, also update arguments to get_variable.
-@tf_export("get_local_variable")
+@tf_export(v1=["get_local_variable"])
 def get_local_variable(  # pylint: disable=missing-docstring
     name,
     shape=None,
@@ -1941,7 +1941,7 @@ def _get_unique_variable_scope(prefix):
 # Named like a function for backwards compatibility with the
 # @tf_contextlib.contextmanager version, which was switched to a class to avoid
 # some object creation overhead.
-@tf_export("variable_scope")  # pylint: disable=invalid-name
+@tf_export(v1=["variable_scope"])  # pylint: disable=invalid-name
 class variable_scope(object):
   """A context manager for defining ops that creates variables (layers).
 
@@ -2322,7 +2322,7 @@ class variable_scope(object):
 
 
 # pylint: disable=g-doc-return-or-yield
-@tf_export("variable_op_scope")
+@tf_export(v1=["variable_op_scope"])
 @tf_contextlib.contextmanager
 def variable_op_scope(values,
                       name_or_scope,
@@ -2443,7 +2443,33 @@ def default_variable_creator(next_creator=None, **kwargs):
         expected_shape=expected_shape, import_scope=import_scope)
 
 
+def default_variable_creator_v2(next_creator=None, **kwargs):
+  """Default variable creator."""
+  assert next_creator is None
+  initial_value = kwargs.get("initial_value", None)
+  trainable = kwargs.get("trainable", None)
+  validate_shape = kwargs.get("validate_shape", True)
+  caching_device = kwargs.get("caching_device", None)
+  name = kwargs.get("name", None)
+  variable_def = kwargs.get("variable_def", None)
+  dtype = kwargs.get("dtype", None)
+  import_scope = kwargs.get("import_scope", None)
+  constraint = kwargs.get("constraint", None)
+
+  # Set trainable value based on synchronization value.
+  synchronization = kwargs.get("synchronization", VariableSynchronization.AUTO)
+  trainable = _get_trainable_value(
+      synchronization=synchronization, trainable=trainable)
+
+  return resource_variable_ops.ResourceVariable(
+      initial_value=initial_value, trainable=trainable,
+      validate_shape=validate_shape, caching_device=caching_device,
+      name=name, dtype=dtype, constraint=constraint, variable_def=variable_def,
+      import_scope=import_scope)
+
+
 variables.default_variable_creator = default_variable_creator
+variables.default_variable_creator_v2 = default_variable_creator_v2
 
 
 def _make_getter(captured_getter, captured_previous):
@@ -2452,11 +2478,12 @@ def _make_getter(captured_getter, captured_previous):
 
 
 # TODO(apassos) remove forwarding symbol
-variable = variables.Variable
+variable = variables.VariableV1
 
 
+@tf_export(v1=["variable_creator_scope"])
 @tf_contextlib.contextmanager
-def variable_creator_scope(variable_creator):
+def variable_creator_scope_v1(variable_creator):
   """Scope which defines a variable creation function to be used by variable().
 
   variable_creator is expected to be a function with the following signature:
@@ -2527,3 +2554,73 @@ def variable_creator_scope(variable_creator):
   """
   with ops.get_default_graph()._variable_creator_scope(variable_creator):  # pylint: disable=protected-access
     yield
+
+
+# Note: only the docstrings differ between this and v1.
+@tf_export(v2=["variable_creator_scope"])
+@tf_contextlib.contextmanager
+def variable_creator_scope(variable_creator):
+  """Scope which defines a variable creation function to be used by variable().
+
+  variable_creator is expected to be a function with the following signature:
+
+  ```
+    def variable_creator(next_creator, **kwargs)
+  ```
+
+  The creator is supposed to eventually call the next_creator to create a
+  variable if it does want to create a variable and not call Variable or
+  ResourceVariable directly. This helps make creators composable. A creator may
+  choose to create multiple variables, return already existing variables, or
+  simply register that a variable was created and defer to the next creators in
+  line. Creators can also modify the keyword arguments seen by the next
+  creators.
+
+  Custom getters in the variable scope will eventually resolve down to these
+  custom creators when they do create variables.
+
+  The valid keyword arguments in kwds are:
+      initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
+        which is the initial value for the Variable. The initial value must have
+        a shape specified unless `validate_shape` is set to False. Can also be a
+        callable with no argument that returns the initial value when called. In
+        that case, `dtype` must be specified. (Note that initializer functions
+        from init_ops.py must first be bound to a shape before being used here.)
+      trainable: If `True`, the default, GradientTapes automatically watch
+        uses of this Variable.
+      validate_shape: If `False`, allows the variable to be initialized with a
+        value of unknown shape. If `True`, the default, the shape of
+        `initial_value` must be known.
+      caching_device: Optional device string describing where the Variable
+        should be cached for reading.  Defaults to the Variable's device.
+        If not `None`, caches on another device.  Typical use is to cache
+        on the device where the Ops using the Variable reside, to deduplicate
+        copying through `Switch` and other conditional statements.
+      name: Optional name for the variable. Defaults to `'Variable'` and gets
+        uniquified automatically.
+      dtype: If set, initial_value will be converted to the given type.
+        If `None`, either the datatype will be kept (if `initial_value` is
+        a Tensor), or `convert_to_tensor` will decide.
+      constraint: A constraint function to be applied to the variable after
+        updates by some algorithms.
+      synchronization: Indicates when a distributed a variable will be
+        aggregated. Accepted values are constants defined in the class
+        `tf.VariableSynchronization`. By default the synchronization is set to
+        `AUTO` and the current `DistributionStrategy` chooses
+        when to synchronize. If `synchronization` is set to `ON_READ`,
+        `trainable` must not be set to `True`.
+      aggregation: Indicates how a distributed variable will be aggregated.
+        Accepted values are constants defined in the class
+        `tf.VariableAggregation`.
+
+  This set may grow over time, so it's important the signature of creators is as
+  mentioned above.
+
+  Args:
+    variable_creator: the passed creator
+
+  Yields:
+    A scope in which the creator is active
+  """
+  with ops.get_default_graph()._variable_creator_scope(variable_creator):  # pylint: disable=protected-access
+    yield
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 7a46157739..8da1e9fe56 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -46,6 +46,11 @@ def default_variable_creator(_, **kwds):
   raise NotImplementedError("variable_scope needs to be imported")
 
 
+def default_variable_creator_v2(_, **kwds):
+  del kwds
+  raise NotImplementedError("variable_scope needs to be imported")
+
+
 def _make_getter(captured_getter, captured_previous):
   """To avoid capturing loop variables."""
   def getter(**kwargs):
@@ -101,21 +106,21 @@ class VariableAggregation(enum.Enum):
 class VariableMetaclass(type):
   """Metaclass to allow construction of tf.Variable to be overridden."""
 
-  def _variable_call(cls,
-                     initial_value=None,
-                     trainable=None,
-                     collections=None,
-                     validate_shape=True,
-                     caching_device=None,
-                     name=None,
-                     variable_def=None,
-                     dtype=None,
-                     expected_shape=None,
-                     import_scope=None,
-                     constraint=None,
-                     use_resource=None,
-                     synchronization=VariableSynchronization.AUTO,
-                     aggregation=VariableAggregation.NONE):
+  def _variable_v1_call(cls,
+                        initial_value=None,
+                        trainable=None,
+                        collections=None,
+                        validate_shape=True,
+                        caching_device=None,
+                        name=None,
+                        variable_def=None,
+                        dtype=None,
+                        expected_shape=None,
+                        import_scope=None,
+                        constraint=None,
+                        use_resource=None,
+                        synchronization=VariableSynchronization.AUTO,
+                        aggregation=VariableAggregation.NONE):
     """Call on Variable class. Useful to force the signature."""
     previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
     for getter in ops.get_default_graph()._variable_creator_stack:  # pylint: disable=protected-access
@@ -140,14 +145,49 @@ class VariableMetaclass(type):
         synchronization=synchronization,
         aggregation=aggregation)
 
+  def _variable_v2_call(cls,
+                        initial_value=None,
+                        trainable=None,
+                        validate_shape=True,
+                        caching_device=None,
+                        name=None,
+                        variable_def=None,
+                        dtype=None,
+                        import_scope=None,
+                        constraint=None,
+                        synchronization=VariableSynchronization.AUTO,
+                        aggregation=VariableAggregation.NONE):
+    """Call on Variable class. Useful to force the signature."""
+    previous_getter = lambda **kws: default_variable_creator_v2(None, **kws)
+    for getter in ops.get_default_graph()._variable_creator_stack:  # pylint: disable=protected-access
+      previous_getter = _make_getter(getter, previous_getter)
+
+    # Reset `aggregation` that is explicitly set as `None` to the enum NONE.
+    if aggregation is None:
+      aggregation = VariableAggregation.NONE
+    return previous_getter(
+        initial_value=initial_value,
+        trainable=trainable,
+        validate_shape=validate_shape,
+        caching_device=caching_device,
+        name=name,
+        variable_def=variable_def,
+        dtype=dtype,
+        import_scope=import_scope,
+        constraint=constraint,
+        synchronization=synchronization,
+        aggregation=aggregation)
+
   def __call__(cls, *args, **kwargs):
-    if cls is Variable:
-      return cls._variable_call(*args, **kwargs)
+    if cls is VariableV1:
+      return cls._variable_v1_call(*args, **kwargs)
+    elif cls is Variable:
+      return cls._variable_v2_call(*args, **kwargs)
     else:
       return super(VariableMetaclass, cls).__call__(*args, **kwargs)
 
 
-@tf_export("Variable")
+@tf_export(v2=["Variable"])
 class Variable(six.with_metaclass(VariableMetaclass,
                                   checkpointable.CheckpointableBase)):
   """See the [Variables Guide](https://tensorflow.org/guide/variables).
@@ -267,16 +307,13 @@ class Variable(six.with_metaclass(VariableMetaclass,
   def __init__(self,
                initial_value=None,
                trainable=True,
-               collections=None,
                validate_shape=True,
                caching_device=None,
                name=None,
                variable_def=None,
                dtype=None,
-               expected_shape=None,
                import_scope=None,
                constraint=None,
-               use_resource=None,
                synchronization=VariableSynchronization.AUTO,
                aggregation=VariableAggregation.NONE):
     """Creates a new variable with value `initial_value`.
@@ -297,11 +334,8 @@ class Variable(six.with_metaclass(VariableMetaclass,
         callable with no argument that returns the initial value when called. In
         that case, `dtype` must be specified. (Note that initializer functions
         from init_ops.py must first be bound to a shape before being used here.)
-      trainable: If `True`, the default, also adds the variable to the graph
-        collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as
-        the default list of variables to use by the `Optimizer` classes.
-      collections: List of graph collections keys. The new variable is added to
-        these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
+      trainable: If `True`, the default, GradientTapes automatically watch uses
+        of this variable.
       validate_shape: If `False`, allows the variable to be initialized with a
         value of unknown shape. If `True`, the default, the shape of
         `initial_value` must be known.
@@ -319,8 +353,6 @@ class Variable(six.with_metaclass(VariableMetaclass,
       dtype: If set, initial_value will be converted to the given type.
         If `None`, either the datatype will be kept (if `initial_value` is
         a Tensor), or `convert_to_tensor` will decide.
-      expected_shape: A TensorShape. If set, initial_value is expected
-        to have this shape.
       import_scope: Optional `string`. Name scope to add to the
         `Variable.` Only used when initializing from protocol buffer.
       constraint: An optional projection function to be applied to the variable
@@ -330,9 +362,6 @@ class Variable(six.with_metaclass(VariableMetaclass,
         variable and return the Tensor for the projected value
         (which must have the same shape). Constraints are not safe to
         use when doing asynchronous distributed training.
-      use_resource: if True, a ResourceVariable is created; otherwise an
-       old-style ref-based variable is created. When eager execution is enabled
-       a resource variable is always created.
       synchronization: Indicates when a distributed a variable will be
         aggregated. Accepted values are constants defined in the class
         `tf.VariableSynchronization`. By default the synchronization is set to
@@ -1009,11 +1038,207 @@ class Variable(six.with_metaclass(VariableMetaclass,
     raise NotImplementedError
 
 
+@tf_export(v1=["Variable"])
+class VariableV1(Variable):
+  """See the [Variables Guide](https://tensorflow.org/guide/variables).
+
+  A variable maintains state in the graph across calls to `run()`. You add a
+  variable to the graph by constructing an instance of the class `Variable`.
+
+  The `Variable()` constructor requires an initial value for the variable,
+  which can be a `Tensor` of any type and shape. The initial value defines the
+  type and shape of the variable. After construction, the type and shape of
+  the variable are fixed. The value can be changed using one of the assign
+  methods.
+
+  If you want to change the shape of a variable later you have to use an
+  `assign` Op with `validate_shape=False`.
+
+  Just like any `Tensor`, variables created with `Variable()` can be used as
+  inputs for other Ops in the graph. Additionally, all the operators
+  overloaded for the `Tensor` class are carried over to variables, so you can
+  also add nodes to the graph by just doing arithmetic on variables.
+
+  ```python
+  import tensorflow as tf
+
+  # Create a variable.
+  w = tf.Variable(<initial-value>, name=<optional-name>)
+
+  # Use the variable in the graph like any Tensor.
+  y = tf.matmul(w, ...another variable or tensor...)
+
+  # The overloaded operators are available too.
+  z = tf.sigmoid(w + y)
+
+  # Assign a new value to the variable with `assign()` or a related method.
+  w.assign(w + 1.0)
+  w.assign_add(1.0)
+  ```
+
+  When you launch the graph, variables have to be explicitly initialized before
+  you can run Ops that use their value. You can initialize a variable by
+  running its *initializer op*, restoring the variable from a save file, or
+  simply running an `assign` Op that assigns a value to the variable. In fact,
+  the variable *initializer op* is just an `assign` Op that assigns the
+  variable's initial value to the variable itself.
+
+  ```python
+  # Launch the graph in a session.
+  with tf.Session() as sess:
+      # Run the variable initializer.
+      sess.run(w.initializer)
+      # ...you now can run ops that use the value of 'w'...
+  ```
+
+  The most common initialization pattern is to use the convenience function
+  `global_variables_initializer()` to add an Op to the graph that initializes
+  all the variables. You then run that Op after launching the graph.
+
+  ```python
+  # Add an Op to initialize global variables.
+  init_op = tf.global_variables_initializer()
+
+  # Launch the graph in a session.
+  with tf.Session() as sess:
+      # Run the Op that initializes global variables.
+      sess.run(init_op)
+      # ...you can now run any Op that uses variable values...
+  ```
+
+  If you need to create a variable with an initial value dependent on another
+  variable, use the other variable's `initialized_value()`. This ensures that
+  variables are initialized in the right order.
+
+  All variables are automatically collected in the graph where they are
+  created. By default, the constructor adds the new variable to the graph
+  collection `GraphKeys.GLOBAL_VARIABLES`. The convenience function
+  `global_variables()` returns the contents of that collection.
+
+  When building a machine learning model it is often convenient to distinguish
+  between variables holding the trainable model parameters and other variables
+  such as a `global step` variable used to count training steps. To make this
+  easier, the variable constructor supports a `trainable=<bool>` parameter. If
+  `True`, the new variable is also added to the graph collection
+  `GraphKeys.TRAINABLE_VARIABLES`. The convenience function
+  `trainable_variables()` returns the contents of this collection. The
+  various `Optimizer` classes use this collection as the default list of
+  variables to optimize.
+
+  WARNING: tf.Variable objects by default have a non-intuitive memory model. A
+  Variable is represented internally as a mutable Tensor which can
+  non-deterministically alias other Tensors in a graph. The set of operations
+  which consume a Variable and can lead to aliasing is undetermined and can
+  change across TensorFlow versions. Avoid writing code which relies on the
+  value of a Variable either changing or not changing as other operations
+  happen. For example, using Variable objects or simple functions thereof as
+  predicates in a `tf.cond` is dangerous and error-prone:
+
+  ```
+  v = tf.Variable(True)
+  tf.cond(v, lambda: v.assign(False), my_false_fn)  # Note: this is broken.
+  ```
+
+  Here replacing adding `use_resource=True` when constructing the variable will
+  fix any nondeterminism issues:
+  ```
+  v = tf.Variable(True, use_resource=True)
+  tf.cond(v, lambda: v.assign(False), my_false_fn)
+  ```
+
+  To use the replacement for variables which does
+  not have these issues:
+
+  * Add `use_resource=True` when constructing `tf.Variable`;
+  * Call `tf.get_variable_scope().set_use_resource(True)` inside a
+    `tf.variable_scope` before the `tf.get_variable()` call.
+  """
+
+  def __init__(self,  # pylint: disable=super-init-not-called
+               initial_value=None,
+               trainable=True,
+               collections=None,
+               validate_shape=True,
+               caching_device=None,
+               name=None,
+               variable_def=None,
+               dtype=None,
+               expected_shape=None,
+               import_scope=None,
+               constraint=None,
+               use_resource=None,
+               synchronization=VariableSynchronization.AUTO,
+               aggregation=VariableAggregation.NONE):
+    """Creates a new variable with value `initial_value`.
+
+    The new variable is added to the graph collections listed in `collections`,
+    which defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
+
+    If `trainable` is `True` the variable is also added to the graph collection
+    `GraphKeys.TRAINABLE_VARIABLES`.
+
+    This constructor creates both a `variable` Op and an `assign` Op to set the
+    variable to its initial value.
+
+    Args:
+      initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
+        which is the initial value for the Variable. The initial value must have
+        a shape specified unless `validate_shape` is set to False. Can also be a
+        callable with no argument that returns the initial value when called. In
+        that case, `dtype` must be specified. (Note that initializer functions
+        from init_ops.py must first be bound to a shape before being used here.)
+      trainable: If `True`, the default, also adds the variable to the graph
+        collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as
+        the default list of variables to use by the `Optimizer` classes.
+      collections: List of graph collections keys. The new variable is added to
+        these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
+      validate_shape: If `False`, allows the variable to be initialized with a
+        value of unknown shape. If `True`, the default, the shape of
+        `initial_value` must be known.
+      caching_device: Optional device string describing where the Variable
+        should be cached for reading.  Defaults to the Variable's device.
+        If not `None`, caches on another device.  Typical use is to cache
+        on the device where the Ops using the Variable reside, to deduplicate
+        copying through `Switch` and other conditional statements.
+      name: Optional name for the variable. Defaults to `'Variable'` and gets
+        uniquified automatically.
+      variable_def: `VariableDef` protocol buffer. If not `None`, recreates
+        the Variable object with its contents, referencing the variable's nodes
+        in the graph, which must already exist. The graph is not changed.
+        `variable_def` and the other arguments are mutually exclusive.
+      dtype: If set, initial_value will be converted to the given type.
+        If `None`, either the datatype will be kept (if `initial_value` is
+        a Tensor), or `convert_to_tensor` will decide.
+      expected_shape: A TensorShape. If set, initial_value is expected
+        to have this shape.
+      import_scope: Optional `string`. Name scope to add to the
+        `Variable.` Only used when initializing from protocol buffer.
+      constraint: An optional projection function to be applied to the variable
+        after being updated by an `Optimizer` (e.g. used to implement norm
+        constraints or value constraints for layer weights). The function must
+        take as input the unprojected Tensor representing the value of the
+        variable and return the Tensor for the projected value
+        (which must have the same shape). Constraints are not safe to
+        use when doing asynchronous distributed training.
+      use_resource: whether to use resource variables.
+      synchronization: unused
+      aggregation: unused
+
+    Raises:
+      ValueError: If both `variable_def` and initial_value are specified.
+      ValueError: If the initial value is not specified, or does not have a
+        shape and `validate_shape` is `True`.
+      RuntimeError: If eager execution is enabled.
+    """
+
+  SaveSliceInfo = Variable.SaveSliceInfo
+
+
 # TODO(apassos): do not repeat all comments here
-class RefVariable(Variable):
+class RefVariable(VariableV1):
   """Ref-based implementation of variables."""
 
-  def __init__(self,
+  def __init__(self,  # pylint: disable=super-init-not-called
                initial_value=None,
                trainable=True,
                collections=None,
@@ -1873,7 +2098,7 @@ class RefVariable(Variable):
   def _OverloadAllOperators():  # pylint: disable=invalid-name
     """Register overloads for all operators."""
     for operator in ops.Tensor.OVERLOADABLE_OPERATORS:
-      Variable._OverloadOperator(operator)
+      Variable._OverloadOperator(operator)  # pylint: disable=protected-access
     # For slicing, bind getitem differently than a tensor (use SliceHelperVar
     # instead)
     # pylint: disable=protected-access
@@ -2401,7 +2626,7 @@ class PartitionedVariable(object):
         "assign() has not been implemented for PartitionedVariable.")
 
 
-@tf_export("global_variables")
+@tf_export(v1=["global_variables"])
 def global_variables(scope=None):
   """Returns global variables.
 
@@ -2427,7 +2652,7 @@ def global_variables(scope=None):
   return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES, scope)
 
 
-@tf_export("all_variables")
+@tf_export(v1=["all_variables"])
 @deprecated("2017-03-02", "Please use tf.global_variables instead.")
 def all_variables():
   """See `tf.global_variables`."""
@@ -2452,7 +2677,7 @@ def _all_saveable_objects(scope=None):
           ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS, scope))
 
 
-@tf_export("local_variables")
+@tf_export(v1=["local_variables"])
 def local_variables(scope=None):
   """Returns local variables.
 
@@ -2480,7 +2705,7 @@ def local_variables(scope=None):
   return ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES, scope)
 
 
-@tf_export("model_variables")
+@tf_export(v1=["model_variables"])
 def model_variables(scope=None):
   """Returns all variables in the MODEL_VARIABLES collection.
 
@@ -2497,7 +2722,7 @@ def model_variables(scope=None):
   return ops.get_collection(ops.GraphKeys.MODEL_VARIABLES, scope)
 
 
-@tf_export("trainable_variables")
+@tf_export(v1=["trainable_variables"])
 def trainable_variables(scope=None):
   """Returns all variables created with `trainable=True`.
 
@@ -2519,7 +2744,7 @@ def trainable_variables(scope=None):
   return ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope)
 
 
-@tf_export("moving_average_variables")
+@tf_export(v1=["moving_average_variables"])
 def moving_average_variables(scope=None):
   """Returns all variables that maintain their moving averages.
 
@@ -2541,7 +2766,7 @@ def moving_average_variables(scope=None):
   return ops.get_collection(ops.GraphKeys.MOVING_AVERAGE_VARIABLES, scope)
 
 
-@tf_export("initializers.variables", "variables_initializer")
+@tf_export(v1=["initializers.variables", "variables_initializer"])
 def variables_initializer(var_list, name="init"):
   """Returns an Op that initializes a list of variables.
 
@@ -2567,7 +2792,7 @@ def variables_initializer(var_list, name="init"):
   return control_flow_ops.no_op(name=name)
 
 
-@tf_export("initialize_variables")
+@tf_export(v1=["initialize_variables"])
 @tf_should_use.should_use_result
 @deprecated("2017-03-02", "Use `tf.variables_initializer` instead.")
 def initialize_variables(var_list, name="init"):
@@ -2575,7 +2800,7 @@ def initialize_variables(var_list, name="init"):
   return variables_initializer(var_list, name=name)
 
 
-@tf_export("initializers.global_variables", "global_variables_initializer")
+@tf_export(v1=["initializers.global_variables", "global_variables_initializer"])
 def global_variables_initializer():
   """Returns an Op that initializes global variables.
 
@@ -2589,7 +2814,7 @@ def global_variables_initializer():
   return variables_initializer(global_variables())
 
 
-@tf_export("initialize_all_variables")
+@tf_export(v1=["initialize_all_variables"])
 @tf_should_use.should_use_result
 @deprecated("2017-03-02", "Use `tf.global_variables_initializer` instead.")
 def initialize_all_variables():
@@ -2597,7 +2822,7 @@ def initialize_all_variables():
   return global_variables_initializer()
 
 
-@tf_export("initializers.local_variables", "local_variables_initializer")
+@tf_export(v1=["initializers.local_variables", "local_variables_initializer"])
 def local_variables_initializer():
   """Returns an Op that initializes all local variables.
 
@@ -2611,7 +2836,7 @@ def local_variables_initializer():
   return variables_initializer(local_variables())
 
 
-@tf_export("initialize_local_variables")
+@tf_export(v1=["initialize_local_variables"])
 @tf_should_use.should_use_result
 @deprecated("2017-03-02", "Use `tf.local_variables_initializer` instead.")
 def initialize_local_variables():
@@ -2619,7 +2844,7 @@ def initialize_local_variables():
   return local_variables_initializer()
 
 
-@tf_export("is_variable_initialized")
+@tf_export(v1=["is_variable_initialized"])
 @tf_should_use.should_use_result
 def is_variable_initialized(variable):
   """Tests if a variable has been initialized.
@@ -2634,7 +2859,7 @@ def is_variable_initialized(variable):
   return state_ops.is_variable_initialized(variable)
 
 
-@tf_export("assert_variables_initialized")
+@tf_export(v1=["assert_variables_initialized"])
 @tf_should_use.should_use_result
 def assert_variables_initialized(var_list=None):
   """Returns an Op to check if variables are initialized.
@@ -2677,7 +2902,7 @@ def assert_variables_initialized(var_list=None):
       return array_ops.stack(ranks)
 
 
-@tf_export("report_uninitialized_variables")
+@tf_export(v1=["report_uninitialized_variables"])
 @tf_should_use.should_use_result
 def report_uninitialized_variables(var_list=None,
                                    name="report_uninitialized_variables"):
diff --git a/tensorflow/python/saved_model/loader_test.py b/tensorflow/python/saved_model/loader_test.py
index b7e217a35b..924b2e7c06 100644
--- a/tensorflow/python/saved_model/loader_test.py
+++ b/tensorflow/python/saved_model/loader_test.py
@@ -47,8 +47,8 @@ class SavedModelLoaderTest(test.TestCase):
   def setUp(self):
     """Write test SavedModels to a temp directory."""
     with session.Session(graph=ops.Graph()) as sess:
-      x = variables.Variable(5, name="x")
-      y = variables.Variable(11, name="y")
+      x = variables.VariableV1(5, name="x")
+      y = variables.VariableV1(11, name="y")
       z = x + y
       sess.run(variables.global_variables_initializer())
 
@@ -134,8 +134,8 @@ class SavedModelLoaderTest(test.TestCase):
   def test_restore_variables(self):
     loader = loader_impl.SavedModelLoader(SAVED_MODEL_WITH_MAIN_OP)
     with self.session(graph=ops.Graph()) as sess:
-      x = variables.Variable(0, name="x")
-      y = variables.Variable(0, name="y")
+      x = variables.VariableV1(0, name="x")
+      y = variables.VariableV1(0, name="y")
       z = x * y
 
       sess.run(variables.global_variables_initializer())
@@ -186,8 +186,10 @@ class SavedModelLoaderTest(test.TestCase):
     """
     path = _get_export_dir("no_variable_saved_model")
     with session.Session(graph=ops.Graph()) as sess:
-      x = variables.Variable(5, name="x", collections=["not_global_variable"])
-      y = variables.Variable(11, name="y", collections=["not_global_variable"])
+      x = variables.VariableV1(
+          5, name="x", collections=["not_global_variable"])
+      y = variables.VariableV1(
+          11, name="y", collections=["not_global_variable"])
       self.assertFalse(variables._all_saveable_objects())
       z = x + y
       sess.run(variables.variables_initializer([x, y]))
diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py
index 49d52d3bee..80b75b7ee6 100644
--- a/tensorflow/python/saved_model/saved_model_test.py
+++ b/tensorflow/python/saved_model/saved_model_test.py
@@ -60,7 +60,7 @@ class SavedModelTest(test.TestCase):
     return os.path.join(test.get_temp_dir(), label)
 
   def _init_and_validate_variable(self, sess, variable_name, variable_value):
-    v = variables.Variable(variable_value, name=variable_name)
+    v = variables.VariableV1(variable_value, name=variable_name)
     sess.run(variables.global_variables_initializer())
     self.assertEqual(variable_value, v.eval())
 
@@ -458,7 +458,7 @@ class SavedModelTest(test.TestCase):
     # Graph with a single variable added to a collection. SavedModel invoked to:
     # - add with weights.
     with self.session(graph=ops.Graph()) as sess:
-      v = variables.Variable(42, name="v")
+      v = variables.VariableV1(42, name="v")
       ops.add_to_collection("foo_vars", v)
       sess.run(variables.global_variables_initializer())
       self.assertEqual(42, v.eval())
@@ -468,7 +468,7 @@ class SavedModelTest(test.TestCase):
     # SavedModel invoked to:
     # - simply add the model (weights are not updated).
     with self.session(graph=ops.Graph()) as sess:
-      v = variables.Variable(43, name="v")
+      v = variables.VariableV1(43, name="v")
       ops.add_to_collection("bar_vars", v)
       sess.run(variables.global_variables_initializer())
       self.assertEqual(43, v.eval())
@@ -780,13 +780,13 @@ class SavedModelTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       # Add `v1` and `v2` variables to the graph.
-      v1 = variables.Variable(1, name="v1")
+      v1 = variables.VariableV1(1, name="v1")
       ops.add_to_collection("v", v1)
-      v2 = variables.Variable(2, name="v2")
+      v2 = variables.VariableV1(2, name="v2")
       ops.add_to_collection("v", v2)
 
       # Initialize another variable `v3` to 42.
-      v3 = variables.Variable(42, name="v3")
+      v3 = variables.VariableV1(42, name="v3")
       ops.add_to_collection("v", v3)
 
       # Set up an assignment op to be run as part of the main_op.
@@ -815,13 +815,13 @@ class SavedModelTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       # Add `v1` and `v2` variables to the graph.
-      v1 = variables.Variable(1, name="v1")
+      v1 = variables.VariableV1(1, name="v1")
       ops.add_to_collection("v", v1)
-      v2 = variables.Variable(2, name="v2")
+      v2 = variables.VariableV1(2, name="v2")
       ops.add_to_collection("v", v2)
 
       # Initialize another variable `v3` to 42.
-      v3 = variables.Variable(42, name="v3", trainable=False, collections=[])
+      v3 = variables.VariableV1(42, name="v3", trainable=False, collections=[])
       ops.add_to_collection("v", v3)
 
       # Set up an assignment op to be run as part of the legacy_init_op.
@@ -860,11 +860,11 @@ class SavedModelTest(test.TestCase):
     g = ops.Graph()
     with self.session(graph=g) as sess:
       # Initialize variable `v1` to 1.
-      v1 = variables.Variable(1, name="v1")
+      v1 = variables.VariableV1(1, name="v1")
       ops.add_to_collection("v", v1)
 
       # Initialize another variable `v2` to 42.
-      v2 = variables.Variable(42, name="v2", trainable=False, collections=[])
+      v2 = variables.VariableV1(42, name="v2", trainable=False, collections=[])
       ops.add_to_collection("v", v2)
 
       # Set up an assignment op to be run as part of the init op.
@@ -889,9 +889,9 @@ class SavedModelTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       # Add `v1` and `v2` variables to the graph.
-      v1 = variables.Variable(1, name="v1")
+      v1 = variables.VariableV1(1, name="v1")
       ops.add_to_collection("v", v1)
-      v2 = variables.Variable(2, name="v2")
+      v2 = variables.VariableV1(2, name="v2")
       ops.add_to_collection("v", v2)
 
       sess.run(variables.global_variables_initializer())
@@ -918,9 +918,9 @@ class SavedModelTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       # Add `v1` and `v2` variables to the graph.
-      v1 = variables.Variable(1, name="v1")
+      v1 = variables.VariableV1(1, name="v1")
       ops.add_to_collection("v", v1)
-      v2 = variables.Variable(2, name="v2")
+      v2 = variables.VariableV1(2, name="v2")
       ops.add_to_collection("v", v2)
 
       sess.run(variables.global_variables_initializer())
@@ -947,9 +947,9 @@ class SavedModelTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       # Add `v1` and `v2` variables to the graph.
-      v1 = variables.Variable(1, name="v1")
+      v1 = variables.VariableV1(1, name="v1")
       ops.add_to_collection("v", v1)
-      v2 = variables.Variable(2, name="v2")
+      v2 = variables.VariableV1(2, name="v2")
       ops.add_to_collection("v", v2)
 
       sess.run(variables.global_variables_initializer())
@@ -1071,13 +1071,13 @@ class SavedModelTest(test.TestCase):
         graph=ops.Graph(),
         config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess:
       with sess.graph.device("/cpu:0"):
-        v1 = variables.Variable(1, name="v1")
+        v1 = variables.VariableV1(1, name="v1")
       with sess.graph.device("/cpu:1"):
-        v2 = variables.Variable(2, name="v2")
+        v2 = variables.VariableV1(2, name="v2")
 
       # v3 is an unsaved variable derived from v1 and v2.  It is used to
       # exercise the ability to run an init op when restoring a graph.
-      v3 = variables.Variable(1, name="v3", trainable=False, collections=[])
+      v3 = variables.VariableV1(1, name="v3", trainable=False, collections=[])
       assign_v3 = state_ops.assign(v3, math_ops.add(v1, v2))
       init_op = control_flow_ops.group(assign_v3, name="init_op")
 
@@ -1140,7 +1140,7 @@ class SavedModelTest(test.TestCase):
     builder = saved_model_builder.SavedModelBuilder(export_dir)
 
     with self.session(graph=ops.Graph()) as sess:
-      variables.Variable(1, name="v1")
+      variables.VariableV1(1, name="v1")
       sess.run(variables.global_variables_initializer())
       custom_saver = training.Saver(name="my_saver")
       builder.add_meta_graph_and_variables(sess, ["tag"], saver=custom_saver)
@@ -1162,7 +1162,7 @@ class SavedModelTest(test.TestCase):
     builder = saved_model_builder.SavedModelBuilder(export_dir)
 
     with self.session(graph=ops.Graph()) as sess:
-      variables.Variable(1, name="v1")
+      variables.VariableV1(1, name="v1")
       sess.run(variables.global_variables_initializer())
       training.Saver(name="my_saver")
       builder.add_meta_graph_and_variables(sess, ["tag"])
@@ -1184,7 +1184,7 @@ class SavedModelTest(test.TestCase):
     builder = saved_model_builder.SavedModelBuilder(export_dir)
 
     with self.session(graph=ops.Graph()) as sess:
-      variables.Variable(1, name="v1")
+      variables.VariableV1(1, name="v1")
       sess.run(variables.global_variables_initializer())
       builder.add_meta_graph_and_variables(sess, ["tag_0"])
 
@@ -1293,8 +1293,8 @@ class SavedModelTest(test.TestCase):
     # Add a graph with two float32 variables and a Complex Op composing them
     # with strip_default_attrs enabled.
     with session.Session(graph=ops.Graph()) as sess:
-      real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real")
-      imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag")
+      real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real")
+      imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag")
       math_ops.complex(real_num, imag_num, name="complex")
       sess.run(variables.global_variables_initializer())
       builder.add_meta_graph_and_variables(
@@ -1303,8 +1303,8 @@ class SavedModelTest(test.TestCase):
     # Add a graph with the same float32 variables and a Complex Op composing
     # them with strip_default_attrs disabled.
     with session.Session(graph=ops.Graph()) as sess:
-      real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real")
-      imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag")
+      real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real")
+      imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag")
       math_ops.complex(real_num, imag_num, name="complex")
       sess.run(variables.global_variables_initializer())
       builder.add_meta_graph(["bar"], strip_default_attrs=False)
@@ -1366,7 +1366,7 @@ class SavedModelTest(test.TestCase):
     # Add a graph with a single variable and a test op with a defaultless
     # float32 attr, "test_attr".
     with session.Session(graph=ops.Graph()) as sess:
-      variables.Variable(1.0, dtype=dtypes.float64, name="var")
+      variables.VariableV1(1.0, dtype=dtypes.float64, name="var")
       test_ops.test_attr(T=dtypes.float32, name="test_attr")
       sess.run(variables.global_variables_initializer())
       builder.add_meta_graph_and_variables(sess, ["foo"])
diff --git a/tensorflow/python/tools/freeze_graph_test.py b/tensorflow/python/tools/freeze_graph_test.py
index e38945fabc..5dc14a6961 100644
--- a/tensorflow/python/tools/freeze_graph_test.py
+++ b/tensorflow/python/tools/freeze_graph_test.py
@@ -60,7 +60,7 @@ class FreezeGraphTest(test_util.TensorFlowTestCase):
     # We'll create an input graph that has a single variable containing 1.0,
     # and that then multiplies it by 2.
     with ops.Graph().as_default():
-      variable_node = variables.Variable(1.0, name="variable_node")
+      variable_node = variables.VariableV1(1.0, name="variable_node")
       output_node = math_ops.multiply(variable_node, 2.0, name="output_node")
       sess = session.Session()
       init = variables.global_variables_initializer()
@@ -138,7 +138,7 @@ class FreezeGraphTest(test_util.TensorFlowTestCase):
       features = parsing_ops.parse_example(examples, feature_configs)
       feature = features[feature_name]
 
-      variable_node = variables.Variable(1.0, name="variable_node")
+      variable_node = variables.VariableV1(1.0, name="variable_node")
       scores = math_ops.multiply(variable_node, feature, name="output_node")
       class_feature = array_ops.fill(array_ops.shape(feature),
                                      "class_%s" % feature_name)
@@ -174,7 +174,7 @@ class FreezeGraphTest(test_util.TensorFlowTestCase):
     output_graph_filename = os.path.join(tmp_dir, "output_graph.pb")
 
     with ops.Graph().as_default():
-      variable_node = variables.Variable(1.0, name="variable_node")
+      variable_node = variables.VariableV1(1.0, name="variable_node")
       output_node = math_ops.multiply(variable_node, 2.0, name="output_node")
       sess = session.Session()
       init = variables.global_variables_initializer()
diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py
index 56c4043d9d..eff15b24ce 100644
--- a/tensorflow/python/training/checkpointable/util.py
+++ b/tensorflow/python/training/checkpointable/util.py
@@ -247,7 +247,7 @@ def _default_getter(name, shape, dtype, initializer=None,
       def initial_value():
         return initializer(
             shape_object.as_list(), dtype=dtype, partition_info=partition_info)
-    return variables.Variable(
+    return variables.VariableV1(
         initial_value=initial_value,
         name=name,
         dtype=variable_dtype,
diff --git a/tensorflow/python/training/learning_rate_decay_test.py b/tensorflow/python/training/learning_rate_decay_test.py
index 5a9215730e..03a32f6ca0 100644
--- a/tensorflow/python/training/learning_rate_decay_test.py
+++ b/tensorflow/python/training/learning_rate_decay_test.py
@@ -63,7 +63,7 @@ class LRDecayTest(test_util.TensorFlowTestCase):
 
   def testVariables(self):
     with self.cached_session():
-      step = variables.Variable(1)
+      step = variables.VariableV1(1)
       assign_1 = step.assign(1)
       assign_2 = step.assign(2)
       assign_100 = step.assign(100)
@@ -121,7 +121,7 @@ class LRDecayTest(test_util.TensorFlowTestCase):
 
     # Test that ref types are valid.
     if not context.executing_eagerly():
-      x = variables.Variable(0.0)
+      x = variables.VariableV1(0.0)
       x_ref = x.op.outputs[0]   # float32_ref tensor should be accepted
       boundaries, values = [1.0, 2.0], [1, 2, 3]
       learning_rate_decay.piecewise_constant(x_ref, boundaries, values)
diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py
index 2d7799d66a..c870d99de9 100644
--- a/tensorflow/python/training/monitored_session_test.py
+++ b/tensorflow/python/training/monitored_session_test.py
@@ -69,8 +69,8 @@ class ScaffoldTest(test.TestCase):
   def test_defaults_empty_graph(self):
     with ops.Graph().as_default():
       scaffold = monitored_session.Scaffold()
-      variables.Variable(1, name='my_var')
-      variables.Variable(
+      variables.VariableV1(1, name='my_var')
+      variables.VariableV1(
           2, name='my_local_var', collections=[ops.GraphKeys.LOCAL_VARIABLES])
       scaffold.finalize()
       self.assertTrue(isinstance(scaffold.init_op, ops.Operation))
@@ -105,7 +105,7 @@ class ScaffoldTest(test.TestCase):
 
   def test_caches_values(self):
     with ops.Graph().as_default():
-      variables.Variable([1])
+      variables.VariableV1([1])
       scaffold1 = monitored_session.Scaffold()
       scaffold1.finalize()
       scaffold2 = monitored_session.Scaffold()
@@ -119,7 +119,7 @@ class ScaffoldTest(test.TestCase):
 
   def test_raise_error_if_more_than_one_cached_item(self):
     with ops.Graph().as_default():
-      variables.Variable([1])
+      variables.VariableV1([1])
       ops.add_to_collection(ops.GraphKeys.SAVERS, saver_lib.Saver())
       ops.add_to_collection(ops.GraphKeys.SAVERS, saver_lib.Saver())
       with self.assertRaisesRegexp(RuntimeError, 'More than one item'):
@@ -127,7 +127,7 @@ class ScaffoldTest(test.TestCase):
 
   def test_uses_passed_values(self):
     with ops.Graph().as_default():
-      variables.Variable([1])
+      variables.VariableV1([1])
       saver = saver_lib.Saver()
       scaffold = monitored_session.Scaffold(
           init_op=2,
@@ -148,7 +148,7 @@ class ScaffoldTest(test.TestCase):
 
   def test_graph_is_finalized(self):
     with ops.Graph().as_default():
-      variables.Variable([1])
+      variables.VariableV1([1])
       monitored_session.Scaffold().finalize()
       with self.assertRaisesRegexp(RuntimeError,
                                    'Graph is finalized and cannot be modified'):
@@ -157,7 +157,7 @@ class ScaffoldTest(test.TestCase):
   def test_new_scaffold_from_default_scaffold(self):
     scaffold1 = monitored_session.Scaffold()
     with ops.Graph().as_default():
-      variables.Variable([1])
+      variables.VariableV1([1])
       saver = saver_lib.Saver()
       scaffold2 = monitored_session.Scaffold(
           init_op=2,
@@ -180,7 +180,7 @@ class ScaffoldTest(test.TestCase):
 
   def test_new_scaffold_from_existing_scaffold(self):
     with ops.Graph().as_default():
-      variables.Variable([1])
+      variables.VariableV1([1])
       saver = saver_lib.Saver()
       scaffold1 = monitored_session.Scaffold(
           init_op=2,
@@ -1374,7 +1374,7 @@ class MonitoredSessionTest(test.TestCase):
 
   def test_defaults(self):
     with ops.Graph().as_default():
-      a_var = variables.Variable(0)
+      a_var = variables.VariableV1(0)
       with monitored_session.MonitoredSession() as session:
         self.assertEqual(0, session.run(a_var))
 
@@ -1700,7 +1700,7 @@ class MonitoredSessionTest(test.TestCase):
 
   def test_graph_finalized_during_run_unfinalized_after_exit(self):
     with ops.Graph().as_default() as g:
-      a_var = variables.Variable(0)
+      a_var = variables.VariableV1(0)
       with monitored_session.MonitoredSession() as session:
         self.assertEqual(0, session.run(a_var))
         self.assertTrue(g.finalized)
@@ -1708,7 +1708,7 @@ class MonitoredSessionTest(test.TestCase):
 
   def test_keep_finalized_graph_as_finalized(self):
     with ops.Graph().as_default() as g:
-      a_var = variables.Variable(0)
+      a_var = variables.VariableV1(0)
       monitored_session.Scaffold().finalize()
       with monitored_session.MonitoredSession() as session:
         self.assertEqual(0, session.run(a_var))
@@ -2032,7 +2032,7 @@ class MonitoredSessionTest(test.TestCase):
     with ops.Graph().as_default():
       c = array_ops.placeholder(dtypes.float32)
       v = array_ops.identity(c)
-      graph_state = variables.Variable(0.0)
+      graph_state = variables.VariableV1(0.0)
       graph_side_effect = state_ops.assign_add(graph_state, 0.31)
 
       def step_fn(step_context):
@@ -2088,7 +2088,7 @@ class MonitoredSessionTest(test.TestCase):
       c = array_ops.placeholder(dtypes.float32)
       v = array_ops.identity(c)
       vv = constant_op.constant(3.2)
-      graph_state = variables.Variable(0.0)
+      graph_state = variables.VariableV1(0.0)
       graph_side_effect = state_ops.assign_add(graph_state, 0.31)
 
       class Hook(session_run_hook.SessionRunHook):
@@ -2125,7 +2125,7 @@ class SingularMonitoredSessionTest(test.TestCase):
 
   def test_handles_initialization(self):
     with ops.Graph().as_default():
-      a_var = variables.Variable(0)
+      a_var = variables.VariableV1(0)
       with monitored_session.SingularMonitoredSession() as session:
         # If it's not initialized, following statement raises an error.
         self.assertEqual(0, session.run(a_var))
diff --git a/tensorflow/python/training/quantize_training_test.py b/tensorflow/python/training/quantize_training_test.py
index 9754adea85..6edbf7665f 100644
--- a/tensorflow/python/training/quantize_training_test.py
+++ b/tensorflow/python/training/quantize_training_test.py
@@ -58,7 +58,8 @@ class PywrapQuantizeTrainingTest(test.TestCase):
     g = ops.Graph()
     with session.Session(graph=g) as sess:
       a = constant_op.constant(6.0, shape=[1, 1], name='a')
-      b = variables.Variable(constant_op.constant(7.0, shape=[1, 1]), name='b')
+      b = variables.VariableV1(
+          constant_op.constant(7.0, shape=[1, 1]), name='b')
       c = math_ops.matmul(a, b, name='matmul')
 
       init_op = variables.global_variables_initializer()
diff --git a/tensorflow/python/training/queue_runner_test.py b/tensorflow/python/training/queue_runner_test.py
index 9b9e28af2b..15fe42bbd8 100644
--- a/tensorflow/python/training/queue_runner_test.py
+++ b/tensorflow/python/training/queue_runner_test.py
@@ -44,7 +44,7 @@ class QueueRunnerTest(test.TestCase):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
       zero64 = constant_op.constant(0, dtype=dtypes.int64)
-      var = variables.Variable(zero64)
+      var = variables.VariableV1(zero64)
       count_up_to = var.count_up_to(3)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
       variables.global_variables_initializer().run()
@@ -64,9 +64,9 @@ class QueueRunnerTest(test.TestCase):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
       zero64 = constant_op.constant(0, dtype=dtypes.int64)
-      var0 = variables.Variable(zero64)
+      var0 = variables.VariableV1(zero64)
       count_up_to_3 = var0.count_up_to(3)
-      var1 = variables.Variable(zero64)
+      var1 = variables.VariableV1(zero64)
       count_up_to_30 = var1.count_up_to(30)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
       qr = queue_runner_impl.QueueRunner(queue, [count_up_to_3, count_up_to_30])
@@ -131,7 +131,7 @@ class QueueRunnerTest(test.TestCase):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
       zero64 = constant_op.constant(0, dtype=dtypes.int64)
-      var = variables.Variable(zero64)
+      var = variables.VariableV1(zero64)
       count_up_to = var.count_up_to(3)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
       variables.global_variables_initializer().run()
@@ -184,7 +184,7 @@ class QueueRunnerTest(test.TestCase):
     with self.cached_session() as sess:
       with session.Session() as other_sess:
         zero64 = constant_op.constant(0, dtype=dtypes.int64)
-        var = variables.Variable(zero64)
+        var = variables.VariableV1(zero64)
         count_up_to = var.count_up_to(3)
         queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
         variables.global_variables_initializer().run()
@@ -199,7 +199,7 @@ class QueueRunnerTest(test.TestCase):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
       zero64 = constant_op.constant(0, dtype=dtypes.int64)
-      var = variables.Variable(zero64)
+      var = variables.VariableV1(zero64)
       count_up_to = var.count_up_to(3)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
       variables.global_variables_initializer().run()
@@ -215,7 +215,7 @@ class QueueRunnerTest(test.TestCase):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
       zero64 = constant_op.constant(0, dtype=dtypes.int64)
-      var = variables.Variable(zero64)
+      var = variables.VariableV1(zero64)
       count_up_to = var.count_up_to(3)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
       variables.global_variables_initializer().run()
@@ -250,7 +250,7 @@ class QueueRunnerTest(test.TestCase):
   def testStartQueueRunners(self):
     # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
     zero64 = constant_op.constant(0, dtype=dtypes.int64)
-    var = variables.Variable(zero64)
+    var = variables.VariableV1(zero64)
     count_up_to = var.count_up_to(3)
     queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
     init_op = variables.global_variables_initializer()
@@ -267,7 +267,7 @@ class QueueRunnerTest(test.TestCase):
 
   def testStartQueueRunnersRaisesIfNotASession(self):
     zero64 = constant_op.constant(0, dtype=dtypes.int64)
-    var = variables.Variable(zero64)
+    var = variables.VariableV1(zero64)
     count_up_to = var.count_up_to(3)
     queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
     init_op = variables.global_variables_initializer()
@@ -280,7 +280,7 @@ class QueueRunnerTest(test.TestCase):
 
   def testStartQueueRunnersIgnoresMonitoredSession(self):
     zero64 = constant_op.constant(0, dtype=dtypes.int64)
-    var = variables.Variable(zero64)
+    var = variables.VariableV1(zero64)
     count_up_to = var.count_up_to(3)
     queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
     init_op = variables.global_variables_initializer()
@@ -297,7 +297,7 @@ class QueueRunnerTest(test.TestCase):
     graph = ops.Graph()
     with graph.as_default():
       zero64 = constant_op.constant(0, dtype=dtypes.int64)
-      var = variables.Variable(zero64)
+      var = variables.VariableV1(zero64)
       count_up_to = var.count_up_to(3)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
       init_op = variables.global_variables_initializer()
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 69b1055ebe..49e6e6546d 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -311,8 +311,8 @@ class SaverTest(test.TestCase):
 
     # Build a graph with 2 parameter nodes, and Save and
     # Restore nodes for them.
-    v0 = variables.Variable(10.0, name="v0")
-    v1 = variables.Variable(20.0, name="v1")
+    v0 = variables.VariableV1(10.0, name="v0")
+    v1 = variables.VariableV1(20.0, name="v1")
     v2 = saver_test_utils.CheckpointedOp(name="v2")
     v2_init = v2.insert("k1", 30.0)
     save = saver_module.Saver(
@@ -350,8 +350,8 @@ class SaverTest(test.TestCase):
     # Start a second session.  In that session the parameter nodes
     # have not been initialized either.
     with self.cached_session() as sess:
-      v0 = variables.Variable(-1.0, name="v0")
-      v1 = variables.Variable(-1.0, name="v1")
+      v0 = variables.VariableV1(-1.0, name="v0")
+      v1 = variables.VariableV1(-1.0, name="v1")
       v2 = saver_test_utils.CheckpointedOp(name="v2")
       save = saver_module.Saver({"v0": v0, "v1": v1, "v2": v2.saveable})
 
@@ -370,7 +370,7 @@ class SaverTest(test.TestCase):
       self.assertEqual(30.0, v2.values().eval())
 
   def testFilenameTensor(self):
-    v0 = variables.Variable(0, name="v0")
+    v0 = variables.VariableV1(0, name="v0")
     filename = b"somerandomfilename"
     save = saver_module.Saver({"v0": v0}, filename=filename)
     with self.cached_session() as sess:
@@ -379,7 +379,7 @@ class SaverTest(test.TestCase):
       self.assertEqual(sess.run(tensor), filename)
 
   def testInvalidPath(self):
-    v0 = variables.Variable(0, name="v0")
+    v0 = variables.VariableV1(0, name="v0")
     for ver in (saver_pb2.SaverDef.V1, saver_pb2.SaverDef.V2):
       with self.cached_session() as sess:
         save = saver_module.Saver({"v0": v0}, write_version=ver)
@@ -392,7 +392,7 @@ class SaverTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Build a graph with 1 node, and save and restore for them.
-      v = variables.Variable(np.int64(15), name="v")
+      v = variables.VariableV1(np.int64(15), name="v")
       save = saver_module.Saver({"v": v}, restore_sequentially=True)
       variables.global_variables_initializer().run()
 
@@ -402,7 +402,7 @@ class SaverTest(test.TestCase):
       self.assertEqual(save_path, val)
 
       with self.cached_session() as sess:
-        v = variables.Variable(np.int64(-1), name="v")
+        v = variables.VariableV1(np.int64(-1), name="v")
         save = saver_module.Saver({"v": v})
 
       with self.assertRaisesWithPredicateMatch(
@@ -416,9 +416,9 @@ class SaverTest(test.TestCase):
 
   def testSomeErrors(self):
     with ops_lib.Graph().as_default():
-      v0 = variables.Variable([10.0], name="v0")
-      v1 = variables.Variable([20.0], name="v1")
-      v2 = variables.Variable([20.0], name="v2")
+      v0 = variables.VariableV1([10.0], name="v0")
+      v1 = variables.VariableV1([20.0], name="v1")
+      v2 = variables.VariableV1([20.0], name="v2")
       v2._set_save_slice_info(
           variables.Variable.SaveSliceInfo("v1", [1], [0], [1]))
 
@@ -446,7 +446,7 @@ class SaverTest(test.TestCase):
 
   def testSameName(self):
     with ops_lib.Graph().as_default():
-      v0 = variables.Variable([10.0], name="v0")
+      v0 = variables.VariableV1([10.0], name="v0")
       v2 = saver_test_utils.CheckpointedOp(name="v2")
 
       # Saving one variable under two names raises an error.
@@ -468,8 +468,8 @@ class SaverTest(test.TestCase):
     with self.session(graph=ops_lib.Graph()) as sess:
       # Build a graph with 2 parameter nodes, and Save and
       # Restore nodes for them.
-      v0 = variables.Variable(10.0, name="v0")
-      v1 = variables.Variable(20.0, name="v1")
+      v0 = variables.VariableV1(10.0, name="v0")
+      v1 = variables.VariableV1(20.0, name="v1")
       v2 = saver_test_utils.CheckpointedOp(name="v2")
       v2_init = v2.insert("k1", 30.0)
       save = saver_module.Saver([v0, v1, v2.saveable])
@@ -490,8 +490,8 @@ class SaverTest(test.TestCase):
     # Start a second session.  In that session the variables
     # have not been initialized either.
     with self.session(graph=ops_lib.Graph()) as sess:
-      v0 = variables.Variable(-1.0, name="v0")
-      v1 = variables.Variable(-1.0, name="v1")
+      v0 = variables.VariableV1(-1.0, name="v0")
+      v1 = variables.VariableV1(-1.0, name="v1")
       v2 = saver_test_utils.CheckpointedOp(name="v2")
       save = saver_module.Saver([v0, v1, v2.saveable])
 
@@ -515,8 +515,8 @@ class SaverTest(test.TestCase):
     # Build another graph with 2 nodes, initialized
     # differently, and a Restore node for them.
     with self.session(graph=ops_lib.Graph()) as sess:
-      v0_2 = variables.Variable(1000.0, name="v0")
-      v1_2 = variables.Variable(2000.0, name="v1")
+      v0_2 = variables.VariableV1(1000.0, name="v0")
+      v1_2 = variables.VariableV1(2000.0, name="v1")
       v2_2 = saver_test_utils.CheckpointedOp(name="v2")
       save2 = saver_module.Saver([v0_2, v1_2, v2_2.saveable])
       v2_2.insert("k1000", 3000.0).run()
@@ -574,14 +574,14 @@ class SaverTest(test.TestCase):
     save_path = os.path.join(self.get_temp_dir(), "gpu")
     with session.Session("", graph=ops_lib.Graph()) as sess:
       with sess.graph.device(test.gpu_device_name()):
-        v0_1 = variables.Variable(123.45)
+        v0_1 = variables.VariableV1(123.45)
       save = saver_module.Saver({"v0": v0_1})
       variables.global_variables_initializer().run()
       save.save(sess, save_path)
 
     with session.Session("", graph=ops_lib.Graph()) as sess:
       with sess.graph.device(test.gpu_device_name()):
-        v0_2 = variables.Variable(543.21)
+        v0_2 = variables.VariableV1(543.21)
       save = saver_module.Saver({"v0": v0_2})
       variables.global_variables_initializer().run()
 
@@ -591,22 +591,22 @@ class SaverTest(test.TestCase):
     save_path = os.path.join(self.get_temp_dir(), "gpu")
     with session.Session("", graph=ops_lib.Graph()) as sess:
       with sess.graph.device(test.gpu_device_name()):
-        v0_1 = variables.Variable(123.45)
+        v0_1 = variables.VariableV1(123.45)
       save = saver_module.Saver({"v0": v0_1}, sharded=True, allow_empty=True)
       variables.global_variables_initializer().run()
       save.save(sess, save_path)
 
     with session.Session("", graph=ops_lib.Graph()) as sess:
       with sess.graph.device(test.gpu_device_name()):
-        v0_2 = variables.Variable(543.21)
+        v0_2 = variables.VariableV1(543.21)
       save = saver_module.Saver({"v0": v0_2}, sharded=True, allow_empty=True)
       variables.global_variables_initializer().run()
 
   def testVariables(self):
     save_path = os.path.join(self.get_temp_dir(), "variables")
     with session.Session("", graph=ops_lib.Graph()) as sess:
-      one = variables.Variable(1.0)
-      twos = variables.Variable([2.0, 2.0, 2.0])
+      one = variables.VariableV1(1.0)
+      twos = variables.VariableV1([2.0, 2.0, 2.0])
       v2 = saver_test_utils.CheckpointedOp(name="v2")
       init = variables.global_variables_initializer()
       save = saver_module.Saver()
@@ -615,8 +615,8 @@ class SaverTest(test.TestCase):
       save.save(sess, save_path)
 
     with session.Session("", graph=ops_lib.Graph()) as sess:
-      one = variables.Variable(0.0)
-      twos = variables.Variable([0.0, 0.0, 0.0])
+      one = variables.VariableV1(0.0)
+      twos = variables.VariableV1([0.0, 0.0, 0.0])
       v2 = saver_test_utils.CheckpointedOp(name="v2")
       # Saver with no arg, defaults to 'all variables'.
       save = saver_module.Saver()
@@ -628,14 +628,14 @@ class SaverTest(test.TestCase):
 
   def testVarListShouldBeEmptyInDeferredBuild(self):
     with ops_lib.Graph().as_default():
-      v = variables.Variable(1.0)
+      v = variables.VariableV1(1.0)
       with self.assertRaisesRegexp(ValueError, "defer_build"):
         saver_module.Saver([v], defer_build=True)
 
   def testBuildShouldBeCalledBeforeSaveInCaseOfDeferBuild(self):
     save_path = os.path.join(self.get_temp_dir(), "error_deferred_build")
     with ops_lib.Graph().as_default(), session.Session() as sess:
-      variables.Variable(1.0)
+      variables.VariableV1(1.0)
       saver = saver_module.Saver(defer_build=True)
       with self.assertRaisesRegexp(RuntimeError, "build"):
         saver.save(sess, save_path)
@@ -643,18 +643,18 @@ class SaverTest(test.TestCase):
   def testDeferredBuild(self):
     save_path = os.path.join(self.get_temp_dir(), "deferred_build")
     with session.Session("", graph=ops_lib.Graph()) as sess:
-      one = variables.Variable(1.0)
+      one = variables.VariableV1(1.0)
       save = saver_module.Saver(defer_build=True)
       # if build is not deferred, saver cannot save the `twos`.
-      twos = variables.Variable([2.0, 2.0, 2.0])
+      twos = variables.VariableV1([2.0, 2.0, 2.0])
       init = variables.global_variables_initializer()
       save.build()
       init.run()
       save.save(sess, save_path)
 
     with session.Session("", graph=ops_lib.Graph()) as sess:
-      one = variables.Variable(0.0)
-      twos = variables.Variable([0.0, 0.0, 0.0])
+      one = variables.VariableV1(0.0)
+      twos = variables.VariableV1([0.0, 0.0, 0.0])
       # Saver with no arg, defaults to 'all variables'.
       save = saver_module.Saver()
       save.restore(sess, save_path)
@@ -664,7 +664,7 @@ class SaverTest(test.TestCase):
   def testReshape(self):
     save_path = os.path.join(self.get_temp_dir(), "variables_reshape")
     with session.Session("", graph=ops_lib.Graph()) as sess:
-      var = variables.Variable([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+      var = variables.VariableV1([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
       init = variables.global_variables_initializer()
       save = saver_module.Saver()
       init.run()
@@ -672,7 +672,7 @@ class SaverTest(test.TestCase):
 
     # Error when restoring with default reshape=False
     with session.Session("", graph=ops_lib.Graph()) as sess:
-      var = variables.Variable([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]])
+      var = variables.VariableV1([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]])
       save = saver_module.Saver()
       with self.assertRaisesRegexp(
           errors_impl.InvalidArgumentError,
@@ -681,7 +681,7 @@ class SaverTest(test.TestCase):
 
     # Restored to new shape with reshape=True
     with session.Session("", graph=ops_lib.Graph()) as sess:
-      var = variables.Variable([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]])
+      var = variables.VariableV1([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]])
       save = saver_module.Saver(reshape=True)
       save.restore(sess, save_path)
       self.assertAllClose([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], var.eval())
@@ -731,8 +731,8 @@ class SaverTest(test.TestCase):
     for save_path in paths:
       # Build a graph with 2 parameter nodes, and Save and
       # Restore nodes for them.
-      v0 = variables.Variable(10.0, name="v0")
-      v1 = variables.Variable(20.0, name="v1")
+      v0 = variables.VariableV1(10.0, name="v0")
+      v1 = variables.VariableV1(20.0, name="v1")
       save = saver_module.Saver({"v0": v0, "v1": v1}, restore_sequentially=True)
       init_all_op = variables.global_variables_initializer()
 
@@ -770,8 +770,8 @@ class SaverTest(test.TestCase):
 
     # Build a graph with 2 parameter nodes, and Save and
     # Restore nodes for them.
-    v0 = variables.Variable(10.0, name="v0")
-    v1 = variables.Variable(20.0, name="v1")
+    v0 = variables.VariableV1(10.0, name="v0")
+    v1 = variables.VariableV1(20.0, name="v1")
     save = saver_module.Saver({"v0": v0, "v1": v1}, restore_sequentially=True)
     init_all_op = variables.global_variables_initializer()
 
@@ -859,10 +859,10 @@ class SaveRestoreShardedTest(test.TestCase):
         target="",
         config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess:
       with sess.graph.device("/cpu:0"):
-        v0 = variables.Variable(10, name="v0")
+        v0 = variables.VariableV1(10, name="v0")
         t0 = saver_test_utils.CheckpointedOp(name="t0")
       with sess.graph.device("/cpu:1"):
-        v1 = variables.Variable(20, name="v1")
+        v1 = variables.VariableV1(20, name="v1")
         t1 = saver_test_utils.CheckpointedOp(name="t1")
       save = saver_module.Saver(
           {
@@ -890,7 +890,7 @@ class SaveRestoreShardedTest(test.TestCase):
           target="",
           config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess:
         with sess.graph.device("/cpu:0"):
-          v0 = variables.Variable(111, name="v0")
+          v0 = variables.VariableV1(111, name="v0")
           t0 = saver_test_utils.CheckpointedOp(name="t0")
         save = saver_module.Saver(
             {
@@ -914,7 +914,7 @@ class SaveRestoreShardedTest(test.TestCase):
           target="",
           config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess:
         with sess.graph.device("/cpu:0"):
-          v1 = variables.Variable(222)
+          v1 = variables.VariableV1(222)
           t1 = saver_test_utils.CheckpointedOp(name="t1")
         save = saver_module.Saver(
             {
@@ -938,10 +938,10 @@ class SaveRestoreShardedTest(test.TestCase):
         target="",
         config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess:
       with sess.graph.device("/cpu:0"):
-        v0 = variables.Variable(111, name="v0")
+        v0 = variables.VariableV1(111, name="v0")
         t0 = saver_test_utils.CheckpointedOp(name="t0")
       with sess.graph.device("/cpu:1"):
-        v1 = variables.Variable(222, name="v1")
+        v1 = variables.VariableV1(222, name="v1")
         t1 = saver_test_utils.CheckpointedOp(name="t1")
       save = saver_module.Saver(
           {
@@ -984,7 +984,7 @@ class SaveRestoreShardedTest(test.TestCase):
 
   def testSaverDef(self):
     with self.cached_session():
-      v0 = variables.Variable(123, name="v0")
+      v0 = variables.VariableV1(123, name="v0")
       save = saver_module.Saver({"v0": v0}, sharded=True)
       sd = save.as_saver_def()
       self.assertTrue(sd.sharded)
@@ -1023,7 +1023,7 @@ class SaveRestoreShardedTest(test.TestCase):
           if use_resource:
             vs = [resource_variable_ops.ResourceVariable(rnd, name=var_name)]
           else:
-            vs = [variables.Variable(rnd, name=var_name)]
+            vs = [variables.VariableV1(rnd, name=var_name)]
 
         variables.global_variables_initializer().run()
         if call_saver_with_dict:
@@ -1054,7 +1054,7 @@ class SaveRestoreShardedTest(test.TestCase):
           ]
         else:
           new_vs = [
-              variables.Variable(
+              variables.VariableV1(
                   array_ops.zeros(
                       shape=var_full_shape),  # != original contents.
                   name=var_name)
@@ -1210,7 +1210,7 @@ class MaxToKeepTest(test.TestCase):
     save_dir = self._get_test_dir("max_to_keep_non_sharded")
 
     with self.cached_session() as sess:
-      v = variables.Variable(10.0, name="v")
+      v = variables.VariableV1(10.0, name="v")
       save = saver_module.Saver({"v": v}, max_to_keep=2)
       variables.global_variables_initializer().run()
       self.assertEqual([], save.last_checkpoints)
@@ -1389,9 +1389,9 @@ class MaxToKeepTest(test.TestCase):
         target="",
         config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess:
       with sess.graph.device("/cpu:0"):
-        v0 = variables.Variable(111, name="v0")
+        v0 = variables.VariableV1(111, name="v0")
       with sess.graph.device("/cpu:1"):
-        v1 = variables.Variable(222, name="v1")
+        v1 = variables.VariableV1(222, name="v1")
       save = saver_module.Saver(
           {
               "v0": v0,
@@ -1448,7 +1448,7 @@ class MaxToKeepTest(test.TestCase):
     save_dir2 = self._get_test_dir("max_to_keep_0")
 
     with self.cached_session() as sess:
-      v = variables.Variable(10.0, name="v")
+      v = variables.VariableV1(10.0, name="v")
       variables.global_variables_initializer().run()
 
       # Test max_to_keep being None.
@@ -1475,7 +1475,7 @@ class MaxToKeepTest(test.TestCase):
     save_dir = self._get_test_dir("no_meta_graph")
 
     with self.cached_session() as sess:
-      v = variables.Variable(10.0, name="v")
+      v = variables.VariableV1(10.0, name="v")
       save = saver_module.Saver({"v": v})
       variables.global_variables_initializer().run()
 
@@ -1632,13 +1632,13 @@ class MetaGraphTest(test.TestCase):
     filename = os.path.join(test_dir, "metafile")
     with self.cached_session():
       # Creates a graph.
-      v0 = variables.Variable(1.0, name="v0")
+      v0 = variables.VariableV1(1.0, name="v0")
       control_flow_ops.cond(
           math_ops.less(v0, 10), lambda: math_ops.add(v0, 1),
           lambda: math_ops.subtract(v0, 1))
       control_flow_ops.while_loop(lambda i: math_ops.less(i, 10),
                                   lambda i: math_ops.add(i, 1), [v0])
-      var = variables.Variable(constant_op.constant(0, dtype=dtypes.int64))
+      var = variables.VariableV1(constant_op.constant(0, dtype=dtypes.int64))
       count_up_to = var.count_up_to(3)
       input_queue = data_flow_ops.FIFOQueue(
           30, dtypes.float32, shared_name="collection_queue")
@@ -1687,7 +1687,7 @@ class MetaGraphTest(test.TestCase):
   def testAddCollectionDefFails(self):
     with self.cached_session():
       # Creates a graph.
-      v0 = variables.Variable(10.0, name="v0")
+      v0 = variables.VariableV1(10.0, name="v0")
       # Creates a saver.
       save = saver_module.Saver({"v0": v0})
       # Generates MetaGraphDef.
@@ -1711,8 +1711,8 @@ class MetaGraphTest(test.TestCase):
     saver1_ckpt = os.path.join(test_dir, "saver1.ckpt")
     with self.session(graph=ops_lib.Graph()) as sess:
       # Creates a graph.
-      v0 = variables.Variable([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name="v0")
-      v1 = variables.Variable(11.0, name="v1")
+      v0 = variables.VariableV1([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name="v0")
+      v1 = variables.VariableV1(11.0, name="v1")
       # Creates 2 savers.
       saver0 = saver_module.Saver({"v0": v0}, name="saver0")
       saver1 = saver_module.Saver({"v1": v1}, name="saver1")
@@ -1788,8 +1788,8 @@ class MetaGraphTest(test.TestCase):
     saver1_ckpt = os.path.join(test_dir, "saver1.ckpt")
     with self.session(graph=ops_lib.Graph()) as sess:
       # Creates a graph.
-      v0 = variables.Variable([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name="v0")
-      v1 = variables.Variable(11.0, name="v1")
+      v0 = variables.VariableV1([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name="v0")
+      v1 = variables.VariableV1(11.0, name="v1")
 
       # Creates 2 savers.
       saver0 = saver_module.Saver({"v0": v0}, name="saver0")
@@ -1840,7 +1840,7 @@ class MetaGraphTest(test.TestCase):
     filename = os.path.join(test_dir, "metafile")
     with self.session(graph=ops_lib.Graph()):
       # Creates a graph.
-      variables.Variable(10.0, name="v0")
+      variables.VariableV1(10.0, name="v0")
       # Exports the graph as binary format.
       saver_module.export_meta_graph(filename, as_text=False)
     with self.session(graph=ops_lib.Graph()):
@@ -1871,8 +1871,8 @@ class MetaGraphTest(test.TestCase):
     test_dir = self._get_test_dir("slice_saver")
     filename = os.path.join(test_dir, "metafile")
     with self.cached_session():
-      v1 = variables.Variable([20.0], name="v1")
-      v2 = variables.Variable([20.0], name="v2")
+      v1 = variables.VariableV1([20.0], name="v1")
+      v2 = variables.VariableV1([20.0], name="v2")
       v2._set_save_slice_info(
           variables.Variable.SaveSliceInfo("v1", [1], [0], [1]))
 
@@ -1899,7 +1899,7 @@ class MetaGraphTest(test.TestCase):
     # Hidden 1
     images = constant_op.constant(1.2, dtypes.float32, shape=[100, 28])
     with ops_lib.name_scope("hidden1"):
-      weights = variables.Variable(
+      weights = variables.VariableV1(
           random_ops.truncated_normal(
               [28, 128], stddev=1.0 / math.sqrt(float(28))),
           name="weights")
@@ -1907,7 +1907,7 @@ class MetaGraphTest(test.TestCase):
       # the save and restore of control flow context (which doesn't make any
       # sense here from a machine learning perspective).  The typical biases is
       # a simple Variable without the conditions.
-      biases = variables.Variable(
+      biases = variables.VariableV1(
           control_flow_ops.cond(
               math_ops.less(random.random(), 0.5),
               lambda: array_ops.ones([128]), lambda: array_ops.zeros([128])),
@@ -1915,7 +1915,7 @@ class MetaGraphTest(test.TestCase):
       hidden1 = nn_ops.relu(math_ops.matmul(images, weights) + biases)
     # Hidden 2
     with ops_lib.name_scope("hidden2"):
-      weights = variables.Variable(
+      weights = variables.VariableV1(
           random_ops.truncated_normal(
               [128, 32], stddev=1.0 / math.sqrt(float(128))),
           name="weights")
@@ -1933,15 +1933,16 @@ class MetaGraphTest(test.TestCase):
 
       _, biases = control_flow_ops.while_loop(
           loop_cond, loop_body,
-          [constant_op.constant(0), variables.Variable(array_ops.zeros([32]))])
+          [constant_op.constant(0),
+           variables.VariableV1(array_ops.zeros([32]))])
       hidden2 = nn_ops.relu(math_ops.matmul(hidden1, weights) + biases)
     # Linear
     with ops_lib.name_scope("softmax_linear"):
-      weights = variables.Variable(
+      weights = variables.VariableV1(
           random_ops.truncated_normal(
               [32, 10], stddev=1.0 / math.sqrt(float(32))),
           name="weights")
-      biases = variables.Variable(array_ops.zeros([10]), name="biases")
+      biases = variables.VariableV1(array_ops.zeros([10]), name="biases")
       logits = math_ops.matmul(hidden2, weights) + biases
       ops_lib.add_to_collection("logits", logits)
     init_all_op = variables.global_variables_initializer()
@@ -2028,7 +2029,7 @@ class MetaGraphTest(test.TestCase):
 
     # Create while loop using `outer_body_fn`.
     with ops_lib.Graph().as_default():
-      var = variables.Variable(0.0)
+      var = variables.VariableV1(0.0)
       var_name = var.name
       output = graph_fn(var)
       output_name = output.name
@@ -2122,8 +2123,8 @@ class MetaGraphTest(test.TestCase):
   def testStrippedOpListDef(self):
     with self.cached_session():
       # Creates a graph.
-      v0 = variables.Variable(0.0)
-      var = variables.Variable(10.0)
+      v0 = variables.VariableV1(0.0)
+      var = variables.VariableV1(10.0)
       math_ops.add(v0, var)
 
       @function.Defun(dtypes.float32)
@@ -2161,8 +2162,8 @@ class MetaGraphTest(test.TestCase):
     # With strip_default_attrs enabled, attributes "T" (float32) and "Tout"
     # (complex64) in the "Complex" op must be removed.
     with self.cached_session():
-      real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real")
-      imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag")
+      real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real")
+      imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag")
       math_ops.complex(real_num, imag_num, name="complex")
 
       save = saver_module.Saver({"real_num": real_num, "imag_num": imag_num})
@@ -2178,8 +2179,8 @@ class MetaGraphTest(test.TestCase):
     # (complex64) in the "Complex" op must *not* be removed, even if they map
     # to their defaults.
     with self.session(graph=ops_lib.Graph()):
-      real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real")
-      imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag")
+      real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real")
+      imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag")
       math_ops.complex(real_num, imag_num, name="complex")
 
       save = saver_module.Saver({"real_num": real_num, "imag_num": imag_num})
@@ -2198,9 +2199,9 @@ class MetaGraphTest(test.TestCase):
     image = array_ops.placeholder(dtypes.float32, [None, 784], name="image")
     label = array_ops.placeholder(dtypes.float32, [None, 10], name="label")
     with session.Session() as sess:
-      weights = variables.Variable(
+      weights = variables.VariableV1(
           random_ops.random_uniform([784, 10]), name="weights")
-      bias = variables.Variable(array_ops.zeros([10]), name="bias")
+      bias = variables.VariableV1(array_ops.zeros([10]), name="bias")
       logit = nn_ops.relu(math_ops.matmul(image, weights) + bias, name="logits")
       nn_ops.softmax(logit, name="prediction")
       cost = nn_ops.softmax_cross_entropy_with_logits(labels=label,
@@ -2243,7 +2244,7 @@ class MetaGraphTest(test.TestCase):
       self.assertIsNone(new_saver_1)
 
       # Create a variable in graph_2 under scope "my_scope".
-      variables.Variable(array_ops.zeros([10]), name="my_scope/my_var")
+      variables.VariableV1(array_ops.zeros([10]), name="my_scope/my_var")
       sess.run(variables.global_variables_initializer())
       # Restore the checkpoint into a different scope "subgraph_2".
       new_saver_2 = saver_module.import_meta_graph(
@@ -2268,9 +2269,9 @@ class MetaGraphTest(test.TestCase):
     image = array_ops.placeholder(dtypes.float32, [None, 784], name="image")
     label = array_ops.placeholder(dtypes.float32, [None, 10], name="label")
     with session.Session() as sess:
-      weights = variables.Variable(
+      weights = variables.VariableV1(
           random_ops.random_uniform([784, 10]), name="weights")
-      bias = variables.Variable(array_ops.zeros([10]), name="bias")
+      bias = variables.VariableV1(array_ops.zeros([10]), name="bias")
       logit = nn_ops.relu(math_ops.matmul(image, weights) + bias, name="logits")
       nn_ops.softmax(logit, name="prediction")
       cost = nn_ops.softmax_cross_entropy_with_logits(labels=label,
@@ -2299,9 +2300,9 @@ class MetaGraphTest(test.TestCase):
       with ops_lib.device("/job:ps/replica:0/task:0/device:GPU:0"):
         image = array_ops.placeholder(dtypes.float32, [None, 784], name="image")
         label = array_ops.placeholder(dtypes.float32, [None, 10], name="label")
-        weights = variables.Variable(
+        weights = variables.VariableV1(
             random_ops.random_uniform([784, 10]), name="weights")
-        bias = variables.Variable(array_ops.zeros([10]), name="bias")
+        bias = variables.VariableV1(array_ops.zeros([10]), name="bias")
         logit = nn_ops.relu(math_ops.matmul(image, weights) + bias)
         nn_ops.softmax(logit, name="prediction")
         cost = nn_ops.softmax_cross_entropy_with_logits(labels=label,
@@ -2332,9 +2333,9 @@ class MetaGraphTest(test.TestCase):
       with ops_lib.device("/job:ps/replica:0/task:0/device:GPU:0"):
         image = array_ops.placeholder(dtypes.float32, [None, 784], name="image")
         label = array_ops.placeholder(dtypes.float32, [None, 10], name="label")
-        weights = variables.Variable(
+        weights = variables.VariableV1(
             random_ops.random_uniform([784, 10]), name="weights")
-        bias = variables.Variable(array_ops.zeros([10]), name="bias")
+        bias = variables.VariableV1(array_ops.zeros([10]), name="bias")
         logit = nn_ops.relu(math_ops.matmul(image, weights) + bias)
         nn_ops.softmax(logit, name="prediction")
         cost = nn_ops.softmax_cross_entropy_with_logits(labels=label,
@@ -2385,9 +2386,9 @@ class CheckpointReaderTest(test.TestCase):
 
   def testDebugString(self):
     # Builds a graph.
-    v0 = variables.Variable(
+    v0 = variables.VariableV1(
         [[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0")
-    v1 = variables.Variable(
+    v1 = variables.VariableV1(
         [[[1], [2]], [[3], [4]], [[5], [6]]], dtype=dtypes.float32, name="v1")
     init_all_op = variables.global_variables_initializer()
     save = saver_module.Saver(
@@ -2444,7 +2445,8 @@ class WriteGraphTest(test.TestCase):
 
   def testWriteGraph(self):
     test_dir = self._get_test_dir("write_graph_dir")
-    variables.Variable([[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0")
+    variables.VariableV1(
+        [[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0")
     path = graph_io.write_graph(ops_lib.get_default_graph(),
                                 os.path.join(test_dir, "l1"), "graph.pbtxt")
     truth = os.path.join(test_dir, "l1", "graph.pbtxt")
@@ -2453,7 +2455,8 @@ class WriteGraphTest(test.TestCase):
 
   def testRecursiveCreate(self):
     test_dir = self._get_test_dir("deep_dir")
-    variables.Variable([[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0")
+    variables.VariableV1(
+        [[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0")
     path = graph_io.write_graph(ops_lib.get_default_graph().as_graph_def(),
                                 os.path.join(test_dir, "l1", "l2", "l3"),
                                 "graph.pbtxt")
@@ -2477,7 +2480,7 @@ class ScopedGraphTest(test.TestCase):
       images = constant_op.constant(
           1.2, dtypes.float32, shape=[100, 28], name="images")
       with ops_lib.name_scope("hidden1"):
-        weights1 = variables.Variable(
+        weights1 = variables.VariableV1(
             random_ops.truncated_normal(
                 [28, 128], stddev=1.0 / math.sqrt(float(28))),
             name="weights")
@@ -2485,7 +2488,7 @@ class ScopedGraphTest(test.TestCase):
         # coverage the save and restore of control flow context (which doesn't
         # make any sense here from a machine learning perspective).  The typical
         # biases is a simple Variable without the conditions.
-        biases1 = variables.Variable(
+        biases1 = variables.VariableV1(
             control_flow_ops.cond(
                 math_ops.less(random.random(), 0.5),
                 lambda: array_ops.ones([128]), lambda: array_ops.zeros([128])),
@@ -2494,7 +2497,7 @@ class ScopedGraphTest(test.TestCase):
 
       # Hidden 2
       with ops_lib.name_scope("hidden2"):
-        weights2 = variables.Variable(
+        weights2 = variables.VariableV1(
             random_ops.truncated_normal(
                 [128, 32], stddev=1.0 / math.sqrt(float(128))),
             name="weights")
@@ -2511,16 +2514,16 @@ class ScopedGraphTest(test.TestCase):
           return it + 1, biases2
 
         _, biases2 = control_flow_ops.while_loop(loop_cond, loop_body, [
-            constant_op.constant(0), variables.Variable(array_ops.zeros([32]))
+            constant_op.constant(0), variables.VariableV1(array_ops.zeros([32]))
         ])
         hidden2 = nn_ops.relu(math_ops.matmul(hidden1, weights2) + biases2)
       # Linear
       with ops_lib.name_scope("softmax_linear"):
-        weights3 = variables.Variable(
+        weights3 = variables.VariableV1(
             random_ops.truncated_normal(
                 [32, 10], stddev=1.0 / math.sqrt(float(32))),
             name="weights")
-        biases3 = variables.Variable(array_ops.zeros([10]), name="biases")
+        biases3 = variables.VariableV1(array_ops.zeros([10]), name="biases")
         logits = math_ops.matmul(hidden2, weights3) + biases3
         ops_lib.add_to_collection("logits", logits)
 
@@ -2566,7 +2569,7 @@ class ScopedGraphTest(test.TestCase):
     with graph.as_default():
       # Hidden 2
       with ops_lib.name_scope("hidden2"):
-        weights = variables.Variable(
+        weights = variables.VariableV1(
             random_ops.truncated_normal(
                 [128, 32], stddev=1.0 / math.sqrt(float(128))),
             name="weights")
@@ -2583,16 +2586,16 @@ class ScopedGraphTest(test.TestCase):
           return it + 1, biases
 
         _, biases = control_flow_ops.while_loop(loop_cond, loop_body, [
-            constant_op.constant(0), variables.Variable(array_ops.zeros([32]))
+            constant_op.constant(0), variables.VariableV1(array_ops.zeros([32]))
         ])
         hidden2 = nn_ops.relu(math_ops.matmul(hidden1, weights) + biases)
       # Linear
       with ops_lib.name_scope("softmax_linear"):
-        weights = variables.Variable(
+        weights = variables.VariableV1(
             random_ops.truncated_normal(
                 [32, 10], stddev=1.0 / math.sqrt(float(32))),
             name="weights")
-        biases = variables.Variable(array_ops.zeros([10]), name="biases")
+        biases = variables.VariableV1(array_ops.zeros([10]), name="biases")
         logits = math_ops.matmul(hidden2, weights) + biases
         ops_lib.add_to_collection("logits", logits)
 
@@ -2629,9 +2632,9 @@ class ScopedGraphTest(test.TestCase):
       with ops_lib.name_scope("hidden1"):
         images = constant_op.constant(
             1.0, dtypes.float32, shape=[3, 2], name="images")
-        weights1 = variables.Variable(
+        weights1 = variables.VariableV1(
             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], name="weights")
-        biases1 = variables.Variable([0.1] * 3, name="biases")
+        biases1 = variables.VariableV1([0.1] * 3, name="biases")
         nn_ops.relu(math_ops.matmul(images, weights1) + biases1, name="relu")
 
     # Run the graph and save scoped checkpoint.
@@ -2685,9 +2688,9 @@ class ScopedGraphTest(test.TestCase):
       with ops_lib.name_scope("hidden1"):
         images = constant_op.constant(
             1.0, dtypes.float32, shape=[3, 2], name="images")
-        weights1 = variables.Variable(
+        weights1 = variables.VariableV1(
             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], name="weights")
-        biases1 = variables.Variable([0.1] * 3, name="biases")
+        biases1 = variables.VariableV1([0.1] * 3, name="biases")
         nn_ops.relu(math_ops.matmul(images, weights1) + biases1, name="relu")
 
     # Run the graph and save scoped checkpoint.
@@ -2720,12 +2723,12 @@ class ScopedGraphTest(test.TestCase):
     graph = ops_lib.Graph()
     with graph.as_default():
       with ops_lib.name_scope("hidden1"):
-        variable1 = variables.Variable([1.0], name="variable1")
+        variable1 = variables.VariableV1([1.0], name="variable1")
         saver1 = saver_module.Saver(var_list=[variable1])
         graph.add_to_collection(ops_lib.GraphKeys.SAVERS, saver1)
 
       with ops_lib.name_scope("hidden2"):
-        variable2 = variables.Variable([2.0], name="variable2")
+        variable2 = variables.VariableV1([2.0], name="variable2")
       saver2 = saver_module.Saver(var_list=[variable2], name="hidden2/")
       graph.add_to_collection(ops_lib.GraphKeys.SAVERS, saver2)
 
@@ -2978,7 +2981,7 @@ class CheckpointableCompatibilityTests(test.TestCase):
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
 
     with ops_lib.Graph().as_default() as g:
-      a = variables.Variable(1., name="a")
+      a = variables.VariableV1(1., name="a")
       a_saver = saver_module.Saver([a])
 
       with self.session(graph=g) as sess:
@@ -2986,7 +2989,7 @@ class CheckpointableCompatibilityTests(test.TestCase):
         save_path = a_saver.save(sess=sess, save_path=checkpoint_prefix)
 
     with ops_lib.Graph().as_default() as g:
-      a = variables.Variable([1.], name="a")
+      a = variables.VariableV1([1.], name="a")
       a_saver = saver_module.Saver([a])
       with self.session(graph=g) as sess:
         with self.assertRaisesRegexp(
diff --git a/tensorflow/python/training/server_lib_same_variables_no_clear_test.py b/tensorflow/python/training/server_lib_same_variables_no_clear_test.py
index c7e84e9ba1..5aa7f45c2b 100644
--- a/tensorflow/python/training/server_lib_same_variables_no_clear_test.py
+++ b/tensorflow/python/training/server_lib_same_variables_no_clear_test.py
@@ -37,8 +37,8 @@ class SameVariablesNoClearTest(test.TestCase):
     server = server_lib.Server.create_local_server()
 
     with session.Session(server.target) as sess_1:
-      v0 = variables.Variable([[2, 1]], name="v0")
-      v1 = variables.Variable([[1], [2]], name="v1")
+      v0 = variables.VariableV1([[2, 1]], name="v0")
+      v1 = variables.VariableV1([[1], [2]], name="v1")
       v2 = math_ops.matmul(v0, v1)
       sess_1.run([v0.initializer, v1.initializer])
       self.assertAllEqual([[4]], sess_1.run(v2))
diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py
index 063044f0d0..cf995707fc 100644
--- a/tensorflow/python/training/server_lib_test.py
+++ b/tensorflow/python/training/server_lib_test.py
@@ -76,9 +76,9 @@ class GrpcServerTest(test.TestCase):
   def testResetFails(self):
     # Creates variable with container name.
     with ops.container("test0"):
-      v0 = variables.Variable(1.0, name="v0")
+      v0 = variables.VariableV1(1.0, name="v0")
     # Creates variable with default container.
-    v1 = variables.Variable(2.0, name="v1")
+    v1 = variables.VariableV1(2.0, name="v1")
     # Verifies resetting the non-existent target returns error.
     with self.assertRaises(errors_impl.NotFoundError):
       session.Session.reset("nonexistent", ["test0"])
@@ -234,8 +234,8 @@ class GrpcServerTest(test.TestCase):
           [0.], dtype=dtypes.float32))
       self.assertIsNotNone(input_queue)
 
-      var = variables.Variable(1., dtype=dtypes.float32, trainable=False,
-                               name="var")
+      var = variables.VariableV1(1., dtype=dtypes.float32, trainable=False,
+                                 name="var")
 
       sess.run(variables.global_variables_initializer())
       queue_runner_impl.start_queue_runners(sess)
@@ -245,7 +245,7 @@ class GrpcServerTest(test.TestCase):
     server = self._cached_server
 
     init_value = array_ops.placeholder(dtypes.int32)
-    v = variables.Variable(init_value, validate_shape=False, name="v")
+    v = variables.VariableV1(init_value, validate_shape=False, name="v")
 
     sharing_config = config_pb2.ConfigProto(isolate_session_state=False)
     sharing_sess_0 = session.Session(server.target, config=sharing_config)
@@ -302,7 +302,7 @@ class GrpcServerTest(test.TestCase):
     isolate_config = config_pb2.ConfigProto(isolate_session_state=True)
 
     with ops.Graph().as_default():
-      w_vector = variables.Variable([1, 2, 3], name="w")
+      w_vector = variables.VariableV1([1, 2, 3], name="w")
       with session.Session(server.target, config=sharing_config) as sess:
         with self.assertRaises(errors_impl.FailedPreconditionError):
           sess.run(w_vector)
@@ -310,20 +310,20 @@ class GrpcServerTest(test.TestCase):
         self.assertAllEqual([1, 2, 3], sess.run(w_vector))
 
     with ops.Graph().as_default():
-      w_vector = variables.Variable([4, 5, 6], name="w")
+      w_vector = variables.VariableV1([4, 5, 6], name="w")
       with session.Session(server.target, config=sharing_config) as sess:
         self.assertAllEqual([1, 2, 3], sess.run(w_vector))
         sess.run(w_vector.initializer)
         self.assertAllEqual([4, 5, 6], sess.run(w_vector))
 
     with ops.Graph().as_default():
-      w_scalar = variables.Variable(86, name="w")
+      w_scalar = variables.VariableV1(86, name="w")
       with session.Session(server.target, config=sharing_config) as sess:
         with self.assertRaises(errors_impl.InvalidArgumentError):
           sess.run(w_scalar.initializer)
 
     with ops.Graph().as_default():
-      w_scalar = variables.Variable(37, name="w")
+      w_scalar = variables.VariableV1(37, name="w")
       with session.Session(server.target, config=isolate_config) as sess:
         with self.assertRaises(errors_impl.FailedPreconditionError):
           sess.run(w_scalar)
diff --git a/tensorflow/python/training/session_manager_test.py b/tensorflow/python/training/session_manager_test.py
index f1d18f7704..2b5c3b01de 100644
--- a/tensorflow/python/training/session_manager_test.py
+++ b/tensorflow/python/training/session_manager_test.py
@@ -40,7 +40,7 @@ class SessionManagerTest(test.TestCase):
 
   def testPrepareSessionSucceeds(self):
     with ops.Graph().as_default():
-      v = variables.Variable([1.0, 2.0, 3.0], name="v")
+      v = variables.VariableV1([1.0, 2.0, 3.0], name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.report_uninitialized_variables())
       sess = sm.prepare_session(
@@ -50,7 +50,7 @@ class SessionManagerTest(test.TestCase):
   def testPrepareSessionSucceedsWithInitFeedDict(self):
     with ops.Graph().as_default():
       p = array_ops.placeholder(dtypes.float32, shape=(3,))
-      v = variables.Variable(p, name="v")
+      v = variables.VariableV1(p, name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.report_uninitialized_variables())
       sess = sm.prepare_session(
@@ -61,7 +61,7 @@ class SessionManagerTest(test.TestCase):
 
   def testPrepareSessionSucceedsWithInitFn(self):
     with ops.Graph().as_default():
-      v = variables.Variable([125], name="v")
+      v = variables.VariableV1([125], name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.report_uninitialized_variables())
       sess = sm.prepare_session(
@@ -79,7 +79,7 @@ class SessionManagerTest(test.TestCase):
     gfile.MakeDirs(checkpoint_dir)
 
     with ops.Graph().as_default():
-      v = variables.Variable([1.0, 2.0, 3.0], name="v")
+      v = variables.VariableV1([1.0, 2.0, 3.0], name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.report_uninitialized_variables())
       saver = saver_lib.Saver({"v": v})
@@ -97,7 +97,7 @@ class SessionManagerTest(test.TestCase):
       # Renames the checkpoint directory.
       os.rename(checkpoint_dir, checkpoint_dir2)
       gfile.MakeDirs(checkpoint_dir)
-      v = variables.Variable([6.0, 7.0, 8.0], name="v")
+      v = variables.VariableV1([6.0, 7.0, 8.0], name="v")
       with self.cached_session():
         self.assertEqual(False, variables.is_variable_initialized(v).eval())
       session_manager.SessionManager(
@@ -134,7 +134,7 @@ class SessionManagerTest(test.TestCase):
                                checkpoint_filename_with_path=None):
     # Create a new Graph and SessionManager and recover from a checkpoint.
     with ops.Graph().as_default():
-      v = variables.Variable(2, name="v")
+      v = variables.VariableV1(2, name="v")
       with session_lib.Session():
         self.assertEqual(False, variables.is_variable_initialized(v).eval())
       sm2 = session_manager.SessionManager(
@@ -162,7 +162,7 @@ class SessionManagerTest(test.TestCase):
     gfile.MakeDirs(checkpoint_dir)
 
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
+      v = variables.VariableV1(1, name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.report_uninitialized_variables())
       saver = saver_lib.Saver({"v": v})
@@ -186,7 +186,7 @@ class SessionManagerTest(test.TestCase):
 
   def testWaitForSessionReturnsNoneAfterTimeout(self):
     with ops.Graph().as_default():
-      variables.Variable(1, name="v")
+      variables.VariableV1(1, name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.report_uninitialized_variables(),
           recovery_wait_secs=1)
@@ -217,7 +217,7 @@ class SessionManagerTest(test.TestCase):
     gfile.MakeDirs(checkpoint_dir)
 
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
+      v = variables.VariableV1(1, name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.report_uninitialized_variables())
       saver = saver_lib.Saver({"v": v})
@@ -230,8 +230,8 @@ class SessionManagerTest(test.TestCase):
                  os.path.join(checkpoint_dir, "recover_session_checkpoint"))
     # Create a new Graph and SessionManager and recover.
     with ops.Graph().as_default():
-      v = variables.Variable(2, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(2, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -275,7 +275,7 @@ class SessionManagerTest(test.TestCase):
     gfile.MakeDirs(checkpoint_dir)
 
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
+      v = variables.VariableV1(1, name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.report_uninitialized_variables())
       saver = saver_lib.Saver({"v": v})
@@ -288,8 +288,8 @@ class SessionManagerTest(test.TestCase):
                  os.path.join(checkpoint_dir, "recover_session_checkpoint"))
     # Create a new Graph and SessionManager and recover.
     with ops.Graph().as_default():
-      v = variables.Variable(2, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(2, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -321,7 +321,7 @@ class SessionManagerTest(test.TestCase):
     # local_init_op exactly once, regardless of whether the session was
     # successfully recovered.
     with ops.Graph().as_default():
-      w = variables.Variable(
+      w = variables.VariableV1(
           1,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -356,8 +356,8 @@ class SessionManagerTest(test.TestCase):
 
     # Create a new Graph and SessionManager and recover.
     with ops.Graph().as_default():
-      v = variables.Variable(2, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(2, name="v")
+      w = variables.VariableV1(
           1,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -389,8 +389,8 @@ class SessionManagerTest(test.TestCase):
   def testWaitForSessionLocalInit(self):
     server = server_lib.Server.create_local_server()
     with ops.Graph().as_default() as graph:
-      v = variables.Variable(1, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(1, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -420,8 +420,8 @@ class SessionManagerTest(test.TestCase):
 
   def testWaitForSessionWithReadyForLocalInitOpFailsToReadyLocal(self):
     with ops.Graph().as_default() as graph:
-      v = variables.Variable(1, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(1, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -439,8 +439,8 @@ class SessionManagerTest(test.TestCase):
 
   def testWaitForSessionInsufficientReadyForLocalInitCheck(self):
     with ops.Graph().as_default() as graph:
-      v = variables.Variable(1, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(1, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -456,13 +456,13 @@ class SessionManagerTest(test.TestCase):
 
   def testPrepareSessionWithReadyForLocalInitOp(self):
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(1, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
           name="w")
-      x = variables.Variable(
+      x = variables.VariableV1(
           3 * v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -495,25 +495,25 @@ class SessionManagerTest(test.TestCase):
 
   def testPrepareSessionWithPartialInitOp(self):
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(1, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
           name="w")
-      x = variables.Variable(
+      x = variables.VariableV1(
           3 * v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
           name="x")
       # TODO(b/70206927): Use ResourceVariables once they are handled properly.
-      v_res = variables.Variable(1, name="v_res")
-      w_res = variables.Variable(
+      v_res = variables.VariableV1(1, name="v_res")
+      w_res = variables.VariableV1(
           v_res,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
           name="w_res")
-      x_res = variables.Variable(
+      x_res = variables.VariableV1(
           3 * v_res,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -565,7 +565,7 @@ class SessionManagerTest(test.TestCase):
     # cyclic dependencies.
     with ops.Graph().as_default():
       i = control_flow_ops.while_loop(lambda i: i < 1, lambda i: i + 1, [0])
-      v = variables.Variable(array_ops.identity(i), name="v")
+      v = variables.VariableV1(array_ops.identity(i), name="v")
       with self.cached_session():
         self.assertEqual(False, variables.is_variable_initialized(v).eval())
       sm = session_manager.SessionManager(
@@ -579,8 +579,8 @@ class SessionManagerTest(test.TestCase):
 
   def testPrepareSessionDidNotInitLocalVariable(self):
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(1, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -596,8 +596,8 @@ class SessionManagerTest(test.TestCase):
 
   def testPrepareSessionDidNotInitLocalVariableList(self):
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(1, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -613,8 +613,8 @@ class SessionManagerTest(test.TestCase):
 
   def testPrepareSessionWithReadyNotReadyForLocal(self):
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(1, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -634,8 +634,8 @@ class SessionManagerTest(test.TestCase):
 
   def testPrepareSessionWithInsufficientReadyForLocalInitCheck(self):
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
-      w = variables.Variable(
+      v = variables.VariableV1(1, name="v")
+      w = variables.VariableV1(
           v,
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -656,7 +656,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
 
   def testPrepareSessionSucceeds(self):
     with ops.Graph().as_default():
-      v = variables.Variable([1.0, 2.0, 3.0], name="v")
+      v = variables.VariableV1([1.0, 2.0, 3.0], name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.assert_variables_initialized())
       sess = sm.prepare_session(
@@ -666,7 +666,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
   def testPrepareSessionSucceedsWithInitFeedDict(self):
     with ops.Graph().as_default():
       p = array_ops.placeholder(dtypes.float32, shape=(3,))
-      v = variables.Variable(p, name="v")
+      v = variables.VariableV1(p, name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.assert_variables_initialized())
       sess = sm.prepare_session(
@@ -677,7 +677,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
 
   def testPrepareSessionSucceedsWithInitFn(self):
     with ops.Graph().as_default():
-      v = variables.Variable([125], name="v")
+      v = variables.VariableV1([125], name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.assert_variables_initialized())
       sess = sm.prepare_session(
@@ -695,7 +695,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
     gfile.MakeDirs(checkpoint_dir)
 
     with ops.Graph().as_default():
-      v = variables.Variable([1.0, 2.0, 3.0], name="v")
+      v = variables.VariableV1([1.0, 2.0, 3.0], name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.assert_variables_initialized())
       saver = saver_lib.Saver({"v": v})
@@ -713,7 +713,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
       # Renames the checkpoint directory.
       os.rename(checkpoint_dir, checkpoint_dir2)
       gfile.MakeDirs(checkpoint_dir)
-      v = variables.Variable([6.0, 7.0, 8.0], name="v")
+      v = variables.VariableV1([6.0, 7.0, 8.0], name="v")
       with self.cached_session():
         self.assertEqual(False, variables.is_variable_initialized(v).eval())
       session_manager.SessionManager(
@@ -755,7 +755,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
     gfile.MakeDirs(checkpoint_dir)
 
     with ops.Graph().as_default():
-      v = variables.Variable(1, name="v")
+      v = variables.VariableV1(1, name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.assert_variables_initialized())
       saver = saver_lib.Saver({"v": v})
@@ -768,7 +768,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
                  os.path.join(checkpoint_dir, "recover_session_checkpoint"))
     # Create a new Graph and SessionManager and recover.
     with ops.Graph().as_default():
-      v = variables.Variable(2, name="v")
+      v = variables.VariableV1(2, name="v")
       with self.cached_session():
         self.assertEqual(False, variables.is_variable_initialized(v).eval())
       sm2 = session_manager.SessionManager(
@@ -785,7 +785,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
 
   def testWaitForSessionReturnsNoneAfterTimeout(self):
     with ops.Graph().as_default():
-      variables.Variable(1, name="v")
+      variables.VariableV1(1, name="v")
       sm = session_manager.SessionManager(
           ready_op=variables.assert_variables_initialized(),
           recovery_wait_secs=1)
diff --git a/tensorflow/python/training/supervisor_test.py b/tensorflow/python/training/supervisor_test.py
index caf6eba3e0..7cd99d8680 100644
--- a/tensorflow/python/training/supervisor_test.py
+++ b/tensorflow/python/training/supervisor_test.py
@@ -423,7 +423,7 @@ class SupervisorTest(test.TestCase):
   def testLogdirButExplicitlyNoSummaryWriter(self):
     logdir = self._test_dir("explicit_no_summary_writer")
     with ops.Graph().as_default():
-      variables.Variable([1.0], name="foo")
+      variables.VariableV1([1.0], name="foo")
       summary.scalar("c1", constant_op.constant(1))
       summary.scalar("c2", constant_op.constant(2))
       summary.scalar("c3", constant_op.constant(3))
@@ -491,7 +491,7 @@ class SupervisorTest(test.TestCase):
 
   def testNoLogdirSucceeds(self):
     with ops.Graph().as_default():
-      variables.Variable([1.0, 2.0, 3.0])
+      variables.VariableV1([1.0, 2.0, 3.0])
       sv = supervisor.Supervisor(logdir="", summary_op=None)
       sess = sv.prepare_or_wait_for_session("")
       sess.close()
@@ -499,7 +499,7 @@ class SupervisorTest(test.TestCase):
 
   def testUseSessionManager(self):
     with ops.Graph().as_default():
-      variables.Variable([1.0, 2.0, 3.0])
+      variables.VariableV1([1.0, 2.0, 3.0])
       sm = session_manager_lib.SessionManager()
       # Pass in session_manager. The additional init_op is ignored.
       sv = supervisor.Supervisor(logdir="", session_manager=sm)
@@ -508,7 +508,7 @@ class SupervisorTest(test.TestCase):
   def testInitOp(self):
     logdir = self._test_dir("default_init_op")
     with ops.Graph().as_default():
-      v = variables.Variable([1.0, 2.0, 3.0])
+      v = variables.VariableV1([1.0, 2.0, 3.0])
       sv = supervisor.Supervisor(logdir=logdir)
       sess = sv.prepare_or_wait_for_session("")
       self.assertAllClose([1.0, 2.0, 3.0], sess.run(v))
@@ -517,7 +517,7 @@ class SupervisorTest(test.TestCase):
   def testInitFn(self):
     logdir = self._test_dir("default_init_op")
     with ops.Graph().as_default():
-      v = variables.Variable([1.0, 2.0, 3.0])
+      v = variables.VariableV1([1.0, 2.0, 3.0])
 
       def _init_fn(sess):
         sess.run(v.initializer)
@@ -531,7 +531,7 @@ class SupervisorTest(test.TestCase):
     logdir = self._test_dir("feed_dict_init_op")
     with ops.Graph().as_default():
       p = array_ops.placeholder(dtypes.float32, shape=(3,))
-      v = variables.Variable(p, name="v")
+      v = variables.VariableV1(p, name="v")
       sv = supervisor.Supervisor(
           logdir=logdir,
           init_op=variables.global_variables_initializer(),
@@ -550,10 +550,10 @@ class SupervisorTest(test.TestCase):
       g = ops.Graph()
       with g.as_default():
         with ops.device("/job:local"):
-          v = variables.Variable(
+          v = variables.VariableV1(
               1, name="default_ready_for_local_init_op_v_" + str(uid))
           vadd = v.assign_add(1)
-          w = variables.Variable(
+          w = variables.VariableV1(
               v,
               trainable=False,
               collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -590,7 +590,7 @@ class SupervisorTest(test.TestCase):
 
     # Create a checkpoint.
     with ops.Graph().as_default():
-      v = variables.Variable(
+      v = variables.VariableV1(
           10.0, name="ready_for_local_init_op_restore_v_" + str(uid))
       summary.scalar("ready_for_local_init_op_restore_v_" + str(uid), v)
       sv = supervisor.Supervisor(logdir=logdir)
@@ -607,10 +607,10 @@ class SupervisorTest(test.TestCase):
       g = ops.Graph()
       with g.as_default():
         with ops.device("/job:local"):
-          v = variables.Variable(
+          v = variables.VariableV1(
               1.0, name="ready_for_local_init_op_restore_v_" + str(uid))
           vadd = v.assign_add(1)
-          w = variables.Variable(
+          w = variables.VariableV1(
               v,
               trainable=False,
               collections=[ops.GraphKeys.LOCAL_VARIABLES],
@@ -642,13 +642,13 @@ class SupervisorTest(test.TestCase):
     logdir = self._test_dir("default_local_init_op")
     with ops.Graph().as_default():
       # A local variable.
-      v = variables.Variable(
+      v = variables.VariableV1(
           [1.0, 2.0, 3.0],
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES])
 
       # An entity which is initialized through a TABLE_INITIALIZER.
-      w = variables.Variable([4, 5, 6], trainable=False, collections=[])
+      w = variables.VariableV1([4, 5, 6], trainable=False, collections=[])
       ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, w.initializer)
 
       # This shouldn't add a variable to the VARIABLES collection responsible
@@ -668,7 +668,7 @@ class SupervisorTest(test.TestCase):
     with ops.Graph().as_default():
       with ops.device("/job:localhost"):
         # A local variable.
-        v = variables.Variable(
+        v = variables.VariableV1(
             [1.0, 2.0, 3.0],
             trainable=False,
             collections=[ops.GraphKeys.LOCAL_VARIABLES])
@@ -687,8 +687,8 @@ class SupervisorTest(test.TestCase):
     server = server_lib.Server.create_local_server()
     logdir = self._test_dir("default_init_op_fails")
     with ops.Graph().as_default():
-      v = variables.Variable([1.0, 2.0, 3.0], name="v")
-      variables.Variable([4.0, 5.0, 6.0], name="w")
+      v = variables.VariableV1([1.0, 2.0, 3.0], name="v")
+      variables.VariableV1([4.0, 5.0, 6.0], name="w")
       # w will not be initialized.
       sv = supervisor.Supervisor(logdir=logdir, init_op=v.initializer)
       with self.assertRaisesRegexp(RuntimeError,
@@ -699,11 +699,11 @@ class SupervisorTest(test.TestCase):
     server = server_lib.Server.create_local_server()
     logdir = self._test_dir("default_init_op_fails_for_local_variable")
     with ops.Graph().as_default():
-      v = variables.Variable(
+      v = variables.VariableV1(
           [1.0, 2.0, 3.0],
           name="v",
           collections=[ops.GraphKeys.LOCAL_VARIABLES])
-      variables.Variable(
+      variables.VariableV1(
           [1.0, 2.0, 3.0],
           name="w",
           collections=[ops.GraphKeys.LOCAL_VARIABLES])
@@ -716,17 +716,17 @@ class SupervisorTest(test.TestCase):
   def testSetupFail(self):
     logdir = self._test_dir("setup_fail")
     with ops.Graph().as_default():
-      variables.Variable([1.0, 2.0, 3.0], name="v")
+      variables.VariableV1([1.0, 2.0, 3.0], name="v")
       with self.assertRaisesRegexp(ValueError, "must have their device set"):
         supervisor.Supervisor(logdir=logdir, is_chief=False)
     with ops.Graph().as_default(), ops.device("/job:ps"):
-      variables.Variable([1.0, 2.0, 3.0], name="v")
+      variables.VariableV1([1.0, 2.0, 3.0], name="v")
       supervisor.Supervisor(logdir=logdir, is_chief=False)
 
   def testDefaultGlobalStep(self):
     logdir = self._test_dir("default_global_step")
     with ops.Graph().as_default():
-      variables.Variable(287, name="global_step")
+      variables.VariableV1(287, name="global_step")
       sv = supervisor.Supervisor(logdir=logdir)
       sess = sv.prepare_or_wait_for_session("")
       self.assertEquals(287, sess.run(sv.global_step))
@@ -735,7 +735,7 @@ class SupervisorTest(test.TestCase):
   def testRestoreFromMetaGraph(self):
     logdir = self._test_dir("restore_from_meta_graph")
     with ops.Graph().as_default():
-      variables.Variable(1, name="v0")
+      variables.VariableV1(1, name="v0")
       sv = supervisor.Supervisor(logdir=logdir)
       sess = sv.prepare_or_wait_for_session("")
       filename = sv.saver.save(sess, sv.save_path)
@@ -757,7 +757,7 @@ class SupervisorTest(test.TestCase):
     logdir = self._test_dir("standard_services_without_global_step")
     # Create a checkpoint.
     with ops.Graph().as_default():
-      v = variables.Variable([1.0], name="foo")
+      v = variables.VariableV1([1.0], name="foo")
       summary.scalar("v", v[0])
       sv = supervisor.Supervisor(logdir=logdir)
       meta_graph_def = meta_graph.create_meta_graph_def(
@@ -796,7 +796,7 @@ class SupervisorTest(test.TestCase):
     self.assertRaises(StopIteration, lambda: next(rr))
     # There should be a checkpoint file with the variable "foo"
     with ops.Graph().as_default(), self.cached_session() as sess:
-      v = variables.Variable([10.10], name="foo")
+      v = variables.VariableV1([10.10], name="foo")
       sav = saver_lib.Saver([v])
       sav.restore(sess, save_path)
       self.assertEqual(1.0, v.eval()[0])
@@ -807,7 +807,7 @@ class SupervisorTest(test.TestCase):
     logdir = self._test_dir("standard_services_with_global_step")
     # Create a checkpoint.
     with ops.Graph().as_default():
-      v = variables.Variable([123], name="global_step")
+      v = variables.VariableV1([123], name="global_step")
       sv = supervisor.Supervisor(logdir=logdir)
       meta_graph_def = meta_graph.create_meta_graph_def(
           saver_def=sv.saver.saver_def)
@@ -860,7 +860,7 @@ class SupervisorTest(test.TestCase):
     self.assertRaises(StopIteration, lambda: next(rr))
     # There should be a checkpoint file with the variable "foo"
     with ops.Graph().as_default(), self.cached_session() as sess:
-      v = variables.Variable([-12], name="global_step")
+      v = variables.VariableV1([-12], name="global_step")
       sav = saver_lib.Saver([v])
       sav.restore(sess, save_path)
       self.assertEqual(123, v.eval()[0])
diff --git a/tensorflow/python/training/sync_replicas_optimizer_test.py b/tensorflow/python/training/sync_replicas_optimizer_test.py
index fff17402e2..1ef8756ef6 100644
--- a/tensorflow/python/training/sync_replicas_optimizer_test.py
+++ b/tensorflow/python/training/sync_replicas_optimizer_test.py
@@ -40,11 +40,12 @@ def get_workers(num_workers, replicas_to_aggregate, workers):
     is_chief = (worker_id == 0)
     with graph.as_default():
       with ops.device("/job:ps/task:0"):
-        global_step = variables.Variable(0, name="global_step", trainable=False)
-        var_0 = variables.Variable(0.0, name="v0")
+        global_step = variables.VariableV1(
+            0, name="global_step", trainable=False)
+        var_0 = variables.VariableV1(0.0, name="v0")
       with ops.device("/job:ps/task:1"):
-        var_1 = variables.Variable(1.0, name="v1")
-        var_sparse = variables.Variable([[3.0], [4.0]], name="v_sparse")
+        var_1 = variables.VariableV1(1.0, name="v1")
+        var_sparse = variables.VariableV1([[3.0], [4.0]], name="v_sparse")
 
       with ops.device("/job:worker/task:" + str(worker_id)):
         grads_0 = constant_op.constant(0.1 + worker_id * 0.2)
@@ -272,8 +273,8 @@ class SyncReplicasOptimizerHookTest(test.TestCase):
         replicas_to_aggregate=1,
         total_num_replicas=1)
     hook = opt.make_session_run_hook(True)
-    v = variables.Variable([0.])
-    global_step = variables.Variable(0, name="global_step", trainable=False)
+    v = variables.VariableV1([0.])
+    global_step = variables.VariableV1(0, name="global_step", trainable=False)
     opt.minimize(v, global_step=global_step)
     hook.begin()
 
@@ -282,8 +283,8 @@ class SyncReplicasOptimizerHookTest(test.TestCase):
         opt=adam.AdamOptimizer(0.01),
         replicas_to_aggregate=1,
         total_num_replicas=1)
-    v = variables.Variable([0.], name="fetch_variable_test")
-    global_step = variables.Variable(0, name="global_step", trainable=False)
+    v = variables.VariableV1([0.], name="fetch_variable_test")
+    global_step = variables.VariableV1(0, name="global_step", trainable=False)
     opt.minimize(v, global_step=global_step)
     opt_variables = opt.variables()
     beta1_power, beta2_power = opt._opt._get_beta_accumulators()
diff --git a/tensorflow/python/training/training_ops_test.py b/tensorflow/python/training/training_ops_test.py
index d131a11067..f410ceaaff 100644
--- a/tensorflow/python/training/training_ops_test.py
+++ b/tensorflow/python/training/training_ops_test.py
@@ -51,7 +51,7 @@ class TrainingOpsTest(TensorFlowTestCase):
   def _testTypes(self, x, alpha, delta, use_gpu=None):
     self.setUp()
     with self.test_session(use_gpu=use_gpu):
-      var = variables.Variable(x)
+      var = variables.VariableV1(x)
       variables.global_variables_initializer().run()
       self.assertAllCloseAccordingToType(x, var.eval())
       apply_sgd = training_ops.apply_gradient_descent(var, alpha, delta)
@@ -70,8 +70,8 @@ class TrainingOpsTest(TensorFlowTestCase):
   def _testTypesForAdagrad(self, x, y, lr, grad, use_gpu=None):
     self.setUp()
     with self.test_session(use_gpu=use_gpu):
-      var = variables.Variable(x)
-      accum = variables.Variable(y)
+      var = variables.VariableV1(x)
+      accum = variables.VariableV1(y)
       variables.global_variables_initializer().run()
 
       self.assertAllCloseAccordingToType(x, var.eval())
@@ -94,9 +94,9 @@ class TrainingOpsTest(TensorFlowTestCase):
                         lr_power=-0.5):
     self.setUp()
     with self.test_session(use_gpu=use_gpu):
-      var = variables.Variable(x)
-      accum = variables.Variable(y)
-      linear = variables.Variable(z)
+      var = variables.VariableV1(x)
+      accum = variables.VariableV1(y)
+      linear = variables.VariableV1(z)
       variables.global_variables_initializer().run()
 
       self.assertAllCloseAccordingToType(x, var.eval())
@@ -148,8 +148,8 @@ class TrainingOpsTest(TensorFlowTestCase):
   def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices):
     self.setUp()
     with self.test_session(use_gpu=False):
-      var = variables.Variable(x)
-      accum = variables.Variable(y)
+      var = variables.VariableV1(x)
+      accum = variables.VariableV1(y)
       variables.global_variables_initializer().run()
 
       self.assertAllCloseAccordingToType(x, var.eval())
@@ -178,9 +178,9 @@ class TrainingOpsTest(TensorFlowTestCase):
                               lr_power=-0.5):
     self.setUp()
     with self.test_session(use_gpu=False):
-      var = variables.Variable(x)
-      accum = variables.Variable(y)
-      linear = variables.Variable(z)
+      var = variables.VariableV1(x)
+      accum = variables.VariableV1(y)
+      linear = variables.VariableV1(z)
       variables.global_variables_initializer().run()
 
       self.assertAllCloseAccordingToType(x, var.eval())
@@ -257,9 +257,9 @@ class TrainingOpsTest(TensorFlowTestCase):
   def _testTypesForAdam(self, var, m, v, grad, use_gpu):
     self.setUp()
     with self.test_session(use_gpu=use_gpu):
-      var_t = variables.Variable(var)
-      m_t = variables.Variable(m)
-      v_t = variables.Variable(v)
+      var_t = variables.VariableV1(var)
+      m_t = variables.VariableV1(m)
+      v_t = variables.VariableV1(v)
 
       t = 1
       beta1 = np.array(0.9, dtype=var.dtype)
@@ -270,8 +270,8 @@ class TrainingOpsTest(TensorFlowTestCase):
       epsilon = np.array(1e-8, dtype=var.dtype)
       beta1_t = constant_op.constant(beta1, self._toType(var.dtype), [])
       beta2_t = constant_op.constant(beta2, self._toType(var.dtype), [])
-      beta1_power_t = variables.Variable(beta1_power)
-      beta2_power_t = variables.Variable(beta2_power)
+      beta1_power_t = variables.VariableV1(beta1_power)
+      beta2_power_t = variables.VariableV1(beta2_power)
       lr_t = constant_op.constant(lr, self._toType(var.dtype), [])
       epsilon_t = constant_op.constant(epsilon, self._toType(var.dtype), [])
       variables.global_variables_initializer().run()
diff --git a/tensorflow/python/training/training_util_test.py b/tensorflow/python/training/training_util_test.py
index 6cc177e0e8..ba64e785ac 100644
--- a/tensorflow/python/training/training_util_test.py
+++ b/tensorflow/python/training/training_util_test.py
@@ -49,7 +49,7 @@ class GlobalStepTest(test.TestCase):
   def test_invalid_shape(self):
     with ops.Graph().as_default() as g:
       self.assertIsNone(training_util.get_global_step())
-      variables.Variable(
+      variables.VariableV1(
           [0],
           trainable=False,
           dtype=dtypes.int32,
@@ -73,7 +73,7 @@ class GlobalStepTest(test.TestCase):
   def test_get_global_step(self):
     with ops.Graph().as_default() as g:
       self.assertIsNone(training_util.get_global_step())
-      variables.Variable(
+      variables.VariableV1(
           0,
           trainable=False,
           dtype=dtypes.int32,
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt
index 05698b03ee..af7fc9d4ef 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt
@@ -1,5 +1,6 @@
 path: "tensorflow.Variable"
 tf_class {
+  is_instance: "<class \'tensorflow.python.ops.variables.VariableV1\'>"
   is_instance: "<class \'tensorflow.python.ops.variables.Variable\'>"
   is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 503e145a91..509ceff9df 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -2220,6 +2220,10 @@ tf_module {
     name: "variable_axis_size_partitioner"
     argspec: "args=[\'max_shard_bytes\', \'axis\', \'bytes_per_string_element\', \'max_shards\'], varargs=None, keywords=None, defaults=[\'0\', \'16\', \'None\'], "
   }
+  member_method {
+    name: "variable_creator_scope"
+    argspec: "args=[\'variable_creator\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "variable_op_scope"
     argspec: "args=[\'values\', \'name_or_scope\', \'default_name\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable-scope.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable-scope.pbtxt
deleted file mode 100644
index c13eb7b8bb..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.-variable-scope.pbtxt
+++ /dev/null
@@ -1,105 +0,0 @@
-path: "tensorflow.VariableScope"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.variable_scope.VariableScope\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "caching_device"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "constraint"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "custom_getter"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "initializer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "original_name_scope"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "partitioner"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "reuse"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "use_resource"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'reuse\', \'name\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'name_scope\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'\', \'None\', \'None\', \'None\', \'None\', \'None\', \'\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
-  }
-  member_method {
-    name: "get_collection"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_variable"
-    argspec: "args=[\'self\', \'var_store\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'reuse\', \'trainable\', \'collections\', \'caching_device\', \'partitioner\', \'validate_shape\', \'use_resource\', \'custom_getter\', \'constraint\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "global_variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "local_variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reuse_variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_caching_device"
-    argspec: "args=[\'self\', \'caching_device\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_custom_getter"
-    argspec: "args=[\'self\', \'custom_getter\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_dtype"
-    argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_initializer"
-    argspec: "args=[\'self\', \'initializer\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_partitioner"
-    argspec: "args=[\'self\', \'partitioner\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_regularizer"
-    argspec: "args=[\'self\', \'regularizer\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_use_resource"
-    argspec: "args=[\'self\', \'use_resource\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "trainable_variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt
deleted file mode 100644
index ac3ccd468b..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt
+++ /dev/null
@@ -1,17 +0,0 @@
-path: "tensorflow.Variable.SaveSliceInfo"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.variables.SaveSliceInfo\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "spec"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'full_name\', \'full_shape\', \'var_offset\', \'var_shape\', \'save_slice_info_def\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "to_proto"
-    argspec: "args=[\'self\', \'export_scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
deleted file mode 100644
index 05698b03ee..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
+++ /dev/null
@@ -1,130 +0,0 @@
-path: "tensorflow.Variable"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.variables.Variable\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "SaveSliceInfo"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "constraint"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "device"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "graph"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "initial_value"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "initializer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "op"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'initial_value\', \'trainable\', \'collections\', \'validate_shape\', \'caching_device\', \'name\', \'variable_def\', \'dtype\', \'expected_shape\', \'import_scope\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "assign"
-    argspec: "args=[\'self\', \'value\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], "
-  }
-  member_method {
-    name: "assign_add"
-    argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], "
-  }
-  member_method {
-    name: "assign_sub"
-    argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], "
-  }
-  member_method {
-    name: "count_up_to"
-    argspec: "args=[\'self\', \'limit\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "eval"
-    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "from_proto"
-    argspec: "args=[\'variable_def\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "get_shape"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "initialized_value"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "load"
-    argspec: "args=[\'self\', \'value\', \'session\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "read_value"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "scatter_add"
-    argspec: "args=[\'self\', \'sparse_delta\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "scatter_nd_add"
-    argspec: "args=[\'self\', \'indices\', \'updates\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "scatter_nd_sub"
-    argspec: "args=[\'self\', \'indices\', \'updates\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "scatter_nd_update"
-    argspec: "args=[\'self\', \'indices\', \'updates\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "scatter_sub"
-    argspec: "args=[\'self\', \'sparse_delta\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "scatter_update"
-    argspec: "args=[\'self\', \'sparse_delta\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "set_shape"
-    argspec: "args=[\'self\', \'shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "to_proto"
-    argspec: "args=[\'self\', \'export_scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "value"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt
index d499c67d89..e3c63fe737 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt
@@ -48,10 +48,6 @@ tf_module {
     name: "zeros"
     mtype: "<type \'type\'>"
   }
-  member_method {
-    name: "global_variables"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "he_normal"
     argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -68,12 +64,4 @@ tf_module {
     name: "lecun_uniform"
     argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "local_variables"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "variables"
-    argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'init\'], "
-  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 96212f5528..d2dc8bc85f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -1,9 +1,5 @@
 path: "tensorflow"
 tf_module {
-  member {
-    name: "AUTO_REUSE"
-    mtype: "<enum \'_ReuseMode\'>"
-  }
   member {
     name: "AggregationMethod"
     mtype: "<type \'type\'>"
@@ -232,18 +228,10 @@ tf_module {
     name: "VarLenFeature"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "Variable"
-    mtype: "<class \'tensorflow.python.ops.variables.VariableMetaclass\'>"
-  }
   member {
     name: "VariableAggregation"
     mtype: "<class \'enum.EnumMeta\'>"
   }
-  member {
-    name: "VariableScope"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "VariableSynchronization"
     mtype: "<class \'enum.EnumMeta\'>"
@@ -552,10 +540,6 @@ tf_module {
     name: "user_ops"
     mtype: "<type \'module\'>"
   }
-  member {
-    name: "variable_scope"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "variance_scaling_initializer"
     mtype: "<type \'type\'>"
@@ -616,10 +600,6 @@ tf_module {
     name: "add_to_collections"
     argspec: "args=[\'names\', \'value\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "all_variables"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "angle"
     argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -732,10 +712,6 @@ tf_module {
     name: "assert_type"
     argspec: "args=[\'tensor\', \'tf_type\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
-  member_method {
-    name: "assert_variables_initialized"
-    argspec: "args=[\'var_list\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "atan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1136,10 +1112,6 @@ tf_module {
     name: "get_default_session"
     argspec: "args=[], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "get_local_variable"
-    argspec: "args=[\'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'collections\', \'caching_device\', \'partitioner\', \'validate_shape\', \'use_resource\', \'custom_getter\', \'constraint\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
   member_method {
     name: "get_seed"
     argspec: "args=[\'op_seed\'], varargs=None, keywords=None, defaults=None"
@@ -1152,26 +1124,10 @@ tf_module {
     name: "get_session_tensor"
     argspec: "args=[\'handle\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "get_variable"
-    argspec: "args=[\'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'collections\', \'caching_device\', \'partitioner\', \'validate_shape\', \'use_resource\', \'custom_getter\', \'constraint\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "get_variable_scope"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "global_norm"
     argspec: "args=[\'t_list\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "global_variables"
-    argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "global_variables_initializer"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "gradients"
     argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\'], "
@@ -1248,18 +1204,6 @@ tf_module {
     name: "initialize_all_tables"
     argspec: "args=[\'name\'], varargs=None, keywords=None, defaults=[\'init_all_tables\'], "
   }
-  member_method {
-    name: "initialize_all_variables"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "initialize_local_variables"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "initialize_variables"
-    argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'init\'], "
-  }
   member_method {
     name: "invert_permutation"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1288,10 +1232,6 @@ tf_module {
     name: "is_strictly_increasing"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "is_variable_initialized"
-    argspec: "args=[\'variable\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "lbeta"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1328,14 +1268,6 @@ tf_module {
     name: "load_op_library"
     argspec: "args=[\'library_filename\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "local_variables"
-    argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "local_variables_initializer"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "log"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1448,14 +1380,6 @@ tf_module {
     name: "mod"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "model_variables"
-    argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "moving_average_variables"
-    argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "multinomial"
     argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\', \'output_dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
@@ -1656,10 +1580,6 @@ tf_module {
     name: "register_tensor_conversion_function"
     argspec: "args=[\'base_type\', \'conversion_func\', \'priority\'], varargs=None, keywords=None, defaults=[\'100\'], "
   }
-  member_method {
-    name: "report_uninitialized_variables"
-    argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'report_uninitialized_variables\'], "
-  }
   member_method {
     name: "required_space_to_batch_paddings"
     argspec: "args=[\'input_shape\', \'block_shape\', \'base_paddings\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
@@ -2068,10 +1988,6 @@ tf_module {
     name: "trace"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "trainable_variables"
-    argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "transpose"
     argspec: "args=[\'a\', \'perm\', \'name\', \'conjugate\'], varargs=None, keywords=None, defaults=[\'None\', \'transpose\', \'False\'], "
@@ -2140,14 +2056,6 @@ tf_module {
     name: "variable_axis_size_partitioner"
     argspec: "args=[\'max_shard_bytes\', \'axis\', \'bytes_per_string_element\', \'max_shards\'], varargs=None, keywords=None, defaults=[\'0\', \'16\', \'None\'], "
   }
-  member_method {
-    name: "variable_op_scope"
-    argspec: "args=[\'values\', \'name_or_scope\', \'default_name\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "variables_initializer"
-    argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'init\'], "
-  }
   member_method {
     name: "verify_tensor_all_finite"
     argspec: "args=[\'t\', \'msg\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.variable_scope.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.variable_scope.pbtxt
deleted file mode 100644
index e62dec93e6..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.variable_scope.pbtxt
+++ /dev/null
@@ -1,9 +0,0 @@
-path: "tensorflow.variable_scope"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.variable_scope.variable_scope\'>"
-  is_instance: "<type \'object\'>"
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name_or_scope\', \'default_name\', \'values\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\', \'auxiliary_name_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
-  }
-}
-- 
GitLab


From f172c52ac74ae6db228119b90785add81648372e Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Thu, 27 Sep 2018 12:57:24 -0700
Subject: [PATCH 0810/1357] Fixed the broken unit tests

---
 configure.py                    |  2 +-
 tensorflow/workspace.bzl        | 16 ++++++++--------
 third_party/mkl/build_defs.bzl  |  2 +-
 third_party/ngraph/ngraph.BUILD |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/configure.py b/configure.py
index cc6a654a61..f71caa1994 100644
--- a/configure.py
+++ b/configure.py
@@ -1631,7 +1631,7 @@ def main():
     config_info_line('monolithic', 'Config for mostly static monolithic build.')
     config_info_line('gdr', 'Build with GDR support.')
     config_info_line('verbs', 'Build with libverbs support.')
-    config_info_line('ngraph', 'Build with Intel ngraph support.')
+    config_info_line('ngraph', 'Build with Intel nGraph support.')
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index e5a0a0b2b7..6966783efd 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -862,11 +862,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "ngraph",
         urls = [
-            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.8.0.tar.gz",
-            "https://github.com/NervanaSystems/ngraph/archive/v0.8.0.tar.gz",
+            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.8.1.tar.gz",
+            "https://github.com/NervanaSystems/ngraph/archive/v0.8.1.tar.gz",
         ],
-        sha256 = "a8cf3ef2d0e6d31b54eb33f6a9e795f562195ce5c2a857e729ca9c35241cc45c",
-        strip_prefix = "ngraph-0.8.0",
+        sha256 = "bf9dcc88e5c66021e3aac80491a231711211540d613bf9b6bd28db3f5bb86b62",
+        strip_prefix = "ngraph-0.8.1",
         build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
     )
 
@@ -884,11 +884,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "ngraph_tf",
         urls = [
-            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.6.0.tar.gz",
-            "https://github.com/NervanaSystems/ngraph-tf/archive/v0.6.0.tar.gz",
+            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.6.1.tar.gz",
+            "https://github.com/NervanaSystems/ngraph-tf/archive/v0.6.1.tar.gz",
         ],
-        sha256 = "1f49391c02bef24872e9f85591e60e0e7eef12a337db71390444118049fe451f",
-        strip_prefix = "ngraph-tf-0.6.0",
+        sha256 = "402f84c748c113780a60f35f39aab118435285543aee4900d712b76fbf8a21ee",
+        strip_prefix = "ngraph-tf-0.6.1",
         build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"),
     )
 
diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl
index bb798e715a..10c2d90c84 100644
--- a/third_party/mkl/build_defs.bzl
+++ b/third_party/mkl/build_defs.bzl
@@ -92,7 +92,7 @@ def if_enable_mkl(if_true, if_false = []):
       A select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        "//third_party/mkl:enable_mkl": if_true,
+        str(Label("//third_party/mkl:enable_mkl")): if_true,
         "//conditions:default": if_false,
     })
 
diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD
index 71b2187011..6602a480af 100644
--- a/third_party/ngraph/ngraph.BUILD
+++ b/third_party/ngraph/ngraph.BUILD
@@ -110,7 +110,7 @@ cc_library(
         "-I external/ngraph/src",
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
-        '-D NGRAPH_VERSION=\\"0.8.0\\"',
+        '-D NGRAPH_VERSION=\\"0.8.1\\"',
         "-D NGRAPH_DEX_ONLY",
     ],
     visibility = ["//visibility:public"],
@@ -144,7 +144,7 @@ cc_library(
         "-I external/ngraph/src",
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
-        '-D NGRAPH_VERSION=\\"0.8.0\\"',
+        '-D NGRAPH_VERSION=\\"0.8.1\\"',
     ],
     visibility = ["//visibility:public"],
     alwayslink = 1,
-- 
GitLab


From 5220e565b7cc32a5f757896c76c7d57c33bcd323 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 27 Sep 2018 14:01:27 -0700
Subject: [PATCH 0811/1357] Don't use tensorflow::Edge after freeing it

Even with this bug we were accidentally doing the right thing (so the test case
doesn't actually fail without the fix): deleting an Edge sets its input and
output indices to kControlSlot-1 so we'd normally expect to fail when there is a
control edge out of the TF cluster (because a control edge would be recognized
as a data edge).  But AddEdge(x, -1, y, -1) seems to do the right thing for both
control and data edges.

PiperOrigin-RevId: 214831204
---
 tensorflow/compiler/jit/BUILD                 |   2 +
 tensorflow/compiler/jit/build_xla_ops_pass.cc |  11 +-
 .../compiler/jit/build_xla_ops_pass_test.cc   | 112 ++++++++++++++++++
 3 files changed, 116 insertions(+), 9 deletions(-)
 create mode 100644 tensorflow/compiler/jit/build_xla_ops_pass_test.cc

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 4e184729ef..5bf4af1014 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -478,6 +478,7 @@ tf_cc_test(
     name = "compilation_passes_test",
     size = "small",
     srcs = [
+        "build_xla_ops_pass_test.cc",
         "encapsulate_subgraphs_pass_test.cc",
         "encapsulate_xla_computations_pass_test.cc",
         "mark_for_compilation_pass_test.cc",
@@ -486,6 +487,7 @@ tf_cc_test(
     deps = [
         ":common",
         ":compilation_passes",
+        ":node_matchers",
         ":xla_cluster_util",
         ":xla_gpu_device",
         "//tensorflow/cc:cc_ops",
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc
index 13a518d0e8..9e3fd93cda 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc
@@ -112,16 +112,9 @@ static void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) {
   std::vector<const Edge*> out_edges(old_node->out_edges().begin(),
                                      old_node->out_edges().end());
   for (const Edge* edge : out_edges) {
-    Node* dst = edge->dst();
-    int src_output = edge->src_output();
-    int dst_input = edge->dst_input();
+    // TODO(sanjoy): This does not update NodeDef inputs.
+    g->AddEdge(new_node, edge->src_output(), edge->dst(), edge->dst_input());
     g->RemoveEdge(edge);
-
-    if (edge->IsControlEdge()) {
-      g->AddControlEdge(new_node, dst);
-    } else {
-      g->AddEdge(new_node, src_output, dst, dst_input);
-    }
   }
 }
 
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
new file mode 100644
index 0000000000..b7cb4506b9
--- /dev/null
+++ b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
@@ -0,0 +1,112 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/build_xla_ops_pass.h"
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/resource_variable_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/compiler/jit/defs.h"
+#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/jit/node_matchers.h"
+#include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+using ::tensorflow::testing::FindNodeByName;
+using ::tensorflow::testing::matchers::CtrlDeps;
+using ::tensorflow::testing::matchers::NodeWith;
+using ::tensorflow::testing::matchers::Op;
+
+Status BuildXlaOps(const Scope& s, std::unique_ptr<Graph>* result) {
+  auto graph = absl::make_unique<Graph>(OpRegistry::Global());
+  TF_RETURN_IF_ERROR(s.ToGraph(graph.get()));
+
+  // Assign all nodes to the CPU device.
+  static const char* kCpuDevice = "/job:localhost/replica:0/task:0/cpu:0";
+  for (Node* n : graph->nodes()) {
+    if (n->assigned_device_name().empty()) {
+      n->set_assigned_device_name(kCpuDevice);
+    }
+  }
+
+  GraphOptimizationPassOptions opt_options;
+  opt_options.graph = &graph;
+  BuildXlaOpsPass pass;
+  TF_RETURN_IF_ERROR(pass.Run(opt_options));
+  *result = std::move(graph);
+  return Status::OK();
+}
+
+Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name,
+                             const string& node_name, Node** result) {
+  NodeDef call_node;
+  call_node.set_name(node_name);
+  call_node.set_op(callee_name);
+  AddNodeAttr(kXlaCompiledKernelAttr, true, &call_node);
+  AddNodeAttr(kXlaNumConstantArgsAttr, 0, &call_node);
+  AddNodeAttr(kXlaNumResourceArgsAttr, 0, &call_node);
+  Status s;
+  *result = graph->AddNode(call_node, &s);
+  return s;
+}
+
+Node* MakeWrite(const Scope& scope, const string& id) {
+  Output var_handle =
+      ops::VarHandleOp(scope.WithOpName("Var" + id), DT_FLOAT, TensorShape({}));
+  Output value_to_write =
+      ops::Const(scope.WithOpName("ValueToAssign" + id), 1.0f);
+  ops::AssignVariableOp assign_op(scope.WithOpName("Assignee" + id), var_handle,
+                                  value_to_write);
+  return assign_op.operation.node();
+}
+
+FunctionDefLibrary CreateFunctionDefLibWithConstFunction(const string& name) {
+  FunctionDefLibrary flib_def;
+  FunctionDef func = FunctionDefHelper::Create(
+      /*function_name=*/name, /*in_def=*/{}, /*out_def=*/{"out: float"},
+      /*attr_def*/
+      {}, /*node_def=*/{FunctionDefHelper::Const("one", 1.0f)},
+      /*ret_def=*/{{"out", "out:output:0"}});
+  *flib_def.add_function() = std::move(func);
+  return flib_def;
+}
+
+TEST(BuildXlaOps, ControlDepsPreserved) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  FunctionDefLibrary flib_def =
+      CreateFunctionDefLibWithConstFunction("cluster_0");
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def));
+  Node* call;
+  TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call));
+  Node* write_op = MakeWrite(root, "write");
+  root.graph()->AddControlEdge(call, write_op);
+
+  std::unique_ptr<Graph> graph;
+  TF_ASSERT_OK(BuildXlaOps(root, &graph));
+
+  Node* write_op_new = FindNodeByName(graph.get(), write_op->name());
+  ASSERT_NE(write_op_new, nullptr);
+  EXPECT_THAT(write_op_new, NodeWith(CtrlDeps(NodeWith(Op("_XlaRun")))));
+}
+
+}  // namespace
+}  // namespace tensorflow
-- 
GitLab


From 2fb9377a5ec610b8eff853fd1d2d53eabf711eda Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Thu, 27 Sep 2018 14:03:52 -0700
Subject: [PATCH 0812/1357] Enable worker heartbeat polling for all available
 workers.

PiperOrigin-RevId: 214831772
---
 .../contrib/tpu/python/tpu/session_support.py | 52 ++++++++++++-------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/session_support.py b/tensorflow/contrib/tpu/python/tpu/session_support.py
index 3e91e2df32..24b9bd136b 100644
--- a/tensorflow/contrib/tpu/python/tpu/session_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/session_support.py
@@ -41,6 +41,25 @@ class CoordinatorShutdownException(Exception):
   pass
 
 
+def _make_heartbeat_op(session, device, request_ph):
+  """Return a heartbeat op or None if heartbeats are not supported by device."""
+  try:
+    with ops.device(device):
+      heartbeat_op = tpu_ops.worker_heartbeat(request_ph)
+      request = event_pb2.WorkerHeartbeatRequest()
+      options = config_pb2.RunOptions(timeout_in_ms=5000)
+      session.run(
+          heartbeat_op,
+          feed_dict={request_ph: request.SerializeToString()},
+          options=options)
+      return heartbeat_op
+  except errors.InvalidArgumentError as _:
+    return None
+  except errors.DeadlineExceededError as _:
+    logging.warning('Timeout connecting to %s when testing heartbeat', device)
+    return None
+
+
 class WorkerHeartbeatManager(object):
   """Manages the status/heartbeat monitor for a set of workers."""
 
@@ -72,30 +91,27 @@ class WorkerHeartbeatManager(object):
         name='worker_heartbeat_request', dtype=dtypes.string)
 
     heartbeat_ops = []
+    kept_devices = []
     for device in devices:
-      with ops.device(device):
-        heartbeat_ops.append(tpu_ops.worker_heartbeat(request_placeholder))
+      heartbeat_op = _make_heartbeat_op(session, device, request_placeholder)
+      if heartbeat_op is not None:
+        kept_devices.append(device)
+        heartbeat_ops.append(heartbeat_op)
+      else:
+        logging.warning('Heartbeat support not available for %s', device)
 
-    return WorkerHeartbeatManager(session, devices, heartbeat_ops,
+    return WorkerHeartbeatManager(session, kept_devices, heartbeat_ops,
                                   request_placeholder)
 
-  def heartbeat_supported(self):
-    """Returns True if heartbeat operations are supported on all workers."""
-    try:
-      # Send ping to verify worker has heartbeat support.
-      self.ping()
-      return True
-    except errors.InvalidArgumentError as _:
-      return False
+  def num_workers(self):
+    return len(self._devices)
 
   def configure(self, message):
     """Configure heartbeat manager for all devices.
 
     Args:
       message: `event_pb2.WorkerHeartbeatRequest`
-
     Returns: `None`
-
     """
     logging.info('Configuring worker heartbeat: %s',
                  text_format.MessageToString(message))
@@ -184,7 +200,6 @@ class WatchdogManager(threading.Thread):
     """Initialize a watchdog manager.
 
     Args:
-
       session: Session connected to worker devices.  A cloned session and graph
         will be created for managing worker pings.
       devices: Set of devices to monitor.  If none, all workers will be
@@ -277,16 +292,14 @@ class GracefulShutdownHook(session_run_hook.SessionRunHook):
           target=training_session.sess_str, graph=self._graph)
       self._workers = WorkerHeartbeatManager.from_devices(
           self._session, all_worker_devices(self._session))
-      self._heartbeat_supported = self._workers.heartbeat_supported()
+      self._heartbeat_supported = self._workers.num_workers() > 0
       if self._heartbeat_supported:
         self._workers.configure(
             event_pb2.WorkerHeartbeatRequest(
                 shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR))
       else:
         logging.warn(
-            'Worker heartbeats not supported by all workers.  No failure '
-            'handling will be enabled.'
-        )
+            'No workers support hearbeats. Failure handling will be disabled.')
 
   def saver(self):
     if self._saver:
@@ -303,8 +316,7 @@ class GracefulShutdownHook(session_run_hook.SessionRunHook):
       logging.error(
           'Multiple savers in the SAVERS collection.  On-demand checkpointing '
           'will be disabled. Pass an explicit `saver` to the constructor to '
-          'override this behavior.'
-      )
+          'override this behavior.')
       return None
 
     return savers[0]
-- 
GitLab


From cc83067469bc30bba55932c587f31ef68f15792f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 14:04:06 -0700
Subject: [PATCH 0813/1357] Migrate a few conv kernels to use new kernel
 signatures.

PiperOrigin-RevId: 214831837
---
 tensorflow/contrib/lite/kernels/conv.cc       | 70 +++++++++----------
 .../kernels/internal/optimized/cblas_conv.h   | 54 ++++++++------
 .../internal/optimized/multithreaded_conv.h   | 60 +++++++++-------
 3 files changed, 100 insertions(+), 84 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc
index 101b4fc961..dbcadbee14 100644
--- a/tensorflow/contrib/lite/kernels/conv.cc
+++ b/tensorflow/contrib/lite/kernels/conv.cc
@@ -86,6 +86,18 @@ struct OpData {
   bool run_multithreaded_kernel;
 };
 
+inline PaddingType RuntimePaddingType(TfLitePadding padding) {
+  switch (padding) {
+    case TfLitePadding::kTfLitePaddingSame:
+      return PaddingType::kSame;
+    case TfLitePadding::kTfLitePaddingValid:
+      return PaddingType::kValid;
+    case TfLitePadding::kTfLitePaddingUnknown:
+    default:
+      return PaddingType::kNone;
+  }
+}
+
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   // This is a builtin op, so we don't use the contents in 'buffer', if any.
   // Instead, we allocate a new object to use as scratch space for im2col, and
@@ -487,18 +499,18 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
   } else {
     effective_kernel_type = kernel_type;
   }
+  ConvParams op_params;
+  op_params.padding_type = RuntimePaddingType(params->padding);
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = params->dilation_width_factor;
+  op_params.dilation_height_factor = params->dilation_height_factor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
   switch (effective_kernel_type) {
     case kReference: {
-      ConvParams op_params;
-      op_params.padding_type = PaddingType::kSame;
-      op_params.padding_values.width = data->padding.width;
-      op_params.padding_values.height = data->padding.height;
-      op_params.stride_width = params->stride_width;
-      op_params.stride_height = params->stride_height;
-      op_params.dilation_width_factor = params->dilation_width_factor;
-      op_params.dilation_height_factor = params->dilation_height_factor;
-      op_params.float_activation_min = output_activation_min;
-      op_params.float_activation_max = output_activation_max;
       reference_ops::Conv(op_params, GetTensorShape(input),
                           GetTensorData<float>(input), GetTensorShape(filter),
                           GetTensorData<float>(filter), GetTensorShape(bias),
@@ -508,16 +520,6 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
       break;
     }
     case kGenericOptimized: {
-      ConvParams op_params;
-      op_params.padding_type = PaddingType::kSame;
-      op_params.padding_values.width = data->padding.width;
-      op_params.padding_values.height = data->padding.height;
-      op_params.stride_width = params->stride_width;
-      op_params.stride_height = params->stride_height;
-      op_params.dilation_width_factor = params->dilation_width_factor;
-      op_params.dilation_height_factor = params->dilation_height_factor;
-      op_params.float_activation_min = output_activation_min;
-      op_params.float_activation_max = output_activation_max;
       optimized_ops::Conv(op_params, GetTensorShape(input),
                           GetTensorData<float>(input), GetTensorShape(filter),
                           GetTensorData<float>(filter), GetTensorShape(bias),
@@ -534,25 +536,21 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
         filter_data = GetTensorData<float>(filter);
       }
       multithreaded_ops::Conv(
-          *eigen_support::GetThreadPoolDevice(context),
-          GetTensorData<float>(input), GetTensorDims(input), filter_data,
-          GetTensorDims(filter), GetTensorData<float>(bias),
-          GetTensorDims(bias), params->stride_width, params->stride_height,
-          data->padding.width, data->padding.height, params->padding,
-          output_activation_min, output_activation_max,
-          GetTensorData<float>(output), GetTensorDims(output),
-          GetTensorData<float>(im2col), GetTensorDims(im2col));
+          *eigen_support::GetThreadPoolDevice(context), op_params,
+          GetTensorShape(input), GetTensorData<float>(input),
+          GetTensorShape(filter), filter_data, GetTensorShape(bias),
+          GetTensorData<float>(bias), GetTensorShape(output),
+          GetTensorData<float>(output), GetTensorShape(im2col),
+          GetTensorData<float>(im2col));
       break;
     }
     case kCblasOptimized: {
-      cblas_ops::Conv(GetTensorData<float>(input), GetTensorDims(input),
-                      GetTensorData<float>(filter), GetTensorDims(filter),
-                      GetTensorData<float>(bias), GetTensorDims(bias),
-                      params->stride_width, params->stride_height,
-                      data->padding.width, data->padding.height,
-                      output_activation_min, output_activation_max,
-                      GetTensorData<float>(output), GetTensorDims(output),
-                      GetTensorData<float>(im2col), GetTensorDims(im2col));
+      cblas_ops::Conv(op_params, GetTensorShape(input),
+                      GetTensorData<float>(input), GetTensorShape(filter),
+                      GetTensorData<float>(filter), GetTensorShape(bias),
+                      GetTensorData<float>(bias), GetTensorShape(output),
+                      GetTensorData<float>(output), GetTensorShape(im2col),
+                      GetTensorData<float>(im2col));
       break;
     }
   }
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h b/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h
index 40d42bbae9..2d96da65c3 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h
@@ -31,20 +31,29 @@ limitations under the License.
 namespace tflite {
 namespace cblas_ops {
 
-inline void Conv(const float* input_data, const Dims<4>& input_dims,
-                 const float* filter_data, const Dims<4>& filter_dims,
-                 const float* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int pad_width,
-                 int pad_height, float output_activation_min,
-                 float output_activation_max, float* output_data,
-                 const Dims<4>& output_dims, float* im2col_data,
-                 const Dims<4>& im2col_dims) {
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& filter_shape,
+                 const float* filter_data, const RuntimeShape& bias_shape,
+                 const float* bias_data, const RuntimeShape& output_shape,
+                 float* output_data, const RuntimeShape& im2col_shape,
+                 float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
   gemmlowp::ScopedProfilingLabel label("Conv/cblas");
 
   const float* gemm_input_data = nullptr;
-  const Dims<4>* gemm_input_dims = nullptr;
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
+  const RuntimeShape* gemm_input_shape = nullptr;
+  const int filter_width = filter_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
   const bool need_im2col = stride_width != 1 || stride_height != 1 ||
                            filter_width != 1 || filter_height != 1;
   if (need_im2col) {
@@ -55,18 +64,17 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
     op_params.padding_values.height = pad_height;
     op_params.stride_width = stride_width;
     op_params.stride_height = stride_height;
-    op_params.dilation_width_factor = 1;
-    op_params.dilation_height_factor = 1;
+    op_params.dilation_width_factor = dilation_width_factor;
+    op_params.dilation_height_factor = dilation_height_factor;
     optimized_ops::Im2col(op_params, filter_height, filter_width, 0,
-                          DimsToShape(input_dims), input_data,
-                          DimsToShape(im2col_dims), im2col_data);
+                          input_shape, input_data, im2col_shape, im2col_data);
 
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else {
     TFLITE_DCHECK(!im2col_data);
     gemm_input_data = input_data;
-    gemm_input_dims = &input_dims;
+    gemm_input_shape = &input_shape;
   }
 
   // The following code computes matrix multiplication c = a * transponse(b)
@@ -78,10 +86,10 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
   const float* a = gemm_input_data;
   const float* b = filter_data;
   float* c = output_data;
-  int m = gemm_input_dims->sizes[1] * gemm_input_dims->sizes[2] *
-          gemm_input_dims->sizes[3];
-  int n = output_dims.sizes[0];
-  int k = gemm_input_dims->sizes[0];
+  const int gemm_input_dims = gemm_input_shape->DimensionsCount();
+  int m = FlatSizeSkipDim(*gemm_input_shape, gemm_input_dims - 1);
+  int n = output_shape.Dims(3);
+  int k = gemm_input_shape->Dims(gemm_input_dims - 1);
   // The stride of matrix a, b and c respectively.
   int stride_a = k;
   int stride_b = k;
@@ -91,8 +99,8 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
               stride_a, b, stride_b, 0.0f, c, stride_c);
 
   optimized_ops::AddBiasAndEvalActivationFunction(
-      output_activation_min, output_activation_max, DimsToShape(bias_dims),
-      bias_data, DimsToShape(output_dims), output_data);
+      output_activation_min, output_activation_max, bias_shape, bias_data,
+      output_shape, output_data);
 }
 
 }  // namespace cblas_ops
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h b/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h
index b5d001cc9e..4139cf4eba 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h
@@ -69,13 +69,13 @@ struct MatMulConvFunctor {
 template <class T>
 class EigenTensorConvFunctor {
  private:
-  Eigen::PaddingType TfLitePadding2EigenPadding(TfLitePadding padding) {
+  Eigen::PaddingType RuntimePadding2EigenPadding(PaddingType padding) {
     switch (padding) {
-      case kTfLitePaddingValid:
+      case PaddingType::kValid:
         return Eigen::PADDING_VALID;
-      case kTfLitePaddingSame:
+      case PaddingType::kSame:
         return Eigen::PADDING_SAME;
-      case kTfLitePaddingUnknown:
+      case PaddingType::kNone:
         assert(false);  // should never get here.
         return Eigen::PADDING_VALID;
     }
@@ -89,7 +89,7 @@ class EigenTensorConvFunctor {
                   int input_width, int input_depth, const T* filter_data,
                   int filter_height, int filter_width, int filter_count,
                   int stride_rows, int stride_cols, int pad_width,
-                  int pad_height, TfLitePadding padding, T* output_data,
+                  int pad_height, PaddingType padding, T* output_data,
                   int output_height, int output_width) {
     const bool is_1x1_kernel = (filter_height == 1 && filter_width == 1 &&
                                 stride_rows == 1 && stride_cols == 1);
@@ -127,28 +127,38 @@ class EigenTensorConvFunctor {
                               input_depth, filter_count);
       output.device(device) =
           Eigen::SpatialConvolution(input, filter, stride_cols, stride_rows,
-                                    TfLitePadding2EigenPadding(padding));
+                                    RuntimePadding2EigenPadding(padding));
     }
   }
 };
 
-inline void Conv(const Eigen::ThreadPoolDevice& device, const float* input_data,
-                 const Dims<4>& input_dims, const float* filter_data,
-                 const Dims<4>& filter_dims, const float* bias_data,
-                 const Dims<4>& bias_dims, int stride_width, int stride_height,
-                 int pad_width, int pad_height, TfLitePadding padding,
-                 float output_activation_min, float output_activation_max,
-                 float* output_data, const Dims<4>& output_dims,
-                 float* im2col_data, const Dims<4>& im2col_dims) {
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0);
-  const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
+inline void Conv(const Eigen::ThreadPoolDevice& device,
+                 const ConvParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& filter_shape,
+                 const float* filter_data, const RuntimeShape& bias_shape,
+                 const float* bias_data, const RuntimeShape& output_shape,
+                 float* output_data, const RuntimeShape& im2col_shape,
+                 float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const PaddingType padding = params.padding_type;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
   EigenTensorConvFunctor<float> conv_functor;
   conv_functor(device, input_data, im2col_data, batches, input_height,
                input_width, input_depth, filter_data, filter_height,
@@ -157,8 +167,8 @@ inline void Conv(const Eigen::ThreadPoolDevice& device, const float* input_data,
                output_width);
 
   optimized_ops::AddBiasAndEvalActivationFunction(
-      output_activation_min, output_activation_max, DimsToShape(bias_dims),
-      bias_data, DimsToShape(output_dims), output_data);
+      output_activation_min, output_activation_max, bias_shape, bias_data,
+      output_shape, output_data);
 }
 
 }  // namespace multithreaded_ops
-- 
GitLab


From d0397c3314600da0c9cdc300ae87483331d54298 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Thu, 27 Sep 2018 14:25:18 -0700
Subject: [PATCH 0814/1357] Rename TFLite Eager delegate -> Flex delegate

PiperOrigin-RevId: 214835588
---
 .../lite/delegates/{eager => flex}/BUILD      |  0
 .../delegates/{eager => flex}/buffer_map.cc   |  8 ++--
 .../delegates/{eager => flex}/buffer_map.h    | 12 ++---
 .../{eager => flex}/buffer_map_test.cc        |  6 +--
 .../delegates/{eager => flex}/delegate.cc     | 34 +++++++-------
 .../lite/delegates/{eager => flex}/delegate.h | 26 +++++-----
 .../{eager => flex}/delegate_data.cc          |  6 +--
 .../delegates/{eager => flex}/delegate_data.h | 16 +++----
 .../{eager => flex}/delegate_data_test.cc     |  6 +--
 .../{eager => flex}/delegate_test.cc          | 14 +++---
 .../lite/delegates/{eager => flex}/kernel.cc  | 30 ++++++------
 .../lite/delegates/{eager => flex}/kernel.h   | 12 ++---
 .../delegates/{eager => flex}/kernel_test.cc  | 16 +++----
 .../delegates/{eager => flex}/test_util.cc    | 47 +++++++++----------
 .../delegates/{eager => flex}/test_util.h     | 20 ++++----
 .../lite/delegates/{eager => flex}/util.cc    |  6 +--
 .../lite/delegates/{eager => flex}/util.h     | 10 ++--
 .../delegates/{eager => flex}/util_test.cc    |  6 +--
 tensorflow/contrib/lite/kernels/register.cc   |  8 ++--
 tensorflow/contrib/lite/model.cc              |  4 +-
 tensorflow/contrib/lite/python/convert.py     |  6 +--
 tensorflow/contrib/lite/python/lite_test.py   |  2 +-
 tensorflow/contrib/lite/testing/BUILD         |  2 +-
 .../contrib/lite/testing/generate_examples.py |  2 +-
 .../contrib/lite/testing/tflite_diff_flags.h  |  4 +-
 .../contrib/lite/testing/tflite_diff_util.h   |  2 +-
 .../contrib/lite/testing/tflite_driver.cc     |  6 +--
 .../contrib/lite/testing/tflite_driver.h      |  4 +-
 tensorflow/contrib/lite/toco/args.h           |  4 +-
 .../contrib/lite/toco/import_tensorflow.cc    |  4 +-
 .../contrib/lite/toco/import_tensorflow.h     |  2 +-
 tensorflow/contrib/lite/toco/tflite/export.cc | 20 ++++----
 tensorflow/contrib/lite/toco/tflite/export.h  |  4 +-
 .../contrib/lite/toco/tflite/export_test.cc   |  2 +-
 .../contrib/lite/toco/tflite/operator.cc      | 26 +++++-----
 .../contrib/lite/toco/tflite/operator.h       |  6 +--
 .../contrib/lite/toco/toco_cmdline_flags.cc   | 24 +++++-----
 tensorflow/contrib/lite/toco/toco_flags.proto | 16 +++----
 tensorflow/contrib/lite/toco/toco_tooling.cc  |  8 ++--
 tensorflow/contrib/lite/tools/benchmark/BUILD |  8 ++--
 .../tools/benchmark/benchmark_tflite_model.cc |  6 +--
 .../tools/benchmark/benchmark_tflite_model.h  |  4 +-
 tensorflow/contrib/lite/util.cc               |  6 +--
 tensorflow/contrib/lite/util.h                |  8 ++--
 tensorflow/contrib/lite/util_test.cc          | 16 +++----
 45 files changed, 239 insertions(+), 240 deletions(-)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/BUILD (100%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/buffer_map.cc (95%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/buffer_map.h (86%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/buffer_map_test.cc (98%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate.cc (76%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate.h (64%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_data.cc (94%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_data.h (78%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_data_test.cc (93%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_test.cc (95%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/kernel.cc (91%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/kernel.h (79%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/kernel_test.cc (94%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/test_util.cc (76%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/test_util.h (90%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/util.cc (96%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/util.h (89%)
 rename tensorflow/contrib/lite/delegates/{eager => flex}/util_test.cc (97%)

diff --git a/tensorflow/contrib/lite/delegates/eager/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD
similarity index 100%
rename from tensorflow/contrib/lite/delegates/eager/BUILD
rename to tensorflow/contrib/lite/delegates/flex/BUILD
diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map.cc b/tensorflow/contrib/lite/delegates/flex/buffer_map.cc
similarity index 95%
rename from tensorflow/contrib/lite/delegates/eager/buffer_map.cc
rename to tensorflow/contrib/lite/delegates/flex/buffer_map.cc
index e5a19c3997..63e39196d9 100644
--- a/tensorflow/contrib/lite/delegates/eager/buffer_map.cc
+++ b/tensorflow/contrib/lite/delegates/flex/buffer_map.cc
@@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
 
 #include "tensorflow/c/c_api_internal.h"
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/log_memory.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 // A tensor buffer that is allocated, deallocated and populated by TF Lite.
 class TfLiteTensorBuffer : public tensorflow::TensorBuffer {
@@ -107,5 +107,5 @@ void BufferMap::SetFromTensorFlow(int tensor_index, tensorflow::Tensor tensor) {
   id_to_tensor_[tensor_index] = std::move(tensor);
 }
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map.h b/tensorflow/contrib/lite/delegates/flex/buffer_map.h
similarity index 86%
rename from tensorflow/contrib/lite/delegates/eager/buffer_map.h
rename to tensorflow/contrib/lite/delegates/flex/buffer_map.h
index aaaa045840..4ce886568a 100644
--- a/tensorflow/contrib/lite/delegates/eager/buffer_map.h
+++ b/tensorflow/contrib/lite/delegates/flex/buffer_map.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_
 
 #include <map>
 
@@ -21,12 +21,12 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
 // Maps a TF Lite tensor index into a TensorFlow tensor.
 //
 // The TF Lite interpreter assigns integer indices to each of its tensors, but
-// the Eager delegate deals in terms of TensorFlow tensors. This class maps
+// the Flex delegate deals in terms of TensorFlow tensors. This class maps
 // from indices to tensors and allows the creation of new tensors to be
 // associated with a given index.
 class BufferMap {
@@ -55,7 +55,7 @@ class BufferMap {
   std::map<int, tensorflow::Tensor> id_to_tensor_;
 };
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc b/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc
similarity index 98%
rename from tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc
rename to tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc
index a046943e56..bb80e25e80 100644
--- a/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
@@ -21,7 +21,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 using ::testing::ElementsAre;
@@ -164,7 +164,7 @@ TEST(BufferMapTest, TensorFlowOverwritesTfLite) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate.cc b/tensorflow/contrib/lite/delegates/flex/delegate.cc
similarity index 76%
rename from tensorflow/contrib/lite/delegates/eager/delegate.cc
rename to tensorflow/contrib/lite/delegates/flex/delegate.cc
index 45fc158157..ba065a8ff5 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate.cc
@@ -12,19 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 
 #include <vector>
 
 #include "tensorflow/contrib/lite/context_util.h"
-#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
-#include "tensorflow/contrib/lite/delegates/eager/kernel.h"
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/flex/kernel.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 #include "tensorflow/contrib/lite/util.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace delegate {
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
@@ -32,7 +32,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
   TfLiteIntArray* plan;
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
 
-  // Add all custom ops starting with "Eager" to list of supported nodes.
+  // Add all custom ops starting with "Flex" to list of supported nodes.
   std::vector<int> supported_nodes;
   for (int node_index : TfLiteIntArrayView(plan)) {
     TfLiteNode* node;
@@ -40,7 +40,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
         context, node_index, &node, &registration));
 
-    if (IsEagerOp(registration->custom_name)) {
+    if (IsFlexOp(registration->custom_name)) {
       supported_nodes.push_back(node_index);
     }
   }
@@ -81,28 +81,28 @@ TfLiteStatus CopyFromBufferHandle(TfLiteContext* context,
 }
 
 }  // namespace delegate
-}  // namespace eager
+}  // namespace flex
 
-std::unique_ptr<EagerDelegate> EagerDelegate::Create() {
-  std::unique_ptr<eager::DelegateData> delegate_data;
-  if (!eager::DelegateData::Create(&delegate_data).ok()) {
+std::unique_ptr<FlexDelegate> FlexDelegate::Create() {
+  std::unique_ptr<flex::DelegateData> delegate_data;
+  if (!flex::DelegateData::Create(&delegate_data).ok()) {
     fprintf(stderr, "Unable to initialize TensorFlow context.\n");
     return nullptr;
   }
 
-  return std::unique_ptr<EagerDelegate>(
-      new EagerDelegate(std::move(delegate_data)));
+  return std::unique_ptr<FlexDelegate>(
+      new FlexDelegate(std::move(delegate_data)));
 }
 
-EagerDelegate::EagerDelegate(std::unique_ptr<eager::DelegateData> delegate_data)
+FlexDelegate::FlexDelegate(std::unique_ptr<flex::DelegateData> delegate_data)
     : TfLiteDelegate{
           /*data_=*/delegate_data.get(),
-          /*nullptr,*/ &eager::delegate::Prepare,
-          /*CopyFromBufferHandle=*/&eager::delegate::CopyFromBufferHandle,
+          /*nullptr,*/ &flex::delegate::Prepare,
+          /*CopyFromBufferHandle=*/&flex::delegate::CopyFromBufferHandle,
           /*CopyToBufferHandle=*/nullptr,
           /*FreeBufferHandle=*/nullptr},
       delegate_data_(std::move(delegate_data)) {}
 
-EagerDelegate::~EagerDelegate() {}
+FlexDelegate::~FlexDelegate() {}
 
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate.h b/tensorflow/contrib/lite/delegates/flex/delegate.h
similarity index 64%
rename from tensorflow/contrib/lite/delegates/eager/delegate.h
rename to tensorflow/contrib/lite/delegates/flex/delegate.h
index 70f3c15af4..1017780dc7 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate.h
+++ b/tensorflow/contrib/lite/delegates/flex/delegate.h
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_
 
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
 
 namespace tflite {
 
@@ -24,12 +24,12 @@ namespace tflite {
 // Delegate that can be used to extract parts of a graph that are designed to be
 // executed by TensorFlow's runtime via Eager.
 //
-// The interpreter must be constructed after the EagerDelegate and destructed
-// before the EagerDelegate. This delegate may be used with multiple
+// The interpreter must be constructed after the FlexDelegate and destructed
+// before the FlexDelegate. This delegate may be used with multiple
 // interpreters, but it is *not* thread-safe.
 //
 // Usage:
-//   auto delegate = EagerDelegate::Create();
+//   auto delegate = FlexDelegate::Create();
 //   ... build interpreter ...
 //
 //   if (delegate) {
@@ -39,21 +39,21 @@ namespace tflite {
 //   ... run inference ...
 //   ... destroy interpreter ...
 //   ... destroy delegate ...
-class EagerDelegate : public TfLiteDelegate {
+class FlexDelegate : public TfLiteDelegate {
  public:
   // Creates a delegate that supports TF ops.
   //
-  // If the underyling TF Eager context creation fails, returns null.
-  static std::unique_ptr<EagerDelegate> Create();
+  // If the underyling TF Flex context creation fails, returns null.
+  static std::unique_ptr<FlexDelegate> Create();
 
-  ~EagerDelegate();
+  ~FlexDelegate();
 
  private:
-  explicit EagerDelegate(std::unique_ptr<eager::DelegateData> delegate_data);
+  explicit FlexDelegate(std::unique_ptr<flex::DelegateData> delegate_data);
 
-  std::unique_ptr<eager::DelegateData> delegate_data_;
+  std::unique_ptr<flex::DelegateData> delegate_data_;
 };
 
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data.cc b/tensorflow/contrib/lite/delegates/flex/delegate_data.cc
similarity index 94%
rename from tensorflow/contrib/lite/delegates/eager/delegate_data.cc
rename to tensorflow/contrib/lite/delegates/flex/delegate_data.cc
index 0fd5c976f8..8f985f770c 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate_data.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate_data.cc
@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
 
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 tensorflow::Status DelegateData::Create(std::unique_ptr<DelegateData>* data) {
   std::vector<tensorflow::Device*> devices;
 
@@ -43,5 +43,5 @@ DelegateData::DelegateData(tensorflow::EagerContext* eager_context)
 
 DelegateData::~DelegateData() {}
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data.h b/tensorflow/contrib/lite/delegates/flex/delegate_data.h
similarity index 78%
rename from tensorflow/contrib/lite/delegates/eager/delegate_data.h
rename to tensorflow/contrib/lite/delegates/flex/delegate_data.h
index 772d26f44e..8d75f0b0ef 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate_data.h
+++ b/tensorflow/contrib/lite/delegates/flex/delegate_data.h
@@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
 
-#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h"
+#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
-// Data kept by the Eager delegate for the lifetime of an Interpreter.
+// Data kept by the Flex delegate for the lifetime of an Interpreter.
 class DelegateData {
  public:
   // Create a new DelegateData, initialized with a newly-created EagerContext.
@@ -29,7 +29,7 @@ class DelegateData {
 
   ~DelegateData();
 
-  // The EagerContext that is required for execution of Eager Ops.
+  // The EagerContext that is required for execution of Flex Ops.
   tensorflow::EagerContext* GetEagerContext() { return eager_context_.get(); }
 
   // Map from TF Lite tensor index to TensorFlow tensor for a given context.
@@ -46,7 +46,7 @@ class DelegateData {
   std::unordered_map<const TfLiteContext*, BufferMap> buffer_map_;
 };
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc b/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc
similarity index 93%
rename from tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc
rename to tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc
index def063309f..30b10f435a 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 TEST(DelegateDataTest, Basic) {
@@ -39,7 +39,7 @@ TEST(DelegateDataTest, Basic) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_test.cc b/tensorflow/contrib/lite/delegates/flex/delegate_test.cc
similarity index 95%
rename from tensorflow/contrib/lite/delegates/eager/delegate_test.cc
rename to tensorflow/contrib/lite/delegates/flex/delegate_test.cc
index 43ec5d53b8..1813952cef 100644
--- a/tensorflow/contrib/lite/delegates/eager/delegate_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate_test.cc
@@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include "tensorflow/contrib/lite/delegates/eager/test_util.h"
+#include "tensorflow/contrib/lite/delegates/flex/test_util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 using ::testing::ContainsRegex;
 using ::testing::ElementsAre;
 
-class DelegateTest : public testing::EagerModelTest {
+class DelegateTest : public testing::FlexModelTest {
  public:
   DelegateTest() {
-    delegate_ = EagerDelegate::Create();
+    delegate_ = FlexDelegate::Create();
     interpreter_.reset(new Interpreter(&error_reporter_));
   }
 
@@ -46,7 +46,7 @@ class DelegateTest : public testing::EagerModelTest {
   }
 
  private:
-  std::unique_ptr<EagerDelegate> delegate_;
+  std::unique_ptr<FlexDelegate> delegate_;
 };
 
 TEST_F(DelegateTest, FullGraph) {
@@ -236,7 +236,7 @@ TEST_F(DelegateTest, MultipleInterpretersSameDelegate) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.cc b/tensorflow/contrib/lite/delegates/flex/kernel.cc
similarity index 91%
rename from tensorflow/contrib/lite/delegates/eager/kernel.cc
rename to tensorflow/contrib/lite/delegates/flex/kernel.cc
index 48a2f56baf..e4f1aea990 100644
--- a/tensorflow/contrib/lite/delegates/eager/kernel.cc
+++ b/tensorflow/contrib/lite/delegates/flex/kernel.cc
@@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/kernel.h"
+#include "tensorflow/contrib/lite/delegates/flex/kernel.h"
 
 #include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/builtin_ops.h"
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/context_util.h"
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
 #include "tensorflow/contrib/lite/string.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
@@ -28,10 +28,10 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 
-// Note: this is part of TF Lite's Eager delegation code which is to be
+// Note: this is part of TF Lite's Flex delegation code which is to be
 // completed soon.
 
-// This is the TF Lite op that is created by the eager delegate to handle
+// This is the TF Lite op that is created by the flex delegate to handle
 // execution of a supported subgraph. The usual flow is that the delegate
 // informs the interpreter of supported nodes in a graph, and each supported
 // subgraph is replaced with one instance of this kernel.
@@ -46,7 +46,7 @@ limitations under the License.
 // corresponding TensorFlow/Eager Op.
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace kernel {
 
 // Controls the lifetime of tensor handles in a vector.
@@ -72,11 +72,11 @@ class VectorOfHandles {
 
 // Executes the TensorFlow op given by 'op_name', with the attributes specified
 // in 'nodedef'. Inputs and outputs are given as indices into the 'buffer_map'.
-tensorflow::Status ExecuteEagerOp(tensorflow::EagerContext* eager_context,
-                                  BufferMap* buffer_map, const string& op_name,
-                                  const tensorflow::NodeDef& nodedef,
-                                  const std::vector<int>& inputs,
-                                  const std::vector<int>& outputs) {
+tensorflow::Status ExecuteFlexOp(tensorflow::EagerContext* eager_context,
+                                 BufferMap* buffer_map, const string& op_name,
+                                 const tensorflow::NodeDef& nodedef,
+                                 const std::vector<int>& inputs,
+                                 const std::vector<int>& outputs) {
   const tensorflow::AttrTypeMap* attr_types;
   TF_RETURN_WITH_CONTEXT_IF_ERROR(
       tensorflow::AttrTypeMapForOp(op_name.c_str(), &attr_types),
@@ -258,13 +258,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // Execute the TensorFlow Ops sequentially.
   for (const auto& node_data : op_data->nodes) {
     if (node_data.nodedef.op().empty()) {
-      context->ReportError(context, "Invalid NodeDef in Eager op '%s'",
+      context->ReportError(context, "Invalid NodeDef in Flex op '%s'",
                            node_data.name.c_str());
       return kTfLiteError;
     }
     auto status =
-        ExecuteEagerOp(eager_context, buffer_map, node_data.name,
-                       node_data.nodedef, node_data.inputs, node_data.outputs);
+        ExecuteFlexOp(eager_context, buffer_map, node_data.name,
+                      node_data.nodedef, node_data.inputs, node_data.outputs);
     TF_LITE_ENSURE_OK(context, ConvertStatus(context, status));
   }
 
@@ -295,5 +295,5 @@ TfLiteRegistration GetKernel() {
   return registration;
 }
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.h b/tensorflow/contrib/lite/delegates/flex/kernel.h
similarity index 79%
rename from tensorflow/contrib/lite/delegates/eager/kernel.h
rename to tensorflow/contrib/lite/delegates/flex/kernel.h
index 2478abccaa..ac9313a37b 100644
--- a/tensorflow/contrib/lite/delegates/eager/kernel.h
+++ b/tensorflow/contrib/lite/delegates/flex/kernel.h
@@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_
 
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
 // Return the registration object used to initialize and execute ops that will
 // be delegated to TensorFlow's Eager runtime. This TF Lite op is created by
-// the eager delegate to handle execution of a supported subgraph. The usual
+// the flex delegate to handle execution of a supported subgraph. The usual
 // flow is that the delegate informs the interpreter of supported nodes in a
 // graph, and each supported subgraph is replaced with one instance of this
 // kernel.
 TfLiteRegistration GetKernel();
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/kernel_test.cc b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
similarity index 94%
rename from tensorflow/contrib/lite/delegates/eager/kernel_test.cc
rename to tensorflow/contrib/lite/delegates/flex/kernel_test.cc
index 66f2226626..94a6f8b61a 100644
--- a/tensorflow/contrib/lite/delegates/eager/kernel_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
@@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/kernel.h"
+#include "tensorflow/contrib/lite/delegates/flex/kernel.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h"
-#include "tensorflow/contrib/lite/delegates/eager/test_util.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
+#include "tensorflow/contrib/lite/delegates/flex/test_util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 using ::testing::ContainsRegex;
@@ -31,12 +31,12 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteDelegate* delegate,
   TfLiteIntArray* size_and_nodes =
       ConvertVectorToTfLiteIntArray(supported_nodes);
   TF_LITE_ENSURE_STATUS(context->ReplaceSubgraphsWithDelegateKernels(
-      context, eager::GetKernel(), size_and_nodes, delegate));
+      context, flex::GetKernel(), size_and_nodes, delegate));
   TfLiteIntArrayFree(size_and_nodes);
   return kTfLiteOk;
 }
 
-class KernelTest : public testing::EagerModelTest {
+class KernelTest : public testing::FlexModelTest {
  public:
   KernelTest() {
     CHECK(DelegateData::Create(&delegate_data_).ok());
@@ -167,7 +167,7 @@ TEST_F(KernelTest, WrongSetOfNodes) {
 
   ASSERT_FALSE(Invoke());
   ASSERT_THAT(error_reporter().error_messages(),
-              ContainsRegex("Invalid NodeDef in Eager op"));
+              ContainsRegex("Invalid NodeDef in Flex op"));
 }
 
 TEST_F(KernelTest, MixedGraph) {
@@ -220,7 +220,7 @@ TEST_F(KernelTest, SplitGraph) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.cc b/tensorflow/contrib/lite/delegates/flex/test_util.cc
similarity index 76%
rename from tensorflow/contrib/lite/delegates/eager/test_util.cc
rename to tensorflow/contrib/lite/delegates/flex/test_util.cc
index d47be761fb..69c336a01a 100644
--- a/tensorflow/contrib/lite/delegates/eager/test_util.cc
+++ b/tensorflow/contrib/lite/delegates/flex/test_util.cc
@@ -13,25 +13,24 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/lite/delegates/eager/test_util.h"
+#include "tensorflow/contrib/lite/delegates/flex/test_util.h"
 
 #include "absl/memory/memory.h"
 #include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/contrib/lite/string.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace testing {
 
-bool EagerModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; }
+bool FlexModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; }
 
-void EagerModelTest::SetShape(int tensor_index,
-                              const std::vector<int>& values) {
+void FlexModelTest::SetShape(int tensor_index, const std::vector<int>& values) {
   ASSERT_EQ(interpreter_->ResizeInputTensor(tensor_index, values), kTfLiteOk);
   ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
 }
 
-std::vector<int> EagerModelTest::GetShape(int tensor_index) {
+std::vector<int> FlexModelTest::GetShape(int tensor_index) {
   std::vector<int> result;
   auto* dims = interpreter_->tensor(tensor_index)->dims;
   result.reserve(dims->size);
@@ -41,13 +40,13 @@ std::vector<int> EagerModelTest::GetShape(int tensor_index) {
   return result;
 }
 
-TfLiteType EagerModelTest::GetType(int tensor_index) {
+TfLiteType FlexModelTest::GetType(int tensor_index) {
   return interpreter_->tensor(tensor_index)->type;
 }
 
-void EagerModelTest::AddTensors(int num_tensors, const std::vector<int>& inputs,
-                                const std::vector<int>& outputs,
-                                TfLiteType type, const std::vector<int>& dims) {
+void FlexModelTest::AddTensors(int num_tensors, const std::vector<int>& inputs,
+                               const std::vector<int>& outputs, TfLiteType type,
+                               const std::vector<int>& dims) {
   interpreter_->AddTensors(num_tensors);
   for (int i = 0; i < num_tensors; ++i) {
     TfLiteQuantizationParams quant;
@@ -66,8 +65,8 @@ void EagerModelTest::AddTensors(int num_tensors, const std::vector<int>& inputs,
   CHECK_EQ(interpreter_->SetOutputs(outputs), kTfLiteOk);
 }
 
-void EagerModelTest::AddTfLiteMulOp(const std::vector<int>& inputs,
-                                    const std::vector<int>& outputs) {
+void FlexModelTest::AddTfLiteMulOp(const std::vector<int>& inputs,
+                                   const std::vector<int>& outputs) {
   static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
   reg.builtin_code = BuiltinOperator_MUL;
   reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
@@ -90,8 +89,8 @@ void EagerModelTest::AddTfLiteMulOp(const std::vector<int>& inputs,
            kTfLiteOk);
 }
 
-void EagerModelTest::AddTfOp(TfOpType op, const std::vector<int>& inputs,
-                             const std::vector<int>& outputs) {
+void FlexModelTest::AddTfOp(TfOpType op, const std::vector<int>& inputs,
+                            const std::vector<int>& outputs) {
   auto attr = [](const string& key, const string& value) {
     return " attr{ key: '" + key + "' value {" + value + "}}";
   };
@@ -107,28 +106,28 @@ void EagerModelTest::AddTfOp(TfOpType op, const std::vector<int>& inputs,
   if (op == kUnpack) {
     string attributes =
         type_attribute + attr("num", "i: 2") + attr("axis", "i: 0");
-    AddTfOp("EagerUnpack", "Unpack", attributes, inputs, outputs);
+    AddTfOp("FlexUnpack", "Unpack", attributes, inputs, outputs);
   } else if (op == kIdentity) {
     string attributes = type_attribute;
-    AddTfOp("EagerIdentity", "Identity", attributes, inputs, outputs);
+    AddTfOp("FlexIdentity", "Identity", attributes, inputs, outputs);
   } else if (op == kAdd) {
     string attributes = type_attribute;
-    AddTfOp("EagerAdd", "Add", attributes, inputs, outputs);
+    AddTfOp("FlexAdd", "Add", attributes, inputs, outputs);
   } else if (op == kMul) {
     string attributes = type_attribute;
-    AddTfOp("EagerMul", "Mul", attributes, inputs, outputs);
+    AddTfOp("FlexMul", "Mul", attributes, inputs, outputs);
   } else if (op == kNonExistent) {
     AddTfOp("NonExistentOp", "NonExistentOp", "", inputs, outputs);
   } else if (op == kIncompatibleNodeDef) {
     // "Cast" op is created without attributes - making it incompatible.
-    AddTfOp("EagerCast", "Cast", "", inputs, outputs);
+    AddTfOp("FlexCast", "Cast", "", inputs, outputs);
   }
 }
 
-void EagerModelTest::AddTfOp(const char* tflite_name, const string& tf_name,
-                             const string& nodedef_str,
-                             const std::vector<int>& inputs,
-                             const std::vector<int>& outputs) {
+void FlexModelTest::AddTfOp(const char* tflite_name, const string& tf_name,
+                            const string& nodedef_str,
+                            const std::vector<int>& inputs,
+                            const std::vector<int>& outputs) {
   static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
   reg.builtin_code = BuiltinOperator_CUSTOM;
   reg.custom_name = tflite_name;
@@ -154,5 +153,5 @@ void EagerModelTest::AddTfOp(const char* tflite_name, const string& tf_name,
 }
 
 }  // namespace testing
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.h b/tensorflow/contrib/lite/delegates/flex/test_util.h
similarity index 90%
rename from tensorflow/contrib/lite/delegates/eager/test_util.h
rename to tensorflow/contrib/lite/delegates/flex/test_util.h
index 816db41931..a8c81b90a3 100644
--- a/tensorflow/contrib/lite/delegates/eager/test_util.h
+++ b/tensorflow/contrib/lite/delegates/flex/test_util.h
@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_
 
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace testing {
 
 enum TfOpType {
@@ -35,12 +35,12 @@ enum TfOpType {
 };
 
 // This class creates models with TF and TFLite ops. In order to use this class
-// to test the Eager delegate, implement a function that calls
+// to test the Flex delegate, implement a function that calls
 // interpreter->ModifyGraphWithDelegate.
-class EagerModelTest : public ::testing::Test {
+class FlexModelTest : public ::testing::Test {
  public:
-  EagerModelTest() {}
-  ~EagerModelTest() {}
+  FlexModelTest() {}
+  ~FlexModelTest() {}
 
   bool Invoke();
 
@@ -104,7 +104,7 @@ class EagerModelTest : public ::testing::Test {
 
  private:
   // Helper method to add a TensorFlow op. tflite_names needs to start with
-  // "Eager" in order to work with the Eager delegate.
+  // "Flex" in order to work with the Flex delegate.
   void AddTfOp(const char* tflite_name, const string& tf_name,
                const string& nodedef_str, const std::vector<int>& inputs,
                const std::vector<int>& outputs);
@@ -113,7 +113,7 @@ class EagerModelTest : public ::testing::Test {
 };
 
 }  // namespace testing
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/util.cc b/tensorflow/contrib/lite/delegates/flex/util.cc
similarity index 96%
rename from tensorflow/contrib/lite/delegates/eager/util.cc
rename to tensorflow/contrib/lite/delegates/flex/util.cc
index 051246bf86..829bc388bf 100644
--- a/tensorflow/contrib/lite/delegates/eager/util.cc
+++ b/tensorflow/contrib/lite/delegates/flex/util.cc
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
 TfLiteStatus ConvertStatus(TfLiteContext* context,
                            const tensorflow::Status& status) {
@@ -100,5 +100,5 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) {
   }
 }
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/delegates/eager/util.h b/tensorflow/contrib/lite/delegates/flex/util.h
similarity index 89%
rename from tensorflow/contrib/lite/delegates/eager/util.h
rename to tensorflow/contrib/lite/delegates/flex/util.h
index 930cb99cb9..7f910e7316 100644
--- a/tensorflow/contrib/lite/delegates/eager/util.h
+++ b/tensorflow/contrib/lite/delegates/flex/util.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_
-#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_
+#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_
 
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/contrib/lite/c/c_api_internal.h"
@@ -21,7 +21,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 
 // Converts a tensorflow:Status into a TfLiteStatus. If the original status
 // represented an error, reports it using the given 'context'.
@@ -41,7 +41,7 @@ TF_DataType GetTensorFlowDataType(TfLiteType type);
 // Returns the TfLiteType that corresponds to the given TF C API Data type.
 TfLiteType GetTensorFlowLiteType(TF_DataType);
 
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
-#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_
+#endif  // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_
diff --git a/tensorflow/contrib/lite/delegates/eager/util_test.cc b/tensorflow/contrib/lite/delegates/flex/util_test.cc
similarity index 97%
rename from tensorflow/contrib/lite/delegates/eager/util_test.cc
rename to tensorflow/contrib/lite/delegates/flex/util_test.cc
index aebc91149c..5f049e7b0a 100644
--- a/tensorflow/contrib/lite/delegates/eager/util_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/util_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/lite/delegates/eager/util.h"
+#include "tensorflow/contrib/lite/delegates/flex/util.h"
 
 #include <cstdarg>
 
@@ -22,7 +22,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
-namespace eager {
+namespace flex {
 namespace {
 
 using tensorflow::DT_FLOAT;
@@ -132,7 +132,7 @@ TEST(UtilTest, TypeConversionsFromTensorFlow) {
 }
 
 }  // namespace
-}  // namespace eager
+}  // namespace flex
 }  // namespace tflite
 
 int main(int argc, char** argv) {
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index 2f4b663a28..9402105fa7 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -125,7 +125,7 @@ TfLiteStatus UnsupportedTensorFlowOp(TfLiteContext* context, TfLiteNode* node) {
   context->ReportError(
       context,
       "Regular TensorFlow ops are not supported by this interpreter. Make sure "
-      "you invoke the Eager delegate before inference.");
+      "you invoke the Flex delegate before inference.");
   return kTfLiteError;
 }
 
@@ -136,13 +136,13 @@ const TfLiteRegistration* BuiltinOpResolver::FindOp(tflite::BuiltinOperator op,
 
 const TfLiteRegistration* BuiltinOpResolver::FindOp(const char* op,
                                                     int version) const {
-  // Return the NULL Op for all ops whose name start with "Eager", allowing
+  // Return the NULL Op for all ops whose name start with "Flex", allowing
   // the interpreter to delegate their execution.
-  if (IsEagerOp(op)) {
+  if (IsFlexOp(op)) {
     static TfLiteRegistration null_op{
         nullptr, nullptr, &UnsupportedTensorFlowOp,
         nullptr, nullptr, BuiltinOperator_CUSTOM,
-        "Eager", 1};
+        "Flex",  1};
     return &null_op;
   }
   return MutableOpResolver::FindOp(op, version);
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index ea2817beec..eff6181a61 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -28,7 +28,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/nnapi_delegate.h"
 #endif
 #if defined(TFLITE_EXTENDED)
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #endif
 #include "tensorflow/contrib/lite/version.h"
 
@@ -451,7 +451,7 @@ TfLiteStatus InterpreterBuilder::operator()(
   (**interpreter).SetVariables(std::move(variables));
 
 #if defined(TFLITE_EXTENDED)
-  if (auto delegate = EagerDelegate::Create()) {
+  if (auto delegate = FlexDelegate::Create()) {
     (**interpreter)
         .ModifyGraphWithDelegate(std::move(delegate),
                                  /*allow_dynamic_tensors=*/true);
diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 627be8f44f..73a420c47b 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -241,10 +241,10 @@ def build_toco_convert_protos(input_tensors,
     toco.dump_graphviz_dir = dump_graphviz_dir
   toco.dump_graphviz_include_video = dump_graphviz_video
   if converter_mode == ConverterMode.TOCO_EXTENDED:
-    toco.allow_eager_ops = True
+    toco.allow_flex_ops = True
   elif converter_mode == ConverterMode.TOCO_EXTENDED_ALL:
-    toco.allow_eager_ops = True
-    toco.force_eager_ops = True
+    toco.allow_flex_ops = True
+    toco.force_flex_ops = True
 
   model = _model_flags_pb2.ModelFlags()
   model.change_concat_input_ranges = change_concat_input_ranges
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index 33f8fc1e8c..7b0df01d1d 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -432,7 +432,7 @@ class FromSessionTest(test_util.TensorFlowTestCase):
       interpreter.allocate_tensors()
     self.assertIn(
         'Regular TensorFlow ops are not supported by this interpreter. Make '
-        'sure you invoke the Eager delegate before inference.',
+        'sure you invoke the Flex delegate before inference.',
         str(error.exception))
 
   def testFloatTocoConverter(self):
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 55ef1172b2..f0bfec2338 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -164,7 +164,7 @@ cc_library(
         ":test_runner",
         "//tensorflow/contrib/lite:builtin_op_data",
         "//tensorflow/contrib/lite:framework",
-        "//tensorflow/contrib/lite/delegates/eager:delegate",
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
         "//tensorflow/contrib/lite/kernels:builtin_ops",
     ],
 )
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 014c80b5ef..53bd88d087 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -343,7 +343,7 @@ def toco_convert(graph_def_str, input_tensors, output_tensors,
       opts = ("--input_arrays={0} --output_arrays={1}".format(
           ",".join(input_arrays), ",".join(output_tensors)))
     elif FLAGS.run_with_extended:
-      opts += " --allow_eager_ops --force_eager_ops"
+      opts += " --allow_flex_ops --force_flex_ops"
     cmd = ("%s --input_file=%s --output_file=%s %s > %s 2>&1" %
            (bin_path, graphdef_file.name, output_file.name, opts,
             stdout_file.name))
diff --git a/tensorflow/contrib/lite/testing/tflite_diff_flags.h b/tensorflow/contrib/lite/testing/tflite_diff_flags.h
index 3874bc31d7..ad889a2f19 100644
--- a/tensorflow/contrib/lite/testing/tflite_diff_flags.h
+++ b/tensorflow/contrib/lite/testing/tflite_diff_flags.h
@@ -57,7 +57,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
                        "[optional] Number of full runs in each pass."),
       tensorflow::Flag("delegate", &values.delegate,
                        "[optional] Delegate to use for executing ops. Must be "
-                       "`{\"\", EAGER}`"),
+                       "`{\"\", FLEX}`"),
   };
 
   bool no_inputs = *argc == 1;
@@ -70,7 +70,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
              values.input_layer_shape.empty() || values.output_layer.empty()) {
     fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str());
     return {};
-  } else if (!(values.delegate == "" || values.delegate == "EAGER")) {
+  } else if (!(values.delegate == "" || values.delegate == "FLEX")) {
     fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str());
     return {};
   }
diff --git a/tensorflow/contrib/lite/testing/tflite_diff_util.h b/tensorflow/contrib/lite/testing/tflite_diff_util.h
index f67992139f..28b14bd143 100644
--- a/tensorflow/contrib/lite/testing/tflite_diff_util.h
+++ b/tensorflow/contrib/lite/testing/tflite_diff_util.h
@@ -45,7 +45,7 @@ struct DiffOptions {
   // second pass does multiple inferences back to back.
   int num_runs_per_pass;
   // Path to the delegate library to be loaded in order to execute ops. Must be
-  // `{"", EAGER}`.
+  // `{"", FLEX}`.
   string delegate;
 };
 
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc
index 17aa8cb293..ef49e6f8bc 100644
--- a/tensorflow/contrib/lite/testing/tflite_driver.cc
+++ b/tensorflow/contrib/lite/testing/tflite_driver.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include <iostream>
 
 #include "tensorflow/contrib/lite/builtin_op_data.h"
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #include "tensorflow/contrib/lite/testing/split.h"
 
 namespace tflite {
@@ -138,8 +138,8 @@ class TfLiteDriver::Expectation {
 
 TfLiteDriver::TfLiteDriver(bool use_nnapi, const string& delegate_name)
     : use_nnapi_(use_nnapi) {
-  if (delegate_name == "EAGER") {
-    delegate_ = EagerDelegate::Create();
+  if (delegate_name == "FLEX") {
+    delegate_ = FlexDelegate::Create();
   }
 }
 
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.h b/tensorflow/contrib/lite/testing/tflite_driver.h
index aed35f877d..dc2a4e5877 100644
--- a/tensorflow/contrib/lite/testing/tflite_driver.h
+++ b/tensorflow/contrib/lite/testing/tflite_driver.h
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <map>
 
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
@@ -53,7 +53,7 @@ class TfLiteDriver : public TestRunner {
 
   class Expectation;
 
-  std::unique_ptr<EagerDelegate> delegate_;
+  std::unique_ptr<FlexDelegate> delegate_;
   bool use_nnapi_ = false;
   std::unique_ptr<FlatBufferModel> model_;
   std::unique_ptr<Interpreter> interpreter_;
diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h
index f14dbc258b..2699ac76e1 100644
--- a/tensorflow/contrib/lite/toco/args.h
+++ b/tensorflow/contrib/lite/toco/args.h
@@ -248,9 +248,9 @@ struct ParsedTocoFlags {
   Arg<int64> dedupe_array_min_size_bytes = Arg<int64>(64);
   Arg<bool> split_tflite_lstm_inputs = Arg<bool>(true);
   // WARNING: Experimental interface, subject to change
-  Arg<bool> allow_eager_ops = Arg<bool>(false);
+  Arg<bool> allow_flex_ops = Arg<bool>(false);
   // WARNING: Experimental interface, subject to change
-  Arg<bool> force_eager_ops = Arg<bool>(false);
+  Arg<bool> force_flex_ops = Arg<bool>(false);
 };
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index e02d000e7e..5eaf6e27fc 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -2123,9 +2123,9 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
   Model* model = new Model;
   internal::ConverterMapType converter_map;
 
-  // This is used for the TFLite "Full Eager Mode" conversion. All the ops are
+  // This is used for the TFLite "Full Flex Mode" conversion. All the ops are
   // imported as `TensorFlowUnsupportedOperator`, and later all these ops are
-  // converted to TFLite Eager ops.
+  // converted to TFLite Flex ops.
   if (!tf_import_flags.import_all_ops_as_unsupported) {
     converter_map = internal::GetTensorFlowNodeConverterMap();
   }
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.h b/tensorflow/contrib/lite/toco/import_tensorflow.h
index 7db23f2d44..c5ff96956a 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.h
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.h
@@ -30,7 +30,7 @@ struct TensorFlowImportFlags {
 
   // Do not recognize any op and import all ops as
   // `TensorFlowUnsupportedOperator`. This is used to populated with the
-  // `force_eager_ops` flag.
+  // `force_flex_ops` flag.
   bool import_all_ops_as_unsupported = false;
 };
 
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index 9f60942f47..0c9fac249c 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -50,16 +50,16 @@ namespace {
 details::OperatorKey GetOperatorKey(
     const ::toco::Operator& op,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_eager_ops) {
+    bool allow_flex_ops) {
   string custom_code;
   if (op.type == OperatorType::kUnsupported) {
     const TensorFlowUnsupportedOperator& unsupported_op =
         static_cast<const TensorFlowUnsupportedOperator&>(op);
 
-    // TODO(b/113715895): When `allow_eager_ops` is on, for now there's no way
+    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
     // to populate a regular custom op. We need to find a way to fix this.
-    if (allow_eager_ops) {
-      custom_code = string(::tflite::kEagerCustomCodePrefix) +
+    if (allow_flex_ops) {
+      custom_code = string(::tflite::kFlexCustomCodePrefix) +
                     unsupported_op.tensorflow_op;
     } else {
       custom_code = unsupported_op.tensorflow_op;
@@ -101,11 +101,11 @@ void LoadTensorsMap(const Model& model, TensorsMap* tensors_map) {
 void LoadOperatorsMap(
     const Model& model, OperatorsMap* operators_map,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_eager_ops) {
+    bool allow_flex_ops) {
   // First find a list of unique operator types.
   std::set<OperatorKey> keys;
   for (const auto& op : model.operators) {
-    keys.insert(GetOperatorKey(*op, ops_by_type, allow_eager_ops));
+    keys.insert(GetOperatorKey(*op, ops_by_type, allow_flex_ops));
   }
   // Now assign indices to them and fill in the map.
   int index = 0;
@@ -216,7 +216,7 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
 
   for (const auto& op : model.operators) {
     const details::OperatorKey operator_key =
-        GetOperatorKey(*op, ops_by_type, params.allow_eager_ops);
+        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops);
     int op_index = operators_map.at(operator_key);
     int op_version = operator_key.version;
 
@@ -281,7 +281,7 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
     }
 
     int op_index = operators_map.at(
-        GetOperatorKey(*op, ops_by_type, params.allow_eager_ops));
+        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops));
 
     auto tflite_op_it = ops_by_type.find(op->type);
     BaseOperator* tflite_op = tflite_op_it == ops_by_type.end()
@@ -334,7 +334,7 @@ Offset<Vector<Offset<Buffer>>> ExportBuffers(
 
 void Export(const Model& model, string* output_file_contents,
             const ExportParams& params) {
-  const auto ops_by_type = BuildOperatorByTypeMap(params.allow_eager_ops);
+  const auto ops_by_type = BuildOperatorByTypeMap(params.allow_flex_ops);
   Export(model, output_file_contents, params, ops_by_type);
 }
 
@@ -349,7 +349,7 @@ void Export(
 
   details::OperatorsMap operators_map;
   details::LoadOperatorsMap(model, &operators_map, ops_by_type,
-                            params.allow_eager_ops);
+                            params.allow_flex_ops);
 
   std::vector<const Array*> buffers_to_write;
   Array empty_array;
diff --git a/tensorflow/contrib/lite/toco/tflite/export.h b/tensorflow/contrib/lite/toco/tflite/export.h
index b070a38768..29d6de4049 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.h
+++ b/tensorflow/contrib/lite/toco/tflite/export.h
@@ -26,7 +26,7 @@ namespace tflite {
 // The parameters for exporting a TFLite model.
 struct ExportParams {
   bool allow_custom_ops = false;
-  bool allow_eager_ops = false;
+  bool allow_flex_ops = false;
   bool quantize_weights = false;
 };
 
@@ -121,7 +121,7 @@ void LoadTensorsMap(const Model& model, TensorsMap* tensors_map);
 void LoadOperatorsMap(
     const Model& model, OperatorsMap* operators_map,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_eager_ops);
+    bool allow_flex_ops);
 
 }  // namespace details
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index 8d4d197c46..93882a91a7 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -105,7 +105,7 @@ TEST_F(ExportTest, LoadOperatorsMap) {
 
   details::OperatorsMap operators;
   const auto ops_by_type = BuildOperatorByTypeMap();
-  // TODO(ycling): Add a test for allow_eager_ops.
+  // TODO(ycling): Add a test for allow_flex_ops.
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
   EXPECT_EQ(0, operators[details::OperatorKey(OperatorType::kAdd, "", 1)]);
   EXPECT_EQ(1, operators[details::OperatorKey(OperatorType::kConv, "", 1)]);
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index ca2a6a19b3..9addbb81e7 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -1160,8 +1160,8 @@ class Unpack : public BuiltinOperator<UnpackOperator, ::tflite::UnpackOptions,
 class TensorFlowUnsupported : public BaseOperator {
  public:
   TensorFlowUnsupported(const string& name, OperatorType type,
-                        bool allow_eager_ops)
-      : BaseOperator(name, type), allow_eager_ops_(allow_eager_ops) {}
+                        bool allow_flex_ops)
+      : BaseOperator(name, type), allow_flex_ops_(allow_flex_ops) {}
 
   Options Serialize(const Operator& op,
                     flatbuffers::FlatBufferBuilder* builder) const override {
@@ -1177,9 +1177,9 @@ class TensorFlowUnsupported : public BaseOperator {
   std::unique_ptr<Operator> Deserialize(
       const BuiltinOptions* builtin_options,
       const CustomOptions* custom_options) const override {
-    // Deserializing Eager ops doesn't work now.
+    // Deserializing Flex ops doesn't work now.
     // TODO(ycling): Revisit and decide if we should fix the flow for importing
-    // TFLite models with Eager ops.
+    // TFLite models with Flex ops.
     auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
     if (custom_options) {
       auto flexbuffer_map =
@@ -1200,13 +1200,13 @@ class TensorFlowUnsupported : public BaseOperator {
       return std::unique_ptr<flexbuffers::Builder>();
     }
 
-    if (allow_eager_ops_) {
+    if (allow_flex_ops_) {
       fbb->Vector([&]() {
         fbb->String(node_def.op());
         fbb->String(op.tensorflow_node_def);
       });
       fbb->Finish();
-      LOG(INFO) << "Writing eager op: " << node_def.op();
+      LOG(INFO) << "Writing flex op: " << node_def.op();
       return std::unique_ptr<flexbuffers::Builder>(fbb.release());
     }
 
@@ -1316,13 +1316,13 @@ class TensorFlowUnsupported : public BaseOperator {
   }
 
  private:
-  const bool allow_eager_ops_;
+  const bool allow_flex_ops_;
 };
 
 namespace {
 // Build a vector containing all the known operators.
 std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
-    bool allow_eager_ops = false) {
+    bool allow_flex_ops = false) {
   std::vector<std::unique_ptr<BaseOperator>> ops;
   using tensorflow::MakeUnique;
   // Builtin Operators.
@@ -1434,7 +1434,7 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
   ops.push_back(MakeUnique<CTCBeamSearchDecoder>(
       "CTC_BEAM_SEARCH_DECODER", OperatorType::kCTCBeamSearchDecoder));
   ops.push_back(MakeUnique<TensorFlowUnsupported>(
-      "TENSORFLOW_UNSUPPORTED", OperatorType::kUnsupported, allow_eager_ops));
+      "TENSORFLOW_UNSUPPORTED", OperatorType::kUnsupported, allow_flex_ops));
 
   // There operators are supported by Toco, but not by TF Lite, and has no
   // attributes.
@@ -1512,11 +1512,11 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
 }  // namespace
 
 std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
-    bool allow_eager_ops) {
+    bool allow_flex_ops) {
   std::map<OperatorType, std::unique_ptr<BaseOperator>> result;
 
   std::vector<std::unique_ptr<BaseOperator>> ops =
-      BuildOperatorList(allow_eager_ops);
+      BuildOperatorList(allow_flex_ops);
   for (auto& op : ops) {
     result[op->type()] = std::move(op);
   }
@@ -1525,11 +1525,11 @@ std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
 }
 
 std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
-    bool allow_eager_ops) {
+    bool allow_flex_ops) {
   std::map<string, std::unique_ptr<BaseOperator>> result;
 
   std::vector<std::unique_ptr<BaseOperator>> ops =
-      BuildOperatorList(allow_eager_ops);
+      BuildOperatorList(allow_flex_ops);
   for (auto& op : ops) {
     result[op->name()] = std::move(op);
   }
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h
index 702fb28ea6..13d9f6c49a 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.h
+++ b/tensorflow/contrib/lite/toco/tflite/operator.h
@@ -26,15 +26,15 @@ namespace tflite {
 class BaseOperator;
 
 // Return a map contained all know TF Lite Operators, keyed by their names.
-// TODO(ycling): The pattern to propagate parameters (e.g. allow_eager_ops)
+// TODO(ycling): The pattern to propagate parameters (e.g. allow_flex_ops)
 // is ugly here. Consider refactoring.
 std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
-    bool allow_eager_ops = false);
+    bool allow_flex_ops = false);
 
 // Return a map contained all know TF Lite Operators, keyed by the type of
 // their tf.mini counterparts.
 std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
-    bool allow_eager_ops = false);
+    bool allow_flex_ops = false);
 
 // These are the flatbuffer types for custom and builtin options.
 using CustomOptions = flatbuffers::Vector<uint8_t>;
diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
index b6aebc0470..cff79776bc 100644
--- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
@@ -167,11 +167,11 @@ bool ParseTocoFlagsFromCommandLineFlags(
            "converted float model. Model size will be reduced and there will "
            "be latency improvements (at the cost of accuracy)."),
       // WARNING: Experimental interface, subject to change
-      Flag("allow_eager_ops", parsed_flags.allow_eager_ops.bind(),
-           parsed_flags.allow_eager_ops.default_value(), ""),
+      Flag("allow_flex_ops", parsed_flags.allow_flex_ops.bind(),
+           parsed_flags.allow_flex_ops.default_value(), ""),
       // WARNING: Experimental interface, subject to change
-      Flag("force_eager_ops", parsed_flags.force_eager_ops.bind(),
-           parsed_flags.force_eager_ops.default_value(), "")};
+      Flag("force_flex_ops", parsed_flags.force_flex_ops.bind(),
+           parsed_flags.force_flex_ops.default_value(), "")};
   bool asked_for_help =
       *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help"));
   if (asked_for_help) {
@@ -266,15 +266,15 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags,
   READ_TOCO_FLAG(split_tflite_lstm_inputs, FlagRequirement::kNone);
   READ_TOCO_FLAG(quantize_weights, FlagRequirement::kNone);
   READ_TOCO_FLAG(post_training_quantize, FlagRequirement::kNone);
-  READ_TOCO_FLAG(allow_eager_ops, FlagRequirement::kNone);
-  READ_TOCO_FLAG(force_eager_ops, FlagRequirement::kNone);
+  READ_TOCO_FLAG(allow_flex_ops, FlagRequirement::kNone);
+  READ_TOCO_FLAG(force_flex_ops, FlagRequirement::kNone);
 
-  if (parsed_toco_flags.force_eager_ops.value() &&
-      !parsed_toco_flags.allow_eager_ops.value()) {
-    // TODO(ycling): Consider to enforce `allow_eager_ops` when
-    // `force_eager_ops` is true.
-    LOG(WARNING) << "--force_eager_ops should always be used with "
-                    "--allow_eager_ops.";
+  if (parsed_toco_flags.force_flex_ops.value() &&
+      !parsed_toco_flags.allow_flex_ops.value()) {
+    // TODO(ycling): Consider to enforce `allow_flex_ops` when
+    // `force_flex_ops` is true.
+    LOG(WARNING) << "--force_flex_ops should always be used with "
+                    "--allow_flex_ops.";
   }
 
   // Deprecated flag handling.
diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto
index 53d60fed05..ca3e64485e 100644
--- a/tensorflow/contrib/lite/toco/toco_flags.proto
+++ b/tensorflow/contrib/lite/toco/toco_flags.proto
@@ -190,16 +190,16 @@ message TocoFlags {
   // (at the cost of accuracy).
   optional bool post_training_quantize = 26 [default = false];
 
-  // When enabled, unsupported ops will be converted to TFLite Eager ops.
+  // When enabled, unsupported ops will be converted to TFLite Flex ops.
   // TODO(ycling): Consider to rename the following 2 flags and don't call it
-  // "Eager".
-  // `allow_eager_ops` should always be used with `allow_custom_ops`.
+  // "Flex".
+  // `allow_flex_ops` should always be used with `allow_custom_ops`.
   // WARNING: Experimental interface, subject to change
-  optional bool allow_eager_ops = 27 [default = false];
+  optional bool allow_flex_ops = 27 [default = false];
 
-  // When enabled, all TensorFlow ops will be converted to TFLite Eager
-  // ops directly. This will force `allow_eager_ops` to true.
-  // `force_eager_ops` should always be used with `allow_eager_ops`.
+  // When enabled, all TensorFlow ops will be converted to TFLite Flex
+  // ops directly. This will force `allow_flex_ops` to true.
+  // `force_flex_ops` should always be used with `allow_flex_ops`.
   // WARNING: Experimental interface, subject to change
-  optional bool force_eager_ops = 28 [default = false];
+  optional bool force_flex_ops = 28 [default = false];
 }
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index a08b02485f..106494f354 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -198,7 +198,7 @@ std::unique_ptr<Model> Import(const TocoFlags& toco_flags,
               : (toco_flags.output_format() != TENSORFLOW_GRAPHDEF);
 
       tf_import_flags.import_all_ops_as_unsupported =
-          toco_flags.force_eager_ops();
+          toco_flags.force_flex_ops();
 
       model = ImportTensorFlowGraphDef(model_flags, tf_import_flags,
                                        input_file_contents);
@@ -409,9 +409,9 @@ void Export(const TocoFlags& toco_flags, const Model& model,
     case TFLITE: {
       toco::tflite::ExportParams params;
 
-      // Always allow custom ops when eager ops are allowed.
-      if (toco_flags.force_eager_ops() || toco_flags.allow_eager_ops()) {
-        params.allow_eager_ops = true;
+      // Always allow custom ops when flex ops are allowed.
+      if (toco_flags.force_flex_ops() || toco_flags.allow_flex_ops()) {
+        params.allow_flex_ops = true;
         params.allow_custom_ops = true;
       } else if (allow_custom_ops) {
         params.allow_custom_ops = true;
diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD
index dc97d22401..bc18d40313 100644
--- a/tensorflow/contrib/lite/tools/benchmark/BUILD
+++ b/tensorflow/contrib/lite/tools/benchmark/BUILD
@@ -36,7 +36,7 @@ cc_binary(
 )
 
 cc_binary(
-    name = "benchmark_model_plus_eager",
+    name = "benchmark_model_plus_flex",
     srcs = [
         "benchmark_main.cc",
     ],
@@ -49,7 +49,7 @@ cc_binary(
         "//conditions:default": [],
     }),
     deps = [
-        ":benchmark_tflite_model_plus_eager_lib",
+        ":benchmark_tflite_model_plus_flex_lib",
         ":logging",
     ],
 )
@@ -111,7 +111,7 @@ cc_library(
 )
 
 cc_library(
-    name = "benchmark_tflite_model_plus_eager_lib",
+    name = "benchmark_tflite_model_plus_flex_lib",
     srcs = [
         "benchmark_tflite_model.cc",
         "logging.h",
@@ -123,7 +123,7 @@ cc_library(
         ":logging",
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite:string_util",
-        "//tensorflow/contrib/lite/delegates/eager:delegate",
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
         "//tensorflow/contrib/lite/kernels:builtin_ops",
         "//tensorflow/contrib/lite/profiling:profile_summarizer",
     ],
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
index ef4f0fa80d..d989ee720d 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -24,7 +24,7 @@ limitations under the License.
 #include <vector>
 
 #ifdef TFLITE_EXTENDED
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #endif  // TFLITE_EXTENDED
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
@@ -306,8 +306,8 @@ void BenchmarkTfLiteModel::Init() {
   interpreter->UseNNAPI(use_nnapi);
 
 #ifdef TFLITE_EXTENDED
-  TFLITE_LOG(INFO) << "Instantiating Eager Delegate";
-  delegate_ = EagerDelegate::Create();
+  TFLITE_LOG(INFO) << "Instantiating Flex Delegate";
+  delegate_ = FlexDelegate::Create();
   if (delegate_) {
     interpreter->ModifyGraphWithDelegate(delegate_.get(),
                                          /*allow_dynamic_tensors=*/true);
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
index 8541512bc8..9343824b4a 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include <vector>
 
 #ifdef TFLITE_EXTENDED
-#include "tensorflow/contrib/lite/delegates/eager/delegate.h"
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #endif  // TFLITE_EXTENDED
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
@@ -74,7 +74,7 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
 
  private:
 #ifdef TFLITE_EXTENDED
-  std::unique_ptr<EagerDelegate> delegate_;
+  std::unique_ptr<FlexDelegate> delegate_;
 #endif  // TFLITE_EXTENDED
   std::unique_ptr<tflite::FlatBufferModel> model;
   std::unique_ptr<tflite::Interpreter> interpreter;
diff --git a/tensorflow/contrib/lite/util.cc b/tensorflow/contrib/lite/util.cc
index 7950653da9..6aa35b5227 100644
--- a/tensorflow/contrib/lite/util.cc
+++ b/tensorflow/contrib/lite/util.cc
@@ -18,9 +18,9 @@ limitations under the License.
 
 namespace tflite {
 
-bool IsEagerOp(const char* custom_name) {
-  return custom_name && strncmp(custom_name, kEagerCustomCodePrefix,
-                                strlen(kEagerCustomCodePrefix)) == 0;
+bool IsFlexOp(const char* custom_name) {
+  return custom_name && strncmp(custom_name, kFlexCustomCodePrefix,
+                                strlen(kFlexCustomCodePrefix)) == 0;
 }
 
 TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector<int>& input) {
diff --git a/tensorflow/contrib/lite/util.h b/tensorflow/contrib/lite/util.h
index 6d81f844f8..31292a6f81 100644
--- a/tensorflow/contrib/lite/util.h
+++ b/tensorflow/contrib/lite/util.h
@@ -26,15 +26,15 @@ limitations under the License.
 
 namespace tflite {
 
-// The prefix of Eager op custom code.
+// The prefix of Flex op custom code.
 // This will be matched agains the `custom_code` field in `OperatorCode`
 // Flatbuffer Table.
 // WARNING: This is an experimental API and subject to change.
-constexpr char kEagerCustomCodePrefix[] = "Eager";
+constexpr char kFlexCustomCodePrefix[] = "Flex";
 
 // Checks whether the prefix of the custom name indicates the operation is an
-// Eager operation.
-bool IsEagerOp(const char* custom_name);
+// Flex operation.
+bool IsFlexOp(const char* custom_name);
 
 // Converts a `std::vector` to a `TfLiteIntArray`. The caller takes ownership
 // of the returned pointer.
diff --git a/tensorflow/contrib/lite/util_test.cc b/tensorflow/contrib/lite/util_test.cc
index c5c1709f1d..25f3aded71 100644
--- a/tensorflow/contrib/lite/util_test.cc
+++ b/tensorflow/contrib/lite/util_test.cc
@@ -41,14 +41,14 @@ TEST(ConvertVectorToTfLiteIntArray, TestWithEmptyVector) {
   TfLiteIntArrayFree(output);
 }
 
-TEST(UtilTest, IsEagerOp) {
-  EXPECT_TRUE(IsEagerOp("Eager"));
-  EXPECT_TRUE(IsEagerOp("EagerOp"));
-  EXPECT_FALSE(IsEagerOp("eager"));
-  EXPECT_FALSE(IsEagerOp("Eage"));
-  EXPECT_FALSE(IsEagerOp("OpEager"));
-  EXPECT_FALSE(IsEagerOp(nullptr));
-  EXPECT_FALSE(IsEagerOp(""));
+TEST(UtilTest, IsFlexOp) {
+  EXPECT_TRUE(IsFlexOp("Flex"));
+  EXPECT_TRUE(IsFlexOp("FlexOp"));
+  EXPECT_FALSE(IsFlexOp("flex"));
+  EXPECT_FALSE(IsFlexOp("Fle"));
+  EXPECT_FALSE(IsFlexOp("OpFlex"));
+  EXPECT_FALSE(IsFlexOp(nullptr));
+  EXPECT_FALSE(IsFlexOp(""));
 }
 
 }  // namespace
-- 
GitLab


From 8276ef6088ecedd4a5f62a8eacd35a075a43746c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 15:07:12 -0700
Subject: [PATCH 0815/1357] Updates Interpreter to be initialized with a
 MappedByteBuffer for backward compatibility.

PiperOrigin-RevId: 214843130
---
 .../java/org/tensorflow/lite/Interpreter.java     | 15 +++++++++++++++
 .../java/org/tensorflow/lite/InterpreterTest.java |  4 ++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
index eacfa0c827..5cc6e754f3 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
@@ -17,6 +17,7 @@ package org.tensorflow.lite;
 
 import java.io.File;
 import java.nio.ByteBuffer;
+import java.nio.MappedByteBuffer;
 import java.util.HashMap;
 import java.util.Map;
 import org.checkerframework.checker.nullness.qual.NonNull;
@@ -148,6 +149,20 @@ public final class Interpreter implements AutoCloseable {
     this(byteBuffer, new Options().setNumThreads(numThreads));
   }
 
+  /**
+   * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file.
+   *
+   * <p>The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code
+   * Interpreter}.
+   *
+   * @deprecated Prefer using the {@link #Interpreter(ByteBuffer,Options)} constructor. This method
+   *     will be removed in a future release.
+   */
+  @Deprecated
+  public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer) {
+    this(mappedByteBuffer, /* options= */ null);
+  }
+
   /**
    * Initializes a {@code Interpreter} with a {@code ByteBuffer} of a model file and a set of custom
    * {@link #Options}.
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
index fdd5063156..a98fca0132 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
@@ -71,7 +71,7 @@ public final class InterpreterTest {
     Path path = MODEL_FILE.toPath();
     FileChannel fileChannel =
         (FileChannel) Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ));
-    MappedByteBuffer mappedByteBuffer =
+    ByteBuffer mappedByteBuffer =
         fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size());
     Interpreter interpreter = new Interpreter(mappedByteBuffer);
     float[] oneD = {1.23f, 6.54f, 7.81f};
@@ -118,7 +118,7 @@ public final class InterpreterTest {
     byteBuffer.order(ByteOrder.nativeOrder());
     fileChannel.read(byteBuffer);
     try {
-      Interpreter interpreter = new Interpreter(byteBuffer);
+      new Interpreter(byteBuffer);
       fail();
     } catch (IllegalArgumentException e) {
       assertThat(e)
-- 
GitLab


From 17320a0543de32715159a732be065a55a3d990db Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Thu, 27 Sep 2018 15:28:50 -0700
Subject: [PATCH 0816/1357] Fix heartbeat probing.

PiperOrigin-RevId: 214846488
---
 .../contrib/tpu/python/tpu/session_support.py | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/session_support.py b/tensorflow/contrib/tpu/python/tpu/session_support.py
index 24b9bd136b..05264f5a46 100644
--- a/tensorflow/contrib/tpu/python/tpu/session_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/session_support.py
@@ -44,21 +44,25 @@ class CoordinatorShutdownException(Exception):
 def _make_heartbeat_op(session, device, request_ph):
   """Return a heartbeat op or None if heartbeats are not supported by device."""
   try:
-    with ops.device(device):
-      heartbeat_op = tpu_ops.worker_heartbeat(request_ph)
-      request = event_pb2.WorkerHeartbeatRequest()
-      options = config_pb2.RunOptions(timeout_in_ms=5000)
-      session.run(
-          heartbeat_op,
-          feed_dict={request_ph: request.SerializeToString()},
-          options=options)
-      return heartbeat_op
+    # Test if we can connect in a isolated graph + session
+    with ops.Graph().as_default():
+      with session_lib.Session(target=session.sess_str) as temp_session:
+        with ops.device(device):
+          heartbeat_op = tpu_ops.worker_heartbeat('')
+          options = config_pb2.RunOptions(timeout_in_ms=5000)
+          temp_session.run(heartbeat_op, options=options)
   except errors.InvalidArgumentError as _:
+    logging.warning('Error running heartbeat on %s', device)
     return None
   except errors.DeadlineExceededError as _:
     logging.warning('Timeout connecting to %s when testing heartbeat', device)
     return None
 
+  # If we successfully connected and pinged the worker, go ahead and construct
+  # the operation.
+  with ops.device(device):
+    return tpu_ops.worker_heartbeat(request_ph)
+
 
 class WorkerHeartbeatManager(object):
   """Manages the status/heartbeat monitor for a set of workers."""
@@ -171,7 +175,7 @@ class WorkerHeartbeatManager(object):
 def all_worker_devices(session):
   """Return a list of devices for each worker in the system."""
   devices = session.list_devices()
-  return [device.name for device in devices if 'CPU' in device.name]
+  return [device.name for device in devices if ':CPU:' in device.name]
 
 
 class WatchdogManager(threading.Thread):
-- 
GitLab


From a3291ab1f2cb9ea2c4e4b3b9b26ad1a1866dfc50 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Thu, 27 Sep 2018 15:32:00 -0700
Subject: [PATCH 0817/1357] Update function registration with both inference
 function and forward/backward function pair.

PiperOrigin-RevId: 214847027
---
 tensorflow/python/eager/function.py      | 21 ++++++++++++--
 tensorflow/python/eager/function_test.py | 37 +++++++++++++++---------
 2 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index b28befeb62..dd3e1a3723 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1328,8 +1328,25 @@ def register(func, *args, **kwargs):
                      "Got type: %s" % type(func))
   concrete_func = func.get_concrete_function(*args, **kwargs)
   graph = ops.get_default_graph()
-  concrete_func._inference_function.add_to_graph(graph)   # pylint: disable=protected-access
-  # TODO(scottzhu): support concrete_func._backward_graph_function in future.
+
+  # There are two situations for the actual call of a defun:
+  # 1. If none of the input args are resource variables or watch by any tape,
+  #   it will run the _inference_function of concrete_func for forward pass, and
+  #   the gradient will be generated by standard mechanism.
+  # 2. Otherwise, defun will create two functions, one for forward pass, and the
+  #   backward pass will be created via tape.
+  # When registering the function, we put both cases into graph.
+  # pylint: disable=protected-access
+  concrete_func._inference_function.add_to_graph(graph)
+
+  if concrete_func._backward_graph_function is None:
+    concrete_func._construct_backprop_function()
+  forward_function = concrete_func._forward_function
+  backward_function = concrete_func._backward_graph_function._inference_function
+  forward_function.add_to_graph(graph)
+  backward_function.add_to_graph(graph)
+  # pylint: enable=protected-access
+
   return concrete_func
 
 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 59faf967c5..34a2648e26 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1669,12 +1669,23 @@ class FunctionTest(test.TestCase):
 
         graph = ops.get_default_graph()
         # pylint: disable=protected-access
-        self.assertEqual(len(graph._functions), 2)
+        self.assertEqual(len(graph._functions), 6)
+        # two sets of functions, each of them are (inference, forward, backward)
         functions = list(graph._functions.values())
-        pre_register_matmul_func_name = functions[0].definition.signature.name
-        self.assertRegexpMatches(pre_register_matmul_func_name, '.*matmul.*')
-        pre_register_add_func_name = functions[1].definition.signature.name
-        self.assertRegexpMatches(pre_register_add_func_name, '.*add.*')
+        captured_function_names = [
+            f.definition.signature.name for f in functions
+        ]
+        expected_func_name_regex = [
+            '.*inference.*matmul.*',
+            '.*forward.*matmul.*',
+            '.*inference.*backward.*matmul.*',
+            '.*inference.*add.*',
+            '.*forward.*add.*',
+            '.*inference.*backward.*add.*',
+        ]
+        for i in range(len(functions)):
+          self.assertRegexpMatches(captured_function_names[i],
+                                   expected_func_name_regex[i])
 
         sq = defun_matmul(t, t)
         double = add(t, t)
@@ -1682,12 +1693,11 @@ class FunctionTest(test.TestCase):
         self.assertAllEqual(double.eval().reshape(-1), [2, 4, 6, 8])
         # Make sure the pre registered function is used, and no other function
         # is added.
-        self.assertEqual(len(graph._functions), 2)
+        self.assertEqual(len(graph._functions), 6)
         functions = list(graph._functions.values())
-        called_func_name = functions[0].definition.signature.name
-        self.assertEqual(pre_register_matmul_func_name, called_func_name)
-        called_func_name = functions[1].definition.signature.name
-        self.assertEqual(pre_register_add_func_name, called_func_name)
+        for i in range(len(functions)):
+          self.assertEquals(captured_function_names[i],
+                            functions[i].definition.signature.name)
 
   def testRegisterFunctionWithInputSignature(self):
     def matmul(x, y):
@@ -1705,7 +1715,7 @@ class FunctionTest(test.TestCase):
 
         graph = ops.get_default_graph()
         # pylint: disable=protected-access
-        self.assertEqual(len(graph._functions), 1)
+        self.assertEqual(len(graph._functions), 3)
 
         # Test input param shape mismatch
         t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
@@ -1728,7 +1738,7 @@ class FunctionTest(test.TestCase):
         graph = ops.get_default_graph()
         # Only one function is registered since the input param are in same type
         # pylint: disable=protected-access
-        self.assertEqual(len(graph._functions), 1)
+        self.assertEqual(len(graph._functions), 3)
 
   def testCallingFunctionWithDifferentVariables(self):
 
@@ -1767,7 +1777,8 @@ class FunctionTest(test.TestCase):
                                  'be Tensors;.*'):
       graph_function('Not a Tensor.')
 
-  def testSwapImplementationWithGrapplerPlugin(self):
+  # TODO(scottzhu): Revive the test once the grappler plugin is updated.
+  def disabled_testSwapImplementationWithGrapplerPlugin(self):
     rewrites = rewriter_config_pb2.RewriterConfig()
     # function_optimizer has to be turn off, otherwise it will delete the
     # registered function if it does not get called.
-- 
GitLab


From bdab0b3c111bbe1c9656fa2228f1a4d28df5a7bf Mon Sep 17 00:00:00 2001
From: Mingsheng Hong <hongm@google.com>
Date: Thu, 27 Sep 2018 15:32:38 -0700
Subject: [PATCH 0818/1357] Added an experimental API for user to set an
 internal error status.

See
https://github.com/apple/swift/pull/19588/files#diff-923cd5ac82727b31d446c23641b3d749
for an example usage.

Also removed an experimental API that's no longer needed.

PiperOrigin-RevId: 214847132
---
 tensorflow/c/c_api_experimental.cc | 34 +++---------------------------
 tensorflow/c/c_api_experimental.h  |  6 ++----
 2 files changed, 5 insertions(+), 35 deletions(-)

diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index f316e4ba67..d4b78138e9 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -8738,35 +8738,7 @@ void TFE_TensorHandlePrintDebugString(TFE_TensorHandle* handle) {
   TF_DeleteStatus(status);
 }
 
-TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx) {
-  // Intentionally LOG into INFO below for ease of debugging.
-  VLOG(1) << "TFE_RunConstOp called";
-
-  auto* status = TF_NewStatus();
-  auto* op = TFE_NewOp(ctx, "Const", status);
-  CheckOk(status);
-  TFE_OpSetAttrType(op, "dtype", TF_FLOAT);
-
-  auto* tensor =
-      TF_AllocateTensor(TF_FLOAT, /*shape.data()*/ nullptr, /*shape.size()*/ 0,
-                        TF_DataTypeSize(TF_FLOAT) * 1);
-  auto* ptr = reinterpret_cast<char*>(TF_TensorData(tensor));
-  *reinterpret_cast<float*>(ptr) = 17.0;
-
-  TFE_OpSetAttrTensor(op, "value", tensor, status);
-  CheckOk(status);
-  TF_DeleteTensor(tensor);
-  VLOG(1) << "New op created";
-
-  TFE_TensorHandle* retval;
-  int num_retvals = 1;
-  TFE_Execute(op, &retval, &num_retvals, status);
-  CheckOk(status);
-  CHECK_EQ(num_retvals, 1);
-  VLOG(1) << "Op executed";
-
-  TFE_DeleteOp(op);
-  TF_DeleteStatus(status);
-
-  return retval;
+TF_CAPI_EXPORT extern void TF_MakeInternalErrorStatus(TF_Status* status,
+                                                      const char* errMsg) {
+  status->status = tensorflow::errors::Internal(errMsg);
 }
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index 950ad9aeed..d98d532e32 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -180,10 +180,8 @@ TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueVariantTensor(
 TF_CAPI_EXPORT extern void TFE_TensorHandlePrintDebugString(
     TFE_TensorHandle* handle);
 
-// Returns a const scalar tensor.
-// Caller owns both the input and the output tensor handles.
-// TODO: Remove this API with hard-coded tensor computation.
-TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx);
+TF_CAPI_EXPORT extern void TF_MakeInternalErrorStatus(TF_Status* status,
+                                                      const char* errMsg);
 
 #ifdef __cplusplus
 } /* end extern "C" */
-- 
GitLab


From 8f85a9de475f0acf0abef4fabc12943e2e487bf7 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Thu, 27 Sep 2018 15:37:49 -0700
Subject: [PATCH 0819/1357] Do not specify dilation rate to depthwise conv2d.

PiperOrigin-RevId: 214848057
---
 tensorflow/contrib/quantize/python/fold_batch_norms.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index e5790a6e13..7575b1b6cd 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -418,10 +418,11 @@ def _CloneWithNewOperands(layer_op, input_tensor, weight_tensor,
         transpose_b=layer_op.get_attr('transpose_b'),
         name=new_layer_name)
   elif layer_op.type == 'DepthwiseConv2dNative':
+    # We don't copy dilation rate because we reuse the input SpaceToBatch
+    # and create our own BatchToSpace operation below.
     conv = nn.depthwise_conv2d(
         input_tensor,
         weight_tensor,
-        rate=layer_op.get_attr('dilations'),
         strides=layer_op.get_attr('strides'),
         padding=layer_op.get_attr('padding'),
         name=new_layer_name)
-- 
GitLab


From bfec3d54fed955a4b145220e64c48b94fbb04ae7 Mon Sep 17 00:00:00 2001
From: Yuanzhong Xu <yuanzx@google.com>
Date: Thu, 27 Sep 2018 15:38:48 -0700
Subject: [PATCH 0820/1357] [XLA] Use a result cache to speed up
 InstructionFusion::CanFuseOnAllPaths()

PiperOrigin-RevId: 214848216
---
 .../xla/service/instruction_fusion.cc         | 29 ++++++++++++++-----
 .../compiler/xla/service/instruction_fusion.h | 11 +++++--
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index 3fdc2cee9a..e884122fcb 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -188,13 +188,20 @@ bool InstructionFusion::EffectivelyAtMostUnary(HloInstruction* hlo) {
 
 bool InstructionFusion::CanFuseOnAllPaths(
     HloInstruction* producer, HloInstruction* consumer,
-    const HloInstructionSet& do_not_duplicate) {
+    const HloInstructionSet& do_not_fuse,
+    tensorflow::gtl::FlatMap<std::pair<HloInstruction*, HloInstruction*>, bool>*
+        result_cache) {
   if (consumer == producer) {
     return true;
   }
   if (!consumer->IsFusible()) {
     return false;
   }
+  auto cache_it = result_cache->find(std::make_pair(producer, consumer));
+  if (cache_it != result_cache->end()) {
+    return cache_it->second;
+  }
+  bool result = true;
   for (int64 i = 0, e = consumer->operand_count(); i < e; ++i) {
     auto* consumer_operand = consumer->mutable_operand(i);
     // If the operand is not on a path to the producer, it doesn't matter
@@ -202,20 +209,23 @@ bool InstructionFusion::CanFuseOnAllPaths(
     if (!reachability_->IsReachable(producer, consumer_operand)) {
       continue;
     }
-    if (do_not_duplicate.count(consumer_operand) > 0 ||
-        !ShouldFuse(consumer, i)) {
-      return false;
+    if (do_not_fuse.count(consumer_operand) > 0 || !ShouldFuse(consumer, i)) {
+      result = false;
+      break;
     }
     // The producer is reachable from consumer_operand which means we need
     // to be able to fuse consumer_operand into consumer in order for
     // producer to be fusible into consumer on all paths.
     // Perform the recursive step: make sure producer can be fused into
     // consumer_operand on all paths.
-    if (!CanFuseOnAllPaths(producer, consumer_operand, do_not_duplicate)) {
-      return false;
+    if (!CanFuseOnAllPaths(producer, consumer_operand, do_not_fuse,
+                           result_cache)) {
+      result = false;
+      break;
     }
   }
-  return true;
+  result_cache->emplace(std::make_pair(producer, consumer), result);
+  return result;
 }
 
 InstructionFusion::HloInstructionSet
@@ -231,6 +241,8 @@ InstructionFusion::ComputeGloballyUnfusible(
   // fusing operations that require duplication later depending on
   // is_expensive_().
   HloInstructionSet do_not_duplicate;
+  tensorflow::gtl::FlatMap<std::pair<HloInstruction*, HloInstruction*>, bool>
+      can_fuse_on_all_paths_result_cache;
   for (HloInstruction* consumer : post_order) {
     for (HloInstruction* producer : consumer->operands()) {
       if (do_not_duplicate.count(producer) > 0) {
@@ -286,7 +298,8 @@ InstructionFusion::ComputeGloballyUnfusible(
       // A will be not allowed to be fused into B, as it cannot be fused via
       // all paths.
       if (producer->IsFusible() &&
-          CanFuseOnAllPaths(producer, consumer, do_not_duplicate)) {
+          CanFuseOnAllPaths(producer, consumer, do_not_duplicate,
+                            &can_fuse_on_all_paths_result_cache)) {
         continue;
       }
       do_not_duplicate.insert(producer);
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h
index 7e1196fb7f..c1ec3b18a1 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/instruction_fusion.h
@@ -151,8 +151,15 @@ class InstructionFusion : public HloModulePass {
 
   // Whether or not we can fuse producer into consumer on all paths
   // from the producer to the consumer where nodes are HLOs and edges are uses.
-  bool CanFuseOnAllPaths(HloInstruction* producer, HloInstruction* consumer,
-                         const HloInstructionSet& do_not_fuse);
+  //
+  // A map from <producer, consumer> to a bool is required as the result cache
+  // to store and query the results of calls to this function, in order to avoid
+  // repeated computations.
+  bool CanFuseOnAllPaths(
+      HloInstruction* producer, HloInstruction* consumer,
+      const HloInstructionSet& do_not_fuse,
+      tensorflow::gtl::FlatMap<std::pair<HloInstruction*, HloInstruction*>,
+                               bool>* result_cache);
 
   // Computes the set of nodes that we do not want to fuse into any of their
   // consumers based on a global analysis of the HLO graph.
-- 
GitLab


From b56164c72b8f123bfc675f930111af8801fe034f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 15:49:53 -0700
Subject: [PATCH 0821/1357] Automated rollback of commit
 425e96f3ae4eb338268e3738260f9d79e4bdd893. Revert #20539.

PiperOrigin-RevId: 214849875
---
 tensorflow/contrib/layers/python/layers/embedding_ops.py | 8 +++-----
 tensorflow/python/feature_column/feature_column_v2.py    | 8 +++-----
 tensorflow/python/ops/embedding_ops.py                   | 8 +++-----
 3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py
index 17ee8c0733..60e1d85ea9 100644
--- a/tensorflow/contrib/layers/python/layers/embedding_ops.py
+++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py
@@ -112,11 +112,9 @@ def safe_embedding_lookup_sparse(embedding_weights,
   dtype = sparse_weights.dtype if sparse_weights is not None else None
   if isinstance(embedding_weights, variables.PartitionedVariable):
     embedding_weights = list(embedding_weights)
-  if not isinstance(embedding_weights[0],
-                    resource_variable_ops.ResourceVariable):
-    embedding_weights = [
-        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
-    ]
+  embedding_weights = [
+      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
+  ]
 
   contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                               [sparse_weights])
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index b62c16ea5a..289f6d0d14 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -3447,11 +3447,9 @@ def _safe_embedding_lookup_sparse(embedding_weights,
     raise ValueError('Missing embedding_weights %s.' % embedding_weights)
 
   dtype = sparse_weights.dtype if sparse_weights is not None else None
-  if not isinstance(embedding_weights[0],
-                    resource_variable_ops.ResourceVariable):
-    embedding_weights = [
-        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
-    ]
+  embedding_weights = [
+      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
+  ]
 
   with ops.name_scope(name, 'embedding_lookup',
                       embedding_weights + [sparse_ids,
diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py
index 60d73a1693..6263041b8d 100644
--- a/tensorflow/python/ops/embedding_ops.py
+++ b/tensorflow/python/ops/embedding_ops.py
@@ -550,11 +550,9 @@ def safe_embedding_lookup_sparse(embedding_weights,
     raise ValueError('Missing embedding_weights %s.' % embedding_weights)
 
   dtype = sparse_weights.dtype if sparse_weights is not None else None
-  if not isinstance(embedding_weights[0],
-                    resource_variable_ops.ResourceVariable):
-    embedding_weights = [
-        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
-    ]
+  embedding_weights = [
+      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
+  ]
 
   with ops.name_scope(name, 'embedding_lookup',
                       embedding_weights + [sparse_ids,
-- 
GitLab


From b8c86c3bbd8271ed968087f24e7fb704103bc733 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 15:50:41 -0700
Subject: [PATCH 0822/1357] Support saving/restoring of string tensors with
 lengths greater than 2^32.

PiperOrigin-RevId: 214849978
---
 tensorflow/core/util/tensor_bundle/BUILD      |   1 +
 .../core/util/tensor_bundle/tensor_bundle.cc  |  52 +++++++++-----
 .../util/tensor_bundle/tensor_bundle_test.cc  |  64 +++++++++++++++++-
 .../testdata/old_string_tensors/README        |   3 +
 .../foo.data-00000-of-00001                   | Bin 0 -> 1080 bytes
 .../testdata/old_string_tensors/foo.index     | Bin 0 -> 211 bytes
 6 files changed, 100 insertions(+), 20 deletions(-)
 create mode 100644 tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/README
 create mode 100644 tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.data-00000-of-00001
 create mode 100644 tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.index

diff --git a/tensorflow/core/util/tensor_bundle/BUILD b/tensorflow/core/util/tensor_bundle/BUILD
index 648358606c..4d4db86df2 100644
--- a/tensorflow/core/util/tensor_bundle/BUILD
+++ b/tensorflow/core/util/tensor_bundle/BUILD
@@ -64,6 +64,7 @@ cc_library(
 tf_cc_test(
     name = "tensor_bundle_test",
     srcs = ["tensor_bundle_test.cc"],
+    data = glob(["testdata/**"]),
     deps = [
         ":tensor_bundle",
         "//tensorflow/core:framework",
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
index ea8a259d1a..2dcb57a1f9 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
@@ -64,27 +64,36 @@ namespace {
 // Reads "num_elements" string elements from file[offset, offset+size) into the
 // length-N "destination".  Discards the original content of "destination".
 //
-// Checksums the string lengths (as restored uint32, not varint32 bytes) and
-// string bytes, and stores it into "actual_crc32c".
+// Checksums the string lengths (as restored uint32 or uint64, not varint64
+// bytes) and string bytes, and stores it into "actual_crc32c".
 Status ReadStringTensor(io::InputBuffer* buffered_file, size_t num_elements,
                         size_t offset, size_t size, string* destination,
                         uint32* actual_crc32c) {
   if (size == 0) return Status::OK();
   CHECK_GT(size, 0);
 
-  // Reads "num_elements" varint32's from "buffered_file".
+  // Reads "num_elements" varint64's from "buffered_file".
   TF_RETURN_IF_ERROR(buffered_file->Seek(offset));
-  std::vector<uint32> string_lengths(num_elements);
+  std::vector<uint64> string_lengths(num_elements);
   for (size_t i = 0; i < num_elements; ++i) {
-    TF_RETURN_IF_ERROR(buffered_file->ReadVarint32(&string_lengths[i]));
+    TF_RETURN_IF_ERROR(buffered_file->ReadVarint64(&string_lengths[i]));
+    if (string_lengths[i] <= UINT32_MAX) {
+      // We need to do this because older checkpoints only used uint32s and we
+      // should still support them.
+      const uint32 elem_size_uint32 = static_cast<uint32>(string_lengths[i]);
+      *actual_crc32c = crc32c::Extend(
+          *actual_crc32c, reinterpret_cast<const char*>(&elem_size_uint32),
+          sizeof(uint32));
+    } else {
+      *actual_crc32c = crc32c::Extend(
+          *actual_crc32c, reinterpret_cast<const char*>(&string_lengths[i]),
+          sizeof(uint64));
+    }
   }
   if (offset + size < buffered_file->Tell()) {
     return errors::DataLoss("String lengths longer than expected offset ",
                             offset + size);
   }
-  *actual_crc32c =
-      crc32c::Value(reinterpret_cast<const char*>(string_lengths.data()),
-                    sizeof(uint32) * num_elements);
 
   // Reads the length-checksum.
   uint32 length_checksum = 0;
@@ -104,7 +113,7 @@ Status ReadStringTensor(io::InputBuffer* buffered_file, size_t num_elements,
 
   // Reads the actual string bytes.
   for (size_t i = 0; i < num_elements; ++i) {
-    const uint32 string_length = string_lengths[i];
+    const uint64 string_length = string_lengths[i];
     string* buffer = &destination[i];
 
     buffer->resize(string_length);
@@ -218,8 +227,8 @@ Status WriteTensor(const Tensor& val, FileOutputBuffer* out,
 Status WriteStringTensor(const Tensor& val, FileOutputBuffer* out,
                          size_t* bytes_written, uint32* crc32c) {
   // On-disk format:
-  //   [varint32 len0]..[varint32 lenL][4 byte cksum on lengths][string bytes]
-  // Var "crc32c" checksums the string lengths (as uint32, not varint32 bytes),
+  //   [varint64 len0]..[varint64 lenL][4 byte cksum on lengths][string bytes]
+  // Var "crc32c" checksums the string lengths (as uint64, not varint64 bytes),
   // the length-checksum, and all the string bytes.
   DCHECK_EQ(val.dtype(), DT_STRING);
   const string* strings = GetStringBackingBuffer(val);
@@ -230,12 +239,21 @@ Status WriteStringTensor(const Tensor& val, FileOutputBuffer* out,
   *crc32c = 0;
   for (int64 i = 0; i < val.NumElements(); ++i) {
     const string* elem = &strings[i];
-    DCHECK_EQ(elem->size(), static_cast<uint32>(elem->size()));
-    const uint32 elem_size = static_cast<uint32>(elem->size());
-
-    core::PutVarint32(&lengths, elem_size);
-    *crc32c = crc32c::Extend(*crc32c, reinterpret_cast<const char*>(&elem_size),
-                             sizeof(uint32));
+    DCHECK_EQ(elem->size(), static_cast<uint64>(elem->size()));
+    const uint64 elem_size = static_cast<uint64>(elem->size());
+
+    core::PutVarint64(&lengths, elem_size);
+    if (elem_size <= UINT32_MAX) {
+      // We need to do this because older checkpoints only used uint32s and we
+      // should still support them.
+      const uint32 elem_size_uint32 = static_cast<uint32>(elem_size);
+      *crc32c = crc32c::Extend(*crc32c,
+                               reinterpret_cast<const char*>(&elem_size_uint32),
+                               sizeof(uint32));
+    } else {
+      *crc32c = crc32c::Extend(
+          *crc32c, reinterpret_cast<const char*>(&elem_size), sizeof(uint64));
+    }
   }
   TF_RETURN_IF_ERROR(out->Append(lengths));
   *bytes_written = lengths.size();
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
index 59c42baa06..9567e4750b 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
@@ -39,6 +39,11 @@ string Prefix(const string& prefix) {
   return strings::StrCat(testing::TmpDir(), "/", prefix);
 }
 
+string TestdataPrefix(const string& prefix) {
+  return strings::StrCat(testing::TensorFlowSrcRoot(),
+                         "/core/util/tensor_bundle/testdata/", prefix);
+}
+
 template <typename T>
 Tensor Constant(T v, TensorShape shape) {
   Tensor ret(DataTypeToEnum<T>::value, shape);
@@ -458,7 +463,26 @@ TEST(TensorBundleTest, NonStandardShapes) {
   TestNonStandardShapes<qint8>();
 }
 
+TEST(TensorBundleTest, StringTensorsOldFormat) {
+  // Test string tensor bundle made with previous version of code that use
+  // varint32s to store string lengths (we now use varint64s).
+  BundleReader reader(Env::Default(), TestdataPrefix("old_string_tensors/foo"));
+  TF_ASSERT_OK(reader.status());
+  EXPECT_EQ(AllTensorKeys(&reader),
+            std::vector<string>({"floats", "scalar", "string_tensor", "strs"}));
+
+  Expect<string>(&reader, "string_tensor", Tensor(DT_STRING, TensorShape({1})));
+  Expect<string>(&reader, "scalar", test::AsTensor<string>({"hello"}));
+  Expect<string>(
+      &reader, "strs",
+      test::AsTensor<string>({"hello", "", "x01", string(1 << 10, 'c')}));
+  Expect<float>(&reader, "floats", Constant_2x3<float>(16.18));
+}
+
 TEST(TensorBundleTest, StringTensors) {
+  constexpr size_t kLongLength = static_cast<size_t>(UINT32_MAX) + 1;
+  Tensor long_string_tensor(DT_STRING, TensorShape({1}));
+
   {
     BundleWriter writer(Env::Default(), Prefix("foo"));
     TF_EXPECT_OK(writer.Add("string_tensor",
@@ -467,6 +491,12 @@ TEST(TensorBundleTest, StringTensors) {
     TF_EXPECT_OK(writer.Add(
         "strs",
         test::AsTensor<string>({"hello", "", "x01", string(1 << 25, 'c')})));
+
+    // Requires a 64-bit length.
+    string* backing_string = long_string_tensor.flat<string>().data();
+    backing_string->assign(kLongLength, 'd');
+    TF_EXPECT_OK(writer.Add("long_scalar", long_string_tensor));
+
     // Mixes in some floats.
     TF_EXPECT_OK(writer.Add("floats", Constant_2x3<float>(16.18)));
     TF_ASSERT_OK(writer.Finish());
@@ -474,9 +504,9 @@ TEST(TensorBundleTest, StringTensors) {
   {
     BundleReader reader(Env::Default(), Prefix("foo"));
     TF_ASSERT_OK(reader.status());
-    EXPECT_EQ(
-        AllTensorKeys(&reader),
-        std::vector<string>({"floats", "scalar", "string_tensor", "strs"}));
+    EXPECT_EQ(AllTensorKeys(&reader),
+              std::vector<string>({"floats", "long_scalar", "scalar",
+                                   "string_tensor", "strs"}));
 
     Expect<string>(&reader, "string_tensor",
                    Tensor(DT_STRING, TensorShape({1})));
@@ -484,7 +514,35 @@ TEST(TensorBundleTest, StringTensors) {
     Expect<string>(
         &reader, "strs",
         test::AsTensor<string>({"hello", "", "x01", string(1 << 25, 'c')}));
+
     Expect<float>(&reader, "floats", Constant_2x3<float>(16.18));
+
+    // We don't use the Expect function so we can re-use the
+    // `long_string_tensor` buffer for reading out long_scalar to keep memory
+    // usage reasonable.
+    EXPECT_TRUE(reader.Contains("long_scalar"));
+    DataType dtype;
+    TensorShape shape;
+    TF_ASSERT_OK(reader.LookupDtypeAndShape("long_scalar", &dtype, &shape));
+    EXPECT_EQ(DT_STRING, dtype);
+    EXPECT_EQ(TensorShape({1}), shape);
+
+    // Zero-out the string so that we can be sure the new one is read in.
+    string* backing_string = long_string_tensor.flat<string>().data();
+    backing_string->assign("");
+
+    // Read long_scalar and check it contains kLongLength 'd's.
+    TF_ASSERT_OK(reader.Lookup("long_scalar", &long_string_tensor));
+    ASSERT_EQ(backing_string, long_string_tensor.flat<string>().data());
+    EXPECT_EQ(kLongLength, backing_string->length());
+    for (char c : *backing_string) {
+      // Not using ASSERT_EQ('d', c) because this way is twice as fast due to
+      // compiler optimizations.
+      if (c != 'd') {
+        FAIL() << "long_scalar is not full of 'd's as expected.";
+        break;
+      }
+    }
   }
 }
 
diff --git a/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/README b/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/README
new file mode 100644
index 0000000000..428d3ef79e
--- /dev/null
+++ b/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/README
@@ -0,0 +1,3 @@
+This tensor bundle was generated from cl/214343133, before string tensor
+lengths were written as varint64s. This is here to check backwards
+compatibility between the new code and old checkpoints.
diff --git a/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.data-00000-of-00001 b/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.data-00000-of-00001
new file mode 100644
index 0000000000000000000000000000000000000000..23b488e5feaefa970927bfd93c4a989fb494fae9
GIT binary patch
literal 1080
zcmZQrRxN17dh^&E=Zw^xoP1UW<_3;AL9;J|c@+kR$)jL21V%$(#D>6<f<{Md7y$UV
BqVNC!

literal 0
HcmV?d00001

diff --git a/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.index b/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.index
new file mode 100644
index 0000000000000000000000000000000000000000..a22a69e6e1de790801fa87b8cf677a73ee287b9c
GIT binary patch
literal 211
zcmZQzVB=tvV&Y(AU=vHr$xkdP=3o@!5MttB5@O<DR#?EHAz|t=t0R(uO|UpQF(<Kz
zgI$P4h>3$yfmMUc^v#0<nv6XBB}JKe>G370dByo~c@0+6z3J@xm>GqNp;}lJ_%$YS
zm<D}g`@_J%z`zJ1F74aw1){;61D_UdU|?h_VW@)(ZD6iRX=db@z{m#@!jHp$H*~9%
Hy59x>Q*9-v

literal 0
HcmV?d00001

-- 
GitLab


From ece50dd9992ac17e3094c7f6d1914febd7a036b5 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 27 Sep 2018 16:05:51 -0700
Subject: [PATCH 0823/1357] [tf.data Introducing tf.data.Dataset.reduce() which
 reduces elements of a (finite) dataset to a single element.

PiperOrigin-RevId: 214852364
---
 .../base_api/api_def_ReduceDataset.pbtxt      |  26 ++++
 .../data/group_by_reducer_dataset_op.cc       |   4 +-
 .../data/group_by_window_dataset_op.cc        |   4 +-
 tensorflow/core/kernels/data/iterator_ops.cc  | 111 ++++++++++++++++
 .../core/kernels/data/scan_dataset_op.cc      |   4 +-
 tensorflow/core/ops/dataset_ops.cc            |  13 ++
 tensorflow/python/data/kernel_tests/BUILD     |  18 +++
 .../kernel_tests/reduce_dataset_op_test.py    | 124 ++++++++++++++++++
 tensorflow/python/data/ops/dataset_ops.py     | 120 +++++++++++++++++
 .../golden/v1/tensorflow.data.-dataset.pbtxt  |   4 +
 ...ow.data.-fixed-length-record-dataset.pbtxt |   4 +
 .../tensorflow.data.-t-f-record-dataset.pbtxt |   4 +
 .../tensorflow.data.-text-line-dataset.pbtxt  |   4 +
 .../golden/v2/tensorflow.data.-dataset.pbtxt  |   4 +
 ...ow.data.-fixed-length-record-dataset.pbtxt |   4 +
 .../tensorflow.data.-t-f-record-dataset.pbtxt |   4 +
 .../tensorflow.data.-text-line-dataset.pbtxt  |   4 +
 17 files changed, 447 insertions(+), 9 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ReduceDataset.pbtxt
 create mode 100644 tensorflow/python/data/kernel_tests/reduce_dataset_op_test.py

diff --git a/tensorflow/core/api_def/base_api/api_def_ReduceDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ReduceDataset.pbtxt
new file mode 100644
index 0000000000..08414b3e68
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ReduceDataset.pbtxt
@@ -0,0 +1,26 @@
+op {
+  visibility: HIDDEN
+  graph_op_name: "ReduceDataset"
+  in_arg {
+    name: "input_dataset"
+    description: <<END
+A variant tensor representing the input dataset.
+END
+  }
+  in_arg {
+    name: "initial_state"
+    description: <<END
+A nested structure of tensors, representing the initial state of the
+transformation.
+END
+  }
+  attr {
+    name: "f"
+    description: <<END
+A function that maps `(old_state, input_element)` to `new_state`. It must take
+two arguments and return a nested structures of tensors. The structure of
+`new_state` must match the structure of `initial_state`.
+END
+  }
+  summary: "Reduces the input dataset to a singleton using a reduce function."
+}
diff --git a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
index d6ee42a7c6..e7244ee208 100644
--- a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
@@ -30,8 +30,7 @@ namespace {
 class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
  public:
   explicit GroupByReducerDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
+      : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("key_func", &key_func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("init_func", &init_func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("reduce_func", &reduce_func_));
@@ -421,7 +420,6 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> output_shapes_;
   };
 
-  const int graph_def_version_;
   DataTypeVector output_types_;
   std::vector<PartialTensorShape> output_shapes_;
   NameAttrList key_func_;
diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
index 8b417bb1c2..14aefe5d54 100644
--- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
@@ -31,8 +31,7 @@ namespace {
 class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
  public:
   explicit GroupByWindowDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
+      : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("key_func", &key_func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("reduce_func", &reduce_func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("window_size_func", &window_size_func_));
@@ -507,7 +506,6 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> output_shapes_;
   };
 
-  const int graph_def_version_;
   DataTypeVector output_types_;
   std::vector<PartialTensorShape> output_shapes_;
   NameAttrList key_func_;
diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index c0bc507ec0..7a833668ac 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -659,6 +659,115 @@ class ToSingleElementOp : public AsyncOpKernel {
   BackgroundWorker background_worker_;
 };
 
+class ReduceDatasetOp : public AsyncOpKernel {
+ public:
+  explicit ReduceDatasetOp(OpKernelConstruction* ctx)
+      : AsyncOpKernel(ctx),
+        background_worker_(
+            ctx->env(),
+            strings::StrCat("reduce_thread_", SanitizeThreadSuffix(name()))) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &reduce_func_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("use_inter_op_parallelism",
+                                     &use_inter_op_parallelism_));
+  }
+
+  void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
+    // The call to `iterator->GetNext()` may block and depend on an
+    // inter-op thread pool thread, so we issue the call from the
+    // owned thread pool.
+    background_worker_.Schedule([this, ctx, done]() {
+      DatasetBase* dataset;
+      OP_REQUIRES_OK_ASYNC(
+          ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset), done);
+      OpInputList inputs;
+      OP_REQUIRES_OK_ASYNC(ctx, ctx->input_list("initial_state", &inputs),
+                           done);
+      std::vector<Tensor> state(inputs.begin(), inputs.end());
+
+      std::unique_ptr<CapturedFunction> captured_func;
+      OP_REQUIRES_OK_ASYNC(
+          ctx,
+          CapturedFunction::Create(reduce_func_, ctx, "other_arguments",
+                                   use_inter_op_parallelism_, &captured_func),
+          done);
+
+      IteratorContext iter_ctx(ctx);
+      OP_REQUIRES_OK_ASYNC(ctx, captured_func->Instantiate(&iter_ctx), done);
+
+      std::unique_ptr<IteratorBase> iterator;
+      OP_REQUIRES_OK_ASYNC(
+          ctx, dataset->MakeIterator(&iter_ctx, "ReduceIterator", &iterator),
+          done);
+
+      // NOTE(jsimsa): We must destroy the iterator before calling `done()`, to
+      // avoid destruction races.
+      IteratorBase* raw_iterator = iterator.release();
+      auto cleanup = gtl::MakeCleanup([raw_iterator, done] {
+        delete raw_iterator;
+        done();
+      });
+
+      // Iterate through the input dataset.
+      Status status;
+      while (true) {
+        std::vector<Tensor> next_input_element;
+        bool end_of_input;
+        status = raw_iterator->GetNext(&iter_ctx, &next_input_element,
+                                       &end_of_input);
+        if (!status.ok() || end_of_input) {
+          break;
+        }
+
+        // Run the reduce function to update the current state.
+        std::vector<Tensor> args;
+        args.reserve(state.size() + next_input_element.size());
+        std::copy(state.begin(), state.end(), std::back_inserter(args));
+        std::copy(next_input_element.begin(), next_input_element.end(),
+                  std::back_inserter(args));
+
+        std::vector<Tensor> reduce_func_output;
+        status =
+            captured_func->Run(&iter_ctx, std::move(args), &reduce_func_output);
+        if (!status.ok()) {
+          break;
+        }
+        std::swap(reduce_func_output, state);
+      }
+
+      if (!status.ok()) {
+        ctx->SetStatus(status);
+        return;
+      }
+      for (int i = 0; i < state.size(); ++i) {
+        OP_REQUIRES_ASYNC(
+            ctx, state[i].dtype() == output_types_[i],
+            errors::InvalidArgument(
+                "The result does not match the expected type for component ", i,
+                ". Expected: ", DataTypeString(output_types_[i]),
+                ". Actual: ", DataTypeString(state[i].dtype()), "."),
+            done);
+        OP_REQUIRES_ASYNC(
+            ctx, output_shapes_[i].IsCompatibleWith(state[i].shape()),
+            errors::InvalidArgument(
+                "The result does not match the expected shape for component ",
+                i, ". Expected: ", output_shapes_[i].DebugString(),
+                ". Actual: ", state[i].shape().DebugString(), "."),
+            done);
+        ctx->set_output(i, state[i]);
+      }
+    });
+  }
+
+ private:
+  NameAttrList reduce_func_;
+  DataTypeVector output_types_;
+  std::vector<PartialTensorShape> output_shapes_;
+  bool use_inter_op_parallelism_;
+  BackgroundWorker background_worker_;
+};
+
 class OneShotIteratorOp : public AsyncOpKernel {
  public:
   explicit OneShotIteratorOp(OpKernelConstruction* ctx)
@@ -1146,6 +1255,8 @@ REGISTER_KERNEL_BUILDER(Name("AnonymousIterator").Device(DEVICE_GPU),
                         AnonymousIteratorHandleOp);
 REGISTER_KERNEL_BUILDER(Name("DatasetToSingleElement").Device(DEVICE_CPU),
                         ToSingleElementOp);
+REGISTER_KERNEL_BUILDER(Name("ReduceDataset").Device(DEVICE_CPU),
+                        ReduceDatasetOp);
 REGISTER_KERNEL_BUILDER(Name("OneShotIterator").Device(DEVICE_CPU),
                         OneShotIteratorOp);
 REGISTER_KERNEL_BUILDER(Name("IteratorGetNext").Device(DEVICE_CPU),
diff --git a/tensorflow/core/kernels/data/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc
index dbe31f37b8..2a911aa368 100644
--- a/tensorflow/core/kernels/data/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/data/scan_dataset_op.cc
@@ -32,8 +32,7 @@ namespace {
 class ScanDatasetOp : public UnaryDatasetOpKernel {
  public:
   explicit ScanDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
+      : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("Tstate", &state_types_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
@@ -258,7 +257,6 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> output_shapes_;
   };
 
-  const int graph_def_version_;
   DataTypeVector state_types_;
   DataTypeVector output_types_;
   std::vector<PartialTensorShape> output_shapes_;
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 1ada623cf5..71f4cc3c4c 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -756,6 +756,19 @@ REGISTER_OP("DatasetToSingleElement")
     .Attr("output_shapes: list(shape) >= 1")
     .SetShapeFn(IteratorGetNextShapeFn);
 
+REGISTER_OP("ReduceDataset")
+    .Input("input_dataset: variant")
+    .Input("initial_state: Tstate")
+    .Input("other_arguments: Targuments")
+    .Output("components: output_types")
+    .Attr("f: func")
+    .Attr("Tstate: list(type) >= 1")
+    .Attr("Targuments: list(type) >= 0")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .Attr("use_inter_op_parallelism: bool = true")
+    .SetShapeFn(IteratorGetNextShapeFn);
+
 REGISTER_OP("IteratorToStringHandle")
     .Input("resource_handle: resource")
     .Output("string_handle: string")
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index fdcbfc3684..5f9818566f 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -404,6 +404,24 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "reduce_dataset_op_test",
+    size = "small",
+    srcs = ["reduce_dataset_op_test.py"],
+    additional_deps = [
+        ":test_base",
+        "@absl_py//absl/testing:parameterized",
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 tf_py_test(
     name = "sequence_dataset_op_test",
     size = "small",
diff --git a/tensorflow/python/data/kernel_tests/reduce_dataset_op_test.py b/tensorflow/python/data/kernel_tests/reduce_dataset_op_test.py
new file mode 100644
index 0000000000..11e07300b9
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/reduce_dataset_op_test.py
@@ -0,0 +1,124 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class ReduceDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  def testSum(self):
+    for i in range(10):
+      ds = dataset_ops.Dataset.range(1, i + 1)
+      result = ds.reduce(np.int64(0), lambda x, y: x + y)
+      with self.cached_session() as sess:
+        self.assertEqual(((i + 1) * i) // 2, sess.run(result))
+
+  def testSumTuple(self):
+
+    def reduce_fn(state, value):
+      v1, v2 = value
+      return state + v1 + v2
+
+    for i in range(10):
+      ds = dataset_ops.Dataset.range(1, i + 1)
+      ds = dataset_ops.Dataset.zip((ds, ds))
+      result = ds.reduce(np.int64(0), reduce_fn)
+      with self.cached_session() as sess:
+        self.assertEqual(((i + 1) * i), sess.run(result))
+
+  def testSumAndCount(self):
+
+    def reduce_fn(state, value):
+      s, c = state
+      return s + value, c + 1
+
+    for i in range(10):
+      ds = dataset_ops.Dataset.range(1, i + 1)
+      result = ds.reduce((np.int64(0), np.int64(0)), reduce_fn)
+      with self.cached_session() as sess:
+        s, c = sess.run(result)
+        self.assertEqual(((i + 1) * i) // 2, s)
+        self.assertEqual(i, c)
+
+  def testSquareUsingPlaceholder(self):
+    delta = array_ops.placeholder(dtype=dtypes.int64)
+
+    def reduce_fn(state, _):
+      return state + delta
+
+    for i in range(10):
+      ds = dataset_ops.Dataset.range(1, i + 1)
+      result = ds.reduce(np.int64(0), reduce_fn)
+      with self.cached_session() as sess:
+        square = sess.run(result, feed_dict={delta: i})
+        self.assertEqual(i * i, square)
+
+  def testSparse(self):
+
+    def reduce_fn(_, value):
+      return value
+
+    def make_sparse_fn(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1])),
+          dense_shape=np.array([1, 1]))
+
+    for i in range(10):
+      ds = dataset_ops.Dataset.from_tensors(make_sparse_fn(i+1))
+      result = ds.reduce(make_sparse_fn(0), reduce_fn)
+      with self.cached_session() as sess:
+        self.assertSparseValuesEqual(make_sparse_fn(i+1), sess.run(result))
+
+  def testNested(self):
+
+    def reduce_fn(state, value):
+      state["dense"] += value["dense"]
+      state["sparse"] = value["sparse"]
+      return state
+
+    def make_sparse_fn(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1])),
+          dense_shape=np.array([1, 1]))
+
+    def map_fn(i):
+      return {"dense": math_ops.cast(i, dtype=dtypes.int64),
+              "sparse": make_sparse_fn(math_ops.cast(i, dtype=dtypes.int64))}
+
+    for i in range(10):
+      ds = dataset_ops.Dataset.range(1, i + 1).map(map_fn)
+      result = ds.reduce(map_fn(0), reduce_fn)
+      with self.cached_session() as sess:
+        result = sess.run(result)
+        self.assertEqual(((i + 1) * i) // 2, result["dense"])
+        self.assertSparseValuesEqual(make_sparse_fn(i), result["sparse"])
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index ac87a451b1..6bba72a8e9 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1205,6 +1205,126 @@ class Dataset(object):
       shift = size
     return WindowDataset(self, size, shift, stride, drop_remainder)
 
+  def reduce(self, initial_state, reduce_func):
+    """Reduces the input dataset to a single element.
+
+    The transformation calls `reduce_func` successively on every element of
+    the input dataset until the dataset is exhausted, aggregating information in
+    its internal state. The `initial_state` argument is used for the initial
+    state and the final state is returned as the result.
+
+    For example:
+    - `tf.data.Dataset.range(5).reduce(np.int64(0), lambda x, _: x + 1)`
+      produces `5`
+    - `tf.data.Dataset.range(5).reduce(np.int64(0), lambda x, y: x + y)`
+      produces `10`
+
+    Args:
+      initial_state: A nested structure of tensors, representing the initial
+        state of the transformation.
+      reduce_func: A function that maps `(old_state, input_element)` to
+        `new_state`. It must take two arguments and return a nested structure
+        of tensors. The structure of `new_state` must match the structure of
+        `initial_state`.
+
+    Returns:
+      A nested structure of `tf.Tensor` objects, corresponding to the final
+      state of the transformation.
+
+    """
+
+    with ops.name_scope("initial_state"):
+      # Convert any `SparseTensorValue`s to `SparseTensor`s and all other
+      # values to tensors.
+      initial_state = nest.pack_sequence_as(initial_state, [
+          sparse_tensor_lib.SparseTensor.from_value(t)
+          if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor(
+              t, name="component_%d" % i)
+          for i, t in enumerate(nest.flatten(initial_state))
+      ])
+
+    # Compute initial values for the state classes, shapes and types based on
+    # the initial state.
+    state_classes = sparse.get_classes(initial_state)
+    state_shapes = nest.pack_sequence_as(
+        initial_state, [t.get_shape() for t in nest.flatten(initial_state)])
+    state_types = nest.pack_sequence_as(
+        initial_state, [t.dtype for t in nest.flatten(initial_state)])
+
+    # Iteratively rerun the reduce function until reaching a fixed point on
+    # `self._state_shapes`.
+    need_to_rerun = True
+    while need_to_rerun:
+
+      wrapped_func = StructuredFunctionWrapper(
+          reduce_func,
+          "reduce()",
+          input_classes=(state_classes, self.output_classes),
+          input_shapes=(state_shapes, self.output_shapes),
+          input_types=(state_types, self.output_types),
+          add_to_graph=False)
+
+      # Extract and validate class information from the returned values.
+      output_classes = wrapped_func.output_classes
+      for new_state_class, state_class in zip(
+          nest.flatten(output_classes), nest.flatten(state_classes)):
+        if not issubclass(new_state_class, state_class):
+          raise TypeError(
+              "The element classes for the new state must match the initial "
+              "state. Expected %s; got %s." % (state_classes,
+                                               wrapped_func.output_classes))
+
+      # Extract and validate type information from the returned values.
+      output_types = wrapped_func.output_types
+      for new_state_type, state_type in zip(
+          nest.flatten(output_types), nest.flatten(state_types)):
+        if new_state_type != state_type:
+          raise TypeError(
+              "The element types for the new state must match the initial "
+              "state. Expected %s; got %s." % (state_types,
+                                               wrapped_func.output_types))
+
+      # Extract shape information from the returned values.
+      output_shapes = wrapped_func.output_shapes
+      flat_state_shapes = nest.flatten(state_shapes)
+      flat_new_state_shapes = nest.flatten(output_shapes)
+      weakened_state_shapes = [
+          original.most_specific_compatible_shape(new)
+          for original, new in zip(flat_state_shapes, flat_new_state_shapes)
+      ]
+
+      need_to_rerun = False
+      for original_shape, weakened_shape in zip(flat_state_shapes,
+                                                weakened_state_shapes):
+        if original_shape.ndims is not None and (
+            weakened_shape.ndims is None or
+            original_shape.as_list() != weakened_shape.as_list()):
+          need_to_rerun = True
+          break
+
+      if need_to_rerun:
+        state_shapes = nest.pack_sequence_as(state_shapes,
+                                             weakened_state_shapes)
+
+    reduce_func = wrapped_func.function
+    reduce_func.add_to_graph(ops.get_default_graph())
+
+    return sparse.deserialize_sparse_tensors(
+        nest.pack_sequence_as(
+            output_types,
+            gen_dataset_ops.reduce_dataset(
+                self._as_variant_tensor(),  # pylint: disable=protected-access
+                nest.flatten(sparse.serialize_sparse_tensors(initial_state)),
+                reduce_func.captured_inputs,
+                f=reduce_func,
+                output_shapes=nest.flatten(
+                    sparse.as_dense_shapes(output_shapes, output_classes)),
+                output_types=nest.flatten(
+                    sparse.as_dense_types(output_types, output_classes)))),
+        output_types,
+        output_shapes,
+        output_classes)
+
 
 class DatasetSource(Dataset):
   """Abstract class representing a dataset with no inputs."""
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
index c3ba2dba57..825afb622f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
@@ -90,6 +90,10 @@ tf_class {
     name: "range"
     argspec: "args=[], varargs=args, keywords=None, defaults=None"
   }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "repeat"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
index 3541671bee..cdad5f6360 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -91,6 +91,10 @@ tf_class {
     name: "range"
     argspec: "args=[], varargs=args, keywords=None, defaults=None"
   }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "repeat"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
index b113c18ee0..df41bff1b5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -91,6 +91,10 @@ tf_class {
     name: "range"
     argspec: "args=[], varargs=args, keywords=None, defaults=None"
   }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "repeat"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
index 7210bf5db4..028bcc2ce9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
@@ -91,6 +91,10 @@ tf_class {
     name: "range"
     argspec: "args=[], varargs=args, keywords=None, defaults=None"
   }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "repeat"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
index c3ba2dba57..825afb622f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
@@ -90,6 +90,10 @@ tf_class {
     name: "range"
     argspec: "args=[], varargs=args, keywords=None, defaults=None"
   }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "repeat"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
index 3541671bee..cdad5f6360 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -91,6 +91,10 @@ tf_class {
     name: "range"
     argspec: "args=[], varargs=args, keywords=None, defaults=None"
   }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "repeat"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
index b113c18ee0..df41bff1b5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -91,6 +91,10 @@ tf_class {
     name: "range"
     argspec: "args=[], varargs=args, keywords=None, defaults=None"
   }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "repeat"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
index 7210bf5db4..028bcc2ce9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
@@ -91,6 +91,10 @@ tf_class {
     name: "range"
     argspec: "args=[], varargs=args, keywords=None, defaults=None"
   }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "repeat"
     argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
-- 
GitLab


From d8a370274d6ab8c68edcce66849b4e96aed2fa0d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 16:10:08 -0700
Subject: [PATCH 0824/1357] Optimize ParseNodeNameAsStringPiece and related
 functions, since they are the most costly functions in Grappler.

PiperOrigin-RevId: 214853009
---
 .../core/grappler/optimizers/data/BUILD       |   1 +
 .../optimizers/data/function_utils.cc         |   1 +
 tensorflow/core/grappler/utils.cc             |  39 -------
 tensorflow/core/grappler/utils.h              | 110 +++++++++++++-----
 tensorflow/core/grappler/utils_test.cc        |  19 +++
 5 files changed, 102 insertions(+), 68 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index d198a2a591..81c1bddf67 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -94,6 +94,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core/grappler:utils",
+        "//tensorflow/core:lib_internal",
     ] + tf_protos_all(),
 )
 
diff --git a/tensorflow/core/grappler/optimizers/data/function_utils.cc b/tensorflow/core/grappler/optimizers/data/function_utils.cc
index e3f6d8e1ea..311df15bc2 100644
--- a/tensorflow/core/grappler/optimizers/data/function_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/function_utils.cc
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index db6e4e6852..5867d01324 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -156,45 +156,6 @@ bool IsControlInput(const string& name) {
   return !name.empty() && name[0] == '^';
 }
 
-string NodeName(const string& name) {
-  int position;
-  return ParseNodeName(name, &position);
-}
-
-int NodePosition(const string& name) {
-  int position;
-  ParseNodeNameAsStringPiece(name, &position);
-  return position;
-}
-
-int NodePositionIfSameNode(const string& input_name, const string& node_name) {
-  const bool is_ctrl = input_name[0] == '^';
-  auto input_it = is_ctrl ? input_name.begin() + 1 : input_name.begin();
-  auto node_it = node_name.begin();
-  if (node_name.empty() ||
-      std::distance(input_it, input_name.end()) < node_name.size()) {
-    return -2;
-  }
-  while (node_it != node_name.end()) {
-    if (*input_it++ != *node_it++) {
-      return -2;
-    }
-  }
-  if (input_it == input_name.end()) {
-    return is_ctrl ? -1 : 0;
-  } else if (*input_it++ == ':') {
-    StringPiece remaining(&(*input_it),
-                          std::distance(input_it, input_name.end()));
-    int position;
-    if (!strings::safe_strto32(remaining, &position)) {
-      return -2;
-    }
-    return is_ctrl ? -1 : position;
-  } else {
-    return -2;
-  }
-}
-
 string AddPrefixToNodeName(const string& name, const string& prefix,
                            const string& delimiter) {
   if (!name.empty()) {
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index 296ee1678e..95126d470c 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -29,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
-#include "tensorflow/core/lib/strings/scanner.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -102,40 +101,92 @@ bool IsControlInput(const string& name);
 // True iff 'name1' and 'name2' refer to the same input.
 bool IsSameInput(const string& name1, const string& name2);
 
+// Returns the trailing position number (or zero if no number is present) if
+// NodeName(input_name) is equal to node_name. Returns -1 for control inputs.
+// Returns -2 if NodeName(input_name) is not equal to node_name.
+// Note: This function is used very heavily, and this hand-optimized
+// version is 3-4x faster than the version using Scanner, which it replaced.
+// This is worth the reduction in readability.
+inline int NodePositionIfSameNode(const string& input_name,
+                                  const string& node_name) {
+  if (input_name.empty()) return -2;
+  const bool is_ctrl = input_name[0] == '^';
+  auto input_it = is_ctrl ? input_name.begin() + 1 : input_name.begin();
+  auto node_it = node_name.begin();
+  if (node_name.empty() ||
+      std::distance(input_it, input_name.end()) < node_name.size()) {
+    return -2;
+  }
+  while (node_it != node_name.end()) {
+    if (*input_it++ != *node_it++) {
+      return -2;
+    }
+  }
+  if (input_it == input_name.end()) {
+    return is_ctrl ? -1 : 0;
+  } else if (*input_it++ == ':') {
+    StringPiece remaining(&(*input_it),
+                          std::distance(input_it, input_name.end()));
+    int position;
+    if (!strings::safe_strto32(remaining, &position)) {
+      return -2;
+    }
+    return is_ctrl ? -1 : position;
+  } else {
+    return -2;
+  }
+}
+
 // Return the node name corresponding to 'name' if name is valid, or the empty
 // string otherwise.
-string NodeName(const string& name);
+inline StringPiece NodeNameAsStringPiece(const string& name) {
+  static const string empty;
+  if (name.empty()) return StringPiece(empty);
+  const auto begin_it = name[0] == '^' ? name.begin() + 1 : name.begin();
+  auto end_it = begin_it;
+  while (end_it != name.end() && *end_it != ':') {
+    ++end_it;
+  }
+  if (end_it != name.end() && *end_it != ':') {
+    return StringPiece(empty);
+  }
+  return StringPiece(&(*begin_it), std::distance(begin_it, end_it));
+}
 
-// Get the trailing position number ":{digits}" (if any) of a node name.
-// Returns -1 for control inputs.
-int NodePosition(const string& name);
+// Return the node name corresponding to 'name' if name is valid, or the empty
+// string otherwise.
+inline string NodeName(const string& name) {
+  return string(NodeNameAsStringPiece(name));
+}
 
+// Returns the node name and position in a single call.
 inline StringPiece ParseNodeNameAsStringPiece(const string& name,
                                               int* position) {
-  // Strip the prefix '^' (if any), and strip the trailing ":{digits} (if any)
-  // to get a node name.
-  strings::Scanner scan(name);
-  scan.ZeroOrOneLiteral("^")
-      .RestartCapture()
-      .One(strings::Scanner::LETTER_DIGIT_DOT_UNDERSCORE)
-      .Any(strings::Scanner::LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE);
-  StringPiece capture;
-  StringPiece remaining;
-  if (scan.Peek(':') != ':' || !scan.GetResult(&remaining, &capture)) {
+  static const string empty;
+  if (name.empty()) {
     *position = 0;
-    static const string empty;
     return StringPiece(empty);
-  } else {
-    if (name[0] == '^') {
-      *position = -1;
-    } else if (remaining.empty()) {
-      *position = 0;
-    } else {
-      // Skip the first ':' character.
-      CHECK(strings::safe_strto32(remaining.substr(1), position));
+  }
+  const bool is_ctrl = name[0] == '^';
+  const auto begin_it = is_ctrl ? name.begin() + 1 : name.begin();
+  *position = is_ctrl ? -1 : 0;
+  auto end_it = begin_it;
+  while (end_it != name.end() && *end_it != ':') {
+    ++end_it;
+  }
+  const StringPiece node_name(&(*begin_it), std::distance(begin_it, end_it));
+  if (end_it != name.end()) {
+    if (*end_it != ':') {
+      return StringPiece(empty);
+    } else if (!is_ctrl) {
+      ++end_it;
+      StringPiece remaining(&(*end_it), std::distance(end_it, name.end()));
+      if (!strings::safe_strto32(remaining, position)) {
+        return StringPiece(empty);
+      }
     }
-    return capture;
   }
+  return node_name;
 }
 
 // Returns the node name and position in a single call.
@@ -143,10 +194,11 @@ inline string ParseNodeName(const string& name, int* position) {
   return string(ParseNodeNameAsStringPiece(name, position));
 }
 
-// Returns NodePosition(input_name) if NodeName(input_name) == node_name.
-// Otherwise returns -2;
-// REQUIRES: inputs_name.size() > 0 && node_name.size() > 0.
-int NodePositionIfSameNode(const string& input_name, const string& node_name);
+inline int NodePosition(const string& name) {
+  int position;
+  ParseNodeNameAsStringPiece(name, &position);
+  return position;
+}
 
 // Add a prefix to a node name with a custom delimiter.
 string AddPrefixToNodeName(const string& name, const string& prefix,
diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc
index 6b787a6910..9b6c1f690b 100644
--- a/tensorflow/core/grappler/utils_test.cc
+++ b/tensorflow/core/grappler/utils_test.cc
@@ -371,6 +371,25 @@ BM_NodePositionIfSameNode("^foo/bar/baz", "foo/bar/baz", Match_Ctrl);
 BM_NodePositionIfSameNode("blah", "foo/bar/baz", NoMatch_0);
 BM_NodePositionIfSameNode("foo/bar/baz/gnu", "foo/bar/baz", NoMatch_end);
 
+#define BM_ParseNodeNameAsStringPiece(I, NAME)                               \
+  static void BM_ParseNodeNameAsStringPiece_##NAME(int iters) {              \
+    string input = I;                                                        \
+    for (int i = 0; i < iters; ++i) {                                        \
+      int position;                                                          \
+      const StringPiece name = ParseNodeNameAsStringPiece(input, &position); \
+      CHECK_GE(position, -1);                                                \
+      CHECK(!name.empty());                                                  \
+    }                                                                        \
+  }                                                                          \
+  BENCHMARK(BM_ParseNodeNameAsStringPiece_##NAME)
+
+BM_ParseNodeNameAsStringPiece("foo", foo);
+BM_ParseNodeNameAsStringPiece("foo/bar/baz", foo_bar_baz);
+BM_ParseNodeNameAsStringPiece("^foo/bar/baz", foo_bar_baz_ctrl);
+BM_ParseNodeNameAsStringPiece("foo:123", foo123);
+BM_ParseNodeNameAsStringPiece("foo/bar/baz:123", foo_bar_baz_123);
+BM_ParseNodeNameAsStringPiece("^foo/bar/baz:123", foo_bar_baz_123_ctrl);
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From f41573b7956871b4142c97eb85ddf163ad641976 Mon Sep 17 00:00:00 2001
From: Revan Sopher <rsopher@google.com>
Date: Thu, 27 Sep 2018 16:16:20 -0700
Subject: [PATCH 0825/1357] Automated rollback of commit
 750466c6e6624d279de7f9a43accd682d487509c

PiperOrigin-RevId: 214853846
---
 tensorflow/core/BUILD                         |  16 --
 .../core/common_runtime/direct_session.cc     |  49 +---
 .../core/common_runtime/direct_session.h      |   3 -
 .../common_runtime/direct_session_test.cc     |  28 --
 tensorflow/core/framework/run_handler.cc      | 248 ------------------
 tensorflow/core/framework/run_handler.h       |  95 -------
 tensorflow/core/framework/run_handler_util.cc |  57 ----
 tensorflow/core/framework/run_handler_util.h  |  43 ---
 .../core/framework/run_handler_util_test.cc   |  93 -------
 tensorflow/core/protobuf/config.proto         |   5 -
 ...ensorflow.-run-options.-experimental.pbtxt |   6 -
 .../golden/v1/tensorflow.-run-options.pbtxt   |   6 -
 ...ensorflow.-run-options.-experimental.pbtxt |   6 -
 .../golden/v2/tensorflow.-run-options.pbtxt   |   6 -
 14 files changed, 6 insertions(+), 655 deletions(-)
 delete mode 100644 tensorflow/core/framework/run_handler.cc
 delete mode 100644 tensorflow/core/framework/run_handler.h
 delete mode 100644 tensorflow/core/framework/run_handler_util.cc
 delete mode 100644 tensorflow/core/framework/run_handler_util.h
 delete mode 100644 tensorflow/core/framework/run_handler_util_test.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 01e2e9f62b..ca247dc56b 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2484,8 +2484,6 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [
     "framework/op_segment.h",
     "framework/rendezvous.h",  # only needed for tests
     "framework/resource_var.h",
-    "framework/run_handler.h",
-    "framework/run_handler_util.h",
     "framework/tensor_reference.h",
     "framework/tracking_allocator.h",  # only needed for tests
     "framework/unique_tensor_references.h",
@@ -2972,7 +2970,6 @@ tf_cuda_library(
         ":core_cpu_internal",
         ":device_tracer",
         ":framework",
-        ":framework_internal",
         ":graph",
         ":lib",
         ":lib_internal",
@@ -4120,19 +4117,6 @@ tf_cc_test(
     ],
 )
 
-tf_cc_test(
-    name = "framework_run_handler_util_test",
-    size = "small",
-    srcs = ["framework/run_handler_util_test.cc"],
-    linkstatic = tf_kernel_tests_linkstatic(),
-    deps = [
-        ":framework_internal",
-        ":lib",
-        ":test",
-        ":test_main",
-    ],
-)
-
 tf_cuda_cc_test(
     name = "common_runtime_direct_session_test",
     size = "small",
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 458e133b68..841181f8c3 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -40,7 +40,6 @@ limitations under the License.
 #include "tensorflow/core/framework/graph_def_util.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/framework/run_handler.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
@@ -245,21 +244,6 @@ void DirectSession::SchedClosure(thread::ThreadPool* pool,
 #endif  // __ANDROID__
 }
 
-static RunHandlerPool* GetOrCreateRunHandlerPool(
-    const SessionOptions& options) {
-  static RunHandlerPool* pool =
-      new RunHandlerPool(NumInterOpThreadsFromSessionOptions(options));
-  return pool;
-}
-
-bool DirectSession::ShouldUseRunHandlerPool() const {
-  if (options_.config.session_inter_op_thread_pool_size() > 0 ||
-      options_.config.use_per_session_threads()) {
-    return false;
-  }
-  return true;
-}
-
 DirectSession::DirectSession(const SessionOptions& options,
                              const DeviceMgr* device_mgr,
                              DirectSessionFactory* const factory)
@@ -598,37 +582,16 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options,
     }
   }
 
-  std::unique_ptr<RunHandler> handler;
-  if (ShouldUseRunHandlerPool() &&
-      run_options.experimental().use_run_handler_pool()) {
-    // Non-null only when a global inter-op pool is used.
-    VLOG(1) << "Using RunHandler to scheduler inter-op closures.";
-    handler = GetOrCreateRunHandlerPool(options_)->Get();
-  }
-  auto* handler_ptr = handler.get();
-
-  Executor::Args::Runner default_runner = nullptr;
-
-  if (pool == nullptr) {
-    default_runner = [](Executor::Args::Closure c) { c(); };
-  } else if (handler_ptr != nullptr) {
-    default_runner = [handler_ptr](Executor::Args::Closure c) {
-      handler_ptr->ScheduleInterOpClosure(std::move(c));
-    };
-  } else {
-    default_runner = [this, pool](Executor::Args::Closure c) {
-      SchedClosure(pool, std::move(c));
-    };
-  }
-
+  Executor::Args::Runner default_runner = [this,
+                                           pool](Executor::Args::Closure c) {
+    SchedClosure(pool, std::move(c));
+  };
   for (const auto& item : executors_and_keys->items) {
-    // TODO(azaks): support partial run.
-    // TODO(azaks): if the device picks its own threadpool, we need to assign
+    // TODO(zhengxq): support partial run.
+    // TODO(zhengxq): if the device picks its own threadpool, we need to assign
     //     less threads to the main compute pool by default.
     thread::ThreadPool* device_thread_pool =
         item.device->tensorflow_device_thread_pool();
-    // TODO(crk): Investigate usage of RunHandlerPool when using device specific
-    // thread pool(s).
     if (!device_thread_pool) {
       args.runner = default_runner;
     } else {
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index 3a168bbe3f..4a6a921ea7 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -247,9 +247,6 @@ class DirectSession : public Session {
                                    ExecutorsAndKeys* executors_and_keys,
                                    RunMetadata* run_metadata);
 
-  // Returns whether inter-op execution uses a global pool.
-  bool ShouldUseRunHandlerPool() const;
-
   ::tensorflow::Status ExtendLocked(const GraphDef& graph)
       EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
 
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index e3e431f800..65e816c202 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -625,34 +625,6 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetworkWithOpts_Callable) {
   EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 2);
 }
 
-TEST_F(DirectSessionMinusAXTest, UseRunHandlerPool) {
-  Initialize({3, 2, -1, 0});
-  auto session = CreateSession();
-  ASSERT_TRUE(session != nullptr);
-  TF_ASSERT_OK(session->Create(def_));
-  std::vector<std::pair<string, Tensor>> inputs;
-
-  // Request two targets: one fetch output and one non-fetched output.
-  std::vector<string> output_names = {y_ + ":0"};
-  std::vector<string> target_nodes = {y_neg_};
-  std::vector<Tensor> outputs;
-
-  // Prepares RunOptions and RunMetadata
-  RunOptions run_options;
-  run_options.mutable_experimental()->set_use_run_handler_pool(true);
-
-  Status s = session->Run(run_options, inputs, output_names, target_nodes,
-                          &outputs, nullptr);
-  TF_ASSERT_OK(s);
-
-  ASSERT_EQ(1, outputs.size());
-  // The first output should be initialized and have the correct
-  // output.
-  auto mat = outputs[0].matrix<float>();
-  ASSERT_TRUE(outputs[0].IsInitialized());
-  EXPECT_FLOAT_EQ(5.0, mat(0, 0));
-}
-
 TEST(DirectSessionTest, KeepsStateAcrossRunsOfSession) {
   GraphDef def;
   Graph g(OpRegistry::Global());
diff --git a/tensorflow/core/framework/run_handler.cc b/tensorflow/core/framework/run_handler.cc
deleted file mode 100644
index 9c6490a603..0000000000
--- a/tensorflow/core/framework/run_handler.cc
+++ /dev/null
@@ -1,248 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#define EIGEN_USE_THREADS
-
-#include "tensorflow/core/framework/run_handler.h"
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/run_handler_util.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/util/ptr_util.h"
-
-namespace tensorflow {
-
-// Contains the concrete implementation of the RunHandler.
-// Externally visible RunHandler class simply forwards the work to this one.
-class RunHandler::Impl {
- public:
-  explicit Impl(RunHandlerPool::Impl* pool_impl) : pool_impl_(pool_impl) {
-    Reset();
-  }
-
-  ~Impl() {}
-
-  void set_inter_op_scheduling_range(std::uint_fast32_t start,
-                                     std::uint_fast32_t limit) {
-    inter_op_scheduling_range_.store(EncodePartition(start, limit),
-                                     std::memory_order_release);
-  }
-
-  std::uint_fast32_t inter_op_scheduling_range() const {
-    return inter_op_scheduling_range_.load(std::memory_order_acquire);
-  }
-
-  // Stores now time (in microseconds) since unix epoch when the handler is
-  // requested via RunHandlerPool::Get().
-  uint64 start_time_us() const { return start_time_us_; }
-
-  void ScheduleInterOpClosure(std::function<void()> fn);
-
-  void Reset();
-
-  RunHandlerPool::Impl* pool_impl() { return pool_impl_; }
-
- private:
-  // Encoding/decoding logic for storing [start, limit) into a single
-  // uint_fast32_t int. We assume that pool_num_threads < (1 << 16).
-  const int kMaxPartitionBits = 16;
-  const int kMaxThreads = 1 << kMaxPartitionBits;
-
-  std::uint_fast32_t EncodePartition(std::uint_fast32_t start,
-                                     std::uint_fast32_t limit) {
-    return (start << kMaxPartitionBits) | limit;
-  }
-
-  void DecodePartition(std::uint_fast32_t val, std::uint_fast32_t* start,
-                       std::uint_fast32_t* limit) {
-    *limit = val & (kMaxThreads - 1);
-    val >>= kMaxPartitionBits;
-    *start = val;
-  }
-
-  std::atomic_uint_fast32_t inter_op_scheduling_range_;
-  RunHandlerPool::Impl* pool_impl_;  // NOT OWNED.
-  uint64 start_time_us_;
-};
-
-// Contains shared state across all run handlers present in the pool. Also
-// responsible for pool management decisions.
-// This class is thread safe.
-class RunHandlerPool::Impl {
- public:
-  // Maximum number of handlers pre-created during pool construction time. The
-  // number has been chosen expecting each handler might at least want 1
-  // inter-op thread for execution (during compute intensive workloads like
-  // inference).
-  static const int kMaxHandlers = 128;
-
-  explicit Impl(int num_inter_op_threads)
-      : inter_op_thread_pool_(new thread::ThreadPool(
-            Env::Default(), ThreadOptions(), "inter_op", num_inter_op_threads)),
-        iterations_(0) {
-    VLOG(1) << "Creating a RunHandlerPool with max handlers: " << kMaxHandlers;
-    for (int i = 0; i < kMaxHandlers; ++i) {
-      handlers_.emplace_back(new RunHandler::Impl(this));
-      free_handlers_.push_back(handlers_.back().get());
-    }
-  }
-
-  ~Impl() {
-    // Sanity check that all handlers have been returned back to the pool before
-    // destruction.
-    DCHECK_EQ(handlers_.size(), kMaxHandlers);
-    DCHECK_EQ(free_handlers_.size(), handlers_.size());
-    DCHECK_EQ(sorted_active_handlers_.size(), 0);
-  }
-
-  thread::ThreadPool* inter_op_thread_pool() const {
-    return inter_op_thread_pool_.get();
-  }
-
-  std::unique_ptr<RunHandler> Get() LOCKS_EXCLUDED(mu_) {
-    mutex_lock l(mu_);
-    while (free_handlers_.empty()) {
-      one_handler_free_.wait(l);
-    }
-    // Remove the last entry from free_handlers_ and add to the end of
-    // sorted_active_handlers_.
-    auto* handler_impl = free_handlers_.back();
-    handler_impl->Reset();
-    // Sortedness isn't violated if we simply add at the end of the list, since
-    // handlers are expected to be obtained in increasing order of time.
-    sorted_active_handlers_.push_back(handler_impl);
-    DCHECK_LE(sorted_active_handlers_.size(), kMaxHandlers);
-    free_handlers_.pop_back();
-
-    RecomputePoolStatsLocked();
-    return WrapUnique<RunHandler>(new RunHandler(handler_impl));
-  }
-
-  void ReleaseHandler(RunHandler::Impl* handler) LOCKS_EXCLUDED(mu_) {
-    {
-      mutex_lock l(mu_);
-      DCHECK_GT(sorted_active_handlers_.size(), 0);
-
-      uint64 now = tensorflow::Env::Default()->NowMicros();
-      double elapsed = (now - handler->start_time_us()) / 1000.0;
-      time_hist_.Add(elapsed);
-
-      // Erase from and update sorted_active_handlers_. Add it to the end of
-      // free_handlers_.
-      auto iter = std::find(sorted_active_handlers_.begin(),
-                            sorted_active_handlers_.end(), handler);
-      DCHECK(iter != sorted_active_handlers_.end())
-          << "Unexpected handler: " << handler
-          << " is being requested for release";
-
-      // Remove this handler from this list and add it to the list of free
-      // handlers.
-      sorted_active_handlers_.erase(iter);
-      free_handlers_.push_back(handler);
-      DCHECK_LE(free_handlers_.size(), kMaxHandlers);
-
-      RecomputePoolStatsLocked();
-    }
-    one_handler_free_.notify_one();
-  }
-
- private:
-  void RecomputePoolStatsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_);
-
-  // Thread safe part.
-  const std::unique_ptr<thread::ThreadPool> inter_op_thread_pool_;
-
-  // Thread compatible part used only by lock under RunHandlerPool.
-  // Handlers are sorted by start time.
-  std::vector<RunHandler::Impl*> sorted_active_handlers_ GUARDED_BY(mu_);
-  std::vector<RunHandler::Impl*> free_handlers_ GUARDED_BY(mu_);
-  std::vector<std::unique_ptr<RunHandler::Impl>> handlers_ GUARDED_BY(mu_);
-  // Histogram of elapsed runtime of every handler (in ms).
-  histogram::Histogram time_hist_ GUARDED_BY(mu_);
-  std::vector<std::uint_fast32_t> inter_op_start_ GUARDED_BY(mu_);
-  std::vector<std::uint_fast32_t> inter_op_limit_ GUARDED_BY(mu_);
-  int64 iterations_ GUARDED_BY(mu_);
-  condition_variable one_handler_free_;
-  mutex mu_;
-};
-
-void RunHandlerPool::Impl::RecomputePoolStatsLocked() {
-  int num_active_requests = sorted_active_handlers_.size();
-  if (num_active_requests == 0) return;
-
-  int num_threads = inter_op_thread_pool_->NumThreads();
-
-  inter_op_start_.resize(num_active_requests);
-  inter_op_limit_.resize(num_active_requests);
-
-  const int kMinThreadsPerRequest = 3;
-  ComputeInterOpSchedulingRanges(num_active_requests, num_threads,
-                                 kMinThreadsPerRequest, &inter_op_start_,
-                                 &inter_op_limit_);
-
-  for (int i = 0; i < num_active_requests; ++i) {
-    sorted_active_handlers_[i]->set_inter_op_scheduling_range(
-        inter_op_start_[i], inter_op_limit_[i]);
-  }
-
-  if (iterations_++ % 5000 == 0 && VLOG_IS_ON(1)) {
-    VLOG(1) << "Printing time histogram: " << time_hist_.ToString();
-    VLOG(1) << "Active session runs: " << num_active_requests;
-    uint64 now = tensorflow::Env::Default()->NowMicros();
-    string ranges_str = "";
-    string times_str = "";
-    for (int i = 0; i < num_active_requests; ++i) {
-      if (i > 0) {
-        times_str += " ";
-        ranges_str += " ";
-      }
-
-      times_str += strings::StrCat(
-          (now - sorted_active_handlers_[i]->start_time_us()) / 1000.0, " ms.");
-      ranges_str += strings::StrCat("[", inter_op_start_[i], ", ",
-                                    inter_op_limit_[i], ")");
-    }
-    VLOG(1) << "Elapsed times are: " << times_str;
-    VLOG(1) << "Ranges are: " << ranges_str;
-  }
-}
-
-void RunHandler::Impl::ScheduleInterOpClosure(std::function<void()> fn) {
-  std::uint_fast32_t start = 0, limit = 0;
-  DecodePartition(inter_op_scheduling_range(), &start, &limit);
-  pool_impl_->inter_op_thread_pool()->Schedule(std::move(fn));
-}
-
-void RunHandler::Impl::Reset() {
-  set_inter_op_scheduling_range(
-      0, pool_impl_->inter_op_thread_pool()->NumThreads());
-  start_time_us_ = tensorflow::Env::Default()->NowMicros();
-}
-
-RunHandlerPool::RunHandlerPool(int num_inter_op_threads)
-    : impl_(new Impl(num_inter_op_threads)) {}
-
-RunHandlerPool::~RunHandlerPool() {}
-
-std::unique_ptr<RunHandler> RunHandlerPool::Get() { return impl_->Get(); }
-
-RunHandler::RunHandler(Impl* impl) : impl_(impl) {}
-
-void RunHandler::ScheduleInterOpClosure(std::function<void()> fn) {
-  impl_->ScheduleInterOpClosure(std::move(fn));
-}
-
-RunHandler::~RunHandler() { impl_->pool_impl()->ReleaseHandler(impl_); }
-}  // namespace tensorflow
diff --git a/tensorflow/core/framework/run_handler.h b/tensorflow/core/framework/run_handler.h
deleted file mode 100644
index 72fa6301b4..0000000000
--- a/tensorflow/core/framework/run_handler.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_
-#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_
-
-#include "tensorflow/core/lib/core/threadpool.h"
-#include "tensorflow/core/lib/histogram/histogram.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/thread_annotations.h"
-#include "tensorflow/core/protobuf/config.pb.h"
-
-namespace tensorflow {
-
-class RunHandler;
-
-// RunHandlerPool is a fixed size pool of pre-allocated RunHandlers
-// that can be used for tracking inter-op work for a given Session::Run().
-// RunHandler(s) in the pool are initially 'inactive'. A RunHandler becomes
-// 'active' when its unique_ptr is returned by Get() and is being used by a
-// client. It becomes 'inactive' once more when its unique_ptr gets destroyed.
-//
-// Expected usage:
-//
-// * Create a single RunHandlerPool (say run_handler_pool_).
-//
-// * When a Session::Run() is invoked, obtain a handler by:
-// auto handler = run_handler_pool_->Get();
-//
-// * Use handler for scheduling all inter-op work by:
-// handler->ScheduleInterOpClosure(closure);
-//
-// This class is thread safe.
-class RunHandlerPool {
- public:
-  explicit RunHandlerPool(int num_inter_op_threads);
-  ~RunHandlerPool();
-
-  // Returns an inactive RunHandler from the pool.
-  //
-  // RunHandlers in RunHandlerPool are initially 'inactive'.
-  // A RunHandler becomes 'active' when its unique_ptr its returned by Get()
-  // and is being used by a client.  It becomes 'inactive' once more when the
-  // unique_ptr is destroyed.
-  //
-  // Will block unless there is an inactive handler.
-  std::unique_ptr<RunHandler> Get();
-
- private:
-  class Impl;
-  friend class RunHandler;
-
-  std::unique_ptr<Impl> impl_;
-};
-
-// RunHandler can be used to schedule inter-op closures to run on a global pool
-// shared across all Session::Run(s).
-//
-// It can only be created via RunHandlerPool::Get().
-//
-// This class can be used instead of directly scheduling closures on a global
-// pool since it maintains a global view across all sessions and optimizes pool
-// scheduling to improve (median and tail) latency.
-//
-// This class is thread safe.
-class RunHandler {
- public:
-  void ScheduleInterOpClosure(std::function<void()> fn);
-
-  ~RunHandler();
-
- private:
-  class Impl;
-  friend class RunHandlerPool::Impl;
-
-  explicit RunHandler(Impl* impl);
-
-  Impl* impl_;  // NOT OWNED.
-};
-
-}  // end namespace tensorflow.
-
-#endif  // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_
diff --git a/tensorflow/core/framework/run_handler_util.cc b/tensorflow/core/framework/run_handler_util.cc
deleted file mode 100644
index 3087998c69..0000000000
--- a/tensorflow/core/framework/run_handler_util.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/run_handler_util.h"
-
-#include <algorithm>
-#include <cmath>
-#include "tensorflow/core/platform/logging.h"
-
-namespace tensorflow {
-
-void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads,
-                                    int min_threads_per_request,
-                                    std::vector<std::uint_fast32_t>* start_vec,
-                                    std::vector<std::uint_fast32_t>* end_vec) {
-  // Each request is expected to have weight W[i] = num_active_requests - i.
-  // Therefore, total_weight = sum of all request weights.
-  float total_weight = 0.5f * num_active_requests * (num_active_requests + 1);
-  float demand_factor = static_cast<float>(num_threads) / total_weight;
-  float last_cumulative_weight = 0.0;
-  min_threads_per_request = std::max(1, min_threads_per_request);
-  for (int i = 0; i != num_active_requests; i++) {
-    float cumulative_weight =
-        static_cast<float>(i + 1) *
-        (num_active_requests - static_cast<float>(i) * 0.5f);
-    float weight = cumulative_weight - last_cumulative_weight;
-    // Quantize thread_demand by rounding up, and also satisfying
-    // `min_threads_per_request` constraint.
-    // Note: We subtract a small epsilon (0.00001) to prevent ceil(..) from
-    // rounding weights like 4.0 to 5.
-    int demand =
-        std::max(min_threads_per_request,
-                 static_cast<int>(ceil(weight * demand_factor - 0.00001f)));
-    // For the quantized range [start, end); compute the floor of real start,
-    // and expand downwards from there with length `demand` and adjust for
-    // boundary conditions.
-    int start = last_cumulative_weight * demand_factor;
-    int end = std::min(num_threads, start + demand);
-    start = std::max(0, std::min(start, end - demand));
-    start_vec->at(i) = start;
-    end_vec->at(i) = end;
-    last_cumulative_weight = cumulative_weight;
-  }
-}
-}  // namespace tensorflow
diff --git a/tensorflow/core/framework/run_handler_util.h b/tensorflow/core/framework/run_handler_util.h
deleted file mode 100644
index c0c36aeccb..0000000000
--- a/tensorflow/core/framework/run_handler_util.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_
-#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_
-
-#include <cstdint>
-#include <vector>
-
-namespace tensorflow {
-
-// Assign thread ranges to requests.
-// Requests are numbered 0...num_active_requests-1, and
-// threads are numbered 0...num_threads-1.
-// On return, the range start_vec->at(i)...end_vec->at(i)-1
-// indicates the subrange of the threads available to request i.
-// The ranges given to different requests may overlap.
-// Lower numbered requests will tend to be assigned more threads.
-// Thus, a client might associate older requests with lower
-// array indices so they receive access to more threads.
-// However, the routine ensures that each request is given access
-// to at least min(min_threads_per_request, num_threads)  threads.
-// Every thread will be assigned to at least one request range,
-// assuming there is at least one request.
-void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads,
-                                    int min_threads_per_request,
-                                    std::vector<std::uint_fast32_t>* start_vec,
-                                    std::vector<std::uint_fast32_t>* end_vec);
-
-}  // end namespace tensorflow
-#endif  // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_
diff --git a/tensorflow/core/framework/run_handler_util_test.cc b/tensorflow/core/framework/run_handler_util_test.cc
deleted file mode 100644
index a1928c132b..0000000000
--- a/tensorflow/core/framework/run_handler_util_test.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/run_handler_util.h"
-
-#include <vector>
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/test.h"
-namespace tensorflow {
-namespace {
-
-void VerifyFunction(int num_active_requests, int num_threads,
-                    int min_threads_per_request, bool print_stats = false) {
-  if (print_stats) {
-    LOG(INFO) << "Test case# num_active_requests: " << num_active_requests
-              << " num_threads: " << num_threads
-              << " min_threads: " << min_threads_per_request;
-  }
-  std::vector<std::uint_fast32_t> start(num_active_requests);
-  std::vector<std::uint_fast32_t> end(num_active_requests);
-
-  ComputeInterOpSchedulingRanges(num_active_requests, num_threads,
-                                 min_threads_per_request, &start, &end);
-  string range_str = "";
-  for (int i = 0; i < num_active_requests; ++i) {
-    if (i > 0) range_str += " ";
-    range_str += strings::StrCat("[", start[i], ", ", end[i], ")");
-
-    ASSERT_GE(start[i], 0) << range_str;
-    ASSERT_LE(end[i], num_threads) << range_str;
-    if (i > 0) {
-      // Due to linearly decreasing demand, #threads(i - 1) >= #threads(i)
-      ASSERT_GE(end[i - 1] - start[i - 1], end[i] - start[i]) << range_str;
-      // No missing threads.
-      ASSERT_GE(end[i - 1], start[i]) << range_str;
-    }
-    // Each interval is at least of size 'min_threads_per_request'.
-    ASSERT_GE((end[i] - start[i]), min_threads_per_request) << range_str;
-    // Verify that assigned (quantized) threads is not overly estimated
-    // from real demand, when the demand is high (>=
-    // min_threads_per_request).
-    float entry_weight = num_active_requests - i;
-    float total_weight = 0.5f * num_active_requests * (num_active_requests + 1);
-    float thread_demand = (entry_weight * num_threads) / total_weight;
-    if (thread_demand > min_threads_per_request) {
-      // We expect some over-estimation of threads due to quantization,
-      // but we hope it's not more than 1 extra thread.
-      ASSERT_NEAR(end[i] - start[i], thread_demand, 1.0)
-          << "Ranges: " << range_str << " thread_demand: " << thread_demand
-          << " i: " << i;
-    }
-  }
-  ASSERT_EQ(end[num_active_requests - 1], num_threads);
-  ASSERT_EQ(start[0], 0);
-  if (print_stats) {
-    LOG(INFO) << "Assigned ranges: " << range_str;
-  }
-}
-
-TEST(RunHandlerUtilTest, TestComputeInterOpSchedulingRanges) {
-  const int kMinThreadsPerRequestBound = 12;
-  const int kMaxActiveRequests = 128;
-  const int kMaxThreads = 128;
-
-  for (int min_threads_per_request = 1;
-       min_threads_per_request <= kMinThreadsPerRequestBound;
-       ++min_threads_per_request) {
-    for (int num_active_requests = 1; num_active_requests <= kMaxActiveRequests;
-         ++num_active_requests) {
-      for (int num_threads = min_threads_per_request;
-           num_threads <= kMaxThreads; ++num_threads) {
-        VerifyFunction(num_active_requests, num_threads,
-                       min_threads_per_request);
-      }
-    }
-  }
-}
-
-}  // namespace
-}  // namespace tensorflow
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index 104ab039cb..85cd02350a 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -453,11 +453,6 @@ message RunOptions {
     // same group_key value (in a distributed computation where tasks
     // run disjoint graphs).
     int64 collective_graph_key = 1;
-    // If true, then operations (using the inter-op pool) across all
-    // session::run() calls will be centrally scheduled, optimizing for (median
-    // and tail) latency.
-    // Consider using this option for CPU-bound workloads like inference.
-    bool use_run_handler_pool = 2;
   };
 
   Experimental experimental = 8;
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
index 47b5b56faf..537e73aa89 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
@@ -8,11 +8,5 @@ tf_proto {
       label: LABEL_OPTIONAL
       type: TYPE_INT64
     }
-    field {
-      name: "use_run_handler_pool"
-      number: 2
-      label: LABEL_OPTIONAL
-      type: TYPE_BOOL
-    }
   }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
index c0c2e7b9f8..cec04a2bf0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
@@ -55,12 +55,6 @@ tf_proto {
         label: LABEL_OPTIONAL
         type: TYPE_INT64
       }
-      field {
-        name: "use_run_handler_pool"
-        number: 2
-        label: LABEL_OPTIONAL
-        type: TYPE_BOOL
-      }
     }
     enum_type {
       name: "TraceLevel"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt
index 47b5b56faf..537e73aa89 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt
@@ -8,11 +8,5 @@ tf_proto {
       label: LABEL_OPTIONAL
       type: TYPE_INT64
     }
-    field {
-      name: "use_run_handler_pool"
-      number: 2
-      label: LABEL_OPTIONAL
-      type: TYPE_BOOL
-    }
   }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt
index c0c2e7b9f8..cec04a2bf0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt
@@ -55,12 +55,6 @@ tf_proto {
         label: LABEL_OPTIONAL
         type: TYPE_INT64
       }
-      field {
-        name: "use_run_handler_pool"
-        number: 2
-        label: LABEL_OPTIONAL
-        type: TYPE_BOOL
-      }
     }
     enum_type {
       name: "TraceLevel"
-- 
GitLab


From 5f67bf69d3f53d1cd3bb86ebeeb03ea2bba5911b Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Thu, 27 Sep 2018 16:16:26 -0700
Subject: [PATCH 0826/1357] Support nested variants in CopyHostToDevice and
 CopyDeviceToHost.

PiperOrigin-RevId: 214853860
---
 tensorflow/core/common_runtime/copy_tensor.cc | 82 +++++++++++--------
 tensorflow/python/kernel_tests/BUILD          |  4 +-
 .../python/kernel_tests/list_ops_test.py      | 26 ++++++
 3 files changed, 75 insertions(+), 37 deletions(-)

diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc
index d800a86199..6e2eb66b94 100644
--- a/tensorflow/core/common_runtime/copy_tensor.cc
+++ b/tensorflow/core/common_runtime/copy_tensor.cc
@@ -61,26 +61,33 @@ void CopyHostToDevice(const Tensor* input, Allocator* cpu_allocator,
       status_cb->Unref();
     };
     auto copier = std::bind(
-        [dst, recv_dev_context, out_allocator, status_cb](
-            StatusCallback wrapped_done_,
-            // Begin unbound arguments
-            const Tensor& from, Tensor* to) {
-          if (!DMAHelper::CanUseDMA(&from)) {
-            Status err = errors::InvalidArgument(
-                "During Variant Host->Device Copy: "
-                "non-DMA-copy attempted of tensor type: ",
-                DataTypeString(from.dtype()));
-            status_cb->UpdateStatus(err);
-            return err;
-          }
-          if (status_cb->ok()) {
+        [dst, recv_dev_context, out_allocator, status_cb, cpu_allocator,
+         edge_name](StatusCallback wrapped_done_,
+                    // Begin unbound arguments
+                    const Tensor& from, Tensor* to) {
+          if (from.dtype() == DT_VARIANT) {
             status_cb->Ref();
-            *to = Tensor(out_allocator, from.dtype(), from.shape());
-            recv_dev_context->CopyCPUTensorToDevice(&from, dst, to,
-                                                    wrapped_done_);
+            CopyHostToDevice(&from, cpu_allocator, out_allocator, edge_name,
+                             dst, to, recv_dev_context, wrapped_done_);
             return Status::OK();
           } else {
-            return status_cb->status();
+            if (!DMAHelper::CanUseDMA(&from)) {
+              Status err = errors::InvalidArgument(
+                  "During Variant Host->Device Copy: "
+                  "non-DMA-copy attempted of tensor type: ",
+                  DataTypeString(from.dtype()));
+              status_cb->UpdateStatus(err);
+              return err;
+            }
+            if (status_cb->ok()) {
+              status_cb->Ref();
+              *to = Tensor(out_allocator, from.dtype(), from.shape());
+              recv_dev_context->CopyCPUTensorToDevice(&from, dst, to,
+                                                      wrapped_done_);
+              return Status::OK();
+            } else {
+              return status_cb->status();
+            }
           }
         },
         std::move(wrapped_done), std::placeholders::_1, std::placeholders::_2);
@@ -119,26 +126,33 @@ void CopyDeviceToHost(const Tensor* input, Allocator* cpu_allocator,
       status_cb->Unref();
     };
     auto copier = std::bind(
-        [edge_name, src, send_dev_context, out_allocator, status_cb](
-            StatusCallback wrapped_done_,
-            // Begin unbound arguments
-            const Tensor& from, Tensor* to) {
-          if (!DMAHelper::CanUseDMA(&from)) {
-            Status err = errors::InvalidArgument(
-                "During Variant Device->Host Copy: "
-                "non-DMA-copy attempted of tensor type: ",
-                DataTypeString(from.dtype()));
-            status_cb->UpdateStatus(err);
-            return err;
-          }
-          if (status_cb->ok()) {
+        [edge_name, src, send_dev_context, out_allocator, status_cb,
+         cpu_allocator](StatusCallback wrapped_done_,
+                        // Begin unbound arguments
+                        const Tensor& from, Tensor* to) {
+          if (from.dtype() == DT_VARIANT) {
             status_cb->Ref();
-            *to = Tensor(out_allocator, from.dtype(), from.shape());
-            send_dev_context->CopyDeviceTensorToCPU(&from, edge_name, src, to,
-                                                    wrapped_done_);
+            CopyDeviceToHost(&from, cpu_allocator, out_allocator, edge_name,
+                             src, to, send_dev_context, wrapped_done_);
             return Status::OK();
           } else {
-            return status_cb->status();
+            if (!DMAHelper::CanUseDMA(&from)) {
+              Status err = errors::InvalidArgument(
+                  "During Variant Device->Host Copy: "
+                  "non-DMA-copy attempted of tensor type: ",
+                  DataTypeString(from.dtype()));
+              status_cb->UpdateStatus(err);
+              return err;
+            }
+            if (status_cb->ok()) {
+              status_cb->Ref();
+              *to = Tensor(out_allocator, from.dtype(), from.shape());
+              send_dev_context->CopyDeviceTensorToCPU(&from, edge_name, src, to,
+                                                      wrapped_done_);
+              return Status::OK();
+            } else {
+              return status_cb->status();
+            }
           }
         },
         std::move(wrapped_done), std::placeholders::_1, std::placeholders::_2);
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index c2e36e5e19..280c18ec00 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -3257,8 +3257,7 @@ tf_py_test(
     tags = ["no_gpu"],  # TODO(b/111656070)
 )
 
-# TODO(b/116053459): Replace with cuda_py_test.
-tf_py_test(
+cuda_py_test(
     name = "while_v2_test",
     size = "medium",
     srcs = ["while_v2_test.py"],
@@ -3278,5 +3277,4 @@ tf_py_test(
         "//tensorflow/python:while_v2",
     ],
     grpc_enabled = True,
-    tags = ["no_gpu"],  # TODO(b/116053459)
 )
diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py
index 0f5607712b..ae413edaec 100644
--- a/tensorflow/python/kernel_tests/list_ops_test.py
+++ b/tensorflow/python/kernel_tests/list_ops_test.py
@@ -170,6 +170,32 @@ class ListOpsTest(test_util.TensorFlowTestCase):
             list_ops.tensor_list_pop_back(
                 l_cpu, element_dtype=dtypes.float32)[1]), 2.0)
 
+  @test_util.run_in_graph_and_eager_modes
+  def testCPUGPUCopyNested(self):
+    if not context.num_gpus():
+      return
+    t = constant_op.constant([1.0, 2.0])
+    child_l = list_ops.tensor_list_from_tensor(t, element_shape=scalar_shape())
+    l = list_ops.empty_tensor_list(
+        element_shape=constant_op.constant([], dtype=dtypes.int32),
+        element_dtype=dtypes.variant)
+    l = list_ops.tensor_list_push_back(l, child_l)
+    with context.device("gpu:0"):
+      l_gpu = array_ops.identity(l)
+      _, child_l_gpu = list_ops.tensor_list_pop_back(
+          l_gpu, element_dtype=dtypes.variant)
+      self.assertAllEqual(
+          self.evaluate(
+              list_ops.tensor_list_pop_back(
+                  child_l_gpu, element_dtype=dtypes.float32)[1]), 2.0)
+    l_cpu = array_ops.identity(l_gpu)
+    _, child_l_cpu = list_ops.tensor_list_pop_back(
+        l_cpu, element_dtype=dtypes.variant)
+    self.assertAllEqual(
+        self.evaluate(
+            list_ops.tensor_list_pop_back(
+                child_l_cpu, element_dtype=dtypes.float32)[1]), 2.0)
+
   def testGraphStack(self):
     with self.cached_session():
       tl = list_ops.empty_tensor_list(
-- 
GitLab


From 2330933ddd0b29ad206e351c9120e621cdaf6312 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Thu, 27 Sep 2018 16:19:09 -0700
Subject: [PATCH 0827/1357] Rename TFLite Extended -> TFLite Flex

PiperOrigin-RevId: 214854303
---
 tensorflow/contrib/lite/build_def.bzl                  |  4 ++--
 tensorflow/contrib/lite/model.cc                       |  4 ++--
 tensorflow/contrib/lite/python/convert.py              |  8 ++++----
 tensorflow/contrib/lite/python/lite_test.py            |  4 ++--
 tensorflow/contrib/lite/testing/generate_examples.py   | 10 +++++-----
 .../testing/model_coverage/model_coverage_lib_test.py  |  2 +-
 tensorflow/contrib/lite/tools/benchmark/BUILD          |  4 ++--
 .../lite/tools/benchmark/benchmark_tflite_model.cc     |  8 ++++----
 .../lite/tools/benchmark/benchmark_tflite_model.h      |  8 ++++----
 9 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 7f5c6bdc2f..7ef26de69f 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -301,7 +301,7 @@ def generated_test_conversion_modes():
     """Returns a list of conversion modes."""
 
     # TODO(nupurgarg): Add "pb2lite" when it's in open source. b/113614050.
-    return ["toco-extended", ""]
+    return ["toco-flex", ""]
 
 def generated_test_models_all():
     """Generates a list of all tests with the different converters.
@@ -335,7 +335,7 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs):
         # TODO(nupurgarg): Comment in when pb2lite is in open source. b/113614050.
         # if conversion_mode == "pb2lite":
         #     toco = "//tensorflow/contrib/lite/experimental/pb2lite:pb2lite"
-        flags = "--ignore_toco_errors --run_with_extended"
+        flags = "--ignore_toco_errors --run_with_flex"
         kwargs["tags"].append("skip_already_failing")
         kwargs["tags"].append("no_oss")
         kwargs["tags"].append("notap")
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index eff6181a61..d50c345194 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -27,7 +27,7 @@ limitations under the License.
 #ifndef TFLITE_MCU
 #include "tensorflow/contrib/lite/nnapi_delegate.h"
 #endif
-#if defined(TFLITE_EXTENDED)
+#if defined(TFLITE_FLEX)
 #include "tensorflow/contrib/lite/delegates/flex/delegate.h"
 #endif
 #include "tensorflow/contrib/lite/version.h"
@@ -450,7 +450,7 @@ TfLiteStatus InterpreterBuilder::operator()(
   }
   (**interpreter).SetVariables(std::move(variables));
 
-#if defined(TFLITE_EXTENDED)
+#if defined(TFLITE_FLEX)
   if (auto delegate = FlexDelegate::Create()) {
     (**interpreter)
         .ModifyGraphWithDelegate(std::move(delegate),
diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 73a420c47b..613a1530f7 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -67,12 +67,12 @@ class ConverterMode(enum.Enum):
   # Convert model using TOCO such that only unsupported operations are
   # represented as TensorFlow ops.
   # WARNING: Experimental interface, subject to change.
-  TOCO_EXTENDED = "TOCO_EXTENDED"
+  TOCO_FLEX = "TOCO_FLEX"
 
   # Convert model using TOCO such that all operations are represented as
   # TensorFlow ops.
   # WARNING: Experimental interface, subject to change.
-  TOCO_EXTENDED_ALL = "TOCO_EXTENDED_ALL"
+  TOCO_FLEX_ALL = "TOCO_FLEX_ALL"
 
   def __str__(self):
     return self.value
@@ -240,9 +240,9 @@ def build_toco_convert_protos(input_tensors,
   if dump_graphviz_dir:
     toco.dump_graphviz_dir = dump_graphviz_dir
   toco.dump_graphviz_include_video = dump_graphviz_video
-  if converter_mode == ConverterMode.TOCO_EXTENDED:
+  if converter_mode == ConverterMode.TOCO_FLEX:
     toco.allow_flex_ops = True
-  elif converter_mode == ConverterMode.TOCO_EXTENDED_ALL:
+  elif converter_mode == ConverterMode.TOCO_FLEX_ALL:
     toco.allow_flex_ops = True
     toco.force_flex_ops = True
 
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index 7b0df01d1d..d243a494f6 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -412,7 +412,7 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     # Ensure that the quantized weights tflite model is smaller.
     self.assertTrue(len(quantized_tflite) < len(float_tflite))
 
-  def testExtendedMode(self):
+  def testFlexMode(self):
     in_tensor = array_ops.placeholder(
         shape=[1, 16, 16, 3], dtype=dtypes.float32)
     out_tensor = in_tensor + in_tensor
@@ -421,7 +421,7 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     # Convert model and ensure model is not None.
     converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
                                                   [out_tensor])
-    converter.converter_mode = lite.ConverterMode.TOCO_EXTENDED_ALL
+    converter.converter_mode = lite.ConverterMode.TOCO_FLEX_ALL
     tflite_model = converter.convert()
     self.assertTrue(tflite_model)
 
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 53bd88d087..18036fac6f 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -81,9 +81,9 @@ parser.add_argument(
     action="store_true",
     help="Include intermediate graphdefs in the output zip files.")
 parser.add_argument(
-    "--run_with_extended",
+    "--run_with_flex",
     action="store_true",
-    help="Whether the TFLite Extended converter is being used.")
+    help="Whether the TFLite Flex converter is being used.")
 
 RANDOM_SEED = 342
 TEST_INPUT_DEPTH = 3
@@ -339,10 +339,10 @@ def toco_convert(graph_def_str, input_tensors, output_tensors,
     graphdef_file.flush()
 
     # TODO(aselle): Switch this to subprocess at some point.
-    if "pb2lite" in bin_path and FLAGS.run_with_extended:
+    if "pb2lite" in bin_path and FLAGS.run_with_flex:
       opts = ("--input_arrays={0} --output_arrays={1}".format(
           ",".join(input_arrays), ",".join(output_tensors)))
-    elif FLAGS.run_with_extended:
+    elif FLAGS.run_with_flex:
       opts += " --allow_flex_ops --force_flex_ops"
     cmd = ("%s --input_file=%s --output_file=%s %s > %s 2>&1" %
            (bin_path, graphdef_file.name, output_file.name, opts,
@@ -3333,7 +3333,7 @@ def main(unused_args):
   # list of valid conversion modes is defined in
   # generated_test_conversion_modes() in build_def.bzl.
   test_function = ("make_%s_tests" % (out.replace(".zip", "").replace(
-      "pb2lite", "").replace("toco-extended", "").rstrip("_")))
+      "pb2lite", "").replace("toco-flex", "").rstrip("_")))
   if test_function not in globals():
     raise RuntimeError("Can't find a test function to create %r. Tried %r" %
                        (out, test_function))
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
index 5f3355e734..1498f86c6f 100644
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
@@ -123,7 +123,7 @@ class EvaluateKerasModel(test.TestCase):
     keras_file = self._saveKerasModel(model)
 
     model_coverage.test_keras_model(
-        keras_file, converter_mode=lite.ConverterMode.TOCO_EXTENDED)
+        keras_file, converter_mode=lite.ConverterMode.TOCO_FLEX)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD
index bc18d40313..502e181139 100644
--- a/tensorflow/contrib/lite/tools/benchmark/BUILD
+++ b/tensorflow/contrib/lite/tools/benchmark/BUILD
@@ -40,7 +40,7 @@ cc_binary(
     srcs = [
         "benchmark_main.cc",
     ],
-    copts = common_copts + ["-DTFLITE_EXTENDED"],
+    copts = common_copts + ["-DTFLITE_FLEX"],
     linkopts = tflite_linkopts() + select({
         "//tensorflow:android": [
             "-pie",  # Android 5.0 and later supports only PIE
@@ -117,7 +117,7 @@ cc_library(
         "logging.h",
     ],
     hdrs = ["benchmark_tflite_model.h"],
-    copts = common_copts + ["-DTFLITE_EXTENDED"],
+    copts = common_copts + ["-DTFLITE_FLEX"],
     deps = [
         ":benchmark_model_lib",
         ":logging",
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
index d989ee720d..463d5993f4 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -23,9 +23,9 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#ifdef TFLITE_EXTENDED
+#ifdef TFLITE_FLEX
 #include "tensorflow/contrib/lite/delegates/flex/delegate.h"
-#endif  // TFLITE_EXTENDED
+#endif  // TFLITE_FLEX
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/op_resolver.h"
@@ -305,14 +305,14 @@ void BenchmarkTfLiteModel::Init() {
 
   interpreter->UseNNAPI(use_nnapi);
 
-#ifdef TFLITE_EXTENDED
+#ifdef TFLITE_FLEX
   TFLITE_LOG(INFO) << "Instantiating Flex Delegate";
   delegate_ = FlexDelegate::Create();
   if (delegate_) {
     interpreter->ModifyGraphWithDelegate(delegate_.get(),
                                          /*allow_dynamic_tensors=*/true);
   }
-#endif  // TFLITE_EXTENDED
+#endif  // TFLITE_FLEX
 
   auto interpreter_inputs = interpreter->inputs();
 
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
index 9343824b4a..b091e18a29 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
@@ -20,9 +20,9 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#ifdef TFLITE_EXTENDED
+#ifdef TFLITE_FLEX
 #include "tensorflow/contrib/lite/delegates/flex/delegate.h"
-#endif  // TFLITE_EXTENDED
+#endif  // TFLITE_FLEX
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
 #include "tensorflow/contrib/lite/tools/benchmark/benchmark_model.h"
@@ -73,9 +73,9 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
   void PrepareInputsAndOutputs() override;
 
  private:
-#ifdef TFLITE_EXTENDED
+#ifdef TFLITE_FLEX
   std::unique_ptr<FlexDelegate> delegate_;
-#endif  // TFLITE_EXTENDED
+#endif  // TFLITE_FLEX
   std::unique_ptr<tflite::FlatBufferModel> model;
   std::unique_ptr<tflite::Interpreter> interpreter;
   std::vector<InputLayerInfo> inputs;
-- 
GitLab


From 0a9ee95ed9c26bef58e9daadcb6935807d90fcd3 Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Thu, 27 Sep 2018 17:04:17 -0700
Subject: [PATCH 0828/1357] Disable summary ops from lower-level xla.compile
 API rather than xla.estimator_model_fn

PiperOrigin-RevId: 214860981
---
 tensorflow/contrib/compiler/xla.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py
index 1e30525159..873b03580d 100644
--- a/tensorflow/contrib/compiler/xla.py
+++ b/tensorflow/contrib/compiler/xla.py
@@ -293,7 +293,8 @@ def _compile_internal(computation, inputs=None):
     saved_use_resource = vscope.use_resource
     vscope.set_use_resource(True)
 
-    outputs = computation(*computation_inputs)
+    with _disable_summary_context():
+      outputs = computation(*computation_inputs)
 
     # Restore variable scope after computation.
     vscope.set_use_resource(saved_use_resource)
@@ -371,13 +372,13 @@ def _disable_summary_context():
   Yields:
     None.
   """
-  origional_skip_summary_func = summary_op_util.skip_summary
+  original_skip_summary_func = summary_op_util.skip_summary
   summary_op_util.skip_summary = lambda: True
 
   try:
     yield
   finally:
-    summary_op_util.skip_summary = origional_skip_summary_func
+    summary_op_util.skip_summary = original_skip_summary_func
 
 
 class _CapturedObject(object):
@@ -436,8 +437,7 @@ class _ModelFnWrapper(object):
     if mode == model_fn_lib.ModeKeys.TRAIN:
       train_step, captured_scaffold_fn = self._make_train_step(
           features, labels, params)
-      with _disable_summary_context():
-        (loss,) = compile(train_step)
+      (loss,) = compile(train_step)
       return model_fn_lib.EstimatorSpec(
           mode=mode,
           loss=loss,
@@ -446,8 +446,7 @@ class _ModelFnWrapper(object):
     elif mode == model_fn_lib.ModeKeys.EVAL:
       eval_step, captured_eval_metric_fn, captured_scaffold_fn = (
           self._make_eval_step(features, labels, params))
-      with _disable_summary_context():
-        outputs = compile(eval_step)
+      outputs = compile(eval_step)
       loss = outputs[0]
 
       # Calculate eval_metric_ops if eval_metric_fn is set and captured.
-- 
GitLab


From 9b8390e7cd664d8fad9dd3f7172a56135585b481 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Thu, 27 Sep 2018 17:04:19 -0700
Subject: [PATCH 0829/1357] Remove testing non-core APIs from
 api_compatibility_test.

Some APIs are moving out of core TF repo. These APIs will have their own
API compat tests. Adding flag --only_test_core_api=true which will not
check for changes to non-core APIs.

PiperOrigin-RevId: 214860984
---
 tensorflow/tools/api/tests/BUILD              |  1 +
 .../tools/api/tests/api_compatibility_test.py | 39 +++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD
index 4efa4a9651..3cbea41dca 100644
--- a/tensorflow/tools/api/tests/BUILD
+++ b/tensorflow/tools/api/tests/BUILD
@@ -19,6 +19,7 @@ py_test(
         "api_compatibility_test.py",
         "//tensorflow:tf_python_api_gen_v2",
     ],
+    args = ["--only_test_core_api=true"],
     data = [
         "//tensorflow/tools/api/golden:api_golden_v1",
         "//tensorflow/tools/api/golden:api_golden_v2",
diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index d06c7f2d49..6487a6267e 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -56,6 +56,14 @@ _UPDATE_GOLDENS_HELP = """
      have to be authorized by TensorFlow leads.
 """
 
+# DEFINE_boolean, only_test_core_api, default False:
+_ONLY_TEST_CORE_API_HELP = """
+    Some TF APIs are being moved outside of the tensorflow/ directory. There is
+    no garuntee which versions of these APIs will be present when running this
+    test. Therefore, do not error out on API changes in non-core TF code
+    if this flag is set.
+"""
+
 # DEFINE_boolean, verbose_diffs, default True:
 _VERBOSE_DIFFS_HELP = """
      If set to true, print line by line diffs on all libraries. If set to
@@ -67,6 +75,8 @@ _API_GOLDEN_FOLDER_V2 = 'tensorflow/tools/api/golden/v2'
 _TEST_README_FILE = 'tensorflow/tools/api/tests/README.txt'
 _UPDATE_WARNING_FILE = 'tensorflow/tools/api/tests/API_UPDATE_WARNING.txt'
 
+_NON_CORE_PACKAGES = ['estimator']
+
 
 def _KeyToFilePath(key, api_version):
   """From a given key, construct a filepath.
@@ -111,6 +121,19 @@ def _VerifyNoSubclassOfMessageVisitor(path, parent, unused_children):
         'They are not yet supported by the API tools.' % path)
 
 
+def _FilterNonCoreGoldenFiles(golden_file_list):
+  """Filter out non-core API pbtxt files."""
+  filtered_file_list = []
+  filtered_package_prefixes = [
+      'tensorflow.%s.' % p for p in _NON_CORE_PACKAGES]
+  for f in golden_file_list:
+    if any([f.rsplit('/')[-1].startswith(pre)
+            for pre in filtered_package_prefixes]):
+      continue
+    filtered_file_list.append(f)
+  return filtered_file_list
+
+
 class ApiCompatibilityTest(test.TestCase):
 
   def __init__(self, *args, **kwargs):
@@ -233,6 +256,9 @@ class ApiCompatibilityTest(test.TestCase):
       return
     visitor = public_api.PublicAPIVisitor(_VerifyNoSubclassOfMessageVisitor)
     visitor.do_not_descend_map['tf'].append('contrib')
+    if FLAGS.only_test_core_api:
+      visitor.do_not_descend_map['tf'].extend(
+          _NON_CORE_PACKAGES)
     traverse.traverse(tf_v2.compat.v1, visitor)
 
   def testNoSubclassOfMessageV2(self):
@@ -240,6 +266,9 @@ class ApiCompatibilityTest(test.TestCase):
       return
     visitor = public_api.PublicAPIVisitor(_VerifyNoSubclassOfMessageVisitor)
     visitor.do_not_descend_map['tf'].append('contrib')
+    if FLAGS.only_test_core_api:
+      visitor.do_not_descend_map['tf'].extend(
+          _NON_CORE_PACKAGES)
     traverse.traverse(tf_v2, visitor)
 
   def _checkBackwardsCompatibility(
@@ -252,6 +281,9 @@ class ApiCompatibilityTest(test.TestCase):
     public_api_visitor.do_not_descend_map['tf'].append('contrib')
     public_api_visitor.do_not_descend_map['tf.GPUOptions'] = [
         'Experimental']
+    if FLAGS.only_test_core_api:
+      public_api_visitor.do_not_descend_map['tf'].extend(
+          _NON_CORE_PACKAGES)
     if additional_private_map:
       public_api_visitor.private_map.update(additional_private_map)
 
@@ -260,6 +292,8 @@ class ApiCompatibilityTest(test.TestCase):
 
     # Read all golden files.
     golden_file_list = file_io.get_matching_files(golden_file_pattern)
+    if FLAGS.only_test_core_api:
+      golden_file_list = _FilterNonCoreGoldenFiles(golden_file_list)
 
     def _ReadFileToProto(filename):
       """Read a filename, create a protobuf from its contents."""
@@ -325,6 +359,11 @@ if __name__ == '__main__':
   parser = argparse.ArgumentParser()
   parser.add_argument(
       '--update_goldens', type=bool, default=False, help=_UPDATE_GOLDENS_HELP)
+  # TODO(mikecase): Create Estimator's own API compatibility test or
+  # a more general API compatibility test for use for TF components.
+  parser.add_argument(
+      '--only_test_core_api', type=bool, default=False,
+      help=_ONLY_TEST_CORE_API_HELP)
   parser.add_argument(
       '--verbose_diffs', type=bool, default=True, help=_VERBOSE_DIFFS_HELP)
   FLAGS, unparsed = parser.parse_known_args()
-- 
GitLab


From 7fbc44d63b25eddfc384922809426319728f949c Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Thu, 27 Sep 2018 17:19:17 -0700
Subject: [PATCH 0830/1357] [Java]: Release 1.11.0

PiperOrigin-RevId: 214862838
---
 tensorflow/java/maven/libtensorflow/pom.xml              | 2 +-
 tensorflow/java/maven/libtensorflow_jni/pom.xml          | 2 +-
 tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml      | 2 +-
 tensorflow/java/maven/pom.xml                            | 2 +-
 tensorflow/java/maven/proto/pom.xml                      | 2 +-
 tensorflow/java/maven/spark-tensorflow-connector/pom.xml | 2 +-
 tensorflow/java/maven/tensorflow-hadoop/pom.xml          | 2 +-
 tensorflow/java/maven/tensorflow/pom.xml                 | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml
index 9fc6969c20..6b3e305e5d 100644
--- a/tensorflow/java/maven/libtensorflow/pom.xml
+++ b/tensorflow/java/maven/libtensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc2</version>
+    <version>1.11.0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml
index 68712082e1..f130515934 100644
--- a/tensorflow/java/maven/libtensorflow_jni/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc2</version>
+    <version>1.11.0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
index f031173c99..67ecc2d597 100644
--- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc2</version>
+    <version>1.11.0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni_gpu</artifactId>
diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml
index 2cac27990e..8ba859da01 100644
--- a/tensorflow/java/maven/pom.xml
+++ b/tensorflow/java/maven/pom.xml
@@ -6,7 +6,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.tensorflow</groupId>
   <artifactId>parentpom</artifactId>
-  <version>1.11.0-rc2</version>
+  <version>1.11.0</version>
   <packaging>pom</packaging>
 
   <url>https://www.tensorflow.org</url>
diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml
index 8a93091276..dcd654d713 100644
--- a/tensorflow/java/maven/proto/pom.xml
+++ b/tensorflow/java/maven/proto/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc2</version>
+    <version>1.11.0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>proto</artifactId>
diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
index 014bd8d212..45214f834c 100644
--- a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
+++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
@@ -6,7 +6,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>spark-tensorflow-connector_2.11</artifactId>
     <packaging>jar</packaging>
-    <version>1.11.0-rc2</version>
+    <version>1.11.0</version>
     <name>spark-tensorflow-connector</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord connector for Apache Spark DataFrames</description>
diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
index d07c5fcd98..a8669ee72b 100644
--- a/tensorflow/java/maven/tensorflow-hadoop/pom.xml
+++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
@@ -5,7 +5,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>tensorflow-hadoop</artifactId>
     <packaging>jar</packaging>
-    <version>1.11.0-rc2</version>
+    <version>1.11.0</version>
     <name>tensorflow-hadoop</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop</description>
diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml
index af0c68a4ed..67d628ba11 100644
--- a/tensorflow/java/maven/tensorflow/pom.xml
+++ b/tensorflow/java/maven/tensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0-rc2</version>
+    <version>1.11.0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>tensorflow</artifactId>
-- 
GitLab


From f7e5a4e5f1de355cbbe70215f08d962e027cd0dc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 17:20:54 -0700
Subject: [PATCH 0831/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 214863042
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 53 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 53 +++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 7625524674..32ce31cf23 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -44855,6 +44855,59 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "ReduceDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "initial_state"
+    type_list_attr: "Tstate"
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Tstate"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "use_inter_op_parallelism"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
 op {
   name: "ReduceJoin"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 83af07431c..02a7f8d717 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -22868,6 +22868,59 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "ReduceDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "initial_state"
+    type_list_attr: "Tstate"
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Tstate"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "use_inter_op_parallelism"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
 op {
   name: "ReduceJoin"
   input_arg {
-- 
GitLab


From c1f557705143f69988ec272f2cf659c7d525974c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 17:45:56 -0700
Subject: [PATCH 0832/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 214866490

---
 tensorflow/go/op/wrappers.go | 508 +++++++++++++++++------------------
 1 file changed, 254 insertions(+), 254 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 96df1eee30..2f297d5161 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -26837,6 +26837,260 @@ func DecodeGif(scope *Scope, contents tf.Output) (image tf.Output) {
 	return op.Output(0)
 }
 
+// LearnedUnigramCandidateSamplerAttr is an optional argument to LearnedUnigramCandidateSampler.
+type LearnedUnigramCandidateSamplerAttr func(optionalAttr)
+
+// LearnedUnigramCandidateSamplerSeed sets the optional seed attribute to value.
+//
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func LearnedUnigramCandidateSamplerSeed(value int64) LearnedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// LearnedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func LearnedUnigramCandidateSamplerSeed2(value int64) LearnedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Generates labels for candidate sampling with a learned unigram distribution.
+//
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
+//
+// Arguments:
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to randomly sample.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
+//
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func LearnedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LearnedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LearnedUnigramCandidateSampler",
+		Input: []tf.Input{
+			true_classes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// SerializeSparseAttr is an optional argument to SerializeSparse.
+type SerializeSparseAttr func(optionalAttr)
+
+// SerializeSparseOutType sets the optional out_type attribute to value.
+//
+// value: The `dtype` to use for serialization; the supported types are `string`
+// (default) and `variant`.
+// If not specified, defaults to DT_STRING
+func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Serialize a `SparseTensor` into a `[3]` `Tensor` object.
+//
+// Arguments:
+//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
+//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
+//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
+func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "SerializeSparse",
+		Input: []tf.Input{
+			sparse_indices, sparse_values, sparse_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2.
+type RandomShuffleQueueV2Attr func(optionalAttr)
+
+// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value.
+//
+// value: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types. If the length of
+// this attr is 0, the shapes of queue elements are not constrained, and
+// only one element may be dequeued at a time.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shapes"] = value
+	}
+}
+
+// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value.
+//
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value.
+//
+// value: Dequeue will block unless there would be this
+// many elements after the dequeue or the queue is closed. This
+// ensures a minimum level of mixing of elements.
+// If not specified, defaults to 0
+func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["min_after_dequeue"] = value
+	}
+}
+
+// RandomShuffleQueueV2Seed sets the optional seed attribute to value.
+//
+// value: If either seed or seed2 is set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, a random seed is used.
+// If not specified, defaults to 0
+func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// RandomShuffleQueueV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
+// If not specified, defaults to ""
+func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A queue that randomizes the order of elements.
+//
+// Arguments:
+//	component_types: The type of each component in a value.
+//
+// Returns The handle to the queue.
+func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"component_types": component_types}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomShuffleQueueV2",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Draw bounding boxes on a batch of images.
+//
+// Outputs a copy of `images` but draws on top of the pixels zero or more bounding
+// boxes specified by the locations in `boxes`. The coordinates of the each
+// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The
+// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
+// height of the underlying image.
+//
+// For example, if an image is 100 x 200 pixels (height x width) and the bounding
+// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
+// the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
+//
+// Parts of the bounding box may fall outside the image.
+//
+// Arguments:
+//	images: 4-D with shape `[batch, height, width, depth]`. A batch of images.
+//	boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding
+// boxes.
+//
+// Returns 4-D with the same shape as `images`. The batch of input images with
+// bounding boxes drawn on the images.
+func DrawBoundingBoxes(scope *Scope, images tf.Output, boxes tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DrawBoundingBoxes",
+		Input: []tf.Input{
+			images, boxes,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Gets the next output from the given iterator.
 //
 // This operation is a synchronous version IteratorGetNext. It should only be used
@@ -30988,260 +31242,6 @@ func TensorArraySplitV3(scope *Scope, handle tf.Output, value tf.Output, lengths
 	return op.Output(0)
 }
 
-// SerializeSparseAttr is an optional argument to SerializeSparse.
-type SerializeSparseAttr func(optionalAttr)
-
-// SerializeSparseOutType sets the optional out_type attribute to value.
-//
-// value: The `dtype` to use for serialization; the supported types are `string`
-// (default) and `variant`.
-// If not specified, defaults to DT_STRING
-func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// Serialize a `SparseTensor` into a `[3]` `Tensor` object.
-//
-// Arguments:
-//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
-//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
-//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
-func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SerializeSparse",
-		Input: []tf.Input{
-			sparse_indices, sparse_values, sparse_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2.
-type RandomShuffleQueueV2Attr func(optionalAttr)
-
-// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value.
-//
-// value: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types. If the length of
-// this attr is 0, the shapes of queue elements are not constrained, and
-// only one element may be dequeued at a time.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shapes"] = value
-	}
-}
-
-// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value.
-//
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value.
-//
-// value: Dequeue will block unless there would be this
-// many elements after the dequeue or the queue is closed. This
-// ensures a minimum level of mixing of elements.
-// If not specified, defaults to 0
-func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["min_after_dequeue"] = value
-	}
-}
-
-// RandomShuffleQueueV2Seed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 is set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, a random seed is used.
-// If not specified, defaults to 0
-func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// RandomShuffleQueueV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A queue that randomizes the order of elements.
-//
-// Arguments:
-//	component_types: The type of each component in a value.
-//
-// Returns The handle to the queue.
-func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"component_types": component_types}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "RandomShuffleQueueV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Draw bounding boxes on a batch of images.
-//
-// Outputs a copy of `images` but draws on top of the pixels zero or more bounding
-// boxes specified by the locations in `boxes`. The coordinates of the each
-// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The
-// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
-// height of the underlying image.
-//
-// For example, if an image is 100 x 200 pixels (height x width) and the bounding
-// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
-// the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
-//
-// Parts of the bounding box may fall outside the image.
-//
-// Arguments:
-//	images: 4-D with shape `[batch, height, width, depth]`. A batch of images.
-//	boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding
-// boxes.
-//
-// Returns 4-D with the same shape as `images`. The batch of input images with
-// bounding boxes drawn on the images.
-func DrawBoundingBoxes(scope *Scope, images tf.Output, boxes tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "DrawBoundingBoxes",
-		Input: []tf.Input{
-			images, boxes,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// LearnedUnigramCandidateSamplerAttr is an optional argument to LearnedUnigramCandidateSampler.
-type LearnedUnigramCandidateSamplerAttr func(optionalAttr)
-
-// LearnedUnigramCandidateSamplerSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func LearnedUnigramCandidateSamplerSeed(value int64) LearnedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// LearnedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
-//
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func LearnedUnigramCandidateSamplerSeed2(value int64) LearnedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Generates labels for candidate sampling with a learned unigram distribution.
-//
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
-//
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
-//
-// Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
-//
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func LearnedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LearnedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "LearnedUnigramCandidateSampler",
-		Input: []tf.Input{
-			true_classes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
 // Computes gradients for the scaled exponential linear (Selu) operation.
 //
 // Arguments:
-- 
GitLab


From 4bab3e375b7fffbc8878313089a2bd680952aced Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Thu, 27 Sep 2018 17:54:44 -0700
Subject: [PATCH 0833/1357] Change test size as it has been timing out
 consistently

PiperOrigin-RevId: 214867453
---
 tensorflow/contrib/distribute/python/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 7eead6e472..e329b964c4 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -453,7 +453,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "estimator_training_test",
-    size = "large",
+    size = "enormous",
     srcs = ["estimator_training_test.py"],
     additional_deps = [
         ":combinations",
-- 
GitLab


From 96f3428e33e18477661b8d8cf78f2db457c8881b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 18:43:55 -0700
Subject: [PATCH 0834/1357] Let feature columns correctly handle rank-1 sparse
 tensors from an empty batch.

reshape can't determine the size of the last dimension when reshaping
shape (0) to (0, 1).

PiperOrigin-RevId: 214872677
---
 .../python/feature_column/feature_column.py      |  2 +-
 .../python/feature_column/feature_column_test.py | 12 ++++++++++++
 .../python/feature_column/feature_column_v2.py   |  2 +-
 .../feature_column/feature_column_v2_test.py     | 16 ++++++++++++++++
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 226e273660..618e70f3a5 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -2318,7 +2318,7 @@ class _LazyBuilder(object):
       # Input_tensor must have rank 1.
       if isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
         return sparse_ops.sparse_reshape(
-            input_tensor, [array_ops.shape(input_tensor)[0], -1])
+            input_tensor, [array_ops.shape(input_tensor)[0], 1])
       else:
         return array_ops.expand_dims(input_tensor, -1)
 
diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index abb79efa68..1ae510250c 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -169,6 +169,18 @@ class LazyColumnTest(test.TestCase):
         TypeError, '"key" must be either a "str" or "_FeatureColumn".'):
       builder.get(NotAFeatureColumn())
 
+  def test_expand_dim_rank_1_sparse_tensor_empty_batch(self):
+    # empty 1-D sparse tensor:
+    builder = _LazyBuilder(features={'a': sparse_tensor.SparseTensor(
+        indices=np.reshape(np.array([], dtype=np.int64), (0, 1)),
+        dense_shape=[0],
+        values=np.array([]))})
+    with self.cached_session():
+      spv = builder.get('a').eval()
+      self.assertAllEqual(np.array([0, 1], dtype=np.int64), spv.dense_shape)
+      self.assertAllEqual(
+          np.reshape(np.array([], dtype=np.int64), (0, 2)), spv.indices)
+
 
 class NumericColumnTest(test.TestCase):
 
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 289f6d0d14..538641c251 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -2341,7 +2341,7 @@ class FeatureTransformationCache(object):
       # Input_tensor must have rank 1.
       if isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
         return sparse_ops.sparse_reshape(
-            input_tensor, [array_ops.shape(input_tensor)[0], -1])
+            input_tensor, [array_ops.shape(input_tensor)[0], 1])
       else:
         return array_ops.expand_dims(input_tensor, -1)
 
diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py
index 58168e0f9e..2970431167 100644
--- a/tensorflow/python/feature_column/feature_column_v2_test.py
+++ b/tensorflow/python/feature_column/feature_column_v2_test.py
@@ -177,6 +177,22 @@ class LazyColumnTest(test.TestCase):
         TypeError, '"key" must be either a "str" or "FeatureColumn".'):
       transformation_cache.get(NotAFeatureColumn(), None)
 
+  def test_expand_dim_rank_1_sparse_tensor_empty_batch(self):
+    # empty 1-D sparse tensor:
+    transformation_cache = FeatureTransformationCache(
+        features={
+            'a':
+                sparse_tensor.SparseTensor(
+                    indices=np.reshape(np.array([], dtype=np.int64), (0, 1)),
+                    dense_shape=[0],
+                    values=np.array([]))
+        })
+    with self.cached_session():
+      spv = transformation_cache.get('a', None).eval()
+      self.assertAllEqual(np.array([0, 1], dtype=np.int64), spv.dense_shape)
+      self.assertAllEqual(
+          np.reshape(np.array([], dtype=np.int64), (0, 2)), spv.indices)
+
 
 class NumericColumnTest(test.TestCase):
 
-- 
GitLab


From 70f071f7afb2deffddbd9937d7a76b1e1c0b2b75 Mon Sep 17 00:00:00 2001
From: Revan Sopher <rsopher@google.com>
Date: Thu, 27 Sep 2018 19:20:59 -0700
Subject: [PATCH 0835/1357] Fix failing test.

PiperOrigin-RevId: 214875840
---
 .../estimator_batch/dnn_tree_combined_estimator_test.py       | 3 ++-
 .../contrib/boosted_trees/estimator_batch/estimator_test.py   | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
index 04baa329a0..6b6fe9663a 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
@@ -188,7 +188,8 @@ class CoreDNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase):
 
     # Train for a few steps.
     est.train(input_fn=_train_input_fn, steps=1000)
-    # 10 steps for dnn, 3  for 1 tree of depth 3 + 1 after the tree finished
+    # 10 steps for dnn + 3 for 1 tree of depth 3 + 1 after the tree finished
+    # + 1 for resource variables.
     self._assert_checkpoint(est.model_dir, global_step=15)
     res = est.evaluate(input_fn=_eval_input_fn, steps=1)
     self.assertLess(0.5, res["auc"])
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
index c155128c0e..d7b14e00ba 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
@@ -238,8 +238,8 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase):
         output_leaf_index=False)
 
     classifier.fit(input_fn=_train_input_fn, steps=15)
-    # When no override of global steps, 5 steps were used.
-    self._assert_checkpoint(classifier.model_dir, global_step=5)
+    # When no override of global steps, 6 steps were used.
+    self._assert_checkpoint(classifier.model_dir, global_step=6)
 
   def testOverridesGlobalSteps(self):
     learner_config = learner_pb2.LearnerConfig()
-- 
GitLab


From acb13e448786838feb500973f51279dc90eeab50 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Thu, 27 Sep 2018 20:01:16 -0700
Subject: [PATCH 0836/1357] Fix visibility

PiperOrigin-RevId: 214878220
---
 tensorflow/tools/docs/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD
index b218e900bf..2a858b4fd6 100644
--- a/tensorflow/tools/docs/BUILD
+++ b/tensorflow/tools/docs/BUILD
@@ -37,6 +37,7 @@ py_library(
     name = "doc_controls",
     srcs = ["doc_controls.py"],
     srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
 )
 
 py_test(
-- 
GitLab


From a309e136dcfdd13dc8e8eb7570b6c5945bb6f967 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 20:02:51 -0700
Subject: [PATCH 0837/1357] Keras Lambda - enhancements to output_shape
 computation

PiperOrigin-RevId: 214878428
---
 tensorflow/python/keras/layers/core.py      | 51 +++++++++++++++------
 tensorflow/python/keras/layers/core_test.py | 45 ++++++++++++++++++
 2 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py
index 4032202986..efa21955e6 100644
--- a/tensorflow/python/keras/layers/core.py
+++ b/tensorflow/python/keras/layers/core.py
@@ -671,22 +671,34 @@ class Lambda(Layer):
     if mask is not None:
       self.supports_masking = True
     self.mask = mask
-    if output_shape is None:
-      self._output_shape = None
-    elif isinstance(output_shape, (tuple, list)):
-      self._output_shape = tuple(output_shape)
-    else:
-      if not callable(output_shape):
-        raise TypeError('In Lambda, `output_shape` '
-                        'must be a list, a tuple, or a function.')
-      self._output_shape = output_shape
+    if (output_shape is not None and not isinstance(output_shape,
+                                                    (tuple, list)) and
+        not callable(output_shape)):
+      raise TypeError('In Lambda, `output_shape` '
+                      'must be a list, a tuple, or a function.')
+    # Convert a list representing a single shape into a tuple.
+    if (isinstance(output_shape, list) and isinstance(output_shape[0],
+                                                      (int, type(None)))):
+      output_shape = tuple(output_shape)
+    self._output_shape = output_shape
 
   @tf_utils.shape_type_conversion
   def compute_output_shape(self, input_shape):
     if self._output_shape is None:
       if context.executing_eagerly():
-        raise NotImplementedError
-      x = K.placeholder(shape=input_shape)
+        # Make use of existing autocomputation for Eager mode but provide
+        # Lambda-specific error message.
+        try:
+          return super(Lambda, self).compute_output_shape(input_shape)
+        except NotImplementedError:
+          raise NotImplementedError('We could not automatically infer '
+                                    'the static shape of the Lambda\'s output.'
+                                    ' Please specify the `output_shape` for'
+                                    ' this Lambda.')
+      if isinstance(input_shape, list):
+        x = [K.placeholder(shape=shape) for shape in input_shape]
+      else:
+        x = K.placeholder(shape=input_shape)
       x = self.call(x)
       if isinstance(x, list):
         return [tensor_shape.TensorShape(K.int_shape(x_elem)) for x_elem in x]
@@ -697,16 +709,27 @@ class Lambda(Layer):
         num_samples = input_shape[0][0]
       else:
         num_samples = input_shape[0] if input_shape else None
-      return tensor_shape.TensorShape((num_samples,) +
-                                      tuple(self._output_shape))
+      # List here represents multiple outputs.
+      if isinstance(self._output_shape, list):
+        return [
+            tensor_shape.TensorShape((num_samples,) + tuple(single_shape))
+            for single_shape in self._output_shape
+        ]
+      return tensor_shape.TensorShape((num_samples,) + self._output_shape)
     else:
       shape = self._output_shape(input_shape)
       if not isinstance(shape, (list, tuple)):
         raise ValueError(
             '`output_shape` function must return a tuple or a list of tuples.')
+      # List here can represent multiple outputs or single output.
       if isinstance(shape, list):
-        if isinstance(shape[0], int) or shape[0] is None:
+        # Convert list representing single output into a tuple.
+        if isinstance(shape[0], (int, type(None))):
           shape = tuple(shape)
+        else:
+          return [
+              tensor_shape.TensorShape(single_shape) for single_shape in shape
+          ]
       return tensor_shape.TensorShape(shape)
 
   def call(self, inputs, mask=None):
diff --git a/tensorflow/python/keras/layers/core_test.py b/tensorflow/python/keras/layers/core_test.py
index 1df1d575b1..f0fea1f65c 100644
--- a/tensorflow/python/keras/layers/core_test.py
+++ b/tensorflow/python/keras/layers/core_test.py
@@ -252,6 +252,51 @@ class CoreLayersTest(test.TestCase):
       l(keras.backend.variable(np.ones((1, 1))))
       self.assertEqual('lambda', l.get_config()['output_shape_type'])
 
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_lambda_output_shape_autocalculate_multiple_inputs(self):
+
+    def lambda_fn(x):
+      return math_ops.matmul(x[0], x[1])
+
+    l = keras.layers.Lambda(lambda_fn)
+    output_shape = l.compute_output_shape([(10, 10), (10, 20)])
+    self.assertAllEqual((10, 20), output_shape)
+
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_lambda_output_shape_list_multiple_outputs(self):
+
+    def lambda_fn(x):
+      return x
+
+    l = keras.layers.Lambda(lambda_fn, output_shape=[(10,), (20,)])
+    output_shape = l.compute_output_shape([(10, 10), (10, 20)])
+    self.assertAllEqual([(10, 10), (10, 20)], output_shape)
+
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_lambda_output_shape_tuple_with_none(self):
+
+    def lambda_fn(x):
+      return x
+
+    l = keras.layers.Lambda(lambda_fn, output_shape=(None, 10))
+    output_shape = l.compute_output_shape((5, 10, 20))
+    # Dimension(None) != Dimension(None), so check
+    # str representations for equality.
+    self.assertAllEqual(('5', '?', '10'), tuple([str(s) for s in output_shape]))
+
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_lambda_output_shape_function_multiple_outputs(self):
+
+    def lambda_fn(x):
+      return x
+
+    def output_shape_fn(input_shape):
+      return input_shape
+
+    l = keras.layers.Lambda(lambda_fn, output_shape=output_shape_fn)
+    output_shape = l.compute_output_shape([(10, 10), (10, 20)])
+    self.assertAllEqual([(10, 10), (10, 20)], output_shape)
+
   def test_lambda_config_serialization(self):
     with self.cached_session():
       # test serialization with output_shape and output_shape_type
-- 
GitLab


From d377fdee3a5e266ac330a6742c15ece8e7ed8aa0 Mon Sep 17 00:00:00 2001
From: Daryl Ng <darylng@google.com>
Date: Thu, 27 Sep 2018 20:10:31 -0700
Subject: [PATCH 0838/1357] Adding to tpu_lib depenencies to
 optimization_parameters_py, tpu_embedding_configuration_py, and
 tpu_embedding_output_layout_py.

PiperOrigin-RevId: 214879168
---
 tensorflow/contrib/tpu/BUILD | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index ac38612603..e9aa037634 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -252,7 +252,10 @@ py_library(
         ":tpu_py",
         "//tensorflow/contrib/cluster_resolver:tpu_cluster_resolver_py",
         "//tensorflow/contrib/tpu/proto:compilation_result_proto_py",
+        "//tensorflow/contrib/tpu/proto:optimization_parameters_proto_py",
         "//tensorflow/contrib/tpu/proto:topology_proto_py",
+        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_py",
+        "//tensorflow/contrib/tpu/proto:tpu_embedding_output_layout_proto_py",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:control_flow_ops",
-- 
GitLab


From 986193d79e00f1780fb3278ed890a72f7285f66e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 20:14:30 -0700
Subject: [PATCH 0839/1357] Move obsolete kernel code to legacy files.

PiperOrigin-RevId: 214879388
---
 .../internal/optimized/depthwiseconv_float.h  |   74 --
 .../internal/optimized/depthwiseconv_uint8.h  |  102 --
 .../internal/optimized/legacy_optimized_ops.h |  941 ++++++++++++++-
 .../internal/optimized/optimized_ops.h        |  798 ------------
 .../internal/reference/depthwiseconv_float.h  |   75 --
 .../internal/reference/depthwiseconv_uint8.h  |  103 --
 .../internal/reference/fully_connected.h      |  134 ---
 .../internal/reference/legacy_reference_ops.h | 1067 ++++++++++++++++-
 .../internal/reference/reference_ops.h        |  762 ------------
 .../lite/kernels/internal/reference/softmax.h |   23 -
 10 files changed, 2001 insertions(+), 2078 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index 114575a96a..d8dd7bba89 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -1092,80 +1092,6 @@ inline void DepthwiseConv(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          const float* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height,
-                          int dilation_width_factor, int dilation_height_factor,
-                          int pad_width, int pad_height, int depth_multiplier,
-                          float output_activation_min,
-                          float output_activation_max, float* output_data,
-                          const Dims<4>& output_dims) {
-  tflite::DepthwiseParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = dilation_width_factor;
-  op_params.dilation_height_factor = dilation_height_factor;
-  op_params.depth_multiplier = depth_multiplier;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
-                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                bias_data, DimsToShape(output_dims), output_data);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          const float* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
-                          float output_activation_min,
-                          float output_activation_max, float* output_data,
-                          const Dims<4>& output_dims) {
-  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
-                bias_dims, stride_width, stride_height, 1, 1, pad_width,
-                pad_height, depth_multiplier, output_activation_min,
-                output_activation_max, output_data, output_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                   const float* filter_data, const Dims<4>& filter_dims,
-                   const float* bias_data, const Dims<4>& bias_dims,
-                   int stride_width, int stride_height, int pad_width,
-                   int pad_height, int depth_multiplier, float* output_data,
-                   const Dims<4>& output_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
-                bias_dims, stride_width, stride_height, pad_width, pad_height,
-                depth_multiplier, output_activation_min, output_activation_max,
-                output_data, output_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                   const float* filter_data, const Dims<4>& filter_dims,
-                   const float* bias_data, const Dims<4>& bias_dims, int stride,
-                   int pad_width, int pad_height, int depth_multiplier,
-                   float* output_data, const Dims<4>& output_dims) {
-  DepthwiseConv<Ac>(input_data, input_dims, filter_data, filter_dims, bias_data,
-                    bias_dims, stride, stride, pad_width, pad_height,
-                    depth_multiplier, output_data, output_dims);
-}
-
 }  // namespace optimized_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index a70545599b..803eff292a 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -24,9 +24,6 @@ limitations under the License.
 namespace tflite {
 namespace optimized_ops {
 
-// TODO(b/80418076): Move to legacy ops file, along with invocations.
-static constexpr int kDepthwiseReverseShift = -1;
-
 // Implementation of quantized DepthwiseConv
 
 template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
@@ -1996,105 +1993,6 @@ inline void DepthwiseConv(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                          int32 input_offset, const uint8* filter_data,
-                          const Dims<4>& filter_dims, int32 filter_offset,
-                          const int32* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height,
-                          int dilation_width_factor, int dilation_height_factor,
-                          int pad_width, int pad_height, int depth_multiplier,
-                          int32 output_offset, int32 output_multiplier,
-                          int output_shift, int32 output_activation_min,
-                          int32 output_activation_max, uint8* output_data,
-                          const Dims<4>& output_dims) {
-  tflite::DepthwiseParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = dilation_width_factor;
-  op_params.dilation_height_factor = dilation_height_factor;
-  op_params.depth_multiplier = depth_multiplier;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kDepthwiseReverseShift * output_shift;
-
-  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
-                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                bias_data, DimsToShape(output_dims), output_data);
-}
-
-inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                          int32 input_offset, const uint8* filter_data,
-                          const Dims<4>& filter_dims, int32 filter_offset,
-                          const int32* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
-                          int32 output_offset, int32 output_multiplier,
-                          int output_shift, int32 output_activation_min,
-                          int32 output_activation_max, uint8* output_data,
-                          const Dims<4>& output_dims) {
-  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
-                filter_offset, bias_data, bias_dims, stride_width,
-                stride_height, 1, 1, pad_width, pad_height, depth_multiplier,
-                output_offset, output_multiplier, output_shift,
-                output_activation_min, output_activation_max, output_data,
-                output_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy, for compatibility with old checked-in code.
-template <FusedActivationFunctionType Ac>
-void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                   int32 input_offset, const uint8* filter_data,
-                   const Dims<4>& filter_dims, int32 filter_offset,
-                   const int32* bias_data, const Dims<4>& bias_dims,
-                   int stride_width, int stride_height, int pad_width,
-                   int pad_height, int depth_multiplier, int32 output_offset,
-                   int32 output_multiplier, int output_shift,
-                   int32 output_activation_min, int32 output_activation_max,
-                   uint8* output_data, const Dims<4>& output_dims) {
-  if (Ac == FusedActivationFunctionType::kNone) {
-    TFLITE_DCHECK_EQ(output_activation_min, 0);
-    TFLITE_DCHECK_EQ(output_activation_max, 255);
-  }
-  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
-                filter_offset, bias_data, bias_dims, stride_width,
-                stride_height, pad_width, pad_height, depth_multiplier,
-                output_offset, output_multiplier, output_shift,
-                output_activation_min, output_activation_max, output_data,
-                output_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy, for compatibility with old checked-in code.
-template <FusedActivationFunctionType Ac>
-void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                   int32 input_offset, const uint8* filter_data,
-                   const Dims<4>& filter_dims, int32 filter_offset,
-                   const int32* bias_data, const Dims<4>& bias_dims, int stride,
-                   int pad_width, int pad_height, int depth_multiplier,
-                   int32 output_offset, int32 output_multiplier,
-                   int output_shift, int32 output_activation_min,
-                   int32 output_activation_max, uint8* output_data,
-                   const Dims<4>& output_dims) {
-  DepthwiseConv<Ac>(input_data, input_dims, input_offset, filter_data,
-                    filter_dims, filter_offset, bias_data, bias_dims, stride,
-                    stride, pad_width, pad_height, depth_multiplier,
-                    output_offset, output_multiplier, output_shift,
-                    output_activation_min, output_activation_max, output_data,
-                    output_dims);
-}
-
 }  // namespace optimized_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h
index b6151c40b3..4218be20a4 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h
@@ -19,6 +19,8 @@ limitations under the License.
 #include <sys/types.h>
 
 #include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
 #include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
 #include "tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h"
 #include "tensorflow/contrib/lite/kernels/internal/types.h"
@@ -28,9 +30,857 @@ namespace optimized_ops {
 
 // Unoptimized reference ops:
 using reference_ops::ArgMax;
+using reference_ops::ArgMinMax;
+using reference_ops::Broadcast4DSlowGreater;
+using reference_ops::Broadcast4DSlowGreaterEqual;
+using reference_ops::Broadcast4DSlowGreaterEqualWithScaling;
+using reference_ops::Broadcast4DSlowGreaterWithScaling;
+using reference_ops::Broadcast4DSlowLess;
+using reference_ops::Broadcast4DSlowLessEqual;
+using reference_ops::Broadcast4DSlowLessEqualWithScaling;
+using reference_ops::Broadcast4DSlowLessWithScaling;
+using reference_ops::BroadcastAdd4DSlow;
+using reference_ops::BroadcastGreater;
+using reference_ops::BroadcastGreaterEqual;
+using reference_ops::BroadcastLess;
+using reference_ops::BroadcastLessEqual;
+using reference_ops::BroadcastMul4DSlow;
+using reference_ops::BroadcastSub4DSlow;
+using reference_ops::Concatenation;
+using reference_ops::ConcatenationWithScaling;
+using reference_ops::DepthConcatenation;
+using reference_ops::Dequantize;
+using reference_ops::Div;
+using reference_ops::FakeQuant;
+using reference_ops::Gather;
+using reference_ops::Greater;
+using reference_ops::GreaterEqual;
+using reference_ops::GreaterEqualWithScaling;
+using reference_ops::GreaterWithScaling;
+using reference_ops::Less;
+using reference_ops::LessEqual;
+using reference_ops::LessEqualWithScaling;
+using reference_ops::LessWithScaling;
+using reference_ops::Mean;
+using reference_ops::RankOneSelect;
 using reference_ops::Relu1;
 using reference_ops::Relu6;
+using reference_ops::ReluX;
+using reference_ops::Select;
 using reference_ops::SpaceToBatchND;
+using reference_ops::Split;
+using reference_ops::StridedSlice;
+using reference_ops::TensorFlowSplit;
+using reference_ops::Transpose;
+
+static constexpr int kDepthwiseReverseShift = -1;
+
+template <typename Scalar, int N>
+VectorMap<Scalar> MapAsVector(Scalar* data, const Dims<N>& dims) {
+  const int size = FlatSize(dims);
+  return VectorMap<Scalar>(data, size, 1);
+}
+
+template <typename Scalar, int N>
+MatrixMap<Scalar> MapAsMatrixWithFirstDimAsRows(Scalar* data,
+                                                const Dims<N>& dims) {
+  const int rows = dims.sizes[0];
+  int cols = 1;
+  for (int d = 1; d < N; d++) {
+    cols *= dims.sizes[d];
+  }
+  return MatrixMap<Scalar>(data, rows, cols);
+}
+
+template <typename Scalar, int N>
+MatrixMap<Scalar> MapAsMatrixWithLastDimAsCols(Scalar* data,
+                                               const Dims<N>& dims) {
+  const int cols = dims.sizes[N - 1];
+  int rows = 1;
+  for (int d = 0; d < N - 1; d++) {
+    rows *= dims.sizes[d];
+  }
+  return MatrixMap<Scalar>(data, rows, cols);
+}
+
+template <typename Scalar, int N>
+ArrayMap<Scalar> MapAsArrayWithFirstDimAsRows(Scalar* data,
+                                              const Dims<N>& dims) {
+  const int rows = dims.sizes[0];
+  int cols = 1;
+  for (int d = 1; d < N; d++) {
+    cols *= dims.sizes[d];
+  }
+  return ArrayMap<Scalar>(data, rows, cols);
+}
+
+// TODO(b/62193649): this function is only needed as long
+// as we have the --variable_batch hack.
+template <typename Scalar, int N>
+MatrixMap<Scalar> MapAsMatrixWithGivenNumberOfRows(Scalar* data,
+                                                   const Dims<N>& dims,
+                                                   int rows) {
+  const int flatsize = FlatSize(dims);
+  TFLITE_DCHECK((flatsize % rows) == 0);
+  const int cols = flatsize / rows;
+  return MatrixMap<Scalar>(data, rows, cols);
+}
+
+inline bool AreSameDims(const Dims<4>& dims1, const Dims<4>& dims2) {
+  for (int i = 0; i < 4; i++) {
+    if (dims1.sizes[i] != dims2.sizes[i]) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                bias_data, DimsToShape(output_dims), output_data);
+}
+
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
+  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
+                bias_dims, stride_width, stride_height, 1, 1, pad_width,
+                pad_height, depth_multiplier, output_activation_min,
+                output_activation_max, output_data, output_dims);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                   const float* filter_data, const Dims<4>& filter_dims,
+                   const float* bias_data, const Dims<4>& bias_dims,
+                   int stride_width, int stride_height, int pad_width,
+                   int pad_height, int depth_multiplier, float* output_data,
+                   const Dims<4>& output_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
+                bias_dims, stride_width, stride_height, pad_width, pad_height,
+                depth_multiplier, output_activation_min, output_activation_max,
+                output_data, output_dims);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                   const float* filter_data, const Dims<4>& filter_dims,
+                   const float* bias_data, const Dims<4>& bias_dims, int stride,
+                   int pad_width, int pad_height, int depth_multiplier,
+                   float* output_data, const Dims<4>& output_dims) {
+  DepthwiseConv<Ac>(input_data, input_dims, filter_data, filter_dims, bias_data,
+                    bias_dims, stride, stride, pad_width, pad_height,
+                    depth_multiplier, output_data, output_dims);
+}
+
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kDepthwiseReverseShift * output_shift;
+
+  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                bias_data, DimsToShape(output_dims), output_data);
+}
+
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
+  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
+                filter_offset, bias_data, bias_dims, stride_width,
+                stride_height, 1, 1, pad_width, pad_height, depth_multiplier,
+                output_offset, output_multiplier, output_shift,
+                output_activation_min, output_activation_max, output_data,
+                output_dims);
+}
+
+// Legacy, for compatibility with old checked-in code.
+template <FusedActivationFunctionType Ac>
+void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                   int32 input_offset, const uint8* filter_data,
+                   const Dims<4>& filter_dims, int32 filter_offset,
+                   const int32* bias_data, const Dims<4>& bias_dims,
+                   int stride_width, int stride_height, int pad_width,
+                   int pad_height, int depth_multiplier, int32 output_offset,
+                   int32 output_multiplier, int output_shift,
+                   int32 output_activation_min, int32 output_activation_max,
+                   uint8* output_data, const Dims<4>& output_dims) {
+  if (Ac == FusedActivationFunctionType::kNone) {
+    TFLITE_DCHECK_EQ(output_activation_min, 0);
+    TFLITE_DCHECK_EQ(output_activation_max, 255);
+  }
+  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
+                filter_offset, bias_data, bias_dims, stride_width,
+                stride_height, pad_width, pad_height, depth_multiplier,
+                output_offset, output_multiplier, output_shift,
+                output_activation_min, output_activation_max, output_data,
+                output_dims);
+}
+
+// Legacy, for compatibility with old checked-in code.
+template <FusedActivationFunctionType Ac>
+void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                   int32 input_offset, const uint8* filter_data,
+                   const Dims<4>& filter_dims, int32 filter_offset,
+                   const int32* bias_data, const Dims<4>& bias_dims, int stride,
+                   int pad_width, int pad_height, int depth_multiplier,
+                   int32 output_offset, int32 output_multiplier,
+                   int output_shift, int32 output_activation_min,
+                   int32 output_activation_max, uint8* output_data,
+                   const Dims<4>& output_dims) {
+  DepthwiseConv<Ac>(input_data, input_dims, input_offset, filter_data,
+                    filter_dims, filter_offset, bias_data, bias_dims, stride,
+                    stride, pad_width, pad_height, depth_multiplier,
+                    output_offset, output_multiplier, output_shift,
+                    output_activation_min, output_activation_max, output_data,
+                    output_dims);
+}
+
+inline void AddBiasAndEvalActivationFunction(const float* bias_data,
+                                             const Dims<4>& bias_dims,
+                                             float* array_data,
+                                             const Dims<4>& array_dims,
+                                             float output_activation_min,
+                                             float output_activation_max) {
+  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+                                   DimsToShape(bias_dims), bias_data,
+                                   DimsToShape(array_dims), array_data);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void AddBiasAndEvalActivationFunction(const float* bias_data,
+                                      const Dims<4>& bias_dims,
+                                      float* array_data,
+                                      const Dims<4>& array_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  AddBiasAndEvalActivationFunction(bias_data, bias_dims, array_data, array_dims,
+                                   output_activation_min,
+                                   output_activation_max);
+}
+
+inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+                           const float* weights_data,
+                           const Dims<4>& weights_dims, const float* bias_data,
+                           const Dims<4>& bias_dims,
+                           float output_activation_min,
+                           float output_activation_max, float* output_data,
+                           const Dims<4>& output_dims) {
+  tflite::FullyConnectedParams op_params;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(weights_dims), weights_data,
+                 DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+                 output_data);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+                    const float* weights_data, const Dims<4>& weights_dims,
+                    const float* bias_data, const Dims<4>& bias_dims,
+                    float* output_data, const Dims<4>& output_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data,
+                 bias_dims, output_activation_min, output_activation_max,
+                 output_data, output_dims);
+}
+
+inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                           int32 input_offset, const uint8* filter_data,
+                           const Dims<4>& filter_dims, int32 filter_offset,
+                           const int32* bias_data, const Dims<4>& bias_dims,
+                           int32 output_offset, int32 output_multiplier,
+                           int output_shift, int32 output_activation_min,
+                           int32 output_activation_max, uint8* output_data,
+                           const Dims<4>& output_dims,
+                           gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
+inline void FullyConnected(
+    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
+    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
+    const int32* bias_data_int32, const Dims<4>& bias_dims, int32 output_offset,
+    int32 output_multiplier, int output_shift, int32 output_activation_min,
+    int32 output_activation_max, int16* output_data, const Dims<4>& output_dims,
+    gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data_int32, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                    int32 input_offset, const uint8* filter_data,
+                    const Dims<4>& filter_dims, int32 filter_offset,
+                    const int32* bias_data, const Dims<4>& bias_dims,
+                    int32 output_offset, int32 output_multiplier,
+                    int output_shift, int32 output_activation_min,
+                    int32 output_activation_max, uint8* output_data,
+                    const Dims<4>& output_dims,
+                    gemmlowp::GemmContext* gemm_context) {
+  static_assert(Ac == FusedActivationFunctionType::kNone ||
+                    Ac == FusedActivationFunctionType::kRelu ||
+                    Ac == FusedActivationFunctionType::kRelu6 ||
+                    Ac == FusedActivationFunctionType::kRelu1,
+                "");
+  FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims,
+                 filter_offset, bias_data, bias_dims, output_offset,
+                 output_multiplier, output_shift, output_activation_min,
+                 output_activation_max, output_data, output_dims, gemm_context);
+}
+
+inline void ShuffledFullyConnected(
+    const uint8* input_data, const Dims<4>& input_dims,
+    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
+    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
+    int output_shift, int32 output_activation_min, int32 output_activation_max,
+    int16* output_data, const Dims<4>& output_dims,
+    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data,
+                         DimsToShape(weights_dims), shuffled_weights_data,
+                         DimsToShape(bias_dims), bias_data,
+                         DimsToShape(output_dims), output_data,
+                         shuffled_input_workspace_data, gemm_context);
+}
+
+template <typename T>
+inline void ExtractPatchIntoBufferColumn(
+    const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth,
+    int stride_width, int stride_height, int pad_width, int pad_height,
+    int in_width, int in_height, int in_depth, int single_buffer_length,
+    int buffer_id, const T* in_data, T* conv_buffer_data, uint8 zero_byte) {
+  ExtractPatchIntoBufferColumn(
+      DimsToShape(input_dims), w, h, b, kheight, kwidth, stride_width,
+      stride_height, pad_width, pad_height, in_width, in_height, in_depth,
+      single_buffer_length, buffer_id, in_data, conv_buffer_data, zero_byte);
+}
+
+template <typename T>
+void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
+                   const Dims<4>& filter_dims, int stride_width,
+                   int stride_height, int dilation_width_factor,
+                   int dilation_height_factor, int pad_width, int pad_height,
+                   const Dims<4>& output_dims, uint8 zero_byte,
+                   T* im2col_data) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+
+  DilatedIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), DimsToShape(output_dims),
+                im2col_data);
+}
+
+template <typename T>
+void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
+            int stride_height, int pad_width, int pad_height, int kheight,
+            int kwidth, uint8 zero_byte, T* output_data,
+            const Dims<4>& output_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = 1;
+  op_params.dilation_height_factor = 1;
+
+  Im2col(op_params, kheight, kwidth, zero_byte, DimsToShape(input_dims),
+         input_data, DimsToShape(output_dims), output_data);
+}
+
+// legacy, for compatibility with old checked-in code
+template <typename T>
+void Im2col(const T* input_data, const Dims<4>& input_dims, int stride,
+            int pad_width, int pad_height, int kheight, int kwidth,
+            uint8 zero_byte, T* output_data, const Dims<4>& output_dims) {
+  Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight,
+         kwidth, zero_byte, output_data, output_dims);
+}
+
+inline void Conv(const float* input_data, const Dims<4>& input_dims,
+                 const float* filter_data, const Dims<4>& filter_dims,
+                 const float* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 float output_activation_min, float output_activation_max,
+                 float* output_data, const Dims<4>& output_dims,
+                 float* im2col_data, const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
+                       const int8_t* filter_data, const Dims<4>& filter_dims,
+                       const float* bias_data, const Dims<4>& bias_dims,
+                       int stride_width, int stride_height, int pad_width,
+                       int pad_height, float* scaling_factors_ptr,
+                       float output_activation_min, float output_activation_max,
+                       float* output_data, const Dims<4>& output_dims,
+                       int8_t* im2col_data, const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  HybridConv(op_params, scaling_factors_ptr, DimsToShape(input_dims),
+             input_data, DimsToShape(filter_dims), filter_data,
+             DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+             output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+template <FusedActivationFunctionType Ac>
+void Conv(const float* input_data, const Dims<4>& input_dims,
+          const float* filter_data, const Dims<4>& filter_dims,
+          const float* bias_data, const Dims<4>& bias_dims, int stride_width,
+          int stride_height, int dilation_width_factor,
+          int dilation_height_factor, int pad_width, int pad_height,
+          float* output_data, const Dims<4>& output_dims, float* im2col_data,
+          const Dims<4>& im2col_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
+       stride_width, stride_height, dilation_width_factor,
+       dilation_height_factor, pad_width, pad_height, output_activation_min,
+       output_activation_max, output_data, output_dims, im2col_data,
+       im2col_dims);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void Conv(const float* input_data, const Dims<4>& input_dims,
+          const float* filter_data, const Dims<4>& filter_dims,
+          const float* bias_data, const Dims<4>& bias_dims, int stride_width,
+          int stride_height, int pad_width, int pad_height, float* output_data,
+          const Dims<4>& output_dims, float* im2col_data,
+          const Dims<4>& im2col_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
+       stride_width, stride_height, 1, 1, pad_width, pad_height,
+       output_activation_min, output_activation_max, output_data, output_dims,
+       im2col_data, im2col_dims);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void Conv(const float* input_data, const Dims<4>& input_dims,
+          const float* filter_data, const Dims<4>& filter_dims,
+          const float* bias_data, const Dims<4>& bias_dims, int stride,
+          int pad_width, int pad_height, float* output_data,
+          const Dims<4>& output_dims, float* im2col_data,
+          const Dims<4>& im2col_dims) {
+  Conv<Ac>(input_data, input_dims, filter_data, filter_dims, bias_data,
+           bias_dims, stride, stride, 1, 1, pad_width, pad_height, output_data,
+           output_dims, im2col_data, im2col_dims);
+}
+
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
+                 int32 input_offset, const uint8* filter_data,
+                 const Dims<4>& filter_dims, int32 filter_offset,
+                 const int32* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 int32 output_offset, int32 output_multiplier, int output_shift,
+                 int32 output_activation_min, int32 output_activation_max,
+                 uint8* output_data, const Dims<4>& output_dims,
+                 uint8* im2col_data, const Dims<4>& im2col_dims,
+                 gemmlowp::GemmContext* gemm_context) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data, gemm_context);
+}
+
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
+                 int32 input_offset, const uint8* filter_data,
+                 const Dims<4>& filter_dims, int32 filter_offset,
+                 const int32* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int pad_width,
+                 int pad_height, int32 output_offset, int32 output_multiplier,
+                 int output_shift, int32 output_activation_min,
+                 int32 output_activation_max, uint8* output_data,
+                 const Dims<4>& output_dims, uint8* im2col_data,
+                 const Dims<4>& im2col_dims,
+                 gemmlowp::GemmContext* gemm_context) {
+  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
+       filter_offset, bias_data, bias_dims, stride_width, stride_height, 1, 1,
+       pad_width, pad_height, output_offset, output_multiplier, output_shift,
+       output_activation_min, output_activation_max, output_data, output_dims,
+       im2col_data, im2col_dims, gemm_context);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
+                 int32 input_offset, const uint8* filter_data,
+                 const Dims<4>& filter_dims, int32 filter_offset,
+                 const int32* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int pad_width,
+                 int pad_height, int32 output_offset, int32 output_multiplier,
+                 int output_shift, int32 output_activation_min,
+                 int32 output_activation_max, uint8* output_data,
+                 const Dims<4>& output_dims, uint8* im2col_data,
+                 const Dims<4>& im2col_dims,
+                 gemmlowp::GemmContext* gemm_context) {
+  static_assert(Ac == FusedActivationFunctionType::kNone ||
+                    Ac == FusedActivationFunctionType::kRelu ||
+                    Ac == FusedActivationFunctionType::kRelu6 ||
+                    Ac == FusedActivationFunctionType::kRelu1,
+                "");
+  if (Ac == FusedActivationFunctionType::kNone) {
+    TFLITE_DCHECK_EQ(output_activation_min, 0);
+    TFLITE_DCHECK_EQ(output_activation_max, 255);
+  }
+  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
+       filter_offset, bias_data, bias_dims, stride_width, stride_height,
+       pad_width, pad_height, output_offset, output_multiplier, output_shift,
+       output_activation_min, output_activation_max, output_data, output_dims,
+       im2col_data, im2col_dims, gemm_context);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void Conv(const uint8* input_data, const Dims<4>& input_dims,
+          int32 input_offset, const uint8* filter_data,
+          const Dims<4>& filter_dims, int32 filter_offset,
+          const int32* bias_data, const Dims<4>& bias_dims, int stride,
+          int pad_width, int pad_height, int32 output_offset,
+          int32 output_multiplier, int output_shift,
+          int32 output_activation_min, int32 output_activation_max,
+          uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data,
+          const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) {
+  static_assert(Ac == FusedActivationFunctionType::kNone ||
+                    Ac == FusedActivationFunctionType::kRelu ||
+                    Ac == FusedActivationFunctionType::kRelu6 ||
+                    Ac == FusedActivationFunctionType::kRelu1,
+                "");
+  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
+       filter_offset, bias_data, bias_dims, stride, stride, pad_width,
+       pad_height, output_offset, output_multiplier, output_shift,
+       output_activation_min, output_activation_max, output_data, output_dims,
+       im2col_data, im2col_dims, gemm_context);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac, typename T>
+void Im2col(const T* input_data, const Dims<4>& input_dims, int stride,
+            int pad_width, int pad_height, int kheight, int kwidth,
+            uint8 zero_byte, T* output_data, const Dims<4>& output_dims) {
+  Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight,
+         kwidth, zero_byte, output_data, output_dims);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void ConvAsGemm(const float* input_data, const Dims<4>& input_dims,
+                const float* filter_data, const Dims<4>& filter_dims,
+                const float* bias_data, const Dims<4>& bias_dims,
+                float* output_data, const Dims<4>& output_dims) {
+  gemmlowp::ScopedProfilingLabel label("ConvAsGemm");
+
+  const auto input_matrix_map =
+      MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
+  const auto filter_matrix_map =
+      MapAsMatrixWithLastDimAsCols(filter_data, filter_dims);
+  auto output_matrix_map =
+      MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+
+  Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map);
+
+  AddBiasAndEvalActivationFunction<Ac>(bias_data, bias_dims, output_data,
+                                       output_dims);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims,
+                int32 input_offset, const uint8* filter_data,
+                const Dims<4>& filter_dims, int32 filter_offset,
+                const int32* bias_data, const Dims<4>& bias_dims,
+                int32 output_offset, int32 output_multiplier, int output_shift,
+                int32 output_activation_min, int32 output_activation_max,
+                uint8* output_data, const Dims<4>& output_dims,
+                gemmlowp::GemmContext* gemm_context) {
+  gemmlowp::ScopedProfilingLabel label("ConvAsGemm/8bit");
+  static_assert(Ac == FusedActivationFunctionType::kNone ||
+                    Ac == FusedActivationFunctionType::kRelu ||
+                    Ac == FusedActivationFunctionType::kRelu6 ||
+                    Ac == FusedActivationFunctionType::kRelu1,
+                "");
+  const int input_rows = input_dims.sizes[0];
+  const int input_cols = FlatSizeSkipDim(input_dims, 0);
+  const int filter_rows = filter_dims.sizes[3];
+  const int filter_cols = FlatSizeSkipDim(filter_dims, 3);
+  const int output_rows = output_dims.sizes[0];
+  const int output_cols = FlatSizeSkipDim(output_dims, 0);
+  TFLITE_DCHECK_EQ(output_rows, filter_rows);
+  TFLITE_DCHECK_EQ(output_cols, input_cols);
+  TFLITE_DCHECK_EQ(filter_cols, input_rows);
+  TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows);
+  TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1);
+  TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1);
+  TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1);
+  gemmlowp::MatrixMap<const uint8, gemmlowp::MapOrder::RowMajor> filter_matrix(
+      filter_data, output_rows, filter_cols, filter_cols);
+  gemmlowp::MatrixMap<const uint8, gemmlowp::MapOrder::ColMajor> input_matrix(
+      input_data, filter_cols, output_cols, filter_cols);
+  gemmlowp::MatrixMap<uint8, gemmlowp::MapOrder::ColMajor> output_matrix(
+      output_data, output_rows, output_cols, output_rows);
+  const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp(
+      bias_data, output_rows, output_offset, output_multiplier, -output_shift,
+      output_activation_min, output_activation_max);
+  gemmlowp::GemmWithOutputPipeline<uint8, uint8,
+                                   gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
+      gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset,
+      input_offset, output_pipeline);
+}
+
+inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, float* output_data,
+                          const Dims<4>& output_dims, float* im2col_data,
+                          const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+
+  TransposeConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(output_dims),
+                output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+template <typename T>
+void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
+                     const Dims<4>& filter_dims, int stride_width,
+                     int stride_height, int pad_width, int pad_height,
+                     const Dims<4>& output_dims, uint8 zero_byte,
+                     T* im2col_data) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+
+  TransposeIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data,
+                  DimsToShape(filter_dims), DimsToShape(output_dims),
+                  im2col_data);
+}
+
+inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
+                     const float* prev_activ_data,
+                     const Dims<4>& prev_activ_dims, const float* weights_data,
+                     const Dims<4>& weights_dims, const float* bias_data,
+                     const Dims<4>& bias_dims, const float* prev_state_data,
+                     const Dims<4>& prev_state_dims, float* output_state_data,
+                     const Dims<4>& output_state_dims, float* output_activ_data,
+                     const Dims<4>& output_activ_dims, float* concat_temp_data,
+                     const Dims<4>& concat_temp_dims, float* activ_temp_data,
+                     const Dims<4>& activ_temp_dims) {
+  tflite::LstmCellParams op_params;
+  // Float LSTM cell does not need parameters to be set: leave untouched.
+
+  LstmCell(op_params, DimsToShape(input_dims), input_data,
+           DimsToShape(prev_activ_dims), prev_activ_data,
+           DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims),
+           bias_data, DimsToShape(prev_state_dims), prev_state_data,
+           DimsToShape(output_state_dims), output_state_data,
+           DimsToShape(output_activ_dims), output_activ_data,
+           DimsToShape(concat_temp_dims), concat_temp_data,
+           DimsToShape(activ_temp_dims), activ_temp_data);
+}
+
+template <int StateIntegerBits>
+void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
+              const uint8* prev_activ_data_uint8,
+              const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8,
+              const Dims<4>& weights_dims, const int32* bias_data_int32,
+              const Dims<4>& bias_dims, const int16* prev_state_data_int16,
+              const Dims<4>& prev_state_dims, int16* output_state_data_int16,
+              const Dims<4>& output_state_dims, uint8* output_activ_data_uint8,
+              const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8,
+              const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16,
+              const Dims<4>& activ_temp_dims, int32 weights_zero_point,
+              int32 accum_multiplier, int accum_shift,
+              gemmlowp::GemmContext* gemm_context) {
+  tflite::LstmCellParams op_params;
+  op_params.weights_zero_point = weights_zero_point;
+  op_params.accum_multiplier = accum_multiplier;
+  op_params.accum_shift = accum_shift;
+
+  LstmCell<StateIntegerBits>(
+      op_params, DimsToShape(input_dims), input_data_uint8,
+      DimsToShape(prev_activ_dims), prev_activ_data_uint8,
+      DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims),
+      bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16,
+      DimsToShape(output_state_dims), output_state_data_int16,
+      DimsToShape(output_activ_dims), output_activ_data_uint8,
+      DimsToShape(concat_temp_dims), concat_temp_data_uint8,
+      DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context);
+}
+
+template <typename T>
+void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims,
+                  const T* input2_data, const Dims<4>& input2_dims,
+                  T output_activation_min, T output_activation_max,
+                  T* output_data, const Dims<4>& output_dims) {
+  tflite::ArithmeticParams op_params;
+  SetActivationParams(output_activation_min, output_activation_max, &op_params);
+
+  BroadcastDiv4DSlow(op_params, DimsToShape(input1_dims), input1_data,
+                     DimsToShape(input2_dims), input2_data,
+                     DimsToShape(output_dims), output_data);
+}
 
 template <FusedActivationFunctionType Ac>
 void L2Normalization(const float* input_data, const RuntimeShape& input_shape,
@@ -574,6 +1424,14 @@ void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride,
              filter_width, filter_height, output_data, output_dims);
 }
 
+inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
+                    float beta, float* output_data,
+                    const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.beta = beta;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+
 inline void Softmax(const float* input_data, const Dims<4>& input_dims,
                     float beta, float* output_data,
                     const Dims<4>& output_dims) {
@@ -581,6 +1439,16 @@ inline void Softmax(const float* input_data, const Dims<4>& input_dims,
           DimsToShape(output_dims));
 }
 
+inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
+                    int32 input_beta_multiplier, int32 input_beta_left_shift,
+                    int diff_min, uint8* output_data,
+                    const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.input_multiplier = input_beta_multiplier;
+  params.input_left_shift = input_beta_left_shift;
+  params.diff_min = diff_min;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
 inline void Softmax(const uint8* input_data, const Dims<4>& input_dims,
                     int32 input_beta_multiplier, int32 input_beta_left_shift,
                     int diff_min, uint8* output_data,
@@ -590,12 +1458,33 @@ inline void Softmax(const uint8* input_data, const Dims<4>& input_dims,
           DimsToShape(output_dims));
 }
 
+inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
+                       float* output_data, const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  // No params currently used for float LogSoftmax.
+  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
+}
+
 inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims,
                        float* output_data, const Dims<4>& output_dims) {
   LogSoftmax(input_data, DimsToShape(input_dims), output_data,
              DimsToShape(output_dims));
 }
 
+inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
+                       int32 input_multiplier, int32 input_left_shift,
+                       int32 reverse_scaling_divisor,
+                       int32 reverse_scaling_right_shift, int diff_min,
+                       uint8* output_data, const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  params.reverse_scaling_divisor = reverse_scaling_divisor;
+  params.reverse_scaling_right_shift = reverse_scaling_right_shift;
+  params.diff_min = diff_min;
+  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
+}
+
 inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims,
                        int32 input_multiplier, int32 input_left_shift,
                        int32 reverse_scaling_divisor,
@@ -607,6 +1496,18 @@ inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims,
              DimsToShape(output_dims));
 }
 
+inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
+                     int32 input_zero_point, int32 input_range_radius,
+                     int32 input_multiplier, int input_left_shift,
+                     uint8* output_data, const RuntimeShape& output_shape) {
+  LogisticParams params;
+  params.input_zero_point = input_zero_point;
+  params.input_range_radius = input_range_radius;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
 inline void Logistic(const float* input_data, const Dims<4>& input_dims,
                      float* output_data, const Dims<4>& output_dims) {
   Logistic(DimsToShape(input_dims), input_data, DimsToShape(output_dims),
@@ -622,6 +1523,20 @@ inline void Logistic(const uint8* input_data, const Dims<4>& input_dims,
            DimsToShape(output_dims));
 }
 
+inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
+                     const RuntimeShape& output_shape, int16* output_data) {
+  LogisticParams params;
+  // No params currently needed by int16 Logistic.
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const int16* input_data, const RuntimeShape& input_shape,
+                     int16* output_data, const RuntimeShape& output_shape) {
+  LogisticParams params;
+  // No params currently needed by int16 Logistic.
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
 inline void Logistic(const int16* input_data, const Dims<4>& input_dims,
                      int16* output_data, const Dims<4>& output_dims) {
   Logistic(input_data, DimsToShape(input_dims), output_data,
@@ -634,6 +1549,18 @@ inline void Tanh(const float* input_data, const Dims<4>& input_dims,
        output_data);
 }
 
+inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
+                 int32 input_zero_point, int32 input_range_radius,
+                 int32 input_multiplier, int input_left_shift,
+                 uint8* output_data, const RuntimeShape& output_shape) {
+  TanhParams params;
+  params.input_zero_point = input_zero_point;
+  params.input_range_radius = input_range_radius;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
 inline void Tanh(const uint8* input_data, const Dims<4>& input_dims,
                  int32 input_zero_point, int32 input_range_radius,
                  int32 input_multiplier, int input_left_shift,
@@ -643,6 +1570,14 @@ inline void Tanh(const uint8* input_data, const Dims<4>& input_dims,
        DimsToShape(output_dims));
 }
 
+inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
+                 int input_left_shift, int16* output_data,
+                 const RuntimeShape& output_shape) {
+  TanhParams params;
+  params.input_left_shift = input_left_shift;
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
 inline void Tanh(const int16* input_data, const Dims<4>& input_dims,
                  int input_left_shift, int16* output_data,
                  const Dims<4>& output_dims) {
@@ -777,7 +1712,6 @@ inline void BroadcastMul(const float* input1_data, const Dims<4>& input1_dims,
                      DimsToShape(output_dims), output_data);
 }
 
-// Legacy Dims<4>.
 inline void LocalResponseNormalization(const float* input_data,
                                        const Dims<4>& input_dims, int range,
                                        float bias, float alpha, float beta,
@@ -793,7 +1727,6 @@ inline void LocalResponseNormalization(const float* input_data,
                              DimsToShape(output_dims), output_data);
 }
 
-// Legacy Dims<4> version.
 template <typename SrcT, typename DstT>
 void Cast(const SrcT* input_data, const Dims<4>& input_dims, DstT* output_data,
           const Dims<4>& output_dims) {
@@ -801,14 +1734,12 @@ void Cast(const SrcT* input_data, const Dims<4>& input_dims, DstT* output_data,
        output_data);
 }
 
-// Legacy Dims<4> version.
 inline void Floor(const float* input_data, const Dims<4>& input_dims,
                   float* output_data, const Dims<4>& output_dims) {
   Floor(DimsToShape(input_dims), input_data, DimsToShape(output_dims),
         output_data);
 }
 
-// Legacy Dims<4>
 inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims,
                            const int32* output_size_data,
                            const Dims<4>& output_size_dims, float* output_data,
@@ -820,7 +1751,6 @@ inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims,
                  DimsToShape(output_dims), output_data);
 }
 
-// Legacy Dims<4>
 inline void ResizeBilinear(const uint8* input_data, const Dims<4>& input_dims,
                            const int32* output_size_data,
                            const Dims<4>& output_size_dims, uint8* output_data,
@@ -850,7 +1780,6 @@ inline void ResizeBilinear(const uint8* input_data, const Dims<4>& input_dims,
                  output_data, output_dims, /*align_corners=*/false);
 }
 
-// Legacy Dims<4>.
 template <typename T>
 inline void BatchToSpaceND(const T* input_data, const Dims<4>& input_dims,
                            const int32* block_shape_data,
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 732880d9da..77f84e0c1c 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -52,10 +52,6 @@ using reference_ops::Broadcast4DSlowLessEqual;
 using reference_ops::Broadcast4DSlowLessEqualWithScaling;
 using reference_ops::Broadcast4DSlowLessWithScaling;
 using reference_ops::BroadcastAdd4DSlow;
-using reference_ops::BroadcastGreater;
-using reference_ops::BroadcastGreaterEqual;
-using reference_ops::BroadcastLess;
-using reference_ops::BroadcastLessEqual;
 using reference_ops::BroadcastMul4DSlow;
 using reference_ops::BroadcastSub4DSlow;
 using reference_ops::Concatenation;
@@ -82,7 +78,6 @@ using reference_ops::Select;
 using reference_ops::SpaceToBatchND;
 using reference_ops::Split;
 using reference_ops::StridedSlice;
-using reference_ops::TensorFlowSplit;
 using reference_ops::Transpose;
 
 // TODO(b/80247582) Remove this constant.
@@ -112,12 +107,6 @@ VectorMap<Scalar> MapAsVector(Scalar* data, const RuntimeShape& shape) {
   return VectorMap<Scalar>(data, size, 1);
 }
 
-template <typename Scalar, int N>
-VectorMap<Scalar> MapAsVector(Scalar* data, const Dims<N>& dims) {
-  const int size = FlatSize(dims);
-  return VectorMap<Scalar>(data, size, 1);
-}
-
 // Make a local VectorMap typedef allowing to map a float array
 // as a Eigen matrix expression. The same explanation as for VectorMap
 // above also applies here.
@@ -145,28 +134,6 @@ MatrixMap<Scalar> MapAsMatrixWithFirstDimAsCols(Scalar* data,
   return MatrixMap<Scalar>(data, rows, cols);
 }
 
-template <typename Scalar, int N>
-MatrixMap<Scalar> MapAsMatrixWithFirstDimAsRows(Scalar* data,
-                                                const Dims<N>& dims) {
-  const int rows = dims.sizes[0];
-  int cols = 1;
-  for (int d = 1; d < N; d++) {
-    cols *= dims.sizes[d];
-  }
-  return MatrixMap<Scalar>(data, rows, cols);
-}
-
-template <typename Scalar, int N>
-MatrixMap<Scalar> MapAsMatrixWithLastDimAsCols(Scalar* data,
-                                               const Dims<N>& dims) {
-  const int cols = dims.sizes[N - 1];
-  int rows = 1;
-  for (int d = 0; d < N - 1; d++) {
-    rows *= dims.sizes[d];
-  }
-  return MatrixMap<Scalar>(data, rows, cols);
-}
-
 template <typename Scalar>
 using ArrayMap = typename std::conditional<
     std::is_const<Scalar>::value,
@@ -174,17 +141,6 @@ using ArrayMap = typename std::conditional<
                                   Eigen::Dynamic, Eigen::Dynamic>>,
     Eigen::Map<Eigen::Array<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
 
-template <typename Scalar, int N>
-ArrayMap<Scalar> MapAsArrayWithFirstDimAsRows(Scalar* data,
-                                              const Dims<N>& dims) {
-  const int rows = dims.sizes[0];
-  int cols = 1;
-  for (int d = 1; d < N; d++) {
-    cols *= dims.sizes[d];
-  }
-  return ArrayMap<Scalar>(data, rows, cols);
-}
-
 template <typename Scalar>
 ArrayMap<Scalar> MapAsArrayWithLastDimAsRows(Scalar* data,
                                              const RuntimeShape& shape) {
@@ -206,20 +162,6 @@ struct TTypes {
       UnalignedConstMatrix;
 };
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-// TODO(b/62193649): this function is only needed as long
-// as we have the --variable_batch hack.
-template <typename Scalar, int N>
-MatrixMap<Scalar> MapAsMatrixWithGivenNumberOfRows(Scalar* data,
-                                                   const Dims<N>& dims,
-                                                   int rows) {
-  const int flatsize = FlatSize(dims);
-  TFLITE_DCHECK((flatsize % rows) == 0);
-  const int cols = flatsize / rows;
-  return MatrixMap<Scalar>(data, rows, cols);
-}
-
 // TODO(b/62193649): this function is only needed as long
 // as we have the --variable_batch hack.
 template <typename Scalar>
@@ -271,15 +213,6 @@ SaturatingRoundingMultiplyByPOTParam(
       SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
 }
 
-inline bool AreSameDims(const Dims<4>& dims1, const Dims<4>& dims2) {
-  for (int i = 0; i < 4; i++) {
-    if (dims1.sizes[i] != dims2.sizes[i]) {
-      return false;
-    }
-  }
-  return true;
-}
-
 inline void AddBiasAndEvalActivationFunction(float output_activation_min,
                                              float output_activation_max,
                                              const RuntimeShape& bias_shape,
@@ -353,33 +286,6 @@ inline void AddBiasAndEvalActivationFunction(float output_activation_min,
 #endif
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void AddBiasAndEvalActivationFunction(const float* bias_data,
-                                             const Dims<4>& bias_dims,
-                                             float* array_data,
-                                             const Dims<4>& array_dims,
-                                             float output_activation_min,
-                                             float output_activation_max) {
-  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
-                                   DimsToShape(bias_dims), bias_data,
-                                   DimsToShape(array_dims), array_data);
-}
-
-// Note: This to be converted to RuntimeShapes along with Conv.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void AddBiasAndEvalActivationFunction(const float* bias_data,
-                                      const Dims<4>& bias_dims,
-                                      float* array_data,
-                                      const Dims<4>& array_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  AddBiasAndEvalActivationFunction(bias_data, bias_dims, array_data, array_dims,
-                                   output_activation_min,
-                                   output_activation_max);
-}
-
 template <typename Lhs, typename Rhs, typename Result>
 void Gemm(const Eigen::MatrixBase<Lhs>& lhs, const Eigen::MatrixBase<Rhs>& rhs,
           Eigen::MatrixBase<Result>* result) {
@@ -926,38 +832,6 @@ inline void FullyConnected(
                                    output_data);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
-                           const float* weights_data,
-                           const Dims<4>& weights_dims, const float* bias_data,
-                           const Dims<4>& bias_dims,
-                           float output_activation_min,
-                           float output_activation_max, float* output_data,
-                           const Dims<4>& output_dims) {
-  tflite::FullyConnectedParams op_params;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  FullyConnected(op_params, DimsToShape(input_dims), input_data,
-                 DimsToShape(weights_dims), weights_data,
-                 DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
-                 output_data);
-}
-
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void FullyConnected(const float* input_data, const Dims<4>& input_dims,
-                    const float* weights_data, const Dims<4>& weights_dims,
-                    const float* bias_data, const Dims<4>& bias_dims,
-                    float* output_data, const Dims<4>& output_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data,
-                 bias_dims, output_activation_min, output_activation_max,
-                 output_data, output_dims);
-}
-
 #ifdef USE_NEON
 inline void FullyConnectedAsGEMV(
     const RuntimeShape& input_shape, const uint8* input_data,
@@ -1204,33 +1078,6 @@ inline void FullyConnected(
       input_offset, output_pipeline);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                           int32 input_offset, const uint8* filter_data,
-                           const Dims<4>& filter_dims, int32 filter_offset,
-                           const int32* bias_data, const Dims<4>& bias_dims,
-                           int32 output_offset, int32 output_multiplier,
-                           int output_shift, int32 output_activation_min,
-                           int32 output_activation_max, uint8* output_data,
-                           const Dims<4>& output_dims,
-                           gemmlowp::GemmContext* gemm_context) {
-  tflite::FullyConnectedParams op_params;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  FullyConnected(op_params, DimsToShape(input_dims), input_data,
-                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                 bias_data, DimsToShape(output_dims), output_data,
-                 gemm_context);
-}
-
 inline void FullyConnected(
     const FullyConnectedParams& params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& filter_shape,
@@ -1318,54 +1165,6 @@ inline void FullyConnected(
       input_offset, output_pipeline);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void FullyConnected(
-    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
-    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
-    const int32* bias_data_int32, const Dims<4>& bias_dims, int32 output_offset,
-    int32 output_multiplier, int output_shift, int32 output_activation_min,
-    int32 output_activation_max, int16* output_data, const Dims<4>& output_dims,
-    gemmlowp::GemmContext* gemm_context) {
-  tflite::FullyConnectedParams op_params;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  FullyConnected(op_params, DimsToShape(input_dims), input_data,
-                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                 bias_data_int32, DimsToShape(output_dims), output_data,
-                 gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                    int32 input_offset, const uint8* filter_data,
-                    const Dims<4>& filter_dims, int32 filter_offset,
-                    const int32* bias_data, const Dims<4>& bias_dims,
-                    int32 output_offset, int32 output_multiplier,
-                    int output_shift, int32 output_activation_min,
-                    int32 output_activation_max, uint8* output_data,
-                    const Dims<4>& output_dims,
-                    gemmlowp::GemmContext* gemm_context) {
-  static_assert(Ac == FusedActivationFunctionType::kNone ||
-                    Ac == FusedActivationFunctionType::kRelu ||
-                    Ac == FusedActivationFunctionType::kRelu6 ||
-                    Ac == FusedActivationFunctionType::kRelu1,
-                "");
-  FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims,
-                 filter_offset, bias_data, bias_dims, output_offset,
-                 output_multiplier, output_shift, output_activation_min,
-                 output_activation_max, output_data, output_dims, gemm_context);
-}
-
 // Internal function doing the actual arithmetic work for
 // ShuffledFullyConnected.
 // May be called either directly by it (single-threaded case) or may be used
@@ -1810,29 +1609,6 @@ inline void ShuffledFullyConnected(
   gemm_context->workers_pool()->Execute(tasks);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void ShuffledFullyConnected(
-    const uint8* input_data, const Dims<4>& input_dims,
-    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
-    int output_shift, int32 output_activation_min, int32 output_activation_max,
-    int16* output_data, const Dims<4>& output_dims,
-    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
-  tflite::FullyConnectedParams op_params;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data,
-                         DimsToShape(weights_dims), shuffled_weights_data,
-                         DimsToShape(bias_dims), bias_data,
-                         DimsToShape(output_dims), output_data,
-                         shuffled_input_workspace_data, gemm_context);
-}
-
 template <typename T>
 inline void ExtractPatchIntoBufferColumn(const RuntimeShape& input_shape, int w,
                                          int h, int b, int kheight, int kwidth,
@@ -1923,20 +1699,6 @@ inline void ExtractPatchIntoBufferColumn(const RuntimeShape& input_shape, int w,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T>
-inline void ExtractPatchIntoBufferColumn(
-    const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth,
-    int stride_width, int stride_height, int pad_width, int pad_height,
-    int in_width, int in_height, int in_depth, int single_buffer_length,
-    int buffer_id, const T* in_data, T* conv_buffer_data, uint8 zero_byte) {
-  ExtractPatchIntoBufferColumn(
-      DimsToShape(input_dims), w, h, b, kheight, kwidth, stride_width,
-      stride_height, pad_width, pad_height, in_width, in_height, in_depth,
-      single_buffer_length, buffer_id, in_data, conv_buffer_data, zero_byte);
-}
-
 template <typename T>
 void DilatedIm2col(const ConvParams& params, uint8 zero_byte,
                    const RuntimeShape& input_shape, const T* input_data,
@@ -2020,30 +1782,6 @@ void DilatedIm2col(const ConvParams& params, uint8 zero_byte,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T>
-void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
-                   const Dims<4>& filter_dims, int stride_width,
-                   int stride_height, int dilation_width_factor,
-                   int dilation_height_factor, int pad_width, int pad_height,
-                   const Dims<4>& output_dims, uint8 zero_byte,
-                   T* im2col_data) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = dilation_width_factor;
-  op_params.dilation_height_factor = dilation_height_factor;
-
-  DilatedIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data,
-                DimsToShape(filter_dims), DimsToShape(output_dims),
-                im2col_data);
-}
-
 template <typename T>
 void Im2col(const ConvParams& params, int kheight, int kwidth, uint8 zero_byte,
             const RuntimeShape& input_shape, const T* input_data,
@@ -2079,36 +1817,6 @@ void Im2col(const ConvParams& params, int kheight, int kwidth, uint8 zero_byte,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T>
-void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
-            int stride_height, int pad_width, int pad_height, int kheight,
-            int kwidth, uint8 zero_byte, T* output_data,
-            const Dims<4>& output_dims) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = 1;
-  op_params.dilation_height_factor = 1;
-
-  Im2col(op_params, kheight, kwidth, zero_byte, DimsToShape(input_dims),
-         input_data, DimsToShape(output_dims), output_data);
-}
-
-// legacy, for compatibility with old checked-in code
-template <typename T>
-void Im2col(const T* input_data, const Dims<4>& input_dims, int stride,
-            int pad_width, int pad_height, int kheight, int kwidth,
-            uint8 zero_byte, T* output_data, const Dims<4>& output_dims) {
-  Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight,
-         kwidth, zero_byte, output_data, output_dims);
-}
-
 inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                  const float* input_data, const RuntimeShape& filter_shape,
                  const float* filter_data, const RuntimeShape& bias_shape,
@@ -2172,33 +1880,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                                    output_data);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Conv(const float* input_data, const Dims<4>& input_dims,
-                 const float* filter_data, const Dims<4>& filter_dims,
-                 const float* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 float output_activation_min, float output_activation_max,
-                 float* output_data, const Dims<4>& output_dims,
-                 float* im2col_data, const Dims<4>& im2col_dims) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = dilation_width_factor;
-  op_params.dilation_height_factor = dilation_height_factor;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
-       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
-       output_data, DimsToShape(im2col_dims), im2col_data);
-}
-
 inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr,
                        const RuntimeShape& input_shape,
                        const int8_t* input_data,
@@ -2279,82 +1960,6 @@ inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr,
                                    output_data);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
-                       const int8_t* filter_data, const Dims<4>& filter_dims,
-                       const float* bias_data, const Dims<4>& bias_dims,
-                       int stride_width, int stride_height, int pad_width,
-                       int pad_height, float* scaling_factors_ptr,
-                       float output_activation_min, float output_activation_max,
-                       float* output_data, const Dims<4>& output_dims,
-                       int8_t* im2col_data, const Dims<4>& im2col_dims) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  HybridConv(op_params, scaling_factors_ptr, DimsToShape(input_dims),
-             input_data, DimsToShape(filter_dims), filter_data,
-             DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
-             output_data, DimsToShape(im2col_dims), im2col_data);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <FusedActivationFunctionType Ac>
-void Conv(const float* input_data, const Dims<4>& input_dims,
-          const float* filter_data, const Dims<4>& filter_dims,
-          const float* bias_data, const Dims<4>& bias_dims, int stride_width,
-          int stride_height, int dilation_width_factor,
-          int dilation_height_factor, int pad_width, int pad_height,
-          float* output_data, const Dims<4>& output_dims, float* im2col_data,
-          const Dims<4>& im2col_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
-       stride_width, stride_height, dilation_width_factor,
-       dilation_height_factor, pad_width, pad_height, output_activation_min,
-       output_activation_max, output_data, output_dims, im2col_data,
-       im2col_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void Conv(const float* input_data, const Dims<4>& input_dims,
-          const float* filter_data, const Dims<4>& filter_dims,
-          const float* bias_data, const Dims<4>& bias_dims, int stride_width,
-          int stride_height, int pad_width, int pad_height, float* output_data,
-          const Dims<4>& output_dims, float* im2col_data,
-          const Dims<4>& im2col_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
-       stride_width, stride_height, 1, 1, pad_width, pad_height,
-       output_activation_min, output_activation_max, output_data, output_dims,
-       im2col_data, im2col_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void Conv(const float* input_data, const Dims<4>& input_dims,
-          const float* filter_data, const Dims<4>& filter_dims,
-          const float* bias_data, const Dims<4>& bias_dims, int stride,
-          int pad_width, int pad_height, float* output_data,
-          const Dims<4>& output_dims, float* im2col_data,
-          const Dims<4>& im2col_dims) {
-  Conv<Ac>(input_data, input_dims, filter_data, filter_dims, bias_data,
-           bias_dims, stride, stride, 1, 1, pad_width, pad_height, output_data,
-           output_dims, im2col_data, im2col_dims);
-}
-
 inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                  const uint8* input_data, const RuntimeShape& filter_shape,
                  const uint8* filter_data, const RuntimeShape& bias_shape,
@@ -2446,192 +2051,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
       input_offset, output_pipeline);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
-                 int32 input_offset, const uint8* filter_data,
-                 const Dims<4>& filter_dims, int32 filter_offset,
-                 const int32* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 int32 output_offset, int32 output_multiplier, int output_shift,
-                 int32 output_activation_min, int32 output_activation_max,
-                 uint8* output_data, const Dims<4>& output_dims,
-                 uint8* im2col_data, const Dims<4>& im2col_dims,
-                 gemmlowp::GemmContext* gemm_context) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = dilation_width_factor;
-  op_params.dilation_height_factor = dilation_height_factor;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
-       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
-       output_data, DimsToShape(im2col_dims), im2col_data, gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
-                 int32 input_offset, const uint8* filter_data,
-                 const Dims<4>& filter_dims, int32 filter_offset,
-                 const int32* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int pad_width,
-                 int pad_height, int32 output_offset, int32 output_multiplier,
-                 int output_shift, int32 output_activation_min,
-                 int32 output_activation_max, uint8* output_data,
-                 const Dims<4>& output_dims, uint8* im2col_data,
-                 const Dims<4>& im2col_dims,
-                 gemmlowp::GemmContext* gemm_context) {
-  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
-       filter_offset, bias_data, bias_dims, stride_width, stride_height, 1, 1,
-       pad_width, pad_height, output_offset, output_multiplier, output_shift,
-       output_activation_min, output_activation_max, output_data, output_dims,
-       im2col_data, im2col_dims, gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
-                 int32 input_offset, const uint8* filter_data,
-                 const Dims<4>& filter_dims, int32 filter_offset,
-                 const int32* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int pad_width,
-                 int pad_height, int32 output_offset, int32 output_multiplier,
-                 int output_shift, int32 output_activation_min,
-                 int32 output_activation_max, uint8* output_data,
-                 const Dims<4>& output_dims, uint8* im2col_data,
-                 const Dims<4>& im2col_dims,
-                 gemmlowp::GemmContext* gemm_context) {
-  static_assert(Ac == FusedActivationFunctionType::kNone ||
-                    Ac == FusedActivationFunctionType::kRelu ||
-                    Ac == FusedActivationFunctionType::kRelu6 ||
-                    Ac == FusedActivationFunctionType::kRelu1,
-                "");
-  if (Ac == FusedActivationFunctionType::kNone) {
-    TFLITE_DCHECK_EQ(output_activation_min, 0);
-    TFLITE_DCHECK_EQ(output_activation_max, 255);
-  }
-  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
-       filter_offset, bias_data, bias_dims, stride_width, stride_height,
-       pad_width, pad_height, output_offset, output_multiplier, output_shift,
-       output_activation_min, output_activation_max, output_data, output_dims,
-       im2col_data, im2col_dims, gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void Conv(const uint8* input_data, const Dims<4>& input_dims,
-          int32 input_offset, const uint8* filter_data,
-          const Dims<4>& filter_dims, int32 filter_offset,
-          const int32* bias_data, const Dims<4>& bias_dims, int stride,
-          int pad_width, int pad_height, int32 output_offset,
-          int32 output_multiplier, int output_shift,
-          int32 output_activation_min, int32 output_activation_max,
-          uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data,
-          const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) {
-  static_assert(Ac == FusedActivationFunctionType::kNone ||
-                    Ac == FusedActivationFunctionType::kRelu ||
-                    Ac == FusedActivationFunctionType::kRelu6 ||
-                    Ac == FusedActivationFunctionType::kRelu1,
-                "");
-  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
-       filter_offset, bias_data, bias_dims, stride, stride, pad_width,
-       pad_height, output_offset, output_multiplier, output_shift,
-       output_activation_min, output_activation_max, output_data, output_dims,
-       im2col_data, im2col_dims, gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac, typename T>
-void Im2col(const T* input_data, const Dims<4>& input_dims, int stride,
-            int pad_width, int pad_height, int kheight, int kwidth,
-            uint8 zero_byte, T* output_data, const Dims<4>& output_dims) {
-  Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight,
-         kwidth, zero_byte, output_data, output_dims);
-}
-
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void ConvAsGemm(const float* input_data, const Dims<4>& input_dims,
-                const float* filter_data, const Dims<4>& filter_dims,
-                const float* bias_data, const Dims<4>& bias_dims,
-                float* output_data, const Dims<4>& output_dims) {
-  gemmlowp::ScopedProfilingLabel label("ConvAsGemm");
-
-  const auto input_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
-  const auto filter_matrix_map =
-      MapAsMatrixWithLastDimAsCols(filter_data, filter_dims);
-  auto output_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
-
-  Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map);
-
-  AddBiasAndEvalActivationFunction<Ac>(bias_data, bias_dims, output_data,
-                                       output_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims,
-                int32 input_offset, const uint8* filter_data,
-                const Dims<4>& filter_dims, int32 filter_offset,
-                const int32* bias_data, const Dims<4>& bias_dims,
-                int32 output_offset, int32 output_multiplier, int output_shift,
-                int32 output_activation_min, int32 output_activation_max,
-                uint8* output_data, const Dims<4>& output_dims,
-                gemmlowp::GemmContext* gemm_context) {
-  gemmlowp::ScopedProfilingLabel label("ConvAsGemm/8bit");
-  static_assert(Ac == FusedActivationFunctionType::kNone ||
-                    Ac == FusedActivationFunctionType::kRelu ||
-                    Ac == FusedActivationFunctionType::kRelu6 ||
-                    Ac == FusedActivationFunctionType::kRelu1,
-                "");
-  const int input_rows = input_dims.sizes[0];
-  const int input_cols = FlatSizeSkipDim(input_dims, 0);
-  const int filter_rows = filter_dims.sizes[3];
-  const int filter_cols = FlatSizeSkipDim(filter_dims, 3);
-  const int output_rows = output_dims.sizes[0];
-  const int output_cols = FlatSizeSkipDim(output_dims, 0);
-  TFLITE_DCHECK_EQ(output_rows, filter_rows);
-  TFLITE_DCHECK_EQ(output_cols, input_cols);
-  TFLITE_DCHECK_EQ(filter_cols, input_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1);
-  gemmlowp::MatrixMap<const uint8, gemmlowp::MapOrder::RowMajor> filter_matrix(
-      filter_data, output_rows, filter_cols, filter_cols);
-  gemmlowp::MatrixMap<const uint8, gemmlowp::MapOrder::ColMajor> input_matrix(
-      input_data, filter_cols, output_cols, filter_cols);
-  gemmlowp::MatrixMap<uint8, gemmlowp::MapOrder::ColMajor> output_matrix(
-      output_data, output_rows, output_cols, output_rows);
-  const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp(
-      bias_data, output_rows, output_offset, output_multiplier, -output_shift,
-      output_activation_min, output_activation_max);
-  gemmlowp::GemmWithOutputPipeline<uint8, uint8,
-                                   gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
-      gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset,
-      input_offset, output_pipeline);
-}
-
 template <typename T>
 inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
                          const RuntimeShape& unextended_input_shape,
@@ -3548,21 +2967,6 @@ void BroadcastDiv4DSlow(const ArithmeticParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-template <typename T>
-void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims,
-                  const T* input2_data, const Dims<4>& input2_dims,
-                  T output_activation_min, T output_activation_max,
-                  T* output_data, const Dims<4>& output_dims) {
-  tflite::ArithmeticParams op_params;
-  SetActivationParams(output_activation_min, output_activation_max, &op_params);
-
-  BroadcastDiv4DSlow(op_params, DimsToShape(input1_dims), input1_data,
-                     DimsToShape(input2_dims), input2_data,
-                     DimsToShape(output_dims), output_data);
-}
-
 // TODO(aselle): This is not actually optimized yet.
 inline void SubNonBroadcast(const ArithmeticParams& params,
                             const RuntimeShape& input1_shape,
@@ -3756,31 +3160,6 @@ inline void LstmCell(
       output_state_map.tanh();
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
-                     const float* prev_activ_data,
-                     const Dims<4>& prev_activ_dims, const float* weights_data,
-                     const Dims<4>& weights_dims, const float* bias_data,
-                     const Dims<4>& bias_dims, const float* prev_state_data,
-                     const Dims<4>& prev_state_dims, float* output_state_data,
-                     const Dims<4>& output_state_dims, float* output_activ_data,
-                     const Dims<4>& output_activ_dims, float* concat_temp_data,
-                     const Dims<4>& concat_temp_dims, float* activ_temp_data,
-                     const Dims<4>& activ_temp_dims) {
-  tflite::LstmCellParams op_params;
-  // Float LSTM cell does not need parameters to be set: leave untouched.
-
-  LstmCell(op_params, DimsToShape(input_dims), input_data,
-           DimsToShape(prev_activ_dims), prev_activ_data,
-           DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims),
-           bias_data, DimsToShape(prev_state_dims), prev_state_data,
-           DimsToShape(output_state_dims), output_state_data,
-           DimsToShape(output_activ_dims), output_activ_data,
-           DimsToShape(concat_temp_dims), concat_temp_data,
-           DimsToShape(activ_temp_dims), activ_temp_data);
-}
-
 // Quantized LSTM cell. Currently just a copy of the reference impl in
 // reference_ops.h. See the big function comment there, not replicating it
 // here.
@@ -4071,37 +3450,6 @@ inline void LstmCell(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <int StateIntegerBits>
-void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
-              const uint8* prev_activ_data_uint8,
-              const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8,
-              const Dims<4>& weights_dims, const int32* bias_data_int32,
-              const Dims<4>& bias_dims, const int16* prev_state_data_int16,
-              const Dims<4>& prev_state_dims, int16* output_state_data_int16,
-              const Dims<4>& output_state_dims, uint8* output_activ_data_uint8,
-              const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8,
-              const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16,
-              const Dims<4>& activ_temp_dims, int32 weights_zero_point,
-              int32 accum_multiplier, int accum_shift,
-              gemmlowp::GemmContext* gemm_context) {
-  tflite::LstmCellParams op_params;
-  op_params.weights_zero_point = weights_zero_point;
-  op_params.accum_multiplier = accum_multiplier;
-  op_params.accum_shift = accum_shift;
-
-  LstmCell<StateIntegerBits>(
-      op_params, DimsToShape(input_dims), input_data_uint8,
-      DimsToShape(prev_activ_dims), prev_activ_data_uint8,
-      DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims),
-      bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16,
-      DimsToShape(output_state_dims), output_state_data_int16,
-      DimsToShape(output_activ_dims), output_activ_data_uint8,
-      DimsToShape(concat_temp_dims), concat_temp_data_uint8,
-      DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context);
-}
-
 inline int NodeOffset(int b, int h, int w, int height, int width) {
   return (b * height + h) * width + w;
 }
@@ -4561,16 +3909,6 @@ inline void Softmax(const SoftmaxParams& params,
   out_mat.array().rowwise() *= scale;
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
-                    float beta, float* output_data,
-                    const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  params.beta = beta;
-  Softmax(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Softmax(const SoftmaxParams& params,
                     const RuntimeShape& input_shape, const uint8* input_data,
                     const RuntimeShape& output_shape, uint8* output_data) {
@@ -4782,19 +4120,6 @@ inline void Softmax(const SoftmaxParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
-                    int32 input_beta_multiplier, int32 input_beta_left_shift,
-                    int diff_min, uint8* output_data,
-                    const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  params.input_multiplier = input_beta_multiplier;
-  params.input_left_shift = input_beta_left_shift;
-  params.diff_min = diff_min;
-  Softmax(params, input_shape, input_data, output_shape, output_data);
-}
-
 // TODO(myenik): This is the same as the reference implementation, not actually
 // optimized yet.
 inline void LogSoftmax(const SoftmaxParams& params,
@@ -4832,15 +4157,6 @@ inline void LogSoftmax(const SoftmaxParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy
-inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
-                       float* output_data, const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  // No params currently used for float LogSoftmax.
-  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
-}
-
 template <int OutputIntegerBits, int InputIntegerBits>
 inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
 log_x_for_x_greater_than_or_equal_to_1_impl(
@@ -5045,22 +4361,6 @@ inline void LogSoftmax(const SoftmaxParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
-                       int32 input_multiplier, int32 input_left_shift,
-                       int32 reverse_scaling_divisor,
-                       int32 reverse_scaling_right_shift, int diff_min,
-                       uint8* output_data, const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  params.input_multiplier = input_multiplier;
-  params.input_left_shift = input_left_shift;
-  params.reverse_scaling_divisor = reverse_scaling_divisor;
-  params.reverse_scaling_right_shift = reverse_scaling_right_shift;
-  params.diff_min = diff_min;
-  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) {
   gemmlowp::ScopedProfilingLabel label("Logistic");
@@ -5219,20 +4519,6 @@ inline void Logistic(const LogisticParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
-                     int32 input_zero_point, int32 input_range_radius,
-                     int32 input_multiplier, int input_left_shift,
-                     uint8* output_data, const RuntimeShape& output_shape) {
-  LogisticParams params;
-  params.input_zero_point = input_zero_point;
-  params.input_range_radius = input_range_radius;
-  params.input_multiplier = input_multiplier;
-  params.input_left_shift = input_left_shift;
-  Logistic(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Logistic(const LogisticParams& params,
                      const RuntimeShape& input_shape, const int16* input_data,
                      const RuntimeShape& output_shape, int16* output_data) {
@@ -5294,24 +4580,6 @@ inline void Logistic(const LogisticParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy version.
-inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
-                     const RuntimeShape& output_shape, int16* output_data) {
-  LogisticParams params;
-  // No params currently needed by int16 Logistic.
-  Logistic(params, input_shape, input_data, output_shape, output_data);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy version.
-inline void Logistic(const int16* input_data, const RuntimeShape& input_shape,
-                     int16* output_data, const RuntimeShape& output_shape) {
-  LogisticParams params;
-  // No params currently needed by int16 Logistic.
-  Logistic(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
                  const RuntimeShape& output_shape, float* output_data) {
   gemmlowp::ScopedProfilingLabel label("Tanh");
@@ -5479,20 +4747,6 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
-                 int32 input_zero_point, int32 input_range_radius,
-                 int32 input_multiplier, int input_left_shift,
-                 uint8* output_data, const RuntimeShape& output_shape) {
-  TanhParams params;
-  params.input_zero_point = input_zero_point;
-  params.input_range_radius = input_range_radius;
-  params.input_multiplier = input_multiplier;
-  params.input_left_shift = input_left_shift;
-  Tanh(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
                  const int16* input_data, const RuntimeShape& output_shape,
                  int16* output_data) {
@@ -5594,16 +4848,6 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
-                 int input_left_shift, int16* output_data,
-                 const RuntimeShape& output_shape) {
-  TanhParams params;
-  params.input_left_shift = input_left_shift;
-  Tanh(params, input_shape, input_data, output_shape, output_data);
-}
-
 template <typename SrcT, typename DstT>
 inline void Cast(const RuntimeShape& input_shape, const SrcT* input_data,
                  const RuntimeShape& output_shape, DstT* output_data) {
@@ -6486,27 +5730,6 @@ void TransposeIm2col(const ConvParams& params, uint8 zero_byte,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T>
-void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
-                     const Dims<4>& filter_dims, int stride_width,
-                     int stride_height, int pad_width, int pad_height,
-                     const Dims<4>& output_dims, uint8 zero_byte,
-                     T* im2col_data) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-
-  TransposeIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data,
-                  DimsToShape(filter_dims), DimsToShape(output_dims),
-                  im2col_data);
-}
-
 inline void TransposeConv(
     const ConvParams& params, const RuntimeShape& input_shape,
     const float* input_data, const RuntimeShape& filter_shape,
@@ -6530,27 +5753,6 @@ inline void TransposeConv(
   Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, float* output_data,
-                          const Dims<4>& output_dims, float* im2col_data,
-                          const Dims<4>& im2col_dims) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-
-  TransposeConv(op_params, DimsToShape(input_dims), input_data,
-                DimsToShape(filter_dims), filter_data, DimsToShape(output_dims),
-                output_data, DimsToShape(im2col_dims), im2col_data);
-}
-
 }  // namespace optimized_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
index a8428528c9..11224270a4 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
@@ -94,81 +94,6 @@ inline void DepthwiseConv(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          const float* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height,
-                          int dilation_width_factor, int dilation_height_factor,
-                          int pad_width, int pad_height, int depth_multiplier,
-                          float output_activation_min,
-                          float output_activation_max, float* output_data,
-                          const Dims<4>& output_dims) {
-  tflite::DepthwiseParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = dilation_width_factor;
-  op_params.dilation_height_factor = dilation_height_factor;
-  op_params.depth_multiplier = depth_multiplier;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
-                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                bias_data, DimsToShape(output_dims), output_data);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          const float* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
-                          float output_activation_min,
-                          float output_activation_max, float* output_data,
-                          const Dims<4>& output_dims) {
-  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
-                bias_dims, stride_width, stride_height, 1, 1, pad_width,
-                pad_height, depth_multiplier, output_activation_min,
-                output_activation_max, output_data, output_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy, for compatibility with old checked-in code.
-template <FusedActivationFunctionType Ac>
-void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                   const float* filter_data, const Dims<4>& filter_dims,
-                   const float* bias_data, const Dims<4>& bias_dims,
-                   int stride_width, int stride_height, int pad_width,
-                   int pad_height, int depth_multiplier, float* output_data,
-                   const Dims<4>& output_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
-                bias_dims, stride_width, stride_height, pad_width, pad_height,
-                depth_multiplier, output_activation_min, output_activation_max,
-                output_data, output_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy, for compatibility with old checked-in code.
-template <FusedActivationFunctionType Ac>
-void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
-                   const float* filter_data, const Dims<4>& filter_dims,
-                   const float* bias_data, const Dims<4>& bias_dims, int stride,
-                   int pad_width, int pad_height, int depth_multiplier,
-                   float* output_data, const Dims<4>& output_dims) {
-  DepthwiseConv<Ac>(input_data, input_dims, filter_data, filter_dims, bias_data,
-                    bias_dims, stride, stride, pad_width, pad_height,
-                    depth_multiplier, output_data, output_dims);
-}
-
 }  // end namespace reference_ops
 }  // end namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
index e8fc566502..eab28e6c84 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
@@ -25,9 +25,6 @@ limitations under the License.
 namespace tflite {
 namespace reference_ops {
 
-// TODO(b/80418076): Move to legacy ops file, along with invocations.
-static constexpr int kDepthwiseReverseShift = -1;
-
 inline void DepthwiseConv(
     const DepthwiseParams& params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& filter_shape,
@@ -109,106 +106,6 @@ inline void DepthwiseConv(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                          int32 input_offset, const uint8* filter_data,
-                          const Dims<4>& filter_dims, int32 filter_offset,
-                          const int32* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height,
-                          int dilation_width_factor, int dilation_height_factor,
-                          int pad_width, int pad_height, int depth_multiplier,
-                          int32 output_offset, int32 output_multiplier,
-                          int output_shift, int32 output_activation_min,
-                          int32 output_activation_max, uint8* output_data,
-                          const Dims<4>& output_dims) {
-  tflite::DepthwiseParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = dilation_width_factor;
-  op_params.dilation_height_factor = dilation_height_factor;
-  op_params.depth_multiplier = depth_multiplier;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kDepthwiseReverseShift * output_shift;
-
-  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
-                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                bias_data, DimsToShape(output_dims), output_data);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                          int32 input_offset, const uint8* filter_data,
-                          const Dims<4>& filter_dims, int32 filter_offset,
-                          const int32* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
-                          int32 output_offset, int32 output_multiplier,
-                          int output_shift, int32 output_activation_min,
-                          int32 output_activation_max, uint8* output_data,
-                          const Dims<4>& output_dims) {
-  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
-                filter_offset, bias_data, bias_dims, stride_width,
-                stride_height, 1, 1, pad_width, pad_height, depth_multiplier,
-                output_offset, output_multiplier, output_shift,
-                output_activation_min, output_activation_max, output_data,
-                output_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy, for compatibility with old checked-in code.
-template <FusedActivationFunctionType Ac>
-void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                   int32 input_offset, const uint8* filter_data,
-                   const Dims<4>& filter_dims, int32 filter_offset,
-                   const int32* bias_data, const Dims<4>& bias_dims,
-                   int stride_width, int stride_height, int pad_width,
-                   int pad_height, int depth_multiplier, int32 output_offset,
-                   int32 output_multiplier, int output_shift,
-                   int32 output_activation_min, int32 output_activation_max,
-                   uint8* output_data, const Dims<4>& output_dims) {
-  if (Ac == FusedActivationFunctionType::kNone) {
-    TFLITE_DCHECK_EQ(output_activation_min, 0);
-    TFLITE_DCHECK_EQ(output_activation_max, 255);
-  }
-  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
-                filter_offset, bias_data, bias_dims, stride_width,
-                stride_height, pad_width, pad_height, depth_multiplier,
-                output_offset, output_multiplier, output_shift,
-                output_activation_min, output_activation_max, output_data,
-                output_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy, for compatibility with old checked-in code.
-template <FusedActivationFunctionType Ac>
-void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
-                   int32 input_offset, const uint8* filter_data,
-                   const Dims<4>& filter_dims, int32 filter_offset,
-                   const int32* bias_data, const Dims<4>& bias_dims, int stride,
-                   int pad_width, int pad_height, int depth_multiplier,
-                   int32 output_offset, int32 output_multiplier,
-                   int output_shift, int32 output_activation_min,
-                   int32 output_activation_max, uint8* output_data,
-                   const Dims<4>& output_dims) {
-  DepthwiseConv<Ac>(input_data, input_dims, input_offset, filter_data,
-                    filter_dims, filter_offset, bias_data, bias_dims, stride,
-                    stride, pad_width, pad_height, depth_multiplier,
-                    output_offset, output_multiplier, output_shift,
-                    output_activation_min, output_activation_max, output_data,
-                    output_dims);
-}
-
 }  // end namespace reference_ops
 }  // end namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h b/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h
index 23325e8c4c..3c7fd29256 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h
@@ -62,39 +62,6 @@ inline void FullyConnected(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
-                           const float* weights_data,
-                           const Dims<4>& weights_dims, const float* bias_data,
-                           const Dims<4>& bias_dims,
-                           float output_activation_min,
-                           float output_activation_max, float* output_data,
-                           const Dims<4>& output_dims) {
-  tflite::FullyConnectedParams op_params;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  FullyConnected(op_params, DimsToShape(input_dims), input_data,
-                 DimsToShape(weights_dims), weights_data,
-                 DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
-                 output_data);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void FullyConnected(const float* input_data, const Dims<4>& input_dims,
-                    const float* weights_data, const Dims<4>& weights_dims,
-                    const float* bias_data, const Dims<4>& bias_dims,
-                    float* output_data, const Dims<4>& output_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data,
-                 bias_dims, output_activation_min, output_activation_max,
-                 output_data, output_dims);
-}
-
 inline void FullyConnected(
     const FullyConnectedParams& params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& filter_shape,
@@ -144,32 +111,6 @@ inline void FullyConnected(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                           int32 input_offset, const uint8* filter_data,
-                           const Dims<4>& filter_dims, int32 filter_offset,
-                           const int32* bias_data, const Dims<4>& bias_dims,
-                           int32 output_offset, int32 output_multiplier,
-                           int output_shift, int32 output_activation_min,
-                           int32 output_activation_max, uint8* output_data,
-                           const Dims<4>& output_dims, void* gemm_context) {
-  tflite::FullyConnectedParams op_params;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  FullyConnected(op_params, DimsToShape(input_dims), input_data,
-                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                 bias_data, DimsToShape(output_dims), output_data,
-                 gemm_context);
-}
-
 inline void FullyConnected(
     const FullyConnectedParams& params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& filter_shape,
@@ -224,32 +165,6 @@ inline void FullyConnected(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                           int32 input_offset, const uint8* filter_data,
-                           const Dims<4>& filter_dims, int32 filter_offset,
-                           const int32* bias_data, const Dims<4>& bias_dims,
-                           int32 output_offset, int32 output_multiplier,
-                           int output_shift, int32 output_activation_min,
-                           int32 output_activation_max, int16* output_data,
-                           const Dims<4>& output_dims, void* gemm_context) {
-  tflite::FullyConnectedParams op_params;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  FullyConnected(op_params, DimsToShape(input_dims), input_data,
-                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
-                 bias_data, DimsToShape(output_dims), output_data,
-                 gemm_context);
-}
-
 inline void ShuffledFullyConnected(
     const FullyConnectedParams& params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& weights_shape,
@@ -405,55 +320,6 @@ inline void ShuffledFullyConnected(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void ShuffledFullyConnected(
-    const uint8* input_data, const Dims<4>& input_dims,
-    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
-    int output_shift, int32 output_activation_min, int32 output_activation_max,
-    int16* output_data, const Dims<4>& output_dims,
-    uint8* shuffled_input_workspace_data, void* gemm_context) {
-  tflite::FullyConnectedParams op_params;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data,
-                         DimsToShape(weights_dims), shuffled_weights_data,
-                         DimsToShape(bias_dims), bias_data,
-                         DimsToShape(output_dims), output_data,
-                         shuffled_input_workspace_data, gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                    int32 input_offset, const uint8* filter_data,
-                    const Dims<4>& filter_dims, int32 filter_offset,
-                    const int32* bias_data, const Dims<4>& bias_dims,
-                    int32 output_offset, int32 output_multiplier,
-                    int output_shift, int32 output_activation_min,
-                    int32 output_activation_max, uint8* output_data,
-                    const Dims<4>& output_dims, void* gemm_context) {
-  static_assert(Ac == FusedActivationFunctionType::kNone ||
-                    Ac == FusedActivationFunctionType::kRelu ||
-                    Ac == FusedActivationFunctionType::kRelu6 ||
-                    Ac == FusedActivationFunctionType::kRelu1,
-                "");
-  if (Ac == FusedActivationFunctionType::kNone) {
-    TFLITE_DCHECK_EQ(output_activation_min, 0);
-    TFLITE_DCHECK_EQ(output_activation_max, 255);
-  }
-  FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims,
-                 filter_offset, bias_data, bias_dims, output_offset,
-                 output_multiplier, output_shift, output_activation_min,
-                 output_activation_max, output_data, output_dims, gemm_context);
-}
-
 }  // namespace reference_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h
index 683ccdc74d..be99240b1f 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h
@@ -19,6 +19,8 @@ limitations under the License.
 #include <sys/types.h>
 
 #include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h"
 #include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
 #include "tensorflow/contrib/lite/kernels/internal/types.h"
 
@@ -26,6 +28,1070 @@ namespace tflite {
 
 namespace reference_ops {
 
+static constexpr int kDepthwiseReverseShift = -1;
+
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                bias_data, DimsToShape(output_dims), output_data);
+}
+
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
+  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
+                bias_dims, stride_width, stride_height, 1, 1, pad_width,
+                pad_height, depth_multiplier, output_activation_min,
+                output_activation_max, output_data, output_dims);
+}
+
+// Legacy, for compatibility with old checked-in code.
+template <FusedActivationFunctionType Ac>
+void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                   const float* filter_data, const Dims<4>& filter_dims,
+                   const float* bias_data, const Dims<4>& bias_dims,
+                   int stride_width, int stride_height, int pad_width,
+                   int pad_height, int depth_multiplier, float* output_data,
+                   const Dims<4>& output_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
+                bias_dims, stride_width, stride_height, pad_width, pad_height,
+                depth_multiplier, output_activation_min, output_activation_max,
+                output_data, output_dims);
+}
+
+// Legacy, for compatibility with old checked-in code.
+template <FusedActivationFunctionType Ac>
+void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                   const float* filter_data, const Dims<4>& filter_dims,
+                   const float* bias_data, const Dims<4>& bias_dims, int stride,
+                   int pad_width, int pad_height, int depth_multiplier,
+                   float* output_data, const Dims<4>& output_dims) {
+  DepthwiseConv<Ac>(input_data, input_dims, filter_data, filter_dims, bias_data,
+                    bias_dims, stride, stride, pad_width, pad_height,
+                    depth_multiplier, output_data, output_dims);
+}
+
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.depth_multiplier = depth_multiplier;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kDepthwiseReverseShift * output_shift;
+
+  DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                bias_data, DimsToShape(output_dims), output_data);
+}
+
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
+  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
+                filter_offset, bias_data, bias_dims, stride_width,
+                stride_height, 1, 1, pad_width, pad_height, depth_multiplier,
+                output_offset, output_multiplier, output_shift,
+                output_activation_min, output_activation_max, output_data,
+                output_dims);
+}
+
+// Legacy, for compatibility with old checked-in code.
+template <FusedActivationFunctionType Ac>
+void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                   int32 input_offset, const uint8* filter_data,
+                   const Dims<4>& filter_dims, int32 filter_offset,
+                   const int32* bias_data, const Dims<4>& bias_dims,
+                   int stride_width, int stride_height, int pad_width,
+                   int pad_height, int depth_multiplier, int32 output_offset,
+                   int32 output_multiplier, int output_shift,
+                   int32 output_activation_min, int32 output_activation_max,
+                   uint8* output_data, const Dims<4>& output_dims) {
+  if (Ac == FusedActivationFunctionType::kNone) {
+    TFLITE_DCHECK_EQ(output_activation_min, 0);
+    TFLITE_DCHECK_EQ(output_activation_max, 255);
+  }
+  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
+                filter_offset, bias_data, bias_dims, stride_width,
+                stride_height, pad_width, pad_height, depth_multiplier,
+                output_offset, output_multiplier, output_shift,
+                output_activation_min, output_activation_max, output_data,
+                output_dims);
+}
+
+// Legacy, for compatibility with old checked-in code.
+template <FusedActivationFunctionType Ac>
+void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                   int32 input_offset, const uint8* filter_data,
+                   const Dims<4>& filter_dims, int32 filter_offset,
+                   const int32* bias_data, const Dims<4>& bias_dims, int stride,
+                   int pad_width, int pad_height, int depth_multiplier,
+                   int32 output_offset, int32 output_multiplier,
+                   int output_shift, int32 output_activation_min,
+                   int32 output_activation_max, uint8* output_data,
+                   const Dims<4>& output_dims) {
+  DepthwiseConv<Ac>(input_data, input_dims, input_offset, filter_data,
+                    filter_dims, filter_offset, bias_data, bias_dims, stride,
+                    stride, pad_width, pad_height, depth_multiplier,
+                    output_offset, output_multiplier, output_shift,
+                    output_activation_min, output_activation_max, output_data,
+                    output_dims);
+}
+
+inline void Conv(const float* input_data, const Dims<4>& input_dims,
+                 const float* filter_data, const Dims<4>& filter_dims,
+                 const float* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 float output_activation_min, float output_activation_max,
+                 float* output_data, const Dims<4>& output_dims,
+                 float* im2col_data, const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+template <FusedActivationFunctionType Ac>
+void Conv(const float* input_data, const Dims<4>& input_dims,
+          const float* filter_data, const Dims<4>& filter_dims,
+          const float* bias_data, const Dims<4>& bias_dims, int stride_width,
+          int stride_height, int dilation_width_factor,
+          int dilation_height_factor, int pad_width, int pad_height,
+          float* output_data, const Dims<4>& output_dims, float* im2col_data,
+          const Dims<4>& im2col_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
+       stride_width, stride_height, dilation_width_factor,
+       dilation_height_factor, pad_width, pad_height, output_activation_min,
+       output_activation_max, output_data, output_dims, im2col_data,
+       im2col_dims);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void Conv(const float* input_data, const Dims<4>& input_dims,
+          const float* filter_data, const Dims<4>& filter_dims,
+          const float* bias_data, const Dims<4>& bias_dims, int stride_width,
+          int stride_height, int pad_width, int pad_height, float* output_data,
+          const Dims<4>& output_dims, float* im2col_data,
+          const Dims<4>& im2col_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
+       stride_width, stride_height, 1, 1, pad_width, pad_height,
+       output_activation_min, output_activation_max, output_data, output_dims,
+       im2col_data, im2col_dims);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void Conv(const float* input_data, const Dims<4>& input_dims,
+          const float* filter_data, const Dims<4>& filter_dims,
+          const float* bias_data, const Dims<4>& bias_dims, int stride,
+          int pad_width, int pad_height, float* output_data,
+          const Dims<4>& output_dims, float* im2col_data,
+          const Dims<4>& im2col_dims) {
+  Conv<Ac>(input_data, input_dims, filter_data, filter_dims, bias_data,
+           bias_dims, stride, stride, 1, 1, pad_width, pad_height, output_data,
+           output_dims, im2col_data, im2col_dims);
+}
+
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
+                 int32 input_offset, const uint8* filter_data,
+                 const Dims<4>& filter_dims, int32 filter_offset,
+                 const int32* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 int32 output_offset, int32 output_multiplier, int output_shift,
+                 int32 output_activation_min, int32 output_activation_max,
+                 uint8* output_data, const Dims<4>& output_dims,
+                 uint8* im2col_data, const Dims<4>& im2col_dims,
+                 gemmlowp::GemmContext* gemm_context) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data, gemm_context);
+}
+
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
+                 int32 input_offset, const uint8* filter_data,
+                 const Dims<4>& filter_dims, int32 filter_offset,
+                 const int32* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int pad_width,
+                 int pad_height, int32 output_offset, int32 output_multiplier,
+                 int output_shift, int32 output_activation_min,
+                 int32 output_activation_max, uint8* output_data,
+                 const Dims<4>& output_dims, uint8* im2col_data,
+                 const Dims<4>& im2col_dims,
+                 gemmlowp::GemmContext* gemm_context) {
+  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
+       filter_offset, bias_data, bias_dims, stride_width, stride_height, 1, 1,
+       pad_width, pad_height, output_offset, output_multiplier, output_shift,
+       output_activation_min, output_activation_max, output_data, output_dims,
+       im2col_data, im2col_dims, gemm_context);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
+                 int32 input_offset, const uint8* filter_data,
+                 const Dims<4>& filter_dims, int32 filter_offset,
+                 const int32* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int pad_width,
+                 int pad_height, int32 output_offset, int32 output_multiplier,
+                 int output_shift, int32 output_activation_min,
+                 int32 output_activation_max, uint8* output_data,
+                 const Dims<4>& output_dims, uint8* im2col_data,
+                 const Dims<4>& im2col_dims,
+                 gemmlowp::GemmContext* gemm_context) {
+  static_assert(Ac == FusedActivationFunctionType::kNone ||
+                    Ac == FusedActivationFunctionType::kRelu ||
+                    Ac == FusedActivationFunctionType::kRelu6 ||
+                    Ac == FusedActivationFunctionType::kRelu1,
+                "");
+  if (Ac == FusedActivationFunctionType::kNone) {
+    TFLITE_DCHECK_EQ(output_activation_min, 0);
+    TFLITE_DCHECK_EQ(output_activation_max, 255);
+  }
+  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
+       filter_offset, bias_data, bias_dims, stride_width, stride_height,
+       pad_width, pad_height, output_offset, output_multiplier, output_shift,
+       output_activation_min, output_activation_max, output_data, output_dims,
+       im2col_data, im2col_dims, gemm_context);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void Conv(const uint8* input_data, const Dims<4>& input_dims,
+          int32 input_offset, const uint8* filter_data,
+          const Dims<4>& filter_dims, int32 filter_offset,
+          const int32* bias_data, const Dims<4>& bias_dims, int stride,
+          int pad_width, int pad_height, int32 output_offset,
+          int32 output_multiplier, int output_shift,
+          int32 output_activation_min, int32 output_activation_max,
+          uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data,
+          const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) {
+  Conv<Ac>(input_data, input_dims, input_offset, filter_data, filter_dims,
+           filter_offset, bias_data, bias_dims, stride, stride, pad_width,
+           pad_height, output_offset, output_multiplier, output_shift,
+           output_activation_min, output_activation_max, output_data,
+           output_dims, im2col_data, im2col_dims, gemm_context);
+}
+
+inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, float* output_data,
+                          const Dims<4>& output_dims, float* im2col_data,
+                          const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+
+  TransposeConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(output_dims),
+                output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+                           const float* weights_data,
+                           const Dims<4>& weights_dims, const float* bias_data,
+                           const Dims<4>& bias_dims,
+                           float output_activation_min,
+                           float output_activation_max, float* output_data,
+                           const Dims<4>& output_dims) {
+  tflite::FullyConnectedParams op_params;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(weights_dims), weights_data,
+                 DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+                 output_data);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+                    const float* weights_data, const Dims<4>& weights_dims,
+                    const float* bias_data, const Dims<4>& bias_dims,
+                    float* output_data, const Dims<4>& output_dims) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data,
+                 bias_dims, output_activation_min, output_activation_max,
+                 output_data, output_dims);
+}
+
+inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                           int32 input_offset, const uint8* filter_data,
+                           const Dims<4>& filter_dims, int32 filter_offset,
+                           const int32* bias_data, const Dims<4>& bias_dims,
+                           int32 output_offset, int32 output_multiplier,
+                           int output_shift, int32 output_activation_min,
+                           int32 output_activation_max, uint8* output_data,
+                           const Dims<4>& output_dims,
+                           gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
+inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                           int32 input_offset, const uint8* filter_data,
+                           const Dims<4>& filter_dims, int32 filter_offset,
+                           const int32* bias_data, const Dims<4>& bias_dims,
+                           int32 output_offset, int32 output_multiplier,
+                           int output_shift, int32 output_activation_min,
+                           int32 output_activation_max, int16* output_data,
+                           const Dims<4>& output_dims,
+                           gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
+inline void ShuffledFullyConnected(
+    const uint8* input_data, const Dims<4>& input_dims,
+    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
+    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
+    int output_shift, int32 output_activation_min, int32 output_activation_max,
+    int16* output_data, const Dims<4>& output_dims,
+    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.output_multiplier = output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = kReverseShift * output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data,
+                         DimsToShape(weights_dims), shuffled_weights_data,
+                         DimsToShape(bias_dims), bias_data,
+                         DimsToShape(output_dims), output_data,
+                         shuffled_input_workspace_data, gemm_context);
+}
+
+// legacy, for compatibility with old checked-in code
+template <FusedActivationFunctionType Ac>
+void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                    int32 input_offset, const uint8* filter_data,
+                    const Dims<4>& filter_dims, int32 filter_offset,
+                    const int32* bias_data, const Dims<4>& bias_dims,
+                    int32 output_offset, int32 output_multiplier,
+                    int output_shift, int32 output_activation_min,
+                    int32 output_activation_max, uint8* output_data,
+                    const Dims<4>& output_dims,
+                    gemmlowp::GemmContext* gemm_context) {
+  static_assert(Ac == FusedActivationFunctionType::kNone ||
+                    Ac == FusedActivationFunctionType::kRelu ||
+                    Ac == FusedActivationFunctionType::kRelu6 ||
+                    Ac == FusedActivationFunctionType::kRelu1,
+                "");
+  if (Ac == FusedActivationFunctionType::kNone) {
+    TFLITE_DCHECK_EQ(output_activation_min, 0);
+    TFLITE_DCHECK_EQ(output_activation_max, 255);
+  }
+  FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims,
+                 filter_offset, bias_data, bias_dims, output_offset,
+                 output_multiplier, output_shift, output_activation_min,
+                 output_activation_max, output_data, output_dims, gemm_context);
+}
+
+inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
+                     const float* prev_activ_data,
+                     const Dims<4>& prev_activ_dims, const float* weights_data,
+                     const Dims<4>& weights_dims, const float* bias_data,
+                     const Dims<4>& bias_dims, const float* prev_state_data,
+                     const Dims<4>& prev_state_dims, float* output_state_data,
+                     const Dims<4>& output_state_dims, float* output_activ_data,
+                     const Dims<4>& output_activ_dims, float* concat_temp_data,
+                     const Dims<4>& concat_temp_dims, float* activ_temp_data,
+                     const Dims<4>& activ_temp_dims) {
+  tflite::LstmCellParams op_params;
+  // Float LSTM cell does not need parameters to be set: leave untouched.
+
+  LstmCell(op_params, DimsToShape(input_dims), input_data,
+           DimsToShape(prev_activ_dims), prev_activ_data,
+           DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims),
+           bias_data, DimsToShape(prev_state_dims), prev_state_data,
+           DimsToShape(output_state_dims), output_state_data,
+           DimsToShape(output_activ_dims), output_activ_data,
+           DimsToShape(concat_temp_dims), concat_temp_data,
+           DimsToShape(activ_temp_dims), activ_temp_data);
+}
+
+template <int StateIntegerBits>
+void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
+              const uint8* prev_activ_data_uint8,
+              const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8,
+              const Dims<4>& weights_dims, const int32* bias_data_int32,
+              const Dims<4>& bias_dims, const int16* prev_state_data_int16,
+              const Dims<4>& prev_state_dims, int16* output_state_data_int16,
+              const Dims<4>& output_state_dims, uint8* output_activ_data_uint8,
+              const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8,
+              const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16,
+              const Dims<4>& activ_temp_dims, int32 weights_zero_point,
+              int32 accum_multiplier, int accum_shift,
+              gemmlowp::GemmContext* gemm_context) {
+  tflite::LstmCellParams op_params;
+  op_params.weights_zero_point = weights_zero_point;
+  op_params.accum_multiplier = accum_multiplier;
+  op_params.accum_shift = accum_shift;
+
+  LstmCell<StateIntegerBits>(
+      op_params, DimsToShape(input_dims), input_data_uint8,
+      DimsToShape(prev_activ_dims), prev_activ_data_uint8,
+      DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims),
+      bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16,
+      DimsToShape(output_state_dims), output_state_data_int16,
+      DimsToShape(output_activ_dims), output_activ_data_uint8,
+      DimsToShape(concat_temp_dims), concat_temp_data_uint8,
+      DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context);
+}
+
+template <typename T>
+void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims,
+                  const T* input2_data, const Dims<4>& input2_dims,
+                  T output_activation_min, T output_activation_max,
+                  T* output_data, const Dims<4>& output_dims) {
+  tflite::ArithmeticParams op_params;
+  SetActivationParams(output_activation_min, output_activation_max, &op_params);
+
+  BroadcastDiv4DSlow(op_params, DimsToShape(input1_dims), input1_data,
+                     DimsToShape(input2_dims), input2_data,
+                     DimsToShape(output_dims), output_data);
+}
+
+template <typename T>
+inline void Div(const T* input1_data, const Dims<4>& input1_dims,
+                const T* input2_data, const Dims<4>& input2_dims,
+                T output_activation_min, T output_activation_max,
+                T* output_data, const Dims<4>& output_dims) {
+  tflite::ArithmeticParams op_params;
+  SetActivationParams(output_activation_min, output_activation_max, &op_params);
+
+  Div(op_params, DimsToShape(input1_dims), input1_data,
+      DimsToShape(input2_dims), input2_data, DimsToShape(output_dims),
+      output_data);
+}
+
+template <FusedActivationFunctionType Ac, typename Scalar>
+inline void Concatenation(int concat_dim, const Scalar* const* input_data,
+                          const Dims<4>* const* input_dims, int inputs_count,
+                          Scalar* output_data, const Dims<4>& output_dims) {
+  // For now we don't have a model with a Concatenation with fused activation.
+  TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone);
+
+  std::vector<RuntimeShape> input_shapes(inputs_count);
+  std::vector<const RuntimeShape*> input_shapes_indirect(inputs_count);
+  for (int i = 0; i < inputs_count; ++i) {
+    ShapeFromDims(*input_dims[i], &input_shapes[i]);
+    input_shapes_indirect[i] = &input_shapes[i];
+  }
+  tflite::ConcatenationParams op_params;
+  op_params.axis = 3 - concat_dim;
+  op_params.inputs_count = inputs_count;
+
+  Concatenation(op_params, input_shapes_indirect.data(), input_data,
+                DimsToShape(output_dims), output_data);
+}
+
+inline void Concatenation(int concat_dim, const uint8* const* input_data,
+                          const Dims<4>* const* input_dims,
+                          const int32* input_zeropoint,
+                          const float* input_scale, int inputs_count,
+                          uint8* output_data, const Dims<4>& output_dims,
+                          const int32 output_zeropoint,
+                          const float output_scale) {
+  std::vector<RuntimeShape> input_shapes(inputs_count);
+  std::vector<const RuntimeShape*> input_shapes_indirect(inputs_count);
+  for (int i = 0; i < inputs_count; ++i) {
+    ShapeFromDims(*input_dims[i], &input_shapes[i]);
+    input_shapes_indirect[i] = &input_shapes[i];
+  }
+  tflite::ConcatenationParams op_params;
+  op_params.axis = 3 - concat_dim;
+  op_params.input_zeropoint = input_zeropoint;
+  op_params.input_scale = input_scale;
+  op_params.inputs_count = inputs_count;
+  op_params.output_zeropoint = output_zeropoint;
+  op_params.output_scale = output_scale;
+
+  ConcatenationWithScaling(op_params, input_shapes_indirect.data(), input_data,
+                           DimsToShape(output_dims), output_data);
+}
+
+template <FusedActivationFunctionType Ac, typename Scalar>
+void DepthConcatenation(const Scalar* const* input_data,
+                        const Dims<4>* const* input_dims, int inputs_count,
+                        Scalar* output_data, const Dims<4>& output_dims) {
+  // For now we don't have a model with a Concatenation with fused activation.
+  TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone);
+
+  std::vector<RuntimeShape> input_shapes(inputs_count);
+  std::vector<const RuntimeShape*> input_shapes_indirect(inputs_count);
+  for (int i = 0; i < inputs_count; ++i) {
+    ShapeFromDims(*input_dims[i], &input_shapes[i]);
+    input_shapes_indirect[i] = &input_shapes[i];
+  }
+  tflite::ConcatenationParams op_params;
+  op_params.inputs_count = inputs_count;
+
+  DepthConcatenation(op_params, input_shapes_indirect.data(), input_data,
+                     DimsToShape(output_dims), output_data);
+}
+
+template <typename Scalar>
+void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims,
+                     int axis, int outputs_count, Scalar* const* output_data,
+                     const Dims<4>* const* output_dims) {
+  std::vector<RuntimeShape> output_shapes(outputs_count);
+  std::vector<const RuntimeShape*> output_shapes_indirect(outputs_count);
+  for (int i = 0; i < outputs_count; ++i) {
+    ShapeFromDims(*output_dims[i], &output_shapes[i]);
+    output_shapes_indirect[i] = &output_shapes[i];
+  }
+  tflite::SplitParams op_params;
+  op_params.axis = 3 - axis;
+  op_params.num_split = outputs_count;
+
+  Split(op_params, DimsToShape(input_dims), input_data,
+        output_shapes_indirect.data(), output_data);
+}
+
+template <FusedActivationFunctionType Ac, typename Scalar>
+void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims,
+                     int outputs_count, Scalar* const* output_data,
+                     const Dims<4>* const* output_dims) {
+  TFLITE_DCHECK_GE(outputs_count, 1);
+  for (int i = 0; i < outputs_count; i++) {
+    /* batches = */ MatchingArraySize(*output_dims[i], 3, input_dims, 3);
+    /* height = */ MatchingArraySize(*output_dims[i], 2, input_dims, 2);
+    /* width = */ MatchingArraySize(*output_dims[i], 1, input_dims, 1);
+  }
+  // For now we don't have a model with a Split with fused activation.
+  TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone);
+
+  TensorFlowSplit(input_data, input_dims, /*axis=*/0, outputs_count,
+                  output_data, output_dims);
+}
+
+inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
+                    float beta, float* output_data,
+                    const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.beta = beta;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
+                    int32 input_beta_multiplier, int32 input_beta_left_shift,
+                    int diff_min, uint8* output_data,
+                    const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.input_multiplier = input_beta_multiplier;
+  params.input_left_shift = input_beta_left_shift;
+  params.diff_min = diff_min;
+  Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
+                       float* output_data, const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  // No params currently used for float LogSoftmax.
+  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
+                       int32 input_multiplier, int32 input_left_shift,
+                       int32 reverse_scaling_divisor,
+                       int32 reverse_scaling_right_shift, int diff_min,
+                       uint8* output_data, const RuntimeShape& output_shape) {
+  SoftmaxParams params;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  params.reverse_scaling_divisor = reverse_scaling_divisor;
+  params.reverse_scaling_right_shift = reverse_scaling_right_shift;
+  params.diff_min = diff_min;
+  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
+                     int32 input_zero_point, int32 input_range_radius,
+                     int32 input_multiplier, int input_left_shift,
+                     uint8* output_data, const RuntimeShape& output_shape) {
+  LogisticParams params;
+  params.input_zero_point = input_zero_point;
+  params.input_range_radius = input_range_radius;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
+                     const RuntimeShape& output_shape, int16* output_data) {
+  LogisticParams params;
+  // No params currently needed by int16 Logistic.
+  Logistic(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
+                 int32 input_zero_point, int32 input_range_radius,
+                 int32 input_multiplier, int input_left_shift,
+                 uint8* output_data, const RuntimeShape& output_shape) {
+  TanhParams params;
+  params.input_zero_point = input_zero_point;
+  params.input_range_radius = input_range_radius;
+  params.input_multiplier = input_multiplier;
+  params.input_left_shift = input_left_shift;
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
+                 int input_left_shift, int16* output_data,
+                 const RuntimeShape& output_shape) {
+  TanhParams params;
+  params.input_left_shift = input_left_shift;
+  Tanh(params, input_shape, input_data, output_shape, output_data);
+}
+
+inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims,
+                       int32 zero_point, double scale, float* output_data,
+                       const Dims<4>& output_dims) {
+  tflite::DequantizationParams op_params;
+  op_params.zero_point = zero_point;
+  op_params.scale = scale;
+
+  Dequantize(op_params, DimsToShape(input_dims), input_data,
+             DimsToShape(output_dims), output_data);
+}
+
+inline void FakeQuant(const float* input_data, const Dims<4>& input_dims,
+                      float rmin, float rmax, int num_bits, float* output_data,
+                      const Dims<4>& output_dims) {
+  tflite::FakeQuantParams op_params;
+  op_params.num_bits = num_bits;
+  op_params.minmax.min = rmin;
+  op_params.minmax.max = rmax;
+
+  FakeQuant(op_params, DimsToShape(input_dims), input_data,
+            DimsToShape(output_dims), output_data);
+}
+
+template <typename T>
+inline void Gather(const T* input_data, const Dims<4>& input_dims,
+                   int input_rank, const int32* coords_data,
+                   const Dims<4>& coords_dims, T* output_data,
+                   const Dims<4>& output_dims) {
+  tflite::GatherParams op_params;
+  op_params.input_rank = input_rank;
+
+  Gather(op_params, DimsToShape(input_dims), input_data,
+         DimsToShape(coords_dims), coords_data, DimsToShape(output_dims),
+         output_data);
+}
+
+inline uint32 LegacyReverseBits32(uint32 n) {
+  n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1);
+  n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2);
+  n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4);
+  return (((n & 0xFF) << 24) | ((n & 0xFF00) << 8) | ((n & 0xFF0000) >> 8) |
+          ((n & 0xFF000000) >> 24));
+}
+
+inline void StridedSliceReverseIndices(tflite::StridedSliceParams* p) {
+  TFLITE_CHECK_EQ(p->start_indices_count, p->stop_indices_count);
+  TFLITE_CHECK_EQ(p->stop_indices_count, p->strides_count);
+
+  std::reverse(p->start_indices, p->start_indices + p->start_indices_count);
+  std::reverse(p->stop_indices, p->stop_indices + p->stop_indices_count);
+  std::reverse(p->strides, p->strides + p->strides_count);
+
+  p->begin_mask = LegacyReverseBits32(static_cast<uint32>(p->begin_mask)) >>
+                  (32 - p->start_indices_count);
+  p->ellipsis_mask =
+      LegacyReverseBits32(static_cast<uint32>(p->ellipsis_mask)) >>
+      (32 - p->start_indices_count);
+  p->end_mask = LegacyReverseBits32(static_cast<uint32>(p->end_mask)) >>
+                (32 - p->start_indices_count);
+  p->new_axis_mask =
+      LegacyReverseBits32(static_cast<uint32>(p->new_axis_mask)) >>
+      (32 - p->start_indices_count);
+  p->shrink_axis_mask =
+      LegacyReverseBits32(static_cast<uint32>(p->shrink_axis_mask)) >>
+      (32 - p->start_indices_count);
+}
+
+template <typename T>
+inline void StridedSlice(const T* input_data, const Dims<4>& input_dims,
+                         int begin_mask, int end_mask, int shrink_axis_mask,
+                         const std::vector<int>& start_indices,
+                         const std::vector<int>& stop_indices,
+                         const std::vector<int>& strides, T* output_data,
+                         const Dims<4>& output_dims) {
+  TFLITE_DCHECK_EQ(start_indices.size(), 4);
+  auto op_params = strided_slice::BuildStridedSliceParams(
+      begin_mask, end_mask, shrink_axis_mask, start_indices, stop_indices,
+      strides);
+  StridedSliceReverseIndices(&op_params);
+
+  StridedSlice(op_params, DimsToShape(input_dims), input_data,
+               DimsToShape(output_dims), output_data);
+}
+
+template <typename T>
+inline void Mean(const T* input_data, const Dims<4>& input_dims,
+                 const std::vector<int>& reduction_indices, T* output_data,
+                 const Dims<4>& output_dims) {
+  tflite::MeanParams op_params;
+  op_params.axis_count = reduction_indices.size();
+  for (int i = 0; i < op_params.axis_count; ++i) {
+    op_params.axis[i] = reduction_indices[op_params.axis_count - 1 - i];
+  }
+
+  Mean(op_params, DimsToShape(input_dims), input_data, DimsToShape(output_dims),
+       output_data);
+}
+
+template <typename T>
+void Transpose(const T* input, const Dims<4>& input_dims, T* output,
+               const Dims<4>& output_dims, const int* permuted_axes) {
+  TransposeParams params;
+  params.perm_count = 4;
+  for (int i = 0; i < 4; ++i) {
+    params.perm[i] = 3 - permuted_axes[3 - i];
+  }
+  Transpose(params, DimsToShape(input_dims), input, DimsToShape(output_dims),
+            output);
+}
+
+template <typename T, ComparisonFn<T> F>
+inline void Comparison(const T* input1_data, const Dims<4>& input1_dims,
+                       const T* input2_data, const Dims<4>& input2_dims,
+                       bool* output_data, const Dims<4>& output_dims) {
+  ComparisonParams op_params;
+  // No parameters needed.
+  ComparisonImpl<T, F>(op_params, DimsToShape(input1_dims), input1_data,
+                       DimsToShape(input2_dims), input2_data,
+                       DimsToShape(output_dims), output_data);
+}
+
+template <typename T, ComparisonFn<int32> F>
+inline void Comparison(int left_shift, const T* input1_data,
+                       const Dims<4>& input1_dims, int32 input1_offset,
+                       int32 input1_multiplier, int input1_shift,
+                       const T* input2_data, const Dims<4>& input2_dims,
+                       int32 input2_offset, int32 input2_multiplier,
+                       int input2_shift, bool* output_data,
+                       const Dims<4>& output_dims) {
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = left_shift;
+  op_params.input1_offset = input1_offset;
+  op_params.input1_multiplier = input1_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.input1_shift = kReverseShift * input1_shift;
+  op_params.input2_offset = input2_offset;
+  op_params.input2_multiplier = input2_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.input2_shift = kReverseShift * input2_shift;
+
+  ComparisonWithScaling<T, F>(op_params, DimsToShape(input1_dims), input1_data,
+                              DimsToShape(input2_dims), input2_data,
+                              DimsToShape(output_dims), output_data);
+}
+
+template <typename T, ComparisonFn<T> F>
+inline void BroadcastComparison(const T* input1_data,
+                                const Dims<4>& input1_dims,
+                                const T* input2_data,
+                                const Dims<4>& input2_dims, bool* output_data,
+                                const Dims<4>& output_dims) {
+  ComparisonParams op_params;
+  // No parameters needed.
+  BroadcastComparison4DSlowImpl<T, F>(op_params, DimsToShape(input1_dims),
+                                      input1_data, DimsToShape(input2_dims),
+                                      input2_data, DimsToShape(output_dims),
+                                      output_data);
+}
+
+template <typename T, ComparisonFn<int32> F>
+inline void BroadcastComparison(int left_shift, const T* input1_data,
+                                const Dims<4>& input1_dims, int32 input1_offset,
+                                int32 input1_multiplier, int input1_shift,
+                                const T* input2_data,
+                                const Dims<4>& input2_dims, int32 input2_offset,
+                                int32 input2_multiplier, int input2_shift,
+                                bool* output_data, const Dims<4>& output_dims) {
+  ComparisonParams op_params;
+
+  op_params.left_shift = left_shift;
+  op_params.input1_offset = input1_offset;
+  op_params.input1_multiplier = input1_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.input1_shift = kReverseShift * input1_shift;
+  op_params.input2_offset = input2_offset;
+  op_params.input2_multiplier = input2_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.input2_shift = kReverseShift * input2_shift;
+
+  BroadcastComparison4DSlowWithScaling<T, F>(
+      op_params, DimsToShape(input1_dims), input1_data,
+      DimsToShape(input2_dims), input2_data, DimsToShape(output_dims),
+      output_data);
+}
+
+#define TFLITE_LEGACY_COMPARISON_OP(name)                                     \
+  template <typename T>                                                       \
+  inline void name(const T* input1_data, const Dims<4>& input1_dims,          \
+                   const T* input2_data, const Dims<4>& input2_dims,          \
+                   bool* output_data, const Dims<4>& output_dims) {           \
+    gemmlowp::ScopedProfilingLabel label(#name);                              \
+    Comparison<T, name##Fn>(input1_data, input1_dims, input2_data,            \
+                            input2_dims, output_data, output_dims);           \
+  }                                                                           \
+  template <typename T>                                                       \
+  inline void name(                                                           \
+      int left_shift, const T* input1_data, const Dims<4>& input1_dims,       \
+      int32 input1_offset, int32 input1_multiplier, int input1_shift,         \
+      const T* input2_data, const Dims<4>& input2_dims, int32 input2_offset,  \
+      int32 input2_multiplier, int input2_shift, bool* output_data,           \
+      const Dims<4>& output_dims) {                                           \
+    gemmlowp::ScopedProfilingLabel label(#name "/8bit");                      \
+    Comparison<T, name##Fn>(left_shift, input1_data, input1_dims,             \
+                            input1_offset, input1_multiplier, input1_shift,   \
+                            input2_data, input2_dims, input2_offset,          \
+                            input2_multiplier, input2_shift, output_data,     \
+                            output_dims);                                     \
+  }                                                                           \
+  template <typename T>                                                       \
+  inline void Broadcast##name(                                                \
+      const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, \
+      const Dims<4>& input2_dims, bool* output_data,                          \
+      const Dims<4>& output_dims) {                                           \
+    gemmlowp::ScopedProfilingLabel label("Broadcast" #name);                  \
+    BroadcastComparison<T, name##Fn>(input1_data, input1_dims, input2_data,   \
+                                     input2_dims, output_data, output_dims);  \
+  }                                                                           \
+  template <typename T>                                                       \
+  inline void Broadcast##name(                                                \
+      int left_shift, const T* input1_data, const Dims<4>& input1_dims,       \
+      int32 input1_offset, int32 input1_multiplier, int input1_shift,         \
+      const T* input2_data, const Dims<4>& input2_dims, int32 input2_offset,  \
+      int32 input2_multiplier, int input2_shift, bool* output_data,           \
+      const Dims<4>& output_dims) {                                           \
+    gemmlowp::ScopedProfilingLabel label("Broadcast" #name "/8bit");          \
+    BroadcastComparison<T, name##Fn>(left_shift, input1_data, input1_dims,    \
+                                     input1_offset, input1_multiplier,        \
+                                     input1_shift, input2_data, input2_dims,  \
+                                     input2_offset, input2_multiplier,        \
+                                     input2_shift, output_data, output_dims); \
+  }
+TFLITE_LEGACY_COMPARISON_OP(Equal);
+TFLITE_LEGACY_COMPARISON_OP(NotEqual);
+TFLITE_LEGACY_COMPARISON_OP(Greater);
+TFLITE_LEGACY_COMPARISON_OP(GreaterEqual);
+TFLITE_LEGACY_COMPARISON_OP(Less);
+TFLITE_LEGACY_COMPARISON_OP(LessEqual);
+#undef TFLITE_LEGACY_COMPARISON_OP
+
+template <typename D, typename T>
+inline void Select(const D* input_condition_data,
+                   const Dims<4>& input_condition_dims, const T* input_x_data,
+                   const Dims<4>& input_x_dims, const T* input_y_data,
+                   const Dims<4>& input_y_dims, T* output_data,
+                   const Dims<4>& output_dims) {
+  Select(DimsToShape(input_condition_dims), input_condition_data,
+         DimsToShape(input_x_dims), input_x_data, DimsToShape(input_y_dims),
+         input_y_data, DimsToShape(output_dims), output_data);
+}
+
+template <typename D, typename T>
+inline void RankOneSelect(const D* input_condition_data,
+                          const Dims<4>& input_condition_dims,
+                          const T* input_x_data, const Dims<4>& input_x_dims,
+                          const T* input_y_data, const Dims<4>& input_y_dims,
+                          T* output_data, const Dims<4>& output_dims) {
+  RankOneSelect(DimsToShape(input_condition_dims), input_condition_data,
+                DimsToShape(input_x_dims), input_x_data,
+                DimsToShape(input_y_dims), input_y_data,
+                DimsToShape(output_dims), output_data);
+}
+
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
+                          const T* values, T default_value, T* output_data,
+                          const Dims<4>& output_dims, bool value_is_scalar) {
+  SparseToDense(indices, values, default_value, value_is_scalar,
+                DimsToShape(output_dims), output_data);
+}
+
+template <typename Scalar>
+void Pack(int dim, const Scalar* const* input_data,
+          const Dims<4>* const* input_dims, int inputs_count,
+          Scalar* output_data, const Dims<4>& output_dims) {
+  std::vector<RuntimeShape> input_shapes(inputs_count);
+  std::vector<const RuntimeShape*> input_shapes_indirect(inputs_count);
+  for (int i = 0; i < inputs_count; ++i) {
+    ShapeFromDims(*input_dims[i], &input_shapes[i]);
+    input_shapes_indirect[i] = &input_shapes[i];
+  }
+  tflite::PackParams op_params;
+  op_params.axis = 3 - dim;
+  op_params.inputs_count = inputs_count;
+
+  Pack(op_params, input_shapes_indirect.data(), input_data,
+       DimsToShape(output_dims), output_data);
+}
+
+template <typename Scalar>
+void Unpack(int axis, const Scalar* input_data, const Dims<4>& input_dims,
+            int dimensions, int outputs_count, Scalar* const* output_datas,
+            const Dims<4>& output_dims) {
+  tflite::UnpackParams op_params;
+  op_params.axis = 3 - axis;
+  op_params.num_split = outputs_count;
+
+  Unpack(op_params, DimsToShape(input_dims), input_data,
+         DimsToShape(output_dims), output_datas);
+}
+
+template <typename Scalar>
+void Pack(int dim, const Scalar* const* input_data,
+          const Dims<4>* const* input_dims, const int32* input_zeropoint,
+          const float* input_scale, int inputs_count, Scalar* output_data,
+          const Dims<4>& output_dims, const int32 output_zeropoint,
+          const float output_scale) {
+  std::vector<RuntimeShape> input_shapes(inputs_count);
+  std::vector<const RuntimeShape*> input_shapes_indirect(inputs_count);
+  for (int i = 0; i < inputs_count; ++i) {
+    ShapeFromDims(*input_dims[i], &input_shapes[i]);
+    input_shapes_indirect[i] = &input_shapes[i];
+  }
+  tflite::PackParams op_params;
+  op_params.axis = 3 - dim;
+  op_params.input_zeropoint = input_zeropoint;
+  op_params.input_scale = input_scale;
+  op_params.inputs_count = inputs_count;
+  op_params.output_zeropoint = output_zeropoint;
+  op_params.output_scale = output_scale;
+
+  PackWithScaling(op_params, input_shapes_indirect.data(), input_data,
+                  DimsToShape(output_dims), output_data);
+}
+
 template <FusedActivationFunctionType Ac>
 void L2Normalization(const float* input_data, const RuntimeShape& input_shape,
                      float* output_data, const RuntimeShape& output_shape) {
@@ -342,7 +1408,6 @@ inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
               DimsToShape(output_dims), output_data);
 }
 
-// Legacy.
 // Transitional version that will be moved shortly to legacy_reference_ops, as
 // part of RuntimeShape revisions.
 inline void BroadcastMul4DSlow(const uint8* input1_data,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index f3f1595035..59f17ae854 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -231,83 +231,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Conv(const float* input_data, const Dims<4>& input_dims,
-                 const float* filter_data, const Dims<4>& filter_dims,
-                 const float* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 float output_activation_min, float output_activation_max,
-                 float* output_data, const Dims<4>& output_dims,
-                 float* im2col_data, const Dims<4>& im2col_dims) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = dilation_width_factor;
-  op_params.dilation_height_factor = dilation_height_factor;
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
-       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
-       output_data, DimsToShape(im2col_dims), im2col_data);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <FusedActivationFunctionType Ac>
-void Conv(const float* input_data, const Dims<4>& input_dims,
-          const float* filter_data, const Dims<4>& filter_dims,
-          const float* bias_data, const Dims<4>& bias_dims, int stride_width,
-          int stride_height, int dilation_width_factor,
-          int dilation_height_factor, int pad_width, int pad_height,
-          float* output_data, const Dims<4>& output_dims, float* im2col_data,
-          const Dims<4>& im2col_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
-       stride_width, stride_height, dilation_width_factor,
-       dilation_height_factor, pad_width, pad_height, output_activation_min,
-       output_activation_max, output_data, output_dims, im2col_data,
-       im2col_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void Conv(const float* input_data, const Dims<4>& input_dims,
-          const float* filter_data, const Dims<4>& filter_dims,
-          const float* bias_data, const Dims<4>& bias_dims, int stride_width,
-          int stride_height, int pad_width, int pad_height, float* output_data,
-          const Dims<4>& output_dims, float* im2col_data,
-          const Dims<4>& im2col_dims) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims,
-       stride_width, stride_height, 1, 1, pad_width, pad_height,
-       output_activation_min, output_activation_max, output_data, output_dims,
-       im2col_data, im2col_dims);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void Conv(const float* input_data, const Dims<4>& input_dims,
-          const float* filter_data, const Dims<4>& filter_dims,
-          const float* bias_data, const Dims<4>& bias_dims, int stride,
-          int pad_width, int pad_height, float* output_data,
-          const Dims<4>& output_dims, float* im2col_data,
-          const Dims<4>& im2col_dims) {
-  Conv<Ac>(input_data, input_dims, filter_data, filter_dims, bias_data,
-           bias_dims, stride, stride, 1, 1, pad_width, pad_height, output_data,
-           output_dims, im2col_data, im2col_dims);
-}
-
 inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                  const uint8* input_data, const RuntimeShape& filter_shape,
                  const uint8* filter_data, const RuntimeShape& bias_shape,
@@ -391,111 +314,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
-                 int32 input_offset, const uint8* filter_data,
-                 const Dims<4>& filter_dims, int32 filter_offset,
-                 const int32* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 int32 output_offset, int32 output_multiplier, int output_shift,
-                 int32 output_activation_min, int32 output_activation_max,
-                 uint8* output_data, const Dims<4>& output_dims,
-                 uint8* im2col_data, const Dims<4>& im2col_dims,
-                 gemmlowp::GemmContext* gemm_context) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-  op_params.dilation_width_factor = dilation_width_factor;
-  op_params.dilation_height_factor = dilation_height_factor;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = kReverseShift * output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
-       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
-       output_data, DimsToShape(im2col_dims), im2col_data, gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
-                 int32 input_offset, const uint8* filter_data,
-                 const Dims<4>& filter_dims, int32 filter_offset,
-                 const int32* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int pad_width,
-                 int pad_height, int32 output_offset, int32 output_multiplier,
-                 int output_shift, int32 output_activation_min,
-                 int32 output_activation_max, uint8* output_data,
-                 const Dims<4>& output_dims, uint8* im2col_data,
-                 const Dims<4>& im2col_dims,
-                 gemmlowp::GemmContext* gemm_context) {
-  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
-       filter_offset, bias_data, bias_dims, stride_width, stride_height, 1, 1,
-       pad_width, pad_height, output_offset, output_multiplier, output_shift,
-       output_activation_min, output_activation_max, output_data, output_dims,
-       im2col_data, im2col_dims, gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
-                 int32 input_offset, const uint8* filter_data,
-                 const Dims<4>& filter_dims, int32 filter_offset,
-                 const int32* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int pad_width,
-                 int pad_height, int32 output_offset, int32 output_multiplier,
-                 int output_shift, int32 output_activation_min,
-                 int32 output_activation_max, uint8* output_data,
-                 const Dims<4>& output_dims, uint8* im2col_data,
-                 const Dims<4>& im2col_dims,
-                 gemmlowp::GemmContext* gemm_context) {
-  static_assert(Ac == FusedActivationFunctionType::kNone ||
-                    Ac == FusedActivationFunctionType::kRelu ||
-                    Ac == FusedActivationFunctionType::kRelu6 ||
-                    Ac == FusedActivationFunctionType::kRelu1,
-                "");
-  if (Ac == FusedActivationFunctionType::kNone) {
-    TFLITE_DCHECK_EQ(output_activation_min, 0);
-    TFLITE_DCHECK_EQ(output_activation_max, 255);
-  }
-  Conv(input_data, input_dims, input_offset, filter_data, filter_dims,
-       filter_offset, bias_data, bias_dims, stride_width, stride_height,
-       pad_width, pad_height, output_offset, output_multiplier, output_shift,
-       output_activation_min, output_activation_max, output_data, output_dims,
-       im2col_data, im2col_dims, gemm_context);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// legacy, for compatibility with old checked-in code
-template <FusedActivationFunctionType Ac>
-void Conv(const uint8* input_data, const Dims<4>& input_dims,
-          int32 input_offset, const uint8* filter_data,
-          const Dims<4>& filter_dims, int32 filter_offset,
-          const int32* bias_data, const Dims<4>& bias_dims, int stride,
-          int pad_width, int pad_height, int32 output_offset,
-          int32 output_multiplier, int output_shift,
-          int32 output_activation_min, int32 output_activation_max,
-          uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data,
-          const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) {
-  Conv<Ac>(input_data, input_dims, input_offset, filter_data, filter_dims,
-           filter_offset, bias_data, bias_dims, stride, stride, pad_width,
-           pad_height, output_offset, output_multiplier, output_shift,
-           output_activation_min, output_activation_max, output_data,
-           output_dims, im2col_data, im2col_dims, gemm_context);
-}
-
 template <typename T>
 inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
                          const RuntimeShape& unextended_input_shape,
@@ -1385,21 +1203,6 @@ void BroadcastDiv4DSlow(const ArithmeticParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-template <typename T>
-void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims,
-                  const T* input2_data, const Dims<4>& input2_dims,
-                  T output_activation_min, T output_activation_max,
-                  T* output_data, const Dims<4>& output_dims) {
-  tflite::ArithmeticParams op_params;
-  SetActivationParams(output_activation_min, output_activation_max, &op_params);
-
-  BroadcastDiv4DSlow(op_params, DimsToShape(input1_dims), input1_data,
-                     DimsToShape(input2_dims), input2_data,
-                     DimsToShape(output_dims), output_data);
-}
-
 template <typename T>
 inline void Div(const ArithmeticParams& params,
                 const RuntimeShape& input1_shape, const T* input1_data,
@@ -1418,21 +1221,6 @@ inline void Div(const ArithmeticParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-template <typename T>
-inline void Div(const T* input1_data, const Dims<4>& input1_dims,
-                const T* input2_data, const Dims<4>& input2_dims,
-                T output_activation_min, T output_activation_max,
-                T* output_data, const Dims<4>& output_dims) {
-  tflite::ArithmeticParams op_params;
-  SetActivationParams(output_activation_min, output_activation_max, &op_params);
-
-  Div(op_params, DimsToShape(input1_dims), input1_data,
-      DimsToShape(input2_dims), input2_data, DimsToShape(output_dims),
-      output_data);
-}
-
 inline void SubNonBroadcast(const ArithmeticParams& params,
                             const RuntimeShape& input1_shape,
                             const float* input1_data,
@@ -1772,34 +1560,10 @@ inline void Concatenation(const ConcatenationParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-template <FusedActivationFunctionType Ac, typename Scalar>
-inline void Concatenation(int concat_dim, const Scalar* const* input_data,
-                          const Dims<4>* const* input_dims, int inputs_count,
-                          Scalar* output_data, const Dims<4>& output_dims) {
-  // For now we don't have a model with a Concatenation with fused activation.
-  TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone);
-
-  std::vector<RuntimeShape> input_shapes(inputs_count);
-  std::vector<const RuntimeShape*> input_shapes_indirect(inputs_count);
-  for (int i = 0; i < inputs_count; ++i) {
-    ShapeFromDims(*input_dims[i], &input_shapes[i]);
-    input_shapes_indirect[i] = &input_shapes[i];
-  }
-  tflite::ConcatenationParams op_params;
-  op_params.axis = 3 - concat_dim;
-  op_params.inputs_count = inputs_count;
-
-  Concatenation(op_params, input_shapes_indirect.data(), input_data,
-                DimsToShape(output_dims), output_data);
-}
-
 // TODO(prabhumk): This is the same as the optimized implementation.
 // TODO(prabhumk): The quantized implementation of concatentation isn't fully
 // quantized as it takes scale as a floating point value. This should be fixed
 // when optimizng this routine further.
-
 inline void ConcatenationWithScaling(const ConcatenationParams& params,
                                      const RuntimeShape* const* input_shapes,
                                      const uint8* const* input_data,
@@ -1862,33 +1626,6 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-inline void Concatenation(int concat_dim, const uint8* const* input_data,
-                          const Dims<4>* const* input_dims,
-                          const int32* input_zeropoint,
-                          const float* input_scale, int inputs_count,
-                          uint8* output_data, const Dims<4>& output_dims,
-                          const int32 output_zeropoint,
-                          const float output_scale) {
-  std::vector<RuntimeShape> input_shapes(inputs_count);
-  std::vector<const RuntimeShape*> input_shapes_indirect(inputs_count);
-  for (int i = 0; i < inputs_count; ++i) {
-    ShapeFromDims(*input_dims[i], &input_shapes[i]);
-    input_shapes_indirect[i] = &input_shapes[i];
-  }
-  tflite::ConcatenationParams op_params;
-  op_params.axis = 3 - concat_dim;
-  op_params.input_zeropoint = input_zeropoint;
-  op_params.input_scale = input_scale;
-  op_params.inputs_count = inputs_count;
-  op_params.output_zeropoint = output_zeropoint;
-  op_params.output_scale = output_scale;
-
-  ConcatenationWithScaling(op_params, input_shapes_indirect.data(), input_data,
-                           DimsToShape(output_dims), output_data);
-}
-
 template <typename Scalar>
 void Pack(const PackParams& params, const RuntimeShape* const* input_shapes,
           const Scalar* const* input_data, const RuntimeShape& output_shape,
@@ -2002,26 +1739,6 @@ void DepthConcatenation(const ConcatenationParams& params,
                 output_data);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-template <FusedActivationFunctionType Ac, typename Scalar>
-void DepthConcatenation(const Scalar* const* input_data,
-                        const Dims<4>* const* input_dims, int inputs_count,
-                        Scalar* output_data, const Dims<4>& output_dims) {
-  // For now we don't have a model with a Concatenation with fused activation.
-  TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone);
-  std::vector<RuntimeShape> input_shapes(inputs_count);
-  std::vector<const RuntimeShape*> input_shapes_indirect(inputs_count);
-  for (int i = 0; i < inputs_count; ++i) {
-    ShapeFromDims(*input_dims[i], &input_shapes[i]);
-    input_shapes_indirect[i] = &input_shapes[i];
-  }
-  tflite::ConcatenationParams op_params;
-  op_params.inputs_count = inputs_count;
-
-  DepthConcatenation(op_params, input_shapes_indirect.data(), input_data,
-                     DimsToShape(output_dims), output_data);
-}
-
 inline void LstmCell(
     const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
     const float* input_data, const RuntimeShape& unextended_prev_activ_shape,
@@ -2139,31 +1856,6 @@ inline void LstmCell(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
-                     const float* prev_activ_data,
-                     const Dims<4>& prev_activ_dims, const float* weights_data,
-                     const Dims<4>& weights_dims, const float* bias_data,
-                     const Dims<4>& bias_dims, const float* prev_state_data,
-                     const Dims<4>& prev_state_dims, float* output_state_data,
-                     const Dims<4>& output_state_dims, float* output_activ_data,
-                     const Dims<4>& output_activ_dims, float* concat_temp_data,
-                     const Dims<4>& concat_temp_dims, float* activ_temp_data,
-                     const Dims<4>& activ_temp_dims) {
-  tflite::LstmCellParams op_params;
-  // Float LSTM cell does not need parameters to be set: leave untouched.
-
-  LstmCell(op_params, DimsToShape(input_dims), input_data,
-           DimsToShape(prev_activ_dims), prev_activ_data,
-           DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims),
-           bias_data, DimsToShape(prev_state_dims), prev_state_data,
-           DimsToShape(output_state_dims), output_state_data,
-           DimsToShape(output_activ_dims), output_activ_data,
-           DimsToShape(concat_temp_dims), concat_temp_data,
-           DimsToShape(activ_temp_dims), activ_temp_data);
-}
-
 // Quantized LSTM cell implementation.
 // The quantization of the input, output arrays is as follows:
 //  - The input activations are quantized as uint8 on the interval
@@ -2438,37 +2130,6 @@ inline void LstmCell(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <int StateIntegerBits>
-void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
-              const uint8* prev_activ_data_uint8,
-              const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8,
-              const Dims<4>& weights_dims, const int32* bias_data_int32,
-              const Dims<4>& bias_dims, const int16* prev_state_data_int16,
-              const Dims<4>& prev_state_dims, int16* output_state_data_int16,
-              const Dims<4>& output_state_dims, uint8* output_activ_data_uint8,
-              const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8,
-              const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16,
-              const Dims<4>& activ_temp_dims, int32 weights_zero_point,
-              int32 accum_multiplier, int accum_shift,
-              gemmlowp::GemmContext* gemm_context) {
-  tflite::LstmCellParams op_params;
-  op_params.weights_zero_point = weights_zero_point;
-  op_params.accum_multiplier = accum_multiplier;
-  op_params.accum_shift = accum_shift;
-
-  LstmCell<StateIntegerBits>(
-      op_params, DimsToShape(input_dims), input_data_uint8,
-      DimsToShape(prev_activ_dims), prev_activ_data_uint8,
-      DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims),
-      bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16,
-      DimsToShape(output_state_dims), output_state_data_int16,
-      DimsToShape(output_activ_dims), output_activ_data_uint8,
-      DimsToShape(concat_temp_dims), concat_temp_data_uint8,
-      DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context);
-}
-
 template <typename Scalar>
 void Split(const SplitParams& params, const RuntimeShape& input_shape,
            const Scalar* input_data, const RuntimeShape* const* output_shapes,
@@ -2511,45 +2172,6 @@ void Split(const SplitParams& params, const RuntimeShape& input_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-template <typename Scalar>
-void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims,
-                     int axis, int outputs_count, Scalar* const* output_data,
-                     const Dims<4>* const* output_dims) {
-  std::vector<RuntimeShape> output_shapes(outputs_count);
-  std::vector<const RuntimeShape*> output_shapes_indirect(outputs_count);
-  for (int i = 0; i < outputs_count; ++i) {
-    ShapeFromDims(*output_dims[i], &output_shapes[i]);
-    output_shapes_indirect[i] = &output_shapes[i];
-  }
-  tflite::SplitParams op_params;
-  op_params.axis = 3 - axis;
-  op_params.num_split = outputs_count;
-
-  Split(op_params, DimsToShape(input_dims), input_data,
-        output_shapes_indirect.data(), output_data);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-template <FusedActivationFunctionType Ac, typename Scalar>
-void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims,
-                     int outputs_count, Scalar* const* output_data,
-                     const Dims<4>* const* output_dims) {
-  TFLITE_DCHECK_GE(outputs_count, 1);
-  for (int i = 0; i < outputs_count; i++) {
-    /* batches = */ MatchingArraySize(*output_dims[i], 3, input_dims, 3);
-    /* height = */ MatchingArraySize(*output_dims[i], 2, input_dims, 2);
-    /* width = */ MatchingArraySize(*output_dims[i], 1, input_dims, 1);
-  }
-  // For now we don't have a model with a Split with fused activation.
-  TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone);
-
-  TensorFlowSplit(input_data, input_dims, /*axis=*/0, outputs_count,
-                  output_data, output_dims);
-}
-
 inline int NodeOffset(int b, int h, int w, int height, int width) {
   return (b * height + h) * width + w;
 }
@@ -2880,15 +2502,6 @@ inline void LogSoftmax(const SoftmaxParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy
-inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
-                       float* output_data, const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  // No params currently used for float LogSoftmax.
-  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
-}
-
 // Although currently the name of this function says that it cannot handle
 // values less than 1, in practice it can handle as low as 1/x_max, where
 // x_max is the largest representable input.  In other words, the output range
@@ -3093,22 +2706,6 @@ inline void LogSoftmax(const SoftmaxParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
-                       int32 input_multiplier, int32 input_left_shift,
-                       int32 reverse_scaling_divisor,
-                       int32 reverse_scaling_right_shift, int diff_min,
-                       uint8* output_data, const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  params.input_multiplier = input_multiplier;
-  params.input_left_shift = input_left_shift;
-  params.reverse_scaling_divisor = reverse_scaling_divisor;
-  params.reverse_scaling_right_shift = reverse_scaling_right_shift;
-  params.diff_min = diff_min;
-  LogSoftmax(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) {
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
@@ -3170,20 +2767,6 @@ inline void Logistic(const LogisticParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
-                     int32 input_zero_point, int32 input_range_radius,
-                     int32 input_multiplier, int input_left_shift,
-                     uint8* output_data, const RuntimeShape& output_shape) {
-  LogisticParams params;
-  params.input_zero_point = input_zero_point;
-  params.input_range_radius = input_range_radius;
-  params.input_multiplier = input_multiplier;
-  params.input_left_shift = input_left_shift;
-  Logistic(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Logistic(const LogisticParams& params,
                      const RuntimeShape& input_shape, const int16* input_data,
                      const RuntimeShape& output_shape, int16* output_data) {
@@ -3203,15 +2786,6 @@ inline void Logistic(const LogisticParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Logistic(const RuntimeShape& input_shape, const int16* input_data,
-                     const RuntimeShape& output_shape, int16* output_data) {
-  LogisticParams params;
-  // No params currently needed by int16 Logistic.
-  Logistic(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
                  const RuntimeShape& output_shape, float* output_data) {
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
@@ -3275,20 +2849,6 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
-                 int32 input_zero_point, int32 input_range_radius,
-                 int32 input_multiplier, int input_left_shift,
-                 uint8* output_data, const RuntimeShape& output_shape) {
-  TanhParams params;
-  params.input_zero_point = input_zero_point;
-  params.input_range_radius = input_range_radius;
-  params.input_multiplier = input_multiplier;
-  params.input_left_shift = input_left_shift;
-  Tanh(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
                  const int16* input_data, const RuntimeShape& output_shape,
                  int16* output_data) {
@@ -3323,16 +2883,6 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
-                 int input_left_shift, int16* output_data,
-                 const RuntimeShape& output_shape) {
-  TanhParams params;
-  params.input_left_shift = input_left_shift;
-  Tanh(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Dequantize(const tflite::DequantizationParams& op_params,
                        const RuntimeShape& input_shape, const uint8* input_data,
                        const RuntimeShape& output_shape, float* output_data) {
@@ -3347,19 +2897,6 @@ inline void Dequantize(const tflite::DequantizationParams& op_params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims,
-                       int32 zero_point, double scale, float* output_data,
-                       const Dims<4>& output_dims) {
-  tflite::DequantizationParams op_params;
-  op_params.zero_point = zero_point;
-  op_params.scale = scale;
-
-  Dequantize(op_params, DimsToShape(input_dims), input_data,
-             DimsToShape(output_dims), output_data);
-}
-
 inline void FakeQuant(const tflite::FakeQuantParams& op_params,
                       const RuntimeShape& input_shape, const float* input_data,
                       const RuntimeShape& output_shape, float* output_data) {
@@ -3383,20 +2920,6 @@ inline void FakeQuant(const tflite::FakeQuantParams& op_params,
                     output_data, flat_size);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-inline void FakeQuant(const float* input_data, const Dims<4>& input_dims,
-                      float rmin, float rmax, int num_bits, float* output_data,
-                      const Dims<4>& output_dims) {
-  tflite::FakeQuantParams op_params;
-  op_params.num_bits = num_bits;
-  op_params.minmax.min = rmin;
-  op_params.minmax.max = rmax;
-
-  FakeQuant(op_params, DimsToShape(input_dims), input_data,
-            DimsToShape(output_dims), output_data);
-}
-
 template <typename SrcT, typename DstT>
 inline void Cast(const RuntimeShape& input_shape, const SrcT* input_data,
                  const RuntimeShape& output_shape, DstT* output_data) {
@@ -3456,23 +2979,6 @@ inline void Gather(const tflite::GatherParams& op_params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4> version.
-// When moving legacy ops to legacy_reference_ops, replace content with looser
-// implementation.
-template <typename T>
-inline void Gather(const T* input_data, const Dims<4>& input_dims,
-                   int input_rank, const int32* coords_data,
-                   const Dims<4>& coords_dims, T* output_data,
-                   const Dims<4>& output_dims) {
-  tflite::GatherParams op_params;
-  op_params.input_rank = input_rank;
-
-  Gather(op_params, DimsToShape(input_dims), input_data,
-         DimsToShape(coords_dims), coords_data, DimsToShape(output_dims),
-         output_data);
-}
-
 template <typename T>
 inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
                            const RuntimeShape& unextended_input_shape,
@@ -3802,58 +3308,6 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline uint32 LegacyReverseBits32(uint32 n) {
-  n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1);
-  n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2);
-  n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4);
-  return (((n & 0xFF) << 24) | ((n & 0xFF00) << 8) | ((n & 0xFF0000) >> 8) |
-          ((n & 0xFF000000) >> 24));
-}
-
-inline void StridedSliceReverseIndices(tflite::StridedSliceParams* p) {
-  TFLITE_CHECK_EQ(p->start_indices_count, p->stop_indices_count);
-  TFLITE_CHECK_EQ(p->stop_indices_count, p->strides_count);
-
-  std::reverse(p->start_indices, p->start_indices + p->start_indices_count);
-  std::reverse(p->stop_indices, p->stop_indices + p->stop_indices_count);
-  std::reverse(p->strides, p->strides + p->strides_count);
-
-  p->begin_mask = LegacyReverseBits32(static_cast<uint32>(p->begin_mask)) >>
-                  (32 - p->start_indices_count);
-  p->ellipsis_mask =
-      LegacyReverseBits32(static_cast<uint32>(p->ellipsis_mask)) >>
-      (32 - p->start_indices_count);
-  p->end_mask = LegacyReverseBits32(static_cast<uint32>(p->end_mask)) >>
-                (32 - p->start_indices_count);
-  p->new_axis_mask =
-      LegacyReverseBits32(static_cast<uint32>(p->new_axis_mask)) >>
-      (32 - p->start_indices_count);
-  p->shrink_axis_mask =
-      LegacyReverseBits32(static_cast<uint32>(p->shrink_axis_mask)) >>
-      (32 - p->start_indices_count);
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T>
-inline void StridedSlice(const T* input_data, const Dims<4>& input_dims,
-                         int begin_mask, int end_mask, int shrink_axis_mask,
-                         const std::vector<int>& start_indices,
-                         const std::vector<int>& stop_indices,
-                         const std::vector<int>& strides, T* output_data,
-                         const Dims<4>& output_dims) {
-  TFLITE_DCHECK_EQ(start_indices.size(), 4);
-  auto op_params = strided_slice::BuildStridedSliceParams(
-      begin_mask, end_mask, shrink_axis_mask, start_indices, stop_indices,
-      strides);
-  StridedSliceReverseIndices(&op_params);
-
-  StridedSlice(op_params, DimsToShape(input_dims), input_data,
-               DimsToShape(output_dims), output_data);
-}
-
 template <typename T>
 inline void Slice(const tflite::SliceParams& op_params,
                   const RuntimeShape& input_shape, const T* input_data,
@@ -4119,22 +3573,6 @@ inline void Mean(const tflite::MeanParams& op_params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy Dims<4>.
-template <typename T>
-inline void Mean(const T* input_data, const Dims<4>& input_dims,
-                 const std::vector<int>& reduction_indices, T* output_data,
-                 const Dims<4>& output_dims) {
-  tflite::MeanParams op_params;
-  op_params.axis_count = reduction_indices.size();
-  for (int i = 0; i < op_params.axis_count; ++i) {
-    op_params.axis[i] = reduction_indices[op_params.axis_count - 1 - i];
-  }
-
-  Mean(op_params, DimsToShape(input_dims), input_data, DimsToShape(output_dims),
-       output_data);
-}
-
 // Computes the mean of elements across dimensions given in axis.
 // It does so in two stages, first calculates the sum of elements along the axis
 // then divides it by the number of element in axis for quantized values.
@@ -4392,20 +3830,6 @@ void Transpose(const TransposeParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T>
-void Transpose(const T* input, const Dims<4>& input_dims, T* output,
-               const Dims<4>& output_dims, const int* permuted_axes) {
-  TransposeParams params;
-  params.perm_count = 4;
-  for (int i = 0; i < 4; ++i) {
-    params.perm[i] = 3 - permuted_axes[3 - i];
-  }
-  Transpose(params, DimsToShape(input_dims), input, DimsToShape(output_dims),
-            output);
-}
-
 inline void TransposeConv(
     const ConvParams& params, const RuntimeShape& input_shape,
     const float* input_data, const RuntimeShape& filter_shape,
@@ -4479,27 +3903,6 @@ inline void TransposeConv(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, float* output_data,
-                          const Dims<4>& output_dims, float* im2col_data,
-                          const Dims<4>& im2col_dims) {
-  tflite::ConvParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = pad_width;
-  op_params.padding_values.height = pad_height;
-  op_params.stride_width = stride_width;
-  op_params.stride_height = stride_height;
-
-  TransposeConv(op_params, DimsToShape(input_dims), input_data,
-                DimsToShape(filter_dims), filter_data, DimsToShape(output_dims),
-                output_data, DimsToShape(im2col_dims), im2col_data);
-}
-
 template <typename T>
 inline bool EqualFn(T lhs, T rhs) {
   return lhs == rhs;
@@ -4553,19 +3956,6 @@ inline void Comparison(const ComparisonParams& op_params,
                            input2_data, output_shape, output_data);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T, ComparisonFn<T> F>
-inline void Comparison(const T* input1_data, const Dims<4>& input1_dims,
-                       const T* input2_data, const Dims<4>& input2_dims,
-                       bool* output_data, const Dims<4>& output_dims) {
-  ComparisonParams op_params;
-  // No parameters needed.
-  ComparisonImpl<T, F>(op_params, DimsToShape(input1_dims), input1_data,
-                       DimsToShape(input2_dims), input2_data,
-                       DimsToShape(output_dims), output_data);
-}
-
 template <typename T, ComparisonFn<int32> F>
 inline void ComparisonWithScaling(
     const ComparisonParams& op_params, const RuntimeShape& input1_shape,
@@ -4596,32 +3986,6 @@ inline void ComparisonWithScaling(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T, ComparisonFn<int32> F>
-inline void Comparison(int left_shift, const T* input1_data,
-                       const Dims<4>& input1_dims, int32 input1_offset,
-                       int32 input1_multiplier, int input1_shift,
-                       const T* input2_data, const Dims<4>& input2_dims,
-                       int32 input2_offset, int32 input2_multiplier,
-                       int input2_shift, bool* output_data,
-                       const Dims<4>& output_dims) {
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = left_shift;
-  op_params.input1_offset = input1_offset;
-  op_params.input1_multiplier = input1_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.input1_shift = kReverseShift * input1_shift;
-  op_params.input2_offset = input2_offset;
-  op_params.input2_multiplier = input2_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.input2_shift = kReverseShift * input2_shift;
-
-  ComparisonWithScaling<T, F>(op_params, DimsToShape(input1_dims), input1_data,
-                              DimsToShape(input2_dims), input2_data,
-                              DimsToShape(output_dims), output_data);
-}
-
 template <typename T, ComparisonFn<T> F>
 inline void BroadcastComparison4DSlowImpl(
     const ComparisonParams& op_params,
@@ -4665,22 +4029,6 @@ inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
                                           output_shape, output_data);
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T, ComparisonFn<T> F>
-inline void BroadcastComparison(const T* input1_data,
-                                const Dims<4>& input1_dims,
-                                const T* input2_data,
-                                const Dims<4>& input2_dims, bool* output_data,
-                                const Dims<4>& output_dims) {
-  ComparisonParams op_params;
-  // No parameters needed.
-  BroadcastComparison4DSlowImpl<T, F>(op_params, DimsToShape(input1_dims),
-                                      input1_data, DimsToShape(input2_dims),
-                                      input2_data, DimsToShape(output_dims),
-                                      output_data);
-}
-
 template <typename T, ComparisonFn<int32> F>
 inline void BroadcastComparison4DSlowWithScaling(
     const ComparisonParams& op_params,
@@ -4731,80 +4079,7 @@ inline void BroadcastComparison4DSlowWithScaling(
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T, ComparisonFn<int32> F>
-inline void BroadcastComparison(int left_shift, const T* input1_data,
-                                const Dims<4>& input1_dims, int32 input1_offset,
-                                int32 input1_multiplier, int input1_shift,
-                                const T* input2_data,
-                                const Dims<4>& input2_dims, int32 input2_offset,
-                                int32 input2_multiplier, int input2_shift,
-                                bool* output_data, const Dims<4>& output_dims) {
-  ComparisonParams op_params;
-
-  op_params.left_shift = left_shift;
-  op_params.input1_offset = input1_offset;
-  op_params.input1_multiplier = input1_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.input1_shift = kReverseShift * input1_shift;
-  op_params.input2_offset = input2_offset;
-  op_params.input2_multiplier = input2_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.input2_shift = kReverseShift * input2_shift;
-
-  BroadcastComparison4DSlowWithScaling<T, F>(
-      op_params, DimsToShape(input1_dims), input1_data,
-      DimsToShape(input2_dims), input2_data, DimsToShape(output_dims),
-      output_data);
-}
-
 #define TFLITE_COMPARISON_OP(name)                                             \
-  template <typename T>                                                        \
-  inline void name(const T* input1_data, const Dims<4>& input1_dims,           \
-                   const T* input2_data, const Dims<4>& input2_dims,           \
-                   bool* output_data, const Dims<4>& output_dims) {            \
-    gemmlowp::ScopedProfilingLabel label(#name);                               \
-    Comparison<T, name##Fn>(input1_data, input1_dims, input2_data,             \
-                            input2_dims, output_data, output_dims);            \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void name(                                                            \
-      int left_shift, const T* input1_data, const Dims<4>& input1_dims,        \
-      int32 input1_offset, int32 input1_multiplier, int input1_shift,          \
-      const T* input2_data, const Dims<4>& input2_dims, int32 input2_offset,   \
-      int32 input2_multiplier, int input2_shift, bool* output_data,            \
-      const Dims<4>& output_dims) {                                            \
-    gemmlowp::ScopedProfilingLabel label(#name "/8bit");                       \
-    Comparison<T, name##Fn>(left_shift, input1_data, input1_dims,              \
-                            input1_offset, input1_multiplier, input1_shift,    \
-                            input2_data, input2_dims, input2_offset,           \
-                            input2_multiplier, input2_shift, output_data,      \
-                            output_dims);                                      \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void Broadcast##name(                                                 \
-      const T* input1_data, const Dims<4>& input1_dims, const T* input2_data,  \
-      const Dims<4>& input2_dims, bool* output_data,                           \
-      const Dims<4>& output_dims) {                                            \
-    gemmlowp::ScopedProfilingLabel label("Broadcast" #name);                   \
-    BroadcastComparison<T, name##Fn>(input1_data, input1_dims, input2_data,    \
-                                     input2_dims, output_data, output_dims);   \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void Broadcast##name(                                                 \
-      int left_shift, const T* input1_data, const Dims<4>& input1_dims,        \
-      int32 input1_offset, int32 input1_multiplier, int input1_shift,          \
-      const T* input2_data, const Dims<4>& input2_dims, int32 input2_offset,   \
-      int32 input2_multiplier, int input2_shift, bool* output_data,            \
-      const Dims<4>& output_dims) {                                            \
-    gemmlowp::ScopedProfilingLabel label("Broadcast" #name "/8bit");           \
-    BroadcastComparison<T, name##Fn>(left_shift, input1_data, input1_dims,     \
-                                     input1_offset, input1_multiplier,         \
-                                     input1_shift, input2_data, input2_dims,   \
-                                     input2_offset, input2_multiplier,         \
-                                     input2_shift, output_data, output_dims);  \
-  }                                                                            \
   inline void name(const ComparisonParams& op_params,                          \
                    const RuntimeShape& input1_shape, const float* input1_data, \
                    const RuntimeShape& input2_shape, const float* input2_data, \
@@ -4889,19 +4164,6 @@ void Select(const RuntimeShape& input_condition_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename D, typename T>
-inline void Select(const D* input_condition_data,
-                   const Dims<4>& input_condition_dims, const T* input_x_data,
-                   const Dims<4>& input_x_dims, const T* input_y_data,
-                   const Dims<4>& input_y_dims, T* output_data,
-                   const Dims<4>& output_dims) {
-  Select(DimsToShape(input_condition_dims), input_condition_data,
-         DimsToShape(input_x_dims), input_x_data, DimsToShape(input_y_dims),
-         input_y_data, DimsToShape(output_dims), output_data);
-}
-
 template <typename D, typename T>
 void RankOneSelect(const RuntimeShape& input_condition_shape,
                    const D* input_condition_data,
@@ -4923,20 +4185,6 @@ void RankOneSelect(const RuntimeShape& input_condition_shape,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename D, typename T>
-inline void RankOneSelect(const D* input_condition_data,
-                          const Dims<4>& input_condition_dims,
-                          const T* input_x_data, const Dims<4>& input_x_dims,
-                          const T* input_y_data, const Dims<4>& input_y_dims,
-                          T* output_data, const Dims<4>& output_dims) {
-  RankOneSelect(DimsToShape(input_condition_dims), input_condition_data,
-                DimsToShape(input_x_dims), input_x_data,
-                DimsToShape(input_y_dims), input_y_data,
-                DimsToShape(output_dims), output_data);
-}
-
 // For easy implementation, the indices is always a vector of size-4 vectors.
 template <typename T, typename TI>
 inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
@@ -4978,16 +4226,6 @@ inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-template <typename T, typename TI>
-inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
-                          const T* values, T default_value, T* output_data,
-                          const Dims<4>& output_dims, bool value_is_scalar) {
-  SparseToDense(indices, values, default_value, value_is_scalar,
-                DimsToShape(output_dims), output_data);
-}
-
 template <typename T>
 inline void Pow(const RuntimeShape& input1_shape, const T* input1_data,
                 const RuntimeShape& input2_shape, const T* input2_data,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/softmax.h b/tensorflow/contrib/lite/kernels/internal/reference/softmax.h
index 006174e8db..7d44296134 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/softmax.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/softmax.h
@@ -57,16 +57,6 @@ inline void Softmax(const SoftmaxParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy.
-inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
-                    float beta, float* output_data,
-                    const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  params.beta = beta;
-  Softmax(params, input_shape, input_data, output_shape, output_data);
-}
-
 inline void Softmax(const SoftmaxParams& params,
                     const RuntimeShape& input_shape, const uint8* input_data,
                     const RuntimeShape& output_shape, uint8* output_data) {
@@ -151,19 +141,6 @@ inline void Softmax(const SoftmaxParams& params,
   }
 }
 
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-// Legacy
-inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
-                    int32 input_beta_multiplier, int32 input_beta_left_shift,
-                    int diff_min, uint8* output_data,
-                    const RuntimeShape& output_shape) {
-  SoftmaxParams params;
-  params.input_multiplier = input_beta_multiplier;
-  params.input_left_shift = input_beta_left_shift;
-  params.diff_min = diff_min;
-  Softmax(params, input_shape, input_data, output_shape, output_data);
-}
-
 // Performs softmax along the input of size (input_size * batch_size).
 inline void Softmax(const float* in, const int input_size, const int batch_size,
                     const float beta, float* out) {
-- 
GitLab


From 370d385c3029a7972ba201c8303942b30f09521c Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Thu, 27 Sep 2018 20:52:53 -0700
Subject: [PATCH 0840/1357] Creating a LinearModel that works with V2 feature
 columns.

In subsequent change I'll change canned estimators to support FeatureColumn V2
and use this LinearModel.

PiperOrigin-RevId: 214882241
---
 .../feature_column/feature_column_v2.py       |  574 ++---
 .../feature_column/feature_column_v2_test.py  | 2042 ++++-------------
 2 files changed, 597 insertions(+), 2019 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 538641c251..a8d5bfb437 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -136,14 +136,11 @@ import six
 
 
 from tensorflow.python.eager import context
-from tensorflow.python.feature_column import feature_column as fc_old
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.keras.engine import training
 from tensorflow.python.keras.engine.base_layer import Layer
-from tensorflow.python.layers import base
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
@@ -153,7 +150,6 @@ from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import string_ops
 from tensorflow.python.ops import variable_scope
@@ -245,28 +241,19 @@ class StateManager(object):
     raise NotImplementedError('StateManager.get_resource')
 
 
-class _InputLayerStateManager(StateManager):
-  """Manages the state of InputLayer."""
+class _StateManagerImpl(StateManager):
+  """Manages the state of FeatureLayer and LinearModel."""
 
-  def __init__(self, layer, feature_columns, trainable):
-    """Creates an _InputLayerStateManager object.
+  def __init__(self, layer, trainable):
+    """Creates an _StateManagerImpl object.
 
     Args:
       layer: The input layer this state manager is associated with.
-      feature_columns: List of feature columns for the input layer
       trainable: Whether by default, variables created are trainable or not.
     """
     self._trainable = trainable
     self._layer = layer
-    self._cols_to_vars_map = {}
-    self._cols_to_names_map = {}
-    for column in sorted(feature_columns, key=lambda x: x.name):
-      self._cols_to_vars_map[column] = {}
-      base_name = column.name
-      if isinstance(column, SharedEmbeddingColumn):
-        base_name = column.shared_collection_name
-      with variable_scope.variable_scope(base_name) as vs:
-        self._cols_to_names_map[column] = _strip_leading_slashes(vs.name)
+    self._cols_to_vars_map = collections.defaultdict(lambda: {})
 
   def create_variable(self,
                       feature_column,
@@ -277,19 +264,19 @@ class _InputLayerStateManager(StateManager):
                       initializer=None):
     if name in self._cols_to_vars_map[feature_column]:
       raise ValueError('Variable already exists.')
-    with variable_scope.variable_scope(self._cols_to_names_map[feature_column]):
-      var = self._layer.add_variable(
-          name=name,
-          shape=shape,
-          dtype=dtype,
-          initializer=initializer,
-          trainable=self._trainable and trainable,
-          # TODO(rohanj): Get rid of this hack once we have a mechanism for
-          # specifying a default partitioner for an entire layer. In that case,
-          # the default getter for Layers should work.
-          getter=variable_scope.get_variable)
-      self._cols_to_vars_map[feature_column][name] = var
-      return var
+
+    var = self._layer.add_variable(
+        name=name,
+        shape=shape,
+        dtype=dtype,
+        initializer=initializer,
+        trainable=self._trainable and trainable,
+        # TODO(rohanj): Get rid of this hack once we have a mechanism for
+        # specifying a default partitioner for an entire layer. In that case,
+        # the default getter for Layers should work.
+        getter=variable_scope.get_variable)
+    self._cols_to_vars_map[feature_column][name] = var
+    return var
 
   def get_variable(self, feature_column, name):
     if name in self._cols_to_vars_map[feature_column]:
@@ -313,12 +300,15 @@ class FeatureLayer(Layer):
   keywords_embedded = embedding_column(
       categorical_column_with_hash_bucket("keywords", 10K), dimensions=16)
   columns = [price, keywords_embedded, ...]
-  features = tf.parse_example(..., features=make_parse_example_spec(columns))
   feature_layer = FeatureLayer(columns)
+
+  features = tf.parse_example(..., features=make_parse_example_spec(columns))
   dense_tensor = feature_layer(features)
   for units in [128, 64, 32]:
     dense_tensor = tf.layers.dense(dense_tensor, units, tf.nn.relu)
-  prediction = tf.layers.dense(dense_tensor, 1)."""
+  prediction = tf.layers.dense(dense_tensor, 1).
+  ```
+  """
 
   def __init__(self,
                feature_columns,
@@ -375,8 +365,7 @@ class FeatureLayer(Layer):
     super(FeatureLayer, self).__init__(name=name, trainable=trainable, **kwargs)
 
     self._feature_columns = _normalize_feature_columns(feature_columns)
-    self._state_manager = _InputLayerStateManager(self, self._feature_columns,
-                                                  self.trainable)
+    self._state_manager = _StateManagerImpl(self, self.trainable)
     self._shared_state_manager = shared_state_manager
     for column in sorted(self._feature_columns, key=lambda x: x.name):
       if not isinstance(column, DenseColumn):
@@ -395,7 +384,8 @@ class FeatureLayer(Layer):
         column.create_state(self._shared_state_manager)
       else:
         with variable_scope.variable_scope(None, default_name=self.name):
-          column.create_state(self._state_manager)
+          with variable_scope.variable_scope(None, default_name=column.name):
+            column.create_state(self._state_manager)
       super(FeatureLayer, self).build(None)
 
   def call(self, features, cols_to_output_tensors=None):
@@ -448,20 +438,18 @@ class FeatureLayer(Layer):
     return (input_shape[0], total_elements)
 
 
-def linear_model(features,
-                 feature_columns,
-                 units=1,
-                 sparse_combiner='sum',
-                 weight_collections=None,
-                 trainable=True,
-                 cols_to_vars=None):
-  """Returns a linear prediction `Tensor` based on given `feature_columns`.
+def _strip_leading_slashes(name):
+  return name.rsplit('/', 1)[-1]
+
+
+class LinearModel(Layer):
+  """Produces a linear prediction `Tensor` based on given `feature_columns`.
 
-  This function generates a weighted sum based on output dimension `units`.
+  This layer generates a weighted sum based on output dimension `units`.
   Weighted sum refers to logits in classification problems. It refers to the
   prediction itself for linear regression problems.
 
-  Note on supported columns: `linear_model` treats categorical columns as
+  Note on supported columns: `LinearModel` treats categorical columns as
   `indicator_column`s. To be specific, assume the input as `SparseTensor` looks
   like:
 
@@ -486,308 +474,189 @@ def linear_model(features,
   keywords = categorical_column_with_hash_bucket("keywords", 10K)
   keywords_price = crossed_column('keywords', price_buckets, ...)
   columns = [price_buckets, keywords, keywords_price ...]
+  linear_model = LinearModel(columns)
+
   features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  prediction = linear_model(features, columns)
+  prediction = linear_model(features)
   ```
-
-  Args:
-    features: A mapping from key to tensors. `_FeatureColumn`s look up via these
-      keys. For example `numeric_column('price')` will look at 'price' key in
-      this dict. Values are `Tensor` or `SparseTensor` depending on
-      corresponding `_FeatureColumn`.
-    feature_columns: An iterable containing the FeatureColumns to use as inputs
-      to your model. All items should be instances of classes derived from
-      `_FeatureColumn`s.
-    units: An integer, dimensionality of the output space. Default value is 1.
-    sparse_combiner: A string specifying how to reduce if a categorical column
-      is multivalent. Except `numeric_column`, almost all columns passed to
-      `linear_model` are considered as categorical columns.  It combines each
-      categorical column independently. Currently "mean", "sqrtn" and "sum" are
-      supported, with "sum" the default for linear model. "sqrtn" often achieves
-      good accuracy, in particular with bag-of-words columns.
-        * "sum": do not normalize features in the column
-        * "mean": do l1 normalization on features in the column
-        * "sqrtn": do l2 normalization on features in the column
-      For example, for two features represented as the categorical columns:
-
-      ```python
-        # Feature 1
-
-        shape = [2, 2]
-        {
-            [0, 0]: "a"
-            [0, 1]: "b"
-            [1, 0]: "c"
-        }
-
-        # Feature 2
-
-        shape = [2, 3]
-        {
-            [0, 0]: "d"
-            [1, 0]: "e"
-            [1, 1]: "f"
-            [1, 2]: "g"
-        }
-      ```
-      with `sparse_combiner` as "mean", the linear model outputs conceptly are:
-      ```
-        y_0 = 1.0 / 2.0 * ( w_a + w_ b) + w_c + b_0
-        y_1 = w_d + 1.0 / 3.0 * ( w_e + w_ f + w_g) + b_1
-      ```
-      where `y_i` is the output, `b_i` is the bias, and `w_x` is the weight
-      assigned to the presence of `x` in the input features.
-    weight_collections: A list of collection names to which the Variable will be
-      added. Note that, variables will also be added to collections
-      `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
-    trainable: If `True` also add the variable to the graph collection
-      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
-    cols_to_vars: If not `None`, must be a dictionary that will be filled with a
-      mapping from `_FeatureColumn` to associated list of `Variable`s.  For
-      example, after the call, we might have cols_to_vars = {
-        _NumericColumn(
-          key='numeric_feature1', shape=(1,):
-        [<tf.Variable 'linear_model/price2/weights:0' shape=(1, 1)>],
-        'bias': [<tf.Variable 'linear_model/bias_weights:0' shape=(1,)>],
-        _NumericColumn(
-          key='numeric_feature2', shape=(2,)):
-        [<tf.Variable 'linear_model/price1/weights:0' shape=(2, 1)>]}
-      If a column creates no variables, its value will be an empty list. Note
-      that cols_to_vars will also contain a string key 'bias' that maps to a
-      list of Variables.
-
-  Returns:
-    A `Tensor` which represents predictions/logits of a linear model. Its shape
-    is (batch_size, units) and its dtype is `float32`.
-
-  Raises:
-    ValueError: if an item in `feature_columns` is neither a `_DenseColumn`
-      nor `_CategoricalColumn`.
-  """
-  with variable_scope.variable_scope(None, 'linear_model') as vs:
-    model_name = _strip_leading_slashes(vs.name)
-  linear_model_layer = _LinearModel(
-      feature_columns=feature_columns,
-      units=units,
-      sparse_combiner=sparse_combiner,
-      weight_collections=weight_collections,
-      trainable=trainable,
-      name=model_name)
-  retval = linear_model_layer(features)  # pylint: disable=not-callable
-  if cols_to_vars is not None:
-    cols_to_vars.update(linear_model_layer.cols_to_vars())
-  return retval
-
-
-def _add_to_collections(var, weight_collections):
-  """Adds a var to the list of weight_collections provided.
-
-  Handles the case for partitioned and non-partitioned variables.
-
-  Args:
-    var: A variable or Partitioned Variable.
-    weight_collections: List of collections to add variable to.
-  """
-  for weight_collection in weight_collections:
-    # The layer self.add_variable call already adds it to GLOBAL_VARIABLES.
-    if weight_collection == ops.GraphKeys.GLOBAL_VARIABLES:
-      continue
-    # TODO(rohanj): Explore adding a _get_variable_list method on `Variable`
-    # so that we don't have to do this check.
-    if isinstance(var, variables.PartitionedVariable):
-      for constituent_var in list(var):
-        ops.add_to_collection(weight_collection, constituent_var)
-    else:
-      ops.add_to_collection(weight_collection, var)
-
-
-class _FCLinearWrapper(base.Layer):
-  """Wraps a _FeatureColumn in a layer for use in a linear model.
-
-  See `linear_model` above.
   """
 
   def __init__(self,
-               feature_column,
+               feature_columns,
                units=1,
                sparse_combiner='sum',
-               weight_collections=None,
                trainable=True,
                name=None,
+               shared_state_manager=None,
                **kwargs):
-    super(_FCLinearWrapper, self).__init__(
-        trainable=trainable, name=name, **kwargs)
-    self._feature_column = feature_column
-    self._units = units
-    self._sparse_combiner = sparse_combiner
-    self._weight_collections = weight_collections
+    """Constructs a LinearModel.
 
-  def build(self, _):
-    if isinstance(self._feature_column, fc_old._CategoricalColumn):  # pylint: disable=protected-access
-      weight = self.add_variable(
-          name='weights',
-          shape=(self._feature_column._num_buckets, self._units),  # pylint: disable=protected-access
-          initializer=init_ops.zeros_initializer(),
-          trainable=self.trainable)
-    else:
-      num_elements = self._feature_column._variable_shape.num_elements()  # pylint: disable=protected-access
-      weight = self.add_variable(
-          name='weights',
-          shape=[num_elements, self._units],
-          initializer=init_ops.zeros_initializer(),
-          trainable=self.trainable)
-    _add_to_collections(weight, self._weight_collections)
-    self._weight_var = weight
-    self.built = True
-
-  def call(self, builder):
-    weighted_sum = fc_old._create_weighted_sum(  # pylint: disable=protected-access
-        column=self._feature_column,
-        builder=builder,
-        units=self._units,
-        sparse_combiner=self._sparse_combiner,
-        weight_collections=self._weight_collections,
-        trainable=self.trainable,
-        weight_var=self._weight_var)
-    return weighted_sum
+    Args:
+      feature_columns: An iterable containing the FeatureColumns to use as
+        inputs to your model. All items should be instances of classes derived
+        from `_FeatureColumn`s.
+      units: An integer, dimensionality of the output space. Default value is 1.
+      sparse_combiner: A string specifying how to reduce if a categorical column
+        is multivalent. Except `numeric_column`, almost all columns passed to
+        `linear_model` are considered as categorical columns.  It combines each
+        categorical column independently. Currently "mean", "sqrtn" and "sum"
+        are supported, with "sum" the default for linear model. "sqrtn" often
+        achieves good accuracy, in particular with bag-of-words columns.
+          * "sum": do not normalize features in the column
+          * "mean": do l1 normalization on features in the column
+          * "sqrtn": do l2 normalization on features in the column
+        For example, for two features represented as the categorical columns:
+
+          ```python
+          # Feature 1
+
+          shape = [2, 2]
+          {
+              [0, 0]: "a"
+              [0, 1]: "b"
+              [1, 0]: "c"
+          }
+
+          # Feature 2
+
+          shape = [2, 3]
+          {
+              [0, 0]: "d"
+              [1, 0]: "e"
+              [1, 1]: "f"
+              [1, 2]: "g"
+          }
+          ```
+
+        with `sparse_combiner` as "mean", the linear model outputs conceptly are
+        ```
+        y_0 = 1.0 / 2.0 * ( w_a + w_ b) + w_c + b_0
+        y_1 = w_d + 1.0 / 3.0 * ( w_e + w_ f + w_g) + b_1
+        ```
+        where `y_i` is the output, `b_i` is the bias, and `w_x` is the weight
+        assigned to the presence of `x` in the input features.
+      trainable: If `True` also add the variable to the graph collection
+        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+      name: Name to give to the Linear Model. All variables and ops created will
+        be scoped by this name.
+      shared_state_manager: SharedEmbeddingStateManager that manages the state
+        of SharedEmbeddingColumns. For more info, look at `FeatureLayer`.
+      **kwargs: Keyword arguments to construct a layer.
 
+    Raises:
+      ValueError: if an item in `feature_columns` is neither a `DenseColumn`
+        nor `CategoricalColumn`.
+    """
+    super(LinearModel, self).__init__(name=name, trainable=trainable, **kwargs)
 
-class _BiasLayer(base.Layer):
-  """A layer for the bias term.
-  """
+    self._feature_columns = _normalize_feature_columns(feature_columns)
+    self._feature_columns = sorted(self._feature_columns, key=lambda x: x.name)
+    for column in self._feature_columns:
+      if not isinstance(column, (DenseColumn, CategoricalColumn)):
+        raise ValueError(
+            'Items of feature_columns must be either a '
+            'DenseColumn or CategoricalColumn. Given: {}'.format(column))
 
-  def __init__(self,
-               units=1,
-               trainable=True,
-               weight_collections=None,
-               name=None,
-               **kwargs):
-    super(_BiasLayer, self).__init__(trainable=trainable, name=name, **kwargs)
     self._units = units
-    self._weight_collections = weight_collections
-
-  def build(self, _):
-    self._bias_variable = self.add_variable(
-        'bias_weights',
-        shape=[self._units],
-        initializer=init_ops.zeros_initializer(),
-        trainable=self.trainable)
-    _add_to_collections(self._bias_variable, self._weight_collections)
-    self.built = True
-
-  def call(self, _):
-    return self._bias_variable
+    self._sparse_combiner = sparse_combiner
 
+    self._state_manager = _StateManagerImpl(self, self.trainable)
+    self._shared_state_manager = shared_state_manager
+    self._bias_variable = None
 
-def _get_expanded_variable_list(var_list):
-  returned_list = []
-  for variable in var_list:
-    if (isinstance(variable, variables.Variable) or
-        resource_variable_ops.is_resource_variable(variable)):
-      returned_list.append(variable)  # Single variable case.
-    else:  # Must be a PartitionedVariable, so convert into a list.
-      returned_list.extend(list(variable))
-  return returned_list
+  def build(self, _):
+    # Create state for shared embedding columns.
+    for column in self._feature_columns:
+      if isinstance(column, SharedEmbeddingColumn):
+        column.create_state(self._shared_state_manager)
 
+    # We need variable scopes for now because we want the variable partitioning
+    # information to percolate down. We also use _pure_variable_scope's here
+    # since we want to open up a name_scope in the `call` method while creating
+    # the ops.
+    with variable_scope._pure_variable_scope(self.name):  # pylint: disable=protected-access
+      for column in self._feature_columns:
+        with variable_scope._pure_variable_scope(column.name):  # pylint: disable=protected-access
+          # Create the state for each feature column
+          if not isinstance(column, SharedEmbeddingColumn):
+            column.create_state(self._state_manager)
+
+          # Create a weight variable for each column.
+          if isinstance(column, CategoricalColumn):
+            first_dim = column.num_buckets
+          else:
+            first_dim = column.variable_shape.num_elements()
+          self._state_manager.create_variable(
+              column,
+              name='weights',
+              dtype=dtypes.float32,
+              shape=(first_dim, self._units),
+              initializer=init_ops.zeros_initializer(),
+              trainable=self.trainable)
+
+      # Create a bias variable.
+      self._bias_variable = self.add_variable(
+          name='bias_weights',
+          dtype=dtypes.float32,
+          shape=[self._units],
+          initializer=init_ops.zeros_initializer(),
+          trainable=self.trainable,
+          # TODO(rohanj): Get rid of this hack once we have a mechanism for
+          # specifying a default partitioner for an entire layer. In that case,
+          # the default getter for Layers should work.
+          getter=variable_scope.get_variable)
 
-def _strip_leading_slashes(name):
-  return name.rsplit('/', 1)[-1]
+    super(LinearModel, self).build(None)
 
+  def call(self, features):
+    """Returns a `Tensor` the represents the predictions of a linear model.
 
-class _LinearModel(training.Model):
-  """Creates a linear model using feature columns.
+    Args:
+      features: A mapping from key to tensors. `_FeatureColumn`s look up via
+        these keys. For example `numeric_column('price')` will look at 'price'
+        key in this dict. Values are `Tensor` or `SparseTensor` depending on
+        corresponding `_FeatureColumn`.
 
-  See `linear_model` for details.
-  """
+    Returns:
+      A `Tensor` which represents predictions/logits of a linear model. Its
+      shape is (batch_size, units) and its dtype is `float32`.
 
-  def __init__(self,
-               feature_columns,
-               units=1,
-               sparse_combiner='sum',
-               weight_collections=None,
-               trainable=True,
-               name=None,
-               **kwargs):
-    super(_LinearModel, self).__init__(name=name, **kwargs)
-    self._feature_columns = fc_old._normalize_feature_columns(  # pylint: disable=protected-access
-        feature_columns)
-    self._weight_collections = list(weight_collections or [])
-    if ops.GraphKeys.GLOBAL_VARIABLES not in self._weight_collections:
-      self._weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
-    if ops.GraphKeys.MODEL_VARIABLES not in self._weight_collections:
-      self._weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
-
-    column_layers = {}
-    for column in sorted(self._feature_columns, key=lambda x: x.name):
-      with variable_scope.variable_scope(
-          None, default_name=column._var_scope_name) as vs:  # pylint: disable=protected-access
-        # Having the fully expressed variable scope name ends up doubly
-        # expressing the outer scope (scope with which this method was called)
-        # in the name of the variable that would get created.
-        column_name = _strip_leading_slashes(vs.name)
-      column_layer = _FCLinearWrapper(column, units, sparse_combiner,
-                                      self._weight_collections, trainable,
-                                      column_name, **kwargs)
-      column_layers[column_name] = column_layer
-    self._column_layers = self._add_layers(column_layers)
-    self._bias_layer = _BiasLayer(
-        units=units,
-        trainable=trainable,
-        weight_collections=self._weight_collections,
-        name='bias_layer',
-        **kwargs)
-    self._cols_to_vars = {}
-
-  def cols_to_vars(self):
-    """Returns a dict mapping _FeatureColumns to variables.
-
-    See `linear_model` for more information.
-    This is not populated till `call` is called i.e. layer is built.
+    Raises:
+      ValueError: If features are not a dictionary.
     """
-    return self._cols_to_vars
-
-  def call(self, features):
-    with variable_scope.variable_scope(self.name):
-      for column in self._feature_columns:
-        if not isinstance(
-            column,
-            (
-                fc_old._DenseColumn,  # pylint: disable=protected-access
-                fc_old._CategoricalColumn)):  # pylint: disable=protected-access
-          raise ValueError(
-              'Items of feature_columns must be either a '
-              '_DenseColumn or _CategoricalColumn. Given: {}'.format(column))
-      weighted_sums = []
-      ordered_columns = []
-      builder = fc_old._LazyBuilder(features)  # pylint: disable=protected-access
-      for layer in sorted(self._column_layers.values(), key=lambda x: x.name):
-        column = layer._feature_column  # pylint: disable=protected-access
-        ordered_columns.append(column)
-        weighted_sum = layer(builder)
+    if not isinstance(features, dict):
+      raise ValueError('We expected a dictionary here. Instead we got: ',
+                       features)
+    transformation_cache = FeatureTransformationCache(features)
+    weighted_sums = []
+    for column in self._feature_columns:
+      with ops.name_scope(column.name):
+        # All the weights used in the linear model are owned by the state
+        # manager associated with this Linear Model.
+        weight_var = self._state_manager.get_variable(column, 'weights')
+
+        # The embedding weights for the SharedEmbeddingColumn are owned by
+        # the shared_state_manager and so we need to pass that in while
+        # creating the weighted sum. For all other columns, the state is owned
+        # by the Linear Model's state manager.
+        if isinstance(column, SharedEmbeddingColumn):
+          state_manager = self._shared_state_manager
+        else:
+          state_manager = self._state_manager
+        weighted_sum = _create_weighted_sum(
+            column=column,
+            transformation_cache=transformation_cache,
+            state_manager=state_manager,
+            sparse_combiner=self._sparse_combiner,
+            weight_var=weight_var)
         weighted_sums.append(weighted_sum)
-        self._cols_to_vars[column] = ops.get_collection(
-            ops.GraphKeys.GLOBAL_VARIABLES, scope=layer.scope_name)
-
-      _verify_static_batch_size_equality(weighted_sums, ordered_columns)
-      predictions_no_bias = math_ops.add_n(
-          weighted_sums, name='weighted_sum_no_bias')
-      predictions = nn_ops.bias_add(
-          predictions_no_bias,
-          self._bias_layer(  # pylint: disable=not-callable
-              builder,
-              scope=variable_scope.get_variable_scope()),  # pylint: disable=not-callable
-          name='weighted_sum')
-      bias = self._bias_layer.variables[0]
-      self._cols_to_vars['bias'] = _get_expanded_variable_list([bias])
-    return predictions
 
-  def _add_layers(self, layers):
-    # "Magic" required for keras.Model classes to track all the variables in
-    # a list of layers.Layer objects.
-    # TODO(ashankar): Figure out API so user code doesn't have to do this.
-    for name, layer in layers.items():
-      setattr(self, 'layer-%s' % name, layer)
-    return layers
+    _verify_static_batch_size_equality(weighted_sums, self._feature_columns)
+    predictions_no_bias = math_ops.add_n(
+        weighted_sums, name='weighted_sum_no_bias')
+    predictions = nn_ops.bias_add(
+        predictions_no_bias, self._bias_variable, name='weighted_sum')
+    return predictions
 
 
 def _transform_features(features, feature_columns, state_manager):
@@ -2053,58 +1922,32 @@ def is_feature_column_v2(feature_columns):
   return True
 
 
-def _create_weighted_sum(column,
-                         transformation_cache,
-                         state_manager,
-                         units,
-                         sparse_combiner,
-                         weight_collections,
-                         trainable,
-                         weight_var=None):
+def _create_weighted_sum(column, transformation_cache, state_manager,
+                         sparse_combiner, weight_var):
   """Creates a weighted sum for a dense/categorical column for linear_model."""
   if isinstance(column, CategoricalColumn):
     return _create_categorical_column_weighted_sum(
         column=column,
         transformation_cache=transformation_cache,
         state_manager=state_manager,
-        units=units,
         sparse_combiner=sparse_combiner,
-        weight_collections=weight_collections,
-        trainable=trainable,
         weight_var=weight_var)
   else:
     return _create_dense_column_weighted_sum(
         column=column,
         transformation_cache=transformation_cache,
         state_manager=state_manager,
-        units=units,
-        weight_collections=weight_collections,
-        trainable=trainable,
         weight_var=weight_var)
 
 
-def _create_dense_column_weighted_sum(column,
-                                      transformation_cache,
-                                      state_manager,
-                                      units,
-                                      weight_collections,
-                                      trainable,
-                                      weight_var=None):
+def _create_dense_column_weighted_sum(column, transformation_cache,
+                                      state_manager, weight_var):
   """Create a weighted sum of a dense column for linear_model."""
   tensor = column.get_dense_tensor(transformation_cache, state_manager)
   num_elements = column.variable_shape.num_elements()
   batch_size = array_ops.shape(tensor)[0]
   tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements))
-  if weight_var is not None:
-    weight = weight_var
-  else:
-    weight = variable_scope.get_variable(
-        name='weights',
-        shape=[num_elements, units],
-        initializer=init_ops.zeros_initializer(),
-        trainable=trainable,
-        collections=weight_collections)
-  return math_ops.matmul(tensor, weight, name='weighted_sum')
+  return math_ops.matmul(tensor, weight_var, name='weighted_sum')
 
 
 class CategoricalColumn(FeatureColumn):
@@ -2145,14 +1988,8 @@ class CategoricalColumn(FeatureColumn):
     pass
 
 
-def _create_categorical_column_weighted_sum(column,
-                                            transformation_cache,
-                                            state_manager,
-                                            units,
-                                            sparse_combiner,
-                                            weight_collections,
-                                            trainable,
-                                            weight_var=None):
+def _create_categorical_column_weighted_sum(
+    column, transformation_cache, state_manager, sparse_combiner, weight_var):
   # pylint: disable=g-doc-return-or-yield,g-doc-args
   """Create a weighted sum of a categorical column for linear_model.
 
@@ -2191,17 +2028,8 @@ def _create_categorical_column_weighted_sum(column,
     weight_tensor = sparse_ops.sparse_reshape(
         weight_tensor, [array_ops.shape(weight_tensor)[0], -1])
 
-  if weight_var is not None:
-    weight = weight_var
-  else:
-    weight = variable_scope.get_variable(
-        name='weights',
-        shape=(column.num_buckets, units),
-        initializer=init_ops.zeros_initializer(),
-        trainable=trainable,
-        collections=weight_collections)
   return _safe_embedding_lookup_sparse(
-      weight,
+      weight_var,
       id_tensor,
       sparse_weights=weight_tensor,
       combiner=sparse_combiner,
@@ -2836,6 +2664,10 @@ class SharedEmbeddingColumn(
 
   def create_state(self, state_manager):
     """Creates the shared embedding lookup variable."""
+    if not isinstance(state_manager, SharedEmbeddingStateManager):
+      raise ValueError('Expected state_manager to be of type '
+                       'SharedEmbeddingStateManager. Obtained type: {}'.format(
+                           type(state_manager)))
     embedding_shape = (self.categorical_column.num_buckets, self.dimension)
     state_manager.create_variable(
         name=self.shared_collection_name,
diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py
index 2970431167..a13a5010e1 100644
--- a/tensorflow/python/feature_column/feature_column_v2_test.py
+++ b/tensorflow/python/feature_column/feature_column_v2_test.py
@@ -31,9 +31,7 @@ from tensorflow.python.client import session
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column as fc_old
 from tensorflow.python.feature_column import feature_column_v2 as fc
-from tensorflow.python.feature_column.feature_column_v2 import _LinearModel
 from tensorflow.python.feature_column.feature_column_v2 import _transform_features
 from tensorflow.python.feature_column.feature_column_v2 import FeatureColumn
 from tensorflow.python.feature_column.feature_column_v2 import FeatureLayer
@@ -48,7 +46,6 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
@@ -360,26 +357,12 @@ class NumericColumnTest(test.TestCase):
     self.assertEqual(a.default_value, ((3., 2.),))
 
   def test_linear_model(self):
-    price = fc_old.numeric_column('price')
-    with ops.Graph().as_default():
-      features = {'price': [[1.], [5.]]}
-      predictions = fc.linear_model(features, [price])
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
-      with _initialized_session() as sess:
-        self.assertAllClose([0.], bias.eval())
-        self.assertAllClose([[0.]], price_var.eval())
-        self.assertAllClose([[0.], [0.]], predictions.eval())
-        sess.run(price_var.assign([[10.]]))
-        self.assertAllClose([[10.], [50.]], predictions.eval())
-
-  def test_keras_linear_model(self):
-    price = fc_old.numeric_column('price')
+    price = fc.numeric_column('price')
     with ops.Graph().as_default():
       features = {'price': [[1.], [5.]]}
-      predictions = get_keras_linear_model_predictions(features, [price])
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
+      model = fc.LinearModel([price])
+      predictions = model(features)
+      price_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose([0.], bias.eval())
         self.assertAllClose([[0.]], price_var.eval())
@@ -564,13 +547,13 @@ class BucketizedColumnTest(test.TestCase):
 
   def test_linear_model_one_input_value(self):
     """Tests linear_model() for input with shape=[1]."""
-    price = fc_old.numeric_column('price', shape=[1])
-    bucketized_price = fc_old.bucketized_column(price, boundaries=[0, 2, 4, 6])
+    price = fc.numeric_column('price', shape=[1])
+    bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
       features = {'price': [[-1.], [1.], [5.], [6.]]}
-      predictions = fc.linear_model(features, [bucketized_price])
-      bias = get_linear_model_bias()
-      bucketized_price_var = get_linear_model_column_var(bucketized_price)
+      model = fc.LinearModel([bucketized_price])
+      predictions = model(features)
+      bucketized_price_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose([0.], bias.eval())
         # One weight variable per bucket, all initialized to zero.
@@ -589,13 +572,13 @@ class BucketizedColumnTest(test.TestCase):
 
   def test_linear_model_two_input_values(self):
     """Tests linear_model() for input with shape=[2]."""
-    price = fc_old.numeric_column('price', shape=[2])
-    bucketized_price = fc_old.bucketized_column(price, boundaries=[0, 2, 4, 6])
+    price = fc.numeric_column('price', shape=[2])
+    bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
       features = {'price': [[-1., 1.], [5., 6.]]}
-      predictions = fc.linear_model(features, [bucketized_price])
-      bias = get_linear_model_bias()
-      bucketized_price_var = get_linear_model_column_var(bucketized_price)
+      model = fc.LinearModel([bucketized_price])
+      predictions = model(features)
+      bucketized_price_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose([0.], bias.eval())
         # One weight per bucket per input column, all initialized to zero.
@@ -616,62 +599,6 @@ class BucketizedColumnTest(test.TestCase):
         sess.run(bias.assign([1.]))
         self.assertAllClose([[81.], [141.]], predictions.eval())
 
-  def test_keras_linear_model_one_input_value(self):
-    """Tests _LinearModel for input with shape=[1]."""
-    price = fc_old.numeric_column('price', shape=[1])
-    bucketized_price = fc_old.bucketized_column(price, boundaries=[0, 2, 4, 6])
-    with ops.Graph().as_default():
-      features = {'price': [[-1.], [1.], [5.], [6.]]}
-      predictions = get_keras_linear_model_predictions(features,
-                                                       [bucketized_price])
-      bias = get_linear_model_bias()
-      bucketized_price_var = get_linear_model_column_var(bucketized_price)
-      with _initialized_session() as sess:
-        self.assertAllClose([0.], bias.eval())
-        # One weight variable per bucket, all initialized to zero.
-        self.assertAllClose([[0.], [0.], [0.], [0.], [0.]],
-                            bucketized_price_var.eval())
-        self.assertAllClose([[0.], [0.], [0.], [0.]], predictions.eval())
-        sess.run(
-            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]]))
-        # price -1. is in the 0th bucket, whose weight is 10.
-        # price 1. is in the 1st bucket, whose weight is 20.
-        # price 5. is in the 3rd bucket, whose weight is 40.
-        # price 6. is in the 4th bucket, whose weight is 50.
-        self.assertAllClose([[10.], [20.], [40.], [50.]], predictions.eval())
-        sess.run(bias.assign([1.]))
-        self.assertAllClose([[11.], [21.], [41.], [51.]], predictions.eval())
-
-  def test_keras_linear_model_two_input_values(self):
-    """Tests _LinearModel for input with shape=[2]."""
-    price = fc_old.numeric_column('price', shape=[2])
-    bucketized_price = fc_old.bucketized_column(price, boundaries=[0, 2, 4, 6])
-    with ops.Graph().as_default():
-      features = {'price': [[-1., 1.], [5., 6.]]}
-      predictions = get_keras_linear_model_predictions(features,
-                                                       [bucketized_price])
-      bias = get_linear_model_bias()
-      bucketized_price_var = get_linear_model_column_var(bucketized_price)
-      with _initialized_session() as sess:
-        self.assertAllClose([0.], bias.eval())
-        # One weight per bucket per input column, all initialized to zero.
-        self.assertAllClose(
-            [[0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]],
-            bucketized_price_var.eval())
-        self.assertAllClose([[0.], [0.]], predictions.eval())
-        sess.run(
-            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.],
-                                         [60.], [70.], [80.], [90.], [100.]]))
-        # 1st example:
-        #   price -1. is in the 0th bucket, whose weight is 10.
-        #   price 1. is in the 6th bucket, whose weight is 70.
-        # 2nd example:
-        #   price 5. is in the 3rd bucket, whose weight is 40.
-        #   price 6. is in the 9th bucket, whose weight is 100.
-        self.assertAllClose([[80.], [140.]], predictions.eval())
-        sess.run(bias.assign([1.]))
-        self.assertAllClose([[81.], [141.]], predictions.eval())
-
 
 class HashedCategoricalColumnTest(test.TestCase):
 
@@ -852,39 +779,18 @@ class HashedCategoricalColumnTest(test.TestCase):
         transformation_cache.get(hashed_sparse, None), id_weight_pair.id_tensor)
 
   def test_linear_model(self):
-    wire_column = fc_old.categorical_column_with_hash_bucket('wire', 4)
-    self.assertEqual(4, wire_column._num_buckets)
-    with ops.Graph().as_default():
-      predictions = fc.linear_model({
-          wire_column.name: sparse_tensor.SparseTensorValue(
-              indices=((0, 0), (1, 0), (1, 1)),
-              values=('marlo', 'skywalker', 'omar'),
-              dense_shape=(2, 2))
-      }, (wire_column,))
-      bias = get_linear_model_bias()
-      wire_var = get_linear_model_column_var(wire_column)
-      with _initialized_session():
-        self.assertAllClose((0.,), bias.eval())
-        self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
-        self.assertAllClose(((0.,), (0.,)), predictions.eval())
-        wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval()
-        # 'marlo' -> 3: wire_var[3] = 4
-        # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6
-        self.assertAllClose(((4.,), (6.,)), predictions.eval())
-
-  def test_keras_linear_model(self):
-    wire_column = fc_old.categorical_column_with_hash_bucket('wire', 4)
-    self.assertEqual(4, wire_column._num_buckets)
+    wire_column = fc.categorical_column_with_hash_bucket('wire', 4)
+    self.assertEqual(4, wire_column.num_buckets)
     with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions({
+      model = fc.LinearModel((wire_column,))
+      predictions = model({
           wire_column.name:
               sparse_tensor.SparseTensorValue(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=('marlo', 'skywalker', 'omar'),
                   dense_shape=(2, 2))
-      }, (wire_column,))
-      bias = get_linear_model_bias()
-      wire_var = get_linear_model_column_var(wire_column)
+      })
+      wire_var, bias = model.variables
       with _initialized_session():
         self.assertAllClose((0.,), bias.eval())
         self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
@@ -1103,93 +1009,12 @@ class CrossedColumnTest(test.TestCase):
 
     Uses data from test_get_sparse_tesnsors_simple.
     """
-    a = fc_old.numeric_column('a', dtype=dtypes.int32, shape=(2,))
-    b = fc_old.bucketized_column(a, boundaries=(0, 1))
-    crossed = fc_old.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5)
-    with ops.Graph().as_default():
-      predictions = fc.linear_model({
-          'a': constant_op.constant(((-1., .5), (.5, 1.))),
-          'c': sparse_tensor.SparseTensor(
-              indices=((0, 0), (1, 0), (1, 1)),
-              values=['cA', 'cB', 'cC'],
-              dense_shape=(2, 2)),
-      }, (crossed,))
-      bias = get_linear_model_bias()
-      crossed_var = get_linear_model_column_var(crossed)
-      with _initialized_session() as sess:
-        self.assertAllClose((0.,), bias.eval())
-        self.assertAllClose(
-            ((0.,), (0.,), (0.,), (0.,), (0.,)), crossed_var.eval())
-        self.assertAllClose(((0.,), (0.,)), predictions.eval())
-        sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,))))
-        # Expected ids after cross = (1, 0, 1, 3, 4, 2)
-        self.assertAllClose(((3.,), (14.,)), predictions.eval())
-        sess.run(bias.assign((.1,)))
-        self.assertAllClose(((3.1,), (14.1,)), predictions.eval())
-
-  def test_linear_model_with_weights(self):
-
-    class _TestColumnWithWeights(fc_old._CategoricalColumn):
-      """Produces sparse IDs and sparse weights."""
-
-      @property
-      def name(self):
-        return 'test_column'
-
-      @property
-      def _parse_example_spec(self):
-        return {
-            self.name: parsing_ops.VarLenFeature(dtypes.int32),
-            '{}_weights'.format(self.name): parsing_ops.VarLenFeature(
-                dtypes.float32),
-            }
-
-      @property
-      def _num_buckets(self):
-        return 5
-
-      def _transform_feature(self, inputs):
-        return (inputs.get(self.name),
-                inputs.get('{}_weights'.format(self.name)))
-
-      def _get_sparse_tensors(self, inputs, weight_collections=None,
-                              trainable=None):
-        """Populates both id_tensor and weight_tensor."""
-        ids_and_weights = inputs.get(self)
-        return fc_old._CategoricalColumn.IdWeightPair(
-            id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1])
-
-    t = _TestColumnWithWeights()
-    crossed = fc_old.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5)
-    with ops.Graph().as_default():
-      with self.assertRaisesRegexp(
-          ValueError,
-          'crossed_column does not support weight_tensor.*{}'.format(t.name)):
-        fc.linear_model({
-            t.name: sparse_tensor.SparseTensor(
-                indices=((0, 0), (1, 0), (1, 1)),
-                values=[0, 1, 2],
-                dense_shape=(2, 2)),
-            '{}_weights'.format(t.name): sparse_tensor.SparseTensor(
-                indices=((0, 0), (1, 0), (1, 1)),
-                values=[1., 10., 2.],
-                dense_shape=(2, 2)),
-            'c': sparse_tensor.SparseTensor(
-                indices=((0, 0), (1, 0), (1, 1)),
-                values=['cA', 'cB', 'cC'],
-                dense_shape=(2, 2)),
-        }, (crossed,))
-
-  def test_keras_linear_model(self):
-    """Tests _LinearModel.
-
-    Uses data from test_get_sparse_tesnsors_simple.
-    """
-    a = fc_old.numeric_column('a', dtype=dtypes.int32, shape=(2,))
-    b = fc_old.bucketized_column(a, boundaries=(0, 1))
-    crossed = fc_old.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5)
+    a = fc.numeric_column('a', dtype=dtypes.int32, shape=(2,))
+    b = fc.bucketized_column(a, boundaries=(0, 1))
+    crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5)
     with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions({
+      model = fc.LinearModel((crossed,))
+      predictions = model({
           'a':
               constant_op.constant(((-1., .5), (.5, 1.))),
           'c':
@@ -1197,847 +1022,126 @@ class CrossedColumnTest(test.TestCase):
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=['cA', 'cB', 'cC'],
                   dense_shape=(2, 2)),
-      }, (crossed,))
-      bias = get_linear_model_bias()
-      crossed_var = get_linear_model_column_var(crossed)
+      })
+      crossed_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose((0.,), bias.eval())
-        self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)),
-                            crossed_var.eval())
-        self.assertAllClose(((0.,), (0.,)), predictions.eval())
-        sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,))))
-        # Expected ids after cross = (1, 0, 1, 3, 4, 2)
-        self.assertAllClose(((3.,), (14.,)), predictions.eval())
-        sess.run(bias.assign((.1,)))
-        self.assertAllClose(((3.1,), (14.1,)), predictions.eval())
-
-  def test_keras_linear_model_with_weights(self):
-
-    class _TestColumnWithWeights(fc_old._CategoricalColumn):
-      """Produces sparse IDs and sparse weights."""
-
-      @property
-      def name(self):
-        return 'test_column'
-
-      @property
-      def _parse_example_spec(self):
-        return {
-            self.name:
-                parsing_ops.VarLenFeature(dtypes.int32),
-            '{}_weights'.format(self.name):
-                parsing_ops.VarLenFeature(dtypes.float32),
-        }
-
-      @property
-      def _num_buckets(self):
-        return 5
-
-      def _transform_feature(self, inputs):
-        return (inputs.get(self.name),
-                inputs.get('{}_weights'.format(self.name)))
-
-      def _get_sparse_tensors(self,
-                              inputs,
-                              weight_collections=None,
-                              trainable=None):
-        """Populates both id_tensor and weight_tensor."""
-        ids_and_weights = inputs.get(self)
-        return fc_old._CategoricalColumn.IdWeightPair(
-            id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1])
-
-    t = _TestColumnWithWeights()
-    crossed = fc_old.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5)
-    with ops.Graph().as_default():
-      with self.assertRaisesRegexp(
-          ValueError,
-          'crossed_column does not support weight_tensor.*{}'.format(t.name)):
-        get_keras_linear_model_predictions({
-            t.name:
-                sparse_tensor.SparseTensor(
-                    indices=((0, 0), (1, 0), (1, 1)),
-                    values=[0, 1, 2],
-                    dense_shape=(2, 2)),
-            '{}_weights'.format(t.name):
-                sparse_tensor.SparseTensor(
-                    indices=((0, 0), (1, 0), (1, 1)),
-                    values=[1., 10., 2.],
-                    dense_shape=(2, 2)),
-            'c':
-                sparse_tensor.SparseTensor(
-                    indices=((0, 0), (1, 0), (1, 1)),
-                    values=['cA', 'cB', 'cC'],
-                    dense_shape=(2, 2)),
-        }, (crossed,))
-
-
-def get_linear_model_bias(name='linear_model'):
-  with variable_scope.variable_scope(name, reuse=True):
-    return variable_scope.get_variable('bias_weights')
-
-
-def get_linear_model_column_var(column, name='linear_model'):
-  return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
-                            name + '/' + column.name)[0]
-
-
-def get_keras_linear_model_predictions(features,
-                                       feature_columns,
-                                       units=1,
-                                       sparse_combiner='sum',
-                                       weight_collections=None,
-                                       trainable=True,
-                                       cols_to_vars=None):
-  keras_linear_model = _LinearModel(
-      feature_columns,
-      units,
-      sparse_combiner,
-      weight_collections,
-      trainable,
-      name='linear_model')
-  retval = keras_linear_model(features)  # pylint: disable=not-callable
-  if cols_to_vars is not None:
-    cols_to_vars.update(keras_linear_model.cols_to_vars())
-  return retval
-
-
-class LinearModelTest(test.TestCase):
-
-  def test_raises_if_empty_feature_columns(self):
-    with self.assertRaisesRegexp(ValueError,
-                                 'feature_columns must not be empty'):
-      fc.linear_model(features={}, feature_columns=[])
-
-  def test_should_be_feature_column(self):
-    with self.assertRaisesRegexp(ValueError, 'must be a _FeatureColumn'):
-      fc.linear_model(features={'a': [[0]]}, feature_columns='NotSupported')
-
-  def test_should_be_dense_or_categorical_column(self):
-
-    class NotSupportedColumn(fc_old._FeatureColumn):
-
-      @property
-      def name(self):
-        return 'NotSupportedColumn'
-
-      def _transform_feature(self, cache):
-        pass
-
-      @property
-      def _parse_example_spec(self):
-        pass
-
-    with self.assertRaisesRegexp(
-        ValueError, 'must be either a _DenseColumn or _CategoricalColumn'):
-      fc.linear_model(
-          features={'a': [[0]]}, feature_columns=[NotSupportedColumn()])
-
-  def test_does_not_support_dict_columns(self):
-    with self.assertRaisesRegexp(
-        ValueError, 'Expected feature_columns to be iterable, found dict.'):
-      fc.linear_model(
-          features={'a': [[0]]},
-          feature_columns={'a': fc_old.numeric_column('a')})
-
-  def test_raises_if_duplicate_name(self):
-    with self.assertRaisesRegexp(
-        ValueError, 'Duplicate feature column name found for columns'):
-      fc.linear_model(
-          features={'a': [[0]]},
-          feature_columns=[
-              fc_old.numeric_column('a'),
-              fc_old.numeric_column('a')
-          ])
-
-  def test_dense_bias(self):
-    price = fc_old.numeric_column('price')
-    with ops.Graph().as_default():
-      features = {'price': [[1.], [5.]]}
-      predictions = fc.linear_model(features, [price])
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
-      with _initialized_session() as sess:
-        self.assertAllClose([0.], bias.eval())
-        sess.run(price_var.assign([[10.]]))
-        sess.run(bias.assign([5.]))
-        self.assertAllClose([[15.], [55.]], predictions.eval())
-
-  def test_sparse_bias(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default():
-      wire_tensor = sparse_tensor.SparseTensor(
-          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
-          indices=[[0, 0], [1, 0], [1, 1]],
-          dense_shape=[2, 2])
-      features = {'wire_cast': wire_tensor}
-      predictions = fc.linear_model(features, [wire_cast])
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      with _initialized_session() as sess:
-        self.assertAllClose([0.], bias.eval())
-        self.assertAllClose([[0.], [0.], [0.], [0.]], wire_cast_var.eval())
-        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
-        sess.run(bias.assign([5.]))
-        self.assertAllClose([[1005.], [10015.]], predictions.eval())
-
-  def test_dense_and_sparse_bias(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    price = fc_old.numeric_column('price')
-    with ops.Graph().as_default():
-      wire_tensor = sparse_tensor.SparseTensor(
-          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
-          indices=[[0, 0], [1, 0], [1, 1]],
-          dense_shape=[2, 2])
-      features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]}
-      predictions = fc.linear_model(features, [wire_cast, price])
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      price_var = get_linear_model_column_var(price)
-      with _initialized_session() as sess:
-        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
-        sess.run(bias.assign([5.]))
-        sess.run(price_var.assign([[10.]]))
-        self.assertAllClose([[1015.], [10065.]], predictions.eval())
-
-  def test_dense_and_sparse_column(self):
-    """When the column is both dense and sparse, uses sparse tensors."""
-
-    class _DenseAndSparseColumn(fc_old._DenseColumn, fc_old._CategoricalColumn):
-
-      @property
-      def name(self):
-        return 'dense_and_sparse_column'
-
-      @property
-      def _parse_example_spec(self):
-        return {self.name: parsing_ops.VarLenFeature(self.dtype)}
-
-      def _transform_feature(self, inputs):
-        return inputs.get(self.name)
-
-      @property
-      def _variable_shape(self):
-        raise ValueError('Should not use this method.')
-
-      def _get_dense_tensor(self, inputs, weight_collections=None,
-                            trainable=None):
-        raise ValueError('Should not use this method.')
-
-      @property
-      def _num_buckets(self):
-        return 4
-
-      def _get_sparse_tensors(self, inputs, weight_collections=None,
-                              trainable=None):
-        sp_tensor = sparse_tensor.SparseTensor(
-            indices=[[0, 0], [1, 0], [1, 1]],
-            values=[2, 0, 3],
-            dense_shape=[2, 2])
-        return fc_old._CategoricalColumn.IdWeightPair(sp_tensor, None)
-
-    dense_and_sparse_column = _DenseAndSparseColumn()
-    with ops.Graph().as_default():
-      sp_tensor = sparse_tensor.SparseTensor(
-          values=['omar', 'stringer', 'marlo'],
-          indices=[[0, 0], [1, 0], [1, 1]],
-          dense_shape=[2, 2])
-      features = {dense_and_sparse_column.name: sp_tensor}
-      predictions = fc.linear_model(features, [dense_and_sparse_column])
-      bias = get_linear_model_bias()
-      dense_and_sparse_column_var = get_linear_model_column_var(
-          dense_and_sparse_column)
-      with _initialized_session() as sess:
-        sess.run(dense_and_sparse_column_var.assign(
-            [[10.], [100.], [1000.], [10000.]]))
-        sess.run(bias.assign([5.]))
-        self.assertAllClose([[1005.], [10015.]], predictions.eval())
-
-  def test_dense_multi_output(self):
-    price = fc_old.numeric_column('price')
-    with ops.Graph().as_default():
-      features = {'price': [[1.], [5.]]}
-      predictions = fc.linear_model(features, [price], units=3)
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
-      with _initialized_session() as sess:
-        self.assertAllClose(np.zeros((3,)), bias.eval())
-        self.assertAllClose(np.zeros((1, 3)), price_var.eval())
-        sess.run(price_var.assign([[10., 100., 1000.]]))
-        sess.run(bias.assign([5., 6., 7.]))
-        self.assertAllClose([[15., 106., 1007.], [55., 506., 5007.]],
-                            predictions.eval())
-
-  def test_sparse_multi_output(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default():
-      wire_tensor = sparse_tensor.SparseTensor(
-          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
-          indices=[[0, 0], [1, 0], [1, 1]],
-          dense_shape=[2, 2])
-      features = {'wire_cast': wire_tensor}
-      predictions = fc.linear_model(features, [wire_cast], units=3)
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      with _initialized_session() as sess:
-        self.assertAllClose(np.zeros((3,)), bias.eval())
-        self.assertAllClose(np.zeros((4, 3)), wire_cast_var.eval())
-        sess.run(
-            wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], [
-                1000., 1100., 1200.
-            ], [10000., 11000., 12000.]]))
-        sess.run(bias.assign([5., 6., 7.]))
-        self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]],
-                            predictions.eval())
-
-  def test_dense_multi_dimension(self):
-    price = fc_old.numeric_column('price', shape=2)
-    with ops.Graph().as_default():
-      features = {'price': [[1., 2.], [5., 6.]]}
-      predictions = fc.linear_model(features, [price])
-      price_var = get_linear_model_column_var(price)
-      with _initialized_session() as sess:
-        self.assertAllClose([[0.], [0.]], price_var.eval())
-        sess.run(price_var.assign([[10.], [100.]]))
-        self.assertAllClose([[210.], [650.]], predictions.eval())
-
-  def test_sparse_multi_rank(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default():
-      wire_tensor = array_ops.sparse_placeholder(dtypes.string)
-      wire_value = sparse_tensor.SparseTensorValue(
-          values=['omar', 'stringer', 'marlo', 'omar'],  # hashed = [2, 0, 3, 2]
-          indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]],
-          dense_shape=[2, 2, 2])
-      features = {'wire_cast': wire_tensor}
-      predictions = fc.linear_model(features, [wire_cast])
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      with _initialized_session() as sess:
-        self.assertAllClose(np.zeros((4, 1)), wire_cast_var.eval())
-        self.assertAllClose(
-            np.zeros((2, 1)),
-            predictions.eval(feed_dict={wire_tensor: wire_value}))
-        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
         self.assertAllClose(
-            [[1010.], [11000.]],
-            predictions.eval(feed_dict={wire_tensor: wire_value}))
-
-  def test_sparse_combiner(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default():
-      wire_tensor = sparse_tensor.SparseTensor(
-          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
-          indices=[[0, 0], [1, 0], [1, 1]],
-          dense_shape=[2, 2])
-      features = {'wire_cast': wire_tensor}
-      predictions = fc.linear_model(
-          features, [wire_cast], sparse_combiner='mean')
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      with _initialized_session() as sess:
-        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
-        sess.run(bias.assign([5.]))
-        self.assertAllClose([[1005.], [5010.]], predictions.eval())
-
-  def test_sparse_combiner_with_negative_weights(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    wire_cast_weights = fc_old.weighted_categorical_column(wire_cast, 'weights')
-
-    with ops.Graph().as_default():
-      wire_tensor = sparse_tensor.SparseTensor(
-          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
-          indices=[[0, 0], [1, 0], [1, 1]],
-          dense_shape=[2, 2])
-      features = {
-          'wire_cast': wire_tensor,
-          'weights': constant_op.constant([[1., 1., -1.0]])
-      }
-      predictions = fc.linear_model(
-          features, [wire_cast_weights], sparse_combiner='sum')
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      with _initialized_session() as sess:
-        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
-        sess.run(bias.assign([5.]))
-        self.assertAllClose([[1005.], [-9985.]], predictions.eval())
-
-  def test_dense_multi_dimension_multi_output(self):
-    price = fc_old.numeric_column('price', shape=2)
-    with ops.Graph().as_default():
-      features = {'price': [[1., 2.], [5., 6.]]}
-      predictions = fc.linear_model(features, [price], units=3)
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
-      with _initialized_session() as sess:
-        self.assertAllClose(np.zeros((3,)), bias.eval())
-        self.assertAllClose(np.zeros((2, 3)), price_var.eval())
-        sess.run(price_var.assign([[1., 2., 3.], [10., 100., 1000.]]))
-        sess.run(bias.assign([2., 3., 4.]))
-        self.assertAllClose([[23., 205., 2007.], [67., 613., 6019.]],
-                            predictions.eval())
-
-  def test_raises_if_shape_mismatch(self):
-    price = fc_old.numeric_column('price', shape=2)
-    with ops.Graph().as_default():
-      features = {'price': [[1.], [5.]]}
-      with self.assertRaisesRegexp(
-          Exception,
-          r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
-        fc.linear_model(features, [price])
-
-  def test_dense_reshaping(self):
-    price = fc_old.numeric_column('price', shape=[1, 2])
-    with ops.Graph().as_default():
-      features = {'price': [[[1., 2.]], [[5., 6.]]]}
-      predictions = fc.linear_model(features, [price])
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
-      with _initialized_session() as sess:
-        self.assertAllClose([0.], bias.eval())
-        self.assertAllClose([[0.], [0.]], price_var.eval())
-        self.assertAllClose([[0.], [0.]], predictions.eval())
-        sess.run(price_var.assign([[10.], [100.]]))
-        self.assertAllClose([[210.], [650.]], predictions.eval())
-
-  def test_dense_multi_column(self):
-    price1 = fc_old.numeric_column('price1', shape=2)
-    price2 = fc_old.numeric_column('price2')
-    with ops.Graph().as_default():
-      features = {
-          'price1': [[1., 2.], [5., 6.]],
-          'price2': [[3.], [4.]]
-      }
-      predictions = fc.linear_model(features, [price1, price2])
-      bias = get_linear_model_bias()
-      price1_var = get_linear_model_column_var(price1)
-      price2_var = get_linear_model_column_var(price2)
-      with _initialized_session() as sess:
-        self.assertAllClose([0.], bias.eval())
-        self.assertAllClose([[0.], [0.]], price1_var.eval())
-        self.assertAllClose([[0.]], price2_var.eval())
-        self.assertAllClose([[0.], [0.]], predictions.eval())
-        sess.run(price1_var.assign([[10.], [100.]]))
-        sess.run(price2_var.assign([[1000.]]))
-        sess.run(bias.assign([7.]))
-        self.assertAllClose([[3217.], [4657.]], predictions.eval())
-
-  def test_fills_cols_to_vars(self):
-    price1 = fc_old.numeric_column('price1', shape=2)
-    price2 = fc_old.numeric_column('price2')
-    with ops.Graph().as_default():
-      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
-      cols_to_vars = {}
-      fc.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars)
-      bias = get_linear_model_bias()
-      price1_var = get_linear_model_column_var(price1)
-      price2_var = get_linear_model_column_var(price2)
-      self.assertAllEqual(cols_to_vars['bias'], [bias])
-      self.assertAllEqual(cols_to_vars[price1], [price1_var])
-      self.assertAllEqual(cols_to_vars[price2], [price2_var])
-
-  def test_fills_cols_to_vars_partitioned_variables(self):
-    price1 = fc_old.numeric_column('price1', shape=2)
-    price2 = fc_old.numeric_column('price2', shape=3)
-    with ops.Graph().as_default():
-      features = {
-          'price1': [[1., 2.], [6., 7.]],
-          'price2': [[3., 4., 5.], [8., 9., 10.]]
-      }
-      cols_to_vars = {}
-      with variable_scope.variable_scope(
-          'linear',
-          partitioner=partitioned_variables.fixed_size_partitioner(2, axis=0)):
-        fc.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars)
-      with _initialized_session():
-        self.assertEqual([0.], cols_to_vars['bias'][0].eval())
-        # Partitioning shards the [2, 1] price1 var into 2 [1, 1] Variables.
-        self.assertAllEqual([[0.]], cols_to_vars[price1][0].eval())
-        self.assertAllEqual([[0.]], cols_to_vars[price1][1].eval())
-        # Partitioning shards the [3, 1] price2 var into a [2, 1] Variable and
-        # a [1, 1] Variable.
-        self.assertAllEqual([[0.], [0.]], cols_to_vars[price2][0].eval())
-        self.assertAllEqual([[0.]], cols_to_vars[price2][1].eval())
-
-  def test_dense_collection(self):
-    price = fc_old.numeric_column('price')
-    with ops.Graph().as_default() as g:
-      features = {'price': [[1.], [5.]]}
-      fc.linear_model(features, [price], weight_collections=['my-vars'])
-      my_vars = g.get_collection('my-vars')
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
-      self.assertIn(bias, my_vars)
-      self.assertIn(price_var, my_vars)
-
-  def test_sparse_collection(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default() as g:
-      wire_tensor = sparse_tensor.SparseTensor(
-          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
-      features = {'wire_cast': wire_tensor}
-      fc.linear_model(
-          features, [wire_cast], weight_collections=['my-vars'])
-      my_vars = g.get_collection('my-vars')
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      self.assertIn(bias, my_vars)
-      self.assertIn(wire_cast_var, my_vars)
-
-  def test_dense_trainable_default(self):
-    price = fc_old.numeric_column('price')
-    with ops.Graph().as_default() as g:
-      features = {'price': [[1.], [5.]]}
-      fc.linear_model(features, [price])
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
-      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      self.assertIn(bias, trainable_vars)
-      self.assertIn(price_var, trainable_vars)
-
-  def test_sparse_trainable_default(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default() as g:
-      wire_tensor = sparse_tensor.SparseTensor(
-          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
-      features = {'wire_cast': wire_tensor}
-      fc.linear_model(features, [wire_cast])
-      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      self.assertIn(bias, trainable_vars)
-      self.assertIn(wire_cast_var, trainable_vars)
-
-  def test_dense_trainable_false(self):
-    price = fc_old.numeric_column('price')
-    with ops.Graph().as_default() as g:
-      features = {'price': [[1.], [5.]]}
-      fc.linear_model(features, [price], trainable=False)
-      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      self.assertEqual([], trainable_vars)
-
-  def test_sparse_trainable_false(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default() as g:
-      wire_tensor = sparse_tensor.SparseTensor(
-          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
-      features = {'wire_cast': wire_tensor}
-      fc.linear_model(features, [wire_cast], trainable=False)
-      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      self.assertEqual([], trainable_vars)
-
-  def test_column_order(self):
-    price_a = fc_old.numeric_column('price_a')
-    price_b = fc_old.numeric_column('price_b')
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default() as g:
-      features = {
-          'price_a': [[1.]],
-          'price_b': [[3.]],
-          'wire_cast':
-              sparse_tensor.SparseTensor(
-                  values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
-      }
-      fc.linear_model(
-          features, [price_a, wire_cast, price_b],
-          weight_collections=['my-vars'])
-      my_vars = g.get_collection('my-vars')
-      self.assertIn('price_a', my_vars[0].name)
-      self.assertIn('price_b', my_vars[1].name)
-      self.assertIn('wire_cast', my_vars[2].name)
-
-    with ops.Graph().as_default() as g:
-      features = {
-          'price_a': [[1.]],
-          'price_b': [[3.]],
-          'wire_cast':
-              sparse_tensor.SparseTensor(
-                  values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
-      }
-      fc.linear_model(
-          features, [wire_cast, price_b, price_a],
-          weight_collections=['my-vars'])
-      my_vars = g.get_collection('my-vars')
-      self.assertIn('price_a', my_vars[0].name)
-      self.assertIn('price_b', my_vars[1].name)
-      self.assertIn('wire_cast', my_vars[2].name)
-
-  def test_static_batch_size_mismatch(self):
-    price1 = fc_old.numeric_column('price1')
-    price2 = fc_old.numeric_column('price2')
-    with ops.Graph().as_default():
-      features = {
-          'price1': [[1.], [5.], [7.]],  # batchsize = 3
-          'price2': [[3.], [4.]]  # batchsize = 2
-      }
-    with self.assertRaisesRegexp(
-        ValueError,
-        'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
-      fc.linear_model(features, [price1, price2])
-
-  def test_subset_of_static_batch_size_mismatch(self):
-    price1 = fc_old.numeric_column('price1')
-    price2 = fc_old.numeric_column('price2')
-    price3 = fc_old.numeric_column('price3')
-    with ops.Graph().as_default():
-      features = {
-          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
-          'price2': [[3.], [4.]],  # batchsize = 2
-          'price3': [[3.], [4.], [5.]]  # batchsize = 3
-      }
-      with self.assertRaisesRegexp(
-          ValueError,
-          'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
-        fc.linear_model(features, [price1, price2, price3])
-
-  def test_runtime_batch_size_mismatch(self):
-    price1 = fc_old.numeric_column('price1')
-    price2 = fc_old.numeric_column('price2')
-    with ops.Graph().as_default():
-      features = {
-          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
-          'price2': [[3.], [4.]]  # batchsize = 2
-      }
-      predictions = fc.linear_model(features, [price1, price2])
-      with _initialized_session() as sess:
-        with self.assertRaisesRegexp(errors.OpError,
-                                     'must have the same size and shape'):
-          sess.run(
-              predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]})
-
-  def test_runtime_batch_size_matches(self):
-    price1 = fc_old.numeric_column('price1')
-    price2 = fc_old.numeric_column('price2')
-    with ops.Graph().as_default():
-      features = {
-          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
-          'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
-      }
-      predictions = fc.linear_model(features, [price1, price2])
-      with _initialized_session() as sess:
-        sess.run(
-            predictions,
-            feed_dict={
-                features['price1']: [[1.], [5.]],
-                features['price2']: [[1.], [5.]],
-            })
-
-  def test_with_numpy_input_fn(self):
-    price = fc_old.numeric_column('price')
-    price_buckets = fc_old.bucketized_column(
-        price, boundaries=[
-            0.,
-            10.,
-            100.,
-        ])
-    body_style = fc_old.categorical_column_with_vocabulary_list(
-        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
-
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'price': np.array([-1., 2., 13., 104.]),
-            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
-        },
-        batch_size=2,
-        shuffle=False)
-    features = input_fn()
-    net = fc.linear_model(features, [price_buckets, body_style])
-    # self.assertEqual(1 + 3 + 5, net.shape[1])
-    with _initialized_session() as sess:
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-
-      bias = get_linear_model_bias()
-      price_buckets_var = get_linear_model_column_var(price_buckets)
-      body_style_var = get_linear_model_column_var(body_style)
-
-      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
-      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
-      sess.run(bias.assign([5.]))
-
-      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def test_with_1d_sparse_tensor(self):
-    price = fc_old.numeric_column('price')
-    price_buckets = fc_old.bucketized_column(
-        price, boundaries=[
-            0.,
-            10.,
-            100.,
-        ])
-    body_style = fc_old.categorical_column_with_vocabulary_list(
-        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
-
-    # Provides 1-dim tensor and dense tensor.
-    features = {
-        'price': constant_op.constant([-1., 12.,]),
-        'body-style': sparse_tensor.SparseTensor(
-            indices=((0,), (1,)),
-            values=('sedan', 'hardtop'),
-            dense_shape=(2,)),
-    }
-    self.assertEqual(1, features['price'].shape.ndims)
-    self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
-
-    net = fc.linear_model(features, [price_buckets, body_style])
-    with _initialized_session() as sess:
-      bias = get_linear_model_bias()
-      price_buckets_var = get_linear_model_column_var(price_buckets)
-      body_style_var = get_linear_model_column_var(body_style)
-
-      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
-      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
-      sess.run(bias.assign([5.]))
-
-      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net))
-
-  def test_with_1d_unknown_shape_sparse_tensor(self):
-    price = fc_old.numeric_column('price')
-    price_buckets = fc_old.bucketized_column(
-        price, boundaries=[
-            0.,
-            10.,
-            100.,
-        ])
-    body_style = fc_old.categorical_column_with_vocabulary_list(
-        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
-    country = fc_old.categorical_column_with_vocabulary_list(
-        'country', vocabulary_list=['US', 'JP', 'CA'])
+            ((0.,), (0.,), (0.,), (0.,), (0.,)), crossed_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,))))
+        # Expected ids after cross = (1, 0, 1, 3, 4, 2)
+        self.assertAllClose(((3.,), (14.,)), predictions.eval())
+        sess.run(bias.assign((.1,)))
+        self.assertAllClose(((3.1,), (14.1,)), predictions.eval())
 
-    # Provides 1-dim tensor and dense tensor.
-    features = {
-        'price': array_ops.placeholder(dtypes.float32),
-        'body-style': array_ops.sparse_placeholder(dtypes.string),
-        'country': array_ops.placeholder(dtypes.string),
-    }
-    self.assertIsNone(features['price'].shape.ndims)
-    self.assertIsNone(features['body-style'].get_shape().ndims)
+  def test_linear_model_with_weights(self):
 
-    price_data = np.array([-1., 12.])
-    body_style_data = sparse_tensor.SparseTensorValue(
-        indices=((0,), (1,)),
-        values=('sedan', 'hardtop'),
-        dense_shape=(2,))
-    country_data = np.array(['US', 'CA'])
+    class _TestColumnWithWeights(fc.CategoricalColumn):
+      """Produces sparse IDs and sparse weights."""
 
-    net = fc.linear_model(features, [price_buckets, body_style, country])
-    bias = get_linear_model_bias()
-    price_buckets_var = get_linear_model_column_var(price_buckets)
-    body_style_var = get_linear_model_column_var(body_style)
-    with _initialized_session() as sess:
-      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
-      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
-      sess.run(bias.assign([5.]))
+      @property
+      def name(self):
+        return 'test_column'
 
-      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]],
-                          sess.run(
-                              net,
-                              feed_dict={
-                                  features['price']: price_data,
-                                  features['body-style']: body_style_data,
-                                  features['country']: country_data
-                              }))
+      @property
+      def parse_example_spec(self):
+        return {
+            self.name: parsing_ops.VarLenFeature(dtypes.int32),
+            '{}_weights'.format(self.name): parsing_ops.VarLenFeature(
+                dtypes.float32),
+            }
 
-  def test_with_rank_0_feature(self):
-    price = fc_old.numeric_column('price')
-    features = {
-        'price': constant_op.constant(0),
-    }
-    self.assertEqual(0, features['price'].shape.ndims)
+      @property
+      def num_buckets(self):
+        return 5
 
-    # Static rank 0 should fail
-    with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
-      fc.linear_model(features, [price])
+      def transform_feature(self, transformation_cache, state_manager):
+        return (transformation_cache.get(self.name, state_manager),
+                transformation_cache.get('{}_weights'.format(self.name),
+                                         state_manager))
 
-    # Dynamic rank 0 should fail
-    features = {
-        'price': array_ops.placeholder(dtypes.float32),
-    }
-    net = fc.linear_model(features, [price])
-    self.assertEqual(1, net.shape[1])
-    with _initialized_session() as sess:
-      with self.assertRaisesOpError('Feature .* cannot have rank 0'):
-        sess.run(net, feed_dict={features['price']: np.array(1)})
+      def get_sparse_tensors(self, transformation_cache, state_manager):
+        """Populates both id_tensor and weight_tensor."""
+        ids_and_weights = transformation_cache.get(self, state_manager)
+        return fc.CategoricalColumn.IdWeightPair(
+            id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1])
 
-  def test_multiple_linear_models(self):
-    price = fc_old.numeric_column('price')
+    t = _TestColumnWithWeights()
+    crossed = fc.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5)
     with ops.Graph().as_default():
-      features1 = {'price': [[1.], [5.]]}
-      features2 = {'price': [[2.], [10.]]}
-      predictions1 = fc.linear_model(features1, [price])
-      predictions2 = fc.linear_model(features2, [price])
-      bias1 = get_linear_model_bias(name='linear_model')
-      bias2 = get_linear_model_bias(name='linear_model_1')
-      price_var1 = get_linear_model_column_var(price, name='linear_model')
-      price_var2 = get_linear_model_column_var(price, name='linear_model_1')
-      with _initialized_session() as sess:
-        self.assertAllClose([0.], bias1.eval())
-        sess.run(price_var1.assign([[10.]]))
-        sess.run(bias1.assign([5.]))
-        self.assertAllClose([[15.], [55.]], predictions1.eval())
-        self.assertAllClose([0.], bias2.eval())
-        sess.run(price_var2.assign([[10.]]))
-        sess.run(bias2.assign([5.]))
-        self.assertAllClose([[25.], [105.]], predictions2.eval())
+      with self.assertRaisesRegexp(
+          ValueError,
+          'crossed_column does not support weight_tensor.*{}'.format(t.name)):
+        model = fc.LinearModel((crossed,))
+        model({
+            t.name:
+                sparse_tensor.SparseTensor(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=[0, 1, 2],
+                    dense_shape=(2, 2)),
+            '{}_weights'.format(t.name):
+                sparse_tensor.SparseTensor(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=[1., 10., 2.],
+                    dense_shape=(2, 2)),
+            'c':
+                sparse_tensor.SparseTensor(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=['cA', 'cB', 'cC'],
+                    dense_shape=(2, 2)),
+        })
 
 
-class _LinearModelTest(test.TestCase):
+class LinearModelTest(test.TestCase):
 
   def test_raises_if_empty_feature_columns(self):
     with self.assertRaisesRegexp(ValueError,
                                  'feature_columns must not be empty'):
-      get_keras_linear_model_predictions(features={}, feature_columns=[])
+      fc.LinearModel(feature_columns=[])
 
   def test_should_be_feature_column(self):
-    with self.assertRaisesRegexp(ValueError, 'must be a _FeatureColumn'):
-      get_keras_linear_model_predictions(
-          features={'a': [[0]]}, feature_columns='NotSupported')
+    with self.assertRaisesRegexp(ValueError, 'must be a FeatureColumn'):
+      fc.LinearModel(feature_columns='NotSupported')
 
   def test_should_be_dense_or_categorical_column(self):
 
-    class NotSupportedColumn(fc_old._FeatureColumn):
+    class NotSupportedColumn(fc.FeatureColumn):
 
       @property
       def name(self):
         return 'NotSupportedColumn'
 
-      def _transform_feature(self, cache):
+      def transform_feature(self, transformation_cache, state_manager):
         pass
 
       @property
-      def _parse_example_spec(self):
+      def parse_example_spec(self):
         pass
 
     with self.assertRaisesRegexp(
-        ValueError, 'must be either a _DenseColumn or _CategoricalColumn'):
-      get_keras_linear_model_predictions(
-          features={'a': [[0]]}, feature_columns=[NotSupportedColumn()])
+        ValueError, 'must be either a DenseColumn or CategoricalColumn'):
+      fc.LinearModel(feature_columns=[NotSupportedColumn()])
 
   def test_does_not_support_dict_columns(self):
     with self.assertRaisesRegexp(
         ValueError, 'Expected feature_columns to be iterable, found dict.'):
-      fc.linear_model(
-          features={'a': [[0]]},
-          feature_columns={'a': fc_old.numeric_column('a')})
+      fc.LinearModel(feature_columns={'a': fc.numeric_column('a')})
 
   def test_raises_if_duplicate_name(self):
     with self.assertRaisesRegexp(
         ValueError, 'Duplicate feature column name found for columns'):
-      get_keras_linear_model_predictions(
-          features={'a': [[0]]},
-          feature_columns=[
-              fc_old.numeric_column('a'),
-              fc_old.numeric_column('a')
-          ])
+      fc.LinearModel(
+          feature_columns=[fc.numeric_column('a'),
+                           fc.numeric_column('a')])
 
   def test_dense_bias(self):
-    price = fc_old.numeric_column('price')
+    price = fc.numeric_column('price')
     with ops.Graph().as_default():
       features = {'price': [[1.], [5.]]}
-      predictions = get_keras_linear_model_predictions(features, [price])
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
+      model = fc.LinearModel([price])
+      predictions = model(features)
+      price_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose([0.], bias.eval())
         sess.run(price_var.assign([[10.]]))
@@ -2045,16 +1149,16 @@ class _LinearModelTest(test.TestCase):
         self.assertAllClose([[15.], [55.]], predictions.eval())
 
   def test_sparse_bias(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
     with ops.Graph().as_default():
       wire_tensor = sparse_tensor.SparseTensor(
           values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
           indices=[[0, 0], [1, 0], [1, 1]],
           dense_shape=[2, 2])
       features = {'wire_cast': wire_tensor}
-      predictions = get_keras_linear_model_predictions(features, [wire_cast])
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
+      model = fc.LinearModel([wire_cast])
+      predictions = model(features)
+      wire_cast_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose([0.], bias.eval())
         self.assertAllClose([[0.], [0.], [0.], [0.]], wire_cast_var.eval())
@@ -2063,19 +1167,17 @@ class _LinearModelTest(test.TestCase):
         self.assertAllClose([[1005.], [10015.]], predictions.eval())
 
   def test_dense_and_sparse_bias(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    price = fc_old.numeric_column('price')
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    price = fc.numeric_column('price')
     with ops.Graph().as_default():
       wire_tensor = sparse_tensor.SparseTensor(
           values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
           indices=[[0, 0], [1, 0], [1, 1]],
           dense_shape=[2, 2])
       features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]}
-      predictions = get_keras_linear_model_predictions(features,
-                                                       [wire_cast, price])
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      price_var = get_linear_model_column_var(price)
+      model = fc.LinearModel([wire_cast, price])
+      predictions = model(features)
+      price_var, wire_cast_var, bias = model.variables
       with _initialized_session() as sess:
         sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
         sess.run(bias.assign([5.]))
@@ -2085,42 +1187,36 @@ class _LinearModelTest(test.TestCase):
   def test_dense_and_sparse_column(self):
     """When the column is both dense and sparse, uses sparse tensors."""
 
-    class _DenseAndSparseColumn(fc_old._DenseColumn, fc_old._CategoricalColumn):
+    class _DenseAndSparseColumn(fc.DenseColumn, fc.CategoricalColumn):
 
       @property
       def name(self):
         return 'dense_and_sparse_column'
 
       @property
-      def _parse_example_spec(self):
+      def parse_example_spec(self):
         return {self.name: parsing_ops.VarLenFeature(self.dtype)}
 
-      def _transform_feature(self, inputs):
-        return inputs.get(self.name)
+      def transform_feature(self, transformation_cache, state_manager):
+        return transformation_cache.get(self.name, state_manager)
 
       @property
-      def _variable_shape(self):
+      def variable_shape(self):
         raise ValueError('Should not use this method.')
 
-      def _get_dense_tensor(self,
-                            inputs,
-                            weight_collections=None,
-                            trainable=None):
+      def get_dense_tensor(self, transformation_cache, state_manager):
         raise ValueError('Should not use this method.')
 
       @property
-      def _num_buckets(self):
+      def num_buckets(self):
         return 4
 
-      def _get_sparse_tensors(self,
-                              inputs,
-                              weight_collections=None,
-                              trainable=None):
+      def get_sparse_tensors(self, transformation_cache, state_manager):
         sp_tensor = sparse_tensor.SparseTensor(
             indices=[[0, 0], [1, 0], [1, 1]],
             values=[2, 0, 3],
             dense_shape=[2, 2])
-        return fc_old._CategoricalColumn.IdWeightPair(sp_tensor, None)
+        return fc.CategoricalColumn.IdWeightPair(sp_tensor, None)
 
     dense_and_sparse_column = _DenseAndSparseColumn()
     with ops.Graph().as_default():
@@ -2129,26 +1225,22 @@ class _LinearModelTest(test.TestCase):
           indices=[[0, 0], [1, 0], [1, 1]],
           dense_shape=[2, 2])
       features = {dense_and_sparse_column.name: sp_tensor}
-      predictions = get_keras_linear_model_predictions(
-          features, [dense_and_sparse_column])
-      bias = get_linear_model_bias()
-      dense_and_sparse_column_var = get_linear_model_column_var(
-          dense_and_sparse_column)
+      model = fc.LinearModel([dense_and_sparse_column])
+      predictions = model(features)
+      dense_and_sparse_column_var, bias = model.variables
       with _initialized_session() as sess:
-        sess.run(
-            dense_and_sparse_column_var.assign([[10.], [100.], [1000.],
-                                                [10000.]]))
+        sess.run(dense_and_sparse_column_var.assign(
+            [[10.], [100.], [1000.], [10000.]]))
         sess.run(bias.assign([5.]))
         self.assertAllClose([[1005.], [10015.]], predictions.eval())
 
   def test_dense_multi_output(self):
-    price = fc_old.numeric_column('price')
+    price = fc.numeric_column('price')
     with ops.Graph().as_default():
       features = {'price': [[1.], [5.]]}
-      predictions = get_keras_linear_model_predictions(
-          features, [price], units=3)
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
+      model = fc.LinearModel([price], units=3)
+      predictions = model(features)
+      price_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose(np.zeros((3,)), bias.eval())
         self.assertAllClose(np.zeros((1, 3)), price_var.eval())
@@ -2158,41 +1250,41 @@ class _LinearModelTest(test.TestCase):
                             predictions.eval())
 
   def test_sparse_multi_output(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
     with ops.Graph().as_default():
       wire_tensor = sparse_tensor.SparseTensor(
           values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
           indices=[[0, 0], [1, 0], [1, 1]],
           dense_shape=[2, 2])
       features = {'wire_cast': wire_tensor}
-      predictions = get_keras_linear_model_predictions(
-          features, [wire_cast], units=3)
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
+      model = fc.LinearModel([wire_cast], units=3)
+      predictions = model(features)
+      wire_cast_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose(np.zeros((3,)), bias.eval())
         self.assertAllClose(np.zeros((4, 3)), wire_cast_var.eval())
         sess.run(
-            wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.],
-                                  [1000., 1100.,
-                                   1200.], [10000., 11000., 12000.]]))
+            wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], [
+                1000., 1100., 1200.
+            ], [10000., 11000., 12000.]]))
         sess.run(bias.assign([5., 6., 7.]))
         self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]],
                             predictions.eval())
 
   def test_dense_multi_dimension(self):
-    price = fc_old.numeric_column('price', shape=2)
+    price = fc.numeric_column('price', shape=2)
     with ops.Graph().as_default():
       features = {'price': [[1., 2.], [5., 6.]]}
-      predictions = get_keras_linear_model_predictions(features, [price])
-      price_var = get_linear_model_column_var(price)
+      model = fc.LinearModel([price])
+      predictions = model(features)
+      price_var, _ = model.variables
       with _initialized_session() as sess:
         self.assertAllClose([[0.], [0.]], price_var.eval())
         sess.run(price_var.assign([[10.], [100.]]))
         self.assertAllClose([[210.], [650.]], predictions.eval())
 
   def test_sparse_multi_rank(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
     with ops.Graph().as_default():
       wire_tensor = array_ops.sparse_placeholder(dtypes.string)
       wire_value = sparse_tensor.SparseTensorValue(
@@ -2200,8 +1292,9 @@ class _LinearModelTest(test.TestCase):
           indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]],
           dense_shape=[2, 2, 2])
       features = {'wire_cast': wire_tensor}
-      predictions = get_keras_linear_model_predictions(features, [wire_cast])
-      wire_cast_var = get_linear_model_column_var(wire_cast)
+      model = fc.LinearModel([wire_cast])
+      predictions = model(features)
+      wire_cast_var, _ = model.variables
       with _initialized_session() as sess:
         self.assertAllClose(np.zeros((4, 1)), wire_cast_var.eval())
         self.assertAllClose(
@@ -2213,30 +1306,49 @@ class _LinearModelTest(test.TestCase):
             predictions.eval(feed_dict={wire_tensor: wire_value}))
 
   def test_sparse_combiner(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
     with ops.Graph().as_default():
       wire_tensor = sparse_tensor.SparseTensor(
           values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
           indices=[[0, 0], [1, 0], [1, 1]],
           dense_shape=[2, 2])
       features = {'wire_cast': wire_tensor}
-      predictions = get_keras_linear_model_predictions(
-          features, [wire_cast], sparse_combiner='mean')
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
+      model = fc.LinearModel([wire_cast], sparse_combiner='mean')
+      predictions = model(features)
+      wire_cast_var, bias = model.variables
       with _initialized_session() as sess:
         sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
         sess.run(bias.assign([5.]))
         self.assertAllClose([[1005.], [5010.]], predictions.eval())
 
+  def test_sparse_combiner_with_negative_weights(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    wire_cast_weights = fc.weighted_categorical_column(wire_cast, 'weights')
+
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {
+          'wire_cast': wire_tensor,
+          'weights': constant_op.constant([[1., 1., -1.0]])
+      }
+      model = fc.LinearModel([wire_cast_weights], sparse_combiner='sum')
+      predictions = model(features)
+      wire_cast_var, bias = model.variables
+      with _initialized_session() as sess:
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[1005.], [-9985.]], predictions.eval())
+
   def test_dense_multi_dimension_multi_output(self):
-    price = fc_old.numeric_column('price', shape=2)
+    price = fc.numeric_column('price', shape=2)
     with ops.Graph().as_default():
       features = {'price': [[1., 2.], [5., 6.]]}
-      predictions = get_keras_linear_model_predictions(
-          features, [price], units=3)
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
+      model = fc.LinearModel([price], units=3)
+      predictions = model(features)
+      price_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose(np.zeros((3,)), bias.eval())
         self.assertAllClose(np.zeros((2, 3)), price_var.eval())
@@ -2246,21 +1358,22 @@ class _LinearModelTest(test.TestCase):
                             predictions.eval())
 
   def test_raises_if_shape_mismatch(self):
-    price = fc_old.numeric_column('price', shape=2)
+    price = fc.numeric_column('price', shape=2)
     with ops.Graph().as_default():
       features = {'price': [[1.], [5.]]}
       with self.assertRaisesRegexp(
           Exception,
           r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
-        get_keras_linear_model_predictions(features, [price])
+        model = fc.LinearModel([price])
+        model(features)
 
   def test_dense_reshaping(self):
-    price = fc_old.numeric_column('price', shape=[1, 2])
+    price = fc.numeric_column('price', shape=[1, 2])
     with ops.Graph().as_default():
       features = {'price': [[[1., 2.]], [[5., 6.]]]}
-      predictions = get_keras_linear_model_predictions(features, [price])
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
+      model = fc.LinearModel([price])
+      predictions = model(features)
+      price_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose([0.], bias.eval())
         self.assertAllClose([[0.], [0.]], price_var.eval())
@@ -2269,15 +1382,16 @@ class _LinearModelTest(test.TestCase):
         self.assertAllClose([[210.], [650.]], predictions.eval())
 
   def test_dense_multi_column(self):
-    price1 = fc_old.numeric_column('price1', shape=2)
-    price2 = fc_old.numeric_column('price2')
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
-      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
-      predictions = get_keras_linear_model_predictions(features,
-                                                       [price1, price2])
-      bias = get_linear_model_bias()
-      price1_var = get_linear_model_column_var(price1)
-      price2_var = get_linear_model_column_var(price2)
+      features = {
+          'price1': [[1., 2.], [5., 6.]],
+          'price2': [[3.], [4.]]
+      }
+      model = fc.LinearModel([price1, price2])
+      predictions = model(features)
+      price1_var, price2_var, bias = model.variables
       with _initialized_session() as sess:
         self.assertAllClose([0.], bias.eval())
         self.assertAllClose([[0.], [0.]], price1_var.eval())
@@ -2288,118 +1402,55 @@ class _LinearModelTest(test.TestCase):
         sess.run(bias.assign([7.]))
         self.assertAllClose([[3217.], [4657.]], predictions.eval())
 
-  def test_fills_cols_to_vars(self):
-    price1 = fc_old.numeric_column('price1', shape=2)
-    price2 = fc_old.numeric_column('price2')
-    with ops.Graph().as_default():
-      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
-      cols_to_vars = {}
-      get_keras_linear_model_predictions(
-          features, [price1, price2], cols_to_vars=cols_to_vars)
-      bias = get_linear_model_bias()
-      price1_var = get_linear_model_column_var(price1)
-      price2_var = get_linear_model_column_var(price2)
-      self.assertAllEqual(cols_to_vars['bias'], [bias])
-      self.assertAllEqual(cols_to_vars[price1], [price1_var])
-      self.assertAllEqual(cols_to_vars[price2], [price2_var])
-
-  def test_fills_cols_to_vars_partitioned_variables(self):
-    price1 = fc_old.numeric_column('price1', shape=2)
-    price2 = fc_old.numeric_column('price2', shape=3)
-    with ops.Graph().as_default():
-      features = {
-          'price1': [[1., 2.], [6., 7.]],
-          'price2': [[3., 4., 5.], [8., 9., 10.]]
-      }
-      cols_to_vars = {}
-      with variable_scope.variable_scope(
-          'linear',
-          partitioner=partitioned_variables.fixed_size_partitioner(2, axis=0)):
-        get_keras_linear_model_predictions(
-            features, [price1, price2], cols_to_vars=cols_to_vars)
-      with _initialized_session():
-        self.assertEqual([0.], cols_to_vars['bias'][0].eval())
-        # Partitioning shards the [2, 1] price1 var into 2 [1, 1] Variables.
-        self.assertAllEqual([[0.]], cols_to_vars[price1][0].eval())
-        self.assertAllEqual([[0.]], cols_to_vars[price1][1].eval())
-        # Partitioning shards the [3, 1] price2 var into a [2, 1] Variable and
-        # a [1, 1] Variable.
-        self.assertAllEqual([[0.], [0.]], cols_to_vars[price2][0].eval())
-        self.assertAllEqual([[0.]], cols_to_vars[price2][1].eval())
-
-  def test_dense_collection(self):
-    price = fc_old.numeric_column('price')
-    with ops.Graph().as_default() as g:
-      features = {'price': [[1.], [5.]]}
-      get_keras_linear_model_predictions(
-          features, [price], weight_collections=['my-vars'])
-      my_vars = g.get_collection('my-vars')
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
-      self.assertIn(bias, my_vars)
-      self.assertIn(price_var, my_vars)
-
-  def test_sparse_collection(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default() as g:
-      wire_tensor = sparse_tensor.SparseTensor(
-          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
-      features = {'wire_cast': wire_tensor}
-      get_keras_linear_model_predictions(
-          features, [wire_cast], weight_collections=['my-vars'])
-      my_vars = g.get_collection('my-vars')
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
-      self.assertIn(bias, my_vars)
-      self.assertIn(wire_cast_var, my_vars)
-
   def test_dense_trainable_default(self):
-    price = fc_old.numeric_column('price')
+    price = fc.numeric_column('price')
     with ops.Graph().as_default() as g:
       features = {'price': [[1.], [5.]]}
-      get_keras_linear_model_predictions(features, [price])
-      bias = get_linear_model_bias()
-      price_var = get_linear_model_column_var(price)
+      model = fc.LinearModel([price])
+      model(features)
+      price_var, bias = model.variables
       trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
       self.assertIn(bias, trainable_vars)
       self.assertIn(price_var, trainable_vars)
 
   def test_sparse_trainable_default(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
     with ops.Graph().as_default() as g:
       wire_tensor = sparse_tensor.SparseTensor(
           values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
       features = {'wire_cast': wire_tensor}
-      get_keras_linear_model_predictions(features, [wire_cast])
+      model = fc.LinearModel([wire_cast])
+      model(features)
       trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      bias = get_linear_model_bias()
-      wire_cast_var = get_linear_model_column_var(wire_cast)
+      wire_cast_var, bias = model.variables
       self.assertIn(bias, trainable_vars)
       self.assertIn(wire_cast_var, trainable_vars)
 
   def test_dense_trainable_false(self):
-    price = fc_old.numeric_column('price')
+    price = fc.numeric_column('price')
     with ops.Graph().as_default() as g:
       features = {'price': [[1.], [5.]]}
-      get_keras_linear_model_predictions(features, [price], trainable=False)
+      model = fc.LinearModel([price], trainable=False)
+      model(features)
       trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
       self.assertEqual([], trainable_vars)
 
   def test_sparse_trainable_false(self):
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
     with ops.Graph().as_default() as g:
       wire_tensor = sparse_tensor.SparseTensor(
           values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
       features = {'wire_cast': wire_tensor}
-      get_keras_linear_model_predictions(features, [wire_cast], trainable=False)
+      model = fc.LinearModel([wire_cast], trainable=False)
+      model(features)
       trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
       self.assertEqual([], trainable_vars)
 
   def test_column_order(self):
-    price_a = fc_old.numeric_column('price_a')
-    price_b = fc_old.numeric_column('price_b')
-    wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
-    with ops.Graph().as_default() as g:
+    price_a = fc.numeric_column('price_a')
+    price_b = fc.numeric_column('price_b')
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default():
       features = {
           'price_a': [[1.]],
           'price_b': [[3.]],
@@ -2407,15 +1458,15 @@ class _LinearModelTest(test.TestCase):
               sparse_tensor.SparseTensor(
                   values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
       }
-      get_keras_linear_model_predictions(
-          features, [price_a, wire_cast, price_b],
-          weight_collections=['my-vars'])
-      my_vars = g.get_collection('my-vars')
+      model = fc.LinearModel([price_a, wire_cast, price_b])
+      model(features)
+
+      my_vars = model.variables
       self.assertIn('price_a', my_vars[0].name)
       self.assertIn('price_b', my_vars[1].name)
       self.assertIn('wire_cast', my_vars[2].name)
 
-    with ops.Graph().as_default() as g:
+    with ops.Graph().as_default():
       features = {
           'price_a': [[1.]],
           'price_b': [[3.]],
@@ -2423,17 +1474,45 @@ class _LinearModelTest(test.TestCase):
               sparse_tensor.SparseTensor(
                   values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
       }
-      get_keras_linear_model_predictions(
-          features, [wire_cast, price_b, price_a],
-          weight_collections=['my-vars'])
-      my_vars = g.get_collection('my-vars')
+      model = fc.LinearModel([wire_cast, price_b, price_a])
+      model(features)
+
+      my_vars = model.variables
       self.assertIn('price_a', my_vars[0].name)
       self.assertIn('price_b', my_vars[1].name)
       self.assertIn('wire_cast', my_vars[2].name)
 
+  def test_variable_names(self):
+    price1 = fc.numeric_column('price1')
+    dense_feature = fc.numeric_column('dense_feature')
+    dense_feature_bucketized = fc.bucketized_column(
+        dense_feature, boundaries=[0.])
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)
+    all_cols = [price1, dense_feature_bucketized, some_embedding_column]
+
+    with ops.Graph().as_default():
+      model = fc.LinearModel(all_cols)
+      features = {
+          'price1': [[3.], [4.]],
+          'dense_feature': [[-1.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+      }
+      model(features)
+      variable_names = [var.name for var in model.variables]
+      self.assertItemsEqual([
+          'linear_model/dense_feature_bucketized/weights:0',
+          'linear_model/price1/weights:0',
+          'linear_model/sparse_feature_embedding/embedding_weights:0',
+          'linear_model/sparse_feature_embedding/weights:0',
+          'linear_model/bias_weights:0',
+      ], variable_names)
+
   def test_static_batch_size_mismatch(self):
-    price1 = fc_old.numeric_column('price1')
-    price2 = fc_old.numeric_column('price2')
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
       features = {
           'price1': [[1.], [5.], [7.]],  # batchsize = 3
@@ -2442,12 +1521,13 @@ class _LinearModelTest(test.TestCase):
     with self.assertRaisesRegexp(
         ValueError,
         'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
-      get_keras_linear_model_predictions(features, [price1, price2])
+      model = fc.LinearModel([price1, price2])
+      model(features)
 
   def test_subset_of_static_batch_size_mismatch(self):
-    price1 = fc_old.numeric_column('price1')
-    price2 = fc_old.numeric_column('price2')
-    price3 = fc_old.numeric_column('price3')
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    price3 = fc.numeric_column('price3')
     with ops.Graph().as_default():
       features = {
           'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
@@ -2457,18 +1537,19 @@ class _LinearModelTest(test.TestCase):
       with self.assertRaisesRegexp(
           ValueError,
           'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
-        get_keras_linear_model_predictions(features, [price1, price2, price3])
+        model = fc.LinearModel([price1, price2, price3])
+        model(features)
 
   def test_runtime_batch_size_mismatch(self):
-    price1 = fc_old.numeric_column('price1')
-    price2 = fc_old.numeric_column('price2')
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
       features = {
           'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
           'price2': [[3.], [4.]]  # batchsize = 2
       }
-      predictions = get_keras_linear_model_predictions(features,
-                                                       [price1, price2])
+      model = fc.LinearModel([price1, price2])
+      predictions = model(features)
       with _initialized_session() as sess:
         with self.assertRaisesRegexp(errors.OpError,
                                      'must have the same size and shape'):
@@ -2476,15 +1557,15 @@ class _LinearModelTest(test.TestCase):
               predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]})
 
   def test_runtime_batch_size_matches(self):
-    price1 = fc_old.numeric_column('price1')
-    price2 = fc_old.numeric_column('price2')
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
       features = {
           'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
           'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
       }
-      predictions = get_keras_linear_model_predictions(features,
-                                                       [price1, price2])
+      model = fc.LinearModel([price1, price2])
+      predictions = model(features)
       with _initialized_session() as sess:
         sess.run(
             predictions,
@@ -2494,14 +1575,14 @@ class _LinearModelTest(test.TestCase):
             })
 
   def test_with_numpy_input_fn(self):
-    price = fc_old.numeric_column('price')
-    price_buckets = fc_old.bucketized_column(
+    price = fc.numeric_column('price')
+    price_buckets = fc.bucketized_column(
         price, boundaries=[
             0.,
             10.,
             100.,
         ])
-    body_style = fc_old.categorical_column_with_vocabulary_list(
+    body_style = fc.categorical_column_with_vocabulary_list(
         'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
 
     input_fn = numpy_io.numpy_input_fn(
@@ -2512,16 +1593,14 @@ class _LinearModelTest(test.TestCase):
         batch_size=2,
         shuffle=False)
     features = input_fn()
-    net = get_keras_linear_model_predictions(features,
-                                             [price_buckets, body_style])
+    model = fc.LinearModel([price_buckets, body_style])
+    net = model(features)
     # self.assertEqual(1 + 3 + 5, net.shape[1])
     with _initialized_session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
 
-      bias = get_linear_model_bias()
-      price_buckets_var = get_linear_model_column_var(price_buckets)
-      body_style_var = get_linear_model_column_var(body_style)
+      body_style_var, price_buckets_var, bias = model.variables
 
       sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
@@ -2533,38 +1612,31 @@ class _LinearModelTest(test.TestCase):
       coord.join(threads)
 
   def test_with_1d_sparse_tensor(self):
-    price = fc_old.numeric_column('price')
-    price_buckets = fc_old.bucketized_column(
+    price = fc.numeric_column('price')
+    price_buckets = fc.bucketized_column(
         price, boundaries=[
             0.,
             10.,
             100.,
         ])
-    body_style = fc_old.categorical_column_with_vocabulary_list(
+    body_style = fc.categorical_column_with_vocabulary_list(
         'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
 
     # Provides 1-dim tensor and dense tensor.
     features = {
-        'price':
-            constant_op.constant([
-                -1.,
-                12.,
-            ]),
-        'body-style':
-            sparse_tensor.SparseTensor(
-                indices=((0,), (1,)),
-                values=('sedan', 'hardtop'),
-                dense_shape=(2,)),
+        'price': constant_op.constant([-1., 12.,]),
+        'body-style': sparse_tensor.SparseTensor(
+            indices=((0,), (1,)),
+            values=('sedan', 'hardtop'),
+            dense_shape=(2,)),
     }
     self.assertEqual(1, features['price'].shape.ndims)
     self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
 
-    net = get_keras_linear_model_predictions(features,
-                                             [price_buckets, body_style])
+    model = fc.LinearModel([price_buckets, body_style])
+    net = model(features)
     with _initialized_session() as sess:
-      bias = get_linear_model_bias()
-      price_buckets_var = get_linear_model_column_var(price_buckets)
-      body_style_var = get_linear_model_column_var(body_style)
+      body_style_var, price_buckets_var, bias = model.variables
 
       sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
@@ -2573,16 +1645,16 @@ class _LinearModelTest(test.TestCase):
       self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net))
 
   def test_with_1d_unknown_shape_sparse_tensor(self):
-    price = fc_old.numeric_column('price')
-    price_buckets = fc_old.bucketized_column(
+    price = fc.numeric_column('price')
+    price_buckets = fc.bucketized_column(
         price, boundaries=[
             0.,
             10.,
             100.,
         ])
-    body_style = fc_old.categorical_column_with_vocabulary_list(
+    body_style = fc.categorical_column_with_vocabulary_list(
         'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
-    country = fc_old.categorical_column_with_vocabulary_list(
+    country = fc.categorical_column_with_vocabulary_list(
         'country', vocabulary_list=['US', 'JP', 'CA'])
 
     # Provides 1-dim tensor and dense tensor.
@@ -2596,14 +1668,14 @@ class _LinearModelTest(test.TestCase):
 
     price_data = np.array([-1., 12.])
     body_style_data = sparse_tensor.SparseTensorValue(
-        indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,))
+        indices=((0,), (1,)),
+        values=('sedan', 'hardtop'),
+        dense_shape=(2,))
     country_data = np.array(['US', 'CA'])
 
-    net = get_keras_linear_model_predictions(
-        features, [price_buckets, body_style, country])
-    bias = get_linear_model_bias()
-    price_buckets_var = get_linear_model_column_var(price_buckets)
-    body_style_var = get_linear_model_column_var(body_style)
+    model = fc.LinearModel([price_buckets, body_style, country])
+    net = model(features)
+    body_style_var, _, price_buckets_var, bias = model.variables
     with _initialized_session() as sess:
       sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
@@ -2619,7 +1691,7 @@ class _LinearModelTest(test.TestCase):
                               }))
 
   def test_with_rank_0_feature(self):
-    price = fc_old.numeric_column('price')
+    price = fc.numeric_column('price')
     features = {
         'price': constant_op.constant(0),
     }
@@ -2627,18 +1699,41 @@ class _LinearModelTest(test.TestCase):
 
     # Static rank 0 should fail
     with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
-      get_keras_linear_model_predictions(features, [price])
+      model = fc.LinearModel([price])
+      model(features)
 
     # Dynamic rank 0 should fail
     features = {
         'price': array_ops.placeholder(dtypes.float32),
     }
-    net = get_keras_linear_model_predictions(features, [price])
+    model = fc.LinearModel([price])
+    net = model(features)
     self.assertEqual(1, net.shape[1])
     with _initialized_session() as sess:
       with self.assertRaisesOpError('Feature .* cannot have rank 0'):
         sess.run(net, feed_dict={features['price']: np.array(1)})
 
+  def test_multiple_linear_models(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features1 = {'price': [[1.], [5.]]}
+      features2 = {'price': [[2.], [10.]]}
+      model1 = fc.LinearModel([price])
+      model2 = fc.LinearModel([price])
+      predictions1 = model1(features1)
+      predictions2 = model2(features2)
+      price_var1, bias1 = model1.variables
+      price_var2, bias2 = model2.variables
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias1.eval())
+        sess.run(price_var1.assign([[10.]]))
+        sess.run(bias1.assign([5.]))
+        self.assertAllClose([[15.], [55.]], predictions1.eval())
+        self.assertAllClose([0.], bias2.eval())
+        sess.run(price_var2.assign([[10.]]))
+        sess.run(bias2.assign([5.]))
+        self.assertAllClose([[25.], [105.]], predictions2.eval())
+
 
 class FeatureLayerTest(test.TestCase):
 
@@ -3739,47 +2834,22 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
           id_weight_pair.id_tensor.eval())
 
   def test_linear_model(self):
-    wire_column = fc_old.categorical_column_with_vocabulary_file(
-        key='wire',
-        vocabulary_file=self._wire_vocabulary_file_name,
-        vocabulary_size=self._wire_vocabulary_size,
-        num_oov_buckets=1)
-    self.assertEqual(4, wire_column._num_buckets)
-    with ops.Graph().as_default():
-      predictions = fc.linear_model({
-          wire_column.name: sparse_tensor.SparseTensorValue(
-              indices=((0, 0), (1, 0), (1, 1)),
-              values=('marlo', 'skywalker', 'omar'),
-              dense_shape=(2, 2))
-      }, (wire_column,))
-      bias = get_linear_model_bias()
-      wire_var = get_linear_model_column_var(wire_column)
-      with _initialized_session():
-        self.assertAllClose((0.,), bias.eval())
-        self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
-        self.assertAllClose(((0.,), (0.,)), predictions.eval())
-        wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval()
-        # 'marlo' -> 2: wire_var[2] = 3
-        # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
-        self.assertAllClose(((3.,), (5.,)), predictions.eval())
-
-  def test_keras_linear_model(self):
-    wire_column = fc_old.categorical_column_with_vocabulary_file(
+    wire_column = fc.categorical_column_with_vocabulary_file(
         key='wire',
         vocabulary_file=self._wire_vocabulary_file_name,
         vocabulary_size=self._wire_vocabulary_size,
         num_oov_buckets=1)
-    self.assertEqual(4, wire_column._num_buckets)
+    self.assertEqual(4, wire_column.num_buckets)
     with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions({
+      model = fc.LinearModel((wire_column,))
+      predictions = model({
           wire_column.name:
               sparse_tensor.SparseTensorValue(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=('marlo', 'skywalker', 'omar'),
                   dense_shape=(2, 2))
-      }, (wire_column,))
-      bias = get_linear_model_bias()
-      wire_var = get_linear_model_column_var(wire_column)
+      })
+      wire_var, bias = model.variables
       with _initialized_session():
         self.assertAllClose((0.,), bias.eval())
         self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
@@ -4131,54 +3201,30 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
     with _initialized_session():
-      _assert_sparse_tensor_value(
-          self,
-          sparse_tensor.SparseTensorValue(
-              indices=inputs.indices,
-              values=np.array((2, 60, 0, 4), dtype=np.int64),
-              dense_shape=inputs.dense_shape),
-          id_weight_pair.id_tensor.eval())
-
-  def test_linear_model(self):
-    wire_column = fc_old.categorical_column_with_vocabulary_list(
-        key='aaa',
-        vocabulary_list=('omar', 'stringer', 'marlo'),
-        num_oov_buckets=1)
-    self.assertEqual(4, wire_column._num_buckets)
-    with ops.Graph().as_default():
-      predictions = fc.linear_model({
-          wire_column.name: sparse_tensor.SparseTensorValue(
-              indices=((0, 0), (1, 0), (1, 1)),
-              values=('marlo', 'skywalker', 'omar'),
-              dense_shape=(2, 2))
-      }, (wire_column,))
-      bias = get_linear_model_bias()
-      wire_var = get_linear_model_column_var(wire_column)
-      with _initialized_session():
-        self.assertAllClose((0.,), bias.eval())
-        self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
-        self.assertAllClose(((0.,), (0.,)), predictions.eval())
-        wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval()
-        # 'marlo' -> 2: wire_var[2] = 3
-        # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
-        self.assertAllClose(((3.,), (5.,)), predictions.eval())
+      _assert_sparse_tensor_value(
+          self,
+          sparse_tensor.SparseTensorValue(
+              indices=inputs.indices,
+              values=np.array((2, 60, 0, 4), dtype=np.int64),
+              dense_shape=inputs.dense_shape),
+          id_weight_pair.id_tensor.eval())
 
-  def test_keras_linear_model(self):
-    wire_column = fc_old.categorical_column_with_vocabulary_list(
+  def test_linear_model(self):
+    wire_column = fc.categorical_column_with_vocabulary_list(
         key='aaa',
         vocabulary_list=('omar', 'stringer', 'marlo'),
         num_oov_buckets=1)
-    self.assertEqual(4, wire_column._num_buckets)
+    self.assertEqual(4, wire_column.num_buckets)
     with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions({
+      model = fc.LinearModel((wire_column,))
+      predictions = model({
           wire_column.name:
               sparse_tensor.SparseTensorValue(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=('marlo', 'skywalker', 'omar'),
                   dense_shape=(2, 2))
-      }, (wire_column,))
-      bias = get_linear_model_bias()
-      wire_var = get_linear_model_column_var(wire_column)
+      })
+      wire_var, bias = model.variables
       with _initialized_session():
         self.assertAllClose((0.,), bias.eval())
         self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
@@ -4398,39 +3444,18 @@ class IdentityCategoricalColumnTest(test.TestCase):
           }))
 
   def test_linear_model(self):
-    column = fc_old.categorical_column_with_identity(key='aaa', num_buckets=3)
-    self.assertEqual(3, column.num_buckets)
-    with ops.Graph().as_default():
-      predictions = fc.linear_model({
-          column.name: sparse_tensor.SparseTensorValue(
-              indices=((0, 0), (1, 0), (1, 1)),
-              values=(0, 2, 1),
-              dense_shape=(2, 2))
-      }, (column,))
-      bias = get_linear_model_bias()
-      weight_var = get_linear_model_column_var(column)
-      with _initialized_session():
-        self.assertAllClose((0.,), bias.eval())
-        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
-        self.assertAllClose(((0.,), (0.,)), predictions.eval())
-        weight_var.assign(((1.,), (2.,), (3.,))).eval()
-        # weight_var[0] = 1
-        # weight_var[2] + weight_var[1] = 3+2 = 5
-        self.assertAllClose(((1.,), (5.,)), predictions.eval())
-
-  def test_keras_linear_model(self):
-    column = fc_old.categorical_column_with_identity(key='aaa', num_buckets=3)
+    column = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
     self.assertEqual(3, column.num_buckets)
     with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions({
+      model = fc.LinearModel((column,))
+      predictions = model({
           column.name:
               sparse_tensor.SparseTensorValue(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=(0, 2, 1),
                   dense_shape=(2, 2))
-      }, (column,))
-      bias = get_linear_model_bias()
-      weight_var = get_linear_model_column_var(column)
+      })
+      weight_var, bias = model.variables
       with _initialized_session():
         self.assertAllClose((0.,), bias.eval())
         self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
@@ -4656,27 +3681,8 @@ class IndicatorColumnTest(test.TestCase):
       self.assertAllEqual([[0., 1., 1.]], indicator_tensor.eval())
 
   def test_linear_model(self):
-    animal = fc_old.indicator_column(
-        fc_old.categorical_column_with_identity('animal', num_buckets=4))
-    with ops.Graph().as_default():
-      features = {
-          'animal':
-              sparse_tensor.SparseTensor(
-                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
-      }
-
-      predictions = fc.linear_model(features, [animal])
-      weight_var = get_linear_model_column_var(animal)
-      with _initialized_session():
-        # All should be zero-initialized.
-        self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval())
-        self.assertAllClose([[0.]], predictions.eval())
-        weight_var.assign([[1.], [2.], [3.], [4.]]).eval()
-        self.assertAllClose([[2. + 3.]], predictions.eval())
-
-  def test_keras_linear_model(self):
-    animal = fc_old.indicator_column(
-        fc_old.categorical_column_with_identity('animal', num_buckets=4))
+    animal = fc.indicator_column(
+        fc.categorical_column_with_identity('animal', num_buckets=4))
     with ops.Graph().as_default():
       features = {
           'animal':
@@ -4684,8 +3690,9 @@ class IndicatorColumnTest(test.TestCase):
                   indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
       }
 
-      predictions = get_keras_linear_model_predictions(features, [animal])
-      weight_var = get_linear_model_column_var(animal)
+      model = fc.LinearModel([animal])
+      predictions = model(features)
+      weight_var, _ = model.variables
       with _initialized_session():
         # All should be zero-initialized.
         self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval())
@@ -5137,17 +4144,16 @@ class EmbeddingColumnTest(test.TestCase):
       return zeros_embedding_values
 
     # Build columns.
-    categorical_column = fc_old.categorical_column_with_identity(
+    categorical_column = fc.categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc_old.embedding_column(
+    embedding_column = fc.embedding_column(
         categorical_column,
         dimension=embedding_dimension,
         initializer=_initializer)
 
     with ops.Graph().as_default():
-      predictions = fc.linear_model({
-          categorical_column.name: sparse_input
-      }, (embedding_column,))
+      model = fc.LinearModel((embedding_column,))
+      predictions = model({categorical_column.name: sparse_input})
       expected_var_names = (
           'linear_model/bias_weights:0',
           'linear_model/aaa_embedding/weights:0',
@@ -5189,82 +4195,6 @@ class EmbeddingColumnTest(test.TestCase):
         # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42]
         self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval())
 
-  def test_keras_linear_model(self):
-    # Inputs.
-    batch_size = 4
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(batch_size, 5))
-
-    # Embedding variable.
-    embedding_dimension = 2
-    embedding_shape = (vocabulary_size, embedding_dimension)
-    zeros_embedding_values = np.zeros(embedding_shape)
-
-    def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual(embedding_shape, shape)
-      self.assertEqual(dtypes.float32, dtype)
-      self.assertIsNone(partition_info)
-      return zeros_embedding_values
-
-    # Build columns.
-    categorical_column = fc_old.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc_old.embedding_column(
-        categorical_column,
-        dimension=embedding_dimension,
-        initializer=_initializer)
-
-    with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions({
-          categorical_column.name: sparse_input
-      }, (embedding_column,))
-      expected_var_names = (
-          'linear_model/bias_weights:0',
-          'linear_model/aaa_embedding/weights:0',
-          'linear_model/aaa_embedding/embedding_weights:0',
-      )
-      self.assertItemsEqual(
-          expected_var_names,
-          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
-      trainable_vars = {
-          v.name: v
-          for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      }
-      self.assertItemsEqual(expected_var_names, trainable_vars.keys())
-      bias = trainable_vars['linear_model/bias_weights:0']
-      embedding_weights = trainable_vars[
-          'linear_model/aaa_embedding/embedding_weights:0']
-      linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0']
-      with _initialized_session():
-        # Predictions with all zero weights.
-        self.assertAllClose(np.zeros((1,)), bias.eval())
-        self.assertAllClose(zeros_embedding_values, embedding_weights.eval())
-        self.assertAllClose(
-            np.zeros((embedding_dimension, 1)), linear_weights.eval())
-        self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval())
-
-        # Predictions with all non-zero weights.
-        embedding_weights.assign((
-            (1., 2.),  # id 0
-            (3., 5.),  # id 1
-            (7., 11.)  # id 2
-        )).eval()
-        linear_weights.assign(((4.,), (6.,))).eval()
-        # example 0, ids [2], embedding[0] = [7, 11]
-        # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5]
-        # example 2, ids [], embedding[2] = [0, 0]
-        # example 3, ids [1], embedding[3] = [3, 5]
-        # sum(embeddings * linear_weights)
-        # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42]
-        self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval())
-
   def test_feature_layer(self):
     # Inputs.
     vocabulary_size = 3
@@ -5765,27 +4695,31 @@ class SharedEmbeddingColumnTest(test.TestCase):
       return zeros_embedding_values
 
     # Build columns.
-    categorical_column_a = fc_old.categorical_column_with_identity(
+    categorical_column_a = fc.categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
-    categorical_column_b = fc_old.categorical_column_with_identity(
+    categorical_column_b = fc.categorical_column_with_identity(
         key='bbb', num_buckets=vocabulary_size)
-    embedding_column_a, embedding_column_b = fc_old.shared_embedding_columns(
+    embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
         [categorical_column_a, categorical_column_b],
         dimension=embedding_dimension,
         initializer=_initializer)
 
     with ops.Graph().as_default():
-      predictions = fc.linear_model({
+      model = fc.LinearModel(
+          (embedding_column_a, embedding_column_b),
+          shared_state_manager=fc.SharedEmbeddingStateManager())
+      predictions = model({
           categorical_column_a.name: input_a,
-          categorical_column_b.name: input_b,
-      }, (embedding_column_a, embedding_column_b))
+          categorical_column_b.name: input_b
+      })
+
       # Linear weights do not follow the column name. But this is a rare use
       # case, and fixing it would add too much complexity to the code.
       expected_var_names = (
           'linear_model/bias_weights:0',
-          'linear_model/aaa_bbb_shared_embedding/weights:0',
-          'linear_model/aaa_bbb_shared_embedding/embedding_weights:0',
-          'linear_model/aaa_bbb_shared_embedding_1/weights:0',
+          'linear_model/aaa_shared_embedding/weights:0',
+          'shared_embedding_state_manager/aaa_bbb_shared_embedding:0',
+          'linear_model/bbb_shared_embedding/weights:0',
       )
       self.assertItemsEqual(
           expected_var_names,
@@ -5797,102 +4731,11 @@ class SharedEmbeddingColumnTest(test.TestCase):
       self.assertItemsEqual(expected_var_names, trainable_vars.keys())
       bias = trainable_vars['linear_model/bias_weights:0']
       embedding_weights = trainable_vars[
-          'linear_model/aaa_bbb_shared_embedding/embedding_weights:0']
-      linear_weights_a = trainable_vars[
-          'linear_model/aaa_bbb_shared_embedding/weights:0']
-      linear_weights_b = trainable_vars[
-          'linear_model/aaa_bbb_shared_embedding_1/weights:0']
-      with _initialized_session():
-        # Predictions with all zero weights.
-        self.assertAllClose(np.zeros((1,)), bias.eval())
-        self.assertAllClose(zeros_embedding_values, embedding_weights.eval())
-        self.assertAllClose(
-            np.zeros((embedding_dimension, 1)), linear_weights_a.eval())
-        self.assertAllClose(
-            np.zeros((embedding_dimension, 1)), linear_weights_b.eval())
-        self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval())
-
-        # Predictions with all non-zero weights.
-        embedding_weights.assign((
-            (1., 2.),  # id 0
-            (3., 5.),  # id 1
-            (7., 11.)  # id 2
-        )).eval()
-        linear_weights_a.assign(((4.,), (6.,))).eval()
-        # example 0, ids [2], embedding[0] = [7, 11]
-        # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5]
-        # sum(embeddings * linear_weights)
-        # = [4*7 + 6*11, 4*2 + 6*3.5] = [94, 29]
-        linear_weights_b.assign(((3.,), (5.,))).eval()
-        # example 0, ids [0], embedding[0] = [1, 2]
-        # example 1, ids [], embedding[1] = 0, 0]
-        # sum(embeddings * linear_weights)
-        # = [3*1 + 5*2, 3*0 +5*0] = [13, 0]
-        self.assertAllClose([[94. + 13.], [29.]], predictions.eval())
-
-  def test_keras_linear_model(self):
-    # Inputs.
-    batch_size = 2
-    vocabulary_size = 3
-    # -1 values are ignored.
-    input_a = np.array([
-        [2, -1, -1],  # example 0, ids [2]
-        [0, 1, -1]
-    ])  # example 1, ids [0, 1]
-    input_b = np.array([
-        [0, -1, -1],  # example 0, ids [0]
-        [-1, -1, -1]
-    ])  # example 1, ids []
-
-    # Embedding variable.
-    embedding_dimension = 2
-    embedding_shape = (vocabulary_size, embedding_dimension)
-    zeros_embedding_values = np.zeros(embedding_shape)
-
-    def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual(embedding_shape, shape)
-      self.assertEqual(dtypes.float32, dtype)
-      self.assertIsNone(partition_info)
-      return zeros_embedding_values
-
-    # Build columns.
-    categorical_column_a = fc_old.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    categorical_column_b = fc_old.categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-    embedding_column_a, embedding_column_b = fc_old.shared_embedding_columns(
-        [categorical_column_a, categorical_column_b],
-        dimension=embedding_dimension,
-        initializer=_initializer)
-
-    with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions({
-          categorical_column_a.name: input_a,
-          categorical_column_b.name: input_b,
-      }, (embedding_column_a, embedding_column_b))
-      # Linear weights do not follow the column name. But this is a rare use
-      # case, and fixing it would add too much complexity to the code.
-      expected_var_names = (
-          'linear_model/bias_weights:0',
-          'linear_model/aaa_bbb_shared_embedding/weights:0',
-          'linear_model/aaa_bbb_shared_embedding/embedding_weights:0',
-          'linear_model/aaa_bbb_shared_embedding_1/weights:0',
-      )
-      self.assertItemsEqual(
-          expected_var_names,
-          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
-      trainable_vars = {
-          v.name: v
-          for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      }
-      self.assertItemsEqual(expected_var_names, trainable_vars.keys())
-      bias = trainable_vars['linear_model/bias_weights:0']
-      embedding_weights = trainable_vars[
-          'linear_model/aaa_bbb_shared_embedding/embedding_weights:0']
+          'shared_embedding_state_manager/aaa_bbb_shared_embedding:0']
       linear_weights_a = trainable_vars[
-          'linear_model/aaa_bbb_shared_embedding/weights:0']
+          'linear_model/aaa_shared_embedding/weights:0']
       linear_weights_b = trainable_vars[
-          'linear_model/aaa_bbb_shared_embedding_1/weights:0']
+          'linear_model/bbb_shared_embedding/weights:0']
       with _initialized_session():
         # Predictions with all zero weights.
         self.assertAllClose(np.zeros((1,)), bias.eval())
@@ -6291,13 +5134,14 @@ class WeightedCategoricalColumnTest(test.TestCase):
               dense_shape=(2, 2)),
           weight_tensor.eval())
 
-  def test_keras_linear_model(self):
-    column = fc_old.weighted_categorical_column(
-        categorical_column=fc_old.categorical_column_with_identity(
+  def test_linear_model(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
             key='ids', num_buckets=3),
         weight_feature_key='values')
     with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions({
+      model = fc.LinearModel((column,))
+      predictions = model({
           'ids':
               sparse_tensor.SparseTensorValue(
                   indices=((0, 0), (1, 0), (1, 1)),
@@ -6308,9 +5152,8 @@ class WeightedCategoricalColumnTest(test.TestCase):
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=(.5, 1., .1),
                   dense_shape=(2, 2))
-      }, (column,))
-      bias = get_linear_model_bias()
-      weight_var = get_linear_model_column_var(column)
+      })
+      weight_var, bias = model.variables
       with _initialized_session():
         self.assertAllClose((0.,), bias.eval())
         self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
@@ -6321,15 +5164,16 @@ class WeightedCategoricalColumnTest(test.TestCase):
         # = 3*1 + 2*.1 = 3+.2 = 3.2
         self.assertAllClose(((.5,), (3.2,)), predictions.eval())
 
-  def test_keras_linear_model_mismatched_shape(self):
-    column = fc_old.weighted_categorical_column(
-        categorical_column=fc_old.categorical_column_with_identity(
+  def test_linear_model_mismatched_shape(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
             key='ids', num_buckets=3),
         weight_feature_key='values')
     with ops.Graph().as_default():
-      with self.assertRaisesRegexp(ValueError,
-                                   r'Dimensions.*are not compatible'):
-        get_keras_linear_model_predictions({
+      with self.assertRaisesRegexp(
+          ValueError, r'Dimensions.*are not compatible'):
+        model = fc.LinearModel((column,))
+        model({
             'ids':
                 sparse_tensor.SparseTensorValue(
                     indices=((0, 0), (1, 0), (1, 1)),
@@ -6340,122 +5184,23 @@ class WeightedCategoricalColumnTest(test.TestCase):
                     indices=((0, 0), (0, 1), (1, 0), (1, 1)),
                     values=(.5, 11., 1., .1),
                     dense_shape=(2, 2))
-        }, (column,))
-
-  def test_keras_linear_model_mismatched_dense_values(self):
-    column = fc_old.weighted_categorical_column(
-        categorical_column=fc_old.categorical_column_with_identity(
-            key='ids', num_buckets=3),
-        weight_feature_key='values')
-    with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions(
-          {
-              'ids':
-                  sparse_tensor.SparseTensorValue(
-                      indices=((0, 0), (1, 0), (1, 1)),
-                      values=(0, 2, 1),
-                      dense_shape=(2, 2)),
-              'values': ((.5,), (1.,))
-          }, (column,),
-          sparse_combiner='mean')
-      # Disabling the constant folding optimizer here since it changes the
-      # error message differently on CPU and GPU.
-      config = config_pb2.ConfigProto()
-      config.graph_options.rewrite_options.constant_folding = (
-          rewriter_config_pb2.RewriterConfig.OFF)
-      with _initialized_session(config):
-        with self.assertRaisesRegexp(errors.OpError, 'Incompatible shapes'):
-          predictions.eval()
+        })
 
-  def test_keras_linear_model_mismatched_dense_shape(self):
-    column = fc_old.weighted_categorical_column(
-        categorical_column=fc_old.categorical_column_with_identity(
+  def test_linear_model_mismatched_dense_values(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
             key='ids', num_buckets=3),
         weight_feature_key='values')
     with ops.Graph().as_default():
-      predictions = get_keras_linear_model_predictions({
+      model = fc.LinearModel((column,), sparse_combiner='mean')
+      predictions = model({
           'ids':
               sparse_tensor.SparseTensorValue(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=(0, 2, 1),
                   dense_shape=(2, 2)),
-          'values': ((.5,), (1.,), (.1,))
-      }, (column,))
-      bias = get_linear_model_bias()
-      weight_var = get_linear_model_column_var(column)
-      with _initialized_session():
-        self.assertAllClose((0.,), bias.eval())
-        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
-        self.assertAllClose(((0.,), (0.,)), predictions.eval())
-        weight_var.assign(((1.,), (2.,), (3.,))).eval()
-        # weight_var[0] * weights[0, 0] = 1 * .5 = .5
-        # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1]
-        # = 3*1 + 2*.1 = 3+.2 = 3.2
-        self.assertAllClose(((.5,), (3.2,)), predictions.eval())
-
-  def test_linear_model(self):
-    column = fc_old.weighted_categorical_column(
-        categorical_column=fc_old.categorical_column_with_identity(
-            key='ids', num_buckets=3),
-        weight_feature_key='values')
-    with ops.Graph().as_default():
-      predictions = fc.linear_model({
-          'ids': sparse_tensor.SparseTensorValue(
-              indices=((0, 0), (1, 0), (1, 1)),
-              values=(0, 2, 1),
-              dense_shape=(2, 2)),
-          'values': sparse_tensor.SparseTensorValue(
-              indices=((0, 0), (1, 0), (1, 1)),
-              values=(.5, 1., .1),
-              dense_shape=(2, 2))
-      }, (column,))
-      bias = get_linear_model_bias()
-      weight_var = get_linear_model_column_var(column)
-      with _initialized_session():
-        self.assertAllClose((0.,), bias.eval())
-        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
-        self.assertAllClose(((0.,), (0.,)), predictions.eval())
-        weight_var.assign(((1.,), (2.,), (3.,))).eval()
-        # weight_var[0] * weights[0, 0] = 1 * .5 = .5
-        # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1]
-        # = 3*1 + 2*.1 = 3+.2 = 3.2
-        self.assertAllClose(((.5,), (3.2,)), predictions.eval())
-
-  def test_linear_model_mismatched_shape(self):
-    column = fc_old.weighted_categorical_column(
-        categorical_column=fc_old.categorical_column_with_identity(
-            key='ids', num_buckets=3),
-        weight_feature_key='values')
-    with ops.Graph().as_default():
-      with self.assertRaisesRegexp(
-          ValueError, r'Dimensions.*are not compatible'):
-        fc.linear_model({
-            'ids': sparse_tensor.SparseTensorValue(
-                indices=((0, 0), (1, 0), (1, 1)),
-                values=(0, 2, 1),
-                dense_shape=(2, 2)),
-            'values': sparse_tensor.SparseTensorValue(
-                indices=((0, 0), (0, 1), (1, 0), (1, 1)),
-                values=(.5, 11., 1., .1),
-                dense_shape=(2, 2))
-        }, (column,))
-
-  def test_linear_model_mismatched_dense_values(self):
-    column = fc_old.weighted_categorical_column(
-        categorical_column=fc_old.categorical_column_with_identity(
-            key='ids', num_buckets=3),
-        weight_feature_key='values')
-    with ops.Graph().as_default():
-      predictions = fc.linear_model(
-          {
-              'ids':
-                  sparse_tensor.SparseTensorValue(
-                      indices=((0, 0), (1, 0), (1, 1)),
-                      values=(0, 2, 1),
-                      dense_shape=(2, 2)),
-              'values': ((.5,), (1.,))
-          }, (column,),
-          sparse_combiner='mean')
+          'values': ((.5,), (1.,))
+      })
       # Disabling the constant folding optimizer here since it changes the
       # error message differently on CPU and GPU.
       config = config_pb2.ConfigProto()
@@ -6466,20 +5211,21 @@ class WeightedCategoricalColumnTest(test.TestCase):
           predictions.eval()
 
   def test_linear_model_mismatched_dense_shape(self):
-    column = fc_old.weighted_categorical_column(
-        categorical_column=fc_old.categorical_column_with_identity(
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
             key='ids', num_buckets=3),
         weight_feature_key='values')
     with ops.Graph().as_default():
-      predictions = fc.linear_model({
-          'ids': sparse_tensor.SparseTensorValue(
-              indices=((0, 0), (1, 0), (1, 1)),
-              values=(0, 2, 1),
-              dense_shape=(2, 2)),
+      model = fc.LinearModel((column,))
+      predictions = model({
+          'ids':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2)),
           'values': ((.5,), (1.,), (.1,))
-      }, (column,))
-      bias = get_linear_model_bias()
-      weight_var = get_linear_model_column_var(column)
+      })
+      weight_var, bias = model.variables
       with _initialized_session():
         self.assertAllClose((0.,), bias.eval())
         self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
-- 
GitLab


From 97cba0b88cb3ce6a3f3cc66a8c4fd414bd3ac1a8 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Thu, 27 Sep 2018 20:59:37 -0700
Subject: [PATCH 0841/1357] Allowing source_device to be set to /cpu:0 for
 multi device iterator in distribution strategies. That is always the
 appropriate option.

In the existing code, we would set it to a partially specified "worker" name that was ambiguous and end up on the GPU.

PiperOrigin-RevId: 214882658
---
 tensorflow/contrib/distribute/python/mirrored_strategy.py | 3 +--
 tensorflow/contrib/distribute/python/values.py            | 5 +----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 945f450387..504f45a695 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -482,8 +482,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
       return values.PerDeviceDataset(
           self._call_dataset_fn(dataset_fn),
           self._devices,
-          self._prefetch_on_device,
-          source_device=device_util.resolve("/device:CPU:0"))
+          self._prefetch_on_device)
 
   # TODO(priyag): Deal with OutOfRange errors once b/111349762 is fixed.
   def _run_steps_on_dataset(self, fn, iterator, iterations,
diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index a0cd029f51..cce41e7717 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -708,10 +708,8 @@ class PerDeviceDataset(object):
       dataset,
       devices,
       prefetch_on_device=None,
-      source_device="/cpu:0",
   ):
     self._devices = devices
-    self._source_device = source_device if source_device is not None else "/cpu:0"
 
     # Default to using prefetching in graph mode, unless specified.
     # TODO(rohanj): Enable prefetching in eager mode.
@@ -750,7 +748,7 @@ class PerDeviceDataset(object):
                        "Please use `make_one_shot_iterator` instead.")
     if self._prefetch_on_device:
       dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator(
-          self._dataset, self._devices, source_device=self._source_device)
+          self._dataset, self._devices)
     else:
       dataset_iterator = self._dataset.make_initializable_iterator()
     return PerDeviceDataIterator(
@@ -838,7 +836,6 @@ class MultiWorkerDataset(object):
         self._datasets[worker] = PerDeviceDataset(
             worker_input,
             worker_devices,
-            source_device=worker,
             prefetch_on_device=prefetch_on_device)
 
   def make_one_shot_iterator(self):
-- 
GitLab


From 7fd14feb9cbc690b362633639b27393576472c79 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Sep 2018 21:11:42 -0700
Subject: [PATCH 0842/1357] Kernel signature reworking, remove Dims from tensor
 functions.

PiperOrigin-RevId: 214883775
---
 .../contrib/lite/kernels/internal/tensor.h    |  4 ---
 .../lite/kernels/internal/tensor_ctypes.h     | 29 ---------------
 .../lite/kernels/internal/tensor_test.cc      | 36 ++++++++++---------
 3 files changed, 20 insertions(+), 49 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h
index 765c3a03ef..689cea03e7 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor.h
+++ b/tensorflow/contrib/lite/kernels/internal/tensor.h
@@ -37,10 +37,6 @@ inline const std::complex<float>* GetTensorData(const TfLiteTensor* tensor) {
              : nullptr;
 }
 
-inline Dims<4> GetTensorDims(std::vector<int32_t> data) {
-  return GetTensorDims(data.data(), data.size());
-}
-
 inline RuntimeShape GetTensorShape(std::vector<int32_t> data) {
   return RuntimeShape(data.size(), data.data());
 }
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h b/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h
index 5e688ce452..9f5b33d217 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h
+++ b/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h
@@ -86,35 +86,6 @@ inline const bool* GetTensorData(const TfLiteTensor* tensor) {
   return tensor != nullptr ? tensor->data.b : nullptr;
 }
 
-// TODO(ahentz): the implementations in kernels/internal/ take a Dims<4> object
-// even if the original tensors were not 4D. We should consider rewriting them
-// to take a more generic 'shape' object.
-inline Dims<4> GetTensorDims(const int data[], const int size) {
-  Dims<4> d;
-  for (int i = 0; i < 4; ++i) {
-    int src = size - i - 1;
-    if (src >= 0) {
-      d.sizes[i] = data[src];
-    } else {
-      d.sizes[i] = 1;
-    }
-  }
-  d.strides[0] = 1;
-  for (int i = 1; i < 4; i++) {
-    d.strides[i] = d.strides[i - 1] * d.sizes[i - 1];
-  }
-  return d;
-}
-
-inline Dims<4> GetTensorDims(const TfLiteTensor* tensor) {
-  if (tensor == nullptr) {
-    return Dims<4>();
-  }
-
-  auto* dims = tensor->dims;
-  return GetTensorDims(dims->data, dims->size);
-}
-
 inline RuntimeShape GetTensorShape(const TfLiteTensor* tensor) {
   if (tensor == nullptr) {
     return RuntimeShape();
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_test.cc b/tensorflow/contrib/lite/kernels/internal/tensor_test.cc
index bf2068d320..2ed73ba82d 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/tensor_test.cc
@@ -21,28 +21,32 @@ namespace {
 
 using ::testing::ElementsAre;
 
-TEST(TensorTest, GetTensorDims4D) {
-  Dims<4> d = GetTensorDims({2, 3, 4, 5});
-  EXPECT_THAT(d.sizes, ElementsAre(5, 4, 3, 2));
-  EXPECT_THAT(d.strides, ElementsAre(1, 5, 20, 60));
+TEST(TensorTest, GetTensorShape4D) {
+  RuntimeShape d = GetTensorShape({2, 3, 4, 5});
+  EXPECT_THAT(
+      std::vector<int32>(d.DimsData(), d.DimsData() + d.DimensionsCount()),
+      ElementsAre(2, 3, 4, 5));
 }
 
-TEST(TensorTest, GetTensorDims3D) {
-  Dims<4> d = GetTensorDims({3, 4, 5});
-  EXPECT_THAT(d.sizes, ElementsAre(5, 4, 3, 1));
-  EXPECT_THAT(d.strides, ElementsAre(1, 5, 20, 60));
+TEST(TensorTest, GetTensorShape3D) {
+  RuntimeShape d = GetTensorShape({3, 4, 5});
+  EXPECT_THAT(
+      std::vector<int32>(d.DimsData(), d.DimsData() + d.DimensionsCount()),
+      ElementsAre(3, 4, 5));
 }
 
-TEST(TensorTest, GetTensorDims2D) {
-  Dims<4> d = GetTensorDims({4, 5});
-  EXPECT_THAT(d.sizes, ElementsAre(5, 4, 1, 1));
-  EXPECT_THAT(d.strides, ElementsAre(1, 5, 20, 20));
+TEST(TensorTest, GetTensorShape2D) {
+  RuntimeShape d = GetTensorShape({4, 5});
+  EXPECT_THAT(
+      std::vector<int32>(d.DimsData(), d.DimsData() + d.DimensionsCount()),
+      ElementsAre(4, 5));
 }
 
-TEST(TensorTest, GetTensorDims1D) {
-  Dims<4> d = GetTensorDims({5});
-  EXPECT_THAT(d.sizes, ElementsAre(5, 1, 1, 1));
-  EXPECT_THAT(d.strides, ElementsAre(1, 5, 5, 5));
+TEST(TensorTest, GetTensorShape1D) {
+  RuntimeShape d = GetTensorShape({5});
+  EXPECT_THAT(
+      std::vector<int32>(d.DimsData(), d.DimsData() + d.DimensionsCount()),
+      ElementsAre(5));
 }
 
 }  // namespace
-- 
GitLab


From efe17306442aa91192df953ae537d3f9b824dae6 Mon Sep 17 00:00:00 2001
From: IMBurbank <bassmanburbank@gmail.com>
Date: Thu, 27 Sep 2018 22:21:47 -0600
Subject: [PATCH 0843/1357] Updated python3 tf_inspect.getargspec calls to use
 getfullargspec and repackage the return values into the getargspec struct.

---
 .../python/losses/python/tuple_losses_impl.py |  2 +-
 .../labeled_tensor/python/ops/_typecheck.py   |  2 +-
 .../layers/python/layers/rev_block_lib.py     |  3 +-
 .../python/learn/estimators/estimator.py      |  4 +-
 .../learn/python/learn/estimators/head.py     |  2 +-
 .../learn/python/learn/experiment_test.py     |  2 +-
 .../learn/python/learn/export_strategy.py     |  2 +-
 .../contrib/learn/python/learn/metric_spec.py |  2 +-
 .../contrib/learn/python/learn/monitors.py    |  2 +-
 .../contrib/tpu/python/tpu/tpu_function.py    |  2 +-
 tensorflow/python/framework/errors_impl.py    |  2 +-
 tensorflow/python/framework/function.py       |  6 +-
 tensorflow/python/keras/backend_test.py       |  2 +-
 tensorflow/python/keras/testing_utils.py      |  2 +-
 .../kernel_tests/variable_scope_test.py       |  4 +-
 tensorflow/python/ops/variable_scope.py       |  4 +-
 tensorflow/python/util/tf_contextlib_test.py  |  2 +-
 tensorflow/python/util/tf_inspect.py          | 89 ++++++++++++-------
 .../api/lib/python_object_to_proto_visitor.py |  4 +-
 19 files changed, 79 insertions(+), 59 deletions(-)

diff --git a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
index 00a83e5e55..221c70c38b 100644
--- a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
+++ b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
@@ -101,7 +101,7 @@ def _args_to_gan_model(loss_fn):
   """
   # Match arguments in `loss_fn` to elements of `namedtuple`.
   # TODO(joelshor): Properly handle `varargs` and `keywords`.
-  argspec = tf_inspect.getfullargspec(loss_fn)
+  argspec = tf_inspect.getargspec(loss_fn)
   defaults = argspec.defaults or []
 
   required_args = set(argspec.args[:-len(defaults)])
diff --git a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
index 0e23039847..80fa17ec1f 100644
--- a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
+++ b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
@@ -230,7 +230,7 @@ def accepts(*types):
 
   def check_accepts(f):
     """Check the types."""
-    spec = tf_inspect.getfullargspec(f)
+    spec = tf_inspect.getargspec(f)
 
     num_function_arguments = len(spec.args)
     if len(types) != num_function_arguments:
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
index 55979cc391..06da32072f 100644
--- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
@@ -576,8 +576,7 @@ def _recomputing_grad_fn(compute_fn,
 
 def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False):
   """See recompute_grad."""
-  has_is_recompute_kwarg = (
-      "is_recomputing" in tf_inspect.getfullargspec(fn).args)
+  has_is_recompute_kwarg = "is_recomputing" in tf_inspect.getargspec(fn).args
   for arg in args:
     if not isinstance(arg, framework_ops.Tensor):
       raise ValueError("All inputs to function must be Tensors")
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index b88923bca2..c1de42782e 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -199,11 +199,11 @@ def _model_fn_args(fn):
   if hasattr(fn, 'func') and hasattr(fn, 'keywords') and hasattr(fn, 'args'):
     # Handle functools.partial and similar objects.
     return tuple([
-        arg for arg in tf_inspect.getfullargspec(fn.func).args[len(fn.args):]
+        arg for arg in tf_inspect.getargspec(fn.func).args[len(fn.args):]
         if arg not in set(fn.keywords.keys())
     ])
   # Handle function.
-  return tuple(tf_inspect.getfullargspec(fn).args)
+  return tuple(tf_inspect.getargspec(fn).args)
 
 
 def _get_replica_device_setter(config):
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index 63dd08316b..c6f79e00d5 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -1861,7 +1861,7 @@ def _get_arguments(func):
   _, func = tf_decorator.unwrap(func)
   if hasattr(func, "__code__"):
     # Regular function.
-    return tf_inspect.getfullargspec(func)
+    return tf_inspect.getargspec(func)
   elif hasattr(func, "func"):
     # Partial function.
     return _get_arguments(func.func)
diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py
index 6926696fb6..fb16c94c29 100644
--- a/tensorflow/contrib/learn/python/learn/experiment_test.py
+++ b/tensorflow/contrib/learn/python/learn/experiment_test.py
@@ -126,7 +126,7 @@ class TestBaseEstimator(object):
 
 def _check_method_supports_args(method, kwargs):
   """Checks that the given method supports the given args."""
-  supported_args = tuple(tf_inspect.getfullargspec(method).args)
+  supported_args = tuple(tf_inspect.getargspec(method).args)
   for kwarg in kwargs:
     if kwarg not in supported_args:
       raise ValueError(
diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py
index 0d6e0cdc18..075cab536e 100644
--- a/tensorflow/contrib/learn/python/learn/export_strategy.py
+++ b/tensorflow/contrib/learn/python/learn/export_strategy.py
@@ -96,7 +96,7 @@ class ExportStrategy(
     """
     # don't break existing export_fns that don't accept checkpoint_path and
     # eval_result
-    export_fn_args = tf_inspect.getfullargspec(self.export_fn).args
+    export_fn_args = tf_inspect.getargspec(self.export_fn).args
     kwargs = {}
     if 'checkpoint_path' in export_fn_args:
       kwargs['checkpoint_path'] = checkpoint_path
diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py
index 604d6d46b4..97220365d5 100644
--- a/tensorflow/contrib/learn/python/learn/metric_spec.py
+++ b/tensorflow/contrib/learn/python/learn/metric_spec.py
@@ -51,7 +51,7 @@ def _args(fn):
     return tuple(
         [arg for arg in _args(fn.func) if arg not in set(fn.keywords.keys())])
   # Handle function.
-  return tuple(tf_inspect.getfullargspec(fn).args)
+  return tuple(tf_inspect.getargspec(fn).args)
 
 
 _CANONICAL_LABELS_ARG = 'labels'
diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py
index 5f61e0264f..3d691d4340 100644
--- a/tensorflow/contrib/learn/python/learn/monitors.py
+++ b/tensorflow/contrib/learn/python/learn/monitors.py
@@ -1303,7 +1303,7 @@ class RunHookAdapterForMonitors(session_run_hook.SessionRunHook):
   def end(self, session):
     self._last_step = None
     for m in self._monitors:
-      if "session" in tf_inspect.getfullargspec(m.end).args:
+      if "session" in tf_inspect.getargspec(m.end).args:
         m.end(session=session)
       else:
         m.end()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_function.py b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
index 9c4bd1c4d1..0c7a38dbbb 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_function.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
@@ -80,7 +80,7 @@ def check_function_argument_count(func, input_arity, infeed_queue):
   number_of_arguments_needed = input_arity
   if infeed_queue is not None:
     number_of_arguments_needed += infeed_queue.number_of_tuple_elements
-  arg_spec = tf_inspect.getfullargspec(func)
+  arg_spec = tf_inspect.getargspec(func)
   number_of_args = len(arg_spec.args)
   if arg_spec.defaults is None:
     number_of_defaults = 0
diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py
index c373e75a74..5af71f2cfb 100644
--- a/tensorflow/python/framework/errors_impl.py
+++ b/tensorflow/python/framework/errors_impl.py
@@ -55,7 +55,7 @@ class OpError(Exception):
 
   def __reduce__(self):
     # Allow the subclasses to accept less arguments in their __init__.
-    init_argspec = tf_inspect.getfullargspec(self.__class__.__init__)
+    init_argspec = tf_inspect.getargspec(self.__class__.__init__)
     args = tuple(getattr(self, arg) for arg in init_argspec.args[1:])
     return self.__class__, args
 
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 3db6f683c9..225208944e 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -132,9 +132,9 @@ class Defun(object):
       raise ValueError("func %s must be callable" % func)
 
     # Func should not use kwargs and defaults.
-    argspec = tf_inspect.getfullargspec(func)
-    if argspec.varkw or argspec.defaults:
-      raise ValueError("Functions with argument defaults or varkw "
+    argspec = tf_inspect.getargspec(func)
+    if argspec.keywords or argspec.defaults:
+      raise ValueError("Functions with argument defaults or keywords "
                        "arguments are not supported.")
 
     # Computes how many arguments 'func' has.
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index 31191d0d35..ab71589940 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -452,7 +452,7 @@ class BackendLinearAlgebraTest(test.TestCase):
         compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5),
                                          keras_kwargs={'axis': -1},
                                          np_kwargs={'axis': -1})
-        if 'keepdims' in tf_inspect.getfullargspec(keras_op).args:
+        if 'keepdims' in tf_inspect.getargspec(keras_op).args:
           compare_single_input_op_to_numpy(keras_op, np_op,
                                            input_shape=(4, 7, 5),
                                            keras_kwargs={'axis': 1,
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index 1afaba5653..501b50ba5f 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -102,7 +102,7 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
   layer.set_weights(weights)
 
   # test and instantiation from weights
-  if 'weights' in tf_inspect.getfullargspec(layer_cls.__init__):
+  if 'weights' in tf_inspect.getargspec(layer_cls.__init__):
     kwargs['weights'] = weights
     layer = layer_cls(**kwargs)
 
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 1d0b72b17a..401e1ae102 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -998,8 +998,8 @@ class VariableScopeTest(test.TestCase):
 
   def testSignatureGetVarVsGetLocalVar(self):
     """get_{local,}variable() must take the same list of args."""
-    arg_names = tf_inspect.getfullargspec(variable_scope.get_variable)[0]
-    local_arg_names = tf_inspect.getfullargspec(
+    arg_names = tf_inspect.getargspec(variable_scope.get_variable)[0]
+    local_arg_names = tf_inspect.getargspec(
         variable_scope.get_local_variable)[0]
     self.assertEqual(arg_names, local_arg_names)
 
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 3cc1eb916d..a43676cd70 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -892,14 +892,14 @@ class _VariableStore(object):
         if shape and shape.is_fully_defined():
           init_val = lambda: initializer(  # pylint: disable=g-long-lambda
               shape.as_list(), dtype=dtype, partition_info=partition_info)
-        elif not tf_inspect.getfullargspec(initializer).args:
+        elif not tf_inspect.getargspec(initializer).args:
           init_val = initializer
         else:
           raise ValueError("You can only pass an initializer function that "
                            "expects no arguments to its callable when the "
                            "shape is not fully defined. The given initializer "
                            "function expects the following args %s" %
-                           tf_inspect.getfullargspec(initializer).args)
+                           tf_inspect.getargspec(initializer).args)
         variable_dtype = dtype.base_dtype
 
     # Create the variable.
diff --git a/tensorflow/python/util/tf_contextlib_test.py b/tensorflow/python/util/tf_contextlib_test.py
index 1e921b5ea3..4a5bf388a6 100644
--- a/tensorflow/python/util/tf_contextlib_test.py
+++ b/tensorflow/python/util/tf_contextlib_test.py
@@ -83,7 +83,7 @@ class TfContextlibTest(test.TestCase):
     self.assertFalse(isinstance(target, tf_decorator.TFDecorator))
 
   def testGetArgSpecReturnsWrappedArgSpec(self):
-    argspec = tf_inspect.getfullargspec(test_params_and_defaults)
+    argspec = tf_inspect.getargspec(test_params_and_defaults)
     self.assertEqual(['a', 'b', 'c', 'd'], argspec.args)
     self.assertEqual((2, True, 'hello'), argspec.defaults)
 
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index 234850ac3f..3cd6c515b9 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -36,6 +36,53 @@ else:
       'annotations'
   ])
 
+if hasattr(_inspect, 'getfullargspec'):
+  _getfullargspec = _inspect.getfullargspec  # pylint: disable=invalid-name
+  
+  def _getargspec(target):
+    """A python3 version of getargspec.
+
+    Calls `getfullargspec` and assigns args, varargs, 
+    varkw, and defaults to a python 2/3 compatible `ArgSpec`.
+
+    The parameter name 'varkw' is changed to 'keywords' to fit the 
+    `ArgSpec` struct.
+
+    Args:
+      target: the target object to inspect.
+    Returns:
+      An ArgSpec with args, varargs, keywords, and defaults parameters
+      from FullArgSpec.
+    """
+    fullargspecs = getfullargspec(target)
+    argspecs = ArgSpec(
+        args=fullargspecs.args,
+        varargs=fullargspecs.varargs,
+        keywords=fullargspecs.varkw,
+        defaults=fullargspecs.defaults)
+    return argspecs
+else:
+  _getargspec = _inspect.getargspec
+
+  def _getfullargspec(target):
+    """A python2 version of getfullargspec.
+
+    Args:
+      target: the target object to inspect.
+    Returns:
+      A FullArgSpec with empty kwonlyargs, kwonlydefaults and annotations.
+    """
+    argspecs = getargspec(target)
+    fullargspecs = FullArgSpec(
+        args=argspecs.args,
+        varargs=argspecs.varargs,
+        varkw=argspecs.keywords,
+        defaults=argspecs.defaults,
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+    return fullargspecs
+
 
 def currentframe():
   """TFDecorator-aware replacement for inspect.currentframe."""
@@ -45,10 +92,8 @@ def currentframe():
 def getargspec(obj):
   """TFDecorator-aware replacement for `inspect.getargspec`.
 
-  This should not be called from other modules. It is deprecated in python3.
-
-  Use `getfullargspec`. It is a TFDecorator-aware replacement for 
-  `inspect.getfullargspec` compatible with both python2 and python3.
+  Note: `getfullargspec` is recommended as the python 2/3 compatible 
+  replacement for this function.
 
   Args:
     obj: A function, partial function, or callable object, possibly
@@ -56,8 +101,8 @@ def getargspec(obj):
 
   Returns:
     The `ArgSpec` that describes the signature of the outermost decorator that
-    changes the callable's signature. If the callable is not decorated,
-    `inspect.getargspec()` will be called directly on the object.
+    changes the callable's signature, or the `ArgSpec` that describes 
+    the object if not decorated.
 
   Raises:
     ValueError: When callable's signature can not be expressed with
@@ -77,24 +122,24 @@ def getargspec(obj):
 
   try:
     # Python3 will handle most callables here (not partial).
-    return _inspect.getargspec(target)
+    return _getargspec(target)
   except TypeError:
     pass
 
   if isinstance(target, type):
     try:
-      return _inspect.getargspec(target.__init__)
+      return _getargspec(target.__init__)
     except TypeError:
       pass
 
     try:
-      return _inspect.getargspec(target.__new__)
+      return _getargspec(target.__new__)
     except TypeError:
       pass
 
   # The `type(target)` ensures that if a class is received we don't return
   # the signature of it's __call__ method.
-  return _inspect.getargspec(type(target).__call__)
+  return _getargspec(type(target).__call__)
 
 
 def _get_argspec_for_partial(obj):
@@ -177,30 +222,6 @@ def _get_argspec_for_partial(obj):
   return ArgSpec(args, varargs, keywords, tuple(all_defaults[first_default:]))
 
 
-if hasattr(_inspect, 'getfullargspec'):
-  _getfullargspec = _inspect.getfullargspec
-else:
-
-  def _getfullargspec(target):
-    """A python2 version of getfullargspec.
-
-    Args:
-      target: the target object to inspect.
-    Returns:
-      A FullArgSpec with empty kwonlyargs, kwonlydefaults and annotations.
-    """
-    argspecs = getargspec(target)
-    fullargspecs = FullArgSpec(
-        args=argspecs.args,
-        varargs=argspecs.varargs,
-        varkw=argspecs.keywords,
-        defaults=argspecs.defaults,
-        kwonlyargs=[],
-        kwonlydefaults=None,
-        annotations={})
-    return fullargspecs
-
-
 def getfullargspec(obj):
   """TFDecorator-aware replacement for `inspect.getfullargspec`.
 
diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
index a8e69fda4f..3a48cf683c 100644
--- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
+++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
@@ -47,9 +47,9 @@ def _SanitizedArgSpec(obj):
     string, a string representation of the argspec.
   """
   output_string = ''
-  unsanitized_arg_spec = tf_inspect.getfullargspec(obj)
+  unsanitized_arg_spec = tf_inspect.getargspec(obj)
 
-  for clean_attr in ('args', 'varargs', 'varkw'):
+  for clean_attr in ('args', 'varargs', 'keywords'):
     output_string += '%s=%s, ' % (clean_attr,
                                   getattr(unsanitized_arg_spec, clean_attr))
 
-- 
GitLab


From f4eccdda0ca2b06328363191975fa8364ba14728 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 27 Sep 2018 21:54:33 -0700
Subject: [PATCH 0844/1357] Run buildifier on workspace.bzl.

PiperOrigin-RevId: 214886657
---
 tensorflow/workspace.bzl | 380 +++++++++++++++++++--------------------
 1 file changed, 190 insertions(+), 190 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 6966783efd..70bade060e 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -57,39 +57,39 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     # Point //external/local_config_arm_compiler to //external/arm_compiler
     arm_compiler_configure(
         name = "local_config_arm_compiler",
-        remote_config_repo = "../arm_compiler",
         build_file = clean_dep("//third_party/toolchains/cpus/arm:BUILD"),
+        remote_config_repo = "../arm_compiler",
     )
 
     mkl_repository(
         name = "mkl_linux",
+        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
+        sha256 = "e2233534a9d15c387e22260997af4312a39e9f86f791768409be273b5453c4e6",
+        strip_prefix = "mklml_lnx_2019.0.20180710",
         urls = [
             "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.16/mklml_lnx_2019.0.20180710.tgz",
             "https://github.com/intel/mkl-dnn/releases/download/v0.16/mklml_lnx_2019.0.20180710.tgz",
         ],
-        sha256 = "e2233534a9d15c387e22260997af4312a39e9f86f791768409be273b5453c4e6",
-        strip_prefix = "mklml_lnx_2019.0.20180710",
-        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
     )
     mkl_repository(
         name = "mkl_windows",
+        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
+        sha256 = "3fdcff17b018a0082491adf3ba143358265336a801646e46e0191ec8d58d24a2",
+        strip_prefix = "mklml_win_2019.0.20180710",
         urls = [
             "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.16/mklml_win_2019.0.20180710.zip",
             "https://github.com/intel/mkl-dnn/releases/download/v0.16/mklml_win_2019.0.20180710.zip",
         ],
-        sha256 = "3fdcff17b018a0082491adf3ba143358265336a801646e46e0191ec8d58d24a2",
-        strip_prefix = "mklml_win_2019.0.20180710",
-        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
     )
     mkl_repository(
         name = "mkl_darwin",
+        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
+        sha256 = "411a30014a938eb83fb9f37b3dbe8e371b106fc1dd621fc23123cadc72737ce6",
+        strip_prefix = "mklml_mac_2019.0.20180710",
         urls = [
             "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.16/mklml_mac_2019.0.20180710.tgz",
             "https://github.com/intel/mkl-dnn/releases/download/v0.16/mklml_mac_2019.0.20180710.tgz",
         ],
-        sha256 = "411a30014a938eb83fb9f37b3dbe8e371b106fc1dd621fc23123cadc72737ce6",
-        strip_prefix = "mklml_mac_2019.0.20180710",
-        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
     )
 
     if path_prefix:
@@ -98,39 +98,40 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "mkl_dnn",
+        build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),
+        sha256 = "363cc9239eacf8e7917753c6d8c94f767e4cd049160d0654a61ef32d5e1b3049",
+        strip_prefix = "mkl-dnn-4e333787e0d66a1dca1218e99a891d493dbc8ef1",
         urls = [
             "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/4e333787e0d66a1dca1218e99a891d493dbc8ef1.tar.gz",
             "https://github.com/intel/mkl-dnn/archive/4e333787e0d66a1dca1218e99a891d493dbc8ef1.tar.gz",
         ],
-        sha256 = "363cc9239eacf8e7917753c6d8c94f767e4cd049160d0654a61ef32d5e1b3049",
-        strip_prefix = "mkl-dnn-4e333787e0d66a1dca1218e99a891d493dbc8ef1",
-        build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),
     )
 
     tf_http_archive(
         name = "com_google_absl",
+        build_file = clean_dep("//third_party:com_google_absl.BUILD"),
+        sha256 = "278a1af58b633be886fe81bf7061dca6b5fea99566850d1319fffdaa1a061792",
+        strip_prefix = "abseil-cpp-e291c279e458761e77a69b09b129d3d1e81f1e80",
         urls = [
             "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e291c279e458761e77a69b09b129d3d1e81f1e80.tar.gz",
             "https://github.com/abseil/abseil-cpp/archive/e291c279e458761e77a69b09b129d3d1e81f1e80.tar.gz",
         ],
-        sha256 = "278a1af58b633be886fe81bf7061dca6b5fea99566850d1319fffdaa1a061792",
-        strip_prefix = "abseil-cpp-e291c279e458761e77a69b09b129d3d1e81f1e80",
-        build_file = clean_dep("//third_party:com_google_absl.BUILD"),
     )
 
     tf_http_archive(
         name = "eigen_archive",
+        build_file = clean_dep("//third_party:eigen.BUILD"),
+        sha256 = "d956415d784fa4e42b6a2a45c32556d6aec9d0a3d8ef48baee2522ab762556a9",
+        strip_prefix = "eigen-eigen-fd6845384b86",
         urls = [
             "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz",
             "https://bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz",
         ],
-        sha256 = "d956415d784fa4e42b6a2a45c32556d6aec9d0a3d8ef48baee2522ab762556a9",
-        strip_prefix = "eigen-eigen-fd6845384b86",
-        build_file = clean_dep("//third_party:eigen.BUILD"),
     )
 
     tf_http_archive(
         name = "arm_compiler",
+        build_file = clean_dep("//:arm_compiler.BUILD"),
         sha256 = "970285762565c7890c6c087d262b0a18286e7d0384f13a37786d8521773bc969",
         strip_prefix = "tools-0e906ebc527eab1cdbf7adabff5b474da9562e9f/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf",
         urls = [
@@ -139,216 +140,211 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
             # remove the whitelist entry in third_party/repo.bzl.
             # "https://github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz",
         ],
-        build_file = clean_dep("//:arm_compiler.BUILD"),
     )
 
     tf_http_archive(
         name = "libxsmm_archive",
+        build_file = clean_dep("//third_party:libxsmm.BUILD"),
+        sha256 = "cd8532021352b4a0290d209f7f9bfd7c2411e08286a893af3577a43457287bfa",
+        strip_prefix = "libxsmm-1.9",
         urls = [
             "https://mirror.bazel.build/github.com/hfp/libxsmm/archive/1.9.tar.gz",
             "https://github.com/hfp/libxsmm/archive/1.9.tar.gz",
         ],
-        sha256 = "cd8532021352b4a0290d209f7f9bfd7c2411e08286a893af3577a43457287bfa",
-        strip_prefix = "libxsmm-1.9",
-        build_file = clean_dep("//third_party:libxsmm.BUILD"),
     )
 
     tf_http_archive(
         name = "ortools_archive",
+        build_file = clean_dep("//third_party:ortools.BUILD"),
+        sha256 = "d025a95f78b5fc5eaa4da5f395f23d11c23cf7dbd5069f1f627f002de87b86b9",
+        strip_prefix = "or-tools-6.7.2/src",
         urls = [
             "https://mirror.bazel.build/github.com/google/or-tools/archive/v6.7.2.tar.gz",
             "https://github.com/google/or-tools/archive/v6.7.2.tar.gz",
         ],
-        sha256 = "d025a95f78b5fc5eaa4da5f395f23d11c23cf7dbd5069f1f627f002de87b86b9",
-        strip_prefix = "or-tools-6.7.2/src",
-        build_file = clean_dep("//third_party:ortools.BUILD"),
     )
 
     tf_http_archive(
         name = "com_googlesource_code_re2",
+        sha256 = "803c7811146edeef8f91064de37c6f19136ff01a2a8cdb3230e940b2fd9f07fe",
+        strip_prefix = "re2-2018-07-01",
+        system_build_file = clean_dep("//third_party/systemlibs:re2.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/google/re2/archive/2018-07-01.tar.gz",
             "https://github.com/google/re2/archive/2018-07-01.tar.gz",
         ],
-        sha256 = "803c7811146edeef8f91064de37c6f19136ff01a2a8cdb3230e940b2fd9f07fe",
-        strip_prefix = "re2-2018-07-01",
-        system_build_file = clean_dep("//third_party/systemlibs:re2.BUILD"),
     )
 
     tf_http_archive(
         name = "com_github_googlecloudplatform_google_cloud_cpp",
-        urls = [
-            "https://mirror.bazel.build/github.com/GoogleCloudPlatform/google-cloud-cpp/archive/14760a86c4ffab9943b476305c4fe927ad95db1c.tar.gz",
-            "https://github.com/GoogleCloudPlatform/google-cloud-cpp/archive/14760a86c4ffab9943b476305c4fe927ad95db1c.tar.gz",
-        ],
         sha256 = "fdd3b3aecce60987e5525e55bf3a21d68a8695320bd5b980775af6507eec3944",
         strip_prefix = "google-cloud-cpp-14760a86c4ffab9943b476305c4fe927ad95db1c",
         system_build_file = clean_dep("//third_party/systemlibs:google_cloud_cpp.BUILD"),
         system_link_files = {
             "//third_party/systemlibs:google_cloud_cpp.google.cloud.bigtable.BUILD": "google/cloud/bigtable/BUILD",
         },
+        urls = [
+            "https://mirror.bazel.build/github.com/GoogleCloudPlatform/google-cloud-cpp/archive/14760a86c4ffab9943b476305c4fe927ad95db1c.tar.gz",
+            "https://github.com/GoogleCloudPlatform/google-cloud-cpp/archive/14760a86c4ffab9943b476305c4fe927ad95db1c.tar.gz",
+        ],
     )
 
     tf_http_archive(
         name = "com_github_googleapis_googleapis",
+        build_file = clean_dep("//third_party:googleapis.BUILD"),
+        sha256 = "824870d87a176f26bcef663e92051f532fac756d1a06b404055dc078425f4378",
+        strip_prefix = "googleapis-f81082ea1e2f85c43649bee26e0d9871d4b41cdb",
+        system_build_file = clean_dep("//third_party/systemlibs:googleapis.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/googleapis/googleapis/archive/f81082ea1e2f85c43649bee26e0d9871d4b41cdb.zip",
             "https://github.com/googleapis/googleapis/archive/f81082ea1e2f85c43649bee26e0d9871d4b41cdb.zip",
         ],
-        sha256 = "824870d87a176f26bcef663e92051f532fac756d1a06b404055dc078425f4378",
-        strip_prefix = "googleapis-f81082ea1e2f85c43649bee26e0d9871d4b41cdb",
-        build_file = clean_dep("//third_party:googleapis.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:googleapis.BUILD"),
     )
 
     tf_http_archive(
         name = "gemmlowp",
+        sha256 = "b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658",
+        strip_prefix = "gemmlowp-38ebac7b059e84692f53e5938f97a9943c120d98",
         urls = [
             "https://mirror.bazel.build/github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip",
             "https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip",
         ],
-        sha256 = "b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658",
-        strip_prefix = "gemmlowp-38ebac7b059e84692f53e5938f97a9943c120d98",
     )
 
     tf_http_archive(
         name = "farmhash_archive",
+        build_file = clean_dep("//third_party:farmhash.BUILD"),
+        sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0",
+        strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45",
         urls = [
             "https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz",
             "https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz",
         ],
-        sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0",
-        strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45",
-        build_file = clean_dep("//third_party:farmhash.BUILD"),
     )
 
     tf_http_archive(
         name = "highwayhash",
+        build_file = clean_dep("//third_party:highwayhash.BUILD"),
+        sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37",
+        strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968",
         urls = [
             "http://mirror.bazel.build/github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz",
             "https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz",
         ],
-        sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37",
-        strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968",
-        build_file = clean_dep("//third_party:highwayhash.BUILD"),
     )
 
     tf_http_archive(
         name = "nasm",
+        build_file = clean_dep("//third_party:nasm.BUILD"),
+        sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011",
+        strip_prefix = "nasm-2.13.03",
+        system_build_file = clean_dep("//third_party/systemlibs:nasm.BUILD"),
         urls = [
             "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
             "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.13.03.tar.bz2/sha512/d7a6b4cee8dfd603d8d4c976e5287b5cc542fa0b466ff989b743276a6e28114e64289bf02a7819eca63142a5278aa6eed57773007e5f589e15768e6456a8919d/nasm-2.13.03.tar.bz2",
             "http://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
         ],
-        sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011",
-        strip_prefix = "nasm-2.13.03",
-        build_file = clean_dep("//third_party:nasm.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:nasm.BUILD"),
     )
 
     tf_http_archive(
         name = "jpeg",
+        build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"),
+        sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b",
+        strip_prefix = "libjpeg-turbo-2.0.0",
+        system_build_file = clean_dep("//third_party/systemlibs:jpeg.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
             "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
         ],
-        sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b",
-        strip_prefix = "libjpeg-turbo-2.0.0",
-        build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:jpeg.BUILD"),
     )
 
     tf_http_archive(
         name = "png_archive",
+        build_file = clean_dep("//third_party:png.BUILD"),
+        patch_file = clean_dep("//third_party:png_fix_rpi.patch"),
+        sha256 = "e45ce5f68b1d80e2cb9a2b601605b374bdf51e1798ef1c2c2bd62131dfcf9eef",
+        strip_prefix = "libpng-1.6.34",
+        system_build_file = clean_dep("//third_party/systemlibs:png.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.6.34.tar.gz",
             "https://github.com/glennrp/libpng/archive/v1.6.34.tar.gz",
         ],
-        sha256 = "e45ce5f68b1d80e2cb9a2b601605b374bdf51e1798ef1c2c2bd62131dfcf9eef",
-        strip_prefix = "libpng-1.6.34",
-        build_file = clean_dep("//third_party:png.BUILD"),
-        patch_file = clean_dep("//third_party:png_fix_rpi.patch"),
-        system_build_file = clean_dep("//third_party/systemlibs:png.BUILD"),
     )
 
     tf_http_archive(
         name = "org_sqlite",
+        build_file = clean_dep("//third_party:sqlite.BUILD"),
+        sha256 = "ad68c1216c3a474cf360c7581a4001e952515b3649342100f2d7ca7c8e313da6",
+        strip_prefix = "sqlite-amalgamation-3240000",
+        system_build_file = clean_dep("//third_party/systemlibs:sqlite.BUILD"),
         urls = [
             "https://mirror.bazel.build/www.sqlite.org/2018/sqlite-amalgamation-3240000.zip",
             "https://www.sqlite.org/2018/sqlite-amalgamation-3240000.zip",
         ],
-        sha256 = "ad68c1216c3a474cf360c7581a4001e952515b3649342100f2d7ca7c8e313da6",
-        strip_prefix = "sqlite-amalgamation-3240000",
-        build_file = clean_dep("//third_party:sqlite.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:sqlite.BUILD"),
     )
 
     tf_http_archive(
         name = "gif_archive",
+        build_file = clean_dep("//third_party:gif.BUILD"),
+        sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1",
+        strip_prefix = "giflib-5.1.4",
+        system_build_file = clean_dep("//third_party/systemlibs:gif.BUILD"),
         urls = [
             "https://mirror.bazel.build/ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz",
             "http://pilotfiber.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz",
         ],
-        sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1",
-        strip_prefix = "giflib-5.1.4",
-        build_file = clean_dep("//third_party:gif.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:gif.BUILD"),
     )
 
     tf_http_archive(
         name = "six_archive",
+        build_file = clean_dep("//third_party:six.BUILD"),
+        sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a",
+        strip_prefix = "six-1.10.0",
+        system_build_file = clean_dep("//third_party/systemlibs:six.BUILD"),
         urls = [
             "https://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
             "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
         ],
-        sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a",
-        strip_prefix = "six-1.10.0",
-        build_file = clean_dep("//third_party:six.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:six.BUILD"),
     )
 
     tf_http_archive(
         name = "astor_archive",
+        build_file = clean_dep("//third_party:astor.BUILD"),
+        sha256 = "ff6d2e2962d834acb125cc4dcc80c54a8c17c253f4cc9d9c43b5102a560bb75d",
+        strip_prefix = "astor-0.6.2",
+        system_build_file = clean_dep("//third_party/systemlibs:astor.BUILD"),
         urls = [
             "https://mirror.bazel.build/pypi.python.org/packages/d8/be/c4276b3199ec3feee2a88bc64810fbea8f26d961e0a4cd9c68387a9f35de/astor-0.6.2.tar.gz",
             "https://pypi.python.org/packages/d8/be/c4276b3199ec3feee2a88bc64810fbea8f26d961e0a4cd9c68387a9f35de/astor-0.6.2.tar.gz",
         ],
-        sha256 = "ff6d2e2962d834acb125cc4dcc80c54a8c17c253f4cc9d9c43b5102a560bb75d",
-        strip_prefix = "astor-0.6.2",
-        build_file = clean_dep("//third_party:astor.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:astor.BUILD"),
     )
 
     tf_http_archive(
         name = "gast_archive",
+        build_file = clean_dep("//third_party:gast.BUILD"),
+        sha256 = "7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930",
+        strip_prefix = "gast-0.2.0",
+        system_build_file = clean_dep("//third_party/systemlibs:gast.BUILD"),
         urls = [
             "https://mirror.bazel.build/pypi.python.org/packages/5c/78/ff794fcae2ce8aa6323e789d1f8b3b7765f601e7702726f430e814822b96/gast-0.2.0.tar.gz",
             "https://pypi.python.org/packages/5c/78/ff794fcae2ce8aa6323e789d1f8b3b7765f601e7702726f430e814822b96/gast-0.2.0.tar.gz",
         ],
-        sha256 = "7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930",
-        strip_prefix = "gast-0.2.0",
-        build_file = clean_dep("//third_party:gast.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:gast.BUILD"),
     )
 
     tf_http_archive(
         name = "termcolor_archive",
+        build_file = clean_dep("//third_party:termcolor.BUILD"),
+        sha256 = "1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b",
+        strip_prefix = "termcolor-1.1.0",
+        system_build_file = clean_dep("//third_party/systemlibs:termcolor.BUILD"),
         urls = [
             "https://mirror.bazel.build/pypi.python.org/packages/8a/48/a76be51647d0eb9f10e2a4511bf3ffb8cc1e6b14e9e4fab46173aa79f981/termcolor-1.1.0.tar.gz",
             "https://pypi.python.org/packages/8a/48/a76be51647d0eb9f10e2a4511bf3ffb8cc1e6b14e9e4fab46173aa79f981/termcolor-1.1.0.tar.gz",
         ],
-        sha256 = "1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b",
-        strip_prefix = "termcolor-1.1.0",
-        build_file = clean_dep("//third_party:termcolor.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:termcolor.BUILD"),
     )
 
     tf_http_archive(
         name = "absl_py",
-        urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz",
-            "https://github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz",
-        ],
         sha256 = "95160f778a62c7a60ddeadc7bf2d83f85a23a27359814aca12cf949e896fa82c",
         strip_prefix = "abseil-py-pypi-v0.2.2",
         system_build_file = clean_dep("//third_party/systemlibs:absl_py.BUILD"),
@@ -356,17 +352,21 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
             "//third_party/systemlibs:absl_py.absl.flags.BUILD": "absl/flags/BUILD",
             "//third_party/systemlibs:absl_py.absl.testing.BUILD": "absl/testing/BUILD",
         },
+        urls = [
+            "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz",
+            "https://github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz",
+        ],
     )
 
     tf_http_archive(
         name = "org_python_pypi_backports_weakref",
+        build_file = clean_dep("//third_party:backports_weakref.BUILD"),
+        sha256 = "8813bf712a66b3d8b85dc289e1104ed220f1878cf981e2fe756dfaabe9a82892",
+        strip_prefix = "backports.weakref-1.0rc1/src",
         urls = [
             "https://mirror.bazel.build/pypi.python.org/packages/bc/cc/3cdb0a02e7e96f6c70bd971bc8a90b8463fda83e264fa9c5c1c98ceabd81/backports.weakref-1.0rc1.tar.gz",
             "https://pypi.python.org/packages/bc/cc/3cdb0a02e7e96f6c70bd971bc8a90b8463fda83e264fa9c5c1c98ceabd81/backports.weakref-1.0rc1.tar.gz",
         ],
-        sha256 = "8813bf712a66b3d8b85dc289e1104ed220f1878cf981e2fe756dfaabe9a82892",
-        strip_prefix = "backports.weakref-1.0rc1/src",
-        build_file = clean_dep("//third_party:backports_weakref.BUILD"),
     )
 
     filegroup_external(
@@ -389,9 +389,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "protobuf_archive",
-        urls = PROTOBUF_URLS,
         sha256 = PROTOBUF_SHA256,
         strip_prefix = PROTOBUF_STRIP_PREFIX,
+        urls = PROTOBUF_URLS,
     )
 
     # We need to import the protobuf library under the names com_google_protobuf
@@ -399,222 +399,222 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     # Unfortunately there is no way to alias http_archives at the moment.
     tf_http_archive(
         name = "com_google_protobuf",
-        urls = PROTOBUF_URLS,
         sha256 = PROTOBUF_SHA256,
         strip_prefix = PROTOBUF_STRIP_PREFIX,
+        urls = PROTOBUF_URLS,
     )
 
     tf_http_archive(
         name = "com_google_protobuf_cc",
-        urls = PROTOBUF_URLS,
         sha256 = PROTOBUF_SHA256,
         strip_prefix = PROTOBUF_STRIP_PREFIX,
+        urls = PROTOBUF_URLS,
     )
 
     tf_http_archive(
         name = "nsync",
+        sha256 = "692f9b30e219f71a6371b98edd39cef3cbda35ac3abc4cd99ce19db430a5591a",
+        strip_prefix = "nsync-1.20.1",
+        system_build_file = clean_dep("//third_party/systemlibs:nsync.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/google/nsync/archive/1.20.1.tar.gz",
             "https://github.com/google/nsync/archive/1.20.1.tar.gz",
         ],
-        sha256 = "692f9b30e219f71a6371b98edd39cef3cbda35ac3abc4cd99ce19db430a5591a",
-        strip_prefix = "nsync-1.20.1",
-        system_build_file = clean_dep("//third_party/systemlibs:nsync.BUILD"),
     )
 
     tf_http_archive(
         name = "com_google_googletest",
+        sha256 = "353ab86e35cea1cd386115279cf4b16695bbf21b897bfbf2721cf4cb5f64ade8",
+        strip_prefix = "googletest-997d343dd680e541ef96ce71ee54a91daf2577a0",
         urls = [
             "https://mirror.bazel.build/github.com/google/googletest/archive/997d343dd680e541ef96ce71ee54a91daf2577a0.zip",
             "https://github.com/google/googletest/archive/997d343dd680e541ef96ce71ee54a91daf2577a0.zip",
         ],
-        sha256 = "353ab86e35cea1cd386115279cf4b16695bbf21b897bfbf2721cf4cb5f64ade8",
-        strip_prefix = "googletest-997d343dd680e541ef96ce71ee54a91daf2577a0",
     )
 
     tf_http_archive(
         name = "com_github_gflags_gflags",
+        sha256 = "ae27cdbcd6a2f935baa78e4f21f675649271634c092b1be01469440495609d0e",
+        strip_prefix = "gflags-2.2.1",
         urls = [
             "https://mirror.bazel.build/github.com/gflags/gflags/archive/v2.2.1.tar.gz",
             "https://github.com/gflags/gflags/archive/v2.2.1.tar.gz",
         ],
-        sha256 = "ae27cdbcd6a2f935baa78e4f21f675649271634c092b1be01469440495609d0e",
-        strip_prefix = "gflags-2.2.1",
     )
 
     tf_http_archive(
         name = "pcre",
+        build_file = clean_dep("//third_party:pcre.BUILD"),
         sha256 = "69acbc2fbdefb955d42a4c606dfde800c2885711d2979e356c0636efde9ec3b5",
+        strip_prefix = "pcre-8.42",
+        system_build_file = clean_dep("//third_party/systemlibs:pcre.BUILD"),
         urls = [
             "https://mirror.bazel.build/ftp.exim.org/pub/pcre/pcre-8.42.tar.gz",
             "http://ftp.exim.org/pub/pcre/pcre-8.42.tar.gz",
         ],
-        strip_prefix = "pcre-8.42",
-        build_file = clean_dep("//third_party:pcre.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:pcre.BUILD"),
     )
 
     tf_http_archive(
         name = "swig",
+        build_file = clean_dep("//third_party:swig.BUILD"),
         sha256 = "58a475dbbd4a4d7075e5fe86d4e54c9edde39847cdb96a3053d87cb64a23a453",
+        strip_prefix = "swig-3.0.8",
+        system_build_file = clean_dep("//third_party/systemlibs:swig.BUILD"),
         urls = [
             "https://mirror.bazel.build/ufpr.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz",
             "http://ufpr.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz",
             "http://pilotfiber.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz",
         ],
-        strip_prefix = "swig-3.0.8",
-        build_file = clean_dep("//third_party:swig.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:swig.BUILD"),
     )
 
     tf_http_archive(
         name = "curl",
+        build_file = clean_dep("//third_party:curl.BUILD"),
         sha256 = "e9c37986337743f37fd14fe8737f246e97aec94b39d1b71e8a5973f72a9fc4f5",
+        strip_prefix = "curl-7.60.0",
+        system_build_file = clean_dep("//third_party/systemlibs:curl.BUILD"),
         urls = [
             "https://mirror.bazel.build/curl.haxx.se/download/curl-7.60.0.tar.gz",
             "https://curl.haxx.se/download/curl-7.60.0.tar.gz",
         ],
-        strip_prefix = "curl-7.60.0",
-        build_file = clean_dep("//third_party:curl.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:curl.BUILD"),
     )
 
     tf_http_archive(
         name = "grpc",
+        sha256 = "50db9cf2221354485eb7c3bd55a4c27190caef7048a2a1a15fbe60a498f98b44",
+        strip_prefix = "grpc-1.13.0",
+        system_build_file = clean_dep("//third_party/systemlibs:grpc.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/grpc/grpc/archive/v1.13.0.tar.gz",
             "https://github.com/grpc/grpc/archive/v1.13.0.tar.gz",
         ],
-        sha256 = "50db9cf2221354485eb7c3bd55a4c27190caef7048a2a1a15fbe60a498f98b44",
-        strip_prefix = "grpc-1.13.0",
-        system_build_file = clean_dep("//third_party/systemlibs:grpc.BUILD"),
     )
 
     tf_http_archive(
         name = "linenoise",
+        build_file = clean_dep("//third_party:linenoise.BUILD"),
         sha256 = "7f51f45887a3d31b4ce4fa5965210a5e64637ceac12720cfce7954d6a2e812f7",
+        strip_prefix = "linenoise-c894b9e59f02203dbe4e2be657572cf88c4230c3",
         urls = [
             "https://mirror.bazel.build/github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz",
             "https://github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz",
         ],
-        strip_prefix = "linenoise-c894b9e59f02203dbe4e2be657572cf88c4230c3",
-        build_file = clean_dep("//third_party:linenoise.BUILD"),
     )
 
     # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror.
     # Switch to an official source of snapshots if/when possible.
     tf_http_archive(
         name = "llvm",
+        build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
+        sha256 = "a4f8bfe7e3e69069934a87e612a1d4d3b8b6af13e0f1213a42a6046e1bcd50d8",
+        strip_prefix = "llvm-d3429e96fe1e45b1dc0106463832523f37faf271",
         urls = [
             "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/d3429e96fe1e45b1dc0106463832523f37faf271.tar.gz",
             "https://github.com/llvm-mirror/llvm/archive/d3429e96fe1e45b1dc0106463832523f37faf271.tar.gz",
         ],
-        sha256 = "a4f8bfe7e3e69069934a87e612a1d4d3b8b6af13e0f1213a42a6046e1bcd50d8",
-        strip_prefix = "llvm-d3429e96fe1e45b1dc0106463832523f37faf271",
-        build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
     )
 
     tf_http_archive(
         name = "lmdb",
+        build_file = clean_dep("//third_party:lmdb.BUILD"),
+        sha256 = "f3927859882eb608868c8c31586bb7eb84562a40a6bf5cc3e13b6b564641ea28",
+        strip_prefix = "lmdb-LMDB_0.9.22/libraries/liblmdb",
+        system_build_file = clean_dep("//third_party/systemlibs:lmdb.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/LMDB/lmdb/archive/LMDB_0.9.22.tar.gz",
             "https://github.com/LMDB/lmdb/archive/LMDB_0.9.22.tar.gz",
         ],
-        sha256 = "f3927859882eb608868c8c31586bb7eb84562a40a6bf5cc3e13b6b564641ea28",
-        strip_prefix = "lmdb-LMDB_0.9.22/libraries/liblmdb",
-        build_file = clean_dep("//third_party:lmdb.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:lmdb.BUILD"),
     )
 
     tf_http_archive(
         name = "jsoncpp_git",
+        build_file = clean_dep("//third_party:jsoncpp.BUILD"),
+        sha256 = "c49deac9e0933bcb7044f08516861a2d560988540b23de2ac1ad443b219afdb6",
+        strip_prefix = "jsoncpp-1.8.4",
+        system_build_file = clean_dep("//third_party/systemlibs:jsoncpp.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz",
             "https://github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz",
         ],
-        sha256 = "c49deac9e0933bcb7044f08516861a2d560988540b23de2ac1ad443b219afdb6",
-        strip_prefix = "jsoncpp-1.8.4",
-        build_file = clean_dep("//third_party:jsoncpp.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:jsoncpp.BUILD"),
     )
 
     tf_http_archive(
         name = "boringssl",
+        sha256 = "1188e29000013ed6517168600fc35a010d58c5d321846d6a6dfee74e4c788b45",
+        strip_prefix = "boringssl-7f634429a04abc48e2eb041c81c5235816c96514",
+        system_build_file = clean_dep("//third_party/systemlibs:boringssl.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c5235816c96514.tar.gz",
             "https://github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c5235816c96514.tar.gz",
         ],
-        sha256 = "1188e29000013ed6517168600fc35a010d58c5d321846d6a6dfee74e4c788b45",
-        strip_prefix = "boringssl-7f634429a04abc48e2eb041c81c5235816c96514",
-        system_build_file = clean_dep("//third_party/systemlibs:boringssl.BUILD"),
     )
 
     tf_http_archive(
         name = "zlib_archive",
+        build_file = clean_dep("//third_party:zlib.BUILD"),
+        sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1",
+        strip_prefix = "zlib-1.2.11",
+        system_build_file = clean_dep("//third_party/systemlibs:zlib.BUILD"),
         urls = [
             "https://mirror.bazel.build/zlib.net/zlib-1.2.11.tar.gz",
             "https://zlib.net/zlib-1.2.11.tar.gz",
         ],
-        sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1",
-        strip_prefix = "zlib-1.2.11",
-        build_file = clean_dep("//third_party:zlib.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:zlib.BUILD"),
     )
 
     tf_http_archive(
         name = "fft2d",
+        build_file = clean_dep("//third_party/fft2d:fft2d.BUILD"),
+        sha256 = "52bb637c70b971958ec79c9c8752b1df5ff0218a4db4510e60826e0cb79b5296",
         urls = [
             "https://mirror.bazel.build/www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz",
             "http://www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz",
         ],
-        sha256 = "52bb637c70b971958ec79c9c8752b1df5ff0218a4db4510e60826e0cb79b5296",
-        build_file = clean_dep("//third_party/fft2d:fft2d.BUILD"),
     )
 
     tf_http_archive(
         name = "snappy",
+        build_file = clean_dep("//third_party:snappy.BUILD"),
+        sha256 = "3dfa02e873ff51a11ee02b9ca391807f0c8ea0529a4924afa645fbf97163f9d4",
+        strip_prefix = "snappy-1.1.7",
+        system_build_file = clean_dep("//third_party/systemlibs:snappy.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/google/snappy/archive/1.1.7.tar.gz",
             "https://github.com/google/snappy/archive/1.1.7.tar.gz",
         ],
-        sha256 = "3dfa02e873ff51a11ee02b9ca391807f0c8ea0529a4924afa645fbf97163f9d4",
-        strip_prefix = "snappy-1.1.7",
-        build_file = clean_dep("//third_party:snappy.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:snappy.BUILD"),
     )
 
     tf_http_archive(
         name = "nccl_archive",
+        build_file = clean_dep("//third_party:nccl/nccl_archive.BUILD"),
+        sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176",
+        strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7",
         urls = [
             "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz",
             "https://github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz",
         ],
-        sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176",
-        strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7",
-        build_file = clean_dep("//third_party:nccl/nccl_archive.BUILD"),
     )
 
     tf_http_archive(
         name = "kafka",
+        build_file = clean_dep("//third_party:kafka/BUILD"),
+        patch_file = clean_dep("//third_party/kafka:config.patch"),
+        sha256 = "cc6ebbcd0a826eec1b8ce1f625ffe71b53ef3290f8192b6cae38412a958f4fd3",
+        strip_prefix = "librdkafka-0.11.5",
         urls = [
             "https://mirror.bazel.build/github.com/edenhill/librdkafka/archive/v0.11.5.tar.gz",
             "https://github.com/edenhill/librdkafka/archive/v0.11.5.tar.gz",
         ],
-        sha256 = "cc6ebbcd0a826eec1b8ce1f625ffe71b53ef3290f8192b6cae38412a958f4fd3",
-        strip_prefix = "librdkafka-0.11.5",
-        build_file = clean_dep("//third_party:kafka/BUILD"),
-        patch_file = clean_dep("//third_party/kafka:config.patch"),
     )
 
     tf_http_archive(
         name = "aws",
+        build_file = clean_dep("//third_party:aws.BUILD"),
+        sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c",
+        strip_prefix = "aws-sdk-cpp-1.3.15",
         urls = [
             "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
             "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
         ],
-        sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c",
-        strip_prefix = "aws-sdk-cpp-1.3.15",
-        build_file = clean_dep("//third_party:aws.BUILD"),
     )
 
     java_import_external(
@@ -644,14 +644,14 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "jemalloc",
+        build_file = clean_dep("//third_party:jemalloc.BUILD"),
+        sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8",
+        strip_prefix = "jemalloc-4.4.0",
+        system_build_file = clean_dep("//third_party/systemlibs:jemalloc.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
             "https://github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
         ],
-        sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8",
-        strip_prefix = "jemalloc-4.4.0",
-        build_file = clean_dep("//third_party:jemalloc.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:jemalloc.BUILD"),
     )
 
     java_import_external(
@@ -700,196 +700,196 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "com_google_pprof",
+        build_file = clean_dep("//third_party:pprof.BUILD"),
+        sha256 = "e0928ca4aa10ea1e0551e2d7ce4d1d7ea2d84b2abbdef082b0da84268791d0c4",
+        strip_prefix = "pprof-c0fb62ec88c411cc91194465e54db2632845b650",
         urls = [
             "https://mirror.bazel.build/github.com/google/pprof/archive/c0fb62ec88c411cc91194465e54db2632845b650.tar.gz",
             "https://github.com/google/pprof/archive/c0fb62ec88c411cc91194465e54db2632845b650.tar.gz",
         ],
-        sha256 = "e0928ca4aa10ea1e0551e2d7ce4d1d7ea2d84b2abbdef082b0da84268791d0c4",
-        strip_prefix = "pprof-c0fb62ec88c411cc91194465e54db2632845b650",
-        build_file = clean_dep("//third_party:pprof.BUILD"),
     )
 
     tf_http_archive(
         name = "cub_archive",
+        build_file = clean_dep("//third_party:cub.BUILD"),
+        sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3",
+        strip_prefix = "cub-1.8.0",
         urls = [
             "https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.8.0.zip",
             "https://github.com/NVlabs/cub/archive/1.8.0.zip",
         ],
-        sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3",
-        strip_prefix = "cub-1.8.0",
-        build_file = clean_dep("//third_party:cub.BUILD"),
     )
 
     tf_http_archive(
         name = "cython",
+        build_file = clean_dep("//third_party:cython.BUILD"),
+        delete = ["BUILD.bazel"],
         sha256 = "bccc9aa050ea02595b2440188813b936eaf345e85fb9692790cecfe095cf91aa",
+        strip_prefix = "cython-0.28.4",
+        system_build_file = clean_dep("//third_party/systemlibs:cython.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/cython/cython/archive/0.28.4.tar.gz",
             "https://github.com/cython/cython/archive/0.28.4.tar.gz",
         ],
-        strip_prefix = "cython-0.28.4",
-        build_file = clean_dep("//third_party:cython.BUILD"),
-        delete = ["BUILD.bazel"],
-        system_build_file = clean_dep("//third_party/systemlibs:cython.BUILD"),
     )
 
     tf_http_archive(
         name = "bazel_toolchains",
+        sha256 = "3b604699685c5c65dd3f6f17425570a4b2f00ddba2f750db15acc72e55bb098b",
+        strip_prefix = "bazel-toolchains-37acf1841ab1475c98a152cb9e446460c8ae29e1",
         urls = [
             "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz",
             "https://github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz",
         ],
-        strip_prefix = "bazel-toolchains-37acf1841ab1475c98a152cb9e446460c8ae29e1",
-        sha256 = "3b604699685c5c65dd3f6f17425570a4b2f00ddba2f750db15acc72e55bb098b",
     )
 
     tf_http_archive(
         name = "arm_neon_2_x86_sse",
+        build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"),
         sha256 = "c8d90aa4357f8079d427e87a6f4c493da1fa4140aee926c05902d7ec1533d9a5",
         strip_prefix = "ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d",
         urls = [
             "https://mirror.bazel.build/github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz",
             "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz",
         ],
-        build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"),
     )
 
     tf_http_archive(
         name = "double_conversion",
+        build_file = clean_dep("//third_party:double_conversion.BUILD"),
+        sha256 = "2f7fbffac0d98d201ad0586f686034371a6d152ca67508ab611adc2386ad30de",
+        strip_prefix = "double-conversion-3992066a95b823efc8ccc1baf82a1cfc73f6e9b8",
+        system_build_file = clean_dep("//third_party/systemlibs:double_conversion.BUILD"),
         urls = [
             "https://mirror.bazel.build/github.com/google/double-conversion/archive/3992066a95b823efc8ccc1baf82a1cfc73f6e9b8.zip",
             "https://github.com/google/double-conversion/archive/3992066a95b823efc8ccc1baf82a1cfc73f6e9b8.zip",
         ],
-        sha256 = "2f7fbffac0d98d201ad0586f686034371a6d152ca67508ab611adc2386ad30de",
-        strip_prefix = "double-conversion-3992066a95b823efc8ccc1baf82a1cfc73f6e9b8",
-        build_file = clean_dep("//third_party:double_conversion.BUILD"),
-        system_build_file = clean_dep("//third_party/systemlibs:double_conversion.BUILD"),
     )
 
     tf_http_archive(
         name = "tflite_mobilenet",
+        build_file = clean_dep("//third_party:tflite_mobilenet.BUILD"),
         sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b",
         urls = [
             "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip",
             "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip",
         ],
-        build_file = clean_dep("//third_party:tflite_mobilenet.BUILD"),
     )
 
     tf_http_archive(
         name = "tflite_mobilenet_ssd",
+        build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
         sha256 = "767057f2837a46d97882734b03428e8dd640b93236052b312b2f0e45613c1cf0",
         urls = [
             "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_ssd_tflite_v1.zip",
             "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_ssd_tflite_v1.zip",
         ],
-        build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
     )
 
     tf_http_archive(
         name = "tflite_mobilenet_ssd_quant",
+        build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
         sha256 = "a809cd290b4d6a2e8a9d5dad076e0bd695b8091974e0eed1052b480b2f21b6dc",
         urls = [
             "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_0.75_quant_2018_06_29.zip",
             "https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_0.75_quant_2018_06_29.zip",
         ],
-        build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
     )
 
     tf_http_archive(
         name = "tflite_mobilenet_ssd_quant_protobuf",
+        build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
         sha256 = "09280972c5777f1aa775ef67cb4ac5d5ed21970acd8535aeca62450ef14f0d79",
+        strip_prefix = "ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18",
         urls = [
             "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18.tar.gz",
             "http://storage.googleapis.com/download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18.tar.gz",
         ],
-        strip_prefix = "ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18",
-        build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
     )
 
     tf_http_archive(
         name = "tflite_conv_actions_frozen",
+        build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
         sha256 = "d947b38cba389b5e2d0bfc3ea6cc49c784e187b41a071387b3742d1acac7691e",
         urls = [
             "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/conv_actions_tflite.zip",
             "https://storage.googleapis.com/download.tensorflow.org/models/tflite/conv_actions_tflite.zip",
         ],
-        build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
     )
 
     tf_http_archive(
         name = "tflite_smartreply",
+        build_file = clean_dep("//third_party:tflite_smartreply.BUILD"),
         sha256 = "8980151b85a87a9c1a3bb1ed4748119e4a85abd3cb5744d83da4d4bd0fbeef7c",
         urls = [
             "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip",
             "https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip",
         ],
-        build_file = clean_dep("//third_party:tflite_smartreply.BUILD"),
     )
 
     tf_http_archive(
         name = "tflite_ovic_testdata",
+        build_file = clean_dep("//third_party:tflite_ovic_testdata.BUILD"),
         sha256 = "a9a705d8d519220178e2e65d383fdb21da37fdb31d1e909b0a1acdac46479e9c",
+        strip_prefix = "ovic",
         urls = [
             "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/data/ovic.zip",
             "https://storage.googleapis.com/download.tensorflow.org/data/ovic.zip",
         ],
-        build_file = clean_dep("//third_party:tflite_ovic_testdata.BUILD"),
-        strip_prefix = "ovic",
     )
 
     tf_http_archive(
         name = "build_bazel_rules_android",
         sha256 = "cd06d15dd8bb59926e4d65f9003bfc20f9da4b2519985c27e190cddc8b7a7806",
+        strip_prefix = "rules_android-0.1.1",
         urls = [
             "https://mirror.bazel.build/github.com/bazelbuild/rules_android/archive/v0.1.1.zip",
             "https://github.com/bazelbuild/rules_android/archive/v0.1.1.zip",
         ],
-        strip_prefix = "rules_android-0.1.1",
     )
 
     tf_http_archive(
         name = "tbb",
+        build_file = clean_dep("//third_party/ngraph:tbb.BUILD"),
+        sha256 = "724686f90bcda78f13b76f297d964008737ccd6399328143c1c0093e73ae6a13",
+        strip_prefix = "tbb-tbb_2018",
         urls = [
             "https://mirror.bazel.build/github.com/01org/tbb/archive/tbb_2018.zip",
             "https://github.com/01org/tbb/archive/tbb_2018.zip",
         ],
-        sha256 = "724686f90bcda78f13b76f297d964008737ccd6399328143c1c0093e73ae6a13",
-        strip_prefix = "tbb-tbb_2018",
-        build_file = clean_dep("//third_party/ngraph:tbb.BUILD"),
     )
 
     tf_http_archive(
         name = "ngraph",
+        build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
+        sha256 = "bf9dcc88e5c66021e3aac80491a231711211540d613bf9b6bd28db3f5bb86b62",
+        strip_prefix = "ngraph-0.8.1",
         urls = [
             "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.8.1.tar.gz",
             "https://github.com/NervanaSystems/ngraph/archive/v0.8.1.tar.gz",
         ],
-        sha256 = "bf9dcc88e5c66021e3aac80491a231711211540d613bf9b6bd28db3f5bb86b62",
-        strip_prefix = "ngraph-0.8.1",
-        build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
     )
 
     tf_http_archive(
         name = "nlohmann_json_lib",
+        build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"),
+        sha256 = "9f3549824af3ca7e9707a2503959886362801fb4926b869789d6929098a79e47",
+        strip_prefix = "json-3.1.1",
         urls = [
             "https://mirror.bazel.build/github.com/nlohmann/json/archive/v3.1.1.tar.gz",
             "https://github.com/nlohmann/json/archive/v3.1.1.tar.gz",
         ],
-        sha256 = "9f3549824af3ca7e9707a2503959886362801fb4926b869789d6929098a79e47",
-        strip_prefix = "json-3.1.1",
-        build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"),
     )
 
     tf_http_archive(
         name = "ngraph_tf",
+        build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"),
+        sha256 = "402f84c748c113780a60f35f39aab118435285543aee4900d712b76fbf8a21ee",
+        strip_prefix = "ngraph-tf-0.6.1",
         urls = [
             "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.6.1.tar.gz",
             "https://github.com/NervanaSystems/ngraph-tf/archive/v0.6.1.tar.gz",
         ],
-        sha256 = "402f84c748c113780a60f35f39aab118435285543aee4900d712b76fbf8a21ee",
-        strip_prefix = "ngraph-tf-0.6.1",
-        build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"),
     )
 
     ##############################################################################
-- 
GitLab


From d56c298f1ef14b5a738e1e0b7bbc66fcd736be3e Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 27 Sep 2018 21:57:48 -0700
Subject: [PATCH 0845/1357] Remove AWS, GCP, Kafka and HDFS options from
 configure. Make them default on for supported operating systems.

PiperOrigin-RevId: 214886845
---
 configure.py                                  | 12 ---
 tensorflow/BUILD                              | 96 -------------------
 tensorflow/contrib/BUILD                      | 50 +++-------
 .../core/platform/default/build_config.bzl    | 45 ++++-----
 tensorflow/tools/lib_package/BUILD            | 38 ++++----
 tensorflow/tools/pip_package/BUILD            | 27 ++----
 6 files changed, 55 insertions(+), 213 deletions(-)

diff --git a/configure.py b/configure.py
index f71caa1994..55fce8b93b 100644
--- a/configure.py
+++ b/configure.py
@@ -1488,11 +1488,7 @@ def main():
   setup_python(environ_cp)
 
   if is_windows():
-    environ_cp['TF_NEED_AWS'] = '0'
-    environ_cp['TF_NEED_GCP'] = '0'
-    environ_cp['TF_NEED_HDFS'] = '0'
     environ_cp['TF_NEED_JEMALLOC'] = '0'
-    environ_cp['TF_NEED_KAFKA'] = '0'
     environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
     environ_cp['TF_NEED_COMPUTECPP'] = '0'
     environ_cp['TF_NEED_OPENCL'] = '0'
@@ -1518,14 +1514,6 @@ def main():
 
   set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc',
                 'with_jemalloc', True)
-  set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform',
-                'with_gcp_support', True, 'gcp')
-  set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System',
-                'with_hdfs_support', True, 'hdfs')
-  set_build_var(environ_cp, 'TF_NEED_AWS', 'Amazon AWS Platform',
-                'with_aws_support', True, 'aws')
-  set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform',
-                'with_kafka_support', True, 'kafka')
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
                 False, 'xla')
 
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 3610eea42a..5f73da68a2 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -224,60 +224,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-config_setting(
-    name = "with_gcp_support",
-    define_values = {"with_gcp_support": "true"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_hdfs_support",
-    define_values = {"with_hdfs_support": "true"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_aws_support",
-    define_values = {"with_aws_support": "true"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_kafka_support",
-    define_values = {"with_kafka_support": "true"},
-    visibility = ["//visibility:public"],
-)
-
-# Crosses between platforms and file system libraries not supported on those
-# platforms due to limitations in nested select() statements.
-config_setting(
-    name = "with_gcp_support_windows_override",
-    define_values = {"with_gcp_support": "true"},
-    values = {"cpu": "x64_windows"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_hdfs_support_windows_override",
-    define_values = {"with_hdfs_support": "true"},
-    values = {"cpu": "x64_windows"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_aws_support_windows_override",
-    define_values = {"with_aws_support": "true"},
-    values = {"cpu": "x64_windows"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_kafka_support_windows_override",
-    define_values = {"with_kafka_support": "true"},
-    values = {"cpu": "x64_windows"},
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "with_cuda_support_windows_override",
     define_values = {"using_cuda_nvcc": "true"},
@@ -285,48 +231,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-config_setting(
-    name = "with_gcp_support_android_override",
-    define_values = {"with_gcp_support": "true"},
-    values = {"crosstool_top": "//external:android/crosstool"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_hdfs_support_android_override",
-    define_values = {"with_hdfs_support": "true"},
-    values = {"crosstool_top": "//external:android/crosstool"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_aws_support_android_override",
-    define_values = {"with_aws_support": "true"},
-    values = {"crosstool_top": "//external:android/crosstool"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_gcp_support_ios_override",
-    define_values = {"with_gcp_support": "true"},
-    values = {"crosstool_top": "//tools/osx/crosstool:crosstool"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_hdfs_support_ios_override",
-    define_values = {"with_hdfs_support": "true"},
-    values = {"crosstool_top": "//tools/osx/crosstool:crosstool"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_aws_support_ios_override",
-    define_values = {"with_aws_support": "true"},
-    values = {"crosstool_top": "//tools/osx/crosstool:crosstool"},
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "with_xla_support",
     define_values = {"with_xla_support": "true"},
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index ae5ca32bcf..1a9ae8ac3a 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -112,26 +112,14 @@ py_library(
         "//tensorflow/python:util",
         "//tensorflow/python/estimator:estimator_py",
     ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + select({
-        "//tensorflow:with_kafka_support_windows_override": [],
-        "//tensorflow:with_kafka_support": [
-            "//tensorflow/contrib/kafka",
-        ],
-        "//conditions:default": [],
-    }) + select({
-        "//tensorflow:with_aws_support_windows_override": [],
-        "//tensorflow:with_aws_support": [
-            "//tensorflow/contrib/kinesis",
-        ],
-        "//conditions:default": [],
-    }) + if_not_windows_cuda([
-        "//tensorflow/contrib/fused_conv:fused_conv_py",  # unresolved symbols, need to export more symbols
-    ]) + if_not_windows([
-    ]) + select({
         "//tensorflow:linux_s390x": [],
         "//tensorflow:windows": [],
         "//conditions:default": [
             "//tensorflow/contrib/bigtable",
             "//tensorflow/contrib/cloud:cloud_py",
+            "//tensorflow/contrib/fused_conv:fused_conv_py",  # unresolved symbols, need to export more symbols
+            "//tensorflow/contrib/kafka",
+            "//tensorflow/contrib/kinesis",
             "//tensorflow/contrib/tensorrt:init_py",
             "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
         ],
@@ -159,20 +147,14 @@ cc_library(
     ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_cuda([
         "//tensorflow/contrib/nccl:nccl_kernels",
     ]) + select({
-        "//tensorflow:with_kafka_support_windows_override": [],
-        "//tensorflow:with_kafka_support": [
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//conditions:default": [
             "//tensorflow/contrib/kafka:dataset_kernels",
-        ],
-        "//conditions:default": [],
-    }) + select({
-        "//tensorflow:with_aws_support_windows_override": [],
-        "//tensorflow:with_aws_support": [
             "//tensorflow/contrib/kinesis:dataset_kernels",
+            "//tensorflow/contrib/tensorrt:trt_engine_op_kernel",
         ],
-        "//conditions:default": [],
-    }) + if_not_windows([
-        "//tensorflow/contrib/tensorrt:trt_engine_op_kernel",
-    ]),
+    }),
 )
 
 cc_library(
@@ -198,18 +180,12 @@ cc_library(
         "//tensorflow/contrib/text:all_ops",
         "//tensorflow/contrib/tpu:all_ops",
     ] + select({
-        "//tensorflow:with_kafka_support_windows_override": [],
-        "//tensorflow:with_kafka_support": [
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//conditions:default": [
             "//tensorflow/contrib/kafka:dataset_ops_op_lib",
-        ],
-        "//conditions:default": [],
-    }) + select({
-        "//tensorflow:with_aws_support_windows_override": [],
-        "//tensorflow:with_aws_support": [
             "//tensorflow/contrib/kinesis:dataset_ops_op_lib",
+            "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib",
         ],
-        "//conditions:default": [],
-    }) + if_not_windows([
-        "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib",
-    ]),
+    }),
 )
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index bb841aeab7..3b14757945 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -641,54 +641,41 @@ def tf_additional_lib_deps():
 
 def tf_additional_core_deps():
     return select({
-        "//tensorflow:with_gcp_support_android_override": [],
-        "//tensorflow:with_gcp_support_ios_override": [],
-        "//tensorflow:with_gcp_support": [
+        "//tensorflow:android": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//conditions:default": [
             "//tensorflow/core/platform/cloud:gcs_file_system",
-        ],
-        "//conditions:default": [],
-    }) + select({
-        "//tensorflow:with_hdfs_support_windows_override": [],
-        "//tensorflow:with_hdfs_support_android_override": [],
-        "//tensorflow:with_hdfs_support_ios_override": [],
-        "//tensorflow:with_hdfs_support": [
-            "//tensorflow/core/platform/hadoop:hadoop_file_system",
-        ],
-        "//conditions:default": [],
-    }) + select({
-        "//tensorflow:with_aws_support_windows_override": [],
-        "//tensorflow:with_aws_support_android_override": [],
-        "//tensorflow:with_aws_support_ios_override": [],
-        "//tensorflow:with_aws_support": [
             "//tensorflow/core/platform/s3:s3_file_system",
+            "//tensorflow/core/platform/hadoop:hadoop_file_system",
         ],
-        "//conditions:default": [],
     })
 
 # TODO(jart, jhseu): Delete when GCP is default on.
 def tf_additional_cloud_op_deps():
     return select({
-        "//tensorflow:with_gcp_support_windows_override": [],
-        "//tensorflow:with_gcp_support_android_override": [],
-        "//tensorflow:with_gcp_support_ios_override": [],
-        "//tensorflow:with_gcp_support": [
+        "//tensorflow:android": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//conditions:default": [
             "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib",
             "//tensorflow/contrib/cloud:gcs_config_ops_op_lib",
         ],
-        "//conditions:default": [],
     })
 
 # TODO(jart, jhseu): Delete when GCP is default on.
 def tf_additional_cloud_kernel_deps():
     return select({
-        "//tensorflow:with_gcp_support_windows_override": [],
-        "//tensorflow:with_gcp_support_android_override": [],
-        "//tensorflow:with_gcp_support_ios_override": [],
-        "//tensorflow:with_gcp_support": [
+        "//tensorflow:android": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//conditions:default": [
             "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops",
             "//tensorflow/contrib/cloud/kernels:gcs_config_ops",
         ],
-        "//conditions:default": [],
     })
 
 def tf_lib_proto_parsing_deps():
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index 095ac1f4cc..b9f4902639 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -137,16 +137,6 @@ genrule(
         "@snappy//:COPYING",
         "@zlib_archive//:zlib.h",
     ] + select({
-        "//tensorflow:with_aws_support": [
-            "@aws//:LICENSE",
-        ],
-        "//conditions:default": [],
-    }) + select({
-        "//tensorflow:with_gcp_support": [
-            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
-        ],
-        "//conditions:default": [],
-    }) + select({
         "//tensorflow:with_jemalloc_linux_x86_64": [
             "@jemalloc//:COPYING",
         ],
@@ -171,7 +161,14 @@ genrule(
             "@grpc//third_party/nanopb:LICENSE.txt",
             "@grpc//third_party/address_sorting:LICENSE",
         ],
-    ),
+    ) + select({
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//conditions:default": [
+            "@aws//:LICENSE",
+            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
+        ],
+    }),
     outs = ["include/tensorflow/c/LICENSE"],
     cmd = "$(location :concat_licenses.sh) $(SRCS) >$@",
     tools = [":concat_licenses.sh"],
@@ -205,16 +202,6 @@ genrule(
         "@snappy//:COPYING",
         "@zlib_archive//:zlib.h",
     ] + select({
-        "//tensorflow:with_aws_support": [
-            "@aws//:LICENSE",
-        ],
-        "//conditions:default": [],
-    }) + select({
-        "//tensorflow:with_gcp_support": [
-            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
-        ],
-        "//conditions:default": [],
-    }) + select({
         "//tensorflow:with_jemalloc_linux_x86_64": [
             "@jemalloc//:COPYING",
         ],
@@ -232,7 +219,14 @@ genrule(
     ]) + if_mkl([
         "//third_party/mkl:LICENSE",
         "//third_party/mkl_dnn:LICENSE",
-    ]),
+    ]) + select({
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//conditions:default": [
+            "@aws//:LICENSE",
+            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
+        ],
+    }),
     outs = ["include/tensorflow/jni/LICENSE"],
     cmd = "$(location :concat_licenses.sh) $(SRCS) >$@",
     tools = [":concat_licenses.sh"],
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index cce60ccea0..f1de22300b 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -169,17 +169,6 @@ filegroup(
         "@zlib_archive//:zlib.h",
         "@org_python_pypi_backports_weakref//:LICENSE",
     ] + select({
-        "//tensorflow:with_aws_support": [
-            "@aws//:LICENSE",
-        ],
-        "//conditions:default": [],
-    }) + select({
-        "//tensorflow:with_gcp_support": [
-            "@com_github_googleapis_googleapis//:LICENSE",
-            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
-        ],
-        "//conditions:default": [],
-    }) + select({
         "//tensorflow:with_jemalloc_linux_x86_64": [
             "@jemalloc//:COPYING",
         ],
@@ -187,11 +176,6 @@ filegroup(
             "@jemalloc//:COPYING",
         ],
         "//conditions:default": [],
-    }) + select({
-        "//tensorflow:with_kafka_support": [
-            "@kafka//:LICENSE",
-        ],
-        "//conditions:default": [],
     }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
@@ -215,7 +199,16 @@ filegroup(
         "@ngraph_tf//:LICENSE",
         "@nlohmann_json_lib//:LICENSE.MIT",
         "@tbb//:LICENSE",
-    ]) + tf_additional_license_deps(),
+    ]) + tf_additional_license_deps() + select({
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//conditions:default": [
+            "@aws//:LICENSE",
+            "@com_github_googleapis_googleapis//:LICENSE",
+            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
+            "@kafka//:LICENSE",
+        ],
+    }),
 )
 
 sh_binary(
-- 
GitLab


From 6ebe9baae06c06d0a70a424a55c78f5af07b49f7 Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Thu, 27 Sep 2018 22:57:39 -0700
Subject: [PATCH 0846/1357] Fix error that occurs when attempting to use
 TensorFlow optimizers with Keras and DistributionStrategy

PiperOrigin-RevId: 214890580
---
 .../contrib/distribute/python/combinations.py |   3 +
 .../contrib/distribute/python/keras_test.py   | 121 ++++---
 tensorflow/python/keras/engine/training.py    |   3 +-
 .../keras/engine/training_distributed.py      | 341 +++++++++---------
 4 files changed, 240 insertions(+), 228 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py
index 244d1fcec8..82ca041cc2 100644
--- a/tensorflow/contrib/distribute/python/combinations.py
+++ b/tensorflow/contrib/distribute/python/combinations.py
@@ -59,6 +59,7 @@ from tensorflow.python.training import adagrad
 from tensorflow.python.training import adam
 from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import rmsprop
 from tensorflow.python.util import tf_inspect
 
 
@@ -354,6 +355,8 @@ gradient_descent_optimizer_v1_fn = NamedObject(
     "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2))
 adagrad_optimizer_v1_fn = NamedObject(
     "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001))
+rmsprop_optimizer_v1_fn = NamedObject(
+    "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001))
 optimizers_v1 = [adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn,
                  adagrad_optimizer_v1_fn]
 
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index a0b8bde132..3aab2c521f 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -173,13 +173,42 @@ def batch_wrapper(dataset, batch_size, distribution):
     return dataset.batch(batch_size)
 
 
-def all_combinations():
+def get_model():
+  x = keras.layers.Input(shape=(3,), name='input')
+  y = keras.layers.Dense(4, name='dense')(x)
+  model = keras.Model(x, y)
+  return model
+
+
+def get_dataset(distribution):
+  inputs = np.zeros((10, 3), dtype=np.float32)
+  targets = np.zeros((10, 4), dtype=np.float32)
+  dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+  dataset = dataset.repeat(100)
+  dataset = batch_wrapper(dataset, 10, distribution)
+  return dataset
+
+
+strategies = [combinations.default_strategy,
+              combinations.one_device_strategy,
+              combinations.mirrored_strategy_with_gpu_and_cpu,
+              combinations.mirrored_strategy_with_two_gpus,
+              combinations.tpu_strategy_one_step]
+
+
+def strategy_combinations():
   return combinations.combine(
-      distribution=[combinations.default_strategy,
-                    combinations.one_device_strategy,
-                    combinations.mirrored_strategy_with_gpu_and_cpu,
-                    combinations.mirrored_strategy_with_two_gpus,
-                    combinations.tpu_strategy_one_step],
+      distribution=strategies,
+      mode=['graph'])
+
+
+def strategy_and_optimizer_combinations():
+  return combinations.combine(
+      distribution=strategies,
+      optimizer=[combinations.adagrad_optimizer_v1_fn,
+                 combinations.adam_optimizer_v1_fn,
+                 combinations.gradient_descent_optimizer_v1_fn,
+                 combinations.rmsprop_optimizer_v1_fn],
       mode=['graph'])
 
 
@@ -360,9 +389,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
   def test_calling_model_with_numpy_arrays(self):
     with self.cached_session():
-      x = keras.layers.Input(shape=(3,), name='input')
-      y = keras.layers.Dense(4, name='dense')(x)
-      model = keras.Model(x, y)
+      model = get_model()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
@@ -392,23 +419,17 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       # with batch_size
       model.predict(inputs, batch_size=8)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(strategy_combinations())
   def test_calling_model_on_same_dataset(self, distribution):
     with self.cached_session():
-      x = keras.layers.Input(shape=(3,), name='input')
-      y = keras.layers.Dense(4, name='dense')(x)
-      model = keras.Model(x, y)
+      model = get_model()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
       metrics = ['mae', keras.metrics.CategoricalAccuracy()]
       model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
 
-      inputs = np.zeros((10, 3), dtype=np.float32)
-      targets = np.zeros((10, 4), dtype=np.float32)
-      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
-      dataset = dataset.repeat(100)
-      dataset = batch_wrapper(dataset, 10, distribution)
+      dataset = get_dataset(distribution)
 
       # Call fit with validation data
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
@@ -461,23 +482,17 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(strategy_combinations())
   def test_fit_eval_and_predict_methods_on_dataset(self, distribution):
     with self.cached_session():
-      x = keras.layers.Input(shape=(3,), name='input')
-      y = keras.layers.Dense(4, name='dense')(x)
-      model = keras.Model(x, y)
+      model = get_model()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
       metrics = ['mae', keras.metrics.CategoricalAccuracy()]
       model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
 
-      inputs = np.zeros((10, 3), dtype=np.float32)
-      targets = np.zeros((10, 4), dtype=np.float32)
-      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
-      dataset = dataset.repeat(100)
-      dataset = batch_wrapper(dataset, 10, distribution)
+      dataset = get_dataset(distribution)
 
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
       model.evaluate(dataset, steps=2, verbose=1)
@@ -486,11 +501,23 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                 validation_data=dataset, validation_steps=2)
 
+  @combinations.generate(strategy_and_optimizer_combinations())
+  def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer):
+    with self.cached_session():
+      model = get_model()
+
+      loss = 'mse'
+      model.compile(optimizer(), loss, distribute=distribution)
+
+      dataset = get_dataset(distribution)
+
+      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
+      model.evaluate(dataset, steps=2, verbose=1)
+      model.predict(dataset, steps=2)
+
   def test_unsupported_features(self):
     with self.cached_session():
-      x = keras.layers.Input(shape=(3,), name='input')
-      y = keras.layers.Dense(4, name='dense')(x)
-      model = keras.Model(x, y)
+      model = get_model()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
@@ -500,11 +527,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
 
-      inputs = np.zeros((10, 3), dtype=np.float32)
-      targets = np.zeros((10, 4), dtype=np.float32)
-      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
-      dataset = dataset.repeat(100)
-      dataset = dataset.batch(10)
+      dataset = get_dataset(strategy)
 
       # Test with validation split
       with self.assertRaisesRegexp(
@@ -541,9 +564,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
   def test_calling_with_unsupported_predefined_callbacks(self):
     with self.cached_session():
-      x = keras.layers.Input(shape=(3,), name='input')
-      y = keras.layers.Dense(4, name='dense')(x)
-      model = keras.Model(x, y)
+      model = get_model()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
@@ -552,11 +573,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
                                                      '/device:GPU:0'])
       model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
 
-      inputs = np.zeros((10, 3), dtype=np.float32)
-      targets = np.zeros((10, 4), dtype=np.float32)
-      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
-      dataset = dataset.repeat(100)
-      dataset = dataset.batch(10)
+      dataset = get_dataset(strategy)
 
       def schedule(_):
         return 0.001
@@ -580,9 +597,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
   def test_dataset_input_shape_validation(self):
     with self.cached_session():
-      x = keras.layers.Input(shape=(3,), name='input')
-      y = keras.layers.Dense(4, name='dense')(x)
-      model = keras.Model(x, y)
+      model = get_model()
 
       optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
       loss = 'mse'
@@ -616,17 +631,13 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       mode=['graph']))
   def test_dataset_input_shape_fully_defined(self, distribution):
     with self.cached_session():
-      x = keras.layers.Input(shape=(3,), name='input')
-      y = keras.layers.Dense(4, name='dense')(x)
-      model = keras.Model(x, y)
+      model = get_model()
 
       optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
       loss = 'mse'
       model.compile(optimizer, loss, distribute=distribution)
 
-      inputs = np.zeros((10, 3), dtype=np.float32)
-      targets = np.zeros((10, 4), dtype=np.float32)
-      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+      dataset = get_dataset(distribution)
       # Input shapes are not fully known. Batch dimension is unknown as we are
       # not using the drop_remainder argument.
       dataset = dataset.repeat(100).batch(10)
@@ -698,7 +709,7 @@ class LossMaskingWithDistributionStrategyTest(test.TestCase):
 class NormalizationLayerWithDistributionStrategyTest(
     test.TestCase, parameterized.TestCase):
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(strategy_combinations())
   def test_batchnorm_correctness(self, distribution):
     with self.cached_session():
       model = keras.models.Sequential()
@@ -726,7 +737,7 @@ class NormalizationLayerWithDistributionStrategyTest(
 class CorrectnessWithDistributionStrategyTest(test.TestCase,
                                               parameterized.TestCase):
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(strategy_combinations())
   def test_metric_correctness(self, distribution):
     with self.cached_session():
       keras.backend.set_image_data_format('channels_last')
@@ -756,7 +767,7 @@ class CorrectnessWithDistributionStrategyTest(test.TestCase,
       history = model.fit(x=train_dataset, epochs=1, steps_per_epoch=10)
       self.assertEqual(history.history['binary_accuracy'], [1.0])
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(strategy_combinations())
   def test_correctness(self, distribution):
     with self.cached_session():
       keras.backend.set_image_data_format('channels_last')
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 46bffd7068..5091cac836 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -851,7 +851,8 @@ class Model(Network):
     # able to clone a Dataset on multiple workers we can remove this lambda.
     result = self._distribution_strategy.distribute_dataset(lambda: x)
     iterator = result.make_initializable_iterator()
-    K.get_session().run(iterator.initializer)
+    with self._distribution_strategy.scope():
+      K.get_session().run(iterator.initializer)
 
     training_utils.validate_iterator_input(x, y, sample_weight,
                                            validation_split)
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 1b64f904d5..a6470458d2 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -112,100 +112,99 @@ def fit_loop(
     dataset_targets = distributed_training_utils.flatten_perdevice_values(
         current_strategy, targets)
 
-  # Create a train function that is composed of all the parameters above.
-  distributed_train_function = K.Function(
-      all_inputs, all_outputs,
-      updates=all_updates,
-      name='distributed_train_function',
-      **all_session_args)
-
-  # We need to set sample_weights to None since there are sample weight
-  # placeholders that are created with default values.
-  sample_weights = [None for _ in range(len(model.outputs) *
-                                        current_strategy.num_towers)]
-  if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
-    ins = dataset_inputs + dataset_targets + sample_weights + [1]
-  else:
-    ins = dataset_inputs + dataset_targets
+    # Create a train function that is composed of all the parameters above.
+    distributed_train_function = K.Function(
+        all_inputs, all_outputs,
+        updates=all_updates,
+        name='distributed_train_function',
+        **all_session_args)
+
+    # We need to set sample_weights to None since there are sample weight
+    # placeholders that are created with default values.
+    sample_weights = [None for _ in range(len(model.outputs) *
+                                          current_strategy.num_towers)]
+    if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
+      ins = dataset_inputs + dataset_targets + sample_weights + [1]
+    else:
+      ins = dataset_inputs + dataset_targets
 
-  do_validation = False
-  if validation_steps:
-    do_validation = True
+    do_validation = False
+    if validation_steps:
+      do_validation = True
 
-  # Copy the weights from the original model to each of the replicated models.
-  orig_model_weights = model.get_weights()
-  with current_strategy.scope():
+    # Copy the weights from the original model to each of the replicated models.
+    orig_model_weights = model.get_weights()
     distributed_model = current_strategy.unwrap(model._grouped_model)[0]
     distributed_training_utils.set_weights(
         current_strategy, distributed_model, orig_model_weights)
 
-  callbacks = cbks.configure_callbacks(
-      callbacks,
-      model,
-      do_validation=do_validation,
-      val_inputs=None,
-      val_targets=None,
-      epochs=epochs,
-      steps_per_epoch=steps_per_epoch,
-      verbose=verbose)
-  out_labels = model.metrics_names or []
-  callbacks.on_train_begin()
-
-  assert steps_per_epoch is not None
-
-  for epoch in range(initial_epoch, epochs):
-    # Reset stateful metrics
-    for m in model.stateful_metric_functions:
-      m.reset_states()
-    callbacks.on_epoch_begin(epoch)
-    epoch_logs = {}
-    for step_index in range(steps_per_epoch):
-      batch_logs = {'batch': step_index, 'size': 1}
-      callbacks.on_batch_begin(step_index, batch_logs)
-      try:
-        outs = distributed_train_function(ins)
-      except errors.OutOfRangeError:
-        logging.warning('Your dataset iterator ran out of data; '
-                        'interrupting training. Make sure that your dataset '
-                        'can generate at least `steps_per_epoch * epochs` '
-                        'batches (in this case, %d batches).' %
-                        steps_per_epoch * epochs)
-        break
-
-      if not isinstance(outs, list):
-        outs = [outs]
-
-      outs = _aggregate_metrics_across_towers(current_strategy.num_towers,
-                                              out_labels,
-                                              model.stateful_metric_names, outs)
-      for l, o in zip(out_labels, outs):
-        batch_logs[l] = o
-      callbacks.on_batch_end(step_index, batch_logs)
+    callbacks = cbks.configure_callbacks(
+        callbacks,
+        model,
+        do_validation=do_validation,
+        val_inputs=None,
+        val_targets=None,
+        epochs=epochs,
+        steps_per_epoch=steps_per_epoch,
+        verbose=verbose)
+    out_labels = model.metrics_names or []
+    callbacks.on_train_begin()
+
+    assert steps_per_epoch is not None
+
+    for epoch in range(initial_epoch, epochs):
+      # Reset stateful metrics
+      for m in model.stateful_metric_functions:
+        m.reset_states()
+      callbacks.on_epoch_begin(epoch)
+      epoch_logs = {}
+      for step_index in range(steps_per_epoch):
+        batch_logs = {'batch': step_index, 'size': 1}
+        callbacks.on_batch_begin(step_index, batch_logs)
+        try:
+          outs = distributed_train_function(ins)
+        except errors.OutOfRangeError:
+          logging.warning('Your dataset iterator ran out of data; '
+                          'interrupting training. Make sure that your dataset '
+                          'can generate at least `steps_per_epoch * epochs` '
+                          'batches (in this case, %d batches).' %
+                          steps_per_epoch * epochs)
+          break
+
+        if not isinstance(outs, list):
+          outs = [outs]
+
+        outs = _aggregate_metrics_across_towers(current_strategy.num_towers,
+                                                out_labels,
+                                                model.stateful_metric_names,
+                                                outs)
+        for l, o in zip(out_labels, outs):
+          batch_logs[l] = o
+        callbacks.on_batch_end(step_index, batch_logs)
+        if callbacks.model.stop_training:
+          break
+      if do_validation:
+        val_outs = test_loop(
+            model,
+            val_iterator,
+            steps=validation_steps,
+            verbose=0)
+        if not isinstance(val_outs, list):
+          val_outs = [val_outs]
+        # Same labels assumed.
+        for l, o in zip(out_labels, val_outs):
+          epoch_logs['val_' + l] = o
+
+      callbacks.on_epoch_end(epoch, epoch_logs)
       if callbacks.model.stop_training:
         break
-    if do_validation:
-      val_outs = test_loop(
-          model,
-          val_iterator,
-          steps=validation_steps,
-          verbose=0)
-      if not isinstance(val_outs, list):
-        val_outs = [val_outs]
-      # Same labels assumed.
-      for l, o in zip(out_labels, val_outs):
-        epoch_logs['val_' + l] = o
+    callbacks.on_train_end()
 
-    callbacks.on_epoch_end(epoch, epoch_logs)
-    if callbacks.model.stop_training:
-      break
-  callbacks.on_train_end()
-
-  # Copy the weights back from the replicated model to the original model.
-  with current_strategy.scope():
+    # Copy the weights back from the replicated model to the original model.
     updated_weights = current_strategy.unwrap(
         model._grouped_model)[0].get_weights()
     model.set_weights(updated_weights)
-  return model.history
+    return model.history
 
 
 def _experimental_fit_loop(
@@ -427,66 +426,65 @@ def test_loop(model, iterator, verbose=0, steps=None):
     dataset_targets = distributed_training_utils.flatten_perdevice_values(
         current_strategy, targets)
 
-  distributed_test_function = K.Function(
-      all_inputs, all_outputs,
-      updates=all_updates,
-      name='distributed_test_function',
-      **all_session_args)
-
-  # We need to set sample_weights to None since there are sample weight
-  # placeholders that are created with default values.
-  sample_weights = [None for _ in range(len(model.outputs) *
-                                        current_strategy.num_towers)]
-  if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
-    ins = dataset_inputs + dataset_targets + sample_weights + [0]
-  else:
-    ins = dataset_inputs + dataset_targets
+    distributed_test_function = K.Function(
+        all_inputs, all_outputs,
+        updates=all_updates,
+        name='distributed_test_function',
+        **all_session_args)
 
-  for m in model.stateful_metric_functions:
-    m.reset_states()
-  stateful_metric_indices = [
-      i for i, name in enumerate(model.metrics_names)
-      if str(name) in model.stateful_metric_names
-  ]
+    # We need to set sample_weights to None since there are sample weight
+    # placeholders that are created with default values.
+    sample_weights = [None for _ in range(len(model.outputs) *
+                                          current_strategy.num_towers)]
+    if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
+      ins = dataset_inputs + dataset_targets + sample_weights + [0]
+    else:
+      ins = dataset_inputs + dataset_targets
 
-  outs = []
-  if verbose == 1:
-    progbar = Progbar(target=steps)
+    for m in model.stateful_metric_functions:
+      m.reset_states()
+    stateful_metric_indices = [
+        i for i, name in enumerate(model.metrics_names)
+        if str(name) in model.stateful_metric_names
+    ]
 
-  # Copy the weights from the original model to each of the replicated models.
-  orig_model_weights = model.get_weights()
-  with current_strategy.scope():
+    outs = []
+    if verbose == 1:
+      progbar = Progbar(target=steps)
+
+    # Copy the weights from the original model to each of the replicated models.
+    orig_model_weights = model.get_weights()
     distributed_model = current_strategy.unwrap(model._grouped_model)[0]
     distributed_training_utils.set_weights(
         current_strategy, distributed_model, orig_model_weights)
 
-  assert steps is not None
-  for step in range(steps):
-    batch_outs = distributed_test_function(ins)
-    batch_outs = _aggregate_metrics_across_towers(
-        current_strategy.num_towers, model.metrics_names,
-        model.stateful_metric_names, batch_outs)
-    if isinstance(batch_outs, list):
-      if step == 0:
-        outs = [0.] * len(batch_outs)
-      for i, batch_out in enumerate(batch_outs):
-        if i in stateful_metric_indices:
-          outs[i] = batch_out
-        else:
-          outs[i] += batch_out
-    else:
-      if step == 0:
-        outs.append(0.)
-      outs[0] += batch_outs
-    if verbose >= 1:
-      progbar.update(step + 1)
-  for i in range(len(outs)):
-    if i not in stateful_metric_indices:
-      outs[i] /= steps
+    assert steps is not None
+    for step in range(steps):
+      batch_outs = distributed_test_function(ins)
+      batch_outs = _aggregate_metrics_across_towers(
+          current_strategy.num_towers, model.metrics_names,
+          model.stateful_metric_names, batch_outs)
+      if isinstance(batch_outs, list):
+        if step == 0:
+          outs = [0.] * len(batch_outs)
+        for i, batch_out in enumerate(batch_outs):
+          if i in stateful_metric_indices:
+            outs[i] = batch_out
+          else:
+            outs[i] += batch_out
+      else:
+        if step == 0:
+          outs.append(0.)
+        outs[0] += batch_outs
+      if verbose >= 1:
+        progbar.update(step + 1)
+    for i in range(len(outs)):
+      if i not in stateful_metric_indices:
+        outs[i] /= steps
 
-  if len(outs) == 1:
-    return outs[0]
-  return outs
+    if len(outs) == 1:
+      return outs[0]
+    return outs
 
 
 def _experimental_test_loop(model, iterator, verbose=0, steps=None):
@@ -647,51 +645,50 @@ def predict_loop(model, iterator, verbose=0, steps=None):
     dataset_inputs = distributed_training_utils.flatten_perdevice_values(
         current_strategy, inputs)
 
-  distributed_predict_function = K.Function(
-      all_inputs, all_outputs,
-      updates=all_updates,
-      name='distributed_predict_function',
-      **all_session_args)
+    distributed_predict_function = K.Function(
+        all_inputs, all_outputs,
+        updates=all_updates,
+        name='distributed_predict_function',
+        **all_session_args)
 
-  if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
-    ins = dataset_inputs + [0]
-  else:
-    ins = dataset_inputs
+    if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
+      ins = dataset_inputs + [0]
+    else:
+      ins = dataset_inputs
 
-  if verbose == 1:
-    progbar = Progbar(target=steps)
+    if verbose == 1:
+      progbar = Progbar(target=steps)
 
-  # Copy the weights from the original model to each of the replicated models.
-  orig_model_weights = model.get_weights()
-  with current_strategy.scope():
+    # Copy the weights from the original model to each of the replicated models.
+    orig_model_weights = model.get_weights()
     distributed_model = current_strategy.unwrap(model._grouped_model)[0]
     distributed_training_utils.set_weights(
         current_strategy, distributed_model, orig_model_weights)
 
-  if steps is not None:
-    # Since we do not know how many samples we will see, we cannot pre-allocate
-    # the returned Numpy arrays. Instead, we store one array per batch seen
-    # and concatenate them upon returning.
-    unconcatenated_outs = []
-    for step in range(steps):
-      batch_outs = distributed_predict_function(ins)
-      if not isinstance(batch_outs, list):
-        batch_outs = [batch_outs]
-      if step == 0:
-        for _ in batch_outs:
-          unconcatenated_outs.append([])
-      # TODO(anjalisridhar): Should combine the outputs from multiple towers
-      # correctly here.
-      for i, batch_out in enumerate(batch_outs):
-        unconcatenated_outs[i].append(batch_out)
-      if verbose >= 1:
-        progbar.update(step + 1)
-    if len(unconcatenated_outs) == 1:
-      return np.concatenate(unconcatenated_outs[0], axis=0)
-    return [
-        np.concatenate(unconcatenated_outs[i], axis=0)
-        for i in range(len(unconcatenated_outs))
-    ]
+    if steps is not None:
+      # Since we do not know how many samples we will see, we cannot
+      # pre-allocate the returned Numpy arrays. Instead, we store one array per
+      # batch seen and concatenate them upon returning.
+      unconcatenated_outs = []
+      for step in range(steps):
+        batch_outs = distributed_predict_function(ins)
+        if not isinstance(batch_outs, list):
+          batch_outs = [batch_outs]
+        if step == 0:
+          for _ in batch_outs:
+            unconcatenated_outs.append([])
+        # TODO(anjalisridhar): Should combine the outputs from multiple towers
+        # correctly here.
+        for i, batch_out in enumerate(batch_outs):
+          unconcatenated_outs[i].append(batch_out)
+        if verbose >= 1:
+          progbar.update(step + 1)
+      if len(unconcatenated_outs) == 1:
+        return np.concatenate(unconcatenated_outs[0], axis=0)
+      return [
+          np.concatenate(unconcatenated_outs[i], axis=0)
+          for i in range(len(unconcatenated_outs))
+      ]
 
 
 def _experimental_predict_loop(model, iterator, verbose=0, steps=None):
-- 
GitLab


From fa8c1eabd06f3043be820bf476e8413818853f17 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 00:04:20 -0700
Subject: [PATCH 0847/1357] Internal

PiperOrigin-RevId: 214895147
---
 tensorflow/examples/android/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD
index f327b645f5..f5f0d7c3c8 100644
--- a/tensorflow/examples/android/BUILD
+++ b/tensorflow/examples/android/BUILD
@@ -68,6 +68,7 @@ android_binary(
     srcs = glob([
         "src/**/*.java",
     ]),
+    aapt_version = "aapt",
     # Package assets from assets dir as well as all model targets. Remove undesired models
     # (and corresponding Activities in source) to reduce APK size.
     assets = [
-- 
GitLab


From d0690d46466bf0393ad65544d1e8c55e948df133 Mon Sep 17 00:00:00 2001
From: EFanZh <efanzh@gmail.com>
Date: Fri, 28 Sep 2018 15:20:26 +0800
Subject: [PATCH 0848/1357] Fix some documentation errors

---
 tensorflow/contrib/distribute/python/mirrored_strategy.py | 5 +++--
 tensorflow/python/keras/engine/training.py                | 2 +-
 tensorflow/python/training/distribute.py                  | 6 +++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 504f45a695..c0861da567 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -318,12 +318,13 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
   [TensorFlow's documentation](https://www.tensorflow.org/deploy/distributed).
   The distribution strategy inherits these concepts as well and in addition to
   that we also clarify several more concepts:
-    * **In-graph replication**: the `client` creates a single `tf.Graph` that
+
+  * **In-graph replication**: the `client` creates a single `tf.Graph` that
     specifies tasks for devices on all workers. The `client` then creates a
     client session which will talk to the `master` service of a `worker`. Then
     the `master` will partition the graph and distribute the work to all
     participating workers.
-    * **Worker**: A `worker` is a TensorFlow `task` that usually maps to one
+  * **Worker**: A `worker` is a TensorFlow `task` that usually maps to one
     physical machine. We will have multiple `worker`s with different `task`
     index. They all do similar things except for one worker checkpointing model
     variables, writing summaries, etc. in addition to its ordinary work.
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 5091cac836..1bd8422658 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -2356,6 +2356,6 @@ class DistributedCallbackModel(Model):
     # Whitelisted atttributes of the model that can be accessed by the user
     # during a callback.
     if item not in ['_setattr_tracking']:
-      logging.warning('You are accessing attribute ' + item + 'of the '
+      logging.warning('You are accessing attribute ' + item + ' of the '
                       'DistributedCallbackModel that may not have been set '
                       'correctly.')
diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py
index 419a9ec12b..fd4704285c 100644
--- a/tensorflow/python/training/distribute.py
+++ b/tensorflow/python/training/distribute.py
@@ -631,7 +631,7 @@ class DistributionStrategy(object):
 
     Args:
       fn: function to run using this distribution strategy. The function must
-        have the following signature: def fn(context, *inputs).
+        have the following signature: `def fn(context, *inputs)`.
         `context` is an instance of `MultiStepContext` that will be passed when
         `fn` is run. `context` can be used to specify the outputs to be returned
         from `fn` by calling `context.set_last_step_output`. It can also be used
@@ -797,9 +797,9 @@ class DistributionStrategy(object):
     return merged(results)
     ```
 
-    Otherwise this returns `fn(var, *args, **kwargs)` colocated with `var`.'
+    Otherwise this returns `fn(var, *args, **kwargs)` colocated with `var`.
 
-    Neither *args nor **kwargs may contain per-device values.
+    Neither `*args` nor `**kwargs` may contain per-device values.
     If they contain mirrored values, they will be unwrapped before
     calling `fn`.
 
-- 
GitLab


From 19b2383cc0e221262be0780180558cf5bbb3e37e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 02:01:03 -0700
Subject: [PATCH 0849/1357] compat: Update forward compatibility horizon to
 2018-09-28

PiperOrigin-RevId: 214904795
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 88cad5d6d9..b74fce3a4c 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 27)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 28)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 8eb27871583d9fc61e046493acaa0df2839bc1c7 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Fri, 28 Sep 2018 18:51:34 +0800
Subject: [PATCH 0850/1357] remove slash

---
 tensorflow/python/ops/variables.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 69f63bc8e6..262cd61e5a 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -2401,7 +2401,8 @@ class PartitionedVariable(object):
     partition_axes = self._partition_axes()
     if len(partition_axes) > 1:
       raise NotImplementedError(
-          "Multi-axis partition assign_fn is not supported "
+          "Cannot do assign action along more than one dimension: %s.  "
+          "Multi-axis partition assign action is not supported "
           % str(partition_axes))
     partition_ix = partition_axes[0]
     size_splits_list = [
@@ -2409,7 +2410,7 @@ class PartitionedVariable(object):
     value_list = array_ops.split(
         value, size_splits_list, axis=partition_ix)
     op_list = [
-        assign_fn(var, value_list[idx], idx) \
+        assign_fn(var, value_list[idx], idx)
         for idx, var in enumerate(self._variable_list)]
     return op_list
 
-- 
GitLab


From 32627bfba19606d3c3a34f5d02ae9428675bbc42 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Fri, 28 Sep 2018 07:28:19 -0700
Subject: [PATCH 0851/1357] Allow testManyCPUs to encounter non-CPU devices.

PiperOrigin-RevId: 214932861
---
 tensorflow/python/client/session_test.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index 5c0c405306..347833ce8f 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -120,11 +120,17 @@ class SessionTest(test_util.TensorFlowTestCase):
       inp = constant_op.constant(10.0, name='W1')
       self.assertAllEqual(inp.eval(), 10.0)
 
-      devices = sess.list_devices()
-      self.assertEqual(2, len(devices))
-      for device in devices:
-        self.assertEqual('CPU', framework_device_lib.DeviceSpec.from_string(
-            device.name).device_type)
+      num_cpu_devices = 0
+      num_gpu_devices = 0
+      for device in sess.list_devices():
+        device_type = framework_device_lib.DeviceSpec.from_string(
+            device.name).device_type
+        if device_type == 'CPU':
+          num_cpu_devices += 1
+        elif device_type == 'GPU':
+          num_gpu_devices += 1
+      self.assertEqual(2, num_cpu_devices)
+      self.assertEqual(0, num_gpu_devices)
 
   def testPerSessionThreads(self):
     with session.Session(
-- 
GitLab


From 4e955be2ae1c920623778c15357129fea9a3bdab Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Fri, 28 Sep 2018 08:26:55 -0700
Subject: [PATCH 0852/1357] Fixing a couple of small bugs with the multi device
 iterator having to deal with the case when the background thread terminated
 because the iterator finished and yet some other requests were coming in.

1. The GetNextFromShard would see an empty buffer and return cancelled instead
of OutOfRange errors
2. On shutdown, we weren't calling all the pending callbacks.

Tested with runs_per_test=5000

PiperOrigin-RevId: 214939274
---
 .../kernels/data/multi_device_iterator_ops.cc | 34 +++++++++++--------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
index 5f143967d9..d909b9e9d3 100644
--- a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
+++ b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
@@ -134,19 +134,17 @@ class MultiDeviceIterator : public ResourceBase {
     void Reset() LOCKS_EXCLUDED(mu_) {
       {
         mutex_lock l(mu_);
-        if (background_thread_finished_) {
-          return;
-        }
-
-        cancelled_ = true;
-        // Wake up the background thread.
-        for (int i = 0; i < size_; ++i) {
-          buffer_[i].cond_var.notify_all();
-        }
+        if (!background_thread_finished_) {
+          cancelled_ = true;
+          // Wake up the background thread.
+          for (int i = 0; i < size_; ++i) {
+            buffer_[i].cond_var.notify_all();
+          }
 
-        // Make sure background thread has finished first.
-        while (!background_thread_finished_) {
-          shutdown_cond_var_.wait(l);
+          // Make sure background thread has finished first.
+          while (!background_thread_finished_) {
+            shutdown_cond_var_.wait(l);
+          }
         }
       }
       RunPendingCallbacks();
@@ -182,7 +180,7 @@ class MultiDeviceIterator : public ResourceBase {
             buffer_[shard_num].cond_var.notify_all();
           }
         } else {
-          if (background_thread_finished_) {
+          if (end_of_iterator_) {
             produced_output = true;
             elem.end_of_sequence = true;
           } else {
@@ -219,8 +217,12 @@ class MultiDeviceIterator : public ResourceBase {
           while (!buffer_[i].callbacks.empty()) {
             if (buffer_[i].data.empty()) {
               HostBufferElement elem;
-              elem.status =
-                  errors::Cancelled("Cancelled and buffer not filled.");
+              if (end_of_iterator_) {
+                elem.end_of_sequence = true;
+              } else {
+                elem.status =
+                    errors::Cancelled("Cancelled and buffer not filled.");
+              }
               cancellation_elements.push_back(std::move(elem));
             } else {
               cancellation_elements.push_back(
@@ -293,6 +295,7 @@ class MultiDeviceIterator : public ResourceBase {
           {
             mutex_lock l(mu_);
             background_thread_finished_ = true;
+            end_of_iterator_ = true;
             shutdown_cond_var_.notify_all();
           }
           RunPendingCallbacks();
@@ -312,6 +315,7 @@ class MultiDeviceIterator : public ResourceBase {
     std::unique_ptr<Thread> background_thread_ GUARDED_BY(mu_);
     bool background_thread_finished_ GUARDED_BY(mu_) = false;
     bool background_thread_started_ GUARDED_BY(mu_) = false;
+    bool end_of_iterator_ GUARDED_BY(mu_) = false;
     bool cancelled_ GUARDED_BY(mu_) = false;
     condition_variable shutdown_cond_var_ GUARDED_BY(mu_);
 
-- 
GitLab


From a74a3217f7ff2dbee2fb618aa658cf666861545c Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Sat, 4 Aug 2018 14:13:00 +0800
Subject: [PATCH 0853/1357] Move bazel.rc to workspace root to support
 bazel-0.18.0

Bazel 0.18.0 will contain a change for which rc files it accepts.
https://github.com/bazelbuild/bazel/commit/ec83598cb6ee4136166bb562a24dc5dfa58921db
https://github.com/bazelbuild/bazel/issues/4502

Old bazel used to read %workspace%/tools/bazel.rc. New bazel will not
read that and instead will only read %workspace%/.bazelrc.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tools/bazel.rc => .bazelrc | 4 +++-
 .gitignore                 | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)
 rename tools/bazel.rc => .bazelrc (98%)

diff --git a/tools/bazel.rc b/.bazelrc
similarity index 98%
rename from tools/bazel.rc
rename to .bazelrc
index 3734fab715..9f09fdff97 100644
--- a/tools/bazel.rc
+++ b/.bazelrc
@@ -29,7 +29,7 @@ build:mkl -c opt
 
 # This config option is used to enable MKL-DNN open source library only,
 # without depending on MKL binary version.
-build:mkl_open_source_only --define=build_with_mkl_dnn_only=true 
+build:mkl_open_source_only --define=build_with_mkl_dnn_only=true
 build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true
 
 build:download_clang --crosstool_top=@local_config_download_clang//:toolchain
@@ -84,3 +84,5 @@ build:dynamic_kernels --define=dynamic_loaded_kernels=true
 build --define=PREFIX=/usr
 build --define=LIBDIR=$(PREFIX)/lib
 build --define=INCLUDEDIR=$(PREFIX)/include
+
+# Do not commit the tf_configure.bazelrc line
diff --git a/.gitignore b/.gitignore
index 1ef4c297ee..cb65f447d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 .DS_Store
 .ipynb_checkpoints
 node_modules
-/.bazelrc
 /.tf_configure.bazelrc
 /bazel-*
 /bazel_pip
-- 
GitLab


From d3f6b72bc7356d5c94289e32426dc482b8ededf0 Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Sat, 4 Aug 2018 14:28:02 +0800
Subject: [PATCH 0854/1357] configure: use workspace-relative path to
 tf_configure_bazelrc

/.bazelrc is not gitignored anymore so this should help in case the
import line is accidentally committed. Bazel 0.18.0 will support a new
'try-import' statement that should be used once 0.18.0 has been out long
enough.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 configure.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/configure.py b/configure.py
index 55fce8b93b..129d9c5fe7 100644
--- a/configure.py
+++ b/configure.py
@@ -257,11 +257,7 @@ def reset_tf_configure_bazelrc(workspace_path):
       if _TF_BAZELRC_FILENAME in l:
         continue
       f.write('%s\n' % l)
-    if is_windows():
-      tf_bazelrc_path = _TF_BAZELRC.replace('\\', '/')
-    else:
-      tf_bazelrc_path = _TF_BAZELRC
-    f.write('import %s\n' % tf_bazelrc_path)
+    f.write('import %%workspace%%/%s\n' % _TF_BAZELRC_FILENAME)
 
 
 def cleanup_makefile():
-- 
GitLab


From e06783e7bb80f664c7ec9be90680ac6ddcbd598f Mon Sep 17 00:00:00 2001
From: Brian Patton <bjp@google.com>
Date: Fri, 28 Sep 2018 08:38:25 -0700
Subject: [PATCH 0855/1357] Fix a latex render nit

PiperOrigin-RevId: 214940748
---
 tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt b/tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt
index 40d7d371ca..7142a0e3f2 100644
--- a/tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt
@@ -9,7 +9,7 @@ The lower regularized incomplete Gamma function is defined as:
 
 where
 
-\\(gamma(a, x) = int_{0}^{x} t^{a-1} exp(-t) dt\\)
+\\(gamma(a, x) = \\int_{0}^{x} t^{a-1} exp(-t) dt\\)
 
 is the lower incomplete Gamma function.
 
-- 
GitLab


From c7bb3c3d65e4e064d53630d4b524522eed6f3f44 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 28 Sep 2018 08:38:53 -0700
Subject: [PATCH 0856/1357] [tf.data] Move `tf.contrib.data` C++ code to a core
 "experimental" directory.

NOTE: All ops and kernels previously previously defined in
tensorflow/contrib/data have had their name prefixed with
"Experimental" to indicate that they are not (yet) stable, and thus
not subject to backwards or forwards compatibility guarantees.
PiperOrigin-RevId: 214940819
---
 tensorflow/contrib/BUILD                      |   3 -
 tensorflow/contrib/cmake/python_modules.txt   |   1 -
 tensorflow/contrib/data/BUILD                 |  38 -----
 .../contrib/data/ops/indexed_dataset_ops.cc   |  80 ---------
 .../contrib/data/python/kernel_tests/BUILD    |   3 +-
 .../kernel_tests/indexed_dataset_ops_test.py  |  12 +-
 tensorflow/contrib/data/python/ops/BUILD      |  57 +------
 .../data/python/ops/contrib_op_loader.py      |  24 ---
 .../contrib/data/python/ops/error_ops.py      |   5 +-
 .../data/python/ops/indexed_dataset_ops.py    |  25 +--
 .../contrib/data/python/ops/interleave_ops.py |  13 +-
 .../contrib/data/python/ops/optimization.py   |   5 +-
 .../data/python/ops/prefetching_ops.py        |  37 ++--
 tensorflow/contrib/data/python/ops/readers.py |   6 +-
 .../contrib/data/python/ops/threadpool.py     |   9 +-
 tensorflow/contrib/data/python/ops/unique.py  |   5 +-
 tensorflow/core/BUILD                         |   2 +
 ...pi_def_ExperimentalAssertNextDataset.pbtxt |   4 +
 .../api_def_ExperimentalCSVDataset.pbtxt      |   4 +
 ...xperimentalDirectedInterleaveDataset.pbtxt |  21 +++
 ...xperimentalFunctionBufferingResource.pbtxt |  58 +++++++
 ...ntalFunctionBufferingResourceGetNext.pbtxt |  25 +++
 ...mentalFunctionBufferingResourceReset.pbtxt |  13 ++
 ...f_ExperimentalIdentityIndexedDataset.pbtxt |   4 +
 ..._def_ExperimentalIgnoreErrorsDataset.pbtxt |   8 +
 ...pi_def_ExperimentalIndexedDatasetGet.pbtxt |   4 +
 ...xperimentalIndexedDatasetMaterialize.pbtxt |   4 +
 ...pi_def_ExperimentalIteratorGetDevice.pbtxt |   8 +
 .../api_def_ExperimentalLMDBDataset.pbtxt     |   4 +
 ...mentalMaterializedIndexDatasetHandle.pbtxt |   4 +
 ...pi_def_ExperimentalThreadPoolDataset.pbtxt |  13 ++
 ...api_def_ExperimentalThreadPoolHandle.pbtxt |  35 ++++
 .../api_def_ExperimentalUniqueDataset.pbtxt   |   8 +
 tensorflow/core/kernels/data/BUILD            |   1 +
 .../kernels/data/experimental}/BUILD          |  90 +++++-----
 .../experimental}/assert_next_dataset_op.cc   |   5 +-
 .../data/experimental}/csv_dataset_op.cc      |   3 +-
 .../directed_interleave_dataset_op.cc         |   5 +-
 .../experimental}/identity_indexed_dataset.cc |   7 +-
 .../experimental}/ignore_errors_dataset_op.cc |   6 +-
 .../data/experimental}/indexed_dataset.cc     |  14 +-
 .../data/experimental}/indexed_dataset.h      |   6 +-
 .../data/experimental}/lmdb_dataset_op.cc     |   3 +-
 .../data/experimental}/prefetching_kernels.cc |  23 +--
 .../experimental}/threadpool_dataset_op.cc    |   7 +-
 .../data/experimental}/unique_dataset_op.cc   |   7 +-
 .../ops/experimental_dataset_ops.cc}          | 161 +++++++++---------
 tensorflow/python/BUILD                       |   9 +
 tensorflow/tools/pip_package/BUILD            |   1 -
 49 files changed, 469 insertions(+), 421 deletions(-)
 delete mode 100644 tensorflow/contrib/data/ops/indexed_dataset_ops.cc
 delete mode 100644 tensorflow/contrib/data/python/ops/contrib_op_loader.py
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalAssertNextDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalCSVDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResource.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceReset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIdentityIndexedDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetGet.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetMaterialize.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIteratorGetDevice.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalLMDBDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalUniqueDataset.pbtxt
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/BUILD (52%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/assert_next_dataset_op.cc (97%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/csv_dataset_op.cc (99%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/directed_interleave_dataset_op.cc (98%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/identity_indexed_dataset.cc (96%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/ignore_errors_dataset_op.cc (96%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/indexed_dataset.cc (97%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/indexed_dataset.h (95%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/lmdb_dataset_op.cc (98%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/prefetching_kernels.cc (95%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/threadpool_dataset_op.cc (97%)
 rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/unique_dataset_op.cc (97%)
 rename tensorflow/{contrib/data/ops/dataset_ops.cc => core/ops/experimental_dataset_ops.cc} (62%)

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 1a9ae8ac3a..98dff965a9 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -132,7 +132,6 @@ cc_library(
     deps = [
         "//tensorflow/contrib/boosted_trees:boosted_trees_kernels",
         "//tensorflow/contrib/coder:all_kernels",
-        "//tensorflow/contrib/data/kernels:dataset_kernels",
         "//tensorflow/contrib/factorization/kernels:all_kernels",
         "//tensorflow/contrib/hadoop:dataset_kernels",
         "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels",
@@ -163,8 +162,6 @@ cc_library(
     deps = [
         "//tensorflow/contrib/boosted_trees:boosted_trees_ops_op_lib",
         "//tensorflow/contrib/coder:all_ops",
-        "//tensorflow/contrib/data:dataset_ops_op_lib",
-        "//tensorflow/contrib/data:indexed_dataset_ops_op_lib",
         "//tensorflow/contrib/factorization:all_ops",
         "//tensorflow/contrib/framework:all_ops",
         "//tensorflow/contrib/hadoop:dataset_ops_op_lib",
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index c0763f4c0e..2975b167ec 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -132,7 +132,6 @@ tensorflow/contrib/cudnn_rnn/python
 tensorflow/contrib/cudnn_rnn/python/layers
 tensorflow/contrib/cudnn_rnn/python/ops
 tensorflow/contrib/data
-tensorflow/contrib/data/kernels
 tensorflow/contrib/data/python
 tensorflow/contrib/data/python/kernel_tests
 tensorflow/contrib/data/python/kernel_tests/serialization
diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD
index 9f710613dd..38f1c65a4d 100644
--- a/tensorflow/contrib/data/BUILD
+++ b/tensorflow/contrib/data/BUILD
@@ -4,17 +4,6 @@ licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-load(
-    "//tensorflow:tensorflow.bzl",
-    "tf_custom_op_library",
-    "tf_gen_op_libs",
-    "if_not_windows",
-)
-load(
-    "//tensorflow/core:platform/default/build_config_root.bzl",
-    "if_static",
-)
-
 py_library(
     name = "data",
     srcs = ["__init__.py"],
@@ -25,30 +14,3 @@ py_library(
         "//tensorflow/python:util",
     ],
 )
-
-cc_library(
-    name = "lib_proto_parsing_for_dataset_ops",
-    deps = if_not_windows(["//tensorflow/core:lib_proto_parsing"]),
-)
-
-tf_custom_op_library(
-    name = "_dataset_ops.so",
-    srcs = [
-        "ops/dataset_ops.cc",
-        "ops/indexed_dataset_ops.cc",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/kernels:dataset_kernels",
-        "//tensorflow/contrib/data/kernels:indexed_dataset",
-    ] + if_static(
-        extra_deps = [":lib_proto_parsing_for_dataset_ops"],
-        otherwise = [],
-    ),
-)
-
-tf_gen_op_libs(
-    op_lib_names = [
-        "dataset_ops",
-        "indexed_dataset_ops",
-    ],
-)
diff --git a/tensorflow/contrib/data/ops/indexed_dataset_ops.cc b/tensorflow/contrib/data/ops/indexed_dataset_ops.cc
deleted file mode 100644
index cd9b7c68a0..0000000000
--- a/tensorflow/contrib/data/ops/indexed_dataset_ops.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-
-namespace tensorflow {
-
-REGISTER_OP("IdentityIndexedDataset")
-    .Input("size: uint64")
-    .Output("handle: variant")
-    .SetIsStateful()
-    .SetShapeFn(
-        shape_inference::ScalarShape);  // TODO(saeta): check input shapes.
-
-///////////////////////////////////////////////////////////////////////////////
-//     IndexedDataset Internals
-///////////////////////////////////////////////////////////////////////////////
-
-// Creates the handle.
-REGISTER_OP("MaterializedIndexDatasetHandle")
-    .Output("handle: resource")
-    .Attr("container: string")
-    .Attr("shared_name: string")
-    .Attr("output_types: list(type) >= 1")
-    .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape);
-
-// Actually materialize the materialize handle.
-REGISTER_OP("IndexedDatasetMaterialize")
-    .Input("dataset: variant")
-    .Input("materialized: resource")
-    .SetShapeFn(shape_inference::NoOutputs);
-
-namespace {
-
-Status GetShapeFn(shape_inference::InferenceContext* c) {
-  shape_inference::ShapeHandle unused;
-  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
-  std::vector<PartialTensorShape> output_shapes;
-  TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
-  if (output_shapes.size() != c->num_outputs()) {
-    return errors::InvalidArgument(
-        "`output_shapes` must be the same length as `output_types` (",
-        output_shapes.size(), " vs. ", c->num_outputs());
-  }
-  for (size_t i = 0; i < output_shapes.size(); ++i) {
-    shape_inference::ShapeHandle output_shape_handle;
-    TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
-        output_shapes[i], &output_shape_handle));
-    c->set_output(static_cast<int>(i), output_shape_handle);
-  }
-  return Status::OK();
-}
-
-}  // namespace
-
-REGISTER_OP("IndexedDatasetGet")
-    .Input("materialized: resource")
-    .Input("index: uint64")
-    .Output("components: output_types")
-    .Attr("output_types: list(type) >= 1")
-    .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(GetShapeFn)
-    .Doc(R"doc(
-Gets the element at `index` from `materialized` IndexedDataset.
-)doc");
-
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index ce52c990ce..21ac40eb21 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -139,12 +139,11 @@ py_test(
     name = "indexed_dataset_ops_test",
     srcs = ["indexed_dataset_ops_test.py"],
     deps = [
-        "//tensorflow/contrib/data/python/ops:contrib_op_loader",
-        "//tensorflow/contrib/data/python/ops:gen_dataset_ops",
         "//tensorflow/contrib/data/python/ops:indexed_dataset_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
diff --git a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
index 9c508d686d..46a7127b52 100644
--- a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
@@ -19,29 +19,29 @@ from __future__ import print_function
 
 import unittest
 
-from tensorflow.contrib.data.python.ops import contrib_op_loader  # pylint: disable=unused-import
-from tensorflow.contrib.data.python.ops import gen_dataset_ops
 from tensorflow.contrib.data.python.ops import indexed_dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
 from tensorflow.python.platform import test
 
 
 class IndexedDatasetOpsTest(test.TestCase):
 
   def testLowLevelIndexedDatasetOps(self):
-    identity = gen_dataset_ops.identity_indexed_dataset(
+    identity = ged_ops.experimental_identity_indexed_dataset(
         ops.convert_to_tensor(16, dtype=dtypes.uint64))
-    handle = gen_dataset_ops.materialized_index_dataset_handle(
+    handle = ged_ops.experimental_materialized_index_dataset_handle(
         container="",
         shared_name="",
         output_types=[dtypes.uint64],
         output_shapes=[[]])
-    materialize = gen_dataset_ops.indexed_dataset_materialize(identity, handle)
+    materialize = ged_ops.experimental_indexed_dataset_materialize(
+        identity, handle)
     index = array_ops.placeholder(dtypes.uint64)
-    get_op = gen_dataset_ops.indexed_dataset_get(
+    get_op = ged_ops.experimental_indexed_dataset_get(
         handle, index, output_types=[dtypes.uint64], output_shapes=[[]])
 
     with self.cached_session() as sess:
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index a14781cd93..5cd1ed542b 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -78,7 +78,6 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":batching",
-        ":gen_dataset_ops",
         ":interleave_ops",
         ":optimization",
         ":parsing_ops",
@@ -86,6 +85,7 @@ py_library(
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dataset_ops_gen",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:lib",
         "//tensorflow/python:platform",
@@ -148,8 +148,7 @@ py_library(
     srcs = ["error_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":contrib_op_loader",
-        ":gen_dataset_ops",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:nest",
         "//tensorflow/python/data/util:sparse",
@@ -179,12 +178,11 @@ py_library(
     srcs = ["interleave_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":contrib_op_loader",
-        ":gen_dataset_ops",
         ":random_ops",
         "//tensorflow/contrib/stateless",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:util",
@@ -199,9 +197,8 @@ py_library(
     srcs = ["optimization.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":contrib_op_loader",
-        ":gen_dataset_ops",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python/data/util:nest",
         "//tensorflow/python/data/util:sparse",
@@ -304,8 +301,7 @@ py_library(
     srcs = ["threadpool.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":contrib_op_loader",
-        ":gen_dataset_ops",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:nest",
@@ -321,9 +317,8 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":contrib_op_loader",
-        ":gen_dataset_ops",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:nest",
         "//tensorflow/python/data/util:sparse",
@@ -342,47 +337,11 @@ py_library(
     ],
 )
 
-tf_gen_op_wrapper_py(
-    name = "gen_dataset_ops",
-    out = "gen_dataset_ops.py",
-    deps = [
-        "//tensorflow/contrib/data:dataset_ops_op_lib",
-        "//tensorflow/contrib/data:indexed_dataset_ops_op_lib",
-    ],
-)
-
-tf_kernel_library(
-    name = "dataset_ops_kernels",
-    deps = [
-        "//tensorflow/contrib/data/kernels:dataset_kernels",
-        "//tensorflow/core:framework",
-    ],
-    alwayslink = 1,
-)
-
-tf_custom_op_py_library(
-    name = "contrib_op_loader",
-    srcs = ["contrib_op_loader.py"],
-    dso = ["//tensorflow/contrib/data:_dataset_ops.so"],
-    kernels = [
-        ":dataset_ops_kernels",
-        "//tensorflow/contrib/data:indexed_dataset_ops_op_lib",
-        "//tensorflow/contrib/data:dataset_ops_op_lib",
-    ],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":gen_dataset_ops",
-        "//tensorflow/contrib/util:util_py",
-        "//tensorflow/python:platform",
-    ],
-)
-
 py_library(
     name = "indexed_dataset_ops",
     srcs = ["indexed_dataset_ops.py"],
     deps = [
-        ":contrib_op_loader",
-        ":gen_dataset_ops",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:nest",
@@ -394,7 +353,7 @@ py_library(
     name = "prefetching_ops",
     srcs = ["prefetching_ops.py"],
     deps = [
-        ":contrib_op_loader",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:nest",
diff --git a/tensorflow/contrib/data/python/ops/contrib_op_loader.py b/tensorflow/contrib/data/python/ops/contrib_op_loader.py
deleted file mode 100644
index 8f495a9dc9..0000000000
--- a/tensorflow/contrib/data/python/ops/contrib_op_loader.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Python helper for loading contrib ops and kernels."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.util import loader
-from tensorflow.python.platform import resource_loader
-
-_dataset_ops = loader.load_op_library(
-    resource_loader.get_path_to_datafile("../../_dataset_ops.so"))
diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py
index 615dbcabd4..f962e623ee 100644
--- a/tensorflow/contrib/data/python/ops/error_ops.py
+++ b/tensorflow/contrib/data/python/ops/error_ops.py
@@ -17,9 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import contrib_op_loader  # pylint: disable=unused-import
-from tensorflow.contrib.data.python.ops import gen_dataset_ops
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
 
 
 def ignore_errors():
@@ -60,7 +59,7 @@ class _IgnoreErrorsDataset(dataset_ops.UnaryDataset):
     self._input_dataset = input_dataset
 
   def _as_variant_tensor(self):
-    return gen_dataset_ops.ignore_errors_dataset(
+    return gen_experimental_dataset_ops.experimental_ignore_errors_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         **dataset_ops.flat_structure(self))
 
diff --git a/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py b/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py
index cc76ab0850..9c06474a2f 100644
--- a/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py
@@ -19,14 +19,13 @@ from __future__ import print_function
 
 import abc
 
-from tensorflow.contrib.data.python.ops import contrib_op_loader  # pylint: disable=unused-import
-from tensorflow.contrib.data.python.ops import gen_dataset_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import sparse
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
 
 
 class MaterializedIndexedDataset(object):
@@ -57,7 +56,7 @@ class MaterializedIndexedDataset(object):
       A tensor containing the values corresponding to `index`.
     """
     # TODO(saeta): nest.pack_sequence_as(...)
-    return gen_dataset_ops.indexed_dataset_get(
+    return ged_ops.experimental_indexed_dataset_get(
         self._materialized_resource,
         index,
         output_types=nest.flatten(
@@ -90,16 +89,18 @@ class IndexedDataset(dataset_ops.Dataset):
       container = ""
     if shared_name is None:
       shared_name = ""
-    materialized_resource = gen_dataset_ops.materialized_index_dataset_handle(
-        container=container,
-        shared_name=shared_name,
-        output_types=nest.flatten(
-            sparse.as_dense_types(self.output_types, self.output_classes)),
-        output_shapes=nest.flatten(
-            sparse.as_dense_types(self.output_shapes, self.output_classes)))
+    materialized_resource = (
+        ged_ops.experimental_materialized_index_dataset_handle(
+            container=container,
+            shared_name=shared_name,
+            output_types=nest.flatten(
+                sparse.as_dense_types(self.output_types, self.output_classes)),
+            output_shapes=nest.flatten(
+                sparse.as_dense_types(self.output_shapes,
+                                      self.output_classes))))
 
     with ops.colocate_with(materialized_resource):
-      materializer = gen_dataset_ops.indexed_dataset_materialize(
+      materializer = ged_ops.experimental_indexed_dataset_materialize(
           self._as_variant_tensor(), materialized_resource)
     return MaterializedIndexedDataset(materialized_resource, materializer,
                                       self.output_classes, self.output_types,
@@ -170,7 +171,7 @@ class IdentityIndexedDataset(IndexedDataset):
     return tensor_shape.scalar()
 
   def _as_variant_tensor(self):
-    return gen_dataset_ops.identity_indexed_dataset(self._size)
+    return ged_ops.experimental_identity_indexed_dataset(self._size)
 
   def _inputs(self):
     return []
diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py
index bfa3fdf543..1ee9db1aa8 100644
--- a/tensorflow/contrib/data/python/ops/interleave_ops.py
+++ b/tensorflow/contrib/data/python/ops/interleave_ops.py
@@ -18,8 +18,6 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib import stateless
-from tensorflow.contrib.data.python.ops import contrib_op_loader  # pylint: disable=unused-import
-from tensorflow.contrib.data.python.ops import gen_dataset_ops
 from tensorflow.contrib.data.python.ops import random_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
@@ -28,6 +26,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.util import deprecation
 
@@ -167,10 +166,12 @@ class _DirectedInterleaveDataset(dataset_ops.Dataset):
 
   def _as_variant_tensor(self):
     # pylint: disable=protected-access
-    return gen_dataset_ops.directed_interleave_dataset(
-        self._selector_input._as_variant_tensor(),
-        [data_input._as_variant_tensor() for data_input in self._data_inputs],
-        **dataset_ops.flat_structure(self))
+    return (
+        gen_experimental_dataset_ops.experimental_directed_interleave_dataset(
+            self._selector_input._as_variant_tensor(), [
+                data_input._as_variant_tensor()
+                for data_input in self._data_inputs
+            ], **dataset_ops.flat_structure(self)))
     # pylint: enable=protected-access
 
   def _inputs(self):
diff --git a/tensorflow/contrib/data/python/ops/optimization.py b/tensorflow/contrib/data/python/ops/optimization.py
index 3eb172acd5..7f5ce97228 100644
--- a/tensorflow/contrib/data/python/ops/optimization.py
+++ b/tensorflow/contrib/data/python/ops/optimization.py
@@ -17,12 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import contrib_op_loader  # pylint: disable=unused-import
-from tensorflow.contrib.data.python.ops import gen_dataset_ops as contrib_gen_dataset_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
 
 # A constant that can be used to enable auto-tuning.
 AUTOTUNE = -1
@@ -97,7 +96,7 @@ class _AssertNextDataset(dataset_ops.UnaryDataset):
         transformations, dtype=dtypes.string, name="transformations")
 
   def _as_variant_tensor(self):
-    return contrib_gen_dataset_ops.assert_next_dataset(
+    return gen_experimental_dataset_ops.experimental_assert_next_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         self._transformations,
         **dataset_ops.flat_structure(self))
diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py
index 58395879e6..46f82e453a 100644
--- a/tensorflow/contrib/data/python/ops/prefetching_ops.py
+++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py
@@ -19,8 +19,6 @@ from __future__ import print_function
 
 import warnings
 
-from tensorflow.contrib.data.python.ops import contrib_op_loader  # pylint: disable=unused-import
-from tensorflow.contrib.data.python.ops import gen_dataset_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.util import nest
@@ -32,7 +30,8 @@ from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import functional_ops
-from tensorflow.python.ops import gen_dataset_ops as core_gen_dataset_ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
 from tensorflow.python.ops import resource_variable_ops
 
 
@@ -64,7 +63,7 @@ def function_buffering_resource(string_arg,
   """
   if shared_name is None:
     shared_name = ""
-  return gen_dataset_ops.function_buffering_resource(
+  return ged_ops.experimental_function_buffering_resource(
       string_arg=string_arg,
       target_device=target_device,
       shared_name=shared_name,
@@ -78,14 +77,14 @@ def function_buffering_resource(string_arg,
 def function_buffering_resource_get_next(function_buffer_resource,
                                          output_types,
                                          name=None):
-  return gen_dataset_ops.function_buffering_resource_get_next(
+  return ged_ops.experimental_function_buffering_resource_get_next(
       function_buffer_resource=function_buffer_resource,
       output_types=output_types,
       name=name)
 
 
 def function_buffering_resource_reset(function_buffer_resource, name=None):
-  return gen_dataset_ops.function_buffering_resource_reset(
+  return ged_ops.experimental_function_buffering_resource_reset(
       function_buffer_resource=function_buffer_resource, name=name)
 
 
@@ -136,7 +135,7 @@ class _PrefetchToDeviceIterator(object):
       ret = remote_iterator.get_next()
       return nest.flatten(sparse.serialize_sparse_tensors(ret))
 
-    iterator_device = gen_dataset_ops.iterator_get_device(
+    iterator_device = ged_ops.experimental_iterator_get_device(
         self._input_iterator._iterator_resource)
 
     with ops.device(device):
@@ -162,10 +161,11 @@ class _PrefetchToDeviceIterator(object):
     if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD:
       warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE)
 
-    flat_ret = gen_dataset_ops.function_buffering_resource_get_next(
+    flat_ret = ged_ops.experimental_function_buffering_resource_get_next(
         self._buffering_resource,
-        output_types=nest.flatten(sparse.as_dense_types(
-            self.output_types, self.output_classes)), name=name)
+        output_types=nest.flatten(
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        name=name)
 
     ret = sparse.deserialize_sparse_tensors(
         nest.pack_sequence_as(self.output_types, flat_ret),
@@ -219,7 +219,7 @@ class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator):
                buffer_size):
     with ops.device("/device:CPU:0"):
       super(_PrefetchToDeviceEagerIterator, self).__init__(input_dataset)
-      input_iterator_handle = core_gen_dataset_ops.iterator_to_string_handle(
+      input_iterator_handle = gen_dataset_ops.iterator_to_string_handle(
           self._resource)
 
     self._device = device
@@ -238,7 +238,8 @@ class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator):
       self._buffering_resource = function_buffering_resource(
           f=_prefetch_fn,
           output_types=self._flat_output_types,
-          target_device=gen_dataset_ops.iterator_get_device(self._resource),
+          target_device=ged_ops.experimental_iterator_get_device(
+              self._resource),
           string_arg=input_iterator_handle,
           buffer_size=buffer_size,
           shared_name=iterator_ops._generate_shared_name(
@@ -252,7 +253,7 @@ class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator):
     # TODO(b/77291417): Fix
     with context.execution_mode(context.SYNC):
       with ops.device(self._device):
-        ret = gen_dataset_ops.function_buffering_resource_get_next(
+        ret = ged_ops.experimental_function_buffering_resource_get_next(
             function_buffer_resource=self._buffering_resource,
             output_types=self._flat_output_types)
       return sparse.deserialize_sparse_tensors(
@@ -409,12 +410,12 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
       """
       # pylint: disable=protected-access
       ds_variant = self._input_dataset._as_variant_tensor()
-      resource = core_gen_dataset_ops.anonymous_iterator(
+      resource = gen_dataset_ops.anonymous_iterator(
           output_types=self._flat_output_types,
           output_shapes=self._flat_output_shapes)
       with ops.control_dependencies(
-          [core_gen_dataset_ops.make_iterator(ds_variant, resource)]):
-        return core_gen_dataset_ops.iterator_to_string_handle(resource)
+          [gen_dataset_ops.make_iterator(ds_variant, resource)]):
+        return gen_dataset_ops.iterator_to_string_handle(resource)
 
     @function.Defun()
     def _remote_init_func():
@@ -463,7 +464,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
       Returns:
         Tensor constant 0
       """
-      iterator_resource = core_gen_dataset_ops.iterator_from_string_handle_v2(
+      iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
           string_handle,
           output_types=self._flat_output_types,
           output_shapes=self._flat_output_shapes)
@@ -504,7 +505,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
 
   def _as_variant_tensor(self):
     with ops.device(self._target_device):
-      return core_gen_dataset_ops.generator_dataset(
+      return gen_dataset_ops.generator_dataset(
           self._init_captured_args,
           self._next_captured_args,
           self._finalize_captured_args,
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index d9d06e2703..360971e200 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -23,7 +23,6 @@ import csv
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import gen_dataset_ops as contrib_gen_dataset_ops
 from tensorflow.contrib.data.python.ops import interleave_ops
 from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.contrib.data.python.ops import parsing_ops
@@ -38,6 +37,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
 from tensorflow.python.platform import gfile
 from tensorflow.python.util import deprecation
 
@@ -629,7 +629,7 @@ class CsvDataset(dataset_ops.DatasetSource):
 
   def _as_variant_tensor(self):
     # Constructs graph node for the dataset op.
-    return contrib_gen_dataset_ops.csv_dataset(
+    return gen_experimental_dataset_ops.experimental_csv_dataset(
         filenames=self._filenames,
         record_defaults=self._record_defaults,
         buffer_size=self._buffer_size,
@@ -1013,7 +1013,7 @@ class LMDBDataset(dataset_ops.DatasetSource):
         filenames, dtype=dtypes.string, name="filenames")
 
   def _as_variant_tensor(self):
-    return contrib_gen_dataset_ops.lmdb_dataset(
+    return gen_experimental_dataset_ops.experimental_lmdb_dataset(
         self._filenames,
         output_types=nest.flatten(self.output_types),
         output_shapes=nest.flatten(self.output_shapes))
diff --git a/tensorflow/contrib/data/python/ops/threadpool.py b/tensorflow/contrib/data/python/ops/threadpool.py
index 9d165ad52a..f73c3fd9cb 100644
--- a/tensorflow/contrib/data/python/ops/threadpool.py
+++ b/tensorflow/contrib/data/python/ops/threadpool.py
@@ -19,10 +19,9 @@ from __future__ import print_function
 
 import threading
 
-from tensorflow.contrib.data.python.ops import contrib_op_loader  # pylint: disable=unused-import
-from tensorflow.contrib.data.python.ops import gen_dataset_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
+from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
 from tensorflow.python.ops import resource_variable_ops
 
 _uid_counter = 0
@@ -47,7 +46,7 @@ class PrivateThreadPool(object):
     """Creates a `PrivateThreadPool` with the given number of threads."""
     if context.executing_eagerly():
       shared_name = _generate_shared_name("privatethreadpool")
-      self._resource = gen_dataset_ops.thread_pool_handle(
+      self._resource = ged_ops.experimental_thread_pool_handle(
           num_threads=num_threads,
           max_intra_op_parallelism=max_intra_op_parallelism,
           display_name=display_name,
@@ -55,7 +54,7 @@ class PrivateThreadPool(object):
       self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
           handle=self._resource, handle_device=context.context().device_name)
     else:
-      self._resource = gen_dataset_ops.thread_pool_handle(
+      self._resource = ged_ops.experimental_thread_pool_handle(
           num_threads=num_threads,
           max_intra_op_parallelism=max_intra_op_parallelism,
           display_name=display_name)
@@ -70,7 +69,7 @@ class _ThreadPoolDataset(dataset_ops.UnaryDataset):
     self._thread_pool = thread_pool
 
   def _as_variant_tensor(self):
-    return gen_dataset_ops.thread_pool_dataset(
+    return ged_ops.experimental_thread_pool_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         self._thread_pool._resource,  # pylint: disable=protected-access
         **dataset_ops.flat_structure(self))
diff --git a/tensorflow/contrib/data/python/ops/unique.py b/tensorflow/contrib/data/python/ops/unique.py
index bad67a580d..ed363a7090 100644
--- a/tensorflow/contrib/data/python/ops/unique.py
+++ b/tensorflow/contrib/data/python/ops/unique.py
@@ -17,10 +17,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import contrib_op_loader  # pylint: disable=unused-import
-from tensorflow.contrib.data.python.ops import gen_dataset_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import gen_experimental_dataset_ops
 
 
 def unique():
@@ -61,7 +60,7 @@ class _UniqueDataset(dataset_ops.UnaryDataset):
           "`tf.int32`, `tf.int64`, or `tf.string` component.")
 
   def _as_variant_tensor(self):
-    return gen_dataset_ops.unique_dataset(
+    return gen_experimental_dataset_ops.experimental_unique_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         **dataset_ops.flat_structure(self))
 
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index ca247dc56b..50fe308b73 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1039,6 +1039,7 @@ tf_gen_op_libs(
         "dataset_ops",
         "decode_proto_ops",
         "encode_proto_ops",
+        "experimental_dataset_ops",
         "function_ops",
         "functional_ops",
         "image_ops",
@@ -1169,6 +1170,7 @@ cc_library(
         ":dataset_ops_op_lib",
         ":decode_proto_ops_op_lib",
         ":encode_proto_ops_op_lib",
+        ":experimental_dataset_ops_op_lib",
         ":function_ops_op_lib",
         ":functional_ops_op_lib",
         ":image_ops_op_lib",
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalAssertNextDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalAssertNextDataset.pbtxt
new file mode 100644
index 0000000000..fa8fc96bb2
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalAssertNextDataset.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "ExperimentalAssertNextDataset"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalCSVDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalCSVDataset.pbtxt
new file mode 100644
index 0000000000..5fd88e7a0c
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalCSVDataset.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "ExperimentalCSVDataset"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt
new file mode 100644
index 0000000000..ac1f9719fe
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt
@@ -0,0 +1,21 @@
+op {
+  graph_op_name: "ExperimentalDirectedInterleaveDataset"
+  in_arg {
+    name: "selector_input_dataset"
+    description: <<END
+A dataset of scalar `DT_INT64` elements that determines which of the
+`N` data inputs should produce the next output element.
+END
+  }
+  in_arg {
+    name: "data_input_datasets"
+    description: <<END
+`N` datasets with the same type that will be interleaved according to
+the values of `selector_input_dataset`.
+END
+  }
+  summary: <<END
+A substitute for `InterleaveDataset` on a fixed list of `N` datasets.
+END
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResource.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResource.pbtxt
new file mode 100644
index 0000000000..66511eff60
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResource.pbtxt
@@ -0,0 +1,58 @@
+op {
+  graph_op_name: "ExperimentalFunctionBufferingResource"
+  in_arg {
+    name: "string_arg"
+    description: <<END
+String argument to the function call.
+END
+  }
+  in_arg {
+    name: "target_device"
+    description: <<END
+Target device to execute the function on.
+END
+  }
+  out_arg {
+    name: "resource"
+    description: <<END
+Handle to the resource created.
+END
+  }
+  attr {
+    name: "shared_name"
+    description: <<END
+If non-empty, this resource will be shared under the given name across
+multiple sessions.
+END
+  }
+  attr {
+    name: "container"
+    description: <<END
+If non-empty, this resource is placed in the given container.
+Otherwise, a default container is used.
+END
+  }
+  attr {
+    name: "f"
+    description: <<END
+Function to be executed.
+END
+  }
+  attr {
+    name: "buffer_size"
+    description: <<END
+Size of the buffer.
+END
+  }
+  attr {
+    name: "output_types"
+    description: <<END
+The type list for the return values.
+END
+  }
+  summary: <<END
+Creates a resource that fills up a buffer by making function calls.
+END
+  visibility: HIDDEN
+}
+
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt
new file mode 100644
index 0000000000..bf4b66b22b
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt
@@ -0,0 +1,25 @@
+op {
+  graph_op_name: "ExperimentalFunctionBufferingResourceGetNext"
+  in_arg {
+    name: "function_buffer_resource"
+    description: <<END
+The FunctionBufferingResource handle.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+A list of return values.
+END
+  }
+  attr {
+    name: "output_types"
+    description: <<END
+The type list for the return values.
+END
+  }
+  summary: <<END
+Gets the next element from a FunctionBufferingResource.
+END
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceReset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceReset.pbtxt
new file mode 100644
index 0000000000..729718ddb3
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceReset.pbtxt
@@ -0,0 +1,13 @@
+op {
+  graph_op_name: "ExperimentalFunctionBufferingResourceReset"
+  in_arg {
+    name: "function_buffer_resource"
+    description: <<END
+The FunctionBufferingResource handle.
+END
+  }
+  summary: <<END
+Resets the FunctionBufferingResource.
+END
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalIdentityIndexedDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalIdentityIndexedDataset.pbtxt
new file mode 100644
index 0000000000..fe266c111f
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalIdentityIndexedDataset.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "ExperimentalIdentityIndexedDataset"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt
new file mode 100644
index 0000000000..d42546516d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt
@@ -0,0 +1,8 @@
+op {
+  graph_op_name: "ExperimentalIgnoreErrorsDataset"
+  summary: <<END
+Creates a dataset that contains the elements of `input_dataset` ignoring errors.
+END
+  visibility: HIDDEN
+}
+
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetGet.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetGet.pbtxt
new file mode 100644
index 0000000000..e285f87e10
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetGet.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "ExperimentalIndexedDatasetGet"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetMaterialize.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetMaterialize.pbtxt
new file mode 100644
index 0000000000..60c32473b5
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetMaterialize.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "ExperimentalIndexedDatasetMaterialize"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalIteratorGetDevice.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalIteratorGetDevice.pbtxt
new file mode 100644
index 0000000000..b72b229e9a
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalIteratorGetDevice.pbtxt
@@ -0,0 +1,8 @@
+op {
+  graph_op_name: "ExperimentalIteratorGetDevice"
+  summary: <<END
+Returns the name of the device on which `resource` has been placed.
+END
+  visibility: HIDDEN
+}
+
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalLMDBDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalLMDBDataset.pbtxt
new file mode 100644
index 0000000000..b38b23a51d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalLMDBDataset.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "ExperimentalLMDBDataset"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt
new file mode 100644
index 0000000000..9676b9d284
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "ExperimentalMaterializedIndexDatasetHandle"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolDataset.pbtxt
new file mode 100644
index 0000000000..d73b5bfda3
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolDataset.pbtxt
@@ -0,0 +1,13 @@
+op {
+  graph_op_name: "ExperimentalThreadPoolDataset"
+  in_arg {
+    name: "thread_pool"
+    description: <<END
+A resource produced by the ThreadPoolHandle op.
+END
+  }
+  summary: <<END
+Creates a dataset that uses a custom thread pool to compute `input_dataset`.
+END
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolHandle.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolHandle.pbtxt
new file mode 100644
index 0000000000..48bf93406c
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolHandle.pbtxt
@@ -0,0 +1,35 @@
+op {
+  graph_op_name: "ExperimentalThreadPoolHandle"
+  out_arg {
+    name: "handle"
+    description: <<END
+A resource that can be consumed by one or more ExperimentalThreadPoolDataset
+ops.
+END
+  }
+  attr {
+    name: "num_threads"
+    description: <<END
+The number of threads in the thread pool.
+END
+  }
+  attr {
+    name: "max_intra_op_parallelism"
+    description: <<END
+The maximum degree of parallelism to use within operations that execute on this
+threadpool.
+END
+  }
+  attr {
+    name: "display_name"
+    description: <<END
+A human-readable name for the threads that may be visible in some
+visualizations.
+threadpool.
+END
+  }
+  summary: <<END
+Creates a dataset that uses a custom thread pool to compute `input_dataset`.
+END
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalUniqueDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalUniqueDataset.pbtxt
new file mode 100644
index 0000000000..68ed797a0c
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalUniqueDataset.pbtxt
@@ -0,0 +1,8 @@
+op {
+  graph_op_name: "ExperimentalUniqueDataset"
+  summary: <<END
+Creates a dataset that contains the unique elements of `input_dataset`.
+END
+  visibility: HIDDEN
+}
+
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 87efdff789..6333853cdf 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -765,6 +765,7 @@ tf_kernel_library(
         ":window_dataset_op",
         ":writer_ops",
         ":zip_dataset_op",
+        "//tensorflow/core/kernels/data/experimental:dataset_kernels",
     ],
 )
 
diff --git a/tensorflow/contrib/data/kernels/BUILD b/tensorflow/core/kernels/data/experimental/BUILD
similarity index 52%
rename from tensorflow/contrib/data/kernels/BUILD
rename to tensorflow/core/kernels/data/experimental/BUILD
index ec6cb37193..43406db3ed 100644
--- a/tensorflow/contrib/data/kernels/BUILD
+++ b/tensorflow/core/kernels/data/experimental/BUILD
@@ -1,22 +1,26 @@
 # Description:
-#   Contains kernels for datasets and iterators.
+#   Contains experimental kernels for datasets and iterators.
 package(default_visibility = ["//tensorflow:internal"])
 
 licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_kernel_library",
+)
+
 cc_library(
     name = "indexed_dataset_headers",
     hdrs = ["indexed_dataset.h"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:framework",
         "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
     ],
 )
 
-cc_library(
+tf_kernel_library(
     name = "indexed_dataset",
     srcs = [
         "identity_indexed_dataset.cc",
@@ -24,103 +28,102 @@ cc_library(
     ],
     deps = [
         ":indexed_dataset_headers",
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
     ],
-    alwayslink = 1,
 )
 
-cc_library(
+tf_kernel_library(
     name = "prefetching_kernels",
     srcs = ["prefetching_kernels.cc"],
     deps = [
-        "//tensorflow/core:core_cpu_headers_lib",
-        "//tensorflow/core:framework_headers_lib",
-        "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
     ],
-    alwayslink = 1,
 )
 
-cc_library(
+tf_kernel_library(
     name = "directed_interleave_dataset_op",
     srcs = ["directed_interleave_dataset_op.cc"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
     ],
-    alwayslink = 1,
 )
 
-cc_library(
+tf_kernel_library(
     name = "csv_dataset_op",
     srcs = ["csv_dataset_op.cc"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
-        "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
     ],
-    alwayslink = 1,
 )
 
-cc_library(
+tf_kernel_library(
     name = "ignore_errors_dataset_op",
     srcs = ["ignore_errors_dataset_op.cc"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
         "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
     ],
-    alwayslink = 1,
 )
 
-cc_library(
+tf_kernel_library(
     name = "lmdb_dataset_op",
     srcs = ["lmdb_dataset_op.cc"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//third_party/eigen3",
         "@lmdb",
-        "@protobuf_archive//:protobuf_headers",
     ],
 )
 
-cc_library(
+tf_kernel_library(
     name = "threadpool_dataset_op",
     srcs = ["threadpool_dataset_op.cc"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
     ],
-    alwayslink = 1,
 )
 
-cc_library(
+tf_kernel_library(
     name = "unique_dataset_op",
     srcs = ["unique_dataset_op.cc"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
     ],
-    alwayslink = 1,
 )
 
-cc_library(
+tf_kernel_library(
     name = "assert_next_dataset_op",
     srcs = ["assert_next_dataset_op.cc"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
         "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
     ],
-    alwayslink = 1,
 )
 
-cc_library(
+tf_kernel_library(
     name = "dataset_kernels",
     deps = [
         ":assert_next_dataset_op",
@@ -132,8 +135,5 @@ cc_library(
         ":prefetching_kernels",
         ":threadpool_dataset_op",
         ":unique_dataset_op",
-        "//tensorflow/core:framework_headers_lib",
-        "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
     ],
 )
diff --git a/tensorflow/contrib/data/kernels/assert_next_dataset_op.cc b/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc
similarity index 97%
rename from tensorflow/contrib/data/kernels/assert_next_dataset_op.cc
rename to tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc
index c19a609780..3511cca0f5 100644
--- a/tensorflow/contrib/data/kernels/assert_next_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc
@@ -147,8 +147,9 @@ class AssertNextDatasetOp : public UnaryDatasetOpKernel {
   std::vector<PartialTensorShape> output_shapes_;
 };
 
-REGISTER_KERNEL_BUILDER(Name("AssertNextDataset").Device(DEVICE_CPU),
-                        AssertNextDatasetOp);
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalAssertNextDataset").Device(DEVICE_CPU),
+    AssertNextDatasetOp);
 
 }  // namespace
 }  // namespace data
diff --git a/tensorflow/contrib/data/kernels/csv_dataset_op.cc b/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc
similarity index 99%
rename from tensorflow/contrib/data/kernels/csv_dataset_op.cc
rename to tensorflow/core/kernels/data/experimental/csv_dataset_op.cc
index 21ec50fb6b..7451ca4cb1 100644
--- a/tensorflow/contrib/data/kernels/csv_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc
@@ -852,7 +852,8 @@ class CSVDatasetOp : public DatasetOpKernel {
 };  // class CSVDatasetOp
 
 // Register the kernel implementation for CSVDataset.
-REGISTER_KERNEL_BUILDER(Name("CSVDataset").Device(DEVICE_CPU), CSVDatasetOp);
+REGISTER_KERNEL_BUILDER(Name("ExperimentalCSVDataset").Device(DEVICE_CPU),
+                        CSVDatasetOp);
 
 }  // namespace
 }  // namespace data
diff --git a/tensorflow/contrib/data/kernels/directed_interleave_dataset_op.cc b/tensorflow/core/kernels/data/experimental/directed_interleave_dataset_op.cc
similarity index 98%
rename from tensorflow/contrib/data/kernels/directed_interleave_dataset_op.cc
rename to tensorflow/core/kernels/data/experimental/directed_interleave_dataset_op.cc
index a5321620bf..c47a9099c4 100644
--- a/tensorflow/contrib/data/kernels/directed_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/directed_interleave_dataset_op.cc
@@ -272,8 +272,9 @@ class DirectedInterleaveDatasetOp : public DatasetOpKernel {
   };
 };
 
-REGISTER_KERNEL_BUILDER(Name("DirectedInterleaveDataset").Device(DEVICE_CPU),
-                        DirectedInterleaveDatasetOp);
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalDirectedInterleaveDataset").Device(DEVICE_CPU),
+    DirectedInterleaveDatasetOp);
 
 }  // namespace
 }  // namespace data
diff --git a/tensorflow/contrib/data/kernels/identity_indexed_dataset.cc b/tensorflow/core/kernels/data/experimental/identity_indexed_dataset.cc
similarity index 96%
rename from tensorflow/contrib/data/kernels/identity_indexed_dataset.cc
rename to tensorflow/core/kernels/data/experimental/identity_indexed_dataset.cc
index c3cb45dbf7..2141f118ca 100644
--- a/tensorflow/contrib/data/kernels/identity_indexed_dataset.cc
+++ b/tensorflow/core/kernels/data/experimental/identity_indexed_dataset.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/data/kernels/indexed_dataset.h"
+#include "tensorflow/core/kernels/data/experimental/indexed_dataset.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -147,8 +147,9 @@ class IdentityIndexedDatasetOp : public IndexedDatasetOpKernel {
   };
 };
 
-REGISTER_KERNEL_BUILDER(Name("IdentityIndexedDataset").Device(DEVICE_CPU),
-                        IdentityIndexedDatasetOp);
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalIdentityIndexedDataset").Device(DEVICE_CPU),
+    IdentityIndexedDatasetOp);
 
 }  // namespace
 }  // namespace data
diff --git a/tensorflow/contrib/data/kernels/ignore_errors_dataset_op.cc b/tensorflow/core/kernels/data/experimental/ignore_errors_dataset_op.cc
similarity index 96%
rename from tensorflow/contrib/data/kernels/ignore_errors_dataset_op.cc
rename to tensorflow/core/kernels/data/experimental/ignore_errors_dataset_op.cc
index beec344534..b34377c642 100644
--- a/tensorflow/contrib/data/kernels/ignore_errors_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/ignore_errors_dataset_op.cc
@@ -15,7 +15,6 @@ limitations under the License.
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
 namespace data {
@@ -133,8 +132,9 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel {
   };
 };
 
-REGISTER_KERNEL_BUILDER(Name("IgnoreErrorsDataset").Device(DEVICE_CPU),
-                        IgnoreErrorsDatasetOp);
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalIgnoreErrorsDataset").Device(DEVICE_CPU),
+    IgnoreErrorsDatasetOp);
 
 }  // namespace
 }  // namespace data
diff --git a/tensorflow/contrib/data/kernels/indexed_dataset.cc b/tensorflow/core/kernels/data/experimental/indexed_dataset.cc
similarity index 97%
rename from tensorflow/contrib/data/kernels/indexed_dataset.cc
rename to tensorflow/core/kernels/data/experimental/indexed_dataset.cc
index ced8ab0d60..75ea462f40 100644
--- a/tensorflow/contrib/data/kernels/indexed_dataset.cc
+++ b/tensorflow/core/kernels/data/experimental/indexed_dataset.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/data/kernels/indexed_dataset.h"
+#include "tensorflow/core/kernels/data/experimental/indexed_dataset.h"
 
 #include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -361,12 +361,14 @@ class IndexedDatasetGet : public OpKernel {
 };
 
 REGISTER_KERNEL_BUILDER(
-    Name("MaterializedIndexDatasetHandle").Device(DEVICE_CPU),
+    Name("ExperimentalMaterializedIndexDatasetHandle").Device(DEVICE_CPU),
     MaterializedHandleOp);
-REGISTER_KERNEL_BUILDER(Name("IndexedDatasetMaterialize").Device(DEVICE_CPU),
-                        MaterializeDatasetOp);
-REGISTER_KERNEL_BUILDER(Name("IndexedDatasetGet").Device(DEVICE_CPU),
-                        IndexedDatasetGet);
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalIndexedDatasetMaterialize").Device(DEVICE_CPU),
+    MaterializeDatasetOp);
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalIndexedDatasetGet").Device(DEVICE_CPU),
+    IndexedDatasetGet);
 
 }  // namespace
 }  // namespace data
diff --git a/tensorflow/contrib/data/kernels/indexed_dataset.h b/tensorflow/core/kernels/data/experimental/indexed_dataset.h
similarity index 95%
rename from tensorflow/contrib/data/kernels/indexed_dataset.h
rename to tensorflow/core/kernels/data/experimental/indexed_dataset.h
index 7aa2d3fdbc..27a8360cbc 100644
--- a/tensorflow/contrib/data/kernels/indexed_dataset.h
+++ b/tensorflow/core/kernels/data/experimental/indexed_dataset.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_DATA_KERNELS_INDEXED_DATASET_H_
-#define TENSORFLOW_CONTRIB_DATA_KERNELS_INDEXED_DATASET_H_
+#ifndef TENSORFLOW_CORE_KERNELS_DATA_EXPERIMENTAL_INDEXED_DATASET_H_
+#define TENSORFLOW_CORE_KERNELS_DATA_EXPERIMENTAL_INDEXED_DATASET_H_
 
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -116,4 +116,4 @@ Status StoreIndexedDatasetInVariantTensor(IndexedDataset* dataset,
 }  // namespace data
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CONTRIB_DATA_KERNELS_INDEXED_DATASET_H_
+#endif  // TENSORFLOW_CORE_KERNELS_DATA_EXPERIMENTAL_INDEXED_DATASET_H_
diff --git a/tensorflow/contrib/data/kernels/lmdb_dataset_op.cc b/tensorflow/core/kernels/data/experimental/lmdb_dataset_op.cc
similarity index 98%
rename from tensorflow/contrib/data/kernels/lmdb_dataset_op.cc
rename to tensorflow/core/kernels/data/experimental/lmdb_dataset_op.cc
index d233c1f8ec..8a88d32f0c 100644
--- a/tensorflow/contrib/data/kernels/lmdb_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/lmdb_dataset_op.cc
@@ -210,7 +210,8 @@ class LMDBDatasetOp : public DatasetOpKernel {
   };
 };
 
-REGISTER_KERNEL_BUILDER(Name("LMDBDataset").Device(DEVICE_CPU), LMDBDatasetOp);
+REGISTER_KERNEL_BUILDER(Name("ExperimentalLMDBDataset").Device(DEVICE_CPU),
+                        LMDBDatasetOp);
 
 }  // namespace
 }  // namespace data
diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/core/kernels/data/experimental/prefetching_kernels.cc
similarity index 95%
rename from tensorflow/contrib/data/kernels/prefetching_kernels.cc
rename to tensorflow/core/kernels/data/experimental/prefetching_kernels.cc
index 96f1dd0059..2c6179d9f5 100644
--- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc
+++ b/tensorflow/core/kernels/data/experimental/prefetching_kernels.cc
@@ -338,20 +338,20 @@ class FunctionBufferResourceHandleOp : public OpKernel {
   DataTypeVector output_types_;
 };
 
-REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResource")
+REGISTER_KERNEL_BUILDER(Name("ExperimentalFunctionBufferingResource")
                             .Device(DEVICE_CPU)
                             .HostMemory("resource")
                             .HostMemory("string_arg")
                             .HostMemory("target_device"),
                         FunctionBufferResourceHandleOp);
-REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResource")
+REGISTER_KERNEL_BUILDER(Name("ExperimentalFunctionBufferingResource")
                             .Device(DEVICE_GPU)
                             .HostMemory("resource")
                             .HostMemory("string_arg")
                             .HostMemory("target_device"),
                         FunctionBufferResourceHandleOp);
 #if TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResource")
+REGISTER_KERNEL_BUILDER(Name("ExperimentalFunctionBufferingResource")
                             .Device(DEVICE_SYCL)
                             .HostMemory("resource")
                             .HostMemory("string_arg")
@@ -403,16 +403,16 @@ class FunctionBufferingResourceGetNextOp : public AsyncOpKernel {
   }
 };
 
-REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceGetNext")
+REGISTER_KERNEL_BUILDER(Name("ExperimentalFunctionBufferingResourceGetNext")
                             .Device(DEVICE_CPU)
                             .HostMemory("function_buffer_resource"),
                         FunctionBufferingResourceGetNextOp);
-REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceGetNext")
+REGISTER_KERNEL_BUILDER(Name("ExperimentalFunctionBufferingResourceGetNext")
                             .Device(DEVICE_GPU)
                             .HostMemory("function_buffer_resource"),
                         FunctionBufferingResourceGetNextOp);
 #if TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceGetNext")
+REGISTER_KERNEL_BUILDER(Name("ExperimentalFunctionBufferingResourceGetNext")
                             .Device(DEVICE_SYCL)
                             .HostMemory("function_buffer_resource"),
                         FunctionBufferingResourceGetNextOp);
@@ -440,16 +440,16 @@ class FunctionBufferingResourceResetOp : public OpKernel {
   }
 };
 
-REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceReset")
+REGISTER_KERNEL_BUILDER(Name("ExperimentalFunctionBufferingResourceReset")
                             .Device(DEVICE_CPU)
                             .HostMemory("function_buffer_resource"),
                         FunctionBufferingResourceResetOp);
-REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceReset")
+REGISTER_KERNEL_BUILDER(Name("ExperimentalFunctionBufferingResourceReset")
                             .Device(DEVICE_GPU)
                             .HostMemory("function_buffer_resource"),
                         FunctionBufferingResourceResetOp);
 #if TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceReset")
+REGISTER_KERNEL_BUILDER(Name("ExperimentalFunctionBufferingResourceReset")
                             .Device(DEVICE_SYCL)
                             .HostMemory("function_buffer_resource"),
                         FunctionBufferingResourceResetOp);
@@ -473,8 +473,9 @@ class IteratorGetDeviceOp : public OpKernel {
   }
 };
 
-REGISTER_KERNEL_BUILDER(Name("IteratorGetDevice").Device(DEVICE_CPU),
-                        IteratorGetDeviceOp);
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalIteratorGetDevice").Device(DEVICE_CPU),
+    IteratorGetDeviceOp);
 
 }  // namespace
 }  // namespace data
diff --git a/tensorflow/contrib/data/kernels/threadpool_dataset_op.cc b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
similarity index 97%
rename from tensorflow/contrib/data/kernels/threadpool_dataset_op.cc
rename to tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
index 30fa97a636..c80493d3a1 100644
--- a/tensorflow/contrib/data/kernels/threadpool_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
@@ -209,10 +209,11 @@ class ThreadPoolDatasetOp : public UnaryDatasetOpKernel {
   };
 };
 
-REGISTER_KERNEL_BUILDER(Name("ThreadPoolHandle").Device(DEVICE_CPU),
+REGISTER_KERNEL_BUILDER(Name("ExperimentalThreadPoolHandle").Device(DEVICE_CPU),
                         ThreadPoolHandleOp);
-REGISTER_KERNEL_BUILDER(Name("ThreadPoolDataset").Device(DEVICE_CPU),
-                        ThreadPoolDatasetOp);
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalThreadPoolDataset").Device(DEVICE_CPU),
+    ThreadPoolDatasetOp);
 
 }  // namespace
 }  // namespace data
diff --git a/tensorflow/contrib/data/kernels/unique_dataset_op.cc b/tensorflow/core/kernels/data/experimental/unique_dataset_op.cc
similarity index 97%
rename from tensorflow/contrib/data/kernels/unique_dataset_op.cc
rename to tensorflow/core/kernels/data/experimental/unique_dataset_op.cc
index 57fc5697a4..cd612e0eb2 100644
--- a/tensorflow/contrib/data/kernels/unique_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/unique_dataset_op.cc
@@ -199,8 +199,9 @@ class UniqueDatasetOp : public UnaryDatasetOpKernel {
             HANDLE_TYPE(DT_INT64);
             HANDLE_TYPE(DT_STRING);
             default:
-              LOG(FATAL) << "UniqueDataset unhandled data type: "
-                         << DataTypeString(lhs.dtype());
+              DCHECK(false) << "UniqueDataset unhandled data type: "
+                            << DataTypeString(lhs.dtype());
+              return false;
           }
         }
       };
@@ -215,7 +216,7 @@ class UniqueDatasetOp : public UnaryDatasetOpKernel {
   };
 };
 
-REGISTER_KERNEL_BUILDER(Name("UniqueDataset").Device(DEVICE_CPU),
+REGISTER_KERNEL_BUILDER(Name("ExperimentalUniqueDataset").Device(DEVICE_CPU),
                         UniqueDatasetOp);
 
 }  // namespace
diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc
similarity index 62%
rename from tensorflow/contrib/data/ops/dataset_ops.cc
rename to tensorflow/core/ops/experimental_dataset_ops.cc
index d1a771f005..f6bd5dce26 100644
--- a/tensorflow/contrib/data/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/experimental_dataset_ops.cc
@@ -17,24 +17,16 @@ limitations under the License.
 
 namespace tensorflow {
 
-REGISTER_OP("DirectedInterleaveDataset")
+REGISTER_OP("ExperimentalDirectedInterleaveDataset")
     .Input("selector_input_dataset: variant")
     .Input("data_input_datasets: N * variant")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
     .Attr("N: int >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A substitute for `InterleaveDataset` on a fixed list of `N` datasets.
-
-selector_input_dataset: A dataset of scalar `DT_INT64` elements that determines
-  which of the `N` data inputs should produce the next output element.
-data_input_datasets: `N` datasets with the same type that will be interleaved
-  according to the values of `selector_input_dataset`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
-REGISTER_OP("CSVDataset")
+REGISTER_OP("ExperimentalCSVDataset")
     .Input("filenames: string")
     .Input("compression_type: string")
     .Input("buffer_size: int64")
@@ -76,35 +68,26 @@ REGISTER_OP("CSVDataset")
       return shape_inference::ScalarShape(c);
     });
 
-REGISTER_OP("IgnoreErrorsDataset")
+REGISTER_OP("ExperimentalIgnoreErrorsDataset")
     .Input("input_dataset: variant")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that contains the elements of `input_dataset` ignoring errors.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
-REGISTER_OP("UniqueDataset")
+REGISTER_OP("ExperimentalUniqueDataset")
     .Input("input_dataset: variant")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that contains the unique elements of `input_dataset`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
-REGISTER_OP("IteratorGetDevice")
+REGISTER_OP("ExperimentalIteratorGetDevice")
     .Input("resource: resource")
     .Output("device: string")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Returns the name of the device on which `resource` has been placed.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
-REGISTER_OP("FunctionBufferingResource")
+REGISTER_OP("ExperimentalFunctionBufferingResource")
     .Input("string_arg: string")
     .Input("target_device: string")
     .Output("resource: resource")
@@ -113,77 +96,36 @@ REGISTER_OP("FunctionBufferingResource")
     .Attr("f: func")
     .Attr("buffer_size: int")
     .Attr("output_types: list(type)")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Creates a resource that fills up a buffer by making function calls.
-
-string_arg: String argument to the function call.
-target_device: Target device to execute the function on.
-resource: Handle to the resource created.
-f: Function to be executed.
-buffer_size: Size of the buffer.
-container: If non-empty, this resource is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this resource will be shared under the given name
-  across multiple sessions.
-output_types: The type list for the return values.
-)doc");
-
-REGISTER_OP("FunctionBufferingResourceGetNext")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("ExperimentalFunctionBufferingResourceGetNext")
     .Input("function_buffer_resource: resource")
     .Attr("output_types: list(type)")
     .Output("output: output_types")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Gets the next element from a FunctionBufferingResource.
+    .SetShapeFn(shape_inference::UnknownShape);
 
-function_buffer_resource: The FunctionBufferingResource handle.
-output: A list of return values.
-output_types: The type list for the return values.
-)doc");
-
-REGISTER_OP("FunctionBufferingResourceReset")
+REGISTER_OP("ExperimentalFunctionBufferingResourceReset")
     .Input("function_buffer_resource: resource")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Resets the FunctionBufferingResource.
-
-function_buffer_resource: The FunctionBufferingResource handle.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
-REGISTER_OP("ThreadPoolDataset")
+REGISTER_OP("ExperimentalThreadPoolDataset")
     .Input("input_dataset: variant")
     .Input("thread_pool: resource")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that uses a custom thread pool to compute `input_dataset`.
-
-handle: A resource produced by the ThreadPoolHandle op.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
-REGISTER_OP("ThreadPoolHandle")
+REGISTER_OP("ExperimentalThreadPoolHandle")
     .Output("handle: resource")
     .SetShapeFn(shape_inference::ScalarShape)
     .Attr("num_threads: int")
     .Attr("max_intra_op_parallelism: int = 1")
     .Attr("display_name: string")
     .Attr("container: string = ''")
-    .Attr("shared_name: string = ''")
-    .Doc(R"doc(
-Creates a custom thread pool with the given number of threads.
-
-handle: A resource that can be consumed by one or more ThreadPoolDataset ops.
-num_threads: The number of threads in the thread pool.
-max_intra_op_parallelism: The maximum degree of parallelism to use within
-  operations that execute on this threadpool.
-display_name: A human-readable name for the threads that may be visible in
-  some visualizations.
-)doc");
-
-REGISTER_OP("AssertNextDataset")
+    .Attr("shared_name: string = ''");
+
+REGISTER_OP("ExperimentalAssertNextDataset")
     .Input("input_dataset: variant")
     .Input("transformations: string")
     .Output("handle: variant")
@@ -196,7 +138,7 @@ REGISTER_OP("AssertNextDataset")
       return shape_inference::ScalarShape(c);
     });
 
-REGISTER_OP("LMDBDataset")
+REGISTER_OP("ExperimentalLMDBDataset")
     .Input("filenames: string")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
@@ -205,4 +147,61 @@ REGISTER_OP("LMDBDataset")
                       // stateful to inhibit constant folding.
     .SetShapeFn(shape_inference::ScalarShape);
 
+REGISTER_OP("ExperimentalIdentityIndexedDataset")
+    .Input("size: uint64")
+    .Output("handle: variant")
+    .SetIsStateful()
+    .SetShapeFn(
+        shape_inference::ScalarShape);  // TODO(saeta): check input shapes.
+
+///////////////////////////////////////////////////////////////////////////////
+//     IndexedDataset Internals
+///////////////////////////////////////////////////////////////////////////////
+
+// Creates the handle.
+REGISTER_OP("ExperimentalMaterializedIndexDatasetHandle")
+    .Output("handle: resource")
+    .Attr("container: string")
+    .Attr("shared_name: string")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(shape_inference::ScalarShape);
+
+// Actually materialize the materialize handle.
+REGISTER_OP("ExperimentalIndexedDatasetMaterialize")
+    .Input("dataset: variant")
+    .Input("materialized: resource")
+    .SetShapeFn(shape_inference::NoOutputs);
+
+namespace {
+
+Status GetShapeFn(shape_inference::InferenceContext* c) {
+  shape_inference::ShapeHandle unused;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
+  std::vector<PartialTensorShape> output_shapes;
+  TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
+  if (output_shapes.size() != c->num_outputs()) {
+    return errors::InvalidArgument(
+        "`output_shapes` must be the same length as `output_types` (",
+        output_shapes.size(), " vs. ", c->num_outputs());
+  }
+  for (size_t i = 0; i < output_shapes.size(); ++i) {
+    shape_inference::ShapeHandle output_shape_handle;
+    TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
+        output_shapes[i], &output_shape_handle));
+    c->set_output(static_cast<int>(i), output_shape_handle);
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
+REGISTER_OP("ExperimentalIndexedDatasetGet")
+    .Input("materialized: resource")
+    .Input("index: uint64")
+    .Output("components: output_types")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(GetShapeFn);
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 410b3a553a..91cafea042 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1638,6 +1638,15 @@ tf_gen_op_wrapper_private_py(
     ],
 )
 
+tf_gen_op_wrapper_private_py(
+    name = "experimental_dataset_ops_gen",
+    visibility = [
+        "//learning/brain/python/ops:__pkg__",
+        "//tensorflow:__subpackages__",
+        "//tensorflow/python/kernel_tests:__pkg__",
+    ],
+)
+
 tf_gen_op_wrapper_private_py(
     name = "image_ops_gen",
     visibility = ["//learning/brain/python/ops:__pkg__"],
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index f1de22300b..7d925a8fef 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -67,7 +67,6 @@ COMMON_PIP_DEPS = [
     "//tensorflow/contrib/data/python/kernel_tests/serialization:dataset_serialization_test_base",
     "//tensorflow/contrib/data/python/kernel_tests:stats_dataset_test_base",
     "//tensorflow/contrib/data/python/kernel_tests:test_utils",
-    "//tensorflow/contrib/data/python/ops:contrib_op_loader",
     "//tensorflow/contrib/eager/python/examples:examples_pip",
     "//tensorflow/contrib/eager/python:evaluator",
     "//tensorflow/contrib/gan:gan",
-- 
GitLab


From 9ef0ec921cc6de670fd2fdba1be49e0eca2a1043 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 08:47:34 -0700
Subject: [PATCH 0857/1357] internal change only

PiperOrigin-RevId: 214941829
---
 tensorflow/contrib/tpu/profiler/op_profile.proto | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/op_profile.proto b/tensorflow/contrib/tpu/profiler/op_profile.proto
index b25d06dda8..292108f949 100644
--- a/tensorflow/contrib/tpu/profiler/op_profile.proto
+++ b/tensorflow/contrib/tpu/profiler/op_profile.proto
@@ -66,8 +66,8 @@ message Metrics {
   //  - it does not reveal the peak core FLOPS of the hardware
   double flops = 2;
 
-  // The VMEM bandwidth used to load operands from HBM, as a fraction of
-  // thereotical VMEM bandwidth on the specific hardware.
+  // The memory bandwidth used to load operands, as a fraction of
+  // thereotical memory bandwidth on the specific hardware.
   double memory_bandwidth = 3;
 
   double raw_time = 11;   // Elapsed core-time in picoseconds.
-- 
GitLab


From 35459cbaa0f654393b242c5357f6939b05267ab8 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Fri, 28 Sep 2018 08:56:06 -0700
Subject: [PATCH 0858/1357] Build TF with XLA support by default.

Building binaries with XLA support does not enable it by default, it
simply makes it accessible via default binary builds.

PiperOrigin-RevId: 214942824
---
 configure.py                                      | 4 ++--
 tensorflow/tools/ci_build/builds/run_pip_tests.sh | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/configure.py b/configure.py
index 55fce8b93b..9899ae10e8 100644
--- a/configure.py
+++ b/configure.py
@@ -1504,6 +1504,7 @@ def main():
   if is_macos():
     environ_cp['TF_NEED_JEMALLOC'] = '0'
     environ_cp['TF_NEED_TENSORRT'] = '0'
+    environ_cp['TF_ENABLE_XLA'] = '0'
 
   # The numpy package on ppc64le uses OpenBLAS which has multi-threading
   # issues that lead to incorrect answers.  Set OMP_NUM_THREADS=1 at
@@ -1515,7 +1516,7 @@ def main():
   set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc',
                 'with_jemalloc', True)
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
-                False, 'xla')
+                True, 'xla')
 
 
   set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
@@ -1624,4 +1625,3 @@ def main():
 
 if __name__ == '__main__':
   main()
-
diff --git a/tensorflow/tools/ci_build/builds/run_pip_tests.sh b/tensorflow/tools/ci_build/builds/run_pip_tests.sh
index 17198a6560..7d5cf3f843 100755
--- a/tensorflow/tools/ci_build/builds/run_pip_tests.sh
+++ b/tensorflow/tools/ci_build/builds/run_pip_tests.sh
@@ -111,7 +111,6 @@ bazel clean
 # virtualenv.
 export TF_NEED_GCP=0
 export TF_NEED_HDFS=0
-export TF_ENABLE_XLA=0
 
 # Obtain the path to Python binary
 if [[ ${IS_VIRTUALENV} == "1" ]]; then
-- 
GitLab


From 97498f64ef097096b756c6b262f3ae38965e8685 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Fri, 28 Sep 2018 09:17:51 -0700
Subject: [PATCH 0859/1357] [TF:XLA] Add comment explaining why there is no
 PrimitiveTypeToDataType function.

PiperOrigin-RevId: 214945748
---
 tensorflow/compiler/tf2xla/type_util.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/compiler/tf2xla/type_util.h b/tensorflow/compiler/tf2xla/type_util.h
index bda667eb1f..6354216eee 100644
--- a/tensorflow/compiler/tf2xla/type_util.h
+++ b/tensorflow/compiler/tf2xla/type_util.h
@@ -25,6 +25,14 @@ namespace tensorflow {
 // Converts a Tensorflow DataType to an XLA PrimitiveType.
 Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type);
 
+// N.B.: there is intentionally no function to convert an XLA PrimitiveType to
+// a TensorFlow DataType. The mapping from TF types to XLA types is not
+// one-to-one: for example, both DT_INT8 and DT_QINT8 map to xla::S8. So the
+// inverse would not be a well-defined function. If you find that you want the
+// inverse mapping, then most likely you should be preserving the original
+// TensorFlow type, rather than trying to convert an XLA type into a TensorFlow
+// type.
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_COMPILER_TF2XLA_TYPE_UTIL_H_
-- 
GitLab


From f4014108a310928cd897085a8bc7d757c641a1c3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 09:21:32 -0700
Subject: [PATCH 0860/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 214946257
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 415 ++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 415 ++++++++++++++++++
 2 files changed, 830 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 32ce31cf23..43c14d83b5 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -21531,6 +21531,421 @@ op {
     }
   }
 }
+op {
+  name: "ExperimentalAssertNextDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "transformations"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ExperimentalCSVDataset"
+  input_arg {
+    name: "filenames"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "compression_type"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "header"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "field_delim"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "use_quote_delim"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "na_value"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "select_cols"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "record_defaults"
+    type_list_attr: "output_types"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalDirectedInterleaveDataset"
+  input_arg {
+    name: "selector_input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "data_input_datasets"
+    type: DT_VARIANT
+    number_attr: "N"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ExperimentalFunctionBufferingResource"
+  input_arg {
+    name: "string_arg"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "target_device"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+  }
+  attr {
+    name: "container"
+    type: "string"
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "buffer_size"
+    type: "int"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalFunctionBufferingResourceGetNext"
+  input_arg {
+    name: "function_buffer_resource"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalFunctionBufferingResourceReset"
+  input_arg {
+    name: "function_buffer_resource"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalIdentityIndexedDataset"
+  input_arg {
+    name: "size"
+    type: DT_UINT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalIgnoreErrorsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ExperimentalIndexedDatasetGet"
+  input_arg {
+    name: "materialized"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "index"
+    type: DT_UINT64
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalIndexedDatasetMaterialize"
+  input_arg {
+    name: "dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "materialized"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalIteratorGetDevice"
+  input_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "device"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalLMDBDataset"
+  input_arg {
+    name: "filenames"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalMaterializedIndexDatasetHandle"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalThreadPoolDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "thread_pool"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalThreadPoolHandle"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "num_threads"
+    type: "int"
+  }
+  attr {
+    name: "max_intra_op_parallelism"
+    type: "int"
+    default_value {
+      i: 1
+    }
+  }
+  attr {
+    name: "display_name"
+    type: "string"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalUniqueDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "Expm1"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 02a7f8d717..abee803889 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -10038,6 +10038,421 @@ op {
     }
   }
 }
+op {
+  name: "ExperimentalAssertNextDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "transformations"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ExperimentalCSVDataset"
+  input_arg {
+    name: "filenames"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "compression_type"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "header"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "field_delim"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "use_quote_delim"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "na_value"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "select_cols"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "record_defaults"
+    type_list_attr: "output_types"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalDirectedInterleaveDataset"
+  input_arg {
+    name: "selector_input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "data_input_datasets"
+    type: DT_VARIANT
+    number_attr: "N"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ExperimentalFunctionBufferingResource"
+  input_arg {
+    name: "string_arg"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "target_device"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+  }
+  attr {
+    name: "container"
+    type: "string"
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "buffer_size"
+    type: "int"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalFunctionBufferingResourceGetNext"
+  input_arg {
+    name: "function_buffer_resource"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalFunctionBufferingResourceReset"
+  input_arg {
+    name: "function_buffer_resource"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalIdentityIndexedDataset"
+  input_arg {
+    name: "size"
+    type: DT_UINT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalIgnoreErrorsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ExperimentalIndexedDatasetGet"
+  input_arg {
+    name: "materialized"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "index"
+    type: DT_UINT64
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalIndexedDatasetMaterialize"
+  input_arg {
+    name: "dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "materialized"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalIteratorGetDevice"
+  input_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "device"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalLMDBDataset"
+  input_arg {
+    name: "filenames"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalMaterializedIndexDatasetHandle"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalThreadPoolDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "thread_pool"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalThreadPoolHandle"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "num_threads"
+    type: "int"
+  }
+  attr {
+    name: "max_intra_op_parallelism"
+    type: "int"
+    default_value {
+      i: 1
+    }
+  }
+  attr {
+    name: "display_name"
+    type: "string"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ExperimentalUniqueDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "Expm1"
   input_arg {
-- 
GitLab


From 4eb53d3e5f7bec3c757a06d186ff31fe52083e6d Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 28 Sep 2018 09:27:29 -0700
Subject: [PATCH 0861/1357] Simplify eager/graph Layer.losses conditionals

Fixes an issue where losses created while executing eagerly were returned as unevaluated lambdas in a defun.

Lazily evaluates Layer losses by default when possible. Even when graph building this is generally a better thing to do (e.g. losses called in a while_loop).

Allows calls to Layer.add_loss when executing eagerly, but only for losses which are not conditional on inputs (no activity regularizers).

PiperOrigin-RevId: 214947108
---
 tensorflow/python/keras/engine/base_layer.py  | 157 ++++++++----------
 .../keras/engine/training_eager_test.py       |  14 ++
 .../python/keras/engine/training_test.py      |  12 ++
 tensorflow/python/layers/base.py              |  16 +-
 .../python/layers/convolutional_test.py       |  36 ++--
 tensorflow/python/layers/core_test.py         |   6 +-
 6 files changed, 140 insertions(+), 101 deletions(-)

diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index e98b131ae6..a75ce30d31 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import collections as collections_lib
 import enum  # pylint: disable=g-bad-import-order
+import functools
 import inspect  # Necessary supplement to tf_inspect to deal with variadic args.
 
 import numpy as np
@@ -160,9 +161,13 @@ class Layer(checkpointable.CheckpointableBase):
     self._trainable_weights = []
     self._non_trainable_weights = []
     self._updates = []
-    # When executing eagerly, _losses is a list of zero-argument lambdas which
-    # return tensors. When using graph execution, _losses is a list of ops.
+    # A list of zero-argument lambdas which return Tensors, used for variable
+    # regularizers.
+    self._callable_losses = []
+    # A list of Tensors containing activity regularizers and losses manually
+    # added through `add_loss`. Empty when executing eagerly.
     self._losses = []
+    self._in_call = False  # Flag for error checking in add_loss
     self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name
     self._call_fn_args = function_utils.fn_args(self.call)
     self._compute_previous_mask = ('mask' in self._call_fn_args or
@@ -359,20 +364,20 @@ class Layer(checkpointable.CheckpointableBase):
   def losses(self):
     """Losses which are associated with this `Layer`.
 
-    Note that when executing eagerly, getting this property evaluates
-    regularizers. When using graph execution, variable regularization ops have
-    already been created and are simply returned here.
+    Variable regularization tensors are created when this property is accessed,
+    so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+    propagate gradients back to the corresponding variables.
 
     Returns:
       A list of tensors.
     """
-    if context.executing_eagerly():
-      # _losses may only contain variable regularization losses when executing
-      # eagerly, and they have been saved as lambdas to be executed when
-      # requested.
-      return [regularizer() for regularizer in self._losses]
-    else:
-      return self._losses
+    collected_losses = []
+    collected_losses.extend(self._losses)
+    for regularizer in self._callable_losses:
+      loss_tensor = regularizer()
+      if loss_tensor is not None:
+        collected_losses.append(loss_tensor)
+    return collected_losses
 
   @doc_controls.for_subclass_implementers
   def add_loss(self, losses, inputs=None):
@@ -393,7 +398,9 @@ class Layer(checkpointable.CheckpointableBase):
     from `Layer.call()`).
 
     Arguments:
-      losses: Loss tensor, or list/tuple of tensors.
+      losses: Loss tensor, or list/tuple of tensors. Rather than tensors, losses
+        may also be zero-argument callables which create a loss tensor. Only
+        callable losses are supported when executing eagerly.
       inputs: If anything other than None is passed, it signals the losses
         are conditional on some of the layer's inputs,
         and thus they should only be run where these inputs are available.
@@ -403,29 +410,45 @@ class Layer(checkpointable.CheckpointableBase):
         (e.g. weight regularization losses).
 
     Raises:
-      RuntimeError: If called in Eager mode.
+      RuntimeError: If called in Eager mode with a `Tensor` rather than a
+        callable, or if `inputs` is not None.
     """
-    if context.executing_eagerly():
-      # TODO(fchollet): it should be possible (and highly desirable) to support
-      # `add_loss` in eager mode. This allows great convenience and flexibility
-      # in defining custom losses on the fly (e.g. in VAEs).
-      # Simply appending the loss value to `self._losses`
-      # is the correct behavior.
-      # The only caveat is that we need to force the user to only call
-      # `add_loss` from inside a model or Layer's `call` method
-      # (otherwise the loss computation cannot be backproped through).
-      raise RuntimeError('Layer.add_loss not supported in Eager mode.')
-
+    executing_eagerly = context.executing_eagerly()
+    if executing_eagerly:
+      if inputs is not None:
+        raise RuntimeError(
+            'Activity regularization (via the "inputs" argument to '
+            'Layer.add_loss) is not supported when executing eagerly. Consider '
+            'returning activity regularization losses from a Model\'s call() '
+            'method.')
+      if getattr(self, '_in_call', False):
+        # TODO(psv): Support activity regularization and a way to reset losses.
+        raise RuntimeError(
+            'Adding losses inside a Layer\'s call() method is not currently '
+            'supported when executing eagerly. Please file a feature request '
+            'if you need this limitation lifted.')
     losses = generic_utils.to_list(losses)
-    losses = [ops.convert_to_tensor(loss, dtype=backend.floatx())
-              if not tensor_util.is_tensor(loss) else loss for loss in losses]
-    self._losses += losses
-    if inputs is None:
-      for loss in losses:
-        loss._unconditional_loss = True  # pylint: disable=protected-access
-    else:
-      for loss in losses:
-        loss._unconditional_loss = False  # pylint: disable=protected-access
+
+    def _tag_unconditional(loss):
+      if callable(loss):
+        loss = loss()
+      if loss is None:
+        return None  # Will be filtered out when computing the .losses property
+      if not tensor_util.is_tensor(loss):
+        loss = ops.convert_to_tensor(loss, dtype=backend.floatx())
+      loss._unconditional_loss = (inputs is None)  # pylint: disable=protected-access
+      return loss
+
+    for loss in losses:
+      if callable(loss):
+        self._callable_losses.append(
+            functools.partial(_tag_unconditional, loss))
+      else:
+        if executing_eagerly:
+          raise RuntimeError(
+              'Layer.add_loss only supported for zero-argument lambdas when '
+              'executing eagerly.')
+        self._losses.append(_tag_unconditional(loss))
 
   def get_losses_for(self, inputs):
     """Retrieves losses relevant to a specific set of inputs.
@@ -599,56 +622,20 @@ class Layer(checkpointable.CheckpointableBase):
     return variable
 
   def _handle_weight_regularization(self, name, variable, regularizer):
-    # `init_graph` should point to the graph in which variable initialization
-    # will occur; it should be None if and only if initialization will take
-    # place in the eager context.
-    init_graph = None
-    if not context.executing_eagerly():
-      default_graph = ops.get_default_graph()
-      if default_graph.building_function:
-        with ops.init_scope():
-          # Retrieve the variables from the graph into which variables
-          # will be lifted; if initialization ops will be lifted into
-          # the eager context, then there is nothing to retrieve, since variable
-          # collections are not supported when eager execution is enabled.
-          if not context.executing_eagerly():
-            init_graph = ops.get_default_graph()
-      else:
-        # Initialization ops will not be lifted out of the default graph.
-        init_graph = default_graph
-
-    if init_graph is not None:  # pylint: disable=protected-access
-      # The variable was created and initialized in a graph.
-      if regularizer:
-        if isinstance(variable, tf_variables.PartitionedVariable):
-          for v in variable:
-            with ops.colocate_with(v.op):
-              with ops.name_scope(name + '/Regularizer'):
-                regularization = regularizer(v)
-            if regularization is not None:
-              self.add_loss(regularization)
-        else:
-          with ops.colocate_with(variable.op):
-            with ops.name_scope(name + '/Regularizer'):
-              regularization = regularizer(variable)
-          if regularization is not None:
-            self.add_loss(regularization)
-    elif regularizer:  # initialization took place in an eager context
-      if isinstance(variable, tf_variables.PartitionedVariable):
-        raise RuntimeError(
-            'Partitioned variable regularization is not yet '
-            'supported when executing eagerly. File a feature request'
-            'if this is important to you.')
-      # Save a zero-argument lambda which runs the regularizer on the
-      # variable, to be executed when `Layer.losses` is requested.
-      # This makes losses responsive to variable updates when executing
-      # eagerly.
-      #
-      # TODO(akshayka): Do the same for graphs as well, so that losses
-      # collected in a while_loop can be run outside its control flow
-      # context and so that losses won't be swallowed up by graph functions
-      # (i.e., `.losses()` should always create regularizers).
-      self._losses.append(lambda: regularizer(variable))
+    """Create lambdas which compute regularization losses."""
+
+    def _loss_for_variable(v):
+      """Creates a regularization loss `Tensor` for variable `v`."""
+      with ops.colocate_with(v):
+        with ops.name_scope(name + '/Regularizer'):
+          regularization = regularizer(v)
+      return regularization
+
+    if isinstance(variable, tf_variables.PartitionedVariable):
+      for v in variable:
+        self.add_loss(functools.partial(_loss_for_variable, v))
+    else:
+      self.add_loss(functools.partial(_loss_for_variable, variable))
 
   def _handle_activity_regularization(self, inputs, outputs):
     # Apply activity regularization.
@@ -766,7 +753,9 @@ class Layer(checkpointable.CheckpointableBase):
         self._assert_input_compatibility(inputs)
 
       if not in_deferred_mode:
+        self._in_call = True
         outputs = self.call(inputs, *args, **kwargs)
+        self._in_call = False
         if outputs is None:
           raise ValueError('A layer\'s `call` method should return a Tensor '
                            'or a list of Tensors, not None (layer: ' +
diff --git a/tensorflow/python/keras/engine/training_eager_test.py b/tensorflow/python/keras/engine/training_eager_test.py
index db7ccb181f..1f5176c4d7 100644
--- a/tensorflow/python/keras/engine/training_eager_test.py
+++ b/tensorflow/python/keras/engine/training_eager_test.py
@@ -192,6 +192,20 @@ class CorrectnessTest(test.TestCase):
     history = model.fit(iterator, epochs=1, steps_per_epoch=10)
     self.assertEqual(np.around(history.history['loss'][-1], decimals=4), 0.6173)
 
+  def test_no_loss_in_call(self):
+
+    class HasLoss(keras.layers.Layer):
+
+      def call(self, x):
+        self.add_loss(x)
+        return x
+
+    layer = HasLoss()
+    with self.assertRaises(RuntimeError):
+      layer(1.)
+
+    with ops.Graph().as_default():
+      layer(1.)
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 30be4131a4..54ad74c08b 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -27,6 +27,7 @@ import numpy as np
 from tensorflow.python import keras
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util as tf_test_util
@@ -2427,6 +2428,17 @@ class TestTrainingWithMetrics(test.TestCase):
       scores = model.train_on_batch(x, y, sample_weight=w)
       self.assertArrayNear(scores, [0.2, 0.8, 0.8], 0.1)
 
+  def test_losses_in_defun(self):
+    with context.eager_mode():
+      layer = keras.layers.Dense(1, kernel_regularizer='l1')
+      layer(array_ops.ones([1, 10]))
+
+      @function.defun
+      def get_losses():
+        return layer.losses
+
+      self.assertAllEqual(self.evaluate(layer.losses),
+                          self.evaluate(get_losses()))
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 3ba880d7a1..e399ece232 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -131,10 +131,20 @@ class Layer(base_layer.Layer):
 
   def add_loss(self, losses, inputs=None):
     previous_losses_length = len(self._losses)
+    previous_callable_losses_length = len(self._callable_losses)
     super(Layer, self).add_loss(losses, inputs=inputs)
-    # TODO(fchollet): deprecate collection below.
-    new_losses = self._losses[previous_losses_length:]
-    _add_elements_to_collection(new_losses, ops.GraphKeys.REGULARIZATION_LOSSES)
+    if not context.executing_eagerly():
+      # TODO(fchollet): deprecate collection below.
+      new_losses = self._losses[previous_losses_length:]
+      new_callable_losses = self._callable_losses[
+          previous_callable_losses_length:]
+      for regularizer in new_callable_losses:
+        loss_tensor = regularizer()
+        if loss_tensor is not None:
+          new_losses.append(loss_tensor)
+      _add_elements_to_collection(
+          new_losses,
+          ops.GraphKeys.REGULARIZATION_LOSSES)
 
   def _name_scope(self):
     """Determines op naming for the Layer."""
diff --git a/tensorflow/python/layers/convolutional_test.py b/tensorflow/python/layers/convolutional_test.py
index d61d3b6dba..257fa27156 100644
--- a/tensorflow/python/layers/convolutional_test.py
+++ b/tensorflow/python/layers/convolutional_test.py
@@ -207,7 +207,8 @@ class ConvTest(test.TestCase):
     layer.apply(images)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testConv2DBiasRegularizer(self):
     height, width = 7, 9
@@ -217,7 +218,8 @@ class ConvTest(test.TestCase):
     layer.apply(images)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testConv2DNoBias(self):
     height, width = 7, 9
@@ -445,7 +447,8 @@ class SeparableConv1DTest(test.TestCase):
     layer.apply(data)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testSeparableConv1DPointwiseRegularizer(self):
     length = 9
@@ -455,7 +458,8 @@ class SeparableConv1DTest(test.TestCase):
     layer.apply(data)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testSeparableConv1DBiasRegularizer(self):
     length = 9
@@ -465,7 +469,8 @@ class SeparableConv1DTest(test.TestCase):
     layer.apply(data)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testSeparableConv1DNoBias(self):
     length = 9
@@ -682,7 +687,8 @@ class SeparableConv2DTest(test.TestCase):
     layer.apply(images)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testSeparableConv2DPointwiseRegularizer(self):
     height, width = 7, 9
@@ -692,7 +698,8 @@ class SeparableConv2DTest(test.TestCase):
     layer.apply(images)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testSeparableConv2DBiasRegularizer(self):
     height, width = 7, 9
@@ -702,7 +709,8 @@ class SeparableConv2DTest(test.TestCase):
     layer.apply(images)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testSeparableConv2DNoBias(self):
     height, width = 7, 9
@@ -839,7 +847,8 @@ class Conv2DTransposeTest(test.TestCase):
     layer.apply(images)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testConv2DTransposeBiasRegularizer(self):
     height, width = 7, 9
@@ -849,7 +858,8 @@ class Conv2DTransposeTest(test.TestCase):
     layer.apply(images)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testConv2DTransposeNoBias(self):
     height, width = 7, 9
@@ -1017,7 +1027,8 @@ class Conv3DTransposeTest(test.TestCase):
     layer.apply(volumes)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testConv3DTransposeBiasRegularizer(self):
     depth, height, width = 5, 7, 9
@@ -1027,7 +1038,8 @@ class Conv3DTransposeTest(test.TestCase):
     layer.apply(volumes)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(layer.losses, loss_keys)
+    self.evaluate([v.initializer for v in layer.variables])
+    self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys))
 
   def testConv3DTransposeNoBias(self):
     depth, height, width = 5, 7, 9
diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 46009a30ac..d26f3f4789 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -197,7 +197,8 @@ class DenseTest(test.TestCase):
     _ = dense(inputs)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(dense.losses, loss_keys)
+    self.evaluate([v.initializer for v in dense.variables])
+    self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys))
 
   def testKernelRegularizerWithReuse(self):
     regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
@@ -218,7 +219,8 @@ class DenseTest(test.TestCase):
     _ = dense(inputs)
     loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
     self.assertEqual(len(loss_keys), 1)
-    self.assertListEqual(dense.losses, loss_keys)
+    self.evaluate([v.initializer for v in dense.variables])
+    self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys))
 
   def testFunctionalDense(self):
     with self.cached_session():
-- 
GitLab


From fe0140fcfc33f109191cf0ebe423aed28ec67bb6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 09:47:00 -0700
Subject: [PATCH 0862/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 214949709

---
 tensorflow/go/op/wrappers.go | 3192 +++++++++++++++++-----------------
 1 file changed, 1596 insertions(+), 1596 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 2f297d5161..b4d4db3e4d 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -3742,27 +3742,6 @@ func BoostedTreesMakeStatsSummary(scope *Scope, node_ids tf.Output, gradients tf
 	return op.Output(0)
 }
 
-// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics.
-//
-// Arguments:
-//	tree_ensemble_handle: Handle to the tree ensemble.
-//
-// Returns Stamp token of the tree ensemble resource.The number of trees in the tree ensemble resource.The number of trees that were finished successfully.The number of layers we attempted to build (but not necessarily succeeded).Rank size 2 tensor that contains start and end ids of the nodes in the latest
-// layer.
-func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesGetEnsembleStates",
-		Input: []tf.Input{
-			tree_ensemble_handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
-}
-
 // Creates a tree ensemble model and returns a handle to it.
 //
 // Arguments:
@@ -4059,168 +4038,291 @@ func FixedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
-type ResourceStridedSliceAssignAttr func(optionalAttr)
+// LogUniformCandidateSamplerAttr is an optional argument to LogUniformCandidateSampler.
+type LogUniformCandidateSamplerAttr func(optionalAttr)
 
-// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value.
+// LogUniformCandidateSamplerSeed sets the optional seed attribute to value.
+//
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
 // If not specified, defaults to 0
-func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr {
+func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["begin_mask"] = value
+		m["seed"] = value
 	}
 }
 
-// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value.
+// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
 // If not specified, defaults to 0
-func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr {
+func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["end_mask"] = value
+		m["seed2"] = value
 	}
 }
 
-// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr {
-	return func(m optionalAttr) {
-		m["ellipsis_mask"] = value
+// Generates labels for candidate sampling with a log-uniform distribution.
+//
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
+//
+// Arguments:
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to randomly sample.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
+//
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LogUniformCandidateSampler",
+		Input: []tf.Input{
+			true_classes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value.
+// UniformCandidateSamplerAttr is an optional argument to UniformCandidateSampler.
+type UniformCandidateSamplerAttr func(optionalAttr)
+
+// UniformCandidateSamplerSeed sets the optional seed attribute to value.
+//
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
 // If not specified, defaults to 0
-func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr {
+func UniformCandidateSamplerSeed(value int64) UniformCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["new_axis_mask"] = value
+		m["seed"] = value
 	}
 }
 
-// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
+// UniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
 // If not specified, defaults to 0
-func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr {
+func UniformCandidateSamplerSeed2(value int64) UniformCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["shrink_axis_mask"] = value
+		m["seed2"] = value
 	}
 }
 
-// Assign `value` to the sliced l-value reference of `ref`.
+// Generates labels for candidate sampling with a uniform distribution.
 //
-// The values of `value` are assigned to the positions in the variable
-// `ref` that are selected by the slice parameters. The slice parameters
-// `begin, `end`, `strides`, etc. work exactly as in `StridedSlice`.
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
 //
-// NOTE this op currently does not support broadcasting and so `value`'s
-// shape must be exactly the shape produced by the slice of `ref`.
+// For each batch, this op picks a single set of sampled candidate labels.
 //
-// Returns the created operation.
-func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) {
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
+//
+// Arguments:
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to randomly sample.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
+//
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func UniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...UniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceStridedSliceAssign",
+		Type: "UniformCandidateSampler",
 		Input: []tf.Input{
-			ref, begin, end, strides, value,
+			true_classes,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// ArgMaxAttr is an optional argument to ArgMax.
-type ArgMaxAttr func(optionalAttr)
+// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping.
+type GenerateVocabRemappingAttr func(optionalAttr)
 
-// ArgMaxOutputType sets the optional output_type attribute to value.
-// If not specified, defaults to DT_INT64
-func ArgMaxOutputType(value tf.DataType) ArgMaxAttr {
+// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value.
+//
+// value: Number of entries in the old vocab file to consider.  If -1,
+// use the entire old vocabulary.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr {
 	return func(m optionalAttr) {
-		m["output_type"] = value
+		m["old_vocab_size"] = value
 	}
 }
 
-// Returns the index with the largest value across dimensions of a tensor.
+// Given a path to new and old vocabulary files, returns a remapping Tensor of
 //
-// Note that in case of ties the identity of the return value is not guaranteed.
+// length `num_new_vocab`, where `remapping[i]` contains the row number in the old
+// vocabulary that corresponds to row `i` in the new vocabulary (starting at line
+// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i`
+// in the new vocabulary is not in the old vocabulary.  The old vocabulary is
+// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the
+// default value of -1.
+//
+// `num_vocab_offset` enables
+// use in the partitioned variable case, and should generally be set through
+// examining partitioning info.  The format of the files should be a text file,
+// with each line containing a single entity within the vocabulary.
+//
+// For example, with `new_vocab_file` a text file containing each of the following
+// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3],
+// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be
+// `[0, -1, 2]`.
+//
+// The op also returns a count of how many entries in the new vocabulary
+// were present in the old vocabulary, which is used to calculate the number of
+// values to initialize in a weight matrix remapping
+//
+// This functionality can be used to remap both row vocabularies (typically,
+// features) and column vocabularies (typically, classes) from TensorFlow
+// checkpoints.  Note that the partitioning logic relies on contiguous vocabularies
+// corresponding to div-partitioned variables.  Moreover, the underlying remapping
+// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should
+// use the corresponding index_table_from_file() as the FeatureColumn framework
+// does (as opposed to tf.feature_to_id(), which uses a CuckooTable).
 //
 // Arguments:
+//	new_vocab_file: Path to the new vocab file.
+//	old_vocab_file: Path to the old vocab file.
+//	new_vocab_offset: How many entries into the new vocab file to start reading.
+//	num_new_vocab: Number of entries in the new vocab file to remap.
 //
-//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
-// Describes which dimension of the input Tensor to reduce across. For vectors,
-// use dimension = 0.
-func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) {
+// Returns A Tensor of length num_new_vocab where the element at index i
+// is equal to the old ID that maps to the new ID i.  This element is -1 for any
+// new ID that is not found in the old vocabulary.Number of new vocab entries found in old vocab.
+func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ArgMax",
+		Type: "GenerateVocabRemapping",
 		Input: []tf.Input{
-			input, dimension,
+			new_vocab_file, old_vocab_file,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Returns which elements of x are finite.
-//
-// @compatibility(numpy)
-// Equivalent to np.isfinite
-// @end_compatibility
-func IsFinite(scope *Scope, x tf.Output) (y tf.Output) {
+// Broadcasts a tensor value to one or more other devices.
+func CollectiveBcastSend(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape}
 	opspec := tf.OpSpec{
-		Type: "IsFinite",
+		Type: "CollectiveBcastSend",
 		Input: []tf.Input{
-			x,
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MatMulAttr is an optional argument to MatMul.
-type MatMulAttr func(optionalAttr)
-
-// MatMulTransposeA sets the optional transpose_a attribute to value.
-//
-// value: If true, "a" is transposed before multiplication.
-// If not specified, defaults to false
-func MatMulTransposeA(value bool) MatMulAttr {
-	return func(m optionalAttr) {
-		m["transpose_a"] = value
+// Mutually reduces multiple tensors of identical type and shape.
+func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, merge_op string, final_op string, subdiv_offsets []int64) (data tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// MatMulTransposeB sets the optional transpose_b attribute to value.
+	attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "merge_op": merge_op, "final_op": final_op, "subdiv_offsets": subdiv_offsets}
+	opspec := tf.OpSpec{
+		Type: "CollectiveReduce",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// AbortAttr is an optional argument to Abort.
+type AbortAttr func(optionalAttr)
+
+// AbortErrorMsg sets the optional error_msg attribute to value.
 //
-// value: If true, "b" is transposed before multiplication.
+// value: A string which is the message associated with the exception.
+// If not specified, defaults to ""
+func AbortErrorMsg(value string) AbortAttr {
+	return func(m optionalAttr) {
+		m["error_msg"] = value
+	}
+}
+
+// AbortExitWithoutError sets the optional exit_without_error attribute to value.
 // If not specified, defaults to false
-func MatMulTransposeB(value bool) MatMulAttr {
+func AbortExitWithoutError(value bool) AbortAttr {
 	return func(m optionalAttr) {
-		m["transpose_b"] = value
+		m["exit_without_error"] = value
 	}
 }
 
-// Multiply the matrix "a" by the matrix "b".
+// Raise a exception to abort the process when called.
 //
-// The inputs must be two-dimensional matrices and the inner dimension of
-// "a" (after being transposed if transpose_a is true) must match the
-// outer dimension of "b" (after being transposed if transposed_b is
-// true).
+// If exit_without_error is true, the process will exit normally,
+// otherwise it will exit with a SIGABORT signal.
 //
-// *Note*: The default kernel implementation for MatMul on GPUs uses
-// cublas.
-func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (product tf.Output) {
+// Returns nothing but an exception.
+//
+// Returns the created operation.
+func Abort(scope *Scope, optional ...AbortAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -4229,258 +4331,163 @@ func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (pro
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MatMul",
-		Input: []tf.Input{
-			a, b,
-		},
+		Type: "Abort",
+
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Selects elements from `x` or `y`, depending on `condition`.
-//
-// The `x`, and `y` tensors must all have the same shape, and the
-// output will also have that shape.
-//
-// The `condition` tensor must be a scalar if `x` and `y` are scalars.
-// If `x` and `y` are vectors or higher rank, then `condition` must be either a
-// scalar, a vector with size matching the first dimension of `x`, or must have
-// the same shape as `x`.
-//
-// The `condition` tensor acts as a mask that chooses, based on the value at each
-// element, whether the corresponding element / row in the output should be
-// taken from `x` (if true) or `y` (if false).
-//
-// If `condition` is a vector and `x` and `y` are higher rank matrices, then
-// it chooses which row (outer dimension) to copy from `x` and `y`.
-// If `condition` has the same shape as `x` and `y`, then it chooses which
-// element to copy from `x` and `y`.
-//
-// For example:
-//
-// ```python
-// # 'condition' tensor is [[True,  False]
-// #                        [False, True]]
-// # 't' is [[1, 2],
-// #         [3, 4]]
-// # 'e' is [[5, 6],
-// #         [7, 8]]
-// select(condition, t, e)  # => [[1, 6], [7, 4]]
-//
-//
-// # 'condition' tensor is [True, False]
-// # 't' is [[1, 2],
-// #         [3, 4]]
-// # 'e' is [[5, 6],
-// #         [7, 8]]
-// select(condition, t, e) ==> [[1, 2],
-//                              [7, 8]]
+// Forwards the input to the output.
 //
-// ```
+// This operator represents the loop termination condition used by the
+// "pivot" switches of a loop.
 //
 // Arguments:
+//	input: A boolean scalar, representing the branch predicate of the Switch op.
 //
-//	x: = A `Tensor` which may have the same shape as `condition`.
-// If `condition` is rank 1, `x` may have higher rank,
-// but its first dimension must match the size of `condition`.
-//	y: = A `Tensor` with the same type and shape as `x`.
-//
-// Returns = A `Tensor` with the same type and shape as `x` and `y`.
-func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) {
+// Returns The same tensor as `input`.
+func LoopCond(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Select",
+		Type: "LoopCond",
 		Input: []tf.Input{
-			condition, x, y,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the truth value of x OR y element-wise.
+// Returns a tensor of zeros with the same shape and type as x.
 //
-// *NOTE*: `LogicalOr` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func LogicalOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Arguments:
+//	x: a tensor of type T.
+//
+// Returns a tensor of the same shape and type as x but filled with zeros.
+func ZerosLike(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "LogicalOr",
+		Type: "ZerosLike",
 		Input: []tf.Input{
-			x, y,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
-//
-// The regularized incomplete beta integral is defined as:
-//
-//
-// \\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\)
-//
-// where
-//
-//
-// \\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\)
-//
-//
-// is the incomplete beta function and \\(B(a, b)\\) is the *complete*
-// beta function.
-func Betainc(scope *Scope, a tf.Output, b tf.Output, x tf.Output) (z tf.Output) {
+// Returns a copy of the input tensor.
+func Snapshot(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Betainc",
+		Type: "Snapshot",
 		Input: []tf.Input{
-			a, b, x,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Return a tensor with the same shape and contents as the input tensor or value.
-func Identity(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
+// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
+type ResourceStridedSliceAssignAttr func(optionalAttr)
+
+// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["begin_mask"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "Identity",
-		Input: []tf.Input{
-			input,
-		},
+}
+
+// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["end_mask"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes arctangent of `y/x` element-wise, respecting signs of the arguments.
-//
-// This is the angle \( \theta \in [-\pi, \pi] \) such that
-// \[ x = r \cos(\theta) \]
-// and
-// \[ y = r \sin(\theta) \]
-// where \(r = \sqrt(x^2 + y^2) \).
-func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
+// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["ellipsis_mask"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "Atan2",
-		Input: []tf.Input{
-			y, x,
-		},
+}
+
+// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["new_axis_mask"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Creates a dataset that passes a sliding window over `input_dataset`.
-//
-// Arguments:
+// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["shrink_axis_mask"] = value
+	}
+}
+
+// Assign `value` to the sliced l-value reference of `ref`.
 //
-//	window_size: A scalar representing the number of elements in the
-// sliding window.
-//	window_shift: A scalar representing the steps moving the sliding window
-// forward in one iteration. It must be positive.
-//	window_stride: A scalar representing the stride of the input elements of the sliding window.
-// It must be positive.
+// The values of `value` are assigned to the positions in the variable
+// `ref` that are selected by the slice parameters. The slice parameters
+// `begin, `end`, `strides`, etc. work exactly as in `StridedSlice`.
 //
+// NOTE this op currently does not support broadcasting and so `value`'s
+// shape must be exactly the shape produced by the slice of `ref`.
 //
-func SlideDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, window_shift tf.Output, window_stride tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Returns the created operation.
+func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SlideDataset",
+		Type: "ResourceStridedSliceAssign",
 		Input: []tf.Input{
-			input_dataset, window_size, window_shift, window_stride,
+			ref, begin, end, strides, value,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// EditDistanceAttr is an optional argument to EditDistance.
-type EditDistanceAttr func(optionalAttr)
+// ArgMaxAttr is an optional argument to ArgMax.
+type ArgMaxAttr func(optionalAttr)
 
-// EditDistanceNormalize sets the optional normalize attribute to value.
-//
-// value: boolean (if true, edit distances are normalized by length of truth).
-//
-// The output is:
-// If not specified, defaults to true
-func EditDistanceNormalize(value bool) EditDistanceAttr {
+// ArgMaxOutputType sets the optional output_type attribute to value.
+// If not specified, defaults to DT_INT64
+func ArgMaxOutputType(value tf.DataType) ArgMaxAttr {
 	return func(m optionalAttr) {
-		m["normalize"] = value
+		m["output_type"] = value
 	}
 }
 
-// Computes the (possibly normalized) Levenshtein Edit Distance.
-//
-// The inputs are variable-length sequences provided by SparseTensors
-//   (hypothesis_indices, hypothesis_values, hypothesis_shape)
-// and
-//   (truth_indices, truth_values, truth_shape).
+// Returns the index with the largest value across dimensions of a tensor.
 //
-// The inputs are:
+// Note that in case of ties the identity of the return value is not guaranteed.
 //
 // Arguments:
-//	hypothesis_indices: The indices of the hypothesis list SparseTensor.
-// This is an N x R int64 matrix.
-//	hypothesis_values: The values of the hypothesis list SparseTensor.
-// This is an N-length vector.
-//	hypothesis_shape: The shape of the hypothesis list SparseTensor.
-// This is an R-length vector.
-//	truth_indices: The indices of the truth list SparseTensor.
-// This is an M x R int64 matrix.
-//	truth_values: The values of the truth list SparseTensor.
-// This is an M-length vector.
-//	truth_shape: truth indices, vector.
-//
-// Returns A dense float tensor with rank R - 1.
-//
-// For the example input:
-//
-//     // hypothesis represents a 2x1 matrix with variable-length values:
-//     //   (0,0) = ["a"]
-//     //   (1,0) = ["b"]
-//     hypothesis_indices = [[0, 0, 0],
-//                           [1, 0, 0]]
-//     hypothesis_values = ["a", "b"]
-//     hypothesis_shape = [2, 1, 1]
 //
-//     // truth represents a 2x2 matrix with variable-length values:
-//     //   (0,0) = []
-//     //   (0,1) = ["a"]
-//     //   (1,0) = ["b", "c"]
-//     //   (1,1) = ["a"]
-//     truth_indices = [[0, 1, 0],
-//                      [1, 0, 0],
-//                      [1, 0, 1],
-//                      [1, 1, 0]]
-//     truth_values = ["a", "b", "c", "a"]
-//     truth_shape = [2, 2, 2]
-//     normalize = true
-//
-// The output will be:
-//
-//     // output is a 2x2 matrix with edit distances normalized by truth lengths.
-//     output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis
-//               [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis
-func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) {
+//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
+// Describes which dimension of the input Tensor to reduce across. For vectors,
+// use dimension = 0.
+func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -4489,9 +4496,9 @@ func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "EditDistance",
+		Type: "ArgMax",
 		Input: []tf.Input{
-			hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape,
+			input, dimension,
 		},
 		Attrs: attrs,
 	}
@@ -4499,69 +4506,69 @@ func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values
 	return op.Output(0)
 }
 
-// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
-type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
+// Returns which elements of x are finite.
+//
+// @compatibility(numpy)
+// Equivalent to np.isfinite
+// @end_compatibility
+func IsFinite(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "IsFinite",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
+// MatMulAttr is an optional argument to MatMul.
+type MatMulAttr func(optionalAttr)
+
+// MatMulTransposeA sets the optional transpose_a attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
+// value: If true, "a" is transposed before multiplication.
+// If not specified, defaults to false
+func MatMulTransposeA(value bool) MatMulAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["transpose_a"] = value
 	}
 }
 
-// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value.
+// MatMulTransposeB sets the optional transpose_b attribute to value.
 //
-// value: 1-D tensor of length 4.  The dilation factor for each dimension of
-// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
-// element on that dimension. The dimension order is determined by the value of
-// `data_format`, see above for details. Dilations in the batch and depth
-// dimensions must be 1.
-// If not specified, defaults to <i:1 i:1 i:1 i:1 >
-func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr {
+// value: If true, "b" is transposed before multiplication.
+// If not specified, defaults to false
+func MatMulTransposeB(value bool) MatMulAttr {
 	return func(m optionalAttr) {
-		m["dilations"] = value
+		m["transpose_b"] = value
 	}
 }
 
-// Computes the gradients of depthwise convolution with respect to the input.
+// Multiply the matrix "a" by the matrix "b".
 //
-// Arguments:
-//	input_sizes: An integer vector representing the shape of `input`, based
-// on `data_format`.  For example, if `data_format` is 'NHWC' then
-//  `input` is a 4-D `[batch, height, width, channels]` tensor.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
-//	out_backprop: 4-D with shape  based on `data_format`.
-// For example, if `data_format` is 'NHWC' then
-// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution.
-//	padding: The type of padding algorithm to use.
+// The inputs must be two-dimensional matrices and the inner dimension of
+// "a" (after being transposed if transpose_a is true) must match the
+// outer dimension of "b" (after being transposed if transposed_b is
+// true).
 //
-// Returns 4-D with shape according to `data_format`.  For example, if
-// `data_format` is 'NHWC', output shape is `[batch, in_height,
-// in_width, in_channels]`.  Gradient w.r.t. the input of the
-// convolution.
-func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
+// *Note*: The default kernel implementation for MatMul on GPUs uses
+// cublas.
+func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (product tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DepthwiseConv2dNativeBackpropInput",
+		Type: "MatMul",
 		Input: []tf.Input{
-			input_sizes, filter, out_backprop,
+			a, b,
 		},
 		Attrs: attrs,
 	}
@@ -4569,47 +4576,79 @@ func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, fil
 	return op.Output(0)
 }
 
-// ApproximateEqualAttr is an optional argument to ApproximateEqual.
-type ApproximateEqualAttr func(optionalAttr)
-
-// ApproximateEqualTolerance sets the optional tolerance attribute to value.
-// If not specified, defaults to 1e-05
-func ApproximateEqualTolerance(value float32) ApproximateEqualAttr {
-	return func(m optionalAttr) {
-		m["tolerance"] = value
-	}
-}
-
-// Returns the truth value of abs(x-y) < tolerance element-wise.
-func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...ApproximateEqualAttr) (z tf.Output) {
+// Selects elements from `x` or `y`, depending on `condition`.
+//
+// The `x`, and `y` tensors must all have the same shape, and the
+// output will also have that shape.
+//
+// The `condition` tensor must be a scalar if `x` and `y` are scalars.
+// If `x` and `y` are vectors or higher rank, then `condition` must be either a
+// scalar, a vector with size matching the first dimension of `x`, or must have
+// the same shape as `x`.
+//
+// The `condition` tensor acts as a mask that chooses, based on the value at each
+// element, whether the corresponding element / row in the output should be
+// taken from `x` (if true) or `y` (if false).
+//
+// If `condition` is a vector and `x` and `y` are higher rank matrices, then
+// it chooses which row (outer dimension) to copy from `x` and `y`.
+// If `condition` has the same shape as `x` and `y`, then it chooses which
+// element to copy from `x` and `y`.
+//
+// For example:
+//
+// ```python
+// # 'condition' tensor is [[True,  False]
+// #                        [False, True]]
+// # 't' is [[1, 2],
+// #         [3, 4]]
+// # 'e' is [[5, 6],
+// #         [7, 8]]
+// select(condition, t, e)  # => [[1, 6], [7, 4]]
+//
+//
+// # 'condition' tensor is [True, False]
+// # 't' is [[1, 2],
+// #         [3, 4]]
+// # 'e' is [[5, 6],
+// #         [7, 8]]
+// select(condition, t, e) ==> [[1, 2],
+//                              [7, 8]]
+//
+// ```
+//
+// Arguments:
+//
+//	x: = A `Tensor` which may have the same shape as `condition`.
+// If `condition` is rank 1, `x` may have higher rank,
+// but its first dimension must match the size of `condition`.
+//	y: = A `Tensor` with the same type and shape as `x`.
+//
+// Returns = A `Tensor` with the same type and shape as `x` and `y`.
+func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ApproximateEqual",
+		Type: "Select",
 		Input: []tf.Input{
-			x, y,
+			condition, x, y,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns x / y element-wise.
+// Returns the truth value of x OR y element-wise.
 //
-// *NOTE*: `Div` supports broadcasting. More about broadcasting
+// *NOTE*: `LogicalOr` supports broadcasting. More about broadcasting
 // [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+func LogicalOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Div",
+		Type: "LogicalOr",
 		Input: []tf.Input{
 			x, y,
 		},
@@ -4618,65 +4657,92 @@ func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
-// Returns x * y element-wise.
+// Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
 //
-// *NOTE*: `Multiply` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// The regularized incomplete beta integral is defined as:
+//
+//
+// \\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\)
+//
+// where
+//
+//
+// \\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\)
+//
+//
+// is the incomplete beta function and \\(B(a, b)\\) is the *complete*
+// beta function.
+func Betainc(scope *Scope, a tf.Output, b tf.Output, x tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Mul",
+		Type: "Betainc",
 		Input: []tf.Input{
-			x, y,
+			a, b, x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// BiasAddAttr is an optional argument to BiasAdd.
-type BiasAddAttr func(optionalAttr)
-
-// BiasAddDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the bias tensor will be added to the last dimension
-// of the value tensor.
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// The tensor will be added to "in_channels", the third-to-the-last
-//     dimension.
-// If not specified, defaults to "NHWC"
-func BiasAddDataFormat(value string) BiasAddAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
+// Return a tensor with the same shape and contents as the input tensor or value.
+func Identity(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Identity",
+		Input: []tf.Input{
+			input,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Adds `bias` to `value`.
+// Computes arctangent of `y/x` element-wise, respecting signs of the arguments.
 //
-// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
-// Broadcasting is supported, so `value` may have any number of dimensions.
+// This is the angle \( \theta \in [-\pi, \pi] \) such that
+// \[ x = r \cos(\theta) \]
+// and
+// \[ y = r \sin(\theta) \]
+// where \(r = \sqrt(x^2 + y^2) \).
+func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Atan2",
+		Input: []tf.Input{
+			y, x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that passes a sliding window over `input_dataset`.
 //
 // Arguments:
-//	value: Any number of dimensions.
-//	bias: 1-D with size the last dimension of `value`.
 //
-// Returns Broadcasted sum of `value` and `bias`.
-func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) {
+//	window_size: A scalar representing the number of elements in the
+// sliding window.
+//	window_shift: A scalar representing the steps moving the sliding window
+// forward in one iteration. It must be positive.
+//	window_stride: A scalar representing the stride of the input elements of the sliding window.
+// It must be positive.
+//
+//
+func SlideDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, window_shift tf.Output, window_stride tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "BiasAdd",
+		Type: "SlideDataset",
 		Input: []tf.Input{
-			value, bias,
+			input_dataset, window_size, window_shift, window_stride,
 		},
 		Attrs: attrs,
 	}
@@ -4684,41 +4750,74 @@ func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddA
 	return op.Output(0)
 }
 
-// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse.
-type SparseReduceSumSparseAttr func(optionalAttr)
+// EditDistanceAttr is an optional argument to EditDistance.
+type EditDistanceAttr func(optionalAttr)
 
-// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value.
+// EditDistanceNormalize sets the optional normalize attribute to value.
 //
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr {
+// value: boolean (if true, edit distances are normalized by length of truth).
+//
+// The output is:
+// If not specified, defaults to true
+func EditDistanceNormalize(value bool) EditDistanceAttr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["normalize"] = value
 	}
 }
 
-// Computes the sum of elements across dimensions of a SparseTensor.
-//
-// This Op takes a SparseTensor and is the sparse counterpart to
-// `tf.reduce_sum()`.  In contrast to SparseReduceSum, this Op returns a
-// SparseTensor.
+// Computes the (possibly normalized) Levenshtein Edit Distance.
 //
-// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-// with length 1.
+// The inputs are variable-length sequences provided by SparseTensors
+//   (hypothesis_indices, hypothesis_values, hypothesis_shape)
+// and
+//   (truth_indices, truth_values, truth_shape).
 //
-// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-// with a single element is returned.  Additionally, the axes can be negative,
-// which are interpreted according to the indexing rules in Python.
+// The inputs are:
 //
 // Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
-//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
+//	hypothesis_indices: The indices of the hypothesis list SparseTensor.
+// This is an N x R int64 matrix.
+//	hypothesis_values: The values of the hypothesis list SparseTensor.
+// This is an N-length vector.
+//	hypothesis_shape: The shape of the hypothesis list SparseTensor.
+// This is an R-length vector.
+//	truth_indices: The indices of the truth list SparseTensor.
+// This is an M x R int64 matrix.
+//	truth_values: The values of the truth list SparseTensor.
+// This is an M-length vector.
+//	truth_shape: truth indices, vector.
+//
+// Returns A dense float tensor with rank R - 1.
+//
+// For the example input:
+//
+//     // hypothesis represents a 2x1 matrix with variable-length values:
+//     //   (0,0) = ["a"]
+//     //   (1,0) = ["b"]
+//     hypothesis_indices = [[0, 0, 0],
+//                           [1, 0, 0]]
+//     hypothesis_values = ["a", "b"]
+//     hypothesis_shape = [2, 1, 1]
+//
+//     // truth represents a 2x2 matrix with variable-length values:
+//     //   (0,0) = []
+//     //   (0,1) = ["a"]
+//     //   (1,0) = ["b", "c"]
+//     //   (1,1) = ["a"]
+//     truth_indices = [[0, 1, 0],
+//                      [1, 0, 0],
+//                      [1, 0, 1],
+//                      [1, 1, 0]]
+//     truth_values = ["a", "b", "c", "a"]
+//     truth_shape = [2, 2, 2]
+//     normalize = true
+//
+// The output will be:
+//
+//     // output is a 2x2 matrix with edit distances normalized by truth lengths.
+//     output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis
+//               [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis
+func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -4727,146 +4826,183 @@ func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values t
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseReduceSumSparse",
+		Type: "EditDistance",
 		Input: []tf.Input{
-			input_indices, input_values, input_shape, reduction_axes,
+			hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler.
-type AllCandidateSamplerAttr func(optionalAttr)
+// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
+type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
 
-// AllCandidateSamplerSeed sets the optional seed attribute to value.
+// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
 //
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, height, width, channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, channels, height, width].
+// If not specified, defaults to "NHWC"
+func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["data_format"] = value
 	}
 }
 
-// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value.
 //
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["dilations"] = value
 	}
 }
 
-// Generates labels for candidate sampling with a learned unigram distribution.
-//
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
-//
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
+// Computes the gradients of depthwise convolution with respect to the input.
 //
 // Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to produce.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
+//	input_sizes: An integer vector representing the shape of `input`, based
+// on `data_format`.  For example, if `data_format` is 'NHWC' then
+//  `input` is a 4-D `[batch, height, width, channels]` tensor.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
+//	out_backprop: 4-D with shape  based on `data_format`.
+// For example, if `data_format` is 'NHWC' then
+// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution.
+//	padding: The type of padding algorithm to use.
 //
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+// Returns 4-D with shape according to `data_format`.  For example, if
+// `data_format` is 'NHWC', output shape is `[batch, in_height,
+// in_width, in_channels]`.  Gradient w.r.t. the input of the
+// convolution.
+func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AllCandidateSampler",
+		Type: "DepthwiseConv2dNativeBackpropInput",
 		Input: []tf.Input{
-			true_classes,
+			input_sizes, filter, out_backprop,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// Returns x + y element-wise.
-//
-// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "AddV2",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns an element-wise indication of the sign of a number.
+// ApproximateEqualAttr is an optional argument to ApproximateEqual.
+type ApproximateEqualAttr func(optionalAttr)
+
+// ApproximateEqualTolerance sets the optional tolerance attribute to value.
+// If not specified, defaults to 1e-05
+func ApproximateEqualTolerance(value float32) ApproximateEqualAttr {
+	return func(m optionalAttr) {
+		m["tolerance"] = value
+	}
+}
+
+// Returns the truth value of abs(x-y) < tolerance element-wise.
+func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...ApproximateEqualAttr) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ApproximateEqual",
+		Input: []tf.Input{
+			x, y,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns x / y element-wise.
 //
-// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.
+// *NOTE*: `Div` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Div",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns x * y element-wise.
 //
-// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
-func Sign(scope *Scope, x tf.Output) (y tf.Output) {
+// *NOTE*: `Multiply` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Sign",
+		Type: "Mul",
 		Input: []tf.Input{
-			x,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ArgMinAttr is an optional argument to ArgMin.
-type ArgMinAttr func(optionalAttr)
+// BiasAddAttr is an optional argument to BiasAdd.
+type BiasAddAttr func(optionalAttr)
 
-// ArgMinOutputType sets the optional output_type attribute to value.
-// If not specified, defaults to DT_INT64
-func ArgMinOutputType(value tf.DataType) ArgMinAttr {
+// BiasAddDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the bias tensor will be added to the last dimension
+// of the value tensor.
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// The tensor will be added to "in_channels", the third-to-the-last
+//     dimension.
+// If not specified, defaults to "NHWC"
+func BiasAddDataFormat(value string) BiasAddAttr {
 	return func(m optionalAttr) {
-		m["output_type"] = value
+		m["data_format"] = value
 	}
 }
 
-// Returns the index with the smallest value across dimensions of a tensor.
+// Adds `bias` to `value`.
 //
-// Note that in case of ties the identity of the return value is not guaranteed.
+// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
+// Broadcasting is supported, so `value` may have any number of dimensions.
 //
 // Arguments:
+//	value: Any number of dimensions.
+//	bias: 1-D with size the last dimension of `value`.
 //
-//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
-// Describes which dimension of the input Tensor to reduce across. For vectors,
-// use dimension = 0.
-func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) {
+// Returns Broadcasted sum of `value` and `bias`.
+func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -4875,9 +5011,9 @@ func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgM
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ArgMin",
+		Type: "BiasAdd",
 		Input: []tf.Input{
-			input, dimension,
+			value, bias,
 		},
 		Attrs: attrs,
 	}
@@ -4885,33 +5021,52 @@ func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgM
 	return op.Output(0)
 }
 
-// Convert the quantized 'input' tensor into a lower-precision 'output', using the
+// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse.
+type SparseReduceSumSparseAttr func(optionalAttr)
+
+// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value.
 //
-// output range specified with 'requested_output_min' and 'requested_output_max'.
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr {
+	return func(m optionalAttr) {
+		m["keep_dims"] = value
+	}
+}
+
+// Computes the sum of elements across dimensions of a SparseTensor.
 //
-// [input_min, input_max] are scalar floats that specify the range for the float
-// interpretation of the 'input' data. For example, if input_min is -1.0f and
-// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+// This Op takes a SparseTensor and is the sparse counterpart to
+// `tf.reduce_sum()`.  In contrast to SparseReduceSum, this Op returns a
+// SparseTensor.
 //
-// Arguments:
+// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
+// with length 1.
 //
-//	input_min: The float value that the minimum quantized input value represents.
-//	input_max: The float value that the maximum quantized input value represents.
-//	requested_output_min: The float value that the minimum quantized output value represents.
-//	requested_output_max: The float value that the maximum quantized output value represents.
-//	out_type: The type of the output. Should be a lower bit depth than Tinput.
+// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
+// with a single element is returned.  Additionally, the axes can be negative,
+// which are interpreted according to the indexing rules in Python.
 //
-// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
-func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+// Arguments:
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
+func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Requantize",
+		Type: "SparseReduceSumSparse",
 		Input: []tf.Input{
-			input, input_min, input_max, requested_output_min, requested_output_max,
+			input_indices, input_values, input_shape, reduction_axes,
 		},
 		Attrs: attrs,
 	}
@@ -4919,95 +5074,277 @@ func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// PreventGradientAttr is an optional argument to PreventGradient.
-type PreventGradientAttr func(optionalAttr)
+// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler.
+type AllCandidateSamplerAttr func(optionalAttr)
 
-// PreventGradientMessage sets the optional message attribute to value.
+// AllCandidateSamplerSeed sets the optional seed attribute to value.
 //
-// value: Will be printed in the error when anyone tries to differentiate
-// this operation.
-// If not specified, defaults to ""
-func PreventGradientMessage(value string) PreventGradientAttr {
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["message"] = value
+		m["seed"] = value
 	}
 }
 
-// An identity op that triggers an error if a gradient is requested.
+// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value.
 //
-// When executed in a graph, this op outputs its input tensor as-is.
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Generates labels for candidate sampling with a learned unigram distribution.
 //
-// When building ops to compute gradients, the TensorFlow gradient system
-// will return an error when trying to lookup the gradient of this op,
-// because no gradient must ever be registered for this function.  This
-// op exists to prevent subtle bugs from silently returning unimplemented
-// gradients in some corner cases.
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
 //
 // Arguments:
-//	input: any tensor.
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to produce.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
 //
-// Returns the same input tensor.
-func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "PreventGradient",
+		Type: "AllCandidateSampler",
 		Input: []tf.Input{
-			input,
+			true_classes,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes asin of x element-wise.
-func Asin(scope *Scope, x tf.Output) (y tf.Output) {
+// Returns x + y element-wise.
+//
+// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Asin",
+		Type: "AddV2",
 		Input: []tf.Input{
-			x,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the sum along sparse segments of a tensor.
-//
-// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
-// misisng, the `output` tensor at that position will be zeroed.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// For example:
-//
-// ```python
-// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+// Returns an element-wise indication of the sign of a number.
 //
-// tf.sparse_segment_sum_with_num_segments(
-//     c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
-// # => [[0 0 0 0]
-// #     [0 0 0 0]
-// #     [0 0 0 0]]
+// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.
 //
-// tf.sparse_segment_sum_with_num_segments(c,
-//                                         tf.constant([0, 1]),
-//                                         tf.constant([0, 2],
-//                                         num_segments=4))
-// # => [[ 1  2  3  4]
-// #     [ 0  0  0  0]
-// #     [-1 -2 -3 -4]
+// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
+func Sign(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sign",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ArgMinAttr is an optional argument to ArgMin.
+type ArgMinAttr func(optionalAttr)
+
+// ArgMinOutputType sets the optional output_type attribute to value.
+// If not specified, defaults to DT_INT64
+func ArgMinOutputType(value tf.DataType) ArgMinAttr {
+	return func(m optionalAttr) {
+		m["output_type"] = value
+	}
+}
+
+// Returns the index with the smallest value across dimensions of a tensor.
+//
+// Note that in case of ties the identity of the return value is not guaranteed.
+//
+// Arguments:
+//
+//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
+// Describes which dimension of the input Tensor to reduce across. For vectors,
+// use dimension = 0.
+func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ArgMin",
+		Input: []tf.Input{
+			input, dimension,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Convert the quantized 'input' tensor into a lower-precision 'output', using the
+//
+// output range specified with 'requested_output_min' and 'requested_output_max'.
+//
+// [input_min, input_max] are scalar floats that specify the range for the float
+// interpretation of the 'input' data. For example, if input_min is -1.0f and
+// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+//
+// Arguments:
+//
+//	input_min: The float value that the minimum quantized input value represents.
+//	input_max: The float value that the maximum quantized input value represents.
+//	requested_output_min: The float value that the minimum quantized output value represents.
+//	requested_output_max: The float value that the maximum quantized output value represents.
+//	out_type: The type of the output. Should be a lower bit depth than Tinput.
+//
+// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
+func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	opspec := tf.OpSpec{
+		Type: "Requantize",
+		Input: []tf.Input{
+			input, input_min, input_max, requested_output_min, requested_output_max,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// PreventGradientAttr is an optional argument to PreventGradient.
+type PreventGradientAttr func(optionalAttr)
+
+// PreventGradientMessage sets the optional message attribute to value.
+//
+// value: Will be printed in the error when anyone tries to differentiate
+// this operation.
+// If not specified, defaults to ""
+func PreventGradientMessage(value string) PreventGradientAttr {
+	return func(m optionalAttr) {
+		m["message"] = value
+	}
+}
+
+// An identity op that triggers an error if a gradient is requested.
+//
+// When executed in a graph, this op outputs its input tensor as-is.
+//
+// When building ops to compute gradients, the TensorFlow gradient system
+// will return an error when trying to lookup the gradient of this op,
+// because no gradient must ever be registered for this function.  This
+// op exists to prevent subtle bugs from silently returning unimplemented
+// gradients in some corner cases.
+//
+// Arguments:
+//	input: any tensor.
+//
+// Returns the same input tensor.
+func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "PreventGradient",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes asin of x element-wise.
+func Asin(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Asin",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the sum along sparse segments of a tensor.
+//
+// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
+// misisng, the `output` tensor at that position will be zeroed.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// For example:
+//
+// ```python
+// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+//
+// tf.sparse_segment_sum_with_num_segments(
+//     c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
+// # => [[0 0 0 0]
+// #     [0 0 0 0]
+// #     [0 0 0 0]]
+//
+// tf.sparse_segment_sum_with_num_segments(c,
+//                                         tf.constant([0, 1]),
+//                                         tf.constant([0, 2],
+//                                         num_segments=4))
+// # => [[ 1  2  3  4]
+// #     [ 0  0  0  0]
+// #     [-1 -2 -3 -4]
 // #     [ 0  0  0  0]]
 // ```
 //
@@ -10182,23 +10519,6 @@ func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...Ass
 	return scope.AddOperation(opspec)
 }
 
-// Broadcasts a tensor value to one or more other devices.
-func CollectiveBcastSend(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape}
-	opspec := tf.OpSpec{
-		Type: "CollectiveBcastSend",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Split a `SparseTensor` into `num_split` tensors along one dimension.
 //
 // If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
@@ -10776,23 +11096,6 @@ func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, update
 	return scope.AddOperation(opspec)
 }
 
-// Mutually reduces multiple tensors of identical type and shape.
-func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, merge_op string, final_op string, subdiv_offsets []int64) (data tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "merge_op": merge_op, "final_op": final_op, "subdiv_offsets": subdiv_offsets}
-	opspec := tf.OpSpec{
-		Type: "CollectiveReduce",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Updates the tree ensemble by either adding a layer to the last tree being grown
 //
 // or by starting a new tree.
@@ -11671,6 +11974,49 @@ func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.
 	return scope.AddOperation(opspec)
 }
 
+// Exits the current frame to its parent frame.
+//
+// Exit makes its input `data` available to the parent frame.
+//
+// Arguments:
+//	data: The tensor to be made available to the parent frame.
+//
+// Returns The same tensor as `data`.
+func Exit(scope *Scope, data tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Exit",
+		Input: []tf.Input{
+			data,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Produce a string tensor that encodes the state of a Reader.
+//
+// Not all Readers support being serialized, so this can produce an
+// Unimplemented error.
+//
+// Arguments:
+//	reader_handle: Handle to a Reader.
+func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReaderSerializeStateV2",
+		Input: []tf.Input{
+			reader_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter.
 type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr)
 
@@ -11804,68 +12150,6 @@ func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (o
 	return op.Output(0)
 }
 
-// StringSplitV2Attr is an optional argument to StringSplitV2.
-type StringSplitV2Attr func(optionalAttr)
-
-// StringSplitV2Maxsplit sets the optional maxsplit attribute to value.
-//
-// value: An `int`. If `maxsplit > 0`, limit of the split of the result.
-// If not specified, defaults to -1
-func StringSplitV2Maxsplit(value int64) StringSplitV2Attr {
-	return func(m optionalAttr) {
-		m["maxsplit"] = value
-	}
-}
-
-// Split elements of `source` based on `sep` into a `SparseTensor`.
-//
-// Let N be the size of source (typically N will be the batch size). Split each
-// element of `source` based on `sep` and return a `SparseTensor`
-// containing the split tokens. Empty tokens are ignored.
-//
-// For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c',
-// then the output will be
-// ```
-// st.indices = [0, 0;
-//               0, 1;
-//               1, 0;
-//               1, 1;
-//               1, 2]
-// st.shape = [2, 3]
-// st.values = ['hello', 'world', 'a', 'b', 'c']
-// ```
-//
-// If `sep` is given, consecutive delimiters are not grouped together and are
-// deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and
-// sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
-// string, consecutive whitespace are regarded as a single separator, and the
-// result will contain no empty strings at the startor end if the string has
-// leading or trailing whitespace.
-//
-// Note that the above mentioned behavior matches python's str.split.
-//
-// Arguments:
-//	input: `1-D` string `Tensor`, the strings to split.
-//	sep: `0-D` string `Tensor`, the delimiter character.
-func StringSplitV2(scope *Scope, input tf.Output, sep tf.Output, optional ...StringSplitV2Attr) (indices tf.Output, values tf.Output, shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StringSplitV2",
-		Input: []tf.Input{
-			input, sep,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
 // MaxPoolAttr is an optional argument to MaxPool.
 type MaxPoolAttr func(optionalAttr)
 
@@ -12435,21 +12719,6 @@ func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...
 	return op.Output(0)
 }
 
-// Computes softsign: `features / (abs(features) + 1)`.
-func Softsign(scope *Scope, features tf.Output) (activations tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Softsign",
-		Input: []tf.Input{
-			features,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Creates a TensorList which, when stacked, has the value of `tensor`.
 //
 // Each tensor in the result list corresponds to one row of the input tensor.
@@ -12470,81 +12739,6 @@ func TensorListFromTensor(scope *Scope, tensor tf.Output, element_shape tf.Outpu
 	return op.Output(0)
 }
 
-// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping.
-type GenerateVocabRemappingAttr func(optionalAttr)
-
-// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value.
-//
-// value: Number of entries in the old vocab file to consider.  If -1,
-// use the entire old vocabulary.
-// If not specified, defaults to -1
-//
-// REQUIRES: value >= -1
-func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr {
-	return func(m optionalAttr) {
-		m["old_vocab_size"] = value
-	}
-}
-
-// Given a path to new and old vocabulary files, returns a remapping Tensor of
-//
-// length `num_new_vocab`, where `remapping[i]` contains the row number in the old
-// vocabulary that corresponds to row `i` in the new vocabulary (starting at line
-// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i`
-// in the new vocabulary is not in the old vocabulary.  The old vocabulary is
-// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the
-// default value of -1.
-//
-// `num_vocab_offset` enables
-// use in the partitioned variable case, and should generally be set through
-// examining partitioning info.  The format of the files should be a text file,
-// with each line containing a single entity within the vocabulary.
-//
-// For example, with `new_vocab_file` a text file containing each of the following
-// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3],
-// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be
-// `[0, -1, 2]`.
-//
-// The op also returns a count of how many entries in the new vocabulary
-// were present in the old vocabulary, which is used to calculate the number of
-// values to initialize in a weight matrix remapping
-//
-// This functionality can be used to remap both row vocabularies (typically,
-// features) and column vocabularies (typically, classes) from TensorFlow
-// checkpoints.  Note that the partitioning logic relies on contiguous vocabularies
-// corresponding to div-partitioned variables.  Moreover, the underlying remapping
-// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should
-// use the corresponding index_table_from_file() as the FeatureColumn framework
-// does (as opposed to tf.feature_to_id(), which uses a CuckooTable).
-//
-// Arguments:
-//	new_vocab_file: Path to the new vocab file.
-//	old_vocab_file: Path to the old vocab file.
-//	new_vocab_offset: How many entries into the new vocab file to start reading.
-//	num_new_vocab: Number of entries in the new vocab file to remap.
-//
-// Returns A Tensor of length num_new_vocab where the element at index i
-// is equal to the old ID that maps to the new ID i.  This element is -1 for any
-// new ID that is not found in the old vocabulary.Number of new vocab entries found in old vocab.
-func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "GenerateVocabRemapping",
-		Input: []tf.Input{
-			new_vocab_file, old_vocab_file,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
 // Assigns sparse updates to the variable referenced by `resource`.
 //
 // This operation computes
@@ -13547,6 +13741,27 @@ func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAtt
 	return op.Output(0)
 }
 
+// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics.
+//
+// Arguments:
+//	tree_ensemble_handle: Handle to the tree ensemble.
+//
+// Returns Stamp token of the tree ensemble resource.The number of trees in the tree ensemble resource.The number of trees that were finished successfully.The number of layers we attempted to build (but not necessarily succeeded).Rank size 2 tensor that contains start and end ids of the nodes in the latest
+// layer.
+func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesGetEnsembleStates",
+		Input: []tf.Input{
+			tree_ensemble_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+}
+
 // ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign.
 type ResourceApplyPowerSignAttr func(optionalAttr)
 
@@ -16327,79 +16542,6 @@ func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...Ra
 	return op.Output(0)
 }
 
-// LogUniformCandidateSamplerAttr is an optional argument to LogUniformCandidateSampler.
-type LogUniformCandidateSamplerAttr func(optionalAttr)
-
-// LogUniformCandidateSamplerSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
-//
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Generates labels for candidate sampling with a log-uniform distribution.
-//
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
-//
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
-//
-// Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
-//
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "LogUniformCandidateSampler",
-		Input: []tf.Input{
-			true_classes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
 // Returns the max of x and y (i.e. x > y ? x : y) element-wise.
 //
 // *NOTE*: `Maximum` supports broadcasting. More about broadcasting
@@ -19444,31 +19586,6 @@ func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output
 	return op.Output(0)
 }
 
-// Read an element from the TensorArray into output `value`.
-//
-// Arguments:
-//	handle: The handle to a TensorArray.
-//
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	dtype: The type of the elem that is returned.
-//
-// Returns The tensor that is read from the TensorArray.
-func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayReadV3",
-		Input: []tf.Input{
-			handle, index, flow_in,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // QuantizeV2Attr is an optional argument to QuantizeV2.
 type QuantizeV2Attr func(optionalAttr)
 
@@ -20866,6 +20983,201 @@ func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (ou
 	return op.Output(0)
 }
 
+// EnterAttr is an optional argument to Enter.
+type EnterAttr func(optionalAttr)
+
+// EnterIsConstant sets the optional is_constant attribute to value.
+//
+// value: If true, the output is constant within the child frame.
+// If not specified, defaults to false
+func EnterIsConstant(value bool) EnterAttr {
+	return func(m optionalAttr) {
+		m["is_constant"] = value
+	}
+}
+
+// EnterParallelIterations sets the optional parallel_iterations attribute to value.
+//
+// value: The number of iterations allowed to run in parallel.
+// If not specified, defaults to 10
+func EnterParallelIterations(value int64) EnterAttr {
+	return func(m optionalAttr) {
+		m["parallel_iterations"] = value
+	}
+}
+
+// Creates or finds a child frame, and makes `data` available to the child frame.
+//
+// This op is used together with `Exit` to create loops in the graph.
+// The unique `frame_name` is used by the `Executor` to identify frames. If
+// `is_constant` is true, `output` is a constant in the child frame; otherwise
+// it may be changed in the child frame. At most `parallel_iterations` iterations
+// are run in parallel in the child frame.
+//
+// Arguments:
+//	data: The tensor to be made available to the child frame.
+//	frame_name: The name of the child frame.
+//
+// Returns The same tensor as `data`.
+func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"frame_name": frame_name}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Enter",
+		Input: []tf.Input{
+			data,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Add all input tensors element wise.
+//
+// Arguments:
+//	inputs: Must all be the same size and shape.
+func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AddN",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// TryRpcAttr is an optional argument to TryRpc.
+type TryRpcAttr func(optionalAttr)
+
+// TryRpcProtocol sets the optional protocol attribute to value.
+//
+// value: RPC protocol to use.  Empty string means use the default protocol.
+// Options include 'grpc'.
+// If not specified, defaults to ""
+func TryRpcProtocol(value string) TryRpcAttr {
+	return func(m optionalAttr) {
+		m["protocol"] = value
+	}
+}
+
+// TryRpcFailFast sets the optional fail_fast attribute to value.
+//
+// value: `boolean`. If `true` (default), then failures to connect
+// (i.e., the server does not immediately respond) cause an RPC failure.
+// If not specified, defaults to true
+func TryRpcFailFast(value bool) TryRpcAttr {
+	return func(m optionalAttr) {
+		m["fail_fast"] = value
+	}
+}
+
+// TryRpcTimeoutInMs sets the optional timeout_in_ms attribute to value.
+//
+// value: `int`. If `0` (default), then the kernel will run the RPC
+// request and only time out if the RPC deadline passes or the session times out.
+// If this value is greater than `0`, then the op will raise an exception if
+// the RPC takes longer than `timeout_in_ms`.
+// If not specified, defaults to 0
+func TryRpcTimeoutInMs(value int64) TryRpcAttr {
+	return func(m optionalAttr) {
+		m["timeout_in_ms"] = value
+	}
+}
+
+// Perform batches of RPC requests.
+//
+// This op asynchronously performs either a single RPC request, or a batch
+// of requests.  RPC requests are defined by three main parameters:
+//
+//   - `address` (the host+port or BNS address of the request)
+//   - `method` (the method name for the request)
+//   - `request` (the serialized proto string, or vector of strings,
+//      of the RPC request argument).
+//
+// For example, if you have an RPC service running on port localhost:2345,
+// and its interface is configured with the following proto declaration:
+//
+// ```
+// service MyService {
+//   rpc MyMethod(MyRequestProto) returns (MyResponseProto) {
+//   }
+// };
+// ```
+//
+// then call this op with arguments:
+//
+// ```
+// address = "localhost:2345"
+// method = "MyService/MyMethod"
+// ```
+//
+// The `request` tensor is a string tensor representing serialized `MyRequestProto`
+// strings; and the output string tensor `response` will have the same shape
+// and contain (upon successful completion) corresponding serialized
+// `MyResponseProto` strings.
+//
+// For example, to send a single, empty, `MyRequestProto`, call
+// this op with `request = ""`.  To send 5 **parallel** empty requests,
+// call this op with `request = ["", "", "", "", ""]`.
+//
+// More generally, one can create a batch of `MyRequestProto` serialized protos
+// from regular batched tensors using the `encode_proto` op, and convert
+// the response `MyResponseProto` serialized protos to batched tensors
+// using the `decode_proto` op.
+//
+// **NOTE** Working with serialized proto strings is faster than instantiating
+// actual proto objects in memory, so no performance degradation is expected
+// compared to writing custom kernels for this workflow.
+//
+// Unlike the standard `Rpc` op, if the connection fails or the remote worker
+// returns an error status, this op does **not** reraise the exception.
+// Instead, the `status_code` and `status_message` entry for the corresponding RPC
+// call is set with the error returned from the RPC call.  The `response` tensor
+// will contain valid response values for those minibatch entries whose RPCs did
+// not fail; the rest of the entries will have empty strings.
+//
+// Arguments:
+//	address: `0-D` or `1-D`.  The address (i.e. host_name:port) of the RPC server.
+// If this tensor has more than 1 element, then multiple parallel rpc requests
+// are sent.  This argument broadcasts with `method` and `request`.
+//	method: `0-D` or `1-D`.  The method address on the RPC server.
+// If this tensor has more than 1 element, then multiple parallel rpc requests
+// are sent.  This argument broadcasts with `address` and `request`.
+//	request: `0-D` or `1-D`.  Serialized proto strings: the rpc request argument.
+// If this tensor has more than 1 element, then multiple parallel rpc requests
+// are sent.  This argument broadcasts with `address` and `method`.
+//
+// Returns Same shape as `request`. Serialized proto strings: the rpc responses.Same shape as `request`.  Values correspond to tensorflow Status enum codes.Same shape as `request`.  Values correspond to Status messages
+// returned from the RPC calls.
+func TryRpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...TryRpcAttr) (response tf.Output, status_code tf.Output, status_message tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "TryRpc",
+		Input: []tf.Input{
+			address, method, request,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
 // Delete the tensor specified by its handle in the session.
 //
 // Arguments:
@@ -21612,29 +21924,6 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Forwards the input to the output.
-//
-// This operator represents the loop termination condition used by the
-// "pivot" switches of a loop.
-//
-// Arguments:
-//	input: A boolean scalar, representing the branch predicate of the Switch op.
-//
-// Returns The same tensor as `input`.
-func LoopCond(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "LoopCond",
-		Input: []tf.Input{
-			input,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Computes the sum along segments of a tensor.
 //
 // Read
@@ -24163,6 +24452,31 @@ func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr
 	return op.Output(0)
 }
 
+// Read an element from the TensorArray into output `value`.
+//
+// Arguments:
+//	handle: The handle to a TensorArray.
+//
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	dtype: The type of the elem that is returned.
+//
+// Returns The tensor that is read from the TensorArray.
+func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayReadV3",
+		Input: []tf.Input{
+			handle, index, flow_in,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the gradient for the tanh of `x` wrt its input.
 //
 // Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy`
@@ -27849,178 +28163,6 @@ func FakeParam(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Outpu
 	return op.Output(0)
 }
 
-// EncodeProtoAttr is an optional argument to EncodeProto.
-type EncodeProtoAttr func(optionalAttr)
-
-// EncodeProtoDescriptorSource sets the optional descriptor_source attribute to value.
-// If not specified, defaults to "local://"
-func EncodeProtoDescriptorSource(value string) EncodeProtoAttr {
-	return func(m optionalAttr) {
-		m["descriptor_source"] = value
-	}
-}
-
-// The op serializes protobuf messages provided in the input tensors.
-//
-// The types of the tensors in `values` must match the schema for the
-// fields specified in `field_names`. All the tensors in `values` must
-// have a common shape prefix, *batch_shape*.
-//
-// The `sizes` tensor specifies repeat counts for each field.  The repeat
-// count (last dimension) of a each tensor in `values` must be greater
-// than or equal to corresponding repeat count in `sizes`.
-//
-// A `message_type` name must be provided to give context for the field
-// names. The actual message descriptor can be looked up either in the
-// linked-in descriptor pool or a filename provided by the caller using
-// the `descriptor_source` attribute.
-//
-// The `descriptor_source` attribute selects a source of protocol
-// descriptors to consult when looking up `message_type`. This may be a
-// filename containing a serialized `FileDescriptorSet` message,
-// or the special value `local://`, in which case only descriptors linked
-// into the code will be searched; the filename can be on any filesystem
-// accessible to TensorFlow.
-//
-// You can build a `descriptor_source` file using the `--descriptor_set_out`
-// and `--include_imports` options to the protocol compiler `protoc`.
-//
-// The `local://` database only covers descriptors linked into the
-// code via C++ libraries, not Python imports. You can link in a proto descriptor
-// by creating a cc_library target with alwayslink=1.
-//
-// There are a few special cases in the value mapping:
-//
-// Submessage and group fields must be pre-serialized as TensorFlow strings.
-//
-// TensorFlow lacks support for unsigned int64s, so they must be
-// represented as `tf.int64` with the same twos-complement bit pattern
-// (the obvious way).
-//
-// Unsigned int32 values can be represented exactly with `tf.int64`, or
-// with sign wrapping if the input is of type `tf.int32`.
-//
-// Arguments:
-//	sizes: Tensor of int32 with shape `[batch_shape, len(field_names)]`.
-//	values: List of tensors containing values for the corresponding field.
-//	field_names: List of strings containing proto field names.
-//	message_type: Name of the proto message type to decode.
-//
-// Returns Tensor of serialized protos with shape `batch_shape`.
-func EncodeProto(scope *Scope, sizes tf.Output, values []tf.Output, field_names []string, message_type string, optional ...EncodeProtoAttr) (bytes tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"field_names": field_names, "message_type": message_type}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "EncodeProto",
-		Input: []tf.Input{
-			sizes, tf.OutputList(values),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a TensorArray for storing the gradients of values in the given handle.
-//
-// If the given TensorArray gradient already exists, returns a reference to it.
-//
-// Locks the size of the original TensorArray by disabling its dynamic size flag.
-//
-// **A note about the input flow_in:**
-//
-// The handle flow_in forces the execution of the gradient lookup to occur
-// only after certain other operations have occurred.  For example, when
-// the forward TensorArray is dynamically sized, writes to this TensorArray
-// may resize the object.  The gradient TensorArray is statically sized based
-// on the size of the forward TensorArray when this operation executes.
-// Furthermore, the size of the forward TensorArray is frozen by this call.
-// As a result, the flow is used to ensure that the call to generate the gradient
-// TensorArray only happens after all writes are executed.
-//
-// In the case of dynamically sized TensorArrays, gradient computation should
-// only be performed on read operations that have themselves been chained via
-// flow to occur only after all writes have executed. That way the final size
-// of the forward TensorArray is known when this operation is called.
-//
-// **A note about the source attribute:**
-//
-// TensorArray gradient calls use an accumulator TensorArray object.  If
-// multiple gradients are calculated and run in the same session, the multiple
-// gradient nodes may accidentally flow through the same accumulator TensorArray.
-// This double counts and generally breaks the TensorArray gradient flow.
-//
-// The solution is to identify which gradient call this particular
-// TensorArray gradient is being called in.  This is performed by identifying
-// a unique string (e.g. "gradients", "gradients_1", ...) from the input
-// gradient Tensor's name.  This string is used as a suffix when creating
-// the TensorArray gradient object here (the attribute `source`).
-//
-// The attribute `source` is added as a suffix to the forward TensorArray's
-// name when performing the creation / lookup, so that each separate gradient
-// calculation gets its own TensorArray accumulator.
-//
-// Arguments:
-//	handle: The handle to the forward TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	source: The gradient source string, used to decide which gradient TensorArray
-// to return.
-func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"source": source}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayGradV3",
-		Input: []tf.Input{
-			handle, flow_in,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// Creates a dataset that splits a SparseTensor into elements row-wise.
-func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseTensorSliceDataset",
-		Input: []tf.Input{
-			indices, values, dense_shape,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns x / y element-wise for real types.
-//
-// If `x` and `y` are reals, this will return the floating-point division.
-//
-// *NOTE*: `Div` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func RealDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "RealDiv",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 //     Adds v into specified rows of x.
 //
 //     Computes y = x; y[i, :] += v; return y.
@@ -28316,65 +28458,314 @@ func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...Sta
 	return op.Output(0)
 }
 
-// Creates a dataset that concatenates `input_dataset` with `another_dataset`.
-func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ConcatenateDataset",
-		Input: []tf.Input{
-			input_dataset, another_dataset,
-		},
-		Attrs: attrs,
+// StringSplitV2Attr is an optional argument to StringSplitV2.
+type StringSplitV2Attr func(optionalAttr)
+
+// StringSplitV2Maxsplit sets the optional maxsplit attribute to value.
+//
+// value: An `int`. If `maxsplit > 0`, limit of the split of the result.
+// If not specified, defaults to -1
+func StringSplitV2Maxsplit(value int64) StringSplitV2Attr {
+	return func(m optionalAttr) {
+		m["maxsplit"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Adds a value to the current value of a variable.
+// Split elements of `source` based on `sep` into a `SparseTensor`.
 //
-// Any ReadVariableOp with a control dependency on this op is guaranteed to
-// see the incremented value or a subsequent newer one.
+// Let N be the size of source (typically N will be the batch size). Split each
+// element of `source` based on `sep` and return a `SparseTensor`
+// containing the split tokens. Empty tokens are ignored.
 //
-// Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	value: the value by which the variable will be incremented.
+// For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c',
+// then the output will be
+// ```
+// st.indices = [0, 0;
+//               0, 1;
+//               1, 0;
+//               1, 1;
+//               1, 2]
+// st.shape = [2, 3]
+// st.values = ['hello', 'world', 'a', 'b', 'c']
+// ```
 //
-// Returns the created operation.
-func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+// If `sep` is given, consecutive delimiters are not grouped together and are
+// deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and
+// sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
+// string, consecutive whitespace are regarded as a single separator, and the
+// result will contain no empty strings at the startor end if the string has
+// leading or trailing whitespace.
+//
+// Note that the above mentioned behavior matches python's str.split.
+//
+// Arguments:
+//	input: `1-D` string `Tensor`, the strings to split.
+//	sep: `0-D` string `Tensor`, the delimiter character.
+func StringSplitV2(scope *Scope, input tf.Output, sep tf.Output, optional ...StringSplitV2Attr) (indices tf.Output, values tf.Output, shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "AssignAddVariableOp",
+		Type: "StringSplitV2",
 		Input: []tf.Input{
-			resource, value,
+			input, sep,
 		},
+		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Records the latency of producing `input_dataset` elements in a StatsAggregator.
-func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Computes softsign: `features / (abs(features) + 1)`.
+func Softsign(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "LatencyStatsDataset",
+		Type: "Softsign",
 		Input: []tf.Input{
-			input_dataset, tag,
+			features,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MapSizeAttr is an optional argument to MapSize.
-type MapSizeAttr func(optionalAttr)
+// EncodeProtoAttr is an optional argument to EncodeProto.
+type EncodeProtoAttr func(optionalAttr)
+
+// EncodeProtoDescriptorSource sets the optional descriptor_source attribute to value.
+// If not specified, defaults to "local://"
+func EncodeProtoDescriptorSource(value string) EncodeProtoAttr {
+	return func(m optionalAttr) {
+		m["descriptor_source"] = value
+	}
+}
+
+// The op serializes protobuf messages provided in the input tensors.
+//
+// The types of the tensors in `values` must match the schema for the
+// fields specified in `field_names`. All the tensors in `values` must
+// have a common shape prefix, *batch_shape*.
+//
+// The `sizes` tensor specifies repeat counts for each field.  The repeat
+// count (last dimension) of a each tensor in `values` must be greater
+// than or equal to corresponding repeat count in `sizes`.
+//
+// A `message_type` name must be provided to give context for the field
+// names. The actual message descriptor can be looked up either in the
+// linked-in descriptor pool or a filename provided by the caller using
+// the `descriptor_source` attribute.
+//
+// The `descriptor_source` attribute selects a source of protocol
+// descriptors to consult when looking up `message_type`. This may be a
+// filename containing a serialized `FileDescriptorSet` message,
+// or the special value `local://`, in which case only descriptors linked
+// into the code will be searched; the filename can be on any filesystem
+// accessible to TensorFlow.
+//
+// You can build a `descriptor_source` file using the `--descriptor_set_out`
+// and `--include_imports` options to the protocol compiler `protoc`.
+//
+// The `local://` database only covers descriptors linked into the
+// code via C++ libraries, not Python imports. You can link in a proto descriptor
+// by creating a cc_library target with alwayslink=1.
+//
+// There are a few special cases in the value mapping:
+//
+// Submessage and group fields must be pre-serialized as TensorFlow strings.
+//
+// TensorFlow lacks support for unsigned int64s, so they must be
+// represented as `tf.int64` with the same twos-complement bit pattern
+// (the obvious way).
+//
+// Unsigned int32 values can be represented exactly with `tf.int64`, or
+// with sign wrapping if the input is of type `tf.int32`.
+//
+// Arguments:
+//	sizes: Tensor of int32 with shape `[batch_shape, len(field_names)]`.
+//	values: List of tensors containing values for the corresponding field.
+//	field_names: List of strings containing proto field names.
+//	message_type: Name of the proto message type to decode.
+//
+// Returns Tensor of serialized protos with shape `batch_shape`.
+func EncodeProto(scope *Scope, sizes tf.Output, values []tf.Output, field_names []string, message_type string, optional ...EncodeProtoAttr) (bytes tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"field_names": field_names, "message_type": message_type}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "EncodeProto",
+		Input: []tf.Input{
+			sizes, tf.OutputList(values),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a TensorArray for storing the gradients of values in the given handle.
+//
+// If the given TensorArray gradient already exists, returns a reference to it.
+//
+// Locks the size of the original TensorArray by disabling its dynamic size flag.
+//
+// **A note about the input flow_in:**
+//
+// The handle flow_in forces the execution of the gradient lookup to occur
+// only after certain other operations have occurred.  For example, when
+// the forward TensorArray is dynamically sized, writes to this TensorArray
+// may resize the object.  The gradient TensorArray is statically sized based
+// on the size of the forward TensorArray when this operation executes.
+// Furthermore, the size of the forward TensorArray is frozen by this call.
+// As a result, the flow is used to ensure that the call to generate the gradient
+// TensorArray only happens after all writes are executed.
+//
+// In the case of dynamically sized TensorArrays, gradient computation should
+// only be performed on read operations that have themselves been chained via
+// flow to occur only after all writes have executed. That way the final size
+// of the forward TensorArray is known when this operation is called.
+//
+// **A note about the source attribute:**
+//
+// TensorArray gradient calls use an accumulator TensorArray object.  If
+// multiple gradients are calculated and run in the same session, the multiple
+// gradient nodes may accidentally flow through the same accumulator TensorArray.
+// This double counts and generally breaks the TensorArray gradient flow.
+//
+// The solution is to identify which gradient call this particular
+// TensorArray gradient is being called in.  This is performed by identifying
+// a unique string (e.g. "gradients", "gradients_1", ...) from the input
+// gradient Tensor's name.  This string is used as a suffix when creating
+// the TensorArray gradient object here (the attribute `source`).
+//
+// The attribute `source` is added as a suffix to the forward TensorArray's
+// name when performing the creation / lookup, so that each separate gradient
+// calculation gets its own TensorArray accumulator.
+//
+// Arguments:
+//	handle: The handle to the forward TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	source: The gradient source string, used to decide which gradient TensorArray
+// to return.
+func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"source": source}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayGradV3",
+		Input: []tf.Input{
+			handle, flow_in,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Creates a dataset that splits a SparseTensor into elements row-wise.
+func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseTensorSliceDataset",
+		Input: []tf.Input{
+			indices, values, dense_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns x / y element-wise for real types.
+//
+// If `x` and `y` are reals, this will return the floating-point division.
+//
+// *NOTE*: `Div` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func RealDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RealDiv",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that concatenates `input_dataset` with `another_dataset`.
+func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ConcatenateDataset",
+		Input: []tf.Input{
+			input_dataset, another_dataset,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Adds a value to the current value of a variable.
+//
+// Any ReadVariableOp with a control dependency on this op is guaranteed to
+// see the incremented value or a subsequent newer one.
+//
+// Arguments:
+//	resource: handle to the resource in which to store the variable.
+//	value: the value by which the variable will be incremented.
+//
+// Returns the created operation.
+func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AssignAddVariableOp",
+		Input: []tf.Input{
+			resource, value,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Records the latency of producing `input_dataset` elements in a StatsAggregator.
+func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "LatencyStatsDataset",
+		Input: []tf.Input{
+			input_dataset, tag,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// MapSizeAttr is an optional argument to MapSize.
+type MapSizeAttr func(optionalAttr)
 
 // MapSizeCapacity sets the optional capacity attribute to value.
 // If not specified, defaults to 0
@@ -32542,602 +32933,211 @@ func CudnnRNNParamsToCanonicalSeed2(value int64) CudnnRNNParamsToCanonicalAttr {
 // Retrieves a set of weights from the opaque params buffer that can be saved and
 // restored in a way compatible with future runs.
 //
-// Note that the params buffer may not be compatible across different GPUs. So any
-// save and restoration should be converted to and from the canonical weights and
-// biases.
-//
-// num_layers: Specifies the number of layers in the RNN model.
-// num_units: Specifies the size of the hidden state.
-// input_size: Specifies the size of the input state.
-// num_params: number of parameter sets for all layers.
-//     Each layer may contain multiple parameter sets, with each set consisting of
-//     a weight matrix and a bias vector.
-// weights: the canonical form of weights that can be used for saving
-//     and restoration. They are more likely to be compatible across different
-//     generations.
-// biases: the canonical form of biases that can be used for saving
-//     and restoration. They are more likely to be compatible across different
-//     generations.
-// rnn_mode: Indicates the type of the RNN model.
-// input_mode: Indicate whether there is a linear projection between the input and
-//     The actual computation before the first layer. 'skip_input' is only allowed
-//     when input_size == num_units; 'auto_select' implies 'skip_input' when
-//     input_size == num_units; otherwise, it implies 'linear_input'.
-// direction: Indicates whether a bidirectional model will be used.
-//     dir = (direction == bidirectional) ? 2 : 1
-// dropout: dropout probability. When set to 0., dropout is disabled.
-// seed: the 1st part of a seed to initialize dropout.
-// seed2: the 2nd part of a seed to initialize dropout.
-func CudnnRNNParamsToCanonical(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, params tf.Output, num_params int64, optional ...CudnnRNNParamsToCanonicalAttr) (weights []tf.Output, biases []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_params": num_params}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CudnnRNNParamsToCanonical",
-		Input: []tf.Input{
-			num_layers, num_units, input_size, params,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if weights, idx, err = makeOutputList(op, idx, "weights"); err != nil {
-		scope.UpdateErr("CudnnRNNParamsToCanonical", err)
-		return
-	}
-	if biases, idx, err = makeOutputList(op, idx, "biases"); err != nil {
-		scope.UpdateErr("CudnnRNNParamsToCanonical", err)
-		return
-	}
-	return weights, biases
-}
-
-// UniformCandidateSamplerAttr is an optional argument to UniformCandidateSampler.
-type UniformCandidateSamplerAttr func(optionalAttr)
-
-// UniformCandidateSamplerSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func UniformCandidateSamplerSeed(value int64) UniformCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// UniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
-//
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func UniformCandidateSamplerSeed2(value int64) UniformCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Generates labels for candidate sampling with a uniform distribution.
-//
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
-//
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
-//
-// Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
-//
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func UniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...UniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "UniformCandidateSampler",
-		Input: []tf.Input{
-			true_classes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// CTCLossAttr is an optional argument to CTCLoss.
-type CTCLossAttr func(optionalAttr)
-
-// CTCLossPreprocessCollapseRepeated sets the optional preprocess_collapse_repeated attribute to value.
-//
-// value: Scalar, if true then repeated labels are
-// collapsed prior to the CTC calculation.
-// If not specified, defaults to false
-func CTCLossPreprocessCollapseRepeated(value bool) CTCLossAttr {
-	return func(m optionalAttr) {
-		m["preprocess_collapse_repeated"] = value
-	}
-}
-
-// CTCLossCtcMergeRepeated sets the optional ctc_merge_repeated attribute to value.
-//
-// value: Scalar.  If set to false, *during* CTC calculation
-// repeated non-blank labels will not be merged and are interpreted as
-// individual labels.  This is a simplified version of CTC.
-// If not specified, defaults to true
-func CTCLossCtcMergeRepeated(value bool) CTCLossAttr {
-	return func(m optionalAttr) {
-		m["ctc_merge_repeated"] = value
-	}
-}
-
-// CTCLossIgnoreLongerOutputsThanInputs sets the optional ignore_longer_outputs_than_inputs attribute to value.
-//
-// value: Scalar. If set to true, during CTC
-// calculation, items that have longer output sequences than input sequences
-// are skipped: they don't contribute to the loss term and have zero-gradient.
-// If not specified, defaults to false
-func CTCLossIgnoreLongerOutputsThanInputs(value bool) CTCLossAttr {
-	return func(m optionalAttr) {
-		m["ignore_longer_outputs_than_inputs"] = value
-	}
-}
-
-// Calculates the CTC Loss (log probability) for each batch entry.  Also calculates
-//
-// the gradient.  This class performs the softmax operation for you, so inputs
-// should be e.g. linear projections of outputs by an LSTM.
-//
-// Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	labels_indices: The indices of a `SparseTensor<int32, 2>`.
-// `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for
-// `(batch b, time t)`.
-//	labels_values: The values (labels) associated with the given batch and time.
-//	sequence_length: A vector containing sequence lengths (batch).
-//
-// Returns A vector (batch) containing log-probabilities.The gradient of `loss`.  3-D, shape:
-// `(max_time x batch_size x num_classes)`.
-func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_values tf.Output, sequence_length tf.Output, optional ...CTCLossAttr) (loss tf.Output, gradient tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CTCLoss",
-		Input: []tf.Input{
-			inputs, labels_indices, labels_values, sequence_length,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder.
-type CTCGreedyDecoderAttr func(optionalAttr)
-
-// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value.
-//
-// value: If True, merge repeated classes in output.
-// If not specified, defaults to false
-func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr {
-	return func(m optionalAttr) {
-		m["merge_repeated"] = value
-	}
-}
-
-// Performs greedy decoding on the logits given in inputs.
-//
-// A note about the attribute merge_repeated: if enabled, when
-// consecutive logits' maximum indices are the same, only the first of
-// these is emitted.  Labeling the blank '*', the sequence "A B B * B B"
-// becomes "A B B" if merge_repeated = True and "A B B B B" if
-// merge_repeated = False.
-//
-// Regardless of the value of merge_repeated, if the maximum index of a given
-// time and batch corresponds to the blank, index `(num_classes - 1)`, no new
-// element is emitted.
-//
-// Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	sequence_length: A vector containing sequence lengths, size `(batch_size)`.
-//
-// Returns Indices matrix, size `(total_decoded_outputs x 2)`,
-// of a `SparseTensor<int64, 2>`.  The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`,
-// of a `SparseTensor<int64, 2>`.  The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor.
-// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence
-// log-probabilities.
-func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CTCGreedyDecoder",
-		Input: []tf.Input{
-			inputs, sequence_length,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
-}
-
-// Forwards `data` to the output port determined by `pred`.
-//
-// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
-// the data goes to `output_false`.
-//
-// See also `RefSwitch` and `Merge`.
-//
-// Arguments:
-//	data: The tensor to be forwarded to the appropriate output.
-//	pred: A scalar that specifies which output port will receive data.
-//
-// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output.
-func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Switch",
-		Input: []tf.Input{
-			data, pred,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// Add all input tensors element wise.
-//
-// Arguments:
-//	inputs: Must all be the same size and shape.
-func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "AddN",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// TryRpcAttr is an optional argument to TryRpc.
-type TryRpcAttr func(optionalAttr)
-
-// TryRpcProtocol sets the optional protocol attribute to value.
-//
-// value: RPC protocol to use.  Empty string means use the default protocol.
-// Options include 'grpc'.
-// If not specified, defaults to ""
-func TryRpcProtocol(value string) TryRpcAttr {
-	return func(m optionalAttr) {
-		m["protocol"] = value
-	}
-}
-
-// TryRpcFailFast sets the optional fail_fast attribute to value.
-//
-// value: `boolean`. If `true` (default), then failures to connect
-// (i.e., the server does not immediately respond) cause an RPC failure.
-// If not specified, defaults to true
-func TryRpcFailFast(value bool) TryRpcAttr {
-	return func(m optionalAttr) {
-		m["fail_fast"] = value
-	}
-}
-
-// TryRpcTimeoutInMs sets the optional timeout_in_ms attribute to value.
-//
-// value: `int`. If `0` (default), then the kernel will run the RPC
-// request and only time out if the RPC deadline passes or the session times out.
-// If this value is greater than `0`, then the op will raise an exception if
-// the RPC takes longer than `timeout_in_ms`.
-// If not specified, defaults to 0
-func TryRpcTimeoutInMs(value int64) TryRpcAttr {
-	return func(m optionalAttr) {
-		m["timeout_in_ms"] = value
-	}
-}
-
-// Perform batches of RPC requests.
-//
-// This op asynchronously performs either a single RPC request, or a batch
-// of requests.  RPC requests are defined by three main parameters:
-//
-//   - `address` (the host+port or BNS address of the request)
-//   - `method` (the method name for the request)
-//   - `request` (the serialized proto string, or vector of strings,
-//      of the RPC request argument).
-//
-// For example, if you have an RPC service running on port localhost:2345,
-// and its interface is configured with the following proto declaration:
-//
-// ```
-// service MyService {
-//   rpc MyMethod(MyRequestProto) returns (MyResponseProto) {
-//   }
-// };
-// ```
-//
-// then call this op with arguments:
-//
-// ```
-// address = "localhost:2345"
-// method = "MyService/MyMethod"
-// ```
-//
-// The `request` tensor is a string tensor representing serialized `MyRequestProto`
-// strings; and the output string tensor `response` will have the same shape
-// and contain (upon successful completion) corresponding serialized
-// `MyResponseProto` strings.
-//
-// For example, to send a single, empty, `MyRequestProto`, call
-// this op with `request = ""`.  To send 5 **parallel** empty requests,
-// call this op with `request = ["", "", "", "", ""]`.
-//
-// More generally, one can create a batch of `MyRequestProto` serialized protos
-// from regular batched tensors using the `encode_proto` op, and convert
-// the response `MyResponseProto` serialized protos to batched tensors
-// using the `decode_proto` op.
-//
-// **NOTE** Working with serialized proto strings is faster than instantiating
-// actual proto objects in memory, so no performance degradation is expected
-// compared to writing custom kernels for this workflow.
-//
-// Unlike the standard `Rpc` op, if the connection fails or the remote worker
-// returns an error status, this op does **not** reraise the exception.
-// Instead, the `status_code` and `status_message` entry for the corresponding RPC
-// call is set with the error returned from the RPC call.  The `response` tensor
-// will contain valid response values for those minibatch entries whose RPCs did
-// not fail; the rest of the entries will have empty strings.
-//
-// Arguments:
-//	address: `0-D` or `1-D`.  The address (i.e. host_name:port) of the RPC server.
-// If this tensor has more than 1 element, then multiple parallel rpc requests
-// are sent.  This argument broadcasts with `method` and `request`.
-//	method: `0-D` or `1-D`.  The method address on the RPC server.
-// If this tensor has more than 1 element, then multiple parallel rpc requests
-// are sent.  This argument broadcasts with `address` and `request`.
-//	request: `0-D` or `1-D`.  Serialized proto strings: the rpc request argument.
-// If this tensor has more than 1 element, then multiple parallel rpc requests
-// are sent.  This argument broadcasts with `address` and `method`.
-//
-// Returns Same shape as `request`. Serialized proto strings: the rpc responses.Same shape as `request`.  Values correspond to tensorflow Status enum codes.Same shape as `request`.  Values correspond to Status messages
-// returned from the RPC calls.
-func TryRpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...TryRpcAttr) (response tf.Output, status_code tf.Output, status_message tf.Output) {
+// Note that the params buffer may not be compatible across different GPUs. So any
+// save and restoration should be converted to and from the canonical weights and
+// biases.
+//
+// num_layers: Specifies the number of layers in the RNN model.
+// num_units: Specifies the size of the hidden state.
+// input_size: Specifies the size of the input state.
+// num_params: number of parameter sets for all layers.
+//     Each layer may contain multiple parameter sets, with each set consisting of
+//     a weight matrix and a bias vector.
+// weights: the canonical form of weights that can be used for saving
+//     and restoration. They are more likely to be compatible across different
+//     generations.
+// biases: the canonical form of biases that can be used for saving
+//     and restoration. They are more likely to be compatible across different
+//     generations.
+// rnn_mode: Indicates the type of the RNN model.
+// input_mode: Indicate whether there is a linear projection between the input and
+//     The actual computation before the first layer. 'skip_input' is only allowed
+//     when input_size == num_units; 'auto_select' implies 'skip_input' when
+//     input_size == num_units; otherwise, it implies 'linear_input'.
+// direction: Indicates whether a bidirectional model will be used.
+//     dir = (direction == bidirectional) ? 2 : 1
+// dropout: dropout probability. When set to 0., dropout is disabled.
+// seed: the 1st part of a seed to initialize dropout.
+// seed2: the 2nd part of a seed to initialize dropout.
+func CudnnRNNParamsToCanonical(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, params tf.Output, num_params int64, optional ...CudnnRNNParamsToCanonicalAttr) (weights []tf.Output, biases []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_params": num_params}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TryRpc",
+		Type: "CudnnRNNParamsToCanonical",
 		Input: []tf.Input{
-			address, method, request,
+			num_layers, num_units, input_size, params,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if weights, idx, err = makeOutputList(op, idx, "weights"); err != nil {
+		scope.UpdateErr("CudnnRNNParamsToCanonical", err)
+		return
+	}
+	if biases, idx, err = makeOutputList(op, idx, "biases"); err != nil {
+		scope.UpdateErr("CudnnRNNParamsToCanonical", err)
+		return
+	}
+	return weights, biases
 }
 
-// EnterAttr is an optional argument to Enter.
-type EnterAttr func(optionalAttr)
+// CTCLossAttr is an optional argument to CTCLoss.
+type CTCLossAttr func(optionalAttr)
 
-// EnterIsConstant sets the optional is_constant attribute to value.
+// CTCLossPreprocessCollapseRepeated sets the optional preprocess_collapse_repeated attribute to value.
 //
-// value: If true, the output is constant within the child frame.
+// value: Scalar, if true then repeated labels are
+// collapsed prior to the CTC calculation.
 // If not specified, defaults to false
-func EnterIsConstant(value bool) EnterAttr {
+func CTCLossPreprocessCollapseRepeated(value bool) CTCLossAttr {
 	return func(m optionalAttr) {
-		m["is_constant"] = value
+		m["preprocess_collapse_repeated"] = value
 	}
 }
 
-// EnterParallelIterations sets the optional parallel_iterations attribute to value.
+// CTCLossCtcMergeRepeated sets the optional ctc_merge_repeated attribute to value.
 //
-// value: The number of iterations allowed to run in parallel.
-// If not specified, defaults to 10
-func EnterParallelIterations(value int64) EnterAttr {
+// value: Scalar.  If set to false, *during* CTC calculation
+// repeated non-blank labels will not be merged and are interpreted as
+// individual labels.  This is a simplified version of CTC.
+// If not specified, defaults to true
+func CTCLossCtcMergeRepeated(value bool) CTCLossAttr {
 	return func(m optionalAttr) {
-		m["parallel_iterations"] = value
+		m["ctc_merge_repeated"] = value
 	}
 }
 
-// Creates or finds a child frame, and makes `data` available to the child frame.
+// CTCLossIgnoreLongerOutputsThanInputs sets the optional ignore_longer_outputs_than_inputs attribute to value.
 //
-// This op is used together with `Exit` to create loops in the graph.
-// The unique `frame_name` is used by the `Executor` to identify frames. If
-// `is_constant` is true, `output` is a constant in the child frame; otherwise
-// it may be changed in the child frame. At most `parallel_iterations` iterations
-// are run in parallel in the child frame.
+// value: Scalar. If set to true, during CTC
+// calculation, items that have longer output sequences than input sequences
+// are skipped: they don't contribute to the loss term and have zero-gradient.
+// If not specified, defaults to false
+func CTCLossIgnoreLongerOutputsThanInputs(value bool) CTCLossAttr {
+	return func(m optionalAttr) {
+		m["ignore_longer_outputs_than_inputs"] = value
+	}
+}
+
+// Calculates the CTC Loss (log probability) for each batch entry.  Also calculates
+//
+// the gradient.  This class performs the softmax operation for you, so inputs
+// should be e.g. linear projections of outputs by an LSTM.
 //
 // Arguments:
-//	data: The tensor to be made available to the child frame.
-//	frame_name: The name of the child frame.
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	labels_indices: The indices of a `SparseTensor<int32, 2>`.
+// `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for
+// `(batch b, time t)`.
+//	labels_values: The values (labels) associated with the given batch and time.
+//	sequence_length: A vector containing sequence lengths (batch).
 //
-// Returns The same tensor as `data`.
-func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAttr) (output tf.Output) {
+// Returns A vector (batch) containing log-probabilities.The gradient of `loss`.  3-D, shape:
+// `(max_time x batch_size x num_classes)`.
+func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_values tf.Output, sequence_length tf.Output, optional ...CTCLossAttr) (loss tf.Output, gradient tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"frame_name": frame_name}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Enter",
+		Type: "CTCLoss",
 		Input: []tf.Input{
-			data,
+			inputs, labels_indices, labels_values, sequence_length,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Produce a string tensor that encodes the state of a Reader.
-//
-// Not all Readers support being serialized, so this can produce an
-// Unimplemented error.
+// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder.
+type CTCGreedyDecoderAttr func(optionalAttr)
+
+// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value.
 //
-// Arguments:
-//	reader_handle: Handle to a Reader.
-func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ReaderSerializeStateV2",
-		Input: []tf.Input{
-			reader_handle,
-		},
+// value: If True, merge repeated classes in output.
+// If not specified, defaults to false
+func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr {
+	return func(m optionalAttr) {
+		m["merge_repeated"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Exits the current frame to its parent frame.
+// Performs greedy decoding on the logits given in inputs.
 //
-// Exit makes its input `data` available to the parent frame.
+// A note about the attribute merge_repeated: if enabled, when
+// consecutive logits' maximum indices are the same, only the first of
+// these is emitted.  Labeling the blank '*', the sequence "A B B * B B"
+// becomes "A B B" if merge_repeated = True and "A B B B B" if
+// merge_repeated = False.
+//
+// Regardless of the value of merge_repeated, if the maximum index of a given
+// time and batch corresponds to the blank, index `(num_classes - 1)`, no new
+// element is emitted.
 //
 // Arguments:
-//	data: The tensor to be made available to the parent frame.
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	sequence_length: A vector containing sequence lengths, size `(batch_size)`.
 //
-// Returns The same tensor as `data`.
-func Exit(scope *Scope, data tf.Output) (output tf.Output) {
+// Returns Indices matrix, size `(total_decoded_outputs x 2)`,
+// of a `SparseTensor<int64, 2>`.  The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`,
+// of a `SparseTensor<int64, 2>`.  The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor.
+// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence
+// log-probabilities.
+func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "Exit",
-		Input: []tf.Input{
-			data,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns a copy of the input tensor.
-func Snapshot(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Snapshot",
+		Type: "CTCGreedyDecoder",
 		Input: []tf.Input{
-			input,
+			inputs, sequence_length,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
 
-// Returns a tensor of zeros with the same shape and type as x.
+// Forwards `data` to the output port determined by `pred`.
+//
+// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
+// the data goes to `output_false`.
+//
+// See also `RefSwitch` and `Merge`.
 //
 // Arguments:
-//	x: a tensor of type T.
+//	data: The tensor to be forwarded to the appropriate output.
+//	pred: A scalar that specifies which output port will receive data.
 //
-// Returns a tensor of the same shape and type as x but filled with zeros.
-func ZerosLike(scope *Scope, x tf.Output) (y tf.Output) {
+// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output.
+func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ZerosLike",
+		Type: "Switch",
 		Input: []tf.Input{
-			x,
+			data, pred,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// AbortAttr is an optional argument to Abort.
-type AbortAttr func(optionalAttr)
-
-// AbortErrorMsg sets the optional error_msg attribute to value.
-//
-// value: A string which is the message associated with the exception.
-// If not specified, defaults to ""
-func AbortErrorMsg(value string) AbortAttr {
-	return func(m optionalAttr) {
-		m["error_msg"] = value
-	}
-}
-
-// AbortExitWithoutError sets the optional exit_without_error attribute to value.
-// If not specified, defaults to false
-func AbortExitWithoutError(value bool) AbortAttr {
-	return func(m optionalAttr) {
-		m["exit_without_error"] = value
-	}
-}
-
-// Raise a exception to abort the process when called.
-//
-// If exit_without_error is true, the process will exit normally,
-// otherwise it will exit with a SIGABORT signal.
-//
-// Returns nothing but an exception.
-//
-// Returns the created operation.
-func Abort(scope *Scope, optional ...AbortAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Abort",
-
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
 }
-- 
GitLab


From 7052b44b032a35edb10893ce08993a54e2a76e1d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 09:55:33 -0700
Subject: [PATCH 0863/1357] Roll-forward of CL 214320700: Split up SPARSE_DEPS,
 adding each individual dependency only to the sparse operators that need it.

Automated rollback of commit 120620caf23a044b8aa2db6ba5984384ec936009

PiperOrigin-RevId: 214950946
---
 tensorflow/core/kernels/BUILD | 43 ++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 0b8e9ec527..30171708c1 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4049,11 +4049,6 @@ cc_library(
 )
 
 SPARSE_DEPS = [
-    ":bounds_check",
-    ":cwise_op",
-    ":fill_functor",
-    ":scatter_functor",
-    "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:sparse_ops_op_lib",
@@ -4086,7 +4081,9 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_cross_op",
     prefix = "sparse_cross_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
@@ -4098,13 +4095,19 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_dense_binary_op_shared",
     prefix = "sparse_dense_binary_op_shared",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":cwise_op",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
     name = "sparse_sparse_binary_op_shared",
     prefix = "sparse_sparse_binary_op_shared",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":cwise_op",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
@@ -4136,7 +4139,9 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_softmax",
     prefix = "sparse_softmax",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
@@ -4148,25 +4153,37 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_tensor_dense_add_op",
     prefix = "sparse_tensor_dense_add_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":scatter_functor",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
     name = "sparse_tensor_dense_matmul_op",
     prefix = "sparse_tensor_dense_matmul_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":bounds_check",
+        ":fill_functor",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
     name = "sparse_to_dense_op",
     prefix = "sparse_to_dense_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
     name = "sparse_xent_op",
     prefix = "sparse_xent_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":bounds_check",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(
-- 
GitLab


From b47f0b1b0ac8047d53a824f4ca82a12387a16e4d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 10:05:17 -0700
Subject: [PATCH 0864/1357] Updating the V2 variables API for boosted_trees.

PiperOrigin-RevId: 214952666
---
 .../dnn_tree_combined_estimator_test.py       |  2 +-
 .../python/training/functions/gbdt_batch.py   | 18 +++++++-------
 .../training/functions/gbdt_batch_test.py     | 24 +++++++++----------
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
index 6b6fe9663a..83a8dee632 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
@@ -190,7 +190,7 @@ class CoreDNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase):
     est.train(input_fn=_train_input_fn, steps=1000)
     # 10 steps for dnn + 3 for 1 tree of depth 3 + 1 after the tree finished
     # + 1 for resource variables.
-    self._assert_checkpoint(est.model_dir, global_step=15)
+    self._assert_checkpoint(est.model_dir, global_step=14)
     res = est.evaluate(input_fn=_eval_input_fn, steps=1)
     self.assertLess(0.5, res["auc"])
     est.predict(input_fn=_eval_input_fn)
diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
index c7eb2493a8..8531e97f90 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
@@ -402,13 +402,13 @@ class GradientBoostedDecisionTreeModel(object):
     self._feature_columns = feature_columns
     self._learner_config_serialized = learner_config.SerializeToString()
     self._num_quantiles = num_quantiles
-    self._max_tree_depth = variables.Variable(
+    self._max_tree_depth = variables.VariableV1(
         initial_value=self._learner_config.constraints.max_tree_depth)
-    self._attempted_trees = variables.Variable(
+    self._attempted_trees = variables.VariableV1(
         initial_value=array_ops.zeros([], dtypes.int64),
         trainable=False,
         name="attempted_trees")
-    self._finalized_trees = variables.Variable(
+    self._finalized_trees = variables.VariableV1(
         initial_value=array_ops.zeros([], dtypes.int64),
         trainable=False,
         name="finalized_trees")
@@ -770,28 +770,28 @@ class GradientBoostedDecisionTreeModel(object):
         fc_name_idx += 1
 
       # Create ensemble stats variables.
-      num_layer_examples = variables.Variable(
+      num_layer_examples = variables.VariableV1(
           initial_value=array_ops.zeros([], dtypes.int64),
           name="num_layer_examples",
           trainable=False)
-      num_layer_steps = variables.Variable(
+      num_layer_steps = variables.VariableV1(
           initial_value=array_ops.zeros([], dtypes.int64),
           name="num_layer_steps",
           trainable=False)
-      num_layers = variables.Variable(
+      num_layers = variables.VariableV1(
           initial_value=array_ops.zeros([], dtypes.int64),
           name="num_layers",
           trainable=False)
-      active_tree = variables.Variable(
+      active_tree = variables.VariableV1(
           initial_value=array_ops.zeros([], dtypes.int64),
           name="active_tree",
           trainable=False)
-      active_layer = variables.Variable(
+      active_layer = variables.VariableV1(
           initial_value=array_ops.zeros([], dtypes.int64),
           name="active_layer",
           trainable=False)
       # Variable that becomes false once bias centering is done.
-      continue_centering = variables.Variable(
+      continue_centering = variables.VariableV1(
           initial_value=self._center_bias,
           name="continue_centering",
           trainable=False)
diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
index 9d9941f696..6d20a2e7f4 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
@@ -239,7 +239,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       predictions = array_ops.constant(
           [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
       partition_ids = array_ops.zeros([4], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -503,7 +503,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       predictions = array_ops.constant(
           [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
       partition_ids = array_ops.zeros([4], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -607,7 +607,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       predictions = array_ops.constant(
           [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
       partition_ids = array_ops.zeros([4], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -711,7 +711,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       predictions = array_ops.constant(
           [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
       partition_ids = array_ops.zeros([4], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -783,7 +783,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       predictions = array_ops.constant(
           [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
       partition_ids = array_ops.zeros([4], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -847,7 +847,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       predictions = array_ops.constant(
           [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
       partition_ids = array_ops.zeros([4], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -1090,7 +1090,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       weights = array_ops.ones([batch_size, 1], dtypes.float32)
 
       partition_ids = array_ops.zeros([batch_size], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -1194,7 +1194,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       weights = array_ops.ones([batch_size, 1], dtypes.float32)
 
       partition_ids = array_ops.zeros([batch_size], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -1299,7 +1299,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       weights = array_ops.ones([batch_size, 1], dtypes.float32)
 
       partition_ids = array_ops.zeros([batch_size], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -1405,7 +1405,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       predictions = array_ops.constant(
           [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
       partition_ids = array_ops.zeros([4], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -1524,7 +1524,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       predictions = array_ops.constant(
           [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
       partition_ids = array_ops.zeros([4], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
@@ -1656,7 +1656,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
       predictions = array_ops.constant(
           [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
       partition_ids = array_ops.zeros([4], dtypes.int32)
-      ensemble_stamp = variables.Variable(
+      ensemble_stamp = variables.VariableV1(
           initial_value=0,
           name="ensemble_stamp",
           trainable=False,
-- 
GitLab


From 301e3043e67493ce3777d2b36b43d0210f7b920c Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Fri, 28 Sep 2018 10:25:42 -0700
Subject: [PATCH 0865/1357] Disable auto_shard for MirroredStrategy by default.
 We will re-enable it when it is more robust.

PiperOrigin-RevId: 214956066
---
 tensorflow/contrib/distribute/README.md                  | 3 ++-
 .../contrib/distribute/python/mirrored_strategy.py       | 8 ++++++--
 tensorflow/contrib/distribute/python/values.py           | 9 ++++++---
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/distribute/README.md b/tensorflow/contrib/distribute/README.md
index 91a27f97b7..2e025765e4 100644
--- a/tensorflow/contrib/distribute/README.md
+++ b/tensorflow/contrib/distribute/README.md
@@ -231,7 +231,8 @@ The same `input_fn` will be used for all workers if you use
 important to shuffle your dataset in your `input_fn`.
 
 `MirroredStrategy` will insert a `tf.dataset.Dataset.shard` call in you
-`input_fn`. As a result, each worker gets a fraction of your input data.
+`input_fn` if `auto_shard_dataset` is set to `True`. As a result, each worker
+gets a fraction of your input data.
 
 ### Performance Tips
 
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 504f45a695..93d42e09a2 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -347,6 +347,8 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
       set, the `configure` method will try to find the best one.
     prefetch_on_device: optional boolean to specify whether to prefetch input
       data to devices.
+    auto_shard_dataset: whether to auto-shard the dataset when there are
+      multiple workers.
   """
 
   def __init__(self,
@@ -354,11 +356,13 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
                num_gpus=None,
                num_gpus_per_worker=None,
                cross_tower_ops=None,
-               prefetch_on_device=None):
+               prefetch_on_device=None,
+               auto_shard_dataset=False):
     super(MirroredStrategy, self).__init__()
 
     self._cross_tower_ops = cross_tower_ops
     self._prefetch_on_device = prefetch_on_device
+    self._auto_shard_dataset = auto_shard_dataset
     # Rememeber num GPUs which might be needed by `configure` method.
     if num_gpus is not None and num_gpus_per_worker is not None:
       raise ValueError(
@@ -477,7 +481,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
     if self._cluster_spec:
       return values.MultiWorkerDataset(
           partial(self._call_dataset_fn, dataset_fn), self._worker_device_map,
-          self._prefetch_on_device)
+          self._prefetch_on_device, self._auto_shard_dataset)
     else:
       return values.PerDeviceDataset(
           self._call_dataset_fn(dataset_fn),
diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index cce41e7717..327775a729 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -814,7 +814,8 @@ class MultiWorkerDataset(object):
   eager mode.
   """
 
-  def __init__(self, dataset_fn, worker_device_map, prefetch_on_device=None):
+  def __init__(self, dataset_fn, worker_device_map, prefetch_on_device=None,
+               auto_shard=False):
     """Initialize the MultiWorkerDataset object.
 
     Args:
@@ -822,6 +823,7 @@ class MultiWorkerDataset(object):
       worker_device_map: a dict mapping from each worker to a list of devices
         that belong to this worker.
       prefetch_on_device: whether to prefetch to devices.
+      auto_shard: whether to auto-shard the dataset.
     """
     self._worker_device_map = worker_device_map
     self._datasets = {}
@@ -831,8 +833,9 @@ class MultiWorkerDataset(object):
         six.iteritems(worker_device_map)):
       with ops.device(worker):
         worker_input = dataset_fn()
-        worker_input = input_ops.auto_shard_dataset(
-            worker_input, len(worker_device_map), i)
+        if auto_shard:
+          worker_input = input_ops.auto_shard_dataset(
+              worker_input, len(worker_device_map), i)
         self._datasets[worker] = PerDeviceDataset(
             worker_input,
             worker_devices,
-- 
GitLab


From 0e926947421cc47546efb7f7e2dd8505fbe0ac45 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Fri, 28 Sep 2018 10:56:02 -0700
Subject: [PATCH 0866/1357] [tf.data] Throws appropriate error while trying to
 checkpoint input pipeline with associated stats_aggregator.

PiperOrigin-RevId: 214961678
---
 .../serialization/stats_dataset_serialization_test.py | 11 +++++++++++
 .../core/kernels/data/stats_aggregator_dataset_op.cc  | 10 ++++------
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py b/tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py
index 14cd3e9c4a..a10f85263a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import stats_ops
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
@@ -90,6 +91,16 @@ class StatsDatasetSerializationTest(
         lambda: self._build_dataset_multiple_tags(num_outputs, tag1, tag2),
         None, num_outputs)
 
+  def _build_dataset_stats_aggregator(self):
+    stats_aggregator = stats_ops.StatsAggregator()
+    return dataset_ops.Dataset.range(10).apply(
+        stats_ops.set_stats_aggregator(stats_aggregator))
+
+  def test_set_stats_aggregator_not_support_checkpointing(self):
+    with self.assertRaisesRegexp(errors.UnimplementedError,
+                                 "does not support checkpointing"):
+      self.run_core_tests(self._build_dataset_stats_aggregator, None, 10)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc b/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
index 7e528a71be..c8abfb9eb5 100644
--- a/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
@@ -118,16 +118,14 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
 
      protected:
       Status SaveInternal(IteratorStateWriter* writer) override {
-        mutex_lock l(mu_);
-        TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
-        return Status::OK();
+        return errors::Unimplemented(dataset()->DebugString(),
+                                     " does not support checkpointing");
       }
 
       Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
-        mutex_lock l(mu_);
-        TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
-        return Status::OK();
+        return errors::Unimplemented(dataset()->DebugString(),
+                                     " does not support checkpointing");
       }
 
      private:
-- 
GitLab


From d644fa0542a5a9995512674c7ac708468941fe28 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Fri, 28 Sep 2018 11:10:45 -0700
Subject: [PATCH 0867/1357] [tf.data] Referencing an internal issue.

PiperOrigin-RevId: 214964640
---
 tensorflow/core/kernels/data/map_and_batch_dataset_op.cc       | 2 ++
 tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc | 3 +++
 tensorflow/core/kernels/data/parallel_map_iterator.cc          | 2 ++
 3 files changed, 7 insertions(+)

diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 2bbf4af664..b4c7f9e510 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -37,6 +37,8 @@ namespace {
 // See documentation in ../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
+// TODO(b/116852688): Make coordination between the performance model and this
+// transformation more robust.
 class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  public:
   explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx)
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 2e6e0465f7..2bb38bf0b9 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -1084,6 +1084,9 @@ REGISTER_KERNEL_BUILDER(Name("ParallelInterleaveDataset").Device(DEVICE_CPU),
 // The above design choices were made with automated optimizations in mind,
 // isolating the degree of parallelism as the single tunable knob of this
 // implementation.
+//
+// TODO(b/116852688): Make coordination between the performance model and this
+// transformation more robust.
 class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
  public:
   explicit ParallelInterleaveDatasetV2Op(OpKernelConstruction* ctx)
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index ee20249bfe..8393024c51 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -27,6 +27,8 @@ namespace tensorflow {
 namespace data {
 namespace {
 
+// TODO(b/116852688): Make coordination between the performance model and this
+// transformation more robust.
 class ParallelMapIterator : public DatasetBaseIterator {
  public:
   explicit ParallelMapIterator(
-- 
GitLab


From 1a834d3aa84ba47afe39c22fffd60d03ca151d30 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Fri, 28 Sep 2018 11:12:42 -0700
Subject: [PATCH 0868/1357] Remove @{} api_links and ban "@{}" from python and
 md files.

PiperOrigin-RevId: 214964988
---
 tensorflow/contrib/data/python/ops/optimization.py     |  2 +-
 .../python/collective_all_reduce_strategy.py           |  2 +-
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py     |  4 ++--
 tensorflow/python/distribute/distribute_coordinator.py |  4 ++--
 tensorflow/python/distribute/estimator_training.py     |  2 +-
 tensorflow/python/estimator/estimator.py               | 10 +++++-----
 tensorflow/python/ops/rnn_cell_impl.py                 |  2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/data/python/ops/optimization.py b/tensorflow/contrib/data/python/ops/optimization.py
index 7f5ce97228..30348ede36 100644
--- a/tensorflow/contrib/data/python/ops/optimization.py
+++ b/tensorflow/contrib/data/python/ops/optimization.py
@@ -53,7 +53,7 @@ def model():
 
   Returns:
     A `Dataset` transformation function, which can be passed to
-    @{tf.data.Dataset.apply}.
+    `tf.data.Dataset.apply`.
   """
 
   def _apply_fn(dataset):
diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
index c900b41e14..9809204f8f 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
@@ -216,7 +216,7 @@ class CollectiveAllReduceStrategy(mirrored_strategy.MirroredStrategy):
     """Configures the object.
 
     Args:
-      session_config: a @{tf.ConfigProto}
+      session_config: a `tf.ConfigProto`
       cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the
         cluster configurations.
       task_type: the current task type, such as "worker".
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 23c54511ca..764d85877a 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -231,7 +231,7 @@ class TPUEstimatorSpec(model_fn_lib._TPUEstimatorSpec):  # pylint: disable=prote
   `metric_fn` runs on CPU to generate metrics and `tensors` represents the
   `Tensor`s transferred from TPU system to CPU host and passed to `metric_fn`.
   To be precise, TPU evaluation expects a slightly different signature from the
-  @{tf.estimator.Estimator}. While `EstimatorSpec.eval_metric_ops` expects a
+  `tf.estimator.Estimator`. While `EstimatorSpec.eval_metric_ops` expects a
   dict, `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`.
   The `tensors` could be a list of `Tensor`s or dict of names to `Tensor`s. The
   `tensors` usually specify the model logits, which are transferred back from
@@ -254,7 +254,7 @@ class TPUEstimatorSpec(model_fn_lib._TPUEstimatorSpec):  # pylint: disable=prote
   sending tensors from TPU to CPU. To reduce the overhead, try reducing the
   size of the tensors. The `tensors` are concatenated along their major (batch)
   dimension, and so must be >= rank 1. The `host_call` is useful for writing
-  summaries with @{tf.contrib.summary.create_file_writer}.
+  summaries with `tf.contrib.summary.create_file_writer`.
   """
 
   def __new__(cls,
diff --git a/tensorflow/python/distribute/distribute_coordinator.py b/tensorflow/python/distribute/distribute_coordinator.py
index bd3562f1ff..b9b77d4a5b 100644
--- a/tensorflow/python/distribute/distribute_coordinator.py
+++ b/tensorflow/python/distribute/distribute_coordinator.py
@@ -126,7 +126,7 @@ class _WorkerContext(object):
         replicated training.
       task_id: an integer indicating id of the corresponding task. It can be
         None if it is local training or in-graph replicated training.
-      session_config: an optional @{tf.ConfigProto} object.
+      session_config: an optional `tf.ConfigProto` object.
       rpc_layer: optional string specifying the RPC protocol for communication
         with worker masters. If None or empty, hosts in the `cluster_spec` will
         be used directly.
@@ -685,7 +685,7 @@ def run_distribute_coordinator(worker_fn,
       in a cluster. If not set or empty, fall back to local training.
     task_type: the current task type, optional if this is a client.
     task_id: the current task id, optional if this is a client.
-    session_config: an optional @{tf.ConfigProto} object which will be passed
+    session_config: an optional `tf.ConfigProto` object which will be passed
       to `strategy`'s `configure` method and used to create a session.
     rpc_layer: optional string, the protocol for RPC, e.g. "grpc".
 
diff --git a/tensorflow/python/distribute/estimator_training.py b/tensorflow/python/distribute/estimator_training.py
index 8daa34c885..0289689134 100644
--- a/tensorflow/python/distribute/estimator_training.py
+++ b/tensorflow/python/distribute/estimator_training.py
@@ -62,7 +62,7 @@ def _get_global_id(cluster_spec, task_type, task_id, chief_task_type):
 
   # Sort task names in cluster by "chief"/"master", "evaluator", "worker"
   # and "ps". More details can be found at the documentation of
-  # @{tf.estimator.RunConfig.global_id_in_cluster}.
+  # `tf.estimator.RunConfig.global_id_in_cluster`.
   task_type_ordered_list = []
   if chief_task_type in cluster_spec.jobs:
     task_type_ordered_list = [chief_task_type]
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 827b405e51..b933cedb99 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -144,7 +144,7 @@ class Estimator(object):
           * `labels`: This is the second item returned from the `input_fn`
                  passed to `train`, `evaluate`, and `predict`. This should be a
                  single `tf.Tensor` or `dict` of same (for multi-head models).
-                 If mode is @{tf.estimator.ModeKeys.PREDICT}, `labels=None` will
+                 If mode is `tf.estimator.ModeKeys.PREDICT`, `labels=None` will
                  be passed. If the `model_fn`'s signature does not accept
                  `mode`, the `model_fn` must still be able to handle
                  `labels=None`.
@@ -803,9 +803,9 @@ class Estimator(object):
     those features and labels, and restores the given checkpoint
     (or, lacking that, the most recent checkpoint) into the graph.
     Only one of the modes is used for saving variables to the `SavedModel`
-    (order of preference: @{tf.estimator.ModeKeys#TRAIN$TRAIN},
-    @{tf.estimator.ModeKeys#EVAL$EVAL}, then
-    @{tf.estimator.ModeKeys#PREDICT$PREDICT}), such that up to three
+    (order of preference: `tf.estimator.ModeKeys.TRAIN`,
+    `tf.estimator.ModeKeys.EVAL`, then
+    `tf.estimator.ModeKeys.PREDICT`), such that up to three
     `tf.MetaGraphDefs` are saved with a single set of variables in a single
     `SavedModel` directory.
 
@@ -1101,7 +1101,7 @@ class Estimator(object):
     """Creates the global step tensor in graph.
 
     The global step tensor must be an integer type with name 'global_step' and
-    be added to the collection @{tf.GraphKeys#GLOBAL_STEP$GLOBAL_STEP}.
+    be added to the collection `tf.GraphKeys.GLOBAL_STEP`.
 
     Args:
       graph: The graph in which to create the global step tensor.
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index 43cca1a498..c2751e529a 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -611,7 +611,7 @@ class LSTMStateTuple(_LSTMStateTuple):
 # TODO(scottzhu): Stop exporting this class in TF 2.0.
 @tf_export("nn.rnn_cell.BasicLSTMCell")
 class BasicLSTMCell(LayerRNNCell):
-  """DEPRECATED: Please use @{tf.nn.rnn_cell.LSTMCell} instead.
+  """DEPRECATED: Please use `tf.nn.rnn_cell.LSTMCell` instead.
 
   Basic LSTM recurrent network cell.
 
-- 
GitLab


From 90aa10fcf5c80591b31988754e6221d6c2b8bbd0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 11:28:35 -0700
Subject: [PATCH 0869/1357] internal change only

PiperOrigin-RevId: 214967868
---
 tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
index b498599962..8e6e9aa0cd 100644
--- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
@@ -156,8 +156,7 @@ bool NewSession(const string& service_addr,
           channel_args));
   NewProfileSessionResponse new_session_response;
   TF_QCHECK_OK(FromGrpcStatus(
-      stub->NewSession(&context, new_session_request, &new_session_response)))
-      << new_session_response.error_message();
+      stub->NewSession(&context, new_session_request, &new_session_response)));
 
   std::cout << "Profile session succeed for host(s):"
             << str_util::Join(hostnames, ",") << std::endl;
-- 
GitLab


From e00954e8626c74b263b90527e0c020cfd64136b2 Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Fri, 28 Sep 2018 12:08:42 -0700
Subject: [PATCH 0870/1357] Puts the keras optimizer weights on device.

PiperOrigin-RevId: 214974535
---
 .../contrib/tpu/python/tpu/keras_support.py   | 11 +++-
 .../tpu/python/tpu/keras_tpu_variables.py     | 53 +++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 956d0142a3..696656e840 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -959,7 +959,16 @@ class TPUFunction(object):
 
       # Compute our outfeed depending on the execution mode
       if is_training:
-        self._cloned_model._make_train_function()
+        if not isinstance(self._cloned_optimizer, keras_optimizers.TFOptimizer):
+          # For Keras optimizer, we try to place the variable weights on the TPU
+          # device. Keras creates optimizer variables (e.g. momentum values for
+          # the Momentum optimizer) when _make_train_function is invoked.
+          with keras_tpu_variables.replicated_variable_for_optimizer(
+              self._tpu_assignment.num_towers):
+            self._cloned_model._make_train_function()
+        else:
+          self._cloned_model._make_train_function()
+
         self._outfeed_spec = [
             tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
             for tensor in self._cloned_model.train_function.outputs
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
index 170977d8ab..598da7418e 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
@@ -25,10 +25,15 @@ from __future__ import print_function
 
 import contextlib
 
+import numpy as np
+
 from tensorflow.python.client import session as session_lib
+from tensorflow.python.framework import dtypes as dtypes_module
 from tensorflow.python.framework import ops
+from tensorflow.python.keras import backend
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_resource_variable_ops
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
 
 
@@ -285,3 +290,51 @@ def replicated_scope(num_replicas):
 
   return variable_scope.variable_scope(
       "", custom_getter=_replicated_variable_getter)
+
+
+@contextlib.contextmanager
+def replicated_variable_for_optimizer(num_replicas):
+  """Context manager for optimizer weights. Overrides K.variable."""
+  if num_replicas == 1:
+    yield
+    return
+
+  try:
+    old_v = backend.variable
+
+    def opt_variable(value, dtype=None, name=None, constraint=None):
+      """Instantiates a variable and returns it."""
+      if dtype is None:
+        dtype = backend.floatx()
+
+      variables = []
+      for i in range(num_replicas):
+        # Keras holds the variables in optimizer class instance , so the name
+        # does not matter here. ResourceVariable constructor will find a unique
+        # name (including name=None) for each replica.
+        with ops.device("device:TPU:{}".format(i)):
+          v = resource_variable_ops.ResourceVariable(
+              value,
+              dtype=dtypes_module.as_dtype(dtype),
+              name=name,
+              constraint=constraint)
+          variables.append(v)
+      name = "replicate_{}_{}".format("variable" if name is None else name,
+                                      ops.uid())
+      v = ReplicatedVariable(name, variables)
+
+      # pylint: disable=protected-access
+
+      if isinstance(value, np.ndarray):
+        v._keras_shape = value.shape
+      elif hasattr(value, "shape"):
+        v._keras_shape = backend.int_shape(value)
+      v._uses_learning_phase = False
+      backend.track_variable(v)
+      return v
+
+    backend.variable = opt_variable
+    yield
+
+  finally:
+    backend.variable = old_v
-- 
GitLab


From b5feceb9058e06eac3de86ec45c44f5637054855 Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Tue, 25 Sep 2018 00:42:42 -0700
Subject: [PATCH 0871/1357] Added the feature to disable MKL support of
 TensorFlow by environmental variable TF_DISABLE_MKL=1

---
 .../core/common_runtime/mkl_cpu_allocator.h   | 54 +++++++++++++------
 .../core/common_runtime/process_util.cc       |  5 ++
 .../core/common_runtime/threadpool_device.cc  |  4 ++
 tensorflow/core/graph/mkl_layout_pass.cc      |  5 ++
 .../core/graph/mkl_tfconversion_pass.cc       |  5 ++
 tensorflow/core/util/util.cc                  | 20 +++++++
 tensorflow/core/util/util.h                   |  5 ++
 7 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 429b19599b..516138d28d 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mem.h"
+#include "tensorflow/core/util/util.h"
 #include "tensorflow/core/platform/numa.h"
 
 #ifndef INTEL_MKL_DNN_ONLY
@@ -163,6 +164,12 @@ class MklCPUAllocator : public Allocator {
   }
 
   Status Initialize() {
+    if (DisableMKL()) {
+        VLOG(1) << "TF-MKL: Disabling pool allocator";
+        tf_disable_pool_allocator_flag_ = true;
+        return Status::OK();
+    }
+
     VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
 
     // Set upper bound on memory allocation to physical RAM available on the
@@ -217,6 +224,10 @@ class MklCPUAllocator : public Allocator {
   inline string Name() override { return kName; }
 
   inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
+    if (tf_disable_pool_allocator_flag_) {
+      return port::AlignedMalloc(num_bytes, alignment);
+    }
+
     // If the allocation size is less than threshold, call small allocator,
     // otherwise call large-size allocator (BFC). We found that BFC allocator
     // does not deliver good performance for small allocations when
@@ -227,6 +238,10 @@ class MklCPUAllocator : public Allocator {
   }
 
   inline void DeallocateRaw(void* ptr) override {
+    if (tf_disable_pool_allocator_flag_) {
+      port::AlignedFree(ptr);
+      return;
+    }
     // Check if ptr is for "small" allocation. If it is, then call Free
     // directly. Otherwise, call BFC to handle free.
     if (small_size_allocator_->IsSmallSizeAllocation(ptr)) {
@@ -237,26 +252,30 @@ class MklCPUAllocator : public Allocator {
   }
 
   void GetStats(AllocatorStats* stats) override {
-    AllocatorStats l_stats, s_stats;
-    small_size_allocator_->GetStats(&s_stats);
-    large_size_allocator_->GetStats(&l_stats);
-
-    // Combine statistics from small-size and large-size allocator.
-    stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
-    stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
-    stats->max_bytes_in_use =
-        l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
-
-    // Since small-size allocations go to MklSmallSizeAllocator,
-    // max_alloc_size from large_size_allocator would be the maximum
-    // size allocated by MklCPUAllocator.
-    stats->max_alloc_size = l_stats.max_alloc_size;
-    stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
+    if (!tf_disable_pool_allocator_flag_) {
+      AllocatorStats l_stats, s_stats;
+      small_size_allocator_->GetStats(&s_stats);
+      large_size_allocator_->GetStats(&l_stats);
+
+      // Combine statistics from small-size and large-size allocator.
+      stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
+      stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
+      stats->max_bytes_in_use =
+          l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
+
+      // Since small-size allocations go to MklSmallSizeAllocator,
+      // max_alloc_size from large_size_allocator would be the maximum
+      // size allocated by MklCPUAllocator.
+      stats->max_alloc_size = l_stats.max_alloc_size;
+      stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
+    }
   }
 
   void ClearStats() override {
-    small_size_allocator_->ClearStats();
-    large_size_allocator_->ClearStats();
+    if (!tf_disable_pool_allocator_flag_) {
+      small_size_allocator_->ClearStats();
+      large_size_allocator_->ClearStats();
+    }
   }
 
  private:
@@ -295,6 +314,7 @@ class MklCPUAllocator : public Allocator {
   // The alignment that we need for the allocations
   static constexpr const size_t kAlignment = 64;
 
+  bool tf_disable_pool_allocator_flag_ = false;
   Allocator* large_size_allocator_;              // owned by this class
   MklSmallSizeAllocator* small_size_allocator_;  // owned by this class.
 
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index a5d31b75c7..60fa601907 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/util.h"
 
 namespace tensorflow {
 
@@ -56,6 +57,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op != 0) return inter_op;
 #ifdef INTEL_MKL
+  // Early return if MKL is disabled
+  if (DisableMKL())
+    return port::NumSchedulableCPUs();
+
   // MKL library executes ops in parallel using OMP threads
   // Set inter_op conservatively to avoid thread oversubscription that could
   // lead to severe perf degradations and OMP resource exhaustion
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 8587d1783a..29c01d7f72 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/util/util.h"
 
 #ifdef INTEL_MKL
 #ifdef _OPENMP
@@ -49,6 +50,9 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       allocator_(allocator),
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
+  // Eearly return when MKL is disabled
+  if (DisableMKL())
+    return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 06d3fefef1..7394b1cddf 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -38,6 +38,7 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_layout_pass.h"
@@ -4511,6 +4512,10 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of a graph
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index 8c5ffd71a3..6804ab84ce 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_tfconversion_pass.h"
@@ -424,6 +425,10 @@ Status MklToTfConversionPass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of graph
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 1e5a9c5712..44d5becb9c 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -120,4 +120,24 @@ string SliceDebugString(const TensorShape& shape, const int64 flat) {
   return result;
 }
 
+#ifdef INTEL_MKL
+bool DisableMKL() {
+  enum MklStatus {
+    MKL_DEFAULT = 0,
+    MKL_ON = 1,
+    MKL_OFF = 2
+  };
+  static MklStatus status = MKL_DEFAULT;
+  if (status == MKL_DEFAULT) {
+    char* tf_disable_mkl = getenv("TF_DISABLE_MKL");
+    if ((tf_disable_mkl != NULL) && (std::stoi(tf_disable_mkl) == 1)) {
+      VLOG(2) << "TF-MKL: Disabling MKL";
+      status = MKL_OFF;
+    } else {
+      status = MKL_ON;
+    }
+  }
+  return status == MKL_OFF ? true : false;
+}
+#endif
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index 93dfd51ab5..ba90ad52c2 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -56,6 +56,11 @@ string PrintMemory(const char* ptr, size_t n);
 // "tensor", "tensor[i]", "tensor[i, j]", etc.
 string SliceDebugString(const TensorShape& shape, const int64 flat);
 
+// disable MKL in runtime
+#ifdef INTEL_MKL
+bool DisableMKL();
+#endif
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_UTIL_UTIL_H_
-- 
GitLab


From 5e66d25666aad9fa76ed8cc0d2b162db76ea0cc8 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Fri, 28 Sep 2018 12:46:10 -0700
Subject: [PATCH 0872/1357] Add flag for enabling while_v2. Add a single test
 flag for enabling v2 control flow in tests since we do not plan to support v2
 ops with legacy control flow. We have 2 test decorators now:
 @with_control_flow_v2: Enables all tests in a class to run with v2 control
 flow. @disable_control_flow_v2: Disables a test function from running in v2.
 I have removed the skiptests to avoid setup/teardown overheads. Enable tests
 in control_flow_ops_py_test that run with control_flow_v2.

PiperOrigin-RevId: 214980108
---
 tensorflow/python/BUILD                       |   1 +
 tensorflow/python/framework/test_util.py      |  84 ++++++--
 tensorflow/python/kernel_tests/BUILD          |   3 +-
 .../kernel_tests/control_flow_ops_py_test.py  | 180 +++++++++---------
 tensorflow/python/ops/control_flow_ops.py     |  16 ++
 tensorflow/python/ops/while_v2.py             |   4 +
 6 files changed, 187 insertions(+), 101 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 91cafea042..9275ad767e 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2017,6 +2017,7 @@ py_library(
         ":array_ops",
         ":cond_v2_impl",
         ":constant_op",
+        ":control_flow_ops",
         ":control_flow_util",
         ":framework_ops",
         ":function_def_to_graph",
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index cd0b03be43..6673bc5561 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -24,8 +24,8 @@ from collections import OrderedDict
 import contextlib
 import gc
 import itertools
-import os
 import math
+import os
 import random
 import re
 import tempfile
@@ -402,11 +402,14 @@ def with_c_shapes(cls):
   return cls
 
 
-def enable_cond_v2(fn):
-  """Decorator for enabling CondV2 on a test.
+def enable_control_flow_v2(fn):
+  """Decorator for enabling CondV2 and WhileV2 on a test.
 
-  Note this enables using CondV2 after running the test class's setup/teardown
-  methods.
+  Note this enables using CondV2 and WhileV2 after running the test class's
+  setup/teardown methods.
+
+  In addition to this, callers must import the while_v2 module in order to set
+  the _while_v2 module in control_flow_ops.
 
   Args:
     fn: the function to be wrapped
@@ -416,21 +419,56 @@ def enable_cond_v2(fn):
   """
 
   def wrapper(*args, **kwargs):
-    prev_value = control_flow_ops.ENABLE_COND_V2
+    enable_cond_v2_old = control_flow_ops.ENABLE_COND_V2
+    enable_while_v2_old = control_flow_ops.ENABLE_WHILE_V2
     control_flow_ops.ENABLE_COND_V2 = True
+    control_flow_ops.ENABLE_WHILE_V2 = True
     try:
       fn(*args, **kwargs)
     finally:
-      control_flow_ops.ENABLE_COND_V2 = prev_value
+      control_flow_ops.ENABLE_COND_V2 = enable_cond_v2_old
+      control_flow_ops.ENABLE_WHILE_V2 = enable_while_v2_old
 
   return wrapper
 
 
-def with_cond_v2(cls):
-  """Adds methods that call original methods but with CondV2 enabled.
+def with_control_flow_v2(cls):
+  """Adds methods that call original methods with WhileV2 and CondV2 enabled.
 
-  Note this enables CondV2 in new methods after running the test class's
-  setup method.
+  Note this enables CondV2 and WhileV2 in new methods after running the test
+  class's setup method.
+
+  In addition to this, callers must import the while_v2 module in order to set
+  the _while_v2 module in control_flow_ops.
+
+  If a test function has _disable_control_flow_v2 attr set to True (using the
+  @disable_control_flow_v2 decorator), the v2 function is not generated for it.
+
+  Example:
+
+  @test_util.with_control_flow_v2
+  class ControlFlowTest(test.TestCase):
+
+    def testEnabledForV2(self):
+      ...
+
+    @test_util.disable_control_flow_v2("b/xyzabc")
+    def testDisabledForV2(self):
+      ...
+
+  Generated class:
+  class ControlFlowTest(test.TestCase):
+
+    def testEnabledForV2(self):
+      ...
+
+    def testEnabledForV2WithControlFlowV2(self):
+      // Enable V2 flags.
+      testEnabledForV2(self)
+      // Restore V2 flags.
+
+    def testDisabledForV2(self):
+      ...
 
   Args:
     cls: class to decorate
@@ -438,15 +476,33 @@ def with_cond_v2(cls):
   Returns:
     cls with new test methods added
   """
-  if control_flow_ops.ENABLE_COND_V2:
+  if control_flow_ops.ENABLE_WHILE_V2 and control_flow_ops.ENABLE_COND_V2:
     return cls
 
   for name, value in cls.__dict__.copy().items():
-    if callable(value) and name.startswith("test"):
-      setattr(cls, name + "WithCondV2", enable_cond_v2(value))
+    if (callable(value) and name.startswith("test") and
+        not getattr(value, "_disable_control_flow_v2", False)):
+      setattr(cls, name + "WithControlFlowV2", enable_control_flow_v2(value))
   return cls
 
 
+def disable_control_flow_v2(unused_msg):
+  """Decorator for a function in a with_control_flow_v2 enabled test class.
+
+  Blocks the function from being run with v2 control flow ops.
+
+  Args:
+    unused_msg: Reason for disabling.
+
+  Returns:
+    The wrapped function with _disable_control_flow_v2 attr set to True.
+  """
+  def wrapper(func):
+    func._disable_control_flow_v2 = True
+    return func
+  return wrapper
+
+
 def assert_no_new_pyobjects_executing_eagerly(f):
   """Decorator for asserting that no new Python objects persist after a test.
 
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 280c18ec00..65b9e04ed9 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1480,7 +1480,7 @@ cuda_py_test(
     name = "control_flow_ops_py_test",
     # TODO(b/70473603): change this back to "small" once the C API is
     # permanently enabled
-    size = "medium",
+    size = "large",
     srcs = ["control_flow_ops_py_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -1512,6 +1512,7 @@ cuda_py_test(
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:variables",
+        "//tensorflow/python:while_v2",
     ],
 )
 
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 083de84775..d91a848e01 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -23,7 +23,6 @@ from __future__ import print_function
 import collections
 import math
 import time
-import unittest
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
@@ -63,6 +62,7 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
+from tensorflow.python.ops import while_v2  # pylint: disable=unused-import
 # pylint: disable=unused-import
 import tensorflow.python.ops.tensor_array_grad
 # pylint: enable=unused-import
@@ -125,7 +125,7 @@ def isum(s, maximum_iterations=None):
   return r_s
 
 
-@test_util.with_cond_v2
+@test_util.with_control_flow_v2
 class ControlFlowTest(test.TestCase):
 
   def testRefIdentity(self):
@@ -332,10 +332,8 @@ class ControlFlowTest(test.TestCase):
       with self.assertRaisesOpError("has inputs from different frames"):
         res.eval(feed_dict={data: 1.0})
 
+  @test_util.disable_control_flow_v2("b/113294340")
   def testCondBool(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113296297")
-
     values = constant_op.constant(10)
     fn1 = lambda: math_ops.add(values, 1)
     fn2 = lambda: math_ops.subtract(values, 1)
@@ -366,6 +364,7 @@ class ControlFlowTest(test.TestCase):
                                          "has been marked as not fetchable"):
               sess.run(t, feed_dict={x: 3})
 
+  @test_util.disable_control_flow_v2("Not relevant")
   def testFeedable(self):
     with self.cached_session() as sess:
       c = constant_op.constant(2)
@@ -383,10 +382,8 @@ class ControlFlowTest(test.TestCase):
             with self.assertRaisesRegexp(ValueError, "may not be fed"):
               sess.run(r, feed_dict={t: 3})
 
+  @test_util.disable_control_flow_v2("b/113296180 (IndexedSlices)")
   def testCondIndexedSlices(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113296180")
-
     with self.cached_session():
       values = constant_op.constant(10)
       indices = constant_op.constant(0)
@@ -401,10 +398,8 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(11, val)
     self.assertAllEqual(0, ind)
 
+  @test_util.disable_control_flow_v2("b/113296161 (SparseTensors)")
   def testCondSparseTensor(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113296161 (SparseTensors)")
-
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
       indices = constant_op.constant(
@@ -435,10 +430,8 @@ class ControlFlowTest(test.TestCase):
 
       self.assertEqual(1.0, control_flow_ops.cond(rv, case, lambda: t).eval())
 
+  @test_util.disable_control_flow_v2("b/113293074")
   def testCondIndexedSlicesDifferentTypes(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113293074")
-
     with self.cached_session():
       values = constant_op.constant(10)
       i_32 = ops.convert_to_tensor(0, name="one", dtype=dtypes.int32)
@@ -510,10 +503,8 @@ class ControlFlowTest(test.TestCase):
       result = r.eval()
     self.assertAllEqual(12, result)
 
+  @test_util.disable_control_flow_v2("b/113324949 (ref vars)")
   def testCond_4(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113324949 (ref vars)")
-
     with self.cached_session():
       v1 = variables.Variable(7)
       v2 = variables.Variable(7)
@@ -587,10 +578,8 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(constant_op.constant(False), true_fn, false_fn)
       self.assertAllEqual([2.0], r.eval())
 
+  @test_util.disable_control_flow_v2("b/79881896 (control deps)")
   def testCondWithControl(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/79881896")
-
     with self.cached_session():
       control_holder = array_ops.placeholder(dtypes.float32, shape=())
       a = constant_op.constant(3)
@@ -629,10 +618,9 @@ class ControlFlowTest(test.TestCase):
       merged_op = control_flow_ops.merge([assign_v, orig_v])
       self.assertAllEqual([1.0], sess.run(merged_op.output))
 
+  @test_util.disable_control_flow_v2(
+      "b/112477618 (Operation returned from cond)")
   def testCondSwitchIdentity(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/112477618 (Operation returned from cond)")
-
     # Make sure the recv identity is not removed by optimization.
     with session.Session(config=opt_cfg()) as sess:
       pred = constant_op.constant(True)
@@ -646,10 +634,9 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
       sess.run(r)
 
+  @test_util.disable_control_flow_v2(
+      "b/112477618 (Operation returned from cond)")
   def testCondRecvIdentity(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/112477618 (Operation returned from cond)")
-
     # Make sure the switch identity is not removed by optimization.
     with session.Session(config=opt_cfg()) as sess:
       with ops.device(test.gpu_device_name()):
@@ -665,10 +652,8 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
       sess.run(r)
 
+  @test_util.disable_control_flow_v2("b/113346829 (gpu failure)")
   def testCondGrad_1(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113346829 (gpu failure)")
-
     graph = ops.Graph()
     with graph.as_default():
       x = constant_op.constant(10.0, name="x")
@@ -694,10 +679,9 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(42.0, grad.eval(feed_dict={c: 1}))
       self.assertAllEqual(3.0, grad.eval(feed_dict={c: 3}))
 
+  @test_util.disable_control_flow_v2(
+      "b/110550782 (gradient w.r.t external variable)")
   def testCondGrad_3(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/110550782 (gradient w.r.t external variable)")
-
     with self.cached_session():
       c = array_ops.placeholder(dtypes.int32, shape=[])
       ox = constant_op.constant(10.0)
@@ -729,10 +713,8 @@ class ControlFlowTest(test.TestCase):
       result = gradients_impl.gradients(z, x)[0]
       self.assertEqual(1.0, result.eval())
 
+  @test_util.disable_control_flow_v2("b/113327884")
   def testCondGrad_Gather(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113327884")
-
     with self.cached_session() as sess:
       v1 = variables.Variable([1.0, 42.0])
       c = array_ops.placeholder(dtypes.int32, shape=[])
@@ -756,6 +738,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(dense_gv, [0.0, 2.0])
 
   # Microbenchmark: 256,000 iterations/s.
+  @test_util.disable_control_flow_v2("b/116630618 (Times out)")
   def testWhile_1(self):
     with self.cached_session():
       n = constant_op.constant(0)
@@ -764,6 +747,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n], parallel_iterations=20)
       self.assertEqual(10000, r.eval())
 
+  @test_util.disable_control_flow_v2("b/79881896 (control deps)")
   def testWhileExternalControlDependencies(self):
     with self.cached_session():
       v = variables.Variable(0.0)
@@ -779,6 +763,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(result.eval(), 2)
       self.assertAllEqual(v.eval(), 1.0)
 
+  @test_util.disable_control_flow_v2("b/79881896 (control deps)")
   def testWhileExternalControlDependenciesNoInput(self):
     with self.cached_session():
       v = variables.Variable(0.0)
@@ -794,6 +779,7 @@ class ControlFlowTest(test.TestCase):
       result.eval()
       self.assertAllEqual(v.eval(), 1.0)
 
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileWithRefs_1(self):
     with self.cached_session() as sess:
       x = variables.VariableV1(0)._ref()  # pylint: disable=protected-access
@@ -824,18 +810,22 @@ class ControlFlowTest(test.TestCase):
       r = isum(s)
       self.assertAllEqual(45, r.eval())
 
+  @test_util.disable_control_flow_v2("b/115776323 (max_iters)")
   def testWhileWithMaximumIterations(self):
     with self.cached_session():
       s = constant_op.constant([1, 2, 3, 4, 5])
       r = isum(s, maximum_iterations=3)
       self.assertAllEqual([1 + 3, 2 + 3, 3 + 3, 4 + 3, 5 + 3], r.eval())
 
+  @test_util.disable_control_flow_v2("b/116339888 (non-tensor loop var)")
   def testWhileWithMaximumIterationsAndSingleArgument(self):
     with self.cached_session():
       r = control_flow_ops.while_loop(
           lambda i: i < 3, lambda i: i + 1, [0], maximum_iterations=1)
       self.assertEqual(1, r.eval())
 
+  @test_util.disable_control_flow_v2(
+      "b/116248044 (nested), b/115920078 (gradients)")
   def testSingleNestedMaximumIterationsWhileLoopGradientInXLAContext(self):
     v = constant_op.constant(1.0)
 
@@ -861,6 +851,7 @@ class ControlFlowTest(test.TestCase):
     # Should execute without issue.
     self.assertEqual(3, self.evaluate(loop_execute))
 
+  @test_util.disable_control_flow_v2("b/116248044 (nested while_loop)")
   def testInvalidMaximumIterationsWhileLoopGradientInXLAContext(self):
     v = constant_op.constant(1.0)
 
@@ -904,10 +895,8 @@ class ControlFlowTest(test.TestCase):
         r"context '.*' \(currently defined in '.*'\)"):
       _ = gradients_impl.gradients(loop_with_maxiter, v)
 
+  @test_util.disable_control_flow_v2("b/115776323 (max_iters)")
   def testInvalidMaximumIterationsFromSiblingContextWhileLoopInXLAContext(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294340 (enable while_v2)")
-
     v = constant_op.constant(1.0)
 
     def create_while_loop():
@@ -939,6 +928,8 @@ class ControlFlowTest(test.TestCase):
         r"while loop context '' \(currently defined in 'cond/.+'\)"):
       _ = gradients_impl.gradients(loop, v)
 
+  @test_util.disable_control_flow_v2(
+      "b/116248044 (nesting), b/115776323 (max_iters)")
   def testNestedWhileLoopWithMaxItersFromOuterContextInXLAContext(self):
     v = constant_op.constant(1.0)
 
@@ -1048,6 +1039,7 @@ class ControlFlowTest(test.TestCase):
       result = r[3].eval()
     self.assertAllEqual(42, result)
 
+  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhile_5(self):
     with self.cached_session():
 
@@ -1072,6 +1064,7 @@ class ControlFlowTest(test.TestCase):
       result = r[2].eval()
     self.assertAllEqual(np.array([0, 1, 2, 3, 4, 5, 6]), result)
 
+  @test_util.disable_control_flow_v2("b/116338794 (buffer_reuse)")
   def testBufferForwarding(self):
     run_options = config_pb2.RunOptions(
         trace_level=config_pb2.RunOptions.FULL_TRACE)
@@ -1122,6 +1115,7 @@ class ControlFlowTest(test.TestCase):
     self._testWhile_Gpu_1(use_gpu=False)
     self._testWhile_Gpu_1(use_gpu=True)
 
+  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileShape(self):
     with self.cached_session():
       i = constant_op.constant(0)
@@ -1139,6 +1133,7 @@ class ControlFlowTest(test.TestCase):
       r = r[1] * array_ops.ones([8, 8])
       self.assertAllEqual(np.ones((8, 8)), r.eval())
 
+  @test_util.disable_control_flow_v2("b/116339888 (non-tensor loop var)")
   def testWhileWithNonTensorInput_Scalar(self):
     with self.cached_session():
       n = 0
@@ -1147,6 +1142,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n], parallel_iterations=20)
       self.assertEqual(10000, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116339888 (non-tensor loop var)")
   def testWhileWithNonTensorInput_Vector(self):
     with self.cached_session():
       n = np.array([0])  # Note, [0] would not work here; that is a list
@@ -1155,6 +1151,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n], parallel_iterations=20)
       self.assertEqual([10000], r.eval())
 
+  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileShapeInference(self):
     with self.cached_session():
       i = constant_op.constant(0)
@@ -1169,7 +1166,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(
           c, b, [i, m],
           [i.get_shape(), tensor_shape.TensorShape([None, 2])])
-      self.assertTrue(r[1].get_shape()[0].value is None)
+      self.assertIsNone(r[1].get_shape()[0].value)
       self.assertEqual(r[1].get_shape()[1], tensor_shape.Dimension(2))
 
       with self.assertRaisesRegexp(
@@ -1180,6 +1177,7 @@ class ControlFlowTest(test.TestCase):
           r"tf.while_loop to specify a less-specific shape."):
         r = control_flow_ops.while_loop(c, b, [i, m])
 
+  @test_util.disable_control_flow_v2("b/116328420 (SparseTensor)")
   def testWhileShapeInferenceSparseTensor(self):
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
@@ -1211,6 +1209,7 @@ class ControlFlowTest(test.TestCase):
             c, b, [i, x],
             [i.get_shape(), tensor_shape.TensorShape([5])])
 
+  @test_util.disable_control_flow_v2("b/116282023 (IndexedSlices)")
   def testWhileShapeInferenceIndexedSlices(self):
     with self.cached_session():
       values = constant_op.constant([[2.0, 4.0], [3.0, 5.0]], name="values")
@@ -1265,6 +1264,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n])
       self.assertEqual(225, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116248044 (nested while)")
   def testNestedWhile_1(self):
     self._testNestedWhile_1(use_gpu=False)
     self._testNestedWhile_1(use_gpu=True)
@@ -1297,6 +1297,7 @@ class ControlFlowTest(test.TestCase):
           outer_c, outer_b, [s0], parallel_iterations=1)
       self.assertEqual(1048576.0, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116248044 (nested while)")
   def testNestedWhile_2(self):
     self._testNestedWhile_2(use_gpu=False)
     self._testNestedWhile_2(use_gpu=True)
@@ -1350,6 +1351,7 @@ class ControlFlowTest(test.TestCase):
             lambda x: x < 10, lambda x: x + array_ops.identity(c), [x0])
       self.assertEqual(10, sess.run(r, {b: True}))
 
+  @test_util.disable_control_flow_v2("b/79881896 (control_deps)")
   def testWhileWithControl_5(self):
     with self.cached_session() as sess:
       b = array_ops.placeholder(dtypes.bool)
@@ -1364,9 +1366,6 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10, sess.run(r, {b: True}))
 
   def testWhileCondWithControl(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294377 (unknown shape)")
-
     # Ensure that no control edges by an outer control dependency context are
     # added to nodes inside cond/while contexts.
     with self.cached_session() as sess:
@@ -1380,10 +1379,8 @@ class ControlFlowTest(test.TestCase):
                                            (constant_op.constant(5),))
       self.assertEqual(0, sess.run(loop))
 
+  @test_util.disable_control_flow_v2("b/113324949 (ref vars)")
   def testWhileCondWithControl_1(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113324949 (ref vars)")
-
     with self.cached_session():
       v = variable_scope.get_variable(
           "v", [], initializer=init_ops.constant_initializer(2))
@@ -1405,9 +1402,8 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(4, r.eval())
       self.assertAllClose(65536.0, v.eval())
 
+  @test_util.disable_control_flow_v2("b/113324949 (ref vars)")
   def testWhileCondExitControl(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294340 (enable while_v2)")
 
     with self.cached_session():
       v = variables.Variable(1)
@@ -1432,8 +1428,6 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(99, v.eval())
 
   def testCondWhile_1(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294340 (enable while_v2)")
 
     with self.cached_session():
       n = ops.convert_to_tensor(0, name="n")
@@ -1445,8 +1439,6 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(10, r.eval())
 
   def testCondWhile_2(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294340 (enable while_v2)")
 
     with self.cached_session():
       n = ops.convert_to_tensor(0)
@@ -1458,9 +1450,6 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(10, r.eval())
 
   def _testCondWhile_3(self, use_gpu):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294340 (enable while_v2)")
-
     with self.test_session(use_gpu=use_gpu) as sess:
       p = array_ops.placeholder(dtypes.bool)
       n = constant_op.constant(0.0)
@@ -1477,18 +1466,17 @@ class ControlFlowTest(test.TestCase):
                                 lambda: control_flow_ops.while_loop(c, b, [n]),
                                 lambda: math_ops.multiply(n, 2.0))
       r1 = gradients_impl.gradients(r, [n])
-      self.assertEqual(10, sess.run(r, {p: True}))
+      self.assertEqual(10., sess.run(r, {p: True}))
       self.assertEqual([1.0], sess.run(r1, {p: True}))
       self.assertEqual(0.0, sess.run(r, {p: False}))
       self.assertEqual([2.0], sess.run(r1, {p: False}))
 
+  @test_util.disable_control_flow_v2("b/116743589")
   def testCondWhile_3(self):
     self._testCondWhile_3(use_gpu=False)
     self._testCondWhile_3(use_gpu=True)
 
   def testWhileCond_1(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294377 (unknown shape)")
 
     with self.cached_session():
       i = ops.convert_to_tensor(0, name="i")
@@ -1505,8 +1493,6 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(10, r.eval())
 
   def testWhileCond_2(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294377 (unknown shape)")
 
     with self.cached_session():
       n = ops.convert_to_tensor(0, name="n")
@@ -1516,8 +1502,6 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(10, r.eval())
 
   def testWhileCond_3(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294377 (unknown shape)")
 
     with self.cached_session():
       n = ops.convert_to_tensor(0)
@@ -1532,6 +1516,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(10, r.eval())
 
   # NOTE: It is ok to have parallel_iterations > 1
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileUpdateVariable_1(self):
     with self.cached_session():
       select = variables.Variable([3.0, 4.0, 5.0])
@@ -1554,6 +1539,7 @@ class ControlFlowTest(test.TestCase):
       result = select.eval()
       self.assertAllClose(np.array([10.0, 10.0, 10.0]), result)
 
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileUpdateVariable_2(self):
     with self.cached_session():
       select1 = variables.Variable([3.0, 4.0, 5.0])
@@ -1580,6 +1566,7 @@ class ControlFlowTest(test.TestCase):
       result2 = select2.eval()
       self.assertAllClose(np.array([10.0, 10.0, 10.0]), result2)
 
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileUpdateVariable_3(self):
     with self.cached_session():
       select = variables.Variable([3.0, 4.0, 5.0])
@@ -1601,7 +1588,7 @@ class ControlFlowTest(test.TestCase):
       result = r[1].eval()
     self.assertAllClose(np.array([10.0, 10.0, 10.0]), result)
 
-  # b/24814703
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileUpdateVariable_4(self):
     with self.cached_session():
       var_a = variables.Variable(0, name="a")
@@ -1629,7 +1616,7 @@ class ControlFlowTest(test.TestCase):
       lpa.eval()  # Run the loop
       self.assertEqual(10, var_b.eval())
 
-  # b/24736492
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileUpdateVariable_5(self):
     with self.cached_session():
       # Create some variables.
@@ -1659,7 +1646,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10, var_a.eval())
       self.assertEqual(10, var_b.eval())
 
-  # b/24814668
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileUpdateVariable_6(self):
     with self.cached_session():
       # Create some variables.
@@ -1689,6 +1676,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(55, var_b.eval())
       self.assertEqual(10, var_a.eval())
 
+  @test_util.disable_control_flow_v2("b/116742472 (resource accumulator)")
   def testWhileQueue_1(self):
     with self.cached_session():
       q = data_flow_ops.FIFOQueue(-1, dtypes.int32)
@@ -1707,6 +1695,7 @@ class ControlFlowTest(test.TestCase):
       for i in xrange(10):
         self.assertEqual([i], q.dequeue().eval())
 
+  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileStack_1(self):
     with self.cached_session():
       s = gen_data_flow_ops.stack_v2(-1, dtypes.int32, stack_name="foo")
@@ -1775,6 +1764,7 @@ class ControlFlowTest(test.TestCase):
     with self.session(graph=graph) as sess:
       self.assertAllClose(1024.0, sess.run(r))
 
+  @test_util.disable_control_flow_v2("b/116351701 (colocation)")
   def testWhileGrad_ColocateGradients(self):
     self._testWhileGrad_ColocateGradients(colocate=False)
     self._testWhileGrad_ColocateGradients(colocate=True)
@@ -1790,6 +1780,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(1024.0, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileGrad_Shape(self):
     with self.cached_session():
       x = array_ops.placeholder(dtypes.float32, shape=[None])
@@ -1861,8 +1852,6 @@ class ControlFlowTest(test.TestCase):
     self._testWhileGrad_Mul(use_gpu=True, p_iters=10)
 
   def _testNestedWhileCondWhileGrad(self, use_gpu):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294377 (unknown shape)")
 
     with self.test_session(use_gpu=use_gpu):
       v = constant_op.constant(1.0)
@@ -1885,10 +1874,12 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(512.0, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116248044 (nested while)")
   def testNestedWhileCondWhileGrad(self):
     self._testNestedWhileCondWhileGrad(use_gpu=False)
     self._testNestedWhileCondWhileGrad(use_gpu=True)
 
+  @test_util.disable_control_flow_v2("b/116823782")
   def testWhileGrad_Variable(self):
     with self.cached_session():
       a = variables.Variable(3.0)
@@ -1902,8 +1893,6 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(216.0, r[0].eval())
 
   def testWhileGradInCond(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/110550782 (gradient w.r.t external variable)")
 
     with self.cached_session():
       n = ops.convert_to_tensor(1.0, name="n")
@@ -1919,6 +1908,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(math_ops.less(1, 2), fn1, lambda: x)
       self.assertAllClose(9.0, r.eval(feed_dict={x: 1.0}))
 
+  @test_util.disable_control_flow_v2("b/116340060")
   def testGradInWhileWrtInitialLoopVal(self):
     with self.cached_session():
       x = array_ops.placeholder(dtypes.float32, shape=(), name="x")
@@ -1936,6 +1926,7 @@ class ControlFlowTest(test.TestCase):
           "loop invariants or wrt the input parameters to the loop body."):
         control_flow_ops.while_loop(lambda i, x: i < 3, body, [0, y])
 
+  @test_util.disable_control_flow_v2("b/116248044 (nested while)")
   def testWhileGradInWhile(self):
     with self.cached_session():
       n = ops.convert_to_tensor(1.0, name="n")
@@ -1952,9 +1943,8 @@ class ControlFlowTest(test.TestCase):
                                       [tensor_shape.unknown_shape()])
       self.assertAllClose(9.0, r.eval(feed_dict={x: 1.0}))
 
+  @test_util.disable_control_flow_v2("b/116248044 (nested while)")
   def testCondGradInNestedWhiles(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113346829 (gpu failure)")
 
     def outer_body(i, x):
       _, x = control_flow_ops.while_loop(
@@ -1972,6 +1962,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(i_val, 3)
       self.assertAllClose(x_val, 1.0)
 
+  @test_util.disable_control_flow_v2("b/116255781 (flat_args)")
   def testWhile_NestedInput(self):
     with self.cached_session() as sess:
       named = collections.namedtuple("named", ("a", "b"))
@@ -1999,6 +1990,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual([100.0, 1.0, 102.0, 3.0, 4.0 + 100 * 2.0],
                        sess.run(r_flattened))
 
+  @test_util.disable_control_flow_v2("b/116255781(flat_args)")
   def testWhile_NestedBadArityFails(self):
     with self.cached_session():
       named = collections.namedtuple("named", ("a", "b"))
@@ -2057,6 +2049,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients([rx], x)
       self.assertAllClose(1024.0, r[0].eval())
 
+  @test_util.disable_control_flow_v2("b/116355153 (back_prop flag)")
   def testWhileGrad_NoGradient(self):
     with self.cached_session():
       v = constant_op.constant(2.0, name="v")
@@ -2067,6 +2060,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)
       self.assertAllClose(1.0, r[0].eval())
 
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileGrad_NoDependency(self):
     with self.cached_session() as sess:
       variable = variables.Variable(array_ops.ones([2, 3]))
@@ -2180,10 +2174,12 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(8.0, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116248044 (nested)")
   def testNestedWhileGrad_Simple(self):
     self._testNestedWhileGrad_Simple(use_gpu=False)
     self._testNestedWhileGrad_Simple(use_gpu=True)
 
+  @test_util.disable_control_flow_v2("b/116248044 (nested)")
   def testNestedWhileGrad_SerialInner(self):
     with self.cached_session():
       v = constant_op.constant(1.0)
@@ -2207,6 +2203,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(256.0, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116248044 (nested)")
   def testNestedWhileGrad_ParallelInner(self):
     with self.cached_session():
       v = constant_op.constant(1.0)
@@ -2230,6 +2227,8 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(512.0, r.eval())
 
+  @test_util.disable_control_flow_v2(
+      "Nested loops and TensorArrays not supported")
   def testNestedWhileGrad_ParallelIterations(self):
     # Make sure the stack pushes and pops of an inner loop are executed in
     # the sequential order of the iterations of its outer loop.
@@ -2268,13 +2267,12 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(1024.0, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116272044 (cond_in_while)")
   def testWhileCondGrad_Simple(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113294377 (unknown shape)")
-
     self._testWhileCondGrad_Simple(use_gpu=False)
     self._testWhileCondGrad_Simple(use_gpu=True)
 
+  @test_util.disable_control_flow_v2("b/116272044 (cond_in_while)")
   def testWhileCondGrad_UnknownShape(self):
     with self.cached_session() as sess:
       v = array_ops.placeholder(dtypes.float32)
@@ -2292,6 +2290,7 @@ class ControlFlowTest(test.TestCase):
       r = sess.run(r, feed_dict={v: 2.0})
       self.assertAllClose(1024.0, r)
 
+  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileGrad_Concat(self):
     with self.cached_session() as sess:
       x = variable_scope.get_variable("x", initializer=[[1., 2.]])
@@ -2315,6 +2314,7 @@ class ControlFlowTest(test.TestCase):
       sess.run(op)
       self.assertAllClose([[0.98000002, 1.98000002]], sess.run(x))
 
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileWithRefsWithGradients_1(self):
     with self.cached_session() as sess:
       x = variables.VariableV1(0.)._ref()  # pylint: disable=protected-access
@@ -2343,6 +2343,7 @@ class ControlFlowTest(test.TestCase):
     self.assertEqual(0, value_x)
     self.assertEqual(73, value_x_grad)
 
+  @test_util.disable_control_flow_v2("b/116282023 (IndexedSlices)")
   def testWhileGrad_IndexedSlices(self):
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
@@ -2364,6 +2365,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r.values, values)[0]
       self.assertAllClose(np.array([1024.0, 1024.0]), r.eval())
 
+  @test_util.disable_control_flow_v2("b/116328420 (SparseTensor)")
   def testWhileGrad_SparseTensor(self):
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
@@ -2386,6 +2388,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r.values, values)[0]
       self.assertAllClose(np.array([1024.0, 1024.0]), r.eval())
 
+  @test_util.disable_control_flow_v2("b/115920078 (gradients)")
   def testCallGradInLoop(self):
     with self.cached_session() as sess:
       i0 = constant_op.constant(0)
@@ -2405,6 +2408,8 @@ class ControlFlowTest(test.TestCase):
           c, b, [i0, constant_op.constant(0.0)])
       self.assertAllClose(600.0, sess.run(output_grad)[1])
 
+  @test_util.disable_control_flow_v2(
+      "b/116255781 (flat_args), b/115660901 (TensorArray)")
   def testWhileAndTensorArray(self):
     with self.cached_session() as sess:
       param = constant_op.constant(2.0)
@@ -2509,6 +2514,7 @@ class ControlFlowTest(test.TestCase):
       all_ops = x.graph.get_operations()
       self.assertFalse(any([name in op.name for op in all_ops]))
 
+  @test_util.disable_control_flow_v2("b/116255781 (flat args)")
   def testWhileGradGradFail(self):
     theta = variables.Variable(initial_value=1.)
 
@@ -2538,6 +2544,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, y)[0]
       self.assertEqual(388.0, r.eval())
 
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileGradientWithNontrainablePath1(self):
     q = variables.Variable([7., 8.])
 
@@ -2555,6 +2562,7 @@ class ControlFlowTest(test.TestCase):
       sess.run(q.initializer)
       self.assertAllClose([0., 0.], sess.run(dy_dq))
 
+  @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileGradientWithNontrainablePath2(self):
     q = variables.Variable([7., 8.])
 
@@ -2572,6 +2580,7 @@ class ControlFlowTest(test.TestCase):
       sess.run(q.initializer)
       self.assertAllClose([1., 1.], sess.run(dy_dq))
 
+  @test_util.disable_control_flow_v2("b/115920078 (gradients)")
   def testIssue16504(self):
     c = constant_op.constant(np.arange(100), dtype=dtypes.float32)
     w = variables.Variable(
@@ -2595,6 +2604,7 @@ class ControlFlowTest(test.TestCase):
     grad, = gradients_impl.gradients(w, c)
     self.assertIsNotNone(grad)
 
+  @test_util.disable_control_flow_v2("b/116270461 (resource)")
   def testStopGradMultiFlows(self):
     with self.cached_session():
 
@@ -2653,10 +2663,9 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(4.0, i.eval(feed_dict={d: 1}))
       self.assertAllClose(2.0 * math.sqrt(2), i.eval(feed_dict={d: 2}))
 
+  @test_util.disable_control_flow_v2(
+      "b/112477618 (Operation returned from cond)")
   def testCase(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/112477618 (Operation returned from cond)")
-
     with self.cached_session():
       x = constant_op.constant(1)
       y = constant_op.constant(2)
@@ -2708,10 +2717,9 @@ class ControlFlowTest(test.TestCase):
 
       self.assertAllEqual(r6.eval(), 0)
 
+  @test_util.disable_control_flow_v2(
+      "b/112477618 (Operation returned from cond)")
   def testCaseSideEffects(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/112477618 (Operation returned from cond)")
-
     with self.cached_session() as sess:
       v0 = variables.Variable(-1)
       v1 = variables.Variable(-1)
@@ -2746,10 +2754,8 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(0, r0.eval())
       self.assertAllEqual(sess.run([v0, v1, v2]), [0, -1, -1])
 
+  @test_util.disable_control_flow_v2("b/113324949 (ref vars)")
   def testOneOpCond(self):
-    if control_flow_ops.ENABLE_COND_V2:
-      return unittest.skip("b/113324949 (ref vars)")
-
     with self.cached_session():
       v = variables.Variable(0)
       c = ops.convert_to_tensor(0)
@@ -3031,9 +3037,11 @@ class ControlFlowTest(test.TestCase):
 
       r = gradients_impl.gradients(r, x)[0]
       self.assertEqual(r.eval(), 524288.0)
-      self.assertEqual(
-          len([op for op in x.graph.get_operations() if op.type == "StackV2"]),
-          1)
+      # while_v2 does not have stacks.
+      if not control_flow_ops.ENABLE_WHILE_V2:
+        self.assertEqual(
+            len([op for op in x.graph.get_operations() if op.type == "StackV2"
+                ]), 1)
 
 
 class ControlFlowContextCheckTest(test.TestCase):
@@ -3393,7 +3401,7 @@ class WhileOpBenchmark(test.Benchmark):
         name="unroll_same_device", iters=iters, wall_time=duration)
 
 
-@test_util.with_cond_v2
+@test_util.with_control_flow_v2
 class EagerTest(test.TestCase):
 
   def testCond(self):
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 87f8bd85a5..9d7d31df22 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -60,8 +60,17 @@ from tensorflow.python.util import nest
 from tensorflow.python.util import tf_should_use
 from tensorflow.python.util.tf_export import tf_export
 
+# The while_v2 module.
+_while_v2 = None
 
 ENABLE_COND_V2 = os.getenv("TF_ENABLE_COND_V2", "0") != "0"
+# Note: Setting this to True is not sufficient to switch to the v2 while_loop.
+# Users must also import the while_v2 module to set the _while_v2 module
+# variable above. We do this to avoid a circular dependency:
+# control_flow_ops -> while_v2 -> gradients_impl -> control_flow_ops
+# A ValueError is raised in tf.while_loop if this is set to True and the
+# `_while_v2` module is not set.
+ENABLE_WHILE_V2 = os.getenv("TF_ENABLE_WHILE_V2", "0") != "0"
 
 
 # We override the 'tuple' for a control flow op, so we keep python's
@@ -3211,6 +3220,13 @@ def while_loop(cond,
   ```
 
   """
+  if ENABLE_WHILE_V2 and not context.executing_eagerly():
+    if not _while_v2:
+      raise ValueError("The while_v2 module is not set. Did you forget to "
+                       "import tensorflow.python.ops."
+                       "while_v2?")
+    return _while_v2.while_loop(cond, body, loop_vars, name)
+
   with ops.name_scope(name, "while", loop_vars):
     if not loop_vars:
       raise ValueError("No loop variables provided")
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index 875be31602..6791e1cd61 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -24,6 +24,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import sys
 
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.python.eager import function
@@ -33,6 +34,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import cond_v2_impl as cond_v2
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import gen_functional_ops
 from tensorflow.python.ops import gradients_impl
@@ -41,6 +43,8 @@ from tensorflow.python.util import nest
 
 # pylint: disable=protected-access
 
+control_flow_ops._while_v2 = sys.modules[__name__]
+
 # TODO(b/79881896): Handle external control dependencies. tf.while_loop allows
 # control dependencies on external nodes with at least 1 output.
 # Another idea is to create const nodes outside the loop and add control edges
-- 
GitLab


From 6d02ee8e581bf5211f362b80175122e3782fb37a Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Fri, 28 Sep 2018 12:49:38 -0700
Subject: [PATCH 0873/1357] Simplify batch_dot logic

Remove dead logical branch.

PiperOrigin-RevId: 214980627
---
 tensorflow/python/keras/backend.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 4589c821e5..584facc859 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -1511,12 +1511,8 @@ def batch_dot(x, y, axes=None):
       out = math_ops.reduce_sum(
           math_ops.multiply(array_ops.transpose(x, [1, 0]), y), axes[1])
   else:
-    if axes is not None:
-      adj_x = None if axes[0] == ndim(x) - 1 else True
-      adj_y = True if axes[1] == ndim(y) - 1 else None
-    else:
-      adj_x = None
-      adj_y = None
+    adj_x = None if axes[0] == ndim(x) - 1 else True
+    adj_y = True if axes[1] == ndim(y) - 1 else None
     out = math_ops.matmul(x, y, adjoint_a=adj_x, adjoint_b=adj_y)
   if diff:
     if x_ndim > y_ndim:
-- 
GitLab


From 00c503b85c2d4b6ab44305e94d66237925eed6bf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 13:25:02 -0700
Subject: [PATCH 0874/1357] Cleanup

PiperOrigin-RevId: 214985873
---
 tensorflow/contrib/decision_trees/proto/BUILD | 1 -
 tensorflow/contrib/training/BUILD             | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tensorflow/contrib/decision_trees/proto/BUILD b/tensorflow/contrib/decision_trees/proto/BUILD
index 3b50a48336..06940a90d5 100644
--- a/tensorflow/contrib/decision_trees/proto/BUILD
+++ b/tensorflow/contrib/decision_trees/proto/BUILD
@@ -17,7 +17,6 @@ tf_proto_library(
     name = "generic_tree_model",
     srcs = ["generic_tree_model.proto"],
     cc_api_version = 2,
-    java_api_version = 2,
     visibility = ["//visibility:public"],
 )
 
diff --git a/tensorflow/contrib/training/BUILD b/tensorflow/contrib/training/BUILD
index ddf8365d61..b565ebd073 100644
--- a/tensorflow/contrib/training/BUILD
+++ b/tensorflow/contrib/training/BUILD
@@ -313,6 +313,5 @@ tf_proto_library(
     name = "protos_all",
     srcs = glob(["**/*.proto"]),
     cc_api_version = 2,
-    java_api_version = 2,
     visibility = ["//visibility:public"],
 )
-- 
GitLab


From c30e729f8f830ea2da46eaa7a5354395c5119def Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 13:27:41 -0700
Subject: [PATCH 0875/1357] Internal change

PiperOrigin-RevId: 214986255
---
 tensorflow/contrib/lite/java/aar_with_jni.bzl | 53 ++++++++++---------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/tensorflow/contrib/lite/java/aar_with_jni.bzl b/tensorflow/contrib/lite/java/aar_with_jni.bzl
index db837cf29e..9d2aead266 100644
--- a/tensorflow/contrib/lite/java/aar_with_jni.bzl
+++ b/tensorflow/contrib/lite/java/aar_with_jni.bzl
@@ -3,12 +3,12 @@
 load("@build_bazel_rules_android//android:rules.bzl", "android_binary")
 
 def aar_with_jni(name, android_library):
-  # Generate dummy AndroidManifest.xml for dummy apk usage
-  # (dummy apk is generated by <name>_dummy_app_for_so target below)
-  native.genrule(
-      name = name + "_binary_manifest_generator",
-      outs = [name + "_generated_AndroidManifest.xml"],
-      cmd = """
+    # Generate dummy AndroidManifest.xml for dummy apk usage
+    # (dummy apk is generated by <name>_dummy_app_for_so target below)
+    native.genrule(
+        name = name + "_binary_manifest_generator",
+        outs = [name + "_generated_AndroidManifest.xml"],
+        cmd = """
 cat > $(OUTS) <<EOF
 <manifest
   xmlns:android="http://schemas.android.com/apk/res/android"
@@ -17,27 +17,28 @@ cat > $(OUTS) <<EOF
 </manifest>
 EOF
 """,
-  )
+    )
 
-  # Generate dummy apk including .so files and later we extract out
-  # .so files and throw away the apk.
-  android_binary(
-      name = name + "_dummy_app_for_so",
-      manifest = name + "_generated_AndroidManifest.xml",
-      custom_package = "dummy.package.for.so",
-      deps = [android_library],
-      # In some platforms we don't have an Android SDK/NDK and this target
-      # can't be built. We need to prevent the build system from trying to
-      # use the target in that case.
-      tags = ["manual"],
-  )
+    # Generate dummy apk including .so files and later we extract out
+    # .so files and throw away the apk.
+    android_binary(
+        name = name + "_dummy_app_for_so",
+        aapt_version = "aapt",
+        manifest = name + "_generated_AndroidManifest.xml",
+        custom_package = "dummy.package.for.so",
+        deps = [android_library],
+        # In some platforms we don't have an Android SDK/NDK and this target
+        # can't be built. We need to prevent the build system from trying to
+        # use the target in that case.
+        tags = ["manual"],
+    )
 
-  native.genrule(
-      name = name,
-      srcs = [android_library + ".aar", name + "_dummy_app_for_so_unsigned.apk"],
-      outs = [name + ".aar"],
-      tags = ["manual"],
-      cmd = """
+    native.genrule(
+        name = name,
+        srcs = [android_library + ".aar", name + "_dummy_app_for_so_unsigned.apk"],
+        outs = [name + ".aar"],
+        tags = ["manual"],
+        cmd = """
 cp $(location {}.aar) $(location :{}.aar)
 chmod +w $(location :{}.aar)
 origdir=$$PWD
@@ -46,4 +47,4 @@ unzip $$origdir/$(location :{}_dummy_app_for_so_unsigned.apk) "lib/*"
 cp -r lib jni
 zip -r $$origdir/$(location :{}.aar) jni/*/*.so
 """.format(android_library, name, name, name, name),
-  )
+    )
-- 
GitLab


From 19143aa0e2ac3cdf0d6826e7e1d00cd864080394 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 13:31:01 -0700
Subject: [PATCH 0876/1357] Internal change.

PiperOrigin-RevId: 214986756
---
 tensorflow/core/kernels/BUILD        |  4 +++-
 tensorflow/python/kernel_tests/BUILD | 22 +++++++++++++++-------
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 30171708c1..9439ab332c 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1197,8 +1197,10 @@ tf_cc_test(
 
 tf_cc_test(
     name = "example_parsing_ops_test",
-    size = "large",
+    size = "medium",
     srcs = ["example_parsing_ops_test.cc"],
+    shard_count = 4,
+    tags = ["optonly"],
     deps = [
         ":example_parsing_ops",
         ":ops_testutil",
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 65b9e04ed9..9490746fd9 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2359,7 +2359,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "transpose_op_test",
-    size = "large",
+    size = "medium",
     srcs = ["transpose_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -2367,10 +2367,11 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
-    shard_count = 2,
+    shard_count = 4,
     tags = [
         "no_gpu",
         "no_oss",
+        "optonly",  # times out
     ],
 )
 
@@ -2489,6 +2490,7 @@ cuda_py_test(
         "//tensorflow/python:nn_grad",
         "//tensorflow/python:nn_ops",
     ],
+    shard_count = 2,
     tags = [
         "optonly",  # flaky timeouts unless optimized
     ],
@@ -2509,7 +2511,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "conv_ops_test",
-    size = "large",
+    size = "medium",
     srcs = ["conv_ops_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -2528,6 +2530,9 @@ cuda_py_test(
         "//tensorflow/python:variables",
     ],
     shard_count = 4,
+    tags = [
+        "optonly",  # times out
+    ],
 )
 
 cuda_py_test(
@@ -2587,7 +2592,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "fft_ops_test",
-    size = "large",
+    size = "medium",
     srcs = ["fft_ops_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -2597,7 +2602,8 @@ cuda_py_test(
         "//tensorflow/python:spectral_ops",
         "//tensorflow/python:spectral_ops_test_util",
     ],
-    shard_count = 3,
+    shard_count = 4,
+    tags = ["optonly"],
 )
 
 cuda_py_test(
@@ -2662,7 +2668,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "scatter_ops_test",
-    size = "large",  # NOTE: This is not run by default.
+    size = "medium",  # NOTE: This is not run by default.
     srcs = ["scatter_ops_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -2671,11 +2677,13 @@ cuda_py_test(
         "//tensorflow/python:state_ops",
         "//tensorflow/python:variables",
     ],
+    shard_count = 2,
+    tags = ["optonly"],
 )
 
 cuda_py_test(
     name = "slice_op_test",
-    size = "large",
+    size = "medium",
     srcs = ["slice_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
-- 
GitLab


From 64be2ecc07c698df05d88051ec42a0409d1a9863 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 13:36:18 -0700
Subject: [PATCH 0877/1357] Do not pass in the bazel default toolchain via
 extra_toolchains.

Without this the default toolchain is used for a subset of the build and the
tests do not actually run on GPUs.

This uncovered a setup problem in the Docker image that needed fixing.

PiperOrigin-RevId: 214987676
---
 .../Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04      |  2 +-
 third_party/gpus/crosstool/BUILD.tpl               | 14 ++++++++++++++
 third_party/toolchains/BUILD                       |  4 +---
 .../ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD          |  2 +-
 .../preconfig/ubuntu14.04/gcc-nvcc/BUILD           | 14 ++++++++++++++
 5 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04 b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04
index a30858db82..dd8d705331 100644
--- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04
+++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04
@@ -26,7 +26,7 @@ ENV NVIDIA_VISIBLE_DEVICES all
 ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
 ENV NVIDIA_REQUIRE_CUDA "cuda>=9.0"
 ENV NCCL_VERSION 2.2.13
-ENV CUDNN_VERSION 7.2.1.38
+ENV CUDNN_VERSION 7.1.4.18
 
 # TODO(b/110903506): /usr/loca/cuda/lib64/stubs should not be needed in
 # LD_LIBRARY_PATH. The stubs/libcuda.so is not meant to used at runtime. The
diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl
index f638756d23..c8812fab33 100644
--- a/third_party/gpus/crosstool/BUILD.tpl
+++ b/third_party/gpus/crosstool/BUILD.tpl
@@ -2,6 +2,20 @@ licenses(["restricted"])
 
 package(default_visibility = ["//visibility:public"])
 
+toolchain(
+    name = "toolchain-linux-x86_64",
+    exec_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    toolchain = ":cc-compiler-local",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
 cc_toolchain_suite(
     name = "toolchain",
     toolchains = {
diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD
index 7256a7d96e..bcbc4dda11 100644
--- a/third_party/toolchains/BUILD
+++ b/third_party/toolchains/BUILD
@@ -26,12 +26,10 @@ platform(
     constraint_values = [
         "@bazel_tools//platforms:x86_64",
         "@bazel_tools//platforms:linux",
-        "@bazel_tools//tools/cpp:clang",
-        "@bazel_toolchains//constraints:xenial",
     ],
     remote_execution_properties = """
         properties: {
             name: "container-image"
-            value:"docker://gcr.io/asci-toolchain/nosla-cuda9.0-cudnn7-ubuntu14.04@sha256:06b585f42eed3b2030e9566b8f88f48d7472fa0f47e59765bc115376c8801bdf"
+            value:"docker://gcr.io/asci-toolchain/nosla-cuda9.0-cudnn7-ubuntu14.04@sha256:e5099ff15650986e268a43ee99e2d2b7ffe2459b8b6935385078d1d3b2ed4d02"
         }""",
 )
diff --git a/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD b/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD
index 2d3e41127d..05abcb56d8 100755
--- a/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD
+++ b/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD
@@ -1253,7 +1253,7 @@ genrule(
         "cuda/lib/libcupti.so.9.0",
     ],
     cmd = """
-if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart.so.9.0.176" "$(@D)/cuda/lib/libcudart.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcublas.so.9.0.480" "$(@D)/cuda/lib/libcublas.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcusolver.so.9.0.176" "$(@D)/cuda/lib/libcusolver.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcurand.so.9.0.176" "$(@D)/cuda/lib/libcurand.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcufft.so.9.0.176" "$(@D)/cuda/lib/libcufft.so.9.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.7.2.1" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda-9.0/extras/CUPTI/lib64/libcupti.so.9.0.176" "$(@D)/cuda/lib/libcupti.so.9.0"
+if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart.so.9.0.176" "$(@D)/cuda/lib/libcudart.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcublas.so.9.0.480" "$(@D)/cuda/lib/libcublas.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcusolver.so.9.0.176" "$(@D)/cuda/lib/libcusolver.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcurand.so.9.0.176" "$(@D)/cuda/lib/libcurand.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcufft.so.9.0.176" "$(@D)/cuda/lib/libcufft.so.9.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.7.1.4" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda-9.0/extras/CUPTI/lib64/libcupti.so.9.0.176" "$(@D)/cuda/lib/libcupti.so.9.0"
    """,
 )
 
diff --git a/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD b/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD
index a56b4513fb..6442e7628a 100755
--- a/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD
+++ b/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD
@@ -2,6 +2,20 @@ licenses(["restricted"])
 
 package(default_visibility = ["//visibility:public"])
 
+toolchain(
+    name = "toolchain-linux-x86_64",
+    exec_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    toolchain = ":cc-compiler-local",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
 cc_toolchain_suite(
     name = "toolchain",
     toolchains = {
-- 
GitLab


From 1724d155f00b49bc817189247cbfb0df2092a9da Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Fri, 28 Sep 2018 13:50:12 -0700
Subject: [PATCH 0878/1357] Automated rollback of commit
 7f1d70d97f543d69a9f02cd6df0964f22f9278f3

PiperOrigin-RevId: 214989908
---
 tensorflow/contrib/distribute/python/BUILD    |  28 ++-
 .../distribute/python/metrics_v1_test.py      |   3 +-
 .../distribute/python/minimize_loss_test.py   |  26 +-
 .../distribute/python/mirrored_strategy.py    |   3 +-
 .../python/mirrored_strategy_multigpu_test.py |  12 +-
 .../contrib/distribute/python/monitor.py      |   1 -
 .../distribute/python/optimizer_v2_test.py    |   8 +-
 .../distribute/python/prefetching_ops_v2.py   | 232 ++++++++++++++++++
 .../python/prefetching_ops_v2_test.py         |  90 +++++++
 .../contrib/distribute/python/step_fn.py      |   7 +-
 .../contrib/distribute/python/step_fn_test.py |   1 -
 .../contrib/distribute/python/values.py       |  51 ++--
 .../contrib/distribute/python/values_test.py  |  23 +-
 13 files changed, 396 insertions(+), 89 deletions(-)
 create mode 100644 tensorflow/contrib/distribute/python/prefetching_ops_v2.py
 create mode 100644 tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index e329b964c4..422983dbef 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -22,6 +22,7 @@ py_library(
     visibility = ["//tensorflow:internal"],
     deps = [
         ":input_ops",
+        ":prefetching_ops_v2",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:device_util",
@@ -29,7 +30,6 @@ py_library(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:multi_device_iterator_ops",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/training/checkpointable:base",
         "@six_archive//:six",
@@ -647,6 +647,32 @@ cuda_py_test(
     ],
 )
 
+py_library(
+    name = "prefetching_ops_v2",
+    srcs = ["prefetching_ops_v2.py"],
+    deps = [
+        "//tensorflow/contrib/data/python/ops:prefetching_ops",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+cuda_py_test(
+    name = "prefetching_ops_v2_test",
+    srcs = ["prefetching_ops_v2_test.py"],
+    additional_deps = [
+        ":prefetching_ops_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+)
+
 py_library(
     name = "input_ops",
     srcs = ["input_ops.py"],
diff --git a/tensorflow/contrib/distribute/python/metrics_v1_test.py b/tensorflow/contrib/distribute/python/metrics_v1_test.py
index f7773aff4f..8163494c8e 100644
--- a/tensorflow/contrib/distribute/python/metrics_v1_test.py
+++ b/tensorflow/contrib/distribute/python/metrics_v1_test.py
@@ -86,11 +86,10 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
   def _test_metric(self, distribution, dataset_fn, metric_fn, expected_fn):
     with ops.Graph().as_default(), distribution.scope():
       iterator = distribution.distribute_dataset(
-          dataset_fn).make_initializable_iterator()
+          dataset_fn).make_one_shot_iterator()
       value, update = distribution.call_for_each_tower(
           metric_fn, iterator.get_next())
       update = distribution.group(update)
-      self.evaluate(iterator.initializer)
       self.evaluate(variables.local_variables_initializer())
       # TODO(josh11b): Once we switch to using a global batch size for input,
       # replace "distribution.num_towers" with "1".
diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py
index d082d5c419..ba147e7824 100644
--- a/tensorflow/contrib/distribute/python/minimize_loss_test.py
+++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py
@@ -41,14 +41,6 @@ from tensorflow.python.ops.losses import losses_impl
 
 class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
 
-  def _get_iterator(self, ds):
-    if context.executing_eagerly():
-      iterator = ds.make_one_shot_iterator()
-    else:
-      iterator = ds.make_initializable_iterator()
-      self.evaluate(iterator.initializer)
-    return iterator
-
   @combinations.generate(
       combinations.times(
           combinations.distributions_and_v1_optimizers(),
@@ -70,7 +62,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             distribution.call_for_each_tower(
                 model_fn, *inputs, run_concurrently=layer.built))
 
-      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
+      iterator = distribution.distribute_dataset(
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -106,7 +99,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       model_fn, dataset_fn, layer = minimize_loss_example(
           optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss)
 
-      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
+      iterator = distribution.distribute_dataset(
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return distribution.group(
@@ -165,7 +159,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             distribution.call_for_each_tower(
                 model_fn, *inputs, run_concurrently=layer.built))
 
-      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
+      iterator = distribution.distribute_dataset(
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -249,7 +244,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
           fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS)
         return control_flow_ops.group(fetches)
 
-      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
+      iterator = distribution.distribute_dataset(
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -342,7 +338,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             distribution.call_for_each_tower(
                 model_fn, x, y, run_concurrently=False))
 
-      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
+      iterator = distribution.distribute_dataset(
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -435,7 +432,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             output=loss)
         return distribution.group(train_op)
 
-      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
+      iterator = distribution.distribute_dataset(
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         initial_loss = lambda: constant_op.constant(1e7)
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 93d42e09a2..4d7516063c 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -484,8 +484,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
           self._prefetch_on_device, self._auto_shard_dataset)
     else:
       return values.PerDeviceDataset(
-          self._call_dataset_fn(dataset_fn),
-          self._devices,
+          self._call_dataset_fn(dataset_fn), self._devices,
           self._prefetch_on_device)
 
   # TODO(priyag): Deal with OutOfRange errors once b/111349762 is fixed.
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index 04c712ce1d..f51e543624 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -300,15 +300,9 @@ class MirroredStrategyVariableCreationTest(test.TestCase):
 
     dist = mirrored_strategy.MirroredStrategy(
         ["/device:GPU:0", "/device:CPU:0"])
-    ds = dist.distribute_dataset(
-        lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10))
-    if context.executing_eagerly():
-      iterator = ds.make_one_shot_iterator()
-    else:
-      iterator = ds.make_initializable_iterator()
-      self.evaluate([iterator.initializer])
-
-    features = iterator.get_next()
+    features = dist.distribute_dataset(
+        lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10)
+    ).make_one_shot_iterator().get_next()
 
     with dist.scope():
       result = dist.call_for_each_tower(
diff --git a/tensorflow/contrib/distribute/python/monitor.py b/tensorflow/contrib/distribute/python/monitor.py
index 17b7ab74f6..7644acedc9 100644
--- a/tensorflow/contrib/distribute/python/monitor.py
+++ b/tensorflow/contrib/distribute/python/monitor.py
@@ -51,7 +51,6 @@ class Monitor(object):
     else:
       if session is None:
         raise ValueError("Should provide a `session` in Graph mode.")
-      session.run(step_callable._iterator.initializer)  # pylint: disable=protected-access
       self._run_step = session.make_callable(step_callable())
       session.run(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/optimizer_v2_test.py b/tensorflow/contrib/distribute/python/optimizer_v2_test.py
index 3064433129..6e9ba37a19 100644
--- a/tensorflow/contrib/distribute/python/optimizer_v2_test.py
+++ b/tensorflow/contrib/distribute/python/optimizer_v2_test.py
@@ -42,11 +42,8 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase):
       model_fn, dataset_fn, layer = minimize_loss_example(
           optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss)
 
-      ds = distribution.distribute_dataset(dataset_fn)
-      if context.executing_eagerly():
-        iterator = ds.make_one_shot_iterator()
-      else:
-        iterator = ds.make_initializable_iterator()
+      iterator = distribution.distribute_dataset(
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return control_flow_ops.group(distribution.unwrap(
@@ -55,7 +52,6 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase):
 
       if not context.executing_eagerly():
         with self.cached_session() as sess:
-          sess.run(iterator.initializer)
           run_step = sess.make_callable(run_step())
         self.evaluate(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
new file mode 100644
index 0000000000..8d949943b7
--- /dev/null
+++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
@@ -0,0 +1,232 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Extension of prefetching_ops to support more than one device."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import warnings
+
+from tensorflow.contrib.data.python.ops import prefetching_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.data.util import nest as data_nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
+from tensorflow.python.util import nest
+
+
+# pylint: disable=protected-access
+class _PrefetchToDeviceIterator(object):
+  """A replacement for `tf.data.Iterator` that prefetches to another device.
+
+  Args:
+    input_dataset: The input dataset.
+    one_shot: If true, we make a one shot iterator that's already initialized.
+    devices: Devices on which to prefetch.
+    buffer_size: Size of the prefetching buffer.
+    shared_name: (Optional.) If non-empty, the returned iterator will be shared
+      under the given name across multiple sessions that share the same devices
+      (e.g. when using a remote server). Only used if one_shot is False.
+
+  Returns:
+    An Iterator type object.
+  """
+
+  def __init__(self,
+               input_dataset,
+               one_shot,
+               devices,
+               buffer_size,
+               shared_name=None):
+    self._input_dataset = input_dataset
+    self._get_next_call_count = 0
+    self._one_shot = one_shot
+    if shared_name is None:
+      shared_name = ""
+    self._devices = devices
+
+    if self._one_shot:
+      self._input_iterator = input_dataset.make_one_shot_iterator()
+    else:
+      self._input_iterator = iterator_ops.Iterator.from_structure(
+          self._input_dataset.output_types, self._input_dataset.output_shapes,
+          shared_name, self._input_dataset.output_classes)
+    input_iterator_handle = self._input_iterator.string_handle()
+
+    @function.Defun(dtypes.string)
+    def _prefetch_fn(handle):
+      """Prefetches one element from `input_iterator`."""
+      remote_iterator = iterator_ops.Iterator.from_string_handle(
+          handle, self._input_iterator.output_types,
+          self._input_iterator.output_shapes,
+          self._input_iterator.output_classes)
+      ret = remote_iterator.get_next()
+      return nest.flatten(sparse.serialize_sparse_tensors(ret))
+
+    target_device = ged_ops.experimental_iterator_get_device(
+        self._input_iterator._iterator_resource)
+    self._buffering_resources = []
+    for device in nest.flatten(self._devices):
+      with ops.device(device):
+        buffer_resource_handle = prefetching_ops.function_buffering_resource(
+            f=_prefetch_fn,
+            output_types=data_nest.flatten(
+                sparse.as_dense_types(self._input_dataset.output_types,
+                                      self._input_dataset.output_classes)),
+            target_device=target_device,
+            string_arg=input_iterator_handle,
+            buffer_size=buffer_size,
+            shared_name=shared_name)
+        self._buffering_resources.append(buffer_resource_handle)
+
+    if not self._one_shot:
+      reset_ops = []
+      for buffer_resource in self._buffering_resources:
+        reset_ops.append(
+            ged_ops.experimental_function_buffering_resource_reset(
+                buffer_resource))
+      with ops.control_dependencies(reset_ops):
+        self._initializer = self._input_iterator.make_initializer(
+            self._input_dataset)
+
+  def get_next(self, name=None):
+    """See `tf.data.Iterator.get_next`."""
+    self._get_next_call_count += 1
+    if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD:
+      warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE)
+
+    flat_result = []
+    # TODO(priyag): This will fail if the input size (typically number of
+    # batches) is not divisible by number of devices.
+    # How do we handle that more gracefully / let the user know?
+    for buffer_resource in self._buffering_resources:
+      flat_ret = ged_ops.experimental_function_buffering_resource_get_next(
+          buffer_resource,
+          output_types=data_nest.flatten(
+              sparse.as_dense_types(self.output_types, self.output_classes)),
+          name=name)
+
+      ret = sparse.deserialize_sparse_tensors(
+          data_nest.pack_sequence_as(self.output_types, flat_ret),
+          self.output_types, self.output_shapes, self.output_classes)
+
+      for tensor, shape in zip(
+          data_nest.flatten(ret), data_nest.flatten(self.output_shapes)):
+        if isinstance(tensor, ops.Tensor):
+          tensor.set_shape(shape)
+      flat_result.append(ret)
+
+    return nest.pack_sequence_as(self._devices, flat_result)
+
+  @property
+  def initializer(self):
+    if self._one_shot:
+      raise NotImplementedError("Can't initialize a one_shot_iterator")
+    return self._initializer
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+# pylint: enable=protected-access
+
+
+class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` whose iterator prefetches elements to other device(s)."""
+
+  def __init__(self, input_dataset, devices, buffer_size):
+    super(_PrefetchToDeviceDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._devices = devices
+    self._buffer_size = buffer_size if buffer_size is not None else 1
+
+  def make_one_shot_iterator(self):
+    return _PrefetchToDeviceIterator(
+        self._input_dataset,
+        one_shot=True,
+        devices=self._devices,
+        buffer_size=self._buffer_size)
+
+  def make_initializable_iterator(self, shared_name=None):
+    if context.executing_eagerly():
+      raise RuntimeError(
+          "make_initializable_iterator is not supported when eager "
+          "execution is enabled.")
+
+    return _PrefetchToDeviceIterator(
+        self._input_dataset,
+        one_shot=False,
+        devices=self._devices,
+        buffer_size=self._buffer_size,
+        shared_name=shared_name)
+
+  def _as_variant_tensor(self):
+    # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset
+    # transformation methods is called.
+    # TODO(mrry): Investigate support for chaining further transformations after
+    # the prefetch, including GPU support.
+    raise NotImplementedError("`prefetch_to_devices()` must be the last "
+                              "transformation in a dataset pipeline.")
+
+  # TODO(priyag): Fix the output types, shapes and classes to match the result
+  # of get_next (which has the additional nesting layer of devices now).
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+
+def prefetch_to_devices(devices, buffer_size=None):
+  """A transformation that prefetches dataset values to the given `devices`.
+
+  NOTE: Although the transformation creates a `tf.data.Dataset`, the
+  transformation must be the final `Dataset` in the input pipeline.
+
+  Args:
+    devices: A nested structure of devices on which to prefetch the data. It can
+      be a single device name, or a tuple or list of device names.
+    buffer_size: (Optional.) The number of elements to buffer on each device.
+      Defaults to an automatically chosen value.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _PrefetchToDeviceDataset(dataset, devices, buffer_size)
+
+  return _apply_fn
diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
new file mode 100644
index 0000000000..16799104e8
--- /dev/null
+++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
@@ -0,0 +1,90 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for prefetching_ops_v2."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.distribute.python import prefetching_ops_v2
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import test
+
+
+class PrefetchingOpsV2Test(test.TestCase):
+
+  def testPrefetchToOneDevice(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops_v2.prefetch_to_devices("/gpu:0"))
+
+    iterator = device_dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchToTwoDevicesInAList(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
+
+    iterator = device_dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    output = []
+    # TODO(rohanj): Modify test to go till the end of the dataset when we
+    # switch to MultiDeviceIterator.
+    with self.cached_session() as sess:
+      for _ in range(4):
+        result = sess.run(next_element)
+        self.assertEqual(2, len(result))
+        output.extend(result)
+      self.assertEquals(set(range(8)), set(output))
+
+  def testPrefetchToTwoDevicesWithReinit(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
+
+    iterator = device_dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    # TODO(rohanj): Modify test to go till the end of the dataset when we
+    # switch to MultiDeviceIterator.
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer)
+      for _ in range(4):
+        sess.run(next_element)
+      sess.run(iterator.initializer)
+      for _ in range(4):
+        sess.run(next_element)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/distribute/python/step_fn.py b/tensorflow/contrib/distribute/python/step_fn.py
index 23bf36184f..1b5a4f64e5 100644
--- a/tensorflow/contrib/distribute/python/step_fn.py
+++ b/tensorflow/contrib/distribute/python/step_fn.py
@@ -19,7 +19,6 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.eager import backprop
-from tensorflow.python.eager import context
 from tensorflow.python.training import optimizer as optimizer_lib
 
 
@@ -51,11 +50,7 @@ class StandardInputStep(Step):
   def __init__(self, dataset_fn, distribution):
     super(StandardInputStep, self).__init__(distribution)
     self._distributed_input = distribution.distribute_dataset(dataset_fn)
-    if context.executing_eagerly():
-      self._iterator = self._distributed_input.make_one_shot_iterator()
-    else:
-      # TODO(priyag): Expose initializer via some initializer property.
-      self._iterator = self._distributed_input.make_initializable_iterator()
+    self._iterator = self._distributed_input.make_one_shot_iterator()
 
 
 class StandardSingleLossStep(StandardInputStep):
diff --git a/tensorflow/contrib/distribute/python/step_fn_test.py b/tensorflow/contrib/distribute/python/step_fn_test.py
index 1ff9b9ceec..f1ada49fa3 100644
--- a/tensorflow/contrib/distribute/python/step_fn_test.py
+++ b/tensorflow/contrib/distribute/python/step_fn_test.py
@@ -50,7 +50,6 @@ class SingleLossStepTest(test.TestCase, parameterized.TestCase):
         run_step = single_loss_step
       else:
         with self.cached_session() as sess:
-          sess.run(single_loss_step._iterator.initializer)
           run_step = sess.make_callable(single_loss_step())
       self.evaluate(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index 327775a729..4955ded4d5 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -26,7 +26,7 @@ import weakref
 import six
 
 from tensorflow.contrib.distribute.python import input_ops
-from tensorflow.python.data.ops import multi_device_iterator_ops
+from tensorflow.contrib.distribute.python import prefetching_ops_v2
 from tensorflow.python.eager import context
 from tensorflow.python.framework import device as tf_device
 from tensorflow.python.framework import ops
@@ -683,7 +683,7 @@ class PerDeviceDataIterator(object):
   def get_next(self, name=None):
     """Scatter the input across devices."""
     if self._prefetch_on_device:
-      data_list = self._iterator.get_next()
+      data_list = self._iterator.get_next(name=name)
       index = dict(zip(self._devices, data_list))
     else:
       batch = self._iterator.get_next(name=name)
@@ -703,24 +703,21 @@ class PerDeviceDataIterator(object):
 class PerDeviceDataset(object):
   """Like `tf.data.Dataset` split devices, producing `PerDevice` data."""
 
-  def __init__(
-      self,
-      dataset,
-      devices,
-      prefetch_on_device=None,
-  ):
+  def __init__(self, dataset, devices, prefetch_on_device=None):
     self._devices = devices
 
     # Default to using prefetching in graph mode, unless specified.
-    # TODO(rohanj): Enable prefetching in eager mode.
+    # TODO(priyag): Enable prefetching in eager mode.
     self._prefetch_on_device = prefetch_on_device
     if self._prefetch_on_device is None:
       self._prefetch_on_device = not context.executing_eagerly()
     assert not (self._prefetch_on_device and context.executing_eagerly()), (
         "Prefetching is only supported in graph mode currently")
 
-    self._dataset = dataset
-    if not self._prefetch_on_device:
+    if self._prefetch_on_device:
+      self._dataset = dataset.apply(
+          prefetching_ops_v2.prefetch_to_devices(self._devices))
+    else:
       # TODO(priyag): If dropping remainder is not appropriate, find another
       # approach to distributing the dataset when not possible to divide evenly.
       # Possibly not an issue when we start using PartitionedDataset.
@@ -728,33 +725,15 @@ class PerDeviceDataset(object):
 
   def make_one_shot_iterator(self):
     """Get a one time use iterator for the distributed PerDeviceDataset."""
-    # Graph mode prefetching with one shot iterator is disabled.
-    if not context.executing_eagerly():
-      raise ValueError("Cannot create a one shot iterator. Please use "
-                       "`make_initializable_iterator()` instead.")
-    # Eager mode prefetching would error out in constructor. Only remaining
-    # cases are non-prefetching eager / graph mode. We delegate to
-    # PerDeviceDataIterator to handle them.
     dataset_iterator = self._dataset.make_one_shot_iterator()
-    return PerDeviceDataIterator(
-        dataset_iterator, self._devices, prefetch_on_device=False)
+    return PerDeviceDataIterator(dataset_iterator, self._devices,
+                                 self._prefetch_on_device)
 
   def make_initializable_iterator(self):
     """Get an initializable iterator for the distributed PerDeviceDataset."""
-    # Eager mode generates already initialized iterators. Hence we cannot create
-    # an initializable iterator.
-    if context.executing_eagerly():
-      raise ValueError("Cannot create initializable iterator in Eager mode. "
-                       "Please use `make_one_shot_iterator` instead.")
-    if self._prefetch_on_device:
-      dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator(
-          self._dataset, self._devices)
-    else:
-      dataset_iterator = self._dataset.make_initializable_iterator()
-    return PerDeviceDataIterator(
-        dataset_iterator,
-        self._devices,
-        prefetch_on_device=self._prefetch_on_device)
+    dataset_iterator = self._dataset.make_initializable_iterator()
+    return PerDeviceDataIterator(dataset_iterator, self._devices,
+                                 self._prefetch_on_device)
 
 
 class MultiWorkerDataIterator(object):
@@ -837,9 +816,7 @@ class MultiWorkerDataset(object):
           worker_input = input_ops.auto_shard_dataset(
               worker_input, len(worker_device_map), i)
         self._datasets[worker] = PerDeviceDataset(
-            worker_input,
-            worker_devices,
-            prefetch_on_device=prefetch_on_device)
+            worker_input, worker_devices, prefetch_on_device=prefetch_on_device)
 
   def make_one_shot_iterator(self):
     iterators = {}
diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py
index 002d61f46e..ae3e134333 100644
--- a/tensorflow/contrib/distribute/python/values_test.py
+++ b/tensorflow/contrib/distribute/python/values_test.py
@@ -349,11 +349,7 @@ class PerDeviceDatasetTest(test.TestCase):
   def _test_iterator_no_prefetch(self, devices, dataset, expected_values):
     per_device_dataset = values.PerDeviceDataset(
         dataset, devices, prefetch_on_device=False)
-    if context.executing_eagerly():
-      iterator = per_device_dataset.make_one_shot_iterator()
-    else:
-      iterator = per_device_dataset.make_initializable_iterator()
-      self.evaluate([iterator.initializer])
+    iterator = per_device_dataset.make_one_shot_iterator()
 
     for expected_value in expected_values:
       next_element = iterator.get_next()
@@ -370,14 +366,21 @@ class PerDeviceDatasetTest(test.TestCase):
     if not context.executing_eagerly():
       per_device_dataset = values.PerDeviceDataset(
           dataset, devices, prefetch_on_device=True)
-      iterator = per_device_dataset.make_initializable_iterator()
-      self.evaluate([iterator.initializer])
+      iterator = per_device_dataset.make_one_shot_iterator()
 
+      # With prefetching, we cannot guarantee which input ends up on which
+      # device, so we verify that the complete set seen on all devices is
+      # correct, and equal numbers are distributed to each device.
+      combined_actual = []
+      combined_expected = []
       for expected_value in expected_values:
         next_element = iterator.get_next()
-        computed_value = self.evaluate(
-            [values.select_device(d, next_element) for d in devices])
-        self.assertEqual(expected_value, computed_value)
+        combined_actual.extend(
+            self.evaluate(
+                [values.select_device(d, next_element) for d in devices]))
+        combined_expected.extend(expected_value)
+
+      self.assertEqual(set(combined_expected), set(combined_actual))
 
       with self.assertRaises(errors.OutOfRangeError):
         next_element = iterator.get_next()
-- 
GitLab


From f83da5b0aa37ba55c1b2eaa093e6d043b73f5982 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 14:08:25 -0700
Subject: [PATCH 0879/1357] Introduce the abstraction of RunHandler which each
 DirectSession can use for the duration of a single RunInternal() call from
 RunHandlerPool. It is used for running inter-op closures with a global
 scheduler (which in the future) to improve both median and tail latency (for
 use-cases like CPU inference). In the case that global pools aren't used,
 this change should be a no-op.

PiperOrigin-RevId: 214992852
---
 tensorflow/core/BUILD                         |  16 ++
 .../core/common_runtime/direct_session.cc     |  49 +++-
 .../core/common_runtime/direct_session.h      |   3 +
 .../common_runtime/direct_session_test.cc     |  28 ++
 tensorflow/core/framework/run_handler.cc      | 249 ++++++++++++++++++
 tensorflow/core/framework/run_handler.h       |  95 +++++++
 tensorflow/core/framework/run_handler_util.cc |  57 ++++
 tensorflow/core/framework/run_handler_util.h  |  43 +++
 .../core/framework/run_handler_util_test.cc   |  93 +++++++
 tensorflow/core/protobuf/config.proto         |   5 +
 ...ensorflow.-run-options.-experimental.pbtxt |   6 +
 .../golden/v1/tensorflow.-run-options.pbtxt   |   6 +
 ...ensorflow.-run-options.-experimental.pbtxt |   6 +
 .../golden/v2/tensorflow.-run-options.pbtxt   |   6 +
 14 files changed, 656 insertions(+), 6 deletions(-)
 create mode 100644 tensorflow/core/framework/run_handler.cc
 create mode 100644 tensorflow/core/framework/run_handler.h
 create mode 100644 tensorflow/core/framework/run_handler_util.cc
 create mode 100644 tensorflow/core/framework/run_handler_util.h
 create mode 100644 tensorflow/core/framework/run_handler_util_test.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 50fe308b73..7da4b9fbd0 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2486,6 +2486,8 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [
     "framework/op_segment.h",
     "framework/rendezvous.h",  # only needed for tests
     "framework/resource_var.h",
+    "framework/run_handler.h",
+    "framework/run_handler_util.h",
     "framework/tensor_reference.h",
     "framework/tracking_allocator.h",  # only needed for tests
     "framework/unique_tensor_references.h",
@@ -2972,6 +2974,7 @@ tf_cuda_library(
         ":core_cpu_internal",
         ":device_tracer",
         ":framework",
+        ":framework_internal",
         ":graph",
         ":lib",
         ":lib_internal",
@@ -4119,6 +4122,19 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "framework_run_handler_util_test",
+    size = "small",
+    srcs = ["framework/run_handler_util_test.cc"],
+    linkstatic = tf_kernel_tests_linkstatic(),
+    deps = [
+        ":framework_internal",
+        ":lib",
+        ":test",
+        ":test_main",
+    ],
+)
+
 tf_cuda_cc_test(
     name = "common_runtime_direct_session_test",
     size = "small",
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 841181f8c3..458e133b68 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -40,6 +40,7 @@ limitations under the License.
 #include "tensorflow/core/framework/graph_def_util.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/run_handler.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
@@ -244,6 +245,21 @@ void DirectSession::SchedClosure(thread::ThreadPool* pool,
 #endif  // __ANDROID__
 }
 
+static RunHandlerPool* GetOrCreateRunHandlerPool(
+    const SessionOptions& options) {
+  static RunHandlerPool* pool =
+      new RunHandlerPool(NumInterOpThreadsFromSessionOptions(options));
+  return pool;
+}
+
+bool DirectSession::ShouldUseRunHandlerPool() const {
+  if (options_.config.session_inter_op_thread_pool_size() > 0 ||
+      options_.config.use_per_session_threads()) {
+    return false;
+  }
+  return true;
+}
+
 DirectSession::DirectSession(const SessionOptions& options,
                              const DeviceMgr* device_mgr,
                              DirectSessionFactory* const factory)
@@ -582,16 +598,37 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options,
     }
   }
 
-  Executor::Args::Runner default_runner = [this,
-                                           pool](Executor::Args::Closure c) {
-    SchedClosure(pool, std::move(c));
-  };
+  std::unique_ptr<RunHandler> handler;
+  if (ShouldUseRunHandlerPool() &&
+      run_options.experimental().use_run_handler_pool()) {
+    // Non-null only when a global inter-op pool is used.
+    VLOG(1) << "Using RunHandler to scheduler inter-op closures.";
+    handler = GetOrCreateRunHandlerPool(options_)->Get();
+  }
+  auto* handler_ptr = handler.get();
+
+  Executor::Args::Runner default_runner = nullptr;
+
+  if (pool == nullptr) {
+    default_runner = [](Executor::Args::Closure c) { c(); };
+  } else if (handler_ptr != nullptr) {
+    default_runner = [handler_ptr](Executor::Args::Closure c) {
+      handler_ptr->ScheduleInterOpClosure(std::move(c));
+    };
+  } else {
+    default_runner = [this, pool](Executor::Args::Closure c) {
+      SchedClosure(pool, std::move(c));
+    };
+  }
+
   for (const auto& item : executors_and_keys->items) {
-    // TODO(zhengxq): support partial run.
-    // TODO(zhengxq): if the device picks its own threadpool, we need to assign
+    // TODO(azaks): support partial run.
+    // TODO(azaks): if the device picks its own threadpool, we need to assign
     //     less threads to the main compute pool by default.
     thread::ThreadPool* device_thread_pool =
         item.device->tensorflow_device_thread_pool();
+    // TODO(crk): Investigate usage of RunHandlerPool when using device specific
+    // thread pool(s).
     if (!device_thread_pool) {
       args.runner = default_runner;
     } else {
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index 4a6a921ea7..3a168bbe3f 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -247,6 +247,9 @@ class DirectSession : public Session {
                                    ExecutorsAndKeys* executors_and_keys,
                                    RunMetadata* run_metadata);
 
+  // Returns whether inter-op execution uses a global pool.
+  bool ShouldUseRunHandlerPool() const;
+
   ::tensorflow::Status ExtendLocked(const GraphDef& graph)
       EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
 
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index 65e816c202..e3e431f800 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -625,6 +625,34 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetworkWithOpts_Callable) {
   EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 2);
 }
 
+TEST_F(DirectSessionMinusAXTest, UseRunHandlerPool) {
+  Initialize({3, 2, -1, 0});
+  auto session = CreateSession();
+  ASSERT_TRUE(session != nullptr);
+  TF_ASSERT_OK(session->Create(def_));
+  std::vector<std::pair<string, Tensor>> inputs;
+
+  // Request two targets: one fetch output and one non-fetched output.
+  std::vector<string> output_names = {y_ + ":0"};
+  std::vector<string> target_nodes = {y_neg_};
+  std::vector<Tensor> outputs;
+
+  // Prepares RunOptions and RunMetadata
+  RunOptions run_options;
+  run_options.mutable_experimental()->set_use_run_handler_pool(true);
+
+  Status s = session->Run(run_options, inputs, output_names, target_nodes,
+                          &outputs, nullptr);
+  TF_ASSERT_OK(s);
+
+  ASSERT_EQ(1, outputs.size());
+  // The first output should be initialized and have the correct
+  // output.
+  auto mat = outputs[0].matrix<float>();
+  ASSERT_TRUE(outputs[0].IsInitialized());
+  EXPECT_FLOAT_EQ(5.0, mat(0, 0));
+}
+
 TEST(DirectSessionTest, KeepsStateAcrossRunsOfSession) {
   GraphDef def;
   Graph g(OpRegistry::Global());
diff --git a/tensorflow/core/framework/run_handler.cc b/tensorflow/core/framework/run_handler.cc
new file mode 100644
index 0000000000..0c4007eafc
--- /dev/null
+++ b/tensorflow/core/framework/run_handler.cc
@@ -0,0 +1,249 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/framework/run_handler.h"
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/run_handler_util.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+
+// Contains the concrete implementation of the RunHandler.
+// Externally visible RunHandler class simply forwards the work to this one.
+class RunHandler::Impl {
+ public:
+  explicit Impl(RunHandlerPool::Impl* pool_impl) : pool_impl_(pool_impl) {
+    Reset();
+  }
+
+  ~Impl() {}
+
+  void set_inter_op_scheduling_range(std::uint_fast32_t start,
+                                     std::uint_fast32_t limit) {
+    inter_op_scheduling_range_.store(EncodePartition(start, limit),
+                                     std::memory_order_release);
+  }
+
+  std::uint_fast32_t inter_op_scheduling_range() const {
+    return inter_op_scheduling_range_.load(std::memory_order_acquire);
+  }
+
+  // Stores now time (in microseconds) since unix epoch when the handler is
+  // requested via RunHandlerPool::Get().
+  uint64 start_time_us() const { return start_time_us_; }
+
+  void ScheduleInterOpClosure(std::function<void()> fn);
+
+  void Reset();
+
+  RunHandlerPool::Impl* pool_impl() { return pool_impl_; }
+
+ private:
+  // Encoding/decoding logic for storing [start, limit) into a single
+  // uint_fast32_t int. We assume that pool_num_threads < (1 << 16).
+  const int kMaxPartitionBits = 16;
+  const int kMaxThreads = 1 << kMaxPartitionBits;
+
+  std::uint_fast32_t EncodePartition(std::uint_fast32_t start,
+                                     std::uint_fast32_t limit) {
+    return (start << kMaxPartitionBits) | limit;
+  }
+
+  void DecodePartition(std::uint_fast32_t val, std::uint_fast32_t* start,
+                       std::uint_fast32_t* limit) {
+    *limit = val & (kMaxThreads - 1);
+    val >>= kMaxPartitionBits;
+    *start = val;
+  }
+
+  std::atomic_uint_fast32_t inter_op_scheduling_range_;
+  RunHandlerPool::Impl* pool_impl_;  // NOT OWNED.
+  uint64 start_time_us_;
+};
+
+// Contains shared state across all run handlers present in the pool. Also
+// responsible for pool management decisions.
+// This class is thread safe.
+class RunHandlerPool::Impl {
+ public:
+  explicit Impl(int num_inter_op_threads)
+      : max_handlers_(128),
+        inter_op_thread_pool_(new thread::ThreadPool(
+            Env::Default(), ThreadOptions(), "inter_op", num_inter_op_threads)),
+        iterations_(0) {
+    VLOG(1) << "Creating a RunHandlerPool with max handlers: " << max_handlers_;
+    for (int i = 0; i < max_handlers_; ++i) {
+      handlers_.emplace_back(new RunHandler::Impl(this));
+      free_handlers_.push_back(handlers_.back().get());
+    }
+  }
+
+  ~Impl() {
+    // Sanity check that all handlers have been returned back to the pool before
+    // destruction.
+    DCHECK_EQ(handlers_.size(), max_handlers_);
+    DCHECK_EQ(free_handlers_.size(), handlers_.size());
+    DCHECK_EQ(sorted_active_handlers_.size(), 0);
+  }
+
+  thread::ThreadPool* inter_op_thread_pool() const {
+    return inter_op_thread_pool_.get();
+  }
+
+  std::unique_ptr<RunHandler> Get() LOCKS_EXCLUDED(mu_) {
+    mutex_lock l(mu_);
+    while (free_handlers_.empty()) {
+      one_handler_free_.wait(l);
+    }
+    // Remove the last entry from free_handlers_ and add to the end of
+    // sorted_active_handlers_.
+    auto* handler_impl = free_handlers_.back();
+    handler_impl->Reset();
+    // Sortedness isn't violated if we simply add at the end of the list, since
+    // handlers are expected to be obtained in increasing order of time.
+    sorted_active_handlers_.push_back(handler_impl);
+    DCHECK_LE(sorted_active_handlers_.size(), max_handlers_);
+    free_handlers_.pop_back();
+
+    RecomputePoolStatsLocked();
+    return WrapUnique<RunHandler>(new RunHandler(handler_impl));
+  }
+
+  void ReleaseHandler(RunHandler::Impl* handler) LOCKS_EXCLUDED(mu_) {
+    {
+      mutex_lock l(mu_);
+      DCHECK_GT(sorted_active_handlers_.size(), 0);
+
+      uint64 now = tensorflow::Env::Default()->NowMicros();
+      double elapsed = (now - handler->start_time_us()) / 1000.0;
+      time_hist_.Add(elapsed);
+
+      // Erase from and update sorted_active_handlers_. Add it to the end of
+      // free_handlers_.
+      auto iter = std::find(sorted_active_handlers_.begin(),
+                            sorted_active_handlers_.end(), handler);
+      DCHECK(iter != sorted_active_handlers_.end())
+          << "Unexpected handler: " << handler
+          << " is being requested for release";
+
+      // Remove this handler from this list and add it to the list of free
+      // handlers.
+      sorted_active_handlers_.erase(iter);
+      free_handlers_.push_back(handler);
+      DCHECK_LE(free_handlers_.size(), max_handlers_);
+
+      RecomputePoolStatsLocked();
+    }
+    one_handler_free_.notify_one();
+  }
+
+ private:
+  void RecomputePoolStatsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  // Maximum number of handlers pre-created during pool construction time. The
+  // number has been chosen expecting each handler might at least want 1
+  // inter-op thread for execution (during compute intensive workloads like
+  // inference).
+  const int max_handlers_;
+
+  // Thread safe part.
+  const std::unique_ptr<thread::ThreadPool> inter_op_thread_pool_;
+
+  // Thread compatible part used only by lock under RunHandlerPool.
+  // Handlers are sorted by start time.
+  std::vector<RunHandler::Impl*> sorted_active_handlers_ GUARDED_BY(mu_);
+  std::vector<RunHandler::Impl*> free_handlers_ GUARDED_BY(mu_);
+  std::vector<std::unique_ptr<RunHandler::Impl>> handlers_ GUARDED_BY(mu_);
+  // Histogram of elapsed runtime of every handler (in ms).
+  histogram::Histogram time_hist_ GUARDED_BY(mu_);
+  std::vector<std::uint_fast32_t> inter_op_start_ GUARDED_BY(mu_);
+  std::vector<std::uint_fast32_t> inter_op_limit_ GUARDED_BY(mu_);
+  int64 iterations_ GUARDED_BY(mu_);
+  condition_variable one_handler_free_;
+  mutex mu_;
+};
+
+void RunHandlerPool::Impl::RecomputePoolStatsLocked() {
+  int num_active_requests = sorted_active_handlers_.size();
+  if (num_active_requests == 0) return;
+
+  int num_threads = inter_op_thread_pool_->NumThreads();
+
+  inter_op_start_.resize(num_active_requests);
+  inter_op_limit_.resize(num_active_requests);
+
+  const int kMinThreadsPerRequest = 3;
+  ComputeInterOpSchedulingRanges(num_active_requests, num_threads,
+                                 kMinThreadsPerRequest, &inter_op_start_,
+                                 &inter_op_limit_);
+
+  for (int i = 0; i < num_active_requests; ++i) {
+    sorted_active_handlers_[i]->set_inter_op_scheduling_range(
+        inter_op_start_[i], inter_op_limit_[i]);
+  }
+
+  if (iterations_++ % 5000 == 0 && VLOG_IS_ON(1)) {
+    VLOG(1) << "Printing time histogram: " << time_hist_.ToString();
+    VLOG(1) << "Active session runs: " << num_active_requests;
+    uint64 now = tensorflow::Env::Default()->NowMicros();
+    string ranges_str = "";
+    string times_str = "";
+    for (int i = 0; i < num_active_requests; ++i) {
+      if (i > 0) {
+        times_str += " ";
+        ranges_str += " ";
+      }
+
+      times_str += strings::StrCat(
+          (now - sorted_active_handlers_[i]->start_time_us()) / 1000.0, " ms.");
+      ranges_str += strings::StrCat("[", inter_op_start_[i], ", ",
+                                    inter_op_limit_[i], ")");
+    }
+    VLOG(1) << "Elapsed times are: " << times_str;
+    VLOG(1) << "Ranges are: " << ranges_str;
+  }
+}
+
+void RunHandler::Impl::ScheduleInterOpClosure(std::function<void()> fn) {
+  std::uint_fast32_t start = 0, limit = 0;
+  DecodePartition(inter_op_scheduling_range(), &start, &limit);
+  pool_impl_->inter_op_thread_pool()->Schedule(std::move(fn));
+}
+
+void RunHandler::Impl::Reset() {
+  set_inter_op_scheduling_range(
+      0, pool_impl_->inter_op_thread_pool()->NumThreads());
+  start_time_us_ = tensorflow::Env::Default()->NowMicros();
+}
+
+RunHandlerPool::RunHandlerPool(int num_inter_op_threads)
+    : impl_(new Impl(num_inter_op_threads)) {}
+
+RunHandlerPool::~RunHandlerPool() {}
+
+std::unique_ptr<RunHandler> RunHandlerPool::Get() { return impl_->Get(); }
+
+RunHandler::RunHandler(Impl* impl) : impl_(impl) {}
+
+void RunHandler::ScheduleInterOpClosure(std::function<void()> fn) {
+  impl_->ScheduleInterOpClosure(std::move(fn));
+}
+
+RunHandler::~RunHandler() { impl_->pool_impl()->ReleaseHandler(impl_); }
+}  // namespace tensorflow
diff --git a/tensorflow/core/framework/run_handler.h b/tensorflow/core/framework/run_handler.h
new file mode 100644
index 0000000000..72fa6301b4
--- /dev/null
+++ b/tensorflow/core/framework/run_handler.h
@@ -0,0 +1,95 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_
+#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_
+
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/histogram/histogram.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+
+class RunHandler;
+
+// RunHandlerPool is a fixed size pool of pre-allocated RunHandlers
+// that can be used for tracking inter-op work for a given Session::Run().
+// RunHandler(s) in the pool are initially 'inactive'. A RunHandler becomes
+// 'active' when its unique_ptr is returned by Get() and is being used by a
+// client. It becomes 'inactive' once more when its unique_ptr gets destroyed.
+//
+// Expected usage:
+//
+// * Create a single RunHandlerPool (say run_handler_pool_).
+//
+// * When a Session::Run() is invoked, obtain a handler by:
+// auto handler = run_handler_pool_->Get();
+//
+// * Use handler for scheduling all inter-op work by:
+// handler->ScheduleInterOpClosure(closure);
+//
+// This class is thread safe.
+class RunHandlerPool {
+ public:
+  explicit RunHandlerPool(int num_inter_op_threads);
+  ~RunHandlerPool();
+
+  // Returns an inactive RunHandler from the pool.
+  //
+  // RunHandlers in RunHandlerPool are initially 'inactive'.
+  // A RunHandler becomes 'active' when its unique_ptr its returned by Get()
+  // and is being used by a client.  It becomes 'inactive' once more when the
+  // unique_ptr is destroyed.
+  //
+  // Will block unless there is an inactive handler.
+  std::unique_ptr<RunHandler> Get();
+
+ private:
+  class Impl;
+  friend class RunHandler;
+
+  std::unique_ptr<Impl> impl_;
+};
+
+// RunHandler can be used to schedule inter-op closures to run on a global pool
+// shared across all Session::Run(s).
+//
+// It can only be created via RunHandlerPool::Get().
+//
+// This class can be used instead of directly scheduling closures on a global
+// pool since it maintains a global view across all sessions and optimizes pool
+// scheduling to improve (median and tail) latency.
+//
+// This class is thread safe.
+class RunHandler {
+ public:
+  void ScheduleInterOpClosure(std::function<void()> fn);
+
+  ~RunHandler();
+
+ private:
+  class Impl;
+  friend class RunHandlerPool::Impl;
+
+  explicit RunHandler(Impl* impl);
+
+  Impl* impl_;  // NOT OWNED.
+};
+
+}  // end namespace tensorflow.
+
+#endif  // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_
diff --git a/tensorflow/core/framework/run_handler_util.cc b/tensorflow/core/framework/run_handler_util.cc
new file mode 100644
index 0000000000..3087998c69
--- /dev/null
+++ b/tensorflow/core/framework/run_handler_util.cc
@@ -0,0 +1,57 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/run_handler_util.h"
+
+#include <algorithm>
+#include <cmath>
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads,
+                                    int min_threads_per_request,
+                                    std::vector<std::uint_fast32_t>* start_vec,
+                                    std::vector<std::uint_fast32_t>* end_vec) {
+  // Each request is expected to have weight W[i] = num_active_requests - i.
+  // Therefore, total_weight = sum of all request weights.
+  float total_weight = 0.5f * num_active_requests * (num_active_requests + 1);
+  float demand_factor = static_cast<float>(num_threads) / total_weight;
+  float last_cumulative_weight = 0.0;
+  min_threads_per_request = std::max(1, min_threads_per_request);
+  for (int i = 0; i != num_active_requests; i++) {
+    float cumulative_weight =
+        static_cast<float>(i + 1) *
+        (num_active_requests - static_cast<float>(i) * 0.5f);
+    float weight = cumulative_weight - last_cumulative_weight;
+    // Quantize thread_demand by rounding up, and also satisfying
+    // `min_threads_per_request` constraint.
+    // Note: We subtract a small epsilon (0.00001) to prevent ceil(..) from
+    // rounding weights like 4.0 to 5.
+    int demand =
+        std::max(min_threads_per_request,
+                 static_cast<int>(ceil(weight * demand_factor - 0.00001f)));
+    // For the quantized range [start, end); compute the floor of real start,
+    // and expand downwards from there with length `demand` and adjust for
+    // boundary conditions.
+    int start = last_cumulative_weight * demand_factor;
+    int end = std::min(num_threads, start + demand);
+    start = std::max(0, std::min(start, end - demand));
+    start_vec->at(i) = start;
+    end_vec->at(i) = end;
+    last_cumulative_weight = cumulative_weight;
+  }
+}
+}  // namespace tensorflow
diff --git a/tensorflow/core/framework/run_handler_util.h b/tensorflow/core/framework/run_handler_util.h
new file mode 100644
index 0000000000..c0c36aeccb
--- /dev/null
+++ b/tensorflow/core/framework/run_handler_util.h
@@ -0,0 +1,43 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_
+#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_
+
+#include <cstdint>
+#include <vector>
+
+namespace tensorflow {
+
+// Assign thread ranges to requests.
+// Requests are numbered 0...num_active_requests-1, and
+// threads are numbered 0...num_threads-1.
+// On return, the range start_vec->at(i)...end_vec->at(i)-1
+// indicates the subrange of the threads available to request i.
+// The ranges given to different requests may overlap.
+// Lower numbered requests will tend to be assigned more threads.
+// Thus, a client might associate older requests with lower
+// array indices so they receive access to more threads.
+// However, the routine ensures that each request is given access
+// to at least min(min_threads_per_request, num_threads)  threads.
+// Every thread will be assigned to at least one request range,
+// assuming there is at least one request.
+void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads,
+                                    int min_threads_per_request,
+                                    std::vector<std::uint_fast32_t>* start_vec,
+                                    std::vector<std::uint_fast32_t>* end_vec);
+
+}  // end namespace tensorflow
+#endif  // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_
diff --git a/tensorflow/core/framework/run_handler_util_test.cc b/tensorflow/core/framework/run_handler_util_test.cc
new file mode 100644
index 0000000000..a1928c132b
--- /dev/null
+++ b/tensorflow/core/framework/run_handler_util_test.cc
@@ -0,0 +1,93 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/run_handler_util.h"
+
+#include <vector>
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/test.h"
+namespace tensorflow {
+namespace {
+
+void VerifyFunction(int num_active_requests, int num_threads,
+                    int min_threads_per_request, bool print_stats = false) {
+  if (print_stats) {
+    LOG(INFO) << "Test case# num_active_requests: " << num_active_requests
+              << " num_threads: " << num_threads
+              << " min_threads: " << min_threads_per_request;
+  }
+  std::vector<std::uint_fast32_t> start(num_active_requests);
+  std::vector<std::uint_fast32_t> end(num_active_requests);
+
+  ComputeInterOpSchedulingRanges(num_active_requests, num_threads,
+                                 min_threads_per_request, &start, &end);
+  string range_str = "";
+  for (int i = 0; i < num_active_requests; ++i) {
+    if (i > 0) range_str += " ";
+    range_str += strings::StrCat("[", start[i], ", ", end[i], ")");
+
+    ASSERT_GE(start[i], 0) << range_str;
+    ASSERT_LE(end[i], num_threads) << range_str;
+    if (i > 0) {
+      // Due to linearly decreasing demand, #threads(i - 1) >= #threads(i)
+      ASSERT_GE(end[i - 1] - start[i - 1], end[i] - start[i]) << range_str;
+      // No missing threads.
+      ASSERT_GE(end[i - 1], start[i]) << range_str;
+    }
+    // Each interval is at least of size 'min_threads_per_request'.
+    ASSERT_GE((end[i] - start[i]), min_threads_per_request) << range_str;
+    // Verify that assigned (quantized) threads is not overly estimated
+    // from real demand, when the demand is high (>=
+    // min_threads_per_request).
+    float entry_weight = num_active_requests - i;
+    float total_weight = 0.5f * num_active_requests * (num_active_requests + 1);
+    float thread_demand = (entry_weight * num_threads) / total_weight;
+    if (thread_demand > min_threads_per_request) {
+      // We expect some over-estimation of threads due to quantization,
+      // but we hope it's not more than 1 extra thread.
+      ASSERT_NEAR(end[i] - start[i], thread_demand, 1.0)
+          << "Ranges: " << range_str << " thread_demand: " << thread_demand
+          << " i: " << i;
+    }
+  }
+  ASSERT_EQ(end[num_active_requests - 1], num_threads);
+  ASSERT_EQ(start[0], 0);
+  if (print_stats) {
+    LOG(INFO) << "Assigned ranges: " << range_str;
+  }
+}
+
+TEST(RunHandlerUtilTest, TestComputeInterOpSchedulingRanges) {
+  const int kMinThreadsPerRequestBound = 12;
+  const int kMaxActiveRequests = 128;
+  const int kMaxThreads = 128;
+
+  for (int min_threads_per_request = 1;
+       min_threads_per_request <= kMinThreadsPerRequestBound;
+       ++min_threads_per_request) {
+    for (int num_active_requests = 1; num_active_requests <= kMaxActiveRequests;
+         ++num_active_requests) {
+      for (int num_threads = min_threads_per_request;
+           num_threads <= kMaxThreads; ++num_threads) {
+        VerifyFunction(num_active_requests, num_threads,
+                       min_threads_per_request);
+      }
+    }
+  }
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index 85cd02350a..104ab039cb 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -453,6 +453,11 @@ message RunOptions {
     // same group_key value (in a distributed computation where tasks
     // run disjoint graphs).
     int64 collective_graph_key = 1;
+    // If true, then operations (using the inter-op pool) across all
+    // session::run() calls will be centrally scheduled, optimizing for (median
+    // and tail) latency.
+    // Consider using this option for CPU-bound workloads like inference.
+    bool use_run_handler_pool = 2;
   };
 
   Experimental experimental = 8;
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
index 537e73aa89..47b5b56faf 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
@@ -8,5 +8,11 @@ tf_proto {
       label: LABEL_OPTIONAL
       type: TYPE_INT64
     }
+    field {
+      name: "use_run_handler_pool"
+      number: 2
+      label: LABEL_OPTIONAL
+      type: TYPE_BOOL
+    }
   }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
index cec04a2bf0..c0c2e7b9f8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
@@ -55,6 +55,12 @@ tf_proto {
         label: LABEL_OPTIONAL
         type: TYPE_INT64
       }
+      field {
+        name: "use_run_handler_pool"
+        number: 2
+        label: LABEL_OPTIONAL
+        type: TYPE_BOOL
+      }
     }
     enum_type {
       name: "TraceLevel"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt
index 537e73aa89..47b5b56faf 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt
@@ -8,5 +8,11 @@ tf_proto {
       label: LABEL_OPTIONAL
       type: TYPE_INT64
     }
+    field {
+      name: "use_run_handler_pool"
+      number: 2
+      label: LABEL_OPTIONAL
+      type: TYPE_BOOL
+    }
   }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt
index cec04a2bf0..c0c2e7b9f8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt
@@ -55,6 +55,12 @@ tf_proto {
         label: LABEL_OPTIONAL
         type: TYPE_INT64
       }
+      field {
+        name: "use_run_handler_pool"
+        number: 2
+        label: LABEL_OPTIONAL
+        type: TYPE_BOOL
+      }
     }
     enum_type {
       name: "TraceLevel"
-- 
GitLab


From 17d73444f332490c733d37063710e72dc69d1141 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 14:10:19 -0700
Subject: [PATCH 0880/1357] Update hooks for distributed jobs with a master
 node, to ensure that summaries are written at the correct interval for jobs
 with long-running evaluations.

PiperOrigin-RevId: 214993119
---
 tensorflow/python/estimator/estimator.py      | 34 ++++++-
 tensorflow/python/estimator/estimator_test.py | 94 +++++++++++++++++++
 2 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index b933cedb99..34faf03bb0 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1414,6 +1414,36 @@ class Estimator(object):
         # It is expected to have one CheckpointSaverHook. If multiple, we pick
         # up the first one to add listener.
         saver_hooks[0]._listeners.extend(saving_listeners)  # pylint: disable=protected-access
+
+    # Add summary hooks to worker 0 if we are running with a master, to ensure
+    # that summaries are written at correct intervals even with long-running
+    # evaluations.
+    save_summary_steps = self._config.save_summary_steps
+    log_step_count_steps = self._config.log_step_count_steps
+    if (self._config.cluster_spec and self._config.cluster_spec.jobs and
+        (run_config.TaskType.MASTER in self._config.cluster_spec.jobs)):
+      # Update config values to prevent the default hooks from being created on
+      # the master or other workers.
+      save_summary_steps = 0
+      log_step_count_steps = None
+
+      if (self._config.task_type == run_config.TaskType.WORKER and
+          self._config.task_id == 0):
+        if (self._config.save_summary_steps and
+            self._config.save_summary_steps > 0):
+          worker_hooks.append(
+              training.SummarySaverHook(
+                  save_steps=self._config.save_summary_steps,
+                  output_dir=self._config.model_dir,
+                  scaffold=estimator_spec.scaffold))
+
+        if (self._config.log_step_count_steps and
+            self._config.log_step_count_steps > 0):
+          worker_hooks.append(
+              training.StepCounterHook(
+                  every_n_steps=self._config.log_step_count_steps,
+                  output_dir=self._config.model_dir))
+
     with training.MonitoredTrainingSession(
         master=self._config.master,
         is_chief=self._config.is_chief,
@@ -1423,9 +1453,9 @@ class Estimator(object):
         chief_only_hooks=(
             tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)),
         save_checkpoint_secs=0,  # Saving is handled by a hook.
-        save_summaries_steps=self._config.save_summary_steps,
+        save_summaries_steps=save_summary_steps,
         config=self._session_config,
-        log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
+        log_step_count_steps=log_step_count_steps) as mon_sess:
       loss = None
       while not mon_sess.should_stop():
         _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
index bc2504ca19..246dfb1a4b 100644
--- a/tensorflow/python/estimator/estimator_test.py
+++ b/tensorflow/python/estimator/estimator_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import functools
 import glob
+import json
 import os
 import tempfile
 
@@ -969,6 +970,99 @@ class EstimatorTrainTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'train_and_evaluate'):
       est.train(dummy_input_fn, steps=1)
 
+  def test_master_distributed_hooks(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.PS: ['localhost:1234'],
+            run_config.TaskType.WORKER: ['localhost:1235'],
+            run_config.TaskType.MASTER: ['localhost:1236']
+        },
+        'task': {
+            'type': run_config.TaskType.MASTER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig())
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
+
+  def test_master_distributed_hooks_for_worker_0(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.PS: ['localhost:1234'],
+            run_config.TaskType.WORKER: ['localhost:1235'],
+            run_config.TaskType.MASTER: ['localhost:1236']
+        },
+        'task': {
+            'type': run_config.TaskType.WORKER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig())
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertTrue(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertTrue(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
+
+  def test_master_distributed_hooks_for_worker_nonzero(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.PS: ['localhost:1234'],
+            run_config.TaskType.WORKER: ['localhost:1235', 'localhost:1237'],
+            run_config.TaskType.MASTER: ['localhost:1236']
+        },
+        'task': {
+            'type': run_config.TaskType.WORKER,
+            'index': 1
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig())
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
+
 
 def _model_fn_with_eval_metric_ops(features, labels, mode, params):
   _, _ = features, labels
-- 
GitLab


From 5863cad53afad2fcc5d8a8dac7c2cf88e0e8ebb9 Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Fri, 28 Sep 2018 14:36:16 -0700
Subject: [PATCH 0881/1357] Copy shape into CollectiveParams only once per
 CollectiveReduce kernel.

PiperOrigin-RevId: 214997213
---
 tensorflow/core/kernels/collective_ops.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/collective_ops.cc b/tensorflow/core/kernels/collective_ops.cc
index fa959b5a0e..82e2913b64 100644
--- a/tensorflow/core/kernels/collective_ops.cc
+++ b/tensorflow/core/kernels/collective_ops.cc
@@ -132,7 +132,6 @@ class CollectiveReduceOpKernel : public CollectiveOpKernel {
             "Failed to get CollectiveExecutor from OpKernelContext for Op ",
             col_params_.name),
         done);
-    col_params_.instance.shape = c->input(0).shape();
     // Allocate output on the first pass through this function.  This must be
     // done immediately, while we're still in the executor thread.  Otherwise
     // the memory is not guaranteed to be unused by any concurrently executing
@@ -144,6 +143,7 @@ class CollectiveReduceOpKernel : public CollectiveOpKernel {
                            c->forward_input_or_allocate_output(
                                {0}, 0, c->input(0).shape(), &output),
                            done);
+      col_params_.instance.shape = c->input(0).shape();
     }
     if (!CanProceedWithCompute(c, col_exec, done)) return;
     auto actual_done = [c, col_exec, done](const Status& s) {
-- 
GitLab


From dee0481c07ed952d01b12704c89e50869a383c68 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Fri, 28 Sep 2018 15:07:29 -0700
Subject: [PATCH 0882/1357] Adding FeatureColumn V2 support for linear canned
 estimators.

Since we now have support for FeatureColumnV2 for both DNN and Linear models,
adding tests for the combined canned estimators as well.

PiperOrigin-RevId: 215002573
---
 tensorflow/python/estimator/BUILD             |   1 +
 tensorflow/python/estimator/canned/dnn.py     |   3 -
 .../canned/dnn_linear_combined_test.py        | 268 ++++++++++++++----
 tensorflow/python/estimator/canned/linear.py  |  83 ++++--
 .../python/estimator/canned/linear_test.py    | 138 ++++++++-
 .../estimator/canned/linear_testing_utils.py  | 184 +++++++-----
 tensorflow/python/feature_column/BUILD        |   2 +-
 .../feature_column/feature_column_v2.py       | 100 ++++---
 .../feature_column/feature_column_v2_test.py  |   4 +-
 9 files changed, 579 insertions(+), 204 deletions(-)

diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index ba1b7ec2b5..1c4c5951df 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -344,6 +344,7 @@ py_test(
         ":pandas_io",
         ":prediction_keys",
         "//tensorflow:tensorflow_py_no_contrib",
+        "@absl_py//absl/testing:parameterized",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py
index 97971f9561..a6c2aaa7d9 100644
--- a/tensorflow/python/estimator/canned/dnn.py
+++ b/tensorflow/python/estimator/canned/dnn.py
@@ -131,9 +131,7 @@ class _DNNModel(training.Model):
                name=None,
                **kwargs):
     super(_DNNModel, self).__init__(name=name, **kwargs)
-    self._is_v2 = False
     if feature_column_v2.is_feature_column_v2(feature_columns):
-      self._is_v2 = True
       self._input_layer = feature_column_v2.FeatureLayer(
           feature_columns=feature_columns,
           name='input_layer',
@@ -190,7 +188,6 @@ class _DNNModel(training.Model):
           _scope=logits_scope)
       self._add_layer(self._logits_layer, logits_scope.name)
       self._logits_scope_name = logits_scope.name
-    self._logits_layer._use_resource_variables = False  # pylint: disable=protected-access
     self._input_layer_partitioner = input_layer_partitioner
 
   def call(self, features, mode):
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
index d16318659b..ae968e717a 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import shutil
 import tempfile
 
+from absl.testing import parameterized
 import numpy as np
 import six
 
@@ -35,6 +36,7 @@ from tensorflow.python.estimator.export import export
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.estimator.inputs import pandas_io
 from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import nn
@@ -119,7 +121,16 @@ class LinearOnlyRegressorPartitionerTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorPartitionerV2Test(
+    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
 
 class LinearOnlyRegressorEvaluationTest(
@@ -128,7 +139,16 @@ class LinearOnlyRegressorEvaluationTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorEvaluationV2Test(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
 
 class LinearOnlyRegressorPredictTest(
@@ -137,7 +157,16 @@ class LinearOnlyRegressorPredictTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorPredictV2Test(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
 
 class LinearOnlyRegressorIntegrationTest(
@@ -146,7 +175,16 @@ class LinearOnlyRegressorIntegrationTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorIntegrationV2Test(
+    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
 
 class LinearOnlyRegressorTrainingTest(
@@ -155,7 +193,16 @@ class LinearOnlyRegressorTrainingTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorTrainingV2Test(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
 
 def _linear_classifier_fn(feature_columns,
@@ -185,7 +232,18 @@ class LinearOnlyClassifierTrainingTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn)
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearOnlyClassifierTrainingV2Test(
+    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
 
 
 class LinearOnlyClassifierClassesEvaluationTest(
@@ -194,7 +252,18 @@ class LinearOnlyClassifierClassesEvaluationTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn)
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearOnlyClassifierClassesEvaluationV2Test(
+    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
 
 
 class LinearOnlyClassifierPredictTest(
@@ -203,7 +272,18 @@ class LinearOnlyClassifierPredictTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn)
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearOnlyClassifierPredictV2Test(
+    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
 
 
 class LinearOnlyClassifierIntegrationTest(
@@ -212,9 +292,21 @@ class LinearOnlyClassifierIntegrationTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn)
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearOnlyClassifierIntegrationV2Test(
+    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
 
 
+@parameterized.parameters((feature_column,), (feature_column_v2,))
 class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
 
   def setUp(self):
@@ -225,13 +317,15 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
       writer_cache.FileWriterCache.clear()
       shutil.rmtree(self._model_dir)
 
-  def _test_complete_flow(
-      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      label_dimension, batch_size):
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, batch_size,
+                          fc_impl):
     linear_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
     dnn_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
     feature_columns = linear_feature_columns + dnn_feature_columns
     est = dnn_linear_combined.DNNLinearCombinedRegressor(
         linear_feature_columns=linear_feature_columns,
@@ -257,14 +351,14 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, label_dimension), predictions.shape)
 
     # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self):
+  def test_numpy_input_fn(self, fc_impl):
     """Tests complete flow with numpy_input_fn."""
     label_dimension = 2
     batch_size = 10
@@ -293,9 +387,10 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
-  def test_pandas_input_fn(self):
+  def test_pandas_input_fn(self, fc_impl):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -326,9 +421,10 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
-  def test_input_fn_from_parse_example(self):
+  def test_input_fn_from_parse_example(self, fc_impl):
     """Tests complete flow with input_fn constructed from parse_example."""
     label_dimension = 2
     batch_size = 10
@@ -376,7 +472,8 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         predict_input_fn=_predict_input_fn,
         input_dimension=label_dimension,
         label_dimension=label_dimension,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
 
 # A function to mimic dnn-classifier init reuse same tests.
@@ -407,7 +504,16 @@ class DNNOnlyClassifierEvaluateTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
-        self, _dnn_classifier_fn)
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNOnlyClassifierEvaluateV2Test(
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
 
 
 class DNNOnlyClassifierPredictTest(
@@ -416,7 +522,16 @@ class DNNOnlyClassifierPredictTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
-        self, _dnn_classifier_fn)
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNOnlyClassifierPredictV2Test(
+    dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
 
 
 class DNNOnlyClassifierTrainTest(
@@ -425,7 +540,16 @@ class DNNOnlyClassifierTrainTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
-        self, _dnn_classifier_fn)
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNOnlyClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
+                                   test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
 
 
 # A function to mimic dnn-regressor init reuse same tests.
@@ -454,7 +578,16 @@ class DNNOnlyRegressorEvaluateTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_regressor_fn)
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNOnlyRegressorEvaluateV2Test(
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
 
 
 class DNNOnlyRegressorPredictTest(
@@ -463,7 +596,16 @@ class DNNOnlyRegressorPredictTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_regressor_fn)
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNOnlyRegressorPredictV2Test(
+    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
 
 
 class DNNOnlyRegressorTrainTest(
@@ -472,9 +614,19 @@ class DNNOnlyRegressorTrainTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_regressor_fn)
+        self, _dnn_regressor_fn, fc_impl=feature_column)
 
 
+class DNNOnlyRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
+                                  test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
+
+
+@parameterized.parameters((feature_column,), (feature_column_v2,))
 class DNNLinearCombinedClassifierIntegrationTest(test.TestCase):
 
   def setUp(self):
@@ -488,13 +640,14 @@ class DNNLinearCombinedClassifierIntegrationTest(test.TestCase):
   def _as_label(self, data_in_float):
     return np.rint(data_in_float).astype(np.int64)
 
-  def _test_complete_flow(
-      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      n_classes, batch_size):
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, n_classes, batch_size, fc_impl):
     linear_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
     dnn_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
     feature_columns = linear_feature_columns + dnn_feature_columns
     est = dnn_linear_combined.DNNLinearCombinedClassifier(
         linear_feature_columns=linear_feature_columns,
@@ -520,14 +673,14 @@ class DNNLinearCombinedClassifierIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
 
     # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self):
+  def test_numpy_input_fn(self, fc_impl):
     """Tests complete flow with numpy_input_fn."""
     n_classes = 3
     input_dimension = 2
@@ -559,9 +712,10 @@ class DNNLinearCombinedClassifierIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
-  def test_pandas_input_fn(self):
+  def test_pandas_input_fn(self, fc_impl):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -593,9 +747,10 @@ class DNNLinearCombinedClassifierIntegrationTest(test.TestCase):
         predict_input_fn=predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
-  def test_input_fn_from_parse_example(self):
+  def test_input_fn_from_parse_example(self, fc_impl):
     """Tests complete flow with input_fn constructed from parse_example."""
     input_dimension = 2
     n_classes = 3
@@ -647,9 +802,11 @@ class DNNLinearCombinedClassifierIntegrationTest(test.TestCase):
         predict_input_fn=_predict_input_fn,
         input_dimension=input_dimension,
         n_classes=n_classes,
-        batch_size=batch_size)
+        batch_size=batch_size,
+        fc_impl=fc_impl)
 
 
+@parameterized.parameters((feature_column,), (feature_column_v2,))
 class DNNLinearCombinedTests(test.TestCase):
 
   def setUp(self):
@@ -681,9 +838,9 @@ class DNNLinearCombinedTests(test.TestCase):
 
     return optimizer_mock
 
-  def test_train_op_calls_both_dnn_and_linear(self):
+  def test_train_op_calls_both_dnn_and_linear(self, fc_impl):
     opt = gradient_descent.GradientDescentOptimizer(1.)
-    x_column = feature_column.numeric_column('x')
+    x_column = fc_impl.numeric_column('x')
     input_fn = numpy_io.numpy_input_fn(
         x={'x': np.array([[0.], [1.]])},
         y=np.array([[0.], [1.]]),
@@ -708,7 +865,7 @@ class DNNLinearCombinedTests(test.TestCase):
                      checkpoint_utils.load_variable(
                          self._model_dir, 'dnn_called'))
 
-  def test_dnn_and_linear_logits_are_added(self):
+  def test_dnn_and_linear_logits_are_added(self, fc_impl):
     with ops.Graph().as_default():
       variables_lib.Variable([[1.0]], name='linear/linear_model/x/weights')
       variables_lib.Variable([2.0], name='linear/linear_model/bias_weights')
@@ -719,7 +876,7 @@ class DNNLinearCombinedTests(test.TestCase):
       variables_lib.Variable(1, name='global_step', dtype=dtypes.int64)
       linear_testing_utils.save_variables_to_ckpt(self._model_dir)
 
-    x_column = feature_column.numeric_column('x')
+    x_column = fc_impl.numeric_column('x')
     est = dnn_linear_combined.DNNLinearCombinedRegressor(
         linear_feature_columns=[x_column],
         dnn_hidden_units=[1],
@@ -737,6 +894,7 @@ class DNNLinearCombinedTests(test.TestCase):
         next(est.predict(input_fn=input_fn)))
 
 
+@parameterized.parameters((feature_column,), (feature_column_v2,))
 class DNNLinearCombinedWarmStartingTest(test.TestCase):
 
   def setUp(self):
@@ -758,11 +916,11 @@ class DNNLinearCombinedWarmStartingTest(test.TestCase):
     writer_cache.FileWriterCache.clear()
     shutil.rmtree(self._ckpt_and_vocab_dir)
 
-  def test_classifier_basic_warm_starting(self):
+  def test_classifier_basic_warm_starting(self, fc_impl):
     """Tests correctness of DNNLinearCombinedClassifier default warm-start."""
-    age = feature_column.numeric_column('age')
-    city = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_list(
+    age = fc_impl.numeric_column('age')
+    city = fc_impl.embedding_column(
+        fc_impl.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
 
@@ -798,11 +956,11 @@ class DNNLinearCombinedWarmStartingTest(test.TestCase):
           dnn_lc_classifier.get_variable_value(variable_name),
           warm_started_dnn_lc_classifier.get_variable_value(variable_name))
 
-  def test_regressor_basic_warm_starting(self):
+  def test_regressor_basic_warm_starting(self, fc_impl):
     """Tests correctness of DNNLinearCombinedRegressor default warm-start."""
-    age = feature_column.numeric_column('age')
-    city = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_list(
+    age = fc_impl.numeric_column('age')
+    city = fc_impl.embedding_column(
+        fc_impl.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
 
@@ -836,11 +994,11 @@ class DNNLinearCombinedWarmStartingTest(test.TestCase):
           dnn_lc_regressor.get_variable_value(variable_name),
           warm_started_dnn_lc_regressor.get_variable_value(variable_name))
 
-  def test_warm_starting_selective_variables(self):
+  def test_warm_starting_selective_variables(self, fc_impl):
     """Tests selecting variables to warm-start."""
-    age = feature_column.numeric_column('age')
-    city = feature_column.embedding_column(
-        feature_column.categorical_column_with_vocabulary_list(
+    age = fc_impl.numeric_column('age')
+    city = fc_impl.embedding_column(
+        fc_impl.categorical_column_with_vocabulary_list(
             'city', vocabulary_list=['Mountain View', 'Palo Alto']),
         dimension=5)
 
diff --git a/tensorflow/python/estimator/canned/linear.py b/tensorflow/python/estimator/canned/linear.py
index 115dd18518..8b96284bd3 100644
--- a/tensorflow/python/estimator/canned/linear.py
+++ b/tensorflow/python/estimator/canned/linear.py
@@ -25,14 +25,18 @@ import six
 from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables as variable_ops
 from tensorflow.python.ops.losses import losses
 from tensorflow.python.summary import summary
 from tensorflow.python.training import ftrl
+from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import estimator_export
 
 
@@ -46,23 +50,42 @@ def _get_default_optimizer(feature_columns):
   return ftrl.FtrlOptimizer(learning_rate=learning_rate)
 
 
-def _compute_fraction_of_zero(cols_to_vars):
-  """Given a linear cols_to_vars dict, compute the fraction of zero weights.
+def _get_expanded_variable_list(var_list):
+  """Given a list of variables, expands them if they are partitioned.
 
   Args:
-    cols_to_vars: A dictionary mapping FeatureColumns to lists of tf.Variables
-      like one returned from feature_column_lib.linear_model.
+    var_list: A list of variables.
+
+  Returns:
+    A list of variables where each partitioned variable is expanded to its
+    components.
+  """
+  returned_list = []
+  for variable in var_list:
+    if (isinstance(variable, variable_ops.Variable) or
+        resource_variable_ops.is_resource_variable(variable)):
+      returned_list.append(variable)  # Single variable case.
+    else:  # Must be a PartitionedVariable, so convert into a list.
+      returned_list.extend(list(variable))
+  return returned_list
+
+
+# TODO(rohanj): Consider making this a public utility method.
+def _compute_fraction_of_zero(variables):
+  """Given a linear variables list, compute the fraction of zero weights.
+
+  Args:
+    variables: A list or list of list of variables
 
   Returns:
     The fraction of zeros (sparsity) in the linear model.
   """
   all_weight_vars = []
-  for var_or_var_list in cols_to_vars.values():
+  for var_or_var_list in variables:
+    var_list = nest.flatten(var_or_var_list)
     # Skip empty-lists associated with columns that created no Variables.
-    if var_or_var_list:
-      all_weight_vars += [
-          array_ops.reshape(var, [-1]) for var in var_or_var_list
-      ]
+    if var_list:
+      all_weight_vars += [array_ops.reshape(var, [-1]) for var in var_list]
   return nn.zero_fraction(array_ops.concat(all_weight_vars, axis=0))
 
 
@@ -92,14 +115,36 @@ def _linear_logit_fn_builder(units, feature_columns, sparse_combiner='sum'):
     Returns:
       A `Tensor` representing the logits.
     """
-    cols_to_vars = {}
-    logits = feature_column_lib.linear_model(
-        features=features,
-        feature_columns=feature_columns,
-        units=units,
-        sparse_combiner=sparse_combiner,
-        cols_to_vars=cols_to_vars)
-    bias = cols_to_vars.pop('bias')
+    if feature_column_v2.is_feature_column_v2(feature_columns):
+      shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
+      linear_model = feature_column_v2.LinearModel(
+          feature_columns=feature_columns,
+          units=units,
+          sparse_combiner=sparse_combiner,
+          shared_state_manager=shared_state_manager)
+      logits = linear_model(features)
+      bias = linear_model.bias_variable
+
+      # We'd like to get all the non-bias variables associated with this
+      # LinearModel. This includes the shared embedding variables as well.
+      variables = linear_model.variables
+      variables.remove(bias)
+      variables.extend(shared_state_manager.variables)
+
+      # Expand (potential) Partitioned variables
+      bias = _get_expanded_variable_list([bias])
+      variables = _get_expanded_variable_list(variables)
+    else:
+      linear_model = feature_column._LinearModel(  # pylint: disable=protected-access
+          feature_columns=feature_columns,
+          units=units,
+          sparse_combiner=sparse_combiner,
+          name='linear_model')
+      logits = linear_model(features)
+      cols_to_vars = linear_model.cols_to_vars()
+      bias = cols_to_vars.pop('bias')
+      variables = cols_to_vars.values()
+
     if units > 1:
       summary.histogram('bias', bias)
     else:
@@ -107,7 +152,7 @@ def _linear_logit_fn_builder(units, feature_columns, sparse_combiner='sum'):
       # so we should provide a scalar summary.
       summary.scalar('bias', bias[0][0])
     summary.scalar('fraction_of_zero_weights',
-                   _compute_fraction_of_zero(cols_to_vars))
+                   _compute_fraction_of_zero(variables))
     return logits
 
   return linear_logit_fn
diff --git a/tensorflow/python/estimator/canned/linear_test.py b/tensorflow/python/estimator/canned/linear_test.py
index 59a230417d..3e6da5de22 100644
--- a/tensorflow/python/estimator/canned/linear_test.py
+++ b/tensorflow/python/estimator/canned/linear_test.py
@@ -20,6 +20,8 @@ from __future__ import print_function
 
 from tensorflow.python.estimator.canned import linear
 from tensorflow.python.estimator.canned import linear_testing_utils
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.platform import test
 
 
@@ -40,7 +42,16 @@ class LinearRegressorPartitionerTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearRegressorPartitionerV2Test(
+    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
 
 class LinearRegressorEvaluationTest(
@@ -49,7 +60,16 @@ class LinearRegressorEvaluationTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearRegressorEvaluationV2Test(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
 
 class LinearRegressorPredictTest(
@@ -58,7 +78,16 @@ class LinearRegressorPredictTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearRegressorPredictV2Test(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
 
 class LinearRegressorIntegrationTest(
@@ -67,7 +96,16 @@ class LinearRegressorIntegrationTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearRegressorIntegrationV2Test(
+    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
 
 class LinearRegressorTrainingTest(
@@ -76,19 +114,37 @@ class LinearRegressorTrainingTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
-        self, _linear_regressor_fn)
+        self, _linear_regressor_fn, fc_lib=feature_column)
 
 
-# Tests for Linear Classifier.
+class LinearRegressorTrainingV2Test(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
 
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
 
+
+# Tests for Linear Classifier.
 class LinearClassifierTrainingTest(
     linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
 
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn)
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearClassifierTrainingV2Test(
+    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
 
 
 class LinearClassifierEvaluationTest(
@@ -97,7 +153,18 @@ class LinearClassifierEvaluationTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn)
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearClassifierEvaluationV2Test(
+    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
 
 
 class LinearClassifierPredictTest(
@@ -106,7 +173,18 @@ class LinearClassifierPredictTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn)
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearClassifierPredictV2Test(
+    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
 
 
 class LinearClassifierIntegrationTest(
@@ -115,7 +193,18 @@ class LinearClassifierIntegrationTest(
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn)
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearClassifierIntegrationV2Test(
+    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
 
 
 # Tests for Linear logit_fn.
@@ -124,7 +213,17 @@ class LinearLogitFnTest(linear_testing_utils.BaseLinearLogitFnTest,
 
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearLogitFnTest.__init__(self)
+    linear_testing_utils.BaseLinearLogitFnTest.__init__(
+        self, fc_lib=feature_column)
+
+
+class LinearLogitFnV2Test(linear_testing_utils.BaseLinearLogitFnTest,
+                          test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearLogitFnTest.__init__(
+        self, fc_lib=feature_column_v2)
 
 
 # Tests for warm-starting with Linear logit_fn.
@@ -134,7 +233,22 @@ class LinearWarmStartingTest(linear_testing_utils.BaseLinearWarmStartingTest,
   def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
     test.TestCase.__init__(self, methodName)
     linear_testing_utils.BaseLinearWarmStartingTest.__init__(
-        self, _linear_classifier_fn, _linear_regressor_fn)
+        self,
+        _linear_classifier_fn,
+        _linear_regressor_fn,
+        fc_lib=feature_column)
+
+
+class LinearWarmStartingV2Test(linear_testing_utils.BaseLinearWarmStartingTest,
+                               test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearWarmStartingTest.__init__(
+        self,
+        _linear_classifier_fn,
+        _linear_regressor_fn,
+        fc_lib=feature_column_v2)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py
index 65cdd50061..827352a70b 100644
--- a/tensorflow/python/estimator/canned/linear_testing_utils.py
+++ b/tensorflow/python/estimator/canned/linear_testing_utils.py
@@ -37,7 +37,8 @@ from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.export import export
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.estimator.inputs import pandas_io
-from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
@@ -152,8 +153,9 @@ class CheckPartitionerVarHook(session_run_hook.SessionRunHook):
 
 class BaseLinearRegressorPartitionerTest(object):
 
-  def __init__(self, linear_regressor_fn):
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
     self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -173,7 +175,7 @@ class BaseLinearRegressorPartitionerTest(object):
       return [partitions, 1] if shape[0] == x_dim else [1]
 
     regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.categorical_column_with_hash_bucket(
+        feature_columns=(self._fc_lib.categorical_column_with_hash_bucket(
             'language', hash_bucket_size=x_dim),),
         partitioner=_partitioner,
         model_dir=self._model_dir)
@@ -209,9 +211,8 @@ class BaseLinearRegressorPartitionerTest(object):
         '_get_replica_device_setter',
         return_value=lambda _: '/cpu:0'):
       linear_regressor = self._linear_regressor_fn(
-          feature_columns=(
-              feature_column_lib.categorical_column_with_hash_bucket(
-                  'language', hash_bucket_size=x_dim),),
+          feature_columns=(self._fc_lib.categorical_column_with_hash_bucket(
+              'language', hash_bucket_size=x_dim),),
           config=FakeRunConfig(),
           model_dir=self._model_dir)
 
@@ -232,8 +233,9 @@ class BaseLinearRegressorPartitionerTest(object):
 # TODO(b/36813849): Add tests with dynamic shape inputs using placeholders.
 class BaseLinearRegressorEvaluationTest(object):
 
-  def __init__(self, linear_regressor_fn):
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
     self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -252,7 +254,7 @@ class BaseLinearRegressorEvaluationTest(object):
       save_variables_to_ckpt(self._model_dir)
 
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         model_dir=self._model_dir)
     eval_metrics = linear_regressor.evaluate(
         input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1)
@@ -276,7 +278,7 @@ class BaseLinearRegressorEvaluationTest(object):
       save_variables_to_ckpt(self._model_dir)
 
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         model_dir=self._model_dir)
     eval_metrics = linear_regressor.evaluate(
         input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1)
@@ -308,7 +310,7 @@ class BaseLinearRegressorEvaluationTest(object):
       return features, labels
 
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         weight_column='weights',
         model_dir=self._model_dir)
     eval_metrics = linear_regressor.evaluate(input_fn=_input_fn, steps=1)
@@ -336,8 +338,7 @@ class BaseLinearRegressorEvaluationTest(object):
       save_variables_to_ckpt(self._model_dir)
 
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column(
-            'age', shape=(x_dim,)),),
+        feature_columns=(self._fc_lib.numeric_column('age', shape=(x_dim,)),),
         label_dimension=label_dim,
         model_dir=self._model_dir)
     input_fn = numpy_io.numpy_input_fn(
@@ -374,8 +375,8 @@ class BaseLinearRegressorEvaluationTest(object):
 
     batch_size = 2
     feature_columns = [
-        feature_column_lib.numeric_column('age'),
-        feature_column_lib.numeric_column('height')
+        self._fc_lib.numeric_column('age'),
+        self._fc_lib.numeric_column('height')
     ]
     input_fn = numpy_io.numpy_input_fn(
         x={'age': np.array([20, 40]),
@@ -402,8 +403,9 @@ class BaseLinearRegressorEvaluationTest(object):
 
 class BaseLinearRegressorPredictTest(object):
 
-  def __init__(self, linear_regressor_fn):
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
     self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -422,7 +424,7 @@ class BaseLinearRegressorPredictTest(object):
       save_variables_to_ckpt(self._model_dir)
 
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column('x'),),
+        feature_columns=(self._fc_lib.numeric_column('x'),),
         model_dir=self._model_dir)
 
     predict_input_fn = numpy_io.numpy_input_fn(
@@ -441,7 +443,7 @@ class BaseLinearRegressorPredictTest(object):
     batch_size = 2
     label_dimension = 3
     x_dim = 4
-    feature_columns = (feature_column_lib.numeric_column('x', shape=(x_dim,)),)
+    feature_columns = (self._fc_lib.numeric_column('x', shape=(x_dim,)),)
     with ops.Graph().as_default():
       variables_lib.Variable(  # shape=[x_dim, label_dimension]
           [[1., 2., 3.], [2., 3., 4.], [3., 4., 5.], [4., 5., 6.]],
@@ -479,8 +481,8 @@ class BaseLinearRegressorPredictTest(object):
       save_variables_to_ckpt(self._model_dir)
 
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column('x0'),
-                         feature_column_lib.numeric_column('x1')),
+        feature_columns=(self._fc_lib.numeric_column('x0'),
+                         self._fc_lib.numeric_column('x1')),
         model_dir=self._model_dir)
 
     predict_input_fn = numpy_io.numpy_input_fn(
@@ -515,9 +517,8 @@ class BaseLinearRegressorPredictTest(object):
               dense_shape=[2, 2]),
       })
 
-    feature_columns = (
-        feature_column_lib.categorical_column_with_vocabulary_list(
-            'language', vocabulary_list=['a', 'b', 'c']),)
+    feature_columns = (self._fc_lib.categorical_column_with_vocabulary_list(
+        'language', vocabulary_list=['a', 'b', 'c']),)
 
     # Check prediction for each sparse_combiner.
     # With sparse_combiner = 'sum', we have
@@ -561,8 +562,9 @@ class BaseLinearRegressorPredictTest(object):
 
 class BaseLinearRegressorIntegrationTest(object):
 
-  def __init__(self, linear_regressor_fn):
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
     self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -575,7 +577,7 @@ class BaseLinearRegressorIntegrationTest(object):
   def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                           input_dimension, label_dimension, prediction_length):
     feature_columns = [
-        feature_column_lib.numeric_column('x', shape=(input_dimension,))
+        self._fc_lib.numeric_column('x', shape=(input_dimension,))
     ]
     est = self._linear_regressor_fn(
         feature_columns=feature_columns,
@@ -597,7 +599,7 @@ class BaseLinearRegressorIntegrationTest(object):
     self.assertAllEqual((prediction_length, label_dimension), predictions.shape)
 
     # EXPORT
-    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
+    feature_spec = self._fc_lib.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
@@ -729,8 +731,9 @@ class BaseLinearRegressorIntegrationTest(object):
 
 class BaseLinearRegressorTrainingTest(object):
 
-  def __init__(self, linear_regressor_fn):
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
     self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -808,7 +811,7 @@ class BaseLinearRegressorTrainingTest(object):
     label = 5.
     age = 17
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         model_dir=self._model_dir)
 
     # Train for a few steps, and validate final checkpoint.
@@ -820,7 +823,7 @@ class BaseLinearRegressorTrainingTest(object):
   def testTrainWithOneDimLabel(self):
     label_dimension = 1
     batch_size = 20
-    feature_columns = [feature_column_lib.numeric_column('age', shape=(1,))]
+    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
     est = self._linear_regressor_fn(
         feature_columns=feature_columns,
         label_dimension=label_dimension,
@@ -840,7 +843,7 @@ class BaseLinearRegressorTrainingTest(object):
   def testTrainWithOneDimWeight(self):
     label_dimension = 1
     batch_size = 20
-    feature_columns = [feature_column_lib.numeric_column('age', shape=(1,))]
+    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
     est = self._linear_regressor_fn(
         feature_columns=feature_columns,
         label_dimension=label_dimension,
@@ -867,7 +870,7 @@ class BaseLinearRegressorTrainingTest(object):
     # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
     mock_optimizer = self._mock_optimizer(expected_loss=25.)
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         model_dir=self._model_dir,
         optimizer=mock_optimizer)
     self.assertEqual(0, mock_optimizer.minimize.call_count)
@@ -900,7 +903,7 @@ class BaseLinearRegressorTrainingTest(object):
     # loss = (logits - label)^2 = (175 - 5)^2 = 28900
     mock_optimizer = self._mock_optimizer(expected_loss=28900.)
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         model_dir=self._model_dir,
         optimizer=mock_optimizer)
     self.assertEqual(0, mock_optimizer.minimize.call_count)
@@ -935,7 +938,7 @@ class BaseLinearRegressorTrainingTest(object):
     # loss = sum(logits - label)^2 = (175 - 5)^2 + (155 - 3)^2 = 52004
     mock_optimizer = self._mock_optimizer(expected_loss=52004.)
     linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         model_dir=self._model_dir,
         optimizer=mock_optimizer)
     self.assertEqual(0, mock_optimizer.minimize.call_count)
@@ -954,8 +957,9 @@ class BaseLinearRegressorTrainingTest(object):
 
 class BaseLinearClassifierTrainingTest(object):
 
-  def __init__(self, linear_classifier_fn):
+  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
     self._linear_classifier_fn = linear_classifier_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1031,7 +1035,7 @@ class BaseLinearClassifierTrainingTest(object):
     label = 0
     age = 17
     est = linear.LinearClassifier(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         model_dir=self._model_dir)
 
@@ -1051,7 +1055,7 @@ class BaseLinearClassifierTrainingTest(object):
     batch_size = 20
 
     est = linear.LinearClassifier(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         model_dir=self._model_dir)
     data_rank_1 = np.array([0, 1])
@@ -1078,7 +1082,7 @@ class BaseLinearClassifierTrainingTest(object):
     batch_size = 20
 
     est = linear.LinearClassifier(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         model_dir=self._model_dir)
     data_rank_1 = np.array([0, 1])
@@ -1103,7 +1107,7 @@ class BaseLinearClassifierTrainingTest(object):
     batch_size = 20
 
     est = linear.LinearClassifier(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         weight_column='w',
         n_classes=n_classes,
         model_dir=self._model_dir)
@@ -1129,7 +1133,7 @@ class BaseLinearClassifierTrainingTest(object):
     batch_size = 20
 
     est = linear.LinearClassifier(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         weight_column='w',
         n_classes=n_classes,
         model_dir=self._model_dir)
@@ -1166,7 +1170,7 @@ class BaseLinearClassifierTrainingTest(object):
         expected_loss=-1 * math.log(1.0/n_classes))
 
     est = linear.LinearClassifier(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         optimizer=mock_optimizer,
         model_dir=self._model_dir)
@@ -1229,7 +1233,7 @@ class BaseLinearClassifierTrainingTest(object):
     mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
 
     est = linear.LinearClassifier(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         optimizer=mock_optimizer,
         model_dir=self._model_dir)
@@ -1277,7 +1281,7 @@ class BaseLinearClassifierTrainingTest(object):
     mock_optimizer = self._mock_optimizer(expected_loss=1.1132617)
 
     est = linear.LinearClassifier(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         optimizer=mock_optimizer,
         model_dir=self._model_dir)
@@ -1341,7 +1345,7 @@ class BaseLinearClassifierTrainingTest(object):
     mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
 
     est = linear.LinearClassifier(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         optimizer=mock_optimizer,
         model_dir=self._model_dir)
@@ -1368,8 +1372,9 @@ class BaseLinearClassifierTrainingTest(object):
 
 class BaseLinearClassifierEvaluationTest(object):
 
-  def __init__(self, linear_classifier_fn):
+  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
     self._linear_classifier_fn = linear_classifier_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1398,7 +1403,7 @@ class BaseLinearClassifierEvaluationTest(object):
       save_variables_to_ckpt(self._model_dir)
 
     est = self._linear_classifier_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         model_dir=self._model_dir)
     eval_metrics = est.evaluate(
@@ -1464,7 +1469,7 @@ class BaseLinearClassifierEvaluationTest(object):
       save_variables_to_ckpt(self._model_dir)
 
     est = self._linear_classifier_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         model_dir=self._model_dir)
     eval_metrics = est.evaluate(
@@ -1540,7 +1545,7 @@ class BaseLinearClassifierEvaluationTest(object):
       save_variables_to_ckpt(self._model_dir)
 
     est = self._linear_classifier_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         n_classes=n_classes,
         weight_column='w',
         model_dir=self._model_dir)
@@ -1605,8 +1610,9 @@ class BaseLinearClassifierEvaluationTest(object):
 
 class BaseLinearClassifierPredictTest(object):
 
-  def __init__(self, linear_classifier_fn):
+  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
     self._linear_classifier_fn = linear_classifier_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1634,7 +1640,7 @@ class BaseLinearClassifierPredictTest(object):
       save_variables_to_ckpt(self._model_dir)
 
     est = self._linear_classifier_fn(
-        feature_columns=(feature_column_lib.numeric_column('age'),),
+        feature_columns=(self._fc_lib.numeric_column('age'),),
         label_vocabulary=label_vocabulary,
         n_classes=n_classes,
         model_dir=self._model_dir)
@@ -1730,9 +1736,8 @@ class BaseLinearClassifierPredictTest(object):
               dense_shape=[2, 2]),
       })
 
-    feature_columns = (
-        feature_column_lib.categorical_column_with_vocabulary_list(
-            'language', vocabulary_list=['a', 'b', 'c']),)
+    feature_columns = (self._fc_lib.categorical_column_with_vocabulary_list(
+        'language', vocabulary_list=['a', 'b', 'c']),)
 
     # Check prediction for each sparse_combiner.
     # With sparse_combiner = 'sum', we have
@@ -1776,8 +1781,9 @@ class BaseLinearClassifierPredictTest(object):
 
 class BaseLinearClassifierIntegrationTest(object):
 
-  def __init__(self, linear_classifier_fn):
+  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
     self._linear_classifier_fn = linear_classifier_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
@@ -1789,7 +1795,7 @@ class BaseLinearClassifierIntegrationTest(object):
   def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn,
                           predict_input_fn, input_dimension, prediction_length):
     feature_columns = [
-        feature_column_lib.numeric_column('x', shape=(input_dimension,))
+        self._fc_lib.numeric_column('x', shape=(input_dimension,))
     ]
     est = self._linear_classifier_fn(
         feature_columns=feature_columns,
@@ -1811,7 +1817,7 @@ class BaseLinearClassifierIntegrationTest(object):
     self.assertAllEqual((prediction_length, 1), predictions.shape)
 
     # EXPORT
-    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
+    feature_spec = self._fc_lib.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
@@ -1961,9 +1967,12 @@ class BaseLinearClassifierIntegrationTest(object):
 
 class BaseLinearLogitFnTest(object):
 
+  def __init__(self, fc_lib=feature_column):
+    self._fc_lib = fc_lib
+
   def test_basic_logit_correctness(self):
     """linear_logit_fn simply wraps feature_column_lib.linear_model."""
-    age = feature_column_lib.numeric_column('age')
+    age = self._fc_lib.numeric_column('age')
     with ops.Graph().as_default():
       logit_fn = linear._linear_logit_fn_builder(units=2, feature_columns=[age])
       logits = logit_fn(features={'age': [[23.], [31.]]})
@@ -1983,12 +1992,14 @@ class BaseLinearLogitFnTest(object):
 
   def test_compute_fraction_of_zero(self):
     """Tests the calculation of sparsity."""
-    age = feature_column_lib.numeric_column('age')
-    occupation = feature_column_lib.categorical_column_with_hash_bucket(
+    if self._fc_lib != feature_column:
+      return
+    age = feature_column.numeric_column('age')
+    occupation = feature_column.categorical_column_with_hash_bucket(
         'occupation', hash_bucket_size=5)
     with ops.Graph().as_default():
       cols_to_vars = {}
-      feature_column_lib.linear_model(
+      feature_column.linear_model(
           features={
               'age': [[23.], [31.]],
               'occupation': [['doctor'], ['engineer']]
@@ -1997,7 +2008,42 @@ class BaseLinearLogitFnTest(object):
           units=3,
           cols_to_vars=cols_to_vars)
       cols_to_vars.pop('bias')
-      fraction_zero = linear._compute_fraction_of_zero(cols_to_vars)
+      fraction_zero = linear._compute_fraction_of_zero(cols_to_vars.values())
+      age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                                   'linear_model/age')[0]
+      with tf_session.Session() as sess:
+        sess.run([variables_lib.global_variables_initializer()])
+        # Upon initialization, all variables will be zero.
+        self.assertAllClose(1, fraction_zero.eval())
+
+        sess.run(age_var.assign([[2.0, 0.0, -1.0]]))
+        # 1 of the 3 age weights are zero, and all of the 15 (5 hash buckets
+        # x 3-dim output) are zero.
+        self.assertAllClose(16. / 18., fraction_zero.eval())
+
+  def test_compute_fraction_of_zero_v2(self):
+    """Tests the calculation of sparsity."""
+    if self._fc_lib != feature_column_v2:
+      return
+
+    age = feature_column_v2.numeric_column('age')
+    occupation = feature_column_v2.categorical_column_with_hash_bucket(
+        'occupation', hash_bucket_size=5)
+    shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
+    with ops.Graph().as_default():
+      model = feature_column_v2.LinearModel(
+          feature_columns=[age, occupation],
+          units=3,
+          shared_state_manager=shared_state_manager)
+      features = {
+          'age': [[23.], [31.]],
+          'occupation': [['doctor'], ['engineer']]
+      }
+      model(features)
+      variables = model.variables
+      variables.remove(model.bias_variable)
+      variables.extend(shared_state_manager.variables)
+      fraction_zero = linear._compute_fraction_of_zero(variables)
       age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
                                    'linear_model/age')[0]
       with tf_session.Session() as sess:
@@ -2013,9 +2059,13 @@ class BaseLinearLogitFnTest(object):
 
 class BaseLinearWarmStartingTest(object):
 
-  def __init__(self, _linear_classifier_fn, _linear_regressor_fn):
+  def __init__(self,
+               _linear_classifier_fn,
+               _linear_regressor_fn,
+               fc_lib=feature_column):
     self._linear_classifier_fn = _linear_classifier_fn
     self._linear_regressor_fn = _linear_regressor_fn
+    self._fc_lib = fc_lib
 
   def setUp(self):
     # Create a directory to save our old checkpoint and vocabularies to.
@@ -2039,7 +2089,7 @@ class BaseLinearWarmStartingTest(object):
 
   def test_classifier_basic_warm_starting(self):
     """Tests correctness of LinearClassifier default warm-start."""
-    age = feature_column_lib.numeric_column('age')
+    age = self._fc_lib.numeric_column('age')
 
     # Create a LinearClassifier and train to save a checkpoint.
     linear_classifier = self._linear_classifier_fn(
@@ -2066,7 +2116,7 @@ class BaseLinearWarmStartingTest(object):
 
   def test_regressor_basic_warm_starting(self):
     """Tests correctness of LinearRegressor default warm-start."""
-    age = feature_column_lib.numeric_column('age')
+    age = self._fc_lib.numeric_column('age')
 
     # Create a LinearRegressor and train to save a checkpoint.
     linear_regressor = self._linear_regressor_fn(
@@ -2091,7 +2141,7 @@ class BaseLinearWarmStartingTest(object):
 
   def test_warm_starting_selective_variables(self):
     """Tests selecting variables to warm-start."""
-    age = feature_column_lib.numeric_column('age')
+    age = self._fc_lib.numeric_column('age')
 
     # Create a LinearClassifier and train to save a checkpoint.
     linear_classifier = self._linear_classifier_fn(
@@ -2128,7 +2178,7 @@ class BaseLinearWarmStartingTest(object):
     vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
     with open(vocab_file, 'w') as f:
       f.write('\n'.join(vocab_list))
-    occupation = feature_column_lib.categorical_column_with_vocabulary_file(
+    occupation = self._fc_lib.categorical_column_with_vocabulary_file(
         'occupation',
         vocabulary_file=vocab_file,
         vocabulary_size=len(vocab_list))
@@ -2152,7 +2202,7 @@ class BaseLinearWarmStartingTest(object):
                                   'new_occupation_vocab')
     with open(new_vocab_file, 'w') as f:
       f.write('\n'.join(new_vocab_list))
-    new_occupation = feature_column_lib.categorical_column_with_vocabulary_file(
+    new_occupation = self._fc_lib.categorical_column_with_vocabulary_file(
         'occupation',
         vocabulary_file=new_vocab_file,
         vocabulary_size=len(new_vocab_list))
@@ -2205,7 +2255,7 @@ class BaseLinearWarmStartingTest(object):
 
   def test_warm_starting_with_naming_change(self):
     """Tests warm-starting with a Tensor name remapping."""
-    age_in_years = feature_column_lib.numeric_column('age_in_years')
+    age_in_years = self._fc_lib.numeric_column('age_in_years')
 
     # Create a LinearClassifier and train to save a checkpoint.
     linear_classifier = self._linear_classifier_fn(
@@ -2219,7 +2269,7 @@ class BaseLinearWarmStartingTest(object):
     # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
     # accumulator values that change).
     warm_started_linear_classifier = self._linear_classifier_fn(
-        feature_columns=[feature_column_lib.numeric_column('age')],
+        feature_columns=[self._fc_lib.numeric_column('age')],
         n_classes=4,
         optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
         # The 'age' variable correspond to the 'age_in_years' variable in the
diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index 5800b693b4..ac53a84eef 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -156,7 +156,7 @@ py_test(
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:backprop",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/estimator:estimator_py",
+        "//tensorflow/python/estimator:numpy_io",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index a8d5bfb437..b79373c475 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -271,6 +271,7 @@ class _StateManagerImpl(StateManager):
         dtype=dtype,
         initializer=initializer,
         trainable=self._trainable and trainable,
+        use_resource=True,
         # TODO(rohanj): Get rid of this hack once we have a mechanism for
         # specifying a default partitioner for an entire layer. In that case,
         # the default getter for Layers should work.
@@ -383,8 +384,8 @@ class FeatureLayer(Layer):
       if isinstance(column, SharedEmbeddingColumn):
         column.create_state(self._shared_state_manager)
       else:
-        with variable_scope.variable_scope(None, default_name=self.name):
-          with variable_scope.variable_scope(None, default_name=column.name):
+        with variable_scope._pure_variable_scope(self.name):  # pylint: disable=protected-access
+          with variable_scope._pure_variable_scope(column.name):  # pylint: disable=protected-access
             column.create_state(self._state_manager)
       super(FeatureLayer, self).build(None)
 
@@ -414,19 +415,20 @@ class FeatureLayer(Layer):
     output_tensors = []
     ordered_columns = []
     for column in sorted(self._feature_columns, key=lambda x: x.name):
-      ordered_columns.append(column)
-      if isinstance(column, SharedEmbeddingColumn):
-        tensor = column.get_dense_tensor(transformation_cache,
-                                         self._shared_state_manager)
-      else:
-        tensor = column.get_dense_tensor(transformation_cache,
-                                         self._state_manager)
-      num_elements = column.variable_shape.num_elements()
-      batch_size = array_ops.shape(tensor)[0]
-      tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements))
-      output_tensors.append(tensor)
-      if cols_to_output_tensors is not None:
-        cols_to_output_tensors[column] = tensor
+      with ops.name_scope(column.name):
+        ordered_columns.append(column)
+        if isinstance(column, SharedEmbeddingColumn):
+          tensor = column.get_dense_tensor(transformation_cache,
+                                           self._shared_state_manager)
+        else:
+          tensor = column.get_dense_tensor(transformation_cache,
+                                           self._state_manager)
+        num_elements = column.variable_shape.num_elements()
+        batch_size = array_ops.shape(tensor)[0]
+        tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements))
+        output_tensors.append(tensor)
+        if cols_to_output_tensors is not None:
+          cols_to_output_tensors[column] = tensor
 
     _verify_static_batch_size_equality(output_tensors, ordered_columns)
     return array_ops.concat(output_tensors, 1)
@@ -601,6 +603,7 @@ class LinearModel(Layer):
           shape=[self._units],
           initializer=init_ops.zeros_initializer(),
           trainable=self.trainable,
+          use_resource=True,
           # TODO(rohanj): Get rid of this hack once we have a mechanism for
           # specifying a default partitioner for an entire layer. In that case,
           # the default getter for Layers should work.
@@ -627,36 +630,41 @@ class LinearModel(Layer):
     if not isinstance(features, dict):
       raise ValueError('We expected a dictionary here. Instead we got: ',
                        features)
-    transformation_cache = FeatureTransformationCache(features)
-    weighted_sums = []
-    for column in self._feature_columns:
-      with ops.name_scope(column.name):
-        # All the weights used in the linear model are owned by the state
-        # manager associated with this Linear Model.
-        weight_var = self._state_manager.get_variable(column, 'weights')
-
-        # The embedding weights for the SharedEmbeddingColumn are owned by
-        # the shared_state_manager and so we need to pass that in while
-        # creating the weighted sum. For all other columns, the state is owned
-        # by the Linear Model's state manager.
-        if isinstance(column, SharedEmbeddingColumn):
-          state_manager = self._shared_state_manager
-        else:
-          state_manager = self._state_manager
-        weighted_sum = _create_weighted_sum(
-            column=column,
-            transformation_cache=transformation_cache,
-            state_manager=state_manager,
-            sparse_combiner=self._sparse_combiner,
-            weight_var=weight_var)
-        weighted_sums.append(weighted_sum)
-
-    _verify_static_batch_size_equality(weighted_sums, self._feature_columns)
-    predictions_no_bias = math_ops.add_n(
-        weighted_sums, name='weighted_sum_no_bias')
-    predictions = nn_ops.bias_add(
-        predictions_no_bias, self._bias_variable, name='weighted_sum')
-    return predictions
+    with ops.name_scope(self.name):
+      transformation_cache = FeatureTransformationCache(features)
+      weighted_sums = []
+      for column in self._feature_columns:
+        with ops.name_scope(column.name):
+          # All the weights used in the linear model are owned by the state
+          # manager associated with this Linear Model.
+          weight_var = self._state_manager.get_variable(column, 'weights')
+
+          # The embedding weights for the SharedEmbeddingColumn are owned by
+          # the shared_state_manager and so we need to pass that in while
+          # creating the weighted sum. For all other columns, the state is owned
+          # by the Linear Model's state manager.
+          if isinstance(column, SharedEmbeddingColumn):
+            state_manager = self._shared_state_manager
+          else:
+            state_manager = self._state_manager
+          weighted_sum = _create_weighted_sum(
+              column=column,
+              transformation_cache=transformation_cache,
+              state_manager=state_manager,
+              sparse_combiner=self._sparse_combiner,
+              weight_var=weight_var)
+          weighted_sums.append(weighted_sum)
+
+      _verify_static_batch_size_equality(weighted_sums, self._feature_columns)
+      predictions_no_bias = math_ops.add_n(
+          weighted_sums, name='weighted_sum_no_bias')
+      predictions = nn_ops.bias_add(
+          predictions_no_bias, self._bias_variable, name='weighted_sum')
+      return predictions
+
+  @property
+  def bias_variable(self):
+    return self._bias_variable
 
 
 def _transform_features(features, feature_columns, state_manager):
@@ -2605,6 +2613,7 @@ class SharedEmbeddingStateManager(Layer):
           dtype=dtype,
           trainable=self.trainable and trainable,
           initializer=initializer,
+          use_resource=True,
           # TODO(rohanj): Get rid of this hack once we have a mechanism for
           # specifying a default partitioner for an entire layer. In that case,
           # the default getter for Layers should work.
@@ -3279,6 +3288,7 @@ def _safe_embedding_lookup_sparse(embedding_weights,
     raise ValueError('Missing embedding_weights %s.' % embedding_weights)
 
   dtype = sparse_weights.dtype if sparse_weights is not None else None
+  # TODO(rohanj): Look into removing this convert_to_tensor call.
   embedding_weights = [
       ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
   ]
diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py
index a13a5010e1..d3787146ed 100644
--- a/tensorflow/python/feature_column/feature_column_v2_test.py
+++ b/tensorflow/python/feature_column/feature_column_v2_test.py
@@ -5170,8 +5170,8 @@ class WeightedCategoricalColumnTest(test.TestCase):
             key='ids', num_buckets=3),
         weight_feature_key='values')
     with ops.Graph().as_default():
-      with self.assertRaisesRegexp(
-          ValueError, r'Dimensions.*are not compatible'):
+      with self.assertRaisesRegexp(ValueError,
+                                   r'Dimensions.*are not compatible'):
         model = fc.LinearModel((column,))
         model({
             'ids':
-- 
GitLab


From 2f559f2d5f75cf80183ae0d855110809404019f7 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Fri, 28 Sep 2018 15:14:43 -0700
Subject: [PATCH 0883/1357] Handle noinline gradient function in control flow
 functionalization.

PiperOrigin-RevId: 215003704
---
 .../tf2xla/functionalize_control_flow.cc      | 84 +++++++++++--------
 tensorflow/compiler/tf2xla/tf2xla_util.cc     | 30 +++++--
 tensorflow/compiler/tf2xla/tf2xla_util.h      | 51 +++++++----
 tensorflow/core/framework/function.cc         |  8 ++
 tensorflow/core/framework/function.h          |  5 ++
 5 files changed, 121 insertions(+), 57 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 2d45507796..36c6f5d316 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -92,13 +92,51 @@ Status FunctionalizeControlFlowForFunction(
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
 
+  // Call graph optimizer. The most important optimization we need is constant
+  // folding, which will replace ops like Shape/BroadcastGradientArgs with
+  // constant shape input. Without this optimization, those ops might become
+  // dynamic input for then/else body function and XLA will complain that input
+  // is not compile time constant. We enable function inlining as well, because
+  // otherwise we won't be able to infer shape for any node depending on
+  // function call nodes.
+  if (VLOG_IS_ON(4)) {
+    dump_graph::DumpGraphToFile(
+        absl::StrCat("functionalize_control_flow_before_opt_", func_name),
+        *body->graph, fld);
+  }
+  // Optimizer accepts std::unique_ptr<Graph>* as input and might change
+  // underlying pointer, thus we create a new Graph and copy from body->graph.
+  std::unique_ptr<Graph> optimized_graph(new Graph(fld));
+  CopyGraph(*body->graph, optimized_graph.get());
+  OptimizerOptions opts;
+  opts.set_opt_level(OptimizerOptions::L0);
+  opts.set_do_function_inlining(true);
+  opts.set_do_constant_folding(true);
+  GraphOptimizer optimizer(opts);
+  auto cf_consider_fn = [](const Node* n) {
+    // Skip SymbolicGradient op when doing constant folding.
+    // Enabling SymbolicGradient op in constant folding requires
+    // flr->device() to be non-null, and here we have not constructed
+    // proper Device object yet (it will be constructed in XlaCompiler).
+    return n->type_string() != FunctionLibraryDefinition::kGradientOp;
+  };
+  optimizer.Optimize(flr, flr->env(),
+                     /*device=*/nullptr, &optimized_graph,
+                     /*shape_map=*/nullptr, /*cse_consider_fn=*/nullptr,
+                     cf_consider_fn);
+  if (VLOG_IS_ON(4)) {
+    dump_graph::DumpGraphToFile(
+        absl::StrCat("functionalize_control_flow_after_opt_", func_name),
+        *optimized_graph, fld);
+  }
+
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
   // might involve node deletion/addition. Avoid modifying nodes while iterating
   // it.
   std::vector<std::pair<Node*, std::vector<AssociatedFunctionInfo>>>
       nodes_to_associated_functions;
-  for (auto* n : body->graph->nodes()) {
+  for (auto* n : optimized_graph->nodes()) {
     auto associated_functions = GetAssociatedFunctions(*n, flr);
     if (!associated_functions.empty()) {
       nodes_to_associated_functions.push_back({n, associated_functions});
@@ -118,7 +156,14 @@ Status FunctionalizeControlFlowForFunction(
         // but still rewrite the node.
         new_name = iter->second;
       } else {
-        new_name = fld->UniqueFunctionName(absl::StrCat(name, "_f15n_"));
+        if (associated_function.type() ==
+            AssociatedFunctionInfo::AssociatedFunctionType::kSymbolicGradient) {
+          // For SymbolicGradient, `name` is always "SymbolicGradient",
+          // which is not very informative. Use node name instead.
+          new_name = fld->UniqueFunctionName(absl::StrCat(n->name(), "_f15n_"));
+        } else {
+          new_name = fld->UniqueFunctionName(absl::StrCat(name, "_f15n_"));
+        }
         TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
             name, new_name, associated_function.attrs(), fld, flr,
             canonicalized_name_to_new_name));
@@ -129,43 +174,10 @@ Status FunctionalizeControlFlowForFunction(
       // That's fine because in that case, associated_functions will only have
       // one member and the loop will only run once.
       TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-          body->graph, n, fld, associated_function, new_name));
+          optimized_graph.get(), n, fld, associated_function, new_name));
     }
   }
 
-  // Call graph optimizer. The most important optimization we need is constant
-  // folding, which will replace ops like Shape/BroadcastGradientArgs with
-  // constant shape input. Without this optimization, those ops might become
-  // dynamic input for then/else body function and XLA will complain that input
-  // is not compile time constant. We enable function inlining as well, because
-  // otherwise we won't be able to infer shape for any node depending on
-  // function call nodes.
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_before_opt_", func_name),
-        *body->graph, fld);
-  }
-  // Optimizer accepts std::unique_ptr<Graph>* as input and might change
-  // underlying pointer, thus we create a new Graph and copy from body->graph.
-  std::unique_ptr<Graph> optimized_graph(new Graph(fld));
-  CopyGraph(*body->graph, optimized_graph.get());
-  OptimizerOptions opts;
-  opts.set_opt_level(OptimizerOptions::L0);
-  opts.set_do_function_inlining(true);
-  opts.set_do_constant_folding(true);
-  GraphOptimizer optimizer(opts);
-  auto cf_consider_fn = [](const Node* n) {
-    // Skip SymbolicGradient op when doing constant folding.
-    // Enabling SymbolicGradient op in constant folding requires
-    // flr->device() to be non-null, and here we have not constructed
-    // proper Device object yet (it will be constructed in XlaCompiler).
-    return n->type_string() != FunctionLibraryDefinition::kGradientOp;
-  };
-  optimizer.Optimize(flr, flr->env(),
-                     /*device=*/nullptr, &optimized_graph,
-                     /*shape_map=*/nullptr, /*cse_consider_fn=*/nullptr,
-                     cf_consider_fn);
-
   // Functionalize the function body.
   if (VLOG_IS_ON(4)) {
     dump_graph::DumpGraphToFile(
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc
index d6f42bac86..01dd3ba10f 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc
@@ -336,9 +336,9 @@ bool HasAssociatedFunction(const NodeDef& node_def,
   }
 
   if (node_def.op() == FunctionLibraryDefinition::kGradientOp) {
-    // Skip gradient op. Gradient op has "f" attr, which is set to the function
-    // we are getting gradient for. That function is not associated with the op.
-    return false;
+    // Gradient op has "f" attr, which is set to the function we are getting
+    // gradient for. We need to functionalize the gradient function.
+    return true;
   }
 
   for (const auto& iter : node_def.attr()) {
@@ -357,17 +357,18 @@ std::vector<AssociatedFunctionInfo> GetAssociatedFunctions(
   if (flr->GetFunctionLibraryDefinition()->Contains(op)) {
     // This is a function call node.
     AttrValueMap attrs(node.attrs().begin(), node.attrs().end());
-    results.emplace_back(AssociatedFunctionInfo(op, attrs));
+    results.emplace_back(AssociatedFunctionInfo::FunctionCall(op, attrs));
   } else if (node.type_string() == FunctionLibraryDefinition::kGradientOp) {
-    // Skip gradient op. Gradient op has "f" attr, which is set to the function
-    // we are getting gradient for. That function is not associated with the op.
+    // This is a SymbolicGradient op.
+    AttrValueMap attrs(node.attrs().begin(), node.attrs().end());
+    results.emplace_back(AssociatedFunctionInfo::SymbolicGradient(op, attrs));
   } else {
     // Collect all function attrs for the node.
     for (auto& iter : node.attrs()) {
       if (iter.second.has_func()) {
         VLOG(2) << "Found function attr for node " << node.name() << ": "
                 << iter.first << " = " << iter.second.func().name();
-        results.emplace_back(AssociatedFunctionInfo(
+        results.emplace_back(AssociatedFunctionInfo::FunctionAttr(
             iter.second.func().name(), iter.second.func().attr(), iter.first));
       }
     }
@@ -410,6 +411,21 @@ Status RewriteAssociatedFunction(
       graph->RemoveNode(node);
       break;
     }
+    case AssociatedFunctionInfo::kSymbolicGradient: {
+      NameAttrList func;
+      TF_RETURN_IF_ERROR(GetNodeAttr(
+          node->attrs(), FunctionLibraryDefinition::kFuncAttr, &func));
+      GradientDef gradient_def;
+      gradient_def.set_function_name(func.name());
+      gradient_def.set_gradient_func(rewritten_function_name);
+      string original_grad_func = fld->FindGradient(func.name());
+      if (original_grad_func.empty()) {
+        TF_RETURN_IF_ERROR(fld->AddGradientDef(gradient_def));
+      } else if (original_grad_func != rewritten_function_name) {
+        TF_RETURN_IF_ERROR(fld->ReplaceGradient(gradient_def));
+      }
+      break;
+    }
     case AssociatedFunctionInfo::kFunctionAttr: {
       // Change function attr to rewritten functions.
       NameAttrList func;
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.h b/tensorflow/compiler/tf2xla/tf2xla_util.h
index 6065d0bb9a..53eab8b63e 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.h
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.h
@@ -65,21 +65,33 @@ uint32 GetXLARandomSeed();
 class AssociatedFunctionInfo {
  public:
   enum AssociatedFunctionType {
-    kFunctionCallNode = 0,
-    kFunctionAttr = 1,
+    kFunctionAttr = 0,
+    kFunctionCallNode = 1,
+    kSymbolicGradient = 2,
   };
 
-  // The node is a function call.
-  AssociatedFunctionInfo(const string& func_name, const AttrValueMap& attrs)
-      : type_(kFunctionCallNode), func_name_(func_name), attrs_(attrs) {}
-
   // The function is an attr of the node.
-  AssociatedFunctionInfo(const string& func_name, const AttrValueMap& attrs,
-                         const string& attr_name)
-      : type_(kFunctionAttr),
-        func_name_(func_name),
-        attrs_(attrs),
-        attr_name_(attr_name) {}
+  static AssociatedFunctionInfo FunctionAttr(const string& func_name,
+                                             const AttrValueMap& attrs,
+                                             const string& attr_name) {
+    return AssociatedFunctionInfo(kFunctionAttr, func_name, attrs, attr_name);
+  }
+
+  // The node is a function call.
+  static AssociatedFunctionInfo FunctionCall(const string& func_name,
+                                             const AttrValueMap& attrs) {
+    // attr_name will not be used in this case.
+    return AssociatedFunctionInfo(kFunctionCallNode, func_name, attrs,
+                                  /*attr_name=*/"");
+  }
+
+  // The node is a SymbolicGradient op.
+  static AssociatedFunctionInfo SymbolicGradient(const string& func_name,
+                                                 const AttrValueMap& attrs) {
+    // attr_name will not be used in this case.
+    return AssociatedFunctionInfo(kSymbolicGradient, func_name, attrs,
+                                  /*attr_name=*/"");
+  }
 
   AssociatedFunctionType type() const { return type_; }
 
@@ -90,6 +102,13 @@ class AssociatedFunctionInfo {
   const AttrValueMap& attrs() const { return attrs_; }
 
  private:
+  AssociatedFunctionInfo(AssociatedFunctionType type, const string& func_name,
+                         const AttrValueMap& attrs, const string& attr_name)
+      : type_(type),
+        func_name_(func_name),
+        attrs_(attrs),
+        attr_name_(attr_name) {}
+
   // Available for all instances.
   AssociatedFunctionType type_;
   string func_name_;
@@ -105,14 +124,18 @@ bool HasAssociatedFunction(const NodeDef& node_def,
 
 // Gets functions associated with the node. Current cases:
 // 1. For function call node, its function name;
-// 2. For nodes like XlaWhile/XlaIf, all their function attributes.
+// 2. For SymbolicGradient op, returned func_name will be "SymbolicGradient",
+//    and returned attrs will be this node's attributes;
+// 3. For nodes like XlaWhile/XlaIf, all their function attributes.
 std::vector<AssociatedFunctionInfo> GetAssociatedFunctions(
     const Node& node, FunctionLibraryRuntime* flr);
 
 // Changes associated functions for the node. Current cases:
 // 1. For function call node, creates a new node with the new function name and
 //    remove the old node;
-// 2. For nodes like XlaWhile/XlaIf, modify their function attributes.
+// 2. For SymbolicGradient op, add or replace GradientDef in
+//    FunctionLibraryDefinition;
+// 3. For nodes like XlaWhile/XlaIf, modify their function attributes.
 Status RewriteAssociatedFunction(
     Graph* graph, Node* node, FunctionLibraryDefinition* fld,
     const AssociatedFunctionInfo& associated_function,
diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index a17959a448..20f957190b 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -1101,6 +1101,14 @@ Status FunctionLibraryDefinition::ReplaceFunction(const string& func,
   return Status::OK();
 }
 
+Status FunctionLibraryDefinition::ReplaceGradient(const GradientDef& grad) {
+  mutex_lock l(mu_);
+  bool added;
+  TF_RETURN_IF_ERROR(RemoveGradient(grad.function_name()));
+  TF_RETURN_IF_ERROR(AddGradientDefHelper(grad, &added));
+  return Status::OK();
+}
+
 Status FunctionLibraryDefinition::RemoveFunction(const string& func) {
   const auto& i = function_defs_.find(func);
   if (i == function_defs_.end()) {
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index e01eb7503d..4d6d68e214 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -331,6 +331,11 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
   // a non-OK status if "func" was not found in the library, OK otherwise.
   Status ReplaceFunction(const string& func, const FunctionDef& fdef);
 
+  // Replaces the gradient corresponding to `grad.function_name()`. Returns
+  // a non-OK status if "grad.function_name()" was not found in the library, OK
+  // otherwise.
+  Status ReplaceGradient(const GradientDef& grad);
+
   // Adds the functions and gradients in 'other' to this function library.
   // Duplicate functions and gradients are ignored.
   // This operation is atomic.
-- 
GitLab


From 1c4a48ddd49f78fbd8ea3defd3a8755c91284166 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 28 Sep 2018 15:22:06 -0700
Subject: [PATCH 0884/1357] [tf.data] Merged contrib.data's DatasetTestBase
 with the DatasetTestBase in core (and added that as a base class for all the
 contrib tests). Also changed the assertDatasetsEqual functions so they are
 both graph and eager compatible (took the code from CSVDatasetTest) :)

PiperOrigin-RevId: 215004892
---
 .../contrib/data/python/kernel_tests/BUILD    | 37 ++++++---
 .../kernel_tests/batch_dataset_op_test.py     |  9 +--
 .../python/kernel_tests/bucketing_test.py     |  9 ++-
 .../kernel_tests/csv_dataset_op_test.py       | 43 ++--------
 .../dataset_constructor_op_test.py            |  3 +-
 .../directed_interleave_dataset_test.py       |  3 +-
 .../kernel_tests/get_single_element_test.py   |  3 +-
 .../kernel_tests/indexed_dataset_ops_test.py  |  3 +-
 .../interleave_dataset_op_test.py             |  3 +-
 .../python/kernel_tests/iterator_ops_test.py  |  3 +-
 .../kernel_tests/lmdb_dataset_op_test.py      |  3 +-
 .../kernel_tests/map_dataset_op_test.py       |  3 +-
 .../python/kernel_tests/map_defun_op_test.py  |  4 +-
 .../python/kernel_tests/optimization/BUILD    |  9 ++-
 .../assert_next_dataset_op_test.py            |  3 +-
 .../optimization/hoist_random_uniform_test.py |  3 +-
 .../map_and_filter_fusion_test.py             |  3 +-
 .../optimization/map_parallelization_test.py  |  3 +-
 .../optimization/map_vectorization_test.py    | 14 ++--
 .../optimization/model_dataset_op_test.py     |  3 +-
 .../optimization/noop_elimination_test.py     |  3 +-
 .../optimization/optimize_dataset_op_test.py  |  3 +-
 .../python/kernel_tests/parsing_ops_test.py   |  3 +-
 .../kernel_tests/prefetching_ops_test.py      |  7 +-
 .../kernel_tests/range_dataset_op_test.py     |  3 +-
 .../kernel_tests/reader_dataset_ops_test.py   |  3 +-
 .../reader_dataset_ops_test_base.py           | 10 +--
 .../data/python/kernel_tests/resample_test.py |  3 +-
 .../kernel_tests/scan_dataset_op_test.py      |  3 +-
 .../kernel_tests/shuffle_dataset_op_test.py   |  3 +-
 .../kernel_tests/slide_dataset_op_test.py     |  8 +-
 .../kernel_tests/sql_dataset_op_test_base.py  |  5 +-
 .../kernel_tests/stats_dataset_test_base.py   |  4 +-
 .../data/python/kernel_tests/test_utils.py    | 73 -----------------
 .../threadpool_dataset_ops_test.py            |  4 +-
 .../kernel_tests/unique_dataset_op_test.py    |  3 +-
 .../kernel_tests/window_dataset_op_test.py    |  3 +-
 .../python/kernel_tests/writer_ops_test.py    |  3 +-
 tensorflow/python/data/kernel_tests/BUILD     |  3 +
 .../python/data/kernel_tests/test_base.py     | 80 +++++++++++++++++++
 tensorflow/tools/pip_package/BUILD            |  1 -
 41 files changed, 209 insertions(+), 183 deletions(-)
 delete mode 100644 tensorflow/contrib/data/python/kernel_tests/test_utils.py

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 21ac40eb21..33784afa3f 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -31,6 +31,7 @@ py_test(
         "//tensorflow/python:string_ops",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
@@ -54,6 +55,7 @@ py_test(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
         "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -77,6 +79,7 @@ py_test(
         "//tensorflow/python:platform",
         "//tensorflow/python:platform_test",
         "//tensorflow/python:session",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:readers",
         "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
@@ -97,6 +100,7 @@ py_test(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:nest",
     ],
@@ -112,6 +116,7 @@ py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
         "//tensorflow/python:random_seed",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -130,6 +135,7 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
     ],
@@ -144,6 +150,7 @@ py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -169,6 +176,7 @@ py_test(
         "//tensorflow/python:script_ops",
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@six_archive//:six",
     ],
@@ -188,6 +196,7 @@ py_test(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/estimator:estimator_py",
     ],
@@ -214,6 +223,7 @@ py_test(
         "//tensorflow/python:platform",
         "//tensorflow/python:platform_test",
         "//tensorflow/python:session",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//third_party/py/numpy",
     ],
 )
@@ -239,6 +249,7 @@ py_test(
         "//tensorflow/python:io_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -258,6 +269,7 @@ py_test(
         "//tensorflow/python:io_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -282,6 +294,7 @@ py_test(
         "//tensorflow/python:functional_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:session",
+        "//tensorflow/python/data/kernel_tests:test_base",
     ],
 )
 
@@ -300,6 +313,7 @@ py_test(
         "//tensorflow/python:parsing_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:nest",
         "//third_party/py/numpy",
@@ -315,6 +329,7 @@ cuda_py_test(
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
@@ -340,6 +355,7 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -365,6 +381,7 @@ py_library(
         "//tensorflow/python:lib",
         "//tensorflow/python:parsing_ops",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:iterator_ops",
         "//tensorflow/python/data/ops:readers",
     ],
@@ -411,6 +428,7 @@ py_test(
         "//tensorflow/python:random_ops",
         "//tensorflow/python:string_ops",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
@@ -433,6 +451,7 @@ py_test(
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
@@ -453,6 +472,7 @@ py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -470,6 +490,7 @@ py_test(
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
@@ -489,6 +510,7 @@ py_library(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "@org_sqlite//:python",
     ],
 )
@@ -533,6 +555,7 @@ py_library(
     deps = [
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/kernel_tests:test_base",
     ],
 )
 
@@ -549,6 +572,7 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:script_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
@@ -567,6 +591,7 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -587,6 +612,7 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
@@ -604,17 +630,8 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:lib",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:readers",
     ],
 )
-
-py_library(
-    name = "test_utils",
-    srcs = ["test_utils.py"],
-    deps = [
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/util:nest",
-    ],
-)
diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
index e2508de9e9..fed7de5f2b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
@@ -25,6 +25,7 @@ import numpy as np
 
 from tensorflow.contrib.data.python.ops import batching
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -40,12 +41,8 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class BatchDatasetTest(test.TestCase, parameterized.TestCase):
+class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
-  def assertSparseValuesEqual(self, a, b):
-    self.assertAllEqual(a.indices, b.indices)
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.dense_shape, b.dense_shape)
 
   def testDenseToSparseBatchDataset(self):
     components = np.random.randint(12, size=(100,)).astype(np.int32)
@@ -723,7 +720,7 @@ class BatchDatasetTest(test.TestCase, parameterized.TestCase):
         self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
 
 
-class RestructuredDatasetTest(test.TestCase):
+class RestructuredDatasetTest(test_base.DatasetTestBase):
 
   def test_assert_element_shape(self):
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
index 48971f2ccc..ae401f786c 100644
--- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
@@ -22,6 +22,7 @@ import random
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -35,7 +36,7 @@ from tensorflow.python.ops import string_ops
 from tensorflow.python.platform import test
 
 
-class GroupByReducerTest(test.TestCase):
+class GroupByReducerTest(test_base.DatasetTestBase):
 
   def checkResults(self, dataset, shapes, values):
     self.assertEqual(shapes, dataset.output_shapes)
@@ -198,7 +199,7 @@ class GroupByReducerTest(test.TestCase):
       self.assertEqual(y, 45)
 
 
-class GroupByWindowTest(test.TestCase):
+class GroupByWindowTest(test_base.DatasetTestBase):
 
   def testSimple(self):
     components = np.random.randint(100, size=(200,)).astype(np.int64)
@@ -345,7 +346,7 @@ class GroupByWindowTest(test.TestCase):
 # NOTE(mrry): These tests are based on the tests in bucket_ops_test.py.
 # Currently, they use a constant batch size, though should be made to use a
 # different batch size per key.
-class BucketTest(test.TestCase):
+class BucketTest(test_base.DatasetTestBase):
 
   def _dynamicPad(self, bucket, window, window_size):
     # TODO(mrry): To match `tf.contrib.training.bucket()`, implement a
@@ -570,7 +571,7 @@ def _get_record_shape(sparse):
   return tensor_shape.TensorShape([None])
 
 
-class BucketBySequenceLength(test.TestCase):
+class BucketBySequenceLength(test_base.DatasetTestBase):
 
   def testBucket(self):
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
index f8e74e4583..5b3c512b64 100644
--- a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
@@ -30,6 +30,7 @@ import numpy as np
 from tensorflow.contrib.data.python.ops import error_ops
 from tensorflow.contrib.data.python.ops import readers
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
@@ -43,37 +44,7 @@ from tensorflow.python.platform import test
 
 
 @test_util.run_all_in_graph_and_eager_modes
-class CsvDatasetOpTest(test.TestCase):
-
-  def _get_next(self, dataset):
-    # Returns a no argument function whose result is fed to self.evaluate to
-    # yield the next element
-    it = dataset.make_one_shot_iterator()
-    if context.executing_eagerly():
-      return it.get_next
-    else:
-      get_next = it.get_next()
-      return lambda: get_next
-
-  def _assert_datasets_equal(self, ds1, ds2):
-    assert ds1.output_shapes == ds2.output_shapes, ('output_shapes differ: %s, '
-                                                    '%s') % (ds1.output_shapes,
-                                                             ds2.output_shapes)
-    assert ds1.output_types == ds2.output_types
-    assert ds1.output_classes == ds2.output_classes
-    next1 = self._get_next(ds1)
-    next2 = self._get_next(ds2)
-    # Run through datasets and check that outputs match, or errors match.
-    while True:
-      try:
-        op1 = self.evaluate(next1())
-      except (errors.OutOfRangeError, ValueError) as e:
-        # If op1 throws an exception, check that op2 throws same exception.
-        with self.assertRaises(type(e)):
-          self.evaluate(next2())
-        break
-      op2 = self.evaluate(next2())
-      self.assertAllEqual(op1, op2)
+class CsvDatasetOpTest(test_base.DatasetTestBase):
 
   def _setup_files(self, inputs, linebreak='\n', compression_type=None):
     filenames = []
@@ -108,7 +79,7 @@ class CsvDatasetOpTest(test.TestCase):
     """Checks that CsvDataset is equiv to TextLineDataset->map(decode_csv)."""
     dataset_actual, dataset_expected = self._make_test_datasets(
         inputs, **kwargs)
-    self._assert_datasets_equal(dataset_actual, dataset_expected)
+    self.assertDatasetsEqual(dataset_actual, dataset_expected)
 
   def _verify_output_or_err(self,
                             dataset,
@@ -116,7 +87,7 @@ class CsvDatasetOpTest(test.TestCase):
                             expected_err_re=None):
     if expected_err_re is None:
       # Verify that output is expected, without errors
-      nxt = self._get_next(dataset)
+      nxt = self.getNext(dataset)
       expected_output = [[
           v.encode('utf-8') if isinstance(v, str) else v for v in op
       ] for op in expected_output]
@@ -128,7 +99,7 @@ class CsvDatasetOpTest(test.TestCase):
     else:
       # Verify that OpError is produced as expected
       with self.assertRaisesOpError(expected_err_re):
-        nxt = self._get_next(dataset)
+        nxt = self.getNext(dataset)
         while True:
           try:
             self.evaluate(nxt())
@@ -354,7 +325,7 @@ class CsvDatasetOpTest(test.TestCase):
     inputs = [['1,,3,4', '5,6,,8']]
     ds_actual, ds_expected = self._make_test_datasets(
         inputs, record_defaults=record_defaults)
-    self._assert_datasets_equal(
+    self.assertDatasetsEqual(
         ds_actual.repeat(5).prefetch(1),
         ds_expected.repeat(5).prefetch(1))
 
@@ -377,7 +348,7 @@ class CsvDatasetOpTest(test.TestCase):
 
     ds = readers.make_csv_dataset(
         file_path, batch_size=1, shuffle=False, num_epochs=1)
-    nxt = self._get_next(ds)
+    nxt = self.getNext(ds)
 
     result = list(self.evaluate(nxt()).values())
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
index a2ab3de52e..722e87e555 100644
--- a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import dtypes
@@ -25,7 +26,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class DatasetConstructorTest(test.TestCase):
+class DatasetConstructorTest(test_base.DatasetTestBase):
 
   def testRestructureDataset(self):
     components = (array_ops.placeholder(dtypes.int32),
diff --git a/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py b/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py
index eb110324d1..bc10c21472 100644
--- a/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py
@@ -20,13 +20,14 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import random_seed
 from tensorflow.python.platform import test
 
 
-class DirectedInterleaveDatasetTest(test.TestCase):
+class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
 
   def testBasic(self):
     selector_dataset = dataset_ops.Dataset.range(10).repeat(100)
diff --git a/tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py b/tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py
index f3968cdc15..cc22ea1df7 100644
--- a/tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.contrib.data.python.ops import get_single_element
 from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -30,7 +31,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class GetSingleElementTest(test.TestCase, parameterized.TestCase):
+class GetSingleElementTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("Zero", 0, 1),
diff --git a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
index 46a7127b52..d4d3d4adb2 100644
--- a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 import unittest
 
 from tensorflow.contrib.data.python.ops import indexed_dataset_ops
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -28,7 +29,7 @@ from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
 from tensorflow.python.platform import test
 
 
-class IndexedDatasetOpsTest(test.TestCase):
+class IndexedDatasetOpsTest(test_base.DatasetTestBase):
 
   def testLowLevelIndexedDatasetOps(self):
     identity = ged_ops.experimental_identity_indexed_dataset(
diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
index b9e74dfddb..28bd670ab5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
@@ -25,6 +25,7 @@ import time
 from six.moves import zip_longest
 
 from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -36,7 +37,7 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
 
-class ParallelInterleaveDatasetTest(test.TestCase):
+class ParallelInterleaveDatasetTest(test_base.DatasetTestBase):
 
   def setUp(self):
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
index 7e2326bd17..58a1d7c93b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.data.python.ops import iterator_ops
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import model_fn
@@ -33,7 +34,7 @@ from tensorflow.python.training import saver as saver_lib
 from tensorflow.python.training import training_util
 
 
-class CheckpointInputPipelineHookTest(test.TestCase):
+class CheckpointInputPipelineHookTest(test_base.DatasetTestBase):
 
   @staticmethod
   def _model_fn(features, labels, mode, config):
diff --git a/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py
index 1cc5ddc9a2..d2a72272db 100644
--- a/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py
@@ -22,6 +22,7 @@ import os
 import shutil
 
 from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -31,7 +32,7 @@ from tensorflow.python.util import compat
 prefix_path = "tensorflow/core/lib"
 
 
-class LMDBDatasetTest(test.TestCase):
+class LMDBDatasetTest(test_base.DatasetTestBase):
 
   def setUp(self):
     super(LMDBDatasetTest, self).setUp()
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
index e8519381d6..385c4ef6ea 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
@@ -29,6 +29,7 @@ from tensorflow.contrib.data.python.ops import error_ops
 from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -41,7 +42,7 @@ from tensorflow.python.util import compat
 _NUMPY_RANDOM_SEED = 42
 
 
-class MapDatasetTest(test.TestCase):
+class MapDatasetTest(test_base.DatasetTestBase):
 
   def testMapIgnoreError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
index 25aea0393f..751e6d5b30 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
@@ -21,6 +21,7 @@ import time
 
 from tensorflow.contrib.data.python.ops import map_defun
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -33,7 +34,8 @@ from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
-class MapDefunTest(test.TestCase):
+
+class MapDefunTest(test_base.DatasetTestBase):
 
   def testMapDefunSimple(self):
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
index 1ae92bdeff..d7b5edcd9a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
@@ -15,6 +15,7 @@ py_test(
         "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -31,6 +32,7 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
     ],
@@ -57,7 +59,6 @@ py_test(
     srcs = ["map_vectorization_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/kernel_tests:test_utils",
         "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:check_ops",
         "//tensorflow/python:client_testlib",
@@ -67,6 +68,7 @@ py_test(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:session",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
@@ -85,6 +87,7 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
     ],
@@ -102,6 +105,7 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
     ],
@@ -121,6 +125,7 @@ py_test(
         "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -137,6 +142,7 @@ py_test(
         "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -151,6 +157,7 @@ py_test(
         "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
index d10da80442..fe1b5280ba 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
@@ -18,12 +18,13 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
 
 
-class AssertNextDatasetTest(test.TestCase):
+class AssertNextDatasetTest(test_base.DatasetTestBase):
 
   def testAssertNext(self):
     dataset = dataset_ops.Dataset.from_tensors(0).apply(
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
index 9518c2e1ad..b43efb5c7c 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -31,7 +32,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
 
 
-class HoistRandomUniformTest(test.TestCase, parameterized.TestCase):
+class HoistRandomUniformTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @staticmethod
   def map_functions():
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
index e75edf6086..e9e3fc81e5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -28,7 +29,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class MapAndFilterFusionTest(test.TestCase, parameterized.TestCase):
+class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @staticmethod
   def map_functions():
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
index dd547db086..f7907eb890 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -30,7 +31,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
 
 
-class MapParallelizationTest(test.TestCase, parameterized.TestCase):
+class MapParallelizationTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @staticmethod
   def map_functions():
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
index 5b493f44c9..a5ea85f454 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
@@ -22,9 +22,9 @@ import time
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests import test_utils
 from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -36,7 +36,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class MapVectorizationTest(test_utils.DatasetTestBase, parameterized.TestCase):
+class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _get_test_datasets(self,
                          base_dataset,
@@ -85,7 +85,7 @@ class MapVectorizationTest(test_utils.DatasetTestBase, parameterized.TestCase):
                                                            [3, 4]]).repeat(5)
     unoptimized, optimized = self._get_test_datasets(base_dataset, map_fn,
                                                      num_parallel_calls)
-    self._assert_datasets_equal(unoptimized, optimized)
+    self.assertDatasetsEqual(unoptimized, optimized)
 
   def testOptimizationBadMapFn(self):
     # Test map functions that give an error
@@ -112,7 +112,7 @@ class MapVectorizationTest(test_utils.DatasetTestBase, parameterized.TestCase):
     # TODO(rachelim): when this optimization works, turn on expect_optimized
     unoptimized, optimized = self._get_test_datasets(
         base_dataset, map_fn, expect_optimized=False)
-    self._assert_datasets_equal(optimized, unoptimized)
+    self.assertDatasetsEqual(optimized, unoptimized)
 
   def testOptimizationIgnoreStateful(self):
 
@@ -124,7 +124,7 @@ class MapVectorizationTest(test_utils.DatasetTestBase, parameterized.TestCase):
                                                            [3, 4]]).repeat(5)
     unoptimized, optimized = self._get_test_datasets(
         base_dataset, map_fn, expect_optimized=False)
-    self._assert_datasets_raise_same_error(
+    self.assertDatasetsRaiseSameError(
         unoptimized, optimized, errors.InvalidArgumentError,
         [("OneShotIterator", "OneShotIterator_1", 1),
          ("IteratorGetNext", "IteratorGetNext_1", 1)])
@@ -138,7 +138,7 @@ class MapVectorizationTest(test_utils.DatasetTestBase, parameterized.TestCase):
     base_dataset = dataset_ops.Dataset.range(20).batch(3, drop_remainder=False)
     unoptimized, optimized = self._get_test_datasets(
         base_dataset, map_fn, expect_optimized=False)
-    self._assert_datasets_equal(unoptimized, optimized)
+    self.assertDatasetsEqual(unoptimized, optimized)
 
   def testOptimizationIgnoreRaggedMap(self):
     # Don't optimize when the output of the map fn shapes are unknown.
@@ -148,7 +148,7 @@ class MapVectorizationTest(test_utils.DatasetTestBase, parameterized.TestCase):
     base_dataset = dataset_ops.Dataset.range(20).batch(1, drop_remainder=True)
     unoptimized, optimized = self._get_test_datasets(
         base_dataset, map_fn, expect_optimized=False)
-    self._assert_datasets_raise_same_error(
+    self.assertDatasetsRaiseSameError(
         unoptimized, optimized, errors.InvalidArgumentError,
         [("OneShotIterator", "OneShotIterator_1", 1),
          ("IteratorGetNext", "IteratorGetNext_1", 1)])
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
index 3b62a7e468..33c250ab2a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
@@ -23,12 +23,13 @@ import numpy as np
 
 from tensorflow.contrib.data.python.ops import batching
 from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class ModelDatasetTest(test.TestCase):
+class ModelDatasetTest(test_base.DatasetTestBase):
 
   def testModelMap(self):
     k = 1024 * 1024
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py
index 507feda3ad..b9e60cfa4e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -26,7 +27,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class NoopEliminationTest(test.TestCase):
+class NoopEliminationTest(test_base.DatasetTestBase):
 
   def testNoopElimination(self):
     a = constant_op.constant(1, dtype=dtypes.int64)
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
index a3fb824ce9..04f499f8c5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -28,7 +29,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
 
 
-class OptimizeDatasetTest(test.TestCase):
+class OptimizeDatasetTest(test_base.DatasetTestBase):
 
   def testOptimizationDefault(self):
     dataset = dataset_ops.Dataset.range(10).apply(
diff --git a/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py
index c4623bca73..66ccaceea5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py
@@ -25,6 +25,7 @@ import numpy as np
 from tensorflow.contrib.data.python.ops import parsing_ops as contrib_parsing_ops
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import dtypes
@@ -72,7 +73,7 @@ def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
     i += 1
 
 
-class ParseExampleTest(test.TestCase):
+class ParseExampleTest(test_base.DatasetTestBase):
 
   def _test(self,
             input_tensor,
diff --git a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
index 33a64ea767..7a6a7a709a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
@@ -22,6 +22,7 @@ import threading
 from tensorflow.contrib.data.python.ops import prefetching_ops
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.compat import compat
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
@@ -35,7 +36,7 @@ from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import test
 
 
-class PrefetchingKernelsOpsTest(test.TestCase):
+class PrefetchingKernelsOpsTest(test_base.DatasetTestBase):
 
   def setUp(self):
     self._event = threading.Event()
@@ -244,7 +245,7 @@ class PrefetchingKernelsOpsTest(test.TestCase):
       sess.run(destroy_op)
 
 
-class PrefetchToDeviceTest(test.TestCase):
+class PrefetchToDeviceTest(test_base.DatasetTestBase):
 
   def testPrefetchToDevice(self):
     host_dataset = dataset_ops.Dataset.range(10)
@@ -445,7 +446,7 @@ class PrefetchToDeviceTest(test.TestCase):
         sess.run(next_element)
 
 
-class CopyToDeviceTest(test.TestCase):
+class CopyToDeviceTest(test_base.DatasetTestBase):
 
   def testCopyToDevice(self):
     host_dataset = dataset_ops.Dataset.range(10)
diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
index db8fe6aa1b..2e901587f4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 from tensorflow.contrib.data.python.ops import counter
 from tensorflow.contrib.data.python.ops import enumerate_ops
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -27,7 +28,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.platform import test
 
 
-class RangeDatasetTest(test.TestCase):
+class RangeDatasetTest(test_base.DatasetTestBase):
 
   def testEnumerateDataset(self):
     components = (["a", "b"], [1, 2], [37.0, 38])
diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
index ed75b27a44..66ed547b6d 100644
--- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
@@ -25,6 +25,7 @@ import numpy as np
 
 from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
 from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import constant_op
@@ -242,7 +243,7 @@ class ReadBatchFeaturesTest(
         self.assertEqual(32, shape[0])
 
 
-class MakeCsvDatasetTest(test.TestCase):
+class MakeCsvDatasetTest(test_base.DatasetTestBase):
 
   def _make_csv_dataset(self, filenames, batch_size, num_epochs=1, **kwargs):
     return readers.make_csv_dataset(
diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py
index 08b9f03816..f443b5501b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py
+++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py
@@ -25,6 +25,7 @@ import zlib
 from tensorflow.contrib.data.python.ops import readers
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.framework import constant_op
@@ -32,11 +33,10 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.lib.io import python_io
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import parsing_ops
-from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class FixedLengthRecordDatasetTestBase(test.TestCase):
+class FixedLengthRecordDatasetTestBase(test_base.DatasetTestBase):
   """Base class for setting up and testing FixedLengthRecordDataset."""
 
   def setUp(self):
@@ -63,7 +63,7 @@ class FixedLengthRecordDatasetTestBase(test.TestCase):
     return filenames
 
 
-class ReadBatchFeaturesTestBase(test.TestCase):
+class ReadBatchFeaturesTestBase(test_base.DatasetTestBase):
   """Base class for setting up and testing `make_batched_feature_dataset`."""
 
   def setUp(self):
@@ -273,7 +273,7 @@ class ReadBatchFeaturesTestBase(test.TestCase):
         self.assertAllEqual(expected_batch[i], actual_batch[i])
 
 
-class TextLineDatasetTestBase(test.TestCase):
+class TextLineDatasetTestBase(test_base.DatasetTestBase):
   """Base class for setting up and testing TextLineDataset."""
 
   def _lineText(self, f, l):
@@ -313,7 +313,7 @@ class TextLineDatasetTestBase(test.TestCase):
     return filenames
 
 
-class TFRecordDatasetTestBase(test.TestCase):
+class TFRecordDatasetTestBase(test_base.DatasetTestBase):
   """Base class for setting up and testing TFRecordDataset."""
 
   def setUp(self):
diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py
index 16b1441baa..32474bd411 100644
--- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py
@@ -24,6 +24,7 @@ import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.contrib.data.python.ops import resampling
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -57,7 +58,7 @@ def _time_resampling(
   return end_time - start_time
 
 
-class ResampleTest(test.TestCase, parameterized.TestCase):
+class ResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("InitialDistributionKnown", True),
diff --git a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
index dde678bd54..bdf80eae4e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
@@ -22,6 +22,7 @@ import itertools
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import scan_ops
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
@@ -33,7 +34,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ScanDatasetTest(test.TestCase):
+class ScanDatasetTest(test_base.DatasetTestBase):
 
   def _counting_dataset(self, start, scan_fn):
     return dataset_ops.Dataset.from_tensors(0).repeat().apply(
diff --git a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
index 440e48db30..c97002a255 100644
--- a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
@@ -20,13 +20,14 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import shuffle_ops
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 
 
-class ShuffleAndRepeatTest(test.TestCase):
+class ShuffleAndRepeatTest(test_base.DatasetTestBase):
 
   def _build_ds(self, seed, count=5, num_elements=20):
     return dataset_ops.Dataset.range(num_elements).apply(
diff --git a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py
index 90d18dca2a..c5a7862322 100644
--- a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py
@@ -21,6 +21,7 @@ from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import sliding
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -30,7 +31,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class SlideDatasetTest(test.TestCase, parameterized.TestCase):
+class SlideDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("1", 20, 14, 7, 1),
@@ -197,11 +198,6 @@ class SlideDatasetTest(test.TestCase, parameterized.TestCase):
           sliding.sliding_window_batch(
               window_size=1, stride=1, window_shift=1, window_stride=1))
 
-  def assertSparseValuesEqual(self, a, b):
-    self.assertAllEqual(a.indices, b.indices)
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.dense_shape, b.dense_shape)
-
   def testSlideSparse(self):
 
     def _sparse(i):
diff --git a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py b/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py
index 1f5c725a92..319a2ea263 100644
--- a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py
+++ b/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py
@@ -24,12 +24,13 @@ import os
 import sqlite3
 
 from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class SqlDatasetTestBase(test.TestCase):
+class SqlDatasetTestBase(test_base.DatasetTestBase):
   """Base class for setting up and testing SqlDataset."""
 
   def _createSqlDataset(self, output_types, num_repeats=1):
@@ -92,5 +93,3 @@ class SqlDatasetTestBase(test.TestCase):
           9007199254740992.0)])
     conn.commit()
     conn.close()
-
-
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
index b1b4c23510..80f2625927 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
@@ -19,10 +19,10 @@ from __future__ import print_function
 
 
 from tensorflow.core.framework import summary_pb2
-from tensorflow.python.platform import test
+from tensorflow.python.data.kernel_tests import test_base
 
 
-class StatsDatasetTestBase(test.TestCase):
+class StatsDatasetTestBase(test_base.DatasetTestBase):
   """Base class for testing statistics gathered in `StatsAggregator`."""
 
   def _assertSummaryContains(self, summary_str, tag):
diff --git a/tensorflow/contrib/data/python/kernel_tests/test_utils.py b/tensorflow/contrib/data/python/kernel_tests/test_utils.py
deleted file mode 100644
index 4c3353fe40..0000000000
--- a/tensorflow/contrib/data/python/kernel_tests/test_utils.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Test utilities for tf.data functionality."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import re
-
-from tensorflow.python.data.util import nest
-from tensorflow.python.framework import errors
-from tensorflow.python.platform import test
-
-
-class DatasetTestBase(test.TestCase):
-  """Base class for dataset tests."""
-
-  def _assert_datasets_equal(self, dataset1, dataset2):
-    # TODO(rachelim): support sparse tensor outputs
-    next1 = dataset1.make_one_shot_iterator().get_next()
-    next2 = dataset2.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      while True:
-        try:
-          op1 = sess.run(next1)
-        except errors.OutOfRangeError:
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(next2)
-          break
-        op2 = sess.run(next2)
-
-        op1 = nest.flatten(op1)
-        op2 = nest.flatten(op2)
-        assert len(op1) == len(op2)
-        for i in range(len(op1)):
-          self.assertAllEqual(op1[i], op2[i])
-
-  def _assert_datasets_raise_same_error(self,
-                                        dataset1,
-                                        dataset2,
-                                        exception_class,
-                                        replacements=None):
-    # We are defining next1 and next2 in the same line so that we get identical
-    # file:line_number in the error messages
-    # pylint: disable=line-too-long
-    next1, next2 = dataset1.make_one_shot_iterator().get_next(), dataset2.make_one_shot_iterator().get_next()
-    # pylint: enable=line-too-long
-    with self.cached_session() as sess:
-      try:
-        sess.run(next1)
-        raise ValueError(
-            "Expected dataset to raise an error of type %s, but it did not." %
-            repr(exception_class))
-      except exception_class as e:
-        expected_message = e.message
-        for old, new, count in replacements:
-          expected_message = expected_message.replace(old, new, count)
-        # Check that the first segment of the error messages are the same.
-        with self.assertRaisesRegexp(exception_class,
-                                     re.escape(expected_message)):
-          sess.run(next2)
diff --git a/tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py
index 8d335e87d5..08de3a9143 100644
--- a/tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py
@@ -24,6 +24,7 @@ import numpy as np
 
 from tensorflow.contrib.data.python.ops import threadpool
 from tensorflow.contrib.data.python.ops import unique
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -31,7 +32,8 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
 
 
-class OverrideThreadpoolDatasetTest(test.TestCase, parameterized.TestCase):
+class OverrideThreadpoolDatasetTest(test_base.DatasetTestBase,
+                                    parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("1", 1, None),
diff --git a/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py
index f994c8563f..8856ce5afb 100644
--- a/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.data.python.ops import unique
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -25,7 +26,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class UniqueDatasetTest(test.TestCase):
+class UniqueDatasetTest(test_base.DatasetTestBase):
 
   def _testSimpleHelper(self, dtype, test_cases):
     """Test the `unique()` transformation on a list of test cases.
diff --git a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
index 8b7b3ac0f7..79134c7bc6 100644
--- a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.contrib.data.python.ops import batching
 from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -31,7 +32,7 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
 
-class WindowDatasetTest(test.TestCase, parameterized.TestCase):
+class WindowDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _structuredDataset(self, structure, shape, dtype):
     if structure is None:
diff --git a/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py
index 867ee2ba37..fca546a570 100644
--- a/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 import os
 
 from tensorflow.contrib.data.python.ops import writers
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
 from tensorflow.python.framework import dtypes
@@ -30,7 +31,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class TFRecordWriterTest(test.TestCase):
+class TFRecordWriterTest(test_base.DatasetTestBase):
 
   def setUp(self):
     super(TFRecordWriterTest, self).setUp()
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 5f9818566f..cadfe7f9e0 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -471,6 +471,9 @@ py_library(
     srcs = ["test_base.py"],
     deps = [
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/util:nest",
     ],
 )
 
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
index b4f64115b7..b730e10949 100644
--- a/tensorflow/python/data/kernel_tests/test_base.py
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -17,6 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import re
+
+from tensorflow.python.data.util import nest
+from tensorflow.python.eager import context
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.platform import test
 
 
@@ -24,6 +30,80 @@ class DatasetTestBase(test.TestCase):
   """Base class for dataset tests."""
 
   def assertSparseValuesEqual(self, a, b):
+    """Asserts that two SparseTensors/SparseTensorValues are equal."""
     self.assertAllEqual(a.indices, b.indices)
     self.assertAllEqual(a.values, b.values)
     self.assertAllEqual(a.dense_shape, b.dense_shape)
+
+  def getNext(self, dataset):
+    """Returns a callable that returns the next element of the dataset.
+
+    Example use:
+    ```python
+    # In both graph and eager modes
+    dataset = ...
+    nxt = self.getNext(dataset)
+    result = self.evaluate(nxt())
+    ```
+
+    Args:
+      dataset: A dataset whose next element is returned
+
+    Returns:
+      A callable that returns the next element of `dataset`
+    """
+    it = dataset.make_one_shot_iterator()
+    if context.executing_eagerly():
+      return it.get_next
+    else:
+      nxt = it.get_next()
+      return lambda: nxt
+
+  def assertDatasetsEqual(self, dataset1, dataset2):
+    """Checks that datasets are equal. Supports both graph and eager mode."""
+    self.assertEqual(dataset1.output_types, dataset2.output_types)
+    self.assertEqual(dataset1.output_classes, dataset2.output_classes)
+
+    next1 = self.getNext(dataset1)
+    next2 = self.getNext(dataset2)
+    while True:
+      try:
+        op1 = self.evaluate(next1())
+      except errors.OutOfRangeError:
+        with self.assertRaises(errors.OutOfRangeError):
+          self.evaluate(next2())
+        break
+      op2 = self.evaluate(next2())
+
+      op1 = nest.flatten(op1)
+      op2 = nest.flatten(op2)
+      assert len(op1) == len(op2)
+      for i in range(len(op1)):
+        if isinstance(
+            op1[i],
+            (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
+          self.assertSparseValuesEqual(op1[i], op2[i])
+        else:
+          self.assertAllEqual(op1[i], op2[i])
+
+  def assertDatasetsRaiseSameError(self,
+                                   dataset1,
+                                   dataset2,
+                                   exception_class,
+                                   replacements=None):
+    """Checks that datasets raise the same error on the first get_next call."""
+    next1 = self.getNext(dataset1)
+    next2 = self.getNext(dataset2)
+    try:
+      self.evaluate(next1())
+      raise ValueError(
+          'Expected dataset to raise an error of type %s, but it did not.' %
+          repr(exception_class))
+    except exception_class as e:
+      expected_message = e.message
+      for old, new, count in replacements:
+        expected_message = expected_message.replace(old, new, count)
+      # Check that the first segment of the error messages are the same.
+      with self.assertRaisesRegexp(exception_class,
+                                   re.escape(expected_message)):
+        self.evaluate(next2())
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 7d925a8fef..c621812535 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -66,7 +66,6 @@ COMMON_PIP_DEPS = [
     "//tensorflow/contrib/constrained_optimization:constrained_optimization_pip",
     "//tensorflow/contrib/data/python/kernel_tests/serialization:dataset_serialization_test_base",
     "//tensorflow/contrib/data/python/kernel_tests:stats_dataset_test_base",
-    "//tensorflow/contrib/data/python/kernel_tests:test_utils",
     "//tensorflow/contrib/eager/python/examples:examples_pip",
     "//tensorflow/contrib/eager/python:evaluator",
     "//tensorflow/contrib/gan:gan",
-- 
GitLab


From e4eeda33ca1d4a08ae2be7400f71b218fba25ccc Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 28 Sep 2018 15:27:40 -0700
Subject: [PATCH 0885/1357] Internal change.

PiperOrigin-RevId: 215005698
---
 ...nsorflow.-config-proto.-experimental.pbtxt |  24 --
 .../api/golden/tensorflow.-config-proto.pbtxt | 148 ---------
 .../golden/tensorflow.data.-iterator.pbtxt    |  46 ---
 ....estimator.-boosted-trees-classifier.pbtxt |  58 ----
 ...w.estimator.-boosted-trees-regressor.pbtxt |  58 ----
 .../tensorflow.estimator.-run-config.pbtxt    | 105 -------
 .../tools/api/golden/tensorflow.image.pbtxt   | 251 ---------------
 .../api/golden/tensorflow.keras.-model.pbtxt  | 268 ----------------
 .../golden/tensorflow.keras.-sequential.pbtxt | 289 ------------------
 .../golden/tensorflow.keras.activations.pbtxt |  55 ----
 .../tensorflow.keras.models.-model.pbtxt      | 268 ----------------
 .../tensorflow.keras.models.-sequential.pbtxt | 289 ------------------
 12 files changed, 1859 deletions(-)
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.image.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.activations.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt

diff --git a/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt b/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt
deleted file mode 100644
index eb41deee13..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt
+++ /dev/null
@@ -1,24 +0,0 @@
-path: "tensorflow.ConfigProto.Experimental"
-tf_proto {
-  descriptor {
-    name: "Experimental"
-    field {
-      name: "collective_group_leader"
-      number: 1
-      label: LABEL_OPTIONAL
-      type: TYPE_STRING
-    }
-    field {
-      name: "client_handles_error_formatting"
-      number: 2
-      label: LABEL_OPTIONAL
-      type: TYPE_BOOL
-    }
-    field {
-      name: "executor_type"
-      number: 3
-      label: LABEL_OPTIONAL
-      type: TYPE_STRING
-    }
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt
deleted file mode 100644
index e565b903d2..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt
+++ /dev/null
@@ -1,148 +0,0 @@
-path: "tensorflow.ConfigProto"
-tf_proto {
-  descriptor {
-    name: "ConfigProto"
-    field {
-      name: "device_count"
-      number: 1
-      label: LABEL_REPEATED
-      type: TYPE_MESSAGE
-      type_name: ".tensorflow.ConfigProto.DeviceCountEntry"
-    }
-    field {
-      name: "intra_op_parallelism_threads"
-      number: 2
-      label: LABEL_OPTIONAL
-      type: TYPE_INT32
-    }
-    field {
-      name: "inter_op_parallelism_threads"
-      number: 5
-      label: LABEL_OPTIONAL
-      type: TYPE_INT32
-    }
-    field {
-      name: "use_per_session_threads"
-      number: 9
-      label: LABEL_OPTIONAL
-      type: TYPE_BOOL
-    }
-    field {
-      name: "session_inter_op_thread_pool"
-      number: 12
-      label: LABEL_REPEATED
-      type: TYPE_MESSAGE
-      type_name: ".tensorflow.ThreadPoolOptionProto"
-    }
-    field {
-      name: "placement_period"
-      number: 3
-      label: LABEL_OPTIONAL
-      type: TYPE_INT32
-    }
-    field {
-      name: "device_filters"
-      number: 4
-      label: LABEL_REPEATED
-      type: TYPE_STRING
-    }
-    field {
-      name: "gpu_options"
-      number: 6
-      label: LABEL_OPTIONAL
-      type: TYPE_MESSAGE
-      type_name: ".tensorflow.GPUOptions"
-    }
-    field {
-      name: "allow_soft_placement"
-      number: 7
-      label: LABEL_OPTIONAL
-      type: TYPE_BOOL
-    }
-    field {
-      name: "log_device_placement"
-      number: 8
-      label: LABEL_OPTIONAL
-      type: TYPE_BOOL
-    }
-    field {
-      name: "graph_options"
-      number: 10
-      label: LABEL_OPTIONAL
-      type: TYPE_MESSAGE
-      type_name: ".tensorflow.GraphOptions"
-    }
-    field {
-      name: "operation_timeout_in_ms"
-      number: 11
-      label: LABEL_OPTIONAL
-      type: TYPE_INT64
-    }
-    field {
-      name: "rpc_options"
-      number: 13
-      label: LABEL_OPTIONAL
-      type: TYPE_MESSAGE
-      type_name: ".tensorflow.RPCOptions"
-    }
-    field {
-      name: "cluster_def"
-      number: 14
-      label: LABEL_OPTIONAL
-      type: TYPE_MESSAGE
-      type_name: ".tensorflow.ClusterDef"
-    }
-    field {
-      name: "isolate_session_state"
-      number: 15
-      label: LABEL_OPTIONAL
-      type: TYPE_BOOL
-    }
-    field {
-      name: "experimental"
-      number: 16
-      label: LABEL_OPTIONAL
-      type: TYPE_MESSAGE
-      type_name: ".tensorflow.ConfigProto.Experimental"
-    }
-    nested_type {
-      name: "DeviceCountEntry"
-      field {
-        name: "key"
-        number: 1
-        label: LABEL_OPTIONAL
-        type: TYPE_STRING
-      }
-      field {
-        name: "value"
-        number: 2
-        label: LABEL_OPTIONAL
-        type: TYPE_INT32
-      }
-      options {
-        map_entry: true
-      }
-    }
-    nested_type {
-      name: "Experimental"
-      field {
-        name: "collective_group_leader"
-        number: 1
-        label: LABEL_OPTIONAL
-        type: TYPE_STRING
-      }
-      field {
-        name: "client_handles_error_formatting"
-        number: 2
-        label: LABEL_OPTIONAL
-        type: TYPE_BOOL
-      }
-      field {
-        name: "executor_type"
-        number: 3
-        label: LABEL_OPTIONAL
-        type: TYPE_STRING
-      }
-    }
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt
deleted file mode 100644
index 4f0147a523..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt
+++ /dev/null
@@ -1,46 +0,0 @@
-path: "tensorflow.data.Iterator"
-tf_class {
-  is_instance: "<class \'tensorflow.python.data.ops.iterator_ops.Iterator\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "initializer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_classes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shapes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_types"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'iterator_resource\', \'initializer\', \'output_types\', \'output_shapes\', \'output_classes\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_string_handle"
-    argspec: "args=[\'string_handle\', \'output_types\', \'output_shapes\', \'output_classes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "from_structure"
-    argspec: "args=[\'output_types\', \'output_shapes\', \'shared_name\', \'output_classes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "get_next"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "make_initializer"
-    argspec: "args=[\'self\', \'dataset\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "string_handle"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt
deleted file mode 100644
index c23b04b4ef..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt
+++ /dev/null
@@ -1,58 +0,0 @@
-path: "tensorflow.estimator.BoostedTreesClassifier"
-tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "config"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "model_dir"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "model_fn"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "params"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'min_node_weight\', \'config\', \'center_bias\', \'pruning_mode\'], varargs=None, keywords=None, defaults=[\'None\', \'<object object instance>\', \'None\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'None\', \'False\', \'none\'], "
-  }
-  member_method {
-    name: "eval_dir"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "evaluate"
-    argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
-  }
-  member_method {
-    name: "get_variable_names"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_variable_value"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "latest_checkpoint"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "predict"
-    argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], "
-  }
-  member_method {
-    name: "train"
-    argspec: "args=[\'self\', \'input_fn\', \'hooks\', \'steps\', \'max_steps\', \'saving_listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt
deleted file mode 100644
index 6878d28fff..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt
+++ /dev/null
@@ -1,58 +0,0 @@
-path: "tensorflow.estimator.BoostedTreesRegressor"
-tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "config"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "model_dir"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "model_fn"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "params"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'label_dimension\', \'weight_column\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'min_node_weight\', \'config\', \'center_bias\', \'pruning_mode\'], varargs=None, keywords=None, defaults=[\'None\', \'<object object instance>\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'None\', \'False\', \'none\'], "
-  }
-  member_method {
-    name: "eval_dir"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "evaluate"
-    argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
-  }
-  member_method {
-    name: "get_variable_names"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_variable_value"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "latest_checkpoint"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "predict"
-    argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], "
-  }
-  member_method {
-    name: "train"
-    argspec: "args=[\'self\', \'input_fn\', \'hooks\', \'steps\', \'max_steps\', \'saving_listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt
deleted file mode 100644
index bf1f94b6ae..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt
+++ /dev/null
@@ -1,105 +0,0 @@
-path: "tensorflow.estimator.RunConfig"
-tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.run_config.RunConfig\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "cluster_spec"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "device_fn"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "eval_distribute"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "evaluation_master"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "global_id_in_cluster"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "is_chief"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "keep_checkpoint_every_n_hours"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "keep_checkpoint_max"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "log_step_count_steps"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "master"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "model_dir"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "num_ps_replicas"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "num_worker_replicas"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "protocol"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "save_checkpoints_secs"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "save_checkpoints_steps"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "save_summary_steps"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "service"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "session_config"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "task_id"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "task_type"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "tf_random_seed"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "train_distribute"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'train_distribute\', \'device_fn\', \'protocol\', \'eval_distribute\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'<object object instance>\', \'<object object instance>\', \'None\', \'5\', \'10000\', \'100\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "replace"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt
deleted file mode 100644
index 5c46dc5ee7..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt
+++ /dev/null
@@ -1,251 +0,0 @@
-path: "tensorflow.image"
-tf_module {
-  member {
-    name: "ResizeMethod"
-    mtype: "<type \'type\'>"
-  }
-  member_method {
-    name: "adjust_brightness"
-    argspec: "args=[\'image\', \'delta\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "adjust_contrast"
-    argspec: "args=[\'images\', \'contrast_factor\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "adjust_gamma"
-    argspec: "args=[\'image\', \'gamma\', \'gain\'], varargs=None, keywords=None, defaults=[\'1\', \'1\'], "
-  }
-  member_method {
-    name: "adjust_hue"
-    argspec: "args=[\'image\', \'delta\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "adjust_jpeg_quality"
-    argspec: "args=[\'image\', \'jpeg_quality\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "adjust_saturation"
-    argspec: "args=[\'image\', \'saturation_factor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "central_crop"
-    argspec: "args=[\'image\', \'central_fraction\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "convert_image_dtype"
-    argspec: "args=[\'image\', \'dtype\', \'saturate\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "crop_and_resize"
-    argspec: "args=[\'image\', \'boxes\', \'box_ind\', \'crop_size\', \'method\', \'extrapolation_value\', \'name\'], varargs=None, keywords=None, defaults=[\'bilinear\', \'0\', \'None\'], "
-  }
-  member_method {
-    name: "crop_to_bounding_box"
-    argspec: "args=[\'image\', \'offset_height\', \'offset_width\', \'target_height\', \'target_width\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "decode_and_crop_jpeg"
-    argspec: "args=[\'contents\', \'crop_window\', \'channels\', \'ratio\', \'fancy_upscaling\', \'try_recover_truncated\', \'acceptable_fraction\', \'dct_method\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'1\', \'True\', \'False\', \'1\', \'\', \'None\'], "
-  }
-  member_method {
-    name: "decode_bmp"
-    argspec: "args=[\'contents\', \'channels\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\'], "
-  }
-  member_method {
-    name: "decode_gif"
-    argspec: "args=[\'contents\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "decode_image"
-    argspec: "args=[\'contents\', \'channels\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'uint8\'>\", \'None\'], "
-  }
-  member_method {
-    name: "decode_jpeg"
-    argspec: "args=[\'contents\', \'channels\', \'ratio\', \'fancy_upscaling\', \'try_recover_truncated\', \'acceptable_fraction\', \'dct_method\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'1\', \'True\', \'False\', \'1\', \'\', \'None\'], "
-  }
-  member_method {
-    name: "decode_png"
-    argspec: "args=[\'contents\', \'channels\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \"<dtype: \'uint8\'>\", \'None\'], "
-  }
-  member_method {
-    name: "draw_bounding_boxes"
-    argspec: "args=[\'images\', \'boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "encode_jpeg"
-    argspec: "args=[\'image\', \'format\', \'quality\', \'progressive\', \'optimize_size\', \'chroma_downsampling\', \'density_unit\', \'x_density\', \'y_density\', \'xmp_metadata\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'95\', \'False\', \'False\', \'True\', \'in\', \'300\', \'300\', \'\', \'None\'], "
-  }
-  member_method {
-    name: "encode_png"
-    argspec: "args=[\'image\', \'compression\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], "
-  }
-  member_method {
-    name: "extract_glimpse"
-    argspec: "args=[\'input\', \'size\', \'offsets\', \'centered\', \'normalized\', \'uniform_noise\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'True\', \'None\'], "
-  }
-  member_method {
-    name: "extract_image_patches"
-    argspec: "args=[\'images\', \'ksizes\', \'strides\', \'rates\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "extract_jpeg_shape"
-    argspec: "args=[\'contents\', \'output_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int32\'>\", \'None\'], "
-  }
-  member_method {
-    name: "flip_left_right"
-    argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "flip_up_down"
-    argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "grayscale_to_rgb"
-    argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "hsv_to_rgb"
-    argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "image_gradients"
-    argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "is_jpeg"
-    argspec: "args=[\'contents\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "non_max_suppression"
-    argspec: "args=[\'boxes\', \'scores\', \'max_output_size\', \'iou_threshold\', \'score_threshold\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'-inf\', \'None\'], "
-  }
-  member_method {
-    name: "non_max_suppression_overlaps"
-    argspec: "args=[\'overlaps\', \'scores\', \'max_output_size\', \'overlap_threshold\', \'score_threshold\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'-inf\', \'None\'], "
-  }
-  member_method {
-    name: "non_max_suppression_padded"
-    argspec: "args=[\'boxes\', \'scores\', \'max_output_size\', \'iou_threshold\', \'score_threshold\', \'pad_to_max_output_size\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'-inf\', \'False\', \'None\'], "
-  }
-  member_method {
-    name: "pad_to_bounding_box"
-    argspec: "args=[\'image\', \'offset_height\', \'offset_width\', \'target_height\', \'target_width\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "per_image_standardization"
-    argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "psnr"
-    argspec: "args=[\'a\', \'b\', \'max_val\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "random_brightness"
-    argspec: "args=[\'image\', \'max_delta\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "random_contrast"
-    argspec: "args=[\'image\', \'lower\', \'upper\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "random_flip_left_right"
-    argspec: "args=[\'image\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "random_flip_up_down"
-    argspec: "args=[\'image\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "random_hue"
-    argspec: "args=[\'image\', \'max_delta\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "random_jpeg_quality"
-    argspec: "args=[\'image\', \'min_jpeg_quality\', \'max_jpeg_quality\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "random_saturation"
-    argspec: "args=[\'image\', \'lower\', \'upper\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "resize_area"
-    argspec: "args=[\'images\', \'size\', \'align_corners\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "resize_bicubic"
-    argspec: "args=[\'images\', \'size\', \'align_corners\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "resize_bilinear"
-    argspec: "args=[\'images\', \'size\', \'align_corners\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "resize_image_with_crop_or_pad"
-    argspec: "args=[\'image\', \'target_height\', \'target_width\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "resize_image_with_pad"
-    argspec: "args=[\'image\', \'target_height\', \'target_width\', \'method\'], varargs=None, keywords=None, defaults=[\'0\'], "
-  }
-  member_method {
-    name: "resize_images"
-    argspec: "args=[\'images\', \'size\', \'method\', \'align_corners\', \'preserve_aspect_ratio\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\'], "
-  }
-  member_method {
-    name: "resize_nearest_neighbor"
-    argspec: "args=[\'images\', \'size\', \'align_corners\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "rgb_to_grayscale"
-    argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "rgb_to_hsv"
-    argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "rgb_to_yiq"
-    argspec: "args=[\'images\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "rgb_to_yuv"
-    argspec: "args=[\'images\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "rot90"
-    argspec: "args=[\'image\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
-  }
-  member_method {
-    name: "sample_distorted_bounding_box"
-    argspec: "args=[\'image_size\', \'bounding_boxes\', \'seed\', \'seed2\', \'min_object_covered\', \'aspect_ratio_range\', \'area_range\', \'max_attempts\', \'use_image_if_no_bounding_boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'0.1\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "sobel_edges"
-    argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "ssim"
-    argspec: "args=[\'img1\', \'img2\', \'max_val\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "ssim_multiscale"
-    argspec: "args=[\'img1\', \'img2\', \'max_val\', \'power_factors\'], varargs=None, keywords=None, defaults=[\'(0.0448, 0.2856, 0.3001, 0.2363, 0.1333)\'], "
-  }
-  member_method {
-    name: "total_variation"
-    argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "transpose_image"
-    argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "yiq_to_rgb"
-    argspec: "args=[\'images\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "yuv_to_rgb"
-    argspec: "args=[\'images\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt
deleted file mode 100644
index e579fe6a1a..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt
+++ /dev/null
@@ -1,268 +0,0 @@
-path: "tensorflow.keras.Model"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_spec"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "layers"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "state_updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "stateful"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "uses_learning_phase"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "compile"
-    argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\', \'distribute\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
-  }
-  member_method {
-    name: "fit"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_layer"
-    argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "load_weights"
-    argspec: "args=[\'self\', \'filepath\', \'by_name\'], varargs=None, keywords=None, defaults=[\'False\'], "
-  }
-  member_method {
-    name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], "
-  }
-  member_method {
-    name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
-  }
-  member_method {
-    name: "predict_on_batch"
-    argspec: "args=[\'self\', \'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "save"
-    argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\'], varargs=None, keywords=None, defaults=[\'True\', \'True\'], "
-  }
-  member_method {
-    name: "save_weights"
-    argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "summary"
-    argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "test_on_batch"
-    argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "to_json"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "to_yaml"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "train_on_batch"
-    argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt
deleted file mode 100644
index 6f05cdd093..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt
+++ /dev/null
@@ -1,289 +0,0 @@
-path: "tensorflow.keras.Sequential"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.engine.sequential.Sequential\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_spec"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "layers"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "state_updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "stateful"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "uses_learning_phase"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'layers\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add"
-    argspec: "args=[\'self\', \'layer\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "compile"
-    argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\', \'distribute\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
-  }
-  member_method {
-    name: "fit"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_layer"
-    argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "load_weights"
-    argspec: "args=[\'self\', \'filepath\', \'by_name\'], varargs=None, keywords=None, defaults=[\'False\'], "
-  }
-  member_method {
-    name: "pop"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], "
-  }
-  member_method {
-    name: "predict_classes"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'0\'], "
-  }
-  member_method {
-    name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
-  }
-  member_method {
-    name: "predict_on_batch"
-    argspec: "args=[\'self\', \'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "predict_proba"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'0\'], "
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "save"
-    argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\'], varargs=None, keywords=None, defaults=[\'True\', \'True\'], "
-  }
-  member_method {
-    name: "save_weights"
-    argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "summary"
-    argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "symbolic_set_inputs"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "test_on_batch"
-    argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "to_json"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "to_yaml"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "train_on_batch"
-    argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.activations.pbtxt
deleted file mode 100644
index 2e9de9ebb2..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.keras.activations.pbtxt
+++ /dev/null
@@ -1,55 +0,0 @@
-path: "tensorflow.keras.activations"
-tf_module {
-  member_method {
-    name: "deserialize"
-    argspec: "args=[\'name\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "elu"
-    argspec: "args=[\'x\', \'alpha\'], varargs=None, keywords=None, defaults=[\'1.0\'], "
-  }
-  member_method {
-    name: "get"
-    argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "hard_sigmoid"
-    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "linear"
-    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "relu"
-    argspec: "args=[\'x\', \'alpha\', \'max_value\', \'threshold\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\', \'0\'], "
-  }
-  member_method {
-    name: "selu"
-    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "serialize"
-    argspec: "args=[\'activation\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "sigmoid"
-    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "softmax"
-    argspec: "args=[\'x\', \'axis\'], varargs=None, keywords=None, defaults=[\'-1\'], "
-  }
-  member_method {
-    name: "softplus"
-    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "softsign"
-    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "tanh"
-    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
deleted file mode 100644
index 56914e1746..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
+++ /dev/null
@@ -1,268 +0,0 @@
-path: "tensorflow.keras.models.Model"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_spec"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "layers"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "state_updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "stateful"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "uses_learning_phase"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "compile"
-    argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\', \'distribute\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
-  }
-  member_method {
-    name: "fit"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_layer"
-    argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "load_weights"
-    argspec: "args=[\'self\', \'filepath\', \'by_name\'], varargs=None, keywords=None, defaults=[\'False\'], "
-  }
-  member_method {
-    name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], "
-  }
-  member_method {
-    name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
-  }
-  member_method {
-    name: "predict_on_batch"
-    argspec: "args=[\'self\', \'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "save"
-    argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\'], varargs=None, keywords=None, defaults=[\'True\', \'True\'], "
-  }
-  member_method {
-    name: "save_weights"
-    argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "summary"
-    argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "test_on_batch"
-    argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "to_json"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "to_yaml"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "train_on_batch"
-    argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt
deleted file mode 100644
index 4c1c54001d..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt
+++ /dev/null
@@ -1,289 +0,0 @@
-path: "tensorflow.keras.models.Sequential"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.engine.sequential.Sequential\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_spec"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "layers"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "state_updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "stateful"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "uses_learning_phase"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'layers\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add"
-    argspec: "args=[\'self\', \'layer\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\', \'training\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "compile"
-    argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\', \'distribute\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
-  }
-  member_method {
-    name: "fit"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_layer"
-    argspec: "args=[\'self\', \'name\', \'index\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "load_weights"
-    argspec: "args=[\'self\', \'filepath\', \'by_name\'], varargs=None, keywords=None, defaults=[\'False\'], "
-  }
-  member_method {
-    name: "pop"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], "
-  }
-  member_method {
-    name: "predict_classes"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'0\'], "
-  }
-  member_method {
-    name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
-  }
-  member_method {
-    name: "predict_on_batch"
-    argspec: "args=[\'self\', \'x\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "predict_proba"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'0\'], "
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "save"
-    argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'include_optimizer\'], varargs=None, keywords=None, defaults=[\'True\', \'True\'], "
-  }
-  member_method {
-    name: "save_weights"
-    argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "summary"
-    argspec: "args=[\'self\', \'line_length\', \'positions\', \'print_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "symbolic_set_inputs"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "test_on_batch"
-    argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "to_json"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "to_yaml"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "train_on_batch"
-    argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-}
-- 
GitLab


From f5086804c758812ec9ed67233c58e18236246299 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 28 Sep 2018 15:48:20 -0700
Subject: [PATCH 0886/1357] Add documentation of the ownership semantics to
 {Lookup,Create,LookupOrCreate}Resource().

PiperOrigin-RevId: 215008650
---
 tensorflow/core/framework/resource_mgr.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h
index abb6635984..4a531648d9 100644
--- a/tensorflow/core/framework/resource_mgr.h
+++ b/tensorflow/core/framework/resource_mgr.h
@@ -248,10 +248,16 @@ Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
                        ResourceHandle* handle);
 
 // Create a resource pointed by a given resource handle.
+//
+// If successful, the caller transfers the ownership of one ref on `resource` to
+// `ctx->resource_mgr()`.
 template <typename T>
 Status CreateResource(OpKernelContext* ctx, const ResourceHandle& p, T* value);
 
 // Looks up a resource pointed by a given resource handle.
+//
+// If the lookup is successful, the caller takes the ownership of one ref on
+// `*value`, and must call its `Unref()` method when it has finished using it.
 template <typename T>
 Status LookupResource(OpKernelContext* ctx, const ResourceHandle& p, T** value);
 
@@ -262,6 +268,11 @@ Status LookupResources(
     std::vector<std::unique_ptr<T, core::RefCountDeleter>>* values);
 
 // Looks up or creates a resource.
+//
+// If successful, the caller takes the ownership of one ref on `*value`, and
+// must call its `Unref()` method when it has finished using it. If the
+// `creator` is invoked, its reference on the created resource is transferred
+// to `ctx->resource_mgr()`.
 template <typename T>
 Status LookupOrCreateResource(OpKernelContext* ctx, const ResourceHandle& p,
                               T** value, std::function<Status(T**)> creator);
-- 
GitLab


From 3f4423fad57694bc8d7adc427d65e5a18c8592b2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 15:58:19 -0700
Subject: [PATCH 0887/1357] Internal changes only.

PiperOrigin-RevId: 215009955
---
 .../contrib/tpu/ops/tpu_embedding_ops.cc      | 42 +++----------------
 1 file changed, 6 insertions(+), 36 deletions(-)

diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index 1bd1a31e11..bc1a0c5284 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -103,19 +103,10 @@ Status RegisterPerTableLoadOpsForAlgorithmBody(
       arg->set_type(DT_FLOAT);
     }
   }
-  {
-    auto* table_id_attr = op_def->add_attr();
-    table_id_attr->set_name("table_id");
-    table_id_attr->set_type("int");
-    table_id_attr->set_has_minimum(true);
-    table_id_attr->set_minimum(-1);
-    table_id_attr->mutable_default_value()->set_i(-1);
-  }
   {
     auto* table_name_attr = op_def->add_attr();
     table_name_attr->set_name("table_name");
     table_name_attr->set_type("string");
-    table_name_attr->mutable_default_value()->set_s("");
   }
   {
     auto* num_shards_attr = op_def->add_attr();
@@ -147,11 +138,9 @@ parameters that are loaded from a checkpoint before a training loop is
 executed.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto (overrides table_id).
+  EmbeddingLayerConfiguration proto.
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
-table_id: Index of this table in the EmbeddingLayerConfiguration proto
-  (deprecated).
 )doc",
                                           parameter_descriptions.c_str()));
   op_def->set_is_commutative(false);
@@ -160,14 +149,10 @@ table_id: Index of this table in the EmbeddingLayerConfiguration proto
   auto shape_inference_function =
       [state_variable_specs,
        is_debug_op](shape_inference::InferenceContext* c) -> Status {
-    int table_id;
-    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
     string table_name;
     TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
-    // Exactly one must be non-default.
-    if ((table_id >= 0) == (!table_name.empty())) {
-      return errors::InvalidArgument(
-          "exactly one of table_id or table_name must be non-default");
+    if (table_name.empty()) {
+      return errors::InvalidArgument("table_name attribute must be set");
     }
     int num_shards;
     TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
@@ -240,19 +225,10 @@ Status RegisterPerTableRetrieveOpsForAlgorithmBody(
       arg->set_type(DT_FLOAT);
     }
   }
-  {
-    auto* table_id_attr = op_def->add_attr();
-    table_id_attr->set_name("table_id");
-    table_id_attr->set_type("int");
-    table_id_attr->set_has_minimum(true);
-    table_id_attr->set_minimum(-1);
-    table_id_attr->mutable_default_value()->set_i(-1);
-  }
   {
     auto* table_name_attr = op_def->add_attr();
     table_name_attr->set_name("table_name");
     table_name_attr->set_type("string");
-    table_name_attr->mutable_default_value()->set_s("");
   }
   {
     auto* num_shards_attr = op_def->add_attr();
@@ -283,11 +259,9 @@ the correct embedding table configuration. For example, this op is
 used to retrieve updated parameters before saving a checkpoint.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto (overrides table_id).
+  EmbeddingLayerConfiguration proto.
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
-table_id: Index of this table in the EmbeddingLayerConfiguration proto
-  (deprecated).
 )doc",
                                           parameter_descriptions.c_str()));
   op_def->set_is_commutative(false);
@@ -296,14 +270,10 @@ table_id: Index of this table in the EmbeddingLayerConfiguration proto
   auto shape_inference_function =
       [state_variable_specs,
        is_debug_op](shape_inference::InferenceContext* c) -> Status {
-    int table_id;
-    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
     string table_name;
     TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
-    // Exactly one must be non-default.
-    if ((table_id >= 0) == (!table_name.empty())) {
-      return errors::InvalidArgument(
-          "exactly one of table_id or table_name must be non-default");
+    if (table_name.empty()) {
+      return errors::InvalidArgument("table_name must be non-empty");
     }
     int num_shards;
     TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
-- 
GitLab


From 0a1132ece84bd76d6dceaf8d29211959b5dca216 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 16:03:41 -0700
Subject: [PATCH 0888/1357] fix broken tests.

PiperOrigin-RevId: 215010842
---
 .../opt/python/training/shampoo_test.py       | 40 +++++++++----------
 .../timeseries/python/timeseries/head_test.py |  2 +-
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/shampoo_test.py b/tensorflow/contrib/opt/python/training/shampoo_test.py
index 05bcf2cfa3..a2fd8fbd87 100644
--- a/tensorflow/contrib/opt/python/training/shampoo_test.py
+++ b/tensorflow/contrib/opt/python/training/shampoo_test.py
@@ -54,9 +54,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     grad_np_2 = np.random.rand(size)
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = constant_op.constant(grad_np, dtype=dtypes.float32)
       grad_2 = constant_op.constant(grad_np_2, dtype=dtypes.float32)
@@ -105,9 +105,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     grad_np_2 = np.random.rand(size[0], size[1])
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = constant_op.constant(grad_np, dtype=dtypes.float32)
       grad_2 = constant_op.constant(grad_np_2, dtype=dtypes.float32)
@@ -164,9 +164,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     grad_np_2 = np.random.rand(size[0], size[1], size[2])
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = constant_op.constant(grad_np, dtype=dtypes.float32)
       grad_2 = constant_op.constant(grad_np_2, dtype=dtypes.float32)
@@ -254,9 +254,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     grad_np_2 = np.random.rand(size)
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = constant_op.constant(grad_np, dtype=dtypes.float32)
       grad_2 = constant_op.constant(grad_np_2, dtype=dtypes.float32)
@@ -310,9 +310,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     grad_np_2 = np.random.rand(size[0], size[1])
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = constant_op.constant(grad_np, dtype=dtypes.float32)
       grad_2 = constant_op.constant(grad_np_2, dtype=dtypes.float32)
@@ -383,9 +383,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     grad_np_2 = np.random.rand(sample_size_2, size[1])
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = ops.IndexedSlices(
           constant_op.constant(grad_np, dtype=dtypes.float32),
@@ -463,9 +463,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     grad_np = np.random.rand(sample_size, size[1], size[2])
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = ops.IndexedSlices(
           constant_op.constant(grad_np, dtype=dtypes.float32),
@@ -533,9 +533,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     gbar_weight = 0.1
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = constant_op.constant(grad_np, dtype=dtypes.float32)
       grad_2 = constant_op.constant(grad_np_2, dtype=dtypes.float32)
@@ -628,9 +628,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     mat_g3 = np.zeros_like(mat_g3_a)
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = array_ops.placeholder(dtypes.float32, shape=size)
 
@@ -705,9 +705,9 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
     mat_g3 = np.zeros_like(mat_g3_a)
 
     with self.cached_session() as sess:
-      global_step = variables.Variable(
+      global_step = variables.VariableV1(
           0, dtype=dtypes.int64, use_resource=use_resource_var)
-      var = variables.Variable(
+      var = variables.VariableV1(
           init_var_np, dtype=dtypes.float32, use_resource=use_resource_var)
       grad = array_ops.placeholder(dtypes.float32, shape=size)
 
diff --git a/tensorflow/contrib/timeseries/python/timeseries/head_test.py b/tensorflow/contrib/timeseries/python/timeseries/head_test.py
index 647455ae42..04d17bc123 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/head_test.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/head_test.py
@@ -104,7 +104,7 @@ class EvaluationMetricsTests(test.TestCase):
           "ticker":
               array_ops.reshape(
                   math_ops.cast(
-                      variables.Variable(
+                      variables.VariableV1(
                           name="ticker",
                           initial_value=0,
                           dtype=dtypes.int64,
-- 
GitLab


From a98bac521406bedef3ff2b9af9564b21ddda4d82 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Fri, 28 Sep 2018 16:09:49 -0700
Subject: [PATCH 0889/1357] [TF:XLA] Bump open source abseil revision to
 48cd2c3f351ff188bc85684b84a91b6e6d17d896

This has absl::flat_hash_map in it.

PiperOrigin-RevId: 215011713
---
 tensorflow/contrib/makefile/Makefile | 3 ++-
 tensorflow/workspace.bzl             | 8 ++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index d962a5e12d..36125c198e 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -133,7 +133,8 @@ $(wildcard tensorflow/contrib/makefile/downloads/absl/absl/*/*benchmark*.cc) \
 $(wildcard tensorflow/contrib/makefile/downloads/absl/absl/*/*/*benchmark*.cc) \
 $(wildcard tensorflow/contrib/makefile/downloads/absl/absl/*/*/*/*benchmark*.cc) \
 $(wildcard tensorflow/contrib/makefile/downloads/absl/absl/*/*/*/*/*benchmark*.cc) \
-tensorflow/contrib/makefile/downloads/absl/absl/synchronization/internal/mutex_nonprod.cc
+tensorflow/contrib/makefile/downloads/absl/absl/synchronization/internal/mutex_nonprod.cc \
+tensorflow/contrib/makefile/downloads/absl/absl/hash/internal/print_hash_of.cc
 
 ABSL_CC_SRCS := $(filter-out $(ABSL_CC_EXCLUDE_SRCS), $(ABSL_CC_ALL_SRCS))
 
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 70bade060e..9b4b698874 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -110,11 +110,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "278a1af58b633be886fe81bf7061dca6b5fea99566850d1319fffdaa1a061792",
-        strip_prefix = "abseil-cpp-e291c279e458761e77a69b09b129d3d1e81f1e80",
+        sha256 = "7dd09690ae7ca4551de3111d4a86b75b23ec17445f273d3c42bdcdc1c7b02e4e",
+        strip_prefix = "abseil-cpp-48cd2c3f351ff188bc85684b84a91b6e6d17d896",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e291c279e458761e77a69b09b129d3d1e81f1e80.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/e291c279e458761e77a69b09b129d3d1e81f1e80.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz",
         ],
     )
 
-- 
GitLab


From 478d370eb116ad2294134d75a886637a7d6da225 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 28 Sep 2018 16:10:45 -0700
Subject: [PATCH 0890/1357] [tf.data] Use Graph instead of GraphDef/FunctionDef
 for vectorization transforms

PiperOrigin-RevId: 215011835
---
 .../core/grappler/optimizers/data/BUILD       |   7 +-
 .../grappler/optimizers/data/graph_utils.h    |   4 +-
 .../optimizers/data/map_vectorization.cc      |  28 +-
 .../optimizers/data/map_vectorization_test.cc | 112 +++--
 .../optimizers/data/vectorization/BUILD       |   3 +-
 .../data/vectorization/cast_vectorizer.cc     |  29 +-
 .../data/vectorization/unpack_vectorizer.cc   |  36 +-
 .../data/vectorization/vectorizer.h           |  23 +-
 .../vectorization/vectorizer_registry_test.cc |  16 +-
 .../optimizers/data/vectorization_utils.cc    | 451 +++++++++++-------
 .../optimizers/data/vectorization_utils.h     |  35 +-
 .../data/vectorization_utils_test.cc          | 205 +++++---
 12 files changed, 574 insertions(+), 375 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 81c1bddf67..5a3abbb545 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -124,10 +124,10 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
-        "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
     ] + tf_protos_all(),
 )
@@ -523,6 +523,7 @@ cc_library(
         ":function_utils",
         ":graph_utils",
         "@com_google_absl//absl/strings",
+        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
@@ -538,6 +539,7 @@ tf_cc_test(
     srcs = ["vectorization_utils_test.cc"],
     visibility = ["//visibility:public"],
     deps = [
+        ":graph_utils",
         ":function_utils",
         ":vectorization_utils",
         "//tensorflow/core:framework",
@@ -547,7 +549,10 @@ tf_cc_test(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+        # For ops we need registered
+        "//tensorflow/core/kernels/data:dataset_ops",
         "//tensorflow/core/kernels:cast_op",
+        "//tensorflow/core/kernels:logging_ops",
         "//tensorflow/tools/graph_transforms:transform_utils",
     ] + tf_protos_all(),
 )
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h
index 5dd7819100..3af34f6904 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h
@@ -116,8 +116,8 @@ std::vector<int> FindAllGraphNodesWithOp(const string& op,
 // is unique across the graph.
 void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph, NodeDef* node);
 
-// Sets the node name using the `prefix` name as a prefix while guaranteeing the
-// name is unique across the graph.
+// Sets the function name using the `prefix` name as a prefix while guaranteeing
+// the name is unique across the function library.
 void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
                                 FunctionDef* function);
 
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index 32ab912619..9328a7ca99 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -86,21 +86,19 @@ FunctionDef* AddVectorizedFunction(const NodeDef& map_node,
   // efficient vectorization with VectorizeMapDefun.
   FunctionDef* vectorized_func =
       CreateMapDefunWrapper(map_node, orig_func, library);
-  NodeDef* map_defun_node = vectorized_func->mutable_node_def()->Mutable(0);
-  DCHECK_EQ(map_defun_node->op(), "MapDefun");
-
-  // Create a copy of the original function so that we can mutate it, and
-  // attach that to the map defun node.
-  FunctionDef* map_defun_fn = library->add_function();
-  *map_defun_fn = orig_func;
-  graph_utils::SetUniqueGraphFunctionName(orig_func.signature().name(), library,
-                                          map_defun_fn);
-  (*map_defun_node->mutable_attr())["f"].mutable_func()->set_name(
-      map_defun_fn->signature().name());
-
-  vectorization_utils::VectorizeMapDefun(vectorized_func, map_defun_fn,
-                                         map_defun_node);
-  return vectorized_func;
+  const NodeDef& map_defun_node = vectorized_func->node_def(0);
+  DCHECK_EQ(map_defun_node.op(), "MapDefun");
+
+  // TODO(b/116285210): Unreferenced functions should get cleaned up later
+  FunctionDef* result;
+  Status s = vectorization_utils::VectorizeMapDefun(
+      *vectorized_func, map_defun_node, library, &result);
+
+  if (!s.ok()) {
+    LOG(ERROR) << "VectorizeMapDefun failed: " << s;
+    return vectorized_func;
+  }
+  return result;
 }
 
 bool IsOutputShapesFullyDefined(const NodeDef& node) {
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc
index ed1bd6bc97..f4faf41549 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc
@@ -30,72 +30,51 @@ namespace {
 using test::function::GDef;
 using test::function::NDef;
 
-void MakeTensorShapeProtoHelper(const gtl::ArraySlice<int> dims,
-                                TensorShapeProto* t) {
-  for (size_t i = 0; i < dims.size(); ++i) {
-    auto* d = t->add_dim();
-    d->set_size(dims[i]);
-  }
-}
-
-AttrValue MakeShapeListAttr(
-    const gtl::ArraySlice<const gtl::ArraySlice<int>>& shapes) {
-  AttrValue shapes_attr;
-  for (size_t i = 0; i < shapes.size(); ++i) {
-    MakeTensorShapeProtoHelper(shapes[i],
-                               shapes_attr.mutable_list()->add_shape());
-  }
-
-  return shapes_attr;
-}
-
-NodeDef MakeMapNodeHelper(
-    StringPiece name, StringPiece input_node_name, StringPiece function_name,
-    StringPiece map_op_name,
-    const gtl::ArraySlice<const gtl::ArraySlice<int>>& output_shapes,
-    const gtl::ArraySlice<DataType>& output_types) {
+NodeDef MakeMapNodeHelper(StringPiece name, StringPiece input_node_name,
+                          StringPiece function_name, StringPiece map_op_name,
+                          gtl::ArraySlice<PartialTensorShape> output_shapes,
+                          gtl::ArraySlice<DataType> output_types) {
   return test::function::NDef(
       name, map_op_name, {string(input_node_name)},
       {{"f", FunctionDefHelper::FunctionRef(string(function_name))},
        {"Targuments", {}},
-       {"output_shapes", MakeShapeListAttr(output_shapes)},
+       {"output_shapes", output_shapes},
        {"output_types", output_types}});
 }
 
-NodeDef MakeMapNode(
-    StringPiece name, StringPiece input_node_name, StringPiece function_name,
-    const gtl::ArraySlice<const gtl::ArraySlice<int>>& output_shapes,
-    const gtl::ArraySlice<DataType>& output_types) {
+NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
+                    StringPiece function_name,
+                    gtl::ArraySlice<PartialTensorShape> output_shapes,
+                    gtl::ArraySlice<DataType> output_types) {
   return MakeMapNodeHelper(name, input_node_name, function_name, "MapDataset",
                            output_shapes, output_types);
 }
 
-NodeDef MakeBatchNode(
-    StringPiece name, StringPiece input_node_name,
-    StringPiece input_batch_size_name,
-    const gtl::ArraySlice<const gtl::ArraySlice<int>>& output_shapes,
-    const gtl::ArraySlice<DataType>& output_types) {
-  return NDef(name, "BatchDataset",
-              {string(input_node_name), string(input_batch_size_name)},
-              {{"output_types", output_types},
-               {"output_shapes", MakeShapeListAttr(output_shapes)}});
+NodeDef MakeBatchNode(StringPiece name, StringPiece input_node_name,
+                      StringPiece input_batch_size_name,
+                      gtl::ArraySlice<PartialTensorShape> output_shapes,
+                      gtl::ArraySlice<DataType> output_types) {
+  return NDef(
+      name, "BatchDataset",
+      {string(input_node_name), string(input_batch_size_name)},
+      {{"output_types", output_types}, {"output_shapes", output_shapes}});
 }
 
-NodeDef MakeBatchV2Node(
-    StringPiece name, StringPiece input_node_name,
-    StringPiece input_batch_size_name, StringPiece input_drop_remainder_name,
-    const gtl::ArraySlice<const gtl::ArraySlice<int>>& output_shapes,
-    const gtl::ArraySlice<DataType>& output_types) {
-  return NDef(name, "BatchDatasetV2",
-              {string(input_node_name), string(input_batch_size_name),
-               string(input_drop_remainder_name)},
-              {{"output_types", output_types},
-               {"output_shapes", MakeShapeListAttr(output_shapes)}});
+NodeDef MakeBatchV2Node(StringPiece name, StringPiece input_node_name,
+                        StringPiece input_batch_size_name,
+                        StringPiece input_drop_remainder_name,
+                        gtl::ArraySlice<PartialTensorShape> output_shapes,
+                        gtl::ArraySlice<DataType> output_types) {
+  return NDef(
+      name, "BatchDatasetV2",
+      {string(input_node_name), string(input_batch_size_name),
+       string(input_drop_remainder_name)},
+      {{"output_types", output_types}, {"output_shapes", output_shapes}});
 }
 
-NodeDef MakeRangeNode(StringPiece name, const gtl::ArraySlice<string>& inputs) {
+NodeDef MakeRangeNode(StringPiece name, gtl::ArraySlice<string> inputs) {
   return NDef(name, "RangeDataset", inputs,
-              {{"output_shapes", MakeShapeListAttr({{}})},
+              {{"output_shapes", gtl::ArraySlice<TensorShape>({{}})},
                {"output_types", gtl::ArraySlice<DataType>({DT_INT64})}});
 }
 
@@ -184,7 +163,7 @@ TEST(MapVectorizationTest, VectorizeWithUndefinedOutputTypes) {
   item.graph = GDef(
       {NDef("batch_size", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
        NDef("input", "InputDataset", {},
-            {{"output_shapes", MakeShapeListAttr({{}})}}),
+            {{"output_shapes", gtl::ArraySlice<TensorShape>({{}})}}),
        MakeMapNode("map", "input", "XTimesTwo", {{}}, {DT_INT32}),
        MakeBatchNode("batch", "map", "batch_size", {{-1}}, {DT_INT32})},
       // FunctionLib
@@ -196,6 +175,37 @@ TEST(MapVectorizationTest, VectorizeWithUndefinedOutputTypes) {
   TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
 }
 
+TEST(MapVectorizationTest, VectorizeWithFullyDefinedFunction) {
+  GrapplerItem item;
+  item.graph = GDef(
+      {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+       NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+       NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       NDef("batch_size", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       MakeRangeNode("range", {"start", "stop", "step"}),
+       MakeMapNode("map", "range", "Func", {{}}, {DT_INT32}),
+       MakeBatchNode("batch", "map", "batch_size", {{-1}}, {DT_INT32})},
+      // FunctionLib
+      {FunctionDefHelper::Create(
+          "Func", {"x: int64", "y: int64"}, {"res: int64", "res2: int64"}, {},
+          {{{"o"}, "Mul", {"x", "x"}, {{"T", DT_INT64}}}},
+          {{"res", "o:z"}, {"res2", "o:z"}})});
+  MapVectorization optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  EXPECT_EQ(graph_utils::FindAllGraphNodesWithOp("MapDataset", output).size(),
+            1);
+  EXPECT_EQ(graph_utils::FindAllGraphNodesWithOp("BatchDataset", output).size(),
+            1);
+  const NodeDef& map_node =
+      output.node(graph_utils::FindGraphNodeWithOp("MapDataset", output));
+  const NodeDef& batch_node =
+      output.node(graph_utils::FindGraphNodeWithOp("BatchDataset", output));
+  EXPECT_EQ(map_node.input(0), batch_node.name());
+  EXPECT_EQ(batch_node.input(0), "range");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
index 1462cb234d..37aa24b947 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -9,13 +9,14 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all")
 
 VECTORIZER_DEPS = [
     ":vectorizer_registry",
-    "//tensorflow/core/grappler/optimizers/data:function_utils",
+    "//tensorflow/core/grappler/optimizers/data:graph_utils",
 ] + tf_protos_all()
 
 cc_library(
     name = "vectorizer",
     hdrs = ["vectorizer.h"],
     deps = [
+        "//tensorflow/core:core_cpu",
         "//tensorflow/core:lib",
     ] + tf_protos_all(),
 )
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
index c1739737a0..3af6bab409 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
 
 namespace tensorflow {
@@ -23,26 +23,21 @@ namespace vectorization_utils {
 
 class CastVectorizer : public Vectorizer {
  public:
-  Status Vectorize(const NodeDef& node, gtl::ArraySlice<string> inputs,
-                   FunctionDef* outer_scope,
-                   std::map<string, string>* conversion_map) override {
-    if (inputs.size() != 1) {
+  Status Vectorize(const Node& node, Graph* outer_scope,
+                   std::vector<Port>* input_ports,
+                   std::vector<Port>* output_ports) override {
+    Status s;
+    if (node.num_inputs() != 1) {
       return errors::Internal("Cast op should only have one input.");
     }
 
-    // Add new Cast node
-    NodeDef* new_cast_node = outer_scope->add_node_def();
-    *new_cast_node = node;
-    new_cast_node->clear_name();
-    function_utils::SetUniqueFunctionNodeName(
-        strings::StrCat("vectorized/", node.name()), outer_scope,
-        new_cast_node);
-    new_cast_node->set_input(0, inputs[0]);
-
-    // Add the output mapping to conversion map
-    (*conversion_map)[strings::StrCat(node.name(), ":y:0")] =
-        strings::StrCat(new_cast_node->name(), ":y:0");
+    // Add new Cast node with the same op and attrs as the original node
+    auto new_cast_node = outer_scope->AddNode(node.def(), &s);
+    TF_RETURN_IF_ERROR(s);
 
+    // Add input and output mappings
+    input_ports->push_back({new_cast_node, 0});
+    output_ports->push_back({new_cast_node, 0});
     return Status::OK();
   }
 };
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
index 776d3179c5..74ce520ce1 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
 
 namespace tensorflow {
@@ -23,31 +23,29 @@ namespace vectorization_utils {
 
 class UnpackVectorizer : public Vectorizer {
  public:
-  Status Vectorize(const NodeDef& node, gtl::ArraySlice<string> inputs,
-                   FunctionDef* outer_scope,
-                   std::map<string, string>* conversion_map) override {
-    if (inputs.size() != 1) {
+  Status Vectorize(const Node& node, Graph* outer_scope,
+                   std::vector<Port>* input_ports,
+                   std::vector<Port>* output_ports) override {
+    Status s;
+    if (node.num_inputs() != 1) {
       return errors::Internal("Unpack op should only have one input.");
     }
 
-    // Add new Unpack node
-    NodeDef* new_unpack_node = outer_scope->add_node_def();
-    *new_unpack_node = node;
-    new_unpack_node->clear_name();
-    function_utils::SetUniqueFunctionNodeName(
-        strings::StrCat("vectorized/", node.name()), outer_scope,
-        new_unpack_node);
+    // Add new Unpack node with the same op and attrs as the original node
+    auto new_unpack_node = outer_scope->AddNode(node.def(), &s);
+    TF_RETURN_IF_ERROR(s);
 
     // Increment "axis" attr by 1:
-    (*new_unpack_node->mutable_attr())["axis"].set_i(
-        node.attr().at("axis").i() + 1);
-    new_unpack_node->set_input(0, inputs[0]);
+    int new_axis = node.def().attr().at("axis").i() + 1;
+    new_unpack_node->AddAttr("axis", new_axis);
 
-    // Add the output mappings to conversion map
-    int num = new_unpack_node->attr().at("num").i();
+    // Add the input mappings
+    input_ports->push_back({new_unpack_node, 0});
+
+    // Add the output mappings
+    int num = node.def().attr().at("num").i();
     for (int i = 0; i < num; ++i) {
-      (*conversion_map)[strings::StrCat(node.name(), ":output:", i)] =
-          strings::StrCat(new_unpack_node->name(), ":output:", i);
+      output_ports->push_back({new_unpack_node, i});
     }
 
     return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
index d341dbba7d..56eb88c95e 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
@@ -17,30 +17,33 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_H_
 
 #include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
 namespace grappler {
 namespace vectorization_utils {
 
+// Describes a tensor with its operation Node and output position
+typedef std::pair<Node*, int> Port;
+
 // Interface for vectorization of TensorFlow operations. See `CastVectorizer`
 // for an example.
 class Vectorizer {
  public:
   virtual ~Vectorizer() {}
 
-  // Vectorizes an operation, `node`, by adding operation(s) to `outer_scope`
+  // Vectorizes an operation, `node`, by adding Node(s) to `outer_scope`
   // that produce the same vector output(s) as executing `node`'s op
-  // on elements of the vector inputs, and adding mappings to `conversion_map`
-  // from old output tensor names to new (vectorized) output tensor names.
-  // The new node(s) collectively have the same number of inputs and outputs as
-  // the node being converted, and use the tensor names in `inputs` as their
-  // inputs.
-  virtual Status Vectorize(const NodeDef& node, gtl::ArraySlice<string> inputs,
-                           FunctionDef* outer_scope,
-                           std::map<string, string>* conversion_map) = 0;
+  // on elements of the vector inputs. The new Node(s) collectively have the
+  // same number of input and output ports as the node being converted.
+  // Adds mappings for the new nodes' input and output ports to `inputs` and
+  // `outputs` respectively, where the i'th Port in inputs/outputs
+  // corresponds to the i'th input/output port of the node to be converted.
+  virtual Status Vectorize(const Node& node, Graph* outer_scope,
+                           std::vector<Port>* input_ports,
+                           std::vector<Port>* output_ports) = 0;
 };
 
 }  // namespace vectorization_utils
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
index 86e303564b..663ceba027 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
@@ -24,9 +24,9 @@ namespace vectorization_utils {
 
 class TestVectorizer : public Vectorizer {
  public:
-  Status Vectorize(const NodeDef& node, gtl::ArraySlice<string> inputs,
-                   FunctionDef* outer_scope,
-                   std::map<string, string>* conversion_map) override {
+  Status Vectorize(const Node& node, Graph* outer_scope,
+                   std::vector<Port>* inputs,
+                   std::vector<Port>* outputs) override {
     return Status::OK();
   }
 };
@@ -39,10 +39,12 @@ TEST(TestVectorizer, TestTestVectorizer) {
   auto vectorizer = VectorizerRegistry::Global()->Get("test_op");
   EXPECT_NE(vectorizer, nullptr);
 
-  FunctionDef function;
-  NodeDef node;
-  std::map<string, string> conversion_map;
-  EXPECT_TRUE(vectorizer->Vectorize(node, {}, &function, &conversion_map).ok());
+  Graph g(OpRegistry::Global());
+  NodeDef node_def;
+  Status s;
+  Node* node = g.AddNode(node_def, &s);
+  std::vector<Port> inputs, outputs;
+  EXPECT_TRUE(vectorizer->Vectorize(*node, &g, &inputs, &outputs).ok());
 }
 
 }  // namespace vectorization_utils
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index cb56b65985..cea667f668 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -14,13 +14,17 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
+#include <memory>
 #include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
 
 #include "absl/strings/str_join.h"
+#include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/graph_to_functiondef.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/types.h"
@@ -36,255 +40,346 @@ namespace tensorflow {
 namespace grappler {
 namespace vectorization_utils {
 
-using function_utils::FunctionDefTensorDesc;
-
 namespace {
 
-void AddMapDefunOutput(FunctionDef* map_defun_fn, NodeDef* map_defun_node,
-                       const string& output_retval, const DataType t) {
-  // Set to unknown shape
-  TensorShapeProto tensor_shape_proto;
-  PartialTensorShape().AsProto(&tensor_shape_proto);
+// Describes a tensor with its operation Node and output position
+typedef std::pair<Node*, int> TensorDesc;
 
-  function_utils::AddFunctionOutputWithUniqueName(
-      "vectorized_out", output_retval, map_defun_fn, t);
+const char* const kRetValOp = "_Retval";
 
-  *(*map_defun_node->mutable_attr())["output_shapes"]
-       .mutable_list()
-       ->add_shape() = tensor_shape_proto;
-  (*map_defun_node->mutable_attr())["output_types"].mutable_list()->add_type(t);
+void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src,
+                        Graph* graph) {
+  // NOTE: We need two for loops here because we can't mutate the set of output
+  // edges as we iterate over them.
+  std::vector<const Edge*> edges_to_replace;
+  for (auto edge : old_src.first->out_edges()) {
+    if (edge->src_output() == old_src.second) {
+      edges_to_replace.push_back(edge);
+    }
+  }
+  for (auto edge : edges_to_replace) {
+    graph->AddEdge(new_src.first, new_src.second, edge->dst(),
+                   edge->dst_input());
+    graph->RemoveEdge(edge);
+  }
 }
 
-void RemoveMapDefunOutput(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
-                          NodeDef* map_defun_node, int output_position) {
-  DCHECK_LT(output_position, map_defun_fn->signature().output_arg_size())
-      << "Trying to remove output that doesn't exist. Output number: "
-      << output_position;
+Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node,
+                         const TensorDesc& output) {
+  // Note that we don't update MapDefun attrs as we go, only when we are done
+  DataType type = output.first->output_type(output.second);
+  int index = map_defun_fn->ret_nodes.size();
 
-  int num_later_outputs =
-      map_defun_fn->signature().output_arg_size() - output_position - 1;
+  NodeDef ret_node_def;
+  ret_node_def.set_name("map_out");
+  ret_node_def.set_op(kRetValOp);
+  AddNodeAttr("T", type, &ret_node_def);
+  AddNodeAttr("index", index, &ret_node_def);
 
-  // Remove from map_defun_fn's ret dict and output args
-  map_defun_fn->mutable_ret()->erase(
-      map_defun_fn->signature().output_arg(output_position).name());
-  map_defun_fn->mutable_signature()->mutable_output_arg()->DeleteSubrange(
-      output_position, 1);
+  Status s;
+  Node* ret_node = map_defun_fn->graph->AddNode(ret_node_def, &s);
+  TF_RETURN_IF_ERROR(s);
 
-  // Renumber outputs that come after
-  for (int i = 0; i < num_later_outputs; ++i) {
-    function_utils::ReplaceReferences(
-        strings::StrCat(map_defun_node->name(),
-                        ":output:", output_position + i + 1),
-        strings::StrCat(map_defun_node->name(),
-                        ":output:", output_position + i),
-        outer_scope);
-  }
-  map_defun_node->mutable_attr()
-      ->at("output_shapes")
-      .mutable_list()
-      ->mutable_shape()
-      ->DeleteSubrange(output_position, 1);
-  map_defun_node->mutable_attr()
-      ->at("output_types")
-      .mutable_list()
-      ->mutable_type()
-      ->ExtractSubrange(output_position, 1, nullptr);
+  map_defun_fn->graph->AddEdge(output.first, output.second, ret_node, 0);
+  map_defun_fn->ret_nodes.push_back(ret_node);
+  map_defun_fn->ret_types.push_back(type);
+
+  return s;
 }
 
-int FindOutputToConvert(const FunctionDef& function,
-                        const std::set<string>& unconvertible,
-                        FunctionDefTensorDesc* f) {
-  for (int i = function.signature().output_arg_size() - 1; i >= 0; --i) {
-    const string& ret_key = function.signature().output_arg(i).name();
-    *f = FunctionDefTensorDesc(function.ret().at(ret_key));
+void RemoveMapDefunOutput(int output_position, Graph* outer_scope,
+                          FunctionBody* map_defun_fn, Node* map_defun_node) {
+  // Note that we don't update MapDefun attrs as we go, only when we are done
+  DCHECK_LT(output_position, map_defun_fn->ret_nodes.size())
+      << "Trying to remove output that doesn't exist. Output number: "
+      << output_position;
+
+  int num_later_outputs = map_defun_fn->ret_nodes.size() - output_position - 1;
 
-    if (unconvertible.find(f->node_name) == unconvertible.end()) {
-      return i;
-    }
+  // Modify map_defun_fn's signature and remove the output node from its graph
+  map_defun_fn->graph->RemoveNode(map_defun_fn->ret_nodes[output_position]);
+  map_defun_fn->ret_nodes.erase(map_defun_fn->ret_nodes.begin() +
+                                output_position);
+  map_defun_fn->ret_types.erase(map_defun_fn->ret_types.begin() +
+                                output_position);
+
+  // Renumber the nodes and edges that come after
+  for (int i = 0; i < num_later_outputs; ++i) {
+    ReplaceEdgeSources({map_defun_node, output_position + i + 1},
+                       {map_defun_node, output_position + i}, outer_scope);
+    // Each ret node has an "index" attr that has to be updated
+    map_defun_fn->ret_nodes[output_position + i]->AddAttr("index",
+                                                          output_position + i);
   }
-  return -1;
 }
 
 // Helper class that vectorizes the body of a MapDefun node, adding new
 // operations to the graph that collectively compute the same value as what
 // running the MapDefun function on slices of the input would produce.
-// Each instance of the class encapsulates all the data necessary to vectorize a
-// MapDefun op in place.
+// This class transforms the input FunctionDefs into their corresponding
+// Graph objects and works on the graphs directly, then converts them back
+// to FunctionDefs when GetResult is called.
 class Vectorization {
  public:
-  Vectorization(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
-                NodeDef* map_defun_node)
-      : outer_scope_(outer_scope),
-        map_defun_fn_(map_defun_fn),
-        map_defun_node_(map_defun_node) {}
+  explicit Vectorization(FunctionDefLibrary* lib)
+      : lib_(lib), lib_def_(OpRegistry::Global(), *lib) {}
 
-  // Repeatedly tries to convert outputs of map_defun_fn_ into new nodes in
-  // the outer_scope_, until there are no convertible outputs remaining.
-  // This method is idempotent.
-  void Vectorize();
+  // Adds the vectorized function and new map_defun_fn to lib, and points
+  // vectorized_function to the former. Returns an error status if
+  // the conversion between FunctionDef -> Graph -> FunctionDef failed anywhere
+  // along the way.
+  Status Vectorize(const FunctionDef& outer_scope,
+                   const NodeDef& map_defun_node, FunctionDef** result);
 
  private:
-  // Vectorizes the map defun function's output at output_position
-  Status ConvertOutput(int output_position, const FunctionDefTensorDesc& desc);
-  // Given a descriptor of the original output tensor, gets a string
-  // corresponding to the converted output tensor.
-  Status ConvertOutputHelper(const FunctionDefTensorDesc& output_desc,
-                             string* converted);
-  Status AddConversionMappingFromInput(
-      const FunctionDefTensorDesc& output_desc);
+  // Converts FunctionDefs to Graphs.
+  Status Initialize(const FunctionDef& outer_scope,
+                    const NodeDef& map_defun_node);
+
+  // Converts Graphs back to FunctionDefs and adds them to `lib_`.
+  Status GetResult(FunctionDef** vectorized_function);
+
+  // Repeatedly tries to convert outputs of `map_defun_fn_` into new nodes in
+  // `outer_scope_`, until there are no convertible outputs remaining.
+  void VectorizeHelper();
+
+  // Vectorizes map_defun_fn's output at output_position.
+  Status ConvertOutput(int output_position);
 
   // Adds mappings from node's outputs tensors to converted output tensors,
   // creating the necessary new node(s). Generally, the steps to convert an op
   // are:
-  // 1) Promote the inputs of the op inputs to outputs of the map_defun_fn_,
-  //    and modify map_defun_node_ attrs accordingly
-  // 2) Create new node(s) in outer_scope_ that act on batched input tensors.
+  // 1) Create new node(s) in `outer_scope_` that act on batched input tensors.
   //    These operations collectively compute the same value as what running
   //    the original operation on slices of the input tensors would produce.
   //    For example, a Cast op in MapDefun translates to a Cast op in
-  //    outer_scope_, since the vectorized version of Cast is itself.
-  // 3) Set inputs of new node(s) to the corresponding converted inputs (that
-  //    are now outputs of map_defun_node_)
-  // 4) For each output of the old node, add the mapping of output strings to
-  //    the conversion map (eg "Cast:y:0" -> "Vectorize/Cast:y:0")
-  Status AddConversionMappingFromOp(const NodeDef& node,
-                                    const FunctionDefTensorDesc& output_desc);
-
-  // Maps a tensor name to the name of the corresponding vectorized tensor. For
-  // example, "Cast:y:0" -> "Vectorize/Cast:y:0"
-  std::map<string, string> conversion_map_;
-  // Unconvertible node names
-  std::set<string> unconvertible_;
-
-  FunctionDef* outer_scope_;
-  FunctionDef* map_defun_fn_;
-  NodeDef* map_defun_node_;
+  //    `outer_scope_`, since the vectorized version of Cast is itself.
+  // 2) Promote the inputs of the op inputs to outputs of the
+  //    `map_defun_node_` and `map_defun_fn_`.
+  // 3) Add edges between the promoted inputs (that are now outputs of
+  //    `map_defun_node`) and the inputs ports of the new node(s).
+  // 4) For each output of the old node, add the mapping of output tensors to
+  //    the conversion map.
+  Status AddConversionMapping(Node* op_node);
+
+  // Maps a tensor to the corresponding vectorized tensor. For example,
+  // {"Cast" Node*, 0} -> {"Vectorize/Cast" Node*, 0}
+  std::map<TensorDesc, TensorDesc> conversion_map_;
+
+  // Unconvertible ret nodes
+  std::set<Node*> unconvertible_;
+
+  FunctionDefLibrary* lib_;  // Not owned
+  FunctionLibraryDefinition lib_def_;
+  // Note that FunctionBody has a pointer to a Graph object that corresponds
+  // to the function's subgraph, with additional kArgOp and kRetValOp nodes
+  // that denote that function arguments and return values. These nodes have the
+  // attrs "T" for the type, and "index" for the argument / retval index
+  // respectively. FunctionBody also keeps track of arg/ret_nodes and
+  // arg/ret_types, that should be ordered according to argument/output indices.
+  std::unique_ptr<Graph> outer_scope_;
+  std::unique_ptr<FunctionBody> map_defun_fn_;
+  Node* map_defun_node_ = nullptr;  // Owned by `outer_scope`
+  Status status_;
 };
 
-Status Vectorization::AddConversionMappingFromOp(
-    const NodeDef& node, const FunctionDefTensorDesc& output_desc) {
-  for (const string& input_name : node.input()) {
-    if (IsControlInput(input_name)) {
+Status Vectorization::AddConversionMapping(Node* op_node) {
+  for (auto edge : op_node->in_edges()) {
+    if (edge->IsControlEdge()) {
       return errors::InvalidArgument(
           "Vectorizing outputs with control inputs is currently not "
           "supported.");
     }
   }
 
-  // TODO(rachelim): Have some mechanism for registering converters and some
-  // uniform, simpler way to represent them.
-
-  DataTypeVector types;
-  const OpDef* op_def = nullptr;
-  TF_RETURN_IF_ERROR(OpRegistry::Global()->LookUpOpDef(node.op(), &op_def));
-  TF_RETURN_IF_ERROR(InputTypesForNode(node, *op_def, &types));
-
-  std::vector<string> promoted_inputs;
-  promoted_inputs.reserve(node.input_size());
-  for (int i = 0; i < node.input_size(); ++i) {
-    promoted_inputs.push_back(strings::StrCat(
-        map_defun_node_->name(),
-        ":output:", map_defun_fn_->signature().output_arg_size() + i));
-  }
-
-  auto vectorizer = VectorizerRegistry::Global()->Get(node.op());
+  auto vectorizer = VectorizerRegistry::Global()->Get(op_node->type_string());
   if (vectorizer == nullptr) {
     return errors::Unimplemented("No vectorizer registered for op: ",
-                                 node.op());
+                                 op_node->type_string());
+  }
+  std::vector<Port> input_ports, output_ports;
+  input_ports.reserve(op_node->num_inputs());
+  output_ports.reserve(op_node->num_outputs());
+  TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(),
+                                           &input_ports, &output_ports));
+
+  std::vector<const Edge*> input_edges;
+  TF_RETURN_IF_ERROR(op_node->input_edges(&input_edges));
+
+  if (op_node->num_outputs() != output_ports.size() ||
+      op_node->num_inputs() != input_ports.size() ||
+      input_edges.size() != input_ports.size()) {
+    return errors::Internal("Vectorizer inputs/outputs don't match.");
   }
 
-  TF_RETURN_IF_ERROR(vectorizer->Vectorize(node, promoted_inputs, outer_scope_,
-                                           &conversion_map_));
+  // Promote the inputs of the op to MapDefun outputs and connect the edges
+  // accordingly.
+  for (size_t i = 0; i < op_node->num_inputs(); ++i) {
+    auto edge = input_edges[i];
+    TF_RETURN_IF_ERROR(AddMapDefunOutput(map_defun_fn_.get(), map_defun_node_,
+                                         {edge->src(), edge->src_output()}));
+    outer_scope_->AddEdge(map_defun_node_, map_defun_fn_->ret_nodes.size() - 1,
+                          input_ports[i].first, input_ports[i].second);
+  }
 
-  // If we get here, the conversion was successful, so we promote the inputs
-  // of the ops to MapDefun outputs.
-  for (int i = 0; i < types.size(); ++i) {
-    AddMapDefunOutput(map_defun_fn_, map_defun_node_, node.input(i), types[i]);
+  // Add output mappings.
+  for (size_t i = 0; i < op_node->num_outputs(); ++i) {
+    conversion_map_.insert({{op_node, i}, std::move(output_ports[i])});
   }
 
   return Status::OK();
 }
 
-Status Vectorization::AddConversionMappingFromInput(
-    const FunctionDefTensorDesc& output_desc) {
-  int input_index = function_utils::FindFunctionInputWithName(
-      output_desc.node_name, *map_defun_fn_);
-  if (input_index == -1) {
-    return errors::Internal("Cannot convert non-existent input.");
+Status Vectorization::ConvertOutput(int output_position) {
+  // ret_edge->src() is the actual op that generated the retval, and
+  // ret_edge->dst() is the retval node whose op is "_Retval"
+  const Edge* ret_edge;
+  TF_RETURN_IF_ERROR(
+      map_defun_fn_->ret_nodes[output_position]->input_edge(0, &ret_edge));
+
+  TensorDesc output({ret_edge->src(), ret_edge->src_output()});
+  TensorDesc converted_output;
+  if (auto found = gtl::FindOrNull(conversion_map_, output)) {
+    // It's possible the output already has a mapping, if it comes from a node
+    // that has already been converted.
+    converted_output = *found;
+  } else {
+    TF_RETURN_IF_ERROR(AddConversionMapping(output.first));
+    converted_output = conversion_map_.at(output);
   }
 
-  conversion_map_[output_desc.full_str] = map_defun_node_->input(input_index);
+  ReplaceEdgeSources({map_defun_node_, output_position}, converted_output,
+                     outer_scope_.get());
+  RemoveMapDefunOutput(output_position, outer_scope_.get(), map_defun_fn_.get(),
+                       map_defun_node_);
+
   return Status::OK();
 }
 
-Status Vectorization::ConvertOutputHelper(
-    const FunctionDefTensorDesc& output_desc, string* converted) {
-  // It's possible the output already has a mapping, if it comes from a node
-  // that has already been converted.
-  if (auto found = gtl::FindOrNull(conversion_map_, output_desc.full_str)) {
-    *converted = *found;
-    return Status::OK();
+Status Vectorization::Vectorize(const FunctionDef& outer_scope,
+                                const NodeDef& map_defun_node,
+                                FunctionDef** result) {
+  TF_RETURN_IF_ERROR(Initialize(outer_scope, map_defun_node));
+  VectorizeHelper();
+  return GetResult(result);
+}
+
+void Vectorization::VectorizeHelper() {
+  while (true) {
+    int output_position = graph_utils::GetFirstElementIndexWithPredicate(
+        [this](Node* n) {
+          return this->unconvertible_.find(n) == this->unconvertible_.end();
+        },
+        map_defun_fn_->ret_nodes);
+
+    // No outputs left to convert
+    if (output_position == -1) break;
+
+    Status s = ConvertOutput(output_position);
+    if (!s.ok()) {
+      Node* output_node = map_defun_fn_->ret_nodes.at(output_position);
+      VLOG(2) << "Could not convert the output at node: "
+              << output_node->DebugString() << "\nError: " << s;
+      unconvertible_.insert(output_node);
+    }
   }
 
-  int index = function_utils::FindFunctionNodeWithName(output_desc.node_name,
-                                                       *map_defun_fn_);
-  if (index == -1) {  // The output comes from an input
-    TF_RETURN_IF_ERROR(AddConversionMappingFromInput(output_desc));
+  // If we've converted all the outputs of the MapDefun function, we no longer
+  // need the MapDefun node and can delete it.
+  if (map_defun_fn_->ret_nodes.empty()) {
+    outer_scope_->RemoveNode(map_defun_node_);
   } else {
-    TF_RETURN_IF_ERROR(AddConversionMappingFromOp(
-        map_defun_fn_->node_def(index), output_desc));
+    // Update MapDefun node attrs accordingly
+    DCHECK_EQ(map_defun_fn_->ret_types.size(), map_defun_fn_->ret_nodes.size());
+    map_defun_node_->AddAttr(
+        "output_shapes",
+        std::vector<PartialTensorShape>(map_defun_fn_->ret_types.size()));
+    map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types);
   }
-  *converted = conversion_map_.at(output_desc.full_str);
-  return Status::OK();
 }
+Status Vectorization::Initialize(const FunctionDef& outer_scope,
+                                 const NodeDef& map_defun_node) {
+  // Convert outer_scope and map_defun_fn to FunctionBodys so we can
+  // work on Graphs directly.
+  const FunctionDef* map_defun_fn =
+      lib_def_.Find(map_defun_node.attr().at("f").func().name());
+
+  if (map_defun_fn == nullptr) {
+    return errors::NotFound("Could not find function with name ",
+                            map_defun_node.attr().at("f").func().name(),
+                            " in function library.");
+  }
 
-Status Vectorization::ConvertOutput(int output_position,
-                                    const FunctionDefTensorDesc& output_desc) {
-  string converted_output_name;
-  TF_RETURN_IF_ERROR(ConvertOutputHelper(output_desc, &converted_output_name));
+  auto get_func_sig = [this](const string& op, const OpDef** sig) {
+    return this->lib_def_.LookUpOpDef(op, sig);
+  };
+
+  FunctionBody* outer_fn;
+  TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(outer_scope, {}, &lib_def_,
+                                             get_func_sig, &outer_fn));
+  // We don't need outer_fn, just the graph
+  outer_scope_.reset(outer_fn->graph);
+  outer_fn->graph = nullptr;
+  delete outer_fn;
+
+  FunctionBody* tmp;
+  TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(*map_defun_fn, {}, &lib_def_,
+                                             get_func_sig, &tmp));
+  map_defun_fn_.reset(tmp);
+
+  // Find the MapDefun node in outer_scope_
+  int node_id = graph_utils::GetFirstElementIndexWithPredicate(
+      [&map_defun_node](Node* n) { return n->name() == map_defun_node.name(); },
+      outer_scope_->nodes());
+  if (node_id == -1) {
+    return errors::NotFound("Could not find node with name ",
+                            map_defun_node.name(), " in outer_scope.");
+  }
+  map_defun_node_ = outer_scope_->FindNodeId(node_id);
+
+  // Add mappings from map_defun_fn_ arg nodes to map_defun_node_ input nodes to
+  // the conversion map
+  for (auto arg_node : map_defun_fn_->arg_nodes) {
+    Node* input_node;
+    TF_RETURN_IF_ERROR(map_defun_node_->input_node(
+        arg_node->attrs().Find("index")->i(), &input_node));
 
-  // Remove the old output and make everything that referenced it point
-  // to the new string
-  function_utils::ReplaceReferences(
-      strings::StrCat(map_defun_node_->name(), ":output:", output_position),
-      converted_output_name, outer_scope_);
-  RemoveMapDefunOutput(outer_scope_, map_defun_fn_, map_defun_node_,
-                       output_position);
+    conversion_map_.insert({{arg_node, 0}, {input_node, 0}});
+  }
 
   return Status::OK();
 }
 
-void Vectorization::Vectorize() {
-  while (true) {
-    FunctionDefTensorDesc desc;
-    int output_position =
-        FindOutputToConvert(*map_defun_fn_, unconvertible_, &desc);
-    if (output_position == -1) break;
+Status Vectorization::GetResult(FunctionDef** vectorized_function) {
+  TF_RETURN_IF_ERROR(status_);
 
-    if (!ConvertOutput(output_position, desc).ok()) {
-      unconvertible_.insert(desc.node_name);
-    }
-  }
+  if (!map_defun_fn_->ret_nodes.empty()) {
+    FunctionDef* map_defun_fn = lib_->add_function();
+    graph_utils::SetUniqueGraphFunctionName("map_defun_fn", lib_, map_defun_fn);
+    TF_RETURN_IF_ERROR(GraphToFunctionDef(
+        *map_defun_fn_->graph, map_defun_fn->signature().name(), map_defun_fn));
 
-  // If we've converted all the outputs of the MapDefun function, we no longer
-  // need the MapDefun node and can delete it.
-  if (map_defun_fn_->signature().output_arg_size() == 0) {
-    outer_scope_->mutable_node_def()->DeleteSubrange(
-        function_utils::FindFunctionNodeWithName(map_defun_node_->name(),
-                                                 *outer_scope_),
-        1);
+    AttrValue func_attr;
+    func_attr.mutable_func()->set_name(map_defun_fn->signature().name());
+    map_defun_node_->AddAttr("f", func_attr);
   }
 
-  if (!unconvertible_.empty()) {
-    VLOG(2) << "The following nodes could not be converted: ["
-            << absl::StrJoin(unconvertible_, ", ") << "].";
-  }
+  *vectorized_function = lib_->add_function();
+  graph_utils::SetUniqueGraphFunctionName("vectorized_fn", lib_,
+                                          *vectorized_function);
+  TF_RETURN_IF_ERROR(GraphToFunctionDef(
+      *outer_scope_, (*vectorized_function)->signature().name(),
+      *vectorized_function));
+  return Status::OK();
 }
+
 }  // namespace
 
-void VectorizeMapDefun(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
-                       NodeDef* map_defun_node) {
-  Vectorization(outer_scope, map_defun_fn, map_defun_node).Vectorize();
+Status VectorizeMapDefun(const FunctionDef& outer_scope,
+                         const NodeDef& map_defun_node, FunctionDefLibrary* lib,
+                         FunctionDef** result) {
+  *result = nullptr;
+  return Vectorization(lib).Vectorize(outer_scope, map_defun_node, result);
 }
 
 }  // end namespace vectorization_utils
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.h b/tensorflow/core/grappler/optimizers/data/vectorization_utils.h
index bb405faa77..bd7d390900 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.h
@@ -24,22 +24,28 @@ namespace tensorflow {
 namespace grappler {
 namespace vectorization_utils {
 
-// Given a function, `map_defun_fn`, that is mapped across some input vector
-// elements via a MapDefun operation, `VectorizeMapDefun` attempts to
-// vectorize the MapDefun by "lifting" operations from the `map_defun_fn` to the
-// `outer_scope`; that is, replacing `map_defun_fn` operations with new
-// `outer_scope` operations that produce the same vector output(s) as executing
-// the `map_defun_fn` operations on elements of vector input(s) would. If all
-// `map_defun_fn` operations are successfully lifted, `map_defun_node` is
-// eliminated from `outer_scope` altogether. However, if some operations cannot
-// be lifted, and this vectorization only succeeds partially, `map_defun_node`
-// remains to be used for operations that were not lifted.
+// Given a MapDefun node (`map_defun_node`) in a FunctionDef (`outer_scope`)
+// that maps a function in lib across some input vector elements,
+// `VectorizeMapDefun` attempts to create a vectorized version of `outer_scope`
+// by "lifting" operations from the MapDefun function to the new function
+// (`result`); that is, replacing operations in the MapDefun function with
+// operations that produce the same vector output(s) as executing the original
+// operations on elements of vector input(s) would. If all operations in the
+// MapDefun function are successfully lifted, `result` has no MapDefun node
+// altogether. However, if some operations cannot be lifted, and this
+// vectorization only succeeds partially, a MapDefun node remains in `result` to
+// be used for operations that were not lifted, and the modified MapDefun
+// function is added to `lib`. The newly vectorized function `result` is also
+// added to `lib`.
+//
+// Returns Status::OK() if the vectorization is completely or partially
+// successful. Otherwise, returns an error, and sets `result` to nullptr.
 //
 // Example:
 //   If the input to the `VectorizeMapDefun` function is a MapDefun
 // whose `map_defun_fn` performs the Cast operation, the vectorization will
 // eliminate the MapDefun. This is because the Cast operation supports
-// any tensor shape and can thus be lifted to the `outer_scope`.
+// any tensor shape and can thus be lifted to `result`.
 //
 // Before:
 //
@@ -68,7 +74,7 @@ namespace vectorization_utils {
 //
 // After:
 //
-// outer_scope     +------+
+// result          +------+
 // +---------------+ Arg0 +---------+
 // |               +---+--+         |
 // |                   |            |
@@ -80,8 +86,9 @@ namespace vectorization_utils {
 // +---------------+ Ret0 +---------+
 //                 +------+
 //
-void VectorizeMapDefun(FunctionDef* outer_scope, FunctionDef* map_defun_fn,
-                       NodeDef* map_defun_node);
+Status VectorizeMapDefun(const FunctionDef& outer_scope,
+                         const NodeDef& map_defun_node, FunctionDefLibrary* lib,
+                         FunctionDef** result);
 
 }  // end namespace vectorization_utils
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index e129fa9237..1ff62217dd 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/tools/graph_transforms/transform_utils.h"
@@ -60,6 +61,11 @@ NodeDef* AddMapDefunNode(const string& name, const std::vector<string>& inputs,
   return node;
 }
 
+string GetRetval(const FunctionDef& function_def, int index) {
+  return function_def.ret().at(
+      function_def.signature().output_arg(index).name());
+}
+
 // TODO(rachelim): Use FunctionDefHelper::Create instead
 FunctionDef CreateFunction(
     StringPiece name, const std::vector<std::pair<string, DataType>>& inputs,
@@ -85,7 +91,6 @@ FunctionDef CreateFunction(
   return func;
 }
 
-TEST(FunctionDefInputDescTest, ConstructedCorrectly) {}
 
 // Before:
 //
@@ -133,10 +138,15 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
       {{}, {}}, inner.signature().name(), &outer);
   CHECK_NOTNULL(map_defun);
 
-  VectorizeMapDefun(&outer, &inner, map_defun);
-  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
-  EXPECT_EQ(outer.ret().at("mapdefun"), "ret0");
-  EXPECT_EQ(outer.ret().at("mapdefun_0"), "ret1");
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  EXPECT_EQ(GetRetval(*vectorized, 0), "ret0");
+  EXPECT_EQ(GetRetval(*vectorized, 1), "ret1");
 }
 
 // Before:
@@ -149,12 +159,12 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
 // |   +-----------+ Arg0 +---+ Arg1 +----+   |
 // |   |           +---+--+   +---+--+    |   |
 // |   |               |          |       |   |
-// |   |   +------+    |      +---v--+    |   |
-// |   |   |Const |    |      | Op0  |    |   |
-// |   |   +---v--+    |      +---+--+    |   |
+// |   |   +------+    |          |       |   |
+// |   |   |Const |    |          |       |   |
+// |   |   +---v--+    |          |       |   |
 // |   |       |       |          |       |   |
 // |   |       |   +---v--+   +---v--+    |   |
-// |   |       +---| XOp1 |   | XOp2 |    |   |
+// |   |       +---| XOp1 |   | Cast |    |   |
 // |   |           +---+--+   +---+--+    |   |
 // |   |               |          |       |   |
 // |   | MapDefun  +---v--+   +---v--+    |   |
@@ -165,23 +175,50 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
 // +---------------+ Ret0 +---+ Ret1 +--------+
 //                 +------+   +------+
 //
-//   where XOp1 and XOp2 are not convertible.
+//   where XOp1 is not convertible.
 //
 // After:
 //
-// No change because the ops are not convertible.
+//
+//                 +------+   +------+
+// +---------------+ Arg0 +---+ Arg1 +--------+
+// |               +---+--+   +---+--+        |
+// |                   |          |           |
+// |               +---v--+       |           |
+// |   +-----------+ Arg0 +-+     |           |
+// |   |           +---+--+ |     |           |
+// |   |               |    |     |           |
+// |   |   +------+    |    |     |           |
+// |   |   |Const |    |    |     |           |
+// |   |   +---v--+    |    |     |           |
+// |   |       |       |    |     |           |
+// |   |       |   +---v--+ | +---v--+        |
+// |   |       +---| XOp1 | | | Cast |        |
+// |   |           +---+--+ | +---+--+        |
+// |   |               |    |     |           |
+// |   | MapDefun  +---v--+ |     |           |
+// |   +-----------+ Ret0 +-+     |           |
+// |               +---+--+       |           |
+// |                   |          |           |
+// |               +---v--+   +---v--+        |
+// +---------------+ Ret0 +---+ Ret1 +--------+
+//                 +------+   +------+
 //
 TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) {
   FunctionDef inner =
       CreateFunction("inner_function", {{"arg0", DT_INT32}, {"arg1", DT_INT32}},
                      {{"ret0", DT_INT32}, {"ret1", DT_INT32}},
-                     {{"ret0", "XOp1:output:0"}, {"ret1", "XOp2:output:0"}});
+                     {{"ret0", "MatMul:product:0"}, {"ret1", "Cast:y:0"}});
+  // TODO(rachelim): If we ever write a converter for MatMul, we have to
+  // change this test.
   NodeDef* x_op1 =
-      function_utils::AddNode("XOp1", "XOp1", {"const", "arg0"}, {}, &inner);
+      function_utils::AddNode("MatMul", "MatMul", {"arg0", "arg0"}, {}, &inner);
   CHECK_NOTNULL(x_op1);
+  graph_transforms::SetNodeAttr("T", DT_INT32, x_op1);
 
-  NodeDef* x_op2 = function_utils::AddNode("XOp2", "XOp2", {"op1"}, {}, &inner);
-  CHECK_NOTNULL(x_op2);
+  NodeDef* cast_node =
+      AddCastNode("Cast", {"arg1"}, DT_INT32, DT_INT32, false, &inner);
+  CHECK_NOTNULL(cast_node);
 
   FunctionDef outer = CreateFunction(
       "outer_function", {{"x", DT_INT32}, {"y", DT_INT32}},
@@ -193,12 +230,22 @@ TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) {
       {{}, {}}, inner.signature().name(), &outer);
   CHECK_NOTNULL(map_defun);
 
-  FunctionDef outer_copy(outer);
-  FunctionDef inner_copy(inner);
-  VectorizeMapDefun(&outer, &inner, map_defun);
-  // They should be unchanged
-  EXPECT_TRUE(FunctionDefsEqual(outer_copy, outer));
-  EXPECT_TRUE(FunctionDefsEqual(inner_copy, inner));
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+
+  auto map_defun_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("MapDefun", *vectorized));
+  // The Cast node should be converted just fine.
+  EXPECT_EQ(GetRetval(*vectorized, 1), "Cast:y:0");
+
+  // The inner function should only have one retval.
+  FunctionLibraryDefinition lib_def(OpRegistry::Global(), lib);
+  const FunctionDef* map_defun_fn =
+      lib_def.Find(map_defun_node.attr().at("f").func().name());
+  EXPECT_EQ(map_defun_fn->signature().output_arg_size(), 1);
 }
 
 // Before:
@@ -257,14 +304,19 @@ TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) {
                       inner.signature().name(), &outer);
   CHECK_NOTNULL(map_defun);
 
-  VectorizeMapDefun(&outer, &inner, map_defun);
-  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
-  const NodeDef& cast_node =
-      outer.node_def(function_utils::FindFunctionNodeWithOp("Cast", outer));
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  const NodeDef& cast_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Cast", *vectorized));
   EXPECT_EQ(cast_node.input(0), "x");
-  EXPECT_EQ(outer.ret().at("mapdefun"),
+  EXPECT_EQ(GetRetval(*vectorized, 0),
             strings::StrCat(cast_node.name(), ":y:0"));
-  EXPECT_EQ(outer.node_def_size(), 1);
+  EXPECT_EQ(vectorized->node_def_size(), 1);
 }
 
 // Before:
@@ -330,16 +382,21 @@ TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) {
                       {{}, {}}, inner.signature().name(), &outer);
   CHECK_NOTNULL(map_defun);
 
-  VectorizeMapDefun(&outer, &inner, map_defun);
-  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
-  const NodeDef& cast_node =
-      outer.node_def(function_utils::FindFunctionNodeWithOp("Cast", outer));
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  const NodeDef& cast_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Cast", *vectorized));
   EXPECT_EQ(cast_node.input(0), "x");
-  EXPECT_EQ(outer.ret().at("mapdefun"),
+  EXPECT_EQ(GetRetval(*vectorized, 0),
             strings::StrCat(cast_node.name(), ":y:0"));
-  EXPECT_EQ(outer.ret().at("mapdefun_0"),
+  EXPECT_EQ(GetRetval(*vectorized, 1),
             strings::StrCat(cast_node.name(), ":y:0"));
-  EXPECT_EQ(outer.node_def_size(), 1);
+  EXPECT_EQ(vectorized->node_def_size(), 1);
 }
 
 // Before:
@@ -411,21 +468,26 @@ TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) {
       {{1}, {1}, {1}}, inner.signature().name(), &outer);
   CHECK_NOTNULL(map_defun);
 
-  VectorizeMapDefun(&outer, &inner, map_defun);
-  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
-  const NodeDef& unpack_node =
-      outer.node_def(function_utils::FindFunctionNodeWithOp("Unpack", outer));
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  const NodeDef& unpack_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Unpack", *vectorized));
   EXPECT_EQ(unpack_node.input(0), "x");
   EXPECT_EQ(unpack_node.attr().at("axis").i(), 1);
   EXPECT_EQ(unpack_node.attr().at("T").type(), DT_INT32);
   EXPECT_EQ(unpack_node.attr().at("num").i(), 3);
-  EXPECT_EQ(outer.ret().at("mapdefun"),
+  EXPECT_EQ(GetRetval(*vectorized, 0),
             strings::StrCat(unpack_node.name(), ":output:0"));
-  EXPECT_EQ(outer.ret().at("mapdefun_0"),
+  EXPECT_EQ(GetRetval(*vectorized, 1),
             strings::StrCat(unpack_node.name(), ":output:1"));
-  EXPECT_EQ(outer.ret().at("mapdefun_1"),
+  EXPECT_EQ(GetRetval(*vectorized, 2),
             strings::StrCat(unpack_node.name(), ":output:2"));
-  EXPECT_EQ(outer.node_def_size(), 1);
+  EXPECT_EQ(vectorized->node_def_size(), 1);
 }
 
 // Before:
@@ -486,7 +548,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) {
        {"ret1", "MyUnstack:output:1"},
        {"ret2", "MyUnstack:output:2"}});
   NodeDef* cast_op =
-      AddCastNode("Cast", {"arg0"}, DT_INT32, DT_INT64, false, &inner);
+      AddCastNode("Cast", {"arg0"}, DT_INT32, DT_INT32, false, &inner);
   CHECK_NOTNULL(cast_op);
   NodeDef* unstack_op =
       AddUnstackNode("MyUnstack", {"Cast:y:0"}, DT_INT32, 0, 3, &inner);
@@ -505,25 +567,30 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) {
       {{1}, {1}, {1}}, inner.signature().name(), &outer);
   CHECK_NOTNULL(map_defun);
 
-  VectorizeMapDefun(&outer, &inner, map_defun);
-  EXPECT_TRUE(!function_utils::ContainsFunctionNodeWithOp("MapDefun", outer));
-  const NodeDef& cast_node =
-      outer.node_def(function_utils::FindFunctionNodeWithOp("Cast", outer));
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  const NodeDef& cast_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Cast", *vectorized));
   EXPECT_EQ(cast_node.input(0), "x");
-  const NodeDef& unpack_node =
-      outer.node_def(function_utils::FindFunctionNodeWithOp("Unpack", outer));
+  const NodeDef& unpack_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Unpack", *vectorized));
   EXPECT_EQ(unpack_node.input(0), strings::StrCat(cast_node.name(), ":y:0"));
   EXPECT_EQ(unpack_node.attr().at("axis").i(), 1);
   EXPECT_EQ(unpack_node.attr().at("T").type(), DT_INT32);
   EXPECT_EQ(unpack_node.attr().at("num").i(), 3);
 
-  EXPECT_EQ(outer.ret().at("mapdefun"),
+  EXPECT_EQ(GetRetval(*vectorized, 0),
             strings::StrCat(unpack_node.name(), ":output:0"));
-  EXPECT_EQ(outer.ret().at("mapdefun_0"),
+  EXPECT_EQ(GetRetval(*vectorized, 1),
             strings::StrCat(unpack_node.name(), ":output:1"));
-  EXPECT_EQ(outer.ret().at("mapdefun_1"),
+  EXPECT_EQ(GetRetval(*vectorized, 2),
             strings::StrCat(unpack_node.name(), ":output:2"));
-  EXPECT_EQ(outer.node_def_size(), 2);
+  EXPECT_EQ(vectorized->node_def_size(), 2);
 }
 
 // Before:
@@ -561,9 +628,11 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
   FunctionDef inner =
       CreateFunction("inner_function", {{"arg0", DT_INT32}},
                      {{"ret0", DT_INT64}}, {{"ret0", "Cast:y:0"}});
-  // The attrs aren't relevant
-  NodeDef* print_op =
-      function_utils::AddNode("Print", "Print", {"arg0", "arg0"}, {}, &inner);
+  NodeDef* print_op = function_utils::AddNode(
+      "Print", "Print", {"arg0", "arg0"}, {/*attrs*/}, &inner);
+  graph_transforms::SetNodeAttr("T", DT_INT32, print_op);
+  graph_transforms::SetNodeAttr("U", gtl::ArraySlice<DataType>({DT_INT32}),
+                                print_op);
   CHECK_NOTNULL(print_op);
   NodeDef* cast_op = AddCastNode("Cast", {"arg0", "^Print"}, DT_INT32, DT_INT64,
                                  false, &inner);
@@ -578,11 +647,27 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
                       inner.signature().name(), &outer);
   CHECK_NOTNULL(map_defun);
 
-  FunctionDef outer_copy(outer);
-  FunctionDef inner_copy(inner);
-  VectorizeMapDefun(&outer, &inner, map_defun);
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   // They should be unchanged
-  EXPECT_TRUE(FunctionDefsEqual(outer_copy, outer));
+  // We check this somewhat manually as the names of nodes may have changed
+  EXPECT_EQ(vectorized->node_def_size(), 1);
+  const NodeDef& map_defun_node = vectorized->node_def(0);
+  EXPECT_EQ(map_defun_node.op(), "MapDefun");
+  FunctionLibraryDefinition lib_def(OpRegistry::Global(), lib);
+  const FunctionDef* map_defun_fn =
+      lib_def.Find(map_defun_node.attr().at("f").func().name());
+
+  const NodeDef& print_node = map_defun_fn->node_def(
+      function_utils::FindFunctionNodeWithOp("Print", *map_defun_fn));
+  const NodeDef& cast_node = map_defun_fn->node_def(
+      function_utils::FindFunctionNodeWithOp("Cast", *map_defun_fn));
+  string control_input = strings::StrCat("^", print_node.name());
+  EXPECT_TRUE(cast_node.input(0) == control_input ||
+              cast_node.input(1) == control_input);
 }
 
 // TODO(rachelim): More test cases when we get around to implementing them:
-- 
GitLab


From 4eef4925853a284fdfd4b5fae4b65f594a883b3b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 16:31:05 -0700
Subject: [PATCH 0891/1357] Add a rewrite_config option to disable
 meta_optimizer.

PiperOrigin-RevId: 215014737
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 6 ++++++
 tensorflow/core/protobuf/rewriter_config.proto        | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index e18a5f21d2..406c1b60ce 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -115,6 +115,9 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
 
 Status MetaOptimizer::InitializeOptimizers(
     std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const {
+  if (cfg_.disable_meta_optimizer()) {
+    return Status::OK();
+  }
   if (!cfg_.disable_model_pruning()) {
     optimizers->push_back(MakeUnique<ModelPruner>());
   }
@@ -489,6 +492,9 @@ void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item,
 }
 
 bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
+  if (cfg.disable_meta_optimizer()) {
+    return false;
+  }
   return !cfg.disable_model_pruning() ||
          cfg.layout_optimizer() != RewriterConfig::OFF ||
          cfg.function_optimization() != RewriterConfig::OFF ||
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 482178a540..8e0448d536 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -77,6 +77,8 @@ message RewriterConfig {
   Toggle scoped_allocator_optimization = 15;
   // Force small ops onto the CPU (default is ON).
   Toggle pin_to_host_optimization = 18;
+  // Disable the entire meta optimizer (off by default).
+  bool disable_meta_optimizer = 19;
 
   // Controls how many times we run the optimizers in meta optimizer (default
   // is once).
-- 
GitLab


From 0a341bbcb35d72d14bfda17f7f0cb0c61f323bce Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 16:36:07 -0700
Subject: [PATCH 0892/1357] Internal change

PiperOrigin-RevId: 215015490
---
 tensorflow/contrib/lite/examples/android/BUILD                   | 1 +
 tensorflow/contrib/lite/java/demo/app/src/main/BUILD             | 1 +
 tensorflow/contrib/lite/java/ovic/demo/app/BUILD                 | 1 +
 .../contrib/lite/models/smartreply/demo/app/src/main/BUILD       | 1 +
 4 files changed, 4 insertions(+)

diff --git a/tensorflow/contrib/lite/examples/android/BUILD b/tensorflow/contrib/lite/examples/android/BUILD
index 4d2437e7d3..d180cb4785 100644
--- a/tensorflow/contrib/lite/examples/android/BUILD
+++ b/tensorflow/contrib/lite/examples/android/BUILD
@@ -28,6 +28,7 @@ android_binary(
     srcs = glob([
         "app/src/main/java/**/*.java",
     ]),
+    aapt_version = "aapt",
     # Package assets from assets dir as well as all model targets.
     # Remove undesired models (and corresponding Activities in source)
     # to reduce APK size.
diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD
index 220d6c2159..5ad738389e 100644
--- a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD
+++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD
@@ -7,6 +7,7 @@ licenses(["notice"])  # Apache 2.0
 android_binary(
     name = "TfLiteCameraDemo",
     srcs = glob(["java/**/*.java"]),
+    aapt_version = "aapt",
     assets = [
         "//tensorflow/contrib/lite/java/demo/app/src/main/assets:labels_mobilenet_quant_v1_224.txt",
         "@tflite_mobilenet//:mobilenet_quant_v1_224.tflite",
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
index b2e3a9bd7d..058240aada 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
@@ -8,6 +8,7 @@ android_binary(
     srcs = [
         "OvicBenchmarkerActivity.java",
     ],
+    aapt_version = "aapt",
     assets = [
         "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata",
         "//tensorflow/contrib/lite/java/ovic/src/testdata:labels.txt",
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD
index f18a2ca07a..2e5033dab1 100644
--- a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD
@@ -20,6 +20,7 @@ filegroup(
 android_binary(
     name = "SmartReplyDemo",
     srcs = glob(["java/**/*.java"]),
+    aapt_version = "aapt",
     assets = [":assets"],
     assets_dir = "",
     custom_package = "com.example.android.smartreply",
-- 
GitLab


From 541677bfee008a093daab2d033bd72650d886126 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Fri, 28 Sep 2018 16:41:58 -0700
Subject: [PATCH 0893/1357] Add option to disable initialization/shutdown of
 the TPU.

PiperOrigin-RevId: 215016286
---
 tensorflow/contrib/tpu/__init__.py                   |  3 +++
 .../contrib/tpu/python/tpu/async_checkpoint.py       | 12 ++++++------
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py   |  9 +++++++--
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/tpu/__init__.py b/tensorflow/contrib/tpu/__init__.py
index 766466968a..6ce6b779a2 100644
--- a/tensorflow/contrib/tpu/__init__.py
+++ b/tensorflow/contrib/tpu/__init__.py
@@ -55,7 +55,9 @@
 
 @@TPUDistributionStrategy
 @@keras_to_tpu_model
+
 @@AsyncCheckpointSaverHook
+@@TPUInMemoryEvalHook
 """
 
 from __future__ import absolute_import
@@ -65,6 +67,7 @@ from __future__ import print_function
 # pylint: disable=wildcard-import,unused-import
 from tensorflow.contrib.tpu.python import profiler
 from tensorflow.contrib.tpu.python.ops.tpu_ops import *
+from tensorflow.contrib.tpu.python.tpu.async_checkpoint import *
 from tensorflow.contrib.tpu.python.tpu.bfloat16 import *
 from tensorflow.contrib.tpu.python.tpu.device_assignment import *
 from tensorflow.contrib.tpu.python.tpu.keras_support import tpu_model as keras_to_tpu_model
diff --git a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
index e06a720e82..20b7ba0997 100644
--- a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
+++ b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ======================================
-
 """Hook for asynchronous checkpointing.
 
 This hook dispatches checkpoint writing operations in a separate thread to
@@ -28,18 +27,16 @@ import threading
 import time
 
 from tensorflow.core.util.event_pb2 import SessionLog
-
 from tensorflow.python.framework import meta_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import session_run_hook
 from tensorflow.python.training import training_util
 from tensorflow.python.training.session_run_hook import SessionRunArgs
 from tensorflow.python.training.summary_io import SummaryWriterCache
 
 
-class AsyncCheckpointSaverHook(session_run_hook.SessionRunHook):
+class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook):
   """Saves checkpoints every N steps or seconds."""
 
   def __init__(self,
@@ -67,7 +64,7 @@ class AsyncCheckpointSaverHook(session_run_hook.SessionRunHook):
       ValueError: One of `save_steps` or `save_secs` should be set.
       ValueError: At most one of `saver` or `scaffold` should be set.
     """
-    logging.info("Create CheckpointSaverHook.")
+    logging.info("Create AsyncCheckpointSaverHook.")
     if saver is not None and scaffold is not None:
       raise ValueError("You cannot provide both saver and scaffold.")
     self._saver = saver
@@ -144,6 +141,10 @@ class AsyncCheckpointSaverHook(session_run_hook.SessionRunHook):
   def _save(self, session, step, asynchronous=True):
     """Saves the latest checkpoint, returns should_stop."""
 
+    # Skip saving on step 0
+    if step == 0:
+      return
+
     def _save_fn():
       """Run the saver process."""
       logging.info("Saving checkpoints for %d into %s.", step, self._save_path)
@@ -162,7 +163,6 @@ class AsyncCheckpointSaverHook(session_run_hook.SessionRunHook):
                    end_time - start_time)
       logging.info("Checkpoint finished for %d into %s.", step, self._save_path)
 
-    logging.info("Saving checkpoints for %d into %s.", step, self._save_path)
     for l in self._listeners:
       l.before_save(session, step)
 
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 764d85877a..545cee637f 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -404,12 +404,17 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
 
     self._feed_error = None
     self._finished = False
+    self._should_initialize_tpu = True
 
   def begin(self):
     logging.info('TPU job name %s', self._master_job)
     self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
-    self._init_ops = [tpu.initialize_system(job=self._master_job)]
-    self._finalize_ops = [tpu.shutdown_system(job=self._master_job)]
+    if self._should_initialize_tpu:
+      self._init_ops = [tpu.initialize_system(job=self._master_job)]
+      self._finalize_ops = [tpu.shutdown_system(job=self._master_job)]
+    else:
+      self._init_ops = []
+      self._finalize_ops = []
 
     summary_writer_init_ops = contrib_summary.summary_writer_initializer_op()
     self._init_ops.extend(summary_writer_init_ops)
-- 
GitLab


From 43e4905a8e554291656bcf65eb7d17d6019df9f8 Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Fri, 28 Sep 2018 16:47:53 -0700
Subject: [PATCH 0894/1357] Synchronize open source and closed source build
 with regard to flex delegate.

PiperOrigin-RevId: 215016968
---
 tensorflow/contrib/lite/BUILD | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index f320b53d94..f3ebe3b245 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -26,6 +26,14 @@ config_setting(
     },
 )
 
+# Enables inclusion of TensorFlow kernels via the TF Lite Flex delegate.
+# WARNING: This build flag is experimental and subject to change.
+config_setting(
+    name = "with_tflite_flex",
+    define_values = {"with_tflite_flex": "true"},
+    visibility = ["//visibility:public"],
+)
+
 cc_library(
     name = "schema_fbs_version",
     hdrs = ["version.h"],
@@ -157,6 +165,10 @@ cc_library(
         "stderr_reporter.h",
     ],
     copts = tflite_copts(),
+    defines = select({
+        ":with_tflite_flex": ["TFLITE_FLEX"],
+        "//conditions:default": [],
+    }),
     linkopts = [
     ] + select({
         "//tensorflow:android": [
@@ -180,7 +192,12 @@ cc_library(
         "//tensorflow/contrib/lite/nnapi:nnapi_lib",
         "//tensorflow/contrib/lite/profiling:profiler",
         "//tensorflow/contrib/lite/schema:schema_fbs",
-    ],
+    ] + select({
+        ":with_tflite_flex": [
+            "//tensorflow/contrib/lite/delegates/flex:delegate",
+        ],
+        "//conditions:default": [],
+    }),
 )
 
 cc_library(
-- 
GitLab


From 3c01aa2b00ee4c3fda412b23da39fd0894c04cf7 Mon Sep 17 00:00:00 2001
From: Piotr Padlewski <prazek@google.com>
Date: Fri, 28 Sep 2018 17:04:06 -0700
Subject: [PATCH 0895/1357] Bunch of micro move optimizations

PiperOrigin-RevId: 215018984
---
 tensorflow/core/framework/node_def_util.h   |  1 -
 tensorflow/core/framework/op.h              | 20 ++++----
 tensorflow/core/framework/op_def_builder.cc | 24 ++++-----
 tensorflow/core/framework/op_def_builder.h  | 14 +++---
 tensorflow/core/grappler/utils/functions.cc | 55 +++++++++++----------
 tensorflow/core/grappler/utils/functions.h  |  5 +-
 6 files changed, 62 insertions(+), 57 deletions(-)

diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h
index 187bfa2c88..0ff67554eb 100644
--- a/tensorflow/core/framework/node_def_util.h
+++ b/tensorflow/core/framework/node_def_util.h
@@ -17,7 +17,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_FRAMEWORK_NODE_DEF_UTIL_H_
 
 #include <string>
-#include <unordered_map>
 #include <vector>
 
 #include "tensorflow/core/framework/attr_value_util.h"
diff --git a/tensorflow/core/framework/op.h b/tensorflow/core/framework/op.h
index 25f8de8dcc..81ed5f95f0 100644
--- a/tensorflow/core/framework/op.h
+++ b/tensorflow/core/framework/op.h
@@ -209,16 +209,16 @@ template <>
 class OpDefBuilderWrapper<true> {
  public:
   OpDefBuilderWrapper(const char name[]) : builder_(name) {}
-  OpDefBuilderWrapper<true>& Attr(StringPiece spec) {
-    builder_.Attr(spec);
+  OpDefBuilderWrapper<true>& Attr(string spec) {
+    builder_.Attr(std::move(spec));
     return *this;
   }
-  OpDefBuilderWrapper<true>& Input(StringPiece spec) {
-    builder_.Input(spec);
+  OpDefBuilderWrapper<true>& Input(string spec) {
+    builder_.Input(std::move(spec));
     return *this;
   }
-  OpDefBuilderWrapper<true>& Output(StringPiece spec) {
-    builder_.Output(spec);
+  OpDefBuilderWrapper<true>& Output(string spec) {
+    builder_.Output(std::move(spec));
     return *this;
   }
   OpDefBuilderWrapper<true>& SetIsCommutative() {
@@ -237,12 +237,12 @@ class OpDefBuilderWrapper<true> {
     builder_.SetAllowsUninitializedInput();
     return *this;
   }
-  OpDefBuilderWrapper<true>& Deprecated(int version, StringPiece explanation) {
-    builder_.Deprecated(version, explanation);
+  OpDefBuilderWrapper<true>& Deprecated(int version, string explanation) {
+    builder_.Deprecated(version, std::move(explanation));
     return *this;
   }
-  OpDefBuilderWrapper<true>& Doc(StringPiece text) {
-    builder_.Doc(text);
+  OpDefBuilderWrapper<true>& Doc(string text) {
+    builder_.Doc(std::move(text));
     return *this;
   }
   OpDefBuilderWrapper<true>& SetShapeFn(
diff --git a/tensorflow/core/framework/op_def_builder.cc b/tensorflow/core/framework/op_def_builder.cc
index 34a7a43d38..8a9bb63182 100644
--- a/tensorflow/core/framework/op_def_builder.cc
+++ b/tensorflow/core/framework/op_def_builder.cc
@@ -526,32 +526,32 @@ void FinalizeDoc(const string& text, OpDef* op_def,
 
 }  // namespace
 
-OpDefBuilder::OpDefBuilder(StringPiece op_name) {
-  op_def()->set_name(string(op_name));  // NOLINT
+OpDefBuilder::OpDefBuilder(string op_name) {
+  op_def()->set_name(std::move(op_name));
 }
 
-OpDefBuilder& OpDefBuilder::Attr(StringPiece spec) {
-  attrs_.emplace_back(spec.data(), spec.size());
+OpDefBuilder& OpDefBuilder::Attr(string spec) {
+  attrs_.push_back(std::move(spec));
   return *this;
 }
 
-OpDefBuilder& OpDefBuilder::Input(StringPiece spec) {
-  inputs_.emplace_back(spec.data(), spec.size());
+OpDefBuilder& OpDefBuilder::Input(string spec) {
+  inputs_.push_back(std::move(spec));
   return *this;
 }
 
-OpDefBuilder& OpDefBuilder::Output(StringPiece spec) {
-  outputs_.emplace_back(spec.data(), spec.size());
+OpDefBuilder& OpDefBuilder::Output(string spec) {
+  outputs_.push_back(std::move(spec));
   return *this;
 }
 
 #ifndef TF_LEAN_BINARY
-OpDefBuilder& OpDefBuilder::Doc(StringPiece text) {
+OpDefBuilder& OpDefBuilder::Doc(string text) {
   if (!doc_.empty()) {
     errors_.push_back(
         strings::StrCat("Extra call to Doc() for Op ", op_def()->name()));
   } else {
-    doc_.assign(text.data(), text.size());
+    doc_ = std::move(text);
   }
   return *this;
 }
@@ -577,14 +577,14 @@ OpDefBuilder& OpDefBuilder::SetAllowsUninitializedInput() {
   return *this;
 }
 
-OpDefBuilder& OpDefBuilder::Deprecated(int version, StringPiece explanation) {
+OpDefBuilder& OpDefBuilder::Deprecated(int version, string explanation) {
   if (op_def()->has_deprecation()) {
     errors_.push_back(
         strings::StrCat("Deprecated called twice for Op ", op_def()->name()));
   } else {
     OpDeprecation* deprecation = op_def()->mutable_deprecation();
     deprecation->set_version(version);
-    deprecation->set_explanation(string(explanation));
+    deprecation->set_explanation(std::move(explanation));
   }
   return *this;
 }
diff --git a/tensorflow/core/framework/op_def_builder.h b/tensorflow/core/framework/op_def_builder.h
index 0b39d6e848..8077b20598 100644
--- a/tensorflow/core/framework/op_def_builder.h
+++ b/tensorflow/core/framework/op_def_builder.h
@@ -51,7 +51,7 @@ struct OpRegistrationData {
 class OpDefBuilder {
  public:
   // Constructs an OpDef with just the name field set.
-  explicit OpDefBuilder(StringPiece op_name);
+  explicit OpDefBuilder(string op_name);
 
   // Adds an attr to this OpDefBuilder (and returns *this). The spec has
   // format "<name>:<type>" or "<name>:<type>=<default>"
@@ -84,7 +84,7 @@ class OpDefBuilder {
   // * Ability to restrict the type of the tensor like the existing
   //   restrictions for type attrs.
   // Perhaps by linking the type of the tensor to a type attr?
-  OpDefBuilder& Attr(StringPiece spec);
+  OpDefBuilder& Attr(string spec);
 
   // Adds an input or output to this OpDefBuilder (and returns *this).
   // The spec has form "<name>:<type-expr>" or "<name>:Ref(<type-expr>)"
@@ -101,8 +101,8 @@ class OpDefBuilder {
   // in the spec?
   // TODO(josh11b): SparseInput() and SparseOutput() matching the Python
   // handling?
-  OpDefBuilder& Input(StringPiece spec);
-  OpDefBuilder& Output(StringPiece spec);
+  OpDefBuilder& Input(string spec);
+  OpDefBuilder& Output(string spec);
 
   // Turns on the indicated boolean flag in this OpDefBuilder (and
   // returns *this).
@@ -112,7 +112,7 @@ class OpDefBuilder {
   OpDefBuilder& SetAllowsUninitializedInput();
 
   // Deprecate the op at a certain GraphDef version.
-  OpDefBuilder& Deprecated(int version, StringPiece explanation);
+  OpDefBuilder& Deprecated(int version, string explanation);
 
   // Adds docs to this OpDefBuilder (and returns *this).
   // Docs have the format:
@@ -128,9 +128,9 @@ class OpDefBuilder {
   // to suppress the automatically-generated type documentation in
   // generated output.
 #ifndef TF_LEAN_BINARY
-  OpDefBuilder& Doc(StringPiece text);
+  OpDefBuilder& Doc(string text);
 #else
-  OpDefBuilder& Doc(StringPiece text) { return *this; }
+  OpDefBuilder& Doc(string text) { return *this; }
 #endif
 
   // Sets the shape function to be used for shape inference.
diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc
index a428aea7f5..6861fb423c 100644
--- a/tensorflow/core/grappler/utils/functions.cc
+++ b/tensorflow/core/grappler/utils/functions.cc
@@ -41,7 +41,8 @@ Status RegisterFunctionBodyOutputs(const OpRegistrationData& registration,
   tensorflow::NameRangeMap outputs_range_map;
   TF_RETURN_IF_ERROR(tensorflow::NameRangesForNode(
       node, registration.op_def, nullptr, &outputs_range_map));
-  connectivity->RegisterFunctionBodyOutputs(node.name(), outputs_range_map);
+  connectivity->RegisterFunctionBodyOutputs(node.name(),
+                                            std::move(outputs_range_map));
   return Status::OK();
 }
 
@@ -75,20 +76,22 @@ Status ResolveFunctionBodyNodeAttrPlaceholders(
 }  // namespace
 
 void GrapplerFunctionConnectivity::RegisterInputArgExpansion(
-    const InputArgExpansion& input_arg_expansion) {
-  const auto& input_name = input_arg_expansion.input_name;
+    InputArgExpansion input_arg_expansion) {
+  string input_name = input_arg_expansion.input_name;
   const auto& placeholders = input_arg_expansion.placeholders;
-  input_arg_expansions_.emplace(input_name, input_arg_expansion);
+
   for (int i = 0; i < placeholders.size(); ++i) {
     const string& placeholder = input_arg_expansion.placeholders[i];
-    input_arg_placeholders_.emplace(
-        placeholder, InputArgPlaceholder{input_name, /*position=*/i});
+    input_arg_placeholders_.insert(
+        {placeholder, InputArgPlaceholder{input_name, /*position=*/i}});
   }
+  input_arg_expansions_.insert(
+      {std::move(input_name), std::move(input_arg_expansion)});
 }
 
 void GrapplerFunctionConnectivity::RegisterFunctionBodyOutputs(
-    const string& node_name, const tensorflow::NameRangeMap& outputs) {
-  function_body_outputs_[node_name] = outputs;
+    const string& node_name, tensorflow::NameRangeMap&& outputs) {
+  function_body_outputs_[node_name] = std::move(outputs);
 }
 
 Status GrapplerFunctionConnectivity::ExpandFunctionDefInput(
@@ -174,11 +177,12 @@ Status GrapplerFunctionConnectivity::ExpandFunctionDefInput(
         const auto& output_range = output->second;
 
         if (position == -1) {
+          graph_def_inputs->reserve(graph_def_inputs->size() +
+                                    output_range.second - output_range.first);
           // If position is not defined expand node output range
           for (int i = output_range.first; i < output_range.second; ++i) {
-            i == 0 ? graph_def_inputs->push_back(node_name)
-                   : graph_def_inputs->push_back(
-                         strings::StrCat(node_name, ":", i));
+            graph_def_inputs->push_back(
+                i == 0 ? node_name : strings::StrCat(node_name, ":", i));
           }
         } else {
           if (position > (output_range.second - output_range.first)) {
@@ -187,9 +191,8 @@ Status GrapplerFunctionConnectivity::ExpandFunctionDefInput(
                 " position: ", position, " (out of range)");
           }
           int pos = output_range.first + position;
-          pos == 0 ? graph_def_inputs->push_back(node_name)
-                   : graph_def_inputs->push_back(
-                         strings::StrCat(node_name, ":", pos));
+          graph_def_inputs->push_back(
+              pos == 0 ? node_name : strings::StrCat(node_name, ":", pos));
         }
 
         return Status::OK();
@@ -211,8 +214,8 @@ Status GrapplerFunctionConnectivity::ExpandNodeInputs(
   }
 
   function_body_node->clear_input();
-  for (const string& expanded_input : expanded_inputs)
-    function_body_node->add_input(expanded_input);
+  for (string& expanded_input : expanded_inputs)
+    function_body_node->add_input(std::move(expanded_input));
   return Status::OK();
 }
 
@@ -323,7 +326,7 @@ GrapplerFunctionItem::GrapplerFunctionItem(
   // Fill the feed nodes with input placeholders.
   for (const InputArgExpansion& input_arg : input_arg_expansions_) {
     for (const string& placeholder : input_arg.placeholders) {
-      feed.emplace_back(placeholder, Tensor());
+      feed.push_back({placeholder, Tensor()});
       input_arg_placeholders_.insert(placeholder);
     }
   }
@@ -460,7 +463,7 @@ Status InstantiationBodyParameters(
 
       auto it = func_instantiation_attr.find(placeholder);
       if (it != func_instantiation_attr.end()) {
-        body_parameters->emplace(placeholder, it->second);
+        body_parameters->insert({placeholder, it->second});
       } else {
         return errors::InvalidArgument("Can't resolve placeholder: ",
                                        placeholder);
@@ -498,10 +501,6 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
   // GraphDef input format (name[:position])
   GrapplerFunctionConnectivity connectivity;
 
-  std::vector<InputArgExpansion> inputs;
-  std::vector<OutputArgExpansion> outputs;
-  std::vector<string> keep_nodes;
-
   // Function body shares the library with the graph that instantiated it.
   GraphDef function_body;
   *function_body.mutable_library() = flib.ToProto();
@@ -518,6 +517,9 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
     }
   }
 
+  std::vector<InputArgExpansion> inputs;
+  inputs.reserve(signature.input_arg_size());
+
   // For each input argument create a placeholder in function body.
   for (const OpDef::ArgDef& input : signature.input_arg()) {
     if (!input.type_list_attr().empty() || !input.number_attr().empty()) {
@@ -542,9 +544,10 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
                                       /*is_ref*/ input.is_ref(),
                                       /*placeholders=*/{input.name()}};
     connectivity.RegisterInputArgExpansion(input_expansion);
-    inputs.push_back(input_expansion);
+    inputs.push_back(std::move(input_expansion));
   }
 
+  std::vector<string> keep_nodes;
   // Add all function nodes to the function body
   for (const NodeDef& func_def_node : func.node_def()) {
     NodeDef* new_node = function_body.add_node();
@@ -572,6 +575,8 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
     TF_RETURN_IF_ERROR(connectivity.ExpandNodeInputs(&node));
   }
 
+  std::vector<OutputArgExpansion> outputs;
+  outputs.reserve(signature.output_arg_size());
   // Add function outputs
   for (const OpDef::ArgDef& out : signature.output_arg()) {
     std::vector<string> output_tensors;
@@ -589,8 +594,8 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
     OutputArgExpansion output{/*output_name=*/out.name(),
                               /*data_type=*/output_data_type,
                               /*is_ref=*/out.is_ref(),
-                              /*output_tensors=*/output_tensors};
-    outputs.push_back(output);
+                              /*output_tensors=*/std::move(output_tensors)};
+    outputs.push_back(std::move(output));
   }
 
   bool is_stateful = signature.is_stateful();
diff --git a/tensorflow/core/grappler/utils/functions.h b/tensorflow/core/grappler/utils/functions.h
index 733caf325f..ef944ced09 100644
--- a/tensorflow/core/grappler/utils/functions.h
+++ b/tensorflow/core/grappler/utils/functions.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -70,9 +71,9 @@ struct OutputArgExpansion {
 // and fold it back when doing backward conversion.
 class GrapplerFunctionConnectivity {
  public:
-  void RegisterInputArgExpansion(const InputArgExpansion& input_arg_expansion);
+  void RegisterInputArgExpansion(InputArgExpansion input_arg_expansion);
   void RegisterFunctionBodyOutputs(const string& node_name,
-                                   const tensorflow::NameRangeMap& outputs);
+                                   tensorflow::NameRangeMap&& outputs);
 
   // Expand input encoded in FunctionDef format (name[:output][:position]) into
   // multiple inputs in GraphDef format (name[:position]).
-- 
GitLab


From 6d354f6bd686d748d02039f26197f590b817b8c3 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 28 Sep 2018 17:04:41 -0700
Subject: [PATCH 0896/1357] [tf.data] Use `std::make_shared` as appropriate in
 `ParallelMapIterator`.

PiperOrigin-RevId: 215019058
---
 .../kernels/data/parallel_map_iterator.cc     | 40 +++++++++----------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 8393024c51..da067a4e6f 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -106,18 +106,17 @@ class ParallelMapIterator : public DatasetBaseIterator {
     TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("invocation_results.size"),
                                            invocation_results_.size()));
     for (size_t i = 0; i < invocation_results_.size(); i++) {
-      std::shared_ptr<InvocationResult> result = invocation_results_[i];
-      TF_RETURN_IF_ERROR(WriteStatusLocked(writer, i, result->status));
+      const auto& result = *(invocation_results_[i]);
+      TF_RETURN_IF_ERROR(WriteStatusLocked(writer, i, result.status));
       TF_RETURN_IF_ERROR(writer->WriteScalar(
           full_name(strings::StrCat("invocation_results[", i, "].size")),
-          result->return_values.size()));
-      for (size_t j = 0; j < result->return_values.size(); j++) {
-        TF_RETURN_IF_ERROR(
-            writer->WriteTensor(full_name(strings::StrCat(
-                                    "invocation_results[", i, "][", j, "]")),
-                                result->return_values[j]));
+          result.return_values.size()));
+      for (size_t j = 0; j < result.return_values.size(); j++) {
+        TF_RETURN_IF_ERROR(writer->WriteTensor(
+            full_name(strings::StrCat("invocation_results[", i, "][", j, "]")),
+            result.return_values[j]));
       }
-      if (result->end_of_input) {
+      if (result.end_of_input) {
         TF_RETURN_IF_ERROR(writer->WriteScalar(
             full_name(
                 strings::StrCat("invocation_results[", i, "].end_of_input")),
@@ -135,9 +134,9 @@ class ParallelMapIterator : public DatasetBaseIterator {
     TF_RETURN_IF_ERROR(reader->ReadScalar(
         full_name("invocation_results.size"), &invocation_results_size));
     for (size_t i = 0; i < invocation_results_size; i++) {
-      std::shared_ptr<InvocationResult> result(new InvocationResult());
-      invocation_results_.push_back(result);
-      TF_RETURN_IF_ERROR(ReadStatusLocked(reader, i, &result->status));
+      invocation_results_.push_back(std::make_shared<InvocationResult>());
+      auto& result = *invocation_results_.back();
+      TF_RETURN_IF_ERROR(ReadStatusLocked(reader, i, &result.status));
       size_t num_return_values;
       {
         int64 size;
@@ -153,17 +152,16 @@ class ParallelMapIterator : public DatasetBaseIterator {
               ": ", size, " is not a valid value of type size_t."));
         }
       }
-      result->return_values.reserve(num_return_values);
+      result.return_values.reserve(num_return_values);
       for (size_t j = 0; j < num_return_values; j++) {
-        result->return_values.emplace_back();
-        TF_RETURN_IF_ERROR(
-            reader->ReadTensor(full_name(strings::StrCat(
-                                   "invocation_results[", i, "][", j, "]")),
-                               &result->return_values.back()));
+        result.return_values.emplace_back();
+        TF_RETURN_IF_ERROR(reader->ReadTensor(
+            full_name(strings::StrCat("invocation_results[", i, "][", j, "]")),
+            &result.return_values.back()));
       }
-      result->end_of_input = reader->Contains(full_name(
+      result.end_of_input = reader->Contains(full_name(
           strings::StrCat("invocation_results[", i, "].end_of_input")));
-      result->notification.Notify();
+      result.notification.Notify();
     }
     return Status::OK();
   }
@@ -259,7 +257,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
           return;
         }
         while (!busy()) {
-          invocation_results_.emplace_back(new InvocationResult());
+          invocation_results_.push_back(std::make_shared<InvocationResult>());
           new_calls.push_back(invocation_results_.back());
           num_calls_++;
         }
-- 
GitLab


From 05e5d2a1a9d5471d634043135834ecae4355926a Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Fri, 28 Sep 2018 17:11:09 -0700
Subject: [PATCH 0897/1357] [TF] Fix incorrect type constraint on _VarHandlesOp
 kernel on GPU. The kernel supports any combination of the dtypes, and does
 not need a separate kernel for each dtype.

PiperOrigin-RevId: 215019812
---
 .../core/kernels/resource_variable_ops.cc      | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index 427044ca67..23d76986bf 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -172,17 +172,21 @@ REGISTER_KERNEL_BUILDER(
                               .Device(DEVICE_GPU)              \
                               .HostMemory("resource")          \
                               .TypeConstraint<type>("dtype"),  \
-                          ResourceHandleOp<Var>)               \
-  REGISTER_KERNEL_BUILDER(Name("_VarHandlesOp")                \
-                              .Device(DEVICE_GPU)              \
-                              .HostMemory("resources")         \
-                              .TypeConstraint<type>("dtypes"), \
-                          ResourceHandlesOp<Var>)
-
+                          ResourceHandleOp<Var>)
 TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS);
 TF_CALL_int64(REGISTER_GPU_KERNELS);
 TF_CALL_variant(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
+
+REGISTER_KERNEL_BUILDER(Name("_VarHandlesOp")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("resources")
+                            .TypeConstraint("dtypes",
+                                            {DT_INT64, DT_COMPLEX64,
+                                             DT_COMPLEX128, DT_HALF, DT_FLOAT,
+                                             DT_DOUBLE, DT_BOOL, DT_VARIANT}),
+                        ResourceHandlesOp<Var>);
+
 #endif  // GOOGLE_CUDA
 
 template <typename T>
-- 
GitLab


From 47503fdbfa72357e1419972986a4415ab3ad92a6 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 28 Sep 2018 17:17:36 -0700
Subject: [PATCH 0898/1357] Disable jemalloc, and remove its configuration
 option.

PiperOrigin-RevId: 215020524
---
 configure.py   | 2 --
 tools/bazel.rc | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/configure.py b/configure.py
index 9899ae10e8..0a3b9a7894 100644
--- a/configure.py
+++ b/configure.py
@@ -1513,8 +1513,6 @@ def main():
   if is_ppc64le():
     write_action_env_to_bazelrc('OMP_NUM_THREADS', 1)
 
-  set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc',
-                'with_jemalloc', True)
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
                 True, 'xla')
 
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 3734fab715..0cd148ed87 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -73,6 +73,7 @@ build --define=grpc_no_ares=true
 build --spawn_strategy=standalone
 build --genrule_strategy=standalone
 build -c opt
+build --define=with_jemalloc=false
 
 # Other build flags.
 build --define=grpc_no_ares=true
-- 
GitLab


From 737915c01dcab743256df7f7b1ff1545b951252d Mon Sep 17 00:00:00 2001
From: Revan Sopher <rsopher@google.com>
Date: Fri, 28 Sep 2018 17:26:51 -0700
Subject: [PATCH 0899/1357] Internal change.

PiperOrigin-RevId: 215021487
---
 tensorflow/core/util/tensor_bundle/BUILD | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/core/util/tensor_bundle/BUILD b/tensorflow/core/util/tensor_bundle/BUILD
index 4d4db86df2..f40ec9b752 100644
--- a/tensorflow/core/util/tensor_bundle/BUILD
+++ b/tensorflow/core/util/tensor_bundle/BUILD
@@ -65,6 +65,10 @@ tf_cc_test(
     name = "tensor_bundle_test",
     srcs = ["tensor_bundle_test.cc"],
     data = glob(["testdata/**"]),
+    tags = [
+        "nomsan",
+        "notsan",
+    ],
     deps = [
         ":tensor_bundle",
         "//tensorflow/core:framework",
-- 
GitLab


From 5f822d694af6e4aa57fe8a426032a91dc61e30d6 Mon Sep 17 00:00:00 2001
From: Revan Sopher <rsopher@google.com>
Date: Fri, 28 Sep 2018 18:07:33 -0700
Subject: [PATCH 0900/1357] Internal change.

PiperOrigin-RevId: 215025019
---
 tensorflow/contrib/factorization/BUILD                | 9 ++++++++-
 tensorflow/contrib/opt/BUILD                          | 5 +++++
 tensorflow/contrib/timeseries/python/timeseries/BUILD | 7 ++++++-
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD
index e344d7a23b..510f292508 100644
--- a/tensorflow/contrib/factorization/BUILD
+++ b/tensorflow/contrib/factorization/BUILD
@@ -154,6 +154,8 @@ tf_py_test(
     ],
     tags = [
         "no_pip",  # b/38283730
+        "noasan",  # b/116875897
+        "nomsan",
         "notsan",  # Flaky: b/30756419
     ],
 )
@@ -177,7 +179,11 @@ tf_py_test(
         "//tensorflow/python:random_seed",
         "//tensorflow/python:variables",
     ],
-    tags = ["notsan"],  # b/62863147
+    tags = [
+        "noasan",  # b/116875897
+        "nomsan",
+        "notsan",  # b/62863147
+    ],
 )
 
 py_library(
@@ -276,6 +282,7 @@ tf_py_test(
         "manual",
         "noasan",  # times out b/63678675
         "nomsan",
+        "notsan",  # b/116875897
     ],
 )
 
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index f4ac70eb1a..6a67c6295d 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -377,6 +377,11 @@ py_test(
     size = "large",
     srcs = ["python/training/shampoo_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "noasan",  # b/116875897
+        "nomsan",
+        "notsan",
+    ],
     deps = [
         ":opt_py",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD
index c230919168..cb1f707028 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/BUILD
+++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD
@@ -159,7 +159,12 @@ py_test(
     ],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = ["no_pip_gpu"],  # b/63391119
+    tags = [
+        "no_pip_gpu",  # b/63391119
+        "noasan",  # b/116875897
+        "nomsan",
+        "notsan",
+    ],
     deps = [
         ":estimators",
         ":feature_keys",
-- 
GitLab


From b34ddf043324e52ee0acdfe62cb18beab7fed08e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 18:22:13 -0700
Subject: [PATCH 0901/1357] Added flag to enable non-lazy Adam optimizer
 implementation for TPU embeddings (actual implementation is pending). Added
 comments with pointers to C++ implementations of optimizers.

PiperOrigin-RevId: 215026002
---
 .../tpu/proto/optimization_parameters.proto   | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index fc1320501b..a43f45554f 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -22,13 +22,22 @@ message LearningRate {
   }
 }
 
+// Each optimizer's parameter proto has a link to its documentation and CPU
+// implementation (if available) for user reference.
+
+// https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer
+// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L151
 message AdagradParameters {
   float initial_accumulator = 1;
 }
 
+// https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer
+// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L423
 message StochasticGradientDescentParameters {
 }
 
+// https://www.tensorflow.org/api_docs/python/tf/train/FtrlOptimizer
+// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L192
 message FtrlParameters {
   float l1 = 1;
   float l2 = 2;
@@ -41,21 +50,38 @@ message FtrlParameters {
 // learning rate feature instead, setting the learning rate to:
 // user learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
 // Here, t is the current timestep.
+//
+// https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
 // https://github.com/tensorflow/tensorflow/blob/ab51450c817674c8ff08a7ae4f8ac50cdc4bed8b/tensorflow/python/training/adam.py#L54
+//
+// Note that the code by default implements the lazy version of Adam
+// (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/LazyAdamOptimizer)
+// unless the use_non_lazy_adam parameter is set, in which case it implements
+// the normal version of Adam that updates all parameters in the embedding
+// table, even for entries that are not used in the current minibatch
+// (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/AdamOptimizer). If
+// use_non_lazy_adam is enabled, use_gradient_accumulation is also required in
+// order to get correct results; a warning will be printed otherwise (which may
+// change to an error in the future).
 message AdamParameters {
   float beta1 = 3;
   float beta2 = 4;
   float epsilon = 5;
   float initial_m = 6;
   float initial_v = 7;
+  bool use_non_lazy_adam = 8;
 }
 
+// https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
+// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L271
 message MomentumParameters {
   float momentum = 1;
   bool use_nesterov = 2;
   float initial_accum = 3;
 }
 
+// https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
+// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L356
 message RmsPropParameters {
   float rho = 1;
   float momentum = 2;
@@ -64,6 +90,8 @@ message RmsPropParameters {
   float initial_mom = 5;
 }
 
+// https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
+// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L372
 message CenteredRmsPropParameters {
   float rho = 1;
   float momentum = 2;
@@ -73,6 +101,7 @@ message CenteredRmsPropParameters {
   float initial_mg = 6;
 }
 
+// Variant of algorithm in http://proceedings.mlr.press/v44/shamir15.pdf
 message MdlAdagradLightParameters {
   float l2 = 1;
   float lr_power = 2;
@@ -91,6 +120,8 @@ message MdlAdagradLightParameters {
   float initial_benefit = 15;
 }
 
+// https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
+// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L68
 message AdadeltaParameters {
   float rho = 1;
   float epsilon = 2;
@@ -98,6 +129,8 @@ message AdadeltaParameters {
   float initial_update = 4;
 }
 
+// https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
+// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L164
 message ProximalAdagradParameters {
   float l1 = 1;
   float l2 = 2;
-- 
GitLab


From e4fea9419ac387ddcb9c932abaa8e92fb045e29f Mon Sep 17 00:00:00 2001
From: knightXun <badgangkiller@gmail.com>
Date: Sat, 29 Sep 2018 00:42:23 +0800
Subject: [PATCH 0902/1357] print error information, when the os is not
 supported

---
 tensorflow/go/test.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/go/test.sh b/tensorflow/go/test.sh
index 6083608f22..47c3a68379 100755
--- a/tensorflow/go/test.sh
+++ b/tensorflow/go/test.sh
@@ -63,6 +63,9 @@ then
   else
     export DYLD_LIBRARY_PATH="${PWD}/tensorflow:${DYLD_LIBRARY_PATH}"
   fi
+else 
+  echo "Only support Linux/Darwin, System $OS is not supported"
+  exit 1
 fi
 
 # Document the Go version and run tests
-- 
GitLab


From abd5c32c0fa6451e73b491affdd86d852a74177f Mon Sep 17 00:00:00 2001
From: Revan Sopher <rsopher@google.com>
Date: Fri, 28 Sep 2018 18:27:37 -0700
Subject: [PATCH 0903/1357] Automated rollback of commit
 70f071f7afb2deffddbd9937d7a76b1e1c0b2b75

PiperOrigin-RevId: 215026418
---
 .../estimator_batch/dnn_tree_combined_estimator_test.py       | 3 +--
 .../contrib/boosted_trees/estimator_batch/estimator_test.py   | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
index 83a8dee632..839eedd3a8 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
@@ -188,8 +188,7 @@ class CoreDNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase):
 
     # Train for a few steps.
     est.train(input_fn=_train_input_fn, steps=1000)
-    # 10 steps for dnn + 3 for 1 tree of depth 3 + 1 after the tree finished
-    # + 1 for resource variables.
+    # 10 steps for dnn, 3  for 1 tree of depth 3 + 1 after the tree finished
     self._assert_checkpoint(est.model_dir, global_step=14)
     res = est.evaluate(input_fn=_eval_input_fn, steps=1)
     self.assertLess(0.5, res["auc"])
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
index d7b14e00ba..c155128c0e 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
@@ -238,8 +238,8 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase):
         output_leaf_index=False)
 
     classifier.fit(input_fn=_train_input_fn, steps=15)
-    # When no override of global steps, 6 steps were used.
-    self._assert_checkpoint(classifier.model_dir, global_step=6)
+    # When no override of global steps, 5 steps were used.
+    self._assert_checkpoint(classifier.model_dir, global_step=5)
 
   def testOverridesGlobalSteps(self):
     learner_config = learner_pb2.LearnerConfig()
-- 
GitLab


From d37f771cc5a208cdc88a50a65f491b3c06c9f262 Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <jhseu@google.com>
Date: Fri, 28 Sep 2018 18:41:31 -0700
Subject: [PATCH 0904/1357] Move TPU variables to the TPU device in
 TPUStrategy.

PiperOrigin-RevId: 215027511
---
 tensorflow/contrib/distribute/python/BUILD    |   1 +
 .../contrib/distribute/python/tpu_strategy.py | 175 +++++++-
 .../contrib/distribute/python/values.py       | 381 ++++++++++++++++++
 .../tpu/python/tpu/keras_tpu_variables.py     |   2 +-
 tensorflow/contrib/tpu/python/tpu/tpu.py      |  11 +-
 tensorflow/python/eager/backprop.py           |   2 +-
 tensorflow/python/estimator/estimator.py      |   4 +
 tensorflow/python/estimator/util.py           |   8 +-
 tensorflow/python/training/optimizer.py       |   5 +-
 tensorflow/python/training/session_manager.py |   5 +
 10 files changed, 565 insertions(+), 29 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 422983dbef..cfb9d42a6f 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -28,6 +28,7 @@ py_library(
         "//tensorflow/python:device_util",
         "//tensorflow/python:distribute",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:util",
         "//tensorflow/python/eager:context",
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index a6762e5e87..1b555482d3 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -29,6 +29,7 @@ from tensorflow.contrib.tpu.python.tpu import tpu
 from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
 from tensorflow.contrib.tpu.python.tpu import training_loop
 from tensorflow.python.eager import context
+from tensorflow.python.eager import tape
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
@@ -37,9 +38,13 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.training import device_util
+from tensorflow.python.training import distribute as distribute_lib
 from tensorflow.python.util import nest
 
 
+_TPU_INITIALIZE_SYSTEM_COLLECTION = "TPU_STRATEGY_INITIALIZE"
+
+
 def get_tpu_system_metadata(tpu_cluster_resolver):
   """Retrieves TPU system metadata given a TPUClusterResolver."""
   master = tpu_cluster_resolver.master()
@@ -56,6 +61,58 @@ def get_tpu_system_metadata(tpu_cluster_resolver):
   return tpu_system_metadata
 
 
+# TODO(jhseu): Deduplicate with MirroredStrategy?
+def _create_tpu_mirrored_variable(devices, real_mirrored_creator, *args,
+                                  **kwargs):  # pylint: disable=g-missing-docstring
+  # Figure out what collections this variable should be added to.
+  # We'll add the TPUMirroredVariable to those collections instead.
+  collections = kwargs.pop("collections", None)
+  if collections is None:
+    collections = [ops.GraphKeys.GLOBAL_VARIABLES]
+  kwargs["collections"] = []
+
+  # TODO(jhseu): Should we have different behavior for different
+  # synchronization settings?
+
+  # Get aggregation value
+  # TODO(jhseu): Support aggregation in a tower context.
+  aggregation = kwargs.pop("aggregation", vs.VariableAggregation.NONE)
+  if aggregation not in [
+      vs.VariableAggregation.NONE,
+      vs.VariableAggregation.SUM,
+      vs.VariableAggregation.MEAN,
+      vs.VariableAggregation.ONLY_FIRST_TOWER,
+  ]:
+    raise ValueError("Invalid variable aggregation mode: {} for variable: {}"
+                     .format(aggregation, kwargs["name"]))
+
+  # Ignore user-specified caching device, not needed for mirrored variables.
+  kwargs.pop("caching_device", None)
+
+  # TODO(josh11b,apassos): It would be better if variable initialization
+  # was never recorded on the tape instead of having to do this manually
+  # here.
+  with tape.stop_recording():
+    index = real_mirrored_creator(devices, *args, **kwargs)
+    result = values.TPUMirroredVariable(index, index[devices[0]], aggregation)
+
+  if not context.executing_eagerly():
+    g = ops.get_default_graph()
+    # If "trainable" is True, next_creator() will add the member variables
+    # to the TRAINABLE_VARIABLES collection, so we manually remove
+    # them and replace with the MirroredVariable. We can't set
+    # "trainable" to False for next_creator() since that causes functions
+    # like implicit_gradients to skip those variables.
+    if kwargs.get("trainable", True):
+      collections.append(ops.GraphKeys.TRAINABLE_VARIABLES)
+      l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES)
+      for v in index.values():
+        l.remove(v)
+    g.add_to_collections(collections, result)
+  return result
+
+
+# TODO(jhseu): Stop inheriting from OneDeviceStrategy.
 class TPUStrategy(one_device_strategy.OneDeviceStrategy):
   """Experimental TPU distribution strategy implementation."""
 
@@ -82,6 +139,15 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
     # TODO(sourabhbajaj): Change this from num_cores to metadata_override
     self._num_cores_override = num_cores
 
+    # TODO(jhseu): Switch to DeviceAssignment to support pods and model
+    # parallelism.
+    device_map = {d.name: i for i, d in enumerate(self._tpu_metadata.devices)
+                  if "device:TPU:" in d.name}
+    self._device_index = values.PerDevice(device_map)
+    self._tpu_devices = sorted(device_map.keys())
+    # Only create variables for the number of towers we're running.
+    self._tpu_devices = self._tpu_devices[:self.num_towers]
+
     # TODO(sourabhbajaj): Remove this once performance of running one step
     # at a time is comparable to multiple steps.
     self.steps_per_run = steps_per_run
@@ -239,6 +305,8 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
     return ctx
 
   def _call_for_each_tower(self, fn, *args, **kwargs):
+    # TODO(jhseu): Consider making it so call_for_each_tower implies that we're
+    # in a tpu.rewrite(), and update TPUMirroredVariable accordingly.
     kwargs.pop('run_concurrently', None)
     with one_device_strategy._OneDeviceTowerContext(self):  # pylint: disable=protected-access
       return fn(*args, **kwargs)
@@ -248,7 +316,15 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
       # TODO(priyag): Add appopriate call here when eager is supported for TPUs.
       raise NotImplementedError('Eager mode not supported in TPUStrategy.')
     else:
-      return [tpu.initialize_system()]
+      # TODO(jhseu): We need this hack because DistributionStrategies must be
+      # pickleable for copy.deepcopy(). Remove when initialize_system goes away.
+      graph = ops.get_default_graph()
+      tpu_init = graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION)
+      if tpu_init:
+        return tpu_init
+      graph.add_to_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION,
+                              tpu.initialize_system())
+      return graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION)
 
   def finalize(self):
     if context.executing_eagerly():
@@ -257,21 +333,53 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
     else:
       return [tpu.shutdown_system()]
 
+  def _get_devices_from(self, colocate_with=None):
+     # TODO(jhseu): Change this when we support model parallelism.
+    return self._tpu_devices
+
+  def _create_variable(self, next_creator, *args, **kwargs):
+    """Create a TPUMirroredVariable. See `DistributionStrategy.scope`."""
+    colocate_with = kwargs.pop("colocate_with", None)
+    devices = self._get_devices_from(colocate_with)
+
+    def _real_mirrored_creator(devices, *args, **kwargs):  # pylint: disable=g-missing-docstring
+      index = {}
+      for i, d in enumerate(devices):
+        with ops.device(d):
+          if i > 0:
+            # Give replicas meaningful distinct names:
+            var0name = index[devices[0]].name.split(":")[0]
+            # We append a / to variable names created on towers with id > 0 to
+            # ensure that we ignore the name scope and instead use the given
+            # name as the absolute name of the variable.
+            kwargs["name"] = "%s/replica_%d/" % (var0name, i)
+            # Initialize replicas with the same value:
+            if context.executing_eagerly():
+              kwargs["initial_value"] = array_ops.identity(
+                  index[devices[0]].value())
+            else:
+              def initial_value_fn(device=d):
+                with ops.device(device):
+                  return array_ops.identity(index[devices[0]].initial_value)
+              kwargs["initial_value"] = initial_value_fn
+          with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
+            v = next_creator(*args, **kwargs)
+          assert not isinstance(v, values.TPUMirroredVariable)
+          index[d] = v
+      return index
+
+    return _create_tpu_mirrored_variable(devices, _real_mirrored_creator, *args,
+                                         **kwargs)
+
   def _reduce(self, aggregation, value, destinations):
-    graph = ops.get_default_graph()
-    cf_context = graph._get_control_flow_context()  # pylint: disable=protected-access
-    # If we're inside the ReplicateContext, reduction should be done using
-    # CrossReplicaSum while outside we can directly use an add_n op.
-    while cf_context:
-      if isinstance(cf_context, tpu.TPUReplicateContext):
-        if aggregation == vs.VariableAggregation.MEAN:
-          # TODO(jhseu):  Revisit once we support model-parallelism.
-          value *= (1. / self.num_towers)
-        elif aggregation != vs.VariableAggregation.SUM:
-          raise NotImplementedError(
-              'Currently only support sum & mean in TPUStrategy.')
-        return tpu_ops.cross_replica_sum(value)
-      cf_context = cf_context.outer_context
+    if values._enclosing_tpu_context() is not None:  # pylint: disable=protected-access
+      if aggregation == vs.VariableAggregation.MEAN:
+        # TODO(jhseu):  Revisit once we support model-parallelism.
+        value *= (1. / self.num_towers)
+      elif aggregation != vs.VariableAggregation.SUM:
+        raise NotImplementedError(
+            "Currently only support sum & mean in TPUStrategy.")
+      return tpu_ops.cross_replica_sum(value)
 
     # Validate that the destination is same as the host device
     # Note we don't do this when in replicate context as the reduction is
@@ -290,6 +398,35 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
       return output * (1. / len(value))
     return output
 
+  def _update(self, var, fn, *args, **kwargs):
+    # TODO(jhseu): Consider supporting grouped==False.
+    assert isinstance(var, values.TPUMirroredVariable)
+    if values._enclosing_tpu_context() is not None:  # pylint: disable=protected-access
+      return fn(var, *args, **kwargs)
+
+    # Otherwise, we revert to MirroredStrategy behavior and update each variable
+    # directly.
+    updates = {}
+    for d, v in var._index.items():  # pylint: disable=protected-access
+      name = "update_%d" % self._device_index.get(d)
+      with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name):
+        # If args and kwargs are not mirrored, the value is returned as is.
+        updates[d] = fn(v,
+                        *values.select_device_mirrored(d, args),
+                        **values.select_device_mirrored(d, kwargs))
+
+    # Make a single control dependency to keep the variables mirrored. If one
+    # assignment is fetched, then run all assignments.
+    sorted_keys = sorted(updates.keys())
+    update_tuple = control_flow_ops.tuple([updates[d] for d in sorted_keys])
+    for i, d in enumerate(sorted_keys):
+      updates[d] = update_tuple[i]
+    return values.regroup(updates, values.Mirrored)
+
+  def read_var(self, var):
+    assert isinstance(var, values.TPUMirroredVariable)
+    return var.read_value()
+
   def _unwrap(self, value):
     if isinstance(value, list):
       return value
@@ -323,6 +460,14 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
   def should_save_summary(self):
     return True
 
+  @property
+  def worker_devices(self):
+    return self._tpu_devices
+
+  @property
+  def parameter_devices(self):
+    return self._tpu_devices
+
   def get_host_cpu_device(self, host_id):
     if self._tpu_cluster_resolver.get_master() in ('', 'local'):
       return '/replica:0/task:0/device:CPU:0'
diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index 4955ded4d5..c18faeb67d 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -22,17 +22,20 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import contextlib
 import weakref
 import six
 
 from tensorflow.contrib.distribute.python import input_ops
 from tensorflow.contrib.distribute.python import prefetching_ops_v2
 from tensorflow.python.eager import context
+from tensorflow.python.eager import tape
 from tensorflow.python.framework import device as tf_device
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_resource_variable_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.ops import variables as variables_lib
@@ -453,6 +456,384 @@ ops.register_tensor_conversion_function(MirroredVariable,
                                         _tensor_conversion_mirrored)
 
 
+def _enclosing_tpu_context():
+  # pylint: disable=protected-access
+  tpu_context = ops.get_default_graph()._get_control_flow_context()
+  # pylint: enable=protected-access
+  while tpu_context is not None and not isinstance(
+      tpu_context, control_flow_ops.XLAControlFlowContext):
+    tpu_context = tpu_context.outer_context
+  return tpu_context
+
+
+# TODO(jhseu): Deduplicate code. We copy code because we don't want to
+# inherit from DistributedDelegate. DistributedDelegate will not work in a
+# tpu.replicate() because it assumes that you're in a device context where you
+# can operate on a single version of the variable, but a tpu.replicate()
+# operates on all variables and is replicated during a rewrite pass.
+class TPUMirroredVariable(checkpointable.CheckpointableBase):
+  """Holds a map from device to TPU variables whose values are kept in sync."""
+
+  def __init__(self, index, primary_var, aggregation):
+    # Use a weakref to make it easy to map from the contained values
+    # to the container without introducing a reference cycle.
+    for v in six.itervalues(index):
+      v._mirrored_container = weakref.ref(self)  # pylint: disable=protected-access
+    self._index = {device_util.canonicalize(key): value
+                   for key, value in six.iteritems(index)}
+    self._primary_var = primary_var
+    self._common_name = self._primary_var.name.split(":")[0]
+    self._aggregation = aggregation
+    # Needed for GradientTape
+    self._trainable = self._primary_var.trainable
+
+  def _get(self, device=None):
+    """Returns the value for the current device or raises a ValueError."""
+    if device is None:
+      tower_context = distribution_strategy_context.get_tower_context()
+      if tower_context:
+        device = tower_context.device
+      else:
+        device = distribute_lib.get_update_device()
+        if device is None:
+          return self._get_cross_tower()
+    device = device_util.canonicalize(device)
+    try:
+      return self._index[device]
+    except KeyError as e:
+      six.raise_from(
+          ValueError("Device %s not found in %s (current device %s)" %
+                     (device, self._index.keys(), device_util.current())), e)
+
+  # pylint: disable=multiple-statements
+  def __add__(self, o): return self.read_value() + o
+  def __radd__(self, o): return o + self.read_value()
+  def __sub__(self, o): return self.read_value() - o
+  def __rsub__(self, o): return o - self.read_value()
+  def __mul__(self, o): return self.read_value() * o
+  def __rmul__(self, o): return o * self.read_value()
+  def __truediv__(self, o): return self.read_value() / o
+  def __rtruediv__(self, o): return o / self.read_value()
+  def __floordiv__(self, o): return self.read_value() // o
+  def __rfloordiv__(self, o): return o // self.read_value()
+  def __mod__(self, o): return self.read_value() % o
+  def __rmod__(self, o): return o % self.read_value()
+  def __lt__(self, o): return self.read_value() < o
+  def __le__(self, o): return self.read_value() <= o
+  def __gt__(self, o): return self.read_value() > o
+  def __ge__(self, o): return self.read_value() >= o
+  def __and__(self, o): return self.read_value() & o
+  def __rand__(self, o): return o & self.read_value()
+  def __or__(self, o): return self.read_value() | o
+  def __ror__(self, o): return o | self.read_value()
+  def __xor__(self, o): return self.read_value() ^ o
+  def __rxor__(self, o): return o ^ self.read_value()
+  def __getitem__(self, o): return self.read_value()[o]
+  def __pow__(self, o, modulo=None): return pow(self.read_value(), o, modulo)
+  def __rpow__(self, o): return pow(o, self.read_value())
+  def __invert__(self): return ~self.read_value()
+  def __neg__(self): return -self.read_value()
+  def __abs__(self): return abs(self.read_value())
+
+  def __div__(self, o):
+    try:
+      return self.read_value().__div__(o)
+    except AttributeError:
+      # See https://docs.python.org/3/library/constants.html#NotImplemented
+      return NotImplemented
+
+  def __rdiv__(self, o):
+    try:
+      return self.read_value().__rdiv__(o)
+    except AttributeError:
+      # See https://docs.python.org/3/library/constants.html#NotImplemented
+      return NotImplemented
+
+  def __matmul__(self, o):
+    try:
+      return self.read_value().__matmul__(o)
+    except AttributeError:
+      # See https://docs.python.org/3/library/constants.html#NotImplemented
+      return NotImplemented
+
+  def __rmatmul__(self, o):
+    try:
+      return self.read_value().__rmatmul__(o)
+    except AttributeError:
+      # See https://docs.python.org/3/library/constants.html#NotImplemented
+      return NotImplemented
+
+  @property
+  def handle(self):
+    # If we're in a tpu.rewrite(), return the replicated handle.
+    tpu_context = _enclosing_tpu_context()
+    if tpu_context is not None:
+      return tpu_context.get_replicated_var_handle(
+          self._common_name, nest.flatten(self._index))
+
+    device = distribute_lib.get_update_device()
+    if device is None:
+      return self._primary_var.handle
+    device = device_util.canonicalize(device)
+    try:
+      return self._index[device].handle
+    except KeyError as e:
+      six.raise_from(
+          ValueError("Device %s not found in %s (current device %s)" %
+                     (device, self._index.keys(), device_util.current())), e)
+
+  # The arguments to update() are automatically unwrapped so the update()
+  # function would normally see regular variables, not MirroredVariables.
+  # However, the update function can still operate on wrapped MirroredVariables
+  # through object members, captured arguments, etc. This is more likely in an
+  # update_non_slot() function (like OptimizerV2._finish), which can
+  # update several non-slot variables in one call.
+  def _assign_func(self, *args, **kwargs):
+    if distribution_strategy_context.get_distribution_strategy().__class__.__name__ != "TPUStrategy":
+      raise ValueError("You may only assign to a TPUMirroredVariable within a "
+                       "TPUStrategy.")
+    f = kwargs.pop("f")
+    if distribution_strategy_context.get_cross_tower_context():
+      if _enclosing_tpu_context() is not None:
+        return distribution_strategy_context.get_distribution_strategy().update(
+            self, f, *args, **kwargs)
+
+      update_device = distribute_lib.get_update_device()
+      # We are calling update on the mirrored variable in cross tower context.
+      if update_device is not None:
+        # We are calling an assign function on the mirrored variable in cross
+        # tower context.
+        v = self._get(device=update_device)
+        return f(v, *args, **kwargs)
+
+      return distribution_strategy_context.get_distribution_strategy().update(
+          self, f, *args, **kwargs)
+    else:
+      _assert_tower_context()
+      # We are calling an assign function on the mirrored variable in tower
+      # context.
+      # We reduce the value we want to assign/add/sub. More details about how we
+      # handle the different use cases can be found in the _reduce method.
+      # We call the function on each of the mirrored variables with the reduced
+      # value.
+      if self._aggregation == vs.VariableAggregation.NONE:
+        raise ValueError("You must specify an aggregation method to update a "
+                         "TPUMirroredVariable in Tower Context.")
+
+      def merge_fn(strategy, value, *other_args, **other_kwargs):
+        return strategy.update(
+            self, f,
+            strategy.reduce(
+                aggregation=self._aggregation, value=value, destinations=self),
+            *other_args, **other_kwargs)
+
+      return distribution_strategy_context.get_tower_context().merge_call(
+          merge_fn, *args, **kwargs)
+
+  @contextlib.contextmanager
+  def _handle_graph(self, handle):
+    # Note: might have an eager tensor but not be executing eagerly when
+    # building functions.
+    if (context.executing_eagerly() or isinstance(handle, ops.EagerTensor)
+        or ops.has_default_graph()):
+      yield
+    else:
+      with handle.graph.as_default():
+        yield
+
+  @property
+  def trainable(self):
+    return self._trainable
+
+  def _read_variable_op(self, parent_op=None):
+    if self.trainable:
+      tape.variable_accessed(self)
+    if parent_op is not None:
+      with ops.control_dependencies([parent_op]):
+        return gen_resource_variable_ops.read_variable_op(
+            self.handle, self.dtype)
+
+    return gen_resource_variable_ops.read_variable_op(
+        self.handle, self.dtype)
+
+  def read_value(self):
+    return self._read_variable_op()
+
+  def assign_sub(self, *args, **kwargs):
+    def assign_sub_fn(var, delta, **kw):
+      name = kw.pop("name", None)
+      read_value = kw.pop("read_value", True)
+      with self._handle_graph(var.handle):
+        op = gen_resource_variable_ops.assign_sub_variable_op(
+            var.handle, ops.convert_to_tensor(delta, dtype=self.dtype),
+            name=name)
+      if read_value:
+        return self._read_variable_op(parent_op=op)
+      return op
+
+    return self._assign_func(f=assign_sub_fn, *args, **kwargs)
+
+  def assign_add(self, *args, **kwargs):
+    def assign_add_fn(var, delta, **kw):
+      name = kw.pop("name", None)
+      read_value = kw.pop("read_value", True)
+      with self._handle_graph(var.handle):
+        op = gen_resource_variable_ops.assign_add_variable_op(
+            var.handle, ops.convert_to_tensor(delta, dtype=self.dtype),
+            name=name)
+      if read_value:
+        return self._read_variable_op(parent_op=op)
+      return op
+
+    return self._assign_func(f=assign_add_fn, *args, **kwargs)
+
+  def assign(self, *args, **kwargs):
+    def assign_fn(var, value, **kw):
+      name = kw.pop("name", None)
+      read_value = kw.pop("read_value", True)
+      with self._handle_graph(var.handle):
+        op = gen_resource_variable_ops.assign_variable_op(
+            var.handle, ops.convert_to_tensor(value, dtype=self.dtype),
+            name=name)
+      if read_value:
+        return self._read_variable_op(parent_op=op)
+      return op
+
+    return self._assign_func(f=assign_fn, *args, **kwargs)
+
+  @property
+  def aggregation(self):
+    return self._aggregation
+
+  @property
+  def constraint(self):
+    return None
+
+  @property
+  def initializer(self):
+    return control_flow_ops.group(
+        [v.initializer for v in nest.flatten(self._index)])
+
+  @property
+  def graph(self):
+    return self._primary_var.graph
+
+  @property
+  def _shared_name(self):
+    return self._common_name
+
+  @property
+  def _unique_id(self):
+    return self._primary_var._unique_id  # pylint: disable=protected-access
+
+  @property
+  def name(self):
+    return self._primary_var.name
+
+  @property
+  def dtype(self):
+    return self._primary_var.dtype
+
+  @property
+  def shape(self):
+    return self._primary_var.shape
+
+  def get_shape(self):
+    return self._primary_var.get_shape()
+
+  def to_proto(self, export_scope=None):
+    return self._primary_var.to_proto(export_scope=export_scope)
+
+  def _get_cross_tower(self):
+    device = device_util.canonicalize(device_util.current())
+    if device in self._index:
+      return self._index[device]
+    return self._primary_var
+
+  def _as_graph_element(self):
+    # pylint: disable=protected-access
+    if distribution_strategy_context.get_cross_tower_context():
+      return self._primary_var._as_graph_element()
+    return self._read_variable_op()
+
+  def _gather_saveables_for_checkpoint(self):
+    """Overrides CheckpointableBase method.
+
+    This allows both name-based and object-based save and restore of
+    MirroredVariables.
+
+    Returns:
+      A dictionary mapping attribute names to `SaveableObject` factories.
+    """
+    def _saveable_factory(name=self._common_name):
+      return _MirroredSaveable(self, self._primary_var, name)
+    return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory}
+
+  def _should_act_as_resource_variable(self):
+    """Pass resource_variable_ops.is_resource_variable check."""
+    pass
+
+  # Needed to pass ResourceVariable checks.
+  @property
+  def op(self):
+    return self._primary_var.op
+
+  @property
+  def _in_graph_mode(self):
+    return self._primary_var._in_graph_mode   # pylint: disable=protected-access
+
+  def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False):
+    """Converts a variable to a tensor."""
+    # pylint: disable=protected-access
+    if _enclosing_tpu_context() is None:
+      return self._get()._dense_var_to_tensor(dtype, name, as_ref)
+    # pylint: enable=protected-access
+    if dtype is not None and dtype != self.dtype:
+      raise NotImplementedError
+    if as_ref:
+      return self.handle
+    else:
+      return self.read_value()
+
+  def is_initialized(self, name=None):
+    """Identifies if all the component variables are initialized.
+
+    Args:
+      name: Name of the final `logical_and` op.
+
+    Returns:
+      The op that evaluates to True or False depending on if all the
+      component variables are initialized.
+    """
+    # TODO(jhseu): Do we need TPU context implementation?
+
+    # We have to cast the self._index.values() to a `list` because when we
+    # use `model_to_estimator` to run tf.keras models, self._index.values() is
+    # of type `dict_values` and not `list`.
+    values_list = nest.flatten(self._index)
+    result = values_list[0].is_initialized()
+    # We iterate through the list of values except the last one to allow us to
+    # name the final `logical_and` op the same name that is passed by the user
+    # to the `is_initialized` op. For distributed variables, the
+    # `is_initialized` op is a `logical_and` op.
+    for v in values_list[1:-1]:
+      result = math_ops.logical_and(result, v.is_initialized())
+    result = math_ops.logical_and(result, values_list[-1].is_initialized(),
+                                  name=name)
+    return result
+
+
+# Register a conversion function which reads the value of the variable,
+# allowing instances of the class to be used as tensors.
+def _tensor_conversion_tpu_mirrored(var, dtype=None, name=None, as_ref=False):
+  return var._dense_var_to_tensor(dtype=dtype, name=name, as_ref=as_ref)  # pylint: disable=protected-access
+
+
+ops.register_tensor_conversion_function(TPUMirroredVariable,
+                                        _tensor_conversion_tpu_mirrored)
+ops.register_dense_tensor_like_type(TPUMirroredVariable)
+
+
 class _TowerLocalSaveable(saver.BaseSaverBuilder.SaveableObject):
   """Class for defining how to restore a TowerLocalVariable."""
 
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
index 598da7418e..004b1012e5 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
@@ -78,7 +78,7 @@ class ReplicatedVariable(object):
     if tpu_context is None:
       return self._primary_var.handle
 
-    return tpu_context.get_replicated_var_handle(self)
+    return tpu_context.get_replicated_var_handle(self._name, self._vars)
 
   @contextlib.contextmanager
   def _assign_dependencies(self):
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 883e08bf47..11aaa1c66a 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -155,19 +155,20 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext):
     self._pivot = pivot
     self._replicated_vars = {}
 
-  def get_replicated_var_handle(self, var):
+  def get_replicated_var_handle(self, name, vars_):
     """Returns a variable handle for replicated TPU variable 'var'.
 
     This is a method used by an experimental replicated variable implementation
     and is not intended as a public API.
 
     Args:
-      var: The replicated TPU variable.
+      name: The common name of the variable.
+      vars_: The replicated TPU variables.
 
     Returns:
       The handle of the TPU replicated input node.
     """
-    handle = self._replicated_vars.get(var)
+    handle = self._replicated_vars.get(name)
     if handle is not None:
       return handle
 
@@ -183,10 +184,10 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext):
     saved_context = graph._get_control_flow_context()
     graph._set_control_flow_context(self.outer_context)
     handle = tpu_ops.tpu_replicated_input(
-        [v.handle for v in var._vars], name=var.name + "/handle")
+        [v.handle for v in vars_], name=name + "/handle")
     graph._set_control_flow_context(saved_context)
     # pylint: enable=protected-access
-    self._replicated_vars[var] = handle
+    self._replicated_vars[name] = handle
     return handle
 
   def report_unsupported_operations(self):
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 78f3198011..deac29111f 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -619,7 +619,7 @@ pywrap_tensorflow.TFE_Py_RegisterVSpace(_default_vspace)
 
 def _handle_or_self(x):
   """If x is ResourceVariable, return its handle, else x."""
-  if isinstance(x, resource_variable_ops.ResourceVariable):
+  if resource_variable_ops.is_resource_variable(x):
     x = x.handle
   return x
 
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 34faf03bb0..e6d82f0db7 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -468,6 +468,10 @@ class Estimator(object):
 
       with ops.Graph().as_default():
         if self._eval_distribution:
+          # We want to create the iterations variable outside the distribution
+          # scope as that is just stored on the host and mainly used to drive
+          # the loop and doesn't need to be a Mirrored/Device variable.
+          training.get_or_create_steps_per_run_variable()
           with self._eval_distribution.scope():
             return _evaluate()
         else:
diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index 31e4778e72..fb110c4b7b 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -22,7 +22,6 @@ from __future__ import print_function
 import os
 import time
 
-from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import training
@@ -144,14 +143,11 @@ class StrategyInitFinalizeHook(training.SessionRunHook):
     self._finalize_fn = finalize_fn
 
   def begin(self):
+    # We only create the init ops, but don't run it. We rely on SessionManager
+    # to run it for us.
     self._init_ops = self._initialization_fn()
     self._finalize_ops = self._finalize_fn()
 
-  def after_create_session(self, session, coord):
-    logging.info('Initialize system')
-    session.run(self._init_ops,
-                options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000))
-
   def end(self, session):
     logging.info('Finalize system.')
     session.run(self._finalize_ops)
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index f004f3944a..30b0ed20c8 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -471,7 +471,10 @@ class Optimizer(
 
       if var_list is None:
         var_list = tape.watched_variables()
-      grads = tape.gradient(loss_value, var_list, grad_loss)
+      # TODO(jhseu): Figure out why GradientTape's gradients don't require loss
+      # to be executed.
+      with ops.control_dependencies([loss_value]):
+        grads = tape.gradient(loss_value, var_list, grad_loss)
       return list(zip(grads, var_list))
 
     # Non-callable/Tensor loss case
diff --git a/tensorflow/python/training/session_manager.py b/tensorflow/python/training/session_manager.py
index a2e0645ba8..5e4749f306 100644
--- a/tensorflow/python/training/session_manager.py
+++ b/tensorflow/python/training/session_manager.py
@@ -25,6 +25,7 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -182,6 +183,10 @@ class SessionManager(object):
     """
     self._target = master
     sess = session.Session(self._target, graph=self._graph, config=config)
+    # TODO(jhseu): Delete once tpu.initialize_system() goes away.
+    sess.run(
+        distribution_strategy_context.get_distribution_strategy().initialize()
+    )
 
     if checkpoint_dir and checkpoint_filename_with_path:
       raise ValueError("Can not provide both checkpoint_dir and "
-- 
GitLab


From 3760cb47f3603638cf88c8771640af9debd30bad Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Fri, 28 Sep 2018 19:07:21 -0700
Subject: [PATCH 0905/1357] Fix bad reference to self._name in TPUEstimator
 infeed loop.

PiperOrigin-RevId: 215029224
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 545cee637f..3aa5b6efa1 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -426,10 +426,10 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
   def _run_infeed(self, queue_ctx, session):
     logging.info('Starting infeed thread controller.')
     if self._initial_infeed_sleep_secs:
-      logging.info('%s thread sleeping for %d seconds.', self._name,
+      logging.info('Infeed thread sleeping for %d seconds.',
                    self._initial_infeed_sleep_secs)
       time.sleep(self._initial_infeed_sleep_secs)
-      logging.info('%s thread starting after sleep', self._name)
+      logging.info('Infeed thread starting after sleep')
 
     with self._rendezvous.catch_errors(source='infeed', session=session):
       if self._run_infeed_loop_on_coordinator:
-- 
GitLab


From b5c66300d2c15a9bf1a8631161efa1a057e6ed31 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 19:35:15 -0700
Subject: [PATCH 0906/1357] Add learning_rates input to the
 SendTPUEmbeddingGradients op. This allows the learning rate to be modified at
 runtime. The implementation is not yet complete.

PiperOrigin-RevId: 215030536
---
 tensorflow/contrib/tpu/BUILD                  |  3 +
 .../contrib/tpu/ops/tpu_embedding_ops.cc      | 86 ++++++++++++-------
 tensorflow/contrib/tpu/python/ops/tpu_ops.py  | 27 ++++++
 3 files changed, 87 insertions(+), 29 deletions(-)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index e9aa037634..0c4bdab191 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -133,6 +133,9 @@ tf_custom_op_library(
 
 tf_gen_op_wrapper_py(
     name = "tpu_ops",
+    hidden = [
+        "SendTPUEmbeddingGradients",
+    ],
     deps = [
         ":cross_replica_ops_op_lib",
         ":heartbeat_ops_op_lib",
diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index bc1a0c5284..6b0730b40c 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -365,11 +365,11 @@ REGISTER_OP("RecvTPUEmbeddingActivations")
 An op that receives embedding activations on the TPU.
 
 The TPU system performs the embedding lookups and aggregations specified by
-the arguments to TPUEmbeddingEnqueueSparseBatch. The results of these
-aggregations are visible to the Tensorflow Graph as the outputs of a
-TPUEmbeddingDequeueActivations Op. This op returns a list containing one
-Tensor of activations per table specified in the model. There can be at most
-one ReceieveActivations op in the TPU graph.
+the arguments to TPUEmbeddingEnqueue(Integer/Sparse/SparseTensor)Batch. The
+results of these aggregations are visible to the Tensorflow Graph as the
+outputs of a RecvTPUEmbeddingActivations op. This op returns a list containing
+one Tensor of activations per table specified in the model. There can be at
+most one RecvTPUEmbeddingActivations op in the TPU graph.
 
 outputs: A TensorList of embedding activations containing one Tensor per
     embedding table in the model.
@@ -407,10 +407,25 @@ lookup_id: Identifier of the set of embedding indices which produced these
 
 REGISTER_OP("SendTPUEmbeddingGradients")
     .Input("inputs: N * float32")
+    .Input("learning_rates: NN * float32")
     .Attr("N: int >= 1")
+    .Attr("NN: int >= 0 = 0")
     .Attr("config: string")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
+    .SetShapeFn([](shape_inference::InferenceContext* c) -> Status {
+      int nn;
+      TF_RETURN_IF_ERROR(c->GetAttr("NN", &nn));
+      std::vector<shape_inference::ShapeHandle> learning_rates;
+      TF_RETURN_IF_ERROR(c->input("learning_rates", &learning_rates));
+      for (int i = 0; i < nn; ++i) {
+        // Verify that each learning_rates element is scalar
+        shape_inference::ShapeHandle learning_rates_shape;
+        TF_RETURN_IF_ERROR(
+            c->WithRank(learning_rates[i], 0, &learning_rates_shape));
+      }
+
+      return Status::OK();
+    })
     .Doc(R"doc(
 An op that performs gradient updates of embedding tables.
 
@@ -421,6 +436,11 @@ from these gradients via the optimizer specified in the configuration given
 to tpu.initialize_system.
 
 inputs: A TensorList of gradients with which to update embedding tables.
+    It contains one tensor per embedding table in the model.
+learning_rates: A list of float32 scalars, one for each embedding table,
+    containing the learning rates for each table when dynamic learning rate is
+    enabled through the OptimizationParameters in TPUEmbeddingConfiguration.
+    When the learning rate is constant, the list should be empty.
 config: Serialized TPUEmbeddingConfiguration proto.
 )doc");
 
@@ -434,10 +454,9 @@ REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch")
 An op that enqueues a list of input batch tensors to TPUEmbedding.
 
 batch: A list of 1D tensors, one for each embedding table, containing the
-batch inputs represented as integers.
-device_ordinal: The TPU device to use. This should be -1 when the Op
-is running on a TPU device, and >= 0 when the Op is running on the CPU
-device.
+    indices into the tables.
+device_ordinal: The TPU device to use. Should be >= 0 and less than the number
+    of TPU cores in the task on which the node is placed.
 )doc");
 
 REGISTER_OP("EnqueueTPUEmbeddingSparseBatch")
@@ -467,7 +486,8 @@ An op that enqueues TPUEmbedding input indices from a SparseTensor.
 This Op eases the porting of code that uses embedding_lookup_sparse(),
 although some Python preprocessing of the SparseTensor arguments to
 embedding_lookup_sparse() is required to produce the arguments to this Op,
-since only a single EnqueueTPUEmbedding Op is allowed per training step.
+since only a single EnqueueTPUEmbeddingSparseBatch Op is allowed per training
+step.
 
 The tensors at corresponding positions in the three input lists
 must have the same shape, i.e. rank 1 with dim_size() equal to the total
@@ -477,15 +497,18 @@ sample_indices: A list of Rank 1 Tensors specifying the training example and
     feature to which the corresponding embedding_indices and aggregation_weights
     values belong. sample_indices[i] must equal b * nf + f, where nf is the
     number of features from the corresponding table, f is in [0, nf), and
-    b is in [0, training batch size).
+    b is in [0, batch size).
 embedding_indices: A list of Rank 1 Tensors, indices into the embedding tables.
 aggregation_weights: A list of Rank 1 Tensors containing per sample -- i.e. per
     (training example, feature) -- aggregation weights.
-device_ordinal: The TPU device to use. This should be -1 when the Op
-is running on a TPU device, and >= 0 when the Op is running on the CPU
-device.
-combiners: A list of string scalars whose values are 'mean', 'sum', or 'sqrtn'
-to specify how to normalize the embedding activations after weighted summation.
+device_ordinal: The TPU device to use. Should be >= 0 and less than the number
+    of TPU cores in the task on which the node is placed.
+combiners: A list of string scalars, one for each embedding table that specify
+    how to normalize the embedding activations after weighted summation.
+    Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have
+    the sum of the weights be 0 for 'mean' or the sum of the squared weights be
+    0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
+    all tables.
 )doc");
 
 REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
@@ -505,22 +528,27 @@ sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond
 to ith feature. table_ids[i] indicates which embedding table to look up ith
 feature.
 
+The tensors at corresponding positions in the three input lists (sample_indices,
+embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1
+with dim_size() equal to the total number of lookups into the table described by
+the corresponding feature.
+
 sample_indices: A list of Rank 1 Tensors, corresponds to sp_ids.indices[:,0] in
-embedding_lookup_sparse().
+    embedding_lookup_sparse().
 embedding_indices: A list of Rank 1 Tensors, corresponds to sp_ids.values
- in embedding_lookup_sparse().
+    in embedding_lookup_sparse().
 aggregation_weights: A list of Rank 1 Tensors, corresponds to sp_weights.values
- in embedding_lookup_sparse().
-device_ordinal: The TPU device to use. This should be -1 when the Op
-is running on a TPU device, and >= 0 when the Op is running on the CPU
-device.
-combiners: A list of strings, one for each embedding table, specifying the
-reduction operation.  Currently, 'sum', 'mean' and 'sqrtn' are supported. It is
-invalid to have the sum of the weights be 0 for 'mean' or the sum of the squared
-weights be 0 for 'sqrtn'. If combiners isn't passed, the default is to
-use 'sum' for all tables.
+    in embedding_lookup_sparse().
+device_ordinal: The TPU device to use. Should be >= 0 and less than the number
+    of TPU cores in the task on which the node is placed.
+combiners: A list of string scalars, one for each embedding table that specify
+    how to normalize the embedding activations after weighted summation.
+    Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have
+    the sum of the weights be 0 for 'mean' or the sum of the squared weights be
+    0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
+    all tables.
 table_ids: A list of int. table_ids[i] indicates which embedding table to look
-up ith feature.
+    up ith feature in the list.
 )doc");
 
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
index a1aee69691..e2e4acadab 100644
--- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py
+++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
@@ -200,6 +200,33 @@ if platform.system() != "Windows":
     return gen_tpu_ops.infeed_dequeue_tuple(dtypes, shapes, name=name)
   # pylint: enable=redefined-outer-name
 
+  # pylint: disable=protected-access
+  def send_tpu_embedding_gradients(inputs,
+                                   config,
+                                   learning_rates=None,
+                                   name=None):
+    """A placeholder op for feeding per-sample gradients to the embedding layer.
+
+    Args:
+      inputs: A TensorList of gradients with which to update embedding tables.
+        Contains one tensor per embedding table in the model.
+      config: Serialized TPUEmbeddingConfiguration proto.
+      learning_rates: A TensorList of float32 scalars, one for each embedding
+        table, containing the learning rates for each table when dynamic
+        learning rate is enabled through the OptimizationParameters in
+        TPUEmbeddingConfiguration. When the learning rate is constant, the list
+        should be empty (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      A SendTPUEmbeddingGradients operation.
+    """
+    if learning_rates is None:
+      learning_rates = []
+    return gen_tpu_ops._send_tpu_embedding_gradients(
+        inputs=inputs, learning_rates=learning_rates, config=config, name=name)
+
+
 else:
   # We have already built the appropriate libraries into the binary via CMake
   # if we have built contrib, so we don't need this
-- 
GitLab


From d936d819752916d3122f02def571ecac9e995029 Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Fri, 28 Sep 2018 19:49:23 -0700
Subject: [PATCH 0907/1357] Lower the MKLCpuAllocator priority so that it can
 use default allocator when MKL is disabled, and with some  minor changes

---
 .../core/common_runtime/mkl_cpu_allocator.h   | 54 ++++++-------------
 .../core/common_runtime/process_util.cc       | 37 ++++++-------
 .../core/common_runtime/threadpool_device.cc  |  4 +-
 3 files changed, 36 insertions(+), 59 deletions(-)

diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 516138d28d..429b19599b 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mem.h"
-#include "tensorflow/core/util/util.h"
 #include "tensorflow/core/platform/numa.h"
 
 #ifndef INTEL_MKL_DNN_ONLY
@@ -164,12 +163,6 @@ class MklCPUAllocator : public Allocator {
   }
 
   Status Initialize() {
-    if (DisableMKL()) {
-        VLOG(1) << "TF-MKL: Disabling pool allocator";
-        tf_disable_pool_allocator_flag_ = true;
-        return Status::OK();
-    }
-
     VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
 
     // Set upper bound on memory allocation to physical RAM available on the
@@ -224,10 +217,6 @@ class MklCPUAllocator : public Allocator {
   inline string Name() override { return kName; }
 
   inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
-    if (tf_disable_pool_allocator_flag_) {
-      return port::AlignedMalloc(num_bytes, alignment);
-    }
-
     // If the allocation size is less than threshold, call small allocator,
     // otherwise call large-size allocator (BFC). We found that BFC allocator
     // does not deliver good performance for small allocations when
@@ -238,10 +227,6 @@ class MklCPUAllocator : public Allocator {
   }
 
   inline void DeallocateRaw(void* ptr) override {
-    if (tf_disable_pool_allocator_flag_) {
-      port::AlignedFree(ptr);
-      return;
-    }
     // Check if ptr is for "small" allocation. If it is, then call Free
     // directly. Otherwise, call BFC to handle free.
     if (small_size_allocator_->IsSmallSizeAllocation(ptr)) {
@@ -252,30 +237,26 @@ class MklCPUAllocator : public Allocator {
   }
 
   void GetStats(AllocatorStats* stats) override {
-    if (!tf_disable_pool_allocator_flag_) {
-      AllocatorStats l_stats, s_stats;
-      small_size_allocator_->GetStats(&s_stats);
-      large_size_allocator_->GetStats(&l_stats);
-
-      // Combine statistics from small-size and large-size allocator.
-      stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
-      stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
-      stats->max_bytes_in_use =
-          l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
-
-      // Since small-size allocations go to MklSmallSizeAllocator,
-      // max_alloc_size from large_size_allocator would be the maximum
-      // size allocated by MklCPUAllocator.
-      stats->max_alloc_size = l_stats.max_alloc_size;
-      stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
-    }
+    AllocatorStats l_stats, s_stats;
+    small_size_allocator_->GetStats(&s_stats);
+    large_size_allocator_->GetStats(&l_stats);
+
+    // Combine statistics from small-size and large-size allocator.
+    stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
+    stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
+    stats->max_bytes_in_use =
+        l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
+
+    // Since small-size allocations go to MklSmallSizeAllocator,
+    // max_alloc_size from large_size_allocator would be the maximum
+    // size allocated by MklCPUAllocator.
+    stats->max_alloc_size = l_stats.max_alloc_size;
+    stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
   }
 
   void ClearStats() override {
-    if (!tf_disable_pool_allocator_flag_) {
-      small_size_allocator_->ClearStats();
-      large_size_allocator_->ClearStats();
-    }
+    small_size_allocator_->ClearStats();
+    large_size_allocator_->ClearStats();
   }
 
  private:
@@ -314,7 +295,6 @@ class MklCPUAllocator : public Allocator {
   // The alignment that we need for the allocations
   static constexpr const size_t kAlignment = 64;
 
-  bool tf_disable_pool_allocator_flag_ = false;
   Allocator* large_size_allocator_;              // owned by this class
   MklSmallSizeAllocator* small_size_allocator_;  // owned by this class.
 
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index 60fa601907..b3064a4c08 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -57,28 +57,25 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op != 0) return inter_op;
 #ifdef INTEL_MKL
-  // Early return if MKL is disabled
-  if (DisableMKL())
-    return port::NumSchedulableCPUs();
-
-  // MKL library executes ops in parallel using OMP threads
-  // Set inter_op conservatively to avoid thread oversubscription that could
-  // lead to severe perf degradations and OMP resource exhaustion
-  int mkl_intra_op = 1;
-#ifdef _OPENMP
-  mkl_intra_op = omp_get_max_threads();
-#endif  // _OPENMP
-  CHECK_GE(mkl_intra_op, 1);
-  const int32 mkl_inter_op = std::max(
-      (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
-  VLOG(0) << "Creating new thread pool with default inter op setting: "
-          << mkl_inter_op
-          << ". Tune using inter_op_parallelism_threads for best performance.";
-  return mkl_inter_op;
-#else
+  if (!DisableMKL()) {
+    // MKL library executes ops in parallel using OMP threads
+    // Set inter_op conservatively to avoid thread oversubscription that could
+    // lead to severe perf degradations and OMP resource exhaustion
+    int mkl_intra_op = 1;
+  #ifdef _OPENMP
+    mkl_intra_op = omp_get_max_threads();
+  #endif  // _OPENMP
+    CHECK_GE(mkl_intra_op, 1);
+    const int32 mkl_inter_op = std::max(
+        (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
+    VLOG(0) << "Creating new thread pool with default inter op setting: "
+            << mkl_inter_op
+            << ". Tune using inter_op_parallelism_threads for best performance.";
+    return mkl_inter_op;
+  }
+#endif  // INTEL_MKL
   // Default to using the number of cores available in the process.
   return port::NumSchedulableCPUs();
-#endif  // INTEL_MKL
 }
 
 thread::ThreadPool* NewThreadPoolFromSessionOptions(
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 29c01d7f72..f188016610 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -50,7 +50,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       allocator_(allocator),
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
-  // Eearly return when MKL is disabled
+  // Early return when MKL is disabled
   if (DisableMKL())
     return;
 #ifdef _OPENMP
@@ -118,7 +118,7 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
 };
 
 #ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200), MklCPUAllocatorFactory);
 #endif  // ENABLE_MKL
 
 }  // namespace
-- 
GitLab


From 2e0e934e0b3c00863918c78bf55524eea3f0c0dc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 28 Sep 2018 20:51:11 -0700
Subject: [PATCH 0908/1357] Make tf.contrib.crf compatible with TPUs by using
 utils.smart_cond instead of tf.cond, which allows the static shape to be
 propagated correctly when available.

PiperOrigin-RevId: 215034102
---
 tensorflow/contrib/crf/python/ops/crf.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py
index 2a91dcb63a..43bb43129b 100644
--- a/tensorflow/contrib/crf/python/ops/crf.py
+++ b/tensorflow/contrib/crf/python/ops/crf.py
@@ -56,7 +56,6 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.layers import utils
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import rnn
@@ -214,10 +213,11 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
                                log_norm)
     return log_norm
 
-  max_seq_len = array_ops.shape(inputs)[1]
-  return control_flow_ops.cond(pred=math_ops.equal(max_seq_len, 1),
-                               true_fn=_single_seq_fn,
-                               false_fn=_multi_seq_fn)
+  return utils.smart_cond(
+      pred=math_ops.equal(inputs.shape[1].value or
+                          array_ops.shape(inputs)[1], 1),
+      true_fn=_single_seq_fn,
+      false_fn=_multi_seq_fn)
 
 
 def crf_log_likelihood(inputs,
-- 
GitLab


From d78595d333c9b5c8a0705ba6852c08b107d6c462 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 00:59:02 -0700
Subject: [PATCH 0909/1357] Make cuda_py_test create a gpu and cpu target.

Currently, we run tests on machines with GPUs based on the "gpu" tag, and the
tests automatically adapt to whether a GPU is available. Creating two targets,
one tagged with "gpu" and one not, will make us run the tests in both modes.

PiperOrigin-RevId: 215045035
---
 tensorflow/python/data/kernel_tests/BUILD     |  1 +
 tensorflow/tensorflow.bzl                     | 39 +++++++++++--------
 .../tools/pip_package/pip_smoke_test.py       |  2 +-
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index cadfe7f9e0..99d7f70513 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -318,6 +318,7 @@ cuda_py_test(
         "//tensorflow/python:framework_test_lib",
     ],
     tags = [
+        "no_oss",  # TODO(b/116813115): Investigate timeout and re-enable.
         "no_windows_gpu",
     ],
 )
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index cad5de1b0c..dead44c57e 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1798,22 +1798,29 @@ def cuda_py_test(
         flaky = 0,
         xla_enabled = False,
         grpc_enabled = False):
-    test_tags = tags + tf_cuda_tests_tags()
-    tf_py_test(
-        name = name,
-        size = size,
-        srcs = srcs,
-        data = data,
-        main = main,
-        args = args,
-        tags = test_tags,
-        shard_count = shard_count,
-        additional_deps = additional_deps,
-        kernels = kernels,
-        flaky = flaky,
-        xla_enabled = xla_enabled,
-        grpc_enabled = grpc_enabled,
-    )
+    if main == None:
+        main = name + ".py"
+    for config in ["cpu", "gpu"]:
+        test_name = name
+        test_tags = tags
+        if config == "gpu":
+            test_name += "_gpu"
+            test_tags = test_tags + tf_cuda_tests_tags()
+        tf_py_test(
+            name = test_name,
+            size = size,
+            srcs = srcs,
+            data = data,
+            main = main,
+            args = args,
+            tags = test_tags,
+            shard_count = shard_count,
+            additional_deps = additional_deps,
+            kernels = kernels,
+            flaky = flaky,
+            xla_enabled = xla_enabled,
+            grpc_enabled = grpc_enabled,
+        )
 
 register_extension_info(
     extension_name = "cuda_py_test",
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index c6ef82ccdc..e7f9628fa6 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -142,7 +142,7 @@ def main():
 
   missing_dependencies = []
   # File extensions and endings to ignore
-  ignore_extensions = ["_test", "_test.py"]
+  ignore_extensions = ["_test", "_test.py", "_test_gpu", "_test_gpu.py"]
 
   ignored_files = 0
   blacklisted_files = len(BLACKLIST)
-- 
GitLab


From d8db18b4201d9d82d1c93ed5453914ff16f1adf4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 02:02:02 -0700
Subject: [PATCH 0910/1357] compat: Update forward compatibility horizon to
 2018-09-29

PiperOrigin-RevId: 215048726
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index b74fce3a4c..24a795c787 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 28)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 29)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From f16111286b19f4145df63b73c45be1645bde8737 Mon Sep 17 00:00:00 2001
From: Bairen Yi <byi@connect.ust.hk>
Date: Sat, 29 Sep 2018 22:13:09 +0800
Subject: [PATCH 0911/1357] Added log entries for copying unpinned memory RDMA

Currently there are large number of tensors managed
by non-visitable memory allocators in CPU-only PS.
GPU workers seem less prone to this problem.

Copying large sized tensor buffers may introduce
non-trivial overhead. Should probably fix this.

Signed-off-by: Bairen Yi <byi@connect.ust.hk>
---
 tensorflow/contrib/gdr/gdr_memory_manager.cc | 156 +++++++++++--------
 1 file changed, 93 insertions(+), 63 deletions(-)

diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc
index bb06f1c41c..3549cedb70 100644
--- a/tensorflow/contrib/gdr/gdr_memory_manager.cc
+++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include <fstream>
 #include <list>
 #include <map>
-#include <set>
 
 #include <fcntl.h>
 #include <rdma/rdma_cma.h>
@@ -30,19 +29,17 @@ limitations under the License.
 #include <sys/epoll.h>
 
 #include "tensorflow/contrib/gdr/gdr.pb.h"
-#include "tensorflow/core/common_runtime/bfc_allocator.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
-#include "tensorflow/core/common_runtime/pool_allocator.h"
 #include "tensorflow/core/common_runtime/process_state.h"
 #if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #endif  // GOOGLE_CUDA
-#include "tensorflow/core/framework/allocator_registry.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/numa.h"
 
 namespace tensorflow {
 
@@ -70,14 +67,11 @@ bool IsGDRAvailable() {
 int TryToReadNumaNode(ibv_device* device) {
 #if defined(__APPLE__)
   LOG(INFO) << "OS X does not support NUMA - returning NUMA node 0";
-  return 0;
+  return port::kNUMANoAffinity;
 #elif defined(PLATFORM_WINDOWS)
   // Windows support for NUMA is not currently implemented. Return node 0.
-  return 0;
+  return port::kNUMANoAffinity;
 #else
-  VLOG(2) << "Trying to read NUMA node for device: " << device->name;
-  static const int kUnknownNumaNode = -1;
-
   auto filename = string(device->ibdev_path) + "/device/numa_node";
 
   std::ifstream ifs(filename.c_str());
@@ -91,12 +85,12 @@ int TryToReadNumaNode(ibv_device* device) {
                 << value
                 << "), but there must be at least one NUMA node"
                    ", so returning NUMA node zero";
-      return 0;
+      return port::kNUMANoAffinity;
     }
     LOG(INFO) << "NUMA node for device: " << device->name << " is " << value;
     return value;
   }
-  return kUnknownNumaNode;
+  return port::kNUMANoAffinity;
 #endif
 }
 
@@ -138,8 +132,6 @@ class GdrMemoryManager : public RemoteMemoryManager {
       Device* device, DeviceContext* device_context, bool on_host,
       StatusCallback done) override;
 
-  static void RegMemVisitors();
-
  protected:
   Status CreateEndpoint(const string& host, const string& port,
                         RdmaEndpointPtr& endpoint);
@@ -150,7 +142,8 @@ class GdrMemoryManager : public RemoteMemoryManager {
 
   ibv_mr* FindMemoryRegion(void* addr, size_t length);
 
-  void InsertMemoryRegion(void* addr, size_t length);
+  void InsertMemoryRegion(void* addr, size_t length,
+                          const std::string& allocator_name);
 
   void EvictMemoryRegion(void* addr, size_t length);
 
@@ -160,6 +153,7 @@ class GdrMemoryManager : public RemoteMemoryManager {
   RdmaEndpointPtr listening_;
   std::atomic<bool> stopped_;
   int epfd_;
+  int numa_node_;
 
   // Server side endpoints
   // Accessed sequentially in Run() so not protected by lock
@@ -190,46 +184,10 @@ GdrMemoryManager::GdrMemoryManager(const string& host, const string& port)
       port_(port),
       listening_(nullptr, EndpointDeleter),
       stopped_(true),
-      next_key_(0) {
-  static std::once_flag flag;
-  std::call_once(flag, []() { RegMemVisitors(); });
-}
+      next_key_(0) {}
 
 GdrMemoryManager::~GdrMemoryManager() { close(epfd_); }
 
-/*static*/ void GdrMemoryManager::RegMemVisitors() {
-  SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node,
-                                           size_t num_bytes) {
-    GdrMemoryManager::Singleton().InsertMemoryRegion(
-        ptr, num_bytes, strings::StrCat("CPU:", numa_node));
-  };
-  SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node,
-                                          size_t num_bytes) {
-    GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes);
-  };
-  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
-  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
-
-#if GOOGLE_CUDA
-  if (IsGDRAvailable()) {
-    int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
-
-    // Note we don't free allocated GPU memory so there is no free visitor
-    SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
-                                                  size_t num_bytes) {
-      RdmaMemoryMgr::Singleton().InsertMemoryRegion(
-          ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
-    };
-    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
-                                                     cuda_alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
-                                                          alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
-    LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
-  }
-#endif  // GOOGLE_CUDA
-}
-
 Status GdrMemoryManager::Init() {
   epfd_ = epoll_create1(0);
   if (epfd_ == -1) {
@@ -289,6 +247,42 @@ Status GdrMemoryManager::Init() {
                                "cannot add server to epoll");
   }
 
+  numa_node_ = TryToReadNumaNode(listening_->verbs->device);
+
+  SubAllocator::Visitor alloc_visitor = [this](void* ptr, int numa_node,
+                                               size_t num_bytes) {
+    VLOG(2) << "Registering RDMA capable memory region on numa_node "
+            << numa_node;
+    InsertMemoryRegion(ptr, num_bytes, strings::StrCat("CPU:", numa_node));
+  };
+  SubAllocator::Visitor free_visitor = [this](void* ptr, int numa_node,
+                                              size_t num_bytes) {
+    VLOG(2) << "De-registering RDMA capable memory region on numa_node "
+            << numa_node;
+    EvictMemoryRegion(ptr, num_bytes);
+  };
+  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
+  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
+  LOG(INFO) << "Instrumenting CPU allocator(s)";
+
+#if GOOGLE_CUDA
+  if (IsGDRAvailable()) {
+    int bus_id = numa_node_ + 1;
+
+    SubAllocator::Visitor cuda_alloc_visitor = [this](void* ptr, int gpu_id,
+                                                      size_t num_bytes) {
+      VLOG(2) << "Registering RDMA capable memory region on GPU " << gpu_id;
+      InsertMemoryRegion(ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
+    };
+    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
+                                                     cuda_alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
+                                                          alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
+    LOG(INFO) << "Instrumenting GPU allocator(s) with bus_id " << bus_id;
+  }
+#endif  // GOOGLE_CUDA
+
   return Status::OK();
 }
 
@@ -405,7 +399,7 @@ void GdrMemoryManager::TransportOptionsFromTensor(
   ibv_mr* mr = FindMemoryRegion(addr, length);
 
 #if GOOGLE_CUDA
-  if (!on_host) {
+  if (device->tensorflow_gpu_device_info() && !on_host) {
     Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0);
     Tensor* host_copy = new Tensor(alloc, tensor.dtype(), tensor.shape());
     GPUUtil::CopyGPUTensorToCPU(
@@ -456,11 +450,27 @@ void GdrMemoryManager::TransportOptionsFromTensor(
 #endif
 
   if (mr == nullptr) {
-    done(errors::Unavailable("Cannot find pinned memory region"));
-    return;
+    Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_);
+    Tensor host_copy(alloc, tensor.dtype(), tensor.shape());
+
+    std::memcpy(DMAHelper::buffer(&host_copy)->data(), buffer->data(), length);
+    VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer";
+
+    buffer = DMAHelper::buffer(&host_copy);
+    addr = buffer->data();
+    length = buffer->size();
+
+    mr = FindMemoryRegion(addr, length);
+    if (mr == nullptr) {
+      done(errors::Unavailable("Cannot find pinned memory region"));
+      return;
+    }
+
+    buffer->Ref();
+  } else {
+    buffer->Ref();
   }
 
-  buffer->Ref();
   TensorKey tensor_key = next_key_++;
   {
     mutex_lock l(server_mu_);
@@ -470,7 +480,7 @@ void GdrMemoryManager::TransportOptionsFromTensor(
   uint64_t checksum = 0;
   if (VLOG_IS_ON(2)) {
 #ifdef GOOGLE_CUDA
-    if (!on_host) {
+    if (device->tensorflow_gpu_device_info() && !on_host) {
       checksum = GPUUtil::Checksum(device, device_context, tensor);
     } else {
       checksum = GPUUtil::Checksum(tensor);
@@ -508,7 +518,8 @@ void GdrMemoryManager::TensorFromTransportOptions(
   Tensor host_copy;
 #if GOOGLE_CUDA
   if (mr == nullptr && !on_host) {
-    Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0);
+    Allocator* alloc =
+        GPUProcessState::singleton()->GetCUDAHostAllocator(numa_node_);
     host_copy = Tensor(alloc, tensor->dtype(), tensor->shape());
     buffer = DMAHelper::buffer(&host_copy);
     addr = buffer->data();
@@ -518,8 +529,18 @@ void GdrMemoryManager::TensorFromTransportOptions(
 #endif  // GOOGLE_CUDA
 
   if (mr == nullptr) {
-    done(errors::Unavailable("Cannot find pinned memory region"));
-    return;
+    Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_);
+    host_copy = Tensor(alloc, tensor->dtype(), tensor->shape());
+
+    buffer = DMAHelper::buffer(&host_copy);
+    addr = buffer->data();
+    length = buffer->size();
+
+    mr = FindMemoryRegion(addr, length);
+    if (mr == nullptr) {
+      done(errors::Unavailable("Cannot find pinned memory region"));
+      return;
+    }
   }
 
   decltype(clients_)::iterator iter;
@@ -568,7 +589,8 @@ void GdrMemoryManager::TensorFromTransportOptions(
   }
 
 #if GOOGLE_CUDA
-  if (host_copy.NumElements() > 0) {
+  if (device->tensorflow_gpu_device_info() && !on_host &&
+      host_copy.NumElements() > 0) {
     uint64_t checksum = 0;
     if (VLOG_IS_ON(2)) {
       checksum = GPUUtil::Checksum(host_copy);
@@ -598,6 +620,12 @@ void GdrMemoryManager::TensorFromTransportOptions(
   }
 #endif  // GOOGLE_CUDA
 
+  if ((on_host || !device->tensorflow_gpu_device_info()) &&
+      host_copy.NumElements() > 0) {
+    std::memcpy(DMAHelper::buffer(tensor)->data(), addr, length);
+    VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer";
+  }
+
   uint64_t end = Env::Default()->NowMicros();
 
   VLOG(2) << "RDMA from remote memory region " << remote_mr.rkey()
@@ -607,7 +635,7 @@ void GdrMemoryManager::TensorFromTransportOptions(
   uint64_t checksum = 0;
   if (VLOG_IS_ON(2)) {
 #ifdef GOOGLE_CUDA
-    if (device->tensorflow_gpu_device_info() && (!on_host)) {
+    if (device->tensorflow_gpu_device_info() && !on_host) {
       checksum = GPUUtil::Checksum(device, device_context, *tensor);
     } else {
       checksum = GPUUtil::Checksum(*tensor);
@@ -668,7 +696,8 @@ ibv_mr* GdrMemoryManager::FindMemoryRegion(void* addr, size_t length) {
   }
 }
 
-void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) {
+void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length,
+                                          const std::string& allocator_name) {
   if (length == 0) return;
   ibv_mr* mr = rdma_reg_read(listening_.get(), addr, length);
   if (mr != nullptr) {
@@ -676,7 +705,8 @@ void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) {
     auto iter = std::upper_bound(mrs_.begin(), mrs_.end(), addr, &Comparator);
     mrs_.insert(iter, {mr, &MRDeleter});
   } else {
-    LOG(WARNING) << "Cannot register memory region";
+    LOG(WARNING) << "Cannot register memory region allocated by "
+                 << allocator_name;
   }
 }
 
-- 
GitLab


From eb6c1bdcbf6093888f2b443fdb49f836f3352316 Mon Sep 17 00:00:00 2001
From: Joe Yearsley <josephelliotyearsley@gmail.com>
Date: Tue, 13 Mar 2018 07:23:18 +0000
Subject: [PATCH 0912/1357] Update core.py

Added `data_format` to flatten to allow changing of it during inference time.
---
 tensorflow/python/layers/core.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index 9879e5020f..5f89e3c0c3 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -268,7 +268,14 @@ def dropout(inputs,
 @tf_export('layers.Flatten')
 class Flatten(keras_layers.Flatten, base.Layer):
   """Flattens an input tensor while preserving the batch axis (axis 0).
-
+  
+  Arguments:
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, ..., channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, ...)`.
+      
   Examples:
 
   ```
@@ -285,11 +292,16 @@ class Flatten(keras_layers.Flatten, base.Layer):
 
 
 @tf_export('layers.flatten')
-def flatten(inputs, name=None):
+def flatten(inputs, data_format='channels_last', name=None):
   """Flattens an input tensor while preserving the batch axis (axis 0).
 
   Arguments:
     inputs: Tensor input.
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, height, width, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, height, width)`.
     name: The name of the layer (string).
 
   Returns:
@@ -307,7 +319,7 @@ def flatten(inputs, name=None):
     # now `y` has shape `(None, None)`
   ```
   """
-  layer = Flatten(name=name)
+  layer = Flatten(data_format=data_format, name=name)
   return layer.apply(inputs)
 
 
-- 
GitLab


From dd928d5ae31dd0484e5e4a96c6322adecc4e511b Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sun, 18 Mar 2018 19:24:10 +0000
Subject: [PATCH 0913/1357] Added Flatten Test

---
 tensorflow/python/layers/core_test.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index d26f3f4789..0d019897aa 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -476,6 +476,22 @@ class FlattenTest(test.TestCase):
     shape = core_layers.Flatten().compute_output_shape((None, 3, None))
     self.assertEqual(shape.as_list(), [None, None])
 
+  def testDataFormat(self):
+    np_input_channels_last = np.arange(3, 7).reshape([1, 2, 3, 2])
+
+    with self.test_session() as sess:
+      x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
+      y = core_layers.Flatten(data_format='channels_last')(x)
+      np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last})
+
+      x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
+      y = core_layers.Flatten(data_format='channels_first')(x)
+      np_input_channels_first = np.transpose(np_input_channels_last,
+                                             [0, 3, 1, 2])
+      np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first})
+
+      self.assertEqual(np_output_cl, np_output_cf)
+
   def testFunctionalFlatten(self):
     x = array_ops.placeholder(shape=(None, 2, 3), dtype='float32')
     y = core_layers.flatten(x, name='flatten')
-- 
GitLab


From 579aecd2de1f0582858f83e3c8da2a8dbb57993b Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sun, 18 Mar 2018 20:08:59 +0000
Subject: [PATCH 0914/1357] added dtype to test

---
 tensorflow/python/layers/core_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 0d019897aa..31f3a4e0b0 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -477,7 +477,7 @@ class FlattenTest(test.TestCase):
     self.assertEqual(shape.as_list(), [None, None])
 
   def testDataFormat(self):
-    np_input_channels_last = np.arange(3, 7).reshape([1, 2, 3, 2])
+    np_input_channels_last = np.arange(12, dtype='float32').reshape([1, 2, 3, 2])
 
     with self.test_session() as sess:
       x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
-- 
GitLab


From 76964f315f7c52d63ce6578d87278a96c7394ece Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sun, 18 Mar 2018 22:01:21 +0000
Subject: [PATCH 0915/1357] pylint compliance

---
 tensorflow/python/layers/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index 5f89e3c0c3..5919fa543e 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -268,14 +268,14 @@ def dropout(inputs,
 @tf_export('layers.Flatten')
 class Flatten(keras_layers.Flatten, base.Layer):
   """Flattens an input tensor while preserving the batch axis (axis 0).
-  
+
   Arguments:
     data_format: A string, one of `channels_last` (default) or `channels_first`.
       The ordering of the dimensions in the inputs.
       `channels_last` corresponds to inputs with shape
       `(batch, ..., channels)` while `channels_first` corresponds to
       inputs with shape `(batch, channels, ...)`.
-      
+
   Examples:
 
   ```
-- 
GitLab


From 110baa57112a95c2644896ce6ff75894e1ae61c7 Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sun, 18 Mar 2018 23:10:55 +0000
Subject: [PATCH 0916/1357] Extended to N-dims

---
 tensorflow/python/layers/core_test.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 31f3a4e0b0..d5b8a0ff65 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -476,15 +476,31 @@ class FlattenTest(test.TestCase):
     shape = core_layers.Flatten().compute_output_shape((None, 3, None))
     self.assertEqual(shape.as_list(), [None, None])
 
-  def testDataFormat(self):
-    np_input_channels_last = np.arange(12, dtype='float32').reshape([1, 2, 3, 2])
+  def testDataFormat5d(self):
+    np_input_channels_last = np.arange(120, dtype='float32').reshape([1, 5, 4, 3, 2])
 
     with self.test_session() as sess:
-      x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
+      x = array_ops.placeholder(shape=(1, 5, 4, 3, 2), dtype='float32')
       y = core_layers.Flatten(data_format='channels_last')(x)
       np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last})
 
-      x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
+      x = array_ops.placeholder(shape=(1, 2, 5, 4, 3), dtype='float32')
+      y = core_layers.Flatten(data_format='channels_first')(x)
+      np_input_channels_first = np.transpose(np_input_channels_last,
+                                             [0, 4, 1, 2, 3])
+      np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first})
+
+      self.assertEqual(np_output_cl, np_output_cf)
+
+  def testDataFormat4d(self):
+    np_input_channels_last = np.arange(24, dtype='float32').reshape([1, 4, 3, 2])
+
+    with self.test_session() as sess:
+      x = array_ops.placeholder(shape=(1, 4, 3, 2), dtype='float32')
+      y = core_layers.Flatten(data_format='channels_last')(x)
+      np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last})
+
+      x = array_ops.placeholder(shape=(1, 2, 4, 3), dtype='float32')
       y = core_layers.Flatten(data_format='channels_first')(x)
       np_input_channels_first = np.transpose(np_input_channels_last,
                                              [0, 3, 1, 2])
-- 
GitLab


From 4de591a03a9bd49a05d67fe48f9358dbdac51561 Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sat, 30 Jun 2018 08:14:40 +0100
Subject: [PATCH 0917/1357] Fixed Pylint Issues

---
 tensorflow/python/layers/core_test.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index d5b8a0ff65..8ad0e8c4ba 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -477,7 +477,8 @@ class FlattenTest(test.TestCase):
     self.assertEqual(shape.as_list(), [None, None])
 
   def testDataFormat5d(self):
-    np_input_channels_last = np.arange(120, dtype='float32').reshape([1, 5, 4, 3, 2])
+    np_input_channels_last = np.arange(120, dtype='float32').reshape(
+        [1, 5, 4, 3, 2])
 
     with self.test_session() as sess:
       x = array_ops.placeholder(shape=(1, 5, 4, 3, 2), dtype='float32')
@@ -493,7 +494,8 @@ class FlattenTest(test.TestCase):
       self.assertEqual(np_output_cl, np_output_cf)
 
   def testDataFormat4d(self):
-    np_input_channels_last = np.arange(24, dtype='float32').reshape([1, 4, 3, 2])
+    np_input_channels_last = np.arange(24, dtype='float32').reshape(
+        [1, 4, 3, 2])
 
     with self.test_session() as sess:
       x = array_ops.placeholder(shape=(1, 4, 3, 2), dtype='float32')
-- 
GitLab


From 46fc7a9530e9c8f6bf909de8df8c97e4b38a99a5 Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Mon, 23 Jul 2018 23:06:48 +0100
Subject: [PATCH 0918/1357] Fixed Tests

---
 tensorflow/python/layers/core_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 8ad0e8c4ba..22ed75dda7 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -491,7 +491,7 @@ class FlattenTest(test.TestCase):
                                              [0, 4, 1, 2, 3])
       np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first})
 
-      self.assertEqual(np_output_cl, np_output_cf)
+      self.assertAllEqual(np_output_cl, np_output_cf)
 
   def testDataFormat4d(self):
     np_input_channels_last = np.arange(24, dtype='float32').reshape(
@@ -508,7 +508,7 @@ class FlattenTest(test.TestCase):
                                              [0, 3, 1, 2])
       np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first})
 
-      self.assertEqual(np_output_cl, np_output_cf)
+      self.assertAllEqual(np_output_cl, np_output_cf)
 
   def testFunctionalFlatten(self):
     x = array_ops.placeholder(shape=(None, 2, 3), dtype='float32')
-- 
GitLab


From da930ea7fd16c903346ff36f5f57548dbea98bdc Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Tue, 21 Aug 2018 08:17:29 +0100
Subject: [PATCH 0919/1357] Updated golden

---
 tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 509ceff9df..e65ffeb12e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -832,10 +832,6 @@ tf_module {
     name: "broadcast_static_shape"
     argspec: "args=[\'shape_x\', \'shape_y\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "broadcast_to"
-    argspec: "args=[\'input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "case"
     argspec: "args=[\'pred_fn_pairs\', \'default\', \'exclusive\', \'strict\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'False\', \'case\'], "
-- 
GitLab


From 459accb2b7bdea542415f3a744cbe9e348f847d6 Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Tue, 21 Aug 2018 21:02:13 +0100
Subject: [PATCH 0920/1357] Updated layers

---
 tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
index df74c32e1f..5d9ea2e5a3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "
   }
   member_method {
     name: "max_pooling1d"
-- 
GitLab


From a58135a6a9637db0908c88f39df22b69bafaec3d Mon Sep 17 00:00:00 2001
From: Joe Yearsley <josephelliotyearsley@gmail.com>
Date: Sat, 25 Aug 2018 16:04:34 +0100
Subject: [PATCH 0921/1357] Updated protobuf

---
 tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index e65ffeb12e..509ceff9df 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -832,6 +832,10 @@ tf_module {
     name: "broadcast_static_shape"
     argspec: "args=[\'shape_x\', \'shape_y\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "broadcast_to"
+    argspec: "args=[\'input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "case"
     argspec: "args=[\'pred_fn_pairs\', \'default\', \'exclusive\', \'strict\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'False\', \'case\'], "
-- 
GitLab


From 8e87c649fc290c758c4240bf202de0c7f0f3a4ad Mon Sep 17 00:00:00 2001
From: Joe Yearsley <josephelliotyearsley@gmail.com>
Date: Sat, 29 Sep 2018 17:38:44 +0100
Subject: [PATCH 0922/1357] Updated v2

---
 tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
index df74c32e1f..5fd6ba1192 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
@@ -122,8 +122,8 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
+    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "  
+}
   member_method {
     name: "max_pooling1d"
     argspec: "args=[\'inputs\', \'pool_size\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'valid\', \'channels_last\', \'None\'], "
-- 
GitLab


From 32059ed204ecbee7828057d23a1c1daf561c87fd Mon Sep 17 00:00:00 2001
From: Joe Yearsley <josephelliotyearsley@gmail.com>
Date: Sat, 29 Sep 2018 17:42:52 +0100
Subject: [PATCH 0923/1357] Update tensorflow.layers.pbtxt

---
 tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
index 5fd6ba1192..5d9ea2e5a3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
@@ -122,8 +122,8 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "  
-}
+    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "
+  }
   member_method {
     name: "max_pooling1d"
     argspec: "args=[\'inputs\', \'pool_size\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'valid\', \'channels_last\', \'None\'], "
-- 
GitLab


From 70a395f9795a48c21bc35cdf1dc44778f73a7bba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 11:58:55 -0700
Subject: [PATCH 0924/1357] Automated rollback of commit
 d78595d333c9b5c8a0705ba6852c08b107d6c462

PiperOrigin-RevId: 215073584
---
 tensorflow/python/data/kernel_tests/BUILD     |  1 -
 tensorflow/tensorflow.bzl                     | 39 ++++++++-----------
 .../tools/pip_package/pip_smoke_test.py       |  2 +-
 3 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 99d7f70513..cadfe7f9e0 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -318,7 +318,6 @@ cuda_py_test(
         "//tensorflow/python:framework_test_lib",
     ],
     tags = [
-        "no_oss",  # TODO(b/116813115): Investigate timeout and re-enable.
         "no_windows_gpu",
     ],
 )
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index dead44c57e..cad5de1b0c 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1798,29 +1798,22 @@ def cuda_py_test(
         flaky = 0,
         xla_enabled = False,
         grpc_enabled = False):
-    if main == None:
-        main = name + ".py"
-    for config in ["cpu", "gpu"]:
-        test_name = name
-        test_tags = tags
-        if config == "gpu":
-            test_name += "_gpu"
-            test_tags = test_tags + tf_cuda_tests_tags()
-        tf_py_test(
-            name = test_name,
-            size = size,
-            srcs = srcs,
-            data = data,
-            main = main,
-            args = args,
-            tags = test_tags,
-            shard_count = shard_count,
-            additional_deps = additional_deps,
-            kernels = kernels,
-            flaky = flaky,
-            xla_enabled = xla_enabled,
-            grpc_enabled = grpc_enabled,
-        )
+    test_tags = tags + tf_cuda_tests_tags()
+    tf_py_test(
+        name = name,
+        size = size,
+        srcs = srcs,
+        data = data,
+        main = main,
+        args = args,
+        tags = test_tags,
+        shard_count = shard_count,
+        additional_deps = additional_deps,
+        kernels = kernels,
+        flaky = flaky,
+        xla_enabled = xla_enabled,
+        grpc_enabled = grpc_enabled,
+    )
 
 register_extension_info(
     extension_name = "cuda_py_test",
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index e7f9628fa6..c6ef82ccdc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -142,7 +142,7 @@ def main():
 
   missing_dependencies = []
   # File extensions and endings to ignore
-  ignore_extensions = ["_test", "_test.py", "_test_gpu", "_test_gpu.py"]
+  ignore_extensions = ["_test", "_test.py"]
 
   ignored_files = 0
   blacklisted_files = len(BLACKLIST)
-- 
GitLab


From 639d0dd8c1ba8d2956ccb59604c157de7ba0a7f2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 12:00:53 -0700
Subject: [PATCH 0925/1357] Cleanup

PiperOrigin-RevId: 215073641
---
 tensorflow/core/BUILD          | 3 ---
 tensorflow/core/profiler/BUILD | 1 -
 2 files changed, 4 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 7da4b9fbd0..57819cec70 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -239,7 +239,6 @@ tf_proto_library(
     srcs = [],
     cc_api_version = 2,
     default_header = True,
-    java_api_version = 2,
     js_api_version = 2,
     protodeps = [
         ":protos_all_proto",
@@ -2385,7 +2384,6 @@ tf_proto_library(
     srcs = ERROR_CODES_PROTO_SRCS,
     cc_api_version = 2,
     default_header = True,
-    java_api_version = 2,
     js_api_version = 2,
     provide_cc_alias = True,
 )
@@ -2406,7 +2404,6 @@ tf_proto_library(
     srcs = COMMON_PROTO_SRCS + ADDITIONAL_CORE_PROTO_SRCS,
     cc_api_version = 2,
     default_header = True,
-    java_api_version = 2,
     js_api_version = 2,
     protodeps = [
         ":error_codes_proto",
diff --git a/tensorflow/core/profiler/BUILD b/tensorflow/core/profiler/BUILD
index af034bdd7d..2bf371276e 100644
--- a/tensorflow/core/profiler/BUILD
+++ b/tensorflow/core/profiler/BUILD
@@ -40,7 +40,6 @@ tf_proto_library(
     name = "protos_all",
     srcs = glob(["**/*.proto"]),
     cc_api_version = 2,
-    java_api_version = 2,
     protodeps = tf_additional_all_protos(),
     visibility = ["//visibility:public"],
 )
-- 
GitLab


From 4cf1b45b2e9188086bcb7d12654cd3e130e9b823 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 14:13:01 -0700
Subject: [PATCH 0926/1357] Disable PinToHostOptimizer for NoOp.

PiperOrigin-RevId: 215079134
---
 .../core/grappler/optimizers/pin_to_host_optimizer.cc     | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 2190d38937..89eb76046e 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -169,7 +169,13 @@ bool IsTPUGraphDef(const GraphDef& def) {
 }
 
 // All the nodes that should be blacklisted and not swapped.
-bool IsBlacklisted(const NodeDef& node) { return IsCollective(node); }
+bool IsBlacklisted(const NodeDef& node) {
+  return
+      // Collective ops should not be swapped.
+      IsCollective(node) ||
+      // NoOp breaks perf regression tests (probably due to group dependencies).
+      IsNoOp(node);
+}
 }  // end namespace internal
 
 Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
-- 
GitLab


From 2538e68a69e585696175bd972cae119e06bde294 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 16:13:51 -0700
Subject: [PATCH 0927/1357] Remove workaround for symlinked headers.

PiperOrigin-RevId: 215083669
---
 third_party/gpus/cuda_configure.bzl | 33 +++++++++--------------------
 third_party/py/python_configure.bzl |  4 ++--
 2 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index f5fdd3a75e..69f4599c16 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -1107,8 +1107,8 @@ def symlink_genrule_for_dir(
             # $(@D) will include the full path to the file.
             dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i]
 
-            # On Windows, symlink is not supported, so we just copy all the files.
-            cmd = "cp -f" if _is_windows(repository_ctx) else "ln -s"
+            # Copy the headers to create a sandboxable setup.
+            cmd = "cp -f"
             command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
             outs.append('        "' + dest_dir + dest_files[i] + '",')
     genrule = _genrule(
@@ -1334,27 +1334,14 @@ def _create_local_cuda_repository(repository_ctx):
         cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
         cuda_defines["%{host_compiler_warnings}"] = ""
 
-        # TODO(klimek): We currently need to inject "/" as builtin directory path
-        # to disable bazel's dependency checks.
-        # The problem is that:
-        # - the python rules symlink the python headers into the bazel root
-        # - the rules use 'includes' in the BUILD file to redirect includes of the
-        #   python headers through those paths
-        # - bazel currently uses -isystem for include paths specified via 'includes'
-        # - gcc follows symlinks when resolving files via -isystem paths, and puts
-        #   the resolved paths into the .d file, which makes the dependency check
-        #   fail for bazel
-        # There are multiple possible ways to solve this:
-        # 1. make bazel not use -isystem for paths specified via 'includes'
-        # 2. cp the headers instead of symlinking them
-        #
-        # Once this is fixed, the right builtin directory path is:
-        # (host_compiler_includes +
-        #    "\n  cxx_builtin_include_directory: \"%s\"" % cuda_include_path)
-        # The cuda directory needs to be passed, as there is currently no rule
-        # providing the cuda headers in the same way the python headers are
-        # provided.
-        cuda_defines["%{host_compiler_includes}"] = "\n  cxx_builtin_include_directory: \"/\""
+        # nvcc has the system include paths built in and will automatically
+        # search them; we cannot work around that, so we add the relevant cuda
+        # system paths to the allowed compiler specific include paths.
+        cuda_defines["%{host_compiler_includes}"] = (
+            host_compiler_includes + "\n" +
+            _cuda_include_path(repository_ctx, cuda_config) +
+            "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
+            "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir)
         nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" %
                                             (
                                                 cuda_config.cuda_toolkit_path,
diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl
index 3c7e5c8469..53264630a1 100644
--- a/third_party/py/python_configure.bzl
+++ b/third_party/py/python_configure.bzl
@@ -130,8 +130,8 @@ def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name,
       # If we have only one file to link we do not want to use the dest_dir, as
       # $(@D) will include the full path to the file.
       dest = '$(@D)/' + dest_dir + dest_files[i] if len(dest_files) != 1 else '$(@D)/' + dest_files[i]
-      # On Windows, symlink is not supported, so we just copy all the files.
-      cmd = 'cp -f' if _is_windows(repository_ctx) else 'ln -s'
+      # Copy the headers to create a sandboxable setup.
+      cmd = 'cp -f'
       command.append(cmd + ' "%s" "%s"' % (src_files[i] , dest))
       outs.append('        "' + dest_dir + dest_files[i] + '",')
   genrule = _genrule(src_dir, genrule_name, " && ".join(command),
-- 
GitLab


From e0da6256cd116d17057374594f2fc191cf201f42 Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Sat, 29 Sep 2018 23:29:28 -0700
Subject: [PATCH 0928/1357] Fixed format errors reported by clang-format

---
 tensorflow/core/common_runtime/process_util.cc      | 11 ++++++-----
 tensorflow/core/common_runtime/threadpool_device.cc |  6 +++---
 tensorflow/core/util/util.cc                        |  8 ++------
 tensorflow/core/util/util.h                         |  2 +-
 4 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index b3064a4c08..4570496637 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -62,15 +62,16 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
     // Set inter_op conservatively to avoid thread oversubscription that could
     // lead to severe perf degradations and OMP resource exhaustion
     int mkl_intra_op = 1;
-  #ifdef _OPENMP
+#ifdef _OPENMP
     mkl_intra_op = omp_get_max_threads();
-  #endif  // _OPENMP
+#endif  // _OPENMP
     CHECK_GE(mkl_intra_op, 1);
     const int32 mkl_inter_op = std::max(
         (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
-    VLOG(0) << "Creating new thread pool with default inter op setting: "
-            << mkl_inter_op
-            << ". Tune using inter_op_parallelism_threads for best performance.";
+    VLOG(0)
+        << "Creating new thread pool with default inter op setting: "
+        << mkl_inter_op
+        << ". Tune using inter_op_parallelism_threads for best performance.";
     return mkl_inter_op;
   }
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index f188016610..6404d8bc6a 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -51,8 +51,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
   // Early return when MKL is disabled
-  if (DisableMKL())
-    return;
+  if (DisableMKL()) return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
@@ -118,7 +117,8 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
 };
 
 #ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200), MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200),
+                       MklCPUAllocatorFactory);
 #endif  // ENABLE_MKL
 
 }  // namespace
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 44d5becb9c..489999d1e8 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -122,11 +122,7 @@ string SliceDebugString(const TensorShape& shape, const int64 flat) {
 
 #ifdef INTEL_MKL
 bool DisableMKL() {
-  enum MklStatus {
-    MKL_DEFAULT = 0,
-    MKL_ON = 1,
-    MKL_OFF = 2
-  };
+  enum MklStatus { MKL_DEFAULT = 0, MKL_ON = 1, MKL_OFF = 2 };
   static MklStatus status = MKL_DEFAULT;
   if (status == MKL_DEFAULT) {
     char* tf_disable_mkl = getenv("TF_DISABLE_MKL");
@@ -139,5 +135,5 @@ bool DisableMKL() {
   }
   return status == MKL_OFF ? true : false;
 }
-#endif
+#endif  // INTEL_MKL
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index ba90ad52c2..4aa47aa48a 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -59,7 +59,7 @@ string SliceDebugString(const TensorShape& shape, const int64 flat);
 // disable MKL in runtime
 #ifdef INTEL_MKL
 bool DisableMKL();
-#endif
+#endif  // INTEL_MKL
 
 }  // namespace tensorflow
 
-- 
GitLab


From 2b456a2b5dc6b5bb092b3986a400acb77b21a30f Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Sun, 30 Sep 2018 01:12:34 -0700
Subject: [PATCH 0929/1357] Added some minor format changes

---
 tensorflow/core/common_runtime/process_util.cc      | 6 +++---
 tensorflow/core/common_runtime/threadpool_device.cc | 6 +++---
 tensorflow/core/util/util.cc                        | 2 +-
 tensorflow/core/util/util.h                         | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index b3064a4c08..c75d8a8ce6 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -62,15 +62,15 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
     // Set inter_op conservatively to avoid thread oversubscription that could
     // lead to severe perf degradations and OMP resource exhaustion
     int mkl_intra_op = 1;
-  #ifdef _OPENMP
+#ifdef _OPENMP
     mkl_intra_op = omp_get_max_threads();
-  #endif  // _OPENMP
+#endif  // _OPENMP
     CHECK_GE(mkl_intra_op, 1);
     const int32 mkl_inter_op = std::max(
         (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
     VLOG(0) << "Creating new thread pool with default inter op setting: "
             << mkl_inter_op
-            << ". Tune using inter_op_parallelism_threads for best performance.";
+            << ".Tune using inter_op_parallelism_threads for best performance.";
     return mkl_inter_op;
   }
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index f188016610..6404d8bc6a 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -51,8 +51,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
   // Early return when MKL is disabled
-  if (DisableMKL())
-    return;
+  if (DisableMKL()) return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
@@ -118,7 +117,8 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
 };
 
 #ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200), MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200),
+                       MklCPUAllocatorFactory);
 #endif  // ENABLE_MKL
 
 }  // namespace
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 44d5becb9c..6e78777dd9 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -139,5 +139,5 @@ bool DisableMKL() {
   }
   return status == MKL_OFF ? true : false;
 }
-#endif
+#endif  // INTEL_MKL
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index ba90ad52c2..4aa47aa48a 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -59,7 +59,7 @@ string SliceDebugString(const TensorShape& shape, const int64 flat);
 // disable MKL in runtime
 #ifdef INTEL_MKL
 bool DisableMKL();
-#endif
+#endif  // INTEL_MKL
 
 }  // namespace tensorflow
 
-- 
GitLab


From a00fe72261cf6fe4a00467139e401de14c16224c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 30 Sep 2018 02:00:58 -0700
Subject: [PATCH 0930/1357] compat: Update forward compatibility horizon to
 2018-09-30

PiperOrigin-RevId: 215109054
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 24a795c787..1f7cfe48b3 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 29)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 30)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 4ecce5aa64587afe1cd07ee4c92bbb5ce2cf85df Mon Sep 17 00:00:00 2001
From: Amit Patankar <amitpatankar@google.com>
Date: Sun, 30 Sep 2018 06:52:22 -0700
Subject: [PATCH 0931/1357] Removing the setuptools upper limit.

PiperOrigin-RevId: 215120867
---
 tensorflow/tools/pip_package/setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index b95e1f5c87..a9d8b0cff5 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -56,7 +56,6 @@ REQUIRED_PACKAGES = [
     'numpy >= 1.13.3',
     'six >= 1.10.0',
     'protobuf >= 3.6.0',
-    'setuptools <= 39.1.0',
     'tensorboard >= 1.11.0, < 1.12.0',
     'termcolor >= 1.1.0',
 ]
-- 
GitLab


From 5fa4e1ac928b0512b28e955c588c5a7eab2ea046 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 30 Sep 2018 11:57:45 -0700
Subject: [PATCH 0932/1357] Parallel_for: fix converters for some ops that
 don't support broadcasting.

PiperOrigin-RevId: 215133508
---
 tensorflow/python/ops/parallel_for/pfor.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/ops/parallel_for/pfor.py b/tensorflow/python/ops/parallel_for/pfor.py
index e0f6d51881..83cbe64ff2 100644
--- a/tensorflow/python/ops/parallel_for/pfor.py
+++ b/tensorflow/python/ops/parallel_for/pfor.py
@@ -1987,14 +1987,12 @@ def _convert_cast(pfor_input):
 @RegisterPForWithArgs("Pow", math_ops.pow)
 @RegisterPForWithArgs("RealDiv", math_ops.divide)
 @RegisterPForWithArgs("Real", math_ops.real)
-@RegisterPForWithArgs("ReciprocalGrad", math_ops.reciprocal_grad)
 @RegisterPForWithArgs("Reciprocal", math_ops.reciprocal)
 @RegisterPForWithArgs("Relu6", nn_ops.relu6)
 @RegisterPForWithArgs("Relu", nn_ops.relu)
 @RegisterPForWithArgs("RightShift", bitwise_ops.right_shift)
 @RegisterPForWithArgs("Rint", math_ops.rint)
 @RegisterPForWithArgs("Round", math_ops.round)
-@RegisterPForWithArgs("RsqrtGrad", math_ops.rsqrt_grad)
 @RegisterPForWithArgs("Rsqrt", math_ops.rsqrt)
 @RegisterPForWithArgs("Selu", nn_ops.selu)
 @RegisterPForWithArgs("Sigmoid", math_ops.sigmoid)
@@ -2003,7 +2001,6 @@ def _convert_cast(pfor_input):
 @RegisterPForWithArgs("Sin", math_ops.sin)
 @RegisterPForWithArgs("Softplus", nn_ops.softplus)
 @RegisterPForWithArgs("Softsign", nn_ops.softsign)
-@RegisterPForWithArgs("SqrtGrad", math_ops.sqrt_grad)
 @RegisterPForWithArgs("Sqrt", math_ops.sqrt)
 @RegisterPForWithArgs("SquaredDifference", math_ops.squared_difference)
 @RegisterPForWithArgs("Square", math_ops.square)
@@ -2095,6 +2092,9 @@ def _convert_biasaddgrad(pfor_input):
 @RegisterPForWithArgs("SoftplusGrad")
 @RegisterPForWithArgs("SoftsignGrad")
 @RegisterPForWithArgs("TanhGrad")
+@RegisterPForWithArgs("SqrtGrad")
+@RegisterPForWithArgs("RsqrtGrad")
+@RegisterPForWithArgs("ReciprocalGrad")
 def _convert_grads(pfor_input, op_type, *args, **kw_args):
   del args
   del kw_args
-- 
GitLab


From 76c4853b50f201b4a809ac66746c798e049b294c Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Sun, 30 Sep 2018 20:03:29 -0700
Subject: [PATCH 0933/1357] Bump the version of protobuf TF pip package depends
 on.

Fixes #21719

PiperOrigin-RevId: 215154273
---
 tensorflow/tools/pip_package/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index a9d8b0cff5..88c9c20d36 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -55,7 +55,7 @@ REQUIRED_PACKAGES = [
     'keras_preprocessing >= 1.0.3',
     'numpy >= 1.13.3',
     'six >= 1.10.0',
-    'protobuf >= 3.6.0',
+    'protobuf >= 3.6.1',
     'tensorboard >= 1.11.0, < 1.12.0',
     'termcolor >= 1.1.0',
 ]
-- 
GitLab


From b797bfb750504e03a38a988c44e3c52e902e87c4 Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Sun, 30 Sep 2018 22:34:28 -0700
Subject: [PATCH 0934/1357] [HloOrdering] Make parameter always defined before
 other instructions.

- Make parameter always defined before other instructions.
- Add extra indentations to the predecessor field in ToString() method to make it clear.

PiperOrigin-RevId: 215162840
---
 .../compiler/xla/service/hlo_ordering.cc      | 10 +++++++---
 .../compiler/xla/service/hlo_ordering_test.cc | 20 +++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_ordering.cc b/tensorflow/compiler/xla/service/hlo_ordering.cc
index f1dc08bafa..23d41d91d6 100644
--- a/tensorflow/compiler/xla/service/hlo_ordering.cc
+++ b/tensorflow/compiler/xla/service/hlo_ordering.cc
@@ -92,14 +92,18 @@ bool HloOrdering::ExecutesBefore(const HloInstruction* a,
 }
 
 bool HloOrdering::IsDefinedBefore(const HloValue& a, const HloValue& b) const {
-  // If 'b' is an entry param then 'a' cannot be defined before 'b' because 'b'
-  // is live into the module.
+  // Entry parameter should always be defined before other instructions.
   const HloModule* module = b.defining_instruction()->parent()->parent();
   if (b.defining_instruction()->parent() == module->entry_computation() &&
       b.defining_instruction()->opcode() == HloOpcode::kParameter) {
     return false;
   }
 
+  if (a.defining_instruction()->parent() == module->entry_computation() &&
+      a.defining_instruction()->opcode() == HloOpcode::kParameter) {
+    return true;
+  }
+
   // Phi values require special handling. Because XLA does not have a phi
   // instruction, the definition instruction of the phis values are
   // placeholders: either the subcomputation parameter (body or condition) or
@@ -316,7 +320,7 @@ string PredecessorHloOrdering::ToStringHelper(const string& name) const {
       for (auto predecessor : all) {
         if (predecessors_.at(computation)
                 ->IsReachable(predecessor, instruction)) {
-          pieces.push_back(absl::StrFormat("  %s", predecessor->name()));
+          pieces.push_back(absl::StrFormat("    %s", predecessor->name()));
         }
       }
     }
diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc
index 00970bcda3..b045adc964 100644
--- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc
@@ -174,6 +174,26 @@ TEST_F(HloOrderingTest, InstructionsInWhileComputations) {
   EXPECT_FALSE(ordering.ExecutesBefore(body_param, cond_param));
 }
 
+TEST_F(HloOrderingTest, ParametersDefinedBeforeOthers) {
+  // Entry parameter should always be defined before other instruction.
+  auto module = CreateNewModule();
+  const Shape scalar_shape = ShapeUtil::MakeShape(xla::F32, {});
+  auto builder = HloComputation::Builder(TestName());
+  auto constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(1.0)));
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  module->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(auto dataflow,
+                          HloDataflowAnalysis::Run(*module, /*ssa_form=*/true));
+
+  DependencyHloOrdering ordering(module.get());
+  EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(param),
+                                       dataflow->GetValueDefinedAt(constant)));
+  EXPECT_TRUE(!ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(constant),
+                                        dataflow->GetValueDefinedAt(param)));
+}
+
 TEST_F(HloOrderingTest, ValuesInWhileComputations) {
   // Tests the ordering of values (defined by dataflow analysis) in the body and
   // condition of a while instruction. HLO code:
-- 
GitLab


From 03c5f9cdce62f6711b91fe81505e3c085e54a771 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 02:03:50 -0700
Subject: [PATCH 0935/1357] compat: Update forward compatibility horizon to
 2018-10-01

PiperOrigin-RevId: 215179315
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 1f7cfe48b3..bea5aa990f 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 30)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 1)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 0fd21d8c34e15bc3013e93014d101b672e1f3687 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 02:41:01 -0700
Subject: [PATCH 0936/1357] [TF:XLA] Teach deadness analysis more of
 distributive property.

PiperOrigin-RevId: 215183847
---
 tensorflow/compiler/jit/deadness_analysis.cc  | 107 ++++++++++++++----
 .../compiler/jit/deadness_analysis_test.cc    |  31 ++++-
 2 files changed, 112 insertions(+), 26 deletions(-)

diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index 9128b48da3..25e2e9a7af 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/jit/deadness_analysis.h"
+#include "absl/algorithm/container.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/jit/deadness_analysis_internal.h"
 #include "tensorflow/core/graph/algorithm.h"
@@ -383,6 +384,8 @@ class PredicateFactory {
   }
 
   Predicate* MakeAndOrImpl(absl::Span<Predicate* const> operands, bool is_and);
+  Predicate* MakeInternedAndOr(std::vector<Predicate*> simplified_ops,
+                               Predicate::Kind pred_kind);
 
   // Predicate instances are interned, meaning that there is only a single
   // instance of a Predicate object with a given content.  This makes checking
@@ -429,11 +432,40 @@ class PredicateFactory {
       interned_symbol_instances_;
 };
 
+Predicate* PredicateFactory::MakeInternedAndOr(
+    std::vector<Predicate*> simplified_ops, Predicate::Kind pred_kind) {
+  std::stable_sort(
+      simplified_ops.begin(), simplified_ops.end(),
+      [](Predicate* a, Predicate* b) { return a->hash() < b->hash(); });
+
+  auto it = interned_and_or_instances_.find({pred_kind, simplified_ops});
+  if (it != interned_and_or_instances_.end()) {
+    return it->second.get();
+  }
+
+  simplified_ops.shrink_to_fit();
+  // NB!  Because we'll use a non-owning reference to simplified_ops in the
+  // key for interned_and_or_instances_ we need to be careful to std::move()
+  // it all the way through.
+  absl::Span<Predicate* const> operands_slice = simplified_ops;
+  std::unique_ptr<Predicate> new_pred =
+      pred_kind == Predicate::Kind::kAnd
+          ? Make<AndPredicate>(std::move(simplified_ops))
+          : Make<OrPredicate>(std::move(simplified_ops));
+
+  Predicate* new_pred_ptr = new_pred.get();
+  interned_and_or_instances_.emplace(
+      SignatureForAndOr(pred_kind, operands_slice), std::move(new_pred));
+  return new_pred_ptr;
+}
+
 // Common code to create AndPredicate or OrPredicate instances.
 Predicate* PredicateFactory::MakeAndOrImpl(
     absl::Span<Predicate* const> operands, bool is_and) {
   Predicate::Kind pred_kind =
       is_and ? Predicate::Kind::kAnd : Predicate::Kind::kOr;
+  Predicate::Kind other_pred_kind =
+      is_and ? Predicate::Kind::kOr : Predicate::Kind::kAnd;
   gtl::FlatSet<Predicate*> simplified_ops_set;
   std::vector<Predicate*> simplified_ops;
   for (Predicate* op : operands) {
@@ -472,30 +504,63 @@ Predicate* PredicateFactory::MakeAndOrImpl(
     }
   }
 
-  std::stable_sort(
-      simplified_ops.begin(), simplified_ops.end(),
-      [](Predicate* a, Predicate* b) { return a->hash() < b->hash(); });
+  // If all ops contain the same subop, then factor it out thanks to the
+  // distributive property. Such as:
+  // - (A & B) | (A & C) | (A & D) => A & (B | C | D)
+  // - (A | B) & (A | C) & (A | D) => A | (B & C & D)
+  //
+  // First find any predicates contained in all subops.
+  std::vector<Predicate*> common_inner_operands;
+  gtl::FlatSet<Predicate*> common_inner_operands_set;
+  for (Predicate* op : simplified_ops) {
+    if (op->kind() != other_pred_kind) {
+      common_inner_operands.clear();
+      break;
+    }
 
-  auto it = interned_and_or_instances_.find({pred_kind, simplified_ops});
-  if (it == interned_and_or_instances_.end()) {
-    simplified_ops.shrink_to_fit();
-    // NB!  Because we'll use a non-owning reference to simplified_ops in the
-    // key for interned_and_or_instances_ we need to be careful to std::move()
-    // it all the way through.
-    absl::Span<Predicate* const> operands_slice = simplified_ops;
-    std::unique_ptr<Predicate> new_pred =
-        is_and ? Make<AndPredicate>(std::move(simplified_ops))
-               : Make<OrPredicate>(std::move(simplified_ops));
+    if (common_inner_operands.empty()) {
+      common_inner_operands.insert(common_inner_operands.end(),
+                                   op->GetOperands().begin(),
+                                   op->GetOperands().end());
+    } else {
+      std::vector<Predicate*> sub_ops_intersection;
+      common_inner_operands.clear();
+      absl::c_copy_if(op->GetOperands(),
+                      std::back_inserter(common_inner_operands),
+                      [&](Predicate* sub_op) {
+                        return common_inner_operands_set.count(sub_op) == 1;
+                      });
+    }
+    if (common_inner_operands.empty()) break;
+    common_inner_operands_set.clear();
+    common_inner_operands_set.insert(common_inner_operands.begin(),
+                                     common_inner_operands.end());
+  }
 
-    Predicate* new_pred_ptr = new_pred.get();
-    CHECK(interned_and_or_instances_
-              .emplace(SignatureForAndOr(pred_kind, operands_slice),
-                       std::move(new_pred))
-              .second);
-    return new_pred_ptr;
-  } else {
-    return it->second.get();
+  if (common_inner_operands.empty()) {
+    return MakeInternedAndOr(std::move(simplified_ops), pred_kind);
   }
+
+  // For all predicates that can be factored out, remove them and recreate the
+  // subops.
+  std::vector<Predicate*> factored_ops;
+  for (Predicate* op : simplified_ops) {
+    std::vector<Predicate*> new_sub_op_ops;
+    absl::c_copy_if(op->GetOperands(), std::back_inserter(new_sub_op_ops),
+                    [&](Predicate* sub_op) {
+                      return std::find(common_inner_operands.begin(),
+                                       common_inner_operands.end(),
+                                       sub_op) == common_inner_operands.end();
+                    });
+    factored_ops.push_back(MakeAndOrImpl(new_sub_op_ops, !is_and));
+  }
+
+  Predicate* new_inner_op = MakeAndOrImpl(factored_ops, is_and);
+  std::vector<Predicate*> outer_ops;
+  outer_ops.push_back(new_inner_op);
+  outer_ops.insert(outer_ops.end(), common_inner_operands.begin(),
+                   common_inner_operands.end());
+  return MakeAndOrImpl(outer_ops, !is_and);
 }
 
 class DeadnessAnalysisImpl : public DeadnessAnalysis {
diff --git a/tensorflow/compiler/jit/deadness_analysis_test.cc b/tensorflow/compiler/jit/deadness_analysis_test.cc
index 28a56044d5..617e31488c 100644
--- a/tensorflow/compiler/jit/deadness_analysis_test.cc
+++ b/tensorflow/compiler/jit/deadness_analysis_test.cc
@@ -384,10 +384,31 @@ TEST(DeadnessAnalysisTest, OrOfAnd) {
   EXPECT_FALSE(result->HasInputsWithMismatchingDeadness(*add2.node()));
 }
 
-TEST(DeadnessAnalysisTest, NEGATIVE_AndOrDistributive) {
-  // This demonstrates one of the weaknesses in the current approach -- since we
-  // only do some basic simplifications we can't see that "(A|B)&C" ==
-  // "(A&C)|(B&C)".
+TEST(DeadnessAnalysisTest, AndOrDistributiveSimplified) {
+  // (*A | (~*A & ((~*B & ~*A) | (~*A & *B)))) == #true
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  ops::Switch sw_0 = CreateSwitch(root, "A");
+  ops::Switch sw_1 = CreateSwitch(root, "B");
+  Output add0 =
+      ops::Add(root.WithOpName("and0"), sw_0.output_false, sw_1.output_true);
+  Output add1 =
+      ops::Add(root.WithOpName("and1"), sw_0.output_false, sw_1.output_false);
+  ops::Merge or2(root.WithOpName("or2"), {add0, add1});
+  Output add3 =
+      ops::Add(root.WithOpName("and3"), or2.output, sw_0.output_false);
+  ops::Merge or4(root.WithOpName("or4"), {add3, sw_0.output_true});
+
+  std::unique_ptr<DeadnessAnalysis> result;
+  TF_ASSERT_OK(AnalyzeDeadness(root.graph(), &result));
+
+  PredicateMapTy predicate_map;
+  TF_ASSERT_OK(ComputePredicates(*root.graph(), &predicate_map));
+  EXPECT_EQ(predicate_map[ControlOutputFor(or4.output)], "#true");
+}
+
+TEST(DeadnessAnalysisTest, AndOrDistributive) {
+  // (A|B)&C == (A&C)|(B&C)
   Scope root = Scope::NewRootScope().ExitOnError();
 
   ops::Switch sw_0 = CreateSwitch(root, "0");
@@ -408,7 +429,7 @@ TEST(DeadnessAnalysisTest, NEGATIVE_AndOrDistributive) {
   std::unique_ptr<DeadnessAnalysis> result;
   TF_ASSERT_OK(AnalyzeDeadness(root.graph(), &result));
 
-  EXPECT_TRUE(result->HasInputsWithMismatchingDeadness(*add2.node()));
+  EXPECT_FALSE(result->HasInputsWithMismatchingDeadness(*add3.node()));
 }
 
 TEST(DeadnessAnalysisTest, Ternary) {
-- 
GitLab


From c1c63c936c4bc51b401b82fbe54ed1945f49a314 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 03:27:05 -0700
Subject: [PATCH 0937/1357] Moves the creation of regularizer ops in
 get_variable out of surrounding context.

This resembles the behaviour for initializer ops.

PiperOrigin-RevId: 215187942
---
 tensorflow/python/ops/variable_scope.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index af5c7d4050..5032ca79f9 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -939,7 +939,8 @@ class _VariableStore(object):
     if regularizer:
       with ops.colocate_with(v):
         with ops.name_scope(name + "/Regularizer/"):
-          loss = regularizer(v)
+          with ops.init_scope():
+            loss = regularizer(v)
         if loss is not None:
           if context.executing_eagerly():
             v_name = "v_%s" % type(v)
-- 
GitLab


From 9a169bf3ba840af8ab3caae7ea1c69c682be3ab7 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Mon, 1 Oct 2018 03:34:35 -0700
Subject: [PATCH 0938/1357] Add allowed optimizations to GrapplerItem.

(1) Skip UnaryOpComposition rewrite if the optimized graph needs to have a gradient registered for all nodes.

PiperOrigin-RevId: 215188461
---
 tensorflow/core/grappler/grappler_item.cc     |   1 +
 tensorflow/core/grappler/grappler_item.h      |   9 ++
 tensorflow/core/grappler/op_types.cc          |   4 +
 tensorflow/core/grappler/op_types.h           |   1 +
 tensorflow/core/grappler/optimizers/BUILD     |   2 +
 .../optimizers/arithmetic_optimizer.cc        |   4 +
 .../grappler/optimizers/meta_optimizer.cc     |  19 +++
 .../optimizers/meta_optimizer_test.cc         | 126 ++++++++++++++++++
 8 files changed, 166 insertions(+)

diff --git a/tensorflow/core/grappler/grappler_item.cc b/tensorflow/core/grappler/grappler_item.cc
index bbc0fedd22..2c490f3966 100644
--- a/tensorflow/core/grappler/grappler_item.cc
+++ b/tensorflow/core/grappler/grappler_item.cc
@@ -38,6 +38,7 @@ GrapplerItem::GrapplerItem(const GrapplerItem& other, GraphDef* graph_def) {
   restore_op = other.restore_op;
   save_restore_loc_tensor = other.save_restore_loc_tensor;
   queue_runners = other.queue_runners;
+  allowed_optimizations = other.allowed_optimizations;
   graph.Swap(graph_def);
 }
 
diff --git a/tensorflow/core/grappler/grappler_item.h b/tensorflow/core/grappler/grappler_item.h
index 939e5fa046..a0748abfe6 100644
--- a/tensorflow/core/grappler/grappler_item.h
+++ b/tensorflow/core/grappler/grappler_item.h
@@ -77,6 +77,15 @@ struct GrapplerItem {
   // Return a set of node names that must be preserved. This includes feed and
   // fetch nodes, keep_ops, init_ops.
   std::unordered_set<string> NodesToPreserve() const;
+
+  // Restrict types of optimizations that are allowed for this GrapplerItem.
+  struct AllowedOptimizations {
+    // Is it allowed to add nodes to the graph that do not have registered
+    // gradient function.
+    bool non_differentiable_rewrites = true;
+  };
+
+  AllowedOptimizations allowed_optimizations;
 };
 
 // Return the transitive fanin of a set of terminal nodes.
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 3521669b63..9f0d9dbf28 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -425,6 +425,10 @@ bool IsSwitch(const NodeDef& node) {
   return op == "Switch" || op == "RefSwitch";
 }
 
+bool IsSymbolicGradient(const NodeDef& node) {
+  return node.op() == "SymbolicGradient";
+}
+
 bool IsTanhGrad(const NodeDef& node) { return node.op() == "TanhGrad"; }
 
 bool IsTile(const NodeDef& node) { return node.op() == "Tile"; }
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 25ab6b65ac..7f86a5f295 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -149,6 +149,7 @@ bool IsStridedSliceGrad(const NodeDef& node);
 bool IsSub(const NodeDef& node);
 bool IsSum(const NodeDef& node);
 bool IsSwitch(const NodeDef& node);
+bool IsSymbolicGradient(const NodeDef& node);
 bool IsTanhGrad(const NodeDef& node);
 bool IsTile(const NodeDef& node);
 bool IsTranspose(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 960d1addb3..c708f84948 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -525,6 +525,7 @@ cc_library(
         "//tensorflow/core:core_cpu_base",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler/utils:colocation",
@@ -541,6 +542,7 @@ tf_cuda_cc_test(
         ":custom_graph_optimizer_registry",
         ":meta_optimizer",
         "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
         "//tensorflow/core:test",
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 3388ee8035..7d5014ee0a 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -3249,6 +3249,10 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
   optimized_graph_ = &optimized_item.graph;
   node_map_.reset(new NodeMap(optimized_graph_));
 
+  // Disable restricted graph rewrites.
+  options_.unary_ops_composition &=
+      item.allowed_optimizations.non_differentiable_rewrites;
+
   if (options_.dedup_computations) {
     DedupComputations();
   }
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 406c1b60ce..a5f851fb1a 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -37,6 +37,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/functions.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
@@ -413,6 +414,15 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   FunctionLibraryDefinition flib(OpRegistry::Global(),
                                  optimized_graph->library());
 
+  // Find functions for which we might need to compute a gradient at runtime.
+  gtl::FlatSet<string> differentiable_functions;
+  for (const NodeDef& node : optimized_graph->node()) {
+    if (IsSymbolicGradient(node)) {
+      const auto* f_attr = gtl::FindOrNull(node.attr(), "f");
+      if (f_attr) differentiable_functions.insert(f_attr->func().name());
+    }
+  }
+
   // Optimize each function only once.
   std::unordered_set<string> optimized_funcs;
   bool optimize_function_library = true;
@@ -428,6 +438,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
 
       // Skip parametrized functions (function type or body is defined only at
       // function call time by caller node attributes).
+      // They should be specialized to their instantiation type parameters by
+      // the function optimizer, before we can optimize function body.
       if (IsParametrized(func)) continue;
 
       VLOG(3) << "Optimize function: function=" << func_name;
@@ -442,6 +454,13 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       TF_RETURN_IF_ERROR(MakeGrapplerFunctionItem(
           func, flib, item.graph.versions().producer(), &func_item));
 
+      // If we need to compute the gradient of optimized function at runtime, we
+      // can't perform non-differentiable rewrites.
+      if (differentiable_functions.find(func_name) !=
+          differentiable_functions.end()) {
+        func_item.allowed_optimizations.non_differentiable_rewrites = false;
+      }
+
       // Optimize function body graph.
       GraphDef optimized_func_graph;
       TF_RETURN_IF_ERROR(
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
index c477c4d4b1..3f3f43382f 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/grappler_test.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -82,6 +83,48 @@ class TestOptimizerWithParams : public TestOptimizer {
 
 REGISTER_GRAPH_OPTIMIZER(TestOptimizerWithParams);
 
+// Record various properties of the GrapplerItems passed for optimization.
+class GrapplerItemPropertiesAccumulator : public CustomGraphOptimizer {
+ public:
+  static void SetAllowedOptimizations(
+      gtl::FlatMap<string, GrapplerItem::AllowedOptimizations>*
+          allowed_optimizations) {
+    allowed_optimizations_ = allowed_optimizations;
+  }
+  static void ResetAllowedOptimizations() { allowed_optimizations_ = nullptr; }
+
+  GrapplerItemPropertiesAccumulator() {}
+  string name() const override {
+    return "grappler_item_properties_accumulator";
+  }
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* optimized_graph) override {
+    *optimized_graph = item.graph;
+    if (allowed_optimizations_) {
+      allowed_optimizations_->insert({item.id, item.allowed_optimizations});
+    }
+    return Status::OK();
+  }
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimized_graph, double result) override {}
+
+ private:
+  static gtl::FlatMap<string, GrapplerItem::AllowedOptimizations>*
+      allowed_optimizations_;
+};
+
+gtl::FlatMap<string, GrapplerItem::AllowedOptimizations>*
+    GrapplerItemPropertiesAccumulator::allowed_optimizations_;
+
+REGISTER_GRAPH_OPTIMIZER(GrapplerItemPropertiesAccumulator);
+
 class MetaOptimizerTest : public GrapplerTest {};
 
 TEST_F(MetaOptimizerTest, RunsCustomOptimizer) {
@@ -335,6 +378,89 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibrary) {
   test::ExpectTensorEqual<int>(tensors_expected[1], tensors[1]);
 }
 
+TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryWithRestrictions) {
+  using test::function::NDef;
+  using FDH = FunctionDefHelper;
+
+  // We will record what type of optimizations meta optimizer allows for each
+  // GrapplerItem (main graph and graphs for each function).
+  gtl::FlatMap<string, GrapplerItem::AllowedOptimizations>
+      allowed_optimizations;
+  GrapplerItemPropertiesAccumulator::SetAllowedOptimizations(
+      &allowed_optimizations);
+
+  // Just record properties of optimized Grappler items.
+  RewriterConfig rewriter_config;
+  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO);
+  rewriter_config.add_optimizers("GrapplerItemPropertiesAccumulator");
+  rewriter_config.set_min_graph_nodes(-1);
+
+  MetaOptimizer optimizer(nullptr, rewriter_config);
+
+  // Define simple function library with two identical mul functions.
+  FunctionDef mul_func_1 = FunctionDefHelper::Create(
+      "MyMul1", {"x:float", "y:float"}, {"z:float"}, {},
+      {{{"mul"}, "Mul", {"x", "y"}, {}}},
+      /* Mapping between function returns and function node outputs. */
+      {{"z", "mul:z:0"}});
+
+  FunctionDef mul_func_2 = FunctionDefHelper::Create(
+      "MyMul2", {"x:float", "y:float"}, {"z:float"}, {},
+      {{{"mul"}, "Mul", {"x", "y"}, {}}},
+      /* Mapping between function returns and function node outputs. */
+      {{"z", "mul:z:0"}});
+
+  // Tensorflow graph:
+  //
+  //   x0 = tf.Placeholder(tf.float);
+  //   x1 = tf.Placeholder(tf.float);
+  //   dy = tf.Placeholder(tf.float);
+  //
+  //   mul_1 = MyMul1(x0, x1);
+  //   mul_2 = MyMul2(x0, x1);
+  //   dx = SymbolicGradient({x0, x1, dy}, f=MyMul2)
+  GrapplerItem item;
+  item.id = "main";
+  item.graph = test::function::GDef(
+      {NDef("x0", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+       NDef("x1", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+       NDef("dy", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+       // Calls into function library
+       NDef("mul_1", "MyMul1", {"x0", "x1"}, {}, kDevice),
+       NDef("mul_2", "MyMul2", {"x0", "x1"}, {}, kDevice),
+       // Symbolic gradient of a MyMul2
+       NDef("dx", "SymbolicGradient", {"x0", "x1", "dy"},
+            {{"f", FDH::FunctionRef("MyMul2", {})},
+             {"Tin", DataTypeSlice{DT_FLOAT}},
+             {"Tout", DataTypeSlice{DT_FLOAT, DT_FLOAT}}},
+            kDevice)},
+      // FunctionLib
+      {mul_func_1, mul_func_2});
+  item.fetch = {"mul_1", "mul_2", "dx"};
+
+  GraphDef output;
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  // Our custom optimizer must be called for the main graph and for the two
+  // functions.
+  ASSERT_EQ(allowed_optimizations.size(), 3);
+
+  auto allowed_optimizations_main =
+      gtl::FindOrNull(allowed_optimizations, "main");
+  ASSERT_NE(allowed_optimizations_main, nullptr);
+  EXPECT_TRUE(allowed_optimizations_main->non_differentiable_rewrites);
+
+  auto allowed_optimizations_my_mul_1 =
+      gtl::FindOrNull(allowed_optimizations, "MyMul1");
+  ASSERT_NE(allowed_optimizations_my_mul_1, nullptr);
+  EXPECT_TRUE(allowed_optimizations_my_mul_1->non_differentiable_rewrites);
+
+  auto allowed_optimizations_my_mul_2 =
+      gtl::FindOrNull(allowed_optimizations, "MyMul2");
+  ASSERT_NE(allowed_optimizations_my_mul_2, nullptr);
+  EXPECT_FALSE(allowed_optimizations_my_mul_2->non_differentiable_rewrites);
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From b73c5f80926de3b724a92a57cf0bc49aa7de37bd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 05:50:51 -0700
Subject: [PATCH 0939/1357] Automated rollback of commit
 3f4423fad57694bc8d7adc427d65e5a18c8592b2

PiperOrigin-RevId: 215200418
---
 .../contrib/tpu/ops/tpu_embedding_ops.cc      | 42 ++++++++++++++++---
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index 6b0730b40c..5c27d59f82 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -103,10 +103,19 @@ Status RegisterPerTableLoadOpsForAlgorithmBody(
       arg->set_type(DT_FLOAT);
     }
   }
+  {
+    auto* table_id_attr = op_def->add_attr();
+    table_id_attr->set_name("table_id");
+    table_id_attr->set_type("int");
+    table_id_attr->set_has_minimum(true);
+    table_id_attr->set_minimum(-1);
+    table_id_attr->mutable_default_value()->set_i(-1);
+  }
   {
     auto* table_name_attr = op_def->add_attr();
     table_name_attr->set_name("table_name");
     table_name_attr->set_type("string");
+    table_name_attr->mutable_default_value()->set_s("");
   }
   {
     auto* num_shards_attr = op_def->add_attr();
@@ -138,9 +147,11 @@ parameters that are loaded from a checkpoint before a training loop is
 executed.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto.
+  EmbeddingLayerConfiguration proto (overrides table_id).
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
+table_id: Index of this table in the EmbeddingLayerConfiguration proto
+  (deprecated).
 )doc",
                                           parameter_descriptions.c_str()));
   op_def->set_is_commutative(false);
@@ -149,10 +160,14 @@ shard_id: Identifier of shard for this operation.
   auto shape_inference_function =
       [state_variable_specs,
        is_debug_op](shape_inference::InferenceContext* c) -> Status {
+    int table_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
     string table_name;
     TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
-    if (table_name.empty()) {
-      return errors::InvalidArgument("table_name attribute must be set");
+    // Exactly one must be non-default.
+    if ((table_id >= 0) == (!table_name.empty())) {
+      return errors::InvalidArgument(
+          "exactly one of table_id or table_name must be non-default");
     }
     int num_shards;
     TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
@@ -225,10 +240,19 @@ Status RegisterPerTableRetrieveOpsForAlgorithmBody(
       arg->set_type(DT_FLOAT);
     }
   }
+  {
+    auto* table_id_attr = op_def->add_attr();
+    table_id_attr->set_name("table_id");
+    table_id_attr->set_type("int");
+    table_id_attr->set_has_minimum(true);
+    table_id_attr->set_minimum(-1);
+    table_id_attr->mutable_default_value()->set_i(-1);
+  }
   {
     auto* table_name_attr = op_def->add_attr();
     table_name_attr->set_name("table_name");
     table_name_attr->set_type("string");
+    table_name_attr->mutable_default_value()->set_s("");
   }
   {
     auto* num_shards_attr = op_def->add_attr();
@@ -259,9 +283,11 @@ the correct embedding table configuration. For example, this op is
 used to retrieve updated parameters before saving a checkpoint.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto.
+  EmbeddingLayerConfiguration proto (overrides table_id).
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
+table_id: Index of this table in the EmbeddingLayerConfiguration proto
+  (deprecated).
 )doc",
                                           parameter_descriptions.c_str()));
   op_def->set_is_commutative(false);
@@ -270,10 +296,14 @@ shard_id: Identifier of shard for this operation.
   auto shape_inference_function =
       [state_variable_specs,
        is_debug_op](shape_inference::InferenceContext* c) -> Status {
+    int table_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
     string table_name;
     TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
-    if (table_name.empty()) {
-      return errors::InvalidArgument("table_name must be non-empty");
+    // Exactly one must be non-default.
+    if ((table_id >= 0) == (!table_name.empty())) {
+      return errors::InvalidArgument(
+          "exactly one of table_id or table_name must be non-default");
     }
     int num_shards;
     TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
-- 
GitLab


From 7c5eb354a6b5b2d5a2e27d8ce3dc4861cb51153c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 07:15:23 -0700
Subject: [PATCH 0940/1357] In TensorFlow configure, write the
 .tf_configure.bazelrc into the --workspace path if provided.

This allows repositories that depend on TensorFlow to execute
'bazel run @org_tensorflow//:configure -- --workspace $(pwd)'
to configure TensorFlow.
END_PUBLIC

Before this change, the .tf_configure.bazelrc ended up in the bazel exec root, and 'bazel clean' would undo the configuration.

PiperOrigin-RevId: 215209207
---
 configure.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/configure.py b/configure.py
index 0a3b9a7894..796c6231e8 100644
--- a/configure.py
+++ b/configure.py
@@ -48,10 +48,9 @@ _SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15, 16]
 
 _DEFAULT_PROMPT_ASK_ATTEMPTS = 10
 
-_TF_WORKSPACE_ROOT = os.path.abspath(os.path.dirname(__file__))
 _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc'
-_TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
-_TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE')
+_TF_WORKSPACE_ROOT = ''
+_TF_BAZELRC = ''
 
 if platform.machine() == 'ppc64le':
   _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/powerpc64le-linux-gnu/'
@@ -243,10 +242,10 @@ def setup_python(environ_cp):
     f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path)
 
 
-def reset_tf_configure_bazelrc(workspace_path):
+def reset_tf_configure_bazelrc():
   """Reset file that contains customized config settings."""
   open(_TF_BAZELRC, 'w').close()
-  bazelrc_path = os.path.join(workspace_path, '.bazelrc')
+  bazelrc_path = os.path.join(_TF_WORKSPACE_ROOT, '.bazelrc')
 
   data = []
   if os.path.exists(bazelrc_path):
@@ -1469,21 +1468,27 @@ def config_info_line(name, help_text):
 
 
 def main():
+  global _TF_WORKSPACE_ROOT
+  global _TF_BAZELRC
+
   parser = argparse.ArgumentParser()
   parser.add_argument(
       '--workspace',
       type=str,
-      default=_TF_WORKSPACE_ROOT,
+      default=os.path.abspath(os.path.dirname(__file__)),
       help='The absolute path to your active Bazel workspace.')
   args = parser.parse_args()
 
+  _TF_WORKSPACE_ROOT = args.workspace
+  _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
+
   # Make a copy of os.environ to be clear when functions and getting and setting
   # environment variables.
   environ_cp = dict(os.environ)
 
   check_bazel_version('0.15.0')
 
-  reset_tf_configure_bazelrc(args.workspace)
+  reset_tf_configure_bazelrc()
   cleanup_makefile()
   setup_python(environ_cp)
 
-- 
GitLab


From 9a2f872acd0c38d74d60e4f67701241aa1a26419 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 08:21:58 -0700
Subject: [PATCH 0941/1357] Move from deprecated self.test_session() to
 self.cached_session() or self.session().

* Move from self.test_session(graph=ops.Graph(), ...) to self.session(...) (semantically equivalent).
* Move from self.test_session() to self.cached_session(config=self.config) when run_in_graph_and_eager_modes(config=config) is set to be consistent between eager and non eager modes.

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about:
* the fact that the session may be reused.
* the session is not closed even when doing a "with self.test_session()" statement.

PiperOrigin-RevId: 215216964
---
 tensorflow/contrib/distribute/python/values_test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py
index ae3e134333..121d2fbb3f 100644
--- a/tensorflow/contrib/distribute/python/values_test.py
+++ b/tensorflow/contrib/distribute/python/values_test.py
@@ -641,7 +641,7 @@ class MirroredVariableTest(test.TestCase):
     if context.num_gpus() < 1 and context.executing_eagerly():
       self.skipTest("A GPU is not available for this test in eager mode.")
 
-    with self.test_session() as sess:
+    with self.cached_session(config=self.config) as sess:
       v, devices, mirrored = _make_mirrored()
 
       # Overwrite the initial values.
@@ -744,7 +744,7 @@ class MirroredVariableTest(test.TestCase):
     if context.num_gpus() < 1 or context.executing_eagerly():
       self.skipTest("A GPU is not available for this test or it's eager mode.")
 
-    with self.test_session(
+    with self.session(
         graph=ops.Graph()) as sess, mirrored_strategy.MirroredStrategy(
             ["/device:GPU:0"]).scope():
       with ops.device("/device:GPU:0"):
@@ -827,7 +827,7 @@ class TowerLocalVariableTest(test.TestCase):
     if context.num_gpus() < 1 and context.executing_eagerly():
       self.skipTest("A GPU is not available for this test in eager mode.")
 
-    with self.test_session() as sess:
+    with self.cached_session(config=self.config) as sess:
       v, tower_local = _make_tower_local(variable_scope.VariableAggregation.SUM)
 
       # Overwrite the initial values.
@@ -850,7 +850,7 @@ class TowerLocalVariableTest(test.TestCase):
     if context.num_gpus() < 1 and context.executing_eagerly():
       self.skipTest("A GPU is not available for this test in eager mode.")
 
-    with self.test_session() as sess:
+    with self.cached_session(config=self.config) as sess:
       v, tower_local = _make_tower_local(
           variable_scope.VariableAggregation.MEAN)
 
-- 
GitLab


From e285dea8d9626b832f34d65159639f294c2d6881 Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Mon, 1 Oct 2018 09:23:48 -0700
Subject: [PATCH 0942/1357] Update documentation. - Use absolute links instead
 of relative links. Relative links break when published on website. - Correct
 NNAPI abbreviation.

PiperOrigin-RevId: 215225415
---
 tensorflow/contrib/lite/g3doc/performance.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/g3doc/performance.md b/tensorflow/contrib/lite/g3doc/performance.md
index 0ae9400068..6b7943caf8 100644
--- a/tensorflow/contrib/lite/g3doc/performance.md
+++ b/tensorflow/contrib/lite/g3doc/performance.md
@@ -7,12 +7,12 @@ Mobile and embedded devices have limited computational resources and it is impor
 Some models may be too large to run on embedded devices. Instead of large models it is better to use a slightly less precise but smaller model for embedded devices. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices.
 
 You can retrain the listed models on your own dataset by using transfer learning. Check out our transfer learning tutorial for
-[image classification] (https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and
+[image classification](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and
  [object detection](https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193).
 
 
 ## Profile your model
-Before starting any optimization, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](../tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time.
+Before starting any optimization, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time.
 
 ## Profile and optimize operators in the graph
 If a particular operator appears frequently in the model and based on profiling you find the operator consuming the most amount of time, you can look into optimizing the operator.
@@ -22,7 +22,7 @@ If a particular operator appears frequently in the model and based on profiling
 If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. Fully quantized models can be remarkably power efficient as well.
 
 ## Tweak the number of threads
-Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](../interpreter.h) threads.
+Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](https://github.com/tensorflow/tensorflow/blob/1084594657a5d139102ac794f84d1427a710e39a/tensorflow/contrib/lite/interpreter.h#L337) threads.
 
 ## Eliminate redundant copies
 Tensorflow Lite is optimized to reduce redundant copies. The APIs allow user to [mmap a model file](https://github.com/tensorflow/tensorflow/blob/9982fd6c8831cbd2f58954f79ea71f26660393bc/tensorflow/contrib/lite/model.h#L152) and avoid copies. If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151).
@@ -31,8 +31,8 @@ Tensorflow Lite is optimized to reduce redundant copies. The APIs allow user to
 Platform specific tools like [Android profiler](https://developer.android.com/studio/profile/android-profiler) and [Instruments](https://help.apple.com/instruments/mac/current/) provide a wealth of profiling information that can be used to debug your app. Sometimes the performance bug may be not in the model but in parts of application code that interact with the model. Make sure to familiarize yourself with platform specific profiling tools and best practices for your platform.
 
 ## Use hardware accelerators available on the device
-Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/) on Android.
-You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable NNAPI call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance.
+Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [Neural Networks API](https://developer.android.com/ndk/guides/neuralnetworks/) on Android.
+You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable Neural Networks API call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance.
 
 ## Need more help
 The Tensorflow team is happy to help diagnose and address specific performance issues you may be facing. Please file a bug on [github](https://github.com/tensorflow/tensorflow/issues) with details of the issue.
-- 
GitLab


From 03a18ca576410d49e8f0692464e35e900a54f59f Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Mon, 1 Oct 2018 10:01:20 -0700
Subject: [PATCH 0943/1357] Remove outdated integration test in preparation for
 update of keras_preprocessing.

PiperOrigin-RevId: 215231309
---
 .../python/keras/preprocessing/image_test.py  | 37 -------------------
 1 file changed, 37 deletions(-)

diff --git a/tensorflow/python/keras/preprocessing/image_test.py b/tensorflow/python/keras/preprocessing/image_test.py
index 362cbc1dc9..4abaadfcd3 100644
--- a/tensorflow/python/keras/preprocessing/image_test.py
+++ b/tensorflow/python/keras/preprocessing/image_test.py
@@ -94,43 +94,6 @@ class TestImage(test.TestCase):
         self.assertEqual(x.shape[1:], images.shape[1:])
         break
 
-  def test_image_data_generator_with_validation_split(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    for test_images in _generate_test_images():
-      img_list = []
-      for im in test_images:
-        img_list.append(keras.preprocessing.image.img_to_array(im)[None, ...])
-
-      images = np.vstack(img_list)
-      generator = keras.preprocessing.image.ImageDataGenerator(
-          validation_split=0.5)
-      seq = generator.flow(
-          images,
-          np.arange(images.shape[0]),
-          shuffle=False,
-          batch_size=3,
-          subset='validation')
-      _, y = seq[0]
-      self.assertEqual(list(y), [0, 1, 2])
-      seq = generator.flow(
-          images,
-          np.arange(images.shape[0]),
-          shuffle=False,
-          batch_size=3,
-          subset='training')
-      _, y2 = seq[0]
-      self.assertEqual(list(y2), [4, 5, 6])
-
-      with self.assertRaises(ValueError):
-        generator.flow(
-            images,
-            np.arange(images.shape[0]),
-            shuffle=False,
-            batch_size=3,
-            subset='foo')
-
   def test_image_data_generator_with_split_value_error(self):
     with self.assertRaises(ValueError):
       keras.preprocessing.image.ImageDataGenerator(validation_split=5)
-- 
GitLab


From a5fc8b064884b926ade9f7973dc096c0677a14e0 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Mon, 1 Oct 2018 10:35:02 -0700
Subject: [PATCH 0944/1357] Name fusion parameters simply "param_X". Where "X"
 is the parameter number. Previously, fusion parameter names including the
 name of the original instruction which produced the value which was
 confusing.

PiperOrigin-RevId: 215238171
---
 .../compiler/xla/service/hlo_computation.cc   | 36 +++----------------
 .../compiler/xla/service/hlo_instructions.cc  |  3 +-
 2 files changed, 6 insertions(+), 33 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 0e5920af7a..4613d6762e 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -122,30 +122,6 @@ HloInstruction* HloComputation::AddParameter(
   return instructions_.back().get();
 }
 
-namespace {
-
-// Returns the new name for a fusion parameter when we change its number.
-//
-// Fusion parameters are named foo.param_1, bar.param_2, etc. We are
-// renumbering the parameters, so replace the final number in the name with
-// the updated value.
-string RenameFusionParameter(const string& original_name, int64 new_param_no) {
-  const string param_underscore = ".param_";
-  size_t index = original_name.rfind(param_underscore);
-  if (index == string::npos) {
-    return original_name;
-  }
-  string after_param = original_name.substr(index + param_underscore.size());
-  int64 numeric_suffix;
-  if (absl::SimpleAtoi(after_param, &numeric_suffix)) {
-    return StrCat(original_name.substr(0, index + param_underscore.size()),
-                  new_param_no);
-  }
-  return original_name;
-}
-
-}  // namespace
-
 Status HloComputation::RemoveParameter(int64 param_no) {
   CHECK_GE(param_no, 0);
   CHECK_LT(param_no, param_instructions_.size());
@@ -158,11 +134,9 @@ Status HloComputation::RemoveParameter(int64 param_no) {
 
   while (param_no < param_instructions_.size()) {
     param_instruction = param_instructions_[param_no];
-    string param_name =
-        RenameFusionParameter(param_instruction->name(), param_no);
     HloInstruction* new_instr =
         AddInstructionInternal(HloInstruction::CreateParameter(
-            param_no, param_instruction->shape(), param_name));
+            param_no, param_instruction->shape(), StrCat("param_", param_no)));
     TF_RETURN_IF_ERROR(param_instruction->ReplaceAllUsesWith(new_instr));
     param_instructions_[param_no] = new_instr;
     TF_RETURN_IF_ERROR(RemoveInstruction(param_instruction));
@@ -186,11 +160,9 @@ Status HloComputation::RemoveUnusedParameters() {
 
     if (removed > 0) {
       const int64 param_no = i - removed;
-      string param_name =
-          RenameFusionParameter(param_instruction->name(), param_no);
-      HloInstruction* new_instr =
-          AddInstructionInternal(HloInstruction::CreateParameter(
-              param_no, param_instruction->shape(), param_name));
+      HloInstruction* new_instr = AddInstructionInternal(
+          HloInstruction::CreateParameter(param_no, param_instruction->shape(),
+                                          StrCat("param_", param_no)));
       TF_RETURN_IF_ERROR(param_instruction->ReplaceAllUsesWith(new_instr));
       param_instructions_[param_no] = new_instr;
       TF_RETURN_IF_ERROR(RemoveInstruction(param_instruction));
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index cd71bc3323..ad45a82941 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -1042,7 +1042,8 @@ HloInstruction* HloFusionInstruction::AddFusionOperand(
   const int64 param_no = operand_count();
   // Name the parameter after the instruction it represents in the outer
   // (non-fusion) computation.
-  string param_name = StrCat(new_operand->name(), ".param_", param_no);
+  // string param_name = StrCat(new_operand->name(), ".param_", param_no);
+  string param_name = StrCat("param_", param_no);
   HloInstruction* fused_parameter =
       fused_instructions_computation()->AddParameter(
           HloInstruction::CreateParameter(param_no, new_operand->shape(),
-- 
GitLab


From a6478312ef296ba9684931135851e9c7bb460444 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 1 Oct 2018 10:36:07 -0700
Subject: [PATCH 0945/1357] Replace the tf.name_scope call with an internal
 context manager that can contain additional boilerplate later on.
 Unfortunately it could not be extended to include the error handling.

PiperOrigin-RevId: 215238369
---
 tensorflow/python/autograph/converters/BUILD  |  6 +--
 .../{name_scopes.py => function_scopes.py}    | 32 ++++++++-------
 ...scopes_test.py => function_scopes_test.py} | 40 +++++++++----------
 tensorflow/python/autograph/core/BUILD        | 12 ++++++
 .../autograph/core/converter_testing.py       |  2 +
 .../autograph/core/function_wrapping.py       | 30 ++++++++++++++
 .../autograph/core/function_wrapping_test.py  | 34 ++++++++++++++++
 .../python/autograph/impl/conversion.py       |  6 ++-
 8 files changed, 122 insertions(+), 40 deletions(-)
 rename tensorflow/python/autograph/converters/{name_scopes.py => function_scopes.py} (72%)
 rename tensorflow/python/autograph/converters/{name_scopes_test.py => function_scopes_test.py} (71%)
 create mode 100644 tensorflow/python/autograph/core/function_wrapping.py
 create mode 100644 tensorflow/python/autograph/core/function_wrapping_test.py

diff --git a/tensorflow/python/autograph/converters/BUILD b/tensorflow/python/autograph/converters/BUILD
index 7b029de8ed..f06dc78f0e 100644
--- a/tensorflow/python/autograph/converters/BUILD
+++ b/tensorflow/python/autograph/converters/BUILD
@@ -27,10 +27,10 @@ py_library(
         "decorators.py",
         "directives.py",
         "error_handlers.py",
+        "function_scopes.py",
         "list_comprehensions.py",
         "lists.py",
         "logical_expressions.py",
-        "name_scopes.py",
         "return_statements.py",
         "side_effect_guards.py",
         "slices.py",
@@ -157,8 +157,8 @@ py_test(
 )
 
 py_test(
-    name = "name_scopes_test",
-    srcs = ["name_scopes_test.py"],
+    name = "function_scopes_test",
+    srcs = ["function_scopes_test.py"],
     deps = [
         ":converters",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/autograph/converters/name_scopes.py b/tensorflow/python/autograph/converters/function_scopes.py
similarity index 72%
rename from tensorflow/python/autograph/converters/name_scopes.py
rename to tensorflow/python/autograph/converters/function_scopes.py
index a9c55ccff0..284b5b3519 100644
--- a/tensorflow/python/autograph/converters/name_scopes.py
+++ b/tensorflow/python/autograph/converters/function_scopes.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Wraps a function body with a `name_scope` of the function name."""
+"""Wraps the body of a converted function with auxiliary constructs."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -24,8 +24,8 @@ from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.pyct import templates
 
 
-class FunctionNameScopeTransformer(converter.Base):
-  """Wrap a function body with a `name_scope` of the function name."""
+class FunctionBodyTransformer(converter.Base):
+  """Wraps function bodies around autograph-specific boilerplate."""
 
   def _name_for_current_scope(self):
     innermost = self.enclosing_entities[-1]
@@ -49,26 +49,28 @@ class FunctionNameScopeTransformer(converter.Base):
   def visit_FunctionDef(self, node):
     node = self.generic_visit(node)
 
-    unscoped_body = []
-    scoped_body = node.body
-    if scoped_body:
-      first = scoped_body[0]
-      if isinstance(first, gast.Expr) and isinstance(first.value, gast.Str):
-        # Skip any docstring.
-        unscoped_body = scoped_body[:1]
-        scoped_body = scoped_body[1:]
+    final_body = []
+    indented_body = node.body
+    if node.body:
+      first_statement = node.body[0]
+      # Skip the docstring, if any.
+      if (isinstance(first_statement, gast.Expr) and
+          isinstance(first_statement.value, gast.Str)):
+        indented_body = indented_body[1:]
+        final_body.append(first_statement)
 
     template = """
-      with tf.name_scope(scope_name):
+      with ag__.function_scope(scope_name):
         body
     """
     scoped_body = templates.replace(
         template,
         scope_name=gast.Str(self._name_for_current_scope()),
-        body=scoped_body)
-    node.body = unscoped_body + scoped_body
+        body=indented_body)
+    final_body.extend(scoped_body)
+    node.body = final_body
     return node
 
 
 def transform(node, ctx):
-  return FunctionNameScopeTransformer(ctx).visit(node)
+  return FunctionBodyTransformer(ctx).visit(node)
diff --git a/tensorflow/python/autograph/converters/name_scopes_test.py b/tensorflow/python/autograph/converters/function_scopes_test.py
similarity index 71%
rename from tensorflow/python/autograph/converters/name_scopes_test.py
rename to tensorflow/python/autograph/converters/function_scopes_test.py
index 73933c1c4f..e5ce03a109 100644
--- a/tensorflow/python/autograph/converters/name_scopes_test.py
+++ b/tensorflow/python/autograph/converters/function_scopes_test.py
@@ -12,51 +12,51 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for for_canonicalization module."""
+"""Tests for function_scopes module."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.autograph.converters import name_scopes
+from tensorflow.python.autograph.converters import function_scopes
 from tensorflow.python.autograph.core import converter_testing
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 
 
-class FunctionNameScopeTransformer(converter_testing.TestCase):
+class FunctionBodyTransformerTest(converter_testing.TestCase):
 
   def test_basic(self):
 
     def test_fn(l):
-      """This should stay here."""
+      """Docstring."""
       a = 1
       l += a
       return l
 
-    with self.converted(test_fn, name_scopes, {}, ops.name_scope) as result:
+    with self.converted(test_fn, function_scopes, {}) as result:
       result_op = result.test_fn(constant_op.constant(1))
       self.assertIn('test_fn/', result_op.op.name)
-      self.assertEqual('This should stay here.', result.test_fn.__doc__)
+      self.assertEqual('Docstring.', result.test_fn.__doc__)
 
-  def test_long_docstring(self):
+  def test_multiline_docstring(self):
 
-    def test_fn(l):
-      """Multi-line docstring.
+    tf = None
+
+    def test_fn():
+      """First sentence.
 
-      Args:
-        l: A thing.
-      Returns:
-        l
+      Second sentence.
       """
-      return l + 1
+      return tf.constant(1)
 
-    with self.converted(test_fn, name_scopes, {}, ops.name_scope) as result:
-      result_op = result.test_fn(constant_op.constant(1))
+    with self.converted(test_fn, function_scopes, {},
+                        constant_op.constant) as result:
+      result_op = result.test_fn()
       self.assertIn('test_fn/', result_op.op.name)
-      self.assertIn('Multi-line docstring.', result.test_fn.__doc__)
-      self.assertIn('Returns:', result.test_fn.__doc__)
+      self.assertIn('First sentence.', result.test_fn.__doc__)
+      self.assertIn('Second sentence.', result.test_fn.__doc__)
 
   def test_nested_functions(self):
 
@@ -68,7 +68,7 @@ class FunctionNameScopeTransformer(converter_testing.TestCase):
       l += 1
       return l, inner_fn(l)
 
-    with self.converted(test_fn, name_scopes, {}, ops.name_scope) as result:
+    with self.converted(test_fn, function_scopes, {}, ops.name_scope) as result:
       first, second = result.test_fn(constant_op.constant(1))
       self.assertIn('test_fn/', first.op.name)
       self.assertNotIn('inner_fn', first.op.name)
@@ -88,7 +88,7 @@ class FunctionNameScopeTransformer(converter_testing.TestCase):
 
     ns = {'TestClass': TestClass}
     node, ctx = self.prepare(TestClass, ns, owner_type=TestClass)
-    node = name_scopes.transform(node, ctx)
+    node = function_scopes.transform(node, ctx)
 
     with self.compiled(node, {}, ops.name_scope) as result:
       first, second = result.TestClass().test_fn(constant_op.constant(1))
diff --git a/tensorflow/python/autograph/core/BUILD b/tensorflow/python/autograph/core/BUILD
index 85fecf084d..843e381f31 100644
--- a/tensorflow/python/autograph/core/BUILD
+++ b/tensorflow/python/autograph/core/BUILD
@@ -20,11 +20,13 @@ py_library(
         "config.py",
         "converter.py",
         "errors.py",
+        "function_wrapping.py",
         "naming.py",
     ],
     srcs_version = "PY2AND3",
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
+        "//tensorflow/python:framework_ops",
         "//tensorflow/python/autograph/pyct",
         "//tensorflow/python/autograph/pyct/static_analysis",
         "//tensorflow/python/autograph/utils",
@@ -46,6 +48,16 @@ py_test(
     ],
 )
 
+py_test(
+    name = "function_wrapping_test",
+    srcs = ["function_wrapping_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":core",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 py_test(
     name = "naming_test",
     srcs = ["naming_test.py"],
diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py
index 7ce1b7c4c5..dc2d419d34 100644
--- a/tensorflow/python/autograph/core/converter_testing.py
+++ b/tensorflow/python/autograph/core/converter_testing.py
@@ -29,6 +29,7 @@ from tensorflow.python.autograph import utils
 from tensorflow.python.autograph.core import config
 from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.core import errors
+from tensorflow.python.autograph.core import function_wrapping
 from tensorflow.python.autograph.pyct import compiler
 from tensorflow.python.autograph.pyct import parser
 from tensorflow.python.autograph.pyct import pretty_printer
@@ -112,6 +113,7 @@ class TestCase(test.TestCase):
       fake_ag.__dict__['utils'] = utils
       fake_ag.__dict__['rewrite_graph_construction_error'] = (
           errors.rewrite_graph_construction_error)
+      fake_ag.__dict__['function_scope'] = function_wrapping.function_scope
       result.__dict__['ag__'] = fake_ag
       for k, v in namespace.items():
         result.__dict__[k] = v
diff --git a/tensorflow/python/autograph/core/function_wrapping.py b/tensorflow/python/autograph/core/function_wrapping.py
new file mode 100644
index 0000000000..21b66eff02
--- /dev/null
+++ b/tensorflow/python/autograph/core/function_wrapping.py
@@ -0,0 +1,30 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Support for wrapping converted functions bodies with auxiliary logic."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+
+from tensorflow.python.framework import ops
+
+
+@contextlib.contextmanager
+def function_scope(function_name):
+  """Returns a context manager for the converted body of a function."""
+  with ops.name_scope(function_name):
+    yield
diff --git a/tensorflow/python/autograph/core/function_wrapping_test.py b/tensorflow/python/autograph/core/function_wrapping_test.py
new file mode 100644
index 0000000000..5e217055c7
--- /dev/null
+++ b/tensorflow/python/autograph/core/function_wrapping_test.py
@@ -0,0 +1,34 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for function_wrapping module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.autograph.core import function_wrapping
+from tensorflow.python.framework import constant_op
+from tensorflow.python.platform import test
+
+
+class FunctionWrappingTest(test.TestCase):
+
+  def test_function_scope_name(self):
+    with function_wrapping.function_scope('test_name'):
+      t = constant_op.constant(1)
+    self.assertIn('test_name', t.name)
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index a0d13c82a8..52abd40626 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -34,15 +34,16 @@ from tensorflow.python.autograph.converters import control_flow
 from tensorflow.python.autograph.converters import decorators
 from tensorflow.python.autograph.converters import directives
 from tensorflow.python.autograph.converters import error_handlers
+from tensorflow.python.autograph.converters import function_scopes
 from tensorflow.python.autograph.converters import lists
 from tensorflow.python.autograph.converters import logical_expressions
-from tensorflow.python.autograph.converters import name_scopes
 from tensorflow.python.autograph.converters import return_statements
 from tensorflow.python.autograph.converters import side_effect_guards
 from tensorflow.python.autograph.converters import slices
 from tensorflow.python.autograph.core import config
 from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.core import errors
+from tensorflow.python.autograph.core import function_wrapping
 from tensorflow.python.autograph.pyct import ast_util
 from tensorflow.python.autograph.pyct import inspect_utils
 from tensorflow.python.autograph.pyct import origin_info
@@ -257,6 +258,7 @@ def _add_self_references(namespace, autograph_module):
     ag_internal.converted_call = autograph_module.converted_call
     ag_internal.ConversionOptions = autograph_module.ConversionOptions
     ag_internal.utils = utils
+    ag_internal.function_scope = function_wrapping.function_scope
     ag_internal.rewrite_graph_construction_error = (
         errors.rewrite_graph_construction_error)
     # TODO(mdan): Add safeguards against name clashes.
@@ -346,7 +348,7 @@ def node_to_graph(node, context, rewrite_errors=True):
   node = converter.apply_(node, context, conditional_expressions)
   node = converter.apply_(node, context, logical_expressions)
   node = converter.apply_(node, context, side_effect_guards)
-  node = converter.apply_(node, context, name_scopes)
+  node = converter.apply_(node, context, function_scopes)
   if rewrite_errors:
     node = converter.apply_(node, context, error_handlers)
   return node
-- 
GitLab


From 57a831d20929e71279d164905fed93e1f518ee37 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 10:41:58 -0700
Subject: [PATCH 0946/1357] Bugfix: When a subgraph is encapsulated and
 replaced by XlaLaunch op, the requested device placement of the XlaLaunch op
 must be derived from the subgraph. PiperOrigin-RevId: 215239672

---
 tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc    | 6 ++++++
 .../compiler/jit/encapsulate_xla_computations_pass.cc    | 2 ++
 .../jit/encapsulate_xla_computations_pass_test.cc        | 9 ++++++---
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index e0632ff7e4..15faf31077 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -748,6 +748,12 @@ Node* Encapsulator::Subgraph::MakeNodeImage(const Graph* graph_in, Node* node) {
     graph_->set_versions(graph_in->versions());
   }
 
+  // TODO(b/116981129): Enhance how the device for the encapsulated subgraph is
+  // determined. In case of hard placement, ensure all the encapsulated nodes
+  // have the same requested device, which in turn will be the requested device
+  // for the entire encapsulated subgraph. In case of soft placement, use a
+  // deterministic approach to fill in the requested device. Handle co-location
+  // constraints similarly if they exist.
   if (device_.empty()) {
     device_ = node->assigned_device_name().empty()
                   ? node->requested_device()
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
index 97ef8cd3cb..755c364c62 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -297,7 +297,9 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
 
     // Target the XLA CPU/GPU backends.
     VLOG(2) << "Replacing with XlaLaunch";
+    VLOG(2) << "Device is " << launch->requested_device();
     def.set_op("XlaLaunch");
+    def.set_device(launch->requested_device());
     AddNodeAttr("Tconstants", DataTypeVector{}, &def);
     AddNodeAttr("Targs", arg_types, &def);
     AddNodeAttr("Nresources", num_variables, &def);
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
index f643fb0cfe..479038ac8e 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
@@ -55,6 +55,7 @@ static std::unique_ptr<Graph> MakeOuterGraph(
           .Input(u.node()->name(), 0, DT_RESOURCE)
           .Input(v.node()->name(), 0, DT_RESOURCE)
           .Input(w.node()->name(), 0, DT_RESOURCE)
+          .Device("/gpu:0")
           .Attr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0")
           .Attr("_variable_start_index", 4)
           .Finalize(&def));
@@ -107,10 +108,11 @@ static std::unique_ptr<Graph> MakeBodyGraph() {
 
   auto add_attrs = [](Node* node) {
     node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0");
+    node->set_requested_device("/gpu:0");
   };
 
   auto b_identity = ops::Identity(scope.WithOpName("B_identity"), arg1);
-
+  add_attrs(b_identity.node());
   auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), arg4, DT_FLOAT);
   add_attrs(read_u.node());
   auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), arg5, DT_FLOAT);
@@ -215,6 +217,7 @@ TEST(EncapsulateXlaComputations, Encapsulate) {
 
     auto add_attrs = [](Node* node) {
       node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0");
+      node->set_requested_device("/gpu:0");
     };
 
     auto b_identity = ops::Identity(scope.WithOpName("B_identity"), b);
@@ -317,8 +320,8 @@ TEST(EncapsulateXlaComputations, BuildXlaLaunchOp) {
   NameAttrList function;
   function.set_name("launch0");
   auto launch = ops::XlaLaunch(
-      scope.WithOpName("launch0"), std::initializer_list<Input>{},
-      std::initializer_list<Input>{a, b, c, d},
+      scope.WithOpName("launch0").WithDevice("/gpu:0"),
+      std::initializer_list<Input>{}, std::initializer_list<Input>{a, b, c, d},
       std::initializer_list<Input>{u, v, w},
       DataTypeVector{DT_FLOAT, DT_INT32, DT_FLOAT, DT_FLOAT}, function);
 
-- 
GitLab


From ec2b5f889fb3eb677f7b8198cbd8d505b2779fa7 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 1 Oct 2018 10:42:14 -0700
Subject: [PATCH 0947/1357] Automated rollback of commit
 5f822d694af6e4aa57fe8a426032a91dc61e30d6

PiperOrigin-RevId: 215239710
---
 tensorflow/contrib/factorization/BUILD           |  9 +--------
 .../contrib/factorization/python/ops/gmm_ops.py  | 14 +++++++-------
 .../factorization/python/ops/wals_test.py        | 16 ++++++++--------
 tensorflow/contrib/opt/BUILD                     |  5 -----
 .../contrib/timeseries/python/timeseries/BUILD   |  7 +------
 5 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD
index 510f292508..e344d7a23b 100644
--- a/tensorflow/contrib/factorization/BUILD
+++ b/tensorflow/contrib/factorization/BUILD
@@ -154,8 +154,6 @@ tf_py_test(
     ],
     tags = [
         "no_pip",  # b/38283730
-        "noasan",  # b/116875897
-        "nomsan",
         "notsan",  # Flaky: b/30756419
     ],
 )
@@ -179,11 +177,7 @@ tf_py_test(
         "//tensorflow/python:random_seed",
         "//tensorflow/python:variables",
     ],
-    tags = [
-        "noasan",  # b/116875897
-        "nomsan",
-        "notsan",  # b/62863147
-    ],
+    tags = ["notsan"],  # b/62863147
 )
 
 py_library(
@@ -282,7 +276,6 @@ tf_py_test(
         "manual",
         "noasan",  # times out b/63678675
         "nomsan",
-        "notsan",  # b/116875897
     ],
 )
 
diff --git a/tensorflow/contrib/factorization/python/ops/gmm_ops.py b/tensorflow/contrib/factorization/python/ops/gmm_ops.py
index e076631bc1..d365ad1117 100644
--- a/tensorflow/contrib/factorization/python/ops/gmm_ops.py
+++ b/tensorflow/contrib/factorization/python/ops/gmm_ops.py
@@ -154,10 +154,10 @@ class GmmAlgorithm(object):
   def _create_variables(self):
     """Initializes GMM algorithm."""
     init_value = array_ops.constant([], dtype=dtypes.float32)
-    self._means = variables.Variable(init_value,
-                                     name=self.CLUSTERS_VARIABLE,
-                                     validate_shape=False)
-    self._covs = variables.Variable(
+    self._means = variables.VariableV1(init_value,
+                                       name=self.CLUSTERS_VARIABLE,
+                                       validate_shape=False)
+    self._covs = variables.VariableV1(
         init_value, name=self.CLUSTERS_COVS_VARIABLE, validate_shape=False)
     # Mixture weights, representing the probability that a randomly
     # selected unobservable data (in EM terms) was generated by component k.
@@ -165,9 +165,9 @@ class GmmAlgorithm(object):
         array_ops.tile([1.0 / self._num_classes], [self._num_classes]),
         name=self.CLUSTERS_WEIGHT,
         validate_shape=False)
-    self._cluster_centers_initialized = variables.Variable(False,
-                                                           dtype=dtypes.bool,
-                                                           name='initialized')
+    self._cluster_centers_initialized = variables.VariableV1(False,
+                                                             dtype=dtypes.bool,
+                                                             name='initialized')
 
   def _initialize_variables(self, data, initial_means=None):
     """Initializes variables.
diff --git a/tensorflow/contrib/factorization/python/ops/wals_test.py b/tensorflow/contrib/factorization/python/ops/wals_test.py
index 9bdbd05015..75d577f429 100644
--- a/tensorflow/contrib/factorization/python/ops/wals_test.py
+++ b/tensorflow/contrib/factorization/python/ops/wals_test.py
@@ -420,13 +420,13 @@ class WALSMatrixFactorizationUnsupportedTest(test.TestCase):
 class SweepHookTest(test.TestCase):
 
   def test_sweeps(self):
-    is_row_sweep_var = variables.Variable(True)
-    is_sweep_done_var = variables.Variable(False)
-    init_done = variables.Variable(False)
-    row_prep_done = variables.Variable(False)
-    col_prep_done = variables.Variable(False)
-    row_train_done = variables.Variable(False)
-    col_train_done = variables.Variable(False)
+    is_row_sweep_var = variables.VariableV1(True)
+    is_sweep_done_var = variables.VariableV1(False)
+    init_done = variables.VariableV1(False)
+    row_prep_done = variables.VariableV1(False)
+    col_prep_done = variables.VariableV1(False)
+    row_train_done = variables.VariableV1(False)
+    col_train_done = variables.VariableV1(False)
 
     init_op = state_ops.assign(init_done, True)
     row_prep_op = state_ops.assign(row_prep_done, True)
@@ -486,7 +486,7 @@ class StopAtSweepHookTest(test.TestCase):
 
   def test_stop(self):
     hook = wals_lib._StopAtSweepHook(last_sweep=10)
-    completed_sweeps = variables.Variable(
+    completed_sweeps = variables.VariableV1(
         8, name=wals_lib.WALSMatrixFactorization.COMPLETED_SWEEPS)
     train_op = state_ops.assign_add(completed_sweeps, 1)
     hook.begin()
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index 6a67c6295d..f4ac70eb1a 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -377,11 +377,6 @@ py_test(
     size = "large",
     srcs = ["python/training/shampoo_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "noasan",  # b/116875897
-        "nomsan",
-        "notsan",
-    ],
     deps = [
         ":opt_py",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD
index cb1f707028..c230919168 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/BUILD
+++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD
@@ -159,12 +159,7 @@ py_test(
     ],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = [
-        "no_pip_gpu",  # b/63391119
-        "noasan",  # b/116875897
-        "nomsan",
-        "notsan",
-    ],
+    tags = ["no_pip_gpu"],  # b/63391119
     deps = [
         ":estimators",
         ":feature_keys",
-- 
GitLab


From ce1cdd52eda4b40ff8fb8c09bc178210883b3773 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 1 Oct 2018 10:57:32 -0700
Subject: [PATCH 0948/1357] Make GCS filesystem/metadata lookup retries
 configurable

PiperOrigin-RevId: 215243030
---
 .../cloud/compute_engine_metadata_client.cc   |   15 +-
 .../cloud/compute_engine_metadata_client.h    |   10 +-
 .../compute_engine_metadata_client_test.cc    |    6 +-
 .../compute_engine_zone_provider_test.cc      |    8 +-
 .../core/platform/cloud/gcs_file_system.cc    |   25 +-
 .../core/platform/cloud/gcs_file_system.h     |    7 +-
 .../platform/cloud/gcs_file_system_test.cc    | 1286 +++++++++--------
 .../cloud/google_auth_provider_test.cc        |   20 +-
 .../platform/cloud/retrying_file_system.h     |   67 +-
 .../cloud/retrying_file_system_test.cc        |  102 +-
 .../core/platform/cloud/retrying_utils.cc     |   35 +-
 .../core/platform/cloud/retrying_utils.h      |   29 +-
 .../platform/cloud/retrying_utils_test.cc     |   32 +-
 13 files changed, 849 insertions(+), 793 deletions(-)

diff --git a/tensorflow/core/platform/cloud/compute_engine_metadata_client.cc b/tensorflow/core/platform/cloud/compute_engine_metadata_client.cc
index f41b83ac34..affb68ebbb 100644
--- a/tensorflow/core/platform/cloud/compute_engine_metadata_client.cc
+++ b/tensorflow/core/platform/cloud/compute_engine_metadata_client.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <utility>
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
-#include "tensorflow/core/platform/cloud/retrying_utils.h"
 
 namespace tensorflow {
 
@@ -25,21 +24,14 @@ namespace {
 
 // The URL to retrieve metadata when running in Google Compute Engine.
 constexpr char kGceMetadataBaseUrl[] = "http://metadata/computeMetadata/v1/";
-// The default initial delay between retries with exponential backoff.
-constexpr int kInitialRetryDelayUsec = 500000;  // 0.5 sec
 
 }  // namespace
 
-ComputeEngineMetadataClient::ComputeEngineMetadataClient(
-    std::shared_ptr<HttpRequest::Factory> http_request_factory)
-    : ComputeEngineMetadataClient(std::move(http_request_factory),
-                                  kInitialRetryDelayUsec) {}
-
 ComputeEngineMetadataClient::ComputeEngineMetadataClient(
     std::shared_ptr<HttpRequest::Factory> http_request_factory,
-    int64 initial_retry_delay_usec)
+    const RetryConfig& config)
     : http_request_factory_(std::move(http_request_factory)),
-      initial_retry_delay_usec_(initial_retry_delay_usec) {}
+      retry_config_(config) {}
 
 Status ComputeEngineMetadataClient::GetMetadata(
     const string& path, std::vector<char>* response_buffer) {
@@ -52,8 +44,7 @@ Status ComputeEngineMetadataClient::GetMetadata(
     return Status::OK();
   };
 
-  return RetryingUtils::CallWithRetries(get_metadata_from_gce,
-                                        initial_retry_delay_usec_);
+  return RetryingUtils::CallWithRetries(get_metadata_from_gce, retry_config_);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/compute_engine_metadata_client.h b/tensorflow/core/platform/cloud/compute_engine_metadata_client.h
index 534ccf30b2..7f060327da 100644
--- a/tensorflow/core/platform/cloud/compute_engine_metadata_client.h
+++ b/tensorflow/core/platform/cloud/compute_engine_metadata_client.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/cloud/http_request.h"
+#include "tensorflow/core/platform/cloud/retrying_utils.h"
 
 namespace tensorflow {
 
@@ -31,10 +32,11 @@ namespace tensorflow {
 class ComputeEngineMetadataClient {
  public:
   explicit ComputeEngineMetadataClient(
-      std::shared_ptr<HttpRequest::Factory> http_request_factory);
-  ComputeEngineMetadataClient(
       std::shared_ptr<HttpRequest::Factory> http_request_factory,
-      int64 initial_retry_delay_usec);
+      const RetryConfig& config = RetryConfig(
+          10000,  /* init_delay_time_us = 1 ms */
+          1000000 /* max_delay_time_us = 1 s */
+          ));
   virtual ~ComputeEngineMetadataClient() {}
 
   /// \brief Get the metadata value for a given attribute of the metadata
@@ -54,7 +56,7 @@ class ComputeEngineMetadataClient {
 
  private:
   std::shared_ptr<HttpRequest::Factory> http_request_factory_;
-  const int64 initial_retry_delay_usec_;
+  const RetryConfig retry_config_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(ComputeEngineMetadataClient);
 };
diff --git a/tensorflow/core/platform/cloud/compute_engine_metadata_client_test.cc b/tensorflow/core/platform/cloud/compute_engine_metadata_client_test.cc
index 4c41ccaa0e..e891b4a5e9 100644
--- a/tensorflow/core/platform/cloud/compute_engine_metadata_client_test.cc
+++ b/tensorflow/core/platform/cloud/compute_engine_metadata_client_test.cc
@@ -30,7 +30,8 @@ TEST(ComputeEngineMetadataClientTest, GetMetadata) {
 
   std::shared_ptr<HttpRequest::Factory> http_factory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  ComputeEngineMetadataClient client(http_factory, 0);
+  ComputeEngineMetadataClient client(http_factory,
+                                     RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<char> result;
   TF_EXPECT_OK(
@@ -56,7 +57,8 @@ TEST(ComputeEngineMetadataClientTest, RetryOnFailure) {
 
   std::shared_ptr<HttpRequest::Factory> http_factory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  ComputeEngineMetadataClient client(http_factory, 0);
+  ComputeEngineMetadataClient client(http_factory,
+                                     RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<char> result;
   TF_EXPECT_OK(
diff --git a/tensorflow/core/platform/cloud/compute_engine_zone_provider_test.cc b/tensorflow/core/platform/cloud/compute_engine_zone_provider_test.cc
index f7477eca23..476e4f9c1f 100644
--- a/tensorflow/core/platform/cloud/compute_engine_zone_provider_test.cc
+++ b/tensorflow/core/platform/cloud/compute_engine_zone_provider_test.cc
@@ -34,8 +34,8 @@ TEST_F(ComputeEngineZoneProviderTest, GetZone) {
 
   auto httpRequestFactory = std::make_shared<FakeHttpRequestFactory>(&requests);
 
-  auto metadata_client =
-      std::make_shared<ComputeEngineMetadataClient>(httpRequestFactory, 0);
+  auto metadata_client = std::make_shared<ComputeEngineMetadataClient>(
+      httpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
 
   ComputeEngineZoneProvider provider(metadata_client);
 
@@ -55,8 +55,8 @@ TEST_F(ComputeEngineZoneProviderTest, InvalidZoneString) {
 
   auto httpRequestFactory = std::make_shared<FakeHttpRequestFactory>(&requests);
 
-  auto metadata_client =
-      std::make_shared<ComputeEngineMetadataClient>(httpRequestFactory, 0);
+  auto metadata_client = std::make_shared<ComputeEngineMetadataClient>(
+      httpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
 
   ComputeEngineZoneProvider provider(metadata_client);
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 83ea8539ed..c61b68aeeb 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -333,14 +333,14 @@ class GcsWritableFile : public WritableFile {
                   GcsFileSystem* filesystem,
                   GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
-                  int64 initial_retry_delay_usec)
+                  RetryConfig retry_config)
       : bucket_(bucket),
         object_(object),
         filesystem_(filesystem),
         timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
-        initial_retry_delay_usec_(initial_retry_delay_usec) {
+        retry_config_(retry_config) {
     // TODO: to make it safer, outfile_ should be constructed from an FD
     if (GetTmpFilename(&tmp_content_filename_).ok()) {
       outfile_.open(tmp_content_filename_,
@@ -357,14 +357,14 @@ class GcsWritableFile : public WritableFile {
                   GcsFileSystem* filesystem, const string& tmp_content_filename,
                   GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
-                  int64 initial_retry_delay_usec)
+                  RetryConfig retry_config)
       : bucket_(bucket),
         object_(object),
         filesystem_(filesystem),
         timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
-        initial_retry_delay_usec_(initial_retry_delay_usec) {
+        retry_config_(retry_config) {
     tmp_content_filename_ = tmp_content_filename;
     outfile_.open(tmp_content_filename_,
                   std::ofstream::binary | std::ofstream::app);
@@ -441,7 +441,7 @@ class GcsWritableFile : public WritableFile {
           first_attempt = false;
           return UploadToSession(session_uri, already_uploaded);
         },
-        initial_retry_delay_usec_);
+        retry_config_);
     if (upload_status.code() == errors::Code::NOT_FOUND) {
       // GCS docs recommend retrying the whole upload. We're relying on the
       // RetryingFileSystem to retry the Sync() call.
@@ -586,7 +586,7 @@ class GcsWritableFile : public WritableFile {
   GcsFileSystem::TimeoutConfig* timeouts_;
   std::function<void()> file_cache_erase_;
   bool sync_needed_;  // whether there is buffered data that needs to be synced
-  int64 initial_retry_delay_usec_;
+  RetryConfig retry_config_;
 };
 
 class GcsReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
@@ -791,7 +791,7 @@ GcsFileSystem::GcsFileSystem(
     std::unique_ptr<ZoneProvider> zone_provider, size_t block_size,
     size_t max_bytes, uint64 max_staleness, uint64 stat_cache_max_age,
     size_t stat_cache_max_entries, uint64 matching_paths_cache_max_age,
-    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec,
+    size_t matching_paths_cache_max_entries, RetryConfig retry_config,
     TimeoutConfig timeouts, const std::unordered_set<string>& allowed_locations,
     std::pair<const string, const string>* additional_header)
     : auth_provider_(std::move(auth_provider)),
@@ -806,7 +806,7 @@ GcsFileSystem::GcsFileSystem(
           kCacheNeverExpire, kBucketLocationCacheMaxEntries)),
       allowed_locations_(allowed_locations),
       timeouts_(timeouts),
-      initial_retry_delay_usec_(initial_retry_delay_usec),
+      retry_config_(retry_config),
       additional_header_(additional_header) {}
 
 Status GcsFileSystem::NewRandomAccessFile(
@@ -941,7 +941,7 @@ Status GcsFileSystem::NewWritableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(bucket, object, this, &timeouts_,
                                     [this, fname]() { ClearFileCaches(fname); },
-                                    initial_retry_delay_usec_));
+                                    retry_config_));
   return Status::OK();
 }
 
@@ -981,7 +981,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
       bucket, object, this, old_content_filename, &timeouts_,
-      [this, fname]() { ClearFileCaches(fname); }, initial_retry_delay_usec_));
+      [this, fname]() { ClearFileCaches(fname); }, retry_config_));
   return Status::OK();
 }
 
@@ -1534,7 +1534,7 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) {
   // on the server side, we can't just retry the whole RenameFile operation
   // because the source object is already gone.
   return RetryingUtils::DeleteWithRetries(
-      [this, &src]() { return DeleteFile(src); }, initial_retry_delay_usec_);
+      [this, &src]() { return DeleteFile(src); }, retry_config_);
 }
 
 Status GcsFileSystem::IsDirectory(const string& fname) {
@@ -1590,8 +1590,7 @@ Status GcsFileSystem::DeleteRecursively(const string& dirname,
     // and therefore RetryingFileSystem won't pay attention to the failures,
     // we need to make sure these failures are properly retried.
     const auto& delete_file_status = RetryingUtils::DeleteWithRetries(
-        [this, &full_path]() { return DeleteFile(full_path); },
-        initial_retry_delay_usec_);
+        [this, &full_path]() { return DeleteFile(full_path); }, retry_config_);
     if (!delete_file_status.ok()) {
       if (IsDirectory(full_path).ok()) {
         // The object is a directory marker.
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h
index 71db707687..d0840a3046 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.h
+++ b/tensorflow/core/platform/cloud/gcs_file_system.h
@@ -93,7 +93,7 @@ class GcsFileSystem : public FileSystem {
                 uint64 stat_cache_max_age, size_t stat_cache_max_entries,
                 uint64 matching_paths_cache_max_age,
                 size_t matching_paths_cache_max_entries,
-                int64 initial_retry_delay_usec, TimeoutConfig timeouts,
+                RetryConfig retry_config, TimeoutConfig timeouts,
                 const std::unordered_set<string>& allowed_locations,
                 std::pair<const string, const string>* additional_header);
 
@@ -332,7 +332,7 @@ class GcsFileSystem : public FileSystem {
   GcsStatsInterface* stats_ = nullptr;  // Not owned.
 
   /// The initial delay for exponential backoffs when retrying failed calls.
-  const int64 initial_retry_delay_usec_ = 1000000L;
+  RetryConfig retry_config_;
 
   // Additional header material to be transmitted with all GCS requests
   std::unique_ptr<std::pair<const string, const string>> additional_header_;
@@ -344,7 +344,8 @@ class GcsFileSystem : public FileSystem {
 class RetryingGcsFileSystem : public RetryingFileSystem<GcsFileSystem> {
  public:
   RetryingGcsFileSystem()
-      : RetryingFileSystem(std::unique_ptr<GcsFileSystem>(new GcsFileSystem)) {}
+      : RetryingFileSystem(std::unique_ptr<GcsFileSystem>(new GcsFileSystem),
+                           RetryConfig(100000 /* init_delay_time_us */)) {}
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 14376ad339..702802b185 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -24,6 +24,8 @@ namespace tensorflow {
 namespace {
 
 static GcsFileSystem::TimeoutConfig kTestTimeoutConfig(5, 1, 10, 20, 30);
+static RetryConfig kTestRetryConfig(0 /* init_delay_time_us */);
+
 // Default (empty) constraint config
 static std::unordered_set<string>* kAllowedLocationsDefault =
     new std::unordered_set<string>();
@@ -62,16 +64,16 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
            "Range: 6-11\n"
            "Timeouts: 5 1 20\n",
            "6789")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -108,9 +110,9 @@ TEST(GcsFileSystemTest,
                    0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig,
-                   *kAllowedLocationsAuto, nullptr /* gcs additional header */);
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsAuto,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -150,9 +152,9 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithLocationConstraintCaching) {
                    0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig,
-                   *kAllowedLocationsAuto, nullptr /* gcs additional header */);
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsAuto,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
 
@@ -191,9 +193,9 @@ TEST(GcsFileSystemTest,
                    0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig,
-                   *kAllowedLocationsAuto, nullptr /* gcs additional header */);
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsAuto,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   EXPECT_EQ(tensorflow::errors::FailedPrecondition(
@@ -216,16 +218,16 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_DifferentN) {
            "Range: 3-12\n"
            "Timeouts: 5 1 20\n",
            "3456789")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -283,7 +285,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 9 /* block size */,
       18 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -372,7 +374,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_Flush) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 9 /* block size */,
       18 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -414,17 +416,17 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
                            "Range: 8-15\n"
                            "Timeouts: 5 1 20\n",
                            "89abcdef")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 8 /* block size */,
-      16 /* max bytes */, 3600 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   8 /* block size */, 16 /* max bytes */,
+                   3600 /* max staleness */, 3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
   char scratch[100];
   StringPiece result;
   // There should only be two HTTP requests issued to GCS even though we iterate
@@ -492,7 +494,7 @@ TEST(GcsFileSystemTest,
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 9 /* block size */,
       18 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -513,17 +515,17 @@ TEST(GcsFileSystemTest,
 
 TEST(GcsFileSystemTest, NewRandomAccessFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
-      0 /* read ahead bytes */, 0 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* read ahead bytes */, 0 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -547,16 +549,16 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_InconsistentRead) {
            "012")});
 
   // Set stat_cache_max_age to 1000s so that StatCache could work.
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 1e3 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   1e3 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   // Stat the file first so that the file stats are cached.
   FileStatistics stat;
@@ -621,7 +623,7 @@ TEST(GcsFileSystemTest, NewWritableFile) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 8 /* block size */,
       8 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -703,16 +705,16 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
                            "Timeouts: 5 1 30\n"
                            "Put body: t2\n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -773,17 +775,17 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) {
            "Range: 0-7\n"
            "Timeouts: 5 1 20\n",
            "01234567")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 8 /* block size */,
-      8 /* max bytes */, 3600 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   8 /* block size */, 8 /* max bytes */,
+                   3600 /* max staleness */, 3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
   // Pull the file's first block into the cache. This will trigger the first
   // HTTP request to GCS.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -867,9 +869,9 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 2 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+      0 /* matching paths cache max entries */,
+      RetryConfig(2 /* .init_delay_time_us */), kTestTimeoutConfig,
+      *kAllowedLocationsDefault, nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -918,16 +920,16 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
                            "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -948,16 +950,16 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
 
 TEST(GcsFileSystemTest, NewWritableFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1013,7 +1015,7 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 32 /* block size */,
       32 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1041,16 +1043,16 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
 
 TEST(GcsFileSystemTest, NewAppendableFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1075,16 +1077,16 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                            "Range: 0-",
                            content.size() - 1, "\n", "Timeouts: 5 1 20\n"),
            content)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile(
@@ -1096,16 +1098,16 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
 
 TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1120,16 +1122,16 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt"));
 }
@@ -1150,16 +1152,16 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
            "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder"));
 }
@@ -1176,16 +1178,16 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{\"size\": \"100\"}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket1"));
   TF_EXPECT_OK(fs.FileExists("gs://bucket1/"));
@@ -1206,16 +1208,16 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{\"items\": []}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(errors::Code::NOT_FOUND,
             fs.FileExists("gs://bucket/path/file1.txt").code());
@@ -1233,19 +1235,19 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
-  EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
-            fs.FileExists("gs://bucket2/").code());
-  EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
+  EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
+            fs.FileExists("gs://bucket2/").code());
+  EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.FileExists("gs://bucket2").code());
 }
 
@@ -1279,7 +1281,7 @@ TEST(GcsFileSystemTest, FileExists_StatCache) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1306,7 +1308,7 @@ TEST(GcsFileSystemTest, FileExists_DirectoryMark) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1322,16 +1324,16 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{\"prefixes\": [\"path/subpath/\"]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -1350,16 +1352,16 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
       "\"prefixes\": [\"path/subpath/\"]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -1379,16 +1381,16 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }],"
       "\"prefixes\": [\"path/subpath/\"]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -1407,16 +1409,16 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
       "\"prefixes\": [\"path/subpath/\"]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -1432,16 +1434,16 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket-a-b-c", &children));
@@ -1457,16 +1459,16 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -1498,16 +1500,16 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "  { \"name\": \"path/file4.txt\" },"
            "  { \"name\": \"path/file5.txt\" }]}")});
 
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -1525,16 +1527,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
       "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/subpath/file2.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(
@@ -1553,16 +1555,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", &result));
@@ -1582,16 +1584,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file2.txt", &result));
@@ -1608,16 +1610,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", &result));
@@ -1634,16 +1636,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file3.txt", &result));
@@ -1652,16 +1654,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
 
 TEST(GcsFileSystemTest, GetMatchingPaths_OnlyWildcard) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1686,16 +1688,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/file2.txt\" },"
            "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 3600 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   3600 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   // Repeated calls to fs.GetMatchingPaths on these patterns should not lead to
   // any additional HTTP requests to GCS.
@@ -1729,16 +1731,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache_Flush) {
            "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subpath/file2.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 3600 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   3600 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   // This loop should trigger the first HTTP request to GCS.
   for (int i = 0; i < 10; i++) {
@@ -1800,7 +1802,7 @@ TEST(GcsFileSystemTest, DeleteFile) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 16 /* block size */,
       16 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1821,16 +1823,16 @@ TEST(GcsFileSystemTest, DeleteFile) {
 
 TEST(GcsFileSystemTest, DeleteFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.DeleteFile("gs://bucket/").code());
@@ -1871,7 +1873,7 @@ TEST(GcsFileSystemTest, DeleteFile_StatCacheRemoved) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 16 /* block size */,
       16 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1894,16 +1896,16 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1923,16 +1925,16 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
                            "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1943,16 +1945,16 @@ TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
       "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket"));
 }
@@ -1965,16 +1967,16 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
       "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.DeleteDir("gs://bucket/path/").code());
@@ -1988,16 +1990,16 @@ TEST(GcsFileSystemTest, GetFileSize) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   uint64 size;
   TF_EXPECT_OK(fs.GetFileSize("gs://bucket/file.txt", &size));
@@ -2006,16 +2008,16 @@ TEST(GcsFileSystemTest, GetFileSize) {
 
 TEST(GcsFileSystemTest, GetFileSize_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   uint64 size;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -2092,16 +2094,16 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.RenameFile("gs://bucket/path1", "gs://bucket/path2/"));
 }
@@ -2191,7 +2193,7 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 16 /* block size */,
       64 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
   // Do an initial read of the source and destination files to load their
@@ -2272,7 +2274,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_FlushTargetStatCache) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
   // Do an initial stat of the destination file to load their contents into the
@@ -2332,16 +2334,16 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(
       fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt"));
@@ -2374,16 +2376,16 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Post: yes\n"
            "Timeouts: 5 1 10\n",
            "{\"done\": false}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(
       errors::Code::UNIMPLEMENTED,
@@ -2399,16 +2401,16 @@ TEST(GcsFileSystemTest, Stat_Object) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat));
@@ -2433,16 +2435,16 @@ TEST(GcsFileSystemTest, Stat_Folder) {
            "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", &stat));
@@ -2466,16 +2468,16 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/path", &stat).code());
@@ -2487,16 +2489,16 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/", &stat));
@@ -2511,16 +2513,16 @@ TEST(GcsFileSystemTest, Stat_BucketNotFound) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/", &stat).code());
@@ -2556,7 +2558,7 @@ TEST(GcsFileSystemTest, Stat_Cache) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -2598,7 +2600,7 @@ TEST(GcsFileSystemTest, Stat_Cache_Flush) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
   // There should be a single HTTP request to GCS for fs.Stat in this loop.
@@ -2628,16 +2630,16 @@ TEST(GcsFileSystemTest, Stat_FilenameEndingWithSlash) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"5\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/dir/", &stat));
@@ -2660,16 +2662,16 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(error::Code::NOT_FOUND,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -2691,16 +2693,16 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
            "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -2722,16 +2724,16 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder/"));
@@ -2749,16 +2751,16 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/"));
@@ -2770,16 +2772,16 @@ TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(error::Code::NOT_FOUND, fs.IsDirectory("gs://bucket/").code());
 }
@@ -2812,16 +2814,16 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
                            "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath/"));
@@ -2839,16 +2841,16 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket"));
@@ -2911,16 +2913,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
                            "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -3004,16 +3006,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
 
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -3039,16 +3041,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   int64 undeleted_files, undeleted_dirs;
   EXPECT_EQ(error::Code::NOT_FOUND,
@@ -3130,7 +3132,7 @@ TEST(GcsFileSystemTest, AdditionalRequestHeaderTest) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       add_header /* gcs additional header */);
 
@@ -3199,16 +3201,16 @@ TEST(GcsFileSystemTest, CreateHttpRequest) {
                            "Auth Token: fake_token\n"
                            "Header Hello: world\n",
                            "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<HttpRequest> request;
   TF_EXPECT_OK(fs.CreateHttpRequest(&request));
@@ -3262,16 +3264,16 @@ TEST(GcsFileSystemTest, Stat_StatsRecording) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TestGcsStats stats;
   fs.SetStats(&stats);
@@ -3289,16 +3291,16 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_StatsRecording) {
       "Range: 0-5\n"
       "Timeouts: 5 1 20\n",
       "012345")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TestGcsStats stats;
   fs.SetStats(&stats);
diff --git a/tensorflow/core/platform/cloud/google_auth_provider_test.cc b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
index 07b88a880f..ec31c5ee8c 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider_test.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
@@ -93,8 +93,8 @@ TEST_F(GoogleAuthProviderTest, EnvironmentVariable_Caching) {
 
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
   oauth_client->return_token = "fake-token";
@@ -129,8 +129,8 @@ TEST_F(GoogleAuthProviderTest, GCloudRefreshToken) {
   FakeEnv env;
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
 
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
@@ -178,8 +178,8 @@ TEST_F(GoogleAuthProviderTest, RunningOnGCE) {
   FakeEnv env;
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
 
@@ -206,8 +206,8 @@ TEST_F(GoogleAuthProviderTest, OverrideForTesting) {
   FakeEnv env;
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&empty_requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
 
@@ -228,8 +228,8 @@ TEST_F(GoogleAuthProviderTest, NothingAvailable) {
   FakeEnv env;
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
 
diff --git a/tensorflow/core/platform/cloud/retrying_file_system.h b/tensorflow/core/platform/cloud/retrying_file_system.h
index 941ab7ad65..5ce6670dc7 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system.h
+++ b/tensorflow/core/platform/cloud/retrying_file_system.h
@@ -34,9 +34,9 @@ template <typename Underlying>
 class RetryingFileSystem : public FileSystem {
  public:
   RetryingFileSystem(std::unique_ptr<Underlying> base_file_system,
-                     int64 delay_microseconds = 1000000)
+                     const RetryConfig& retry_config)
       : base_file_system_(std::move(base_file_system)),
-        initial_delay_microseconds_(delay_microseconds) {}
+        retry_config_(retry_config) {}
 
   Status NewRandomAccessFile(
       const string& filename,
@@ -55,7 +55,7 @@ class RetryingFileSystem : public FileSystem {
   Status FileExists(const string& fname) override {
     return RetryingUtils::CallWithRetries(
         [this, &fname]() { return base_file_system_->FileExists(fname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status GetChildren(const string& dir, std::vector<string>* result) override {
@@ -63,7 +63,7 @@ class RetryingFileSystem : public FileSystem {
         [this, &dir, result]() {
           return base_file_system_->GetChildren(dir, result);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status GetMatchingPaths(const string& pattern,
@@ -72,31 +72,31 @@ class RetryingFileSystem : public FileSystem {
         [this, &pattern, result]() {
           return base_file_system_->GetMatchingPaths(pattern, result);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status Stat(const string& fname, FileStatistics* stat) override {
     return RetryingUtils::CallWithRetries(
         [this, &fname, stat]() { return base_file_system_->Stat(fname, stat); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status DeleteFile(const string& fname) override {
     return RetryingUtils::DeleteWithRetries(
         [this, &fname]() { return base_file_system_->DeleteFile(fname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status CreateDir(const string& dirname) override {
     return RetryingUtils::CallWithRetries(
         [this, &dirname]() { return base_file_system_->CreateDir(dirname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status DeleteDir(const string& dirname) override {
     return RetryingUtils::DeleteWithRetries(
         [this, &dirname]() { return base_file_system_->DeleteDir(dirname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status GetFileSize(const string& fname, uint64* file_size) override {
@@ -104,7 +104,7 @@ class RetryingFileSystem : public FileSystem {
         [this, &fname, file_size]() {
           return base_file_system_->GetFileSize(fname, file_size);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status RenameFile(const string& src, const string& target) override {
@@ -112,13 +112,13 @@ class RetryingFileSystem : public FileSystem {
         [this, &src, &target]() {
           return base_file_system_->RenameFile(src, target);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status IsDirectory(const string& dirname) override {
     return RetryingUtils::CallWithRetries(
         [this, &dirname]() { return base_file_system_->IsDirectory(dirname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status DeleteRecursively(const string& dirname, int64* undeleted_files,
@@ -128,7 +128,7 @@ class RetryingFileSystem : public FileSystem {
           return base_file_system_->DeleteRecursively(dirname, undeleted_files,
                                                       undeleted_dirs);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   void FlushCaches() override { base_file_system_->FlushCaches(); }
@@ -137,7 +137,7 @@ class RetryingFileSystem : public FileSystem {
 
  private:
   std::unique_ptr<Underlying> base_file_system_;
-  const int64 initial_delay_microseconds_;
+  const RetryConfig retry_config_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(RetryingFileSystem);
 };
@@ -147,9 +147,8 @@ namespace retrying_internals {
 class RetryingRandomAccessFile : public RandomAccessFile {
  public:
   RetryingRandomAccessFile(std::unique_ptr<RandomAccessFile> base_file,
-                           int64 delay_microseconds)
-      : base_file_(std::move(base_file)),
-        initial_delay_microseconds_(delay_microseconds) {}
+                           const RetryConfig& retry_config)
+      : base_file_(std::move(base_file)), retry_config_(retry_config) {}
 
   Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
@@ -157,20 +156,19 @@ class RetryingRandomAccessFile : public RandomAccessFile {
         [this, offset, n, result, scratch]() {
           return base_file_->Read(offset, n, result, scratch);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
  private:
   std::unique_ptr<RandomAccessFile> base_file_;
-  const int64 initial_delay_microseconds_;
+  const RetryConfig retry_config_;
 };
 
 class RetryingWritableFile : public WritableFile {
  public:
   RetryingWritableFile(std::unique_ptr<WritableFile> base_file,
-                       int64 delay_microseconds)
-      : base_file_(std::move(base_file)),
-        initial_delay_microseconds_(delay_microseconds) {}
+                       const RetryConfig& retry_config)
+      : base_file_(std::move(base_file)), retry_config_(retry_config) {}
 
   ~RetryingWritableFile() override {
     // Makes sure the retrying version of Close() is called in the destructor.
@@ -179,25 +177,24 @@ class RetryingWritableFile : public WritableFile {
 
   Status Append(StringPiece data) override {
     return RetryingUtils::CallWithRetries(
-        [this, &data]() { return base_file_->Append(data); },
-        initial_delay_microseconds_);
+        [this, &data]() { return base_file_->Append(data); }, retry_config_);
   }
   Status Close() override {
     return RetryingUtils::CallWithRetries(
-        [this]() { return base_file_->Close(); }, initial_delay_microseconds_);
+        [this]() { return base_file_->Close(); }, retry_config_);
   }
   Status Flush() override {
     return RetryingUtils::CallWithRetries(
-        [this]() { return base_file_->Flush(); }, initial_delay_microseconds_);
+        [this]() { return base_file_->Flush(); }, retry_config_);
   }
   Status Sync() override {
     return RetryingUtils::CallWithRetries(
-        [this]() { return base_file_->Sync(); }, initial_delay_microseconds_);
+        [this]() { return base_file_->Sync(); }, retry_config_);
   }
 
  private:
   std::unique_ptr<WritableFile> base_file_;
-  const int64 initial_delay_microseconds_;
+  const RetryConfig retry_config_;
 };
 
 }  // namespace retrying_internals
@@ -210,9 +207,9 @@ Status RetryingFileSystem<Underlying>::NewRandomAccessFile(
       [this, &filename, &base_file]() {
         return base_file_system_->NewRandomAccessFile(filename, &base_file);
       },
-      initial_delay_microseconds_));
+      retry_config_));
   result->reset(new retrying_internals::RetryingRandomAccessFile(
-      std::move(base_file), initial_delay_microseconds_));
+      std::move(base_file), retry_config_));
   return Status::OK();
 }
 
@@ -224,9 +221,9 @@ Status RetryingFileSystem<Underlying>::NewWritableFile(
       [this, &filename, &base_file]() {
         return base_file_system_->NewWritableFile(filename, &base_file);
       },
-      initial_delay_microseconds_));
+      retry_config_));
   result->reset(new retrying_internals::RetryingWritableFile(
-      std::move(base_file), initial_delay_microseconds_));
+      std::move(base_file), retry_config_));
   return Status::OK();
 }
 
@@ -238,9 +235,9 @@ Status RetryingFileSystem<Underlying>::NewAppendableFile(
       [this, &filename, &base_file]() {
         return base_file_system_->NewAppendableFile(filename, &base_file);
       },
-      initial_delay_microseconds_));
+      retry_config_));
   result->reset(new retrying_internals::RetryingWritableFile(
-      std::move(base_file), initial_delay_microseconds_));
+      std::move(base_file), retry_config_));
   return Status::OK();
 }
 
@@ -252,7 +249,7 @@ Status RetryingFileSystem<Underlying>::NewReadOnlyMemoryRegionFromFile(
         return base_file_system_->NewReadOnlyMemoryRegionFromFile(filename,
                                                                   result);
       },
-      initial_delay_microseconds_);
+      retry_config_);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/retrying_file_system_test.cc b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
index 5910fef1d2..868eea096c 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
@@ -184,7 +184,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_ImmediateSuccess) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->random_access_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped random access file.
   std::unique_ptr<RandomAccessFile> random_access_file;
@@ -211,7 +212,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_SuccessWith3rdTry) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->random_access_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped random access file.
   std::unique_ptr<RandomAccessFile> random_access_file;
@@ -235,7 +237,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_AllRetriesFailed) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->random_access_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped random access file.
   std::unique_ptr<RandomAccessFile> random_access_file;
@@ -265,7 +268,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_NoRetriesForSomeErrors) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->random_access_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped random access file.
   std::unique_ptr<RandomAccessFile> random_access_file;
@@ -291,7 +295,8 @@ TEST(RetryingFileSystemTest, NewWritableFile_ImmediateSuccess) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped writable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -317,7 +322,8 @@ TEST(RetryingFileSystemTest, NewWritableFile_SuccessWith3rdTry) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped writable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -343,7 +349,8 @@ TEST(RetryingFileSystemTest, NewWritableFile_SuccessWith3rdTry_ViaDestructor) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped writable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -368,7 +375,8 @@ TEST(RetryingFileSystemTest, NewAppendableFile_SuccessWith3rdTry) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped appendable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -391,7 +399,8 @@ TEST(RetryingFileSystemTest, NewWritableFile_AllRetriesFailed) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped writable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -412,7 +421,8 @@ TEST(RetryingFileSystemTest,
        std::make_tuple("NewReadOnlyMemoryRegionFromFile", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::unique_ptr<ReadOnlyMemoryRegion> result;
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile("filename.txt", &result));
@@ -423,7 +433,8 @@ TEST(RetryingFileSystemTest, NewReadOnlyMemoryRegionFromFile_AllRetriesFailed) {
       CreateRetriableErrors("NewReadOnlyMemoryRegionFromFile", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::unique_ptr<ReadOnlyMemoryRegion> result;
   const auto& status =
@@ -440,7 +451,8 @@ TEST(RetryingFileSystemTest, GetChildren_SuccessWith2ndTry) {
        std::make_tuple("GetChildren", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetChildren("gs://path", &result));
@@ -450,7 +462,8 @@ TEST(RetryingFileSystemTest, GetChildren_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("GetChildren", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.GetChildren("gs://path", &result);
@@ -466,7 +479,8 @@ TEST(RetryingFileSystemTest, GetMatchingPaths_SuccessWith2ndTry) {
        std::make_tuple("GetMatchingPaths", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://path/dir", &result));
@@ -477,7 +491,8 @@ TEST(RetryingFileSystemTest, GetMatchingPaths_AllRetriesFailed) {
       CreateRetriableErrors("GetMatchingPaths", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.GetMatchingPaths("gs://path/dir", &result);
@@ -492,7 +507,8 @@ TEST(RetryingFileSystemTest, DeleteFile_SuccessWith2ndTry) {
        std::make_tuple("DeleteFile", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.DeleteFile("gs://path/file.txt"));
@@ -502,7 +518,8 @@ TEST(RetryingFileSystemTest, DeleteFile_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("DeleteFile", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.DeleteFile("gs://path/file.txt");
@@ -517,7 +534,8 @@ TEST(RetryingFileSystemTest, CreateDir_SuccessWith2ndTry) {
        std::make_tuple("CreateDir", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.CreateDir("gs://path/newdir"));
@@ -527,7 +545,8 @@ TEST(RetryingFileSystemTest, CreateDir_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("CreateDir", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.CreateDir("gs://path/newdir");
@@ -542,7 +561,8 @@ TEST(RetryingFileSystemTest, DeleteDir_SuccessWith2ndTry) {
        std::make_tuple("DeleteDir", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.DeleteDir("gs://path/dir"));
@@ -552,7 +572,8 @@ TEST(RetryingFileSystemTest, DeleteDir_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("DeleteDir", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.DeleteDir("gs://path/dir");
@@ -568,7 +589,8 @@ TEST(RetryingFileSystemTest, GetFileSize_SuccessWith2ndTry) {
        std::make_tuple("GetFileSize", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   uint64 size;
   TF_EXPECT_OK(fs.GetFileSize("gs://path/file.txt", &size));
@@ -578,7 +600,8 @@ TEST(RetryingFileSystemTest, GetFileSize_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("GetFileSize", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   uint64 size;
   const auto& status = fs.GetFileSize("gs://path/file.txt", &size);
@@ -593,7 +616,8 @@ TEST(RetryingFileSystemTest, RenameFile_SuccessWith2ndTry) {
        std::make_tuple("RenameFile", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   TF_EXPECT_OK(fs.RenameFile("old_name", "new_name"));
 }
@@ -602,7 +626,8 @@ TEST(RetryingFileSystemTest, RenameFile_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("RenameFile", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   const auto& status = fs.RenameFile("old_name", "new_name");
   EXPECT_TRUE(
@@ -616,7 +641,8 @@ TEST(RetryingFileSystemTest, Stat_SuccessWith2ndTry) {
        std::make_tuple("Stat", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("file_name", &stat));
@@ -626,7 +652,8 @@ TEST(RetryingFileSystemTest, Stat_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("Stat", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   FileStatistics stat;
   const auto& status = fs.Stat("file_name", &stat);
@@ -639,7 +666,8 @@ TEST(RetryingFileSystemTest, FileExists_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("FileExists", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   const auto& status = fs.FileExists("file_name");
   EXPECT_TRUE(
@@ -653,7 +681,8 @@ TEST(RetryingFileSystemTest, FileExists_SuccessWith2ndTry) {
        std::make_tuple("FileExists", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   TF_EXPECT_OK(fs.FileExists("gs://path/dir"));
 }
@@ -665,7 +694,8 @@ TEST(RetryingFileSystemTest, IsDirectory_SuccessWith2ndTry) {
        std::make_tuple("IsDirectory", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   TF_EXPECT_OK(fs.IsDirectory("gs://path/dir"));
 }
@@ -674,7 +704,8 @@ TEST(RetryingFileSystemTest, IsDirectory_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("IsDirectory", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   const auto& status = fs.IsDirectory("gs://path/dir");
   EXPECT_TRUE(
@@ -689,7 +720,8 @@ TEST(RetryingFileSystemTest, DeleteRecursively_SuccessWith2ndTry) {
        std::make_tuple("DeleteRecursively", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
   int64 undeleted_files, undeleted_dirs;
 
   TF_EXPECT_OK(
@@ -701,7 +733,8 @@ TEST(RetryingFileSystemTest, DeleteRecursively_AllRetriesFailed) {
       CreateRetriableErrors("DeleteRecursively", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
   int64 undeleted_files, undeleted_dirs;
 
   const auto& status =
@@ -715,7 +748,8 @@ TEST(RetryingFileSystemTest, FlushCaches) {
   ExpectedCalls none;
   bool flushed = false;
   std::unique_ptr<MockFileSystem> base_fs(new MockFileSystem(none, &flushed));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
   fs.FlushCaches();
   EXPECT_TRUE(flushed);
 }
diff --git a/tensorflow/core/platform/cloud/retrying_utils.cc b/tensorflow/core/platform/cloud/retrying_utils.cc
index d2df422024..cb0aecdd35 100644
--- a/tensorflow/core/platform/cloud/retrying_utils.cc
+++ b/tensorflow/core/platform/cloud/retrying_utils.cc
@@ -23,11 +23,6 @@ namespace tensorflow {
 
 namespace {
 
-// In case of failure, every call will be retried kMaxRetries times.
-constexpr int kMaxRetries = 10;
-// Maximum backoff time in microseconds.
-constexpr int64 kMaximumBackoffMicroseconds = 32000000;  // 32 seconds.
-
 bool IsRetriable(error::Code code) {
   switch (code) {
     case error::UNAVAILABLE:
@@ -43,40 +38,41 @@ bool IsRetriable(error::Code code) {
 }  // namespace
 
 Status RetryingUtils::CallWithRetries(const std::function<Status()>& f,
-                                      const int64 initial_delay_microseconds) {
-  return CallWithRetries(f, initial_delay_microseconds, [](int64 micros) {
-    return Env::Default()->SleepForMicroseconds(micros);
-  });
+                                      const RetryConfig& config) {
+  return CallWithRetries(
+      f,
+      [](int64 micros) { return Env::Default()->SleepForMicroseconds(micros); },
+      config);
 }
 
 Status RetryingUtils::CallWithRetries(
-    const std::function<Status()>& f, const int64 initial_delay_microseconds,
-    const std::function<void(int64)>& sleep_usec) {
+    const std::function<Status()>& f,
+    const std::function<void(int64)>& sleep_usec, const RetryConfig& config) {
   int retries = 0;
   while (true) {
     auto status = f();
     if (!IsRetriable(status.code())) {
       return status;
     }
-    if (retries >= kMaxRetries) {
+    if (retries >= config.max_retries) {
       // Return AbortedError, so that it doesn't get retried again somewhere
       // at a higher level.
       return Status(
           error::ABORTED,
           strings::StrCat(
-              "All ", kMaxRetries,
+              "All ", config.max_retries,
               " retry attempts failed. The last failure: ", status.ToString()));
     }
     int64 delay_micros = 0;
-    if (initial_delay_microseconds > 0) {
+    if (config.init_delay_time_us > 0) {
       const int64 random_micros = random::New64() % 1000000;
-      delay_micros = std::min(initial_delay_microseconds << retries,
-                              kMaximumBackoffMicroseconds) +
+      delay_micros = std::min(config.init_delay_time_us << retries,
+                              config.max_delay_time_us) +
                      random_micros;
     }
     LOG(INFO) << "The operation failed and will be automatically retried in "
               << (delay_micros / 1000000.0) << " seconds (attempt "
-              << (retries + 1) << " out of " << kMaxRetries
+              << (retries + 1) << " out of " << config.max_retries
               << "), caused by: " << status.ToString();
     sleep_usec(delay_micros);
     retries++;
@@ -84,8 +80,7 @@ Status RetryingUtils::CallWithRetries(
 }
 
 Status RetryingUtils::DeleteWithRetries(
-    const std::function<Status()>& delete_func,
-    const int64 initial_delay_microseconds) {
+    const std::function<Status()>& delete_func, const RetryConfig& config) {
   bool is_retried = false;
   return RetryingUtils::CallWithRetries(
       [delete_func, &is_retried]() {
@@ -96,7 +91,7 @@ Status RetryingUtils::DeleteWithRetries(
         is_retried = true;
         return status;
       },
-      initial_delay_microseconds);
+      config);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/retrying_utils.h b/tensorflow/core/platform/cloud/retrying_utils.h
index 546b8d1c4a..1a7ce1b122 100644
--- a/tensorflow/core/platform/cloud/retrying_utils.h
+++ b/tensorflow/core/platform/cloud/retrying_utils.h
@@ -21,6 +21,26 @@ limitations under the License.
 
 namespace tensorflow {
 
+// Default time before reporting failure: ~100 seconds.
+struct RetryConfig {
+  RetryConfig(int64 init_delay_time_us = 100 * 1000,
+              int64 max_delay_time_us = 32 * 1000 * 1000,
+              int max_retries = 10) {
+    this->init_delay_time_us = init_delay_time_us;
+    this->max_delay_time_us = max_delay_time_us;
+    this->max_retries = max_retries;
+  }
+
+  // In case of failure, every call will be retried max_retries times.
+  int max_retries;
+
+  // Initial backoff time
+  int64 init_delay_time_us;
+
+  // Maximum backoff time in microseconds.
+  int64 max_delay_time_us;
+};
+
 class RetryingUtils {
  public:
   /// \brief Retries the function in case of failure with exponential backoff.
@@ -31,18 +51,19 @@ class RetryingUtils {
   /// retries.
   /// If all retries failed, returns the last error status.
   static Status CallWithRetries(const std::function<Status()>& f,
-                                const int64 initial_delay_microseconds);
+                                const RetryConfig& config);
+
   /// sleep_usec is a function that sleeps for the given number of microseconds.
   static Status CallWithRetries(const std::function<Status()>& f,
-                                const int64 initial_delay_microseconds,
-                                const std::function<void(int64)>& sleep_usec);
+                                const std::function<void(int64)>& sleep_usec,
+                                const RetryConfig& config);
   /// \brief A retrying wrapper for a function that deletes a resource.
   ///
   /// The function takes care of the scenario when a delete operation
   /// returns a failure but succeeds under the hood: if a retry returns
   /// NOT_FOUND, the whole operation is considered a success.
   static Status DeleteWithRetries(const std::function<Status()>& delete_func,
-                                  const int64 initial_delay_microseconds);
+                                  const RetryConfig& config);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/retrying_utils_test.cc b/tensorflow/core/platform/cloud/retrying_utils_test.cc
index 1b6527618a..75fe8a98f4 100644
--- a/tensorflow/core/platform/cloud/retrying_utils_test.cc
+++ b/tensorflow/core/platform/cloud/retrying_utils_test.cc
@@ -30,7 +30,8 @@ TEST(RetryingUtilsTest, CallWithRetries_RetryDelays) {
   };
   std::function<Status()> f = []() { return errors::Unavailable("Failed."); };
 
-  const auto& status = RetryingUtils::CallWithRetries(f, 500000L, sleep);
+  const auto& status = RetryingUtils::CallWithRetries(
+      f, sleep, RetryConfig(500000 /* init_delay_time_us */));
   EXPECT_EQ(errors::Code::ABORTED, status.code());
   EXPECT_TRUE(str_util::StrContains(
       status.error_message(),
@@ -60,8 +61,10 @@ TEST(RetryingUtilsTest, CallWithRetries_NotFoundIsNotRetried) {
     results.erase(results.begin());
     return result;
   };
-  EXPECT_EQ(errors::Code::NOT_FOUND,
-            RetryingUtils::CallWithRetries(f, 0).code());
+  EXPECT_EQ(
+      errors::Code::NOT_FOUND,
+      RetryingUtils::CallWithRetries(f, RetryConfig(0 /* init_delay_time_us */))
+          .code());
 }
 
 TEST(RetryingUtilsTest, CallWithRetries_ImmediateSuccess) {
@@ -74,7 +77,8 @@ TEST(RetryingUtilsTest, CallWithRetries_ImmediateSuccess) {
     results.erase(results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::CallWithRetries(f, 1.0, sleep));
+  TF_EXPECT_OK(RetryingUtils::CallWithRetries(
+      f, sleep, RetryConfig(1L /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, CallWithRetries_EventualSuccess) {
@@ -86,7 +90,8 @@ TEST(RetryingUtilsTest, CallWithRetries_EventualSuccess) {
     results.erase(results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::CallWithRetries(f, 0));
+  TF_EXPECT_OK(RetryingUtils::CallWithRetries(
+      f, RetryConfig(0 /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_ImmediateSuccess) {
@@ -96,7 +101,8 @@ TEST(RetryingUtilsTest, DeleteWithRetries_ImmediateSuccess) {
     delete_results.erase(delete_results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(delete_func, 0));
+  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(
+      delete_func, RetryConfig(0 /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_EventualSuccess) {
@@ -106,7 +112,8 @@ TEST(RetryingUtilsTest, DeleteWithRetries_EventualSuccess) {
     delete_results.erase(delete_results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(delete_func, 0));
+  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(
+      delete_func, RetryConfig(0 /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_PermissionDeniedNotRetried) {
@@ -118,7 +125,9 @@ TEST(RetryingUtilsTest, DeleteWithRetries_PermissionDeniedNotRetried) {
     return result;
   };
   EXPECT_EQ(errors::Code::PERMISSION_DENIED,
-            RetryingUtils::DeleteWithRetries(delete_func, 0).code());
+            RetryingUtils::DeleteWithRetries(
+                delete_func, RetryConfig(0 /* init_delay_time_us */))
+                .code());
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_SuccessThroughFileNotFound) {
@@ -129,7 +138,8 @@ TEST(RetryingUtilsTest, DeleteWithRetries_SuccessThroughFileNotFound) {
     delete_results.erase(delete_results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(delete_func, 0));
+  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(
+      delete_func, RetryConfig(0 /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_FirstNotFoundReturnedAsIs) {
@@ -140,7 +150,9 @@ TEST(RetryingUtilsTest, DeleteWithRetries_FirstNotFoundReturnedAsIs) {
     return result;
   };
   EXPECT_EQ(error::NOT_FOUND,
-            RetryingUtils::DeleteWithRetries(delete_func, 0).code());
+            RetryingUtils::DeleteWithRetries(
+                delete_func, RetryConfig(0 /* init_delay_time_us */))
+                .code());
 }
 
 }  // namespace
-- 
GitLab


From 84a051e7d0cd1406c1bb846efc677c8aa3fc896e Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 1 Oct 2018 11:12:03 -0700
Subject: [PATCH 0949/1357] Fix typo.

PiperOrigin-RevId: 215246174
---
 tensorflow/python/autograph/CONTRIBUTING.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/python/autograph/CONTRIBUTING.md b/tensorflow/python/autograph/CONTRIBUTING.md
index 1ded5ba5f6..f3587a4384 100644
--- a/tensorflow/python/autograph/CONTRIBUTING.md
+++ b/tensorflow/python/autograph/CONTRIBUTING.md
@@ -9,8 +9,6 @@ In preparation for TF 2.0, we moved the code base of AutoGraph from
 does not impact functionality, and AutoGraph will remain accessible under
 `tensorflow.contrib.autograph` until `tensorflow.contrib` is retired.
 
-When 
-
 ## TensorFlow Code of Conduct
 Please review and follow the [TensorFlow Code of Conduct](../../CODE_OF_CONDUCT.md).
 
-- 
GitLab


From 2bbf05148ad94928c1c828d40e479afdf34e2ef8 Mon Sep 17 00:00:00 2001
From: Christopher Olston <olston@google.com>
Date: Mon, 1 Oct 2018 11:24:41 -0700
Subject: [PATCH 0950/1357] Automated rollback of commit
 6a787235b95dd3040fc5ff7fb7104585e746c66a

PiperOrigin-RevId: 215248737
---
 tensorflow/core/kernels/batching_util/BUILD | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD
index 039b0db144..0d53240330 100644
--- a/tensorflow/core/kernels/batching_util/BUILD
+++ b/tensorflow/core/kernels/batching_util/BUILD
@@ -12,11 +12,6 @@ cc_library(
     name = "periodic_function_dynamic",
     srcs = ["periodic_function.cc"],
     hdrs = ["periodic_function.h"],
-    visibility = [
-        "//learning/serving:__subpackages__",
-        "//tensorflow:internal",
-        "//tensorflow_serving:__subpackages__",
-    ],
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:protos_all_cc",
@@ -25,11 +20,6 @@ cc_library(
 
 cc_library(
     name = "periodic_function",
-    visibility = [
-        "//learning/serving:__subpackages__",
-        "//tensorflow:internal",
-        "//tensorflow_serving:__subpackages__",
-    ],
     deps = [
         ":periodic_function_dynamic",
         "//tensorflow/core:lib",
@@ -198,11 +188,6 @@ cc_library(
     testonly = 1,
     srcs = ["fake_clock_env.cc"],
     hdrs = ["fake_clock_env.h"],
-    visibility = [
-        "//learning/serving:__subpackages__",
-        "//tensorflow:internal",
-        "//tensorflow_serving:__subpackages__",
-    ],
     deps = [
         "//tensorflow/core:lib",
         "//tensorflow/core:tensorflow",
-- 
GitLab


From a9b01e8a31a02188bc81349c103f136095f322ac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 11:26:02 -0700
Subject: [PATCH 0951/1357] internal change only

PiperOrigin-RevId: 215248985
---
 tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
index 8e6e9aa0cd..1c5ea2d997 100644
--- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
@@ -237,7 +237,8 @@ void StartMonitoring(const tensorflow::string& service_addr, int duration_ms,
     MonitorResponse response;
     TF_QCHECK_OK(FromGrpcStatus(stub->Monitor(&context, request, &response)));
 
-    std::cout << "Xprof Monitoring Results (Sample " << query + 1 << "):\n\n"
+    std::cout << "Cloud TPU Monitoring Results (Sample " << query + 1
+              << "):\n\n"
               << response.data() << std::flush;
   }
 }
-- 
GitLab


From f0f301f05fb1f1965c966ef57cc390e48d966f12 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Mon, 1 Oct 2018 11:29:30 -0700
Subject: [PATCH 0952/1357] Add deprecation notice for BasicRNNCell, which will
 be replaced by keras.SimpleRNNCell.

PiperOrigin-RevId: 215249611
---
 tensorflow/python/kernel_tests/rnn_test.py    |  39 ++++
 tensorflow/python/ops/rnn_cell_impl.py        |   4 +-
 ...orflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt | 202 ------------------
 .../golden/v2/tensorflow.nn.rnn_cell.pbtxt    |   4 -
 4 files changed, 42 insertions(+), 207 deletions(-)
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt

diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index 05ad9f6336..2f6963f6b8 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -535,6 +535,45 @@ class RNNTest(test.TestCase):
     self.assertAllClose(tf_out, k_out)
     self.assertAllClose(tf_state, k_state)
 
+  def testSimpleRNNCellAndBasicRNNCellComparison(self):
+    input_shape = 10
+    output_shape = 5
+    timestep = 4
+    batch = 20
+    (x_train, _), _ = testing_utils.get_test_data(
+        train_samples=batch,
+        test_samples=0,
+        input_shape=(timestep, input_shape),
+        num_classes=output_shape)
+    fix_weights_generator = keras.layers.SimpleRNNCell(output_shape)
+    fix_weights_generator.build((None, input_shape))
+    # The SimpleRNNCell contains 3 weights: kernel, recurrent_kernel, and bias
+    # The BasicRNNCell contains 2 weight: kernel and bias, where kernel is
+    # zipped [kernel, recurrent_kernel] in SimpleRNNCell.
+    keras_weights = fix_weights_generator.get_weights()
+    kernel, recurrent_kernel, bias = keras_weights
+    tf_weights = [np.concatenate((kernel, recurrent_kernel)), bias]
+
+    with self.test_session(graph=ops_lib.Graph()) as sess:
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape))
+      cell = keras.layers.SimpleRNNCell(output_shape)
+      k_out, k_state = rnn.dynamic_rnn(
+          cell, inputs, dtype=dtypes.float32)
+      cell.set_weights(keras_weights)
+      [k_out, k_state] = sess.run([k_out, k_state], {inputs: x_train})
+    with self.test_session(graph=ops_lib.Graph()) as sess:
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape))
+      cell = rnn_cell_impl.BasicRNNCell(output_shape)
+      tf_out, tf_state = rnn.dynamic_rnn(
+          cell, inputs, dtype=dtypes.float32)
+      cell.set_weights(tf_weights)
+      [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train})
+
+    self.assertAllClose(tf_out, k_out)
+    self.assertAllClose(tf_state, k_state)
+
   def testBasicLSTMCellInterchangeWithLSTMCell(self):
     with self.session(graph=ops_lib.Graph()) as sess:
       basic_cell = rnn_cell_impl.BasicLSTMCell(1)
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index c2751e529a..dd4f3d7a99 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -370,7 +370,7 @@ class LayerRNNCell(RNNCell):
                                      *args, **kwargs)
 
 
-@tf_export("nn.rnn_cell.BasicRNNCell")
+@tf_export(v1=["nn.rnn_cell.BasicRNNCell"])
 class BasicRNNCell(LayerRNNCell):
   """The most basic RNN cell.
 
@@ -393,6 +393,8 @@ class BasicRNNCell(LayerRNNCell):
       `trainable` etc when constructing the cell from configs of get_config().
   """
 
+  @deprecated(None, "This class is equivalent as tf.keras.layers.SimpleRNNCell,"
+                    " and will be replaced by that in Tensorflow 2.0.")
   def __init__(self,
                num_units,
                activation=None,
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
deleted file mode 100644
index a4483fefa2..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
+++ /dev/null
@@ -1,202 +0,0 @@
-path: "tensorflow.nn.rnn_cell.BasicRNNCell"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.BasicRNNCell\'>"
-  is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.LayerRNNCell\'>"
-  is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
-  is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "graph"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_size"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "scope_name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "state_size"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'num_units\', \'activation\', \'reuse\', \'name\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_initial_state"
-    argspec: "args=[\'self\', \'inputs\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "zero_state"
-    argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt
index 64697e8a02..24767e250f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt
@@ -4,10 +4,6 @@ tf_module {
     name: "BasicLSTMCell"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "BasicRNNCell"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "DeviceWrapper"
     mtype: "<type \'type\'>"
-- 
GitLab


From 7cabc6be4e32dfb7f42c7f5e33549984bfdb68a3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 11:44:17 -0700
Subject: [PATCH 0953/1357] Allow zero number of inputs in XRT execute
 operation.

PiperOrigin-RevId: 215252408
---
 tensorflow/compiler/xrt/ops/xrt_execute_op.cc |  2 +-
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 41 +++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xrt/ops/xrt_execute_op.cc b/tensorflow/compiler/xrt/ops/xrt_execute_op.cc
index fda4c31298..40ec1b0ba9 100644
--- a/tensorflow/compiler/xrt/ops/xrt_execute_op.cc
+++ b/tensorflow/compiler/xrt/ops/xrt_execute_op.cc
@@ -21,7 +21,7 @@ limitations under the License.
 namespace tensorflow {
 
 REGISTER_OP("XRTExecute")
-    .Attr("Ninputs: int")
+    .Attr("Ninputs: int >= 0")
     .Input("computation_handle: int64")
     .Input("execution_config: string")
     .Input("input_handles: Ninputs * int64")
diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc
index 2952feb16a..f590fbf0d9 100644
--- a/tensorflow/compiler/xrt/tests/raw_api_test.cc
+++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc
@@ -108,6 +108,14 @@ bool CompareLiteralToLiteralProto(const xla::Literal& a,
   return equal;
 }
 
+xla::XlaComputation OnePlusTwo() {
+  xla::XlaBuilder builder("OnePlusTwo");
+  auto c0 = xla::ConstantR0(&builder, 1.0f);
+  auto c1 = xla::ConstantR0(&builder, 2.0f);
+  xla::Add(c0, c1);
+  return builder.Build().ValueOrDie();
+}
+
 xla::XlaComputation AddAndScale() {
   xla::XlaBuilder builder("AddAndScale");
   auto p0 = xla::Parameter(&builder, 0,
@@ -346,6 +354,39 @@ TEST(RawApiTest, CompileAndExecute) {
   EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
 }
 
+TEST(RawApiTest, CompileAndExecuteZeroArg) {
+  xrt::XLAComputation c;
+  auto config = c.mutable_config();
+  auto shapes = config->mutable_program_shape();
+  *shapes->mutable_result() = xla::ShapeUtil::MakeShape(xla::F32, {});
+
+  xrt::XRTExecutionConfig e;
+  e.set_release_input_handles(true);
+  e.set_release_compilation_handle(true);
+  StoreComputationSnapshot(OnePlusTwo(), c.mutable_hlo_snapshot());
+
+  Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
+  auto e_config =
+      ops::Const(root.WithDevice("/device:CPU:0"), e.SerializeAsString());
+  auto computation =
+      ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
+  auto c_handle = ops::XRTCompile(root, computation);
+  auto result = ops::XRTExecute(root, c_handle, e_config,
+                                std::initializer_list<Input>({}));
+  auto read_back = ops::XRTReadLiteralAndRelease(root, result);
+  TF_ASSERT_OK(root.status());
+
+  ClientSession session(root);
+  std::vector<Tensor> outputs;
+  TF_EXPECT_OK(session.Run({read_back}, &outputs));
+
+  xla::LiteralProto response;
+  EXPECT_TRUE(response.ParseFromString(outputs[0].scalar<string>()()));
+
+  auto expected = xla::LiteralUtil::CreateR0<float>(3.0f);
+  EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
+}
+
 TEST(RawApiTest, CompileAndExecuteReturnTuple) {
   xrt::XLAAllocation p0;
   p0.set_device_ordinal(0);
-- 
GitLab


From f1fd53748b99532b2572b8909efcd4f5c06ce28d Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 1 Oct 2018 11:53:27 -0700
Subject: [PATCH 0954/1357] Updating function and class tf_export decorators
 for endpoints according to https://github.com/tensorflow/community/pull/16.
 In addition to the changes in the doc, I made the following updates (these
 changes make sense to me and I didn't notice them when compiling the doc): *
 deprecate saved_model.builder.SavedModelBuilder - replaced with
 saved_model.SavedModelBuilder * deprecate python_io.tf_record_iterator -
 replaced with io.tf_record_iterator * deprecate python_io.TFRecordWriter -
 replaced with io.TFRecordWriter * move reduce_join to tf.string

PiperOrigin-RevId: 215253944
---
 tensorflow/python/framework/dtypes.py         |   4 +-
 tensorflow/python/framework/errors_impl.py    |   6 +-
 tensorflow/python/framework/graph_io.py       |   2 +-
 tensorflow/python/framework/importer.py       |   2 +-
 tensorflow/python/framework/random_seed.py    |   6 +-
 tensorflow/python/framework/sparse_tensor.py  |   2 +-
 tensorflow/python/lib/io/tf_record.py         |  13 +-
 tensorflow/python/ops/array_ops.py            |  44 ++--
 .../python/ops/candidate_sampling_ops.py      |   8 +-
 tensorflow/python/ops/check_ops.py            |  63 ++++--
 tensorflow/python/ops/clip_ops.py             |   8 +-
 tensorflow/python/ops/confusion_matrix.py     |   4 +-
 tensorflow/python/ops/control_flow_ops.py     |   2 +-
 tensorflow/python/ops/data_flow_ops.py        |  17 +-
 tensorflow/python/ops/init_ops.py             |   5 +
 tensorflow/python/ops/linalg_ops.py           |  15 +-
 tensorflow/python/ops/lookup_ops.py           |   2 +-
 tensorflow/python/ops/manip_ops.py            |   4 +-
 tensorflow/python/ops/math_ops.py             | 145 ++++++++------
 tensorflow/python/ops/nn_impl.py              |   6 +-
 tensorflow/python/ops/nn_ops.py               |   8 +-
 tensorflow/python/ops/numerics.py             |   4 +-
 tensorflow/python/ops/parsing_ops.py          |  18 +-
 tensorflow/python/ops/random_ops.py           |  19 +-
 tensorflow/python/ops/sparse_ops.py           | 107 ++++++----
 tensorflow/python/ops/special_math_ops.py     |   4 +-
 tensorflow/python/ops/string_ops.py           |   7 +-
 tensorflow/python/saved_model/builder_impl.py |   7 +-
 tensorflow/python/saved_model/loader_impl.py  |   8 +-
 tensorflow/python/saved_model/main_op_impl.py |   5 +-
 .../saved_model/signature_def_utils_impl.py   |  27 ++-
 tensorflow/python/saved_model/utils_impl.py   |  10 +-
 .../tools/api/generator/api_init_files.bzl    |   1 +
 .../tools/api/generator/api_init_files_v1.bzl |   1 +
 tensorflow/python/training/input.py           |   3 +-
 .../api/golden/v1/tensorflow.debugging.pbtxt  |  96 +++++++++
 .../golden/v1/tensorflow.dtypes.-d-type.pbtxt |  77 +++++++
 .../api/golden/v1/tensorflow.dtypes.pbtxt     |  20 ++
 .../api/golden/v1/tensorflow.graph_util.pbtxt |   4 +
 .../api/golden/v1/tensorflow.image.pbtxt      |   4 +
 .../golden/v1/tensorflow.initializers.pbtxt   |   4 +
 .../v1/tensorflow.io.-fixed-len-feature.pbtxt |  27 +++
 ...rflow.io.-fixed-len-sequence-feature.pbtxt |  31 +++
 ...tensorflow.io.-padding-f-i-f-o-queue.pbtxt |  66 ++++++
 .../v1/tensorflow.io.-priority-queue.pbtxt    |  66 ++++++
 .../golden/v1/tensorflow.io.-queue-base.pbtxt |  65 ++++++
 .../tensorflow.io.-random-shuffle-queue.pbtxt |  66 ++++++
 .../v1/tensorflow.io.-sparse-feature.pbtxt    |  35 ++++
 ...flow.io.-t-f-record-compression-type.pbtxt |  20 ++
 .../tensorflow.io.-t-f-record-options.pbtxt   |  17 ++
 .../v1/tensorflow.io.-t-f-record-writer.pbtxt |  21 ++
 .../v1/tensorflow.io.-var-len-feature.pbtxt   |  19 ++
 .../tools/api/golden/v1/tensorflow.io.pbtxt   |  84 ++++++++
 .../api/golden/v1/tensorflow.linalg.pbtxt     |  12 ++
 .../tools/api/golden/v1/tensorflow.math.pbtxt | 188 ++++++++++++++++++
 .../tools/api/golden/v1/tensorflow.nn.pbtxt   |  12 ++
 .../tools/api/golden/v1/tensorflow.pbtxt      |   8 +
 .../golden/v1/tensorflow.quantization.pbtxt   |   4 +
 .../api/golden/v1/tensorflow.random.pbtxt     |  47 +++++
 .../v1/tensorflow.saved_model.-builder.pbtxt  |  21 ++
 .../golden/v1/tensorflow.saved_model.pbtxt    |  44 ++++
 ...arse.-sparse-conditional-accumulator.pbtxt |  46 +++++
 .../v1/tensorflow.sparse.-sparse-tensor.pbtxt |  54 +++++
 .../api/golden/v1/tensorflow.sparse.pbtxt     | 112 +++++++++++
 .../api/golden/v1/tensorflow.strings.pbtxt    |   4 +
 .../api/golden/v1/tensorflow.train.pbtxt      |   4 +
 .../api/golden/v2/tensorflow.debugging.pbtxt  |  96 +++++++++
 .../golden/v2/tensorflow.dtypes.-d-type.pbtxt |  77 +++++++
 .../api/golden/v2/tensorflow.dtypes.pbtxt     |  20 ++
 .../api/golden/v2/tensorflow.graph_util.pbtxt |   4 +
 .../api/golden/v2/tensorflow.image.pbtxt      |   4 +
 .../golden/v2/tensorflow.initializers.pbtxt   |   4 +
 .../v2/tensorflow.io.-fixed-len-feature.pbtxt |  27 +++
 ...rflow.io.-fixed-len-sequence-feature.pbtxt |  31 +++
 ...tensorflow.io.-padding-f-i-f-o-queue.pbtxt |  66 ++++++
 .../v2/tensorflow.io.-priority-queue.pbtxt    |  66 ++++++
 .../golden/v2/tensorflow.io.-queue-base.pbtxt |  65 ++++++
 .../tensorflow.io.-random-shuffle-queue.pbtxt |  66 ++++++
 .../v2/tensorflow.io.-sparse-feature.pbtxt    |  35 ++++
 ...flow.io.-t-f-record-compression-type.pbtxt |  20 ++
 .../tensorflow.io.-t-f-record-options.pbtxt   |  17 ++
 .../v2/tensorflow.io.-t-f-record-writer.pbtxt |  21 ++
 .../v2/tensorflow.io.-var-len-feature.pbtxt   |  19 ++
 .../tools/api/golden/v2/tensorflow.io.pbtxt   |  84 ++++++++
 .../api/golden/v2/tensorflow.linalg.pbtxt     |  12 ++
 .../tools/api/golden/v2/tensorflow.math.pbtxt | 188 ++++++++++++++++++
 .../tools/api/golden/v2/tensorflow.nn.pbtxt   |  12 ++
 .../tools/api/golden/v2/tensorflow.pbtxt      |   8 +
 .../golden/v2/tensorflow.quantization.pbtxt   |   4 +
 .../api/golden/v2/tensorflow.random.pbtxt     |  47 +++++
 .../v2/tensorflow.saved_model.-builder.pbtxt  |  21 ++
 .../golden/v2/tensorflow.saved_model.pbtxt    |  44 ++++
 ...arse.-sparse-conditional-accumulator.pbtxt |  46 +++++
 .../v2/tensorflow.sparse.-sparse-tensor.pbtxt |  54 +++++
 .../api/golden/v2/tensorflow.sparse.pbtxt     | 112 +++++++++++
 .../api/golden/v2/tensorflow.strings.pbtxt    |   4 +
 .../api/golden/v2/tensorflow.train.pbtxt      |   4 +
 97 files changed, 2926 insertions(+), 217 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.dtypes.-d-type.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-sequence-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-priority-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-queue-base.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-random-shuffle-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-sparse-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-compression-type.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-options.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-writer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-var-len-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.saved_model.-builder.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.dtypes.-d-type.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-sequence-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-sparse-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-compression-type.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-options.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-writer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-var-len-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.-builder.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt

diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py
index c3f70df7d8..64d3b42d89 100644
--- a/tensorflow/python/framework/dtypes.py
+++ b/tensorflow/python/framework/dtypes.py
@@ -26,7 +26,7 @@ from tensorflow.python.util.tf_export import tf_export
 _np_bfloat16 = pywrap_tensorflow.TF_bfloat16_type()
 
 
-@tf_export("DType")
+@tf_export("dtypes.DType", "DType")
 class DType(object):
   """Represents the type of the elements in a `Tensor`.
 
@@ -658,7 +658,7 @@ _PYTHON_TO_TF = {
 }
 
 
-@tf_export("as_dtype")
+@tf_export("dtypes.as_dtype", "as_dtype")
 def as_dtype(type_value):
   """Converts the given `type_value` to a `DType`.
 
diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py
index 5af71f2cfb..8b303fa8a9 100644
--- a/tensorflow/python/framework/errors_impl.py
+++ b/tensorflow/python/framework/errors_impl.py
@@ -25,11 +25,13 @@ from tensorflow.core.lib.core import error_codes_pb2
 from tensorflow.python import pywrap_tensorflow as c_api
 from tensorflow.python.framework import c_api_util
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("OpError", "errors.OpError")
+@tf_export("errors.OpError", "OpError")
+@deprecation.deprecated_endpoints("OpError")
 class OpError(Exception):
   """A generic error that is raised when TensorFlow execution fails.
 
@@ -72,7 +74,7 @@ class OpError(Exception):
     or `Recv` op, there will be no corresponding
     `tf.Operation`
     object.  In that case, this will return `None`, and you should
-    instead use the `tf.OpError.node_def` to
+    instead use the `tf.errors.OpError.node_def` to
     discover information about the op.
 
     Returns:
diff --git a/tensorflow/python/framework/graph_io.py b/tensorflow/python/framework/graph_io.py
index be30b16f5f..47e1344eae 100644
--- a/tensorflow/python/framework/graph_io.py
+++ b/tensorflow/python/framework/graph_io.py
@@ -27,7 +27,7 @@ from tensorflow.python.lib.io import file_io
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('train.write_graph')
+@tf_export('io.write_graph', 'train.write_graph')
 def write_graph(graph_or_graph_def, logdir, name, as_text=True):
   """Writes a graph proto to a file.
 
diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index e48e67c8a1..c6595918ae 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -329,7 +329,7 @@ def _SetDefaultAttrValues(node_def, op_def):
         node_def.attr[key].CopyFrom(attr_def.default_value)
 
 
-@tf_export('import_graph_def')
+@tf_export('graph_util.import_graph_def', 'import_graph_def')
 @deprecated_args(None, 'Please file an issue at '
                  'https://github.com/tensorflow/tensorflow/issues if you depend'
                  ' on this feature.', 'op_dict')
diff --git a/tensorflow/python/framework/random_seed.py b/tensorflow/python/framework/random_seed.py
index 2f9504889a..6f9f347a99 100644
--- a/tensorflow/python/framework/random_seed.py
+++ b/tensorflow/python/framework/random_seed.py
@@ -22,6 +22,7 @@ from __future__ import print_function
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -33,7 +34,8 @@ def _truncate_seed(seed):
   return seed % _MAXINT32  # Truncate to fit into 32-bit integer
 
 
-@tf_export('get_seed')
+@tf_export('random.get_seed', 'get_seed')
+@deprecation.deprecated_endpoints('get_seed')
 def get_seed(op_seed):
   """Returns the local seeds an operation should use given an op-specific seed.
 
@@ -80,7 +82,7 @@ def get_seed(op_seed):
   return seeds
 
 
-@tf_export('set_random_seed')
+@tf_export('random.set_random_seed', 'set_random_seed')
 def set_random_seed(seed):
   """Sets the graph-level random seed.
 
diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py
index d1bdd9b80a..41ef2e11d1 100644
--- a/tensorflow/python/framework/sparse_tensor.py
+++ b/tensorflow/python/framework/sparse_tensor.py
@@ -33,7 +33,7 @@ _override_helper = ops._override_helper
 # pylint: enable=protected-access
 
 
-@tf_export("SparseTensor")
+@tf_export("sparse.SparseTensor", "SparseTensor")
 class SparseTensor(_TensorLike):
   """Represents a sparse tensor.
 
diff --git a/tensorflow/python/lib/io/tf_record.py b/tensorflow/python/lib/io/tf_record.py
index cce71a2bab..9ab683d96a 100644
--- a/tensorflow/python/lib/io/tf_record.py
+++ b/tensorflow/python/lib/io/tf_record.py
@@ -22,10 +22,12 @@ from __future__ import print_function
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.framework import errors
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("python_io.TFRecordCompressionType")
+@tf_export("io.TFRecordCompressionType", "python_io.TFRecordCompressionType")
+@deprecation.deprecated_endpoints("python_io.TFRecordCompressionType")
 class TFRecordCompressionType(object):
   """The type of compression for the record."""
   NONE = 0
@@ -33,7 +35,8 @@ class TFRecordCompressionType(object):
   GZIP = 2
 
 
-@tf_export("python_io.TFRecordOptions")
+@tf_export("io.TFRecordOptions", "python_io.TFRecordOptions")
+@deprecation.deprecated_endpoints("python_io.TFRecordOptions")
 class TFRecordOptions(object):
   """Options used for manipulating TFRecord files."""
   compression_type_map = {
@@ -143,7 +146,8 @@ class TFRecordOptions(object):
     return options
 
 
-@tf_export("python_io.tf_record_iterator")
+@tf_export("io.tf_record_iterator", "python_io.tf_record_iterator")
+@deprecation.deprecated_endpoints("python_io.tf_record_iterator")
 def tf_record_iterator(path, options=None):
   """An iterator that read the records from a TFRecords file.
 
@@ -175,7 +179,8 @@ def tf_record_iterator(path, options=None):
     reader.Close()
 
 
-@tf_export("python_io.TFRecordWriter")
+@tf_export("io.TFRecordWriter", "python_io.TFRecordWriter")
+@deprecation.deprecated_endpoints("python_io.TFRecordWriter")
 class TFRecordWriter(object):
   """A class to write records to a TFRecords file.
 
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index a7f57e94e3..9f5149d5ac 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1204,7 +1204,8 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
     return _apply_mask_1d(tensor, mask, axis)
 
 
-@tf_export("sparse_mask")
+@tf_export("sparse.mask", "sparse_mask")
+@deprecation.deprecated_endpoints("sparse_mask")
 def sparse_mask(a, mask_indices, name=None):
   """Masks elements of `IndexedSlices`.
 
@@ -1226,7 +1227,7 @@ def sparse_mask(a, mask_indices, name=None):
   # `b` will be the subset of `a` slices at its second and third indices, so
   # we want to mask its first and last indices (which are at absolute
   # indices 12, 45)
-  b = tf.sparse_mask(a, [12, 45])
+  b = tf.sparse.mask(a, [12, 45])
 
   b.indices  # [26, 37]
   tf.shape(b.values)  # [2, 10]
@@ -1382,7 +1383,7 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
                     [10, 11, 12]]])
 
   # Take the transpose of the matrices in dimension-0
-  # (this common operation has a shorthand `matrix_transpose`)
+  # (this common operation has a shorthand `linalg.transpose`)
   tf.transpose(x, perm=[0, 2, 1])  # [[[1,  4],
                                    #   [2,  5],
                                    #   [3,  6]],
@@ -1421,7 +1422,8 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
 
 
 # pylint: disable=invalid-name
-@tf_export("matrix_transpose", "linalg.transpose")
+@tf_export("linalg.transpose", "matrix_transpose")
+@deprecation.deprecated_endpoints("matrix_transpose")
 def matrix_transpose(a, name="matrix_transpose", conjugate=False):
   """Transposes last two dimensions of tensor `a`.
 
@@ -1429,19 +1431,19 @@ def matrix_transpose(a, name="matrix_transpose", conjugate=False):
 
   ```python
   x = tf.constant([[1, 2, 3], [4, 5, 6]])
-  tf.matrix_transpose(x)  # [[1, 4],
+  tf.linalg.transpose(x)  # [[1, 4],
                           #  [2, 5],
                           #  [3, 6]]
 
   x = tf.constant([[1 + 1j, 2 + 2j, 3 + 3j],
                    [4 + 4j, 5 + 5j, 6 + 6j]])
-  tf.matrix_transpose(x, conjugate=True)  # [[1 - 1j, 4 - 4j],
+  tf.linalg.transpose(x, conjugate=True)  # [[1 - 1j, 4 - 4j],
                                           #  [2 - 2j, 5 - 5j],
                                           #  [3 - 3j, 6 - 6j]]
 
   # Matrix with two batch dimensions.
   # x.shape is [1, 2, 3, 4]
-  # tf.matrix_transpose(x) is shape [1, 2, 4, 3]
+  # tf.linalg.transpose(x) is shape [1, 2, 4, 3]
   ```
 
   Note that `tf.matmul` provides kwargs allowing for transpose of arguments.
@@ -1452,14 +1454,14 @@ def matrix_transpose(a, name="matrix_transpose", conjugate=False):
   tf.matmul(matrix, b, transpose_b=True)
 
   # Inefficient!
-  tf.matmul(matrix, tf.matrix_transpose(b))
+  tf.matmul(matrix, tf.linalg.transpose(b))
   ```
 
   @compatibility(numpy)
   In `numpy` transposes are memory-efficient constant time operations as they
   simply return a new view of the same data with adjusted `strides`.
 
-  TensorFlow does not support strides, `matrix_transposes` return a new tensor
+  TensorFlow does not support strides, `linalg.transposes` return a new tensor
   with the items permuted.
   @end_compatibility
 
@@ -1467,7 +1469,7 @@ def matrix_transpose(a, name="matrix_transpose", conjugate=False):
     a: A `Tensor` with `rank >= 2`.
     name: A name for the operation (optional).
     conjugate: Optional bool. Setting it to `True` is mathematically equivalent
-      to tf.conj(tf.matrix_transpose(input)).
+      to tf.conj(tf.linalg.transpose(input)).
 
   Returns:
     A transposed batch matrix `Tensor`.
@@ -1756,7 +1758,8 @@ def _normalize_sparse_shape(shape, name):
   return (ops.convert_to_tensor(shape, dtype=dtypes.int64, name=name), rank)
 
 
-@tf_export("sparse_placeholder")
+@tf_export("sparse.placeholder", "sparse_placeholder")
+@deprecation.deprecated_endpoints("sparse_placeholder")
 def sparse_placeholder(dtype, shape=None, name=None):
   """Inserts a placeholder for a sparse tensor that will be always fed.
 
@@ -1767,8 +1770,8 @@ def sparse_placeholder(dtype, shape=None, name=None):
   For example:
 
   ```python
-  x = tf.sparse_placeholder(tf.float32)
-  y = tf.sparse_reduce_sum(x)
+  x = tf.sparse.placeholder(tf.float32)
+  y = tf.sparse.reduce_sum(x)
 
   with tf.Session() as sess:
     print(sess.run(y))  # ERROR: will fail because x was not fed.
@@ -2250,7 +2253,8 @@ def required_space_to_batch_paddings(input_shape,
     return result_paddings, result_crops
 
 
-@tf_export("space_to_batch")
+@tf_export("nn.space_to_batch", "space_to_batch")
+@deprecation.deprecated_endpoints("space_to_batch")
 def space_to_batch(input, paddings, block_size, name=None):  # pylint: disable=redefined-builtin
   result = space_to_batch_nd(
       input,
@@ -2264,7 +2268,8 @@ def space_to_batch(input, paddings, block_size, name=None):  # pylint: disable=r
 space_to_batch.__doc__ = gen_array_ops.space_to_batch.__doc__
 
 
-@tf_export("space_to_depth")
+@tf_export("nn.space_to_depth", "space_to_depth")
+@deprecation.deprecated_endpoints("space_to_depth")
 def space_to_depth(input, block_size, name=None, data_format="NHWC"):  # pylint: disable=redefined-builtin
   return gen_array_ops.space_to_depth(input, block_size, data_format, name=name)
 
@@ -2272,7 +2277,8 @@ def space_to_depth(input, block_size, name=None, data_format="NHWC"):  # pylint:
 space_to_depth.__doc__ = gen_array_ops.space_to_depth.__doc__
 
 
-@tf_export("depth_to_space")
+@tf_export("nn.depth_to_space", "depth_to_space")
+@deprecation.deprecated_endpoints("depth_to_space")
 def depth_to_space(input, block_size, name=None, data_format="NHWC"):  # pylint: disable=redefined-builtin
   return gen_array_ops.depth_to_space(input, block_size, data_format, name=name)
 
@@ -2747,7 +2753,8 @@ def batch_gather(params, indices, name=None):
 @tf_export("quantize_v2")
 @deprecation.deprecated(
     "2017-10-25",
-    "`tf.quantize_v2` is deprecated, please use `tf.quantize` instead.")
+    "`tf.quantize_v2` is deprecated, please use `tf.quantization.quantize` "
+    "instead.")  # pylint: disable=missing-docstring
 def quantize_v2(input,  # pylint: disable=redefined-builtin
                 min_range,
                 max_range,
@@ -2769,7 +2776,8 @@ quantize_v2.__doc__ = """Please use `tf.quantize` instead."""
 
 # We want to expose tf.quantize instead of tf.quantize_v2; we can deprecate
 # tf.quantize_v2 in next version of TensorFlow.
-@tf_export("quantize")
+@tf_export("quantization.quantize", "quantize")
+@deprecation.deprecated_endpoints("quantize")
 def quantize(input,  # pylint: disable=redefined-builtin
              min_range,
              max_range,
diff --git a/tensorflow/python/ops/candidate_sampling_ops.py b/tensorflow/python/ops/candidate_sampling_ops.py
index 9ea1ea9c92..98dde995c9 100644
--- a/tensorflow/python/ops/candidate_sampling_ops.py
+++ b/tensorflow/python/ops/candidate_sampling_ops.py
@@ -23,10 +23,12 @@ from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops  # pylint: disable=unused-import
 from tensorflow.python.ops import gen_candidate_sampling_ops
 from tensorflow.python.ops import math_ops  # pylint: disable=unused-import
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('nn.uniform_candidate_sampler')
+@tf_export('random.uniform_candidate_sampler', 'nn.uniform_candidate_sampler')
+@deprecation.deprecated_endpoints('nn.uniform_candidate_sampler')
 def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
                               range_max, seed=None, name=None):
   """Samples a set of classes using a uniform base distribution.
@@ -82,7 +84,9 @@ def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
       seed2=seed2, name=name)
 
 
-@tf_export('nn.log_uniform_candidate_sampler')
+@tf_export('random.log_uniform_candidate_sampler',
+           'nn.log_uniform_candidate_sampler')
+@deprecation.deprecated_endpoints('nn.log_uniform_candidate_sampler')
 def log_uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
                                   range_max, seed=None, name=None):
   """Samples a set of classes using a log-uniform (Zipfian) base distribution.
diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py
index c3cf6e61f2..d607f1d9fb 100644
--- a/tensorflow/python/ops/check_ops.py
+++ b/tensorflow/python/ops/check_ops.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 NUMERIC_TYPES = frozenset(
@@ -91,7 +92,8 @@ def _shape_and_dtype_str(tensor):
   return 'shape=%s dtype=%s' % (tensor.shape, tensor.dtype.name)
 
 
-@tf_export('assert_proper_iterable')
+@tf_export('debugging.assert_proper_iterable', 'assert_proper_iterable')
+@deprecation.deprecated_endpoints('assert_proper_iterable')
 def assert_proper_iterable(values):
   """Static assert that values is a "proper" iterable.
 
@@ -119,7 +121,8 @@ def assert_proper_iterable(values):
         'Expected argument "values" to be iterable.  Found: %s' % type(values))
 
 
-@tf_export('assert_negative')
+@tf_export('debugging.assert_negative', 'assert_negative')
+@deprecation.deprecated_endpoints('assert_negative')
 def assert_negative(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x < 0` holds element-wise.
 
@@ -160,7 +163,8 @@ def assert_negative(x, data=None, summarize=None, message=None, name=None):
     return assert_less(x, zero, data=data, summarize=summarize)
 
 
-@tf_export('assert_positive')
+@tf_export('debugging.assert_positive', 'assert_positive')
+@deprecation.deprecated_endpoints('assert_positive')
 def assert_positive(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x > 0` holds element-wise.
 
@@ -200,7 +204,8 @@ def assert_positive(x, data=None, summarize=None, message=None, name=None):
     return assert_less(zero, x, data=data, summarize=summarize)
 
 
-@tf_export('assert_non_negative')
+@tf_export('debugging.assert_non_negative', 'assert_non_negative')
+@deprecation.deprecated_endpoints('assert_non_negative')
 def assert_non_negative(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x >= 0` holds element-wise.
 
@@ -242,7 +247,8 @@ def assert_non_negative(x, data=None, summarize=None, message=None, name=None):
     return assert_less_equal(zero, x, data=data, summarize=summarize)
 
 
-@tf_export('assert_non_positive')
+@tf_export('debugging.assert_non_positive', 'assert_non_positive')
+@deprecation.deprecated_endpoints('assert_non_positive')
 def assert_non_positive(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x <= 0` holds element-wise.
 
@@ -284,7 +290,7 @@ def assert_non_positive(x, data=None, summarize=None, message=None, name=None):
     return assert_less_equal(x, zero, data=data, summarize=summarize)
 
 
-@tf_export('assert_equal')
+@tf_export('debugging.assert_equal', 'assert_equal')
 def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x == y` holds element-wise.
 
@@ -384,7 +390,8 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_none_equal')
+@tf_export('debugging.assert_none_equal', 'assert_none_equal')
+@deprecation.deprecated_endpoints('assert_none_equal')
 def assert_none_equal(
     x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x != y` holds for all elements.
@@ -435,7 +442,8 @@ def assert_none_equal(
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_near')
+@tf_export('debugging.assert_near', 'assert_near')
+@deprecation.deprecated_endpoints('assert_near')
 def assert_near(
     x, y, rtol=None, atol=None, data=None, summarize=None, message=None,
     name=None):
@@ -513,7 +521,7 @@ def assert_near(
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_less')
+@tf_export('debugging.assert_less', 'assert_less')
 def assert_less(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x < y` holds element-wise.
 
@@ -561,7 +569,8 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_less_equal')
+@tf_export('debugging.assert_less_equal', 'assert_less_equal')
+@deprecation.deprecated_endpoints('assert_less_equal')
 def assert_less_equal(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x <= y` holds element-wise.
 
@@ -609,7 +618,7 @@ def assert_less_equal(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_greater')
+@tf_export('debugging.assert_greater', 'assert_greater')
 def assert_greater(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x > y` holds element-wise.
 
@@ -657,7 +666,8 @@ def assert_greater(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_greater_equal')
+@tf_export('debugging.assert_greater_equal', 'assert_greater_equal')
+@deprecation.deprecated_endpoints('assert_greater_equal')
 def assert_greater_equal(x, y, data=None, summarize=None, message=None,
                          name=None):
   """Assert the condition `x >= y` holds element-wise.
@@ -755,7 +765,7 @@ def _assert_rank_condition(
   return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_rank')
+@tf_export('debugging.assert_rank', 'assert_rank')
 def assert_rank(x, rank, data=None, summarize=None, message=None, name=None):
   """Assert `x` has rank equal to `rank`.
 
@@ -817,7 +827,8 @@ def assert_rank(x, rank, data=None, summarize=None, message=None, name=None):
   return assert_op
 
 
-@tf_export('assert_rank_at_least')
+@tf_export('debugging.assert_rank_at_least', 'assert_rank_at_least')
+@deprecation.deprecated_endpoints('assert_rank_at_least')
 def assert_rank_at_least(
     x, rank, data=None, summarize=None, message=None, name=None):
   """Assert `x` has rank equal to `rank` or higher.
@@ -948,7 +959,8 @@ def _assert_ranks_condition(
   return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_rank_in')
+@tf_export('debugging.assert_rank_in', 'assert_rank_in')
+@deprecation.deprecated_endpoints('assert_rank_in')
 def assert_rank_in(
     x, ranks, data=None, summarize=None, message=None, name=None):
   """Assert `x` has rank in `ranks`.
@@ -1010,7 +1022,8 @@ def assert_rank_in(
   return assert_op
 
 
-@tf_export('assert_integer')
+@tf_export('debugging.assert_integer', 'assert_integer')
+@deprecation.deprecated_endpoints('assert_integer')
 def assert_integer(x, message=None, name=None):
   """Assert that `x` is of integer dtype.
 
@@ -1048,7 +1061,8 @@ def assert_integer(x, message=None, name=None):
     return control_flow_ops.no_op('statically_determined_was_integer')
 
 
-@tf_export('assert_type')
+@tf_export('debugging.assert_type', 'assert_type')
+@deprecation.deprecated_endpoints('assert_type')
 def assert_type(tensor, tf_type, message=None, name=None):
   """Statically asserts that the given `Tensor` is of the specified type.
 
@@ -1095,12 +1109,14 @@ def _get_diff_for_monotonic_comparison(x):
   return control_flow_ops.cond(is_shorter_than_two, short_result, diff)
 
 
-@tf_export('is_numeric_tensor')
+@tf_export('debugging.is_numeric_tensor', 'is_numeric_tensor')
+@deprecation.deprecated_endpoints('is_numeric_tensor')
 def is_numeric_tensor(tensor):
   return isinstance(tensor, ops.Tensor) and tensor.dtype in NUMERIC_TYPES
 
 
-@tf_export('is_non_decreasing')
+@tf_export('debugging.is_non_decreasing', 'is_non_decreasing')
+@deprecation.deprecated_endpoints('is_non_decreasing')
 def is_non_decreasing(x, name=None):
   """Returns `True` if `x` is non-decreasing.
 
@@ -1127,7 +1143,8 @@ def is_non_decreasing(x, name=None):
     return math_ops.reduce_all(math_ops.less_equal(zero, diff))
 
 
-@tf_export('is_strictly_increasing')
+@tf_export('debugging.is_strictly_increasing', 'is_strictly_increasing')
+@deprecation.deprecated_endpoints('is_strictly_increasing')
 def is_strictly_increasing(x, name=None):
   """Returns `True` if `x` is strictly increasing.
 
@@ -1202,7 +1219,8 @@ def _assert_same_base_type(items, expected_type=None):
     return expected_type
 
 
-@tf_export('assert_same_float_dtype')
+@tf_export('debugging.assert_same_float_dtype', 'assert_same_float_dtype')
+@deprecation.deprecated_endpoints('assert_same_float_dtype')
 def assert_same_float_dtype(tensors=None, dtype=None):
   """Validate and return float type based on `tensors` and `dtype`.
 
@@ -1231,7 +1249,8 @@ def assert_same_float_dtype(tensors=None, dtype=None):
   return dtype
 
 
-@tf_export('assert_scalar')
+@tf_export('debugging.assert_scalar', 'assert_scalar')
+@deprecation.deprecated_endpoints('assert_scalar')
 def assert_scalar(tensor, name=None):
   with ops.name_scope(name, 'assert_scalar', [tensor]) as name_scope:
     tensor = ops.convert_to_tensor(tensor, name=name_scope)
diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py
index 29468431b3..45516068f4 100644
--- a/tensorflow/python/ops/clip_ops.py
+++ b/tensorflow/python/ops/clip_ops.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import numerics
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -76,8 +77,8 @@ def clip_by_value(t, clip_value_min, clip_value_max,
 
   return t_max
   # TODO(scottzhu): switch to use new implmentation in 2 weeks.
-    # return gen_math_ops.clip_by_value(
-    #     t, clip_value_min, clip_value_max, name=name)
+  # return gen_math_ops.clip_by_value(
+  #     t, clip_value_min, clip_value_max, name=name)
 
 
 # TODO(scottzhu): switch to use new implmentation in 2 weeks.
@@ -159,7 +160,8 @@ def clip_by_norm(t, clip_norm, axes=None, name=None):
   return tclip
 
 
-@tf_export("global_norm")
+@tf_export("linalg.global_norm", "global_norm")
+@deprecation.deprecated_endpoints("global_norm")
 def global_norm(t_list, name=None):
   """Computes the global norm of multiple tensors.
 
diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py
index c09154129f..8259142456 100644
--- a/tensorflow/python/ops/confusion_matrix.py
+++ b/tensorflow/python/ops/confusion_matrix.py
@@ -26,6 +26,7 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -89,7 +90,8 @@ def remove_squeezable_dimensions(
     return labels, predictions
 
 
-@tf_export('confusion_matrix')
+@tf_export('train.confusion_matrix', 'confusion_matrix')
+@deprecation.deprecated_endpoints('confusion_matrix')
 def confusion_matrix(labels, predictions, num_classes=None, dtype=dtypes.int32,
                      name=None, weights=None):
   """Computes the confusion matrix from predictions and labels.
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 9d7d31df22..8ad71fe00c 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -106,7 +106,7 @@ def _summarize_eager(tensor, summarize=None):
 
 # Assert and Print are special symbols in python, so we must
 # use an upper-case version of them.
-@tf_export("Assert")
+@tf_export("debugging.Assert", "Assert")
 @tf_should_use.should_use_result
 def Assert(condition, data, summarize=None, name=None):
   """Asserts that the given condition is true.
diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py
index 69c0fcbbee..97b6f3bd9c 100644
--- a/tensorflow/python/ops/data_flow_ops.py
+++ b/tensorflow/python/ops/data_flow_ops.py
@@ -39,6 +39,7 @@ from tensorflow.python.ops import resource_variable_ops
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_data_flow_ops import *
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 # pylint: enable=wildcard-import
@@ -112,7 +113,8 @@ def _shape_common(s1, s2):
 
 
 # pylint: disable=protected-access
-@tf_export("QueueBase")
+@tf_export("io.QueueBase", "QueueBase")
+@deprecation.deprecated_endpoints("QueueBase")
 class QueueBase(object):
   """Base class for queue implementations.
 
@@ -604,7 +606,8 @@ def _shared_name(shared_name):
   return shared_name
 
 
-@tf_export("RandomShuffleQueue")
+@tf_export("io.RandomShuffleQueue", "RandomShuffleQueue")
+@deprecation.deprecated_endpoints("RandomShuffleQueue")
 class RandomShuffleQueue(QueueBase):
   """A queue implementation that dequeues elements in a random order.
 
@@ -746,7 +749,8 @@ class FIFOQueue(QueueBase):
     super(FIFOQueue, self).__init__(dtypes, shapes, names, queue_ref)
 
 
-@tf_export("PaddingFIFOQueue")
+@tf_export("io.PaddingFIFOQueue", "PaddingFIFOQueue")
+@deprecation.deprecated_endpoints("PaddingFIFOQueue")
 class PaddingFIFOQueue(QueueBase):
   """A FIFOQueue that supports batching variable-sized tensors by padding.
 
@@ -820,7 +824,8 @@ class PaddingFIFOQueue(QueueBase):
     super(PaddingFIFOQueue, self).__init__(dtypes, shapes, names, queue_ref)
 
 
-@tf_export("PriorityQueue")
+@tf_export("io.PriorityQueue", "PriorityQueue")
+@deprecation.deprecated_endpoints("PriorityQueue")
 class PriorityQueue(QueueBase):
   """A queue implementation that dequeues elements in prioritized order.
 
@@ -1300,7 +1305,9 @@ class ConditionalAccumulator(ConditionalAccumulatorBase):
     return out
 
 
-@tf_export("SparseConditionalAccumulator")
+@tf_export("sparse.SparseConditionalAccumulator",
+           "SparseConditionalAccumulator")
+@deprecation.deprecated_endpoints("SparseConditionalAccumulator")
 class SparseConditionalAccumulator(ConditionalAccumulatorBase):
   """A conditional accumulator for aggregating sparse gradients.
 
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index fff3d9b930..65bb77b474 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -43,6 +43,7 @@ from tensorflow.python.ops import gen_linalg_ops
 from tensorflow.python.ops import linalg_ops_impl
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.deprecation import deprecated
 from tensorflow.python.util.deprecation import  deprecated_arg_values
 from tensorflow.python.util.tf_export import tf_export
@@ -341,6 +342,7 @@ class TruncatedNormal(Initializer):
 
 @tf_export("initializers.uniform_unit_scaling",
            "uniform_unit_scaling_initializer")
+@deprecation.deprecated_endpoints("uniform_unit_scaling_initializer")
 class UniformUnitScaling(Initializer):
   """Initializer that generates tensors without scaling variance.
 
@@ -401,6 +403,7 @@ class UniformUnitScaling(Initializer):
 
 @tf_export("keras.initializers.VarianceScaling",
            "initializers.variance_scaling", "variance_scaling_initializer")
+@deprecation.deprecated_endpoints("variance_scaling_initializer")
 class VarianceScaling(Initializer):
   """Initializer capable of adapting its scale to the shape of weights tensors.
 
@@ -494,6 +497,7 @@ class VarianceScaling(Initializer):
 
 @tf_export("keras.initializers.Orthogonal", "initializers.orthogonal",
            "orthogonal_initializer", "keras.initializers.orthogonal")
+@deprecation.deprecated_endpoints("orthogonal_initializer")
 class Orthogonal(Initializer):
   """Initializer that generates an orthogonal matrix.
 
@@ -1149,6 +1153,7 @@ class GlorotUniform(VarianceScaling):
 
 @tf_export("glorot_normal_initializer", "keras.initializers.glorot_normal",
            "initializers.glorot_normal")
+@deprecation.deprecated_endpoints("glorot_normal_initializer")
 class GlorotNormal(VarianceScaling):
   """The Glorot normal initializer, also called Xavier normal initializer.
 
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index f4a93560be..bf4354fa73 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -80,6 +80,7 @@ def _RegularizedGramianCholesky(matrix, l2_regularizer, first_kind):
 
 
 @tf_export('cholesky_solve', 'linalg.cholesky_solve')
+@deprecation.deprecated_endpoints('cholesky_solve')
 def cholesky_solve(chol, rhs, name=None):
   """Solves systems of linear eqns `A X = RHS`, given Cholesky factorizations.
 
@@ -167,7 +168,8 @@ def eye(num_rows,
                              name=name)
 
 
-@tf_export('matrix_solve_ls', 'linalg.lstsq')
+@tf_export('linalg.lstsq', 'matrix_solve_ls')
+@deprecation.deprecated_endpoints('matrix_solve_ls')
 def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
   r"""Solves one or more linear least-squares problems.
 
@@ -220,7 +222,7 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
       squares sense.
 
   Raises:
-    NotImplementedError: matrix_solve_ls is currently disabled for complex128
+    NotImplementedError: linalg.lstsq is currently disabled for complex128
     and l2_regularizer != 0 due to poor accuracy.
   """
 
@@ -303,7 +305,8 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
         matrix, rhs, l2_regularizer, fast=fast, name=name)
 
 
-@tf_export('self_adjoint_eig', 'linalg.eigh')
+@tf_export('linalg.eigh', 'self_adjoint_eig')
+@deprecation.deprecated_endpoints('self_adjoint_eig')
 def self_adjoint_eig(tensor, name=None):
   """Computes the eigen decomposition of a batch of self-adjoint matrices.
 
@@ -325,12 +328,13 @@ def self_adjoint_eig(tensor, name=None):
   return e, v
 
 
-@tf_export('self_adjoint_eigvals', 'linalg.eigvalsh')
+@tf_export('linalg.eigvalsh', 'self_adjoint_eigvals')
+@deprecation.deprecated_endpoints('self_adjoint_eigvals')
 def self_adjoint_eigvals(tensor, name=None):
   """Computes the eigenvalues of one or more self-adjoint matrices.
 
   Note: If your program backpropagates through this function, you should replace
-  it with a call to tf.self_adjoint_eig (possibly ignoring the second output) to
+  it with a call to tf.linalg.eigvalsh (possibly ignoring the second output) to
   avoid computing the eigen decomposition twice. This is because the
   eigenvectors are used to compute the gradient w.r.t. the eigenvalues. See
   _SelfAdjointEigV2Grad in linalg_grad.py.
@@ -348,6 +352,7 @@ def self_adjoint_eigvals(tensor, name=None):
 
 
 @tf_export('svd', 'linalg.svd')
+@deprecation.deprecated_endpoints('svd')
 def svd(tensor, full_matrices=False, compute_uv=True, name=None):
   r"""Computes the singular value decompositions of one or more matrices.
 
diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index 5443699ddd..cffaa983d4 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -59,7 +59,7 @@ def initialize_all_tables(name="init_all_tables"):
   return tables_initializer(name)
 
 
-@tf_export("tables_initializer")
+@tf_export("initializers.tables_initializer", "tables_initializer")
 def tables_initializer(name="init_all_tables"):
   """Returns an Op that initializes all tables of the default graph.
 
diff --git a/tensorflow/python/ops/manip_ops.py b/tensorflow/python/ops/manip_ops.py
index 6633565a64..d9d0728287 100644
--- a/tensorflow/python/ops/manip_ops.py
+++ b/tensorflow/python/ops/manip_ops.py
@@ -19,11 +19,13 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.ops import gen_manip_ops as _gen_manip_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
 # pylint: disable=protected-access
-@tf_export('manip.roll')
+@tf_export('roll', 'manip.roll')
+@deprecation.deprecated_endpoints('manip.roll')
 def roll(input, shift, axis):  # pylint: disable=redefined-builtin
   return _gen_manip_ops.roll(input, shift, axis)
 
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index f57abf6704..83b8b5a3a4 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -70,7 +70,7 @@ def _set_doc(doc):
 
 
 # pylint: disable=redefined-builtin
-@tf_export("argmax")
+@tf_export("math.argmax", "argmax")
 @deprecation.deprecated_args(None, "Use the `axis` argument instead",
                              "dimension")
 @_set_doc(
@@ -88,7 +88,7 @@ def argmax(input,
   return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type)
 
 
-@tf_export("argmin")
+@tf_export("math.argmin", "argmin")
 @deprecation.deprecated_args(None, "Use the `axis` argument instead",
                              "dimension")
 @_set_doc(
@@ -111,7 +111,7 @@ def argmin(input,
 
 # pylint: disable=anomalous-backslash-in-string,protected-access
 # pylint: disable=g-docstring-has-escape
-@tf_export("abs")
+@tf_export("math.abs", "abs")
 def abs(x, name=None):  # pylint: disable=redefined-builtin
   r"""Computes the absolute value of a tensor.
 
@@ -186,7 +186,7 @@ class DivideDelegateWithName(object):
     return _div_python2(self.x, y, self.name)
 
 
-@tf_export("divide")
+@tf_export("math.divide", "divide")
 def divide(x, y, name=None):
   """Computes Python style division of `x` by `y`."""
 
@@ -198,7 +198,7 @@ def divide(x, y, name=None):
     return x / y
 
 
-@tf_export("multiply")
+@tf_export("math.multiply", "multiply")
 def multiply(x, y, name=None):
   return gen_math_ops.mul(x, y, name)
 
@@ -218,7 +218,7 @@ _mul.__doc__ = (
     gen_math_ops.mul.__doc__ + ("" if _mul.__doc__ is None else _mul.__doc__))
 
 
-@tf_export("subtract")
+@tf_export("math.subtract", "subtract")
 def subtract(x, y, name=None):
   return gen_math_ops.sub(x, y, name)
 
@@ -239,7 +239,7 @@ _sub.__doc__ = (
 
 
 # pylint: disable=g-docstring-has-escape
-@tf_export("negative")
+@tf_export("math.negative", "negative")
 def negative(x, name=None):
   """Computes numerical negative value element-wise.
 
@@ -288,7 +288,7 @@ def _neg(x, name=None):
 # pylint: enable=g-docstring-has-escape
 
 
-@tf_export("sign")
+@tf_export("math.sign", "sign")
 def sign(x, name=None):
   """Returns an element-wise indication of the sign of a number.
 
@@ -319,7 +319,7 @@ def sign(x, name=None):
       return gen_math_ops.sign(x, name=name)
 
 
-@tf_export("square")
+@tf_export("math.square", "square")
 def square(x, name=None):
   r"""Computes square of x element-wise.
 
@@ -342,7 +342,7 @@ def square(x, name=None):
       return gen_math_ops.square(x, name=name)
 
 
-@tf_export("sqrt")
+@tf_export("math.sqrt", "sqrt")
 def sqrt(x, name=None):
   r"""Computes square root of x element-wise.
 
@@ -365,7 +365,8 @@ def sqrt(x, name=None):
       return gen_math_ops.sqrt(x, name=name)
 
 
-@tf_export("erf")
+@tf_export("math.erf", "erf")
+@deprecation.deprecated_endpoints("erf")
 def erf(x, name=None):
   """Computes the Gauss error function of `x` element-wise.
 
@@ -386,7 +387,7 @@ def erf(x, name=None):
       return gen_math_ops.erf(x, name=name)
 
 
-@tf_export("scalar_mul")
+@tf_export("math.scalar_mul", "scalar_mul")
 def scalar_mul(scalar, x):
   """Multiplies a scalar times a `Tensor` or `IndexedSlices` object.
 
@@ -416,7 +417,7 @@ def scalar_mul(scalar, x):
     raise ValueError("Only scalar multiply works, got shape %s" % shape)
 
 
-@tf_export("pow")
+@tf_export("math.pow", "pow")
 def pow(x, y, name=None):  # pylint: disable=redefined-builtin
   r"""Computes the power of one value to another.
 
@@ -444,7 +445,7 @@ def pow(x, y, name=None):  # pylint: disable=redefined-builtin
 
 
 # pylint: disable=redefined-builtin,redefined-outer-name
-@tf_export("complex")
+@tf_export("dtypes.complex", "complex")
 def complex(real, imag, name=None):
   r"""Converts two real numbers to a complex number.
 
@@ -486,7 +487,8 @@ def complex(real, imag, name=None):
     return gen_math_ops._complex(real, imag, Tout=Tout, name=name)
 
 
-@tf_export("real")
+@tf_export("math.real", "real")
+@deprecation.deprecated_endpoints("real")
 def real(input, name=None):
   r"""Returns the real part of a complex (or real) tensor.
 
@@ -517,7 +519,8 @@ def real(input, name=None):
       return input
 
 
-@tf_export("imag")
+@tf_export("math.imag", "imag")
+@deprecation.deprecated_endpoints("imag")
 def imag(input, name=None):
   r"""Returns the imaginary part of a complex (or real) tensor.
 
@@ -547,7 +550,8 @@ def imag(input, name=None):
       return array_ops.zeros_like(input)
 
 
-@tf_export("angle")
+@tf_export("math.angle", "angle")
+@deprecation.deprecated_endpoints("angle")
 def angle(input, name=None):
   r"""Returns the element-wise argument of a complex (or real) tensor.
 
@@ -586,7 +590,7 @@ def angle(input, name=None):
 # pylint: enable=redefined-outer-name,redefined-builtin
 
 
-@tf_export("round")
+@tf_export("math.round", "round")
 def round(x, name=None):  # pylint: disable=redefined-builtin
   """Rounds the values of a tensor to the nearest integer, element-wise.
 
@@ -613,7 +617,7 @@ def round(x, name=None):  # pylint: disable=redefined-builtin
     return gen_math_ops.round(x, name=name)
 
 
-@tf_export("cast")
+@tf_export("dtypes.cast", "cast")
 def cast(x, dtype, name=None):
   """Casts a tensor to a new type.
 
@@ -676,7 +680,7 @@ def cast(x, dtype, name=None):
     return x
 
 
-@tf_export("saturate_cast")
+@tf_export("dtypes.saturate_cast", "saturate_cast")
 def saturate_cast(value, dtype, name=None):
   """Performs a safe saturating cast of `value` to `dtype`.
 
@@ -995,7 +999,7 @@ def _div_python2(x, y, name=None):
       return gen_math_ops.floor_div(x, y, name=name)
 
 
-@tf_export("truediv")
+@tf_export("math.truediv", "truediv")
 def truediv(x, y, name=None):
   """Divides x / y elementwise (using Python 3 division operator semantics).
 
@@ -1006,7 +1010,7 @@ def truediv(x, y, name=None):
   arguments are cast to floating types first.   This op is generated by normal
   `x / y` division in Python 3 and in Python 2.7 with
   `from __future__ import division`.  If you want integer division that rounds
-  down, use `x // y` or `tf.floordiv`.
+  down, use `x // y` or `tf.math.floordiv`.
 
   `x` and `y` must have the same numeric type.  If the inputs are floating
   point, the output will have the same type.  If the inputs are integral, the
@@ -1078,7 +1082,8 @@ mod = gen_math_ops.floor_mod
 
 # TODO(aselle): Deprecate this once all internal functionality uses
 # tf.truncatediv
-@tf_export("floordiv")
+@tf_export("math.floordiv", "floordiv")
+@deprecation.deprecated_endpoints("floordiv")
 def floordiv(x, y, name=None):
   """Divides `x / y` elementwise, rounding toward the most negative integer.
 
@@ -1151,7 +1156,8 @@ _OverrideBinaryOperatorHelper(gen_math_ops.floor_mod, "mod")
 _OverrideBinaryOperatorHelper(pow, "pow")
 
 
-@tf_export("logical_xor")
+@tf_export("math.logical_xor", "logical_xor")
+@deprecation.deprecated_endpoints("logical_xor")
 def logical_xor(x, y, name="LogicalXor"):
   """x ^ y = (x | y) & ~(x & y)."""
   # TODO(alemi) Make this a cwise op if people end up relying on it.
@@ -1277,7 +1283,7 @@ def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output):
   return output
 
 
-@tf_export("reduce_sum")
+@tf_export("math.reduce_sum", "reduce_sum")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_sum(input_tensor,
@@ -1339,7 +1345,7 @@ def reduce_sum(input_tensor,
                                    name=name))
 
 
-@tf_export("count_nonzero")
+@tf_export("math.count_nonzero", "count_nonzero")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def count_nonzero(input_tensor,
@@ -1417,7 +1423,7 @@ def count_nonzero(input_tensor,
         dtype=dtype)
 
 
-@tf_export("reduce_mean")
+@tf_export("math.reduce_mean", "reduce_mean")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_mean(input_tensor,
@@ -1489,7 +1495,7 @@ def reduce_mean(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_prod")
+@tf_export("math.reduce_prod", "reduce_prod")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_prod(input_tensor,
@@ -1539,7 +1545,7 @@ def reduce_prod(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_min")
+@tf_export("math.reduce_min", "reduce_min")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_min(input_tensor,
@@ -1588,7 +1594,7 @@ def reduce_min(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_max")
+@tf_export("math.reduce_max", "reduce_max")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_max(input_tensor,
@@ -1637,7 +1643,7 @@ def reduce_max(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_all")
+@tf_export("math.reduce_all", "reduce_all")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_all(input_tensor,
@@ -1695,7 +1701,7 @@ def reduce_all(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_any")
+@tf_export("math.reduce_any", "reduce_any")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_any(input_tensor,
@@ -1753,7 +1759,7 @@ def reduce_any(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_logsumexp")
+@tf_export("math.reduce_logsumexp", "reduce_logsumexp")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_logsumexp(input_tensor,
@@ -1827,7 +1833,8 @@ def reduce_logsumexp(input_tensor,
     return _may_reduce_to_scalar(keepdims, axis, reduction_indices, result)
 
 
-@tf_export("trace", "linalg.trace")
+@tf_export("linalg.trace", "trace")
+@deprecation.deprecated_endpoints("trace")
 def trace(x, name=None):
   """Compute the trace of a tensor `x`.
 
@@ -1841,12 +1848,12 @@ def trace(x, name=None):
 
   ```python
   x = tf.constant([[1, 2], [3, 4]])
-  tf.trace(x)  # 5
+  tf.linalg.trace(x)  # 5
 
   x = tf.constant([[1, 2, 3],
                    [4, 5, 6],
                    [7, 8, 9]])
-  tf.trace(x)  # 15
+  tf.linalg.trace(x)  # 15
 
   x = tf.constant([[[1, 2, 3],
                     [4, 5, 6],
@@ -1854,7 +1861,7 @@ def trace(x, name=None):
                    [[-1, -2, -3],
                     [-4, -5, -6],
                     [-7, -8, -9]]])
-  tf.trace(x)  # [15, -15]
+  tf.linalg.trace(x)  # [15, -15]
   ```
 
   Args:
@@ -1869,7 +1876,7 @@ def trace(x, name=None):
     return reduce_sum(array_ops.matrix_diag_part(x), [-1], name=name)
 
 
-@tf_export("matmul")
+@tf_export("linalg.matmul", "matmul")
 def matmul(a,
            b,
            transpose_a=False,
@@ -2131,7 +2138,7 @@ def _as_indexed_slices_list(inputs, optimize=True):
   return casted_outputs
 
 
-@tf_export("add_n")
+@tf_export("math.add_n", "add_n")
 def add_n(inputs, name=None):
   """Adds all input tensors element-wise.
 
@@ -2166,14 +2173,15 @@ def add_n(inputs, name=None):
   return gen_math_ops.add_n(inputs, name=name)
 
 
-@tf_export("accumulate_n")
+@tf_export("math.accumulate_n", "accumulate_n")
+@deprecation.deprecated_endpoints("accumulate_n")
 def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
   """Returns the element-wise sum of a list of tensors.
 
   Optionally, pass `shape` and `tensor_dtype` for shape and type checking,
   otherwise, these are inferred.
 
-  `tf.accumulate_n` performs the same operation as `tf.add_n`, but does not
+  `tf.math.accumulate_n` performs the same operation as `tf.add_n`, but does not
   wait for all of its inputs to be ready before beginning to sum. This can
   save memory if inputs are ready at different times, since minimum temporary
   storage is proportional to the output size rather than the inputs size.
@@ -2185,10 +2193,10 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
   ```python
   a = tf.constant([[1, 2], [3, 4]])
   b = tf.constant([[5, 0], [0, 6]])
-  tf.accumulate_n([a, b, a])  # [[7, 4], [6, 14]]
+  tf.math.accumulate_n([a, b, a])  # [[7, 4], [6, 14]]
 
   # Explicitly pass shape and type
-  tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32)
+  tf.math.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32)
                                                                  # [[7,  4],
                                                                  #  [6, 14]]
   ```
@@ -2252,7 +2260,7 @@ def _accumulate_n_grad(op, grad):
   return [grad] * len(op.inputs)
 
 
-@tf_export("nn.sigmoid", "sigmoid")
+@tf_export("math.sigmoid", "nn.sigmoid", "sigmoid")
 def sigmoid(x, name=None):
   """Computes sigmoid of `x` element-wise.
 
@@ -2275,7 +2283,8 @@ def sigmoid(x, name=None):
     return gen_math_ops.sigmoid(x, name=name)
 
 
-@tf_export("log_sigmoid")
+@tf_export("math.log_sigmoid", "log_sigmoid")
+@deprecation.deprecated_endpoints("log_sigmoid")
 def log_sigmoid(x, name=None):
   """Computes log sigmoid of `x` element-wise.
 
@@ -2294,7 +2303,7 @@ def log_sigmoid(x, name=None):
     return gen_math_ops.neg(gen_nn_ops.softplus(-x), name=name)
 
 
-@tf_export("nn.tanh", "tanh")
+@tf_export("math.tanh", "nn.tanh", "tanh")
 def tanh(x, name=None):
   """Computes hyperbolic tangent of `x` element-wise.
 
@@ -2315,7 +2324,8 @@ def tanh(x, name=None):
       return gen_math_ops.tanh(x, name=name)
 
 
-@tf_export("bincount")
+@tf_export("math.bincount", "bincount")
+@deprecation.deprecated_endpoints("bincount")
 def bincount(arr,
              weights=None,
              minlength=None,
@@ -2362,7 +2372,7 @@ def bincount(arr,
   return gen_math_ops.bincount(arr, output_size, weights)
 
 
-@tf_export("cumsum")
+@tf_export("math.cumsum", "cumsum")
 def cumsum(x, axis=0, exclusive=False, reverse=False, name=None):
   """Compute the cumulative sum of the tensor `x` along `axis`.
 
@@ -2414,7 +2424,8 @@ def cumsum(x, axis=0, exclusive=False, reverse=False, name=None):
         x, axis, exclusive=exclusive, reverse=reverse, name=name)
 
 
-@tf_export("cumprod")
+@tf_export("math.cumprod", "cumprod")
+@deprecation.deprecated_endpoints("cumprod")
 def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
   """Compute the cumulative product of the tensor `x` along `axis`.
 
@@ -2422,7 +2433,7 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
   first element of the input is identical to the first element of the output:
 
   ```python
-  tf.cumprod([a, b, c])  # [a, a * b, a * b * c]
+  tf.math.cumprod([a, b, c])  # [a, a * b, a * b * c]
   ```
 
   By setting the `exclusive` kwarg to `True`, an exclusive cumprod is
@@ -2430,21 +2441,21 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
   instead:
 
   ```python
-  tf.cumprod([a, b, c], exclusive=True)  # [1, a, a * b]
+  tf.math.cumprod([a, b, c], exclusive=True)  # [1, a, a * b]
   ```
 
   By setting the `reverse` kwarg to `True`, the cumprod is performed in the
   opposite direction:
 
   ```python
-  tf.cumprod([a, b, c], reverse=True)  # [a * b * c, b * c, c]
+  tf.math.cumprod([a, b, c], reverse=True)  # [a * b * c, b * c, c]
   ```
 
   This is more efficient than using separate `tf.reverse` ops.
   The `reverse` and `exclusive` kwargs can also be combined:
 
   ```python
-  tf.cumprod([a, b, c], exclusive=True, reverse=True)  # [b * c, c, 1]
+  tf.math.cumprod([a, b, c], exclusive=True, reverse=True)  # [b * c, c, 1]
   ```
 
   Args:
@@ -2466,7 +2477,8 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
         x, axis, exclusive=exclusive, reverse=reverse, name=name)
 
 
-@tf_export("conj")
+@tf_export("math.conj", "conj")
+@deprecation.deprecated_endpoints("conj")
 def conj(x, name=None):
   r"""Returns the complex conjugate of a complex number.
 
@@ -2480,7 +2492,7 @@ def conj(x, name=None):
   For example:
 
       # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-      tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
+      tf.math.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
 
   If `x` is real, it is returned unchanged.
 
@@ -2566,7 +2578,8 @@ def _unsorted_segment_N(data, segment_ids, num_segments):
   return gen_math_ops.maximum(N, 1)
 
 
-@tf_export("unsorted_segment_mean")
+@tf_export("math.unsorted_segment_mean", "unsorted_segment_mean")
+@deprecation.deprecated_endpoints("unsorted_segment_mean")
 def unsorted_segment_mean(data, segment_ids, num_segments, name=None):
   r"""Computes the mean along segments of a tensor.
 
@@ -2608,7 +2621,8 @@ def unsorted_segment_mean(data, segment_ids, num_segments, name=None):
     return summed / N
 
 
-@tf_export("unsorted_segment_sqrt_n")
+@tf_export("math.unsorted_segment_sqrt_n", "unsorted_segment_sqrt_n")
+@deprecation.deprecated_endpoints("unsorted_segment_sqrt_n")
 def unsorted_segment_sqrt_n(data, segment_ids, num_segments, name=None):
   r"""Computes the sum along segments of a tensor divided by the sqrt(N).
 
@@ -2653,7 +2667,8 @@ def unsorted_segment_sqrt_n(data, segment_ids, num_segments, name=None):
     return summed / gen_math_ops.sqrt(N)
 
 
-@tf_export("sparse_segment_sum")
+@tf_export("sparse.segment_sum", "sparse_segment_sum")
+@deprecation.deprecated_endpoints("sparse_segment_sum")
 def sparse_segment_sum(data, indices, segment_ids, name=None,
                        num_segments=None):
   r"""Computes the sum along sparse segments of a tensor.
@@ -2674,16 +2689,16 @@ def sparse_segment_sum(data, indices, segment_ids, name=None,
   c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
 
   # Select two rows, one segment.
-  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
+  tf.sparse.segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
   # => [[0 0 0 0]]
 
   # Select two rows, two segment.
-  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
+  tf.sparse.segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
   # => [[ 1  2  3  4]
   #     [-1 -2 -3 -4]]
 
   # With missing segment ids.
-  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 2]),
+  tf.sparse.segment_sum(c, tf.constant([0, 1]), tf.constant([0, 2]),
                         num_segments=4)
   # => [[ 1  2  3  4]
   #     [ 0  0  0  0]
@@ -2691,7 +2706,7 @@ def sparse_segment_sum(data, indices, segment_ids, name=None,
   #     [ 0  0  0  0]]
 
   # Select all rows, two segments.
-  tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+  tf.sparse.segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
   # => [[0 0 0 0]
   #     [5 6 7 8]]
 
@@ -2726,7 +2741,8 @@ def sparse_segment_sum(data, indices, segment_ids, name=None,
         data=data, indices=indices, segment_ids=segment_ids, name=name)
 
 
-@tf_export("sparse_segment_mean")
+@tf_export("sparse.segment_mean", "sparse_segment_mean")
+@deprecation.deprecated_endpoints("sparse_segment_mean")
 def sparse_segment_mean(data,
                         indices,
                         segment_ids,
@@ -2771,7 +2787,8 @@ def sparse_segment_mean(data,
         data=data, indices=indices, segment_ids=segment_ids, name=name)
 
 
-@tf_export("sparse_segment_sqrt_n")
+@tf_export("sparse.segment_sqrt_n", "sparse_segment_sqrt_n")
+@deprecation.deprecated_endpoints("sparse_segment_sqrt_n")
 def sparse_segment_sqrt_n(data,
                           indices,
                           segment_ids,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 2a1919e66f..453848fc00 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -328,7 +328,7 @@ def swish(features):
   return features * math_ops.sigmoid(features)
 
 
-@tf_export("nn.l2_normalize")
+@tf_export("math.l2_normalize", "linalg.l2_normalize", "nn.l2_normalize")
 @deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
   """Normalizes along dimension `axis` using an L2 norm.
@@ -360,7 +360,7 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
     return math_ops.multiply(x, x_inv_norm, name=name)
 
 
-@tf_export("nn.zero_fraction")
+@tf_export("math.zero_fraction", "nn.zero_fraction")
 def zero_fraction(value, name=None):
   """Returns the fraction of zeros in `value`.
 
@@ -689,7 +689,7 @@ def moments(
     # Compute true mean while keeping the dims for proper broadcasting.
     mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean")
     # sample variance, not unbiased variance
-    # Note: stop_gradient does not change the gradient that gets 
+    # Note: stop_gradient does not change the gradient that gets
     #       backpropagated to the mean from the variance calculation,
     #       because that gradient is zero
     variance = math_ops.reduce_mean(
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 9ef177e97b..fd71e7cc39 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1692,7 +1692,7 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   return output
 
 
-@tf_export("nn.softmax")
+@tf_export("nn.softmax", "math.softmax")
 @deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def softmax(logits, axis=None, name=None, dim=None):
   """Computes softmax activations.
@@ -1722,7 +1722,7 @@ def softmax(logits, axis=None, name=None, dim=None):
   return _softmax(logits, gen_nn_ops.softmax, axis, name)
 
 
-@tf_export("nn.log_softmax")
+@tf_export("nn.log_softmax", "math.log_softmax")
 @deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def log_softmax(logits, axis=None, name=None, dim=None):
   """Computes log softmax activations.
@@ -2329,7 +2329,7 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):  # pylint: di
     return ret
 
 
-@tf_export("nn.top_k")
+@tf_export("math.top_k", "nn.top_k")
 def top_k(input, k=1, sorted=True, name=None):  # pylint: disable=redefined-builtin
   """Finds values and indices of the `k` largest entries for the last dimension.
 
@@ -2644,7 +2644,7 @@ def erosion2d(value, kernel, strides, rates, padding, name=None):
             name=name))
 
 
-@tf_export("nn.in_top_k")
+@tf_export("math.in_top_k", "nn.in_top_k")
 def in_top_k(predictions, targets, k, name=None):
   r"""Says whether the targets are in the top `K` predictions.
 
diff --git a/tensorflow/python/ops/numerics.py b/tensorflow/python/ops/numerics.py
index 8fcbd7d834..002e87b411 100644
--- a/tensorflow/python/ops/numerics.py
+++ b/tensorflow/python/ops/numerics.py
@@ -24,10 +24,12 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("verify_tensor_all_finite")
+@tf_export("debugging.assert_all_finite", "verify_tensor_all_finite")
+@deprecation.deprecated_endpoints("verify_tensor_all_finite")
 def verify_tensor_all_finite(t, msg, name=None):
   """Assert that the tensor does not contain any NaN's or Inf's.
 
diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index b3e03a0135..ff50fe0d09 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops.gen_parsing_ops import *
 # pylint: enable=wildcard-import,undefined-variable
 from tensorflow.python.platform import tf_logging
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -45,7 +46,7 @@ ops.NotDifferentiable("SerializeTensor")
 ops.NotDifferentiable("StringToNumber")
 
 
-@tf_export("VarLenFeature")
+@tf_export("io.VarLenFeature", "VarLenFeature")
 class VarLenFeature(collections.namedtuple("VarLenFeature", ["dtype"])):
   """Configuration for parsing a variable-length input feature.
 
@@ -55,7 +56,7 @@ class VarLenFeature(collections.namedtuple("VarLenFeature", ["dtype"])):
   pass
 
 
-@tf_export("SparseFeature")
+@tf_export("io.SparseFeature", "SparseFeature")
 class SparseFeature(
     collections.namedtuple(
         "SparseFeature",
@@ -130,7 +131,7 @@ class SparseFeature(
         cls, index_key, value_key, dtype, size, already_sorted)
 
 
-@tf_export("FixedLenFeature")
+@tf_export("io.FixedLenFeature", "FixedLenFeature")
 class FixedLenFeature(collections.namedtuple(
     "FixedLenFeature", ["shape", "dtype", "default_value"])):
   """Configuration for parsing a fixed-length input feature.
@@ -150,7 +151,7 @@ class FixedLenFeature(collections.namedtuple(
         cls, shape, dtype, default_value)
 
 
-@tf_export("FixedLenSequenceFeature")
+@tf_export("io.FixedLenSequenceFeature", "FixedLenSequenceFeature")
 class FixedLenSequenceFeature(collections.namedtuple(
     "FixedLenSequenceFeature",
     ["shape", "dtype", "allow_missing", "default_value"])):
@@ -360,7 +361,7 @@ def _prepend_none_dimension(features):
     return features
 
 
-@tf_export("parse_example")
+@tf_export("io.parse_example", "parse_example")
 def parse_example(serialized, features, name=None, example_names=None):
   # pylint: disable=line-too-long
   """Parses `Example` protos into a `dict` of tensors.
@@ -761,7 +762,7 @@ def _process_raw_parameters(names, dense_defaults, sparse_keys, sparse_types,
           dense_shapes_as_proto, dense_shapes)
 
 
-@tf_export("parse_single_example")
+@tf_export("io.parse_single_example", "parse_single_example")
 def parse_single_example(serialized, features, name=None, example_names=None):
   """Parses a single `Example` proto.
 
@@ -1244,7 +1245,7 @@ def _parse_sequence_example_raw(serialized,
 
 # TODO(sundberg): rewrite this method to call the batch version, which is more
 # efficient especially for large inputs.
-@tf_export("parse_single_sequence_example")
+@tf_export("io.parse_single_sequence_example", "parse_single_sequence_example")
 def parse_single_sequence_example(
     serialized, context_features=None, sequence_features=None,
     example_name=None, name=None):
@@ -1564,7 +1565,8 @@ def _parse_single_sequence_example_raw(serialized,
 
 
 # Swap `name` and `na_value` for backward compatibility.
-@tf_export("decode_csv")
+@tf_export("io.decode_csv", "decode_csv")
+@deprecation.deprecated_endpoints("decode_csv")
 def decode_csv(records,
                record_defaults,
                field_delim=",",
diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py
index 4baf506385..c2eb9dfc5d 100644
--- a/tensorflow/python/ops/random_ops.py
+++ b/tensorflow/python/ops/random_ops.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import math_ops
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_random_ops import *
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 # pylint: enable=wildcard-import
@@ -43,7 +44,7 @@ def _ShapeTensor(shape):
   return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
 
 
-@tf_export("random_normal")
+@tf_export("random.normal", "random_normal")
 def random_normal(shape,
                   mean=0.0,
                   stddev=1.0,
@@ -136,7 +137,7 @@ def parameterized_truncated_normal(shape,
     return rnd
 
 
-@tf_export("truncated_normal")
+@tf_export("random.truncated_normal", "truncated_normal")
 def truncated_normal(shape,
                      mean=0.0,
                      stddev=1.0,
@@ -181,7 +182,7 @@ ops.NotDifferentiable("ParameterizedTruncatedNormal")
 ops.NotDifferentiable("TruncatedNormal")
 
 
-@tf_export("random_uniform")
+@tf_export("random.uniform", "random_uniform")
 def random_uniform(shape,
                    minval=0,
                    maxval=None,
@@ -246,7 +247,7 @@ def random_uniform(shape,
 ops.NotDifferentiable("RandomUniform")
 
 
-@tf_export("random_shuffle")
+@tf_export("random.shuffle", "random_shuffle")
 def random_shuffle(value, seed=None, name=None):
   """Randomly shuffles a tensor along its first dimension.
 
@@ -277,7 +278,7 @@ def random_shuffle(value, seed=None, name=None):
       value, seed=seed1, seed2=seed2, name=name)
 
 
-@tf_export("random_crop")
+@tf_export("image.random_crop", "random_crop")
 def random_crop(value, size, seed=None, name=None):
   """Randomly crops a tensor to a given size.
 
@@ -320,7 +321,7 @@ def random_crop(value, size, seed=None, name=None):
     return array_ops.slice(value, offset, size, name=name)
 
 
-@tf_export("multinomial")
+@tf_export("random.multinomial", "multinomial")
 def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None):
   """Draws samples from a multinomial distribution.
 
@@ -356,7 +357,8 @@ def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None):
 ops.NotDifferentiable("Multinomial")
 
 
-@tf_export("random_gamma")
+@tf_export("random.gamma", "random_gamma")
+@deprecation.deprecated_endpoints("random_gamma")
 def random_gamma(shape,
                  alpha,
                  beta=None,
@@ -439,7 +441,8 @@ def random_gamma(shape,
             shape, alpha_broadcast, seed=seed1, seed2=seed2) / beta)
 
 
-@tf_export("random_poisson")
+@tf_export("random.poisson", "random_poisson")
+@deprecation.deprecated_endpoints("random_poisson")
 def random_poisson(lam, shape, dtype=dtypes.float32, seed=None, name=None):
   """Draws `shape` samples from each of the given Poisson distribution(s).
 
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index 400a42a3c0..7e3dbdbad4 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -185,7 +185,8 @@ def sparse_eye(num_rows,
 
 
 # pylint: disable=protected-access
-@tf_export("sparse_concat")
+@tf_export("sparse.concat", "sparse_concat")
+@deprecation.deprecated_endpoints("sparse_concat")
 @deprecation.deprecated_args(
     None, "concat_dim is deprecated, use axis instead", "concat_dim")
 def sparse_concat(axis,
@@ -317,7 +318,8 @@ def sparse_concat(axis,
   return sparse_tensor.SparseTensor(output_ind, output_val, output_shape)
 
 
-@tf_export("sparse_add")
+@tf_export("sparse.add", "sparse_add")
+@deprecation.deprecated_endpoints("sparse_add")
 def sparse_add(a, b, thresh=0):
   """Adds two tensors, at least one of each is a `SparseTensor`.
 
@@ -557,7 +559,8 @@ def sparse_dense_cwise_add(sp_t, dense_t):
   return sparse_tensor.SparseTensor(sp_t.indices, result, sp_t.dense_shape)
 
 
-@tf_export("sparse_reorder")
+@tf_export("sparse.reorder", "sparse_reorder")
+@deprecation.deprecated_endpoints("sparse_reorder")
 def sparse_reorder(sp_input, name=None):
   """Reorders a `SparseTensor` into the canonical, row-major ordering.
 
@@ -607,7 +610,8 @@ def sparse_reorder(sp_input, name=None):
   return sparse_tensor.SparseTensor(reordered_ind, reordered_val, dense_shape)
 
 
-@tf_export("sparse_reshape")
+@tf_export("sparse.reshape", "sparse_reshape")
+@deprecation.deprecated_endpoints("sparse_reshape")
 def sparse_reshape(sp_input, shape, name=None):
   """Reshapes a `SparseTensor` to represent values in a new dense shape.
 
@@ -700,7 +704,8 @@ class KeywordRequired(object):
     return "KeywordRequired()"
 
 
-@tf_export("sparse_split")
+@tf_export("sparse.split", "sparse_split")
+@deprecation.deprecated_endpoints("sparse_split")
 @deprecation.deprecated_args(
     None, "split_dim is deprecated, use axis instead", "split_dim")
 def sparse_split(keyword_required=KeywordRequired(),
@@ -773,7 +778,8 @@ def sparse_split(keyword_required=KeywordRequired(),
   return sparse_tensors
 
 
-@tf_export("sparse_slice")
+@tf_export("sparse.slice", "sparse_slice")
+@deprecation.deprecated_endpoints("sparse_slice")
 def sparse_slice(sp_input, start, size, name=None):
   """Slice a `SparseTensor` based on the `start` and `size.
 
@@ -785,11 +791,11 @@ def sparse_slice(sp_input, start, size, name=None):
 
   Graphically the output tensors are:
 
-      sparse_slice([0, 0], [2, 4]) = shape = [2, 4]
+      sparse.slice([0, 0], [2, 4]) = shape = [2, 4]
       [    a  ]
       [b c    ]
 
-      sparse_slice([0, 4], [2, 3]) = shape = [2, 3]
+      sparse.slice([0, 4], [2, 3]) = shape = [2, 3]
       [ d e  ]
       [      ]
 
@@ -823,6 +829,9 @@ def sparse_slice(sp_input, start, size, name=None):
 
 
 @tf_export("sparse_to_dense")
+@deprecation.deprecated(
+    None,
+    "Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.")
 def sparse_to_dense(sparse_indices,
                     output_shape,
                     sparse_values,
@@ -878,7 +887,8 @@ def sparse_to_dense(sparse_indices,
       name=name)
 
 
-@tf_export("sparse_reduce_max")
+@tf_export("sparse.reduce_max", "sparse_reduce_max")
+@deprecation.deprecated_endpoints("sparse_reduce_max")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def sparse_reduce_max(sp_input, axis=None, keepdims=None,
@@ -912,16 +922,16 @@ def sparse_reduce_max(sp_input, axis=None, keepdims=None,
   # 'x' represents [[1, ?, 2]
   #                 [?, 3, ?]]
   # where ? is implicitly-zero.
-  tf.sparse_reduce_max(x) ==> 3
-  tf.sparse_reduce_max(x, 0) ==> [1, 3, 2]
-  tf.sparse_reduce_max(x, 1) ==> [2, 3]  # Can also use -1 as the axis.
-  tf.sparse_reduce_max(x, 1, keepdims=True) ==> [[2], [3]]
-  tf.sparse_reduce_max(x, [0, 1]) ==> 3
+  tf.sparse.reduce_max(x) ==> 3
+  tf.sparse.reduce_max(x, 0) ==> [1, 3, 2]
+  tf.sparse.reduce_max(x, 1) ==> [2, 3]  # Can also use -1 as the axis.
+  tf.sparse.reduce_max(x, 1, keepdims=True) ==> [[2], [3]]
+  tf.sparse.reduce_max(x, [0, 1]) ==> 3
 
   # 'y' represents [[-7, ?]
   #                 [ 4, 3]
   #                 [ ?, ?]
-  tf.sparse_reduce_max(x, 1) ==> [-7, 4, 0]
+  tf.sparse.reduce_max(x, 1) ==> [-7, 4, 0]
   ```
 
   Args:
@@ -945,7 +955,8 @@ def sparse_reduce_max(sp_input, axis=None, keepdims=None,
       math_ops._ReductionDims(sp_input, axis, reduction_axes), keepdims)
 
 
-@tf_export("sparse_reduce_max_sparse")
+@tf_export("sparse.reduce_max_sparse", "sparse_reduce_max_sparse")
+@deprecation.deprecated_endpoints("sparse_reduce_max_sparse")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def sparse_reduce_max_sparse(sp_input,
@@ -995,7 +1006,8 @@ def sparse_reduce_max_sparse(sp_input,
   return sparse_tensor.SparseTensor(output_ind, output_val, output_shape)
 
 
-@tf_export("sparse_reduce_sum")
+@tf_export("sparse.reduce_sum", "sparse_reduce_sum")
+@deprecation.deprecated_endpoints("sparse_reduce_sum")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def sparse_reduce_sum(sp_input, axis=None, keepdims=None,
@@ -1021,11 +1033,11 @@ def sparse_reduce_sum(sp_input, axis=None, keepdims=None,
   # 'x' represents [[1, ?, 1]
   #                 [?, 1, ?]]
   # where ? is implicitly-zero.
-  tf.sparse_reduce_sum(x) ==> 3
-  tf.sparse_reduce_sum(x, 0) ==> [1, 1, 1]
-  tf.sparse_reduce_sum(x, 1) ==> [2, 1]  # Can also use -1 as the axis.
-  tf.sparse_reduce_sum(x, 1, keepdims=True) ==> [[2], [1]]
-  tf.sparse_reduce_sum(x, [0, 1]) ==> 3
+  tf.sparse.reduce_sum(x) ==> 3
+  tf.sparse.reduce_sum(x, 0) ==> [1, 1, 1]
+  tf.sparse.reduce_sum(x, 1) ==> [2, 1]  # Can also use -1 as the axis.
+  tf.sparse.reduce_sum(x, 1, keepdims=True) ==> [[2], [1]]
+  tf.sparse.reduce_sum(x, [0, 1]) ==> 3
   ```
 
   Args:
@@ -1049,7 +1061,8 @@ def sparse_reduce_sum(sp_input, axis=None, keepdims=None,
       math_ops._ReductionDims(sp_input, axis, reduction_axes), keepdims)
 
 
-@tf_export("sparse_reduce_sum_sparse")
+@tf_export("sparse.reduce_sum_sparse", "sparse_reduce_sum_sparse")
+@deprecation.deprecated_endpoints("sparse_reduce_sum_sparse")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def sparse_reduce_sum_sparse(sp_input,
@@ -1099,7 +1112,8 @@ def sparse_reduce_sum_sparse(sp_input,
   return sparse_tensor.SparseTensor(output_ind, output_val, output_shape)
 
 
-@tf_export("sparse_tensor_to_dense")
+@tf_export("sparse.to_dense", "sparse_tensor_to_dense")
+@deprecation.deprecated_endpoints("sparse_tensor_to_dense")
 def sparse_tensor_to_dense(sp_input,
                            default_value=0,
                            validate_indices=True,
@@ -1151,7 +1165,8 @@ def sparse_tensor_to_dense(sp_input,
       name=name)
 
 
-@tf_export("sparse_to_indicator")
+@tf_export("sparse.to_indicator", "sparse_to_indicator")
+@deprecation.deprecated_endpoints("sparse_to_indicator")
 def sparse_to_indicator(sp_input, vocab_size, name=None):
   """Converts a `SparseTensor` of ids into a dense bool indicator tensor.
 
@@ -1214,7 +1229,8 @@ def sparse_to_indicator(sp_input, vocab_size, name=None):
         sp_new, default_value=False, validate_indices=False, name=name)
 
 
-@tf_export("sparse_merge")
+@tf_export("sparse.merge", "sparse_merge")
+@deprecation.deprecated_endpoints("sparse_merge")
 def sparse_merge(sp_ids, sp_values, vocab_size, name=None,
                  already_sorted=False):
   """Combines a batch of feature ids and values into a single `SparseTensor`.
@@ -1358,7 +1374,8 @@ def sparse_merge(sp_ids, sp_values, vocab_size, name=None,
         sorted_result.indices, sorted_result.values, new_shape)
 
 
-@tf_export("sparse_retain")
+@tf_export("sparse.retain", "sparse_retain")
+@deprecation.deprecated_endpoints("sparse_retain")
 def sparse_retain(sp_input, to_retain):
   """Retains specified non-empty values within a `SparseTensor`.
 
@@ -1402,7 +1419,8 @@ def sparse_retain(sp_input, to_retain):
                                     array_ops.identity(sp_input.dense_shape))
 
 
-@tf_export("sparse_reset_shape")
+@tf_export("sparse.reset_shape", "sparse_reset_shape")
+@deprecation.deprecated_endpoints("sparse_reset_shape")
 def sparse_reset_shape(sp_input, new_shape=None):
   """Resets the shape of a `SparseTensor` with indices and values unchanged.
 
@@ -1503,7 +1521,8 @@ def sparse_reset_shape(sp_input, new_shape=None):
   return sparse_tensor.SparseTensor(in_indices, in_values, output_shape_tensor)
 
 
-@tf_export("sparse_fill_empty_rows")
+@tf_export("sparse.fill_empty_rows", "sparse_fill_empty_rows")
+@deprecation.deprecated_endpoints("sparse_fill_empty_rows")
 def sparse_fill_empty_rows(sp_input, default_value, name=None):
   """Fills empty rows in the input 2-D `SparseTensor` with a default value.
 
@@ -1567,7 +1586,8 @@ def sparse_fill_empty_rows(sp_input, default_value, name=None):
         dense_shape=sp_input.dense_shape), empty_row_indicator)
 
 
-@tf_export("serialize_sparse")
+@tf_export("io.serialize_sparse", "serialize_sparse")
+@deprecation.deprecated_endpoints("serialize_sparse")
 def serialize_sparse(sp_input, name=None, out_type=dtypes.string):
   """Serialize a `SparseTensor` into a 3-vector (1-D `Tensor`) object.
 
@@ -1593,7 +1613,8 @@ def serialize_sparse(sp_input, name=None, out_type=dtypes.string):
       out_type=out_type)
 
 
-@tf_export("serialize_many_sparse")
+@tf_export("io.serialize_many_sparse", "serialize_many_sparse")
+@deprecation.deprecated_endpoints("serialize_many_sparse")
 def serialize_many_sparse(sp_input, name=None, out_type=dtypes.string):
   """Serialize `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor`.
 
@@ -1694,7 +1715,8 @@ def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None):
   return sparse_tensor.SparseTensor(output_indices, output_values, output_shape)
 
 
-@tf_export("deserialize_many_sparse")
+@tf_export("io.deserialize_many_sparse", "deserialize_many_sparse")
+@deprecation.deprecated_endpoints("deserialize_many_sparse")
 def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None):
   """Deserialize and concatenate `SparseTensors` from a serialized minibatch.
 
@@ -1712,7 +1734,7 @@ def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None):
 
   The input `SparseTensor` objects' indices are assumed ordered in
   standard lexicographic order.  If this is not the case, after this
-  step run `sparse_reorder` to restore index ordering.
+  step run `sparse.reorder` to restore index ordering.
 
   For example, if the serialized input is a `[2, 3]` matrix representing two
   original `SparseTensor` objects:
@@ -1764,7 +1786,8 @@ def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None):
   return sparse_tensor.SparseTensor(output_indices, output_values, output_shape)
 
 
-@tf_export("sparse_tensor_dense_matmul")
+@tf_export("sparse.matmul", "sparse_tensor_dense_matmul")
+@deprecation.deprecated_endpoints("sparse_tensor_dense_matmul")
 def sparse_tensor_dense_matmul(sp_a,
                                b,
                                adjoint_a=False,
@@ -1777,7 +1800,7 @@ def sparse_tensor_dense_matmul(sp_a,
   following input format is recommended for optimal behavior:
 
   * If `adjoint_a == false`: `A` should be sorted in lexicographically
-    increasing order.  Use `sparse_reorder` if you're not sure.
+    increasing order.  Use `sparse.reorder` if you're not sure.
   * If `adjoint_a == true`: `A` should be sorted in order of increasing
     dimension 1 (i.e., "column major" order instead of "row major" order).
 
@@ -1981,7 +2004,8 @@ def sparse_tensor_dense_matmul(sp_a,
         adjoint_b=adjoint_b)
 
 
-@tf_export("sparse_softmax")
+@tf_export("sparse.softmax", "sparse_softmax")
+@deprecation.deprecated_endpoints("sparse_softmax")
 def sparse_softmax(sp_input, name=None):
   """Applies softmax to a batched N-D `SparseTensor`.
 
@@ -2036,7 +2060,8 @@ def sparse_softmax(sp_input, name=None):
                                       sp_input.dense_shape)
 
 
-@tf_export("sparse_maximum")
+@tf_export("sparse.maximum", "sparse_maximum")
+@deprecation.deprecated_endpoints("sparse_maximum")
 def sparse_maximum(sp_a, sp_b, name=None):
   """Returns the element-wise max of two SparseTensors.
 
@@ -2073,7 +2098,8 @@ def sparse_maximum(sp_a, sp_b, name=None):
   return sparse_tensor.SparseTensor(out_indices, out_values, sp_a.dense_shape)
 
 
-@tf_export("sparse_minimum")
+@tf_export("sparse.minimum", "sparse_minimum")
+@deprecation.deprecated_endpoints("sparse_minimum")
 def sparse_minimum(sp_a, sp_b, name=None):
   """Returns the element-wise min of two SparseTensors.
 
@@ -2110,7 +2136,8 @@ def sparse_minimum(sp_a, sp_b, name=None):
   return sparse_tensor.SparseTensor(out_indices, out_values, sp_a.dense_shape)
 
 
-@tf_export("sparse_transpose")
+@tf_export("sparse.transpose", "sparse_transpose")
+@deprecation.deprecated_endpoints("sparse_transpose")
 def sparse_transpose(sp_input, perm=None, name=None):
   """Transposes a `SparseTensor`
 
@@ -2259,7 +2286,7 @@ def _take_many_sparse_from_tensors_map(sparse_map_op,
 
   The input `SparseTensor` objects' indices are assumed ordered in
   standard lexicographic order.  If this is not the case, after this
-  step run `sparse_reorder` to restore index ordering.
+  step run `sparse.reorder` to restore index ordering.
 
   For example, if the serialized input is a `[2, 3]` matrix representing two
   original `SparseTensor` objects:
diff --git a/tensorflow/python/ops/special_math_ops.py b/tensorflow/python/ops/special_math_ops.py
index 9a10abfcf7..cfab943896 100644
--- a/tensorflow/python/ops/special_math_ops.py
+++ b/tensorflow/python/ops/special_math_ops.py
@@ -29,11 +29,13 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
 # TODO(b/27419586) Change docstring for required dtype of x once int allowed
-@tf_export('lbeta')
+@tf_export('math.lbeta', 'lbeta')
+@deprecation.deprecated_endpoints('lbeta')
 def lbeta(x, name=None):
   r"""Computes \\(ln(|Beta(x)|)\\), reducing along the last dimension.
 
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index 046a48d192..e83c08f643 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -310,8 +310,9 @@ def _reduce_join_reduction_dims(x, axis, reduction_indices):
     return math_ops.range(array_ops.rank(x) - 1, -1, -1)
 
 
-@tf_export("reduce_join")
-def reduce_join(inputs, axis=None,
+@tf_export("strings.reduce_join", "reduce_join")
+@deprecation.deprecated_endpoints("reduce_join")
+def reduce_join(inputs, axis=None,  # pylint: disable=missing-docstring
                 keep_dims=False,
                 separator="",
                 name=None,
@@ -329,6 +330,8 @@ def reduce_join(inputs, axis=None,
 
 reduce_join.__doc__ = deprecation.rewrite_argument_docstring(
     gen_string_ops.reduce_join.__doc__, "reduction_indices", "axis")
+reduce_join.__doc__ = reduce_join.__doc__.replace("tf.reduce_join(",
+                                                  "tf.strings.reduce_join(")
 
 
 # This wrapper provides backwards compatibility for code that predates the
diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py
index 8e7f123a85..8bf057f69d 100644
--- a/tensorflow/python/saved_model/builder_impl.py
+++ b/tensorflow/python/saved_model/builder_impl.py
@@ -36,10 +36,13 @@ from tensorflow.python.saved_model import utils_impl as saved_model_utils
 from tensorflow.python.training import saver as tf_saver
 from tensorflow.python.util import compat
 from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util.deprecation import deprecated_endpoints
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("saved_model.builder.SavedModelBuilder")
+@tf_export("saved_model.Builder",
+           "saved_model.builder.SavedModelBuilder")
+@deprecated_endpoints("saved_model.builder.SavedModelBuilder")
 class SavedModelBuilder(object):
   """Builds the `SavedModel` protocol buffer and saves variables and assets.
 
@@ -61,7 +64,7 @@ class SavedModelBuilder(object):
   Typical usage for the `SavedModelBuilder`:
   ```python
   ...
-  builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
+  builder = tf.saved_model.Builder(export_dir)
 
   with tf.Session(graph=tf.Graph()) as sess:
     ...
diff --git a/tensorflow/python/saved_model/loader_impl.py b/tensorflow/python/saved_model/loader_impl.py
index e8536108e8..895644a030 100644
--- a/tensorflow/python/saved_model/loader_impl.py
+++ b/tensorflow/python/saved_model/loader_impl.py
@@ -34,6 +34,7 @@ from tensorflow.python.saved_model import constants
 from tensorflow.python.saved_model import utils_impl as saved_model_utils
 from tensorflow.python.training import saver as tf_saver
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -144,7 +145,10 @@ def _get_main_op_tensor(
   return main_op_tensor
 
 
-@tf_export("saved_model.loader.maybe_saved_model_directory")
+@tf_export("saved_model.maybe_saved_model_directory",
+           "saved_model.loader.maybe_saved_model_directory")
+@deprecation.deprecated_endpoints(
+    "saved_model.loader.maybe_saved_model_directory")
 def maybe_saved_model_directory(export_dir):
   """Checks whether the provided export directory could contain a SavedModel.
 
@@ -165,7 +169,7 @@ def maybe_saved_model_directory(export_dir):
   return file_io.file_exists(txt_path) or file_io.file_exists(pb_path)
 
 
-@tf_export("saved_model.loader.load")
+@tf_export("saved_model.load", "saved_model.loader.load")
 def load(sess, tags, export_dir, import_scope=None, **saver_kwargs):
   """Loads the model from a SavedModel as specified by tags.
 
diff --git a/tensorflow/python/saved_model/main_op_impl.py b/tensorflow/python/saved_model/main_op_impl.py
index 631ee63729..ad4511b28e 100644
--- a/tensorflow/python/saved_model/main_op_impl.py
+++ b/tensorflow/python/saved_model/main_op_impl.py
@@ -22,6 +22,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -42,7 +43,9 @@ def main_op():
 
 
 # TODO(sukritiramesh): Integrate with Saver for complete restore functionality.
-@tf_export('saved_model.main_op.main_op_with_restore')
+@tf_export('saved_model.main_op_with_restore',
+           'saved_model.main_op.main_op_with_restore')
+@deprecation.deprecated_endpoints('saved_model.main_op.main_op_with_restore')
 def main_op_with_restore(restore_op_name):
   """Returns a main op to init variables, tables and restore the graph.
 
diff --git a/tensorflow/python/saved_model/signature_def_utils_impl.py b/tensorflow/python/saved_model/signature_def_utils_impl.py
index 37f927f381..a1034416e9 100644
--- a/tensorflow/python/saved_model/signature_def_utils_impl.py
+++ b/tensorflow/python/saved_model/signature_def_utils_impl.py
@@ -24,10 +24,14 @@ from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.python.framework import ops
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.saved_model import utils
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('saved_model.signature_def_utils.build_signature_def')
+@tf_export('saved_model.build_signature_def',
+           'saved_model.signature_def_utils.build_signature_def')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.build_signature_def')
 def build_signature_def(inputs=None, outputs=None, method_name=None):
   """Utility function to build a SignatureDef protocol buffer.
 
@@ -53,7 +57,10 @@ def build_signature_def(inputs=None, outputs=None, method_name=None):
   return signature_def
 
 
-@tf_export('saved_model.signature_def_utils.regression_signature_def')
+@tf_export('saved_model.regression_signature_def',
+           'saved_model.signature_def_utils.regression_signature_def')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.regression_signature_def')
 def regression_signature_def(examples, predictions):
   """Creates regression signature from given examples and predictions.
 
@@ -95,7 +102,10 @@ def regression_signature_def(examples, predictions):
   return signature_def
 
 
-@tf_export('saved_model.signature_def_utils.classification_signature_def')
+@tf_export('saved_model.classification_signature_def',
+           'saved_model.signature_def_utils.classification_signature_def')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.classification_signature_def')
 def classification_signature_def(examples, classes, scores):
   """Creates classification signature from given examples and predictions.
 
@@ -148,7 +158,10 @@ def classification_signature_def(examples, classes, scores):
   return signature_def
 
 
-@tf_export('saved_model.signature_def_utils.predict_signature_def')
+@tf_export('saved_model.predict_signature_def',
+           'saved_model.signature_def_utils.predict_signature_def')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.predict_signature_def')
 def predict_signature_def(inputs, outputs):
   """Creates prediction signature from given inputs and outputs.
 
@@ -239,7 +252,10 @@ def _supervised_signature_def(
   return signature_def
 
 
-@tf_export('saved_model.signature_def_utils.is_valid_signature')
+@tf_export('saved_model.is_valid_signature',
+           'saved_model.signature_def_utils.is_valid_signature')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.is_valid_signature')
 def is_valid_signature(signature_def):
   """Determine whether a SignatureDef can be served by TensorFlow Serving."""
   if signature_def is None:
@@ -313,4 +329,3 @@ def _is_valid_classification_signature(signature_def):
     return False
 
   return True
-
diff --git a/tensorflow/python/saved_model/utils_impl.py b/tensorflow/python/saved_model/utils_impl.py
index 06d09325c8..0bba7b6fac 100644
--- a/tensorflow/python/saved_model/utils_impl.py
+++ b/tensorflow/python/saved_model/utils_impl.py
@@ -27,13 +27,16 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.saved_model import constants
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
 # TensorInfo helpers.
 
 
-@tf_export("saved_model.utils.build_tensor_info")
+@tf_export("saved_model.build_tensor_info",
+           "saved_model.utils.build_tensor_info")
+@deprecation.deprecated_endpoints("saved_model.utils.build_tensor_info")
 def build_tensor_info(tensor):
   """Utility function to build TensorInfo proto.
 
@@ -57,7 +60,10 @@ def build_tensor_info(tensor):
   return tensor_info
 
 
-@tf_export("saved_model.utils.get_tensor_from_tensor_info")
+@tf_export("saved_model.get_tensor_from_tensor_info",
+           "saved_model.utils.get_tensor_from_tensor_info")
+@deprecation.deprecated_endpoints(
+    "saved_model.utils.get_tensor_from_tensor_info")
 def get_tensor_from_tensor_info(tensor_info, graph=None, import_scope=None):
   """Returns the Tensor or SparseTensor described by a TensorInfo proto.
 
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 92446e2f8f..5ce5410e0b 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -69,6 +69,7 @@ TENSORFLOW_API_INIT_FILES = [
     "profiler/__init__.py",
     "python_io/__init__.py",
     "quantization/__init__.py",
+    "random/__init__.py",
     "resource_loader/__init__.py",
     "strings/__init__.py",
     "saved_model/__init__.py",
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index bc2f3516d1..587eb232f5 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -69,6 +69,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "profiler/__init__.py",
     "python_io/__init__.py",
     "quantization/__init__.py",
+    "random/__init__.py",
     "resource_loader/__init__.py",
     "strings/__init__.py",
     "saved_model/__init__.py",
diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py
index 9d9db70890..eb131ac9f7 100644
--- a/tensorflow/python/training/input.py
+++ b/tensorflow/python/training/input.py
@@ -56,7 +56,8 @@ _restore_sparse = sparse_ops._take_many_sparse_from_tensors_map
 # pylint: enable=protected-access
 
 
-@tf_export("train.match_filenames_once")
+@tf_export("io.match_filenames_once", "train.match_filenames_once")
+@deprecation.deprecated_endpoints("train.match_filenames_once")
 def match_filenames_once(pattern, name=None):
   """Save the list of files matching pattern, so it is only computed once.
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.debugging.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.debugging.pbtxt
index d9efe97821..ab6287f8cd 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.debugging.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.debugging.pbtxt
@@ -1,5 +1,89 @@
 path: "tensorflow.debugging"
 tf_module {
+  member_method {
+    name: "Assert"
+    argspec: "args=[\'condition\', \'data\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_all_finite"
+    argspec: "args=[\'t\', \'msg\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "assert_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_greater"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_greater_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_integer"
+    argspec: "args=[\'x\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_less"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_less_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_near"
+    argspec: "args=[\'x\', \'y\', \'rtol\', \'atol\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_negative"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_non_negative"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_non_positive"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_none_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_positive"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_proper_iterable"
+    argspec: "args=[\'values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "assert_rank"
+    argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_rank_at_least"
+    argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_rank_in"
+    argspec: "args=[\'x\', \'ranks\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_same_float_dtype"
+    argspec: "args=[\'tensors\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_scalar"
+    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "assert_type"
+    argspec: "args=[\'tensor\', \'tf_type\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "check_numerics"
     argspec: "args=[\'tensor\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,4 +100,16 @@ tf_module {
     name: "is_nan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "is_non_decreasing"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "is_numeric_tensor"
+    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_strictly_increasing"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.-d-type.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.-d-type.pbtxt
new file mode 100644
index 0000000000..423eca32a2
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.-d-type.pbtxt
@@ -0,0 +1,77 @@
+path: "tensorflow.dtypes.DType"
+tf_class {
+  is_instance: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "as_datatype_enum"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "as_numpy_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "base_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_bool"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_complex"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_floating"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_integer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_numpy_compatible"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_quantized"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_unsigned"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "limits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "max"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "min"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "real_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "size"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'type_enum\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
index 98e1feed00..ea23feca84 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
@@ -1,7 +1,27 @@
 path: "tensorflow.dtypes"
 tf_module {
+  member {
+    name: "DType"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "as_dtype"
+    argspec: "args=[\'type_value\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "as_string"
     argspec: "args=[\'input\', \'precision\', \'scientific\', \'shortest\', \'width\', \'fill\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'False\', \'False\', \'-1\', \'\', \'None\'], "
   }
+  member_method {
+    name: "cast"
+    argspec: "args=[\'x\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "complex"
+    argspec: "args=[\'real\', \'imag\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "saturate_cast"
+    argspec: "args=[\'value\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.graph_util.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.graph_util.pbtxt
index eeabf845dc..162ee76ee7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.graph_util.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.graph_util.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "extract_sub_graph"
     argspec: "args=[\'graph_def\', \'dest_nodes\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "import_graph_def"
+    argspec: "args=[\'graph_def\', \'input_map\', \'return_elements\', \'name\', \'op_dict\', \'producer_op_list\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "must_run_on_cpu"
     argspec: "args=[\'node\', \'pin_variables_on_cpu\'], varargs=None, keywords=None, defaults=[\'False\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.image.pbtxt
index 5c46dc5ee7..0a231f1b65 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.image.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.image.pbtxt
@@ -148,6 +148,10 @@ tf_module {
     name: "random_contrast"
     argspec: "args=[\'image\', \'lower\', \'upper\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "random_crop"
+    argspec: "args=[\'value\', \'size\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "random_flip_left_right"
     argspec: "args=[\'image\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.initializers.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.initializers.pbtxt
index d499c67d89..19ca62122e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.initializers.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.initializers.pbtxt
@@ -72,6 +72,10 @@ tf_module {
     name: "local_variables"
     argspec: "args=[], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "tables_initializer"
+    argspec: "args=[\'name\'], varargs=None, keywords=None, defaults=[\'init_all_tables\'], "
+  }
   member_method {
     name: "variables"
     argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'init\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-feature.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-feature.pbtxt
new file mode 100644
index 0000000000..cd0e51c8c7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-feature.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.io.FixedLenFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "default_value"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-sequence-feature.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-sequence-feature.pbtxt
new file mode 100644
index 0000000000..8a38f25fdf
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-sequence-feature.pbtxt
@@ -0,0 +1,31 @@
+path: "tensorflow.io.FixedLenSequenceFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenSequenceFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenSequenceFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "allow_missing"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "default_value"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-padding-f-i-f-o-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
new file mode 100644
index 0000000000..85306fdcac
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.PaddingFIFOQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PaddingFIFOQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'dtypes\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'padding_fifo_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-priority-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-priority-queue.pbtxt
new file mode 100644
index 0000000000..02d8037b34
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-priority-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.PriorityQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PriorityQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'types\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'priority_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-queue-base.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-queue-base.pbtxt
new file mode 100644
index 0000000000..a30481a0ea
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-queue-base.pbtxt
@@ -0,0 +1,65 @@
+path: "tensorflow.io.QueueBase"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtypes\', \'shapes\', \'names\', \'queue_ref\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-random-shuffle-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-random-shuffle-queue.pbtxt
new file mode 100644
index 0000000000..82cbf9884f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-random-shuffle-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.RandomShuffleQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.RandomShuffleQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'min_after_dequeue\', \'dtypes\', \'shapes\', \'names\', \'seed\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'random_shuffle_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-sparse-feature.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-sparse-feature.pbtxt
new file mode 100644
index 0000000000..216947b4ed
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-sparse-feature.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.io.SparseFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.SparseFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.SparseFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "already_sorted"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "index_key"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "size"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "value_key"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-compression-type.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-compression-type.pbtxt
new file mode 100644
index 0000000000..b598f73d7e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-compression-type.pbtxt
@@ -0,0 +1,20 @@
+path: "tensorflow.io.TFRecordCompressionType"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordCompressionType\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "GZIP"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "NONE"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "ZLIB"
+    mtype: "<type \'int\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-options.pbtxt
new file mode 100644
index 0000000000..bfbf37ccf4
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-options.pbtxt
@@ -0,0 +1,17 @@
+path: "tensorflow.io.TFRecordOptions"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordOptions\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "compression_type_map"
+    mtype: "<type \'dict\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'compression_type\', \'flush_mode\', \'input_buffer_size\', \'output_buffer_size\', \'window_bits\', \'compression_level\', \'compression_method\', \'mem_level\', \'compression_strategy\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "get_compression_type_string"
+    argspec: "args=[\'cls\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-writer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-writer.pbtxt
new file mode 100644
index 0000000000..6fd443f6d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-writer.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.io.TFRecordWriter"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordWriter\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flush"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "write"
+    argspec: "args=[\'self\', \'record\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-var-len-feature.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-var-len-feature.pbtxt
new file mode 100644
index 0000000000..fd835dbfbb
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-var-len-feature.pbtxt
@@ -0,0 +1,19 @@
+path: "tensorflow.io.VarLenFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.VarLenFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.VarLenFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt
index 8938cf217b..dccf136788 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt
@@ -1,5 +1,49 @@
 path: "tensorflow.io"
 tf_module {
+  member {
+    name: "FixedLenFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "FixedLenSequenceFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PaddingFIFOQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PriorityQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "QueueBase"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomShuffleQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordCompressionType"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordOptions"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordWriter"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "VarLenFeature"
+    mtype: "<type \'type\'>"
+  }
   member_method {
     name: "decode_base64"
     argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -8,6 +52,10 @@ tf_module {
     name: "decode_compressed"
     argspec: "args=[\'bytes\', \'compression_type\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'None\'], "
   }
+  member_method {
+    name: "decode_csv"
+    argspec: "args=[\'records\', \'record_defaults\', \'field_delim\', \'use_quote_delim\', \'name\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\',\', \'True\', \'None\', \'\', \'None\'], "
+  }
   member_method {
     name: "decode_json_example"
     argspec: "args=[\'json_examples\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,18 +64,38 @@ tf_module {
     name: "decode_raw"
     argspec: "args=[\'bytes\', \'out_type\', \'little_endian\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
   }
+  member_method {
+    name: "deserialize_many_sparse"
+    argspec: "args=[\'serialized_sparse\', \'dtype\', \'rank\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "encode_base64"
     argspec: "args=[\'input\', \'pad\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
+  member_method {
+    name: "match_filenames_once"
+    argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "matching_files"
     argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "parse_example"
+    argspec: "args=[\'serialized\', \'features\', \'name\', \'example_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "parse_sequence_example"
     argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_names\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "parse_single_example"
+    argspec: "args=[\'serialized\', \'features\', \'name\', \'example_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "parse_single_sequence_example"
+    argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "parse_tensor"
     argspec: "args=[\'serialized\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -36,8 +104,24 @@ tf_module {
     name: "read_file"
     argspec: "args=[\'filename\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "serialize_many_sparse"
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "serialize_sparse"
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "tf_record_iterator"
+    argspec: "args=[\'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "write_file"
     argspec: "args=[\'filename\', \'contents\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "write_graph"
+    argspec: "args=[\'graph_or_graph_def\', \'logdir\', \'name\', \'as_text\'], varargs=None, keywords=None, defaults=[\'True\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt
index d979116887..6ac95d96da 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt
@@ -108,10 +108,18 @@ tf_module {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "global_norm"
+    argspec: "args=[\'t_list\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "inv"
     argspec: "args=[\'input\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
+  member_method {
+    name: "l2_normalize"
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
+  }
   member_method {
     name: "logdet"
     argspec: "args=[\'matrix\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -124,6 +132,10 @@ tf_module {
     name: "lstsq"
     argspec: "args=[\'matrix\', \'rhs\', \'l2_regularizer\', \'fast\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'True\', \'None\'], "
   }
+  member_method {
+    name: "matmul"
+    argspec: "args=[\'a\', \'b\', \'transpose_a\', \'transpose_b\', \'adjoint_a\', \'adjoint_b\', \'a_is_sparse\', \'b_is_sparse\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'False\', \'None\'], "
+  }
   member_method {
     name: "norm"
     argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
index 72856466ec..459b9e3684 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
@@ -1,5 +1,13 @@
 path: "tensorflow.math"
 tf_module {
+  member_method {
+    name: "abs"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "accumulate_n"
+    argspec: "args=[\'inputs\', \'shape\', \'tensor_dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "acos"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -12,6 +20,22 @@ tf_module {
     name: "add"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "add_n"
+    argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "angle"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "argmax"
+    argspec: "args=[\'input\', \'axis\', \'name\', \'dimension\', \'output_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int64\'>\"], "
+  }
+  member_method {
+    name: "argmin"
+    argspec: "args=[\'input\', \'axis\', \'name\', \'dimension\', \'output_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int64\'>\"], "
+  }
   member_method {
     name: "asin"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -52,10 +76,18 @@ tf_module {
     name: "betainc"
     argspec: "args=[\'a\', \'b\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "bincount"
+    argspec: "args=[\'arr\', \'weights\', \'minlength\', \'maxlength\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int32\'>\"], "
+  }
   member_method {
     name: "ceil"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "conj"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "cos"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -64,14 +96,34 @@ tf_module {
     name: "cosh"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "count_nonzero"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'int64\'>\", \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "cumprod"
+    argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "cumsum"
+    argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
+  }
   member_method {
     name: "digamma"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "divide"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "equal"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "erf"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "erfc"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -88,6 +140,10 @@ tf_module {
     name: "floor"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "floordiv"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "greater"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -104,10 +160,26 @@ tf_module {
     name: "igammac"
     argspec: "args=[\'a\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "imag"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "in_top_k"
+    argspec: "args=[\'predictions\', \'targets\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "invert_permutation"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "l2_normalize"
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "lbeta"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "less"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -128,6 +200,14 @@ tf_module {
     name: "log1p"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "log_sigmoid"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "log_softmax"
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "logical_and"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -140,6 +220,10 @@ tf_module {
     name: "logical_or"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "logical_xor"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'LogicalXor\'], "
+  }
   member_method {
     name: "maximum"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -148,6 +232,14 @@ tf_module {
     name: "minimum"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "multiply"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "negative"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "not_equal"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -160,18 +252,66 @@ tf_module {
     name: "polyval"
     argspec: "args=[\'coeffs\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "pow"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "real"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "reciprocal"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "reduce_all"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_any"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_logsumexp"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_mean"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_min"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_prod"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "rint"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "round"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "rsqrt"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "scalar_mul"
+    argspec: "args=[\'scalar\', \'x\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -192,6 +332,14 @@ tf_module {
     name: "segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "sigmoid"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "sign"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "sin"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -200,6 +348,10 @@ tf_module {
     name: "sinh"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "softmax"
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "softplus"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -208,18 +360,46 @@ tf_module {
     name: "softsign"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "sqrt"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "square"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "squared_difference"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "subtract"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "tan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "tanh"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "top_k"
+    argspec: "args=[\'input\', \'k\', \'sorted\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'True\', \'None\'], "
+  }
+  member_method {
+    name: "truediv"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unsorted_segment_mean"
+    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_min"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -228,6 +408,10 @@ tf_module {
     name: "unsorted_segment_prod"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unsorted_segment_sqrt_n"
+    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -240,6 +424,10 @@ tf_module {
     name: "xlogy"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "zero_fraction"
+    argspec: "args=[\'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "zeta"
     argspec: "args=[\'x\', \'q\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
index d9e5b0d0fc..9b28ce5746 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
@@ -100,6 +100,10 @@ tf_module {
     name: "ctc_loss"
     argspec: "args=[\'labels\', \'inputs\', \'sequence_length\', \'preprocess_collapse_repeated\', \'ctc_merge_repeated\', \'ignore_longer_outputs_than_inputs\', \'time_major\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'False\', \'True\'], "
   }
+  member_method {
+    name: "depth_to_space"
+    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+  }
   member_method {
     name: "depthwise_conv2d"
     argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'rate\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
@@ -304,6 +308,14 @@ tf_module {
     name: "softsign"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "space_to_batch"
+    argspec: "args=[\'input\', \'paddings\', \'block_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "space_to_depth"
+    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+  }
   member_method {
     name: "sparse_softmax_cross_entropy_with_logits"
     argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 509ceff9df..a268529c1f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -496,6 +496,10 @@ tf_module {
     name: "quint8"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "random"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "random_normal_initializer"
     mtype: "<type \'type\'>"
@@ -1744,6 +1748,10 @@ tf_module {
     name: "rint"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "roll"
+    argspec: "args=[\'input\', \'shift\', \'axis\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "round"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
index 6d865efed0..77c92aeb0d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
@@ -28,6 +28,10 @@ tf_module {
     name: "fake_quant_with_min_max_vars_per_channel_gradient"
     argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'num_bits\', \'narrow_range\', \'name\'], varargs=None, keywords=None, defaults=[\'8\', \'False\', \'None\'], "
   }
+  member_method {
+    name: "quantize"
+    argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'round_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'HALF_AWAY_FROM_ZERO\', \'None\'], "
+  }
   member_method {
     name: "quantized_concat"
     argspec: "args=[\'concat_dim\', \'values\', \'input_mins\', \'input_maxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
new file mode 100644
index 0000000000..a568dd4cd8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
@@ -0,0 +1,47 @@
+path: "tensorflow.random"
+tf_module {
+  member_method {
+    name: "gamma"
+    argspec: "args=[\'shape\', \'alpha\', \'beta\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "get_seed"
+    argspec: "args=[\'op_seed\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "log_uniform_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "multinomial"
+    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\', \'output_dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "normal"
+    argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "poisson"
+    argspec: "args=[\'lam\', \'shape\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_random_seed"
+    argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'value\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "truncated_normal"
+    argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "uniform"
+    argspec: "args=[\'shape\', \'minval\', \'maxval\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "uniform_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.-builder.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.-builder.pbtxt
new file mode 100644
index 0000000000..67457de070
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.-builder.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.saved_model.Builder"
+tf_class {
+  is_instance: "<class \'tensorflow.python.saved_model.builder_impl.SavedModelBuilder\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'export_dir\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "add_meta_graph"
+    argspec: "args=[\'self\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "add_meta_graph_and_variables"
+    argspec: "args=[\'self\', \'sess\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "save"
+    argspec: "args=[\'self\', \'as_text\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
index e1a0385092..3f4965fc69 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
@@ -1,5 +1,9 @@
 path: "tensorflow.saved_model"
 tf_module {
+  member {
+    name: "Builder"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "builder"
     mtype: "<type \'module\'>"
@@ -32,6 +36,46 @@ tf_module {
     name: "utils"
     mtype: "<type \'module\'>"
   }
+  member_method {
+    name: "build_signature_def"
+    argspec: "args=[\'inputs\', \'outputs\', \'method_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "build_tensor_info"
+    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "classification_signature_def"
+    argspec: "args=[\'examples\', \'classes\', \'scores\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_tensor_from_tensor_info"
+    argspec: "args=[\'tensor_info\', \'graph\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "is_valid_signature"
+    argspec: "args=[\'signature_def\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "load"
+    argspec: "args=[\'sess\', \'tags\', \'export_dir\', \'import_scope\'], varargs=None, keywords=saver_kwargs, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "main_op_with_restore"
+    argspec: "args=[\'restore_op_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "maybe_saved_model_directory"
+    argspec: "args=[\'export_dir\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "predict_signature_def"
+    argspec: "args=[\'inputs\', \'outputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "regression_signature_def"
+    argspec: "args=[\'examples\', \'predictions\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "simple_save"
     argspec: "args=[\'session\', \'export_dir\', \'inputs\', \'outputs\', \'legacy_init_op\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
new file mode 100644
index 0000000000..cd97716c9d
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
@@ -0,0 +1,46 @@
+path: "tensorflow.sparse.SparseConditionalAccumulator"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.SparseConditionalAccumulator\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.ConditionalAccumulatorBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "accumulator_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtype\', \'shape\', \'shared_name\', \'name\', \'reduction_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'sparse_conditional_accumulator\', \'MEAN\'], "
+  }
+  member_method {
+    name: "apply_grad"
+    argspec: "args=[\'self\', \'grad_indices\', \'grad_values\', \'grad_shape\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], "
+  }
+  member_method {
+    name: "apply_indexed_slices_grad"
+    argspec: "args=[\'self\', \'grad\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\'], "
+  }
+  member_method {
+    name: "num_accumulated"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "set_global_step"
+    argspec: "args=[\'self\', \'new_global_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "take_grad"
+    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "take_indexed_slices_grad"
+    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt
new file mode 100644
index 0000000000..02e59a63e1
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt
@@ -0,0 +1,54 @@
+path: "tensorflow.sparse.SparseTensor"
+tf_class {
+  is_instance: "<class \'tensorflow.python.framework.sparse_tensor.SparseTensor\'>"
+  is_instance: "<class \'tensorflow.python.framework.ops._TensorLike\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dense_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "graph"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "indices"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "op"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "values"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'indices\', \'values\', \'dense_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "consumers"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "eval"
+    argspec: "args=[\'self\', \'feed_dict\', \'session\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'cls\', \'sparse_tensor_value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_shape"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt
index ba9e651b34..32bd8d5f8e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt
@@ -1,5 +1,21 @@
 path: "tensorflow.sparse"
 tf_module {
+  member {
+    name: "SparseConditionalAccumulator"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseTensor"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "add"
+    argspec: "args=[\'a\', \'b\', \'thresh\'], varargs=None, keywords=None, defaults=[\'0\'], "
+  }
+  member_method {
+    name: "concat"
+    argspec: "args=[\'axis\', \'sp_inputs\', \'name\', \'expand_nonconcat_dim\', \'concat_dim\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
   member_method {
     name: "cross"
     argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,4 +32,100 @@ tf_module {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "fill_empty_rows"
+    argspec: "args=[\'sp_input\', \'default_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "mask"
+    argspec: "args=[\'a\', \'mask_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "matmul"
+    argspec: "args=[\'sp_a\', \'b\', \'adjoint_a\', \'adjoint_b\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "maximum"
+    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "merge"
+    argspec: "args=[\'sp_ids\', \'sp_values\', \'vocab_size\', \'name\', \'already_sorted\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "minimum"
+    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "placeholder"
+    argspec: "args=[\'dtype\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max_sparse"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum_sparse"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reorder"
+    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "reset_shape"
+    argspec: "args=[\'sp_input\', \'new_shape\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "reshape"
+    argspec: "args=[\'sp_input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "retain"
+    argspec: "args=[\'sp_input\', \'to_retain\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "segment_mean"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "segment_sqrt_n"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "segment_sum"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "slice"
+    argspec: "args=[\'sp_input\', \'start\', \'size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "softmax"
+    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "split"
+    argspec: "args=[\'keyword_required\', \'sp_input\', \'num_split\', \'axis\', \'name\', \'split_dim\'], varargs=None, keywords=None, defaults=[\'KeywordRequired()\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "to_dense"
+    argspec: "args=[\'sp_input\', \'default_value\', \'validate_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'True\', \'None\'], "
+  }
+  member_method {
+    name: "to_indicator"
+    argspec: "args=[\'sp_input\', \'vocab_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "transpose"
+    argspec: "args=[\'sp_input\', \'perm\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
index 312e94b41d..ebdaf57231 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "length"
     argspec: "args=[\'input\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
+  member_method {
+    name: "reduce_join"
+    argspec: "args=[\'inputs\', \'axis\', \'keep_dims\', \'separator\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'\', \'None\', \'None\'], "
+  }
   member_method {
     name: "regex_full_match"
     argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt
index 9f35395284..45c81fdd3b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt
@@ -272,6 +272,10 @@ tf_module {
     name: "checkpoint_exists"
     argspec: "args=[\'checkpoint_prefix\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "confusion_matrix"
+    argspec: "args=[\'labels\', \'predictions\', \'num_classes\', \'dtype\', \'name\', \'weights\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'int32\'>\", \'None\', \'None\'], "
+  }
   member_method {
     name: "cosine_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.debugging.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.debugging.pbtxt
index d9efe97821..ab6287f8cd 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.debugging.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.debugging.pbtxt
@@ -1,5 +1,89 @@
 path: "tensorflow.debugging"
 tf_module {
+  member_method {
+    name: "Assert"
+    argspec: "args=[\'condition\', \'data\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_all_finite"
+    argspec: "args=[\'t\', \'msg\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "assert_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_greater"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_greater_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_integer"
+    argspec: "args=[\'x\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_less"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_less_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_near"
+    argspec: "args=[\'x\', \'y\', \'rtol\', \'atol\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_negative"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_non_negative"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_non_positive"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_none_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_positive"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_proper_iterable"
+    argspec: "args=[\'values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "assert_rank"
+    argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_rank_at_least"
+    argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_rank_in"
+    argspec: "args=[\'x\', \'ranks\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_same_float_dtype"
+    argspec: "args=[\'tensors\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_scalar"
+    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "assert_type"
+    argspec: "args=[\'tensor\', \'tf_type\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "check_numerics"
     argspec: "args=[\'tensor\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,4 +100,16 @@ tf_module {
     name: "is_nan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "is_non_decreasing"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "is_numeric_tensor"
+    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_strictly_increasing"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.-d-type.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.-d-type.pbtxt
new file mode 100644
index 0000000000..423eca32a2
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.-d-type.pbtxt
@@ -0,0 +1,77 @@
+path: "tensorflow.dtypes.DType"
+tf_class {
+  is_instance: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "as_datatype_enum"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "as_numpy_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "base_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_bool"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_complex"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_floating"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_integer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_numpy_compatible"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_quantized"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_unsigned"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "limits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "max"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "min"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "real_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "size"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'type_enum\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
index 98e1feed00..ea23feca84 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
@@ -1,7 +1,27 @@
 path: "tensorflow.dtypes"
 tf_module {
+  member {
+    name: "DType"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "as_dtype"
+    argspec: "args=[\'type_value\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "as_string"
     argspec: "args=[\'input\', \'precision\', \'scientific\', \'shortest\', \'width\', \'fill\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'False\', \'False\', \'-1\', \'\', \'None\'], "
   }
+  member_method {
+    name: "cast"
+    argspec: "args=[\'x\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "complex"
+    argspec: "args=[\'real\', \'imag\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "saturate_cast"
+    argspec: "args=[\'value\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.graph_util.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.graph_util.pbtxt
index eeabf845dc..162ee76ee7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.graph_util.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.graph_util.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "extract_sub_graph"
     argspec: "args=[\'graph_def\', \'dest_nodes\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "import_graph_def"
+    argspec: "args=[\'graph_def\', \'input_map\', \'return_elements\', \'name\', \'op_dict\', \'producer_op_list\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "must_run_on_cpu"
     argspec: "args=[\'node\', \'pin_variables_on_cpu\'], varargs=None, keywords=None, defaults=[\'False\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt
index 5c46dc5ee7..0a231f1b65 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt
@@ -148,6 +148,10 @@ tf_module {
     name: "random_contrast"
     argspec: "args=[\'image\', \'lower\', \'upper\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "random_crop"
+    argspec: "args=[\'value\', \'size\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "random_flip_left_right"
     argspec: "args=[\'image\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt
index e3c63fe737..d49181714f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt
@@ -64,4 +64,8 @@ tf_module {
     name: "lecun_uniform"
     argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "tables_initializer"
+    argspec: "args=[\'name\'], varargs=None, keywords=None, defaults=[\'init_all_tables\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-feature.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-feature.pbtxt
new file mode 100644
index 0000000000..cd0e51c8c7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-feature.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.io.FixedLenFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "default_value"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-sequence-feature.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-sequence-feature.pbtxt
new file mode 100644
index 0000000000..8a38f25fdf
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-sequence-feature.pbtxt
@@ -0,0 +1,31 @@
+path: "tensorflow.io.FixedLenSequenceFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenSequenceFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenSequenceFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "allow_missing"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "default_value"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
new file mode 100644
index 0000000000..85306fdcac
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.PaddingFIFOQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PaddingFIFOQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'dtypes\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'padding_fifo_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt
new file mode 100644
index 0000000000..02d8037b34
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.PriorityQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PriorityQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'types\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'priority_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt
new file mode 100644
index 0000000000..a30481a0ea
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt
@@ -0,0 +1,65 @@
+path: "tensorflow.io.QueueBase"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtypes\', \'shapes\', \'names\', \'queue_ref\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt
new file mode 100644
index 0000000000..82cbf9884f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.RandomShuffleQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.RandomShuffleQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'min_after_dequeue\', \'dtypes\', \'shapes\', \'names\', \'seed\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'random_shuffle_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-sparse-feature.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-sparse-feature.pbtxt
new file mode 100644
index 0000000000..216947b4ed
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-sparse-feature.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.io.SparseFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.SparseFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.SparseFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "already_sorted"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "index_key"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "size"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "value_key"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-compression-type.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-compression-type.pbtxt
new file mode 100644
index 0000000000..b598f73d7e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-compression-type.pbtxt
@@ -0,0 +1,20 @@
+path: "tensorflow.io.TFRecordCompressionType"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordCompressionType\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "GZIP"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "NONE"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "ZLIB"
+    mtype: "<type \'int\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-options.pbtxt
new file mode 100644
index 0000000000..bfbf37ccf4
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-options.pbtxt
@@ -0,0 +1,17 @@
+path: "tensorflow.io.TFRecordOptions"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordOptions\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "compression_type_map"
+    mtype: "<type \'dict\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'compression_type\', \'flush_mode\', \'input_buffer_size\', \'output_buffer_size\', \'window_bits\', \'compression_level\', \'compression_method\', \'mem_level\', \'compression_strategy\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "get_compression_type_string"
+    argspec: "args=[\'cls\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-writer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-writer.pbtxt
new file mode 100644
index 0000000000..6fd443f6d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-writer.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.io.TFRecordWriter"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordWriter\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flush"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "write"
+    argspec: "args=[\'self\', \'record\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-var-len-feature.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-var-len-feature.pbtxt
new file mode 100644
index 0000000000..fd835dbfbb
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-var-len-feature.pbtxt
@@ -0,0 +1,19 @@
+path: "tensorflow.io.VarLenFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.VarLenFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.VarLenFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
index 8938cf217b..dccf136788 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
@@ -1,5 +1,49 @@
 path: "tensorflow.io"
 tf_module {
+  member {
+    name: "FixedLenFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "FixedLenSequenceFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PaddingFIFOQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PriorityQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "QueueBase"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomShuffleQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordCompressionType"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordOptions"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordWriter"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "VarLenFeature"
+    mtype: "<type \'type\'>"
+  }
   member_method {
     name: "decode_base64"
     argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -8,6 +52,10 @@ tf_module {
     name: "decode_compressed"
     argspec: "args=[\'bytes\', \'compression_type\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'None\'], "
   }
+  member_method {
+    name: "decode_csv"
+    argspec: "args=[\'records\', \'record_defaults\', \'field_delim\', \'use_quote_delim\', \'name\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\',\', \'True\', \'None\', \'\', \'None\'], "
+  }
   member_method {
     name: "decode_json_example"
     argspec: "args=[\'json_examples\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,18 +64,38 @@ tf_module {
     name: "decode_raw"
     argspec: "args=[\'bytes\', \'out_type\', \'little_endian\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
   }
+  member_method {
+    name: "deserialize_many_sparse"
+    argspec: "args=[\'serialized_sparse\', \'dtype\', \'rank\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "encode_base64"
     argspec: "args=[\'input\', \'pad\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
+  member_method {
+    name: "match_filenames_once"
+    argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "matching_files"
     argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "parse_example"
+    argspec: "args=[\'serialized\', \'features\', \'name\', \'example_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "parse_sequence_example"
     argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_names\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "parse_single_example"
+    argspec: "args=[\'serialized\', \'features\', \'name\', \'example_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "parse_single_sequence_example"
+    argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "parse_tensor"
     argspec: "args=[\'serialized\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -36,8 +104,24 @@ tf_module {
     name: "read_file"
     argspec: "args=[\'filename\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "serialize_many_sparse"
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "serialize_sparse"
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "tf_record_iterator"
+    argspec: "args=[\'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "write_file"
     argspec: "args=[\'filename\', \'contents\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "write_graph"
+    argspec: "args=[\'graph_or_graph_def\', \'logdir\', \'name\', \'as_text\'], varargs=None, keywords=None, defaults=[\'True\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt
index d979116887..6ac95d96da 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt
@@ -108,10 +108,18 @@ tf_module {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "global_norm"
+    argspec: "args=[\'t_list\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "inv"
     argspec: "args=[\'input\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
+  member_method {
+    name: "l2_normalize"
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
+  }
   member_method {
     name: "logdet"
     argspec: "args=[\'matrix\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -124,6 +132,10 @@ tf_module {
     name: "lstsq"
     argspec: "args=[\'matrix\', \'rhs\', \'l2_regularizer\', \'fast\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'True\', \'None\'], "
   }
+  member_method {
+    name: "matmul"
+    argspec: "args=[\'a\', \'b\', \'transpose_a\', \'transpose_b\', \'adjoint_a\', \'adjoint_b\', \'a_is_sparse\', \'b_is_sparse\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'False\', \'None\'], "
+  }
   member_method {
     name: "norm"
     argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
index 72856466ec..459b9e3684 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
@@ -1,5 +1,13 @@
 path: "tensorflow.math"
 tf_module {
+  member_method {
+    name: "abs"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "accumulate_n"
+    argspec: "args=[\'inputs\', \'shape\', \'tensor_dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "acos"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -12,6 +20,22 @@ tf_module {
     name: "add"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "add_n"
+    argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "angle"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "argmax"
+    argspec: "args=[\'input\', \'axis\', \'name\', \'dimension\', \'output_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int64\'>\"], "
+  }
+  member_method {
+    name: "argmin"
+    argspec: "args=[\'input\', \'axis\', \'name\', \'dimension\', \'output_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int64\'>\"], "
+  }
   member_method {
     name: "asin"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -52,10 +76,18 @@ tf_module {
     name: "betainc"
     argspec: "args=[\'a\', \'b\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "bincount"
+    argspec: "args=[\'arr\', \'weights\', \'minlength\', \'maxlength\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int32\'>\"], "
+  }
   member_method {
     name: "ceil"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "conj"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "cos"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -64,14 +96,34 @@ tf_module {
     name: "cosh"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "count_nonzero"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'int64\'>\", \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "cumprod"
+    argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "cumsum"
+    argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
+  }
   member_method {
     name: "digamma"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "divide"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "equal"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "erf"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "erfc"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -88,6 +140,10 @@ tf_module {
     name: "floor"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "floordiv"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "greater"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -104,10 +160,26 @@ tf_module {
     name: "igammac"
     argspec: "args=[\'a\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "imag"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "in_top_k"
+    argspec: "args=[\'predictions\', \'targets\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "invert_permutation"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "l2_normalize"
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "lbeta"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "less"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -128,6 +200,14 @@ tf_module {
     name: "log1p"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "log_sigmoid"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "log_softmax"
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "logical_and"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -140,6 +220,10 @@ tf_module {
     name: "logical_or"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "logical_xor"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'LogicalXor\'], "
+  }
   member_method {
     name: "maximum"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -148,6 +232,14 @@ tf_module {
     name: "minimum"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "multiply"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "negative"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "not_equal"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -160,18 +252,66 @@ tf_module {
     name: "polyval"
     argspec: "args=[\'coeffs\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "pow"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "real"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "reciprocal"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "reduce_all"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_any"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_logsumexp"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_mean"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_min"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_prod"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "rint"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "round"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "rsqrt"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "scalar_mul"
+    argspec: "args=[\'scalar\', \'x\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -192,6 +332,14 @@ tf_module {
     name: "segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "sigmoid"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "sign"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "sin"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -200,6 +348,10 @@ tf_module {
     name: "sinh"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "softmax"
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "softplus"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -208,18 +360,46 @@ tf_module {
     name: "softsign"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "sqrt"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "square"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "squared_difference"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "subtract"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "tan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "tanh"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "top_k"
+    argspec: "args=[\'input\', \'k\', \'sorted\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'True\', \'None\'], "
+  }
+  member_method {
+    name: "truediv"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unsorted_segment_mean"
+    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_min"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -228,6 +408,10 @@ tf_module {
     name: "unsorted_segment_prod"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unsorted_segment_sqrt_n"
+    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -240,6 +424,10 @@ tf_module {
     name: "xlogy"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "zero_fraction"
+    argspec: "args=[\'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "zeta"
     argspec: "args=[\'x\', \'q\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
index d9e5b0d0fc..9b28ce5746 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
@@ -100,6 +100,10 @@ tf_module {
     name: "ctc_loss"
     argspec: "args=[\'labels\', \'inputs\', \'sequence_length\', \'preprocess_collapse_repeated\', \'ctc_merge_repeated\', \'ignore_longer_outputs_than_inputs\', \'time_major\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'False\', \'True\'], "
   }
+  member_method {
+    name: "depth_to_space"
+    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+  }
   member_method {
     name: "depthwise_conv2d"
     argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'rate\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
@@ -304,6 +308,14 @@ tf_module {
     name: "softsign"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "space_to_batch"
+    argspec: "args=[\'input\', \'paddings\', \'block_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "space_to_depth"
+    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+  }
   member_method {
     name: "sparse_softmax_cross_entropy_with_logits"
     argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index d2dc8bc85f..5b3ea75bce 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -456,6 +456,10 @@ tf_module {
     name: "quint8"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "random"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "random_normal_initializer"
     mtype: "<type \'type\'>"
@@ -1608,6 +1612,10 @@ tf_module {
     name: "rint"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "roll"
+    argspec: "args=[\'input\', \'shift\', \'axis\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "round"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
index 6d865efed0..77c92aeb0d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
@@ -28,6 +28,10 @@ tf_module {
     name: "fake_quant_with_min_max_vars_per_channel_gradient"
     argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'num_bits\', \'narrow_range\', \'name\'], varargs=None, keywords=None, defaults=[\'8\', \'False\', \'None\'], "
   }
+  member_method {
+    name: "quantize"
+    argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'round_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'HALF_AWAY_FROM_ZERO\', \'None\'], "
+  }
   member_method {
     name: "quantized_concat"
     argspec: "args=[\'concat_dim\', \'values\', \'input_mins\', \'input_maxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
new file mode 100644
index 0000000000..a568dd4cd8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
@@ -0,0 +1,47 @@
+path: "tensorflow.random"
+tf_module {
+  member_method {
+    name: "gamma"
+    argspec: "args=[\'shape\', \'alpha\', \'beta\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "get_seed"
+    argspec: "args=[\'op_seed\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "log_uniform_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "multinomial"
+    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\', \'output_dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "normal"
+    argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "poisson"
+    argspec: "args=[\'lam\', \'shape\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_random_seed"
+    argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'value\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "truncated_normal"
+    argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "uniform"
+    argspec: "args=[\'shape\', \'minval\', \'maxval\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "uniform_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.-builder.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.-builder.pbtxt
new file mode 100644
index 0000000000..67457de070
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.-builder.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.saved_model.Builder"
+tf_class {
+  is_instance: "<class \'tensorflow.python.saved_model.builder_impl.SavedModelBuilder\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'export_dir\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "add_meta_graph"
+    argspec: "args=[\'self\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "add_meta_graph_and_variables"
+    argspec: "args=[\'self\', \'sess\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "save"
+    argspec: "args=[\'self\', \'as_text\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
index e1a0385092..3f4965fc69 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
@@ -1,5 +1,9 @@
 path: "tensorflow.saved_model"
 tf_module {
+  member {
+    name: "Builder"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "builder"
     mtype: "<type \'module\'>"
@@ -32,6 +36,46 @@ tf_module {
     name: "utils"
     mtype: "<type \'module\'>"
   }
+  member_method {
+    name: "build_signature_def"
+    argspec: "args=[\'inputs\', \'outputs\', \'method_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "build_tensor_info"
+    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "classification_signature_def"
+    argspec: "args=[\'examples\', \'classes\', \'scores\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_tensor_from_tensor_info"
+    argspec: "args=[\'tensor_info\', \'graph\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "is_valid_signature"
+    argspec: "args=[\'signature_def\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "load"
+    argspec: "args=[\'sess\', \'tags\', \'export_dir\', \'import_scope\'], varargs=None, keywords=saver_kwargs, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "main_op_with_restore"
+    argspec: "args=[\'restore_op_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "maybe_saved_model_directory"
+    argspec: "args=[\'export_dir\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "predict_signature_def"
+    argspec: "args=[\'inputs\', \'outputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "regression_signature_def"
+    argspec: "args=[\'examples\', \'predictions\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "simple_save"
     argspec: "args=[\'session\', \'export_dir\', \'inputs\', \'outputs\', \'legacy_init_op\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
new file mode 100644
index 0000000000..cd97716c9d
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
@@ -0,0 +1,46 @@
+path: "tensorflow.sparse.SparseConditionalAccumulator"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.SparseConditionalAccumulator\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.ConditionalAccumulatorBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "accumulator_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtype\', \'shape\', \'shared_name\', \'name\', \'reduction_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'sparse_conditional_accumulator\', \'MEAN\'], "
+  }
+  member_method {
+    name: "apply_grad"
+    argspec: "args=[\'self\', \'grad_indices\', \'grad_values\', \'grad_shape\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], "
+  }
+  member_method {
+    name: "apply_indexed_slices_grad"
+    argspec: "args=[\'self\', \'grad\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\'], "
+  }
+  member_method {
+    name: "num_accumulated"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "set_global_step"
+    argspec: "args=[\'self\', \'new_global_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "take_grad"
+    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "take_indexed_slices_grad"
+    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt
new file mode 100644
index 0000000000..02e59a63e1
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt
@@ -0,0 +1,54 @@
+path: "tensorflow.sparse.SparseTensor"
+tf_class {
+  is_instance: "<class \'tensorflow.python.framework.sparse_tensor.SparseTensor\'>"
+  is_instance: "<class \'tensorflow.python.framework.ops._TensorLike\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dense_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "graph"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "indices"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "op"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "values"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'indices\', \'values\', \'dense_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "consumers"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "eval"
+    argspec: "args=[\'self\', \'feed_dict\', \'session\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'cls\', \'sparse_tensor_value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_shape"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt
index ba9e651b34..32bd8d5f8e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt
@@ -1,5 +1,21 @@
 path: "tensorflow.sparse"
 tf_module {
+  member {
+    name: "SparseConditionalAccumulator"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseTensor"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "add"
+    argspec: "args=[\'a\', \'b\', \'thresh\'], varargs=None, keywords=None, defaults=[\'0\'], "
+  }
+  member_method {
+    name: "concat"
+    argspec: "args=[\'axis\', \'sp_inputs\', \'name\', \'expand_nonconcat_dim\', \'concat_dim\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
   member_method {
     name: "cross"
     argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,4 +32,100 @@ tf_module {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "fill_empty_rows"
+    argspec: "args=[\'sp_input\', \'default_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "mask"
+    argspec: "args=[\'a\', \'mask_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "matmul"
+    argspec: "args=[\'sp_a\', \'b\', \'adjoint_a\', \'adjoint_b\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "maximum"
+    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "merge"
+    argspec: "args=[\'sp_ids\', \'sp_values\', \'vocab_size\', \'name\', \'already_sorted\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "minimum"
+    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "placeholder"
+    argspec: "args=[\'dtype\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max_sparse"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum_sparse"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reorder"
+    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "reset_shape"
+    argspec: "args=[\'sp_input\', \'new_shape\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "reshape"
+    argspec: "args=[\'sp_input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "retain"
+    argspec: "args=[\'sp_input\', \'to_retain\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "segment_mean"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "segment_sqrt_n"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "segment_sum"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "slice"
+    argspec: "args=[\'sp_input\', \'start\', \'size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "softmax"
+    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "split"
+    argspec: "args=[\'keyword_required\', \'sp_input\', \'num_split\', \'axis\', \'name\', \'split_dim\'], varargs=None, keywords=None, defaults=[\'KeywordRequired()\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "to_dense"
+    argspec: "args=[\'sp_input\', \'default_value\', \'validate_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'True\', \'None\'], "
+  }
+  member_method {
+    name: "to_indicator"
+    argspec: "args=[\'sp_input\', \'vocab_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "transpose"
+    argspec: "args=[\'sp_input\', \'perm\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
index 312e94b41d..ebdaf57231 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "length"
     argspec: "args=[\'input\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
+  member_method {
+    name: "reduce_join"
+    argspec: "args=[\'inputs\', \'axis\', \'keep_dims\', \'separator\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'\', \'None\', \'None\'], "
+  }
   member_method {
     name: "regex_full_match"
     argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
index cb6da5088b..7e980fe44d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
@@ -252,6 +252,10 @@ tf_module {
     name: "checkpoint_exists"
     argspec: "args=[\'checkpoint_prefix\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "confusion_matrix"
+    argspec: "args=[\'labels\', \'predictions\', \'num_classes\', \'dtype\', \'name\', \'weights\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'int32\'>\", \'None\', \'None\'], "
+  }
   member_method {
     name: "cosine_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
-- 
GitLab


From 694367b574dcaf5ac90f3e42b8dee8fa51ca9f38 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 11:58:17 -0700
Subject: [PATCH 0955/1357] Automated rollback of commit
 cb98ceba9cff8c10ee3c7e89dc8925c88b28118e

PiperOrigin-RevId: 215254762
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 4 ++--
 tensorflow/core/protobuf/rewriter_config.proto        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index a5f851fb1a..c3d70a1fdf 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -139,7 +139,7 @@ Status MetaOptimizer::InitializeOptimizers(
   if (cfg_.remapping() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
   }
-  if (cfg_.pin_to_host_optimization() != RewriterConfig::OFF) {
+  if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) {
     optimizers->push_back(MakeUnique<PinToHostOptimizer>());
   }
   if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
@@ -527,7 +527,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
          cfg.scoped_allocator_optimization() == RewriterConfig::ON ||
-         cfg.pin_to_host_optimization() != RewriterConfig::OFF ||
+         cfg.pin_to_host_optimization() == RewriterConfig::ON ||
          !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }
 
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 8e0448d536..8c31468ff5 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -75,7 +75,7 @@ message RewriterConfig {
   // Try to allocate some independent Op outputs contiguously in order to
   // merge or eliminate downstream Ops (off by default).
   Toggle scoped_allocator_optimization = 15;
-  // Force small ops onto the CPU (default is ON).
+  // Force small ops onto the CPU (default is OFF).
   Toggle pin_to_host_optimization = 18;
   // Disable the entire meta optimizer (off by default).
   bool disable_meta_optimizer = 19;
-- 
GitLab


From c4b3ce081b8abfae5560814ec445f0169cb4c368 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Mon, 1 Oct 2018 12:03:53 -0700
Subject: [PATCH 0956/1357] Add new attributes for the defun forward/backward
 functions.

PiperOrigin-RevId: 215255826
---
 tensorflow/python/eager/function.py      | 39 ++++++++++++++++++------
 tensorflow/python/eager/function_test.py | 15 +++++++++
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index dd3e1a3723..60a4f018cd 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 
 import collections
 import functools
+import re
 import sys
 import threading
 import weakref
@@ -61,9 +62,15 @@ cond_v2_impl._function = sys.modules[__name__]  # pylint: disable=protected-acce
 # This is to avoid a circular dependency with gradients_impl
 gradients_impl._function = sys.modules[__name__]  # pylint: disable=protected-access
 
+FORWARD_FUNCTION_ATTRIBUTE_NAME = "forward_function_name"
+BACKWARD_FUNCTION_ATTRIBUTE_NAME = "backward_function_name"
 
 # TODO(scottzhu): Update this to allow arbitrary attribute names in future.
-WHITELIST_FUNCTION_ATTRIBUTE_PREFIX = "experimental_"
+WHITELIST_FUNCTION_ATTRIBUTE_REGEX = [
+    "experimental_.*",
+    FORWARD_FUNCTION_ATTRIBUTE_NAME,
+    BACKWARD_FUNCTION_ATTRIBUTE_NAME
+]
 
 
 def _create_substitute_placeholder(value, name=None, dtype=None):
@@ -140,10 +147,11 @@ def _parse_func_attrs(attributes):
   """
   attrs = {}
   for key, value in attributes.items():
-    if not key.startswith(WHITELIST_FUNCTION_ATTRIBUTE_PREFIX):
+    if not any([re.match(reg, key)
+                for reg in WHITELIST_FUNCTION_ATTRIBUTE_REGEX]):
       raise ValueError("Attribute name is not whitelisted. "
                        "Whitelisted: prefix %s, got: %s" %
-                       (WHITELIST_FUNCTION_ATTRIBUTE_PREFIX, key))
+                       (WHITELIST_FUNCTION_ATTRIBUTE_REGEX, key))
 
     if isinstance(value, attr_value_pb2.AttrValue):
       attrs[key] = value
@@ -154,7 +162,7 @@ def _parse_func_attrs(attributes):
       attrs[key] = attr_value_pb2.AttrValue(i=value)
     elif isinstance(value, float):
       attrs[key] = attr_value_pb2.AttrValue(f=value)
-    elif isinstance(value, str):
+    elif isinstance(value, (str, bytes)):
       attrs[key] = attr_value_pb2.AttrValue(s=compat.as_bytes(value))
     else:
       raise ValueError("Unsupported attribute type for %s with type %s" %
@@ -705,6 +713,7 @@ class Function(object):
   def _construct_backprop_function(self):
     """Constructs the backprop function object for this function."""
     backwards_graph = FuncGraph(_backward_name(self._func_graph.name))
+    forward_function_name = _forward_name(self._func_graph.name)
     with backwards_graph.as_default():
       gradients_wrt_outputs = [
           graph_placeholder(x.dtype, x.shape) for x in self._func_graph.outputs
@@ -715,11 +724,11 @@ class Function(object):
           grad_ys=gradients_wrt_outputs,
           src_graph=self._func_graph)
 
-    self._forward_function = _EagerDefinedFunction(
-        _forward_name(
-            self._func_graph.name), self._func_graph, self._func_graph.inputs,
-        self._func_graph.outputs + list(backwards_graph.captures.keys()),
-        self._attrs)
+    backwards_graph_captures = list(backwards_graph.captures.keys())
+
+    backward_function_attr = _parse_func_attrs(
+        {FORWARD_FUNCTION_ATTRIBUTE_NAME: forward_function_name})
+    backward_function_attr.update(self._attrs)
 
     # The ordering of `backwards_graph.inputs` is important: inputs of
     # `self._backward_graph_function` correspond to outputs of
@@ -732,7 +741,17 @@ class Function(object):
         grad for grad in _flatten(gradients_wrt_inputs) if grad is not None)
     backwards_graph.structured_outputs = gradients_wrt_inputs
     self._backward_graph_function = Function(
-        backwards_graph, attrs=self._attrs)
+        backwards_graph, attrs=backward_function_attr)
+
+    forward_function_attr = _parse_func_attrs({
+        BACKWARD_FUNCTION_ATTRIBUTE_NAME:
+            self._backward_graph_function._inference_function.name})  # pylint: disable=protected-access
+    forward_function_attr.update(self._attrs)
+
+    self._forward_function = _EagerDefinedFunction(
+        forward_function_name, self._func_graph, self._func_graph.inputs,
+        self._func_graph.outputs + backwards_graph_captures,
+        forward_function_attr)
 
   def _backprop_call(self, args):
     """Calls the forward function and records the result on a tape.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 34a2648e26..afe3ba9893 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1687,6 +1687,21 @@ class FunctionTest(test.TestCase):
           self.assertRegexpMatches(captured_function_names[i],
                                    expected_func_name_regex[i])
 
+        # Check the forward and backward function has the correct attributes.
+        self.assertEquals(
+            functions[1].definition.attr['backward_function_name'].s,
+            functions[2].name)
+        self.assertEquals(
+            functions[2].definition.attr['forward_function_name'].s,
+            functions[1].name)
+
+        self.assertEquals(
+            functions[4].definition.attr['backward_function_name'].s,
+            functions[5].name)
+        self.assertEquals(
+            functions[5].definition.attr['forward_function_name'].s,
+            functions[4].name)
+
         sq = defun_matmul(t, t)
         double = add(t, t)
         self.assertAllEqual(sq.eval().reshape(-1), [7, 10, 15, 22])
-- 
GitLab


From f0c219d095f38f7ce6febfb68d4f84d64aa1829a Mon Sep 17 00:00:00 2001
From: Youlong Cheng <ylc@google.com>
Date: Mon, 1 Oct 2018 12:28:32 -0700
Subject: [PATCH 0957/1357]  Expose tpu_host_placement_function().

PiperOrigin-RevId: 215259803
---
 tensorflow/contrib/tpu/python/tpu/tpu_context.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
index 7cfb6c38fa..da6bdf67d6 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
@@ -154,6 +154,20 @@ class TPUContext(object):
     # as far as model is replicated to all cores in the system.
     return self._internal_ctx.device_for_replica(replica_id)
 
+  @property
+  def tpu_host_placement_function(self):
+    """Returns the TPU host place function.
+
+    The place function takes host_id as the input and returns the TF device
+    for the correspoding host.
+    """
+
+    def _placement_function(host_id):
+      """Return the host device given host_id."""
+      return self._internal_ctx.tpu_host_placement_function(host_id=host_id)
+
+    return _placement_function
+
 
 class _InternalTPUContext(object):
   """A context holds immutable states of TPU computation.
-- 
GitLab


From 5c8c48df7fd4ccbe4a9dec035fdec6b02a5d6016 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 12:54:56 -0700
Subject: [PATCH 0958/1357] Internal build specification change

PiperOrigin-RevId: 215263951
---
 tensorflow/core/BUILD | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 57819cec70..0aae29d10c 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -271,6 +271,12 @@ proto_library(
     visibility = ["//visibility:public"],
 )
 
+java_proto_library(
+    name = "example_java_proto",
+    visibility = ["//visibility:public"],
+    deps = [":example_protos"],
+)
+
 closure_proto_library(
     name = "example_protos_closure",
     visibility = ["//visibility:public"],
-- 
GitLab


From 3648cb0198690d551ea5c8eefcf706c8fa67f4f0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 13:07:12 -0700
Subject: [PATCH 0959/1357] Add option to initialize the TPU system.

PiperOrigin-RevId: 215266241
---
 tensorflow/python/tools/saved_model_cli.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index 3dbccd1409..2fcb0fa029 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -267,7 +267,8 @@ def scan_meta_graph_def(meta_graph_def):
 
 def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key,
                                    input_tensor_key_feed_dict, outdir,
-                                   overwrite_flag, worker=None, tf_debug=False):
+                                   overwrite_flag, worker=None, init_tpu=False,
+                                   tf_debug=False):
   """Runs SavedModel and fetch all outputs.
 
   Runs the input dictionary through the MetaGraphDef within a SavedModel
@@ -287,6 +288,8 @@ def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key,
         the same name exists.
     worker: If provided, the session will be run on the worker.  Valid worker
         specification is a bns or gRPC path.
+    init_tpu: If true, the TPU system will be initialized after the session
+        is created.
     tf_debug: A boolean flag to use TensorFlow Debugger (TFDBG) to observe the
         intermediate Tensor values and runtime GraphDefs while running the
         SavedModel.
@@ -328,6 +331,12 @@ def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key,
   ]
 
   with session.Session(worker, graph=ops_lib.Graph()) as sess:
+    if init_tpu:
+      print('Initializing TPU System ...')
+      # This is needed for freshly started worker, or if the job
+      # restarts after a preemption.
+      sess.run(tf.contrib.tpu.initialize_system())
+
     loader.load(sess, tag_set.split(','), saved_model_dir)
 
     if tf_debug:
@@ -632,7 +641,7 @@ def run(args):
   run_saved_model_with_feed_dict(args.dir, args.tag_set, args.signature_def,
                                  tensor_key_feed_dict, args.outdir,
                                  args.overwrite, worker=args.worker,
-                                 tf_debug=args.tf_debug)
+                                 init_tpu=args.init_tpu, tf_debug=args.tf_debug)
 
 
 def scan(args):
@@ -775,6 +784,12 @@ def create_parser():
       default=None,
       help='if specified, a Session will be run on the worker. '
            'Valid worker specification is a bns or gRPC path.')
+  parser_run.add_argument(
+      '--init_tpu',
+      action='store_true',
+      default=None,
+      help='if specified, tpu.initialize_system will be called on the Session. '
+           'This option should be only used if the worker is a TPU job.')
   parser_run.set_defaults(func=run)
 
   # scan command
-- 
GitLab


From 3c6e6885f32e7638ece306dad3a5081b06137bdc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 13:08:10 -0700
Subject: [PATCH 0960/1357] Check in and refactor the OVIC detector
 benchmarker.

PiperOrigin-RevId: 215266415
---
 tensorflow/contrib/lite/java/ovic/BUILD       |  61 +++++-
 .../contrib/lite/java/ovic/demo/app/BUILD     |   5 +-
 .../demo/app/OvicBenchmarkerActivity.java     |  77 +++++---
 .../demo/app/res/layout/activity_main.xml     |  27 ++-
 .../java/ovic/demo/app/res/values/strings.xml |   3 +-
 .../java/org/tensorflow/ovic/BoundingBox.java |  68 +++++++
 .../org/tensorflow/ovic/OvicBenchmarker.java  | 152 ++++++---------
 ...ult.java => OvicClassificationResult.java} |  12 +-
 .../org/tensorflow/ovic/OvicClassifier.java   |  10 +-
 .../ovic/OvicClassifierBenchmarker.java       | 142 ++++++++++++++
 .../tensorflow/ovic/OvicDetectionResult.java  |  91 +++++++++
 .../org/tensorflow/ovic/OvicDetector.java     | 184 ++++++++++++++++++
 .../ovic/OvicDetectorBenchmarker.java         | 160 +++++++++++++++
 .../org/tensorflow/ovic/OvicValidator.java    |   2 +-
 .../tensorflow/ovic/OvicClassifierTest.java   |   6 +-
 .../org/tensorflow/ovic/OvicDetectorTest.java | 149 ++++++++++++++
 .../contrib/lite/java/ovic/src/testdata/BUILD |   5 +-
 .../java/ovic/src/testdata/coco_labels.txt    |  91 +++++++++
 18 files changed, 1101 insertions(+), 144 deletions(-)
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/BoundingBox.java
 rename tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/{OvicSingleImageResult.java => OvicClassificationResult.java} (83%)
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectionResult.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetector.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicDetectorTest.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt

diff --git a/tensorflow/contrib/lite/java/ovic/BUILD b/tensorflow/contrib/lite/java/ovic/BUILD
index bb0be04ca2..ea9b9ed4b6 100644
--- a/tensorflow/contrib/lite/java/ovic/BUILD
+++ b/tensorflow/contrib/lite/java/ovic/BUILD
@@ -9,6 +9,7 @@ licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow/java:build_defs.bzl", "JAVACOPTS")
 
+# Build targets for OVIC classification.
 java_test(
     name = "OvicClassifierTest",
     size = "medium",
@@ -45,8 +46,9 @@ android_library(
     name = "ovicbenchmarkerlib",
     srcs = [
         "src/main/java/org/tensorflow/ovic/OvicBenchmarker.java",
+        "src/main/java/org/tensorflow/ovic/OvicClassificationResult.java",
         "src/main/java/org/tensorflow/ovic/OvicClassifier.java",
-        "src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java",
+        "src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java",
     ],
     manifest = "//tensorflow/contrib/lite/java:AndroidManifest.xml",
     tags = ["no_oss"],
@@ -60,8 +62,8 @@ android_library(
 java_library(
     name = "ovicbenchmarkerlib_java",
     srcs = [
+        "src/main/java/org/tensorflow/ovic/OvicClassificationResult.java",
         "src/main/java/org/tensorflow/ovic/OvicClassifier.java",
-        "src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java",
     ],
     javacopts = JAVACOPTS,
     tags = ["no_oss"],
@@ -73,3 +75,58 @@ java_library(
         "@org_checkerframework_qual",
     ],
 )
+
+# Build targets for OVIC detection.
+java_test(
+    name = "OvicDetectorTest",
+    size = "medium",
+    srcs = ["src/test/java/org/tensorflow/ovic/OvicDetectorTest.java"],
+    data = [
+        "//tensorflow/contrib/lite/java/ovic/src/testdata:coco_labels.txt",
+        "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata",
+        "@tflite_mobilenet_ssd_quant//:detect.tflite",
+    ],
+    javacopts = JAVACOPTS,
+    tags = ["no_oss"],
+    test_class = "org.tensorflow.ovic.OvicDetectorTest",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/contrib/lite/java/ovic:ovicdetectionbenchmarkerlib_java",
+        "@com_google_truth",
+        "@junit",
+    ],
+)
+
+android_library(
+    name = "ovicdetectionbenchmarkerlib",
+    srcs = [
+        "src/main/java/org/tensorflow/ovic/BoundingBox.java",
+        "src/main/java/org/tensorflow/ovic/OvicBenchmarker.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetectionResult.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetector.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java",
+    ],
+    manifest = "//tensorflow/contrib/lite/java:AndroidManifest.xml",
+    deps = [
+        "//tensorflow/contrib/lite/java:tensorflowlite",
+        "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper",
+        "@org_checkerframework_qual",
+    ],
+)
+
+java_library(
+    name = "ovicdetectionbenchmarkerlib_java",
+    srcs = [
+        "src/main/java/org/tensorflow/ovic/BoundingBox.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetectionResult.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetector.java",
+    ],
+    javacopts = JAVACOPTS,
+    deps = [
+        "//tensorflow/contrib/lite/java:libtensorflowlite_jni.so",
+        "//tensorflow/contrib/lite/java:tensorflowlite_java",
+        "//tensorflow/contrib/lite/java/src/main/native",
+        "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper",
+        "@org_checkerframework_qual",
+    ],
+)
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
index 058240aada..f567358ea3 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
@@ -10,8 +10,10 @@ android_binary(
     ],
     aapt_version = "aapt",
     assets = [
-        "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata",
+        "//tensorflow/contrib/lite/java/ovic/src/testdata:coco_labels.txt",
         "//tensorflow/contrib/lite/java/ovic/src/testdata:labels.txt",
+        "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata",
+        "@tflite_mobilenet_ssd_quant//:detect.tflite",
     ],
     assets_dir = "",
     custom_package = "ovic.demo.app",
@@ -25,6 +27,7 @@ android_binary(
     deps = [
         "//tensorflow/contrib/lite/java:tensorflowlite",
         "//tensorflow/contrib/lite/java/ovic:ovicbenchmarkerlib",
+        "//tensorflow/contrib/lite/java/ovic:ovicdetectionbenchmarkerlib",
         "@androidsdk//com.android.support:support-v13-25.2.0",
         "@androidsdk//com.android.support:support-v4-25.2.0",
     ],
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java
index 4adf94aeb6..48c29ecebe 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java
@@ -35,19 +35,18 @@ import java.nio.MappedByteBuffer;
 import java.nio.channels.FileChannel;
 import java.text.DecimalFormat;
 import org.tensorflow.ovic.OvicBenchmarker;
-import org.tensorflow.ovic.OvicSingleImageResult;
-
+import org.tensorflow.ovic.OvicClassifierBenchmarker;
+import org.tensorflow.ovic.OvicDetectorBenchmarker;
 
 /** Class that benchmark image classifier models. */
 public class OvicBenchmarkerActivity extends Activity {
   /** Tag for the {@link Log}. */
   private static final String TAG = "OvicBenchmarkerActivity";
 
-  /** Name of the label file stored in Assets. */
-  private static final String LABEL_PATH = "labels.txt";
-
-  private static final String TEST_IMAGE_PATH = "test_image_224.jpg";
-  private static final String MODEL_PATH = "float_model.lite";
+  /** Name of the task-dependent data files stored in Assets. */
+  private static String labelPath = null;
+  private static String testImagePath = null;
+  private static String modelPath = null;
   /**
    * Each bottom press will launch a benchmarking experiment. The experiment stops when either the
    * total native latency reaches WALL_TIME or the number of iterations reaches MAX_ITERATIONS,
@@ -66,8 +65,6 @@ public class OvicBenchmarkerActivity extends Activity {
   private MappedByteBuffer model = null;
   private InputStream labelInputStream = null;
   private OvicBenchmarker benchmarker;
-  /** Inference result of each iteration. */
-  OvicSingleImageResult iterResult = null;
 
   private TextView textView = null;
   // private Button startButton = null;
@@ -83,21 +80,31 @@ public class OvicBenchmarkerActivity extends Activity {
   }
 
   private Bitmap loadTestBitmap() throws IOException {
-    InputStream imageStream = getAssets().open(TEST_IMAGE_PATH);
+    InputStream imageStream = getAssets().open(testImagePath);
     return BitmapFactory.decodeStream(imageStream);
   }
 
-  public void initializeTest() throws IOException {
+  public void initializeTest(boolean benchmarkClassification) throws IOException {
     Log.i(TAG, "Initializing benchmarker.");
-    benchmarker = new OvicBenchmarker(WALL_TIME);
+    if (benchmarkClassification) {
+      benchmarker = new OvicClassifierBenchmarker(WALL_TIME);
+      labelPath = "labels.txt";
+      testImagePath = "test_image_224.jpg";
+      modelPath = "quantized_model.lite";
+    } else {  // Benchmarking detection.
+      benchmarker = new OvicDetectorBenchmarker(WALL_TIME);
+      labelPath = "coco_labels.txt";
+      testImagePath = "test_image_224.jpg";
+      modelPath = "detect.tflite";
+    }
     AssetManager am = getAssets();
-    AssetFileDescriptor fileDescriptor = am.openFd(MODEL_PATH);
+    AssetFileDescriptor fileDescriptor = am.openFd(modelPath);
     FileInputStream modelInputStream = new FileInputStream(fileDescriptor.getFileDescriptor());
     FileChannel fileChannel = modelInputStream.getChannel();
     long startOffset = fileDescriptor.getStartOffset();
     long declaredLength = fileDescriptor.getDeclaredLength();
     model = fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
-    labelInputStream = am.open(LABEL_PATH);
+    labelInputStream = am.open(labelPath);
   }
 
   public Boolean doTestIteration() throws IOException, InterruptedException {
@@ -117,24 +124,44 @@ public class OvicBenchmarkerActivity extends Activity {
     Log.i(TAG, "Going to do test iter.");
     // Start testing.
     Bitmap testImageBitmap = loadTestBitmap();
-    iterResult = benchmarker.doTestIteration(testImageBitmap);
-    testImageBitmap.recycle();
-    if (iterResult == null) {
+    try {
+      if (!benchmarker.processBitmap(testImageBitmap)) {
+        throw new RuntimeException("Failed to run test.");
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    } finally {
+      testImageBitmap.recycle();
+    }
+    String iterResultString = benchmarker.getLastResultString();
+    if (iterResultString == null) {
       throw new RuntimeException("Inference failed to produce a result.");
     }
-    Log.i(TAG, iterResult.toString());
+    Log.i(TAG, iterResultString);
     return true;
   }
 
-  public void startPressed(View view) throws IOException {
-    Log.i(TAG, "Start pressed");
+  public void detectPressed(View view) throws IOException {
+    benchmarkSession(false);
+  }
+  public void classifyPressed(View view) throws IOException {
+    benchmarkSession(true);
+  }
+
+  private void benchmarkSession(boolean benchmarkClassification) throws IOException {
     try {
-      initializeTest();
+      initializeTest(benchmarkClassification);
     } catch (IOException e) {
       Log.e(TAG, "Can't initialize benchmarker.", e);
       throw e;
     }
     String displayText = "";
+    if (benchmarkClassification) {
+      displayText = "Classification benchmark: ";
+    } else {
+      displayText = "Detection benchmark: ";
+    }
     try {
       setProcessorAffinity(BIG_CORE_MASK);
     } catch (IOException e) {
@@ -144,7 +171,6 @@ public class OvicBenchmarkerActivity extends Activity {
     Log.i(TAG, "Successfully initialized benchmarker.");
     int testIter = 0;
     Boolean iterSuccess = false;
-    double totalLatency = 0.0f;
     while (testIter < MAX_ITERATIONS) {
       try {
         iterSuccess = doTestIteration();
@@ -153,23 +179,22 @@ public class OvicBenchmarkerActivity extends Activity {
         throw e;
       } catch (InterruptedException e) {
         Log.e(TAG, "Interrupted at iteration " + testIter);
+        displayText += e.getMessage() + "\n";
       }
       if (!iterSuccess) {
         break;
       }
       testIter++;
-      totalLatency += (double) iterResult.latency;
     }
-    ;
     Log.i(TAG, "Benchmarking finished");
 
     if (textView != null) {
       if (testIter > 0) {
         textView.setText(
             displayText
-                + MODEL_PATH
+                + modelPath
                 + ": Average latency="
-                + df2.format(totalLatency / testIter)
+                + df2.format(benchmarker.getTotalRunTime() / testIter)
                 + "ms after "
                 + testIter
                 + " runs.");
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml b/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml
index e9d83bae54..1bce60ff7d 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml
@@ -30,14 +30,14 @@
     android:layout_height="wrap_content"
     android:text="@string/initial_status_msg"
     android:id="@+id/textView"
-    android:layout_above="@+id/button_start"
+    android:layout_above="@+id/button_clf_start"
     android:layout_alignParentTop="true"/>
 
   <Button
     android:layout_width="wrap_content"
     android:layout_height="wrap_content"
-    android:text="@string/start_label"
-    android:id="@id/button_start"
+    android:text="@string/start_clf_label"
+    android:id="@id/button_clf_start"
     android:layout_alignParentBottom="true"
     android:layout_alignParentLeft="true"
     android:background="@drawable/start_button_color"
@@ -49,6 +49,25 @@
     android:textColor="#ffffff"
     android:enabled="true"
     style="?android:attr/buttonBarButtonStyle"
-    android:onClick="startPressed"/>
+    android:onClick="classifyPressed"/>
+
+  <Button
+    android:layout_width="wrap_content"
+    android:layout_height="wrap_content"
+    android:text="@string/start_det_label"
+    android:id="@+id/button_det_start"
+    android:layout_alignParentBottom="true"
+    android:layout_alignParentRight="true"
+    android:layout_toRightOf="@id/button_clf_start"
+    android:background="@drawable/start_button_color"
+    android:padding="10dp"
+    android:layout_marginRight="100dp"
+    android:layout_marginLeft="30dp"
+    android:layout_marginTop="10dp"
+    android:foreground="#000000"
+    android:textColor="#ffffff"
+    android:enabled="true"
+    style="?android:attr/buttonBarButtonStyle"
+    android:onClick="detectPressed"/>
 
 </RelativeLayout>
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/res/values/strings.xml b/tensorflow/contrib/lite/java/ovic/demo/app/res/values/strings.xml
index d26beb1d27..53525908d3 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/res/values/strings.xml
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/res/values/strings.xml
@@ -17,6 +17,7 @@
 <resources>
     <string name="app_name" translatable="false">Benchmarker</string>
 
-    <string name="start_label" translatable="false">Start</string>
+    <string name="start_clf_label" translatable="false">Clf</string>
+    <string name="start_det_label" translatable="false">Det</string>
     <string name="initial_status_msg" translatable="false"> Press start to run the benchmarks.</string>
 </resources>
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/BoundingBox.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/BoundingBox.java
new file mode 100644
index 0000000000..9bf7d005d2
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/BoundingBox.java
@@ -0,0 +1,68 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+/** Class for holding a detection bounding box with category and confidence. */
+public class BoundingBox {
+  // Upper left point.
+  public float x1;
+  public float y1;
+
+  // Lower right point.
+  public float x2;
+  public float y2;
+
+  // The area of the box
+  public float area;
+
+  // The object category
+  public int category;
+
+  // The confidence of the detection
+  public float score;
+
+  public BoundingBox(float x1, float y1, float x2, float y2, int category, float score) {
+    this.x1 = x1;
+    this.y1 = y1;
+    this.x2 = x2;
+    this.y2 = y2;
+    this.category = category;
+    this.score = score;
+    // -1 stands for area not initialized
+    this.area = -1;
+  }
+
+  // The intersection area of two bounding boxes
+  public float intersect(BoundingBox bbx) {
+    return Math.max(0, Math.min(x2, bbx.x2) - Math.max(x1, bbx.x1))
+        * Math.max(0, Math.min(y2, bbx.y2) - Math.max(y1, bbx.y1));
+  }
+
+  // The union area of two bounding boxes
+  public float union(BoundingBox bbx) {
+    return bbx.getArea() + this.getArea() - this.intersect(bbx);
+  }
+
+  public float getArea() {
+    if (area < 0) {
+      area = (x2 - x1) * (y2 - y1);
+    }
+    return area;
+  }
+
+  public float computeIoU(BoundingBox bbx) {
+    return (float) (this.intersect(bbx) * 1.0 / this.union(bbx));
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java
index 4cda258bee..15d9511f50 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java
@@ -20,11 +20,10 @@ import android.util.Log;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
 import java.nio.MappedByteBuffer;
 
 /**
- * Class that benchmarks image classifier models.
+ * Base class that benchmarks image models.
  *
  * <p>===================== General workflow =======================
  *
@@ -33,37 +32,40 @@ import java.nio.MappedByteBuffer;
  * benchmarker.getReadyToTest(labelInputStream, model);
  * while (!benchmarker.shouldStop()) {
  *   Bitmap bitmap = ...
- *   benchmarker.doTestIteration(bitmap);
+ *   imgId = ...
+ *   benchmarker.processBitmap(bitmap, imgId);
  * }
  * }</pre>
  */
-public class OvicBenchmarker {
+public abstract class OvicBenchmarker {
   /** Tag for the {@link Log}. */
   private static final String TAG = "OvicBenchmarker";
 
-  /** Evaluation transformation parameters. */
-  private static final float CENTRAL_FRACTION = 0.875f;
-
   /** Dimensions of inputs. */
-  private static final int DIM_BATCH_SIZE = 1;
-  private static final int DIM_PIXEL_SIZE = 3;
-  private int imgHeight = 224;
-  private int imgWidth = 224;
+  protected static final int DIM_BATCH_SIZE = 1;
+  protected static final int DIM_PIXEL_SIZE = 3;
+  protected int imgHeight = 224;
+  protected int imgWidth = 224;
+
+  /** Preprocess parameters (only used when input is float). */
+  protected static final float IMAGE_MEAN = 127.5f;
+  protected static final float IMAGE_STD = 127.5f;
+
+  /** Whether input is float or quantized. */
+  protected Boolean quantizedInput = null;
 
   /* Preallocated buffers for storing image data in. */
-  private int[] intValues = null;
+  protected int[] intValues = null;
 
   /** A ByteBuffer to hold image data, to be feed into classifier as inputs. */
-  private ByteBuffer imgData = null;
-
-  private OvicClassifier classifier;
+  protected ByteBuffer imgData = null;
 
   /** Total runtime in ms. */
-  private double totalRuntime = 0.0;
+  protected double totalRuntime = 0.0;
   /** Total allowed runtime in ms. */
-  private double wallTime = 20000 * 30.0;
-
-  private Boolean benchmarkStarted = null;
+  protected double wallTime = 20000 * 30.0;
+  /** Record whether benchmark has started (used to skip the first image). */
+  protected boolean benchmarkStarted = false;
 
   /**
    * Initializes an {@link OvicBenchmarker}
@@ -76,6 +78,11 @@ public class OvicBenchmarker {
     this.wallTime = wallTime;
   }
 
+  /** Return the cumulative latency of all runs so far. */
+  public double getTotalRunTime() {
+    return totalRuntime;
+  }
+
   /** Check whether the benchmarker should stop. */
   public Boolean shouldStop() {
     if (totalRuntime >= wallTime) {
@@ -90,105 +97,62 @@ public class OvicBenchmarker {
     return false;
   }
 
-  /** Check whether the benchmarker is ready to start classifying images. */
-  public Boolean readyToTest() {
-    return (classifier != null);
-  }
+  /** Abstract class for checking whether the benchmarker is ready to start processing images */
+  public abstract boolean readyToTest();
 
   /**
-   * Getting the benchmarker ready for classifying images.
+   * Abstract class for getting the benchmarker ready.
    *
    * @param labelInputStream: an {@link InputStream} specifying where the list of labels should be
    *     read from.
    * @param model: a {@link MappedByteBuffer} model to benchmark.
    */
-  public void getReadyToTest(InputStream labelInputStream, MappedByteBuffer model) {
-    try {
-      Log.i(TAG, "Creating classifier.");
-      classifier = new OvicClassifier(labelInputStream, model);
-      int [] inputDims = classifier.getInputDims();
-      imgHeight = inputDims[1];
-      imgWidth = inputDims[2];
-      // Only accept QUANTIZED_UINT8 input.
-      imgData = ByteBuffer.allocateDirect(DIM_BATCH_SIZE * imgHeight * imgWidth * DIM_PIXEL_SIZE);
-      imgData.order(ByteOrder.nativeOrder());
-      intValues = new int[imgHeight * imgWidth];
-    } catch (Exception e) {
-        Log.e(TAG, e.getMessage());
-        Log.e(TAG, "Failed to initialize ImageNet classifier for the benchmarker.");
-    }
-  }
-
-  /** Return how many classes are predicted per image. */
-  public int getNumPredictions() {
-    return classifier.getNumPredictions();
-  }
+  public abstract void getReadyToTest(InputStream labelInputStream, MappedByteBuffer model);
 
   /**
    * Perform test on a single bitmap image.
    *
-   * @param bitmap: a {@link Bitmap} image to classify.
+   * @param bitmap: a {@link Bitmap} image to process.
+   * @param imageId: an ID uniquely representing the image.
    */
-  public OvicSingleImageResult doTestIteration(Bitmap bitmap)
-      throws IOException, InterruptedException {
-    if (shouldStop() || !readyToTest()) {
-      return null;
-    }
-    OvicSingleImageResult iterResult = null;
-    try {
-      Log.i(TAG, "Converting bitmap.");
-      convertBitmapToInput(bitmap);
-      Log.i(TAG, "Classifying image.");
-      iterResult = classifier.classifyByteBuffer(imgData);
-    } catch (RuntimeException e) {
-      Log.e(TAG, e.getMessage());
-      Log.e(TAG, "Failed to classify image.");
-    }
-    if (iterResult == null || iterResult.latency == null) {
-      throw new RuntimeException("Classification result or timing is invalid.");
-    }
-    Log.d(TAG, "Native inference latency: " + iterResult.latency);
-    Log.i(TAG, iterResult.toString());
+  public abstract boolean processBitmap(Bitmap bitmap, int imageId)
+      throws IOException, InterruptedException;
 
-    if (!benchmarkStarted) {  // Skip the first image to discount warming-up time.
-      benchmarkStarted = true;
-    } else {
-      totalRuntime += (double) iterResult.latency;
-    }
-    return iterResult;
+  /** Perform test on a single bitmap image without an image ID. */
+  public boolean processBitmap(Bitmap bitmap) throws IOException, InterruptedException {
+    return processBitmap(bitmap, /* imageId = */ 0);
   }
 
+  /** Returns the last inference results as string. */
+  public abstract String getLastResultString();
+
   /**
-   * Writes Image data into a {@link ByteBuffer}.
-   *
-   * @param bitmap: a {@link Bitmap} source image.
-   */
-  private void convertBitmapToInput(Bitmap bitmap) throws RuntimeException {
-    if (imgData == null) {
+   * Loads input buffer from intValues into ByteBuffer for the interpreter.
+   * Input buffer must be loaded in intValues and output will be placed in imgData.
+  */
+  protected void loadsInputToByteBuffer() {
+    if (imgData == null || intValues == null || quantizedInput == null) {
       throw new RuntimeException("Benchmarker is not yet ready to test.");
     }
-    imgData.rewind();
-    // Perform transformations corresponding to evaluation mode.
-    float width = (float) bitmap.getWidth();
-    float height = (float) bitmap.getHeight();
-    int stWidth = Math.round((width - width * CENTRAL_FRACTION) / 2);
-    int stHeight = Math.round((height - height * CENTRAL_FRACTION) / 2);
-    int newWidth = Math.round(width - stWidth * 2);
-    int newHeight = Math.round(height - stHeight * 2);
-    bitmap = Bitmap.createBitmap(bitmap, stWidth, stHeight, newWidth, newHeight);
-    bitmap = Bitmap.createScaledBitmap(bitmap, imgWidth, imgHeight, true);
-    bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight());
-
     // Convert the image to ByteBuffer.
+    imgData.rewind();
     int pixel = 0;
     long startTime = SystemClock.uptimeMillis();
 
     for (int i = 0; i < imgHeight; ++i) {
       for (int j = 0; j < imgWidth; ++j) {
-        final int val = intValues[pixel++];
-        imgData.put((byte) ((val >> 16) & 0xFF));
-        imgData.put((byte) ((val >> 8) & 0xFF));
-        imgData.put((byte) (val & 0xFF));
+        final int pixelValue = intValues[pixel++];
+        if (quantizedInput) {
+          // Quantized model
+          imgData.put((byte) ((pixelValue >> 16) & 0xFF));
+          imgData.put((byte) ((pixelValue >> 8) & 0xFF));
+          imgData.put((byte) (pixelValue & 0xFF));
+        } else {
+          // Float model
+          imgData.putFloat((((pixelValue >> 16) & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+          imgData.putFloat((((pixelValue >> 8) & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+          imgData.putFloat(((pixelValue & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+        }
       }
     }
     long endTime = SystemClock.uptimeMillis();
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassificationResult.java
similarity index 83%
rename from tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java
rename to tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassificationResult.java
index 4af9a65c2f..5ab804e6ee 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassificationResult.java
@@ -1,4 +1,4 @@
-/*Copyright 2018 Google LLC
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,17 +17,17 @@ package org.tensorflow.ovic;
 import java.util.ArrayList;
 
 /** Result class for inference run on a single image. */
-public class OvicSingleImageResult {
+public class OvicClassificationResult {
 
   /** Top K classes and probabilities. */
-  public ArrayList<String> topKClasses;
-  public ArrayList<Float> topKProbs;
-  public ArrayList<Integer> topKIndices;
+  public final ArrayList<String> topKClasses;
+  public final ArrayList<Float> topKProbs;
+  public final ArrayList<Integer> topKIndices;
 
   /** Latency (ms). */
   public Long latency;
 
-  OvicSingleImageResult() {
+  OvicClassificationResult() {
     topKClasses = new ArrayList<>();
     topKProbs = new ArrayList<>();
     topKIndices = new ArrayList<>();
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
index fd610b054f..d8a54c1f3b 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
@@ -31,7 +31,7 @@ import java.util.PriorityQueue;
 import org.tensorflow.lite.Interpreter;
 import org.tensorflow.lite.TestHelper;
 
-/** Benchmark ImageNet Classifier with Tensorflow Lite. */
+/** Class for running ImageNet classification with a TfLite model. */
 public class OvicClassifier {
 
   /** Tag for the {@link Log}. */
@@ -106,7 +106,7 @@ public class OvicClassifier {
 
   /** Classifies a {@link ByteBuffer} image. */
   // @throws RuntimeException if model is uninitialized.
-  public OvicSingleImageResult classifyByteBuffer(ByteBuffer imgData) {
+  public OvicClassificationResult classifyByteBuffer(ByteBuffer imgData) {
     if (tflite == null) {
       throw new RuntimeException(TAG + ": ImageNet classifier has not been initialized; Failed.");
     }
@@ -122,7 +122,7 @@ public class OvicClassifier {
         labelProbArray[0][i] = (inferenceOutputArray[0][i] & 0xff) / 255.0f;
       }
     }
-    OvicSingleImageResult iterResult = computeTopKLabels();
+    OvicClassificationResult iterResult = computeTopKLabels();
     iterResult.latency = getLastNativeInferenceLatencyMilliseconds();
     return iterResult;
   }
@@ -174,7 +174,7 @@ public class OvicClassifier {
   }
 
   /** Computes top-K labels. */
-  private OvicSingleImageResult computeTopKLabels() {
+  private OvicClassificationResult computeTopKLabels() {
     if (labelList == null) {
       throw new RuntimeException("Label file has not been loaded.");
     }
@@ -184,7 +184,7 @@ public class OvicClassifier {
         sortedLabels.poll();
       }
     }
-    OvicSingleImageResult singleImageResult = new OvicSingleImageResult();
+    OvicClassificationResult singleImageResult = new OvicClassificationResult();
     if (sortedLabels.size() != RESULTS_TO_SHOW) {
       throw new RuntimeException(
           "Number of returned labels does not match requirement: "
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java
new file mode 100644
index 0000000000..0cdd0f7bec
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java
@@ -0,0 +1,142 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import android.graphics.Bitmap;
+import android.util.Log;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+
+/** Class that benchmarks image classifier models. */
+public final class OvicClassifierBenchmarker extends OvicBenchmarker {
+  /** Tag for the {@link Log}. */
+  private static final String TAG = "OvicClassifierBenchmarker";
+
+  /** ImageNet preprocessing parameters. */
+  private static final float CENTRAL_FRACTION = 0.875f;
+  private OvicClassifier classifier;
+  private OvicClassificationResult iterResult = null;
+
+  public OvicClassifierBenchmarker(double wallTime) {
+    super(wallTime);
+  }
+
+  /** Test if the classifier is ready for benchmarking. */
+  @Override
+  public boolean readyToTest() {
+    return (classifier != null);
+  }
+
+  /**
+   * Getting the benchmarker ready for classifying images.
+   *
+   * @param labelInputStream: an {@link InputStream} specifying where the list of labels should be
+   *     read from.
+   * @param model: a {@link MappedByteBuffer} model to benchmark.
+   */
+  @Override
+   public void getReadyToTest(InputStream labelInputStream, MappedByteBuffer model) {
+    try {
+      Log.i(TAG, "Creating classifier.");
+      classifier = new OvicClassifier(labelInputStream, model);
+      int [] inputDims = classifier.getInputDims();
+      imgHeight = inputDims[1];
+      imgWidth = inputDims[2];
+      quantizedInput = true;
+      // Only accept QUANTIZED_UINT8 input.
+      imgData = ByteBuffer.allocateDirect(DIM_BATCH_SIZE * imgHeight * imgWidth * DIM_PIXEL_SIZE);
+      imgData.order(ByteOrder.nativeOrder());
+      intValues = new int[imgHeight * imgWidth];
+    } catch (Exception e) {
+        Log.e(TAG, e.getMessage());
+        Log.e(TAG, "Failed to initialize ImageNet classifier for the benchmarker.");
+    }
+  }
+
+  /**
+   * Perform classification on a single bitmap image.
+   *
+   * @param bitmap: a {@link Bitmap} image to process.
+   * @param imageId: an ID uniquely representing the image.
+   */
+  @Override
+  public boolean processBitmap(Bitmap bitmap, int imageId)
+      throws IOException, InterruptedException {
+    if (shouldStop() || !readyToTest()) {
+      return false;
+    }
+    try {
+      Log.i(TAG, "Converting bitmap.");
+      convertBitmapToInput(bitmap);
+      Log.i(TAG, "Classifying image: " + imageId);
+      iterResult = classifier.classifyByteBuffer(imgData);
+    } catch (RuntimeException e) {
+      Log.e(TAG, e.getMessage());
+      Log.e(TAG, "Failed to classify image.");
+    }
+    if (iterResult == null || iterResult.latency == null) {
+      throw new RuntimeException("Classification result or timing is invalid.");
+    }
+    Log.d(TAG, "Native inference latency: " + iterResult.latency);
+    Log.i(TAG, iterResult.toString());
+
+    if (!benchmarkStarted) {  // Skip the first image to discount warming-up time.
+      benchmarkStarted = true;
+    } else {
+      totalRuntime += ((double) iterResult.latency);
+    }
+    return true;
+  }
+
+  /** Return how many classes are predicted per image. */
+  public int getNumPredictions() {
+    return classifier.getNumPredictions();
+  }
+
+  public OvicClassificationResult getLastClassificationResult() {
+    return iterResult;
+  }
+
+  @Override
+  public String getLastResultString() {
+    if (iterResult == null) {
+      return null;
+    } else {
+      return iterResult.toString();
+    }
+  }
+
+  /**
+   * Preprocess bitmap according to ImageNet protocol then writes result into a {@link ByteBuffer}.
+   *
+   * @param bitmap: a {@link Bitmap} source image.
+   */
+  private void convertBitmapToInput(Bitmap bitmap) {
+    // Perform transformations corresponding to evaluation mode.
+    float width = (float) bitmap.getWidth();
+    float height = (float) bitmap.getHeight();
+    int stWidth = Math.round((width - width * CENTRAL_FRACTION) / 2);
+    int stHeight = Math.round((height - height * CENTRAL_FRACTION) / 2);
+    int newWidth = Math.round(width - stWidth * 2);
+    int newHeight = Math.round(height - stHeight * 2);
+    bitmap = Bitmap.createBitmap(bitmap, stWidth, stHeight, newWidth, newHeight);
+    bitmap = Bitmap.createScaledBitmap(bitmap, imgWidth, imgHeight, true);
+    bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight());
+    loadsInputToByteBuffer();
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectionResult.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectionResult.java
new file mode 100644
index 0000000000..cf2902a5cb
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectionResult.java
@@ -0,0 +1,91 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import java.util.ArrayList;
+
+/** Result class for inference run on a single image. */
+public class OvicDetectionResult {
+
+  // Top K classes and probabilities.
+  public final ArrayList<BoundingBox> detections;
+  // Latency (ms).
+  public Long latency = -1L;
+  // id of the image.
+  public int id = -1;
+  // Number of valid detections (separately maintained, maybe different from detections.size()).
+  public int count = 0;
+
+  // Create OvicDetectionResult object with pre-filled capacity. Note that detections.size() will
+  // be equal to capacity after this call.
+  OvicDetectionResult(int capacity) {
+    detections = new ArrayList<BoundingBox>(capacity);
+    for (int i = 0; i < capacity; i++) {
+      detections.add(new BoundingBox(-1.0f, -1.0f, -1.0f, -1.0f, -1, -1.0f));
+    }
+  }
+
+  public void resetTo(Long latency, int id) {
+    count = 0;
+    this.latency = latency;
+    this.id = id;
+  }
+
+  public void addBox(float x1, float y1, float x2, float y2, int category, float score) {
+    detections.get(count).x1 = x1;
+    detections.get(count).y1 = y1;
+    detections.get(count).x2 = x2;
+    detections.get(count).y2 = y2;
+    detections.get(count).category = category;
+    detections.get(count).score = score;
+    count += 1;
+  }
+
+  public void scaleUp(double scaleFactorWidth, double scaleFactorHeight) {
+    for (BoundingBox box : detections) {
+      box.x1 = (float) (box.x1 * scaleFactorWidth);
+      box.y1 = (float) (box.y1 * scaleFactorHeight);
+      box.x2 = (float) (box.x2 * scaleFactorWidth);
+      box.y2 = (float) (box.y2 * scaleFactorHeight);
+    }
+  }
+
+  @Override
+  public String toString() {
+    String textToShow = latency + "ms";
+    int k = 0;
+    for (BoundingBox box : detections) {
+      textToShow +=
+          "\nPrediction ["
+              + k
+              + "] = Class "
+              + box.category
+              + " ("
+              + box.x1
+              + ", "
+              + box.y1
+              + ", "
+              + box.x2
+              + ", "
+              + box.y2
+              + ") : "
+              + box.score;
+      k++;
+    }
+
+
+    return textToShow;
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetector.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetector.java
new file mode 100644
index 0000000000..56836a79e5
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetector.java
@@ -0,0 +1,184 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.tensorflow.lite.Interpreter;
+import org.tensorflow.lite.TestHelper;
+
+/** Class for running COCO detection with a TfLite model. */
+public class OvicDetector implements AutoCloseable {
+
+  /** Tag for the {@link Log}. */
+  private static final String TAG = "OvicDetector";
+
+  /** An instance of the driver class to run model inference with Tensorflow Lite. */
+  private Interpreter tflite;
+
+  /** Labels corresponding to the output of the vision model. */
+  private final List<String> labelList;
+
+  /** Define the output format. */
+  private final Boolean inputIsFloat;
+
+  /** Number of detections per image. 10 for demo, 100 for the actual competition. */
+  private static final int NUM_RESULTS = 10;
+
+  /** The output arrays for the mobilenet SSD. */
+  private float[][][] outputLocations;
+  private float[][] outputClasses;
+  private float[][] outputScores;
+  private float[] numDetections;
+  private Map<Integer, Object> outputMap;
+
+  /** Input resolution. */
+  private final int[] inputDims;
+
+  /** Final result. */
+  public OvicDetectionResult result = null;
+
+  OvicDetector(InputStream labelInputStream, MappedByteBuffer model) throws IOException {
+    // Load the label list.
+    labelList = loadLabelList(labelInputStream);
+
+    // Create the TfLite interpreter.
+    tflite = new Interpreter(model, new Interpreter.Options().setNumThreads(1));
+    inputDims = TestHelper.getInputDims(tflite, 0);
+    inputIsFloat = TestHelper.getInputDataType(tflite, 0).equals("float");
+    if (inputDims.length != 4) {
+      throw new RuntimeException("The model's input dimensions must be 4 (BWHC).");
+    }
+    if (inputDims[0] != 1) {
+      throw new RuntimeException(
+          "The model must have a batch size of 1, got " + inputDims[0] + " instead.");
+    }
+    if (inputDims[3] != 3) {
+      throw new RuntimeException(
+          "The model must have three color channels, got " + inputDims[3] + " instead.");
+    }
+    // Check the resolution.
+    int minSide = Math.min(inputDims[1], inputDims[2]);
+    int maxSide = Math.max(inputDims[1], inputDims[2]);
+    if (minSide <= 0 || maxSide > 1000) {
+      throw new RuntimeException("The model's resolution must be between (0, 1000].");
+    }
+
+    // Initialize the input array and result arrays. The input images are stored in a list of
+    // Object. Since this function anaylzed one image per time, there is only 1 item.
+    // The output is fomulated as a map of int -> Object. The output arrays are added to the map.
+    outputLocations = new float[1][NUM_RESULTS][4];
+    outputClasses = new float[1][NUM_RESULTS];
+    outputScores = new float[1][NUM_RESULTS];
+    numDetections = new float[1];
+    outputMap = new HashMap<>();
+    outputMap.put(0, outputLocations);
+    outputMap.put(1, outputClasses);
+    outputMap.put(2, outputScores);
+    outputMap.put(3, numDetections);
+    // Preallocate the result. This will be where inference result is stored after each
+    // detectByteBuffer call.
+    result = new OvicDetectionResult(NUM_RESULTS);
+  }
+
+  public Boolean quantizedInput() {
+    return !inputIsFloat;
+  }
+
+  /** Reads label list from Assets. */
+  private static List<String> loadLabelList(InputStream labelInputStream) throws IOException {
+    List<String> labelList = new ArrayList<>();
+    try (BufferedReader reader =
+        new BufferedReader(new InputStreamReader(labelInputStream, StandardCharsets.UTF_8))) {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        labelList.add(line);
+      }
+    }
+    return labelList;
+  }
+
+  /**
+   * The interface to run the detection. This method currently only support float mobilenet_ssd
+   * model. The quantized models will be added in the future.
+   *
+   * @param imgData The image buffer in ByteBuffer format.
+   * @return boolean indicator of whether detection was a success. If success, the detection results
+   *  is available in the result member variable.
+   *     See OvicDetectionResult.java for details.
+   */
+  boolean detectByteBuffer(ByteBuffer imgData, int imageId) {
+    if (tflite == null) {
+      throw new RuntimeException(TAG + ": Detector has not been initialized; Failed.");
+    }
+    if (inputIsFloat == null) {
+      throw new RuntimeException(TAG + ": Detector input type has not been resolved.");
+    }
+
+    Object[] inputArray = {imgData};
+    tflite.runForMultipleInputsOutputs(inputArray, outputMap);
+
+    Long latency = getLastNativeInferenceLatencyMilliseconds();
+
+    // Update the results.
+    result.resetTo(latency, imageId);
+    for (int i = 0; i < NUM_RESULTS; i++) {
+      result.addBox(outputLocations[0][i][1] * inputDims[1],
+              outputLocations[0][i][0] * inputDims[1],
+              outputLocations[0][i][3] * inputDims[2],
+              outputLocations[0][i][2] * inputDims[2],
+              Math.round(outputClasses[0][i] + 1 /* Label offset */),
+              outputScores[0][i]);
+    }
+    return true;  // Marks that the result is available.
+  }
+
+  /*
+   * Get native inference latency of last image detection run.
+   *  @throws RuntimeException if model is uninitialized.
+   *  @return The inference latency in millisecond.
+   */
+  public Long getLastNativeInferenceLatencyMilliseconds() {
+    if (tflite == null) {
+      throw new RuntimeException(TAG + ": ImageNet classifier has not been initialized; Failed.");
+    }
+    Long latency = tflite.getLastNativeInferenceDurationNanoseconds();
+    return (latency == null) ? null : (Long) (latency / 1000000);
+  }
+
+  public int[] getInputDims() {
+    return inputDims;
+  }
+
+  public List<String> getLabels() {
+    return labelList;
+  }
+
+  /** Closes tflite to release resources. */
+  @Override
+  public void close() {
+    tflite.close();
+    tflite = null;
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java
new file mode 100644
index 0000000000..1a4e193ff2
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java
@@ -0,0 +1,160 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import android.graphics.Bitmap;
+import android.util.Log;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+
+/**
+ * Class that benchmarks object detection models.
+ */
+public final class OvicDetectorBenchmarker extends OvicBenchmarker {
+  /** Tag for the {@link Log}. */
+  private static final String TAG = "OvicDetectorBenchmarker";
+
+  public double scaleFactorWidth = 1.0f;
+  public double scaleFactorHeight = 1.0f;
+  private Bitmap scaledBitmap = null;  // Preallocate bitmap for scaling.
+
+  private OvicDetector detector;
+
+  /**
+   * Initializes an {@link OvicDetectionBenchmarker}
+   *
+   * @param wallTime: a double number specifying the total amount of time to benchmark.
+   */
+  public OvicDetectorBenchmarker(double wallTime) {
+    super(wallTime);
+  }
+
+  /** Check to see if the detector is ready to test. */
+  @Override
+  public boolean readyToTest() {
+    return (detector != null);
+  }
+
+  /**
+   * Getting the benchmarker ready for detecting images.
+   *
+   * @param labelInputStream: an {@link InputStream} specifying where the list of labels should be
+   *     read from.
+   * @param model: a {@link MappedByteBuffer} model to benchmark.
+   */
+  @Override
+  public void getReadyToTest(InputStream labelInputStream, MappedByteBuffer model) {
+    try {
+      Log.i(TAG, "Creating detector.");
+      detector = new OvicDetector(labelInputStream, model);
+      quantizedInput = detector.quantizedInput();
+      int[] inputDims = detector.getInputDims();
+      imgHeight = inputDims[1];
+      imgWidth = inputDims[2];
+      if (quantizedInput) {
+        imgData = ByteBuffer.allocateDirect(DIM_BATCH_SIZE * imgHeight * imgWidth * DIM_PIXEL_SIZE);
+      } else {
+        imgData =
+            ByteBuffer.allocateDirect(DIM_BATCH_SIZE * imgHeight * imgWidth * DIM_PIXEL_SIZE * 4);
+      }
+      imgData.order(ByteOrder.nativeOrder());
+      intValues = new int[imgHeight * imgWidth];
+      benchmarkStarted = false;
+    } catch (Exception e) {
+      Log.e(TAG, e.getMessage());
+      Log.e(TAG, "Failed to initialize COCO detector for the benchmarker.", e);
+    }
+  }
+
+  /**
+   * Perform detection on a single ByteBuffer {@link ByteBuffer} image. The image must have the
+   * same dimension that the model expects.
+   *
+   * @param image: a {@link ByteBuffer} image to process.
+   * @param imageId: an ID uniquely representing the image.
+   */
+  public boolean processBuffer(ByteBuffer image, int imageId) {
+    if (!readyToTest()) {
+      return false;
+    }
+    try {
+      if (!detector.detectByteBuffer(image, imageId)) {
+        return false;
+      }
+    } catch (RuntimeException e) {
+      Log.e(TAG, e.getMessage());
+      return false;
+    }
+
+    if (!benchmarkStarted) { // Skip the first image to discount warming-up time.
+      benchmarkStarted = true;
+    } else {
+      totalRuntime += ((double) detector.result.latency);
+    }
+    return true;  // Indicating that result is ready.
+  }
+
+  /**
+   * Perform detection on a single bitmap image.
+   *
+   * @param bitmap: a {@link Bitmap} image to process.
+   * @param imageId: an ID uniquely representing the image.
+   */
+  @Override
+  public boolean processBitmap(Bitmap bitmap, int imageId)
+      throws IOException, InterruptedException {
+    if (shouldStop() || !readyToTest()) {
+      return false;
+    }
+    convertBitmapToInput(bitmap);  // Scale bitmap if needed, store result in imgData.
+    if (!processBuffer(imgData, imageId)) {
+      return false;
+    }
+    // Scale results back to original image coordinates.
+    detector.result.scaleUp(scaleFactorWidth, scaleFactorHeight);
+    return true;  // Indicating that result is ready.
+  }
+
+  public OvicDetectionResult getLastDetectionResult() {
+    return detector.result;
+  }
+
+  @Override
+  public String getLastResultString() {
+    if (detector.result == null) {
+      return null;
+    }
+    return detector.result.toString();
+  }
+
+  /**
+   * Preprocess bitmap image into {@link ByteBuffer} format for the detector.
+   *
+   * @param bitmap: a {@link Bitmap} source image.
+   */
+  private void convertBitmapToInput(Bitmap bitmap) {
+    int originalWidth = bitmap.getWidth();
+    int originalHeight = bitmap.getHeight();
+    scaledBitmap = Bitmap.createScaledBitmap(bitmap, imgWidth, imgHeight, true);
+    scaleFactorWidth = originalWidth * 1.0 / imgWidth;
+    scaleFactorHeight = originalHeight * 1.0 / imgHeight;
+    scaledBitmap.getPixels(intValues, 0, imgWidth, 0, 0, imgWidth, imgHeight);
+    scaledBitmap.recycle();
+    loadsInputToByteBuffer();
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicValidator.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicValidator.java
index a504ec74a9..baa14baf92 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicValidator.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicValidator.java
@@ -51,7 +51,7 @@ public class OvicValidator {
       MappedByteBuffer model = loadModelFile(modelFile);
       OvicClassifier classifier = new OvicClassifier(labelsInputStream, model);
       ByteBuffer imgData = createByteBufferForClassifier(classifier);
-      OvicSingleImageResult testResult = classifier.classifyByteBuffer(imgData);
+      OvicClassificationResult testResult = classifier.classifyByteBuffer(imgData);
       if (testResult.topKClasses.isEmpty()) {
         throw new RuntimeException("Failed to return top K predictions.");
       }
diff --git a/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java
index 1587c3c56f..99e874ca78 100644
--- a/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java
+++ b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java
@@ -1,4 +1,4 @@
-/*Copyright 2018 Google LLC
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ public final class OvicClassifierTest {
   private MappedByteBuffer lowResModel = null;
   private ByteBuffer testImage = null;
   private ByteBuffer lowResTestImage = null;
-  private OvicSingleImageResult testResult = null;
+  private OvicClassificationResult testResult = null;
   private static final String LABELS_PATH =
       "tensorflow/contrib/lite/java/ovic/src/testdata/labels.txt";
   private static final String QUANTIZED_MODEL_PATH =
@@ -147,7 +147,7 @@ public final class OvicClassifierTest {
     return imgData;
   }
 
-  private static void assertCorrectTopK(OvicSingleImageResult testResult) {
+  private static void assertCorrectTopK(OvicClassificationResult testResult) {
     assertThat(testResult.topKClasses.size() > 0).isTrue();
     Boolean topKAccurate = false;
     // Assert that the correct class is in the top K.
diff --git a/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicDetectorTest.java b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicDetectorTest.java
new file mode 100644
index 0000000000..4681e26052
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicDetectorTest.java
@@ -0,0 +1,149 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.awt.Graphics2D;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import javax.imageio.ImageIO;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Unit test for {@link org.tensorflow.ovic.OvicDetector}. */
+@RunWith(JUnit4.class)
+public final class OvicDetectorTest {
+  private OvicDetector detector = null;
+  private InputStream labelsInputStream = null;
+  private MappedByteBuffer model = null;
+  private ByteBuffer testImage = null;
+
+  private static final float IMAGE_MEAN = 128f;
+  private static final float IMAGE_STD = 128f;
+
+  private Boolean quantizedInput = null;
+  private static final String LABELS_PATH =
+      "tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt";
+  private static final String MODEL_PATH =
+      "external/tflite_mobilenet_ssd_quant/detect.tflite";
+  private static final String TEST_IMAGE_PATH =
+      "external/tflite_ovic_testdata/test_image_224.jpg";
+  private static final int GROUNDTRUTH = 1 /* Person */;
+
+  @Before
+  public void setUp() {
+    try {
+      // load models.
+      model = loadModelFile(MODEL_PATH);
+
+      // Load label files;
+      File labelsfile = new File(LABELS_PATH);
+      labelsInputStream = new FileInputStream(labelsfile);
+
+      // Create detector.
+      detector = new OvicDetector(labelsInputStream, model);
+      quantizedInput = detector.quantizedInput();
+
+      // Load test image and convert into byte buffer.
+      File imageFile = new File(TEST_IMAGE_PATH);
+      BufferedImage rawimg = ImageIO.read(imageFile);
+      int[] inputDims = detector.getInputDims();
+      BufferedImage img = new BufferedImage(inputDims[1], inputDims[2], rawimg.getType());
+      Graphics2D g = img.createGraphics();
+      g.drawImage(rawimg, 0, 0, inputDims[1], inputDims[2], null);
+      g.dispose();
+      testImage = toByteBuffer(img);
+    } catch (IOException e) {
+      System.out.println(e.getMessage());
+    }
+
+    System.out.println("Successfully setup");
+  }
+
+  private static MappedByteBuffer loadModelFile(String modelFilePath) throws IOException {
+    File modelfile = new File(modelFilePath);
+    FileInputStream inputStream = new FileInputStream(modelfile);
+    FileChannel fileChannel = inputStream.getChannel();
+    long startOffset = 0L;
+    long declaredLength = fileChannel.size();
+    return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
+  }
+
+  private ByteBuffer toByteBuffer(BufferedImage image) {
+    ByteBuffer imgData;
+    if (quantizedInput) {
+      imgData = ByteBuffer.allocateDirect(image.getHeight() * image.getWidth() * 3);
+    } else {
+      imgData = ByteBuffer.allocateDirect(image.getHeight() * image.getWidth() * 12);
+    }
+    imgData.order(ByteOrder.nativeOrder());
+    for (int y = 0; y < image.getHeight(); y++) {
+      for (int x = 0; x < image.getWidth(); x++) {
+        int pixelValue = image.getRGB(x, y);
+        if (quantizedInput) {
+          // Quantized model
+          imgData.put((byte) ((pixelValue >> 16) & 0xFF));
+          imgData.put((byte) ((pixelValue >> 8) & 0xFF));
+          imgData.put((byte) (pixelValue & 0xFF));
+        } else {
+          // Float model
+          imgData.putFloat((((pixelValue >> 16) & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+          imgData.putFloat((((pixelValue >> 8) & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+          imgData.putFloat(((pixelValue & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+        }
+      }
+    }
+    return imgData;
+  }
+
+  @Test
+  public void ovicDetector_detectSuccess() throws Exception {
+    assertThat(detector.detectByteBuffer(testImage, 1)).isTrue();
+    assertThat(detector.result != null).isTrue();
+  }
+
+  @Test
+  public void ovicDetector_simpleBatchTest() throws Exception {
+    final int numRepeats = 5;
+    for (int i = 0; i < numRepeats; i++) {
+      assertThat(detector.detectByteBuffer(testImage, 1)).isTrue();
+      OvicDetectionResult result = detector.result;
+      Boolean detectWithinTop5 = false;
+      for (int j = 0; j < Math.min(5, result.count); j++) {
+        if (result.detections.get(j).category == GROUNDTRUTH) {
+          detectWithinTop5 = true;
+          break;
+        }
+      }
+      if (!detectWithinTop5) {
+        System.out.println("---------------- Image " + i + " ---------------------");
+        System.out.println("Expect category " + GROUNDTRUTH);
+        System.out.println("Detection results: ");
+        System.out.println(result.toString());
+      }
+      assertThat(detectWithinTop5).isTrue();
+    }
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/testdata/BUILD b/tensorflow/contrib/lite/java/ovic/src/testdata/BUILD
index 1021ea30dd..051aa2204e 100644
--- a/tensorflow/contrib/lite/java/ovic/src/testdata/BUILD
+++ b/tensorflow/contrib/lite/java/ovic/src/testdata/BUILD
@@ -14,6 +14,9 @@ filegroup(
 )
 
 exports_files(
-    ["labels.txt"],
+    [
+        "labels.txt",
+        "coco_labels.txt",
+    ],
     visibility = ["//visibility:public"],
 )
diff --git a/tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt b/tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt
new file mode 100644
index 0000000000..d91f535b1a
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt
@@ -0,0 +1,91 @@
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+empty
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+empty
+backpack
+umbrella
+empty
+empty
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+empty
+wine glasses
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+empty
+dining table
+empty
+empty
+toilet
+empty
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+empty
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
+empty
-- 
GitLab


From 1630584951975479dee852cf6f7603fe6819fde1 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 1 Oct 2018 13:28:17 -0700
Subject: [PATCH 0961/1357] Fixes possible out-of-bounds access by strided
 slice.

PiperOrigin-RevId: 215269882
---
 tensorflow/core/kernels/strided_slice_op.cc      | 2 +-
 tensorflow/python/kernel_tests/array_ops_test.py | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index f0575de4d9..3e8a4c5b72 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -149,7 +149,7 @@ class StridedSliceOp : public OpKernel {
       // NDIM and T
       if (is_simple_slice && std::is_same<Device, CPUDevice>::value &&
           input_dims == 2 && processing_shape.dims() == 2 &&
-          final_shape.dims() == 2) {
+          final_shape.dims() == 2 && new_axis_mask == 0) {
         MemCpyFunctor<T> functor;
         if (functor.Copy(input, begin, end, result)) {
           return;
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index c5547b19be..dcc594789e 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -615,6 +615,14 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker[:, 0]
       _ = checker[:, :, 0]
 
+  def testBothNewAxisAndShrink(self):
+    with self.test_session(use_gpu=True):
+      ones = array_ops.placeholder(shape=[2, 2], dtype=dtypes.int16)
+      self.assertAllEqual(
+          ones[array_ops.newaxis, :, 0].eval(
+              feed_dict={ones: [[1, 1], [1, 1]]}),
+          [[1, 1]])
+
   def testTensorIndexing(self):
     with self.test_session(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
-- 
GitLab


From c86f5941359526b91d85daf844e94ff5d39b2d6c Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 1 Oct 2018 13:40:30 -0700
Subject: [PATCH 0962/1357] Make cond_v2 If op lowering work in a defun +
 eager.

Prior to this change, the lowering pass assumed that the If op
functions would be available in the If op's graph. If the If op is
defined in a defun and then called via eager execution, the functions
will be in the eager context, but not in the defun's graph. This
change makes the lowering pass correctly use the function library
passed in by the caller via GraphOptimizationPassOptions.

PiperOrigin-RevId: 215271990
---
 tensorflow/core/common_runtime/lower_if_op.cc | 43 ++++++++++++-------
 tensorflow/core/common_runtime/lower_if_op.h  |  5 ++-
 .../core/common_runtime/lower_if_op_test.cc   |  4 +-
 .../kernel_tests/control_flow_ops_py_test.py  | 22 ++++++++++
 4 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/tensorflow/core/common_runtime/lower_if_op.cc b/tensorflow/core/common_runtime/lower_if_op.cc
index dfce7c23e7..a02084f223 100644
--- a/tensorflow/core/common_runtime/lower_if_op.cc
+++ b/tensorflow/core/common_runtime/lower_if_op.cc
@@ -38,11 +38,12 @@ class CondBuilder {
  public:
   enum Branch { kElseBranch = 0, kThenBranch = 1 };
 
-  // Create a CondBuilder to create the lowering of If op.  that has then and
+  // Create a CondBuilder to create the lowered form of `if_op` with then and
   // else functions named `then_fn_name` and `else_fn_name` respectively in the
-  // given graph.
+  // `graph`. The functions should be available in `flib`.
   CondBuilder(Node* if_op, const string& then_fn_name,
-              const string& else_fn_name, Graph* graph);
+              const string& else_fn_name, const FunctionLibraryDefinition& flib,
+              Graph* graph);
 
   // Constructs the basic conditional control flow using switch and merge nodes.
   Status CreatePivotNodes();
@@ -89,6 +90,7 @@ class CondBuilder {
   Node* then_call_node_;
   Node* else_call_node_;
   Graph* graph_;
+  const FunctionLibraryDefinition& flib_;
   string name_;
 
   NodeBuilder then_call_builder_;
@@ -96,9 +98,11 @@ class CondBuilder {
 };
 
 CondBuilder::CondBuilder(Node* if_op, const string& then_fn_name,
-                         const string& else_fn_name, Graph* graph)
+                         const string& else_fn_name,
+                         const FunctionLibraryDefinition& flib, Graph* graph)
     : if_op_(if_op),
       graph_(graph),
+      flib_(flib),
       name_(if_op->name()),
       then_call_builder_(NewName("then"), then_fn_name, graph->op_registry()),
       else_call_builder_(NewName("else"), else_fn_name, graph->op_registry()) {
@@ -193,15 +197,15 @@ Status CondBuilder::AddOutputs() {
   return Status::OK();
 }
 
-Status InlineCallInGraph(Node* n, Graph* g) {
-  const auto& lib = g->flib_def();
-  const FunctionDef* fdef = lib.Find(n->type_string());
+Status InlineCallInGraph(Node* n, const FunctionLibraryDefinition& flib,
+                         Graph* g) {
+  const FunctionDef* fdef = flib.Find(n->type_string());
   CHECK(fdef != nullptr);
   FunctionBody* fbody;
   TF_RETURN_IF_ERROR(
-      FunctionDefToBodyHelper(*fdef, n->attrs(), &lib,
-                              [&lib](const string& op, const OpDef** sig) {
-                                return lib.LookUpOpDef(op, sig);
+      FunctionDefToBodyHelper(*fdef, n->attrs(), &flib,
+                              [&flib](const string& op, const OpDef** sig) {
+                                return flib.LookUpOpDef(op, sig);
                               },
                               &fbody));
   // TODO(jpienaar): Improve this interface to make the need to delete it
@@ -219,8 +223,8 @@ Status CondBuilder::BuildLoweredIfOutput() {
 }
 
 Status CondBuilder::InlineCallNodes() {
-  TF_RETURN_IF_ERROR(InlineCallInGraph(then_call_node_, graph_));
-  TF_RETURN_IF_ERROR(InlineCallInGraph(else_call_node_, graph_));
+  TF_RETURN_IF_ERROR(InlineCallInGraph(then_call_node_, flib_, graph_));
+  TF_RETURN_IF_ERROR(InlineCallInGraph(else_call_node_, flib_, graph_));
   return Status::OK();
 }
 
@@ -240,6 +244,12 @@ Status LowerIfOpPass::Run(const GraphOptimizationPassOptions& options) {
     return errors::Internal("Lowering If op requires a graph to be available.");
   }
 
+  FunctionLibraryDefinition* flib = options.flib_def;
+  if (flib == nullptr) {
+    return errors::Internal(
+        "Lowering If op requires a FunctionLibraryDefinition to be available.");
+  }
+
   // Match all the nodes that need to be rewritten.
   gtl::InlinedVector<Node*, 2> matches;
   for (Node* n : g->op_nodes()) {
@@ -251,12 +261,14 @@ Status LowerIfOpPass::Run(const GraphOptimizationPassOptions& options) {
     }
   }
   for (Node* n : matches) {
-    TF_RETURN_IF_ERROR(RewriteNode(n, g));
+    TF_RETURN_IF_ERROR(RewriteNode(n, *flib, g));
   }
   return Status::OK();
 }
 
-Status LowerIfOpPass::RewriteNode(Node* n, Graph* g) {
+Status LowerIfOpPass::RewriteNode(Node* n,
+                                  const FunctionLibraryDefinition& flib,
+                                  Graph* g) {
   const AttrValue* then_attr = n->attrs().Find("then_branch");
   if (then_attr == nullptr) {
     return errors::InvalidArgument("Then branch function missing");
@@ -266,7 +278,8 @@ Status LowerIfOpPass::RewriteNode(Node* n, Graph* g) {
     return errors::InvalidArgument("Else branch function missing");
   }
 
-  CondBuilder cb(n, then_attr->func().name(), else_attr->func().name(), g);
+  CondBuilder cb(n, then_attr->func().name(), else_attr->func().name(), flib,
+                 g);
   TF_RETURN_IF_ERROR(cb.CreatePivotNodes());
   TF_RETURN_IF_ERROR(cb.AddInputs());
   TF_RETURN_IF_ERROR(cb.AddOutputs());
diff --git a/tensorflow/core/common_runtime/lower_if_op.h b/tensorflow/core/common_runtime/lower_if_op.h
index a9ef39ae5c..5ab1123e3f 100644
--- a/tensorflow/core/common_runtime/lower_if_op.h
+++ b/tensorflow/core/common_runtime/lower_if_op.h
@@ -29,8 +29,9 @@ class LowerIfOpPass : public GraphOptimizationPass {
   Status Run(const GraphOptimizationPassOptions& options) override;
 
  private:
-  // Rewrite the given If node `n` in graph `g` to use the switch-merge form.
-  Status RewriteNode(Node* n, Graph* g);
+  // Rewrite the given If node `n` in graph `g` to use the switch-merge
+  // form. `flib` should contain the branch functions referenced by `n`.
+  Status RewriteNode(Node* n, const FunctionLibraryDefinition& flib, Graph* g);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/lower_if_op_test.cc b/tensorflow/core/common_runtime/lower_if_op_test.cc
index 319a617b32..044a355d06 100644
--- a/tensorflow/core/common_runtime/lower_if_op_test.cc
+++ b/tensorflow/core/common_runtime/lower_if_op_test.cc
@@ -36,9 +36,7 @@ namespace tensorflow {
 namespace {
 
 Status Rewrite(std::unique_ptr<Graph>* graph) {
-  FunctionDefLibrary flib;
-  FunctionLibraryDefinition flib_def((*graph)->op_registry(), flib);
-
+  FunctionLibraryDefinition flib_def((*graph)->flib_def());
   GraphOptimizationPassOptions opt_options;
   opt_options.graph = graph;
   opt_options.flib_def = &flib_def;
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index d91a848e01..ae61be614e 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -31,6 +31,7 @@ from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import device_lib
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function as eager_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -3414,6 +3415,27 @@ class EagerTest(test.TestCase):
       self.assertAllEqual(r.numpy(), 10)
       self.assertFalse(isinstance(r, list))
 
+  def testCondInDefun(self):
+    if "GPU" in [d.device_type for d in device_lib.list_local_devices()]:
+      return unittest.skip("b/113346829 (gpu failure)")
+
+    with context.eager_mode():
+
+      @eager_function.defun
+      def foo(pred):
+        # TODO(b/111124878): this only needs to output one element.
+        fn1 = lambda: (constant_op.constant(10), constant_op.constant(100))
+        fn2 = lambda: (constant_op.constant(20), constant_op.constant(200))
+        return control_flow_ops.cond(constant_op.constant(pred), fn1, fn2)
+
+      r = foo(True)
+      self.assertAllEqual(r[0].numpy(), 10)
+      self.assertNotIsInstance(r, list)
+
+      r = foo(False)
+      self.assertAllEqual(r[0].numpy(), 20)
+      self.assertFalse(isinstance(r, list))
+
   def testWhileLoop(self):
     with context.eager_mode():
       tensor = constant_op.constant([1, 2, 3, 4, 5])
-- 
GitLab


From 44acd839c57494860666c799afd24360f1df3bed Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 13:42:40 -0700
Subject: [PATCH 0963/1357] Fix reported cuDNN default version during
 configuration.

PiperOrigin-RevId: 215272308
---
 configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 2de2365ff3..57d9574d1f 100644
--- a/configure.py
+++ b/configure.py
@@ -884,7 +884,7 @@ def set_tf_cudnn_version(environ_cp):
   """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION."""
   ask_cudnn_version = (
       'Please specify the cuDNN version you want to use. '
-      '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION
+      '[Leave empty to default to cuDNN %s]: ') % _DEFAULT_CUDNN_VERSION
 
   for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
     tf_cudnn_version = get_from_env_or_user_or_default(
-- 
GitLab


From 3039a4694e22674b502257ae34b0a5b614a631f3 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Mon, 1 Oct 2018 13:43:49 -0700
Subject: [PATCH 0964/1357] [XLA] Migrate from gtl::FlatMap to
 absl::flat_hash_map

PiperOrigin-RevId: 215272497
---
 tensorflow/compiler/jit/BUILD                 |  5 +++
 tensorflow/compiler/jit/deadness_analysis.cc  | 22 ++++++------
 .../compiler/jit/deadness_analysis_internal.h |  4 +--
 tensorflow/compiler/jit/kernels/BUILD         |  1 +
 tensorflow/compiler/jit/kernels/xla_ops.cc    |  3 +-
 .../jit/mark_for_compilation_pass_test.cc     | 11 +++---
 .../jit/resource_operation_safety_analysis.cc |  1 -
 .../compiler/jit/xla_compilation_cache.h      |  6 ++--
 tensorflow/compiler/tf2xla/BUILD              |  2 ++
 .../tf2xla/resource_operation_table.cc        | 14 ++++----
 .../tf2xla/resource_operation_table_test.cc   |  3 +-
 tensorflow/compiler/xla/client/BUILD          |  1 +
 tensorflow/compiler/xla/client/xla_builder.h  |  4 +--
 tensorflow/compiler/xla/service/BUILD         | 33 +++++++++++++++++
 .../compiler/xla/service/allocation_tracker.h |  5 +--
 .../xla/service/batchnorm_expander.cc         |  1 -
 .../xla/service/bfloat16_propagation.h        |  6 ++--
 .../compiler/xla/service/buffer_assignment.cc | 30 ++++++++--------
 .../compiler/xla/service/buffer_assignment.h  | 23 ++++++------
 .../compiler/xla/service/buffer_liveness.h    |  1 -
 tensorflow/compiler/xla/service/call_graph.h  |  6 ++--
 .../compiler/xla/service/copy_insertion.cc    | 11 +++---
 tensorflow/compiler/xla/service/cpu/BUILD     |  4 +++
 .../xla/service/cpu/cpu_layout_assignment.cc  |  3 +-
 .../compiler/xla/service/cpu/cpu_runtime.cc   |  4 +--
 .../compiler/xla/service/cpu/ir_emitter.cc    |  4 +--
 .../compiler/xla/service/cpu/ir_emitter.h     | 10 +++---
 .../service/cpu/target_machine_features.cc    |  1 +
 .../xla/service/cpu/target_machine_features.h |  5 ++-
 tensorflow/compiler/xla/service/defuser.cc    |  3 +-
 .../compiler/xla/service/dfs_hlo_visitor.h    |  1 -
 tensorflow/compiler/xla/service/gpu/BUILD     |  2 ++
 .../xla/service/gpu/gpu_executable.cc         |  3 +-
 .../compiler/xla/service/gpu/gpu_executable.h |  4 +--
 .../xla/service/gpu/stream_assignment.h       |  4 +--
 .../compiler/xla/service/heap_simulator.cc    | 17 +++++----
 .../compiler/xla/service/heap_simulator.h     | 21 ++++++-----
 .../xla/service/heap_simulator_test.cc        |  4 +--
 .../xla/service/hlo_alias_analysis.cc         |  7 ++--
 .../compiler/xla/service/hlo_alias_analysis.h |  3 +-
 .../compiler/xla/service/hlo_clone_context.h  | 12 +++----
 .../compiler/xla/service/hlo_computation.cc   | 11 +++---
 .../compiler/xla/service/hlo_computation.h    | 10 +++---
 .../compiler/xla/service/hlo_domain_map.cc    |  5 +--
 .../compiler/xla/service/hlo_domain_map.h     |  9 +++--
 .../compiler/xla/service/hlo_instruction.cc   | 13 +++----
 .../compiler/xla/service/hlo_instruction.h    |  8 ++---
 .../compiler/xla/service/hlo_instructions.cc  |  6 ++--
 .../xla/service/hlo_memory_scheduler.cc       | 35 +++++++++----------
 .../xla/service/hlo_memory_scheduler.h        | 11 +++---
 .../xla/service/hlo_memory_scheduler_test.cc  |  5 +--
 tensorflow/compiler/xla/service/hlo_module.cc |  5 +--
 .../xla/service/hlo_module_group_metadata.h   | 14 ++++----
 .../xla/service/hlo_module_group_util.h       |  4 +--
 tensorflow/compiler/xla/service/hlo_opcode.cc |  4 +--
 .../compiler/xla/service/hlo_ordering.h       |  8 ++---
 .../compiler/xla/service/hlo_pass_pipeline.cc |  3 +-
 .../compiler/xla/service/hlo_reachability.h   |  4 +--
 .../xla/service/hlo_rematerialization.cc      | 11 +++---
 .../xla/service/hlo_rematerialization.h       |  4 +--
 .../compiler/xla/service/hlo_schedule.cc      | 19 +++++-----
 .../compiler/xla/service/hlo_schedule.h       |  6 ++--
 .../compiler/xla/service/hlo_verifier.cc      |  6 ++--
 .../xla/service/indexed_array_analysis.cc     |  3 +-
 .../xla/service/indexed_array_analysis.h      |  4 +--
 .../xla/service/instruction_fusion.cc         |  8 ++---
 .../compiler/xla/service/instruction_fusion.h |  5 +--
 .../compiler/xla/service/layout_assignment.h  |  6 ++--
 tensorflow/compiler/xla/service/llvm_ir/BUILD |  1 +
 .../xla/service/llvm_ir/alias_analysis.h      | 10 +++---
 .../xla/service/multi_output_fusion.cc        |  2 +-
 .../xla/service/multi_output_fusion.h         |  3 +-
 .../compiler/xla/service/name_uniquer.h       |  4 +--
 .../xla/service/reduce_precision_insertion.h  |  1 -
 .../xla/service/tuple_points_to_analysis.h    |  1 -
 .../service/while_loop_constant_sinking.cc    |  1 -
 .../while_loop_invariant_code_motion.cc       |  8 ++---
 .../xla/service/while_loop_simplifier.cc      |  6 ++--
 tensorflow/compiler/xla/tests/BUILD           |  1 +
 .../xla/tests/xla_hlo_profile_test.cc         |  8 ++---
 80 files changed, 319 insertions(+), 259 deletions(-)

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 5bf4af1014..29b60d1dbe 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -258,6 +258,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:variable_ops",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -323,6 +324,7 @@ cc_library(
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
@@ -400,6 +402,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:bounds_check",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
@@ -471,6 +474,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -509,6 +513,7 @@ tf_cc_test(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/grappler/optimizers/data:graph_utils",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index 25e2e9a7af..e63d4b7792 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/deadness_analysis.h"
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/jit/deadness_analysis_internal.h"
 #include "tensorflow/core/graph/algorithm.h"
@@ -420,15 +421,15 @@ class PredicateFactory {
     }
   };
 
-  gtl::FlatMap<SignatureForAndOr, std::unique_ptr<Predicate>,
-               HashSignatureForAndOr>
+  absl::flat_hash_map<SignatureForAndOr, std::unique_ptr<Predicate>,
+                      HashSignatureForAndOr>
       interned_and_or_instances_;
-  gtl::FlatMap<SignatureForNot, std::unique_ptr<Predicate>>
+  absl::flat_hash_map<SignatureForNot, std::unique_ptr<Predicate>>
       interned_not_instances_;
-  gtl::FlatMap<SignatureForAndRec, std::unique_ptr<Predicate>>
+  absl::flat_hash_map<SignatureForAndRec, std::unique_ptr<Predicate>>
       interned_and_rec_instances_;
-  gtl::FlatMap<SignatureForSymbol, std::unique_ptr<Predicate>,
-               HashSignatureForSymbol>
+  absl::flat_hash_map<SignatureForSymbol, std::unique_ptr<Predicate>,
+                      HashSignatureForSymbol>
       interned_symbol_instances_;
 };
 
@@ -572,7 +573,8 @@ class DeadnessAnalysisImpl : public DeadnessAnalysis {
   Status PopulateWithReversePostOrder(absl::Span<Node* const> rpo);
   bool HasInputsWithMismatchingDeadness(const Node& node) override;
   void Print() const override;
-  gtl::FlatMap<TensorId, string, TensorId::Hasher> PredicateMapAsString() const;
+  absl::flat_hash_map<TensorId, string, TensorId::Hasher> PredicateMapAsString()
+      const;
 
  private:
   enum class EdgeKind { kDataAndControl, kDataOnly, kControlOnly };
@@ -614,7 +616,7 @@ class DeadnessAnalysisImpl : public DeadnessAnalysis {
   Status HandleNode(Node* n, std::vector<bool>* should_revisit);
 
   const Graph& graph_;
-  gtl::FlatMap<TensorId, Predicate*, TensorId::Hasher> predicate_map_;
+  absl::flat_hash_map<TensorId, Predicate*, TensorId::Hasher> predicate_map_;
   PredicateFactory predicate_factory_;
   bool vlog_;
 };
@@ -977,9 +979,9 @@ DeadnessAnalysis::~DeadnessAnalysis() {}
   return Status::OK();
 }
 
-gtl::FlatMap<TensorId, string, TensorId::Hasher>
+absl::flat_hash_map<TensorId, string, TensorId::Hasher>
 DeadnessAnalysisImpl::PredicateMapAsString() const {
-  gtl::FlatMap<TensorId, string, TensorId::Hasher> result;
+  absl::flat_hash_map<TensorId, string, TensorId::Hasher> result;
   std::vector<TensorId> tensor_ids;
   for (const auto& kv_pair : predicate_map_) {
     CHECK(result.insert({kv_pair.first, kv_pair.second->ToString()}).second);
diff --git a/tensorflow/compiler/jit/deadness_analysis_internal.h b/tensorflow/compiler/jit/deadness_analysis_internal.h
index 3df2679c62..354782374a 100644
--- a/tensorflow/compiler/jit/deadness_analysis_internal.h
+++ b/tensorflow/compiler/jit/deadness_analysis_internal.h
@@ -16,15 +16,15 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_JIT_DEADNESS_ANALYSIS_INTERNAL_H_
 #define TENSORFLOW_COMPILER_JIT_DEADNESS_ANALYSIS_INTERNAL_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace tensorflow {
 namespace deadness_analysis_internal {
 
 // Returns a map describing the predicate each Tensor was mapped to.  For
 // testing purposes only.
-using PredicateMapTy = gtl::FlatMap<TensorId, string, TensorId::Hasher>;
+using PredicateMapTy = absl::flat_hash_map<TensorId, string, TensorId::Hasher>;
 Status ComputePredicates(const Graph& graph, PredicateMapTy* out_predicate_map);
 
 // Returns a map describing the predicate each Tensor was mapped to.  For
diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD
index 0839f1cb3d..26cb3af9d6 100644
--- a/tensorflow/compiler/jit/kernels/BUILD
+++ b/tensorflow/compiler/jit/kernels/BUILD
@@ -26,6 +26,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/core/kernels:variable_ops",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
     alwayslink = 1,
diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc
index a85006eb03..cfd27a6510 100644
--- a/tensorflow/compiler/jit/kernels/xla_ops.cc
+++ b/tensorflow/compiler/jit/kernels/xla_ops.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/kernels/xla_ops.h"
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
@@ -163,7 +164,7 @@ class XlaExecutableClosureStore {
  private:
   mutex mutex_;
   int64 key_counter_ GUARDED_BY(mutex_);
-  gtl::FlatMap<KeyT, XlaExecutableClosure> closures_ GUARDED_BY(mutex_);
+  absl::flat_hash_map<KeyT, XlaExecutableClosure> closures_ GUARDED_BY(mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(XlaExecutableClosureStore);
 };
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
index 4f9145b479..2a80c745e3 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.h"
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/match.h"
 #include "tensorflow/cc/framework/ops.h"
@@ -61,10 +62,10 @@ std::unordered_map<string, string> GetClusters(const Graph& graph) {
   return ids;
 }
 
-gtl::FlatMap<string, std::vector<string>> GetClusterSets(
+absl::flat_hash_map<string, std::vector<string>> GetClusterSets(
     const Graph& g, std::vector<string>* cluster_names = nullptr) {
   CHECK(cluster_names == nullptr || cluster_names->empty());
-  gtl::FlatMap<string, std::vector<string>> cluster_sets;
+  absl::flat_hash_map<string, std::vector<string>> cluster_sets;
   for (const auto& p : GetClusters(g)) {
     cluster_sets[p.second].push_back(p.first);
   }
@@ -566,7 +567,7 @@ TEST(XlaCompilationTest, ResourcesClusteringAllowed) {
   std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
   TF_EXPECT_OK(root.ToGraph(graph.get()));
   TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph));
-  gtl::FlatMap<string, std::vector<string>> cluster_sets =
+  absl::flat_hash_map<string, std::vector<string>> cluster_sets =
       GetClusterSets(*graph);
   ASSERT_EQ(cluster_sets.size(), 1);
   std::vector<string> expected_clustered_nodes = {"AssignmentW", "ReadR",
@@ -586,7 +587,7 @@ TEST(XlaCompilationTest, ResourcesClusteringDisallowed) {
   std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
   TF_EXPECT_OK(root.ToGraph(graph.get()));
   TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph));
-  gtl::FlatMap<string, std::vector<string>> cluster_sets =
+  absl::flat_hash_map<string, std::vector<string>> cluster_sets =
       GetClusterSets(*graph);
   ASSERT_EQ(cluster_sets.size(), 1);
   std::vector<string> expected_clustered_nodes = {"AssignmentW",
@@ -616,7 +617,7 @@ TEST(XlaCompilationTest, ChainOfOps) {
   TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph));
 
   std::vector<string> cluster_names;
-  gtl::FlatMap<string, std::vector<string>> cluster_sets =
+  absl::flat_hash_map<string, std::vector<string>> cluster_sets =
       GetClusterSets(*graph, &cluster_names);
 
   ASSERT_EQ(cluster_sets.size(), 2);
diff --git a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
index 56e35c0059..657bb409db 100644
--- a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
+++ b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
@@ -89,7 +89,6 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/util/ptr_util.h"
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h
index 10ad87e38c..17c0321c1e 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.h
+++ b/tensorflow/compiler/jit/xla_compilation_cache.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_JIT_XLA_COMPILATION_CACHE_H_
 #define TENSORFLOW_COMPILER_JIT_XLA_COMPILATION_CACHE_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/tf2xla/xla_compiler.h"
 #include "tensorflow/compiler/tf2xla/xla_context.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
@@ -24,7 +25,6 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/threadpool.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 
@@ -152,7 +152,7 @@ class XlaCompilationCache : public ResourceBase {
   };
 
   mutex compile_cache_mu_;
-  gtl::FlatMap<Signature, std::unique_ptr<Entry>, Signature::Hash> cache_
+  absl::flat_hash_map<Signature, std::unique_ptr<Entry>, Signature::Hash> cache_
       GUARDED_BY(compile_cache_mu_);
 
   struct CompileStats {
@@ -165,7 +165,7 @@ class XlaCompilationCache : public ResourceBase {
   mutex compile_stats_mu_;
 
   // Maps cluster names to compilation statistics for said cluster.
-  gtl::FlatMap<string, CompileStats> compile_stats_
+  absl::flat_hash_map<string, CompileStats> compile_stats_
       GUARDED_BY(compile_stats_mu_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(XlaCompilationCache);
diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index ba1e3b2b4f..3f631f91ec 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -635,6 +635,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:ops",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -649,6 +650,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/compiler/tf2xla/resource_operation_table.cc b/tensorflow/compiler/tf2xla/resource_operation_table.cc
index 20f2ce2919..72b240996f 100644
--- a/tensorflow/compiler/tf2xla/resource_operation_table.cc
+++ b/tensorflow/compiler/tf2xla/resource_operation_table.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/tf2xla/resource_operation_table.h"
 #include "absl/algorithm/container.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
+#include "absl/container/flat_hash_map.h"
 
 namespace tensorflow {
 /*static*/ absl::string_view XlaResourceOpInfo::XlaResourceOpKindToString(
@@ -30,9 +30,9 @@ namespace tensorflow {
   }
 }
 
-static gtl::FlatMap<absl::string_view, XlaResourceOpInfo>*
+static absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>*
 CreateResourceOpInfoMap() {
-  auto* result = new gtl::FlatMap<absl::string_view, XlaResourceOpInfo>;
+  auto* result = new absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>;
 
   auto add = [&](absl::string_view op, XlaResourceOpKind op_kind,
                  XlaResourceKind resource_kind) {
@@ -103,15 +103,15 @@ CreateResourceOpInfoMap() {
   return result;
 }
 
-static const gtl::FlatMap<absl::string_view, XlaResourceOpInfo>&
+static const absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>&
 GetStaticResourceOpInfoMap() {
-  static gtl::FlatMap<absl::string_view, XlaResourceOpInfo>* op_info_map =
-      CreateResourceOpInfoMap();
+  static absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>*
+      op_info_map = CreateResourceOpInfoMap();
   return *op_info_map;
 }
 
 const XlaResourceOpInfo* GetResourceOpInfoForOp(absl::string_view op) {
-  const gtl::FlatMap<absl::string_view, XlaResourceOpInfo>& op_infos =
+  const absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>& op_infos =
       GetStaticResourceOpInfoMap();
   auto it = op_infos.find(op);
   return it == op_infos.end() ? nullptr : &it->second;
diff --git a/tensorflow/compiler/tf2xla/resource_operation_table_test.cc b/tensorflow/compiler/tf2xla/resource_operation_table_test.cc
index a85ef040a7..956f597301 100644
--- a/tensorflow/compiler/tf2xla/resource_operation_table_test.cc
+++ b/tensorflow/compiler/tf2xla/resource_operation_table_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/resource_operation_table.h"
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -33,7 +34,7 @@ bool HasResourceInputOrOutput(const OpDef& op_def) {
 }
 
 TEST(ResourceOperationTableTest, HaveAllResourceOps) {
-  gtl::FlatMap<string, bool> known_resource_ops;
+  absl::flat_hash_map<string, bool> known_resource_ops;
   for (absl::string_view known_resource_op :
        resource_op_table_internal::GetKnownResourceOps()) {
     ASSERT_TRUE(
diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD
index f825f67b44..1191cff109 100644
--- a/tensorflow/compiler/xla/client/BUILD
+++ b/tensorflow/compiler/xla/client/BUILD
@@ -220,6 +220,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:shape_inference",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 1da6ddd318..b7295e8a53 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include <type_traits>
 #include <utility>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/client/padding.h"
@@ -34,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stacktrace.h"
@@ -1027,7 +1027,7 @@ class XlaBuilder {
 
   // A map from XlaOp::Handle to the index in the instructions_ vector where the
   // instruction is held.
-  tensorflow::gtl::FlatMap<int64, int64> handle_to_index_;
+  absl::flat_hash_map<int64, int64> handle_to_index_;
 
   // The embedded computations used by this computation. Each computation was
   // the entry computation of some XlaComputation, the key is the unique id of
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index e800cf470c..8da6364786 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -146,6 +146,7 @@ cc_library(
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -250,6 +251,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -333,6 +335,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -395,6 +398,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/types:span",
     ],
 )
@@ -485,6 +489,7 @@ cc_library(
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -903,6 +908,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
@@ -952,6 +958,7 @@ cc_library(
     deps = [
         "//tensorflow/compiler/xla:types",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -987,6 +994,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
     ],
@@ -1034,6 +1042,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -1087,6 +1096,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
     ],
@@ -1125,6 +1135,7 @@ cc_library(
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -1146,6 +1157,7 @@ tf_cc_test(
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -1196,6 +1208,7 @@ cc_library(
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/types:optional",
     ],
@@ -1216,6 +1229,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
@@ -1260,6 +1274,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -1280,6 +1295,7 @@ tf_cc_test(
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:test",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -1304,6 +1320,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -1330,6 +1347,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -1385,6 +1403,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/types:optional",
         "@com_google_absl//absl/types:span",
     ],
@@ -1640,6 +1659,7 @@ cc_library(
         ":while_loop_analysis",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
     ],
@@ -1671,6 +1691,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -2203,6 +2224,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
     ],
@@ -2263,6 +2285,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -2319,6 +2342,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2345,6 +2369,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2416,6 +2441,7 @@ cc_library(
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
@@ -2460,6 +2486,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2588,6 +2615,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2701,6 +2729,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -3147,6 +3176,7 @@ cc_library(
         ":hlo_pass_pipeline",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -3269,6 +3299,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -3298,6 +3329,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -3354,6 +3386,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h
index a7d8927cf7..af227fe4da 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.h
+++ b/tensorflow/compiler/xla/service/allocation_tracker.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/backend.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -110,7 +111,7 @@ class AllocationTracker {
 
   // A map from device memory opaque value to allocation. One such map is
   // maintained per device ordinal.
-  using AllocationMap = tensorflow::gtl::FlatMap<const void*, Allocation>;
+  using AllocationMap = absl::flat_hash_map<const void*, Allocation>;
 
   tensorflow::mutex mutex_;
 
@@ -146,7 +147,7 @@ class AllocationTracker {
   // non-owning "view" into a tuple's sub-buffers.  The sub-buffers are then
   // free'd when both the view *and* the original tuple are Unregistered.  This
   // refcounting is managed in opaque_to_allocation_map_.
-  tensorflow::gtl::FlatMap<int64, std::vector<std::unique_ptr<ShapedBuffer>>>
+  absl::flat_hash_map<int64, std::vector<std::unique_ptr<ShapedBuffer>>>
       handle_to_shaped_buffers_ GUARDED_BY(mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(AllocationTracker);
diff --git a/tensorflow/compiler/xla/service/batchnorm_expander.cc b/tensorflow/compiler/xla/service/batchnorm_expander.cc
index 30d33e0d35..f70f6ddfec 100644
--- a/tensorflow/compiler/xla/service/batchnorm_expander.cc
+++ b/tensorflow/compiler/xla/service/batchnorm_expander.cc
@@ -35,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h
index 6a62439f88..c74326f631 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.h
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/bfloat16_support.h"
 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -186,7 +187,7 @@ class BFloat16Propagation : public HloModulePass {
 
   // Mapping from each HloComputation to the number of callers to it in the
   // module. Populated at the beginning of this pass.
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> caller_counts_;
+  absl::flat_hash_map<const HloComputation*, int64> caller_counts_;
 
   // We first store the potential F32-to-BF16 changes to changes_to_bf16_, which
   // are subject to further adjustment, then finally applied to the HLOs. This
@@ -195,8 +196,7 @@ class BFloat16Propagation : public HloModulePass {
   //
   // For each HloInstruction, changes_to_bf16_ stores the affected buffers in
   // the output as a map from in-place pointers to subshapes to shape indices.
-  tensorflow::gtl::FlatMap<HloInstruction*,
-                           tensorflow::gtl::FlatMap<Shape*, ShapeIndex>>
+  absl::flat_hash_map<HloInstruction*, absl::flat_hash_map<Shape*, ShapeIndex>>
       changes_to_bf16_;
 
   // Whether the last processed HLO module has been changed by this pass.
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 34a7be0e9c..3efa0b1dad 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <ostream>
 #include <utility>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -41,9 +42,9 @@ limitations under the License.
 namespace xla {
 namespace {
 
+using absl::flat_hash_map;
 using absl::StrAppend;
 using absl::StrAppendFormat;
-using ::tensorflow::gtl::FlatMap;
 using ::tensorflow::gtl::FlatSet;
 using ::tensorflow::strings::HumanReadableNumBytes;
 
@@ -519,7 +520,8 @@ void BufferAssignment::AddAssignment(BufferAllocation* allocation,
 // BufferAllocation.
 void BufferAssignment::CombineTempAllocations() {
   VLOG(1) << "CombineTempAllocations()";
-  FlatMap<LogicalBuffer::Color, BufferAllocation, LogicalBuffer::Color::Hasher>
+  flat_hash_map<LogicalBuffer::Color, BufferAllocation,
+                LogicalBuffer::Color::Hasher>
       combined_allocation_map;
 
   // Move all temp allocations into a single run at the end of the allocations
@@ -582,7 +584,8 @@ void BufferAssignment::CombineTempAllocations() {
   }
 
   // Update allocation indices to their new positions.
-  allocation_index_for_buffer_.clear_no_resize();
+  allocation_index_for_buffer_.erase(allocation_index_for_buffer_.begin(),
+                                     allocation_index_for_buffer_.end());
   for (size_t index = 0; index < allocations_.size(); ++index) {
     BufferAllocation* allocation = &allocations_[index];
     allocation->set_index(index);
@@ -814,7 +817,7 @@ Status BufferAssigner::AssignBuffersForComputation(
     const HloComputation* computation, bool is_thread_local,
     const FlatSet<const LogicalBuffer*>& colocated_buffers,
     const FlatSet<BufferAllocation::Index>& colocated_allocations,
-    FlatMap<const HloComputation*, FlatSet<const LogicalBuffer*>>*
+    flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>*
         buffers_to_assign_sequentially,
     BufferAssignment* assignment) {
   // Buffers are sorted and assigned to BufferAllocations in decreasing order of
@@ -833,7 +836,7 @@ Status BufferAssigner::AssignBuffersForComputation(
 
   // Generate a post order sort of instructions for sorting of the
   // LogicalBuffers.
-  FlatMap<const HloInstruction*, int> post_order_position;
+  flat_hash_map<const HloInstruction*, int> post_order_position;
   int position = 0;
   for (auto* instruction : computation->MakeInstructionPostOrder()) {
     post_order_position.emplace(instruction, position);
@@ -1043,12 +1046,12 @@ Status BufferAssigner::AssignBuffersForComputation(
   return Status::OK();
 }
 
-FlatMap<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
-        LogicalBuffer::Color::Hasher>
+flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+              LogicalBuffer::Color::Hasher>
 BufferAssigner::SplitBuffersByColor(
     const FlatSet<const LogicalBuffer*>& buffers) {
-  FlatMap<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
-          LogicalBuffer::Color::Hasher>
+  flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+                LogicalBuffer::Color::Hasher>
       color_map;
   for (auto buffer : buffers) {
     color_map[buffer->color()].insert(buffer);
@@ -1057,7 +1060,7 @@ BufferAssigner::SplitBuffersByColor(
 }
 
 Status BufferAssigner::AssignBuffersWithSequentialOrdering(
-    const FlatMap<const HloComputation*, FlatSet<const LogicalBuffer*>>&
+    const flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>&
         buffers_to_assign_sequentially,
     bool run_whole_module_heap_simulation, BufferAssignment* assignment) {
   // Run the sequence of instructions through the heap simulator.  The heuristic
@@ -1155,9 +1158,8 @@ std::vector<const LogicalBuffer*> ComputePeakMemoryLogicalBuffers(
     const BufferAllocation& allocation, const HeapSimulatorTrace& heap_trace) {
   // Create a map from LogicalBuffer::Id to LogicalBuffer* for the logical
   // buffers in this allocation.
-  tensorflow::gtl::FlatMap<LogicalBuffer::Id, const LogicalBuffer*>
-      id_to_buffer;
-  tensorflow::gtl::FlatMap<const LogicalBuffer*, int64> buffer_sizes;
+  absl::flat_hash_map<LogicalBuffer::Id, const LogicalBuffer*> id_to_buffer;
+  absl::flat_hash_map<const LogicalBuffer*, int64> buffer_sizes;
   for (const auto& pair : allocation.assigned_buffers()) {
     const LogicalBuffer* buffer = pair.first;
     const BufferAllocation::OffsetSize& offset_size = pair.second;
@@ -1679,7 +1681,7 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
 
   // First assign buffers for global computatations. Temporary buffers for
   // sequential computations are collected in 'buffers_to_assign_sequentially'.
-  FlatMap<const HloComputation*, FlatSet<const LogicalBuffer*>>
+  flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>
       buffers_to_assign_sequentially;
   for (auto* computation : global_computations) {
     TF_RETURN_IF_ERROR(AssignBuffersForComputation(
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h
index 24ba7c16f5..9ba40617a3 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.h
+++ b/tensorflow/compiler/xla/service/buffer_assignment.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
@@ -33,7 +34,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -148,7 +148,7 @@ class BufferAllocation {
 
   // Access to the logical buffers assigned to this allocation, and their
   // associated logical offsets and sizes.
-  const tensorflow::gtl::FlatMap<const LogicalBuffer*, OffsetSize>&
+  const absl::flat_hash_map<const LogicalBuffer*, OffsetSize>&
   assigned_buffers() const {
     return assigned_buffers_;
   }
@@ -323,7 +323,7 @@ class BufferAllocation {
 
   // Mapping from the set of buffers assigned to this allocation to their
   // logical offsets and sizes.
-  tensorflow::gtl::FlatMap<const LogicalBuffer*, OffsetSize> assigned_buffers_;
+  absl::flat_hash_map<const LogicalBuffer*, OffsetSize> assigned_buffers_;
 
   int64 fragmentation_bytes_ = 0;
   std::vector<HeapSimulatorTrace> heap_traces_;
@@ -500,7 +500,7 @@ class BufferAssignment {
   int64 temp_allocation_total_size_ = 0;
 
   // Maps Buffers to the index of the BufferAllocation which holds the buffer.
-  tensorflow::gtl::FlatMap<const LogicalBuffer*, BufferAllocation::Index>
+  absl::flat_hash_map<const LogicalBuffer*, BufferAllocation::Index>
       allocation_index_for_buffer_;
 
   const HloModule* module_;
@@ -557,8 +557,8 @@ class BufferAssigner {
       const tensorflow::gtl::FlatSet<const LogicalBuffer*>& colocated_buffers,
       const tensorflow::gtl::FlatSet<BufferAllocation::Index>&
           colocated_allocations,
-      tensorflow::gtl::FlatMap<const HloComputation*,
-                               tensorflow::gtl::FlatSet<const LogicalBuffer*>>*
+      absl::flat_hash_map<const HloComputation*,
+                          tensorflow::gtl::FlatSet<const LogicalBuffer*>>*
           buffers_to_assign_sequentially,
       BufferAssignment* assignment);
 
@@ -568,9 +568,8 @@ class BufferAssigner {
   // 'run_whole_module_heap_simulation' is true, the heap simulation will be run
   // assuming all global computations are sequentially ordered.
   Status AssignBuffersWithSequentialOrdering(
-      const tensorflow::gtl::FlatMap<
-          const HloComputation*,
-          tensorflow::gtl::FlatSet<const LogicalBuffer*>>&
+      const absl::flat_hash_map<const HloComputation*,
+                                tensorflow::gtl::FlatSet<const LogicalBuffer*>>&
           buffers_to_assign_sequentially,
       bool run_whole_module_heap_simulation, BufferAssignment* assignment);
 
@@ -624,9 +623,9 @@ class BufferAssigner {
 
   // Split a set of buffers into several sets, each of which contains buffers
   // colored with the same color.
-  tensorflow::gtl::FlatMap<LogicalBuffer::Color,
-                           tensorflow::gtl::FlatSet<const LogicalBuffer*>,
-                           LogicalBuffer::Color::Hasher>
+  absl::flat_hash_map<LogicalBuffer::Color,
+                      tensorflow::gtl::FlatSet<const LogicalBuffer*>,
+                      LogicalBuffer::Color::Hasher>
   SplitBuffersByColor(
       const tensorflow::gtl::FlatSet<const LogicalBuffer*>& buffers);
 
diff --git a/tensorflow/compiler/xla/service/buffer_liveness.h b/tensorflow/compiler/xla/service/buffer_liveness.h
index cdd3cf4032..2911bbcfbf 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness.h
+++ b/tensorflow/compiler/xla/service/buffer_liveness.h
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h
index 3af2ab5edf..0c2e9b99db 100644
--- a/tensorflow/compiler/xla/service/call_graph.h
+++ b/tensorflow/compiler/xla/service/call_graph.h
@@ -20,10 +20,10 @@ limitations under the License.
 
 #include <ostream>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -157,7 +157,7 @@ class CallGraphNode {
 
   // The map from instruction to index in callsites_ for looking up the callsite
   // (if any) associated with a particular instruction in this computation.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int64> callsite_instructions_;
+  absl::flat_hash_map<const HloInstruction*, int64> callsite_instructions_;
 
   // The call sites in other computations which call this computation.
   std::vector<CallSite> caller_callsites_;
@@ -267,7 +267,7 @@ class CallGraph {
 
   // Map from HLO computation to the index of the corresponding call graph node
   // in nodes_.
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> node_indices_;
+  absl::flat_hash_map<const HloComputation*, int64> node_indices_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index b65dfef9c9..7f78412924 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_alias_analysis.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -432,7 +432,7 @@ class CopyRemover {
       // Construct a list for each HLO buffer in the alias analysis. Maintain a
       // map from HloValue to the respective list element representing that
       // value. The map is used to construct the copy info map below.
-      tensorflow::gtl::FlatMap<const HloValue*, ValueNode*> value_to_node;
+      absl::flat_hash_map<const HloValue*, ValueNode*> value_to_node;
       for (const HloBuffer& buffer : alias_analysis.buffers()) {
         // Verify values contained in the buffer are strictly ordered. This
         // should always be the case after adding copies to eliminate
@@ -480,7 +480,7 @@ class CopyRemover {
     // respective ValueNode representing that value.
     void AddValueList(
         absl::Span<const HloValue* const> values,
-        tensorflow::gtl::FlatMap<const HloValue*, ValueNode*>* value_to_node) {
+        absl::flat_hash_map<const HloValue*, ValueNode*>* value_to_node) {
       ValueNode* tail = nullptr;
       ValueNode* head = nullptr;
       for (const HloValue* value : values) {
@@ -516,8 +516,7 @@ class CopyRemover {
     // respective ValueNode.
     void CreateCopyMap(
         const HloModule& module,
-        const tensorflow::gtl::FlatMap<const HloValue*, ValueNode*>&
-            value_to_node) {
+        const absl::flat_hash_map<const HloValue*, ValueNode*>& value_to_node) {
       for (HloComputation* computation : module.computations()) {
         for (HloInstruction* instruction : computation->instructions()) {
           // Add copies with unambiguous source values to the map. Copies with
@@ -916,7 +915,7 @@ class CopyRemover {
       ValueNode* src = nullptr;
       ValueNode* dest = nullptr;
     };
-    tensorflow::gtl::FlatMap<const HloInstruction*, CopyNodes> copy_map_;
+    absl::flat_hash_map<const HloInstruction*, CopyNodes> copy_map_;
   };
 
   HloModule* module_;
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index b7103118ac..6a83909a3b 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -290,6 +290,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:span",
@@ -309,6 +310,7 @@ cc_library(
     deps = [
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@llvm//:analysis",
         "@llvm//:target",
     ],
@@ -471,6 +473,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
         "//tensorflow/core:lib",
         "//tensorflow/stream_executor",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/synchronization",
         "@com_google_absl//absl/types:span",
     ],
@@ -762,6 +765,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:computation_layout",
         "//tensorflow/compiler/xla/service:layout_assignment",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
index bfecbd6e01..c291bf2d1b 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <numeric>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
@@ -38,7 +39,7 @@ using absl::nullopt;
 using absl::optional;
 
 using ShouldMakeOperandColMajorCache =
-    tensorflow::gtl::FlatMap<const HloInstruction*, bool>;
+    absl::flat_hash_map<const HloInstruction*, bool>;
 }  // namespace
 
 static bool ShouldMakeAllUsersColMajor(const HloInstruction* instruction) {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
index 20cf855735..a9febe891b 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <functional>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/synchronization/mutex.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 #include "tensorflow/core/platform/dynamic_annotations.h"
@@ -30,8 +31,7 @@ namespace cpu {
 namespace runtime {
 
 XfeedManager* GetXfeedManager(int device_ordinal) {
-  static tensorflow::gtl::FlatMap<int, XfeedManager*>* managers =
-      new tensorflow::gtl::FlatMap<int, XfeedManager*>();
+  static auto* managers = new absl::flat_hash_map<int, XfeedManager*>();
   static absl::Mutex* mutex = new absl::Mutex();
 
   absl::MutexLock lock(mutex);
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index c3e8020783..953a75c35f 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
@@ -67,7 +68,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/core/lib/core/bits.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -1398,7 +1398,7 @@ static bool ReductionPreservesLayout(const HloInstruction& reduce) {
   //
   // So if we reduce f32[A,B,C,D] on dimensions 1 and 2, this map contains
   // [0->0, 3->1].
-  gtl::FlatMap<int64, int64> unreduced_dim_map;
+  absl::flat_hash_map<int64, int64> unreduced_dim_map;
 
   gtl::FlatSet<int64> reduced_dims(reduce.dimensions().begin(),
                                    reduce.dimensions().end());
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index daafef4eb3..586f27b104 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "llvm/ADT/Triple.h"
@@ -47,7 +48,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -427,7 +427,7 @@ class IrEmitter : public DfsHloVisitorWithDefault,
   // Maps the buffer allocation slices for the parameters to the computation
   // being compiled to their parameter numbers.  Only relevant for thread local
   // computations.
-  tensorflow::gtl::FlatMap<BufferAllocation::Index, int64>
+  absl::flat_hash_map<BufferAllocation::Index, int64>
       computation_parameter_allocations_;
 
   // Maps HLO instructions to their index into the profile counter array.
@@ -567,11 +567,11 @@ class IrEmitter : public DfsHloVisitorWithDefault,
     }
   };
 
-  tensorflow::gtl::FlatMap<const Literal*, llvm::Constant*,
-                           LiteralPtrHashFunctor, LiteralPtrEqualityFunctor>
+  absl::flat_hash_map<const Literal*, llvm::Constant*, LiteralPtrHashFunctor,
+                      LiteralPtrEqualityFunctor>
       emitted_literals_;
 
-  tensorflow::gtl::FlatMap<BufferAllocation::Index, llvm::Constant*>
+  absl::flat_hash_map<BufferAllocation::Index, llvm::Constant*>
       constant_buffer_to_global_;
 
   std::vector<const HloComputation*> thread_local_computations_;
diff --git a/tensorflow/compiler/xla/service/cpu/target_machine_features.cc b/tensorflow/compiler/xla/service/cpu/target_machine_features.cc
index a0cd8ee2d2..5cdac203af 100644
--- a/tensorflow/compiler/xla/service/cpu/target_machine_features.cc
+++ b/tensorflow/compiler/xla/service/cpu/target_machine_features.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/cpu/target_machine_features.h"
+#include "tensorflow/core/platform/logging.h"
 
 namespace xla {
 namespace cpu {
diff --git a/tensorflow/compiler/xla/service/cpu/target_machine_features.h b/tensorflow/compiler/xla/service/cpu/target_machine_features.h
index 8b00ae9e47..a383b4a4a0 100644
--- a/tensorflow/compiler/xla/service/cpu/target_machine_features.h
+++ b/tensorflow/compiler/xla/service/cpu/target_machine_features.h
@@ -16,10 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 namespace cpu {
@@ -97,8 +97,7 @@ class LLVMTargetMachineFeatures : public TargetMachineFeatures {
   // This is mutated from within `GetTargetTransformInfoFor` which is
   // semantically a getter (and thus `const`); and is therefore declared
   // mutable.  Making this mutable is okay because it has cache semantics.
-  mutable tensorflow::gtl::FlatMap<const llvm::Function*,
-                                   llvm::TargetTransformInfo>
+  mutable absl::flat_hash_map<const llvm::Function*, llvm::TargetTransformInfo>
       target_transform_info_cache_;
   llvm::TargetMachine* target_machine_;
 };
diff --git a/tensorflow/compiler/xla/service/defuser.cc b/tensorflow/compiler/xla/service/defuser.cc
index d124f74d19..661539cccb 100644
--- a/tensorflow/compiler/xla/service/defuser.cc
+++ b/tensorflow/compiler/xla/service/defuser.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -48,7 +49,7 @@ Status Defuse(HloInstruction* fusion_instruction) {
       fusion_instruction->fused_instructions_computation();
 
   // A map from fused instruction to its defused clone.
-  tensorflow::gtl::FlatMap<const HloInstruction*, HloInstruction*>
+  absl::flat_hash_map<const HloInstruction*, HloInstruction*>
       defused_instructions;
   // Initialize map to contain the fusion instruction parameters mapping
   // to the operands of the fusion instruction.
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index 5761573791..68d01d75a2 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 51968d13d4..e65d3fa332 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -91,6 +91,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_reachability",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -357,6 +358,7 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:cufft_plugin",
         "//tensorflow/core/platform/default/build_config:stream_executor_cuda",  # build_cleaner: keep
         "//tensorflow/stream_executor",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index 31a9f9b1be..5742632782 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
@@ -197,7 +198,7 @@ GpuExecutable::ResolveConstantGlobals(se::StreamExecutor* executor) {
   }
   module_spec.AddCudaPtxInMemory(ptx().c_str());
 
-  tensorflow::gtl::FlatMap<int64, se::DeviceMemoryBase> globals;
+  absl::flat_hash_map<int64, se::DeviceMemoryBase> globals;
   se::ModuleHandle module_handle;
   executor->LoadModule(module_spec, &module_handle);
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
index 38b0f8f15b..0e276282e4 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <memory>
 #include <string>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "absl/types/span.h"
@@ -35,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 
@@ -101,7 +101,7 @@ class GpuExecutable : public Executable {
   const PointsToSet& GetRootPointsToSet() const;
 
   using BufferAllocToDeviceMemoryMap =
-      tensorflow::gtl::FlatMap<BufferAllocation::Index, se::DeviceMemoryBase>;
+      absl::flat_hash_map<BufferAllocation::Index, se::DeviceMemoryBase>;
 
   // Loads the PTX or CUBIN for this executable into `executor` and resolves the
   // globals corresponding to constant buffers.  Returns a map mapping buffer
diff --git a/tensorflow/compiler/xla/service/gpu/stream_assignment.h b/tensorflow/compiler/xla/service/gpu/stream_assignment.h
index c2df83aaa4..52d38b6f20 100644
--- a/tensorflow/compiler/xla/service/gpu/stream_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/stream_assignment.h
@@ -16,9 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_ASSIGNMENT_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_ASSIGNMENT_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 namespace gpu {
@@ -34,7 +34,7 @@ class StreamAssignment {
 
  private:
   int stream_count_ = 1;  // At least the main stream.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int> hlo_to_stream_number_;
+  absl::flat_hash_map<const HloInstruction*, int> hlo_to_stream_number_;
 };
 
 // Assigns GPU streams to instructions in `module`.
diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index 2bd04259c0..147776c8c4 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -18,13 +18,14 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/util.h"
 
 namespace xla {
 
-using tensorflow::gtl::FlatMap;
+using absl::flat_hash_map;
 using tensorflow::gtl::FlatSet;
 
 /*static*/
@@ -56,7 +57,7 @@ StatusOr<int64> HeapSimulator::MinimumMemoryForComputation(
     const HloComputation& computation, const HloInstructionSequence& sequence,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+    const absl::flat_hash_map<const HloComputation*, int64>*
         memory_by_computation) {
   TF_ASSIGN_OR_RETURN(
       HeapSimulator::Result result,
@@ -88,7 +89,7 @@ StatusOr<HeapSimulator::Result> HeapSimulator::Run(
     const HloInstructionSequence& instruction_sequence,
     const TuplePointsToAnalysis& points_to_analysis,
     const BufferValue::SizeFunction& size_fn, const Options& options,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+    const absl::flat_hash_map<const HloComputation*, int64>*
         memory_by_computation) {
   HeapSimulator heap(std::move(algorithm), size_fn, options,
                      /*schedule=*/nullptr, memory_by_computation);
@@ -115,8 +116,10 @@ Status HeapSimulator::RunComputation(
   // 'used_buffers' is the reverse map - it tracks which buffers were used by an
   // instruction, so that we can remove the instructions from a buffer's live
   // set after they are visited.
-  FlatMap<const BufferValue*, FlatSet<const HloInstruction*>> live_buffers;
-  FlatMap<const HloInstruction*, FlatSet<const BufferValue*>> used_buffers;
+  flat_hash_map<const BufferValue*, FlatSet<const HloInstruction*>>
+      live_buffers;
+  flat_hash_map<const HloInstruction*, FlatSet<const BufferValue*>>
+      used_buffers;
   auto add_user_to_buffer = [this, &live_buffers, &used_buffers](
                                 const HloInstruction* user,
                                 const BufferValue* buffer) {
@@ -345,7 +348,7 @@ HeapSimulator::HeapSimulator(
     std::unique_ptr<HeapAlgorithm> algorithm,
     const BufferValue::SizeFunction& size_fn, const Options& options,
     const HloSchedule* schedule,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+    const absl::flat_hash_map<const HloComputation*, int64>*
         memory_by_computation)
     : no_fragmentation_stats_(absl::make_unique<NoFragmentationStatsHeap>()),
       algorithm_(std::move(algorithm)),
@@ -536,7 +539,7 @@ void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size,
 
 void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
     const HloInstruction* instruction,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   // We only count the memory usage of the largest subcomputation, instead of
   // adding them all, because subcomputations won't execute in parallel.
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index 7d6dcc0dc9..a5bb3f81f7 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
 #include "tensorflow/compiler/xla/service/buffer_value_containers.h"
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
@@ -30,7 +31,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -58,7 +58,7 @@ class HeapSimulator {
   // Result represents the result of the heap simulation.
   struct Result {
     // The assignment of buffers to chunks.
-    tensorflow::gtl::FlatMap<const BufferValue*, Chunk> chunk_map;
+    absl::flat_hash_map<const BufferValue*, Chunk> chunk_map;
 
     // The total size in bytes of the heap, containing all assigned chunks.
     int64 heap_size = 0;
@@ -100,7 +100,7 @@ class HeapSimulator {
       const HloComputation& computation, const HloInstructionSequence& sequence,
       const TuplePointsToAnalysis& points_to_analysis,
       const LogicalBuffer::SizeFunction& size_function,
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+      const absl::flat_hash_map<const HloComputation*, int64>*
           memory_by_computation = nullptr);
 
   // Run the heap simulation with the given algorithm, assuming the given
@@ -130,7 +130,7 @@ class HeapSimulator {
       const TuplePointsToAnalysis& points_to_analysis,
       const BufferValue::SizeFunction& size_fn,
       const Options& options = Options(),
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+      const absl::flat_hash_map<const HloComputation*, int64>*
           memory_by_computation = nullptr);
 
  private:
@@ -140,7 +140,7 @@ class HeapSimulator {
   HeapSimulator(std::unique_ptr<HeapAlgorithm> algorithm,
                 const BufferValue::SizeFunction& size_fn,
                 const Options& options, const HloSchedule* schedule = nullptr,
-                const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+                const absl::flat_hash_map<const HloComputation*, int64>*
                     memory_by_computation = nullptr);
   ~HeapSimulator();
 
@@ -172,7 +172,7 @@ class HeapSimulator {
   // handle subcomputations. It would be good to unify the handling of
   // subcomputations, but it's not clear how.
   const HloSchedule* schedule_;
-  const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+  const absl::flat_hash_map<const HloComputation*, int64>*
       memory_by_computation_;
 
   // In addition to Alloc and Free, the heap simulator exposes a concept of
@@ -193,7 +193,7 @@ class HeapSimulator {
     const BufferValue* canonical = nullptr;
     int64 refcount = 0;
   };
-  tensorflow::gtl::FlatMap<const BufferValue*, std::shared_ptr<SharedGroup>>
+  absl::flat_hash_map<const BufferValue*, std::shared_ptr<SharedGroup>>
       shared_buffers_;
 
   // Hold some sets for error-checking the sequence of Alloc and Free calls.
@@ -235,7 +235,7 @@ class HeapAlgorithm {
   // analysis, it's not worth making major changes to HeapSimulator now.
   virtual void AccountForSubcomputationMemory(
       const HloInstruction* instruction,
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+      const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) {}
 
   // Free de-allocates a previously allocated buffer.
@@ -262,7 +262,7 @@ class NoFragmentationStatsHeap : public HeapAlgorithm {
 
   void AccountForSubcomputationMemory(
       const HloInstruction* instruction,
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+      const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) override;
 
   void Free(const BufferValue* buffer, int64 size) override;
@@ -382,8 +382,7 @@ class GlobalDecreasingSizeBestFitHeap : public HeapAlgorithm {
     // Free time of the buffer.
     int64 end;
   };
-  tensorflow::gtl::FlatMap<const BufferValue*, BufferInterval>
-      buffer_intervals_;
+  absl::flat_hash_map<const BufferValue*, BufferInterval> buffer_intervals_;
 };
 
 // A heap algorithm that chooses the best results from other algorithms added to
diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc
index 191fbf8194..ea0bced923 100644
--- a/tensorflow/compiler/xla/service/heap_simulator_test.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 namespace {
@@ -174,7 +174,7 @@ class HeapSimulatorTracker {
 
     // Construct the module sequence grouped by computation.
     HloSchedule schedule(module_.get());
-    tensorflow::gtl::FlatMap<const HloInstruction*, int> reverse_position;
+    absl::flat_hash_map<const HloInstruction*, int> reverse_position;
     for (int i = 0; i < full_module_sequence.size(); ++i) {
       const HloInstruction* instruction = full_module_sequence[i];
       schedule.GetOrCreateSequence(instruction->parent())
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index 0986da65cb..b6e1f52cf5 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -290,13 +291,11 @@ class BufferValueMap {
   const HloDataflowAnalysis& dataflow_;
 
   // A map containing the set of values contained in each buffer.
-  tensorflow::gtl::FlatMap<BufferNumber,
-                           tensorflow::gtl::FlatSet<const HloValue*>>
+  absl::flat_hash_map<BufferNumber, tensorflow::gtl::FlatSet<const HloValue*>>
       buffers_;
 
   // A map indicating which buffer each value is contained in.
-  tensorflow::gtl::FlatMap<const HloValue*, BufferNumber>
-      value_to_buffer_number_;
+  absl::flat_hash_map<const HloValue*, BufferNumber> value_to_buffer_number_;
 
   // The buffer number of the next buffer to be created.
   BufferNumber next_buffer_number_ = 0;
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.h b/tensorflow/compiler/xla/service/hlo_alias_analysis.h
index e345804537..372f99ff01 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/hlo_buffer.h"
 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
@@ -110,7 +111,7 @@ class HloAliasAnalysis {
   std::unique_ptr<HloDataflowAnalysis> dataflow_analysis_;
 
   // A map indicating which buffer a value is contained in.
-  tensorflow::gtl::FlatMap<const HloValue*, HloBuffer*> value_to_buffer_;
+  absl::flat_hash_map<const HloValue*, HloBuffer*> value_to_buffer_;
 
   // A lazily constructed vector containing all HloBuffers sorted by
   // HloBuffer::Id.
diff --git a/tensorflow/compiler/xla/service/hlo_clone_context.h b/tensorflow/compiler/xla/service/hlo_clone_context.h
index 658643b427..24910ca07b 100644
--- a/tensorflow/compiler/xla/service/hlo_clone_context.h
+++ b/tensorflow/compiler/xla/service/hlo_clone_context.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/map_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -73,12 +73,12 @@ class HloCloneContext {
     return FindOrDie(computations_, old_computation);
   }
 
-  const tensorflow::gtl::FlatMap<const HloInstruction*, HloInstruction*>&
+  const absl::flat_hash_map<const HloInstruction*, HloInstruction*>&
   cloned_instructions() const {
     return instructions_;
   }
 
-  const tensorflow::gtl::FlatMap<const HloComputation*, HloComputation*>&
+  const absl::flat_hash_map<const HloComputation*, HloComputation*>&
   cloned_computations() const {
     return computations_;
   }
@@ -86,10 +86,8 @@ class HloCloneContext {
  private:
   HloModule* module_;
   string suffix_;
-  tensorflow::gtl::FlatMap<const HloInstruction*, HloInstruction*>
-      instructions_;
-  tensorflow::gtl::FlatMap<const HloComputation*, HloComputation*>
-      computations_;
+  absl::flat_hash_map<const HloInstruction*, HloInstruction*> instructions_;
+  absl::flat_hash_map<const HloComputation*, HloComputation*> computations_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 4613d6762e..257dd5876f 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <sstream>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
@@ -297,7 +298,7 @@ void ComputeComputationPostOrder(
 void HloComputation::ComputeInstructionPostOrder(
     const HloComputation::ChannelDependencyMap& channel_dependency_map,
     std::vector<HloInstruction*>* post_order, HloInstruction* root,
-    tensorflow::gtl::FlatMap<HloInstruction*, VisitState>* visited) const {
+    absl::flat_hash_map<HloInstruction*, VisitState>* visited) const {
   std::vector<HloInstruction*> dfs_stack;
   dfs_stack.push_back(root);
   while (!dfs_stack.empty()) {
@@ -394,7 +395,7 @@ std::vector<HloInstruction*> HloComputation::MakeInstructionPostOrder() const {
   std::vector<HloInstruction*> post_order;
   post_order.reserve(instruction_count());
   std::vector<HloInstruction*> trace_instructions;
-  tensorflow::gtl::FlatMap<HloInstruction*, VisitState> visited;
+  absl::flat_hash_map<HloInstruction*, VisitState> visited;
   for (auto& instruction : instructions_) {
     if (instruction->opcode() == HloOpcode::kTrace) {
       // Trace instructions aren't handled by the DFS visitor. Add trace
@@ -505,9 +506,9 @@ HloComputationProto HloComputation::ToProto() const {
 /* static */ StatusOr<std::unique_ptr<HloComputation>>
 HloComputation::CreateFromProto(
     const HloComputationProto& proto,
-    const tensorflow::gtl::FlatMap<int64, HloComputation*>& computation_map) {
-  tensorflow::gtl::FlatMap<int64, HloInstruction*> instruction_map;
-  tensorflow::gtl::FlatMap<HloInstruction*, int64> to_proto_id;
+    const absl::flat_hash_map<int64, HloComputation*>& computation_map) {
+  absl::flat_hash_map<int64, HloInstruction*> instruction_map;
+  absl::flat_hash_map<HloInstruction*, int64> to_proto_id;
   std::vector<std::unique_ptr<HloInstruction>> instructions;
   int64 parameter_count = 0;
   for (const HloInstructionProto& instruction_proto : proto.instructions()) {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index 936a53bd7e..af929ac009 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/iterator_util.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -40,7 +41,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -188,7 +188,7 @@ class HloComputation {
   //     calls.
   static StatusOr<std::unique_ptr<HloComputation>> CreateFromProto(
       const HloComputationProto& proto,
-      const tensorflow::gtl::FlatMap<int64, HloComputation*>& computation_map);
+      const absl::flat_hash_map<int64, HloComputation*>& computation_map);
 
   // Gets the instructions in this computation.
   //
@@ -414,14 +414,14 @@ class HloComputation {
   // cross-replica-sum the union of the dependencies for all participating
   // instructions.
   using ChannelDependencyMap =
-      tensorflow::gtl::FlatMap<int64, absl::InlinedVector<HloInstruction*, 1>>;
+      absl::flat_hash_map<int64, absl::InlinedVector<HloInstruction*, 1>>;
   ChannelDependencyMap ComputeChannelDependencies() const;
 
   enum VisitState { kVisiting, kVisited };
   void ComputeInstructionPostOrder(
       const HloComputation::ChannelDependencyMap& channel_dependency_map,
       std::vector<HloInstruction*>* post_order, HloInstruction* root,
-      tensorflow::gtl::FlatMap<HloInstruction*, VisitState>* visited) const;
+      absl::flat_hash_map<HloInstruction*, VisitState>* visited) const;
 
   string name_;
   int64 unique_id_;
@@ -439,7 +439,7 @@ class HloComputation {
   // instruction pointer to location in the list for fast lookup.
   using InstructionList = std::list<std::unique_ptr<HloInstruction>>;
   InstructionList instructions_;
-  tensorflow::gtl::FlatMap<const HloInstruction*, InstructionList::iterator>
+  absl::flat_hash_map<const HloInstruction*, InstructionList::iterator>
       instruction_iterators_;
 
   std::vector<HloInstruction*> param_instructions_;
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc
index 113fd18eae..159c39d557 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <algorithm>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -106,8 +107,8 @@ Status HloDomainMap::PopulateDomainMetadataMap() {
   auto equal = [](const DomainMetadata* a, const DomainMetadata* b) {
     return a->Matches(*b);
   };
-  tensorflow::gtl::FlatMap<const DomainMetadata*, int64, decltype(hash),
-                           decltype(equal)>
+  absl::flat_hash_map<const DomainMetadata*, int64, decltype(hash),
+                      decltype(equal)>
       domain_metadata(1024, hash, equal);
 
   for (auto& domain : instruction_domains_) {
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h b/tensorflow/compiler/xla/service/hlo_domain_map.h
index 56b557d7ce..8584bc021d 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.h
@@ -19,13 +19,13 @@ limitations under the License.
 #include <memory>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_domain_metadata.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -77,8 +77,7 @@ class HloDomainMap {
  private:
   // Map used for representing instruction ordering, i.e.
   // order_map[a] < order_map[b] means a must be ordered before b.
-  using InstructionOrderMap =
-      tensorflow::gtl::FlatMap<const HloInstruction*, int64>;
+  using InstructionOrderMap = absl::flat_hash_map<const HloInstruction*, int64>;
 
   HloDomainMap(string domain_kind) : domain_kind_(std::move(domain_kind)) {}
 
@@ -120,8 +119,8 @@ class HloDomainMap {
 
   string domain_kind_;
   std::vector<std::unique_ptr<DomainMetadata::Domain>> instruction_domains_;
-  tensorflow::gtl::FlatMap<HloInstruction*, int64> instruction_to_domain_;
-  tensorflow::gtl::FlatMap<HloInstruction*, int64> domain_metadata_id_;
+  absl::flat_hash_map<HloInstruction*, int64> instruction_to_domain_;
+  absl::flat_hash_map<HloInstruction*, int64> domain_metadata_id_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 23787dbc8a..5d5c9c7e58 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/ascii.h"
@@ -43,7 +44,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/human_readable_json.h"
@@ -59,8 +59,8 @@ using absl::StrJoin;
 /* static */
 StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     const HloInstructionProto& proto,
-    const tensorflow::gtl::FlatMap<int64, HloInstruction*>& instruction_map,
-    const tensorflow::gtl::FlatMap<int64, HloComputation*>& computation_map) {
+    const absl::flat_hash_map<int64, HloInstruction*>& instruction_map,
+    const absl::flat_hash_map<int64, HloComputation*>& computation_map) {
   TF_RET_CHECK(!proto.opcode().empty());
   TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(proto.opcode()));
   TF_RET_CHECK(proto.has_shape());
@@ -266,7 +266,8 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
           << "Expect 1 called computation for fusion instruction but sees "
           << proto.called_computation_ids_size();
       const int64 fusion_id = proto.called_computation_ids(0);
-      auto* fused_computation = FindPtrOrNull(computation_map, fusion_id);
+      auto* fused_computation =
+          tensorflow::gtl::FindPtrOrNull(computation_map, fusion_id);
       TF_RET_CHECK(fused_computation != nullptr)
           << "No fusion computation with id " << fusion_id;
       instruction = CreateFusion(proto.shape(), fusion_kind, all_operands(),
@@ -2661,14 +2662,14 @@ class HloInstruction::FusionReusesParamElements {
   // the value of this parameter, which would save stack space but not allow us
   // to finish early if we find a reuse.
   static UseKind Compute(int64 i, const HloInstruction& hlo) {
-    tensorflow::gtl::FlatMap<const HloInstruction*, UseKind> memoization_cache;
+    absl::flat_hash_map<const HloInstruction*, UseKind> memoization_cache;
     return ComputeInternal(i, hlo, &memoization_cache);
   }
 
  private:
   static UseKind ComputeInternal(
       int64 i, const HloInstruction& hlo,
-      tensorflow::gtl::FlatMap<const HloInstruction*, UseKind>* cache) {
+      absl::flat_hash_map<const HloInstruction*, UseKind>* cache) {
     if (auto hlo_param = DynCast<HloParameterInstruction>(&hlo)) {
       if (hlo_param->parameter_number() == i) {
         return UseKind::kUse;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 009bd3bab3..1bfdc88abc 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -32,6 +32,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
@@ -50,7 +51,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/iterator_range.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -247,7 +247,7 @@ class CanonicalNameMap {
 
  private:
   int64 index;
-  tensorflow::gtl::FlatMap<string, string> canonical_name_map;
+  absl::flat_hash_map<string, string> canonical_name_map;
 };
 
 // HLO instructions are the atomic unit of the high-level compiler's IR.
@@ -350,8 +350,8 @@ class HloInstruction {
   //     calls.
   static StatusOr<std::unique_ptr<HloInstruction>> CreateFromProto(
       const HloInstructionProto& proto,
-      const tensorflow::gtl::FlatMap<int64, HloInstruction*>& instruction_map,
-      const tensorflow::gtl::FlatMap<int64, HloComputation*>& computation_map);
+      const absl::flat_hash_map<int64, HloInstruction*>& instruction_map,
+      const absl::flat_hash_map<int64, HloComputation*>& computation_map);
 
   // Creates a parameter-retrieving instruction.
   static std::unique_ptr<HloInstruction> CreateParameter(int64 parameter_number,
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index ad45a82941..1bc168c8b7 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <deque>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/escaping.h"
 #include "absl/strings/str_cat.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/window_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 namespace {
@@ -1099,7 +1099,7 @@ void HloFusionInstruction::MergeFusionInstructionIntoMultiOutput(
   // Note that we add the unfused instructions to this->parent_ computation.
   // This is necessary because the unique_id needs for an instruction and
   // it's only added when inserting to the computation.
-  tensorflow::gtl::FlatMap<HloInstruction*, HloInstruction*> old_to_new;
+  absl::flat_hash_map<HloInstruction*, HloInstruction*> old_to_new;
   std::vector<HloInstruction*> unfused_instructions;
   auto computation_to_merge =
       instruction_to_merge->fused_instructions_computation();
@@ -1392,7 +1392,7 @@ std::unique_ptr<HloInstruction> HloFusionInstruction::CloneWithNewOperandsImpl(
 }
 
 Status HloFusionInstruction::DeduplicateFusionOperands() {
-  tensorflow::gtl::FlatMap<const HloInstruction*, int> operand_indices;
+  absl::flat_hash_map<const HloInstruction*, int> operand_indices;
   std::vector<int> operands_to_remove;
   for (int i = 0; i < operand_count(); ++i) {
     auto emplace_result = operand_indices.emplace(operand(i), i);
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index 6a4e766788..1c2b2868fd 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
@@ -74,7 +75,7 @@ class ListScheduler {
       const HloComputation& computation,
       const TuplePointsToAnalysis& points_to_analysis,
       const LogicalBuffer::SizeFunction& size_function,
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+      const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) {
     ListScheduler scheduler(computation, points_to_analysis, size_function,
                             memory_by_computation);
@@ -99,7 +100,7 @@ class ListScheduler {
   ListScheduler(const HloComputation& computation,
                 const TuplePointsToAnalysis& points_to_analysis,
                 const LogicalBuffer::SizeFunction& size_function,
-                const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+                const absl::flat_hash_map<const HloComputation*, int64>&
                     memory_by_computation)
       : computation_(computation),
         points_to_analysis_(points_to_analysis),
@@ -234,8 +235,7 @@ class ListScheduler {
 
     // Populate the ready list with instructions which have no operands or
     // control predecessors.
-    tensorflow::gtl::FlatMap<const HloInstruction*, int64>
-        unscheduled_pred_count;
+    absl::flat_hash_map<const HloInstruction*, int64> unscheduled_pred_count;
     for (auto* instruction : computation_.instructions()) {
       // TODO(b/34466113): Replace this and above with successors() or
       // predecessors() when these methods are added to HloInstruction.
@@ -251,8 +251,8 @@ class ListScheduler {
     std::multimap<Priority, ReadyListEntry> ready_queue;
 
     // Map of ready instructions to their iterators in ready_queue.
-    tensorflow::gtl::FlatMap<const HloInstruction*,
-                             std::multimap<Priority, ReadyListEntry>::iterator>
+    absl::flat_hash_map<const HloInstruction*,
+                        std::multimap<Priority, ReadyListEntry>::iterator>
         ready_instructions;
 
     auto add_to_ready_queue = [&](HloInstruction* inst) {
@@ -347,12 +347,11 @@ class ListScheduler {
   // Computations are analyzed in post-order. When scheduling an instruction
   // that includes subcomputations, such as a while loop, we use this map to
   // look up the memory needed by subcomputations.
-  const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+  const absl::flat_hash_map<const HloComputation*, int64>&
       memory_by_computation_;
 
   // A map containing the LogicalBuffers that each instruction uses.
-  tensorflow::gtl::FlatMap<const HloInstruction*,
-                           std::vector<const LogicalBuffer*>>
+  absl::flat_hash_map<const HloInstruction*, std::vector<const LogicalBuffer*>>
       buffer_uses_;
 
   // A map containing the count of unscheduled HLOs which using a particular
@@ -379,7 +378,7 @@ StatusOr<HloInstructionSequence> ScheduleComputationHelper(
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
     const MemorySchedulerAlgorithm& algorithm,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   VLOG(2) << "Computation: " << computation.name();
   if (algorithm) {
@@ -396,13 +395,13 @@ StatusOr<HloInstructionSequence> DFSMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   // These variables are a hack to prevent overflows.
   int64 cumulative_total_size = 0;
   int64 total_hlos = computation.parent()->instruction_count();
-  tensorflow::gtl::FlatMap<const HloInstruction*, int64> extra_users;
-  tensorflow::gtl::FlatMap<const HloInstruction*, int64> total_sizes;
+  absl::flat_hash_map<const HloInstruction*, int64> extra_users;
+  absl::flat_hash_map<const HloInstruction*, int64> total_sizes;
   for (const HloInstruction* hlo : computation.MakeInstructionPostOrder()) {
     if (ListScheduler::IgnoreInstruction(*hlo)) {
       extra_users[hlo] = 0;
@@ -467,7 +466,7 @@ StatusOr<HloInstructionSequence> ListMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   return ListScheduler::Run(computation, points_to_analysis, size_function,
                             memory_by_computation);
@@ -477,7 +476,7 @@ StatusOr<HloInstructionSequence> PostOrderMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   return HloInstructionSequence(computation.MakeInstructionPostOrder());
 }
@@ -486,7 +485,7 @@ StatusOr<HloInstructionSequence> DefaultMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   // We try a few schedulers and choose whichever returns a lower min-memory,
   // not accounting for fragmentation.
@@ -549,7 +548,7 @@ StatusOr<HloSchedule> ScheduleModule(
   HloSchedule schedule(&module);
   TF_ASSIGN_OR_RETURN(std::unique_ptr<TuplePointsToAnalysis> points_to_analysis,
                       TuplePointsToAnalysis::Run(&module));
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> memory_by_computation;
+  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
   for (const auto* computation : module.MakeComputationPostOrder()) {
     if (!computation->IsFusionComputation()) {
       TF_ASSIGN_OR_RETURN(HloInstructionSequence computation_sequence,
@@ -577,7 +576,7 @@ StatusOr<HloInstructionSequence> ScheduleComputation(
   CHECK(!computation.IsFusionComputation());
   TF_ASSIGN_OR_RETURN(std::unique_ptr<TuplePointsToAnalysis> points_to_analysis,
                       TuplePointsToAnalysis::Run(computation.parent()));
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> empty_map;
+  absl::flat_hash_map<const HloComputation*, int64> empty_map;
   return ScheduleComputationHelper(computation, *points_to_analysis,
                                    size_function, nullptr, empty_map);
 }
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h
index 9964c6fdd7..a4c1d3db81 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_ordering.h"
@@ -37,7 +38,7 @@ namespace xla {
 typedef std::function<StatusOr<HloInstructionSequence>(
     const HloComputation&, const TuplePointsToAnalysis&,
     const LogicalBuffer::SizeFunction&,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&)>
+    const absl::flat_hash_map<const HloComputation*, int64>&)>
     MemorySchedulerAlgorithm;
 
 // List scheduler
@@ -45,7 +46,7 @@ StatusOr<HloInstructionSequence> ListMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation);
 
 // DFS-order scheduler
@@ -53,7 +54,7 @@ StatusOr<HloInstructionSequence> DFSMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation);
 
 // Naive Post Order scheduler
@@ -61,7 +62,7 @@ StatusOr<HloInstructionSequence> PostOrderMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation);
 
 // The default scheduling algorithm. Runs both the list scheduler
@@ -71,7 +72,7 @@ StatusOr<HloInstructionSequence> DefaultMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation);
 
 // Returns an HloSchedule which seeks to minimize the memory required for
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
index 1b9e9bfc77..5a9fccc7dd 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_dce.h"
@@ -247,7 +248,7 @@ TEST_F(HloSchedulingTest, ListAccountsForSubcomputations) {
   EXPECT_TRUE(ordering.ExecutesBefore(bcast, add));
   EXPECT_TRUE(ordering.ExecutesBefore(transpose, add));
 
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> memory_by_computation;
+  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
   memory_by_computation[cond_computation] = 17;
   memory_by_computation[body_computation] = 16;
   std::unique_ptr<TuplePointsToAnalysis> points_to_analysis =
@@ -409,7 +410,7 @@ TEST_F(HloSchedulingTest, HeapSimulatorAccountsForSubcomputations) {
   EXPECT_EQ(module->entry_computation()->instruction_count(),
             schedule.sequence(module->entry_computation()).size());
 
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> memory_by_computation;
+  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
   memory_by_computation[cond_computation] = 17;
   memory_by_computation[body_computation] = 16;
   std::unique_ptr<TuplePointsToAnalysis> points_to_analysis =
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index b3949f3a6d..9359e9a8be 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -285,8 +286,8 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
       << ShapeUtil::HumanStringWithLayout(expected_program_shape.result())
       << ", actual: " << ShapeUtil::HumanStringWithLayout(result_shape);
 
-  tensorflow::gtl::FlatMap<int64, HloComputation*> computation_map;
-  tensorflow::gtl::FlatMap<HloComputation*, int64> to_proto_id;
+  absl::flat_hash_map<int64, HloComputation*> computation_map;
+  absl::flat_hash_map<HloComputation*, int64> to_proto_id;
   std::vector<std::unique_ptr<HloComputation>> computations;
   HloComputation* entry = nullptr;
   for (const HloComputationProto& computation_proto : proto.computations()) {
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
index 278d94cdd3..0311b73207 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -30,7 +31,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -250,25 +250,25 @@ class HloModuleGroupMetadata {
   std::vector<std::unique_ptr<std::vector<HloInstruction*>>> companion_sets_;
 
   // Map from each companion while instruction to the index into companion_set_.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int64> companion_set_index_;
+  absl::flat_hash_map<const HloInstruction*, int64> companion_set_index_;
 
   // Map from computation to the instruction using it (a kWhile, kConditional).
-  tensorflow::gtl::FlatMap<const HloComputation*, TrackedInstruction>
+  absl::flat_hash_map<const HloComputation*, TrackedInstruction>
       tracked_instructions_;
 
   // Maps tracked instructions (kWhile, kConditional, kCall, ...) to the set of
   // communicating instructions within the proper called computation(s).
-  tensorflow::gtl::FlatMap<HloInstruction*, std::vector<HloInstruction*>>
+  absl::flat_hash_map<HloInstruction*, std::vector<HloInstruction*>>
       tracked_instructions_comms_;
 
   // All channels in the module.
   std::vector<Channel> channels_;
 
   // Map from channel ids to the index in channels_.
-  tensorflow::gtl::FlatMap<int64, int64> channel_id_map_;
+  absl::flat_hash_map<int64, int64> channel_id_map_;
 
   // Map from all-reduce ids to the all reduce instructions.
-  tensorflow::gtl::FlatMap<int64, std::vector<HloInstruction*>> all_reduce_map_;
+  absl::flat_hash_map<int64, std::vector<HloInstruction*>> all_reduce_map_;
 
   // The maximum channel id used in the module group.
   int64 max_channel_id_ = -1;
@@ -276,7 +276,7 @@ class HloModuleGroupMetadata {
   // The modules that this metadata was built from.
   const std::vector<HloModule*>& modules_;
 
-  tensorflow::gtl::FlatMap<HloModule*, std::unique_ptr<TuplePointsToAnalysis>>
+  absl::flat_hash_map<HloModule*, std::unique_ptr<TuplePointsToAnalysis>>
       points_to_analyses_;
 };
 
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.h b/tensorflow/compiler/xla/service/hlo_module_group_util.h
index 309c23045d..f21b44bcd9 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_util.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group_util.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <memory>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -87,7 +87,7 @@ class HloModuleGroupUtil {
   // * visit_state: map from each instruction to its visit state.
   // * visit_function: function called when each instruction group.
   // * root: the root instruction of the traversal.
-  using VisitStates = tensorflow::gtl::FlatMap<HloInstruction*, VisitState>;
+  using VisitStates = absl::flat_hash_map<HloInstruction*, VisitState>;
   Status VisitTopologicalOrder(VisitStates* visit_state,
                                const VisitFunction& visit_function,
                                HloInstruction* root);
diff --git a/tensorflow/compiler/xla/service/hlo_opcode.cc b/tensorflow/compiler/xla/service/hlo_opcode.cc
index 2d4e38589f..4551a1c2e2 100644
--- a/tensorflow/compiler/xla/service/hlo_opcode.cc
+++ b/tensorflow/compiler/xla/service/hlo_opcode.cc
@@ -14,9 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -31,7 +31,7 @@ string HloOpcodeString(HloOpcode opcode) {
 }
 
 StatusOr<HloOpcode> StringToHloOpcode(const string& opcode_name) {
-  static auto* opcode_map = new tensorflow::gtl::FlatMap<string, HloOpcode>({
+  static auto* opcode_map = new absl::flat_hash_map<string, HloOpcode>({
 #define STRING_TO_OPCODE_ENTRY(enum_name, opcode_name, ...) \
   {opcode_name, HloOpcode::enum_name},
       HLO_OPCODE_LIST(STRING_TO_OPCODE_ENTRY)
diff --git a/tensorflow/compiler/xla/service/hlo_ordering.h b/tensorflow/compiler/xla/service/hlo_ordering.h
index b0361c3f02..66313492eb 100644
--- a/tensorflow/compiler/xla/service/hlo_ordering.h
+++ b/tensorflow/compiler/xla/service/hlo_ordering.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/hlo_value.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -120,8 +120,8 @@ class PredecessorHloOrdering : public HloOrdering {
   // predecessors. An instruction is an element of its own predecessor set.
   //
   // Subclasses should fill this in to define the desired ordering.
-  tensorflow::gtl::FlatMap<const HloComputation*,
-                           std::unique_ptr<HloReachabilityMap>>
+  absl::flat_hash_map<const HloComputation*,
+                      std::unique_ptr<HloReachabilityMap>>
       predecessors_;
 };
 
@@ -204,7 +204,7 @@ class SequentialHloOrdering : public HloOrdering {
   // this map so more than one instruction may have the same position
   // value. This is not a problem because ExecutesBefore also verifies
   // instructions are in the same computation.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int> order_position_;
+  absl::flat_hash_map<const HloInstruction*, int> order_position_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
index 8c2f928ca1..59fd01cb58 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <functional>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
@@ -98,7 +99,7 @@ void HloPassPipeline::MaybeDumpHlo(const HloModule& module,
   if (!proto_dump_path.empty()) {
     static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);
     static auto* const module_id_to_pass_number =
-        new tensorflow::gtl::FlatMap<int64, int64>();
+        new absl::flat_hash_map<int64, int64>();
 
     tensorflow::mutex_lock lock(mu);
     const int64 pass_number = (*module_id_to_pass_number)[module.unique_id()]++;
diff --git a/tensorflow/compiler/xla/service/hlo_reachability.h b/tensorflow/compiler/xla/service/hlo_reachability.h
index b66a2aa4bd..5a5f01f8fd 100644
--- a/tensorflow/compiler/xla/service/hlo_reachability.h
+++ b/tensorflow/compiler/xla/service/hlo_reachability.h
@@ -19,11 +19,11 @@ limitations under the License.
 #include <list>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -154,7 +154,7 @@ class HloReachabilityMap {
 
   // Dense assignment from HloInstruction* to number. These numbers index
   // into the bit_vectors_ vector and into the bits within a BitVector.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int> indices_;
+  absl::flat_hash_map<const HloInstruction*, int> indices_;
 
   // Bitvectors holding the reachability to each instruction. The bit vector for
   // instruction X includes ones for each instruction which X is reachable from.
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index a438671936..abdd9a9212 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <set>
 #include <string>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -75,7 +76,7 @@ bool IsRematerializable(const HloInstruction* instruction) {
 // cache before, and eventually calling the IsRematerializable() API.
 bool CanBeRematerialized(
     const HloInstruction* instruction,
-    tensorflow::gtl::FlatMap<const HloInstruction*, bool>* remat_able) {
+    absl::flat_hash_map<const HloInstruction*, bool>* remat_able) {
   auto it = remat_able->find(instruction);
   if (it != remat_able->end()) {
     return it->second;
@@ -268,7 +269,7 @@ class InstructionList {
   Item* first_;
 
   // Item for each instruction.
-  tensorflow::gtl::FlatMap<const HloInstruction*, Item*> item_map_;
+  absl::flat_hash_map<const HloInstruction*, Item*> item_map_;
 };
 
 // Return the items which use the given LogicalBuffer. Sets
@@ -503,7 +504,7 @@ MemoryUsageTracker::MemoryUsageTracker(
   PointsToSet::BufferSet live_out_set =
       points_to_analysis.GetPointsToSet(computation_->root_instruction())
           .CreateFlattenedSet();
-  tensorflow::gtl::FlatMap<const LogicalBuffer*, BufferId>
+  absl::flat_hash_map<const LogicalBuffer*, BufferId>
       logical_buffer_to_buffer_id;
 
   for (auto* item = instruction_list_.first(); item != nullptr;
@@ -854,7 +855,7 @@ int64 RematerializationCost(const HloInstruction* instruction,
 Item* PickRematerializationCandidate(
     const MemoryUsageTracker& memory_tracker,
     const InstructionList& instruction_list, int64 memory_limit_bytes,
-    tensorflow::gtl::FlatMap<const HloInstruction*, bool>* remat_able) {
+    absl::flat_hash_map<const HloInstruction*, bool>* remat_able) {
   Item* best_item = nullptr;
   int64 best_cost = 0;
 
@@ -983,7 +984,7 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
   tensorflow::gtl::FlatSet<const HloInstruction*> remat_move_instructions;
 
   // The map from instructions to their rematerializable status.
-  tensorflow::gtl::FlatMap<const HloInstruction*, bool> remat_able;
+  absl::flat_hash_map<const HloInstruction*, bool> remat_able;
 
   // The peak memory of the computation at any point in the instruction
   // sequence.
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h
index 7330d73c09..5a02e3a8bb 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.h
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h
@@ -15,6 +15,7 @@
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_REMATERIALIZATION_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_REMATERIALIZATION_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -115,8 +116,7 @@ class HloRematerialization : public HloModulePass {
   // computations called from sequential context
   // (CallContext::kSequential). These values are updated as rematerialization
   // occurs.
-  tensorflow::gtl::FlatMap<const HloComputation*, int64>
-      computation_peak_memory_;
+  absl::flat_hash_map<const HloComputation*, int64> computation_peak_memory_;
 
   std::unique_ptr<TuplePointsToAnalysis> points_to_analysis_;
 
diff --git a/tensorflow/compiler/xla/service/hlo_schedule.cc b/tensorflow/compiler/xla/service/hlo_schedule.cc
index 3fc5dbeb02..7c5c98f04e 100644
--- a/tensorflow/compiler/xla/service/hlo_schedule.cc
+++ b/tensorflow/compiler/xla/service/hlo_schedule.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <queue>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -30,7 +31,7 @@ namespace xla {
 
 /* static */ StatusOr<HloSchedule> HloSchedule::CreateFromProto(
     const HloModule* module, const HloScheduleProto& proto) {
-  tensorflow::gtl::FlatMap<int64, const HloComputation*> id_to_computation;
+  absl::flat_hash_map<int64, const HloComputation*> id_to_computation;
   for (const HloComputation* computation : module->computations()) {
     id_to_computation[computation->unique_id()] = computation;
   }
@@ -44,7 +45,7 @@ namespace xla {
         << "No computation exists in HLO module with id " << computation_id;
     const HloComputation* computation = comp_it->second;
 
-    tensorflow::gtl::FlatMap<int64, const HloInstruction*> id_to_instruction;
+    absl::flat_hash_map<int64, const HloInstruction*> id_to_instruction;
     for (const HloInstruction* instruction : computation->instructions()) {
       id_to_instruction[instruction->unique_id()] = instruction;
     }
@@ -112,7 +113,7 @@ Status HloSchedule::UpdateComputationSchedule(
     const HloComputation* computation) {
   // Map from unique ID to HloInstruction pointer for instructions in the
   // computation.
-  tensorflow::gtl::FlatMap<int, const HloInstruction*> id_to_instruction;
+  absl::flat_hash_map<int, const HloInstruction*> id_to_instruction;
   for (const HloInstruction* instruction : computation->instructions()) {
     InsertOrDie(&id_to_instruction, instruction->unique_id(), instruction);
   }
@@ -126,15 +127,13 @@ Status HloSchedule::UpdateComputationSchedule(
   // Map from HloInstruction X to newly added instructions (instruction is in
   // computation, but not in schedule) which use X. If an instruction is not in
   // the map, then it has no users which are newly added instructions.
-  tensorflow::gtl::FlatMap<const HloInstruction*,
-                           std::vector<const HloInstruction*>>
+  absl::flat_hash_map<const HloInstruction*, std::vector<const HloInstruction*>>
       new_instruction_uses;
 
   // For each newly added instruction, this is the count of the instruction's
   // operands that have not yet been scheduled. When this value reaches zero,
   // then the instruction may be placed in the schedule.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int>
-      unscheduled_operand_count;
+  absl::flat_hash_map<const HloInstruction*, int> unscheduled_operand_count;
 
   // Create a worklist of newly added instructions which are ready to be added
   // to the schedule. Initialize worklist with those that have zero operands.
@@ -217,9 +216,9 @@ Status HloSchedule::Update() {
     }
     for (auto it = sequences_.begin(); it != sequences_.end();) {
       if (nonfusion_computations_ids.count(it->first) == 0) {
-        it = sequences_.erase(it);
+        sequences_.erase(it++);
       } else {
-        it++;
+        ++it;
       }
     }
   }
@@ -254,7 +253,7 @@ Status HloSchedule::Verify() const {
   // For each computation verify the set of instructions is the same and that
   // each dependency and control edge is honored.
   for (const HloComputation* computation : nonfusion_computations) {
-    tensorflow::gtl::FlatMap<const HloInstruction*, int> instruction_position;
+    absl::flat_hash_map<const HloInstruction*, int> instruction_position;
     int pos = 0;
     for (const HloInstruction* instruction :
          sequence(computation).instructions()) {
diff --git a/tensorflow/compiler/xla/service/hlo_schedule.h b/tensorflow/compiler/xla/service/hlo_schedule.h
index 270fe6039f..0a714101ee 100644
--- a/tensorflow/compiler/xla/service/hlo_schedule.h
+++ b/tensorflow/compiler/xla/service/hlo_schedule.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -103,8 +104,7 @@ class HloSchedule {
 
   // Returns a map from HloComputation unique ID to instruction sequence. The
   // map contains all sequences in the schedule.
-  const tensorflow::gtl::FlatMap<int64, HloInstructionSequence>& sequences()
-      const {
+  const absl::flat_hash_map<int64, HloInstructionSequence>& sequences() const {
     return sequences_;
   }
 
@@ -148,7 +148,7 @@ class HloSchedule {
   // A map from computation unique ID to instruction sequence. Unique IDs are
   // used rather than HloComputation pointers because HLO pointers are not
   // unique across HLO transformations because pointers may be recycled.
-  tensorflow::gtl::FlatMap<int64, HloInstructionSequence> sequences_;
+  absl::flat_hash_map<int64, HloInstructionSequence> sequences_;
 };
 
 std::ostream& operator<<(std::ostream& out, const HloSchedule& schedule);
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 6eb6658904..a7727824fe 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <set>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
@@ -23,7 +24,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -993,7 +993,7 @@ Status CheckSameIsHostTransfer(const HloInstruction* instr1,
 
 // Checks various invariants of send and recv instructions.
 Status VerifySendsAndRecvs(const HloModule& module) {
-  tensorflow::gtl::FlatMap<int64, const HloInstruction*> host_channels;
+  absl::flat_hash_map<int64, const HloInstruction*> host_channels;
   // Host send/recv instructions must have their own unique channel.
   auto check_unique_host_channel = [&](const HloInstruction* instruction) {
     const HloSendRecvInstruction* sendrecv =
@@ -1061,7 +1061,7 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
   TF_RETURN_IF_ERROR(VerifyHloStructure(module));
   TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module));
 
-  tensorflow::gtl::FlatMap<string, const HloInstruction*> instructions;
+  absl::flat_hash_map<string, const HloInstruction*> instructions;
 
   for (auto* computation : module->computations()) {
     for (const auto& instruction : computation->instructions()) {
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
index 06f0e1ed25..7ee789276d 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/indexed_array_analysis.h"
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -95,7 +96,7 @@ Status IndexedArrayAnalysis::TraverseAndPopulateCache(
   absl::InlinedVector<const HloInstruction*, 4> stack;
 
   enum DfsState { kDiscovered, kVisited };
-  gtl::FlatMap<const HloInstruction*, DfsState> dfs_state_map;
+  absl::flat_hash_map<const HloInstruction*, DfsState> dfs_state_map;
 
   stack.push_back(root);
   InsertOrDie(&dfs_state_map, root, kDiscovered);
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.h b/tensorflow/compiler/xla/service/indexed_array_analysis.h
index 3e238f97a0..e5aa67fd85 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.h
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.h
@@ -18,10 +18,10 @@ limitations under the License.
 
 #include <type_traits>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace xla {
@@ -360,7 +360,7 @@ class IndexedArrayAnalysis {
 
   std::vector<std::unique_ptr<Array>> owned_tensors_;
   std::vector<Literal> owned_literals_;
-  tensorflow::gtl::FlatMap<const HloInstruction*, Array*> cache_;
+  absl::flat_hash_map<const HloInstruction*, Array*> cache_;
 };
 
 // A pass that prints all non-trivial results returned by IndexedArrayAnalysis.
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index e884122fcb..5a99c40df4 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -22,11 +22,11 @@ limitations under the License.
 #include <vector>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -189,7 +189,7 @@ bool InstructionFusion::EffectivelyAtMostUnary(HloInstruction* hlo) {
 bool InstructionFusion::CanFuseOnAllPaths(
     HloInstruction* producer, HloInstruction* consumer,
     const HloInstructionSet& do_not_fuse,
-    tensorflow::gtl::FlatMap<std::pair<HloInstruction*, HloInstruction*>, bool>*
+    absl::flat_hash_map<std::pair<HloInstruction*, HloInstruction*>, bool>*
         result_cache) {
   if (consumer == producer) {
     return true;
@@ -241,7 +241,7 @@ InstructionFusion::ComputeGloballyUnfusible(
   // fusing operations that require duplication later depending on
   // is_expensive_().
   HloInstructionSet do_not_duplicate;
-  tensorflow::gtl::FlatMap<std::pair<HloInstruction*, HloInstruction*>, bool>
+  absl::flat_hash_map<std::pair<HloInstruction*, HloInstruction*>, bool>
       can_fuse_on_all_paths_result_cache;
   for (HloInstruction* consumer : post_order) {
     for (HloInstruction* producer : consumer->operands()) {
@@ -430,7 +430,7 @@ class ReversePostOrderFusionQueue : public FusionQueue {
 
  private:
   std::vector<HloInstruction*> post_order_;
-  tensorflow::gtl::FlatMap<HloInstruction*, int> post_order_index_;
+  absl::flat_hash_map<HloInstruction*, int> post_order_index_;
 };
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h
index c1ec3b18a1..da2032f6c7 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/instruction_fusion.h
@@ -1,3 +1,4 @@
+#include "absl/container/flat_hash_map.h"
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -158,8 +159,8 @@ class InstructionFusion : public HloModulePass {
   bool CanFuseOnAllPaths(
       HloInstruction* producer, HloInstruction* consumer,
       const HloInstructionSet& do_not_fuse,
-      tensorflow::gtl::FlatMap<std::pair<HloInstruction*, HloInstruction*>,
-                               bool>* result_cache);
+      absl::flat_hash_map<std::pair<HloInstruction*, HloInstruction*>, bool>*
+          result_cache);
 
   // Computes the set of nodes that we do not want to fuse into any of their
   // consumers based on a global analysis of the HLO graph.
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index e29c199c42..1591256fad 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -38,7 +39,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -228,8 +228,8 @@ class LayoutConstraints {
   // Array-shaped buffers which have not yet been constrained.
   std::set<LogicalBuffer::Id> unconstrained_buffer_ids_;
 
-  mutable tensorflow::gtl::FlatMap<const HloInstruction*,
-                                   std::unique_ptr<PointsToSet::BufferSet>>
+  mutable absl::flat_hash_map<const HloInstruction*,
+                              std::unique_ptr<PointsToSet::BufferSet>>
       buffer_sets_cache_;
 
   HloComputation* computation_;
diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD
index 540bbb7c7a..3934d2e493 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/BUILD
+++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -38,6 +38,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:logical_buffer",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@llvm//:core",
     ],
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
index 8d9fa99d82..88cde2d3d9 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
@@ -16,13 +16,13 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_ALIAS_ANALYSIS_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_ALIAS_ANALYSIS_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_cat.h"
 #include "llvm/IR/Module.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -77,14 +77,14 @@ class AliasAnalysis {
   // A map from a buffer slice to metadata corresponding to its alias.scope
   // metadata.  The index kParameterAliasSet is used to hold aliasing
   // information for parameters.
-  tensorflow::gtl::FlatMap<BufferAllocation::Slice, llvm::MDNode*,
-                           BufferAllocation::Slice::Hasher>
+  absl::flat_hash_map<BufferAllocation::Slice, llvm::MDNode*,
+                      BufferAllocation::Slice::Hasher>
       alias_scope_metadata_;
 
   // A map from a buffer slice to metadata corresponding to its noalias
   // metadata.
-  tensorflow::gtl::FlatMap<BufferAllocation::Slice, llvm::MDNode*,
-                           BufferAllocation::Slice::Hasher>
+  absl::flat_hash_map<BufferAllocation::Slice, llvm::MDNode*,
+                      BufferAllocation::Slice::Hasher>
       noalias_metadata_;
 };
 
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc
index b9ec31c497..95b1c20663 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.h b/tensorflow/compiler/xla/service/multi_output_fusion.h
index 0344626b26..9508ab2ed1 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.h
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <queue>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
@@ -126,7 +127,7 @@ class MultiOutputFusion : public HloModulePass {
   std::vector<FusionCandidate> candidates_;
 
   // A map that maps an instruction to the index_.
-  tensorflow::gtl::FlatMap<HloInstruction*, int> candidates_index_;
+  absl::flat_hash_map<HloInstruction*, int> candidates_index_;
 
   // The reachability map of current computation.
   std::unique_ptr<HloReachabilityMap> reachability_;
diff --git a/tensorflow/compiler/xla/service/name_uniquer.h b/tensorflow/compiler/xla/service/name_uniquer.h
index 6dd89c240f..1ac60f1cf4 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.h
+++ b/tensorflow/compiler/xla/service/name_uniquer.h
@@ -18,9 +18,9 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 
@@ -78,7 +78,7 @@ class NameUniquer {
 
   // Map from name prefix to the generator data structure which tracks used
   // identifiers and generates new ones.
-  tensorflow::gtl::FlatMap<string, SequentialIdGenerator> generated_names_;
+  absl::flat_hash_map<string, SequentialIdGenerator> generated_names_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(NameUniquer);
 };
diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.h b/tensorflow/compiler/xla/service/reduce_precision_insertion.h
index 4bb22428f3..0b4e82e8d6 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion.h
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.h
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
index a9e8a51e09..78392d3bb2 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
@@ -36,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/compactptrset.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc
index 56145822be..067cfcc17d 100644
--- a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc
+++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/service/while_util.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
index e8fe33e626..2590473c77 100644
--- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
+++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
@@ -15,17 +15,17 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h"
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/service/tuple_util.h"
 #include "tensorflow/compiler/xla/service/while_util.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
+using absl::flat_hash_map;
 using absl::InlinedVector;
-using tensorflow::gtl::FlatMap;
 using tensorflow::gtl::FlatSet;
 
 // Copies `to_hoist` to the computation containing `while_instr`, hoisting its
@@ -34,7 +34,7 @@ using tensorflow::gtl::FlatSet;
 // function hoists the operands in `unhoisted_invariant_instructions` and moves
 // them into `hoisted_instructions`.
 static void CreateLoopInvariantCopy(
-    FlatMap<HloInstruction*, HloInstruction*>* hoisted_instructions,
+    flat_hash_map<HloInstruction*, HloInstruction*>* hoisted_instructions,
     FlatSet<HloInstruction*>* unhoisted_invariant_instructions,
     HloInstruction* while_instr, HloInstruction* to_hoist) {
   HloComputation* parent_of_while = while_instr->parent();
@@ -147,7 +147,7 @@ WhileLoopInvariantCodeMotion::TryHoistingInvariantInstructionsFromWhileBody(
 
   // Maps instructions in the while body to instructions hoisted outside the
   // while that compute the same value.
-  FlatMap<HloInstruction*, HloInstruction*> hoisted_instructions;
+  flat_hash_map<HloInstruction*, HloInstruction*> hoisted_instructions;
 
   // Contains instructions that can be legally hoisted, but were deemed to be
   // unprofitable to be hoisted alone by NotWorthHoistingIndividually.  When we
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index 9a74f22395..07de8492ba 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -14,12 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
 #include "tensorflow/compiler/xla/service/while_loop_analysis.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -181,7 +181,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
                                           used_tuple_indices.end());
   std::sort(new_to_old_tuple_idx.begin(), new_to_old_tuple_idx.end());
 
-  tensorflow::gtl::FlatMap<int64, int64> old_to_new_tuple_idx;
+  absl::flat_hash_map<int64, int64> old_to_new_tuple_idx;
   for (int64 new_idx = 0; new_idx < new_to_old_tuple_idx.size(); ++new_idx) {
     int64 old_idx = new_to_old_tuple_idx[new_idx];
     old_to_new_tuple_idx[old_idx] = new_idx;
@@ -405,7 +405,7 @@ static StatusOr<bool> TryPropagateConstant(HloInstruction* while_op) {
   // build a map from the tuple element index to the constant value. Limit this
   // to scalar constant values because propagating array constants can regress
   // performance by forcing us to copy constants.
-  tensorflow::gtl::FlatMap<int, const HloInstruction*> index_to_constant;
+  absl::flat_hash_map<int, const HloInstruction*> index_to_constant;
   for (int i = 0; i < root_operands.size(); i++) {
     HloInstruction* instr = root_operands[i];
     if (instr->opcode() == HloOpcode::kGetTupleElement &&
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index f474ecb18c..06b6330321 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -422,6 +422,7 @@ xla_test(
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core:test",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
index db5a824de0..a6e70eb6ca 100644
--- a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
+++ b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
@@ -32,7 +33,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/regexp.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -83,7 +83,7 @@ struct ParsedProfileOutputLine {
 
 Status ParseOneProfileOutputLine(
     const string& line, bool expect_hlo,
-    gtl::FlatMap<string, ParsedProfileOutputLine>* parsed_results,
+    absl::flat_hash_map<string, ParsedProfileOutputLine>* parsed_results,
     absl::Span<const absl::string_view> opcodes_to_ignore = {}) {
   string separator = "[^:]*:: +";
   string match_percentage = R"(\d+\.\d*% +\d+Σ)";
@@ -208,7 +208,7 @@ XLA_TEST_F(HloProfileTest, ProfileSingleComputation) {
   std::vector<string> profile_output_lines =
       absl::StrSplit(profile_output, '\n');
 
-  gtl::FlatMap<string, ParsedProfileOutputLine> parsed_profile_lines;
+  absl::flat_hash_map<string, ParsedProfileOutputLine> parsed_profile_lines;
 
   TF_ASSERT_OK(ParseOneProfileOutputLine(
       profile_output_lines[1], /*expect_hlo=*/false, &parsed_profile_lines));
@@ -314,7 +314,7 @@ XLA_TEST_F(HloProfileTest, ProfileWhileComputation) {
 
   ASSERT_NE(while_body_profile_end, profile_output_lines.end());
 
-  gtl::FlatMap<string, ParsedProfileOutputLine> parsed_profile_lines;
+  absl::flat_hash_map<string, ParsedProfileOutputLine> parsed_profile_lines;
 
   for (auto while_body_profile_i = while_body_profile_start + 1;
        while_body_profile_i != while_body_profile_end; while_body_profile_i++) {
-- 
GitLab


From ec900f15e352e4b203b1f0678f7d2ff042df57d5 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 1 Oct 2018 13:46:31 -0700
Subject: [PATCH 0965/1357] Minor speed improvements to defun.

- EncodeArg in C instead of python.
- Also caches parsed device specs, and device spec hashes
- Adds a common way to register python types in C.
- Fastpath canonicalize function inputs when no kwargs are passed
- Set the func name attr directly instead of creating an op to wrap it.
- Rewrite IsAttrsHelper without caching

Before:
entry {
  name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
  iters: 30000
  wall_time: 101.803263028
  extras {
    key: "examples_per_sec"
    value {
      double_value: 9822.86785562
    }
  }
}

After:
entry {
  name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
  iters: 30000
  wall_time: 47.2899993261
  extras {
    key: "examples_per_sec"
    value {
      double_value: 21146.1199884
    }
  }
}
PiperOrigin-RevId: 215272962
---
 tensorflow/c/eager/c_api.cc                  |   8 +
 tensorflow/c/eager/c_api.h                   |   3 +
 tensorflow/python/eager/BUILD                |   1 +
 tensorflow/python/eager/function.py          | 100 +++------
 tensorflow/python/eager/function_test.py     |  26 ++-
 tensorflow/python/eager/pywrap_tfe.h         |   4 +
 tensorflow/python/eager/pywrap_tfe_src.cc    | 223 ++++++++++++++++++-
 tensorflow/python/framework/device.py        |  12 +-
 tensorflow/python/framework/sparse_tensor.py |   2 +-
 tensorflow/python/pywrap_tfe.i               |   1 +
 tensorflow/python/util/nest.py               |   4 +-
 tensorflow/python/util/util.cc               | 223 +++++++++++--------
 tensorflow/python/util/util.h                |  34 ++-
 tensorflow/python/util/util.i                |  10 +-
 14 files changed, 462 insertions(+), 189 deletions(-)

diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 0bf3d9542b..3554ec0bf3 100755
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -578,6 +578,14 @@ void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name,
   op->operation.MutableAttrs()->Set(attr_name, attr_value);
 }
 
+void TFE_OpSetAttrFunctionName(TFE_Op* op, const char* attr_name,
+                               const char* data, size_t length) {
+  tensorflow::AttrValue attr_value;
+  tensorflow::NameAttrList* func = attr_value.mutable_func();
+  func->set_name(data, length);
+  op->operation.MutableAttrs()->Set(attr_name, attr_value);
+}
+
 void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor,
                          TF_Status* status) {
   tensorflow::Tensor t;
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index 6323f8a053..b2454d8722 100755
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -313,6 +313,9 @@ TF_CAPI_EXPORT extern void TFE_OpSetAttrFunction(TFE_Op* op,
                                                  const char* attr_name,
                                                  const TFE_Op* value);
 
+TF_CAPI_EXPORT void TFE_OpSetAttrFunctionName(TFE_Op* op, const char* attr_name,
+                                              const char* data, size_t length);
+
 TF_CAPI_EXPORT extern void TFE_OpSetAttrTensor(TFE_Op* op,
                                                const char* attr_name,
                                                TF_Tensor* tensor,
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index d3d997e6df..d0c1a93118 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -37,6 +37,7 @@ cc_library(
         "//tensorflow/python:safe_ptr",
         "//third_party/py/numpy:headers",
         "//third_party/python_runtime:headers",
+        "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:variant",
     ],
 )
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 60a4f018cd..3b6f288fb9 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1005,52 +1005,8 @@ def func_graph_from_py_func(name,
   return func_graph
 
 
-_TensorType = collections.namedtuple("_TensorType", ["dtype", "shape"])
-
-
-def _encode_arg(arg):
-  """A canonical representation for this argument, for use in a cache key."""
-
-  # `defun` uses dtypes and shapes instead of `Tensors` as cache keys. Dtypes
-  # are used because TensorFlow graphs are not parametric w.r.t. dtypes. Shapes
-  # are used for both performance reasons, as much TensorFlow code specializes
-  # on known shapes to produce slimmer graphs, and correctness, as some
-  # high-level APIs require shapes to be fully-known.
-  #
-  # TODO(akshayka): Add support for sparse tensors.
-  #
-  # pylint: disable=protected-access
-  if isinstance(arg, ops.Tensor):
-    return _TensorType(arg.dtype, arg._shape_tuple())
-  elif isinstance(arg, ops.IndexedSlices):
-    if arg.dense_shape is not None:
-      return tuple([
-          _TensorType(arg.values.dtype, arg.values._shape_tuple()),
-          _TensorType(arg.indices.dtype, arg.indices._shape_tuple()),
-          _TensorType(arg.dense_shape.dtype, arg.dense_shape._shape_tuple()),
-      ])
-    else:
-      return tuple([
-          _TensorType(arg.values.dtype, arg.values._shape_tuple()),
-          _TensorType(arg.indices.dtype, arg.indices._shape_tuple()),
-      ])
-  # pylint: enable=protected-access
-  elif isinstance(arg, (list, tuple)):
-    return tuple([_encode_arg(elem) for elem in arg])
-  elif isinstance(arg, dict):
-    return tuple(
-        (_encode_arg(key), _encode_arg(arg[key])) for key in sorted(arg))
-  else:
-    try:
-      # If possible, keep only a weak reference to Python objects. Weak
-      # references hash to the same value as the original object.
-      # TODO(allenl): Clean up dead functions and their cache keys if the cache
-      # gets large. Right now creating objects with a defunned method, calling
-      # the method, and losing a reference to the object in a loop will leak
-      # memory here.
-      return weakref.ref(arg)
-    except TypeError:
-      return arg
+pywrap_tensorflow.RegisterType("Tensor", ops.Tensor)
+pywrap_tensorflow.RegisterType("IndexedSlices", ops.IndexedSlices)
 
 
 def _deterministic_dict_values(dictionary):
@@ -1120,6 +1076,8 @@ class PolymorphicFunction(object):
         offset + index: default
         for index, default in enumerate(fullargspec.defaults or [])
     }
+    self._default_values = fullargspec.defaults
+    self._default_values_start_index = offset
     if input_signature is None:
       self._input_signature = None
     else:
@@ -1180,7 +1138,7 @@ class PolymorphicFunction(object):
     """Computes the cache key given inputs and execution context."""
     if self._input_signature is None:
       inputs = (args, kwargs) if kwargs else args
-      cache_key = tuple(_encode_arg(arg) for arg in inputs)
+      cache_key = pywrap_tensorflow.TFE_Py_EncodeArg(inputs)
     else:
       del args, kwargs
       cache_key = self._flat_input_signature
@@ -1203,7 +1161,7 @@ class PolymorphicFunction(object):
     colocation_stack = (() if executing_eagerly else
                         tuple(default_graph._colocation_stack.peek_objs()))  # pylint: disable=protected-access
 
-    return cache_key + (execution_context, device_functions, colocation_stack)
+    return (cache_key, execution_context, device_functions, colocation_stack)
 
   def _canonicalize_function_inputs(self, *args, **kwargs):
     """Canonicalizes `args` and `kwargs`.
@@ -1231,26 +1189,32 @@ class PolymorphicFunction(object):
     # Maps from index of arg to its corresponding value, according to `args`
     # and `kwargs`; seeded with the default values for the named args that
     # aren't in `args`.
-    arg_indices_to_values = {
-        index: default
-        for index, default in six.iteritems(self._arg_indices_to_default_values)
-        if index >= len(args)
-    }
-    consumed_args = []
-    for arg, value in six.iteritems(kwargs):
-      index = self._args_to_indices.get(arg, None)
-      if index is not None:
-        arg_indices_to_values[index] = value
-        consumed_args.append(arg)
-      elif self._input_signature is not None:
-        raise ValueError("Cannot define a TensorFlow function from a Python "
-                         "function with keyword arguments when "
-                         "input_signature is provided.")
-    for arg in consumed_args:
-      # After this loop, `kwargs` will only contain true keyword arguments, as
-      # opposed to named arguments called in a keyword-like fashion.
-      kwargs.pop(arg)
-    inputs = args + _deterministic_dict_values(arg_indices_to_values)
+    if not kwargs:
+      if self._default_values:
+        inputs = args + self._default_values[len(args) -
+                                             self._default_values_start_index:]
+      else:
+        inputs = args
+    else:
+      arg_indices_to_values = {
+          index: default for index, default in six.iteritems(
+              self._arg_indices_to_default_values) if index >= len(args)
+      }
+      consumed_args = []
+      for arg, value in six.iteritems(kwargs):
+        index = self._args_to_indices.get(arg, None)
+        if index is not None:
+          arg_indices_to_values[index] = value
+          consumed_args.append(arg)
+        elif self._input_signature is not None:
+          raise ValueError("Cannot define a TensorFlow function from a Python "
+                           "function with keyword arguments when "
+                           "input_signature is provided.")
+      for arg in consumed_args:
+        # After this loop, `kwargs` will only contain true keyword arguments, as
+        # opposed to named arguments called in a keyword-like fashion.
+        kwargs.pop(arg)
+      inputs = args + _deterministic_dict_values(arg_indices_to_values)
     flat_inputs = nest.flatten(inputs)
 
     # Check for NumPy arrays in arguments and convert them to Tensors.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index afe3ba9893..9ce367a837 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1237,6 +1237,24 @@ class FunctionTest(test.TestCase):
     x = constant_op.constant([1.0, 2.0])
     self.assertAllEqual([2., 4.], self.evaluate(defined(x)))
 
+  def testCacheObjectHashCollisions(self):
+
+    class Foo(object):
+
+      def __hash__(self):
+        return 42
+
+    def func(foo):
+      del foo
+      return
+
+    defined = function.defun(func)
+    defined(Foo())
+    self.assertEqual(len(defined._function_cache), 1)
+
+    defined(Foo())
+    self.assertEqual(len(defined._function_cache), 2)
+
   def testPythonFunctionWithDefaultArgs(self):
 
     def func(foo, bar=1, baz=2):
@@ -1250,20 +1268,20 @@ class FunctionTest(test.TestCase):
 
     def cache_keys():
       """Sanitizes cache keys of non-input metadata."""
-      return tuple(key[:3] for key in defined._function_cache)
+      return tuple(key[0] for key in defined._function_cache)
 
     # `True` corresponds to the fact that we're executing eagerly
-    self.assertIn((0, 1, 20), cache_keys())
+    self.assertIn(('tRRR', (0, 1, 20)), cache_keys())
 
     defined(1)  # bar=1, baz=2
-    self.assertIn((1, 1, 2), cache_keys())
+    self.assertIn(('tRRR', (1, 1, 2)), cache_keys())
 
     # This matches the previous call.
     defined(foo=1)
     self.assertEqual(len(defined._function_cache), 2)
 
     defined(1, 2, 3)
-    self.assertIn((1, 2, 3), cache_keys())
+    self.assertIn(('tRRR', (1, 2, 3)), cache_keys())
 
     # This matches the previous call.
     defined(1, bar=2, baz=3)
diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h
index f1b4042ec9..decd635b58 100755
--- a/tensorflow/python/eager/pywrap_tfe.h
+++ b/tensorflow/python/eager/pywrap_tfe.h
@@ -224,4 +224,8 @@ PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim);
 // The shape is represented as a Python tuple of integers.
 PyObject* TFE_Py_TensorShapeOnDevice(PyObject* tensor);
 
+// Encodes the object as a tuple that is meant to be used as part of the key
+// for the defun function cache.
+PyObject* TFE_Py_EncodeArg(PyObject*);
+
 #endif  // TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 196e20e4d7..4b9f7f4100 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/python/eager/pywrap_tfe.h"
 
+#include "absl/strings/str_cat.h"
 #include "absl/types/variant.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_api_internal.h"
@@ -567,11 +568,8 @@ bool SetOpAttrScalar(
         return false;
       }
     }
-    TFE_Op* func = TFE_NewOp(
-        ctx, string(func_name.data(), func_name.size()).c_str(), status);
-    if (TF_GetCode(status) != TF_OK) return false;
-    TFE_OpSetAttrFunction(op, key, func);
-    TFE_DeleteOp(func);
+    TF_SetStatus(status, TF_OK, "");
+    TFE_OpSetAttrFunctionName(op, key, func_name.data(), func_name.size());
   } else {
     TF_SetStatus(
         status, TF_UNIMPLEMENTED,
@@ -2748,3 +2746,218 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs,
 
   return RecordGradient(op_name, inputs, attrs, results, name);
 }
+
+namespace {
+
+tensorflow::int64 GetPyNoneHash() {
+  tensorflow::int64 py_none_hash = PyObject_Hash(Py_None);
+  return py_none_hash;
+}
+
+struct EncodeResult {
+  string str;
+  std::vector<PyObject*> objects;
+
+  PyObject* ToPyTuple() {
+    PyObject* result = PyTuple_New(2);
+
+    PyTuple_SET_ITEM(result, 0, GetPythonObjectFromString(str.c_str()));
+
+    if (objects.empty()) {
+      Py_INCREF(Py_None);
+      PyTuple_SET_ITEM(result, 1, Py_None);
+    } else {
+      PyObject* objects_tuple = PyTuple_New(objects.size());
+
+      for (int i = 0; i < objects.size(); i++) {
+        PyTuple_SET_ITEM(objects_tuple, i, objects[i]);
+      }
+
+      PyTuple_SET_ITEM(result, 1, objects_tuple);
+    }
+
+    return result;
+  }
+};
+
+tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
+  if (EagerTensor_CheckExact(arg)) {
+    TFE_TensorHandle* t = EagerTensor_Handle(arg);
+    tensorflow::TensorShape tensor_shape;
+    TF_RETURN_IF_ERROR(t->handle->Shape(&tensor_shape));
+    absl::StrAppend(&result->str, t->handle->dtype);
+
+    for (tensorflow::int64 dim_size : tensor_shape.dim_sizes()) {
+      absl::StrAppend(&result->str, dim_size);
+    }
+
+    return tensorflow::Status::OK();
+  }
+
+  tensorflow::Safe_PyObjectPtr dtype_object(
+      PyObject_GetAttrString(arg, "dtype"));
+
+  if (dtype_object == nullptr) {
+    return tensorflow::errors::InvalidArgument(
+        "ops.Tensor object doesn't have dtype() attr.");
+  }
+
+  tensorflow::Safe_PyObjectPtr dtype_enum(
+      PyObject_GetAttrString(dtype_object.get(), "_type_enum"));
+
+  if (dtype_enum == nullptr) {
+    return tensorflow::errors::InvalidArgument(
+        "ops.Tensor's dtype object doesn't have _type_enum() attr.");
+  }
+
+  tensorflow::DataType dtype =
+      static_cast<tensorflow::DataType>(MakeInt(dtype_enum.get()));
+
+  absl::StrAppend(&result->str, dtype);
+  static char _shape_tuple[] = "_shape_tuple";
+  tensorflow::Safe_PyObjectPtr shape_tuple(
+      PyObject_CallMethod(arg, _shape_tuple, nullptr));
+
+  if (shape_tuple == nullptr) {
+    return tensorflow::errors::InvalidArgument(
+        "ops.Tensor object doesn't have _shape_tuple() method.");
+  }
+
+  if (shape_tuple.get() == Py_None) {
+    // Unknown shape, encode that directly.
+    absl::StrAppend(&result->str, GetPyNoneHash());
+    return tensorflow::Status::OK();
+  }
+
+  tensorflow::Safe_PyObjectPtr shape_seq(PySequence_Fast(
+      shape_tuple.get(), "shape_tuple didn't return a sequence"));
+
+  int len = PySequence_Fast_GET_SIZE(shape_seq.get());
+  for (int i = 0; i < len; ++i) {
+    PyObject* item = PySequence_Fast_GET_ITEM(shape_seq.get(), i);
+    if (item == Py_None) {
+      absl::StrAppend(&result->str, GetPyNoneHash());
+    } else {
+      absl::StrAppend(&result->str, MakeInt(item));
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+const char kTensor[] = "T";
+const char kIndexedSlices[] = "I";
+const char kList[] = "L";
+const char kTuple[] = "t";
+const char kDict[] = "D";
+const char kRaw[] = "R";
+
+tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, EncodeResult* result);
+
+// This function doesn't set the type of sequence before
+tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type,
+                                         EncodeResult* result) {
+  tensorflow::Safe_PyObjectPtr arg_seq(
+      PySequence_Fast(arg, "unable to create seq from list/tuple"));
+
+  absl::StrAppend(&result->str, type);
+  int len = PySequence_Fast_GET_SIZE(arg_seq.get());
+  for (int i = 0; i < len; ++i) {
+    PyObject* item = PySequence_Fast_GET_ITEM(arg_seq.get(), i);
+    if (item == Py_None) {
+      absl::StrAppend(&result->str, GetPyNoneHash());
+    } else {
+      TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(item, result));
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, EncodeResult* result) {
+  if (tensorflow::swig::IsTensor(arg)) {
+    absl::StrAppend(&result->str, kTensor);
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(arg, result));
+  } else if (tensorflow::swig::IsIndexedSlices(arg)) {
+    absl::StrAppend(&result->str, kIndexedSlices);
+    tensorflow::Safe_PyObjectPtr values(PyObject_GetAttrString(arg, "values"));
+    if (values == nullptr) {
+      PyErr_Clear();
+      return tensorflow::errors::InvalidArgument(
+          "IndexedSlices does not have a values attr");
+    }
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(values.get(), result));
+
+    tensorflow::Safe_PyObjectPtr indices(
+        PyObject_GetAttrString(arg, "indices"));
+    if (indices == nullptr) {
+      PyErr_Clear();
+      return tensorflow::errors::InvalidArgument(
+          "IndexedSlices does not have a indices attr");
+    }
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(indices.get(), result));
+
+    tensorflow::Safe_PyObjectPtr dense_shape(
+        PyObject_GetAttrString(arg, "dense_shape"));
+    if (dense_shape == nullptr) {
+      PyErr_Clear();
+      return tensorflow::errors::InvalidArgument(
+          "IndexedSlices does not have a dense_shape attr");
+    }
+    if (dense_shape.get() != Py_None) {
+      TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(dense_shape.get(), result));
+    }
+  } else if (PyList_Check(arg)) {
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kList, result));
+  } else if (PyTuple_Check(arg)) {
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kTuple, result));
+  } else if (PyDict_Check(arg)) {
+    tensorflow::Safe_PyObjectPtr keys(PyDict_Keys(arg));
+    if (PyList_Sort(keys.get()) == -1) {
+      return tensorflow::errors::Internal("Unable to sort keys");
+    }
+
+    absl::StrAppend(&result->str, kDict);
+    int len = PyList_Size(keys.get());
+
+    for (int i = 0; i < len; i++) {
+      PyObject* key = PyList_GetItem(keys.get(), i);
+      TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(key, result));
+      PyObject* value = PyDict_GetItem(arg, key);
+      TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(value, result));
+    }
+  } else {
+    PyObject* object = PyWeakref_NewRef(arg, nullptr);
+
+    if (object == nullptr) {
+      PyErr_Clear();
+
+      object = arg;
+      Py_INCREF(object);
+    }
+
+    absl::StrAppend(&result->str, kRaw);
+    result->objects.push_back(object);
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace
+
+// `defun` uses dtypes and shapes instead of `Tensors` as cache keys. Dtypes
+// are used because TensorFlow graphs are not parametric w.r.t. dtypes. Shapes
+// are used for both performance reasons, as much TensorFlow code specializes
+// on known shapes to produce slimmer graphs, and correctness, as some
+// high-level APIs require shapes to be fully-known.
+//
+// TODO(nareshmodi): Add support for sparse tensors.
+PyObject* TFE_Py_EncodeArg(PyObject* arg) {
+  EncodeResult result;
+  const auto status = TFE_Py_EncodeArgHelper(arg, &result);
+  if (MaybeRaiseExceptionFromStatus(status, nullptr)) {
+    return nullptr;
+  }
+
+  return result.ToPyTuple();
+}
diff --git a/tensorflow/python/framework/device.py b/tensorflow/python/framework/device.py
index 06c653097a..7f6e0a75a5 100644
--- a/tensorflow/python/framework/device.py
+++ b/tensorflow/python/framework/device.py
@@ -87,6 +87,7 @@ class DeviceSpec(object):
     else:
       self.device_type = device_type
     self.device_index = device_index
+    self._hash = hash(self.to_string())
 
   def _clear(self):
     self._job = None
@@ -234,7 +235,7 @@ class DeviceSpec(object):
     return self.to_string() == other.to_string()
 
   def __hash__(self):
-    return hash(self.to_string())
+    return self._hash
 
 
 def check_valid(spec):
@@ -266,6 +267,7 @@ def canonical_name(device):
 # possible to compare the device function stacks belonging to different
 # graphs in a meaningful way.
 _cached_device_functions = {}
+_cached_device_specs = {}
 _cache_lock = threading.Lock()
 
 
@@ -297,7 +299,13 @@ def merge_device(spec):
   """
   with _cache_lock:
     if not isinstance(spec, DeviceSpec):
-      spec = DeviceSpec.from_string(spec or "")
+      cached_device_spec = _cached_device_specs.get(spec, None)
+      if cached_device_spec is None:
+        device_spec = DeviceSpec.from_string(spec or "")
+        _cached_device_specs[spec] = device_spec
+        spec = device_spec
+      else:
+        spec = cached_device_spec
     cached_function = _cached_device_functions.get(spec, None)
     if cached_function is not None:
       return cached_function
diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py
index 41ef2e11d1..440e3a0968 100644
--- a/tensorflow/python/framework/sparse_tensor.py
+++ b/tensorflow/python/framework/sparse_tensor.py
@@ -245,7 +245,7 @@ class SparseTensor(_TensorLike):
 SparseTensorValue = collections.namedtuple(
     "SparseTensorValue", ["indices", "values", "dense_shape"])
 tf_export("SparseTensorValue")(SparseTensorValue)
-pywrap_tensorflow.RegisterSparseTensorValueClass(SparseTensorValue)
+pywrap_tensorflow.RegisterType("SparseTensorValue", SparseTensorValue)
 
 
 @tf_export("convert_to_tensor_or_sparse_tensor")
diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index c411a58b70..61e0abbfcb 100755
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -67,6 +67,7 @@ limitations under the License.
 %rename("%s") TFE_ContextStartStep;
 %rename("%s") TFE_ContextEndStep;
 %rename("%s") TFE_Py_RegisterVSpace;
+%rename("%s") TFE_Py_EncodeArg;
 
 %{
 #include "tensorflow/python/eager/pywrap_tfe.h"
diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index 758cba7487..d67dbde304 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -819,5 +819,5 @@ def flatten_with_joined_string_paths(structure, separator="/"):
   return list(zip(flat_string_paths, flatten(structure)))
 
 
-_pywrap_tensorflow.RegisterSequenceClass(_collections.Sequence)
-_pywrap_tensorflow.RegisterMappingClass(_collections.Mapping)
+_pywrap_tensorflow.RegisterType("Mapping", _collections.Mapping)
+_pywrap_tensorflow.RegisterType("Sequence", _collections.Sequence)
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index 38b8491c66..7b3e618e84 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -29,14 +29,51 @@ limitations under the License.
 namespace tensorflow {
 namespace swig {
 
-namespace {
+std::unordered_map<string, PyObject*>* PythonTypesMap() {
+  static auto* m = new std::unordered_map<string, PyObject*>();
+  return m;
+}
+
+PyObject* GetRegisteredType(const string& key) {
+  auto* m = PythonTypesMap();
+  auto it = m->find(key);
+  if (it == m->end()) return nullptr;
+  return it->second;
+}
+
+PyObject* RegisterType(PyObject* type_name, PyObject* type) {
+  if (!PyType_Check(type)) {
+    PyErr_SetString(PyExc_TypeError,
+                    tensorflow::strings::StrCat("Expecting a type, got ",
+                                                Py_TYPE(type)->tp_name)
+                        .c_str());
+    return nullptr;
+  }
 
-// Type object for collections.Sequence. This is set by RegisterSequenceClass.
-PyObject* CollectionsSequenceType = nullptr;
-// Type object for collections.Mapping, set by RegisterMappingClass.
-PyObject* CollectionsMappingType = nullptr;
-PyTypeObject* SparseTensorValueType = nullptr;
+  string key;
+  if (PyBytes_Check(type_name)) {
+    key = PyBytes_AsString(type_name);
+  }
+#if PY_MAJOR_VERSION >= 3
+  if (PyUnicode_Check(type_name)) {
+    key = PyUnicode_AsUTF8(type_name);
+  }
+#endif
 
+  if (PythonTypesMap()->find(key) != PythonTypesMap()->end()) {
+    PyErr_SetString(PyExc_TypeError, tensorflow::strings::StrCat(
+                                         "Type already registered for ", key)
+                                         .c_str());
+    return nullptr;
+  }
+
+  Py_INCREF(type);
+  PythonTypesMap()->emplace(key, type);
+
+  Py_RETURN_NONE;
+}
+
+namespace {
 const int kMaxItemsInCache = 1024;
 
 bool WarnedThatSetIsNotSequence = false;
@@ -177,46 +214,82 @@ class CachedTypeCheck {
 // Returns -1 if an error occurred.
 int IsMappingHelper(PyObject* o) {
   static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
-    return PyObject_IsInstance(to_check, CollectionsMappingType);
+    PyObject* collections_mapping_type = GetRegisteredType("Mapping");
+    if (TF_PREDICT_FALSE(collections_mapping_type == nullptr)) {
+      PyErr_SetString(PyExc_RuntimeError,
+                      tensorflow::strings::StrCat(
+                          "collections.Mapping type has not been set. "
+                          "Please register the type with the identifier "
+                          "\"Mapping\" using RegisterType.")
+                          .c_str());
+      return -1;
+    }
+    return PyObject_IsInstance(to_check, collections_mapping_type);
   });
   if (PyDict_Check(o)) return true;
-  if (TF_PREDICT_FALSE(CollectionsMappingType == nullptr)) {
-    PyErr_SetString(
-        PyExc_RuntimeError,
-        tensorflow::strings::StrCat(
-            "collections.Mapping type has not been set. "
-            "Please call RegisterMappingClass before using this module")
-            .c_str());
-    return -1;
-  }
   return check_cache->CachedLookup(o);
 }
 
 // Returns 1 if `o` is an instance of attrs-decorated class.
 // Returns 0 otherwise.
 int IsAttrsHelper(PyObject* o) {
-  Safe_PyObjectPtr cls(PyObject_GetAttrString(o, "__class__"));
-  if (cls) {
-    return PyObject_HasAttrString(cls.get(), "__attrs_attrs__");
-  } else {
+  static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
+    Safe_PyObjectPtr cls(PyObject_GetAttrString(to_check, "__class__"));
+    if (cls) {
+      return PyObject_HasAttrString(cls.get(), "__attrs_attrs__");
+    }
+
     // PyObject_GetAttrString returns null on error
     PyErr_Clear();
     return 0;
-  }
+  });
+  return check_cache->CachedLookup(o);
 }
 
-// Returns 1 if `o` is considered a sequence for the purposes of Flatten().
+// Returns 1 if `o` is an object of type IndexedSlices.
 // Returns 0 otherwise.
 // Returns -1 if an error occurred.
-int IsSequenceHelper(PyObject* o) {
+int IsIndexedSlicesHelper(PyObject* o) {
   static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
-    int is_instance = PyObject_IsInstance(to_check, CollectionsSequenceType);
-
-    // Don't cache a failed is_instance check.
-    if (is_instance == -1) return -1;
+    PyObject* indexed_slices_type = GetRegisteredType("IndexedSlices");
+    if (TF_PREDICT_FALSE(indexed_slices_type == nullptr)) {
+      PyErr_SetString(PyExc_RuntimeError,
+                      tensorflow::strings::StrCat(
+                          "IndexedSlices type has not been set. "
+                          "Please register the type with the identifier "
+                          "\"IndexedSlices\" using RegisterType.")
+                          .c_str());
+      return -1;
+    }
+    return PyObject_IsInstance(to_check, indexed_slices_type);
+  });
+  return check_cache->CachedLookup(o);
+}
 
-    return static_cast<int>(is_instance != 0 && !IsString(to_check));
+// Returns 1 if `o` is a Tensor.
+// Returns 0 otherwise.
+// Returns -1 if an error occurred.
+int IsTensorHelper(PyObject* o) {
+  static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
+    PyObject* tensor_type = GetRegisteredType("Tensor");
+    if (TF_PREDICT_FALSE(tensor_type == nullptr)) {
+      PyErr_SetString(PyExc_RuntimeError,
+                      tensorflow::strings::StrCat(
+                          "Tensor type has not been set. "
+                          "Please register the type with the identifier "
+                          "\"Tensor\" using RegisterType.")
+                          .c_str());
+      return -1;
+    }
+    return PyObject_IsInstance(to_check, tensor_type);
   });
+  return check_cache->CachedLookup(o);
+}
+
+// Returns 1 if `o` is considered a sequence for the purposes of Flatten().
+// Returns 0 otherwise.
+// Returns -1 if an error occurred.
+int IsSequenceHelper(PyObject* o) {
   // We treat dicts and other mappings as special cases of sequences.
   if (IsMappingHelper(o)) return true;
   if (IsAttrsHelper(o)) return true;
@@ -226,15 +299,24 @@ int IsSequenceHelper(PyObject* o) {
                     "so consider avoiding using them.";
     WarnedThatSetIsNotSequence = true;
   }
-  if (TF_PREDICT_FALSE(CollectionsSequenceType == nullptr)) {
-    PyErr_SetString(
-        PyExc_RuntimeError,
-        tensorflow::strings::StrCat(
-            "collections.Sequence type has not been set. "
-            "Please call RegisterSequenceClass before using this module")
-            .c_str());
-    return -1;
-  }
+  static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
+    PyObject* collections_sequence_type = GetRegisteredType("Sequence");
+    if (TF_PREDICT_FALSE(collections_sequence_type == nullptr)) {
+      PyErr_SetString(PyExc_RuntimeError,
+                      tensorflow::strings::StrCat(
+                          "collections.Sequence type has not been set. "
+                          "Please register the type with the identifier "
+                          "\"Sequence\" using RegisterType.")
+                          .c_str());
+      return -1;
+    }
+    int is_instance = PyObject_IsInstance(to_check, collections_sequence_type);
+
+    // Don't cache a failed is_instance check.
+    if (is_instance == -1) return -1;
+
+    return static_cast<int>(is_instance != 0 && !IsString(to_check));
+  });
   return check_cache->CachedLookup(o);
 }
 
@@ -401,11 +483,13 @@ class AttrsValueIterator : public ValueIterator {
 };
 
 bool IsSparseTensorValueType(PyObject* o) {
-  if (TF_PREDICT_FALSE(SparseTensorValueType == nullptr)) {
+  PyObject* sparse_tensor_value_type = GetRegisteredType("SparseTensorValue");
+  if (TF_PREDICT_FALSE(sparse_tensor_value_type == nullptr)) {
     return false;
   }
 
-  return PyObject_TypeCheck(o, SparseTensorValueType) == 1;
+  return PyObject_TypeCheck(
+             o, reinterpret_cast<PyTypeObject*>(sparse_tensor_value_type)) == 1;
 }
 
 int IsSequenceForDataHelper(PyObject* o) {
@@ -647,49 +731,11 @@ bool AssertSameStructureHelper(
 
 }  // namespace
 
-void RegisterSequenceClass(PyObject* sequence_class) {
-  if (!PyType_Check(sequence_class)) {
-    PyErr_SetString(
-        PyExc_TypeError,
-        tensorflow::strings::StrCat(
-            "Expecting a class definition for `collections.Sequence`. Got ",
-            Py_TYPE(sequence_class)->tp_name)
-            .c_str());
-    return;
-  }
-  CollectionsSequenceType = sequence_class;
-}
-
-void RegisterMappingClass(PyObject* mapping_class) {
-  if (!PyType_Check(mapping_class)) {
-    PyErr_SetString(
-        PyExc_TypeError,
-        tensorflow::strings::StrCat(
-            "Expecting a class definition for `collections.Mapping`. Got ",
-            Py_TYPE(mapping_class)->tp_name)
-            .c_str());
-    return;
-  }
-  CollectionsMappingType = mapping_class;
-}
-
-void RegisterSparseTensorValueClass(PyObject* sparse_tensor_value_class) {
-  if (!PyType_Check(sparse_tensor_value_class)) {
-    PyErr_SetString(
-        PyExc_TypeError,
-        tensorflow::strings::StrCat(
-            "Expecting a class definition for `SparseTensorValue`. Got ",
-            Py_TYPE(sparse_tensor_value_class)->tp_name)
-            .c_str());
-    return;
-  }
-  SparseTensorValueType =
-      reinterpret_cast<PyTypeObject*>(sparse_tensor_value_class);
-}
-
 bool IsSequence(PyObject* o) { return IsSequenceHelper(o) == 1; }
 bool IsMapping(PyObject* o) { return IsMappingHelper(o) == 1; }
 bool IsAttrs(PyObject* o) { return IsAttrsHelper(o) == 1; }
+bool IsTensor(PyObject* o) { return IsTensorHelper(o) == 1; }
+bool IsIndexedSlices(PyObject* o) { return IsIndexedSlicesHelper(o) == 1; }
 
 PyObject* Flatten(PyObject* nested) {
   PyObject* list = PyList_New(0);
@@ -737,13 +783,15 @@ PyObject* IsNamedtuple(PyObject* o, bool strict) {
     }
   }
 
-  if (TF_PREDICT_FALSE(CollectionsSequenceType == nullptr)) {
-    PyErr_SetString(
-        PyExc_RuntimeError,
-        tensorflow::strings::StrCat(
-            "collections.Sequence type has not been set. "
-            "Please call RegisterSequenceClass before using this module")
-            .c_str());
+  PyObject* collections_sequence_type = GetRegisteredType("Sequence");
+
+  if (TF_PREDICT_FALSE(collections_sequence_type == nullptr)) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    tensorflow::strings::StrCat(
+                        "collections.Sequence type has not been set. "
+                        "Please register the type with the identifier "
+                        "\"Sequence\" using RegisterType.")
+                        .c_str());
     return nullptr;
   }
 
@@ -755,7 +803,8 @@ PyObject* IsNamedtuple(PyObject* o, bool strict) {
   }
 
   Safe_PyObjectPtr fields = make_safe(PyObject_GetAttrString(o, "_fields"));
-  int is_instance = PyObject_IsInstance(fields.get(), CollectionsSequenceType);
+  int is_instance =
+      PyObject_IsInstance(fields.get(), collections_sequence_type);
   if (is_instance == 0) {
     Py_RETURN_FALSE;
   } else if (is_instance == -1) {
diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h
index 01f85ea1dc..f37cd527d8 100644
--- a/tensorflow/python/util/util.h
+++ b/tensorflow/python/util/util.h
@@ -65,6 +65,24 @@ bool IsMapping(PyObject* o);
 //   True if the object is an instance of an attr.s decorated class.
 bool IsAttrs(PyObject* o);
 
+// Returns a true if its input is an ops.Tensor.
+//
+// Args:
+//   seq: the input to be checked.
+//
+// Returns:
+//   True if the object is a tensor.
+bool IsTensor(PyObject* o);
+
+// Returns a true if its input is an ops.IndexesSlices.
+//
+// Args:
+//   seq: the input to be checked.
+//
+// Returns:
+//   True if the object is an ops.IndexedSlices.
+bool IsIndexedSlices(PyObject* o);
+
 // Implements the same interface as tensorflow.util.nest._same_namedtuples
 // Returns Py_True iff the two namedtuples have the same name and fields.
 // Raises RuntimeError if `o1` or `o2` don't look like namedtuples (don't have
@@ -130,18 +148,6 @@ PyObject* AssertSameStructure(PyObject* o1, PyObject* o2, bool check_types);
 //   TypeError: The nest is or contains a dict with non-sortable keys.
 PyObject* Flatten(PyObject* nested);
 
-// RegisterSequenceClass is used to pass PyTypeObject for collections.Sequence
-// (which is defined in python) into the C++ world.
-// Alternative approach could be to import the collections modules and retrieve
-// the type from the module. This approach also requires some trigger from
-// Python so that we know that Python interpreter had been initialzied.
-void RegisterSequenceClass(PyObject* sequence_class);
-// Like RegisterSequenceClass, but for collections.Mapping.
-void RegisterMappingClass(PyObject* mapping_class);
-// Similar to the above functions, except for the
-// sparse_tensor.SparseTensorValue class.
-void RegisterSparseTensorValueClass(PyObject* sparse_tensor_value_class);
-
 // The tensorflow.python.data package has its own nest utility that follows very
 // slightly different semantics for its functions than the tensorflow.python
 // nest utility. Returns a true if its input is a collections.Sequence (except
@@ -167,6 +173,10 @@ PyObject* FlattenForData(PyObject* nested);
 PyObject* AssertSameStructureForData(PyObject* o1, PyObject* o2,
                                      bool check_types);
 
+// RegisterType is used to pass PyTypeObject (which is defined in python) for an
+// arbitrary identifier `type_name` into C++.
+PyObject* RegisterType(PyObject* type_name, PyObject* type);
+
 }  // namespace swig
 }  // namespace tensorflow
 
diff --git a/tensorflow/python/util/util.i b/tensorflow/python/util/util.i
index 32a6e684fa..3c0ec87fa4 100644
--- a/tensorflow/python/util/util.i
+++ b/tensorflow/python/util/util.i
@@ -28,14 +28,8 @@ limitations under the License.
 // for functions in this module because they use python methods that need GIL.
 // TODO(iga): Find a way not to leak such definitions across files.
 
-%unignore tensorflow::swig::RegisterSequenceClass;
-%noexception tensorflow::swig::RegisterSequenceClass;
-
-%unignore tensorflow::swig::RegisterMappingClass;
-%noexception tensorflow::swig::RegisterMappingClass;
-
-%unignore tensorflow::swig::RegisterSparseTensorValueClass;
-%noexception tensorflow::swig::RegisterSparseTensorValueClass;
+%unignore tensorflow::swig::RegisterType;
+%noexception tensorflow::swig::RegisterType;
 
 %feature("docstring") tensorflow::swig::IsSequence
 """Returns a true if its input is a collections.Sequence (except strings).
-- 
GitLab


From 3aa8b781b342c36302bd500737ab4ce9b2b87a45 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 1 Oct 2018 14:07:17 -0700
Subject: [PATCH 0966/1357] Disable async remote tests

PiperOrigin-RevId: 215276816
---
 tensorflow/contrib/eager/python/remote_test.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/eager/python/remote_test.py b/tensorflow/contrib/eager/python/remote_test.py
index ba6fe9701d..7aa4b598b8 100644
--- a/tensorflow/contrib/eager/python/remote_test.py
+++ b/tensorflow/contrib/eager/python/remote_test.py
@@ -47,8 +47,9 @@ def run_sync_and_async(f):
 
   @functools.wraps(f)
   def decorator(self, *args, **kwargs):
-    with context.execution_mode(context.ASYNC):
-      f(self, *args, **kwargs)
+    # TODO(b/117110239): Re-enable.
+    # with context.execution_mode(context.ASYNC):
+    #   f(self, *args, **kwargs)
 
     with context.execution_mode(context.SYNC):
       f(self, *args, **kwargs)
-- 
GitLab


From d7edbeb8dcc81a9cabc922ae46f549fe6b498eb9 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Mon, 1 Oct 2018 14:09:23 -0700
Subject: [PATCH 0967/1357] Update keras_applications to 1.0.6 and
 keras_preprocessing to 1.0.5. This removes the transitive keras and scipy
 dependencies in TensorFlow.

PiperOrigin-RevId: 215277190
---
 tensorflow/tools/ci_build/Dockerfile.cmake                | 4 ++--
 tensorflow/tools/ci_build/install/install_pip_packages.sh | 8 ++++----
 .../ci_build/install/install_python3.5_pip_packages.sh    | 4 ++--
 .../ci_build/install/install_python3.6_pip_packages.sh    | 4 ++--
 tensorflow/tools/docker/Dockerfile                        | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel                  | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel-gpu              | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel-mkl              | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel-mkl-horovod      | 4 ++--
 tensorflow/tools/docker/Dockerfile.gpu                    | 4 ++--
 tensorflow/tools/docker/Dockerfile.mkl                    | 4 ++--
 tensorflow/tools/docker/Dockerfile.mkl-horovod            | 4 ++--
 tensorflow/tools/pip_package/setup.py                     | 4 ++--
 13 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake
index b7450c83de..ef0024fdb4 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cmake
+++ b/tensorflow/tools/ci_build/Dockerfile.cmake
@@ -28,8 +28,8 @@ RUN pip install --upgrade astor
 RUN pip install --upgrade gast
 RUN pip install --upgrade numpy
 RUN pip install --upgrade termcolor
-RUN pip install keras_applications==1.0.5
-RUN pip install keras_preprocessing==1.0.3
+RUN pip install --upgrade keras_applications
+RUN pip install --upgrade keras_preprocessing
 
 # Install golang
 RUN apt-get install -t xenial-backports -y golang-1.9
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 4ced96f90b..b90f3f3b97 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -115,10 +115,10 @@ pip2 install --upgrade setuptools==39.1.0
 pip3 install --upgrade setuptools==39.1.0
 
 # Keras
-pip2 install keras_applications==1.0.5 --no-deps
-pip3 install keras_applications==1.0.5 --no-deps
-pip2 install keras_preprocessing==1.0.3 --no-deps
-pip3 install keras_preprocessing==1.0.3 --no-deps
+pip2 install keras_applications==1.0.6 --no-deps
+pip3 install keras_applications==1.0.6 --no-deps
+pip2 install keras_preprocessing==1.0.5 --no-deps
+pip3 install keras_preprocessing==1.0.5 --no-deps
 pip2 install --upgrade h5py==2.8.0
 pip3 install --upgrade h5py==2.8.0
 
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index 37e6b51f66..61d4fe3fe8 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -85,8 +85,8 @@ pip3.5 install --upgrade termcolor
 pip3.5 install --upgrade setuptools==39.1.0
 
 # Keras
-pip3.5 install keras_applications==1.0.5
-pip3.5 install keras_preprocessing==1.0.3
+pip3.5 install keras_applications==1.0.6
+pip3.5 install keras_preprocessing==1.0.5
 pip3.5 install --upgrade h5py==2.8.0
 
 # Install last working version of setuptools.
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index 7520ff74cb..8949af8a88 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -102,7 +102,7 @@ pip3 install --upgrade setuptools==39.1.0
 pip3 install --upgrade h5py==2.8.0
 
 # Keras
-pip3 install keras_applications==1.0.5
-pip3 install keras_preprocessing==1.0.3
+pip3 install keras_applications==1.0.6
+pip3 install keras_preprocessing==1.0.5
 
 # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh)
diff --git a/tensorflow/tools/docker/Dockerfile b/tensorflow/tools/docker/Dockerfile
index b5a6c05193..205128ad58 100644
--- a/tensorflow/tools/docker/Dockerfile
+++ b/tensorflow/tools/docker/Dockerfile
@@ -29,8 +29,8 @@ RUN pip --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         numpy \
         pandas \
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index c741e8ad0c..6f8e91fccf 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -33,8 +33,8 @@ RUN pip --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         mock \
         numpy \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index f544725af4..69a117fda6 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -55,8 +55,8 @@ RUN pip --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         mock \
         numpy \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
index db7c701289..e433e9ebb2 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -52,8 +52,8 @@ RUN ${PIP} --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         mock \
         numpy \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
index 987b582d10..48f2400569 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
@@ -45,8 +45,8 @@ RUN ${PIP} --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         mock \
         numpy \
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index 781bf9e851..7dc92a888b 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -42,8 +42,8 @@ RUN pip --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         numpy \
         pandas \
diff --git a/tensorflow/tools/docker/Dockerfile.mkl b/tensorflow/tools/docker/Dockerfile.mkl
index 641c9e3b16..ac41cffe4b 100755
--- a/tensorflow/tools/docker/Dockerfile.mkl
+++ b/tensorflow/tools/docker/Dockerfile.mkl
@@ -38,8 +38,8 @@ RUN ${PIP} --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         numpy \
         pandas \
diff --git a/tensorflow/tools/docker/Dockerfile.mkl-horovod b/tensorflow/tools/docker/Dockerfile.mkl-horovod
index 2b11679f54..4daf4fefff 100755
--- a/tensorflow/tools/docker/Dockerfile.mkl-horovod
+++ b/tensorflow/tools/docker/Dockerfile.mkl-horovod
@@ -38,8 +38,8 @@ RUN ${PIP} --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         numpy \
         pandas \
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 88c9c20d36..d864a7a039 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -51,8 +51,8 @@ REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
     'astor >= 0.6.0',
     'gast >= 0.2.0',
-    'keras_applications >= 1.0.5',
-    'keras_preprocessing >= 1.0.3',
+    'keras_applications >= 1.0.6',
+    'keras_preprocessing >= 1.0.5',
     'numpy >= 1.13.3',
     'six >= 1.10.0',
     'protobuf >= 3.6.1',
-- 
GitLab


From 094e1953b7df0bbb9bd4d0e3329b3b4611edf984 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 1 Oct 2018 14:14:32 -0700
Subject: [PATCH 0968/1357] Fix benchmark regression.

PiperOrigin-RevId: 215278033
---
 tensorflow/python/ops/conv2d_benchmark.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/conv2d_benchmark.py b/tensorflow/python/ops/conv2d_benchmark.py
index 28111c2730..f40488afbe 100644
--- a/tensorflow/python/ops/conv2d_benchmark.py
+++ b/tensorflow/python/ops/conv2d_benchmark.py
@@ -63,9 +63,9 @@ def build_graph(device, dtype, data_format, input_shape, filter_shape, strides,
     An array of tensors to run()
   """
   with ops.device("/%s:0" % device):
-    inp = variables.Variable(
+    inp = variables.VariableV1(
         random_ops.truncated_normal(input_shape, dtype=dtype))
-    filt = variables.Variable(
+    filt = variables.VariableV1(
         random_ops.truncated_normal(filter_shape, dtype=dtype))
 
     outputs = []
-- 
GitLab


From 5e3c2255b7f90146a895cd20267de699fbb15c27 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Mon, 1 Oct 2018 14:38:57 -0700
Subject: [PATCH 0969/1357] internal change

PiperOrigin-RevId: 215282721
---
 tensorflow/docs_src/BUILD                     |   14 -
 tensorflow/docs_src/__init__.py               |    0
 .../performance/xla/operation_semantics.md    | 2426 +++++++++++++++++
 tensorflow/tools/docs/BUILD                   |    1 -
 tensorflow/tools/docs/build_docs_test.py      |    6 +-
 5 files changed, 2430 insertions(+), 17 deletions(-)
 delete mode 100644 tensorflow/docs_src/BUILD
 delete mode 100644 tensorflow/docs_src/__init__.py
 create mode 100644 tensorflow/docs_src/performance/xla/operation_semantics.md

diff --git a/tensorflow/docs_src/BUILD b/tensorflow/docs_src/BUILD
deleted file mode 100644
index 34bf7b6a11..0000000000
--- a/tensorflow/docs_src/BUILD
+++ /dev/null
@@ -1,14 +0,0 @@
-# Files used to generate TensorFlow docs.
-
-licenses(["notice"])  # Apache 2.0
-
-package(
-    default_visibility = ["//tensorflow:internal"],
-)
-
-exports_files(["LICENSE"])
-
-filegroup(
-    name = "docs_src",
-    data = glob(["**/*.md"]),
-)
diff --git a/tensorflow/docs_src/__init__.py b/tensorflow/docs_src/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
new file mode 100644
index 0000000000..96d269bec4
--- /dev/null
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -0,0 +1,2426 @@
+# Operation Semantics
+
+The following describes the semantics of operations defined in the
+[`XlaBuilder`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+interface. Typically, these operations map one-to-one to operations defined in
+the RPC interface in
+[`xla_data.proto`](https://www.tensorflow.org/code/tensorflow/compiler/xla/xla_data.proto).
+
+A note on nomenclature: the generalized data type XLA deals with is an
+N-dimensional array holding elements of some uniform type (such as 32-bit
+float). Throughout the documentation, *array* is used to denote an
+arbitrary-dimensional array. For convenience, special cases have more specific
+and familiar names; for example a *vector* is a 1-dimensional array and a
+*matrix* is a 2-dimensional array.
+
+## AllToAll
+
+See also
+[`XlaBuilder::AllToAll`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Alltoall is a collective operation that sends data from all cores to all cores.
+It has two phases:
+
+1.  the scatter phase. On each core, the operand is split into `split_count`
+    number of blocks along the `split_dimensions`, and the blocks are scattered
+    to all cores, e.g., the ith block is send to the ith core.
+2.  the gather phase. Each core concatenates the received blocks along the
+    `concat_dimension`.
+
+The participating cores can be configured by:
+
+-   `replica_groups`: each ReplicaGroup contains a list of replica id. If empty,
+    all replicas belong to one group in the order of 0 - (n-1). Alltoall will be
+    applied within subgroups in the specified order. For example, replica
+    groups = {{1,2,3},{4,5,0}} means, an Alltoall will be applied within replica
+    1, 2, 3, and in the gather phase, the received blocks will be concatenated
+    in the order of 1, 2, 3; another Alltoall will be applied within replica 4,
+    5, 0, and the concatenation order is 4, 5, 0.
+
+Prerequisites:
+
+-   The dimension size of the operand on the split_dimension is divisible by
+    split_count.
+-   The operand's shape is not tuple.
+
+<b> `AllToAll(operand, split_dimension, concat_dimension, split_count,
+replica_groups)` </b>
+
+
+| Arguments          | Type                  | Semantics                       |
+| ------------------ | --------------------- | ------------------------------- |
+| `operand`          | `XlaOp`               | n dimensional input array       |
+| `split_dimension`  | `int64`               | A value in the interval `[0,    |
+:                    :                       : n)` that names the dimension    :
+:                    :                       : along which the operand is      :
+:                    :                       : split                           :
+| `concat_dimension` | `int64`               | a value in the interval `[0,    |
+:                    :                       : n)` that names the dimension    :
+:                    :                       : along which the split blocks    :
+:                    :                       : are concatenated                :
+| `split_count`      | `int64`               | the number of cores that        |
+:                    :                       : participate this operation. If  :
+:                    :                       : `replica_groups` is empty, this :
+:                    :                       : should be the number of         :
+:                    :                       : replicas; otherwise, this       :
+:                    :                       : should be equal to the number   :
+:                    :                       : of replicas in each group.      :
+| `replica_groups`   | `ReplicaGroup` vector | each group contains a list of   |
+:                    :                       : replica id.                     :
+
+Below shows an example of Alltoall.
+
+```
+XlaBuilder b("alltoall");
+auto x = Parameter(&b, 0, ShapeUtil::MakeShape(F32, {4, 16}), "x");
+AllToAll(x, /*split_dimension=*/1, /*concat_dimension=*/0, /*split_count=*/4);
+```
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/xla/ops_alltoall.png">
+</div>
+
+In this example, there are 4 cores participating the Alltoall. On each core, the
+operand is split into 4 parts along dimension 0, so each part has shape
+f32[4,4]. The 4 parts are scattered to all cores. Then each core concatenates
+the received parts along dimension 1, in the order or core 0-4. So the output on
+each core has shape f32[16,4].
+
+## BatchNormGrad
+
+See also
+[`XlaBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+Calculates gradients of batch norm.
+
+<b> `BatchNormGrad(operand, scale, mean, variance, grad_output, epsilon, feature_index)` </b>
+
+| Arguments       | Type                    | Semantics                        |
+| --------------- | ----------------------- | -------------------------------- |
+| `operand`       | `XlaOp`                 | n dimensional array to be        |
+:                 :                         : normalized (x)                   :
+| `scale`         | `XlaOp`                 | 1 dimensional array              |
+:                 :                         : (\\(\gamma\\))                   :
+| `mean`          | `XlaOp`                 | 1 dimensional array (\\(\mu\\))  |
+| `variance`      | `XlaOp`                 | 1 dimensional array              |
+:                 :                         : (\\(\sigma^2\\))                 :
+| `grad_output`   | `XlaOp`                 | Gradients passed to              |
+:                 :                         : `BatchNormTraining`              :
+:                 :                         : (\\( \nabla y\\))                :
+| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
+| `feature_index` | `int64`                 | Index to feature dimension in    |
+:                 :                         : `operand`                        :
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the gradients with
+respect to `operand`, `offset` and `scale` across all the other dimensions. The
+`feature_index` must be a valid index for the feature dimension in `operand`.
+
+The three gradients are defined by the following formulas (assuming a
+4-dimensional tensor as `operand` and with feature dimension index \\(l\\),
+batch size `m` and spatial sizes `w` and `h`):
+
+\\[ \begin{split} c_l&=
+\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h
+\left( \nabla y_{ijkl} \frac{x_{ijkl} - \mu_l}{\sigma^2_l+\epsilon} \right)
+\\\\
+\nabla x_{ijkl} &= \frac{\gamma_{l}}{\sqrt{\sigma^2_{l}+\epsilon}}
+\left( \nabla y_{ijkl} - \mathrm{mean}(\nabla y) - c_l (x_{ijkl} - \mu_{l})
+\right)
+\\\\
+\nabla \gamma_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \left( \nabla y_{ijkl}
+\frac{x_{ijkl} - \mu_l}{\sqrt{\sigma^2_{l}+\epsilon}} \right)
+\\\\\
+\nabla \beta_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl}
+\end{split} \\]
+
+The inputs `mean` and `variance` represent moments value
+across batch and spatial dimensions.
+
+The output type is a tuple of three handles:
+
+| Outputs        | Type                    | Semantics                         |
+| -------------  | ----------------------- | --------------------------------- |
+| `grad_operand` | `XlaOp`                 | gradient with respect to input    |
+:                :                         : `operand` (\\( \nabla x\\))       :
+| `grad_scale`   | `XlaOp`                 | gradient with respect to input    |
+:                :                         : `scale` (\\( \nabla \gamma\\))    :
+| `grad_offset`  | `XlaOp`                 | gradient with respect to input    |
+:                :                         : `offset`(\\( \nabla \beta\\))     :
+
+## BatchNormInference
+
+See also
+[`XlaBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+Normalizes an array across batch and spatial dimensions.
+
+<b> `BatchNormInference(operand, scale, offset, mean, variance, epsilon, feature_index)` </b>
+
+Arguments       | Type    | Semantics
+--------------- | ------- | ---------------------------------------
+`operand`       | `XlaOp` | n dimensional array to be normalized
+`scale`         | `XlaOp` | 1 dimensional array
+`offset`        | `XlaOp` | 1 dimensional array
+`mean`          | `XlaOp` | 1 dimensional array
+`variance`      | `XlaOp` | 1 dimensional array
+`epsilon`       | `float` | Epsilon value
+`feature_index` | `int64` | Index to feature dimension in `operand`
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the mean and variance
+across all the other dimensions and uses the mean and variance to normalize each
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
+
+`BatchNormInference`  is equivalent to calling `BatchNormTraining` without
+computing `mean` and `variance` for each batch. It uses the input `mean` and
+`variance` instead as estimated values. The purpose of this op is to reduce
+latency in inference, hence the name `BatchNormInference`.
+
+The output is an n-dimensional, normalized array with the same shape as input
+`operand`.
+
+## BatchNormTraining
+
+See also
+[`XlaBuilder::BatchNormTraining`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and [`the original batch normalization paper`](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+Normalizes an array across batch and spatial dimensions.
+
+<b> `BatchNormTraining(operand, scale, offset, epsilon, feature_index)` </b>
+
+Arguments       | Type    | Semantics
+--------------- | ------- | ----------------------------------------
+`operand`       | `XlaOp` | n dimensional array to be normalized (x)
+`scale`         | `XlaOp` | 1 dimensional array (\\(\gamma\\))
+`offset`        | `XlaOp` | 1 dimensional array (\\(\beta\\))
+`epsilon`       | `float` | Epsilon value (\\(\epsilon\\))
+`feature_index` | `int64` | Index to feature dimension in `operand`
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the mean and variance
+across all the other dimensions and uses the mean and variance to normalize each
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
+
+The algorithm goes as follows for each batch in `operand` \\(x\\) that
+contains `m` elements with `w` and `h` as the size of spatial dimensions
+(assuming `operand` is an 4 dimensional array):
+
+- Calculates batch mean \\(\mu_l\\) for each feature `l` in feature dimension:
+\\(\mu_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h x_{ijkl}\\)
+
+- Calculates batch variance \\(\sigma^2_l\\):
+\\(\sigma^2_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (x_{ijkl} - \mu_l)^2\\)
+
+- Normalizes, scales and shifts:
+\\(y_{ijkl}=\frac{\gamma_l(x_{ijkl}-\mu_l)}{\sqrt[2]{\sigma^2_l+\epsilon}}+\beta_l\\)
+
+The epsilon value, usually a small number, is added to avoid divide-by-zero errors.
+
+The output type is a tuple of three `XlaOp`s:
+
+| Outputs      | Type                    | Semantics                            |
+| ------------ | ----------------------- | -------------------------------------|
+| `output`     | `XlaOp`                 | n dimensional array with the same    |
+:              :                         : shape as input `operand` (y)         :
+| `batch_mean` | `XlaOp`                 | 1 dimensional array (\\(\mu\\))      |
+| `batch_var`  | `XlaOp`                 | 1 dimensional array (\\(\sigma^2\\)) |
+
+The `batch_mean` and `batch_var` are moments calculated across the batch and
+spatial dimensions using the formulas above.
+
+## BitcastConvertType
+
+See also
+[`XlaBuilder::BitcastConvertType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Similar to a `tf.bitcast` in TensorFlow, performs an element-wise bitcast
+operation from a data shape to a target shape. The dimensions must match, and
+the conversion is an element-wise one; e.g. `s32` elements become `f32` elements
+via bitcast routine. Bitcast is implemented as a low-level cast, so machines
+with different floating-point representations will give different results.
+
+<b> `BitcastConvertType(operand, new_element_type)` </b>
+
+Arguments          | Type            | Semantics
+------------------ | --------------- | ---------------------------
+`operand`          | `XlaOp`         | array of type T with dims D
+`new_element_type` | `PrimitiveType` | type U
+
+The dimensions of the operand and the target shape must match. The bit-width of
+the source and destination element types must be equal. The source
+and destination element types must not be tuples.
+
+## Broadcast
+
+See also
+[`XlaBuilder::Broadcast`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Adds dimensions to an array by duplicating the data in the array.
+
+<b> `Broadcast(operand, broadcast_sizes)` </b>
+
+Arguments         | Type                | Semantics
+----------------- | ------------------- | -------------------------------
+`operand`         | `XlaOp`             | The array to duplicate
+`broadcast_sizes` | `ArraySlice<int64>` | The sizes of the new dimensions
+
+The new dimensions are inserted on the left, i.e. if `broadcast_sizes` has
+values `{a0, ..., aN}` and the operand shape has dimensions `{b0, ..., bM}` then
+the shape of the output has dimensions `{a0, ..., aN, b0, ..., bM}`.
+
+The new dimensions index into copies of the operand, i.e.
+
+```
+output[i0, ..., iN, j0, ..., jM] = operand[j0, ..., jM]
+```
+
+For example, if `operand` is a scalar `f32` with value `2.0f`, and
+`broadcast_sizes` is `{2, 3}`, then the result will be an array with shape
+`f32[2, 3]` and all the values in the result will be `2.0f`.
+
+## Call
+
+See also
+[`XlaBuilder::Call`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Invokes a computation with the given arguments.
+
+<b> `Call(computation, args...)` </b>
+
+| Arguments     | Type                   | Semantics                           |
+| ------------- | ---------------------- | ----------------------------------- |
+| `computation` | `XlaComputation`       | computation of type `T_0, T_1, ..., |
+:               :                        : T_N -> S` with N parameters of      :
+:               :                        : arbitrary type                      :
+| `args`        | sequence of N `XlaOp`s | N arguments of arbitrary type       |
+
+The arity and types of the `args` must match the parameters of the
+`computation`. It is allowed to have no `args`.
+
+## Clamp
+
+See also
+[`XlaBuilder::Clamp`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Clamps an operand to within the range between a minimum and maximum value.
+
+<b> `Clamp(min, operand, max)` </b>
+
+Arguments | Type    | Semantics
+--------- | ------- | ---------------
+`min`     | `XlaOp` | array of type T
+`operand` | `XlaOp` | array of type T
+`max`     | `XlaOp` | array of type T
+
+Given an operand and minimum and maximum values, returns the operand if it is in
+the range between the minimum and maximum, else returns the minimum value if the
+operand is below this range or the maximum value if the operand is above this
+range.  That is, `clamp(a, x, b) =  min(max(a, x), b)`.
+
+All three arrays must be the same shape. Alternatively, as a restricted form of
+[broadcasting](broadcasting.md), `min` and/or `max` can be a scalar of type `T`.
+
+Example with scalar `min` and `max`:
+
+```
+let operand: s32[3] = {-1, 5, 9};
+let min: s32 = 0;
+let max: s32 = 6;
+==>
+Clamp(min, operand, max) = s32[3]{0, 5, 6};
+```
+
+## Collapse
+
+See also
+[`XlaBuilder::Collapse`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and the `tf.reshape` operation.
+
+Collapses dimensions of an array into one dimension.
+
+<b> `Collapse(operand, dimensions)` </b>
+
+Arguments    | Type           | Semantics
+------------ | -------------- | -----------------------------------------------
+`operand`    | `XlaOp`        | array of type T
+`dimensions` | `int64` vector | in-order, consecutive subset of T's dimensions.
+
+Collapse replaces the given subset of the operand's dimensions by a single
+dimension. The input arguments are an arbitrary array of type T and a
+compile-time-constant vector of dimension indices. The dimension indices must be
+an in-order (low to high dimension numbers), consecutive subset of T's
+dimensions. Thus, {0, 1, 2}, {0, 1}, or {1, 2} are all valid dimension sets, but
+{1, 0} or {0, 2} are not. They are replaced by a single new dimension, in the
+same position in the dimension sequence as those they replace, with the new
+dimension size equal to the product of original dimension sizes. The lowest
+dimension number in `dimensions` is the slowest varying dimension (most major)
+in the loop nest which collapses these dimension, and the highest dimension
+number is fastest varying (most minor). See the `tf.reshape` operator
+if more general collapse ordering is needed.
+
+For example, let v be an array of 24 elements:
+
+```
+let v = f32[4x2x3] {{{10, 11, 12},  {15, 16, 17}},
+                    {{20, 21, 22},  {25, 26, 27}},
+                    {{30, 31, 32},  {35, 36, 37}},
+                    {{40, 41, 42},  {45, 46, 47}}};
+
+// Collapse to a single dimension, leaving one dimension.
+let v012 = Collapse(v, {0,1,2});
+then v012 == f32[24] {10, 11, 12, 15, 16, 17,
+                      20, 21, 22, 25, 26, 27,
+                      30, 31, 32, 35, 36, 37,
+                      40, 41, 42, 45, 46, 47};
+
+// Collapse the two lower dimensions, leaving two dimensions.
+let v01 = Collapse(v, {0,1});
+then v01 == f32[4x6] {{10, 11, 12, 15, 16, 17},
+                      {20, 21, 22, 25, 26, 27},
+                      {30, 31, 32, 35, 36, 37},
+                      {40, 41, 42, 45, 46, 47}};
+
+// Collapse the two higher dimensions, leaving two dimensions.
+let v12 = Collapse(v, {1,2});
+then v12 == f32[8x3] {{10, 11, 12},
+                      {15, 16, 17},
+                      {20, 21, 22},
+                      {25, 26, 27},
+                      {30, 31, 32},
+                      {35, 36, 37},
+                      {40, 41, 42},
+                      {45, 46, 47}};
+
+```
+
+## Concatenate
+
+See also
+[`XlaBuilder::ConcatInDim`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Concatenate composes an array from multiple array operands. The array is of the
+same rank as each of the input array operands (which must be of the same rank as
+each other) and contains the arguments in the order that they were specified.
+
+<b> `Concatenate(operands..., dimension)` </b>
+
+| Arguments   | Type                  | Semantics                              |
+| ----------- | --------------------- | -------------------------------------- |
+| `operands`  | sequence of N `XlaOp` | N arrays of type T with dimensions     |
+:             :                       : [L0, L1, ...]. Requires N >= 1.        :
+| `dimension` | `int64`               | A value in the interval `[0, N)` that  |
+:             :                       : names the dimension to be concatenated :
+:             :                       : between the `operands`.                :
+
+With the exception of `dimension` all dimensions must be the same. This is
+because XLA does not support "ragged" arrays. Also note that rank-0 values
+cannot be concatenated (as it's impossible to name the dimension along which the
+concatenation occurs).
+
+1-dimensional example:
+
+```
+Concat({{2, 3}, {4, 5}, {6, 7}}, 0)
+>>> {2, 3, 4, 5, 6, 7}
+```
+
+2-dimensional example:
+
+```
+let a = {
+  {1, 2},
+  {3, 4},
+  {5, 6},
+};
+let b = {
+  {7, 8},
+};
+Concat({a, b}, 0)
+>>> {
+  {1, 2},
+  {3, 4},
+  {5, 6},
+  {7, 8},
+}
+```
+
+Diagram:
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="https://www.tensorflow.org/images/ops_concatenate.png">
+</div>
+
+## Conditional
+
+See also
+[`XlaBuilder::Conditional`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Conditional(pred, true_operand, true_computation, false_operand,
+false_computation)` </b>
+
+Arguments           | Type             | Semantics
+------------------- | ---------------- | ---------------------------------
+`pred`              | `XlaOp`          | Scalar of type `PRED`
+`true_operand`      | `XlaOp`          | Argument of type `T_0`
+`true_computation`  | `XlaComputation` | XlaComputation of type `T_0 -> S`
+`false_operand`     | `XlaOp`          | Argument of type `T_1`
+`false_computation` | `XlaComputation` | XlaComputation of type `T_1 -> S`
+
+Executes `true_computation` if `pred` is `true`, `false_computation` if `pred`
+is `false`, and returns the result.
+
+The `true_computation` must take in a single argument of type `T_0` and will be
+invoked with `true_operand` which must be of the same type. The
+`false_computation` must take in a single argument of type `T_1` and will be
+invoked with `false_operand` which must be of the same type. The type of the
+returned value of `true_computation` and `false_computation` must be the same.
+
+Note that only one of `true_computation` and `false_computation` will be
+executed depending on the value of `pred`.
+
+## Conv (convolution)
+
+See also
+[`XlaBuilder::Conv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+As ConvWithGeneralPadding, but the padding is specified in a short-hand way as
+either SAME or VALID. SAME padding pads the input (`lhs`) with zeroes so that
+the output has the same shape as the input when not taking striding into
+account. VALID padding simply means no padding.
+
+## ConvWithGeneralPadding (convolution)
+
+See also
+[`XlaBuilder::ConvWithGeneralPadding`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Computes a convolution of the kind used in neural networks. Here, a convolution
+can be thought of as a n-dimensional window moving across a n-dimensional base
+area and a computation is performed for each possible position of the window.
+
+| Arguments             | Type                 | Semantics                     |
+| --------------------- | -------------------- | ----------------------------- |
+| `lhs`                 | `XlaOp`              | rank n+2 array of inputs      |
+| `rhs`                 | `XlaOp`              | rank n+2 array of kernel      |
+:                       :                      : weights                       :
+| `window_strides`      | `ArraySlice<int64>`  | n-d array of kernel strides   |
+| `padding`             | `ArraySlice<         | n-d array of (low, high)      |
+:                       : pair<int64, int64>>` : padding                       :
+| `lhs_dilation`        | `ArraySlice<int64>`  | n-d lhs dilation factor array |
+| `rhs_dilation`        | `ArraySlice<int64>`  | n-d rhs dilation factor array |
+| `feature_group_count` | int64                | the number of feature groups  |
+
+Let n be the number of spatial dimensions. The `lhs` argument is a rank n+2
+array describing the base area. This is called the input, even though of course
+the rhs is also an input. In a neural network, these are the input activations.
+The n+2 dimensions are, in this order:
+
+*   `batch`: Each coordinate in this dimension represents an independent input
+    for which convolution is carried out.
+*   `z/depth/features`: Each (y,x) position in the base area has a vector
+    associated to it, which goes into this dimension.
+*   `spatial_dims`: Describes the `n` spatial dimensions that define the base
+    area that the window moves across.
+
+The `rhs` argument is a rank n+2 array describing the convolutional
+filter/kernel/window. The dimensions are, in this order:
+
+*   `output-z`: The `z` dimension of the output.
+*   `input-z`: The size of this dimension times `feature_group_count` should
+    equal the size of the `z` dimension in lhs.
+*   `spatial_dims`: Describes the `n` spatial dimensions that define the n-d
+    window that moves across the base area.
+
+The `window_strides` argument specifies the stride of the convolutional window
+in the spatial dimensions. For example, if the stride in the first spatial
+dimension is 3, then the window can only be placed at coordinates where the
+first spatial index is divisible by 3.
+
+The `padding` argument specifies the amount of zero padding to be applied to the
+base area. The amount of padding can be negative -- the absolute value of
+negative padding indicates the number of elements to remove from the specified
+dimension before doing the convolution. `padding[0]` specifies the padding for
+dimension `y` and `padding[1]` specifies the padding for dimension `x`. Each
+pair has the low padding as the first element and the high padding as the second
+element. The low padding is applied in the direction of lower indices while the
+high padding is applied in the direction of higher indices. For example, if
+`padding[1]` is `(2,3)` then there will be a padding by 2 zeroes on the left and
+by 3 zeroes on the right in the second spatial dimension. Using padding is
+equivalent to inserting those same zero values into the input (`lhs`) before
+doing the convolution.
+
+The `lhs_dilation` and `rhs_dilation` arguments specify the dilation factor to
+be applied to the lhs and rhs, respectively, in each spatial dimension. If the
+dilation factor in a spatial dimension is d, then d-1 holes are implicitly
+placed between each of the entries in that dimension, increasing the size of the
+array. The holes are filled with a no-op value, which for convolution means
+zeroes.
+
+Dilation of the rhs is also called atrous convolution. For more details, see
+`tf.nn.atrous_conv2d`. Dilation of the lhs is also called transposed
+convolution. For more details, see `tf.nn.conv2d_transpose`.
+
+The `feature_group_count` argument (default value 1) can be used for grouped
+convolutions. `feature_group_count` needs to be a divisor of both the input and
+the output feature dimension. If `feature_group_count` is greater than 1, it
+means that conceptually the input and output feature dimension and the `rhs`
+output feature dimension are split evenly into `feature_group_count` many
+groups, each group consisting of a consecutive subsequence of features. The
+input feature dimension of `rhs` needs to be equal to the `lhs` input feature
+dimension divided by `feature_group_count` (so it already has the size of a
+group of input features). The i-th groups are used together to compute
+`feature_group_count` many separate convolutions. The results of these
+convolutions are concatenated together in the output feature dimension.
+
+For depthwise convolution the `feature_group_count` argument would be set to the
+input feature dimension, and the filter would be reshaped from
+`[filter_height, filter_width, in_channels, channel_multiplier]` to
+`[filter_height, filter_width, 1, in_channels * channel_multiplier]`. For more
+details, see `tf.nn.depthwise_conv2d`.
+
+The output shape has these dimensions, in this order:
+
+*   `batch`: Same size as `batch` on the input (`lhs`).
+*   `z`: Same size as `output-z` on the kernel (`rhs`).
+*   `spatial_dims`: One value for each valid placement of the convolutional
+    window.
+
+The valid placements of the convolutional window are determined by the strides
+and the size of the base area after padding.
+
+To describe what a convolution does, consider a 2d convolution, and pick some
+fixed `batch`, `z`, `y`, `x` coordinates in the output. Then `(y,x)` is a
+position of a corner of the window within the base area (e.g. the upper left
+corner, depending on how you interpret the spatial dimensions). We now have a 2d
+window, taken from the base area, where each 2d point is associated to a 1d
+vector, so we get a 3d box. From the convolutional kernel, since we fixed the
+output coordinate `z`, we also have a 3d box. The two boxes have the same
+dimensions, so we can take the sum of the element-wise products between the two
+boxes (similar to a dot product). That is the output value.
+
+Note that if `output-z` is e.g., 5, then each position of the window produces 5
+values in the output into the `z` dimension of the output. These values differ
+in what part of the convolutional kernel is used - there is a separate 3d box of
+values used for each `output-z` coordinate. So you could think of it as 5
+separate convolutions with a different filter for each of them.
+
+Here is pseudo-code for a 2d convolution with padding and striding:
+
+```
+for (b, oz, oy, ox) {  // output coordinates
+  value = 0;
+  for (iz, ky, kx) {  // kernel coordinates and input z
+    iy = oy*stride_y + ky - pad_low_y;
+    ix = ox*stride_x + kx - pad_low_x;
+    if ((iy, ix) inside the base area considered without padding) {
+      value += input(b, iz, iy, ix) * kernel(oz, iz, ky, kx);
+    }
+  }
+  output(b, oz, oy, ox) = value;
+}
+```
+
+## ConvertElementType
+
+See also
+[`XlaBuilder::ConvertElementType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Similar to an element-wise `static_cast` in C++, performs an element-wise
+conversion operation from a data shape to a target shape. The dimensions must
+match, and the conversion is an element-wise one; e.g. `s32` elements become
+`f32` elements via an `s32`-to-`f32` conversion routine.
+
+<b> `ConvertElementType(operand, new_element_type)` </b>
+
+Arguments          | Type            | Semantics
+------------------ | --------------- | ---------------------------
+`operand`          | `XlaOp`         | array of type T with dims D
+`new_element_type` | `PrimitiveType` | type U
+
+The dimensions of the operand and the target shape must match. The source and
+destination element types must not be tuples.
+
+A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float
+conversion routine such as round-to-nearest-even.
+
+> Note: The precise float-to-int and visa-versa conversions are currently
+> unspecified, but may become additional arguments to the convert operation in
+> the future.  Not all possible conversions have been implemented for all
+>targets.
+
+```
+let a: s32[3] = {0, 1, 2};
+let b: f32[3] = convert(a, f32);
+then b == f32[3]{0.0, 1.0, 2.0}
+```
+
+## CrossReplicaSum
+
+See also
+[`XlaBuilder::CrossReplicaSum`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Computes a sum across replicas.
+
+<b> `CrossReplicaSum(operand)` </b>
+
+Arguments | Type    | Semantics
+--------- | ------- | -----------------------------
+`operand` | `XlaOp` | Array to sum across replicas.
+| `replica_group_ids`    | `int64` vector | Group ID for each replica.      |
+
+The output shape is the same as the input shape. For example, if there are two
+replicas and the operand has the value `(1.0, 2.5)` and `(3.0, 5.25)`
+respectively on the two replicas, then the output value from this op will be
+`(4.0, 7.75)` on both replicas.
+
+`replica_group_ids` identifies the group ID of each replica. The group ID must
+either be empty (all replicas belong to a single group), or contain the same
+number of elements as the number of replicas. For example, if
+`replica_group_ids` = {0, 1, 2, 3, 0, 1, 2, 3} has eight replicas, there are
+four subgroups of replica IDs: {0, 4}, {1, 5}, {2, 6}, and {3, 7}. The size of
+each subgroup *must* be identical, so, for example, using:
+`replica_group_ids` = {0, 1, 2, 0} for four replicas is invalid.
+
+Computing the result of CrossReplicaSum requires having one input from each
+replica, so if one replica executes a CrossReplicaSum node more times than
+another, then the former replica will wait forever. Since the replicas are all
+running the same program, there are not a lot of ways for that to happen, but it
+is possible when a while loop's condition depends on data from infeed and the
+data that is infed causes the while loop to iterate more times on one replica
+than another.
+
+## CustomCall
+
+See also
+[`XlaBuilder::CustomCall`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Call a user-provided function within a computation.
+
+<b> `CustomCall(target_name, args..., shape)` </b>
+
+| Arguments     | Type                   | Semantics                         |
+| ------------- | ---------------------- | --------------------------------- |
+| `target_name` | `string`               | Name of the function. A call      |
+:               :                        : instruction will be emitted which :
+:               :                        : targets this symbol name.         :
+| `args`        | sequence of N `XlaOp`s | N arguments of arbitrary type,    |
+:               :                        : which will be passed to the       :
+:               :                        : function.                         :
+| `shape`       | `Shape`                | Output shape of the function      |
+
+The function signature is the same, regardless of the arity or type of args:
+
+```
+extern "C" void target_name(void* out, void** in);
+```
+
+For example, if CustomCall is used as follows:
+
+```
+let x = f32[2] {1,2};
+let y = f32[2x3] {{10, 20, 30}, {40, 50, 60}};
+
+CustomCall("myfunc", {x, y}, f32[3x3])
+```
+
+Here is an example of an implementation of `myfunc`:
+
+```
+extern "C" void myfunc(void* out, void** in) {
+  float (&x)[2] = *static_cast<float(*)[2]>(in[0]);
+  float (&y)[2][3] = *static_cast<float(*)[2][3]>(in[1]);
+  EXPECT_EQ(1, x[0]);
+  EXPECT_EQ(2, x[1]);
+  EXPECT_EQ(10, y[0][0]);
+  EXPECT_EQ(20, y[0][1]);
+  EXPECT_EQ(30, y[0][2]);
+  EXPECT_EQ(40, y[1][0]);
+  EXPECT_EQ(50, y[1][1]);
+  EXPECT_EQ(60, y[1][2]);
+  float (&z)[3][3] = *static_cast<float(*)[3][3]>(out);
+  z[0][0] = x[1] + y[1][0];
+  // ...
+}
+```
+
+The user-provided function must not have side-effects and its execution must be
+idempotent.
+
+> Note: The opaque nature of the user-provided function restricts optimization
+> opportunities for the compiler. Try to express your computation in terms of
+> native XLA ops whenever possible; only use CustomCall as a last resort.
+
+## Dot
+
+See also
+[`XlaBuilder::Dot`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Dot(lhs, rhs)` </b>
+
+Arguments | Type    | Semantics
+--------- | ------- | ---------------
+`lhs`     | `XlaOp` | array of type T
+`rhs`     | `XlaOp` | array of type T
+
+The exact semantics of this operation depend on the ranks of the operands:
+
+| Input                   | Output                | Semantics               |
+| ----------------------- | --------------------- | ----------------------- |
+| vector [n] `dot` vector | scalar                | vector dot product      |
+: [n]                     :                       :                         :
+| matrix [m x k] `dot`    | vector [m]            | matrix-vector           |
+: vector [k]              :                       : multiplication          :
+| matrix [m x k] `dot`    | matrix [m x n]        | matrix-matrix           |
+: matrix [k x n]          :                       : multiplication          :
+
+The operation performs sum of products over the last dimension of `lhs` and the
+one-before-last dimension of `rhs`. These are the "contracted" dimensions. The
+contracted dimensions of `lhs` and `rhs` must be of the same size. In practice,
+it can be used to perform dot products between vectors, vector/matrix
+multiplications or matrix/matrix multiplications.
+
+## DotGeneral
+
+See also
+[`XlaBuilder::DotGeneral`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `DotGeneral(lhs, rhs, dimension_numbers)` </b>
+
+Arguments           | Type                  | Semantics
+------------------- | --------------------- | ---------------
+`lhs`               | `XlaOp`               | array of type T
+`rhs`               | `XlaOp`               | array of type T
+`dimension_numbers` | `DotDimensionNumbers` | array of type T
+
+As Dot, but allows contracting and batch dimension numbers to be specified for
+both the 'lhs' and 'rhs'.
+
+| DotDimensionNumbers Fields | Type                    | Semantics
+| --------- | ----------------------- | ---------------
+| 'lhs_contracting_dimensions' | repeated int64 | 'lhs' contracting dimension numbers |
+| 'rhs_contracting_dimensions' | repeated int64 | 'rhs' contracting dimension numbers |
+| 'lhs_batch_dimensions' | repeated int64 | 'lhs' batch dimension numbers |
+| 'rhs_batch_dimensions' | repeated int64 | 'rhs' batch dimension numbers |
+
+DotGeneral performs the sum of products over contracting dimensions specified
+in 'dimension_numbers'.
+
+Associated contracting dimension numbers from the 'lhs' and 'rhs' do not need
+to be the same, but must be listed in the same order in both
+'lhs/rhs_contracting_dimensions' arrays and have the same dimension sizes.
+There must be exactly one contracting dimension on both 'lhs' and 'rhs'.
+
+Example with contracting dimension numbers:
+
+```
+lhs = { {1.0, 2.0, 3.0},
+        {4.0, 5.0, 6.0} }
+
+rhs = { {1.0, 1.0, 1.0},
+        {2.0, 2.0, 2.0} }
+
+DotDimensionNumbers dnums;
+dnums.add_lhs_contracting_dimensions(1);
+dnums.add_rhs_contracting_dimensions(1);
+
+DotGeneral(lhs, rhs, dnums) -> { {6.0, 12.0},
+                                 {15.0, 30.0} }
+```
+
+Associated batch dimension numbers from the 'lhs' and 'rhs' must have the same
+dimension number, must be listed in the same order in both arrays, must
+have the same dimension sizes, and must be ordered before contracting and
+non-contracting/non-batch dimension numbers.
+
+Example with batch dimension numbers (batch size 2, 2x2 matrices):
+
+```
+lhs = { { {1.0, 2.0},
+          {3.0, 4.0} },
+        { {5.0, 6.0},
+          {7.0, 8.0} } }
+
+rhs = { { {1.0, 0.0},
+          {0.0, 1.0} },
+        { {1.0, 0.0},
+          {0.0, 1.0} } }
+
+DotDimensionNumbers dnums;
+dnums.add_lhs_contracting_dimensions(2);
+dnums.add_rhs_contracting_dimensions(1);
+dnums.add_lhs_batch_dimensions(0);
+dnums.add_rhs_batch_dimensions(0);
+
+DotGeneral(lhs, rhs, dnums) -> { { {1.0, 2.0},
+                                   {3.0, 4.0} },
+                                 { {5.0, 6.0},
+                                   {7.0, 8.0} } }
+```
+
+| Input                               | Output            | Semantics        |
+| ----------------------------------- | ----------------- | ---------------- |
+| [b0, m, k] `dot` [b0, k, n]         | [b0, m, n]        |  batch matmul    |
+| [b0, b1, m, k] `dot` [b0, b1, k, n] | [b0, b1, m, n]    |  batch matmul    |
+
+It follows that the resulting dimension number starts with the batch dimension,
+then the 'lhs' non-contracting/non-batch dimension, and finally the 'rhs'
+non-contracting/non-batch dimension.
+
+## DynamicSlice
+
+See also
+[`XlaBuilder::DynamicSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+DynamicSlice extracts a sub-array from the input array at dynamic
+`start_indices`. The size of the slice in each dimension is passed in
+`size_indices`, which specify the end point of exclusive slice intervals in each
+dimension: [start, start + size). The shape of `start_indices` must be rank ==
+1, with dimension size equal to the rank of `operand`.
+
+<b> `DynamicSlice(operand, start_indices, size_indices)` </b>
+
+| Arguments       | Type                | Semantics                           |
+| --------------- | ------------------- | ----------------------------------- |
+| `operand`       | `XlaOp`             | N dimensional array of type T       |
+| `start_indices` | `XlaOp`             | Rank 1 array of N integers          |
+:                 :                     : containing the starting indices of  :
+:                 :                     : the slice for each dimension. Value :
+:                 :                     : must be greater than or equal to    :
+:                 :                     : zero.                               :
+| `size_indices`  | `ArraySlice<int64>` | List of N integers containing the   |
+:                 :                     : slice size for each dimension. Each :
+:                 :                     : value must be strictly greater than :
+:                 :                     : zero, and start + size must be less :
+:                 :                     : than or equal to the size of the    :
+:                 :                     : dimension to avoid wrapping modulo  :
+:                 :                     : dimension size.                     :
+
+The effective slice indices are computed by applying the following
+transformation for each index `i` in `[1, N)` before performing the slice:
+
+```
+start_indices[i] = clamp(start_indices[i], 0, operand.dimension_size[i] - size_indices[i])
+```
+
+This ensures that the extracted slice is always in-bounds with respect to the
+operand array. If the slice is in-bounds before the transformation is applied,
+the transformation has no effect.
+
+1-dimensional example:
+
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+let s = {2}
+
+DynamicSlice(a, s, {2}) produces:
+  {2.0, 3.0}
+```
+
+2-dimensional example:
+
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+let s = {2, 1}
+
+DynamicSlice(b, s, {2, 2}) produces:
+  { { 7.0,  8.0},
+    {10.0, 11.0} }
+```
+## DynamicUpdateSlice
+
+See also
+[`XlaBuilder::DynamicUpdateSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+DynamicUpdateSlice generates a result which is the value of the input array
+`operand`, with a slice `update` overwritten at `start_indices`.
+The shape of `update` determines the shape of the sub-array of the result which
+is updated.
+The shape of `start_indices` must be rank == 1, with dimension size equal to
+the rank of `operand`.
+
+<b> `DynamicUpdateSlice(operand, update, start_indices)` </b>
+
+| Arguments       | Type    | Semantics                                        |
+| --------------- | ------- | ------------------------------------------------ |
+| `operand`       | `XlaOp` | N dimensional array of type T                    |
+| `update`        | `XlaOp` | N dimensional array of type T containing the     |
+:                 :         : slice update. Each dimension of update shape     :
+:                 :         : must be strictly greater than zero, and start +  :
+:                 :         : update must be less than or equal to the operand :
+:                 :         : size for each dimension to avoid generating      :
+:                 :         : out-of-bounds update indices.                    :
+| `start_indices` | `XlaOp` | Rank 1 array of N integers containing the        |
+:                 :         : starting indices of the slice for each           :
+:                 :         : dimension. Value must be greater than or equal   :
+:                 :         : to zero.                                         :
+
+The effective slice indices are computed by applying the following
+transformation for each index `i` in `[1, N)` before performing the slice:
+
+```
+start_indices[i] = clamp(start_indices[i], 0, operand.dimension_size[i] - update.dimension_size[i])
+```
+
+This ensures that the updated slice is always in-bounds with respect to the
+operand array. If the slice is in-bounds before the transformation is applied,
+the transformation has no effect.
+
+1-dimensional example:
+
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+let u = {5.0, 6.0}
+let s = {2}
+
+DynamicUpdateSlice(a, u, s) produces:
+  {0.0, 1.0, 5.0, 6.0, 4.0}
+```
+
+2-dimensional example:
+
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+let u =
+ { {12.0,  13.0},
+   {14.0,  15.0},
+   {16.0,  17.0} }
+
+let s = {1, 1}
+
+DynamicUpdateSlice(b, u, s) produces:
+ { {0.0,  1.0,  2.0},
+   {3.0, 12.0, 13.0},
+   {6.0, 14.0, 15.0},
+   {9.0, 16.0, 17.0} }
+```
+
+## Element-wise binary arithmetic operations
+
+See also
+[`XlaBuilder::Add`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+A set of element-wise binary arithmetic operations is supported.
+
+<b> `Op(lhs, rhs)` </b>
+
+Where `Op` is one of `Add` (addition), `Sub` (subtraction), `Mul`
+(multiplication), `Div` (division), `Rem` (remainder), `Max` (maximum), `Min`
+(minimum), `LogicalAnd` (logical AND), or `LogicalOr` (logical OR).
+
+Arguments | Type    | Semantics
+--------- | ------- | ----------------------------------------
+`lhs`     | `XlaOp` | left-hand-side operand: array of type T
+`rhs`     | `XlaOp` | right-hand-side operand: array of type T
+
+The arguments' shapes have to be either similar or compatible. See the
+[broadcasting](../../performance/xla/broadcasting.md) documentation about what it means for shapes to
+be compatible. The result of an operation has a shape which is the result of
+broadcasting the two input arrays. In this variant, operations between arrays of
+different ranks are *not* supported, unless one of the operands is a scalar.
+
+When `Op` is `Rem`, the sign of the result is taken from the dividend, and the
+absolute value of the result is always less than the divisor's absolute value.
+
+Integer division overflow (signed/unsigned division/remainder by zero or signed
+divison/remainder of `INT_SMIN` with `-1`) produces an implementation defined
+value.
+
+An alternative variant with different-rank broadcasting support exists for these
+operations:
+
+<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
+
+Where `Op` is the same as above. This variant of the operation should be used
+for arithmetic operations between arrays of different ranks (such as adding a
+matrix to a vector).
+
+The additional `broadcast_dimensions` operand is a slice of integers used to
+expand the rank of the lower-rank operand up to the rank of the higher-rank
+operand. `broadcast_dimensions` maps the dimensions of the lower-rank shape to
+the dimensions of the higher-rank shape. The unmapped dimensions of the expanded
+shape are filled with dimensions of size one. Degenerate-dimension broadcasting
+then broadcasts the shapes along these degenerate dimensions to equalize the
+shapes of both operands. The semantics are described in detail on the
+[broadcasting page](../../performance/xla/broadcasting.md).
+
+## Element-wise comparison operations
+
+See also
+[`XlaBuilder::Eq`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+A set of standard element-wise binary comparison operations is supported. Note
+that standard IEEE 754 floating-point comparison semantics apply when comparing
+floating-point types.
+
+<b> `Op(lhs, rhs)` </b>
+
+Where `Op` is one of `Eq` (equal-to), `Ne` (not equal-to), `Ge`
+(greater-or-equal-than), `Gt` (greater-than), `Le` (less-or-equal-than), `Lt`
+(less-than).
+
+Arguments | Type    | Semantics
+--------- | ------- | ----------------------------------------
+`lhs`     | `XlaOp` | left-hand-side operand: array of type T
+`rhs`     | `XlaOp` | right-hand-side operand: array of type T
+
+The arguments' shapes have to be either similar or compatible. See the
+[broadcasting](../../performance/xla/broadcasting.md) documentation about what it means for shapes to
+be compatible. The result of an operation has a shape which is the result of
+broadcasting the two input arrays with the element type `PRED`. In this variant,
+operations between arrays of different ranks are *not* supported, unless one of
+the operands is a scalar.
+
+An alternative variant with different-rank broadcasting support exists for these
+operations:
+
+<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
+
+Where `Op` is the same as above. This variant of the operation should be used
+for comparison operations between arrays of different ranks (such as adding a
+matrix to a vector).
+
+The additional `broadcast_dimensions` operand is a slice of integers specifying
+the dimensions to use for broadcasting the operands. The semantics are described
+in detail on the [broadcasting page](../../performance/xla/broadcasting.md).
+
+## Element-wise unary functions
+
+XlaBuilder supports these element-wise unary functions:
+
+<b>`Abs(operand)`</b> Element-wise abs `x -> |x|`.
+
+<b>`Ceil(operand)`</b> Element-wise ceil `x -> ⌈x⌉`.
+
+<b>`Cos(operand)`</b> Element-wise cosine `x -> cos(x)`.
+
+<b>`Exp(operand)`</b> Element-wise natural exponential `x -> e^x`.
+
+<b>`Floor(operand)`</b> Element-wise floor `x -> ⌊x⌋`.
+
+<b>`IsFinite(operand)`</b> Tests whether each element of `operand` is finite,
+i.e., is not positive or negative infinity, and is not `NaN`. Returns an array
+of `PRED` values with the same shape as the input, where each element is `true`
+if and only if the corresponding input element is finite.
+
+<b>`Log(operand)`</b> Element-wise natural logarithm `x -> ln(x)`.
+
+<b>`LogicalNot(operand)`</b> Element-wise logical not `x -> !(x)`.
+
+<b>`Neg(operand)`</b> Element-wise negation `x -> -x`.
+
+<b>`Sign(operand)`</b> Element-wise sign operation `x -> sgn(x)` where
+
+$$\text{sgn}(x) = \begin{cases} -1 & x < 0\\ 0 & x = 0\\ 1 & x > 0 \end{cases}$$
+
+using the comparison operator of the element type of `operand`.
+
+<b>`Tanh(operand)`</b> Element-wise hyperbolic tangent `x -> tanh(x)`.
+
+
+Arguments | Type    | Semantics
+--------- | ------- | ---------------------------
+`operand` | `XlaOp` | The operand to the function
+
+The function is applied to each element in the `operand` array, resulting in an
+array with the same shape. It is allowed for `operand` to be a scalar (rank 0).
+
+## Gather
+
+The XLA gather operation stitches together several slices (each slice at a
+potentially different runtime offset) of an input array.
+
+### General Semantics
+
+See also
+[`XlaBuilder::Gather`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+For a more intuitive description, see the "Informal Description" section below.
+
+<b> `gather(operand, start_indices, offset_dims, collapsed_slice_dims, slice_sizes, start_index_map)` </b>
+
+|Arguments         | Type                    | Semantics                       |
+|----------------- | ----------------------- | --------------------------------|
+|`operand`         | `XlaOp`                 | The array we’re gathering       |
+:                  :                         : from.                           :
+|`start_indices`   | `XlaOp`                 | Array containing the starting  |
+:                  :                         : indices of the slices we gather.:
+|`index_vector_dim` | `int64`                | The dimension in                |
+:                  :                         : `start_indices` that "contains" :
+:                  :                         : the starting indices.  See      :
+:                  :                         : below for a detailed            :
+:                  :                         : description.                    :
+|`offset_dims`     | `ArraySlice<int64>`     | The set of dimensions in  the   :
+:                  :                         : output shape that offset into a :
+:                  :                         : array sliced from operand.     :
+|`slice_sizes`     | `ArraySlice<int64>`      | `slice_sizes[i]` is the bounds |
+:                  :                          : for the slice on dimension `i`.:
+|`collapsed_slice_dims` | `ArraySlice<int64>` | The set of dimensions in each  :
+|                  :                          | slice that are collapsed away. :
+|                  :                          | These dimensions must have size:
+|                  :                          | 1.                             |
+|`start_index_map` | `ArraySlice<int64>`      | A map that describes how to map|
+:                  :                          : indices in `start_indices` to  :
+:                  :                          : to legal indices into operand. :
+
+For convenience, we label dimensions in the output array not in `offset_dims`
+as `batch_dims`.
+
+The output is an array of rank `batch_dims.size` + `operand.rank` -
+`collapsed_slice_dims`.size.
+
+If `index_vector_dim` is equal to `start_indices.rank` we implicitly consider
+`start_indices` to have a trailing `1` dimension (i.e. if `start_indices` was of
+shape `[6,7]` and `index_vector_dim` is `2` then we implicitly consider the
+shape of `start_indices` to be `[6,7,1]`).
+
+The bounds for the output array along dimension `i` is computed as follows:
+
+  1. If `i` is present in `batch_dims` (i.e. is equal to `batch_dims[k]` for
+     some `k`) then we pick the corresponding dimension bounds out of
+     `start_indices.shape`, skipping `index_vector_dim` (i.e. pick
+     `start_indices.shape.dims`[`k`] if `k` < `index_vector_dim` and
+     `start_indices.shape.dims`[`k`+`1`] otherwise).
+
+  2. If `i` is present in `offset_dims` (i.e. equal to `offset_dims`[`k`] for
+     some `k`) then we pick the corresponding bound out of `slice_sizes` after
+     accounting for `collapsed_slice_dims` (i.e. we pick
+     `adjusted_slice_sizes`[`k`] where `adjusted_slice_sizes` is `slice_sizes`
+     with the bounds at indices `collapsed_slice_dims` removed).
+
+Formally, the operand index `In` corresponding to an output index `Out` is
+computed as follows:
+
+  1. Let `G` = { `Out`[`k`] for `k` in `batch_dims` }.  Use `G` to slice out
+     vector `S` such that `S`[`i`] = `start_indices`[Combine(`G`, `i`)] where
+     Combine(A, b) inserts b at position `index_vector_dim` into A.  Note that
+     this is well defined even if `G` is empty -- if `G` is empty then `S` =
+     `start_indices`.
+
+  2. Create a starting index, `S`<sub>`in`</sub>, into `operand` using `S` by
+     scattering `S` using `start_index_map`.  More precisely:
+       1. `S`<sub>`in`</sub>[`start_index_map`[`k`]] = `S`[`k`] if `k` <
+          `start_index_map.size`.
+       2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
+
+  3. Create an index `O`<sub>`in`</sub> into `operand` by scattering the indices
+     at the offset dimensions in `Out` according to the `collapsed_slice_dims`
+     set.  More precisely:
+       1. `O`<sub>`in`</sub>[`expand_offset_dims`(`k`)] =
+          `Out`[`offset_dims`[`k`]] if `k` < `offset_dims.size`
+          (`expand_offset_dims` is defined below).
+       2. `O`<sub>`in`</sub>[`_`] = `0` otherwise.
+  4. `In` is `O`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
+     addition.
+
+`expand_offset_dims` is the monotonic function with domain [`0`, `offset.size`)
+and range [`0`, `operand.rank`) \ `collapsed_slice_dims`.  So if, e.g.,
+`offset.size` is `4`, `operand.rank` is `6` and `collapsed_slice_dims` is {`0`,
+`2`} then `expand_offset_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}.
+
+### Informal Description and Examples
+
+Informally, every index `Out` in the output array corresponds to an element `E`
+in the operand array, computed as follows:
+
+  - We use the batch dimensions in `Out` to look up a starting index from
+    `start_indices`.
+
+  - We use `start_index_map` to map the starting index (which may have size less
+    than operand.rank) to a "full" starting index into operand.
+
+  - We dynamic-slice out a slice with size `slice_sizes` using the full starting
+    index.
+
+  - We reshape the slice by collapsing the `collapsed_slice_dims` dimensions.
+    Since all collapsed slice dimensions have to have bound 1 this reshape is
+    always legal.
+
+  - We use the offset dimensions in `Out` to index into this slice to get the
+    input element, `E`, corresponding to output index `Out`.
+
+`index_vector_dim` is set to `start_indices.rank` - `1` in all of the
+examples that follow.  More interesting values for `index_vector_dim` does not
+change the operation fundamentally, but makes the visual representation more
+cumbersome.
+
+To get an intuition on how all of the above fits together, let's look at an
+example that gathers 5 slices of shape `[8,6]` from a `[16,11]` array.  The
+position of a slice into the `[16,11]` array can be represented as an index
+vector of shape `S64[2]`, so the set of 5 positions can be represented as a
+`S64[5,2]` array.
+
+The behavior of the gather operation can then be depicted as an index
+transformation that takes [`G`,`O`<sub>`0`</sub>,`O`<sub>`1`</sub>], an index in
+the output shape, and maps it to an element in the input array in the following
+way:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/ops_xla_gather_0.svg">
+</div>
+
+We first select an (`X`,`Y`) vector from the gather indices array using `G`.
+The element in the output array at index
+[`G`,`O`<sub>`0`</sub>,`O`<sub>`1`</sub>] is then the element in the input
+array at index [`X`+`O`<sub>`0`</sub>,`Y`+`O`<sub>`1`</sub>].
+
+`slice_sizes` is `[8,6]`, which decides the range of W<sub>`0`</sub> and
+W<sub>`1`</sub>, and this in turn decides the bounds of the slice.
+
+This gather operation acts as a batch dynamic slice with `G` as the batch
+dimension.
+
+The gather indices may be multidimensional.  For instance, a more general
+version of the example above using a "gather indices" array of shape `[4,5,2]`
+would translate indices like this:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/ops_xla_gather_1.svg">
+</div>
+
+Again, this acts as a batch dynamic slice `G`<sub>`0`</sub> and
+`G`<sub>`1`</sub> as the batch dimensions.  The slice size is still `[8,6]`.
+
+The gather operation in XLA generalizes the informal semantics outlined above in
+the following ways:
+
+ 1. We can configure which dimensions in the output shape are the offset
+    dimensions (dimensions containing `O`<sub>`0`</sub>, `O`<sub>`1`</sub> in
+    the last example).  The output batch dimensions (dimensions containing
+    `G`<sub>`0`</sub>, `G`<sub>`1`</sub> in the last example) are defined to be
+    the output dimensions that are not offset dimensions.
+
+ 2. The number of output offset dimensions explicitly present in the output
+    shape may be smaller than the input rank.  These "missing" dimensions, which
+    are listed explicitly as `collapsed_slice_dims`, must have a slice size of
+    `1`.  Since they have a slice size of `1` the only valid index for them is
+    `0` and eliding them does not introduce ambiguity.
+
+ 3. The slice extracted from the "Gather Indices" array ((`X`, `Y`) in the last
+    example) may have fewer elements than the input array rank, and an explicit
+    mapping dictates how the index should be expanded to have the same rank as
+    the input.
+
+As a final example, we use (2) and (3) to implement `tf.gather_nd`:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/ops_xla_gather_2.svg">
+</div>
+
+`G`<sub>`0`</sub> and `G`<sub>`1`</sub> are used to slice out a starting index
+from the gather indices array as usual, except the starting index has only one
+element, `X`.  Similarly, there is only one output offset index with the value
+`O`<sub>`0`</sub>.  However, before being used as indices into the input array,
+these are expanded in accordance to "Gather Index Mapping" (`start_index_map` in
+the formal description) and "Offset Mapping" (`expand_offset_dims` in the formal
+description) into [`0`,`O`<sub>`0`</sub>] and [`X`,`0`] respectively, adding up
+to [`X`,`O`<sub>`0`</sub>].  In other words, the output index
+[`G`<sub>`0`</sub>,`G`<sub>`1`</sub>,`O`<sub>`0`</sub>] maps to the input index
+[`GatherIndices`[`G`<sub>`0`</sub>,`G`<sub>`1`</sub>,`0`],`X`] which gives us
+the semantics for `tf.gather_nd`.
+
+`slice_sizes` for this case is `[1,11]`.  Intuitively this means that every
+index `X` in the gather indices array picks an entire row and the result is the
+concatenation of all these rows.
+
+## GetTupleElement
+
+See also
+[`XlaBuilder::GetTupleElement`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Indexes into a tuple with a compile-time-constant value.
+
+The value must be a compile-time-constant so that shape inference can determine
+the type of the resulting value.
+
+This is analogous to `std::get<int N>(t)` in C++. Conceptually:
+
+```
+let v: f32[10] = f32[10]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+let s: s32 = 5;
+let t: (f32[10], s32) = tuple(v, s);
+let element_1: s32 = gettupleelement(t, 1);  // Inferred shape matches s32.
+```
+
+See also `tf.tuple`.
+
+## Infeed
+
+See also
+[`XlaBuilder::Infeed`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Infeed(shape)` </b>
+
+| Argument | Type    | Semantics                                             |
+| -------- | ------- | ----------------------------------------------------- |
+| `shape`  | `Shape` | Shape of the data read from the Infeed interface. The |
+:          :         : layout field of the shape must be set to match the    :
+:          :         : layout of the data sent to the device; otherwise its  :
+:          :         : behavior is undefined.                                :
+
+Reads a single data item from the implicit Infeed streaming interface of the
+device, interpreting the data as the given shape and its layout, and returns a
+`XlaOp` of the data. Multiple Infeed operations are allowed in a
+computation, but there must be a total order among the Infeed operations. For
+example, two Infeeds in the code below have a total order since there is a
+dependency between the while loops.
+
+```
+result1 = while (condition, init = init_value) {
+  Infeed(shape)
+}
+
+result2 = while (condition, init = result1) {
+  Infeed(shape)
+}
+```
+
+Nested tuple shapes are not supported. For an empty tuple shape, the Infeed
+operation is effectively a no-op and proceeds without reading any data from the
+Infeed of the device.
+
+> Note: We plan to allow multiple Infeed operations without a total order, in
+> which case the compiler will provide information about how the Infeed
+> operations are serialized in the compiled program.
+
+## Iota
+
+<b> `Iota()` </b>
+
+Builds a constant literal on device rather than a potentially large host
+transfer.  Creates a rank 1 tensor of values starting at zero and incrementing
+by one.
+
+Arguments          | Type            | Semantics
+------------------ | --------------- | ---------------------------
+`type`             | `PrimitiveType` | type U
+`size`             | `int64`         | The number of elements in the tensor.
+
+## Map
+
+See also
+[`XlaBuilder::Map`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Map(operands..., computation)` </b>
+
+| Arguments         | Type                   | Semantics                      |
+| ----------------- | ---------------------- | ------------------------------ |
+| `operands`        | sequence of N `XlaOp`s | N arrays of types T_0..T_{N-1} |
+| `computation`     | `XlaComputation`       | computation of type `T_0, T_1, |
+:                   :                        : ..., T_{N + M -1} -> S` with N :
+:                   :                        : parameters of type T and M of  :
+:                   :                        : arbitrary type                 :
+| `dimensions`      | `int64` array          | array of map dimensions        |
+
+Applies a scalar function over the given `operands` arrays, producing an array
+of the same dimensions where each element is the result of the mapped function
+applied to the corresponding elements in the input arrays.
+
+The mapped function is an arbitrary computation with the restriction that it has
+N inputs of scalar type `T` and a single output with type `S`. The output has
+the same dimensions as the operands except that the element type T is replaced
+with S.
+
+For example: `Map(op1, op2, op3, computation, par1)` maps `elem_out <-
+computation(elem1, elem2, elem3, par1)` at each (multi-dimensional) index in the
+input arrays to produce the output array.
+
+## Pad
+
+See also
+[`XlaBuilder::Pad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Pad(operand, padding_value, padding_config)` </b>
+
+| Arguments        | Type            | Semantics                               |
+| ---------------- | --------------- | --------------------------------------- |
+| `operand`        | `XlaOp`         | array of type `T`                       |
+| `padding_value`  | `XlaOp`         | scalar of type `T` to fill in the added |
+:                  :                 : padding                                 :
+| `padding_config` | `PaddingConfig` | padding amount on both edges (low,      |
+:                  :                 : high) and between the elements of each  :
+:                  :                 : dimension                               :
+
+Expands the given `operand` array by padding around the array as well as between
+the elements of the array with the given `padding_value`. `padding_config`
+specifies the amount of edge padding and the interior padding for each
+dimension.
+
+`PaddingConfig` is a repeated field of `PaddingConfigDimension`, which contains
+three fields for each dimension: `edge_padding_low`, `edge_padding_high`, and
+`interior_padding`. `edge_padding_low` and `edge_padding_high` specify the
+amount of padding added at the low-end (next to index 0) and the high-end (next
+to the highest index) of each dimension respectively. The amount of edge padding
+can be negative -- the absolute value of negative padding indicates the number
+of elements to remove from the specified dimension. `interior_padding` specifies
+the amount of padding added between any two elements in each dimension. Interior
+padding occurs logically before edge padding, so in the case of negative edge
+padding elements are removed from the interior-padded operand. This operation is
+a no-op if the edge padding pairs are all (0, 0) and the interior padding values
+are all 0. The figure below shows examples of different `edge_padding` and
+`interior_padding` values for a two-dimensional array.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="https://www.tensorflow.org/images/ops_pad.png">
+</div>
+
+## Recv
+
+See also
+[`XlaBuilder::Recv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Recv(shape, channel_handle)` </b>
+
+| Arguments        | Type            | Semantics                            |
+| ---------------- | --------------- | ------------------------------------ |
+| `shape`          | `Shape`         | shape of the data to receive         |
+| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair |
+
+Receives data of the given shape from a `Send` instruction in another
+computation that shares the same channel handle. Returns a
+XlaOp for the received data.
+
+The client API of `Recv` operation represents synchronous communication.
+However, the instruction is internally decomposed into 2 HLO instructions
+(`Recv` and `RecvDone`) to enable asynchronous data transfers. See also
+[`HloInstruction::CreateRecv` and `HloInstruction::CreateRecvDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
+
+<b>`Recv(const Shape& shape, int64 channel_id)`</b>
+
+Allocates resources required to receive data from a `Send` instruction with the
+same channel_id. Returns a context for the allocated resources, which is used
+by a following `RecvDone` instruction to wait for the completion of the data
+transfer. The context is a tuple of {receive buffer (shape), request identifier
+(U32)} and it can only be used by a `RecvDone` instruction.
+
+<b> `RecvDone(HloInstruction context)` </b>
+
+Given a context created by a `Recv` instruction, waits for the data transfer to
+complete and returns the received data.
+
+## Reduce
+
+See also
+[`XlaBuilder::Reduce`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Applies a reduction function to one or more arrays in parallel.
+
+<b> `Reduce(operands..., init_values..., computation, dimensions)` </b>
+
+Arguments     | Type                  | Semantics
+------------- | --------------------- | ---------------------------------------
+`operands`    | Sequence of N `XlaOp` | N arrays of types `T_0, ..., T_N`.
+`init_values` | Sequence of N `XlaOp` | N scalars of types `T_0, ..., T_N`.
+`computation` | `XlaComputation`      | computation of type
+              :                       : `T_0, ..., T_N, T_0, ..., T_N -> Collate(T_0, ..., T_N)`
+`dimensions`  | `int64` array         | unordered array of dimensions to reduce
+
+Where:
+* N is required to be greater or equal to 1.
+* All input arrays must have the same dimensions.
+* If `N = 1`, `Collate(T)` is `T`.
+* If `N > 1`, `Collate(T_0, ..., T_N)` is a tuple of `N` elements of type `T`.
+
+The output of the op is `Collate(Q_0, ..., Q_N)` where `Q_i` is an array of type
+`T_i`, the dimensions of which are described below.
+
+This operation reduces one or more dimensions of each input array into scalars.
+The rank of each returned array is `rank(operand) - len(dimensions)`.
+`init_value` is the initial value used for every reduction and may be inserted
+anywhere during computation by the back-end. In most cases, `init_value` is an
+identity of the reduction function (for example, 0 for addition). The applied
+`computation` is always passed the `init_value` on the left-hand side.
+
+The evaluation order of the reduction function is arbitrary and may be
+non-deterministic. Therefore, the reduction function should not be overly
+sensitive to reassociation.
+
+Some reduction functions like addition are not strictly associative for floats.
+However, if the range of the data is limited, floating-point addition is close
+enough to being associative for most practical uses. It is possible to conceive
+of some completely non-associative reductions, however, and these will produce
+incorrect or unpredictable results in XLA reductions.
+
+As an example, when reducing across one dimension in a single 1D array with
+values [10, 11, 12, 13], with reduction function `f` (this is `computation`)
+then that could be computed as
+
+`f(10, f(11, f(12, f(init_value, 13)))`
+
+but there are also many other possibilities, e.g.
+
+`f(init_value, f(f(10, f(init_value, 11)), f(f(init_value, 12), f(init_value, 13))))`
+
+The following is a rough pseudo-code example of how reduction could be
+implemented, using summation as the reduction computation with an initial value
+of 0.
+
+```python
+result_shape <- remove all dims in dimensions from operand_shape
+
+# Iterate over all elements in result_shape. The number of r's here is equal
+# to the rank of the result
+for r0 in range(result_shape[0]), r1 in range(result_shape[1]), ...:
+  # Initialize this result element
+  result[r0, r1...] <- 0
+
+  # Iterate over all the reduction dimensions
+  for d0 in range(dimensions[0]), d1 in range(dimensions[1]), ...:
+    # Increment the result element with the value of the operand's element.
+    # The index of the operand's element is constructed from all ri's and di's
+    # in the right order (by construction ri's and di's together index over the
+    # whole operand shape).
+    result[r0, r1...] += operand[ri... di]
+```
+
+Here's an example of reducing a 2D array (matrix). The shape has rank 2,
+dimension 0 of size 2 and dimension 1 of size 3:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:35%" src="https://www.tensorflow.org/images/ops_2d_matrix.png">
+</div>
+
+Results of reducing dimensions 0 or 1 with an "add" function:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_from_2d_matrix.png">
+</div>
+
+Note that both reduction results are 1D arrays. The diagram shows one as column
+and another as row just for visual convenience.
+
+For a more complex example, here is a 3D array. Its rank is 3, dimension 0 of
+size 4, dimension 1 of size 2 and dimension 2 of size 3. For simplicity, the
+values 1 to 6 are replicated across dimension 0.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_from_3d_matrix.png">
+</div>
+
+Similarly to the 2D example, we can reduce just one dimension. If we reduce
+dimension 0, for example, we get a rank-2 array where all values across
+dimension 0 were folded into a scalar:
+
+```text
+|  4   8  12 |
+| 16  20  24 |
+```
+
+If we reduce dimension 2, we also get a rank-2 array where all values across
+dimension 2 were folded into a scalar:
+
+```text
+| 6  15 |
+| 6  15 |
+| 6  15 |
+| 6  15 |
+```
+
+Note that the relative order between the remaining dimensions in the input is
+preserved in the output, but some dimensions may get assigned new numbers (since
+the rank changes).
+
+We can also reduce multiple dimensions. Add-reducing dimensions 0 and 1 produces
+the 1D array `| 20 28 36 |`.
+
+Reducing the 3D array over all its dimensions produces the scalar `84`.
+
+When `N > 1`, reduce function application is slightly more complex, as it is
+applied simultaneously to all inputs. For example, consider the following
+reduction function, which can be used to compute the max and the argmax of a
+a 1-D tensor in parallel:
+
+```
+f: (Float, Int, Float, Int) -> Float, Int
+f(max, argmax, value, index):
+  if value >= argmax:
+    return (value, index)
+  else:
+    return (max, argmax)
+```
+
+For 1-D Input arrays `V = Float[N], K = Int[N]`, and init values
+`I_V = Float, I_K =  Int`, the result `f_(N-1)` of reducing across the only
+input dimension is equivalent to the following recursive application:
+```
+f_0 = f(I_V, I_K, V_0, K_0)
+f_1 = f(f_0.first, f_0.second, V_1, K_1)
+...
+f_(N-1) = f(f_(N-2).first, f_(N-2).second, V_(N-1), K_(N-1))
+```
+
+Applying this reduction to an array of values, and an array of sequential
+indices (i.e. iota), will co-iterate over the arrays, and return a tuple
+containing the maximal value and the matching index.
+
+## ReducePrecision
+
+See also
+[`XlaBuilder::ReducePrecision`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Models the effect of converting floating-point values to a lower-precision
+format (such as IEEE-FP16) and back to the original format.  The number of
+exponent and mantissa bits in the lower-precision format can be specified
+arbitrarily, although all bit sizes may not be supported on all hardware
+implementations.
+
+<b> `ReducePrecision(operand, mantissa_bits, exponent_bits)` </b>
+
+Arguments       | Type    | Semantics
+--------------- | ------- | -------------------------------------------------
+`operand`       | `XlaOp` | array of floating-point type `T`.
+`exponent_bits` | `int32` | number of exponent bits in lower-precision format
+`mantissa_bits` | `int32` | number of mantissa bits in lower-precision format
+
+The result is an array of type `T`.  The input values are rounded to the nearest
+value representable with the given number of mantissa bits (using "ties to even"
+semantics), and any values that exceed the range specified by the number of
+exponent bits are clamped to positive or negative infinity.  `NaN` values are
+retained, although they may be converted to canonical `NaN` values.
+
+The lower-precision format must have at least one exponent bit (in order to
+distinguish a zero value from an infinity, since both have a zero mantissa), and
+must have a non-negative number of mantissa bits.  The number of exponent or
+mantissa bits may exceed the corresponding value for type `T`; the corresponding
+portion of the conversion is then simply a no-op.
+
+## ReduceWindow
+
+See also
+[`XlaBuilder::ReduceWindow`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Applies a reduction function to all elements in each window of the input
+multi-dimensional array, producing an output multi-dimensional array with the
+same number of elements as the number of valid positions of the window. A
+pooling layer can be expressed as a `ReduceWindow`. Similar to
+[`Reduce`](#reduce), the applied `computation` is always passed the `init_value`
+on the left-hand side.
+
+<b> `ReduceWindow(operand, init_value, computation, window_dimensions,
+window_strides, padding)` </b>
+
+| Arguments           | Type                | Semantics                        |
+| ------------------- | ------------------- | -------------------------------- |
+| `operand`           | `XlaOp`             | N dimensional array containing   |
+:                     :                     : elements of type T. This is the  :
+:                     :                     : base area on which the window is :
+:                     :                     : placed.                          :
+| `init_value`        | `XlaOp`             | Starting value for the           |
+:                     :                     : reduction. See [Reduce](#reduce) :
+:                     :                     : for details.                     :
+| `computation`       | `XlaComputation`    | Reduction function of type `T, T |
+:                     :                     : -> T`, to apply to all elements  :
+:                     :                     : in each window                   :
+| `window_dimensions` | `ArraySlice<int64>` | array of integers for window     |
+:                     :                     : dimension values                 :
+| `window_strides`    | `ArraySlice<int64>` | array of integers for window     |
+:                     :                     : stride values                    :
+| `padding`           | `Padding`           | padding type for window          |
+:                     :                     : (Padding\:\:kSame or             :
+:                     :                     : Padding\:\:kValid)               :
+
+Below code and figure shows an example of using `ReduceWindow`. Input is a
+matrix of size [4x6] and both window_dimensions and window_stride_dimensions are
+[2x3].
+
+```
+// Create a computation for the reduction (maximum).
+XlaComputation max;
+{
+  XlaBuilder builder(client_, "max");
+  auto y = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "y");
+  auto x = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "x");
+  builder.Max(y, x);
+  max = builder.Build().ConsumeValueOrDie();
+}
+
+// Create a ReduceWindow computation with the max reduction computation.
+XlaBuilder builder(client_, "reduce_window_2x3");
+auto shape = ShapeUtil::MakeShape(F32, {4, 6});
+auto input = builder.Parameter(0, shape, "input");
+builder.ReduceWindow(
+    input, *max,
+    /*init_val=*/builder.ConstantLiteral(LiteralUtil::MinValue(F32)),
+    /*window_dimensions=*/{2, 3},
+    /*window_stride_dimensions=*/{2, 3},
+    Padding::kValid);
+```
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_window.png">
+</div>
+
+Stride of 1 in a dimension specifies that the position of a window in the
+dimension is 1 element away from its adjacent window. In order to specify that
+no windows overlap with each other, window_stride_dimensions should be equal to
+window_dimensions. The figure below illustrates the use of two different stride
+values. Padding is applied to each dimension of the input and the calculations
+are the same as though the input came in with the dimensions it has after
+padding.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:75%" src="https://www.tensorflow.org/images/ops_reduce_window_stride.png">
+</div>
+
+The evaluation order of the reduction function is arbitrary and may be
+non-deterministic. Therefore, the reduction function should not be overly
+sensitive to reassociation. See the discussion about associativity in the
+context of [`Reduce`](#reduce) for more details.
+
+## Reshape
+
+See also
+[`XlaBuilder::Reshape`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and the [`Collapse`](#collapse) operation.
+
+Reshapes the dimensions of an array into a new configuration.
+
+<b> `Reshape(operand, new_sizes)` </b>
+<b> `Reshape(operand, dimensions, new_sizes)` </b>
+
+Arguments    | Type           | Semantics
+------------ | -------------- | ---------------------------------------
+`operand`    | `XlaOp`        | array of type T
+`dimensions` | `int64` vector | order in which dimensions are collapsed
+`new_sizes`  | `int64` vector | vector of sizes of new dimensions
+
+Conceptually, reshape first flattens an array into a one-dimensional vector of
+data values, and then refines this vector into a new shape. The input arguments
+are an arbitrary array of type T, a compile-time-constant vector of dimension
+indices, and a compile-time-constant vector of dimension sizes for the result.
+The values in the `dimension` vector, if given, must be a permutation of all of
+T's dimensions; the default if not given is `{0, ..., rank - 1}`. The order of
+the dimensions in `dimensions` is from slowest-varying dimension (most major) to
+fastest-varying dimension (most minor) in the loop nest which collapses the
+input array into a single dimension. The `new_sizes` vector determines the size
+of the output array. The value at index 0 in `new_sizes` is the size of
+dimension 0, the value at index 1 is the size of dimension 1, and so on. The
+product of the `new_size` dimensions must equal the product of the operand's
+dimension sizes. When refining the collapsed array into the multidimensional
+array defined by `new_sizes`, the dimensions in `new_sizes` are ordered from
+slowest varying (most major) and to fastest varying (most minor).
+
+For example, let v be an array of 24 elements:
+
+```
+let v = f32[4x2x3] {{{10, 11, 12}, {15, 16, 17}},
+                    {{20, 21, 22}, {25, 26, 27}},
+                    {{30, 31, 32}, {35, 36, 37}},
+                    {{40, 41, 42}, {45, 46, 47}}};
+
+In-order collapse:
+let v012_24 = Reshape(v, {0,1,2}, {24});
+then v012_24 == f32[24] {10, 11, 12, 15, 16, 17, 20, 21, 22, 25, 26, 27,
+                         30, 31, 32, 35, 36, 37, 40, 41, 42, 45, 46, 47};
+
+let v012_83 = Reshape(v, {0,1,2}, {8,3});
+then v012_83 == f32[8x3] {{10, 11, 12}, {15, 16, 17},
+                          {20, 21, 22}, {25, 26, 27},
+                          {30, 31, 32}, {35, 36, 37},
+                          {40, 41, 42}, {45, 46, 47}};
+
+Out-of-order collapse:
+let v021_24 = Reshape(v, {1,2,0}, {24});
+then v012_24 == f32[24]  {10, 20, 30, 40, 11, 21, 31, 41, 12, 22, 32, 42,
+                          15, 25, 35, 45, 16, 26, 36, 46, 17, 27, 37, 47};
+
+let v021_83 = Reshape(v, {1,2,0}, {8,3});
+then v021_83 == f32[8x3] {{10, 20, 30}, {40, 11, 21},
+                          {31, 41, 12}, {22, 32, 42},
+                          {15, 25, 35}, {45, 16, 26},
+                          {36, 46, 17}, {27, 37, 47}};
+
+
+let v021_262 = Reshape(v, {1,2,0}, {2,6,2});
+then v021_262 == f32[2x6x2] {{{10, 20}, {30, 40},
+                              {11, 21}, {31, 41},
+                              {12, 22}, {32, 42}},
+                             {{15, 25}, {35, 45},
+                              {16, 26}, {36, 46},
+                              {17, 27}, {37, 47}}};
+```
+
+As a special case, reshape can transform a single-element array to a scalar and
+vice versa. For example,
+
+```
+Reshape(f32[1x1] {{5}}, {0,1}, {}) == 5;
+Reshape(5, {}, {1,1}) == f32[1x1] {{5}};
+```
+
+## Rev (reverse)
+
+See also
+[`XlaBuilder::Rev`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b>`Rev(operand, dimensions)`</b>
+
+Arguments    | Type                | Semantics
+------------ | ------------------- | ---------------------
+`operand`    | `XlaOp`             | array of type T
+`dimensions` | `ArraySlice<int64>` | dimensions to reverse
+
+Reverses the order of elements in the `operand` array along the specified
+`dimensions`, generating an output array of the same shape. Each element of the
+operand array at a multidimensional index is stored into the output array at a
+transformed index. The multidimensional index is transformed by reversing the
+index in each dimension to be reversed (i.e., if a dimension of size N is one of
+the reversing dimensions, its index i is transformed into N - 1 - i).
+
+One use for the `Rev` operation is to reverse the convolution weight array along
+the two window dimensions during the gradient computation in neural networks.
+
+## RngNormal
+
+See also
+[`XlaBuilder::RngNormal`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Constructs an output of a given shape with random numbers generated following
+the $$N(\mu, \sigma)$$ normal distribution. The parameters $$\mu$$ and
+$$\sigma$$, and output shape have to have a floating point elemental type. The
+parameters furthermore have to be scalar valued.
+
+<b>`RngNormal(mu, sigma, shape)`</b>
+
+| Arguments | Type    | Semantics                                           |
+| --------- | ------- | --------------------------------------------------- |
+| `mu`      | `XlaOp` | Scalar of type T specifying mean of generated       |
+:           :         : numbers                                   :
+| `sigma`   | `XlaOp` | Scalar of type T specifying standard deviation of   |
+:           :         : generated numbers                                   :
+| `shape`   | `Shape` | Output shape of type T                              |
+
+## RngUniform
+
+See also
+[`XlaBuilder::RngUniform`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Constructs an output of a given shape with random numbers generated following
+the uniform distribution over the interval $$[a,b)$$. The parameters and output
+element type have to be a boolean type, an integral type or a floating point
+types, and the types have to be consistent. The CPU and GPU backends currently
+only support F64, F32, F16, BF16, S64, U64, S32 and U32. Furthermore, the
+parameters need to be scalar valued. If $$b <= a$$ the result is
+implementation-defined.
+
+<b>`RngUniform(a, b, shape)`</b>
+
+| Arguments | Type                    | Semantics                         |
+| --------- | ----------------------- | --------------------------------- |
+| `a`       | `XlaOp`                 | Scalar of type T specifying lower |
+:           :                         : limit of interval                 :
+| `b`       | `XlaOp`                 | Scalar of type T specifying upper |
+:           :                         : limit of interval                 :
+| `shape`   | `Shape`                 | Output shape of type T            |
+
+## Scatter
+
+The XLA scatter operation generates a result which is the value of the input
+tensor `operand`, with several slices (at indices specified by
+`scatter_indices`) updated with the values in `updates` using
+`update_computation`.
+
+See also
+[`XlaBuilder::Scatter`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `scatter(operand, scatter_indices, updates, update_computation, index_vector_dim, update_window_dims, inserted_window_dims, scatter_dims_to_operand_dims)` </b>
+
+|Arguments         | Type                   | Semantics                        |
+|------------------|------------------------|----------------------------------|
+|`operand`         | `XlaOp`                | Tensor to be scattered into.     |
+|`scatter_indices` | `XlaOp`                | Tensor containing the starting   |
+:                  :                        : indices of the slices that must  :
+:                  :                        : be scattered to.                 :
+|`updates`         | `XlaOp`                | Tensor containing the values that|
+:                  :                        : must be used for scattering.     :
+|`update_computation`| `XlaComputation`     | Computation to be used for       |
+:                  :                        : combining the existing values in :
+:                  :                        : the input tensor and the updates :
+:                  :                        : during scatter. This computation :
+:                  :                        : should be of type `T, T -> T`.   :
+|`index_vector_dim`| `int64`                | The dimension in                 |
+:                  :                        : `scatter_indices` that contains  :
+:                  :                        : the starting indices.            :
+|`update_window_dims`| `ArraySlice<int64>`  | The set of dimensions in         |
+:                  :                        : `updates` shape that are _window :
+:                  :                        : dimensions_.                     :
+|`inserted_window_dims`| `ArraySlice<int64>`| The set of _window dimensions_   |
+:                  :                        : that must be inserted into       :
+:                  :                        : `updates` shape.                 :
+|`scatter_dims_to_operand_dims`| `ArraySlice<int64>`  | A dimensions map from  |
+:                  :                        : the scatter indices to the       :
+:                  :                        : operand index space. This array  :
+:                  :                        : is interpreted as mapping `i` to :
+:                  :                        : `scatter_dims_to_operand_dims[i]`:
+:                  :                        : . It has to be one-to-one and    :
+:                  :                        : total.                           :
+
+If `index_vector_dim` is equal to `scatter_indices.rank` we implicitly consider
+`scatter_indices` to have a trailing `1` dimension.
+
+We define `update_scatter_dims` of type `ArraySlice<int64>` as the set of
+dimensions in `updates` shape that are not in `update_window_dims`, in ascending
+order.
+
+The arguments of scatter should follow these constraints:
+
+  - `updates` tensor must be of rank `update_window_dims.size +
+  scatter_indices.rank - 1`.
+
+  - Bounds of dimension `i` in `updates` must conform to the following:
+      - If `i` is present in `update_window_dims` (i.e. equal to
+        `update_window_dims`[`k`] for some `k`), then the bound of dimension
+        `i` in `updates` must not exceed the corresponding bound of `operand`
+        after accounting for the `inserted_window_dims` (i.e.
+        `adjusted_window_bounds`[`k`], where `adjusted_window_bounds` contains
+        the bounds of `operand` with the bounds at indices
+        `inserted_window_dims` removed).
+      - If `i` is present in `update_scatter_dims` (i.e. equal to
+        `update_scatter_dims`[`k`] for some `k`), then the bound of dimension
+        `i` in `updates` must be equal to the corresponding bound of
+        `scatter_indices`, skipping `index_vector_dim` (i.e.
+        `scatter_indices.shape.dims`[`k`], if `k` < `index_vector_dim` and
+        `scatter_indices.shape.dims`[`k+1`] otherwise).
+
+  - `update_window_dims` must be in ascending order, not have any repeating
+    dimension numbers, and be in the range `[0, updates.rank)`.
+
+  - `inserted_window_dims` must be in ascending order, not have any
+    repeating dimension numbers, and be in the range `[0, operand.rank)`.
+
+  - `scatter_dims_to_operand_dims.size` must be equal to
+    `scatter_indices`[`index_vector_dim`], and its values must be in the range
+    `[0, operand.rank)`.
+
+For a given index `U` in the `updates` tensor, the corresponding index `I` in
+the `operand` tensor into which this update has to be applied is computed as
+follows:
+
+  1. Let `G` = { `U`[`k`] for `k` in `update_scatter_dims` }. Use `G` to look up
+     an index vector `S` in the `scatter_indices` tensor such that `S`[`i`] =
+     `scatter_indices`[Combine(`G`, `i`)] where Combine(A, b) inserts b at
+     positions `index_vector_dim` into A.
+  2. Create an index `S`<sub>`in`</sub> into `operand` using `S` by scattering
+     `S` using the `scatter_dims_to_operand_dims` map. More formally:
+       1. `S`<sub>`in`</sub>[`scatter_dims_to_operand_dims`[`k`]] = `S`[`k`] if
+          `k` < `scatter_dims_to_operand_dims.size`.
+       2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
+  3. Create an index `W`<sub>`in`</sub> into `operand` by scattering the indices
+     at `update_window_dims` in `U` according to `inserted_window_dims`.
+     More formally:
+       1. `W`<sub>`in`</sub>[`window_dims_to_operand_dims`(`k`)] = `U`[`k`] if
+          `k` < `update_window_dims.size`, where `window_dims_to_operand_dims`
+          is the monotonic function with domain [`0`, `update_window_dims.size`)
+          and range [`0`, `operand.rank`) \\ `inserted_window_dims`. (For
+          example, if `update_window_dims.size` is `4`, `operand.rank` is `6`,
+          and `inserted_window_dims` is {`0`, `2`} then
+          `window_dims_to_operand_dims` is {`0`→`1`, `1`→`3`, `2`→`4`,
+          `3`→`5`}).
+       2. `W`<sub>`in`</sub>[`_`] = `0` otherwise.
+  4. `I` is `W`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
+     addition.
+
+In summary, the scatter operation can be defined as follows.
+
+   - Initialize `output` with `operand`, i.e. for all indices `O` in the
+     `operand` tensor:\
+       `output`[`O`] = `operand`[`O`]
+   - For every index `U` in the `updates` tensor and the corresponding index `O`
+     in the `operand` tensor:\
+       `output`[`O`] = `update_computation`(`output`[`O`], `updates`[`U`])
+
+The order in which updates are applied is non-deterministic. So, when multiple
+indices in `updates` refer to the same index in `operand`, the corresponding
+value in `output` will be non-deterministic.
+
+Note that the first parameter that is passed into the `update_computation` will
+always be the current value from the `output` tensor and the second parameter
+will always be the value from the `updates` tensor. This is important
+specifically for cases when the `update_computation` is _not commutative_.
+
+Informally, the scatter op can be viewed as an _inverse_ of the gather op, i.e.
+the scatter op updates the elements in the input that are extracted by the
+corresponding gather op.
+
+For a detailed informal description and examples, refer to the
+"Informal Description" section under `Gather`.
+
+## Select
+
+See also
+[`XlaBuilder::Select`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Constructs an output array from elements of two input arrays, based on the
+values of a predicate array.
+
+<b> `Select(pred, on_true, on_false)` </b>
+
+Arguments  | Type    | Semantics
+---------- | ------- | ------------------
+`pred`     | `XlaOp` | array of type PRED
+`on_true`  | `XlaOp` | array of type T
+`on_false` | `XlaOp` | array of type T
+
+The arrays `on_true` and `on_false` must have the same shape. This is also the
+shape of the output array. The array `pred` must have the same dimensionality as
+`on_true` and `on_false`, with the `PRED` element type.
+
+For each element `P` of `pred`, the corresponding element of the output array is
+taken from `on_true` if the value of `P` is `true`, and from `on_false` if the
+value of `P` is `false`. As a restricted form of [broadcasting]
+(broadcasting.md), `pred` can be a scalar of type `PRED`. In this case, the
+output array is taken wholly from `on_true` if `pred` is `true`, and from
+`on_false` if `pred` is `false`.
+
+Example with non-scalar `pred`:
+
+```
+let pred: PRED[4] = {true, false, false, true};
+let v1: s32[4] = {1, 2, 3, 4};
+let v2: s32[4] = {100, 200, 300, 400};
+==>
+Select(pred, v1, v2) = s32[4]{1, 200, 300, 4};
+```
+
+Example with scalar `pred`:
+
+```
+let pred: PRED = true;
+let v1: s32[4] = {1, 2, 3, 4};
+let v2: s32[4] = {100, 200, 300, 400};
+==>
+Select(pred, v1, v2) = s32[4]{1, 2, 3, 4};
+```
+
+Selections between tuples are supported. Tuples are considered to be scalar
+types for this purpose. If `on_true` and `on_false` are tuples (which must have
+the same shape!) then `pred` has to be a scalar of type `PRED`.
+
+## SelectAndScatter
+
+See also
+[`XlaBuilder::SelectAndScatter`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+This operation can be considered as a composite operation that first computes
+`ReduceWindow` on the `operand` array to select an element from each window, and
+then scatters the `source` array to the indices of the selected elements to
+construct an output array with the same shape as the operand array. The binary
+`select` function is used to select an element from each window by applying it
+across each window, and it is called with the property that the first
+parameter's index vector is lexicographically less than the second parameter's
+index vector. The `select` function returns `true` if the first parameter is
+selected and returns `false` if the second parameter is selected, and the
+function must hold transitivity (i.e., if `select(a, b)` and `select(b, c)` are
+`true`, then `select(a, c)` is also `true`) so that the selected element does
+not depend on the order of the elements traversed for a given window.
+
+The function `scatter` is applied at each selected index in the output array. It
+takes two scalar parameters:
+
+1.  Current value at the selected index in the output array
+2.  The scatter value from `source` that applies to the selected index
+
+It combines the two parameters and returns a scalar value that's used to update
+the value at the selected index in the output array. Initially, all indices of
+the output array are set to `init_value`.
+
+The output array has the same shape as the `operand` array and the `source`
+array must have the same shape as the result of applying a `ReduceWindow`
+operation on the `operand` array. `SelectAndScatter` can be used to
+backpropagate the gradient values for a pooling layer in a neural network.
+
+<b>`SelectAndScatter(operand, select, window_dimensions, window_strides,
+padding, source, init_value, scatter)`</b>
+
+| Arguments           | Type                | Semantics                        |
+| ------------------- | ------------------- | -------------------------------- |
+| `operand`           | `XlaOp`             | array of type T over which the   |
+:                     :                     : windows slide                    :
+| `select`            | `XlaComputation`    | binary computation of type `T, T |
+:                     :                     : -> PRED`, to apply to all        :
+:                     :                     : elements in each window; returns :
+:                     :                     : `true` if the first parameter is :
+:                     :                     : selected and returns `false` if  :
+:                     :                     : the second parameter is selected :
+| `window_dimensions` | `ArraySlice<int64>` | array of integers for window     |
+:                     :                     : dimension values                 :
+| `window_strides`    | `ArraySlice<int64>` | array of integers for window     |
+:                     :                     : stride values                    :
+| `padding`           | `Padding`           | padding type for window          |
+:                     :                     : (Padding\:\:kSame or             :
+:                     :                     : Padding\:\:kValid)               :
+| `source`            | `XlaOp`             | array of type T with the values  |
+:                     :                     : to scatter                       :
+| `init_value`        | `XlaOp`             | scalar value of type T for the   |
+:                     :                     : initial value of the output      :
+:                     :                     : array                            :
+| `scatter`           | `XlaComputation`    | binary computation of type `T, T |
+:                     :                     : -> T`, to apply each scatter     :
+:                     :                     : source element with its          :
+:                     :                     : destination element              :
+
+The figure below shows examples of using `SelectAndScatter`, with the `select`
+function computing the maximal value among its parameters. Note that when the
+windows overlap, as in the figure (2) below, an index of the `operand` array may
+be selected multiple times by different windows. In the figure, the element of
+value 9 is selected by both of the top windows (blue and red) and the binary
+addition `scatter` function produces the output element of value 8 (2 + 6).
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%"
+    src="https://www.tensorflow.org/images/ops_scatter_to_selected_window_element.png">
+</div>
+
+The evaluation order of the `scatter` function is arbitrary and may be
+non-deterministic. Therefore, the `scatter` function should not be overly
+sensitive to reassociation. See the discussion about associativity in the
+context of [`Reduce`](#reduce) for more details.
+
+## Send
+
+See also
+[`XlaBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Send(operand, channel_handle)` </b>
+
+Arguments        | Type            | Semantics
+---------------- | --------------- | -----------------------------------------
+`operand`        | `XlaOp`         | data to send (array of type T)
+`channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair
+
+Sends the given operand data to a `Recv` instruction in another computation
+that shares the same channel handle. Does not return any data.
+
+Similar to the `Recv` operation, the client API of `Send` operation represents
+synchronous communication, and is internally decomposed into 2 HLO instructions
+(`Send` and `SendDone`) to enable asynchronous data transfers. See also
+[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
+
+<b>`Send(HloInstruction operand, int64 channel_id)`</b>
+
+Initiates an asynchronous transfer of the operand to the resources allocated by
+the `Recv` instruction with the same channel id. Returns a context, which is
+used by a following `SendDone` instruction to wait for the completion of the
+data transfer. The context is a tuple of {operand (shape), request identifier
+(U32)} and it can only be used by a `SendDone` instruction.
+
+<b> `SendDone(HloInstruction context)` </b>
+
+Given a context created by a `Send` instruction, waits for the data transfer to
+complete.  The instruction does not return any data.
+
+<b> Scheduling of channel instructions </b>
+
+The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`,
+`Send`, `SendDone`) is as below.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:70%" src="../../images/send_recv_order.png">
+</div>
+
+* `Recv` happens before `Send`
+* `Send` happens before `RecvDone`
+* `Recv` happens before `RecvDone`
+* `Send` happens before `SendDone`
+
+When the backend compilers generate a linear schedule for each computation that
+communicates via channel instructions, there must not be cycles across the
+computations. For example, below schedules lead to deadlocks.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/send_recv_schedule.png">
+</div>
+
+## Slice
+
+See also
+[`XlaBuilder::Slice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Slicing extracts a sub-array from the input array. The sub-array is of the same
+rank as the input and contains the values inside a bounding box within the input
+array where the dimensions and indices of the bounding box are given as
+arguments to the slice operation.
+
+<b> `Slice(operand, start_indices, limit_indices)` </b>
+
+| Arguments       | Type                | Semantics                            |
+| --------------- | ------------------- | ------------------------------------ |
+| `operand`       | `XlaOp`             | N dimensional array of type T        |
+| `start_indices` | `ArraySlice<int64>` | List of N integers containing the    |
+:                 :                     : starting indices of the slice for    :
+:                 :                     : each dimension. Values must be       :
+:                 :                     : greater than or equal to zero.       :
+| `limit_indices` | `ArraySlice<int64>` | List of N integers containing the    |
+:                 :                     : ending indices (exclusive) for the   :
+:                 :                     : slice for each dimension. Each value :
+:                 :                     : must be greater than or equal to the :
+:                 :                     : respective `start_indices` value for :
+:                 :                     : the dimension and less than or equal :
+:                 :                     : to the size of the dimension.        :
+
+1-dimensional example:
+
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+Slice(a, {2}, {4}) produces:
+  {2.0, 3.0}
+```
+
+2-dimensional example:
+
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+
+Slice(b, {2, 1}, {4, 3}) produces:
+  { { 7.0,  8.0},
+    {10.0, 11.0} }
+```
+
+## Sort
+
+See also
+[`XlaBuilder::Sort`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+There are two versions of the Sort instruction: a single-operand and a
+two-operand version.
+
+<b>`Sort(operand)`</b>
+
+Arguments   | Type    | Semantics
+----------- | ------- | --------------------
+`operand`   | `XlaOp` | The operand to sort.
+`dimension` | `int64` | The dimension along which to sort.
+
+Sorts the elements in the operand in ascending order along the provided
+dimension. For example, for a rank-2 (matrix) operand, a `dimension` value of 0
+will sort each column independently, and a `dimension` value of 1 will sort each
+row independently. If the operand's elements have floating point type, and the
+operand contains NaN elements, the order of elements in the output is
+implementation-defined.
+
+<b>`Sort(key, value)`</b>
+
+Sorts both the key and the value operands. The keys are sorted as in the
+single-operand version. The values are sorted according to the order of their
+corresponding keys. For example, if the inputs are `keys = [3, 1]` and
+`values = [42, 50]`, then the output of the sort is the tuple 
+`{[1, 3], [50, 42]}`.
+
+The sort is not guaranteed to be stable, that is, if the keys array contains
+duplicates, the order of their corresponding values may not be preserved.
+
+Arguments   | Type    | Semantics
+----------- | ------- | -------------------
+`keys`      | `XlaOp` | The sort keys.
+`values`    | `XlaOp` | The values to sort.
+`dimension` | `int64` | The dimension along which to sort.
+
+The `keys` and `values` must have the same dimensions, but may have different
+element types.
+
+## Transpose
+
+See also the `tf.reshape` operation.
+
+<b>`Transpose(operand)`</b>
+
+Arguments     | Type                | Semantics
+------------- | ------------------- | ------------------------------
+`operand`     | `XlaOp`             | The operand to transpose.
+`permutation` | `ArraySlice<int64>` | How to permute the dimensions.
+
+
+Permutes the operand dimensions with the given permutation, so
+`∀ i . 0 ≤ i < rank ⇒ input_dimensions[permutation[i]] = output_dimensions[i]`.
+
+This is the same as Reshape(operand, permutation,
+                            Permute(permutation, operand.shape.dimensions)).
+
+## Tuple
+
+See also
+[`XlaBuilder::Tuple`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+A tuple containing a variable number of data handles, each of which has its own
+shape.
+
+This is analogous to `std::tuple` in C++. Conceptually:
+
+```
+let v: f32[10] = f32[10]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+let s: s32 = 5;
+let t: (f32[10], s32) = tuple(v, s);
+```
+
+Tuples can be deconstructed (accessed) via the [`GetTupleElement`]
+(#gettupleelement) operation.
+
+## While
+
+See also
+[`XlaBuilder::While`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `While(condition, body, init)` </b>
+
+| Arguments   | Type             | Semantics                                |
+| ----------- | ---------------- | ---------------------------------------- |
+| `condition` | `XlaComputation` | XlaComputation of type `T -> PRED` which |
+:             :                  : defines the termination condition of the :
+:             :                  : loop.                                    :
+| `body`      | `XlaComputation` | XlaComputation of type `T -> T` which    |
+:             :                  : defines the body of the loop.            :
+| `init`      | `T`              | Initial value for the parameter of       |
+:             :                  : `condition` and `body`.                  :
+
+Sequentially executes the `body` until the `condition` fails. This is similar to
+a typical while loop in many other languages except for the differences and
+restrictions listed below.
+
+*   A `While` node returns a value of type `T`, which is the result from the
+    last execution of the `body`.
+*   The shape of the type `T` is statically determined and must be the same
+    across all iterations.
+
+The T parameters of the computations are initialized with the `init` value in
+the first iteration and are automatically updated to the new result from `body`
+in each subsequent iteration.
+
+One main use case of the `While` node is to implement the repeated execution of
+training in neural networks. Simplified pseudocode is shown below with a graph
+that represents the computation. The code can be found in
+[`while_test.cc`](https://www.tensorflow.org/code/tensorflow/compiler/xla/tests/while_test.cc).
+The type `T` in this example is a `Tuple` consisting of an `int32` for the
+iteration count and a `vector[10]` for the accumulator. For 1000 iterations, the
+loop keeps adding a constant vector to the accumulator.
+
+```
+// Pseudocode for the computation.
+init = {0, zero_vector[10]} // Tuple of int32 and float[10].
+result = init;
+while (result(0) < 1000) {
+  iteration = result(0) + 1;
+  new_vector = result(1) + constant_vector[10];
+  result = {iteration, new_vector};
+}
+```
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="https://www.tensorflow.org/images/ops_while.png">
+</div>
diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD
index 2a858b4fd6..1a53f24177 100644
--- a/tensorflow/tools/docs/BUILD
+++ b/tensorflow/tools/docs/BUILD
@@ -127,7 +127,6 @@ py_test(
     name = "build_docs_test",
     size = "small",
     srcs = ["build_docs_test.py"],
-    data = ["//tensorflow/docs_src"],
     srcs_version = "PY2AND3",
     tags = [
         # No reason to run sanitizers or fastbuild for this test.
diff --git a/tensorflow/tools/docs/build_docs_test.py b/tensorflow/tools/docs/build_docs_test.py
index 0cbf8b478f..4d3bedda2d 100644
--- a/tensorflow/tools/docs/build_docs_test.py
+++ b/tensorflow/tools/docs/build_docs_test.py
@@ -30,9 +30,11 @@ from tensorflow.tools.docs import generate_lib
 
 class Flags(object):
   resource_root = resource_loader.get_root_dir_with_all_resources()
-  src_dir = os.path.join(resource_root, 'tensorflow/docs_src')
+  src_dir = os.path.join(googletest.GetTempDir(), 'input')
+  os.mkdir(src_dir)
   base_dir = os.path.join(resource_root, 'tensorflow/')
-  output_dir = googletest.GetTempDir()
+  output_dir = os.path.join(googletest.GetTempDir(), 'output')
+  os.mkdir(output_dir)
 
 
 class BuildDocsTest(googletest.TestCase):
-- 
GitLab


From df7221d84988e5f7c1cc2775d8f5f44ffdd5918b Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 1 Oct 2018 14:39:31 -0700
Subject: [PATCH 0970/1357] Drop external control dependencies in tfe.defun.

They shouldn't help given the automatic control dependencies, and are tricky
to capture in the general case.

PiperOrigin-RevId: 215282837
---
 tensorflow/python/eager/function.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 3b6f288fb9..f261d92d64 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -269,6 +269,15 @@ class FuncGraph(ops.Graph):
   def variables(self, var_list):
     self._weak_variables = [weakref.ref(v) for v in var_list]
 
+  def control_dependencies(self, control_inputs):
+    # Drop control dependencies to outside of the graph. TODO(b/117109273)
+    # unclear how to capture an op, not a tensor.
+    if not control_inputs:
+      return super(FuncGraph, self).control_dependencies(control_inputs)
+    return super(FuncGraph, self).control_dependencies(
+        [c for c in control_inputs
+         if getattr(c, "graph", None) is self])
+
   def create_op(
       self,
       op_type,
-- 
GitLab


From 9084e999b3caf65833f9651c72bc09eb3094eba5 Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <jhseu@google.com>
Date: Mon, 1 Oct 2018 15:08:25 -0700
Subject: [PATCH 0971/1357] Don't run initialize ops if it's empty. Fixes a bug
 when using the profiler.

PiperOrigin-RevId: 215287936
---
 tensorflow/python/training/session_manager.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/training/session_manager.py b/tensorflow/python/training/session_manager.py
index 5e4749f306..cd313c2ce0 100644
--- a/tensorflow/python/training/session_manager.py
+++ b/tensorflow/python/training/session_manager.py
@@ -184,9 +184,11 @@ class SessionManager(object):
     self._target = master
     sess = session.Session(self._target, graph=self._graph, config=config)
     # TODO(jhseu): Delete once tpu.initialize_system() goes away.
-    sess.run(
+    initialize_ops = (
         distribution_strategy_context.get_distribution_strategy().initialize()
     )
+    if initialize_ops:
+      sess.run(initialize_ops)
 
     if checkpoint_dir and checkpoint_filename_with_path:
       raise ValueError("Can not provide both checkpoint_dir and "
-- 
GitLab


From c7237e6070dbf4acd1ade5a40dc676418cbd889b Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 1 Oct 2018 15:10:19 -0700
Subject: [PATCH 0972/1357] Don't generate backward function and delete when
 its not necessary

PiperOrigin-RevId: 215288224
---
 tensorflow/c/eager/tape.h                 | 7 +++----
 tensorflow/python/eager/pywrap_tfe_src.cc | 3 +--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 41b5b8ff36..5ba55a203f 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -130,7 +130,7 @@ class GradientTape {
       const string& op_type, std::vector<TapeTensor>& output_tensors,
       gtl::ArraySlice<int64> input_tensor_id,
       gtl::ArraySlice<tensorflow::DataType> input_dtypes,
-      BackwardFunction* backward_function,
+      const std::function<BackwardFunction*()>& backward_function_getter,
       const std::function<void(BackwardFunction*)>& backward_function_deleter);
 
   void DeleteTrace(int64 tensor_id);
@@ -206,10 +206,9 @@ void GradientTape<Gradient, BackwardFunction, TapeTensor>::RecordOperation(
     const string& op_type, std::vector<TapeTensor>& output_tensors,
     gtl::ArraySlice<int64> input_tensor_id,
     gtl::ArraySlice<tensorflow::DataType> input_dtypes,
-    BackwardFunction* backward_function,
+    const std::function<BackwardFunction*()>& backward_function_getter,
     const std::function<void(BackwardFunction*)>& backward_function_deleter) {
   if (!ShouldRecord(input_tensor_id, input_dtypes)) {
-    backward_function_deleter(backward_function);
     return;
   }
   std::vector<int64> ids;
@@ -229,7 +228,7 @@ void GradientTape<Gradient, BackwardFunction, TapeTensor>::RecordOperation(
     tensors.push_back(o);
   }
   op_tape_[op_id] = OpTapeEntry<BackwardFunction, TapeTensor>{
-      op_type, std::move(tensors), ids, backward_function,
+      op_type, std::move(tensors), std::move(ids), backward_function_getter(),
       backward_function_deleter};
 }
 
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 4b9f7f4100..ae1e12f9c3 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1567,9 +1567,8 @@ void TapeSetRecordOperation(
   }
 
   for (TFE_Py_Tape* tape : SafeTapeSet()) {
-    auto* function = backward_function_getter();
     tape->tape->RecordOperation(op_type_str, output_info, input_ids,
-                                input_dtypes, function,
+                                input_dtypes, backward_function_getter,
                                 backward_function_killer);
   }
 }
-- 
GitLab


From cca204f12a5838f0ffdd4a80c27d451cf61d3636 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 15:25:27 -0700
Subject: [PATCH 0973/1357] Added option (off by default) to enable a
 higher-performance variant of the Adam optimizer's variable update formula.

PiperOrigin-RevId: 215290881
---
 tensorflow/contrib/tpu/proto/optimization_parameters.proto | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index a43f45554f..8529b48c15 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -62,7 +62,10 @@ message FtrlParameters {
 // (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/AdamOptimizer). If
 // use_non_lazy_adam is enabled, use_gradient_accumulation is also required in
 // order to get correct results; a warning will be printed otherwise (which may
-// change to an error in the future).
+// change to an error in the future). If use_max_with_epsilon is set, the Adam
+// variable update formula will be changed from m / (sqrt(v) + epsilon) to
+// m / max(sqrt(v), abs(epsilon)); this option improves the performance of TPU
+// training and is not expected to harm model quality.
 message AdamParameters {
   float beta1 = 3;
   float beta2 = 4;
@@ -70,6 +73,7 @@ message AdamParameters {
   float initial_m = 6;
   float initial_v = 7;
   bool use_non_lazy_adam = 8;
+  bool use_max_with_epsilon = 9;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
-- 
GitLab


From 52574f95279d8cd5ec22cfc24668b9586e41367a Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Mon, 1 Oct 2018 15:26:59 -0700
Subject: [PATCH 0974/1357] Remove jemalloc build files and dead configuration
 options.

PiperOrigin-RevId: 215291195
---
 configure.py                                  |   2 -
 tensorflow/BUILD                              |  39 --
 tensorflow/contrib/cmake/CMakeLists.txt       |  11 -
 .../contrib/cmake/external/jemalloc.cmake     |  50 ---
 .../core/platform/default/build_config.bzl    |  20 +-
 tensorflow/core/platform/posix/port.cc        |  36 +-
 tensorflow/core/platform/windows/port.cc      |  51 +--
 tensorflow/tools/lib_package/BUILD            |  16 -
 tensorflow/tools/pip_package/BUILD            |   8 -
 tensorflow/workspace.bzl                      |  12 -
 third_party/jemalloc.BUILD                    | 356 ------------------
 third_party/systemlibs/jemalloc.BUILD         |  30 --
 third_party/systemlibs/syslibs_configure.bzl  |   1 -
 tools/bazel.rc                                |   1 -
 14 files changed, 11 insertions(+), 622 deletions(-)
 delete mode 100644 tensorflow/contrib/cmake/external/jemalloc.cmake
 delete mode 100644 third_party/jemalloc.BUILD
 delete mode 100644 third_party/systemlibs/jemalloc.BUILD

diff --git a/configure.py b/configure.py
index 57d9574d1f..0efa11aa41 100644
--- a/configure.py
+++ b/configure.py
@@ -1493,7 +1493,6 @@ def main():
   setup_python(environ_cp)
 
   if is_windows():
-    environ_cp['TF_NEED_JEMALLOC'] = '0'
     environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
     environ_cp['TF_NEED_COMPUTECPP'] = '0'
     environ_cp['TF_NEED_OPENCL'] = '0'
@@ -1507,7 +1506,6 @@ def main():
     environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0'
 
   if is_macos():
-    environ_cp['TF_NEED_JEMALLOC'] = '0'
     environ_cp['TF_NEED_TENSORRT'] = '0'
     environ_cp['TF_ENABLE_XLA'] = '0'
 
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 4876b51a6f..9b62a50452 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -203,21 +203,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-# TODO(jhseu): Enable on other platforms other than Linux.
-config_setting(
-    name = "with_jemalloc_linux_x86_64",
-    define_values = {"with_jemalloc": "true"},
-    values = {"cpu": "k8"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_jemalloc_linux_ppc64le",
-    define_values = {"with_jemalloc": "true"},
-    values = {"cpu": "ppc"},
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "with_default_optimizations",
     define_values = {"with_default_optimizations": "true"},
@@ -265,30 +250,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-config_setting(
-    name = "with_jemalloc_linux_x86_64_dynamic",
-    define_values = {
-        "with_jemalloc": "true",
-        "framework_shared_object": "true",
-    },
-    values = {
-        "cpu": "k8",
-    },
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_jemalloc_linux_ppc64le_dynamic",
-    define_values = {
-        "with_jemalloc": "true",
-        "framework_shared_object": "true",
-    },
-    values = {
-        "cpu": "ppc",
-    },
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "using_cuda_clang",
     define_values = {
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index c6d6f04168..f675c135f4 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -30,7 +30,6 @@ endif()
 
 option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
 option(tensorflow_ENABLE_HDFS_SUPPORT "Enable HDFS support" OFF)
-option(tensorflow_ENABLE_JEMALLOC_SUPPORT "Enable jemalloc support" OFF)
 option(tensorflow_BUILD_CC_EXAMPLE "Build the C++ tutorial example" ON)
 option(tensorflow_BUILD_PYTHON_BINDINGS "Build the Python bindings" ON)
 option(tensorflow_BUILD_ALL_KERNELS "Build all OpKernels" ON)
@@ -218,10 +217,6 @@ if (tensorflow_WIN_CPU_SIMD_OPTIONS)
   endif()
 endif()
 
-if (tensorflow_ENABLE_JEMALLOC_SUPPORT)
-  add_definitions(-DTENSORFLOW_USE_JEMALLOC -DJEMALLOC_EXPORT=)
-endif()
-
 # External dependencies
 include(zlib)
 include(gif)
@@ -329,12 +324,6 @@ if(tensorflow_ENABLE_GRPC_SUPPORT)
     list(APPEND tensorflow_EXTERNAL_DEPENDENCIES boringssl)
   endif()
 endif()
-if(tensorflow_ENABLE_JEMALLOC_SUPPORT)
-  include(jemalloc)
-  list(APPEND tensorflow_EXTERNAL_LIBRARIES ${jemalloc_STATIC_LIBRARIES})
-  list(APPEND tensorflow_EXTERNAL_DEPENDENCIES jemalloc)
-  include_directories(${jemalloc_INCLUDE_DIRS})
-endif()
 if(tensorflow_ENABLE_SNAPPY_SUPPORT)
   include(snappy)
   list(APPEND tensorflow_EXTERNAL_LIBRARIES ${snappy_STATIC_LIBRARIES})
diff --git a/tensorflow/contrib/cmake/external/jemalloc.cmake b/tensorflow/contrib/cmake/external/jemalloc.cmake
deleted file mode 100644
index afadcc007d..0000000000
--- a/tensorflow/contrib/cmake/external/jemalloc.cmake
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-include (ExternalProject)
-
-set(jemalloc_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/jemalloc/src/jemalloc/include)
-set(jemalloc_URL https://mirror.bazel.build/github.com/jemalloc/jemalloc-cmake/archive/jemalloc-cmake.4.3.1.tar.gz)
-set(jemalloc_HASH SHA256=f9be9a05fe906deb5c1c8ca818071a7d2e27d66fd87f5ba9a7bf3750bcedeaf0)
-set(jemalloc_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jemalloc/src/jemalloc)
-
-if (WIN32)
-    set(jemalloc_INCLUDE_DIRS
-        ${jemalloc_INCLUDE_DIRS} 
-        ${CMAKE_CURRENT_BINARY_DIR}/jemalloc/src/jemalloc/include/msvc_compat
-    )
-    if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
-        set(jemalloc_STATIC_LIBRARIES ${jemalloc_BUILD}/Release/jemalloc.lib)
-    else()
-        set(jemalloc_STATIC_LIBRARIES ${jemalloc_BUILD}/jemalloc.lib)
-    endif()
-else()
-    set(jemalloc_STATIC_LIBRARIES ${jemalloc_BUILD}/Release/jemalloc.a)
-endif()
-
-ExternalProject_Add(jemalloc
-    PREFIX jemalloc
-    URL ${jemalloc_URL}
-    URL_HASH ${jemalloc_HASH}
-    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
-    BUILD_IN_SOURCE 1
-    BUILD_BYPRODUCTS ${jemalloc_STATIC_LIBRARIES}
-    BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release --target jemalloc
-    INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "Skipping install step."
-    CMAKE_CACHE_ARGS
-        -DCMAKE_BUILD_TYPE:STRING=Release
-        -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-        -Dwith-jemalloc-prefix:STRING=jemalloc_
-        -Dwithout-export:BOOL=ON
-)
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 3b14757945..d884c1aa7c 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -615,11 +615,7 @@ def tf_kernel_tests_linkstatic():
 
 def tf_additional_lib_defines():
     """Additional defines needed to build TF libraries."""
-    return select({
-        "//tensorflow:with_jemalloc_linux_x86_64": ["TENSORFLOW_USE_JEMALLOC"],
-        "//tensorflow:with_jemalloc_linux_ppc64le": ["TENSORFLOW_USE_JEMALLOC"],
-        "//conditions:default": [],
-    })
+    return []
 
 def tf_additional_lib_deps():
     """Additional dependencies needed to build TF libraries."""
@@ -631,13 +627,7 @@ def tf_additional_lib_deps():
     ] + if_static(
         ["@nsync//:nsync_cpp"],
         ["@nsync//:nsync_headers"],
-    ) + select({
-        "//tensorflow:with_jemalloc_linux_x86_64_dynamic": ["@jemalloc//:jemalloc_headers"],
-        "//tensorflow:with_jemalloc_linux_ppc64le_dynamic": ["@jemalloc//:jemalloc_headers"],
-        "//tensorflow:with_jemalloc_linux_x86_64": ["@jemalloc//:jemalloc_impl"],
-        "//tensorflow:with_jemalloc_linux_ppc64le": ["@jemalloc//:jemalloc_impl"],
-        "//conditions:default": [],
-    })
+    )
 
 def tf_additional_core_deps():
     return select({
@@ -725,11 +715,7 @@ def tf_additional_binary_deps():
             "//tensorflow/stream_executor:cuda_platform",
             "//tensorflow/core/platform/default/build_config:cuda",
         ],
-    ) + select({
-        "//tensorflow:with_jemalloc_linux_x86_64": ["@jemalloc//:jemalloc_impl"],
-        "//tensorflow:with_jemalloc_linux_ppc64le": ["@jemalloc//:jemalloc_impl"],
-        "//conditions:default": [],
-    }) + [
+    ) + [
         # TODO(allenl): Split these out into their own shared objects (they are
         # here because they are shared between contrib/ op shared objects and
         # core).
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index b46b9927cd..acdd7798ea 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -13,10 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifdef TENSORFLOW_USE_JEMALLOC
-#include "jemalloc/jemalloc.h"
-#endif
-
 #include "absl/base/internal/sysinfo.h"
 
 #include "tensorflow/core/platform/cpu_info.h"
@@ -101,11 +97,7 @@ void* AlignedMalloc(size_t size, int minimum_alignment) {
   // memory aligned to at least the size of a pointer.
   const int required_alignment = sizeof(void*);
   if (minimum_alignment < required_alignment) return Malloc(size);
-#ifdef TENSORFLOW_USE_JEMALLOC
-  int err = jemalloc_posix_memalign(&ptr, minimum_alignment, size);
-#else
   int err = posix_memalign(&ptr, minimum_alignment, size);
-#endif
   if (err != 0) {
     return nullptr;
   } else {
@@ -116,29 +108,11 @@ void* AlignedMalloc(size_t size, int minimum_alignment) {
 
 void AlignedFree(void* aligned_memory) { Free(aligned_memory); }
 
-void* Malloc(size_t size) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_malloc(size);
-#else
-  return malloc(size);
-#endif
-}
+void* Malloc(size_t size) { return malloc(size); }
 
-void* Realloc(void* ptr, size_t size) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_realloc(ptr, size);
-#else
-  return realloc(ptr, size);
-#endif
-}
+void* Realloc(void* ptr, size_t size) { return realloc(ptr, size); }
 
-void Free(void* ptr) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  jemalloc_free(ptr);
-#else
-  free(ptr);
-#endif
-}
+void Free(void* ptr) { free(ptr); }
 
 void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
   return AlignedMalloc(size, minimum_alignment);
@@ -146,9 +120,7 @@ void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
 
 void NUMAFree(void* ptr, size_t size) { Free(ptr); }
 
-int NUMAGetMemAffinity(const void* addr) {
-  return kNUMANoAffinity;
-}
+int NUMAGetMemAffinity(const void* addr) { return kNUMANoAffinity; }
 
 void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
   // No-op.
diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc
index 5375f56372..911ea1902f 100644
--- a/tensorflow/core/platform/windows/port.cc
+++ b/tensorflow/core/platform/windows/port.cc
@@ -13,10 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifdef TENSORFLOW_USE_JEMALLOC
-#include "jemalloc/jemalloc.h"
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -70,55 +66,16 @@ void NUMASetThreadNodeAffinity(int node) {}
 int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }
 
 void* AlignedMalloc(size_t size, int minimum_alignment) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  void* ptr = NULL;
-  // posix_memalign requires that the requested alignment be at least
-  // sizeof(void*). In this case, fall back on malloc which should return
-  // memory aligned to at least the size of a pointer.
-  const int required_alignment = sizeof(void*);
-  if (minimum_alignment < required_alignment) return Malloc(size);
-  int err = jemalloc_posix_memalign(&ptr, minimum_alignment, size);
-  if (err != 0) {
-    return NULL;
-  } else {
-    return ptr;
-  }
-#else
   return _aligned_malloc(size, minimum_alignment);
-#endif
 }
 
-void AlignedFree(void* aligned_memory) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  jemalloc_free(aligned_memory);
-#else
-  _aligned_free(aligned_memory);
-#endif
-}
+void AlignedFree(void* aligned_memory) { _aligned_free(aligned_memory); }
 
-void* Malloc(size_t size) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_malloc(size);
-#else
-  return malloc(size);
-#endif
-}
+void* Malloc(size_t size) { return malloc(size); }
 
-void* Realloc(void* ptr, size_t size) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_realloc(ptr, size);
-#else
-  return realloc(ptr, size);
-#endif
-}
+void* Realloc(void* ptr, size_t size) { return realloc(ptr, size); }
 
-void Free(void* ptr) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_free(ptr);
-#else
-  return free(ptr);
-#endif
-}
+void Free(void* ptr) { return free(ptr); }
 
 void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
   return AlignedMalloc(size, minimum_alignment);
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index b9f4902639..85514b8629 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -137,14 +137,6 @@ genrule(
         "@snappy//:COPYING",
         "@zlib_archive//:zlib.h",
     ] + select({
-        "//tensorflow:with_jemalloc_linux_x86_64": [
-            "@jemalloc//:COPYING",
-        ],
-        "//tensorflow:with_jemalloc_linux_ppc64le": [
-            "@jemalloc//:COPYING",
-        ],
-        "//conditions:default": [],
-    }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
         ],
@@ -202,14 +194,6 @@ genrule(
         "@snappy//:COPYING",
         "@zlib_archive//:zlib.h",
     ] + select({
-        "//tensorflow:with_jemalloc_linux_x86_64": [
-            "@jemalloc//:COPYING",
-        ],
-        "//tensorflow:with_jemalloc_linux_ppc64le": [
-            "@jemalloc//:COPYING",
-        ],
-        "//conditions:default": [],
-    }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
         ],
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index c621812535..3a1c4a45d4 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -167,14 +167,6 @@ filegroup(
         "@zlib_archive//:zlib.h",
         "@org_python_pypi_backports_weakref//:LICENSE",
     ] + select({
-        "//tensorflow:with_jemalloc_linux_x86_64": [
-            "@jemalloc//:COPYING",
-        ],
-        "//tensorflow:with_jemalloc_linux_ppc64le": [
-            "@jemalloc//:COPYING",
-        ],
-        "//conditions:default": [],
-    }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
         ],
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 9b4b698874..bcc89ef729 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -642,18 +642,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         testonly_ = True,
     )
 
-    tf_http_archive(
-        name = "jemalloc",
-        build_file = clean_dep("//third_party:jemalloc.BUILD"),
-        sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8",
-        strip_prefix = "jemalloc-4.4.0",
-        system_build_file = clean_dep("//third_party/systemlibs:jemalloc.BUILD"),
-        urls = [
-            "https://mirror.bazel.build/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
-            "https://github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
-        ],
-    )
-
     java_import_external(
         name = "com_google_testing_compile",
         jar_sha256 = "edc180fdcd9f740240da1a7a45673f46f59c5578d8cd3fbc912161f74b5aebb8",
diff --git a/third_party/jemalloc.BUILD b/third_party/jemalloc.BUILD
deleted file mode 100644
index 1b0829b8fe..0000000000
--- a/third_party/jemalloc.BUILD
+++ /dev/null
@@ -1,356 +0,0 @@
-# Description:
-# jemalloc - a general-purpose scalable concurrent malloc implementation
-
-licenses(["notice"])  # BSD
-
-exports_files(["COPYING"])
-
-load("@org_tensorflow//third_party:common.bzl", "template_rule")
-
-cc_library(
-    name = "jemalloc_headers",
-    hdrs = ["include/jemalloc/jemalloc.h"],
-    includes = ["include"],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "jemalloc_impl",
-    srcs = [
-        "src/arena.c",
-        "src/atomic.c",
-        "src/base.c",
-        "src/bitmap.c",
-        "src/chunk.c",
-        "src/chunk_dss.c",
-        "src/chunk_mmap.c",
-        "src/ckh.c",
-        "src/ctl.c",
-        "src/extent.c",
-        "src/hash.c",
-        "src/huge.c",
-        "src/jemalloc.c",
-        "src/mb.c",
-        "src/mutex.c",
-        "src/nstime.c",
-        "src/pages.c",
-        "src/prng.c",
-        "src/prof.c",
-        "src/quarantine.c",
-        "src/rtree.c",
-        "src/spin.c",
-        "src/stats.c",
-        "src/tcache.c",
-        "src/tsd.c",
-        "src/util.c",
-        "src/witness.c",
-    ],
-    hdrs = [
-        "include/jemalloc/internal/arena.h",
-        "include/jemalloc/internal/assert.h",
-        "include/jemalloc/internal/atomic.h",
-        "include/jemalloc/internal/base.h",
-        "include/jemalloc/internal/bitmap.h",
-        "include/jemalloc/internal/chunk.h",
-        "include/jemalloc/internal/chunk_dss.h",
-        "include/jemalloc/internal/chunk_mmap.h",
-        "include/jemalloc/internal/ckh.h",
-        "include/jemalloc/internal/ctl.h",
-        "include/jemalloc/internal/extent.h",
-        "include/jemalloc/internal/hash.h",
-        "include/jemalloc/internal/huge.h",
-        "include/jemalloc/internal/jemalloc_internal.h",
-        "include/jemalloc/internal/jemalloc_internal_decls.h",
-        "include/jemalloc/internal/jemalloc_internal_defs.h",
-        "include/jemalloc/internal/jemalloc_internal_macros.h",
-        "include/jemalloc/internal/mb.h",
-        "include/jemalloc/internal/mutex.h",
-        "include/jemalloc/internal/nstime.h",
-        "include/jemalloc/internal/pages.h",
-        "include/jemalloc/internal/ph.h",
-        "include/jemalloc/internal/private_namespace.h",
-        "include/jemalloc/internal/prng.h",
-        "include/jemalloc/internal/prof.h",
-        "include/jemalloc/internal/ql.h",
-        "include/jemalloc/internal/qr.h",
-        "include/jemalloc/internal/quarantine.h",
-        "include/jemalloc/internal/rb.h",
-        "include/jemalloc/internal/rtree.h",
-        "include/jemalloc/internal/size_classes.h",
-        "include/jemalloc/internal/smoothstep.h",
-        "include/jemalloc/internal/spin.h",
-        "include/jemalloc/internal/stats.h",
-        "include/jemalloc/internal/tcache.h",
-        "include/jemalloc/internal/ticker.h",
-        "include/jemalloc/internal/tsd.h",
-        "include/jemalloc/internal/util.h",
-        "include/jemalloc/internal/valgrind.h",
-        "include/jemalloc/internal/witness.h",
-    ],
-    # Same flags that jemalloc uses to build.
-    copts = [
-        "-O3",
-        "-funroll-loops",
-        "-D_GNU_SOURCE",
-        "-D_REENTRANT",
-    ],
-    includes = ["include"],
-    # pthread_atfork() is called for PPC.
-    linkopts = select({
-        "@org_tensorflow//tensorflow:linux_ppc64le": [
-            "-lpthread",
-        ],
-        "@org_tensorflow//tensorflow:linux_x86_64": [
-            "-lpthread",
-        ],
-        "//conditions:default": [
-        ],
-    }),
-    visibility = ["//visibility:public"],
-    deps = [":jemalloc_headers"],
-)
-
-sh_binary(
-    name = "jemalloc_sh",
-    srcs = ["include/jemalloc/jemalloc.sh"],
-)
-
-genrule(
-    name = "jemalloc_h",
-    srcs = [
-        ":jemalloc_defs_h",
-        ":jemalloc_macros_h",
-        ":jemalloc_mangle_h",
-        ":jemalloc_protos_h",
-        ":jemalloc_rename_h",
-        ":jemalloc_typedefs_h",
-    ],
-    outs = ["include/jemalloc/jemalloc.h"],
-    cmd = "$(location :jemalloc_sh) $$(dirname $(location :jemalloc_defs_h))/../../ >$@",
-    tools = [":jemalloc_sh"],
-)
-
-# Add to this list if you want to export more symbols from jemalloc.
-genrule(
-    name = "public_symbols_txt",
-    outs = ["include/jemalloc/internal/public_symbols.txt"],
-    cmd = "\n".join([
-        "cat <<'EOF' > $@",
-        "free:jemalloc_free",
-        "malloc:jemalloc_malloc",
-        "posix_memalign:jemalloc_posix_memalign",
-        "realloc:jemalloc_realloc",
-        "EOF",
-    ]),
-)
-
-sh_binary(
-    name = "jemalloc_mangle_sh",
-    srcs = ["include/jemalloc/jemalloc_mangle.sh"],
-)
-
-genrule(
-    name = "jemalloc_mangle_h",
-    srcs = [":public_symbols_txt"],
-    outs = ["include/jemalloc/jemalloc_mangle.h"],
-    cmd = "$(location :jemalloc_mangle_sh) $(location :public_symbols_txt) je_ >$@",
-    tools = [":jemalloc_mangle_sh"],
-)
-
-sh_binary(
-    name = "jemalloc_rename_sh",
-    srcs = ["include/jemalloc/jemalloc_rename.sh"],
-)
-
-genrule(
-    name = "jemalloc_rename_h",
-    srcs = [":public_symbols_txt"],
-    outs = ["include/jemalloc/jemalloc_rename.h"],
-    cmd = "$(location :jemalloc_rename_sh) $(location :public_symbols_txt) >$@",
-    tools = [":jemalloc_rename_sh"],
-)
-
-sh_binary(
-    name = "private_namespace_sh",
-    srcs = ["include/jemalloc/internal/private_namespace.sh"],
-)
-
-genrule(
-    name = "private_namespace_h",
-    srcs = ["include/jemalloc/internal/private_symbols.txt"],
-    outs = ["include/jemalloc/internal/private_namespace.h"],
-    cmd = "$(location :private_namespace_sh) $(location include/jemalloc/internal/private_symbols.txt) >$@",
-    tools = [":private_namespace_sh"],
-)
-
-sh_binary(
-    name = "public_namespace_sh",
-    srcs = ["include/jemalloc/internal/public_namespace.sh"],
-)
-
-genrule(
-    name = "public_namespace_h",
-    srcs = [":public_symbols_txt"],
-    outs = ["include/jemalloc/internal/public_namespace.h"],
-    cmd = "$(location :public_namespace_sh) $(location :public_symbols_txt) >$@",
-    tools = [":public_namespace_sh"],
-)
-
-sh_binary(
-    name = "size_classes_sh",
-    srcs = ["include/jemalloc/internal/size_classes.sh"],
-)
-
-# Size classes for Linux x86_64 and ppc64le. Update if adding builds for other
-# architectures. See size_classes.sh for details on the arguments.
-# For default case, kept the arguments same as that of  x86_64 for now.
-genrule(
-    name = "size_classes_h",
-    outs = ["include/jemalloc/internal/size_classes.h"],
-    cmd = select({
-        "@org_tensorflow//tensorflow:linux_ppc64le": "$(location :size_classes_sh) \"3 4\" 3 16 2 >$@",
-        "@org_tensorflow//tensorflow:linux_x86_64": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
-        "//conditions:default": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
-    }),
-    tools = [":size_classes_sh"],
-)
-
-template_rule(
-    name = "jemalloc_internal_h",
-    src = "include/jemalloc/internal/jemalloc_internal.h.in",
-    out = "include/jemalloc/internal/jemalloc_internal.h",
-    substitutions = {
-        "@private_namespace@": "je_",
-        "@install_suffix@": "",
-    },
-)
-
-template_rule(
-    name = "jemalloc_internal_defs_h",
-    src = "include/jemalloc/internal/jemalloc_internal_defs.h.in",
-    out = "include/jemalloc/internal/jemalloc_internal_defs.h",
-    substitutions = {
-        "#undef JEMALLOC_PREFIX": "#define JEMALLOC_PREFIX \"jemalloc_\"",
-        "#undef JEMALLOC_CPREFIX": "#define JEMALLOC_CPREFIX \"JEMALLOC_\"",
-        "#undef JEMALLOC_PRIVATE_NAMESPACE": "#define JEMALLOC_PRIVATE_NAMESPACE je_",
-        "#undef CPU_SPINWAIT": "\n".join([
-            "#if defined(__powerpc64__) || defined(__powerpc__)",
-            "#define CPU_SPINWAIT __asm__ volatile(\"or 27,27,27\")",
-            "#else",
-            "#define CPU_SPINWAIT __asm__ volatile(\"pause\")",
-            "#endif",
-        ]),
-        "#undef JEMALLOC_HAVE_BUILTIN_CLZ": "#define JEMALLOC_HAVE_BUILTIN_CLZ",
-        "#undef JEMALLOC_USE_SYSCALL": "#define JEMALLOC_USE_SYSCALL",
-        "#undef JEMALLOC_HAVE_SECURE_GETENV": "#define JEMALLOC_HAVE_SECURE_GETENV",
-        "#undef JEMALLOC_HAVE_PTHREAD_ATFORK": "#define JEMALLOC_HAVE_PTHREAD_ATFORK",
-        "#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE": "#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1",
-        # Newline required because of substitution conflicts.
-        "#undef JEMALLOC_HAVE_CLOCK_MONOTONIC\n": "#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1\n",
-        "#undef JEMALLOC_THREADED_INIT": "#define JEMALLOC_THREADED_INIT",
-        "#undef JEMALLOC_TLS_MODEL": "#define JEMALLOC_TLS_MODEL __attribute__((tls_model(\"initial-exec\")))",
-        "#undef JEMALLOC_CC_SILENCE": "#define JEMALLOC_CC_SILENCE",
-        "#undef JEMALLOC_STATS": "#define JEMALLOC_STATS",
-        "#undef JEMALLOC_TCACHE": "#define JEMALLOC_TCACHE",
-        "#undef JEMALLOC_DSS": "#define JEMALLOC_DSS",
-        "#undef JEMALLOC_FILL": "#define JEMALLOC_FILL",
-        "#undef LG_TINY_MIN": "#define LG_TINY_MIN 3",
-        "#undef LG_PAGE": "\n".join([
-            "#if defined(__powerpc64__) || defined(__powerpc__)",
-            "#define LG_PAGE 16",
-            "#else",
-            "#define LG_PAGE 12",
-            "#endif",
-        ]),
-        "#undef JEMALLOC_MAPS_COALESCE": "#define JEMALLOC_MAPS_COALESCE",
-        "#undef JEMALLOC_TLS": "#define JEMALLOC_TLS",
-        "#undef JEMALLOC_INTERNAL_UNREACHABLE": "#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable",
-        "#undef JEMALLOC_INTERNAL_FFSLL": "#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll",
-        # Newline required because of substitution conflicts.
-        "#undef JEMALLOC_INTERNAL_FFSL\n": "#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl\n",
-        "#undef JEMALLOC_INTERNAL_FFS\n": "#define JEMALLOC_INTERNAL_FFS __builtin_ffs\n",
-        "#undef JEMALLOC_CACHE_OBLIVIOUS": "#define JEMALLOC_CACHE_OBLIVIOUS",
-        "#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY": "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY",
-        "#undef JEMALLOC_HAVE_MADVISE": "#define JEMALLOC_HAVE_MADVISE",
-        "#undef JEMALLOC_PURGE_MADVISE_DONTNEED": "#define JEMALLOC_PURGE_MADVISE_DONTNEED",
-        "#undef JEMALLOC_THP": "#define JEMALLOC_THP",
-        "#undef JEMALLOC_HAS_ALLOCA_H": "#define JEMALLOC_HAS_ALLOCA_H 1",
-        # Newline required because of substitution conflicts.
-        "#undef LG_SIZEOF_INT\n": "#define LG_SIZEOF_INT 2\n",
-        "#undef LG_SIZEOF_LONG\n": "#define LG_SIZEOF_LONG 3\n",
-        "#undef LG_SIZEOF_LONG_LONG": "#define LG_SIZEOF_LONG_LONG 3",
-        "#undef LG_SIZEOF_INTMAX_T": "#define LG_SIZEOF_INTMAX_T 3",
-        "#undef JEMALLOC_GLIBC_MALLOC_HOOK": "#define JEMALLOC_GLIBC_MALLOC_HOOK",
-        "#undef JEMALLOC_GLIBC_MEMALIGN_HOOK": "#define JEMALLOC_GLIBC_MEMALIGN_HOOK",
-        "#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP": "#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP",
-        "#undef JEMALLOC_CONFIG_MALLOC_CONF": "#define JEMALLOC_CONFIG_MALLOC_CONF \"\"",
-    },
-)
-
-template_rule(
-    name = "jemalloc_defs_h",
-    src = "include/jemalloc/jemalloc_defs.h.in",
-    out = "include/jemalloc/jemalloc_defs.h",
-    substitutions = {
-        "#undef JEMALLOC_HAVE_ATTR": "#define JEMALLOC_HAVE_ATTR",
-        "#undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE": "#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE",
-        "#undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF": "#define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF",
-        "#undef JEMALLOC_HAVE_ATTR_FORMAT_PRINTF": "#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF",
-        "#undef JEMALLOC_OVERRIDE_MEMALIGN": "#define JEMALLOC_OVERRIDE_MEMALIGN",
-        "#undef JEMALLOC_OVERRIDE_VALLOC": "#define JEMALLOC_OVERRIDE_VALLOC",
-        "#undef JEMALLOC_USABLE_SIZE_CONST": "#define JEMALLOC_USABLE_SIZE_CONST",
-        "#undef JEMALLOC_USE_CXX_THROW": "#define JEMALLOC_USE_CXX_THROW",
-        "#undef LG_SIZEOF_PTR": "#define LG_SIZEOF_PTR 3",
-    },
-)
-
-template_rule(
-    name = "jemalloc_macros_h",
-    src = "include/jemalloc/jemalloc_macros.h.in",
-    out = "include/jemalloc/jemalloc_macros.h",
-    substitutions = {
-        "@jemalloc_version@": "0.0.0",
-        "@jemalloc_version_major@": "0",
-        "@jemalloc_version_minor@": "0",
-        "@jemalloc_version_bugfix@": "0",
-        "@jemalloc_version_nrev@": "0",
-        "@jemalloc_version_gid@": "0000000000000000000000000000000000000000",
-    },
-)
-
-template_rule(
-    name = "jemalloc_protos_h",
-    src = "include/jemalloc/jemalloc_protos.h.in",
-    out = "include/jemalloc/jemalloc_protos.h",
-    substitutions = {
-        "@aligned_alloc": "aligned_alloc",
-        "@calloc": "calloc",
-        "@cbopaque": "cbopaque",
-        "@dallocx": "dallocx",
-        "@free": "free",
-        "@je": "je",
-        "@mallctl": "mallctl",
-        "@mallctlnametomib": "mallctlnametomib",
-        "@mallctlbymib": "mallctlbymib",
-        "@malloc_stats_print": "malloc_stats_print",
-        "@malloc_usable_size": "malloc_usable_size",
-        "@malloc": "malloc",
-        "@mallocx": "mallocx",
-        "@memalign": "memalign",
-        "@nallocx": "nallocx",
-        "@posix_memalign": "posix_memalign",
-        "@rallocx": "rallocx",
-        "@realloc": "realloc",
-        "@sallocx": "sallocx",
-        "@sdallocx": "sdallocx",
-        "@valloc": "valloc",
-        "@xallocx": "xallocx",
-    },
-)
-
-template_rule(
-    name = "jemalloc_typedefs_h",
-    src = "include/jemalloc/jemalloc_typedefs.h.in",
-    out = "include/jemalloc/jemalloc_typedefs.h",
-    substitutions = {},
-)
diff --git a/third_party/systemlibs/jemalloc.BUILD b/third_party/systemlibs/jemalloc.BUILD
deleted file mode 100644
index 6a48d582ba..0000000000
--- a/third_party/systemlibs/jemalloc.BUILD
+++ /dev/null
@@ -1,30 +0,0 @@
-licenses(["notice"])  # BSD
-
-filegroup(
-    name = "COPYING",
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "jemalloc_headers",
-    defines = [
-        "jemalloc_posix_memalign=posix_memalign",
-        "jemalloc_malloc=malloc",
-        "jemalloc_realloc=realloc",
-        "jemalloc_free=free",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "jemalloc_impl",
-    linkopts = ["-ljemalloc"],
-    defines = [
-        "jemalloc_posix_memalign=posix_memalign",
-        "jemalloc_malloc=malloc",
-        "jemalloc_realloc=realloc",
-        "jemalloc_free=free",
-    ],
-    visibility = ["//visibility:public"],
-    deps = [":jemalloc_headers"],
-)
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index 8b0ab39eaf..b03d3380d7 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -23,7 +23,6 @@ VALID_LIBS = [
     "gast_archive",
     "gif_archive",
     "grpc",
-    "jemalloc",
     "jpeg",
     "jsoncpp_git",
     "lmdb",
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 0cd148ed87..3734fab715 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -73,7 +73,6 @@ build --define=grpc_no_ares=true
 build --spawn_strategy=standalone
 build --genrule_strategy=standalone
 build -c opt
-build --define=with_jemalloc=false
 
 # Other build flags.
 build --define=grpc_no_ares=true
-- 
GitLab


From 55d96e8ea93407da156c156702a38fd8b5d06b2a Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Mon, 1 Oct 2018 15:34:08 -0700
Subject: [PATCH 0975/1357] Fix Android builds when using
 --define=with_tflite_flex

PiperOrigin-RevId: 215292521
---
 tensorflow/contrib/lite/delegates/flex/BUILD | 6 +++---
 tensorflow/core/common_runtime/eager/BUILD   | 7 ++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD
index bf5d91899c..9dd38958e5 100644
--- a/tensorflow/contrib/lite/delegates/flex/BUILD
+++ b/tensorflow/contrib/lite/delegates/flex/BUILD
@@ -20,7 +20,7 @@ cc_library(
         "//tensorflow/contrib/lite:kernel_api",
     ] + select({
         "//tensorflow:android": [
-            "//tensorflow/core:android_tensorflow_lib_lite_no_runtime",
+            "//tensorflow/core:android_tensorflow_lib_lite",
         ],
         "//conditions:default": [
             "//tensorflow/core:framework",
@@ -60,7 +60,7 @@ cc_library(
         "//tensorflow/contrib/lite:util",
     ] + select({
         "//tensorflow:android": [
-            "//tensorflow/core:android_tensorflow_lib_lite_no_runtime",
+            "//tensorflow/core:android_tensorflow_lib_lite",
         ],
         "//conditions:default": [
             "//tensorflow/core:lib",
@@ -178,7 +178,7 @@ cc_library(
         "//tensorflow/contrib/lite:kernel_api",
     ] + select({
         "//tensorflow:android": [
-            "//tensorflow/core:android_tensorflow_lib_lite_no_runtime",
+            "//tensorflow/core:android_tensorflow_lib",
         ],
         "//conditions:default": [
             "//tensorflow/core:lib",
diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD
index be5f3bae3a..7b74c67c85 100644
--- a/tensorflow/core/common_runtime/eager/BUILD
+++ b/tensorflow/core/common_runtime/eager/BUILD
@@ -147,10 +147,11 @@ tf_cuda_library(
         "kernel_and_device.h",
     ],
     visibility = ["//tensorflow:internal"],
-    deps = select({
+    deps = [
+        "@farmhash_archive//:farmhash",
+    ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib_lite",
-            "//util/hash:farmhash_fingerprint",
         ],
         "//conditions:default": [
             "//tensorflow/core:core_cpu_lib",
@@ -219,13 +220,13 @@ tf_cuda_library(
     visibility = ["//tensorflow:internal"],
     deps = [
         ":kernel_and_device",
+        "@farmhash_archive//:farmhash",
         # Only the TF_AttrType enum is required, so pull in just the C headers.
         # TODO(b/113535673): Break this dependency and avoid the C header completely.
         "//tensorflow/c:c_api_headers",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib_lite",
-            "//util/hash:farmhash_fingerprint",
         ],
         "//conditions:default": [
             "//tensorflow/core:core_cpu",
-- 
GitLab


From dc4ac1b84c9c74655f04254779516f9968a5c385 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 1 Oct 2018 15:41:29 -0700
Subject: [PATCH 0976/1357] Clean up the build_xla_ops to use the generated C++
 TF op wrappers.

This cleanup will make the future CL implementing lazy compilation simpler.

Includes some supporting changes:

 - Teach NewInternalScope to create a scope that doesn't do shape inference.  We
   need this because we don't have a ShapeRefiner that has been run over the
   entire graph available in the build_xla_ops pass.

 - Add a WithAssignedDevice modifier to tensorflow::Scope.

 - Make cc_op_gen write out an Operation field for nodes which may not
   necessarily have any outputs.  We already did this in most cases, but we
   weren't doing it for nodes that have possibly-empty list outputs.

 - Minor change renaming ops/xla_jit_op.cc to ops/xla_jit_ops.cc, now that we
   have more than one XLA JIT op.

PiperOrigin-RevId: 215293817
---
 tensorflow/cc/framework/cc_op_gen.cc          |  10 +-
 tensorflow/cc/framework/scope.cc              |  33 +++-
 tensorflow/cc/framework/scope.h               |   4 +
 tensorflow/cc/framework/scope_internal.h      |   5 +
 tensorflow/compiler/jit/BUILD                 |   4 +
 tensorflow/compiler/jit/build_xla_ops_pass.cc | 180 ++++++++----------
 .../compiler/jit/build_xla_ops_pass_test.cc   |  32 +++-
 .../encapsulate_xla_computations_pass_test.cc |   2 +-
 tensorflow/compiler/tf2xla/cc/BUILD           |   7 +-
 tensorflow/core/graph/node_builder.cc         |   7 +
 tensorflow/core/graph/node_builder.h          |   4 +
 11 files changed, 174 insertions(+), 114 deletions(-)

diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index a32d1b1eb5..39593370d1 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -853,11 +853,7 @@ void OpInfo::WriteClassDecl(WritableFile* h) const {
     }
   }
 
-  strings::StrAppend(&class_decl, "\n");
-
-  if (output_types.empty()) {
-    strings::StrAppend(&class_decl, "  Operation operation;\n");
-  }
+  strings::StrAppend(&class_decl, "\n  Operation operation;\n");
   for (int i = 0; i < output_types.size(); ++i) {
     strings::StrAppend(&class_decl, "  ", output_types[i], " ", output_names[i],
                        ";\n");
@@ -878,9 +874,11 @@ void OpInfo::GetOutput(string* out) const {
   string return_on_error =
       strings::StrCat("if (!", scope_str, ".ok()) return;");
 
+  strings::StrAppend(out, "  this->operation = Operation(ret);\n");
+
   // No outputs.
   if (graph_op_def.output_arg_size() == 0) {
-    strings::StrAppend(out, "  this->operation = Operation(ret);\n  return;\n");
+    strings::StrAppend(out, "  return;\n");
     return;
   }
   if (graph_op_def.output_arg_size() == 1) {
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
index 7f6ac4cae7..6abc9e268e 100644
--- a/tensorflow/cc/framework/scope.cc
+++ b/tensorflow/cc/framework/scope.cc
@@ -62,7 +62,7 @@ Scope::Impl::Impl(const std::shared_ptr<Graph>& graph,
       refiner_(refiner),
       scope_used_(nullptr),
       colocation_constraints_(),
-      disable_shape_inference_(false) {}
+      disable_shape_inference_(refiner_ == nullptr) {}
 
 Scope Scope::NewRootScope() {
   Graph* graph = new Graph(OpRegistry::Global());
@@ -94,6 +94,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ScopeName, const string& name,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -110,6 +111,7 @@ Scope::Impl::Impl(const Scope& other, Tags::OpName, const string& name,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -132,6 +134,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ControlDeps,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -163,6 +166,7 @@ Scope::Impl::Impl(const Scope& other, Tags::SingleUseScope,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -178,6 +182,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ExitOnError)
       exit_on_error_(true),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -194,6 +199,7 @@ Scope::Impl::Impl(const Scope& other, Tags::KernelLabel,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(kernel_label),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -210,12 +216,30 @@ Scope::Impl::Impl(const Scope& other, Tags::Colocate,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(
           clear_colocations
               ? std::unordered_set<string>()
               : other.impl()->GetColocationConstraints(colocate_with_op)),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
+Scope::Impl::Impl(const Scope& other, Tags::AssignedDevice,
+                  const string& assigned_device)
+    : graph_(other.impl()->graph_),
+      status_(other.impl()->status_),
+      name_map_(other.impl()->name_map_),
+      refiner_(other.impl()->refiner_),
+      scope_used_(other.impl()->scope_used_),
+      control_deps_(other.impl()->control_deps_),
+      name_(other.impl()->name_),
+      op_name_(other.impl()->op_name_),
+      exit_on_error_(other.impl()->exit_on_error_),
+      kernel_label_(other.impl()->kernel_label_),
+      device_(other.impl()->device_),
+      assigned_device_(assigned_device),
+      colocation_constraints_(other.impl()->colocation_constraints_),
+      disable_shape_inference_(other.impl()->disable_shape_inference_) {}
+
 std::unordered_set<string> Scope::Impl::GetColocationConstraints(
     const Operation& colocate_with_op) const {
   std::unordered_set<string> current_constraints(colocation_constraints_);
@@ -299,6 +323,9 @@ void Scope::UpdateBuilder(NodeBuilder* builder) const {
   if (!impl()->device_.empty()) {
     builder->Device(impl()->device_);
   }
+  if (!impl()->assigned_device_.empty()) {
+    builder->AssignedDevice(impl()->assigned_device_);
+  }
 }
 
 string Scope::Impl::GetUniqueName(const string& prefix,
@@ -394,6 +421,10 @@ Scope Scope::WithDevice(const string& device) const {
   return Scope(new Impl(*this, Impl::Tags::Device(), device));
 }
 
+Scope Scope::WithAssignedDevice(const string& assigned_device) const {
+  return Scope(new Impl(*this, Impl::Tags::AssignedDevice(), assigned_device));
+}
+
 Scope Scope::ColocateWith(const Operation& op) const {
   return Scope(new Impl(*this, Impl::Tags::Colocate(), op,
                         /* clear_colocations */ false));
diff --git a/tensorflow/cc/framework/scope.h b/tensorflow/cc/framework/scope.h
index 30c32bd44b..e307d8989b 100644
--- a/tensorflow/cc/framework/scope.h
+++ b/tensorflow/cc/framework/scope.h
@@ -133,6 +133,10 @@ class Scope {
   /// the device field set to 'device'.
   Scope WithDevice(const string& device) const;
 
+  /// Returns a new scope.  All ops created within the returned scope will have
+  /// their assigned device set to `assigned_device`.
+  Scope WithAssignedDevice(const string& assigned_device) const;
+
   /// Return a new scope. All ops created within the returned scope will be
   /// co-located on the device where op is placed.
   /// NOTE: This function is intended to be use internal libraries only for
diff --git a/tensorflow/cc/framework/scope_internal.h b/tensorflow/cc/framework/scope_internal.h
index 58adaef2e9..514e02e841 100644
--- a/tensorflow/cc/framework/scope_internal.h
+++ b/tensorflow/cc/framework/scope_internal.h
@@ -26,6 +26,8 @@ class ShapeRefiner;
 // graph, status, name_map, and refiner.
 // This is intended to enable the C API (which are used by other language
 // bindings) to create a Scope and access C++ functionality (i.e. gradients).
+//
+// Shape inference is disabled if `refiner` is nullptr.
 Scope NewInternalScope(Graph* graph, Status* status, ShapeRefiner* refiner);
 
 class Scope::Impl {
@@ -58,6 +60,7 @@ class Scope::Impl {
     enum class ExitOnError;
     enum class KernelLabel;
     enum class Colocate;
+    enum class AssignedDevice;
   };
 
   Impl(Graph* graph, Status* status, NameMap* name_map, ShapeRefiner* refiner,
@@ -74,6 +77,7 @@ class Scope::Impl {
   Impl(const Scope& other, Tags::KernelLabel, const string& kernel_label);
   Impl(const Scope& other, Tags::Colocate, const Operation& colocate_with_op,
        bool clear_colocations);
+  Impl(const Scope& other, Tags::AssignedDevice, const string& assigned_device);
 
   std::unordered_set<string> GetColocationConstraints(
       const Operation& colocate_with_op) const;
@@ -107,6 +111,7 @@ class Scope::Impl {
   const bool exit_on_error_ = false;
   const string kernel_label_ = "";
   const string device_ = "";
+  const string assigned_device_ = "";
   const std::unordered_set<string> colocation_constraints_;
 
   // If true, Scope::DoShapeInference() always returns Status:OK().
diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 29b60d1dbe..f20270931f 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -385,12 +385,16 @@ cc_library(
         ":shape_inference_helpers",
         ":union_find",
         ":xla_cluster_util",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:ops",
+        "//tensorflow/cc:scope_internal",
         "//tensorflow/compiler/jit/graphcycles",
         "//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags",
         "//tensorflow/compiler/jit/ops:xla_ops",
         "//tensorflow/compiler/tf2xla:dump_graph",
         "//tensorflow/compiler/tf2xla:resource_operation_table",
         "//tensorflow/compiler/tf2xla:xla_compiler",
+        "//tensorflow/compiler/tf2xla/cc:xla_jit_ops",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:core_cpu",
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc
index 9e3fd93cda..5974696b77 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc
@@ -14,8 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/jit/build_xla_ops_pass.h"
+#include "absl/algorithm/container.h"
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope_internal.h"
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_ops.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/common_runtime/function.h"
@@ -31,132 +35,108 @@ limitations under the License.
 #include "tensorflow/core/public/version.h"
 
 namespace tensorflow {
-
-static Status BuildXlaCompileNode(
-    const string& nodename, const string& function_name,
-    const AttrValueMap& function_attr, const string& device_name,
-    const DataTypeVector& constant_dtypes, int num_resources,
-    const DataTypeVector& arg_dtypes, Graph* graph, Node** node) {
-  NodeDef def;
-  def.set_name(graph->NewName(nodename));
-  def.set_op("_XlaCompile");
-  def.set_device(device_name);
-  AddNodeAttr("Tconstants", constant_dtypes, &def);
-  AddNodeAttr("Targs", arg_dtypes, &def);
-  AddNodeAttr("Nresources", num_resources, &def);
-  NameAttrList function;
-  function.set_name(function_name);
-  *function.mutable_attr() = function_attr;
-  AddNodeAttr("function", function, &def);
-
-  Status status;
-  *node = graph->AddNode(def, &status);
-  return status;
+namespace {
+void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) {
+  std::vector<const Edge*> out_edges(old_node->out_edges().begin(),
+                                     old_node->out_edges().end());
+  for (const Edge* edge : out_edges) {
+    // TODO(sanjoy): This does not update NodeDef inputs.  To be able to update
+    // NodeDef inputs we first need to fix encapsulate_subgraphs_pass to fix up
+    // the NodeDef inputs to the function call nodes.
+    g->AddEdge(new_node, edge->src_output(), edge->dst(), edge->dst_input());
+    g->RemoveEdge(edge);
+  }
 }
 
-static Status BuildXlaRunNode(const string& nodename, const string& device_name,
-                              const DataTypeVector& arg_dtypes,
-                              const DataTypeVector& result_dtypes, Graph* graph,
-                              Node** node) {
-  NodeDef def;
-  def.set_name(graph->NewName(nodename));
-  def.set_op("_XlaRun");
-  def.set_device(device_name);
-  AddNodeAttr("Targs", arg_dtypes, &def);
-  AddNodeAttr("Tresults", result_dtypes, &def);
+struct XlaClusterInfo {
+  std::vector<Output> constant_inputs;
+  std::vector<Output> non_constant_inputs;
+  std::vector<Output> resource_inputs;
+  NameAttrList function;
+};
 
-  Status status;
-  *node = graph->AddNode(def, &status);
-  return status;
+Output IncomingEdgeAsOutput(const Edge* e) {
+  return Output(e->src(), e->src_output());
 }
 
-static Status GetXlaAttrs(Node* node, int* num_constant_args,
-                          int* num_resource_args, DataTypeVector* const_dtypes,
-                          DataTypeVector* arg_dtypes) {
+Status GetXlaClusterInfo(Node* n, XlaClusterInfo* result) {
+  int num_constant_inputs, num_resource_inputs;
   TF_RETURN_IF_ERROR(
-      GetNodeAttr(node->attrs(), kXlaNumConstantArgsAttr, num_constant_args));
+      GetNodeAttr(n->attrs(), kXlaNumConstantArgsAttr, &num_constant_inputs));
   TF_RETURN_IF_ERROR(
-      GetNodeAttr(node->attrs(), kXlaNumResourceArgsAttr, num_resource_args));
+      GetNodeAttr(n->attrs(), kXlaNumResourceArgsAttr, &num_resource_inputs));
 
-  if (*num_constant_args < 0 || *num_resource_args < 0 ||
-      *num_constant_args + *num_resource_args > node->num_inputs()) {
+  if (num_constant_inputs < 0 || num_resource_inputs < 0 ||
+      num_constant_inputs + num_resource_inputs > n->num_inputs()) {
     return errors::InvalidArgument(
         "Invalid number of constant/resource arguments to XLA kernel.");
   }
 
-  const int num_nonconst_args =
-      node->num_inputs() - *num_constant_args - *num_resource_args;
-
-  const DataTypeVector& input_types = node->input_types();
-  std::copy(input_types.begin(), input_types.begin() + *num_constant_args,
-            std::back_inserter(*const_dtypes));
-  std::copy(input_types.begin() + *num_constant_args,
-            input_types.begin() + *num_constant_args + num_nonconst_args,
-            std::back_inserter(*arg_dtypes));
-  return Status::OK();
-}
-
-static void CopyIncomingEdges(Graph* g, Node* old_node, Node* new_node,
-                              int prefix_to_ignore) {
-  for (const Edge* edge : old_node->in_edges()) {
-    if (edge->IsControlEdge()) {
-      g->AddControlEdge(edge->src(), new_node);
-    } else if (edge->dst_input() >= prefix_to_ignore) {
-      g->AddEdge(edge->src(), edge->src_output(), new_node,
-                 edge->dst_input() - prefix_to_ignore);
-    }
-  }
-}
+  int num_non_constant_inputs =
+      n->num_inputs() - num_constant_inputs - num_resource_inputs;
 
-static void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) {
-  std::vector<const Edge*> out_edges(old_node->out_edges().begin(),
-                                     old_node->out_edges().end());
-  for (const Edge* edge : out_edges) {
-    // TODO(sanjoy): This does not update NodeDef inputs.
-    g->AddEdge(new_node, edge->src_output(), edge->dst(), edge->dst_input());
-    g->RemoveEdge(edge);
-  }
-}
+  std::vector<const Edge*> input_edges_vector;
+  TF_RETURN_IF_ERROR(n->input_edges(&input_edges_vector));
+  absl::Span<const Edge*> input_edges(input_edges_vector);
 
-static Status ReplaceNodeWithXlaCompileAndRun(Graph* g, Node* n) {
-  int num_constant_args, num_resource_args;
-  DataTypeVector const_dtypes;
-  DataTypeVector arg_dtypes;
+  absl::c_transform(input_edges.subspan(0, num_constant_inputs),
+                    std::back_inserter(result->constant_inputs),
+                    IncomingEdgeAsOutput);
 
-  TF_RETURN_IF_ERROR(GetXlaAttrs(n, &num_constant_args, &num_resource_args,
-                                 &const_dtypes, &arg_dtypes));
+  absl::c_transform(
+      input_edges.subspan(num_constant_inputs, num_non_constant_inputs),
+      std::back_inserter(result->non_constant_inputs), IncomingEdgeAsOutput);
 
-  Node *compile_node, *run_node;
+  absl::c_transform(
+      input_edges.subspan(num_constant_inputs + num_non_constant_inputs,
+                          num_resource_inputs),
+      std::back_inserter(result->resource_inputs), IncomingEdgeAsOutput);
 
-  TF_RETURN_IF_ERROR(BuildXlaCompileNode(
-      n->name(), n->type_string(), n->def().attr(), n->requested_device(),
-      const_dtypes, num_resource_args, arg_dtypes, g, &compile_node));
+  result->function.set_name(n->type_string());
+  *result->function.mutable_attr() = n->def().attr();
+  return Status::OK();
+}
 
-  DataTypeVector arg_dtypes_with_resources = arg_dtypes;
-  for (int i = 0; i < num_resource_args; i++) {
-    arg_dtypes_with_resources.push_back(DT_RESOURCE);
+Status CopyIncomingControlEdges(Graph* g, Node* from, Node* to) {
+  for (const Edge* e : from->in_edges()) {
+    if (e->IsControlEdge()) {
+      g->AddControlEdge(e->src(), to);
+    }
   }
 
-  TF_RETURN_IF_ERROR(BuildXlaRunNode(n->name(), n->requested_device(),
-                                     arg_dtypes_with_resources,
-                                     n->output_types(), g, &run_node));
-
-  compile_node->set_assigned_device_name(n->assigned_device_name());
-  run_node->set_assigned_device_name(n->assigned_device_name());
+  return Status::OK();
+}
 
-  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/compile_node,
-                    /*prefix_to_ignore=*/0);
-  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/run_node,
-                    /*prefix_to_ignore=*/num_constant_args);
+Status ReplaceNodeWithXlaCompileAndXlaRun(Graph* g, Node* n) {
+  Status status;
+  Scope root = NewInternalScope(g, &status, /*refiner=*/nullptr)
+                   .NewSubScope(n->name())
+                   .WithDevice(n->requested_device())
+                   .WithAssignedDevice(n->assigned_device_name());
+
+  XlaClusterInfo cluster_info;
+  TF_RETURN_IF_ERROR(GetXlaClusterInfo(n, &cluster_info));
+
+  ops::_XlaCompile xla_compile(root.WithOpName("xla_compile"),
+                               /*constants=*/cluster_info.constant_inputs,
+                               /*args=*/cluster_info.non_constant_inputs,
+                               /*resources=*/cluster_info.resource_inputs,
+                               cluster_info.function);
+  TF_RETURN_IF_ERROR(
+      CopyIncomingControlEdges(g, /*from=*/n, /*to=*/xla_compile.key.node()));
 
-  // The compilation_key output.
-  g->AddEdge(compile_node, 0, run_node, n->num_inputs() - num_constant_args);
+  std::vector<Output> xla_run_args = cluster_info.non_constant_inputs;
+  absl::c_copy(cluster_info.resource_inputs, std::back_inserter(xla_run_args));
+  ops::_XlaRun xla_run(root.WithOpName("xla_run"), xla_run_args,
+                       xla_compile.key, n->output_types());
 
-  MoveOutgoingEdges(g, /*old_node=*/n, /*new_node=*/run_node);
+  MoveOutgoingEdges(g, /*old_node=*/n,
+                    /*new_node=*/xla_run.operation.node());
   g->RemoveNode(n);
 
   return Status::OK();
 }
+}  // namespace
 
 Status BuildXlaOpsPass::Run(const GraphOptimizationPassOptions& options) {
   Graph* graph = options.graph->get();
@@ -170,7 +150,7 @@ Status BuildXlaOpsPass::Run(const GraphOptimizationPassOptions& options) {
     // Only compile nodes that are marked for compilation by the
     // compilation-marking pass (via 'attr_name').
     if (IsXlaCompiledKernel(*n)) {
-      TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndRun(graph, n));
+      TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndXlaRun(graph, n));
     }
   }
 
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
index b7cb4506b9..9d56db7b6b 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
@@ -56,18 +56,26 @@ Status BuildXlaOps(const Scope& s, std::unique_ptr<Graph>* result) {
 }
 
 Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name,
-                             const string& node_name, Node** result) {
+                             const string& node_name, int num_constant_args,
+                             int num_resource_args, Node** result) {
   NodeDef call_node;
   call_node.set_name(node_name);
   call_node.set_op(callee_name);
   AddNodeAttr(kXlaCompiledKernelAttr, true, &call_node);
-  AddNodeAttr(kXlaNumConstantArgsAttr, 0, &call_node);
-  AddNodeAttr(kXlaNumResourceArgsAttr, 0, &call_node);
+  AddNodeAttr(kXlaNumConstantArgsAttr, num_constant_args, &call_node);
+  AddNodeAttr(kXlaNumResourceArgsAttr, num_resource_args, &call_node);
   Status s;
   *result = graph->AddNode(call_node, &s);
   return s;
 }
 
+Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name,
+                             const string& node_name, Node** result) {
+  return MakeXlaCompiledKernel(graph, callee_name, node_name,
+                               /*num_constant_args=*/0, /*num_resource_args=*/0,
+                               result);
+}
+
 Node* MakeWrite(const Scope& scope, const string& id) {
   Output var_handle =
       ops::VarHandleOp(scope.WithOpName("Var" + id), DT_FLOAT, TensorShape({}));
@@ -108,5 +116,23 @@ TEST(BuildXlaOps, ControlDepsPreserved) {
   EXPECT_THAT(write_op_new, NodeWith(CtrlDeps(NodeWith(Op("_XlaRun")))));
 }
 
+TEST(BuildXlaOps, CleanFailureOnBogusAttr) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  FunctionDefLibrary flib_def =
+      CreateFunctionDefLibWithConstFunction("cluster_0");
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def));
+  Node* call;
+  TF_ASSERT_OK(
+      MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", 100, 100, &call));
+  Node* write_op = MakeWrite(root, "write");
+  root.graph()->AddControlEdge(call, write_op);
+
+  std::unique_ptr<Graph> graph;
+  Status failure_status = BuildXlaOps(root, &graph);
+  ASSERT_FALSE(failure_status.ok());
+  EXPECT_EQ(failure_status.code(), error::INVALID_ARGUMENT);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
index 479038ac8e..22531a4ace 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
@@ -19,7 +19,7 @@ limitations under the License.
 #include "tensorflow/cc/ops/resource_variable_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
-#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_op.h"
+#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_ops.h"
 #include "tensorflow/compiler/tf2xla/test_util.h"
 #include "tensorflow/core/framework/graph_to_functiondef.h"
 #include "tensorflow/core/graph/graph_constructor.h"
diff --git a/tensorflow/compiler/tf2xla/cc/BUILD b/tensorflow/compiler/tf2xla/cc/BUILD
index ea8d1b3d14..adcdb6c8f7 100644
--- a/tensorflow/compiler/tf2xla/cc/BUILD
+++ b/tensorflow/compiler/tf2xla/cc/BUILD
@@ -30,14 +30,15 @@ cc_library(
 
 tf_gen_op_wrapper_cc(
     name = "xla_jit_op_gen",
-    out_ops_file = "ops/xla_jit_op",
+    include_internal_ops = 1,
+    out_ops_file = "ops/xla_jit_ops",
     deps = ["//tensorflow/compiler/jit/ops:xla_ops"],
 )
 
 cc_library(
     name = "xla_jit_ops",
-    srcs = ["ops/xla_jit_op.cc"],
-    hdrs = ["ops/xla_jit_op.h"],
+    srcs = ["ops/xla_jit_ops.cc"],
+    hdrs = ["ops/xla_jit_ops.h"],
     deps = [
         "//tensorflow/cc:const_op",
         "//tensorflow/cc:ops",
diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc
index a446e0d136..d92874909f 100644
--- a/tensorflow/core/graph/node_builder.cc
+++ b/tensorflow/core/graph/node_builder.cc
@@ -99,6 +99,11 @@ NodeBuilder& NodeBuilder::Device(StringPiece device_spec) {
   return *this;
 }
 
+NodeBuilder& NodeBuilder::AssignedDevice(StringPiece device) {
+  assigned_device_ = string(device);
+  return *this;
+}
+
 Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const {
   // In case of error, set *created_node to nullptr.
   if (created_node != nullptr) *created_node = nullptr;
@@ -115,6 +120,8 @@ Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const {
   Node* node = graph->AddNode(node_def, &status);
   if (!status.ok()) return status;
 
+  node->set_assigned_device_name(assigned_device_);
+
   for (size_t i = 0; i < inputs_.size(); ++i) {
     if (inputs_[i].node != nullptr) {  // Skip back edges.
       graph->AddEdge(inputs_[i].node, inputs_[i].index, node, i);
diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h
index 4727ee7b56..d576985a23 100644
--- a/tensorflow/core/graph/node_builder.h
+++ b/tensorflow/core/graph/node_builder.h
@@ -100,6 +100,9 @@ class NodeBuilder {
   // "assigned device" in the Node).
   NodeBuilder& Device(StringPiece device_spec);
 
+  // Sets the device name in the "assigned device" field in tensorflow::Node.
+  NodeBuilder& AssignedDevice(StringPiece device);
+
   // Set the value of an attr.  attr_name must match the name of one of
   // attrs defined by the Op, and value must have the corresponding type
   // (see SetAttrValue() in ../framework/attr_value_util.h for legal
@@ -141,6 +144,7 @@ class NodeBuilder {
   std::vector<NodeOut> inputs_;
   std::vector<Node*> control_inputs_;
   std::vector<string> errors_;
+  string assigned_device_;
 };
 
 // IMPLEMENTATION -------------------------------------------------------------
-- 
GitLab


From 28a5ce4cf8702a6605e13a99c861ec6f2cd75929 Mon Sep 17 00:00:00 2001
From: Tayo Oguntebi <tayo@google.com>
Date: Mon, 1 Oct 2018 15:47:52 -0700
Subject: [PATCH 0977/1357]   Improve error message in transpose shape
 inference.

PiperOrigin-RevId: 215294817
---
 tensorflow/compiler/xla/service/shape_inference.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 7194b2cafd..6ccea9d2b5 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -2380,7 +2380,9 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
       !std::is_permutation(dimensions.begin(), dimensions.end(),
                            indices.begin())) {
     return InvalidArgument(
-        "Transpose dimensions not a permutation of the operand dimensions.");
+        "Transpose dimensions [%s] are not a permutation of the operand "
+        "dimensions (operand shape is %s).",
+        StrJoin(dimensions, ","), ShapeUtil::HumanString(operand));
   }
 
   // Permute(dimensions,input) computes output[dimensions[i]]=input[i]. However,
-- 
GitLab


From 6509437545f8fc973b39489c285811ea8cc8b15a Mon Sep 17 00:00:00 2001
From: Zhenyu Tan <tanzheny@google.com>
Date: Mon, 1 Oct 2018 15:52:16 -0700
Subject: [PATCH 0978/1357] If keras_model_path is google storage url, provide
 util to download model remotely.

PiperOrigin-RevId: 215295504
---
 tensorflow/python/estimator/keras.py      | 48 ++++++++++++++++++++---
 tensorflow/python/estimator/keras_test.py |  6 ---
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py
index 7546771ed3..5d5ed81fbb 100644
--- a/tensorflow/python/estimator/keras.py
+++ b/tensorflow/python/estimator/keras.py
@@ -368,6 +368,44 @@ def _save_first_checkpoint(keras_model, custom_objects, config):
   return latest_path
 
 
+def _get_file_from_google_storage(keras_model_path, model_dir):
+  """Get file from google storage and download to local file.
+
+  Args:
+    keras_model_path: a google storage path for compiled keras model.
+    model_dir: the directory from estimator config.
+
+  Returns:
+    The path where keras model is saved.
+
+  Raises:
+    ValueError: if storage object name does not end with .h5.
+  """
+  try:
+    from google.cloud import storage  # pylint:disable=g-import-not-at-top
+  except ImportError:
+    raise TypeError('Could not save model to Google cloud storage; please '
+                    'install `google-cloud-storage` via '
+                    '`pip install google-cloud-storage`.')
+  storage_client = storage.Client()
+  path, blob_name = os.path.split(keras_model_path)
+  _, bucket_name = os.path.split(path)
+  keras_model_dir = os.path.join(model_dir, 'keras')
+  if not gfile.Exists(keras_model_dir):
+    gfile.MakeDirs(keras_model_dir)
+  file_name = os.path.join(keras_model_dir, 'keras_model.h5')
+  try:
+    blob = storage_client.get_bucket(bucket_name).blob(blob_name)
+    blob.download_to_filename(file_name)
+  except:
+    raise ValueError('Failed to download keras model, please check '
+                     'environment variable GOOGLE_APPLICATION_CREDENTIALS '
+                     'and model path storage.googleapis.com/{bucket}/{object}.')
+  logging.info('Saving model to {}'.format(file_name))
+  del storage_client
+  return file_name
+
+
 def model_to_estimator(keras_model=None,
                        keras_model_path=None,
                        custom_objects=None,
@@ -407,12 +445,13 @@ def model_to_estimator(keras_model=None,
         'Please specity either `keras_model` or `keras_model_path`, '
         'but not both.')
 
+  config = estimator_lib.maybe_overwrite_model_dir_and_session_config(
+      config, model_dir)
   if not keras_model:
     if keras_model_path.startswith(
         'gs://') or 'storage.googleapis.com' in keras_model_path:
-      raise ValueError(
-          '%s is not a local path. Please copy the model locally first.' %
-          keras_model_path)
+      keras_model_path = _get_file_from_google_storage(keras_model_path,
+                                                       config.model_dir)
     logging.info('Loading models from %s', keras_model_path)
     keras_model = models.load_model(keras_model_path)
   else:
@@ -425,9 +464,6 @@ def model_to_estimator(keras_model=None,
         'Please compile the model with `model.compile()` '
         'before calling `model_to_estimator()`.')
 
-  config = estimator_lib.maybe_overwrite_model_dir_and_session_config(config,
-                                                                      model_dir)
-
   keras_model_fn = _create_keras_model_fn(keras_model, custom_objects)
   if _any_weight_initialized(keras_model):
     # Warn if config passed to estimator tries to update GPUOptions. If a
diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py
index 288f9b8906..4e285fa25a 100644
--- a/tensorflow/python/estimator/keras_test.py
+++ b/tensorflow/python/estimator/keras_test.py
@@ -581,12 +581,6 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
       with self.assertRaisesRegexp(ValueError, 'compiled'):
         keras_lib.model_to_estimator(keras_model=keras_model)
 
-    with self.cached_session():
-      keras_model = simple_sequential_model()
-      with self.assertRaisesRegexp(ValueError, 'not a local path'):
-        keras_lib.model_to_estimator(
-            keras_model_path='gs://bucket/object')
-
   def test_invalid_ionames_error(self):
     (x_train, y_train), (_, _) = testing_utils.get_test_data(
         train_samples=_TRAIN_SIZE,
-- 
GitLab


From 8559bc2c4c7616c5da8b4f7a3e1405c549a6068d Mon Sep 17 00:00:00 2001
From: "Joshua V. Dillon" <jvdillon@google.com>
Date: Mon, 1 Oct 2018 15:58:21 -0700
Subject: [PATCH 0979/1357] Add email comment explicitly authorizing
 distributions/special_math.py be released under Apache 2.0.

PiperOrigin-RevId: 215296386
---
 .../python/ops/distributions/special_math.py  | 61 ++++++++++++++++++-
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/distributions/special_math.py b/tensorflow/python/ops/distributions/special_math.py
index 31b7a36fd3..ccc667cae3 100644
--- a/tensorflow/python/ops/distributions/special_math.py
+++ b/tensorflow/python/ops/distributions/special_math.py
@@ -12,6 +12,62 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+
+# Functions "ndtr" and "ndtri" are derived from calculations made in:
+# https://root.cern.ch/doc/v608/SpecFuncCephesInv_8cxx_source.html
+# In the following email exchange, the author gives his consent to redistribute
+# derived works under an Apache 2.0 license.
+#
+# From: Stephen Moshier <steve@moshier.net>
+# Date: Sat, Jun 9, 2018 at 2:36 PM
+# Subject: Re: Licensing cephes under Apache (BSD-like) license.
+# To: rif <rif@google.com>
+#
+#
+#
+# Hello Rif,
+#
+# Yes, Google may distribute Cephes files under the Apache 2 license.
+#
+# If clarification is needed, I do not favor BSD over other free licenses.
+# I would agree that Apache 2 seems to cover the concern you mentioned
+# about sublicensees.
+#
+# Best wishes for good luck with your projects!
+# Steve Moshier
+#
+#
+#
+# On Thu, 31 May 2018, rif wrote:
+#
+# > Hello Steve.
+# > My name is Rif. I work on machine learning software at Google.
+# >
+# > Your cephes software continues to be incredibly useful and widely used. I
+# > was wondering whether it would be permissible for us to use the Cephes code
+# > under the Apache 2.0 license, which is extremely similar in permissions to
+# > the BSD license (Wikipedia comparisons). This would be quite helpful to us
+# > in terms of avoiding multiple licenses on software.
+# >
+# > I'm sorry to bother you with this (I can imagine you're sick of hearing
+# > about this by now), but I want to be absolutely clear we're on the level and
+# > not misusing your important software. In former conversation with Eugene
+# > Brevdo (ebrevdo@google.com), you wrote "If your licensing is similar to BSD,
+# > the formal way that has been handled is simply to add a statement to the
+# > effect that you are incorporating the Cephes software by permission of the
+# > author." I wanted to confirm that (a) we could use the Apache license, (b)
+# > that we don't need to (and probably you don't want to) keep getting
+# > contacted about individual uses, because your intent is generally to allow
+# > this software to be reused under "BSD-like" license, and (c) you're OK
+# > letting incorporators decide whether a license is sufficiently BSD-like?
+# >
+# > Best,
+# >
+# > rif
+# >
+# >
+# >
+
 """Special Math Ops."""
 
 from __future__ import absolute_import
@@ -135,7 +191,7 @@ def _ndtri(p):
 
   # Constants used in piece-wise rational approximations. Taken from the cephes
   # library:
-  # https://github.com/scipy/scipy/blob/master/scipy/special/cephes/ndtri.c
+  # https://root.cern.ch/doc/v608/SpecFuncCephesInv_8cxx_source.html
   p0 = list(reversed([-5.99633501014107895267E1,
                       9.80010754185999661536E1,
                       -5.66762857469070293439E1,
@@ -305,7 +361,8 @@ def log_ndtr(x, series_order=3, name="log_ndtr"):
     else:
       raise TypeError("x.dtype=%s is not supported." % x.dtype)
 
-    # The basic idea here was ported from py/scipy/special/cephes/ndtr.c.
+    # The basic idea here was ported from:
+    #   https://root.cern.ch/doc/v608/SpecFuncCephesInv_8cxx_source.html
     # We copy the main idea, with a few changes
     # * For x >> 1, and X ~ Normal(0, 1),
     #     Log[P[X < x]] = Log[1 - P[X < -x]] approx -P[X < -x],
-- 
GitLab


From 55f561e6740d61b3665594babce4be72ad955bc6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 16:07:09 -0700
Subject: [PATCH 0980/1357] Small tweaks to comments and documentation strings.

PiperOrigin-RevId: 215297961
---
 tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index 5c27d59f82..ef2f8dd36d 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -46,7 +46,7 @@ namespace tensorflow {
 // 5. TPUEmbeddingActivations, when used with appropriate Python libraries,
 //    enables the automatic differentiation of models that use embeddings.
 // 6. TPUEmbeddingSendGradients takes a list of Tensors (of the same shapes
-//    as those returned by TPUEmbeddingReceivActivations) containing gradients
+//    as those returned by TPUEmbeddingReceiveActivations) containing gradients
 //    to use in updating the embedding tables.
 // 7. Before saving a checkpoint, use the TPUEmbeddingRetrieve Op to update
 //    the Graph's embedding table Variables from the updated tables in the
@@ -147,7 +147,7 @@ parameters that are loaded from a checkpoint before a training loop is
 executed.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto (overrides table_id).
+  TPUEmbeddingConfiguration proto (overrides table_id).
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
 table_id: Index of this table in the EmbeddingLayerConfiguration proto
@@ -283,7 +283,7 @@ the correct embedding table configuration. For example, this op is
 used to retrieve updated parameters before saving a checkpoint.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto (overrides table_id).
+  TPUEmbeddingConfiguration proto (overrides table_id).
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
 table_id: Index of this table in the EmbeddingLayerConfiguration proto
-- 
GitLab


From 24333d8e55bdd995089e93122750340bf8d1ddba Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 1 Oct 2018 16:09:45 -0700
Subject: [PATCH 0981/1357] [TF/XLA] Optimize
 `Encapsulator::GetFunctionNameAttr()`.

The previous version was hitting a very slow path in `GetNodeAttr()`, which is expensive when the named attr is not found. This change inlines the logic of finding the two relevant attrs inside `GetFunctionNameAttr()` and avoids constructing a status object with a serialized `NodeDef` when the attr can't be found.

PiperOrigin-RevId: 215298411
---
 .../jit/encapsulate_subgraphs_pass.cc         | 43 ++++++++++---------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index 15faf31077..d165341f21 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -1363,28 +1363,31 @@ void Encapsulator::Subgraph::GetOutsideCompilationSubgraphNames(
 
 Status Encapsulator::GetFunctionNameAttr(
     Node const* node, string* attr, string* outside_compilation_attr) const {
-  Status s = GetNodeAttr(node->attrs(), group_attribute_, attr);
-  if (s.code() == error::Code::NOT_FOUND) {
-    // Return empty attr if there's no group_attribute.
-    attr->clear();
-  } else {
-    TF_RETURN_IF_ERROR(s);
-  }
-  bool has_group_attr = s.ok();
-  s = GetNodeAttr(node->attrs(), outside_compilation_attribute_,
-                  outside_compilation_attr);
-  if (s.code() == error::Code::NOT_FOUND) {
-    // Return empty attr if there's no outside_compilation attribute.
-    outside_compilation_attr->clear();
-  } else {
-    TF_RETURN_IF_ERROR(s);
-    if (!has_group_attr) {
-      return errors::InvalidArgument(
-          "Node ", node->name(), " has ", outside_compilation_attribute_,
-          " attribute but no ", group_attribute_, " attribute.");
+  AttrSlice attrs = node->attrs();
+  attr->clear();
+  outside_compilation_attr->clear();
+  bool found_group_attribute = false;
+  bool found_outside_compilation_attribute = false;
+  for (const auto& node_attr : attrs) {
+    if (node_attr.first == group_attribute_) {
+      TF_RETURN_IF_ERROR(AttrValueHasType(node_attr.second, "string"));
+      *attr = node_attr.second.s();
+      found_group_attribute = true;
+    } else if (node_attr.first == outside_compilation_attribute_) {
+      TF_RETURN_IF_ERROR(AttrValueHasType(node_attr.second, "string"));
+      *outside_compilation_attr = node_attr.second.s();
+      found_outside_compilation_attribute = true;
     }
+    if (found_group_attribute && found_outside_compilation_attribute) break;
+  }
+
+  if (found_outside_compilation_attribute && !found_group_attribute) {
+    return errors::InvalidArgument(
+        "Node ", node->name(), " has ", outside_compilation_attribute_,
+        " attribute but no ", group_attribute_, " attribute.");
+  } else {
+    return Status::OK();
   }
-  return Status::OK();
 }
 
 bool IsInSubgraph(const string& func_id, const string& outside_compilation_id) {
-- 
GitLab


From 49bbfec04b729960999ef054e3acab719631b101 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 16:16:43 -0700
Subject: [PATCH 0982/1357] Override implementation of log survival for
 Exponential distribution to better handle small values.

PiperOrigin-RevId: 215299532
---
 .../distributions/exponential_test.py            | 16 ++++++++++++++++
 .../python/ops/distributions/exponential.py      |  3 +++
 2 files changed, 19 insertions(+)

diff --git a/tensorflow/python/kernel_tests/distributions/exponential_test.py b/tensorflow/python/kernel_tests/distributions/exponential_test.py
index 27d1291912..367f8bb0f1 100644
--- a/tensorflow/python/kernel_tests/distributions/exponential_test.py
+++ b/tensorflow/python/kernel_tests/distributions/exponential_test.py
@@ -81,6 +81,22 @@ class ExponentialTest(test.TestCase):
     expected_cdf = stats.expon.cdf(x, scale=1 / lam_v)
     self.assertAllClose(self.evaluate(cdf), expected_cdf)
 
+  def testExponentialLogSurvival(self):
+    batch_size = 7
+    lam = constant_op.constant([2.0] * batch_size)
+    lam_v = 2.0
+    x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0, 10.0], dtype=np.float32)
+
+    exponential = exponential_lib.Exponential(rate=lam)
+
+    log_survival = exponential.log_survival_function(x)
+    self.assertEqual(log_survival.get_shape(), (7,))
+
+    if not stats:
+      return
+    expected_log_survival = stats.expon.logsf(x, scale=1 / lam_v)
+    self.assertAllClose(self.evaluate(log_survival), expected_log_survival)
+
   def testExponentialMean(self):
     lam_v = np.array([1.0, 4.0, 2.5])
     exponential = exponential_lib.Exponential(rate=lam_v)
diff --git a/tensorflow/python/ops/distributions/exponential.py b/tensorflow/python/ops/distributions/exponential.py
index 4325a14449..02129b5e2a 100644
--- a/tensorflow/python/ops/distributions/exponential.py
+++ b/tensorflow/python/ops/distributions/exponential.py
@@ -114,6 +114,9 @@ class Exponential(gamma.Gamma):
   def rate(self):
     return self._rate
 
+  def _log_survival_function(self, value):
+    return self._log_prob(value) - math_ops.log(self._rate)
+
   def _sample_n(self, n, seed=None):
     shape = array_ops.concat([[n], array_ops.shape(self._rate)], 0)
     # Uniform variates must be sampled from the open-interval `(0, 1)` rather
-- 
GitLab


From bb1f9e1a57c8bc18325b3c86298be96e6647a0a3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 16:31:13 -0700
Subject: [PATCH 0983/1357] Change semantics of DistributionStrategy.update()
 to make sure the output depends on the updates across all mirrors. Before
 this change, update() would return a Mirrored value that where each component
 was an update to a single mirror. This caused a problem since for reading
 purposes other DistributionStrategy methods would consider it okay to read
 any single component, and so if you for example did something like
 session.run(strategy.update(...)) it would only perform the update on one
 replica. The fix is to have the output be a Mirrored value that is actually
 the identity operation returning the output on that device, but that has a
 control dependency making sure that the update actually happens on all the
 replicas. This fix was already present in MirroredVariable._assign_func, this
 CL moves the fix into update() and generalizes it to multiple return values.

To disable this new grouping behavior, you may now pass
"grouped=False" to update(). For example, some callers (like Optimizer)
are performing a lot of updates and they prefer to group all of them
together at once for performance reasons.  In this case, we still want
to make sure the caller executes the update on all replicas, so we
return an unwrapped value instead of a Mirrored value. This has the
happy side effect of removing a bunch of unwrap calls in client code,
since unwrapping was the only safe way to use the Mirrored value we
used to return.

PiperOrigin-RevId: 215301909
---
 .../collective_all_reduce_strategy_test.py    |  3 +-
 .../distribute/python/mirrored_strategy.py    | 12 +++--
 .../python/mirrored_strategy_multigpu_test.py |  2 +-
 .../distribute/python/one_device_strategy.py  | 17 +++++--
 .../python/parameter_server_strategy.py       | 22 ++++++--
 .../python/parameter_server_strategy_test.py  |  3 +-
 .../distribute/python/strategy_test_lib.py    |  6 ++-
 .../contrib/distribute/python/tpu_strategy.py | 36 ++++++++-----
 .../contrib/distribute/python/values.py       | 36 ++++++++-----
 .../contrib/optimizer_v2/optimizer_v2.py      | 32 +++++-------
 tensorflow/python/training/distribute.py      | 51 +++++++++++--------
 .../training/distribution_strategy_context.py |  2 +
 tensorflow/python/training/optimizer.py       | 10 ++--
 13 files changed, 144 insertions(+), 88 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
index 33ffbf6abe..6796a23d46 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
@@ -128,7 +128,8 @@ class CollectiveAllReduceStrategyTestBase(
             # TODO(yuefengz): support non-Mirrored variable as destinations.
             g = d.reduce(
                 variable_scope.VariableAggregation.SUM, g, destinations=v)
-            with ops.control_dependencies(d.unwrap(d.update(v, update, g))):
+            with ops.control_dependencies(
+                d.update(v, update, g, grouped=False)):
               after_list.append(d.read_var(v))
         return before_list, after_list
 
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 4d7516063c..6bd380a22d 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -627,9 +627,11 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
     return self._get_cross_tower_ops().batch_reduce(aggregation,
                                                     value_destination_pairs)
 
-  def _update(self, var, fn, *args, **kwargs):
+  def _update(self, var, options, fn, *args, **kwargs):
     # TODO(josh11b): In eager mode, use one thread per device.
     assert isinstance(var, values.DistributedVariable)
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     updates = {}
     for d, v in var._index.items():  # pylint: disable=protected-access
       name = "update_%d" % self._device_index.get(d)
@@ -638,10 +640,12 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
         updates[d] = fn(v,
                         *values.select_device_mirrored(d, args),
                         **values.select_device_mirrored(d, kwargs))
-    return values.regroup(updates, values.Mirrored)
+    return values.update_regroup(self, updates, should_group)
 
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
     assert isinstance(colocate_with, list)
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     # TODO(josh11b): In eager mode, use one thread per device.
     updates = {}
     for d in colocate_with:
@@ -649,7 +653,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
       with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name):
         updates[d] = fn(*values.select_device_mirrored(d, args),
                         **values.select_device_mirrored(d, kwargs))
-    return values.regroup(updates, values.Mirrored)
+    return values.update_regroup(self, updates, should_group)
 
   def read_var(self, tower_local_var):
     """Read the aggregate value of a tower-local variable."""
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index f51e543624..eeac528329 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -826,7 +826,7 @@ class MirroredStrategyVariableCreationTest(test.TestCase):
 
       with dist.scope():
         ret_v_sum = dist.call_for_each_tower(model_fn, run_concurrently=False)
-        update_ops = dist.unwrap(dist.update(ret_v_sum, update, 5.0))
+        update_ops = dist.update(ret_v_sum, update, 5.0, grouped=False)
 
         # Initialize variables.
         self.evaluate(variables.global_variables_initializer())
diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py
index 23b220f64b..f525919048 100644
--- a/tensorflow/contrib/distribute/python/one_device_strategy.py
+++ b/tensorflow/contrib/distribute/python/one_device_strategy.py
@@ -141,14 +141,21 @@ class OneDeviceStrategy(distribute_lib.DistributionStrategy):
       else:
         assert False
 
-  def _update(self, var, fn, *args, **kwargs):
-    with ops.device(self._device), distribute_lib.UpdateContext(self._device):
-      return fn(var, *args, **kwargs)
+  def _update(self, var, options, fn, *args, **kwargs):
+    # The implementations of _update() and _update_non_slot() are identical
+    # except _update() passes `var` as the first argument to `fn()`.
+    return self._update_non_slot(var, options, fn, var, *args, **kwargs)
 
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
     del colocate_with
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     with ops.device(self._device), distribute_lib.UpdateContext(self._device):
-      return fn(*args, **kwargs)
+      result = fn(*args, **kwargs)
+      if should_group:
+        return result
+      else:
+        return nest.map_structure(self._unwrap, result)
 
   def read_var(self, tower_local_var):
     """Read the aggregate value of a tower-local variable."""
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy.py b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
index 1125d027f6..6ddd91507b 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
@@ -343,21 +343,33 @@ class ParameterServerStrategy(distribute_lib.DistributionStrategy):
 
     return nest.map_structure(_select_fn, structured)
 
-  def _update(self, var, fn, *args, **kwargs):
+  def _update(self, var, options, fn, *args, **kwargs):
     if isinstance(var, values.AggregatingVariable):
       var = var.get()
     if not isinstance(var, resource_variable_ops.ResourceVariable):
       raise ValueError(
           "You can not update `var` %r. It must be a Variable." % var)
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     with ops.colocate_with(var), distribute_lib.UpdateContext(var.device):
-      return fn(var, *self._select_single_value(args),
-                **self._select_single_value(kwargs))
+      result = fn(var, *self._select_single_value(args),
+                  **self._select_single_value(kwargs))
+      if should_group:
+        return result
+      else:
+        return nest.map_structure(self._unwrap, result)
 
   # TODO(yuefengz): does it need to call _select_single_value?
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     with ops.device(
         colocate_with.device), distribute_lib.UpdateContext(colocate_with):
-      return fn(*args, **kwargs)
+      result = fn(*args, **kwargs)
+      if should_group:
+        return result
+      else:
+        return nest.map_structure(self._unwrap, result)
 
   def _unwrap(self, val):
     if isinstance(val, values.DistributedValues):
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
index 12789e0bc9..353d11a583 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
@@ -395,7 +395,8 @@ class ParameterServerStrategyTestBase(
             # TODO(yuefengz): support non-Mirrored variable as destinations.
             g = d.reduce(
                 variable_scope.VariableAggregation.SUM, g, destinations=v)
-            with ops.control_dependencies(d.unwrap(d.update(v, update, g))):
+            with ops.control_dependencies(
+                d.update(v, update, g, grouped=False)):
               after_list.append(d.read_var(v))
         return before_list, after_list
 
diff --git a/tensorflow/contrib/distribute/python/strategy_test_lib.py b/tensorflow/contrib/distribute/python/strategy_test_lib.py
index 5d498fb629..fd280f5754 100644
--- a/tensorflow/contrib/distribute/python/strategy_test_lib.py
+++ b/tensorflow/contrib/distribute/python/strategy_test_lib.py
@@ -115,7 +115,8 @@ class DistributionTestBase(test.TestCase):
           with ops.control_dependencies([fetched]):
             g = d.reduce(
                 variable_scope.VariableAggregation.SUM, g, destinations=v)
-            with ops.control_dependencies(d.unwrap(d.update(v, update, g))):
+            with ops.control_dependencies(d.update(
+                v, update, g, grouped=False)):
               after_list.append(d.read_var(v))
         return before_list, after_list
 
@@ -169,7 +170,8 @@ class DistributionTestBase(test.TestCase):
           with ops.control_dependencies([fetched]):
             g = d.reduce(
                 variable_scope.VariableAggregation.SUM, g, destinations=v)
-            with ops.control_dependencies(d.unwrap(d.update(v, update, g))):
+            with ops.control_dependencies(d.update(
+                v, update, g, grouped=False)):
               after_list.append(d.read_var(v))
         return before_list, after_list
 
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index 1b555482d3..c3c7df3cd8 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -297,6 +297,7 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
       # For outputs that have already been aggregated, take the first value
       # from the list as each value should be the same. Else return the full
       # list of values.
+      # TODO(josh11b): If aggregation is NONE, we should return a PerDevice value.
       if aggregation is not variables_lib.VariableAggregation.NONE:
         # TODO(priyag): Should this return the element or a list with 1 element
         last_step_tensor_outputs_dict[name] = output[0]
@@ -398,11 +399,16 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
       return output * (1. / len(value))
     return output
 
-  def _update(self, var, fn, *args, **kwargs):
-    # TODO(jhseu): Consider supporting grouped==False.
+  def _update(self, var, options, fn, *args, **kwargs):
     assert isinstance(var, values.TPUMirroredVariable)
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
+
     if values._enclosing_tpu_context() is not None:  # pylint: disable=protected-access
-      return fn(var, *args, **kwargs)
+      if should_group:
+        return fn(var, *args, **kwargs)
+      else:
+        return [fn(var, *args, **kwargs)]
 
     # Otherwise, we revert to MirroredStrategy behavior and update each variable
     # directly.
@@ -414,23 +420,25 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
         updates[d] = fn(v,
                         *values.select_device_mirrored(d, args),
                         **values.select_device_mirrored(d, kwargs))
+    return values.update_regroup(self, updates, should_group)
 
-    # Make a single control dependency to keep the variables mirrored. If one
-    # assignment is fetched, then run all assignments.
-    sorted_keys = sorted(updates.keys())
-    update_tuple = control_flow_ops.tuple([updates[d] for d in sorted_keys])
-    for i, d in enumerate(sorted_keys):
-      updates[d] = update_tuple[i]
-    return values.regroup(updates, values.Mirrored)
+  # TODO(josh11b): Need to implement _update_non_slot()!
 
   def read_var(self, var):
     assert isinstance(var, values.TPUMirroredVariable)
     return var.read_value()
 
-  def _unwrap(self, value):
-    if isinstance(value, list):
-      return value
-    return [value]
+  def _unwrap(self, val):
+    if isinstance(val, values.DistributedValues):
+      # Return in a deterministic order.
+      return [val.get(device=d) for d in sorted(val.devices)]
+    elif isinstance(val, list):
+      # TODO(josh11b): We need to remove this case; per device values should
+      # be represented using a PerDevice wrapper instead of a list with
+      # one entry per device.
+      return val
+    return [val]
+
 
   @property
   def num_towers(self):
diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index c18faeb67d..18ceba42c2 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -366,18 +366,7 @@ class MirroredVariable(DistributedVariable, Mirrored,
       # We are calling assign on the mirrored variable in cross tower context,
       # use update to update the variable.
       strategy = distribution_strategy_context.get_distribution_strategy()
-      updates = strategy.update(self, f, *args, **kwargs)
-      grouped = strategy.group(updates)
-      if isinstance(updates, DistributedValues) and updates.is_tensor_like:
-        # Make sure we run all updates. Without this, something like
-        # session.run(mirrored_var.assign*(...)) may only update one tower.
-        index = {}
-        for d in updates.devices:
-          with ops.device(d), ops.control_dependencies([grouped]):
-            index[d] = array_ops.identity(updates.get(d))
-        return Mirrored(index)
-      else:
-        return grouped
+      return strategy.update(self, f, *args, **kwargs)
     else:
       _assert_tower_context()
       # We are calling an assign function on the mirrored variable in tower
@@ -1049,6 +1038,29 @@ def select_device_mirrored(device, structured):
   return nest.map_structure(_get_mirrored, structured)
 
 
+def update_regroup(strategy, updates, should_group):
+  """Regroup for an update, with dependencies to ensure all updates execute."""
+  regrouped = regroup(updates, Mirrored)
+  if not should_group:
+    return nest.map_structure(strategy.unwrap, regrouped)
+  grouped_flat = []
+  for u in nest.flatten(regrouped):
+    if isinstance(u, DistributedValues):
+      g = strategy.group(u)
+      if u.is_tensor_like:
+        # Make sure we run all updates. Without this, something like
+        # session.run(strategy.update(...)) may only update one tower.
+        index = {}
+        for d in u.devices:
+          with ops.device(d), ops.control_dependencies([g]):
+            index[d] = array_ops.identity(u.get(d))
+        g = Mirrored(index)
+    else:
+      g = u
+    grouped_flat.append(g)
+  return nest.pack_sequence_as(regrouped, grouped_flat)
+
+
 class PerDeviceDataIterator(object):
   """An iterator (like `tf.data.Iterator`) into a `PerDeviceDataset`."""
 
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
index 6af59dcfbf..53e27c08c4 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
@@ -30,7 +30,6 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.training import distribute as distribute_lib
@@ -965,8 +964,7 @@ class OptimizerV2(optimizer_v1.Optimizer):
       # Use the processors to update the variables.
       update_ops = []
       for grad, var in grads_and_vars:
-        update_ops.extend(distribution.unwrap(distribution.update(
-            var, update, grad)))
+        update_ops.extend(distribution.update(var, update, grad, grouped=False))
 
       # Give the child class a chance to do something after applying
       # gradients
@@ -978,26 +976,24 @@ class OptimizerV2(optimizer_v1.Optimizer):
 
       update_ops = control_flow_ops.group(update_ops)
       with ops.control_dependencies([update_ops]):
-        finish_updates = distribution.update_non_slot(non_slot_devices, finish)
-      if finish_updates is None:
-        finish_updates = update_ops
+        finish_updates = distribution.update_non_slot(
+            non_slot_devices, finish, grouped=False)
+      # We said grouped=False, which means finish_updates is always a list.
+      # It will be [None] when finish() returns None.
+      if finish_updates == [None]:
+        finish_updates = [update_ops]
 
       # Update `global_step` (if any).
       if global_step is None:
         apply_updates = distribution.group(finish_updates, name=name)
       else:
-        with ops.control_dependencies(distribution.unwrap(finish_updates)):
-
-          def update_global_step(global_step):
-            if isinstance(global_step, resource_variable_ops.ResourceVariable):
-              return global_step.assign_add(
-                  ops.convert_to_tensor(1, dtype=global_step.dtype),
-                  read_value=False)
-            else:
-              return state_ops.assign_add(global_step, 1)
-
-          apply_updates = distribution.group(
-              distribution.update(global_step, update_global_step), name=name)
+        with ops.control_dependencies(finish_updates):
+
+          def update_global_step(global_step, name):
+            return global_step.assign_add(1, read_value=False, name=name)
+
+          apply_updates = distribution.update(
+              global_step, update_global_step, name)
 
       # Add the training op to the TRAIN_OP graph collection in graph mode.
       if not eager_execution:
diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py
index 419a9ec12b..a92a1bdee7 100644
--- a/tensorflow/python/training/distribute.py
+++ b/tensorflow/python/training/distribute.py
@@ -26,7 +26,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops.losses import losses_impl
 from tensorflow.python.platform import tf_logging
@@ -807,15 +806,22 @@ class DistributionStrategy(object):
       var: Variable, possibly mirrored to multiple devices, to operate on.
       fn: Function to call. Should take the variable as the first argument.
       *args: Additional positional arguments to pass to `fn()`.
-      **kwargs: Keyword arguments to pass to `fn()`.
+      **kwargs: Keyword arguments to pass to `fn()`. If "grouped=False" is
+        specified, the return value will be unwrapped.
 
     Returns:
-      Merged return value of `fn` across all towers.
+      By default, the merged return value of `fn` across all towers.  The merged
+      result has dependencies to make sure that if it is evaluated at all, the
+      side effects (updates) will happen on every tower. If instead
+      "grouped=False" is specified, this function will return a nest of lists
+      where each list has an element per tower, and the caller is responsible
+      for ensuring all elements are executed.
     """
     _require_cross_tower_context(self)
-    return self._update(var, fn, *args, **kwargs)
+    options = {"grouped": kwargs.pop("grouped", True)}
+    return self._update(var, options, fn, *args, **kwargs)
 
-  def _update(self, var, fn, *args, **kwargs):
+  def _update(self, var, options, fn, *args, **kwargs):
     raise NotImplementedError("must be implemented in descendants")
 
   def update_non_slot(self, colocate_with, fn, *args, **kwargs):
@@ -825,15 +831,18 @@ class DistributionStrategy(object):
       colocate_with: The return value of `non_slot_devices()`.
       fn: Function to execute.
       *args: Positional arguments to pass to `fn()`.
-      **kwargs: Keyword arguments to pass to `fn()`.
+      **kwargs: Keyword arguments to pass to `fn()`. If "grouped=False" is
+        specified, the return value will be unwrapped and the caller is
+        responsible for ensuring all elements are executed.
 
     Returns:
       Return value of `fn`, possibly merged across devices.
     """
     _require_cross_tower_context(self)
-    return self._update_non_slot(colocate_with, fn, *args, **kwargs)
+    options = {"grouped": kwargs.pop("grouped", True)}
+    return self._update_non_slot(colocate_with, options, fn, *args, **kwargs)
 
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
     raise NotImplementedError("must be implemented in descendants")
 
   def unwrap(self, value):
@@ -1134,17 +1143,22 @@ class _DefaultDistributionStrategy(DistributionStrategy):
     del aggregation, destinations
     return value
 
-  def _update(self, var, fn, *args, **kwargs):
-    # TODO(josh11b): Figure out what we should be passing to UpdateContext()
-    # once that value is used for something.
-    with ops.colocate_with(var), UpdateContext(var):
-      return fn(var, *args, **kwargs)
+  def _update(self, var, options, fn, *args, **kwargs):
+    # The implementations of _update() and _update_non_slot() are identical
+    # except _update() passes `var` as the first argument to `fn()`.
+    return self._update_non_slot(var, options, fn, var, *args, **kwargs)
 
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     # TODO(josh11b): Figure out what we should be passing to UpdateContext()
     # once that value is used for something.
     with ops.colocate_with(colocate_with), UpdateContext(colocate_with):
-      return fn(*args, **kwargs)
+      result = fn(*args, **kwargs)
+      if should_group:
+        return result
+      else:
+        return nest.map_structure(self._unwrap, result)
 
   def read_var(self, tower_local_var):
     return array_ops.identity(tower_local_var)
@@ -1193,13 +1207,10 @@ class _DefaultDistributionStrategy(DistributionStrategy):
 def increment_var(v, amount=1):
   """`v += amount`, distributed-aware version."""
   def update(vu):
-    if isinstance(vu, resource_variable_ops.ResourceVariable):
-      return vu.assign_add(amount, read_value=False)
-    else:
-      return state_ops.assign_add(vu, amount)
+    return vu.assign_add(amount, read_value=False)
 
   def merge_fn(dist, vm):
-    return dist.group(dist.update(vm, update))
+    return dist.update(vm, update)
 
   tower_context = distribution_strategy_context.get_tower_context()
   return tower_context.merge_call(merge_fn, v)
diff --git a/tensorflow/python/training/distribution_strategy_context.py b/tensorflow/python/training/distribution_strategy_context.py
index 998b5c35ce..ce580a406f 100644
--- a/tensorflow/python/training/distribution_strategy_context.py
+++ b/tensorflow/python/training/distribution_strategy_context.py
@@ -89,6 +89,7 @@ def get_tower_context():
   """Returns the current TowerContext or None if in a cross-tower context.
 
   Note that execution:
+
   1. starts in the default (single-tower) tower context (this function
      will return the default TowerContext object);
   2. switches to cross-tower context (in which case this will return
@@ -121,6 +122,7 @@ def get_cross_tower_context():
   """Returns the current DistributionStrategy if in a cross-tower context.
 
   Note that execution:
+
   1. starts in the default (single-tower) tower context;
   2. switches to cross-tower context when entering a
      `with DistributionStrategy.scope():` block;
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index 30b0ed20c8..47034919e1 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -692,7 +692,7 @@ class Optimizer(
       update_ops = [
           op
           for grad, var in grads_and_vars
-          for op in distribution.unwrap(distribution.update(var, update, grad))
+          for op in distribution.update(var, update, grad, grouped=False)
       ]
 
       def finish(self, update_ops):
@@ -700,13 +700,13 @@ class Optimizer(
 
       non_slot_devices = distribution.non_slot_devices(var_list)
       finish_updates = distribution.update_non_slot(
-          non_slot_devices, finish, self, update_ops)
+          non_slot_devices, finish, self, update_ops, grouped=False)
       if global_step is None:
         apply_updates = distribution.group(finish_updates, name=name)
       else:
-        with ops.control_dependencies(distribution.unwrap(finish_updates)):
-          apply_updates = distribution.group(distribution.update(
-              global_step, state_ops.assign_add, 1, name=name))
+        with ops.control_dependencies(finish_updates):
+          apply_updates = distribution.update(
+              global_step, state_ops.assign_add, 1, name=name)
 
       if not context.executing_eagerly():
         if isinstance(apply_updates, ops.Tensor):
-- 
GitLab


From b72265dc002e712fc3d0f33434f13c7a36a484b2 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 1 Oct 2018 16:45:11 -0700
Subject: [PATCH 0984/1357] [tf.data] Deprecate `tf.contrib.data` and introduce
 `tf.data.experimental` to replace it.

This change prepares `tf.data` for TensorFlow 2.0, where `tf.contrib` will no longer exist. It retains the pre-existing endpoints in `tf.contrib.data` with deprecation warnings.

Note there are some exceptions to the move:

* Deprecated symbols in `tf.contrib.data` have not been moved to `tf.data.experimental`, because replacements already exist.
* `tf.contrib.data.LMDBDataset` has not been moved, because we plan to move it to a SIG-maintained repository.
* `tf.contrib.data.assert_element_shape()` has not yet been moved, because it depends on functionality in `tf.contrib`, and it will move in a later change.
* `tf.contrib.data.AUTOTUNE` has not yet been moved, because we have not yet determined how to `tf_export()` a Python integer.
* The stats-related API endpoints have not yet appeared in a released version of TensorFlow, so these are moved to `tf.data.experimental` without retaining an endpoint in `tf.contrib.data`.

In addition, this change includes some build rule and ApiDef refactoring:
* Some of the "//third_party/tensorflow/python:training" dependencies had to be split in order to avoid a circular dependency.
* The `tf.contrib.stateless` ops now have a private core library for the generated wrappers (and accordingly are hidden in their ApiDef) so that `tf.data.experimental.sample_from_datasets()` can depend on them.

PiperOrigin-RevId: 215304249
---
 tensorflow/contrib/bigtable/README.md         |   4 +-
 .../bigtable/python/ops/bigtable_api.py       |   4 +-
 tensorflow/contrib/cmake/python_modules.txt   |   1 -
 tensorflow/contrib/data/README.md             |  18 +-
 tensorflow/contrib/data/__init__.py           |  11 +-
 .../contrib/data/python/kernel_tests/BUILD    | 560 +----------
 .../kernel_tests/assert_element_shape_test.py | 226 +++++
 .../kernel_tests/reduce_dataset_test.py       |  62 ++
 .../kernel_tests/window_dataset_op_test.py    | 527 ----------
 tensorflow/contrib/data/python/ops/BUILD      | 170 +---
 .../contrib/data/python/ops/batching.py       | 549 +----------
 tensorflow/contrib/data/python/ops/counter.py |  13 +-
 .../contrib/data/python/ops/enumerate_ops.py  |  15 +-
 .../contrib/data/python/ops/error_ops.py      |  37 +-
 .../data/python/ops/get_single_element.py     |  29 +-
 .../contrib/data/python/ops/grouping.py       | 441 +--------
 .../contrib/data/python/ops/interleave_ops.py | 149 +--
 .../contrib/data/python/ops/iterator_ops.py   | 167 +---
 .../contrib/data/python/ops/parsing_ops.py    | 107 +--
 .../data/python/ops/prefetching_ops.py        | 486 +---------
 .../contrib/data/python/ops/random_ops.py     |  34 +-
 tensorflow/contrib/data/python/ops/readers.py | 674 +------------
 .../contrib/data/python/ops/resampling.py     | 260 +----
 .../contrib/data/python/ops/scan_ops.py       | 137 +--
 .../contrib/data/python/ops/shuffle_ops.py    |  56 +-
 .../contrib/data/python/ops/threadpool.py     |  88 +-
 tensorflow/contrib/data/python/ops/unique.py  |  43 +-
 tensorflow/contrib/data/python/ops/writers.py |  40 +-
 .../distribute/python/prefetching_ops_v2.py   |   2 +-
 tensorflow/contrib/eager/python/datasets.py   |   4 +-
 .../contrib/eager/python/datasets_test.py     |   6 +-
 .../python/examples/revnet/imagenet_input.py  |  12 +-
 .../estimator/python/estimator/rnn_test.py    |   2 +-
 tensorflow/contrib/lookup/lookup_ops_test.py  |   2 +-
 tensorflow/contrib/stateless/BUILD            |   8 +-
 tensorflow/contrib/stateless/__init__.py      |   5 +-
 tensorflow/contrib/tpu/python/tpu/datasets.py |   4 +-
 tensorflow/contrib/tpu/tpu_estimator.md       |   2 +-
 tensorflow/contrib/training/BUILD             |   2 +-
 .../training/tensor_queue_dataset_test.py     |   2 +-
 .../api_def_StatelessMultinomial.pbtxt        |   4 +
 .../api_def_StatelessRandomNormal.pbtxt       |   4 +
 .../api_def_StatelessRandomUniform.pbtxt      |   4 +
 .../api_def_StatelessTruncatedNormal.pbtxt    |   4 +
 .../examples/get_started/regression/test.py   |   2 +-
 tensorflow/python/BUILD                       |  34 +
 tensorflow/python/data/BUILD                  |   1 +
 tensorflow/python/data/__init__.py            |   1 +
 tensorflow/python/data/experimental/BUILD     |  16 +
 .../python/data/experimental/__init__.py      | 109 +++
 .../data/experimental/kernel_tests/BUILD      | 569 +++++++++++
 .../kernel_tests/batch_dataset_op_test.py     | 317 +-----
 .../kernel_tests/bucketing_test.py            |   2 +-
 .../kernel_tests/csv_dataset_op_test.py       |   4 +-
 .../dataset_constructor_op_test.py            |   2 +-
 .../dataset_serialization_test_base.py        |   2 +-
 .../directed_interleave_dataset_test.py       |   4 +-
 .../kernel_tests/filter_dataset_op_test.py    |   2 +-
 .../kernel_tests/get_single_element_test.py   |  30 +-
 .../kernel_tests/indexed_dataset_ops_test.py  |   2 +-
 .../interleave_dataset_op_test.py             |   2 +-
 .../kernel_tests/iterator_ops_test.py         |   2 +-
 .../kernel_tests/map_dataset_op_test.py       |   6 +-
 .../kernel_tests/map_defun_op_test.py         |   2 +-
 .../kernel_tests/optimization/BUILD           |  30 +-
 .../assert_next_dataset_op_test.py            |   2 +-
 .../optimization/hoist_random_uniform_test.py |   2 +-
 .../optimization/latency_all_edges_test.py    |   6 +-
 .../map_and_filter_fusion_test.py             |   2 +-
 .../optimization/map_parallelization_test.py  |   2 +-
 .../optimization/map_vectorization_test.py    |   2 +-
 .../optimization/model_dataset_op_test.py     |   4 +-
 .../optimization/noop_elimination_test.py     |   2 +-
 .../optimization/optimize_dataset_op_test.py  |   2 +-
 .../kernel_tests/parsing_ops_test.py          |   3 +-
 .../kernel_tests/prefetching_ops_test.py      |   2 +-
 .../kernel_tests/range_dataset_op_test.py     |   4 +-
 .../kernel_tests/reader_dataset_ops_test.py   |   4 +-
 .../reader_dataset_ops_test_base.py           |   2 +-
 .../kernel_tests/resample_test.py             |   2 +-
 .../kernel_tests/scan_dataset_op_test.py      |   2 +-
 .../kernel_tests/serialization/BUILD          |  46 +-
 .../batch_dataset_serialization_test.py       |   4 +-
 .../cache_dataset_serialization_test.py       |   2 +-
 .../concatenate_dataset_serialization_test.py |   2 +-
 .../csv_dataset_serialization_test.py         |   4 +-
 .../dataset_constructor_serialization_test.py |   2 +-
 .../dataset_serialization_test_base.py        | 692 ++++++++++++++
 .../filter_dataset_serialization_test.py      |   2 +-
 ...ength_record_dataset_serialization_test.py |   4 +-
 .../flat_map_dataset_serialization_test.py    |   2 +-
 .../group_by_reducer_serialization_test.py    |   4 +-
 .../group_by_window_serialization_test.py     |   4 +-
 .../ignore_errors_serialization_test.py       |   4 +-
 .../interleave_dataset_serialization_test.py  |   2 +-
 ...ap_and_batch_dataset_serialization_test.py |   4 +-
 .../map_dataset_serialization_test.py         |   2 +-
 .../optimize_dataset_serialization_test.py    |   4 +-
 ...padded_batch_dataset_serialization_test.py |   2 +-
 ...l_interleave_dataset_serialization_test.py |   4 +-
 ...parallel_map_dataset_serialization_test.py |   4 +-
 ...arse_example_dataset_serialization_test.py |   4 +-
 .../prefetch_dataset_serialization_test.py    |   2 +-
 .../range_dataset_serialization_test.py       |   2 +-
 ...sample_from_datasets_serialization_test.py |   4 +-
 .../scan_dataset_serialization_test.py        |   4 +-
 .../sequence_dataset_serialization_test.py    |   2 +-
 .../serialization_integration_test.py         |   2 +-
 ...e_and_repeat_dataset_serialization_test.py |   4 +-
 .../shuffle_dataset_serialization_test.py     |   4 +-
 .../sql_dataset_serialization_test.py         |   6 +-
 .../stats_dataset_serialization_test.py       |   4 +-
 .../textline_dataset_serialization_test.py    |   4 +-
 .../tf_record_dataset_serialization_test.py   |   4 +-
 .../unbatch_dataset_serialization_test.py     |   4 +-
 .../unique_dataset_serialization_test.py      |   4 +-
 .../zip_dataset_serialization_test.py         |   2 +-
 .../serialization_integration_test.py         |  85 ++
 .../kernel_tests/shuffle_dataset_op_test.py   |   2 +-
 .../kernel_tests/sql_dataset_op_test.py       |   2 +-
 .../kernel_tests/sql_dataset_op_test_base.py  |   2 +-
 .../kernel_tests/stats_dataset_ops_test.py    |   4 +-
 .../kernel_tests/stats_dataset_test_base.py   |   0
 .../threadpool_dataset_ops_test.py            |   4 +-
 .../kernel_tests/unique_dataset_op_test.py    |   2 +-
 .../kernel_tests/writer_ops_test.py           |   2 +-
 tensorflow/python/data/experimental/ops/BUILD | 377 ++++++++
 .../python/data/experimental/ops/batching.py  | 669 +++++++++++++
 .../python/data/experimental/ops/counter.py   |  55 ++
 .../data/experimental/ops/enumerate_ops.py    |  60 ++
 .../python/data/experimental/ops/error_ops.py |  78 ++
 .../experimental/ops/get_single_element.py    |  72 ++
 .../python/data/experimental/ops/grouping.py  | 551 +++++++++++
 .../experimental}/ops/indexed_dataset_ops.py  |   0
 .../data/experimental/ops/interleave_ops.py   | 262 +++++
 .../data/experimental/ops/iterator_ops.py     | 268 ++++++
 .../data/experimental}/ops/map_defun.py       |   0
 .../data/experimental}/ops/optimization.py    |   0
 .../data/experimental/ops/parsing_ops.py      | 152 +++
 .../data/experimental/ops/prefetching_ops.py  | 531 ++++++++++
 .../data/experimental/ops/random_ops.py       |  54 ++
 .../python/data/experimental/ops/readers.py   | 904 ++++++++++++++++++
 .../data/experimental/ops/resampling.py       | 296 ++++++
 .../python/data/experimental/ops/scan_ops.py  | 177 ++++
 .../data/experimental/ops/shuffle_ops.py      | 102 ++
 .../data/experimental}/ops/stats_ops.py       |  14 +-
 .../data/experimental/ops/threadpool.py       | 104 ++
 .../python/data/experimental/ops/unique.py    |  79 ++
 .../python/data/experimental/ops/writers.py   |  60 ++
 tensorflow/python/data/ops/dataset_ops.py     |   4 +-
 tensorflow/python/data/ops/optional_ops.py    |   4 +-
 tensorflow/python/data/ops/readers.py         |   4 +-
 .../debug/examples/debug_tflearn_iris.py      |  14 +-
 .../tools/api/generator/api_init_files.bzl    |   1 +
 .../tools/api/generator/api_init_files_v1.bzl |   1 +
 ...ntal.-checkpoint-input-pipeline-hook.pbtxt |  30 +
 ...erimental.-csv-dataset.__metaclass__.pbtxt |  14 +
 ...rflow.data.experimental.-csv-dataset.pbtxt | 127 +++
 ...nsorflow.data.experimental.-optional.pbtxt |  28 +
 ...mental.-random-dataset.__metaclass__.pbtxt |  14 +
 ...ow.data.experimental.-random-dataset.pbtxt | 127 +++
 ...ensorflow.data.experimental.-reducer.pbtxt |  21 +
 ...erimental.-sql-dataset.__metaclass__.pbtxt |  14 +
 ...rflow.data.experimental.-sql-dataset.pbtxt | 127 +++
 ....data.experimental.-stats-aggregator.pbtxt |  13 +
 ...data.experimental.-t-f-record-writer.pbtxt |  13 +
 .../v1/tensorflow.data.experimental.pbtxt     | 139 +++
 .../tools/api/golden/v1/tensorflow.data.pbtxt |   4 +
 ...ntal.-checkpoint-input-pipeline-hook.pbtxt |  30 +
 ...erimental.-csv-dataset.__metaclass__.pbtxt |  14 +
 ...rflow.data.experimental.-csv-dataset.pbtxt | 127 +++
 ...nsorflow.data.experimental.-optional.pbtxt |  28 +
 ...mental.-random-dataset.__metaclass__.pbtxt |  14 +
 ...ow.data.experimental.-random-dataset.pbtxt | 127 +++
 ...ensorflow.data.experimental.-reducer.pbtxt |  21 +
 ...erimental.-sql-dataset.__metaclass__.pbtxt |  14 +
 ...rflow.data.experimental.-sql-dataset.pbtxt | 127 +++
 ....data.experimental.-stats-aggregator.pbtxt |  13 +
 ...data.experimental.-t-f-record-writer.pbtxt |  13 +
 .../v2/tensorflow.data.experimental.pbtxt     | 139 +++
 .../tools/api/golden/v2/tensorflow.data.pbtxt |   4 +
 tensorflow/tools/pip_package/BUILD            |   4 +-
 182 files changed, 8389 insertions(+), 4960 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py
 delete mode 100644 tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
 create mode 100644 tensorflow/core/api_def/python_api/api_def_StatelessMultinomial.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_StatelessRandomNormal.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_StatelessRandomUniform.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_StatelessTruncatedNormal.pbtxt
 create mode 100644 tensorflow/python/data/experimental/BUILD
 create mode 100644 tensorflow/python/data/experimental/__init__.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/BUILD
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/batch_dataset_op_test.py (67%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/bucketing_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/csv_dataset_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/dataset_constructor_op_test.py (97%)
 rename tensorflow/{contrib/data/python/kernel_tests/serialization => python/data/experimental/kernel_tests}/dataset_serialization_test_base.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/directed_interleave_dataset_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/filter_dataset_op_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/get_single_element_test.py (76%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/indexed_dataset_ops_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/interleave_dataset_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/iterator_ops_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/map_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/map_defun_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/BUILD (81%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/assert_next_dataset_op_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/hoist_random_uniform_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/latency_all_edges_test.py (91%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/map_and_filter_fusion_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/map_parallelization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/map_vectorization_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/model_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/noop_elimination_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/optimize_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/parsing_ops_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/prefetching_ops_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/range_dataset_op_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/reader_dataset_ops_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/reader_dataset_ops_test_base.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/resample_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/scan_dataset_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/BUILD (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/batch_dataset_serialization_test.py (94%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/cache_dataset_serialization_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/concatenate_dataset_serialization_test.py (94%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/csv_dataset_serialization_test.py (93%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/dataset_constructor_serialization_test.py (97%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/filter_dataset_serialization_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/flat_map_dataset_serialization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/group_by_reducer_serialization_test.py (93%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/group_by_window_serialization_test.py (93%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/ignore_errors_serialization_test.py (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/interleave_dataset_serialization_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py (94%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/map_dataset_serialization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/optimize_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/padded_batch_dataset_serialization_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/parallel_map_dataset_serialization_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/parse_example_dataset_serialization_test.py (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/prefetch_dataset_serialization_test.py (93%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/range_dataset_serialization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/sample_from_datasets_serialization_test.py (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/scan_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/sequence_dataset_serialization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/serialization_integration_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/shuffle_dataset_serialization_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/sql_dataset_serialization_test.py (88%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/stats_dataset_serialization_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/textline_dataset_serialization_test.py (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/tf_record_dataset_serialization_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/unbatch_dataset_serialization_test.py (91%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/unique_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/zip_dataset_serialization_test.py (94%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/shuffle_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/sql_dataset_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/sql_dataset_op_test_base.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/stats_dataset_ops_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/stats_dataset_test_base.py (100%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/threadpool_dataset_ops_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/unique_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/writer_ops_test.py (98%)
 create mode 100644 tensorflow/python/data/experimental/ops/BUILD
 create mode 100644 tensorflow/python/data/experimental/ops/batching.py
 create mode 100644 tensorflow/python/data/experimental/ops/counter.py
 create mode 100644 tensorflow/python/data/experimental/ops/enumerate_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/error_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/get_single_element.py
 create mode 100644 tensorflow/python/data/experimental/ops/grouping.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/ops/indexed_dataset_ops.py (100%)
 create mode 100644 tensorflow/python/data/experimental/ops/interleave_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/iterator_ops.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/ops/map_defun.py (100%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/ops/optimization.py (100%)
 create mode 100644 tensorflow/python/data/experimental/ops/parsing_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/prefetching_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/random_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/readers.py
 create mode 100644 tensorflow/python/data/experimental/ops/resampling.py
 create mode 100644 tensorflow/python/data/experimental/ops/scan_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/shuffle_ops.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/ops/stats_ops.py (92%)
 create mode 100644 tensorflow/python/data/experimental/ops/threadpool.py
 create mode 100644 tensorflow/python/data/experimental/ops/unique.py
 create mode 100644 tensorflow/python/data/experimental/ops/writers.py
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-reducer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-stats-aggregator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-t-f-record-writer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-reducer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-stats-aggregator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-t-f-record-writer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt

diff --git a/tensorflow/contrib/bigtable/README.md b/tensorflow/contrib/bigtable/README.md
index f33eaf7e3d..2c44abed5e 100644
--- a/tensorflow/contrib/bigtable/README.md
+++ b/tensorflow/contrib/bigtable/README.md
@@ -203,7 +203,7 @@ def interleave_fn(index):
   start = tf.string_join(['training_data_', start_idx_str])
   end = tf.string_join(['training_data_', end_idx_str])
   return table.scan_range(start_idx, end_idx, columns=columns)
-ds = ds.apply(tf.contrib.data.parallel_interleave(
+ds = ds.apply(tf.data.experimental.parallel_interleave(
     interleave_fn, cycle_length=NUM_PARALLEL_READS, prefetch_input_elements=1))
 ```
 
@@ -249,7 +249,7 @@ def make_row_key_dataset():
    - ...
    - fake-data-23498103
   """
-  counter_dataset = tf.contrib.data.Counter()
+  counter_dataset = tf.data.experimental.Counter()
   width = 8
   row_key_prefix = 'fake-data-'
   ds = counter_dataset.map(lambda index: tf.as_string(index,
diff --git a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
index cf56822ff4..7c87b0daeb 100644
--- a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
+++ b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
@@ -31,8 +31,8 @@ from six import iteritems
 from six import string_types
 
 from tensorflow.contrib.bigtable.ops import gen_bigtable_ops
-from tensorflow.contrib.data.python.ops import interleave_ops
 from tensorflow.contrib.util import loader
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import dtypes
@@ -228,7 +228,7 @@ class BigtableTable(object):
     """Retrieves a sampling of row keys from the Bigtable table.
 
     This dataset is most often used in conjunction with
-    `tf.contrib.data.parallel_interleave` to construct a set of ranges for
+    `tf.data.experimental.parallel_interleave` to construct a set of ranges for
     scanning in parallel.
 
     Returns:
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index 9b80eb559f..6e72670142 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -134,7 +134,6 @@ tensorflow/contrib/cudnn_rnn/python/ops
 tensorflow/contrib/data
 tensorflow/contrib/data/python
 tensorflow/contrib/data/python/kernel_tests
-tensorflow/contrib/data/python/kernel_tests/serialization
 tensorflow/contrib/data/python/ops
 tensorflow/contrib/decision_trees
 tensorflow/contrib/decision_trees/proto
diff --git a/tensorflow/contrib/data/README.md b/tensorflow/contrib/data/README.md
index 848782e8d8..90be7a66ca 100644
--- a/tensorflow/contrib/data/README.md
+++ b/tensorflow/contrib/data/README.md
@@ -1,10 +1,12 @@
 `tf.contrib.data` API
 =====================
 
-NOTE: The `tf.contrib.data` module has been deprecated. Use `tf.data` instead.
-We are continuing to support existing code using the `tf.contrib.data` APIs in
-the current version of TensorFlow, but will eventually remove support. The
-`tf.data` APIs are subject to backwards compatibility guarantees.
+NOTE: The `tf.contrib.data` module has been deprecated. Use `tf.data` instead,
+or `tf.data.experimental` for the experimental transformations previously hosted
+in this module. We are continuing to support existing code using the
+`tf.contrib.data` APIs in the current version of TensorFlow, but will eventually
+remove support. The non-experimental `tf.data` APIs are subject to backwards
+compatibility guarantees.
 
 Porting your code to `tf.data`
 ------------------------------
@@ -25,13 +27,13 @@ instead apply them using `Dataset.apply()` transformation. The full list of
 changes is as follows:
 
 * `dataset.dense_to_sparse_batch(...)` is now
-  `dataset.apply(tf.contrib.data.dense_to_sparse_batch(...)`.
+  `dataset.apply(tf.data.experimental.dense_to_sparse_batch(...)`.
 * `dataset.enumerate(...)` is now
-  `dataset.apply(tf.contrib.data.enumerate_dataset(...))`.
+  `dataset.apply(tf.data.experimental.enumerate_dataset(...))`.
 * `dataset.group_by_window(...)` is now
-  `dataset.apply(tf.contrib.data.group_by_window(...))`.
+  `dataset.apply(tf.data.experimental.group_by_window(...))`.
 * `dataset.ignore_errors()` is now
-  `dataset.apply(tf.contrib.data.ignore_errors())`.
+  `dataset.apply(tf.data.experimental.ignore_errors())`.
 * `dataset.unbatch()` is now `dataset.apply(tf.contrib.data.unbatch())`.
 
 The `Dataset.make_dataset_resource()` and `Iterator.dispose_op()` methods have
diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index 3cb51279c3..c3d3e981fa 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -96,10 +96,6 @@ from tensorflow.contrib.data.python.ops.interleave_ops import sample_from_datase
 from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave
 from tensorflow.contrib.data.python.ops.iterator_ops import CheckpointInputPipelineHook
 from tensorflow.contrib.data.python.ops.iterator_ops import make_saveable_from_iterator
-
-# Optimization constant that can be used to enable auto-tuning.
-from tensorflow.contrib.data.python.ops.optimization import AUTOTUNE
-
 from tensorflow.contrib.data.python.ops.parsing_ops import parse_example_dataset
 from tensorflow.contrib.data.python.ops.prefetching_ops import copy_to_device
 from tensorflow.contrib.data.python.ops.prefetching_ops import prefetch_to_device
@@ -114,11 +110,12 @@ from tensorflow.contrib.data.python.ops.resampling import rejection_resample
 from tensorflow.contrib.data.python.ops.scan_ops import scan
 from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat
 from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch
-from tensorflow.contrib.data.python.ops.stats_ops import latency_stats
-from tensorflow.contrib.data.python.ops.stats_ops import set_stats_aggregator
-from tensorflow.contrib.data.python.ops.stats_ops import StatsAggregator
 from tensorflow.contrib.data.python.ops.unique import unique
 from tensorflow.contrib.data.python.ops.writers import TFRecordWriter
+
+# Optimization constant that can be used to enable auto-tuning.
+from tensorflow.python.data.experimental.ops.optimization import AUTOTUNE
+
 from tensorflow.python.data.ops.iterator_ops import get_next_as_optional
 from tensorflow.python.data.ops.optional_ops import Optional
 # pylint: enable=unused-import
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 33784afa3f..42f538b4ba 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -8,51 +8,17 @@ load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 load("//tensorflow:tensorflow.bzl", "py_test")
 
 py_test(
-    name = "batch_dataset_op_test",
-    size = "medium",
-    srcs = ["batch_dataset_op_test.py"],
+    name = "assert_element_shape_test",
+    srcs = ["assert_element_shape_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",  # (b/79552534)
-        "no_pip",
-    ],
     deps = [
         "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
         "//tensorflow/python:script_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-py_test(
-    name = "bucketing_test",
-    size = "medium",
-    srcs = ["bucketing_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:grouping",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python/data/kernel_tests:test_base",
@@ -61,147 +27,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "csv_dataset_op_test",
-    size = "medium",
-    srcs = ["csv_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:error_ops",
-        "//tensorflow/contrib/data/python/ops:readers",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:session",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/python/eager:context",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "dataset_constructor_op_test",
-    size = "medium",
-    srcs = ["dataset_constructor_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "manual",
-        "nomac",  # b/62040583
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-    ],
-)
-
-py_test(
-    name = "directed_interleave_dataset_test",
-    size = "medium",
-    srcs = ["directed_interleave_dataset_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:random_seed",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "get_single_element_test",
-    size = "small",
-    srcs = ["get_single_element_test.py"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:get_single_element",
-        "//tensorflow/contrib/data/python/ops:grouping",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-py_test(
-    name = "indexed_dataset_ops_test",
-    srcs = ["indexed_dataset_ops_test.py"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:indexed_dataset_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "interleave_dataset_op_test",
-    size = "medium",
-    srcs = ["interleave_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "notap",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "@six_archive//:six",
-    ],
-)
-
-py_test(
-    name = "iterator_ops_test",
-    size = "small",
-    srcs = ["iterator_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/estimator:estimator_py",
-    ],
-)
-
 py_test(
     name = "lmdb_dataset_op_test",
     size = "medium",
@@ -229,252 +54,18 @@ py_test(
 )
 
 py_test(
-    name = "map_dataset_op_test",
-    size = "medium",
-    srcs = ["map_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "noasan",  # times out
-        "optonly",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/contrib/data/python/ops:error_ops",
-        "//tensorflow/contrib/data/python/ops:optimization",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "filter_dataset_op_test",
-    size = "medium",
-    srcs = ["filter_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "map_defun_op_test",
+    name = "reduce_dataset_test",
     size = "small",
-    srcs = ["map_defun_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    srcs = ["reduce_dataset_test.py"],
     deps = [
-        "//tensorflow/contrib/data/python/ops:map_defun",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:data_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:function",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python/data/kernel_tests:test_base",
-    ],
-)
-
-py_test(
-    name = "parsing_ops_test",
-    size = "small",
-    srcs = ["parsing_ops_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:parsing_ops",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//third_party/py/numpy",
-    ],
-)
-
-cuda_py_test(
-    name = "prefetching_ops_test",
-    size = "small",
-    srcs = ["prefetching_ops_test.py"],
-    additional_deps = [
-        "//tensorflow/contrib/data/python/ops:prefetching_ops",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python/compat:compat",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-    tags = ["no_windows_gpu"],
-)
-
-py_test(
-    name = "range_dataset_op_test",
-    size = "small",
-    srcs = ["range_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:counter",
-        "//tensorflow/contrib/data/python/ops:enumerate_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-py_library(
-    name = "reader_dataset_ops_test_base",
-    testonly = 1,
-    srcs = [
-        "reader_dataset_ops_test_base.py",
-    ],
-    srcs_version = "PY2AND3",
-    visibility = [
-        "//tensorflow/contrib/data/python/kernel_tests:__pkg__",
-        "//tensorflow/contrib/data/python/kernel_tests/serialization:__pkg__",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:readers",
-        "//tensorflow/core:protos_all_py",
+        "//tensorflow/contrib/data/python/ops:get_single_element",
+        "//tensorflow/contrib/data/python/ops:grouping",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:lib",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/ops:readers",
-    ],
-)
-
-py_test(
-    name = "reader_dataset_ops_test",
-    size = "medium",
-    srcs = ["reader_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        ":reader_dataset_ops_test_base",
-        "//tensorflow/contrib/data/python/ops:readers",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/python/data/util:nest",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "resample_test",
-    size = "medium",
-    srcs = ["resample_test.py"],
-    shard_count = 2,
-    srcs_version = "PY2AND3",
-    tags = [
-        "noasan",
-        "optonly",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:resampling",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:util",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
-        "@six_archive//:six",
-    ],
-)
-
-py_test(
-    name = "scan_dataset_op_test",
-    size = "small",
-    srcs = ["scan_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:scan_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/eager:context",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "shuffle_dataset_op_test",
-    size = "medium",
-    srcs = ["shuffle_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "optonly",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:shuffle_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
     ],
 )
 
@@ -496,142 +87,3 @@ py_test(
         "@absl_py//absl/testing:parameterized",
     ],
 )
-
-py_library(
-    name = "sql_dataset_op_test_base",
-    srcs = ["sql_dataset_op_test_base.py"],
-    srcs_version = "PY2AND3",
-    visibility = [
-        "//tensorflow/contrib/data/python/kernel_tests:__pkg__",
-        "//tensorflow/contrib/data/python/kernel_tests/serialization:__pkg__",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:readers",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "@org_sqlite//:python",
-    ],
-)
-
-py_test(
-    name = "sql_dataset_op_test",
-    size = "small",
-    srcs = ["sql_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        ":sql_dataset_op_test_base",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-    ],
-)
-
-py_test(
-    name = "stats_dataset_ops_test",
-    size = "medium",
-    srcs = ["stats_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        ":reader_dataset_ops_test_base",
-        ":stats_dataset_test_base",
-        "//tensorflow/contrib/data/python/ops:stats_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "stats_dataset_test_base",
-    srcs = ["stats_dataset_test_base.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/data/kernel_tests:test_base",
-    ],
-)
-
-py_test(
-    name = "threadpool_dataset_ops_test",
-    size = "small",
-    srcs = ["threadpool_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:threadpool",
-        "//tensorflow/contrib/data/python/ops:unique",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-py_test(
-    name = "unique_dataset_op_test",
-    size = "small",
-    srcs = ["unique_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:unique",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-py_test(
-    name = "window_dataset_op_test",
-    size = "medium",
-    srcs = ["window_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/contrib/data/python/ops:grouping",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-py_test(
-    name = "writer_ops_test",
-    size = "small",
-    srcs = ["writer_ops_test.py"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:writers",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:lib",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:readers",
-    ],
-)
diff --git a/tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py b/tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py
new file mode 100644
index 0000000000..0456463a19
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py
@@ -0,0 +1,226 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import script_ops
+from tensorflow.python.platform import test
+
+
+class AssertElementShapeTest(test_base.DatasetTestBase):
+
+  def test_assert_element_shape(self):
+
+    def create_dataset(_):
+      return (array_ops.ones(2, dtype=dtypes.float32),
+              array_ops.zeros((3, 4), dtype=dtypes.int32))
+
+    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
+    expected_shapes = (tensor_shape.TensorShape(2),
+                       tensor_shape.TensorShape((3, 4)))
+    self.assertEqual(expected_shapes, dataset.output_shapes)
+
+    result = dataset.apply(batching.assert_element_shape(expected_shapes))
+    self.assertEqual(expected_shapes, result.output_shapes)
+
+    iterator = result.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for _ in range(5):
+        sess.run(get_next)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def test_assert_wrong_element_shape(self):
+
+    def create_dataset(_):
+      return (array_ops.ones(2, dtype=dtypes.float32),
+              array_ops.zeros((3, 4), dtype=dtypes.int32))
+
+    dataset = dataset_ops.Dataset.range(3).map(create_dataset)
+    wrong_shapes = (tensor_shape.TensorShape(2),
+                    tensor_shape.TensorShape((3, 10)))
+    with self.assertRaises(ValueError):
+      dataset.apply(batching.assert_element_shape(wrong_shapes))
+
+  def test_assert_element_shape_on_unknown_shape_dataset(self):
+
+    def create_unknown_shape_dataset(x):
+      return script_ops.py_func(
+          lambda _: (  # pylint: disable=g-long-lambda
+              np.ones(2, dtype=np.float32),
+              np.zeros((3, 4), dtype=np.int32)),
+          [x],
+          [dtypes.float32, dtypes.int32])
+
+    dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset)
+    unknown_shapes = (tensor_shape.TensorShape(None),
+                      tensor_shape.TensorShape(None))
+    self.assertEqual(unknown_shapes, dataset.output_shapes)
+
+    expected_shapes = (tensor_shape.TensorShape(2),
+                       tensor_shape.TensorShape((3, 4)))
+    result = dataset.apply(batching.assert_element_shape(expected_shapes))
+    self.assertEqual(expected_shapes, result.output_shapes)
+
+    iterator = result.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for _ in range(5):
+        sess.run(get_next)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def test_assert_wrong_element_shape_on_unknown_shape_dataset(self):
+
+    def create_unknown_shape_dataset(x):
+      return script_ops.py_func(
+          lambda _: (  # pylint: disable=g-long-lambda
+              np.ones(2, dtype=np.float32),
+              np.zeros((3, 4), dtype=np.int32)),
+          [x],
+          [dtypes.float32, dtypes.int32])
+
+    dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset)
+    unknown_shapes = (tensor_shape.TensorShape(None),
+                      tensor_shape.TensorShape(None))
+    self.assertEqual(unknown_shapes, dataset.output_shapes)
+
+    wrong_shapes = (tensor_shape.TensorShape(2),
+                    tensor_shape.TensorShape((3, 10)))
+    iterator = (
+        dataset.apply(batching.assert_element_shape(wrong_shapes))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(get_next)
+
+  def test_assert_partial_element_shape(self):
+
+    def create_dataset(_):
+      return (array_ops.ones(2, dtype=dtypes.float32),
+              array_ops.zeros((3, 4), dtype=dtypes.int32))
+
+    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
+    partial_expected_shape = (
+        tensor_shape.TensorShape(None),  # Unknown shape
+        tensor_shape.TensorShape((None, 4)))  # Partial shape
+    result = dataset.apply(
+        batching.assert_element_shape(partial_expected_shape))
+    # Partial shapes are merged with actual shapes:
+    actual_shapes = (tensor_shape.TensorShape(2),
+                     tensor_shape.TensorShape((3, 4)))
+    self.assertEqual(actual_shapes, result.output_shapes)
+
+    iterator = result.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for _ in range(5):
+        sess.run(get_next)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def test_assert_wrong_partial_element_shape(self):
+
+    def create_dataset(_):
+      return (array_ops.ones(2, dtype=dtypes.float32),
+              array_ops.zeros((3, 4), dtype=dtypes.int32))
+
+    dataset = dataset_ops.Dataset.range(3).map(create_dataset)
+    wrong_shapes = (tensor_shape.TensorShape(2),
+                    tensor_shape.TensorShape((None, 10)))
+    with self.assertRaises(ValueError):
+      dataset.apply(batching.assert_element_shape(wrong_shapes))
+
+  def test_assert_partial_element_shape_on_unknown_shape_dataset(self):
+
+    def create_unknown_shape_dataset(x):
+      return script_ops.py_func(
+          lambda _: (  # pylint: disable=g-long-lambda
+              np.ones(2, dtype=np.float32),
+              np.zeros((3, 4), dtype=np.int32)),
+          [x],
+          [dtypes.float32, dtypes.int32])
+
+    dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset)
+    unknown_shapes = (tensor_shape.TensorShape(None),
+                      tensor_shape.TensorShape(None))
+    self.assertEqual(unknown_shapes, dataset.output_shapes)
+
+    expected_shapes = (tensor_shape.TensorShape(2),
+                       tensor_shape.TensorShape((None, 4)))
+    result = dataset.apply(batching.assert_element_shape(expected_shapes))
+    self.assertEqual(expected_shapes, result.output_shapes)
+
+    iterator = result.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for _ in range(5):
+        sess.run(get_next)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def test_assert_wrong_partial_element_shape_on_unknown_shape_dataset(self):
+
+    def create_unknown_shape_dataset(x):
+      return script_ops.py_func(
+          lambda _: (  # pylint: disable=g-long-lambda
+              np.ones(2, dtype=np.float32),
+              np.zeros((3, 4), dtype=np.int32)),
+          [x],
+          [dtypes.float32, dtypes.int32])
+
+    dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset)
+    unknown_shapes = (tensor_shape.TensorShape(None),
+                      tensor_shape.TensorShape(None))
+    self.assertEqual(unknown_shapes, dataset.output_shapes)
+
+    wrong_shapes = (tensor_shape.TensorShape(2),
+                    tensor_shape.TensorShape((None, 10)))
+    iterator = (
+        dataset.apply(batching.assert_element_shape(wrong_shapes))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py b/tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py
new file mode 100644
index 0000000000..e7281d5318
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py
@@ -0,0 +1,62 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.contrib.data.python.ops import get_single_element
+from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class ReduceDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ("SumZero", 0),
+      ("SumOne", 1),
+      ("SumFive", 5),
+      ("SumTen", 10),
+  )
+  def testReduceDataset(self, stop):
+    def init_fn(_):
+      return np.int64(0)
+
+    def reduce_fn(state, value):
+      return state + value
+
+    def finalize_fn(state):
+      return state
+
+    sum_reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
+
+    stop_t = array_ops.placeholder(dtypes.int64, shape=[])
+    dataset = dataset_ops.Dataset.range(stop_t)
+    element = get_single_element.reduce_dataset(dataset, sum_reducer)
+
+    with self.cached_session() as sess:
+      value = sess.run(element, feed_dict={stop_t: stop})
+      self.assertEqual(stop * (stop - 1) / 2, value)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
deleted file mode 100644
index 79134c7bc6..0000000000
--- a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
+++ /dev/null
@@ -1,527 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for the experimental input pipeline ops."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import grouping
-from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.platform import test
-
-
-class WindowDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
-
-  def _structuredDataset(self, structure, shape, dtype):
-    if structure is None:
-      return dataset_ops.Dataset.from_tensors(
-          array_ops.zeros(shape, dtype=dtype))
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self._structuredDataset(substructure, shape, dtype)
-              for substructure in structure
-          ]))
-
-  def _structuredElement(self, structure, shape, dtype):
-    if structure is None:
-      return array_ops.zeros(shape, dtype=dtype)
-    else:
-      return tuple([
-          self._structuredElement(substructure, shape, dtype)
-          for substructure in structure
-      ])
-
-  def _assertEqual(self, xs, ys):
-    self.assertEqual(type(xs), type(ys))
-    if isinstance(xs, tuple) and isinstance(ys, tuple):
-      self.assertEqual(len(xs), len(ys))
-      for x, y in zip(xs, ys):
-        self._assertEqual(x, y)
-    elif isinstance(xs, np.ndarray) and isinstance(ys, np.ndarray):
-      self.assertAllEqual(xs, ys)
-    else:
-      self.assertEqual(xs, ys)
-
-  @parameterized.named_parameters(
-      ("1", None, np.int32([]), dtypes.bool),
-      ("2", None, np.int32([]), dtypes.int32),
-      ("3", None, np.int32([]), dtypes.float32),
-      ("4", None, np.int32([]), dtypes.string),
-      ("5", None, np.int32([2]), dtypes.int32),
-      ("6", None, np.int32([2, 2]), dtypes.int32),
-      ("7", (None, None, None), np.int32([]), dtypes.int32),
-      ("8", (None, (None, None)), np.int32([]), dtypes.int32),
-  )
-  def testWindowDatasetFlatMap(self, structure, shape, dtype):
-    """Tests windowing by chaining it with flat map.
-
-    Args:
-      structure: the input structure
-      shape: the input shape
-      dtype: the input data type
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return args[0]
-      return dataset_ops.Dataset.zip(
-          tuple([fn(*arg) if isinstance(arg, tuple) else arg for arg in args]))
-
-    dataset = self._structuredDataset(structure, shape, dtype).repeat(5).apply(
-        grouping.window_dataset(5)).flat_map(fn)
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected = sess.run(self._structuredElement(structure, shape, dtype))
-      for _ in range(5):
-        actual = sess.run(get_next)
-        self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", None, np.int32([]), dtypes.bool),
-      ("2", None, np.int32([]), dtypes.int32),
-      ("3", None, np.int32([]), dtypes.float32),
-      ("4", None, np.int32([]), dtypes.string),
-      ("5", None, np.int32([2]), dtypes.int32),
-      ("6", None, np.int32([2, 2]), dtypes.int32),
-      ("7", (None, None, None), np.int32([]), dtypes.int32),
-      ("8", (None, (None, None)), np.int32([]), dtypes.int32),
-  )
-  def testWindowDatasetBatchDense(self, structure, shape, dtype):
-    """Tests batching of dense tensor windows.
-
-    Args:
-      structure: the input structure
-      shape: the input shape
-      dtype: the input data type
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return batching.batch_window(args[0])
-
-      return tuple([
-          fn(*arg) if isinstance(arg, tuple) else batching.batch_window(arg)
-          for arg in args
-      ])
-
-    dataset = self._structuredDataset(structure, shape, dtype).repeat(5).apply(
-        grouping.window_dataset(5)).apply(grouping._map_x_dataset(fn))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected = sess.run(
-          self._structuredElement(structure, np.concatenate(
-              ([5], shape), axis=0), dtype))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int32([])),
-      ("2", np.int32([1])),
-      ("3", np.int32([1, 2, 3])),
-  )
-  def testWindowDatasetBatchDenseDynamicShape(self, shape):
-    """Tests batching of dynamically shaped dense tensor windows.
-
-    Args:
-      shape: the input shape
-    """
-
-    shape_t = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensors(
-        array_ops.zeros(shape_t)).repeat(5).apply(
-            grouping.window_dataset(5)).apply(
-                grouping._map_x_dataset(batching.batch_window))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op, {shape_t: shape})
-      expected = sess.run(
-          self._structuredElement(None, np.concatenate(([5], shape), axis=0),
-                                  dtypes.int32))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  def _make_dense_to_sparse_fn(self, is_scalar):
-
-    def dense_to_sparse_scalar(tensor):
-      indices = [[]]
-      values = array_ops.expand_dims(tensor, 0)
-      shape = []
-      return sparse_tensor.SparseTensorValue(indices, values, shape)
-
-    def dense_to_sparse_non_scalar(tensor):
-      indices = array_ops.where(array_ops.ones_like(tensor, dtype=dtypes.bool))
-      values = array_ops.gather_nd(tensor, indices)
-      shape = array_ops.shape(tensor, out_type=dtypes.int64)
-      return sparse_tensor.SparseTensorValue(indices, values, shape)
-
-    if is_scalar:
-      return dense_to_sparse_scalar
-    return dense_to_sparse_non_scalar
-
-  def _structuredSparseDataset(self, structure, shape, dtype):
-    dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0)  # pylint: disable=g-explicit-length-test
-    if structure is None:
-      return dataset_ops.Dataset.from_tensors(
-          dense_to_sparse(array_ops.zeros(shape, dtype=dtype)))
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self._structuredSparseDataset(substructure, shape, dtype)
-              for substructure in structure
-          ]))
-
-  def _structuredSparseElement(self, structure, shape, dtype):
-    dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0)  # pylint: disable=g-explicit-length-test
-    if structure is None:
-      return dense_to_sparse(array_ops.zeros(shape, dtype=dtype))
-    else:
-      return tuple([
-          self._structuredSparseElement(substructure, shape, dtype)
-          for substructure in structure
-      ])
-
-  @parameterized.named_parameters(
-      ("1", None, np.int32([]), dtypes.bool),
-      ("2", None, np.int32([]), dtypes.int32),
-      ("3", None, np.int32([]), dtypes.float32),
-      ("4", None, np.int32([]), dtypes.string),
-      ("5", None, np.int32([2]), dtypes.int32),
-      ("6", None, np.int32([2, 2]), dtypes.int32),
-      ("7", (None, None, None), np.int32([]), dtypes.int32),
-      ("8", (None, (None, None)), np.int32([]), dtypes.int32),
-  )
-  def testWindowDatasetBatchSparse(self, structure, shape, dtype):
-    """Tests batching of sparse tensor windows.
-
-    Args:
-      structure: the input structure
-      shape: the input shape
-      dtype: the input data type
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return batching.batch_window(args[0])
-
-      return tuple([
-          fn(*arg) if isinstance(arg, tuple) else batching.batch_window(arg)
-          for arg in args
-      ])
-
-    dataset = self._structuredSparseDataset(
-        structure, shape, dtype).repeat(5).apply(
-            grouping.window_dataset(5)).apply(grouping._map_x_dataset(fn))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected = sess.run(
-          self._structuredSparseElement(structure,
-                                        np.concatenate(([5], shape), axis=0),
-                                        dtype))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int32([])),
-      ("2", np.int32([1])),
-      ("3", np.int32([1, 2, 3])),
-  )
-  def testWindowDatasetBatchSparseDynamicShape(self, shape):
-    """Tests batching of dynamically shaped sparse tensor windows.
-
-    Args:
-      shape: the input shape
-    """
-
-    shape_t = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensors(array_ops.zeros(shape_t)).map(
-        self._make_dense_to_sparse_fn(len(shape) == 0)).repeat(5).apply(  # pylint: disable=g-explicit-length-test
-            grouping.window_dataset(5)).apply(
-                grouping._map_x_dataset(batching.batch_window))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op, {shape_t: shape})
-      expected = sess.run(
-          self._structuredSparseElement(None,
-                                        np.concatenate(([5], shape), axis=0),
-                                        dtypes.int32))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  def _structuredRaggedDataset(self, structure, shapes, dtype):
-
-    if structure is None:
-      return dataset_ops.Dataset.from_tensor_slices(shapes).map(
-          lambda shape: array_ops.zeros(shape, dtype=dtype))
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self._structuredRaggedDataset(substructure, shapes, dtype)
-              for substructure in structure
-          ]))
-
-  @parameterized.named_parameters(
-      ("1", None, np.int32([[1], [2], [3]]), dtypes.bool, [-1]),
-      ("2", None, np.int32([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("3", None, np.int32([[1], [2], [3]]), dtypes.float32, [-1]),
-      ("4", None, np.int32([[1], [2], [3]]), dtypes.string, [-1]),
-      ("5", None, np.int32([[1, 3], [2, 2], [3, 1]]), dtypes.int32, [-1, -1]),
-      ("6", None, np.int32([[3, 1, 3], [1, 3, 1]]), dtypes.int32, [-1, -1, -1]),
-      ("7", (None, None, None), np.int32([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("8", (None,
-             (None, None)), np.int32([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("9", None, np.int32([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("10", None, np.int32([[1], [2], [3]]), dtypes.int32, np.int32([10])),
-  )
-  def testWindowDatasetPaddedBatchDense(self, structure, shapes, dtype,
-                                        padded_shape):
-    """Tests padded batching of dense tensor windows.
-
-    Args:
-      structure: the input structure
-      shapes: the input shapes
-      dtype: the input data type
-      padded_shape: the shape to pad the output to
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return batching.padded_batch_window(args[0], padded_shape)
-
-      return tuple([
-          fn(*arg) if isinstance(arg, tuple) else batching.padded_batch_window(
-              arg, padded_shape) for arg in args
-      ])
-
-    dataset = self._structuredRaggedDataset(structure, shapes, dtype).apply(
-        grouping.window_dataset(len(shapes))).apply(
-            grouping._map_x_dataset(fn))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected_shape = np.maximum(np.amax(shapes, axis=0), padded_shape)
-      expected = sess.run(
-          self._structuredElement(
-              structure,
-              np.concatenate((np.int32([len(shapes)]), expected_shape)), dtype))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int32([[1], [2], [3]]), [-1]),
-      ("2", np.int32([[1, 3], [2, 2], [3, 1]]), [-1, -1]),
-      ("3", np.int32([[3, 1, 3], [1, 3, 1]]), [-1, -1, -1]),
-  )
-  def testWindowDatasetPaddedBatchDenseDynamicShape(self, shapes, padded_shape):
-    """Tests padded batching of dynamically shaped dense tensor windows.
-
-    Args:
-      shapes: the input shapes
-      padded_shape: the shape to pad the output to
-    """
-
-    shapes_t = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensor_slices(shapes_t).map(
-        lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).apply(
-            grouping.window_dataset(len(shapes))).apply(
-                grouping._map_x_dataset(
-                    lambda x: batching.padded_batch_window(x, padded_shape)))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op, {shapes_t: shapes})
-      expected_shape = np.maximum(np.amax(shapes, axis=0), padded_shape)
-      expected = sess.run(
-          self._structuredElement(
-              None, np.concatenate((np.int32([len(shapes)]), expected_shape)),
-              dtypes.int32))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int32([[1]]), np.int32([0])),
-      ("2", np.int32([[10], [20]]), np.int32([15])),
-  )
-  def testWindowDatasetPaddedBatchDenseInvalid(self, shapes, padded_shape):
-    """Tests invalid padded batching of dense tensor windows.
-
-    Args:
-      shapes: the input shapes
-      padded_shape: the shape to pad the output to
-    """
-
-    dataset = dataset_ops.Dataset.from_tensor_slices(shapes).map(
-        lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).apply(
-            grouping.window_dataset(len(shapes))).apply(
-                grouping._map_x_dataset(
-                    lambda x: batching.padded_batch_window(x, padded_shape)))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-  def _structuredRaggedSparseDataset(self, structure, shapes, dtype):
-
-    def map_fn(shape):
-      dense_to_sparse = self._make_dense_to_sparse_fn(False)
-      return dense_to_sparse(array_ops.zeros(shape, dtype=dtype))
-
-    if structure is None:
-      return dataset_ops.Dataset.from_tensor_slices(shapes).map(map_fn)
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self._structuredRaggedSparseDataset(substructure, shapes, dtype)
-              for substructure in structure
-          ]))
-
-  def _structuredRaggedSparseElement(self, structure, shapes, dtype,
-                                     padded_shape):
-    if structure is None:
-      dense_shape = np.maximum(np.amax(shapes, axis=0), padded_shape)
-      values = []
-      for shape in shapes:
-        dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0)  # pylint: disable=g-explicit-length-test
-        sparse = dense_to_sparse(array_ops.zeros(shape, dtype=dtype))
-        padded_sparse = sparse_tensor.SparseTensor(sparse.indices,
-                                                   sparse.values, dense_shape)
-        reshaped_sparse = sparse_ops.sparse_reshape(
-            padded_sparse,
-            array_ops.concat([np.array([1], dtype=np.int64), dense_shape], 0))
-        values.append(reshaped_sparse)
-      return sparse_ops.sparse_concat(0, values)
-    else:
-      return tuple([
-          self._structuredRaggedSparseElement(substructure, shapes, dtype,
-                                              padded_shape)
-          for substructure in structure
-      ])
-
-  @parameterized.named_parameters(
-      ("1", None, np.int64([[1], [2], [3]]), dtypes.bool, [-1]),
-      ("2", None, np.int64([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("3", None, np.int64([[1], [2], [3]]), dtypes.float32, [-1]),
-      ("4", None, np.int64([[1], [2], [3]]), dtypes.string, [-1]),
-      ("5", None, np.int64([[1, 3], [2, 2], [3, 1]]), dtypes.int32, [-1, -1]),
-      ("6", None, np.int64([[1, 3, 1], [3, 1, 3]]), dtypes.int32, [-1, -1, -1]),
-      ("7", (None, None, None), np.int64([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("8", (None,
-             (None, None)), np.int64([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("9", None, np.int64([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("10", None, np.int64([[1], [2], [3]]), dtypes.int32, np.int64([10])),
-  )
-  def testWindowDatasetPaddedBatchSparse(self, structure, shapes, dtype,
-                                         padded_shape):
-    """Tests padded batching of sparse tensor windows.
-
-    Args:
-      structure: the input structure
-      shapes: the input shapes
-      dtype: the input data type
-      padded_shape: the shape to pad the output to
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return batching.padded_batch_window(args[0], padded_shape)
-
-      return tuple([
-          fn(*arg) if isinstance(arg, tuple) else batching.padded_batch_window(
-              arg, padded_shape) for arg in args
-      ])
-
-    dataset = self._structuredRaggedSparseDataset(
-        structure, shapes, dtype).apply(grouping.window_dataset(
-            len(shapes))).apply(grouping._map_x_dataset(fn))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected = sess.run(
-          self._structuredRaggedSparseElement(structure, shapes, dtype,
-                                              padded_shape))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int64([[1], [2], [3]]), [-1]),
-      ("2", np.int64([[1, 3], [2, 2], [3, 1]]), [-1, -1]),
-      ("3", np.int64([[3, 1, 3], [1, 3, 1]]), [-1, -1, -1]),
-  )
-  def testWindowDatasetPaddedBatchSparseDynamicShape(self, shapes,
-                                                     padded_shape):
-    """Tests padded batching of dynamically shaped sparse tensor windows.
-
-    Args:
-      shapes: the input shapes
-      padded_shape: the shape to pad the output to
-    """
-
-    shapes_t = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensor_slices(shapes_t).map(
-        lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).map(
-            self._make_dense_to_sparse_fn(False)
-        ).apply(grouping.window_dataset(len(shapes))).apply(
-            grouping._map_x_dataset(
-                lambda x: batching.padded_batch_window(x, padded_shape)))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op, {shapes_t: shapes})
-      expected = sess.run(
-          self._structuredRaggedSparseElement(None, shapes, dtypes.int32,
-                                              padded_shape))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int64([[1]]), [0]),
-      ("2", np.int64([[10], [20]]), [15]),
-  )
-  def testWindowDatasetPaddedBatchSparseInvalid(self, shapes, padded_shape):
-    """Tests invalid padded batching of sparse tensor windows.
-
-    Args:
-      shapes: the input shapes
-      padded_shape: the shape to pad the output to
-    """
-
-    dataset = dataset_ops.Dataset.from_tensor_slices(shapes).map(
-        lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).map(
-            self._make_dense_to_sparse_fn(False)
-        ).apply(grouping.window_dataset(len(shapes))).apply(
-            grouping._map_x_dataset(
-                lambda x: batching.padded_batch_window(x, padded_shape)))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index 5cd1ed542b..34dc2379d0 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -16,10 +16,7 @@ py_library(
     srcs = ["counter.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":scan_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/experimental/ops:counter",
     ],
 )
 
@@ -28,12 +25,7 @@ py_library(
     srcs = ["get_single_element.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":grouping",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-        "//third_party/py/numpy",
+        "//tensorflow/python/data/experimental/ops:get_single_element",
     ],
 )
 
@@ -44,10 +36,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
     ],
 )
 
@@ -58,15 +47,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:random_seed",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:random_ops",
     ],
 )
 
@@ -79,7 +60,6 @@ py_library(
     deps = [
         ":batching",
         ":interleave_ops",
-        ":optimization",
         ":parsing_ops",
         ":shuffle_ops",
         "//tensorflow/python:constant_op",
@@ -91,6 +71,7 @@ py_library(
         "//tensorflow/python:platform",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:readers",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:readers",
         "//tensorflow/python/data/util:convert",
@@ -106,7 +87,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/experimental/ops:shuffle_ops",
     ],
 )
 
@@ -125,6 +106,7 @@ py_library(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:tensor_util",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:convert",
         "//tensorflow/python/data/util:nest",
@@ -138,8 +120,7 @@ py_library(
     srcs = ["enumerate_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/experimental/ops:enumerate_ops",
     ],
 )
 
@@ -148,10 +129,7 @@ py_library(
     srcs = ["error_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:error_ops",
     ],
 )
 
@@ -160,16 +138,7 @@ py_library(
     srcs = ["grouping.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:function",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:grouping",
     ],
 )
 
@@ -178,30 +147,7 @@ py_library(
     srcs = ["interleave_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":random_ops",
-        "//tensorflow/contrib/stateless",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-    ],
-)
-
-py_library(
-    name = "optimization",
-    srcs = ["optimization.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
     ],
 )
 
@@ -210,25 +156,7 @@ py_library(
     srcs = ["parsing_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-    ],
-)
-
-py_library(
-    name = "map_defun",
-    srcs = ["map_defun.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:parsing_ops",
     ],
 )
 
@@ -237,18 +165,7 @@ py_library(
     srcs = ["resampling.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":batching",
-        ":interleave_ops",
-        ":scan_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:logging_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
+        "//tensorflow/python/data/experimental/ops:resampling",
     ],
 )
 
@@ -257,12 +174,7 @@ py_library(
     srcs = ["scan_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:function",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:scan_ops",
     ],
 )
 
@@ -281,32 +193,12 @@ py_library(
     ],
 )
 
-py_library(
-    name = "stats_ops",
-    srcs = ["stats_ops.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-    ],
-)
-
 py_library(
     name = "threadpool",
     srcs = ["threadpool.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-        "//tensorflow/python/eager:context",
+        "//tensorflow/python/data/experimental/ops:threadpool",
     ],
 )
 
@@ -317,11 +209,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:unique",
     ],
 )
 
@@ -332,20 +220,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-py_library(
-    name = "indexed_dataset_ops",
-    srcs = ["indexed_dataset_ops.py"],
-    deps = [
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:writers",
     ],
 )
 
@@ -353,11 +228,7 @@ py_library(
     name = "prefetching_ops",
     srcs = ["prefetching_ops.py"],
     deps = [
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:prefetching_ops",
     ],
 )
 
@@ -370,17 +241,14 @@ py_library(
         ":error_ops",
         ":get_single_element",
         ":grouping",
-        ":indexed_dataset_ops",
         ":interleave_ops",
-        ":map_defun",
-        ":optimization",
         ":prefetching_ops",
+        ":random_ops",
         ":readers",
         ":resampling",
         ":scan_ops",
         ":shuffle_ops",
         ":sliding",
-        ":stats_ops",
         ":threadpool",
         ":unique",
         ":writers",
diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py
index 7a0f221284..8c60459ca8 100644
--- a/tensorflow/contrib/data/python/ops/batching.py
+++ b/tensorflow/contrib/data/python/ops/batching.py
@@ -17,134 +17,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
-from tensorflow.contrib.data.python.ops import get_single_element
-from tensorflow.contrib.data.python.ops import grouping
 from tensorflow.contrib.framework import with_shape
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import convert
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import sparse_ops
 from tensorflow.python.util import deprecation
 
 
-def batch_window(dataset):
-  """Batches a window of tensors.
-
-  Args:
-    dataset: the input dataset.
-
-  Returns:
-    A `Tensor` representing the batch of the entire input dataset.
-  """
-  if isinstance(dataset.output_classes, tuple):
-    raise TypeError("Input dataset expected to have a single component")
-  if dataset.output_classes is ops.Tensor:
-    return _batch_dense_window(dataset)
-  elif dataset.output_classes is sparse_tensor.SparseTensor:
-    return _batch_sparse_window(dataset)
-  else:
-    raise TypeError("Unsupported dataset type: %s" % dataset.output_classes)
-
-
-def _batch_dense_window(dataset):
-  """Batches a window of dense tensors."""
-
-  def key_fn(_):
-    return np.int64(0)
-
-  def shape_init_fn(_):
-    return array_ops.shape(first_element)
-
-  def shape_reduce_fn(state, value):
-    check_ops.assert_equal(state, array_ops.shape(value))
-    return state
-
-  def finalize_fn(state):
-    return state
-
-  if dataset.output_shapes.is_fully_defined():
-    shape = dataset.output_shapes
-  else:
-    first_element = get_single_element.get_single_element(dataset.take(1))
-    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
-                                     finalize_fn)
-    shape = get_single_element.get_single_element(
-        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))
-
-  def batch_init_fn(_):
-    batch_shape = array_ops.concat([[0], shape], 0)
-    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)
-
-  def batch_reduce_fn(state, value):
-    return array_ops.concat([state, [value]], 0)
-
-  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
-  return get_single_element.get_single_element(
-      dataset.apply(grouping.group_by_reducer(key_fn, batch_reducer)))
-
-
-def _batch_sparse_window(dataset):
-  """Batches a window of sparse tensors."""
-
-  def key_fn(_):
-    return np.int64(0)
-
-  def shape_init_fn(_):
-    return first_element.dense_shape
-
-  def shape_reduce_fn(state, value):
-    check_ops.assert_equal(state, value.dense_shape)
-    return state
-
-  def finalize_fn(state):
-    return state
-
-  if dataset.output_shapes.is_fully_defined():
-    shape = dataset.output_shapes
-  else:
-    first_element = get_single_element.get_single_element(dataset.take(1))
-    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
-                                     finalize_fn)
-    shape = get_single_element.get_single_element(
-        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))
-
-  def batch_init_fn(_):
-    indices_shape = array_ops.concat([[0], [array_ops.size(shape) + 1]], 0)
-    return sparse_tensor.SparseTensor(
-        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
-        values=constant_op.constant([], shape=[0], dtype=dataset.output_types),
-        dense_shape=array_ops.concat(
-            [np.array([0], dtype=np.int64),
-             math_ops.cast(shape, dtypes.int64)], 0))
-
-  def batch_reduce_fn(state, value):
-    return sparse_ops.sparse_concat(0, [state, value])
-
-  def reshape_fn(value):
-    return sparse_ops.sparse_reshape(
-        value,
-        array_ops.concat([np.array([1], dtype=np.int64), value.dense_shape], 0))
-
-  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
-  return get_single_element.get_single_element(
-      dataset.map(reshape_fn).apply(
-          grouping.group_by_reducer(key_fn, batch_reducer)))
-
-
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.dense_to_sparse_batch(...)`.")
 def dense_to_sparse_batch(batch_size, row_shape):
   """A transformation that batches ragged elements into `tf.SparseTensor`s.
 
@@ -187,201 +67,10 @@ def dense_to_sparse_batch(batch_size, row_shape):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    return _DenseToSparseBatchDataset(dataset, batch_size, row_shape)
-
-  return _apply_fn
-
-
-def padded_batch_window(dataset, padded_shape, padding_value=None):
-  """Batches a window of tensors with padding.
-
-  Args:
-    dataset: the input dataset.
-    padded_shape: (Optional.) `tf.TensorShape` or `tf.int64` vector tensor-like
-      object representing the shape to which the input elements should be padded
-      prior to batching. Any unknown dimensions (e.g. `tf.Dimension(None)` in a
-      `tf.TensorShape` or `-1` in a tensor-like object) will be padded to the
-      maximum size of that dimension in each batch.
-    padding_value: (Optional.) A scalar-shaped `tf.Tensor`, representing the
-      padding value to use. Defaults are `0` for numeric types and the empty
-      string for string types. If `dataset` contains `tf.SparseTensor`, this
-      value is ignored.
-
-  Returns:
-    A `Tensor` representing the batch of the entire input dataset.
-
-  Raises:
-    ValueError: if invalid arguments are provided.
-  """
-  if not issubclass(dataset.output_classes,
-                    (ops.Tensor, sparse_tensor.SparseTensor)):
-    raise TypeError("Input dataset expected to have a single tensor component")
-  if issubclass(dataset.output_classes, (ops.Tensor)):
-    return _padded_batch_dense_window(dataset, padded_shape, padding_value)
-  elif issubclass(dataset.output_classes, (sparse_tensor.SparseTensor)):
-    if padding_value is not None:
-      raise ValueError("Padding value not allowed for sparse tensors")
-    return _padded_batch_sparse_window(dataset, padded_shape)
-  else:
-    raise TypeError("Unsupported dataset type: %s" % dataset.output_classes)
-
-
-def _padded_batch_dense_window(dataset, padded_shape, padding_value=None):
-  """Batches a window of dense tensors with padding."""
-
-  padded_shape = math_ops.cast(
-      convert.partial_shape_to_tensor(padded_shape), dtypes.int32)
-
-  def key_fn(_):
-    return np.int64(0)
-
-  def max_init_fn(_):
-    return padded_shape
-
-  def max_reduce_fn(state, value):
-    """Computes the maximum shape to pad to."""
-    condition = math_ops.reduce_all(
-        math_ops.logical_or(
-            math_ops.less_equal(array_ops.shape(value), padded_shape),
-            math_ops.equal(padded_shape, -1)))
-    assert_op = control_flow_ops.Assert(condition, [
-        "Actual shape greater than padded shape: ",
-        array_ops.shape(value), padded_shape
-    ])
-    with ops.control_dependencies([assert_op]):
-      return math_ops.maximum(state, array_ops.shape(value))
-
-  def finalize_fn(state):
-    return state
-
-  # Compute the padded shape.
-  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
-  padded_shape = get_single_element.get_single_element(
-      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))
-
-  if padding_value is None:
-    if dataset.output_types == dtypes.string:
-      padding_value = ""
-    elif dataset.output_types == dtypes.bool:
-      padding_value = False
-    elif dataset.output_types == dtypes.variant:
-      raise TypeError("Unable to create padding for field of type 'variant'")
-    else:
-      padding_value = 0
-
-  def batch_init_fn(_):
-    batch_shape = array_ops.concat(
-        [np.array([0], dtype=np.int32), padded_shape], 0)
-    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)
-
-  def batch_reduce_fn(state, value):
-    return array_ops.concat([state, [value]], 0)
-
-  def pad_fn(value):
-    shape = array_ops.shape(value)
-    left = array_ops.zeros_like(shape)
-    right = padded_shape - shape
-    return array_ops.pad(
-        value, array_ops.stack([left, right], 1), constant_values=padding_value)
-
-  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
-  return get_single_element.get_single_element(
-      dataset.map(pad_fn).apply(
-          grouping.group_by_reducer(key_fn, batch_reducer)))
-
-
-def _padded_batch_sparse_window(dataset, padded_shape):
-  """Batches a window of sparse tensors with padding."""
-
-  def key_fn(_):
-    return np.int64(0)
-
-  def max_init_fn(_):
-    return convert.partial_shape_to_tensor(padded_shape)
-
-  def max_reduce_fn(state, value):
-    """Computes the maximum shape to pad to."""
-    condition = math_ops.reduce_all(
-        math_ops.logical_or(
-            math_ops.less_equal(value.dense_shape, padded_shape),
-            math_ops.equal(padded_shape, -1)))
-    assert_op = control_flow_ops.Assert(condition, [
-        "Actual shape greater than padded shape: ", value.dense_shape,
-        padded_shape
-    ])
-    with ops.control_dependencies([assert_op]):
-      return math_ops.maximum(state, value.dense_shape)
-
-  def finalize_fn(state):
-    return state
-
-  # Compute the padded shape.
-  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
-  padded_shape = get_single_element.get_single_element(
-      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))
-
-  def batch_init_fn(_):
-    indices_shape = array_ops.concat([[0], [array_ops.size(padded_shape) + 1]],
-                                     0)
-    return sparse_tensor.SparseTensor(
-        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
-        values=constant_op.constant([], shape=[0], dtype=dataset.output_types),
-        dense_shape=array_ops.concat(
-            [np.array([0], dtype=np.int64), padded_shape], 0))
-
-  def batch_reduce_fn(state, value):
-    padded_value = sparse_tensor.SparseTensor(
-        indices=value.indices, values=value.values, dense_shape=padded_shape)
-    reshaped_value = sparse_ops.sparse_reshape(
-        padded_value,
-        array_ops.concat(
-            [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
-    return sparse_ops.sparse_concat(0, [state, reshaped_value])
-
-  reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
-  return get_single_element.get_single_element(
-      dataset.apply(grouping.group_by_reducer(key_fn, reducer)))
-
-
-class _UnbatchDataset(dataset_ops.UnaryDataset):
-  """A dataset that splits the elements of its input into multiple elements."""
-
-  def __init__(self, input_dataset):
-    """See `unbatch()` for more details."""
-    super(_UnbatchDataset, self).__init__(input_dataset)
-    flat_shapes = nest.flatten(input_dataset.output_shapes)
-    if any(s.ndims == 0 for s in flat_shapes):
-      raise ValueError("Cannot unbatch an input with scalar components.")
-    known_batch_dim = tensor_shape.Dimension(None)
-    for s in flat_shapes:
-      try:
-        known_batch_dim = known_batch_dim.merge_with(s[0])
-      except ValueError:
-        raise ValueError("Cannot unbatch an input whose components have "
-                         "different batch sizes.")
-    self._input_dataset = input_dataset
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.unbatch_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return nest.map_structure(lambda s: s[1:],
-                              self._input_dataset.output_shapes)
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
+  return batching.dense_to_sparse_batch(batch_size, row_shape)
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.unbatch()`.")
 def unbatch():
   """Splits elements of a dataset into multiple elements on the batch dimension.
 
@@ -403,39 +92,7 @@ def unbatch():
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    if not sparse.any_sparse(dataset.output_classes):
-      return _UnbatchDataset(dataset)
-
-    # NOTE(mrry): We must ensure that any SparseTensors in `dataset`
-    # are normalized to the rank-1 dense representation, so that the
-    # sparse-oblivious unbatching logic will slice them
-    # appropriately. This leads to a somewhat inefficient re-encoding step
-    # for all SparseTensor components.
-    # TODO(mrry): Consider optimizing this in future
-    # if it turns out to be a bottleneck.
-    def normalize(arg, *rest):
-      if rest:
-        return sparse.serialize_many_sparse_tensors((arg,) + rest)
-      else:
-        return sparse.serialize_many_sparse_tensors(arg)
-
-    normalized_dataset = dataset.map(normalize)
-
-    # NOTE(mrry): Our `map()` has lost information about the sparseness
-    # of any SparseTensor components, so re-apply the structure of the
-    # original dataset.
-    restructured_dataset = _RestructuredDataset(
-        normalized_dataset,
-        dataset.output_types,
-        dataset.output_shapes,
-        dataset.output_classes,
-        allow_unsafe_cast=True)
-    return _UnbatchDataset(restructured_dataset)
-
-  return _apply_fn
+  return batching.unbatch()
 
 
 @deprecation.deprecated(
@@ -514,135 +171,8 @@ def padded_batch_and_drop_remainder(batch_size,
   return _apply_fn
 
 
-class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that batches ragged dense elements into `tf.SparseTensor`s."""
-
-  def __init__(self, input_dataset, batch_size, row_shape):
-    """See `Dataset.dense_to_sparse_batch()` for more details."""
-    super(_DenseToSparseBatchDataset, self).__init__(input_dataset)
-    if not isinstance(input_dataset.output_types, dtypes.DType):
-      raise TypeError("DenseToSparseDataset requires an input whose elements "
-                      "have a single component, whereas the input has %r." %
-                      input_dataset.output_types)
-    self._input_dataset = input_dataset
-    self._batch_size = batch_size
-    self._row_shape = row_shape
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.dense_to_sparse_batch_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._batch_size,
-        row_shape=convert.partial_shape_to_tensor(self._row_shape),
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return sparse_tensor.SparseTensor
-
-  @property
-  def output_shapes(self):
-    return tensor_shape.vector(None).concatenate(self._row_shape)
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-
-class _RestructuredDataset(dataset_ops.UnaryDataset):
-  """An internal helper for changing the structure and shape of a dataset."""
-
-  def __init__(self,
-               dataset,
-               output_types,
-               output_shapes=None,
-               output_classes=None,
-               allow_unsafe_cast=False):
-    """Creates a new dataset with the given output types and shapes.
-
-    The given `dataset` must have a structure that is convertible:
-    * `dataset.output_types` must be the same as `output_types` module nesting.
-    * Each shape in `dataset.output_shapes` must be compatible with each shape
-      in `output_shapes` (if given).
-
-    Note: This helper permits "unsafe casts" for shapes, equivalent to using
-    `tf.Tensor.set_shape()` where domain-specific knowledge is available.
-
-    Args:
-      dataset: A `Dataset` object.
-      output_types: A nested structure of `tf.DType` objects.
-      output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects.
-        If omitted, the shapes will be inherited from `dataset`.
-      output_classes: (Optional.) A nested structure of class types.
-        If omitted, the class types will be inherited from `dataset`.
-      allow_unsafe_cast: (Optional.) If `True`, the caller may switch the
-        reported output types and shapes of the restructured dataset, e.g. to
-        switch a sparse tensor represented as `tf.variant` to its user-visible
-        type and shape.
-
-    Raises:
-      ValueError: If either `output_types` or `output_shapes` is not compatible
-        with the structure of `dataset`.
-    """
-    super(_RestructuredDataset, self).__init__(dataset)
-    self._input_dataset = dataset
-
-    if not allow_unsafe_cast:
-      # Validate that the types are compatible.
-      output_types = nest.map_structure(dtypes.as_dtype, output_types)
-      flat_original_types = nest.flatten(dataset.output_types)
-      flat_new_types = nest.flatten(output_types)
-      if flat_original_types != flat_new_types:
-        raise ValueError(
-            "Dataset with output types %r cannot be restructured to have "
-            "output types %r" % (dataset.output_types, output_types))
-
-    self._output_types = output_types
-
-    if output_shapes is None:
-      # Inherit shapes from the original `dataset`.
-      self._output_shapes = nest.pack_sequence_as(output_types,
-                                                  nest.flatten(
-                                                      dataset.output_shapes))
-    else:
-      if not allow_unsafe_cast:
-        # Validate that the shapes are compatible.
-        nest.assert_same_structure(output_types, output_shapes)
-        flat_original_shapes = nest.flatten(dataset.output_shapes)
-        flat_new_shapes = nest.flatten_up_to(output_types, output_shapes)
-
-        for original_shape, new_shape in zip(flat_original_shapes,
-                                             flat_new_shapes):
-          if not original_shape.is_compatible_with(new_shape):
-            raise ValueError(
-                "Dataset with output shapes %r cannot be restructured to have "
-                "incompatible output shapes %r" % (dataset.output_shapes,
-                                                   output_shapes))
-      self._output_shapes = nest.map_structure_up_to(
-          output_types, tensor_shape.as_shape, output_shapes)
-    if output_classes is None:
-      # Inherit class types from the original `dataset`.
-      self._output_classes = nest.pack_sequence_as(output_types,
-                                                   nest.flatten(
-                                                       dataset.output_classes))
-    else:
-      self._output_classes = output_classes
-
-  def _as_variant_tensor(self):
-    return self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-
+# TODO(b/116817045): Move this to `tf.data.experimental` when the `with_shape()`
+# function is available in the core.
 def assert_element_shape(expected_shapes):
   """Assert the shape of this `Dataset`.
 
@@ -687,7 +217,8 @@ def assert_element_shape(expected_shapes):
   def _apply_fn(dataset):
     output_shapes = _merge_output_shapes(dataset.output_shapes,
                                          expected_shapes)
-    return _RestructuredDataset(
+    # pylint: disable=protected-access
+    return batching._RestructuredDataset(
         dataset.map(_check_shape),
         dataset.output_types,
         output_shapes=output_shapes,
@@ -696,49 +227,7 @@ def assert_element_shape(expected_shapes):
   return _apply_fn
 
 
-class _MapAndBatchDataset(dataset_ops.MapDataset):
-  """A `Dataset` that maps a function over a batch of elements."""
-
-  def __init__(self, input_dataset, map_func, batch_size, num_parallel_calls,
-               drop_remainder):
-    """See `Dataset.map()` for details."""
-    super(_MapAndBatchDataset, self).__init__(input_dataset, map_func)
-    self._batch_size_t = ops.convert_to_tensor(
-        batch_size, dtype=dtypes.int64, name="batch_size")
-    self._num_parallel_calls_t = ops.convert_to_tensor(
-        num_parallel_calls, dtype=dtypes.int64, name="num_parallel_calls")
-    self._drop_remainder_t = ops.convert_to_tensor(
-        drop_remainder, dtype=dtypes.bool, name="drop_remainder")
-
-    self._batch_size = batch_size
-    self._drop_remainder = drop_remainder
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    input_resource = self._input_dataset._as_variant_tensor()
-    return gen_dataset_ops.map_and_batch_dataset_v2(
-        input_resource,
-        self._map_func.captured_inputs,
-        f=self._map_func,
-        batch_size=self._batch_size_t,
-        num_parallel_calls=self._num_parallel_calls_t,
-        drop_remainder=self._drop_remainder_t,
-        **dataset_ops.flat_structure(self))
-    # pylint: enable=protected-access
-
-  @property
-  def output_shapes(self):
-    dim = self._batch_size if self._drop_remainder else None
-    return nest.pack_sequence_as(self._output_shapes, [
-        tensor_shape.vector(dim).concatenate(s)
-        for s in nest.flatten(self._output_shapes)
-    ])
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-
+@deprecation.deprecated(None, "Use `tf.data.experimental.map_and_batch(...)`.")
 def map_and_batch(map_func,
                   batch_size,
                   num_parallel_batches=None,
@@ -779,17 +268,5 @@ def map_and_batch(map_func,
     ValueError: If both `num_parallel_batches` and `num_parallel_calls` are
       specified.
   """
-
-  if num_parallel_batches is None and num_parallel_calls is None:
-    num_parallel_calls = batch_size
-  elif num_parallel_batches is not None and num_parallel_calls is None:
-    num_parallel_calls = batch_size * num_parallel_batches
-  elif num_parallel_batches is not None and num_parallel_calls is not None:
-    raise ValueError("The `num_parallel_batches` and `num_parallel_calls` "
-                     "arguments are mutually exclusive.")
-
-  def _apply_fn(dataset):
-    return _MapAndBatchDataset(dataset, map_func, batch_size,
-                               num_parallel_calls, drop_remainder)
-
-  return _apply_fn
+  return batching.map_and_batch(map_func, batch_size, num_parallel_batches,
+                                drop_remainder, num_parallel_calls)
diff --git a/tensorflow/contrib/data/python/ops/counter.py b/tensorflow/contrib/data/python/ops/counter.py
index 6ef65f9624..4ff5bf3e39 100644
--- a/tensorflow/contrib/data/python/ops/counter.py
+++ b/tensorflow/contrib/data/python/ops/counter.py
@@ -17,13 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import scan_ops
-
-from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.experimental.ops import counter
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.Counter(...)`.")
 def Counter(start=0, step=1, dtype=dtypes.int64):
   """Creates a `Dataset` that counts from `start` in steps of size `step`.
 
@@ -46,8 +45,4 @@ def Counter(start=0, step=1, dtype=dtypes.int64):
   Returns:
     A `Dataset` of scalar `dtype` elements.
   """
-  with ops.name_scope("counter"):
-    start = ops.convert_to_tensor(start, dtype=dtype, name="start")
-    step = ops.convert_to_tensor(step, dtype=dtype, name="step")
-    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
-        scan_ops.scan(start, lambda state, _: (state + step, state)))
+  return counter.Counter(start, step, dtype)
diff --git a/tensorflow/contrib/data/python/ops/enumerate_ops.py b/tensorflow/contrib/data/python/ops/enumerate_ops.py
index 490281e0d2..a21da4d3ec 100644
--- a/tensorflow/contrib/data/python/ops/enumerate_ops.py
+++ b/tensorflow/contrib/data/python/ops/enumerate_ops.py
@@ -17,12 +17,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
+from tensorflow.python.data.experimental.ops import enumerate_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.enumerate_dataset(...)`.")
 def enumerate_dataset(start=0):
   """A transformation that enumerate the elements of a dataset.
 
@@ -49,10 +50,4 @@ def enumerate_dataset(start=0):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    max_value = np.iinfo(dtypes.int64.as_numpy_dtype).max
-    return dataset_ops.Dataset.zip((dataset_ops.Dataset.range(start, max_value),
-                                    dataset))
-
-  return _apply_fn
+  return enumerate_ops.enumerate_dataset(start)
diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py
index f962e623ee..0559a2e09c 100644
--- a/tensorflow/contrib/data/python/ops/error_ops.py
+++ b/tensorflow/contrib/data/python/ops/error_ops.py
@@ -17,10 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.data.experimental.ops import error_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.ignore_errors()`.")
 def ignore_errors():
   """Creates a `Dataset` from another `Dataset` and silently ignores any errors.
 
@@ -43,34 +44,4 @@ def ignore_errors():
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    return _IgnoreErrorsDataset(dataset)
-
-  return _apply_fn
-
-
-class _IgnoreErrorsDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that silently ignores errors when computing its input."""
-
-  def __init__(self, input_dataset):
-    """See `Dataset.ignore_errors()` for details."""
-    super(_IgnoreErrorsDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_ignore_errors_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
+  return error_ops.ignore_errors()
diff --git a/tensorflow/contrib/data/python/ops/get_single_element.py b/tensorflow/contrib/data/python/ops/get_single_element.py
index a6713b017a..58ad9eea90 100644
--- a/tensorflow/contrib/data/python/ops/get_single_element.py
+++ b/tensorflow/contrib/data/python/ops/get_single_element.py
@@ -19,13 +19,13 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.ops import get_single_element as experimental_get_single_element
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.get_single_element(...)`.")
 def get_single_element(dataset):
   """Returns the single element in `dataset` as a nested structure of tensors.
 
@@ -61,18 +61,10 @@ def get_single_element(dataset):
     InvalidArgumentError (at runtime): if `dataset` does not contain exactly
       one element.
   """
-  if not isinstance(dataset, dataset_ops.Dataset):
-    raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
-
-  nested_ret = nest.pack_sequence_as(
-      dataset.output_types, gen_dataset_ops.dataset_to_single_element(
-          dataset._as_variant_tensor(),  # pylint: disable=protected-access
-          **dataset_ops.flat_structure(dataset)))
-  return sparse.deserialize_sparse_tensors(
-      nested_ret, dataset.output_types, dataset.output_shapes,
-      dataset.output_classes)
+  return experimental_get_single_element.get_single_element(dataset)
 
 
+@deprecation.deprecated(None, "Use `tf.data.Dataset.reduce(...)`.")
 def reduce_dataset(dataset, reducer):
   """Returns the result of reducing the `dataset` using `reducer`.
 
@@ -90,11 +82,4 @@ def reduce_dataset(dataset, reducer):
   if not isinstance(dataset, dataset_ops.Dataset):
     raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
 
-  # The sentinel dataset is used in case the reduced dataset is empty.
-  sentinel_dataset = dataset_ops.Dataset.from_tensors(
-      reducer.finalize_func(reducer.init_func(np.int64(0))))
-  reduced_dataset = dataset.apply(
-      grouping.group_by_reducer(lambda x: np.int64(0), reducer))
-
-  return get_single_element(
-      reduced_dataset.concatenate(sentinel_dataset).take(1))
+  return dataset.reduce(reducer.init_func(np.int64(0)), reducer.reduce_func)
diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py
index 7cae33beb3..a99dc2f29a 100644
--- a/tensorflow/contrib/data/python/ops/grouping.py
+++ b/tensorflow/contrib/data/python/ops/grouping.py
@@ -17,20 +17,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.ops import math_ops
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.group_by_reducer(...)`.")
 def group_by_reducer(key_func, reducer):
   """A transformation that groups elements and performs a reduction.
 
@@ -52,14 +45,11 @@ def group_by_reducer(key_func, reducer):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _GroupByReducerDataset(dataset, key_func, reducer)
-
-  return _apply_fn
+  return grouping.group_by_reducer(key_func, reducer)
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.group_by_window(...)`.")
 def group_by_window(key_func,
                     reduce_func,
                     window_size=None,
@@ -98,27 +88,12 @@ def group_by_window(key_func,
     ValueError: if neither or both of {`window_size`, `window_size_func`} are
       passed.
   """
-  if (window_size is not None and window_size_func or
-      not (window_size is not None or window_size_func)):
-    raise ValueError("Must pass either window_size or window_size_func.")
-
-  if window_size is not None:
-
-    def constant_window_func(unused_key):
-      return ops.convert_to_tensor(window_size, dtype=dtypes.int64)
-
-    window_size_func = constant_window_func
-
-  assert window_size_func is not None
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _GroupByWindowDataset(dataset, key_func, reduce_func,
-                                 window_size_func)
-
-  return _apply_fn
+  return grouping.group_by_window(key_func, reduce_func, window_size,
+                                  window_size_func)
 
 
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.bucket_by_sequence_length(...)`.")
 def bucket_by_sequence_length(element_length_func,
                               bucket_boundaries,
                               bucket_batch_sizes,
@@ -163,342 +138,12 @@ def bucket_by_sequence_length(element_length_func,
   Raises:
     ValueError: if `len(bucket_batch_sizes) != len(bucket_boundaries) + 1`.
   """
-  with ops.name_scope("bucket_by_seq_length"):
-    if len(bucket_batch_sizes) != (len(bucket_boundaries) + 1):
-      raise ValueError(
-          "len(bucket_batch_sizes) must equal len(bucket_boundaries) + 1")
-
-    batch_sizes = constant_op.constant(bucket_batch_sizes, dtype=dtypes.int64)
-
-    def element_to_bucket_id(*args):
-      """Return int64 id of the length bucket for this element."""
-      seq_length = element_length_func(*args)
-
-      boundaries = list(bucket_boundaries)
-      buckets_min = [np.iinfo(np.int32).min] + boundaries
-      buckets_max = boundaries + [np.iinfo(np.int32).max]
-      conditions_c = math_ops.logical_and(
-          math_ops.less_equal(buckets_min, seq_length),
-          math_ops.less(seq_length, buckets_max))
-      bucket_id = math_ops.reduce_min(array_ops.where(conditions_c))
-
-      return bucket_id
-
-    def window_size_fn(bucket_id):
-      # The window size is set to the batch size for this bucket
-      window_size = batch_sizes[bucket_id]
-      return window_size
-
-    def make_padded_shapes(shapes, none_filler=None):
-      padded = []
-      for shape in nest.flatten(shapes):
-        shape = tensor_shape.TensorShape(shape)
-        shape = [
-            none_filler if d.value is None else d
-            for d in shape
-        ]
-        padded.append(shape)
-      return nest.pack_sequence_as(shapes, padded)
-
-    def batching_fn(bucket_id, grouped_dataset):
-      """Batch elements in dataset."""
-      batch_size = window_size_fn(bucket_id)
-      if no_padding:
-        return grouped_dataset.batch(batch_size)
-      none_filler = None
-      if pad_to_bucket_boundary:
-        err_msg = ("When pad_to_bucket_boundary=True, elements must have "
-                   "length < max(bucket_boundaries).")
-        check = check_ops.assert_less(
-            bucket_id,
-            constant_op.constant(len(bucket_batch_sizes) - 1,
-                                 dtype=dtypes.int64),
-            message=err_msg)
-        with ops.control_dependencies([check]):
-          boundaries = constant_op.constant(bucket_boundaries,
-                                            dtype=dtypes.int64)
-          bucket_boundary = boundaries[bucket_id]
-          none_filler = bucket_boundary - 1
-      shapes = make_padded_shapes(
-          padded_shapes or grouped_dataset.output_shapes,
-          none_filler=none_filler)
-      return grouped_dataset.padded_batch(batch_size, shapes, padding_values)
-
-    def _apply_fn(dataset):
-      return dataset.apply(
-          group_by_window(element_to_bucket_id, batching_fn,
-                          window_size_func=window_size_fn))
-
-    return _apply_fn
-
-
-def _map_x_dataset(map_func):
-  """A transformation that maps `map_func` across its input.
-
-  This transformation is similar to `tf.data.Dataset.map`, but in addition to
-  supporting dense and sparse tensor inputs, it also supports dataset inputs.
-
-  Args:
-    map_func: A function mapping a nested structure of tensors and/or datasets
-      (having shapes and types defined by `self.output_shapes` and
-     `self.output_types`) to another nested structure of tensors and/or
-     datasets.
-
-  Returns:
-    Dataset: A `Dataset`.
-  """
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _MapXDataset(dataset, map_func)
-
-  return _apply_fn
-
-
-# TODO(b/115382007) Remove this once canned reducers move to core.
-def window_dataset(window_size):
-  """A transformation that creates window datasets from the input dataset.
-
-  The resulting datasets will contain `window_size` elements (or
-  `N % window_size` for the last dataset if `window_size` does not divide the
-  number of input elements `N` evenly).
-
-  Args:
-    window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
-      consecutive elements of the input dataset to combine into a window.
-
-  Returns:
-    Dataset: A `Dataset`.
-  """
-
-  def _apply_fn(dataset):
-    return dataset_ops.WindowDataset(
-        dataset,
-        size=window_size,
-        shift=window_size,
-        stride=1,
-        drop_remainder=False)
-
-  return _apply_fn
-
-
-class _GroupByReducerDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that groups its input and performs a reduction."""
-
-  def __init__(self, input_dataset, key_func, reducer):
-    """See `group_by_reducer()` for details."""
-    super(_GroupByReducerDataset, self).__init__(input_dataset)
+  return grouping.bucket_by_sequence_length(
+      element_length_func, bucket_boundaries, bucket_batch_sizes, padded_shapes,
+      padding_values, pad_to_bucket_boundary, no_padding)
 
-    self._input_dataset = input_dataset
 
-    self._make_key_func(key_func, input_dataset)
-    self._make_init_func(reducer.init_func)
-    self._make_reduce_func(reducer.reduce_func, input_dataset)
-    self._make_finalize_func(reducer.finalize_func)
-
-  def _make_key_func(self, key_func, input_dataset):
-    """Make wrapping Defun for key_func."""
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        key_func, "tf.contrib.data.group_by_reducer()", input_dataset)
-    if not (
-        wrapped_func.output_types == dtypes.int64 and
-        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
-      raise ValueError(
-          "`key_func` must return a single tf.int64 tensor. "
-          "Got type=%s and shape=%s"
-          % (wrapped_func.output_types, wrapped_func.output_shapes))
-    self._key_func = wrapped_func.function
-
-  def _make_init_func(self, init_func):
-    """Make wrapping Defun for init_func."""
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        init_func, "tf.contrib.data.group_by_reducer()",
-        input_classes=ops.Tensor, input_shapes=tensor_shape.scalar(),
-        input_types=dtypes.int64)
-    self._init_func = wrapped_func.function
-    self._state_classes = wrapped_func.output_classes
-    self._state_shapes = wrapped_func.output_shapes
-    self._state_types = wrapped_func.output_types
-
-  def _make_reduce_func(self, reduce_func, input_dataset):
-    """Make wrapping Defun for reduce_func."""
-
-    # Iteratively rerun the reduce function until reaching a fixed point on
-    # `self._state_shapes`.
-    need_to_rerun = True
-    while need_to_rerun:
-
-      wrapped_func = dataset_ops.StructuredFunctionWrapper(
-          reduce_func, "tf.contrib.data.group_by_reducer()",
-          input_classes=(self._state_classes, input_dataset.output_classes),
-          input_shapes=(self._state_shapes, input_dataset.output_shapes),
-          input_types=(self._state_types, input_dataset.output_types),
-          add_to_graph=False)
-
-      # Extract and validate class information from the returned values.
-      for new_state_class, state_class in zip(
-          nest.flatten(wrapped_func.output_classes),
-          nest.flatten(self._state_classes)):
-        if not issubclass(new_state_class, state_class):
-          raise TypeError(
-              "The element classes for the new state must match the initial "
-              "state. Expected %s; got %s." %
-              (self._state_classes, wrapped_func.output_classes))
-
-      # Extract and validate type information from the returned values.
-      for new_state_type, state_type in zip(
-          nest.flatten(wrapped_func.output_types),
-          nest.flatten(self._state_types)):
-        if new_state_type != state_type:
-          raise TypeError(
-              "The element types for the new state must match the initial "
-              "state. Expected %s; got %s." %
-              (self._state_types, wrapped_func.output_types))
-
-      # Extract shape information from the returned values.
-      flat_state_shapes = nest.flatten(self._state_shapes)
-      flat_new_state_shapes = nest.flatten(wrapped_func.output_shapes)
-      weakened_state_shapes = [
-          original.most_specific_compatible_shape(new)
-          for original, new in zip(flat_state_shapes, flat_new_state_shapes)
-      ]
-
-      need_to_rerun = False
-      for original_shape, weakened_shape in zip(flat_state_shapes,
-                                                weakened_state_shapes):
-        if original_shape.ndims is not None and (
-            weakened_shape.ndims is None or
-            original_shape.as_list() != weakened_shape.as_list()):
-          need_to_rerun = True
-          break
-
-      if need_to_rerun:
-        self._state_shapes = nest.pack_sequence_as(self._state_shapes,
-                                                   weakened_state_shapes)
-
-    self._reduce_func = wrapped_func.function
-    self._reduce_func.add_to_graph(ops.get_default_graph())
-
-  def _make_finalize_func(self, finalize_func):
-    """Make wrapping Defun for finalize_func."""
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        finalize_func, "tf.contrib.data.group_by_reducer()",
-        input_classes=self._state_classes, input_shapes=self._state_shapes,
-        input_types=self._state_types)
-    self._finalize_func = wrapped_func.function
-    self._output_classes = wrapped_func.output_classes
-    self._output_shapes = wrapped_func.output_shapes
-    self._output_types = wrapped_func.output_types
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.group_by_reducer_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._key_func.captured_inputs,
-        self._init_func.captured_inputs,
-        self._reduce_func.captured_inputs,
-        self._finalize_func.captured_inputs,
-        key_func=self._key_func,
-        init_func=self._init_func,
-        reduce_func=self._reduce_func,
-        finalize_func=self._finalize_func,
-        **dataset_ops.flat_structure(self))
-
-
-class _GroupByWindowDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that groups its input and performs a windowed reduction."""
-
-  def __init__(self, input_dataset, key_func, reduce_func, window_size_func):
-    """See `group_by_window()` for details."""
-    super(_GroupByWindowDataset, self).__init__(input_dataset)
-
-    self._input_dataset = input_dataset
-
-    self._make_key_func(key_func, input_dataset)
-    self._make_reduce_func(reduce_func, input_dataset)
-    self._make_window_size_func(window_size_func)
-
-  def _make_window_size_func(self, window_size_func):
-    """Make wrapping Defun for window_size_func."""
-    def window_size_func_wrapper(key):
-      return ops.convert_to_tensor(window_size_func(key), dtype=dtypes.int64)
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        window_size_func_wrapper, "tf.contrib.data.group_by_window()",
-        input_classes=ops.Tensor, input_shapes=tensor_shape.scalar(),
-        input_types=dtypes.int64)
-    if not (
-        wrapped_func.output_types == dtypes.int64 and
-        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
-      raise ValueError(
-          "`window_size_func` must return a single tf.int64 scalar tensor.")
-    self._window_size_func = wrapped_func.function
-
-  def _make_key_func(self, key_func, input_dataset):
-    """Make wrapping Defun for key_func."""
-    def key_func_wrapper(*args):
-      return ops.convert_to_tensor(key_func(*args), dtype=dtypes.int64)
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        key_func_wrapper, "tf.contrib.data.group_by_window()", input_dataset)
-    if not (
-        wrapped_func.output_types == dtypes.int64 and
-        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
-      raise ValueError(
-          "`key_func` must return a single tf.int64 scalar tensor.")
-    self._key_func = wrapped_func.function
-
-  def _make_reduce_func(self, reduce_func, input_dataset):
-    """Make wrapping Defun for reduce_func."""
-    nested_dataset = dataset_ops._NestedDatasetComponent(input_dataset)  # pylint: disable=protected-access
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        reduce_func, "tf.contrib.data.reduce_by_window()",
-        input_classes=(ops.Tensor, nested_dataset),
-        input_shapes=(tensor_shape.scalar(), nested_dataset),
-        input_types=(dtypes.int64, nested_dataset),
-        experimental_nested_dataset_support=True)
-    if not isinstance(
-        wrapped_func.output_classes, dataset_ops._NestedDatasetComponent):  # pylint: disable=protected-access
-      raise TypeError("`reduce_func` must return a `Dataset` object.")
-    self._output_classes = wrapped_func.output_classes.output_classes
-    self._output_types = wrapped_func.output_types.output_types
-    self._output_shapes = wrapped_func.output_shapes.output_shapes
-    self._reduce_func = wrapped_func.function
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.group_by_window_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._key_func.captured_inputs,
-        self._reduce_func.captured_inputs,
-        self._window_size_func.captured_inputs,
-        key_func=self._key_func,
-        reduce_func=self._reduce_func,
-        window_size_func=self._window_size_func,
-        **dataset_ops.flat_structure(self))
-
-
-class Reducer(object):
+class Reducer(grouping.Reducer):
   """A reducer is used for reducing a set of elements.
 
   A reducer is represented as a tuple of the three functions:
@@ -507,58 +152,6 @@ class Reducer(object):
     3) finalization function: state => result
   """
 
+  @deprecation.deprecated(None, "Use `tf.data.experimental.Reducer(...)`.")
   def __init__(self, init_func, reduce_func, finalize_func):
-    self._init_func = init_func
-    self._reduce_func = reduce_func
-    self._finalize_func = finalize_func
-
-  @property
-  def init_func(self):
-    return self._init_func
-
-  @property
-  def reduce_func(self):
-    return self._reduce_func
-
-  @property
-  def finalize_func(self):
-    return self._finalize_func
-
-
-class _MapXDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that maps a function over elements in its input."""
-
-  def __init__(self, input_dataset, map_func):
-    """See `map_x_dataset()` for details."""
-    super(_MapXDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        map_func,
-        "tf.contrib.data.map_x_dataset()",
-        input_dataset,
-        experimental_nested_dataset_support=True)
-    self._output_classes = wrapped_func.output_classes
-    self._output_shapes = wrapped_func.output_shapes
-    self._output_types = wrapped_func.output_types
-    self._map_func = wrapped_func.function
-
-  def _as_variant_tensor(self):
-    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
-    return gen_dataset_ops.map_dataset(
-        input_t,
-        self._map_func.captured_inputs,
-        f=self._map_func,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
+    super(Reducer, self).__init__(init_func, reduce_func, finalize_func)
diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py
index 1ee9db1aa8..f50da4d429 100644
--- a/tensorflow/contrib/data/python/ops/interleave_ops.py
+++ b/tensorflow/contrib/data/python/ops/interleave_ops.py
@@ -17,20 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib import stateless
-from tensorflow.contrib.data.python.ops import random_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.ops import readers
-from tensorflow.python.data.util import nest
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_experimental_dataset_ops
-from tensorflow.python.ops import math_ops
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.parallel_interleave(...)`.")
 def parallel_interleave(map_func,
                         cycle_length,
                         block_length=1,
@@ -80,12 +72,9 @@ def parallel_interleave(map_func,
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    return readers.ParallelInterleaveDataset(
-        dataset, map_func, cycle_length, block_length, sloppy,
-        buffer_output_elements, prefetch_input_elements)
-
-  return _apply_fn
+  return interleave_ops.parallel_interleave(
+      map_func, cycle_length, block_length, sloppy, buffer_output_elements,
+      prefetch_input_elements)
 
 
 @deprecation.deprecated(
@@ -139,63 +128,12 @@ def sloppy_interleave(map_func, cycle_length, block_length=1):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    return readers.ParallelInterleaveDataset(
-        dataset,
-        map_func,
-        cycle_length,
-        block_length,
-        sloppy=True,
-        buffer_output_elements=None,
-        prefetch_input_elements=None)
-
-  return _apply_fn
-
-
-class _DirectedInterleaveDataset(dataset_ops.Dataset):
-  """A substitute for `Dataset.interleave()` on a fixed list of datasets."""
-
-  def __init__(self, selector_input, data_inputs):
-    self._selector_input = selector_input
-    self._data_inputs = list(data_inputs)
-
-    for data_input in data_inputs[1:]:
-      if (data_input.output_types != data_inputs[0].output_types or
-          data_input.output_classes != data_inputs[0].output_classes):
-        raise TypeError("All datasets must have the same type and class.")
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    return (
-        gen_experimental_dataset_ops.experimental_directed_interleave_dataset(
-            self._selector_input._as_variant_tensor(), [
-                data_input._as_variant_tensor()
-                for data_input in self._data_inputs
-            ], **dataset_ops.flat_structure(self)))
-    # pylint: enable=protected-access
-
-  def _inputs(self):
-    return [self._selector_input] + self._data_inputs
-
-  @property
-  def output_classes(self):
-    return self._data_inputs[0].output_classes
-
-  @property
-  def output_shapes(self):
-    ret = self._data_inputs[0].output_shapes
-    for data_input in self._data_inputs[1:]:
-      ret = nest.pack_sequence_as(ret, [
-          ts1.most_specific_compatible_shape(ts2) for (ts1, ts2) in zip(
-              nest.flatten(ret), nest.flatten(data_input.output_shapes))
-      ])
-    return ret
-
-  @property
-  def output_types(self):
-    return self._data_inputs[0].output_types
+  return interleave_ops.parallel_interleave(
+      map_func, cycle_length, block_length, sloppy=True)
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.sample_from_datasets(...)`.")
 def sample_from_datasets(datasets, weights=None, seed=None):
   """Samples elements at random from the datasets in `datasets`.
 
@@ -219,64 +157,11 @@ def sample_from_datasets(datasets, weights=None, seed=None):
     ValueError: If the `weights` argument is specified and does not match the
       length of the `datasets` element.
   """
-  num_datasets = len(datasets)
-  if not isinstance(weights, dataset_ops.Dataset):
-    if weights is None:
-      # Select inputs with uniform probability.
-      logits = [[1.0] * num_datasets]
-
-    else:
-      # Use the given `weights` as the probability of choosing the respective
-      # input.
-      weights = ops.convert_to_tensor(weights, name="weights")
-      if weights.dtype not in (dtypes.float32, dtypes.float64):
-        raise TypeError("`weights` must be convertible to a tensor of "
-                        "`tf.float32` or `tf.float64` elements.")
-      if not weights.shape.is_compatible_with([num_datasets]):
-        raise ValueError(
-            "`weights` must be a vector of length `len(datasets)`.")
-
-      # The `stateless_multinomial()` op expects log-probabilities, as opposed
-      # to weights.
-      logits = array_ops.expand_dims(math_ops.log(weights, name="logits"), 0)
-
-    # NOTE(mrry): We only specialize when `weights` is not a `Dataset`. When it
-    # is a `Dataset`, it is possible that evaluating it has a side effect the
-    # user depends on.
-    if len(datasets) == 1:
-      return datasets[0]
-
-    def select_dataset_constant_logits(seed):
-      return array_ops.squeeze(
-          stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1])
-
-    selector_input = dataset_ops.MapDataset(
-        random_ops.RandomDataset(seed).batch(2),
-        select_dataset_constant_logits,
-        use_inter_op_parallelism=False)
-
-  else:
-    # Use each element of the given `weights` dataset as the probability of
-    # choosing the respective input.
-
-    # The `stateless_multinomial()` op expects log-probabilities, as opposed to
-    # weights.
-    logits_ds = weights.map(lambda *p: math_ops.log(p, name="logits"))
-
-    def select_dataset_varying_logits(logits, seed):
-      return array_ops.squeeze(
-          stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1])
-
-    logits_and_seeds = dataset_ops.Dataset.zip(
-        (logits_ds, random_ops.RandomDataset(seed).batch(2)))
-    selector_input = dataset_ops.MapDataset(
-        logits_and_seeds,
-        select_dataset_varying_logits,
-        use_inter_op_parallelism=False)
-
-  return _DirectedInterleaveDataset(selector_input, datasets)
+  return interleave_ops.sample_from_datasets(datasets, weights, seed)
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.choose_from_datasets(...)`.")
 def choose_from_datasets(datasets, choice_dataset):
   """Creates a dataset that deterministically chooses elements from `datasets`.
 
@@ -312,10 +197,4 @@ def choose_from_datasets(datasets, choice_dataset):
     TypeError: If the `datasets` or `choice_dataset` arguments have the wrong
       type.
   """
-  if not (choice_dataset.output_types == dtypes.int64
-          and choice_dataset.output_shapes.is_compatible_with(
-              tensor_shape.scalar())
-          and choice_dataset.output_classes == ops.Tensor):
-    raise TypeError("`choice_dataset` must be a dataset of scalar "
-                    "`tf.int64` tensors.")
-  return _DirectedInterleaveDataset(choice_dataset, datasets)
+  return interleave_ops.choose_from_datasets(datasets, choice_dataset)
diff --git a/tensorflow/contrib/data/python/ops/iterator_ops.py b/tensorflow/contrib/data/python/ops/iterator_ops.py
index 18515e21ed..48c325c86f 100644
--- a/tensorflow/contrib/data/python/ops/iterator_ops.py
+++ b/tensorflow/contrib/data/python/ops/iterator_ops.py
@@ -16,15 +16,13 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.training import session_run_hook
 
+from tensorflow.python.data.experimental.ops import iterator_ops
+from tensorflow.python.util import deprecation
 
+
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.make_saveable_from_iterator(...)`.")
 def make_saveable_from_iterator(iterator):
   """Returns a SaveableObject for saving/restore iterator state using Saver.
 
@@ -60,27 +58,10 @@ def make_saveable_from_iterator(iterator):
   Note: Not all iterators support checkpointing yet. Attempting to save the
   state of an unsupported iterator will throw an error.
   """
-  return _Saveable(iterator._iterator_resource)  # pylint: disable=protected-access
-
-
-class _Saveable(saver_lib.BaseSaverBuilder.SaveableObject):
-  """SaveableObject for saving/restoring iterator state."""
+  return iterator_ops.make_saveable_from_iterator(iterator)
 
-  def __init__(self, iterator_resource):
-    serialized_iterator = gen_dataset_ops.serialize_iterator(iterator_resource)
-    specs = [
-        saver_lib.BaseSaverBuilder.SaveSpec(serialized_iterator, "",
-                                            iterator_resource.name + "-state")
-    ]
-    super(_Saveable, self).__init__(iterator_resource, specs,
-                                    iterator_resource.name)
 
-  def restore(self, restored_tensors, unused_restored_shapes):
-    with ops.colocate_with(self.op):
-      return gen_dataset_ops.deserialize_iterator(self.op, restored_tensors[0])
-
-
-class CheckpointInputPipelineHook(session_run_hook.SessionRunHook):
+class CheckpointInputPipelineHook(iterator_ops.CheckpointInputPipelineHook):
   """Checkpoints input pipeline state every N steps or seconds.
 
   This hook saves the state of the iterators in the `Graph` so that when
@@ -125,135 +106,7 @@ class CheckpointInputPipelineHook(session_run_hook.SessionRunHook):
   collector when building the eval graph.
   """
 
+  @deprecation.deprecated(
+      None, "Use `tf.data.experimental.CheckpointInputPipelineHook(...)`.")
   def __init__(self, estimator):
-    """Initializes a `CheckpointInputPipelineHook`.
-
-    Args:
-      estimator: Estimator.
-
-    Raises:
-      ValueError: One of `save_steps` or `save_secs` should be set.
-      ValueError: At most one of saver or scaffold should be set.
-    """
-    # `checkpoint_basename` is "input.ckpt" for non-distributed pipelines or
-    # of the form "input_<task_type>_<task_id>.ckpt" for distributed pipelines.
-    # Note: The default `checkpoint_basename` used by `CheckpointSaverHook` is
-    # "model.ckpt". We intentionally choose the input pipeline checkpoint prefix
-    # to be different to avoid conflicts with the model checkpoint.
-
-    # pylint: disable=protected-access
-    checkpoint_prefix = "input"
-    if estimator._config.num_worker_replicas > 1:
-      # Distributed setting.
-      suffix = "_{}_{}".format(estimator._config.task_type,
-                               estimator._config.task_id)
-      checkpoint_prefix += suffix
-    # pylint: enable=protected-access
-
-    # We use a composition paradigm instead of inheriting from
-    # `CheckpointSaverHook` because `Estimator` does an `isinstance` check
-    # to check whether a `CheckpointSaverHook` is already present in the list
-    # of hooks and if not, adds one. Inheriting from `CheckpointSaverHook`
-    # would thwart this behavior. This hook checkpoints *only the iterators*
-    # and not the graph variables.
-    self._checkpoint_saver_hook = basic_session_run_hooks.CheckpointSaverHook(
-        estimator.model_dir,
-        save_secs=estimator._config.save_checkpoints_secs,  # pylint: disable=protected-access
-        save_steps=estimator._config.save_checkpoints_steps,  # pylint: disable=protected-access
-        checkpoint_basename=checkpoint_prefix + ".ckpt")
-
-    # Name for the protocol buffer file that will contain the list of most
-    # recent checkpoints stored as a `CheckpointState` protocol buffer.
-    # This file, kept in the same directory as the checkpoint files, is
-    # automatically managed by the `Saver` to keep track of recent checkpoints.
-    # The default name used by the `Saver` for this file is "checkpoint". Here
-    # we use the name "checkpoint_<checkpoint_prefix>" so that in case the
-    # `checkpoint_dir` is the same as the model checkpoint directory, there are
-    # no conflicts during restore.
-    self._latest_filename = "checkpoint_" + checkpoint_prefix
-    self._first_run = True
-
-  def begin(self):
-    # Build a Saver that saves all iterators in the `GLOBAL_ITERATORS`
-    # collection if no `Saver` or `Scaffold` is provided.
-    # pylint: disable=protected-access
-    if (self._checkpoint_saver_hook._saver is None and
-        self._checkpoint_saver_hook._scaffold is None):
-      iterators = ops.get_collection(iterator_ops.GLOBAL_ITERATORS)
-      saveables = [_Saveable(i) for i in iterators]
-      self._checkpoint_saver_hook._saver = _CustomSaver(saveables,
-                                                        self._latest_filename)
-    # pylint: enable=protected-access
-    self._checkpoint_saver_hook.begin()
-
-  def _restore_or_save_initial_ckpt(self, session):
-    # Ideally this should be run in after_create_session but is not for the
-    # following reason:
-    # Currently there is no way of enforcing an order of running the
-    # `SessionRunHooks`. Hence it is possible that the `_DatasetInitializerHook`
-    # is run *after* this hook. That is troublesome because
-    # 1. If a checkpoint exists and this hook restores it, the initializer hook
-    #    will override it.
-    # 2. If no checkpoint exists, this hook will try to save an initialized
-    #    iterator which will result in an exception.
-    #
-    # As a temporary fix we enter the following implicit contract between this
-    # hook and the _DatasetInitializerHook.
-    # 1. The _DatasetInitializerHook initializes the iterator in the call to
-    #    after_create_session.
-    # 2. This hook saves the iterator on the first call to `before_run()`, which
-    #    is guaranteed to happen after `after_create_session()` of all hooks
-    #    have been run.
-
-    # Check if there is an existing checkpoint. If so, restore from it.
-    # pylint: disable=protected-access
-    latest_checkpoint_path = checkpoint_management.latest_checkpoint(
-        self._checkpoint_saver_hook._checkpoint_dir,
-        latest_filename=self._latest_filename)
-    if latest_checkpoint_path:
-      self._checkpoint_saver_hook._get_saver().restore(session,
-                                                       latest_checkpoint_path)
-    else:
-      # The checkpoint saved here is the state at step "global_step".
-      # Note: We do not save the GraphDef or MetaGraphDef here.
-      global_step = session.run(self._checkpoint_saver_hook._global_step_tensor)
-      self._checkpoint_saver_hook._save(session, global_step)
-      self._checkpoint_saver_hook._timer.update_last_triggered_step(global_step)
-    # pylint: enable=protected-access
-
-  def before_run(self, run_context):
-    if self._first_run:
-      self._restore_or_save_initial_ckpt(run_context.session)
-      self._first_run = False
-    return self._checkpoint_saver_hook.before_run(run_context)
-
-  def after_run(self, run_context, run_values):
-    self._checkpoint_saver_hook.after_run(run_context, run_values)
-
-  def end(self, session):
-    self._checkpoint_saver_hook.end(session)
-
-
-class _CustomSaver(saver_lib.Saver):
-  """`Saver` with a different default `latest_filename`.
-
-  This is used in the `CheckpointInputPipelineHook` to avoid conflicts with
-  the model ckpt saved by the `CheckpointSaverHook`.
-  """
-
-  def __init__(self, var_list, latest_filename):
-    super(_CustomSaver, self).__init__(var_list)
-    self._latest_filename = latest_filename
-
-  def save(self,
-           sess,
-           save_path,
-           global_step=None,
-           latest_filename=None,
-           meta_graph_suffix="meta",
-           write_meta_graph=True,
-           write_state=True,
-           strip_default_attrs=False):
-    return super(_CustomSaver, self).save(
-        sess, save_path, global_step, latest_filename or self._latest_filename,
-        meta_graph_suffix, write_meta_graph, write_state, strip_default_attrs)
+    super(CheckpointInputPipelineHook, self).__init__(estimator)
diff --git a/tensorflow/contrib/data/python/ops/parsing_ops.py b/tensorflow/contrib/data/python/ops/parsing_ops.py
index cfbba701b0..3aeee9d8e4 100644
--- a/tensorflow/contrib/data/python/ops/parsing_ops.py
+++ b/tensorflow/contrib/data/python/ops/parsing_ops.py
@@ -17,92 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.ops import parsing_ops
+from tensorflow.python.data.experimental.ops import parsing_ops
+from tensorflow.python.util import deprecation
 
 
-class _ParseExampleDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that parses `example` dataset into a `dict` dataset."""
-
-  def __init__(self, input_dataset, features, num_parallel_calls):
-    super(_ParseExampleDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    if not all(types == dtypes.string
-               for types in nest.flatten(input_dataset.output_types)):
-      raise TypeError("Input dataset should be a dataset of vectors of strings")
-    self._num_parallel_calls = num_parallel_calls
-    # pylint: disable=protected-access
-    self._features = parsing_ops._prepend_none_dimension(features)
-    # sparse_keys and dense_keys come back sorted here.
-    (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
-     dense_shapes) = parsing_ops._features_to_raw_params(
-         self._features, [
-             parsing_ops.VarLenFeature, parsing_ops.SparseFeature,
-             parsing_ops.FixedLenFeature, parsing_ops.FixedLenSequenceFeature
-         ])
-    # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature.
-    (_, dense_defaults_vec, sparse_keys, sparse_types, dense_keys, dense_shapes,
-     dense_shape_as_shape) = parsing_ops._process_raw_parameters(
-         None, dense_defaults, sparse_keys, sparse_types, dense_keys,
-         dense_types, dense_shapes)
-    # pylint: enable=protected-access
-    self._sparse_keys = sparse_keys
-    self._sparse_types = sparse_types
-    self._dense_keys = dense_keys
-    self._dense_defaults = dense_defaults_vec
-    self._dense_shapes = dense_shapes
-    self._dense_types = dense_types
-    dense_output_shapes = [
-        self._input_dataset.output_shapes.concatenate(shape)
-        for shape in dense_shape_as_shape
-    ]
-    sparse_output_shapes = [
-        self._input_dataset.output_shapes.concatenate([None])
-        for _ in range(len(sparse_keys))
-    ]
-
-    self._output_shapes = dict(
-        zip(self._dense_keys + self._sparse_keys,
-            dense_output_shapes + sparse_output_shapes))
-    self._output_types = dict(
-        zip(self._dense_keys + self._sparse_keys,
-            self._dense_types + self._sparse_types))
-    self._output_classes = dict(
-        zip(self._dense_keys + self._sparse_keys,
-            [ops.Tensor for _ in range(len(self._dense_defaults))] +
-            [sparse_tensor.SparseTensor for _ in range(len(self._sparse_keys))
-            ]))
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.parse_example_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._num_parallel_calls,
-        self._dense_defaults,
-        self._sparse_keys,
-        self._dense_keys,
-        self._sparse_types,
-        self._dense_shapes,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-
-# TODO(b/111553342): add arguments names and example names as well.
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.parse_example_dataset(...)`.")
 def parse_example_dataset(features, num_parallel_calls=1):
   """A transformation that parses `Example` protos into a `dict` of tensors.
 
@@ -130,21 +50,4 @@ def parse_example_dataset(features, num_parallel_calls=1):
   Raises:
     ValueError: if features argument is None.
   """
-  if features is None:
-    raise ValueError("Missing: features was %s." % features)
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    out_dataset = _ParseExampleDataset(dataset, features, num_parallel_calls)
-    if any([
-        isinstance(feature, parsing_ops.SparseFeature)
-        for _, feature in features.items()
-    ]):
-      # pylint: disable=protected-access
-      # pylint: disable=g-long-lambda
-      out_dataset = out_dataset.map(
-          lambda x: parsing_ops._construct_sparse_tensors_for_sparse_features(
-              features, x), num_parallel_calls=num_parallel_calls)
-    return out_dataset
-
-  return _apply_fn
+  return parsing_ops.parse_example_dataset(features, num_parallel_calls)
diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py
index 46f82e453a..adfb390cd9 100644
--- a/tensorflow/contrib/data/python/ops/prefetching_ops.py
+++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py
@@ -17,321 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import warnings
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.eager import context
-from tensorflow.python.framework import device as framework_device
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import function
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import functional_ops
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
-from tensorflow.python.ops import resource_variable_ops
-
-
-def function_buffering_resource(string_arg,
-                                target_device,
-                                f,
-                                buffer_size,
-                                output_types,
-                                container="",
-                                shared_name=None,
-                                name=None):
-  """Creates a FunctionBufferingResource.
-
-  A FunctionBufferingResource fills up a buffer by calling a function `f` on
-  `target_device`. `f` should take in only a single string argument as input.
-
-  Args:
-    string_arg: The single string argument to the function.
-    target_device: The device to run `f` on.
-    f: The function to be executed.
-    buffer_size: Size of the buffer to be populated.
-    output_types: The output types generated by the function.
-    container: (Optional) string. Defaults to "".
-    shared_name: (Optional) string.
-    name: (Optional) string to name the op.
-
-  Returns:
-    Handle to a FunctionBufferingResource.
-  """
-  if shared_name is None:
-    shared_name = ""
-  return ged_ops.experimental_function_buffering_resource(
-      string_arg=string_arg,
-      target_device=target_device,
-      shared_name=shared_name,
-      f=f,
-      buffer_size=buffer_size,
-      container=container,
-      name=name,
-      output_types=output_types)
-
-
-def function_buffering_resource_get_next(function_buffer_resource,
-                                         output_types,
-                                         name=None):
-  return ged_ops.experimental_function_buffering_resource_get_next(
-      function_buffer_resource=function_buffer_resource,
-      output_types=output_types,
-      name=name)
-
-
-def function_buffering_resource_reset(function_buffer_resource, name=None):
-  return ged_ops.experimental_function_buffering_resource_reset(
-      function_buffer_resource=function_buffer_resource, name=name)
-
-
-# pylint: disable=protected-access
-class _PrefetchToDeviceIterator(object):
-  """A replacement for `tf.data.Iterator` that prefetches to another device.
-
-  Args:
-    input_dataset: The input dataset
-    one_shot: If true, we make a one shot iterator that's already initialized.
-    device: A fully specified device string where we want to prefetch to
-    buffer_size: Size of the prefetching buffer.
-    shared_name: (Optional.) If non-empty, the returned iterator will be
-        shared under the given name across multiple sessions that share the
-        same devices (e.g. when using a remote server).
-
-  Returns:
-    An Iterator type object.
-  """
-
-  def __init__(self,
-               input_dataset,
-               one_shot,
-               device,
-               buffer_size,
-               shared_name=None):
-    self._input_dataset = input_dataset
-    self._get_next_call_count = 0
-    self._one_shot = one_shot
-    if shared_name is None:
-      shared_name = ""
-
-    if self._one_shot:
-      self._input_iterator = input_dataset.make_one_shot_iterator()
-    else:
-      self._input_iterator = iterator_ops.Iterator.from_structure(
-          self._input_dataset.output_types, self._input_dataset.output_shapes,
-          shared_name, self._input_dataset.output_classes)
-    input_iterator_handle = self._input_iterator.string_handle()
-
-    @function.Defun(dtypes.string)
-    def _prefetch_fn(handle):
-      """Prefetches one element from `input_iterator`."""
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          handle, self._input_iterator.output_types,
-          self._input_iterator.output_shapes,
-          self._input_iterator.output_classes)
-      ret = remote_iterator.get_next()
-      return nest.flatten(sparse.serialize_sparse_tensors(ret))
-
-    iterator_device = ged_ops.experimental_iterator_get_device(
-        self._input_iterator._iterator_resource)
-
-    with ops.device(device):
-      self._buffering_resource = function_buffering_resource(
-          f=_prefetch_fn,
-          target_device=iterator_device,
-          string_arg=input_iterator_handle,
-          buffer_size=buffer_size,
-          shared_name=shared_name,
-          output_types=nest.flatten(
-              sparse.as_dense_types(self._input_dataset.output_types,
-                                    self._input_dataset.output_classes)))
-
-    if not self._one_shot:
-      reset_op = function_buffering_resource_reset(self._buffering_resource)
-      with ops.control_dependencies([reset_op]):
-        self._initializer = self._input_iterator.make_initializer(
-            self._input_dataset)
-
-  def get_next(self, name=None):
-    """See `tf.data.Iterator.get_next`."""
-    self._get_next_call_count += 1
-    if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD:
-      warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE)
-
-    flat_ret = ged_ops.experimental_function_buffering_resource_get_next(
-        self._buffering_resource,
-        output_types=nest.flatten(
-            sparse.as_dense_types(self.output_types, self.output_classes)),
-        name=name)
-
-    ret = sparse.deserialize_sparse_tensors(
-        nest.pack_sequence_as(self.output_types, flat_ret),
-        self.output_types, self.output_shapes, self.output_classes)
-
-    for tensor, shape in zip(
-        nest.flatten(ret), nest.flatten(self.output_shapes)):
-      if isinstance(tensor, ops.Tensor):
-        tensor.set_shape(shape)
-
-    return ret
-
-  @property
-  def initializer(self):
-    if self._one_shot:
-      raise NotImplementedError("Can't initialize a one_shot_iterator")
-    return self._initializer
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-
-class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator):
-  """A replacement for `tf.data.Iterator` that prefetches to another device.
-
-  Args:
-    input_dataset: The input dataset
-    one_shot: If true, we make a one shot iterator that's already initialized.
-    device: A fully specified device string where we want to prefetch to
-    buffer_size: Size of the prefetching buffer.
-    shared_name: (Optional.) If non-empty, the returned iterator will be
-        shared under the given name across multiple sessions that share the
-        same devices (e.g. when using a remote server).
-
-  Returns:
-    An Iterator type object.
-  """
-
-  def __init__(self,
-               input_dataset,
-               device,
-               buffer_size):
-    with ops.device("/device:CPU:0"):
-      super(_PrefetchToDeviceEagerIterator, self).__init__(input_dataset)
-      input_iterator_handle = gen_dataset_ops.iterator_to_string_handle(
-          self._resource)
-
-    self._device = device
-
-    @function.Defun(dtypes.string)
-    def _prefetch_fn(handle):
-      """Prefetches one element from `input_iterator`."""
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          handle, self.output_types, self.output_shapes, self.output_classes)
-      ret = remote_iterator.get_next()
-      return nest.flatten(sparse.serialize_sparse_tensors(ret))
-
-    _prefetch_fn.add_to_graph(None)
-
-    with ops.device(device):
-      self._buffering_resource = function_buffering_resource(
-          f=_prefetch_fn,
-          output_types=self._flat_output_types,
-          target_device=ged_ops.experimental_iterator_get_device(
-              self._resource),
-          string_arg=input_iterator_handle,
-          buffer_size=buffer_size,
-          shared_name=iterator_ops._generate_shared_name(
-              "function_buffer_resource"))
-
-  def _next_internal(self):
-    """Returns a nested structure of `tf.Tensor`s containing the next element.
-    """
-    # This runs in sync mode as iterators use an error status to communicate
-    # that there is no more data to iterate over.
-    # TODO(b/77291417): Fix
-    with context.execution_mode(context.SYNC):
-      with ops.device(self._device):
-        ret = ged_ops.experimental_function_buffering_resource_get_next(
-            function_buffer_resource=self._buffering_resource,
-            output_types=self._flat_output_types)
-      return sparse.deserialize_sparse_tensors(
-          nest.pack_sequence_as(self._output_types, ret), self._output_types,
-          self._output_shapes, self._output_classes)
-# pylint: enable=protected-access
-
-
-class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` whose iterator prefetches elements to another device."""
-
-  def __init__(self, input_dataset, device, buffer_size):
-    super(_PrefetchToDeviceDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._device = device
-    self._buffer_size = buffer_size if buffer_size is not None else 1
-
-  # The static analysis cannot tell that the eager iterator's superclass has
-  # a `next()` method.
-  # pylint: disable=non-iterator-returned
-  def __iter__(self):
-    """Creates an `Iterator` for enumerating the elements of this dataset.
-
-    The returned iterator implements the Python iterator protocol and therefore
-    can only be used in eager mode.
-
-    Returns:
-      An `Iterator` over the elements of this dataset.
-
-    Raises:
-      RuntimeError: If eager execution is enabled.
-    """
-    if context.executing_eagerly():
-      return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device,
-                                            self._buffer_size)
-    else:
-      raise RuntimeError("dataset.__iter__() is only supported when eager "
-                         "execution is enabled.")
-  # pylint: enable=non-iterator-returned
-
-  def make_one_shot_iterator(self):
-    if context.executing_eagerly():
-      return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device,
-                                            self._buffer_size)
-    else:
-      return _PrefetchToDeviceIterator(self._input_dataset, one_shot=True,
-                                       device=self._device,
-                                       buffer_size=self._buffer_size)
-
-  def make_initializable_iterator(self, shared_name=None):
-    return _PrefetchToDeviceIterator(
-        self._input_dataset,
-        one_shot=False,
-        device=self._device,
-        buffer_size=self._buffer_size,
-        shared_name=shared_name)
-
-  def _as_variant_tensor(self):
-    # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset
-    # transformation methods is called.
-    # TODO(mrry): Investigate support for chaining further transformations after
-    # the prefetch, including GPU support.
-    raise NotImplementedError("`prefetch_to_device()` must be the last "
-                              "transformation in a dataset pipeline.")
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
+from tensorflow.python.data.experimental.ops import prefetching_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.prefetch_to_device(...)`.")
 def prefetch_to_device(device, buffer_size=None):
   """A transformation that prefetches dataset values to the given `device`.
 
@@ -347,12 +38,10 @@ def prefetch_to_device(device, buffer_size=None):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    return _PrefetchToDeviceDataset(dataset, device, buffer_size)
-
-  return _apply_fn
+  return prefetching_ops.prefetch_to_device(device, buffer_size)
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.copy_to_device(...)`.")
 def copy_to_device(target_device, source_device="/cpu:0"):
   """A transformation that copies dataset elements to the given `target_device`.
 
@@ -364,165 +53,4 @@ def copy_to_device(target_device, source_device="/cpu:0"):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    return _CopyToDeviceDataset(
-        dataset, target_device=target_device, source_device=source_device)
-
-  return _apply_fn
-
-
-# TODO(rohanj): Use the _input_hostmem attr on the RemoteCall ops to indicate
-# all inputs to the Op are in host memory, thereby avoiding some unnecessary
-# Sends and Recvs.
-class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that copies elements to another device."""
-
-  def __init__(self, input_dataset, target_device, source_device="/cpu:0"):
-    """Constructs a _CopyToDeviceDataset.
-
-    Args:
-      input_dataset: `Dataset` to be copied
-      target_device: The name of the device to which elements would be copied.
-      source_device: Device where input_dataset would be placed.
-    """
-    super(_CopyToDeviceDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._target_device = target_device
-    spec = framework_device.DeviceSpec().from_string(self._target_device)
-    self._is_gpu_target = (spec.device_type == "GPU")
-    self._source_device_string = source_device
-    self._source_device = ops.convert_to_tensor(source_device)
-
-    self._flat_output_shapes = nest.flatten(
-        sparse.as_dense_shapes(self._input_dataset.output_shapes,
-                               self._input_dataset.output_classes))
-    self._flat_output_types = nest.flatten(
-        sparse.as_dense_types(self._input_dataset.output_types,
-                              self._input_dataset.output_classes))
-
-    @function.Defun()
-    def _init_func():
-      """Creates an iterator for the input dataset.
-
-      Returns:
-        A `string` tensor that encapsulates the iterator created.
-      """
-      # pylint: disable=protected-access
-      ds_variant = self._input_dataset._as_variant_tensor()
-      resource = gen_dataset_ops.anonymous_iterator(
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
-      with ops.control_dependencies(
-          [gen_dataset_ops.make_iterator(ds_variant, resource)]):
-        return gen_dataset_ops.iterator_to_string_handle(resource)
-
-    @function.Defun()
-    def _remote_init_func():
-      return functional_ops.remote_call(
-          target=self._source_device,
-          args=_init_func.captured_inputs,
-          Tout=[dtypes.string],
-          f=_init_func)
-
-    self._init_func = _remote_init_func
-    self._init_captured_args = _remote_init_func.captured_inputs
-
-    @function.Defun(dtypes.string)
-    def _next_func(string_handle):
-      """Calls get_next for created iterator.
-
-      Args:
-        string_handle: An iterator string handle created by _init_func
-      Returns:
-        The elements generated from `input_dataset`
-      """
-      with ops.device(self._source_device_string):
-        iterator = iterator_ops.Iterator.from_string_handle(
-            string_handle, self.output_types, self.output_shapes,
-            self.output_classes)
-      ret = iterator.get_next()
-      return nest.flatten(sparse.serialize_sparse_tensors(ret))
-
-    @function.Defun(dtypes.string)
-    def _remote_next_func(string_handle):
-      return functional_ops.remote_call(
-          target=self._source_device,
-          args=[string_handle] + _next_func.captured_inputs,
-          Tout=self._flat_output_types,
-          f=_next_func)
-
-    self._next_func = _remote_next_func
-    self._next_captured_args = _remote_next_func.captured_inputs
-
-    @function.Defun(dtypes.string)
-    def _finalize_func(string_handle):
-      """Destroys the iterator resource created.
-
-      Args:
-        string_handle: An iterator string handle created by _init_func
-      Returns:
-        Tensor constant 0
-      """
-      iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
-          string_handle,
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
-      with ops.control_dependencies([
-          resource_variable_ops.destroy_resource_op(
-              iterator_resource, ignore_lookup_error=True)]):
-        return array_ops.constant(0, dtypes.int64)
-
-    @function.Defun(dtypes.string)
-    def _remote_finalize_func(string_handle):
-      return functional_ops.remote_call(
-          target=self._source_device,
-          args=[string_handle] + _finalize_func.captured_inputs,
-          Tout=[dtypes.int64],
-          f=_finalize_func)
-
-    self._finalize_func = _remote_finalize_func
-    self._finalize_captured_args = _remote_finalize_func.captured_inputs
-
-    g = ops.get_default_graph()
-    _remote_init_func.add_to_graph(g)
-    _remote_next_func.add_to_graph(g)
-    _remote_finalize_func.add_to_graph(g)
-    # pylint: enable=protected-scope
-
-  # The one_shot_iterator implementation needs a 0 arg _make_dataset function
-  # that thereby captures all the inputs required to create the dataset. Since
-  # there are strings that are inputs to the GeneratorDataset which can't be
-  # placed on a GPU, this fails for the GPU case. Therefore, disabling it for
-  # GPU
-  def make_one_shot_iterator(self):
-    if self._is_gpu_target:
-      raise ValueError("Cannot create a one shot iterator when using "
-                       "`tf.contrib.data.copy_to_device()` on GPU. Please use "
-                       "`Dataset.make_initializable_iterator()` instead.")
-    else:
-      return super(_CopyToDeviceDataset, self).make_one_shot_iterator()
-
-  def _as_variant_tensor(self):
-    with ops.device(self._target_device):
-      return gen_dataset_ops.generator_dataset(
-          self._init_captured_args,
-          self._next_captured_args,
-          self._finalize_captured_args,
-          init_func=self._init_func,
-          next_func=self._next_func,
-          finalize_func=self._finalize_func,
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
+  return prefetching_ops.copy_to_device(target_device, source_device)
diff --git a/tensorflow/contrib/data/python/ops/random_ops.py b/tensorflow/contrib/data/python/ops/random_ops.py
index 344a0763c8..2c95125636 100644
--- a/tensorflow/contrib/data/python/ops/random_ops.py
+++ b/tensorflow/contrib/data/python/ops/random_ops.py
@@ -17,36 +17,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import random_seed
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.data.experimental.ops import random_ops
+from tensorflow.python.util import deprecation
 
 
-class RandomDataset(dataset_ops.DatasetSource):
+class RandomDataset(random_ops.RandomDataset):
   """A `Dataset` of pseudorandom values."""
 
+  @deprecation.deprecated(
+      None, "Use `tf.data.experimental.RandomDataset(...)`.")
   def __init__(self, seed=None):
-    """A `Dataset` of pseudorandom values."""
-    super(RandomDataset, self).__init__()
-    self._seed, self._seed2 = random_seed.get_seed(seed)
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.random_dataset(
-        seed=self._seed,
-        seed2=self._seed2,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return ops.Tensor
-
-  @property
-  def output_shapes(self):
-    return tensor_shape.scalar()
-
-  @property
-  def output_types(self):
-    return dtypes.int64
+    super(RandomDataset, self).__init__(seed)
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index 360971e200..4601376dff 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -17,295 +17,20 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import csv
-
-import numpy as np
-
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import interleave_ops
-from tensorflow.contrib.data.python.ops import optimization
-from tensorflow.contrib.data.python.ops import parsing_ops
-from tensorflow.contrib.data.python.ops import shuffle_ops
+from tensorflow.python.data.experimental.ops import optimization
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers as core_readers
-from tensorflow.python.data.util import convert
 from tensorflow.python.data.util import nest
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.lib.io import file_io
-from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import gen_experimental_dataset_ops
-from tensorflow.python.platform import gfile
 from tensorflow.python.util import deprecation
 
-_ACCEPTABLE_CSV_TYPES = (dtypes.float32, dtypes.float64, dtypes.int32,
-                         dtypes.int64, dtypes.string)
-
-
-def _is_valid_int32(str_val):
-  try:
-    # Checks equality to prevent int32 overflow
-    return dtypes.int32.as_numpy_dtype(str_val) == dtypes.int64.as_numpy_dtype(
-        str_val)
-  except (ValueError, OverflowError):
-    return False
-
-
-def _is_valid_int64(str_val):
-  try:
-    dtypes.int64.as_numpy_dtype(str_val)
-    return True
-  except (ValueError, OverflowError):
-    return False
-
-
-def _is_valid_float(str_val, float_dtype):
-  try:
-    return float_dtype.as_numpy_dtype(str_val) < np.inf
-  except ValueError:
-    return False
-
-
-def _infer_type(str_val, na_value, prev_type):
-  """Given a string, infers its tensor type.
-
-  Infers the type of a value by picking the least 'permissive' type possible,
-  while still allowing the previous type inference for this column to be valid.
-
-  Args:
-    str_val: String value to infer the type of.
-    na_value: Additional string to recognize as a NA/NaN CSV value.
-    prev_type: Type previously inferred based on values of this column that
-      we've seen up till now.
-  Returns:
-    Inferred dtype.
-  """
-  if str_val in ("", na_value):
-    # If the field is null, it gives no extra information about its type
-    return prev_type
-
-  type_list = [
-      dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64, dtypes.string
-  ]  # list of types to try, ordered from least permissive to most
-
-  type_functions = [
-      _is_valid_int32,
-      _is_valid_int64,
-      lambda str_val: _is_valid_float(str_val, dtypes.float32),
-      lambda str_val: _is_valid_float(str_val, dtypes.float64),
-      lambda str_val: True,
-  ]  # Corresponding list of validation functions
-
-  for i in range(len(type_list)):
-    validation_fn = type_functions[i]
-    if validation_fn(str_val) and (prev_type is None or
-                                   prev_type in type_list[:i + 1]):
-      return type_list[i]
-
-
-def _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header):
-  """Generator that yields rows of CSV file(s) in order."""
-  for fn in filenames:
-    with file_io.FileIO(fn, "r") as f:
-      rdr = csv.reader(
-          f,
-          delimiter=field_delim,
-          quoting=csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE)
-      if header:
-        next(rdr)  # Skip header lines
-
-      for csv_row in rdr:
-        if len(csv_row) != num_cols:
-          raise ValueError(
-              "Problem inferring types: CSV row has different number of fields "
-              "than expected.")
-        yield csv_row
-
-
-def _infer_column_defaults(filenames, num_cols, field_delim, use_quote_delim,
-                           na_value, header, num_rows_for_inference,
-                           select_columns):
-  """Infers column types from the first N valid CSV records of files."""
-  if select_columns is None:
-    select_columns = range(num_cols)
-  inferred_types = [None] * len(select_columns)
-
-  for i, csv_row in enumerate(
-      _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header)):
-    if num_rows_for_inference is not None and i >= num_rows_for_inference:
-      break
-
-    for j, col_index in enumerate(select_columns):
-      inferred_types[j] = _infer_type(csv_row[col_index], na_value,
-                                      inferred_types[j])
-
-  # Replace None's with a default type
-  inferred_types = [t or dtypes.string for t in inferred_types]
-  # Default to 0 or '' for null values
-  return [
-      constant_op.constant([0 if t is not dtypes.string else ""], dtype=t)
-      for t in inferred_types
-  ]
-
-
-def _infer_column_names(filenames, field_delim, use_quote_delim):
-  """Infers column names from first rows of files."""
-  csv_kwargs = {
-      "delimiter": field_delim,
-      "quoting": csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE
-  }
-  with file_io.FileIO(filenames[0], "r") as f:
-    try:
-      column_names = next(csv.reader(f, **csv_kwargs))
-    except StopIteration:
-      raise ValueError(("Received StopIteration when reading the header line "
-                        "of %s.  Empty file?") % filenames[0])
-
-  for name in filenames[1:]:
-    with file_io.FileIO(name, "r") as f:
-      try:
-        if next(csv.reader(f, **csv_kwargs)) != column_names:
-          raise ValueError(
-              "Files have different column names in the header row.")
-      except StopIteration:
-        raise ValueError(("Received StopIteration when reading the header line "
-                          "of %s.  Empty file?") % filenames[0])
-  return column_names
-
-
-def _get_sorted_col_indices(select_columns, column_names):
-  """Transforms select_columns argument into sorted column indices."""
-  names_to_indices = {n: i for i, n in enumerate(column_names)}
-  num_cols = len(column_names)
-  for i, v in enumerate(select_columns):
-    if isinstance(v, int):
-      if v < 0 or v >= num_cols:
-        raise ValueError(
-            "Column index %d specified in select_columns out of valid range." %
-            v)
-      continue
-    if v not in names_to_indices:
-      raise ValueError(
-          "Value '%s' specified in select_columns not a valid column index or "
-          "name." % v)
-    select_columns[i] = names_to_indices[v]
-
-  # Sort and ensure there are no duplicates
-  result = sorted(set(select_columns))
-  if len(result) != len(select_columns):
-    raise ValueError("select_columns contains duplicate columns")
-  return result
-
-
-def _maybe_shuffle_and_repeat(
-    dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed):
-  """Optionally shuffle and repeat dataset, as requested."""
-  if num_epochs != 1 and shuffle:
-    # Use shuffle_and_repeat for perf
-    return dataset.apply(
-        shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs,
-                                       shuffle_seed))
-  elif shuffle:
-    return dataset.shuffle(shuffle_buffer_size, shuffle_seed)
-  elif num_epochs != 1:
-    return dataset.repeat(num_epochs)
-  return dataset
-
-
-def make_tf_record_dataset(file_pattern,
-                           batch_size,
-                           parser_fn=None,
-                           num_epochs=None,
-                           shuffle=True,
-                           shuffle_buffer_size=None,
-                           shuffle_seed=None,
-                           prefetch_buffer_size=optimization.AUTOTUNE,
-                           num_parallel_reads=None,
-                           num_parallel_parser_calls=None,
-                           drop_final_batch=False):
-  """Reads and optionally parses TFRecord files into a dataset.
-
-  Provides common functionality such as batching, optional parsing, shuffling,
-  and performant defaults.
-
-  Args:
-    file_pattern: List of files or patterns of TFRecord file paths.
-      See `tf.gfile.Glob` for pattern rules.
-    batch_size: An int representing the number of records to combine
-      in a single batch.
-    parser_fn: (Optional.) A function accepting string input to parse
-      and process the record contents. This function must map records
-      to components of a fixed shape, so they may be batched. By
-      default, uses the record contents unmodified.
-    num_epochs: (Optional.) An int specifying the number of times this
-      dataset is repeated.  If None (the default), cycles through the
-      dataset forever.
-    shuffle: (Optional.) A bool that indicates whether the input
-      should be shuffled. Defaults to `True`.
-    shuffle_buffer_size: (Optional.) Buffer size to use for
-      shuffling. A large buffer size ensures better shuffling, but
-      increases memory usage and startup time.
-    shuffle_seed: (Optional.) Randomization seed to use for shuffling.
-    prefetch_buffer_size: (Optional.) An int specifying the number of
-      feature batches to prefetch for performance improvement.
-      Defaults to auto-tune. Set to 0 to disable prefetching.
-    num_parallel_reads: (Optional.) Number of threads used to read
-      records from files. By default or if set to a value >1, the
-      results will be interleaved.
-    num_parallel_parser_calls: (Optional.) Number of parallel
-      records to parse in parallel. Defaults to an automatic selection.
-    drop_final_batch: (Optional.) Whether the last batch should be
-      dropped in case its size is smaller than `batch_size`; the
-      default behavior is not to drop the smaller batch.
-
-  Returns:
-    A dataset, where each element matches the output of `parser_fn`
-    except it will have an additional leading `batch-size` dimension,
-    or a `batch_size`-length 1-D tensor of strings if `parser_fn` is
-    unspecified.
-  """
-  files = dataset_ops.Dataset.list_files(
-      file_pattern, shuffle=shuffle, seed=shuffle_seed)
-
-  if num_parallel_reads is None:
-    # Note: We considered auto-tuning this value, but there is a concern
-    # that this affects the mixing of records from different files, which
-    # could affect training convergence/accuracy, so we are defaulting to
-    # a constant for now.
-    num_parallel_reads = 24
-  dataset = core_readers.TFRecordDataset(
-      files, num_parallel_reads=num_parallel_reads)
-
-  if shuffle_buffer_size is None:
-    # TODO(josh11b): Auto-tune this value when not specified
-    shuffle_buffer_size = 10000
-  dataset = _maybe_shuffle_and_repeat(
-      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
-
-  # NOTE(mrry): We set `drop_final_batch=True` when `num_epochs is None` to
-  # improve the shape inference, because it makes the batch dimension static.
-  # It is safe to do this because in that case we are repeating the input
-  # indefinitely, and all batches will be full-sized.
-  drop_final_batch = drop_final_batch or num_epochs is None
-
-  if parser_fn is None:
-    dataset = dataset.batch(batch_size, drop_remainder=drop_final_batch)
-  else:
-    # TODO(josh11b): if num_parallel_parser_calls is None, use some function
-    # of num cores instead of map_and_batch's default behavior of one batch.
-    dataset = dataset.apply(batching.map_and_batch(
-        parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls,
-        drop_remainder=drop_final_batch))
-
-  if prefetch_buffer_size == 0:
-    return dataset
-  else:
-    return dataset.prefetch(buffer_size=prefetch_buffer_size)
-
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.make_csv_dataset(...)`.")
 def make_csv_dataset(
     file_pattern,
     batch_size,
@@ -387,7 +112,6 @@ def make_csv_dataset(
     prefetch_buffer_size: An int specifying the number of feature
       batches to prefetch for performance improvement. Recommended value is the
       number of batches consumed per training step. Defaults to auto-tune.
-
     num_parallel_reads: Number of threads used to read CSV records from files.
       If >1, the results will be interleaved.
     sloppy: If `True`, reading performance will be improved at
@@ -411,106 +135,18 @@ def make_csv_dataset(
   Raises:
     ValueError: If any of the arguments is malformed.
   """
-  # Create dataset of all matching filenames
-  filenames = _get_file_names(file_pattern, False)
-  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
-  if shuffle:
-    dataset = dataset.shuffle(len(filenames), shuffle_seed)
-
-  # Clean arguments; figure out column names and defaults
+  return readers.make_csv_dataset(
+      file_pattern, batch_size, column_names, column_defaults, label_name,
+      select_columns, field_delim, use_quote_delim, na_value, header,
+      num_epochs, shuffle, shuffle_buffer_size, shuffle_seed,
+      prefetch_buffer_size, num_parallel_reads, sloppy, num_rows_for_inference,
+      compression_type)
 
-  if column_names is None:
-    if not header:
-      raise ValueError("Cannot infer column names without a header line.")
-    # If column names are not provided, infer from the header lines
-    column_names = _infer_column_names(filenames, field_delim, use_quote_delim)
-  if len(column_names) != len(set(column_names)):
-    raise ValueError("Cannot have duplicate column names.")
 
-  if select_columns is not None:
-    select_columns = _get_sorted_col_indices(select_columns, column_names)
-
-  if column_defaults is not None:
-    column_defaults = [
-        constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x
-        for x in column_defaults
-    ]
-  else:
-    # If column defaults are not provided, infer from records at graph
-    # construction time
-    column_defaults = _infer_column_defaults(
-        filenames, len(column_names), field_delim, use_quote_delim, na_value,
-        header, num_rows_for_inference, select_columns)
-
-  if select_columns is not None and len(column_defaults) != len(select_columns):
-    raise ValueError(
-        "If specified, column_defaults and select_columns must have same "
-        "length."
-    )
-  if select_columns is not None and len(column_names) > len(select_columns):
-    # Pick the relevant subset of column names
-    column_names = [column_names[i] for i in select_columns]
-
-  if label_name is not None and label_name not in column_names:
-    raise ValueError("`label_name` provided must be one of the columns.")
-
-  def filename_to_dataset(filename):
-    return CsvDataset(
-        filename,
-        record_defaults=column_defaults,
-        field_delim=field_delim,
-        use_quote_delim=use_quote_delim,
-        na_value=na_value,
-        select_cols=select_columns,
-        header=header,
-        compression_type=compression_type,
-    )
-
-  def map_fn(*columns):
-    """Organizes columns into a features dictionary.
-
-    Args:
-      *columns: list of `Tensor`s corresponding to one csv record.
-    Returns:
-      An OrderedDict of feature names to values for that particular record. If
-      label_name is provided, extracts the label feature to be returned as the
-      second element of the tuple.
-    """
-    features = collections.OrderedDict(zip(column_names, columns))
-    if label_name is not None:
-      label = features.pop(label_name)
-      return features, label
-    return features
-
-  # Read files sequentially (if num_parallel_reads=1) or in parallel
-  dataset = dataset.apply(
-      interleave_ops.parallel_interleave(
-          filename_to_dataset, cycle_length=num_parallel_reads, sloppy=sloppy))
-
-  dataset = _maybe_shuffle_and_repeat(
-      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
-
-  # Apply batch before map for perf, because map has high overhead relative
-  # to the size of the computation in each map.
-  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
-  # improve the shape inference, because it makes the batch dimension static.
-  # It is safe to do this because in that case we are repeating the input
-  # indefinitely, and all batches will be full-sized.
-  dataset = dataset.batch(batch_size=batch_size,
-                          drop_remainder=num_epochs is None)
-  dataset = dataset_ops.MapDataset(
-      dataset, map_fn, use_inter_op_parallelism=False)
-  dataset = dataset.prefetch(prefetch_buffer_size)
-
-  return dataset
-
-
-_DEFAULT_READER_BUFFER_SIZE_BYTES = 4 * 1024 * 1024  # 4 MB
-
-
-class CsvDataset(dataset_ops.DatasetSource):
+class CsvDataset(readers.CsvDataset):
   """A Dataset comprising lines from one or more CSV files."""
 
+  @deprecation.deprecated(None, "Use `tf.data.experimental.CsvDataset(...)`.")
   def __init__(self,
                filenames,
                record_defaults,
@@ -521,140 +157,13 @@ class CsvDataset(dataset_ops.DatasetSource):
                use_quote_delim=True,
                na_value="",
                select_cols=None):
-    """Creates a `CsvDataset` by reading and decoding CSV files.
-
-    The elements of this dataset correspond to records from the file(s).
-    RFC 4180 format is expected for CSV files
-    (https://tools.ietf.org/html/rfc4180)
-    Note that we allow leading and trailing spaces with int or float field.
-
-
-    For example, suppose we have a file 'my_file0.csv' with four CSV columns of
-    different data types:
-    ```
-    abcdefg,4.28E10,5.55E6,12
-    hijklmn,-5.3E14,,2
-    ```
-
-    We can construct a CsvDataset from it as follows:
-    ```python
-    dataset = tf.contrib.data.CsvDataset(
-      "my_file*.csv",
-      [tf.float32,  # Required field, use dtype or empty tensor
-       tf.constant([0.0], dtype=tf.float32),  # Optional field, default to 0.0
-       tf.int32,  # Required field, use dtype or empty tensor
-       ],
-      select_cols=[1,2,3]  # Only parse last three columns
-    )
-    ```
-
-    The expected output of its iterations is:
-    ```python
-    next_element = dataset.make_one_shot_iterator().get_next()
-    with tf.Session() as sess:
-      while True:
-        try:
-          print(sess.run(next_element))
-        except tf.errors.OutOfRangeError:
-          break
-
-    >> (4.28e10, 5.55e6, 12)
-    >> (-5.3e14, 0.0, 2)
-    ```
-
-    Args:
-      filenames: A `tf.string` tensor containing one or more filenames.
-      record_defaults: A list of default values for the CSV fields. Each item in
-        the list is either a valid CSV `DType` (float32, float64, int32, int64,
-        string), or a `Tensor` object with one of the above types. One per
-        column of CSV data, with either a scalar `Tensor` default value for the
-        column if it is optional, or `DType` or empty `Tensor` if required. If
-        both this and `select_columns` are specified, these must have the same
-        lengths, and `column_defaults` is assumed to be sorted in order of
-        increasing column index.
-      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
-        `""` (no compression), `"ZLIB"`, or `"GZIP"`. Defaults to no
-        compression.
-      buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes
-        to buffer while reading files. Defaults to 4MB.
-      header: (Optional.) A `tf.bool` scalar indicating whether the CSV file(s)
-        have header line(s) that should be skipped when parsing. Defaults to
-        `False`.
-      field_delim: (Optional.) A `tf.string` scalar containing the delimiter
-        character that separates fields in a record. Defaults to `","`.
-      use_quote_delim: (Optional.) A `tf.bool` scalar. If `False`, treats
-        double quotation marks as regular characters inside of string fields
-        (ignoring RFC 4180, Section 2, Bullet 5). Defaults to `True`.
-      na_value: (Optional.) A `tf.string` scalar indicating a value that will
-        be treated as NA/NaN.
-      select_cols: (Optional.) A sorted list of column indices to select from
-        the input data. If specified, only this subset of columns will be
-        parsed. Defaults to parsing all columns.
-    """
-    super(CsvDataset, self).__init__()
-    self._filenames = ops.convert_to_tensor(
-        filenames, dtype=dtypes.string, name="filenames")
-    self._compression_type = convert.optional_param_to_tensor(
-        "compression_type",
-        compression_type,
-        argument_default="",
-        argument_dtype=dtypes.string)
-    record_defaults = [
-        constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x
-        for x in record_defaults
-    ]
-    self._record_defaults = ops.convert_n_to_tensor(
-        record_defaults, name="record_defaults")
-    self._buffer_size = convert.optional_param_to_tensor(
-        "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
-    self._header = ops.convert_to_tensor(
-        header, dtype=dtypes.bool, name="header")
-    self._field_delim = ops.convert_to_tensor(
-        field_delim, dtype=dtypes.string, name="field_delim")
-    self._use_quote_delim = ops.convert_to_tensor(
-        use_quote_delim, dtype=dtypes.bool, name="use_quote_delim")
-    self._na_value = ops.convert_to_tensor(
-        na_value, dtype=dtypes.string, name="na_value")
-    self._select_cols = convert.optional_param_to_tensor(
-        "select_cols",
-        select_cols,
-        argument_default=[],
-        argument_dtype=dtypes.int64,
-    )
-    self._output_shapes = tuple(
-        tensor_shape.scalar() for _ in range(len(record_defaults)))
-    self._output_types = tuple(d.dtype for d in self._record_defaults)
-    self._output_classes = tuple(
-        ops.Tensor for _ in range(len(record_defaults)))
-
-  def _as_variant_tensor(self):
-    # Constructs graph node for the dataset op.
-    return gen_experimental_dataset_ops.experimental_csv_dataset(
-        filenames=self._filenames,
-        record_defaults=self._record_defaults,
-        buffer_size=self._buffer_size,
-        header=self._header,
-        output_shapes=self._output_shapes,
-        field_delim=self._field_delim,
-        use_quote_delim=self._use_quote_delim,
-        na_value=self._na_value,
-        select_cols=self._select_cols,
-        compression_type=self._compression_type,
-    )
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_classes(self):
-    return self._output_classes
+    super(CsvDataset, self).__init__(
+        filenames, record_defaults, compression_type, buffer_size, header,
+        field_delim, use_quote_delim, na_value, select_cols)
 
 
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.make_batched_features_dataset(...)`.")
 def make_batched_features_dataset(file_pattern,
                                   batch_size,
                                   features,
@@ -759,57 +268,15 @@ def make_batched_features_dataset(file_pattern,
   Raises:
     ValueError: If `label_key` is not one of the `features` keys.
   """
-  # Create dataset of all matching filenames
-  filenames = _get_file_names(file_pattern, False)
-  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
-  if shuffle:
-    dataset = dataset.shuffle(len(filenames), shuffle_seed)
-
-  # Read `Example` records from files as tensor objects.
-  if reader_args is None:
-    reader_args = []
+  return readers.make_batched_features_dataset(
+      file_pattern, batch_size, features, reader, label_key, reader_args,
+      num_epochs, shuffle, shuffle_buffer_size, shuffle_seed,
+      prefetch_buffer_size, reader_num_threads, parser_num_threads,
+      sloppy_ordering, drop_final_batch)
 
-  # Read files sequentially (if reader_num_threads=1) or in parallel
-  dataset = dataset.apply(
-      interleave_ops.parallel_interleave(
-          lambda filename: reader(filename, *reader_args),
-          cycle_length=reader_num_threads,
-          sloppy=sloppy_ordering))
 
-  # Extract values if the `Example` tensors are stored as key-value tuples.
-  if dataset.output_types == (dtypes.string, dtypes.string):
-    dataset = dataset_ops.MapDataset(
-        dataset, lambda _, v: v, use_inter_op_parallelism=False)
-
-  # Apply dataset repeat and shuffle transformations.
-  dataset = _maybe_shuffle_and_repeat(
-      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
-
-  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
-  # improve the shape inference, because it makes the batch dimension static.
-  # It is safe to do this because in that case we are repeating the input
-  # indefinitely, and all batches will be full-sized.
-  dataset = dataset.batch(
-      batch_size, drop_remainder=drop_final_batch or num_epochs is None)
-
-  # Parse `Example` tensors to a dictionary of `Feature` tensors.
-  dataset = dataset.apply(
-      parsing_ops.parse_example_dataset(
-          features, num_parallel_calls=parser_num_threads))
-
-  if label_key:
-    if label_key not in features:
-      raise ValueError(
-          "The `label_key` provided (%r) must be one of the `features` keys." %
-          label_key)
-    dataset = dataset.map(lambda x: (x, x.pop(label_key)))
-
-  dataset = dataset.prefetch(prefetch_buffer_size)
-  return dataset
-
-
-@deprecation.deprecated(None,
-                        "Use `tf.contrib.data.make_batched_features_dataset`")
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.make_batched_features_dataset(...)`")
 def read_batch_features(file_pattern,
                         batch_size,
                         features,
@@ -879,7 +346,7 @@ def read_batch_features(file_pattern,
   Returns:
     A dict from keys in features to `Tensor` or `SparseTensor` objects.
   """
-  dataset = make_batched_features_dataset(
+  dataset = readers.make_batched_features_dataset(
       file_pattern,
       batch_size,
       features,
@@ -893,96 +360,13 @@ def read_batch_features(file_pattern,
   return outputs
 
 
-def _get_file_names(file_pattern, shuffle):
-  """Parse list of file names from pattern, optionally shuffled.
-
-  Args:
-    file_pattern: File glob pattern, or list of glob patterns.
-    shuffle: Whether to shuffle the order of file names.
-
-  Returns:
-    List of file names matching `file_pattern`.
-
-  Raises:
-    ValueError: If `file_pattern` is empty, or pattern matches no files.
-  """
-  if isinstance(file_pattern, list):
-    if not file_pattern:
-      raise ValueError("File pattern is empty.")
-    file_names = []
-    for entry in file_pattern:
-      file_names.extend(gfile.Glob(entry))
-  else:
-    file_names = list(gfile.Glob(file_pattern))
-
-  if not file_names:
-    raise ValueError("No files match %s." % file_pattern)
-
-  # Sort files so it will be deterministic for unit tests.
-  if not shuffle:
-    file_names = sorted(file_names)
-  return file_names
-
-
-class SqlDataset(dataset_ops.DatasetSource):
+class SqlDataset(readers.SqlDataset):
   """A `Dataset` consisting of the results from a SQL query."""
 
+  @deprecation.deprecated(None, "Use `tf.data.experimental.SqlDataset(...)`.")
   def __init__(self, driver_name, data_source_name, query, output_types):
-    """Creates a `SqlDataset`.
-
-    `SqlDataset` allows a user to read data from the result set of a SQL query.
-    For example:
-
-    ```python
-    dataset = tf.contrib.data.SqlDataset("sqlite", "/foo/bar.sqlite3",
-                                         "SELECT name, age FROM people",
-                                         (tf.string, tf.int32))
-    iterator = dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-    # Prints the rows of the result set of the above query.
-    while True:
-      try:
-        print(sess.run(next_element))
-      except tf.errors.OutOfRangeError:
-        break
-    ```
-
-    Args:
-      driver_name: A 0-D `tf.string` tensor containing the database type.
-        Currently, the only supported value is 'sqlite'.
-      data_source_name: A 0-D `tf.string` tensor containing a connection string
-        to connect to the database.
-      query: A 0-D `tf.string` tensor containing the SQL query to execute.
-      output_types: A tuple of `tf.DType` objects representing the types of the
-        columns returned by `query`.
-    """
-    super(SqlDataset, self).__init__()
-    self._driver_name = ops.convert_to_tensor(
-        driver_name, dtype=dtypes.string, name="driver_name")
-    self._data_source_name = ops.convert_to_tensor(
-        data_source_name, dtype=dtypes.string, name="data_source_name")
-    self._query = ops.convert_to_tensor(
-        query, dtype=dtypes.string, name="query")
-    self._output_types = output_types
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.sql_dataset(self._driver_name,
-                                       self._data_source_name, self._query,
-                                       nest.flatten(self.output_types),
-                                       nest.flatten(self.output_shapes))
-
-  @property
-  def output_classes(self):
-    return nest.map_structure(lambda _: ops.Tensor, self._output_types)
-
-  @property
-  def output_shapes(self):
-    return nest.map_structure(lambda _: tensor_shape.TensorShape([]),
-                              self._output_types)
-
-  @property
-  def output_types(self):
-    return self._output_types
+    super(SqlDataset, self).__init__(
+        driver_name, data_source_name, query, output_types)
 
 
 class LMDBDataset(dataset_ops.DatasetSource):
diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py
index 75642f143e..29d77528d9 100644
--- a/tensorflow/contrib/data/python/ops/resampling.py
+++ b/tensorflow/contrib/data/python/ops/resampling.py
@@ -17,22 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import interleave_ops
-from tensorflow.contrib.data.python.ops import scan_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import logging_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
+from tensorflow.python.data.experimental.ops import resampling
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.rejection_resample(...)`.")
 def rejection_resample(class_func, target_dist, initial_dist=None, seed=None):
   """A transformation that resamples a dataset to achieve a target distribution.
 
@@ -52,243 +42,5 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
-    class_values_ds = dataset.map(class_func)
-
-    # Get initial distribution.
-    if initial_dist is not None:
-      initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")
-      acceptance_dist, prob_of_original = (
-          _calculate_acceptance_probs_with_mixing(initial_dist_t,
-                                                  target_dist_t))
-      initial_dist_ds = dataset_ops.Dataset.from_tensors(
-          initial_dist_t).repeat()
-      acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
-          acceptance_dist).repeat()
-      prob_of_original_ds = dataset_ops.Dataset.from_tensors(
-          prob_of_original).repeat()
-    else:
-      initial_dist_ds = _estimate_initial_dist_ds(
-          target_dist_t, class_values_ds)
-      acceptance_and_original_prob_ds = initial_dist_ds.map(
-          lambda initial: _calculate_acceptance_probs_with_mixing(
-              initial, target_dist_t))
-      acceptance_dist_ds = acceptance_and_original_prob_ds.map(
-          lambda accept_prob, _: accept_prob)
-      prob_of_original_ds = acceptance_and_original_prob_ds.map(
-          lambda _, prob_original: prob_original)
-    filtered_ds = _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds,
-                             class_values_ds, seed)
-    # Prefetch filtered dataset for speed.
-    filtered_ds = filtered_ds.prefetch(3)
-
-    prob_original_static = _get_prob_original_static(
-        initial_dist_t, target_dist_t) if initial_dist is not None else None
-    if prob_original_static == 1:
-      return dataset_ops.Dataset.zip((class_values_ds, dataset))
-    elif prob_original_static == 0:
-      return filtered_ds
-    else:
-      return interleave_ops.sample_from_datasets(
-          [dataset_ops.Dataset.zip((class_values_ds, dataset)), filtered_ds],
-          weights=prob_of_original_ds.map(lambda prob: [(prob, 1.0 - prob)]),
-          seed=seed)
-
-  return _apply_fn
-
-
-def _get_prob_original_static(initial_dist_t, target_dist_t):
-  """Returns the static probability of sampling from the original.
-
-  `tensor_util.constant_value(prob_of_original)` returns `None` if it encounters
-  an Op that it isn't defined for. We have some custom logic to avoid this.
-
-  Args:
-    initial_dist_t: A tensor of the initial distribution.
-    target_dist_t: A tensor of the target distribution.
-
-  Returns:
-    The probability of sampling from the original distribution as a constant,
-    if it is a constant, or `None`.
-  """
-  init_static = tensor_util.constant_value(initial_dist_t)
-  target_static = tensor_util.constant_value(target_dist_t)
-
-  if init_static is None or target_static is None:
-    return None
-  else:
-    return np.min(target_static / init_static)
-
-
-def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds,
-               seed):
-  """Filters a dataset based on per-class acceptance probabilities.
-
-  Args:
-    dataset: The dataset to be filtered.
-    acceptance_dist_ds: A dataset of acceptance probabilities.
-    initial_dist_ds: A dataset of the initial probability distribution, given or
-        estimated.
-    class_values_ds: A dataset of the corresponding classes.
-    seed: (Optional.) Python integer seed for the resampler.
-
-  Returns:
-    A dataset of (class value, data) after filtering.
-  """
-  def maybe_warn_on_large_rejection(accept_dist, initial_dist):
-    proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist)
-    return control_flow_ops.cond(
-        math_ops.less(proportion_rejected, .5),
-        lambda: accept_dist,
-        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
-            accept_dist, [proportion_rejected, initial_dist, accept_dist],
-            message="Proportion of examples rejected by sampler is high: ",
-            summarize=100,
-            first_n=10))
-
-  acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
-                                                 initial_dist_ds))
-                        .map(maybe_warn_on_large_rejection))
-
-  def _gather_and_copy(class_val, acceptance_prob, data):
-    return class_val, array_ops.gather(acceptance_prob, class_val), data
-
-  current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip(
-      (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy)
-  filtered_ds = (
-      current_probabilities_and_class_and_data_ds
-      .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
-  return filtered_ds.map(lambda class_value, _, data: (class_value, data))
-
-
-def _estimate_initial_dist_ds(
-    target_dist_t, class_values_ds, dist_estimation_batch_size=32,
-    smoothing_constant=10):
-  num_classes = (target_dist_t.shape[0].value or
-                 array_ops.shape(target_dist_t)[0])
-  initial_examples_per_class_seen = array_ops.fill(
-      [num_classes], np.int64(smoothing_constant))
-
-  def update_estimate_and_tile(num_examples_per_class_seen, c):
-    updated_examples_per_class_seen, dist = _estimate_data_distribution(
-        c, num_examples_per_class_seen)
-    tiled_dist = array_ops.tile(
-        array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
-    return updated_examples_per_class_seen, tiled_dist
-
-  initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
-                     .apply(scan_ops.scan(initial_examples_per_class_seen,
-                                          update_estimate_and_tile))
-                     .apply(batching.unbatch()))
-
-  return initial_dist_ds
-
-
-def _get_target_to_initial_ratio(initial_probs, target_probs):
-  # Add tiny to initial_probs to avoid divide by zero.
-  denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny)
-  return target_probs / denom
-
-
-def _estimate_data_distribution(c, num_examples_per_class_seen):
-  """Estimate data distribution as labels are seen.
-
-  Args:
-    c: The class labels.  Type `int32`, shape `[batch_size]`.
-    num_examples_per_class_seen: Type `int64`, shape `[num_classes]`,
-      containing counts.
-
-  Returns:
-    num_examples_per_lass_seen: Updated counts.  Type `int64`, shape
-      `[num_classes]`.
-    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
-  """
-  num_classes = num_examples_per_class_seen.get_shape()[0].value
-  # Update the class-count based on what labels are seen in batch.
-  num_examples_per_class_seen = math_ops.add(
-      num_examples_per_class_seen, math_ops.reduce_sum(
-          array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
-  init_prob_estimate = math_ops.truediv(
-      num_examples_per_class_seen,
-      math_ops.reduce_sum(num_examples_per_class_seen))
-  dist = math_ops.cast(init_prob_estimate, dtypes.float32)
-  return num_examples_per_class_seen, dist
-
-
-def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs):
-  """Calculates the acceptance probabilities and mixing ratio.
-
-  In this case, we assume that we can *either* sample from the original data
-  distribution with probability `m`, or sample from a reshaped distribution
-  that comes from rejection sampling on the original distribution. This
-  rejection sampling is done on a per-class basis, with `a_i` representing the
-  probability of accepting data from class `i`.
-
-  This method is based on solving the following analysis for the reshaped
-  distribution:
-
-  Let F be the probability of a rejection (on any example).
-  Let p_i be the proportion of examples in the data in class i (init_probs)
-  Let a_i is the rate the rejection sampler should *accept* class i
-  Let t_i is the target proportion in the minibatches for class i (target_probs)
-
-  ```
-  F = sum_i(p_i * (1-a_i))
-    = 1 - sum_i(p_i * a_i)     using sum_i(p_i) = 1
-  ```
-
-  An example with class `i` will be accepted if `k` rejections occur, then an
-  example with class `i` is seen by the rejector, and it is accepted. This can
-  be written as follows:
-
-  ```
-  t_i = sum_k=0^inf(F^k * p_i * a_i)
-      = p_i * a_j / (1 - F)    using geometric series identity, since 0 <= F < 1
-      = p_i * a_i / sum_j(p_j * a_j)        using F from above
-  ```
-
-  Note that the following constraints hold:
-  ```
-  0 <= p_i <= 1, sum_i(p_i) = 1
-  0 <= a_i <= 1
-  0 <= t_i <= 1, sum_i(t_i) = 1
-  ```
-
-  A solution for a_i in terms of the other variables is the following:
-    ```a_i = (t_i / p_i) / max_i[t_i / p_i]```
-
-  If we try to minimize the amount of data rejected, we get the following:
-
-  M_max = max_i [ t_i / p_i ]
-  M_min = min_i [ t_i / p_i ]
-
-  The desired probability of accepting data if it comes from class `i`:
-
-  a_i = (t_i/p_i - m) / (M_max - m)
-
-  The desired probability of pulling a data element from the original dataset,
-  rather than the filtered one:
-
-  m = M_min
-
-  Args:
-    initial_probs: A Tensor of the initial probability distribution, given or
-      estimated.
-    target_probs: A Tensor of the corresponding classes.
-
-  Returns:
-    (A 1D Tensor with the per-class acceptance probabilities, the desired
-    probability of pull from the original distribution.)
-  """
-  ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs)
-  max_ratio = math_ops.reduce_max(ratio_l)
-  min_ratio = math_ops.reduce_min(ratio_l)
-
-  # Target prob to sample from original distribution.
-  m = min_ratio
-
-  # TODO(joelshor): Simplify fraction, if possible.
-  a_i = (ratio_l - m) / (max_ratio - m)
-  return a_i, m
+  return resampling.rejection_resample(class_func, target_dist, initial_dist,
+                                       seed)
diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py
index c52582cd35..0ca9fddb23 100644
--- a/tensorflow/contrib/data/python/ops/scan_ops.py
+++ b/tensorflow/contrib/data/python/ops/scan_ops.py
@@ -17,137 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import gen_dataset_ops
-
-
-class _ScanDataset(dataset_ops.UnaryDataset):
-  """A dataset that scans a function across its input."""
-
-  def __init__(self, input_dataset, initial_state, scan_func):
-    """See `scan()` for details."""
-    super(_ScanDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-
-    with ops.name_scope("initial_state"):
-      # Convert any `SparseTensorValue`s to `SparseTensor`s and all other
-      # values to tensors.
-      self._initial_state = nest.pack_sequence_as(initial_state, [
-          sparse_tensor.SparseTensor.from_value(t)
-          if sparse_tensor.is_sparse(t) else ops.convert_to_tensor(
-              t, name="component_%d" % i)
-          for i, t in enumerate(nest.flatten(initial_state))
-      ])
-
-    # Compute initial values for the state classes, shapes and types based on
-    # the initial state. The shapes may be refined by running `tf_scan_func` one
-    # or more times below.
-    self._state_classes = sparse.get_classes(self._initial_state)
-    self._state_shapes = nest.pack_sequence_as(
-        self._initial_state,
-        [t.get_shape() for t in nest.flatten(self._initial_state)])
-    self._state_types = nest.pack_sequence_as(
-        self._initial_state,
-        [t.dtype for t in nest.flatten(self._initial_state)])
-
-    # Will be populated by calling `tf_scan_func`.
-    self._output_classes = None
-    self._output_shapes = None
-    self._output_types = None
-
-    # Iteratively rerun the scan function until reaching a fixed point on
-    # `self._state_shapes`.
-    need_to_rerun = True
-    while need_to_rerun:
-
-      wrapped_func = dataset_ops.StructuredFunctionWrapper(
-          scan_func, "tf.contrib.data.scan()",
-          input_classes=(self._state_classes, input_dataset.output_classes),
-          input_shapes=(self._state_shapes, input_dataset.output_shapes),
-          input_types=(self._state_types, input_dataset.output_types),
-          add_to_graph=False)
-      if not (
-          isinstance(wrapped_func.output_types, collections.Sequence) and
-          len(wrapped_func.output_types) == 2):
-        raise TypeError("The scan function must return a pair comprising the "
-                        "new state and the output value.")
-
-      new_state_classes, self._output_classes = wrapped_func.output_classes
-
-      # Extract and validate class information from the returned values.
-      for new_state_class, state_class in zip(
-          nest.flatten(new_state_classes),
-          nest.flatten(self._state_classes)):
-        if not issubclass(new_state_class, state_class):
-          raise TypeError(
-              "The element classes for the new state must match the initial "
-              "state. Expected %s; got %s." %
-              (self._state_classes, new_state_classes))
-
-      # Extract and validate type information from the returned values.
-      new_state_types, self._output_types = wrapped_func.output_types
-      for new_state_type, state_type in zip(
-          nest.flatten(new_state_types), nest.flatten(self._state_types)):
-        if new_state_type != state_type:
-          raise TypeError(
-              "The element types for the new state must match the initial "
-              "state. Expected %s; got %s." %
-              (self._state_types, new_state_types))
-
-      # Extract shape information from the returned values.
-      new_state_shapes, self._output_shapes = wrapped_func.output_shapes
-
-      flat_state_shapes = nest.flatten(self._state_shapes)
-      flat_new_state_shapes = nest.flatten(new_state_shapes)
-      weakened_state_shapes = [
-          original.most_specific_compatible_shape(new)
-          for original, new in zip(flat_state_shapes, flat_new_state_shapes)
-      ]
-
-      need_to_rerun = False
-      for original_shape, weakened_shape in zip(flat_state_shapes,
-                                                weakened_state_shapes):
-        if original_shape.ndims is not None and (
-            weakened_shape.ndims is None or
-            original_shape.as_list() != weakened_shape.as_list()):
-          need_to_rerun = True
-          break
-
-      if need_to_rerun:
-        self._state_shapes = nest.pack_sequence_as(self._state_shapes,
-                                                   weakened_state_shapes)
-
-    self._scan_func = wrapped_func.function
-    self._scan_func.add_to_graph(ops.get_default_graph())
-
-  def _as_variant_tensor(self):
-    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
-    return gen_dataset_ops.scan_dataset(
-        input_t,
-        nest.flatten(sparse.serialize_sparse_tensors(self._initial_state)),
-        self._scan_func.captured_inputs,
-        f=self._scan_func,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
+from tensorflow.python.data.experimental.ops import scan_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.scan(...)`.")
 def scan(initial_state, scan_func):
   """A transformation that scans a function across an input dataset.
 
@@ -168,7 +42,4 @@ def scan(initial_state, scan_func):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    return _ScanDataset(dataset, initial_state, scan_func)
-
-  return _apply_fn
+  return scan_ops.scan(initial_state, scan_func)
diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py
index 985d1d87d0..329b34fdfe 100644
--- a/tensorflow/contrib/data/python/ops/shuffle_ops.py
+++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py
@@ -17,54 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import random_seed
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import gen_dataset_ops
-
-
-class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that fuses `shuffle` and `repeat`."""
-
-  def __init__(self, input_dataset, buffer_size, count=None, seed=None):
-    super(_ShuffleAndRepeatDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._buffer_size = ops.convert_to_tensor(
-        buffer_size, dtype=dtypes.int64, name="buffer_size")
-    if count is None:
-      self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")
-    else:
-      self._count = ops.convert_to_tensor(
-          count, dtype=dtypes.int64, name="count")
-    self._seed, self._seed2 = random_seed.get_seed(seed)
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    input_resource = self._input_dataset._as_variant_tensor()
-    return gen_dataset_ops.shuffle_and_repeat_dataset(
-        input_resource,
-        buffer_size=self._buffer_size,
-        count=self._count,
-        seed=self._seed,
-        seed2=self._seed2,
-        **dataset_ops.flat_structure(self))
-    # pylint: enable=protected-access
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
+from tensorflow.python.data.experimental.ops import shuffle_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.shuffle_and_repeat(...)`.")
 def shuffle_and_repeat(buffer_size, count=None, seed=None):
   """Shuffles and repeats a Dataset returning a new permutation for each epoch.
 
@@ -93,8 +51,4 @@ def shuffle_and_repeat(buffer_size, count=None, seed=None):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):  # pylint: disable=missing-docstring
-    return _ShuffleAndRepeatDataset(dataset, buffer_size, count, seed)
-
-  return _apply_fn
+  return shuffle_ops.shuffle_and_repeat(buffer_size, count, seed)
diff --git a/tensorflow/contrib/data/python/ops/threadpool.py b/tensorflow/contrib/data/python/ops/threadpool.py
index f73c3fd9cb..20cceb4647 100644
--- a/tensorflow/contrib/data/python/ops/threadpool.py
+++ b/tensorflow/contrib/data/python/ops/threadpool.py
@@ -17,88 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import threading
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.eager import context
-from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
-from tensorflow.python.ops import resource_variable_ops
-
-_uid_counter = 0
-_uid_lock = threading.Lock()
-
-
-def _generate_shared_name(prefix):
-  with _uid_lock:
-    global _uid_counter
-    uid = _uid_counter
-    _uid_counter += 1
-  return "{}{}".format(prefix, uid)
-
-
-# TODO(b/73383364): Properly export in the `tf.contrib.data` API when stable
-# or make private / remove.
-class PrivateThreadPool(object):
-  """A stateful resource that represents a private thread pool."""
-
-  def __init__(self, num_threads, display_name=None,
-               max_intra_op_parallelism=1):
-    """Creates a `PrivateThreadPool` with the given number of threads."""
-    if context.executing_eagerly():
-      shared_name = _generate_shared_name("privatethreadpool")
-      self._resource = ged_ops.experimental_thread_pool_handle(
-          num_threads=num_threads,
-          max_intra_op_parallelism=max_intra_op_parallelism,
-          display_name=display_name,
-          shared_name=shared_name)
-      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
-          handle=self._resource, handle_device=context.context().device_name)
-    else:
-      self._resource = ged_ops.experimental_thread_pool_handle(
-          num_threads=num_threads,
-          max_intra_op_parallelism=max_intra_op_parallelism,
-          display_name=display_name)
-
-
-class _ThreadPoolDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that acts as an identity, and sets a custom threadpool."""
-
-  def __init__(self, input_dataset, thread_pool):
-    super(_ThreadPoolDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._thread_pool = thread_pool
-
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_thread_pool_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._thread_pool._resource,  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-
-# TODO(b/73383364): Properly export in the `tf.contrib.data` API when stable
-# or make private / remove.
-def override_threadpool(dataset, thread_pool):
-  """Returns a new dataset that uses the given thread pool for its operations.
-
-  Args:
-    dataset: A `tf.data.Dataset` object.
-    thread_pool: A `PrivateThreadPool` object.
-
-  Returns:
-    A dataset containing the same values as `dataset`, but which uses
-    `thread_pool` to compute any of its parallel operations (such as
-    `tf.data.Dataset.map`).
-  """
-  return _ThreadPoolDataset(dataset, thread_pool)
+# pylint: disable=unused-import
+from tensorflow.python.data.experimental.ops.threadpool import override_threadpool
+from tensorflow.python.data.experimental.ops.threadpool import PrivateThreadPool
diff --git a/tensorflow/contrib/data/python/ops/unique.py b/tensorflow/contrib/data/python/ops/unique.py
index ed363a7090..909d06c677 100644
--- a/tensorflow/contrib/data/python/ops/unique.py
+++ b/tensorflow/contrib/data/python/ops/unique.py
@@ -17,11 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.data.experimental.ops import unique as experimental_unique
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.unique()`.")
 def unique():
   """Creates a `Dataset` from another `Dataset`, discarding duplicates.
 
@@ -39,39 +39,4 @@ def unique():
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    return _UniqueDataset(dataset)
-
-  return _apply_fn
-
-
-class _UniqueDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` contains the unique elements from its input."""
-
-  def __init__(self, input_dataset):
-    """See `unique()` for details."""
-    super(_UniqueDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    if input_dataset.output_types not in (dtypes.int32, dtypes.int64,
-                                          dtypes.string):
-      raise TypeError(
-          "`tf.contrib.data.unique()` only supports inputs with a single "
-          "`tf.int32`, `tf.int64`, or `tf.string` component.")
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_unique_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
+  return experimental_unique.unique()
diff --git a/tensorflow/contrib/data/python/ops/writers.py b/tensorflow/contrib/data/python/ops/writers.py
index c455fdcba6..42fb69bf07 100644
--- a/tensorflow/contrib/data/python/ops/writers.py
+++ b/tensorflow/contrib/data/python/ops/writers.py
@@ -17,42 +17,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import convert
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.data.experimental.ops import writers
+from tensorflow.python.util import deprecation
 
 
-class TFRecordWriter(object):
+class TFRecordWriter(writers.TFRecordWriter):
   """Writes data to a TFRecord file."""
 
+  @deprecation.deprecated(
+      None, "Use `tf.data.experimental.TFRecordWriter(...)`.")
   def __init__(self, filename, compression_type=None):
-    self._filename = ops.convert_to_tensor(
-        filename, dtypes.string, name="filename")
-    self._compression_type = convert.optional_param_to_tensor(
-        "compression_type",
-        compression_type,
-        argument_default="",
-        argument_dtype=dtypes.string)
-
-  def write(self, dataset):
-    """Returns a `tf.Operation` to write a dataset to a file.
-
-    Args:
-      dataset: a `tf.data.Dataset` whose elements are to be written to a file
-
-    Returns:
-      A `tf.Operation` that, when run, writes contents of `dataset` to a file.
-    """
-    if not isinstance(dataset, dataset_ops.Dataset):
-      raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
-    if (dataset.output_types != dtypes.string or
-        dataset.output_shapes != tensor_shape.scalar()):
-      raise TypeError(
-          "`dataset` must produce scalar `DT_STRING` tensors whereas it "
-          "produces shape {0} and types {1}".format(dataset.output_shapes,
-                                                    dataset.output_types))
-    return gen_dataset_ops.dataset_to_tf_record(
-        dataset._as_variant_tensor(), self._filename, self._compression_type)  # pylint: disable=protected-access
+    super(TFRecordWriter, self).__init__(filename, compression_type)
diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
index 8d949943b7..d48aa9c89b 100644
--- a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
+++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import warnings
 
-from tensorflow.contrib.data.python.ops import prefetching_ops
+from tensorflow.python.data.experimental.ops import prefetching_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.util import nest as data_nest
diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py
index 135095a979..3aed121233 100644
--- a/tensorflow/contrib/eager/python/datasets.py
+++ b/tensorflow/contrib/eager/python/datasets.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import prefetching_ops
+from tensorflow.python.data.experimental.ops import prefetching_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
@@ -54,7 +54,7 @@ class Iterator(iterator_ops.EagerIterator):
     """
     if isinstance(dataset, prefetching_ops._PrefetchToDeviceDataset):  # pylint: disable=protected-access
       raise TypeError(
-          "`tf.contrib.data.prefetch_to_device()` is not compatible with "
+          "`tf.data.experimental.prefetch_to_device()` is not compatible with "
           "`tf.contrib.eager.Iterator`. Use `for ... in dataset:` to iterate "
           "over the dataset instead.")
 
diff --git a/tensorflow/contrib/eager/python/datasets_test.py b/tensorflow/contrib/eager/python/datasets_test.py
index a753d77580..6a508fc6ba 100644
--- a/tensorflow/contrib/eager/python/datasets_test.py
+++ b/tensorflow/contrib/eager/python/datasets_test.py
@@ -24,11 +24,11 @@ import time
 import numpy as np
 
 from tensorflow.contrib import lookup
-from tensorflow.contrib.data.python.ops import prefetching_ops
-from tensorflow.contrib.data.python.ops import threadpool
-from tensorflow.contrib.data.python.ops import unique
 from tensorflow.contrib.eager.python import datasets
 from tensorflow.python.data import Dataset
+from tensorflow.python.data.experimental.ops import prefetching_ops
+from tensorflow.python.data.experimental.ops import threadpool
+from tensorflow.python.data.experimental.ops import unique
 from tensorflow.python.eager import test
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py b/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py
index 34a9984b0e..d85188de03 100644
--- a/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py
+++ b/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py
@@ -169,11 +169,11 @@ class ImageNetInput(object):
 
     # Read the data from disk in parallel
     dataset = dataset.apply(
-        tf.contrib.data.parallel_interleave(
+        tf.data.experimental.parallel_interleave(
             fetch_dataset, cycle_length=self.num_parallel_calls, sloppy=True))
     if self.cache:
       dataset = dataset.cache().apply(
-          tf.contrib.data.shuffle_and_repeat(1024 * 16))
+          tf.data.experimental.shuffle_and_repeat(1024 * 16))
     else:
       dataset = dataset.shuffle(1024)
 
@@ -188,9 +188,11 @@ class ImageNetInput(object):
     # batch size. As long as this validation is done with consistent batch size,
     # exactly the same images will be used.
     dataset = dataset.apply(
-        tf.contrib.data.map_and_batch(
-            self.dataset_parser, batch_size=batch_size,
-            num_parallel_batches=self.num_cores, drop_remainder=True))
+        tf.data.experimental.map_and_batch(
+            self.dataset_parser,
+            batch_size=batch_size,
+            num_parallel_batches=self.num_cores,
+            drop_remainder=True))
 
     # Transpose for performance on TPU
     if self.transpose_input:
diff --git a/tensorflow/contrib/estimator/python/estimator/rnn_test.py b/tensorflow/contrib/estimator/python/estimator/rnn_test.py
index 1aebed348d..89506ee661 100644
--- a/tensorflow/contrib/estimator/python/estimator/rnn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/rnn_test.py
@@ -25,12 +25,12 @@ import tempfile
 import numpy as np
 import six
 
-from tensorflow.contrib.data.python.ops import readers
 from tensorflow.contrib.estimator.python.estimator import head as head_lib
 from tensorflow.contrib.estimator.python.estimator import rnn
 from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.estimator import model_fn
 from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.canned import parsing_utils
diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py
index 89b538d1ba..9e9345e875 100644
--- a/tensorflow/contrib/lookup/lookup_ops_test.py
+++ b/tensorflow/contrib/lookup/lookup_ops_test.py
@@ -23,8 +23,8 @@ import numpy as np
 import six
 
 from tensorflow.contrib import lookup
-from tensorflow.contrib.data.python.ops import counter
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import counter
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/stateless/BUILD b/tensorflow/contrib/stateless/BUILD
index dcbef2881d..a217397c1a 100644
--- a/tensorflow/contrib/stateless/BUILD
+++ b/tensorflow/contrib/stateless/BUILD
@@ -9,19 +9,13 @@ exports_files(["LICENSE"])
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
 
-tf_gen_op_wrapper_py(
-    name = "stateless_random_ops",
-    out = "gen_stateless_random_ops.py",  # cmake chokes without this
-    deps = ["//tensorflow/core:stateless_random_ops_op_lib"],
-)
-
 py_library(
     name = "stateless",
     srcs = ["__init__.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":stateless_random_ops",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:stateless_random_ops_gen",
         "//tensorflow/python:util",
     ],
 )
diff --git a/tensorflow/contrib/stateless/__init__.py b/tensorflow/contrib/stateless/__init__.py
index 0cca40f071..fe23fe0dd8 100644
--- a/tensorflow/contrib/stateless/__init__.py
+++ b/tensorflow/contrib/stateless/__init__.py
@@ -32,10 +32,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.framework import ops
+
 # pylint: disable=wildcard-import
-from tensorflow.contrib.stateless.gen_stateless_random_ops import *
+from tensorflow.python.ops.gen_stateless_random_ops import *
 
-from tensorflow.python.framework import ops
 from tensorflow.python.util.all_util import remove_undocumented
 
 ops.NotDifferentiable("StatelessMultinomial")
diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py
index d879170b68..c694e9c1bc 100644
--- a/tensorflow/contrib/tpu/python/tpu/datasets.py
+++ b/tensorflow/contrib/tpu/python/tpu/datasets.py
@@ -18,8 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import readers
diff --git a/tensorflow/contrib/tpu/tpu_estimator.md b/tensorflow/contrib/tpu/tpu_estimator.md
index 639e708169..b6514e19dc 100644
--- a/tensorflow/contrib/tpu/tpu_estimator.md
+++ b/tensorflow/contrib/tpu/tpu_estimator.md
@@ -87,7 +87,7 @@ handle training:
           label = tf.cast(features["label"], tf.int32)
           return image, label
 
-        dataset = tf.contrib.data.TFRecordDataset(
+        dataset = tf.data.TFRecordDataset(
             filename, buffer_size=FLAGS.dataset_reader_buffer_size)
         dataset = dataset.map(parser).cache().repeat().batch(batch_size)
         images, labels = dataset.make_one_shot_iterator().get_next()
diff --git a/tensorflow/contrib/training/BUILD b/tensorflow/contrib/training/BUILD
index b565ebd073..00295f57f6 100644
--- a/tensorflow/contrib/training/BUILD
+++ b/tensorflow/contrib/training/BUILD
@@ -295,7 +295,6 @@ py_test(
     tags = ["notsan"],
     deps = [
         ":training_py",
-        "//tensorflow/contrib/data/python/kernel_tests/serialization:dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:gradients",
@@ -305,6 +304,7 @@ py_test(
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
         "//tensorflow/python/data",
+        "//tensorflow/python/data/experimental/kernel_tests/serialization:dataset_serialization_test_base",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/contrib/training/python/training/tensor_queue_dataset_test.py b/tensorflow/contrib/training/python/training/tensor_queue_dataset_test.py
index d9b0511a98..c1657fec7b 100644
--- a/tensorflow/contrib/training/python/training/tensor_queue_dataset_test.py
+++ b/tensorflow/contrib/training/python/training/tensor_queue_dataset_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.contrib.training.python.training import tensor_queue_dataset as tqd
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
diff --git a/tensorflow/core/api_def/python_api/api_def_StatelessMultinomial.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatelessMultinomial.pbtxt
new file mode 100644
index 0000000000..d3c70190dd
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StatelessMultinomial.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatelessMultinomial"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StatelessRandomNormal.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatelessRandomNormal.pbtxt
new file mode 100644
index 0000000000..e294325fb8
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StatelessRandomNormal.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatelessRandomNormal"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StatelessRandomUniform.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatelessRandomUniform.pbtxt
new file mode 100644
index 0000000000..95d414c54a
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StatelessRandomUniform.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatelessRandomUniform"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StatelessTruncatedNormal.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatelessTruncatedNormal.pbtxt
new file mode 100644
index 0000000000..c72bdda94a
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StatelessTruncatedNormal.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatelessTruncatedNormal"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/examples/get_started/regression/test.py b/tensorflow/examples/get_started/regression/test.py
index 0b1477ad96..bb4db6700b 100644
--- a/tensorflow/examples/get_started/regression/test.py
+++ b/tensorflow/examples/get_started/regression/test.py
@@ -29,7 +29,7 @@ import tensorflow.examples.get_started.regression.imports85 as imports85
 sys.modules["imports85"] = imports85
 
 # pylint: disable=g-bad-import-order,g-import-not-at-top
-import tensorflow.contrib.data as data
+import tensorflow.data as data
 
 import tensorflow.examples.get_started.regression.dnn_regression as dnn_regression
 import tensorflow.examples.get_started.regression.linear_regression as linear_regression
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 9275ad767e..fe81254ef7 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1739,6 +1739,14 @@ tf_gen_op_wrapper_private_py(
     ],
 )
 
+tf_gen_op_wrapper_private_py(
+    name = "stateless_random_ops_gen",
+    visibility = [
+        "//tensorflow/contrib/stateless:__pkg__",
+        "//tensorflow/python/data/experimental/ops:__pkg__",
+    ],
+)
+
 tf_gen_op_wrapper_private_py(
     name = "list_ops_gen",
 )
@@ -3302,9 +3310,11 @@ py_library(
             "training/checkpointable/**/*.py",
             # The following targets have their own build rules (same name as the
             # file):
+            "training/basic_session_run_hooks.py",
             "training/checkpoint_management.py",
             "training/saveable_object.py",
             "training/saver.py",
+            "training/session_run_hook.py",
             "training/training_util.py",
         ],
     ),
@@ -3312,6 +3322,7 @@ py_library(
     deps = [
         ":array_ops",
         ":array_ops_gen",
+        ":basic_session_run_hooks",
         ":checkpoint_management",
         ":checkpoint_ops_gen",
         ":client",
@@ -3336,6 +3347,7 @@ py_library(
         ":saver",
         ":sdca_ops",
         ":session",
+        ":session_run_hook",
         ":sparse_ops",
         ":sparse_tensor",
         ":state_ops",
@@ -3379,6 +3391,28 @@ py_library(
     ],
 )
 
+py_library(
+    name = "session_run_hook",
+    srcs = ["training/session_run_hook.py"],
+    srcs_version = "PY2AND3",
+    deps = [":util"],
+)
+
+py_library(
+    name = "basic_session_run_hooks",
+    srcs = ["training/basic_session_run_hooks.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":client",
+        ":framework",
+        ":platform",
+        ":protos_all_py",
+        ":session_run_hook",
+        ":training_util",
+        ":util",
+    ],
+)
+
 py_library(
     name = "saver",
     srcs = ["training/saver.py"],
diff --git a/tensorflow/python/data/BUILD b/tensorflow/python/data/BUILD
index 138141f4fc..e32eeecbb8 100644
--- a/tensorflow/python/data/BUILD
+++ b/tensorflow/python/data/BUILD
@@ -10,6 +10,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
         "//tensorflow/python/data/ops:multi_device_iterator_ops",
diff --git a/tensorflow/python/data/__init__.py b/tensorflow/python/data/__init__.py
index f8b561205e..7536ba668a 100644
--- a/tensorflow/python/data/__init__.py
+++ b/tensorflow/python/data/__init__.py
@@ -22,6 +22,7 @@ from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=unused-import
+from tensorflow.python.data import experimental
 from tensorflow.python.data.ops.dataset_ops import Dataset
 from tensorflow.python.data.ops.iterator_ops import Iterator
 from tensorflow.python.data.ops.readers import FixedLengthRecordDataset
diff --git a/tensorflow/python/data/experimental/BUILD b/tensorflow/python/data/experimental/BUILD
new file mode 100644
index 0000000000..84e761d376
--- /dev/null
+++ b/tensorflow/python/data/experimental/BUILD
@@ -0,0 +1,16 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+py_library(
+    name = "experimental",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:dataset_ops",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
+    ],
+)
diff --git a/tensorflow/python/data/experimental/__init__.py b/tensorflow/python/data/experimental/__init__.py
new file mode 100644
index 0000000000..2ac159d38a
--- /dev/null
+++ b/tensorflow/python/data/experimental/__init__.py
@@ -0,0 +1,109 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental API for building input pipelines.
+
+This module contains experimental `Dataset` sources and transformations that can
+be used in conjunction with the `tf.data.Dataset` API. Note that the
+`tf.data.experimental` API is not subject to the same backwards compatibility
+guarantees as `tf.data`, but we will provide deprecation advice in advance of
+removing existing functionality.
+
+See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
+
+@@Counter
+@@CheckpointInputPipelineHook
+@@CsvDataset
+@@Optional
+@@RandomDataset
+@@Reducer
+@@SqlDataset
+@@TFRecordWriter
+
+@@bucket_by_sequence_length
+@@choose_from_datasets
+@@copy_to_device
+@@dense_to_sparse_batch
+@@enumerate_dataset
+@@get_next_as_optional
+@@get_single_element
+@@group_by_reducer
+@@group_by_window
+@@ignore_errors
+@@latency_stats
+@@make_batched_features_dataset
+@@make_csv_dataset
+@@make_saveable_from_iterator
+@@map_and_batch
+@@parallel_interleave
+@@parse_example_dataset
+@@prefetch_to_device
+@@rejection_resample
+@@sample_from_datasets
+@@scan
+@@set_stats_aggregator
+@@shuffle_and_repeat
+@@StatsAggregator
+@@unbatch
+@@unique
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=unused-import
+
+from tensorflow.python.data.experimental.ops.batching import dense_to_sparse_batch
+from tensorflow.python.data.experimental.ops.batching import map_and_batch
+from tensorflow.python.data.experimental.ops.batching import unbatch
+from tensorflow.python.data.experimental.ops.counter import Counter
+from tensorflow.python.data.experimental.ops.enumerate_ops import enumerate_dataset
+from tensorflow.python.data.experimental.ops.error_ops import ignore_errors
+from tensorflow.python.data.experimental.ops.get_single_element import get_single_element
+from tensorflow.python.data.experimental.ops.grouping import bucket_by_sequence_length
+from tensorflow.python.data.experimental.ops.grouping import group_by_reducer
+from tensorflow.python.data.experimental.ops.grouping import group_by_window
+from tensorflow.python.data.experimental.ops.grouping import Reducer
+from tensorflow.python.data.experimental.ops.interleave_ops import choose_from_datasets
+from tensorflow.python.data.experimental.ops.interleave_ops import parallel_interleave
+from tensorflow.python.data.experimental.ops.interleave_ops import sample_from_datasets
+from tensorflow.python.data.experimental.ops.iterator_ops import CheckpointInputPipelineHook
+from tensorflow.python.data.experimental.ops.iterator_ops import make_saveable_from_iterator
+
+# Optimization constant that can be used to enable auto-tuning.
+from tensorflow.python.data.experimental.ops.optimization import AUTOTUNE
+
+from tensorflow.python.data.experimental.ops.parsing_ops import parse_example_dataset
+from tensorflow.python.data.experimental.ops.prefetching_ops import copy_to_device
+from tensorflow.python.data.experimental.ops.prefetching_ops import prefetch_to_device
+from tensorflow.python.data.experimental.ops.random_ops import RandomDataset
+from tensorflow.python.data.experimental.ops.readers import CsvDataset
+from tensorflow.python.data.experimental.ops.readers import make_batched_features_dataset
+from tensorflow.python.data.experimental.ops.readers import make_csv_dataset
+from tensorflow.python.data.experimental.ops.readers import SqlDataset
+from tensorflow.python.data.experimental.ops.resampling import rejection_resample
+from tensorflow.python.data.experimental.ops.scan_ops import scan
+from tensorflow.python.data.experimental.ops.shuffle_ops import shuffle_and_repeat
+from tensorflow.python.data.experimental.ops.stats_ops import latency_stats
+from tensorflow.python.data.experimental.ops.stats_ops import set_stats_aggregator
+from tensorflow.python.data.experimental.ops.stats_ops import StatsAggregator
+from tensorflow.python.data.experimental.ops.unique import unique
+from tensorflow.python.data.experimental.ops.writers import TFRecordWriter
+from tensorflow.python.data.ops.iterator_ops import get_next_as_optional
+from tensorflow.python.data.ops.optional_ops import Optional
+# pylint: enable=unused-import
+
+from tensorflow.python.util.all_util import remove_undocumented
+remove_undocumented(__name__)
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
new file mode 100644
index 0000000000..a46c30ed2e
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -0,0 +1,569 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+py_test(
+    name = "batch_dataset_op_test",
+    size = "medium",
+    srcs = ["batch_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",  # (b/79552534)
+        "no_pip",
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "bucketing_test",
+    size = "medium",
+    srcs = ["bucketing_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:grouping",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "csv_dataset_op_test",
+    size = "medium",
+    srcs = ["csv_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:session",
+        "//tensorflow/python/data/experimental/ops:error_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/eager:context",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "dataset_constructor_op_test",
+    size = "medium",
+    srcs = ["dataset_constructor_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "manual",
+        "nomac",  # b/62040583
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
+
+py_test(
+    name = "directed_interleave_dataset_test",
+    size = "medium",
+    srcs = ["directed_interleave_dataset_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:random_seed",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "get_single_element_test",
+    size = "small",
+    srcs = ["get_single_element_test.py"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:get_single_element",
+        "//tensorflow/python/data/experimental/ops:grouping",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "indexed_dataset_ops_test",
+    srcs = ["indexed_dataset_ops_test.py"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/experimental/ops:indexed_dataset_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "interleave_dataset_op_test",
+    size = "medium",
+    srcs = ["interleave_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "notap",
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "iterator_ops_test",
+    size = "small",
+    srcs = ["iterator_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/estimator:estimator_py",
+    ],
+)
+
+py_test(
+    name = "map_dataset_op_test",
+    size = "medium",
+    srcs = ["map_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "noasan",  # times out
+        "optonly",
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:error_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "filter_dataset_op_test",
+    size = "medium",
+    srcs = ["filter_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "map_defun_op_test",
+    size = "small",
+    srcs = ["map_defun_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:check_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:data_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:function",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python/data/experimental/ops:map_defun",
+        "//tensorflow/python/data/kernel_tests:test_base",
+    ],
+)
+
+py_test(
+    name = "parsing_ops_test",
+    size = "small",
+    srcs = ["parsing_ops_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:parsing_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "prefetching_ops_test",
+    size = "small",
+    srcs = ["prefetching_ops_test.py"],
+    additional_deps = [
+        "//tensorflow/python/data/experimental/ops:prefetching_ops",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python/compat:compat",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+    tags = ["no_windows_gpu"],
+)
+
+py_test(
+    name = "range_dataset_op_test",
+    size = "small",
+    srcs = ["range_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:counter",
+        "//tensorflow/python/data/experimental/ops:enumerate_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "reader_dataset_ops_test_base",
+    testonly = 1,
+    srcs = [
+        "reader_dataset_ops_test_base.py",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = [
+        "//tensorflow/python/data/experimental/kernel_tests:__pkg__",
+        "//tensorflow/python/data/experimental/kernel_tests/serialization:__pkg__",
+    ],
+    deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:readers",
+    ],
+)
+
+py_test(
+    name = "reader_dataset_ops_test",
+    size = "medium",
+    srcs = ["reader_dataset_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":reader_dataset_ops_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "resample_test",
+    size = "medium",
+    srcs = ["resample_test.py"],
+    shard_count = 2,
+    srcs_version = "PY2AND3",
+    tags = [
+        "noasan",
+        "optonly",
+    ],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:resampling",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "scan_dataset_op_test",
+    size = "small",
+    srcs = ["scan_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:scan_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/eager:context",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "shuffle_dataset_op_test",
+    size = "medium",
+    srcs = ["shuffle_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "optonly",
+    ],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/experimental/ops:shuffle_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "sql_dataset_op_test_base",
+    srcs = ["sql_dataset_op_test_base.py"],
+    srcs_version = "PY2AND3",
+    visibility = [
+        "//tensorflow/python/data/experimental/kernel_tests:__pkg__",
+        "//tensorflow/python/data/experimental/kernel_tests/serialization:__pkg__",
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "@org_sqlite//:python",
+    ],
+)
+
+py_test(
+    name = "sql_dataset_op_test",
+    size = "small",
+    srcs = ["sql_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":sql_dataset_op_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+    ],
+)
+
+py_test(
+    name = "stats_dataset_ops_test",
+    size = "medium",
+    srcs = ["stats_dataset_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":reader_dataset_ops_test_base",
+        ":stats_dataset_test_base",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/experimental/ops:stats_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "stats_dataset_test_base",
+    srcs = ["stats_dataset_test_base.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/kernel_tests:test_base",
+    ],
+)
+
+py_test(
+    name = "threadpool_dataset_ops_test",
+    size = "small",
+    srcs = ["threadpool_dataset_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python/data/experimental/ops:threadpool",
+        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "unique_dataset_op_test",
+    size = "small",
+    srcs = ["unique_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_test(
+    name = "writer_ops_test",
+    size = "small",
+    srcs = ["writer_ops_test.py"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:writers",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:readers",
+    ],
+)
diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
similarity index 67%
rename from tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
index fed7de5f2b..8703b2810e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
@@ -23,8 +23,8 @@ import time
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import batching
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -32,7 +32,6 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
@@ -43,7 +42,6 @@ from tensorflow.python.util import compat
 
 class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
-
   def testDenseToSparseBatchDataset(self):
     components = np.random.randint(12, size=(100,)).astype(np.int32)
     iterator = (
@@ -302,128 +300,6 @@ class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(next_element)
 
-  def testBatchAndDropRemainder(self):
-    components = (np.arange(7),
-                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
-                  np.array(37.0) * np.arange(7))
-
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            batching.batch_and_drop_remainder(batch_size))
-        .make_initializable_iterator())
-
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for test_batch_size in [1, 3, 7, 10]:
-        sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size})
-        num_batches = 7 // test_batch_size
-        for i in range(num_batches):
-          result = sess.run(next_element)
-          for component, result_component in zip(components, result):
-            for j in range(test_batch_size):
-              self.assertAllEqual(component[(i * test_batch_size + j)],
-                                  result_component[j])
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(next_element)
-
-  def testBatchAndDropRemainderSparse(self):
-
-    def _sparse(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=[[0]], values=(i * [1]), dense_shape=[1])
-
-    iterator = dataset_ops.Dataset.range(12).map(_sparse).apply(
-        batching.batch_and_drop_remainder(5)).make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(2):
-        actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensorValue(
-            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
-            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
-            dense_shape=[5, 1])
-        self.assertTrue(sparse_tensor.is_sparse(actual))
-        self.assertSparseValuesEqual(actual, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testPaddedBatchAndDropRemainder(self):
-    els = []
-    for length in [3, 6, 9, 4, 12, 10, 2]:
-      els.append((np.array(length), np.arange(length) + 1,
-                  np.array(length * 2)))
-
-    dataset = dataset_ops.Dataset.from_tensors(els[0])
-    for el in els[1:]:
-      dataset = dataset.concatenate(dataset_ops.Dataset.from_tensors(el))
-
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-    iterator = (
-        dataset.apply(
-            batching.padded_batch_and_drop_remainder(
-                batch_size, ([], [None], []))).make_initializable_iterator())
-
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for test_batch_size in [1, 3, 7, 10]:
-        sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size})
-        num_batches = 7 // test_batch_size
-        for i in range(num_batches):
-          result = sess.run(next_element)
-          for component_idx, result_component in enumerate(result):
-            for j in range(test_batch_size):
-              data_idx = i * test_batch_size + j
-              comp = result_component[j]
-              unpadded = comp[comp > 0]
-              if np.isscalar(comp):
-                # The boolean mask indexing above adds a dim back. Rm it.
-                unpadded = unpadded[0]
-              self.assertAllEqual(els[data_idx][component_idx], unpadded)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(next_element)
-
-  def testPaddedBatchAndDropRemainderSparseError(self):
-
-    def _map_fn(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
-
-    with self.assertRaises(TypeError):
-      _ = dataset_ops.Dataset.range(10).map(_map_fn).apply(
-          batching.padded_batch_and_drop_remainder(5))
-
-  def testBatchAndDropRemainderShapeInference(self):
-    components = (array_ops.placeholder(dtypes.int32),
-                  (array_ops.placeholder(dtypes.int32, shape=[None]),
-                   array_ops.placeholder(dtypes.int32, shape=[20, 30])))
-
-    # Test with a statically known batch size.
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            batching.batch_and_drop_remainder(128)))
-
-    self.assertIs(None, dataset.output_shapes[0].ndims)
-    self.assertEqual([128], dataset.output_shapes[1][0].as_list())
-    self.assertEqual([128, 30], dataset.output_shapes[1][1].as_list())
-
-    # Test with a dynamic batch size: the static shape will be unknown, because
-    # `batch_size` is a placeholder.
-    batch_size = array_ops.placeholder(dtypes.int64)
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            batching.batch_and_drop_remainder(batch_size)))
-
-    self.assertIs(None, dataset.output_shapes[0].ndims)
-    self.assertEqual([None], dataset.output_shapes[1][0].as_list())
-    self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list())
-
   @parameterized.named_parameters(
       ("Default", None, None),
       ("SequentialCalls", 1, None),
@@ -720,197 +596,6 @@ class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
         self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
 
 
-class RestructuredDatasetTest(test_base.DatasetTestBase):
-
-  def test_assert_element_shape(self):
-
-    def create_dataset(_):
-      return (array_ops.ones(2, dtype=dtypes.float32),
-              array_ops.zeros((3, 4), dtype=dtypes.int32))
-
-    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
-    expected_shapes = (tensor_shape.TensorShape(2),
-                       tensor_shape.TensorShape((3, 4)))
-    self.assertEqual(expected_shapes, dataset.output_shapes)
-
-    result = dataset.apply(batching.assert_element_shape(expected_shapes))
-    self.assertEqual(expected_shapes, result.output_shapes)
-
-    iterator = result.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(5):
-        sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def test_assert_wrong_element_shape(self):
-
-    def create_dataset(_):
-      return (array_ops.ones(2, dtype=dtypes.float32),
-              array_ops.zeros((3, 4), dtype=dtypes.int32))
-
-    dataset = dataset_ops.Dataset.range(3).map(create_dataset)
-    wrong_shapes = (tensor_shape.TensorShape(2),
-                    tensor_shape.TensorShape((3, 10)))
-    with self.assertRaises(ValueError):
-      dataset.apply(batching.assert_element_shape(wrong_shapes))
-
-  def test_assert_element_shape_on_unknown_shape_dataset(self):
-
-    def create_unknown_shape_dataset(x):
-      return script_ops.py_func(
-          lambda _: (  # pylint: disable=g-long-lambda
-              np.ones(2, dtype=np.float32),
-              np.zeros((3, 4), dtype=np.int32)),
-          [x],
-          [dtypes.float32, dtypes.int32])
-
-    dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset)
-    unknown_shapes = (tensor_shape.TensorShape(None),
-                      tensor_shape.TensorShape(None))
-    self.assertEqual(unknown_shapes, dataset.output_shapes)
-
-    expected_shapes = (tensor_shape.TensorShape(2),
-                       tensor_shape.TensorShape((3, 4)))
-    result = dataset.apply(batching.assert_element_shape(expected_shapes))
-    self.assertEqual(expected_shapes, result.output_shapes)
-
-    iterator = result.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(5):
-        sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def test_assert_wrong_element_shape_on_unknown_shape_dataset(self):
-
-    def create_unknown_shape_dataset(x):
-      return script_ops.py_func(
-          lambda _: (  # pylint: disable=g-long-lambda
-              np.ones(2, dtype=np.float32),
-              np.zeros((3, 4), dtype=np.int32)),
-          [x],
-          [dtypes.float32, dtypes.int32])
-
-    dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset)
-    unknown_shapes = (tensor_shape.TensorShape(None),
-                      tensor_shape.TensorShape(None))
-    self.assertEqual(unknown_shapes, dataset.output_shapes)
-
-    wrong_shapes = (tensor_shape.TensorShape(2),
-                    tensor_shape.TensorShape((3, 10)))
-    iterator = (
-        dataset.apply(batching.assert_element_shape(wrong_shapes))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-  def test_assert_partial_element_shape(self):
-
-    def create_dataset(_):
-      return (array_ops.ones(2, dtype=dtypes.float32),
-              array_ops.zeros((3, 4), dtype=dtypes.int32))
-
-    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
-    partial_expected_shape = (tensor_shape.TensorShape(None),       # Unknown shape
-                              tensor_shape.TensorShape((None, 4)))  # Partial shape
-    result = dataset.apply(
-        batching.assert_element_shape(partial_expected_shape))
-    # Partial shapes are merged with actual shapes:
-    actual_shapes = (tensor_shape.TensorShape(2),
-                     tensor_shape.TensorShape((3, 4)))
-    self.assertEqual(actual_shapes, result.output_shapes)
-
-    iterator = result.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(5):
-        sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def test_assert_wrong_partial_element_shape(self):
-
-    def create_dataset(_):
-      return (array_ops.ones(2, dtype=dtypes.float32),
-              array_ops.zeros((3, 4), dtype=dtypes.int32))
-
-    dataset = dataset_ops.Dataset.range(3).map(create_dataset)
-    wrong_shapes = (tensor_shape.TensorShape(2),
-                    tensor_shape.TensorShape((None, 10)))
-    with self.assertRaises(ValueError):
-      dataset.apply(batching.assert_element_shape(wrong_shapes))
-
-  def test_assert_partial_element_shape_on_unknown_shape_dataset(self):
-
-    def create_unknown_shape_dataset(x):
-      return script_ops.py_func(
-          lambda _: (  # pylint: disable=g-long-lambda
-              np.ones(2, dtype=np.float32),
-              np.zeros((3, 4), dtype=np.int32)),
-          [x],
-          [dtypes.float32, dtypes.int32])
-
-    dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset)
-    unknown_shapes = (tensor_shape.TensorShape(None),
-                      tensor_shape.TensorShape(None))
-    self.assertEqual(unknown_shapes, dataset.output_shapes)
-
-    expected_shapes = (tensor_shape.TensorShape(2),
-                       tensor_shape.TensorShape((None, 4)))
-    result = dataset.apply(batching.assert_element_shape(expected_shapes))
-    self.assertEqual(expected_shapes, result.output_shapes)
-
-    iterator = result.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(5):
-        sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def test_assert_wrong_partial_element_shape_on_unknown_shape_dataset(self):
-
-    def create_unknown_shape_dataset(x):
-      return script_ops.py_func(
-          lambda _: (  # pylint: disable=g-long-lambda
-              np.ones(2, dtype=np.float32),
-              np.zeros((3, 4), dtype=np.int32)),
-          [x],
-          [dtypes.float32, dtypes.int32])
-
-    dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset)
-    unknown_shapes = (tensor_shape.TensorShape(None),
-                      tensor_shape.TensorShape(None))
-    self.assertEqual(unknown_shapes, dataset.output_shapes)
-
-    wrong_shapes = (tensor_shape.TensorShape(2),
-                    tensor_shape.TensorShape((None, 10)))
-    iterator = (
-        dataset.apply(batching.assert_element_shape(wrong_shapes))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-
 class UnbatchDatasetBenchmark(test.Benchmark):
 
   def benchmarkNativeUnbatch(self):
diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
rename to tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
index ae401f786c..153a03989b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
@@ -21,7 +21,7 @@ import random
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.ops import grouping
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
index 5b3c512b64..4ee1779710 100644
--- a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
@@ -27,9 +27,9 @@ import zlib
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import error_ops
-from tensorflow.contrib.data.python.ops import readers
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import error_ops
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.eager import context
diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
index 722e87e555..3fc7157bc5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/dataset_serialization_test_base.py b/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/dataset_serialization_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
index 595cecef4d..7f435b8239 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/dataset_serialization_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
@@ -22,7 +22,7 @@ import os
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py
rename to tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
index bc10c21472..796a692c56 100644
--- a/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
@@ -84,7 +84,7 @@ class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
 
     # Use chi-squared test to assert that the observed distribution matches the
     # expected distribution. Based on the implementation in
-    # "tensorflow/python/kernel_tests/multinomial_op_test.py".
+    # "third_party/tensorflow/python/kernel_tests/multinomial_op_test.py".
     for probs in [[.85, .05, .1], rand_probs, [1.]]:
       probs = np.asarray(probs)
       classes = len(probs)
diff --git a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
index 6d01bf585c..c6ee88c676 100644
--- a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
@@ -21,8 +21,8 @@ import time
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import math_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py b/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
similarity index 76%
rename from tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py
rename to tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
index cc22ea1df7..8c07afbac5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
@@ -18,10 +18,8 @@ from __future__ import division
 from __future__ import print_function
 
 from absl.testing import parameterized
-import numpy as np
 
-from tensorflow.contrib.data.python.ops import get_single_element
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.ops import get_single_element
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
@@ -69,32 +67,6 @@ class GetSingleElementTest(test_base.DatasetTestBase, parameterized.TestCase):
         with self.assertRaisesRegexp(error, error_msg):
           sess.run(element, feed_dict={skip_t: skip, take_t: take})
 
-  @parameterized.named_parameters(
-      ("SumZero", 0),
-      ("SumOne", 1),
-      ("SumFive", 5),
-      ("SumTen", 10),
-  )
-  def testReduceDataset(self, stop):
-    def init_fn(_):
-      return np.int64(0)
-
-    def reduce_fn(state, value):
-      return state + value
-
-    def finalize_fn(state):
-      return state
-
-    sum_reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
-
-    stop_t = array_ops.placeholder(dtypes.int64, shape=[])
-    dataset = dataset_ops.Dataset.range(stop_t)
-    element = get_single_element.reduce_dataset(dataset, sum_reducer)
-
-    with self.cached_session() as sess:
-      value = sess.run(element, feed_dict={stop_t: stop})
-      self.assertEqual(stop * (stop - 1) / 2, value)
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
index d4d3d4adb2..c93a8353ce 100644
--- a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import unittest
 
-from tensorflow.contrib.data.python.ops import indexed_dataset_ops
+from tensorflow.python.data.experimental.ops import indexed_dataset_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
index 28bd670ab5..560902caad 100644
--- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
@@ -24,7 +24,7 @@ import time
 
 from six.moves import zip_longest
 
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py
index 58a1d7c93b..94393d6d4b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import iterator_ops
+from tensorflow.python.data.experimental.ops import iterator_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.estimator import estimator
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
index 385c4ef6ea..2f0bd1456b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
@@ -24,11 +24,11 @@ import time
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import error_ops
-from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import error_ops
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
index 751e6d5b30..612ee332c4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import time
 
-from tensorflow.contrib.data.python.ops import map_defun
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import map_defun
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
similarity index 81%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
rename to tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
index d7b5edcd9a..68f73bddb5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
@@ -12,9 +12,9 @@ py_test(
     srcs = ["assert_next_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
@@ -26,12 +26,12 @@ py_test(
     srcs = ["hoist_random_uniform_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
@@ -44,11 +44,11 @@ py_test(
     srcs = ["latency_all_edges_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/kernel_tests:stats_dataset_test_base",
-        "//tensorflow/contrib/data/python/ops:optimization",
-        "//tensorflow/contrib/data/python/ops:stats_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/kernel_tests:stats_dataset_test_base",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/experimental/ops:stats_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -59,7 +59,6 @@ py_test(
     srcs = ["map_vectorization_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:check_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -68,6 +67,7 @@ py_test(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:session",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
@@ -81,12 +81,12 @@ py_test(
     srcs = ["map_and_filter_fusion_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
@@ -99,12 +99,12 @@ py_test(
     srcs = ["map_parallelization_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
@@ -120,11 +120,11 @@ py_test(
         "optonly",
     ],
     deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
@@ -137,11 +137,11 @@ py_test(
     srcs = ["noop_elimination_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
@@ -154,9 +154,9 @@ py_test(
     srcs = ["optimize_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
index fe1b5280ba..45b77b5c20 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
index b43efb5c7c..3cd9753665 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
similarity index 91%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
index e4f18222fd..45623876ae 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
@@ -17,9 +17,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import stats_dataset_test_base
-from tensorflow.contrib.data.python.ops import optimization
-from tensorflow.contrib.data.python.ops import stats_ops
+from tensorflow.python.data.experimental.kernel_tests import stats_dataset_test_base
+from tensorflow.python.data.experimental.ops import optimization
+from tensorflow.python.data.experimental.ops import stats_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
index e9e3fc81e5..a439635716 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
index f7907eb890..334d8e3778 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index a5ea85f454..d47492753e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -22,8 +22,8 @@ import time
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
index 33c250ab2a..a9f2ce8c03 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
@@ -21,8 +21,8 @@ import time
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import math_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
index b9e60cfa4e..092e0ff62a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
index 04f499f8c5..eb661796c0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
index 66ccaceea5..13f924b656 100644
--- a/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
@@ -22,9 +22,9 @@ import copy
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import parsing_ops as contrib_parsing_ops
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.experimental.ops import parsing_ops as contrib_parsing_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
@@ -846,6 +846,5 @@ class ParseExampleTest(test_base.DatasetTestBase):
                       "allow_missing to be True."))
 
 
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
index 7a6a7a709a..7d7b842c17 100644
--- a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
@@ -19,9 +19,9 @@ from __future__ import print_function
 
 import threading
 
-from tensorflow.contrib.data.python.ops import prefetching_ops
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.compat import compat
+from tensorflow.python.data.experimental.ops import prefetching_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
index 2e901587f4..22412c3965 100644
--- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import counter
-from tensorflow.contrib.data.python.ops import enumerate_ops
+from tensorflow.python.data.experimental.ops import counter
+from tensorflow.python.data.experimental.ops import enumerate_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
index 66ed547b6d..a02f4bd14f 100644
--- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
@@ -23,8 +23,8 @@ import zlib
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.data.util import nest
diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
index f443b5501b..b6ab80d132 100644
--- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
@@ -22,9 +22,9 @@ import gzip
 import os
 import zlib
 
-from tensorflow.contrib.data.python.ops import readers
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import readers as core_readers
diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/python/data/experimental/kernel_tests/resample_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/resample_test.py
rename to tensorflow/python/data/experimental/kernel_tests/resample_test.py
index 32474bd411..775648c943 100644
--- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/resample_test.py
@@ -23,7 +23,7 @@ from absl.testing import parameterized
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
-from tensorflow.contrib.data.python.ops import resampling
+from tensorflow.python.data.experimental.ops import resampling
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
index bdf80eae4e..78ec80de23 100644
--- a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
@@ -21,7 +21,7 @@ import itertools
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import scan_ops
+from tensorflow.python.data.experimental.ops import scan_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/BUILD
rename to tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index aa89674c6e..20c02a5366 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -13,7 +13,6 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
@@ -24,6 +23,7 @@ py_library(
         "//tensorflow/python:training",
         "//tensorflow/python:util",
         "//tensorflow/python:variables",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
         "//tensorflow/python/data/ops:iterator_ops",
         "//third_party/py/numpy",
     ],
@@ -37,10 +37,10 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -81,9 +81,9 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:readers",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
     ],
 )
 
@@ -126,8 +126,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python/data/ops:readers",
     ],
 )
@@ -160,8 +160,8 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:grouping",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -174,8 +174,8 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:grouping",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -189,9 +189,9 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:error_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:error_ops",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -222,9 +222,9 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -258,8 +258,8 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -288,10 +288,10 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -326,8 +326,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/kernel_tests:reader_dataset_ops_test_base",
     ],
 )
 
@@ -370,8 +370,8 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -384,8 +384,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:scan_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:scan_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -411,10 +411,10 @@ py_test(
     srcs_version = "PY2AND3",
     tags = ["no_pip"],
     deps = [
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:training",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -427,8 +427,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:shuffle_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:shuffle_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -441,10 +441,10 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:training",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -457,11 +457,11 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:sql_dataset_op_test_base",
-        "//tensorflow/contrib/data/python/ops:readers",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/experimental/kernel_tests:sql_dataset_op_test_base",
+        "//tensorflow/python/data/experimental/ops:readers",
     ],
 )
 
@@ -473,10 +473,10 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:stats_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/experimental/ops:stats_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -490,8 +490,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python/data/ops:readers",
     ],
 )
@@ -505,8 +505,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python/data/ops:readers",
     ],
 )
@@ -519,8 +519,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -534,8 +534,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:unique",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:unique",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/batch_dataset_serialization_test.py
similarity index 94%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/batch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/batch_dataset_serialization_test.py
index af87d8b608..d72a6df14c 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/batch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/batch_dataset_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/cache_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/cache_dataset_serialization_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/cache_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/cache_dataset_serialization_test.py
index 1b6059ccbc..2bcf77f5d8 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/cache_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/cache_dataset_serialization_test.py
@@ -21,7 +21,7 @@ import os
 
 from absl.testing import parameterized
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/concatenate_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/concatenate_dataset_serialization_test.py
similarity index 94%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/concatenate_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/concatenate_dataset_serialization_test.py
index 96f13d75a3..c075dff8cb 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/concatenate_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/concatenate_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/csv_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/csv_dataset_serialization_test.py
similarity index 93%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/csv_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/csv_dataset_serialization_test.py
index 247f2046ea..d4983492e7 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/csv_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/csv_dataset_serialization_test.py
@@ -20,8 +20,8 @@ from __future__ import print_function
 import gzip
 import os
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.platform import test
 
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/dataset_constructor_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_constructor_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/dataset_constructor_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/dataset_constructor_serialization_test.py
index 2139b5c33d..41a095fb1a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/dataset_constructor_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_constructor_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.platform import test
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py
new file mode 100644
index 0000000000..7f435b8239
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py
@@ -0,0 +1,692 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Base class for testing serializable datasets."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.util import nest
+
+
+def remove_variants(get_next_op):
+  # TODO(b/72408568): Remove this once session.run can get
+  # variant tensors.
+  """Remove variants from a nest structure, so sess.run will execute."""
+
+  def _remove_variant(x):
+    if isinstance(x, ops.Tensor) and x.dtype == dtypes.variant:
+      return ()
+    else:
+      return x
+
+  return nest.map_structure(_remove_variant, get_next_op)
+
+
+class DatasetSerializationTestBase(test.TestCase):
+  """Base class for testing serializable datasets."""
+
+  def tearDown(self):
+    self._delete_ckpt()
+
+  # TODO(b/72657739): Remove sparse_tensor argument, which is to test the
+  # (deprecated) saveable `SparseTensorSliceDataset`, once the API
+  # `from_sparse_tensor_slices()`and related tests are deleted.
+  def run_core_tests(self, ds_fn1, ds_fn2, num_outputs, sparse_tensors=False):
+    """Runs the core tests.
+
+    Args:
+      ds_fn1: 0-argument function that returns a Dataset.
+      ds_fn2: 0-argument function that returns a Dataset different from
+        ds_fn1. If None, verify_restore_in_modified_graph test is not run.
+      num_outputs: Total number of outputs expected from this Dataset.
+      sparse_tensors: Whether dataset is built from SparseTensor(s).
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.verify_unused_iterator(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_fully_used_iterator(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_exhausted_iterator(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_init_before_restore(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_multiple_breaks(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_reset_restored_iterator(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_restore_in_empty_graph(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    if ds_fn2:
+      self.verify_restore_in_modified_graph(
+          ds_fn1, ds_fn2, num_outputs, sparse_tensors=sparse_tensors)
+
+  def verify_unused_iterator(self,
+                             ds_fn,
+                             num_outputs,
+                             sparse_tensors=False,
+                             verify_exhausted=True):
+    """Verifies that saving and restoring an unused iterator works.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.verify_run_with_breaks(
+        ds_fn, [0],
+        num_outputs,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+  def verify_fully_used_iterator(self, ds_fn, num_outputs,
+                                 sparse_tensors=False):
+    """Verifies that saving and restoring a fully used iterator works.
+
+    Note that this only checks saving and restoring an iterator from which
+    `num_outputs` items have been produced but does not check for an
+    exhausted iterator, i.e., one from which an OutOfRange error has been
+    returned.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      sparse_tensors: See `run_core_tests`.
+
+    Raises:
+      AssertionError if test fails.
+    """
+    self.verify_run_with_breaks(
+        ds_fn, [num_outputs], num_outputs, sparse_tensors=sparse_tensors)
+
+  def verify_exhausted_iterator(self, ds_fn, num_outputs, sparse_tensors=False):
+    """Verifies that saving and restoring an exhausted iterator works.
+
+    An exhausted iterator is one which has returned an OutOfRange error.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      sparse_tensors: See `run_core_tests`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.gen_outputs(
+        ds_fn, [],
+        num_outputs,
+        verify_exhausted=True,
+        sparse_tensors=sparse_tensors)
+    actual = self.gen_outputs(
+        ds_fn, [],
+        0,
+        ckpt_saved=True,
+        verify_exhausted=True,
+        sparse_tensors=sparse_tensors)
+    self.assertEqual(len(actual), 0)
+
+  def verify_init_before_restore(self,
+                                 ds_fn,
+                                 num_outputs,
+                                 sparse_tensors=False,
+                                 verify_exhausted=True):
+    """Verifies that restoring into an already initialized iterator works.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.verify_run_with_breaks(
+        ds_fn,
+        self.gen_break_points(num_outputs),
+        num_outputs,
+        init_before_restore=True,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+  def verify_multiple_breaks(self,
+                             ds_fn,
+                             num_outputs,
+                             num_breaks=10,
+                             sparse_tensors=False,
+                             verify_exhausted=True):
+    """Attempts to save/restore at multiple break points.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      num_breaks: The number of break points. These are uniformly spread in
+        [0, num_outputs] both inclusive.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.verify_run_with_breaks(
+        ds_fn,
+        self.gen_break_points(num_outputs, num_breaks),
+        num_outputs,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+  def verify_reset_restored_iterator(self,
+                                     ds_fn,
+                                     num_outputs,
+                                     break_point=None,
+                                     sparse_tensors=False,
+                                     verify_exhausted=True):
+    """Attempts to re-initialize a restored iterator.
+
+    This is useful when restoring a training checkpoint during validation.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    break_point = num_outputs // 2 if not break_point else break_point
+
+    # Collect ground truth containing all outputs.
+    expected = self.gen_outputs(
+        ds_fn, [],
+        num_outputs,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    # Skip some items and save checkpoint.
+    self.gen_outputs(
+        ds_fn, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+
+    actual = []
+    # Restore from checkpoint and then run init_op.
+    with ops.Graph().as_default() as g:
+      saver = self._import_meta_graph()
+      init_op, get_next_op = self._get_iterator_ops_from_collection(
+          ds_fn, sparse_tensors=sparse_tensors)
+      get_next_op = remove_variants(get_next_op)
+      with self.session(graph=g) as sess:
+        self._restore(saver, sess)
+        self._initialize(init_op, sess)
+        for _ in range(num_outputs):
+          actual.append(sess.run(get_next_op))
+        if verify_exhausted:
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(get_next_op)
+    self.match(expected, actual)
+
+  def verify_restore_in_modified_graph(self,
+                                       ds_fn1,
+                                       ds_fn2,
+                                       num_outputs,
+                                       break_point=None,
+                                       sparse_tensors=False,
+                                       verify_exhausted=True):
+    """Attempts to restore an iterator in a modified graph.
+
+    Builds an input pipeline using ds_fn1, runs it for `break_point` steps
+    and saves a checkpoint. Then builds a new graph using ds_fn2, restores
+    the checkpoint from ds_fn1 and verifies that the restore is successful.
+
+    Args:
+      ds_fn1: See `run_core_tests`.
+      ds_fn2: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    break_point = num_outputs // 2 if not break_point else break_point
+
+    # Skip `break_point` items and store the remaining produced from ds_fn1
+    # in `expected`.
+    self.gen_outputs(
+        ds_fn1, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+    expected = self.gen_outputs(
+        ds_fn1, [],
+        num_outputs - break_point,
+        ckpt_saved=True,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    # Generate `break_point` items from ds_fn1 and save checkpoint.
+    self.gen_outputs(
+        ds_fn1, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+
+    actual = []
+    # Build graph for ds_fn2 but load checkpoint for ds_fn1.
+    with ops.Graph().as_default() as g:
+      _, get_next_op, saver = self._build_graph(
+          ds_fn2, sparse_tensors=sparse_tensors)
+      get_next_op = remove_variants(get_next_op)
+      with self.session(graph=g) as sess:
+        self._restore(saver, sess)
+        for _ in range(num_outputs - break_point):
+          actual.append(sess.run(get_next_op))
+        if verify_exhausted:
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(get_next_op)
+
+    self.match(expected, actual)
+
+  def verify_restore_in_empty_graph(self,
+                                    ds_fn,
+                                    num_outputs,
+                                    break_point=None,
+                                    sparse_tensors=False,
+                                    verify_exhausted=True):
+    """Attempts to restore an iterator in an empty graph.
+
+    Builds an input pipeline using ds_fn, runs it for `break_point` steps
+    and saves a checkpoint. Then builds a new empty graph, restores
+    the checkpoint from ds_fn and verifies that the restore is successful.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    break_point = num_outputs // 2 if not break_point else break_point
+
+    # Skip `break_point` items and store the remaining produced from ds_fn
+    # in `expected`.
+    self.gen_outputs(
+        ds_fn, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+    expected = self.gen_outputs(
+        ds_fn, [],
+        num_outputs - break_point,
+        ckpt_saved=True,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    # Generate `break_point` items from ds_fn and save checkpoint.
+    self.gen_outputs(
+        ds_fn, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+
+    actual = []
+    # Build an empty graph but load checkpoint for ds_fn.
+    with ops.Graph().as_default() as g:
+      get_next_op, saver = self._build_empty_graph(
+          ds_fn, sparse_tensors=sparse_tensors)
+      get_next_op = remove_variants(get_next_op)
+      with self.session(graph=g) as sess:
+        self._restore(saver, sess)
+        for _ in range(num_outputs - break_point):
+          actual.append(sess.run(get_next_op))
+        if verify_exhausted:
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(get_next_op)
+
+    self.match(expected, actual)
+
+  def verify_error_on_save(self,
+                           ds_fn,
+                           num_outputs,
+                           error,
+                           break_point=None,
+                           sparse_tensors=False):
+    """Attempts to save a non-saveable iterator.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      error: Declared error when trying to save iterator.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+
+    break_point = num_outputs // 2 if not break_point else break_point
+    with ops.Graph().as_default() as g:
+      init_op, get_next_op, saver = self._build_graph(
+          ds_fn, sparse_tensors=sparse_tensors)
+      get_next_op = remove_variants(get_next_op)
+      with self.session(graph=g) as sess:
+        self._initialize(init_op, sess)
+        for _ in range(break_point):
+          sess.run(get_next_op)
+        with self.assertRaises(error):
+          self._save(sess, saver)
+
+  def verify_run_with_breaks(self,
+                             ds_fn,
+                             break_points,
+                             num_outputs,
+                             init_before_restore=False,
+                             sparse_tensors=False,
+                             verify_exhausted=True):
+    """Verifies that ds_fn() produces the same outputs with and without breaks.
+
+    1. Builds a Dataset using `ds_fn` and produces `num_outputs` items from it
+       *without* stopping at break points.
+    2. Builds a Dataset using `ds_fn` and produces `num_outputs` items from it
+       with stopping at break points.
+
+    Deep matches outputs from 1 and 2.
+
+    Args:
+      ds_fn: See `gen_outputs`.
+      break_points: See `gen_outputs`.
+      num_outputs: See `gen_outputs`.
+      init_before_restore: See `gen_outputs`.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    expected = self.gen_outputs(
+        ds_fn, [],
+        num_outputs,
+        init_before_restore=init_before_restore,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    actual = self.gen_outputs(
+        ds_fn,
+        break_points,
+        num_outputs,
+        init_before_restore=init_before_restore,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    self.match(expected, actual)
+
+  def gen_outputs(self,
+                  ds_fn,
+                  break_points,
+                  num_outputs,
+                  ckpt_saved=False,
+                  init_before_restore=False,
+                  sparse_tensors=False,
+                  verify_exhausted=True,
+                  save_checkpoint_at_end=True):
+    """Generates elements from input dataset while stopping at break points.
+
+    Produces `num_outputs` outputs and saves the state of the iterator in the
+    Saver checkpoint.
+
+    Args:
+      ds_fn: 0-argument function that returns the dataset.
+      break_points: A list of integers. For each `break_point` in
+        `break_points`, we produce outputs till `break_point` number of items
+        have been produced and then checkpoint the state. The current graph
+        and session are destroyed and a new graph and session are used to
+        produce outputs till next checkpoint or till `num_outputs` elements
+        have been produced. `break_point` must be <= `num_outputs`.
+      num_outputs: The total number of outputs to produce from the iterator.
+      ckpt_saved: Whether a checkpoint already exists. If False, we build the
+        graph from ds_fn.
+      init_before_restore: Whether init should be called before saver.restore.
+        This is just so that we can verify that restoring an already initialized
+        iterator works.
+      sparse_tensors:  Whether dataset is built from SparseTensor(s).
+      verify_exhausted: Whether to verify that the iterator has been exhausted
+        after producing `num_outputs` elements.
+      save_checkpoint_at_end: Whether to save a checkpoint after producing all
+        outputs. If False, checkpoints are saved each break point but not at the
+        end. Note that checkpoints overwrite each other so there is always only
+        a single checkpoint available. Defaults to True.
+
+    Returns:
+      A list of `num_outputs` items.
+    """
+    outputs = []
+
+    def get_ops():
+      if ckpt_saved:
+        saver = self._import_meta_graph()
+        init_op, get_next_op = self._get_iterator_ops_from_collection(
+            ds_fn, sparse_tensors=sparse_tensors)
+      else:
+        init_op, get_next_op, saver = self._build_graph(
+            ds_fn, sparse_tensors=sparse_tensors)
+      return init_op, get_next_op, saver
+
+    for i in range(len(break_points) + 1):
+      with ops.Graph().as_default() as g:
+        init_op, get_next_op, saver = get_ops()
+        get_next_op = remove_variants(get_next_op)
+        with self.session(graph=g) as sess:
+          if ckpt_saved:
+            if init_before_restore:
+              self._initialize(init_op, sess)
+            self._restore(saver, sess)
+          else:
+            self._initialize(init_op, sess)
+          start = break_points[i - 1] if i > 0 else 0
+          end = break_points[i] if i < len(break_points) else num_outputs
+          num_iters = end - start
+          for _ in range(num_iters):
+            outputs.append(sess.run(get_next_op))
+          if i == len(break_points) and verify_exhausted:
+            with self.assertRaises(errors.OutOfRangeError):
+              sess.run(get_next_op)
+          if save_checkpoint_at_end or i < len(break_points):
+            self._save(sess, saver)
+            ckpt_saved = True
+
+    return outputs
+
+  def match(self, expected, actual):
+    """Matches nested structures.
+
+    Recursively matches shape and values of `expected` and `actual`.
+    Handles scalars, numpy arrays and other python sequence containers
+    e.g. list, dict.
+
+    Args:
+      expected: Nested structure 1.
+      actual: Nested structure 2.
+
+    Raises:
+      AssertionError if matching fails.
+    """
+    if isinstance(expected, np.ndarray):
+      expected = expected.tolist()
+    if isinstance(actual, np.ndarray):
+      actual = actual.tolist()
+    self.assertEqual(type(expected), type(actual))
+
+    if nest.is_sequence(expected):
+      self.assertEqual(len(expected), len(actual))
+      if isinstance(expected, dict):
+        for key1, key2 in zip(sorted(expected), sorted(actual)):
+          self.assertEqual(key1, key2)
+          self.match(expected[key1], actual[key2])
+      else:
+        for item1, item2 in zip(expected, actual):
+          self.match(item1, item2)
+    else:
+      self.assertEqual(expected, actual)
+
+  def does_not_match(self, expected, actual):
+    with self.assertRaises(AssertionError):
+      self.match(expected, actual)
+
+  def gen_break_points(self, num_outputs, num_samples=10):
+    """Generates `num_samples` breaks points in [0, num_outputs]."""
+    return np.linspace(0, num_outputs, num_samples, dtype=int)
+
+  def _build_graph(self, ds_fn, sparse_tensors=False):
+    iterator = ds_fn().make_initializable_iterator()
+
+    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
+    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
+    init_op = iterator.initializer
+    if sparse_tensors:
+      get_next = sparse_tensor.SparseTensor(*iterator.get_next())
+    else:
+      get_next = iterator.get_next()
+    self._add_iterator_ops_to_collection(init_op, get_next, ds_fn,
+                                         sparse_tensors)
+    saver = saver_lib.Saver(allow_empty=True)
+    return init_op, get_next, saver
+
+  def _build_empty_graph(self, ds_fn, sparse_tensors=False):
+    iterator = iterator_ops.Iterator.from_structure(
+        self._get_output_types(ds_fn),
+        output_shapes=self._get_output_shapes(ds_fn),
+        output_classes=self._get_output_classes(ds_fn))
+    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
+    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
+    if sparse_tensors:
+      get_next = sparse_tensor.SparseTensor(*iterator.get_next())
+    else:
+      get_next = iterator.get_next()
+    saver = saver_lib.Saver(allow_empty=True)
+    return get_next, saver
+
+  def _add_iterator_ops_to_collection(self,
+                                      init_op,
+                                      get_next,
+                                      ds_fn,
+                                      sparse_tensors=False):
+    ops.add_to_collection("iterator_ops", init_op)
+    # `get_next` may be a tuple e.g. in TensorSliceDataset. Since Collections
+    # do not support tuples we flatten the tensors and restore the shape in
+    # `_get_iterator_ops_from_collection`.
+    if sparse_tensors:  # specific for deprecated `from_sparse_tensor_slices`.
+      ops.add_to_collection("iterator_ops", get_next.indices)
+      ops.add_to_collection("iterator_ops", get_next.values)
+      ops.add_to_collection("iterator_ops", get_next.dense_shape)
+      return
+
+    get_next_list = nest.flatten(get_next)
+    for i, output_class in enumerate(
+        nest.flatten(self._get_output_classes(ds_fn))):
+      if output_class is sparse_tensor.SparseTensor:
+        ops.add_to_collection("iterator_ops", get_next_list[i].indices)
+        ops.add_to_collection("iterator_ops", get_next_list[i].values)
+        ops.add_to_collection("iterator_ops", get_next_list[i].dense_shape)
+      else:
+        ops.add_to_collection("iterator_ops", get_next_list[i])
+
+  def _get_iterator_ops_from_collection(self, ds_fn, sparse_tensors=False):
+    all_ops = ops.get_collection("iterator_ops")
+    if sparse_tensors:  # specific for deprecated `from_sparse_tensor_slices`.
+      init_op, indices, values, dense_shape = all_ops
+      return init_op, sparse_tensor.SparseTensor(indices, values, dense_shape)
+    get_next_list = []
+    i = 1
+    for output_class in nest.flatten(self._get_output_classes(ds_fn)):
+      if output_class is sparse_tensor.SparseTensor:
+        indices, values, dense_shape = all_ops[i:i + 3]
+        i += 3
+        get_next_list.append(
+            sparse_tensor.SparseTensor(indices, values, dense_shape))
+      else:
+        get_next_list.append(all_ops[i])
+        i += 1
+    return all_ops[0], nest.pack_sequence_as(
+        self._get_output_types(ds_fn), get_next_list)
+
+  def _get_output_types(self, ds_fn):
+    with ops.Graph().as_default():
+      return ds_fn().output_types
+
+  def _get_output_shapes(self, ds_fn):
+    with ops.Graph().as_default():
+      return ds_fn().output_shapes
+
+  def _get_output_classes(self, ds_fn):
+    with ops.Graph().as_default():
+      return ds_fn().output_classes
+
+  def _ckpt_path(self):
+    return os.path.join(self.get_temp_dir(), "iterator")
+
+  def _latest_ckpt(self):
+    return checkpoint_management.latest_checkpoint(self.get_temp_dir())
+
+  def _save(self, sess, saver):
+    saver.save(sess, self._ckpt_path())
+
+  def _restore(self, saver, sess):
+    sess.run(lookup_ops.tables_initializer())
+    saver.restore(sess, self._latest_ckpt())
+
+  def _initialize(self, init_op, sess):
+    sess.run(variables.global_variables_initializer())
+    sess.run(lookup_ops.tables_initializer())
+    sess.run(init_op)
+
+  def _import_meta_graph(self):
+    meta_file_path = self._ckpt_path() + ".meta"
+    return saver_lib.import_meta_graph(meta_file_path)
+
+  def _delete_ckpt(self):
+    # Remove all checkpoint files.
+    prefix = self._ckpt_path()
+    pattern = prefix + "*"
+    files = gfile.Glob(pattern)
+    map(gfile.Remove, files)
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/filter_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/filter_dataset_serialization_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/filter_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/filter_dataset_serialization_test.py
index 7c170078a1..225f6cbac0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/filter_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/filter_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import math_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
index 34392d88d4..70caf3e0d5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/flat_map_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/flat_map_dataset_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/flat_map_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/flat_map_dataset_serialization_test.py
index 16051ffd3f..c30534a9e9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/flat_map_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/flat_map_dataset_serialization_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/group_by_reducer_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/group_by_reducer_serialization_test.py
similarity index 93%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/group_by_reducer_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/group_by_reducer_serialization_test.py
index 571e0899bb..169c8845d0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/group_by_reducer_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/group_by_reducer_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import grouping
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/group_by_window_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/group_by_window_serialization_test.py
similarity index 93%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/group_by_window_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/group_by_window_serialization_test.py
index f86af4084e..e5bc76288e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/group_by_window_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/group_by_window_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import grouping
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/ignore_errors_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/ignore_errors_serialization_test.py
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/ignore_errors_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/ignore_errors_serialization_test.py
index 65ae9923b8..df1f43129a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/ignore_errors_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/ignore_errors_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import error_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import error_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/interleave_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/interleave_dataset_serialization_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/interleave_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/interleave_dataset_serialization_test.py
index 243f6405a1..0c1d40ce39 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/interleave_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/interleave_dataset_serialization_test.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import sparse_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
similarity index 94%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
index c9cd211328..166ffa99ca 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import math
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/map_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/map_dataset_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/map_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/map_dataset_serialization_test.py
index ab783e5cce..b93156a96c 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/map_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/map_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/optimize_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/optimize_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py
index d5c03495e3..ed4a1da596 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/optimize_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/padded_batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
index 9ac42a461a..6f72b24673 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import string_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
index 1f8a584df9..b8f38e8a28 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import sparse_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/parallel_map_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
index 3fb7605be1..a0bdd4fa59 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -65,7 +65,7 @@ class ParallelMapDatasetSerializationTest(
     for ds_fn in [self._build_ds, self._build_ds_with_prefetch]:
       self.run_core_tests(
           ds_fn,
-          lambda: ds_fn(multiplier=15.0),
+          lambda: ds_fn(multiplier=15.0),  # pylint: disable=cell-var-from-loop
           self._num_outputs)
 
   def testSaveStatefulFunction(self):
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/parse_example_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/parse_example_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
index d3fa84e74c..a0dd6960b0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/parse_example_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.platform import test
 
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/prefetch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/prefetch_dataset_serialization_test.py
similarity index 93%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/prefetch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/prefetch_dataset_serialization_test.py
index c802402461..00d74c0025 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/prefetch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/prefetch_dataset_serialization_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/range_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/range_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py
index 6341190847..ef99d01c73 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/range_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/sample_from_datasets_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/sample_from_datasets_serialization_test.py
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/sample_from_datasets_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/sample_from_datasets_serialization_test.py
index fdb35ea624..c23c1ecdfb 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/sample_from_datasets_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/sample_from_datasets_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/scan_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/scan_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/scan_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/scan_dataset_serialization_test.py
index af9ef48c0f..5f50160619 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/scan_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/scan_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import scan_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import scan_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/sequence_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/sequence_dataset_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/sequence_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/sequence_dataset_serialization_test.py
index 2afebca0f5..fe99a3d3d9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/sequence_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/sequence_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/serialization_integration_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/serialization_integration_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py
index 6aac50ecd9..88d5c896c9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/serialization_integration_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
index f199ec835e..f847ac19f9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import shuffle_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import shuffle_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py
index a59fa94d66..a04f1ddafc 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/sql_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
similarity index 88%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/sql_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
index 93b26ed58a..b179770ce3 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/sql_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
@@ -19,9 +19,9 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.data.python.kernel_tests import sql_dataset_op_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.experimental.kernel_tests import sql_dataset_op_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/stats_dataset_serialization_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/stats_dataset_serialization_test.py
index a10f85263a..ef7061b190 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/stats_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import stats_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import stats_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/textline_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/textline_dataset_serialization_test.py
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/textline_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/textline_dataset_serialization_test.py
index 2483787f44..c87a7443a7 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/textline_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/textline_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/tf_record_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/tf_record_dataset_serialization_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/tf_record_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/tf_record_dataset_serialization_test.py
index 55a6257a27..f0dcc131d4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/tf_record_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/tf_record_dataset_serialization_test.py
@@ -21,8 +21,8 @@ import gzip
 import os
 import zlib
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/unbatch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/unbatch_dataset_serialization_test.py
similarity index 91%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/unbatch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/unbatch_dataset_serialization_test.py
index b2a5a8a20d..528598dfe4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/unbatch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/unbatch_dataset_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/unique_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/unique_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/unique_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/unique_dataset_serialization_test.py
index 22f15b8846..e2862af4d6 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/unique_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/unique_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import unique
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import unique
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/zip_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/zip_dataset_serialization_test.py
similarity index 94%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/zip_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/zip_dataset_serialization_test.py
index 340a6ff72e..4ea6131c22 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/zip_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/zip_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
new file mode 100644
index 0000000000..88d5c896c9
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
@@ -0,0 +1,85 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Integration test for dataset serialization."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import test
+from tensorflow.python.training import saver as saver_lib
+
+
+class SerializationIntegrationTest(test.TestCase):
+
+  def _build_input_pipeline(self, name, num_outputs):
+    with ops.name_scope(name):
+      ds = dataset_ops.Dataset.range(num_outputs).shuffle(
+          10, reshuffle_each_iteration=False).prefetch(10)
+      iterator = ds.make_initializable_iterator()
+      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
+      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
+      return iterator.initializer, iterator.get_next()
+
+  def _build_graph(self, num_pipelines, num_outputs):
+    init_ops = []
+    get_next_ops = []
+    for i in range(num_pipelines):
+      name = "input_pipeline_%d" % i
+      init_op, get_next_op = self._build_input_pipeline(name, num_outputs)
+      init_ops.append(init_op)
+      get_next_ops.append(get_next_op)
+    saver = saver_lib.Saver()
+    return init_ops, get_next_ops, saver
+
+  def _ckpt_path(self):
+    return os.path.join(self.get_temp_dir(), "iterator")
+
+  def testConcurrentSaves(self):
+    num_pipelines = 100
+    num_outputs = 100
+    break_point = 10
+    all_outputs = [[] for _ in range(num_pipelines)]
+    with ops.Graph().as_default() as g:
+      init_ops, get_next_ops, saver = self._build_graph(num_pipelines,
+                                                        num_outputs)
+      with self.session(graph=g) as sess:
+        sess.run(init_ops)
+        for _ in range(break_point):
+          output = sess.run(get_next_ops)
+          for i in range(num_pipelines):
+            all_outputs[i].append(output[i])
+        saver.save(sess, self._ckpt_path())
+
+    with ops.Graph().as_default() as g:
+      init_ops, get_next_ops, saver = self._build_graph(num_pipelines,
+                                                        num_outputs)
+      with self.session(graph=g) as sess:
+        saver.restore(sess, self._ckpt_path())
+        for _ in range(num_outputs - break_point):
+          output = sess.run(get_next_ops)
+          for i in range(num_pipelines):
+            all_outputs[i].append(output[i])
+
+    for output in all_outputs:
+      self.assertSequenceEqual(sorted(output), range(num_outputs))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
index c97002a255..50895b5945 100644
--- a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import shuffle_ops
+from tensorflow.python.data.experimental.ops import shuffle_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
index 52823d3fca..301f75488a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import sql_dataset_op_test_base
+from tensorflow.python.data.experimental.kernel_tests import sql_dataset_op_test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
index 319a2ea263..a135c357f0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
@@ -23,7 +23,7 @@ import os
 
 import sqlite3
 
-from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index be8ae5e955..6761fbd16b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests import stats_dataset_test_base
-from tensorflow.contrib.data.python.ops import stats_ops
+from tensorflow.python.data.experimental.kernel_tests import stats_dataset_test_base
+from tensorflow.python.data.experimental.ops import stats_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_test_base.py
similarity index 100%
rename from tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/stats_dataset_test_base.py
diff --git a/tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
index 08de3a9143..4432dcb05a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
@@ -22,8 +22,8 @@ import threading
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import threadpool
-from tensorflow.contrib.data.python.ops import unique
+from tensorflow.python.data.experimental.ops import threadpool
+from tensorflow.python.data.experimental.ops import unique
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
index 8856ce5afb..b5a0b20f3f 100644
--- a/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import unique
+from tensorflow.python.data.experimental.ops import unique
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
index fca546a570..25a2e63ba1 100644
--- a/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.data.python.ops import writers
+from tensorflow.python.data.experimental.ops import writers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
diff --git a/tensorflow/python/data/experimental/ops/BUILD b/tensorflow/python/data/experimental/ops/BUILD
new file mode 100644
index 0000000000..915d399f1b
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/BUILD
@@ -0,0 +1,377 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_gen_op_wrapper_py",
+    "tf_kernel_library",
+)
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
+
+py_library(
+    name = "counter",
+    srcs = ["counter.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":scan_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "get_single_element",
+    srcs = ["get_single_element.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "iterator_ops",
+    srcs = [
+        "iterator_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:basic_session_run_hooks",
+        "//tensorflow/python:checkpoint_management",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:saver",
+        "//tensorflow/python:session_run_hook",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:optional_ops",
+    ],
+)
+
+py_library(
+    name = "random_ops",
+    srcs = [
+        "random_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:random_seed",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "readers",
+    srcs = [
+        "readers.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":batching",
+        ":interleave_ops",
+        ":optimization",
+        ":parsing_ops",
+        ":shuffle_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/data/util:convert",
+        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "shuffle_ops",
+    srcs = [
+        "shuffle_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "batching",
+    srcs = ["batching.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":get_single_element",
+        ":grouping",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:tensor_util",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:convert",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "enumerate_ops",
+    srcs = ["enumerate_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "error_ops",
+    srcs = ["error_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "grouping",
+    srcs = ["grouping.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:check_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:function",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "interleave_ops",
+    srcs = ["interleave_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":random_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:stateless_random_ops_gen",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "optimization",
+    srcs = ["optimization.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "parsing_ops",
+    srcs = ["parsing_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
+
+py_library(
+    name = "map_defun",
+    srcs = ["map_defun.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:tensor_shape",
+    ],
+)
+
+py_library(
+    name = "resampling",
+    srcs = ["resampling.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":batching",
+        ":interleave_ops",
+        ":scan_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:logging_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "scan_ops",
+    srcs = ["scan_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:function",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "stats_ops",
+    srcs = ["stats_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "threadpool",
+    srcs = ["threadpool.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/eager:context",
+    ],
+)
+
+py_library(
+    name = "unique",
+    srcs = [
+        "unique.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "writers",
+    srcs = [
+        "writers.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "indexed_dataset_ops",
+    srcs = ["indexed_dataset_ops.py"],
+    deps = [
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "prefetching_ops",
+    srcs = ["prefetching_ops.py"],
+    deps = [
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "dataset_ops",
+    deps = [
+        ":batching",
+        ":counter",
+        ":enumerate_ops",
+        ":error_ops",
+        ":get_single_element",
+        ":grouping",
+        ":indexed_dataset_ops",
+        ":interleave_ops",
+        ":map_defun",
+        ":optimization",
+        ":prefetching_ops",
+        ":readers",
+        ":resampling",
+        ":scan_ops",
+        ":shuffle_ops",
+        ":stats_ops",
+        ":threadpool",
+        ":unique",
+        ":writers",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
diff --git a/tensorflow/python/data/experimental/ops/batching.py b/tensorflow/python/data/experimental/ops/batching.py
new file mode 100644
index 0000000000..d42af9e7e9
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/batching.py
@@ -0,0 +1,669 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Batching dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import get_single_element
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import convert
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+def batch_window(dataset):
+  """Batches a window of tensors.
+
+  Args:
+    dataset: the input dataset.
+
+  Returns:
+    A `Tensor` representing the batch of the entire input dataset.
+  """
+  if isinstance(dataset.output_classes, tuple):
+    raise TypeError("Input dataset expected to have a single component")
+  if dataset.output_classes is ops.Tensor:
+    return _batch_dense_window(dataset)
+  elif dataset.output_classes is sparse_tensor.SparseTensor:
+    return _batch_sparse_window(dataset)
+  else:
+    raise TypeError("Unsupported dataset type: %s" % dataset.output_classes)
+
+
+def _batch_dense_window(dataset):
+  """Batches a window of dense tensors."""
+
+  def key_fn(_):
+    return np.int64(0)
+
+  def shape_init_fn(_):
+    return array_ops.shape(first_element)
+
+  def shape_reduce_fn(state, value):
+    check_ops.assert_equal(state, array_ops.shape(value))
+    return state
+
+  def finalize_fn(state):
+    return state
+
+  if dataset.output_shapes.is_fully_defined():
+    shape = dataset.output_shapes
+  else:
+    first_element = get_single_element.get_single_element(dataset.take(1))
+    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
+                                     finalize_fn)
+    shape = get_single_element.get_single_element(
+        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))
+
+  def batch_init_fn(_):
+    batch_shape = array_ops.concat([[0], shape], 0)
+    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)
+
+  def batch_reduce_fn(state, value):
+    return array_ops.concat([state, [value]], 0)
+
+  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
+  return get_single_element.get_single_element(
+      dataset.apply(grouping.group_by_reducer(key_fn, batch_reducer)))
+
+
+def _batch_sparse_window(dataset):
+  """Batches a window of sparse tensors."""
+
+  def key_fn(_):
+    return np.int64(0)
+
+  def shape_init_fn(_):
+    return first_element.dense_shape
+
+  def shape_reduce_fn(state, value):
+    check_ops.assert_equal(state, value.dense_shape)
+    return state
+
+  def finalize_fn(state):
+    return state
+
+  if dataset.output_shapes.is_fully_defined():
+    shape = dataset.output_shapes
+  else:
+    first_element = get_single_element.get_single_element(dataset.take(1))
+    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
+                                     finalize_fn)
+    shape = get_single_element.get_single_element(
+        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))
+
+  def batch_init_fn(_):
+    indices_shape = array_ops.concat([[0], [array_ops.size(shape) + 1]], 0)
+    return sparse_tensor.SparseTensor(
+        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
+        values=constant_op.constant([], shape=[0], dtype=dataset.output_types),
+        dense_shape=array_ops.concat(
+            [np.array([0], dtype=np.int64),
+             math_ops.cast(shape, dtypes.int64)], 0))
+
+  def batch_reduce_fn(state, value):
+    return sparse_ops.sparse_concat(0, [state, value])
+
+  def reshape_fn(value):
+    return sparse_ops.sparse_reshape(
+        value,
+        array_ops.concat([np.array([1], dtype=np.int64), value.dense_shape], 0))
+
+  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
+  return get_single_element.get_single_element(
+      dataset.map(reshape_fn).apply(
+          grouping.group_by_reducer(key_fn, batch_reducer)))
+
+
+@tf_export("data.experimental.dense_to_sparse_batch")
+def dense_to_sparse_batch(batch_size, row_shape):
+  """A transformation that batches ragged elements into `tf.SparseTensor`s.
+
+  Like `Dataset.padded_batch()`, this transformation combines multiple
+  consecutive elements of the dataset, which might have different
+  shapes, into a single element. The resulting element has three
+  components (`indices`, `values`, and `dense_shape`), which
+  comprise a `tf.SparseTensor` that represents the same data. The
+  `row_shape` represents the dense shape of each row in the
+  resulting `tf.SparseTensor`, to which the effective batch size is
+  prepended. For example:
+
+  ```python
+  # NOTE: The following examples use `{ ... }` to represent the
+  # contents of a dataset.
+  a = { ['a', 'b', 'c'], ['a', 'b'], ['a', 'b', 'c', 'd'] }
+
+  a.apply(tf.data.experimental.dense_to_sparse_batch(
+      batch_size=2, row_shape=[6])) ==
+  {
+      ([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],  # indices
+       ['a', 'b', 'c', 'a', 'b'],                 # values
+       [2, 6]),                                   # dense_shape
+      ([[0, 0], [0, 1], [0, 2], [0, 3]],
+       ['a', 'b', 'c', 'd'],
+       [1, 6])
+  }
+  ```
+
+  Args:
+    batch_size: A `tf.int64` scalar `tf.Tensor`, representing the
+      number of consecutive elements of this dataset to combine in a
+      single batch.
+    row_shape: A `tf.TensorShape` or `tf.int64` vector tensor-like
+      object representing the equivalent dense shape of a row in the
+      resulting `tf.SparseTensor`. Each element of this dataset must
+      have the same rank as `row_shape`, and must have size less
+      than or equal to `row_shape` in each dimension.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _DenseToSparseBatchDataset(dataset, batch_size, row_shape)
+
+  return _apply_fn
+
+
+def padded_batch_window(dataset, padded_shape, padding_value=None):
+  """Batches a window of tensors with padding.
+
+  Args:
+    dataset: the input dataset.
+    padded_shape: (Optional.) `tf.TensorShape` or `tf.int64` vector tensor-like
+      object representing the shape to which the input elements should be padded
+      prior to batching. Any unknown dimensions (e.g. `tf.Dimension(None)` in a
+      `tf.TensorShape` or `-1` in a tensor-like object) will be padded to the
+      maximum size of that dimension in each batch.
+    padding_value: (Optional.) A scalar-shaped `tf.Tensor`, representing the
+      padding value to use. Defaults are `0` for numeric types and the empty
+      string for string types. If `dataset` contains `tf.SparseTensor`, this
+      value is ignored.
+
+  Returns:
+    A `Tensor` representing the batch of the entire input dataset.
+
+  Raises:
+    ValueError: if invalid arguments are provided.
+  """
+  if not issubclass(dataset.output_classes,
+                    (ops.Tensor, sparse_tensor.SparseTensor)):
+    raise TypeError("Input dataset expected to have a single tensor component")
+  if issubclass(dataset.output_classes, (ops.Tensor)):
+    return _padded_batch_dense_window(dataset, padded_shape, padding_value)
+  elif issubclass(dataset.output_classes, (sparse_tensor.SparseTensor)):
+    if padding_value is not None:
+      raise ValueError("Padding value not allowed for sparse tensors")
+    return _padded_batch_sparse_window(dataset, padded_shape)
+  else:
+    raise TypeError("Unsupported dataset type: %s" % dataset.output_classes)
+
+
+def _padded_batch_dense_window(dataset, padded_shape, padding_value=None):
+  """Batches a window of dense tensors with padding."""
+
+  padded_shape = math_ops.cast(
+      convert.partial_shape_to_tensor(padded_shape), dtypes.int32)
+
+  def key_fn(_):
+    return np.int64(0)
+
+  def max_init_fn(_):
+    return padded_shape
+
+  def max_reduce_fn(state, value):
+    """Computes the maximum shape to pad to."""
+    condition = math_ops.reduce_all(
+        math_ops.logical_or(
+            math_ops.less_equal(array_ops.shape(value), padded_shape),
+            math_ops.equal(padded_shape, -1)))
+    assert_op = control_flow_ops.Assert(condition, [
+        "Actual shape greater than padded shape: ",
+        array_ops.shape(value), padded_shape
+    ])
+    with ops.control_dependencies([assert_op]):
+      return math_ops.maximum(state, array_ops.shape(value))
+
+  def finalize_fn(state):
+    return state
+
+  # Compute the padded shape.
+  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
+  padded_shape = get_single_element.get_single_element(
+      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))
+
+  if padding_value is None:
+    if dataset.output_types == dtypes.string:
+      padding_value = ""
+    elif dataset.output_types == dtypes.bool:
+      padding_value = False
+    elif dataset.output_types == dtypes.variant:
+      raise TypeError("Unable to create padding for field of type 'variant'")
+    else:
+      padding_value = 0
+
+  def batch_init_fn(_):
+    batch_shape = array_ops.concat(
+        [np.array([0], dtype=np.int32), padded_shape], 0)
+    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)
+
+  def batch_reduce_fn(state, value):
+    return array_ops.concat([state, [value]], 0)
+
+  def pad_fn(value):
+    shape = array_ops.shape(value)
+    left = array_ops.zeros_like(shape)
+    right = padded_shape - shape
+    return array_ops.pad(
+        value, array_ops.stack([left, right], 1), constant_values=padding_value)
+
+  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
+  return get_single_element.get_single_element(
+      dataset.map(pad_fn).apply(
+          grouping.group_by_reducer(key_fn, batch_reducer)))
+
+
+def _padded_batch_sparse_window(dataset, padded_shape):
+  """Batches a window of sparse tensors with padding."""
+
+  def key_fn(_):
+    return np.int64(0)
+
+  def max_init_fn(_):
+    return convert.partial_shape_to_tensor(padded_shape)
+
+  def max_reduce_fn(state, value):
+    """Computes the maximum shape to pad to."""
+    condition = math_ops.reduce_all(
+        math_ops.logical_or(
+            math_ops.less_equal(value.dense_shape, padded_shape),
+            math_ops.equal(padded_shape, -1)))
+    assert_op = control_flow_ops.Assert(condition, [
+        "Actual shape greater than padded shape: ", value.dense_shape,
+        padded_shape
+    ])
+    with ops.control_dependencies([assert_op]):
+      return math_ops.maximum(state, value.dense_shape)
+
+  def finalize_fn(state):
+    return state
+
+  # Compute the padded shape.
+  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
+  padded_shape = get_single_element.get_single_element(
+      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))
+
+  def batch_init_fn(_):
+    indices_shape = array_ops.concat([[0], [array_ops.size(padded_shape) + 1]],
+                                     0)
+    return sparse_tensor.SparseTensor(
+        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
+        values=constant_op.constant([], shape=[0], dtype=dataset.output_types),
+        dense_shape=array_ops.concat(
+            [np.array([0], dtype=np.int64), padded_shape], 0))
+
+  def batch_reduce_fn(state, value):
+    padded_value = sparse_tensor.SparseTensor(
+        indices=value.indices, values=value.values, dense_shape=padded_shape)
+    reshaped_value = sparse_ops.sparse_reshape(
+        padded_value,
+        array_ops.concat(
+            [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
+    return sparse_ops.sparse_concat(0, [state, reshaped_value])
+
+  reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
+  return get_single_element.get_single_element(
+      dataset.apply(grouping.group_by_reducer(key_fn, reducer)))
+
+
+class _UnbatchDataset(dataset_ops.UnaryDataset):
+  """A dataset that splits the elements of its input into multiple elements."""
+
+  def __init__(self, input_dataset):
+    """See `unbatch()` for more details."""
+    super(_UnbatchDataset, self).__init__(input_dataset)
+    flat_shapes = nest.flatten(input_dataset.output_shapes)
+    if any(s.ndims == 0 for s in flat_shapes):
+      raise ValueError("Cannot unbatch an input with scalar components.")
+    known_batch_dim = tensor_shape.Dimension(None)
+    for s in flat_shapes:
+      try:
+        known_batch_dim = known_batch_dim.merge_with(s[0])
+      except ValueError:
+        raise ValueError("Cannot unbatch an input whose components have "
+                         "different batch sizes.")
+    self._input_dataset = input_dataset
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.unbatch_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return nest.map_structure(lambda s: s[1:],
+                              self._input_dataset.output_shapes)
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+@tf_export("data.experimental.unbatch")
+def unbatch():
+  """Splits elements of a dataset into multiple elements on the batch dimension.
+
+  For example, if elements of the dataset are shaped `[B, a0, a1, ...]`,
+  where `B` may vary for each input element, then for each element in the
+  dataset, the unbatched dataset will contain `B` consecutive elements
+  of shape `[a0, a1, ...]`.
+
+  ```python
+  # NOTE: The following example uses `{ ... }` to represent the contents
+  # of a dataset.
+  a = { ['a', 'b', 'c'], ['a', 'b'], ['a', 'b', 'c', 'd'] }
+
+  a.apply(tf.data.experimental.unbatch()) == {
+      'a', 'b', 'c', 'a', 'b', 'a', 'b', 'c', 'd'}
+  ```
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    if not sparse.any_sparse(dataset.output_classes):
+      return _UnbatchDataset(dataset)
+
+    # NOTE(mrry): We must ensure that any SparseTensors in `dataset`
+    # are normalized to the rank-1 dense representation, so that the
+    # sparse-oblivious unbatching logic will slice them
+    # appropriately. This leads to a somewhat inefficient re-encoding step
+    # for all SparseTensor components.
+    # TODO(mrry): Consider optimizing this in future
+    # if it turns out to be a bottleneck.
+    def normalize(arg, *rest):
+      if rest:
+        return sparse.serialize_many_sparse_tensors((arg,) + rest)
+      else:
+        return sparse.serialize_many_sparse_tensors(arg)
+
+    normalized_dataset = dataset.map(normalize)
+
+    # NOTE(mrry): Our `map()` has lost information about the sparseness
+    # of any SparseTensor components, so re-apply the structure of the
+    # original dataset.
+    restructured_dataset = _RestructuredDataset(
+        normalized_dataset,
+        dataset.output_types,
+        dataset.output_shapes,
+        dataset.output_classes,
+        allow_unsafe_cast=True)
+    return _UnbatchDataset(restructured_dataset)
+
+  return _apply_fn
+
+
+class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that batches ragged dense elements into `tf.SparseTensor`s."""
+
+  def __init__(self, input_dataset, batch_size, row_shape):
+    """See `Dataset.dense_to_sparse_batch()` for more details."""
+    super(_DenseToSparseBatchDataset, self).__init__(input_dataset)
+    if not isinstance(input_dataset.output_types, dtypes.DType):
+      raise TypeError("DenseToSparseDataset requires an input whose elements "
+                      "have a single component, whereas the input has %r." %
+                      input_dataset.output_types)
+    self._input_dataset = input_dataset
+    self._batch_size = batch_size
+    self._row_shape = row_shape
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.dense_to_sparse_batch_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._batch_size,
+        row_shape=convert.partial_shape_to_tensor(self._row_shape),
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return sparse_tensor.SparseTensor
+
+  @property
+  def output_shapes(self):
+    return tensor_shape.vector(None).concatenate(self._row_shape)
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+class _RestructuredDataset(dataset_ops.UnaryDataset):
+  """An internal helper for changing the structure and shape of a dataset."""
+
+  def __init__(self,
+               dataset,
+               output_types,
+               output_shapes=None,
+               output_classes=None,
+               allow_unsafe_cast=False):
+    """Creates a new dataset with the given output types and shapes.
+
+    The given `dataset` must have a structure that is convertible:
+    * `dataset.output_types` must be the same as `output_types` module nesting.
+    * Each shape in `dataset.output_shapes` must be compatible with each shape
+      in `output_shapes` (if given).
+
+    Note: This helper permits "unsafe casts" for shapes, equivalent to using
+    `tf.Tensor.set_shape()` where domain-specific knowledge is available.
+
+    Args:
+      dataset: A `Dataset` object.
+      output_types: A nested structure of `tf.DType` objects.
+      output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects.
+        If omitted, the shapes will be inherited from `dataset`.
+      output_classes: (Optional.) A nested structure of class types.
+        If omitted, the class types will be inherited from `dataset`.
+      allow_unsafe_cast: (Optional.) If `True`, the caller may switch the
+        reported output types and shapes of the restructured dataset, e.g. to
+        switch a sparse tensor represented as `tf.variant` to its user-visible
+        type and shape.
+
+    Raises:
+      ValueError: If either `output_types` or `output_shapes` is not compatible
+        with the structure of `dataset`.
+    """
+    super(_RestructuredDataset, self).__init__(dataset)
+    self._input_dataset = dataset
+
+    if not allow_unsafe_cast:
+      # Validate that the types are compatible.
+      output_types = nest.map_structure(dtypes.as_dtype, output_types)
+      flat_original_types = nest.flatten(dataset.output_types)
+      flat_new_types = nest.flatten(output_types)
+      if flat_original_types != flat_new_types:
+        raise ValueError(
+            "Dataset with output types %r cannot be restructured to have "
+            "output types %r" % (dataset.output_types, output_types))
+
+    self._output_types = output_types
+
+    if output_shapes is None:
+      # Inherit shapes from the original `dataset`.
+      self._output_shapes = nest.pack_sequence_as(output_types,
+                                                  nest.flatten(
+                                                      dataset.output_shapes))
+    else:
+      if not allow_unsafe_cast:
+        # Validate that the shapes are compatible.
+        nest.assert_same_structure(output_types, output_shapes)
+        flat_original_shapes = nest.flatten(dataset.output_shapes)
+        flat_new_shapes = nest.flatten_up_to(output_types, output_shapes)
+
+        for original_shape, new_shape in zip(flat_original_shapes,
+                                             flat_new_shapes):
+          if not original_shape.is_compatible_with(new_shape):
+            raise ValueError(
+                "Dataset with output shapes %r cannot be restructured to have "
+                "incompatible output shapes %r" % (dataset.output_shapes,
+                                                   output_shapes))
+      self._output_shapes = nest.map_structure_up_to(
+          output_types, tensor_shape.as_shape, output_shapes)
+    if output_classes is None:
+      # Inherit class types from the original `dataset`.
+      self._output_classes = nest.pack_sequence_as(output_types,
+                                                   nest.flatten(
+                                                       dataset.output_classes))
+    else:
+      self._output_classes = output_classes
+
+  def _as_variant_tensor(self):
+    return self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+
+class _MapAndBatchDataset(dataset_ops.MapDataset):
+  """A `Dataset` that maps a function over a batch of elements."""
+
+  def __init__(self, input_dataset, map_func, batch_size, num_parallel_calls,
+               drop_remainder):
+    """See `Dataset.map()` for details."""
+    super(_MapAndBatchDataset, self).__init__(input_dataset, map_func)
+    self._batch_size_t = ops.convert_to_tensor(
+        batch_size, dtype=dtypes.int64, name="batch_size")
+    self._num_parallel_calls_t = ops.convert_to_tensor(
+        num_parallel_calls, dtype=dtypes.int64, name="num_parallel_calls")
+    self._drop_remainder_t = ops.convert_to_tensor(
+        drop_remainder, dtype=dtypes.bool, name="drop_remainder")
+
+    self._batch_size = batch_size
+    self._drop_remainder = drop_remainder
+
+  def _as_variant_tensor(self):
+    # pylint: disable=protected-access
+    input_resource = self._input_dataset._as_variant_tensor()
+    return gen_dataset_ops.map_and_batch_dataset_v2(
+        input_resource,
+        self._map_func.captured_inputs,
+        f=self._map_func,
+        batch_size=self._batch_size_t,
+        num_parallel_calls=self._num_parallel_calls_t,
+        drop_remainder=self._drop_remainder_t,
+        **dataset_ops.flat_structure(self))
+    # pylint: enable=protected-access
+
+  @property
+  def output_shapes(self):
+    dim = self._batch_size if self._drop_remainder else None
+    return nest.pack_sequence_as(self._output_shapes, [
+        tensor_shape.vector(dim).concatenate(s)
+        for s in nest.flatten(self._output_shapes)
+    ])
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+
+@tf_export("data.experimental.map_and_batch")
+def map_and_batch(map_func,
+                  batch_size,
+                  num_parallel_batches=None,
+                  drop_remainder=False,
+                  num_parallel_calls=None):
+  """Fused implementation of `map` and `batch`.
+
+  Maps `map_func` across `batch_size` consecutive elements of this dataset
+  and then combines them into a batch. Functionally, it is equivalent to `map`
+  followed by `batch`. However, by fusing the two transformations together, the
+  implementation can be more efficient. Surfacing this transformation in the API
+  is temporary. Once automatic input pipeline optimization is implemented,
+  the fusing of `map` and `batch` will happen automatically and this API will be
+  deprecated.
+
+  Args:
+    map_func: A function mapping a nested structure of tensors to another
+      nested structure of tensors.
+    batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
+      consecutive elements of this dataset to combine in a single batch.
+    num_parallel_batches: (Optional.) A `tf.int64` scalar `tf.Tensor`,
+      representing the number of batches to create in parallel. On one hand,
+      higher values can help mitigate the effect of stragglers. On the other
+      hand, higher values can increase contention if CPU is scarce.
+    drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing
+      whether the last batch should be dropped in case its size is smaller than
+      desired; the default behavior is not to drop the smaller batch.
+    num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
+        representing the number of elements to process in parallel. If not
+        specified, `batch_size * num_parallel_batches` elements will be
+        processed in parallel.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+
+  Raises:
+    ValueError: If both `num_parallel_batches` and `num_parallel_calls` are
+      specified.
+  """
+
+  if num_parallel_batches is None and num_parallel_calls is None:
+    num_parallel_calls = batch_size
+  elif num_parallel_batches is not None and num_parallel_calls is None:
+    num_parallel_calls = batch_size * num_parallel_batches
+  elif num_parallel_batches is not None and num_parallel_calls is not None:
+    raise ValueError("The `num_parallel_batches` and `num_parallel_calls` "
+                     "arguments are mutually exclusive.")
+
+  def _apply_fn(dataset):
+    return _MapAndBatchDataset(dataset, map_func, batch_size,
+                               num_parallel_calls, drop_remainder)
+
+  return _apply_fn
diff --git a/tensorflow/python/data/experimental/ops/counter.py b/tensorflow/python/data/experimental/ops/counter.py
new file mode 100644
index 0000000000..42200eaef9
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/counter.py
@@ -0,0 +1,55 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The Counter Dataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.ops import scan_ops
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.Counter")
+def Counter(start=0, step=1, dtype=dtypes.int64):
+  """Creates a `Dataset` that counts from `start` in steps of size `step`.
+
+  For example:
+
+  ```python
+  Dataset.count() == [0, 1, 2, ...)
+  Dataset.count(2) == [2, 3, ...)
+  Dataset.count(2, 5) == [2, 7, 12, ...)
+  Dataset.count(0, -1) == [0, -1, -2, ...)
+  Dataset.count(10, -1) == [10, 9, ...)
+  ```
+
+  Args:
+    start: (Optional.) The starting value for the counter. Defaults to 0.
+    step: (Optional.) The step size for the counter. Defaults to 1.
+    dtype: (Optional.) The data type for counter elements. Defaults to
+      `tf.int64`.
+
+  Returns:
+    A `Dataset` of scalar `dtype` elements.
+  """
+  with ops.name_scope("counter"):
+    start = ops.convert_to_tensor(start, dtype=dtype, name="start")
+    step = ops.convert_to_tensor(step, dtype=dtype, name="step")
+    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
+        scan_ops.scan(start, lambda state, _: (state + step, state)))
diff --git a/tensorflow/python/data/experimental/ops/enumerate_ops.py b/tensorflow/python/data/experimental/ops/enumerate_ops.py
new file mode 100644
index 0000000000..a1af98f552
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/enumerate_ops.py
@@ -0,0 +1,60 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Enumerate dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.enumerate_dataset")
+def enumerate_dataset(start=0):
+  """A transformation that enumerate the elements of a dataset.
+
+  It is Similar to python's `enumerate`.
+  For example:
+
+  ```python
+  # NOTE: The following examples use `{ ... }` to represent the
+  # contents of a dataset.
+  a = { 1, 2, 3 }
+  b = { (7, 8), (9, 10) }
+
+  # The nested structure of the `datasets` argument determines the
+  # structure of elements in the resulting dataset.
+  a.apply(tf.data.experimental.enumerate(start=5)) == { (5, 1), (6, 2), (7, 3) }
+  b.apply(tf.data.experimental.enumerate()) == { (0, (7, 8)), (1, (9, 10)) }
+  ```
+
+  Args:
+    start: A `tf.int64` scalar `tf.Tensor`, representing the start
+      value for enumeration.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    max_value = np.iinfo(dtypes.int64.as_numpy_dtype).max
+    return dataset_ops.Dataset.zip((dataset_ops.Dataset.range(start, max_value),
+                                    dataset))
+
+  return _apply_fn
diff --git a/tensorflow/python/data/experimental/ops/error_ops.py b/tensorflow/python/data/experimental/ops/error_ops.py
new file mode 100644
index 0000000000..82e274b70c
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/error_ops.py
@@ -0,0 +1,78 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Ignore_errors dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.ignore_errors")
+def ignore_errors():
+  """Creates a `Dataset` from another `Dataset` and silently ignores any errors.
+
+  Use this transformation to produce a dataset that contains the same elements
+  as the input, but silently drops any elements that caused an error. For
+  example:
+
+  ```python
+  dataset = tf.data.Dataset.from_tensor_slices([1., 2., 0., 4.])
+
+  # Computing `tf.check_numerics(1. / 0.)` will raise an InvalidArgumentError.
+  dataset = dataset.map(lambda x: tf.check_numerics(1. / x, "error"))
+
+  # Using `ignore_errors()` will drop the element that causes an error.
+  dataset =
+      dataset.apply(tf.data.experimental.ignore_errors())  # ==> {1., 0.5, 0.2}
+  ```
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _IgnoreErrorsDataset(dataset)
+
+  return _apply_fn
+
+
+class _IgnoreErrorsDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that silently ignores errors when computing its input."""
+
+  def __init__(self, input_dataset):
+    """See `Dataset.ignore_errors()` for details."""
+    super(_IgnoreErrorsDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+
+  def _as_variant_tensor(self):
+    return gen_experimental_dataset_ops.experimental_ignore_errors_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
diff --git a/tensorflow/python/data/experimental/ops/get_single_element.py b/tensorflow/python/data/experimental/ops/get_single_element.py
new file mode 100644
index 0000000000..132526166c
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/get_single_element.py
@@ -0,0 +1,72 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python wrappers for Datasets and Iterators."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.get_single_element")
+def get_single_element(dataset):
+  """Returns the single element in `dataset` as a nested structure of tensors.
+
+  This function enables you to use a `tf.data.Dataset` in a stateless
+  "tensor-in tensor-out" expression, without creating a `tf.data.Iterator`.
+  This can be useful when your preprocessing transformations are expressed
+  as a `Dataset`, and you want to use the transformation at serving time.
+  For example:
+
+  ```python
+  input_batch = tf.placeholder(tf.string, shape=[BATCH_SIZE])
+
+  def preprocessing_fn(input_str):
+    # ...
+    return image, label
+
+  dataset = (tf.data.Dataset.from_tensor_slices(input_batch)
+             .map(preprocessing_fn, num_parallel_calls=BATCH_SIZE)
+             .batch(BATCH_SIZE))
+
+  image_batch, label_batch = tf.data.experimental.get_single_element(dataset)
+  ```
+
+  Args:
+    dataset: A `tf.data.Dataset` object containing a single element.
+
+  Returns:
+    A nested structure of `tf.Tensor` objects, corresponding to the single
+    element of `dataset`.
+
+  Raises:
+    TypeError: if `dataset` is not a `tf.data.Dataset` object.
+    InvalidArgumentError (at runtime): if `dataset` does not contain exactly
+      one element.
+  """
+  if not isinstance(dataset, dataset_ops.Dataset):
+    raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
+
+  nested_ret = nest.pack_sequence_as(
+      dataset.output_types, gen_dataset_ops.dataset_to_single_element(
+          dataset._as_variant_tensor(),  # pylint: disable=protected-access
+          **dataset_ops.flat_structure(dataset)))
+  return sparse.deserialize_sparse_tensors(
+      nested_ret, dataset.output_types, dataset.output_shapes,
+      dataset.output_classes)
diff --git a/tensorflow/python/data/experimental/ops/grouping.py b/tensorflow/python/data/experimental/ops/grouping.py
new file mode 100644
index 0000000000..18ba583220
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/grouping.py
@@ -0,0 +1,551 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Grouping dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.group_by_reducer")
+def group_by_reducer(key_func, reducer):
+  """A transformation that groups elements and performs a reduction.
+
+  This transformation maps element of a dataset to a key using `key_func` and
+  groups the elements by key. The `reducer` is used to process each group; its
+  `init_func` is used to initialize state for each group when it is created, the
+  `reduce_func` is used to update the state every time an element is mapped to
+  the matching group, and the `finalize_func` is used to map the final state to
+  an output value.
+
+  Args:
+    key_func: A function mapping a nested structure of tensors
+      (having shapes and types defined by `self.output_shapes` and
+      `self.output_types`) to a scalar `tf.int64` tensor.
+    reducer: An instance of `Reducer`, which captures the reduction logic using
+      the `init_func`, `reduce_func`, and `finalize_func` functions.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    return _GroupByReducerDataset(dataset, key_func, reducer)
+
+  return _apply_fn
+
+
+@tf_export("data.experimental.group_by_window")
+def group_by_window(key_func,
+                    reduce_func,
+                    window_size=None,
+                    window_size_func=None):
+  """A transformation that groups windows of elements by key and reduces them.
+
+  This transformation maps each consecutive element in a dataset to a key
+  using `key_func` and groups the elements by key. It then applies
+  `reduce_func` to at most `window_size_func(key)` elements matching the same
+  key. All except the final window for each key will contain
+  `window_size_func(key)` elements; the final window may be smaller.
+
+  You may provide either a constant `window_size` or a window size determined by
+  the key through `window_size_func`.
+
+  Args:
+    key_func: A function mapping a nested structure of tensors
+      (having shapes and types defined by `self.output_shapes` and
+      `self.output_types`) to a scalar `tf.int64` tensor.
+    reduce_func: A function mapping a key and a dataset of up to `window_size`
+      consecutive elements matching that key to another dataset.
+    window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
+      consecutive elements matching the same key to combine in a single
+      batch, which will be passed to `reduce_func`. Mutually exclusive with
+      `window_size_func`.
+    window_size_func: A function mapping a key to a `tf.int64` scalar
+      `tf.Tensor`, representing the number of consecutive elements matching
+      the same key to combine in a single batch, which will be passed to
+      `reduce_func`. Mutually exclusive with `window_size`.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+
+  Raises:
+    ValueError: if neither or both of {`window_size`, `window_size_func`} are
+      passed.
+  """
+  if (window_size is not None and window_size_func or
+      not (window_size is not None or window_size_func)):
+    raise ValueError("Must pass either window_size or window_size_func.")
+
+  if window_size is not None:
+
+    def constant_window_func(unused_key):
+      return ops.convert_to_tensor(window_size, dtype=dtypes.int64)
+
+    window_size_func = constant_window_func
+
+  assert window_size_func is not None
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    return _GroupByWindowDataset(dataset, key_func, reduce_func,
+                                 window_size_func)
+
+  return _apply_fn
+
+
+@tf_export("data.experimental.bucket_by_sequence_length")
+def bucket_by_sequence_length(element_length_func,
+                              bucket_boundaries,
+                              bucket_batch_sizes,
+                              padded_shapes=None,
+                              padding_values=None,
+                              pad_to_bucket_boundary=False,
+                              no_padding=False):
+  """A transformation that buckets elements in a `Dataset` by length.
+
+  Elements of the `Dataset` are grouped together by length and then are padded
+  and batched.
+
+  This is useful for sequence tasks in which the elements have variable length.
+  Grouping together elements that have similar lengths reduces the total
+  fraction of padding in a batch which increases training step efficiency.
+
+  Args:
+    element_length_func: function from element in `Dataset` to `tf.int32`,
+      determines the length of the element, which will determine the bucket it
+      goes into.
+    bucket_boundaries: `list<int>`, upper length boundaries of the buckets.
+    bucket_batch_sizes: `list<int>`, batch size per bucket. Length should be
+      `len(bucket_boundaries) + 1`.
+    padded_shapes: Nested structure of `tf.TensorShape` to pass to
+      `tf.data.Dataset.padded_batch`. If not provided, will use
+      `dataset.output_shapes`, which will result in variable length dimensions
+      being padded out to the maximum length in each batch.
+    padding_values: Values to pad with, passed to
+      `tf.data.Dataset.padded_batch`. Defaults to padding with 0.
+    pad_to_bucket_boundary: bool, if `False`, will pad dimensions with unknown
+      size to maximum length in batch. If `True`, will pad dimensions with
+      unknown size to bucket boundary minus 1 (i.e., the maximum length in each
+      bucket), and caller must ensure that the source `Dataset` does not contain
+      any elements with length longer than `max(bucket_boundaries)`.
+    no_padding: `bool`, indicates whether to pad the batch features (features
+      need to be either of type `tf.SparseTensor` or of same shape).
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+
+  Raises:
+    ValueError: if `len(bucket_batch_sizes) != len(bucket_boundaries) + 1`.
+  """
+  with ops.name_scope("bucket_by_seq_length"):
+    if len(bucket_batch_sizes) != (len(bucket_boundaries) + 1):
+      raise ValueError(
+          "len(bucket_batch_sizes) must equal len(bucket_boundaries) + 1")
+
+    batch_sizes = constant_op.constant(bucket_batch_sizes, dtype=dtypes.int64)
+
+    def element_to_bucket_id(*args):
+      """Return int64 id of the length bucket for this element."""
+      seq_length = element_length_func(*args)
+
+      boundaries = list(bucket_boundaries)
+      buckets_min = [np.iinfo(np.int32).min] + boundaries
+      buckets_max = boundaries + [np.iinfo(np.int32).max]
+      conditions_c = math_ops.logical_and(
+          math_ops.less_equal(buckets_min, seq_length),
+          math_ops.less(seq_length, buckets_max))
+      bucket_id = math_ops.reduce_min(array_ops.where(conditions_c))
+
+      return bucket_id
+
+    def window_size_fn(bucket_id):
+      # The window size is set to the batch size for this bucket
+      window_size = batch_sizes[bucket_id]
+      return window_size
+
+    def make_padded_shapes(shapes, none_filler=None):
+      padded = []
+      for shape in nest.flatten(shapes):
+        shape = tensor_shape.TensorShape(shape)
+        shape = [
+            none_filler if d.value is None else d
+            for d in shape
+        ]
+        padded.append(shape)
+      return nest.pack_sequence_as(shapes, padded)
+
+    def batching_fn(bucket_id, grouped_dataset):
+      """Batch elements in dataset."""
+      batch_size = window_size_fn(bucket_id)
+      if no_padding:
+        return grouped_dataset.batch(batch_size)
+      none_filler = None
+      if pad_to_bucket_boundary:
+        err_msg = ("When pad_to_bucket_boundary=True, elements must have "
+                   "length < max(bucket_boundaries).")
+        check = check_ops.assert_less(
+            bucket_id,
+            constant_op.constant(len(bucket_batch_sizes) - 1,
+                                 dtype=dtypes.int64),
+            message=err_msg)
+        with ops.control_dependencies([check]):
+          boundaries = constant_op.constant(bucket_boundaries,
+                                            dtype=dtypes.int64)
+          bucket_boundary = boundaries[bucket_id]
+          none_filler = bucket_boundary - 1
+      shapes = make_padded_shapes(
+          padded_shapes or grouped_dataset.output_shapes,
+          none_filler=none_filler)
+      return grouped_dataset.padded_batch(batch_size, shapes, padding_values)
+
+    def _apply_fn(dataset):
+      return dataset.apply(
+          group_by_window(element_to_bucket_id, batching_fn,
+                          window_size_func=window_size_fn))
+
+    return _apply_fn
+
+
+def _map_x_dataset(map_func):
+  """A transformation that maps `map_func` across its input.
+
+  This transformation is similar to `tf.data.Dataset.map`, but in addition to
+  supporting dense and sparse tensor inputs, it also supports dataset inputs.
+
+  Args:
+    map_func: A function mapping a nested structure of tensors and/or datasets
+      (having shapes and types defined by `self.output_shapes` and
+     `self.output_types`) to another nested structure of tensors and/or
+     datasets.
+
+  Returns:
+    Dataset: A `Dataset`.
+  """
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    return _MapXDataset(dataset, map_func)
+
+  return _apply_fn
+
+
+class _GroupByReducerDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that groups its input and performs a reduction."""
+
+  def __init__(self, input_dataset, key_func, reducer):
+    """See `group_by_reducer()` for details."""
+    super(_GroupByReducerDataset, self).__init__(input_dataset)
+
+    self._input_dataset = input_dataset
+
+    self._make_key_func(key_func, input_dataset)
+    self._make_init_func(reducer.init_func)
+    self._make_reduce_func(reducer.reduce_func, input_dataset)
+    self._make_finalize_func(reducer.finalize_func)
+
+  def _make_key_func(self, key_func, input_dataset):
+    """Make wrapping Defun for key_func."""
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        key_func, "tf.data.experimental.group_by_reducer()", input_dataset)
+    if not (
+        wrapped_func.output_types == dtypes.int64 and
+        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
+      raise ValueError(
+          "`key_func` must return a single tf.int64 tensor. "
+          "Got type=%s and shape=%s"
+          % (wrapped_func.output_types, wrapped_func.output_shapes))
+    self._key_func = wrapped_func.function
+
+  def _make_init_func(self, init_func):
+    """Make wrapping Defun for init_func."""
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        init_func,
+        "tf.data.experimental.group_by_reducer()",
+        input_classes=ops.Tensor,
+        input_shapes=tensor_shape.scalar(),
+        input_types=dtypes.int64)
+    self._init_func = wrapped_func.function
+    self._state_classes = wrapped_func.output_classes
+    self._state_shapes = wrapped_func.output_shapes
+    self._state_types = wrapped_func.output_types
+
+  def _make_reduce_func(self, reduce_func, input_dataset):
+    """Make wrapping Defun for reduce_func."""
+
+    # Iteratively rerun the reduce function until reaching a fixed point on
+    # `self._state_shapes`.
+    need_to_rerun = True
+    while need_to_rerun:
+
+      wrapped_func = dataset_ops.StructuredFunctionWrapper(
+          reduce_func,
+          "tf.data.experimental.group_by_reducer()",
+          input_classes=(self._state_classes, input_dataset.output_classes),
+          input_shapes=(self._state_shapes, input_dataset.output_shapes),
+          input_types=(self._state_types, input_dataset.output_types),
+          add_to_graph=False)
+
+      # Extract and validate class information from the returned values.
+      for new_state_class, state_class in zip(
+          nest.flatten(wrapped_func.output_classes),
+          nest.flatten(self._state_classes)):
+        if not issubclass(new_state_class, state_class):
+          raise TypeError(
+              "The element classes for the new state must match the initial "
+              "state. Expected %s; got %s." %
+              (self._state_classes, wrapped_func.output_classes))
+
+      # Extract and validate type information from the returned values.
+      for new_state_type, state_type in zip(
+          nest.flatten(wrapped_func.output_types),
+          nest.flatten(self._state_types)):
+        if new_state_type != state_type:
+          raise TypeError(
+              "The element types for the new state must match the initial "
+              "state. Expected %s; got %s." %
+              (self._state_types, wrapped_func.output_types))
+
+      # Extract shape information from the returned values.
+      flat_state_shapes = nest.flatten(self._state_shapes)
+      flat_new_state_shapes = nest.flatten(wrapped_func.output_shapes)
+      weakened_state_shapes = [
+          original.most_specific_compatible_shape(new)
+          for original, new in zip(flat_state_shapes, flat_new_state_shapes)
+      ]
+
+      need_to_rerun = False
+      for original_shape, weakened_shape in zip(flat_state_shapes,
+                                                weakened_state_shapes):
+        if original_shape.ndims is not None and (
+            weakened_shape.ndims is None or
+            original_shape.as_list() != weakened_shape.as_list()):
+          need_to_rerun = True
+          break
+
+      if need_to_rerun:
+        self._state_shapes = nest.pack_sequence_as(self._state_shapes,
+                                                   weakened_state_shapes)
+
+    self._reduce_func = wrapped_func.function
+    self._reduce_func.add_to_graph(ops.get_default_graph())
+
+  def _make_finalize_func(self, finalize_func):
+    """Make wrapping Defun for finalize_func."""
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        finalize_func,
+        "tf.data.experimental.group_by_reducer()",
+        input_classes=self._state_classes,
+        input_shapes=self._state_shapes,
+        input_types=self._state_types)
+    self._finalize_func = wrapped_func.function
+    self._output_classes = wrapped_func.output_classes
+    self._output_shapes = wrapped_func.output_shapes
+    self._output_types = wrapped_func.output_types
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.group_by_reducer_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._key_func.captured_inputs,
+        self._init_func.captured_inputs,
+        self._reduce_func.captured_inputs,
+        self._finalize_func.captured_inputs,
+        key_func=self._key_func,
+        init_func=self._init_func,
+        reduce_func=self._reduce_func,
+        finalize_func=self._finalize_func,
+        **dataset_ops.flat_structure(self))
+
+
+class _GroupByWindowDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that groups its input and performs a windowed reduction."""
+
+  def __init__(self, input_dataset, key_func, reduce_func, window_size_func):
+    """See `group_by_window()` for details."""
+    super(_GroupByWindowDataset, self).__init__(input_dataset)
+
+    self._input_dataset = input_dataset
+
+    self._make_key_func(key_func, input_dataset)
+    self._make_reduce_func(reduce_func, input_dataset)
+    self._make_window_size_func(window_size_func)
+
+  def _make_window_size_func(self, window_size_func):
+    """Make wrapping Defun for window_size_func."""
+    def window_size_func_wrapper(key):
+      return ops.convert_to_tensor(window_size_func(key), dtype=dtypes.int64)
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        window_size_func_wrapper,
+        "tf.data.experimental.group_by_window()",
+        input_classes=ops.Tensor,
+        input_shapes=tensor_shape.scalar(),
+        input_types=dtypes.int64)
+    if not (
+        wrapped_func.output_types == dtypes.int64 and
+        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
+      raise ValueError(
+          "`window_size_func` must return a single tf.int64 scalar tensor.")
+    self._window_size_func = wrapped_func.function
+
+  def _make_key_func(self, key_func, input_dataset):
+    """Make wrapping Defun for key_func."""
+    def key_func_wrapper(*args):
+      return ops.convert_to_tensor(key_func(*args), dtype=dtypes.int64)
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        key_func_wrapper, "tf.data.experimental.group_by_window()",
+        input_dataset)
+    if not (
+        wrapped_func.output_types == dtypes.int64 and
+        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
+      raise ValueError(
+          "`key_func` must return a single tf.int64 scalar tensor.")
+    self._key_func = wrapped_func.function
+
+  def _make_reduce_func(self, reduce_func, input_dataset):
+    """Make wrapping Defun for reduce_func."""
+    nested_dataset = dataset_ops._NestedDatasetComponent(input_dataset)  # pylint: disable=protected-access
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        reduce_func,
+        "tf.data.experimental.reduce_by_window()",
+        input_classes=(ops.Tensor, nested_dataset),
+        input_shapes=(tensor_shape.scalar(), nested_dataset),
+        input_types=(dtypes.int64, nested_dataset),
+        experimental_nested_dataset_support=True)
+    if not isinstance(
+        wrapped_func.output_classes, dataset_ops._NestedDatasetComponent):  # pylint: disable=protected-access
+      raise TypeError("`reduce_func` must return a `Dataset` object.")
+    self._output_classes = wrapped_func.output_classes.output_classes
+    self._output_types = wrapped_func.output_types.output_types
+    self._output_shapes = wrapped_func.output_shapes.output_shapes
+    self._reduce_func = wrapped_func.function
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.group_by_window_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._key_func.captured_inputs,
+        self._reduce_func.captured_inputs,
+        self._window_size_func.captured_inputs,
+        key_func=self._key_func,
+        reduce_func=self._reduce_func,
+        window_size_func=self._window_size_func,
+        **dataset_ops.flat_structure(self))
+
+
+@tf_export("data.experimental.Reducer")
+class Reducer(object):
+  """A reducer is used for reducing a set of elements.
+
+  A reducer is represented as a tuple of the three functions:
+    1) initialization function: key => initial state
+    2) reduce function: (old state, input) => new state
+    3) finalization function: state => result
+  """
+
+  def __init__(self, init_func, reduce_func, finalize_func):
+    self._init_func = init_func
+    self._reduce_func = reduce_func
+    self._finalize_func = finalize_func
+
+  @property
+  def init_func(self):
+    return self._init_func
+
+  @property
+  def reduce_func(self):
+    return self._reduce_func
+
+  @property
+  def finalize_func(self):
+    return self._finalize_func
+
+
+class _MapXDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that maps a function over elements in its input."""
+
+  def __init__(self, input_dataset, map_func):
+    """See `map_x_dataset()` for details."""
+    super(_MapXDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        map_func,
+        "tf.data.experimental.map_x_dataset()",
+        input_dataset,
+        experimental_nested_dataset_support=True)
+    self._output_classes = wrapped_func.output_classes
+    self._output_shapes = wrapped_func.output_shapes
+    self._output_types = wrapped_func.output_types
+    self._map_func = wrapped_func.function
+
+  def _as_variant_tensor(self):
+    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+    return gen_dataset_ops.map_dataset(
+        input_t,
+        self._map_func.captured_inputs,
+        f=self._map_func,
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
diff --git a/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py b/tensorflow/python/data/experimental/ops/indexed_dataset_ops.py
similarity index 100%
rename from tensorflow/contrib/data/python/ops/indexed_dataset_ops.py
rename to tensorflow/python/data/experimental/ops/indexed_dataset_ops.py
diff --git a/tensorflow/python/data/experimental/ops/interleave_ops.py b/tensorflow/python/data/experimental/ops/interleave_ops.py
new file mode 100644
index 0000000000..a3c094859e
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/interleave_ops.py
@@ -0,0 +1,262 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Non-deterministic dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.ops import random_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import readers
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.ops import gen_stateless_random_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.parallel_interleave")
+def parallel_interleave(map_func,
+                        cycle_length,
+                        block_length=1,
+                        sloppy=False,
+                        buffer_output_elements=None,
+                        prefetch_input_elements=None):
+  """A parallel version of the `Dataset.interleave()` transformation.
+
+  `parallel_interleave()` maps `map_func` across its input to produce nested
+  datasets, and outputs their elements interleaved. Unlike
+  `tf.data.Dataset.interleave`, it gets elements from `cycle_length` nested
+  datasets in parallel, which increases the throughput, especially in the
+  presence of stragglers. Furthermore, the `sloppy` argument can be used to
+  improve performance, by relaxing the requirement that the outputs are produced
+  in a deterministic order, and allowing the implementation to skip over nested
+  datasets whose elements are not readily available when requested.
+
+  Example usage:
+
+  ```python
+  # Preprocess 4 files concurrently.
+  filenames = tf.data.Dataset.list_files("/path/to/data/train*.tfrecords")
+  dataset = filenames.apply(
+      tf.data.experimental.parallel_interleave(
+          lambda filename: tf.data.TFRecordDataset(filename),
+          cycle_length=4))
+  ```
+
+  WARNING: If `sloppy` is `True`, the order of produced elements is not
+  deterministic.
+
+  Args:
+    map_func: A function mapping a nested structure of tensors to a `Dataset`.
+    cycle_length: The number of input `Dataset`s to interleave from in parallel.
+    block_length: The number of consecutive elements to pull from an input
+      `Dataset` before advancing to the next input `Dataset`.
+    sloppy: If false, elements are produced in deterministic order. Otherwise,
+      the implementation is allowed, for the sake of expediency, to produce
+      elements in a non-deterministic order.
+    buffer_output_elements: The number of elements each iterator being
+      interleaved should buffer (similar to the `.prefetch()` transformation for
+      each interleaved iterator).
+    prefetch_input_elements: The number of input elements to transform to
+      iterators before they are needed for interleaving.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+  def _apply_fn(dataset):
+    return readers.ParallelInterleaveDataset(
+        dataset, map_func, cycle_length, block_length, sloppy,
+        buffer_output_elements, prefetch_input_elements)
+
+  return _apply_fn
+
+
+class _DirectedInterleaveDataset(dataset_ops.Dataset):
+  """A substitute for `Dataset.interleave()` on a fixed list of datasets."""
+
+  def __init__(self, selector_input, data_inputs):
+    self._selector_input = selector_input
+    self._data_inputs = list(data_inputs)
+
+    for data_input in data_inputs[1:]:
+      if (data_input.output_types != data_inputs[0].output_types or
+          data_input.output_classes != data_inputs[0].output_classes):
+        raise TypeError("All datasets must have the same type and class.")
+
+  def _as_variant_tensor(self):
+    # pylint: disable=protected-access
+    return (
+        gen_experimental_dataset_ops.experimental_directed_interleave_dataset(
+            self._selector_input._as_variant_tensor(), [
+                data_input._as_variant_tensor()
+                for data_input in self._data_inputs
+            ], **dataset_ops.flat_structure(self)))
+    # pylint: enable=protected-access
+
+  def _inputs(self):
+    return [self._selector_input] + self._data_inputs
+
+  @property
+  def output_classes(self):
+    return self._data_inputs[0].output_classes
+
+  @property
+  def output_shapes(self):
+    ret = self._data_inputs[0].output_shapes
+    for data_input in self._data_inputs[1:]:
+      ret = nest.pack_sequence_as(ret, [
+          ts1.most_specific_compatible_shape(ts2) for (ts1, ts2) in zip(
+              nest.flatten(ret), nest.flatten(data_input.output_shapes))
+      ])
+    return ret
+
+  @property
+  def output_types(self):
+    return self._data_inputs[0].output_types
+
+
+@tf_export("data.experimental.sample_from_datasets")
+def sample_from_datasets(datasets, weights=None, seed=None):
+  """Samples elements at random from the datasets in `datasets`.
+
+  Args:
+    datasets: A list of `tf.data.Dataset` objects with compatible structure.
+    weights: (Optional.) A list of `len(datasets)` floating-point values where
+      `weights[i]` represents the probability with which an element should be
+      sampled from `datasets[i]`, or a `tf.data.Dataset` object where each
+      element is such a list. Defaults to a uniform distribution across
+      `datasets`.
+    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      random seed that will be used to create the distribution. See
+      `tf.set_random_seed` for behavior.
+
+  Returns:
+    A dataset that interleaves elements from `datasets` at random, according to
+    `weights` if provided, otherwise with uniform probability.
+
+  Raises:
+    TypeError: If the `datasets` or `weights` arguments have the wrong type.
+    ValueError: If the `weights` argument is specified and does not match the
+      length of the `datasets` element.
+  """
+  num_datasets = len(datasets)
+  if not isinstance(weights, dataset_ops.Dataset):
+    if weights is None:
+      # Select inputs with uniform probability.
+      logits = [[1.0] * num_datasets]
+
+    else:
+      # Use the given `weights` as the probability of choosing the respective
+      # input.
+      weights = ops.convert_to_tensor(weights, name="weights")
+      if weights.dtype not in (dtypes.float32, dtypes.float64):
+        raise TypeError("`weights` must be convertible to a tensor of "
+                        "`tf.float32` or `tf.float64` elements.")
+      if not weights.shape.is_compatible_with([num_datasets]):
+        raise ValueError(
+            "`weights` must be a vector of length `len(datasets)`.")
+
+      # The `stateless_multinomial()` op expects log-probabilities, as opposed
+      # to weights.
+      logits = array_ops.expand_dims(math_ops.log(weights, name="logits"), 0)
+
+    # NOTE(mrry): We only specialize when `weights` is not a `Dataset`. When it
+    # is a `Dataset`, it is possible that evaluating it has a side effect the
+    # user depends on.
+    if len(datasets) == 1:
+      return datasets[0]
+
+    def select_dataset_constant_logits(seed):
+      return array_ops.squeeze(
+          gen_stateless_random_ops.stateless_multinomial(logits, 1, seed=seed),
+          axis=[0, 1])
+
+    selector_input = dataset_ops.MapDataset(
+        random_ops.RandomDataset(seed).batch(2),
+        select_dataset_constant_logits,
+        use_inter_op_parallelism=False)
+
+  else:
+    # Use each element of the given `weights` dataset as the probability of
+    # choosing the respective input.
+
+    # The `stateless_multinomial()` op expects log-probabilities, as opposed to
+    # weights.
+    logits_ds = weights.map(lambda *p: math_ops.log(p, name="logits"))
+
+    def select_dataset_varying_logits(logits, seed):
+      return array_ops.squeeze(
+          gen_stateless_random_ops.stateless_multinomial(logits, 1, seed=seed),
+          axis=[0, 1])
+
+    logits_and_seeds = dataset_ops.Dataset.zip(
+        (logits_ds, random_ops.RandomDataset(seed).batch(2)))
+    selector_input = dataset_ops.MapDataset(
+        logits_and_seeds,
+        select_dataset_varying_logits,
+        use_inter_op_parallelism=False)
+
+  return _DirectedInterleaveDataset(selector_input, datasets)
+
+
+@tf_export("data.experimental.choose_from_datasets")
+def choose_from_datasets(datasets, choice_dataset):
+  """Creates a dataset that deterministically chooses elements from `datasets`.
+
+  For example, given the following datasets:
+
+  ```python
+  datasets = [tf.data.Dataset.from_tensors("foo").repeat(),
+              tf.data.Dataset.from_tensors("bar").repeat(),
+              tf.data.Dataset.from_tensors("baz").repeat()]
+
+  # Define a dataset containing `[0, 1, 2, 0, 1, 2, 0, 1, 2]`.
+  choice_dataset = tf.data.Dataset.range(3).repeat(3)
+
+  result = tf.data.experimental.choose_from_datasets(datasets, choice_dataset)
+  ```
+
+  The elements of `result` will be:
+
+  ```
+  "foo", "bar", "baz", "foo", "bar", "baz", "foo", "bar", "baz"
+  ```
+
+  Args:
+    datasets: A list of `tf.data.Dataset` objects with compatible structure.
+    choice_dataset: A `tf.data.Dataset` of scalar `tf.int64` tensors between
+      `0` and `len(datasets) - 1`.
+
+  Returns:
+    A dataset that interleaves elements from `datasets` according to the values
+    of `choice_dataset`.
+
+  Raises:
+    TypeError: If the `datasets` or `choice_dataset` arguments have the wrong
+      type.
+  """
+  if not (choice_dataset.output_types == dtypes.int64
+          and choice_dataset.output_shapes.is_compatible_with(
+              tensor_shape.scalar())
+          and choice_dataset.output_classes == ops.Tensor):
+    raise TypeError("`choice_dataset` must be a dataset of scalar "
+                    "`tf.int64` tensors.")
+  return _DirectedInterleaveDataset(choice_dataset, datasets)
diff --git a/tensorflow/python/data/experimental/ops/iterator_ops.py b/tensorflow/python/data/experimental/ops/iterator_ops.py
new file mode 100644
index 0000000000..72d7d58f06
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/iterator_ops.py
@@ -0,0 +1,268 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Iterator ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.data.ops import optional_ops
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.make_saveable_from_iterator")
+def make_saveable_from_iterator(iterator):
+  """Returns a SaveableObject for saving/restore iterator state using Saver.
+
+  Args:
+    iterator: Iterator.
+
+  For example:
+
+  ```python
+  with tf.Graph().as_default():
+    ds = tf.data.Dataset.range(10)
+    iterator = ds.make_initializable_iterator()
+    # Build the iterator SaveableObject.
+    saveable_obj = tf.data.experimental.make_saveable_from_iterator(iterator)
+    # Add the SaveableObject to the SAVEABLE_OBJECTS collection so
+    # it can be automatically saved using Saver.
+    tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable_obj)
+    saver = tf.train.Saver()
+
+    while continue_training:
+      ... Perform training ...
+      if should_save_checkpoint:
+        saver.save()
+  ```
+
+  Note: When restoring the iterator, the existing iterator state is completely
+  discarded. This means that any changes you may have made to the Dataset
+  graph will be discarded as well! This includes the new Dataset graph
+  that you may have built during validation. So, while running validation,
+  make sure to run the initializer for the validation input pipeline after
+  restoring the checkpoint.
+
+  Note: Not all iterators support checkpointing yet. Attempting to save the
+  state of an unsupported iterator will throw an error.
+  """
+  return _Saveable(iterator._iterator_resource)  # pylint: disable=protected-access
+
+
+class _Saveable(saver_lib.BaseSaverBuilder.SaveableObject):
+  """SaveableObject for saving/restoring iterator state."""
+
+  def __init__(self, iterator_resource):
+    serialized_iterator = gen_dataset_ops.serialize_iterator(iterator_resource)
+    specs = [
+        saver_lib.BaseSaverBuilder.SaveSpec(serialized_iterator, "",
+                                            iterator_resource.name + "-state")
+    ]
+    super(_Saveable, self).__init__(iterator_resource, specs,
+                                    iterator_resource.name)
+
+  def restore(self, restored_tensors, unused_restored_shapes):
+    with ops.colocate_with(self.op):
+      return gen_dataset_ops.deserialize_iterator(self.op, restored_tensors[0])
+
+
+@tf_export("data.experimental.CheckpointInputPipelineHook")
+class CheckpointInputPipelineHook(session_run_hook.SessionRunHook):
+  """Checkpoints input pipeline state every N steps or seconds.
+
+  This hook saves the state of the iterators in the `Graph` so that when
+  training is resumed the input pipeline continues from where it left off.
+  This could potentially avoid overfitting in certain pipelines where the
+  number of training steps per eval are small compared to the dataset
+  size or if the training pipeline is pre-empted.
+
+  Differences from `CheckpointSaverHook`:
+  1. Saves only the input pipelines in the "iterators" collection and not the
+     global variables or other saveable objects.
+  2. Does not write the `GraphDef` and `MetaGraphDef` to the summary.
+
+  Example of checkpointing the training pipeline:
+
+  ```python
+  est = tf.estimator.Estimator(model_fn)
+  while True:
+    est.train(
+        train_input_fn,
+        hooks=[tf.data.experimental.CheckpointInputPipelineHook(est)],
+        steps=train_steps_per_eval)
+    # Note: We do not pass the hook here.
+    metrics = est.evaluate(eval_input_fn)
+    if should_stop_the_training(metrics):
+      break
+  ```
+
+  This hook should be used if the input pipeline state needs to be saved
+  separate from the model checkpoint. Doing so may be useful for a few reasons:
+  1. The input pipeline checkpoint may be large, if there are large shuffle
+     or prefetch buffers for instance, and may bloat the checkpoint size.
+  2. If the input pipeline is shared between training and validation, restoring
+     the checkpoint during validation may override the validation input
+     pipeline.
+
+  For saving the input pipeline checkpoint alongside the model weights use
+  `tf.data.experimental.make_saveable_from_iterator` directly to create a
+  `SaveableObject` and add to the `SAVEABLE_OBJECTS` collection. Note, however,
+  that you will need to be careful not to restore the training iterator during
+  eval. You can do that by not adding the iterator to the SAVEABLE_OBJECTS
+  collector when building the eval graph.
+  """
+
+  def __init__(self, estimator):
+    """Initializes a `CheckpointInputPipelineHook`.
+
+    Args:
+      estimator: Estimator.
+
+    Raises:
+      ValueError: One of `save_steps` or `save_secs` should be set.
+      ValueError: At most one of saver or scaffold should be set.
+    """
+    # `checkpoint_basename` is "input.ckpt" for non-distributed pipelines or
+    # of the form "input_<task_type>_<task_id>.ckpt" for distributed pipelines.
+    # Note: The default `checkpoint_basename` used by `CheckpointSaverHook` is
+    # "model.ckpt". We intentionally choose the input pipeline checkpoint prefix
+    # to be different to avoid conflicts with the model checkpoint.
+
+    # pylint: disable=protected-access
+    checkpoint_prefix = "input"
+    if estimator._config.num_worker_replicas > 1:
+      # Distributed setting.
+      suffix = "_{}_{}".format(estimator._config.task_type,
+                               estimator._config.task_id)
+      checkpoint_prefix += suffix
+    # pylint: enable=protected-access
+
+    # We use a composition paradigm instead of inheriting from
+    # `CheckpointSaverHook` because `Estimator` does an `isinstance` check
+    # to check whether a `CheckpointSaverHook` is already present in the list
+    # of hooks and if not, adds one. Inheriting from `CheckpointSaverHook`
+    # would thwart this behavior. This hook checkpoints *only the iterators*
+    # and not the graph variables.
+    self._checkpoint_saver_hook = basic_session_run_hooks.CheckpointSaverHook(
+        estimator.model_dir,
+        save_secs=estimator._config.save_checkpoints_secs,  # pylint: disable=protected-access
+        save_steps=estimator._config.save_checkpoints_steps,  # pylint: disable=protected-access
+        checkpoint_basename=checkpoint_prefix + ".ckpt")
+
+    # Name for the protocol buffer file that will contain the list of most
+    # recent checkpoints stored as a `CheckpointState` protocol buffer.
+    # This file, kept in the same directory as the checkpoint files, is
+    # automatically managed by the `Saver` to keep track of recent checkpoints.
+    # The default name used by the `Saver` for this file is "checkpoint". Here
+    # we use the name "checkpoint_<checkpoint_prefix>" so that in case the
+    # `checkpoint_dir` is the same as the model checkpoint directory, there are
+    # no conflicts during restore.
+    self._latest_filename = "checkpoint_" + checkpoint_prefix
+    self._first_run = True
+
+  def begin(self):
+    # Build a Saver that saves all iterators in the `GLOBAL_ITERATORS`
+    # collection if no `Saver` or `Scaffold` is provided.
+    # pylint: disable=protected-access
+    if (self._checkpoint_saver_hook._saver is None and
+        self._checkpoint_saver_hook._scaffold is None):
+      iterators = ops.get_collection(iterator_ops.GLOBAL_ITERATORS)
+      saveables = [_Saveable(i) for i in iterators]
+      self._checkpoint_saver_hook._saver = _CustomSaver(saveables,
+                                                        self._latest_filename)
+    # pylint: enable=protected-access
+    self._checkpoint_saver_hook.begin()
+
+  def _restore_or_save_initial_ckpt(self, session):
+    # Ideally this should be run in after_create_session but is not for the
+    # following reason:
+    # Currently there is no way of enforcing an order of running the
+    # `SessionRunHooks`. Hence it is possible that the `_DatasetInitializerHook`
+    # is run *after* this hook. That is troublesome because
+    # 1. If a checkpoint exists and this hook restores it, the initializer hook
+    #    will override it.
+    # 2. If no checkpoint exists, this hook will try to save an initialized
+    #    iterator which will result in an exception.
+    #
+    # As a temporary fix we enter the following implicit contract between this
+    # hook and the _DatasetInitializerHook.
+    # 1. The _DatasetInitializerHook initializes the iterator in the call to
+    #    after_create_session.
+    # 2. This hook saves the iterator on the first call to `before_run()`, which
+    #    is guaranteed to happen after `after_create_session()` of all hooks
+    #    have been run.
+
+    # Check if there is an existing checkpoint. If so, restore from it.
+    # pylint: disable=protected-access
+    latest_checkpoint_path = checkpoint_management.latest_checkpoint(
+        self._checkpoint_saver_hook._checkpoint_dir,
+        latest_filename=self._latest_filename)
+    if latest_checkpoint_path:
+      self._checkpoint_saver_hook._get_saver().restore(session,
+                                                       latest_checkpoint_path)
+    else:
+      # The checkpoint saved here is the state at step "global_step".
+      # Note: We do not save the GraphDef or MetaGraphDef here.
+      global_step = session.run(self._checkpoint_saver_hook._global_step_tensor)
+      self._checkpoint_saver_hook._save(session, global_step)
+      self._checkpoint_saver_hook._timer.update_last_triggered_step(global_step)
+    # pylint: enable=protected-access
+
+  def before_run(self, run_context):
+    if self._first_run:
+      self._restore_or_save_initial_ckpt(run_context.session)
+      self._first_run = False
+    return self._checkpoint_saver_hook.before_run(run_context)
+
+  def after_run(self, run_context, run_values):
+    self._checkpoint_saver_hook.after_run(run_context, run_values)
+
+  def end(self, session):
+    self._checkpoint_saver_hook.end(session)
+
+
+class _CustomSaver(saver_lib.Saver):
+  """`Saver` with a different default `latest_filename`.
+
+  This is used in the `CheckpointInputPipelineHook` to avoid conflicts with
+  the model ckpt saved by the `CheckpointSaverHook`.
+  """
+
+  def __init__(self, var_list, latest_filename):
+    super(_CustomSaver, self).__init__(var_list)
+    self._latest_filename = latest_filename
+
+  def save(self,
+           sess,
+           save_path,
+           global_step=None,
+           latest_filename=None,
+           meta_graph_suffix="meta",
+           write_meta_graph=True,
+           write_state=True,
+           strip_default_attrs=False):
+    return super(_CustomSaver, self).save(
+        sess, save_path, global_step, latest_filename or self._latest_filename,
+        meta_graph_suffix, write_meta_graph, write_state, strip_default_attrs)
+
+
+tf_export("data.experimental.Optional")(optional_ops.Optional)
+tf_export("data.experimental.get_next_as_optional")(
+    iterator_ops.get_next_as_optional)
diff --git a/tensorflow/contrib/data/python/ops/map_defun.py b/tensorflow/python/data/experimental/ops/map_defun.py
similarity index 100%
rename from tensorflow/contrib/data/python/ops/map_defun.py
rename to tensorflow/python/data/experimental/ops/map_defun.py
diff --git a/tensorflow/contrib/data/python/ops/optimization.py b/tensorflow/python/data/experimental/ops/optimization.py
similarity index 100%
rename from tensorflow/contrib/data/python/ops/optimization.py
rename to tensorflow/python/data/experimental/ops/optimization.py
diff --git a/tensorflow/python/data/experimental/ops/parsing_ops.py b/tensorflow/python/data/experimental/ops/parsing_ops.py
new file mode 100644
index 0000000000..6615b9022a
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/parsing_ops.py
@@ -0,0 +1,152 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental `dataset` API for parsing example."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+class _ParseExampleDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that parses `example` dataset into a `dict` dataset."""
+
+  def __init__(self, input_dataset, features, num_parallel_calls):
+    super(_ParseExampleDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    if not all(types == dtypes.string
+               for types in nest.flatten(input_dataset.output_types)):
+      raise TypeError("Input dataset should be a dataset of vectors of strings")
+    self._num_parallel_calls = num_parallel_calls
+    # pylint: disable=protected-access
+    self._features = parsing_ops._prepend_none_dimension(features)
+    # sparse_keys and dense_keys come back sorted here.
+    (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
+     dense_shapes) = parsing_ops._features_to_raw_params(
+         self._features, [
+             parsing_ops.VarLenFeature, parsing_ops.SparseFeature,
+             parsing_ops.FixedLenFeature, parsing_ops.FixedLenSequenceFeature
+         ])
+    # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature.
+    (_, dense_defaults_vec, sparse_keys, sparse_types, dense_keys, dense_shapes,
+     dense_shape_as_shape) = parsing_ops._process_raw_parameters(
+         None, dense_defaults, sparse_keys, sparse_types, dense_keys,
+         dense_types, dense_shapes)
+    # pylint: enable=protected-access
+    self._sparse_keys = sparse_keys
+    self._sparse_types = sparse_types
+    self._dense_keys = dense_keys
+    self._dense_defaults = dense_defaults_vec
+    self._dense_shapes = dense_shapes
+    self._dense_types = dense_types
+    dense_output_shapes = [
+        self._input_dataset.output_shapes.concatenate(shape)
+        for shape in dense_shape_as_shape
+    ]
+    sparse_output_shapes = [
+        self._input_dataset.output_shapes.concatenate([None])
+        for _ in range(len(sparse_keys))
+    ]
+
+    self._output_shapes = dict(
+        zip(self._dense_keys + self._sparse_keys,
+            dense_output_shapes + sparse_output_shapes))
+    self._output_types = dict(
+        zip(self._dense_keys + self._sparse_keys,
+            self._dense_types + self._sparse_types))
+    self._output_classes = dict(
+        zip(self._dense_keys + self._sparse_keys,
+            [ops.Tensor for _ in range(len(self._dense_defaults))] +
+            [sparse_tensor.SparseTensor for _ in range(len(self._sparse_keys))
+            ]))
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.parse_example_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._num_parallel_calls,
+        self._dense_defaults,
+        self._sparse_keys,
+        self._dense_keys,
+        self._sparse_types,
+        self._dense_shapes,
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+
+# TODO(b/111553342): add arguments names and example names as well.
+@tf_export("data.experimental.parse_example_dataset")
+def parse_example_dataset(features, num_parallel_calls=1):
+  """A transformation that parses `Example` protos into a `dict` of tensors.
+
+  Parses a number of serialized `Example` protos given in `serialized`. We refer
+  to `serialized` as a batch with `batch_size` many entries of individual
+  `Example` protos.
+
+  This op parses serialized examples into a dictionary mapping keys to `Tensor`
+  and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`,
+  `SparseFeature`, and `FixedLenFeature` objects. Each `VarLenFeature`
+  and `SparseFeature` is mapped to a `SparseTensor`, and each
+  `FixedLenFeature` is mapped to a `Tensor`. See `tf.parse_example` for more
+  details about feature dictionaries.
+
+  Args:
+   features: A `dict` mapping feature keys to `FixedLenFeature`,
+     `VarLenFeature`, and `SparseFeature` values.
+   num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
+      representing the number of parsing processes to call in parallel.
+
+  Returns:
+    A dataset transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+
+  Raises:
+    ValueError: if features argument is None.
+  """
+  if features is None:
+    raise ValueError("Missing: features was %s." % features)
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    out_dataset = _ParseExampleDataset(dataset, features, num_parallel_calls)
+    if any([
+        isinstance(feature, parsing_ops.SparseFeature)
+        for _, feature in features.items()
+    ]):
+      # pylint: disable=protected-access
+      # pylint: disable=g-long-lambda
+      out_dataset = out_dataset.map(
+          lambda x: parsing_ops._construct_sparse_tensors_for_sparse_features(
+              features, x), num_parallel_calls=num_parallel_calls)
+    return out_dataset
+
+  return _apply_fn
diff --git a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py
new file mode 100644
index 0000000000..48d7136f95
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py
@@ -0,0 +1,531 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python wrapper for prefetching_ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import warnings
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.eager import context
+from tensorflow.python.framework import device as framework_device
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import functional_ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+def function_buffering_resource(string_arg,
+                                target_device,
+                                f,
+                                buffer_size,
+                                output_types,
+                                container="",
+                                shared_name=None,
+                                name=None):
+  """Creates a FunctionBufferingResource.
+
+  A FunctionBufferingResource fills up a buffer by calling a function `f` on
+  `target_device`. `f` should take in only a single string argument as input.
+
+  Args:
+    string_arg: The single string argument to the function.
+    target_device: The device to run `f` on.
+    f: The function to be executed.
+    buffer_size: Size of the buffer to be populated.
+    output_types: The output types generated by the function.
+    container: (Optional) string. Defaults to "".
+    shared_name: (Optional) string.
+    name: (Optional) string to name the op.
+
+  Returns:
+    Handle to a FunctionBufferingResource.
+  """
+  if shared_name is None:
+    shared_name = ""
+  return ged_ops.experimental_function_buffering_resource(
+      string_arg=string_arg,
+      target_device=target_device,
+      shared_name=shared_name,
+      f=f,
+      buffer_size=buffer_size,
+      container=container,
+      name=name,
+      output_types=output_types)
+
+
+def function_buffering_resource_get_next(function_buffer_resource,
+                                         output_types,
+                                         name=None):
+  return ged_ops.experimental_function_buffering_resource_get_next(
+      function_buffer_resource=function_buffer_resource,
+      output_types=output_types,
+      name=name)
+
+
+def function_buffering_resource_reset(function_buffer_resource, name=None):
+  return ged_ops.experimental_function_buffering_resource_reset(
+      function_buffer_resource=function_buffer_resource, name=name)
+
+
+# pylint: disable=protected-access
+class _PrefetchToDeviceIterator(object):
+  """A replacement for `tf.data.Iterator` that prefetches to another device.
+
+  Args:
+    input_dataset: The input dataset
+    one_shot: If true, we make a one shot iterator that's already initialized.
+    device: A fully specified device string where we want to prefetch to
+    buffer_size: Size of the prefetching buffer.
+    shared_name: (Optional.) If non-empty, the returned iterator will be
+        shared under the given name across multiple sessions that share the
+        same devices (e.g. when using a remote server).
+
+  Returns:
+    An Iterator type object.
+  """
+
+  def __init__(self,
+               input_dataset,
+               one_shot,
+               device,
+               buffer_size,
+               shared_name=None):
+    self._input_dataset = input_dataset
+    self._get_next_call_count = 0
+    self._one_shot = one_shot
+    if shared_name is None:
+      shared_name = ""
+
+    if self._one_shot:
+      self._input_iterator = input_dataset.make_one_shot_iterator()
+    else:
+      self._input_iterator = iterator_ops.Iterator.from_structure(
+          self._input_dataset.output_types, self._input_dataset.output_shapes,
+          shared_name, self._input_dataset.output_classes)
+    input_iterator_handle = self._input_iterator.string_handle()
+
+    @function.Defun(dtypes.string)
+    def _prefetch_fn(handle):
+      """Prefetches one element from `input_iterator`."""
+      remote_iterator = iterator_ops.Iterator.from_string_handle(
+          handle, self._input_iterator.output_types,
+          self._input_iterator.output_shapes,
+          self._input_iterator.output_classes)
+      ret = remote_iterator.get_next()
+      return nest.flatten(sparse.serialize_sparse_tensors(ret))
+
+    iterator_device = ged_ops.experimental_iterator_get_device(
+        self._input_iterator._iterator_resource)
+
+    with ops.device(device):
+      self._buffering_resource = function_buffering_resource(
+          f=_prefetch_fn,
+          target_device=iterator_device,
+          string_arg=input_iterator_handle,
+          buffer_size=buffer_size,
+          shared_name=shared_name,
+          output_types=nest.flatten(
+              sparse.as_dense_types(self._input_dataset.output_types,
+                                    self._input_dataset.output_classes)))
+
+    if not self._one_shot:
+      reset_op = function_buffering_resource_reset(self._buffering_resource)
+      with ops.control_dependencies([reset_op]):
+        self._initializer = self._input_iterator.make_initializer(
+            self._input_dataset)
+
+  def get_next(self, name=None):
+    """See `tf.data.Iterator.get_next`."""
+    self._get_next_call_count += 1
+    if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD:
+      warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE)
+
+    flat_ret = ged_ops.experimental_function_buffering_resource_get_next(
+        self._buffering_resource,
+        output_types=nest.flatten(
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        name=name)
+
+    ret = sparse.deserialize_sparse_tensors(
+        nest.pack_sequence_as(self.output_types, flat_ret),
+        self.output_types, self.output_shapes, self.output_classes)
+
+    for tensor, shape in zip(
+        nest.flatten(ret), nest.flatten(self.output_shapes)):
+      if isinstance(tensor, ops.Tensor):
+        tensor.set_shape(shape)
+
+    return ret
+
+  @property
+  def initializer(self):
+    if self._one_shot:
+      raise NotImplementedError("Can't initialize a one_shot_iterator")
+    return self._initializer
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator):
+  """A replacement for `tf.data.Iterator` that prefetches to another device.
+
+  Args:
+    input_dataset: The input dataset
+    one_shot: If true, we make a one shot iterator that's already initialized.
+    device: A fully specified device string where we want to prefetch to
+    buffer_size: Size of the prefetching buffer.
+    shared_name: (Optional.) If non-empty, the returned iterator will be
+        shared under the given name across multiple sessions that share the
+        same devices (e.g. when using a remote server).
+
+  Returns:
+    An Iterator type object.
+  """
+
+  def __init__(self,
+               input_dataset,
+               device,
+               buffer_size):
+    with ops.device("/device:CPU:0"):
+      super(_PrefetchToDeviceEagerIterator, self).__init__(input_dataset)
+      input_iterator_handle = gen_dataset_ops.iterator_to_string_handle(
+          self._resource)
+
+    self._device = device
+
+    @function.Defun(dtypes.string)
+    def _prefetch_fn(handle):
+      """Prefetches one element from `input_iterator`."""
+      remote_iterator = iterator_ops.Iterator.from_string_handle(
+          handle, self.output_types, self.output_shapes, self.output_classes)
+      ret = remote_iterator.get_next()
+      return nest.flatten(sparse.serialize_sparse_tensors(ret))
+
+    _prefetch_fn.add_to_graph(None)
+
+    with ops.device(device):
+      self._buffering_resource = function_buffering_resource(
+          f=_prefetch_fn,
+          output_types=self._flat_output_types,
+          target_device=ged_ops.experimental_iterator_get_device(
+              self._resource),
+          string_arg=input_iterator_handle,
+          buffer_size=buffer_size,
+          shared_name=iterator_ops._generate_shared_name(
+              "function_buffer_resource"))
+
+  def _next_internal(self):
+    """Returns a nested structure of `tf.Tensor`s containing the next element.
+    """
+    # This runs in sync mode as iterators use an error status to communicate
+    # that there is no more data to iterate over.
+    # TODO(b/77291417): Fix
+    with context.execution_mode(context.SYNC):
+      with ops.device(self._device):
+        ret = ged_ops.experimental_function_buffering_resource_get_next(
+            function_buffer_resource=self._buffering_resource,
+            output_types=self._flat_output_types)
+      return sparse.deserialize_sparse_tensors(
+          nest.pack_sequence_as(self._output_types, ret), self._output_types,
+          self._output_shapes, self._output_classes)
+# pylint: enable=protected-access
+
+
+class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` whose iterator prefetches elements to another device."""
+
+  def __init__(self, input_dataset, device, buffer_size):
+    super(_PrefetchToDeviceDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._device = device
+    self._buffer_size = buffer_size if buffer_size is not None else 1
+
+  # The static analysis cannot tell that the eager iterator's superclass has
+  # a `next()` method.
+  # pylint: disable=non-iterator-returned
+  def __iter__(self):
+    """Creates an `Iterator` for enumerating the elements of this dataset.
+
+    The returned iterator implements the Python iterator protocol and therefore
+    can only be used in eager mode.
+
+    Returns:
+      An `Iterator` over the elements of this dataset.
+
+    Raises:
+      RuntimeError: If eager execution is enabled.
+    """
+    if context.executing_eagerly():
+      return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device,
+                                            self._buffer_size)
+    else:
+      raise RuntimeError("dataset.__iter__() is only supported when eager "
+                         "execution is enabled.")
+  # pylint: enable=non-iterator-returned
+
+  def make_one_shot_iterator(self):
+    if context.executing_eagerly():
+      return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device,
+                                            self._buffer_size)
+    else:
+      return _PrefetchToDeviceIterator(self._input_dataset, one_shot=True,
+                                       device=self._device,
+                                       buffer_size=self._buffer_size)
+
+  def make_initializable_iterator(self, shared_name=None):
+    return _PrefetchToDeviceIterator(
+        self._input_dataset,
+        one_shot=False,
+        device=self._device,
+        buffer_size=self._buffer_size,
+        shared_name=shared_name)
+
+  def _as_variant_tensor(self):
+    # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset
+    # transformation methods is called.
+    # TODO(mrry): Investigate support for chaining further transformations after
+    # the prefetch, including GPU support.
+    raise NotImplementedError("`prefetch_to_device()` must be the last "
+                              "transformation in a dataset pipeline.")
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+
+@tf_export("data.experimental.prefetch_to_device")
+def prefetch_to_device(device, buffer_size=None):
+  """A transformation that prefetches dataset values to the given `device`.
+
+  NOTE: Although the transformation creates a `tf.data.Dataset`, the
+  transformation must be the final `Dataset` in the input pipeline.
+
+  Args:
+    device: A string. The name of a device to which elements will be prefetched.
+    buffer_size: (Optional.) The number of elements to buffer on `device`.
+      Defaults to an automatically chosen value.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+  def _apply_fn(dataset):
+    return _PrefetchToDeviceDataset(dataset, device, buffer_size)
+
+  return _apply_fn
+
+
+@tf_export("data.experimental.copy_to_device")
+def copy_to_device(target_device, source_device="/cpu:0"):
+  """A transformation that copies dataset elements to the given `target_device`.
+
+  Args:
+    target_device: The name of a device to which elements will be copied.
+    source_device: The original device on which `input_dataset` will be placed.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _CopyToDeviceDataset(
+        dataset, target_device=target_device, source_device=source_device)
+
+  return _apply_fn
+
+
+# TODO(rohanj): Use the _input_hostmem attr on the RemoteCall ops to indicate
+# all inputs to the Op are in host memory, thereby avoiding some unnecessary
+# Sends and Recvs.
+class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that copies elements to another device."""
+
+  def __init__(self, input_dataset, target_device, source_device="/cpu:0"):
+    """Constructs a _CopyToDeviceDataset.
+
+    Args:
+      input_dataset: `Dataset` to be copied
+      target_device: The name of the device to which elements would be copied.
+      source_device: Device where input_dataset would be placed.
+    """
+    super(_CopyToDeviceDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._target_device = target_device
+    spec = framework_device.DeviceSpec().from_string(self._target_device)
+    self._is_gpu_target = (spec.device_type == "GPU")
+    self._source_device_string = source_device
+    self._source_device = ops.convert_to_tensor(source_device)
+
+    self._flat_output_shapes = nest.flatten(
+        sparse.as_dense_shapes(self._input_dataset.output_shapes,
+                               self._input_dataset.output_classes))
+    self._flat_output_types = nest.flatten(
+        sparse.as_dense_types(self._input_dataset.output_types,
+                              self._input_dataset.output_classes))
+
+    @function.Defun()
+    def _init_func():
+      """Creates an iterator for the input dataset.
+
+      Returns:
+        A `string` tensor that encapsulates the iterator created.
+      """
+      # pylint: disable=protected-access
+      ds_variant = self._input_dataset._as_variant_tensor()
+      resource = gen_dataset_ops.anonymous_iterator(
+          output_types=self._flat_output_types,
+          output_shapes=self._flat_output_shapes)
+      with ops.control_dependencies(
+          [gen_dataset_ops.make_iterator(ds_variant, resource)]):
+        return gen_dataset_ops.iterator_to_string_handle(resource)
+
+    @function.Defun()
+    def _remote_init_func():
+      return functional_ops.remote_call(
+          target=self._source_device,
+          args=_init_func.captured_inputs,
+          Tout=[dtypes.string],
+          f=_init_func)
+
+    self._init_func = _remote_init_func
+    self._init_captured_args = _remote_init_func.captured_inputs
+
+    @function.Defun(dtypes.string)
+    def _next_func(string_handle):
+      """Calls get_next for created iterator.
+
+      Args:
+        string_handle: An iterator string handle created by _init_func
+      Returns:
+        The elements generated from `input_dataset`
+      """
+      with ops.device(self._source_device_string):
+        iterator = iterator_ops.Iterator.from_string_handle(
+            string_handle, self.output_types, self.output_shapes,
+            self.output_classes)
+      ret = iterator.get_next()
+      return nest.flatten(sparse.serialize_sparse_tensors(ret))
+
+    @function.Defun(dtypes.string)
+    def _remote_next_func(string_handle):
+      return functional_ops.remote_call(
+          target=self._source_device,
+          args=[string_handle] + _next_func.captured_inputs,
+          Tout=self._flat_output_types,
+          f=_next_func)
+
+    self._next_func = _remote_next_func
+    self._next_captured_args = _remote_next_func.captured_inputs
+
+    @function.Defun(dtypes.string)
+    def _finalize_func(string_handle):
+      """Destroys the iterator resource created.
+
+      Args:
+        string_handle: An iterator string handle created by _init_func
+      Returns:
+        Tensor constant 0
+      """
+      iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
+          string_handle,
+          output_types=self._flat_output_types,
+          output_shapes=self._flat_output_shapes)
+      with ops.control_dependencies([
+          resource_variable_ops.destroy_resource_op(
+              iterator_resource, ignore_lookup_error=True)]):
+        return array_ops.constant(0, dtypes.int64)
+
+    @function.Defun(dtypes.string)
+    def _remote_finalize_func(string_handle):
+      return functional_ops.remote_call(
+          target=self._source_device,
+          args=[string_handle] + _finalize_func.captured_inputs,
+          Tout=[dtypes.int64],
+          f=_finalize_func)
+
+    self._finalize_func = _remote_finalize_func
+    self._finalize_captured_args = _remote_finalize_func.captured_inputs
+
+    g = ops.get_default_graph()
+    _remote_init_func.add_to_graph(g)
+    _remote_next_func.add_to_graph(g)
+    _remote_finalize_func.add_to_graph(g)
+    # pylint: enable=protected-scope
+
+  # The one_shot_iterator implementation needs a 0 arg _make_dataset function
+  # that thereby captures all the inputs required to create the dataset. Since
+  # there are strings that are inputs to the GeneratorDataset which can't be
+  # placed on a GPU, this fails for the GPU case. Therefore, disabling it for
+  # GPU
+  def make_one_shot_iterator(self):
+    if self._is_gpu_target:
+      raise ValueError("Cannot create a one shot iterator when using "
+                       "`tf.data.experimental.copy_to_device()` on GPU. Please "
+                       "use `Dataset.make_initializable_iterator()` instead.")
+    else:
+      return super(_CopyToDeviceDataset, self).make_one_shot_iterator()
+
+  def _as_variant_tensor(self):
+    with ops.device(self._target_device):
+      return gen_dataset_ops.generator_dataset(
+          self._init_captured_args,
+          self._next_captured_args,
+          self._finalize_captured_args,
+          init_func=self._init_func,
+          next_func=self._next_func,
+          finalize_func=self._finalize_func,
+          output_types=self._flat_output_types,
+          output_shapes=self._flat_output_shapes)
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
diff --git a/tensorflow/python/data/experimental/ops/random_ops.py b/tensorflow/python/data/experimental/ops/random_ops.py
new file mode 100644
index 0000000000..e3a2aeab31
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/random_ops.py
@@ -0,0 +1,54 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Datasets for random number generators."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import random_seed
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.RandomDataset")
+class RandomDataset(dataset_ops.DatasetSource):
+  """A `Dataset` of pseudorandom values."""
+
+  def __init__(self, seed=None):
+    """A `Dataset` of pseudorandom values."""
+    super(RandomDataset, self).__init__()
+    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.random_dataset(
+        seed=self._seed,
+        seed2=self._seed2,
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return ops.Tensor
+
+  @property
+  def output_shapes(self):
+    return tensor_shape.scalar()
+
+  @property
+  def output_types(self):
+    return dtypes.int64
diff --git a/tensorflow/python/data/experimental/ops/readers.py b/tensorflow/python/data/experimental/ops/readers.py
new file mode 100644
index 0000000000..3b2d094514
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/readers.py
@@ -0,0 +1,904 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python wrappers for reader Datasets."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import csv
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import optimization
+from tensorflow.python.data.experimental.ops import parsing_ops
+from tensorflow.python.data.experimental.ops import shuffle_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import readers as core_readers
+from tensorflow.python.data.util import convert
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.lib.io import file_io
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.util.tf_export import tf_export
+
+_ACCEPTABLE_CSV_TYPES = (dtypes.float32, dtypes.float64, dtypes.int32,
+                         dtypes.int64, dtypes.string)
+
+
+def _is_valid_int32(str_val):
+  try:
+    # Checks equality to prevent int32 overflow
+    return dtypes.int32.as_numpy_dtype(str_val) == dtypes.int64.as_numpy_dtype(
+        str_val)
+  except (ValueError, OverflowError):
+    return False
+
+
+def _is_valid_int64(str_val):
+  try:
+    dtypes.int64.as_numpy_dtype(str_val)
+    return True
+  except (ValueError, OverflowError):
+    return False
+
+
+def _is_valid_float(str_val, float_dtype):
+  try:
+    return float_dtype.as_numpy_dtype(str_val) < np.inf
+  except ValueError:
+    return False
+
+
+def _infer_type(str_val, na_value, prev_type):
+  """Given a string, infers its tensor type.
+
+  Infers the type of a value by picking the least 'permissive' type possible,
+  while still allowing the previous type inference for this column to be valid.
+
+  Args:
+    str_val: String value to infer the type of.
+    na_value: Additional string to recognize as a NA/NaN CSV value.
+    prev_type: Type previously inferred based on values of this column that
+      we've seen up till now.
+  Returns:
+    Inferred dtype.
+  """
+  if str_val in ("", na_value):
+    # If the field is null, it gives no extra information about its type
+    return prev_type
+
+  type_list = [
+      dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64, dtypes.string
+  ]  # list of types to try, ordered from least permissive to most
+
+  type_functions = [
+      _is_valid_int32,
+      _is_valid_int64,
+      lambda str_val: _is_valid_float(str_val, dtypes.float32),
+      lambda str_val: _is_valid_float(str_val, dtypes.float64),
+      lambda str_val: True,
+  ]  # Corresponding list of validation functions
+
+  for i in range(len(type_list)):
+    validation_fn = type_functions[i]
+    if validation_fn(str_val) and (prev_type is None or
+                                   prev_type in type_list[:i + 1]):
+      return type_list[i]
+
+
+def _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header):
+  """Generator that yields rows of CSV file(s) in order."""
+  for fn in filenames:
+    with file_io.FileIO(fn, "r") as f:
+      rdr = csv.reader(
+          f,
+          delimiter=field_delim,
+          quoting=csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE)
+      if header:
+        next(rdr)  # Skip header lines
+
+      for csv_row in rdr:
+        if len(csv_row) != num_cols:
+          raise ValueError(
+              "Problem inferring types: CSV row has different number of fields "
+              "than expected.")
+        yield csv_row
+
+
+def _infer_column_defaults(filenames, num_cols, field_delim, use_quote_delim,
+                           na_value, header, num_rows_for_inference,
+                           select_columns):
+  """Infers column types from the first N valid CSV records of files."""
+  if select_columns is None:
+    select_columns = range(num_cols)
+  inferred_types = [None] * len(select_columns)
+
+  for i, csv_row in enumerate(
+      _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header)):
+    if num_rows_for_inference is not None and i >= num_rows_for_inference:
+      break
+
+    for j, col_index in enumerate(select_columns):
+      inferred_types[j] = _infer_type(csv_row[col_index], na_value,
+                                      inferred_types[j])
+
+  # Replace None's with a default type
+  inferred_types = [t or dtypes.string for t in inferred_types]
+  # Default to 0 or '' for null values
+  return [
+      constant_op.constant([0 if t is not dtypes.string else ""], dtype=t)
+      for t in inferred_types
+  ]
+
+
+def _infer_column_names(filenames, field_delim, use_quote_delim):
+  """Infers column names from first rows of files."""
+  csv_kwargs = {
+      "delimiter": field_delim,
+      "quoting": csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE
+  }
+  with file_io.FileIO(filenames[0], "r") as f:
+    try:
+      column_names = next(csv.reader(f, **csv_kwargs))
+    except StopIteration:
+      raise ValueError(("Received StopIteration when reading the header line "
+                        "of %s.  Empty file?") % filenames[0])
+
+  for name in filenames[1:]:
+    with file_io.FileIO(name, "r") as f:
+      try:
+        if next(csv.reader(f, **csv_kwargs)) != column_names:
+          raise ValueError(
+              "Files have different column names in the header row.")
+      except StopIteration:
+        raise ValueError(("Received StopIteration when reading the header line "
+                          "of %s.  Empty file?") % filenames[0])
+  return column_names
+
+
+def _get_sorted_col_indices(select_columns, column_names):
+  """Transforms select_columns argument into sorted column indices."""
+  names_to_indices = {n: i for i, n in enumerate(column_names)}
+  num_cols = len(column_names)
+  for i, v in enumerate(select_columns):
+    if isinstance(v, int):
+      if v < 0 or v >= num_cols:
+        raise ValueError(
+            "Column index %d specified in select_columns out of valid range." %
+            v)
+      continue
+    if v not in names_to_indices:
+      raise ValueError(
+          "Value '%s' specified in select_columns not a valid column index or "
+          "name." % v)
+    select_columns[i] = names_to_indices[v]
+
+  # Sort and ensure there are no duplicates
+  result = sorted(set(select_columns))
+  if len(result) != len(select_columns):
+    raise ValueError("select_columns contains duplicate columns")
+  return result
+
+
+def _maybe_shuffle_and_repeat(
+    dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed):
+  """Optionally shuffle and repeat dataset, as requested."""
+  if num_epochs != 1 and shuffle:
+    # Use shuffle_and_repeat for perf
+    return dataset.apply(
+        shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs,
+                                       shuffle_seed))
+  elif shuffle:
+    return dataset.shuffle(shuffle_buffer_size, shuffle_seed)
+  elif num_epochs != 1:
+    return dataset.repeat(num_epochs)
+  return dataset
+
+
+def make_tf_record_dataset(file_pattern,
+                           batch_size,
+                           parser_fn=None,
+                           num_epochs=None,
+                           shuffle=True,
+                           shuffle_buffer_size=None,
+                           shuffle_seed=None,
+                           prefetch_buffer_size=optimization.AUTOTUNE,
+                           num_parallel_reads=None,
+                           num_parallel_parser_calls=None,
+                           drop_final_batch=False):
+  """Reads and optionally parses TFRecord files into a dataset.
+
+  Provides common functionality such as batching, optional parsing, shuffling,
+  and performant defaults.
+
+  Args:
+    file_pattern: List of files or patterns of TFRecord file paths.
+      See `tf.gfile.Glob` for pattern rules.
+    batch_size: An int representing the number of records to combine
+      in a single batch.
+    parser_fn: (Optional.) A function accepting string input to parse
+      and process the record contents. This function must map records
+      to components of a fixed shape, so they may be batched. By
+      default, uses the record contents unmodified.
+    num_epochs: (Optional.) An int specifying the number of times this
+      dataset is repeated.  If None (the default), cycles through the
+      dataset forever.
+    shuffle: (Optional.) A bool that indicates whether the input
+      should be shuffled. Defaults to `True`.
+    shuffle_buffer_size: (Optional.) Buffer size to use for
+      shuffling. A large buffer size ensures better shuffling, but
+      increases memory usage and startup time.
+    shuffle_seed: (Optional.) Randomization seed to use for shuffling.
+    prefetch_buffer_size: (Optional.) An int specifying the number of
+      feature batches to prefetch for performance improvement.
+      Defaults to auto-tune. Set to 0 to disable prefetching.
+    num_parallel_reads: (Optional.) Number of threads used to read
+      records from files. By default or if set to a value >1, the
+      results will be interleaved.
+    num_parallel_parser_calls: (Optional.) Number of parallel
+      records to parse in parallel. Defaults to an automatic selection.
+    drop_final_batch: (Optional.) Whether the last batch should be
+      dropped in case its size is smaller than `batch_size`; the
+      default behavior is not to drop the smaller batch.
+
+  Returns:
+    A dataset, where each element matches the output of `parser_fn`
+    except it will have an additional leading `batch-size` dimension,
+    or a `batch_size`-length 1-D tensor of strings if `parser_fn` is
+    unspecified.
+  """
+  files = dataset_ops.Dataset.list_files(
+      file_pattern, shuffle=shuffle, seed=shuffle_seed)
+
+  if num_parallel_reads is None:
+    # Note: We considered auto-tuning this value, but there is a concern
+    # that this affects the mixing of records from different files, which
+    # could affect training convergence/accuracy, so we are defaulting to
+    # a constant for now.
+    num_parallel_reads = 24
+  dataset = core_readers.TFRecordDataset(
+      files, num_parallel_reads=num_parallel_reads)
+
+  if shuffle_buffer_size is None:
+    # TODO(josh11b): Auto-tune this value when not specified
+    shuffle_buffer_size = 10000
+  dataset = _maybe_shuffle_and_repeat(
+      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
+
+  # NOTE(mrry): We set `drop_final_batch=True` when `num_epochs is None` to
+  # improve the shape inference, because it makes the batch dimension static.
+  # It is safe to do this because in that case we are repeating the input
+  # indefinitely, and all batches will be full-sized.
+  drop_final_batch = drop_final_batch or num_epochs is None
+
+  if parser_fn is None:
+    dataset = dataset.batch(batch_size, drop_remainder=drop_final_batch)
+  else:
+    # TODO(josh11b): if num_parallel_parser_calls is None, use some function
+    # of num cores instead of map_and_batch's default behavior of one batch.
+    dataset = dataset.apply(batching.map_and_batch(
+        parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls,
+        drop_remainder=drop_final_batch))
+
+  if prefetch_buffer_size == 0:
+    return dataset
+  else:
+    return dataset.prefetch(buffer_size=prefetch_buffer_size)
+
+
+@tf_export("data.experimental.make_csv_dataset")
+def make_csv_dataset(
+    file_pattern,
+    batch_size,
+    column_names=None,
+    column_defaults=None,
+    label_name=None,
+    select_columns=None,
+    field_delim=",",
+    use_quote_delim=True,
+    na_value="",
+    header=True,
+    num_epochs=None,
+    shuffle=True,
+    shuffle_buffer_size=10000,
+    shuffle_seed=None,
+    prefetch_buffer_size=optimization.AUTOTUNE,
+    num_parallel_reads=1,
+    sloppy=False,
+    num_rows_for_inference=100,
+    compression_type=None,
+):
+  """Reads CSV files into a dataset.
+
+  Reads CSV files into a dataset, where each element is a (features, labels)
+  tuple that corresponds to a batch of CSV rows. The features dictionary
+  maps feature column names to `Tensor`s containing the corresponding
+  feature data, and labels is a `Tensor` containing the batch's label data.
+
+  Args:
+    file_pattern: List of files or patterns of file paths containing CSV
+      records. See `tf.gfile.Glob` for pattern rules.
+    batch_size: An int representing the number of records to combine
+      in a single batch.
+    column_names: An optional list of strings that corresponds to the CSV
+      columns, in order. One per column of the input record. If this is not
+      provided, infers the column names from the first row of the records.
+      These names will be the keys of the features dict of each dataset element.
+    column_defaults: A optional list of default values for the CSV fields. One
+      item per selected column of the input record. Each item in the list is
+      either a valid CSV dtype (float32, float64, int32, int64, or string), or a
+      `Tensor` with one of the aforementioned types. The tensor can either be
+      a scalar default value (if the column is optional), or an empty tensor (if
+      the column is required). If a dtype is provided instead of a tensor, the
+      column is also treated as required. If this list is not provided, tries
+      to infer types based on reading the first num_rows_for_inference rows of
+      files specified, and assumes all columns are optional, defaulting to `0`
+      for numeric values and `""` for string values. If both this and
+      `select_columns` are specified, these must have the same lengths, and
+      `column_defaults` is assumed to be sorted in order of increasing column
+      index.
+    label_name: A optional string corresponding to the label column. If
+      provided, the data for this column is returned as a separate `Tensor` from
+      the features dictionary, so that the dataset complies with the format
+      expected by a `tf.Estimator.train` or `tf.Estimator.evaluate` input
+      function.
+    select_columns: An optional list of integer indices or string column
+      names, that specifies a subset of columns of CSV data to select. If
+      column names are provided, these must correspond to names provided in
+      `column_names` or inferred from the file header lines. When this argument
+      is specified, only a subset of CSV columns will be parsed and returned,
+      corresponding to the columns specified. Using this results in faster
+      parsing and lower memory usage. If both this and `column_defaults` are
+      specified, these must have the same lengths, and `column_defaults` is
+      assumed to be sorted in order of increasing column index.
+    field_delim: An optional `string`. Defaults to `","`. Char delimiter to
+      separate fields in a record.
+    use_quote_delim: An optional bool. Defaults to `True`. If false, treats
+      double quotation marks as regular characters inside of the string fields.
+    na_value: Additional string to recognize as NA/NaN.
+    header: A bool that indicates whether the first rows of provided CSV files
+      correspond to header lines with column names, and should not be included
+      in the data.
+    num_epochs: An int specifying the number of times this dataset is repeated.
+      If None, cycles through the dataset forever.
+    shuffle: A bool that indicates whether the input should be shuffled.
+    shuffle_buffer_size: Buffer size to use for shuffling. A large buffer size
+      ensures better shuffling, but increases memory usage and startup time.
+    shuffle_seed: Randomization seed to use for shuffling.
+    prefetch_buffer_size: An int specifying the number of feature
+      batches to prefetch for performance improvement. Recommended value is the
+      number of batches consumed per training step. Defaults to auto-tune.
+
+    num_parallel_reads: Number of threads used to read CSV records from files.
+      If >1, the results will be interleaved.
+    sloppy: If `True`, reading performance will be improved at
+      the cost of non-deterministic ordering. If `False`, the order of elements
+      produced is deterministic prior to shuffling (elements are still
+      randomized if `shuffle=True`. Note that if the seed is set, then order
+      of elements after shuffling is deterministic). Defaults to `False`.
+    num_rows_for_inference: Number of rows of a file to use for type inference
+      if record_defaults is not provided. If None, reads all the rows of all
+      the files. Defaults to 100.
+    compression_type: (Optional.) A `tf.string` scalar evaluating to one of
+      `""` (no compression), `"ZLIB"`, or `"GZIP"`. Defaults to no compression.
+
+  Returns:
+    A dataset, where each element is a (features, labels) tuple that corresponds
+    to a batch of `batch_size` CSV rows. The features dictionary maps feature
+    column names to `Tensor`s containing the corresponding column data, and
+    labels is a `Tensor` containing the column data for the label column
+    specified by `label_name`.
+
+  Raises:
+    ValueError: If any of the arguments is malformed.
+  """
+  # Create dataset of all matching filenames
+  filenames = _get_file_names(file_pattern, False)
+  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
+  if shuffle:
+    dataset = dataset.shuffle(len(filenames), shuffle_seed)
+
+  # Clean arguments; figure out column names and defaults
+
+  if column_names is None:
+    if not header:
+      raise ValueError("Cannot infer column names without a header line.")
+    # If column names are not provided, infer from the header lines
+    column_names = _infer_column_names(filenames, field_delim, use_quote_delim)
+  if len(column_names) != len(set(column_names)):
+    raise ValueError("Cannot have duplicate column names.")
+
+  if select_columns is not None:
+    select_columns = _get_sorted_col_indices(select_columns, column_names)
+
+  if column_defaults is not None:
+    column_defaults = [
+        constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x
+        for x in column_defaults
+    ]
+  else:
+    # If column defaults are not provided, infer from records at graph
+    # construction time
+    column_defaults = _infer_column_defaults(
+        filenames, len(column_names), field_delim, use_quote_delim, na_value,
+        header, num_rows_for_inference, select_columns)
+
+  if select_columns is not None and len(column_defaults) != len(select_columns):
+    raise ValueError(
+        "If specified, column_defaults and select_columns must have same "
+        "length."
+    )
+  if select_columns is not None and len(column_names) > len(select_columns):
+    # Pick the relevant subset of column names
+    column_names = [column_names[i] for i in select_columns]
+
+  if label_name is not None and label_name not in column_names:
+    raise ValueError("`label_name` provided must be one of the columns.")
+
+  def filename_to_dataset(filename):
+    return CsvDataset(
+        filename,
+        record_defaults=column_defaults,
+        field_delim=field_delim,
+        use_quote_delim=use_quote_delim,
+        na_value=na_value,
+        select_cols=select_columns,
+        header=header,
+        compression_type=compression_type,
+    )
+
+  def map_fn(*columns):
+    """Organizes columns into a features dictionary.
+
+    Args:
+      *columns: list of `Tensor`s corresponding to one csv record.
+    Returns:
+      An OrderedDict of feature names to values for that particular record. If
+      label_name is provided, extracts the label feature to be returned as the
+      second element of the tuple.
+    """
+    features = collections.OrderedDict(zip(column_names, columns))
+    if label_name is not None:
+      label = features.pop(label_name)
+      return features, label
+    return features
+
+  # Read files sequentially (if num_parallel_reads=1) or in parallel
+  dataset = dataset.apply(
+      interleave_ops.parallel_interleave(
+          filename_to_dataset, cycle_length=num_parallel_reads, sloppy=sloppy))
+
+  dataset = _maybe_shuffle_and_repeat(
+      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
+
+  # Apply batch before map for perf, because map has high overhead relative
+  # to the size of the computation in each map.
+  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
+  # improve the shape inference, because it makes the batch dimension static.
+  # It is safe to do this because in that case we are repeating the input
+  # indefinitely, and all batches will be full-sized.
+  dataset = dataset.batch(batch_size=batch_size,
+                          drop_remainder=num_epochs is None)
+  dataset = dataset_ops.MapDataset(
+      dataset, map_fn, use_inter_op_parallelism=False)
+  dataset = dataset.prefetch(prefetch_buffer_size)
+
+  return dataset
+
+
+_DEFAULT_READER_BUFFER_SIZE_BYTES = 4 * 1024 * 1024  # 4 MB
+
+
+@tf_export("data.experimental.CsvDataset")
+class CsvDataset(dataset_ops.DatasetSource):
+  """A Dataset comprising lines from one or more CSV files."""
+
+  def __init__(self,
+               filenames,
+               record_defaults,
+               compression_type=None,
+               buffer_size=None,
+               header=False,
+               field_delim=",",
+               use_quote_delim=True,
+               na_value="",
+               select_cols=None):
+    """Creates a `CsvDataset` by reading and decoding CSV files.
+
+    The elements of this dataset correspond to records from the file(s).
+    RFC 4180 format is expected for CSV files
+    (https://tools.ietf.org/html/rfc4180)
+    Note that we allow leading and trailing spaces with int or float field.
+
+
+    For example, suppose we have a file 'my_file0.csv' with four CSV columns of
+    different data types:
+    ```
+    abcdefg,4.28E10,5.55E6,12
+    hijklmn,-5.3E14,,2
+    ```
+
+    We can construct a CsvDataset from it as follows:
+    ```python
+    dataset = tf.data.experimental.CsvDataset(
+        "my_file*.csv",
+        [tf.float32,  # Required field, use dtype or empty tensor
+         tf.constant([0.0], dtype=tf.float32),  # Optional field, default to 0.0
+         tf.int32,  # Required field, use dtype or empty tensor
+         ],
+        select_cols=[1,2,3]  # Only parse last three columns
+    )
+    ```
+
+    The expected output of its iterations is:
+    ```python
+    next_element = dataset.make_one_shot_iterator().get_next()
+    with tf.Session() as sess:
+      while True:
+        try:
+          print(sess.run(next_element))
+        except tf.errors.OutOfRangeError:
+          break
+
+    >> (4.28e10, 5.55e6, 12)
+    >> (-5.3e14, 0.0, 2)
+    ```
+
+    Args:
+      filenames: A `tf.string` tensor containing one or more filenames.
+      record_defaults: A list of default values for the CSV fields. Each item in
+        the list is either a valid CSV `DType` (float32, float64, int32, int64,
+        string), or a `Tensor` object with one of the above types. One per
+        column of CSV data, with either a scalar `Tensor` default value for the
+        column if it is optional, or `DType` or empty `Tensor` if required. If
+        both this and `select_columns` are specified, these must have the same
+        lengths, and `column_defaults` is assumed to be sorted in order of
+        increasing column index.
+      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
+        `""` (no compression), `"ZLIB"`, or `"GZIP"`. Defaults to no
+        compression.
+      buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes
+        to buffer while reading files. Defaults to 4MB.
+      header: (Optional.) A `tf.bool` scalar indicating whether the CSV file(s)
+        have header line(s) that should be skipped when parsing. Defaults to
+        `False`.
+      field_delim: (Optional.) A `tf.string` scalar containing the delimiter
+        character that separates fields in a record. Defaults to `","`.
+      use_quote_delim: (Optional.) A `tf.bool` scalar. If `False`, treats
+        double quotation marks as regular characters inside of string fields
+        (ignoring RFC 4180, Section 2, Bullet 5). Defaults to `True`.
+      na_value: (Optional.) A `tf.string` scalar indicating a value that will
+        be treated as NA/NaN.
+      select_cols: (Optional.) A sorted list of column indices to select from
+        the input data. If specified, only this subset of columns will be
+        parsed. Defaults to parsing all columns.
+    """
+    super(CsvDataset, self).__init__()
+    self._filenames = ops.convert_to_tensor(
+        filenames, dtype=dtypes.string, name="filenames")
+    self._compression_type = convert.optional_param_to_tensor(
+        "compression_type",
+        compression_type,
+        argument_default="",
+        argument_dtype=dtypes.string)
+    record_defaults = [
+        constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x
+        for x in record_defaults
+    ]
+    self._record_defaults = ops.convert_n_to_tensor(
+        record_defaults, name="record_defaults")
+    self._buffer_size = convert.optional_param_to_tensor(
+        "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
+    self._header = ops.convert_to_tensor(
+        header, dtype=dtypes.bool, name="header")
+    self._field_delim = ops.convert_to_tensor(
+        field_delim, dtype=dtypes.string, name="field_delim")
+    self._use_quote_delim = ops.convert_to_tensor(
+        use_quote_delim, dtype=dtypes.bool, name="use_quote_delim")
+    self._na_value = ops.convert_to_tensor(
+        na_value, dtype=dtypes.string, name="na_value")
+    self._select_cols = convert.optional_param_to_tensor(
+        "select_cols",
+        select_cols,
+        argument_default=[],
+        argument_dtype=dtypes.int64,
+    )
+    self._output_shapes = tuple(
+        tensor_shape.scalar() for _ in range(len(record_defaults)))
+    self._output_types = tuple(d.dtype for d in self._record_defaults)
+    self._output_classes = tuple(
+        ops.Tensor for _ in range(len(record_defaults)))
+
+  def _as_variant_tensor(self):
+    # Constructs graph node for the dataset op.
+    return gen_experimental_dataset_ops.experimental_csv_dataset(
+        filenames=self._filenames,
+        record_defaults=self._record_defaults,
+        buffer_size=self._buffer_size,
+        header=self._header,
+        output_shapes=self._output_shapes,
+        field_delim=self._field_delim,
+        use_quote_delim=self._use_quote_delim,
+        na_value=self._na_value,
+        select_cols=self._select_cols,
+        compression_type=self._compression_type,
+    )
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+
+@tf_export("data.experimental.make_batched_features_dataset")
+def make_batched_features_dataset(file_pattern,
+                                  batch_size,
+                                  features,
+                                  reader=core_readers.TFRecordDataset,
+                                  label_key=None,
+                                  reader_args=None,
+                                  num_epochs=None,
+                                  shuffle=True,
+                                  shuffle_buffer_size=10000,
+                                  shuffle_seed=None,
+                                  prefetch_buffer_size=optimization.AUTOTUNE,
+                                  reader_num_threads=1,
+                                  parser_num_threads=2,
+                                  sloppy_ordering=False,
+                                  drop_final_batch=False):
+  """Returns a `Dataset` of feature dictionaries from `Example` protos.
+
+  If label_key argument is provided, returns a `Dataset` of tuple
+  comprising of feature dictionaries and label.
+
+  Example:
+
+  ```
+  serialized_examples = [
+    features {
+      feature { key: "age" value { int64_list { value: [ 0 ] } } }
+      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
+      feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } }
+    },
+    features {
+      feature { key: "age" value { int64_list { value: [] } } }
+      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
+      feature { key: "kws" value { bytes_list { value: [ "sports" ] } } }
+    }
+  ]
+  ```
+
+  We can use arguments:
+
+  ```
+  features: {
+    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
+    "gender": FixedLenFeature([], dtype=tf.string),
+    "kws": VarLenFeature(dtype=tf.string),
+  }
+  ```
+
+  And the expected output is:
+
+  ```python
+  {
+    "age": [[0], [-1]],
+    "gender": [["f"], ["f"]],
+    "kws": SparseTensor(
+      indices=[[0, 0], [0, 1], [1, 0]],
+      values=["code", "art", "sports"]
+      dense_shape=[2, 2]),
+  }
+  ```
+
+  Args:
+    file_pattern: List of files or patterns of file paths containing
+      `Example` records. See `tf.gfile.Glob` for pattern rules.
+    batch_size: An int representing the number of records to combine
+      in a single batch.
+    features: A `dict` mapping feature keys to `FixedLenFeature` or
+      `VarLenFeature` values. See `tf.parse_example`.
+    reader: A function or class that can be
+      called with a `filenames` tensor and (optional) `reader_args` and returns
+      a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`.
+    label_key: (Optional) A string corresponding to the key labels are stored in
+      `tf.Examples`. If provided, it must be one of the `features` key,
+      otherwise results in `ValueError`.
+    reader_args: Additional arguments to pass to the reader class.
+    num_epochs: Integer specifying the number of times to read through the
+      dataset. If None, cycles through the dataset forever. Defaults to `None`.
+    shuffle: A boolean, indicates whether the input should be shuffled. Defaults
+      to `True`.
+    shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity
+      ensures better shuffling but would increase memory usage and startup time.
+    shuffle_seed: Randomization seed to use for shuffling.
+    prefetch_buffer_size: Number of feature batches to prefetch in order to
+      improve performance. Recommended value is the number of batches consumed
+      per training step. Defaults to auto-tune.
+    reader_num_threads: Number of threads used to read `Example` records. If >1,
+      the results will be interleaved.
+    parser_num_threads: Number of threads to use for parsing `Example` tensors
+      into a dictionary of `Feature` tensors.
+    sloppy_ordering: If `True`, reading performance will be improved at
+      the cost of non-deterministic ordering. If `False`, the order of elements
+      produced is deterministic prior to shuffling (elements are still
+      randomized if `shuffle=True`. Note that if the seed is set, then order
+      of elements after shuffling is deterministic). Defaults to `False`.
+    drop_final_batch: If `True`, and the batch size does not evenly divide the
+      input dataset size, the final smaller batch will be dropped. Defaults to
+      `False`.
+
+  Returns:
+    A dataset of `dict` elements, (or a tuple of `dict` elements and label).
+    Each `dict` maps feature keys to `Tensor` or `SparseTensor` objects.
+
+  Raises:
+    ValueError: If `label_key` is not one of the `features` keys.
+  """
+  # Create dataset of all matching filenames
+  filenames = _get_file_names(file_pattern, False)
+  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
+  if shuffle:
+    dataset = dataset.shuffle(len(filenames), shuffle_seed)
+
+  # Read `Example` records from files as tensor objects.
+  if reader_args is None:
+    reader_args = []
+
+  # Read files sequentially (if reader_num_threads=1) or in parallel
+  dataset = dataset.apply(
+      interleave_ops.parallel_interleave(
+          lambda filename: reader(filename, *reader_args),
+          cycle_length=reader_num_threads,
+          sloppy=sloppy_ordering))
+
+  # Extract values if the `Example` tensors are stored as key-value tuples.
+  if dataset.output_types == (dtypes.string, dtypes.string):
+    dataset = dataset_ops.MapDataset(
+        dataset, lambda _, v: v, use_inter_op_parallelism=False)
+
+  # Apply dataset repeat and shuffle transformations.
+  dataset = _maybe_shuffle_and_repeat(
+      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
+
+  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
+  # improve the shape inference, because it makes the batch dimension static.
+  # It is safe to do this because in that case we are repeating the input
+  # indefinitely, and all batches will be full-sized.
+  dataset = dataset.batch(
+      batch_size, drop_remainder=drop_final_batch or num_epochs is None)
+
+  # Parse `Example` tensors to a dictionary of `Feature` tensors.
+  dataset = dataset.apply(
+      parsing_ops.parse_example_dataset(
+          features, num_parallel_calls=parser_num_threads))
+
+  if label_key:
+    if label_key not in features:
+      raise ValueError(
+          "The `label_key` provided (%r) must be one of the `features` keys." %
+          label_key)
+    dataset = dataset.map(lambda x: (x, x.pop(label_key)))
+
+  dataset = dataset.prefetch(prefetch_buffer_size)
+  return dataset
+
+
+def _get_file_names(file_pattern, shuffle):
+  """Parse list of file names from pattern, optionally shuffled.
+
+  Args:
+    file_pattern: File glob pattern, or list of glob patterns.
+    shuffle: Whether to shuffle the order of file names.
+
+  Returns:
+    List of file names matching `file_pattern`.
+
+  Raises:
+    ValueError: If `file_pattern` is empty, or pattern matches no files.
+  """
+  if isinstance(file_pattern, list):
+    if not file_pattern:
+      raise ValueError("File pattern is empty.")
+    file_names = []
+    for entry in file_pattern:
+      file_names.extend(gfile.Glob(entry))
+  else:
+    file_names = list(gfile.Glob(file_pattern))
+
+  if not file_names:
+    raise ValueError("No files match %s." % file_pattern)
+
+  # Sort files so it will be deterministic for unit tests.
+  if not shuffle:
+    file_names = sorted(file_names)
+  return file_names
+
+
+@tf_export("data.experimental.SqlDataset")
+class SqlDataset(dataset_ops.DatasetSource):
+  """A `Dataset` consisting of the results from a SQL query."""
+
+  def __init__(self, driver_name, data_source_name, query, output_types):
+    """Creates a `SqlDataset`.
+
+    `SqlDataset` allows a user to read data from the result set of a SQL query.
+    For example:
+
+    ```python
+    dataset = tf.data.experimental.SqlDataset("sqlite", "/foo/bar.sqlite3",
+                                              "SELECT name, age FROM people",
+                                              (tf.string, tf.int32))
+    iterator = dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+    # Prints the rows of the result set of the above query.
+    while True:
+      try:
+        print(sess.run(next_element))
+      except tf.errors.OutOfRangeError:
+        break
+    ```
+
+    Args:
+      driver_name: A 0-D `tf.string` tensor containing the database type.
+        Currently, the only supported value is 'sqlite'.
+      data_source_name: A 0-D `tf.string` tensor containing a connection string
+        to connect to the database.
+      query: A 0-D `tf.string` tensor containing the SQL query to execute.
+      output_types: A tuple of `tf.DType` objects representing the types of the
+        columns returned by `query`.
+    """
+    super(SqlDataset, self).__init__()
+    self._driver_name = ops.convert_to_tensor(
+        driver_name, dtype=dtypes.string, name="driver_name")
+    self._data_source_name = ops.convert_to_tensor(
+        data_source_name, dtype=dtypes.string, name="data_source_name")
+    self._query = ops.convert_to_tensor(
+        query, dtype=dtypes.string, name="query")
+    self._output_types = output_types
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.sql_dataset(self._driver_name,
+                                       self._data_source_name, self._query,
+                                       nest.flatten(self.output_types),
+                                       nest.flatten(self.output_shapes))
+
+  @property
+  def output_classes(self):
+    return nest.map_structure(lambda _: ops.Tensor, self._output_types)
+
+  @property
+  def output_shapes(self):
+    return nest.map_structure(lambda _: tensor_shape.TensorShape([]),
+                              self._output_types)
+
+  @property
+  def output_types(self):
+    return self._output_types
diff --git a/tensorflow/python/data/experimental/ops/resampling.py b/tensorflow/python/data/experimental/ops/resampling.py
new file mode 100644
index 0000000000..3a3040ae9a
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/resampling.py
@@ -0,0 +1,296 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Resampling dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import scan_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import logging_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.rejection_resample")
+def rejection_resample(class_func, target_dist, initial_dist=None, seed=None):
+  """A transformation that resamples a dataset to achieve a target distribution.
+
+  **NOTE** Resampling is performed via rejection sampling; some fraction
+  of the input values will be dropped.
+
+  Args:
+    class_func: A function mapping an element of the input dataset to a scalar
+      `tf.int32` tensor. Values should be in `[0, num_classes)`.
+    target_dist: A floating point type tensor, shaped `[num_classes]`.
+    initial_dist: (Optional.)  A floating point type tensor, shaped
+      `[num_classes]`.  If not provided, the true class distribution is
+      estimated live in a streaming fashion.
+    seed: (Optional.) Python integer seed for the resampler.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
+    class_values_ds = dataset.map(class_func)
+
+    # Get initial distribution.
+    if initial_dist is not None:
+      initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")
+      acceptance_dist, prob_of_original = (
+          _calculate_acceptance_probs_with_mixing(initial_dist_t,
+                                                  target_dist_t))
+      initial_dist_ds = dataset_ops.Dataset.from_tensors(
+          initial_dist_t).repeat()
+      acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
+          acceptance_dist).repeat()
+      prob_of_original_ds = dataset_ops.Dataset.from_tensors(
+          prob_of_original).repeat()
+    else:
+      initial_dist_ds = _estimate_initial_dist_ds(
+          target_dist_t, class_values_ds)
+      acceptance_and_original_prob_ds = initial_dist_ds.map(
+          lambda initial: _calculate_acceptance_probs_with_mixing(  # pylint: disable=g-long-lambda
+              initial, target_dist_t))
+      acceptance_dist_ds = acceptance_and_original_prob_ds.map(
+          lambda accept_prob, _: accept_prob)
+      prob_of_original_ds = acceptance_and_original_prob_ds.map(
+          lambda _, prob_original: prob_original)
+    filtered_ds = _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds,
+                             class_values_ds, seed)
+    # Prefetch filtered dataset for speed.
+    filtered_ds = filtered_ds.prefetch(3)
+
+    prob_original_static = _get_prob_original_static(
+        initial_dist_t, target_dist_t) if initial_dist is not None else None
+    if prob_original_static == 1:
+      return dataset_ops.Dataset.zip((class_values_ds, dataset))
+    elif prob_original_static == 0:
+      return filtered_ds
+    else:
+      return interleave_ops.sample_from_datasets(
+          [dataset_ops.Dataset.zip((class_values_ds, dataset)), filtered_ds],
+          weights=prob_of_original_ds.map(lambda prob: [(prob, 1.0 - prob)]),
+          seed=seed)
+
+  return _apply_fn
+
+
+def _get_prob_original_static(initial_dist_t, target_dist_t):
+  """Returns the static probability of sampling from the original.
+
+  `tensor_util.constant_value(prob_of_original)` returns `None` if it encounters
+  an Op that it isn't defined for. We have some custom logic to avoid this.
+
+  Args:
+    initial_dist_t: A tensor of the initial distribution.
+    target_dist_t: A tensor of the target distribution.
+
+  Returns:
+    The probability of sampling from the original distribution as a constant,
+    if it is a constant, or `None`.
+  """
+  init_static = tensor_util.constant_value(initial_dist_t)
+  target_static = tensor_util.constant_value(target_dist_t)
+
+  if init_static is None or target_static is None:
+    return None
+  else:
+    return np.min(target_static / init_static)
+
+
+def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds,
+               seed):
+  """Filters a dataset based on per-class acceptance probabilities.
+
+  Args:
+    dataset: The dataset to be filtered.
+    acceptance_dist_ds: A dataset of acceptance probabilities.
+    initial_dist_ds: A dataset of the initial probability distribution, given or
+        estimated.
+    class_values_ds: A dataset of the corresponding classes.
+    seed: (Optional.) Python integer seed for the resampler.
+
+  Returns:
+    A dataset of (class value, data) after filtering.
+  """
+  def maybe_warn_on_large_rejection(accept_dist, initial_dist):
+    proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist)
+    return control_flow_ops.cond(
+        math_ops.less(proportion_rejected, .5),
+        lambda: accept_dist,
+        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
+            accept_dist, [proportion_rejected, initial_dist, accept_dist],
+            message="Proportion of examples rejected by sampler is high: ",
+            summarize=100,
+            first_n=10))
+
+  acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
+                                                 initial_dist_ds))
+                        .map(maybe_warn_on_large_rejection))
+
+  def _gather_and_copy(class_val, acceptance_prob, data):
+    return class_val, array_ops.gather(acceptance_prob, class_val), data
+
+  current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip(
+      (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy)
+  filtered_ds = (
+      current_probabilities_and_class_and_data_ds
+      .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
+  return filtered_ds.map(lambda class_value, _, data: (class_value, data))
+
+
+def _estimate_initial_dist_ds(
+    target_dist_t, class_values_ds, dist_estimation_batch_size=32,
+    smoothing_constant=10):
+  num_classes = (target_dist_t.shape[0].value or
+                 array_ops.shape(target_dist_t)[0])
+  initial_examples_per_class_seen = array_ops.fill(
+      [num_classes], np.int64(smoothing_constant))
+
+  def update_estimate_and_tile(num_examples_per_class_seen, c):
+    updated_examples_per_class_seen, dist = _estimate_data_distribution(
+        c, num_examples_per_class_seen)
+    tiled_dist = array_ops.tile(
+        array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
+    return updated_examples_per_class_seen, tiled_dist
+
+  initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
+                     .apply(scan_ops.scan(initial_examples_per_class_seen,
+                                          update_estimate_and_tile))
+                     .apply(batching.unbatch()))
+
+  return initial_dist_ds
+
+
+def _get_target_to_initial_ratio(initial_probs, target_probs):
+  # Add tiny to initial_probs to avoid divide by zero.
+  denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny)
+  return target_probs / denom
+
+
+def _estimate_data_distribution(c, num_examples_per_class_seen):
+  """Estimate data distribution as labels are seen.
+
+  Args:
+    c: The class labels.  Type `int32`, shape `[batch_size]`.
+    num_examples_per_class_seen: Type `int64`, shape `[num_classes]`,
+      containing counts.
+
+  Returns:
+    num_examples_per_lass_seen: Updated counts.  Type `int64`, shape
+      `[num_classes]`.
+    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
+  """
+  num_classes = num_examples_per_class_seen.get_shape()[0].value
+  # Update the class-count based on what labels are seen in batch.
+  num_examples_per_class_seen = math_ops.add(
+      num_examples_per_class_seen, math_ops.reduce_sum(
+          array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
+  init_prob_estimate = math_ops.truediv(
+      num_examples_per_class_seen,
+      math_ops.reduce_sum(num_examples_per_class_seen))
+  dist = math_ops.cast(init_prob_estimate, dtypes.float32)
+  return num_examples_per_class_seen, dist
+
+
+def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs):
+  """Calculates the acceptance probabilities and mixing ratio.
+
+  In this case, we assume that we can *either* sample from the original data
+  distribution with probability `m`, or sample from a reshaped distribution
+  that comes from rejection sampling on the original distribution. This
+  rejection sampling is done on a per-class basis, with `a_i` representing the
+  probability of accepting data from class `i`.
+
+  This method is based on solving the following analysis for the reshaped
+  distribution:
+
+  Let F be the probability of a rejection (on any example).
+  Let p_i be the proportion of examples in the data in class i (init_probs)
+  Let a_i is the rate the rejection sampler should *accept* class i
+  Let t_i is the target proportion in the minibatches for class i (target_probs)
+
+  ```
+  F = sum_i(p_i * (1-a_i))
+    = 1 - sum_i(p_i * a_i)     using sum_i(p_i) = 1
+  ```
+
+  An example with class `i` will be accepted if `k` rejections occur, then an
+  example with class `i` is seen by the rejector, and it is accepted. This can
+  be written as follows:
+
+  ```
+  t_i = sum_k=0^inf(F^k * p_i * a_i)
+      = p_i * a_j / (1 - F)    using geometric series identity, since 0 <= F < 1
+      = p_i * a_i / sum_j(p_j * a_j)        using F from above
+  ```
+
+  Note that the following constraints hold:
+  ```
+  0 <= p_i <= 1, sum_i(p_i) = 1
+  0 <= a_i <= 1
+  0 <= t_i <= 1, sum_i(t_i) = 1
+  ```
+
+  A solution for a_i in terms of the other variables is the following:
+    ```a_i = (t_i / p_i) / max_i[t_i / p_i]```
+
+  If we try to minimize the amount of data rejected, we get the following:
+
+  M_max = max_i [ t_i / p_i ]
+  M_min = min_i [ t_i / p_i ]
+
+  The desired probability of accepting data if it comes from class `i`:
+
+  a_i = (t_i/p_i - m) / (M_max - m)
+
+  The desired probability of pulling a data element from the original dataset,
+  rather than the filtered one:
+
+  m = M_min
+
+  Args:
+    initial_probs: A Tensor of the initial probability distribution, given or
+      estimated.
+    target_probs: A Tensor of the corresponding classes.
+
+  Returns:
+    (A 1D Tensor with the per-class acceptance probabilities, the desired
+    probability of pull from the original distribution.)
+  """
+  ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs)
+  max_ratio = math_ops.reduce_max(ratio_l)
+  min_ratio = math_ops.reduce_min(ratio_l)
+
+  # Target prob to sample from original distribution.
+  m = min_ratio
+
+  # TODO(joelshor): Simplify fraction, if possible.
+  a_i = (ratio_l - m) / (max_ratio - m)
+  return a_i, m
diff --git a/tensorflow/python/data/experimental/ops/scan_ops.py b/tensorflow/python/data/experimental/ops/scan_ops.py
new file mode 100644
index 0000000000..e05e7c5a18
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/scan_ops.py
@@ -0,0 +1,177 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Scan dataset transformation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+class _ScanDataset(dataset_ops.UnaryDataset):
+  """A dataset that scans a function across its input."""
+
+  def __init__(self, input_dataset, initial_state, scan_func):
+    """See `scan()` for details."""
+    super(_ScanDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+
+    with ops.name_scope("initial_state"):
+      # Convert any `SparseTensorValue`s to `SparseTensor`s and all other
+      # values to tensors.
+      self._initial_state = nest.pack_sequence_as(initial_state, [
+          sparse_tensor.SparseTensor.from_value(t)
+          if sparse_tensor.is_sparse(t) else ops.convert_to_tensor(
+              t, name="component_%d" % i)
+          for i, t in enumerate(nest.flatten(initial_state))
+      ])
+
+    # Compute initial values for the state classes, shapes and types based on
+    # the initial state. The shapes may be refined by running `tf_scan_func` one
+    # or more times below.
+    self._state_classes = sparse.get_classes(self._initial_state)
+    self._state_shapes = nest.pack_sequence_as(
+        self._initial_state,
+        [t.get_shape() for t in nest.flatten(self._initial_state)])
+    self._state_types = nest.pack_sequence_as(
+        self._initial_state,
+        [t.dtype for t in nest.flatten(self._initial_state)])
+
+    # Will be populated by calling `tf_scan_func`.
+    self._output_classes = None
+    self._output_shapes = None
+    self._output_types = None
+
+    # Iteratively rerun the scan function until reaching a fixed point on
+    # `self._state_shapes`.
+    need_to_rerun = True
+    while need_to_rerun:
+
+      wrapped_func = dataset_ops.StructuredFunctionWrapper(
+          scan_func,
+          "tf.data.experimental.scan()",
+          input_classes=(self._state_classes, input_dataset.output_classes),
+          input_shapes=(self._state_shapes, input_dataset.output_shapes),
+          input_types=(self._state_types, input_dataset.output_types),
+          add_to_graph=False)
+      if not (
+          isinstance(wrapped_func.output_types, collections.Sequence) and
+          len(wrapped_func.output_types) == 2):
+        raise TypeError("The scan function must return a pair comprising the "
+                        "new state and the output value.")
+
+      new_state_classes, self._output_classes = wrapped_func.output_classes
+
+      # Extract and validate class information from the returned values.
+      for new_state_class, state_class in zip(
+          nest.flatten(new_state_classes),
+          nest.flatten(self._state_classes)):
+        if not issubclass(new_state_class, state_class):
+          raise TypeError(
+              "The element classes for the new state must match the initial "
+              "state. Expected %s; got %s." %
+              (self._state_classes, new_state_classes))
+
+      # Extract and validate type information from the returned values.
+      new_state_types, self._output_types = wrapped_func.output_types
+      for new_state_type, state_type in zip(
+          nest.flatten(new_state_types), nest.flatten(self._state_types)):
+        if new_state_type != state_type:
+          raise TypeError(
+              "The element types for the new state must match the initial "
+              "state. Expected %s; got %s." %
+              (self._state_types, new_state_types))
+
+      # Extract shape information from the returned values.
+      new_state_shapes, self._output_shapes = wrapped_func.output_shapes
+
+      flat_state_shapes = nest.flatten(self._state_shapes)
+      flat_new_state_shapes = nest.flatten(new_state_shapes)
+      weakened_state_shapes = [
+          original.most_specific_compatible_shape(new)
+          for original, new in zip(flat_state_shapes, flat_new_state_shapes)
+      ]
+
+      need_to_rerun = False
+      for original_shape, weakened_shape in zip(flat_state_shapes,
+                                                weakened_state_shapes):
+        if original_shape.ndims is not None and (
+            weakened_shape.ndims is None or
+            original_shape.as_list() != weakened_shape.as_list()):
+          need_to_rerun = True
+          break
+
+      if need_to_rerun:
+        self._state_shapes = nest.pack_sequence_as(self._state_shapes,
+                                                   weakened_state_shapes)
+
+    self._scan_func = wrapped_func.function
+    self._scan_func.add_to_graph(ops.get_default_graph())
+
+  def _as_variant_tensor(self):
+    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+    return gen_dataset_ops.scan_dataset(
+        input_t,
+        nest.flatten(sparse.serialize_sparse_tensors(self._initial_state)),
+        self._scan_func.captured_inputs,
+        f=self._scan_func,
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+
+@tf_export("data.experimental.scan")
+def scan(initial_state, scan_func):
+  """A transformation that scans a function across an input dataset.
+
+  This transformation is a stateful relative of `tf.data.Dataset.map`.
+  In addition to mapping `scan_func` across the elements of the input dataset,
+  `scan()` accumulates one or more state tensors, whose initial values are
+  `initial_state`.
+
+  Args:
+    initial_state: A nested structure of tensors, representing the initial state
+      of the accumulator.
+    scan_func: A function that maps `(old_state, input_element)` to
+      `(new_state, output_element). It must take two arguments and return a
+      pair of nested structures of tensors. The `new_state` must match the
+      structure of `initial_state`.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+  def _apply_fn(dataset):
+    return _ScanDataset(dataset, initial_state, scan_func)
+
+  return _apply_fn
diff --git a/tensorflow/python/data/experimental/ops/shuffle_ops.py b/tensorflow/python/data/experimental/ops/shuffle_ops.py
new file mode 100644
index 0000000000..a4307212da
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/shuffle_ops.py
@@ -0,0 +1,102 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental shuffle ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import random_seed
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that fuses `shuffle` and `repeat`."""
+
+  def __init__(self, input_dataset, buffer_size, count=None, seed=None):
+    super(_ShuffleAndRepeatDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._buffer_size = ops.convert_to_tensor(
+        buffer_size, dtype=dtypes.int64, name="buffer_size")
+    if count is None:
+      self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")
+    else:
+      self._count = ops.convert_to_tensor(
+          count, dtype=dtypes.int64, name="count")
+    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+  def _as_variant_tensor(self):
+    # pylint: disable=protected-access
+    input_resource = self._input_dataset._as_variant_tensor()
+    return gen_dataset_ops.shuffle_and_repeat_dataset(
+        input_resource,
+        buffer_size=self._buffer_size,
+        count=self._count,
+        seed=self._seed,
+        seed2=self._seed2,
+        **dataset_ops.flat_structure(self))
+    # pylint: enable=protected-access
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+@tf_export("data.experimental.shuffle_and_repeat")
+def shuffle_and_repeat(buffer_size, count=None, seed=None):
+  """Shuffles and repeats a Dataset returning a new permutation for each epoch.
+
+  `dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size, count))`
+
+  is equivalent to
+
+  `dataset.shuffle(buffer_size, reshuffle_each_iteration=True).repeat(count)`
+
+  The difference is that the latter dataset is not serializable. So,
+  if you need to checkpoint an input pipeline with reshuffling you must use
+  this implementation.
+
+  Args:
+    buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the
+      maximum number elements that will be buffered when prefetching.
+    count: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      number of times the dataset should be repeated. The default behavior
+      (if `count` is `None` or `-1`) is for the dataset be repeated
+      indefinitely.
+    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      random seed that will be used to create the distribution. See
+      `tf.set_random_seed` for behavior.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):  # pylint: disable=missing-docstring
+    return _ShuffleAndRepeatDataset(dataset, buffer_size, count, seed)
+
+  return _apply_fn
diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/python/data/experimental/ops/stats_ops.py
similarity index 92%
rename from tensorflow/contrib/data/python/ops/stats_ops.py
rename to tensorflow/python/data/experimental/ops/stats_ops.py
index bc47c5989d..c918d223e8 100644
--- a/tensorflow/contrib/data/python/ops/stats_ops.py
+++ b/tensorflow/python/data/experimental/ops/stats_ops.py
@@ -21,8 +21,10 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export("data.experimental.StatsAggregator")
 class StatsAggregator(object):
   """A stateful resource that aggregates statistics from one or more iterators.
 
@@ -34,7 +36,7 @@ class StatsAggregator(object):
 
   ```python
   dataset = ...
-  dataset = dataset.apply(stats_ops.latency_stats("total_bytes"))
+  dataset = dataset.apply(tf.data.experimental.latency_stats("total_bytes"))
   ```
 
   To associate a `StatsAggregator` with a `tf.data.Dataset` object, use
@@ -46,7 +48,7 @@ class StatsAggregator(object):
 
   # Apply `set_stats_aggregator` to associate `dataset` with `stats_aggregator`.
   dataset = dataset.apply(
-      tf.contrib.data.set_stats_aggregator(stats_aggregator))
+      tf.data.experimental.set_stats_aggregator(stats_aggregator))
   iterator = dataset.make_one_shot_iterator()
   ```
 
@@ -111,11 +113,12 @@ class _SetStatsAggregatorDataset(dataset_ops.UnaryDataset):
     return self._input_dataset.output_classes
 
 
+@tf_export("data.experimental.set_stats_aggregator")
 def set_stats_aggregator(stats_aggregator):
   """Set the given `stats_aggregator` for aggregating the input dataset stats.
 
   Args:
-    stats_aggregator: A `tf.contrib.data.StatsAggregator` object.
+    stats_aggregator: A `tf.data.experimental.StatsAggregator` object.
 
   Returns:
     A `Dataset` transformation function, which can be passed to
@@ -128,8 +131,8 @@ def set_stats_aggregator(stats_aggregator):
   return _apply_fn
 
 
-# TODO(b/38416882): Properly export in the `tf.contrib.data` API when stable
-# or make private / remove.
+# TODO(b/38416882): Properly export in the `tf.data.experimental` API when
+# stable or make private / remove.
 def bytes_produced_stats(tag):
   """Records the number of bytes produced by each element of the input dataset.
 
@@ -152,6 +155,7 @@ def bytes_produced_stats(tag):
   return _apply_fn
 
 
+@tf_export("data.experimental.latency_stats")
 def latency_stats(tag):
   """Records the latency of producing each element of the input dataset.
 
diff --git a/tensorflow/python/data/experimental/ops/threadpool.py b/tensorflow/python/data/experimental/ops/threadpool.py
new file mode 100644
index 0000000000..3ea017c6e8
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/threadpool.py
@@ -0,0 +1,104 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental API for controlling threading in `tf.data` pipelines."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import threading
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import context
+from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
+from tensorflow.python.ops import resource_variable_ops
+
+_uid_counter = 0
+_uid_lock = threading.Lock()
+
+
+def _generate_shared_name(prefix):
+  with _uid_lock:
+    global _uid_counter
+    uid = _uid_counter
+    _uid_counter += 1
+  return "{}{}".format(prefix, uid)
+
+
+# TODO(b/73383364): Properly export in the `tf.data.experimental` API when
+# stable or make private / remove.
+class PrivateThreadPool(object):
+  """A stateful resource that represents a private thread pool."""
+
+  def __init__(self, num_threads, display_name=None,
+               max_intra_op_parallelism=1):
+    """Creates a `PrivateThreadPool` with the given number of threads."""
+    if context.executing_eagerly():
+      shared_name = _generate_shared_name("privatethreadpool")
+      self._resource = ged_ops.experimental_thread_pool_handle(
+          num_threads=num_threads,
+          max_intra_op_parallelism=max_intra_op_parallelism,
+          display_name=display_name,
+          shared_name=shared_name)
+      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
+          handle=self._resource, handle_device=context.context().device_name)
+    else:
+      self._resource = ged_ops.experimental_thread_pool_handle(
+          num_threads=num_threads,
+          max_intra_op_parallelism=max_intra_op_parallelism,
+          display_name=display_name)
+
+
+class _ThreadPoolDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that acts as an identity, and sets a custom threadpool."""
+
+  def __init__(self, input_dataset, thread_pool):
+    super(_ThreadPoolDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._thread_pool = thread_pool
+
+  def _as_variant_tensor(self):
+    return ged_ops.experimental_thread_pool_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._thread_pool._resource,  # pylint: disable=protected-access
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+
+# TODO(b/73383364): Properly export in the `tf.data.experimental` API when
+# stable or make private / remove.
+def override_threadpool(dataset, thread_pool):
+  """Returns a new dataset that uses the given thread pool for its operations.
+
+  Args:
+    dataset: A `tf.data.Dataset` object.
+    thread_pool: A `PrivateThreadPool` object.
+
+  Returns:
+    A dataset containing the same values as `dataset`, but which uses
+    `thread_pool` to compute any of its parallel operations (such as
+    `tf.data.Dataset.map`).
+  """
+  return _ThreadPoolDataset(dataset, thread_pool)
diff --git a/tensorflow/python/data/experimental/ops/unique.py b/tensorflow/python/data/experimental/ops/unique.py
new file mode 100644
index 0000000000..2a7775c456
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/unique.py
@@ -0,0 +1,79 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unique element dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.unique")
+def unique():
+  """Creates a `Dataset` from another `Dataset`, discarding duplicates.
+
+  Use this transformation to produce a dataset that contains one instance of
+  each unique element in the input. For example:
+
+  ```python
+  dataset = tf.data.Dataset.from_tensor_slices([1, 37, 2, 37, 2, 1])
+
+  # Using `unique()` will drop the duplicate elements.
+  dataset = dataset.apply(tf.data.experimental.unique())  # ==> { 1, 37, 2 }
+  ```
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _UniqueDataset(dataset)
+
+  return _apply_fn
+
+
+class _UniqueDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` contains the unique elements from its input."""
+
+  def __init__(self, input_dataset):
+    """See `unique()` for details."""
+    super(_UniqueDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    if input_dataset.output_types not in (dtypes.int32, dtypes.int64,
+                                          dtypes.string):
+      raise TypeError(
+          "`tf.data.experimental.unique()` only supports inputs with a single "
+          "`tf.int32`, `tf.int64`, or `tf.string` component.")
+
+  def _as_variant_tensor(self):
+    return gen_experimental_dataset_ops.experimental_unique_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
diff --git a/tensorflow/python/data/experimental/ops/writers.py b/tensorflow/python/data/experimental/ops/writers.py
new file mode 100644
index 0000000000..994447cb4d
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/writers.py
@@ -0,0 +1,60 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python wrappers for tf.data writers."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import convert
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.TFRecordWriter")
+class TFRecordWriter(object):
+  """Writes data to a TFRecord file."""
+
+  def __init__(self, filename, compression_type=None):
+    self._filename = ops.convert_to_tensor(
+        filename, dtypes.string, name="filename")
+    self._compression_type = convert.optional_param_to_tensor(
+        "compression_type",
+        compression_type,
+        argument_default="",
+        argument_dtype=dtypes.string)
+
+  def write(self, dataset):
+    """Returns a `tf.Operation` to write a dataset to a file.
+
+    Args:
+      dataset: a `tf.data.Dataset` whose elements are to be written to a file
+
+    Returns:
+      A `tf.Operation` that, when run, writes contents of `dataset` to a file.
+    """
+    if not isinstance(dataset, dataset_ops.Dataset):
+      raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
+    if (dataset.output_types != dtypes.string or
+        dataset.output_shapes != tensor_shape.scalar()):
+      raise TypeError(
+          "`dataset` must produce scalar `DT_STRING` tensors whereas it "
+          "produces shape {0} and types {1}".format(dataset.output_shapes,
+                                                    dataset.output_types))
+    return gen_dataset_ops.dataset_to_tf_record(
+        dataset._as_variant_tensor(), self._filename, self._compression_type)  # pylint: disable=protected-access
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 6bba72a8e9..3b9d3a639d 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -889,8 +889,8 @@ class Dataset(object):
       will be padded out to the maximum length of all elements in that
       dimension.
 
-    See also `tf.contrib.data.dense_to_sparse_batch`, which combines elements
-    that may have different shapes into a `tf.SparseTensor`.
+    See also `tf.data.experimental.dense_to_sparse_batch`, which combines
+    elements that may have different shapes into a `tf.SparseTensor`.
 
     Args:
       batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
diff --git a/tensorflow/python/data/ops/optional_ops.py b/tensorflow/python/data/ops/optional_ops.py
index 3bbebd7878..aca989e03a 100644
--- a/tensorflow/python/data/ops/optional_ops.py
+++ b/tensorflow/python/data/ops/optional_ops.py
@@ -31,7 +31,7 @@ class Optional(object):
 
   An `Optional` can represent the result of an operation that may fail as a
   value, rather than raising an exception and halting execution. For example,
-  `tf.contrib.data.get_next_as_optional` returns an `Optional` that either
+  `tf.data.experimental.get_next_as_optional` returns an `Optional` that either
   contains the next value from a `tf.data.Iterator` if one exists, or a "none"
   value that indicates the end of the sequence has been reached.
   """
@@ -111,7 +111,7 @@ class Optional(object):
 
 
 class _OptionalImpl(Optional):
-  """Concrete implementation of `tf.contrib.data.Optional`.
+  """Concrete implementation of `tf.data.experimental.Optional`.
 
   NOTE(mrry): This implementation is kept private, to avoid defining
   `Optional.__init__()` in the public API.
diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py
index b0f26631f9..d08da6704c 100644
--- a/tensorflow/python/data/ops/readers.py
+++ b/tensorflow/python/data/ops/readers.py
@@ -129,7 +129,7 @@ class ParallelInterleaveDataset(dataset_ops.InterleaveDataset):
 
   def __init__(self, input_dataset, map_func, cycle_length, block_length,
                sloppy, buffer_output_elements, prefetch_input_elements):
-    """See `tf.contrib.data.parallel_interleave()` for details."""
+    """See `tf.data.experimental.parallel_interleave()` for details."""
     super(ParallelInterleaveDataset, self).__init__(input_dataset, map_func,
                                                     cycle_length, block_length)
     self._sloppy = ops.convert_to_tensor(
@@ -158,7 +158,7 @@ class ParallelInterleaveDataset(dataset_ops.InterleaveDataset):
     # pylint: enable=protected-access
 
   def _transformation_name(self):
-    return "tf.contrib.data.parallel_interleave()"
+    return "tf.data.experimental.parallel_interleave()"
 
 
 @tf_export("data.TFRecordDataset")
diff --git a/tensorflow/python/debug/examples/debug_tflearn_iris.py b/tensorflow/python/debug/examples/debug_tflearn_iris.py
index 019f13c450..f9bb3148fb 100644
--- a/tensorflow/python/debug/examples/debug_tflearn_iris.py
+++ b/tensorflow/python/debug/examples/debug_tflearn_iris.py
@@ -94,13 +94,15 @@ def main(_):
         "sepal_length", "sepal_width", "petal_length", "petal_width", "label"]
     batch_size = 32
     def training_input_fn():
-      return tf.contrib.data.make_csv_dataset(
-          [training_data_path], batch_size,
-          column_names=column_names, label_name="label")
+      return tf.data.experimental.make_csv_dataset([training_data_path],
+                                                   batch_size,
+                                                   column_names=column_names,
+                                                   label_name="label")
     def test_input_fn():
-      return tf.contrib.data.make_csv_dataset(
-          [test_data_path], batch_size,
-          column_names=column_names, label_name="label")
+      return tf.data.experimental.make_csv_dataset([test_data_path],
+                                                   batch_size,
+                                                   column_names=column_names,
+                                                   label_name="label")
     feature_columns = [tf.feature_column.numeric_column(feature)
                        for feature in column_names[:-1]]
 
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 5ce5410e0b..533a138a39 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -8,6 +8,7 @@ TENSORFLOW_API_INIT_FILES = [
     "bitwise/__init__.py",
     "compat/__init__.py",
     "data/__init__.py",
+    "data/experimental/__init__.py",
     "debugging/__init__.py",
     "distributions/__init__.py",
     "dtypes/__init__.py",
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index 587eb232f5..0747424eab 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -8,6 +8,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "bitwise/__init__.py",
     "compat/__init__.py",
     "data/__init__.py",
+    "data/experimental/__init__.py",
     "debugging/__init__.py",
     "distributions/__init__.py",
     "dtypes/__init__.py",
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
new file mode 100644
index 0000000000..03c16cda8b
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.data.experimental.CheckpointInputPipelineHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.iterator_ops.CheckpointInputPipelineHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'estimator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..3eeaa1b185
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.CsvDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
new file mode 100644
index 0000000000..0c0405ee02
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.CsvDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.readers.CsvDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'filenames\', \'record_defaults\', \'compression_type\', \'buffer_size\', \'header\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \',\', \'True\', \'\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional.pbtxt
new file mode 100644
index 0000000000..b4c9459098
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional.pbtxt
@@ -0,0 +1,28 @@
+path: "tensorflow.data.experimental.Optional"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.optional_ops.Optional\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "value_structure"
+    mtype: "<class \'abc.abstractproperty\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_value"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "has_value"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "none_from_structure"
+    argspec: "args=[\'value_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..2991b12f64
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.RandomDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
new file mode 100644
index 0000000000..bce0be4b17
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.RandomDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.random_ops.RandomDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-reducer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-reducer.pbtxt
new file mode 100644
index 0000000000..6b477a8a72
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-reducer.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.data.experimental.Reducer"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.grouping.Reducer\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "finalize_func"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "init_func"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "reduce_func"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'init_func\', \'reduce_func\', \'finalize_func\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..948e99ef86
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.SqlDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
new file mode 100644
index 0000000000..8aeae92d96
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.SqlDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.readers.SqlDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'driver_name\', \'data_source_name\', \'query\', \'output_types\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-stats-aggregator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-stats-aggregator.pbtxt
new file mode 100644
index 0000000000..0bcc8cf3e8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-stats-aggregator.pbtxt
@@ -0,0 +1,13 @@
+path: "tensorflow.data.experimental.StatsAggregator"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.stats_ops.StatsAggregator\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_summary"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-t-f-record-writer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-t-f-record-writer.pbtxt
new file mode 100644
index 0000000000..6f9d18a701
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-t-f-record-writer.pbtxt
@@ -0,0 +1,13 @@
+path: "tensorflow.data.experimental.TFRecordWriter"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.writers.TFRecordWriter\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'filename\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "write"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
new file mode 100644
index 0000000000..b14585f8d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
@@ -0,0 +1,139 @@
+path: "tensorflow.data.experimental"
+tf_module {
+  member {
+    name: "CheckpointInputPipelineHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CsvDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "Optional"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "Reducer"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SqlDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "StatsAggregator"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordWriter"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "Counter"
+    argspec: "args=[\'start\', \'step\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'1\', \"<dtype: \'int64\'>\"], "
+  }
+  member_method {
+    name: "bucket_by_sequence_length"
+    argspec: "args=[\'element_length_func\', \'bucket_boundaries\', \'bucket_batch_sizes\', \'padded_shapes\', \'padding_values\', \'pad_to_bucket_boundary\', \'no_padding\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], "
+  }
+  member_method {
+    name: "choose_from_datasets"
+    argspec: "args=[\'datasets\', \'choice_dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "copy_to_device"
+    argspec: "args=[\'target_device\', \'source_device\'], varargs=None, keywords=None, defaults=[\'/cpu:0\'], "
+  }
+  member_method {
+    name: "dense_to_sparse_batch"
+    argspec: "args=[\'batch_size\', \'row_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "enumerate_dataset"
+    argspec: "args=[\'start\'], varargs=None, keywords=None, defaults=[\'0\'], "
+  }
+  member_method {
+    name: "get_next_as_optional"
+    argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_single_element"
+    argspec: "args=[\'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "group_by_reducer"
+    argspec: "args=[\'key_func\', \'reducer\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "group_by_window"
+    argspec: "args=[\'key_func\', \'reduce_func\', \'window_size\', \'window_size_func\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "ignore_errors"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "latency_stats"
+    argspec: "args=[\'tag\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "make_batched_features_dataset"
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'features\', \'reader\', \'label_key\', \'reader_args\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'reader_num_threads\', \'parser_num_threads\', \'sloppy_ordering\', \'drop_final_batch\'], varargs=None, keywords=None, defaults=[\"<class \'tensorflow.python.data.ops.readers.TFRecordDataset\'>\", \'None\', \'None\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'2\', \'False\', \'False\'], "
+  }
+  member_method {
+    name: "make_csv_dataset"
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'column_names\', \'column_defaults\', \'label_name\', \'select_columns\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'header\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'num_parallel_reads\', \'sloppy\', \'num_rows_for_inference\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \',\', \'True\', \'\', \'True\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'False\', \'100\', \'None\'], "
+  }
+  member_method {
+    name: "make_saveable_from_iterator"
+    argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map_and_batch"
+    argspec: "args=[\'map_func\', \'batch_size\', \'num_parallel_batches\', \'drop_remainder\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "parallel_interleave"
+    argspec: "args=[\'map_func\', \'cycle_length\', \'block_length\', \'sloppy\', \'buffer_output_elements\', \'prefetch_input_elements\'], varargs=None, keywords=None, defaults=[\'1\', \'False\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "parse_example_dataset"
+    argspec: "args=[\'features\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\'], "
+  }
+  member_method {
+    name: "prefetch_to_device"
+    argspec: "args=[\'device\', \'buffer_size\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "rejection_resample"
+    argspec: "args=[\'class_func\', \'target_dist\', \'initial_dist\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "sample_from_datasets"
+    argspec: "args=[\'datasets\', \'weights\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "scan"
+    argspec: "args=[\'initial_state\', \'scan_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_stats_aggregator"
+    argspec: "args=[\'stats_aggregator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle_and_repeat"
+    argspec: "args=[\'buffer_size\', \'count\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "unbatch"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "unique"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
index 56fb270a49..e205157523 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
@@ -20,4 +20,8 @@ tf_module {
     name: "TextLineDataset"
     mtype: "<class \'abc.ABCMeta\'>"
   }
+  member {
+    name: "experimental"
+    mtype: "<type \'module\'>"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
new file mode 100644
index 0000000000..03c16cda8b
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.data.experimental.CheckpointInputPipelineHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.iterator_ops.CheckpointInputPipelineHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'estimator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..3eeaa1b185
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.CsvDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
new file mode 100644
index 0000000000..0c0405ee02
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.CsvDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.readers.CsvDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'filenames\', \'record_defaults\', \'compression_type\', \'buffer_size\', \'header\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \',\', \'True\', \'\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional.pbtxt
new file mode 100644
index 0000000000..b4c9459098
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional.pbtxt
@@ -0,0 +1,28 @@
+path: "tensorflow.data.experimental.Optional"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.optional_ops.Optional\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "value_structure"
+    mtype: "<class \'abc.abstractproperty\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_value"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "has_value"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "none_from_structure"
+    argspec: "args=[\'value_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..2991b12f64
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.RandomDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
new file mode 100644
index 0000000000..bce0be4b17
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.RandomDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.random_ops.RandomDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-reducer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-reducer.pbtxt
new file mode 100644
index 0000000000..6b477a8a72
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-reducer.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.data.experimental.Reducer"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.grouping.Reducer\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "finalize_func"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "init_func"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "reduce_func"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'init_func\', \'reduce_func\', \'finalize_func\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..948e99ef86
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.SqlDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
new file mode 100644
index 0000000000..8aeae92d96
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.SqlDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.readers.SqlDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'driver_name\', \'data_source_name\', \'query\', \'output_types\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-stats-aggregator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-stats-aggregator.pbtxt
new file mode 100644
index 0000000000..0bcc8cf3e8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-stats-aggregator.pbtxt
@@ -0,0 +1,13 @@
+path: "tensorflow.data.experimental.StatsAggregator"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.stats_ops.StatsAggregator\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_summary"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-t-f-record-writer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-t-f-record-writer.pbtxt
new file mode 100644
index 0000000000..6f9d18a701
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-t-f-record-writer.pbtxt
@@ -0,0 +1,13 @@
+path: "tensorflow.data.experimental.TFRecordWriter"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.writers.TFRecordWriter\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'filename\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "write"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
new file mode 100644
index 0000000000..b14585f8d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
@@ -0,0 +1,139 @@
+path: "tensorflow.data.experimental"
+tf_module {
+  member {
+    name: "CheckpointInputPipelineHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CsvDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "Optional"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "Reducer"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SqlDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "StatsAggregator"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordWriter"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "Counter"
+    argspec: "args=[\'start\', \'step\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'1\', \"<dtype: \'int64\'>\"], "
+  }
+  member_method {
+    name: "bucket_by_sequence_length"
+    argspec: "args=[\'element_length_func\', \'bucket_boundaries\', \'bucket_batch_sizes\', \'padded_shapes\', \'padding_values\', \'pad_to_bucket_boundary\', \'no_padding\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], "
+  }
+  member_method {
+    name: "choose_from_datasets"
+    argspec: "args=[\'datasets\', \'choice_dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "copy_to_device"
+    argspec: "args=[\'target_device\', \'source_device\'], varargs=None, keywords=None, defaults=[\'/cpu:0\'], "
+  }
+  member_method {
+    name: "dense_to_sparse_batch"
+    argspec: "args=[\'batch_size\', \'row_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "enumerate_dataset"
+    argspec: "args=[\'start\'], varargs=None, keywords=None, defaults=[\'0\'], "
+  }
+  member_method {
+    name: "get_next_as_optional"
+    argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_single_element"
+    argspec: "args=[\'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "group_by_reducer"
+    argspec: "args=[\'key_func\', \'reducer\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "group_by_window"
+    argspec: "args=[\'key_func\', \'reduce_func\', \'window_size\', \'window_size_func\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "ignore_errors"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "latency_stats"
+    argspec: "args=[\'tag\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "make_batched_features_dataset"
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'features\', \'reader\', \'label_key\', \'reader_args\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'reader_num_threads\', \'parser_num_threads\', \'sloppy_ordering\', \'drop_final_batch\'], varargs=None, keywords=None, defaults=[\"<class \'tensorflow.python.data.ops.readers.TFRecordDataset\'>\", \'None\', \'None\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'2\', \'False\', \'False\'], "
+  }
+  member_method {
+    name: "make_csv_dataset"
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'column_names\', \'column_defaults\', \'label_name\', \'select_columns\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'header\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'num_parallel_reads\', \'sloppy\', \'num_rows_for_inference\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \',\', \'True\', \'\', \'True\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'False\', \'100\', \'None\'], "
+  }
+  member_method {
+    name: "make_saveable_from_iterator"
+    argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map_and_batch"
+    argspec: "args=[\'map_func\', \'batch_size\', \'num_parallel_batches\', \'drop_remainder\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "parallel_interleave"
+    argspec: "args=[\'map_func\', \'cycle_length\', \'block_length\', \'sloppy\', \'buffer_output_elements\', \'prefetch_input_elements\'], varargs=None, keywords=None, defaults=[\'1\', \'False\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "parse_example_dataset"
+    argspec: "args=[\'features\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\'], "
+  }
+  member_method {
+    name: "prefetch_to_device"
+    argspec: "args=[\'device\', \'buffer_size\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "rejection_resample"
+    argspec: "args=[\'class_func\', \'target_dist\', \'initial_dist\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "sample_from_datasets"
+    argspec: "args=[\'datasets\', \'weights\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "scan"
+    argspec: "args=[\'initial_state\', \'scan_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_stats_aggregator"
+    argspec: "args=[\'stats_aggregator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle_and_repeat"
+    argspec: "args=[\'buffer_size\', \'count\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "unbatch"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "unique"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
index 56fb270a49..e205157523 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
@@ -20,4 +20,8 @@ tf_module {
     name: "TextLineDataset"
     mtype: "<class \'abc.ABCMeta\'>"
   }
+  member {
+    name: "experimental"
+    mtype: "<type \'module\'>"
+  }
 }
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 3a1c4a45d4..164b3d8303 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -64,8 +64,6 @@ COMMON_PIP_DEPS = [
     "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
     "//tensorflow/contrib/compiler:xla",
     "//tensorflow/contrib/constrained_optimization:constrained_optimization_pip",
-    "//tensorflow/contrib/data/python/kernel_tests/serialization:dataset_serialization_test_base",
-    "//tensorflow/contrib/data/python/kernel_tests:stats_dataset_test_base",
     "//tensorflow/contrib/eager/python/examples:examples_pip",
     "//tensorflow/contrib/eager/python:evaluator",
     "//tensorflow/contrib/gan:gan",
@@ -106,6 +104,8 @@ COMMON_PIP_DEPS = [
     "//tensorflow/python:meta_graph_testdata",
     "//tensorflow/python:spectral_ops_test_util",
     "//tensorflow/python:util_example_parser_configuration",
+    "//tensorflow/python/data/experimental/kernel_tests/serialization:dataset_serialization_test_base",
+    "//tensorflow/python/data/experimental/kernel_tests:stats_dataset_test_base",
     "//tensorflow/python/data/kernel_tests:test_base",
     "//tensorflow/python/debug:debug_pip",
     "//tensorflow/python/eager:eager_pip",
-- 
GitLab


From 80f8931682aeaae89786f0940892a6557b4cfd67 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 17:05:45 -0700
Subject: [PATCH 0985/1357] Mark bfloat16 as supported for
 ExponentialMovingAverage.

PiperOrigin-RevId: 215307701
---
 tensorflow/python/training/moving_averages.py |  9 ++++---
 .../python/training/moving_averages_test.py   | 27 +++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/training/moving_averages.py b/tensorflow/python/training/moving_averages.py
index 177a7ddfa5..041266da3e 100644
--- a/tensorflow/python/training/moving_averages.py
+++ b/tensorflow/python/training/moving_averages.py
@@ -372,13 +372,13 @@ class ExponentialMovingAverage(object):
 
     Args:
       var_list: A list of Variable or Tensor objects. The variables
-        and Tensors must be of types float16, float32, or float64.
+        and Tensors must be of types bfloat16, float16, float32, or float64.
 
     Returns:
       An Operation that updates the moving averages.
 
     Raises:
-      TypeError: If the arguments are not all float16, float32, or float64.
+      TypeError: If the arguments are not an allowed type.
       ValueError: If the moving average of one of the variables is already
         being computed.
     """
@@ -387,8 +387,9 @@ class ExponentialMovingAverage(object):
       var_list = variables.trainable_variables()
     zero_debias_true = set()  # set of vars to set `zero_debias=True`
     for var in var_list:
-      if var.dtype.base_dtype not in [dtypes.float16, dtypes.float32,
-                                      dtypes.float64]:
+      if var.dtype.base_dtype not in [
+          dtypes.bfloat16, dtypes.float16, dtypes.float32, dtypes.float64
+      ]:
         raise TypeError("The variables must be half, float, or double: %s" %
                         var.name)
 
diff --git a/tensorflow/python/training/moving_averages_test.py b/tensorflow/python/training/moving_averages_test.py
index 93991d0e14..bb2fca66e3 100644
--- a/tensorflow/python/training/moving_averages_test.py
+++ b/tensorflow/python/training/moving_averages_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -110,6 +111,32 @@ class MovingAveragesTest(test.TestCase):
       denominator_2 = denominator_1 * decay + weight_2 * (1.0 - decay)
       self.assertAllClose(numerator_2 / denominator_2, wma_array)
 
+  def testWeightedMovingAverageBfloat16(self):
+    bfloat16 = pywrap_tensorflow.TF_bfloat16_type()
+    with self.cached_session() as sess:
+      decay = 0.5
+      weight = array_ops.placeholder(dtypes.bfloat16, [])
+      val = array_ops.placeholder(dtypes.bfloat16, [])
+
+      wma = moving_averages.weighted_moving_average(val, decay, weight)
+      variables.global_variables_initializer().run()
+
+      # Get the first weighted moving average.
+      val_1 = 3.0
+      weight_1 = 4.0
+      wma_array = sess.run(wma, feed_dict={val: val_1, weight: weight_1})
+      numerator_1 = val_1 * weight_1 * (1.0 - decay)
+      denominator_1 = weight_1 * (1.0 - decay)
+      self.assertAllClose(numerator_1 / denominator_1, wma_array)
+
+      # Get the second weighted moving average.
+      val_2 = 11.0
+      weight_2 = 22.0
+      wma_array = sess.run(wma, feed_dict={val: val_2, weight: weight_2})
+      numerator_2 = numerator_1 * decay + val_2 * weight_2 * (1.0 - decay)
+      denominator_2 = denominator_1 * decay + weight_2 * (1.0 - decay)
+      self.assertAllClose(bfloat16(numerator_2 / denominator_2), wma_array)
+
 
 def _Repeat(value, dim):
   if dim == 1:
-- 
GitLab


From 7dc5f7caa959c70d5ca948f7b0fc5abfea9a5935 Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Mon, 1 Oct 2018 17:18:28 -0700
Subject: [PATCH 0986/1357]  Minor changes, hanged  CHECK_GE to DCHECK_GE due
 to code policy change

---
 tensorflow/core/common_runtime/process_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index 4570496637..e1dc08d645 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -65,7 +65,7 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
 #ifdef _OPENMP
     mkl_intra_op = omp_get_max_threads();
 #endif  // _OPENMP
-    CHECK_GE(mkl_intra_op, 1);
+    DCHECK_GE(mkl_intra_op, 1);
     const int32 mkl_inter_op = std::max(
         (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
     VLOG(0)
-- 
GitLab


From bfbe2bbe6a83a4acfa8f87aa5c8228e74b37bb61 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 1 Oct 2018 17:18:24 -0700
Subject: [PATCH 0987/1357] [tf.data] More robust solution for input pipeline
 <--> performance model coordination.

PiperOrigin-RevId: 215309735
---
 tensorflow/core/framework/dataset.h           | 12 +--
 tensorflow/core/framework/model.cc            | 83 ++++++++---------
 tensorflow/core/framework/model.h             | 42 +++++----
 .../kernels/data/map_and_batch_dataset_op.cc  | 90 ++++++++++---------
 .../data/parallel_interleave_dataset_op.cc    | 86 +++++++++---------
 .../kernels/data/parallel_map_iterator.cc     | 77 ++++++++--------
 6 files changed, 201 insertions(+), 189 deletions(-)

diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 697e0604bf..8c1151cb56 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -657,15 +657,15 @@ class DatasetBaseIterator : public IteratorBase {
   // When performance modeling is enabled, this method adds a tunable parameter
   // to the model node corresponding to this iterator.
   //
-  // The performance modeling logic may use `value` to set the value of the
+  // The performance modeling logic may use `state` to set the value of the
   // tunable parameter at any point during the lifetime of this iterator. When
-  // it does, it notifies `cond_var`.
+  // it does, it acquires `state->mu` and notifies `state->cond_var`.
   void AddTunableParameter(IteratorContext* ctx, const string& name,
-                           std::atomic<int64>* value, int64 min, int64 max,
-                           condition_variable* cond_var) {
+                           std::shared_ptr<model::SharedState> state, int64 min,
+                           int64 max) {
     if (ctx->model()) {
-      ctx->model()->AddTunableParameter(prefix(), name, value, min, max,
-                                        cond_var);
+      ctx->model()->AddTunableParameter(prefix(), name, std::move(state), min,
+                                        max);
     }
   }
 
diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index b0330ec990..bfdb3a6658 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -296,12 +296,12 @@ void Model::AddProcessingTime(const string& name, int64 delta) {
 
 void Model::AddTunableParameter(const string& node_name,
                                 const string& parameter_name,
-                                std::atomic<int64>* value, int64 min, int64 max,
-                                condition_variable* cond_var) {
+                                std::shared_ptr<SharedState> state, int64 min,
+                                int64 max) {
   tf_shared_lock l(mu_);
   auto node = *gtl::FindOrNull(lookup_table_, node_name);
   DCHECK(node);
-  node->add_tunable_param(parameter_name, value, min, max, cond_var);
+  node->add_tunable_param(parameter_name, std::move(state), min, max);
 }
 
 // The optimization algorithm starts by setting all tunable parallelism
@@ -311,54 +311,55 @@ void Model::AddTunableParameter(const string& node_name,
 // is less than or equal to the processing time needed to produce an element
 // divided by CPU budget.
 void Model::Optimize(int64 cpu_budget) {
-  tf_shared_lock lock(mu_);
   std::vector<std::shared_ptr<Model::Node::Tunable>> tunables;
-  const int64 processing_time = ProcessingTime();
-  tunables = CollectTunables();
-  for (auto tunable : tunables) {
-    tunable->value = 1;
-  }
-  while (true) {
-    const int64 output_time = OutputTime();
-    bool all_tunables = true;
-    for (auto& tunable : tunables) {
-      if (tunable->value < tunable->max) {
-        all_tunables = false;
+  {
+    tf_shared_lock lock(mu_);
+    const int64 processing_time = ProcessingTime();
+    tunables = CollectTunables();
+    for (auto tunable : tunables) {
+      tunable->value = 1;
+    }
+    while (true) {
+      const int64 output_time = OutputTime();
+      bool all_tunables = true;
+      for (auto& tunable : tunables) {
+        if (tunable->value < tunable->max) {
+          all_tunables = false;
+          break;
+        }
+      }
+      if (output_time < processing_time / cpu_budget || all_tunables) {
         break;
       }
-    }
-    if (output_time < processing_time / cpu_budget || all_tunables) {
-      break;
-    }
-    int64 best_delta = -1;
-    Model::Node::Tunable* best_tunable = nullptr;
-    for (auto& tunable : tunables) {
-      if (tunable->value == tunable->max) {
-        continue;
+      int64 best_delta = -1;
+      Model::Node::Tunable* best_tunable = nullptr;
+      for (auto& tunable : tunables) {
+        if (tunable->value == tunable->max) {
+          continue;
+        }
+        tunable->value++;
+        int64 delta = output_time - OutputTime();
+        if (delta > best_delta) {
+          best_delta = delta;
+          best_tunable = tunable.get();
+        }
+        tunable->value--;
       }
-      tunable->value++;
-      int64 delta = output_time - OutputTime();
-      if (delta > best_delta) {
-        best_delta = delta;
-        best_tunable = tunable.get();
+      if (!best_tunable) {
+        // NOTE: This can happen because we are performing the optimization
+        // while the model data is changing. If this becomes an issue, we should
+        // look into performing the optimization using a model snapshot.
+        break;
       }
-      tunable->value--;
+      best_tunable->value++;
     }
-    if (!best_tunable) {
-      // NOTE: This can happen because we are performing the optimization
-      // while the model data is changing. If this becomes an issue, we should
-      // look into performing the optimization using a model snapshot.
-      break;
-    }
-    best_tunable->value++;
   }
   VLOG(2) << "Number of knobs: " << tunables.size();
   for (auto& tunable : tunables) {
     VLOG(2) << "Setting tunable parameter: " << tunable->value;
-    tunable->value_ptr->store(tunable->value);
-    if (tunable->cond_var) {
-      tunable->cond_var->notify_all();
-    }
+    mutex_lock l(*tunable->state->mu);
+    tunable->state->value = tunable->value;
+    tunable->state->cond_var->notify_all();
   }
 }
 
diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h
index 26402f5cd3..eae0fa70e8 100644
--- a/tensorflow/core/framework/model.h
+++ b/tensorflow/core/framework/model.h
@@ -33,6 +33,19 @@ namespace tensorflow {
 namespace data {
 namespace model {
 
+// Represents thread-safe state that can be shared between an input pipeline and
+// the performance model.
+struct SharedState {
+ public:
+  explicit SharedState(int64 value, std::shared_ptr<mutex> mu,
+                       std::shared_ptr<condition_variable> cond_var)
+      : value(value), mu(std::move(mu)), cond_var(std::move(cond_var)) {}
+
+  std::shared_ptr<mutex> mu;
+  std::shared_ptr<condition_variable> cond_var;
+  int64 value;
+};
+
 // Abstract representation of a TensorFlow input pipeline that can be used
 // for collecting runtime information and optimizing performance. It collects
 // runtime information about execution of the input pipeline that is used to
@@ -62,8 +75,8 @@ class Model {
   // Adds a tunable parameter for the given node.
   void AddTunableParameter(const string& node_name,
                            const string& parameter_name,
-                           std::atomic<int64>* value, int64 min, int64 max,
-                           condition_variable* cond_var) LOCKS_EXCLUDED(mu_);
+                           std::shared_ptr<SharedState> value, int64 min,
+                           int64 max) LOCKS_EXCLUDED(mu_);
 
   // Runs optimization.
   void Optimize(int64 cpu_budget) LOCKS_EXCLUDED(mu_);
@@ -109,13 +122,8 @@ class Model {
    public:
     // Represents a tunable parameter.
     struct Tunable {
-      Tunable(std::atomic<int64>* value, int64 min, int64 max,
-              condition_variable* cond_var)
-          : value(*value),
-            min(min),
-            max(max),
-            value_ptr(value),
-            cond_var(cond_var) {}
+      Tunable(std::shared_ptr<SharedState> state, int64 min, int64 max)
+          : value(state->value), min(min), max(max), state(std::move(state)) {}
 
       // Identifies the model value of the parameter. This can be different from
       // the actual value (e.g. during optimization search).
@@ -127,12 +135,8 @@ class Model {
       // Identifies the maximum value of the parameter.
       int64 max;
 
-      // Points to the actual value of the parameter. Not owned.
-      std::atomic<int64>* value_ptr;
-
-      // If non-null, this condition variable is notified when the model updates
-      // the actual value of the parameter (via `value_ptr`). Not owned.
-      condition_variable* cond_var;
+      // Shared state of the parameter.
+      std::shared_ptr<SharedState> state;
     };
 
     Node(int64 id, const string& name, std::shared_ptr<Node> output)
@@ -158,12 +162,12 @@ class Model {
     }
 
     // Adds a tunable parameter.
-    void add_tunable_param(const string& name, std::atomic<int64>* value,
-                           int64 min, int64 max, condition_variable* cond_var)
-        LOCKS_EXCLUDED(mu_) {
+    void add_tunable_param(const string& name,
+                           std::shared_ptr<SharedState> state, int64 min,
+                           int64 max) LOCKS_EXCLUDED(mu_) {
       mutex_lock l(mu_);
       tunable_params_[name] =
-          std::make_shared<Tunable>(value, min, max, cond_var);
+          std::make_shared<Tunable>(std::move(state), min, max);
     }
 
     // Returns the unique node ID.
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index b4c7f9e510..bf08970560 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -187,29 +187,31 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
      public:
       explicit Iterator(const Params& params)
           : DatasetIterator<Dataset>(params),
-            num_parallel_calls_(params.dataset->num_parallel_calls_) {}
+            mu_(std::make_shared<mutex>()),
+            cond_var_(std::make_shared<condition_variable>()),
+            num_parallel_calls_(std::make_shared<model::SharedState>(
+                params.dataset->num_parallel_calls_, mu_, cond_var_)) {}
 
       ~Iterator() override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         // Cancel the runner thread.
         cancelled_ = true;
-        cond_var_.notify_all();
+        cond_var_->notify_all();
         // Wait for all in-flight calls to complete.
         while (num_calls_ > 0) {
-          cond_var_.wait(l);
+          cond_var_->wait(l);
         }
       }
 
       Status Initialize(IteratorContext* ctx) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         AddConstantParameter(ctx, "batch_size", dataset()->batch_size_);
-        if (num_parallel_calls_ == kAutoTune) {
-          num_parallel_calls_ = 1;
-          AddTunableParameter(ctx, "parallelism",
-                              &num_parallel_calls_ /* value */, 1 /* min */,
-                              port::NumSchedulableCPUs() /* max */, &cond_var_);
+        if (num_parallel_calls_->value == kAutoTune) {
+          num_parallel_calls_->value = 1;
+          AddTunableParameter(ctx, "parallelism", num_parallel_calls_, 1,
+                              port::NumSchedulableCPUs());
         } else {
-          AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
+          AddConstantParameter(ctx, "parallelism", num_parallel_calls_->value);
         }
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
@@ -221,27 +223,27 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                              bool* end_of_sequence) override {
         std::shared_ptr<BatchResult> result;
         {
-          mutex_lock l(mu_);
+          mutex_lock l(*mu_);
           EnsureRunnerThreadStarted(ctx);
           while (batch_results_.empty() ||
                  batch_results_.front()->num_calls > 0) {
             RecordStop(ctx);
-            cond_var_.wait(l);
+            cond_var_->wait(l);
             RecordStart(ctx);
           }
           std::swap(result, batch_results_.front());
           batch_results_.pop_front();
-          cond_var_.notify_all();
+          cond_var_->notify_all();
         }
         return ProcessResult(ctx, result, out_tensors, end_of_sequence);
       }
 
      protected:
       Status SaveInternal(IteratorStateWriter* writer) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         // Wait for all in-flight calls to complete.
         while (num_calls_ > 0) {
-          cond_var_.wait(l);
+          cond_var_->wait(l);
         }
         CHECK_EQ(num_calls_, 0);
         TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
@@ -257,7 +259,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
         TF_RETURN_IF_ERROR(
             reader->ReadScalar(full_name("call_counter"), &call_counter_));
@@ -298,7 +300,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void Callback(const std::shared_ptr<IteratorContext>& ctx,
                     const std::shared_ptr<BatchResult>& result,
                     const std::shared_ptr<std::vector<Tensor>>& return_values,
-                    int64 offset, const Status& status) LOCKS_EXCLUDED(mu_) {
+                    int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) {
         result->UpdateStatus(status);
         if (status.ok()) {
           EnsureOutputAllocated(ctx, result, return_values);
@@ -334,16 +336,16 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
-          LOCKS_EXCLUDED(mu_) {
-        mutex_lock l(mu_);
+          LOCKS_EXCLUDED(*mu_) {
+        mutex_lock l(*mu_);
         num_calls_--;
         result->num_calls--;
-        cond_var_.notify_all();
+        cond_var_->notify_all();
       }
 
       void CallFunction(std::shared_ptr<IteratorContext> ctx,
                         const std::shared_ptr<BatchResult>& result,
-                        int64 offset) LOCKS_EXCLUDED(mu_) {
+                        int64 offset) LOCKS_EXCLUDED(*mu_) {
         // Get the next input element.
         std::vector<Tensor> input_element;
         bool end_of_input;
@@ -400,7 +402,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
           runner_thread_.reset(ctx->env()->StartThread(
@@ -476,14 +478,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       void RunnerThread(const std::shared_ptr<IteratorContext>& ctx)
-          LOCKS_EXCLUDED(mu_) {
+          LOCKS_EXCLUDED(*mu_) {
         std::vector<std::pair<std::shared_ptr<BatchResult>, int64>> new_calls;
         RecordStart(ctx.get());
         auto stop_cleanup =
             gtl::MakeCleanup([this, &ctx]() { RecordStop(ctx.get()); });
-        new_calls.reserve(num_parallel_calls_);
-        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(mu_) -> bool {
-          int64 num_parallel_calls = num_parallel_calls_;
+        new_calls.reserve(num_parallel_calls_->value);
+        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(*mu_) -> bool {
+          int64 num_parallel_calls = num_parallel_calls_->value;
           int64 max_batch_results =
               (num_parallel_calls + dataset()->batch_size_ - 1) /
               dataset()->batch_size_;
@@ -494,10 +496,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         };
         while (true) {
           {
-            mutex_lock l(mu_);
+            mutex_lock l(*mu_);
             while (!cancelled_ && busy()) {
               RecordStop(ctx.get());
-              cond_var_.wait(l);
+              cond_var_->wait(l);
               RecordStart(ctx.get());
             }
 
@@ -524,7 +526,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader,
-                             size_t index) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                             size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         batch_results_.emplace_back(new BatchResult(dataset()->batch_size_));
         std::shared_ptr<BatchResult> result = batch_results_.back();
         string prefix = strings::StrCat("batch_results_", index);
@@ -569,7 +571,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status ReadStatus(IteratorStateReader* reader, const string& prefix,
-                        Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                        Status* status) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         int64 code_int;
         TF_RETURN_IF_ERROR(reader->ReadScalar(
             full_name(strings::StrCat(prefix, "_code")), &code_int));
@@ -587,7 +589,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status WriteBatchResult(IteratorStateWriter* writer, size_t index)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         std::shared_ptr<BatchResult> result = batch_results_[index];
         string prefix = strings::StrCat("batch_results_", index);
         mutex_lock l(result->mu);
@@ -628,7 +630,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status WriteStatus(IteratorStateWriter* writer, const string& prefix,
-                         const Status& status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                         const Status& status) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         TF_RETURN_IF_ERROR(
             writer->WriteScalar(full_name(strings::StrCat(prefix, "_code")),
                                 static_cast<int64>(status.code())));
@@ -642,24 +644,24 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       // Used for coordination between the main thread, the runner thread, and
       // the callback threads.
-      mutex mu_;
+      const std::shared_ptr<mutex> mu_;
       // Used for coordination between the main thread, the runner thread, and
       // the callback threads. In particular, the runner thread should only
-      // schedule new calls when the number of in-flight calls is less than the
-      // user specified level of parallelism and there are slots available in
-      // the `batch_results_` buffer.
-      condition_variable cond_var_;
+      // schedule new calls when the number of in-flight calls is less than
+      // `num_parallel_calls_->value` and there are slots available in the
+      // `batch_results_` buffer.
+      const std::shared_ptr<condition_variable> cond_var_;
       // Identifies the maximum number of parallel calls.
-      std::atomic<int64> num_parallel_calls_;
+      const std::shared_ptr<model::SharedState> num_parallel_calls_;
       // Counts the number of outstanding calls for this batch.
-      int64 num_calls_ GUARDED_BY(mu_) = 0;
+      int64 num_calls_ GUARDED_BY(*mu_) = 0;
       // Counts the total number of calls.
-      int64 call_counter_ GUARDED_BY(mu_) = 0;
+      int64 call_counter_ GUARDED_BY(*mu_) = 0;
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
-      std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(mu_);
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(mu_);
-      bool cancelled_ GUARDED_BY(mu_) = false;
+      std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 2bb38bf0b9..6b6b3d6ab9 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -1217,7 +1217,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
      public:
       explicit Iterator(const Params& params)
           : DatasetIterator<Dataset>(params),
-            num_parallel_calls_(params.dataset->num_parallel_calls_),
+            mu_(std::make_shared<mutex>()),
+            cond_var_(std::make_shared<condition_variable>()),
+            num_parallel_calls_(std::make_shared<model::SharedState>(
+                params.dataset->num_parallel_calls_, mu_, cond_var_)),
             args_list_(params.dataset->cycle_length_),
             current_elements_(params.dataset->cycle_length_),
             element_in_use_(params.dataset->cycle_length_, false),
@@ -1227,25 +1230,24 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
                 false /* low_latency_hint */)) {}
 
       ~Iterator() override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         // Cancel the runner thread.
         cancelled_ = true;
-        cond_var_.notify_all();
+        cond_var_->notify_all();
         // Wait for all in-flight calls to complete.
         while (num_calls_ > 0) {
-          cond_var_.wait(l);
+          cond_var_->wait(l);
         }
       }
 
       Status Initialize(IteratorContext* ctx) override {
-        mutex_lock l(mu_);
-        if (num_parallel_calls_ == kAutoTune) {
-          num_parallel_calls_ = 1;
-          AddTunableParameter(ctx, "parallelism",
-                              &num_parallel_calls_ /* value */, 1 /* min */,
-                              dataset()->cycle_length_ /* max */, &cond_var_);
+        mutex_lock l(*mu_);
+        if (num_parallel_calls_->value == kAutoTune) {
+          num_parallel_calls_->value = 1;
+          AddTunableParameter(ctx, "parallelism", num_parallel_calls_, 1,
+                              dataset()->cycle_length_);
         } else {
-          AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
+          AddConstantParameter(ctx, "parallelism", num_parallel_calls_->value);
         }
         AddConstantParameter(ctx, "cycle_length", dataset()->cycle_length_);
         TF_RETURN_IF_ERROR(
@@ -1259,12 +1261,12 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         std::shared_ptr<InvocationResult> result;
         do {
           {
-            mutex_lock l(mu_);
+            mutex_lock l(*mu_);
             EnsureRunnerThreadStarted(ctx);
             while (invocation_results_.empty() &&
                    (!end_of_input_ || num_open_ > 0)) {
               RecordStop(ctx);
-              cond_var_.wait(l);
+              cond_var_->wait(l);
               RecordStart(ctx);
             }
             if (!invocation_results_.empty()) {
@@ -1274,7 +1276,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
               *end_of_sequence = true;
               return Status::OK();
             }
-            cond_var_.notify_all();
+            cond_var_->notify_all();
           }
           RecordStop(ctx);
           result->notification.WaitForNotification();
@@ -1290,10 +1292,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
      protected:
       Status SaveInternal(IteratorStateWriter* writer) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         // Wait for all in-flight calls to complete.
         while (num_calls_ > 0) {
-          cond_var_.wait(l);
+          cond_var_->wait(l);
         }
         CHECK_EQ(num_calls_, 0);
         TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
@@ -1331,7 +1333,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
       Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
         int64 invocation_results_size;
         TF_RETURN_IF_ERROR(reader->ReadScalar(
@@ -1384,7 +1386,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       };
 
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
           runner_thread_.reset(ctx->env()->StartThread(
@@ -1401,7 +1403,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       void FetchOutputs(
           const std::shared_ptr<IteratorContext>& ctx, int64 cycle_index,
           const std::vector<std::shared_ptr<InvocationResult>>& results)
-          LOCKS_EXCLUDED(mu_) {
+          LOCKS_EXCLUDED(*mu_) {
         RecordStart(ctx.get());
         auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
         bool end_of_input = false;
@@ -1424,14 +1426,14 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         if (end_of_input) {
           current_elements_[cycle_index].reset();
         }
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         element_in_use_[cycle_index] = false;
         num_calls_--;
         if (end_of_input) {
           args_list_[cycle_index].clear();
           num_open_--;
         }
-        cond_var_.notify_all();
+        cond_var_->notify_all();
       }
 
       // Method responsible for 1) creating iterators out of input elements, 2)
@@ -1442,20 +1444,20 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       void RunnerThread(const std::shared_ptr<IteratorContext>& ctx) {
         RecordStart(ctx.get());
         auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
-        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(mu_) -> bool {
+        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(*mu_) -> bool {
           return element_in_use_[cycle_index_] ||
-                 num_calls_ >= num_parallel_calls_ ||
+                 num_calls_ >= num_parallel_calls_->value ||
                  invocation_results_.size() >=
                      dataset()->cycle_length_ * dataset()->block_length_;
         };
         while (true) {
-          mutex_lock l(mu_);
+          mutex_lock l(*mu_);
           // Wait until this thread is cancelled, the end of input has been
           // reached, or the cycle element at the `cycle_index_` position is
           // not in use and there is space in the `invocation_results_` queue.
           while (!cancelled_ && (!end_of_input_ || num_open_ > 0) && busy()) {
             RecordStop(ctx.get());
-            cond_var_.wait(l);
+            cond_var_->wait(l);
             RecordStart(ctx.get());
           }
 
@@ -1509,13 +1511,13 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
             }
             cycle_index_ = (cycle_index_ + 1) % dataset()->cycle_length_;
           }
-          cond_var_.notify_all();
+          cond_var_->notify_all();
         }
       }
 
       Status WriteStatusLocked(IteratorStateWriter* writer, size_t index,
                                const Status& status)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         TF_RETURN_IF_ERROR(writer->WriteScalar(
             CodeKey(index), static_cast<int64>(status.code())));
         if (!status.ok()) {
@@ -1526,7 +1528,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       }
 
       Status ReadStatusLocked(IteratorStateReader* reader, size_t index,
-                              Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                              Status* status) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         int64 code_int;
         TF_RETURN_IF_ERROR(reader->ReadScalar(CodeKey(index), &code_int));
         error::Code code = static_cast<error::Code>(code_int);
@@ -1553,7 +1555,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       }
 
       Status WriteCurrentElements(IteratorStateWriter* writer)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         for (int idx = 0; idx < current_elements_.size(); idx++) {
           if (current_elements_[idx]) {
             TF_RETURN_IF_ERROR(SaveInput(writer, current_elements_[idx]));
@@ -1572,7 +1574,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
       Status ReadCurrentElements(IteratorContext* ctx,
                                  IteratorStateReader* reader)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         for (int idx = 0; idx < current_elements_.size(); idx++) {
           if (reader->Contains(
                   full_name(strings::StrCat("args_size[", idx, "]")))) {
@@ -1600,7 +1602,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
       // Used for coordination between the main thread, the runner thread, and
       // the worker threads.
-      mutex mu_;
+      const std::shared_ptr<mutex> mu_;
 
       // Used for coordination between the main thread, the runner thread, and
       // the worker threads. In particular, the runner thread should only
@@ -1608,45 +1610,45 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       // user specified level of parallelism, there are slots available in the
       // `invocation_results_` buffer, the current cycle element is not in use,
       // and there are elements left to be fetched.
-      condition_variable cond_var_;
+      const std::shared_ptr<condition_variable> cond_var_;
 
       // Identifies the maximum number of parallel calls.
-      std::atomic<int64> num_parallel_calls_;
+      const std::shared_ptr<model::SharedState> num_parallel_calls_;
 
       // Iterator for input elements.
-      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(*mu_);
 
       // Identifies current cycle element.
       int64 cycle_index_ = 0;
 
       // Arguments for creating an iterator for cycle elements.
-      std::vector<std::vector<Tensor>> args_list_ GUARDED_BY(mu_);
+      std::vector<std::vector<Tensor>> args_list_ GUARDED_BY(*mu_);
 
       // Iterators for the current cycle elements. Concurrent access is
       // protected by `element_in_use_`.
       std::vector<std::unique_ptr<IteratorBase>> current_elements_;
 
       // Identifies cycle elements that are in use by worker threads.
-      std::vector<bool> element_in_use_ GUARDED_BY(mu_);
+      std::vector<bool> element_in_use_ GUARDED_BY(*mu_);
 
       // Buffer for storing the invocation results.
       std::deque<std::shared_ptr<InvocationResult>> invocation_results_
-          GUARDED_BY(mu_);
+          GUARDED_BY(*mu_);
 
       // Identifies whether end of input has been reached.
-      bool end_of_input_ GUARDED_BY(mu_) = false;
+      bool end_of_input_ GUARDED_BY(*mu_) = false;
 
       // Identifies the number of open iterators.
-      int64 num_open_ GUARDED_BY(mu_) = 0;
+      int64 num_open_ GUARDED_BY(*mu_) = 0;
 
       // Identifies the number of outstanding calls.
-      int64 num_calls_ GUARDED_BY(mu_) = 0;
+      int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
-      bool cancelled_ GUARDED_BY(mu_) = false;
+      bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index da067a4e6f..13bd4b6036 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -40,30 +40,32 @@ class ParallelMapIterator : public DatasetBaseIterator {
         input_dataset_(input_dataset),
         init_func_(std::move(init_func)),
         map_func_(std::move(map_func)),
-        num_parallel_calls_(num_parallel_calls) {}
+        mu_(std::make_shared<mutex>()),
+        cond_var_(std::make_shared<condition_variable>()),
+        num_parallel_calls_(std::make_shared<model::SharedState>(
+            num_parallel_calls, mu_, cond_var_)) {}
 
   ~ParallelMapIterator() override {
-    mutex_lock l(mu_);
+    mutex_lock l(*mu_);
     // Cancel the runner thread.
     cancelled_ = true;
-    cond_var_.notify_all();
+    cond_var_->notify_all();
     // Wait for all in-flight calls to complete.
     while (num_calls_ > 0) {
-      cond_var_.wait(l);
+      cond_var_->wait(l);
     }
   }
 
   Status Initialize(IteratorContext* ctx) override {
-    mutex_lock l(mu_);
-    if (num_parallel_calls_ == kAutoTune) {
-      num_parallel_calls_ = 1;
+    mutex_lock l(*mu_);
+    if (num_parallel_calls_->value == kAutoTune) {
+      num_parallel_calls_->value = 1;
       // TODO(jsimsa): Surface the number of threads used by `ctx->runner()` and
       // use it here for the maximum.
-      AddTunableParameter(ctx, "parallelism", &num_parallel_calls_ /* value */,
-                          1 /* min */, port::NumSchedulableCPUs() /* max */,
-                          &cond_var_);
+      AddTunableParameter(ctx, "parallelism", num_parallel_calls_, 1,
+                          port::NumSchedulableCPUs());
     } else {
-      AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
+      AddConstantParameter(ctx, "parallelism", num_parallel_calls_->value);
     }
     TF_RETURN_IF_ERROR(
         input_dataset_->MakeIterator(ctx, prefix(), &input_impl_));
@@ -77,16 +79,16 @@ class ParallelMapIterator : public DatasetBaseIterator {
                          bool* end_of_sequence) override {
     std::shared_ptr<InvocationResult> result;
     {
-      mutex_lock l(mu_);
+      mutex_lock l(*mu_);
       EnsureRunnerThreadStarted(ctx);
       while (invocation_results_.empty()) {
         RecordStop(ctx);
-        cond_var_.wait(l);
+        cond_var_->wait(l);
         RecordStart(ctx);
       }
       std::swap(result, invocation_results_.front());
       invocation_results_.pop_front();
-      cond_var_.notify_all();
+      cond_var_->notify_all();
     }
     RecordStop(ctx);
     result->notification.WaitForNotification();
@@ -96,10 +98,10 @@ class ParallelMapIterator : public DatasetBaseIterator {
 
  protected:
   Status SaveInternal(IteratorStateWriter* writer) override {
-    mutex_lock l(mu_);
+    mutex_lock l(*mu_);
     // Wait for all in-flight calls to complete.
     while (num_calls_ > 0) {
-      cond_var_.wait(l);
+      cond_var_->wait(l);
     }
     CHECK_EQ(num_calls_, 0);
     TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
@@ -128,7 +130,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
 
   Status RestoreInternal(IteratorContext* ctx,
                          IteratorStateReader* reader) override {
-    mutex_lock l(mu_);
+    mutex_lock l(*mu_);
     TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
     int64 invocation_results_size;
     TF_RETURN_IF_ERROR(reader->ReadScalar(
@@ -175,7 +177,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   };
 
   void EnsureRunnerThreadStarted(IteratorContext* ctx)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
       runner_thread_.reset(ctx->env()->StartThread(
@@ -185,18 +187,18 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   void CallCompleted(const std::shared_ptr<InvocationResult>& result)
-      LOCKS_EXCLUDED(mu_) {
+      LOCKS_EXCLUDED(*mu_) {
     {
-      mutex_lock l(mu_);
+      mutex_lock l(*mu_);
       num_calls_--;
-      cond_var_.notify_all();
+      cond_var_->notify_all();
     }
     result->notification.Notify();
   }
 
   void CallFunction(const std::shared_ptr<IteratorContext>& ctx,
                     const std::shared_ptr<InvocationResult>& result)
-      LOCKS_EXCLUDED(mu_) {
+      LOCKS_EXCLUDED(*mu_) {
     // Get the next input element.
     std::vector<Tensor> input_element;
     result->status =
@@ -239,18 +241,18 @@ class ParallelMapIterator : public DatasetBaseIterator {
     RecordStart(ctx.get());
     auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
     std::vector<std::shared_ptr<InvocationResult>> new_calls;
-    new_calls.reserve(num_parallel_calls_);
-    auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(mu_) -> bool {
-      int64 num_parallel_calls = num_parallel_calls_;
+    new_calls.reserve(num_parallel_calls_->value);
+    auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(*mu_) -> bool {
+      int64 num_parallel_calls = num_parallel_calls_->value;
       return num_calls_ >= num_parallel_calls ||
              invocation_results_.size() >= num_parallel_calls;
     };
     while (true) {
       {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         while (!cancelled_ && busy()) {
           RecordStop(ctx.get());
-          cond_var_.wait(l);
+          cond_var_->wait(l);
           RecordStart(ctx.get());
         }
         if (cancelled_) {
@@ -261,7 +263,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
           new_calls.push_back(invocation_results_.back());
           num_calls_++;
         }
-        cond_var_.notify_all();
+        cond_var_->notify_all();
       }
       for (const auto& call : new_calls) {
         CallFunction(ctx, call);
@@ -271,7 +273,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   Status WriteStatusLocked(IteratorStateWriter* writer, size_t index,
-                           const Status& status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                           const Status& status)
+      EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     TF_RETURN_IF_ERROR(
         writer->WriteScalar(CodeKey(index), static_cast<int64>(status.code())));
     if (!status.ok()) {
@@ -282,7 +285,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   Status ReadStatusLocked(IteratorStateReader* reader, size_t index,
-                          Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                          Status* status) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     int64 code_int;
     TF_RETURN_IF_ERROR(reader->ReadScalar(CodeKey(index), &code_int));
     error::Code code = static_cast<error::Code>(code_int);
@@ -312,23 +315,23 @@ class ParallelMapIterator : public DatasetBaseIterator {
   const std::function<Status(IteratorContext*)> init_func_;
   const ParallelMapIteratorFunction map_func_;
   // Used for coordination between the main thread and the runner thread.
-  mutex mu_;
+  const std::shared_ptr<mutex> mu_;
   // Used for coordination between the main thread and the runner thread. In
   // particular, the runner thread should only schedule new calls when the
   // number of in-flight calls is less than the user specified level of
   // parallelism and there are slots available in the `invocation_results_`
   // buffer.
-  condition_variable cond_var_;
+  const std::shared_ptr<condition_variable> cond_var_;
   // Identifies the maximum number of parallel calls.
-  std::atomic<int64> num_parallel_calls_;
+  const std::shared_ptr<model::SharedState> num_parallel_calls_;
   // Counts the number of outstanding calls.
-  int64 num_calls_ GUARDED_BY(mu_) = 0;
+  int64 num_calls_ GUARDED_BY(*mu_) = 0;
   std::unique_ptr<IteratorBase> input_impl_;
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
-      GUARDED_BY(mu_);
-  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(mu_);
-  bool cancelled_ GUARDED_BY(mu_) = false;
+      GUARDED_BY(*mu_);
+  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+  bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
 }  // namespace
-- 
GitLab


From 9a23e9251ecba026471ff77a5bbbc802a2889a10 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 1 Oct 2018 17:26:37 -0700
Subject: [PATCH 0988/1357] [tf.data] Adding `tf.data.Options()`,
 `tf.data.Dataset.options()`, and `tf.data.Dataset.with_options()` to make it
 possible to respectively represent, get, and set options, such as
 optimization configuration, of a tf.data input pipeline.

PiperOrigin-RevId: 215310764
---
 .../core/kernels/data/optimize_dataset_op.cc  |  16 +-
 .../optimization/hoist_random_uniform_test.py |  11 +-
 .../optimization/latency_all_edges_test.py    |   7 +-
 .../map_and_filter_fusion_test.py             |  27 +-
 .../optimization/map_parallelization_test.py  |   6 +-
 .../optimization/map_vectorization_test.py    |  14 +-
 .../optimization/model_dataset_op_test.py     |  20 +-
 .../optimization/noop_elimination_test.py     |   4 +-
 .../optimization/optimize_dataset_op_test.py  |  45 ++-
 .../data/experimental/ops/optimization.py     |  61 +---
 tensorflow/python/data/kernel_tests/BUILD     |  18 +-
 .../data/kernel_tests/dataset_ops_test.py     | 158 ++++++++++-
 tensorflow/python/data/ops/dataset_ops.py     | 268 +++++++++++++++++-
 .../golden/v1/tensorflow.data.-dataset.pbtxt  |   8 +
 ...ow.data.-fixed-length-record-dataset.pbtxt |   8 +
 .../golden/v1/tensorflow.data.-options.pbtxt  |  57 ++++
 .../tensorflow.data.-t-f-record-dataset.pbtxt |   8 +
 .../tensorflow.data.-text-line-dataset.pbtxt  |   8 +
 ...rflow.data.experimental.-csv-dataset.pbtxt |   8 +
 ...ow.data.experimental.-random-dataset.pbtxt |   8 +
 ...rflow.data.experimental.-sql-dataset.pbtxt |   8 +
 .../tools/api/golden/v1/tensorflow.data.pbtxt |   4 +
 .../golden/v2/tensorflow.data.-dataset.pbtxt  |   8 +
 ...ow.data.-fixed-length-record-dataset.pbtxt |   8 +
 .../golden/v2/tensorflow.data.-options.pbtxt  |  57 ++++
 .../tensorflow.data.-t-f-record-dataset.pbtxt |   8 +
 .../tensorflow.data.-text-line-dataset.pbtxt  |   8 +
 ...rflow.data.experimental.-csv-dataset.pbtxt |   8 +
 ...ow.data.experimental.-random-dataset.pbtxt |   8 +
 ...rflow.data.experimental.-sql-dataset.pbtxt |   8 +
 .../tools/api/golden/v2/tensorflow.data.pbtxt |   4 +
 31 files changed, 742 insertions(+), 147 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt

diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc
index d5b725eac9..1cb7caa738 100644
--- a/tensorflow/core/kernels/data/optimize_dataset_op.cc
+++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc
@@ -154,12 +154,8 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel {
           : DatasetIterator<Dataset>(params) {}
 
       Status Initialize(IteratorContext* ctx) override {
-        IteratorContext::Params params;
-        params.env = ctx->env();
-        params.runner = *(ctx->runner());
-        params.stats_aggregator_getter = ctx->stats_aggregator_getter();
+        IteratorContext::Params params = ctx->params();
         params.lib = dataset()->lib_;
-        params.allocator_getter = ctx->allocator_getter();
         return dataset()->optimized_input_->MakeIterator(
             IteratorContext(params), prefix(), &input_impl_);
       }
@@ -167,14 +163,10 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel {
       Status GetNextInternal(IteratorContext* ctx,
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
-        IteratorContext::Params params;
-        params.env = ctx->env();
-        params.runner = *(ctx->runner());
-        params.stats_aggregator_getter = ctx->stats_aggregator_getter();
+        IteratorContext::Params params = ctx->params();
         params.lib = dataset()->lib_;
-        params.allocator_getter = ctx->allocator_getter();
-        IteratorContext iter_ctx(params);
-        return input_impl_->GetNext(&iter_ctx, out_tensors, end_of_sequence);
+        return input_impl_->GetNext(IteratorContext(params), out_tensors,
+                                    end_of_sequence);
       }
 
      protected:
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
index 3cd9753665..81437c0aec 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
@@ -64,7 +64,9 @@ class HoistRandomUniformTest(test_base.DatasetTestBase, parameterized.TestCase):
         optimization.assert_next(
             ["Zip[0]", "Map"] if will_optimize else ["Map"])).map(function)
 
-    dataset = dataset.apply(optimization.optimize(["hoist_random_uniform"]))
+    options = dataset_ops.Options()
+    options.experimental_hoist_random_uniform = True
+    dataset = dataset.with_options(options)
     self._testDataset(dataset)
 
   def testAdditionalInputs(self):
@@ -77,9 +79,10 @@ class HoistRandomUniformTest(test_base.DatasetTestBase, parameterized.TestCase):
           [], minval=1, maxval=10, dtype=dtypes.float32, seed=42)
 
     dataset = dataset_ops.Dataset.range(5).apply(
-        optimization.assert_next(
-            ["Zip[0]", "Map"])).map(random_with_capture).apply(
-                optimization.optimize(["hoist_random_uniform"]))
+        optimization.assert_next(["Zip[0]", "Map"])).map(random_with_capture)
+    options = dataset_ops.Options()
+    options.experimental_hoist_random_uniform = True
+    dataset = dataset.with_options(options)
     self._testDataset(dataset)
 
   def _testDataset(self, dataset):
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
index 45623876ae..26fec0414e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
@@ -28,14 +28,15 @@ from tensorflow.python.platform import test
 class OptimizeStatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
 
   def testLatencyStatsOptimization(self):
-
     stats_aggregator = stats_ops.StatsAggregator()
     dataset = dataset_ops.Dataset.from_tensors(1).apply(
         optimization.assert_next(
             ["LatencyStats", "Map", "LatencyStats", "Prefetch",
              "LatencyStats"])).map(lambda x: x * x).prefetch(1).apply(
-                 stats_ops.set_stats_aggregator(stats_aggregator)).apply(
-                     optimization.optimize(["latency_all_edges"]))
+                 stats_ops.set_stats_aggregator(stats_aggregator))
+    options = dataset_ops.Options()
+    options.experimental_latency_all_edges = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_initializable_iterator()
     get_next = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
index a439635716..7f8a4e6406 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
@@ -72,7 +72,10 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
     for function in functions:
       dataset = dataset.map(function)
 
-    dataset = dataset.prefetch(0).apply(optimization.optimize(["map_fusion"]))
+    dataset = dataset.prefetch(0)
+    options = dataset_ops.Options()
+    options.experimental_map_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
     with self.cached_session() as sess:
@@ -124,9 +127,10 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testMapFilterFusion(self, function, predicate):
     dataset = dataset_ops.Dataset.range(10).apply(
         optimization.assert_next(
-            ["Map",
-             "FilterByLastComponent"])).map(function).filter(predicate).apply(
-                 optimization.optimize(["map_and_filter_fusion"]))
+            ["Map", "FilterByLastComponent"])).map(function).filter(predicate)
+    options = dataset_ops.Options()
+    options.experimental_map_and_filter_fusion = True
+    dataset = dataset.with_options(options)
     self._testMapAndFilter(dataset, function, predicate)
 
   def _testMapAndFilter(self, dataset, function, predicate):
@@ -156,10 +160,11 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     # We are currently not supporting functions with additional inputs.
     dataset = dataset_ops.Dataset.range(10).apply(
-        optimization.assert_next(
-            ["Map", "Filter"])).map(function).filter(predicate).apply(
-                optimization.optimize(["map_and_filter_fusion"]))
-
+        optimization.assert_next(["Map",
+                                  "Filter"])).map(function).filter(predicate)
+    options = dataset_ops.Options()
+    options.experimental_map_and_filter_fusion = True
+    dataset = dataset.with_options(options)
     self._testMapAndFilter(dataset, function, predicate)
 
   @staticmethod
@@ -197,8 +202,10 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
     for predicate in predicates:
       dataset = dataset.filter(predicate)
 
-    dataset = dataset.prefetch(0).apply(
-        optimization.optimize(["filter_fusion"]))
+    dataset = dataset.prefetch(0)
+    options = dataset_ops.Options()
+    options.experimental_filter_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
     with self.cached_session() as sess:
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
index 334d8e3778..ce9c9bc47b 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
@@ -62,8 +62,10 @@ class MapParallelizationTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testMapParallelization(self, function, should_optimize):
     next_nodes = ["ParallelMap"] if should_optimize else ["Map"]
     dataset = dataset_ops.Dataset.range(5).apply(
-        optimization.assert_next(next_nodes)).map(function).apply(
-            optimization.optimize(["map_parallelization"]))
+        optimization.assert_next(next_nodes)).map(function)
+    options = dataset_ops.Options()
+    options.experimental_map_parallelization = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index d47492753e..32ebc49c40 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -69,10 +69,11 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
           map_fn, num_parallel_calls=num_parallel_calls).batch(batch_size)
 
     unoptimized = _make_dataset([map_node_name, "Batch"])
-    optimized = _make_dataset(["Batch", map_node_name] if expect_optimized else
-                              [map_node_name, "Batch"]).apply(
-                                  optimization.optimize(["map_vectorization"]))
-
+    optimized = _make_dataset(["Batch", map_node_name]
+                              if expect_optimized else [map_node_name, "Batch"])
+    options = dataset_ops.Options()
+    options.experimental_map_vectorization = True
+    optimized = optimized.with_options(options)
     return unoptimized, optimized
 
   @parameterized.named_parameters(
@@ -179,7 +180,10 @@ class MapVectorizationBenchmark(test.Benchmark):
     unoptimized = input_dataset.map(map_fn).batch(batch_size)
     unoptimized_op = unoptimized.make_one_shot_iterator().get_next()
 
-    optimized = unoptimized.apply(optimization.optimize(["map_vectorization"]))
+    optimized = input_dataset.map(map_fn).batch(batch_size)
+    options = dataset_ops.Options()
+    options.experimental_map_vectorization = True
+    optimized = optimized.with_options(options)
     optimized_op = optimized.make_one_shot_iterator().get_next()
 
     unoptimized_time = self._run(
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
index a9f2ce8c03..82516356df 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
@@ -37,7 +37,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
                                                 np.random.rand(4 * k,
                                                                1))).repeat()
     dataset = dataset.map(math_ops.matmul)
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
@@ -61,7 +63,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
                                                                1))).repeat()
     dataset = dataset.map(
         math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE)
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
@@ -89,7 +93,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
             math_ops.matmul,
             num_parallel_calls=optimization.AUTOTUNE,
             batch_size=batch_size))
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
@@ -116,7 +122,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
         lambda _: dataset,
         cycle_length=10,
         num_parallel_calls=optimization.AUTOTUNE)
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
@@ -161,7 +169,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
         lambda _: dataset, cycle_length=2)
 
     dataset = dataset.map(f3, num_parallel_calls=optimization.AUTOTUNE)
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
index 092e0ff62a..fb0640fe9f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
@@ -40,7 +40,9 @@ class NoopEliminationTest(test_base.DatasetTestBase):
             ["FiniteRepeat", "FiniteSkip", "Prefetch", "Prefetch"]))
     dataset = dataset.repeat(some_tensor).skip(5).prefetch(0).take(-1).skip(
         0).repeat(1).prefetch(0)
-    dataset = dataset.apply(optimization.optimize(["noop_elimination"]))
+    options = dataset_ops.Options()
+    options.experimental_noop_elimination = True
+    dataset = dataset.with_options(options)
 
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
index eb661796c0..760cd8cc4e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
@@ -33,23 +33,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
   def testOptimizationDefault(self):
     dataset = dataset_ops.Dataset.range(10).apply(
-        optimization.assert_next(
-            ["Map", "Batch"])).map(lambda x: x * x).batch(10).apply(
-                optimization.optimize())
-    iterator = dataset.make_one_shot_iterator()
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.assertAllEqual([x * x for x in range(10)], sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testOptimizationEmpty(self):
-    dataset = dataset_ops.Dataset.range(10).apply(
-        optimization.assert_next(
-            ["Map", "Batch"])).map(lambda x: x * x).batch(10).apply(
-                optimization.optimize([]))
-    iterator = dataset.make_one_shot_iterator()
+        optimization.assert_next(["Map",
+                                  "Batch"])).map(lambda x: x * x).batch(10)
+    iterator = dataset.with_options(
+        dataset_ops.Options()).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
@@ -60,8 +47,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
   def testOptimizationFusion(self):
     dataset = dataset_ops.Dataset.range(10).apply(
         optimization.assert_next(
-            ["MapAndBatch"])).map(lambda x: x * x).batch(10).apply(
-                optimization.optimize(["map_and_batch_fusion"]))
+            ["MapAndBatch"])).map(lambda x: x * x).batch(10)
+    options = dataset_ops.Options()
+    options.experimental_map_and_batch_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
@@ -72,8 +61,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
   def testOptimizationStatefulFunction(self):
     dataset = dataset_ops.Dataset.range(10).map(
-        lambda _: random_ops.random_uniform([])).batch(10).apply(
-            optimization.optimize(["map_and_batch_fusion"]))
+        lambda _: random_ops.random_uniform([])).batch(10)
+    options = dataset_ops.Options()
+    options.experimental_map_and_batch_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
@@ -82,8 +73,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
   def testOptimizationLargeInputFromTensor(self):
     input_t = array_ops.placeholder(dtypes.int32, (None, None, None))
-    dataset = dataset_ops.Dataset.from_tensors(input_t).apply(
-        optimization.optimize())
+    dataset = dataset_ops.Dataset.from_tensors(input_t)
+    options = dataset_ops.Options()
+    options.experimental_map_and_batch_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -94,8 +87,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
   def testOptimizationLargeInputFromTensorSlices(self):
     input_t = array_ops.placeholder(dtypes.int32, (None, None, None, None))
-    dataset = dataset_ops.Dataset.from_tensor_slices(input_t).apply(
-        optimization.optimize())
+    dataset = dataset_ops.Dataset.from_tensor_slices(input_t)
+    options = dataset_ops.Options()
+    options.experimental_map_and_batch_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
diff --git a/tensorflow/python/data/experimental/ops/optimization.py b/tensorflow/python/data/experimental/ops/optimization.py
index 30348ede36..276dde8383 100644
--- a/tensorflow/python/data/experimental/ops/optimization.py
+++ b/tensorflow/python/data/experimental/ops/optimization.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import gen_experimental_dataset_ops
 
 # A constant that can be used to enable auto-tuning.
@@ -58,7 +57,7 @@ def model():
 
   def _apply_fn(dataset):
     """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _ModelDataset(dataset)
+    return dataset_ops._ModelDataset(dataset)  # pylint: disable=protected-access
 
   return _apply_fn
 
@@ -78,7 +77,7 @@ def optimize(optimizations=None):
 
   def _apply_fn(dataset):
     """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _OptimizeDataset(dataset, optimizations)
+    return dataset_ops._OptimizeDataset(dataset, optimizations)  # pylint: disable=protected-access
 
   return _apply_fn
 
@@ -113,59 +112,3 @@ class _AssertNextDataset(dataset_ops.UnaryDataset):
   def output_types(self):
     return self._input_dataset.output_types
 
-
-class _ModelDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that acts as an identity, and models performance."""
-
-  def __init__(self, input_dataset):
-    """See `optimize()` for details."""
-    super(_ModelDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.model_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-
-class _OptimizeDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that acts as an identity, and applies optimizations."""
-
-  def __init__(self, input_dataset, optimizations):
-    """See `optimize()` for details."""
-    super(_OptimizeDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    if optimizations is None:
-      optimizations = []
-    self._optimizations = ops.convert_to_tensor(
-        optimizations, dtype=dtypes.string, name="optimizations")
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.optimize_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._optimizations,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index cadfe7f9e0..bf76860aa4 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -115,8 +115,10 @@ tf_py_test(
     srcs = ["dataset_ops_test.py"],
     additional_deps = [
         ":test_base",
-        "//tensorflow/core:protos_all_py",
+        "@absl_py//absl/testing:parameterized",
+        "//third_party/py/numpy",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -172,20 +174,6 @@ tf_py_test(
     ],
 )
 
-tf_py_test(
-    name = "inputs_test",
-    size = "small",
-    srcs = ["inputs_test.py"],
-    additional_deps = [
-        ":test_base",
-        "@absl_py//absl/testing:parameterized",
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
 tf_py_test(
     name = "interleave_dataset_op_test",
     size = "small",
diff --git a/tensorflow/python/data/kernel_tests/dataset_ops_test.py b/tensorflow/python/data/kernel_tests/dataset_ops_test.py
index f115f9d9c7..b9f8875b9f 100644
--- a/tensorflow/python/data/kernel_tests/dataset_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_ops_test.py
@@ -18,13 +18,20 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
+import numpy as np
+
 from tensorflow.core.framework import graph_pb2
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import readers
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.platform import test
 
 
-class DatasetOpsTest(test_base.DatasetTestBase):
+class DatasetOpsTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testAsSerializedGraph(self):
     dataset = dataset_ops.Dataset.range(10)
@@ -33,6 +40,155 @@ class DatasetOpsTest(test_base.DatasetTestBase):
           sess.run(dataset._as_serialized_graph()))
       self.assertTrue(any([node.op != "RangeDataset" for node in graph.node]))
 
+  @staticmethod
+  def make_apply_fn(dataset):
+
+    def apply_fn(dataset):
+
+      def _apply_fn(dataset):
+        return dataset.cache()
+
+      return dataset.apply(_apply_fn)
+
+    return apply_fn
+
+  @staticmethod
+  def make_gen():
+
+    def gen():
+      yield 42
+
+    return gen
+
+  @staticmethod
+  def make_interleave_fn(dataset, num_parallel_calls=None):
+
+    def interleave_fn(dataset):
+      return dataset.interleave(
+          lambda x: dataset_ops.Dataset.range(0),
+          cycle_length=2,
+          num_parallel_calls=num_parallel_calls)
+
+    return interleave_fn
+
+  @parameterized.named_parameters(
+      ("FixedLengthRecord", readers.FixedLengthRecordDataset("", 42)),
+      ("FromGenerator",
+       dataset_ops.Dataset.from_generator(make_gen.__func__(), dtypes.int32),
+       1),
+      ("FromSparseTensorSlices",
+       dataset_ops.Dataset.from_sparse_tensor_slices(
+           sparse_tensor.SparseTensor(
+               indices=np.array([[0, 0], [1, 0], [2, 0]]),
+               values=np.array([0, 0, 0]),
+               dense_shape=np.array([3, 1])))),
+      ("FromTensors", dataset_ops.Dataset.from_tensors([42])),
+      ("FromTensorSlices", dataset_ops.Dataset.from_tensors([42])),
+      ("Range", dataset_ops.Dataset.range(10)),
+      ("TextLine", readers.TextLineDataset("")),
+      ("TFRecord", readers.TFRecordDataset(""), 1),
+  )
+  def testDatasetSourceInputs(self, dataset, num_inputs=0):
+    self.assertEqual(num_inputs, len(dataset._inputs()))
+
+  @parameterized.named_parameters(
+      ("Apply", make_apply_fn.__func__(dataset_ops.Dataset.range(0)),
+       dataset_ops.Dataset.range(0)),
+      ("Batch", lambda x: x.batch(10), dataset_ops.Dataset.range(0)),
+      ("Cache", lambda x: x.cache(), dataset_ops.Dataset.range(0)),
+      ("Filter", lambda x: x.filter(lambda x: True),
+       dataset_ops.Dataset.range(0)),
+      ("FlatMap", lambda x: x.flat_map(lambda x: dataset_ops.Dataset.range(0)),
+       dataset_ops.Dataset.range(0)),
+      ("Interleave", make_interleave_fn.__func__(dataset_ops.Dataset.range(0)),
+       dataset_ops.Dataset.range(0)),
+      ("Map", lambda x: x.map(lambda x: x), dataset_ops.Dataset.range(0)),
+      ("PaddedBatch", lambda x: x.padded_batch(10, []),
+       dataset_ops.Dataset.range(0)),
+      ("ParallelInterleave",
+       make_interleave_fn.__func__(dataset_ops.Dataset.range(0), 2),
+       dataset_ops.Dataset.range(0)),
+      ("ParallelMap", lambda x: x.map(lambda x: x, num_parallel_calls=2),
+       dataset_ops.Dataset.range(0)),
+      ("Repeat", lambda x: x.repeat(), dataset_ops.Dataset.range(0)),
+      ("Shuffle", lambda x: x.shuffle(10), dataset_ops.Dataset.range(0)),
+      ("Skip", lambda x: x.skip(1), dataset_ops.Dataset.range(0)),
+      ("Take", lambda x: x.take(1), dataset_ops.Dataset.range(0)),
+      ("Window", lambda x: x.window(10), dataset_ops.Dataset.range(0)),
+  )
+  def testUnaryTransformationInputs(self, dataset_fn, input_dataset):
+    self.assertEqual([input_dataset], dataset_fn(input_dataset)._inputs())
+
+  @parameterized.named_parameters(
+      ("Concatenate", lambda x, y: x.concatenate(y),
+       dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1)))
+  def testBinaryTransformationInputs(self, dataset_fn, input1, input2):
+    self.assertEqual([input1, input2], dataset_fn(input1, input2)._inputs())
+
+  @parameterized.named_parameters(
+      ("ZipOne", dataset_ops.Dataset.zip, (dataset_ops.Dataset.range(0))),
+      ("ZipNest", dataset_ops.Dataset.zip,
+       (dataset_ops.Dataset.range(0),
+        (dataset_ops.Dataset.range(1), dataset_ops.Dataset.range(2)))),
+      ("ZipTuple", dataset_ops.Dataset.zip,
+       (dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1))))
+  def testVariadicTransformationInputs(self, dataset_fn, input_datasets):
+    self.assertEqual(
+        nest.flatten(input_datasets),
+        dataset_fn(input_datasets)._inputs())
+
+  def testCollectInputs(self):
+    ds1 = dataset_ops.Dataset.range(0)
+    ds2 = ds1.concatenate(ds1)
+    ds3 = dataset_ops.Dataset.zip((ds2, ds1, ds2))
+
+    inputs = []
+    queue = [ds3]
+    while queue:
+      ds = queue[0]
+      queue = queue[1:]
+      queue.extend(ds._inputs())
+      inputs.append(ds)
+
+    self.assertEqual(5, inputs.count(ds1))
+    self.assertEqual(2, inputs.count(ds2))
+    self.assertEqual(1, inputs.count(ds3))
+
+  def testOptionsDefault(self):
+    ds = dataset_ops.Dataset.range(0)
+    self.assertEqual(dataset_ops.Options(), ds.options())
+
+  def testOptionsOnce(self):
+    options = dataset_ops.Options()
+    ds = dataset_ops.Dataset.range(0).with_options(options).cache()
+    self.assertEqual(options, ds.options())
+
+  def testOptionsTwiceSame(self):
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    ds = dataset_ops.Dataset.range(0).with_options(options).with_options(
+        options)
+    self.assertEqual(options, ds.options())
+
+  def testOptionsTwiceDifferent(self):
+    options1 = dataset_ops.Options()
+    options1.experimental_autotune = True
+    options2 = dataset_ops.Options()
+    options2.experimental_filter_fusion = False
+    ds = dataset_ops.Dataset.range(0).with_options(options1).with_options(
+        options2)
+    self.assertTrue(ds.options().experimental_autotune)
+    self.assertFalse(ds.options().experimental_filter_fusion)
+
+  def testOptionsTwiceDifferentError(self):
+    options1 = dataset_ops.Options()
+    options1.experimental_autotune = True
+    options2 = dataset_ops.Options()
+    options2.experimental_autotune = False
+    with self.assertRaisesRegexp(ValueError,
+                                 "Cannot merge incompatible values of option"):
+      dataset_ops.Dataset.range(0).with_options(options1).with_options(options2)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 3b9d3a639d..46ce191f7b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -86,6 +86,18 @@ class Dataset(object):
 
     raise NotImplementedError("Dataset._inputs")
 
+  def options(self):
+    """Returns the options for this dataset.
+
+    Returns:
+      A `tf.data.Options` object representing the dataset options.
+    """
+    for input_dataset in self._inputs():
+      options = input_dataset.options()
+      if options is not None:
+        return options
+    return Options()
+
   def make_initializable_iterator(self, shared_name=None):
     """Creates an `Iterator` for enumerating the elements of this dataset.
 
@@ -114,6 +126,13 @@ class Dataset(object):
       raise RuntimeError(
           "dataset.make_initializable_iterator is not supported when eager "
           "execution is enabled.")
+    dataset = self
+    options = self.options()
+    static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
+    if static_optimizations:
+      dataset = _OptimizeDataset(dataset, static_optimizations)
+    if options.experimental_autotune:
+      dataset = _ModelDataset(dataset)
     if shared_name is None:
       shared_name = ""
     if compat.forward_compatible(2018, 8, 3):
@@ -123,11 +142,12 @@ class Dataset(object):
       iterator_resource = gen_dataset_ops.iterator(
           container="", shared_name=shared_name, **flat_structure(self))
     with ops.colocate_with(iterator_resource):
-      initializer = gen_dataset_ops.make_iterator(self._as_variant_tensor(),
-                                                  iterator_resource)
+      initializer = gen_dataset_ops.make_iterator(
+          dataset._as_variant_tensor(),  # pylint: disable=protected-access
+          iterator_resource)
     return iterator_ops.Iterator(iterator_resource, initializer,
-                                 self.output_types, self.output_shapes,
-                                 self.output_classes)
+                                 dataset.output_types, dataset.output_shapes,
+                                 dataset.output_classes)
 
   def __iter__(self):
     """Creates an `Iterator` for enumerating the elements of this dataset.
@@ -162,7 +182,14 @@ class Dataset(object):
     # a 0-argument function.
     @function.Defun(capture_by_value=True)
     def _make_dataset():
-      return self._as_variant_tensor()  # pylint: disable=protected-access
+      dataset = self
+      options = self.options()
+      static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
+      if static_optimizations:
+        dataset = _OptimizeDataset(dataset, static_optimizations)
+      if options.experimental_autotune:
+        dataset = _ModelDataset(dataset)
+      return dataset._as_variant_tensor()  # pylint: disable=protected-access
 
     try:
       _make_dataset.add_to_graph(ops.get_default_graph())
@@ -1325,6 +1352,146 @@ class Dataset(object):
         output_shapes,
         output_classes)
 
+  def with_options(self, options):
+    """Returns a new `tf.data.Dataset` with the given options set.
+
+    The options are "global" in the sense they apply to the entire input
+    pipeline in which the `with_options` transformation is used. If options are
+    set multiple times, they are merged if possible (see
+    `tf.data.Options.merge()` for details).
+
+    Args:
+      options: A `tf.data.Options` that identifies the options the use.
+
+    Returns:
+      Dataset: A `Dataset` with the given options.
+
+    Raises:
+      ValueError: if options are set more than once
+    """
+    return _OptionsDataset(self, options)
+
+
+@tf_export("data.Options")
+class Options(object):
+  """Represents options for tf.data.Dataset.
+
+  An `Options` object can be for instance used to control which static
+  optimizations to apply or whether to use performance modeling to dynamically
+  tune the parallelism of operations such as `tf.data.Dataset.map` or
+  `tf.data.Dataset.interleave`.
+  """
+  for _name, _ty, _docstring in [
+      ("experimental_autotune", bool,
+       "Whether to dynamically adjust the values of tunable parameters (e.g. "
+       "degrees of parallelism)."),
+      ("experimental_filter_fusion", bool,
+       "Whether to fuse filter transformations."),
+      ("experimental_hoist_random_uniform", bool,
+       "Whether to hoist `tf.random_uniform()` ops out of map transformations."
+      ),
+      ("experimental_latency_all_edges", bool,
+       "Whether to add latency measurements on all edges."),
+      ("experimental_map_and_batch_fusion", bool,
+       "Whether to fuse map and batch transformations."),
+      ("experimental_map_and_filter_fusion", bool,
+       "Whether to fuse map and filter transformations."),
+      ("experimental_map_fusion", bool, "Whether to fuse map transformations."),
+      ("experimental_map_parallelization", bool,
+       "Whether to parallelize stateless map transformations."),
+      ("experimental_map_vectorization", bool,
+       "Whether to vectorize map transformations."),
+      ("experimental_noop_elimination", bool,
+       "Whether to eliminate no-op transformations."),
+      ("experimental_shuffle_and_repeat_fusion", bool,
+       "Whether to fuse shuffle and repeat transformations."),
+  ]:
+
+    def _make_getter(name):  # pylint: disable=no-self-argument
+
+      def getter(self):
+        return getattr(self, "_" + name)
+
+      return getter
+
+    def _make_setter(name, ty):  # pylint: disable=no-self-argument
+
+      def setter(self, value):
+        if not isinstance(value, ty):
+          raise TypeError(
+              "Attempting to set the option %s to incompatible value: %r" %
+              (name, value))
+        setattr(self, "_" + name, value)
+
+      return setter
+
+    vars()["_" + _name] = None
+    vars()[_name] = property(
+        _make_getter(_name), _make_setter(_name, _ty), None, _docstring)
+
+  def __init__(self):
+    pass
+
+  def __eq__(self, other):
+    if isinstance(other, self.__class__):
+      return self.__dict__ == other.__dict__
+    else:
+      return False
+
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def _static_optimizations(self):
+    """Produces the list of enabled static optimizations."""
+    experimental_optimizations = [
+        "filter_fusion", "hoist_random_uniform", "latency_all_edges",
+        "map_and_batch_fusion", "map_and_filter_fusion", "map_fusion",
+        "map_parallelization", "map_vectorization", "noop_elimination",
+        "shuffle_and_repeat_fusion"
+    ]
+    result = []
+    for exp_opt in experimental_optimizations:
+      if getattr(self, "experimental_" + exp_opt):
+        result.append(exp_opt)
+    return result
+
+  def merge(self, options):
+    """Merges itself with the given `tf.data.Options`.
+
+    The given `tf.data.Options` can be merged as long as there does not exist an
+    attribute that is set to different values in `self` and `options`.
+
+    Args:
+      options: a `tf.data.Options` to merge with
+
+    Raises:
+      ValueError: if the given `tf.data.Options` cannot be merged
+
+    Returns:
+      New `tf.data.Options()` object which is the result of merging self with
+      the input `tf.data.Options`.
+    """
+    result = Options()
+    for other in [self, options]:
+      for name in [
+          "experimental_autotune", "experimental_filter_fusion",
+          "experimental_hoist_random_uniform", "experimental_latency_all_edges",
+          "experimental_map_and_batch_fusion",
+          "experimental_map_and_filter_fusion", "experimental_map_fusion",
+          "experimental_map_parallelization", "experimental_map_vectorization",
+          "experimental_noop_elimination",
+          "experimental_shuffle_and_repeat_fusion"
+      ]:
+        this = getattr(result, name)
+        that = getattr(other, name)
+        if that is not None:
+          if this is None:
+            setattr(result, name, that)
+          elif this != that:
+            raise ValueError(
+                "Cannot merge incompatible values of option: %s" % (name))
+    return result
+
 
 class DatasetSource(Dataset):
   """Abstract class representing a dataset with no inputs."""
@@ -1664,6 +1831,9 @@ class StructuredFunctionWrapper(object):
           flat_classes.append(component)
           flat_shapes.append(component)
           flat_types.append(component)
+          if t.options() is not None:  # pylint: disable=protected-access
+            warnings.warn("Encountered a nested dataset with options. These "
+                          "options will not be applied to the outer dataset.")
         else:
           try:
             t = ops.convert_to_tensor(t)
@@ -2703,3 +2873,91 @@ class WindowDataset(UnaryDataset):
   @property
   def output_types(self):
     return self._output_types
+
+
+class _OptionsDataset(UnaryDataset):
+  """An identity `Dataset` that stores options."""
+
+  def __init__(self, input_dataset, options):
+    super(_OptionsDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._options = input_dataset.options()
+    if self._options:
+      self._options = self._options.merge(options)
+    else:
+      self._options = options
+
+  def _as_variant_tensor(self):
+    return self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+
+  def options(self):
+    return self._options
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+class _ModelDataset(UnaryDataset):
+  """A `Dataset` that acts as an identity, and models performance."""
+
+  def __init__(self, input_dataset):
+    """See `optimize()` for details."""
+    super(_ModelDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.model_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        **flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+class _OptimizeDataset(UnaryDataset):
+  """A `Dataset` that acts as an identity, and applies optimizations."""
+
+  def __init__(self, input_dataset, optimizations):
+    """See `optimize()` for details."""
+    super(_OptimizeDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    if optimizations is None:
+      optimizations = []
+    self._optimizations = ops.convert_to_tensor(
+        optimizations, dtype=dtypes.string, name="optimizations")
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.optimize_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._optimizations,
+        **flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
index 825afb622f..8b7f63e43e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
@@ -78,6 +78,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -118,6 +122,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
index cdad5f6360..a7bfa82c65 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
new file mode 100644
index 0000000000..d15dccc173
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
@@ -0,0 +1,57 @@
+path: "tensorflow.data.Options"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Options\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "experimental_autotune"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_filter_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_hoist_random_uniform"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_latency_all_edges"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_and_batch_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_and_filter_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_parallelization"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_vectorization"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_noop_elimination"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_shuffle_and_repeat_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "merge"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
index df41bff1b5..7b7a9ebaf0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
index 028bcc2ce9..2817f900e1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
index 0c0405ee02..2520e28a3c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
index bce0be4b17..1dd53b1eab 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
index 8aeae92d96..8fdd9dc52e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
index e205157523..3023276a1d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "Iterator"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "Options"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TFRecordDataset"
     mtype: "<class \'abc.ABCMeta\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
index 825afb622f..8b7f63e43e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
@@ -78,6 +78,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -118,6 +122,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
index cdad5f6360..a7bfa82c65 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
new file mode 100644
index 0000000000..d15dccc173
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
@@ -0,0 +1,57 @@
+path: "tensorflow.data.Options"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Options\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "experimental_autotune"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_filter_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_hoist_random_uniform"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_latency_all_edges"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_and_batch_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_and_filter_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_parallelization"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_vectorization"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_noop_elimination"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_shuffle_and_repeat_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "merge"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
index df41bff1b5..7b7a9ebaf0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
index 028bcc2ce9..2817f900e1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
index 0c0405ee02..2520e28a3c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
index bce0be4b17..1dd53b1eab 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
index 8aeae92d96..8fdd9dc52e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
index e205157523..3023276a1d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "Iterator"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "Options"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TFRecordDataset"
     mtype: "<class \'abc.ABCMeta\'>"
-- 
GitLab


From bacf1949f92bb1daa9e5c8a31cc6924e532551e9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 17:33:55 -0700
Subject: [PATCH 0989/1357] [XLA] Add kAllToAll and kCollectivePermute to
 EffectiveOperandPrecisionIsOutputPrecision list.

PiperOrigin-RevId: 215311766
---
 tensorflow/compiler/xla/service/bfloat16_support.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc
index 23645346e6..5b48f10505 100644
--- a/tensorflow/compiler/xla/service/bfloat16_support.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_support.cc
@@ -78,8 +78,10 @@ bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision(
     const HloInstruction& hlo, int64 operand_index) {
   switch (hlo.opcode()) {
     case HloOpcode::kAbs:
+    case HloOpcode::kAllToAll:
     case HloOpcode::kBroadcast:
     case HloOpcode::kClamp:
+    case HloOpcode::kCollectivePermute:
     case HloOpcode::kConcatenate:
     case HloOpcode::kConvert:
     case HloOpcode::kCopy:
-- 
GitLab


From beede8525be5386451bf0098992c37416d1864db Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 1 Oct 2018 17:45:22 -0700
Subject: [PATCH 0990/1357] Make Keras/TPU more robust to closed TF sessions.

PiperOrigin-RevId: 215313156
---
 .../contrib/tpu/python/tpu/keras_support.py   | 278 ++++++++++--------
 1 file changed, 155 insertions(+), 123 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 696656e840..a3a7fd8bb0 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -46,6 +46,7 @@ from __future__ import print_function
 
 import abc
 import collections
+import contextlib
 import re
 import sys
 import time
@@ -94,21 +95,56 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 
 
+# TODO(b/114775106): temporary shim to optionally initialize the TPU
+# This increases the odds our session is initialized, but shouldn't be needed.
+def _maybe_initialize_tpu(session):
+  """Initialize the TPU if it has not already been initialized."""
+  try:
+
+    def test_op():
+      return constant_op.constant(1) + constant_op.constant(1)
+
+    session.run(tpu.rewrite(test_op))
+  except errors.FailedPreconditionError as _:
+    session.run(tpu.initialize_system())
+
+
+@contextlib.contextmanager
+def _tpu_session_context():
+  """Initialize the TPU and cleans cache entries for bad sessions."""
+  try:
+    _maybe_initialize_tpu(K.get_session())
+    yield
+  except (errors.FailedPreconditionError, errors.AbortedError) as e:
+    K.clear_session()
+    raise Exception("""
+An error occurred connecting or initializing your TPU.
+
+The session has been reset. re-run keras_to_tpu_model to create a new session.
+""" + e)
+
+
 def setup_tpu_session(cluster_resolver):
   """Construct or return a `tf.Session` connected to the given cluster."""
   master = cluster_resolver.master()
 
   # Use the existing session if we're already connected to this TPU
-  if (K.get_session()._target == master and
-      getattr(K.get_session(), '_tpu_initialized', None)):
-    return
+  # N.B K.get_session() is a non-trivial operation, and may fail if the remote
+  # session has been reset.
+  try:
+    default_session = K.get_session()
+    if (default_session._target == master and
+        getattr(default_session, '_tpu_initialized', None)):
+      return
+  except errors.AbortedError as _:
+    # We lost the remote session and need to re-initialize.
+    logging.warning('Lost remote session: creating a new session.')
 
   cluster_spec = cluster_resolver.cluster_spec()
   config = config_pb2.ConfigProto(isolate_session_state=True)
   if cluster_spec:
     config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
 
-  logging.info('Initialize')
   tpu_session = tf_session.Session(target=master, config=config)
   tpu_session.run(tpu.initialize_system())
   tpu_session._tpu_initialized = True
@@ -1391,97 +1427,74 @@ class KerasTPUModel(models.Model):
       raise EnvironmentError('KerasTPUModel currently does not support eager '
                              'mode.')
 
-    assert not self._numpy_to_infeed_manager_list  # Ensure empty.
-
-    infeed_managers = []  # Managers to clean up at the end of the fit call.
-    if isinstance(x, dataset_ops.Dataset):
-      # TODO(b/111413240): Support taking a tf.data.Dataset directly.
-      raise ValueError(
-          'Taking a Dataset directly is not yet supported. Please '
-          'wrap your dataset construction code in a function and '
-          'pass that to fit instead. For examples, see: '
-          'https://github.com/tensorflow/tpu/tree/master/models/experimental'
-          '/keras')
-    if callable(x):
-      with ops.device('/job:%s/device:CPU:0' %
-                      self._tpu_assignment.worker_name):
-        dataset = x()
-        if steps_per_epoch is None:
-          raise ValueError('When using tf.data as input to a model, you '
-                           'should specify the steps_per_epoch argument.')
-        if y is not None:
-          raise ValueError('When using tf.data as input to a model, y must be '
-                           'None')
-        infeed_manager = TPUDatasetInfeedManager(
-            dataset, self._tpu_assignment, model_fn_lib.ModeKeys.TRAIN)
+    with _tpu_session_context():
+      assert not self._numpy_to_infeed_manager_list  # Ensure empty.
+
+      infeed_managers = []  # Managers to clean up at the end of the fit call.
+      if isinstance(x, dataset_ops.Dataset):
+        # TODO(b/111413240): Support taking a tf.data.Dataset directly.
+        raise ValueError(
+            'Taking a Dataset directly is not yet supported. Please '
+            'wrap your dataset construction code in a function and '
+            'pass that to fit instead. For examples, see: '
+            'https://github.com/tensorflow/tpu/tree/master/models/experimental'
+            '/keras')
+      if callable(x):
+        with ops.device(
+            '/job:%s/device:CPU:0' % self._tpu_assignment.worker_name):
+          dataset = x()
+          if steps_per_epoch is None:
+            raise ValueError('When using tf.data as input to a model, you '
+                             'should specify the steps_per_epoch argument.')
+          if y is not None:
+            raise ValueError('When using tf.data as input to a model, y must '
+                             'be None')
+          infeed_manager = TPUDatasetInfeedManager(
+              dataset, self._tpu_assignment, model_fn_lib.ModeKeys.TRAIN)
+          # Use dummy numpy inputs for the rest of Keras' shape checking. We
+          # intercept them when building the model.
+          x = infeed_manager.dummy_x
+          y = infeed_manager.dummy_y
+          infeed_managers.append((x, infeed_manager))
+
+      if isinstance(validation_data, dataset_ops.Dataset):
+        # TODO(b/111413240): Support taking a tf.data.Dataset directly.
+        raise ValueError(
+            'Taking a Dataset directly is not yet supported. Please '
+            'wrap your dataset construction code in a function and '
+            'pass that to fit instead. For examples, see: '
+            'https://github.com/tensorflow/tpu/tree/master/models/experimental'
+            '/keras')
+      if callable(validation_data):
+        dataset = validation_data()
+        if validation_steps is None:
+          raise ValueError('When using tf.data as validation for a model, you '
+                           'should specify the validation_steps argument.')
+        infeed_manager = TPUDatasetInfeedManager(dataset, self._tpu_assignment,
+                                                 model_fn_lib.ModeKeys.EVAL)
         # Use dummy numpy inputs for the rest of Keras' shape checking. We
         # intercept them when building the model.
-        x = infeed_manager.dummy_x
-        y = infeed_manager.dummy_y
-        infeed_managers.append((x, infeed_manager))
+        val_x = infeed_manager.dummy_x
+        val_y = infeed_manager.dummy_y
+        infeed_managers.append((val_x, infeed_manager))
+        validation_data = (val_x, val_y)
 
-    if isinstance(validation_data, dataset_ops.Dataset):
-      # TODO(b/111413240): Support taking a tf.data.Dataset directly.
-      raise ValueError(
-          'Taking a Dataset directly is not yet supported. Please '
-          'wrap your dataset construction code in a function and '
-          'pass that to fit instead. For examples, see: '
-          'https://github.com/tensorflow/tpu/tree/master/models/experimental'
-          '/keras')
-    if callable(validation_data):
-      dataset = validation_data()
-      if validation_steps is None:
-        raise ValueError('When using tf.data as validation for a model, you '
-                         'should specify the validation_steps argument.')
-      infeed_manager = TPUDatasetInfeedManager(
-          dataset, self._tpu_assignment, model_fn_lib.ModeKeys.EVAL)
-      # Use dummy numpy inputs for the rest of Keras' shape checking. We
-      # intercept them when building the model.
-      val_x = infeed_manager.dummy_x
-      val_y = infeed_manager.dummy_y
-      infeed_managers.append((val_x, infeed_manager))
-      validation_data = (val_x, val_y)
-
-    self._numpy_to_infeed_manager_list = infeed_managers
-    try:
-      if not kwargs.get('_pipeline', True):
-        logging.info('Running non-pipelined training loop (`_pipeline=%s`).',
-                     kwargs['_pipeline'])
-        kwargs.pop('_pipeline')
-        return super(KerasTPUModel, self).fit(
-            x,
-            y,
-            batch_size,
-            epochs,
-            verbose,
-            callbacks,
-            validation_split,
-            validation_data,
-            shuffle,
-            class_weight,
-            sample_weight,
-            initial_epoch,
-            steps_per_epoch,
-            validation_steps,
-            **kwargs)
-      return self._pipeline_fit(
-          x,
-          y,
-          batch_size,
-          epochs,
-          verbose,
-          callbacks,
-          validation_split,
-          validation_data,
-          shuffle,
-          class_weight,
-          sample_weight,
-          initial_epoch,
-          steps_per_epoch,
-          validation_steps,
-          **kwargs)
-    finally:
-      self._numpy_to_infeed_manager_list = []
+      self._numpy_to_infeed_manager_list = infeed_managers
+      try:
+        if not kwargs.get('_pipeline', True):
+          logging.info('Running non-pipelined training loop (`_pipeline=%s`).',
+                       kwargs['_pipeline'])
+          kwargs.pop('_pipeline')
+          return super(KerasTPUModel, self).fit(
+              x, y, batch_size, epochs, verbose, callbacks, validation_split,
+              validation_data, shuffle, class_weight, sample_weight,
+              initial_epoch, steps_per_epoch, validation_steps, **kwargs)
+        return self._pipeline_fit(x, y, batch_size, epochs, verbose, callbacks,
+                                  validation_split, validation_data, shuffle,
+                                  class_weight, sample_weight, initial_epoch,
+                                  steps_per_epoch, validation_steps, **kwargs)
+      finally:
+        self._numpy_to_infeed_manager_list = []
 
   def evaluate(self,
                x=None,
@@ -1492,37 +1505,38 @@ class KerasTPUModel(models.Model):
                steps=None):
     assert not self._numpy_to_infeed_manager_list  # Ensure empty.
 
-    infeed_managers = []  # Managers to clean up at the end of the fit call.
-    if isinstance(x, dataset_ops.Dataset):
-      # TODO(b/111413240): Support taking a tf.data.Dataset directly.
-      raise ValueError(
-          'Taking a Dataset directly is not yet supported. Please '
-          'wrap your dataset construction code in a function and '
-          'pass that to fit instead. For examples, see: '
-          'https://github.com/tensorflow/tpu/tree/master/models/experimental'
-          '/keras')
-    if callable(x):
-      dataset = x()
-      if steps is None:
-        raise ValueError('When using tf.data as input to a model, you '
-                         'should specify the steps argument.')
-      if y is not None:
-        raise ValueError('When using tf.data as input to a model, y must be '
-                         'None')
-      infeed_manager = TPUDatasetInfeedManager(
-          dataset, self._tpu_assignment, model_fn_lib.ModeKeys.EVAL)
-      # Use dummy numpy inputs for the rest of Keras' shape checking. We
-      # intercept them when building the model.
-      x = infeed_manager.dummy_x
-      y = infeed_manager.dummy_y
-      infeed_managers.append((x, infeed_manager))
-
-    self._numpy_to_infeed_manager_list = infeed_managers
-    try:
-      return super(KerasTPUModel, self).evaluate(x, y, batch_size, verbose,
-                                                 sample_weight, steps)
-    finally:
-      self._numpy_to_infeed_manager_list = []
+    with _tpu_session_context():
+      infeed_managers = []  # Managers to clean up at the end of the fit call.
+      if isinstance(x, dataset_ops.Dataset):
+        # TODO(b/111413240): Support taking a tf.data.Dataset directly.
+        raise ValueError(
+            'Taking a Dataset directly is not yet supported. Please '
+            'wrap your dataset construction code in a function and '
+            'pass that to fit instead. For examples, see: '
+            'https://github.com/tensorflow/tpu/tree/master/models/experimental'
+            '/keras')
+      if callable(x):
+        dataset = x()
+        if steps is None:
+          raise ValueError('When using tf.data as input to a model, you '
+                           'should specify the steps argument.')
+        if y is not None:
+          raise ValueError('When using tf.data as input to a model, y must be '
+                           'None')
+        infeed_manager = TPUDatasetInfeedManager(dataset, self._tpu_assignment,
+                                                 model_fn_lib.ModeKeys.EVAL)
+        # Use dummy numpy inputs for the rest of Keras' shape checking. We
+        # intercept them when building the model.
+        x = infeed_manager.dummy_x
+        y = infeed_manager.dummy_y
+        infeed_managers.append((x, infeed_manager))
+
+      self._numpy_to_infeed_manager_list = infeed_managers
+      try:
+        return super(KerasTPUModel, self).evaluate(x, y, batch_size, verbose,
+                                                   sample_weight, steps)
+      finally:
+        self._numpy_to_infeed_manager_list = []
 
   def _pipeline_fit(self, x, y, batch_size, epochs, verbose, callbacks,
                     validation_split, validation_data, shuffle, class_weight,
@@ -1910,6 +1924,24 @@ class KerasTPUModel(models.Model):
 
     return val_x, val_y, val_sample_weights
 
+  def predict(self,
+              x,
+              batch_size=None,
+              verbose=0,
+              steps=None,
+              max_queue_size=10,
+              workers=1,
+              use_multiprocessing=False):
+    with _tpu_session_context():
+      return super(KerasTPUModel, self).predict(
+          x,
+          batch_size=batch_size,
+          verbose=verbose,
+          steps=steps,
+          max_queue_size=max_queue_size,
+          workers=workers,
+          use_multiprocessing=use_multiprocessing)
+
   @property
   def optimizer(self):
     if self._tpu_model:
-- 
GitLab


From 991f06fd50fc73285ce415d57f720994c2b2e861 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Mon, 1 Oct 2018 19:42:12 -0700
Subject: [PATCH 0991/1357] [XLA] Migrate from gtl::FlatSet to
 absl::flat_hash_set

PiperOrigin-RevId: 215324035
---
 tensorflow/compiler/jit/BUILD                 |  2 +
 tensorflow/compiler/jit/deadness_analysis.cc  | 10 ++--
 .../jit/encapsulate_subgraphs_pass.cc         |  7 +--
 .../jit/encapsulate_xla_computations_pass.cc  | 10 ++--
 .../compiler/jit/mark_for_compilation_pass.cc |  6 +--
 .../compiler/jit/partially_decluster_pass.cc  |  7 +--
 .../jit/resource_operation_safety_analysis.cc |  4 +-
 tensorflow/compiler/tests/BUILD               |  1 +
 tensorflow/compiler/tests/randomized_tests.cc | 14 +++---
 tensorflow/compiler/xla/client/BUILD          |  1 +
 tensorflow/compiler/xla/client/xla_builder.cc |  4 +-
 tensorflow/compiler/xla/client/xla_builder.h  |  4 +-
 tensorflow/compiler/xla/service/BUILD         | 27 +++++++++++
 .../xla/service/bfloat16_propagation.cc       |  9 ++--
 .../xla/service/bfloat16_propagation.h        | 11 +++--
 .../compiler/xla/service/buffer_assignment.cc | 48 ++++++++++---------
 .../compiler/xla/service/buffer_assignment.h  | 22 ++++-----
 .../compiler/xla/service/buffer_liveness.h    |  4 +-
 .../xla/service/buffer_value_containers.h     |  4 +-
 tensorflow/compiler/xla/service/call_graph.cc |  9 ++--
 tensorflow/compiler/xla/service/call_graph.h  | 10 ++--
 .../compiler/xla/service/copy_insertion.cc    |  6 +--
 tensorflow/compiler/xla/service/cpu/BUILD     |  1 +
 .../compiler/xla/service/cpu/ir_emitter.cc    |  8 ++--
 .../xla/service/cpu/tests/cpu_noalias_test.cc |  2 +-
 tensorflow/compiler/xla/service/gpu/BUILD     |  3 ++
 .../xla/service/gpu/gpu_copy_insertion.cc     |  2 +-
 .../xla/service/gpu/instruction_fusion.cc     |  5 +-
 .../xla/service/gpu/multi_output_fusion.cc    |  6 +--
 .../compiler/xla/service/heap_simulator.cc    | 13 ++---
 .../compiler/xla/service/heap_simulator.h     |  6 +--
 .../xla/service/hlo_alias_analysis.cc         |  9 ++--
 tensorflow/compiler/xla/service/hlo_buffer.cc |  2 +-
 .../compiler/xla/service/hlo_computation.cc   | 11 ++---
 .../compiler/xla/service/hlo_computation.h    |  2 +-
 tensorflow/compiler/xla/service/hlo_cse.cc    |  6 +--
 .../xla/service/hlo_dataflow_analysis.cc      |  9 ++--
 .../compiler/xla/service/hlo_domain_map.cc    |  3 +-
 .../compiler/xla/service/hlo_domain_map.h     |  4 +-
 .../xla/service/hlo_domain_metadata.h         |  8 ++--
 .../compiler/xla/service/hlo_instruction.cc   |  4 +-
 .../xla/service/hlo_memory_scheduler.cc       |  7 +--
 tensorflow/compiler/xla/service/hlo_module.cc |  9 ++--
 .../xla/service/hlo_module_group_util.cc      |  6 +--
 .../compiler/xla/service/hlo_pass_pipeline.cc |  6 +--
 .../xla/service/hlo_rematerialization.cc      |  3 +-
 .../xla/service/hlo_rematerialization.h       |  3 +-
 .../compiler/xla/service/hlo_schedule.cc      |  5 +-
 tensorflow/compiler/xla/service/hlo_value.cc  |  4 +-
 .../xla/service/indexed_array_analysis.cc     |  2 +-
 .../compiler/xla/service/layout_assignment.h  |  7 ++-
 tensorflow/compiler/xla/service/llvm_ir/BUILD |  1 +
 .../xla/service/llvm_ir/alias_analysis.cc     |  6 +--
 .../xla/service/llvm_ir/alias_analysis.h      |  1 -
 .../xla/service/multi_output_fusion.cc        |  6 +--
 .../compiler/xla/service/name_uniquer.h       |  4 +-
 .../compiler/xla/service/shape_inference.cc   |  4 +-
 .../compiler/xla/service/shaped_buffer.cc     |  4 +-
 .../xla/service/tuple_points_to_analysis.h    |  1 -
 .../while_loop_invariant_code_motion.cc       |  8 ++--
 .../xla/service/while_loop_simplifier.cc      |  3 +-
 tensorflow/compiler/xla/tests/BUILD           |  2 +-
 .../compiler/xla/tests/test_utils_test.cc     |  5 +-
 63 files changed, 235 insertions(+), 186 deletions(-)

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index f20270931f..661b444a42 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -325,6 +325,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
@@ -407,6 +408,7 @@ cc_library(
         "//tensorflow/core/kernels:bounds_check",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index e63d4b7792..e0b9932d80 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -16,11 +16,11 @@ limitations under the License.
 #include "tensorflow/compiler/jit/deadness_analysis.h"
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/jit/deadness_analysis_internal.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 
 // ALGORITHM OVERVIEW
@@ -298,7 +298,7 @@ class SymbolPredicate : public Predicate {
 
 template <typename FunctionTy>
 /*static*/ void Predicate::Visit(Predicate* p, const FunctionTy& func) {
-  gtl::FlatSet<Predicate*> visited;
+  absl::flat_hash_set<Predicate*> visited;
   std::vector<Predicate*> stack;
 
   stack.push_back(p);
@@ -467,7 +467,7 @@ Predicate* PredicateFactory::MakeAndOrImpl(
       is_and ? Predicate::Kind::kAnd : Predicate::Kind::kOr;
   Predicate::Kind other_pred_kind =
       is_and ? Predicate::Kind::kOr : Predicate::Kind::kAnd;
-  gtl::FlatSet<Predicate*> simplified_ops_set;
+  absl::flat_hash_set<Predicate*> simplified_ops_set;
   std::vector<Predicate*> simplified_ops;
   for (Predicate* op : operands) {
     // Simplify A&A => A and  A|A => A.
@@ -492,7 +492,7 @@ Predicate* PredicateFactory::MakeAndOrImpl(
   }
 
   // Simplify "A&~A=>False" and "A|~A=>True".
-  gtl::FlatSet<Predicate*> negated_ops;
+  absl::flat_hash_set<Predicate*> negated_ops;
   for (Predicate* op : simplified_ops) {
     if (op->kind() == Predicate::Kind::kNot) {
       negated_ops.insert(dynamic_cast<NotPredicate&>(*op).operand());
@@ -512,7 +512,7 @@ Predicate* PredicateFactory::MakeAndOrImpl(
   //
   // First find any predicates contained in all subops.
   std::vector<Predicate*> common_inner_operands;
-  gtl::FlatSet<Predicate*> common_inner_operands_set;
+  absl::flat_hash_set<Predicate*> common_inner_operands_set;
   for (Predicate* op : simplified_ops) {
     if (op->kind() != other_pred_kind) {
       common_inner_operands.clear();
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index d165341f21..da27f837e8 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h"
@@ -44,7 +45,6 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/public/session_options.h"
@@ -78,7 +78,8 @@ void SortControlInputs(GraphDef* gdef) {
 namespace {
 
 bool AreAllParentsGuaranteedConst(
-    const Node& n, const gtl::FlatSet<const Node*>& runtime_const_nodes) {
+    const Node& n,
+    const absl::flat_hash_set<const Node*>& runtime_const_nodes) {
   if (n.type_string() == "GuaranteeConst") {
     // If the current node is itself a cast-to-const, no need
     // to look at the incoming edges.
@@ -101,7 +102,7 @@ bool AreAllParentsGuaranteedConst(
 void MarkGuaranteedConstants(
     const Graph& graph,
     const std::vector<std::pair<const Node*, Node*>>& src_arg_pairs) {
-  gtl::FlatSet<const Node*> guaranteed_const_nodes;
+  absl::flat_hash_set<const Node*> guaranteed_const_nodes;
   std::vector<const Node*> srcs;
   srcs.reserve(src_arg_pairs.size());
   for (const auto& src_arg : src_arg_pairs) {
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
index 755c364c62..2ce6fa73fc 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -15,13 +15,13 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/proto_serialization.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -62,7 +62,7 @@ DataType EdgeType(const Edge* edge) {
 }
 
 // Adds the control inputs of `node` to `*deps`.
-void AddControlInputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+void AddControlInputs(const Node& node, absl::flat_hash_set<Node*>* deps) {
   for (const Edge* edge : node.in_edges()) {
     if (edge->IsControlEdge()) {
       deps->insert(edge->src());
@@ -71,7 +71,7 @@ void AddControlInputs(const Node& node, gtl::FlatSet<Node*>* deps) {
 }
 
 // Adds the control outputs of `node` to `*deps`.
-void AddControlOutputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+void AddControlOutputs(const Node& node, absl::flat_hash_set<Node*>* deps) {
   for (const Edge* edge : node.out_edges()) {
     if (edge->IsControlEdge()) {
       deps->insert(edge->dst());
@@ -246,7 +246,7 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
 
     // Data and control inputs to the new XlaLaunch node.
     std::vector<std::pair<Node*, int>> data_inputs(num_inputs);
-    gtl::FlatSet<Node*> control_inputs;
+    absl::flat_hash_set<Node*> control_inputs;
     DataTypeVector arg_types(num_args);
 
     AddControlInputs(*launch, &control_inputs);
@@ -266,7 +266,7 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
 
     // Outputs.
     const int num_outputs = launch->output_types().size();
-    gtl::FlatSet<Node*> control_outputs;
+    absl::flat_hash_set<Node*> control_outputs;
     std::vector<std::vector<std::pair<Node*, int>>> data_outputs(num_outputs);
     DataTypeVector output_types(num_outputs);
 
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index 133d982360..4f0c370e65 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <unordered_map>
 #include <unordered_set>
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/jit/deadness_analysis.h"
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h"
@@ -42,7 +43,6 @@ limitations under the License.
 #include "tensorflow/core/graph/control_flow.h"
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/public/version.h"
 
@@ -371,7 +371,7 @@ bool IsXlaFusable(const NodeDef& node) {
 Status FindCompilationCandidates(
     const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env,
     const std::function<bool(const Node*, const DeviceType&)>& is_compilable_fn,
-    OrderedNodeSet* candidates, gtl::FlatSet<Node*>* isolated_nodes) {
+    OrderedNodeSet* candidates, absl::flat_hash_set<Node*>* isolated_nodes) {
   OptimizerOptions opts;
   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
       new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION,
@@ -849,7 +849,7 @@ Status MarkForCompilationPass::RunImpl(
   Graph* graph = options.graph->get();
 
   OrderedNodeSet compilation_candidates;
-  gtl::FlatSet<Node*> isolated_nodes;
+  absl::flat_hash_set<Node*> isolated_nodes;
   TF_RETURN_IF_ERROR(FindCompilationCandidates(
       *graph, options.flib_def,
       (options.session_options != nullptr) ? options.session_options->env
diff --git a/tensorflow/compiler/jit/partially_decluster_pass.cc b/tensorflow/compiler/jit/partially_decluster_pass.cc
index 10fc9e85d9..b1f9e9088f 100644
--- a/tensorflow/compiler/jit/partially_decluster_pass.cc
+++ b/tensorflow/compiler/jit/partially_decluster_pass.cc
@@ -15,17 +15,18 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/partially_decluster_pass.h"
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/xla_cluster_util.h"
 #include "tensorflow/compiler/tf2xla/const_analysis.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/framework/memory_types.h"
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace tensorflow {
 namespace {
-Status FindNodesToDecluster(const Graph& graph, gtl::FlatSet<Node*>* result,
+Status FindNodesToDecluster(const Graph& graph,
+                            absl::flat_hash_set<Node*>* result,
                             absl::Span<Node* const> post_order) {
   // Find nodes that have at least one user outside their cluster that expects
   // hostmem output.  These nodes should be cloned to outside the cluster to
@@ -171,7 +172,7 @@ Status PartiallyDeclusterToRemoveDeviceToHostCopies(Graph* graph) {
   GetPostOrder(*graph, &post_order, /*stable_comparator=*/NodeComparatorName(),
                /*edge_filter=*/NotBackedge);
 
-  gtl::FlatSet<Node*> nodes_to_partially_decluster;
+  absl::flat_hash_set<Node*> nodes_to_partially_decluster;
   TF_RETURN_IF_ERROR(
       FindNodesToDecluster(*graph, &nodes_to_partially_decluster, post_order));
 
diff --git a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
index 657bb409db..e039d46ec8 100644
--- a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
+++ b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
@@ -82,6 +82,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/resource_operation_safety_analysis.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_join.h"
 #include "absl/types/optional.h"
@@ -89,7 +90,6 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/util/ptr_util.h"
 
@@ -176,7 +176,7 @@ string ResourceOpToString(const ResourceOp& resource_op) {
 // point.
 class ResourceOpSet {
  private:
-  using Impl = gtl::FlatSet<ResourceOp>;
+  using Impl = absl::flat_hash_set<ResourceOp>;
 
  public:
   ResourceOpSet() = default;
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 3cf74fa788..822fedf121 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -1105,6 +1105,7 @@ cc_library(
         "//tensorflow/core:test",
         "//tensorflow/core:testlib",
         "//tensorflow/core/kernels:ops_util",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc
index bddda6f302..7a96f4c25c 100644
--- a/tensorflow/compiler/tests/randomized_tests.cc
+++ b/tensorflow/compiler/tests/randomized_tests.cc
@@ -45,6 +45,7 @@ limitations under the License.
 #include <random>
 #include <unordered_map>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/jit/defs.h"
@@ -63,7 +64,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/public/session.h"
 #include "tensorflow/core/public/session_options.h"
@@ -457,7 +457,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
   Tensor tensor(dtype, TensorShape(shape));
   switch (dtype) {
     case DT_FLOAT: {
-      gtl::FlatSet<float> already_generated;
+      absl::flat_hash_set<float> already_generated;
       std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
       test::FillFn<float>(&tensor, [&](int i) -> float {
         float generated;
@@ -470,7 +470,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_DOUBLE: {
-      gtl::FlatSet<double> already_generated;
+      absl::flat_hash_set<double> already_generated;
       std::uniform_real_distribution<double> distribution(-1.0, 1.0);
       test::FillFn<double>(&tensor, [&](int i) -> double {
         double generated;
@@ -483,7 +483,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_COMPLEX64: {
-      gtl::FlatSet<std::pair<float, float>> already_generated;
+      absl::flat_hash_set<std::pair<float, float>> already_generated;
       std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
       test::FillFn<complex64>(&tensor, [&](int i) {
         complex64 generated;
@@ -500,7 +500,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_INT32: {
-      gtl::FlatSet<int32> already_generated;
+      absl::flat_hash_set<int32> already_generated;
       std::uniform_int_distribution<int32> distribution(-(1 << 20), 1 << 20);
       test::FillFn<int32>(&tensor, [&](int i) -> int32 {
         int32 generated;
@@ -513,7 +513,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_INT64: {
-      gtl::FlatSet<int64> already_generated;
+      absl::flat_hash_set<int64> already_generated;
       std::uniform_int_distribution<int64> distribution(-(1LL << 40),
                                                         1LL << 40);
       test::FillFn<int64>(&tensor, [&](int i) -> int64 {
@@ -527,7 +527,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_BOOL: {
-      gtl::FlatSet<bool> already_generated;
+      absl::flat_hash_set<bool> already_generated;
       std::bernoulli_distribution distribution;
       test::FillFn<bool>(&tensor, [&](int i) -> bool {
         bool generated;
diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD
index 1191cff109..dc097f3696 100644
--- a/tensorflow/compiler/xla/client/BUILD
+++ b/tensorflow/compiler/xla/client/BUILD
@@ -221,6 +221,7 @@ cc_library(
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 5277de6a85..e0ec91dba1 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
@@ -33,7 +34,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/mutex.h"
 
 namespace xla {
@@ -2290,7 +2290,7 @@ StatusOr<XlaComputation> XlaBuilder::BuildConstantSubGraph(
   // also a valid dependency order). The related ops will be added to the
   // subgraph in the same order.
   std::set<int64> related_ops;
-  tensorflow::gtl::FlatSet<int64> related_calls;  // Related computations.
+  absl::flat_hash_set<int64> related_calls;  // Related computations.
   std::queue<int64> worklist;
   worklist.push(root->id());
   related_ops.insert(root->id());
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index b7295e8a53..cd0d5ca5d3 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/client/padding.h"
@@ -35,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stacktrace.h"
 #include "tensorflow/core/platform/types.h"
@@ -1035,7 +1035,7 @@ class XlaBuilder {
   std::map<int64, HloComputationProto> embedded_;
 
   // The unique parameter numbers.
-  tensorflow::gtl::FlatSet<int64> parameter_numbers_;
+  absl::flat_hash_set<int64> parameter_numbers_;
 
   // The metadata to attach to each op. This is structured as a "modal"-like
   // operation, in order to simplify client code (and not sprinkle this metadata
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 8da6364786..13803f5ebe 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -147,6 +147,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -183,6 +184,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:span",
@@ -336,6 +338,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -490,6 +493,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -781,6 +785,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -959,6 +964,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -995,6 +1001,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
     ],
@@ -1043,6 +1050,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -1136,6 +1144,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -1230,6 +1239,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
@@ -1275,6 +1285,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -1348,6 +1359,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -1660,6 +1672,7 @@ cc_library(
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
     ],
@@ -2064,6 +2077,7 @@ cc_library(
         ":logical_buffer",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -2099,6 +2113,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
@@ -2120,6 +2135,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -2203,6 +2219,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2225,6 +2242,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
     ],
@@ -2286,6 +2304,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -2343,6 +2362,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2370,6 +2390,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2487,6 +2508,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2616,6 +2638,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2655,6 +2678,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -2730,6 +2754,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -3300,6 +3325,7 @@ cc_library(
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -3387,6 +3413,7 @@ cc_library(
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
index 58f78f8e24..002be9c970 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/bfloat16_propagation.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -81,7 +82,7 @@ void BFloat16Propagation::RevertIfFusionInternalBF16Changes(
   };
 
   auto root = fusion->fused_instructions_computation()->root_instruction();
-  tensorflow::gtl::FlatSet<const HloValue*> changed_root_buffers;
+  absl::flat_hash_set<const HloValue*> changed_root_buffers;
 
   auto root_changes_it = changes_to_bf16_.find(root);
   if (root_changes_it != changes_to_bf16_.end()) {
@@ -500,7 +501,7 @@ void BFloat16Propagation::AdjustCalledComputationRoot(HloInstruction* hlo) {
 
 bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper(
     HloComputation* computation,
-    tensorflow::gtl::FlatSet<const HloComputation*>* visited_computations) {
+    absl::flat_hash_set<const HloComputation*>* visited_computations) {
   bool parameter_changed = false;
   auto insts = computation->MakeInstructionPostOrder();
   // Do the adjustment on each instruction in the computation in reverse
@@ -560,7 +561,7 @@ bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper(
       // another input parameter. A fixed point will be reached because the
       // parameters can only be changed from BF16 to F32, not the other way
       // around.
-      tensorflow::gtl::FlatSet<const HloComputation*> visited_in_while;
+      absl::flat_hash_set<const HloComputation*> visited_in_while;
       while (ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_condition(),
                                                          &visited_in_while) ||
              ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_body(),
@@ -587,7 +588,7 @@ void BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers(
     HloModule* module) {
   const auto& computations_topological_order =
       module->MakeComputationPostOrder();
-  tensorflow::gtl::FlatSet<const HloComputation*> resolved;
+  absl::flat_hash_set<const HloComputation*> resolved;
   for (auto comp_it = computations_topological_order.rbegin();
        comp_it != computations_topological_order.rend(); ++comp_it) {
     if (ContainsKey(resolved, *comp_it)) {
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h
index c74326f631..5fcaa15c83 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.h
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/bfloat16_support.h"
 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -82,7 +83,7 @@ class BFloat16Propagation : public HloModulePass {
 
   // The set of instructions to consider using bfloat16, computed in the forward
   // pass.
-  tensorflow::gtl::FlatSet<const HloInstruction*> consider_using_bfloat16_;
+  absl::flat_hash_set<const HloInstruction*> consider_using_bfloat16_;
 
   // ***************************
   // Functions called and state produced by the backward pass (from root to
@@ -111,12 +112,12 @@ class BFloat16Propagation : public HloModulePass {
 
   // The set of HloInstructions that have been visited in the
   // opportunity-finding pass.
-  tensorflow::gtl::FlatSet<const HloInstruction*>
+  absl::flat_hash_set<const HloInstruction*>
       instructions_visited_in_backward_pass_;
 
   // The set of HloComputations that have been visited in the
   // opportunity-finding pass.
-  tensorflow::gtl::FlatSet<const HloComputation*>
+  absl::flat_hash_set<const HloComputation*>
       computations_visited_in_backward_pass_;
 
   // ***************************
@@ -132,7 +133,7 @@ class BFloat16Propagation : public HloModulePass {
   // point is reached.
   bool ResolveInconsistencyOfAliasingBuffersHelper(
       HloComputation* computation,
-      tensorflow::gtl::FlatSet<const HloComputation*>* visited_computations);
+      absl::flat_hash_set<const HloComputation*>* visited_computations);
 
   // Makes the parameters of called computations match how they are called by
   // the given HLO.
@@ -183,7 +184,7 @@ class BFloat16Propagation : public HloModulePass {
                                       PrimitiveType target_type);
 
   // The set of F32 HLO values that must be kept in F32.
-  tensorflow::gtl::FlatSet<const HloValue*> values_that_must_be_kept_as_f32_;
+  absl::flat_hash_set<const HloValue*> values_that_must_be_kept_as_f32_;
 
   // Mapping from each HloComputation to the number of callers to it in the
   // module. Populated at the beginning of this pass.
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 3efa0b1dad..2c2d1626c2 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -43,9 +44,9 @@ namespace xla {
 namespace {
 
 using absl::flat_hash_map;
+using absl::flat_hash_set;
 using absl::StrAppend;
 using absl::StrAppendFormat;
-using ::tensorflow::gtl::FlatSet;
 using ::tensorflow::strings::HumanReadableNumBytes;
 
 template <typename T>
@@ -129,8 +130,8 @@ Status GatherComputationsByAllocationType(
 
   // Sets for quickly checking membership. Computations are returned in vectors
   // for stable iteration.
-  FlatSet<const HloComputation*> thread_local_set;
-  FlatSet<const HloComputation*> global_set;
+  flat_hash_set<const HloComputation*> thread_local_set;
+  flat_hash_set<const HloComputation*> global_set;
 
   while (!worklist.empty()) {
     auto worklist_front = worklist.front();
@@ -445,7 +446,7 @@ bool BufferAssignment::SharesSliceAtIndex(
 bool BufferAssignment::HaveDisjointSlices(const HloInstruction* hlo_a,
                                           const HloInstruction* hlo_b) const {
   using SliceSet =
-      FlatSet<BufferAllocation::Slice, BufferAllocation::Slice::Hasher>;
+      flat_hash_set<BufferAllocation::Slice, BufferAllocation::Slice::Hasher>;
   // Gets the slices all of instr's subshapes.  If any subshape doesn't have an
   // assigned slice, returns the empty set.
   auto collect_slices = [&](const HloInstruction* instr) -> SliceSet {
@@ -815,9 +816,9 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
 
 Status BufferAssigner::AssignBuffersForComputation(
     const HloComputation* computation, bool is_thread_local,
-    const FlatSet<const LogicalBuffer*>& colocated_buffers,
-    const FlatSet<BufferAllocation::Index>& colocated_allocations,
-    flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>*
+    const flat_hash_set<const LogicalBuffer*>& colocated_buffers,
+    const flat_hash_set<BufferAllocation::Index>& colocated_allocations,
+    flat_hash_map<const HloComputation*, flat_hash_set<const LogicalBuffer*>>*
         buffers_to_assign_sequentially,
     BufferAssignment* assignment) {
   // Buffers are sorted and assigned to BufferAllocations in decreasing order of
@@ -853,8 +854,8 @@ Status BufferAssigner::AssignBuffersForComputation(
     // buffers_to_assign_sequentially map, even if we end up with an empty set
     // of buffers. This ensures we can correctly determine whether to run
     // whole-module heap simulation.
-    buffers_to_assign_sequentially->emplace(computation,
-                                            FlatSet<const LogicalBuffer*>());
+    buffers_to_assign_sequentially->emplace(
+        computation, flat_hash_set<const LogicalBuffer*>());
   }
 
   // Sort the LogicalBuffers first by size. We assign the larger LogicalBuffers
@@ -1046,11 +1047,11 @@ Status BufferAssigner::AssignBuffersForComputation(
   return Status::OK();
 }
 
-flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+flat_hash_map<LogicalBuffer::Color, flat_hash_set<const LogicalBuffer*>,
               LogicalBuffer::Color::Hasher>
 BufferAssigner::SplitBuffersByColor(
-    const FlatSet<const LogicalBuffer*>& buffers) {
-  flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+    const flat_hash_set<const LogicalBuffer*>& buffers) {
+  flat_hash_map<LogicalBuffer::Color, flat_hash_set<const LogicalBuffer*>,
                 LogicalBuffer::Color::Hasher>
       color_map;
   for (auto buffer : buffers) {
@@ -1060,7 +1061,8 @@ BufferAssigner::SplitBuffersByColor(
 }
 
 Status BufferAssigner::AssignBuffersWithSequentialOrdering(
-    const flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>&
+    const flat_hash_map<const HloComputation*,
+                        flat_hash_set<const LogicalBuffer*>>&
         buffers_to_assign_sequentially,
     bool run_whole_module_heap_simulation, BufferAssignment* assignment) {
   // Run the sequence of instructions through the heap simulator.  The heuristic
@@ -1086,10 +1088,11 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
     // only live for the duration of their calling instructions.
     VLOG(1) << "Running whole-module heap simulation";
     HloSchedule schedule(&assignment->module());
-    FlatSet<const LogicalBuffer*> all_buffers_to_assign;
+    flat_hash_set<const LogicalBuffer*> all_buffers_to_assign;
     for (const auto& pair : buffers_to_assign_sequentially) {
       const HloComputation* computation = pair.first;
-      const FlatSet<const LogicalBuffer*>& buffers_to_assign = pair.second;
+      const flat_hash_set<const LogicalBuffer*>& buffers_to_assign =
+          pair.second;
       const std::vector<const HloInstruction*>* instruction_sequence =
           hlo_ordering.SequentialOrder(*computation);
       CHECK(instruction_sequence != nullptr) << computation->name();
@@ -1123,7 +1126,8 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
     VLOG(1) << "Running per-computation heap simulation";
     for (const auto& pair : buffers_to_assign_sequentially) {
       const HloComputation* computation = pair.first;
-      const FlatSet<const LogicalBuffer*>& buffers_to_assign = pair.second;
+      const flat_hash_set<const LogicalBuffer*>& buffers_to_assign =
+          pair.second;
       const std::vector<const HloInstruction*>* instruction_sequence =
           hlo_ordering.SequentialOrder(*computation);
       CHECK(instruction_sequence != nullptr) << computation->name();
@@ -1198,7 +1202,7 @@ std::vector<const LogicalBuffer*> ComputePeakMemoryLogicalBuffers(
 
   // Next gather the set of logical buffers live at the earliest point of
   // maximal live set size.
-  tensorflow::gtl::FlatSet<const LogicalBuffer*> live_buffers;
+  absl::flat_hash_set<const LogicalBuffer*> live_buffers;
   live_size = 0;
   for (const auto& event : heap_trace.events()) {
     const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id());
@@ -1588,8 +1592,8 @@ void BufferAssigner::BuildColocatedBufferSets(
 void BufferAssigner::AssignColocatedBufferSets(
     const std::vector<ColocatedBufferSet>& colocated_buffer_sets,
     BufferAssignment* assignment,
-    FlatSet<const LogicalBuffer*>* colocated_buffers,
-    FlatSet<BufferAllocation::Index>* colocated_allocations) {
+    flat_hash_set<const LogicalBuffer*>* colocated_buffers,
+    flat_hash_set<BufferAllocation::Index>* colocated_allocations) {
   for (const ColocatedBufferSet& colocated_buffer_set : colocated_buffer_sets) {
     BufferAllocation* allocation = nullptr;
     // Set 'entry_parameter_number' and 'entry_parameter_shape_idx' if entry
@@ -1662,8 +1666,8 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
   // Once b/32491382 enables module-level liveness analysis, we may be able
   // to assign colocated buffers (or at least reuse their allocation for
   // buffers outside of the set) in AssignBuffersForComputation.
-  FlatSet<const LogicalBuffer*> colocated_buffers;
-  FlatSet<BufferAllocation::Index> colocated_allocations;
+  flat_hash_set<const LogicalBuffer*> colocated_buffers;
+  flat_hash_set<BufferAllocation::Index> colocated_allocations;
   std::vector<ColocatedBufferSet> colocated_buffer_sets;
   BuildColocatedBufferSets(module, assignment->liveness(),
                            assignment->buffer_size_, &colocated_buffer_sets);
@@ -1681,7 +1685,7 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
 
   // First assign buffers for global computatations. Temporary buffers for
   // sequential computations are collected in 'buffers_to_assign_sequentially'.
-  flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>
+  flat_hash_map<const HloComputation*, flat_hash_set<const LogicalBuffer*>>
       buffers_to_assign_sequentially;
   for (auto* computation : global_computations) {
     TF_RETURN_IF_ERROR(AssignBuffersForComputation(
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h
index 9ba40617a3..899cd36e1f 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.h
+++ b/tensorflow/compiler/xla/service/buffer_assignment.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
@@ -34,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -554,11 +554,10 @@ class BufferAssigner {
   // true.
   Status AssignBuffersForComputation(
       const HloComputation* computation, bool is_thread_local,
-      const tensorflow::gtl::FlatSet<const LogicalBuffer*>& colocated_buffers,
-      const tensorflow::gtl::FlatSet<BufferAllocation::Index>&
-          colocated_allocations,
+      const absl::flat_hash_set<const LogicalBuffer*>& colocated_buffers,
+      const absl::flat_hash_set<BufferAllocation::Index>& colocated_allocations,
       absl::flat_hash_map<const HloComputation*,
-                          tensorflow::gtl::FlatSet<const LogicalBuffer*>>*
+                          absl::flat_hash_set<const LogicalBuffer*>>*
           buffers_to_assign_sequentially,
       BufferAssignment* assignment);
 
@@ -569,7 +568,7 @@ class BufferAssigner {
   // assuming all global computations are sequentially ordered.
   Status AssignBuffersWithSequentialOrdering(
       const absl::flat_hash_map<const HloComputation*,
-                                tensorflow::gtl::FlatSet<const LogicalBuffer*>>&
+                                absl::flat_hash_set<const LogicalBuffer*>>&
           buffers_to_assign_sequentially,
       bool run_whole_module_heap_simulation, BufferAssignment* assignment);
 
@@ -589,7 +588,7 @@ class BufferAssigner {
   // alias. Explicitly handling these colocated buffers is necessary because
   // points-to analysis is computation level scope and does not recognize
   // aliasing across computations (b/32491382).
-  using ColocatedBufferSet = tensorflow::gtl::FlatSet<const LogicalBuffer*>;
+  using ColocatedBufferSet = absl::flat_hash_set<const LogicalBuffer*>;
 
   // Returns a vector of ColocatedBufferSet objects, where each
   // ColocatedBufferSet aggregates a set of related LogicalBuffers from 'module'
@@ -604,8 +603,8 @@ class BufferAssigner {
   void AssignColocatedBufferSets(
       const std::vector<ColocatedBufferSet>& colocated_buffer_sets,
       BufferAssignment* assignment,
-      tensorflow::gtl::FlatSet<const LogicalBuffer*>* colocated_buffers,
-      tensorflow::gtl::FlatSet<BufferAllocation::Index>* colocated_allocations);
+      absl::flat_hash_set<const LogicalBuffer*>* colocated_buffers,
+      absl::flat_hash_set<BufferAllocation::Index>* colocated_allocations);
 
   // Adds the 'colocated_set' of buffers to 'colocated_buffer_sets', maintaining
   // the invariant that all sets in 'colocated_buffer_sets' are disjoint.
@@ -624,10 +623,9 @@ class BufferAssigner {
   // Split a set of buffers into several sets, each of which contains buffers
   // colored with the same color.
   absl::flat_hash_map<LogicalBuffer::Color,
-                      tensorflow::gtl::FlatSet<const LogicalBuffer*>,
+                      absl::flat_hash_set<const LogicalBuffer*>,
                       LogicalBuffer::Color::Hasher>
-  SplitBuffersByColor(
-      const tensorflow::gtl::FlatSet<const LogicalBuffer*>& buffers);
+  SplitBuffersByColor(const absl::flat_hash_set<const LogicalBuffer*>& buffers);
 
   // If true, buffer assignments assumes that input parameter buffers and output
   // buffers can be shared if their sizes match.
diff --git a/tensorflow/compiler/xla/service/buffer_liveness.h b/tensorflow/compiler/xla/service/buffer_liveness.h
index 2911bbcfbf..f939a426ea 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness.h
+++ b/tensorflow/compiler/xla/service/buffer_liveness.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_ordering.h"
@@ -27,7 +28,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -101,7 +101,7 @@ class BufferLiveness {
   // Set of LogicalBuffers which are aliased in the output of other
   // instructions. For example, a LogicalBuffer which is inserted into a tuple
   // is considered to be aliased and will be in this set.
-  tensorflow::gtl::FlatSet<const LogicalBuffer*> aliased_buffers_;
+  absl::flat_hash_set<const LogicalBuffer*> aliased_buffers_;
 
   // LogicalBuffers that may be live out of the entry computation.
   PointsToSet::BufferSet maybe_live_out_buffers_;
diff --git a/tensorflow/compiler/xla/service/buffer_value_containers.h b/tensorflow/compiler/xla/service/buffer_value_containers.h
index 305914fca8..cc46af5eee 100644
--- a/tensorflow/compiler/xla/service/buffer_value_containers.h
+++ b/tensorflow/compiler/xla/service/buffer_value_containers.h
@@ -16,10 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_CONTAINERS_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_CONTAINERS_H_
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
 #include "tensorflow/core/lib/gtl/compactptrset.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -38,7 +38,7 @@ BufferValueCompactPointerSet ToBufferValueCompactPointerSet(
   return output;
 }
 
-using BufferValueFlatSet = tensorflow::gtl::FlatSet<const BufferValue*>;
+using BufferValueFlatSet = absl::flat_hash_set<const BufferValue*>;
 template <class LogicalBufferContainerT>
 BufferValueFlatSet ToBufferValueFlatSet(
     const LogicalBufferContainerT& logical_buffer_container) {
diff --git a/tensorflow/compiler/xla/service/call_graph.cc b/tensorflow/compiler/xla/service/call_graph.cc
index 23b2a32709..bdd5069632 100644
--- a/tensorflow/compiler/xla/service/call_graph.cc
+++ b/tensorflow/compiler/xla/service/call_graph.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <queue>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -138,7 +139,7 @@ CallGraphNode& CallGraph::GetNode(const HloComputation* computation) {
 
 bool CallGraph::DominatesHelper(
     const HloComputation* a, const HloComputation* b,
-    tensorflow::gtl::FlatSet<const HloComputation*>* visited) const {
+    absl::flat_hash_set<const HloComputation*>* visited) const {
   if (a == b || ContainsKey(*visited, b)) {
     // The call graph is guaranteed to be acyclic so any previously visited node
     // we encounter was already determined to be dominated.
@@ -163,7 +164,7 @@ bool CallGraph::DominatesHelper(
 
 bool CallGraph::Dominates(const HloComputation* a,
                           const HloComputation* b) const {
-  tensorflow::gtl::FlatSet<const HloComputation*> visited;
+  absl::flat_hash_set<const HloComputation*> visited;
   return DominatesHelper(a, b, &visited);
 }
 
@@ -277,7 +278,7 @@ std::unique_ptr<CallGraph> CallGraph::Build(const HloModule* module) {
 
 Status CallGraph::VisitNodesInternal(
     const VisitorFunction& visitor_func, const CallGraphNode& node,
-    tensorflow::gtl::FlatSet<const CallGraphNode*>* visited) const {
+    absl::flat_hash_set<const CallGraphNode*>* visited) const {
   auto pair = visited->insert(&node);
   if (!pair.second) {
     // Node was not inserted. Node has already been visited.
@@ -294,7 +295,7 @@ Status CallGraph::VisitNodesInternal(
 
 Status CallGraph::VisitNodes(const VisitorFunction& visitor_func,
                              bool visit_unreachable_nodes) const {
-  tensorflow::gtl::FlatSet<const CallGraphNode*> visited;
+  absl::flat_hash_set<const CallGraphNode*> visited;
   if (visit_unreachable_nodes) {
     // Traverse from all roots in the call graph.
     for (const CallGraphNode& node : nodes()) {
diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h
index 0c2e9b99db..cb56f4789d 100644
--- a/tensorflow/compiler/xla/service/call_graph.h
+++ b/tensorflow/compiler/xla/service/call_graph.h
@@ -21,10 +21,10 @@ limitations under the License.
 #include <ostream>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -145,12 +145,12 @@ class CallGraphNode {
   // The computations called by this computation. The vector is used for a
   // stable ordering and the set enables fast membership testing.
   std::vector<HloComputation*> callees_;
-  tensorflow::gtl::FlatSet<HloComputation*> callee_set_;
+  absl::flat_hash_set<HloComputation*> callee_set_;
 
   // The computations which call this computation. The vector is used for a
   // stable ordering and the set enables fast membership testing.
   std::vector<HloComputation*> callers_;
-  tensorflow::gtl::FlatSet<HloComputation*> caller_set_;
+  absl::flat_hash_set<HloComputation*> caller_set_;
 
   // The call sites in this computation
   std::vector<CallSite> callsites_;
@@ -250,14 +250,14 @@ class CallGraph {
   // 'visited'.
   Status VisitNodesInternal(
       const VisitorFunction& visitor_func, const CallGraphNode& node,
-      tensorflow::gtl::FlatSet<const CallGraphNode*>* visited) const;
+      absl::flat_hash_set<const CallGraphNode*>* visited) const;
 
   // Recursive helper for computing whether 'a' dominates 'b' in the call
   // graph. 'b_ancestor' is the currently visited node (which starts at 'b'),
   // and 'visited' is the set of computations which have been visited.
   bool DominatesHelper(
       const HloComputation* a, const HloComputation* b,
-      tensorflow::gtl::FlatSet<const HloComputation*>* visited) const;
+      absl::flat_hash_set<const HloComputation*>* visited) const;
 
   // The HLO module represented by this call graph.
   const HloModule* module_ = nullptr;
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index 7f78412924..f35324aa35 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_alias_analysis.h"
@@ -32,7 +33,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -904,7 +904,7 @@ class CopyRemover {
     // The heads of all the value lists. Each value list represents the HLO
     // values contained in a particular HLO buffer. The values in the list are
     // in dependency order.
-    tensorflow::gtl::FlatSet<const ValueNode*> value_lists_;
+    absl::flat_hash_set<const ValueNode*> value_lists_;
 
     // Copy removal requires fast access to the value list elements
     // corresponding to the source and destination values of the kCopy
@@ -1009,7 +1009,7 @@ Status CopyInsertion::AddSpecialCaseCopies(const CallGraph& call_graph,
     HloInstruction* root = computation->root_instruction();
 
     // Mark nondistinct/ambiguous indices.
-    tensorflow::gtl::FlatSet<const HloBuffer*> seen;
+    absl::flat_hash_set<const HloBuffer*> seen;
     ShapeUtil::ForEachSubshape(
         root->shape(), [&](const Shape& /*subshape*/, const ShapeIndex& index) {
           std::vector<const HloBuffer*> buffers_at_index =
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 6a83909a3b..ae4c6e962d 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -291,6 +291,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 953a75c35f..a70abb117a 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
@@ -68,7 +69,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/core/lib/core/bits.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -1400,8 +1400,8 @@ static bool ReductionPreservesLayout(const HloInstruction& reduce) {
   // [0->0, 3->1].
   absl::flat_hash_map<int64, int64> unreduced_dim_map;
 
-  gtl::FlatSet<int64> reduced_dims(reduce.dimensions().begin(),
-                                   reduce.dimensions().end());
+  absl::flat_hash_set<int64> reduced_dims(reduce.dimensions().begin(),
+                                          reduce.dimensions().end());
 
   const Shape& operand_shape = reduce.operand(0)->shape();
   const Shape& result_shape = reduce.shape();
@@ -1977,7 +1977,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) {
   //
   // * Implement the memcpy within the innermost loop.
 
-  gtl::FlatSet<int64> inner_dims;
+  absl::flat_hash_set<int64> inner_dims;
   for (int64 dim : LayoutUtil::MinorToMajor(layout)) {
     if (operand->shape().dimensions(dim) != slice->shape().dimensions(dim)) {
       break;
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
index 7af51db55a..b35fd9dad8 100644
--- a/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
@@ -121,7 +121,7 @@ TEST_F(CpuNoAliasTest, Concat) {
     CHECK: %read_concat2_array = load {{.*}} !alias.scope [[concat1_noalias]], !noalias [[concat1_scope]]
     CHECK-DAG: [[buf_size32:![0-9]+]] = !{!"buffer:{{.*}} size:32
     CHECK-DAG: [[buf_size48:![0-9]+]] = !{!"buffer:{{.*}} size:48
-    CHECK-DAG: [[param_x_noalias]] = !{[[buf_size32]], [[buf_size48]]}
+    CHECK-DAG: [[param_x_noalias]] = !{[[buf_size48]], [[buf_size32]]}
     CHECK-DAG: [[concat1_scope]] = !{[[buf_size32]]}
     CHECK-DAG: [[concat1_noalias]] = !{[[buf_size48]]}
   )";
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index e65d3fa332..a838464cae 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -476,6 +476,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:instruction_fusion",
         "//tensorflow/compiler/xla/service:pattern_matcher",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -508,6 +509,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:multi_output_fusion",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -541,6 +543,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
index 79c74e7e8b..e2ab00ce41 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <set>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
@@ -27,7 +28,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
index 4d5d8e99f8..b61f038739 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -125,8 +126,8 @@ bool IsIEEEFloatingPointScalarConstant(const HloInstruction* constant) {
   }
 
   // Compute the precise number of operands to the new fusion.
-  tensorflow::gtl::FlatSet<const HloInstruction*> operands(
-      a->operands().begin(), a->operands().end());
+  absl::flat_hash_set<const HloInstruction*> operands(a->operands().begin(),
+                                                      a->operands().end());
   operands.insert(b->operands().begin(), b->operands().end());
   // If there's an edge between `a` and `b`, don't count it: We're fusing that
   // producer -> consumer relationship.
diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
index c21f76f6eb..835924024b 100644
--- a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h"
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -101,7 +101,7 @@ bool GpuMultiOutputFusion::IsFusible(HloInstruction* instr) {
 
 int64 GpuMultiOutputFusion::GetProfit(HloInstruction* instr1,
                                       HloInstruction* instr2) {
-  tensorflow::gtl::FlatSet<HloInstruction*> in_list;
+  absl::flat_hash_set<HloInstruction*> in_list;
   for (auto instr : instr1->operands()) {
     if (!IsProfitableOperand(instr)) {
       continue;
@@ -148,7 +148,7 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() {
   bool changed = false;
   RecomputeReachability();
 
-  tensorflow::gtl::FlatSet<HloInstruction*> to_fuse;
+  absl::flat_hash_set<HloInstruction*> to_fuse;
   // Keep a list of the instructions to fuse after making all the fusion
   // decisions. We first aggressively add instructions to potential_fusion_list,
   // then filter out instructions that will be no longer fusible because of
diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index 147776c8c4..b343305554 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -26,7 +27,7 @@ limitations under the License.
 namespace xla {
 
 using absl::flat_hash_map;
-using tensorflow::gtl::FlatSet;
+using absl::flat_hash_set;
 
 /*static*/
 StatusOr<int64> HeapSimulator::MinimumMemoryForModule(
@@ -116,9 +117,9 @@ Status HeapSimulator::RunComputation(
   // 'used_buffers' is the reverse map - it tracks which buffers were used by an
   // instruction, so that we can remove the instructions from a buffer's live
   // set after they are visited.
-  flat_hash_map<const BufferValue*, FlatSet<const HloInstruction*>>
+  flat_hash_map<const BufferValue*, flat_hash_set<const HloInstruction*>>
       live_buffers;
-  flat_hash_map<const HloInstruction*, FlatSet<const BufferValue*>>
+  flat_hash_map<const HloInstruction*, flat_hash_set<const BufferValue*>>
       used_buffers;
   auto add_user_to_buffer = [this, &live_buffers, &used_buffers](
                                 const HloInstruction* user,
@@ -216,7 +217,7 @@ Status HeapSimulator::RunComputation(
       VLOG(4) << "  Removing user " << instruction->name() << " from buffer "
               << operand_buffer->ToString();
       auto it = live_buffers.find(operand_buffer);
-      FlatSet<const HloInstruction*>* live_set = &it->second;
+      flat_hash_set<const HloInstruction*>* live_set = &it->second;
       live_set->erase(instruction);
       if (live_set->empty()) {
         live_buffers.erase(it);
@@ -238,7 +239,7 @@ Status HeapSimulator::RunComputation(
     // that we should assign.
 
     // Make sure each buffer get reused at most once.
-    FlatSet<const BufferValue*> reused_buffers;
+    flat_hash_set<const BufferValue*> reused_buffers;
     for (const BufferValue* buffer : buffers_defined_by_instruction) {
       if (IgnoreBuffer(buffer)) {
         continue;
@@ -326,7 +327,7 @@ Status HeapSimulator::RunComputation(
   to_free.reserve(live_buffers.size());
   for (const auto& buffer_pending : live_buffers) {
     const BufferValue* buffer = buffer_pending.first;
-    const FlatSet<const HloInstruction*>& pending = buffer_pending.second;
+    const flat_hash_set<const HloInstruction*>& pending = buffer_pending.second;
     CHECK_EQ(pending.size(), 1) << *buffer;
     CHECK(*pending.begin() == nullptr) << *buffer;
     to_free.push_back(buffer);
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index a5bb3f81f7..b0295a6163 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
 #include "tensorflow/compiler/xla/service/buffer_value_containers.h"
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -197,8 +197,8 @@ class HeapSimulator {
       shared_buffers_;
 
   // Hold some sets for error-checking the sequence of Alloc and Free calls.
-  tensorflow::gtl::FlatSet<const BufferValue*> allocated_buffers_;
-  tensorflow::gtl::FlatSet<const BufferValue*> freed_buffers_;
+  absl::flat_hash_set<const BufferValue*> allocated_buffers_;
+  absl::flat_hash_set<const BufferValue*> freed_buffers_;
 
   // Debugging information filled in while the heap simulator runs.
   HeapSimulatorTrace debug_trace_;
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index b6e1f52cf5..c3da12e273 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -120,7 +121,7 @@ class BufferValueMap {
   }
 
   // Return a set of all the values in the given buffer.
-  const tensorflow::gtl::FlatSet<const HloValue*>& GetValuesInBuffer(
+  const absl::flat_hash_set<const HloValue*>& GetValuesInBuffer(
       BufferNumber buffer_number) const {
     return buffers_.at(buffer_number);
   }
@@ -143,7 +144,7 @@ class BufferValueMap {
   // Move the given value into the given buffer.
   void MoveValueToBuffer(const HloValue& value, BufferNumber buffer_number) {
     BufferNumber old_buffer_number = value_to_buffer_number_.at(&value);
-    tensorflow::gtl::FlatSet<const HloValue*>& old_value_set =
+    absl::flat_hash_set<const HloValue*>& old_value_set =
         buffers_.at(old_buffer_number);
     old_value_set.erase(&value);
     if (old_value_set.empty()) {
@@ -291,7 +292,7 @@ class BufferValueMap {
   const HloDataflowAnalysis& dataflow_;
 
   // A map containing the set of values contained in each buffer.
-  absl::flat_hash_map<BufferNumber, tensorflow::gtl::FlatSet<const HloValue*>>
+  absl::flat_hash_map<BufferNumber, absl::flat_hash_set<const HloValue*>>
       buffers_;
 
   // A map indicating which buffer each value is contained in.
@@ -351,7 +352,7 @@ bool HloAliasAnalysis::InstructionBuffersAreAmbiguous(
 
 bool HloAliasAnalysis::InstructionBuffersAreDistinct(
     const HloInstruction* instruction) const {
-  tensorflow::gtl::FlatSet<const HloBuffer*> buffers_seen;
+  absl::flat_hash_set<const HloBuffer*> buffers_seen;
   for (const auto& pair :
        dataflow_analysis_->GetInstructionValueSet(instruction)) {
     const HloValueSet& value_set = pair.second;
diff --git a/tensorflow/compiler/xla/service/hlo_buffer.cc b/tensorflow/compiler/xla/service/hlo_buffer.cc
index 6c11a073b7..9c3aa0e64d 100644
--- a/tensorflow/compiler/xla/service/hlo_buffer.cc
+++ b/tensorflow/compiler/xla/service/hlo_buffer.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 257dd5876f..6ef67ab0a8 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -25,6 +25,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
@@ -40,7 +41,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -278,10 +278,9 @@ void HloComputation::set_root_instruction(HloInstruction* new_root_instruction,
 namespace {
 
 // Helper which builds a post order of the HLO call graph.
-void ComputeComputationPostOrder(
-    HloComputation* computation,
-    tensorflow::gtl::FlatSet<HloComputation*>* visited,
-    std::vector<HloComputation*>* post_order) {
+void ComputeComputationPostOrder(HloComputation* computation,
+                                 absl::flat_hash_set<HloComputation*>* visited,
+                                 std::vector<HloComputation*>* post_order) {
   if (visited->insert(computation).second) {
     for (auto* instruction : computation->instructions()) {
       for (HloComputation* called_computation :
@@ -416,7 +415,7 @@ std::vector<HloInstruction*> HloComputation::MakeInstructionPostOrder() const {
 
 std::vector<HloComputation*> HloComputation::MakeEmbeddedComputationsList()
     const {
-  tensorflow::gtl::FlatSet<HloComputation*> visited;
+  absl::flat_hash_set<HloComputation*> visited;
   std::vector<HloComputation*> post_order;
 
   // To avoid special handling of this computation, cast away const of
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index af929ac009..d87ab4bda1 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -26,6 +26,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/iterator_util.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -41,7 +42,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/hlo_cse.cc b/tensorflow/compiler/xla/service/hlo_cse.cc
index b59c9ba3ed..e602107cbe 100644
--- a/tensorflow/compiler/xla/service/hlo_cse.cc
+++ b/tensorflow/compiler/xla/service/hlo_cse.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/literal.h"
@@ -34,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 
 namespace xla {
@@ -137,8 +137,8 @@ StatusOr<bool> HloCSE::Run(HloModule* module) {
     // HLO instructions are grouped into equivalency classes by using the
     // cse_equal predicate defined above. This set holds a representative
     // instruction for each class.
-    tensorflow::gtl::FlatSet<HloInstruction*, decltype(&CseHash),
-                             decltype(cse_equal)>
+    absl::flat_hash_set<HloInstruction*, decltype(&CseHash),
+                        decltype(cse_equal)>
         representatives(/*N=*/computation->instruction_count() + 1, &CseHash,
                         cse_equal);
     for (auto instruction : computation->MakeInstructionPostOrder()) {
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index 6a63681996..44cde4a3d2 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <queue>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
@@ -91,7 +92,7 @@ HloDataflowAnalysis::HloDataflowAnalysis(
 
 bool HloDataflowAnalysis::AreTransitiveUsesElementwiseOrTuple(
     const HloInstruction* inst) {
-  tensorflow::gtl::FlatSet<const HloInstruction*> visited;
+  absl::flat_hash_set<const HloInstruction*> visited;
   absl::InlinedVector<const HloInstruction*, 4> stack;
   stack.push_back(inst);
   while (!stack.empty()) {
@@ -159,8 +160,8 @@ void HloDataflowAnalysis::MarkValueForDeletion(HloValue::Id value_id) {
 void HloDataflowAnalysis::DeleteMarkedValues() {
 #ifndef NDEBUG
   // Verify that no marked-for-deletion values are in any of the value sets.
-  tensorflow::gtl::FlatSet<HloValue::Id> id_set(value_ids_to_delete_.begin(),
-                                                value_ids_to_delete_.end());
+  absl::flat_hash_set<HloValue::Id> id_set(value_ids_to_delete_.begin(),
+                                           value_ids_to_delete_.end());
   for (const auto& pair : value_sets_) {
     const HloInstruction* instruction = pair.first;
     const InstructionValueSet& instruction_value_set = pair.second;
@@ -673,7 +674,7 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet(
 
 void HloDataflowAnalysis::Propagate() {
   std::queue<HloInstruction*> worklist;
-  tensorflow::gtl::FlatSet<HloInstruction*> workset;
+  absl::flat_hash_set<HloInstruction*> workset;
   auto add_to_worklist = [&worklist, &workset](HloInstruction* instruction) {
     if (workset.insert(instruction).second) {
       worklist.push(instruction);
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc
index 159c39d557..6ca1255ede 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -217,7 +218,7 @@ bool HloDomainMap::IsDomainInstruction(HloInstruction* instruction) const {
 
 /* static */ std::vector<HloInstruction*>
 HloDomainMap::MakeNonDomainInstructions(
-    const tensorflow::gtl::FlatSet<HloInstruction*>& instruction_set,
+    const absl::flat_hash_set<HloInstruction*>& instruction_set,
     const InstructionOrderMap& instructions_order) {
   std::vector<HloInstruction*> instructions;
   instructions.reserve(instruction_set.size());
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h b/tensorflow/compiler/xla/service/hlo_domain_map.h
index 8584bc021d..c8d581b746 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.h
@@ -20,13 +20,13 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_domain_metadata.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -110,7 +110,7 @@ class HloDomainMap {
   // Out of an instruction set, returns a vector of all the ones which are not
   // a kDomain kind.
   static std::vector<HloInstruction*> MakeNonDomainInstructions(
-      const tensorflow::gtl::FlatSet<HloInstruction*>& instruction_set,
+      const absl::flat_hash_set<HloInstruction*>& instruction_set,
       const InstructionOrderMap& instructions_order);
 
   // Populates domain_metadata_id_ that maps each HloInstruction to the unique
diff --git a/tensorflow/compiler/xla/service/hlo_domain_metadata.h b/tensorflow/compiler/xla/service/hlo_domain_metadata.h
index 302807f816..d3c83c15ae 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_metadata.h
@@ -20,11 +20,11 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -42,7 +42,7 @@ class DomainMetadata {
     // operand/user pathways, without crossing a kDomain instruction of a given
     // kind. The reach_set can contain kDomain instructions of other kinds, if
     // two domains of different kind intersect each other.
-    tensorflow::gtl::FlatSet<HloInstruction*> reach_set;
+    absl::flat_hash_set<HloInstruction*> reach_set;
 
     // The same instructions in reach_set, but purged from kDomain instructions
     // and ordered according to their computation graph post-order, i.e.
@@ -55,8 +55,8 @@ class DomainMetadata {
     // whose dataflow enters the reach set (domain), while the exit_domains
     // contains the set of kDomain instructions whose dataflow exit the reach
     // set.
-    tensorflow::gtl::FlatSet<HloInstruction*> enter_domains;
-    tensorflow::gtl::FlatSet<HloInstruction*> exit_domains;
+    absl::flat_hash_set<HloInstruction*> enter_domains;
+    absl::flat_hash_set<HloInstruction*> exit_domains;
   };
 
   virtual ~DomainMetadata() = default;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 5d5c9c7e58..0207f9ae3f 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/ascii.h"
@@ -44,7 +45,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/human_readable_json.h"
 #include "tensorflow/core/platform/logging.h"
@@ -1433,7 +1433,7 @@ int64 HloInstruction::operand_index(const HloInstruction* target) const {
 
 HloInstruction::InstructionVector HloInstruction::unique_operands() const {
   InstructionVector unique;
-  tensorflow::gtl::FlatSet<const HloInstruction*> seen;
+  absl::flat_hash_set<const HloInstruction*> seen;
   for (HloInstruction* operand : operands()) {
     if (seen.insert(operand).second) {
       unique.push_back(operand);
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index 1c2b2868fd..55314d0ae9 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
@@ -111,7 +112,7 @@ class ListScheduler {
     // LogicalBuffer is in an operand of the instruction as indicated by
     // points-to analysis.
     for (auto* instruction : computation.instructions()) {
-      tensorflow::gtl::FlatSet<const LogicalBuffer*> instr_uses;
+      absl::flat_hash_set<const LogicalBuffer*> instr_uses;
       for (auto* operand : instruction->operands()) {
         points_to_analysis.GetPointsToSet(operand).ForEachElement(
             [&](const ShapeIndex& /*index*/,
@@ -360,7 +361,7 @@ class ListScheduler {
   std::unordered_map<const LogicalBuffer*, int64> unscheduled_use_count_;
 
   // Set of instructions which have been scheduled.
-  tensorflow::gtl::FlatSet<const HloInstruction*> scheduled_instructions_;
+  absl::flat_hash_set<const HloInstruction*> scheduled_instructions_;
 };
 
 int64 SumLogicalBufferSizes(
@@ -418,7 +419,7 @@ StatusOr<HloInstructionSequence> DFSMemoryScheduler(
         points_to_analysis.GetBuffersDefinedByInstruction(hlo), size_function);
     total_sizes[hlo] = logical_buffer_size;
     cumulative_total_size += logical_buffer_size;
-    tensorflow::gtl::FlatSet<const HloInstruction*> unique_operands(
+    absl::flat_hash_set<const HloInstruction*> unique_operands(
         hlo->operands().begin(), hlo->operands().end());
     for (const HloInstruction* operand : unique_operands) {
       extra_users[hlo] += extra_users[operand];
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 9359e9a8be..7527e35c95 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -24,6 +24,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -328,10 +329,10 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
 
   // Because we didn't uniquify the names or the ids, double-check that the
   // instruction and computation names and ids are unique from the proto.
-  tensorflow::gtl::FlatSet<string> computation_names;
-  tensorflow::gtl::FlatSet<string> instruction_names;
-  tensorflow::gtl::FlatSet<int> computation_ids;
-  tensorflow::gtl::FlatSet<int> instruction_ids;
+  absl::flat_hash_set<string> computation_names;
+  absl::flat_hash_set<string> instruction_names;
+  absl::flat_hash_set<int> computation_ids;
+  absl::flat_hash_set<int> instruction_ids;
   for (HloComputation* computation : module->computations()) {
     TF_RET_CHECK(!ContainsKey(computation_names, computation->name()))
         << "Computation name is not unique: " << computation->name();
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.cc b/tensorflow/compiler/xla/service/hlo_module_group_util.cc
index d83ee71490..fddeb5f0a2 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_util.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_util.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
@@ -32,7 +33,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -42,7 +42,7 @@ std::vector<HloInstruction*> HloModuleGroupUtil::GlobalPredecessors(
     HloInstruction* instruction) {
   std::vector<HloInstruction*>
       predecessors;  // Use a vector to avoid non-determinism.
-  tensorflow::gtl::FlatSet<HloInstruction*> unique;
+  absl::flat_hash_set<HloInstruction*> unique;
 
   // Adds to the unique predecessors list; if the predecessors is a companion
   // instruction, also add companion instructions; if the predecessors is a
@@ -119,7 +119,7 @@ std::vector<HloInstruction*> HloModuleGroupUtil::GlobalSuccessors(
     HloInstruction* instruction) {
   std::vector<HloInstruction*>
       successors;  // Use a vector to avoid non-determinism.
-  tensorflow::gtl::FlatSet<HloInstruction*> unique;
+  absl::flat_hash_set<HloInstruction*> unique;
 
   // Adds to the unique successors list; if the successor is a companion
   // instruction, also add companion instructions; if the successor is a
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
index 59fd01cb58..5e004ce78a 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <functional>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
@@ -25,7 +26,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -75,8 +75,8 @@ StatusOr<bool> HloPassPipeline::RunPassesInternal(
 std::vector<HloPassInterface*> HloPassPipeline::GetEnabledPasses(
     const DebugOptions& debug_options) {
   auto repeated_field = debug_options.xla_disable_hlo_passes();
-  tensorflow::gtl::FlatSet<string> disabled_pass_names(repeated_field.begin(),
-                                                       repeated_field.end());
+  absl::flat_hash_set<string> disabled_pass_names(repeated_field.begin(),
+                                                  repeated_field.end());
   if (!disabled_pass_names.empty()) {
     VLOG(1) << "Passes disabled by --xla_disable_hlo_passes: "
             << absl::StrJoin(disabled_pass_names, ", ");
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index abdd9a9212..5ac43808ee 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -981,7 +982,7 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
   // rematerialization is essentially a move). If the next rematerialization of
   // the instruction is also a move then the rematerialization is added to the
   // blacklist.
-  tensorflow::gtl::FlatSet<const HloInstruction*> remat_move_instructions;
+  absl::flat_hash_set<const HloInstruction*> remat_move_instructions;
 
   // The map from instructions to their rematerializable status.
   absl::flat_hash_map<const HloInstruction*, bool> remat_able;
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h
index 5a02e3a8bb..70d83c04f0 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.h
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h
@@ -16,6 +16,7 @@
 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_REMATERIALIZATION_H_
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -122,7 +123,7 @@ class HloRematerialization : public HloModulePass {
 
   // Set of computations which have had rematerialization
   // applied. Rematerialization is only applied once per computation.
-  tensorflow::gtl::FlatSet<const HloComputation*> rematerialized_computations_;
+  absl::flat_hash_set<const HloComputation*> rematerialized_computations_;
 
   // Count of the total instructions rematerialized.
   int64 instructions_rematerialized_ = 0;
diff --git a/tensorflow/compiler/xla/service/hlo_schedule.cc b/tensorflow/compiler/xla/service/hlo_schedule.cc
index 7c5c98f04e..9972eb2077 100644
--- a/tensorflow/compiler/xla/service/hlo_schedule.cc
+++ b/tensorflow/compiler/xla/service/hlo_schedule.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -119,7 +120,7 @@ Status HloSchedule::UpdateComputationSchedule(
   }
 
   // Set of all HloInstructions in the schedule.
-  tensorflow::gtl::FlatSet<int> ids_in_schedule;
+  absl::flat_hash_set<int> ids_in_schedule;
   for (int id : sequences_.at(computation->unique_id()).ids()) {
     InsertOrDie(&ids_in_schedule, id);
   }
@@ -210,7 +211,7 @@ Status HloSchedule::Update() {
   if (sequences_.size() > nonfusion_computations.size()) {
     // Schedule contains some computations which have been removed from the
     // HloModule. Remove them from the schedule as well.
-    tensorflow::gtl::FlatSet<int64> nonfusion_computations_ids;
+    absl::flat_hash_set<int64> nonfusion_computations_ids;
     for (const HloComputation* computation : nonfusion_computations) {
       nonfusion_computations_ids.insert(computation->unique_id());
     }
diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc
index 8549487702..59594ab2f0 100644
--- a/tensorflow/compiler/xla/service/hlo_value.cc
+++ b/tensorflow/compiler/xla/service/hlo_value.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -167,7 +167,7 @@ void HloValue::SetPositionsAndComputeUses(
   positions_.insert(positions_.end(), positions.begin(), positions.end());
 
   // Gather the computation roots at which this value appears.
-  tensorflow::gtl::FlatSet<HloInstruction*> root_positions;
+  absl::flat_hash_set<HloInstruction*> root_positions;
   for (const HloPosition& position : positions_) {
     if (position.instruction ==
         position.instruction->parent()->root_instruction()) {
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
index 7ee789276d..1ebb331977 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -24,7 +25,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_evaluator.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 namespace gtl = ::tensorflow::gtl;
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index 1591256fad..15f0adcaaf 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -26,6 +26,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -39,7 +40,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -504,7 +504,7 @@ class LayoutAssignment : public HloModulePass {
 
   // Every copy added to the module by the layout assignment pass is registered
   // here.
-  tensorflow::gtl::FlatSet<HloInstruction*> added_copies_;
+  absl::flat_hash_set<HloInstruction*> added_copies_;
 
   // The pointer to the channel layout constraints passed in with the
   // constructor. If not nullptr, this is an input/output argument.
@@ -521,8 +521,7 @@ class LayoutAssignment : public HloModulePass {
 
   // The set of HLO instructions which lacked any layout constraint, thus
   // receiving propagated default layouts.
-  tensorflow::gtl::FlatSet<const HloInstruction*>
-      unconstrained_layout_instructions_;
+  absl::flat_hash_set<const HloInstruction*> unconstrained_layout_instructions_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD
index 3934d2e493..6223a34b12 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/BUILD
+++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -39,6 +39,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:logical_buffer",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@llvm//:core",
     ],
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
index e5370eca56..643ecd0fba 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h"
 
-#include <unordered_set>
+#include <map>
 
 #include "llvm/IR/MDBuilder.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
@@ -164,9 +164,7 @@ llvm::MDNode* AliasAnalysis::GetNoaliasMetadataForBuffer(
     add_buffers_to_worklist(operand);
   }
 
-  tensorflow::gtl::FlatSet<BufferAllocation::Slice,
-                           BufferAllocation::Slice::Hasher>
-      buffers;
+  std::set<BufferAllocation::Slice> buffers;
   for (const LogicalBuffer* buffer : worklist) {
     // Skip buffers which cannot be added to the noalias set.
     if (!assignment.HasAllocation(*buffer) ||
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
index 88cde2d3d9..2b46b3c396 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
@@ -23,7 +23,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 namespace llvm_ir {
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc
index 95b1c20663..2ca527bc4c 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc
@@ -15,10 +15,10 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/multi_output_fusion.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -50,7 +50,7 @@ StatusOr<bool> MultiOutputFusion::Run(HloModule* module) {
       all_fusion_candidates_.push_back(instruction);
 
       std::vector<HloInstruction*> candidates;
-      tensorflow::gtl::FlatSet<HloInstruction*> candidates_set;
+      absl::flat_hash_set<HloInstruction*> candidates_set;
       VLOG(10) << "Looking at instruction: " << instruction->name();
       for (auto operand : instruction->operands()) {
         // Filter out the non-interesting instructions -- they
@@ -172,7 +172,7 @@ void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) {
   // Update the fusible list for fusion. Variable new_fusibles keeps
   // track of the new or changed entries.
   std::vector<std::pair<HloInstruction*, int64>> new_fusibles;
-  tensorflow::gtl::FlatSet<HloInstruction*> in_list;
+  absl::flat_hash_set<HloInstruction*> in_list;
   auto it = fusion_node.fusibles.begin();
   while (it != fusion_node.fusibles.end()) {
     HloInstruction* instr = it->first;
diff --git a/tensorflow/compiler/xla/service/name_uniquer.h b/tensorflow/compiler/xla/service/name_uniquer.h
index 1ac60f1cf4..8909d0f4fe 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.h
+++ b/tensorflow/compiler/xla/service/name_uniquer.h
@@ -19,9 +19,9 @@ limitations under the License.
 #include <string>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 
 namespace xla {
@@ -69,7 +69,7 @@ class NameUniquer {
     int64 next_ = 0;
 
     // Set of all the identifiers which has been used.
-    tensorflow::gtl::FlatSet<int64> used_;
+    absl::flat_hash_set<int64> used_;
   };
 
   // The string to use to separate the prefix of the name from the uniquing
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 6ccea9d2b5..e379911462 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
@@ -33,7 +34,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -577,7 +577,7 @@ Status ValidateDotDimensionNumbers(
   // Check that dimension numbers are unique.
   auto dims_unique = [](absl::Span<const int64> contracting_dims,
                         absl::Span<const int64> batch_dims) -> bool {
-    tensorflow::gtl::FlatSet<int64> dim_set;
+    absl::flat_hash_set<int64> dim_set;
     auto is_unique = [&dim_set](int64 i) -> bool {
       return dim_set.insert(i).second;
     };
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc
index 921a984589..56952e3ada 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.cc
+++ b/tensorflow/compiler/xla/service/shaped_buffer.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -26,7 +27,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -147,7 +147,7 @@ void ScopedShapedBuffer::Deallocate() {
   // Deallocate all non-null buffers. A buffer may appear in more than one spot
   // in the shape (eg, a tuple with a repeated element) so keep track of what
   // has been deallocated.
-  tensorflow::gtl::FlatSet<void*> deallocated_ptrs;
+  absl::flat_hash_set<void*> deallocated_ptrs;
   for (auto& pair : buffers_) {
     se::DeviceMemoryBase& memory_base = pair.second;
     if (!memory_base.is_null() &&
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
index 78392d3bb2..64ad1dc80e 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
@@ -36,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/compactptrset.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
index 2590473c77..9795b2830b 100644
--- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
+++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
@@ -16,17 +16,17 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h"
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/service/tuple_util.h"
 #include "tensorflow/compiler/xla/service/while_util.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
 using absl::flat_hash_map;
+using absl::flat_hash_set;
 using absl::InlinedVector;
-using tensorflow::gtl::FlatSet;
 
 // Copies `to_hoist` to the computation containing `while_instr`, hoisting its
 // operands as needed.  All of its transitive operands are expected to be either
@@ -35,7 +35,7 @@ using tensorflow::gtl::FlatSet;
 // them into `hoisted_instructions`.
 static void CreateLoopInvariantCopy(
     flat_hash_map<HloInstruction*, HloInstruction*>* hoisted_instructions,
-    FlatSet<HloInstruction*>* unhoisted_invariant_instructions,
+    flat_hash_set<HloInstruction*>* unhoisted_invariant_instructions,
     HloInstruction* while_instr, HloInstruction* to_hoist) {
   HloComputation* parent_of_while = while_instr->parent();
   HloComputation* while_body = while_instr->while_body();
@@ -153,7 +153,7 @@ WhileLoopInvariantCodeMotion::TryHoistingInvariantInstructionsFromWhileBody(
   // unprofitable to be hoisted alone by NotWorthHoistingIndividually.  When we
   // hoist an instruction in this set, we move it from
   // unhoisted_invariant_instructions to hoisted_instructions.
-  FlatSet<HloInstruction*> unhoisted_invariant_instructions;
+  flat_hash_set<HloInstruction*> unhoisted_invariant_instructions;
 
   // Invariant GTE's axiomatically satisfy the constraints for
   // unhoisted_invariant_instructions -- they can be legally hoisted, but there
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index 07de8492ba..630d71e5ca 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/types/optional.h"
@@ -114,7 +115,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
     return false;
   }
 
-  tensorflow::gtl::FlatSet<int64> used_tuple_indices;
+  absl::flat_hash_set<int64> used_tuple_indices;
   for (HloComputation* comp : {while_body, while_cond}) {
     // The HLO verifier ensures that while_input's shape matches while_init's
     // shape, which we verified above is a tuple.
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 06b6330321..8a0ae33042 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -2146,11 +2146,11 @@ xla_test(
         ":test_utils",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla/client:xla_builder",
-        "//tensorflow/compiler/xla/client:xla_computation",
         "//tensorflow/compiler/xla/service:hlo_parser",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/tests/test_utils_test.cc b/tensorflow/compiler/xla/tests/test_utils_test.cc
index 181e5cbe29..bc433eac8f 100644
--- a/tensorflow/compiler/xla/tests/test_utils_test.cc
+++ b/tensorflow/compiler/xla/tests/test_utils_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/shape_util.h"
@@ -145,7 +146,7 @@ ENTRY %sort.148.1589 (parameter.0: f32[1048576], parameter.1: s32[1048576]) -> (
   ASSERT_EQ(args.size(), 2);
   const Literal& key_arg = args[0];
 
-  tensorflow::gtl::FlatSet<uint32> key_set;
+  absl::flat_hash_set<uint32> key_set;
   for (const float& value : key_arg.data<float>()) {
     EXPECT_TRUE(key_set.insert(tensorflow::bit_cast<uint32>(value)).second);
   }
@@ -168,7 +169,7 @@ ENTRY %sort.148.1589 (parameter.0: s32[1048576], parameter.1: s32[1048576]) -> (
   ASSERT_EQ(args.size(), 2);
   const Literal& key_arg = args[0];
 
-  tensorflow::gtl::FlatSet<int32> key_set;
+  absl::flat_hash_set<int32> key_set;
   for (const int32& value : key_arg.data<int32>()) {
     EXPECT_TRUE(key_set.insert(tensorflow::bit_cast<uint32>(value)).second);
   }
-- 
GitLab


From 350388fca9cb9509962ff393a9d21fb2879c9179 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 19:56:47 -0700
Subject: [PATCH 0992/1357] Add mode_override to the TPU embedding enqueue ops.
 This allows the mode to be overridden at runtime allowing dynamic switching
 between inference and training modes. Not fully implemented yet.

PiperOrigin-RevId: 215325071
---
 tensorflow/contrib/tpu/BUILD                  |   3 +
 .../contrib/tpu/ops/tpu_embedding_ops.cc      |  52 ++++--
 tensorflow/contrib/tpu/python/ops/tpu_ops.py  | 148 ++++++++++++++++++
 3 files changed, 186 insertions(+), 17 deletions(-)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 0c4bdab191..10ed1c2891 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -135,6 +135,9 @@ tf_gen_op_wrapper_py(
     name = "tpu_ops",
     hidden = [
         "SendTPUEmbeddingGradients",
+        "EnqueueTPUEmbeddingIntegerBatch",
+        "EnqueueTPUEmbeddingSparseBatch",
+        "EnqueueTPUEmbeddingSparseTensorBatch",
     ],
     deps = [
         ":cross_replica_ops_op_lib",
diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index ef2f8dd36d..0ef29bdf73 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -335,7 +335,6 @@ void RegisterPerTableLoadAndRetrieveOps() {
     tpu::GradientAccumulationSupport grad_accum_support;
     TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
     if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
-      // TODO(gkurian): Condition this on being used internally within Google.
       OpRegistry::Global()->Register(
           [alg](OpRegistrationData* op_reg_data) -> Status {
             return RegisterPerTableLoadOpsForAlgorithmBody(alg, true,
@@ -353,7 +352,6 @@ void RegisterPerTableLoadAndRetrieveOps() {
     tpu::GradientAccumulationSupport grad_accum_support;
     TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
     if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
-      // TODO(gkurian): Condition this on being used internally within Google.
       OpRegistry::Global()->Register(
           [alg](OpRegistrationData* op_reg_data) -> Status {
             return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, true,
@@ -366,7 +364,7 @@ void RegisterPerTableLoadAndRetrieveOps() {
 }  // namespace
 
 REGISTER_OP("RecvTPUEmbeddingActivations")
-    .Output("outputs: num_outputs * float")
+    .Output("outputs: num_outputs * float32")
     .Attr("num_outputs: int >= 1")
     .Attr("config: string")
     .SetIsStateful()
@@ -476,7 +474,8 @@ config: Serialized TPUEmbeddingConfiguration proto.
 
 REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch")
     .Input("batch: N * int32")
-    .Attr("N: int")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
     .Attr("device_ordinal: int = -1")
     .SetIsStateful()
     .SetShapeFn(shape_inference::UnknownShape)
@@ -485,6 +484,10 @@ An op that enqueues a list of input batch tensors to TPUEmbedding.
 
 batch: A list of 1D tensors, one for each embedding table, containing the
     indices into the tables.
+mode_override: A string input that overrides the mode specified in the
+    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
 device_ordinal: The TPU device to use. Should be >= 0 and less than the number
     of TPU cores in the task on which the node is placed.
 )doc");
@@ -493,7 +496,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseBatch")
     .Input("sample_indices: N * int32")
     .Input("embedding_indices: N * int32")
     .Input("aggregation_weights: N * float32")
-    .Attr("N: int")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
     .Attr("device_ordinal: int = -1")
     .Attr("combiners: list(string) = []")
     .SetIsStateful()
@@ -523,14 +527,18 @@ The tensors at corresponding positions in the three input lists
 must have the same shape, i.e. rank 1 with dim_size() equal to the total
 number of lookups into the table described by the corresponding table_id.
 
-sample_indices: A list of Rank 1 Tensors specifying the training example and
+sample_indices: A list of rank 1 Tensors specifying the training example and
     feature to which the corresponding embedding_indices and aggregation_weights
     values belong. sample_indices[i] must equal b * nf + f, where nf is the
     number of features from the corresponding table, f is in [0, nf), and
     b is in [0, batch size).
-embedding_indices: A list of Rank 1 Tensors, indices into the embedding tables.
-aggregation_weights: A list of Rank 1 Tensors containing per sample -- i.e. per
+embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per
     (training example, feature) -- aggregation weights.
+mode_override: A string input that overrides the mode specified in the
+    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
 device_ordinal: The TPU device to use. Should be >= 0 and less than the number
     of TPU cores in the task on which the node is placed.
 combiners: A list of string scalars, one for each embedding table that specify
@@ -545,7 +553,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
     .Input("sample_indices: N * int32")
     .Input("embedding_indices: N * int32")
     .Input("aggregation_weights: N * float32")
-    .Attr("N: int")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
     .Attr("device_ordinal: int = -1")
     .Attr("combiners: list(string) = []")
     .Attr("table_ids: list(int)")
@@ -555,7 +564,7 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
 This Op eases the porting of code that uses tf.nn.embedding_lookup_sparse().
 
 sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond
-to ith feature. table_ids[i] indicates which embedding table to look up ith
+to the ith feature. table_ids[i] indicates which embedding table to look up ith
 feature.
 
 The tensors at corresponding positions in the three input lists (sample_indices,
@@ -563,12 +572,18 @@ embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1
 with dim_size() equal to the total number of lookups into the table described by
 the corresponding feature.
 
-sample_indices: A list of Rank 1 Tensors, corresponds to sp_ids.indices[:,0] in
+sample_indices: A list of rank 1 Tensors specifying the training example to
+    which the corresponding embedding_indices and aggregation_weights values
+    belong. It corresponds to sp_ids.indices[:,0] in  embedding_lookup_sparse().
+embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+    It corresponds to sp_ids.values in embedding_lookup_sparse().
+aggregation_weights: A list of rank 1 Tensors containing per training example
+    aggregation weights. It corresponds to sp_weights.values in
     embedding_lookup_sparse().
-embedding_indices: A list of Rank 1 Tensors, corresponds to sp_ids.values
-    in embedding_lookup_sparse().
-aggregation_weights: A list of Rank 1 Tensors, corresponds to sp_weights.values
-    in embedding_lookup_sparse().
+mode_override: A string input that overrides the mode specified in the
+    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
 device_ordinal: The TPU device to use. Should be >= 0 and less than the number
     of TPU cores in the task on which the node is placed.
 combiners: A list of string scalars, one for each embedding table that specify
@@ -577,8 +592,11 @@ combiners: A list of string scalars, one for each embedding table that specify
     the sum of the weights be 0 for 'mean' or the sum of the squared weights be
     0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
     all tables.
-table_ids: A list of int. table_ids[i] indicates which embedding table to look
-    up ith feature in the list.
+table_ids: A list of integers specifying the identifier of the embedding table
+    (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the
+    corresponding input. The ith input is looked up using table_ids[i]. The size
+    of the table_ids list must be equal to that of sample_indices,
+    embedding_indices and aggregation_weights.
 )doc");
 
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
index e2e4acadab..968adccf2b 100644
--- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py
+++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
@@ -227,6 +227,154 @@ if platform.system() != "Windows":
         inputs=inputs, learning_rates=learning_rates, config=config, name=name)
 
 
+  send_tpu_embedding_gradients.__doc__ = (
+      gen_tpu_ops._send_tpu_embedding_gradients.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_integer_batch(batch,
+                                          device_ordinal,
+                                          mode_override=None,
+                                          name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      batch: A list of 1D tensors, one for each embedding table, containing the
+        indices into the tables.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingIntegerBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops._enqueue_tpu_embedding_integer_batch(
+        batch=batch,
+        device_ordinal=device_ordinal,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_integer_batch.__doc__ = (
+      gen_tpu_ops._enqueue_tpu_embedding_integer_batch.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_sparse_batch(sample_indices,
+                                         embedding_indices,
+                                         aggregation_weights,
+                                         device_ordinal,
+                                         combiners=None,
+                                         mode_override=None,
+                                         name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      sample_indices: A list of rank 1 Tensors specifying the training example
+        and feature to which the corresponding embedding_indices and
+        aggregation_weights values belong. sample_indices[i] must equal b * nf +
+        f, where nf is the number of features from the corresponding table, f is
+        in [0, nf), and b is in [0, batch size).
+      embedding_indices: A list of rank 1 Tensors, indices into the embedding
+        tables.
+      aggregation_weights: A list of rank 1 Tensors containing per sample --
+        i.e. per (training example, feature) -- aggregation weights.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      combiners: A list of string scalars, one for each embedding table that
+        specify how to normalize the embedding activations after weighted
+        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+        is to use 'sum' for all tables (optional).
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingSparseBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops._enqueue_tpu_embedding_sparse_batch(
+        sample_indices=sample_indices,
+        embedding_indices=embedding_indices,
+        aggregation_weights=aggregation_weights,
+        device_ordinal=device_ordinal,
+        combiners=combiners,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_sparse_batch.__doc__ = (
+      gen_tpu_ops._enqueue_tpu_embedding_sparse_batch.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_sparse_tensor_batch(sample_indices,
+                                                embedding_indices,
+                                                aggregation_weights,
+                                                table_ids,
+                                                device_ordinal,
+                                                combiners=None,
+                                                mode_override=None,
+                                                name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      sample_indices: A list of rank 1 Tensors specifying the training example
+        to which the corresponding embedding_indices and aggregation_weights
+        values
+        belong. It corresponds to sp_ids.indices[:,0] in
+          embedding_lookup_sparse().
+      embedding_indices: A list of rank 1 Tensors, indices into the embedding
+        tables. It corresponds to sp_ids.values in embedding_lookup_sparse().
+      aggregation_weights: A list of rank 1 Tensors containing per training
+        example aggregation weights. It corresponds to sp_weights.values in
+        embedding_lookup_sparse().
+      table_ids: A list of integers specifying the identifier of the embedding
+        table (offset of TableDescriptor in the TPUEmbeddingConfiguration) to
+        lookup the corresponding input. The ith input is looked up using
+        table_ids[i]. The size of the table_ids list must be equal to that of
+        sample_indices, embedding_indices and aggregation_weights.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      combiners: A list of string scalars, one for each embedding table that
+        specify how to normalize the embedding activations after weighted
+        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+        is to use 'sum' for all tables (optional).
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingSparseTensorBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch(
+        sample_indices=sample_indices,
+        embedding_indices=embedding_indices,
+        aggregation_weights=aggregation_weights,
+        table_ids=table_ids,
+        device_ordinal=device_ordinal,
+        combiners=combiners,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_sparse_tensor_batch.__doc__ = (
+      gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch.__doc__)
+
 else:
   # We have already built the appropriate libraries into the binary via CMake
   # if we have built contrib, so we don't need this
-- 
GitLab


From 721ab82745a113fb8cca4ce2b1f22d1d5ab5d546 Mon Sep 17 00:00:00 2001
From: Revan Sopher <rsopher@google.com>
Date: Mon, 1 Oct 2018 23:03:16 -0700
Subject: [PATCH 0993/1357] Loosen test bounds.

PiperOrigin-RevId: 215338403
---
 tensorflow/python/kernel_tests/depthwise_conv_op_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 6d1ead20be..9c02b69180 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -131,8 +131,8 @@ class DepthwiseConv2DTest(test.TestCase):
     with self.session(graph=graph, use_gpu=use_gpu) as sess:
       tolerance = {
           dtypes.float16: 4e-2,
-          dtypes.float32: 1e-8,
-          dtypes.float64: 1e-13,
+          dtypes.float32: 1e-7,
+          dtypes.float64: 1e-12,
       }[data_type]
 
       t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=data_type)
-- 
GitLab


From 9884cb36290664593682d235ce0d5e1925e3fa23 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 1 Oct 2018 23:06:12 -0700
Subject: [PATCH 0994/1357] Check that IsValid{Input|Output}Tensor is only
 given non-control edges

PiperOrigin-RevId: 215338658
---
 tensorflow/core/graph/graph.cc | 4 ++--
 tensorflow/core/graph/graph.h  | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 1630ab7a15..4c0cd14ff1 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -643,7 +643,7 @@ Status Graph::IsValidNode(const Node* node) const {
 
 Status Graph::IsValidOutputTensor(const Node* node, int idx) const {
   TF_RETURN_IF_ERROR(IsValidNode(node));
-  if (idx >= node->num_outputs()) {
+  if (idx >= node->num_outputs() || idx < 0) {
     return errors::OutOfRange("Node '", node->name(), "' (type: '",
                               node->op_def().name(),
                               "', num of outputs: ", node->num_outputs(),
@@ -654,7 +654,7 @@ Status Graph::IsValidOutputTensor(const Node* node, int idx) const {
 
 Status Graph::IsValidInputTensor(const Node* node, int idx) const {
   TF_RETURN_IF_ERROR(IsValidNode(node));
-  if (idx >= node->num_inputs()) {
+  if (idx >= node->num_inputs() || idx < 0) {
     return errors::OutOfRange("Node '", node->name(), "' (type: '",
                               node->op_def().name(),
                               "', num of inputs: ", node->num_inputs(),
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 52e9f23a76..72cef07072 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -590,12 +590,12 @@ class Graph {
   // Returns OK if `node` is non-null and belongs to this graph
   Status IsValidNode(const Node* node) const;
 
-  // Returns OK if IsValidNode(`node`) and `idx` is less than
-  // node->num_outputs()
+  // Returns OK if IsValidNode(`node`) and `idx` is a valid output.  Does not
+  // accept control outputs.
   Status IsValidOutputTensor(const Node* node, int idx) const;
 
-  // Returns OK if IsValidNode(`node`) and `idx` is less than
-  // node->num_inputs()
+  // Returns OK if IsValidNode(`node`) and `idx` a valid input.  Does not accept
+  // control inputs.
   Status IsValidInputTensor(const Node* node, int idx) const;
 
   // Create and return a new WhileContext owned by this graph. This is called
-- 
GitLab


From 38808119e9d5f8ad24bb414aab281e0fa3fde6dc Mon Sep 17 00:00:00 2001
From: Gautam <gautamrbharadwaj@gmail.com>
Date: Tue, 2 Oct 2018 11:56:06 +0530
Subject: [PATCH 0995/1357] Update backend.py

Adding missing import files in the commented examples. When trying out that particular example in commented section the TensorFlow and bumpy imports are missing
---
 tensorflow/python/keras/backend.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 584facc859..79ca4beb73 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -773,6 +773,8 @@ def is_keras_tensor(x):
 
   Examples:
   ```python
+      >>> import tensorflow as tf
+      >>> import numpy
       >>> from keras import backend as K
       >>> from keras.layers import Input, Dense
       >>> np_var = numpy.array([1, 2])
-- 
GitLab


From 7830912c03fe3939120651574d33cec01bc73fcf Mon Sep 17 00:00:00 2001
From: Gautam <gautamrbharadwaj@gmail.com>
Date: Tue, 2 Oct 2018 12:00:14 +0530
Subject: [PATCH 0996/1357] Update backend.py

adding missing import numpy
---
 tensorflow/python/keras/backend.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 584facc859..9c1581eef9 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -653,6 +653,7 @@ def variable(value, dtype=None, name=None, constraint=None):
 
   Examples:
   ```python
+      >>> import numpy as np
       >>> from keras import backend as K
       >>> val = np.array([[1, 2], [3, 4]])
       >>> kvar = K.variable(value=val, dtype='float64', name='example_var')
-- 
GitLab


From edea1be5dd98775399dbd12728e86039a14fb967 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 02:13:06 -0700
Subject: [PATCH 0997/1357] compat: Update forward compatibility horizon to
 2018-10-02

PiperOrigin-RevId: 215354927
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index bea5aa990f..3bb95b56c2 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 1)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 2)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 44da41e4900c3fd481f12c9aa4c49679c9f32fa4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 03:01:09 -0700
Subject: [PATCH 0998/1357] Fix layout assignment for cross module all reduce

Previously we could have ended up with the different HLOs being assigned
different layouts what made lowering impossible. This change enforces a
consistent layout between the communicating nodes the same way it is
done for send&recv pairs.

PiperOrigin-RevId: 215359420
---
 .../compiler/xla/service/layout_assignment.cc | 65 +++++++++++++++----
 .../xla/service/layout_assignment_test.cc     | 44 +++++++++++++
 2 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 082bf8bffe..25d5327561 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -498,6 +498,22 @@ Status LayoutAssignment::AddMandatoryConstraints(
         TF_RETURN_IF_ERROR(
             constraints->SetBufferLayout(new_shape.layout(), *buffer));
       }
+    } else if (instruction->IsCrossModuleAllReduce()) {
+      CHECK(get_channel_constraints(instruction))
+          << "Multi-module layout assignment requires ChannelLayoutConstraints";
+      int64 all_reduce_id = instruction->all_reduce_id().value();
+      if (!get_channel_constraints(instruction)
+               ->IsChannelConstrained(all_reduce_id)) {
+        continue;
+      }
+      // TODO(b/68493863): Change to use SetOperandLayout().
+      const Shape& buffer_shape = instruction->operand(0)->shape();
+      TF_RET_CHECK(ShapeUtil::IsArray(buffer_shape));
+      Shape new_buffer_shape =
+          get_channel_constraints(instruction)
+              ->LayoutShapeForChannel(buffer_shape, all_reduce_id);
+      TF_RETURN_IF_ERROR(
+          constraints->SetInstructionLayout(new_buffer_shape, instruction));
     }
   }
 
@@ -1512,19 +1528,6 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
     // Verify all layouts in the shape have been set.
     TF_RET_CHECK(LayoutUtil::HasLayout(instruction->shape()));
   }
-
-  // Copy the root instruction's result if its layout does not match the result
-  // layout constraint.
-  if (constraints.ResultLayout() != nullptr &&
-      !constraints.ResultLayout()->MatchesLayoutInShape(
-          computation->root_instruction()->shape())) {
-    TF_ASSIGN_OR_RETURN(
-        HloInstruction * new_root,
-        CreateCopyWithNewLayout(constraints.ResultLayout()->shape(),
-                                computation->root_instruction()));
-    computation->set_root_instruction(new_root);
-  }
-
   return Status::OK();
 }
 
@@ -1654,6 +1657,18 @@ Status LayoutAssignment::RunOnComputation(
     TF_RETURN_IF_ERROR(
         ConstrainChannelLayouts(computation, channel_constraints));
   }
+
+  // Copy the root instruction's result if its layout does not match the result
+  // layout constraint.
+  if (constraints.ResultLayout() != nullptr &&
+      !constraints.ResultLayout()->MatchesLayoutInShape(
+          computation->root_instruction()->shape())) {
+    TF_ASSIGN_OR_RETURN(
+        HloInstruction * new_root,
+        CreateCopyWithNewLayout(constraints.ResultLayout()->shape(),
+                                computation->root_instruction()));
+    computation->set_root_instruction(new_root);
+  }
   return Status::OK();
 }
 
@@ -1709,6 +1724,30 @@ Status LayoutAssignment::ConstrainChannelLayouts(
             ShapeUtil::GetMutableSubshape(instruction->mutable_shape(), {0});
         *send_shape = shape;
       }
+    } else if (instruction->IsCrossModuleAllReduce()) {
+      const Layout* layout =
+          get_channel_constraints(instruction)
+              ->ConstrainChannel(instruction->all_reduce_id().value(),
+                                 instruction->shape().layout());
+      if (layout != nullptr) {
+        // We found an already constrained layout which does not match the one
+        // the channel wants to impose. Either add a new kCopy, or use the
+        // existing one to marshal the correct shape.
+        HloInstruction* operand = instruction->mutable_operand(0);
+        Shape shape = operand->shape();
+        *shape.mutable_layout() = *layout;
+        if (operand->opcode() != HloOpcode::kCopy) {
+          HloInstruction* copy = operand->parent()->AddInstruction(
+              HloInstruction::CreateUnary(shape, HloOpcode::kCopy, operand));
+          RegisterAddedCopy(copy);
+          SetupCopiedInstruction(*operand, copy, {});
+          TF_RETURN_IF_ERROR(instruction->ReplaceOperandWith(0, copy));
+          operand = copy;
+        } else {
+          *operand->mutable_shape() = shape;
+        }
+        *instruction->mutable_shape() = shape;
+      }
     }
   }
   return Status::OK();
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 752a61476d..10f9a95121 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -860,6 +860,50 @@ TEST_F(LayoutAssignmentTest, ChannelLayoutMismatch) {
       ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0})));
 }
 
+TEST_F(LayoutAssignmentTest, AllReduceLayoutMissmatch) {
+  // Pin non matching layouts to parameter and root.
+  const char* module_str = R"(
+    HloModule test_module
+
+    add {
+      lhs = f32[] parameter(0)
+      rhs = f32[] parameter(1)
+      ROOT add = f32[] add(lhs, rhs)
+    }
+
+    ENTRY entry_computation {
+      param = (f32[2,2]) parameter(0)
+      gte = f32[2,2] get-tuple-element(param), index=0
+      ar.0 = f32[2,2] cross-replica-sum(gte),
+        all_reduce_id=0, replica_groups={{0}}, to_apply=add,
+        sharding={maximal device=0}
+      const = f32[2,2] constant(f32[2,2]{{0,1},{2,3}})
+      ROOT ar.1 = f32[2,2] cross-replica-sum(const),
+        all_reduce_id=0, replica_groups={{0}}, to_apply=add,
+        sharding={maximal device=1}
+    })";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
+  ComputationLayout computation_layout(
+      module->entry_computation()->ComputeProgramShape());
+  Shape param_shape = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {0, 1})});
+  TF_ASSERT_OK(
+      computation_layout.mutable_parameter_layout(0)->CopyLayoutFromShape(
+          param_shape));
+  computation_layout.mutable_result_layout()->ResetLayout(
+      LayoutUtil::MakeLayout({1, 0}));
+
+  ChannelLayoutConstraints channel_constraints;
+  AssignLayouts(module.get(), &computation_layout, &channel_constraints);
+
+  EXPECT_THAT(LayoutOf(module.get(), "gte"), ElementsAre(0, 1));
+  EXPECT_THAT(LayoutOf(module.get(), "ar.0"), ElementsAre(0, 1));
+  EXPECT_THAT(LayoutOf(module.get(), "ar.1"), ElementsAre(0, 1));
+  const HloInstruction* root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root->shape().layout().minor_to_major(), ElementsAre(1, 0));
+}
+
 TEST_F(LayoutAssignmentTest, CopySliceOperandToAvoidImplicitLayoutChange) {
   const char* module_str = R"(
     HloModule CopySliceOperandToAvoidImplicitLayoutChange
-- 
GitLab


From f22037abf5a6f4581f5fb6013f72f91747f22965 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 03:36:14 -0700
Subject: [PATCH 0999/1357] Add a hint parameter to
 TransferLiteralToDeviceAsync that the implementation can use to accelerate
 transfers.

PiperOrigin-RevId: 215362667
---
 tensorflow/compiler/jit/xla_device_context.cc    | 15 +++++++++++----
 tensorflow/compiler/jit/xla_device_context.h     |  3 ++-
 .../xla/service/generic_transfer_manager.cc      |  2 +-
 .../xla/service/generic_transfer_manager.h       |  7 ++++---
 .../compiler/xla/service/transfer_manager.h      | 16 +++++++++++++++-
 5 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index af83c792e5..e083652978 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -75,8 +75,9 @@ XlaTransferManager::XlaTransferManager(
   }
 }
 
-Status XlaTransferManager::TransferLiteralToDevice(
-    const Tensor& host_tensor, Tensor* device_tensor) const {
+Status XlaTransferManager::TransferLiteralToDevice(const Tensor& host_tensor,
+                                                   Tensor* device_tensor,
+                                                   bool buffer_is_fresh) const {
   xla::Shape xla_shape;
   TF_RETURN_IF_ERROR(TensorShapeToXLAShape(host_tensor.dtype(),
                                            host_tensor.shape(), &xla_shape));
@@ -97,8 +98,11 @@ Status XlaTransferManager::TransferLiteralToDevice(
     // synchronized.
     host_to_device_stream_->ThenWaitFor(stream_.get());
   }
+  xla::TransferManager::TransferToDeviceHint hint =
+      buffer_is_fresh ? xla::TransferManager::kBufferUndefined
+                      : xla::TransferManager::kNoHint;
   TF_RETURN_IF_ERROR(transfer_manager_->TransferLiteralToDeviceAsync(
-      host_to_device_stream_.get(), *literal, shaped_buffer));
+      host_to_device_stream_.get(), *literal, shaped_buffer, hint));
   if (UseMultipleStreams()) {
     auto event = std::make_shared<se::Event>(stream_->parent());
     TF_RET_CHECK(event->Init()) << "Event failed to initialize!";
@@ -165,6 +169,7 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
     return;
   }
   TensorShape shape = shape_or_status.ValueOrDie();
+  bool buffer_is_fresh = false;
   if (!xla_tensor->has_shaped_buffer()) {
     Status s =
         xla_tensor->AllocateShapedBuffer(device_tensor->dtype(), shape, client_,
@@ -173,6 +178,7 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
       done(s);
       return;
     }
+    buffer_is_fresh = true;
   }
 
   Status status;
@@ -183,7 +189,8 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
           "Tensor::CopyFrom failed when copying from CPU to XLA device"));
       return;
     }
-    status = TransferLiteralToDevice(reshaped_cpu_tensor, device_tensor);
+    status = TransferLiteralToDevice(reshaped_cpu_tensor, device_tensor,
+                                     buffer_is_fresh);
   } else {
     se::DeviceMemoryBase dev_dst_ptr =
         XlaTensor::DeviceMemoryFromTensor(*device_tensor);
diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h
index df82421294..a4c0c296fc 100644
--- a/tensorflow/compiler/jit/xla_device_context.h
+++ b/tensorflow/compiler/jit/xla_device_context.h
@@ -67,7 +67,8 @@ class XlaTransferManager {
 
  private:
   Status TransferLiteralToDevice(const Tensor& host_tensor,
-                                 Tensor* device_tensor) const;
+                                 Tensor* device_tensor,
+                                 bool buffer_is_fresh) const;
   void TransferLiteralFromDevice(Tensor* host_tensor,
                                  const Tensor& device_tensor,
                                  const StatusCallback& done) const;
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
index bec02e14f9..f92fde7f46 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
@@ -98,7 +98,7 @@ Status GenericTransferManager::TransferLiteralFromDeviceInternal(
 
 Status GenericTransferManager::TransferLiteralToDeviceAsync(
     se::Stream* stream, const LiteralSlice& literal,
-    const ShapedBuffer& device_buffer) {
+    const ShapedBuffer& device_buffer, TransferToDeviceHint /*hint*/) {
   const Shape& shape = literal.shape();
   VLOG(2) << "transferring literal shape to device: "
           << ShapeUtil::HumanString(shape)
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h
index 86c8b1c145..b1cba82b9f 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.h
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h
@@ -45,9 +45,10 @@ class GenericTransferManager : public TransferManager {
                                  MutableBorrowingLiteral literal,
                                  std::function<void(Status)> done) override;
 
-  Status TransferLiteralToDeviceAsync(
-      se::Stream* stream, const LiteralSlice& literal,
-      const ShapedBuffer& device_buffer) override;
+  Status TransferLiteralToDeviceAsync(se::Stream* stream,
+                                      const LiteralSlice& literal,
+                                      const ShapedBuffer& device_buffer,
+                                      TransferToDeviceHint hint) override;
 
   Status TransferLiteralToInfeed(se::StreamExecutor* executor,
                                  const LiteralSlice& literal) override;
diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h
index f952e64af2..9199e32d0f 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.h
+++ b/tensorflow/compiler/xla/service/transfer_manager.h
@@ -89,6 +89,16 @@ class TransferManager {
                                          const LiteralSlice& literal,
                                          const ShapedBuffer& device_buffer);
 
+  // Hint type given to TransferLiteralToDeviceAsync.
+  enum TransferToDeviceHint {
+    // No hint available.
+    kNoHint,
+
+    // The destination buffer is undefined on the device, meaning it can be
+    // transferred to eagerly rather than waiting for Stream ordering.
+    kBufferUndefined,
+  };
+
   // Transfers the given literal into the previously allocated device memory
   // represented by the given ShapedBuffer using the given executor. The shape
   // of the ShapedBuffer and DeviceShape(literal.shape()) must be compatible,
@@ -96,9 +106,13 @@ class TransferManager {
   //
   // This operation is performed asynchronously on the given stream. It returns
   // once the transfer is enqueued.
+  //
+  // The optional hint can allow implementations to optimize transfers. It is
+  // not mandatory for an implementation to obey the hint.
   virtual Status TransferLiteralToDeviceAsync(
       se::Stream* stream, const LiteralSlice& literal,
-      const ShapedBuffer& device_buffer) = 0;
+      const ShapedBuffer& device_buffer,
+      TransferToDeviceHint hint = kNoHint) = 0;
 
   // Convenience methods for transferring an array to or from the device at a
   // known address. This avoids having to construct a ShapedBuffer just to
-- 
GitLab


From 35f3046a326daea0179d024044636f2fcbb45f4a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 05:18:28 -0700
Subject: [PATCH 1000/1357] Export endpoint for the version of the
 `regex_replace` function that calls StaticRegexReplace.

PiperOrigin-RevId: 215371291
---
 .../python_api/api_def_RegexReplace.pbtxt     |  8 +-----
 tensorflow/python/ops/string_ops.py           | 25 +++++++++++--------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/tensorflow/core/api_def/python_api/api_def_RegexReplace.pbtxt b/tensorflow/core/api_def/python_api/api_def_RegexReplace.pbtxt
index b17806b338..5020844204 100644
--- a/tensorflow/core/api_def/python_api/api_def_RegexReplace.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_RegexReplace.pbtxt
@@ -1,10 +1,4 @@
 op {
   graph_op_name: "RegexReplace"
-  endpoint {
-    name: "strings.regex_replace"
-  }
-  endpoint {
-    name: "regex_replace"
-    deprecated: true
-  }
+  visibility: HIDDEN
 }
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index e83c08f643..0812f901a2 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -46,6 +46,7 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 # pylint: disable=redefined-builtin
+@tf_export("strings.regex_full_match")
 def regex_full_match(input, pattern, name=None):
   r"""Match elements of `input` with regex `pattern`.
 
@@ -73,15 +74,14 @@ def regex_full_match(input, pattern, name=None):
 
 regex_full_match.__doc__ = gen_string_ops.regex_full_match.__doc__
 
-# Expose regex_full_match in strings namespace
-tf_export("strings.regex_full_match")(regex_full_match)
 
-
-def regex_replace(source, pattern, rewrite, replace_global=True):
-  r"""Replace elements of `source` matching regex `pattern` with `rewrite`.
+@tf_export("strings.regex_replace", "regex_replace")
+@deprecation.deprecated_endpoints("regex_replace")
+def regex_replace(input, pattern, rewrite, replace_global=True, name=None):
+  r"""Replace elements of `input` matching regex `pattern` with `rewrite`.
 
   Args:
-    source: string `Tensor`, the source strings to process.
+    input: string `Tensor`, the source strings to process.
     pattern: string or scalar string `Tensor`, regular expression to use,
       see more details at https://github.com/google/re2/wiki/Syntax
     rewrite: string or scalar string `Tensor`, value to use in match
@@ -89,9 +89,10 @@ def regex_replace(source, pattern, rewrite, replace_global=True):
       text matching corresponding parenthesized group.
     replace_global: `bool`, if `True` replace all non-overlapping matches,
       else replace only the first match.
+    name: A name for the operation (optional).
 
   Returns:
-    string `Tensor` of the same shape as `source` with specified replacements.
+    string `Tensor` of the same shape as `input` with specified replacements.
   """
   if (isinstance(pattern, util_compat.bytes_or_text_types) and
       isinstance(rewrite, util_compat.bytes_or_text_types)):
@@ -99,11 +100,13 @@ def regex_replace(source, pattern, rewrite, replace_global=True):
     # use a version which performs the expensive regex compilation once at
     # creation time.
     return gen_string_ops.static_regex_replace(
-        input=source, pattern=pattern,
-        rewrite=rewrite, replace_global=replace_global)
+        input=input, pattern=pattern,
+        rewrite=rewrite, replace_global=replace_global,
+        name=name)
   return gen_string_ops.regex_replace(
-      input=source, pattern=pattern,
-      rewrite=rewrite, replace_global=replace_global)
+      input=input, pattern=pattern,
+      rewrite=rewrite, replace_global=replace_global,
+      name=name)
 
 
 @tf_export("strings.format")
-- 
GitLab


From 97d515273a1e86a861cdfb338671a42b3b1126a7 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 2 Oct 2018 07:34:40 -0700
Subject: [PATCH 1001/1357] Make
 StatelessRandomOpsTest.testRandomNormalIsFinite actually test
 stateless_random_normal.

Fixes #22611

PiperOrigin-RevId: 215385610
---
 tensorflow/compiler/tests/stateless_random_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py
index f3861043b2..e8741bc468 100644
--- a/tensorflow/compiler/tests/stateless_random_ops_test.py
+++ b/tensorflow/compiler/tests/stateless_random_ops_test.py
@@ -91,7 +91,7 @@ class StatelessRandomOpsTest(xla_test.XLATestCase):
     with self.cached_session() as sess, self.test_scope():
       for dtype in self._random_types():
         seed_t = array_ops.placeholder(dtypes.int32, shape=[2])
-        x = stateless.stateless_random_uniform(
+        x = stateless.stateless_random_normal(
             shape=[10000], seed=seed_t, dtype=dtype)
         y = sess.run(x, {seed_t: [0x12345678, 0xabcdef12]})
         self.assertTrue(np.all(np.isfinite(y)))
-- 
GitLab


From 1a56a3299e904d5a3352a3a15e4cf7401f72bbc3 Mon Sep 17 00:00:00 2001
From: joe yearsley <joe@kheironmed.com>
Date: Tue, 2 Oct 2018 16:33:37 +0100
Subject: [PATCH 1002/1357] Updated ordering for kwargs

---
 tensorflow/python/layers/core.py                       | 6 +++---
 tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt | 2 +-
 tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index 5919fa543e..e06e9aba4a 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -292,17 +292,17 @@ class Flatten(keras_layers.Flatten, base.Layer):
 
 
 @tf_export('layers.flatten')
-def flatten(inputs, data_format='channels_last', name=None):
+def flatten(inputs, name=None, data_format='channels_last'):
   """Flattens an input tensor while preserving the batch axis (axis 0).
 
   Arguments:
     inputs: Tensor input.
+    name: The name of the layer (string).
     data_format: A string, one of `channels_last` (default) or `channels_first`.
       The ordering of the dimensions in the inputs.
       `channels_last` corresponds to inputs with shape
       `(batch, height, width, channels)` while `channels_first` corresponds to
       inputs with shape `(batch, channels, height, width)`.
-    name: The name of the layer (string).
 
   Returns:
     Reshaped tensor.
@@ -319,7 +319,7 @@ def flatten(inputs, data_format='channels_last', name=None):
     # now `y` has shape `(None, None)`
   ```
   """
-  layer = Flatten(data_format=data_format, name=name)
+  layer = Flatten(name=name, data_format=data_format)
   return layer.apply(inputs)
 
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
index 5d9ea2e5a3..0c24e9c7dd 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "
+    argspec: "args=[\'inputs\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'channels_last\'], "
   }
   member_method {
     name: "max_pooling1d"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
index 5d9ea2e5a3..0c24e9c7dd 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "
+    argspec: "args=[\'inputs\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'channels_last\'], "
   }
   member_method {
     name: "max_pooling1d"
-- 
GitLab


From 28757ad658243526d84fd16d53b9eefbf809c6ff Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 08:30:36 -0700
Subject: [PATCH 1003/1357] Use xlogy in a few places in TFP to avoid NaN's for
 certain special cases.

PiperOrigin-RevId: 215392621
---
 .../kernel_tests/distributions/beta_test.py     |  5 +++++
 .../distributions/dirichlet_test.py             | 17 +++++++++++++++++
 .../distributions/exponential_test.py           |  7 +++++++
 .../kernel_tests/distributions/gamma_test.py    |  8 ++++++++
 tensorflow/python/ops/distributions/beta.py     |  4 ++--
 .../python/ops/distributions/dirichlet.py       |  2 +-
 tensorflow/python/ops/distributions/gamma.py    |  2 +-
 7 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/distributions/beta_test.py b/tensorflow/python/kernel_tests/distributions/beta_test.py
index d580a415dd..42e81bd658 100644
--- a/tensorflow/python/kernel_tests/distributions/beta_test.py
+++ b/tensorflow/python/kernel_tests/distributions/beta_test.py
@@ -167,6 +167,11 @@ class BetaTest(test.TestCase):
     self.assertAllClose([[1., 3. / 2], [3. / 2, 15. / 8]], self.evaluate(pdf))
     self.assertEqual((2, 2), pdf.get_shape())
 
+  def testLogPdfOnBoundaryIsFiniteWhenAlphaIsOne(self):
+    b = [[0.01, 0.1, 1., 2], [5., 10., 2., 3]]
+    pdf = self.evaluate(beta_lib.Beta(1., b).prob(0.))
+    self.assertAllEqual(np.ones_like(pdf, dtype=np.bool), np.isfinite(pdf))
+
   def testBetaMean(self):
     a = [1., 2, 3]
     b = [2., 4, 1.2]
diff --git a/tensorflow/python/kernel_tests/distributions/dirichlet_test.py b/tensorflow/python/kernel_tests/distributions/dirichlet_test.py
index cace5b3ba2..0f96382453 100644
--- a/tensorflow/python/kernel_tests/distributions/dirichlet_test.py
+++ b/tensorflow/python/kernel_tests/distributions/dirichlet_test.py
@@ -83,6 +83,23 @@ class DirichletTest(test.TestCase):
     with self.assertRaisesOpError("sample last-dimension must sum to `1`"):
       self.evaluate(dist.prob([.1, .2, .8]))
 
+  def testLogPdfOnBoundaryIsFiniteWhenAlphaIsOne(self):
+    # Test concentration = 1. for each dimension.
+    concentration = 3 * np.ones((10, 10)).astype(np.float32)
+    concentration[range(10), range(10)] = 1.
+    x = 1 / 9. * np.ones((10, 10)).astype(np.float32)
+    x[range(10), range(10)] = 0.
+    dist = dirichlet_lib.Dirichlet(concentration)
+    log_prob = self.evaluate(dist.log_prob(x))
+    self.assertAllEqual(
+        np.ones_like(log_prob, dtype=np.bool), np.isfinite(log_prob))
+
+    # Test when concentration[k] = 1., and x is zero at various dimensions.
+    dist = dirichlet_lib.Dirichlet(10 * [1.])
+    log_prob = self.evaluate(dist.log_prob(x))
+    self.assertAllEqual(
+        np.ones_like(log_prob, dtype=np.bool), np.isfinite(log_prob))
+
   def testPdfZeroBatches(self):
     alpha = [1., 2]
     x = [.5, .5]
diff --git a/tensorflow/python/kernel_tests/distributions/exponential_test.py b/tensorflow/python/kernel_tests/distributions/exponential_test.py
index 367f8bb0f1..1600387585 100644
--- a/tensorflow/python/kernel_tests/distributions/exponential_test.py
+++ b/tensorflow/python/kernel_tests/distributions/exponential_test.py
@@ -65,6 +65,13 @@ class ExponentialTest(test.TestCase):
     self.assertAllClose(self.evaluate(log_pdf), expected_log_pdf)
     self.assertAllClose(self.evaluate(pdf), np.exp(expected_log_pdf))
 
+  def testExponentialLogPDFBoundary(self):
+    # Check that Log PDF is finite at 0.
+    rate = np.array([0.1, 0.5, 1., 2., 5., 10.], dtype=np.float32)
+    exponential = exponential_lib.Exponential(rate=rate)
+    log_pdf = exponential.log_prob(0.)
+    self.assertAllClose(np.log(rate), self.evaluate(log_pdf))
+
   def testExponentialCDF(self):
     batch_size = 6
     lam = constant_op.constant([2.0] * batch_size)
diff --git a/tensorflow/python/kernel_tests/distributions/gamma_test.py b/tensorflow/python/kernel_tests/distributions/gamma_test.py
index 4eff40b029..4c5b9c3ea3 100644
--- a/tensorflow/python/kernel_tests/distributions/gamma_test.py
+++ b/tensorflow/python/kernel_tests/distributions/gamma_test.py
@@ -77,6 +77,14 @@ class GammaTest(test.TestCase):
     self.assertAllClose(self.evaluate(log_pdf), expected_log_pdf)
     self.assertAllClose(self.evaluate(pdf), np.exp(expected_log_pdf))
 
+  def testGammaLogPDFBoundary(self):
+    # When concentration = 1, we have an exponential distribution. Check that at
+    # 0 we have finite log prob.
+    rate = np.array([0.1, 0.5, 1., 2., 5., 10.], dtype=np.float32)
+    gamma = gamma_lib.Gamma(concentration=1., rate=rate)
+    log_pdf = gamma.log_prob(0.)
+    self.assertAllClose(np.log(rate), self.evaluate(log_pdf))
+
   def testGammaLogPDFMultidimensional(self):
     batch_size = 6
     alpha = constant_op.constant([[2.0, 4.0]] * batch_size)
diff --git a/tensorflow/python/ops/distributions/beta.py b/tensorflow/python/ops/distributions/beta.py
index 2ba1ea6744..d6f89a3517 100644
--- a/tensorflow/python/ops/distributions/beta.py
+++ b/tensorflow/python/ops/distributions/beta.py
@@ -267,8 +267,8 @@ class Beta(distribution.Distribution):
 
   def _log_unnormalized_prob(self, x):
     x = self._maybe_assert_valid_sample(x)
-    return ((self.concentration1 - 1.) * math_ops.log(x)
-            + (self.concentration0 - 1.) * math_ops.log1p(-x))
+    return (math_ops.xlogy(self.concentration1 - 1., x) +
+            (self.concentration0 - 1.) * math_ops.log1p(-x))
 
   def _log_normalization(self):
     return (math_ops.lgamma(self.concentration1)
diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py
index 415249a958..997b1d392d 100644
--- a/tensorflow/python/ops/distributions/dirichlet.py
+++ b/tensorflow/python/ops/distributions/dirichlet.py
@@ -236,7 +236,7 @@ class Dirichlet(distribution.Distribution):
 
   def _log_unnormalized_prob(self, x):
     x = self._maybe_assert_valid_sample(x)
-    return math_ops.reduce_sum((self.concentration - 1.) * math_ops.log(x), -1)
+    return math_ops.reduce_sum(math_ops.xlogy(self.concentration - 1., x), -1)
 
   def _log_normalization(self):
     return special_math_ops.lbeta(self.concentration)
diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py
index 3293cda874..bbc64da7bc 100644
--- a/tensorflow/python/ops/distributions/gamma.py
+++ b/tensorflow/python/ops/distributions/gamma.py
@@ -225,7 +225,7 @@ class Gamma(distribution.Distribution):
 
   def _log_unnormalized_prob(self, x):
     x = self._maybe_assert_valid_sample(x)
-    return (self.concentration - 1.) * math_ops.log(x) - self.rate * x
+    return math_ops.xlogy(self.concentration - 1., x) - self.rate * x
 
   def _log_normalization(self):
     return (math_ops.lgamma(self.concentration)
-- 
GitLab


From 13643287a535581c133de529e3b02942ef7dd730 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 2 Oct 2018 18:46:11 +0300
Subject: [PATCH 1004/1357] Fix merge artifacts: replace Dataset by
 DatasetSource in Ignite Dataset.

---
 tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
index cfe59b6b23..288d485320 100644
--- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -688,7 +688,7 @@ class IgniteClient(TcpClient):
         "Unknown binary type when expected string [type_id=%d]" % header)
 
 
-class IgniteDataset(dataset_ops.Dataset):
+class IgniteDataset(dataset_ops.DatasetSource):
   """Apache Ignite is a memory-centric distributed database, caching, and
 
      processing platform for transactional, analytical, and streaming workloads,
-- 
GitLab


From 7d66a720acb756291adc99ebe444c2c00bd37d84 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 2 Oct 2018 18:57:07 +0300
Subject: [PATCH 1005/1357] Remove Ignite Dataset SSL tests by internal policy.

---
 .../python/tests/ignite_dataset_test.py       | 36 -------------------
 1 file changed, 36 deletions(-)

diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
index 1856a4fba8..ef29b5f14a 100644
--- a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
+++ b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
@@ -46,42 +46,6 @@ class IgniteDatasetTest(test.TestCase):
     ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300)
     self._check_dataset(ds)
 
-  def test_ignite_dataset_with_ssl_client(self):
-    """Test Ignite Dataset with ssl client.
-
-    """
-    self._clear_env()
-    os.environ["IGNITE_DATASET_CERTFILE"] = os.path.dirname(
-        os.path.realpath(__file__)) + "/keystore/client.pem"
-    os.environ["IGNITE_DATASET_CERT_PASSWORD"] = "123456"
-
-    ds = IgniteDataset(
-        cache_name="SQL_PUBLIC_TEST_CACHE",
-        port=42301,
-        certfile=os.environ["IGNITE_DATASET_CERTFILE"],
-        cert_password=os.environ["IGNITE_DATASET_CERT_PASSWORD"])
-    self._check_dataset(ds)
-
-  def test_ignite_dataset_with_ssl_client_and_auth(self):
-    """Test Ignite Dataset with ssl client and authentication.
-
-    """
-    self._clear_env()
-    os.environ["IGNITE_DATASET_USERNAME"] = "ignite"
-    os.environ["IGNITE_DATASET_PASSWORD"] = "ignite"
-    os.environ["IGNITE_DATASET_CERTFILE"] = os.path.dirname(
-        os.path.realpath(__file__)) + "/keystore/client.pem"
-    os.environ["IGNITE_DATASET_CERT_PASSWORD"] = "123456"
-
-    ds = IgniteDataset(
-        cache_name="SQL_PUBLIC_TEST_CACHE",
-        port=42302,
-        certfile=os.environ["IGNITE_DATASET_CERTFILE"],
-        cert_password=os.environ["IGNITE_DATASET_CERT_PASSWORD"],
-        username=os.environ["IGNITE_DATASET_USERNAME"],
-        password=os.environ["IGNITE_DATASET_PASSWORD"])
-    self._check_dataset(ds)
-
   def _clear_env(self):
     """Clears environment variables used by Ignite Dataset.
 
-- 
GitLab


From ce41d2f95e1e5883f1808030c94fd9aaa57d9f10 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 09:32:20 -0700
Subject: [PATCH 1006/1357] Generate an error when --rnn_states refers to array
 names that aren't produced/consumed by any op.

PiperOrigin-RevId: 215402308
---
 .../resolve_multiply_by_zero.cc               | 14 ++++-----
 .../contrib/lite/toco/model_cmdline_flags.cc  | 18 ++++++++----
 tensorflow/contrib/lite/toco/tooling_util.cc  | 29 +++++++++++++++----
 3 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
index 4bb1217828..b2b2ea151b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
@@ -60,6 +60,10 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
   const auto& output_array_name = mul_op->outputs[0];
   auto& output_array = model->GetArray(output_array_name);
 
+  if (!IsDiscardableArray(*model, output_array_name)) {
+    return false;
+  }
+
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
     return false;
@@ -139,14 +143,8 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
   }
 
   // Erase input arrays to the multiply if no longer used
-  if (IsDiscardableArray(*model, mul_op->inputs[0]) &&
-      CountOpsWithInput(*model, mul_op->inputs[0]) == 1) {
-    model->EraseArray(mul_op->inputs[0]);
-  }
-  if (IsDiscardableArray(*model, mul_op->inputs[1]) &&
-      CountOpsWithInput(*model, mul_op->inputs[1]) == 1) {
-    model->EraseArray(mul_op->inputs[1]);
-  }
+  DeleteArrayIfUsedOnce(mul_op->inputs[0], model);
+  DeleteArrayIfUsedOnce(mul_op->inputs[1], model);
 
   // Erase the multiply operator.
   model->operators.erase(mul_it);
diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
index d34da63e43..b6a401aaf2 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
@@ -394,12 +394,18 @@ void ReadModelFlagsFromCommandLineFlags(
     }
   }
 
-  model_flags->set_allow_nonascii_arrays(
-      parsed_model_flags.allow_nonascii_arrays.value());
-  model_flags->set_allow_nonexistent_arrays(
-      parsed_model_flags.allow_nonexistent_arrays.value());
-  model_flags->set_change_concat_input_ranges(
-      parsed_model_flags.change_concat_input_ranges.value());
+  if (!model_flags->has_allow_nonascii_arrays()) {
+    model_flags->set_allow_nonascii_arrays(
+        parsed_model_flags.allow_nonascii_arrays.value());
+  }
+  if (!model_flags->has_allow_nonexistent_arrays()) {
+    model_flags->set_allow_nonexistent_arrays(
+        parsed_model_flags.allow_nonexistent_arrays.value());
+  }
+  if (!model_flags->has_change_concat_input_ranges()) {
+    model_flags->set_change_concat_input_ranges(
+        parsed_model_flags.change_concat_input_ranges.value());
+  }
 
   if (parsed_model_flags.arrays_extra_info_file.specified()) {
     string arrays_extra_info_file_contents;
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 4a1ae35cb5..b87e01fbf0 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -843,24 +843,40 @@ void CheckNonAsciiIOArrays(const ModelFlags& model_flags) {
 }
 
 void CheckNonExistentIOArrays(const Model& model) {
+  // "non-existent" is interpreted in the stronger sense of
+  // "not actually produced/consumed by an op".
+  // Rationale: we have to artificially fix up TensorFlow graphs by creating
+  // any array that it refers to, so just checking that arrays exist isn't
+  // sufficient. The real invariant here is whether arrays are produced/consumed
+  // by something.
   if (model.flags.allow_nonexistent_arrays()) {
     return;
   }
   for (const auto& input_array : model.flags.input_arrays()) {
-    CHECK(model.HasArray(input_array.name()))
-        << "Input array not found: " << input_array.name();
+    QCHECK(GetOpWithInput(model, input_array.name()))
+        << "Specified input array " << input_array.name()
+        << " is not consumed by any op in this graph. Is it a typo?";
   }
   for (const string& output_array : model.flags.output_arrays()) {
-    CHECK(model.HasArray(output_array))
-        << "Output array not found: " << output_array;
+    QCHECK(GetOpWithOutput(model, output_array))
+        << "Specified output array " << output_array
+        << " is not produced by any op in this graph. Is it a typo?";
   }
   for (const auto& rnn_state : model.flags.rnn_states()) {
     if (!rnn_state.discardable()) {
-      CHECK(model.HasArray(rnn_state.state_array()));
-      CHECK(model.HasArray(rnn_state.back_edge_source_array()));
+      // Check that all RNN states are consumed
+      QCHECK(GetOpWithInput(model, rnn_state.state_array()))
+          << "Specified RNN state " << rnn_state.state_array()
+          << " is not consumed by any op in this graph. Is it a typo?";
+      // Check that all RNN back-edge source arrays are produced
+      QCHECK(GetOpWithOutput(model, rnn_state.back_edge_source_array()))
+          << "Specified RNN back-edge source array "
+          << rnn_state.back_edge_source_array()
+          << " is not produced by any op in this graph. Is it a typo?";
     }
   }
 }
+
 }  // namespace
 
 void CheckNoMissingArray(const Model& model) {
@@ -1597,6 +1613,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
       input_array.GetOrCreateMinMax() = input_minmax;
     }
   }
+
   // Creation of the RNN state arrays
   for (const auto& rnn_state : model->flags.rnn_states()) {
     CreateOrCheckRnnStateArray(rnn_state.state_array(), rnn_state.size(),
-- 
GitLab


From dd66b78b38b457c7d37527472c4e92a7a07f4b09 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 2 Oct 2018 10:15:11 -0700
Subject: [PATCH 1007/1357] [XLA] Fix some outdated comments referring to
 FlatMap

Also convert unordered_map to flat/node_hash_map where the comments allow.

PiperOrigin-RevId: 215410566
---
 tensorflow/compiler/xla/service/BUILD                | 2 +-
 tensorflow/compiler/xla/service/allocation_tracker.h | 5 +----
 tensorflow/compiler/xla/service/gpu/BUILD            | 1 +
 tensorflow/compiler/xla/service/gpu/nvptx_compiler.h | 9 +++++----
 tensorflow/compiler/xla/service/hlo_evaluator.h      | 5 +++--
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 13803f5ebe..3f8b734afb 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -253,8 +253,8 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
-        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
+        "@com_google_absl//absl/container:node_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h
index af227fe4da..43feccee3c 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.h
+++ b/tensorflow/compiler/xla/service/allocation_tracker.h
@@ -124,10 +124,7 @@ class AllocationTracker {
   int64 next_handle_ GUARDED_BY(mutex_);
 
   // A map from device ordinal to AllocationMap.
-  //
-  // This is not a TF FlatMap because (currently) FlatMap (and therefore
-  // AllocationMap) is not movable.
-  std::unordered_map<int, AllocationMap> opaque_to_allocation_map_
+  absl::flat_hash_map<int, AllocationMap> opaque_to_allocation_map_
       GUARDED_BY(mutex_);
 
   // A map from data handle to a vector of shaped buffers that represent the
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index a838464cae..522e9f5948 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -718,6 +718,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core:stream_executor_no_cuda",
+        "@com_google_absl//absl/container:node_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
index 8e97774750..c4a0b727cd 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/node_hash_map.h"
 #include "absl/types/optional.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/executable.h"
@@ -140,10 +141,10 @@ class NVPTXCompiler : public LLVMCompiler {
     tensorflow::condition_variable compilation_done_cv_;
   };
 
-  // Don't even think about switching this to FlatMap; iterator stability is
-  // critical here.
-  std::unordered_map<CompilationCacheKey, CompilationCacheValue,
-                     CompilationCacheHash, CompilationCacheEq>
+  // Don't even think about switching this to flat_hash_map; iterator stability
+  // is critical here.
+  absl::node_hash_map<CompilationCacheKey, CompilationCacheValue,
+                      CompilationCacheHash, CompilationCacheEq>
       compilation_cache_ GUARDED_BY(mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(NVPTXCompiler);
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index 6c2662ebae..2b0792616e 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <memory>
 
+#include "absl/container/node_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
@@ -210,8 +211,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // post-orderring.
   // Must be cleared for each evaluation.
   // Storing Literal in place require the container to have pointer stability so
-  // we cannot use FlatMap any more.
-  std::unordered_map<const HloInstruction*, Literal> evaluated_;
+  // we cannot use flat_hash_map any more.
+  absl::node_hash_map<const HloInstruction*, Literal> evaluated_;
 
  private:
   template <typename ReturnT, typename NativeT>
-- 
GitLab


From feb0dc87078698fd335b528c661c54226a58efa9 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Tue, 2 Oct 2018 11:30:04 -0700
Subject: [PATCH 1008/1357] Remove dependency on contrib model_variable.

Also remove add_arg_scope.

PiperOrigin-RevId: 215426187
---
 tensorflow/contrib/quantize/BUILD             |  1 -
 .../contrib/quantize/python/quant_ops.py      | 28 +++++++++++++------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD
index 23e3a25d71..94a2d9672d 100644
--- a/tensorflow/contrib/quantize/BUILD
+++ b/tensorflow/contrib/quantize/BUILD
@@ -138,7 +138,6 @@ py_library(
     srcs = ["python/quant_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/framework:framework_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:init_ops",
diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py
index 27069444a4..d9dc7fa62e 100644
--- a/tensorflow/contrib/quantize/python/quant_ops.py
+++ b/tensorflow/contrib/quantize/python/quant_ops.py
@@ -18,8 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.framework.python.ops import add_arg_scope
-from tensorflow.contrib.framework.python.ops import model_variable
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
@@ -29,7 +27,6 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.training import moving_averages
 
 
-@add_arg_scope
 def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None):
   """Adds a fake quantize layer with fixed quantization interval.
 
@@ -46,7 +43,21 @@ def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None):
         inputs, min=init_min, max=init_max)
 
 
-@add_arg_scope
+def _ModelVariable(name,
+                   shape=None,
+                   initializer=None,
+                   collections=None,
+                   trainable=None):
+  collections = list(collections or [])
+  collections += [ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES]
+  return variable_scope.get_variable(
+      name,
+      shape=shape,
+      initializer=initializer,
+      collections=collections,
+      trainable=trainable)
+
+
 def LastValueQuantize(inputs,
                       per_channel=False,
                       init_min=-6.0,
@@ -93,13 +104,13 @@ def LastValueQuantize(inputs,
     else:
       min_max_shape = []
 
-    min_var = model_variable(
+    min_var = _ModelVariable(
         'min',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_min),
         collections=[vars_collection],
         trainable=False)
-    max_var = model_variable(
+    max_var = _ModelVariable(
         'max',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_max),
@@ -153,7 +164,6 @@ def LastValueQuantize(inputs,
         narrow_range=narrow_range)
 
 
-@add_arg_scope
 def MovingAvgQuantize(inputs,
                       per_channel=False,
                       init_min=-6.0,
@@ -202,13 +212,13 @@ def MovingAvgQuantize(inputs,
     else:
       min_max_shape = []
 
-    min_var = model_variable(
+    min_var = _ModelVariable(
         'min',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_min),
         collections=[vars_collection],
         trainable=False)
-    max_var = model_variable(
+    max_var = _ModelVariable(
         'max',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_max),
-- 
GitLab


From b4c23d661228b549186dc82c16ecb22d261becf6 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 2 Oct 2018 11:40:08 -0700
Subject: [PATCH 1009/1357] [XLA] Replace the last FlatMap in XLA with a simple
 array.

A hash map for 18 pointers is just a waste of space.

PiperOrigin-RevId: 215428176
---
 tensorflow/compiler/xla/service/hlo_evaluator.cc |  2 +-
 tensorflow/compiler/xla/service/hlo_evaluator.h  | 10 ++--------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index d7c39b2778..eec8d242fa 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -1378,7 +1378,7 @@ Status HloEvaluator::HandleReduce(HloInstruction* reduce) {
             "unsupported");
       }
     }
-    return reduce->Visit(typed_visitors_.at(first_element_type).get());
+    return reduce->Visit(typed_visitors_[first_element_type].get());
   }
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index 2b0792616e..07f8d0aad4 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -29,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/macros.h"
 
 namespace xla {
@@ -135,7 +134,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // Wraps around instruction handling to infer types before dispatching to
   // the corresponding typed Visitor.
   Status DefaultAction(HloInstruction* hlo) override {
-    return hlo->Visit(typed_visitors_.at(hlo->shape().element_type()).get());
+    return hlo->Visit(typed_visitors_[hlo->shape().element_type()].get());
   }
 
   Status Preprocess(HloInstruction* hlo) override;
@@ -242,12 +241,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   }
 
   // Map from a primitive type to its associated (templated) DfsHloVisitor.
-  // Note: the hash function here is only needed because current gcc std::hash
-  // does not specialize for enum types. This should however be fixed in the
-  // future: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60970#c5
-  tensorflow::gtl::FlatMap<PrimitiveType, std::unique_ptr<DfsHloVisitor>,
-                           std::hash<int>>
-      typed_visitors_;
+  std::unique_ptr<DfsHloVisitor> typed_visitors_[PrimitiveType_ARRAYSIZE];
 
   // Caches pointers to input literals, assuming they are in post-order.
   // Literals are not owned by this class, and they must outlive the lifetime of
-- 
GitLab


From 16b44d48d485dbb62b9922e172df4cc460174046 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 2 Oct 2018 12:14:58 -0700
Subject: [PATCH 1010/1357] Fix the case when an object may have multiple
 directives with the same annotation.

PiperOrigin-RevId: 215435613
---
 tensorflow/python/autograph/core/BUILD        |  47 ++++---
 tensorflow/python/autograph/core/converter.py |  53 ++++----
 .../python/autograph/core/converter_test.py   | 124 ++++++++++++++++++
 3 files changed, 184 insertions(+), 40 deletions(-)
 create mode 100644 tensorflow/python/autograph/core/converter_test.py

diff --git a/tensorflow/python/autograph/core/BUILD b/tensorflow/python/autograph/core/BUILD
index 843e381f31..3ab2e7b1bc 100644
--- a/tensorflow/python/autograph/core/BUILD
+++ b/tensorflow/python/autograph/core/BUILD
@@ -33,6 +33,35 @@ py_library(
     ],
 )
 
+py_library(
+    name = "test_lib",
+    srcs = [
+        "converter_testing.py",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//tensorflow:__subpackages__"],
+    deps = [
+        ":core",
+        "//tensorflow/python/autograph/operators",
+        "//tensorflow/python/autograph/pyct",
+        "//tensorflow/python/autograph/pyct/static_analysis",
+        "//tensorflow/python/autograph/utils",
+        "@gast_archive//:gast",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "converter_test",
+    srcs = ["converter_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":core",
+        ":test_lib",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 py_test(
     name = "errors_test",
     srcs = ["errors_test.py"],
@@ -67,21 +96,3 @@ py_test(
         "//tensorflow/python:client_testlib",
     ],
 )
-
-py_library(
-    name = "test_lib",
-    srcs = [
-        "converter_testing.py",
-    ],
-    srcs_version = "PY2AND3",
-    visibility = ["//tensorflow:__subpackages__"],
-    deps = [
-        ":core",
-        "//tensorflow/python/autograph/operators",
-        "//tensorflow/python/autograph/pyct",
-        "//tensorflow/python/autograph/pyct/static_analysis",
-        "//tensorflow/python/autograph/utils",
-        "@gast_archive//:gast",
-        "@six_archive//:six",
-    ],
-)
diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index 80928ae7f4..408a573ad0 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -210,14 +210,22 @@ class Base(transformer.Base):
     self._ast_depth = 0
 
   def get_definition_directive(self, node, directive, arg, default):
-    """Returns the unique directive for a symbol, or a default if none exist.
+    """Returns the unique directive argument for a symbol.
 
     See lang/directives.py for details on directives.
 
+    Example:
+       # Given a directive in the code:
+       ag.foo_directive(bar, baz=1)
+
+       # One can write for an AST node Name(id='bar'):
+       get_definition_directive(node, ag.foo_directive, 'baz')
+
     Args:
-      node: ast.AST
-      directive: Callable[..., Any]
-      arg: str
+      node: ast.AST, the node representing the symbol for which the directive
+        argument is needed.
+      directive: Callable[..., Any], the directive to search.
+      arg: str, the directive argument to return.
       default: Any
 
     Raises:
@@ -227,27 +235,28 @@ class Base(transformer.Base):
     if not defs:
       return default
 
-    # TODO(mdan): Simplify this.
-    arg_values = []
+    arg_values_found = []
     for def_ in defs:
-      if (directive not in def_.directives or
-          arg not in def_.directives[directive]):
-        continue
-      arg_value = def_.directives[directive][arg]
-      for prev_value in arg_values:
-        if not ast_util.matches(arg_value, prev_value):
-          qn = anno.getanno(node, anno.Basic.QN)
-          raise ValueError('%s has ambiguous annotations for %s(%s): %s, %s' %
-                           (qn, directive.__name__, arg,
-                            compiler.ast_to_source(arg_value).strip(),
-                            compiler.ast_to_source(prev_value).strip()))
-      arg_values.append(arg_value)
-
-    if not arg_values:
+      if (directive in def_.directives and arg in def_.directives[directive]):
+        arg_values_found.append(def_.directives[directive][arg])
+
+    if not arg_values_found:
       return default
 
-    arg_value, = arg_values
-    return arg_value
+    if len(arg_values_found) == 1:
+      return arg_values_found[0]
+
+    # If multiple annotations reach the symbol, they must all match. If they do,
+    # return any of them.
+    first_value = arg_values_found[0]
+    for other_value in arg_values_found[1:]:
+      if not ast_util.matches(first_value, other_value):
+        qn = anno.getanno(node, anno.Basic.QN)
+        raise ValueError('%s has ambiguous annotations for %s(%s): %s, %s' %
+                         (qn, directive.__name__, arg,
+                          compiler.ast_to_source(other_value).strip(),
+                          compiler.ast_to_source(first_value).strip()))
+    return first_value
 
   def visit(self, node):
     if not self._ast_depth:
diff --git a/tensorflow/python/autograph/core/converter_test.py b/tensorflow/python/autograph/core/converter_test.py
new file mode 100644
index 0000000000..b73c67e337
--- /dev/null
+++ b/tensorflow/python/autograph/core/converter_test.py
@@ -0,0 +1,124 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for lists module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.autograph.core import converter
+from tensorflow.python.autograph.core import converter_testing
+from tensorflow.python.autograph.pyct import anno
+from tensorflow.python.autograph.pyct import parser
+from tensorflow.python.platform import test
+
+
+class TestConverter(converter.Base):
+  pass
+
+
+class ConverterBaseTest(converter_testing.TestCase):
+
+  def test_get_definition_directive_basic(self):
+
+    directive_key = object
+
+    def test_fn():
+      a = 1
+      return a
+
+    ns = {}
+    node, ctx = self.prepare(test_fn, ns)
+    symbol_a = node.body[1].value
+    defs, = anno.getanno(symbol_a, anno.Static.ORIG_DEFINITIONS)
+    defs.directives[directive_key] = {
+        'test_arg': parser.parse_expression('foo'),
+        'other_arg': parser.parse_expression('bar'),
+    }
+    c = TestConverter(ctx)
+    value = c.get_definition_directive(symbol_a, directive_key, 'test_arg',
+                                       None)
+    self.assertEqual(value.id, 'foo')
+
+  def test_get_definition_directive_default(self):
+
+    directive_key = object
+
+    def test_fn():
+      a = 1
+      return a
+
+    ns = {}
+    node, ctx = self.prepare(test_fn, ns)
+    symbol_a = node.body[1].value
+    c = TestConverter(ctx)
+    value = c.get_definition_directive(symbol_a, directive_key, 'test_arg',
+                                       parser.parse_expression('default'))
+    self.assertEqual(value.id, 'default')
+
+  def test_get_definition_directive_multiple_consistent(self):
+
+    directive_key = object
+
+    def test_fn():
+      a = 1
+      if a:
+        a = 2
+      return a
+
+    ns = {}
+    node, ctx = self.prepare(test_fn, ns)
+    symbol_a = node.body[2].value
+    defs = anno.getanno(symbol_a, anno.Static.ORIG_DEFINITIONS)
+    defs[0].directives[directive_key] = {
+        'test_arg': parser.parse_expression('foo'),
+        'other_arg': parser.parse_expression('bar'),
+    }
+    defs[1].directives[directive_key] = {
+        'test_arg': parser.parse_expression('foo'),
+        'other_arg': parser.parse_expression('baz'),
+    }
+    c = TestConverter(ctx)
+    value = c.get_definition_directive(symbol_a, directive_key, 'test_arg',
+                                       None)
+    self.assertEqual(value.id, 'foo')
+
+  def test_get_definition_directive_multiple_inconsistent(self):
+
+    directive_key = object
+
+    def test_fn():
+      a = 1
+      if a:
+        a = 2
+      return a
+
+    ns = {}
+    node, ctx = self.prepare(test_fn, ns)
+    symbol_a = node.body[2].value
+    defs = anno.getanno(symbol_a, anno.Static.ORIG_DEFINITIONS)
+    defs[0].directives[directive_key] = {
+        'test_arg': parser.parse_expression('foo'),
+    }
+    defs[1].directives[directive_key] = {
+        'test_arg': parser.parse_expression('bar'),
+    }
+    c = TestConverter(ctx)
+    with self.assertRaises(ValueError):
+      c.get_definition_directive(symbol_a, directive_key, 'test_arg', None)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 8d4ef71f06a06a093419bf0f80562a1941059029 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 2 Oct 2018 12:15:36 -0700
Subject: [PATCH 1011/1357] Allow creating a list from a tensor. Fix a few
 inconsistencies in the tensor list constructors.

PiperOrigin-RevId: 215435720
---
 .../autograph/lang/special_functions.py       | 24 ++++++++++--
 .../autograph/lang/special_functions_test.py  | 37 ++++++++++++++++++-
 .../autograph/operators/data_structures.py    | 17 ++++++++-
 .../operators/data_structures_test.py         | 31 ++++++++++++++--
 4 files changed, 99 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/autograph/lang/special_functions.py b/tensorflow/python/autograph/lang/special_functions.py
index e4838d1b6d..62ac018ac4 100644
--- a/tensorflow/python/autograph/lang/special_functions.py
+++ b/tensorflow/python/autograph/lang/special_functions.py
@@ -24,6 +24,26 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.autograph.operators import data_structures
+from tensorflow.python.framework import tensor_util
+
+
+def _validate_list_constructor(elements, element_dtype, element_shape):
+  """Validates the inputs of tensor_list."""
+  if element_dtype is not None and element_shape is not None:
+    return
+  if tensor_util.is_tensor(elements):
+    return
+  if isinstance(elements, (list, tuple)):
+    if elements:
+      return
+    else:
+      raise ValueError(
+          'element_dtype and element_shape are required when elements are'
+          ' empty')
+
+  raise ValueError(
+      'unknown type for elements: {}; only Tensor, list and tuple are'
+      ' allowed'.format(type(elements)))
 
 
 def tensor_list(elements,
@@ -52,9 +72,7 @@ def tensor_list(elements,
   Raises:
     ValueError: for invalid arguments
   """
-  if not (elements or (element_dtype and element_shape)):
-    raise ValueError(
-        'element_dtype and element_shape are required for empty lists')
+  _validate_list_constructor(elements, element_dtype, element_shape)
   if use_tensor_array:
     return data_structures.tf_tensor_array_new(elements, element_dtype,
                                                element_shape)
diff --git a/tensorflow/python/autograph/lang/special_functions_test.py b/tensorflow/python/autograph/lang/special_functions_test.py
index 545dd11729..206a32d07c 100644
--- a/tensorflow/python/autograph/lang/special_functions_test.py
+++ b/tensorflow/python/autograph/lang/special_functions_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.python.autograph.lang import special_functions
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -28,12 +30,43 @@ from tensorflow.python.platform import test
 
 class SpecialFunctionsTest(test.TestCase):
 
+  def test_tensor_list_empty_list(self):
+    l = special_functions.tensor_list([],
+                                      element_dtype=dtypes.int32,
+                                      element_shape=())
+    sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(sl), [])
+
+    l = special_functions.tensor_list((),
+                                      element_dtype=dtypes.int32,
+                                      element_shape=())
+    sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(sl), [])
+
+  def test_tensor_list_tensor(self):
+    l = special_functions.tensor_list(
+        constant_op.constant([], dtype=dtypes.int32))
+    sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(sl), [])
+
+  def test_tensor_list_unsupported_initializer(self):
+    with self.assertRaisesRegexp(ValueError, 'unknown type'):
+      special_functions.tensor_list(np.array([1, 2, 3]))
+
+  def test_tensor_list_empty_list_no_type(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'element_dtype and element_shape are required'):
+      special_functions.tensor_list([])
+
   def test_tensor_list_from_elements(self):
     elements = [constant_op.constant([1, 2]), constant_op.constant([3, 4])]
 
     l = special_functions.tensor_list(elements)
     sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
-    with self.cached_session() as sess:
+    with self.test_session() as sess:
       self.assertAllEqual(sess.run(sl), [[1, 2], [3, 4]])
 
   def test_tensor_list_array_from_elements(self):
@@ -41,7 +74,7 @@ class SpecialFunctionsTest(test.TestCase):
 
     l = special_functions.tensor_list(elements, use_tensor_array=True)
     sl = l.stack()
-    with self.cached_session() as sess:
+    with self.test_session() as sess:
       self.assertAllEqual(sess.run(sl), [[1, 2], [3, 4]])
 
   def test_stack(self):
diff --git a/tensorflow/python/autograph/operators/data_structures.py b/tensorflow/python/autograph/operators/data_structures.py
index cc0a3c3544..b3a3851333 100644
--- a/tensorflow/python/autograph/operators/data_structures.py
+++ b/tensorflow/python/autograph/operators/data_structures.py
@@ -106,6 +106,14 @@ def tf_tensor_array_new(elements, element_dtype=None, element_shape=None):
 
 def tf_tensor_list_new(elements, element_dtype=None, element_shape=None):
   """Overload of new_list that stages a Tensor list creation."""
+  if tensor_util.is_tensor(elements):
+    if element_shape is not None:
+      raise ValueError(
+          'element shape may not be specified when creating list from tensor')
+    element_shape = array_ops.shape(elements)[1:]
+    l = list_ops.tensor_list_from_tensor(elements, element_shape=element_shape)
+    return l
+
   elements = tuple(ops.convert_to_tensor(el) for el in elements)
 
   all_dtypes = set(el.dtype for el in elements)
@@ -115,13 +123,15 @@ def tf_tensor_list_new(elements, element_dtype=None, element_shape=None):
       raise ValueError(
           'incompatible dtype; specified: {}, inferred from {}: {}'.format(
               element_dtype, elements, inferred_dtype))
-  else:
+  elif all_dtypes:
     # Heterogeneous lists are ok.
     if element_dtype is not None:
       raise ValueError(
           'specified dtype {} is inconsistent with that of elements {}'.format(
               element_dtype, elements))
     inferred_dtype = dtypes.variant
+  else:
+    inferred_dtype = dtypes.variant
 
   all_shapes = set(tuple(el.shape.as_list()) for el in elements)
   if len(all_shapes) == 1:
@@ -130,19 +140,22 @@ def tf_tensor_list_new(elements, element_dtype=None, element_shape=None):
       raise ValueError(
           'incompatible shape; specified: {}, inferred from {}: {}'.format(
               element_shape, elements, inferred_shape))
-  else:
+  elif all_shapes:
     # Heterogeneous lists are ok.
     if element_shape is not None:
       raise ValueError(
           'specified shape {} is inconsistent with that of elements {}'.format(
               element_shape, elements))
     inferred_shape = constant_op.constant(-1)  # unknown shape, by convention
+  else:
+    inferred_shape = constant_op.constant(-1)  # unknown shape, by convention
 
   if element_dtype is None:
     element_dtype = inferred_dtype
   if element_shape is None:
     element_shape = inferred_shape
 
+  element_shape = ops.convert_to_tensor(element_shape, dtype=dtypes.int32)
   l = list_ops.empty_tensor_list(
       element_shape=element_shape, element_dtype=element_dtype)
   for el in elements:
diff --git a/tensorflow/python/autograph/operators/data_structures_test.py b/tensorflow/python/autograph/operators/data_structures_test.py
index 8532dbe466..6039b07982 100644
--- a/tensorflow/python/autograph/operators/data_structures_test.py
+++ b/tensorflow/python/autograph/operators/data_structures_test.py
@@ -45,6 +45,20 @@ class ListTest(test.TestCase):
     with self.cached_session() as sess:
       self.assertAllEqual(sess.run(t), [3, 4, 5])
 
+  def test_tf_tensor_list_new_empty(self):
+    l = data_structures.tf_tensor_list_new([],
+                                           element_dtype=dtypes.int32,
+                                           element_shape=())
+    t = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.cached_session() as sess:
+      self.assertAllEqual(sess.run(t), [])
+
+  def test_tf_tensor_list_new_from_tensor(self):
+    l = data_structures.tf_tensor_list_new(constant_op.constant([3, 4, 5]))
+    t = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.cached_session() as sess:
+      self.assertAllEqual(sess.run(t), [3, 4, 5])
+
   def test_tf_tensor_list_new_illegal_input(self):
     with self.assertRaises(ValueError):
       data_structures.tf_tensor_list_new([3, 4.0])
@@ -56,9 +70,8 @@ class ListTest(test.TestCase):
     with self.assertRaises(ValueError):
       data_structures.tf_tensor_list_new([3, 4], element_shape=(2,))
     with self.assertRaises(ValueError):
-      data_structures.tf_tensor_list_new([], element_shape=(2,))
-    with self.assertRaises(ValueError):
-      data_structures.tf_tensor_list_new([], element_dtype=dtypes.float32)
+      data_structures.tf_tensor_list_new(
+          constant_op.constant([1, 2, 3]), element_shape=[1])
 
   def test_tf_tensor_array_new(self):
     l = data_structures.tf_tensor_array_new([3, 4, 5])
@@ -141,6 +154,18 @@ class ListTest(test.TestCase):
       t = data_structures.list_stack(l, opts)
       self.assertAllEqual(sess.run(t), sess.run(initial_list))
 
+  def test_stack_tensor_list_empty(self):
+    l = list_ops.empty_tensor_list(
+        element_shape=-1,
+        element_dtype=dtypes.variant)
+
+    opts = data_structures.ListStackOpts(
+        element_dtype=dtypes.int32, original_call=None)
+
+    # TODO(mdan): Allow stacking empty lists if the dtype and shape are known.
+    with self.assertRaises(ValueError):
+      data_structures.list_stack(l, opts)
+
   def test_stack_fallback(self):
 
     def dummy_function(l):
-- 
GitLab


From d3e830e608211bc81cfb111abe3c0357bd92a12e Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Tue, 2 Oct 2018 12:38:53 -0700
Subject: [PATCH 1012/1357] Disable fused_conv tests that don't build in
 open-source.

PiperOrigin-RevId: 215440356
---
 tensorflow/contrib/fused_conv/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/fused_conv/BUILD b/tensorflow/contrib/fused_conv/BUILD
index 490da9b33b..57a5bfbf43 100644
--- a/tensorflow/contrib/fused_conv/BUILD
+++ b/tensorflow/contrib/fused_conv/BUILD
@@ -145,6 +145,7 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
     ],
     tags = [
+        "manual",  # TODO(b/117128481): re-enable after fixing OSS build
         "no_pip",
         "requires-gpu-sm70",
     ],
@@ -169,6 +170,7 @@ cuda_py_test(
     ],
     main = "python/ops/fused_conv2d_bias_activation_benchmark.py",
     tags = [
+        "manual",  # TODO(b/117128481): re-enable after fixing OSS build
         "requires-gpu-sm70",
     ],
 )
-- 
GitLab


From 508dd179b6b6dd78aa3e24212648789e8fc018a0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 12:41:31 -0700
Subject: [PATCH 1013/1357] Allow passing --allow_nonexistent_arrays via
 toco_convert

PiperOrigin-RevId: 215440829
---
 tensorflow/contrib/lite/python/convert.py    |  8 +++++++-
 tensorflow/contrib/lite/toco/tooling_util.cc | 19 +++++++++++--------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 613a1530f7..1bf42d7551 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -155,7 +155,8 @@ def build_toco_convert_protos(input_tensors,
                               post_training_quantize=False,
                               dump_graphviz_dir=None,
                               dump_graphviz_video=False,
-                              converter_mode=ConverterMode.DEFAULT):
+                              converter_mode=ConverterMode.DEFAULT,
+                              allow_nonexistent_arrays=False):
   """Builds protocol buffers describing a conversion of a model using TOCO.
 
   Typically this is to convert from TensorFlow GraphDef to TFLite, in which
@@ -212,6 +213,8 @@ def build_toco_convert_protos(input_tensors,
       every graph transformation. (default False)
     converter_mode: Experimental flag, subject to change. ConverterMode
       indicating which converter to use. (default ConverterMode.DEFAULT)
+    allow_nonexistent_arrays: Allow specifying array names that don't exist
+      or are unused in the final graph.  (default False)
 
   Returns:
     model_flags, toco_flags: two protocol buffers describing the conversion
@@ -261,6 +264,9 @@ def build_toco_convert_protos(input_tensors,
 
   for output_tensor in output_tensors:
     model.output_arrays.append(tensor_name(output_tensor))
+
+  model.allow_nonexistent_arrays = allow_nonexistent_arrays
+
   return model, toco
 
 
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index b87e01fbf0..e3f27e9e2a 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -852,27 +852,30 @@ void CheckNonExistentIOArrays(const Model& model) {
   if (model.flags.allow_nonexistent_arrays()) {
     return;
   }
+  static constexpr char general_comment[] =
+      "Is it a typo? To silence this message, pass this flag:  "
+      "allow_nonexistent_arrays";
   for (const auto& input_array : model.flags.input_arrays()) {
     QCHECK(GetOpWithInput(model, input_array.name()))
-        << "Specified input array " << input_array.name()
-        << " is not consumed by any op in this graph. Is it a typo?";
+        << "Specified input array \"" << input_array.name()
+        << "\" is not consumed by any op in this graph. " << general_comment;
   }
   for (const string& output_array : model.flags.output_arrays()) {
     QCHECK(GetOpWithOutput(model, output_array))
-        << "Specified output array " << output_array
-        << " is not produced by any op in this graph. Is it a typo?";
+        << "Specified output array \"" << output_array
+        << "\" is not produced by any op in this graph. " << general_comment;
   }
   for (const auto& rnn_state : model.flags.rnn_states()) {
     if (!rnn_state.discardable()) {
       // Check that all RNN states are consumed
       QCHECK(GetOpWithInput(model, rnn_state.state_array()))
-          << "Specified RNN state " << rnn_state.state_array()
-          << " is not consumed by any op in this graph. Is it a typo?";
+          << "Specified RNN state \"" << rnn_state.state_array()
+          << "\" is not consumed by any op in this graph. " << general_comment;
       // Check that all RNN back-edge source arrays are produced
       QCHECK(GetOpWithOutput(model, rnn_state.back_edge_source_array()))
-          << "Specified RNN back-edge source array "
+          << "Specified RNN back-edge source array \""
           << rnn_state.back_edge_source_array()
-          << " is not produced by any op in this graph. Is it a typo?";
+          << "\" is not produced by any op in this graph. " << general_comment;
     }
   }
 }
-- 
GitLab


From 0a201955b47d484c6bfa149364c264a5b5f91be7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 12:47:05 -0700
Subject: [PATCH 1014/1357] Copy tf.distributions to tfp.distributions, and
 deprecate the tf.distributions API.

PiperOrigin-RevId: 215441733
---
 .../python/debug/examples/examples_test.sh    |  2 +-
 tensorflow/python/ops/distributions/BUILD     |  7 ++++++
 .../python/ops/distributions/bernoulli.py     |  9 +++++++
 tensorflow/python/ops/distributions/beta.py   | 14 +++++++++++
 .../python/ops/distributions/categorical.py   |  9 +++++++
 .../python/ops/distributions/dirichlet.py     |  9 +++++++
 .../distributions/dirichlet_multinomial.py    |  9 +++++++
 .../python/ops/distributions/distribution.py  | 17 +++++++++++++
 .../python/ops/distributions/exponential.py   | 13 ++++++++++
 tensorflow/python/ops/distributions/gamma.py  | 14 +++++++++++
 .../ops/distributions/identity_bijector.py    |  9 +++++++
 .../ops/distributions/kullback_leibler.py     | 25 +++++++++++++++++++
 .../python/ops/distributions/laplace.py       | 14 +++++++++++
 .../python/ops/distributions/multinomial.py   |  9 +++++++
 tensorflow/python/ops/distributions/normal.py | 14 +++++++++++
 .../python/ops/distributions/student_t.py     | 14 +++++++++++
 .../distributions/transformed_distribution.py |  9 +++++++
 .../python/ops/distributions/uniform.py       |  9 +++++++
 18 files changed, 205 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/debug/examples/examples_test.sh b/tensorflow/python/debug/examples/examples_test.sh
index f7d597c8c0..89dc918616 100755
--- a/tensorflow/python/debug/examples/examples_test.sh
+++ b/tensorflow/python/debug/examples/examples_test.sh
@@ -115,7 +115,7 @@ OUTPUT=$(${OFFLINE_ANALYZER_BIN} 2>&1)
 set -e
 
 EXPECTED_OUTPUT="ERROR: dump_dir flag is empty."
-if [[ "${OUTPUT}" != "${EXPECTED_OUTPUT}" ]]; then
+if ! echo "${OUTPUT}" | grep -q "${EXPECTED_OUTPUT}"; then
   echo "ERROR: offline_analyzer output didn't match expectation: ${OUTPUT}" 1>&2
   echo "Expected output: ${EXPECTED_OUTPUT}"
   exit 1
diff --git a/tensorflow/python/ops/distributions/BUILD b/tensorflow/python/ops/distributions/BUILD
index e7ad028376..59ba9aee59 100644
--- a/tensorflow/python/ops/distributions/BUILD
+++ b/tensorflow/python/ops/distributions/BUILD
@@ -12,6 +12,13 @@ py_library(
         ["*.py"],
         exclude = ["util.py"],
     ),
+    deprecation = ("TensorFlow Distributions has migrated to " +
+                   "TensorFlow Probability " +
+                   "(https://github.com/tensorflow/probability). " +
+                   "Deprecated copies remaining in tf.distributions " +
+                   "will not receive new features, and will be removed by " +
+                   "early 2019. You should update all usage of " +
+                   "`tf.distributions` to `tfp.distributions`."),
     srcs_version = "PY2AND3",
     deps = [
         ":util",
diff --git a/tensorflow/python/ops/distributions/bernoulli.py b/tensorflow/python/ops/distributions/bernoulli.py
index 84d9d40a35..baecc321d3 100644
--- a/tensorflow/python/ops/distributions/bernoulli.py
+++ b/tensorflow/python/ops/distributions/bernoulli.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -39,6 +40,14 @@ class Bernoulli(distribution.Distribution):
   `1` outcome (vs a `0` outcome).
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                logits=None,
                probs=None,
diff --git a/tensorflow/python/ops/distributions/beta.py b/tensorflow/python/ops/distributions/beta.py
index d6f89a3517..51c4f6eb3d 100644
--- a/tensorflow/python/ops/distributions/beta.py
+++ b/tensorflow/python/ops/distributions/beta.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -150,6 +151,14 @@ class Beta(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                concentration1=None,
                concentration0=None,
@@ -341,6 +350,11 @@ class Beta(distribution.Distribution):
 class BetaWithSoftplusConcentration(Beta):
   """Beta with softplus transform of `concentration1` and `concentration0`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Beta(tf.nn.softplus(concentration1), "
+      "tf.nn.softplus(concentration2))` instead.",
+      warn_once=True)
   def __init__(self,
                concentration1,
                concentration0,
diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py
index fbbacf2521..26a3da2fb6 100644
--- a/tensorflow/python/ops/distributions/categorical.py
+++ b/tensorflow/python/ops/distributions/categorical.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -149,6 +150,14 @@ class Categorical(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(
       self,
       logits=None,
diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py
index 997b1d392d..675c30b383 100644
--- a/tensorflow/python/ops/distributions/dirichlet.py
+++ b/tensorflow/python/ops/distributions/dirichlet.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import special_math_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -156,6 +157,14 @@ class Dirichlet(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                concentration,
                validate_args=False,
diff --git a/tensorflow/python/ops/distributions/dirichlet_multinomial.py b/tensorflow/python/ops/distributions/dirichlet_multinomial.py
index 5350c82847..2e3151a5ab 100644
--- a/tensorflow/python/ops/distributions/dirichlet_multinomial.py
+++ b/tensorflow/python/ops/distributions/dirichlet_multinomial.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import special_math_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -163,6 +164,14 @@ class DirichletMultinomial(distribution.Distribution):
 
   # TODO(b/27419586) Change docstring for dtype of concentration once int
   # allowed.
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                total_count,
                concentration,
diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py
index 12fd039392..4741370cd8 100644
--- a/tensorflow/python/ops/distributions/distribution.py
+++ b/tensorflow/python/ops/distributions/distribution.py
@@ -34,6 +34,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
@@ -229,6 +230,14 @@ class ReparameterizationType(object):
     gradients / surrogate loss instead.
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self, rep_type):
     self._rep_type = rep_type
 
@@ -405,6 +414,14 @@ class Distribution(_BaseDistribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                dtype,
                reparameterization_type,
diff --git a/tensorflow/python/ops/distributions/exponential.py b/tensorflow/python/ops/distributions/exponential.py
index 02129b5e2a..6a52af8c33 100644
--- a/tensorflow/python/ops/distributions/exponential.py
+++ b/tensorflow/python/ops/distributions/exponential.py
@@ -27,6 +27,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import gamma
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -70,6 +71,14 @@ class Exponential(gamma.Gamma):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                rate,
                validate_args=False,
@@ -138,6 +147,10 @@ class Exponential(gamma.Gamma):
 class ExponentialWithSoftplusRate(Exponential):
   """Exponential with softplus transform on `rate`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Exponential(tf.nn.softplus(rate)).",
+      warn_once=True)
   def __init__(self,
                rate,
                validate_args=False,
diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py
index bbc64da7bc..4a2db208d4 100644
--- a/tensorflow/python/ops/distributions/gamma.py
+++ b/tensorflow/python/ops/distributions/gamma.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -121,6 +122,14 @@ class Gamma(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                concentration,
                rate,
@@ -279,6 +288,11 @@ class Gamma(distribution.Distribution):
 class GammaWithSoftplusConcentrationRate(Gamma):
   """`Gamma` with softplus of `concentration` and `rate`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Gamma(tf.nn.softplus(concentration), "
+      "tf.nn.softplus(rate))` instead.",
+      warn_once=True)
   def __init__(self,
                concentration,
                rate,
diff --git a/tensorflow/python/ops/distributions/identity_bijector.py b/tensorflow/python/ops/distributions/identity_bijector.py
index 8628e68f96..eded96f5bc 100644
--- a/tensorflow/python/ops/distributions/identity_bijector.py
+++ b/tensorflow/python/ops/distributions/identity_bijector.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.util import deprecation
 
 
 __all__ = [
@@ -43,6 +44,14 @@ class Identity(bijector.Bijector):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self, validate_args=False, name="identity"):
     super(Identity, self).__init__(
         forward_min_event_ndims=0,
diff --git a/tensorflow/python/ops/distributions/kullback_leibler.py b/tensorflow/python/ops/distributions/kullback_leibler.py
index fdeb97bf64..12743fa23d 100644
--- a/tensorflow/python/ops/distributions/kullback_leibler.py
+++ b/tensorflow/python/ops/distributions/kullback_leibler.py
@@ -22,6 +22,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
@@ -51,6 +52,14 @@ def _registered_kl(type_a, type_b):
   return kl_fn
 
 
+@deprecation.deprecated(
+    "2019-01-01",
+    "The TensorFlow Distributions library has moved to "
+    "TensorFlow Probability "
+    "(https://github.com/tensorflow/probability). You "
+    "should update all references to use `tfp.distributions` "
+    "instead of `tf.distributions`.",
+    warn_once=True)
 @tf_export("distributions.kl_divergence")
 def kl_divergence(distribution_a, distribution_b,
                   allow_nan_stats=True, name=None):
@@ -112,6 +121,14 @@ def kl_divergence(distribution_a, distribution_b,
       return array_ops.identity(kl_t, name="checked_kl")
 
 
+@deprecation.deprecated(
+    "2019-01-01",
+    "The TensorFlow Distributions library has moved to "
+    "TensorFlow Probability "
+    "(https://github.com/tensorflow/probability). You "
+    "should update all references to use `tfp.distributions` "
+    "instead of `tf.distributions`.",
+    warn_once=True)
 def cross_entropy(ref, other,
                   allow_nan_stats=True, name=None):
   """Computes the (Shannon) cross entropy.
@@ -155,6 +172,14 @@ class RegisterKL(object):
     # Return KL(norm_a || norm_b)
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self, dist_cls_a, dist_cls_b):
     """Initialize the KL registrar.
 
diff --git a/tensorflow/python/ops/distributions/laplace.py b/tensorflow/python/ops/distributions/laplace.py
index be17cf2527..4f6a8f587d 100644
--- a/tensorflow/python/ops/distributions/laplace.py
+++ b/tensorflow/python/ops/distributions/laplace.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import nn
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import special_math
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -71,6 +72,14 @@ class Laplace(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                loc,
                scale,
@@ -211,6 +220,11 @@ class Laplace(distribution.Distribution):
 class LaplaceWithSoftplusScale(Laplace):
   """Laplace with softplus applied to `scale`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Laplace(loc, tf.nn.softplus(scale)) "
+      "instead.",
+      warn_once=True)
   def __init__(self,
                loc,
                scale,
diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py
index d0943e8eee..8397353cd5 100644
--- a/tensorflow/python/ops/distributions/multinomial.py
+++ b/tensorflow/python/ops/distributions/multinomial.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -148,6 +149,14 @@ class Multinomial(distribution.Distribution):
   ```
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                total_count,
                logits=None,
diff --git a/tensorflow/python/ops/distributions/normal.py b/tensorflow/python/ops/distributions/normal.py
index 2feaf806c0..9f511709b9 100644
--- a/tensorflow/python/ops/distributions/normal.py
+++ b/tensorflow/python/ops/distributions/normal.py
@@ -32,6 +32,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import special_math
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -106,6 +107,14 @@ class Normal(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                loc,
                scale,
@@ -240,6 +249,11 @@ class Normal(distribution.Distribution):
 class NormalWithSoftplusScale(Normal):
   """Normal with softplus applied to `scale`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Normal(loc, tf.nn.softplus(scale)) "
+      "instead.",
+      warn_once=True)
   def __init__(self,
                loc,
                scale,
diff --git a/tensorflow/python/ops/distributions/student_t.py b/tensorflow/python/ops/distributions/student_t.py
index e8d214bbe0..b69e61925c 100644
--- a/tensorflow/python/ops/distributions/student_t.py
+++ b/tensorflow/python/ops/distributions/student_t.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import special_math_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -140,6 +141,14 @@ class StudentT(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                df,
                loc,
@@ -361,6 +370,11 @@ class StudentT(distribution.Distribution):
 class StudentTWithAbsDfSoftplusScale(StudentT):
   """StudentT with `df = floor(abs(df))` and `scale = softplus(scale)`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.StudentT(tf.floor(tf.abs(df)), loc, "
+      "tf.nn.softplus(scale)) instead.",
+      warn_once=True)
   def __init__(self,
                df,
                loc,
diff --git a/tensorflow/python/ops/distributions/transformed_distribution.py b/tensorflow/python/ops/distributions/transformed_distribution.py
index e80bf9ee42..1becfc1877 100644
--- a/tensorflow/python/ops/distributions/transformed_distribution.py
+++ b/tensorflow/python/ops/distributions/transformed_distribution.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.distributions import distribution as distribution_lib
 from tensorflow.python.ops.distributions import identity_bijector
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 
 __all__ = [
     "TransformedDistribution",
@@ -227,6 +228,14 @@ class TransformedDistribution(distribution_lib.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                distribution,
                bijector=None,
diff --git a/tensorflow/python/ops/distributions/uniform.py b/tensorflow/python/ops/distributions/uniform.py
index e66c4a37e7..b6b24187cc 100644
--- a/tensorflow/python/ops/distributions/uniform.py
+++ b/tensorflow/python/ops/distributions/uniform.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -76,6 +77,14 @@ class Uniform(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                low=0.,
                high=1.,
-- 
GitLab


From 78e4ce52aeda5a10ddaf5e64ea8958f439a2f9f2 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Tue, 2 Oct 2018 13:08:39 -0700
Subject: [PATCH 1015/1357] Add proto serialization/deserialization testing to
 the HLO parser tests. Many of the HLO parser tests verify that an text form
 of an HLO module preserves all information when running through ToString then
 parsing. It makes sense to also use these tests to exercise proto
 serialization/deserialization. This is done by adding additional
 instantiations of the parameterized parsing tests. This caught several bugs
 which are fixed in this CL:

(1) Domain instructions were not being serialized properly.
(2) Host send/recv instructions did not preserve the is_host_transfer bit.
(3) Sparse literals could not be serialized or deserialized.

PiperOrigin-RevId: 215445200
---
 tensorflow/compiler/xla/literal.cc            | 18 ++++
 tensorflow/compiler/xla/literal_test.cc       | 10 +++
 tensorflow/compiler/xla/service/BUILD         | 20 +----
 tensorflow/compiler/xla/service/hlo.proto     |  6 +-
 .../compiler/xla/service/hlo_instruction.cc   | 33 +++++--
 .../compiler/xla/service/hlo_instructions.cc  | 21 +++++
 .../compiler/xla/service/hlo_instructions.h   |  3 +
 .../compiler/xla/service/hlo_parser_test.cc   | 85 +++++++++++++------
 8 files changed, 141 insertions(+), 55 deletions(-)

diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index 5035f41988..d1dad0d45f 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -1850,6 +1850,24 @@ Status LiteralBase::Piece::CopyFromProto(const LiteralProto& proto) {
   TF_RET_CHECK(LayoutUtil::HasLayout(proto.shape()));
   TF_RET_CHECK(ShapeUtil::Equal(proto.shape(), subshape()));
 
+  if (LayoutUtil::IsSparseArray(subshape())) {
+    // Compute the number of elements (indices) in the sparse shape and reserve
+    // the necessary space in spare_indices.
+    TF_RET_CHECK(ShapeUtil::Rank(subshape()) != 0)
+        << "Scalar shapes cannot be sparse";
+    TF_RET_CHECK(proto.sparse_indices_size() % ShapeUtil::Rank(subshape()) == 0)
+        << "Unexpected number of indices in proto ("
+        << proto.sparse_indices_size() << ") for shape of rank "
+        << ShapeUtil::Rank(subshape());
+    const int64 index_count =
+        proto.sparse_indices_size() / ShapeUtil::Rank(subshape());
+    sparse_indices()->Resize(index_count);
+
+    // Copy the indices from the proto into the SparseIndexArray object.
+    TF_RETURN_IF_ERROR(CopyFromRepeatedField(sparse_indices()->mutable_data(),
+                                             proto.sparse_indices()));
+  }
+
   switch (subshape().element_type()) {
     case PRED:
       TF_RETURN_IF_ERROR(CopyFromRepeatedField(data<bool>(), proto.preds()));
diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc
index 7ad287c897..dd5b54e4c9 100644
--- a/tensorflow/compiler/xla/literal_test.cc
+++ b/tensorflow/compiler/xla/literal_test.cc
@@ -224,6 +224,16 @@ TEST_F(LiteralUtilTest, CreateSparse) {
             absl::Span<const int64>(expected_indices.data(),
                                     expected_indices.num_elements()));
   EXPECT_EQ(literal.data<int64>(), absl::Span<const int64>(expected_values));
+
+  // Serialize then deserialize and verify the resulting literal.
+  TF_ASSERT_OK_AND_ASSIGN(Literal literal_from_proto,
+                          Literal::CreateFromProto(literal.ToProto()));
+
+  EXPECT_EQ(literal_from_proto.sparse_indices()->data(),
+            absl::Span<const int64>(expected_indices.data(),
+                                    expected_indices.num_elements()));
+  EXPECT_EQ(literal_from_proto.data<int64>(),
+            absl::Span<const int64>(expected_values));
 }
 
 TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) {
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 3f8b734afb..f329a27e14 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -300,6 +300,7 @@ cc_library(
         "hlo_opcode.cc",
         "hlo_schedule.cc",
         "hlo_sharding.cc",
+        "hlo_sharding_metadata.cc",
     ],
     hdrs = [
         "dfs_hlo_visitor.h",
@@ -313,6 +314,7 @@ cc_library(
         "hlo_opcode.h",
         "hlo_schedule.h",
         "hlo_sharding.h",
+        "hlo_sharding_metadata.h",
     ],
     deps = [
         ":hlo_casting_utils",
@@ -2759,22 +2761,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "hlo_sharding_metadata",
-    srcs = ["hlo_sharding_metadata.cc"],
-    hdrs = [
-        "hlo_sharding_metadata.h",
-    ],
-    deps = [
-        ":hlo",
-        "//tensorflow/compiler/xla:shape_tree",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/core:lib",
-        "@com_google_absl//absl/memory",
-        "@com_google_absl//absl/types:span",
-    ],
-)
-
 cc_library(
     name = "hlo_domain_verifier",
     srcs = ["hlo_domain_verifier.cc"],
@@ -2825,7 +2811,6 @@ tf_cc_test(
         ":hlo_domain_isolator",
         ":hlo_domain_remover",
         ":hlo_parser",
-        ":hlo_sharding_metadata",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla/legacy_flags:debug_options_flags",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
@@ -3441,7 +3426,6 @@ cc_library(
     deps = [
         ":hlo",
         ":hlo_lexer",
-        ":hlo_sharding_metadata",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index caaca16f71..1ea26ddd5b 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto";
 option cc_enable_arenas = true;
 
 // Serialization of HloInstruction.
-// Next ID: 54
+// Next ID: 56
 message HloInstructionProto {
   reserved 10;
   reserved "parameter_name";
@@ -180,6 +180,10 @@ message HloInstructionProto {
 
   // Collective permute field.
   repeated SourceTarget source_target_pairs = 52;
+
+  // Sharding for kDomain instructions.
+  xla.OpSharding domain_entry_sharding = 54;
+  xla.OpSharding domain_exit_sharding = 55;
 }
 
 // Serialization of HloComputation.
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 0207f9ae3f..de22b2d3a5 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -39,6 +39,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_sharding_metadata.h"
 #include "tensorflow/compiler/xla/service/name_uniquer.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
@@ -467,14 +468,27 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
           proto.dot_dimension_numbers(), precision_config);
       break;
     }
-    case HloOpcode::kDomain:
+    case HloOpcode::kDomain: {
       TF_RET_CHECK(proto.operand_ids_size() == 1)
           << "Domain instruction should have 1 operands but sees "
           << proto.operand_ids_size();
+      TF_RET_CHECK(proto.has_domain_entry_sharding())
+          << "Domain instruction must domain_entry_sharding";
+      TF_RET_CHECK(proto.has_domain_exit_sharding())
+          << "Domain instruction must domain_exit_sharding";
+      TF_ASSIGN_OR_RETURN(
+          HloSharding entry_hlo_sharding,
+          HloSharding::FromProto(proto.domain_entry_sharding()));
+      TF_ASSIGN_OR_RETURN(HloSharding exit_hlo_sharding,
+                          HloSharding::FromProto(proto.domain_exit_sharding()));
       instruction = absl::make_unique<HloDomainInstruction>(
-          proto.shape(), operands(0), /*operand_side_metadata=*/nullptr,
-          /*user_side_metadata=*/nullptr);
+          proto.shape(), operands(0),
+          absl::make_unique<ShardingMetadata>(
+              std::make_shared<const HloSharding>(entry_hlo_sharding)),
+          absl::make_unique<ShardingMetadata>(
+              std::make_shared<const HloSharding>(exit_hlo_sharding)));
       break;
+    }
     default: {
       instruction = absl::WrapUnique(new HloInstruction(opcode, proto.shape()));
       for (const int64 operand_id : proto.operand_ids()) {
@@ -482,12 +496,6 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
             << "No instruction with id " << operand_id;
         instruction->AppendOperand(instruction_map.at(operand_id));
       }
-      for (const int64 predecessor_id : proto.control_predecessor_ids()) {
-        TF_RET_CHECK(ContainsKey(instruction_map, predecessor_id))
-            << "No instruction with id " << predecessor_id;
-        TF_RETURN_IF_ERROR(instruction_map.at(predecessor_id)
-                               ->AddControlDependencyTo(instruction.get()));
-      }
       if (instruction->opcode() != HloOpcode::kFusion) {
         for (const int64 computation_id : proto.called_computation_ids()) {
           TF_RET_CHECK(ContainsKey(computation_map, computation_id))
@@ -503,6 +511,13 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     }
   }
 
+  for (const int64 predecessor_id : proto.control_predecessor_ids()) {
+    TF_RET_CHECK(ContainsKey(instruction_map, predecessor_id))
+        << "No instruction with id " << predecessor_id;
+    TF_RETURN_IF_ERROR(instruction_map.at(predecessor_id)
+                           ->AddControlDependencyTo(instruction.get()));
+  }
+
   TF_RET_CHECK(!proto.name().empty());
   instruction->SetAndSanitizeName(proto.name());
   instruction->metadata_ = proto.metadata();
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 1bc168c8b7..68d0979f5c 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_sharding_metadata.h"
 #include "tensorflow/compiler/xla/window_util.h"
 
 namespace xla {
@@ -213,6 +214,7 @@ HloSendRecvInstruction::HloSendRecvInstruction(HloOpcode opcode,
 HloInstructionProto HloSendRecvInstruction::ToProto() const {
   HloInstructionProto proto = HloInstruction::ToProto();
   proto.set_channel_id(channel_id_);
+  proto.set_is_host_transfer(is_host_transfer_);
   return proto;
 }
 
@@ -2310,4 +2312,23 @@ std::unique_ptr<HloInstruction> HloDomainInstruction::CloneWithNewOperandsImpl(
       shape, new_operands[0], operand_side_metadata_->Clone(),
       user_side_metadata_->Clone());
 }
+
+HloInstructionProto HloDomainInstruction::ToProto() const {
+  HloInstructionProto proto = HloInstruction::ToProto();
+  auto operand_side_sharding =
+      dynamic_cast<const ShardingMetadata*>(operand_side_metadata_.get());
+  if (operand_side_sharding) {
+    *proto.mutable_domain_entry_sharding() =
+        operand_side_sharding->sharding()->ToProto();
+  }
+
+  auto user_side_sharding =
+      dynamic_cast<const ShardingMetadata*>(user_side_metadata_.get());
+  if (user_side_sharding) {
+    *proto.mutable_domain_exit_sharding() =
+        user_side_sharding->sharding()->ToProto();
+  }
+
+  return proto;
+}
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 9c22f5db7e..c929867bb9 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -1341,6 +1341,9 @@ class HloDomainInstruction : public HloInstruction {
       std::unique_ptr<DomainMetadata> operand_side_metadata,
       std::unique_ptr<DomainMetadata> user_side_metadata);
 
+  // Returns a serialized representation of this instruction.
+  HloInstructionProto ToProto() const override;
+
   // Retrieves the operand side metadata of a kDomain instruction.
   const DomainMetadata& operand_side_metadata() const {
     return *operand_side_metadata_;
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index 96db96bdb9..dd4ee780f0 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1163,49 +1163,80 @@ ENTRY Sort {
   // clang-format on
 }
 
-class HloParserTest : public ::testing::Test,
-                      public ::testing::WithParamInterface<TestData> {
+// The test class for those tests defined above which round-trip through the
+// parser and ToString is templatized on two bool parameters:
+//
+//  short_form : used for the "short" test cases which use the ShortParsable
+//    output form.
+//  proto_round_trip : whether the module should also be round-tripped through
+//    HloProto form. This provides much better coverage for the proto
+//    serialization/deserialization.
+//
+// The proto_round_trip=true case also technically covers the Parser->ToString
+// roundtrip as well, but separating out the Parser->ToString roundtrip as its
+// own test provides better isolation and could conceivably catch weirdo bugs
+// which are hidden by interaction between the textual and proto roundtripping.
+template <bool short_form, bool proto_round_trip>
+class HloParameterizedParserTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<TestData> {
  protected:
-  static void ExpectHasSubstr(string_view s, string_view expected) {
-    EXPECT_TRUE(absl::StrContains(s, expected))
-        << "'" << s << "' does not contain '" << expected << "'";
-  }
-
   // Expects "ToString(ParseHloString(string)) == string", that is, parses the
   // string, asserts that it succeeded, stringifies the parsed module, and
   // checks that the it equals the original string.
   void ExpectEqual() {
     const string& original = GetParam().module_string;
-    auto result = ParseHloString(original);
-    TF_ASSERT_OK(result.status());
-    EXPECT_EQ(original, result.ValueOrDie()->ToString(
-                            HloPrintOptions().set_print_large_constants(true)));
+    TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                            ParseHloString(original));
+    if (proto_round_trip) {
+      TF_ASSERT_OK_AND_ASSIGN(module, HloModule::CreateFromProto(
+                                          module->ToProto(), module->config()));
+    }
+    if (short_form) {
+      EXPECT_EQ(original, module->ToString(HloPrintOptions::ShortParsable()));
+    } else {
+      EXPECT_EQ(
+          original,
+          module->ToString(HloPrintOptions().set_print_large_constants(true)));
+    }
   }
 };
 
-class HloParserShortTest : public HloParserTest {
- protected:
-  void ExpectEqualShort() {
-    const string& original = GetParam().module_string;
-    auto result = ParseHloString(original);
-    TF_ASSERT_OK(result.status());
-    EXPECT_EQ(original,
-              result.ValueOrDie()->ToString(HloPrintOptions::ShortParsable()));
-  }
-};
+// These using shenanigans are required because the TEST_P macro doesn't like
+// template instantiations which contain commas.
+using HloParserTestLong = HloParameterizedParserTest<false, false>;
+using HloParserTestLongProto = HloParameterizedParserTest<false, true>;
+using HloParserTestShort = HloParameterizedParserTest<true, false>;
+using HloParserTestShortProto = HloParameterizedParserTest<true, true>;
 
-TEST_P(HloParserTest, Run) { ExpectEqual(); }
+TEST_P(HloParserTestLong, Run) { ExpectEqual(); }
+TEST_P(HloParserTestLongProto, Run) { ExpectEqual(); }
+TEST_P(HloParserTestShort, Run) { ExpectEqual(); }
+TEST_P(HloParserTestShortProto, Run) { ExpectEqual(); }
 
-TEST_P(HloParserShortTest, Run) { ExpectEqualShort(); }
-
-INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTest,
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTestLong,
                         ::testing::ValuesIn(CreateTestCases()),
                         TestDataToString);
-
-INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserShortTest,
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation,
+                        HloParserTestLongProto,
+                        ::testing::ValuesIn(CreateTestCases()),
+                        TestDataToString);
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTestShort,
+                        ::testing::ValuesIn(CreateShortTestCases()),
+                        TestDataToString);
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation,
+                        HloParserTestShortProto,
                         ::testing::ValuesIn(CreateShortTestCases()),
                         TestDataToString);
 
+class HloParserTest : public ::testing::Test {
+ protected:
+  static void ExpectHasSubstr(string_view s, string_view expected) {
+    EXPECT_TRUE(absl::StrContains(s, expected))
+        << "'" << s << "' does not contain '" << expected << "'";
+  }
+};
+
 TEST_F(HloParserTest, Empty) {
   const string original = "";
   auto result = ParseHloString(original);
-- 
GitLab


From 8d12c635cc48e896da0bcac1cd568bd6381ca64e Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Tue, 2 Oct 2018 13:18:27 -0700
Subject: [PATCH 1016/1357] Support shape_invariants in while_v2. Note that
 this arg is temporary and may be replaced by automatic shape inference in TF
 2.0 (or before). Add a output_shapes attr to While op to allow output shapes
 to be different from the incoming loop_vars.

PiperOrigin-RevId: 215446737
---
 .../function_functional_while.pbtxt           |  7 +++
 tensorflow/core/ops/functional_ops.cc         | 23 +++++++-
 .../kernel_tests/control_flow_ops_py_test.py  | 11 ++--
 tensorflow/python/ops/control_flow_ops.py     |  3 +-
 tensorflow/python/ops/while_v2.py             | 59 ++++++++++++++++---
 5 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties_testdata/function_functional_while.pbtxt b/tensorflow/core/grappler/costs/graph_properties_testdata/function_functional_while.pbtxt
index c94ee2f227..0ec95dd684 100644
--- a/tensorflow/core/grappler/costs/graph_properties_testdata/function_functional_while.pbtxt
+++ b/tensorflow/core/grappler/costs/graph_properties_testdata/function_functional_while.pbtxt
@@ -88,6 +88,13 @@ library {
           }
         }
       }
+      attr {
+        key: "output_shapes"
+        value {
+          list {
+          }
+        }
+      }
     }
     ret {
       key: "while"
diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc
index bda4a75c5d..fed3fa22ed 100644
--- a/tensorflow/core/ops/functional_ops.cc
+++ b/tensorflow/core/ops/functional_ops.cc
@@ -150,10 +150,29 @@ REGISTER_OP("While")
     .Attr("T: list(type) >= 0")
     .Attr("cond: func")
     .Attr("body: func")
+    .Attr("output_shapes: list(shape) = []")
     .SetIsStateful()
     .SetShapeFn([](shape_inference::InferenceContext* c) {
-      for (int i = 0; i < c->num_outputs(); ++i) {
-        c->set_output(i, c->input(i));
+      std::vector<PartialTensorShape> output_shapes;
+      TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
+      // If `output_shapes` attr is set use that as the shapes of the outputs
+      // else use the input shapes.
+      if (!output_shapes.empty()) {
+        if (output_shapes.size() != c->num_outputs()) {
+          return errors::InvalidArgument(
+              "`output_shapes` must be the same length as num outputs (",
+              output_shapes.size(), " vs. ", c->num_outputs());
+        }
+        for (size_t i = 0; i < output_shapes.size(); ++i) {
+          shape_inference::ShapeHandle output_shape_handle;
+          TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
+              output_shapes[i], &output_shape_handle));
+          c->set_output(static_cast<int>(i), output_shape_handle);
+        }
+      } else {
+        for (int i = 0; i < c->num_outputs(); ++i) {
+          c->set_output(i, c->input(i));
+        }
       }
       return Status::OK();
     });
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index ae61be614e..655fece5ff 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -1040,7 +1040,6 @@ class ControlFlowTest(test.TestCase):
       result = r[3].eval()
     self.assertAllEqual(42, result)
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhile_5(self):
     with self.cached_session():
 
@@ -1116,7 +1115,6 @@ class ControlFlowTest(test.TestCase):
     self._testWhile_Gpu_1(use_gpu=False)
     self._testWhile_Gpu_1(use_gpu=True)
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileShape(self):
     with self.cached_session():
       i = constant_op.constant(0)
@@ -1152,7 +1150,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n], parallel_iterations=20)
       self.assertEqual([10000], r.eval())
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileShapeInference(self):
     with self.cached_session():
       i = constant_op.constant(0)
@@ -1366,6 +1363,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(lambda x: x < 10, body, [x0])
       self.assertEqual(10, sess.run(r, {b: True}))
 
+  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCondWithControl(self):
     # Ensure that no control edges by an outer control dependency context are
     # added to nodes inside cond/while contexts.
@@ -1477,6 +1475,7 @@ class ControlFlowTest(test.TestCase):
     self._testCondWhile_3(use_gpu=False)
     self._testCondWhile_3(use_gpu=True)
 
+  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCond_1(self):
 
     with self.cached_session():
@@ -1493,6 +1492,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [i])
       self.assertAllEqual(10, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCond_2(self):
 
     with self.cached_session():
@@ -1502,6 +1502,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n])
       self.assertAllEqual(10, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCond_3(self):
 
     with self.cached_session():
@@ -1696,7 +1697,7 @@ class ControlFlowTest(test.TestCase):
       for i in xrange(10):
         self.assertEqual([i], q.dequeue().eval())
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
+  @test_util.disable_control_flow_v2("b/117119329 (stack)")
   def testWhileStack_1(self):
     with self.cached_session():
       s = gen_data_flow_ops.stack_v2(-1, dtypes.int32, stack_name="foo")
@@ -1781,7 +1782,6 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(1024.0, r.eval())
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileGrad_Shape(self):
     with self.cached_session():
       x = array_ops.placeholder(dtypes.float32, shape=[None])
@@ -2291,7 +2291,6 @@ class ControlFlowTest(test.TestCase):
       r = sess.run(r, feed_dict={v: 2.0})
       self.assertAllClose(1024.0, r)
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileGrad_Concat(self):
     with self.cached_session() as sess:
       x = variable_scope.get_variable("x", initializer=[[1., 2.]])
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 8ad71fe00c..f779c3d273 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -3225,7 +3225,8 @@ def while_loop(cond,
       raise ValueError("The while_v2 module is not set. Did you forget to "
                        "import tensorflow.python.ops."
                        "while_v2?")
-    return _while_v2.while_loop(cond, body, loop_vars, name)
+    return _while_v2.while_loop(
+        cond, body, loop_vars, shape_invariants=shape_invariants, name=name)
 
   with ops.name_scope(name, "while", loop_vars):
     if not loop_vars:
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index 6791e1cd61..8e88a84d60 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -32,6 +32,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import function_def_to_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import cond_v2_impl as cond_v2
 from tensorflow.python.ops import control_flow_ops
@@ -52,8 +53,17 @@ control_flow_ops._while_v2 = sys.modules[__name__]
 # handled in the CapturingGraph itself.
 
 
-def while_loop(cond, body, loop_vars, name=None):
+def while_loop(cond, body, loop_vars, shape_invariants=None, name=None):
   """Like tf.while_loop, except emits a single While op."""
+  flattened_loop_vars = nest.flatten(loop_vars)
+  if shape_invariants is not None:
+    nest.assert_same_structure(loop_vars, shape_invariants)
+    flattened_shapes = nest.flatten(shape_invariants)
+  else:
+    flattened_shapes = [t.shape for t in flattened_loop_vars]
+
+  del shape_invariants
+
   if not name:
     name = "while"
 
@@ -62,25 +72,33 @@ def while_loop(cond, body, loop_vars, name=None):
       cond_name = _get_unique_name(("%scond" % scope).replace("/", "_"))
       body_name = _get_unique_name(("%sbody" % scope).replace("/", "_"))
 
-    flattened_loop_vars = nest.flatten(loop_vars)
     num_outputs = len(flattened_loop_vars)
 
     # Add loop counter needed for computing gradients.
     flattened_loop_vars = [constant_op.constant(0., name="loop_counter")
                           ] + flattened_loop_vars
 
+    flattened_shapes = [tensor_shape.scalar()] + flattened_shapes
+
     # Build a `cond` wrapper that can handle the extra counter loop_var.
     def wrapped_cond(unused_loop_counter, *loop_vars):
       return cond(*loop_vars)
 
-    cond_graph = function.func_graph_from_py_func(cond_name, wrapped_cond,
-                                                  flattened_loop_vars, {})
+    signature = [
+        tensor_spec.TensorSpec(shape, t.dtype)
+        for shape, t in zip(flattened_shapes, flattened_loop_vars)
+    ]
+    cond_graph = function.func_graph_from_py_func(
+        cond_name, wrapped_cond, flattened_loop_vars, {}, signature=signature)
 
     # Add external_captures of cond to the list of loop vars.
     # Note that external tensors will be treated as loop invariants, i.e.,
     # the value of that tensor in each iteration is the same as it was at the
     # beginning of the loop execution.
     flattened_loop_vars = flattened_loop_vars + cond_graph.external_captures
+    flattened_shapes = flattened_shapes + [
+        t.shape for t in cond_graph.external_captures
+    ]
 
     def wrapped_body(loop_counter, *args):
       """Loop body augmented with counter update.
@@ -105,8 +123,12 @@ def while_loop(cond, body, loop_vars, name=None):
       # is_constant=True for inputs that are directly passed to outputs.
       return [loop_counter + 1] + list(outputs) + list(args[num_outputs:])
 
-    body_graph = function.func_graph_from_py_func(body_name, wrapped_body,
-                                                  flattened_loop_vars, {})
+    signature = [
+        tensor_spec.TensorSpec(shape, t.dtype)
+        for shape, t in zip(flattened_shapes, flattened_loop_vars)
+    ]
+    body_graph = function.func_graph_from_py_func(
+        body_name, wrapped_body, flattened_loop_vars, {}, signature=signature)
     # Add external captures of body to the list of loop vars.
     # Note that external tensors will be treated as loop invariants, i.e.,
     # the value of that tensor in each iteration is the same as it was at the
@@ -149,10 +171,17 @@ def while_loop(cond, body, loop_vars, name=None):
         # Add this modified tensor list to the list of outputs.
         body_graph.outputs.append(appended_tensor_list)
 
+    # Make sure that the shapes of the loop outputs are compatible with the
+    # shape invariants, or the shapes of the loop vars if the invariants are not
+    # specified.
+    _check_shapes_compat(body_graph.outputs[1:1 + num_outputs],
+                         flattened_shapes[1:1 + num_outputs],
+                         flattened_loop_vars[1:1 + num_outputs])
     outputs = gen_functional_ops._while(
         flattened_loop_vars,
         cond_v2._create_new_tf_function(cond_graph),
         cond_v2._create_new_tf_function(body_graph),
+        output_shapes=[t.shape for t in body_graph.outputs],
         name=scope)
 
     _copy_handle_data(body_graph.outputs, outputs)
@@ -216,6 +245,7 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
       loop_vars,
       cond_v2._create_new_tf_function(cond_grad_graph),
       cond_v2._create_new_tf_function(body_grad_graph),
+      output_shapes=[t.shape for t in body_grad_graph.outputs],
       name=_get_unique_name("%s_grad" % op.name))
 
   _copy_handle_data(body_grad_graph.outputs, outputs)
@@ -236,8 +266,10 @@ def _get_body_graph(while_op):
   Returns:
     `FuncGraph` for the while body.
   """
-  extra_inputs = list(while_op.inputs)
-  input_shapes = [t.shape for t in extra_inputs]
+  # TODO(srbs): Handle TensorShapeProto in function_def_to_graph.input_shapes.
+  input_shapes = [
+      tensor_shape.TensorShape(s) for s in while_op.get_attr("output_shapes")
+  ]
   func_name = while_op.get_attr("body").name
   fdef = while_op.graph._get_function(func_name).definition
   func_graph = function_def_to_graph.function_def_to_graph(fdef, input_shapes)
@@ -535,6 +567,17 @@ class _WhileBodyGradFuncGraph(function.FuncGraph):
     return captured_tensor
 
 
+def _check_shapes_compat(output_tensors, shape_invariants, input_tensors):
+  for (t, shape, input_t) in zip(output_tensors, shape_invariants,
+                                 input_tensors):
+    if not control_flow_ops._ShapeLessThanOrEqual(t.shape, shape):
+      raise ValueError(
+          "Input tensor '%s' enters the loop with shape %s, but has "
+          "shape %s after one iteration. To allow the shape to vary across "
+          "iterations, use the `shape_invariants` argument of tf.while_loop to "
+          "specify a less-specific shape." % (input_t.name, shape, t.shape))
+
+
 def _copy_handle_data(src_tensors, tgt_tensors):
   for src_t, tgt_t in zip(src_tensors, tgt_tensors):
     function._copy_handle_data(src_t, tgt_t)
-- 
GitLab


From a2599d1f89e3d6fe0a3f0436b5053fcbf4ae0265 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 13:28:51 -0700
Subject: [PATCH 1017/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 215448397
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 33 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  8 +++++
 2 files changed, 41 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 43c14d83b5..e46cbc863d 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -76797,6 +76797,39 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "While"
+  input_arg {
+    name: "input"
+    type_list_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "cond"
+    type: "func"
+  }
+  attr {
+    name: "body"
+    type: "func"
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "WholeFileReader"
   output_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index abee803889..0e9f939ab4 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -36935,6 +36935,14 @@ op {
     name: "body"
     type: "func"
   }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
   is_stateful: true
 }
 op {
-- 
GitLab


From a12b8c4afdca3ac2945d62b3b83ca2599ab360f9 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliacomputing.com>
Date: Sun, 16 Sep 2018 18:39:50 -0400
Subject: [PATCH 1018/1357] [xla] Improve validation of Broadcast shape

If one misreads the semantics of this instruction, it's easy to cause
an out of bounds access into the dimensions here. Add an extra check
to return a proper error to the user rather than crashing in that
case.

Ref #22130
---
 tensorflow/compiler/xla/service/hlo_verifier.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 50f39cbcb5..0f6ecd42f6 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -313,8 +313,9 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) {
        operand_dimension < ShapeUtil::Rank(operand_shape);
        ++operand_dimension) {
     int64 output_dimension = broadcast->dimensions()[operand_dimension];
-    TF_RET_CHECK(broadcast->shape().dimensions(output_dimension) ==
-                 operand_shape.dimensions(operand_dimension))
+    TF_RET_CHECK((output_dimension < ShapeUtil::Rank(broadcast->shape())) &&
+                 (broadcast->shape().dimensions(output_dimension) ==
+                 operand_shape.dimensions(operand_dimension)))
         << broadcast->ToString() << " operand shape " << operand_shape;
   }
   return Status::OK();
-- 
GitLab


From e45c90f0e4d17ac22048a73f1e81bd9c7a7a5145 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 14:03:40 -0700
Subject: [PATCH 1019/1357] Upgrade cloud tpu profiler to 1.12.0.

PiperOrigin-RevId: 215454323
---
 tensorflow/contrib/tpu/profiler/pip_package/setup.py | 2 +-
 tensorflow/contrib/tpu/profiler/version.h            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
index 2415c46718..f27ae38e04 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 
 from setuptools import setup
 
-_VERSION = '1.11.0'
+_VERSION = '1.12.0'
 
 CONSOLE_SCRIPTS = [
     'capture_tpu_profile=cloud_tpu_profiler.main:run_main',
diff --git a/tensorflow/contrib/tpu/profiler/version.h b/tensorflow/contrib/tpu/profiler/version.h
index 90d34b5ef1..4b6d1b2b07 100644
--- a/tensorflow/contrib/tpu/profiler/version.h
+++ b/tensorflow/contrib/tpu/profiler/version.h
@@ -16,6 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_
 #define TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_
 
-#define TPU_PROFILER_VERSION "1.11.0"
+#define TPU_PROFILER_VERSION "1.12.0"
 
 #endif  // TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_
-- 
GitLab


From c921e45bccac86ce0becc71cedc3da2c702d5c38 Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Tue, 2 Oct 2018 14:30:22 -0700
Subject: [PATCH 1020/1357] Add support for multiple input/output numpy arrays
 when using Keras APIs.

PiperOrigin-RevId: 215459075
---
 tensorflow/contrib/distribute/python/BUILD    |   1 +
 .../contrib/distribute/python/keras_test.py   |  88 ++++++++++--
 .../engine/distributed_training_utils.py      | 134 +++++++++++++++---
 tensorflow/python/keras/engine/training.py    |  48 ++++---
 .../keras/engine/training_distributed.py      |  30 ++--
 tensorflow/python/keras/models.py             |   5 +
 6 files changed, 237 insertions(+), 69 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index cfb9d42a6f..defa82f98a 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -728,6 +728,7 @@ cuda_py_test(
     additional_deps = [
         ":keras_test_lib",
     ],
+    shard_count = 16,
     tags = [
         "multi_and_single_gpu",
         "no_pip",
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 3aab2c521f..993cb2bac3 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -189,6 +189,14 @@ def get_dataset(distribution):
   return dataset
 
 
+def get_predict_dataset(distribution):
+  inputs = np.zeros((10, 3), dtype=np.float32)
+  dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
+  dataset = dataset.repeat(100)
+  dataset = batch_wrapper(dataset, 10, distribution)
+  return dataset
+
+
 strategies = [combinations.default_strategy,
               combinations.one_device_strategy,
               combinations.mirrored_strategy_with_gpu_and_cpu,
@@ -387,16 +395,26 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
           distributed_training_utils.validate_distributed_dataset_inputs(
               strategy, x, y)
 
-  def test_calling_model_with_numpy_arrays(self):
+  # TODO(anjalisridhar): Move this test along with other numpy related tests to
+  # its own class.
+  @combinations.generate(strategy_combinations())
+  def test_creating_var_with_numpy_arrays(self, distribution):
+    with self.cached_session():
+      x = np.asarray(np.random.random((64, 3)), dtype=np.float32)
+      var_x = distributed_training_utils.get_var_for_numpy(distribution, x)
+      val = self.evaluate(var_x.value())
+      # Verify that the numpy value is copied to the variable.
+      self.assertAllEqual(x, val)
+
+  @combinations.generate(strategy_combinations())
+  def test_calling_model_with_numpy_arrays(self, distribution):
     with self.cached_session():
       model = get_model()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
-      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
-                                                     '/device:GPU:0'])
-      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
+      metrics = ['mae']
+      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
 
       inputs = np.zeros((64, 3), dtype=np.float32)
       targets = np.zeros((64, 4), dtype=np.float32)
@@ -419,6 +437,48 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       # with batch_size
       model.predict(inputs, batch_size=8)
 
+  @combinations.generate(strategy_combinations())
+  def test_calling_model_with_nested_numpy_arrays(self, distribution):
+    with self.cached_session():
+      a = keras.layers.Input(shape=(3,), name='input_a')
+      b = keras.layers.Input(shape=(3,), name='input_b')
+
+      dense = keras.layers.Dense(4, name='dense')
+      c = dense(a)
+      d = dense(b)
+      e = keras.layers.Dropout(0.5, name='dropout')(c)
+
+      model = keras.models.Model([a, b], [d, e])
+
+      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss, distribute=distribution)
+
+      input_a_np = np.asarray(np.random.random((64, 3)), dtype=np.float32)
+      input_b_np = np.asarray(np.random.random((64, 3)), dtype=np.float32)
+      inputs = [input_a_np, input_b_np]
+
+      output_d_np = np.asarray(np.random.random((64, 4)), dtype=np.float32)
+      output_e_np = np.asarray(np.random.random((64, 4)), dtype=np.float32)
+      targets = [output_d_np, output_e_np]
+
+      # Call fit with validation data
+      model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0)
+
+      # TODO(anjalisridhar): We need tests for when the batch size and steps are
+      # smaller and results in a 0 batch_size and steps value.
+      model.evaluate(inputs, targets)
+      # with steps
+      model.evaluate(inputs, targets, steps=2)
+      # with batch_size
+      model.evaluate(inputs, targets, batch_size=8)
+
+      model.predict(inputs)
+      # with steps
+      model.predict(inputs, steps=2)
+      # with batch_size
+      model.predict(inputs, batch_size=8)
+
   @combinations.generate(strategy_combinations())
   def test_calling_model_on_same_dataset(self, distribution):
     with self.cached_session():
@@ -436,7 +496,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
                 validation_data=dataset, validation_steps=2)
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                 validation_data=dataset, validation_steps=2)
-      model.predict(dataset, steps=2)
+      model.predict(get_predict_dataset(distribution), steps=2)
 
   # TODO(priyag): Enable this test for TPU. Currently tuples/dict don't work
   # as clone_model's input_tensors argument only seems to accept list and not
@@ -496,10 +556,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
       model.evaluate(dataset, steps=2, verbose=1)
-      model.predict(dataset, steps=2)
-      # Test with validation data
-      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                validation_data=dataset, validation_steps=2)
+      model.predict(get_predict_dataset(distribution), steps=2)
 
   @combinations.generate(strategy_and_optimizer_combinations())
   def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer):
@@ -513,7 +570,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
       model.evaluate(dataset, steps=2, verbose=1)
-      model.predict(dataset, steps=2)
+      model.predict(get_predict_dataset(distribution), steps=2)
 
   def test_unsupported_features(self):
     with self.cached_session():
@@ -726,8 +783,12 @@ class NormalizationLayerWithDistributionStrategyTest(
       dataset = dataset.repeat(100)
       dataset = batch_wrapper(dataset, 32, distribution)
 
+      predict_dataset = dataset_ops.Dataset.from_tensor_slices(x)
+      predict_dataset = predict_dataset.repeat(100)
+      predict_dataset = batch_wrapper(predict_dataset, 32, distribution)
+
       model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10)
-      out = model.predict(dataset, steps=2)
+      out = model.predict(predict_dataset, steps=2)
       out -= keras.backend.eval(norm.beta)
       out /= keras.backend.eval(norm.gamma)
       np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
@@ -811,8 +872,7 @@ class CorrectnessWithDistributionStrategyTest(test.TestCase,
         predict_batch_size = 4
         if with_distribution:
           predict_batch_size //= with_distribution.num_towers
-        predict_dataset = dataset_ops.Dataset.from_tensor_slices((x_predict,
-                                                                  x_predict))
+        predict_dataset = dataset_ops.Dataset.from_tensor_slices(x_predict)
         predict_dataset = batch_wrapper(predict_dataset,
                                         predict_batch_size, distribution)
         predict_result = model.predict(predict_dataset, steps=1)
diff --git a/tensorflow/python/keras/engine/distributed_training_utils.py b/tensorflow/python/keras/engine/distributed_training_utils.py
index 39341a931b..050602868a 100644
--- a/tensorflow/python/keras/engine/distributed_training_utils.py
+++ b/tensorflow/python/keras/engine/distributed_training_utils.py
@@ -17,12 +17,18 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.python.client import session as session_module
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import callbacks
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import distribute as distribute_lib
 from tensorflow.python.util import nest
@@ -304,23 +310,19 @@ def validate_inputs(x, y, distribution_strategy):
       compiled.
 
   Raises:
-    ValueError: if input is not a Dataset or a numpy array.
+    ValueError: if input is not a Dataset or a numpy array(when we use
+      MirroredStrategy).
   """
-  if isinstance(x, list) or isinstance(y, list):
-    raise ValueError('DistributionStrategy does not support lists of numpy'
-                     'arrays. You must pass a Dataset object or a numpy array '
-                     'as input.')
-
   if isinstance(x, dict) or isinstance(y, dict):
-    raise ValueError('DistributionStrategy does not support inputs of type '
-                     'dict. You must pass a Dataset object or a numpy array as '
-                     'input.')
+    raise ValueError('`DistributionStrategy` does not support inputs of type '
+                     'dict. You must pass a `tf.data.Dataset` object or a '
+                     'numpy array as input.')
 
-  if isinstance(x, iterator_ops.Iterator) or \
-      isinstance(y, iterator_ops.Iterator):
-    raise ValueError('DistributionStrategy does not support inputs of type '
-                     'Iterator. You must pass a Dataset object or a numpy '
-                     'array as input.')
+  if (isinstance(x, iterator_ops.Iterator) or
+      isinstance(y, iterator_ops.Iterator)):
+    raise ValueError('`DistributionStrategy` does not support inputs of type '
+                     'Iterator. You must pass a `tf.data.Dataset` object or a '
+                     'numpy array as input.')
 
   if distribution_strategy.__class__.__name__ == 'TPUStrategy':
     for i in [x, y]:
@@ -334,14 +336,14 @@ def validate_inputs(x, y, distribution_strategy):
               'Found unknown shape {} in input {}.'.format(s, i))
 
 
-def get_input_batch_params(first_x_value, batch_size, current_strategy):
+def get_input_batch_params(first_x_value, batch_size, distribution_strategy):
   """Calculate the number of batches and steps/steps_per_epoch.
 
   Args:
     first_x_value: This is the first input numpy array that is passed in as the
       model input.
     batch_size: The specified batch_size or the default batch_size of 32.
-    current_strategy: The current DistributionStrategy used to compile the
+    distribution_strategy: The current DistributionStrategy used to compile the
       model.
 
   Returns:
@@ -359,14 +361,14 @@ def get_input_batch_params(first_x_value, batch_size, current_strategy):
   # TODO(anjalisridhar): TPU currently supports using the num_towers property.
   # We might want to look into implementing worker_devices. In multi worker
   # strategy, perhaps num_towers works better?
-  steps = num_batches // current_strategy.num_towers
+  steps = num_batches // distribution_strategy.num_towers
   if not steps:
     # TODO(anjalisridhar): Number of towers in the error message may not convey
     # what we want to the user. Is there another terminology that we can use
     # that is consistent across different strategies.
     raise ValueError('The number of batches %d is smaller than the number '
                      'of towers %d used for DistributionStrategy. ' %
-                     num_batches, current_strategy.num_towers)
+                     (num_batches, distribution_strategy.num_towers))
   return steps
 
 
@@ -376,3 +378,99 @@ def get_batch_dimension(iterator):
   # all.
   dims = shapes[0].dims
   return dims[0] if dims else None
+
+
+def get_cpu_device(distribution_strategy):
+  """Returns the CPU device of the TPU host or the default CPU device string.
+
+  Args:
+    distribution_strategy: The DistributionStrategy used to compile the model.
+
+  Returns:
+    A device string which is the TPU host's CPU device in case of
+    TPUDistributionStrategy or the default CPU device string in all other
+    cases.
+
+  Raises:
+    NotImplementedError: We currently don't support copying numpy data to
+    multiple hosts in the case of Cloud TPU pods.
+  """
+  if distribution_strategy.__class__.__name__ == 'TPUStrategy':
+    if distribution_strategy.num_hosts > 1:
+      raise NotImplementedError('TPUDistributionStrategy does not '
+                                'support numpy inputs when running on Cloud'
+                                'TPU pods.')
+    return distribution_strategy.get_host_cpu_device(0)
+  else:
+    # For all strategies except TPUDistributionStrategy
+    # TODO(anjalisridhar): We may need to modify this when we add support for
+    # multi-worker strategy.
+    return '/CPU:0'
+
+
+def get_var_for_numpy(distribution_strategy, x):
+  if isinstance(x, list):
+    var_x = tuple([_get_var_for_numpy(distribution_strategy, single_input)
+                   for single_input in x])
+  else:
+    var_x = _get_var_for_numpy(distribution_strategy, x)
+  return var_x
+
+
+def _get_var_for_numpy(distribution_strategy, input_array):
+  """Creates a variable and assigns the value of the numpy array to it.
+
+  Args:
+    distribution_strategy: The DistributionStrategy used to compile the model.
+    input_array: The input numpy array whose value will be assigned to the
+      variable we create.
+
+  Returns:
+    The variable to which we will copy the value of the input numpy array.
+
+  """
+  with ops.device(get_cpu_device(distribution_strategy)):
+    # Create and initialize a variable on the CPU device. This is the CPU
+    # device of the host in the case of TPUDistributionStrategy.
+    input_var = variables.VariableV1(array_ops.zeros(input_array.shape,
+                                                     input_array.dtype),
+                                     trainable=False, use_resource=True)
+  K.get_session().run(input_var.initializer)
+
+  # Create a placeholder for the numpy array input slices. We copy the value
+  # of the input numpy array to the variable in slices of size 64 MB to avoid
+  # running into memory issues or RPC message limits.
+  start_placeholder = array_ops.placeholder(dtypes.int64, ())
+  end_placeholder = array_ops.placeholder(dtypes.int64, ())
+  slice_placeholder = array_ops.placeholder(input_var.dtype)
+  assign_slice_op = input_var[start_placeholder:end_placeholder].assign(
+      slice_placeholder)
+
+  # If each batch element is > 64 MB, then we copy each batch element
+  # individually. Otherwise, the slices will be < 128 MB. There might be padding
+  # which might mean that the slices are 128 MB even if the size of the
+  # tensor allocated is less than 128 MB.
+  # This formula gives slices with size:
+  # ceil(64 MB / byte size per batch element) bytes.
+  # Using ceil() guarantees we get a number >= 1.
+
+  # Calculate the size of each batch element.
+  byte_size_per_batch_element = np.prod(input_array.shape[1:]) * \
+                                input_var.dtype.size
+
+  # Calculate number of elements we want to copy per slice.
+  batch_size_per_slice = np.ceil((64 << 20) / byte_size_per_batch_element)
+
+  # Copy slices of the above size starting at 0, except the last slice will be
+  # smaller.
+  start = 0
+  limit = input_array.shape[0]
+  while start < limit:
+    end = min(start + batch_size_per_slice, limit)
+    K.get_session().run(assign_slice_op, feed_dict={
+        start_placeholder: start,
+        end_placeholder: end,
+        slice_placeholder: input_array[start:end]})
+    start = end
+
+  return input_var
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 5091cac836..c842b8192e 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -20,11 +20,9 @@ from __future__ import print_function
 
 import weakref
 import numpy as np
-import six
 
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.data.ops.dataset_ops import Dataset
 from tensorflow.python.eager import context
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -814,19 +812,21 @@ class Model(Network):
     first_x_value = nest.flatten(x)[0]
     if isinstance(first_x_value, np.ndarray):
       x_shape = first_x_value.shape
-      x_dtype = first_x_value.dtype
       if batch_size is None:
         batch_size = x_shape[0] // steps
       if y is not None:
-        first_y_value = nest.flatten(y)[0]
-        x = Dataset.from_generator(lambda x=x, y=y: six.moves.zip(x, y),
-                                   output_types=(x_dtype, first_y_value.dtype),
-                                   output_shapes=(x_shape[1:],
-                                                  first_y_value.shape[1:]))
+        var_x = distributed_training_utils.get_var_for_numpy(
+            self._distribution_strategy, x)
+        var_y = distributed_training_utils.get_var_for_numpy(
+            self._distribution_strategy, y)
+
+        x = dataset_ops.Dataset.from_tensor_slices((var_x, var_y))
         # TODO(anjalisridhar): What should the buffer size be?
         x = x.shuffle(10000)
         x = x.repeat()
-        x = x.batch(batch_size)
+        # We need to use the drop_remainder argument to allow for a static
+        # input shape which is required for TPUs.
+        x = x.batch(batch_size, drop_remainder=True)
         y = None
       else:
         # This case is for the predict call where the dataset only contains
@@ -834,11 +834,13 @@ class Model(Network):
         # TODO(anjalisridhar): Raise an error if we are not able to process
         # all the predict samples. This can happen if the number of batches is
         # not evenly divisible by the number of worker devices.
-        x = Dataset.from_generator(lambda x=x: x,
-                                   output_types=x_dtype,
-                                   output_shapes=x_shape[1:])
+        var_x = distributed_training_utils.get_var_for_numpy(
+            self._distribution_strategy, x)
+        x = dataset_ops.Dataset.from_tensor_slices(var_x)
         x = x.repeat()
-        x = x.batch(batch_size)
+        # We need to use the drop_remainder argument to allow for a static
+        # input shape which is required for TPUs.
+        x = x.batch(batch_size, drop_remainder=True)
 
     # TODO(anjalisridhar): Can we use the iterator and getnext op cache?
     # We require users to pass Datasets since we distribute the dataset across
@@ -978,16 +980,18 @@ class Model(Network):
                            'Make sure that your dataset can generate '
                            'required number of samples.')
 
-      if (not isinstance(next_element, (list, tuple)) or
-          len(next_element) not in [2, 3]):
-        raise ValueError(
-            'Please provide model inputs as a list or tuple of 2  or 3'
-            'elements: (input, target) or (input, target, sample_weights)'
-            'Received %s' % next_element)
-      if len(next_element) == 2:
-        x, y = next_element
+      if isinstance(next_element, (list, tuple)):
+        if len(next_element) not in [2, 3]:
+          raise ValueError(
+              'Please provide model inputs as a list or tuple of 2  or 3'
+              'elements: (input, target) or (input, target, sample_weights)'
+              'Received %s' % next_element)
+        if len(next_element) == 2:
+          x, y = next_element
+        else:
+          x, y, sample_weight = next_element
       else:
-        x, y, sample_weight = next_element
+        x = next_element
     x, y, sample_weights = self._standardize_weights(x, y, sample_weight,
                                                      class_weight, batch_size)
     return x, y, sample_weights
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index a6470458d2..04e8d079c0 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -32,6 +32,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import distribute as distribute_lib
+from tensorflow.python.util import nest
 
 
 # TODO(priyag, sourabhbajaj): Refactor this file to address code duplication.
@@ -296,15 +297,16 @@ def _experimental_fit_loop(
     initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)
 
   if steps_per_epoch is None:
-    raise ValueError('steps_per_epoch should be specified in the fit call.')
-  steps_per_run_var = K.variable(
+    raise ValueError('`steps_per_epoch` should be specified when calling '
+                     '`fit` on the model.')
+  steps_per_run = K.variable(
       value=min(steps_per_epoch, current_strategy.steps_per_run),
       dtype='int32',
-      name='steps_per_run_var')
+      name='steps_per_run')
 
   with current_strategy.scope():
     ctx = current_strategy.run_steps_on_dataset(
-        step_fn, iterator, iterations=steps_per_run_var,
+        step_fn, iterator, iterations=steps_per_run,
         initial_loop_values=initial_loop_values)
 
   train_op = ctx.run_op
@@ -344,7 +346,7 @@ def _experimental_fit_loop(
       batch_logs = {'batch': step_index, 'size': 1, 'num_steps': step_count}
       callbacks.on_batch_begin(step_index, batch_logs)
       if prev_step_count is None or step_count != prev_step_count:
-        steps_per_run_var.load(step_count, K.get_session())
+        steps_per_run.load(step_count, K.get_session())
         prev_step_count = step_count
       try:
         _, outputs = K.get_session().run([train_op, output_tensors])
@@ -720,13 +722,9 @@ def _experimental_predict_loop(model, iterator, verbose=0, steps=None):
             model.predict_function.updates_op,
             model.predict_function.session_kwargs)
 
-  def step_fn(ctx, inputs, targets):
+  def step_fn(ctx, *inputs):
     """Clones the model and calls make_predict_function."""
 
-    # TODO(anjalisridhar): Support predict input correctly as it will not
-    # contain targets, only inputs.
-    del targets
-
     # TODO(priyag, sourabhbajaj): The model gets cloned every time
     # fit/test/predict is called. We should look into caching this keyed on
     # input shapes.
@@ -824,9 +822,10 @@ def _clone_and_build_model(model, inputs=None, targets=None):
 
   # TODO(priyag): Is there a cleaner way to do this? The API doc suggests a
   # single tensor should be OK but it throws an error in that case.
-  if (targets is not None and not isinstance(targets, list) and
-      not isinstance(targets, dict)):
+  if targets is not None and not isinstance(targets, (list, dict, tuple)):
     targets = [targets]
+  if isinstance(targets, tuple):
+    targets = nest.flatten(targets)
   cloned_model.compile(
       optimizer,
       model.loss,
@@ -891,11 +890,12 @@ def _get_input_from_iterator(iterator, model):
   """Get elements from the iterator and verify the input shape and type."""
   next_element = iterator.get_next()
 
-  if isinstance(next_element, tuple):
-    x, y = next_element
-  else:
+  if len(nest.flatten(next_element)) == len(model.inputs):
     x = next_element
     y = None
+  else:
+    x, y = next_element
+
   # Validate that all the elements in x and y are of the same type and shape.
   # We can then pass the first element of x and y to `_standardize_weights`
   # below and be confident of the output.
diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py
index b04b4df257..2883c9ad74 100644
--- a/tensorflow/python/keras/models.py
+++ b/tensorflow/python/keras/models.py
@@ -96,6 +96,8 @@ def _clone_functional_model(model, input_tensors=None):
   else:
     # Make sure that all input tensors come from a Keras layer.
     # If tensor comes from an input layer: cache the input layer.
+    if isinstance(input_tensors, tuple):
+      input_tensors = list(input_tensors)
     input_tensors = generic_utils.to_list(input_tensors)
     input_tensors_ = []
     for i, x in enumerate(input_tensors):
@@ -212,6 +214,9 @@ def _clone_sequential_model(model, input_tensors=None):
       raise ValueError('To clone a `Sequential` model, we expect '
                        ' at most one tensor '
                        'as part of `input_tensors`.')
+
+    if isinstance(input_tensors, tuple):
+      input_tensors = list(input_tensors)
     x = generic_utils.to_list(input_tensors)[0]
     if K.is_keras_tensor(x):
       origin_layer = x._keras_history[0]
-- 
GitLab


From 05812d761031b108b43560c90867b96dc4f030eb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 14:35:49 -0700
Subject: [PATCH 1021/1357] Fixes for few issues in
 HloModule::CreateFromProto()

PiperOrigin-RevId: 215460064
---
 tensorflow/compiler/xla/literal.cc            |  2 ++
 .../compiler/xla/service/hlo_computation.cc   | 22 +++++++++++++++++++
 .../compiler/xla/service/hlo_instruction.cc   | 20 +++++++++++++----
 .../compiler/xla/service/hlo_sharding.cc      |  8 +++++--
 tensorflow/compiler/xla/shape_util.cc         |  3 ++-
 5 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index d1dad0d45f..deeb140b8f 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -287,6 +287,8 @@ Status MutableLiteralBase::CopyElementFrom(const LiteralSlice& src_literal,
     return InvalidArgument("LiteralProto has no layout");
   }
 
+  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(proto.shape()));
+
   Literal literal(proto.shape());
 
   TF_RETURN_IF_ERROR(literal.root_piece_->ForEachMutableSubpieceWithStatus(
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 6ef67ab0a8..c2041c4667 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -535,6 +535,28 @@ HloComputation::CreateFromProto(
               return to_proto_id[a.get()] < to_proto_id[b.get()];
             });
 
+  TF_RETURN_IF_ERROR([&]() -> Status {
+    std::vector<bool> parameters_seen(parameter_count);
+    int parameters_seen_count = 0;
+    for (auto& instruction : instructions) {
+      if (instruction->opcode() == HloOpcode::kParameter) {
+        int64 param_no = instruction->parameter_number();
+        TF_RET_CHECK(param_no >= 0 && param_no < parameter_count)
+            << "Invalid parameter number.  Expected [0, " << parameter_count
+            << "), got " << param_no;
+        TF_RET_CHECK(!parameters_seen[param_no])
+            << "Parameter number " << param_no
+            << " already allocated in this computation";
+        parameters_seen[param_no] = true;
+        parameters_seen_count++;
+      }
+    }
+    TF_RET_CHECK(parameters_seen_count == parameter_count)
+        << "Not all parameters in range [0, " << parameter_count
+        << ") were referenced";
+    return Status::OK();
+  }());
+
   auto computation = absl::WrapUnique(
       new HloComputation(proto.name(), parameter_count, &instructions, root,
                          /*fusion_instruction=*/nullptr));
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index de22b2d3a5..5c16d6bb5e 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -81,6 +81,20 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
   const auto computations = [&computation_map, &proto](int index) {
     return computation_map.at(proto.called_computation_ids(index));
   };
+
+  TF_RET_CHECK(std::all_of(
+      proto.operand_ids().begin(), proto.operand_ids().end(),
+      [&instruction_map](int64 id) { return instruction_map.contains(id); }))
+      << proto.name() << " instruction contains invalid operand id(s)";
+
+  TF_RET_CHECK(std::all_of(
+      proto.called_computation_ids().begin(),
+      proto.called_computation_ids().end(),
+      [&computation_map](int64 id) { return computation_map.contains(id); }))
+      << proto.name() << " instruction references invalid computation id(s)";
+
+  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(proto.shape()));
+
   switch (opcode) {
     // Ops migrated to subclasses.
     case HloOpcode::kBatchNormTraining:
@@ -304,6 +318,8 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     } break;
     case HloOpcode::kOutfeed:
       TF_RET_CHECK(proto.operand_ids_size() == 2);
+      TF_RETURN_IF_ERROR(
+          ShapeUtil::ValidateShapeWithOptionalLayout(proto.outfeed_shape()));
       instruction = CreateOutfeed(proto.outfeed_shape(), operands(0),
                                   operands(1), proto.outfeed_config());
       break;
@@ -492,14 +508,10 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     default: {
       instruction = absl::WrapUnique(new HloInstruction(opcode, proto.shape()));
       for (const int64 operand_id : proto.operand_ids()) {
-        TF_RET_CHECK(ContainsKey(instruction_map, operand_id))
-            << "No instruction with id " << operand_id;
         instruction->AppendOperand(instruction_map.at(operand_id));
       }
       if (instruction->opcode() != HloOpcode::kFusion) {
         for (const int64 computation_id : proto.called_computation_ids()) {
-          TF_RET_CHECK(ContainsKey(computation_map, computation_id))
-              << "No computation with id " << computation_id;
           instruction->called_computations_.push_back(
               computation_map.at(computation_id));
         }
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index de7e6b53d4..94c7bafd3b 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -369,10 +369,14 @@ Status HloSharding::ValidateNonTuple(const Shape& shape,
     return HloSharding(tuple_shardings);
   } else if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) {
     return Replicate();
-  } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL ||
-             proto.tile_assignment_devices().size() == 1) {
+  } else if (proto.tile_assignment_devices().size() == 1) {
     return HloSharding(proto.tile_assignment_devices(0));
   }
+
+  TF_RET_CHECK(proto.type() != OpSharding::Type::OpSharding_Type_MAXIMAL)
+      << "Maximal sharding is expected to have single device assignment, but "
+      << proto.tile_assignment_devices().size() << " has provided.";
+
   // Some versions of gcc cannot infer the TileAssignment constructor from a
   // braced initializer-list, so create one manually.
   std::vector<int64> devices(proto.tile_assignment_devices().begin(),
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 020c167ee9..476a9fe868 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -831,7 +831,8 @@ StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
 
 /* static */ Status ShapeUtil::ValidateShapeWithOptionalLayoutInternal(
     const Shape& shape) {
-  if (shape.element_type() == PRIMITIVE_TYPE_INVALID) {
+  if (shape.element_type() == PRIMITIVE_TYPE_INVALID ||
+      !PrimitiveType_IsValid(shape.element_type())) {
     return InvalidArgument("shape has invalid element type: %s",
                            shape.ShortDebugString());
   }
-- 
GitLab


From 891e49f57b8229f58315cfeb743e38c235918083 Mon Sep 17 00:00:00 2001
From: Suyog Gupta <suyoggupta@google.com>
Date: Tue, 2 Oct 2018 14:46:13 -0700
Subject: [PATCH 1022/1357] Add missing documentation for use_tpu hparam

PiperOrigin-RevId: 215462000
---
 tensorflow/contrib/model_pruning/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md
index 15d95896d9..b313024e28 100644
--- a/tensorflow/contrib/model_pruning/README.md
+++ b/tensorflow/contrib/model_pruning/README.md
@@ -62,6 +62,7 @@ The pruning library allows for specification of the following hyper parameters:
 | sparsity_function_begin_step | integer | 0 | The global step at this which the gradual sparsity function begins to take effect |
 | sparsity_function_end_step | integer | 100 | The global step used as the end point for the gradual sparsity function |
 | sparsity_function_exponent | float | 3.0 | exponent = 1 is linearly varying sparsity between initial and final. exponent > 1 varies more slowly towards the end than the beginning |
+| use_tpu | bool | False | Training using TPUs? |
 
 The sparsity $$s_t$$ at global step $$t$$ is given by:
 
-- 
GitLab


From 664f3dde67bfa436e5216ae54ee256761c7c6962 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 14:52:16 -0700
Subject: [PATCH 1023/1357] Do not warn about loss of accuracy in trivial cases
 when all array elements are equal to either the min or the max value, so that
 they are trivially exactly quantized. This case does not normally occur for
 true learned weights, which is what this warning is intended for.

PiperOrigin-RevId: 215463096
---
 .../toco/graph_transformations/quantize.cc    | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
index 1bc366f555..fb299c31b7 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
@@ -97,15 +97,6 @@ const MinMax& GetOrComputeMinMax(Model* model, const string& array_name) {
   // to allow easily trying out quantization even if the graph
   // lacks some minmax information.
   if (array.buffer != nullptr) {
-    LOG(WARNING)
-        << "Constant array " << array_name
-        << " lacks MinMax information. To make up for that, we will now compute"
-        << " the MinMax from actual array elements. That will result in"
-        << " quantization parameters that probably do not match whichever "
-           "arithmetic"
-        << " was used during training, and thus will probably be a cause of "
-           "poor"
-        << " inference accuracy.";
     CHECK(array.buffer->type == ArrayDataType::kFloat);
     const auto& data = array.GetBuffer<ArrayDataType::kFloat>().data;
     // We always want [min, max] to contain 0.
@@ -120,6 +111,27 @@ const MinMax& GetOrComputeMinMax(Model* model, const string& array_name) {
       // to not be equal.
       max = 1.f;
     }
+    // No need to warn about accuracy if all array values are equal to either
+    // min or max:
+    // in that case, quantization is exact, and such arrays are not learned
+    // weights arrays for which fake-quantization would make sense, rather
+    // they tend to be hardcoded arrays of zeros or ones used in some graphs.
+    bool is_quantization_trivially_exact = true;
+    for (auto val : data) {
+      is_quantization_trivially_exact &= (val == min || val == max);
+    }
+    if (!is_quantization_trivially_exact) {
+      LOG(WARNING)
+          << "Constant array " << array_name
+          << " lacks MinMax information. To make up for that, we will now "
+             "compute"
+          << " the MinMax from actual array elements. That will result in"
+          << " quantization parameters that probably do not match whichever "
+             "arithmetic"
+          << " was used during training, and thus will probably be a cause of "
+             "poor"
+          << " inference accuracy.";
+    }
     auto& minmax = array.GetOrCreateMinMax();
     minmax.min = min;
     minmax.max = max;
-- 
GitLab


From 44f273e853360042ee14def03eba85d1e04a7272 Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Tue, 2 Oct 2018 14:54:08 -0700
Subject: [PATCH 1024/1357] [XLA] A test that disables layout assignment should
 only contain layout consistent HLO instructions.

Fix a dot test that disables layout assignment pass to not generate layout
inconsistent HLO instructions. This includes only adding the dot result to an
addend with the same layout, and disabling algebraic simplification which may
transform a dot to a multiplication with inconsistent layouts.

PiperOrigin-RevId: 215463477
---
 .../compiler/xla/tests/dot_operation_test.cc  | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index 0171f51583..6c0847a875 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -394,6 +394,10 @@ class ParametricDotTestWithoutLayoutAssignment : public ParametricDotTest {
   ParametricDotTestWithoutLayoutAssignment() {
     execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes(
         "layout-assignment");
+    // Disable algebraic simplification because the pass may replace a dot
+    // instruction with a layout-changing multiplication instruction.
+    execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes(
+        "algsimp");
   }
 };
 
@@ -404,31 +408,18 @@ std::vector<DotTestParam> CreateNoLayoutAssignmentDotTestParameters() {
     for (bool lhs_row_major : {true, false}) {
       for (bool rhs_row_major : {true, false}) {
         for (bool has_addend : {true, false}) {
+          // The addend needs to be row major to match the result of the dot.
           params.push_back({/*m=*/1, /*k=*/k, /*n=*/n,
                             /*dot_lhs_row_major=*/lhs_row_major,
                             /*dot_rhs_row_major=*/rhs_row_major,
                             /*has_addend=*/has_addend,
                             /*addend_row_major=*/true});
-          if (has_addend) {
-            params.push_back({/*m=*/1, /*k=*/k, /*n=*/n,
-                              /*dot_lhs_row_major=*/lhs_row_major,
-                              /*dot_rhs_row_major=*/rhs_row_major,
-                              /*has_addend=*/has_addend,
-                              /*addend_row_major=*/false});
-          }
           if (n != 1) {
             params.push_back({/*m=*/n, /*k=*/k, /*n=*/1,
                               /*dot_lhs_row_major=*/lhs_row_major,
                               /*dot_rhs_row_major=*/rhs_row_major,
                               /*has_addend=*/has_addend,
                               /*addend_row_major=*/true});
-            if (has_addend) {
-              params.push_back({/*m=*/n, /*k=*/k, /*n=*/1,
-                                /*dot_lhs_row_major=*/lhs_row_major,
-                                /*dot_rhs_row_major=*/rhs_row_major,
-                                /*has_addend=*/has_addend,
-                                /*addend_row_major=*/false});
-            }
           }
         }
       }
-- 
GitLab


From 08e5ad2839ca2c6749544ace354f78d00f5243d9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 15:06:38 -0700
Subject: [PATCH 1025/1357] Fix a bug: the use of sequence-point boolean
 operators here had the unintended effect of causing the second line not to
 run at all depending on the result from the first line.

PiperOrigin-RevId: 215466006
---
 .../read_array_minmax_and_narrow_range_from_fake_quant.cc   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
index 5b41c49bfa..eaa9d3bcda 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
@@ -71,8 +71,10 @@ bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model,
   CHECK(fq_op->minmax);
   CHECK_EQ(1, fq_op->inputs.size());
 
-  return ApplyAttrsToArray(this, model, *fq_op, fq_op->inputs[0]) ||
-         ApplyAttrsToArray(this, model, *fq_op, fq_op->outputs[0]);
+  bool changed = false;
+  changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->inputs[0]);
+  changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->outputs[0]);
+  return changed;
 }
 
 }  // namespace toco
-- 
GitLab


From cfec3aa38db1d2b70045e7b89d82fae87c3fec02 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 15:07:36 -0700
Subject: [PATCH 1026/1357] Update code to use
 convert_to_tensor_or_indexed_slices, since features may be SparseTensors as
 well.

PiperOrigin-RevId: 215466199
---
 .../estimator/python/estimator/dnn_with_layer_annotations.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index 5faf0aacfe..6ca7aaf989 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -151,7 +151,7 @@ def make_input_layer_with_layer_annotations(original_input_layer):
     # spec and looking at the keys.
     spec = feature_column_lib.make_parse_example_spec(feature_columns)
     for key in spec.keys():
-      tensor = ops.convert_to_tensor(features[key])
+      tensor = ops.convert_to_tensor_or_indexed_slices(features[key])
       ops.add_to_collection(
           LayerAnnotationsCollectionNames.keys(
               LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
-- 
GitLab


From bb84d5d5e309204110315f7d0ff8ca0dbb022dd2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 15:08:52 -0700
Subject: [PATCH 1027/1357] [XLA] Support parsing the canonical format of HLO
 text.

Also stop truncating operands in the canonical format.

PiperOrigin-RevId: 215466465
---
 .../xla/service/hlo_execution_profile.cc      |   5 +-
 .../compiler/xla/service/hlo_instruction.cc   |   2 +-
 .../compiler/xla/service/hlo_instruction.h    |  14 +-
 tensorflow/compiler/xla/service/hlo_parser.cc | 276 ++++++++++++------
 tensorflow/compiler/xla/service/hlo_parser.h  |   5 +-
 .../compiler/xla/service/hlo_parser_test.cc   | 142 ++++++++-
 6 files changed, 338 insertions(+), 106 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index de3d7a1677..ce4cad4235 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -90,8 +90,9 @@ std::unique_ptr<HloProfilePrinterData> CreateHloProfilePrinterData(
       HloInstructionInfo* instruction_info =
           computation_info->add_instruction_infos();
       instruction_info->set_long_name(hlo->ToString());
-      instruction_info->set_short_name(
-          hlo->ToString(HloPrintOptions().set_compact_operands(true)));
+      instruction_info->set_short_name(hlo->ToString(
+          HloPrintOptions().set_compact_operands(true).set_print_operand_names(
+              false)));
       instruction_info->set_category(hlo->ToCategory());
       instruction_info->set_flop_count(cost_analysis.flop_count(*hlo));
       instruction_info->set_transcendental_count(
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 5c16d6bb5e..8bddaa8c96 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2034,7 +2034,7 @@ string HloInstruction::OperandsToStringWithCanonicalNameMap(
         options.is_in_nested_computation()) {
       str.push_back(PrintName(
           canonical_name_map->LookupOrInsert(operand->name()), options));
-    } else if (!options.compact_operands()) {
+    } else if (options.print_operand_names()) {
       str.push_back(PrintName(operand->name(), options));
     }
     StrAppend(out, StrJoin(str, " "));
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 1bfdc88abc..9deed20e5d 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -80,6 +80,7 @@ class HloPrintOptions {
         print_backend_config_(true),
         compact_operands_(false),
         print_operand_shape_(true),
+        print_operand_names_(true),
         print_program_shape_(true),
         print_percent_(true),
         print_control_dependencies_(true),
@@ -107,6 +108,7 @@ class HloPrintOptions {
         .set_print_metadata(false)
         .set_print_backend_config(false)
         .set_compact_operands(true)
+        .set_print_operand_names(false)
         .set_print_operand_shape(true)
         .set_print_program_shape(false)
         .set_print_percent(false)
@@ -144,6 +146,12 @@ class HloPrintOptions {
     return *this;
   }
 
+  // If true, the operand names will be printed.
+  HloPrintOptions& set_print_operand_names(bool value) {
+    print_operand_names_ = value;
+    return *this;
+  }
+
   // If true, program shape of hlo computations will be printed.
   HloPrintOptions& set_print_program_shape(bool value) {
     print_program_shape_ = value;
@@ -162,8 +170,8 @@ class HloPrintOptions {
     return *this;
   }
 
-  // If true, only a part of operands will be printed out, and their names will
-  // be omitted (note that in this case the text will not be parsable).
+  // If true, only a part of operands will be printed out (note that in this
+  // case the text will not be parsable).
   HloPrintOptions& set_compact_operands(bool value) {
     compact_operands_ = value;
     return *this;
@@ -197,6 +205,7 @@ class HloPrintOptions {
   bool print_backend_config() const { return print_backend_config_; }
   bool compact_operands() const { return compact_operands_; }
   bool print_operand_shape() const { return print_operand_shape_; }
+  bool print_operand_names() const { return print_operand_names_; }
   bool print_program_shape() const { return print_program_shape_; }
   bool print_percent() const { return print_percent_; }
   bool print_control_dependencies() const {
@@ -215,6 +224,7 @@ class HloPrintOptions {
   bool print_backend_config_;
   bool compact_operands_;
   bool print_operand_shape_;
+  bool print_operand_names_;
   bool print_program_shape_;
   bool print_percent_;
   bool print_control_dependencies_;
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 25b70740e3..5a125b4c08 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -80,17 +80,23 @@ class HloParser {
   StatusOr<PaddingConfig> ParsePaddingConfigOnly();
 
   // Stand-alone parsing utility for a single instruction worth of text.
-  Status ParseSingleInstruction(HloComputation::Builder* builder,
-                                string* root_name);
+  Status ParseSingleInstruction(HloModule* module);
 
  private:
-  // Locates an instruction with the given name in the instruction_pool_ or
+  using InstrNameTable =
+      std::unordered_map<string, std::pair<HloInstruction*, LocTy>>;
+
+  // Returns the map from the instruction name to the instruction itself and its
+  // location in the current scope.
+  InstrNameTable& current_name_table() { return scoped_name_tables_.back(); }
+
+  // Locates an instruction with the given name in the current_name_table() or
   // returns nullptr.
   //
-  // If the missing_instruction_hook_ is registered and a "shape" is provided,
-  // the hook will be called and may satisfy the request for the given
-  // instruction. This is useful when we reify parameters as they're resolved;
-  // i.e. for ParseSingleInstruction.
+  // When the name is not found or name is empty, if create_missing_instruction_
+  // hook is registered and a "shape" is provided, the hook will be called to
+  // create an instruction. This is useful when we reify parameters as they're
+  // resolved; i.e. for ParseSingleInstruction.
   std::pair<HloInstruction*, LocTy>* FindInstruction(
       const string& name, const optional<Shape>& shape = nullopt);
 
@@ -98,9 +104,11 @@ class HloParser {
   bool ParseHloModule(HloModule* module);
   bool ParseComputations(HloModule* module);
   bool ParseComputation(HloComputation** entry_computation);
-  bool ParseInstructionList(HloComputation::Builder* builder,
-                            string* root_name);
+  bool ParseInstructionList(HloComputation** computation,
+                            const string& computation_name);
   bool ParseInstruction(HloComputation::Builder* builder, string* root_name);
+  bool ParseInstruciontRhs(HloComputation::Builder* builder, const string& name,
+                           LocTy name_loc);
   bool ParseControlPredecessors(HloInstruction* instruction);
   bool ParseLiteral(Literal* literal, const Shape& shape);
   bool ParseTupleLiteral(Literal* literal, const Shape& shape);
@@ -281,23 +289,47 @@ class HloParser {
   bool AddComputation(const string& name, HloComputation* computation,
                       LocTy name_loc);
 
-  // The map from the instruction/computation name to the
-  // instruction/computation itself and it's location. This does not own the
-  // pointers.
-  std::unordered_map<string, std::pair<HloInstruction*, LocTy>>
-      instruction_pool_;
+  HloLexer lexer_;
+
+  // A stack for the instruction names. The top of the stack stores the
+  // instruction name table for the current scope.
+  //
+  // A instruction's name is unique among its scope (i.e. its parent
+  // computation), but it's not necessarily unique among all computations in the
+  // module. When there are multiple levels of nested computations, the same
+  // name could appear in both an outer computation and an inner computation. So
+  // we need a stack to make sure a name is only visible within its scope,
+  std::vector<InstrNameTable> scoped_name_tables_;
+
+  // A helper class which pushes and pops to an InstrNameTable stack via RAII.
+  class Scope {
+   public:
+    explicit Scope(std::vector<InstrNameTable>* scoped_name_tables)
+        : scoped_name_tables_(scoped_name_tables) {
+      scoped_name_tables_->emplace_back();
+    }
+    ~Scope() { scoped_name_tables_->pop_back(); }
+
+   private:
+    std::vector<InstrNameTable>* scoped_name_tables_;
+  };
+
+  // Map from the computation name to the computation itself and its location.
   std::unordered_map<string, std::pair<HloComputation*, LocTy>>
       computation_pool_;
 
-  HloLexer lexer_;
   std::vector<std::unique_ptr<HloComputation>> computations_;
   std::vector<string> error_;
 
-  // Function that gets invoked when we try to resolve an instruction
-  // instruction_pool_ but fail to do so.
-  std::function<std::pair<HloInstruction*, LocTy>*(string,
-                                                   const optional<Shape>&)>
-      missing_instruction_hook_;
+  // When an operand name cannot be resolved, this function is called to create
+  // a parameter instruction with the given name and shape. It registers the
+  // name, instruction, and a placeholder location in the name table. It returns
+  // the newly-created instruction and the placeholder location. If `name` is
+  // empty, this should create the parameter with a generated name. This is
+  // supposed to be set and used only in ParseSingleInstruction.
+  std::function<std::pair<HloInstruction*, LocTy>*(const string& name,
+                                                   const Shape& shape)>
+      create_missing_instruction_;
 };
 
 bool SplitToInt64s(absl::string_view s, char delim, std::vector<int64>* out) {
@@ -351,11 +383,21 @@ bool HloParser::Run(HloModule* module) {
 
 std::pair<HloInstruction*, HloParser::LocTy>* HloParser::FindInstruction(
     const string& name, const optional<Shape>& shape) {
-  std::pair<HloInstruction*, LocTy>* instr =
-      tensorflow::gtl::FindOrNull(instruction_pool_, name);
+  std::pair<HloInstruction*, LocTy>* instr = nullptr;
+  if (!name.empty()) {
+    instr = tensorflow::gtl::FindOrNull(current_name_table(), name);
+  }
+
   // Potentially call the missing instruction hook.
-  if (instr == nullptr && missing_instruction_hook_ != nullptr) {
-    return missing_instruction_hook_(name, shape);
+  if (instr == nullptr && create_missing_instruction_ != nullptr &&
+      scoped_name_tables_.size() == 1) {
+    if (!shape.has_value()) {
+      Error(lexer_.GetLoc(),
+            "Operand had no shape in HLO text; cannot create parameter for "
+            "single-instruction module.");
+      return nullptr;
+    }
+    return create_missing_instruction_(name, *shape);
   }
   return instr;
 }
@@ -439,7 +481,6 @@ bool HloParser::ParseComputation(HloComputation** entry_computation) {
   if (!ParseName(&name)) {
     return false;
   }
-  auto builder = absl::make_unique<HloComputation::Builder>(name);
 
   LocTy shape_loc = nullptr;
   Shape shape;
@@ -447,40 +488,21 @@ bool HloParser::ParseComputation(HloComputation** entry_computation) {
     return false;
   }
 
-  string root_name;
-  if (!ParseInstructionList(builder.get(), &root_name)) {
+  HloComputation* computation = nullptr;
+  if (!ParseInstructionList(&computation, name)) {
     return false;
   }
 
-  std::pair<HloInstruction*, LocTy>* root_node = FindInstruction(root_name);
-  // This means some instruction was marked as ROOT but we didn't find it in the
-  // pool, which should not happen.
-  if (!root_name.empty() && root_node == nullptr) {
-    LOG(FATAL) << "instruction " << root_name
-               << " was marked as ROOT but the parser has not seen it before";
-  }
-
-  HloInstruction* root = root_node == nullptr ? nullptr : root_node->first;
-  // Now root can be either an existing instruction or a nullptr. If it's a
-  // nullptr, the implementation of Builder will set the last instruction as
-  // root instruction.
-  computations_.emplace_back(builder->Build(root));
-  HloComputation* computation = computations_.back().get();
-
-  if (!root) {
-    root = computation->root_instruction();
-  } else {
-    CHECK_EQ(root, computation->root_instruction());
-  }
-
   // If param_list_to_shape was present, check compatibility.
-  if (shape_loc != nullptr && !ShapeUtil::Compatible(root->shape(), shape)) {
+  if (shape_loc != nullptr &&
+      !ShapeUtil::Compatible(computation->root_instruction()->shape(), shape)) {
     return Error(
         shape_loc,
-        StrCat("Shape of computation ", name, ", ",
-               ShapeUtil::HumanString(shape),
-               ", is not compatible with that of its root instruction ",
-               root_name, ", ", ShapeUtil::HumanString(root->shape())));
+        StrCat(
+            "Shape of computation ", name, ", ", ShapeUtil::HumanString(shape),
+            ", is not compatible with that of its root instruction ",
+            computation->root_instruction()->name(), ", ",
+            ShapeUtil::HumanString(computation->root_instruction()->shape())));
   }
 
   if (is_entry_computation) {
@@ -489,43 +511,62 @@ bool HloParser::ParseComputation(HloComputation** entry_computation) {
     }
     *entry_computation = computation;
   }
-  instruction_pool_.clear();
 
   return AddComputation(name, computation, name_loc);
 }
 
 // instruction_list ::= '{' instruction_list1 '}'
 // instruction_list1 ::= (instruction)+
-bool HloParser::ParseInstructionList(HloComputation::Builder* builder,
-                                     string* root_name) {
+bool HloParser::ParseInstructionList(HloComputation** computation,
+                                     const string& computation_name) {
+  Scope scope(&scoped_name_tables_);
+  HloComputation::Builder builder(computation_name);
   if (!ParseToken(TokKind::kLbrace,
                   "expects '{' at the beginning of instruction list.")) {
     return false;
   }
+  string root_name;
   do {
-    if (!ParseInstruction(builder, root_name)) {
+    if (!ParseInstruction(&builder, &root_name)) {
       return false;
     }
   } while (lexer_.GetKind() != TokKind::kRbrace);
-  return ParseToken(TokKind::kRbrace,
-                    "expects '}' at the end of instruction list.");
+  if (!ParseToken(TokKind::kRbrace,
+                  "expects '}' at the end of instruction list.")) {
+    return false;
+  }
+  HloInstruction* root = nullptr;
+  if (!root_name.empty()) {
+    std::pair<HloInstruction*, LocTy>* root_node =
+        tensorflow::gtl::FindOrNull(current_name_table(), root_name);
+
+    // This means some instruction was marked as ROOT but we didn't find it in
+    // the pool, which should not happen.
+    if (root_node == nullptr) {
+      LOG(FATAL) << "instruction " << root_name
+                 << " was marked as ROOT but the parser has not seen it before";
+    }
+    root = root_node->first;
+  }
+
+  // Now root can be either an existing instruction or a nullptr. If it's a
+  // nullptr, the implementation of Builder will set the last instruction as
+  // the root instruction.
+  computations_.emplace_back(builder.Build(root));
+  *computation = computations_.back().get();
+  return true;
 }
 
 // instruction ::= ('ROOT')? name '=' shape opcode operands (attribute)*
 bool HloParser::ParseInstruction(HloComputation::Builder* builder,
                                  string* root_name) {
   string name;
-  Shape shape;
-  HloOpcode opcode;
-  std::vector<HloInstruction*> operands;
-
   LocTy maybe_root_loc = lexer_.GetLoc();
   bool is_root = EatIfPresent(TokKind::kw_ROOT);
 
   const LocTy name_loc = lexer_.GetLoc();
   if (!ParseName(&name) ||
-      !ParseToken(TokKind::kEqual, "expects '=' in instruction") ||
-      !ParseShape(&shape) || !ParseOpcode(&opcode)) {
+      !ParseToken(TokKind::kEqual, "expects '=' in instruction")) {
     return false;
   }
 
@@ -536,6 +577,19 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
     *root_name = name;
   }
 
+  return ParseInstruciontRhs(builder, name, name_loc);
+}
+
+bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
+                                    const string& name, LocTy name_loc) {
+  Shape shape;
+  HloOpcode opcode;
+  std::vector<HloInstruction*> operands;
+
+  if (!ParseShape(&shape) || !ParseOpcode(&opcode)) {
+    return false;
+  }
+
   // Add optional attributes.
   std::unordered_map<string, AttrConfig> attrs;
   optional<OpSharding> sharding;
@@ -2146,7 +2200,20 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
         }
       }
       if (!ParseName(&name)) {
-        return false;
+        // When parsing a single instruction (as opposed to a whole module), an
+        // HLO may have one or more operands with a shape but no name:
+        //
+        //  foo = add(f32[10], f32[10])
+        //
+        // create_missing_instruction_ is always non-null when parsing a single
+        // instruction, and is responsible for creating kParameter instructions
+        // for these operands.
+        if (shape.has_value() && create_missing_instruction_ != nullptr &&
+            scoped_name_tables_.size() == 1) {
+          name = "";
+        } else {
+          return false;
+        }
       }
       std::pair<HloInstruction*, LocTy>* instruction =
           FindInstruction(name, shape);
@@ -2299,9 +2366,17 @@ bool HloParser::ParseAttributeHelper(
         return true;
       }
       case AttrTy::kHloComputation: {
-        HloComputation* result;
-        if (!ParseComputationName(&result)) {
-          return false;
+        HloComputation* result = nullptr;
+        if (lexer_.GetKind() == TokKind::kLbrace) {
+          // This means it is a nested computation.
+          if (!ParseInstructionList(&result, /*computation_name=*/"_")) {
+            return false;
+          }
+        } else {
+          // This means it is a computation name.
+          if (!ParseComputationName(&result)) {
+            return false;
+          }
         }
         static_cast<optional<HloComputation*>*>(attr_out_ptr)->emplace(result);
         return true;
@@ -3134,7 +3209,7 @@ bool HloParser::EatIfPresent(TokKind kind) {
 
 bool HloParser::AddInstruction(const string& name, HloInstruction* instruction,
                                LocTy name_loc) {
-  auto result = instruction_pool_.insert({name, {instruction, name_loc}});
+  auto result = current_name_table().insert({name, {instruction, name_loc}});
   if (!result.second) {
     Error(name_loc, StrCat("instruction already exists: ", name));
     return Error(/*loc=*/result.first->second.second,
@@ -3204,36 +3279,51 @@ StatusOr<PaddingConfig> HloParser::ParsePaddingConfigOnly() {
   return padding_config;
 }
 
-Status HloParser::ParseSingleInstruction(HloComputation::Builder* builder,
-                                         string* root_name) {
-  TF_RET_CHECK(missing_instruction_hook_ == nullptr);
+Status HloParser::ParseSingleInstruction(HloModule* module) {
+  TF_RET_CHECK(create_missing_instruction_ == nullptr);
+  TF_RET_CHECK(scoped_name_tables_.empty());
+  HloComputation::Builder builder(module->name());
 
   // The missing instruction hook we register creates the shaped instruction on
   // the fly as a parameter and returns it.
   int64 parameter_count = 0;
-  missing_instruction_hook_ =
-      [this, builder, &parameter_count](
-          string name,
-          const optional<Shape>& shape) -> std::pair<HloInstruction*, LocTy>* {
-    if (!shape.has_value()) {
-      Error(lexer_.GetLoc(),
-            StrCat("Operand ", name,
-                   " had no shape in HLO text; cannot create parameter for "
-                   "single-instruction module."));
-      return nullptr;
-    }
-    HloInstruction* parameter = builder->AddInstruction(
-        HloInstruction::CreateParameter(parameter_count++, *shape, name));
-    instruction_pool_[name] = {parameter, lexer_.GetLoc()};
-    return tensorflow::gtl::FindOrNull(instruction_pool_, name);
+  create_missing_instruction_ =
+      [this, &builder, &parameter_count](
+          const string& name,
+          const Shape& shape) -> std::pair<HloInstruction*, LocTy>* {
+    string new_name = name.empty() ? StrCat("_", parameter_count) : name;
+    HloInstruction* parameter = builder.AddInstruction(
+        HloInstruction::CreateParameter(parameter_count++, shape, new_name));
+    current_name_table()[new_name] = {parameter, lexer_.GetLoc()};
+    return tensorflow::gtl::FindOrNull(current_name_table(), new_name);
   };
 
   // Prime the lexer.
   lexer_.Lex();
 
   // Parse the instruction with the registered hook.
-  if (!ParseInstruction(builder, root_name)) {
-    return InvalidArgument("Syntax error:\n%s", GetError());
+  Scope scope(&scoped_name_tables_);
+  if (CanBeShape()) {
+    // This means that the instruction's left-hand side is probably omitted,
+    // e.g.
+    //
+    //  f32[10] fusion(...), calls={...}
+    if (!ParseInstruciontRhs(&builder, module->name(), lexer_.GetLoc())) {
+      return InvalidArgument("Syntax error:\n%s", GetError());
+    }
+  } else {
+    // This means that the instruction's left-hand side might exist, e.g.
+    //
+    //  foo = f32[10] fusion(...), calls={...}
+    string root_name;
+    if (!ParseInstruction(&builder, &root_name)) {
+      return InvalidArgument("Syntax error:\n%s", GetError());
+    }
+  }
+
+  module->AddEntryComputation(builder.Build());
+  for (auto& comp : computations_) {
+    module->AddEmbeddedComputation(std::move(comp));
   }
   return Status::OK();
 }
@@ -3271,12 +3361,8 @@ Status ParseHloString(absl::string_view str, HloModule* module) {
 StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
     absl::string_view str, absl::string_view name) {
   HloParser parser(str);
-  auto builder = absl::make_unique<HloComputation::Builder>(string(name));
-  string root_name;
-  TF_RETURN_IF_ERROR(parser.ParseSingleInstruction(builder.get(), &root_name));
-  std::unique_ptr<HloComputation> computation = builder->Build();
   auto module = absl::make_unique<HloModule>(string(name), HloModuleConfig());
-  module->AddEntryComputation(std::move(computation));
+  TF_RETURN_IF_ERROR(parser.ParseSingleInstruction(module.get()));
   return std::move(module);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h
index 3696035514..97d6f0117e 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.h
+++ b/tensorflow/compiler/xla/service/hlo_parser.h
@@ -40,8 +40,9 @@ StatusOr<std::unique_ptr<HloModule>> ParseHloString(
 // point to an empty module (no computations).
 Status ParseHloString(absl::string_view str, HloModule* module);
 
-// Parses the text for a single HLO operation into an HLO module with a function
-// that runs that operation (with the same parameters) as its entry computation.
+// Parses the text for a single HLO instruction into an HLO module with an
+// entry computation that runs that instruction (with the same parameters) as
+// its root instruction.
 StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
     absl::string_view str, absl::string_view name = "single_op");
 
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index dd4ee780f0..d10acf3814 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1763,6 +1763,25 @@ ENTRY entry {
       "was parsing 8:39: error: instruction does not exist: aparam");
 }
 
+TEST_F(HloParserTest, SameNameDiffComputations) {
+  const string original = R"(HloModule same_names:
+add {
+  p0 = f32[] parameter(0)
+  p1 = f32[] parameter(1)
+  ROOT result = f32[] add(p0, p1)
+}
+
+ENTRY ReduceR3ToR2 {
+  p0 = f32[8,16,256]{2,1,0} parameter(0)
+  p1 = f32[] constant(0)
+  ROOT result = f32[8,16]{1,0} reduce(p0, p1), dimensions={2}, to_apply=add
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(original));
+  ASSERT_NE(module->entry_computation(), nullptr);
+  EXPECT_THAT(module->entry_computation()->root_instruction(), op::Reduce());
+}
+
 TEST_F(HloParserTest, ParseSharding) {
   const string original = "{maximal device=42}";
   TF_ASSERT_OK_AND_ASSIGN(HloSharding sharding, ParseSharding(original));
@@ -1823,14 +1842,129 @@ TEST(HloParserSingleOpTest, SingleOp) {
               op::Multiply(op::Parameter(0), op::Parameter(1)));
 }
 
-TEST(HloParserSingleOpTest, SingleOpNoShapesProducesError) {
+TEST(HloParserSingleOpTest, SingleOpNoShapeProducesError) {
+  const string text = "multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x)";
+  StatusOr<std::unique_ptr<HloModule>> module = ParseHloOpToModule(text);
+  ASSERT_TRUE(!module.status().ok());
+  LOG(INFO) << "Status: " << module.status();
+  EXPECT_THAT(module.status().ToString(),
+              ::testing::HasSubstr("expects '=' in instruction"));
+}
+
+TEST(HloParserSingleOpTest, SingleOpNoOperandShapesProducesError) {
   const string text = "%multiply = f32[2,4]{1,0} multiply(%broadcast, %x)";
   StatusOr<std::unique_ptr<HloModule>> module = ParseHloOpToModule(text);
   ASSERT_TRUE(!module.status().ok());
   LOG(INFO) << "Status: " << module.status();
-  EXPECT_THAT(
-      module.status().ToString(),
-      ::testing::HasSubstr("Operand broadcast had no shape in HLO text"));
+  EXPECT_THAT(module.status().ToString(),
+              ::testing::HasSubstr("Operand had no shape in HLO text"));
+}
+
+TEST(HloParserSingleOpTest, SingleOpNoNames) {
+  const string text =
+      "%multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})";
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  const HloComputation* computation = module->entry_computation();
+  ASSERT_NE(computation, nullptr);
+  EXPECT_THAT(computation->root_instruction(),
+              op::Multiply(op::Parameter(0), op::Parameter(1)));
+}
+
+TEST(HloParserSingleOpTest, CanonicalOp) {
+  const string text = "f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})";
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  const HloComputation* computation = module->entry_computation();
+  ASSERT_NE(computation, nullptr);
+  EXPECT_THAT(computation->root_instruction(),
+              op::Multiply(op::Parameter(0), op::Parameter(1)));
+  EXPECT_EQ(
+      computation->root_instruction()->ToString(HloPrintOptions::Canonical()),
+      text);
+}
+
+TEST(HloParserSingleOpTest, CanonicalOpWithNested) {
+  const string text =
+      R"(f32[5,20]{1,0} while(f32[5,10]{1,0}), condition=
+{
+  tmp_0 = f32[5,10]{1,0} parameter(0)
+  tmp_1 = f32[20,10]{1,0} parameter(1)
+  ROOT tmp_2 = f32[5,20]{1,0} fusion(f32[5,10]{1,0} tmp_0, f32[20,10]{1,0} tmp_1), kind=kLoop, calls=
+  {
+    tmp_0 = f32[5,10]{1,0} parameter(0)
+    tmp_1 = f32[20,10]{1,0} parameter(1)
+    tmp_2 = f32[10,20]{1,0} transpose(f32[20,10]{1,0} tmp_1), dimensions={1,0}
+    ROOT tmp_3 = f32[5,20]{1,0} dot(f32[5,10]{1,0} tmp_0, f32[10,20]{1,0} tmp_2), lhs_contracting_dims={1}, rhs_contracting_dims={0}
+  }
+}, body=
+{
+  tmp_0 = f32[5,10]{1,0} parameter(0)
+  tmp_1 = f32[20,10]{1,0} parameter(1)
+  ROOT tmp_2 = f32[5,20]{1,0} fusion(f32[5,10]{1,0} tmp_0, f32[20,10]{1,0} tmp_1), kind=kLoop, calls=
+  {
+    tmp_0 = f32[5,10]{1,0} parameter(0)
+    tmp_1 = f32[20,10]{1,0} parameter(1)
+    tmp_2 = f32[10,20]{1,0} transpose(f32[20,10]{1,0} tmp_1), dimensions={1,0}
+    ROOT tmp_3 = f32[5,20]{1,0} dot(f32[5,10]{1,0} tmp_0, f32[10,20]{1,0} tmp_2), lhs_contracting_dims={1}, rhs_contracting_dims={0}
+  }
+})";
+
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  const HloComputation* computation = module->entry_computation();
+  ASSERT_NE(computation, nullptr);
+  EXPECT_EQ(
+      computation->root_instruction()->ToString(HloPrintOptions::Canonical()),
+      text);
+}
+
+TEST(HloParserSingleOpTest, SingleOpWithNested) {
+  const string text =
+      R"(%fusion = f32[3,2,1,1]{3,2,1,0} fusion(f32[3,2,1,1]{3,2,1,0} %p0, f32[2]{0} %p1), kind=kLoop, calls=
+{
+  %param_0 = f32[3,2,1,1]{3,2,1,0} parameter(0)
+  %param_1 = f32[2]{0} parameter(1)
+  %broadcast = f32[3,2,1,1]{3,2,1,0} broadcast(f32[2]{0} %param_1), dimensions={1}
+  ROOT %subtract = f32[3,2,1,1]{3,2,1,0} subtract(f32[3,2,1,1]{3,2,1,0} %param_0, f32[3,2,1,1]{3,2,1,0} %broadcast)
+})";
+
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  const HloComputation* computation = module->entry_computation();
+  ASSERT_NE(computation, nullptr);
+  EXPECT_THAT(computation->root_instruction(),
+              op::Fusion(op::Parameter(0), op::Parameter(1)));
+}
+
+TEST(HloParserSingleOpTest, SingleOpWithNested_DoesNotExist) {
+  const string text =
+      R"(reduce = f32[] reduce(f32[10], f32[]), dimensions={1}, to_apply=
+{
+  result = f32[] add(f32[] x, f32[] y)
+})";
+  auto status = ParseHloOpToModule(text).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::HasSubstr("does not exist: x"));
+}
+
+TEST(HloParserSingleOpTest, SingleOpWithNested_NoLhs) {
+  const string text =
+      R"(reduce = f32[] reduce(f32[10], f32[]), dimensions={1}, to_apply=
+{
+  f32[] add(f32[] x, f32[] y)
+})";
+  auto status = ParseHloOpToModule(text).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name"));
+}
+
+TEST(HloParserSingleOpTest, SingleOpWithNested_NoOperandName) {
+  const string text =
+      R"(reduce = f32[] reduce(f32[10], f32[]), dimensions={1}, to_apply=
+{
+  result = f32[] add(f32[], f32[])
+})";
+  auto status = ParseHloOpToModule(text).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name"));
 }
 
 TEST(HloParserSingleOpTest, ConvolutionTrivialFeatureGroupCount) {
-- 
GitLab


From 00000cbfdf0efac737f3bfff94950a49d48659fb Mon Sep 17 00:00:00 2001
From: Christopher Olston <olston@google.com>
Date: Tue, 2 Oct 2018 15:48:17 -0700
Subject: [PATCH 1028/1357] Delete the shims in tensorflow/contrib/batching/.

PiperOrigin-RevId: 215473319
---
 tensorflow/contrib/batching/BUILD             | 58 -------------------
 .../adaptive_shared_batch_scheduler.h         | 21 -------
 .../contrib/batching/basic_batch_scheduler.h  | 21 -------
 tensorflow/contrib/batching/batch_scheduler.h | 21 -------
 .../batching/serial_device_batch_scheduler.h  | 21 -------
 .../contrib/batching/shared_batch_scheduler.h | 21 -------
 tensorflow/contrib/batching/test_util/BUILD   | 19 ------
 .../batching/test_util/fake_clock_env.h       | 21 -------
 tensorflow/contrib/batching/util/BUILD        | 28 ---------
 .../contrib/batching/util/periodic_function.h | 20 -------
 10 files changed, 251 deletions(-)
 delete mode 100644 tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/basic_batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/serial_device_batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/shared_batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/test_util/BUILD
 delete mode 100644 tensorflow/contrib/batching/test_util/fake_clock_env.h
 delete mode 100644 tensorflow/contrib/batching/util/BUILD
 delete mode 100644 tensorflow/contrib/batching/util/periodic_function.h

diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index b27a19b16c..648f3ebb05 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -7,64 +7,6 @@ package(
 licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
-
-cc_library(
-    name = "batch_scheduler_hdrs",
-    hdrs = ["batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:batch_scheduler_hdrs",
-    ],
-)
-
-cc_library(
-    name = "batch_scheduler",
-    hdrs = ["batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:batch_scheduler",
-    ],
-)
-
-cc_library(
-    name = "shared_batch_scheduler_hdrs",
-    hdrs = ["shared_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:shared_batch_scheduler_hdrs",
-    ],
-)
-
-cc_library(
-    name = "shared_batch_scheduler",
-    hdrs = ["shared_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:shared_batch_scheduler",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "adaptive_shared_batch_scheduler",
-    hdrs = ["adaptive_shared_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:adaptive_shared_batch_scheduler",
-    ],
-)
-
-cc_library(
-    name = "serial_device_batch_scheduler",
-    hdrs = ["serial_device_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:serial_device_batch_scheduler",
-    ],
-)
-
-cc_library(
-    name = "basic_batch_scheduler",
-    hdrs = ["basic_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:basic_batch_scheduler",
-    ],
-)
-
 load(
     "//tensorflow:tensorflow.bzl",
     "py_test",
diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
deleted file mode 100644
index 86250e6692..0000000000
--- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/basic_batch_scheduler.h b/tensorflow/contrib/batching/basic_batch_scheduler.h
deleted file mode 100644
index d9b37da693..0000000000
--- a/tensorflow/contrib/batching/basic_batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/basic_batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/batch_scheduler.h b/tensorflow/contrib/batching/batch_scheduler.h
deleted file mode 100644
index 8e94e1fd8b..0000000000
--- a/tensorflow/contrib/batching/batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/serial_device_batch_scheduler.h b/tensorflow/contrib/batching/serial_device_batch_scheduler.h
deleted file mode 100644
index bf6b708361..0000000000
--- a/tensorflow/contrib/batching/serial_device_batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/serial_device_batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/shared_batch_scheduler.h b/tensorflow/contrib/batching/shared_batch_scheduler.h
deleted file mode 100644
index 83a59695d7..0000000000
--- a/tensorflow/contrib/batching/shared_batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/shared_batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/test_util/BUILD b/tensorflow/contrib/batching/test_util/BUILD
deleted file mode 100644
index 7cb2d8079b..0000000000
--- a/tensorflow/contrib/batching/test_util/BUILD
+++ /dev/null
@@ -1,19 +0,0 @@
-# Description: Utilities to aid testing.
-
-package(
-    default_visibility = ["//tensorflow:internal"],
-)
-
-licenses(["notice"])  # Apache 2.0
-
-exports_files(["LICENSE"])
-
-cc_library(
-    name = "fake_clock_env",
-    testonly = 1,
-    hdrs = ["fake_clock_env.h"],
-    visibility = ["//visibility:public"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:fake_clock_env",
-    ],
-)
diff --git a/tensorflow/contrib/batching/test_util/fake_clock_env.h b/tensorflow/contrib/batching/test_util/fake_clock_env.h
deleted file mode 100644
index 40a39a5569..0000000000
--- a/tensorflow/contrib/batching/test_util/fake_clock_env.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_
-#define TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_
-
-#include "tensorflow/core/kernels/batching_util/fake_clock_env.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_
diff --git a/tensorflow/contrib/batching/util/BUILD b/tensorflow/contrib/batching/util/BUILD
deleted file mode 100644
index 8f81b6702f..0000000000
--- a/tensorflow/contrib/batching/util/BUILD
+++ /dev/null
@@ -1,28 +0,0 @@
-# Description: Utilities.
-
-package(
-    default_visibility = ["//tensorflow:internal"],
-)
-
-licenses(["notice"])  # Apache 2.0
-
-load("//tensorflow:tensorflow.bzl", "tf_cc_test")
-
-cc_library(
-    name = "periodic_function_dynamic",
-    hdrs = ["periodic_function.h"],
-    visibility = ["//visibility:public"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:periodic_function_dynamic",
-        "//third_party/eigen3",
-    ],
-)
-
-cc_library(
-    name = "periodic_function",
-    visibility = ["//visibility:public"],
-    deps = [
-        ":periodic_function_dynamic",
-        "//tensorflow/core/kernels/batching_util:periodic_function",
-    ],
-)
diff --git a/tensorflow/contrib/batching/util/periodic_function.h b/tensorflow/contrib/batching/util/periodic_function.h
deleted file mode 100644
index aa2ed0a385..0000000000
--- a/tensorflow/contrib/batching/util/periodic_function.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_
-#define TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_
-
-#include "tensorflow/core/kernels/batching_util/periodic_function.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_
-- 
GitLab


From 6c487cddd3503ef72c015c5c283fff81328282e5 Mon Sep 17 00:00:00 2001
From: Revan Sopher <rsopher@google.com>
Date: Tue, 2 Oct 2018 15:48:27 -0700
Subject: [PATCH 1029/1357] Internal change.

PiperOrigin-RevId: 215473351
---
 .../data/experimental/kernel_tests/BUILD      | 113 +++++++++-
 .../kernel_tests/optimization/BUILD           |  43 ++++
 .../kernel_tests/serialization/BUILD          | 196 +++++++++++++++---
 3 files changed, 316 insertions(+), 36 deletions(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index a46c30ed2e..f56127f3ef 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -15,6 +15,7 @@ py_test(
     tags = [
         "no_oss",  # (b/79552534)
         "no_pip",
+        "no_windows",
     ],
     deps = [
         "//tensorflow/python:array_ops",
@@ -43,6 +44,11 @@ py_test(
     size = "medium",
     srcs = ["bucketing_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -66,7 +72,11 @@ py_test(
     size = "medium",
     srcs = ["csv_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -93,6 +103,9 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "manual",
+        "no_oss",
+        "no_pip",
+        "no_windows",
         "nomac",  # b/62040583
     ],
     deps = [
@@ -111,6 +124,11 @@ py_test(
     size = "medium",
     srcs = ["directed_interleave_dataset_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -126,6 +144,11 @@ py_test(
     name = "get_single_element_test",
     size = "small",
     srcs = ["get_single_element_test.py"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -144,6 +167,11 @@ py_test(
 py_test(
     name = "indexed_dataset_ops_test",
     srcs = ["indexed_dataset_ops_test.py"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -164,6 +192,7 @@ py_test(
     tags = [
         "no_oss",
         "no_pip",
+        "no_windows",
         "notap",
     ],
     deps = [
@@ -187,7 +216,11 @@ py_test(
     size = "small",
     srcs = ["iterator_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -208,7 +241,9 @@ py_test(
     srcs = ["map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
         "no_pip",
+        "no_windows",
         "noasan",  # times out
         "optonly",
     ],
@@ -234,6 +269,11 @@ py_test(
     size = "medium",
     srcs = ["filter_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -254,7 +294,11 @@ py_test(
     size = "small",
     srcs = ["map_defun_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:check_ops",
@@ -277,6 +321,11 @@ py_test(
     size = "small",
     srcs = ["parsing_ops_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client_testlib",
@@ -313,7 +362,12 @@ cuda_py_test(
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
     ],
-    tags = ["no_windows_gpu"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+        "no_windows_gpu",
+    ],
 )
 
 py_test(
@@ -321,6 +375,11 @@ py_test(
     size = "small",
     srcs = ["range_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -366,7 +425,11 @@ py_test(
     size = "medium",
     srcs = ["reader_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
@@ -390,6 +453,9 @@ py_test(
     shard_count = 2,
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
         "noasan",
         "optonly",
     ],
@@ -415,7 +481,11 @@ py_test(
     size = "small",
     srcs = ["scan_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -438,7 +508,9 @@ py_test(
     srcs = ["shuffle_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
         "no_pip",
+        "no_windows",
         "optonly",
     ],
     deps = [
@@ -475,7 +547,11 @@ py_test(
     size = "small",
     srcs = ["sql_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":sql_dataset_op_test_base",
         "//tensorflow/python:client_testlib",
@@ -489,7 +565,11 @@ py_test(
     size = "medium",
     srcs = ["stats_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":reader_dataset_ops_test_base",
         ":stats_dataset_test_base",
@@ -519,7 +599,11 @@ py_test(
     size = "small",
     srcs = ["threadpool_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
@@ -539,7 +623,11 @@ py_test(
     size = "small",
     srcs = ["unique_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
@@ -555,6 +643,11 @@ py_test(
     name = "writer_ops_test",
     size = "small",
     srcs = ["writer_ops_test.py"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
index 68f73bddb5..c92bb8b9bc 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
@@ -11,6 +11,11 @@ py_test(
     size = "medium",
     srcs = ["assert_next_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -25,6 +30,11 @@ py_test(
     size = "small",
     srcs = ["hoist_random_uniform_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -43,6 +53,11 @@ py_test(
     size = "small",
     srcs = ["latency_all_edges_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -58,6 +73,11 @@ py_test(
     size = "small",
     srcs = ["map_vectorization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:check_ops",
         "//tensorflow/python:client_testlib",
@@ -80,6 +100,11 @@ py_test(
     size = "medium",
     srcs = ["map_and_filter_fusion_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -98,6 +123,11 @@ py_test(
     size = "small",
     srcs = ["map_parallelization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -117,6 +147,9 @@ py_test(
     srcs = ["model_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
         "optonly",
     ],
     deps = [
@@ -136,6 +169,11 @@ py_test(
     size = "small",
     srcs = ["noop_elimination_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -153,6 +191,11 @@ py_test(
     size = "small",
     srcs = ["optimize_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index 20c02a5366..58a335ae4f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -34,7 +34,11 @@ py_test(
     size = "medium",
     srcs = ["batch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -51,6 +55,11 @@ py_test(
     size = "small",
     srcs = ["cache_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -65,6 +74,11 @@ py_test(
     size = "small",
     srcs = ["concatenate_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -78,7 +92,11 @@ py_test(
     size = "small",
     srcs = ["csv_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -92,6 +110,11 @@ py_test(
     size = "medium",
     srcs = ["dataset_constructor_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -106,7 +129,11 @@ py_test(
     size = "medium",
     srcs = ["filter_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -123,7 +150,11 @@ py_test(
     srcs = ["fixed_length_record_dataset_serialization_test.py"],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -136,7 +167,11 @@ py_test(
     name = "flat_map_dataset_serialization_test",
     size = "medium",
     srcs = ["flat_map_dataset_serialization_test.py"],
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -158,6 +193,11 @@ py_test(
     size = "medium",
     srcs = ["group_by_reducer_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -172,6 +212,11 @@ py_test(
     size = "medium",
     srcs = ["group_by_window_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -186,7 +231,11 @@ py_test(
     size = "small",
     srcs = ["ignore_errors_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -202,7 +251,11 @@ py_test(
     size = "medium",
     srcs = ["interleave_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -219,7 +272,11 @@ py_test(
     size = "medium",
     srcs = ["map_and_batch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -234,7 +291,11 @@ py_test(
     size = "medium",
     srcs = ["map_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -256,6 +317,11 @@ py_test(
     size = "small",
     srcs = ["optimize_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -269,7 +335,11 @@ py_test(
     size = "medium",
     srcs = ["padded_batch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -285,7 +355,11 @@ py_test(
     size = "medium",
     srcs = ["parallel_interleave_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -302,7 +376,11 @@ py_test(
     size = "medium",
     srcs = ["parallel_map_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -323,7 +401,11 @@ py_test(
     size = "medium",
     srcs = ["parse_example_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -336,7 +418,11 @@ py_test(
     size = "small",
     srcs = ["prefetch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -349,6 +435,11 @@ py_test(
     size = "small",
     srcs = ["range_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -368,6 +459,11 @@ py_test(
     size = "medium",
     srcs = ["sample_from_datasets_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -381,7 +477,11 @@ py_test(
     size = "small",
     srcs = ["scan_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -395,7 +495,11 @@ py_test(
     size = "medium",
     srcs = ["sequence_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -409,7 +513,11 @@ py_test(
     size = "small",
     srcs = ["serialization_integration_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
@@ -424,7 +532,11 @@ py_test(
     size = "medium",
     srcs = ["shuffle_and_repeat_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -438,7 +550,11 @@ py_test(
     size = "medium",
     srcs = ["shuffle_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -454,7 +570,11 @@ py_test(
     size = "small",
     srcs = ["sql_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -470,7 +590,11 @@ py_test(
     size = "medium",
     srcs = ["stats_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -487,7 +611,11 @@ py_test(
     srcs = ["textline_dataset_serialization_test.py"],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -502,7 +630,11 @@ py_test(
     srcs = ["tf_record_dataset_serialization_test.py"],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -516,7 +648,11 @@ py_test(
     size = "medium",
     srcs = ["unbatch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -531,7 +667,11 @@ py_test(
     size = "small",
     srcs = ["unique_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -545,7 +685,11 @@ py_test(
     size = "small",
     srcs = ["zip_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
-- 
GitLab


From 7c0c0abab5b07528bae982d69257ebf4a8c077cb Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 2 Oct 2018 16:14:32 -0700
Subject: [PATCH 1030/1357] Internal change.

PiperOrigin-RevId: 215477724
---
 tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index cd7206baf8..9c6390070c 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -29,7 +29,7 @@ TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU:-8}
 # p100 has minimum 12G memory. Therefore, we should limit each test to 1.5G.
 # To leave some room in case we want to run more tests in parallel in the
 # future and to use a rounder number, we set it to 1G.
-export TF_PER_DEVICE_MEMORY_LIMIT_MB=1024
+export TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB:-1024}
 
 # *******************************************************************
 #         This section of the script is needed to
-- 
GitLab


From 6663959a8a2dd93a4dab9b049767d64761a00adc Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Tue, 2 Oct 2018 16:27:57 -0700
Subject: [PATCH 1031/1357] Update Keras RNN layer to support time major input.

PiperOrigin-RevId: 215479788
---
 tensorflow/python/keras/backend.py            | 25 ++++--
 .../python/keras/layers/cudnn_recurrent.py    | 24 +++--
 .../keras/layers/cudnn_recurrent_test.py      | 27 ++++++
 tensorflow/python/keras/layers/recurrent.py   | 65 ++++++++++----
 .../python/keras/layers/recurrent_test.py     | 90 +++++++++++++++++++
 .../golden/v1/tensorflow.keras.backend.pbtxt  |  2 +-
 .../v1/tensorflow.keras.layers.-r-n-n.pbtxt   |  2 +-
 .../golden/v2/tensorflow.keras.backend.pbtxt  |  2 +-
 .../v2/tensorflow.keras.layers.-r-n-n.pbtxt   |  2 +-
 9 files changed, 207 insertions(+), 32 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 584facc859..0d6877e4a1 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -3058,7 +3058,8 @@ def rnn(step_function,
         mask=None,
         constants=None,
         unroll=False,
-        input_length=None):
+        input_length=None,
+        time_major=False):
   """Iterates over the time dimension of a tensor.
 
   Arguments:
@@ -3087,6 +3088,13 @@ def rnn(step_function,
       constants: List of constant values passed at each step.
       unroll: Whether to unroll the RNN or to use a symbolic `while_loop`.
       input_length: If specified, assume time dimension is of this length.
+      time_major: Boolean. If true, the inputs and outputs will be in shape
+          `(timesteps, batch, ...)`, whereas in the False case, it will be
+          `(batch, timesteps, ...)`. Using `time_major = True` is a bit more
+          efficient because it avoids transposes at the beginning and end of the
+          RNN calculation. However, most TensorFlow data is batch-major, so by
+          default this function accepts input and emits output in batch-major
+          form.
 
   Returns:
       A tuple, `(last_output, outputs, new_states)`.
@@ -3108,15 +3116,17 @@ def rnn(step_function,
   if ndim < 3:
     raise ValueError('Input should be at least 3D.')
   inputs_shape = inputs.shape
-  axes = [1, 0] + list(range(2, ndim))
-  inputs = array_ops.transpose(inputs, (axes))
+  if not time_major:
+    axes = [1, 0] + list(range(2, ndim))
+    inputs = array_ops.transpose(inputs, axes)
 
   if mask is not None:
     if mask.dtype != dtypes_module.bool:
       mask = math_ops.cast(mask, dtypes_module.bool)
     if len(mask.shape) == ndim - 1:
       mask = expand_dims(mask)
-    mask = array_ops.transpose(mask, axes)
+    if not time_major:
+      mask = array_ops.transpose(mask, axes)
 
   if constants is None:
     constants = []
@@ -3297,10 +3307,11 @@ def rnn(step_function,
     outputs = output_ta.stack()
     last_output = output_ta.read(last_time - 1)
 
-  axes = [1, 0] + list(range(2, len(outputs.shape)))
-  outputs = array_ops.transpose(outputs, axes)
+  if not time_major:
+    axes = [1, 0] + list(range(2, len(outputs.shape)))
+    outputs = array_ops.transpose(outputs, axes)
 
-  # Static shape inference: (samples, time, ...)
+  # Static shape inference: (samples, time, ...) or (time, sample, ...)
   outputs_shape = outputs.shape.as_list()
   outputs_shape[0] = inputs_shape[0]
   outputs_shape[1] = inputs_shape[1]
diff --git a/tensorflow/python/keras/layers/cudnn_recurrent.py b/tensorflow/python/keras/layers/cudnn_recurrent.py
index cf2b0c476c..29a09a3d71 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent.py
@@ -47,6 +47,9 @@ class _CuDNNRNN(RNN):
     stateful: Boolean (default False). If True, the last state
         for each sample at index i in a batch will be used as initial
         state for the sample of index i in the following batch.
+    time_major: Boolean (default False). If true, the inputs and outputs will be
+        in shape `(timesteps, batch, ...)`, whereas in the False case, it will
+        be `(batch, timesteps, ...)`.
   """
 
   def __init__(self,
@@ -54,6 +57,7 @@ class _CuDNNRNN(RNN):
                return_state=False,
                go_backwards=False,
                stateful=False,
+               time_major=False,
                **kwargs):
     # We invoke the base layer's initializer directly here because we do not
     # want to create RNN cell instance.
@@ -62,6 +66,7 @@ class _CuDNNRNN(RNN):
     self.return_state = return_state
     self.go_backwards = go_backwards
     self.stateful = stateful
+    self.time_major = time_major
     self.supports_masking = False
     self.input_spec = [InputSpec(ndim=3)]
     if hasattr(self.cell.state_size, '__len__'):
@@ -124,7 +129,8 @@ class _CuDNNRNN(RNN):
         'return_sequences': self.return_sequences,
         'return_state': self.return_state,
         'go_backwards': self.go_backwards,
-        'stateful': self.stateful
+        'stateful': self.stateful,
+        'time_major': self.time_major,
     }
     base_config = super(  # pylint: disable=bad-super-call
         RNN, self).get_config()
@@ -267,7 +273,8 @@ class CuDNNGRU(_CuDNNRNN):
     self.built = True
 
   def _process_batch(self, inputs, initial_state):
-    inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
+    if not self.time_major:
+      inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
     input_h = initial_state[0]
     input_h = array_ops.expand_dims(input_h, axis=0)
 
@@ -301,7 +308,10 @@ class CuDNNGRU(_CuDNNRNN):
     if self.stateful or self.return_state:
       h = h[0]
     if self.return_sequences:
-      output = array_ops.transpose(outputs, perm=(1, 0, 2))
+      if self.time_major:
+        output = outputs
+      else:
+        output = array_ops.transpose(outputs, perm=(1, 0, 2))
     else:
       output = outputs[-1]
     return output, [h]
@@ -456,7 +466,8 @@ class CuDNNLSTM(_CuDNNRNN):
     self.built = True
 
   def _process_batch(self, inputs, initial_state):
-    inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
+    if not self.time_major:
+      inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
     input_h = initial_state[0]
     input_c = initial_state[1]
     input_h = array_ops.expand_dims(input_h, axis=0)
@@ -496,7 +507,10 @@ class CuDNNLSTM(_CuDNNRNN):
       h = h[0]
       c = c[0]
     if self.return_sequences:
-      output = array_ops.transpose(outputs, perm=(1, 0, 2))
+      if self.time_major:
+        output = outputs
+      else:
+        output = array_ops.transpose(outputs, perm=(1, 0, 2))
     else:
       output = outputs[-1]
     return output, [h, c]
diff --git a/tensorflow/python/keras/layers/cudnn_recurrent_test.py b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
index 2ed0aa8f26..7becbfede1 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent_test.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
@@ -26,6 +26,7 @@ import numpy as np
 from tensorflow.python import keras
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import testing_utils
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
@@ -138,6 +139,32 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
         np.testing.assert_allclose(
             keras.backend.eval(layer.states[0]), state, atol=1e-4)
 
+  @parameterized.named_parameters(
+      ('cudnngru', keras.layers.CuDNNGRU),
+      ('cudnnlstm', keras.layers.CuDNNLSTM),
+  )
+  def test_time_major_input(self, layer_class):
+    if test.is_gpu_available(cuda_only=True):
+      with self.test_session(use_gpu=True):
+        input_size = 10
+        timesteps = 6
+        units = 2
+        num_samples = 32
+
+        model = keras.models.Sequential()
+        model.add(
+            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
+        layer = layer_class(units, time_major=True, return_sequences=True)
+        model.add(layer)
+        model.add(
+            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
+        model.compile(loss='categorical_crossentropy', optimizer='adam')
+        model.fit(
+            np.ones((num_samples, timesteps, input_size)),
+            np.ones((num_samples, timesteps, units)))
+        out = model.predict(np.ones((num_samples, timesteps, input_size)))
+        self.assertEqual(out.shape, (num_samples, timesteps, units))
+
   @parameterized.named_parameters(
       ('cudnngru', keras.layers.CuDNNGRU),
       ('cudnnlstm', keras.layers.CuDNNLSTM),
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index ba7498e7e6..b07ec71178 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -336,9 +336,18 @@ class RNN(Layer):
           in your model, you would need to specify the input length
           at the level of the first layer
           (e.g. via the `input_shape` argument)
+      time_major: The shape format of the `inputs` and `outputs` tensors.
+          If True, the inputs and outputs will be in shape
+          `(timesteps, batch, ...)`, whereas in the False case, it will be
+          `(batch, timesteps, ...)`. Using `time_major = True` is a bit more
+          efficient because it avoids transposes at the beginning and end of the
+          RNN calculation. However, most TensorFlow data is batch-major, so by
+          default this function accepts input and emits output in batch-major
+          form.
 
   Input shape:
-      N-D tensor with shape `(batch_size, timesteps, ...)`.
+      N-D tensor with shape `(batch_size, timesteps, ...)` or
+      `(timesteps, batch_size, ...)` when time_major is True.
 
   Output shape:
       - if `return_state`: a list of tensors. The first tensor is
@@ -347,7 +356,8 @@ class RNN(Layer):
           be a high dimension tensor shape.
       - if `return_sequences`: N-D tensor with shape
           `(batch_size, timesteps, output_size)`, where `output_size` could
-          be a high dimension tensor shape.
+          be a high dimension tensor shape, or
+          `(timesteps, batch_size, output_size)` when `time_major` is True.
       - else, N-D tensor with shape `(batch_size, output_size)`, where
           `output_size` could be a high dimension tensor shape.
 
@@ -448,6 +458,7 @@ class RNN(Layer):
                go_backwards=False,
                stateful=False,
                unroll=False,
+               time_major=False,
                **kwargs):
     if isinstance(cell, (list, tuple)):
       cell = StackedRNNCells(cell)
@@ -468,6 +479,7 @@ class RNN(Layer):
     self.go_backwards = go_backwards
     self.stateful = stateful
     self.unroll = unroll
+    self.time_major = time_major
 
     self.supports_masking = True
     self.input_spec = [None]  # The input shape is unknown yet, at least rank 3.
@@ -503,14 +515,21 @@ class RNN(Layer):
       # Note that state_size[0] could be a tensor_shape or int.
       output_dim = tensor_shape.as_shape(state_size[0]).as_list()
 
+    batch = input_shape[0]
+    time_step = input_shape[1]
+    if self.time_major:
+      batch, time_step = time_step, batch
     if self.return_sequences:
-      output_shape = tuple([input_shape[0], input_shape[1]] + output_dim)
+      if self.time_major:
+        output_shape = tuple([time_step, batch] + output_dim)
+      else:
+        output_shape = tuple([batch, time_step] + output_dim)
     else:
-      output_shape = tuple([input_shape[0]] + output_dim)
+      output_shape = tuple([batch] + output_dim)
 
     if self.return_state:
       state_shape = [
-          tuple([input_shape[0]] + tensor_shape.as_shape(dim).as_list())
+          tuple([batch] + tensor_shape.as_shape(dim).as_list())
           for dim in state_size
       ]
       return [output_shape] + state_shape
@@ -539,13 +558,18 @@ class RNN(Layer):
     if isinstance(input_shape, list):
       input_shape = input_shape[0]
 
-    batch_size = input_shape[0] if self.stateful else None
-    input_dim = input_shape[2:]
-    self.input_spec[0] = InputSpec(shape=(batch_size, None) + input_dim)
+    input_spec_shape = list(input_shape)
+    batch_index, time_step_index = (1, 0) if self.time_major else (0, 1)
+    if not self.stateful:
+      input_spec_shape[batch_index] = None
+    input_spec_shape[time_step_index] = None
+    self.input_spec[0] = InputSpec(shape=tuple(input_spec_shape))
 
+    batch = input_shape[batch_index]
+    input_dim = input_shape[2:]
+    step_input_shape = (batch,) + input_dim
     # allow cell (if layer) to build before we set or validate state_spec
     if isinstance(self.cell, Layer):
-      step_input_shape = (input_shape[0],) + input_dim
       if constants_shape is not None:
         self.cell.build([step_input_shape] + constants_shape)
       else:
@@ -598,12 +622,16 @@ class RNN(Layer):
 
   def get_initial_state(self, inputs):
     get_initial_state_fn = getattr(self.cell, 'get_initial_state', None)
+
+    input_shape = array_ops.shape(inputs)
+    batch_size = input_shape[1] if self.time_major else input_shape[0]
+    dtype = inputs.dtype
     if get_initial_state_fn:
       init_state = get_initial_state_fn(
-          inputs=inputs, batch_size=None, dtype=None)
+          inputs=None, batch_size=batch_size, dtype=dtype)
     else:
-      init_state = _generate_zero_filled_state(
-          array_ops.shape(inputs)[0], self.cell.state_size, inputs.dtype)
+      init_state = _generate_zero_filled_state(batch_size, self.cell.state_size,
+                                               dtype)
     # Keras RNN expect the states in a list, even if it's a single state tensor.
     if not nest.is_sequence(init_state):
       init_state = [init_state]
@@ -696,7 +724,7 @@ class RNN(Layer):
           'Layer has ' + str(len(self.states)) + ' states but was passed ' +
           str(len(initial_state)) + ' initial states.')
     input_shape = K.int_shape(inputs)
-    timesteps = input_shape[1]
+    timesteps = input_shape[0] if self.time_major else input_shape[1]
     if self.unroll and timesteps in [None, 1]:
       raise ValueError('Cannot unroll a RNN if the '
                        'time dimension is undefined or equal to 1. \n'
@@ -747,7 +775,8 @@ class RNN(Layer):
         go_backwards=self.go_backwards,
         mask=mask,
         unroll=self.unroll,
-        input_length=timesteps)
+        input_length=timesteps,
+        time_major=self.time_major)
     if self.stateful:
       updates = []
       for i in range(len(states)):
@@ -777,7 +806,10 @@ class RNN(Layer):
   def reset_states(self, states=None):
     if not self.stateful:
       raise AttributeError('Layer must be stateful.')
-    batch_size = self.input_spec[0].shape[0]
+    if self.time_major:
+      batch_size = self.input_spec[0].shape[1]
+    else:
+      batch_size = self.input_spec[0].shape[0]
     if not batch_size:
       raise ValueError('If a RNN is stateful, it needs to know '
                        'its batch size. Specify the batch size '
@@ -839,7 +871,8 @@ class RNN(Layer):
         'return_state': self.return_state,
         'go_backwards': self.go_backwards,
         'stateful': self.stateful,
-        'unroll': self.unroll
+        'unroll': self.unroll,
+        'time_major': self.time_major
     }
     if self._num_constants is not None:
       config['num_constants'] = self._num_constants
diff --git a/tensorflow/python/keras/layers/recurrent_test.py b/tensorflow/python/keras/layers/recurrent_test.py
index b9e90095e4..d246be6b45 100644
--- a/tensorflow/python/keras/layers/recurrent_test.py
+++ b/tensorflow/python/keras/layers/recurrent_test.py
@@ -186,6 +186,96 @@ class RNNTest(test.TestCase):
       y_np_2 = model.predict(x_np)
       self.assertAllClose(y_np, y_np_2, atol=1e-4)
 
+  def test_rnn_with_time_major(self):
+    batch = 10
+    time_step = 5
+    embedding_dim = 4
+    units = 3
+
+    with self.cached_session():
+      # Test basic case.
+      x = keras.Input((time_step, embedding_dim))
+      time_major_x = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+      layer = keras.layers.SimpleRNN(
+          units, time_major=True, return_sequences=True)
+      self.assertEqual(
+          layer.compute_output_shape((time_step, None,
+                                      embedding_dim)).as_list(),
+          [time_step, None, units])
+      y = layer(time_major_x)
+      self.assertEqual(layer.output_shape, (time_step, None, units))
+
+      y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(y)
+
+      model = keras.models.Model(x, y)
+      model.compile(optimizer='rmsprop', loss='mse')
+      model.train_on_batch(
+          np.zeros((batch, time_step, embedding_dim)),
+          np.zeros((batch, time_step, units)))
+
+    with self.cached_session():
+      # Test stacking.
+      x = keras.Input((time_step, embedding_dim))
+      time_major_x = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+      cell_units = [10, 8, 6]
+      cells = [keras.layers.SimpleRNNCell(cell_units[i]) for i in range(3)]
+      layer = keras.layers.RNN(cells, time_major=True, return_sequences=True)
+      y = layer(time_major_x)
+      self.assertEqual(layer.output_shape, (time_step, None, cell_units[-1]))
+
+      y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(y)
+      model = keras.models.Model(x, y)
+      model.compile(optimizer='rmsprop', loss='mse')
+      model.train_on_batch(
+          np.zeros((batch, time_step, embedding_dim)),
+          np.zeros((batch, time_step, cell_units[-1])))
+
+    with self.cached_session():
+      # Test masking.
+      x = keras.Input((time_step, embedding_dim))
+      time_major = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+      mask = keras.layers.Masking()(time_major)
+      rnn = keras.layers.SimpleRNN(
+          units, time_major=True, return_sequences=True)(mask)
+      y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(rnn)
+      model = keras.models.Model(x, y)
+      model.compile(optimizer='rmsprop', loss='mse')
+      model.train_on_batch(
+          np.zeros((batch, time_step, embedding_dim)),
+          np.zeros((batch, time_step, units)))
+
+    with self.cached_session():
+      # Test layer output
+      x = keras.Input((time_step, embedding_dim))
+      rnn_1 = keras.layers.SimpleRNN(units, return_sequences=True)
+      y = rnn_1(x)
+
+      model = keras.models.Model(x, y)
+      model.compile(optimizer='rmsprop', loss='mse')
+      model.train_on_batch(
+          np.zeros((batch, time_step, embedding_dim)),
+          np.zeros((batch, time_step, units)))
+
+      x_np = np.random.random((batch, time_step, embedding_dim))
+      y_np_1 = model.predict(x_np)
+
+      time_major = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+      rnn_2 = keras.layers.SimpleRNN(
+          units, time_major=True, return_sequences=True)
+      y_2 = rnn_2(time_major)
+      y_2 = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(y_2)
+
+      model_2 = keras.models.Model(x, y_2)
+      rnn_2.set_weights(rnn_1.get_weights())
+
+      y_np_2 = model_2.predict(x_np)
+      self.assertAllClose(y_np_1, y_np_2, atol=1e-4)
+
   def test_rnn_cell_with_constants_layer(self):
 
     class RNNCellWithConstants(keras.layers.Layer):
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
index 126ce8db6a..a71a59e269 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
@@ -398,7 +398,7 @@ tf_module {
   }
   member_method {
     name: "rnn"
-    argspec: "args=[\'step_function\', \'inputs\', \'initial_states\', \'go_backwards\', \'mask\', \'constants\', \'unroll\', \'input_length\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'step_function\', \'inputs\', \'initial_states\', \'go_backwards\', \'mask\', \'constants\', \'unroll\', \'input_length\', \'time_major\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "round"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
index 2b6e8af11d..68b6678d48 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -86,7 +86,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\'], "
+    argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'time_major\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'False\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
index 126ce8db6a..a71a59e269 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
@@ -398,7 +398,7 @@ tf_module {
   }
   member_method {
     name: "rnn"
-    argspec: "args=[\'step_function\', \'inputs\', \'initial_states\', \'go_backwards\', \'mask\', \'constants\', \'unroll\', \'input_length\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'step_function\', \'inputs\', \'initial_states\', \'go_backwards\', \'mask\', \'constants\', \'unroll\', \'input_length\', \'time_major\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "round"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
index 2b6e8af11d..68b6678d48 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -86,7 +86,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\'], "
+    argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'time_major\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'False\'], "
   }
   member_method {
     name: "add_loss"
-- 
GitLab


From 41e97007638ef41764b1da86fb2de772f35762e5 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Tue, 2 Oct 2018 17:00:46 -0700
Subject: [PATCH 1032/1357] Disable XLA from raspberry pi builds.

There is no known conceptual reason we can't use XLA, but in practice
we have some build issues that will need to be fixed.

PiperOrigin-RevId: 215484942
---
 tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 3d27e84b81..864278c647 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -34,6 +34,8 @@ set -e
 #
 # Make sure you have an up to date version of the Bazel build tool installed too.
 
+export TF_ENABLE_XLA=0
+
 yes '' | ./configure
 
 # Fix for curl build problem in 32-bit, see https://stackoverflow.com/questions/35181744/size-of-array-curl-rule-01-is-negative
-- 
GitLab


From e4188461aee1d614a14f17fe2abaf2a9a94886d9 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Tue, 2 Oct 2018 17:02:30 -0700
Subject: [PATCH 1033/1357] Add missing `import unittest` to
 control_flow_ops_py_test.py

PiperOrigin-RevId: 215485333
---
 tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 655fece5ff..07ec859766 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -23,6 +23,7 @@ from __future__ import print_function
 import collections
 import math
 import time
+import unittest
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
-- 
GitLab


From 22919770355b1b7d8f4c5a20327898e881aa11cb Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Tue, 2 Oct 2018 17:09:45 -0700
Subject: [PATCH 1034/1357] Pin wheel=0.31.1 to work around issue
 https://github.com/pypa/auditwheel/issues/102

PiperOrigin-RevId: 215486669
---
 tensorflow/tools/ci_build/install/install_pip_packages.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index b90f3f3b97..7f293e8604 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -24,8 +24,10 @@ easy_install3 -U pip==9.0.3
 # Install pip packages from whl files to avoid the time-consuming process of
 # building from source.
 
-pip2 install wheel
-pip3 install wheel
+# Pin wheel==0.31.1 to work around issue
+# https://github.com/pypa/auditwheel/issues/102
+pip2 install wheel==0.31.1
+pip3 install wheel==0.31.1
 
 pip2 install virtualenv
 pip3 install virtualenv
-- 
GitLab


From 80821abd6410f47130fc031b15e9ac220de5b1b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 17:16:05 -0700
Subject: [PATCH 1035/1357] Make RemoveTrivialPassthrough preserve
 minmax-related info

PiperOrigin-RevId: 215487633
---
 .../remove_trivial_passthrough.cc             | 30 ++++++++++++++-----
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
index fc49fbda59..d5983a1f12 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
@@ -29,20 +29,34 @@ namespace {
 // array instead. from_array is assumed to be discardable, and consequently
 // this only updates operator edges (since discardable arrays only
 // appear there, and not e.g. in model flags).
-void RerouteEdges(const string& from_array, const string& to_array,
-                  Model* model) {
+void Reroute(const string& from, const string& to, Model* model) {
   for (const auto& op : model->operators) {
     for (auto& output : op->outputs) {
-      if (output == from_array) {
-        output = to_array;
+      if (output == from) {
+        output = to;
       }
     }
     for (auto& input : op->inputs) {
-      if (input == from_array) {
-        input = to_array;
+      if (input == from) {
+        input = to;
       }
     }
   }
+  const Array& from_array = model->GetArray(from);
+  Array& to_array = model->GetOrCreateArray(to);
+  // Preserve minmax information if to_array didn't already have any.
+  if (from_array.minmax && !to_array.minmax) {
+    to_array.GetOrCreateMinMax() = from_array.GetMinMax();
+    // If we're copying minmax info, then we should also be copying
+    // narrow_range, which affects how minmax info is to be interpreted.
+    to_array.narrow_range = from_array.narrow_range;
+  }
+  // Separately, also preserve final_data_type if to_array didn't already
+  // have any.
+  if (from_array.final_data_type != ArrayDataType::kNone &&
+      to_array.final_data_type == ArrayDataType::kNone) {
+    to_array.final_data_type = from_array.final_data_type;
+  }
 }
 
 }  // namespace
@@ -90,14 +104,14 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation,
     transformation->AddMessageF(
         "Removing %s, keeping its non-constant input array %s and removing %s",
         LogName(*passthru_op), main_input_name, output_name);
-    RerouteEdges(output_name, main_input_name, model);
+    Reroute(output_name, main_input_name, model);
   } else if (IsDiscardableArray(*model, main_input_name) &&
              !IsConstantParameterArray(*model, main_input_name)) {
     transformation->AddMessageF(
         "Removing %s, keeping its output array %s and removing non-constant "
         "input %s",
         LogName(*passthru_op), output_name, main_input_name);
-    RerouteEdges(main_input_name, output_name, model);
+    Reroute(main_input_name, output_name, model);
   } else {
     transformation->AddMessageF(
         "Cannot remove %s, neither its main input nor its output may be "
-- 
GitLab


From b7e9cbab27c893283acc4a6154d7a59dffb23758 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Tue, 2 Oct 2018 17:48:25 -0700
Subject: [PATCH 1036/1357] Use `defun` instead of `Defun` for `tf.data`,
 except for `make_one_shot_iterator` which is to be deprecated in future.

PiperOrigin-RevId: 215491729
---
 .../contrib/distribute/python/input_ops.py    |  2 +-
 tensorflow/python/data/ops/dataset_ops.py     | 60 ++++++++-----------
 tensorflow/python/eager/function.py           | 14 +++++
 tensorflow/python/eager/function_test.py      |  9 ++-
 4 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/input_ops.py b/tensorflow/contrib/distribute/python/input_ops.py
index f07ec8234d..423952c9e2 100644
--- a/tensorflow/contrib/distribute/python/input_ops.py
+++ b/tensorflow/contrib/distribute/python/input_ops.py
@@ -78,7 +78,7 @@ def auto_shard_dataset(dataset, num_shards, index):
       elif hasattr(dataset, "_map_func"):
         # TODO(priyag): Make this check more robust by enforcing some common
         # property on all map/flatmap/interleave datasets.
-        map_func_def = dataset._map_func.definition
+        map_func_def = dataset._map_func.function_def
         for node in map_func_def.node_def:
           if node.op in _READER_DATASET_OPS:
             found_reader_op = True
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 46ce191f7b..d90da5908d 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -30,6 +30,7 @@ from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import random_seed
 from tensorflow.python.data.util import sparse
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function as eager_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
@@ -37,6 +38,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import smart_cond
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -1713,7 +1715,8 @@ class _VariantDataset(Dataset):
 
 
 class StructuredFunctionWrapper(object):
-  """A wrapper for `Defun` that supports structured arguments and return values.
+  """A wrapper for `defun` that supports structured arguments and return values.
+
   """
 
   def __init__(self, func, transformation_name, dataset=None,
@@ -1765,7 +1768,7 @@ class StructuredFunctionWrapper(object):
     # TODO(b/110122868): Enable this support for all `tf.data` functions.
     self._nested_dataset_support = experimental_nested_dataset_support
 
-    @function.Defun(*self._defun_args())
+    @eager_function.defun(input_signature=self._defun_args())
     def tf_data_structured_function_wrapper(*args):
       """Wrapper for passing nested structures to and from tf.data functions."""
       flat_args = []
@@ -1850,36 +1853,43 @@ class StructuredFunctionWrapper(object):
       self._output_shapes = nest.pack_sequence_as(ret, flat_shapes)
       self._output_types = nest.pack_sequence_as(ret, flat_types)
 
-      _warn_if_collections(transformation_name)
-
       return flat_ret
 
-    self._function = tf_data_structured_function_wrapper
+    table_initializers_len = len(ops.get_default_graph().get_collection(
+        ops.GraphKeys.TABLE_INITIALIZERS))
+
+    self._function = tf_data_structured_function_wrapper.get_concrete_function()
     if add_to_graph:
       self._function.add_to_graph(ops.get_default_graph())
-    else:
-      # Use the private method that will execute
-      # `tf_data_structured_function_wrapper` but delay adding it to the graph
-      # in case (e.g.) we need to rerun the function.
-      self._function._create_definition_if_needed()  # pylint: disable=protected-access
+    if len(
+        self._function.graph.get_collection(
+            ops.GraphKeys.TABLE_INITIALIZERS)) != table_initializers_len:
+      warnings.warn(
+          "Creating lookup tables inside a function passed to %s is not"
+          " supported. Create each table outside the function, and "
+          "capture it inside the function to use it." % transformation_name)
 
   def _defun_args(self):
-    """Returns a flat list of `tf.DType` for the input element structure."""
+    """Returns a list of `tf.TensorSpec` for the input element structure."""
     ret = []
-    for input_type, input_class in zip(nest.flatten(self._input_types),
-                                       nest.flatten(self._input_classes)):
+    for input_type, input_shape, input_class in zip(
+        nest.flatten(self._input_types), nest.flatten(self._input_shapes),
+        nest.flatten(self._input_classes)):
       # TODO(b/110122868): Add a registration mechanism for new component types.
       if input_class is sparse_tensor_lib.SparseTensor:
-        ret.append(dtypes.variant)
+        ret.append(
+            tensor_spec.TensorSpec(
+                tensor_shape.TensorShape(None), dtypes.variant))
       elif isinstance(input_class, _NestedDatasetComponent):
         if not self._nested_dataset_support:
           raise NotImplementedError(
               "The %s transformation does not currently support nested "
               "datasets as inputs." % self._transformation_name)
-        ret.append(dtypes.variant)
+        ret.append(
+            tensor_spec.TensorSpec(tensor_shape.scalar(), dtypes.variant))
       else:
         assert isinstance(input_type, dtypes.DType)
-        ret.append(input_type)
+        ret.append(tensor_spec.TensorSpec(input_shape, input_type))
     return ret
 
   @property
@@ -2579,24 +2589,6 @@ def _should_unpack_args(args):
   return type(args) is tuple  # pylint: disable=unidiomatic-typecheck
 
 
-def _warn_if_collections(transformation_name):
-  """Prints warning message if the current graph uses common graph collections.
-
-  NOTE(mrry): Currently a warning is only generated for lookup tables. Any
-  variables created will be automatically hoisted out to the outermost scope
-  using `init_scope()`. Some collections (such as for control-flow contexts)
-  are benign and should not generate a warning.
-
-  Args:
-    transformation_name: A human-readable name for the transformation.
-  """
-  if ops.get_default_graph().get_collection(ops.GraphKeys.TABLE_INITIALIZERS):
-    warnings.warn("Creating lookup tables inside a function passed to %s is not"
-                  " supported. Create each table outside the function, and "
-                  "capture it inside the function to use it."
-                  % transformation_name)
-
-
 class MapDataset(UnaryDataset):
   """A `Dataset` that maps a function over elements in its input."""
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f261d92d64..aeb1cac3e9 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -662,6 +662,11 @@ class Function(object):
     outputs = self._inference_function.call(ctx, args)
     return self._build_call_outputs(outputs)
 
+  @property
+  def name(self):
+    """Function name."""
+    return self._inference_function.name
+
   @property
   def graph(self):
     """Returns the graph from which this function was constructed."""
@@ -719,6 +724,10 @@ class Function(object):
     return nest.map_structure(lambda x: x.dtype if x is not None else None,
                               self._func_graph.structured_outputs)
 
+  def add_to_graph(self, g):
+    """Adds this function into the graph g."""
+    return self._inference_function.add_to_graph(g)
+
   def _construct_backprop_function(self):
     """Constructs the backprop function object for this function."""
     backwards_graph = FuncGraph(_backward_name(self._func_graph.name))
@@ -1122,6 +1131,8 @@ class PolymorphicFunction(object):
       *args: inputs to specialize on.
       **kwargs: inputs to specialize on.
     """
+    if self._input_signature:
+      args, kwargs = None, None
     graph_function, _ = self._maybe_define_function(args, kwargs)
     return graph_function
 
@@ -1304,6 +1315,9 @@ def register(func, *args, **kwargs):
   function definition into graph. Register function with different input param
   will result into multiple version of functions registered in graph.
 
+  Also, `args` and `kwargs` are ignored if this `PolymorphicFunction` was
+  created with an `input_signature`.
+
   Args:
     func: the PolymorphicFunction instance that generated by a @defun
     *args: input arguments for the Python function.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 9ce367a837..ac45606eb0 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1750,11 +1750,10 @@ class FunctionTest(test.TestCase):
         # pylint: disable=protected-access
         self.assertEqual(len(graph._functions), 3)
 
-        # Test input param shape mismatch
-        t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
-        with self.assertRaisesRegexp(
-            ValueError, 'Python inputs incompatible with input_signature'):
-          function.register(defun_matmul, t2, t2)
+        # Test register function with cache, note inputs are ignored.
+        function.register(defun_matmul)
+        graph = ops.get_default_graph()
+        self.assertEqual(len(graph._functions), 3)
 
   def testRegisterFunctionWithCache(self):
     def matmul(x, y):
-- 
GitLab


From 9f7a138640408cea58698a432fd1596cf436b484 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Tue, 2 Oct 2018 17:57:49 -0700
Subject: [PATCH 1037/1357] Set shape for output tensors of cond_v2.

PiperOrigin-RevId: 215492782
---
 tensorflow/core/ops/functional_ops.cc         | 21 ++++++++++++++++++-
 .../kernel_tests/control_flow_ops_py_test.py  |  7 +++++++
 tensorflow/python/ops/cond_v2_impl.py         | 20 +++++++++++++++---
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc
index fed3fa22ed..22b4b07eff 100644
--- a/tensorflow/core/ops/functional_ops.cc
+++ b/tensorflow/core/ops/functional_ops.cc
@@ -110,8 +110,27 @@ REGISTER_OP("If")
     .Attr("Tout: list(type) >= 0")
     .Attr("then_branch: func")
     .Attr("else_branch: func")
+    .Attr("output_shapes: list(shape) = []")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape);
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      std::vector<PartialTensorShape> output_shapes;
+      TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
+      // If `output_shapes` attr is set use that as the shapes of the outputs
+      // else return unknown shapes.
+      if (output_shapes.empty()) return shape_inference::UnknownShape(c);
+      if (output_shapes.size() != c->num_outputs()) {
+        return errors::InvalidArgument(
+            "`output_shapes` must be the same length as num outputs (",
+            output_shapes.size(), " vs. ", c->num_outputs());
+      }
+      for (size_t i = 0; i < output_shapes.size(); ++i) {
+        shape_inference::ShapeHandle output_shape_handle;
+        TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
+            output_shapes[i], &output_shape_handle));
+        c->set_output(static_cast<int>(i), output_shape_handle);
+      }
+      return Status::OK();
+    });
 
 // TODO(drpng): remove this.
 REGISTER_OP("_While")
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 07ec859766..a1be77601c 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -351,6 +351,13 @@ class ControlFlowTest(test.TestCase):
     grad = gradients_impl.gradients(y, [v])
     self.assertAllEqual([None], grad)
 
+  def testCondOutputShape(self):
+    x = constant_op.constant(1.0)
+    b = control_flow_ops.cond(
+        constant_op.constant(True), lambda: math_ops.square(x),
+        lambda: math_ops.subtract(x, 1.))
+    self.assertEqual(b.shape, tensor_shape.scalar())
+
   def testFetchable(self):
     with self.cached_session() as sess:
       x = array_ops.placeholder(dtypes.float32)
diff --git a/tensorflow/python/ops/cond_v2_impl.py b/tensorflow/python/ops/cond_v2_impl.py
index f8b1ddb140..195ad11c71 100644
--- a/tensorflow/python/ops/cond_v2_impl.py
+++ b/tensorflow/python/ops/cond_v2_impl.py
@@ -96,9 +96,12 @@ def cond_v2(pred, true_fn, false_fn, name="cond"):
 
     # Create the If op.
     tensors = gen_functional_ops._if(  # pylint: disable=protected-access
-        pred, cond_inputs, [t.dtype for t in true_graph.outputs],
+        pred,
+        cond_inputs, [t.dtype for t in true_graph.outputs],
         _create_new_tf_function(true_graph),
         _create_new_tf_function(false_graph),
+        output_shapes=_get_output_shapes(true_graph.outputs,
+                                         false_graph.outputs),
         name=scope)
 
     # Set the flag to enable lowering on the `if` op if necessary
@@ -175,9 +178,12 @@ def _IfGrad(op, *grads):  # pylint: disable=invalid-name
 
   # Create the gradient If op.
   tensors = gen_functional_ops._if(
-      op.inputs[0], grad_inputs, [t.dtype for t in true_grad_graph.outputs],
+      op.inputs[0],
+      grad_inputs, [t.dtype for t in true_grad_graph.outputs],
       _create_new_tf_function(true_grad_graph),
-      _create_new_tf_function(false_grad_graph))
+      _create_new_tf_function(false_grad_graph),
+      output_shapes=_get_output_shapes(true_grad_graph.outputs,
+                                       false_grad_graph.outputs))
 
   # The predicate has no gradient.
   return [None] + tensors[:num_grad_outputs]
@@ -480,6 +486,14 @@ def _check_same_outputs(true_graph, false_graph):
         "  false_fn: %s" % (true_output_types, false_output_types))
 
 
+def _get_output_shapes(true_graph_outputs, false_graph_outputs):
+  output_shapes = [
+      t_out.shape.most_specific_compatible_shape(f_out.shape)
+      for t_out, f_out in zip(true_graph_outputs, false_graph_outputs)
+  ]
+  return output_shapes
+
+
 def _is_ancestor(graph, maybe_ancestor):
   if maybe_ancestor == graph:
     return True
-- 
GitLab


From 05bc6c6762d5a58bacd585e9243133bf0378515f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 18:10:46 -0700
Subject: [PATCH 1038/1357] Remove initial accumulator (and other auxiliary
 parameter) values from optimization parameter protos and removed uses of that
 functionality in tests.

PiperOrigin-RevId: 215494433
---
 .../tpu/proto/optimization_parameters.proto     | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index 8529b48c15..b9e0747fa4 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -28,7 +28,6 @@ message LearningRate {
 // https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer
 // https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L151
 message AdagradParameters {
-  float initial_accumulator = 1;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer
@@ -42,8 +41,6 @@ message FtrlParameters {
   float l1 = 1;
   float l2 = 2;
   float lr_power = 3;
-  float initial_accum = 4;
-  float initial_linear = 5;
 }
 
 // The Adam optimizer does not implement hyper-parameter update; use the dynamic
@@ -70,8 +67,6 @@ message AdamParameters {
   float beta1 = 3;
   float beta2 = 4;
   float epsilon = 5;
-  float initial_m = 6;
-  float initial_v = 7;
   bool use_non_lazy_adam = 8;
   bool use_max_with_epsilon = 9;
 }
@@ -81,7 +76,6 @@ message AdamParameters {
 message MomentumParameters {
   float momentum = 1;
   bool use_nesterov = 2;
-  float initial_accum = 3;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -90,8 +84,6 @@ message RmsPropParameters {
   float rho = 1;
   float momentum = 2;
   float epsilon = 3;
-  float initial_ms = 4;
-  float initial_mom = 5;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -100,9 +92,6 @@ message CenteredRmsPropParameters {
   float rho = 1;
   float momentum = 2;
   float epsilon = 3;
-  float initial_ms = 4;
-  float initial_mom = 5;
-  float initial_mg = 6;
 }
 
 // Variant of algorithm in http://proceedings.mlr.press/v44/shamir15.pdf
@@ -119,9 +108,6 @@ message MdlAdagradLightParameters {
   float mdl_hard_limit = 10;
   bool hard_limit_min_benefit = 11;
   bool mdl_regularize = 12;
-  float initial_accumulator = 13;
-  float initial_weight = 14;
-  float initial_benefit = 15;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -129,8 +115,6 @@ message MdlAdagradLightParameters {
 message AdadeltaParameters {
   float rho = 1;
   float epsilon = 2;
-  float initial_accumulator = 3;
-  float initial_update = 4;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -138,7 +122,6 @@ message AdadeltaParameters {
 message ProximalAdagradParameters {
   float l1 = 1;
   float l2 = 2;
-  float initial_accumulator = 3;
 }
 
 message OptimizationParameters {
-- 
GitLab


From f8ba42b0ab0bb19af0e4a930b95e7e7b3d2f557e Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 2 Oct 2018 18:38:24 -0700
Subject: [PATCH 1039/1357] Disable the cuDNN workarounds if the version number
 is new enough to get the corresponding bugs fixed. The bugs that were
 work-arounded were fixed and verified.

PiperOrigin-RevId: 215497418
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 104 ++++++++++----------
 1 file changed, 54 insertions(+), 50 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index ca90c383f9..df8538a4b8 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -2487,30 +2487,32 @@ port::Status CudnnSupport::DoConvolveImpl(
 
   // Report an error if we might be hitting a cuDNN bug that accesses illegal
   // memory. See nvbugs/2138754, b/80018418.
-  SE_RETURN_IF_ERROR([&] {
-    if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
-      return port::Status::OK();
-    }
-    if (input_descriptor.ndims() < 3) {
-      return port::Status::OK();
-    }
-    // Checks that a*b is within the valid range (as provided by NVIDIA).
-    auto check_sizes = [](size_t a, size_t b) {
-      if ((a * b * 4608 - 1) >> 31 == 0) {
+  if (CUDNN_VERSION < 7300) {
+    SE_RETURN_IF_ERROR([&] {
+      if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
         return port::Status::OK();
       }
-      return port::Status(
-          port::error::FAILED_PRECONDITION,
-          "This configuration potentially accesses illegal memory.");
-    };
-    SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
-                                   output_descriptor.feature_map_count()));
-    SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
-                                   input_descriptor.feature_map_count()));
-    SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
-                                   output_descriptor.feature_map_count()));
-    return port::Status::OK();
-  }());
+      if (input_descriptor.ndims() < 3) {
+        return port::Status::OK();
+      }
+      // Checks that a*b is within the valid range (as provided by NVIDIA).
+      auto check_sizes = [](size_t a, size_t b) {
+        if ((a * b * 4608 - 1) >> 31 == 0) {
+          return port::Status::OK();
+        }
+        return port::Status(
+            port::error::FAILED_PRECONDITION,
+            "This configuration potentially accesses illegal memory.");
+      };
+      SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
+                                     output_descriptor.feature_map_count()));
+      SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
+                                     input_descriptor.feature_map_count()));
+      SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
+                                     output_descriptor.feature_map_count()));
+      return port::Status::OK();
+    }());
+  }
 
   if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
       !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
@@ -3166,7 +3168,7 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
 
   // Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
   // zero-initialized, nvbugs/2254619.
-  if (CUDNN_VERSION >= 7000 &&
+  if (CUDNN_VERSION >= 7000 && CUDNN_VERSION < 7300 &&
       algorithm_config.algorithm().algo_id() ==
           CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 &&
       cudnn_type == CUDNN_DATA_HALF &&
@@ -3317,31 +3319,33 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
 
   // Report an error if we might be hitting a cuDNN bug that produces incorrect
   // results. See nvbugs/2072856
-  SE_RETURN_IF_ERROR([&] {
-    if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
-      return port::Status::OK();
-    }
-    if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
-      return port::Status::OK();
-    }
-    int convolution_size = output_descriptor.height() > 1
-                               ? filter_descriptor.input_filter_height()
-                               : filter_descriptor.input_filter_width();
-    if (convolution_size <= 32) {
-      return port::Status::OK();
-    }
-    cudnnConvolutionMode_t convolution_mode;
-    cudnnDataType_t compute_type;
-    RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
-        conv.handle(), 0, nullptr, nullptr, nullptr, nullptr, &convolution_mode,
-        &compute_type));
-    if (convolution_mode != CUDNN_CONVOLUTION) {
-      return port::Status::OK();
-    }
-    return port::Status(
-        port::error::FAILED_PRECONDITION,
-        "This configuration potentially produces incorrect results.");
-  }());
+  if (CUDNN_VERSION < 7300) {
+    SE_RETURN_IF_ERROR([&] {
+      if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
+        return port::Status::OK();
+      }
+      if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
+        return port::Status::OK();
+      }
+      int convolution_size = output_descriptor.height() > 1
+                                 ? filter_descriptor.input_filter_height()
+                                 : filter_descriptor.input_filter_width();
+      if (convolution_size <= 32) {
+        return port::Status::OK();
+      }
+      cudnnConvolutionMode_t convolution_mode;
+      cudnnDataType_t compute_type;
+      RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
+          conv.handle(), 0, nullptr, nullptr, nullptr, nullptr,
+          &convolution_mode, &compute_type));
+      if (convolution_mode != CUDNN_CONVOLUTION) {
+        return port::Status::OK();
+      }
+      return port::Status(
+          port::error::FAILED_PRECONDITION,
+          "This configuration potentially produces incorrect results.");
+    }());
+  }
 
   if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
       !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
@@ -3357,8 +3361,8 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
   // This wrong result caused by the bug is very flaky. It needs to be run for
   // up to 20 times to produce a mismatch.
   //
-  // TODO(timshen): add a nvbugs link.
-  if (CUDNN_VERSION >= 7100 &&
+  // See nvbugs/2379553.
+  if (CUDNN_VERSION >= 7100 && CUDNN_VERSION < 7300 &&
       algorithm_config.algorithm().algo_id() ==
           CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 &&
       cudnn_type == CUDNN_DATA_HALF &&
-- 
GitLab


From 8dc7bc7764150253c03a666eee84fc48f867d6a2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 19:13:14 -0700
Subject: [PATCH 1040/1357] In all constant-propagation transformations, check
 that the array we'd be turning into a constant is a discardable array. If
 it's not discardable, it means that the user wants this array to keep
 existing in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760
---
 .../toco/graph_transformations/resolve_constant_binary.cc | 8 ++++++++
 .../resolve_constant_concatenation.cc                     | 7 +++++++
 .../graph_transformations/resolve_constant_fake_quant.cc  | 7 +++++++
 .../toco/graph_transformations/resolve_constant_fill.cc   | 7 +++++++
 .../toco/graph_transformations/resolve_constant_gather.cc | 8 ++++++++
 .../toco/graph_transformations/resolve_constant_pack.cc   | 8 ++++++++
 .../resolve_constant_random_uniform.cc                    | 7 +++++++
 .../toco/graph_transformations/resolve_constant_range.cc  | 8 ++++++++
 .../graph_transformations/resolve_constant_reshape.cc     | 7 +++++++
 .../toco/graph_transformations/resolve_constant_select.cc | 8 ++++++++
 .../resolve_constant_shape_or_rank.cc                     | 8 ++++++++
 .../toco/graph_transformations/resolve_constant_slice.cc  | 8 ++++++++
 .../resolve_constant_strided_slice.cc                     | 8 ++++++++
 .../toco/graph_transformations/resolve_constant_tile.cc   | 7 +++++++
 .../graph_transformations/resolve_constant_transpose.cc   | 8 ++++++++
 .../toco/graph_transformations/resolve_constant_unary.cc  | 8 ++++++++
 16 files changed, 122 insertions(+)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
index f7e5aa6609..3e57d3f467 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
@@ -191,6 +191,14 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model,
 bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
   const auto binary_it = model->operators.begin() + op_index;
   const auto* binary_op = binary_it->get();
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, binary_op->outputs[0])) {
+    return false;
+  }
+
   // Test for binary ops of types that we know how to resolve
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
index d916ae0ddf..c6c5035a51 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
@@ -144,6 +144,13 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
   const auto* concat_op =
       static_cast<const ConcatenationOperator*>(concat_base_op);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, concat_op->outputs[0])) {
+    return false;
+  }
+
   for (const string& input_name : concat_op->inputs) {
     // We only expect constant unquantized arrays as input, otherwise we return.
     // We  also make sure the shapes of the input arrays are known and they are
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
index f5f2f77460..3d797533c9 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
@@ -69,6 +69,13 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
   const auto* fakequant_op =
       static_cast<const FakeQuantOperator*>(fakequant_base_op);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, fakequant_op->outputs[0])) {
+    return false;
+  }
+
   // Yield until the fakequant MinMax has been resolved.
   if (!fakequant_op->minmax) {
     return false;
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
index f6f95481b5..2cb1e64f3a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
@@ -52,6 +52,13 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
index 36d7dad0ce..4dfe203a25 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
@@ -71,6 +71,14 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
index e86616574d..6f44025dd4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
@@ -59,6 +59,14 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
index 88d06d7dc7..c9f2b95d09 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
@@ -70,6 +70,13 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
index 1a0ba9e2bc..e347286dd4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
@@ -28,6 +28,14 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   auto* op = static_cast<RangeOperator*>(base_op);
 
   CHECK_EQ(op->inputs.size(), 3);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   const auto& start_array = model->GetArray(op->inputs[0]);
   if (!start_array.has_shape()) {
     // Yield until all input dims have been resolved.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
index a6f665b5f0..bfdaa8aafd 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
@@ -33,6 +33,13 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
index e880a3f44d..3a95d39cd4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
@@ -37,6 +37,14 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 3);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
index 8a0e3e8995..452bef1f16 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
@@ -27,6 +27,14 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
   }
 
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been resolved
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
index b35c3e19c4..58d6797e1c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
@@ -96,6 +96,14 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
   const SliceOperator* op = static_cast<const SliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
index 8853ed87e6..e275447a0c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
@@ -114,6 +114,14 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
       static_cast<const StridedSliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
index 5cfa1a5582..378a38f14b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
@@ -105,6 +105,13 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
   }
   const auto* op = static_cast<const TensorFlowTileOperator*>(base_op);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
   auto& output_array = model->GetArray(op->outputs[0]);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
index fe15dfa06f..5d3f4a6240 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
@@ -111,6 +111,14 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
 
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index c698a9567a..e35ed0898b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -48,6 +48,14 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) {
 bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   const auto unary_it = model->operators.begin() + op_index;
   const auto* unary_op = unary_it->get();
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, unary_op->outputs[0])) {
+    return false;
+  }
+
   // Test for unary ops of types that we know how to resolve.
   switch (unary_op->type) {
     case OperatorType::kCast:
-- 
GitLab


From fa61b939bec50d731b86f40c79054503d629e29b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 19:28:27 -0700
Subject: [PATCH 1041/1357] [XLA] Merge the single instruction parsing and the
 full module parsing in one function.

PiperOrigin-RevId: 215501702
---
 tensorflow/compiler/xla/service/hlo_parser.cc | 66 ++++++++++---------
 tensorflow/compiler/xla/service/hlo_parser.h  |  6 --
 .../compiler/xla/service/hlo_parser_test.cc   | 22 +++----
 3 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 5a125b4c08..0440f1b54f 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -68,7 +68,7 @@ class HloParser {
 
   // Runs the parser and constructs the resulting HLO in the given (empty)
   // HloModule. Returns false if an error occurred.
-  bool Run(HloModule* module);
+  Status Run(HloModule* module);
 
   // Returns the error information.
   string GetError() const { return StrJoin(error_, "\n"); }
@@ -79,9 +79,6 @@ class HloParser {
   StatusOr<ConvolutionDimensionNumbers> ParseConvolutionDimensionNumbersOnly();
   StatusOr<PaddingConfig> ParsePaddingConfigOnly();
 
-  // Stand-alone parsing utility for a single instruction worth of text.
-  Status ParseSingleInstruction(HloModule* module);
-
  private:
   using InstrNameTable =
       std::unordered_map<string, std::pair<HloInstruction*, LocTy>>;
@@ -100,8 +97,12 @@ class HloParser {
   std::pair<HloInstruction*, LocTy>* FindInstruction(
       const string& name, const optional<Shape>& shape = nullopt);
 
+  // Parse a single instruction worth of text.
+  bool ParseSingleInstruction(HloModule* module);
+
   // ParseXXX returns false if an error occurred.
   bool ParseHloModule(HloModule* module);
+
   bool ParseComputations(HloModule* module);
   bool ParseComputation(HloComputation** entry_computation);
   bool ParseInstructionList(HloComputation** computation,
@@ -376,9 +377,25 @@ bool HloParser::TokenError(absl::string_view msg) {
   return Error(lexer_.GetLoc(), msg);
 }
 
-bool HloParser::Run(HloModule* module) {
+Status HloParser::Run(HloModule* module) {
   lexer_.Lex();
-  return ParseHloModule(module);
+  if (lexer_.GetKind() == TokKind::kw_HloModule) {
+    // This means that the text contains a full HLO module.
+    if (!ParseHloModule(module)) {
+      return InvalidArgument(
+          "Syntax error when trying to parse the text as a HloModule:\n%s",
+          GetError());
+    }
+    return Status::OK();
+  }
+  // This means that the text is a single HLO instruction.
+  if (!ParseSingleInstruction(module)) {
+    return InvalidArgument(
+        "Syntax error when trying to parse the text as single "
+        "HloInstruction:\n%s",
+        GetError());
+  }
+  return Status::OK();
 }
 
 std::pair<HloInstruction*, HloParser::LocTy>* HloParser::FindInstruction(
@@ -3279,9 +3296,11 @@ StatusOr<PaddingConfig> HloParser::ParsePaddingConfigOnly() {
   return padding_config;
 }
 
-Status HloParser::ParseSingleInstruction(HloModule* module) {
-  TF_RET_CHECK(create_missing_instruction_ == nullptr);
-  TF_RET_CHECK(scoped_name_tables_.empty());
+bool HloParser::ParseSingleInstruction(HloModule* module) {
+  if (create_missing_instruction_ != nullptr || !scoped_name_tables_.empty()) {
+    LOG(FATAL) << "Parser state is not clean. Please do not call any other "
+                  "methods before calling ParseSingleInstruction.";
+  }
   HloComputation::Builder builder(module->name());
 
   // The missing instruction hook we register creates the shaped instruction on
@@ -3298,9 +3317,6 @@ Status HloParser::ParseSingleInstruction(HloModule* module) {
     return tensorflow::gtl::FindOrNull(current_name_table(), new_name);
   };
 
-  // Prime the lexer.
-  lexer_.Lex();
-
   // Parse the instruction with the registered hook.
   Scope scope(&scoped_name_tables_);
   if (CanBeShape()) {
@@ -3309,7 +3325,7 @@ Status HloParser::ParseSingleInstruction(HloModule* module) {
     //
     //  f32[10] fusion(...), calls={...}
     if (!ParseInstruciontRhs(&builder, module->name(), lexer_.GetLoc())) {
-      return InvalidArgument("Syntax error:\n%s", GetError());
+      return false;
     }
   } else {
     // This means that the instruction's left-hand side might exist, e.g.
@@ -3317,7 +3333,7 @@ Status HloParser::ParseSingleInstruction(HloModule* module) {
     //  foo = f32[10] fusion(...), calls={...}
     string root_name;
     if (!ParseInstruction(&builder, &root_name)) {
-      return InvalidArgument("Syntax error:\n%s", GetError());
+      return false;
     }
   }
 
@@ -3325,7 +3341,7 @@ Status HloParser::ParseSingleInstruction(HloModule* module) {
   for (auto& comp : computations_) {
     module->AddEmbeddedComputation(std::move(comp));
   }
-  return Status::OK();
+  return true;
 }
 
 }  // namespace
@@ -3334,38 +3350,24 @@ StatusOr<std::unique_ptr<HloModule>> ParseHloString(
     absl::string_view str, const HloModuleConfig& config) {
   auto module = absl::make_unique<HloModule>(/*name=*/"", config);
   HloParser parser(str);
-  if (!parser.Run(module.get())) {
-    return InvalidArgument("Syntax error:\n%s", parser.GetError());
-  }
+  TF_RETURN_IF_ERROR(parser.Run(module.get()));
   return std::move(module);
 }
 
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(absl::string_view str) {
   auto module = absl::make_unique<HloModule>(/*name=*/"", HloModuleConfig());
   HloParser parser(str);
-  if (!parser.Run(module.get())) {
-    return InvalidArgument("Syntax error:\n%s", parser.GetError());
-  }
+  TF_RETURN_IF_ERROR(parser.Run(module.get()));
   return std::move(module);
 }
 
 Status ParseHloString(absl::string_view str, HloModule* module) {
   TF_RET_CHECK(module->computation_count() == 0);
   HloParser parser(str);
-  if (!parser.Run(module)) {
-    return InvalidArgument("Syntax error:\n%s", parser.GetError());
-  }
+  TF_RETURN_IF_ERROR(parser.Run(module));
   return Status::OK();
 }
 
-StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
-    absl::string_view str, absl::string_view name) {
-  HloParser parser(str);
-  auto module = absl::make_unique<HloModule>(string(name), HloModuleConfig());
-  TF_RETURN_IF_ERROR(parser.ParseSingleInstruction(module.get()));
-  return std::move(module);
-}
-
 StatusOr<HloSharding> ParseSharding(absl::string_view str) {
   HloParser parser(str);
   return parser.ParseShardingOnly();
diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h
index 97d6f0117e..81eeb9f13b 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.h
+++ b/tensorflow/compiler/xla/service/hlo_parser.h
@@ -40,12 +40,6 @@ StatusOr<std::unique_ptr<HloModule>> ParseHloString(
 // point to an empty module (no computations).
 Status ParseHloString(absl::string_view str, HloModule* module);
 
-// Parses the text for a single HLO instruction into an HLO module with an
-// entry computation that runs that instruction (with the same parameters) as
-// its root instruction.
-StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
-    absl::string_view str, absl::string_view name = "single_op");
-
 // Given a string in the HloModule::ToString() format, parses the string and
 // creates a HloModule with default config.
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(absl::string_view str);
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index d10acf3814..b618510640 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1835,7 +1835,7 @@ TEST(HloParserSingleOpTest, SingleOp) {
   const string text =
       "%multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %broadcast, "
       "f32[2,4]{1,0} %x)";
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
@@ -1844,7 +1844,7 @@ TEST(HloParserSingleOpTest, SingleOp) {
 
 TEST(HloParserSingleOpTest, SingleOpNoShapeProducesError) {
   const string text = "multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x)";
-  StatusOr<std::unique_ptr<HloModule>> module = ParseHloOpToModule(text);
+  StatusOr<std::unique_ptr<HloModule>> module = ParseHloString(text);
   ASSERT_TRUE(!module.status().ok());
   LOG(INFO) << "Status: " << module.status();
   EXPECT_THAT(module.status().ToString(),
@@ -1853,7 +1853,7 @@ TEST(HloParserSingleOpTest, SingleOpNoShapeProducesError) {
 
 TEST(HloParserSingleOpTest, SingleOpNoOperandShapesProducesError) {
   const string text = "%multiply = f32[2,4]{1,0} multiply(%broadcast, %x)";
-  StatusOr<std::unique_ptr<HloModule>> module = ParseHloOpToModule(text);
+  StatusOr<std::unique_ptr<HloModule>> module = ParseHloString(text);
   ASSERT_TRUE(!module.status().ok());
   LOG(INFO) << "Status: " << module.status();
   EXPECT_THAT(module.status().ToString(),
@@ -1863,7 +1863,7 @@ TEST(HloParserSingleOpTest, SingleOpNoOperandShapesProducesError) {
 TEST(HloParserSingleOpTest, SingleOpNoNames) {
   const string text =
       "%multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})";
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
@@ -1872,7 +1872,7 @@ TEST(HloParserSingleOpTest, SingleOpNoNames) {
 
 TEST(HloParserSingleOpTest, CanonicalOp) {
   const string text = "f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})";
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
@@ -1908,7 +1908,7 @@ TEST(HloParserSingleOpTest, CanonicalOpWithNested) {
   }
 })";
 
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_EQ(
@@ -1926,7 +1926,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested) {
   ROOT %subtract = f32[3,2,1,1]{3,2,1,0} subtract(f32[3,2,1,1]{3,2,1,0} %param_0, f32[3,2,1,1]{3,2,1,0} %broadcast)
 })";
 
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
@@ -1939,7 +1939,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested_DoesNotExist) {
 {
   result = f32[] add(f32[] x, f32[] y)
 })";
-  auto status = ParseHloOpToModule(text).status();
+  auto status = ParseHloString(text).status();
   ASSERT_FALSE(status.ok());
   EXPECT_THAT(status.error_message(),
               ::testing::HasSubstr("does not exist: x"));
@@ -1951,7 +1951,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested_NoLhs) {
 {
   f32[] add(f32[] x, f32[] y)
 })";
-  auto status = ParseHloOpToModule(text).status();
+  auto status = ParseHloString(text).status();
   ASSERT_FALSE(status.ok());
   EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name"));
 }
@@ -1962,7 +1962,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested_NoOperandName) {
 {
   result = f32[] add(f32[], f32[])
 })";
-  auto status = ParseHloOpToModule(text).status();
+  auto status = ParseHloString(text).status();
   ASSERT_FALSE(status.ok());
   EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name"));
 }
@@ -1970,7 +1970,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested_NoOperandName) {
 TEST(HloParserSingleOpTest, ConvolutionTrivialFeatureGroupCount) {
   const string text =
       R"(%convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1}, dim_labels=b0f_0io->b0f)";
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
-- 
GitLab


From 4b2d0180ba8c903f098f52eb9a12d26a7626dd34 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 19:28:31 -0700
Subject: [PATCH 1042/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 215501709
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 46 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  8 ++++
 2 files changed, 54 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index e46cbc863d..4845767405 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -27069,6 +27069,52 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "If"
+  input_arg {
+    name: "cond"
+    type_attr: "Tcond"
+  }
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "Tcond"
+    type: "type"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "then_branch"
+    type: "func"
+  }
+  attr {
+    name: "else_branch"
+    type: "func"
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "Igamma"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 0e9f939ab4..229022b64c 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -13176,6 +13176,14 @@ op {
     name: "else_branch"
     type: "func"
   }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
   is_stateful: true
 }
 op {
-- 
GitLab


From 2597b883a14749c77fffd7e5f9677107021ff40a Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 2 Oct 2018 20:00:36 -0700
Subject: [PATCH 1043/1357] Automated rollback of commit
 b7e9cbab27c893283acc4a6154d7a59dffb23758

PiperOrigin-RevId: 215503549
---
 .../contrib/distribute/python/input_ops.py    |  2 +-
 tensorflow/python/data/ops/dataset_ops.py     | 60 +++++++++++--------
 tensorflow/python/eager/function.py           | 14 -----
 tensorflow/python/eager/function_test.py      |  9 +--
 4 files changed, 40 insertions(+), 45 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/input_ops.py b/tensorflow/contrib/distribute/python/input_ops.py
index 423952c9e2..f07ec8234d 100644
--- a/tensorflow/contrib/distribute/python/input_ops.py
+++ b/tensorflow/contrib/distribute/python/input_ops.py
@@ -78,7 +78,7 @@ def auto_shard_dataset(dataset, num_shards, index):
       elif hasattr(dataset, "_map_func"):
         # TODO(priyag): Make this check more robust by enforcing some common
         # property on all map/flatmap/interleave datasets.
-        map_func_def = dataset._map_func.function_def
+        map_func_def = dataset._map_func.definition
         for node in map_func_def.node_def:
           if node.op in _READER_DATASET_OPS:
             found_reader_op = True
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index d90da5908d..46ce191f7b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -30,7 +30,6 @@ from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import random_seed
 from tensorflow.python.data.util import sparse
 from tensorflow.python.eager import context
-from tensorflow.python.eager import function as eager_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
@@ -38,7 +37,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import smart_cond
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -1715,8 +1713,7 @@ class _VariantDataset(Dataset):
 
 
 class StructuredFunctionWrapper(object):
-  """A wrapper for `defun` that supports structured arguments and return values.
-
+  """A wrapper for `Defun` that supports structured arguments and return values.
   """
 
   def __init__(self, func, transformation_name, dataset=None,
@@ -1768,7 +1765,7 @@ class StructuredFunctionWrapper(object):
     # TODO(b/110122868): Enable this support for all `tf.data` functions.
     self._nested_dataset_support = experimental_nested_dataset_support
 
-    @eager_function.defun(input_signature=self._defun_args())
+    @function.Defun(*self._defun_args())
     def tf_data_structured_function_wrapper(*args):
       """Wrapper for passing nested structures to and from tf.data functions."""
       flat_args = []
@@ -1853,43 +1850,36 @@ class StructuredFunctionWrapper(object):
       self._output_shapes = nest.pack_sequence_as(ret, flat_shapes)
       self._output_types = nest.pack_sequence_as(ret, flat_types)
 
-      return flat_ret
+      _warn_if_collections(transformation_name)
 
-    table_initializers_len = len(ops.get_default_graph().get_collection(
-        ops.GraphKeys.TABLE_INITIALIZERS))
+      return flat_ret
 
-    self._function = tf_data_structured_function_wrapper.get_concrete_function()
+    self._function = tf_data_structured_function_wrapper
     if add_to_graph:
       self._function.add_to_graph(ops.get_default_graph())
-    if len(
-        self._function.graph.get_collection(
-            ops.GraphKeys.TABLE_INITIALIZERS)) != table_initializers_len:
-      warnings.warn(
-          "Creating lookup tables inside a function passed to %s is not"
-          " supported. Create each table outside the function, and "
-          "capture it inside the function to use it." % transformation_name)
+    else:
+      # Use the private method that will execute
+      # `tf_data_structured_function_wrapper` but delay adding it to the graph
+      # in case (e.g.) we need to rerun the function.
+      self._function._create_definition_if_needed()  # pylint: disable=protected-access
 
   def _defun_args(self):
-    """Returns a list of `tf.TensorSpec` for the input element structure."""
+    """Returns a flat list of `tf.DType` for the input element structure."""
     ret = []
-    for input_type, input_shape, input_class in zip(
-        nest.flatten(self._input_types), nest.flatten(self._input_shapes),
-        nest.flatten(self._input_classes)):
+    for input_type, input_class in zip(nest.flatten(self._input_types),
+                                       nest.flatten(self._input_classes)):
       # TODO(b/110122868): Add a registration mechanism for new component types.
       if input_class is sparse_tensor_lib.SparseTensor:
-        ret.append(
-            tensor_spec.TensorSpec(
-                tensor_shape.TensorShape(None), dtypes.variant))
+        ret.append(dtypes.variant)
       elif isinstance(input_class, _NestedDatasetComponent):
         if not self._nested_dataset_support:
           raise NotImplementedError(
               "The %s transformation does not currently support nested "
               "datasets as inputs." % self._transformation_name)
-        ret.append(
-            tensor_spec.TensorSpec(tensor_shape.scalar(), dtypes.variant))
+        ret.append(dtypes.variant)
       else:
         assert isinstance(input_type, dtypes.DType)
-        ret.append(tensor_spec.TensorSpec(input_shape, input_type))
+        ret.append(input_type)
     return ret
 
   @property
@@ -2589,6 +2579,24 @@ def _should_unpack_args(args):
   return type(args) is tuple  # pylint: disable=unidiomatic-typecheck
 
 
+def _warn_if_collections(transformation_name):
+  """Prints warning message if the current graph uses common graph collections.
+
+  NOTE(mrry): Currently a warning is only generated for lookup tables. Any
+  variables created will be automatically hoisted out to the outermost scope
+  using `init_scope()`. Some collections (such as for control-flow contexts)
+  are benign and should not generate a warning.
+
+  Args:
+    transformation_name: A human-readable name for the transformation.
+  """
+  if ops.get_default_graph().get_collection(ops.GraphKeys.TABLE_INITIALIZERS):
+    warnings.warn("Creating lookup tables inside a function passed to %s is not"
+                  " supported. Create each table outside the function, and "
+                  "capture it inside the function to use it."
+                  % transformation_name)
+
+
 class MapDataset(UnaryDataset):
   """A `Dataset` that maps a function over elements in its input."""
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index aeb1cac3e9..f261d92d64 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -662,11 +662,6 @@ class Function(object):
     outputs = self._inference_function.call(ctx, args)
     return self._build_call_outputs(outputs)
 
-  @property
-  def name(self):
-    """Function name."""
-    return self._inference_function.name
-
   @property
   def graph(self):
     """Returns the graph from which this function was constructed."""
@@ -724,10 +719,6 @@ class Function(object):
     return nest.map_structure(lambda x: x.dtype if x is not None else None,
                               self._func_graph.structured_outputs)
 
-  def add_to_graph(self, g):
-    """Adds this function into the graph g."""
-    return self._inference_function.add_to_graph(g)
-
   def _construct_backprop_function(self):
     """Constructs the backprop function object for this function."""
     backwards_graph = FuncGraph(_backward_name(self._func_graph.name))
@@ -1131,8 +1122,6 @@ class PolymorphicFunction(object):
       *args: inputs to specialize on.
       **kwargs: inputs to specialize on.
     """
-    if self._input_signature:
-      args, kwargs = None, None
     graph_function, _ = self._maybe_define_function(args, kwargs)
     return graph_function
 
@@ -1315,9 +1304,6 @@ def register(func, *args, **kwargs):
   function definition into graph. Register function with different input param
   will result into multiple version of functions registered in graph.
 
-  Also, `args` and `kwargs` are ignored if this `PolymorphicFunction` was
-  created with an `input_signature`.
-
   Args:
     func: the PolymorphicFunction instance that generated by a @defun
     *args: input arguments for the Python function.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index ac45606eb0..9ce367a837 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1750,10 +1750,11 @@ class FunctionTest(test.TestCase):
         # pylint: disable=protected-access
         self.assertEqual(len(graph._functions), 3)
 
-        # Test register function with cache, note inputs are ignored.
-        function.register(defun_matmul)
-        graph = ops.get_default_graph()
-        self.assertEqual(len(graph._functions), 3)
+        # Test input param shape mismatch
+        t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        with self.assertRaisesRegexp(
+            ValueError, 'Python inputs incompatible with input_signature'):
+          function.register(defun_matmul, t2, t2)
 
   def testRegisterFunctionWithCache(self):
     def matmul(x, y):
-- 
GitLab


From 9f42ebd5982688511ecc0ef7d23de02b64d8dd1e Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Tue, 2 Oct 2018 20:04:31 -0700
Subject: [PATCH 1044/1357] Improve error messages and doc strings for
 eager-mode tf.keras.Model.fit() + tf.data objects

- Previously, when validation_steps was missing, the error message incorrectly says "please provide either batch_size or steps_per_epoch". Now it reads "please provide either batch_size or validation_steps".
- Some whitespace-related fixes.

PiperOrigin-RevId: 215503991
---
 tensorflow/python/keras/engine/training.py    |  9 ++++--
 .../python/keras/engine/training_eager.py     |  3 +-
 .../keras/engine/training_eager_test.py       | 30 +++++++++++++++++++
 .../python/keras/engine/training_utils.py     | 15 +++++++---
 4 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index c842b8192e..85233de9b1 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -1419,6 +1419,8 @@ class Model(Network):
               - tuple `(x_val, y_val)` of Numpy arrays or tensors
               - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
               - dataset or a dataset iterator
+            For the first two cases, `batch_size` must be provided.
+            For the last case, `validation_steps` must be provided.
         shuffle: Boolean (whether to shuffle the training data
             before each epoch) or str (for 'batch').
             'batch' is a special option for dealing with the
@@ -1454,9 +1456,10 @@ class Model(Network):
             TensorFlow data tensors, the default `None` is equal to
             the number of samples in your dataset divided by
             the batch size, or 1 if that cannot be determined.
-        validation_steps: Only relevant if `steps_per_epoch`
-            is specified. Total number of steps (batches of samples)
-            to validate before stopping.
+        validation_steps: Only relevant if `validation_data` is provided and
+            is a dataset or dataset iterator. Total number of steps (batches of
+            samples) to draw before stopping when performing validation
+            at the end of every epoch.
         max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
             input only. Maximum size for the generator queue.
             If unspecified, `max_queue_size` will default to 10.
diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py
index fb71bf2596..2a62edd698 100644
--- a/tensorflow/python/keras/engine/training_eager.py
+++ b/tensorflow/python/keras/engine/training_eager.py
@@ -739,7 +739,8 @@ def test_loop(model, inputs, targets,
       y=targets,
       sample_weights=sample_weights,
       batch_size=batch_size,
-      steps_per_epoch=steps)
+      steps_per_epoch=steps,
+      is_validation=True)
   with backend.learning_phase_scope(0):
     return iterator_test_loop(model, inputs, steps, verbose=verbose)
 
diff --git a/tensorflow/python/keras/engine/training_eager_test.py b/tensorflow/python/keras/engine/training_eager_test.py
index 1f5176c4d7..943ede1be9 100644
--- a/tensorflow/python/keras/engine/training_eager_test.py
+++ b/tensorflow/python/keras/engine/training_eager_test.py
@@ -125,6 +125,36 @@ class TrainingTest(test.TestCase):
     model.train_on_batch(inputs, targets)
     model.test_on_batch(inputs, targets)
 
+  def test_model_fit_and_validation_with_missing_arg_errors(self):
+    x = keras.layers.Input(shape=(3,), name='input')
+    y = keras.layers.Dense(4, name='dense')(x)
+    model = keras.Model(x, y)
+    model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse')
+
+    x = keras.backend.zeros(shape=(10, 3))
+    y = keras.backend.zeros(shape=(10, 4))
+    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat(10).batch(5)
+    iterator = dataset.make_one_shot_iterator()
+    validation_dataset = dataset_ops.Dataset.from_tensor_slices(
+        (x, y)).repeat(10).batch(5)
+    validation_iterator = validation_dataset.make_one_shot_iterator()
+
+    with self.assertRaisesRegexp(
+        ValueError, r'specify .* `steps_per_epoch`'):
+      model.fit(iterator, epochs=1, verbose=0)
+    with self.assertRaisesRegexp(
+        ValueError, r'provide either `batch_size` or `validation_steps`'):
+      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
+                validation_data=(x, y))
+    with self.assertRaisesRegexp(
+        ValueError, r'provide either `batch_size` or `validation_steps`'):
+      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
+                validation_data=validation_dataset)
+    with self.assertRaisesRegexp(
+        ValueError, r'provide either `batch_size` or `validation_steps`'):
+      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
+                validation_data=validation_iterator)
+
   def test_generator_methods(self):
     model = keras.Sequential()
     model.add(keras.layers.Dense(4, input_shape=(3,)))
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 9c303f4bed..dd2a7f16ec 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -106,7 +106,8 @@ def convert_to_iterator(x=None,
                         batch_size=None,
                         steps_per_epoch=None,
                         epochs=1,
-                        shuffle=False):
+                        shuffle=False,
+                        is_validation=False):
   """Converts NumPy arrays or EagerTensors to an EagerIterator.
 
   Combines all provided data into a single EagerIterator.
@@ -124,6 +125,9 @@ def convert_to_iterator(x=None,
         epoch.
       epochs: Epochs to repeat iterator for.
       shuffle: Whether to shuffle data after each epoch.
+      is_validation: Whether this call is for validation during a training
+        (e.g., `fit()`) call. This info is used to construct error messages
+        (if any).
 
   Raises:
       ValueError: if steps_per_epoch cannot be calculated from the data
@@ -151,9 +155,12 @@ def convert_to_iterator(x=None,
     steps_per_epoch = int(math.ceil(num_samples / batch_size))
 
   if steps_per_epoch is None:
-    raise ValueError('Could not determine steps_per_epoch.'
-                     'Please provide either batch_size or'
-                     'steps_per_epoch.')
+    alternative_arg_name = (
+        'validation_steps' if is_validation else 'steps_per_epoch')
+    raise ValueError(
+        'Could not determine how to convert EagerTensors into EagerIterator. '
+        'Please provide either `batch_size` or '
+        '`%s`.' % alternative_arg_name)
 
   # TODO(omalleyt) for NumPy arrays in graph mode
   # placeholder ops should be used
-- 
GitLab


From 65b5190065db0074f8722b09ba43423438c40258 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 2 Oct 2018 21:49:20 -0700
Subject: [PATCH 1045/1357] Further loosen bounds for depthwise_conv_op_test.

PiperOrigin-RevId: 215512168
---
 tensorflow/python/kernel_tests/depthwise_conv_op_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 9c02b69180..6aee2eb0a3 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -131,7 +131,7 @@ class DepthwiseConv2DTest(test.TestCase):
     with self.session(graph=graph, use_gpu=use_gpu) as sess:
       tolerance = {
           dtypes.float16: 4e-2,
-          dtypes.float32: 1e-7,
+          dtypes.float32: 1e-6,
           dtypes.float64: 1e-12,
       }[data_type]
 
-- 
GitLab


From bbe15eee6779941c54e145d12e16f6473738857c Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Tue, 2 Oct 2018 22:39:09 -0700
Subject: [PATCH 1046/1357] [XLA] Modify the function that determines whether
 an instruction can change layout so that it can be used by the HLO verifier.

Change the function to a static member function of the LayoutAssignment class.

Add an std::function member to LayoutAssignment to store the function object
passed down from the backend compiler class and use it to decide whether an
instruction can change layouts.

Fix affected test cases.

PiperOrigin-RevId: 215515611
---
 .../compiler/xla/service/cpu/cpu_compiler.cc   |  3 ++-
 .../xla/service/cpu/cpu_layout_assignment.h    |  5 ++++-
 .../service/cpu/cpu_layout_assignment_test.cc  | 10 ++++++----
 .../xla/service/gpu/gpu_layout_assignment.h    |  5 ++++-
 .../service/gpu/gpu_layout_assignment_test.cc  | 17 +++++++++++------
 .../compiler/xla/service/gpu/nvptx_compiler.cc |  3 ++-
 .../xla/service/interpreter/compiler.cc        |  3 ++-
 .../compiler/xla/service/layout_assignment.cc  | 18 ++++++++++++------
 .../compiler/xla/service/layout_assignment.h   | 18 ++++++++++++++----
 .../xla/service/layout_assignment_test.cc      |  3 ++-
 10 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 18fc144efe..ea8c200dee 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -308,7 +308,8 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
       ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
 
   pipeline.AddPass<CpuLayoutAssignment>(
-      module->mutable_entry_computation_layout(), target_machine_features);
+      module->mutable_entry_computation_layout(),
+      LayoutAssignment::InstructionCanChangeLayout, target_machine_features);
   return pipeline.Run(module).status();
 }
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
index 3c4fe68b83..f4da35dd37 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
@@ -30,8 +30,11 @@ class CpuLayoutAssignment : public LayoutAssignment {
  public:
   explicit CpuLayoutAssignment(
       ComputationLayout* entry_computation_layout,
+      std::function<bool(const HloInstruction*)>
+          instruction_can_change_layout_func,
       const TargetMachineFeatures* target_machine_features)
-      : LayoutAssignment(entry_computation_layout),
+      : LayoutAssignment(entry_computation_layout,
+                         std::move(instruction_can_change_layout_func)),
         target_machine_features_(*target_machine_features) {}
   ~CpuLayoutAssignment() override {}
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
index 4668f3872d..97659b88a7 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
@@ -54,8 +54,9 @@ class CpuLayoutAssignmentTest : public HloTestBase {
         [](int64 shape_size) {
           return cpu::TargetMachineFeatures::kEigenExpectedTensorAlignment;
         });
-    cpu::CpuLayoutAssignment layout_assignment(entry_computation_layout,
-                                               &target_machine_features);
+    cpu::CpuLayoutAssignment layout_assignment(
+        entry_computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+        &target_machine_features);
     EXPECT_IS_OK(layout_assignment.Run(module).status());
   }
 };
@@ -321,8 +322,9 @@ static StatusOr<DotOutputFusionLayoutAssignmentResult> RunDotOutputFusion(
       [](int64 shape_size) {
         return cpu::TargetMachineFeatures::kEigenExpectedTensorAlignment;
       });
-  cpu::CpuLayoutAssignment layout_assignment(&computation_layout,
-                                             &target_machine_features);
+  cpu::CpuLayoutAssignment layout_assignment(
+      &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+      &target_machine_features);
   TF_ASSIGN_OR_RETURN(result.layout_assignment_changed_something,
                       layout_assignment.Run(module));
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
index e2b96a81d4..4ba7989e9c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
@@ -30,8 +30,11 @@ namespace gpu {
 class GpuLayoutAssignment : public LayoutAssignment {
  public:
   explicit GpuLayoutAssignment(ComputationLayout* entry_computation_layout,
+                               std::function<bool(const HloInstruction*)>
+                                   instruction_can_change_layout_func,
                                se::StreamExecutor* stream_executor)
-      : LayoutAssignment(entry_computation_layout),
+      : LayoutAssignment(entry_computation_layout,
+                         std::move(instruction_can_change_layout_func)),
         stream_executor_(stream_executor) {}
   ~GpuLayoutAssignment() override {}
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
index fbc8ddf599..04681cfcec 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
@@ -75,7 +75,8 @@ TEST_F(LayoutAssignmentTest, Elementwise) {
             ShapeLayout(result_shape_with_layout);
 
         GpuLayoutAssignment layout_assignment(
-            &computation_layout, backend().default_stream_executor());
+            &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+            backend().default_stream_executor());
         EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
         for (const HloInstruction* operand : add->operands()) {
@@ -163,7 +164,8 @@ TEST_F(LayoutAssignmentTest, BatchNormInference) {
       }
 
       GpuLayoutAssignment layout_assignment(
-          &computation_layout, backend().default_stream_executor());
+          &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+          backend().default_stream_executor());
       EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
       // The first operand to batchnorm should have the same layout as the
@@ -233,7 +235,8 @@ TEST_F(LayoutAssignmentTest, BatchNormTraining) {
       }
 
       GpuLayoutAssignment layout_assignment(
-          &computation_layout, backend().default_stream_executor());
+          &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+          backend().default_stream_executor());
       EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
       // The first operand to batchnorm should have the same layout as the
@@ -314,7 +317,8 @@ TEST_F(LayoutAssignmentTest, BatchNormGrad) {
         }
 
         GpuLayoutAssignment layout_assignment(
-            &computation_layout, backend().default_stream_executor());
+            &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+            backend().default_stream_executor());
         EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
         // The first and fourth operands to the batchnorm call should have the
@@ -348,8 +352,9 @@ TEST_F(LayoutAssignmentTest, DotLayout) {
 
   ComputationLayout computation_layout(
       module->entry_computation()->ComputeProgramShape());
-  GpuLayoutAssignment layout_assignment(&computation_layout,
-                                        backend().default_stream_executor());
+  GpuLayoutAssignment layout_assignment(
+      &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+      backend().default_stream_executor());
   EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
   Shape expected_shape =
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 0b3b429710..b4ae2e42c7 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -232,7 +232,8 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     // a layout-sensitive verifier!
     HloPassPipeline pipeline("layout assignment");
     pipeline.AddPass<GpuLayoutAssignment>(
-        hlo_module->mutable_entry_computation_layout(), stream_exec);
+        hlo_module->mutable_entry_computation_layout(),
+        LayoutAssignment::InstructionCanChangeLayout, stream_exec);
     TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status());
   }
 
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index bb69cb9c47..27fe89375d 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -44,7 +44,8 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) {
   HloPassPipeline pipeline("Interpreter");
 
   pipeline.AddPass<LayoutAssignment>(
-      hlo_module->mutable_entry_computation_layout());
+      hlo_module->mutable_entry_computation_layout(),
+      LayoutAssignment::InstructionCanChangeLayout);
   return pipeline.Run(hlo_module).status();
 }
 
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 25d5327561..68a08a0886 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -974,10 +974,15 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) {
 
 LayoutAssignment::LayoutAssignment(
     ComputationLayout* entry_computation_layout,
+    std::function<bool(const HloInstruction*)>
+        instruction_can_change_layout_func,
     ChannelLayoutConstraints* channel_constraints)
     : entry_computation_layout_(entry_computation_layout),
+
       saved_entry_computation_layout_(*entry_computation_layout),
-      channel_layout_constraints_(channel_constraints) {
+      channel_layout_constraints_(channel_constraints),
+      instruction_can_change_layout_func_(
+          std::move(instruction_can_change_layout_func)) {
   if (channel_layout_constraints_ != nullptr) {
     // Save a copy of the input ChannelLayoutConstraints so that we can reset it
     // if we have to undo previous operations (ClearPreviousPassSideEffects()).
@@ -998,7 +1003,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOperandLayoutFromOutputLayout(
   if (!ShapeUtil::IsScalar(operand->shape()) &&
       ShapeUtil::Rank(operand->shape()) ==
           ShapeUtil::Rank(instruction->shape()) &&
-      InstructionRequiresInputLayoutEqualToOutputLayout(instruction)) {
+      !instruction_can_change_layout_func_(instruction)) {
     // Propagate the result layout to the operand layout if the instruction
     // requires the same layout out for the result and the operand.
     //
@@ -1076,7 +1081,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOutputLayoutFromOperandLayout(
 
   if (!ShapeUtil::IsScalar(operand->shape()) &&
       ShapeUtil::Rank(operand->shape()) == ShapeUtil::Rank(user->shape()) &&
-      InstructionRequiresInputLayoutEqualToOutputLayout(user)) {
+      !instruction_can_change_layout_func_(user)) {
     // Assign users the same layout as the operand.
     return absl::make_unique<Layout>(operand_layout);
   }
@@ -1842,7 +1847,8 @@ StatusOr<bool> LayoutAssignment::Run(HloModule* module) {
   return true;
 }
 
-bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout(
+/* static */
+bool LayoutAssignment::InstructionCanChangeLayout(
     const HloInstruction* instruction) {
   switch (instruction->opcode()) {
     case HloOpcode::kAbs:
@@ -1908,7 +1914,7 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout(
     case HloOpcode::kTanh:
     case HloOpcode::kTupleSelect:
     case HloOpcode::kWhile:
-      return true;
+      return false;
     case HloOpcode::kBatchNormGrad:
     case HloOpcode::kBatchNormInference:
     case HloOpcode::kBatchNormTraining:
@@ -1939,7 +1945,7 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout(
     case HloOpcode::kTrace:
     case HloOpcode::kTranspose:
     case HloOpcode::kTuple:
-      return false;
+      return true;
   }
 }
 
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index 15f0adcaaf..2d48e12263 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -286,6 +286,11 @@ class LayoutAssignment : public HloModulePass {
   // entry_computation_layout is modified to populate a layout for the result in
   // the case that no particular layout is requested.
   //
+  // instruction_can_change_layout_func is a function object that determines
+  // whether an instruction can change layouts. An instruction not being able to
+  // change layout means that it requires operands with the same rank as the
+  // output to have the same layout as the output.
+  //
   // channel_constraints is both an input and output. Any sends or recvs that
   // are present in channel_constraints will be laid out as constrained. Any
   // unconstrained sends or recvs will be laid out as locally optimal and their
@@ -295,6 +300,8 @@ class LayoutAssignment : public HloModulePass {
   // within any module passed to `Run`.
   explicit LayoutAssignment(
       ComputationLayout* entry_computation_layout,
+      std::function<bool(const HloInstruction*)>
+          instruction_can_change_layout_func = InstructionCanChangeLayout,
       ChannelLayoutConstraints* channel_constraints = nullptr);
   ~LayoutAssignment() override {}
   absl::string_view name() const override { return "layout-assignment"; }
@@ -303,10 +310,10 @@ class LayoutAssignment : public HloModulePass {
   // (any layouts were changed).
   StatusOr<bool> Run(HloModule* module) override;
 
-  // Returns true if the instruction requires that operands with the same rank
-  // as the output have to have the same layout as the output.
-  virtual bool InstructionRequiresInputLayoutEqualToOutputLayout(
-      const HloInstruction* instruction);
+  // Determines whether an instruction can change layouts. An instruction not
+  // being able to change layout means that it requires operands with the same
+  // rank as the output to have the same layout as the output.
+  static bool InstructionCanChangeLayout(const HloInstruction* instruction);
 
  protected:
   // These methods, invoked by PropagateConstraints, propagate a layout
@@ -522,6 +529,9 @@ class LayoutAssignment : public HloModulePass {
   // The set of HLO instructions which lacked any layout constraint, thus
   // receiving propagated default layouts.
   absl::flat_hash_set<const HloInstruction*> unconstrained_layout_instructions_;
+
+  std::function<bool(const HloInstruction*)>
+      instruction_can_change_layout_func_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 10f9a95121..15c16d667c 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -55,7 +55,8 @@ class LayoutAssignmentTest : public HloVerifiedTestBase {
                      ComputationLayout* entry_computation_layout,
                      ChannelLayoutConstraints* channel_constraints = nullptr) {
     LayoutAssignment layout_assignment(
-        entry_computation_layout, /*channel_constraints=*/channel_constraints);
+        entry_computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+        /*channel_constraints=*/channel_constraints);
     EXPECT_IS_OK(layout_assignment.Run(module).status());
   }
 
-- 
GitLab


From b790ac196148b7547bb4da7091973e8f0ae58803 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 23:10:30 -0700
Subject: [PATCH 1047/1357] [XLA:CPU] Re-enable the inliner pass in the cpu
 compiler.

PiperOrigin-RevId: 215517752
---
 tensorflow/compiler/xla/service/cpu/cpu_compiler.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index ea8c200dee..afc94f2185 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -249,9 +249,7 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
       &pipeline, module->config().debug_options(),
       ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
 
-  // TODO(b/35786417): Re-enable inliner pass after fixing the bug and deciding
-  // where we will take this pass in future.
-  // pipeline.AddPass<Inliner>();
+  pipeline.AddPass<Inliner>();
 
   // TODO(b/65775800): Fix wrong output bug in Call and remove the CallInliner
   // pass.
-- 
GitLab


From ac15fb000dc0558495b62e897206e2c4ad189c5a Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 2 Oct 2018 23:18:36 -0700
Subject: [PATCH 1048/1357] Internal change.

PiperOrigin-RevId: 215518288
---
 tensorflow/python/kernel_tests/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 9490746fd9..44575fc452 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2999,6 +2999,7 @@ cuda_py_test(
         "//tensorflow/python:math_ops",
     ],
     shard_count = 20,
+    tags = ["no_oss"],  # b/117185141
 )
 
 cuda_py_test(
-- 
GitLab


From 3d452dbcf7e1a71ba449f6acf7342cdd1dd11859 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 23:37:58 -0700
Subject: [PATCH 1049/1357] [XLA] In the HLO parser, give the module a
 non-empty default name.

Otherwise, when parsing a single instruction, the parsed module doesn't have a name, which won't pass the hlo verifier check.

PiperOrigin-RevId: 215519412
---
 tensorflow/compiler/xla/service/hlo_parser.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 0440f1b54f..dd62988bcc 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -391,7 +391,7 @@ Status HloParser::Run(HloModule* module) {
   // This means that the text is a single HLO instruction.
   if (!ParseSingleInstruction(module)) {
     return InvalidArgument(
-        "Syntax error when trying to parse the text as single "
+        "Syntax error when trying to parse the text as a single "
         "HloInstruction:\n%s",
         GetError());
   }
@@ -3348,14 +3348,14 @@ bool HloParser::ParseSingleInstruction(HloModule* module) {
 
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(
     absl::string_view str, const HloModuleConfig& config) {
-  auto module = absl::make_unique<HloModule>(/*name=*/"", config);
+  auto module = absl::make_unique<HloModule>(/*name=*/"_", config);
   HloParser parser(str);
   TF_RETURN_IF_ERROR(parser.Run(module.get()));
   return std::move(module);
 }
 
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(absl::string_view str) {
-  auto module = absl::make_unique<HloModule>(/*name=*/"", HloModuleConfig());
+  auto module = absl::make_unique<HloModule>(/*name=*/"_", HloModuleConfig());
   HloParser parser(str);
   TF_RETURN_IF_ERROR(parser.Run(module.get()));
   return std::move(module);
-- 
GitLab


From 946e58e402778606d26056f5decf91ecfb4a9f89 Mon Sep 17 00:00:00 2001
From: YongJoon Lee <joon0351@gmail.com>
Date: Wed, 3 Oct 2018 16:43:55 +0900
Subject: [PATCH 1050/1357] fix spelling problem

---
 .../contrib/estimator/python/estimator/boosted_trees.py     | 6 +++---
 .../estimator/python/estimator/dnn_linear_combined.py       | 2 +-
 .../python/estimator/dnn_with_layer_annotations.py          | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
index a1f1c5f3d7..b131ed4f12 100644
--- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
+++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
@@ -75,7 +75,7 @@ class _BoostedTreesEstimator(canned_boosted_trees._BoostedTreesBase):  # pylint:
         layer.
       head: the `Head` instance defined for Estimator.
       model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
+        also be used to load checkpoints from the directory into an estimator
         to continue training a previously saved model.
       weight_column: A string or a `_NumericColumn` created by
         `tf.feature_column.numeric_column` defining feature column representing
@@ -199,7 +199,7 @@ def boosted_trees_classifier_train_in_memory(
       the model. All items in the set should be instances of classes derived
       from `FeatureColumn`.
     model_dir: Directory to save model parameters, graph and etc. This can
-      also be used to load checkpoints from the directory into a estimator
+      also be used to load checkpoints from the directory into an estimator
       to continue training a previously saved model.
     n_classes: number of label classes. Default is binary classification.
       Multiclass support is not yet implemented.
@@ -345,7 +345,7 @@ def boosted_trees_regressor_train_in_memory(
       the model. All items in the set should be instances of classes derived
       from `FeatureColumn`.
     model_dir: Directory to save model parameters, graph and etc. This can
-      also be used to load checkpoints from the directory into a estimator
+      also be used to load checkpoints from the directory into an estimator
       to continue training a previously saved model.
     label_dimension: Number of regression targets per example.
       Multi-dimensional support is not yet implemented.
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
index 724bc2c82f..4e7965ef26 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
@@ -118,7 +118,7 @@ class DNNLinearCombinedEstimator(estimator.Estimator):
       head: A `_Head` instance constructed with a method such as
         `tf.contrib.estimator.multi_label_head`.
       model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
+        also be used to load checkpoints from the directory into an estimator
         to continue training a previously saved model.
       linear_feature_columns: An iterable containing all the feature columns
         used by linear part of the model. All items in the set must be
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index 6ca7aaf989..40a91175b7 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -248,7 +248,7 @@ def DNNClassifierWithLayerAnnotations(  # pylint: disable=invalid-name
       model. All items in the set should be instances of classes derived from
       `_FeatureColumn`.
     model_dir: Directory to save model parameters, graph and etc. This can also
-      be used to load checkpoints from the directory into a estimator to
+      be used to load checkpoints from the directory into an estimator to
       continue training a previously saved model.
     n_classes: Number of label classes. Defaults to 2, namely binary
       classification. Must be > 1.
-- 
GitLab


From c248f458c76df89fa3d608dcbe7c4c5e10962c24 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 02:25:06 -0700
Subject: [PATCH 1051/1357] compat: Update forward compatibility horizon to
 2018-10-03

PiperOrigin-RevId: 215534396
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 3bb95b56c2..d833defb8e 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 2)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 3)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From dd52e1d30702df5dfc805a1f433061dfbb75c814 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 06:14:11 -0700
Subject: [PATCH 1052/1357] Fix test that was relying on old lax toco behavior

PiperOrigin-RevId: 215553161
---
 .../contrib/lite/testing/generate_examples.py      | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 18036fac6f..3f2255c454 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -762,8 +762,11 @@ def make_constant_tests(zip_path):
         dtype=parameters["dtype"],
         name="input1",
         shape=parameters["input_shape"])
-    out = tf.constant(
+    constant = tf.constant(
         create_tensor_data(parameters["dtype"], parameters["input_shape"]))
+    # This maximum node is here to avoid the situation where a graph output is
+    # a constant, which is an error in toco.
+    out = tf.maximum(dummy_input, constant)
     return [dummy_input], [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
@@ -2848,7 +2851,14 @@ def make_zeros_like_tests(zip_path):
         dtype=parameters["input_dtype"],
         name="input",
         shape=parameters["input_shape"])
-    out = tf.zeros_like(input_tensor)
+    zeros = tf.zeros_like(input_tensor)
+    # This maximum node is so that toco can perform the constants-propagation
+    # through the above zeros_like, which it can't do if the output of the
+    # zeros_like as an output of the whole graphs (graph outputs can't be
+    # constants). If toco does not perform such constants-propagation then
+    # the resulting tflite graph retains the zeros_like as a Fill op, which
+    # is unsupported by TFLite, even as a custom op.
+    out = tf.maximum(zeros, input_tensor)
     return [input_tensor], [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
-- 
GitLab


From c9bdd3938e2b43334a0065b4c198ec9d491c8cb8 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 3 Oct 2018 10:04:37 -0700
Subject: [PATCH 1053/1357] [tf.data] Switch background threads to use
 `BackgroundWorker`.

PiperOrigin-RevId: 215579950
---
 tensorflow/core/kernels/data/iterator_ops.cc  |  4 ---
 .../kernels/data/map_and_batch_dataset_op.cc  | 10 ++++---
 .../core/kernels/data/model_dataset_op.cc     | 10 ++++---
 .../data/parallel_interleave_dataset_op.cc    | 27 +++++++++++--------
 .../kernels/data/parallel_map_iterator.cc     | 10 ++++---
 .../core/kernels/data/prefetch_dataset_op.cc  | 10 ++++---
 tensorflow/core/kernels/data/writer_ops.cc    | 12 ++++-----
 7 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 7a833668ac..8acd6cc724 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -16,10 +16,8 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
-#include "tensorflow/core/common_runtime/threadpool_device.h"
 #include "tensorflow/core/framework/iterator.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
-#include "tensorflow/core/framework/resource_op_kernel.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
@@ -27,13 +25,11 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/optional_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 namespace data {
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index bf08970560..6a670f1efb 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -405,9 +406,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
-          runner_thread_.reset(ctx->env()->StartThread(
-              {}, "runner_thread",
-              std::bind(&Iterator::RunnerThread, this, ctx_copy)));
+          runner_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+          runner_thread_->Schedule(
+              std::bind(&Iterator::RunnerThread, this, ctx_copy));
         }
       }
 
@@ -660,7 +662,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
       bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index 9aa505f4f1..859df57962 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -126,9 +127,10 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         if (!optimize_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          optimize_thread_.reset(ctx->env()->StartThread(
-              {}, "optimize_thread",
-              [this, new_ctx]() { OptimizeThread(new_ctx); }));
+          optimize_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "optimize_thread");
+          optimize_thread_->Schedule(
+              [this, new_ctx]() { OptimizeThread(new_ctx); });
         }
         return Status::OK();
       }
@@ -167,7 +169,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       condition_variable cond_var_;
       std::shared_ptr<model::Model> model_;
-      std::unique_ptr<Thread> optimize_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<BackgroundWorker> optimize_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 6b6b3d6ab9..9c836b836e 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -481,9 +482,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           worker_threads_.reserve(dataset()->num_threads());
           for (size_t i = 0; i < dataset()->num_threads(); ++i) {
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(ctx->env()->StartThread(
-                {}, "worker_thread",
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
+            worker_threads_.emplace_back(
+                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
+            worker_threads_.back()->Schedule(
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
           }
         }
         return Status::OK();
@@ -580,9 +582,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             }
             workers_[i].SetInputs(s, std::move(args));
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(ctx->env()->StartThread(
-                {}, "worker_thread",
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
+            worker_threads_.emplace_back(
+                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
+            worker_threads_.back()->Schedule(
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
             if (i < dataset()->cycle_length_) {
               interleave_indices_.push_back(i);
             } else {
@@ -1047,7 +1050,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // The worker threads. This must be last to ensure the
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
-      std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
+      std::vector<std::unique_ptr<BackgroundWorker>> worker_threads_
+          GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
@@ -1389,9 +1393,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          runner_thread_.reset(ctx->env()->StartThread(
-              {}, "runner_thread",
-              [this, new_ctx]() { RunnerThread(new_ctx); }));
+          runner_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+          runner_thread_->Schedule(
+              [this, new_ctx]() { RunnerThread(new_ctx); });
         }
       }
 
@@ -1645,7 +1650,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(*mu_) = false;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 13bd4b6036..626e98af91 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,6 +22,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -180,9 +181,10 @@ class ParallelMapIterator : public DatasetBaseIterator {
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
-      runner_thread_.reset(ctx->env()->StartThread(
-          {}, "runner_thread",
-          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
+      runner_thread_ =
+          MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+      runner_thread_->Schedule(
+          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy));
     }
   }
 
@@ -330,7 +332,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
       GUARDED_BY(*mu_);
-  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+  std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
   bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index 754ed772db..e9c38eb8a0 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -256,10 +257,11 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     Status EnsurePrefetchThreadStarted(IteratorContext* ctx)
         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
       if (!prefetch_thread_) {
+        prefetch_thread_ =
+            MakeUnique<BackgroundWorker>(ctx->env(), "prefetch_thread");
         std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-        prefetch_thread_.reset(ctx->env()->StartThread(
-            {}, "prefetch_thread",
-            [this, new_ctx]() { PrefetchThread(new_ctx); }));
+        prefetch_thread_->Schedule(
+            [this, new_ctx]() { PrefetchThread(new_ctx); });
       }
       return Status::OK();
     }
@@ -363,7 +365,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     string prefix_end_;
     PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_);
     std::deque<BufferElement> buffer_ GUARDED_BY(mu_);
-    std::unique_ptr<Thread> prefetch_thread_ GUARDED_BY(mu_);
+    std::unique_ptr<BackgroundWorker> prefetch_thread_ GUARDED_BY(mu_);
     bool cancelled_ GUARDED_BY(mu_) = false;
     bool prefetch_thread_finished_ GUARDED_BY(mu_) = false;
   };
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index 3f76695bb1..7bb2077b62 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel {
  public:
   explicit ToTFRecordOp(OpKernelConstruction* ctx)
       : AsyncOpKernel(ctx),
-        thread_pool_(new thread::ThreadPool(
-            ctx->env(), ThreadOptions(),
-            strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())),
-            1 /* num_threads */, false /* low_latency_hint */)) {}
+        background_worker_(
+            ctx->env(),
+            strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) {
+  }
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
@@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel {
     // The call to `iterator->GetNext()` may block and depend on an
     // inter-op thread pool thread, so we issue the call from the
     // owned thread pool.
-    thread_pool_->Schedule([this, ctx, done]() {
+    background_worker_.Schedule([this, ctx, done]() {
       string filename;
       OP_REQUIRES_OK_ASYNC(
           ctx, ParseScalarArgument<string>(ctx, "filename", &filename), done);
@@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel {
   }
 
  private:
-  std::unique_ptr<thread::ThreadPool> thread_pool_;
+  BackgroundWorker background_worker_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU),
-- 
GitLab


From 2af8fd975aaf5c70ebb396895fa15a8f034a8440 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Wed, 3 Oct 2018 10:09:14 -0700
Subject: [PATCH 1054/1357] Skip control flow functionalization if there is no
 Switch or Merge node.

PiperOrigin-RevId: 215580891
---
 .../tf2xla/functionalize_control_flow.cc      | 129 ++++++++++++------
 1 file changed, 90 insertions(+), 39 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 36c6f5d316..28e09d7b79 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -79,7 +79,10 @@ Status FunctionalizeControlFlowForFunction(
     const string& func_name, const string& new_func_name,
     const protobuf::Map<string, tensorflow::AttrValue>& attrs,
     FunctionLibraryDefinition* fld, FunctionLibraryRuntime* flr,
-    std::map<string, string>* canonicalized_name_to_new_name) {
+    std::map<string, absl::optional<string>>* canonicalized_name_to_new_name,
+    bool* modified) {
+  *modified = false;
+
   // Convert the function to Graph.
   FunctionLibraryRuntime::Handle handle;
   TF_RETURN_IF_ERROR(flr->Instantiate(func_name, AttrSlice(&attrs), &handle));
@@ -92,6 +95,19 @@ Status FunctionalizeControlFlowForFunction(
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
 
+  // Check if the graph has Switch or Merge node before optimizing the graph.
+  bool has_switch_or_merge = false;
+  for (Node* n : body->graph->nodes()) {
+    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
+      has_switch_or_merge = true;
+      break;
+    }
+  }
+  // We cannot return here directly if the graph has no Switch/Merge.
+  // It might contain function call nodes, or If/While nodes with Switch/Merge
+  // in function body. We still need to rewrite those functions and modify
+  // corresponding nodes.
+
   // Call graph optimizer. The most important optimization we need is constant
   // folding, which will replace ops like Shape/BroadcastGradientArgs with
   // constant shape input. Without this optimization, those ops might become
@@ -129,6 +145,13 @@ Status FunctionalizeControlFlowForFunction(
         absl::StrCat("functionalize_control_flow_after_opt_", func_name),
         *optimized_graph, fld);
   }
+  // Some inlined functions might have Switch/Merge nodes.
+  for (Node* n : optimized_graph->nodes()) {
+    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
+      has_switch_or_merge = true;
+      break;
+    }
+  }
 
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
@@ -151,10 +174,15 @@ Status FunctionalizeControlFlowForFunction(
           Canonicalize(name, AttrSlice(&associated_function.attrs()));
       auto iter = canonicalized_name_to_new_name->find(canonicalized_name);
       string new_name;
+      bool function_modified;
       if (iter != canonicalized_name_to_new_name->end()) {
-        // If we already functionalized this function, skip functionalization
-        // but still rewrite the node.
-        new_name = iter->second;
+        // If we already processed this function, check if it was rewritten. If
+        // the function was rewritten, the entry will be non-empty. Otherwise
+        // the entry will be empty.
+        function_modified = iter->second.has_value();
+        if (function_modified) {
+          new_name = iter->second.value();
+        }
       } else {
         if (associated_function.type() ==
             AssociatedFunctionInfo::AssociatedFunctionType::kSymbolicGradient) {
@@ -166,42 +194,62 @@ Status FunctionalizeControlFlowForFunction(
         }
         TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
             name, new_name, associated_function.attrs(), fld, flr,
-            canonicalized_name_to_new_name));
-        (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+            canonicalized_name_to_new_name, &function_modified));
+        if (function_modified) {
+          // If the function was rewritten, add an non-empty entry. So later we
+          // know we have processed this function, and it was rewritten into
+          // another function.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+        } else {
+          // If the function was not rewritten, add an empty entry. So later
+          // we know we have processed this function, and it does not need to be
+          // rewritten.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = absl::nullopt;
+        }
+      }
+      if (function_modified) {
+        *modified = true;
+
+        // Notice that if "n" is a function call, RewriteAssociatedFunction()
+        // will delete it and create a new node instead, making "n" an invalid
+        // pointer. That's fine because in that case, associated_functions will
+        // only have one member and the loop will only run once.
+        TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
+            optimized_graph.get(), n, fld, associated_function, new_name));
       }
-      // Notice that if "n" is a function call, RewriteAssociatedFunction() will
-      // delete it and create a new node instead, making "n" an invalid pointer.
-      // That's fine because in that case, associated_functions will only have
-      // one member and the loop will only run once.
-      TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-          optimized_graph.get(), n, fld, associated_function, new_name));
     }
   }
 
-  // Functionalize the function body.
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-        *optimized_graph, fld);
-  }
-  TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-        *optimized_graph, fld);
+  if (has_switch_or_merge) {
+    *modified = true;
+
+    // Functionalize the function body.
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
+          *optimized_graph, fld);
+    }
+    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
+          *optimized_graph, fld);
+    }
   }
-  FunctionDef functionalized_fdef;
-  TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
-                                        &functionalized_fdef));
 
-  // Add rewritten FunctionDef into library.
-  if (func_name == new_func_name) {
-    VLOG(2) << "Replacing function " << func_name;
-    TF_RETURN_IF_ERROR(
-        fld->ReplaceFunction(new_func_name, functionalized_fdef));
-  } else {
-    VLOG(2) << "Adding function " << new_func_name;
-    TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+  if (*modified) {
+    // Add rewritten FunctionDef into library.
+    FunctionDef functionalized_fdef;
+    TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
+                                          &functionalized_fdef));
+    if (func_name == new_func_name) {
+      VLOG(2) << "Replacing function " << func_name;
+      TF_RETURN_IF_ERROR(
+          fld->ReplaceFunction(new_func_name, functionalized_fdef));
+    } else {
+      VLOG(2) << "Adding function " << new_func_name;
+      TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+    }
   }
 
   return ret_status;
@@ -227,7 +275,7 @@ Status FunctionalizeControlFlowPass::Run(
           {"TPUCompile", "function"},
           {"XlaLaunch", "function"},
       };
-  std::map<string, string> canonicalized_name_to_new_name;
+  std::map<string, absl::optional<string>> canonicalized_name_to_new_name;
   for (Node* n : graph->nodes()) {
     auto it = kNodeTypeToFunctionAttrMapping->find(n->type_string());
     if (it == kNodeTypeToFunctionAttrMapping->end()) {
@@ -242,12 +290,15 @@ Status FunctionalizeControlFlowPass::Run(
               << ". Corresponding function: " << func.name();
       string new_func_name = options.flib_def->UniqueFunctionName(
           absl::StrCat(func.name(), "_f15n_"));
+      bool modified;
       TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
           func.name(), new_func_name, func.attr(), options.flib_def, flr,
-          &canonicalized_name_to_new_name));
-      n->ClearAttr(func_attr);
-      func.set_name(new_func_name);
-      n->AddAttr(func_attr, func);
+          &canonicalized_name_to_new_name, &modified));
+      if (modified) {
+        n->ClearAttr(func_attr);
+        func.set_name(new_func_name);
+        n->AddAttr(func_attr, func);
+      }
     }
   }
 
-- 
GitLab


From 022af5300701d457d848e60ea511dd8d05f68738 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Wed, 3 Oct 2018 10:18:59 -0700
Subject: [PATCH 1055/1357] Fix TfLiteTensor invalidation issue when using the
 Java API

Fix an issue where the Java Tensor class would hold a reference
to an invalidated TfLiteTensor instance. This issue was manifest
in certain models that add temporary tensors during execution.

PiperOrigin-RevId: 215582842
---
 .../lite/NativeInterpreterWrapper.java        | 26 +++++++---
 .../main/java/org/tensorflow/lite/Tensor.java | 27 ++++++++--
 .../native/nativeinterpreterwrapper_jni.cc    | 22 +++-----
 .../native/nativeinterpreterwrapper_jni.h     | 24 ++++-----
 .../lite/java/src/main/native/tensor_jni.cc   | 50 +++++++++++++++----
 .../lite/java/src/main/native/tensor_jni.h    | 17 +++++++
 .../java/org/tensorflow/lite/TensorTest.java  | 13 ++++-
 7 files changed, 129 insertions(+), 50 deletions(-)

diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
index 9bc44bf797..6f03e7853a 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
@@ -18,7 +18,6 @@ package org.tensorflow.lite;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.MappedByteBuffer;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -83,6 +82,19 @@ final class NativeInterpreterWrapper implements AutoCloseable {
   /** Releases resources associated with this {@code NativeInterpreterWrapper}. */
   @Override
   public void close() {
+    // Close the tensors first as they may reference the native interpreter.
+    for (int i = 0; i < inputTensors.length; ++i) {
+      if (inputTensors[i] != null) {
+        inputTensors[i].close();
+        inputTensors[i] = null;
+      }
+    }
+    for (int i = 0; i < outputTensors.length; ++i) {
+      if (outputTensors[i] != null) {
+        outputTensors[i].close();
+        outputTensors[i] = null;
+      }
+    }
     delete(errorHandle, modelHandle, interpreterHandle);
     errorHandle = 0;
     modelHandle = 0;
@@ -91,8 +103,6 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     inputsIndexes = null;
     outputsIndexes = null;
     isMemoryAllocated = false;
-    Arrays.fill(inputTensors, null);
-    Arrays.fill(outputTensors, null);
   }
 
   /** Sets inputs, runs model inference and returns outputs. */
@@ -260,7 +270,8 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     Tensor inputTensor = inputTensors[index];
     if (inputTensor == null) {
       inputTensor =
-          inputTensors[index] = Tensor.fromHandle(getInputTensor(interpreterHandle, index));
+          inputTensors[index] =
+              Tensor.fromIndex(interpreterHandle, getInputTensorIndex(interpreterHandle, index));
     }
     return inputTensor;
   }
@@ -282,7 +293,8 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     Tensor outputTensor = outputTensors[index];
     if (outputTensor == null) {
       outputTensor =
-          outputTensors[index] = Tensor.fromHandle(getOutputTensor(interpreterHandle, index));
+          outputTensors[index] =
+              Tensor.fromIndex(interpreterHandle, getOutputTensorIndex(interpreterHandle, index));
     }
     return outputTensor;
   }
@@ -317,9 +329,9 @@ final class NativeInterpreterWrapper implements AutoCloseable {
 
   private static native long allocateTensors(long interpreterHandle, long errorHandle);
 
-  private static native long getInputTensor(long interpreterHandle, int inputIdx);
+  private static native int getInputTensorIndex(long interpreterHandle, int inputIdx);
 
-  private static native long getOutputTensor(long interpreterHandle, int outputIdx);
+  private static native int getOutputTensorIndex(long interpreterHandle, int outputIdx);
 
   private static native int getInputCount(long interpreterHandle);
 
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
index f174178d98..6ca47aa3ed 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
@@ -23,13 +23,26 @@ import java.util.Arrays;
 /**
  * A typed multi-dimensional array used in Tensorflow Lite.
  *
- * <p>The native handle of a {@code Tensor} belongs to {@code NativeInterpreterWrapper}, thus not
- * needed to be closed here.
+ * <p>The native handle of a {@code Tensor} is managed by {@code NativeInterpreterWrapper}, and does
+ * not needed to be closed by the client. However, once the {@code NativeInterpreterWrapper} has
+ * been closed, the tensor handle will be invalidated.
  */
 public final class Tensor {
 
-  static Tensor fromHandle(long nativeHandle) {
-    return new Tensor(nativeHandle);
+  /**
+   * Creates a Tensor wrapper from the provided interpreter instance and tensor index.
+   *
+   * <p>The caller is responsible for closing the created wrapper, and ensuring the provided
+   * native interpreter is valid until the tensor is closed.
+   */
+  static Tensor fromIndex(long nativeInterpreterHandle, int tensorIndex) {
+    return new Tensor(create(nativeInterpreterHandle, tensorIndex));
+  }
+
+  /** Disposes of any resources used by the Tensor wrapper. */
+  void close() {
+    delete(nativeHandle);
+    nativeHandle = 0;
   }
 
   /** Returns the {@link DataType} of elements stored in the Tensor. */
@@ -235,7 +248,7 @@ public final class Tensor {
     return o instanceof ByteBuffer;
   }
 
-  private final long nativeHandle;
+  private long nativeHandle;
   private final DataType dtype;
   private int[] shapeCopy;
 
@@ -249,6 +262,10 @@ public final class Tensor {
     return buffer(nativeHandle).order(ByteOrder.nativeOrder());
   }
 
+  private static native long create(long interpreterHandle, int tensorIndex);
+
+  private static native void delete(long handle);
+
   private static native ByteBuffer buffer(long handle);
 
   private static native void writeDirectBuffer(long handle, ByteBuffer src);
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
index abb7320bc5..4dc73fbcf8 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
@@ -159,26 +159,20 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_allocateTensors(
   }
 }
 
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensor(JNIEnv* env,
-                                                                 jclass clazz,
-                                                                 jlong handle,
-                                                                 jint index) {
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint input_index) {
   tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle);
   if (interpreter == nullptr) return 0;
-  return reinterpret_cast<jlong>(
-      interpreter->tensor(interpreter->inputs()[index]));
+  return interpreter->inputs()[input_index];
 }
 
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensor(JNIEnv* env,
-                                                                  jclass clazz,
-                                                                  jlong handle,
-                                                                  jint index) {
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint output_index) {
   tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle);
   if (interpreter == nullptr) return 0;
-  return reinterpret_cast<jlong>(
-      interpreter->tensor(interpreter->outputs()[index]));
+  return interpreter->outputs()[output_index];
 }
 
 JNIEXPORT jint JNICALL
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
index aa809dff8a..f8f3e7028c 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
@@ -46,25 +46,21 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_allocateTensors(
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
- *  Method:    getInputTensor
- *  Signature: (JI)J
+ *  Method:    getInputTensorIndex
+ *  Signature: (JI)I
  */
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensor(JNIEnv* env,
-                                                                 jclass clazz,
-                                                                 jlong handle,
-                                                                 jint index);
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint input_index);
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
- *  Method:    getOutputTensor
- *  Signature: (JI)J
+ *  Method:    getOutputTensorIndex
+ *  Signature: (JI)I
  */
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensor(JNIEnv* env,
-                                                                  jclass clazz,
-                                                                  jlong handle,
-                                                                  jint index);
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint output_index);
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
diff --git a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc
index 7ff96a3172..d3378f5f14 100644
--- a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc
+++ b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc
@@ -16,17 +16,36 @@ limitations under the License.
 #include "tensorflow/contrib/lite/java/src/main/native/tensor_jni.h"
 #include <cstring>
 #include <memory>
+#include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/java/src/main/native/exception_jni.h"
 
 namespace {
 
-TfLiteTensor* convertLongToTensor(JNIEnv* env, jlong handle) {
+// Convenience handle for obtaining a TfLiteTensor given an interpreter and
+// tensor index.
+//
+// Historically, the Java Tensor class used a TfLiteTensor pointer as its native
+// handle. However, this approach isn't generally safe, as the interpreter may
+// invalidate all TfLiteTensor* handles during inference or allocation.
+class TensorHandle {
+ public:
+  TensorHandle(tflite::Interpreter* interpreter, int tensor_index)
+      : interpreter_(interpreter), tensor_index_(tensor_index) {}
+
+  TfLiteTensor* tensor() const { return interpreter_->tensor(tensor_index_); }
+
+ private:
+  tflite::Interpreter* const interpreter_;
+  const int tensor_index_;
+};
+
+TfLiteTensor* GetTensorFromHandle(JNIEnv* env, jlong handle) {
   if (handle == 0) {
     throwException(env, kIllegalArgumentException,
                    "Internal error: Invalid handle to TfLiteTensor.");
     return nullptr;
   }
-  return reinterpret_cast<TfLiteTensor*>(handle);
+  return reinterpret_cast<TensorHandle*>(handle)->tensor();
 }
 
 size_t elementByteSize(TfLiteType data_type) {
@@ -192,10 +211,23 @@ size_t writeMultiDimensionalArray(JNIEnv* env, jobject src, TfLiteType type,
 
 }  // namespace
 
+JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_Tensor_create(
+    JNIEnv* env, jclass clazz, jlong interpreter_handle, jint tensor_index) {
+  tflite::Interpreter* interpreter =
+      reinterpret_cast<tflite::Interpreter*>(interpreter_handle);
+  return reinterpret_cast<jlong>(new TensorHandle(interpreter, tensor_index));
+}
+
+JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_delete(JNIEnv* env,
+                                                              jclass clazz,
+                                                              jlong handle) {
+  delete reinterpret_cast<TensorHandle*>(handle);
+}
+
 JNIEXPORT jobject JNICALL Java_org_tensorflow_lite_Tensor_buffer(JNIEnv* env,
                                                                  jclass clazz,
                                                                  jlong handle) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return nullptr;
   if (tensor->data.raw == nullptr) {
     throwException(env, kIllegalArgumentException,
@@ -208,7 +240,7 @@ JNIEXPORT jobject JNICALL Java_org_tensorflow_lite_Tensor_buffer(JNIEnv* env,
 
 JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_writeDirectBuffer(
     JNIEnv* env, jclass clazz, jlong handle, jobject src) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return;
 
   char* src_data_raw = static_cast<char*>(env->GetDirectBufferAddress(src));
@@ -226,7 +258,7 @@ Java_org_tensorflow_lite_Tensor_readMultiDimensionalArray(JNIEnv* env,
                                                           jclass clazz,
                                                           jlong handle,
                                                           jobject value) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return;
   int num_dims = tensor->dims->size;
   if (num_dims == 0) {
@@ -243,7 +275,7 @@ Java_org_tensorflow_lite_Tensor_writeMultiDimensionalArray(JNIEnv* env,
                                                            jclass clazz,
                                                            jlong handle,
                                                            jobject src) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return;
   if (tensor->data.raw == nullptr) {
     throwException(env, kIllegalArgumentException,
@@ -262,14 +294,14 @@ Java_org_tensorflow_lite_Tensor_writeMultiDimensionalArray(JNIEnv* env,
 JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_dtype(JNIEnv* env,
                                                              jclass clazz,
                                                              jlong handle) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return 0;
   return static_cast<jint>(tensor->type);
 }
 
 JNIEXPORT jintArray JNICALL
 Java_org_tensorflow_lite_Tensor_shape(JNIEnv* env, jclass clazz, jlong handle) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return nullptr;
   int num_dims = tensor->dims->size;
   jintArray result = env->NewIntArray(num_dims);
@@ -280,7 +312,7 @@ Java_org_tensorflow_lite_Tensor_shape(JNIEnv* env, jclass clazz, jlong handle) {
 JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_numBytes(JNIEnv* env,
                                                                 jclass clazz,
                                                                 jlong handle) {
-  const TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  const TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return 0;
   return static_cast<jint>(tensor->bytes);
 }
diff --git a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h
index 2f73128bdf..c5e9690e9a 100644
--- a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h
+++ b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h
@@ -23,6 +23,23 @@ limitations under the License.
 extern "C" {
 #endif  // __cplusplus
 
+/*
+ * Class:     org_tensorflow_lite_Tensor
+ * Method:    create
+ * Signature: (JI)J
+ */
+JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_Tensor_create(
+    JNIEnv* env, jclass clazz, jlong interpreter_handle, jint tensor_index);
+
+/*
+ * Class:     org_tensorflow_lite_Tensor
+ * Method:    delete
+ * Signature: (J)
+ */
+JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_delete(JNIEnv* env,
+                                                              jclass clazz,
+                                                              jlong handle);
+
 /*
  * Class:     org_tensorflow_lite_Tensor
  * Method:    buffer
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
index 85ad393d89..56a38ea3e2 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
@@ -182,7 +182,7 @@ public final class TensorTest {
     dataType = Tensor.dataTypeOf(testFloatArray);
     assertThat(dataType).isEqualTo(DataType.FLOAT32);
     float[][] testMultiDimArray = {testFloatArray, testFloatArray, testFloatArray};
-    dataType = Tensor.dataTypeOf(testFloatArray);
+    dataType = Tensor.dataTypeOf(testMultiDimArray);
     assertThat(dataType).isEqualTo(DataType.FLOAT32);
     try {
       double[] testDoubleArray = {0.783, 0.251};
@@ -238,4 +238,15 @@ public final class TensorTest {
     assertThat(shape[1]).isEqualTo(3);
     assertThat(shape[2]).isEqualTo(1);
   }
+
+  @Test
+  public void testUseAfterClose() {
+    tensor.close();
+    try {
+      tensor.numBytes();
+      fail();
+    } catch (IllegalArgumentException e) {
+      // Expected failure.
+    }
+  }
 }
-- 
GitLab


From a5b3cd8b4d28cfcdcb9adb3d3568b168b9b8a088 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 10:19:55 -0700
Subject: [PATCH 1056/1357] Fix bug in shape function for transpose: If the
 rank of the input is unknown and the rank derived from the permutation array
 is 0 or 1, the shape is ambiguous and cannot be determined at graph
 construction time. In this case, forward the shape of the input.

PiperOrigin-RevId: 215583050
---
 tensorflow/core/ops/array_ops.cc              |  8 +++++
 tensorflow/core/ops/array_ops_test.cc         |  1 +
 tensorflow/python/kernel_tests/BUILD          |  2 +-
 .../python/kernel_tests/transpose_op_test.py  | 29 +++++++++++++++++--
 4 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index c9f80df5e4..f55562ec99 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -133,6 +133,14 @@ Status TransposeShapeFn(InferenceContext* c) {
   } else {
     rank = perm->NumElements();
   }
+  if (!c->RankKnown(input) && rank < 2) {
+    // A permutation array containing a single element is ambiguous. It could
+    // indicate either a scalar or a 1-dimensional array, both of which the
+    // transpose op returns unchanged.
+    c->set_output(0, input);
+    return Status::OK();
+  }
+
   std::vector<DimensionHandle> dims;
   dims.resize(rank);
   TF_RETURN_IF_ERROR(c->WithRank(input, rank, &input));
diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc
index 03dab390a7..1c29cd2491 100644
--- a/tensorflow/core/ops/array_ops_test.cc
+++ b/tensorflow/core/ops/array_ops_test.cc
@@ -975,6 +975,7 @@ TEST(ArrayOpsTest, Transpose_ShapeFn) {
   INFER_OK(op, "?;[2]", "[?,?]");
   INFER_OK(op, "[?,?];[2]", "[d0_1,d0_0]");
   INFER_OK(op, "[1,?];[2]", "[d0_1,d0_0]");
+  INFER_OK(op, "?;[0]", "in0");
 
   // Invalid arguments.
   perm = test::AsTensor<int32>({1, 2});
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 44575fc452..c0e9a3c975 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2367,7 +2367,7 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
-    shard_count = 4,
+    shard_count = 10,
     tags = [
         "no_gpu",
         "no_oss",
diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py
index f42800226e..a825052dd2 100644
--- a/tensorflow/python/kernel_tests/transpose_op_test.py
+++ b/tensorflow/python/kernel_tests/transpose_op_test.py
@@ -39,7 +39,12 @@ class TransposeTest(test.TestCase):
     return ret
 
   def _compareCpu(self, x, p, conjugate=False):
-    np_ans = self._np_transpose(x, p)
+    if p is None:
+      rank = x.ndim
+      perm = (rank - 1) - np.arange(rank)
+    else:
+      perm = p
+    np_ans = self._np_transpose(x, perm)
     if conjugate:
       np_ans = np.conj(np_ans)
     with self.test_session(use_gpu=False):
@@ -65,7 +70,12 @@ class TransposeTest(test.TestCase):
       return tf_ans, jacob_t
 
   def _compareGpu(self, x, p, conjugate=False):
-    np_ans = self._np_transpose(x, p)
+    if p is None:
+      rank = x.ndim
+      perm = (rank - 1) - np.arange(rank)
+    else:
+      perm = p
+    np_ans = self._np_transpose(x, perm)
     if conjugate:
       np_ans = np.conj(np_ans)
     with self.test_session(use_gpu=True):
@@ -102,6 +112,11 @@ class TransposeTest(test.TestCase):
         self._compareCpu(x, p, conjugate=c)
         if use_gpu:
           self._compareGpu(x, p, conjugate=c)
+    # Test with an empty permutation
+    for c in cs:
+      self._compareCpu(x, None, conjugate=c)
+      if use_gpu:
+        self._compareGpu(x, None, conjugate=c)
 
   def _compare_cpu_gpu(self, x):
     n = np.ndim(x)
@@ -449,6 +464,10 @@ class TransposeTest(test.TestCase):
     self.assertEqual(
         tensor_shape.TensorShape(None),
         array_ops.transpose(array_ops.placeholder(dtypes.int32)).get_shape())
+    self.assertEqual(
+        tensor_shape.TensorShape(None),
+        array_ops.transpose(array_ops.placeholder(dtypes.int32),
+                            [0]).get_shape())
 
   def testNullTensor(self):
     with self.cached_session():
@@ -456,6 +475,12 @@ class TransposeTest(test.TestCase):
       xt = array_ops.transpose(x, [0, 2, 1]).eval()
       self.assertAllEqual(xt.shape, (1, 0, 4))
 
+  def testScalar(self):
+    with self.cached_session():
+      x = constant_op.constant(42, dtype=dtypes.float32, shape=[])
+      xt = array_ops.transpose(x).eval()
+      self.assertAllEqual(xt, x)
+
   def _testError(self, x, p, err):
     with self.cached_session():
       with self.assertRaisesOpError(err):
-- 
GitLab


From 0f9baa02a4e32b672b0cc29e99d5bfcf1329988c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 10:26:41 -0700
Subject: [PATCH 1057/1357] Re-enable the arithmetic optimizer by default in
 tests. Add a warning to not disable optimizers without consulting with the
 Grappler team.

PiperOrigin-RevId: 215584369
---
 tensorflow/python/framework/test_util.py                    | 6 ++++--
 .../python/kernel_tests/distributions/laplace_test.py       | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 6673bc5561..4ec4b41b5e 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1992,10 +1992,12 @@ class TensorFlowTestCase(googletest.TestCase):
       # Don't perform optimizations for tests so we don't inadvertently run
       # gpu ops on cpu
       config.graph_options.optimizer_options.opt_level = -1
+      # Disable Grappler constant folding since some tests & benchmarks
+      # use constant input and become meaningless after constant folding.
+      # DO NOT DISABLE GRAPPLER OPTIMIZERS WITHOUT CONSULTING WITH THE
+      # GRAPPLER TEAM.
       config.graph_options.rewrite_options.constant_folding = (
           rewriter_config_pb2.RewriterConfig.OFF)
-      config.graph_options.rewrite_options.arithmetic_optimization = (
-          rewriter_config_pb2.RewriterConfig.OFF)
       config.graph_options.rewrite_options.pin_to_host_optimization = (
           rewriter_config_pb2.RewriterConfig.OFF)
       return config
diff --git a/tensorflow/python/kernel_tests/distributions/laplace_test.py b/tensorflow/python/kernel_tests/distributions/laplace_test.py
index 630c2cb424..2610ba23b8 100644
--- a/tensorflow/python/kernel_tests/distributions/laplace_test.py
+++ b/tensorflow/python/kernel_tests/distributions/laplace_test.py
@@ -275,8 +275,8 @@ class LaplaceTest(test.TestCase):
     self.assertAllClose(
         sample_values.var(axis=0),
         stats.laplace.var(loc_bc, scale=scale_bc),
-        rtol=0.10,
-        atol=0.)
+        rtol=0.105,
+        atol=0.0)
     fails = 0
     trials = 0
     for ai, a in enumerate(np.reshape(loc_v, [-1])):
-- 
GitLab


From 26ce26d127587bc1f5dc7950e22f7d935d372abf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 10:31:02 -0700
Subject: [PATCH 1058/1357] Re-add proto fields temporarily for internal
 compatibility.

PiperOrigin-RevId: 215585187
---
 .../tpu/proto/optimization_parameters.proto     | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index b9e0747fa4..8529b48c15 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -28,6 +28,7 @@ message LearningRate {
 // https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer
 // https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L151
 message AdagradParameters {
+  float initial_accumulator = 1;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer
@@ -41,6 +42,8 @@ message FtrlParameters {
   float l1 = 1;
   float l2 = 2;
   float lr_power = 3;
+  float initial_accum = 4;
+  float initial_linear = 5;
 }
 
 // The Adam optimizer does not implement hyper-parameter update; use the dynamic
@@ -67,6 +70,8 @@ message AdamParameters {
   float beta1 = 3;
   float beta2 = 4;
   float epsilon = 5;
+  float initial_m = 6;
+  float initial_v = 7;
   bool use_non_lazy_adam = 8;
   bool use_max_with_epsilon = 9;
 }
@@ -76,6 +81,7 @@ message AdamParameters {
 message MomentumParameters {
   float momentum = 1;
   bool use_nesterov = 2;
+  float initial_accum = 3;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -84,6 +90,8 @@ message RmsPropParameters {
   float rho = 1;
   float momentum = 2;
   float epsilon = 3;
+  float initial_ms = 4;
+  float initial_mom = 5;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -92,6 +100,9 @@ message CenteredRmsPropParameters {
   float rho = 1;
   float momentum = 2;
   float epsilon = 3;
+  float initial_ms = 4;
+  float initial_mom = 5;
+  float initial_mg = 6;
 }
 
 // Variant of algorithm in http://proceedings.mlr.press/v44/shamir15.pdf
@@ -108,6 +119,9 @@ message MdlAdagradLightParameters {
   float mdl_hard_limit = 10;
   bool hard_limit_min_benefit = 11;
   bool mdl_regularize = 12;
+  float initial_accumulator = 13;
+  float initial_weight = 14;
+  float initial_benefit = 15;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -115,6 +129,8 @@ message MdlAdagradLightParameters {
 message AdadeltaParameters {
   float rho = 1;
   float epsilon = 2;
+  float initial_accumulator = 3;
+  float initial_update = 4;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -122,6 +138,7 @@ message AdadeltaParameters {
 message ProximalAdagradParameters {
   float l1 = 1;
   float l2 = 2;
+  float initial_accumulator = 3;
 }
 
 message OptimizationParameters {
-- 
GitLab


From af1458a9c1a3bc8d49a1e55386950b4941ab1815 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Wed, 3 Oct 2018 10:39:07 -0700
Subject: [PATCH 1059/1357] Fix filename/line number lookup for logging.

Log messages now show the correct file/function name/line number instead of that of the helper function.

PiperOrigin-RevId: 215586852
---
 tensorflow/python/platform/tf_logging.py | 58 ++++++++++++++++++------
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py
index 5962d2f220..59e60856ae 100644
--- a/tensorflow/python/platform/tf_logging.py
+++ b/tensorflow/python/platform/tf_logging.py
@@ -25,6 +25,7 @@ import logging as _logging
 import os as _os
 import sys as _sys
 import time as _time
+import traceback as _traceback
 from logging import DEBUG
 from logging import ERROR
 from logging import FATAL
@@ -36,13 +37,49 @@ import six
 
 from tensorflow.python.util.tf_export import tf_export
 
-
 # Don't use this directly. Use _get_logger() instead.
 _logger = None
 _logger_lock = threading.Lock()
 
 
+def _get_caller(offset=3):
+  """Returns a code and frame object for the lowest non-logging stack frame."""
+  # Use sys._getframe().  This avoids creating a traceback object.
+  # pylint: disable=protected-access
+  f = _sys._getframe(offset)
+  # pylint: enable=protected-access
+  our_file = f.f_code.co_filename
+  f = f.f_back
+  while f:
+    code = f.f_code
+    if code.co_filename != our_file:
+      return code, f
+    f = f.f_back
+  return None, None
+
+
+# The definition of `findCaller` changed in Python 3.2
+if _sys.version_info.major >= 3 and _sys.version_info.minor >= 2:
+  def _logger_find_caller(stack_info=False):  # pylint: disable=g-wrong-blank-lines
+    code, frame = _get_caller(4)
+    sinfo = None
+    if stack_info:
+      sinfo = '\n'.join(_traceback.format_stack())
+    if code:
+      return (code.co_filename, frame.f_lineno, code.co_name, sinfo)
+    else:
+      return '(unknown file)', 0, '(unknown function)', sinfo
+else:
+  def _logger_find_caller():  # pylint: disable=g-wrong-blank-lines
+    code, frame = _get_caller(4)
+    if code:
+      return (code.co_filename, frame.f_lineno, code.co_name)
+    else:
+      return '(unknown file)', 0, '(unknown function)'
+
+
 def _get_logger():
+  """Return TF logger instance."""
   global _logger
 
   # Use double-checked locking to avoid taking lock unnecessarily.
@@ -58,6 +95,9 @@ def _get_logger():
     # Scope the TensorFlow logger to not conflict with users' loggers.
     logger = _logging.getLogger('tensorflow')
 
+    # Override findCaller on the logger to skip internal helper functions
+    logger.findCaller = _logger_find_caller
+
     # Don't further configure the TensorFlow logger if the root logger is
     # already configured. This prevents double logging in those cases.
     if not _logging.getLogger().handlers:
@@ -216,18 +256,10 @@ def log_if(level, msg, condition, *args):
 
 def _GetFileAndLine():
   """Returns (filename, linenumber) for the stack frame."""
-  # Use sys._getframe().  This avoids creating a traceback object.
-  # pylint: disable=protected-access
-  f = _sys._getframe()
-  # pylint: enable=protected-access
-  our_file = f.f_code.co_filename
-  f = f.f_back
-  while f:
-    code = f.f_code
-    if code.co_filename != our_file:
-      return (code.co_filename, f.f_lineno)
-    f = f.f_back
-  return ('<unknown>', 0)
+  code, f = _get_caller()
+  if not code:
+    return ('<unknown>', 0)
+  return (code.co_filename, f.f_lineno)
 
 
 def google2_log_prefix(level, timestamp=None, file_and_line=None):
-- 
GitLab


From 560624bff65b7b502da2c52f9b250d9181c4a3f7 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Wed, 3 Oct 2018 10:51:17 -0700
Subject: [PATCH 1060/1357] Internal change.

PiperOrigin-RevId: 215589009
---
 tensorflow/contrib/lite/python/interpreter.py | 17 ++++
 .../interpreter_wrapper.cc                    | 19 ++++-
 .../interpreter_wrapper/interpreter_wrapper.h |  1 +
 .../model_coverage/model_coverage_lib.py      | 81 +++++++++++++++++--
 .../model_coverage/model_coverage_lib_test.py | 38 +++++++++
 5 files changed, 147 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py
index 5700bf7892..6300552cbe 100644
--- a/tensorflow/contrib/lite/python/interpreter.py
+++ b/tensorflow/contrib/lite/python/interpreter.py
@@ -129,6 +129,23 @@ class Interpreter(object):
 
     return details
 
+  def get_tensor_details(self):
+    """Gets tensor details for every tensor with valid tensor details.
+
+    Tensors where required information about the tensor is not found are not
+    added to the list. This includes temporary tensors without a name.
+
+    Returns:
+      A list of dictionaries containing tensor information.
+    """
+    tensor_details = []
+    for idx in range(self._interpreter.NumTensors()):
+      try:
+        tensor_details.append(self._get_tensor_details(idx))
+      except ValueError:
+        pass
+    return tensor_details
+
   def get_input_details(self):
     """Gets model input details.
 
diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
index 418f19a179..1e2384b6d2 100644
--- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
+++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
@@ -277,13 +277,20 @@ PyObject* InterpreterWrapper::ResizeInputTensor(int i, PyObject* value) {
   Py_RETURN_NONE;
 }
 
+int InterpreterWrapper::NumTensors() const {
+  if (!interpreter_) {
+    return 0;
+  }
+  return interpreter_->tensors_size();
+}
+
 std::string InterpreterWrapper::TensorName(int i) const {
   if (!interpreter_ || i >= interpreter_->tensors_size() || i < 0) {
     return "";
   }
 
   const TfLiteTensor* tensor = interpreter_->tensor(i);
-  return tensor->name;
+  return tensor->name ? tensor->name : "";
 }
 
 PyObject* InterpreterWrapper::TensorType(int i) const {
@@ -291,6 +298,11 @@ PyObject* InterpreterWrapper::TensorType(int i) const {
   TFLITE_PY_TENSOR_BOUNDS_CHECK(i);
 
   const TfLiteTensor* tensor = interpreter_->tensor(i);
+  if (tensor->type == kTfLiteNoType) {
+    PyErr_Format(PyExc_ValueError, "Tensor with no type found.");
+    return nullptr;
+  }
+
   int code = TfLiteTypeToPyArrayType(tensor->type);
   if (code == -1) {
     PyErr_Format(PyExc_ValueError, "Invalid tflite type code %d", code);
@@ -302,7 +314,12 @@ PyObject* InterpreterWrapper::TensorType(int i) const {
 PyObject* InterpreterWrapper::TensorSize(int i) const {
   TFLITE_PY_ENSURE_VALID_INTERPRETER();
   TFLITE_PY_TENSOR_BOUNDS_CHECK(i);
+
   const TfLiteTensor* tensor = interpreter_->tensor(i);
+  if (tensor->dims == nullptr) {
+    PyErr_Format(PyExc_ValueError, "Tensor with no shape found.");
+    return nullptr;
+  }
   PyObject* np_array =
       PyArrayFromIntVector(tensor->dims->data, tensor->dims->size);
 
diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
index f5ca81e62a..b98046fe8a 100644
--- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
+++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
@@ -59,6 +59,7 @@ class InterpreterWrapper {
   PyObject* OutputIndices() const;
   PyObject* ResizeInputTensor(int i, PyObject* value);
 
+  int NumTensors() const;
   std::string TensorName(int i) const;
   PyObject* TensorType(int i) const;
   PyObject* TensorSize(int i) const;
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
index 5ca57d083d..72029ed03c 100644
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
@@ -35,9 +35,9 @@ def _convert(converter, **kwargs):
   """Converts the model.
 
   Args:
-    converter: TocoConverter object.
+    converter: TFLiteConverter object.
     **kwargs: Additional arguments to be passed into the converter. Supported
-      flags are {"converter_mode", "post_training_quant"}.
+      flags are {"converter_mode", "post_training_quantize"}.
 
   Returns:
     The converted TFLite model in serialized format.
@@ -174,7 +174,7 @@ def compare_models_random_data(tflite_model, tf_eval_func, tolerance=5):
     tflite_model: Serialized TensorFlow Lite model.
     tf_eval_func: Lambda function that takes in input data and outputs the
       results of the TensorFlow model ([np.ndarray data] : [np.ndarray result]).
-    tolerance: Decimal place to check accuracy to.
+    tolerance: Decimal place to check accuracy to. (default 5)
   """
   input_data = _generate_random_input_data(tflite_model)
   tf_results = tf_eval_func(input_data)
@@ -183,6 +183,71 @@ def compare_models_random_data(tflite_model, tf_eval_func, tolerance=5):
     np.testing.assert_almost_equal(tf_result, tflite_result, tolerance)
 
 
+def test_frozen_graph_quant(filename,
+                            input_arrays,
+                            output_arrays,
+                            input_shapes=None,
+                            **kwargs):
+  """Sanity check to validate post quantize flag alters the graph.
+
+  This test does not check correctness of the converted model. It converts the
+  TensorFlow frozen graph to TFLite with and without the post_training_quantized
+  flag. It ensures some tensors have different types between the float and
+  quantized models in the case of an all TFLite model or mix-and-match model.
+  It ensures tensor types do not change in the case of an all Flex model.
+
+  Args:
+    filename: Full filepath of file containing frozen GraphDef.
+    input_arrays: List of input tensors to freeze graph with.
+    output_arrays: List of output tensors to freeze graph with.
+    input_shapes: Dict of strings representing input tensor names to list of
+      integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}).
+      Automatically determined when input shapes is None (e.g., {"foo" : None}).
+        (default None)
+    **kwargs: Additional arguments to be passed into the converter.
+
+  Raises:
+    ValueError: post_training_quantize flag doesn't act as intended.
+  """
+  # Convert and load the float model.
+  converter = _lite.TFLiteConverter.from_frozen_graph(
+      filename, input_arrays, output_arrays, input_shapes)
+  tflite_model_float = _convert(converter, **kwargs)
+
+  interpreter_float = _lite.Interpreter(model_content=tflite_model_float)
+  interpreter_float.allocate_tensors()
+  float_tensors = interpreter_float.get_tensor_details()
+
+  # Convert and load the quantized model.
+  converter = _lite.TFLiteConverter.from_frozen_graph(filename, input_arrays,
+                                                      output_arrays)
+  tflite_model_quant = _convert(
+      converter, post_training_quantize=True, **kwargs)
+
+  interpreter_quant = _lite.Interpreter(model_content=tflite_model_quant)
+  interpreter_quant.allocate_tensors()
+  quant_tensors = interpreter_quant.get_tensor_details()
+  quant_tensors_map = {
+      tensor_detail["name"]: tensor_detail for tensor_detail in quant_tensors
+  }
+
+  # Check if weights are of different types in the float and quantized models.
+  num_tensors_float = len(float_tensors)
+  num_tensors_same_dtypes = sum(
+      float_tensor["dtype"] == quant_tensors_map[float_tensor["name"]]["dtype"]
+      for float_tensor in float_tensors)
+  has_quant_tensor = num_tensors_float != num_tensors_same_dtypes
+
+  if ("converter_mode" in kwargs and
+      kwargs["converter_mode"] == _lite.ConverterMode.TOCO_FLEX_ALL):
+    if has_quant_tensor:
+      raise ValueError("--post_training_quantize flag unexpectedly altered the "
+                       "full Flex mode graph.")
+  elif not has_quant_tensor:
+    raise ValueError("--post_training_quantize flag was unable to quantize the "
+                     "graph as expected in TFLite and mix-and-match mode.")
+
+
 def test_frozen_graph(filename,
                       input_arrays,
                       output_arrays,
@@ -203,8 +268,8 @@ def test_frozen_graph(filename,
         (default None)
     **kwargs: Additional arguments to be passed into the converter.
   """
-  converter = _lite.TocoConverter.from_frozen_graph(filename, input_arrays,
-                                                    output_arrays, input_shapes)
+  converter = _lite.TFLiteConverter.from_frozen_graph(
+      filename, input_arrays, output_arrays, input_shapes)
   tflite_model = _convert(converter, **kwargs)
 
   tf_eval_func = evaluate_frozen_graph(filename, input_arrays, output_arrays)
@@ -224,8 +289,8 @@ def test_saved_model(directory, tag_set=None, signature_key=None, **kwargs):
     signature_key: Key identifying SignatureDef containing inputs and outputs.
     **kwargs: Additional arguments to be passed into the converter.
   """
-  converter = _lite.TocoConverter.from_saved_model(directory, tag_set,
-                                                   signature_key)
+  converter = _lite.TFLiteConverter.from_saved_model(directory, tag_set,
+                                                     signature_key)
   tflite_model = _convert(converter, **kwargs)
 
   tf_eval_func = evaluate_saved_model(directory, tag_set, signature_key)
@@ -242,7 +307,7 @@ def test_keras_model(filename, **kwargs):
     filename: Full filepath of HDF5 file containing the tf.keras model.
     **kwargs: Additional arguments to be passed into the converter.
   """
-  converter = _lite.TocoConverter.from_keras_model_file(filename)
+  converter = _lite.TFLiteConverter.from_keras_model_file(filename)
   tflite_model = _convert(converter, **kwargs)
 
   tf_eval_func = evaluate_keras_model(filename)
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
index 1498f86c6f..e07202b1a6 100644
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import os
 import tempfile
+import numpy as np
 
 from tensorflow.contrib.lite.python import lite
 from tensorflow.contrib.lite.testing.model_coverage import model_coverage_lib as model_coverage
@@ -66,6 +67,43 @@ class EvaluateFrozenGraph(test.TestCase):
     model_coverage.test_frozen_graph(filename, ['inputA', 'inputB'],
                                      ['add', 'Mean'])
 
+  def _getQuantizedModel(self):
+    np.random.seed(0)
+    with session.Session().as_default() as sess:
+      # The tensor needs to have more than 1024 elements for quantize_weights to
+      # kick in. Thus, the [33, 33] shape.
+      in_tensor_1 = array_ops.placeholder(
+          shape=[33, 33], dtype=dtypes.float32, name='inputA')
+      in_tensor_2 = constant_op.constant(
+          np.random.uniform(low=-10., high=10., size=(33, 33)),
+          shape=[33, 33],
+          dtype=dtypes.float32,
+          name='inputB')
+      _ = math_ops.matmul(in_tensor_1, in_tensor_2, name='output')
+
+    filename = self._saveFrozenGraph(sess)
+    return filename
+
+  def testQuantized(self):
+    filename = self._getQuantizedModel()
+    model_coverage.test_frozen_graph_quant(filename, ['inputA', 'inputB'],
+                                           ['output'])
+
+  def testQuantizedInputShapes(self):
+    filename = self._getQuantizedModel()
+    model_coverage.test_frozen_graph_quant(
+        filename, ['inputA', 'inputB'], ['output'],
+        input_shapes={
+            'inputA': [33, 33],
+            'inputB': [33, 33],
+        })
+
+  def testQuantizedFlexAll(self):
+    filename = self._getQuantizedModel()
+    model_coverage.test_frozen_graph_quant(
+        filename, ['inputA', 'inputB'], ['output'],
+        converter_mode=lite.ConverterMode.TOCO_FLEX_ALL)
+
 
 class EvaluateSavedModel(test.TestCase):
 
-- 
GitLab


From 0796d711f17c8c981d19461c9edd0e16837c8ab7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 10:51:56 -0700
Subject: [PATCH 1061/1357] Update _check_shape to accept six.integer_types
 instead of int

Currently _check_shape requires that a shape be an `int` or sequence of `int`s.  This CL allows `six.integer_type`s so now (1L,) would be a valid shape.

PiperOrigin-RevId: 215589131
---
 tensorflow/python/feature_column/feature_column.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 618e70f3a5..5352796174 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -2829,7 +2829,7 @@ def _check_shape(shape, key):
     shape = [shape]
   shape = tuple(shape)
   for dimension in shape:
-    if not isinstance(dimension, int):
+    if not isinstance(dimension, six.integer_types):
       raise TypeError('shape dimensions must be integer. '
                       'shape: {}, key: {}'.format(shape, key))
     if dimension < 1:
-- 
GitLab


From b25ef3877da28b7ec31d0bd69a7a6268f5e8a4b4 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Wed, 3 Oct 2018 10:58:53 -0700
Subject: [PATCH 1062/1357] Add a new GetRunFilesDir function to Env.

PiperOrigin-RevId: 215590440
---
 tensorflow/core/platform/env.h          |  6 ++++++
 tensorflow/core/platform/posix/env.cc   | 11 +++++++++++
 tensorflow/core/platform/windows/env.cc | 11 +++++++++++
 3 files changed, 28 insertions(+)

diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 5b237c4736..5732271f15 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -228,6 +228,10 @@ class Env {
   /// |suffix|. Returns true if success.
   bool CreateUniqueFileName(string* prefix, const string& suffix);
 
+  /// \brief Return the runfiles directory if running under bazel. Returns
+  /// the directory the executable is located in if not running under bazel.
+  virtual string GetRunfilesDir() = 0;
+
   // TODO(jeff,sanjay): Add back thread/thread-pool support if needed.
   // TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or
   // provide a routine to get the absolute time.
@@ -360,6 +364,8 @@ class EnvWrapper : public Env {
     return target_->FormatLibraryFileName(name, version);
   }
 
+  string GetRunfilesDir() override { return target_->GetRunfilesDir(); }
+
  private:
   void GetLocalTempDirectories(std::vector<string>* list) override {
     target_->GetLocalTempDirectories(list);
diff --git a/tensorflow/core/platform/posix/env.cc b/tensorflow/core/platform/posix/env.cc
index 418874d340..af95d8201e 100644
--- a/tensorflow/core/platform/posix/env.cc
+++ b/tensorflow/core/platform/posix/env.cc
@@ -119,6 +119,17 @@ class PosixEnv : public Env {
     return tensorflow::internal::FormatLibraryFileName(name, version);
   }
 
+  string GetRunfilesDir() override {
+    string bin_path = this->GetExecutablePath();
+    string runfiles_path = bin_path + ".runfiles/org_tensorflow";
+    Status s = this->IsDirectory(runfiles_path);
+    if (!s.ok()) {
+      return runfiles_path;
+    } else {
+      return bin_path.substr(0, bin_path.find_last_of("/\\"));
+    }
+  }
+
  private:
   void GetLocalTempDirectories(std::vector<string>* list) override;
 };
diff --git a/tensorflow/core/platform/windows/env.cc b/tensorflow/core/platform/windows/env.cc
index 68ee3595a2..f26ccd1662 100644
--- a/tensorflow/core/platform/windows/env.cc
+++ b/tensorflow/core/platform/windows/env.cc
@@ -160,6 +160,17 @@ class WindowsEnv : public Env {
     return filename;
   }
 
+  string GetRunfilesDir() override {
+    string bin_path = this->GetExecutablePath();
+    string runfiles_path = bin_path + ".runfiles\\org_tensorflow";
+    Status s = this->IsDirectory(runfiles_path);
+    if (!s.ok()) {
+      return runfiles_path;
+    } else {
+      return bin_path.substr(0, bin_path.find_last_of("/\\"));
+    }
+  }
+
  private:
   void GetLocalTempDirectories(std::vector<string>* list) override;
 
-- 
GitLab


From 55ea7f89ee6aa45c5a7623ac9ba671044467e807 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 11:00:21 -0700
Subject: [PATCH 1063/1357] Supports TPUEstimatorSpec in multi_head for TRAIN
 and PREDICT modes.

PiperOrigin-RevId: 215590676
---
 .../estimator/python/estimator/multi_head.py  | 67 ++++++++++++-----
 .../python/estimator/multi_head_test.py       | 75 ++++++++++++++++---
 2 files changed, 111 insertions(+), 31 deletions(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py
index ce75899214..6e793c8302 100644
--- a/tensorflow/contrib/estimator/python/estimator/multi_head.py
+++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py
@@ -233,6 +233,22 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
       self, features, mode, logits, labels=None, optimizer=None,
       train_op_fn=None):
     """See `_Head`."""
+    return self._create_estimator_spec(
+        features=features, mode=mode, logits=logits, labels=labels,
+        optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=False)
+
+  def _create_tpu_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None):
+    """See `_Head`."""
+    return self._create_estimator_spec(
+        features=features, mode=mode, logits=logits, labels=labels,
+        optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=True)
+
+  def _create_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None, use_tpu=False):
+    """Returns `EstimatorSpec` or `TPUEstimatorSpec`."""
     if isinstance(logits, dict):
       logits_dict = logits
     else:
@@ -255,14 +271,15 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
       spec = self._merge_train(
           all_estimator_spec=all_estimator_spec,
           optimizer=optimizer,
-          train_op_fn=train_op_fn)
+          train_op_fn=train_op_fn,
+          use_tpu=use_tpu)
       with ops.name_scope(''):
         summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss)
       return spec
     if mode == model_fn.ModeKeys.PREDICT:
-      return self._merge_predict(all_estimator_spec)
+      return self._merge_predict(all_estimator_spec, use_tpu=use_tpu)
     if mode == model_fn.ModeKeys.EVAL:
-      return self._merge_eval(all_estimator_spec)
+      return self._merge_eval(all_estimator_spec, use_tpu=use_tpu)
     raise ValueError('mode={} unrecognized'.format(mode))
 
   def _split_logits(self, logits):
@@ -284,28 +301,28 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
         begin_idx += head.logits_dimension
     return logits_dict
 
-  def _merge_train(self, all_estimator_spec, optimizer, train_op_fn):
-    """Merges list of `EstimatorSpec` for training.
+  def _merge_train(
+      self, all_estimator_spec, optimizer, train_op_fn, use_tpu=False):
+    """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for training.
 
     Args:
-      all_estimator_spec: list of `EstimatorSpec` for the individual heads.
+      all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the
+        individual heads.
       optimizer: `Optimizer` instance to create train op. See
         `create_estimator_spec` documentation for more details.
       train_op_fn: Function to create train op. Used if `optimizer` is `None`.
+      use_tpu: If `True`, returns `TPUEstimatorSpec`.
 
     Returns:
-      `EstimatorSpec` that merges all heads for TRAIN.
+      `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for TRAIN.
 
     Raises:
       ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
         mode.
     """
     losses = []
-    metrics = {}
     for spec in all_estimator_spec:
       losses.append(spec.loss)
-      # Metric keys already contain head.name.
-      metrics.update(spec.eval_metric_ops or {})
     loss = _merge_losses(losses, self._head_weights)
     if optimizer is not None:
       if train_op_fn is not None:
@@ -317,20 +334,23 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
     else:
       raise ValueError('train_op_fn and optimizer cannot both be None.')
 
-    return model_fn.EstimatorSpec(
+    spec_type = (
+        model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec)  # pylint:disable=protected-access
+    return spec_type(
         mode=model_fn.ModeKeys.TRAIN,
         loss=loss,
-        train_op=train_op,
-        eval_metric_ops=metrics)
+        train_op=train_op)
 
-  def _merge_predict(self, all_estimator_spec):
-    """Merges list of `EstimatorSpec` for prediction.
+  def _merge_predict(self, all_estimator_spec, use_tpu=False):
+    """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for prediction.
 
     Args:
-      all_estimator_spec: list of `EstimatorSpec` for the individual heads.
+      all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the
+        individual heads.
+      use_tpu: If `True`, returns `TPUEstimatorSpec`.
 
     Returns:
-      `EstimatorSpec` that merges all heads for PREDICT.
+      `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for PREDICT.
     """
     predictions = {}
     export_outputs = {
@@ -357,20 +377,29 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
     export_outputs[head_lib._PREDICT_SERVING_KEY] = (  # pylint:disable=protected-access
         export_output_lib.PredictOutput(merged_predict_outputs))
 
-    return model_fn.EstimatorSpec(
+    spec_type = (
+        model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec)  # pylint:disable=protected-access
+    return spec_type(
         mode=model_fn.ModeKeys.PREDICT,
         predictions=predictions,
         export_outputs=export_outputs)
 
-  def _merge_eval(self, all_estimator_spec):
+  def _merge_eval(self, all_estimator_spec, use_tpu=False):
     """Merges list of `EstimatorSpec` for eval.
 
     Args:
       all_estimator_spec: list of `EstimatorSpec` for the individual heads.
+      use_tpu: If `True`, will raise `NotImplementedError`, because TPU is not
+        yet supported for eval.
 
     Returns:
       `EstimatorSpec` that merges all heads for EVAL.
+    Raises:
+      NotImplementedError: If `use_tpu` is `True`.
     """
+    if use_tpu:
+      raise NotImplementedError(
+          'TPU evaluation is not implemented for multi_head.')
     predictions = {}
     metrics = {}
     losses = []
diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
index 2b4d5f5261..a602f87b4a 100644
--- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
@@ -106,7 +106,7 @@ class MultiHeadTest(test.TestCase):
     multi_head = multi_head_lib.multi_head([head1, head2])
     self.assertEqual('head1_head2', multi_head.name)
 
-  def test_predict_two_heads_logits_dict(self):
+  def _test_predict_two_heads_logits_dict(self, use_tpu):
     """Tests predict with logits as dict."""
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
     head2 = head_lib.multi_label_head(n_classes=3, name='head2')
@@ -121,10 +121,16 @@ class MultiHeadTest(test.TestCase):
         'head2': _sigmoid(logits['head2']),
     }
 
-    spec = multi_head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
+    if use_tpu:
+      spec = multi_head._create_tpu_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.PREDICT,
+          logits=logits).as_estimator_spec()
+    else:
+      spec = multi_head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.PREDICT,
+          logits=logits)
 
     self.assertItemsEqual(
         (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification',
@@ -175,6 +181,12 @@ class MultiHeadTest(test.TestCase):
           sess.run(
               spec.export_outputs['head2/predict'].outputs['probabilities']))
 
+  def test_predict_two_heads_logits_dict(self):
+    self._test_predict_two_heads_logits_dict(use_tpu=False)
+
+  def test_predict_two_heads_logits_dict_tpu(self):
+    self._test_predict_two_heads_logits_dict(use_tpu=True)
+
   def test_predict_two_heads_logits_tensor(self):
     """Tests predict with logits as Tensor."""
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
@@ -350,6 +362,31 @@ class MultiHeadTest(test.TestCase):
           rtol=tol,
           atol=tol)
 
+  def test_eval_tpu(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    multi_head = multi_head_lib.multi_head(
+        [head1, head2], head_weights=[1., 2.])
+
+    logits = {
+        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
+        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
+                          dtype=np.float32),
+    }
+    labels = {
+        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
+        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
+    }
+
+    with self.assertRaisesRegexp(
+        NotImplementedError,
+        r'TPU evaluation is not implemented for multi_head\.'):
+      multi_head._create_tpu_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=logits,
+          labels=labels)
+
   def test_train_create_loss_one_head(self):
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
     multi_head = multi_head_lib.multi_head([head1])
@@ -587,7 +624,7 @@ class MultiHeadTest(test.TestCase):
           six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
           train_result)
 
-  def test_train_two_heads_with_weights(self):
+  def _test_train_two_heads_with_weights(self, use_tpu):
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
     head2 = head_lib.multi_label_head(n_classes=3, name='head2')
     multi_head = multi_head_lib.multi_head(
@@ -619,12 +656,20 @@ class MultiHeadTest(test.TestCase):
           [constant_op.constant(expected_train_result),
            string_ops.as_string(loss, precision=3)])
 
-    spec = multi_head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
+    if use_tpu:
+      spec = multi_head._create_tpu_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=logits,
+          labels=labels,
+          train_op_fn=_train_op_fn).as_estimator_spec()
+    else:
+      spec = multi_head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=logits,
+          labels=labels,
+          train_op_fn=_train_op_fn)
 
     self.assertIsNotNone(spec.loss)
     self.assertEqual({}, spec.eval_metric_ops)
@@ -649,6 +694,12 @@ class MultiHeadTest(test.TestCase):
           metric_keys.MetricKeys.LOSS + '/head2': expected_loss_head2,
       }, summary_str, tol)
 
+  def test_train_two_heads_with_weights(self):
+    self._test_train_two_heads_with_weights(use_tpu=False)
+
+  def test_train_two_heads_with_weights_tpu(self):
+    self._test_train_two_heads_with_weights(use_tpu=True)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 51b266fba181dffb6b3f9207280cde6b7670dd90 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 3 Oct 2018 11:09:44 -0700
Subject: [PATCH 1064/1357] [tf.data] Fix noisy warning.

PiperOrigin-RevId: 215592456
---
 tensorflow/python/data/ops/dataset_ops.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 46ce191f7b..3693cc88f2 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1830,10 +1830,11 @@ class StructuredFunctionWrapper(object):
           component = _NestedDatasetComponent(t)
           flat_classes.append(component)
           flat_shapes.append(component)
-          flat_types.append(component)
-          if t.options() is not None:  # pylint: disable=protected-access
-            warnings.warn("Encountered a nested dataset with options. These "
-                          "options will not be applied to the outer dataset.")
+          flat_types.append(component)          
+          if t.options() != Options():  # pylint: disable=protected-access
+            warnings.warn("Encountered a nested dataset with non-default "
+                          "options. These options will not be propagated to "
+                          "the outer dataset.")
         else:
           try:
             t = ops.convert_to_tensor(t)
-- 
GitLab


From 880dcb7a91e5ee497045614d9c5f4ab93c9ffacf Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 3 Oct 2018 11:17:48 -0700
Subject: [PATCH 1065/1357] Automated rollback of commit
 51b266fba181dffb6b3f9207280cde6b7670dd90

PiperOrigin-RevId: 215593867
---
 tensorflow/python/data/ops/dataset_ops.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 3693cc88f2..46ce191f7b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1830,11 +1830,10 @@ class StructuredFunctionWrapper(object):
           component = _NestedDatasetComponent(t)
           flat_classes.append(component)
           flat_shapes.append(component)
-          flat_types.append(component)          
-          if t.options() != Options():  # pylint: disable=protected-access
-            warnings.warn("Encountered a nested dataset with non-default "
-                          "options. These options will not be propagated to "
-                          "the outer dataset.")
+          flat_types.append(component)
+          if t.options() is not None:  # pylint: disable=protected-access
+            warnings.warn("Encountered a nested dataset with options. These "
+                          "options will not be applied to the outer dataset.")
         else:
           try:
             t = ops.convert_to_tensor(t)
-- 
GitLab


From 47eafbaf43c763dc65a2cd3cfd9ecbd8fbbdf668 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Wed, 3 Oct 2018 11:24:41 -0700
Subject: [PATCH 1066/1357] [tf.data] Add utility to deduplicate graph node
 names (after vectorization)

PiperOrigin-RevId: 215595078
---
 tensorflow/core/graph/graph.cc                |  5 ++++
 tensorflow/core/graph/graph.h                 |  1 +
 .../core/grappler/optimizers/data/BUILD       |  2 ++
 .../grappler/optimizers/data/graph_utils.cc   | 21 ++++++++++++++
 .../grappler/optimizers/data/graph_utils.h    |  9 ++++++
 .../optimizers/data/graph_utils_test.cc       | 28 +++++++++++++++++++
 .../optimizers/data/vectorization_utils.cc    |  2 ++
 7 files changed, 68 insertions(+)

diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 4c0cd14ff1..7a4a0096fa 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -192,6 +192,11 @@ void Node::ClearAttr(const string& name) {
   (*props_->node_def.mutable_attr()).erase(name);
 }
 
+void Node::set_name(string name) {
+  MaybeCopyOnWrite();
+  props_->node_def.set_name(std::move(name));
+}
+
 void Node::set_requested_device(const string& device) {
   MaybeCopyOnWrite();
   props_->node_def.set_device(device);
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 72cef07072..2944951f82 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -72,6 +72,7 @@ class Node {
   int id() const { return id_; }
   int cost_id() const { return cost_id_; }
   const string& name() const;
+  void set_name(string name);
   const string& type_string() const;
 
   // def() provides the NodeDef the user supplied, but the specifics
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 5a3abbb545..755af3361e 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -129,6 +129,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core/grappler:utils",
+        "//tensorflow/core:lib_internal",
     ] + tf_protos_all(),
 )
 
@@ -138,6 +139,7 @@ tf_cc_test(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_utils",
+        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index 3eaaf8fbef..b863a25dc5 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
@@ -272,6 +273,26 @@ void ConcatAttributeList(const string& attribute_name, const NodeDef& first,
       ->MergeFrom(second.attr().at(attribute_name).list());
 }
 
+Status EnsureNodeNamesUnique(Graph* g) {
+  // Modeled after Scope::Impl::GetUniqueName
+  std::unordered_map<string, int> name_map;
+
+  for (auto node : g->op_nodes()) {
+    const string& prefix = node->name();
+    if (auto entry = gtl::FindOrNull(name_map, prefix)) {
+      string unique_name;
+      do {
+        unique_name = strings::StrCat(prefix, "_", ++(*entry));
+      } while (name_map.find(unique_name) != name_map.end());
+      name_map.insert({unique_name, 0});
+      node->set_name(std::move(unique_name));
+    } else {
+      name_map.insert({node->name(), 0});
+    }
+  }
+
+  return Status::OK();
+}
 }  // end namespace graph_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h
index 3af34f6904..d130fee204 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -131,6 +132,14 @@ void CopyAttribute(const string& attribute_name, const NodeDef& from,
 void ConcatAttributeList(const string& attribute_name, const NodeDef& first,
                          const NodeDef& second, NodeDef* to_node);
 
+// Checks that all nodes in the graphs have unique names, and sets their names
+// to be unique if they are not already.  This is necessary as Graph does not
+// have the provisions to deduplicate names, and name deduplication elsewhere
+// in tensorflow happens in other layers (for example, in the Scope class of the
+// C++ API). Note that the nodes in the graph are identified by their id,
+// and renaming nodes does not mutate any edges.
+Status EnsureNodeNamesUnique(Graph* g);
+
 }  // end namespace graph_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
index db986542b2..4ab6d71532 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
 #include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -229,6 +230,33 @@ TEST(GraphUtilsTest, GetInputNode) {
   EXPECT_EQ(GetInputNode(*node1, graph), nullptr);
 }
 
+TEST(GraphUtilsTest, EnsureNodeNamesUnique) {
+  Graph g(OpRegistry::Global());
+
+  Node *const_0, *const_1, *const_2;
+
+  // Arbitrary const
+  Tensor tensor(DT_INT32, {});
+  tensor.scalar<int32>()() = 5;
+
+  for (auto node : {&const_0, &const_1}) {
+    TF_EXPECT_OK(NodeBuilder("Const", "Const")
+                     .Attr("value", tensor)
+                     .Attr("dtype", DT_INT32)
+                     .Finalize(&g, node));
+  }
+  // Make sure generated name doesn't clash with existing name either
+  TF_EXPECT_OK(NodeBuilder("Const_1", "Const")
+                   .Attr("value", tensor)
+                   .Attr("dtype", DT_INT32)
+                   .Finalize(&g, &const_2));
+
+  TF_EXPECT_OK(EnsureNodeNamesUnique(&g));
+  EXPECT_NE(const_0->name(), const_1->name());
+  EXPECT_NE(const_1->name(), const_2->name());
+  EXPECT_NE(const_0->name(), const_2->name());
+}
+
 }  // namespace
 }  // namespace graph_utils
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index cea667f668..2d6cf562b1 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -352,6 +352,8 @@ Status Vectorization::Initialize(const FunctionDef& outer_scope,
 
 Status Vectorization::GetResult(FunctionDef** vectorized_function) {
   TF_RETURN_IF_ERROR(status_);
+  TF_RETURN_IF_ERROR(graph_utils::EnsureNodeNamesUnique(outer_scope_.get()));
+  TF_RETURN_IF_ERROR(graph_utils::EnsureNodeNamesUnique(map_defun_fn_->graph));
 
   if (!map_defun_fn_->ret_nodes.empty()) {
     FunctionDef* map_defun_fn = lib_->add_function();
-- 
GitLab


From 3d76a83037388b61bcda1571d3b3e175a2f53f2e Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Wed, 3 Oct 2018 12:25:25 -0700
Subject: [PATCH 1067/1357] Disable XLA for Android builds.

PiperOrigin-RevId: 215605865
---
 tensorflow/tools/ci_build/builds/configured | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/ci_build/builds/configured b/tensorflow/tools/ci_build/builds/configured
index 868a3beac5..3eee11fd7e 100755
--- a/tensorflow/tools/ci_build/builds/configured
+++ b/tensorflow/tools/ci_build/builds/configured
@@ -32,6 +32,10 @@ COMMAND=("$@")
 
 export CI_BUILD_PYTHON="${CI_BUILD_PYTHON:-python}"
 export PYTHON_BIN_PATH="${PYTHON_BIN_PATH:-$(which ${CI_BUILD_PYTHON})}"
+# XLA currently does not build under Android, so disable it for now.
+if [[ "${CONTAINER_TYPE}" -eq 'android' ]]; then
+  export TF_ENABLE_XLA=0
+fi
 
 pushd "${CI_TENSORFLOW_SUBMODULE_PATH:-.}"
 yes "" | $PYTHON_BIN_PATH configure.py
-- 
GitLab


From 295b3c80555cc82d8d70faf96a47681e1d904b9c Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 3 Oct 2018 12:32:16 -0700
Subject: [PATCH 1068/1357] Automated rollback of commit
 c9bdd3938e2b43334a0065b4c198ec9d491c8cb8

PiperOrigin-RevId: 215607038
---
 tensorflow/core/kernels/data/iterator_ops.cc  |  4 +++
 .../kernels/data/map_and_batch_dataset_op.cc  | 10 +++----
 .../core/kernels/data/model_dataset_op.cc     | 10 +++----
 .../data/parallel_interleave_dataset_op.cc    | 27 ++++++++-----------
 .../kernels/data/parallel_map_iterator.cc     | 10 +++----
 .../core/kernels/data/prefetch_dataset_op.cc  | 10 +++----
 tensorflow/core/kernels/data/writer_ops.cc    | 12 ++++-----
 7 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 8acd6cc724..7a833668ac 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -16,8 +16,10 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
+#include "tensorflow/core/common_runtime/threadpool_device.h"
 #include "tensorflow/core/framework/iterator.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/resource_op_kernel.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
@@ -25,11 +27,13 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/optional_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 namespace data {
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 6a670f1efb..bf08970560 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -29,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -406,10 +405,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
-          runner_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-          runner_thread_->Schedule(
-              std::bind(&Iterator::RunnerThread, this, ctx_copy));
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "runner_thread",
+              std::bind(&Iterator::RunnerThread, this, ctx_copy)));
         }
       }
 
@@ -662,7 +660,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
-      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
       bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index 859df57962..9aa505f4f1 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -127,10 +126,9 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         if (!optimize_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          optimize_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "optimize_thread");
-          optimize_thread_->Schedule(
-              [this, new_ctx]() { OptimizeThread(new_ctx); });
+          optimize_thread_.reset(ctx->env()->StartThread(
+              {}, "optimize_thread",
+              [this, new_ctx]() { OptimizeThread(new_ctx); }));
         }
         return Status::OK();
       }
@@ -169,7 +167,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       condition_variable cond_var_;
       std::shared_ptr<model::Model> model_;
-      std::unique_ptr<BackgroundWorker> optimize_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<Thread> optimize_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 9c836b836e..6b6b3d6ab9 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -26,7 +26,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -482,10 +481,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           worker_threads_.reserve(dataset()->num_threads());
           for (size_t i = 0; i < dataset()->num_threads(); ++i) {
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(
-                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
-            worker_threads_.back()->Schedule(
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
+            worker_threads_.emplace_back(ctx->env()->StartThread(
+                {}, "worker_thread",
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
           }
         }
         return Status::OK();
@@ -582,10 +580,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             }
             workers_[i].SetInputs(s, std::move(args));
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(
-                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
-            worker_threads_.back()->Schedule(
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
+            worker_threads_.emplace_back(ctx->env()->StartThread(
+                {}, "worker_thread",
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
             if (i < dataset()->cycle_length_) {
               interleave_indices_.push_back(i);
             } else {
@@ -1050,8 +1047,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // The worker threads. This must be last to ensure the
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
-      std::vector<std::unique_ptr<BackgroundWorker>> worker_threads_
-          GUARDED_BY(mu_);
+      std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
@@ -1393,10 +1389,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          runner_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-          runner_thread_->Schedule(
-              [this, new_ctx]() { RunnerThread(new_ctx); });
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "runner_thread",
+              [this, new_ctx]() { RunnerThread(new_ctx); }));
         }
       }
 
@@ -1650,7 +1645,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(*mu_) = false;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 626e98af91..13bd4b6036 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,7 +22,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -181,10 +180,9 @@ class ParallelMapIterator : public DatasetBaseIterator {
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
-      runner_thread_ =
-          MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-      runner_thread_->Schedule(
-          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy));
+      runner_thread_.reset(ctx->env()->StartThread(
+          {}, "runner_thread",
+          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
     }
   }
 
@@ -332,7 +330,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
       GUARDED_BY(*mu_);
-  std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
   bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index e9c38eb8a0..754ed772db 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -257,11 +256,10 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     Status EnsurePrefetchThreadStarted(IteratorContext* ctx)
         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
       if (!prefetch_thread_) {
-        prefetch_thread_ =
-            MakeUnique<BackgroundWorker>(ctx->env(), "prefetch_thread");
         std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-        prefetch_thread_->Schedule(
-            [this, new_ctx]() { PrefetchThread(new_ctx); });
+        prefetch_thread_.reset(ctx->env()->StartThread(
+            {}, "prefetch_thread",
+            [this, new_ctx]() { PrefetchThread(new_ctx); }));
       }
       return Status::OK();
     }
@@ -365,7 +363,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     string prefix_end_;
     PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_);
     std::deque<BufferElement> buffer_ GUARDED_BY(mu_);
-    std::unique_ptr<BackgroundWorker> prefetch_thread_ GUARDED_BY(mu_);
+    std::unique_ptr<Thread> prefetch_thread_ GUARDED_BY(mu_);
     bool cancelled_ GUARDED_BY(mu_) = false;
     bool prefetch_thread_finished_ GUARDED_BY(mu_) = false;
   };
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index 7bb2077b62..3f76695bb1 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel {
  public:
   explicit ToTFRecordOp(OpKernelConstruction* ctx)
       : AsyncOpKernel(ctx),
-        background_worker_(
-            ctx->env(),
-            strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) {
-  }
+        thread_pool_(new thread::ThreadPool(
+            ctx->env(), ThreadOptions(),
+            strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())),
+            1 /* num_threads */, false /* low_latency_hint */)) {}
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
@@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel {
     // The call to `iterator->GetNext()` may block and depend on an
     // inter-op thread pool thread, so we issue the call from the
     // owned thread pool.
-    background_worker_.Schedule([this, ctx, done]() {
+    thread_pool_->Schedule([this, ctx, done]() {
       string filename;
       OP_REQUIRES_OK_ASYNC(
           ctx, ParseScalarArgument<string>(ctx, "filename", &filename), done);
@@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel {
   }
 
  private:
-  BackgroundWorker background_worker_;
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU),
-- 
GitLab


From d4e9282dc53697432178a68940634612c4ab2baa Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 3 Oct 2018 12:32:57 -0700
Subject: [PATCH 1069/1357] [tf.data] Fix noisy warning.

PiperOrigin-RevId: 215607171
---
 tensorflow/python/data/ops/dataset_ops.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 46ce191f7b..b7e19055f2 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1831,9 +1831,10 @@ class StructuredFunctionWrapper(object):
           flat_classes.append(component)
           flat_shapes.append(component)
           flat_types.append(component)
-          if t.options() is not None:  # pylint: disable=protected-access
-            warnings.warn("Encountered a nested dataset with options. These "
-                          "options will not be applied to the outer dataset.")
+          if t.options() != Options():
+            warnings.warn("Encountered a nested dataset with non-default "
+                          "options. These options will not be propagated to "
+                          "the outer dataset.")
         else:
           try:
             t = ops.convert_to_tensor(t)
-- 
GitLab


From 506ea0b8d3af1b54f42721584a414957e1525c8a Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 12:36:16 -0700
Subject: [PATCH 1070/1357] Change hierarchical_tree_broadcaster_test from
 small to medium.

PiperOrigin-RevId: 215607769
---
 tensorflow/core/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 0aae29d10c..6a3ee3c1cb 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -3750,7 +3750,7 @@ tf_cc_tests_gpu(
 
 tf_cc_tests_gpu(
     name = "hierarchical_tree_broadcaster_test",
-    size = "small",
+    size = "medium",
     srcs = [
         "common_runtime/hierarchical_tree_broadcaster_test.cc",
     ],
-- 
GitLab


From 19833284cc8fa555115aacde350ad66652b250dc Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Wed, 3 Oct 2018 12:39:32 -0700
Subject: [PATCH 1071/1357] Automated rollback of commit
 2af8fd975aaf5c70ebb396895fa15a8f034a8440

PiperOrigin-RevId: 215608349
---
 .../tf2xla/functionalize_control_flow.cc      | 129 ++++++------------
 1 file changed, 39 insertions(+), 90 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 28e09d7b79..36c6f5d316 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -79,10 +79,7 @@ Status FunctionalizeControlFlowForFunction(
     const string& func_name, const string& new_func_name,
     const protobuf::Map<string, tensorflow::AttrValue>& attrs,
     FunctionLibraryDefinition* fld, FunctionLibraryRuntime* flr,
-    std::map<string, absl::optional<string>>* canonicalized_name_to_new_name,
-    bool* modified) {
-  *modified = false;
-
+    std::map<string, string>* canonicalized_name_to_new_name) {
   // Convert the function to Graph.
   FunctionLibraryRuntime::Handle handle;
   TF_RETURN_IF_ERROR(flr->Instantiate(func_name, AttrSlice(&attrs), &handle));
@@ -95,19 +92,6 @@ Status FunctionalizeControlFlowForFunction(
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
 
-  // Check if the graph has Switch or Merge node before optimizing the graph.
-  bool has_switch_or_merge = false;
-  for (Node* n : body->graph->nodes()) {
-    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
-      has_switch_or_merge = true;
-      break;
-    }
-  }
-  // We cannot return here directly if the graph has no Switch/Merge.
-  // It might contain function call nodes, or If/While nodes with Switch/Merge
-  // in function body. We still need to rewrite those functions and modify
-  // corresponding nodes.
-
   // Call graph optimizer. The most important optimization we need is constant
   // folding, which will replace ops like Shape/BroadcastGradientArgs with
   // constant shape input. Without this optimization, those ops might become
@@ -145,13 +129,6 @@ Status FunctionalizeControlFlowForFunction(
         absl::StrCat("functionalize_control_flow_after_opt_", func_name),
         *optimized_graph, fld);
   }
-  // Some inlined functions might have Switch/Merge nodes.
-  for (Node* n : optimized_graph->nodes()) {
-    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
-      has_switch_or_merge = true;
-      break;
-    }
-  }
 
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
@@ -174,15 +151,10 @@ Status FunctionalizeControlFlowForFunction(
           Canonicalize(name, AttrSlice(&associated_function.attrs()));
       auto iter = canonicalized_name_to_new_name->find(canonicalized_name);
       string new_name;
-      bool function_modified;
       if (iter != canonicalized_name_to_new_name->end()) {
-        // If we already processed this function, check if it was rewritten. If
-        // the function was rewritten, the entry will be non-empty. Otherwise
-        // the entry will be empty.
-        function_modified = iter->second.has_value();
-        if (function_modified) {
-          new_name = iter->second.value();
-        }
+        // If we already functionalized this function, skip functionalization
+        // but still rewrite the node.
+        new_name = iter->second;
       } else {
         if (associated_function.type() ==
             AssociatedFunctionInfo::AssociatedFunctionType::kSymbolicGradient) {
@@ -194,62 +166,42 @@ Status FunctionalizeControlFlowForFunction(
         }
         TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
             name, new_name, associated_function.attrs(), fld, flr,
-            canonicalized_name_to_new_name, &function_modified));
-        if (function_modified) {
-          // If the function was rewritten, add an non-empty entry. So later we
-          // know we have processed this function, and it was rewritten into
-          // another function.
-          (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
-        } else {
-          // If the function was not rewritten, add an empty entry. So later
-          // we know we have processed this function, and it does not need to be
-          // rewritten.
-          (*canonicalized_name_to_new_name)[canonicalized_name] = absl::nullopt;
-        }
-      }
-      if (function_modified) {
-        *modified = true;
-
-        // Notice that if "n" is a function call, RewriteAssociatedFunction()
-        // will delete it and create a new node instead, making "n" an invalid
-        // pointer. That's fine because in that case, associated_functions will
-        // only have one member and the loop will only run once.
-        TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-            optimized_graph.get(), n, fld, associated_function, new_name));
+            canonicalized_name_to_new_name));
+        (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
       }
+      // Notice that if "n" is a function call, RewriteAssociatedFunction() will
+      // delete it and create a new node instead, making "n" an invalid pointer.
+      // That's fine because in that case, associated_functions will only have
+      // one member and the loop will only run once.
+      TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
+          optimized_graph.get(), n, fld, associated_function, new_name));
     }
   }
 
-  if (has_switch_or_merge) {
-    *modified = true;
-
-    // Functionalize the function body.
-    if (VLOG_IS_ON(4)) {
-      dump_graph::DumpGraphToFile(
-          absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-          *optimized_graph, fld);
-    }
-    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
-    if (VLOG_IS_ON(4)) {
-      dump_graph::DumpGraphToFile(
-          absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-          *optimized_graph, fld);
-    }
+  // Functionalize the function body.
+  if (VLOG_IS_ON(4)) {
+    dump_graph::DumpGraphToFile(
+        absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
+        *optimized_graph, fld);
   }
+  TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
+  if (VLOG_IS_ON(4)) {
+    dump_graph::DumpGraphToFile(
+        absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
+        *optimized_graph, fld);
+  }
+  FunctionDef functionalized_fdef;
+  TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
+                                        &functionalized_fdef));
 
-  if (*modified) {
-    // Add rewritten FunctionDef into library.
-    FunctionDef functionalized_fdef;
-    TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
-                                          &functionalized_fdef));
-    if (func_name == new_func_name) {
-      VLOG(2) << "Replacing function " << func_name;
-      TF_RETURN_IF_ERROR(
-          fld->ReplaceFunction(new_func_name, functionalized_fdef));
-    } else {
-      VLOG(2) << "Adding function " << new_func_name;
-      TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
-    }
+  // Add rewritten FunctionDef into library.
+  if (func_name == new_func_name) {
+    VLOG(2) << "Replacing function " << func_name;
+    TF_RETURN_IF_ERROR(
+        fld->ReplaceFunction(new_func_name, functionalized_fdef));
+  } else {
+    VLOG(2) << "Adding function " << new_func_name;
+    TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
   }
 
   return ret_status;
@@ -275,7 +227,7 @@ Status FunctionalizeControlFlowPass::Run(
           {"TPUCompile", "function"},
           {"XlaLaunch", "function"},
       };
-  std::map<string, absl::optional<string>> canonicalized_name_to_new_name;
+  std::map<string, string> canonicalized_name_to_new_name;
   for (Node* n : graph->nodes()) {
     auto it = kNodeTypeToFunctionAttrMapping->find(n->type_string());
     if (it == kNodeTypeToFunctionAttrMapping->end()) {
@@ -290,15 +242,12 @@ Status FunctionalizeControlFlowPass::Run(
               << ". Corresponding function: " << func.name();
       string new_func_name = options.flib_def->UniqueFunctionName(
           absl::StrCat(func.name(), "_f15n_"));
-      bool modified;
       TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
           func.name(), new_func_name, func.attr(), options.flib_def, flr,
-          &canonicalized_name_to_new_name, &modified));
-      if (modified) {
-        n->ClearAttr(func_attr);
-        func.set_name(new_func_name);
-        n->AddAttr(func_attr, func);
-      }
+          &canonicalized_name_to_new_name));
+      n->ClearAttr(func_attr);
+      func.set_name(new_func_name);
+      n->AddAttr(func_attr, func);
     }
   }
 
-- 
GitLab


From 808b1dcb318b1feb5a8c9fed5558f95cd05728e4 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Wed, 3 Oct 2018 12:44:47 -0700
Subject: [PATCH 1072/1357] [data-stats] Sets user given `tag` and
 `counter_prefix` with `set_stats_aggregator`. `tag` would get prep-end with
 all the statistics recorded as summary and `counter_prefix` would set the
 prefix for the statistics recorded as counter. Note: `counter` defaults to
 `\tensorflow`, and `tag` and `prefix` gets associated with the dataset (not
 the stats_aggregator).

PiperOrigin-RevId: 215609159
---
 tensorflow/core/framework/dataset.h           | 22 +-----
 tensorflow/core/kernels/data/BUILD            |  1 +
 .../experimental/threadpool_dataset_op.cc     |  2 +-
 .../kernels/data/parse_example_dataset_op.cc  |  4 +-
 .../data/stats_aggregator_dataset_op.cc       | 78 +++++++++++++++++--
 .../core/kernels/data/stats_aggregator_ops.cc | 11 +--
 .../core/ops/compat/ops_history.v1.pbtxt      |  8 ++
 tensorflow/core/ops/dataset_ops.cc            |  2 +
 .../kernel_tests/stats_dataset_ops_test.py    | 69 ++++++++++++++++
 .../python/data/experimental/ops/stats_ops.py | 17 +++-
 .../v1/tensorflow.data.experimental.pbtxt     |  2 +-
 .../v2/tensorflow.data.experimental.pbtxt     |  2 +-
 12 files changed, 179 insertions(+), 39 deletions(-)

diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 8c1151cb56..964a7d5f8c 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -278,15 +278,8 @@ class IteratorContext {
     // Function call support.
     std::function<void(std::function<void()>)> runner = nullptr;
 
-    // A function that returns the current `StatsAggregator` instance to be
-    // used when recording statistics about the iterator.
-    //
-    // NOTE(mrry): This is somewhat awkward, because (i) the `StatsAggregator`
-    // is a property of the `IteratorResource` (which this class does not know
-    // about), and (ii) it can change after the `IteratorContext` has been
-    // created. Better suggestions are welcome!
-    std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter =
-        nullptr;
+    // The `StatsAggregator` object to record statistics about the iterator.
+    std::shared_ptr<StatsAggregator> stats_aggregator = nullptr;
 
     // The FunctionLibraryRuntime object to be used to make function calls.
     FunctionLibraryRuntime* lib = nullptr;
@@ -320,13 +313,6 @@ class IteratorContext {
     return &params_.runner;
   }
 
-  std::shared_ptr<StatsAggregator> stats_aggregator() {
-    if (params_.stats_aggregator_getter) {
-      return params_.stats_aggregator_getter();
-    } else {
-      return nullptr;
-    }
-  }
 
   std::shared_ptr<const FunctionLibraryDefinition> function_library() {
     return params_.function_library;
@@ -344,8 +330,8 @@ class IteratorContext {
     return params_.allocator_getter;
   }
 
-  std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter() {
-    return params_.stats_aggregator_getter;
+  std::shared_ptr<StatsAggregator> stats_aggregator() {
+    return params_.stats_aggregator;
   }
 
   std::shared_ptr<model::Model> model() { return params_.model; }
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 6333853cdf..451f8c1a6c 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -458,6 +458,7 @@ tf_kernel_library(
     srcs = ["stats_aggregator_dataset_op.cc"],
     deps = [
         ":dataset",
+        "//tensorflow/core:core_cpu_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib_internal",
     ],
diff --git a/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
index c80493d3a1..8d561ca0e3 100644
--- a/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
@@ -191,7 +191,7 @@ class ThreadPoolDatasetOp : public UnaryDatasetOpKernel {
         params.runner = [pool](std::function<void()> c) {
           pool->Schedule(std::move(c));
         };
-        params.stats_aggregator_getter = ctx->stats_aggregator_getter();
+        params.stats_aggregator = ctx->stats_aggregator();
         params.lib = ctx->lib();
         params.function_library = ctx->function_library();
         params.allocator_getter = ctx->allocator_getter();
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index c28c06da62..1d1a717062 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -253,7 +253,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
               for (example::PerExampleFeatureStats feature_stats :
                    example_result.feature_stats) {
                 stats_aggregator->AddToHistogram(
-                    strings::StrCat("record_stats", ":features"),
+                    "features",
                     {static_cast<double>(feature_stats.features_count)});
                 stats_aggregator->IncrementCounter(
                     "features_count", "trainer", feature_stats.features_count);
@@ -261,7 +261,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
                     "feature_values_count", "trainer",
                     feature_stats.feature_values_count);
                 stats_aggregator->AddToHistogram(
-                    strings::StrCat("record_stats", ":feature-values"),
+                    "feature-values",
                     {static_cast<double>(feature_stats.feature_values_count)});
               }
             }
diff --git a/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc b/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
index c8abfb9eb5..c09a73fff1 100644
--- a/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
@@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include <memory>
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
@@ -22,6 +24,52 @@ namespace tensorflow {
 namespace data {
 namespace {
 
+class StatsAggregatorWithTagAndPrefix : public StatsAggregator {
+ public:
+  StatsAggregatorWithTagAndPrefix(
+      std::shared_ptr<StatsAggregator> stats_aggregator, const string& tag,
+      const string& prefix)
+      : wrapped_(stats_aggregator), tag_(tag), prefix_(prefix) {}
+
+  void AddToHistogram(const string& name,
+                      gtl::ArraySlice<double> values) override {
+    if (!tag_.empty()) {
+      wrapped_->AddToHistogram(strings::StrCat(tag_, "_", name), values);
+    } else {
+      wrapped_->AddToHistogram(name, values);
+    }
+  }
+
+  void AddScalar(const string& name, float value) override {
+    if (!tag_.empty()) {
+      wrapped_->AddScalar(strings::StrCat(tag_, "_", name), value);
+    } else {
+      wrapped_->AddScalar(name, value);
+    }
+  }
+
+  void EncodeToProto(Summary* out_summary) override {
+    wrapped_->EncodeToProto(out_summary);
+  }
+
+  void IncrementCounter(const string& name, const string& label,
+                        int64 val) override {
+    if (!prefix_.empty()) {
+      wrapped_->IncrementCounter(strings::StrCat(prefix_, "/", name), label,
+                                 val);
+    } else {
+      wrapped_->IncrementCounter(strings::StrCat("/tensorflow/", name), label,
+                                 val);
+    }
+  }
+
+ private:
+  std::shared_ptr<StatsAggregator> wrapped_;
+  string tag_;
+  string prefix_;
+  TF_DISALLOW_COPY_AND_ASSIGN(StatsAggregatorWithTagAndPrefix);
+};
+
 class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
  public:
   explicit SetStatsAggregatorDatasetOp(OpKernelConstruction* ctx)
@@ -33,8 +81,13 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 1),
                                        &stats_aggregator_resource));
     core::ScopedUnref unref_stats_aggregator(stats_aggregator_resource);
+    string tag;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag));
+    string prefix;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "counter_prefix", &prefix));
 
-    *output = new Dataset(ctx, input, ctx->input(1), stats_aggregator_resource);
+    *output = new Dataset(ctx, input, ctx->input(1), stats_aggregator_resource,
+                          tag, prefix);
   }
 
  private:
@@ -42,11 +95,14 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
    public:
     explicit Dataset(OpKernelContext* ctx, const DatasetBase* input,
                      const Tensor& resource_handle,
-                     StatsAggregatorResource* stats_aggregator_resource)
+                     StatsAggregatorResource* stats_aggregator_resource,
+                     const string& tag, const string& prefix)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           resource_handle_(resource_handle),
-          stats_aggregator_resource_(stats_aggregator_resource) {
+          stats_aggregator_resource_(stats_aggregator_resource),
+          tag_(tag),
+          prefix_(prefix) {
       input_->Ref();
       stats_aggregator_resource_->Ref();
     }
@@ -81,8 +137,13 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
       Node* resource_handle_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddTensor(resource_handle_, &resource_handle_node));
+      Node* tag_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(tag_, &tag_node));
+      Node* prefix_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(prefix_, &prefix_node));
       TF_RETURN_IF_ERROR(b->AddDataset(
-          this, {input_graph_node, resource_handle_node}, output));
+          this, {input_graph_node, resource_handle_node, tag_node, prefix_node},
+          output));
       return Status::OK();
     }
 
@@ -105,9 +166,10 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
         IteratorContext::Params params;
         params.env = ctx->env();
         params.runner = *(ctx->runner());
-        params.stats_aggregator_getter = [stats_aggregator_resource]() {
-          return stats_aggregator_resource->stats_aggregator();
-        };
+        params.stats_aggregator = std::shared_ptr<StatsAggregator>(
+            new StatsAggregatorWithTagAndPrefix(
+                stats_aggregator_resource->stats_aggregator(), dataset()->tag_,
+                dataset()->prefix_));
         params.lib = ctx->lib();
         params.function_library = ctx->function_library();
         params.allocator_getter = ctx->allocator_getter();
@@ -136,6 +198,8 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
     const DatasetBase* const input_;
     const Tensor resource_handle_;
     StatsAggregatorResource* stats_aggregator_resource_;
+    string tag_;
+    string prefix_;
   };
 };
 
diff --git a/tensorflow/core/kernels/data/stats_aggregator_ops.cc b/tensorflow/core/kernels/data/stats_aggregator_ops.cc
index a7ded67876..2d51467616 100644
--- a/tensorflow/core/kernels/data/stats_aggregator_ops.cc
+++ b/tensorflow/core/kernels/data/stats_aggregator_ops.cc
@@ -82,11 +82,12 @@ class StatsAggregatorImpl : public StatsAggregator {
     auto counters_map = get_counters_map();
     if (counters_map->find(name) == counters_map->end()) {
       counters_map->emplace(
-          name, monitoring::Counter<1>::New(
-                    /*streamz name*/ "/tensorflow/" + name,
-                    /*streamz description*/
-                    name + " generated or consumed by the component.",
-                    /*streamz label name*/ "component_descriptor"));
+          name,
+          monitoring::Counter<1>::New(
+              /*streamz name*/ name,
+              /*streamz description*/
+              strings::StrCat(name, " generated or consumed by the component."),
+              /*streamz label name*/ "component_descriptor"));
     }
     counters_map->at(name)->GetCell(label)->IncrementBy(val);
   }
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 4845767405..33f18ae13f 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -59785,6 +59785,14 @@ op {
     name: "stats_aggregator"
     type: DT_RESOURCE
   }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "counter_prefix"
+    type: DT_STRING
+  }
   output_arg {
     name: "handle"
     type: DT_VARIANT
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 71f4cc3c4c..889a6a4640 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -185,6 +185,8 @@ REGISTER_OP("ParseExampleDataset")
 REGISTER_OP("SetStatsAggregatorDataset")
     .Input("input_dataset: variant")
     .Input("stats_aggregator: resource")
+    .Input("tag: string")
+    .Input("counter_prefix: string")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index 6761fbd16b..19f5a62d45 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
 from tensorflow.python.data.experimental.kernel_tests import stats_dataset_test_base
 from tensorflow.python.data.experimental.ops import stats_ops
 from tensorflow.python.data.ops import dataset_ops
@@ -248,6 +249,74 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
         sess.run(next_element)
       self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0)
 
+  def testMultipleDatasetWithTags(self):
+    stats_aggregator = stats_ops.StatsAggregator()
+    dataset = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency")).apply(
+            stats_ops.set_stats_aggregator(stats_aggregator, "dataset1"))
+    dataset2 = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency")).apply(
+            stats_ops.set_stats_aggregator(stats_aggregator, "dataset2"))
+    iterator_0 = dataset.make_initializable_iterator()
+    iterator_1 = dataset2.make_initializable_iterator()
+    next_element = iterator_0.get_next() + iterator_1.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run([iterator_0.initializer, iterator_1.initializer])
+      for i in range(100):
+        self.assertEqual(i * 2, sess.run(next_element))
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "dataset1_record_latency", float(i + 1))
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "dataset2_record_latency", float(i + 1))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      self._assertSummaryHasCount(
+          sess.run(summary_t), "dataset1_record_latency", 100.0)
+      self._assertSummaryHasCount(
+          sess.run(summary_t), "dataset2_record_latency", 100.0)
+
+
+class FeatureStatsDatasetTest(
+    stats_dataset_test_base.StatsDatasetTestBase,
+    reader_dataset_ops_test_base.ReadBatchFeaturesTestBase):
+
+  def testFeaturesStats(self):
+    num_epochs = 5
+    total_records = num_epochs * self._num_records
+    batch_size = 2
+    stats_aggregator = stats_ops.StatsAggregator()
+    dataset = self.make_batch_feature(
+        filenames=self.test_filenames[0],
+        num_epochs=num_epochs,
+        batch_size=batch_size,
+        shuffle=True,
+        shuffle_seed=5,
+        drop_final_batch=False).apply(
+            stats_ops.set_stats_aggregator(stats_aggregator, "record_stats"))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run(iterator.initializer)
+      for _ in range(total_records // batch_size + 1 if total_records %
+                     batch_size else total_records // batch_size):
+        sess.run(next_element)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      self._assertSummaryHasCount(
+          sess.run(summary_t), "record_stats_features", total_records)
+      self._assertSummaryHasCount(
+          sess.run(summary_t), "record_stats_feature-values", total_records)
+      self._assertSummaryHasSum(
+          sess.run(summary_t), "record_stats_features", total_records * 4)
+      self._assertSummaryHasSum(
+          sess.run(summary_t), "record_stats_feature-values",
+          self._sum_keywords(1) * num_epochs + 3 * total_records)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/ops/stats_ops.py b/tensorflow/python/data/experimental/ops/stats_ops.py
index c918d223e8..54ef6fc3e8 100644
--- a/tensorflow/python/data/experimental/ops/stats_ops.py
+++ b/tensorflow/python/data/experimental/ops/stats_ops.py
@@ -89,15 +89,19 @@ class StatsAggregator(object):
 class _SetStatsAggregatorDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that acts as an identity, and sets given stats_aggregator."""
 
-  def __init__(self, input_dataset, stats_aggregator):
+  def __init__(self, input_dataset, stats_aggregator, tag, prefix):
     super(_SetStatsAggregatorDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._stats_aggregator = stats_aggregator
+    self._tag = tag
+    self._prefix = prefix
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.set_stats_aggregator_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         self._stats_aggregator._resource,  # pylint: disable=protected-access
+        self._tag,
+        self._prefix,
         **dataset_ops.flat_structure(self))
 
   @property
@@ -114,11 +118,15 @@ class _SetStatsAggregatorDataset(dataset_ops.UnaryDataset):
 
 
 @tf_export("data.experimental.set_stats_aggregator")
-def set_stats_aggregator(stats_aggregator):
+def set_stats_aggregator(stats_aggregator, tag="", counter_prefix=""):
   """Set the given `stats_aggregator` for aggregating the input dataset stats.
 
   Args:
-    stats_aggregator: A `tf.data.experimental.StatsAggregator` object.
+    stats_aggregator: A `tf.contrib.data.StatsAggregator` object.
+    tag: (Optional) String, all statistics recorded for the input `dataset`
+      will have given `tag` prepend with the name.
+    counter_prefix: (Optional) String, all statistics recorded as `counters`
+      will have the given `prefix` for the counter. Defaults to "/tesorflow".
 
   Returns:
     A `Dataset` transformation function, which can be passed to
@@ -126,7 +134,8 @@ def set_stats_aggregator(stats_aggregator):
   """
 
   def _apply_fn(dataset):
-    return _SetStatsAggregatorDataset(dataset, stats_aggregator)
+    return _SetStatsAggregatorDataset(dataset, stats_aggregator, tag,
+                                      counter_prefix)
 
   return _apply_fn
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
index b14585f8d7..2a1f899dc0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "set_stats_aggregator"
-    argspec: "args=[\'stats_aggregator\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'stats_aggregator\', \'tag\', \'counter_prefix\'], varargs=None, keywords=None, defaults=[\'\', \'\'], "
   }
   member_method {
     name: "shuffle_and_repeat"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
index b14585f8d7..2a1f899dc0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "set_stats_aggregator"
-    argspec: "args=[\'stats_aggregator\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'stats_aggregator\', \'tag\', \'counter_prefix\'], varargs=None, keywords=None, defaults=[\'\', \'\'], "
   }
   member_method {
     name: "shuffle_and_repeat"
-- 
GitLab


From 7566f3d5ad690c71c36e78611b1ae5913ec3e845 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 13:22:52 -0700
Subject: [PATCH 1073/1357] Fix handling of tuples in CreateCopyWithNewLayout.

If the layout of a single tensor in a tuple is different from its use, then
CreateCopyWithNewLayout will do a deep copy of the entire tuple.  Not only does
this operation create unnecessary copies of elements where the layout is the
same, it will throw an error if the tuple contains elements like token[] that
cannot be copied.  As a result, layout assignment on TPU occassionally causes
mysterious compilation failures for code that runs correctly on CPU and GPU.

PiperOrigin-RevId: 215615731
---
 .../compiler/xla/service/layout_assignment.cc | 28 +++++----
 .../xla/service/layout_assignment_test.cc     | 59 +++++++++++++++++++
 2 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 68a08a0886..cc4a342e9d 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -792,21 +792,27 @@ StatusOr<HloInstruction*> LayoutAssignment::CreateCopyWithNewLayout(
       << " instruction: " << instruction->ToString();
 
   if (ShapeUtil::IsTuple(instruction->shape())) {
-    // Deep-copy tuples.
+    // Copy tuple elements which have differing layouts.
     std::vector<HloInstruction*> element_copies;
     for (int64 i = 0; i < ShapeUtil::TupleElementCount(instruction->shape());
          ++i) {
+      const Shape& target_shape =
+          ShapeUtil::GetSubshape(shape_with_layout, {i});
+      const Shape& instr_shape =
+          ShapeUtil::GetSubshape(instruction->shape(), {i});
       HloInstruction* gte = instruction->parent()->AddInstruction(
-          HloInstruction::CreateGetTupleElement(
-              ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction,
-              i));
-      SetupCopiedInstruction(*instruction, gte, {i});
-      // Recurse to copy each elements.
-      TF_ASSIGN_OR_RETURN(
-          HloInstruction * element_copy,
-          CreateCopyWithNewLayout(
-              ShapeUtil::GetSubshape(shape_with_layout, {i}), gte));
-      element_copies.push_back(element_copy);
+          HloInstruction::CreateGetTupleElement(instr_shape, instruction, i));
+
+      if (ShapeUtil::Equal(target_shape, instr_shape)) {
+        // Shapes and layouts are equal, no need to copy.
+        element_copies.push_back(gte);
+      } else {
+        SetupCopiedInstruction(*instruction, gte, {i});
+        // Recurse to copy each element.
+        TF_ASSIGN_OR_RETURN(HloInstruction * element_copy,
+                            CreateCopyWithNewLayout(target_shape, gte));
+        element_copies.push_back(element_copy);
+      }
     }
     // Gather element copies into a tuple with a new Tuple instruction.
     HloInstruction* tuple_copy = instruction->parent()->AddInstruction(
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 15c16d667c..2c549cd872 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -1043,5 +1043,64 @@ TEST_F(LayoutAssignmentTest, PropagatingLayoutFromResultToOperand) {
                                     op::ShapeWithLayout(shape_copy))));
 }
 
+TEST_F(LayoutAssignmentTest, TupleCopyOnLayoutMismatch) {
+  // The first infeed uses layout {0,1}, while the second uses layout {1,0}.
+  // The mismatch forces a copy of the tuple.  The tuple contains a token, so
+  // layout assignment will fail if it tries to copy the whole tuple.
+  const char* module_str = R"(
+    HloModule TupleCopyOnLayoutMismatch
+
+    condition.1 (tup: (s32[], token[], f32[512,1024]{0,1})) -> pred[] {
+      tup.1 = (s32[], token[], f32[512,1024]{0,1}) parameter(0)
+      counter.1 = s32[] get-tuple-element(tup.1), index=0
+      five = s32[] constant(5)
+      ROOT lt = pred[] less-than(counter.1, five)
+    }
+
+    body.2 (tup: (s32[], token[], f32[512,1024]{0,1})) -> (s32[], token[], f32[512,1024]{0,1}) {
+      tup.2 = (s32[], token[], f32[512,1024]{0,1}) parameter(0)
+      counter.2 = s32[] get-tuple-element(tup.2), index=0
+      tok.2 = token[] get-tuple-element(tup.2), index=1
+
+      ifeed.2 = (f32[512,1024]{1,0}, token[]) infeed(tok.2)
+      next_tok = token[] get-tuple-element(ifeed.2), index=1
+      next_buf = f32[512,1024]{1,0} get-tuple-element(ifeed.2), index=0
+
+      one = s32[] constant(1)
+      next_counter = s32[] add(counter.2, one)
+      ROOT tup = (s32[], token[], f32[512,1024]{0,1}) tuple(next_counter, next_tok, next_buf)
+    }
+
+    ENTRY main () -> f32[512,1024]{0,1} {
+      start_tok = token[] after-all()
+
+      ifeed.3 = (f32[512,1024]{0,1}, token[]) infeed(start_tok)
+      itok = token[] get-tuple-element(ifeed.3), index=1
+      ibuf = f32[512,1024]{0,1} get-tuple-element(ifeed.3), index=0
+
+      zero = s32[] constant(0)
+      itup = (s32[], token[], f32[512,1024]{0,1}) tuple(zero, itok, ibuf)
+
+      loop = (s32[], token[], f32[512,1024]{0,1}) while(itup), condition=condition.1, body=body.2
+      ROOT result = f32[512,1024]{0,1} get-tuple-element(loop), index=2
+    }
+  )";
+
+  ParseAndVerifyModule(module_str);
+  ComputationLayout computation_layout(
+      module().entry_computation()->ComputeProgramShape());
+
+  // Sanity check to verify that there's a layout mismatch.
+  EXPECT_THAT(LayoutOf(&module(), "ibuf"), ElementsAre(0, 1));
+  EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0));
+
+  AssignLayouts(&module(), &computation_layout);
+
+  // Make sure that layout assignment did not magically eliminate the mismatch,
+  // in which case the test didn't prove anything.
+  EXPECT_THAT(LayoutOf(&module(), "ibuf"), ElementsAre(0, 1));
+  EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From c2c8cfe22492cf7fab804d32283b623632270035 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 13:25:22 -0700
Subject: [PATCH 1074/1357] Add the option of merging bidirectional RNN and
 LSTM outputs into a single output tensor.

This is useful if the output of both directions will be passed to the next layer as a single output, as it avoids adding a concatenation op, which can be expensive on mobile devices where memory movement is relatively expensive.

PiperOrigin-RevId: 215616140
---
 tensorflow/contrib/lite/c/builtin_op_data.h   |  16 ++
 .../contrib/lite/c/builtin_op_data_test.cc    |   2 +
 .../lite/core/api/flatbuffer_conversions.cc   |  34 ++-
 .../kernels/bidirectional_sequence_lstm.cc    | 116 +++++----
 .../bidirectional_sequence_lstm_test.cc       | 186 +++++++++++++-
 .../kernels/bidirectional_sequence_rnn.cc     |  85 +++---
 .../bidirectional_sequence_rnn_test.cc        |  56 +++-
 tensorflow/contrib/lite/schema/schema.fbs     |  12 +
 .../contrib/lite/schema/schema_generated.h    | 243 +++++++++++++++++-
 9 files changed, 640 insertions(+), 110 deletions(-)

diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h
index be9d551ee4..44daf7adaa 100644
--- a/tensorflow/contrib/lite/c/builtin_op_data.h
+++ b/tensorflow/contrib/lite/c/builtin_op_data.h
@@ -99,6 +99,12 @@ typedef struct {
   TfLiteFusedActivation activation;
 } TfLiteSequenceRNNParams;
 
+typedef struct {
+  bool time_major;
+  TfLiteFusedActivation activation;
+  bool merge_outputs;
+} TfLiteBidirectionalSequenceRNNParams;
+
 typedef enum {
   kTfLiteFullyConnectedWeightsFormatDefault = 0,
   kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
@@ -180,6 +186,16 @@ typedef struct {
   TfLiteLSTMKernelType kernel_type;
 } TfLiteLSTMParams;
 
+typedef struct {
+  // Parameters for the LSTM kernel.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+
+  // If true, store the outputs of both directions in the first output.
+  bool merge_outputs;
+} TfLiteBidirectionalSequenceLSTMParams;
+
 typedef struct {
   bool align_corners;
 } TfLiteResizeBilinearParams;
diff --git a/tensorflow/contrib/lite/c/builtin_op_data_test.cc b/tensorflow/contrib/lite/c/builtin_op_data_test.cc
index 4d0ba75e68..ba458b4252 100644
--- a/tensorflow/contrib/lite/c/builtin_op_data_test.cc
+++ b/tensorflow/contrib/lite/c/builtin_op_data_test.cc
@@ -73,6 +73,8 @@ TEST(IntArray, CanCompileStructs) {
   TfLiteFakeQuantParams fake_quant_params;
   TfLitePackParams pack_params;
   TfLiteOneHotParams one_hot_params;
+  TfLiteBidirectionalSequenceRNNParams bidi_sequence_rnn_params;
+  TfLiteBidirectionalSequenceLSTMParams bidi_sequence_lstm_params;
 }
 
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index e6900e0950..eac7db9a88 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -224,10 +224,8 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
-    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN:
     case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: {
-      TfLiteSequenceRNNParams* params =
-          allocator->AllocatePOD<TfLiteSequenceRNNParams>();
+      auto params = allocator->AllocatePOD<TfLiteSequenceRNNParams>();
       if (auto* sequence_rnn_params =
               op->builtin_options_as_SequenceRNNOptions()) {
         params->activation =
@@ -237,6 +235,19 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: {
+      auto params =
+          allocator->AllocatePOD<TfLiteBidirectionalSequenceRNNParams>();
+      if (auto* bidi_sequence_rnn_params =
+              op->builtin_options_as_BidirectionalSequenceRNNOptions()) {
+        params->activation = parse_activation(
+            bidi_sequence_rnn_params->fused_activation_function());
+        params->time_major = bidi_sequence_rnn_params->time_major();
+        params->merge_outputs = bidi_sequence_rnn_params->merge_outputs();
+      }
+      *builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
     case BuiltinOperator_RNN: {
       TfLiteRNNParams* params = allocator->AllocatePOD<TfLiteRNNParams>();
       if (auto* rnn_params = op->builtin_options_as_RNNOptions()) {
@@ -360,10 +371,9 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
-    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM:
     case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
     case BuiltinOperator_LSTM: {
-      TfLiteLSTMParams* params = allocator->AllocatePOD<TfLiteLSTMParams>();
+      auto params = allocator->AllocatePOD<TfLiteLSTMParams>();
       if (auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
         params->activation =
             parse_activation(lstm_params->fused_activation_function());
@@ -381,6 +391,20 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: {
+      auto params =
+          allocator->AllocatePOD<TfLiteBidirectionalSequenceLSTMParams>();
+      if (auto* bidi_lstm_params =
+              op->builtin_options_as_BidirectionalSequenceLSTMOptions()) {
+        params->activation =
+            parse_activation(bidi_lstm_params->fused_activation_function());
+        params->cell_clip = bidi_lstm_params->cell_clip();
+        params->proj_clip = bidi_lstm_params->proj_clip();
+        params->merge_outputs = bidi_lstm_params->merge_outputs();
+      }
+      *builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
     case BuiltinOperator_RESIZE_BILINEAR: {
       auto* params = allocator->AllocatePOD<TfLiteResizeBilinearParams>();
       if (auto* schema_params =
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index 66b947771c..0532528f52 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -119,7 +119,7 @@ constexpr int kBwAuxInputToOutputWeightsTensor = 47;  // Optional
 
 // Output tensors.
 constexpr int kFwOutputTensor = 0;
-constexpr int kBwOutputTensor = 1;
+constexpr int kBwOutputTensor = 1;  // Ignored if merge_outputs is set.
 
 // Temporary tensors.
 enum TemporaryTensor {
@@ -162,7 +162,8 @@ TfLiteStatus CheckLstmTensorDimensions(
     int input_gate_bias_tensor, int forget_gate_bias_tensor,
     int cell_gate_bias_tensor, int output_gate_bias_tensor,
     int projection_weights_tensor, int projection_bias_tensor) {
-  const auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
+      node->builtin_data);
 
   // Making sure clipping parameters have valid values.
   // == 0 means no clipping
@@ -347,10 +348,13 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 // tensors. Also check that the size of the input tensors match each other.
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   int* scratch_tensor_index = reinterpret_cast<int*>(node->user_data);
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
+      node->builtin_data);
 
   // Check we have all the inputs and outputs we need.
   TF_LITE_ENSURE_EQ(context, node->inputs->size, 48);
-  TF_LITE_ENSURE_EQ(context, node->outputs->size, 2);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size,
+                    params->merge_outputs ? 1 : 2);
 
   // Inferring batch size, number of outputs and sequence length and
   // number of cells from the input tensors.
@@ -368,6 +372,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, fw_input_to_output_weights->dims->data[1],
                     n_input);
 
+  const TfLiteTensor* bw_input_to_output_weights =
+      GetInput(context, node, kBwInputToOutputWeightsTensor);
+  const int n_bw_cell = bw_input_to_output_weights->dims->data[0];
+  TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->size, 2);
+  TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->data[1],
+                    n_input);
+
   const TfLiteTensor* fw_recurrent_to_output_weights =
       GetInput(context, node, kFwRecurrentToOutputWeightsTensor);
   TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->dims->size, 2);
@@ -375,6 +386,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                     n_fw_cell);
   const int n_fw_output = fw_recurrent_to_output_weights->dims->data[1];
 
+  const TfLiteTensor* bw_recurrent_to_output_weights =
+      GetInput(context, node, kBwRecurrentToOutputWeightsTensor);
+  TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->size, 2);
+  TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->data[0],
+                    n_bw_cell);
+  const int n_bw_output = bw_recurrent_to_output_weights->dims->data[1];
+
   // Check that input tensor dimensions matches with each other.
   TF_LITE_ENSURE_OK(
       context, CheckInputTensorDimensions(context, node, n_input, n_fw_output,
@@ -440,7 +458,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TfLiteIntArray* fw_output_size = TfLiteIntArrayCreate(3);
   fw_output_size->data[0] = max_time;
   fw_output_size->data[1] = n_batch;
-  fw_output_size->data[2] = n_fw_output;
+  fw_output_size->data[2] =
+      params->merge_outputs ? n_bw_output + n_fw_output : n_fw_output;
   TF_LITE_ENSURE_OK(context,
                     context->ResizeTensor(context, fw_output, fw_output_size));
 
@@ -479,39 +498,28 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, fw_scratch_buffer,
                                                    fw_scratch_buffer_size));
   // Same for the backward cell.
-  const TfLiteTensor* bw_input_to_output_weights =
-      GetInput(context, node, kBwInputToOutputWeightsTensor);
-  const int n_bw_cell = bw_input_to_output_weights->dims->data[0];
-  TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->size, 2);
-  TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->data[1],
-                    n_input);
-
-  const TfLiteTensor* bw_recurrent_to_output_weights =
-      GetInput(context, node, kBwRecurrentToOutputWeightsTensor);
-  TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->size, 2);
-  TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->data[0],
-                    n_bw_cell);
-  const int n_bw_output = bw_recurrent_to_output_weights->dims->data[1];
 
   // Check that input tensor dimensions matches with each other.
   TF_LITE_ENSURE_OK(
       context, CheckInputTensorDimensions(context, node, n_input, n_bw_output,
                                           n_bw_cell));
 
-  // Get the pointer to output, activation_state and cell_state buffer tensors.
-  TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+  // Get the pointer to activation_state and cell_state buffer tensors.
   TfLiteTensor* bw_activation_state =
       GetVariableInput(context, node, kBwInputActivationStateTensor);
   TfLiteTensor* bw_cell_state =
       GetVariableInput(context, node, kBwInputCellStateTensor);
 
   // Resize the output tensors.
-  TfLiteIntArray* bw_output_size = TfLiteIntArrayCreate(3);
-  bw_output_size->data[0] = max_time;
-  bw_output_size->data[1] = n_batch;
-  bw_output_size->data[2] = n_bw_output;
-  TF_LITE_ENSURE_OK(context,
-                    context->ResizeTensor(context, bw_output, bw_output_size));
+  if (!params->merge_outputs) {
+    TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+    TfLiteIntArray* bw_output_size = TfLiteIntArrayCreate(3);
+    bw_output_size->data[0] = max_time;
+    bw_output_size->data[1] = n_batch;
+    bw_output_size->data[2] = n_bw_output;
+    TF_LITE_ENSURE_OK(
+        context, context->ResizeTensor(context, bw_output, bw_output_size));
+  }
 
   // Check the shape of input state tensors.
   // These tensor may be 1D or 2D. It's fine as long as the total size is
@@ -705,7 +713,7 @@ TfLiteStatus EvalFloat(
     const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
     const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
     const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
     TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
     TfLiteTensor* cell_state, TfLiteTensor* output) {
   const int max_time = input->dims->data[0];
@@ -771,12 +779,13 @@ TfLiteStatus EvalFloat(
 
   // Loop through the sequence.
   const int input_step = n_batch * n_input;
-  const int output_step = n_batch * n_output;
+  const int output_step = n_batch * output->dims->data[2];
   for (int t = 0; t < max_time; t++) {
     // If this is the forward_sequence, step forward, otherwise step backwards.
     const int t_rel = forward_sequence ? t : max_time - t - 1;
     const float* input_ptr = input->data.f + t_rel * input_step;
-    float* output_ptr_time = output->data.f + t_rel * output_step;
+    float* output_ptr_time =
+        output->data.f + t_rel * output_step + output_offset;
 
     kernel_utils::LstmStepWithAuxInput(
         input_ptr, input_to_input_weights_ptr, input_to_forget_weights->data.f,
@@ -816,7 +825,7 @@ TfLiteStatus EvalHybrid(
     const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
     const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
     const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
     TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
     TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
     TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
@@ -972,12 +981,12 @@ TfLiteStatus EvalHybrid(
 
   // Feed the sequence into the LSTM step-by-step.
   const int input_step = n_batch * n_input;
-  const int output_step = n_batch * n_output;
+  const int output_step = n_batch * output->dims->data[2];
   for (int t = 0; t < max_time; t++) {
     // If this is the forward_sequence, step forward, otherwise step backwards.
     const int t_rel = forward_sequence ? t : max_time - t - 1;
     const float* input_ptr = input->data.f + t_rel * input_step;
-    float* output_ptr = output->data.f + t_rel * output_step;
+    float* output_ptr = output->data.f + t_rel * output_step + output_offset;
 
     kernel_utils::LstmStepWithAuxInput(
         input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
@@ -1011,7 +1020,8 @@ TfLiteStatus EvalHybrid(
 
 // The LSTM Op engine.
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
+      node->builtin_data);
 
   // Input tensor.
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
@@ -1107,7 +1117,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       GetVariableInput(context, node, kBwInputActivationStateTensor);
   TfLiteTensor* bw_cell_state =
       GetVariableInput(context, node, kBwInputCellStateTensor);
-  TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+  TfLiteTensor* bw_output = params->merge_outputs
+                                ? nullptr
+                                : GetOutput(context, node, kBwOutputTensor);
 
   // Temporary tensors.
   TfLiteTensor* fw_scratch_buffer =
@@ -1135,6 +1147,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* bw_aux_input_to_output_weights =
       GetOptionalInputTensor(context, node, kBwAuxInputToOutputWeightsTensor);
 
+  // Populate a TfLiteLSTMParams struct for the evaluation functions.
+  TfLiteLSTMParams lstm_params = {params->activation, params->cell_clip,
+                                  params->proj_clip, kTfLiteLSTMFullKernel};
+
+  const int bw_output_offset =
+      params->merge_outputs ? fw_recurrent_to_output_weights->dims->data[1] : 0;
+  const auto actual_bw_output = params->merge_outputs ? fw_output : bw_output;
+
   switch (fw_input_to_output_weights->type) {
     case kTfLiteFloat32: {
       TfLiteStatus fw_pass_status = EvalFloat(
@@ -1147,9 +1167,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_aux_input_to_forget_weights, fw_aux_input_to_cell_weights,
           fw_aux_input_to_output_weights, fw_input_gate_bias,
           fw_forget_gate_bias, fw_cell_bias, fw_output_gate_bias,
-          fw_projection_weights, fw_projection_bias, params,
-          /*forward_sequence=*/true, fw_scratch_buffer, fw_activation_state,
-          fw_cell_state, fw_output);
+          fw_projection_weights, fw_projection_bias, &lstm_params,
+          /*forward_sequence=*/true, /*output_offset=*/0, fw_scratch_buffer,
+          fw_activation_state, fw_cell_state, fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
 
       TfLiteStatus bw_pass_status = EvalFloat(
@@ -1162,9 +1182,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           bw_aux_input_to_forget_weights, bw_aux_input_to_cell_weights,
           bw_aux_input_to_output_weights, bw_input_gate_bias,
           bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
-          bw_projection_weights, bw_projection_bias, params,
-          /*forward_sequence=*/false, bw_scratch_buffer, bw_activation_state,
-          bw_cell_state, bw_output);
+          bw_projection_weights, bw_projection_bias, &lstm_params,
+          /*forward_sequence=*/false, bw_output_offset, bw_scratch_buffer,
+          bw_activation_state, bw_cell_state, actual_bw_output);
       TF_LITE_ENSURE_OK(context, bw_pass_status);
       return kTfLiteOk;
     }
@@ -1198,10 +1218,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_aux_input_to_forget_weights, fw_aux_input_to_cell_weights,
           fw_aux_input_to_output_weights, fw_input_gate_bias,
           fw_forget_gate_bias, fw_cell_bias, fw_output_gate_bias,
-          fw_projection_weights, fw_projection_bias, params,
-          /*forward_sequence=*/true, fw_scratch_buffer, scaling_factors,
-          prod_scaling_factors, recovered_cell_weights, input_quantized,
-          aux_input_quantized, fw_activation_state_quantized,
+          fw_projection_weights, fw_projection_bias, &lstm_params,
+          /*forward_sequence=*/true, /*output_offset=*/0, fw_scratch_buffer,
+          scaling_factors, prod_scaling_factors, recovered_cell_weights,
+          input_quantized, aux_input_quantized, fw_activation_state_quantized,
           fw_cell_state_quantized, fw_activation_state, fw_cell_state,
           fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
@@ -1216,12 +1236,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_aux_input_to_forget_weights, fw_aux_input_to_cell_weights,
           fw_aux_input_to_output_weights, bw_input_gate_bias,
           bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
-          bw_projection_weights, bw_projection_bias, params,
-          /*forward_sequence=*/false, bw_scratch_buffer, scaling_factors,
-          prod_scaling_factors, recovered_cell_weights, input_quantized,
-          aux_input_quantized, bw_activation_state_quantized,
+          bw_projection_weights, bw_projection_bias, &lstm_params,
+          /*forward_sequence=*/false, bw_output_offset, bw_scratch_buffer,
+          scaling_factors, prod_scaling_factors, recovered_cell_weights,
+          input_quantized, aux_input_quantized, bw_activation_state_quantized,
           bw_cell_state_quantized, bw_activation_state, bw_cell_state,
-          bw_output);
+          actual_bw_output);
       TF_LITE_ENSURE_OK(context, bw_pass_status);
       return kTfLiteOk;
     }
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
index 74ba8021c2..9cc04907e1 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
@@ -35,8 +35,8 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
   BidirectionalLSTMOpModel(int n_batch, int n_input, int n_cell, int n_output,
                            int sequence_length, bool use_cifg,
                            bool use_peephole, bool use_projection_weights,
-                           bool use_projection_bias, float cell_clip,
-                           float proj_clip,
+                           bool use_projection_bias, bool merge_outputs,
+                           float cell_clip, float proj_clip,
                            const std::vector<std::vector<int>>& input_shapes)
       : n_batch_(n_batch),
         n_input_(n_input),
@@ -175,7 +175,9 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
 
     fw_output_ = AddOutput(TensorType_FLOAT32);
 
-    bw_output_ = AddOutput(TensorType_FLOAT32);
+    if (!merge_outputs) {
+      bw_output_ = AddOutput(TensorType_FLOAT32);
+    }
 
     aux_input_ = AddNullInput();
     fw_aux_input_to_input_weights_ = AddNullInput();
@@ -188,9 +190,10 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
     bw_aux_input_to_output_weights_ = AddNullInput();
 
     SetBuiltinOp(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
-                 BuiltinOptions_LSTMOptions,
-                 CreateLSTMOptions(builder_, ActivationFunctionType_TANH,
-                                   cell_clip, proj_clip)
+                 BuiltinOptions_BidirectionalSequenceLSTMOptions,
+                 CreateBidirectionalSequenceLSTMOptions(
+                     builder_, ActivationFunctionType_TANH, cell_clip,
+                     proj_clip, merge_outputs)
                      .Union());
     BuildInterpreter(input_shapes);
   }
@@ -380,7 +383,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -526,6 +530,162 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
               ElementsAreArray(ArrayFloatNear(bw_expected)));
 }
 
+// Same as the previous test, yet with a single merged output tensor.
+TEST(LSTMOpTest, BlackBoxTestMergedOutput) {
+  const int n_batch = 1;
+  const int n_input = 2;
+  // n_cell and n_output have the same size when there is no projection.
+  const int n_cell = 4;
+  const int n_output = 4;
+  const int sequence_length = 3;
+
+  BidirectionalLSTMOpModel lstm(
+      n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
+      /*use_peephole=*/false, /*use_projection_weights=*/false,
+      /*use_projection_bias=*/false, /*merge_outputs=*/true, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
+      {
+          {sequence_length, n_batch, n_input},  // input tensor
+
+          // Forward cell
+          {n_cell, n_input},  // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {n_cell, n_output},  // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {0},  // cell_to_input_weight tensor
+          {0},  // cell_to_forget_weight tensor
+          {0},  // cell_to_output_weight tensor
+
+          {n_cell},  // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {0, 0},  // projection_weight tensor
+          {0},     // projection_bias tensor
+
+          // Backward cell
+          {n_cell, n_input},  // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {n_cell, n_output},  // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {0},  // cell_to_input_weight tensor
+          {0},  // cell_to_forget_weight tensor
+          {0},  // cell_to_output_weight tensor
+
+          {n_cell},  // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {0, 0},  // projection_weight tensor
+          {0},     // projection_bias tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {n_batch, sequence_length, 0},  // aux_input tensor
+          {n_cell, 0},                    // aux_fw_input_to_input tensor
+          {n_cell, 0},                    // aux_fw_input_to_forget tensor
+          {n_cell, 0},                    // aux_fw_input_to_cell tensor
+          {n_cell, 0},                    // aux_fw_input_to_output tensor
+          {n_cell, 0},                    // aux_bw_input_to_input tensor
+          {n_cell, 0},                    // aux_bw_input_to_forget tensor
+          {n_cell, 0},                    // aux_bw_input_to_cell tensor
+          {n_cell, 0},                    // aux_bw_input_to_output tensor
+      });
+
+  lstm.SetInputToInputWeights({-0.45018822, -0.02338299, -0.0870589,
+                               -0.34550029, 0.04266912, -0.15680569,
+                               -0.34856534, 0.43890524});
+
+  lstm.SetInputToCellWeights({-0.50013041, 0.1370284, 0.11810488, 0.2013163,
+                              -0.20583314, 0.44344562, 0.22077113,
+                              -0.29909778});
+
+  lstm.SetInputToForgetWeights({0.09701663, 0.20334584, -0.50592935,
+                                -0.31343272, -0.40032279, 0.44781327,
+                                0.01387155, -0.35593212});
+
+  lstm.SetInputToOutputWeights({-0.25065863, -0.28290087, 0.04613829,
+                                0.40525138, 0.44272184, 0.03897077, -0.1556896,
+                                0.19487578});
+
+  lstm.SetInputGateBias({0., 0., 0., 0.});
+
+  lstm.SetCellBias({0., 0., 0., 0.});
+
+  lstm.SetForgetGateBias({1., 1., 1., 1.});
+
+  lstm.SetOutputGateBias({0., 0., 0., 0.});
+
+  lstm.SetRecurrentToInputWeights(
+      {-0.0063535, -0.2042388, 0.31454784, -0.35746509, 0.28902304, 0.08183324,
+       -0.16555229, 0.02286911, -0.13566875, 0.03034258, 0.48091322,
+       -0.12528998, 0.24077177, -0.51332325, -0.33502164, 0.10629296});
+
+  lstm.SetRecurrentToCellWeights(
+      {-0.3407414, 0.24443203, -0.2078532, 0.26320225, 0.05695659, -0.00123841,
+       -0.4744786, -0.35869038, -0.06418842, -0.13502428, -0.501764, 0.22830659,
+       -0.46367589, 0.26016325, -0.03894562, -0.16368064});
+
+  lstm.SetRecurrentToForgetWeights(
+      {-0.48684245, -0.06655136, 0.42224967, 0.2112639, 0.27654213, 0.20864892,
+       -0.07646349, 0.45877004, 0.00141793, -0.14609534, 0.36447752, 0.09196436,
+       0.28053468, 0.01560611, -0.20127171, -0.01140004});
+
+  lstm.SetRecurrentToOutputWeights(
+      {0.43385774, -0.17194885, 0.2718237, 0.09215671, 0.24107647, -0.39835793,
+       0.18212086, 0.01301402, 0.48572797, -0.50656658, 0.20047462, -0.20607421,
+       -0.51818722, -0.15390486, 0.0468148, 0.39922136});
+
+  // Input should have n_input * sequence_length many values.
+  static float lstm_input[] = {2., 3., 3., 4., 1., 1.};
+  static float lstm_fw_golden_output[] = {
+      -0.02973187, 0.1229473,  0.20885126, -0.15358765,
+      -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+      -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+  static float lstm_bw_golden_output[] = {
+      -0.0806187, 0.139077, 0.400476,   -0.197842, -0.0332076, 0.123838,
+      0.309777,   -0.17621, -0.0490733, 0.0739237, 0.067706,   -0.0208124};
+
+  float* batch0_start = lstm_input;
+  float* batch0_end = batch0_start + lstm.num_inputs() * lstm.sequence_length();
+
+  lstm.SetInput(0, batch0_start, batch0_end);
+
+  lstm.Invoke();
+
+  std::vector<float> merged_expected;
+  for (int k = 0; k < lstm.sequence_length(); k++) {
+    merged_expected.insert(
+        merged_expected.end(),
+        lstm_fw_golden_output + k * lstm.num_fw_outputs(),
+        lstm_fw_golden_output + (k + 1) * lstm.num_fw_outputs());
+    merged_expected.insert(
+        merged_expected.end(),
+        lstm_bw_golden_output + k * lstm.num_bw_outputs(),
+        lstm_bw_golden_output + (k + 1) * lstm.num_bw_outputs());
+  }
+  EXPECT_THAT(lstm.GetFwOutput(),
+              ElementsAreArray(ArrayFloatNear(merged_expected)));
+}
+
 TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
   const int n_batch = 1;
   const int n_input = 2;
@@ -537,7 +697,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -696,7 +857,8 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
       /*use_peephole=*/true, /*use_projection_weights=*/false,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -845,7 +1007,8 @@ TEST(LSTMOpTest,
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
       /*use_peephole=*/true, /*use_projection_weights=*/false,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -994,7 +1157,8 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/true, /*use_projection_weights=*/true,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
index 2f896c5289..9f62ac3f2c 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
@@ -47,7 +47,7 @@ constexpr int kFwAuxWeightsTensor = 10;  // Optional.
 constexpr int kBwAuxWeightsTensor = 11;  // Optional.
 // Output tensors.
 constexpr int kFwOutputTensor = 0;
-constexpr int kBwOutputTensor = 1;
+constexpr int kBwOutputTensor = 1;  // Only if merge_outputs is false.
 
 // Temporary tensors.
 enum TemporaryTensor {
@@ -70,9 +70,13 @@ void Free(TfLiteContext* context, void* buffer) {
 }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceRNNParams*>(
+      node->builtin_data);
+
   // Check we have all the inputs and outputs we need.
   TF_LITE_ENSURE_EQ(context, node->inputs->size, 12);
-  TF_LITE_ENSURE_EQ(context, node->outputs->size, 2);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size,
+                    params->merge_outputs ? 1 : 2);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* fw_input_weights =
@@ -142,9 +146,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                       bw_aux_input_weights->dims->data[1]);
   }
 
-  TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor);
-  TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
-
   const bool is_hybrid_op =
       (fw_input_weights->type == kTfLiteUInt8 && input->type == kTfLiteFloat32);
 
@@ -233,18 +234,23 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   }
 
   // Resize outputs.
+  TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor);
   TfLiteIntArray* fw_output_size_array = TfLiteIntArrayCreate(3);
   fw_output_size_array->data[0] = batch_size;
   fw_output_size_array->data[1] = max_time;
-  fw_output_size_array->data[2] = fw_num_units;
+  fw_output_size_array->data[2] =
+      params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
   TF_LITE_ENSURE_OK(
       context, context->ResizeTensor(context, fw_output, fw_output_size_array));
-  TfLiteIntArray* bw_output_size_array = TfLiteIntArrayCreate(3);
-  bw_output_size_array->data[0] = batch_size;
-  bw_output_size_array->data[1] = max_time;
-  bw_output_size_array->data[2] = bw_num_units;
-  TF_LITE_ENSURE_OK(
-      context, context->ResizeTensor(context, bw_output, bw_output_size_array));
+  if (!params->merge_outputs) {
+    TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+    TfLiteIntArray* bw_output_size_array = TfLiteIntArrayCreate(3);
+    bw_output_size_array->data[0] = batch_size;
+    bw_output_size_array->data[1] = max_time;
+    bw_output_size_array->data[2] = bw_num_units;
+    TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, bw_output,
+                                                     bw_output_size_array));
+  }
 
   return kTfLiteOk;
 }
@@ -256,9 +262,9 @@ TfLiteStatus EvalFloat(
     const TfLiteTensor* bw_recurrent_weights, const TfLiteTensor* bw_bias,
     const TfLiteTensor* aux_input, const TfLiteTensor* fw_aux_input_weights,
     const TfLiteTensor* bw_aux_input_weights,
-    const TfLiteSequenceRNNParams* params, TfLiteTensor* fw_hidden_state,
-    TfLiteTensor* fw_output, TfLiteTensor* bw_hidden_state,
-    TfLiteTensor* bw_output) {
+    const TfLiteBidirectionalSequenceRNNParams* params,
+    TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output,
+    TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) {
   const int batch_size = input->dims->data[0];
   const int max_time = input->dims->data[1];
   const int input_size = input->dims->data[2];
@@ -281,10 +287,15 @@ TfLiteStatus EvalFloat(
                                               ? bw_aux_input_weights->data.f
                                               : nullptr;
 
+  const int fw_output_step =
+      params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
+  const int bw_output_step =
+      params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
   for (int b = 0; b < batch_size; b++) {
     // Forward cell.
     float* fw_hidden_state_ptr_batch =
         fw_hidden_state->data.f + b * fw_num_units;
+    float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time;
     for (int s = 0; s < max_time; s++) {
       const float* input_ptr_batch =
           input->data.f + b * input_size * max_time + s * input_size;
@@ -292,8 +303,7 @@ TfLiteStatus EvalFloat(
           (aux_input != nullptr)
               ? aux_input->data.f + b * input_size * max_time + s * input_size
               : nullptr;
-      float* output_ptr_batch =
-          fw_output->data.f + b * fw_num_units * max_time + s * fw_num_units;
+      float* output_ptr_batch = fw_output_offset + s * fw_output_step;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch,
@@ -304,6 +314,10 @@ TfLiteStatus EvalFloat(
     // Backward cell.
     float* bw_hidden_state_ptr_batch =
         bw_hidden_state->data.f + b * bw_num_units;
+    float* bw_output_offset =
+        params->merge_outputs
+            ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units
+            : bw_output->data.f + b * bw_output_step * max_time;
     for (int s = max_time - 1; s >= 0; s--) {
       const float* input_ptr_batch =
           input->data.f + b * input_size * max_time + s * input_size;
@@ -311,8 +325,7 @@ TfLiteStatus EvalFloat(
           (aux_input != nullptr)
               ? aux_input->data.f + b * input_size * max_time + s * input_size
               : nullptr;
-      float* output_ptr_batch =
-          bw_output->data.f + b * bw_num_units * max_time + s * bw_num_units;
+      float* output_ptr_batch = bw_output_offset + s * bw_output_step;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch,
@@ -331,11 +344,12 @@ TfLiteStatus EvalHybrid(
     const TfLiteTensor* bw_recurrent_weights, const TfLiteTensor* bw_bias,
     const TfLiteTensor* aux_input, const TfLiteTensor* aux_fw_input_weights,
     const TfLiteTensor* aux_bw_input_weights,
-    const TfLiteSequenceRNNParams* params, TfLiteTensor* scaling_factors,
-    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
-    TfLiteTensor* fw_hidden_state_quantized, TfLiteTensor* fw_hidden_state,
-    TfLiteTensor* fw_output, TfLiteTensor* bw_hidden_state_quantized,
-    TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) {
+    const TfLiteBidirectionalSequenceRNNParams* params,
+    TfLiteTensor* scaling_factors, TfLiteTensor* input_quantized,
+    TfLiteTensor* aux_input_quantized, TfLiteTensor* fw_hidden_state_quantized,
+    TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output,
+    TfLiteTensor* bw_hidden_state_quantized, TfLiteTensor* bw_hidden_state,
+    TfLiteTensor* bw_output) {
   const int batch_size = input->dims->data[0];
   const int max_time = input->dims->data[1];
   const int input_size = input->dims->data[2];
@@ -384,10 +398,15 @@ TfLiteStatus EvalHybrid(
       reinterpret_cast<int8_t*>(bw_hidden_state_quantized->data.uint8);
   float* scaling_factors_ptr = scaling_factors->data.f;
 
+  const int fw_output_step =
+      params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
+  const int bw_output_step =
+      params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
   for (int b = 0; b < batch_size; b++) {
     // Forward cell.
     float* fw_hidden_state_ptr_batch =
         fw_hidden_state->data.f + b * fw_num_units;
+    float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time;
     for (int s = 0; s < max_time; s++) {
       const float* input_ptr_batch =
           input->data.f + b * input_size * max_time + s * input_size;
@@ -395,8 +414,7 @@ TfLiteStatus EvalHybrid(
           (aux_input != nullptr)
               ? aux_input->data.f + b * input_size * max_time + s * input_size
               : nullptr;
-      float* output_ptr_batch =
-          fw_output->data.f + b * fw_num_units * max_time + s * fw_num_units;
+      float* output_ptr_batch = fw_output_offset + s * fw_output_step;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale,
@@ -411,6 +429,10 @@ TfLiteStatus EvalHybrid(
     // Backward cell.
     float* bw_hidden_state_ptr_batch =
         bw_hidden_state->data.f + b * bw_num_units;
+    float* bw_output_offset =
+        params->merge_outputs
+            ? fw_output->data.f + b * bw_output_step * max_time
+            : bw_output->data.f + b * bw_output_step * max_time;
     for (int s = max_time - 1; s >= 0; s--) {
       const float* input_ptr_batch =
           input->data.f + b * input_size * max_time + s * input_size;
@@ -418,8 +440,7 @@ TfLiteStatus EvalHybrid(
           (aux_input != nullptr)
               ? aux_input->data.f + b * input_size * max_time + s * input_size
               : nullptr;
-      float* output_ptr_batch =
-          bw_output->data.f + b * bw_num_units * max_time + s * bw_num_units;
+      float* output_ptr_batch = bw_output_offset + s * bw_output_step;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale,
@@ -436,8 +457,8 @@ TfLiteStatus EvalHybrid(
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const auto* params =
-      reinterpret_cast<TfLiteSequenceRNNParams*>(node->builtin_data);
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceRNNParams*>(
+      node->builtin_data);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* fw_input_weights =
@@ -465,7 +486,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       GetVariableInput(context, node, kBwHiddenStateTensor);
 
   TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor);
-  TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+  TfLiteTensor* bw_output = params->merge_outputs
+                                ? nullptr
+                                : GetOutput(context, node, kBwOutputTensor);
 
   switch (fw_input_weights->type) {
     case kTfLiteFloat32:
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
index 3e34ba6196..f555c472f5 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
@@ -654,7 +654,7 @@ const std::initializer_list<float> recurrent_weights = {
 class BidirectionalRNNOpModel : public SingleOpModel {
  public:
   BidirectionalRNNOpModel(int batches, int sequence_len, int fw_units,
-                          int bw_units, int input_size)
+                          int bw_units, int input_size, bool merge_outputs)
       : batches_(batches),
         sequence_len_(sequence_len),
         fw_units_(fw_units),
@@ -675,12 +675,15 @@ class BidirectionalRNNOpModel : public SingleOpModel {
     aux_bw_weights_ = AddNullInput();
 
     fw_output_ = AddOutput(TensorType_FLOAT32);
-    bw_output_ = AddOutput(TensorType_FLOAT32);
+    if (!merge_outputs) {
+      bw_output_ = AddOutput(TensorType_FLOAT32);
+    }
 
     SetBuiltinOp(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
-                 BuiltinOptions_SequenceRNNOptions,
-                 CreateSequenceRNNOptions(builder_, /*time_major=*/false,
-                                          ActivationFunctionType_RELU)
+                 BuiltinOptions_BidirectionalSequenceRNNOptions,
+                 CreateBidirectionalSequenceRNNOptions(
+                     builder_, /*time_major=*/false,
+                     ActivationFunctionType_RELU, merge_outputs)
                      .Union());
     BuildInterpreter({
         {batches_, sequence_len_, input_size_},  // input
@@ -767,7 +770,7 @@ class BidirectionalRNNOpModel : public SingleOpModel {
 TEST(BidirectionalRNNOpTest, BlackBoxTest) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8);
+                              /*input_size=*/8, /*merge_outputs=*/false);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -800,12 +803,49 @@ TEST(BidirectionalRNNOpTest, BlackBoxTest) {
   EXPECT_THAT(rnn.GetBwOutput(), ElementsAreArray(ArrayFloatNear(bw_expected)));
 }
 
+// Same as the previous test, yet with merged outputs.
+TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) {
+  BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
+                              /*fw_units=*/16, /*bw_units=*/16,
+                              /*input_size=*/8, /*merge_outputs=*/true);
+  rnn.SetFwWeights(weights);
+  rnn.SetBwWeights(weights);
+  rnn.SetFwBias(biases);
+  rnn.SetBwBias(biases);
+  rnn.SetFwRecurrentWeights(recurrent_weights);
+  rnn.SetBwRecurrentWeights(recurrent_weights);
+
+  const int input_sequence_size = rnn.input_size() * rnn.sequence_len();
+  float* batch_start = rnn_input;
+  float* batch_end = batch_start + input_sequence_size;
+  rnn.SetInput(0, batch_start, batch_end);
+  rnn.SetInput(input_sequence_size, batch_start, batch_end);
+
+  rnn.Invoke();
+
+  std::vector<float> merged_expected;
+  for (int bid = 0; bid < rnn.num_batches(); bid++) {
+    for (int step = 0; step < rnn.sequence_len(); step++) {
+      merged_expected.insert(
+          merged_expected.end(),
+          rnn_golden_fw_output + rnn.num_fw_units() * step,
+          rnn_golden_fw_output + rnn.num_fw_units() * (step + 1));
+      merged_expected.insert(
+          merged_expected.end(),
+          rnn_golden_bw_output + rnn.num_bw_units() * step,
+          rnn_golden_bw_output + rnn.num_bw_units() * (step + 1));
+    }
+  }
+  EXPECT_THAT(rnn.GetFwOutput(),
+              ElementsAreArray(ArrayFloatNear(merged_expected)));
+}
+
 // Check that if the input sequence is reversed the outputs are the same just
 // forward and backward are swapped (and reversed).
 TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8);
+                              /*input_size=*/8, /*merge_outputs=*/false);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -851,7 +891,7 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) {
 TEST(BidirectionalRNNOpTest, EndToEndTest) {
   BidirectionalRNNOpModel rnn(/*batches=*/1, /*sequence_len=*/4,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8);
+                              /*input_size=*/8, /*merge_outputs=*/false);
   const int output_size = 4;
   float dnn_weights[] = {
       -0.5782342,  -0.052212059, 0.73036242,  -0.81216097, -0.80088139,
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index 3da3188c3a..ff8430827c 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -248,6 +248,8 @@ union BuiltinOptions {
   SquareOptions,
   ZerosLikeOptions,
   FillOptions,
+  BidirectionalSequenceLSTMOptions,
+  BidirectionalSequenceRNNOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -327,6 +329,7 @@ table SequenceRNNOptions {
 table BidirectionalSequenceRNNOptions {
   time_major:bool;
   fused_activation_function:ActivationFunctionType;
+  merge_outputs: bool;
 }
 
 enum FullyConnectedOptionsWeightsFormat: byte {
@@ -391,6 +394,15 @@ table LSTMOptions {
   kernel_type: LSTMKernelType = FULL;
 }
 
+table BidirectionalSequenceLSTMOptions {
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true, store the outputs of both directions into the first output.
+  merge_outputs: bool;
+}
+
 table ResizeBilinearOptions {
   new_height: int (deprecated);
   new_width: int (deprecated);
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index 23ac8484de..f3cb113c9c 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -79,6 +79,9 @@ struct LocalResponseNormalizationOptionsT;
 struct LSTMOptions;
 struct LSTMOptionsT;
 
+struct BidirectionalSequenceLSTMOptions;
+struct BidirectionalSequenceLSTMOptionsT;
+
 struct ResizeBilinearOptions;
 struct ResizeBilinearOptionsT;
 
@@ -676,11 +679,13 @@ enum BuiltinOptions {
   BuiltinOptions_SquareOptions = 66,
   BuiltinOptions_ZerosLikeOptions = 67,
   BuiltinOptions_FillOptions = 68,
+  BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
+  BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_FillOptions
+  BuiltinOptions_MAX = BuiltinOptions_BidirectionalSequenceRNNOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[69] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -750,7 +755,9 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[69] {
     BuiltinOptions_FloorDivOptions,
     BuiltinOptions_SquareOptions,
     BuiltinOptions_ZerosLikeOptions,
-    BuiltinOptions_FillOptions
+    BuiltinOptions_FillOptions,
+    BuiltinOptions_BidirectionalSequenceLSTMOptions,
+    BuiltinOptions_BidirectionalSequenceRNNOptions
   };
   return values;
 }
@@ -826,6 +833,8 @@ inline const char * const *EnumNamesBuiltinOptions() {
     "SquareOptions",
     "ZerosLikeOptions",
     "FillOptions",
+    "BidirectionalSequenceLSTMOptions",
+    "BidirectionalSequenceRNNOptions",
     nullptr
   };
   return names;
@@ -1112,6 +1121,14 @@ template<> struct BuiltinOptionsTraits<FillOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
 };
 
+template<> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
+};
+
+template<> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -1687,6 +1704,22 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_FillOptions ?
       reinterpret_cast<const FillOptionsT *>(value) : nullptr;
   }
+  BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() {
+    return type == BuiltinOptions_BidirectionalSequenceLSTMOptions ?
+      reinterpret_cast<BidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
+  }
+  const BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() const {
+    return type == BuiltinOptions_BidirectionalSequenceLSTMOptions ?
+      reinterpret_cast<const BidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
+  }
+  BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() {
+    return type == BuiltinOptions_BidirectionalSequenceRNNOptions ?
+      reinterpret_cast<BidirectionalSequenceRNNOptionsT *>(value) : nullptr;
+  }
+  const BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() const {
+    return type == BuiltinOptions_BidirectionalSequenceRNNOptions ?
+      reinterpret_cast<const BidirectionalSequenceRNNOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -2834,9 +2867,11 @@ struct BidirectionalSequenceRNNOptionsT : public flatbuffers::NativeTable {
   typedef BidirectionalSequenceRNNOptions TableType;
   bool time_major;
   ActivationFunctionType fused_activation_function;
+  bool merge_outputs;
   BidirectionalSequenceRNNOptionsT()
       : time_major(false),
-        fused_activation_function(ActivationFunctionType_NONE) {
+        fused_activation_function(ActivationFunctionType_NONE),
+        merge_outputs(false) {
   }
 };
 
@@ -2844,7 +2879,8 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
   typedef BidirectionalSequenceRNNOptionsT NativeTableType;
   enum {
     VT_TIME_MAJOR = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6
+    VT_FUSED_ACTIVATION_FUNCTION = 6,
+    VT_MERGE_OUTPUTS = 8
   };
   bool time_major() const {
     return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0;
@@ -2852,10 +2888,14 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
   ActivationFunctionType fused_activation_function() const {
     return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  bool merge_outputs() const {
+    return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0;
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
            verifier.EndTable();
   }
   BidirectionalSequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -2872,6 +2912,9 @@ struct BidirectionalSequenceRNNOptionsBuilder {
   void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
     fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
   }
+  void add_merge_outputs(bool merge_outputs) {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS, static_cast<uint8_t>(merge_outputs), 0);
+  }
   explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
         : fbb_(_fbb) {
     start_ = fbb_.StartTable();
@@ -2887,8 +2930,10 @@ struct BidirectionalSequenceRNNOptionsBuilder {
 inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
     flatbuffers::FlatBufferBuilder &_fbb,
     bool time_major = false,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    bool merge_outputs = false) {
   BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
+  builder_.add_merge_outputs(merge_outputs);
   builder_.add_fused_activation_function(fused_activation_function);
   builder_.add_time_major(time_major);
   return builder_.Finish();
@@ -3424,6 +3469,96 @@ inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
 
 flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
+  typedef BidirectionalSequenceLSTMOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  float cell_clip;
+  float proj_clip;
+  bool merge_outputs;
+  BidirectionalSequenceLSTMOptionsT()
+      : fused_activation_function(ActivationFunctionType_NONE),
+        cell_clip(0.0f),
+        proj_clip(0.0f),
+        merge_outputs(false) {
+  }
+};
+
+struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef BidirectionalSequenceLSTMOptionsT NativeTableType;
+  enum {
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8,
+    VT_MERGE_OUTPUTS = 10
+  };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const {
+    return GetField<float>(VT_CELL_CLIP, 0.0f);
+  }
+  float proj_clip() const {
+    return GetField<float>(VT_PROJ_CLIP, 0.0f);
+  }
+  bool merge_outputs() const {
+    return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<float>(verifier, VT_CELL_CLIP) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
+           verifier.EndTable();
+  }
+  BidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BidirectionalSequenceLSTMOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip) {
+    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip) {
+    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  void add_merge_outputs(bool merge_outputs) {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS, static_cast<uint8_t>(merge_outputs), 0);
+  }
+  explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  BidirectionalSequenceLSTMOptionsBuilder &operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
+  flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    float cell_clip = 0.0f,
+    float proj_clip = 0.0f,
+    bool merge_outputs = false) {
+  BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
+  builder_.add_proj_clip(proj_clip);
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_merge_outputs(merge_outputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct ResizeBilinearOptionsT : public flatbuffers::NativeTable {
   typedef ResizeBilinearOptions TableType;
   bool align_corners;
@@ -6347,6 +6482,12 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const FillOptions *builtin_options_as_FillOptions() const {
     return builtin_options_type() == BuiltinOptions_FillOptions ? static_cast<const FillOptions *>(builtin_options()) : nullptr;
   }
+  const BidirectionalSequenceLSTMOptions *builtin_options_as_BidirectionalSequenceLSTMOptions() const {
+    return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
+  }
+  const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const {
+    return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -6650,6 +6791,14 @@ template<> inline const FillOptions *Operator::builtin_options_as<FillOptions>()
   return builtin_options_as_FillOptions();
 }
 
+template<> inline const BidirectionalSequenceLSTMOptions *Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const {
+  return builtin_options_as_BidirectionalSequenceLSTMOptions();
+}
+
+template<> inline const BidirectionalSequenceRNNOptions *Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const {
+  return builtin_options_as_BidirectionalSequenceRNNOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -7407,6 +7556,7 @@ inline void BidirectionalSequenceRNNOptions::UnPackTo(BidirectionalSequenceRNNOp
   (void)_resolver;
   { auto _e = time_major(); _o->time_major = _e; };
   { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  { auto _e = merge_outputs(); _o->merge_outputs = _e; };
 }
 
 inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> BidirectionalSequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
@@ -7419,10 +7569,12 @@ inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalS
   struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
   auto _time_major = _o->time_major;
   auto _fused_activation_function = _o->fused_activation_function;
+  auto _merge_outputs = _o->merge_outputs;
   return tflite::CreateBidirectionalSequenceRNNOptions(
       _fbb,
       _time_major,
-      _fused_activation_function);
+      _fused_activation_function,
+      _merge_outputs);
 }
 
 inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
@@ -7657,6 +7809,41 @@ inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBuffe
       _kernel_type);
 }
 
+inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new BidirectionalSequenceLSTMOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void BidirectionalSequenceLSTMOptions::UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  { auto _e = cell_clip(); _o->cell_clip = _e; };
+  { auto _e = proj_clip(); _o->proj_clip = _e; };
+  { auto _e = merge_outputs(); _o->merge_outputs = _e; };
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> BidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateBidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _cell_clip = _o->cell_clip;
+  auto _proj_clip = _o->proj_clip;
+  auto _merge_outputs = _o->merge_outputs;
+  return tflite::CreateBidirectionalSequenceLSTMOptions(
+      _fbb,
+      _fused_activation_function,
+      _cell_clip,
+      _proj_clip,
+      _merge_outputs);
+}
+
 inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new ResizeBilinearOptionsT();
   UnPackTo(_o, _resolver);
@@ -9425,6 +9612,14 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const FillOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -9715,6 +9910,14 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const FillOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -9993,6 +10196,14 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const FillOptionsT *>(value);
       return CreateFillOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptionsT *>(value);
+      return CreateBidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptionsT *>(value);
+      return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -10271,6 +10482,14 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new FillOptionsT(*reinterpret_cast<FillOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      value = new BidirectionalSequenceLSTMOptionsT(*reinterpret_cast<BidirectionalSequenceLSTMOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      value = new BidirectionalSequenceRNNOptionsT(*reinterpret_cast<BidirectionalSequenceRNNOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -10618,6 +10837,16 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<BidirectionalSequenceLSTMOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<BidirectionalSequenceRNNOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
-- 
GitLab


From 261b6958fb95db18cd28c1aba140a627deb790a1 Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Wed, 3 Oct 2018 13:25:23 -0700
Subject: [PATCH 1075/1357] Enable collective graph key test for GPU builds.

In the process, properly place nodes on devices in the collective graph key
test.

PiperOrigin-RevId: 215616146
---
 .../common_runtime/direct_session_test.cc     | 58 +++++++++----------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index e3e431f800..a6440c55ad 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -2262,8 +2262,8 @@ class DirectSessionCollectiveTest : public ::testing::Test {
     TF_RETURN_IF_ERROR(session->Create(g));
     std::vector<Tensor> outputs;
     TF_RETURN_IF_ERROR(
-        session->Run({{"input1:0", t1}, {"input2:0", t2}}, {},
-                     {"collective_call1:0", "collective_call2:0"}, &outputs));
+        session->Run({{"input0:0", t1}, {"input1:0", t2}}, {},
+                     {"collective_call0:0", "collective_call1:0"}, &outputs));
     DirectSession* direct_session = static_cast<DirectSession*>(session.get());
     {
       mutex_lock l(direct_session->collective_graph_key_lock_);
@@ -2301,6 +2301,26 @@ class DirectSessionCollectiveTest : public ::testing::Test {
         }});
   }
 
+  NodeDef Input(int id) {
+    AttrValue dtype_attr;
+    SetAttrValue(DT_FLOAT, &dtype_attr);
+    NodeDef input;
+    input.set_name(strings::StrCat("input", id));
+    input.set_op("Placeholder");
+    input.mutable_attr()->insert({"dtype", dtype_attr});
+    return input;
+  }
+
+  NodeDef CollectiveCall(const string& op, const string& input, int cpu_id) {
+    NodeDef collective_call;
+    collective_call.set_name(strings::StrCat("collective_call", cpu_id));
+    collective_call.set_op(op);
+    collective_call.add_input(input);
+    collective_call.set_device(
+        strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", cpu_id));
+    return collective_call;
+  }
+
   // Creates a GraphDef that adds two CollectiveFunctions, one each on CPU0 and
   // CPU1, with instance_key 1, and appropriate placeholder inputs.  If
   // `add_unused_function` is true, adds another CollectiveFunction with
@@ -2317,42 +2337,17 @@ class DirectSessionCollectiveTest : public ::testing::Test {
       *lib->add_function() = unused_function;
     }
 
-    // Inputs.
-    AttrValue dtype_attr;
-    SetAttrValue(DT_FLOAT, &dtype_attr);
-    NodeDef input1;
-    input1.set_name("input1");
-    input1.set_op("Placeholder");
-    input1.mutable_attr()->insert({"dtype", dtype_attr});
-    NodeDef input2;
-    input2.set_name("input2");
-    input2.set_op("Placeholder");
-    input2.mutable_attr()->insert({"dtype", dtype_attr});
-
+    *g.add_node() = Input(0);
+    *g.add_node() = Input(1);
     // CollectiveReduce on CPU0 with instance_key 1.
-    NodeDef collective_call1;
-    collective_call1.set_name("collective_call1");
-    collective_call1.set_op("CollectiveFunction1");
-    collective_call1.add_input("input1");
-    collective_call1.set_device("/job:localhost/replica:0/task:0/device:CPU:0");
+    *g.add_node() = CollectiveCall("CollectiveFunction1", "input0", 0);
     // CollectiveReduce on CPU1 with instance_key 1.
-    NodeDef collective_call2;
-    collective_call2.set_name("collective_call2");
-    collective_call2.set_op("CollectiveFunction1");
-    collective_call2.add_input("input2");
-    collective_call1.set_device("/job:localhost/replica:0/task:0/device:CPU:1");
-
-    *g.add_node() = input1;
-    *g.add_node() = input2;
-    *g.add_node() = collective_call1;
-    *g.add_node() = collective_call2;
+    *g.add_node() = CollectiveCall("CollectiveFunction1", "input1", 1);
 
     return g;
   }
 };
 
-#ifndef GOOGLE_CUDA
-// TODO(ayushd): enable this test for GPU builds.
 TEST_F(DirectSessionCollectiveTest,
        TestCollectiveGraphKeyUsesOnlyCalledFunctions) {
   int64 key1;
@@ -2361,6 +2356,5 @@ TEST_F(DirectSessionCollectiveTest,
   TF_ASSERT_OK(RunGraphWithCollectiveFunctions(true, &key2));
   ASSERT_EQ(key1, key2);
 }
-#endif
 
 }  // namespace tensorflow
-- 
GitLab


From d66aac16855ddb70c8d3d5b4c9d4da24a34dffec Mon Sep 17 00:00:00 2001
From: Mustafa Ispir <ispir@google.com>
Date: Wed, 3 Oct 2018 13:33:12 -0700
Subject: [PATCH 1076/1357] Updates the doc of SyncReplicasOptimizer. It notes
 that some worker can consume multiple mini-batches while some may not even
 one.

PiperOrigin-RevId: 215617588
---
 tensorflow/python/training/sync_replicas_optimizer.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py
index 7afaa92699..6a3756fba9 100644
--- a/tensorflow/python/training/sync_replicas_optimizer.py
+++ b/tensorflow/python/training/sync_replicas_optimizer.py
@@ -78,7 +78,11 @@ class SyncReplicasOptimizer(optimizer.Optimizer):
   4. Only after all variables have been updated, increment the global step.
   5. Only after step 4, pushes `global_step` in the `token_queue`, once for
      each worker replica. The workers can now fetch the global step, use it to
-     update its local_step variable and start the next batch.
+     update its local_step variable and start the next batch. Please note that
+     some workers can consume multiple minibatches, while some may not consume
+     even one. This is because each worker fetches minibatches as long as
+     a token exists. If one worker is stuck for some reason and does not
+     consume a token, another worker can use it.
 
   For the replicas:
 
-- 
GitLab


From 43073e9d4dc957367d8e2b73c37733ff1dc376c1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 13:34:21 -0700
Subject: [PATCH 1077/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 215617800
---
 tensorflow/core/ops/ops.pbtxt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 229022b64c..0e58a9475d 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -28603,6 +28603,14 @@ op {
     name: "stats_aggregator"
     type: DT_RESOURCE
   }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "counter_prefix"
+    type: DT_STRING
+  }
   output_arg {
     name: "handle"
     type: DT_VARIANT
-- 
GitLab


From ce9a5d143f89a37ab029a29c62433883323987e8 Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Wed, 3 Oct 2018 13:39:44 -0700
Subject: [PATCH 1078/1357] Tests for metrics correctness with TPU strategy

PiperOrigin-RevId: 215618809
---
 tensorflow/contrib/distribute/python/BUILD    |  17 ++-
 .../contrib/distribute/python/combinations.py |   4 +-
 .../distribute/python/metrics_v1_test.py      | 121 ++++++++++--------
 3 files changed, 86 insertions(+), 56 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index defa82f98a..8267612236 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -737,18 +737,27 @@ cuda_py_test(
     ],
 )
 
-cuda_py_test(
-    name = "metrics_v1_test",
+py_library(
+    name = "metrics_v1_test_lib",
+    testonly = 1,
     srcs = ["metrics_v1_test.py"],
-    additional_deps = [
+    deps = [
         ":combinations",
-        "@absl_py//absl/testing:parameterized",
         "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:metrics",
         "//tensorflow/python:variables",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/eager:test",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+cuda_py_test(
+    name = "metrics_v1_test",
+    srcs = ["metrics_v1_test.py"],
+    additional_deps = [
+        ":metrics_v1_test_lib",
     ],
     tags = [
         "multi_and_single_gpu",
diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py
index 82ca041cc2..cff4b0a463 100644
--- a/tensorflow/contrib/distribute/python/combinations.py
+++ b/tensorflow/contrib/distribute/python/combinations.py
@@ -329,10 +329,10 @@ one_device_strategy = NamedDistribution(
     required_gpus=None)
 tpu_strategy = NamedDistribution(
     "TPU", lambda: tpu_lib.TPUStrategy(
-        TPUClusterResolver(""), steps_per_run=5),
+        TPUClusterResolver(""), steps_per_run=2),
     required_tpu=True)
 tpu_strategy_one_step = NamedDistribution(
-    "TPU", lambda: tpu_lib.TPUStrategy(
+    "TPUOneStep", lambda: tpu_lib.TPUStrategy(
         TPUClusterResolver(""), steps_per_run=1),
     required_tpu=True)
 # Note that we disable prefetching for testing since prefetching makes
diff --git a/tensorflow/contrib/distribute/python/metrics_v1_test.py b/tensorflow/contrib/distribute/python/metrics_v1_test.py
index 8163494c8e..ae4189eb1c 100644
--- a/tensorflow/contrib/distribute/python/metrics_v1_test.py
+++ b/tensorflow/contrib/distribute/python/metrics_v1_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.contrib.distribute.python import combinations
+from tensorflow.contrib.distribute.python import tpu_strategy
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import test
 from tensorflow.python.framework import ops
@@ -35,7 +36,8 @@ def _labeled_dataset_fn():
   #  8: 3, 2 -> False;  9: 4, 0 -> False; 10: 0, 1 -> False; 11: 1, 2 -> False
   # 12: 2, 0 -> False; 13: 3, 1 -> False; 14: 4, 2 -> False; 15: 0, 0 -> True
   return dataset_ops.Dataset.range(1000).map(
-      lambda x: {"labels": x % 5, "predictions": x % 3}).batch(4)
+      lambda x: {"labels": x % 5, "predictions": x % 3}).batch(
+          4, drop_remainder=True)
 
 
 def _boolean_dataset_fn():
@@ -47,7 +49,8 @@ def _boolean_dataset_fn():
   #   F, T -> FP;  T, F -> FN;   F, F -> TN
   return dataset_ops.Dataset.from_tensor_slices({
       "labels": [True, False, True, False],
-      "predictions": [True, True, False, False]}).repeat().batch(3)
+      "predictions": [True, True, False, False]}).repeat().batch(
+          3, drop_remainder=True)
 
 
 def _threshold_dataset_fn():
@@ -59,7 +62,8 @@ def _threshold_dataset_fn():
   #  False, .75 -> FP;   True, .25 -> FN;  False, 0.0 -> TN
   return dataset_ops.Dataset.from_tensor_slices({
       "labels": [True, False, True, False],
-      "predictions": [1.0, 0.75, 0.25, 0.]}).repeat().batch(3)
+      "predictions": [1.0, 0.75, 0.25, 0.]}).repeat().batch(
+          3, drop_remainder=True)
 
 
 def _regression_dataset_fn():
@@ -79,6 +83,12 @@ def all_combinations():
       mode=["graph"])
 
 
+def tpu_combinations():
+  return combinations.combine(distribution=[combinations.tpu_strategy_one_step,
+                                            combinations.tpu_strategy],
+                              mode=["graph"])
+
+
 # TODO(josh11b): Test metrics.recall_at_top_k, metrics.average_precision_at_k,
 # metrics.precision_at_k
 class MetricsV1Test(test.TestCase, parameterized.TestCase):
@@ -87,42 +97,50 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     with ops.Graph().as_default(), distribution.scope():
       iterator = distribution.distribute_dataset(
           dataset_fn).make_one_shot_iterator()
-      value, update = distribution.call_for_each_tower(
-          metric_fn, iterator.get_next())
-      update = distribution.group(update)
+      if isinstance(distribution, tpu_strategy.TPUStrategy):
+        def step_fn(ctx, inputs):
+          value, update = distribution.call_for_each_tower(
+              metric_fn, inputs)
+          ctx.set_non_tensor_output(name="value", output=value)
+          return distribution.group(update)
+
+        ctx = distribution.run_steps_on_dataset(
+            step_fn, iterator, iterations=distribution.steps_per_run)
+        update = ctx.run_op
+        value = ctx.non_tensor_outputs["value"]
+        # In each run, we run multiple steps, and each steps consumes as many
+        # batches as number of towers.
+        batches_per_update = (
+            distribution.num_towers * distribution.steps_per_run)
+      else:
+        value, update = distribution.call_for_each_tower(
+            metric_fn, iterator.get_next())
+        update = distribution.group(update)
+        # TODO(josh11b): Once we switch to using a global batch size for input,
+        # replace "distribution.num_towers" with "1".
+        batches_per_update = distribution.num_towers
+
+      self.evaluate(distribution.initialize())
       self.evaluate(variables.local_variables_initializer())
-      # TODO(josh11b): Once we switch to using a global batch size for input,
-      # replace "distribution.num_towers" with "1".
-      batches_per_update = distribution.num_towers
-
-      # Update variables using the first `num_towers` batches.
-      self.evaluate(update)
-      self.assertAllClose(expected_fn(batches_per_update), self.evaluate(value),
-                          0.001, msg="After first update")
-
-      # Update variables using the second `num_towers` batches.
-      self.evaluate(update)
-      self.assertAllClose(expected_fn(2 * batches_per_update),
-                          self.evaluate(value),
-                          0.001,
-                          msg="After second update")
-
-      if batches_per_update == 1:  # Consume 4 input batches
-        self.evaluate(update)
-        self.assertAllClose(expected_fn(3 * batches_per_update),
-                            self.evaluate(value),
-                            0.001,
-                            msg="After third update")
+
+      batches_consumed = 0
+      for i in range(4):
         self.evaluate(update)
-        self.assertAllClose(expected_fn(4 * batches_per_update),
+        batches_consumed += batches_per_update
+        self.assertAllClose(expected_fn(batches_consumed),
                             self.evaluate(value),
                             0.001,
-                            msg="After fourth update")
+                            msg="After update #" + str(i+1))
+        if batches_consumed >= 4:  # Consume 4 input batches in total.
+          break
 
-  @combinations.generate(all_combinations())
+      self.evaluate(distribution.finalize())
+
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testMean(self, distribution):
     def _dataset_fn():
-      return dataset_ops.Dataset.range(1000).map(math_ops.to_float).batch(4)
+      return dataset_ops.Dataset.range(1000).map(math_ops.to_float).batch(
+          4, drop_remainder=True)
 
     def _expected_fn(num_batches):
       # Mean(0..3) = 1.5, Mean(0..7) = 3.5, Mean(0..11) = 5.5, etc.
@@ -130,7 +148,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
 
     self._test_metric(distribution, _dataset_fn, metrics.mean, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testAccuracy(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -143,6 +161,8 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _labeled_dataset_fn, _metric_fn, _expected_fn)
 
+  # TODO(priyag, jhseu): Enable TPU for this test once scatter_add is added
+  # for TPUMirroredVariable.
   @combinations.generate(all_combinations())
   def testMeanPerClassAccuracy(self, distribution):
     def _metric_fn(x):
@@ -161,6 +181,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _labeled_dataset_fn, _metric_fn, _expected_fn)
 
+  # NOTE(priyag): This metric doesn't work on TPUs yet.
   @combinations.generate(all_combinations())
   def testMeanIOU(self, distribution):
     def _metric_fn(x):
@@ -179,7 +200,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _labeled_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testMeanTensor(self, distribution):
     def _dataset_fn():
       dataset = dataset_ops.Dataset.range(1000).map(math_ops.to_float)
@@ -198,7 +219,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _dataset_fn, metrics.mean_tensor, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testAUCROC(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -212,7 +233,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testAUCPR(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -226,7 +247,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testFalseNegatives(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -239,7 +260,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testFalseNegativesAtThresholds(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -252,7 +273,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testTrueNegatives(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -265,7 +286,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testTrueNegativesAtThresholds(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -278,7 +299,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testFalsePositives(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -291,7 +312,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testFalsePositivesAtThresholds(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -304,7 +325,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testTruePositives(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -317,7 +338,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testTruePositivesAtThresholds(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -330,7 +351,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testPrecision(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -343,7 +364,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testPrecisionAtThreshold(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -356,7 +377,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testRecall(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -369,7 +390,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testRecallAtThreshold(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -382,7 +403,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testMeanSquaredError(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -395,7 +416,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _regression_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testRootMeanSquaredError(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
-- 
GitLab


From c26b5e9685b05fafc509d8ebc88c8304be5974a4 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Wed, 3 Oct 2018 13:45:59 -0700
Subject: [PATCH 1079/1357] Some tiny speed improvements for defun.

Before:
entry {
  name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
  iters: 30000
  wall_time: 48.4476327896
  extras {
    key: "examples_per_sec"
    value {
      double_value: 20640.8433688
    }
  }
}

After:
entry {
  name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
  iters: 30000
  wall_time: 45.2344338099
  extras {
    key: "examples_per_sec"
    value {
      double_value: 22107.0524327
    }
  }
}
PiperOrigin-RevId: 215619902
---
 tensorflow/python/eager/function.py | 36 +++++++++++++++--------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f261d92d64..dd9f5e233c 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1152,23 +1152,22 @@ class PolymorphicFunction(object):
       del args, kwargs
       cache_key = self._flat_input_signature
 
+    ctx = context.context()
     with ops.init_scope():
-      init_graph = ops.get_default_graph()
-
       # The graph, or whether we're executing eagerly, should be a part of the
       # cache key so we don't improperly capture tensors such as variables.
-      executing_eagerly = context.executing_eagerly()
-      execution_context = executing_eagerly or init_graph
-
-    default_graph = ops.get_default_graph()
-    # Putting the device in the cache key ensures that call-site device
-    # annotations are respected.
-    device_functions = _get_device_functions(context.context(), default_graph)
+      executing_eagerly = ctx.executing_eagerly()
+      execution_context = executing_eagerly or ops.get_default_graph()
 
-    # `ops.colocate_with` directives translate into `ops.device` directives when
-    # eager execution is enabled.
-    colocation_stack = (() if executing_eagerly else
-                        tuple(default_graph._colocation_stack.peek_objs()))  # pylint: disable=protected-access
+    if executing_eagerly:
+      device_functions = (pydev.merge_device(ctx.device_name),)
+      colocation_stack = ()
+    else:
+      default_graph = ops.get_default_graph()
+      # Putting the device in the cache key ensures that call-site device
+      # annotations are respected.
+      device_functions = tuple(default_graph._device_functions_outer_to_inner)  # pylint: disable=protected-access
+      colocation_stack = tuple(default_graph._colocation_stack.peek_objs())  # pylint: disable=protected-access
 
     return (cache_key, execution_context, device_functions, colocation_stack)
 
@@ -1195,9 +1194,6 @@ class PolymorphicFunction(object):
     """
     args = self._args_to_prepend + args
     kwargs = dict(kwargs, **self._kwargs_to_include)
-    # Maps from index of arg to its corresponding value, according to `args`
-    # and `kwargs`; seeded with the default values for the named args that
-    # aren't in `args`.
     if not kwargs:
       if self._default_values:
         inputs = args + self._default_values[len(args) -
@@ -1205,6 +1201,9 @@ class PolymorphicFunction(object):
       else:
         inputs = args
     else:
+      # Maps from index of arg to its corresponding value, according to `args`
+      # and `kwargs`; seeded with the default values for the named args that
+      # aren't in `args`.
       arg_indices_to_values = {
           index: default for index, default in six.iteritems(
               self._arg_indices_to_default_values) if index >= len(args)
@@ -1227,9 +1226,12 @@ class PolymorphicFunction(object):
     flat_inputs = nest.flatten(inputs)
 
     # Check for NumPy arrays in arguments and convert them to Tensors.
+    # TODO(nareshmodi): Skip ndarray conversion to tensor altogether, perhaps
+    # finding a way to store them directly in the cache key (currently not
+    # possible since ndarrays are not hashable).
     need_packing = False
     for index, value in enumerate(flat_inputs):
-      if isinstance(value, np.ndarray):
+      if type(value) == np.ndarray:
         flat_inputs[index] = constant_op.constant(value)
         need_packing = True
     if need_packing:
-- 
GitLab


From 0b7a3df432f0e607b39ab17d1b85fb0b04e05bd5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 13:46:19 -0700
Subject: [PATCH 1080/1357] Fixes bug in Conv2D unit test that made it test a
 SeparableConv2D layer instead of a Conv2D layer.

PiperOrigin-RevId: 215619966
---
 tensorflow/python/keras/layers/convolutional_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index 2d3d38a5ce..cad5e4c8bd 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -113,7 +113,7 @@ class Conv2DTest(test.TestCase):
       test_kwargs[arg] = value
       with self.test_session(use_gpu=True):
         testing_utils.layer_test(
-            keras.layers.SeparableConv2D,
+            keras.layers.Conv2D,
             kwargs=test_kwargs,
             input_shape=(num_samples, num_row, num_col, stack_size))
 
-- 
GitLab


From ed904611009a74ae530335d3bd16b7070238cec3 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 3 Oct 2018 14:01:16 -0700
Subject: [PATCH 1081/1357] Update reference to tools/bazel.rc to .bazelrc
 after cl/215483141

PiperOrigin-RevId: 215623215
---
 configure.py             | 4 ++--
 tensorflow/workspace.bzl | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure.py b/configure.py
index 2d2da11700..a88fdb3555 100644
--- a/configure.py
+++ b/configure.py
@@ -1676,8 +1676,8 @@ def main():
   # TODO(pcloudy): remove the following if check when they make sense on Windows
   if not is_windows():
     print('Preconfigured Bazel build configs. You can use any of the below by '
-          'adding "--config=<>" to your build command. See tools/bazel.rc for '
-          'more details.')
+          'adding "--config=<>" to your build command. See .bazelrc for more '
+          'details.')
     config_info_line('mkl', 'Build with MKL support.')
     config_info_line('monolithic', 'Config for mostly static monolithic build.')
     config_info_line('gdr', 'Build with GDR support.')
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index bcc89ef729..d27732a801 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -888,7 +888,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     # why we can't depend on the canonical build target.
 
     # gRPC wants a cares dependency but its contents is not actually
-    # important since we have set GRPC_ARES=0 in tools/bazel.rc
+    # important since we have set GRPC_ARES=0 in .bazelrc
     native.bind(
         name = "cares",
         actual = "@grpc//third_party/nanopb:nanopb",
-- 
GitLab


From 94267ccc14516ad9df67897bea8ede20cbad24ca Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 3 Oct 2018 14:09:05 -0700
Subject: [PATCH 1082/1357] Move out-params to end of argument list and add an
 out_ prefix; NFC

PiperOrigin-RevId: 215624875
---
 tensorflow/compiler/jit/kernels/xla_ops.cc    |  2 +-
 .../compiler/jit/xla_compilation_cache.cc     | 33 ++++++++++---------
 .../compiler/jit/xla_compilation_cache.h      | 29 ++++++++--------
 .../compiler/jit/xla_compile_on_demand_op.cc  |  2 +-
 4 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc
index cfd27a6510..accc86a86d 100644
--- a/tensorflow/compiler/jit/kernels/xla_ops.cc
+++ b/tensorflow/compiler/jit/kernels/xla_ops.cc
@@ -277,7 +277,7 @@ static Status CompileToLocalExecutable(
   compile_options.always_return_tuple = false;
 
   return cache->Compile(options, function, constant_args, *variables, ctx,
-                        kernel, executable, compile_options);
+                        compile_options, kernel, executable);
 }
 
 void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc
index 3aa9e9c7ed..0471995015 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.cc
+++ b/tensorflow/compiler/jit/xla_compilation_cache.cc
@@ -228,37 +228,38 @@ Status XlaCompilationCache::Compile(
     const XlaCompiler::Options& options, const NameAttrList& function,
     const std::map<int, Tensor>& constant_args,
     const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
-    const XlaCompiler::CompilationResult** compilation_result,
-    xla::LocalExecutable** executable,
-    const XlaCompiler::CompileOptions& compile_options) {
+    const XlaCompiler::CompileOptions& compile_options,
+    const XlaCompiler::CompilationResult** out_compilation_result,
+    xla::LocalExecutable** out_executable) {
   return CompileImpl(options, function, constant_args, variable_args, ctx,
-                     compilation_result, executable, compile_options, false);
+                     compile_options, /*compile_single_op=*/false,
+                     out_compilation_result, out_executable);
 }
 
 Status XlaCompilationCache::CompileSingleOp(
     const XlaCompiler::Options& options,
     const std::map<int, Tensor>& constant_args,
     const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
-    const XlaCompiler::CompilationResult** compilation_result,
-    xla::LocalExecutable** executable,
-    const XlaCompiler::CompileOptions& compile_options) {
+    const XlaCompiler::CompileOptions& compile_options,
+    const XlaCompiler::CompilationResult** out_compilation_result,
+    xla::LocalExecutable** out_executable) {
   const NodeDef& def = ctx->op_kernel().def();
   NameAttrList name;
   name.set_name(def.op());
   *name.mutable_attr() = def.attr();
-  return CompileImpl(options, name, constant_args, variable_args, ctx,
-                     compilation_result, executable, compile_options, true);
+  return CompileImpl(
+      options, name, constant_args, variable_args, ctx, compile_options,
+      /*compile_single_op=*/true, out_compilation_result, out_executable);
 }
 
 Status XlaCompilationCache::CompileImpl(
     const XlaCompiler::Options& options, const NameAttrList& function,
     const std::map<int, Tensor>& constant_args,
     const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
-    const XlaCompiler::CompilationResult** compilation_result,
-    xla::LocalExecutable** executable,
-    const XlaCompiler::CompileOptions& compile_options,
-    bool compile_single_op) {
-  CHECK_NE(executable, nullptr);
+    const XlaCompiler::CompileOptions& compile_options, bool compile_single_op,
+    const XlaCompiler::CompilationResult** out_compilation_result,
+    xla::LocalExecutable** out_executable) {
+  DCHECK_NE(out_executable, nullptr);
   VLOG(2) << "XlaCompilationCache::Compile " << DebugString();
 
   if (VLOG_IS_ON(2)) {
@@ -357,8 +358,8 @@ Status XlaCompilationCache::CompileImpl(
     }
   }
   TF_RETURN_IF_ERROR(entry->compilation_status);
-  *compilation_result = &entry->compilation_result;
-  *executable = entry->executable.get();
+  *out_compilation_result = &entry->compilation_result;
+  *out_executable = entry->executable.get();
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h
index 17c0321c1e..75c7758f73 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.h
+++ b/tensorflow/compiler/jit/xla_compilation_cache.h
@@ -68,9 +68,9 @@ class XlaCompilationCache : public ResourceBase {
                  const std::map<int, Tensor>& constant_args,
                  const std::map<int, OptionalTensor>& variable_args,
                  OpKernelContext* ctx,
-                 const XlaCompiler::CompilationResult** compilation_result,
-                 xla::LocalExecutable** executable,
-                 const XlaCompiler::CompileOptions& compile_options);
+                 const XlaCompiler::CompileOptions& compile_options,
+                 const XlaCompiler::CompilationResult** out_compilation_result,
+                 xla::LocalExecutable** out_executable);
 
   // As above, but calls XlaCompiler::CompileSingleOp instead of
   // XlaCompiler::CompileFunction.
@@ -78,9 +78,9 @@ class XlaCompilationCache : public ResourceBase {
       const XlaCompiler::Options& options,
       const std::map<int, Tensor>& constant_args,
       const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
-      const XlaCompiler::CompilationResult** compilation_result,
-      xla::LocalExecutable** executable,
-      const XlaCompiler::CompileOptions& compile_options);
+      const XlaCompiler::CompileOptions& compile_options,
+      const XlaCompiler::CompilationResult** out_compilation_result,
+      xla::LocalExecutable** out_executable);
 
   xla::LocalClient* client() const { return client_; }
   const DeviceType& device_type() const { return device_type_; }
@@ -89,15 +89,14 @@ class XlaCompilationCache : public ResourceBase {
 
  private:
   // Common implementation of Compile and CompileSingleOp.
-  Status CompileImpl(const XlaCompiler::Options& options,
-                     const NameAttrList& function,
-                     const std::map<int, Tensor>& constant_args,
-                     const std::map<int, OptionalTensor>& variable_args,
-                     OpKernelContext* ctx,
-                     const XlaCompiler::CompilationResult** compilation_result,
-                     xla::LocalExecutable** executable,
-                     const XlaCompiler::CompileOptions& compile_options,
-                     bool compile_single_op);
+  Status CompileImpl(
+      const XlaCompiler::Options& options, const NameAttrList& function,
+      const std::map<int, Tensor>& constant_args,
+      const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
+      const XlaCompiler::CompileOptions& compile_options,
+      bool compile_single_op,
+      const XlaCompiler::CompilationResult** out_compilation_result,
+      xla::LocalExecutable** out_executable);
 
   // Takes `result` which has been compiled from a Tensorflow subgraph to a
   // XLA computation already, and generates an XLA LocalExecutable `executable`.
diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
index b98c0cb028..79976c85df 100644
--- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
+++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
@@ -180,7 +180,7 @@ Status XlaCompileOnDemandOp::Compile(
 
   std::map<int, OptionalTensor> variable_args = GetVariables(ctx);
   return cache->CompileSingleOp(options, constant_arguments, variable_args, ctx,
-                                result, executable, compile_options);
+                                compile_options, result, executable);
 }
 
 void XlaCompileOnDemandOp::Compute(OpKernelContext* ctx) {
-- 
GitLab


From f5f8dff270b9f2cdf36bba9d671c324a4f7c6fac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 14:28:25 -0700
Subject: [PATCH 1083/1357] Add NNAPI padding enums to NeuralNetworksShim.h

PiperOrigin-RevId: 215628561
---
 tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index 687944023b..eccf4aefb6 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -179,6 +179,14 @@ enum {
   ANEURALNETWORKS_BAD_STATE = 6,
 };
 
+/**
+ * Implicit padding algorithms.
+ */
+enum {
+  ANEURALNETWORKS_PADDING_SAME = 1,
+  ANEURALNETWORKS_PADDING_VALID = 2,
+};
+
 /**
  * ANeuralNetworksMemory is an opaque type that represents memory.
  *
-- 
GitLab


From 2e11deba60cb00027de4373af17703676fa74bd7 Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Wed, 3 Oct 2018 14:37:57 -0700
Subject: [PATCH 1084/1357] [XLA] Disable a test for layout changing
 elementwise operations.

Rename the test to make it obvious that it is for testing the codegen
correctness in handling layout changing elementwise operations.

Keep the test only for the CPU backend.

PiperOrigin-RevId: 215630611
---
 tensorflow/compiler/xla/tests/fusion_test.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc
index 9c94acb437..fd79a9d041 100644
--- a/tensorflow/compiler/xla/tests/fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/fusion_test.cc
@@ -764,8 +764,9 @@ XLA_TEST_F(FusionTest, Clamp2D) {
   TestElementwise2D<float, 3>(HloOpcode::kClamp);
 }
 
-// TODO(b/73903144): Enable on interpreter once interpreter supports bitcast.
-XLA_TEST_F(FusionTest, DISABLED_ON_INTERPRETER(FusionWithLayout)) {
+// TODO(b/117156505): Remove this test when the bug is fixed.
+XLA_TEST_F(FusionTest, DISABLED_ON_GPU(DISABLED_ON_INTERPRETER(
+                           LayoutChangingElementWiseOp))) {
   const string hlo_text = R"(
 HloModule Cluster
 
-- 
GitLab


From c1b3b0b9e041d82e80c2cdcc623a387753daf0b4 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Wed, 3 Oct 2018 14:42:56 -0700
Subject: [PATCH 1085/1357] Internal change.

PiperOrigin-RevId: 215631612
---
 tensorflow/contrib/lite/kernels/BUILD | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index daaf6714cc..b349a2863c 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -337,7 +337,10 @@ tf_cc_test(
     name = "activations_test",
     size = "small",
     srcs = ["activations_test.cc"],
-    tags = ["tflite_not_portable_ios"],
+    tags = [
+        "nomac",
+        "tflite_not_portable_ios",
+    ],
     deps = [
         ":builtin_ops",
         "//tensorflow/contrib/lite:framework",
-- 
GitLab


From 312e37cee391b0d207293d59d8882db3c8030f9d Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Wed, 3 Oct 2018 14:51:08 -0700
Subject: [PATCH 1086/1357] Add a require_static_shapes argument to
 DistributionStrategy class. This allows us to identify if we need to set the
 drop_remainder option when creating Dataset objects.

PiperOrigin-RevId: 215633097
---
 tensorflow/contrib/distribute/python/tpu_strategy.py |  4 +++-
 tensorflow/python/keras/engine/training.py           | 11 +++++------
 tensorflow/python/training/distribute.py             |  7 +++++++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index c3c7df3cd8..1d9e299b38 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -132,7 +132,7 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
     """
     # TODO(sourabhbajaj): OneDeviceStrategy should be initialized with the
     # master node fetched from the cluster resolver.
-    super(TPUStrategy, self).__init__('/device:CPU:0')
+    super(TPUStrategy, self).__init__("/device:CPU:0")
 
     self._tpu_cluster_resolver = tpu_cluster_resolver
     self._tpu_metadata = get_tpu_system_metadata(self._tpu_cluster_resolver)
@@ -152,6 +152,8 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
     # at a time is comparable to multiple steps.
     self.steps_per_run = steps_per_run
 
+    self._require_static_shapes = True
+
   def _get_enqueue_op_per_host(self, host_id, iterator, input_shapes,
                                iterations):
     """Create an enqueue op for a single host identified using host_id.
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 85233de9b1..d81bd83f7f 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -814,6 +814,9 @@ class Model(Network):
       x_shape = first_x_value.shape
       if batch_size is None:
         batch_size = x_shape[0] // steps
+      # We need to use the drop_remainder argument to allow for a static
+      # input shape which is required for TPUs.
+      drop_remainder = self._distribution_strategy.require_static_shapes
       if y is not None:
         var_x = distributed_training_utils.get_var_for_numpy(
             self._distribution_strategy, x)
@@ -824,9 +827,7 @@ class Model(Network):
         # TODO(anjalisridhar): What should the buffer size be?
         x = x.shuffle(10000)
         x = x.repeat()
-        # We need to use the drop_remainder argument to allow for a static
-        # input shape which is required for TPUs.
-        x = x.batch(batch_size, drop_remainder=True)
+        x = x.batch(batch_size, drop_remainder=drop_remainder)
         y = None
       else:
         # This case is for the predict call where the dataset only contains
@@ -838,9 +839,7 @@ class Model(Network):
             self._distribution_strategy, x)
         x = dataset_ops.Dataset.from_tensor_slices(var_x)
         x = x.repeat()
-        # We need to use the drop_remainder argument to allow for a static
-        # input shape which is required for TPUs.
-        x = x.batch(batch_size, drop_remainder=True)
+        x = x.batch(batch_size, drop_remainder=drop_remainder)
 
     # TODO(anjalisridhar): Can we use the iterator and getnext op cache?
     # We require users to pass Datasets since we distribute the dataset across
diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py
index a92a1bdee7..b3f3c29b2f 100644
--- a/tensorflow/python/training/distribute.py
+++ b/tensorflow/python/training/distribute.py
@@ -436,6 +436,9 @@ class DistributionStrategy(object):
 
   def __init__(self):
     self._default_device = None
+    # This property is used to determine if we should set drop_remainder=True
+    # when creating Datasets from numpy array inputs.
+    self._require_static_shapes = False
 
   def scope(self):
     """Returns a context manager selecting this DistributionStrategy as current.
@@ -898,6 +901,10 @@ class DistributionStrategy(object):
     """
     raise NotImplementedError("must be implemented in descendants")
 
+  @property
+  def require_static_shapes(self):
+    return self._require_static_shapes
+
   @property
   def num_towers(self):
     """Returns number of towers, for purposes of averaging across towers."""
-- 
GitLab


From 148bc62dba0a0b9d26945ce48b6dcd903613de14 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Wed, 3 Oct 2018 15:14:32 -0700
Subject: [PATCH 1087/1357] Update size of multi_device_iterator_test to medium
 to fix timeouts

PiperOrigin-RevId: 215637785
---
 tensorflow/python/data/kernel_tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index bf76860aa4..c7295d6e69 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -291,7 +291,7 @@ tf_py_test(
 
 cuda_py_test(
     name = "multi_device_iterator_test",
-    size = "small",
+    size = "medium",
     srcs = ["multi_device_iterator_test.py"],
     additional_deps = [
         ":test_base",
-- 
GitLab


From efbee1ab2cac59f511cc0850d84414e711bbda3b Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 15:15:23 -0700
Subject: [PATCH 1088/1357] Fix ci_parameterized_build to pass environment
 variables to tests.

This is particularly important when using --run_under with
parallel_gpu_execute, since the envvars control the execution.

PiperOrigin-RevId: 215637931
---
 .../tools/ci_build/ci_parameterized_build.sh   | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 49a9048c03..99bdedf7b4 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -65,8 +65,6 @@
 #   TF_GPU_COUNT:
 #                      Run this many parallel tests for serial builds.
 #                      For now, only can be edited for PIP builds.
-#                      TODO(gunan): Find a way to pass this environment variable
-#                      to the script bazel runs (using --run_under).
 #   TF_BUILD_TEST_TUTORIALS:
 #                      If set to any non-empty and non-0 value, will perform
 #                      tutorials tests (Applicable only if TF_BUILD_IS_PIP is
@@ -150,6 +148,13 @@ ANDROID_FULL_CMD="${CI_BUILD_DIR}/builds/android_full.sh"
 TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 PARALLEL_GPU_TEST_CMD='//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute'
 
+# Environment variables to set when running bazel tests.  These are especially
+# important when using --run_under with parallel_gpu_execute.
+BAZEL_TEST_ENV=""\
+"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
+"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
+"--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB} "
+
 BENCHMARK_CMD="${CI_BUILD_DIR}/builds/benchmark.sh"
 
 EXTRA_PARAMS=""
@@ -410,13 +415,14 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
   if [[ ${CTYPE} == cpu* ]] || \
      [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
     # CPU only command, fully parallel.
-    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} ${EXTRA_ARGS} -- "\
-"${BAZEL_TARGET}"
+    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
+      "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == gpu* ]]; then
     # GPU only command, run as many jobs as the GPU count only.
-    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
+    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
 "--local_test_jobs=${TF_GPU_COUNT} "\
-"--run_under=${PARALLEL_GPU_TEST_CMD} ${EXTRA_ARGS} -- ${BAZEL_TARGET}"
+"--run_under=${PARALLEL_GPU_TEST_CMD} "\
+"${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == "android" ]]; then
     # Run android specific script for android build.
     NO_PIP_MAIN_CMD="${ANDROID_CMD} ${OPT_FLAG} "
-- 
GitLab


From 0dfde8ab8addef36f90a445f0d604618a199508c Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Wed, 3 Oct 2018 15:48:53 -0700
Subject: [PATCH 1089/1357] Disable norm_op_test and svd_op_test under msan

PiperOrigin-RevId: 215643600
---
 tensorflow/python/kernel_tests/BUILD | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index c0e9a3c975..9303c70c60 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2999,7 +2999,10 @@ cuda_py_test(
         "//tensorflow/python:math_ops",
     ],
     shard_count = 20,
-    tags = ["no_oss"],  # b/117185141
+    tags = [
+        "no_oss",  # b/117185141.
+        "nomsan",  # TODO(b/117236102): Re-enable in msan build.
+    ],
 )
 
 cuda_py_test(
@@ -3014,7 +3017,11 @@ cuda_py_test(
         "//tensorflow/python:linalg_ops",
     ],
     shard_count = 20,
-    tags = ["no_windows_gpu"],
+    # TODO(b/117236102): Re-enable in msan build.
+    tags = [
+        "no_windows_gpu",
+        "nomsan",
+    ],
 )
 
 cuda_py_test(
-- 
GitLab


From 041c347df995e6c6d9206920ae061f558e120b92 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 3 Oct 2018 15:59:41 -0700
Subject: [PATCH 1090/1357] [TF:XLA] Bump open source abseil revision to
 f21d187b80e3b7f08fb279775ea9c8b48c636030

PiperOrigin-RevId: 215645351
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index d27732a801..72f3fd0cf8 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -110,11 +110,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "7dd09690ae7ca4551de3111d4a86b75b23ec17445f273d3c42bdcdc1c7b02e4e",
-        strip_prefix = "abseil-cpp-48cd2c3f351ff188bc85684b84a91b6e6d17d896",
+        sha256 = "507903ef9353cb25cccd0a6840048fdd348fd20e98314d694f04a990c0f277e3",
+        strip_prefix = "abseil-cpp-f21d187b80e3b7f08fb279775ea9c8b48c636030",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz",
         ],
     )
 
-- 
GitLab


From 207bea0e35ab635e66137520963761a6e94354ea Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Wed, 3 Oct 2018 16:34:05 -0700
Subject: [PATCH 1091/1357] [XLA] Revise the way to express a CPU specific
 test.

Use #ifdef XLA_TEST_BACKEND_CPU to protect the test instead of disabling it for
all the other backends except for the CPU backend.

PiperOrigin-RevId: 215651036
---
 tensorflow/compiler/xla/tests/fusion_test.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc
index fd79a9d041..4d4b676a53 100644
--- a/tensorflow/compiler/xla/tests/fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/fusion_test.cc
@@ -764,9 +764,10 @@ XLA_TEST_F(FusionTest, Clamp2D) {
   TestElementwise2D<float, 3>(HloOpcode::kClamp);
 }
 
-// TODO(b/117156505): Remove this test when the bug is fixed.
-XLA_TEST_F(FusionTest, DISABLED_ON_GPU(DISABLED_ON_INTERPRETER(
-                           LayoutChangingElementWiseOp))) {
+// TODO(b/117156505): Remove this test when the bug is fixed and the CPU backend
+// should not generate layout changing elementwise operations.
+#ifdef XLA_TEST_BACKEND_CPU
+XLA_TEST_F(FusionTest, LayoutChangingElementWiseOp) {
   const string hlo_text = R"(
 HloModule Cluster
 
@@ -795,6 +796,7 @@ ENTRY main {
       LiteralUtil::CreateR3<float>({{{0.}, {0.76159415595}}, {{0.}, {0.}}}),
       result));
 }
+#endif
 
 class FusionClientLibraryTest : public ClientLibraryTestBase {};
 
-- 
GitLab


From 9801b8810e07859141d4417746317cc3dbebc227 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 16:36:23 -0700
Subject: [PATCH 1092/1357] Reduce batch sizes for some eager tests to prevert
 OOMs in OSS runs

PiperOrigin-RevId: 215651413
---
 .../python/examples/resnet50/resnet50_graph_test.py    | 10 +++++++---
 .../eager/python/examples/revnet/revnet_test.py        |  3 +++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
index 551c76b0df..f3bb978875 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
@@ -51,7 +51,9 @@ def random_batch(batch_size):
 class ResNet50GraphTest(tf.test.TestCase):
 
   def testApply(self):
-    batch_size = 64
+    # Use small batches for tests because the OSS version runs
+    # in constrained GPU environment with 1-2GB of memory.
+    batch_size = 8
     with tf.Graph().as_default():
       images = tf.placeholder(tf.float32, image_shape(None))
       model = resnet50.ResNet50(data_format())
@@ -63,7 +65,7 @@ class ResNet50GraphTest(tf.test.TestCase):
         sess.run(init)
         np_images, _ = random_batch(batch_size)
         out = sess.run(predictions, feed_dict={images: np_images})
-        self.assertAllEqual([64, 1000], out.shape)
+        self.assertAllEqual([batch_size, 1000], out.shape)
 
   def testTrainWithSummary(self):
     with tf.Graph().as_default():
@@ -87,7 +89,9 @@ class ResNet50GraphTest(tf.test.TestCase):
       init = tf.global_variables_initializer()
       self.assertEqual(321, len(tf.global_variables()))
 
-      batch_size = 32
+      # Use small batches for tests because the OSS version runs
+      # in constrained GPU environment with 1-2GB of memory.
+      batch_size = 2
       with tf.Session() as sess:
         sess.run(init)
         sess.run(tf.contrib.summary.summary_writer_initializer_op())
diff --git a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
index 6a921e1997..4f4cc3af6f 100644
--- a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
+++ b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
@@ -50,6 +50,9 @@ class RevNetTest(tf.test.TestCase):
     # Reconstruction could cause numerical error, use double precision for tests
     config.dtype = tf.float64
     config.fused = False  # Fused batch norm does not support tf.float64
+    # Reduce the batch size for tests because the OSS version runs
+    # in constrained GPU environment with 1-2GB of memory.
+    config.batch_size = 2
     shape = (config.batch_size,) + config.input_shape
     self.model = revnet.RevNet(config=config)
     self.x = tf.random_normal(shape=shape, dtype=tf.float64)
-- 
GitLab


From d5b362a67a57f53f610536ed6068a5b67bc37b88 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Wed, 3 Oct 2018 16:38:22 -0700
Subject: [PATCH 1093/1357] Update size of mvn_diag_test and core_rnn_cell_test
 to medium to fix timeouts

PiperOrigin-RevId: 215651746
---
 tensorflow/contrib/distributions/BUILD | 2 +-
 tensorflow/contrib/rnn/BUILD           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 3ff7da4f89..60f6b90edc 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -299,7 +299,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "mvn_diag_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/kernel_tests/mvn_diag_test.py"],
     additional_deps = [
         ":distributions_py",
diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD
index 4e67d80558..1385a9ddc1 100644
--- a/tensorflow/contrib/rnn/BUILD
+++ b/tensorflow/contrib/rnn/BUILD
@@ -108,7 +108,7 @@ cuda_py_tests(
 
 cuda_py_tests(
     name = "core_rnn_cell_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/kernel_tests/core_rnn_cell_test.py"],
     additional_deps = [
         ":rnn_py",
-- 
GitLab


From aeb044c9784d30a25c0d15fa31f479001be55052 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 3 Oct 2018 16:41:21 -0700
Subject: [PATCH 1094/1357] assert_nontrivial_match in
 tf.keras.Model.load_weights (TF format)

Adds a bit of sanity checking by default to load_weights (e.g. for the case when absolutely nothing matches) while still supporting restore-on-create and the addition of new Layers to checkpointed models.

PiperOrigin-RevId: 215652168
---
 tensorflow/python/keras/engine/network.py     |  1 +
 tensorflow/python/keras/engine/saving_test.py | 13 +++++
 .../python/training/checkpointable/util.py    | 56 +++++++++++++++++--
 .../training/checkpointable/util_test.py      |  5 ++
 4 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 5ef8d13487..8d34006967 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -1526,6 +1526,7 @@ class Network(base_layer.Layer):
         # Restore existing variables (if any) immediately, and set up a
         # streaming restore for any variables created in the future.
         checkpointable_utils.streaming_restore(status=status, session=session)
+      status.assert_nontrivial_match()
       return status
     if h5py is None:
       raise ImportError(
diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 02d99d5d69..f5045be907 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -38,6 +38,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import training as training_module
+from tensorflow.python.training.checkpointable import util as checkpointable
 
 try:
   import h5py  # pylint:disable=g-import-not-at-top
@@ -922,6 +923,18 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase):
         SubclassedModel, SubclassedModelRestore,
         _restore_init_fn)
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_incompatible_checkpoint(self):
+    save_path = checkpointable.Checkpoint().save(
+        os.path.join(self.get_temp_dir(), 'ckpt'))
+    m = keras.Model()
+    with self.assertRaisesRegexp(AssertionError, 'Nothing to load'):
+      m.load_weights(save_path)
+    m.dense = keras.layers.Dense(2)
+    m.dense(constant_op.constant([[1.]]))
+    with self.assertRaisesRegexp(
+        AssertionError, 'Nothing except the root object matched'):
+      m.load_weights(save_path)
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py
index eff15b24ce..edab6cc6eb 100644
--- a/tensorflow/python/training/checkpointable/util.py
+++ b/tensorflow/python/training/checkpointable/util.py
@@ -853,6 +853,11 @@ class _LoadStatus(object):
     """Raises an exception unless existing Python objects have been matched."""
     pass
 
+  @abc.abstractmethod
+  def assert_nontrivial_match(self):
+    """Raises an exception if only the root object matched."""
+    pass
+
   @abc.abstractmethod
   def run_restore_ops(self, session=None):
     """Runs restore ops from the checkpoint. Requires a valid checkpoint."""
@@ -975,6 +980,26 @@ class CheckpointLoadStatus(_LoadStatus):
           % (list(unused_python_objects),))
     return self
 
+  def assert_nontrivial_match(self):
+    """Raises an exception if only the root object matched."""
+    for checkpointable_object in list_objects(self._root_checkpointable):
+      self._checkpoint.all_python_objects.add(checkpointable_object)
+    if len(self._checkpoint.object_by_proto_id) <= 1:
+      unused_python_objects = (
+          _ObjectIdentitySet(self._checkpoint.all_python_objects)
+          - _ObjectIdentitySet(self._checkpoint.object_by_proto_id.values()))
+      if unused_python_objects:
+        raise AssertionError(
+            ("Nothing except the root object matched a checkpointed value. "
+             "Typically this means that the checkpoint does not match the "
+             "Python program. The following objects have no matching "
+             "checkpointed value: %s") % (list(unused_python_objects),))
+      else:
+        raise AssertionError(
+            "Nothing to load. No dependencies have been added to %s yet." % (
+                self._root_checkpointable,))
+    return self
+
   def run_restore_ops(self, session=None):
     """Run operations to restore objects in the dependency graph."""
     if context.executing_eagerly():
@@ -1039,6 +1064,11 @@ class InitializationOnlyStatus(_LoadStatus):
     raise AssertionError(
         "No checkpoint specified (save_path=None); nothing is being restored.")
 
+  def assert_nontrivial_match(self):
+    """Assertion for consistency with `CheckpointLoadStatus`. Always fails."""
+    raise AssertionError(
+        "No checkpoint specified (save_path=None); nothing is being restored.")
+
   def run_restore_ops(self, session=None):
     """For consistency with `CheckpointLoadStatus`.
 
@@ -1122,6 +1152,14 @@ class NameBasedSaverStatus(_LoadStatus):
     # useful since we don't touch Python objects or Python state).
     return self.assert_consumed()
 
+  def assert_nontrivial_match(self):
+    """Raises an exception if currently created objects are unmatched."""
+    # For name-based checkpoints there's no object information in the
+    # checkpoint, so there's no distinction between
+    # assert_nontrivial_match and assert_consumed (and both are less
+    # useful since we don't touch Python objects or Python state).
+    return self.assert_consumed()
+
   def _gather_saveable_objects(self):
     """Walk the object graph, using global names for SaveableObjects."""
     objects = list_objects(self._root_checkpointable)
@@ -1779,13 +1817,15 @@ class Checkpoint(tracking.Checkpointable):
       status of a checkpoint restoration and run initialization/restore ops.
 
       The returned status object has the following methods:
-      - `assert_consumed()`:
+
+      * `assert_consumed()`:
           Raises an exception if any variables/objects are unmatched: either
           checkpointed values which don't have a matching Python object or
           Python objects in the dependency graph with no values in the
           checkpoint. This method returns the status object, and so may be
           chained with `initialize_or_restore` or `run_restore_ops`.
-      -  `assert_existing_objects_matched()`:
+
+      * `assert_existing_objects_matched()`:
           Raises an exception if any existing Python objects in the dependency
           graph are unmatched. Unlike `assert_consumed`, this assertion will
           pass if values in the checkpoint have no corresponding Python
@@ -1796,12 +1836,20 @@ class Checkpoint(tracking.Checkpointable):
           a `tf.train.Optimizer` was saved but only the state required for
           inference is being loaded. This method returns the status object, and
           so may be chained with `initialize_or_restore` or `run_restore_ops`.
-      - `initialize_or_restore(session=None)`:
+
+      * `assert_nontrivial_match()`: Asserts that something aside from the root
+          object was matched. This is a very weak assertion, but is useful for
+          sanity checking in library code where objects may exist in the
+          checkpoint which haven't been created in Python and some Python
+          objects may not have a checkpointed value.
+
+      * `initialize_or_restore(session=None)`:
           When graph building, runs variable initializers if `save_path` is
           `None`, but otherwise runs restore operations. If no `session` is
           explicitly specified, the default session is used. No effect when
           executing eagerly (variables are initialized or restored eagerly).
-      - `run_restore_ops(session=None)`:
+
+      * `run_restore_ops(session=None)`:
           When graph building, runs restore operations. If no `session` is
           explicitly specified, the default session is used. No effect when
           executing eagerly (restore operations are run eagerly). May only be
diff --git a/tensorflow/python/training/checkpointable/util_test.py b/tensorflow/python/training/checkpointable/util_test.py
index f8b5bd8501..14b47a1940 100644
--- a/tensorflow/python/training/checkpointable/util_test.py
+++ b/tensorflow/python/training/checkpointable/util_test.py
@@ -437,6 +437,7 @@ class CheckpointingTests(test.TestCase):
         optimizer=on_create_optimizer, model=on_create_model)
     # Deferred restoration
     status = on_create_root.restore(save_path=save_path)
+    status.assert_nontrivial_match()
     status.assert_existing_objects_matched()
     with self.assertRaises(AssertionError):
       status.assert_consumed()
@@ -1509,6 +1510,8 @@ class CheckpointCompatibilityTests(test.TestCase):
           status.assert_consumed()
         with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
           status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_nontrivial_match()
       else:
         # When graph building, we haven't read any keys, so we don't know
         # whether the restore will be complete.
@@ -1516,6 +1519,8 @@ class CheckpointCompatibilityTests(test.TestCase):
           status.assert_consumed()
         with self.assertRaisesRegexp(AssertionError, "not restored"):
           status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_nontrivial_match()
       status.run_restore_ops()
       self._check_sentinels(root)
       self._set_sentinels(root)
-- 
GitLab


From 13941241e984e4a4296891f4e61a9ed5b3107b22 Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dimvar@google.com>
Date: Wed, 3 Oct 2018 16:47:49 -0700
Subject: [PATCH 1095/1357] [TF:XLA] Improve the accounting for subcomputations
 in the heap simulator.

Subtract the size of the aliased buffers from the subcomputation estimate instead of from the current computation. This way, the memory estimate for the current computation is more accurate.

For the newly added test, the heap simulation calculates 48 bytes at head instead of the correct 64 bytes.

PiperOrigin-RevId: 215653047
---
 .../compiler/xla/service/heap_simulator.cc    |  34 +++--
 .../compiler/xla/service/heap_simulator.h     |  13 +-
 .../xla/service/heap_simulator_test.cc        | 118 +++++++++++++++++
 .../xla/service/hlo_memory_scheduler_test.cc  | 120 ------------------
 4 files changed, 136 insertions(+), 149 deletions(-)

diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index b343305554..9220865867 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -240,6 +240,7 @@ Status HeapSimulator::RunComputation(
 
     // Make sure each buffer get reused at most once.
     flat_hash_set<const BufferValue*> reused_buffers;
+    int64 alloc_size_by_instruction = 0;
     for (const BufferValue* buffer : buffers_defined_by_instruction) {
       if (IgnoreBuffer(buffer)) {
         continue;
@@ -272,14 +273,15 @@ Status HeapSimulator::RunComputation(
 
       if (!shared) {
         VLOG(3) << "  Allocating: " << buffer->ToString();
+        alloc_size_by_instruction += size_fn_(*buffer);
         Alloc(buffer, instruction);
       }
     }
     // Account for the memory used by subcomputations when estimating the
     // current heap size.
     if (memory_by_computation_ != nullptr) {
-      algorithm_->AccountForSubcomputationMemory(instruction,
-                                                 *memory_by_computation_);
+      algorithm_->AccountForSubcomputationMemory(
+          instruction, alloc_size_by_instruction, *memory_by_computation_);
     }
 
     // If all computations in the module have been scheduled, we can save memory
@@ -385,10 +387,8 @@ void HeapSimulator::Alloc(const BufferValue* buffer,
 
   allocated_buffers_.insert(buffer);
   const int64 size = size_fn_(*buffer);
-  const HloInstruction* instruction_to_calc_aliasing =
-      memory_by_computation_ == nullptr ? nullptr : instruction;
-  algorithm_->Alloc(buffer, size, instruction_to_calc_aliasing);
-  no_fragmentation_stats_->Alloc(buffer, size, instruction_to_calc_aliasing);
+  algorithm_->Alloc(buffer, size);
+  no_fragmentation_stats_->Alloc(buffer, size);
   FillDebugTrace(HeapSimulatorTrace::Event::ALLOC, buffer, instruction,
                  nullptr);
 }
@@ -526,20 +526,8 @@ void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size) {
   }
 }
 
-void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size,
-                                     const HloInstruction* instruction) {
-  // The output buffer of while/call/conditional is always aliased with the
-  // output buffer of the root instruction in the body. Don't double count.
-  if (instruction == nullptr ||
-      (instruction->opcode() != HloOpcode::kWhile &&
-       instruction->opcode() != HloOpcode::kCall &&
-       instruction->opcode() != HloOpcode::kConditional)) {
-    Alloc(buffer, size);
-  }
-}
-
 void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
-    const HloInstruction* instruction,
+    const HloInstruction* instruction, int64 alloc_size_by_instruction,
     const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   // We only count the memory usage of the largest subcomputation, instead of
@@ -554,6 +542,14 @@ void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
       }
     }
   }
+  if (max_subcomputation_bytes > 0 &&
+      (instruction->opcode() == HloOpcode::kWhile ||
+       instruction->opcode() == HloOpcode::kCall ||
+       instruction->opcode() == HloOpcode::kConditional)) {
+    // The output buffer of while/call/conditional is always aliased with the
+    // output buffer of the root instruction in the body. Don't double count.
+    max_subcomputation_bytes -= alloc_size_by_instruction;
+  }
   max_heap_size_ =
       std::max(max_heap_size_, current_heap_size_ + max_subcomputation_bytes);
 }
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index b0295a6163..dbbf43082f 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -218,12 +218,6 @@ class HeapAlgorithm {
   // Alloc allocates a buffer of 'size' bytes.
   virtual void Alloc(const BufferValue* buffer, int64 size) = 0;
 
-  // NoFragmentationStatsHeap overrides this method.
-  virtual void Alloc(const BufferValue* buffer, int64 size,
-                     const HloInstruction* instruction) {
-    Alloc(buffer, size);
-  }
-
   // Takes memory usage of subcomputations into account when calculating the
   // memory usage of a computation. Currently, we don't handle buffer aliasing
   // between computations entirely correctly. We are careful to not double count
@@ -235,6 +229,8 @@ class HeapAlgorithm {
   // analysis, it's not worth making major changes to HeapSimulator now.
   virtual void AccountForSubcomputationMemory(
       const HloInstruction* instruction,
+      // The total number of bytes allocated by instruction.
+      int64 alloc_size_by_instruction,
       const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) {}
 
@@ -257,11 +253,8 @@ class NoFragmentationStatsHeap : public HeapAlgorithm {
 
   void Alloc(const BufferValue* buffer, int64 size) override;
 
-  void Alloc(const BufferValue* buffer, int64 size,
-             const HloInstruction* instruction) override;
-
   void AccountForSubcomputationMemory(
-      const HloInstruction* instruction,
+      const HloInstruction* instruction, int64 alloc_size_by_instruction,
       const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) override;
 
diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc
index ea0bced923..e30e7667f3 100644
--- a/tensorflow/compiler/xla/service/heap_simulator_test.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc
@@ -98,6 +98,124 @@ TEST_F(MinimumMemoryForSequenceTest, MultiComputation) {
       HeapSimulator::MinimumMemoryForModule(schedule, size_fn).ValueOrDie());
 }
 
+TEST_F(MinimumMemoryForSequenceTest, SubcomputationAccounting) {
+  // HloModule SubcomputationAccounting
+
+  // %WhileBody (body_param: f32[4]) -> f32[4] {
+  //   %body_param = f32[4]{0} parameter(0)
+  //   %constant.1 = f32[4]{0} constant({1, 1, 1, 1})
+  //   ROOT %subtract = f32[4]{0} subtract(f32[4]{0} %body_param, f32[4]{0}
+  //   %constant.1)
+  // }
+
+  // %WhileCond (cond_param: f32[4]) -> pred[] {
+  //   %cond_param = f32[4]{0} parameter(0)
+  //   %slice = f32[1]{0} slice(f32[4]{0} %cond_param), slice={[0:1]}
+  //   %reshape = f32[] reshape(f32[1]{0} %slice)
+  //   %constant = f32[] constant(0)
+  //   ROOT %not-equal-to = pred[] not-equal-to(f32[] %reshape, f32[] %constant)
+  // }
+
+  // ENTRY %SubcomputationAccounting () -> f32[2,4] {
+  //   %constant.3 = f32[2,4]{1,0} constant(f32[2,4] { { 1, 2, 3, 4 }, { 1, 2,
+  //   3, 4 } }) %transpose = f32[2,4]{1,0} transpose(f32[2,4]{1,0}
+  //   %constant.3), dimensions={0,1} %constant.2 = f32[4]{0} constant({1, 1, 1,
+  //   1}) %while = f32[4]{0} while(f32[4]{0} %constant.2),
+  //   condition=%WhileCond, body=%WhileBody %broadcast = f32[2,4]{1,0}
+  //   broadcast(f32[4]{0} %while), dimensions={1} ROOT %add = f32[2,4]{1,0}
+  //   add(f32[2,4]{1,0} %transpose, f32[2,4]{1,0} %broadcast)
+  // }
+
+  auto module = CreateNewVerifiedModule();
+  const Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+  const Shape r1f32 = ShapeUtil::MakeShape(F32, {4});
+  const Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 4});
+
+  // reshape(slice(param)) != 0
+  // Needs 5 bytes
+  auto cond_builder = HloComputation::Builder("WhileCond");
+  HloInstruction* cond_param = cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r1f32, "cond_param"));
+  HloInstruction* slice =
+      cond_builder.AddInstruction(HloInstruction::CreateSlice(
+          ShapeUtil::MakeShape(F32, {1}), cond_param, {0}, {1}, {1}));
+  HloInstruction* reshape =
+      cond_builder.AddInstruction(HloInstruction::CreateReshape(r0f32, slice));
+  HloInstruction* zero = cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(0)));
+  HloInstruction* cond_comparison =
+      cond_builder.AddInstruction(HloInstruction::CreateBinary(
+          ShapeUtil::MakeShape(PRED, {}), HloOpcode::kNe, reshape, zero));
+  auto cond_computation = module->AddEmbeddedComputation(cond_builder.Build());
+
+  // param - 1
+  // Needs 16 bytes
+  auto body_builder = HloComputation::Builder("WhileBody");
+  HloInstruction* body_param = body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r1f32, "body_param"));
+  HloInstruction* one_vector =
+      body_builder.AddInstruction(HloInstruction::CreateConstant(
+          LiteralUtil::CreateR1<float>({1, 1, 1, 1})));
+  HloInstruction* subtract =
+      body_builder.AddInstruction(HloInstruction::CreateBinary(
+          r1f32, HloOpcode::kSubtract, body_param, one_vector));
+  auto body_computation = module->AddEmbeddedComputation(body_builder.Build());
+
+  // transpose(matrix) + bcast(while)
+  auto builder = HloComputation::Builder(TestName());
+  HloInstruction* while_init =
+      builder.AddInstruction(HloInstruction::CreateConstant(
+          LiteralUtil::CreateR1<float>({1, 1, 1, 1})));
+  // Creates 16 bytes, ignoring subcomputations
+  HloInstruction* while_loop =
+      builder.AddInstruction(HloInstruction::CreateWhile(
+          r1f32, cond_computation, body_computation, while_init));
+
+  // Creates 32 bytes and frees 16
+  HloInstruction* bcast = builder.AddInstruction(
+      HloInstruction::CreateBroadcast(r2f32, while_loop, {1}));
+
+  HloInstruction* matrix = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR2<float>(
+          {{1.0, 2.0, 3.0, 4.0}, {1.0, 2.0, 3.0, 4.0}})));
+  // Creates 32 bytes
+  HloInstruction* transpose = builder.AddInstruction(
+      HloInstruction::CreateTranspose(r2f32, matrix, {0, 1}));
+
+  // Creates 32 bytes and frees 64
+  HloInstruction* add = builder.AddInstruction(
+      HloInstruction::CreateBinary(r2f32, HloOpcode::kAdd, transpose, bcast));
+
+  auto entry_computation = module->AddEntryComputation(builder.Build());
+
+  HloSchedule schedule(module.get());
+  std::vector<HloInstruction*> cond_vec = {cond_param, slice, reshape, zero,
+                                           cond_comparison};
+  std::vector<HloInstruction*> while_body_vec = {body_param, one_vector,
+                                                 subtract};
+  std::vector<HloInstruction*> entry_comp_vec = {while_init, while_loop, bcast,
+                                                 matrix,     transpose,  add};
+  schedule.set_sequence(cond_computation, cond_vec);
+  schedule.set_sequence(body_computation, while_body_vec);
+  schedule.set_sequence(entry_computation, entry_comp_vec);
+
+  auto size_fn = [](const BufferValue& buffer) {
+    return ShapeUtil::ByteSizeOf(buffer.shape());
+  };
+  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
+  memory_by_computation[cond_computation] = 5;
+  memory_by_computation[body_computation] = 16;
+  std::unique_ptr<TuplePointsToAnalysis> points_to_analysis =
+      TuplePointsToAnalysis::Run(module.get()).ValueOrDie();
+
+  // HeapSimulator accounts for subcomputations. The output buffer is aliased,
+  // so we don't double count.
+  EXPECT_EQ(64, HeapSimulator::MinimumMemoryForComputation(
+                    *entry_computation, schedule.sequence(entry_computation),
+                    *points_to_analysis, size_fn, &memory_by_computation)
+                    .ValueOrDie());
+}
+
 const char kAlloc[] = "Alloc";
 const char kFree[] = "Free";
 const char kFinish[] = "Finish";
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
index 5a9fccc7dd..214119fba8 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
@@ -147,126 +147,6 @@ ENTRY root {
                                       instructions_by_name.at("e")));
 }
 
-TEST_F(HloSchedulingTest, ListAccountsForSubcomputations) {
-  // %WhileCond (cond_param: f32[4]) -> pred[] {
-  //   %cond_param = f32[4]{0} parameter(0)
-  //   %constant = f32[1,4]{1,0} constant(f32[1,4] { { 0, 0, 0, 0 } })
-  //   ROOT %not-equal-to = pred[] not-equal-to(
-  //     f32[4]{0} %cond_param, f32[1,4]{1,0} %constant)
-  // }
-  // %WhileBody (body_param: f32[4]) -> f32[4] {
-  //   %body_param = f32[4]{0} parameter(0)
-  //   %constant.1 = f32[1,4]{1,0} constant(f32[1,4] { { 1, 1, 1, 1 } })
-  //   ROOT %subtract = f32[4]{0} subtract(
-  //     f32[4]{0} %body_param, f32[1,4]{1,0} %constant.1)
-  // }
-  // %ListAccountsForSubcomputations () -> f32[2,4] {
-  //   %constant.3 = f32[2,4]{1,0} constant(
-  //     f32[2,4] { { 1, 2, 3, 4 }, { 1, 2, 3, 4 } })
-  //   %transpose = f32[2,4]{1,0} transpose(
-  //     f32[2,4]{1,0} %constant.3), dimensions={0,1}
-  //   %constant.2 = f32[1,4]{1,0} constant(f32[1,4] { { 1, 1, 1, 1 } })
-  //   %while = f32[4]{0} while(f32[1,4]{1,0} %constant.2),
-  //      condition=%WhileCond,
-  //      body=%WhileBody
-  //   %broadcast = f32[2,4]{1,0} broadcast(f32[4]{0} %while), dimensions={0}
-  //   ROOT %add = f32[2,4]{1,0} add(
-  //     f32[2,4]{1,0} %transpose, f32[2,4]{1,0} %broadcast)
-  // }
-
-  auto module = CreateNewModule();
-  const Shape r1f32 = ShapeUtil::MakeShape(F32, {4});
-  const Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 4});
-
-  // param != 0
-  // Needs 17 bytes
-  auto cond_builder = HloComputation::Builder("WhileCond");
-  HloInstruction* cond_param = cond_builder.AddInstruction(
-      HloInstruction::CreateParameter(0, r1f32, "cond_param"));
-  HloInstruction* zero_vector =
-      cond_builder.AddInstruction(HloInstruction::CreateConstant(
-          LiteralUtil::CreateR2<float>({{0, 0, 0, 0}})));
-  cond_builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(PRED, {}), HloOpcode::kNe, cond_param, zero_vector));
-  auto cond_computation = module->AddEmbeddedComputation(cond_builder.Build());
-
-  // param - 1
-  // Needs 16 bytes
-  auto body_builder = HloComputation::Builder("WhileBody");
-  HloInstruction* body_param = body_builder.AddInstruction(
-      HloInstruction::CreateParameter(0, r1f32, "body_param"));
-  HloInstruction* one_vector =
-      body_builder.AddInstruction(HloInstruction::CreateConstant(
-          LiteralUtil::CreateR2<float>({{1, 1, 1, 1}})));
-  body_builder.AddInstruction(HloInstruction::CreateBinary(
-      r1f32, HloOpcode::kSubtract, body_param, one_vector));
-  auto body_computation = module->AddEmbeddedComputation(body_builder.Build());
-
-  // transpose(matrix) + bcast(while)
-  auto builder = HloComputation::Builder(TestName());
-  HloInstruction* while_init =
-      builder.AddInstruction(HloInstruction::CreateConstant(
-          LiteralUtil::CreateR2<float>({{1, 1, 1, 1}})));
-  // Creates 16 bytes, ignoring subcomputations
-  HloInstruction* while_loop =
-      builder.AddInstruction(HloInstruction::CreateWhile(
-          r1f32, cond_computation, body_computation, while_init));
-
-  // Creates 32 bytes and frees 16
-  HloInstruction* bcast = builder.AddInstruction(
-      HloInstruction::CreateBroadcast(r2f32, while_loop, {0}));
-
-  HloInstruction* matrix = builder.AddInstruction(
-      HloInstruction::CreateConstant(LiteralUtil::CreateR2<float>(
-          {{1.0, 2.0, 3.0, 4.0}, {1.0, 2.0, 3.0, 4.0}})));
-  // Creates 32 bytes
-  HloInstruction* transpose = builder.AddInstruction(
-      HloInstruction::CreateTranspose(r2f32, matrix, {0, 1}));
-
-  // Creates 32 bytes and frees 64
-  HloInstruction* add = builder.AddInstruction(
-      HloInstruction::CreateBinary(r2f32, HloOpcode::kAdd, transpose, bcast));
-
-  module->AddEntryComputation(builder.Build());
-
-  auto size_fn = [](const BufferValue& buffer) {
-    return ShapeUtil::ByteSizeOf(buffer.shape());
-  };
-  TF_ASSERT_OK_AND_ASSIGN(
-      HloSchedule schedule,
-      ScheduleModule(*module, size_fn, ListMemoryScheduler));
-  // Verify that all instructions are in the sequence.
-  auto entry_computation = module->entry_computation();
-  EXPECT_EQ(entry_computation->instruction_count(),
-            schedule.sequence(entry_computation).size());
-  SequentialHloOrdering ordering(schedule);
-  // This schedule is an example of List's greedy heuristics being suboptimal.
-  // The while_loop is more expensive than transpose, so it would have been
-  // better to schedule it first, instead of during the busy time.
-  EXPECT_TRUE(ordering.ExecutesBefore(transpose, while_loop));
-  EXPECT_TRUE(ordering.ExecutesBefore(transpose, bcast));
-  EXPECT_TRUE(ordering.ExecutesBefore(bcast, add));
-  EXPECT_TRUE(ordering.ExecutesBefore(transpose, add));
-
-  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
-  memory_by_computation[cond_computation] = 17;
-  memory_by_computation[body_computation] = 16;
-  std::unique_ptr<TuplePointsToAnalysis> points_to_analysis =
-      TuplePointsToAnalysis::Run(module.get()).ValueOrDie();
-
-  // HeapSimulator doesn't account for subcomputations
-  EXPECT_EQ(80, HeapSimulator::MinimumMemoryForComputation(
-                    *entry_computation, schedule.sequence(entry_computation),
-                    *points_to_analysis, size_fn)
-                    .ValueOrDie());
-  // HeapSimulator accounts for subcomputations. The output buffer is aliased,
-  // so we don't double count.
-  EXPECT_EQ(64, HeapSimulator::MinimumMemoryForComputation(
-                    *entry_computation, schedule.sequence(entry_computation),
-                    *points_to_analysis, size_fn, &memory_by_computation)
-                    .ValueOrDie());
-}
-
 TEST_F(HloSchedulingTest, TuplesAreAccountedCorrectly) {
   auto builder = HloComputation::Builder(TestName());
   const auto TUPLE_SIZE = 1;
-- 
GitLab


From caaf9a89750a9a0b3d66f3ce3e9bd507f4c6514c Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Wed, 3 Oct 2018 16:51:30 -0700
Subject: [PATCH 1096/1357] Create new classes for Keras tests to allow us to
 create new test targets.

PiperOrigin-RevId: 215653650
---
 .../contrib/distribute/python/keras_test.py   | 256 +++++++++---------
 1 file changed, 131 insertions(+), 125 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 993cb2bac3..3511b7761f 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -355,48 +355,9 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase):
     gfile.DeleteRecursively(self._config.model_dir)
 
 
-class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
-
-  def test_validating_dataset_input_tensors_with_shape_mismatch(self):
-    with self.cached_session():
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
-                                                     '/device:CPU:0'])
-      a = constant_op.constant([1, 2], shape=(1, 2))
-      b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
-      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
-      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
-      with strategy.scope():
-        # Removed device and input tensor shape details from the error message
-        # since the order of the device and the corresponding input tensor shape
-        # is not deterministic over different runs.
-        with self.assertRaisesRegexp(ValueError,
-                                     'Input tensor shapes do not match for '
-                                     'distributed tensor inputs '
-                                     'DistributedValues:.+'):
-          distributed_training_utils.validate_distributed_dataset_inputs(
-              strategy, x, y)
-
-  def test_validating_dataset_input_tensors_with_dtype_mismatch(self):
-    with self.cached_session():
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
-                                                     '/device:CPU:0'])
-      a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
-      b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64)
-      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
-      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
-      with strategy.scope():
-        # Removed device and input tensor dtype details from the error message
-        # since the order of the device and the corresponding input tensor dtype
-        # is not deterministic over different runs.
-        with self.assertRaisesRegexp(ValueError,
-                                     'Input tensor dtypes do not match for '
-                                     'distributed tensor inputs '
-                                     'DistributedValues:.+'):
-          distributed_training_utils.validate_distributed_dataset_inputs(
-              strategy, x, y)
+class TestDistributionStrategyWithNumpyArrays(test.TestCase,
+                                              parameterized.TestCase):
 
-  # TODO(anjalisridhar): Move this test along with other numpy related tests to
-  # its own class.
   @combinations.generate(strategy_combinations())
   def test_creating_var_with_numpy_arrays(self, distribution):
     with self.cached_session():
@@ -479,6 +440,10 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       # with batch_size
       model.predict(inputs, batch_size=8)
 
+
+class TestDistributionStrategyWithDatasets(test.TestCase,
+                                           parameterized.TestCase):
+
   @combinations.generate(strategy_combinations())
   def test_calling_model_on_same_dataset(self, distribution):
     with self.cached_session():
@@ -572,86 +537,6 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       model.evaluate(dataset, steps=2, verbose=1)
       model.predict(get_predict_dataset(distribution), steps=2)
 
-  def test_unsupported_features(self):
-    with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae']
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
-                                                     '/device:GPU:0'])
-
-      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
-
-      dataset = get_dataset(strategy)
-
-      # Test with validation split
-      with self.assertRaisesRegexp(
-          ValueError, '`validation_split` argument is not '
-                      'supported when input `x` is a dataset or a '
-                      'dataset iterator.+'):
-        model.fit(dataset,
-                  epochs=1, steps_per_epoch=2, verbose=0,
-                  validation_split=0.5, validation_steps=2)
-
-      # Test with sample weight.
-      sample_weight = np.random.random((10,))
-      with self.assertRaisesRegexp(
-          NotImplementedError, '`sample_weight` is currently not supported '
-                               'when using DistributionStrategy.'):
-        model.fit(
-            dataset,
-            epochs=1,
-            steps_per_epoch=2,
-            verbose=0,
-            sample_weight=sample_weight)
-
-      # Test with not specifying the `steps` argument.
-      with self.assertRaisesRegexp(
-          ValueError, 'you should specify the `steps_per_epoch` argument'):
-        model.fit(dataset, epochs=1, verbose=0)
-      with self.assertRaisesRegexp(ValueError,
-                                   'you should specify the `steps` argument'):
-        model.evaluate(dataset, verbose=0)
-
-      with self.assertRaisesRegexp(ValueError,
-                                   'you should specify the `steps` argument'):
-        model.predict(dataset, verbose=0)
-
-  def test_calling_with_unsupported_predefined_callbacks(self):
-    with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae']
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
-                                                     '/device:GPU:0'])
-      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
-
-      dataset = get_dataset(strategy)
-
-      def schedule(_):
-        return 0.001
-      with self.assertRaisesRegexp(ValueError,
-                                   'LearningRateScheduler callback is not '
-                                   'supported with DistributionStrategy.'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
-
-      with self.assertRaisesRegexp(ValueError,
-                                   'ReduceLROnPlateau callback is not '
-                                   'supported with DistributionStrategy.'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.ReduceLROnPlateau()])
-      with self.assertRaisesRegexp(ValueError,
-                                   'histogram_freq in the TensorBoard callback '
-                                   'is not supported when using '
-                                   'DistributionStrategy.'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)])
-
   def test_dataset_input_shape_validation(self):
     with self.cached_session():
       model = get_model()
@@ -736,7 +621,128 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       self.assertNotEqual(np.mean(predict_output), 0)
 
 
-class LossMaskingWithDistributionStrategyTest(test.TestCase):
+class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
+
+  def test_validating_dataset_input_tensors_with_shape_mismatch(self):
+    with self.cached_session():
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
+                                                     '/device:CPU:0'])
+      a = constant_op.constant([1, 2], shape=(1, 2))
+      b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
+      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
+      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
+      with strategy.scope():
+        # Removed device and input tensor shape details from the error message
+        # since the order of the device and the corresponding input tensor shape
+        # is not deterministic over different runs.
+        with self.assertRaisesRegexp(ValueError,
+                                     'Input tensor shapes do not match for '
+                                     'distributed tensor inputs '
+                                     'DistributedValues:.+'):
+          distributed_training_utils.validate_distributed_dataset_inputs(
+              strategy, x, y)
+
+  def test_validating_dataset_input_tensors_with_dtype_mismatch(self):
+    with self.cached_session():
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
+                                                     '/device:CPU:0'])
+      a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
+      b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64)
+      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
+      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
+      with strategy.scope():
+        # Removed device and input tensor dtype details from the error message
+        # since the order of the device and the corresponding input tensor dtype
+        # is not deterministic over different runs.
+        with self.assertRaisesRegexp(ValueError,
+                                     'Input tensor dtypes do not match for '
+                                     'distributed tensor inputs '
+                                     'DistributedValues:.+'):
+          distributed_training_utils.validate_distributed_dataset_inputs(
+              strategy, x, y)
+
+  def test_unsupported_features(self):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      metrics = ['mae']
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
+                                                     '/device:GPU:0'])
+
+      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
+
+      dataset = get_dataset(strategy)
+
+      # Test with validation split
+      with self.assertRaisesRegexp(
+          ValueError, '`validation_split` argument is not '
+                      'supported when input `x` is a dataset or a '
+                      'dataset iterator.+'):
+        model.fit(dataset,
+                  epochs=1, steps_per_epoch=2, verbose=0,
+                  validation_split=0.5, validation_steps=2)
+
+      # Test with sample weight.
+      sample_weight = np.random.random((10,))
+      with self.assertRaisesRegexp(
+          NotImplementedError, '`sample_weight` is currently not supported '
+                               'when using DistributionStrategy.'):
+        model.fit(
+            dataset,
+            epochs=1,
+            steps_per_epoch=2,
+            verbose=0,
+            sample_weight=sample_weight)
+
+      # Test with not specifying the `steps` argument.
+      with self.assertRaisesRegexp(
+          ValueError, 'you should specify the `steps_per_epoch` argument'):
+        model.fit(dataset, epochs=1, verbose=0)
+      with self.assertRaisesRegexp(ValueError,
+                                   'you should specify the `steps` argument'):
+        model.evaluate(dataset, verbose=0)
+
+      with self.assertRaisesRegexp(ValueError,
+                                   'you should specify the `steps` argument'):
+        model.predict(dataset, verbose=0)
+
+  def test_calling_with_unsupported_predefined_callbacks(self):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      metrics = ['mae']
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
+                                                     '/device:GPU:0'])
+      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
+
+      dataset = get_dataset(strategy)
+
+      def schedule(_):
+        return 0.001
+      with self.assertRaisesRegexp(ValueError,
+                                   'LearningRateScheduler callback is not '
+                                   'supported with DistributionStrategy.'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                  callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
+
+      with self.assertRaisesRegexp(ValueError,
+                                   'ReduceLROnPlateau callback is not '
+                                   'supported with DistributionStrategy.'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                  callbacks=[keras.callbacks.ReduceLROnPlateau()])
+      with self.assertRaisesRegexp(ValueError,
+                                   'histogram_freq in the TensorBoard callback '
+                                   'is not supported when using '
+                                   'DistributionStrategy.'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                  callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)])
+
+
+class TestDistributionStrategyWithLossMasking(test.TestCase):
 
   # TODO(priyag): Enable all strategies for this test. Currently it does not
   # work for TPU due to some invalid datatype.
@@ -763,7 +769,7 @@ class LossMaskingWithDistributionStrategyTest(test.TestCase):
       self.assertEqual(hist.history['loss'][0], 0)
 
 
-class NormalizationLayerWithDistributionStrategyTest(
+class TestDistributionStrategyWithNormalizationLayer(
     test.TestCase, parameterized.TestCase):
 
   @combinations.generate(strategy_combinations())
@@ -795,8 +801,8 @@ class NormalizationLayerWithDistributionStrategyTest(
       np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
 
 
-class CorrectnessWithDistributionStrategyTest(test.TestCase,
-                                              parameterized.TestCase):
+class TestDistributionStrategyCorrectness(test.TestCase,
+                                          parameterized.TestCase):
 
   @combinations.generate(strategy_combinations())
   def test_metric_correctness(self, distribution):
-- 
GitLab


From 3a9a3664fe1aa9e5c81ca4959f028c2a8161520e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 16:52:30 -0700
Subject: [PATCH 1097/1357] Fix 1970s-style bug in LogSoftmax eval.

PiperOrigin-RevId: 215653797
---
 tensorflow/contrib/lite/kernels/activations.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc
index cf9441aee3..9aed4f09b8 100644
--- a/tensorflow/contrib/lite/kernels/activations.cc
+++ b/tensorflow/contrib/lite/kernels/activations.cc
@@ -616,13 +616,15 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
   switch (input->type) {
-    case kTfLiteFloat32:
+    case kTfLiteFloat32: {
       SoftmaxParams op_params;
       optimized_ops::LogSoftmax(
           op_params, GetTensorShape(input), GetTensorData<float>(input),
           GetTensorShape(output), GetTensorData<float>(output));
       return kTfLiteOk;
-    case kTfLiteUInt8:
+    }
+    case kTfLiteUInt8: {
+      SoftmaxParams op_params;
       op_params.input_multiplier = data->input_multiplier;
       op_params.input_left_shift = data->input_left_shift;
       op_params.reverse_scaling_divisor = data->reverse_scaling_divisor;
@@ -632,6 +634,7 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
           op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
           GetTensorShape(output), GetTensorData<uint8_t>(output));
       return kTfLiteOk;
+    }
     default:
       context->ReportError(context, "Only float32 supported currently., got %d",
                            input->type);
-- 
GitLab


From d340eb9f7ea46012b7ead202f4c12fb6b32cc56d Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 16:56:14 -0700
Subject: [PATCH 1098/1357] Increase error-epsilon for
 ProfilingTest::ProfilesAreCollected.

PiperOrigin-RevId: 215654327
---
 tensorflow/contrib/lite/profiling/profiler_test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/profiling/profiler_test.cc b/tensorflow/contrib/lite/profiling/profiler_test.cc
index 0fba0450a0..cf56eed2a4 100644
--- a/tensorflow/contrib/lite/profiling/profiler_test.cc
+++ b/tensorflow/contrib/lite/profiling/profiler_test.cc
@@ -83,8 +83,8 @@ TEST(ProfilingTest, ProfilesAreCollected) {
   EXPECT_EQ("SleepForQuarter", profile_events[4]->tag);
 
 #ifndef ADDRESS_SANITIZER
-  // ASAN build is sometimes very slow.
-  const int eps_ms = 10;
+  // ASAN build is sometimes very slow. Set a large epsilon to avoid flakiness.
+  const int eps_ms = 50;
   AssertDurationOfEventAroundMs(profile_events[0], /*expected_ms*/ 500, eps_ms);
   AssertDurationOfEventAroundMs(profile_events[1], /*expected_ms*/ 250, eps_ms);
   AssertDurationOfEventAroundMs(profile_events[2], /*expected_ms*/ 250, eps_ms);
-- 
GitLab


From c842d38978a0babb373fe2acbb0231960aa1c1d0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 17:05:38 -0700
Subject: [PATCH 1099/1357] Add MinimalRNN cell.

The implementation is based on: https://arxiv.org/pdf/1806.05394v2.pdf.

PiperOrigin-RevId: 215655857
---
 .../rnn/python/kernel_tests/rnn_cell_test.py  |  72 +++++++++++
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 116 ++++++++++++++++++
 2 files changed, 188 insertions(+)

diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index 6689664fb9..0a27200015 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -29,6 +29,9 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
+from tensorflow.python.keras import initializers
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.keras import utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
@@ -40,7 +43,9 @@ from tensorflow.python.ops import rnn_cell
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import test
+from tensorflow.python.training import training
 from tensorflow.python.util import nest
 
 
@@ -1115,6 +1120,73 @@ class RNNCellTest(test.TestCase):
             r"input size \(3\) must be divisible by number_of_groups \(2\)"):
           gcell(glstm_input, gcell_zero_state)
 
+  def testMinimalRNNCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root"):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        cell = contrib_rnn_cell.MinimalRNNCell(
+            units=2,
+            kernel_initializer=initializers.Constant(0.5))
+        g, _ = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.18899589, 0.18899589]])
+      with variable_scope.variable_scope(
+          "other"):
+        # Test MinimalRNN with input_size != num_units.
+        x = array_ops.zeros([1, 3])
+        m = array_ops.zeros([1, 2])
+        cell = contrib_rnn_cell.MinimalRNNCell(
+            units=2,
+            kernel_initializer=initializers.Constant(0.5))
+        g, _ = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.19554167, 0.19554167]])
+
+  def testMinimalRNNCellEndToEnd(self):
+    with self.cached_session() as sess:
+      input_shape = 10
+      output_shape = 5
+      timestep = 4
+      batch = 100
+      (x_train, y_train), _ = testing_utils.get_test_data(
+          train_samples=batch,
+          test_samples=0,
+          input_shape=(timestep, input_shape),
+          num_classes=output_shape)
+      y_train = utils.to_categorical(y_train)
+      cell = contrib_rnn_cell.MinimalRNNCell(output_shape)
+
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape))
+      predict = array_ops.placeholder(
+          dtypes.float32, shape=(None, output_shape))
+
+      outputs, state = rnn.dynamic_rnn(
+          cell, inputs, dtype=dtypes.float32)
+      self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape])
+      self.assertEqual(state.shape.as_list(), [None, output_shape])
+      loss = losses.softmax_cross_entropy(predict, state)
+      train_op = training.GradientDescentOptimizer(0.001).minimize(loss)
+
+      sess.run([variables.global_variables_initializer()])
+      _, outputs, state = sess.run(
+          [train_op, outputs, state], {inputs: x_train, predict: y_train})
+
+      self.assertEqual(len(outputs), batch)
+      self.assertEqual(len(state), batch)
+
 
 class LayerNormBasicLSTMCellTest(test.TestCase):
 
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 06c481672c..59a61af7b3 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -28,6 +28,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import op_def_registry
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.keras import activations
+from tensorflow.python.keras import initializers
 from tensorflow.python.layers import base as base_layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
@@ -3394,3 +3396,117 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell):
 
     new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h)
     return new_h, new_state
+
+
+class MinimalRNNCell(rnn_cell_impl.LayerRNNCell):
+  """MinimalRNN cell.
+
+  The implementation is based on:
+
+    https://arxiv.org/pdf/1806.05394v2.pdf
+
+  Minmin Chen, Jeffrey Pennington, Samuel S. Schoenholz.
+  "Dynamical Isometry and a Mean Field Theory of RNNs: Gating Enables Signal
+   Propagation in Recurrent Neural Networks." ICML, 2018.
+
+  A MinimalRNN cell first projects the input to the hidden space. The new
+  hidden state is then calcuated as a weighted sum of the projected input and
+  the previous hidden state, using a single update gate.
+  """
+
+  def __init__(self,
+               units,
+               activation="tanh",
+               kernel_initializer="glorot_uniform",
+               bias_initializer="ones",
+               name=None,
+               dtype=None,
+               **kwargs):
+    """Initialize the parameters for a MinimalRNN cell.
+
+    Args:
+      units: int, The number of units in the MinimalRNN cell.
+      activation: Nonlinearity to use in the feedforward network. Default:
+        `tanh`.
+      kernel_initializer: The initializer to use for the weight in the update
+        gate and feedforward network. Default: `glorot_uniform`.
+      bias_initializer: The initializer to use for the bias in the update
+        gate. Default: `ones`.
+      name: String, the name of the cell.
+      dtype: Default dtype of the cell.
+      **kwargs: Dict, keyword named properties for common cell attributes.
+    """
+    super(MinimalRNNCell, self).__init__(name=name, dtype=dtype, **kwargs)
+
+    # Inputs must be 2-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+    self.units = units
+    self.activation = activations.get(activation)
+    self.kernel_initializer = initializers.get(kernel_initializer)
+    self.bias_initializer = initializers.get(bias_initializer)
+
+  @property
+  def state_size(self):
+    return self.units
+
+  @property
+  def output_size(self):
+    return self.units
+
+  def build(self, inputs_shape):
+    if inputs_shape[-1] is None:
+      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
+                       % str(inputs_shape))
+
+    input_size = inputs_shape[-1]
+    # pylint: disable=protected-access
+    # self._kernel contains W_x, W, V
+    self.kernel = self.add_weight(
+        name=rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+        shape=[input_size + 2 * self.units, self.units],
+        initializer=self.kernel_initializer)
+    self.bias = self.add_weight(
+        name=rnn_cell_impl._BIAS_VARIABLE_NAME,
+        shape=[self.units],
+        initializer=self.bias_initializer)
+    # pylint: enable=protected-access
+
+    self.built = True
+
+  def call(self, inputs, state):
+    """Run one step of MinimalRNN.
+
+    Args:
+      inputs: input Tensor, must be 2-D, `[batch, input_size]`.
+      state: state Tensor, must be 2-D, `[batch, state_size]`.
+
+    Returns:
+      A tuple containing:
+
+      - Output: A `2-D` tensor with shape `[batch_size, state_size]`.
+      - New state: A `2-D` tensor with shape `[batch_size, state_size]`.
+
+    Raises:
+      ValueError: If input size cannot be inferred from inputs via
+        static shape inference.
+    """
+    input_size = inputs.get_shape()[1]
+    if input_size.value is None:
+      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
+
+    feedforward_weight, gate_weight = array_ops.split(
+        value=self.kernel,
+        num_or_size_splits=[input_size.value, 2 * self.units],
+        axis=0)
+
+    feedforward = math_ops.matmul(inputs, feedforward_weight)
+    feedforward = self.activation(feedforward)
+
+    gate_inputs = math_ops.matmul(
+        array_ops.concat([feedforward, state], 1), gate_weight)
+    gate_inputs = nn_ops.bias_add(gate_inputs, self.bias)
+    u = math_ops.sigmoid(gate_inputs)
+
+    new_h = u * state + (1 - u) * feedforward
+    return new_h, new_h
-- 
GitLab


From 4da5b350e1c062b9d55896ee872e0e4790f30bcb Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Wed, 3 Oct 2018 17:25:46 -0700
Subject: [PATCH 1100/1357] TFLite Flex: Blacklist Control Flow Ops

PiperOrigin-RevId: 215658384
---
 tensorflow/contrib/lite/toco/tflite/export.cc | 132 +++++++++++++-----
 tensorflow/contrib/lite/toco/tflite/export.h  |  20 ++-
 .../contrib/lite/toco/tflite/export_test.cc   |  40 ++++++
 3 files changed, 152 insertions(+), 40 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index 0c9fac249c..45ca7f7f0c 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -47,6 +47,22 @@ using ::tflite::Tensor;
 
 namespace {
 
+// Check if a TensorFlow Op is a control flow op by its name.
+bool IsControlFlowOp(const string& tensorflow_op) {
+  // Technically this is equalivent to `::tensorflow::Node::IsControlFlow()`.
+  // It requires to construct a `::tensorflow::Graph` to use that helper
+  // function, so we simply hardcode the list of control flow ops here.
+  if (tensorflow_op == "Switch" || tensorflow_op == "RefSwitch" ||
+      tensorflow_op == "Merge" || tensorflow_op == "RefMerge" ||
+      tensorflow_op == "Enter" || tensorflow_op == "RefEnter" ||
+      tensorflow_op == "Exit" || tensorflow_op == "RefExit" ||
+      tensorflow_op == "NextIteration" || tensorflow_op == "RefNextIteration") {
+    return true;
+  }
+  // TODO(ycling): Also check how to handle Variable ops and Assign ops.
+  return false;
+}
+
 details::OperatorKey GetOperatorKey(
     const ::toco::Operator& op,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
@@ -55,21 +71,13 @@ details::OperatorKey GetOperatorKey(
   if (op.type == OperatorType::kUnsupported) {
     const TensorFlowUnsupportedOperator& unsupported_op =
         static_cast<const TensorFlowUnsupportedOperator&>(op);
-
-    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
-    // to populate a regular custom op. We need to find a way to fix this.
-    if (allow_flex_ops) {
-      custom_code = string(::tflite::kFlexCustomCodePrefix) +
-                    unsupported_op.tensorflow_op;
-    } else {
-      custom_code = unsupported_op.tensorflow_op;
-    }
+    custom_code = unsupported_op.tensorflow_op;
   }
   int version = 1;
   if (ops_by_type.count(op.type) != 0) {
     version = ops_by_type.at(op.type)->GetVersion(op);
   }
-  return details::OperatorKey(op.type, custom_code, version);
+  return details::OperatorKey(op.type, custom_code, version, allow_flex_ops);
 }
 
 void WriteModelToString(const flatbuffers::FlatBufferBuilder& builder,
@@ -83,6 +91,29 @@ void WriteModelToString(const flatbuffers::FlatBufferBuilder& builder,
 
 namespace details {
 
+OperatorKey::OperatorKey(OperatorType type, const std::string& custom_code,
+                         int version, bool allow_flex_ops) {
+  this->type = type;
+  this->custom_code = custom_code;
+  this->version = version;
+
+  if (type == OperatorType::kUnsupported) {
+    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
+    // to populate a regular custom op. We need to find a way to fix this.
+    if (allow_flex_ops) {
+      // Memorize the original TensorFlow op name.
+      this->flex_tensorflow_op = custom_code;
+      // Prefix the custom code of the flex op.
+      this->custom_code = string(::tflite::kFlexCustomCodePrefix) + custom_code;
+      this->is_flex_op = true;
+
+      if (IsControlFlowOp(this->flex_tensorflow_op)) {
+        is_unsupported_flex_op = true;
+      }
+    }
+  }
+}
+
 void LoadTensorsMap(const Model& model, TensorsMap* tensors_map) {
   // First find a list of unique array names.
   std::set<string> names;
@@ -199,7 +230,7 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
     const Model& model,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
     const details::OperatorsMap& operators_map, FlatBufferBuilder* builder,
-    std::set<string>* error_summary, const ExportParams& params) {
+    std::set<string>* unsupported_ops, const ExportParams& params) {
   // Map from operator name to TF Lite enum value, for all builtins.
   std::map<string, BuiltinOperator> builtin_ops;
   for (int i = BuiltinOperator_MIN; i <= BuiltinOperator_MAX; ++i) {
@@ -240,8 +271,8 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
       }
       // Either way, this is an operator that is not supported by TF Lite,
       // so we output it as a custom op and add it to the error summary.
-      if (error_summary) {
-        error_summary->insert(name);
+      if (unsupported_ops) {
+        unsupported_ops->insert(name);
       }
       ordered_opcodes[op_index] =
           CreateOperatorCode(*builder, BuiltinOperator_CUSTOM,
@@ -355,9 +386,9 @@ void Export(
   Array empty_array;
   buffers_to_write.push_back(&empty_array);
 
-  std::set<string> error_summary;
+  std::set<string> unsupported_ops;
   auto op_codes = ExportOperatorCodes(model, ops_by_type, operators_map,
-                                      &builder, &error_summary, params);
+                                      &builder, &unsupported_ops, params);
 
   for (const auto& op : model.operators) {
     if (op->type == OperatorType::kFakeQuant) {
@@ -367,30 +398,61 @@ void Export(
                       "for --std_values and --mean_values.";
     }
   }
-  if (!params.allow_custom_ops && !error_summary.empty()) {
-    // Remove ExpandDims and ReorderAxes from unimplemented list unless they
-    // compose the list. Both ops are removed during graph transformations.
-    // However, if an op is unimplemented earlier in the model, the graph
-    // transformation is unable to run because the output shape is not defined.
-    // This causes unnecessary confusion during model conversion time.
-    std::set<string> error_summary_final;
-    for (const auto& op_type : error_summary) {
-      if (op_type != "ReorderAxes" && op_type != "ExpandDims") {
-        error_summary_final.insert(op_type);
+  if (!unsupported_ops.empty()) {
+    if (!params.allow_custom_ops) {
+      // Remove ExpandDims and ReorderAxes from unimplemented list unless they
+      // compose the list. Both ops are removed during graph transformations.
+      // However, if an op is unimplemented earlier in the model, the graph
+      // transformation is unable to run because the output shape is not
+      // defined. This causes unnecessary confusion during model conversion
+      // time.
+      std::set<string> unsupported_ops_final;
+      for (const auto& op_type : unsupported_ops) {
+        if (op_type != "ReorderAxes" && op_type != "ExpandDims") {
+          unsupported_ops_final.insert(op_type);
+        }
+      }
+      if (unsupported_ops_final.empty()) {
+        unsupported_ops_final = unsupported_ops;
+      }
+
+      LOG(QFATAL)
+          << "Some of the operators in the model are not supported by "
+             "the standard TensorFlow Lite runtime. If you have a custom "
+             "implementation for them you can disable this error with "
+             "--allow_custom_ops, or by setting allow_custom_ops=True "
+             "when calling tf.contrib.lite.TFLiteConverter(). Here is a list "
+             "of operators for which  you will need custom implementations: "
+          << absl::StrJoin(unsupported_ops_final, ", ") << ".";
+    }
+
+    std::set<string> unsupported_control_flow_ops;
+    // Check if unsupported ops contains control flow ops. It's impossible
+    // to implement these ops as custom ops at the moment.
+    for (const auto& op : unsupported_ops) {
+      if (IsControlFlowOp(op)) {
+        unsupported_control_flow_ops.insert(op);
       }
     }
-    if (error_summary_final.empty()) {
-      error_summary_final = error_summary;
+    if (!unsupported_control_flow_ops.empty()) {
+      LOG(QFATAL)
+          << "TensorFlow Lite currently doesn't support control flow ops: "
+          << absl::StrJoin(unsupported_control_flow_ops, ", ") << ".";
     }
+  }
+
+  std::set<string> unsupported_flex_ops;
+  for (const auto& it : operators_map) {
+    const details::OperatorKey& key = it.first;
+    if (key.is_unsupported_flex_op) {
+      unsupported_flex_ops.insert(key.custom_code);
+    }
+  }
 
-    LOG(QFATAL)
-        << "Some of the operators in the model are not supported by "
-           "the standard TensorFlow Lite runtime. If you have a custom "
-           "implementation for them you can disable this error with "
-           "--allow_custom_ops, or by setting allow_custom_ops=True "
-           "when calling tf.contrib.lite.TFLiteConverter(). Here is a list "
-           "of operators for which  you will need custom implementations: "
-        << absl::StrJoin(error_summary_final, ", ") << ".";
+  if (!unsupported_flex_ops.empty()) {
+    LOG(QFATAL) << "Some of the operators in the model are not supported by "
+                   "TensorFlow Flex runtime: "
+                << absl::StrJoin(unsupported_flex_ops, ", ") << ".";
   }
 
   std::set<int32_t> variable_tensor_indices;
diff --git a/tensorflow/contrib/lite/toco/tflite/export.h b/tensorflow/contrib/lite/toco/tflite/export.h
index 29d6de4049..9efb282c6c 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.h
+++ b/tensorflow/contrib/lite/toco/tflite/export.h
@@ -81,11 +81,21 @@ using TensorsMap = std::unordered_map<string, int>;
 // Only when `type` is `kUnsupported`, `custom_code` is filled to
 // identify which operation is used.
 struct OperatorKey {
-  OperatorKey(OperatorType type, const std::string& custom_code, int version)
-      : type(type), custom_code(custom_code), version(version) {}
-  const OperatorType type;
-  const std::string custom_code;
-  const int version;
+  OperatorKey(OperatorType type, const std::string& custom_code, int version,
+              bool allow_flex_ops = false);
+
+  // Only `type`, `custom_code` and `version` is used to compute hash and
+  // identity.
+  OperatorType type;
+  std::string custom_code;
+  int version;
+
+  // THe fields below are not used to compute hash and identity.
+  bool is_flex_op = false;
+  bool is_unsupported_flex_op = false;
+  // The original TensorFlow op name for the flex op. Filled only when
+  // `is_flex_op` is true.
+  std::string flex_tensorflow_op;
 
   bool operator<(const OperatorKey& other) const {
     if (type < other.type) return true;
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index 93882a91a7..a71a64d56f 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -313,6 +313,46 @@ TEST_F(VersionedOpExportTest, Export) {
   EXPECT_EQ(1, (*operators)[1]->opcode_index());
 }
 
+TEST(OperatorKeyTest, TestBuiltinOp) {
+  details::OperatorKey key(OperatorType::kConv, "", 2);
+  EXPECT_EQ(key.type, OperatorType::kConv);
+  EXPECT_EQ(key.custom_code, "");
+  EXPECT_EQ(key.version, 2);
+}
+
+TEST(OperatorKeyTest, TestFlexOp) {
+  {
+    details::OperatorKey key(OperatorType::kUnsupported, "SomeUnsupportedOp", 1,
+                             false);
+    EXPECT_EQ(key.type, OperatorType::kUnsupported);
+    // It shouldn't be converted to Flex op if `allow_flex_op` is false.
+    EXPECT_EQ(key.custom_code, "SomeUnsupportedOp");
+    EXPECT_EQ(key.version, 1);
+    EXPECT_FALSE(key.is_flex_op);
+  }
+
+  {
+    details::OperatorKey key(OperatorType::kUnsupported, "SomeUnsupportedOp", 1,
+                             true);
+    EXPECT_EQ(key.type, OperatorType::kUnsupported);
+    // Verify that the custom op name is prefixed by "Flex" and `is_flex_op`
+    // is true.
+    EXPECT_EQ(key.custom_code, "FlexSomeUnsupportedOp");
+    EXPECT_EQ(key.version, 1);
+    EXPECT_TRUE(key.is_flex_op);
+  }
+}
+
+TEST(OperatorKeyTest, TestFlexWithControlFlowOp) {
+  details::OperatorKey key(OperatorType::kUnsupported, "Merge", 1, true);
+  EXPECT_EQ(key.type, OperatorType::kUnsupported);
+  EXPECT_EQ(key.custom_code, "FlexMerge");
+  EXPECT_EQ(key.version, 1);
+  EXPECT_TRUE(key.is_flex_op);
+  // The control flow ops should be marked as unsupported.
+  EXPECT_TRUE(key.is_unsupported_flex_op);
+}
+
 // TODO(ahentz): tests for tensors, inputs, outputs, opcodes and operators.
 
 }  // namespace
-- 
GitLab


From d6e14a53835eed5eed279c83e475440f8f814f0e Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Wed, 3 Oct 2018 17:28:57 -0700
Subject: [PATCH 1101/1357] Automated rollback of commit
 c1b3b0b9e041d82e80c2cdcc623a387753daf0b4

PiperOrigin-RevId: 215658770
---
 tensorflow/contrib/lite/kernels/BUILD | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index b349a2863c..daaf6714cc 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -337,10 +337,7 @@ tf_cc_test(
     name = "activations_test",
     size = "small",
     srcs = ["activations_test.cc"],
-    tags = [
-        "nomac",
-        "tflite_not_portable_ios",
-    ],
+    tags = ["tflite_not_portable_ios"],
     deps = [
         ":builtin_ops",
         "//tensorflow/contrib/lite:framework",
-- 
GitLab


From f7edc2d308523fa6c2d233c09e3f2da1c98e3dbc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 18:00:17 -0700
Subject: [PATCH 1102/1357] PinToHostOptimizer: Refactored code. Update
 blacklist. Added recursive lookback for Identity op. This fixes many
 performance regressions.

PiperOrigin-RevId: 215662393
---
 .../core/grappler/costs/graph_properties.h    |   4 +
 tensorflow/core/grappler/graph_view.cc        |  33 +-
 tensorflow/core/grappler/graph_view.h         |   3 +-
 tensorflow/core/grappler/graph_view_test.cc   |  22 +-
 tensorflow/core/grappler/op_types.cc          | 114 ++++---
 tensorflow/core/grappler/op_types.h           |   2 +
 .../optimizers/pin_to_host_optimizer.cc       | 303 ++++++++++++------
 .../optimizers/pin_to_host_optimizer_test.cc  |  42 +++
 8 files changed, 366 insertions(+), 157 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index f716cd72c9..28fd7565cc 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -74,6 +74,10 @@ class GraphProperties {
   // shape information.
   void ClearInputProperties(const string& node_name);
   void ClearOutputProperties(const string& node_name);
+  // Returns true if we have *any* properties.
+  bool has_properties() const {
+    return input_properties_.size() > 0 || output_properties_.size() > 0;
+  }
 
  private:
   // Relaxes shapes <shapes_and_types>, determined from an EnqueueV2 node, into
diff --git a/tensorflow/core/grappler/graph_view.cc b/tensorflow/core/grappler/graph_view.cc
index 0b8cb5e919..de0a63fc4e 100644
--- a/tensorflow/core/grappler/graph_view.cc
+++ b/tensorflow/core/grappler/graph_view.cc
@@ -20,23 +20,25 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
-  for (int output_arg_id = 0; output_arg_id < op.output_arg_size();
-       ++output_arg_id) {
+namespace {
+int OpPortIdToArgId(const NodeDef& node,
+                    const protobuf::RepeatedPtrField<OpDef::ArgDef>& args,
+                    int port_id) {
+  for (int arg_id = 0; arg_id < args.size(); ++arg_id) {
     if (port_id < 0) {
       return -1;
     } else if (port_id == 0) {
-      return output_arg_id;
+      return arg_id;
     }
 
-    // Default is 1 port per output arg.
+    // Default is 1 port per arg.
     int n = 1;
 
-    const auto& output_arg = op.output_arg(output_arg_id);
-    if (!output_arg.number_attr().empty()) {
-      n = node.attr().at(output_arg.number_attr()).i();
-    } else if (!output_arg.type_list_attr().empty()) {
-      n = node.attr().at(output_arg.type_list_attr()).list().type_size();
+    const auto& arg = args.Get(arg_id);
+    if (!arg.number_attr().empty()) {
+      n = node.attr().at(arg.number_attr()).i();
+    } else if (!arg.type_list_attr().empty()) {
+      n = node.attr().at(arg.type_list_attr()).list().type_size();
     }
 
     if (n < 0) {
@@ -44,13 +46,22 @@ int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
       DCHECK_GE(n, 0);
       return -1;
     } else if (port_id < n) {
-      return output_arg_id;
+      return arg_id;
     }
     port_id -= n;
   }
 
   return -1;
 }
+}  // end namespace
+
+int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
+  return OpPortIdToArgId(node, op.output_arg(), port_id);
+}
+
+int OpInputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
+  return OpPortIdToArgId(node, op.input_arg(), port_id);
+}
 
 GraphView::GraphView(GraphDef* graph) : graph_(graph) {
   for (int i = 0; i < graph_->node_size(); i++) {
diff --git a/tensorflow/core/grappler/graph_view.h b/tensorflow/core/grappler/graph_view.h
index ec946ca3b5..09c36a1368 100644
--- a/tensorflow/core/grappler/graph_view.h
+++ b/tensorflow/core/grappler/graph_view.h
@@ -26,7 +26,7 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-// Map a node/op's output port_id to arg_id.
+// Map a node/op's input/output port_id to arg_id.
 //
 // The port_id refers to the n-th tensor of the node, while the arg_id refers to
 // the n-th arg of the op. These two can be different if an op's arg is a list
@@ -34,6 +34,7 @@ namespace grappler {
 //
 // We return -1 for any invalid port_id (i.e., no corresponding arg_id).
 int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id);
+int OpInputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id);
 
 // A utility class to simplify the traversal of a GraphDef.
 class GraphView {
diff --git a/tensorflow/core/grappler/graph_view_test.cc b/tensorflow/core/grappler/graph_view_test.cc
index 3d7d2faf7c..f90e2c8cfc 100644
--- a/tensorflow/core/grappler/graph_view_test.cc
+++ b/tensorflow/core/grappler/graph_view_test.cc
@@ -26,7 +26,7 @@ namespace {
 
 class GraphViewTest : public ::testing::Test {};
 
-TEST_F(GraphViewTest, OpOutputPortIdToArgIdShapeN) {
+TEST_F(GraphViewTest, OpPortIdToArgIdShapeN) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output a = ops::Const(s.WithOpName("a"), 0.0f, {10, 10});
   ops::ShapeN b(s.WithOpName("b"), {a, a, a});
@@ -45,9 +45,16 @@ TEST_F(GraphViewTest, OpOutputPortIdToArgIdShapeN) {
   EXPECT_TRUE(
       OpRegistry::Global()->LookUpOpDef(b_node_def.op(), &b_op_def).ok());
 
-  EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *a_op_def, 0));
-  EXPECT_EQ(-1, OpOutputPortIdToArgId(b_node_def, *a_op_def, 1));
+  // Const has 0 inputs, 1 output.
+  EXPECT_EQ(-1, OpInputPortIdToArgId(a_node_def, *a_op_def, 0));
+  EXPECT_EQ(0, OpOutputPortIdToArgId(a_node_def, *a_op_def, 0));
+  EXPECT_EQ(-1, OpOutputPortIdToArgId(a_node_def, *a_op_def, 1));
 
+  // ShapeN has N=3 inputs and outputs.
+  EXPECT_EQ(0, OpInputPortIdToArgId(b_node_def, *b_op_def, 0));
+  EXPECT_EQ(0, OpInputPortIdToArgId(b_node_def, *b_op_def, 1));
+  EXPECT_EQ(0, OpInputPortIdToArgId(b_node_def, *b_op_def, 2));
+  EXPECT_EQ(-1, OpInputPortIdToArgId(b_node_def, *b_op_def, 3));
   EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *b_op_def, 0));
   EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *b_op_def, 1));
   EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *b_op_def, 2));
@@ -55,7 +62,7 @@ TEST_F(GraphViewTest, OpOutputPortIdToArgIdShapeN) {
   EXPECT_EQ(-1, OpOutputPortIdToArgId(b_node_def, *b_op_def, 4));
 }
 
-TEST_F(GraphViewTest, OpOutputPortIdToArgIdSparseSplit) {
+TEST_F(GraphViewTest, OpPortIdToArgIdSparseSplit) {
   for (int num_splits : {1, 2}) {
     tensorflow::Scope s = tensorflow::Scope::NewRootScope();
     Output a = ops::Const<int64>(s.WithOpName("a"), 1, {10, 10});
@@ -70,6 +77,13 @@ TEST_F(GraphViewTest, OpOutputPortIdToArgIdSparseSplit) {
     EXPECT_TRUE(
         OpRegistry::Global()->LookUpOpDef(b_node_def.op(), &b_op_def).ok());
 
+    // We have 4 inputs.
+    EXPECT_EQ(0, OpInputPortIdToArgId(b_node_def, *b_op_def, 0));
+    EXPECT_EQ(1, OpInputPortIdToArgId(b_node_def, *b_op_def, 1));
+    EXPECT_EQ(2, OpInputPortIdToArgId(b_node_def, *b_op_def, 2));
+    EXPECT_EQ(3, OpInputPortIdToArgId(b_node_def, *b_op_def, 3));
+    EXPECT_EQ(-1, OpInputPortIdToArgId(b_node_def, *b_op_def, 4));
+
     for (int port_id = 0; port_id <= num_splits * 3; ++port_id) {
       int arg_id = -1;
       if (port_id < num_splits * 3) {
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 9f0d9dbf28..1b5a215987 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -13,14 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include <unordered_set>
-
+#include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -102,6 +101,18 @@ bool IsConjugateTranspose(const NodeDef& node) {
   return node.op() == "ConjugateTranspose";
 }
 
+bool IsControlFlow(const NodeDef& node) {
+  // clang-format off
+  return node.op() == "ControlTrigger" ||
+         node.op() == "Enter" ||
+         node.op() == "Exit" ||
+         node.op() == "LoopCond" ||
+         node.op() == "Merge" ||
+         node.op() == "NextIteration" ||
+         node.op() == "Switch";
+  // clang-format on
+}
+
 bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; }
 
 bool IsConv2DBackpropFilter(const NodeDef& node) {
@@ -140,26 +151,26 @@ bool IsDiv(const NodeDef& node) { return node.op() == "Div"; }
 // e.g. sqrt, exp. *is_non_decreasing is false, the function is non-increasing,
 // e.g. inv.
 bool IsElementWiseMonotonic(const NodeDef& node, bool* is_non_decreasing) {
-  static const std::unordered_set<string>* monotonic_non_decreasing_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kMonotonicNonDecreasingOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
           "Asinh", "Atanh",   "Ceil",  "Elu",  "Erf",  "Exp",   "Expm1",
           "Floor", "Log",     "Log1p", "Relu", "Relu", "Relu6", "Rint",
           "Selu",  "Sigmoid", "Sign",  "Sinh", "Sqrt", "Tanh",
       }));
-  static const std::unordered_set<string>* monotonic_non_increasing_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kMonotonicNonIncreasingOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
           "Inv",
           "Reciprocal",
           "Erfc",
           "Rsqrt",
           "Neg",
       }));
-  if (monotonic_non_decreasing_ops->count(node.op()) > 0) {
+  if (kMonotonicNonDecreasingOps->count(node.op()) > 0) {
     if (is_non_decreasing) {
       *is_non_decreasing = true;
     }
     return true;
-  } else if (monotonic_non_increasing_ops->count(node.op()) > 0) {
+  } else if (kMonotonicNonIncreasingOps->count(node.op()) > 0) {
     if (is_non_decreasing) {
       *is_non_decreasing = false;
     }
@@ -431,6 +442,38 @@ bool IsSymbolicGradient(const NodeDef& node) {
 
 bool IsTanhGrad(const NodeDef& node) { return node.op() == "TanhGrad"; }
 
+bool IsTensorArray(const NodeDef& node) {
+  static const gtl::FlatSet<string>* const kTensorArrayOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
+          "TensorArray",
+          "TensorArrayV2",
+          "TensorArrayV3",
+          "TensorArrayGrad",
+          "TensorArrayGradV2",
+          "TensorArrayGradV3",
+          "TensorArrayGradWithShape",
+          "TensorArrayWrite",
+          "TensorArrayWriteV2",
+          "TensorArrayWriteV3",
+          "TensorArrayRead",
+          "TensorArrayReadV2",
+          "TensorArrayReadV3",
+          "TensorArrayConcat",
+          "TensorArrayConcatV2",
+          "TensorArrayConcatV3",
+          "TensorArraySplit",
+          "TensorArraySplitV2",
+          "TensorArraySplitV3",
+          "TensorArraySize",
+          "TensorArraySizeV2",
+          "TensorArraySizeV3",
+          "TensorArrayClose",
+          "TensorArrayCloseV2",
+          "TensorArrayCloseV3",
+      }));
+  return kTensorArrayOps->count(node.op()) > 0;
+}
+
 bool IsTile(const NodeDef& node) { return node.op() == "Tile"; }
 
 bool IsTranspose(const NodeDef& node) { return node.op() == "Transpose"; }
@@ -542,30 +585,29 @@ OPDEF_PROPERTY_HELPER(Aggregate, aggregate)
 OPDEF_PROPERTY_HELPER(Commutative, commutative)
 
 bool IsInvolution(const NodeDef& node) {
-  static const std::unordered_set<string>* involution_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
-          "Conj", "Reciprocal", "Invert", "Neg", "LogicalNot"}));
-  return involution_ops->count(node.op()) > 0;
+  static const gtl::FlatSet<string>* const kInvolutionOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{"Conj", "Reciprocal", "Invert",
+                                              "Neg", "LogicalNot"}));
+  return kInvolutionOps->count(node.op()) > 0;
 }
 
 bool IsValueAndOrderAndShapePreserving(const NodeDef& node) {
   if (NumNonControlInputs(node) == 1 && IsAggregate(node)) {
     return true;
   }
-  static const std::unordered_set<string>*
-      value_and_order_and_shape_preserving_ops =
-          CHECK_NOTNULL((new const std::unordered_set<string>{
-              "CheckNumerics",
-              "DebugGradientIdentity",
-              "DeepCopy"
-              "Enter",
-              "Exit",
-              "PreventGradient",
-              "Print",
-              "Snapshot",
-              "StopGradient",
-          }));
-  return value_and_order_and_shape_preserving_ops->count(node.op()) > 0 ||
+  static const gtl::FlatSet<string>* const kValueAndOrderAndShapePreservingOps =
+      CHECK_NOTNULL((new const gtl::FlatSet<string>{
+          "CheckNumerics",
+          "DebugGradientIdentity",
+          "DeepCopy"
+          "Enter",
+          "Exit",
+          "PreventGradient",
+          "Print",
+          "Snapshot",
+          "StopGradient",
+      }));
+  return kValueAndOrderAndShapePreservingOps->count(node.op()) > 0 ||
          IsIdentity(node);
 }
 
@@ -573,31 +615,31 @@ bool IsValueAndOrderPreserving(const NodeDef& node) {
   if (NumNonControlInputs(node) == 1 && IsAggregate(node)) {
     return true;
   }
-  static const std::unordered_set<string>* value_and_order_preserving_ops =
-      CHECK_NOTNULL((new const std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kValueAndOrderPreservingOps =
+      CHECK_NOTNULL((new const gtl::FlatSet<string>{
           "ExpandDims",
           "Reshape",
           "Squeeze",
       }));
-  return value_and_order_preserving_ops->count(node.op()) > 0 ||
+  return kValueAndOrderPreservingOps->count(node.op()) > 0 ||
          IsValueAndOrderAndShapePreserving(node);
 }
 
 bool IsValuePreserving(const NodeDef& node) {
-  static const std::unordered_set<string>* value_preserving_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kValuePreservingOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
           "InvertPermutation",
           "Reverse",
           "Roll",
           "Transpose",
       }));
   return IsValueAndOrderPreserving(node) ||
-         value_preserving_ops->count(node.op()) > 0;
+         kValuePreservingOps->count(node.op()) > 0;
 }
 
 bool IsUnaryElementWise(const NodeDef& node) {
-  static const std::unordered_set<string>* element_wise_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kElementWiseOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
           "Abs",
           "Acos",
           "Acosh",
@@ -646,7 +688,7 @@ bool IsUnaryElementWise(const NodeDef& node) {
           "Tan"
           "Tanh",
       }));
-  return element_wise_ops->count(node.op()) > 0 ||
+  return kElementWiseOps->count(node.op()) > 0 ||
          IsValueAndOrderAndShapePreserving(node);
 }
 
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 7f86a5f295..d4e0159e81 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -46,6 +46,7 @@ bool IsConjugateTranspose(const NodeDef& node);
 bool IsConcat(const NodeDef& node);
 bool IsConcatOffset(const NodeDef& node);
 bool IsConstant(const NodeDef& node);
+bool IsControlFlow(const NodeDef& node);
 bool IsConv2D(const NodeDef& node);
 bool IsConv2DBackpropFilter(const NodeDef& node);
 bool IsConv2DBackpropInput(const NodeDef& node);
@@ -151,6 +152,7 @@ bool IsSum(const NodeDef& node);
 bool IsSwitch(const NodeDef& node);
 bool IsSymbolicGradient(const NodeDef& node);
 bool IsTanhGrad(const NodeDef& node);
+bool IsTensorArray(const NodeDef& node);
 bool IsTile(const NodeDef& node);
 bool IsTranspose(const NodeDef& node);
 bool IsTruncateDiv(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 89eb76046e..8ed4271fa4 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -35,13 +35,44 @@ namespace internal {
 // dynamically determined.
 constexpr int64 kTensorMaxSize = 64;
 
-// Find KernelDef for `node`.
-Status TryFindKernelDef(const NodeDef& node, const KernelDef** kdef) {
-  // Try find KernelDef for node.device, else GPU or CPU.
-  for (const DeviceType& device :
-       {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}) {
-    Status s = FindKernelDef(device, node, kdef, nullptr);
+// All the nodes that should be blacklisted and not swapped.
+bool IsBlacklisted(const NodeDef& node) {
+  return
+      // Collective ops should not be swapped.
+      IsCollective(node) ||
+      // ControlFlow ops should not be swapped.
+      IsControlFlow(node) ||
+      // NoOp ops should not be swapped (due to group dependencies).
+      IsNoOp(node);
+}
+
+// Check if Tensor is integer and small size.
+bool IsTensorIntegerAndSmall(const OpInfo::TensorProperties& prop) {
+  // Check type to be int32 or int64.
+  if (prop.dtype() != DataType::DT_INT32 &&
+      prop.dtype() != DataType::DT_INT64) {
+    return false;
+  }
+
+  // Check size known and small.
+  const int64 size = NumCoefficients(prop.shape());
+  if (size < 0 || size > kTensorMaxSize) {
+    return false;
+  }
+
+  return true;
+}
+
+// Find KernelDef for `node`, greedily return first found from `devices`.
+Status TryFindKernelDef(const std::vector<DeviceType>& devices,
+                        const NodeDef& node, const KernelDef** kdef) {
+  for (const DeviceType& device : devices) {
+    const KernelDef* kernel = nullptr;
+    Status s = FindKernelDef(device, node, &kernel, nullptr);
     if (s.ok()) {
+      if (kdef) {
+        *kdef = kernel;
+      }
       return Status::OK();
     }
   }
@@ -49,88 +80,183 @@ Status TryFindKernelDef(const NodeDef& node, const KernelDef** kdef) {
   return errors::NotFound("Could not find KernelDef for op: ", node.op());
 }
 
-// Check if all node's inputs are pinned to CPU memory.
-bool AreAllNodeInputsPinnedToHost(const GraphView& graph, const NodeDef& node) {
-  // Loop through all the inputs excluding the controlling nodes.
-  for (const GraphView::OutputPort& fanin : graph.GetFanins(node, false)) {
-    // Check if (the fanin) op's device is on CPU.
-    if (str_util::StrContains(fanin.node->device(), DEVICE_CPU)) {
-      continue;
-    }
-
-    // Check if (the fanin) op's output port is pinned to HostMemory.
-    const OpDef* fanin_odef = nullptr;
-    Status s = OpRegistry::Global()->LookUpOpDef(fanin.node->op(), &fanin_odef);
-    if (!s.ok()) {
-      LOG(INFO) << "Could not find OpDef for : " << fanin.node->op();
-      return false;
-    }
+// Checks if a node's output port is host friendly.
+// Roughly this means checking if the output port is on Host memory.
+Status IsNodeOutputPortHostFriendly(const GraphView& graph,
+                                    GraphProperties* properties,
+                                    const NodeDef& node, int port_id,
+                                    bool* is_candidate) {
+  *is_candidate = false;
 
-    const int output_arg_id =
-        OpOutputPortIdToArgId(*fanin.node, *fanin_odef, fanin.port_id);
-    if (output_arg_id < 0) {
-      LOG(WARNING) << "Invalid port: " << fanin.port_id << "!\n"
-                   << node.DebugString() << "\n"
-                   << fanin.node->DebugString() << "\n"
-                   << fanin_odef->DebugString();
-      return false;
-    }
+  // Make sure we are not a blacklisted op.
+  if (IsBlacklisted(node)) {
+    return Status::OK();
+  }
 
-    const KernelDef* fanin_kdef = nullptr;
-    s = TryFindKernelDef(*fanin.node, &fanin_kdef);
-    if (!s.ok()) {
-      LOG(INFO) << "Could not find KernelDef for : " << fanin.node->op();
-      return false;
-    }
+  // Check to make sure we have the right properties (i.e., statically shaped).
+  if (!properties->has_properties()) {
+    // This is an expensive call, call it lazily.
+    TF_RETURN_IF_ERROR(properties->InferStatically(
+        /*assume_valid_feeds=*/false));
+  }
+  const auto& output_properties = properties->GetOutputProperties(node.name());
+  if (port_id >= output_properties.size()) {
+    LOG(WARNING) << "port_id=" << port_id
+                 << " but output_properties.size()=" << output_properties.size()
+                 << "\n"
+                 << node.DebugString();
+    return Status::OK();
+  }
+  if (!IsTensorIntegerAndSmall(output_properties[port_id])) {
+    return Status::OK();
+  }
 
-    bool fanin_pinned = false;
-    for (const string& host_memory_arg : fanin_kdef->host_memory_arg()) {
-      if (fanin_odef->output_arg(output_arg_id).name() == host_memory_arg) {
-        fanin_pinned = true;
-        break;
+  // These nodes may be optimized away downstream (even if pinned to Host), we
+  // should (recusively) check their source.
+  if (IsIdentity(node)) {
+    for (const auto& fanin : graph.GetFanins(node, false)) {
+      bool fanin_candidate = false;
+      TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
+          graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
+      if (!fanin_candidate) {
+        return Status::OK();
       }
     }
+    *is_candidate = true;
+    return Status::OK();
+  }
 
-    if (!fanin_pinned) {
-      return false;
+  // Check if op's device is on CPU.
+  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
+    *is_candidate = true;
+    return Status::OK();
+  }
+
+  // Check if op's output port is pinned to HostMemory.
+  const OpDef* op = nullptr;
+  Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
+  if (!s.ok()) {
+    LOG(WARNING) << "Could not find OpDef for : " << node.op();
+    return Status::OK();
+  }
+
+  // Map the port_id to output_arg_id.
+  const int output_arg_id = OpOutputPortIdToArgId(node, *op, port_id);
+  if (output_arg_id < 0) {
+    LOG(WARNING) << "Invalid port: " << port_id << "!\n"
+                 << node.DebugString() << "\n"
+                 << op->DebugString();
+    return Status::OK();
+  }
+
+  // Find the kernel.
+  const KernelDef* kernel = nullptr;
+  s = TryFindKernelDef({node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node,
+                       &kernel);
+  if (!s.ok()) {
+    LOG(INFO) << "Could not find KernelDef for: " << node.op();
+    return Status::OK();
+  }
+
+  // Check if the output_arg is pinned to Host.
+  for (const string& host_memory_arg : kernel->host_memory_arg()) {
+    if (op->output_arg(output_arg_id).name() == host_memory_arg) {
+      *is_candidate = true;
+      break;
     }
   }
 
-  return true;
+  return Status::OK();
 }
 
-bool IsTensorIntegerAndSmall(const OpInfo::TensorProperties& prop) {
-  // Check if Tensor is integer and small size.
+// Checks if a node's input port is Host friendly.
+// Roughly this means checking if the input port is on Host memory.
+bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
+  // If node is on Host, assume its inputs are Host friendly.
+  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
+    return true;
+  }
 
-  // Check type to be int32 or int64.
-  if (prop.dtype() != DataType::DT_INT32 &&
-      prop.dtype() != DataType::DT_INT64) {
+  // Check if op's input port is pinned to HostMemory.
+  const OpDef* op = nullptr;
+  Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
+  if (!s.ok()) {
+    LOG(WARNING) << "Could not find OpDef for : " << node.op();
     return false;
   }
-
-  // Check size known and small.
-  const int64 size = NumCoefficients(prop.shape());
-  if (size < 0 || size > kTensorMaxSize) {
+  const int input_arg_id = OpInputPortIdToArgId(node, *op, port_id);
+
+  // Find the kernel.
+  const KernelDef* kernel = nullptr;
+  s = internal::TryFindKernelDef(
+      {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node, &kernel);
+  if (!s.ok()) {
+    LOG(INFO) << "Could not find KernelDef for: " << node.op();
     return false;
   }
 
-  return true;
+  // Check if the input_arg is pinned to Host.
+  for (const string& host_memory_arg : kernel->host_memory_arg()) {
+    if (op->input_arg(input_arg_id).name() == host_memory_arg) {
+      return true;
+    }
+  }
+
+  return false;
 }
 
-bool AreAllNodeInputsAndOutputsIntsAndSmall(const GraphProperties& properties,
-                                            const NodeDef& node) {
-  for (const auto& prop : properties.GetInputProperties(node.name())) {
-    if (!IsTensorIntegerAndSmall(prop)) {
-      return false;
+// Checks if a node is a candidate to pin to Host.
+// The rough algorithm is as follows:
+// 1] Check if node is blacklisted.
+// 2] Check if node can run on Host.
+// 3] Check all input/outputs are Host "friendly" (atm, friendly means small,
+//    ints, and pinned to Host).
+Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
+                           const NodeDef& node, bool* is_candidate) {
+  *is_candidate = false;
+
+  // Check if node already on CPU.
+  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
+    *is_candidate = true;
+    return Status::OK();
+  }
+
+  // Skip these node types.
+  if (IsBlacklisted(node)) {
+    return Status::OK();
+  }
+
+  // Check the node can be run on CPU.
+  Status s = TryFindKernelDef({DEVICE_CPU}, node, nullptr);
+  if (!s.ok()) {
+    return Status::OK();
+  }
+
+  // Check all inputs are Host friendly.
+  for (const GraphView::OutputPort& fanin :
+       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
+    bool fanin_candidate = false;
+    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
+        graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
+    if (!fanin_candidate) {
+      return Status::OK();
     }
   }
 
-  for (const auto& prop : properties.GetOutputProperties(node.name())) {
+  // Check all outputs are Host friendly.
+  if (!properties->has_properties()) {
+    // This is an expensive call, call it lazily.
+    TF_RETURN_IF_ERROR(properties->InferStatically(
+        /*assume_valid_feeds=*/false));
+  }
+  for (const auto& prop : properties->GetOutputProperties(node.name())) {
     if (!IsTensorIntegerAndSmall(prop)) {
-      return false;
+      return Status::OK();
     }
   }
-  return true;
+
+  *is_candidate = true;
+  return Status::OK();
 }
 
 string TryFindHostDevice(const gtl::FlatSet<string>& devices,
@@ -167,15 +293,6 @@ bool IsTPUGraphDef(const GraphDef& def) {
   }
   return false;
 }
-
-// All the nodes that should be blacklisted and not swapped.
-bool IsBlacklisted(const NodeDef& node) {
-  return
-      // Collective ops should not be swapped.
-      IsCollective(node) ||
-      // NoOp breaks perf regression tests (probably due to group dependencies).
-      IsNoOp(node);
-}
 }  // end namespace internal
 
 Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
@@ -188,7 +305,6 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   }
 
   GraphProperties properties(item);
-  bool has_properties = false;
   GraphView graph(optimized_graph);
 
   gtl::FlatSet<string> devices;
@@ -209,35 +325,10 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   std::vector<std::pair<NodeDef*, string>> const_nodes;
 
   for (auto& node : *optimized_graph->mutable_node()) {
-    // Check if node already on CPU.
-    if (str_util::StrContains(node.device(), DEVICE_CPU)) {
-      continue;
-    }
-
-    // Skip these node types.
-    if (internal::IsBlacklisted(node)) {
-      continue;
-    }
-
-    // Check the node can be run on CPU.
-    Status s = FindKernelDef(DEVICE_CPU, node, nullptr, nullptr);
-    if (!s.ok()) {
-      continue;
-    }
-
-    // Check all input's are pinned to CPU.
-    if (!internal::AreAllNodeInputsPinnedToHost(graph, node)) {
-      continue;
-    }
-
-    if (!has_properties) {
-      // This is an expensive call, call it lazily.
-      TF_RETURN_IF_ERROR(properties.InferStatically(false));
-      has_properties = true;
-    }
-
-    // Check all inputs and outputs are integers and small.
-    if (!internal::AreAllNodeInputsAndOutputsIntsAndSmall(properties, node)) {
+    bool is_candidate = false;
+    TF_RETURN_IF_ERROR(
+        internal::IsNodeHostCandidate(graph, &properties, node, &is_candidate));
+    if (!is_candidate) {
       continue;
     }
 
@@ -254,10 +345,12 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     NodeDef* node = it.first;
     const string& device = it.second;
 
-    // Check all the consumers of this node, if any of them are on the original
-    // device, swap this node back onto the original device.
+    // Check all the consumers of this node, if any of them are not on CPU, swap
+    // this node back onto the original device.
     for (const GraphView::InputPort& fanout : graph.GetFanouts(*node, false)) {
-      if (fanout.node->device() == device) {
+      // The consumer is not Host friendly, swap it back to the original device.
+      if (!internal::IsNodeInputPortHostFriendly(*fanout.node,
+                                                 fanout.port_id)) {
         node->set_device(device);
         break;
       }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
index 173cb3fe3c..7c64529441 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
@@ -160,6 +160,48 @@ TEST_F(PinToHostOptimizerTest, NoSwap) {
   EXPECT_EQ(found, 3);
 }
 
+TEST_F(PinToHostOptimizerTest, Identity) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  // `a,c` is on GPU, `e` is on CPU, consequently `e` should not be swapped.
+  // `b` should be placed onto Host since `c` pins the input to Host memory.
+  Output a =
+      ops::Const(s.WithOpName("a").WithDevice("/device:GPU:0"), 1, {64, 64});
+  Output b = ops::Const(s.WithOpName("b"), {0, 1}, {2});
+  Output c =
+      ops::ReduceProd(s.WithOpName("c").WithDevice("/device:GPU:0"), a, b);
+  Output d = ops::Identity(s.WithDevice("/device:CPU:0").WithOpName("d"), c);
+  Output e = ops::Multiply(s.WithOpName("e"), d, d);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  GraphDef output;
+  PinToHostOptimizer optimizer(RewriterConfig::ON);
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  int found = 0;
+  for (const NodeDef& node : output.node()) {
+    if (node.name() == "a" || node.name() == "c") {
+      EXPECT_EQ(node.device(), "/device:GPU:0");
+    } else if (node.name() == "b") {
+      // If CUDA, then there is a GPU kernel registration that is pinned to Host
+      // memory. Consequently, `b` will be mapped to Host correct if there is
+      // a GPU kernel registered.
+#if GOOGLE_CUDA
+      EXPECT_EQ(node.device(), "/device:CPU:0");
+#else
+      EXPECT_TRUE(node.device().empty());
+#endif
+    } else if (node.name() == "d") {
+      EXPECT_EQ(node.device(), "/device:CPU:0");
+    } else if (node.name() == "e") {
+      EXPECT_TRUE(node.device().empty());
+    }
+    ++found;
+  }
+  EXPECT_EQ(found, 5);
+}
+
 TEST_F(PinToHostOptimizerTest, PortIdToArgId) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output a = ops::Const(s.WithOpName("a"), 1, {1, 2, 3});
-- 
GitLab


From 18f589350f0cb244e2373480048d17cbacd241e1 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 3 Oct 2018 18:05:22 -0700
Subject: [PATCH 1103/1357] [XLA] Add a size limit to the constant folder to
 avoid forming giant constants during compilation.

PiperOrigin-RevId: 215663002
---
 .../xla/service/hlo_constant_folding.cc       | 17 ++++++++++++++++
 .../xla/service/hlo_constant_folding_test.cc  | 20 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding.cc b/tensorflow/compiler/xla/service/hlo_constant_folding.cc
index f837816cea..538816a353 100644
--- a/tensorflow/compiler/xla/service/hlo_constant_folding.cc
+++ b/tensorflow/compiler/xla/service/hlo_constant_folding.cc
@@ -76,6 +76,22 @@ StatusOr<bool> HloConstantFolding::Run(HloModule* module) {
         continue;
       }
 
+      // Don't constant fold unless it's a net positive or the output is small.
+      int64 elements_in_removed_operands = 0;
+      for (HloInstruction* operand : instruction->operands()) {
+        if (operand->user_count() == 1) {
+          elements_in_removed_operands +=
+              ShapeUtil::ElementsIn(operand->shape());
+        }
+      }
+      int64 elements_in_constant = ShapeUtil::ElementsIn(instruction->shape());
+
+      static const int64 kMaximumConstantSizeElements = 2 * 1000 * 1000;
+      if (elements_in_constant > elements_in_removed_operands &&
+          elements_in_constant > kMaximumConstantSizeElements) {
+        continue;
+      }
+
       Literal result;
       // Currently we skip unimplemented operations.
       // TODO(b/35975797): Fold constant computations for more operations.
@@ -84,6 +100,7 @@ StatusOr<bool> HloConstantFolding::Run(HloModule* module) {
                 << instruction->ToString();
         continue;
       }
+      VLOG(4) << "Constant folded: " << instruction->ToString();
 
       TF_RETURN_IF_ERROR(computation->ReplaceWithNewInstruction(
           instruction, HloInstruction::CreateConstant(std::move(result))));
diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc
index 3e0def5d26..e45f905f71 100644
--- a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc
@@ -242,5 +242,25 @@ TEST_F(HloConstantFoldingTest, ConstantFoldReduceNoLayout) {
   EXPECT_THAT(module().entry_computation()->root_instruction(), op::Reduce());
 }
 
+const char* const kConstantFoldLargePad = R"(
+  HloModule ConstantFoldLargePad
+
+  ENTRY r {
+    a = f32[1,1,1] constant(f32[1,1,1]{{{7}}})
+    b = f32[] constant(42)
+    ROOT pad = f32[2048,2048,128] pad(a, b), padding=1024_1023x1024_1023x64_63
+  })";
+
+TEST_F(HloConstantFoldingTest, DoesNotFoldLargePad) {
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(kConstantFoldLargePad));
+  HloConstantFolding const_folder;
+  TF_ASSERT_OK_AND_ASSIGN(bool result, const_folder.Run(module.get()));
+  EXPECT_FALSE(result);
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Pad(op::Constant(), op::Constant()));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 54bebc286bbe7d6a866a3bdbcefd8af55adbe39a Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Wed, 3 Oct 2018 18:26:28 -0700
Subject: [PATCH 1104/1357] Fix a test. - SetCustomOp also sets the name of the
 custom op. Test was checking against the wrong name in the profile.

PiperOrigin-RevId: 215665359
---
 .../contrib/lite/profiling/profile_summarizer_test.cc       | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc b/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc
index 67a5eecfa0..465c294962 100644
--- a/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc
+++ b/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc
@@ -31,6 +31,8 @@ namespace profiling {
 
 namespace {
 
+const char* kOpName = "SimpleOpEval";
+
 #ifdef TFLITE_PROFILING_ENABLED
 TfLiteStatus SimpleOpEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input1 = tflite::GetInput(context, node, /*index=*/0);
@@ -63,7 +65,7 @@ TfLiteRegistration* RegisterSimpleOpWithProfilingDetails() {
                                             SimpleOpEval,
                                             SimpleOpProfilingString,
                                             tflite::BuiltinOperator_CUSTOM,
-                                            "SimpleOpEval",
+                                            kOpName,
                                             1};
   return &registration;
 }
@@ -89,7 +91,7 @@ void SimpleOpModel::Init(
   inputs_[0] = AddInput({TensorType_INT32, {1}});
   inputs_[1] = AddInput({TensorType_INT32, {1}});
   output_ = AddOutput({TensorType_INT32, {}});
-  SetCustomOp("SimpleAdd", {}, registration);
+  SetCustomOp(kOpName, {}, registration);
   BuildInterpreter({GetShape(inputs_[0]), GetShape(inputs_[1])});
 }
 
-- 
GitLab


From 9bd6f5ed55e533ccac055a5bc7fbb771e2d432c5 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 3 Oct 2018 18:56:00 -0700
Subject: [PATCH 1105/1357] [TF:XLA] Use xla::Iota rather than expanding Range
 ops to constants.

PiperOrigin-RevId: 215668016
---
 .../compiler/tf2xla/kernels/sequence_ops.cc   | 39 +++++++++----------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc b/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc
index 25a5bcbe1d..0c32b8def0 100644
--- a/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc
@@ -18,7 +18,9 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/numeric.h"
 #include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -55,10 +57,10 @@ Status GetIntValue(int index, XlaOpKernelContext* ctx, int64* value) {
 
 // The type-specific part of the implementation of Range.
 template <typename T>
-Status CreateRangeTensor(const xla::LiteralSlice& start_literal,
-                         const xla::LiteralSlice& limit_literal,
-                         const xla::LiteralSlice& delta_literal,
-                         Tensor* output) {
+xla::StatusOr<xla::XlaOp> CreateRangeTensor(
+    const xla::LiteralSlice& start_literal,
+    const xla::LiteralSlice& limit_literal,
+    const xla::LiteralSlice& delta_literal, xla::XlaBuilder* builder) {
   T start = start_literal.Get<T>({});
   T limit = limit_literal.Get<T>({});
   T delta = delta_literal.Get<T>({});
@@ -82,14 +84,10 @@ Status CreateRangeTensor(const xla::LiteralSlice& start_literal,
            ? ((std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta))
            : std::ceil(std::abs((limit - start) / delta)));
 
-  *output = Tensor(DataTypeToEnum<T>::v(), TensorShape({size}));
-  auto flat = output->flat<T>();
-  T val = start;
-  for (int64 i = 0; i < size; ++i) {
-    flat(i) = val;
-    val += delta;
-  }
-  return Status::OK();
+  return xla::ConstantR0(builder, start) +
+         xla::ConstantR0(builder, delta) *
+             xla::Iota(builder, xla::primitive_util::NativeToPrimitiveType<T>(),
+                       size);
 }
 
 class RangeOp : public XlaOpKernel {
@@ -115,27 +113,26 @@ class RangeOp : public XlaOpKernel {
     OP_REQUIRES_OK(ctx, ctx->ConstantInput(2, &delta));
 
     DataType type = input_type(0);
-    Tensor output;
-    Status status;
+    xla::StatusOr<xla::XlaOp> output;
     switch (type) {
       case DT_INT32:
-        status = CreateRangeTensor<int32>(start, limit, delta, &output);
+        output = CreateRangeTensor<int32>(start, limit, delta, ctx->builder());
         break;
       case DT_INT64:
-        status = CreateRangeTensor<int64>(start, limit, delta, &output);
+        output = CreateRangeTensor<int64>(start, limit, delta, ctx->builder());
         break;
       case DT_FLOAT:
-        status = CreateRangeTensor<float>(start, limit, delta, &output);
+        output = CreateRangeTensor<float>(start, limit, delta, ctx->builder());
         break;
       case DT_DOUBLE:
-        status = CreateRangeTensor<double>(start, limit, delta, &output);
+        output = CreateRangeTensor<double>(start, limit, delta, ctx->builder());
         break;
       default:
-        status = errors::InvalidArgument("Invalid type for Range ",
+        output = errors::InvalidArgument("Invalid type for Range ",
                                          DataTypeString(type));
     }
-    OP_REQUIRES_OK(ctx, status);
-    ctx->SetConstantOutput(0, output);
+    OP_REQUIRES_OK(ctx, output.status());
+    ctx->SetOutput(0, output.ValueOrDie());
   }
 };
 
-- 
GitLab


From 2e19f32d28ab88b5bd3dd4f6d42a54040591dfbb Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 3 Oct 2018 20:48:35 -0700
Subject: [PATCH 1106/1357] [XLA] Fix handling of tuple constants in HLO
 constant folding.

PiperOrigin-RevId: 215676675
---
 .../xla/service/hlo_constant_folding.cc       | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding.cc b/tensorflow/compiler/xla/service/hlo_constant_folding.cc
index 538816a353..4f898ce61c 100644
--- a/tensorflow/compiler/xla/service/hlo_constant_folding.cc
+++ b/tensorflow/compiler/xla/service/hlo_constant_folding.cc
@@ -77,19 +77,23 @@ StatusOr<bool> HloConstantFolding::Run(HloModule* module) {
       }
 
       // Don't constant fold unless it's a net positive or the output is small.
-      int64 elements_in_removed_operands = 0;
-      for (HloInstruction* operand : instruction->operands()) {
-        if (operand->user_count() == 1) {
-          elements_in_removed_operands +=
-              ShapeUtil::ElementsIn(operand->shape());
+      if (ShapeUtil::IsArray(instruction->shape())) {
+        int64 elements_in_removed_operands = 0;
+        for (HloInstruction* operand : instruction->operands()) {
+          if (operand->user_count() == 1 &&
+              ShapeUtil::IsArray(operand->shape())) {
+            elements_in_removed_operands +=
+                ShapeUtil::ElementsIn(operand->shape());
+          }
         }
-      }
-      int64 elements_in_constant = ShapeUtil::ElementsIn(instruction->shape());
+        int64 elements_in_constant =
+            ShapeUtil::ElementsIn(instruction->shape());
 
-      static const int64 kMaximumConstantSizeElements = 2 * 1000 * 1000;
-      if (elements_in_constant > elements_in_removed_operands &&
-          elements_in_constant > kMaximumConstantSizeElements) {
-        continue;
+        static const int64 kMaximumConstantSizeElements = 2 * 1000 * 1000;
+        if (elements_in_constant > elements_in_removed_operands &&
+            elements_in_constant > kMaximumConstantSizeElements) {
+          continue;
+        }
       }
 
       Literal result;
-- 
GitLab


From 8a437200e14c8e09fcc8e952679d489909f175c8 Mon Sep 17 00:00:00 2001
From: Mingxing Tan <tanmingxing@google.com>
Date: Wed, 3 Oct 2018 21:06:27 -0700
Subject: [PATCH 1107/1357] BEGIN_PUBLIC Rollback some quantization changes
 that breaks some models. END_PUBLIC

Automated rollback of commit d3f14ef70cdf113f9d330c1f7c638003429a1dc4. Revert #19894.

PiperOrigin-RevId: 215678307
---
 .../contrib/quantize/python/quantize.py       | 115 +++++++-----------
 .../quantize/python/quantize_graph_test.py    |  37 ------
 2 files changed, 41 insertions(+), 111 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index afb9de8370..5e63d33db8 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -461,8 +461,8 @@ class _LayerMatch(object):
     return self._bias_add_op
 
 
-def _GetFollowingFakeQuantOp(tensor):
-  """Returns the following FakeQuant op if it exists else None."""
+def _FollowedByFakeQuant(tensor):
+  """Returns True if the tensor is followed by a FakeQuant."""
   fake_quant_ops = set([
       'FakeQuantWithMinMaxVars', 'FakeQuantWithMinMaxArgs',
       'FakeQuantWithMinMaxVarsPerChannel'
@@ -472,11 +472,11 @@ def _GetFollowingFakeQuantOp(tensor):
   while consumers:
     c = consumers.pop()
     if c.type in fake_quant_ops:
-      return c
+      return True
     elif c.type in pass_through_ops:
       for output in c.outputs:
         consumers.extend(output.consumers())
-  return None
+  return False
 
 
 def _InsertQuantOp(context,
@@ -559,77 +559,44 @@ def _InsertQuantOp(context,
   # Prevent ops from being quantized multiple times. Bypass ops can sometimes
   # overlap between multiple matches, so we need to ensure that we don't
   # add duplicate FakeQuant operations.
-  fake_quant_op = _GetFollowingFakeQuantOp(inputs)
-
-  # If we find that we are attempting to insert a fake quant op following
-  # a fake quant, we skip inserting a fake quant op
-
-  if fake_quant_op is None:
-    if moving_avg:
-      quant = (
-          quant_ops.MovingAvgQuantize(
-              inputs,
-              init_min=init_min,
-              init_max=init_max,
-              ema_decay=ema_decay,
-              is_training=is_training,
-              num_bits=bits,
-              narrow_range=narrow_range,
-              vars_collection=vars_collection,
-              name_prefix=name_prefix))
-    else:
-      quant = (
-          quant_ops.LastValueQuantize(
-              inputs,
-              init_min=init_min,
-              init_max=init_max,
-              is_training=is_training,
-              num_bits=bits,
-              narrow_range=narrow_range,
-              vars_collection=vars_collection,
-              name_prefix=name_prefix))
-
-    if quant_delay and quant_delay > 0:
-      activate_quant = math_ops.greater_equal(
-          common.CreateOrGetQuantizationStep(),
-          quant_delay,
-          name=name_prefix + '/activate_quant')
-      quant = control_flow_ops.cond(
-          activate_quant,
-          lambda: quant,
-          lambda: inputs,
-          name=name_prefix + '/delayed_quant')
+  if _FollowedByFakeQuant(inputs):
+    return
+
+  if moving_avg:
+    quant = (
+        quant_ops.MovingAvgQuantize(
+            inputs,
+            init_min=init_min,
+            init_max=init_max,
+            ema_decay=ema_decay,
+            is_training=is_training,
+            num_bits=bits,
+            narrow_range=narrow_range,
+            vars_collection=vars_collection,
+            name_prefix=name_prefix))
   else:
-    # If a fake quant op is present already, make sure that
-    # any downstream use of the tensor reroutes to the appropriate quantized
-    # tensor. If there is no quant_delay, this is simply the output of the
-    # fake quant op. If there is a quant delay, we reroute to the output
-    # of the delayed quant operation, which inserts quantization only after
-    # a specified quant_delay
-
-    quant = fake_quant_op.outputs[0]
-    if quant_delay and quant_delay > 0:
-      name_prefix = '/'.join(quant.name.split('/')[:-1])
-      quant = quant.graph.get_tensor_by_name(name_prefix +
-                                             '/delayed_quant/Merge:0')
-    pruned_consumer_set = set()
-    for consumer in consumers:
-      fake_quant_dest_op = _GetFollowingFakeQuantOp(consumer.outputs[0])
-      if (fake_quant_dest_op is None or
-          fake_quant_dest_op.name != fake_quant_op.name):
-        pruned_consumer_set.add(consumer)
-    consumers = pruned_consumer_set
-
-    # If we have
-    # input->pass_through->fake_quant
-    # there is nothing to reroute.
-    #
-    # If we have
-    #  input-> pass_through->fake_quant
-    #                |-> consumer
-    # Then we reroute such that:
-    # input-> pass_through->fake_quant
-    #                            |-> consumer
+    quant = (
+        quant_ops.LastValueQuantize(
+            inputs,
+            init_min=init_min,
+            init_max=init_max,
+            is_training=is_training,
+            num_bits=bits,
+            narrow_range=narrow_range,
+            vars_collection=vars_collection,
+            name_prefix=name_prefix))
+
+  if quant_delay and quant_delay > 0:
+    activate_quant = math_ops.greater_equal(
+        common.CreateOrGetQuantizationStep(),
+        quant_delay,
+        name=name_prefix + '/activate_quant')
+    quant = control_flow_ops.cond(
+        activate_quant,
+        lambda: quant,
+        lambda: inputs,
+        name=name_prefix + '/delayed_quant')
+
   if consumers:
     tensors_modified_count = common.RerouteTensor(
         quant, inputs, can_modify=consumers)
diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py
index a9fc6c3c61..e80d2183a6 100644
--- a/tensorflow/contrib/quantize/python/quantize_graph_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py
@@ -27,7 +27,6 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import template
 from tensorflow.python.platform import googletest
 
 
@@ -307,42 +306,6 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
     # No ops should be inserted or removed.
     self.assertEqual(op_names_before_rewrite, op_names_after_rewrite)
 
-  def testWithSharedWeights(self):
-
-    self._RunTestOverAllRewrites(self._TestWithSharedWeights)
-    self._RunTestOverTrainingRewrites(self._TestRewriteWithSharedWeights)
-
-  def _TestRewriteWithSharedWeights(self, rewrite_fn, quant_delay=1):
-    self._TestWithSharedWeights(rewrite_fn, quant_delay)
-
-  def _TestWithSharedWeights(self, rewrite_fn, quant_delay=None):
-    with ops.Graph().as_default() as g:
-      conv = template.make_template('shared_weights_conv', self._ConvLayer)
-      conv()
-      conv()
-      if quant_delay is None:
-        rewrite_fn()
-      else:
-        rewrite_fn(quant_delay=quant_delay)
-
-    conv_ops = [op for op in g.get_operations() if op.type == 'Conv2D']
-    weights_quants = [
-        op for op in g.get_operations()
-        if 'weights_quant' in op.name and op.type == 'FakeQuantWithMinMaxVars'
-    ]
-    # Check that the shared weights variable is not quantized multiple times
-    self.assertTrue(len(weights_quants) == 1)
-    weights_quant_tensor = weights_quants[0].outputs[0]
-    if quant_delay:
-      delayed_weights_quants = [
-          op for op in g.get_operations()
-          if 'weights_quant' in op.name and op.type == 'Merge'
-      ]
-      self.assertTrue(len(delayed_weights_quants) == 1)
-      weights_quant_tensor = delayed_weights_quants[0].outputs[0]
-    # Check that the Conv2D operations get the quantized weights
-    self.assertTrue(all(weights_quant_tensor in op.inputs for op in conv_ops))
-
   def _ConvLayer(
       self, input_tensor=None, scope='test', pre_activation_bypass=False,
       post_activation_bypass=False):
-- 
GitLab


From d3ced638f0496c70c3a063be82b30b358179e369 Mon Sep 17 00:00:00 2001
From: Yuanzhong Xu <yuanzx@google.com>
Date: Wed, 3 Oct 2018 21:41:43 -0700
Subject: [PATCH 1108/1357] [XLA] Delete IsInplaceSlice.

PiperOrigin-RevId: 215681153
---
 .../xla/service/hlo_dataflow_analysis.cc      | 24 -------------------
 .../xla/service/hlo_dataflow_analysis.h       |  1 -
 .../compiler/xla/service/hlo_instruction.cc   |  4 ----
 .../compiler/xla/service/hlo_instruction.h    |  3 ---
 .../compiler/xla/service/hlo_instructions.h   | 14 -----------
 .../xla/service/tuple_points_to_analysis.cc   | 23 ++++--------------
 .../xla/service/tuple_points_to_analysis.h    |  1 -
 7 files changed, 4 insertions(+), 66 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index 44cde4a3d2..c22adcdd8d 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -356,23 +356,6 @@ bool HloDataflowAnalysis::UpdateBitcastValueSet(HloInstruction* bitcast) {
   return false;
 }
 
-bool HloDataflowAnalysis::UpdateSliceValueSet(HloInstruction* slice) {
-  CHECK_EQ(slice->opcode(), HloOpcode::kSlice);
-  if (!slice->IsInPlaceSlice()) {
-    return false;
-  }
-  // If this slice is lowered to an in-place version, then it forwards the
-  // operand value to the output.
-  const InstructionValueSet& operand_set =
-      GetInstructionValueSet(slice->operand(0));
-  InstructionValueSet& slice_set = GetInstructionValueSet(slice);
-  if (operand_set != slice_set) {
-    slice_set = operand_set;
-    return true;
-  }
-  return false;
-}
-
 bool HloDataflowAnalysis::UpdateSendValueSet(HloInstruction* send) {
   CHECK_EQ(send->opcode(), HloOpcode::kSend);
   bool changed = false;
@@ -641,8 +624,6 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet(
   switch (instruction->opcode()) {
     case HloOpcode::kBitcast:
       return UpdateBitcastValueSet(instruction);
-    case HloOpcode::kSlice:
-      return UpdateSliceValueSet(instruction);
     case HloOpcode::kDomain:
       return UpdateDomainValueSet(instruction);
     case HloOpcode::kCopy:
@@ -814,11 +795,6 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() {
             define_all_values();
           }
           break;
-        case HloOpcode::kSlice:
-          if (!instruction->IsInPlaceSlice()) {
-            define_all_values();
-          }
-          break;
         case HloOpcode::kWhile:
         case HloOpcode::kCall:
         case HloOpcode::kConditional:
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
index e62c1c2ac8..abac398c04 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
@@ -182,7 +182,6 @@ class HloDataflowAnalysis {
   // Updates the value set for a particular instruction type. Returns whether
   // the instruction value set changed.
   bool UpdateBitcastValueSet(HloInstruction* bitcast);
-  bool UpdateSliceValueSet(HloInstruction* slice);
   bool UpdateCallValueSet(HloInstruction* call);
   bool UpdateConditionalValueSet(HloInstruction* conditional);
   bool UpdateCopyValueSet(HloInstruction* copy);
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 8bddaa8c96..fb91adc302 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -3076,10 +3076,6 @@ const std::vector<int64>& HloInstruction::slice_strides() const {
   return Cast<HloSliceInstruction>(this)->slice_strides();
 }
 
-bool HloInstruction::IsInPlaceSlice() const {
-  return Cast<HloSliceInstruction>(this)->IsInPlaceSlice();
-}
-
 const Literal& HloInstruction::literal() const {
   return Cast<HloConstantInstruction>(this)->literal();
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 9deed20e5d..374862c4b6 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -1330,9 +1330,6 @@ class HloInstruction {
   int64 slice_strides(int64 dimension) const;
   const std::vector<int64>& slice_strides() const;
 
-  // Delegates to HloSliceInstruction::IsInPlaceSlice.
-  bool IsInPlaceSlice() const;
-
   // Returns the literal associated with this instruction.
   const Literal& literal() const;
 
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index c929867bb9..ab168800f6 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -546,17 +546,6 @@ class HloSliceInstruction : public HloInstruction {
   }
   const std::vector<int64>& slice_strides() const { return slice_strides_; }
 
-  // Returns the flag that describes whether a slice must be lowered into an
-  // offset into the original operand.
-  bool IsInPlaceSlice() const { return is_in_place_slice_; }
-
-  // Sets and returns the flag that describes whether a slice must be lowered
-  // into an offset into the original operand.
-  bool SetIsInPlaceSlice(bool value) {
-    is_in_place_slice_ = value;
-    return value;
-  }
-
  private:
   std::vector<string> ExtraAttributesToStringImpl(
       const HloPrintOptions& options) const override;
@@ -573,9 +562,6 @@ class HloSliceInstruction : public HloInstruction {
   std::vector<int64> slice_starts_;
   std::vector<int64> slice_limits_;
   std::vector<int64> slice_strides_;
-
-  // Describes whether the slice can be lowered to an offset into the operand.
-  bool is_in_place_slice_ = false;
 };
 
 class HloConstantInstruction : public HloInstruction {
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
index 6fed7c76d0..811ac55e2d 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
@@ -280,16 +280,6 @@ Status TuplePointsToAnalysis::HandleDomain(HloInstruction* domain) {
   return Status::OK();
 }
 
-Status TuplePointsToAnalysis::HandleSlice(HloInstruction* slice) {
-  // A kSlice instruction aliases its operand if the backend lowers it to an
-  // in-place implementation.
-  if (slice->IsInPlaceSlice()) {
-    CreateCopiedPointsToSet(slice, slice->operand(0));
-    return Status::OK();
-  }
-  return DefaultAction(slice);
-}
-
 Status TuplePointsToAnalysis::HandleRecvDone(HloInstruction* recv_done) {
   // RecvDone aliases its input (Recv) tuple element {0} to element {0} of its
   // output. The other indices ({} and {1}) define their own buffers.
@@ -455,15 +445,10 @@ bool TuplePointsToAnalysis::InstructionDefinesBufferAtIndex(
 
 Status TuplePointsToAnalysis::VerifyBuffer(const LogicalBuffer& buffer) const {
   if (!InstructionDefinesBufferAtIndex(buffer.instruction(), buffer.index())) {
-    // kSlice ops that are lowered to an in-place version are expected to not
-    // define their output buffer.
-    if (buffer.instruction()->opcode() != HloOpcode::kSlice ||
-        !buffer.instruction()->IsInPlaceSlice()) {
-      return FailedPrecondition(
-          "LogicalBuffer %s is ill-defined: instruction %s does not define a "
-          "buffer at that index",
-          buffer.ToString(), buffer.instruction()->name());
-    }
+    return FailedPrecondition(
+        "LogicalBuffer %s is ill-defined: instruction %s does not define a "
+        "buffer at that index",
+        buffer.ToString(), buffer.instruction()->name());
   }
 
   if (buffer.id() < 0 ||
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
index 64ad1dc80e..30c365053c 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
@@ -247,7 +247,6 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault {
   Status HandleGetTupleElement(HloInstruction* get_tuple_element) override;
   Status HandleBitcast(HloInstruction* bitcast) override;
   Status HandleDomain(HloInstruction* domain) override;
-  Status HandleSlice(HloInstruction* slice) override;
   Status HandleCopy(HloInstruction* copy) override;
   Status HandleRecvDone(HloInstruction* recv_done) override;
   Status HandleSend(HloInstruction* send) override;
-- 
GitLab


From 54cde61fbf473270ce19f8b40e9511373fbc12c7 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 3 Oct 2018 22:00:51 -0700
Subject: [PATCH 1109/1357] [tf.data] Fix bug in
 `tf.data.experimental.unbatch()`.

Previously, if the rank of the input to this transformation was
statically unknown, we would erroneously report that the output is a
scalar, and violate downstream shape integrity checks. Instead, in
that case the output shape should be unknown.

PiperOrigin-RevId: 215683027
---
 tensorflow/core/kernels/data/unbatch_dataset_op.cc | 13 +++++++++----
 .../kernel_tests/batch_dataset_op_test.py          | 14 ++++++++++++++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/data/unbatch_dataset_op.cc b/tensorflow/core/kernels/data/unbatch_dataset_op.cc
index 81c432b938..74908994b4 100644
--- a/tensorflow/core/kernels/data/unbatch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/unbatch_dataset_op.cc
@@ -41,11 +41,16 @@ class UnbatchDatasetOp : public UnaryDatasetOpKernel {
         : DatasetBase(DatasetContext(ctx)), input_(input) {
       input_->Ref();
       for (const PartialTensorShape& shape : input->output_shapes()) {
-        gtl::InlinedVector<int64, 4> partial_dim_sizes;
-        for (int i = 1; i < shape.dims(); ++i) {
-          partial_dim_sizes.push_back(shape.dim_size(i));
+        if (!shape.unknown_rank()) {
+          gtl::InlinedVector<int64, 4> partial_dim_sizes;
+          for (int i = 1; i < shape.dims(); ++i) {
+            partial_dim_sizes.push_back(shape.dim_size(i));
+          }
+          shapes_.emplace_back(std::move(partial_dim_sizes));
+        } else {
+          // If the input shape is unknown, the output shape will be unknown.
+          shapes_.emplace_back();
         }
-        shapes_.emplace_back(std::move(partial_dim_sizes));
       }
     }
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
index 8703b2810e..956b4518f6 100644
--- a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
@@ -131,6 +131,20 @@ class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
                                    "larger than the row shape"):
         sess.run(get_next)
 
+  def testUnbatchWithUnknownRankInput(self):
+    placeholder = array_ops.placeholder(dtypes.int32)
+    dataset = dataset_ops.Dataset.from_tensors(placeholder).apply(
+        batching.unbatch())
+    iterator = dataset.make_initializable_iterator()
+    next_elem = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer, feed_dict={placeholder: [0, 1, 2, 3]})
+      for i in range(4):
+        self.assertEqual(i, sess.run(next_elem))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_elem)
+
   def testUnbatchScalarDataset(self):
     data = tuple([math_ops.range(10) for _ in range(3)])
     data = dataset_ops.Dataset.from_tensor_slices(data)
-- 
GitLab


From 1f1fe5a01af616707b8554d59651fb4925d7faee Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Wed, 3 Oct 2018 22:23:08 -0700
Subject: [PATCH 1110/1357] Include .inc files for absl headers

---
 tensorflow/tools/pip_package/setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index d864a7a039..54a7b7ffbe 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -232,6 +232,8 @@ headers = (list(find_files('*.h', 'tensorflow/core')) +
            list(find_files('*', 'third_party/eigen3')) +
            list(find_files('*.h',
                            'tensorflow/include/external/com_google_absl')) +
+           list(find_files('*.inc',
+                           'tensorflow/include/external/com_google_absl')) +
            list(find_files('*', 'tensorflow/include/external/eigen_archive')))
 
 setup(
-- 
GitLab


From 6795491bcc0c276e27be6a9e1a4a14c019c2ba37 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 22:24:14 -0700
Subject: [PATCH 1111/1357] Pin wheel=0.31.1 in install_auditwheel.sh to work
 around issue https://github.com/pypa/auditwheel/issues/102

PiperOrigin-RevId: 215685104
---
 tensorflow/tools/ci_build/install/install_auditwheel.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/ci_build/install/install_auditwheel.sh b/tensorflow/tools/ci_build/install/install_auditwheel.sh
index e6f6124d56..0e6d98c0a8 100755
--- a/tensorflow/tools/ci_build/install/install_auditwheel.sh
+++ b/tensorflow/tools/ci_build/install/install_auditwheel.sh
@@ -18,6 +18,10 @@ set -e
 
 sudo pip3 install auditwheel==1.5.0
 
+# Pin wheel==0.31.1 to work around issue
+# https://github.com/pypa/auditwheel/issues/102
+sudo pip3 install wheel==0.31.1
+
 set +e
 patchelf_location=$(which patchelf)
 if [[ -z "$patchelf_location" ]]; then
-- 
GitLab


From e57874169fca3cfdd15cf0dda3717a6374a7dcb9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 23:03:11 -0700
Subject: [PATCH 1112/1357] [XLA] Update Tf2Xla bridge to use Scatter HLO.

PiperOrigin-RevId: 215687800
---
 tensorflow/compiler/tf2xla/lib/scatter.cc     | 213 ++++++++++--------
 tensorflow/compiler/tf2xla/lib/scatter.h      |   6 +-
 tensorflow/compiler/xla/client/xla_builder.cc |   3 +
 tensorflow/compiler/xla/service/hlo_module.cc |   3 +-
 tensorflow/compiler/xla/service/inliner.cc    |  32 +--
 .../compiler/xla/service/inliner_test.cc      |  30 +++
 6 files changed, 177 insertions(+), 110 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/lib/scatter.cc b/tensorflow/compiler/tf2xla/lib/scatter.cc
index 38dfde165d..2b1c2ced92 100644
--- a/tensorflow/compiler/tf2xla/lib/scatter.cc
+++ b/tensorflow/compiler/tf2xla/lib/scatter.cc
@@ -38,12 +38,10 @@ xla::StatusOr<xla::XlaOp> XlaScatter(
         combiner,
     xla::XlaBuilder* builder) {
   TF_ASSIGN_OR_RETURN(xla::Shape buffer_shape, builder->GetShape(buffer));
-  TF_RETURN_IF_ERROR(builder->GetShape(updates).status());
+  TF_ASSIGN_OR_RETURN(xla::Shape updates_shape, builder->GetShape(updates));
   TF_ASSIGN_OR_RETURN(xla::Shape indices_shape, builder->GetShape(indices));
   absl::Span<const int64> indices_dims =
       xla::AsInt64Slice(indices_shape.dimensions());
-  absl::Span<const int64> buffer_dims =
-      xla::AsInt64Slice(buffer_shape.dimensions());
 
   // If the indices are N-dimensional, the minor dimension of indices contains
   // the indices to update. Otherwise the indices are all scalars.
@@ -81,104 +79,129 @@ xla::StatusOr<xla::XlaOp> XlaScatter(
     }
   }
 
-  // Shape of the non-indexed dimensions of the buffer.
-  std::vector<int64> buffer_shape_post_axes(
-      buffer_dims.begin() + num_index_dims, buffer_dims.end());
-
-  // Flatten the major dimensions of indices and updates into a single dimension
-  // for ease of iteration.
-  std::vector<int64> flat_indices_shape({num_indices});
-  if (indices_are_vectors) {
-    flat_indices_shape.push_back(num_index_dims);
+  // Example of a 1-D scatter that updates two [3,1] tensors in a tensor of
+  // shape [3,3]:
+  // NOTE: ***This case will not be generated by any of the tf.scatter ops.***
+  //
+  //   operand = s32[3,3] parameter(0)
+  //   indices = s32[2] parameter(1)
+  //   updates = s32[3,2] parameter(2)
+  //   scatter = s32[3,3] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={0},
+  //       inserted_window_dims={1},
+  //       scatter_dims_to_operand_dims={1},
+  //       index_vector_dim=1
+  //
+  //
+  // Example of a 1-D scatter that updates two [1,3] tensors in a tensor of
+  // shape [3,3]:
+  //
+  //   operand = s32[3,3] parameter(0)
+  //   indices = s32[2] parameter(1)
+  //   updates = s32[2,3] parameter(2)
+  //   scatter = s32[3,3] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={1},
+  //       inserted_window_dims={0},
+  //       scatter_dims_to_operand_dims={0},
+  //       index_vector_dim=1
+  //
+  //
+  // Example of an N-D scatter updating slices of shape [1,1,2] in a tensor of
+  // shape [3,3,2]
+  //
+  //   operand = s32[3,3,2] parameter(0)
+  //   indices = s32[2,2] parameter(1)
+  //   updates = s32[2,2] parameter(2)
+  //   scatter = s32[3,3,2] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={1},
+  //       inserted_window_dims={0,1},
+  //       scatter_dims_to_operand_dims={0,1},
+  //       index_vector_dim=1
+  //
+  //
+  // Example of a scatter updating slices of shape [] in a tensor of shape [1,1]
+  //
+  //   operand = s32[1,1] parameter(0)
+  //   indices = s32[1] parameter(1)
+  //   updates = s32[1] parameter(2)
+  //   scatter = s32[1,1] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={},
+  //       inserted_window_dims={0,1},
+  //       scatter_dims_to_operand_dims={0},
+  //       index_vector_dim=1
+  // Note that updates operand would be broadcasted into [1] in this case.
+  //
+
+  xla::ScatterDimensionNumbers dim_numbers;
+  dim_numbers.set_index_vector_dim(indices_are_vectors
+                                       ? indices_shape.dimensions_size() - 1
+                                       : indices_shape.dimensions_size());
+
+  int64 updates_rank = xla::ShapeUtil::Rank(updates_shape);
+  int64 buffer_rank = xla::ShapeUtil::Rank(buffer_shape);
+  int64 num_window_dims_in_updates = buffer_rank - num_index_dims;
+
+  // If the rank of `updates` is 0 and does not match the expected rank of
+  // updates, broadcast `updates` to the expected shape of updates.
+  auto new_updates = updates;
+  std::vector<int64> expected_updates_dims(indices_dims.begin(),
+                                           indices_dims.end());
+  for (int64 dim = num_index_dims; dim < buffer_rank; ++dim) {
+    expected_updates_dims.push_back(buffer_shape.dimensions(dim));
+  }
+  int64 expected_updates_rank = expected_updates_dims.size();
+  if (updates_rank == 0 && expected_updates_rank != 0) {
+    new_updates = xla::Broadcast(updates, expected_updates_dims);
+    TF_ASSIGN_OR_RETURN(updates_shape, builder->GetShape(new_updates));
+    updates_rank = xla::ShapeUtil::Rank(updates_shape);
   }
 
-  std::vector<int64> flat_updates_shape({num_indices});
-  flat_updates_shape.insert(flat_updates_shape.end(),
-                            buffer_shape_post_axes.begin(),
-                            buffer_shape_post_axes.end());
-
-  // Construct the initial values of the loop-carried Tensors.
-  auto flat_indices = xla::Reshape(indices, flat_indices_shape);
-  auto flat_updates = xla::Reshape(updates, flat_updates_shape);
-  auto init = {flat_indices, flat_updates, buffer};
-
-  // Constructs the loop body. The implementation of scatter is essentially:
-  // for i in range(num_indices):
-  //   index = dynamic-slice(indices, i)
-  //   update = dynamic-slice(updates, i)
-  //   buffer = dynamic-update-slice(buffer, update, index)
-  auto body_fn = [&](xla::XlaOp i, absl::Span<const xla::XlaOp> loop_vars,
-                     xla::XlaBuilder* body_builder) {
-    auto indices = loop_vars[0];
-    auto updates = loop_vars[1];
-    auto buffer = loop_vars[2];
-
-    auto zero_index = xla::ConstantLiteral(
-        body_builder, xla::LiteralUtil::Zero(indices_shape.element_type()));
-
-    // Slice the i-th index from the indices array.
-    xla::XlaOp index;
-    auto indices_offset = xla::Reshape(i, {1});
-    if (indices_are_vectors) {
-      indices_offset = xla::Pad(indices_offset, zero_index,
-                                xla::MakeEdgePaddingConfig({{0, 1}}));
-
-      index = xla::DynamicSlice(indices, indices_offset, {1, num_index_dims});
-      index = xla::Collapse(index, {0, 1});
-    } else {
-      index = xla::DynamicSlice(indices, indices_offset, {1});
+  if (updates_rank > 0) {
+    for (int64 i = (updates_rank - num_window_dims_in_updates);
+         i < updates_rank; ++i) {
+      dim_numbers.add_update_window_dims(i);
     }
+  }
 
-    // Discard updates with negative indices, since some users expect this.
-    auto index_in_range = xla::ReduceAll(
-        xla::Le(zero_index, index), xla::ConstantR0<bool>(body_builder, true),
-        xla::CreateScalarAndComputation(xla::PRED, body_builder));
-
-    // Make the index in bounds to prevent implementation defined behavior.
-    index = xla::Max(index, zero_index);
-    index = xla::Pad(
-        index, zero_index,
-        xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}}));
-
-    // Slice the i-th index from the updates array.
-    auto updates_offset = xla::Reshape(i, {1});
-    updates_offset = xla::Pad(
-        updates_offset, zero_index,
-        xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}}));
-    std::vector<int64> flat_updates_slice_shape({1});
-    flat_updates_slice_shape.insert(flat_updates_slice_shape.end(),
-                                    buffer_shape_post_axes.begin(),
-                                    buffer_shape_post_axes.end());
-    auto update =
-        xla::DynamicSlice(updates, updates_offset, flat_updates_slice_shape);
-
-    // Unflatten the major (iteration) dimensions of the slice to their
-    // original shape.
-    std::vector<int64> updates_slice_shape(num_index_dims, 1);
-    updates_slice_shape.insert(updates_slice_shape.end(),
-                               buffer_shape_post_axes.begin(),
-                               buffer_shape_post_axes.end());
-    update = xla::Reshape(update, updates_slice_shape);
-
-    // Apply the update to the buffer. If there is a combiner, use it to merge
-    // the current values with the update.
-    auto current_value = xla::DynamicSlice(buffer, index, updates_slice_shape);
+  for (int64 i = 0; i < num_index_dims; ++i) {
+    dim_numbers.add_inserted_window_dims(i);
+    dim_numbers.add_scatter_dims_to_operand_dims(i);
+  }
+
+  // Build the combiner computation.
+  xla::XlaComputation combiner_computation;
+  {
+    xla::XlaBuilder cb("scatter-combiner");
+    auto xla_scalar_shape =
+        xla::ShapeUtil::MakeShape(buffer_shape.element_type(), {});
+    auto p0 = xla::Parameter(&cb, 0, xla_scalar_shape, "p0");
+    auto p1 = xla::Parameter(&cb, 1, xla_scalar_shape, "p1");
     if (combiner) {
-      update = combiner(current_value, update, body_builder);
+      combiner(p0, p1, &cb);
     }
-    // Use the current value instead of the update if the index is out of
-    // bounds.
-    update = xla::Select(index_in_range, update, current_value);
-    // Apply the update.
-    buffer = xla::DynamicUpdateSlice(buffer, update, index);
-
-    return std::vector<xla::XlaOp>{indices, updates, buffer};
-  };
-
-  TF_ASSIGN_OR_RETURN(auto outputs,
-                      XlaForEachIndex(num_indices, indices_shape.element_type(),
-                                      body_fn, init, "scatter", builder));
-  return outputs[2];
+    combiner_computation = cb.Build().ConsumeValueOrDie();
+  }
+
+  VLOG(3) << "Scatter op:";
+  VLOG(3) << "  Input: " << xla::ShapeUtil::HumanString(buffer_shape);
+  VLOG(3) << "  Indices: " << xla::ShapeUtil::HumanString(indices_shape);
+  VLOG(3) << "  Updates: " << xla::ShapeUtil::HumanString(updates_shape);
+  VLOG(3) << "  Scatter Dimension Numbers: ";
+  VLOG(3) << "    index_vector_dim: " << dim_numbers.index_vector_dim();
+  VLOG(3) << "    update_window_dims: ["
+          << absl::StrJoin(dim_numbers.update_window_dims(), ",") << "]";
+  VLOG(3) << "    inserted_window_dims: ["
+          << absl::StrJoin(dim_numbers.inserted_window_dims(), ",") << "]";
+  VLOG(3) << "    scatter_dims_to_operand_dims: ["
+          << absl::StrJoin(dim_numbers.scatter_dims_to_operand_dims(), ",")
+          << "]";
+
+  return xla::Scatter(buffer, indices, new_updates, combiner_computation,
+                      dim_numbers);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/lib/scatter.h b/tensorflow/compiler/tf2xla/lib/scatter.h
index 13a5f1b850..4cf478c4b9 100644
--- a/tensorflow/compiler/tf2xla/lib/scatter.h
+++ b/tensorflow/compiler/tf2xla/lib/scatter.h
@@ -34,7 +34,11 @@ namespace tensorflow {
 // Otherwise, `indices_are_vectors`, then indices are multidimensional and the
 // minor dimension of `indices` represents a vector of indices.
 //
-// If any indices are negative, the corresponding update is discarded.
+// If `updates` is a scalar, then it will be broadcasted into the expected shape
+// of updates.
+//
+// If any part of the update region is out-of-bounds, the corresponding update
+// is discarded.
 //
 // If a `combiner` is provided, updates are combined with the existing values in
 // the buffer using the combiner function. Otherwise, the updates replace the
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index e0ec91dba1..d196252db1 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -208,6 +208,9 @@ void XlaBuilder::IsConstantVisitor(const int64 op_handle,
     case HloOpcode::kWhile:
       // TODO(b/32495713): We aren't checking the condition and body
       // computations themselves.
+    case HloOpcode::kScatter:
+      // TODO(b/32495713): We aren't checking the embedded computation in
+      // Scatter.
     case HloOpcode::kSend:
     case HloOpcode::kRecv:
     case HloOpcode::kParameter:
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 7527e35c95..93e04eb3db 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -146,7 +146,8 @@ void HloModule::ReplaceComputations(
         case HloOpcode::kCall:
         case HloOpcode::kMap:
         case HloOpcode::kReduce:
-        case HloOpcode::kReduceWindow: {
+        case HloOpcode::kReduceWindow:
+        case HloOpcode::kScatter: {
           HloComputation* new_arg = tensorflow::gtl::FindWithDefault(
               replacements, instruction->to_apply(), nullptr);
           if (new_arg != nullptr) {
diff --git a/tensorflow/compiler/xla/service/inliner.cc b/tensorflow/compiler/xla/service/inliner.cc
index 5fd779ebf9..50c408f5bb 100644
--- a/tensorflow/compiler/xla/service/inliner.cc
+++ b/tensorflow/compiler/xla/service/inliner.cc
@@ -71,26 +71,23 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) {
   // profitability model for inlining is defined.
   if (hlo_query::AllOperandsAreParameters(root)) {
     if (root.opcode() == HloOpcode::kFusion ||
-        root.opcode() == HloOpcode::kParameter ||
         root.opcode() == HloOpcode::kTrace) {
       // Cloning not supported for these instructions.
       return Status::OK();
     }
     VLOG(10) << "inlining map({X ... Y}, op) => : op(X ... Y) with function "
              << root.ToShortString();
-    // If the input is a constant then the shape of the constant could be
-    // different than the map shape. Hence, a broadcast is needed, else the
-    // cloned operand with new shape and operands work.
-    if (root.opcode() != HloOpcode::kConstant) {
-      std::vector<HloInstruction*> params;
-      for (int64 o = 0; o < root.operands().size(); o++) {
-        params.push_back(map->operands()[root.operand(o)->parameter_number()]);
-      }
-      HloInstruction* placed_instruction = computation_->AddInstruction(
-          root.CloneWithNewOperands(map->shape(), params));
+    if (root.opcode() == HloOpcode::kParameter) {
+      // If the root is a parameter, then use the corresponding operand as the
+      // result of the computation.
       TF_RETURN_IF_ERROR(
-          computation_->ReplaceInstruction(map, placed_instruction));
-    } else {
+          map->ReplaceAllUsesWith(map->operands()[root.parameter_number()]));
+      TF_RETURN_IF_ERROR(computation_->RemoveInstruction(map));
+    } else if (root.opcode() == HloOpcode::kConstant) {
+      // If the input is a constant then the shape of the constant could be
+      // different than the map shape. Hence, a broadcast is needed, else the
+      // cloned operand with new shape and operands work.
+      //
       // The constant is in an embedded computation and needs to be recreated
       // as part of the computation that the broadcast is inserted into.
       HloInstruction* constant = computation_->AddInstruction(root.Clone());
@@ -98,6 +95,15 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) {
           HloInstruction::CreateBroadcast(map->shape(), constant, {}));
       TF_RETURN_IF_ERROR(
           computation_->ReplaceInstruction(map, placed_instruction));
+    } else {
+      std::vector<HloInstruction*> params;
+      for (int64 o = 0; o < root.operands().size(); o++) {
+        params.push_back(map->operands()[root.operand(o)->parameter_number()]);
+      }
+      HloInstruction* placed_instruction = computation_->AddInstruction(
+          root.CloneWithNewOperands(map->shape(), params));
+      TF_RETURN_IF_ERROR(
+          computation_->ReplaceInstruction(map, placed_instruction));
     }
     changed_ = true;
     return Status::OK();
diff --git a/tensorflow/compiler/xla/service/inliner_test.cc b/tensorflow/compiler/xla/service/inliner_test.cc
index 7e967f035c..98e0f2cfd7 100644
--- a/tensorflow/compiler/xla/service/inliner_test.cc
+++ b/tensorflow/compiler/xla/service/inliner_test.cc
@@ -146,6 +146,36 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) {
   EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
 }
 
+TEST_F(InlinerTest, MapParameter) {
+  Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+
+  auto param_builder = HloComputation::Builder(TestName());
+  param_builder.AddInstruction(HloInstruction::CreateParameter(0, r0f32, "p0"));
+  param_builder.AddInstruction(HloInstruction::CreateParameter(1, r0f32, "p1"));
+  auto param_f32 = param_builder.Build();
+
+  auto builder = HloComputation::Builder("MapParamFunction");
+  auto lhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(1)));
+  auto rhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(4)));
+  builder.AddInstruction(
+      HloInstruction::CreateMap(lhs->shape(), {lhs, rhs}, param_f32.get()));
+
+  auto computation = builder.Build();
+  auto hlo_module = CreateNewVerifiedModule();
+  hlo_module->AddEmbeddedComputation(std::move(param_f32));
+  hlo_module->AddEntryComputation(std::move(computation));
+
+  Inliner inliner;
+  EXPECT_TRUE(inliner.Run(hlo_module.get()).ValueOrDie());
+  EXPECT_THAT(hlo_module->entry_computation()->root_instruction(), rhs);
+
+  // Verify execution on CPU.
+  auto result = ExecuteAndTransfer(hlo_module->Clone(), {});
+  auto expected = LiteralUtil::CreateR0<float>(4);
+  EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
+}
 
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 67e0ccb3e5c1a48d62bcc45201fd70d2420dc4eb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 02:27:59 -0700
Subject: [PATCH 1113/1357] compat: Update forward compatibility horizon to
 2018-10-04

PiperOrigin-RevId: 215706500
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index d833defb8e..76e08610ba 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 3)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 4)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 98ea840dabc0c4e9417ebe9a0fd10c9d471cda51 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 02:41:25 -0700
Subject: [PATCH 1114/1357] Improve the performance of the ListMemoryScheduler

This CL replaces a std::unordered_map with an absl::flat_hash_map and
removes an unnecessary map lookup. This two change can improve the
performance of the scheduler on large graphs by up to 2x.

PiperOrigin-RevId: 215707921
---
 .../compiler/xla/service/hlo_memory_scheduler.cc       | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index 55314d0ae9..bf30764488 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -263,9 +263,8 @@ class ListScheduler {
     };
 
     for (auto* instruction : computation_.instructions()) {
-      // Instruction with no operands or control predecessors will
-      // not be in the map.
-      if (unscheduled_pred_count.count(instruction) == 0) {
+      if (instruction->operands().empty() &&
+          instruction->control_predecessors().empty()) {
         add_to_ready_queue(instruction);
       }
     }
@@ -356,9 +355,8 @@ class ListScheduler {
       buffer_uses_;
 
   // A map containing the count of unscheduled HLOs which using a particular
-  // LogicalBuffer.  We rely on iterator stability in this map, and that the map
-  // entries are std::pair's.
-  std::unordered_map<const LogicalBuffer*, int64> unscheduled_use_count_;
+  // LogicalBuffer.
+  absl::flat_hash_map<const LogicalBuffer*, int64> unscheduled_use_count_;
 
   // Set of instructions which have been scheduled.
   absl::flat_hash_set<const HloInstruction*> scheduled_instructions_;
-- 
GitLab


From 6b538d9ce54e878576131cde0c76e43a893180c2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 03:12:04 -0700
Subject: [PATCH 1115/1357] Automated rollback of commit
 70a395f9795a48c21bc35cdf1dc44778f73a7bba

PiperOrigin-RevId: 215710849
---
 tensorflow/python/data/kernel_tests/BUILD     |  1 +
 tensorflow/tensorflow.bzl                     | 39 +++++++++++--------
 .../tools/pip_package/pip_smoke_test.py       |  2 +-
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index c7295d6e69..10ec0dbe1c 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -306,6 +306,7 @@ cuda_py_test(
         "//tensorflow/python:framework_test_lib",
     ],
     tags = [
+        "no_oss",  # TODO(b/116813115): Investigate timeout and re-enable.
         "no_windows_gpu",
     ],
 )
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index cad5de1b0c..dead44c57e 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1798,22 +1798,29 @@ def cuda_py_test(
         flaky = 0,
         xla_enabled = False,
         grpc_enabled = False):
-    test_tags = tags + tf_cuda_tests_tags()
-    tf_py_test(
-        name = name,
-        size = size,
-        srcs = srcs,
-        data = data,
-        main = main,
-        args = args,
-        tags = test_tags,
-        shard_count = shard_count,
-        additional_deps = additional_deps,
-        kernels = kernels,
-        flaky = flaky,
-        xla_enabled = xla_enabled,
-        grpc_enabled = grpc_enabled,
-    )
+    if main == None:
+        main = name + ".py"
+    for config in ["cpu", "gpu"]:
+        test_name = name
+        test_tags = tags
+        if config == "gpu":
+            test_name += "_gpu"
+            test_tags = test_tags + tf_cuda_tests_tags()
+        tf_py_test(
+            name = test_name,
+            size = size,
+            srcs = srcs,
+            data = data,
+            main = main,
+            args = args,
+            tags = test_tags,
+            shard_count = shard_count,
+            additional_deps = additional_deps,
+            kernels = kernels,
+            flaky = flaky,
+            xla_enabled = xla_enabled,
+            grpc_enabled = grpc_enabled,
+        )
 
 register_extension_info(
     extension_name = "cuda_py_test",
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index c6ef82ccdc..e7f9628fa6 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -142,7 +142,7 @@ def main():
 
   missing_dependencies = []
   # File extensions and endings to ignore
-  ignore_extensions = ["_test", "_test.py"]
+  ignore_extensions = ["_test", "_test.py", "_test_gpu", "_test_gpu.py"]
 
   ignored_files = 0
   blacklisted_files = len(BLACKLIST)
-- 
GitLab


From 6cc738da1748e819b9c8ee92dc2f1a7bdb291b50 Mon Sep 17 00:00:00 2001
From: Adria Puigdomenech <adriap@google.com>
Date: Thu, 4 Oct 2018 03:19:46 -0700
Subject: [PATCH 1116/1357] Make batch_gather work with indices of dtype int64.

PiperOrigin-RevId: 215711383
---
 tensorflow/python/kernel_tests/BUILD               |  1 +
 .../python/kernel_tests/batch_gather_op_test.py    | 13 ++++++++-----
 tensorflow/python/ops/array_ops.py                 | 14 ++++++++++----
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 9303c70c60..e055ef1c1b 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -76,6 +76,7 @@ tf_py_test(
     name = "batch_gather_op_test",
     srcs = ["batch_gather_op_test.py"],
     additional_deps = [
+        "@absl_py//absl/testing:parameterized",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/python/kernel_tests/batch_gather_op_test.py b/tensorflow/python/kernel_tests/batch_gather_op_test.py
index 7dd347989a..84e93b8136 100644
--- a/tensorflow/python/kernel_tests/batch_gather_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_gather_op_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.framework import constant_op
@@ -29,7 +30,7 @@ _TEST_TYPES = (dtypes.int64, dtypes.float32,
                dtypes.complex64, dtypes.complex128)
 
 
-class GatherTest(test.TestCase):
+class GatherTest(test.TestCase, parameterized.TestCase):
 
   def _buildParams(self, data, dtype):
     data = data.astype(dtype.as_numpy_dtype)
@@ -39,14 +40,15 @@ class GatherTest(test.TestCase):
       return data + 10j * data
     return data
 
-  def testSimpleGather(self):
+  @parameterized.parameters(dtypes.int32, dtypes.int64)
+  def testSimpleGather(self, indices_dtype):
     data = np.array([0, 1, 2, 3, 7, 5, 8, 9, 10, 11, 15, 13])
     indices = [3, 4]
     with self.test_session(use_gpu=True):
       for dtype in _TEST_TYPES:
         params_np = self._buildParams(data, dtype)
         params = constant_op.constant(params_np)
-        indices_tf = constant_op.constant(indices)
+        indices_tf = constant_op.constant(indices, dtype=indices_dtype)
         gather_t = array_ops.batch_gather(params, indices_tf)
         expected_result = np.array([3, 7])
         np_val = self._buildParams(expected_result, dtype)
@@ -54,14 +56,15 @@ class GatherTest(test.TestCase):
         self.assertAllEqual(np_val, gather_val)
         self.assertEqual(np_val.shape, gather_t.get_shape())
 
-  def test2DArray(self):
+  @parameterized.parameters(dtypes.int32, dtypes.int64)
+  def test2DArray(self, indices_dtype):
     data = np.array([[0, 1, 2, 3, 7, 5], [8, 9, 10, 11, 15, 13]])
     indices = [[3], [4]]
     with self.test_session(use_gpu=True):
       for dtype in _TEST_TYPES:
         params_np = self._buildParams(data, dtype)
         params = constant_op.constant(params_np)
-        indices_tf = constant_op.constant(indices)
+        indices_tf = constant_op.constant(indices, dtype=indices_dtype)
         gather_t = array_ops.batch_gather(params, indices_tf)
         expected_result = np.array([[3], [15]])
         np_val = self._buildParams(expected_result, dtype)
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 9f5149d5ac..4be9c532f4 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -2716,16 +2716,22 @@ def batch_gather(params, indices, name=None):
     params = ops.convert_to_tensor(params, name="params")
     indices_shape = shape(indices)
     params_shape = shape(params)
+
     ndims = indices.shape.ndims
     if ndims is None:
       raise ValueError("batch_gather does not allow indices with unknown "
                        "shape.")
     batch_indices = indices
-    accum_dim_value = 1
+    indices_dtype = indices.dtype.base_dtype
+    accum_dim_value = ones((), dtype=indices_dtype)
+    # Use correct type for offset index computation
+    casted_params_shape = gen_math_ops.cast(params_shape, indices_dtype)
     for dim in range(ndims-1, 0, -1):
-      dim_value = params_shape[dim-1]
-      accum_dim_value *= params_shape[dim]
-      dim_indices = gen_math_ops._range(0, dim_value, 1)
+      dim_value = casted_params_shape[dim-1]
+      accum_dim_value *= casted_params_shape[dim]
+      start = zeros((), dtype=indices_dtype)
+      step = ones((), dtype=indices_dtype)
+      dim_indices = gen_math_ops._range(start, dim_value, step)
       dim_indices *= accum_dim_value
       dim_shape = stack([1] * (dim - 1) + [dim_value] + [1] * (ndims - dim),
                         axis=0)
-- 
GitLab


From 9cd6cab4f85f1f35c6532da3fb68839294d44ee4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 03:20:59 -0700
Subject: [PATCH 1117/1357] Internal change.

PiperOrigin-RevId: 215711454
---
 .../cluster_resolver/python/training/tpu_cluster_resolver.py     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
index 1056894f18..f4a8e16c99 100644
--- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
+++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
@@ -60,6 +60,7 @@ class TPUClusterResolver(ClusterResolver):
     if (self._tpu == compat.as_bytes('') or
         self._tpu == compat.as_bytes('local') or
         self._tpu.startswith(compat.as_bytes('/bns')) or
+        self._tpu.startswith(compat.as_bytes('localhost:')) or
         self._tpu.startswith(compat.as_bytes('grpc://'))):
       return False
     return True
-- 
GitLab


From 28f239fdfa0c94f715fccf0197ab6c3c8df27d28 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Thu, 4 Oct 2018 05:34:55 -0700
Subject: [PATCH 1118/1357] Implement DataFormatVecPermute for XLA.

Also clear "_kernel" attributes of nodes if they are set to "host".
This is not meaningful when processing the graph for XLA, and it
would prevent finding the registered XLA kernel.

PiperOrigin-RevId: 215722216
---
 tensorflow/compiler/tests/BUILD               | 13 +++
 tensorflow/compiler/tests/permute_test.py     | 80 +++++++++++++++
 tensorflow/compiler/tf2xla/kernels/BUILD      |  1 +
 .../compiler/tf2xla/kernels/permute_op.cc     | 98 +++++++++++++++++++
 tensorflow/compiler/tf2xla/xla_compiler.cc    | 11 +++
 5 files changed, 203 insertions(+)
 create mode 100644 tensorflow/compiler/tests/permute_test.py
 create mode 100644 tensorflow/compiler/tf2xla/kernels/permute_op.cc

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 822fedf121..ee36729fd1 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -1028,6 +1028,19 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "permute_test",
+    size = "small",
+    srcs = ["permute_test.py"],
+    deps = [
+        "//tensorflow/compiler/tests:xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:nn_ops",
+    ],
+)
+
 tf_xla_py_test(
     name = "xla_device_test",
     size = "small",
diff --git a/tensorflow/compiler/tests/permute_test.py b/tensorflow/compiler/tests/permute_test.py
new file mode 100644
index 0000000000..dbb9274df4
--- /dev/null
+++ b/tensorflow/compiler/tests/permute_test.py
@@ -0,0 +1,80 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the DataFormatVecPermute operator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.compiler.tests import xla_test
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.platform import test
+
+
+class XlaPermuteOpTest(xla_test.XLATestCase):
+
+  def _runPermuteAndCompare(self, x, src_format, dst_format, expected):
+    with self.cached_session() as session:
+      with self.test_scope():
+        placeholder = array_ops.placeholder(dtypes.as_dtype(x.dtype), x.shape)
+        param = {placeholder: x}
+        output = nn_ops.data_format_vec_permute(
+            placeholder, src_format=src_format, dst_format=dst_format)
+      result = session.run(output, param)
+    self.assertAllEqual(result, expected)
+
+  def testNHWCToNCHW(self):
+    x = np.array([7, 4, 9, 3], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NHWC", "NCHW", [7, 3, 4, 9])
+
+  def testNCHWToNHWC(self):
+    x = np.array([7, 4, 9, 3], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NCHW", "NHWC", [7, 9, 3, 4])
+
+  def testNHWCToHWNC(self):
+    x = np.array([7, 4, 9, 3], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NHWC", "HWNC", [4, 9, 7, 3])
+
+  def testHWNCToNHWC(self):
+    x = np.array([7, 4, 9, 3], dtype=np.int32)
+    self._runPermuteAndCompare(x, "HWNC", "NHWC", [9, 7, 4, 3])
+
+  def testNHWCToNCHW2D(self):
+    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NHWC", "NCHW",
+                               [[7, 4], [5, 1], [9, 3], [4, 5]])
+
+  def testNHWCToHWNC2D(self):
+    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NHWC", "HWNC",
+                               [[9, 3], [4, 5], [7, 4], [5, 1]])
+
+  def testHWNCToNHWC2D(self):
+    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
+    self._runPermuteAndCompare(x, "HWNC", "NHWC",
+                               [[4, 5], [7, 4], [9, 3], [5, 1]])
+
+  def testNCHWToNHWC2D(self):
+    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NCHW", "NHWC",
+                               [[7, 4], [4, 5], [5, 1], [9, 3]])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 3e823254d3..9a7130f253 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -62,6 +62,7 @@ tf_kernel_library(
         "one_hot_op.cc",
         "pack_op.cc",
         "pad_op.cc",
+        "permute_op.cc",
         "pooling_ops.cc",
         "qr_op.cc",
         "quantize_and_dequantize_op.cc",
diff --git a/tensorflow/compiler/tf2xla/kernels/permute_op.cc b/tensorflow/compiler/tf2xla/kernels/permute_op.cc
new file mode 100644
index 0000000000..0764e5503d
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/permute_op.cc
@@ -0,0 +1,98 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+namespace tensorflow {
+namespace {
+
+class DataFormatVecPermuteOp : public XlaOpKernel {
+ public:
+  explicit DataFormatVecPermuteOp(OpKernelConstruction* ctx)
+      : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("src_format", &src_format_));
+    OP_REQUIRES(
+        ctx, src_format_.size() == 4,
+        errors::InvalidArgument("Data format should have 4 characters"));
+    TensorFormat data_format;
+    OP_REQUIRES(ctx, FormatFromString(src_format_, &data_format),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dst_format", &dst_format_));
+    OP_REQUIRES(
+        ctx, dst_format_.size() == 4,
+        errors::InvalidArgument("Data format should have 4 characters"));
+    OP_REQUIRES(ctx, FormatFromString(dst_format_, &data_format),
+                errors::InvalidArgument("Invalid data format"));
+  }
+  void Compile(XlaOpKernelContext* ctx) override {
+    auto builder = ctx->builder();
+    const TensorShape input_tensor_shape = ctx->InputShape(0);
+    int input_rank = input_tensor_shape.dims();
+    OP_REQUIRES(ctx, input_rank == 1 || input_rank == 2,
+                errors::InvalidArgument(
+                    "Input must be a vector or matrix, but got shape ",
+                    input_tensor_shape.DebugString()));
+    OP_REQUIRES(
+        ctx, input_tensor_shape.dim_size(0) == 4,
+        errors::InvalidArgument(
+            "First dimension of input must be of size 4, but got shape ",
+            input_tensor_shape.DebugString()));
+    if (input_rank == 2) {
+      OP_REQUIRES(
+          ctx, input_tensor_shape.dim_size(1) == 2,
+          errors::InvalidArgument(
+              "Second dimension of 2D input must be of size 2, but got shape ",
+              input_tensor_shape.DebugString()));
+    }
+    std::vector<int32> dst_indices(4, 0);
+    for (int i = 0; i < 4; ++i) {
+      for (int j = 0; j < 4; ++j) {
+        if (src_format_[i] == dst_format_[j]) {
+          dst_indices[i] = j;
+          break;
+        }
+      }
+    }
+    auto keys = xla::ConstantR1(builder, absl::Span<const int32>(dst_indices));
+    if (input_rank == 2) {
+      keys = xla::BroadcastInDim(
+          keys, xla::ShapeUtil::MakeShape(xla::S32, {4, 2}), {0});
+    }
+    auto sorted = xla::Sort(keys, ctx->Input(0), 0);
+    auto output = xla::GetTupleElement(sorted, 1);
+    ctx->SetOutput(0, output);
+  }
+
+ private:
+  string src_format_;
+  string dst_format_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(DataFormatVecPermuteOp);
+};
+
+// TODO(b/115384656): Support DT_INT64.
+REGISTER_XLA_OP(Name("DataFormatVecPermute").TypeConstraint("T", DT_INT32),
+                DataFormatVecPermuteOp);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index d5094e8ec5..b2c57e8880 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -194,6 +194,17 @@ Status XlaCompiler::CompileFunction(const XlaCompiler::CompileOptions& options,
 
   std::unique_ptr<Graph> graph = GetGraph(fbody);
 
+  // Clear the "_kernel" attribute if it is set to "host". This is used to
+  // indicate that a computation should happen on the host instead of the
+  // accelerator, but doesn't make sense in XLA.
+  const char* const kKernelAttr = "_kernel";
+  for (Node* n : graph->nodes()) {
+    string value;
+    if (GetNodeAttrSimple(n->attrs(), kKernelAttr, &value) && value == "host") {
+      n->ClearAttr(kKernelAttr);
+    }
+  }
+
   // _Arg and _Retval nodes don't exist in the stored subgraph for the function;
   // they are added by the function body looked up.  Therefore, they don't have
   // core assignments here.
-- 
GitLab


From 2c9369c8d878c913b5dfcd3c27849bcd3d6af6c9 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 4 Oct 2018 06:00:02 -0700
Subject: [PATCH 1119/1357] [TF:XLA] Don't expand complex64 tensors during
 TF/XLA lowering, if possible.

PiperOrigin-RevId: 215724324
---
 tensorflow/compiler/tests/nullary_ops_test.py | 43 +++++++++++++------
 .../compiler/tf2xla/kernels/const_op.cc       | 12 ++++++
 2 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/tests/nullary_ops_test.py b/tensorflow/compiler/tests/nullary_ops_test.py
index f985c5d2d9..38cb2f83ef 100644
--- a/tensorflow/compiler/tests/nullary_ops_test.py
+++ b/tensorflow/compiler/tests/nullary_ops_test.py
@@ -43,18 +43,37 @@ class NullaryOpsTest(xla_test.XLATestCase):
       output.run()
 
   def testConstants(self):
-    constants = [
-        np.float32(42),
-        np.array([], dtype=np.float32),
-        np.array([1, 2], dtype=np.float32),
-        np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32),
-        np.array([[[1, 2], [3, 4], [5, 6]], [[10, 20], [30, 40], [50, 60]]],
-                 dtype=np.float32),
-        np.array([[[]], [[]]], dtype=np.float32),
-        np.array([[[[1]]]], dtype=np.float32),
-    ]
-    for c in constants:
-      self._testNullary(lambda c=c: constant_op.constant(c), expected=c)
+    for dtype in self.numeric_types:
+      constants = [
+          dtype(42),
+          np.array([], dtype=dtype),
+          np.array([1, 2], dtype=dtype),
+          np.array([7, 7, 7, 7, 7], dtype=dtype),
+          np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype),
+          np.array([[[1, 2], [3, 4], [5, 6]], [[10, 20], [30, 40], [50, 60]]],
+                   dtype=dtype),
+          np.array([[[]], [[]]], dtype=dtype),
+          np.array([[[[1]]]], dtype=dtype),
+      ]
+      for c in constants:
+        self._testNullary(lambda c=c: constant_op.constant(c), expected=c)
+
+  def testComplexConstants(self):
+    for dtype in self.complex_types:
+      constants = [
+          dtype(42 + 3j),
+          np.array([], dtype=dtype),
+          np.ones([50], dtype=dtype) * (3 + 4j),
+          np.array([1j, 2 + 1j], dtype=dtype),
+          np.array([[1, 2j, 7j], [4, 5, 6]], dtype=dtype),
+          np.array([[[1, 2], [3, 4 + 6j], [5, 6]],
+                    [[10 + 7j, 20], [30, 40], [50, 60]]],
+                   dtype=dtype),
+          np.array([[[]], [[]]], dtype=dtype),
+          np.array([[[[1 + 3j]]]], dtype=dtype),
+      ]
+      for c in constants:
+        self._testNullary(lambda c=c: constant_op.constant(c), expected=c)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/compiler/tf2xla/kernels/const_op.cc b/tensorflow/compiler/tf2xla/kernels/const_op.cc
index da8cf3fc6f..2628ef8e24 100644
--- a/tensorflow/compiler/tf2xla/kernels/const_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/const_op.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/framework/types.pb.h"
 
 namespace tensorflow {
 namespace {
@@ -76,6 +77,17 @@ class ConstOp : public XlaOpKernel {
             return;
           }
           break;
+        case DT_COMPLEX64:
+          if (proto_.scomplex_val_size() == 2) {
+            ctx->SetOutput(
+                0,
+                xla::Broadcast(xla::ConstantR0<xla::complex64>(
+                                   b, xla::complex64(proto_.scomplex_val(0),
+                                                     proto_.scomplex_val(1))),
+                               shape.dim_sizes()));
+            return;
+          }
+          break;
         case DT_INT32:
           if (proto_.int_val_size() == 1) {
             ctx->SetOutput(
-- 
GitLab


From 82ea80b979768c7fe1daa4b50cf054e5a0968f31 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 06:09:42 -0700
Subject: [PATCH 1120/1357] Add option in tf.gradients() to return zero tensors
 for unconnected gradients.

tf.gradients currently returns [NONE] when the gradient of unconnected variables
is required. This backwards compatable change adds in the option to have zero
tensors returned that match the dimensions of the input tensor.

PiperOrigin-RevId: 215725488
---
 tensorflow/python/BUILD                       |  4 ++
 tensorflow/python/ops/gradients.py            |  1 +
 tensorflow/python/ops/gradients_impl.py       | 67 +++++++++++++++++--
 tensorflow/python/ops/gradients_test.py       | 34 ++++++++++
 .../tensorflow.-unconnected-gradients.pbtxt   | 12 ++++
 .../tools/api/golden/v1/tensorflow.pbtxt      |  6 +-
 .../tensorflow.-unconnected-gradients.pbtxt   | 12 ++++
 .../tools/api/golden/v2/tensorflow.pbtxt      |  6 +-
 8 files changed, 135 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.-unconnected-gradients.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-unconnected-gradients.pbtxt

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index fe81254ef7..da3c56db92 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2152,6 +2152,7 @@ py_library(
         ":array_grad",
         ":array_ops",
         ":bitwise_ops",
+        ":check_ops",
         ":cond_v2_impl",
         ":control_flow_grad",
         ":control_flow_ops",
@@ -2172,8 +2173,11 @@ py_library(
         ":random_grad",
         ":resource_variable_ops",
         ":spectral_grad",
+        ":tensor_array_ops",
+        ":tensor_util",
         ":util",
         ":variable_scope",
+        "//tensorflow/core:protos_all_py",
         "//tensorflow/python/eager:backprop",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:tape",
diff --git a/tensorflow/python/ops/gradients.py b/tensorflow/python/ops/gradients.py
index 1dc666e78b..794465b10e 100644
--- a/tensorflow/python/ops/gradients.py
+++ b/tensorflow/python/ops/gradients.py
@@ -25,4 +25,5 @@ from tensorflow.python.ops.custom_gradient import custom_gradient
 from tensorflow.python.ops.gradients_impl import AggregationMethod
 from tensorflow.python.ops.gradients_impl import gradients
 from tensorflow.python.ops.gradients_impl import hessians
+from tensorflow.python.ops.gradients_impl import UnconnectedGradients
 # pylint: enable=unused-import
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index 056015d6b6..aac95037dc 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import collections
 import contextlib
+import enum  # pylint: disable=g-bad-import-order
 import sys
 import warnings
 
@@ -537,6 +538,26 @@ def _Consumers(t, func_graphs):
   return consumers
 
 
+@tf_export("UnconnectedGradients")
+class UnconnectedGradients(enum.Enum):
+  """Controls how gradient computation behaves when y does not depend on x.
+
+  The gradient of y with respect to x can be zero in two different ways: there
+  could be no differentiable path in the graph connecting x to y (and so we can
+  statically prove that the gradient is zero) or it could be that runtime values
+  of tensors in a particular execution lead to a gradient of zero (say, if a
+  relu unit happens to not be activated). To allow you to distinguish between
+  these two cases you can choose what value gets returned for the gradient when
+  there is no path in the graph from x to y:
+
+  * `NONE`: Indicates that [None] will be returned if there is no path from x
+    to y
+  * `ZERO`: Indicates that a zero tensor will be returned in the shape of x.
+  """
+  NONE = "none"
+  ZERO = "zero"
+
+
 @tf_export("gradients")
 def gradients(ys,
               xs,
@@ -545,7 +566,8 @@ def gradients(ys,
               colocate_gradients_with_ops=False,
               gate_gradients=False,
               aggregation_method=None,
-              stop_gradients=None):
+              stop_gradients=None,
+              unconnected_gradients=UnconnectedGradients.NONE):
   """Constructs symbolic derivatives of sum of `ys` w.r.t. x in `xs`.
 
   `ys` and `xs` are each a `Tensor` or a list of tensors.  `grad_ys`
@@ -596,6 +618,23 @@ def gradients(ys,
   All integer tensors are considered constant with respect to all `xs`, as if
   they were included in `stop_gradients`.
 
+  `unconnected_gradients` determines the value returned for each x in xs if it
+  is unconnected in the graph to ys. By default this is None to safeguard
+  against errors. MAthematically these gradients are zero which can be requested
+  using the `'zero'` option. `tf.UnconnectedGradients` provides the
+  following options and behaviors:
+
+  ```python
+  a = tf.ones([1, 2])
+  b = tf.ones([3, 1])
+  g1 = tf.gradients([b], [a], unnconnected_gradients='none')
+  sess.run(g1)  # [None]
+
+  g2 = tf.gradients([b], [a], unconnected_gradients='zero')
+  sess.run(g2)  # [array([[0., 0.]], dtype=float32)]
+  ```
+
+
   Args:
     ys: A `Tensor` or list of tensors to be differentiated.
     xs: A `Tensor` or list of tensors to be used for differentiation.
@@ -611,6 +650,10 @@ def gradients(ys,
       Accepted values are constants defined in the class `AggregationMethod`.
     stop_gradients: Optional. A `Tensor` or list of tensors not to differentiate
       through.
+    unconnected_gradients: Optional. Specifies the gradient value returned when
+      the given input tensors are unconnected. Accepted values are constants
+      defined in the class `tf.UnconnectedGradients` and the default value is
+      `none`.
 
   Returns:
     A list of `sum(dy/dx)` for each x in `xs`.
@@ -627,7 +670,8 @@ def gradients(ys,
   # mutating new ops.
   with ops.get_default_graph()._mutation_lock():  # pylint: disable=protected-access
     return _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops,
-                            gate_gradients, aggregation_method, stop_gradients)
+                            gate_gradients, aggregation_method, stop_gradients,
+                            unconnected_gradients)
 
 
 def _GradientsHelper(ys,
@@ -638,6 +682,7 @@ def _GradientsHelper(ys,
                      gate_gradients=False,
                      aggregation_method=None,
                      stop_gradients=None,
+                     unconnected_gradients=UnconnectedGradients.NONE,
                      src_graph=None):
   """Implementation of gradients()."""
   if context.executing_eagerly():
@@ -645,6 +690,11 @@ def _GradientsHelper(ys,
                        "is enabled. Use tf.GradientTape instead.")
   if src_graph is None:
     src_graph = ops.get_default_graph()
+  try:
+    unconnected_gradients = UnconnectedGradients(unconnected_gradients)
+  except ValueError:
+    raise ValueError(
+        "Unknown value for unconnected_gradients: %r" % unconnected_gradients)
 
   # If src_graph is a _FuncGraph (i.e. a function body), gather it and all
   # ancestor graphs. This is necessary for correctly handling captured values.
@@ -856,7 +906,7 @@ def _GradientsHelper(ys,
 
   if loop_state:
     loop_state.PostProcessing()
-  return [_GetGrad(grads, x) for x in xs]
+  return [_GetGrad(grads, x, unconnected_gradients) for x in xs]
 
 
 def _HasAnyNotNoneGrads(grads, op):
@@ -924,12 +974,19 @@ def _SetGrad(grads, t, grad):
     op_grads[t.value_index] = grad
 
 
-def _GetGrad(grads, t):
+def _GetGrad(grads, t, unconnected_gradients):
   """Gets gradient for tensor "t"."""
   op = t.op
   op_grads = grads.get(op)
   if not op_grads:
-    return None
+    if unconnected_gradients == UnconnectedGradients.ZERO:
+      return array_ops.zeros_like(t)
+    elif unconnected_gradients == UnconnectedGradients.NONE:
+      return None
+    else:
+      raise ValueError(
+          "Unknown value for unconnected_gradients: %r" % unconnected_gradients)
+
   t_grad = op_grads[t.value_index]
   assert not isinstance(
       t_grad, list), ("gradients list should have been aggregated by now.")
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index 3c9b7a01c7..c93e2493ee 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -350,6 +350,40 @@ class GradientsTest(test_util.TensorFlowTestCase):
       for a, b in zip(npgrad1, npgrad2):
         np.testing.assert_allclose(a, b)
 
+  def testUnconnectedGradientsNoneUnconnectedGradients(self):
+    with ops.Graph().as_default():
+      x = constant(1.0, shape=[2, 2])
+      y = constant(3.0, shape=[3, 1])
+      grad = gradients.gradients(
+          [y], [x], unconnected_gradients="none")
+    self.assertIsNone(grad[0])
+
+  def testUnconnectedGradientsZerosUnconnectedGradients(self):
+    with ops.Graph().as_default():
+      x = constant(1.0, shape=[2, 2])
+      y = constant(3.0, shape=[3, 1])
+      grads = gradients.gradients(
+          [y], [x], unconnected_gradients="zero")
+      with self.cached_session() as sess:
+        self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], sess.run(grads)[0])
+
+  def testUnconnectedGradientsZeroConnectedGradients(self):
+    with ops.Graph().as_default():
+      x = constant(1.0)
+      y = x * 3.0
+      grad = gradients.gradients(
+          [y], [x], unconnected_gradients="zero")
+      with self.cached_session() as sess:
+        self.assertEquals(3.0, sess.run(grad)[0])
+
+  def testUnknownUnconnectedGradientsValueGiven(self):
+    with ops.Graph().as_default():
+      x = constant(1.0)
+      y = constant(1.0)
+      with self.assertRaisesRegexp(
+          ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
+        gradients.gradients([y], [x], unconnected_gradients="nonsense")
+
 
 class FunctionGradientsTest(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-unconnected-gradients.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-unconnected-gradients.pbtxt
new file mode 100644
index 0000000000..c5eb959430
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-unconnected-gradients.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.UnconnectedGradients"
+tf_class {
+  is_instance: "<enum \'UnconnectedGradients\'>"
+  member {
+    name: "NONE"
+    mtype: "<enum \'UnconnectedGradients\'>"
+  }
+  member {
+    name: "ZERO"
+    mtype: "<enum \'UnconnectedGradients\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index a268529c1f..c1cc7322f0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -248,6 +248,10 @@ tf_module {
     name: "TextLineReader"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "UnconnectedGradients"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
   member {
     name: "VERSION"
     mtype: "<type \'str\'>"
@@ -1234,7 +1238,7 @@ tf_module {
   }
   member_method {
     name: "gradients"
-    argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\', \'unconnected_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\', \'UnconnectedGradients.NONE\'], "
   }
   member_method {
     name: "greater"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-unconnected-gradients.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-unconnected-gradients.pbtxt
new file mode 100644
index 0000000000..c5eb959430
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-unconnected-gradients.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.UnconnectedGradients"
+tf_class {
+  is_instance: "<enum \'UnconnectedGradients\'>"
+  member {
+    name: "NONE"
+    mtype: "<enum \'UnconnectedGradients\'>"
+  }
+  member {
+    name: "ZERO"
+    mtype: "<enum \'UnconnectedGradients\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 5b3ea75bce..571abc3b19 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -220,6 +220,10 @@ tf_module {
     name: "TensorShape"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "UnconnectedGradients"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
   member {
     name: "VERSION"
     mtype: "<type \'str\'>"
@@ -1134,7 +1138,7 @@ tf_module {
   }
   member_method {
     name: "gradients"
-    argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\', \'unconnected_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\', \'UnconnectedGradients.NONE\'], "
   }
   member_method {
     name: "greater"
-- 
GitLab


From 7b56d4ff7679ed59e3ea799054c5dcefd0600ab0 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 4 Oct 2018 08:08:22 -0700
Subject: [PATCH 1121/1357] [TF] Fail fast if there is no CPU kernel during
 constant tensor evaluation. Avoids LOG(ERROR) spam when the Executor is
 unable to find a CPU kernel.

PiperOrigin-RevId: 215738481
---
 .../core/common_runtime/eval_const_tensor.cc   | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tensorflow/core/common_runtime/eval_const_tensor.cc b/tensorflow/core/common_runtime/eval_const_tensor.cc
index c1542f1f57..87749da7af 100644
--- a/tensorflow/core/common_runtime/eval_const_tensor.cc
+++ b/tensorflow/core/common_runtime/eval_const_tensor.cc
@@ -113,6 +113,13 @@ Status TryToInferTensorOutputFromInputShapes(const Edge& edge,
   return Status::OK();
 }
 
+// Returns true if 'node' has a registered CPU kernel.
+bool HasCpuKernel(const Node& node) {
+  return FindKernelDef(DeviceType(DEVICE_CPU), node.def(), /*def=*/nullptr,
+                       /*kernel_class_name=*/nullptr)
+      .ok();
+}
+
 // Extracts the subgraph ending at 'target_node' that is statically computable
 // and inserts into 'out_graph'. If statically computable, 'is_constant_graph'
 // will be set to true.
@@ -136,6 +143,12 @@ Status ExtractConstantSubgraph(
     return Status::OK();
   }
 
+  // Since constant-folding runs on the CPU, do not attempt to constant-fold
+  // operators that have no CPU kernel.
+  if (!HasCpuKernel(target_node)) {
+    return Status::OK();
+  }
+
   // TODO(skyewm): should more of the filtering applied in input nodes below be
   // applied to target_node here?
 
@@ -201,6 +214,11 @@ Status ExtractConstantSubgraph(
       return Status::OK();
     }
 
+    if (!HasCpuKernel(*current_node)) {
+      *is_constant_graph = false;
+      return Status::OK();
+    }
+
     // If there is nothing more to recurse down, see if
     // the generator node is a constant.
     if (current_node->num_inputs() == 0) {
-- 
GitLab


From dcd7dd2d2e1ed7d8c26dd22dbbd2bac269c42e1e Mon Sep 17 00:00:00 2001
From: Alan Chiao <alanchiao@google.com>
Date: Thu, 4 Oct 2018 08:30:22 -0700
Subject: [PATCH 1122/1357] Sparse output fully connected custom op.

PiperOrigin-RevId: 215741296
---
 tensorflow/contrib/lite/kernels/BUILD         |  18 ++
 .../kernels/sparse_output_fully_connected.cc  | 235 ++++++++++++++++++
 .../sparse_output_fully_connected_test.cc     | 158 ++++++++++++
 3 files changed, 411 insertions(+)
 create mode 100644 tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
 create mode 100644 tensorflow/contrib/lite/kernels/sparse_output_fully_connected_test.cc

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index daaf6714cc..95e387814d 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -210,6 +210,7 @@ cc_library(
         "slice.cc",
         "space_to_batch_nd.cc",
         "space_to_depth.cc",
+        "sparse_output_fully_connected.cc",
         "sparse_to_dense.cc",
         "split.cc",
         "squeeze.cc",
@@ -333,6 +334,23 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "sparse_output_fully_connected_test",
+    size = "small",
+    srcs = ["sparse_output_fully_connected_test.cc"],
+    tags = [
+        "no_oss",
+        "tflite_not_portable_ios",
+    ],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+        "@flatbuffers",
+    ],
+)
+
 tf_cc_test(
     name = "activations_test",
     size = "small",
diff --git a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
new file mode 100644
index 0000000000..843ed0768c
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
@@ -0,0 +1,235 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// SparseOutputFullyConnected is a fully connected layer that uses a single
+// row in the weights and bias via a lookup.
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace custom {
+namespace sparse_output_fully_connected {
+
+// Input tensors of size {n_batch, n_input}
+constexpr int kInputTensor = 0;
+// Auxiliary input tensor of size { 1 }
+constexpr int kInputLookupTensor = 1;
+
+// Weights tensor of size { n_embeddings , n_input }
+constexpr int kWeightsTensor = 2;
+// Bias tensor of size { n_embeddings }
+constexpr int kBiasTensor = 3;
+
+// Output tensor.
+constexpr int kOutputTensor = 0;
+
+// Temporary tensors.
+enum TemporaryTensor {
+  kInputQuantized = 0,
+  kScalingFactors = 1,
+  kNumTemporaryTensors = 2
+};
+
+// Struct to hold op data.
+struct OpData {
+  int scratch_tensor_index;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  auto* data = new OpData;
+  context->AddTensors(context, /*tensors_to_add=*/kNumTemporaryTensors,
+                      &data->scratch_tensor_index);
+  return data;
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<OpData*>(buffer);
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+
+  TF_LITE_ENSURE_EQ(context, node->inputs->size, 4);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
+
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
+  const int n_batch = SizeOfDimension(input, 0);
+  const int n_input = SizeOfDimension(input, 1);
+
+  const TfLiteTensor* lookup = GetInput(context, node, kInputLookupTensor);
+  TF_LITE_ENSURE_EQ(context, lookup->type, kTfLiteInt32);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(lookup), 1);
+  // Only support single lookup.
+  TF_LITE_ENSURE_EQ(context, SizeOfDimension(lookup, 0), 1);
+
+  const TfLiteTensor* weights = GetInput(context, node, kWeightsTensor);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(weights), 2);
+  TF_LITE_ENSURE_EQ(context, SizeOfDimension(weights, 1), n_input);
+
+  const TfLiteTensor* bias = GetInput(context, node, kBiasTensor);
+  TF_LITE_ENSURE_EQ(context, NumElements(bias), SizeOfDimension(weights, 0));
+
+  const bool is_hybrid_op =
+      (weights->type == kTfLiteUInt8 && input->type == kTfLiteFloat32);
+
+  if (is_hybrid_op) {
+    TfLiteIntArrayFree(node->temporaries);
+    node->temporaries = TfLiteIntArrayCreate(kNumTemporaryTensors);
+
+    // Allocate temporary tensors to store quantized values of input.
+    node->temporaries->data[kInputQuantized] = op_data->scratch_tensor_index;
+    TfLiteTensor* input_quantized =
+        GetTemporary(context, node, /*index=*/kInputQuantized);
+    input_quantized->type = kTfLiteUInt8;
+    input_quantized->allocation_type = kTfLiteArenaRw;
+    if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) {
+      TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims);
+      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_quantized,
+                                                       input_quantized_size));
+    }
+
+    // Tell interpreter to allocate temporary tensors to store scaling factors.
+    node->temporaries->data[kScalingFactors] =
+        op_data->scratch_tensor_index + kScalingFactors;
+    TfLiteTensor* scaling_factors =
+        GetTemporary(context, node, /*index=*/kScalingFactors);
+    scaling_factors->type = kTfLiteFloat32;
+    scaling_factors->allocation_type = kTfLiteArenaRw;
+    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+    scaling_factors_size->data[0] = n_batch;
+    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
+                                                       scaling_factors_size));
+    }
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalFloat(const TfLiteTensor* input, const TfLiteTensor* lookup,
+                       const TfLiteTensor* weights, const TfLiteTensor* bias,
+                       TfLiteTensor* output) {
+  const int n_batch = SizeOfDimension(input, 0);
+  const int n_input = SizeOfDimension(input, 1);
+
+  const float* input_ptr_batch = input->data.f;
+
+  // Initialize pointer to right row according to lookup value.
+  int32 lookup_index = lookup->data.i32[0];
+  const float* weights_ptr = weights->data.f + lookup_index * n_input;
+
+  // Initialize output to bias.
+  if (bias) {
+    float* bias_ptr = bias->data.f + lookup_index;
+    tensor_utils::VectorBatchVectorAssign(bias_ptr, 1, n_batch, output->data.f);
+  } else {
+    tensor_utils::ZeroVector(output->data.f, n_batch * 1);
+  }
+
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      weights_ptr, /*m_rows=*/1, n_input, input_ptr_batch, n_batch,
+      output->data.f, /*result_stride=*/1);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalHybrid(const TfLiteTensor* input, const TfLiteTensor* lookup,
+                        const TfLiteTensor* weights, const TfLiteTensor* bias,
+                        TfLiteTensor* scaling_factors,
+                        TfLiteTensor* input_quantized, TfLiteTensor* output) {
+  const int n_batch = SizeOfDimension(input, 0);
+  const int n_input = SizeOfDimension(input, 1);
+
+  const float* input_ptr_batch = input->data.f;
+  // Initialize the pointer to storage for quantized values and
+  // scaling factors.
+  int8_t* quantized_input_ptr_batch =
+      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
+  float* scaling_factors_ptr = scaling_factors->data.f;
+
+  // Initialize pointer to right row according to lookup value.
+  int32 lookup_index = lookup->data.i32[0];
+  int8_t* weights_ptr =
+      reinterpret_cast<int8_t*>(weights->data.uint8) + lookup_index * n_input;
+
+  // Initialize output to bias.
+  if (bias) {
+    float* bias_ptr = bias->data.f + lookup_index;
+    tensor_utils::VectorBatchVectorAssign(bias_ptr, 1, n_batch, output->data.f);
+  } else {
+    tensor_utils::ZeroVector(output->data.f, n_batch * 1);
+  }
+
+  if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) {
+    // Quantize input from float to int8.
+    float unused_min, unused_max;
+    for (int b = 0; b < n_batch; ++b) {
+      const int offset = b * n_input;
+      tensor_utils::SymmetricQuantizeFloats(
+          input_ptr_batch + offset, n_input, quantized_input_ptr_batch + offset,
+          &unused_min, &unused_max, &scaling_factors_ptr[b]);
+      scaling_factors_ptr[b] *= weights->params.scale;
+    }
+
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        weights_ptr, /*m_rows=*/1, n_input, quantized_input_ptr_batch,
+        scaling_factors_ptr, n_batch, output->data.f, /*result_stride=*/1);
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* lookup = GetInput(context, node, kInputLookupTensor);
+  const TfLiteTensor* weights = GetInput(context, node, kWeightsTensor);
+  const TfLiteTensor* bias = GetInput(context, node, kBiasTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  switch (weights->type) {
+    case kTfLiteFloat32: {
+      return EvalFloat(input, lookup, weights, bias, output);
+    }
+    case kTfLiteUInt8: {
+      TfLiteTensor* input_quantized =
+          GetTemporary(context, node, /*index=*/kInputQuantized);
+      TfLiteTensor* scaling_factors =
+          GetTemporary(context, node, /*index=*/kScalingFactors);
+      return EvalHybrid(input, lookup, weights, bias, scaling_factors,
+                        input_quantized, output);
+    }
+    default:
+      context->ReportError(context, "Type %d is not currently supported.",
+                           weights->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace sparse_output_fully_connected
+
+TfLiteRegistration* Register_SPARSE_OUTPUT_FULLY_CONNECTED() {
+  static TfLiteRegistration r = {sparse_output_fully_connected::Init,
+                                 sparse_output_fully_connected::Free,
+                                 sparse_output_fully_connected::Prepare,
+                                 sparse_output_fully_connected::Eval};
+  return &r;
+}
+
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected_test.cc b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected_test.cc
new file mode 100644
index 0000000000..365986a5c1
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected_test.cc
@@ -0,0 +1,158 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// Unit test for TFLite sparse output fully connected op.
+#include <iomanip>
+#include <random>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+
+namespace tflite {
+
+namespace ops {
+namespace custom {
+
+TfLiteRegistration* Register_SPARSE_OUTPUT_FULLY_CONNECTED();
+
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BaseSparseOutputFullyConnectedOpModel : public SingleOpModel {
+ public:
+  BaseSparseOutputFullyConnectedOpModel(const TensorData& input,
+                                        const TensorData& weights,
+                                        const TensorData& output = {
+                                            TensorType_FLOAT32}) {
+    input_ = AddInput(input);
+    lookup_ = AddInput({TensorType_INT32, {1}});
+    weights_ = AddInput(weights);
+    int bias_size = GetShape(weights_)[0];
+    bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
+    output_ = AddOutput(output);
+
+    // Create empty (required) options map.
+    flexbuffers::Builder fbb;
+    fbb.Map([&]() {});
+    fbb.Finish();
+
+    SetCustomOp("SPARSE_OUTPUT_FULLY_CONNECTED", fbb.GetBuffer(),
+                Register_SPARSE_OUTPUT_FULLY_CONNECTED);
+    BuildInterpreter({GetShape(input_), GetShape(lookup_), GetShape(weights_),
+                      GetShape(bias_)});
+  }
+
+  void SetInput(const std::vector<float>& data) {
+    PopulateTensor(input_, data);
+  }
+
+  void SetLookup(const std::vector<int32>& f) { PopulateTensor(lookup_, f); }
+
+  void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+ protected:
+  int input_;
+  int lookup_;
+  int weights_;
+  int bias_;
+  int output_;
+};
+
+class FloatSparseOutputFullyConnectedOpModel
+    : public BaseSparseOutputFullyConnectedOpModel {
+ public:
+  using BaseSparseOutputFullyConnectedOpModel::
+      BaseSparseOutputFullyConnectedOpModel;
+
+  void SetWeights(const std::vector<float>& f) { PopulateTensor(weights_, f); }
+};
+
+class HybridSparseOutputFullyConnectedOpModel
+    : public BaseSparseOutputFullyConnectedOpModel {
+ public:
+  using BaseSparseOutputFullyConnectedOpModel::
+      BaseSparseOutputFullyConnectedOpModel;
+
+  void SetWeights(const std::vector<float>& f) {
+    SymmetricQuantizeAndPopulate(weights_, f);
+  }
+};
+
+TEST(SparseOutputFullyConnectedOpTest, SimpleTestFloat) {
+  FloatSparseOutputFullyConnectedOpModel m({TensorType_FLOAT32, {1, 5}},
+                                           {TensorType_FLOAT32, {3, 5}},
+                                           {TensorType_FLOAT32, {}});
+
+  m.SetInput({-1.0, 0.0, 1.0, 2.0, 3.0});
+
+  m.SetLookup({2});
+
+  m.SetWeights({
+      -1.0, 0.0, 1.0, 2.0, 3.0,  //
+      0.0, 1.0, 2.0, 3.0, 4.0,   //
+      1.0, 2.0, 3.0, 4.0, 5.0,   //
+  });
+
+  m.SetBias({1.0, 2.0, 3.0});
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({28}));
+}
+
+TEST(SparseOutputFullyConnectedOpTest, SimpleTestHybrid) {
+  HybridSparseOutputFullyConnectedOpModel m({TensorType_FLOAT32, {1, 5}},
+                                            {TensorType_UINT8, {3, 5}},
+                                            {TensorType_FLOAT32, {}});
+
+  m.SetInput({-1.0, 0.0, 1.0, 2.0, 3.0});
+
+  m.SetLookup({2});
+
+  m.SetWeights({
+      -1.0, 0.0, 1.0, 2.0, 3.0,  //
+      0.0, 1.0, 2.0, 3.0, 4.0,   //
+      1.0, 2.0, 3.0, 4.0, 5.0,   //
+  });
+
+  m.SetBias({1.0, 2.0, 3.0});
+
+  m.Invoke();
+
+  // We get 28.0552 instead of 28.
+  //
+  // Input -> -42, 0, 42, 85, 127 with scale factor of 127/3.
+  // Looked up weights ->  25, 51, 76, 102, 127 with scale factor of 127/5.
+  //
+  // (-42 * 25 + 0 * 51 + 42 * 76 + 85 * 102 + 127 * 127) * (3*5/127^2) + 3.0
+  // gives us the expected result.
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({28}, 0.0553)));
+}
+
+}  // namespace
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
-- 
GitLab


From 80c9eec9b2475630f83a596f77a906c8075f8e6c Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 4 Oct 2018 08:56:45 -0700
Subject: [PATCH 1123/1357] Remove CHECKs from HloInstruction constructors.
 Move these checks to RET_CHECKs in the HloVerifier. Added a new visitor class
 InstructionVerifier inside of hlo_verifier.cc for handling these random
 non-result-shape verifications.

PiperOrigin-RevId: 215745043
---
 .../compiler/xla/service/hlo_instructions.cc  |  12 -
 .../compiler/xla/service/hlo_instructions.h   |   1 -
 .../compiler/xla/service/hlo_verifier.cc      | 456 ++++++++++--------
 .../compiler/xla/service/hlo_verifier.h       |  11 -
 4 files changed, 248 insertions(+), 232 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 68d0979f5c..152d8eacdb 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -643,14 +643,6 @@ HloTransposeInstruction::HloTransposeInstruction(
     absl::Span<const int64> dimensions)
     : HloInstruction(HloOpcode::kTranspose, shape),
       dimensions_(dimensions.begin(), dimensions.end()) {
-  CHECK_EQ(shape.dimensions().size(), dimensions.size());
-  CHECK_EQ(shape.dimensions().size(), operand->shape().dimensions().size());
-  CHECK(std::equal(operand->shape().dimensions().begin(),
-                   operand->shape().dimensions().end(),
-                   Permute(dimensions, shape.dimensions()).begin()))
-      << "shape: " << ShapeUtil::HumanString(shape)
-      << ", operand->shape(): " << ShapeUtil::HumanString(shape)
-      << ", dimensions: {" << StrJoin(dimensions, ", ") << "}";
   AppendOperand(operand);
 }
 
@@ -1491,7 +1483,6 @@ HloParameterInstruction::CloneWithNewOperandsImpl(
 HloGetTupleElementInstruction::HloGetTupleElementInstruction(
     const Shape& shape, HloInstruction* operand, int64 index)
     : HloInstruction(HloOpcode::kGetTupleElement, shape), tuple_index_(index) {
-  CHECK(ShapeUtil::IsTuple(operand->shape()));
   AppendOperand(operand);
 }
 
@@ -1613,9 +1604,6 @@ HloOutfeedInstruction::HloOutfeedInstruction(const Shape& outfeed_shape,
     : HloInstruction(HloOpcode::kOutfeed, ShapeUtil::MakeTokenShape()),
       outfeed_shape_(outfeed_shape),
       outfeed_config_(outfeed_config) {
-  CHECK(ShapeUtil::Compatible(operand->shape(), outfeed_shape))
-      << "Outfeed shape " << outfeed_shape
-      << " must be compatible with operand shape " << operand->shape();
   AppendOperand(operand);
   AppendOperand(token_operand);
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index ab168800f6..e169604072 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -896,7 +896,6 @@ class HloOutfeedInstruction : public HloInstruction {
                                  absl::string_view outfeed_config);
   // Returns the shape for the Outfeed instruction.
   const Shape& outfeed_shape() const {
-    TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(outfeed_shape_));
     return outfeed_shape_;
   }
   // Returns the config for the Outfeed instruction.
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index a7727824fe..b5498bb936 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -763,7 +763,136 @@ Status VerifyHloStructure(HloModule* module) {
   return Status::OK();
 }
 
-Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
+namespace {
+
+// Returns true if the given Shape has a TOKEN shape as any subshape.
+bool ShapeContainsToken(const Shape& shape) {
+  bool contains_token = false;
+  ShapeUtil::ForEachSubshape(
+      shape, [&contains_token](const Shape& subshape, const ShapeIndex&) {
+        if (ShapeUtil::IsToken(subshape)) {
+          contains_token = true;
+        }
+      });
+  return contains_token;
+}
+
+// Verifies that all types entering and exiting the entry computation are
+// legal.
+Status VerifyEntryAndExitShapes(const HloModule& module) {
+  // Tokens cannot be passed as entry parameters.
+  // TODO(b/80000000): Remove this constraint.
+  for (int i = 0; i < module.entry_computation()->num_parameters(); ++i) {
+    HloInstruction* param =
+        module.entry_computation()->parameter_instruction(i);
+    if (ShapeContainsToken(param->shape())) {
+      return InternalError(
+          "Entry parameter %d is or contains a token shape: %s", i,
+          ShapeUtil::HumanString(param->shape()));
+    }
+  }
+  return Status::OK();
+}
+
+// Checks if the given two instructions share the same channel id.
+Status CheckSameChannel(const HloInstruction* instr1,
+                        const HloInstruction* instr2) {
+  if (instr1->channel_id() != instr2->channel_id()) {
+    return InternalError(
+        "Expected to have the same channel id, actual channel ids are: %s "
+        "(%d), %s (%d)",
+        instr1->ToString(), instr1->channel_id(), instr2->ToString(),
+        instr2->channel_id());
+  }
+  return Status::OK();
+}
+
+// Checks if the given two instructions have the same is_host_transfer
+// attribute value. Intsructions must be send/recv instructions or their
+// 'done' variant.
+Status CheckSameIsHostTransfer(const HloInstruction* instr1,
+                               const HloInstruction* instr2) {
+  const HloSendRecvInstruction* send_recv1 =
+      DynCast<const HloSendRecvInstruction>(instr1);
+  const HloSendRecvInstruction* send_recv2 =
+      DynCast<const HloSendRecvInstruction>(instr2);
+  TF_RET_CHECK(send_recv1 != nullptr);
+  TF_RET_CHECK(send_recv2 != nullptr);
+  if (send_recv1->is_host_transfer() != send_recv2->is_host_transfer()) {
+    return InternalError(
+        "Expected instructions to have the same is-host-transfer property: "
+        "%s, "
+        "%s ",
+        instr1->ToString(), instr2->ToString());
+  }
+  return Status::OK();
+}
+
+// Checks various invariants of send and recv instructions.
+Status VerifySendsAndRecvs(const HloModule& module) {
+  absl::flat_hash_map<int64, const HloInstruction*> host_channels;
+  // Host send/recv instructions must have their own unique channel.
+  auto check_unique_host_channel = [&](const HloInstruction* instruction) {
+    const HloSendRecvInstruction* sendrecv =
+        DynCast<const HloSendRecvInstruction>(instruction);
+    if (sendrecv->is_host_transfer()) {
+      auto it_inserted =
+          host_channels.insert({sendrecv->channel_id(), sendrecv});
+      if (!it_inserted.second) {
+        return FailedPrecondition(
+            "Channel %d is used for multiple host send/recv instructions: "
+            "%s "
+            "and "
+            "%s",
+            sendrecv->channel_id(), sendrecv->ToString(),
+            it_inserted.first->second->ToString());
+      }
+    }
+
+    return Status::OK();
+  };
+
+  // Send/Recv instruction must have a single user: the corresponding
+  // SendDone/RecvDone. with matching channel.
+  for (const HloComputation* computation : module.computations()) {
+    for (const HloInstruction* instruction : computation->instructions()) {
+      switch (instruction->opcode()) {
+        case HloOpcode::kSend: {
+          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
+          TF_RET_CHECK(instruction->users().size() == 1);
+          const HloInstruction* send_done = instruction->users().front();
+          TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone);
+          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, send_done));
+          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, send_done));
+          break;
+        }
+        case HloOpcode::kRecv: {
+          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
+          TF_RET_CHECK(instruction->users().size() == 1);
+          const HloInstruction* recv_done = instruction->users().front();
+          TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone);
+          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, recv_done));
+          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, recv_done));
+          break;
+        }
+        case HloOpcode::kSendDone:
+          TF_RET_CHECK(instruction->operands().size() == 1);
+          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kSend);
+          break;
+        case HloOpcode::kRecvDone:
+          TF_RET_CHECK(instruction->operands().size() == 1);
+          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kRecv);
+          break;
+        default:
+          break;
+      }
+    }
+  }
+  return Status::OK();
+}
+
+// CHECKs various invariants of a fusion instruction.
+Status CheckFusionInstruction(HloInstruction* fusion) {
   // The parent fusion instruction of the fusion computation must be 'fusion'.
   HloComputation* fused_computation = fusion->fused_instructions_computation();
   if (fusion != fused_computation->FusionInstruction()) {
@@ -866,50 +995,32 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
     }
   }
 
+  TF_RET_CHECK(fusion->called_computations() ==
+               absl::Span<HloComputation* const>(
+                   {fusion->fused_instructions_computation()}))
+      << "Fusion HLO calls computations other than the "
+         "fused_instructions_computation: "
+      << fusion->ToString() << " fusion->fused_instructions_computation(): "
+      << fusion->fused_instructions_computation()->ToString()
+      << " fusion->called_computations(): "
+      << ComputationsToString(fusion->called_computations());
+
+  for (const auto& fused : fusion->fused_instructions()) {
+    TF_RET_CHECK(fused->parent() == fusion->fused_instructions_computation())
+        << "Fused HLO was missing a parent: " << fused->ToString()
+        << " parent: " << fused->parent()
+        << " computation: " << fusion->parent();
+  }
+
   // TODO(b/65423525): We'd like to check that all operands are distinct.
   // This is currently disabled due to the invariant being violated by
   // multi-output fusion.
   return Status::OK();
 }
 
-Status HloVerifier::CheckWhileInstruction(HloInstruction* instruction) {
-  auto* while_cond = instruction->while_condition();
-  auto* while_body = instruction->while_body();
-  if (while_cond->num_parameters() != 1) {
-    return FailedPrecondition(
-        "While condition must have exactly 1 parameter; had %d : %s",
-        while_cond->num_parameters(), while_cond->ToString());
-  }
-  if (while_body->num_parameters() != 1) {
-    return FailedPrecondition(
-        "While body must have exactly 1 parameter; had %d : %s",
-        while_body->num_parameters(), while_body->ToString());
-  }
-  if (instruction->operand_count() != 1) {
-    return FailedPrecondition(
-        "While loop must have exactly one operand; had %d : %s",
-        instruction->operand_count(), instruction->ToString());
-  }
-  return Status::OK();
-}
-
-Status HloVerifier::CheckConditionalInstruction(HloInstruction* instruction) {
-  if (instruction->true_computation()->num_parameters() != 1) {
-    return FailedPrecondition(
-        "True computation %s of %s must have 1 parameter insted of %d",
-        instruction->true_computation()->name(), instruction->ToString(),
-        instruction->true_computation()->num_parameters());
-  }
-  if (instruction->false_computation()->num_parameters() != 1) {
-    return FailedPrecondition(
-        "False computation %s of %s must have 1 parameter insted of %d",
-        instruction->false_computation()->name(), instruction->ToString(),
-        instruction->false_computation()->num_parameters());
-  }
-  return Status::OK();
-}
-
-Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) {
+// Checks that the non-scalar operand shapes are compatible to the output
+// shape, i.e., that there are no implicit broadcasts of size-one dimensions.
+Status CheckElementwiseInstruction(HloInstruction* instruction) {
   const Shape& out_shape = instruction->shape();
   for (HloInstruction* operand : instruction->operands()) {
     const Shape& operand_shape = operand->shape();
@@ -926,133 +1037,114 @@ Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) {
   return Status::OK();
 }
 
-namespace {
+// Visitor which verifies various fields on the HLO instruction. This class does
+// not check result shape as that is checked in the ShapeVerifier.
+class InstructionVerifier : public DfsHloVisitorWithDefault {
+ public:
+  InstructionVerifier() {}
 
-// Returns true if the given Shape has a TOKEN shape as any subshape.
-bool ShapeContainsToken(const Shape& shape) {
-  bool contains_token = false;
-  ShapeUtil::ForEachSubshape(
-      shape, [&contains_token](const Shape& subshape, const ShapeIndex&) {
-        if (ShapeUtil::IsToken(subshape)) {
-          contains_token = true;
-        }
-      });
-  return contains_token;
-}
+  Status DefaultAction(HloInstruction*) override { return Status::OK(); }
 
-// Verifies that all types entering and exiting the entry computation are
-// legal.
-Status VerifyEntryAndExitShapes(const HloModule& module) {
-  // Tokens cannot be passed as entry parameters.
-  // TODO(b/80000000): Remove this constraint.
-  for (int i = 0; i < module.entry_computation()->num_parameters(); ++i) {
-    HloInstruction* param =
-        module.entry_computation()->parameter_instruction(i);
-    if (ShapeContainsToken(param->shape())) {
-      return InternalError(
-          "Entry parameter %d is or contains a token shape: %s", i,
-          ShapeUtil::HumanString(param->shape()));
-    }
+  Status HandleFusion(HloInstruction* fusion) override {
+    return CheckFusionInstruction(fusion);
   }
-  return Status::OK();
-}
 
-// Checks if the given two instructions share the same channel id.
-Status CheckSameChannel(const HloInstruction* instr1,
-                        const HloInstruction* instr2) {
-  if (instr1->channel_id() != instr2->channel_id()) {
-    return InternalError(
-        "Expected to have the same channel id, actual channel ids are: %s "
-        "(%d), %s (%d)",
-        instr1->ToString(), instr1->channel_id(), instr2->ToString(),
-        instr2->channel_id());
+  Status HandleBroadcast(HloInstruction* broadcast) override {
+    // If you see this failure then someone has confused the difference
+    // between the HLO broadcast op, and the UserComputation broadcast
+    // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I
+    // or ComputationLowerer::Visit()
+    TF_RET_CHECK(broadcast->dimensions().size() ==
+                 ShapeUtil::Rank(broadcast->operand(0)->shape()))
+        << "Broadcast HLO (" << broadcast->ToShortString()
+        << ") has invalid number of dimensions: "
+        << broadcast->dimensions().size()
+        << " != " << ShapeUtil::Rank(broadcast->operand(0)->shape());
+    return Status::OK();
   }
-  return Status::OK();
-}
 
-// Checks if the given two instructions have the same is_host_transfer
-// attribute value. Intsructions must be send/recv instructions or their
-// 'done' variant.
-Status CheckSameIsHostTransfer(const HloInstruction* instr1,
-                               const HloInstruction* instr2) {
-  const HloSendRecvInstruction* send_recv1 =
-      DynCast<const HloSendRecvInstruction>(instr1);
-  const HloSendRecvInstruction* send_recv2 =
-      DynCast<const HloSendRecvInstruction>(instr2);
-  TF_RET_CHECK(send_recv1 != nullptr);
-  TF_RET_CHECK(send_recv2 != nullptr);
-  if (send_recv1->is_host_transfer() != send_recv2->is_host_transfer()) {
-    return InternalError(
-        "Expected instructions to have the same is-host-transfer property: "
-        "%s, "
-        "%s ",
-        instr1->ToString(), instr2->ToString());
+  Status HandleWhile(HloInstruction* xla_while) override {
+    auto* while_cond = xla_while->while_condition();
+    auto* while_body = xla_while->while_body();
+    if (while_cond->num_parameters() != 1) {
+      return FailedPrecondition(
+          "While condition must have exactly 1 parameter; had %d : %s",
+          while_cond->num_parameters(), while_cond->ToString());
+    }
+    if (while_body->num_parameters() != 1) {
+      return FailedPrecondition(
+          "While body must have exactly 1 parameter; had %d : %s",
+          while_body->num_parameters(), while_body->ToString());
+    }
+    if (xla_while->operand_count() != 1) {
+      return FailedPrecondition(
+          "While loop must have exactly one operand; had %d : %s",
+          xla_while->operand_count(), xla_while->ToString());
+    }
+    return Status::OK();
   }
-  return Status::OK();
-}
 
-// Checks various invariants of send and recv instructions.
-Status VerifySendsAndRecvs(const HloModule& module) {
-  absl::flat_hash_map<int64, const HloInstruction*> host_channels;
-  // Host send/recv instructions must have their own unique channel.
-  auto check_unique_host_channel = [&](const HloInstruction* instruction) {
-    const HloSendRecvInstruction* sendrecv =
-        DynCast<const HloSendRecvInstruction>(instruction);
-    if (sendrecv->is_host_transfer()) {
-      auto it_inserted =
-          host_channels.insert({sendrecv->channel_id(), sendrecv});
-      if (!it_inserted.second) {
-        return FailedPrecondition(
-            "Channel %d is used for multiple host send/recv instructions: "
-            "%s "
-            "and "
-            "%s",
-            sendrecv->channel_id(), sendrecv->ToString(),
-            it_inserted.first->second->ToString());
-      }
+  Status HandleConditional(HloInstruction* conditional) override {
+    if (conditional->true_computation()->num_parameters() != 1) {
+      return FailedPrecondition(
+          "True computation %s of %s must have 1 parameter insted of %d",
+          conditional->true_computation()->name(), conditional->ToString(),
+          conditional->true_computation()->num_parameters());
     }
+    if (conditional->false_computation()->num_parameters() != 1) {
+      return FailedPrecondition(
+          "False computation %s of %s must have 1 parameter insted of %d",
+          conditional->false_computation()->name(), conditional->ToString(),
+          conditional->false_computation()->num_parameters());
+    }
+    return Status::OK();
+  }
+
+  Status HandleElementwiseUnary(HloInstruction* instruction) override {
+    return CheckElementwiseInstruction(instruction);
+  }
+
+  Status HandleElementwiseBinary(HloInstruction* instruction) override {
+    return CheckElementwiseInstruction(instruction);
+  }
 
+  Status HandleGetTupleElement(HloInstruction* gte) override {
+    TF_RET_CHECK(ShapeUtil::IsTuple(gte->operand(0)->shape()));
     return Status::OK();
-  };
+  }
 
-  // Send/Recv instruction must have a single user: the corresponding
-  // SendDone/RecvDone. with matching channel.
-  for (const HloComputation* computation : module.computations()) {
-    for (const HloInstruction* instruction : computation->instructions()) {
-      switch (instruction->opcode()) {
-        case HloOpcode::kSend: {
-          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
-          TF_RET_CHECK(instruction->users().size() == 1);
-          const HloInstruction* send_done = instruction->users().front();
-          TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone);
-          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, send_done));
-          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, send_done));
-          break;
-        }
-        case HloOpcode::kRecv: {
-          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
-          TF_RET_CHECK(instruction->users().size() == 1);
-          const HloInstruction* recv_done = instruction->users().front();
-          TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone);
-          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, recv_done));
-          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, recv_done));
-          break;
-        }
-        case HloOpcode::kSendDone:
-          TF_RET_CHECK(instruction->operands().size() == 1);
-          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kSend);
-          break;
-        case HloOpcode::kRecvDone:
-          TF_RET_CHECK(instruction->operands().size() == 1);
-          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kRecv);
-          break;
-        default:
-          break;
-      }
-    }
+  Status HandleTranspose(HloInstruction* transpose) override {
+    const Shape& shape = transpose->shape();
+    const HloInstruction* operand = transpose->operand(0);
+    TF_RET_CHECK(shape.dimensions().size() == transpose->dimensions().size());
+    TF_RET_CHECK(shape.dimensions().size() ==
+                 transpose->operand(0)->shape().dimensions().size());
+    TF_RET_CHECK(std::equal(
+        operand->shape().dimensions().begin(),
+        operand->shape().dimensions().end(),
+        Permute(transpose->dimensions(), shape.dimensions()).begin()))
+        << "shape: " << shape << ", operand->shape(): " << shape
+        << ", dimensions: {" << absl::StrJoin(transpose->dimensions(), ", ")
+        << "}";
+    return Status::OK();
   }
-  return Status::OK();
-}
+
+  Status Preprocess(HloInstruction* instruction) override {
+    auto previous = instructions_by_name_.find(instruction->name());
+    TF_RET_CHECK(previous == instructions_by_name_.end())
+        << "HLO has name that is not unique within module:\n"
+        << instruction->ToString()
+        << " in computation: " << instruction->parent()->name()
+        << "\nPrevious HLO with same name:\n"
+        << previous->second->ToString()
+        << " in computation: " << previous->second->parent()->name();
+    instructions_by_name_[instruction->name()] = instruction;
+    return Status::OK();
+  }
+
+ private:
+  absl::flat_hash_map<string, const HloInstruction*> instructions_by_name_;
+};
 
 }  // namespace
 
@@ -1061,65 +1153,13 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
   TF_RETURN_IF_ERROR(VerifyHloStructure(module));
   TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module));
 
-  absl::flat_hash_map<string, const HloInstruction*> instructions;
 
   for (auto* computation : module->computations()) {
-    for (const auto& instruction : computation->instructions()) {
-      TF_RET_CHECK(instruction->parent() == computation);
-      if (instruction->opcode() == HloOpcode::kFusion) {
-        TF_RETURN_IF_ERROR(CheckFusionInstruction(instruction));
-        TF_RET_CHECK(instruction->called_computations() ==
-                     absl::Span<HloComputation* const>(
-                         {instruction->fused_instructions_computation()}))
-            << "Fusion HLO calls computations other than the "
-               "fused_instructions_computation: "
-            << instruction->ToString()
-            << " instruction->fused_instructions_computation(): "
-            << instruction->fused_instructions_computation()->ToString()
-            << " instruction->called_computations(): "
-            << ComputationsToString(instruction->called_computations());
-
-        for (const auto& fused : instruction->fused_instructions()) {
-          TF_RET_CHECK(fused->parent() ==
-                       instruction->fused_instructions_computation())
-              << "Fused HLO was missing a parent: " << fused->ToString()
-              << " parent: " << fused->parent()
-              << " computation: " << computation;
-        }
-      } else if (instruction->opcode() == HloOpcode::kBroadcast) {
-        // If you see this failure then someone has confused the difference
-        // between the HLO broadcast op, and the UserComputation broadcast
-        // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I
-        // or ComputationLowerer::Visit()
-        TF_RET_CHECK(instruction->dimensions().size() ==
-                     ShapeUtil::Rank(instruction->operand(0)->shape()))
-            << "Broadcast HLO (" << instruction->ToShortString()
-            << ") has invalid number of dimensions: "
-            << instruction->dimensions().size()
-            << " != " << ShapeUtil::Rank(instruction->operand(0)->shape());
-      } else if (instruction->opcode() == HloOpcode::kWhile) {
-        TF_RETURN_IF_ERROR(CheckWhileInstruction(instruction));
-      } else if (instruction->opcode() == HloOpcode::kConditional) {
-        TF_RETURN_IF_ERROR(CheckConditionalInstruction(instruction));
-      } else if (instruction->opcode() !=
-                     HloOpcode::kRng /* Rng operands are always scalar. */
-                 && instruction->IsElementwise()) {
-        TF_RETURN_IF_ERROR(CheckElementwiseInstruction(instruction));
-      }
-
-      auto previous = instructions.find(instruction->name());
-      TF_RET_CHECK(previous == instructions.end())
-          << "HLO has name that is not unique within module:\n"
-          << instruction->ToString()
-          << " in computation: " << computation->name()
-          << "\nPrevious HLO with same name:\n"
-          << previous->second->ToString()
-          << " in computation: " << previous->second->parent()->name();
-      instructions[instruction->name()] = instruction;
-    }
-
     std::unique_ptr<ShapeVerifier> shape_verifier = shape_verifier_factory_();
     TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get()));
+
+    InstructionVerifier instruction_verifier;
+    TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier));
   }
 
   TF_RETURN_IF_ERROR(VerifyEntryAndExitShapes(*module));
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index 0cde4a31af..6d16586c2c 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -172,17 +172,6 @@ class HloVerifier : public HloModulePass {
   StatusOr<bool> Run(HloModule* module) override;
 
  private:
-  // CHECKs various invariants of a fusion instruction.
-  Status CheckFusionInstruction(HloInstruction* fusion) const;
-
-  Status CheckWhileInstruction(HloInstruction* instruction);
-
-  Status CheckConditionalInstruction(HloInstruction* instruction);
-
-  // Checks that the non-scalar operand shapes are compatible to the output
-  // shape, i.e., that there are no implicit broadcasts of size-one dimensions.
-  Status CheckElementwiseInstruction(HloInstruction* instruction);
-
   // Creates a ShapeVerifier that checks that shapes match inferred
   // expectations. This is a factory function because ShapeVerifier,
   // being a DfsHloVisitor, is stateful. We want a clean object
-- 
GitLab


From 3302b4c1fcf2ecd3ae3119cddb16d057235ece07 Mon Sep 17 00:00:00 2001
From: Tingbo Lu <tingbopku@gmail.com>
Date: Fri, 5 Oct 2018 00:02:45 +0800
Subject: [PATCH 1124/1357] Update rnn_cell.py

---
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 59a61af7b3..e8073f8463 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -1110,7 +1110,7 @@ _Linear = core_rnn_cell._Linear  # pylint: disable=invalid-name
 class AttentionCellWrapper(rnn_cell_impl.RNNCell):
   """Basic attention cell wrapper.
 
-  Implementation based on https://arxiv.org/abs/1409.0473.
+  Implementation based on https://arxiv.org/abs/1601.06733.
   """
 
   def __init__(self,
-- 
GitLab


From a7e8ad18a61b251ef42c0260dd80a12cea8f268c Mon Sep 17 00:00:00 2001
From: Pete Warden <petewarden@google.com>
Date: Thu, 4 Oct 2018 09:20:31 -0700
Subject: [PATCH 1125/1357] Experimental interpreter, kernels, and example
 running TensorFlow Lite on a microcontroller

PiperOrigin-RevId: 215748973
---
 .../contrib/lite/experimental/micro/BUILD     |   76 +
 .../contrib/lite/experimental/micro/README.md |  114 ++
 .../lite/experimental/micro/compatibility.h   |   32 +
 .../micro/examples/micro_speech/BUILD         |   28 +
 .../micro_speech/micro_speech_test.cc         |   55 +
 .../micro_speech/tiny_conv_model_data.cc      | 1672 +++++++++++++++++
 .../micro_speech/tiny_conv_model_data.h       |   27 +
 .../lite/experimental/micro/kernels/BUILD     |  107 ++
 .../micro/kernels/all_ops_resolver.cc         |   43 +
 .../micro/kernels/all_ops_resolver.h          |   34 +
 .../micro/kernels/depthwise_conv.cc           |  208 ++
 .../micro/kernels/depthwise_conv_test.cc      |  406 ++++
 .../micro/kernels/fully_connected.cc          |  184 ++
 .../micro/kernels/fully_connected_test.cc     |  643 +++++++
 .../experimental/micro/kernels/softmax.cc     |  213 +++
 .../micro/kernels/softmax_test.cc             |  220 +++
 .../experimental/micro/kernels/test_utils.h   |  170 ++
 .../micro/micro_error_reporter.cc             |   78 +
 .../experimental/micro/micro_error_reporter.h |   34 +
 .../micro/micro_error_reporter_test.cc        |   25 +
 .../experimental/micro/micro_interpreter.cc   |  310 +++
 .../experimental/micro/micro_interpreter.h    |   71 +
 .../micro/micro_interpreter_test.cc           |  197 ++
 .../micro/micro_mutable_op_resolver.cc        |   80 +
 .../micro/micro_mutable_op_resolver.h         |   46 +
 .../micro/micro_mutable_op_resolver_test.cc   |   83 +
 .../micro/simple_tensor_allocator.cc          |  149 ++
 .../micro/simple_tensor_allocator.h           |   51 +
 .../micro/simple_tensor_allocator_test.cc     |  144 ++
 .../lite/experimental/micro/testing/BUILD     |   17 +
 .../micro/testing/Dockerfile.bluepill         |   21 +
 .../experimental/micro/testing/bluepill.resc  |   36 +
 .../experimental/micro/testing/micro_test.bzl |   64 +
 .../experimental/micro/testing/micro_test.h   |  138 ++
 .../micro/testing/test_bluepill_binary.sh     |   54 +
 .../micro/testing/test_linux_binary.sh        |   39 +
 .../experimental/micro/tools/make/Makefile    |  166 ++
 .../micro/tools/make/download_dependencies.sh |   73 +
 .../tools/make/targets/bluepill_makefile.inc  |   65 +
 .../lite/kernels/internal/compatibility.h     |   23 +
 .../contrib/lite/kernels/internal/types.h     |    3 +-
 41 files changed, 6197 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/contrib/lite/experimental/micro/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/micro/README.md
 create mode 100644 tensorflow/contrib/lite/experimental/micro/compatibility.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_interpreter.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/micro_test.h
 create mode 100755 tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
 create mode 100755 tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh
 create mode 100644 tensorflow/contrib/lite/experimental/micro/tools/make/Makefile
 create mode 100755 tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh
 create mode 100644 tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc

diff --git a/tensorflow/contrib/lite/experimental/micro/BUILD b/tensorflow/contrib/lite/experimental/micro/BUILD
new file mode 100644
index 0000000000..df1036bc8b
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/BUILD
@@ -0,0 +1,76 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow/contrib/lite/experimental/micro/testing:micro_test.bzl",
+    "tflite_micro_cc_test",
+)
+
+cc_library(
+    name = "micro_framework",
+    srcs = [
+        "micro_error_reporter.cc",
+        "micro_interpreter.cc",
+        "micro_mutable_op_resolver.cc",
+        "simple_tensor_allocator.cc",
+    ],
+    hdrs = [
+        "compatibility.h",
+        "micro_error_reporter.h",
+        "micro_interpreter.h",
+        "micro_mutable_op_resolver.h",
+        "simple_tensor_allocator.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite:schema_fbs_version",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/core/api",
+        "//tensorflow/contrib/lite/schema:schema_fbs",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "micro_error_reporter_test",
+    srcs = [
+        "micro_error_reporter_test.cc",
+    ],
+    deps = [
+        ":micro_framework",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "micro_mutable_op_resolver_test",
+    srcs = [
+        "micro_mutable_op_resolver_test.cc",
+    ],
+    deps = [
+        ":micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "micro_interpreter_test",
+    srcs = [
+        "micro_interpreter_test.cc",
+    ],
+    deps = [
+        ":micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "simple_tensor_allocator_test",
+    srcs = [
+        "simple_tensor_allocator_test.cc",
+    ],
+    deps = [
+        ":micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/micro/README.md b/tensorflow/contrib/lite/experimental/micro/README.md
new file mode 100644
index 0000000000..414cafde4d
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/README.md
@@ -0,0 +1,114 @@
+# TensorFlow Lite for Microcontrollers
+
+This an experimental port of TensorFlow Lite aimed at micro controllers and other devices with only kilobytes of memory. It doesn't require any operating system support, any standard C or C++ libraries, or dynamic memory allocation, so it's designed to be portable even to 'bare metal' systems. The core runtime fits in 16KB on a Cortex M3, and with enough operators to run a speech keyword detection model, takes up a total of 22KB.
+
+The design goals are for the framework to be:
+
+- **Readable**: We want embedded software engineers to be able to understand what's required to run ML inference without having to study research papers. We've tried to keep the code base small, modular, and have reference implementations of all operations to help with this.
+
+- **Easy to modify**: We know that there are a lot of different platforms and requirements in the embedded world, and we don't expect to cover all of them in one framework. Instead, we're hoping that it can be a good starting point for developers to build on top of to meet their own needs. For example, we tried to make it easy to replace the implementations of key computational operators that are often crucial for performance, without having to touch the data flow and other runtime code. We want it to make more sense to use our workflow to handle things like model import and less-important operations, and customize the parts that matter, rather than having to reimplement everything in your own engine.
+
+- **Well-tested**: If you're modifying code, you need to know if your changes are correct. Having an easy way to test lets you develop much faster. To help there, we've written tests for all the components, and we've made sure that the tests can be run on almost any platform, with no dependencies apart from the ability to log text to a debug console somewhere. We also provide an easy way to run all the tests on-device as part of an automated test framework, and we use qemu/Renode emulation so that tests can be run even without physical devices present.
+
+- **Easy to integrate**: We want to be as open a system as possible, and use the best code available for each platform. To do that, we're going to rely on projects like [CMSIS-NN](https://www.keil.com/pack/doc/CMSIS/NN/html/index.html), [uTensor](https://github.com/uTensor/uTensor), and other vendor libraries to handle as much performance-critical code as possible. We know that there are an increasing number of options to accelerate neural networks on microcontrollers, so we're aiming to be a good host for deploying those hardware technologies too.
+
+- **Compatible**: We're using the same file schema, interpreter API, and kernel interface as regular TensorFlow Lite, so we leverage the large existing set of tools, documentation, and examples for the project. The biggest barrier to deploying ML models is getting them from a training environment into a form that's easy to run inference on, so we see reusing this rich ecosystem as being crucial to being easily usable. We also hope to integrate this experimental work back into the main codebase in the future.
+
+To meet those goals, we've made some tradeoffs:
+
+- **Simple C++**: To help with readability, our code is written in a modern version of C++, but we generally treat it as a "better C", rather relying on more complex features such as template meta-programming. As mentioned earlier, we avoid any use of dynamic memory allocation (new/delete) or the standard C/C++ libraries, so we believe this should still be fairly portable. It does mean that some older devices with C-only toolchains won't be supported, but we're hoping that the reference operator implementations (which are simple C-like functions) can still be useful in those cases. The interfaces are also designed to be C-only, so it should be possible to integrate the resulting library with pure C projects.
+
+- **Interpreted**: Code generation is a popular pattern for embedded code, because it gives standalone code that's easy to modify and step through, but we've chosen to go with an interpreted approach. In our internal microcontroller work we've found that using an extremely stripped-down interpreter with almost no dependencies gives us a lot of the same advantages, but is easier to maintain. For example, when new updates come out for the underlying library, you can just merge your local modifications in a single step, rather than having to regenerate new code and then patch in any changes you subsequently made. The coarse granularity of the interpreted primitives means that each operation call typically takes hundreds of thousands of instruction cycles at least, so we don't see noticeable performance gains from avoiding what's essentially a single switch statement at the interpreter level to call each operation. We're still working on improving the packaging though, for example we're considering having the ability to snapshot all the source files and headers used for a particular model, being able to compile the code and data together as a library, and then access it through a minimal set of C interface calls which hide the underlying complexity.
+
+- **Flatbuffers**: We represent our models using [the standard flatbuffer schema used by the rest of TensorFlow Lite](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/schema/schema.fbs), with the difference that we always keep it in read-only program memory (typically flash) rather than relying on having a file system to read it from. This is a good fit because flatbuffer's serialized format is designed to be mapped into memory without requiring any extra memory allocations or modifications to access it. All of the functions to read model values work directly on the serialized bytes, and large sections of data like weights are directly accessible as sequential C-style arrays of their data type, with no strides or unpacking needed. We do get a lot of value from using flatbuffers, but there is a cost in complexity. The flat buffer library code is all inline [inside the main headers](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/schema/schema_generated.h), but it isn't straightforward to inspect their implementations, and the model data structures aren't easy to comprehend from the debugger. The header for the schema itself also has to be periodically updated when new information is added to the file format, though we try to handle that transparently for most developers by checking in a pre-generated version.
+
+- **Code Duplication**: Some of the code in this prototype largely duplicates the logic in other parts of the TensorFlow Lite code base, for example the operator wrappers. We've tried to keep share as much as we can between the two interpreters, but there are some assumptions built into the original runtime that make this difficult. We'll be working on modularizing the main interpreter so that we can move to an entirely shared system.
+
+This initial preview release is designed to get early feedback, and is not intended to be a final product. It only includes enough operations to run a simple keyword recognition model, and the implementations are not optimized. We're hoping this will be a good way to get feedback and collaborate to improve the framework.
+
+## Getting Started
+
+Building requires a Linux or OS X machine.
+
+ - Open a terminal
+ - Download the TensorFlow source with `git clone https://github.com/tensorflow`
+ - Enter the source root directory by running `cd tensorflow`
+ - Download the dependencies by running `tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh`. This may take a few minutes
+ - Build and test the library with `make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile test`
+
+You should see a series of compilation steps, followed by "~~~ALL TESTS PASSED~~~" for the various tests of the code that it will run. If there's an error, you should get an informative message from make about what went wrong.
+
+These tests are all built as simple binaries with few dependencies, so you can run them manually. For example, here's how to run the depthwise convolution test, and its output:
+
+```
+tensorflow/contrib/lite/experimental/micro/tools/make/gen/linux_x86_64/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test
+
+Testing SimpleTest
+Testing SimpleTestQuantized
+Testing SimpleTestRelu
+Testing SimpleTestReluQuantized
+4/4 tests passed
+~ALL TESTS PASSED~~~
+```
+
+Looking at the [depthwise_conv_test.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc) code, you'll see a sequence that looks like this:
+
+```
+...
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(SimpleTest) {
+...
+}
+...
+TF_LITE_MICRO_TESTS_END
+```
+
+These macros work a lot like [the Google test framework](https://github.com/google/googletest), but they don't require any dependencies and just write results to stderr, rather than aborting the program. If all the tests pass, then "~~~ALL TESTS PASSED~~~" is output, and the test harness that runs the binary during the make process knows that everything ran correctly. If there's an error, the lack of the expected string lets the harness know that the test failed.
+
+So, why are we running tests in this complicated way? So far, we've been building binaries that run locally on the Mac OS or Linux machine you're building on, but this approach becomes important when we're targeting simple micro controller devices.
+
+## Building for the "Blue Pill" STM32F103
+
+The goal of this library is to enable machine learning on resource-constrained micro controllers and DSPs, and as part of that we've targeted the ["Blue Pill" STM32F103-compatible development board](https://github.com/google/googletest) as a cheap and popular platform. It only has 20KB of RAM and 64KB of flash, so it's a good device to ensure we can run efficiently on small chips.
+
+It's fairly easy to [buy and wire up a physical board](https://github.com/google/stm32_bare_lib#wiring-up-your-blue-pill), but even if you don't have an actual device, the [Renode project](https://renode.io/) makes it easy to run a faithful emulation on your desktop machine. You'll need [Docker](https://www.docker.com/) installed, but once you have that set up, try running the following command:
+
+`make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile TARGET=bluepill test`
+
+You should see a similar set of outputs as you did in the previous section, with the addition of some extra Docker logging messages. These are because we're using Docker to run the Renode micro controller emulation tool, and the tests themselves are being run on a simulated STM32F103 device. The communication channels between an embedded device and the host are quite limited, so the test harness looks at the output of the debug log to see if tests have passed, just as it did in the previous section. This makes it a very flexible way to run cross-platform tests, even when a platform has no operating system facilities, as long as it can output debugging text logs.
+
+To understand what's happening here, try running the same depthwise convolution test, but through the emulated device test harness, with the following command:
+
+```
+tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh \
+tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test
+
+```
+
+You should see output that looks something like this:
+
+```
+Sending build context to Docker daemon   21.5kB
+Step 1/2 : FROM antmicro/renode:latest
+ ---> 1b670a243e8f
+Step 2/2 : LABEL maintainer="Pete Warden <petewarden@google.com>"
+ ---> Using cache
+ ---> 3afcd410846d
+Successfully built 3afcd410846d
+Successfully tagged renode_bluepill:latest
+LOGS:
+...
+03:27:32.4340 [INFO] machine-0: Machine started.
+03:27:32.4790 [DEBUG] cpu.uartSemihosting: [+0.22s host +0s virt 0s virt from start] Testing SimpleTest
+03:27:32.4812 [DEBUG] cpu.uartSemihosting: [+2.21ms host +0s virt 0s virt from start]   Testing SimpleTestQuantized
+03:27:32.4833 [DEBUG] cpu.uartSemihosting: [+2.14ms host +0s virt 0s virt from start]   Testing SimpleTestRelu
+03:27:32.4834 [DEBUG] cpu.uartSemihosting: [+0.18ms host +0s virt 0s virt from start]   Testing SimpleTestReluQuantized
+03:27:32.4838 [DEBUG] cpu.uartSemihosting: [+0.4ms host +0s virt 0s virt from start]   4/4 tests passed
+03:27:32.4839 [DEBUG] cpu.uartSemihosting: [+41µs host +0s virt 0s virt from start]   ~~~ALL TESTS PASSED~~~
+03:27:32.4839 [DEBUG] cpu.uartSemihosting: [+5µs host +0s virt 0s virt from start]   
+...
+tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test: PASS
+```
+
+There's a lot of output here, but you should be able to see that the same tests that were covered when we ran locally on the development machine show up in the debug logs here, along with the magic string "~~~ALL TESTS PASSED~~~". This is the exact same code as before, just compiled and run on the STM32F103 rather than your desktop. We hope that the simplicity of this testing approach will help make adding support for new platforms as easy as possible.
diff --git a/tensorflow/contrib/lite/experimental/micro/compatibility.h b/tensorflow/contrib/lite/experimental/micro/compatibility.h
new file mode 100644
index 0000000000..4f0fd9f312
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/compatibility.h
@@ -0,0 +1,32 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_COMPATIBILITY_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_COMPATIBILITY_H_
+
+// C++ will automatically create class-specific delete operators for virtual
+// objects, which by default call the global delete function. For embedded
+// applications we want to avoid this, and won't be calling new/delete on these
+// objects, so we need to override the default implementation with one that does
+// nothing to avoid linking in ::delete().
+// This macro needs to be included in all subclasses of a virtual base class in
+// the private section.
+#ifdef TF_LITE_STATIC_MEMORY
+#define TF_LITE_REMOVE_VIRTUAL_DELETE \
+  void operator delete(void* p) {}
+#else
+#define TF_LITE_REMOVE_VIRTUAL_DELETE
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_COMPATIBILITY_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
new file mode 100644
index 0000000000..447c584387
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
@@ -0,0 +1,28 @@
+# Description:
+#   TensorFlow Lite microcontroller example.
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow/contrib/lite/experimental/micro/testing:micro_test.bzl",
+    "tflite_micro_cc_test",
+)
+
+tflite_micro_cc_test(
+    name = "micro_speech_test",
+    srcs = [
+        "micro_speech_test.cc",
+        "tiny_conv_model_data.cc",
+        "tiny_conv_model_data.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite:schema_fbs_version",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/kernels:all_ops_resolver",
+        "//tensorflow/contrib/lite/experimental/micro/kernels:micro_ops",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+        "//tensorflow/contrib/lite/schema:schema_fbs",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
new file mode 100644
index 0000000000..86cd056a72
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
@@ -0,0 +1,55 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h"
+#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
+#include "tensorflow/contrib/lite/version.h"
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestInvoke) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data);
+  if (model->version() != TFLITE_SCHEMA_VERSION) {
+    error_reporter->Report(
+        "Model provided is schema version %d not equal "
+        "to supported version %d.\n",
+        model->version(), TFLITE_SCHEMA_VERSION);
+  }
+  tflite::ops::micro::AllOpsResolver resolver;
+
+  const int tensor_arena_size = 10 * 1024;
+  uint8_t tensor_arena[tensor_arena_size];
+  tflite::SimpleTensorAllocator tensor_allocator(tensor_arena,
+                                                 tensor_arena_size);
+
+  tflite::MicroInterpreter interpreter(model, resolver, &tensor_allocator,
+                                       error_reporter);
+  TfLiteStatus invoke_status = interpreter.Invoke();
+  if (invoke_status != kTfLiteOk) {
+    error_reporter->Report("Invoke failed\n");
+  }
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status);
+
+  error_reporter->Report("Ran successfully\n");
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
new file mode 100644
index 0000000000..f1f9e0e219
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
@@ -0,0 +1,1672 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Automatically created from a TensorFlow Lite flatbuffer using the command:
+// xxd -i tiny_conv.tflite > tiny_conv_model_data.cc
+
+#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
+
+const unsigned char g_tiny_conv_model_data[] = {
+    0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,
+    0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x4d, 0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0xf4, 0x47, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
+    0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74,
+    0x65, 0x64, 0x2e, 0x00, 0x09, 0x00, 0x00, 0x00, 0xd4, 0x47, 0x00, 0x00,
+    0x04, 0x03, 0x00, 0x00, 0xfc, 0x02, 0x00, 0x00, 0xf4, 0x02, 0x00, 0x00,
+    0x64, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb8, 0xb3, 0xff, 0xff,
+    0x16, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xd7, 0x02, 0x00, 0x00, 0x2f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0xb3, 0xff, 0xff,
+    0x46, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0xab, 0x00, 0x00, 0x00, 0x1e, 0xff, 0xff, 0xff, 0xed, 0xff, 0xff, 0xff,
+    0x4a, 0x00, 0x00, 0x00, 0x62, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+    0x80, 0x02, 0x00, 0x00, 0xce, 0xad, 0xaf, 0x3c, 0xc8, 0xe9, 0xb0, 0x83,
+    0xa1, 0xbf, 0xb2, 0xb1, 0xab, 0xd0, 0xa7, 0x53, 0xa5, 0xe9, 0xb5, 0xac,
+    0xa2, 0xd3, 0xc4, 0x9e, 0x8b, 0xb2, 0x64, 0xb3, 0x9d, 0xa2, 0xae, 0xa6,
+    0xd5, 0xbe, 0x43, 0x9f, 0x9c, 0x54, 0xb5, 0xa8, 0x49, 0x78, 0x86, 0xa2,
+    0xa3, 0x55, 0x35, 0x96, 0x3d, 0x7f, 0xe2, 0xb5, 0xb0, 0x47, 0x28, 0xa9,
+    0x9d, 0xbb, 0xd6, 0xff, 0xb7, 0x79, 0x63, 0xb5, 0xaf, 0xa7, 0xab, 0x7e,
+    0xbc, 0xc7, 0xa0, 0xc3, 0xb1, 0xb6, 0xb2, 0xa1, 0xc2, 0xbb, 0x79, 0x57,
+    0xbe, 0xc1, 0xb7, 0xb0, 0x6b, 0xb7, 0xa5, 0x75, 0x97, 0xb8, 0xe7, 0xac,
+    0xad, 0x7e, 0xb1, 0x9b, 0xc3, 0xba, 0x6b, 0xa2, 0x7f, 0x58, 0xb9, 0x7a,
+    0x4c, 0x91, 0x74, 0x9e, 0xa7, 0x3d, 0xc2, 0x94, 0x75, 0xa1, 0xa4, 0xac,
+    0xab, 0x45, 0x2e, 0xb4, 0xb6, 0xbf, 0xc1, 0xdb, 0xaf, 0x6c, 0x67, 0xb1,
+    0xa9, 0xa6, 0xa8, 0xca, 0xc2, 0xc4, 0xb9, 0xbf, 0xb4, 0xb9, 0xaa, 0x9d,
+    0x9f, 0xb9, 0xb2, 0x71, 0xb2, 0xca, 0xbe, 0xaf, 0x5f, 0xbc, 0xa0, 0x5b,
+    0xa8, 0xb4, 0xa4, 0xa8, 0xd8, 0x69, 0xb7, 0x8a, 0xbc, 0xb8, 0xaf, 0x9c,
+    0x7c, 0x5d, 0xb3, 0x6b, 0x49, 0x95, 0x64, 0xa0, 0xa2, 0x49, 0xcb, 0x87,
+    0xa5, 0xb5, 0xa1, 0xb2, 0xa3, 0x40, 0x6d, 0x9f, 0xc5, 0xb6, 0xbb, 0xd4,
+    0x9c, 0x6d, 0x69, 0xa9, 0xa8, 0x91, 0xad, 0xb8, 0xd2, 0xc6, 0xaf, 0xb8,
+    0xac, 0xa9, 0xa2, 0xa7, 0x60, 0xa6, 0xa1, 0xc9, 0xb8, 0xd6, 0xcf, 0xb1,
+    0x56, 0xb4, 0xac, 0x40, 0xae, 0xbd, 0xbf, 0xa2, 0x54, 0x72, 0x9b, 0x8c,
+    0xc2, 0xb5, 0xc2, 0x9b, 0x64, 0x6d, 0xb4, 0x62, 0x4e, 0x9b, 0x6c, 0xa6,
+    0x8f, 0x4c, 0xca, 0x95, 0xb6, 0xbf, 0x92, 0xae, 0x9c, 0x49, 0xae, 0xb2,
+    0xc0, 0xb6, 0xbc, 0xd1, 0xa4, 0x7b, 0x64, 0xa0, 0xa6, 0x81, 0xac, 0xa6,
+    0xbd, 0xc8, 0xbc, 0xae, 0xaa, 0x9e, 0x61, 0xb1, 0x57, 0xac, 0xbf, 0xbf,
+    0xbb, 0xe0, 0xa6, 0xae, 0x47, 0xc9, 0xbc, 0x57, 0xb0, 0xb5, 0xc7, 0x98,
+    0xf4, 0x93, 0xb6, 0x70, 0xc3, 0xb3, 0xca, 0xab, 0x77, 0x9a, 0xac, 0x45,
+    0x5c, 0x9e, 0x9a, 0xa9, 0x9b, 0x35, 0xc0, 0x6f, 0xc6, 0xc7, 0x91, 0xb4,
+    0xa8, 0x3c, 0xce, 0xb8, 0xad, 0xb9, 0xb5, 0xdd, 0x9c, 0x6d, 0xbf, 0x91,
+    0xb2, 0x7d, 0xa0, 0xaf, 0x9f, 0xbd, 0xb9, 0xcf, 0x9b, 0x5d, 0x3f, 0xac,
+    0x64, 0xae, 0xaf, 0xb8, 0xbc, 0xb8, 0x86, 0xb5, 0x36, 0xcf, 0xb4, 0xa9,
+    0xad, 0xcd, 0xdb, 0xa4, 0x68, 0xa6, 0xa4, 0x67, 0xc8, 0xb7, 0xe5, 0xa4,
+    0x76, 0xb8, 0xa8, 0x28, 0x6b, 0xa5, 0xba, 0xad, 0x9f, 0x3a, 0xa5, 0x42,
+    0xc5, 0xb0, 0x88, 0xad, 0xa5, 0x4d, 0xea, 0x8a, 0xb8, 0xb5, 0xb3, 0xd9,
+    0xa0, 0x77, 0xbb, 0x92, 0x9e, 0x80, 0xbd, 0xbd, 0x6d, 0xcc, 0xab, 0x99,
+    0x88, 0x58, 0x4d, 0xb0, 0x6c, 0xbc, 0x96, 0xbd, 0xae, 0xab, 0x5b, 0xac,
+    0x2f, 0xc3, 0x9a, 0xbe, 0xac, 0xb3, 0x84, 0x9b, 0xe3, 0xaf, 0x95, 0x6b,
+    0xc2, 0xb5, 0xca, 0xb7, 0x4e, 0xbc, 0x9d, 0x24, 0x75, 0xa9, 0xd2, 0xae,
+    0xa0, 0x2b, 0x90, 0x34, 0xd1, 0xb5, 0x96, 0xae, 0xaa, 0x4d, 0xc1, 0xa3,
+    0xb1, 0xb4, 0xaa, 0xd2, 0x9c, 0x7d, 0xc0, 0x91, 0x91, 0x7a, 0xb8, 0x83,
+    0x44, 0xcb, 0xaf, 0x9b, 0x6b, 0x5b, 0x75, 0xb2, 0x62, 0xb6, 0xaa, 0xcb,
+    0x99, 0xa8, 0x63, 0xae, 0x24, 0xc7, 0x8a, 0xbe, 0xa9, 0xb6, 0xa0, 0xa1,
+    0x41, 0xac, 0x84, 0xb5, 0xb9, 0xb3, 0x9b, 0xad, 0x77, 0xbf, 0xa8, 0x7e,
+    0x82, 0xb9, 0xbe, 0xaa, 0xa3, 0x47, 0x6d, 0xb5, 0xc3, 0xb1, 0xbf, 0xa7,
+    0xb1, 0x57, 0x75, 0xb5, 0xb0, 0xb6, 0xb9, 0xce, 0xa4, 0x86, 0xb0, 0xa4,
+    0x98, 0x80, 0xc5, 0x3e, 0x90, 0xca, 0x9b, 0xa2, 0x5a, 0x50, 0xc5, 0xa5,
+    0xad, 0xc1, 0x9c, 0x91, 0x83, 0x8f, 0x21, 0xab, 0xac, 0xba, 0x70, 0xb4,
+    0xae, 0x85, 0x7e, 0xa7, 0xbd, 0xba, 0x7c, 0xb2, 0xb5, 0xb2, 0x7e, 0xb3,
+    0xc3, 0xcd, 0x82, 0xac, 0x9b, 0xb3, 0xa6, 0xb0, 0xbc, 0x6f, 0x52, 0xb9,
+    0xbf, 0xb1, 0xa6, 0xa4, 0xc1, 0x7a, 0x90, 0xc0, 0xae, 0xab, 0x94, 0xd8,
+    0xab, 0xa4, 0x98, 0xbb, 0x8b, 0x86, 0x94, 0x01, 0xad, 0xe7, 0xb1, 0x9b,
+    0x57, 0x48, 0xc1, 0x88, 0xbf, 0xcc, 0xb4, 0x4b, 0x62, 0x8b, 0x48, 0xa7,
+    0xbe, 0xe1, 0x80, 0xa6, 0xb3, 0x64, 0xaa, 0xa4, 0xcf, 0xba, 0x6d, 0xa6,
+    0xb8, 0xa0, 0x8f, 0xb3, 0xce, 0xc3, 0x87, 0xb2, 0xa0, 0xc0, 0x78, 0xb0,
+    0xb9, 0xaa, 0x40, 0xb8, 0xd8, 0xa3, 0x9a, 0xaa, 0xcc, 0xa2, 0x9f, 0xb9,
+    0xbe, 0xc2, 0x89, 0xd6, 0xc6, 0x9c, 0xa3, 0xc7, 0x94, 0xb6, 0xff, 0xff,
+    0x98, 0xb6, 0xff, 0xff, 0xf6, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+    0xc0, 0x44, 0x00, 0x00, 0x4a, 0x4d, 0x59, 0x60, 0x5a, 0x45, 0x3d, 0x50,
+    0x4a, 0x43, 0x3d, 0x59, 0x3e, 0x49, 0x4a, 0x59, 0x45, 0x44, 0x41, 0x5d,
+    0x50, 0x2f, 0x4e, 0x34, 0x46, 0x48, 0x41, 0x4a, 0x4c, 0x3b, 0x4b, 0x3e,
+    0x49, 0x49, 0x43, 0x4b, 0x3e, 0x49, 0x47, 0x41, 0x3e, 0x4a, 0x46, 0x43,
+    0x41, 0x43, 0x47, 0x49, 0x4a, 0x4c, 0x46, 0x58, 0x3f, 0x4c, 0x4b, 0x4c,
+    0x4d, 0x4b, 0x45, 0x52, 0x45, 0x42, 0x52, 0x52, 0x48, 0x40, 0x46, 0x5f,
+    0x4c, 0x41, 0x47, 0x48, 0x48, 0x4c, 0x43, 0x61, 0x50, 0x4b, 0x49, 0x49,
+    0x46, 0x3f, 0x40, 0x67, 0x40, 0x4d, 0x45, 0x40, 0x40, 0x45, 0x47, 0x56,
+    0x44, 0x3a, 0x4a, 0x4c, 0x52, 0x48, 0x46, 0x50, 0x4b, 0x44, 0x51, 0x45,
+    0x40, 0x45, 0x45, 0x48, 0x4e, 0x4e, 0x43, 0x48, 0x44, 0x4b, 0x45, 0x4a,
+    0x53, 0x45, 0x4a, 0x4b, 0x3f, 0x43, 0x45, 0x53, 0x4d, 0x43, 0x46, 0x3f,
+    0x47, 0x4e, 0x51, 0x50, 0x48, 0x4f, 0x4f, 0x4a, 0x4a, 0x4e, 0x45, 0x4e,
+    0x46, 0x41, 0x4a, 0x46, 0x45, 0x47, 0x45, 0x4b, 0x50, 0x4c, 0x46, 0x45,
+    0x41, 0x47, 0x41, 0x47, 0x46, 0x4f, 0x3f, 0x4f, 0x4a, 0x51, 0x4f, 0x53,
+    0x54, 0x48, 0x51, 0x43, 0x4b, 0x48, 0x4d, 0x46, 0x48, 0x4f, 0x49, 0x44,
+    0x43, 0x53, 0x50, 0x59, 0x56, 0x3d, 0x45, 0x44, 0x48, 0x38, 0x3b, 0x5f,
+    0x39, 0x43, 0x43, 0x52, 0x46, 0x3e, 0x43, 0x58, 0x43, 0x1e, 0x50, 0x3c,
+    0x46, 0x4b, 0x46, 0x50, 0x3c, 0x37, 0x4c, 0x47, 0x47, 0x4b, 0x47, 0x54,
+    0x43, 0x3e, 0x47, 0x4f, 0x4b, 0x41, 0x53, 0x50, 0x42, 0x46, 0x4f, 0x4b,
+    0x4e, 0x3f, 0x49, 0x52, 0x4a, 0x4a, 0x49, 0x53, 0x52, 0x47, 0x52, 0x5a,
+    0x40, 0x42, 0x4d, 0x4b, 0x50, 0x43, 0x49, 0x59, 0x47, 0x4c, 0x4d, 0x50,
+    0x4e, 0x3c, 0x44, 0x61, 0x51, 0x49, 0x49, 0x46, 0x49, 0x47, 0x4b, 0x5a,
+    0x45, 0x4b, 0x43, 0x40, 0x44, 0x52, 0x4d, 0x54, 0x49, 0x47, 0x44, 0x48,
+    0x46, 0x48, 0x3e, 0x40, 0x45, 0x4f, 0x4d, 0x4b, 0x4c, 0x40, 0x3d, 0x40,
+    0x3e, 0x48, 0x50, 0x4e, 0x4c, 0x42, 0x48, 0x4b, 0x3d, 0x48, 0x4b, 0x44,
+    0x52, 0x4b, 0x49, 0x4f, 0x49, 0x3f, 0x47, 0x43, 0x4d, 0x3f, 0x53, 0x4e,
+    0x4a, 0x4f, 0x4e, 0x4e, 0x53, 0x42, 0x46, 0x4c, 0x44, 0x4c, 0x46, 0x51,
+    0x45, 0x48, 0x4a, 0x50, 0x47, 0x41, 0x45, 0x54, 0x4a, 0x44, 0x50, 0x49,
+    0x48, 0x50, 0x51, 0x4b, 0x50, 0x4c, 0x4a, 0x49, 0x43, 0x47, 0x50, 0x4a,
+    0x4d, 0x4c, 0x4e, 0x49, 0x42, 0x50, 0x52, 0x48, 0x45, 0x5a, 0x4e, 0x55,
+    0x51, 0x3d, 0x3d, 0x4d, 0x42, 0x32, 0x36, 0x64, 0x39, 0x4c, 0x41, 0x48,
+    0x44, 0x35, 0x43, 0x56, 0x47, 0x1e, 0x4b, 0x3e, 0x47, 0x3f, 0x43, 0x52,
+    0x51, 0x34, 0x41, 0x4d, 0x3e, 0x41, 0x41, 0x48, 0x3c, 0x4b, 0x45, 0x3b,
+    0x40, 0x43, 0x4c, 0x46, 0x46, 0x47, 0x3e, 0x4f, 0x4b, 0x48, 0x42, 0x47,
+    0x4e, 0x3e, 0x49, 0x47, 0x43, 0x43, 0x4e, 0x52, 0x51, 0x45, 0x3f, 0x54,
+    0x46, 0x44, 0x48, 0x5d, 0x3e, 0x4a, 0x47, 0x52, 0x53, 0x3a, 0x4f, 0x5d,
+    0x41, 0x4c, 0x48, 0x51, 0x43, 0x4b, 0x4b, 0x67, 0x48, 0x4b, 0x45, 0x4d,
+    0x4b, 0x43, 0x4a, 0x54, 0x4c, 0x46, 0x43, 0x4a, 0x4d, 0x43, 0x4c, 0x47,
+    0x4a, 0x48, 0x4d, 0x42, 0x4d, 0x48, 0x3f, 0x43, 0x4c, 0x44, 0x4e, 0x4c,
+    0x40, 0x45, 0x4b, 0x48, 0x47, 0x47, 0x3e, 0x4c, 0x52, 0x41, 0x44, 0x4e,
+    0x4d, 0x44, 0x49, 0x4d, 0x3d, 0x45, 0x48, 0x4f, 0x4c, 0x4a, 0x55, 0x51,
+    0x4d, 0x4c, 0x45, 0x4e, 0x46, 0x45, 0x44, 0x49, 0x4e, 0x44, 0x40, 0x48,
+    0x49, 0x44, 0x53, 0x51, 0x42, 0x41, 0x51, 0x49, 0x51, 0x45, 0x51, 0x3f,
+    0x4b, 0x3f, 0x52, 0x3c, 0x50, 0x4d, 0x4f, 0x4b, 0x44, 0x4f, 0x40, 0x52,
+    0x49, 0x4a, 0x50, 0x3f, 0x3d, 0x54, 0x4c, 0x53, 0x52, 0x45, 0x41, 0x43,
+    0x47, 0x2d, 0x40, 0x63, 0x3a, 0x51, 0x43, 0x4e, 0x40, 0x2b, 0x36, 0x5b,
+    0x4b, 0x12, 0x4d, 0x35, 0x4b, 0x3f, 0x44, 0x4a, 0x46, 0x31, 0x54, 0x48,
+    0x43, 0x42, 0x3d, 0x51, 0x41, 0x45, 0x49, 0x4b, 0x47, 0x49, 0x3d, 0x3e,
+    0x46, 0x3d, 0x4d, 0x48, 0x3d, 0x45, 0x48, 0x4b, 0x49, 0x52, 0x44, 0x4c,
+    0x45, 0x44, 0x45, 0x49, 0x50, 0x48, 0x45, 0x46, 0x45, 0x44, 0x52, 0x55,
+    0x46, 0x45, 0x4b, 0x3d, 0x42, 0x4a, 0x3e, 0x57, 0x48, 0x4b, 0x3c, 0x42,
+    0x4a, 0x46, 0x47, 0x6c, 0x54, 0x4b, 0x41, 0x49, 0x49, 0x50, 0x43, 0x56,
+    0x44, 0x43, 0x4d, 0x3e, 0x44, 0x41, 0x47, 0x40, 0x4a, 0x4b, 0x4d, 0x4d,
+    0x3e, 0x46, 0x45, 0x47, 0x3e, 0x42, 0x4a, 0x45, 0x49, 0x3d, 0x3f, 0x43,
+    0x40, 0x44, 0x47, 0x4a, 0x45, 0x4d, 0x4b, 0x4c, 0x43, 0x40, 0x3d, 0x3e,
+    0x4c, 0x4c, 0x42, 0x4d, 0x48, 0x4d, 0x49, 0x42, 0x51, 0x51, 0x4c, 0x4b,
+    0x53, 0x4f, 0x48, 0x4d, 0x40, 0x46, 0x45, 0x4b, 0x47, 0x47, 0x4b, 0x46,
+    0x54, 0x42, 0x42, 0x46, 0x46, 0x4a, 0x4c, 0x55, 0x3f, 0x3c, 0x52, 0x4b,
+    0x4b, 0x4d, 0x4e, 0x48, 0x53, 0x4c, 0x4b, 0x42, 0x52, 0x54, 0x50, 0x4b,
+    0x40, 0x5f, 0x58, 0x53, 0x50, 0x42, 0x35, 0x48, 0x39, 0x24, 0x3c, 0x5e,
+    0x41, 0x50, 0x3c, 0x51, 0x42, 0x26, 0x42, 0x56, 0x41, 0x0c, 0x3e, 0x3d,
+    0x48, 0x3e, 0x50, 0x4b, 0x3a, 0x2c, 0x43, 0x3d, 0x48, 0x3e, 0x43, 0x48,
+    0x4c, 0x3f, 0x4a, 0x3e, 0x51, 0x4a, 0x4f, 0x40, 0x47, 0x43, 0x50, 0x4c,
+    0x43, 0x4d, 0x3f, 0x45, 0x4d, 0x3e, 0x4c, 0x44, 0x51, 0x47, 0x4b, 0x51,
+    0x45, 0x49, 0x44, 0x3f, 0x46, 0x46, 0x46, 0x57, 0x49, 0x4c, 0x49, 0x4e,
+    0x47, 0x4c, 0x47, 0x5e, 0x43, 0x46, 0x45, 0x4b, 0x52, 0x49, 0x45, 0x5f,
+    0x47, 0x41, 0x46, 0x43, 0x4f, 0x3b, 0x43, 0x51, 0x46, 0x53, 0x4a, 0x4e,
+    0x4b, 0x43, 0x4e, 0x40, 0x48, 0x49, 0x46, 0x3f, 0x48, 0x50, 0x4b, 0x41,
+    0x4a, 0x47, 0x4b, 0x3d, 0x46, 0x49, 0x4b, 0x43, 0x43, 0x42, 0x3e, 0x47,
+    0x47, 0x4a, 0x45, 0x46, 0x51, 0x48, 0x51, 0x4e, 0x3f, 0x50, 0x44, 0x4b,
+    0x4d, 0x4e, 0x44, 0x4d, 0x3d, 0x49, 0x4a, 0x4e, 0x42, 0x51, 0x43, 0x42,
+    0x46, 0x3e, 0x48, 0x4b, 0x4f, 0x50, 0x3d, 0x48, 0x4c, 0x4f, 0x46, 0x44,
+    0x44, 0x48, 0x42, 0x4b, 0x48, 0x41, 0x43, 0x46, 0x4d, 0x49, 0x4f, 0x43,
+    0x41, 0x44, 0x3f, 0x3d, 0x45, 0x4f, 0x45, 0x41, 0x40, 0x58, 0x4f, 0x54,
+    0x5b, 0x4b, 0x3a, 0x47, 0x3d, 0x28, 0x3d, 0x57, 0x3e, 0x51, 0x3f, 0x47,
+    0x3f, 0x2e, 0x3e, 0x54, 0x4e, 0x0b, 0x41, 0x3d, 0x3b, 0x3d, 0x43, 0x47,
+    0x47, 0x28, 0x4d, 0x43, 0x43, 0x3b, 0x4e, 0x4a, 0x4d, 0x42, 0x51, 0x46,
+    0x4f, 0x3d, 0x4c, 0x3a, 0x49, 0x49, 0x4a, 0x43, 0x42, 0x4b, 0x47, 0x42,
+    0x42, 0x49, 0x3f, 0x4d, 0x46, 0x4a, 0x49, 0x4e, 0x42, 0x3c, 0x4a, 0x41,
+    0x4c, 0x40, 0x4d, 0x5a, 0x49, 0x46, 0x51, 0x46, 0x4b, 0x4c, 0x46, 0x62,
+    0x45, 0x42, 0x51, 0x4e, 0x4d, 0x3e, 0x4d, 0x5b, 0x4d, 0x43, 0x45, 0x50,
+    0x4b, 0x40, 0x50, 0x53, 0x4f, 0x4f, 0x51, 0x53, 0x46, 0x41, 0x4e, 0x3a,
+    0x4b, 0x47, 0x3f, 0x3e, 0x4d, 0x48, 0x53, 0x3f, 0x45, 0x42, 0x4c, 0x45,
+    0x55, 0x4c, 0x4b, 0x39, 0x4a, 0x45, 0x48, 0x4d, 0x47, 0x40, 0x48, 0x4f,
+    0x4d, 0x49, 0x3e, 0x41, 0x46, 0x4e, 0x40, 0x49, 0x4b, 0x47, 0x4c, 0x45,
+    0x44, 0x51, 0x4f, 0x4b, 0x48, 0x49, 0x44, 0x41, 0x43, 0x46, 0x51, 0x45,
+    0x40, 0x48, 0x4b, 0x42, 0x44, 0x4f, 0x53, 0x4d, 0x44, 0x46, 0x4e, 0x4c,
+    0x48, 0x50, 0x41, 0x45, 0x42, 0x48, 0x4d, 0x4d, 0x47, 0x45, 0x41, 0x45,
+    0x48, 0x58, 0x4e, 0x46, 0x43, 0x53, 0x57, 0x52, 0x5e, 0x42, 0x45, 0x4e,
+    0x39, 0x24, 0x32, 0x56, 0x47, 0x56, 0x49, 0x52, 0x46, 0x26, 0x3a, 0x51,
+    0x4b, 0x05, 0x3e, 0x43, 0x3f, 0x38, 0x4d, 0x4b, 0x4f, 0x27, 0x51, 0x46,
+    0x47, 0x41, 0x4a, 0x47, 0x4a, 0x3e, 0x44, 0x51, 0x3f, 0x3a, 0x43, 0x46,
+    0x4d, 0x49, 0x46, 0x52, 0x43, 0x48, 0x49, 0x3e, 0x47, 0x46, 0x4a, 0x4d,
+    0x47, 0x46, 0x52, 0x50, 0x44, 0x48, 0x4c, 0x47, 0x45, 0x41, 0x49, 0x5b,
+    0x4d, 0x4b, 0x47, 0x4c, 0x4a, 0x47, 0x45, 0x5b, 0x49, 0x46, 0x52, 0x47,
+    0x47, 0x3d, 0x55, 0x59, 0x40, 0x4b, 0x3e, 0x50, 0x42, 0x43, 0x40, 0x4f,
+    0x48, 0x3f, 0x47, 0x53, 0x4d, 0x44, 0x4e, 0x37, 0x4c, 0x43, 0x51, 0x4d,
+    0x46, 0x4e, 0x40, 0x41, 0x52, 0x44, 0x43, 0x4a, 0x50, 0x48, 0x47, 0x42,
+    0x48, 0x45, 0x50, 0x4d, 0x42, 0x52, 0x44, 0x43, 0x45, 0x43, 0x4c, 0x4d,
+    0x44, 0x51, 0x47, 0x48, 0x51, 0x4f, 0x48, 0x45, 0x49, 0x4a, 0x3e, 0x43,
+    0x4d, 0x4e, 0x4e, 0x46, 0x54, 0x4d, 0x49, 0x4d, 0x47, 0x46, 0x4b, 0x41,
+    0x4a, 0x49, 0x44, 0x45, 0x4d, 0x3e, 0x53, 0x50, 0x47, 0x4d, 0x4e, 0x43,
+    0x4f, 0x45, 0x4e, 0x4a, 0x47, 0x49, 0x4c, 0x4c, 0x4d, 0x54, 0x42, 0x4c,
+    0x43, 0x5d, 0x59, 0x50, 0x5e, 0x4b, 0x44, 0x43, 0x3c, 0x25, 0x31, 0x5b,
+    0x46, 0x5a, 0x50, 0x4d, 0x41, 0x2a, 0x41, 0x4f, 0x44, 0x00, 0x41, 0x3d,
+    0x43, 0x4b, 0x47, 0x45, 0x4e, 0x2e, 0x44, 0x46, 0x53, 0x3d, 0x43, 0x41,
+    0x44, 0x46, 0x49, 0x42, 0x45, 0x4f, 0x4d, 0x3a, 0x43, 0x3c, 0x47, 0x53,
+    0x43, 0x4e, 0x3f, 0x41, 0x4d, 0x50, 0x4b, 0x4c, 0x51, 0x47, 0x53, 0x4f,
+    0x45, 0x4a, 0x44, 0x45, 0x41, 0x46, 0x47, 0x50, 0x51, 0x3f, 0x3e, 0x41,
+    0x48, 0x45, 0x46, 0x5d, 0x45, 0x4a, 0x4c, 0x46, 0x4a, 0x49, 0x50, 0x51,
+    0x51, 0x4c, 0x4f, 0x47, 0x47, 0x42, 0x45, 0x47, 0x4e, 0x48, 0x46, 0x40,
+    0x45, 0x46, 0x4d, 0x3b, 0x4d, 0x52, 0x4c, 0x51, 0x49, 0x51, 0x47, 0x3d,
+    0x4d, 0x42, 0x4f, 0x4e, 0x43, 0x43, 0x45, 0x3a, 0x42, 0x50, 0x4c, 0x4a,
+    0x41, 0x53, 0x4c, 0x45, 0x51, 0x3f, 0x54, 0x43, 0x4b, 0x54, 0x56, 0x4d,
+    0x4f, 0x4a, 0x50, 0x4b, 0x44, 0x45, 0x4f, 0x4f, 0x47, 0x3e, 0x50, 0x4f,
+    0x4b, 0x48, 0x4d, 0x49, 0x55, 0x4d, 0x45, 0x4d, 0x4a, 0x53, 0x43, 0x46,
+    0x4c, 0x45, 0x41, 0x46, 0x49, 0x49, 0x4f, 0x4b, 0x49, 0x50, 0x52, 0x49,
+    0x41, 0x54, 0x44, 0x4c, 0x44, 0x63, 0x4a, 0x49, 0x40, 0x59, 0x52, 0x52,
+    0x59, 0x3f, 0x3e, 0x3e, 0x40, 0x25, 0x3c, 0x5c, 0x4f, 0x57, 0x44, 0x50,
+    0x41, 0x2a, 0x48, 0x4f, 0x43, 0x08, 0x47, 0x43, 0x49, 0x48, 0x4d, 0x49,
+    0x46, 0x2b, 0x48, 0x44, 0x4e, 0x47, 0x47, 0x43, 0x44, 0x3e, 0x4a, 0x52,
+    0x3f, 0x4a, 0x53, 0x42, 0x49, 0x47, 0x4c, 0x50, 0x43, 0x46, 0x46, 0x3c,
+    0x4c, 0x47, 0x4e, 0x4d, 0x42, 0x41, 0x53, 0x52, 0x4f, 0x40, 0x54, 0x50,
+    0x46, 0x43, 0x50, 0x56, 0x51, 0x48, 0x48, 0x48, 0x49, 0x39, 0x47, 0x5e,
+    0x4e, 0x4b, 0x4f, 0x4e, 0x43, 0x45, 0x42, 0x58, 0x4a, 0x3b, 0x48, 0x4d,
+    0x43, 0x3e, 0x4b, 0x43, 0x3c, 0x45, 0x46, 0x4b, 0x42, 0x42, 0x4e, 0x3d,
+    0x4b, 0x4e, 0x51, 0x52, 0x48, 0x3e, 0x4b, 0x3f, 0x4c, 0x4a, 0x4b, 0x4c,
+    0x46, 0x48, 0x3e, 0x48, 0x47, 0x4d, 0x4a, 0x46, 0x49, 0x4d, 0x4a, 0x48,
+    0x50, 0x4b, 0x40, 0x48, 0x4b, 0x52, 0x46, 0x50, 0x4f, 0x3e, 0x42, 0x44,
+    0x44, 0x42, 0x43, 0x49, 0x4f, 0x4f, 0x46, 0x42, 0x4a, 0x54, 0x42, 0x48,
+    0x50, 0x4f, 0x4f, 0x4c, 0x4c, 0x47, 0x52, 0x49, 0x4c, 0x45, 0x4a, 0x4d,
+    0x4a, 0x41, 0x47, 0x4a, 0x4d, 0x4a, 0x4c, 0x46, 0x51, 0x44, 0x4b, 0x49,
+    0x53, 0x5e, 0x45, 0x4a, 0x3b, 0x57, 0x5a, 0x4c, 0x59, 0x43, 0x3e, 0x4a,
+    0x3e, 0x20, 0x36, 0x5d, 0x47, 0x5b, 0x3f, 0x55, 0x3e, 0x24, 0x41, 0x52,
+    0x3f, 0x01, 0x49, 0x41, 0x40, 0x45, 0x42, 0x46, 0x49, 0x2a, 0x47, 0x40,
+    0x44, 0x3f, 0x42, 0x47, 0x4e, 0x42, 0x4b, 0x3d, 0x45, 0x4c, 0x47, 0x3d,
+    0x4c, 0x44, 0x48, 0x43, 0x43, 0x41, 0x4a, 0x3d, 0x48, 0x4b, 0x46, 0x4e,
+    0x4c, 0x45, 0x48, 0x4d, 0x54, 0x4d, 0x3e, 0x46, 0x3e, 0x47, 0x44, 0x4e,
+    0x48, 0x49, 0x53, 0x4b, 0x41, 0x45, 0x4c, 0x57, 0x52, 0x4e, 0x40, 0x48,
+    0x4d, 0x43, 0x44, 0x5a, 0x4a, 0x4c, 0x48, 0x4d, 0x3f, 0x52, 0x41, 0x50,
+    0x4a, 0x47, 0x3e, 0x43, 0x4c, 0x42, 0x48, 0x3e, 0x4f, 0x4b, 0x41, 0x43,
+    0x49, 0x40, 0x43, 0x36, 0x3f, 0x4b, 0x49, 0x49, 0x51, 0x43, 0x48, 0x40,
+    0x4c, 0x51, 0x4d, 0x4a, 0x49, 0x3f, 0x4b, 0x3d, 0x4f, 0x4b, 0x43, 0x4d,
+    0x46, 0x40, 0x46, 0x4d, 0x49, 0x48, 0x4d, 0x4c, 0x52, 0x4c, 0x49, 0x4f,
+    0x53, 0x40, 0x49, 0x53, 0x47, 0x43, 0x4c, 0x45, 0x42, 0x48, 0x42, 0x4e,
+    0x49, 0x43, 0x42, 0x40, 0x4f, 0x46, 0x50, 0x47, 0x51, 0x4a, 0x52, 0x45,
+    0x4c, 0x51, 0x48, 0x47, 0x40, 0x41, 0x52, 0x4f, 0x41, 0x5a, 0x53, 0x47,
+    0x42, 0x5f, 0x55, 0x4f, 0x53, 0x3e, 0x41, 0x49, 0x3d, 0x20, 0x3f, 0x54,
+    0x42, 0x5b, 0x49, 0x4d, 0x3d, 0x22, 0x3e, 0x48, 0x41, 0x01, 0x4c, 0x3d,
+    0x43, 0x4a, 0x46, 0x43, 0x4f, 0x2b, 0x49, 0x46, 0x47, 0x4a, 0x51, 0x3d,
+    0x4b, 0x44, 0x49, 0x41, 0x47, 0x47, 0x45, 0x3a, 0x44, 0x42, 0x40, 0x52,
+    0x46, 0x51, 0x4a, 0x41, 0x4a, 0x52, 0x44, 0x52, 0x4a, 0x40, 0x46, 0x45,
+    0x52, 0x4c, 0x4e, 0x42, 0x42, 0x48, 0x40, 0x4f, 0x4b, 0x4f, 0x51, 0x4c,
+    0x4e, 0x48, 0x4a, 0x5a, 0x46, 0x3d, 0x41, 0x50, 0x52, 0x4c, 0x44, 0x53,
+    0x4b, 0x4d, 0x4f, 0x49, 0x47, 0x4c, 0x48, 0x45, 0x48, 0x4a, 0x44, 0x4e,
+    0x4c, 0x40, 0x4d, 0x35, 0x40, 0x49, 0x4a, 0x51, 0x49, 0x4a, 0x46, 0x36,
+    0x46, 0x47, 0x4a, 0x4c, 0x40, 0x4e, 0x42, 0x38, 0x48, 0x45, 0x42, 0x49,
+    0x54, 0x4c, 0x3f, 0x49, 0x4c, 0x39, 0x47, 0x45, 0x4e, 0x4a, 0x42, 0x44,
+    0x4b, 0x53, 0x43, 0x40, 0x46, 0x51, 0x3d, 0x50, 0x4b, 0x43, 0x4a, 0x4c,
+    0x55, 0x54, 0x4a, 0x43, 0x48, 0x40, 0x44, 0x3f, 0x47, 0x45, 0x3e, 0x41,
+    0x49, 0x44, 0x4d, 0x49, 0x44, 0x41, 0x4a, 0x50, 0x44, 0x49, 0x4d, 0x47,
+    0x4a, 0x49, 0x46, 0x49, 0x40, 0x5b, 0x4d, 0x51, 0x47, 0x57, 0x49, 0x4f,
+    0x56, 0x46, 0x3a, 0x4a, 0x3e, 0x22, 0x36, 0x5c, 0x44, 0x56, 0x46, 0x48,
+    0x3a, 0x2d, 0x4a, 0x48, 0x44, 0x17, 0x41, 0x42, 0x40, 0x3d, 0x4e, 0x45,
+    0x40, 0x26, 0x43, 0x52, 0x41, 0x40, 0x44, 0x4a, 0x48, 0x42, 0x4f, 0x47,
+    0x46, 0x4c, 0x4a, 0x3b, 0x42, 0x3e, 0x3e, 0x49, 0x4e, 0x44, 0x4e, 0x49,
+    0x47, 0x41, 0x47, 0x44, 0x4c, 0x45, 0x4d, 0x49, 0x49, 0x48, 0x55, 0x3d,
+    0x4a, 0x45, 0x50, 0x4f, 0x46, 0x4c, 0x46, 0x45, 0x3c, 0x51, 0x4b, 0x5a,
+    0x46, 0x47, 0x54, 0x41, 0x44, 0x40, 0x4f, 0x53, 0x49, 0x46, 0x46, 0x48,
+    0x44, 0x40, 0x50, 0x49, 0x49, 0x43, 0x50, 0x41, 0x52, 0x4b, 0x46, 0x3e,
+    0x44, 0x44, 0x46, 0x4e, 0x47, 0x48, 0x3e, 0x38, 0x4c, 0x4c, 0x48, 0x43,
+    0x48, 0x3e, 0x50, 0x42, 0x51, 0x50, 0x4a, 0x48, 0x4a, 0x42, 0x44, 0x3d,
+    0x4a, 0x46, 0x46, 0x3d, 0x4e, 0x47, 0x3d, 0x48, 0x4c, 0x46, 0x50, 0x4d,
+    0x49, 0x45, 0x4a, 0x4c, 0x4c, 0x47, 0x4a, 0x42, 0x4a, 0x45, 0x50, 0x52,
+    0x4b, 0x4d, 0x4c, 0x43, 0x42, 0x53, 0x41, 0x45, 0x49, 0x41, 0x4b, 0x4c,
+    0x52, 0x54, 0x4b, 0x41, 0x48, 0x4c, 0x47, 0x4c, 0x41, 0x49, 0x4a, 0x47,
+    0x50, 0x59, 0x4e, 0x45, 0x3c, 0x5d, 0x53, 0x4c, 0x5a, 0x3e, 0x3a, 0x51,
+    0x3a, 0x22, 0x35, 0x59, 0x40, 0x5a, 0x43, 0x46, 0x41, 0x32, 0x44, 0x4b,
+    0x47, 0x04, 0x4c, 0x3a, 0x4a, 0x49, 0x48, 0x3d, 0x45, 0x2b, 0x50, 0x41,
+    0x3e, 0x44, 0x4f, 0x43, 0x4a, 0x3f, 0x48, 0x4b, 0x53, 0x49, 0x4b, 0x38,
+    0x44, 0x40, 0x48, 0x4c, 0x41, 0x3f, 0x47, 0x3e, 0x47, 0x49, 0x45, 0x42,
+    0x43, 0x3e, 0x46, 0x44, 0x53, 0x4d, 0x48, 0x44, 0x45, 0x42, 0x43, 0x53,
+    0x55, 0x49, 0x4d, 0x4b, 0x45, 0x44, 0x47, 0x5f, 0x48, 0x44, 0x4a, 0x48,
+    0x45, 0x4d, 0x4f, 0x5e, 0x4e, 0x46, 0x49, 0x49, 0x4d, 0x49, 0x44, 0x48,
+    0x4d, 0x41, 0x50, 0x48, 0x3d, 0x3f, 0x4d, 0x38, 0x46, 0x4a, 0x50, 0x4a,
+    0x45, 0x3e, 0x43, 0x36, 0x42, 0x48, 0x53, 0x54, 0x49, 0x43, 0x4b, 0x3a,
+    0x45, 0x48, 0x50, 0x45, 0x4a, 0x4c, 0x4a, 0x4d, 0x43, 0x4c, 0x55, 0x4e,
+    0x4c, 0x42, 0x45, 0x52, 0x52, 0x45, 0x46, 0x40, 0x54, 0x4c, 0x3d, 0x4e,
+    0x49, 0x4e, 0x44, 0x47, 0x45, 0x48, 0x4b, 0x50, 0x49, 0x4b, 0x44, 0x4b,
+    0x4f, 0x49, 0x47, 0x47, 0x53, 0x3f, 0x4b, 0x42, 0x45, 0x3e, 0x4d, 0x4d,
+    0x48, 0x51, 0x45, 0x40, 0x43, 0x43, 0x4e, 0x44, 0x51, 0x55, 0x4a, 0x3e,
+    0x45, 0x55, 0x58, 0x50, 0x50, 0x38, 0x44, 0x4f, 0x3b, 0x23, 0x3c, 0x55,
+    0x3c, 0x54, 0x49, 0x42, 0x44, 0x2f, 0x3e, 0x47, 0x42, 0x01, 0x42, 0x37,
+    0x3f, 0x42, 0x45, 0x45, 0x47, 0x2a, 0x52, 0x4b, 0x45, 0x3c, 0x47, 0x44,
+    0x44, 0x40, 0x50, 0x53, 0x48, 0x42, 0x4d, 0x36, 0x50, 0x3d, 0x49, 0x44,
+    0x4f, 0x4c, 0x4a, 0x42, 0x4d, 0x3e, 0x3d, 0x3f, 0x4e, 0x44, 0x4d, 0x4e,
+    0x54, 0x3d, 0x42, 0x46, 0x49, 0x47, 0x4b, 0x53, 0x45, 0x46, 0x47, 0x4a,
+    0x45, 0x3d, 0x4a, 0x5f, 0x51, 0x3e, 0x45, 0x45, 0x44, 0x3a, 0x4d, 0x57,
+    0x45, 0x47, 0x4d, 0x45, 0x4e, 0x4b, 0x51, 0x48, 0x4b, 0x4a, 0x3c, 0x4e,
+    0x51, 0x41, 0x4d, 0x36, 0x47, 0x4a, 0x46, 0x51, 0x4e, 0x4c, 0x52, 0x41,
+    0x55, 0x47, 0x41, 0x47, 0x4d, 0x47, 0x4b, 0x3d, 0x4a, 0x4a, 0x46, 0x49,
+    0x4d, 0x48, 0x46, 0x46, 0x4d, 0x52, 0x52, 0x48, 0x49, 0x3f, 0x4b, 0x4e,
+    0x4c, 0x49, 0x45, 0x47, 0x41, 0x4b, 0x44, 0x48, 0x52, 0x4b, 0x53, 0x44,
+    0x46, 0x4e, 0x44, 0x49, 0x52, 0x50, 0x46, 0x4b, 0x44, 0x43, 0x50, 0x49,
+    0x4a, 0x53, 0x45, 0x49, 0x52, 0x3f, 0x4a, 0x4e, 0x49, 0x4c, 0x4d, 0x4d,
+    0x40, 0x40, 0x3f, 0x4a, 0x47, 0x56, 0x51, 0x43, 0x40, 0x5a, 0x58, 0x52,
+    0x4f, 0x3d, 0x3d, 0x45, 0x38, 0x29, 0x33, 0x59, 0x45, 0x54, 0x3c, 0x42,
+    0x3f, 0x27, 0x3e, 0x49, 0x48, 0x06, 0x4a, 0x3f, 0x41, 0x49, 0x4c, 0x48,
+    0x46, 0x2b, 0x4a, 0x4f, 0x44, 0x46, 0x4c, 0x46, 0x4a, 0x3b, 0x4d, 0x4a,
+    0x40, 0x41, 0x45, 0x38, 0x51, 0x39, 0x46, 0x46, 0x41, 0x51, 0x4e, 0x41,
+    0x49, 0x44, 0x48, 0x4a, 0x4b, 0x46, 0x47, 0x46, 0x4a, 0x4c, 0x47, 0x48,
+    0x3d, 0x42, 0x50, 0x4f, 0x50, 0x4a, 0x4a, 0x48, 0x4a, 0x45, 0x45, 0x61,
+    0x4a, 0x4c, 0x49, 0x3d, 0x4b, 0x4a, 0x4a, 0x5a, 0x48, 0x49, 0x50, 0x4f,
+    0x42, 0x48, 0x3e, 0x44, 0x43, 0x3b, 0x4f, 0x54, 0x4b, 0x4a, 0x47, 0x31,
+    0x4a, 0x49, 0x47, 0x4e, 0x48, 0x48, 0x46, 0x42, 0x4a, 0x45, 0x4c, 0x49,
+    0x4b, 0x4e, 0x53, 0x43, 0x4c, 0x49, 0x4f, 0x4b, 0x46, 0x4c, 0x4b, 0x4e,
+    0x51, 0x4b, 0x49, 0x52, 0x44, 0x55, 0x45, 0x49, 0x4b, 0x4a, 0x50, 0x4c,
+    0x4d, 0x4a, 0x4b, 0x48, 0x41, 0x46, 0x47, 0x43, 0x4b, 0x3f, 0x54, 0x4a,
+    0x46, 0x49, 0x51, 0x48, 0x4e, 0x4a, 0x41, 0x52, 0x52, 0x4e, 0x53, 0x47,
+    0x42, 0x48, 0x43, 0x44, 0x54, 0x51, 0x40, 0x49, 0x4c, 0x48, 0x49, 0x44,
+    0x4c, 0x56, 0x52, 0x49, 0x3d, 0x59, 0x4f, 0x56, 0x56, 0x42, 0x46, 0x45,
+    0x3e, 0x28, 0x3f, 0x5b, 0x3f, 0x5a, 0x4c, 0x42, 0x44, 0x22, 0x3f, 0x46,
+    0x47, 0x0d, 0x3e, 0x41, 0x45, 0x49, 0x4a, 0x3b, 0x45, 0x2d, 0x4d, 0x4a,
+    0x44, 0x43, 0x49, 0x46, 0x4b, 0x47, 0x49, 0x45, 0x4e, 0x40, 0x4c, 0x3c,
+    0x42, 0x3e, 0x4b, 0x50, 0x48, 0x49, 0x4c, 0x42, 0x3c, 0x43, 0x50, 0x43,
+    0x49, 0x4e, 0x4e, 0x43, 0x46, 0x4c, 0x48, 0x4a, 0x43, 0x4c, 0x49, 0x4e,
+    0x47, 0x44, 0x50, 0x4c, 0x4a, 0x48, 0x47, 0x5f, 0x3f, 0x3e, 0x48, 0x4f,
+    0x4f, 0x49, 0x4a, 0x5f, 0x4e, 0x40, 0x4e, 0x48, 0x47, 0x44, 0x40, 0x4d,
+    0x3f, 0x4a, 0x53, 0x45, 0x3e, 0x50, 0x3f, 0x39, 0x50, 0x45, 0x45, 0x4b,
+    0x43, 0x41, 0x46, 0x41, 0x49, 0x47, 0x4b, 0x41, 0x3c, 0x4b, 0x46, 0x3f,
+    0x41, 0x4a, 0x4e, 0x4c, 0x49, 0x4c, 0x3f, 0x44, 0x53, 0x4c, 0x45, 0x49,
+    0x48, 0x4d, 0x48, 0x4a, 0x48, 0x4f, 0x45, 0x4d, 0x48, 0x4c, 0x41, 0x49,
+    0x42, 0x48, 0x53, 0x46, 0x4a, 0x46, 0x4b, 0x4f, 0x4c, 0x52, 0x4c, 0x51,
+    0x41, 0x4d, 0x49, 0x41, 0x49, 0x4f, 0x49, 0x42, 0x4a, 0x48, 0x51, 0x4a,
+    0x44, 0x4d, 0x55, 0x48, 0x47, 0x4d, 0x4d, 0x45, 0x42, 0x60, 0x4a, 0x51,
+    0x42, 0x54, 0x56, 0x56, 0x50, 0x4a, 0x3f, 0x4a, 0x40, 0x25, 0x3a, 0x59,
+    0x46, 0x58, 0x52, 0x46, 0x41, 0x28, 0x3d, 0x3e, 0x45, 0x13, 0x47, 0x41,
+    0x3d, 0x44, 0x48, 0x45, 0x49, 0x26, 0x46, 0x4c, 0x3b, 0x4a, 0x42, 0x47,
+    0x46, 0x41, 0x44, 0x52, 0x50, 0x4a, 0x4f, 0x40, 0x4b, 0x39, 0x42, 0x45,
+    0x4a, 0x4d, 0x4f, 0x3f, 0x42, 0x4f, 0x49, 0x45, 0x42, 0x4a, 0x46, 0x47,
+    0x48, 0x40, 0x4a, 0x46, 0x41, 0x3b, 0x48, 0x55, 0x4b, 0x4e, 0x4e, 0x48,
+    0x4b, 0x44, 0x46, 0x53, 0x48, 0x45, 0x4b, 0x53, 0x49, 0x43, 0x4a, 0x5c,
+    0x46, 0x45, 0x45, 0x49, 0x49, 0x49, 0x4c, 0x43, 0x4e, 0x4a, 0x41, 0x4a,
+    0x42, 0x43, 0x4a, 0x38, 0x44, 0x4a, 0x4b, 0x3f, 0x45, 0x49, 0x45, 0x38,
+    0x43, 0x40, 0x45, 0x4c, 0x47, 0x42, 0x3f, 0x42, 0x3e, 0x4a, 0x43, 0x50,
+    0x4a, 0x4e, 0x4f, 0x47, 0x4d, 0x49, 0x49, 0x47, 0x4a, 0x4d, 0x46, 0x4c,
+    0x4f, 0x3d, 0x52, 0x4a, 0x41, 0x44, 0x4b, 0x50, 0x4c, 0x52, 0x49, 0x50,
+    0x4b, 0x45, 0x49, 0x4d, 0x48, 0x55, 0x50, 0x47, 0x4e, 0x50, 0x4f, 0x48,
+    0x46, 0x4d, 0x4d, 0x41, 0x48, 0x51, 0x4b, 0x4c, 0x47, 0x51, 0x42, 0x42,
+    0x4d, 0x47, 0x43, 0x4c, 0x4c, 0x5a, 0x4e, 0x47, 0x3b, 0x59, 0x51, 0x57,
+    0x4c, 0x40, 0x46, 0x4c, 0x37, 0x2a, 0x35, 0x58, 0x44, 0x5b, 0x4c, 0x44,
+    0x3e, 0x2e, 0x3f, 0x43, 0x46, 0x23, 0x49, 0x3e, 0x41, 0x3f, 0x4b, 0x3e,
+    0x4e, 0x2f, 0x4d, 0x4a, 0x4e, 0x40, 0x4e, 0x41, 0x40, 0x3f, 0x4a, 0x42,
+    0x4d, 0x4c, 0x44, 0x47, 0x4e, 0x44, 0x40, 0x43, 0x4d, 0x49, 0x4f, 0x3d,
+    0x49, 0x3f, 0x51, 0x48, 0x42, 0x4a, 0x49, 0x47, 0x49, 0x46, 0x4a, 0x45,
+    0x45, 0x49, 0x53, 0x4d, 0x4c, 0x4e, 0x44, 0x50, 0x4b, 0x43, 0x4e, 0x5f,
+    0x3c, 0x40, 0x44, 0x46, 0x48, 0x4b, 0x42, 0x62, 0x4e, 0x50, 0x4c, 0x49,
+    0x4a, 0x4f, 0x44, 0x53, 0x42, 0x43, 0x49, 0x48, 0x4b, 0x3c, 0x4a, 0x37,
+    0x4c, 0x41, 0x49, 0x46, 0x46, 0x47, 0x43, 0x40, 0x4d, 0x4d, 0x4a, 0x48,
+    0x50, 0x4b, 0x50, 0x41, 0x44, 0x3e, 0x51, 0x47, 0x44, 0x4a, 0x44, 0x45,
+    0x48, 0x4d, 0x52, 0x4e, 0x44, 0x48, 0x4d, 0x43, 0x42, 0x45, 0x48, 0x52,
+    0x44, 0x42, 0x50, 0x42, 0x4d, 0x45, 0x48, 0x4d, 0x4f, 0x4e, 0x45, 0x49,
+    0x51, 0x48, 0x4f, 0x53, 0x4d, 0x4c, 0x48, 0x50, 0x4e, 0x4d, 0x50, 0x48,
+    0x49, 0x42, 0x4c, 0x42, 0x4b, 0x4b, 0x49, 0x48, 0x48, 0x49, 0x4a, 0x54,
+    0x44, 0x57, 0x4d, 0x4b, 0x3f, 0x56, 0x53, 0x5c, 0x50, 0x4e, 0x46, 0x49,
+    0x40, 0x24, 0x44, 0x58, 0x49, 0x54, 0x48, 0x49, 0x41, 0x22, 0x44, 0x3f,
+    0x48, 0x1c, 0x4d, 0x39, 0x3e, 0x4c, 0x3d, 0x4a, 0x48, 0x2d, 0x48, 0x3e,
+    0x3f, 0x3a, 0x46, 0x4e, 0x44, 0x43, 0x49, 0x51, 0x4d, 0x3c, 0x44, 0x41,
+    0x4e, 0x44, 0x42, 0x4c, 0x45, 0x48, 0x45, 0x46, 0x42, 0x46, 0x47, 0x42,
+    0x4f, 0x45, 0x47, 0x44, 0x48, 0x47, 0x4a, 0x42, 0x4d, 0x48, 0x3e, 0x53,
+    0x47, 0x4b, 0x44, 0x4b, 0x45, 0x4a, 0x50, 0x55, 0x4c, 0x45, 0x48, 0x43,
+    0x53, 0x3d, 0x4e, 0x5f, 0x42, 0x44, 0x4a, 0x4f, 0x3f, 0x48, 0x4e, 0x4b,
+    0x43, 0x48, 0x43, 0x41, 0x4a, 0x4b, 0x51, 0x39, 0x52, 0x46, 0x44, 0x49,
+    0x48, 0x45, 0x4c, 0x40, 0x45, 0x49, 0x51, 0x48, 0x45, 0x42, 0x45, 0x48,
+    0x40, 0x43, 0x3d, 0x47, 0x53, 0x54, 0x4d, 0x4a, 0x4a, 0x47, 0x48, 0x43,
+    0x4c, 0x46, 0x43, 0x4f, 0x49, 0x4c, 0x3f, 0x3d, 0x4b, 0x41, 0x40, 0x48,
+    0x4e, 0x4c, 0x4b, 0x40, 0x4c, 0x43, 0x49, 0x4d, 0x47, 0x4f, 0x47, 0x42,
+    0x47, 0x4a, 0x4d, 0x4f, 0x46, 0x4d, 0x51, 0x49, 0x48, 0x4d, 0x4e, 0x46,
+    0x47, 0x41, 0x44, 0x4d, 0x4b, 0x55, 0x4b, 0x4c, 0x41, 0x5e, 0x50, 0x45,
+    0x40, 0x55, 0x4b, 0x60, 0x55, 0x47, 0x3d, 0x4a, 0x42, 0x22, 0x46, 0x5a,
+    0x47, 0x53, 0x49, 0x44, 0x44, 0x27, 0x41, 0x4f, 0x3e, 0x22, 0x4a, 0x44,
+    0x49, 0x3e, 0x4e, 0x4d, 0x3f, 0x3a, 0x4c, 0x44, 0x4a, 0x44, 0x46, 0x51,
+    0x4f, 0x42, 0x4c, 0x4e, 0x39, 0x4b, 0x42, 0x39, 0x4b, 0x3e, 0x4f, 0x47,
+    0x4a, 0x4f, 0x3f, 0x4d, 0x43, 0x4c, 0x4a, 0x4b, 0x4b, 0x3d, 0x51, 0x46,
+    0x49, 0x4c, 0x47, 0x44, 0x43, 0x3d, 0x3c, 0x54, 0x4a, 0x47, 0x4d, 0x50,
+    0x4a, 0x46, 0x51, 0x62, 0x46, 0x4d, 0x4b, 0x46, 0x49, 0x3c, 0x50, 0x57,
+    0x47, 0x40, 0x3e, 0x4c, 0x4b, 0x3f, 0x55, 0x46, 0x3d, 0x45, 0x42, 0x4e,
+    0x50, 0x49, 0x46, 0x3a, 0x4c, 0x47, 0x4a, 0x49, 0x42, 0x42, 0x4a, 0x44,
+    0x42, 0x40, 0x49, 0x54, 0x46, 0x4b, 0x47, 0x45, 0x51, 0x47, 0x41, 0x42,
+    0x49, 0x50, 0x4e, 0x48, 0x4b, 0x4b, 0x47, 0x4a, 0x47, 0x49, 0x4b, 0x45,
+    0x4b, 0x54, 0x48, 0x54, 0x4b, 0x49, 0x51, 0x4a, 0x4a, 0x40, 0x46, 0x42,
+    0x44, 0x44, 0x4d, 0x4b, 0x47, 0x43, 0x45, 0x41, 0x3e, 0x49, 0x43, 0x51,
+    0x3e, 0x4b, 0x52, 0x46, 0x48, 0x3f, 0x4e, 0x51, 0x51, 0x49, 0x3f, 0x48,
+    0x4c, 0x4c, 0x52, 0x47, 0x43, 0x57, 0x44, 0x42, 0x40, 0x52, 0x50, 0x5d,
+    0x4f, 0x40, 0x42, 0x45, 0x46, 0x26, 0x3c, 0x51, 0x4b, 0x4e, 0x4b, 0x49,
+    0x46, 0x35, 0x49, 0x53, 0x49, 0x2b, 0x4d, 0x3e, 0x50, 0x44, 0x4f, 0x54,
+    0x46, 0x34, 0x49, 0x4d, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x44, 0x52, 0x41,
+    0x4d, 0x4c, 0x52, 0x41, 0x49, 0x3a, 0x4e, 0x49, 0x40, 0x4b, 0x45, 0x4d,
+    0x4b, 0x4a, 0x47, 0x49, 0x45, 0x49, 0x4d, 0x50, 0x3e, 0x47, 0x44, 0x51,
+    0x4c, 0x41, 0x45, 0x50, 0x47, 0x41, 0x4a, 0x52, 0x4b, 0x3d, 0x4b, 0x5b,
+    0x4c, 0x4c, 0x4d, 0x3f, 0x47, 0x44, 0x49, 0x5d, 0x4a, 0x53, 0x44, 0x45,
+    0x45, 0x46, 0x3d, 0x4f, 0x50, 0x3b, 0x44, 0x4e, 0x40, 0x41, 0x4c, 0x3a,
+    0x4a, 0x45, 0x49, 0x48, 0x45, 0x4a, 0x45, 0x36, 0x45, 0x4d, 0x4c, 0x49,
+    0x3f, 0x47, 0x4d, 0x40, 0x53, 0x48, 0x49, 0x4c, 0x47, 0x4f, 0x42, 0x44,
+    0x45, 0x40, 0x4a, 0x4c, 0x49, 0x4f, 0x4b, 0x4d, 0x42, 0x45, 0x3e, 0x4a,
+    0x48, 0x4a, 0x49, 0x50, 0x4c, 0x53, 0x50, 0x45, 0x4b, 0x4c, 0x46, 0x4f,
+    0x44, 0x43, 0x54, 0x50, 0x3f, 0x48, 0x42, 0x4b, 0x43, 0x3f, 0x4d, 0x4c,
+    0x43, 0x49, 0x4a, 0x47, 0x54, 0x4b, 0x4f, 0x4d, 0x44, 0x47, 0x49, 0x4e,
+    0x4e, 0x55, 0x40, 0x46, 0x44, 0x56, 0x4e, 0x65, 0x4f, 0x3f, 0x43, 0x48,
+    0x39, 0x27, 0x43, 0x55, 0x4b, 0x4c, 0x44, 0x46, 0x42, 0x34, 0x44, 0x52,
+    0x43, 0x22, 0x4e, 0x41, 0x49, 0x48, 0x49, 0x51, 0x3b, 0x37, 0x4b, 0x40,
+    0x4f, 0x45, 0x53, 0x4c, 0x47, 0x46, 0x47, 0x4c, 0x3e, 0x44, 0x45, 0x49,
+    0x48, 0x50, 0x45, 0x40, 0x46, 0x4c, 0x47, 0x4d, 0x44, 0x48, 0x49, 0x50,
+    0x4f, 0x4a, 0x46, 0x55, 0x4e, 0x42, 0x4c, 0x4c, 0x50, 0x48, 0x3d, 0x55,
+    0x46, 0x3e, 0x4a, 0x4b, 0x4f, 0x46, 0x46, 0x60, 0x50, 0x3f, 0x55, 0x40,
+    0x42, 0x44, 0x48, 0x63, 0x50, 0x3d, 0x45, 0x4f, 0x4e, 0x41, 0x47, 0x48,
+    0x4a, 0x3c, 0x3d, 0x46, 0x3f, 0x42, 0x43, 0x37, 0x4f, 0x4f, 0x50, 0x47,
+    0x47, 0x4b, 0x52, 0x40, 0x3f, 0x44, 0x4a, 0x40, 0x4d, 0x44, 0x4e, 0x37,
+    0x43, 0x48, 0x47, 0x3f, 0x51, 0x4d, 0x45, 0x42, 0x41, 0x46, 0x3d, 0x53,
+    0x4f, 0x4b, 0x54, 0x45, 0x51, 0x40, 0x4a, 0x4a, 0x48, 0x4f, 0x43, 0x4a,
+    0x4f, 0x4c, 0x4c, 0x4f, 0x48, 0x4c, 0x44, 0x4e, 0x43, 0x46, 0x4f, 0x4a,
+    0x43, 0x41, 0x49, 0x49, 0x47, 0x53, 0x45, 0x49, 0x4e, 0x46, 0x4c, 0x4e,
+    0x3c, 0x49, 0x44, 0x45, 0x4c, 0x42, 0x49, 0x41, 0x48, 0x58, 0x54, 0x4d,
+    0x35, 0x52, 0x4e, 0x5b, 0x4f, 0x40, 0x3e, 0x46, 0x46, 0x36, 0x3d, 0x60,
+    0x4d, 0x49, 0x4a, 0x43, 0x44, 0x36, 0x49, 0x67, 0x4a, 0x2d, 0x4b, 0x40,
+    0x3f, 0x49, 0x43, 0x5f, 0x45, 0x3c, 0x49, 0x4c, 0x4a, 0x43, 0x48, 0x55,
+    0x49, 0x46, 0x49, 0x46, 0x44, 0x4e, 0x42, 0x4e, 0x40, 0x45, 0x42, 0x52,
+    0x4a, 0x40, 0x4a, 0x44, 0x40, 0x45, 0x54, 0x3d, 0x4c, 0x3e, 0x4c, 0x55,
+    0x4d, 0x45, 0x4d, 0x51, 0x4a, 0x4b, 0x44, 0x5b, 0x48, 0x3d, 0x3e, 0x46,
+    0x4f, 0x4d, 0x3f, 0x62, 0x4d, 0x45, 0x3f, 0x47, 0x47, 0x47, 0x44, 0x5b,
+    0x4b, 0x4f, 0x51, 0x4c, 0x4a, 0x47, 0x48, 0x5b, 0x47, 0x40, 0x4a, 0x47,
+    0x42, 0x44, 0x46, 0x46, 0x45, 0x48, 0x4a, 0x3f, 0x40, 0x4f, 0x48, 0x3a,
+    0x49, 0x52, 0x4a, 0x53, 0x43, 0x4c, 0x4b, 0x4a, 0x4a, 0x4a, 0x4e, 0x42,
+    0x4b, 0x46, 0x3d, 0x50, 0x51, 0x4b, 0x4b, 0x4f, 0x50, 0x4c, 0x4f, 0x4c,
+    0x4d, 0x41, 0x41, 0x3c, 0x40, 0x43, 0x54, 0x51, 0x48, 0x3d, 0x48, 0x51,
+    0x42, 0x42, 0x4c, 0x4e, 0x4d, 0x4b, 0x49, 0x43, 0x48, 0x47, 0x4b, 0x49,
+    0x49, 0x4e, 0x4d, 0x46, 0x4c, 0x52, 0x49, 0x49, 0x51, 0x4e, 0x45, 0x47,
+    0x44, 0x47, 0x42, 0x4a, 0x46, 0x59, 0x48, 0x48, 0x4b, 0x4f, 0x4c, 0x5e,
+    0x5c, 0x45, 0x3f, 0x48, 0x3d, 0x3f, 0x37, 0x5a, 0x4b, 0x4b, 0x45, 0x49,
+    0x3e, 0x42, 0x41, 0x6b, 0x49, 0x2d, 0x45, 0x43, 0x47, 0x45, 0x49, 0x61,
+    0x3d, 0x3b, 0x49, 0x43, 0x49, 0x4b, 0x4b, 0x55, 0x4b, 0x47, 0x46, 0x46,
+    0x48, 0x4d, 0x49, 0x4f, 0x4a, 0x4c, 0x42, 0x51, 0x41, 0x44, 0x45, 0x4f,
+    0x4e, 0x44, 0x3f, 0x55, 0x3e, 0x4a, 0x45, 0x50, 0x46, 0x42, 0x41, 0x49,
+    0x49, 0x47, 0x49, 0x61, 0x47, 0x40, 0x41, 0x4e, 0x4d, 0x4b, 0x4a, 0x5e,
+    0x52, 0x49, 0x4b, 0x52, 0x51, 0x55, 0x42, 0x61, 0x53, 0x4c, 0x48, 0x4a,
+    0x4e, 0x48, 0x48, 0x57, 0x4c, 0x40, 0x40, 0x48, 0x45, 0x43, 0x3e, 0x46,
+    0x43, 0x4a, 0x45, 0x45, 0x44, 0x4f, 0x44, 0x40, 0x49, 0x48, 0x4e, 0x49,
+    0x4a, 0x4e, 0x49, 0x51, 0x46, 0x4f, 0x47, 0x44, 0x42, 0x4d, 0x43, 0x4e,
+    0x4f, 0x4d, 0x44, 0x51, 0x47, 0x49, 0x40, 0x57, 0x4b, 0x49, 0x47, 0x4c,
+    0x4d, 0x4d, 0x3e, 0x47, 0x45, 0x41, 0x50, 0x4b, 0x4b, 0x45, 0x42, 0x4e,
+    0x48, 0x47, 0x4e, 0x4b, 0x56, 0x4c, 0x4f, 0x52, 0x51, 0x49, 0x4d, 0x4a,
+    0x4b, 0x52, 0x4d, 0x55, 0x4b, 0x4e, 0x4e, 0x4b, 0x51, 0x57, 0x47, 0x42,
+    0x49, 0x48, 0x56, 0x44, 0x52, 0x56, 0x53, 0x5a, 0x63, 0x53, 0x4c, 0x4c,
+    0x43, 0x56, 0x3c, 0x57, 0x47, 0x47, 0x4d, 0x52, 0x43, 0x48, 0x45, 0x5f,
+    0x45, 0x29, 0x47, 0x45, 0x48, 0x40, 0x41, 0x4b, 0x3f, 0x39, 0x49, 0x4e,
+    0x47, 0x55, 0x42, 0x56, 0x4d, 0x43, 0x48, 0x44, 0x45, 0x53, 0x43, 0x46,
+    0x49, 0x43, 0x49, 0x4a, 0x40, 0x4e, 0x4a, 0x4a, 0x47, 0x43, 0x45, 0x4d,
+    0x4a, 0x47, 0x3f, 0x53, 0x45, 0x43, 0x4b, 0x4c, 0x42, 0x47, 0x47, 0x5f,
+    0x48, 0x48, 0x46, 0x44, 0x50, 0x47, 0x41, 0x64, 0x4e, 0x46, 0x49, 0x4a,
+    0x4d, 0x55, 0x42, 0x55, 0x46, 0x3d, 0x49, 0x43, 0x52, 0x52, 0x47, 0x52,
+    0x4e, 0x46, 0x47, 0x41, 0x49, 0x4d, 0x50, 0x47, 0x42, 0x49, 0x41, 0x42,
+    0x4b, 0x48, 0x49, 0x42, 0x4d, 0x48, 0x51, 0x54, 0x43, 0x56, 0x4c, 0x52,
+    0x53, 0x4d, 0x54, 0x4a, 0x51, 0x50, 0x48, 0x4c, 0x4e, 0x48, 0x4c, 0x4c,
+    0x52, 0x49, 0x4a, 0x4e, 0x4e, 0x41, 0x4f, 0x53, 0x49, 0x52, 0x42, 0x4b,
+    0x50, 0x46, 0x50, 0x4a, 0x53, 0x56, 0x46, 0x4f, 0x4b, 0x49, 0x3d, 0x41,
+    0x4c, 0x52, 0x42, 0x50, 0x4d, 0x45, 0x4e, 0x51, 0x4b, 0x4c, 0x46, 0x42,
+    0x41, 0x4b, 0x40, 0x4a, 0x42, 0x57, 0x4f, 0x43, 0x40, 0x50, 0x4c, 0x51,
+    0x4f, 0x48, 0x3a, 0x4e, 0x51, 0x40, 0x49, 0x66, 0x4b, 0x42, 0x48, 0x3c,
+    0x5b, 0x47, 0x53, 0x40, 0x4a, 0x48, 0x35, 0x44, 0x5f, 0x50, 0x4a, 0x3c,
+    0x41, 0x45, 0x48, 0x3b, 0x42, 0x59, 0x43, 0x4b, 0x48, 0x49, 0x4a, 0x40,
+    0x4f, 0x5c, 0x50, 0x54, 0x53, 0x55, 0x4c, 0x4a, 0x43, 0x46, 0x49, 0x47,
+    0x49, 0x48, 0x4b, 0x43, 0x42, 0x44, 0x42, 0x46, 0x44, 0x3f, 0x4b, 0x42,
+    0x4d, 0x49, 0x41, 0x46, 0x47, 0x51, 0x51, 0x44, 0x4c, 0x54, 0x4e, 0x4b,
+    0x42, 0x52, 0x4e, 0x4c, 0x4b, 0x4a, 0x50, 0x4e, 0x44, 0x4b, 0x4e, 0x4e,
+    0x4f, 0x42, 0x4b, 0x48, 0x46, 0x43, 0x48, 0x54, 0x4b, 0x4e, 0x48, 0x4f,
+    0x4a, 0x4d, 0x43, 0x4e, 0x47, 0x50, 0x4a, 0x44, 0x47, 0x52, 0x46, 0x53,
+    0x4a, 0x40, 0x46, 0x54, 0x50, 0x4a, 0x47, 0x51, 0x49, 0x45, 0x4b, 0x4e,
+    0x4b, 0x46, 0x4c, 0x4c, 0x52, 0x47, 0x45, 0x45, 0x4a, 0x47, 0x4c, 0x52,
+    0x44, 0x51, 0x47, 0x42, 0x47, 0x43, 0x43, 0x49, 0x52, 0x5a, 0x55, 0x3e,
+    0x45, 0x4b, 0x4c, 0x46, 0x4f, 0x4b, 0x45, 0x49, 0x4a, 0x4e, 0x4a, 0x50,
+    0x3e, 0x4e, 0x42, 0x4e, 0x44, 0x55, 0x3d, 0x4a, 0x4d, 0x49, 0x4d, 0x42,
+    0x49, 0x4e, 0x50, 0x44, 0x4b, 0x3c, 0x41, 0x49, 0x51, 0x49, 0x3c, 0x4e,
+    0x4c, 0x39, 0x4c, 0x72, 0x44, 0x4b, 0x49, 0x42, 0x5f, 0x48, 0x4a, 0x48,
+    0x41, 0x4c, 0x43, 0x40, 0x62, 0x5e, 0x47, 0x3c, 0x4a, 0x4c, 0x55, 0x49,
+    0x4b, 0x52, 0x4e, 0x4b, 0x4d, 0x48, 0x4c, 0x3c, 0x3f, 0x4f, 0x4e, 0x48,
+    0x45, 0x55, 0x4a, 0x46, 0x48, 0x3d, 0x45, 0x44, 0x4b, 0x4a, 0x46, 0x3a,
+    0x4e, 0x44, 0x4d, 0x49, 0x49, 0x49, 0x40, 0x3e, 0x40, 0x47, 0x48, 0x43,
+    0x3f, 0x51, 0x46, 0x4c, 0x45, 0x4c, 0x49, 0x44, 0x3e, 0x57, 0x49, 0x4e,
+    0x48, 0x3f, 0x48, 0x47, 0x53, 0x4d, 0x50, 0x51, 0x49, 0x42, 0x45, 0x44,
+    0x49, 0x49, 0x46, 0x4b, 0x45, 0x49, 0x4f, 0x49, 0x46, 0x48, 0x4c, 0x55,
+    0x46, 0x51, 0x48, 0x4a, 0x48, 0x54, 0x4b, 0x5a, 0x4c, 0x47, 0x40, 0x47,
+    0x40, 0x55, 0x50, 0x52, 0x4a, 0x4b, 0x4f, 0x49, 0x4b, 0x50, 0x4b, 0x5b,
+    0x51, 0x53, 0x4f, 0x4e, 0x49, 0x48, 0x44, 0x52, 0x46, 0x4e, 0x47, 0x48,
+    0x44, 0x43, 0x49, 0x55, 0x48, 0x58, 0x4f, 0x46, 0x45, 0x53, 0x45, 0x4a,
+    0x4c, 0x4c, 0x49, 0x46, 0x47, 0x4d, 0x41, 0x4d, 0x4f, 0x59, 0x4a, 0x49,
+    0x46, 0x4e, 0x44, 0x49, 0x4d, 0x48, 0x54, 0x47, 0x48, 0x4e, 0x48, 0x43,
+    0x46, 0x41, 0x46, 0x44, 0x52, 0x46, 0x42, 0x4c, 0x4c, 0x31, 0x4d, 0x6f,
+    0x51, 0x4f, 0x4d, 0x43, 0x5c, 0x48, 0x49, 0x49, 0x46, 0x4c, 0x43, 0x3b,
+    0x5d, 0x63, 0x58, 0x46, 0x49, 0x45, 0x4e, 0x48, 0x49, 0x5d, 0x45, 0x50,
+    0x56, 0x4d, 0x57, 0x37, 0x40, 0x55, 0x43, 0x4b, 0x4e, 0x46, 0x4c, 0x3b,
+    0x3d, 0x4b, 0x49, 0x4b, 0x52, 0x47, 0x4d, 0x34, 0x4c, 0x4c, 0x47, 0x4e,
+    0x4d, 0x4c, 0x3d, 0x3f, 0x4a, 0x49, 0x44, 0x45, 0x4a, 0x54, 0x43, 0x44,
+    0x50, 0x4b, 0x4d, 0x4c, 0x4e, 0x48, 0x46, 0x51, 0x43, 0x48, 0x48, 0x48,
+    0x42, 0x44, 0x4e, 0x48, 0x47, 0x45, 0x48, 0x51, 0x53, 0x4a, 0x4f, 0x58,
+    0x42, 0x4d, 0x48, 0x4f, 0x4c, 0x45, 0x4a, 0x57, 0x4b, 0x43, 0x4d, 0x4b,
+    0x4a, 0x4e, 0x4c, 0x5f, 0x3f, 0x4f, 0x4a, 0x42, 0x4b, 0x48, 0x4d, 0x62,
+    0x4f, 0x4b, 0x50, 0x4c, 0x45, 0x49, 0x44, 0x53, 0x4a, 0x4f, 0x45, 0x56,
+    0x4b, 0x44, 0x41, 0x53, 0x49, 0x48, 0x4d, 0x49, 0x47, 0x4b, 0x46, 0x4c,
+    0x49, 0x4b, 0x4c, 0x54, 0x4f, 0x4b, 0x47, 0x49, 0x44, 0x4a, 0x4e, 0x53,
+    0x4f, 0x49, 0x54, 0x4e, 0x4a, 0x48, 0x42, 0x54, 0x51, 0x46, 0x4b, 0x52,
+    0x45, 0x48, 0x51, 0x4a, 0x40, 0x4a, 0x50, 0x45, 0x4a, 0x46, 0x49, 0x46,
+    0x54, 0x46, 0x42, 0x48, 0x50, 0x36, 0x4a, 0x6b, 0x46, 0x59, 0x51, 0x47,
+    0x5f, 0x4d, 0x43, 0x4d, 0x44, 0x4d, 0x42, 0x3b, 0x65, 0x6a, 0x56, 0x48,
+    0x4d, 0x4c, 0x52, 0x4a, 0x4d, 0x61, 0x52, 0x4b, 0x47, 0x4f, 0x48, 0x49,
+    0x3f, 0x5b, 0x45, 0x51, 0x48, 0x48, 0x4b, 0x3c, 0x3b, 0x4c, 0x54, 0x52,
+    0x4f, 0x51, 0x53, 0x31, 0x47, 0x4c, 0x45, 0x4a, 0x42, 0x4b, 0x47, 0x40,
+    0x41, 0x49, 0x4c, 0x46, 0x4b, 0x53, 0x46, 0x49, 0x44, 0x4b, 0x4e, 0x4b,
+    0x48, 0x51, 0x49, 0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x45, 0x43, 0x46, 0x56,
+    0x42, 0x4b, 0x49, 0x4e, 0x4e, 0x53, 0x42, 0x5c, 0x4b, 0x46, 0x49, 0x46,
+    0x4e, 0x41, 0x42, 0x67, 0x41, 0x49, 0x4d, 0x48, 0x49, 0x4e, 0x3f, 0x61,
+    0x48, 0x4a, 0x40, 0x42, 0x4c, 0x51, 0x50, 0x63, 0x49, 0x44, 0x49, 0x47,
+    0x45, 0x4d, 0x49, 0x61, 0x3f, 0x48, 0x40, 0x41, 0x49, 0x49, 0x45, 0x57,
+    0x45, 0x46, 0x4d, 0x46, 0x4c, 0x4a, 0x4d, 0x4b, 0x43, 0x54, 0x4b, 0x49,
+    0x4c, 0x49, 0x41, 0x49, 0x4b, 0x47, 0x45, 0x4b, 0x44, 0x43, 0x46, 0x3f,
+    0x47, 0x47, 0x43, 0x4c, 0x49, 0x4c, 0x3d, 0x4d, 0x4b, 0x54, 0x4a, 0x4f,
+    0x44, 0x4c, 0x4b, 0x47, 0x4c, 0x45, 0x3d, 0x52, 0x58, 0x4b, 0x45, 0x4e,
+    0x48, 0x39, 0x53, 0x70, 0x4a, 0x5d, 0x4c, 0x4e, 0x5a, 0x4f, 0x46, 0x4b,
+    0x3e, 0x4f, 0x44, 0x3d, 0x66, 0x6b, 0x50, 0x4d, 0x4d, 0x57, 0x52, 0x4a,
+    0x4c, 0x5b, 0x4e, 0x53, 0x4d, 0x54, 0x50, 0x42, 0x3c, 0x5d, 0x4a, 0x4c,
+    0x56, 0x52, 0x50, 0x40, 0x48, 0x4c, 0x4d, 0x49, 0x49, 0x4f, 0x51, 0x38,
+    0x42, 0x49, 0x4d, 0x4f, 0x45, 0x40, 0x4d, 0x41, 0x4b, 0x4a, 0x47, 0x51,
+    0x4b, 0x53, 0x4c, 0x4a, 0x51, 0x4c, 0x42, 0x56, 0x48, 0x4a, 0x47, 0x58,
+    0x49, 0x46, 0x52, 0x4a, 0x45, 0x47, 0x51, 0x54, 0x4f, 0x50, 0x50, 0x53,
+    0x49, 0x4a, 0x4d, 0x56, 0x56, 0x4b, 0x4d, 0x45, 0x40, 0x4d, 0x48, 0x60,
+    0x4e, 0x56, 0x48, 0x4b, 0x47, 0x45, 0x47, 0x62, 0x4e, 0x4f, 0x41, 0x49,
+    0x48, 0x57, 0x44, 0x64, 0x4f, 0x4f, 0x49, 0x44, 0x49, 0x4c, 0x3f, 0x53,
+    0x40, 0x41, 0x4e, 0x4b, 0x4d, 0x54, 0x42, 0x53, 0x4e, 0x41, 0x49, 0x44,
+    0x41, 0x45, 0x4d, 0x4f, 0x47, 0x51, 0x45, 0x4a, 0x42, 0x45, 0x4e, 0x40,
+    0x4b, 0x52, 0x48, 0x47, 0x4e, 0x4f, 0x47, 0x41, 0x48, 0x53, 0x47, 0x47,
+    0x46, 0x42, 0x48, 0x4b, 0x42, 0x4c, 0x49, 0x4c, 0x45, 0x4c, 0x54, 0x45,
+    0x4c, 0x43, 0x4e, 0x49, 0x56, 0x47, 0x45, 0x4f, 0x4d, 0x3a, 0x58, 0x74,
+    0x49, 0x5b, 0x4c, 0x4f, 0x64, 0x4e, 0x45, 0x43, 0x44, 0x5b, 0x43, 0x41,
+    0x63, 0x70, 0x55, 0x45, 0x4a, 0x4a, 0x4d, 0x51, 0x4b, 0x5a, 0x51, 0x57,
+    0x54, 0x5b, 0x55, 0x44, 0x38, 0x57, 0x4e, 0x50, 0x4e, 0x56, 0x57, 0x3a,
+    0x3a, 0x4b, 0x57, 0x4c, 0x51, 0x53, 0x4d, 0x3b, 0x44, 0x43, 0x47, 0x4c,
+    0x48, 0x59, 0x51, 0x41, 0x43, 0x44, 0x51, 0x51, 0x4a, 0x54, 0x51, 0x4b,
+    0x4e, 0x45, 0x51, 0x4a, 0x49, 0x4a, 0x4f, 0x52, 0x4c, 0x3e, 0x4e, 0x55,
+    0x42, 0x46, 0x46, 0x4a, 0x42, 0x52, 0x49, 0x47, 0x4a, 0x56, 0x4f, 0x50,
+    0x46, 0x4f, 0x43, 0x51, 0x53, 0x46, 0x40, 0x60, 0x44, 0x4d, 0x46, 0x54,
+    0x3d, 0x49, 0x43, 0x64, 0x45, 0x4d, 0x50, 0x49, 0x4f, 0x4d, 0x53, 0x60,
+    0x4a, 0x52, 0x49, 0x47, 0x48, 0x5a, 0x48, 0x58, 0x4e, 0x4f, 0x43, 0x4f,
+    0x50, 0x51, 0x41, 0x52, 0x4c, 0x4d, 0x45, 0x42, 0x41, 0x4c, 0x44, 0x54,
+    0x4e, 0x4d, 0x4a, 0x47, 0x40, 0x4a, 0x3e, 0x47, 0x4c, 0x58, 0x46, 0x46,
+    0x55, 0x4c, 0x4d, 0x45, 0x49, 0x51, 0x53, 0x46, 0x46, 0x43, 0x43, 0x48,
+    0x52, 0x3d, 0x4b, 0x4e, 0x49, 0x47, 0x3f, 0x3d, 0x4f, 0x45, 0x44, 0x3f,
+    0x5a, 0x43, 0x4b, 0x4d, 0x51, 0x35, 0x54, 0x76, 0x4f, 0x5e, 0x4c, 0x50,
+    0x5a, 0x51, 0x46, 0x49, 0x44, 0x61, 0x4f, 0x41, 0x67, 0x72, 0x56, 0x4f,
+    0x42, 0x48, 0x4b, 0x52, 0x46, 0x60, 0x50, 0x4e, 0x4a, 0x5b, 0x5f, 0x46,
+    0x31, 0x5b, 0x4a, 0x48, 0x4b, 0x58, 0x51, 0x41, 0x37, 0x4e, 0x4f, 0x55,
+    0x51, 0x5c, 0x4f, 0x42, 0x4b, 0x4e, 0x4f, 0x54, 0x4f, 0x52, 0x43, 0x43,
+    0x48, 0x53, 0x53, 0x41, 0x4b, 0x49, 0x4e, 0x50, 0x46, 0x4c, 0x4f, 0x49,
+    0x42, 0x49, 0x4c, 0x4c, 0x4c, 0x41, 0x4e, 0x48, 0x47, 0x4c, 0x49, 0x53,
+    0x44, 0x46, 0x51, 0x53, 0x45, 0x52, 0x4e, 0x53, 0x50, 0x58, 0x42, 0x45,
+    0x44, 0x42, 0x48, 0x58, 0x4e, 0x4d, 0x54, 0x56, 0x4c, 0x46, 0x4a, 0x58,
+    0x48, 0x4f, 0x47, 0x51, 0x47, 0x4f, 0x4f, 0x5b, 0x41, 0x4e, 0x45, 0x45,
+    0x4a, 0x50, 0x3e, 0x57, 0x48, 0x4e, 0x41, 0x4c, 0x45, 0x51, 0x46, 0x4c,
+    0x46, 0x4f, 0x42, 0x45, 0x4b, 0x4c, 0x49, 0x4c, 0x44, 0x4f, 0x4e, 0x4d,
+    0x48, 0x56, 0x43, 0x48, 0x42, 0x54, 0x48, 0x43, 0x3e, 0x51, 0x43, 0x47,
+    0x47, 0x47, 0x49, 0x4d, 0x46, 0x4e, 0x52, 0x42, 0x48, 0x4e, 0x4c, 0x4a,
+    0x4d, 0x3e, 0x43, 0x40, 0x48, 0x41, 0x47, 0x4f, 0x5e, 0x49, 0x40, 0x4c,
+    0x50, 0x42, 0x56, 0x75, 0x51, 0x5e, 0x51, 0x4e, 0x62, 0x58, 0x49, 0x47,
+    0x51, 0x59, 0x46, 0x46, 0x6c, 0x72, 0x55, 0x44, 0x4c, 0x4a, 0x4d, 0x59,
+    0x53, 0x64, 0x4d, 0x51, 0x55, 0x5e, 0x59, 0x50, 0x30, 0x58, 0x50, 0x4c,
+    0x4c, 0x60, 0x59, 0x42, 0x32, 0x53, 0x50, 0x55, 0x4d, 0x53, 0x59, 0x43,
+    0x3e, 0x49, 0x4f, 0x52, 0x4d, 0x51, 0x47, 0x45, 0x4d, 0x4e, 0x53, 0x4e,
+    0x54, 0x4f, 0x4d, 0x4d, 0x4e, 0x40, 0x47, 0x53, 0x53, 0x49, 0x56, 0x4d,
+    0x4d, 0x3a, 0x4c, 0x4e, 0x45, 0x4a, 0x47, 0x45, 0x53, 0x4a, 0x4e, 0x52,
+    0x4d, 0x4e, 0x48, 0x56, 0x4e, 0x4a, 0x4d, 0x52, 0x49, 0x4e, 0x4e, 0x58,
+    0x47, 0x50, 0x4c, 0x54, 0x49, 0x42, 0x46, 0x54, 0x50, 0x54, 0x54, 0x46,
+    0x40, 0x49, 0x4b, 0x57, 0x4b, 0x59, 0x44, 0x46, 0x52, 0x55, 0x51, 0x55,
+    0x4f, 0x50, 0x4d, 0x4d, 0x48, 0x50, 0x4e, 0x49, 0x4e, 0x42, 0x45, 0x3f,
+    0x4d, 0x4f, 0x51, 0x47, 0x4a, 0x4c, 0x4b, 0x4b, 0x46, 0x4d, 0x44, 0x52,
+    0x4d, 0x44, 0x40, 0x4d, 0x54, 0x46, 0x54, 0x44, 0x4b, 0x46, 0x47, 0x45,
+    0x50, 0x45, 0x45, 0x4b, 0x4c, 0x48, 0x3f, 0x55, 0x4a, 0x45, 0x49, 0x4e,
+    0x40, 0x49, 0x4a, 0x41, 0x56, 0x4b, 0x49, 0x4e, 0x4a, 0x41, 0x50, 0x70,
+    0x56, 0x59, 0x4b, 0x55, 0x58, 0x59, 0x49, 0x47, 0x4a, 0x5a, 0x4c, 0x46,
+    0x62, 0x7b, 0x58, 0x51, 0x44, 0x47, 0x44, 0x57, 0x4f, 0x65, 0x4e, 0x50,
+    0x4d, 0x67, 0x5c, 0x4a, 0x2b, 0x61, 0x48, 0x4b, 0x4b, 0x5d, 0x5c, 0x48,
+    0x39, 0x50, 0x45, 0x4d, 0x53, 0x60, 0x53, 0x46, 0x42, 0x46, 0x50, 0x45,
+    0x4f, 0x4e, 0x46, 0x4a, 0x4d, 0x51, 0x54, 0x47, 0x59, 0x4b, 0x58, 0x4a,
+    0x50, 0x3d, 0x59, 0x48, 0x45, 0x4e, 0x4e, 0x47, 0x4f, 0x47, 0x4d, 0x4b,
+    0x52, 0x42, 0x4c, 0x48, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x4c, 0x4d, 0x51,
+    0x49, 0x4f, 0x4c, 0x47, 0x47, 0x48, 0x47, 0x59, 0x4f, 0x4f, 0x53, 0x49,
+    0x4e, 0x4b, 0x4f, 0x5a, 0x50, 0x42, 0x47, 0x50, 0x4a, 0x54, 0x47, 0x5a,
+    0x43, 0x49, 0x47, 0x4e, 0x49, 0x4d, 0x43, 0x54, 0x4c, 0x53, 0x4e, 0x4e,
+    0x42, 0x43, 0x48, 0x46, 0x4f, 0x43, 0x43, 0x45, 0x51, 0x47, 0x4b, 0x4f,
+    0x56, 0x48, 0x48, 0x49, 0x46, 0x45, 0x4d, 0x52, 0x47, 0x4b, 0x46, 0x50,
+    0x3e, 0x4e, 0x4c, 0x43, 0x45, 0x4d, 0x53, 0x43, 0x46, 0x45, 0x44, 0x52,
+    0x45, 0x49, 0x49, 0x51, 0x3d, 0x4a, 0x4d, 0x46, 0x42, 0x41, 0x4e, 0x48,
+    0x5a, 0x49, 0x49, 0x49, 0x4f, 0x3d, 0x56, 0x68, 0x56, 0x67, 0x4b, 0x57,
+    0x5f, 0x5c, 0x40, 0x4a, 0x4a, 0x54, 0x4c, 0x47, 0x64, 0x7a, 0x54, 0x48,
+    0x46, 0x45, 0x46, 0x57, 0x4e, 0x61, 0x4f, 0x50, 0x4d, 0x64, 0x5b, 0x43,
+    0x2d, 0x60, 0x55, 0x51, 0x4c, 0x54, 0x4f, 0x4e, 0x2f, 0x50, 0x4f, 0x52,
+    0x50, 0x61, 0x54, 0x4b, 0x3d, 0x4c, 0x47, 0x51, 0x4a, 0x54, 0x4b, 0x42,
+    0x3b, 0x55, 0x47, 0x50, 0x4f, 0x49, 0x4a, 0x46, 0x43, 0x44, 0x45, 0x47,
+    0x46, 0x4b, 0x4f, 0x46, 0x43, 0x47, 0x4a, 0x4e, 0x51, 0x43, 0x55, 0x47,
+    0x4d, 0x46, 0x4c, 0x4c, 0x49, 0x4d, 0x43, 0x51, 0x47, 0x51, 0x52, 0x4a,
+    0x46, 0x4f, 0x49, 0x52, 0x50, 0x4a, 0x43, 0x53, 0x46, 0x4e, 0x50, 0x54,
+    0x45, 0x3a, 0x4a, 0x4a, 0x4c, 0x50, 0x4b, 0x54, 0x43, 0x4f, 0x4e, 0x45,
+    0x49, 0x4f, 0x46, 0x53, 0x4d, 0x51, 0x52, 0x53, 0x3d, 0x4a, 0x47, 0x4e,
+    0x43, 0x4a, 0x53, 0x48, 0x4a, 0x4c, 0x4a, 0x4a, 0x42, 0x53, 0x3e, 0x43,
+    0x4f, 0x4c, 0x47, 0x48, 0x54, 0x4d, 0x48, 0x48, 0x4e, 0x4c, 0x43, 0x51,
+    0x42, 0x49, 0x44, 0x3e, 0x49, 0x51, 0x4a, 0x4d, 0x4f, 0x49, 0x45, 0x44,
+    0x4e, 0x41, 0x48, 0x4b, 0x4c, 0x49, 0x46, 0x47, 0x5d, 0x4c, 0x4d, 0x50,
+    0x45, 0x40, 0x4e, 0x6a, 0x4f, 0x62, 0x53, 0x50, 0x5c, 0x5e, 0x4a, 0x4c,
+    0x50, 0x56, 0x52, 0x42, 0x60, 0x7e, 0x5b, 0x4b, 0x43, 0x41, 0x4c, 0x56,
+    0x46, 0x5f, 0x4d, 0x49, 0x43, 0x65, 0x5c, 0x4d, 0x2c, 0x61, 0x48, 0x4c,
+    0x44, 0x55, 0x5c, 0x49, 0x37, 0x54, 0x4e, 0x57, 0x52, 0x5c, 0x50, 0x49,
+    0x3e, 0x4d, 0x4f, 0x4f, 0x51, 0x4c, 0x48, 0x43, 0x4a, 0x5a, 0x4d, 0x4b,
+    0x4e, 0x58, 0x54, 0x49, 0x51, 0x42, 0x49, 0x4f, 0x46, 0x45, 0x52, 0x3d,
+    0x4b, 0x4b, 0x43, 0x54, 0x47, 0x47, 0x4c, 0x42, 0x4b, 0x49, 0x45, 0x46,
+    0x46, 0x4a, 0x51, 0x47, 0x47, 0x4f, 0x48, 0x4a, 0x3f, 0x4c, 0x4b, 0x57,
+    0x4a, 0x3f, 0x52, 0x4a, 0x56, 0x52, 0x4b, 0x54, 0x4c, 0x3e, 0x3f, 0x4f,
+    0x4b, 0x50, 0x4c, 0x53, 0x4a, 0x49, 0x46, 0x4e, 0x50, 0x48, 0x4f, 0x4b,
+    0x4a, 0x4e, 0x3e, 0x49, 0x45, 0x42, 0x42, 0x41, 0x47, 0x4b, 0x4f, 0x42,
+    0x49, 0x4c, 0x55, 0x4c, 0x4e, 0x42, 0x47, 0x42, 0x4b, 0x48, 0x46, 0x41,
+    0x46, 0x4e, 0x4d, 0x3f, 0x4f, 0x46, 0x4f, 0x4b, 0x4b, 0x4d, 0x50, 0x3e,
+    0x42, 0x43, 0x44, 0x4a, 0x49, 0x40, 0x4e, 0x43, 0x3e, 0x52, 0x3e, 0x44,
+    0x49, 0x43, 0x4d, 0x44, 0x62, 0x51, 0x42, 0x53, 0x51, 0x40, 0x4c, 0x64,
+    0x4f, 0x63, 0x4e, 0x5c, 0x5b, 0x5c, 0x48, 0x4d, 0x4a, 0x57, 0x4f, 0x42,
+    0x65, 0xfe, 0x5c, 0x4e, 0x47, 0x43, 0x4a, 0x58, 0x4e, 0x5e, 0x48, 0x4c,
+    0x51, 0x5e, 0x60, 0x56, 0x2f, 0x62, 0x54, 0x58, 0x51, 0x52, 0x55, 0x51,
+    0x36, 0x4b, 0x46, 0x51, 0x53, 0x5f, 0x46, 0x4c, 0x37, 0x4d, 0x4a, 0x45,
+    0x4b, 0x3f, 0x41, 0x42, 0x3f, 0x53, 0x4a, 0x48, 0x49, 0x4a, 0x4a, 0x45,
+    0x52, 0x3f, 0x52, 0x52, 0x45, 0x4d, 0x4f, 0x45, 0x46, 0x4a, 0x51, 0x48,
+    0x56, 0x47, 0x50, 0x3e, 0x46, 0x49, 0x4c, 0x51, 0x49, 0x54, 0x45, 0x4f,
+    0x4b, 0x4b, 0x49, 0x46, 0x4b, 0x4d, 0x49, 0x5c, 0x4d, 0x43, 0x47, 0x49,
+    0x48, 0x52, 0x46, 0x50, 0x51, 0x37, 0x50, 0x52, 0x4c, 0x4d, 0x4f, 0x51,
+    0x4f, 0x42, 0x50, 0x47, 0x48, 0x4e, 0x4d, 0x4c, 0x48, 0x48, 0x4a, 0x51,
+    0x49, 0x42, 0x50, 0x4f, 0x43, 0x4e, 0x47, 0x4b, 0x47, 0x4a, 0x44, 0x44,
+    0x4c, 0x51, 0x49, 0x44, 0x45, 0x45, 0x45, 0x48, 0x3f, 0x4a, 0x43, 0x49,
+    0x46, 0x49, 0x4c, 0x4d, 0x45, 0x50, 0x44, 0x45, 0x44, 0x55, 0x4a, 0x45,
+    0x48, 0x47, 0x4c, 0x43, 0x3f, 0x48, 0x42, 0x43, 0x43, 0x43, 0x48, 0x46,
+    0x5c, 0x51, 0x47, 0x51, 0x48, 0x40, 0x54, 0x66, 0x4e, 0x67, 0x4d, 0x5a,
+    0x60, 0x57, 0x47, 0x4d, 0x4d, 0x58, 0x53, 0x46, 0x66, 0x7e, 0x56, 0x48,
+    0x44, 0x4f, 0x49, 0x5c, 0x4a, 0x63, 0x50, 0x4c, 0x49, 0x56, 0x61, 0x50,
+    0x2c, 0x68, 0x4d, 0x51, 0x46, 0x4e, 0x5b, 0x51, 0x2e, 0x53, 0x54, 0x50,
+    0x46, 0x58, 0x44, 0x4f, 0x37, 0x48, 0x55, 0x50, 0x49, 0x49, 0x4e, 0x46,
+    0x43, 0x56, 0x52, 0x4e, 0x50, 0x4b, 0x50, 0x4c, 0x49, 0x40, 0x4d, 0x4f,
+    0x50, 0x41, 0x44, 0x39, 0x4b, 0x4d, 0x4b, 0x41, 0x51, 0x4d, 0x4c, 0x41,
+    0x3f, 0x52, 0x4e, 0x4b, 0x49, 0x53, 0x45, 0x43, 0x4d, 0x4f, 0x44, 0x4d,
+    0x4b, 0x53, 0x50, 0x4e, 0x45, 0x3f, 0x4e, 0x51, 0x50, 0x55, 0x4f, 0x51,
+    0x4d, 0x3d, 0x58, 0x3f, 0x46, 0x50, 0x50, 0x50, 0x56, 0x42, 0x49, 0x49,
+    0x50, 0x4f, 0x42, 0x4b, 0x4c, 0x45, 0x52, 0x41, 0x46, 0x43, 0x4c, 0x4a,
+    0x4c, 0x51, 0x4d, 0x4d, 0x4a, 0x49, 0x54, 0x49, 0x58, 0x53, 0x49, 0x45,
+    0x47, 0x4c, 0x4c, 0x44, 0x4e, 0x51, 0x4c, 0x4c, 0x47, 0x48, 0x4c, 0x4e,
+    0x49, 0x54, 0x4c, 0x51, 0x49, 0x48, 0x47, 0x45, 0x42, 0x49, 0x42, 0x51,
+    0x4e, 0x3f, 0x49, 0x41, 0x50, 0x3e, 0x4d, 0x50, 0x5c, 0x51, 0x4d, 0x56,
+    0x47, 0x48, 0x58, 0x65, 0x51, 0x6b, 0x56, 0x5b, 0x56, 0x55, 0x46, 0x49,
+    0x4b, 0x58, 0x59, 0x4a, 0x68, 0x79, 0x53, 0x46, 0x45, 0x4b, 0x53, 0x5d,
+    0x4b, 0x6f, 0x4e, 0x4f, 0x4c, 0x53, 0x5b, 0x52, 0x30, 0x63, 0x46, 0x57,
+    0x46, 0x50, 0x4b, 0x48, 0x2e, 0x4c, 0x46, 0x48, 0x44, 0x51, 0x46, 0x4a,
+    0x35, 0x55, 0x43, 0x4c, 0x43, 0x4d, 0x4e, 0x3e, 0x47, 0x56, 0x50, 0x4d,
+    0x44, 0x59, 0x4c, 0x51, 0x46, 0x42, 0x4e, 0x43, 0x4c, 0x44, 0x42, 0x3a,
+    0x40, 0x48, 0x46, 0x44, 0x45, 0x4a, 0x46, 0x3a, 0x53, 0x4c, 0x4d, 0x4c,
+    0x4a, 0x4f, 0x53, 0x40, 0x4b, 0x48, 0x54, 0x4b, 0x44, 0x59, 0x41, 0x50,
+    0x4e, 0x50, 0x55, 0x4d, 0x55, 0x41, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x50,
+    0x52, 0x4c, 0x50, 0x4d, 0x47, 0x42, 0x4f, 0x4b, 0x47, 0x43, 0x41, 0x4a,
+    0x55, 0x3e, 0x50, 0x4b, 0x41, 0x49, 0x47, 0x49, 0x53, 0x4d, 0x48, 0x4b,
+    0x43, 0x43, 0x51, 0x44, 0x4d, 0x4c, 0x44, 0x50, 0x4d, 0x42, 0x49, 0x4e,
+    0x50, 0x50, 0x4c, 0x49, 0x49, 0x51, 0x46, 0x43, 0x4a, 0x4e, 0x53, 0x47,
+    0x43, 0x46, 0x40, 0x49, 0x47, 0x44, 0x44, 0x4d, 0x4b, 0x4b, 0x51, 0x4b,
+    0x45, 0x49, 0x47, 0x43, 0x56, 0x49, 0x4c, 0x54, 0x50, 0x3c, 0x4c, 0x5e,
+    0x51, 0x67, 0x4f, 0x57, 0x57, 0x53, 0x3e, 0x4e, 0x4e, 0x5e, 0x4b, 0x48,
+    0x5a, 0x78, 0x55, 0x4a, 0x3f, 0x4b, 0x4c, 0x5b, 0x53, 0x64, 0x4d, 0x53,
+    0x49, 0x57, 0x57, 0x58, 0x37, 0x62, 0x4f, 0x56, 0x44, 0x4e, 0x58, 0x4a,
+    0x30, 0x4f, 0x40, 0x4e, 0x47, 0x58, 0x52, 0x50, 0x35, 0x4d, 0x49, 0x52,
+    0x4e, 0x42, 0x46, 0x47, 0x44, 0x57, 0x54, 0x43, 0x4e, 0x56, 0x43, 0x49,
+    0x44, 0x40, 0x44, 0x41, 0x50, 0x49, 0x4b, 0x44, 0x4d, 0x52, 0x49, 0x43,
+    0x52, 0x54, 0x49, 0x3f, 0x49, 0x42, 0x49, 0x4a, 0x43, 0x3e, 0x50, 0x40,
+    0x46, 0x4b, 0x50, 0x4b, 0x53, 0x4b, 0x47, 0x52, 0x51, 0x4b, 0x47, 0x3f,
+    0x46, 0x4b, 0x4c, 0x57, 0x49, 0x47, 0x54, 0x49, 0x50, 0x50, 0x4d, 0x4a,
+    0x42, 0x4e, 0x51, 0x4c, 0x47, 0x47, 0x42, 0x43, 0x54, 0x43, 0x46, 0x47,
+    0x4d, 0x43, 0x54, 0x47, 0x43, 0x58, 0x48, 0x45, 0x4b, 0x46, 0x48, 0x3d,
+    0x47, 0x3f, 0x44, 0x4f, 0x4e, 0x46, 0x41, 0x40, 0x4d, 0x4d, 0x4d, 0x52,
+    0x54, 0x47, 0x4f, 0x51, 0x4f, 0x45, 0x45, 0x48, 0x4b, 0x4d, 0x44, 0x52,
+    0x51, 0x4b, 0x48, 0x4f, 0x49, 0x49, 0x46, 0x50, 0x54, 0x42, 0x44, 0x51,
+    0x58, 0x4e, 0x43, 0x58, 0x55, 0x40, 0x53, 0x5a, 0x51, 0x61, 0x51, 0x60,
+    0x53, 0x57, 0x45, 0x4f, 0x45, 0x5e, 0x51, 0x42, 0x61, 0x7a, 0x55, 0x47,
+    0x41, 0x4b, 0x4a, 0x5b, 0x4c, 0x65, 0x4f, 0x55, 0x46, 0x54, 0x65, 0x59,
+    0x36, 0x61, 0x54, 0x55, 0x48, 0x57, 0x52, 0x4e, 0x24, 0x4b, 0x49, 0x4d,
+    0x43, 0x57, 0x44, 0x51, 0x3b, 0x4f, 0x45, 0x40, 0x47, 0x4a, 0x43, 0x47,
+    0x46, 0x58, 0x50, 0x54, 0x4d, 0x50, 0x44, 0x42, 0x4a, 0x46, 0x4b, 0x4d,
+    0x4f, 0x4f, 0x4d, 0x40, 0x48, 0x4a, 0x53, 0x48, 0x49, 0x48, 0x4d, 0x39,
+    0x47, 0x4e, 0x44, 0x4c, 0x4b, 0x49, 0x44, 0x42, 0x4a, 0x45, 0x46, 0x46,
+    0x53, 0x4d, 0x49, 0x4f, 0x4e, 0x48, 0x50, 0x4a, 0x4c, 0x46, 0x56, 0x4b,
+    0x4b, 0x57, 0x4c, 0x49, 0x4a, 0x4a, 0x43, 0x4e, 0x56, 0x45, 0x50, 0x4c,
+    0x47, 0x55, 0x48, 0x46, 0x4e, 0x46, 0x45, 0x3f, 0x4a, 0x4c, 0x4c, 0x47,
+    0x4a, 0x51, 0x4e, 0x50, 0x40, 0x52, 0x45, 0x45, 0x4b, 0x46, 0x4f, 0x44,
+    0x51, 0x4a, 0x4e, 0x4d, 0x4c, 0x46, 0x42, 0x47, 0x4a, 0x4e, 0x46, 0x42,
+    0x4b, 0x4f, 0x4b, 0x4e, 0x4e, 0x46, 0x42, 0x50, 0x53, 0x51, 0x4f, 0x54,
+    0x45, 0x4f, 0x45, 0x42, 0x4c, 0x45, 0x40, 0x48, 0x59, 0x49, 0x49, 0x53,
+    0x4c, 0x43, 0x4b, 0x57, 0x54, 0x64, 0x4e, 0x5f, 0x5c, 0x59, 0x4b, 0x56,
+    0x49, 0x5d, 0x4f, 0x4b, 0x62, 0x73, 0x54, 0x45, 0x49, 0x50, 0x48, 0x5a,
+    0x50, 0x6d, 0x4a, 0x4e, 0x48, 0x55, 0x5d, 0x57, 0x38, 0x68, 0x52, 0x5a,
+    0x46, 0x56, 0x4c, 0x5a, 0x2e, 0x55, 0x49, 0x4f, 0x4a, 0x57, 0x4f, 0x54,
+    0x41, 0x53, 0x46, 0x43, 0x45, 0x47, 0x53, 0x4a, 0x42, 0x4f, 0x4d, 0x48,
+    0x4c, 0x49, 0x47, 0x48, 0x45, 0x49, 0x48, 0x53, 0x48, 0x52, 0x4a, 0x44,
+    0x4c, 0x49, 0x52, 0x4b, 0x47, 0x51, 0x42, 0x47, 0x49, 0x51, 0x3f, 0x45,
+    0x47, 0x4e, 0x53, 0x33, 0x55, 0x51, 0x55, 0x48, 0x4b, 0x51, 0x56, 0x47,
+    0x43, 0x55, 0x47, 0x42, 0x47, 0x4f, 0x47, 0x51, 0x46, 0x55, 0x4a, 0x4b,
+    0x50, 0x52, 0x4f, 0x43, 0x4b, 0x53, 0x4d, 0x3f, 0x4e, 0x56, 0x50, 0x49,
+    0x4d, 0x47, 0x51, 0x49, 0x4a, 0x52, 0x44, 0x43, 0x4d, 0x4e, 0x41, 0x51,
+    0x4c, 0x4d, 0x47, 0x48, 0x4f, 0x40, 0x50, 0x46, 0x43, 0x4d, 0x4e, 0x50,
+    0x43, 0x47, 0x4e, 0x46, 0x4f, 0x4b, 0x51, 0x4b, 0x4a, 0x57, 0x42, 0x51,
+    0x4c, 0x54, 0x52, 0x42, 0x4c, 0x42, 0x47, 0x54, 0x4a, 0x4a, 0x47, 0x4a,
+    0x3f, 0x46, 0x4e, 0x4c, 0x53, 0x50, 0x47, 0x53, 0x49, 0x44, 0x52, 0x5a,
+    0x4b, 0x65, 0x50, 0x5b, 0x57, 0x59, 0x4a, 0x48, 0x48, 0x5f, 0x55, 0x48,
+    0x5c, 0x78, 0x55, 0x48, 0x4a, 0x4b, 0x49, 0x4c, 0x46, 0x6b, 0x54, 0x57,
+    0x55, 0x4b, 0x59, 0x52, 0x38, 0x5b, 0x57, 0x56, 0x4b, 0x4f, 0x48, 0x4e,
+    0x34, 0x5a, 0x4e, 0x4f, 0x43, 0x4e, 0x4b, 0x4e, 0x36, 0x4d, 0x52, 0x48,
+    0x4d, 0x4c, 0x4c, 0x49, 0x51, 0x54, 0x45, 0x54, 0x4a, 0x4e, 0x52, 0x41,
+    0x4c, 0x45, 0x4a, 0x53, 0x55, 0x4b, 0x50, 0x47, 0x4e, 0x4d, 0x43, 0x51,
+    0x4e, 0x4a, 0x51, 0x46, 0x4e, 0x4d, 0x48, 0x3f, 0x43, 0x52, 0x56, 0x38,
+    0x52, 0x46, 0x43, 0x49, 0x40, 0x49, 0x53, 0x41, 0x47, 0x41, 0x41, 0x42,
+    0x4f, 0x4b, 0x46, 0x4b, 0x4a, 0x57, 0x4a, 0x45, 0x4b, 0x46, 0x47, 0x3c,
+    0x43, 0x46, 0x4f, 0x50, 0x4c, 0x53, 0x4f, 0x41, 0x4a, 0x4a, 0x40, 0x4a,
+    0x3e, 0x4e, 0x4d, 0x41, 0x4a, 0x42, 0x49, 0x4c, 0x51, 0x46, 0x4f, 0x43,
+    0x4b, 0x41, 0x50, 0x48, 0x4a, 0x40, 0x52, 0x45, 0x40, 0x40, 0x46, 0x48,
+    0x48, 0x52, 0x52, 0x41, 0x43, 0x49, 0x49, 0x4c, 0x44, 0x48, 0x50, 0x4a,
+    0x47, 0x48, 0x4c, 0x42, 0x49, 0x48, 0x52, 0x56, 0x4b, 0x41, 0x4e, 0x47,
+    0x52, 0x56, 0x4e, 0x56, 0x4b, 0x38, 0x50, 0x55, 0x5a, 0x63, 0x51, 0x5a,
+    0x54, 0x52, 0x44, 0x45, 0x47, 0x5e, 0x4c, 0x4a, 0x5e, 0x71, 0x56, 0x44,
+    0x4c, 0x4b, 0x4c, 0x4e, 0x49, 0x69, 0x50, 0x53, 0x4d, 0x5c, 0x59, 0x50,
+    0x36, 0x5d, 0x46, 0x5b, 0x51, 0x55, 0x55, 0x51, 0x36, 0x5a, 0x53, 0x56,
+    0x54, 0x4a, 0x55, 0x53, 0x3c, 0x52, 0x4a, 0x45, 0x4c, 0x56, 0x49, 0x46,
+    0x4f, 0x5b, 0x43, 0x4b, 0x49, 0x4c, 0x4b, 0x41, 0x44, 0x4b, 0x47, 0x4b,
+    0x4b, 0x54, 0x4a, 0x4c, 0x49, 0x44, 0x46, 0x46, 0x48, 0x49, 0x47, 0x4a,
+    0x40, 0x4e, 0x47, 0x53, 0x4a, 0x47, 0x4a, 0x3b, 0x48, 0x4b, 0x50, 0x51,
+    0x50, 0x44, 0x4d, 0x49, 0x42, 0x4b, 0x43, 0x48, 0x4a, 0x43, 0x4d, 0x4d,
+    0x49, 0x4d, 0x43, 0x4f, 0x50, 0x49, 0x47, 0x48, 0x48, 0x4f, 0x49, 0x41,
+    0x4c, 0x46, 0x47, 0x3e, 0x51, 0x4d, 0x4e, 0x42, 0x3d, 0x53, 0x4d, 0x3b,
+    0x53, 0x52, 0x4c, 0x4c, 0x43, 0x46, 0x43, 0x3d, 0x53, 0x48, 0x43, 0x4e,
+    0x45, 0x52, 0x4d, 0x4a, 0x44, 0x49, 0x47, 0x4c, 0x4e, 0x4c, 0x4a, 0x4e,
+    0x41, 0x48, 0x4b, 0x44, 0x4d, 0x4a, 0x4d, 0x44, 0x4a, 0x45, 0x4f, 0x52,
+    0x45, 0x3f, 0x4b, 0x48, 0x43, 0x41, 0x3d, 0x53, 0x53, 0x50, 0x4a, 0x56,
+    0x4d, 0x3e, 0x55, 0x4e, 0x56, 0x5e, 0x52, 0x52, 0x54, 0x50, 0x42, 0x4a,
+    0x4d, 0x5f, 0x4f, 0x49, 0x5d, 0x6f, 0x55, 0x4a, 0x47, 0x49, 0x4e, 0x4a,
+    0x43, 0x6e, 0x4e, 0x4f, 0x52, 0x59, 0x62, 0x4b, 0x3e, 0x5c, 0x4c, 0x4e,
+    0x45, 0x52, 0x43, 0x4d, 0x3c, 0x58, 0x52, 0x49, 0x48, 0x55, 0x53, 0x4e,
+    0x3d, 0x4e, 0x4c, 0x4b, 0x4b, 0x50, 0x4a, 0x47, 0x45, 0x62, 0x50, 0x49,
+    0x48, 0x4b, 0x55, 0x45, 0x46, 0x51, 0x41, 0x55, 0x54, 0x55, 0x50, 0x47,
+    0x46, 0x4d, 0x46, 0x4b, 0x41, 0x49, 0x4c, 0x40, 0x45, 0x4f, 0x52, 0x54,
+    0x45, 0x4d, 0x53, 0x3a, 0x4c, 0x55, 0x4e, 0x48, 0x44, 0x45, 0x56, 0x3c,
+    0x48, 0x46, 0x4b, 0x51, 0x53, 0x43, 0x41, 0x49, 0x4c, 0x52, 0x48, 0x42,
+    0x48, 0x3f, 0x4c, 0x38, 0x46, 0x50, 0x4a, 0x44, 0x50, 0x54, 0x4e, 0x38,
+    0x48, 0x42, 0x43, 0x4a, 0x4c, 0x44, 0x47, 0x42, 0x42, 0x46, 0x4a, 0x50,
+    0x47, 0x4b, 0x43, 0x40, 0x44, 0x46, 0x46, 0x4d, 0x50, 0x4a, 0x4e, 0x51,
+    0x44, 0x40, 0x50, 0x43, 0x52, 0x4d, 0x42, 0x4c, 0x50, 0x41, 0x4a, 0x4e,
+    0x45, 0x49, 0x4d, 0x40, 0x46, 0x51, 0x43, 0x4b, 0x48, 0x47, 0x42, 0x55,
+    0x4a, 0x41, 0x4f, 0x49, 0x4f, 0x4e, 0x47, 0x4c, 0x4a, 0x48, 0x50, 0x4e,
+    0x50, 0x57, 0x4e, 0x56, 0x56, 0x4e, 0x44, 0x48, 0x4a, 0x5b, 0x55, 0x49,
+    0x59, 0x67, 0x54, 0x46, 0x4f, 0x41, 0x4d, 0x4e, 0x4a, 0x63, 0x4d, 0x44,
+    0x53, 0x5b, 0x59, 0x4f, 0x43, 0x55, 0x56, 0x4e, 0x55, 0x4c, 0x4b, 0x54,
+    0x3c, 0x56, 0x4d, 0x50, 0x4f, 0x4a, 0x5a, 0x47, 0x48, 0x56, 0x4f, 0x4f,
+    0x50, 0x51, 0x48, 0x4e, 0x4d, 0x50, 0x4e, 0x45, 0x4b, 0x48, 0x4e, 0x44,
+    0x46, 0x4d, 0x43, 0x46, 0x41, 0x59, 0x53, 0x4b, 0x4a, 0x3e, 0x51, 0x47,
+    0x43, 0x48, 0x52, 0x3f, 0x43, 0x50, 0x4b, 0x4f, 0x41, 0x48, 0x43, 0x2e,
+    0x4d, 0x4e, 0x4c, 0x45, 0x45, 0x46, 0x4b, 0x43, 0x46, 0x49, 0x46, 0x4d,
+    0x47, 0x4e, 0x4d, 0x3c, 0x47, 0x4a, 0x52, 0x4e, 0x41, 0x50, 0x43, 0x3a,
+    0x50, 0x47, 0x4a, 0x45, 0x52, 0x4a, 0x4c, 0x3f, 0x42, 0x3d, 0x49, 0x48,
+    0x48, 0x4c, 0x42, 0x3a, 0x40, 0x47, 0x46, 0x4e, 0x44, 0x52, 0x46, 0x44,
+    0x4a, 0x44, 0x43, 0x49, 0x42, 0x45, 0x3f, 0x50, 0x4c, 0x44, 0x48, 0x43,
+    0x47, 0x4a, 0x48, 0x48, 0x3e, 0x45, 0x43, 0x48, 0x4a, 0x48, 0x53, 0x4b,
+    0x50, 0x49, 0x43, 0x4d, 0x53, 0x4f, 0x4b, 0x4b, 0x40, 0x42, 0x50, 0x4d,
+    0x53, 0x4e, 0x44, 0x4d, 0x45, 0x3d, 0x51, 0x51, 0x4f, 0x59, 0x4b, 0x51,
+    0x4a, 0x4e, 0x42, 0x40, 0x49, 0x5b, 0x4b, 0x43, 0x53, 0x60, 0x47, 0x49,
+    0x4a, 0x44, 0x44, 0x48, 0x4b, 0x60, 0x51, 0x3f, 0x4b, 0x5b, 0x4f, 0x4a,
+    0x4a, 0x50, 0x49, 0x46, 0x55, 0x50, 0x4b, 0x4c, 0x40, 0x4e, 0x51, 0x4f,
+    0x4b, 0x51, 0x54, 0x50, 0x48, 0x4e, 0x4a, 0x4f, 0x4d, 0x4e, 0x54, 0x4d,
+    0x41, 0x50, 0x4e, 0x47, 0x47, 0x47, 0x54, 0x3b, 0x51, 0x54, 0x50, 0x49,
+    0x48, 0x4c, 0x4e, 0x47, 0x3f, 0x3c, 0x4c, 0x43, 0x45, 0x42, 0x45, 0x37,
+    0x41, 0x52, 0x49, 0x47, 0x4e, 0x4a, 0x4b, 0x37, 0x48, 0x4d, 0x4e, 0x4a,
+    0x42, 0x56, 0x3d, 0x35, 0x48, 0x42, 0x4b, 0x4a, 0x44, 0x52, 0x40, 0x48,
+    0x4f, 0x49, 0x4f, 0x4c, 0x4d, 0x43, 0x49, 0x38, 0x4b, 0x42, 0x48, 0x42,
+    0x45, 0x45, 0x54, 0x3a, 0x47, 0x47, 0x52, 0x45, 0x4a, 0x48, 0x47, 0x39,
+    0x4d, 0x45, 0x54, 0x4b, 0x4e, 0x4f, 0x4e, 0x38, 0x4a, 0x4b, 0x48, 0x45,
+    0x4e, 0x43, 0x4e, 0x4e, 0x46, 0x4e, 0x4e, 0x50, 0x46, 0x4c, 0x42, 0x45,
+    0x4b, 0x46, 0x47, 0x4d, 0x49, 0x3f, 0x4f, 0x50, 0x46, 0x4a, 0x47, 0x4e,
+    0x4a, 0x3e, 0x50, 0x46, 0x47, 0x40, 0x4f, 0x47, 0x51, 0x4b, 0x43, 0x46,
+    0x4a, 0x42, 0x55, 0x4d, 0x46, 0x63, 0x49, 0x4e, 0x4f, 0x4f, 0x42, 0x45,
+    0x50, 0x57, 0x49, 0x3e, 0x57, 0x63, 0x45, 0x4a, 0x49, 0x50, 0x41, 0x4a,
+    0x48, 0x64, 0x4f, 0x42, 0x47, 0x58, 0x4b, 0x45, 0x43, 0x57, 0x49, 0x58,
+    0x51, 0x51, 0x47, 0x43, 0x51, 0x4b, 0x4a, 0x45, 0x50, 0x54, 0x4d, 0x4d,
+    0x3e, 0x4a, 0x50, 0x40, 0x51, 0x4f, 0x52, 0x48, 0x53, 0x49, 0x44, 0x4b,
+    0x51, 0x4b, 0x50, 0x42, 0x4d, 0x49, 0x4a, 0x46, 0x44, 0x50, 0x47, 0x3f,
+    0x48, 0x47, 0x41, 0x4a, 0x42, 0x52, 0x4a, 0x33, 0x50, 0x50, 0x54, 0x3f,
+    0x44, 0x4e, 0x51, 0x3c, 0x4e, 0x51, 0x48, 0x4b, 0x47, 0x49, 0x3f, 0x3d,
+    0x4e, 0x46, 0x4a, 0x41, 0x40, 0x50, 0x49, 0x40, 0x4a, 0x4b, 0x45, 0x50,
+    0x4e, 0x4d, 0x4b, 0x39, 0x4e, 0x4b, 0x48, 0x3c, 0x47, 0x44, 0x4c, 0x42,
+    0x45, 0x50, 0x3e, 0x54, 0x4d, 0x49, 0x48, 0x3c, 0x45, 0x42, 0x55, 0x4a,
+    0x41, 0x4f, 0x40, 0x3f, 0x47, 0x46, 0x46, 0x44, 0x4f, 0x47, 0x46, 0x44,
+    0x41, 0x40, 0x44, 0x48, 0x3e, 0x3c, 0x46, 0x3e, 0x4a, 0x45, 0x4c, 0x52,
+    0x47, 0x42, 0x47, 0x3f, 0x47, 0x4e, 0x4b, 0x53, 0x4a, 0x3d, 0x4d, 0x47,
+    0x4f, 0x3d, 0x4e, 0x43, 0x4f, 0x46, 0x43, 0x43, 0x46, 0x41, 0x4f, 0x42,
+    0x46, 0x57, 0x4d, 0x51, 0x49, 0x51, 0x4c, 0x44, 0x51, 0x4f, 0x46, 0x44,
+    0x54, 0x5d, 0x4f, 0x40, 0x59, 0x46, 0x53, 0x46, 0x48, 0x54, 0x43, 0x45,
+    0x4d, 0x51, 0x4f, 0x44, 0x44, 0x53, 0x49, 0x4e, 0x48, 0x46, 0x44, 0x4a,
+    0x4a, 0x42, 0x4c, 0x46, 0x54, 0x4f, 0x52, 0x47, 0x46, 0x44, 0x4c, 0x4d,
+    0x4c, 0x47, 0x4d, 0x40, 0x55, 0x58, 0x46, 0x46, 0x3f, 0x3e, 0x47, 0x36,
+    0x3f, 0x4d, 0x4b, 0x4d, 0x4f, 0x4f, 0x48, 0x34, 0x4d, 0x46, 0x46, 0x50,
+    0x50, 0x4b, 0x47, 0x45, 0x4e, 0x49, 0x50, 0x4f, 0x4a, 0x48, 0x4f, 0x39,
+    0x53, 0x4c, 0x4b, 0x56, 0x45, 0x4f, 0x55, 0x3a, 0x40, 0x53, 0x43, 0x4b,
+    0x47, 0x3d, 0x4c, 0x34, 0x4b, 0x4e, 0x4a, 0x4b, 0x4d, 0x49, 0x4e, 0x40,
+    0x4d, 0x48, 0x40, 0x4a, 0x4a, 0x4b, 0x4a, 0x42, 0x4c, 0x52, 0x43, 0x42,
+    0x44, 0x3f, 0x4e, 0x42, 0x44, 0x45, 0x40, 0x3d, 0x4b, 0x45, 0x4a, 0x43,
+    0x4b, 0x4b, 0x4e, 0x46, 0x55, 0x43, 0x44, 0x3f, 0x44, 0x43, 0x4b, 0x4b,
+    0x45, 0x51, 0x48, 0x49, 0x3d, 0x44, 0x4a, 0x4a, 0x50, 0x50, 0x47, 0x44,
+    0x4f, 0x3e, 0x3f, 0x43, 0x4c, 0x46, 0x4a, 0x4e, 0x4c, 0x52, 0x48, 0x4e,
+    0x48, 0x46, 0x45, 0x48, 0x41, 0x4f, 0x51, 0x48, 0x40, 0x4d, 0x4a, 0x4b,
+    0x4c, 0x51, 0x49, 0x50, 0x4e, 0x4b, 0x4a, 0x42, 0x49, 0x54, 0x4e, 0x43,
+    0x52, 0x47, 0x4a, 0x41, 0x42, 0x51, 0x48, 0x4a, 0x46, 0x45, 0x4a, 0x43,
+    0x4e, 0x4f, 0x41, 0x49, 0x4b, 0x42, 0x40, 0x4a, 0x50, 0x41, 0x42, 0x3f,
+    0x49, 0x4a, 0x40, 0x3e, 0x3f, 0x42, 0x4d, 0x51, 0x4e, 0x4e, 0x47, 0x41,
+    0x4e, 0x4e, 0x49, 0x4b, 0x41, 0x45, 0x51, 0x40, 0x45, 0x4c, 0x3f, 0x42,
+    0x4c, 0x45, 0x4d, 0x39, 0x46, 0x52, 0x4a, 0x4e, 0x4c, 0x49, 0x4e, 0x43,
+    0x43, 0x4c, 0x48, 0x46, 0x48, 0x49, 0x50, 0x3a, 0x3f, 0x49, 0x42, 0x4f,
+    0x42, 0x4d, 0x4e, 0x3f, 0x51, 0x4b, 0x4e, 0x4b, 0x51, 0x44, 0x43, 0x4a,
+    0x4a, 0x4c, 0x50, 0x48, 0x45, 0x47, 0x4d, 0x41, 0x47, 0x45, 0x51, 0x41,
+    0x42, 0x48, 0x4c, 0x39, 0x51, 0x45, 0x46, 0x53, 0x4b, 0x50, 0x46, 0x45,
+    0x4b, 0x4d, 0x42, 0x4b, 0x3f, 0x45, 0x4b, 0x4e, 0x50, 0x50, 0x47, 0x4a,
+    0x45, 0x40, 0x4b, 0x43, 0x3f, 0x4a, 0x41, 0x42, 0x51, 0x41, 0x4d, 0x42,
+    0x53, 0x48, 0x48, 0x49, 0x4b, 0x40, 0x42, 0x3d, 0x4f, 0x53, 0x49, 0x46,
+    0x46, 0x43, 0x42, 0x44, 0x46, 0x48, 0x3f, 0x46, 0x31, 0x43, 0x4d, 0x4b,
+    0x48, 0x4d, 0x4c, 0x43, 0x45, 0x53, 0x50, 0x40, 0x4a, 0x48, 0x45, 0x3b,
+    0x4f, 0x4d, 0x53, 0x4c, 0x44, 0x54, 0x50, 0x66, 0x3f, 0x45, 0x4c, 0x4c,
+    0x4a, 0x49, 0x49, 0x4a, 0x40, 0x52, 0x3e, 0x4c, 0x49, 0x40, 0x44, 0x49,
+    0x48, 0x3f, 0x45, 0x5b, 0x49, 0x4b, 0x4c, 0x44, 0x50, 0x4e, 0x4a, 0x4a,
+    0x49, 0x4e, 0x4f, 0x47, 0x46, 0x4b, 0x44, 0x3b, 0x4e, 0x4b, 0x48, 0x46,
+    0x45, 0x45, 0x3d, 0x35, 0x4c, 0x49, 0x54, 0x42, 0x51, 0x46, 0x49, 0x2d,
+    0x43, 0x4a, 0x53, 0x49, 0x49, 0x42, 0x4f, 0x40, 0x4e, 0x50, 0x54, 0x51,
+    0x4b, 0x45, 0x48, 0x35, 0x4d, 0x41, 0x51, 0x40, 0x41, 0x49, 0x4a, 0x3b,
+    0x45, 0x50, 0x48, 0x51, 0x51, 0x4d, 0x4c, 0x36, 0x47, 0x4a, 0x44, 0x45,
+    0x4d, 0x47, 0x43, 0x3a, 0x48, 0x40, 0x42, 0x4f, 0x4f, 0x4f, 0x4f, 0x43,
+    0x4a, 0x41, 0x4b, 0x53, 0x43, 0x46, 0x4f, 0x39, 0x46, 0x4a, 0x4d, 0x53,
+    0x41, 0x44, 0x4e, 0x44, 0x3f, 0x47, 0x4c, 0x4d, 0x4d, 0x43, 0x45, 0x3d,
+    0x43, 0x4b, 0x3e, 0x48, 0x42, 0x4c, 0x47, 0x42, 0x42, 0x50, 0x49, 0x4b,
+    0x43, 0x4e, 0x44, 0x44, 0x4c, 0x3d, 0x4c, 0x47, 0x4e, 0x42, 0x4b, 0x44,
+    0x4b, 0x44, 0x3f, 0x49, 0x33, 0x46, 0x4a, 0x4a, 0x42, 0x57, 0x5e, 0x4a,
+    0x46, 0x4f, 0x55, 0x3c, 0x4a, 0x4b, 0x4c, 0x43, 0x51, 0x59, 0x64, 0x51,
+    0x45, 0x60, 0x4b, 0x65, 0x46, 0x4a, 0x4e, 0x49, 0x41, 0x4b, 0x50, 0x5c,
+    0x48, 0x4b, 0x3e, 0x52, 0x4f, 0x2f, 0x4e, 0x4a, 0x45, 0x53, 0x48, 0x59,
+    0x4c, 0x4e, 0x4a, 0x4d, 0x49, 0x40, 0x52, 0x44, 0x49, 0x46, 0x4e, 0x46,
+    0x42, 0x4b, 0x4a, 0x4b, 0x4b, 0x4b, 0x4f, 0x52, 0x46, 0x50, 0x4d, 0x3d,
+    0x46, 0x4b, 0x4b, 0x40, 0x4d, 0x3f, 0x43, 0x33, 0x4e, 0x53, 0x4b, 0x4a,
+    0x45, 0x48, 0x4c, 0x2e, 0x48, 0x4f, 0x49, 0x42, 0x54, 0x4f, 0x4b, 0x2b,
+    0x55, 0x4e, 0x43, 0x4d, 0x4d, 0x47, 0x42, 0x3e, 0x48, 0x48, 0x4d, 0x54,
+    0x52, 0x4f, 0x43, 0x37, 0x4b, 0x42, 0x4b, 0x4e, 0x49, 0x49, 0x4b, 0x2e,
+    0x45, 0x4e, 0x48, 0x4e, 0x44, 0x49, 0x48, 0x30, 0x4c, 0x4b, 0x3f, 0x42,
+    0x4f, 0x4f, 0x4e, 0x38, 0x4f, 0x42, 0x54, 0x49, 0x41, 0x42, 0x45, 0x3a,
+    0x47, 0x43, 0x43, 0x4b, 0x49, 0x40, 0x4d, 0x38, 0x52, 0x4c, 0x3d, 0x4d,
+    0x43, 0x54, 0x4e, 0x41, 0x4a, 0x47, 0x44, 0x51, 0x47, 0x48, 0x41, 0x47,
+    0x4d, 0x41, 0x46, 0x4c, 0x4d, 0x46, 0x51, 0x4a, 0x49, 0x46, 0x4a, 0x42,
+    0x3a, 0x43, 0x4a, 0x4b, 0x43, 0x4c, 0x68, 0x44, 0x4b, 0x52, 0x50, 0x37,
+    0x4d, 0x4c, 0x57, 0x4c, 0x68, 0x62, 0x64, 0x4a, 0x3e, 0x64, 0x4b, 0x66,
+    0x48, 0x4d, 0x54, 0x57, 0x4b, 0x52, 0x49, 0x5c, 0x4d, 0x55, 0x51, 0x57,
+    0x4c, 0x3a, 0x48, 0x43, 0x3b, 0x43, 0x52, 0x5d, 0x45, 0x4e, 0x51, 0x4d,
+    0x4a, 0x55, 0x4e, 0x4c, 0x44, 0x51, 0x4c, 0x4f, 0x41, 0x4f, 0x4a, 0x43,
+    0x53, 0x48, 0x47, 0x49, 0x46, 0x52, 0x48, 0x3e, 0x4b, 0x4e, 0x4a, 0x50,
+    0x4f, 0x47, 0x3e, 0x2e, 0x4b, 0x51, 0x4a, 0x44, 0x4c, 0x49, 0x4f, 0x26,
+    0x48, 0x4f, 0x44, 0x51, 0x48, 0x3f, 0x4c, 0x30, 0x4e, 0x48, 0x4d, 0x48,
+    0x48, 0x44, 0x4b, 0x2f, 0x50, 0x41, 0x4d, 0x50, 0x52, 0x42, 0x45, 0x33,
+    0x4c, 0x48, 0x48, 0x3d, 0x46, 0x41, 0x43, 0x38, 0x45, 0x4f, 0x48, 0x4b,
+    0x41, 0x49, 0x4c, 0x2f, 0x53, 0x4c, 0x48, 0x4a, 0x47, 0x40, 0x4a, 0x31,
+    0x52, 0x40, 0x49, 0x4c, 0x3f, 0x48, 0x48, 0x39, 0x48, 0x3f, 0x45, 0x43,
+    0x40, 0x48, 0x3c, 0x40, 0x4c, 0x48, 0x48, 0x4d, 0x3e, 0x42, 0x4a, 0x3d,
+    0x4c, 0x45, 0x44, 0x46, 0x44, 0x45, 0x4a, 0x47, 0x52, 0x48, 0x4a, 0x4d,
+    0x3f, 0x49, 0x4c, 0x4c, 0x48, 0x44, 0x4c, 0x44, 0x3d, 0x41, 0x47, 0x45,
+    0x43, 0x4a, 0x5a, 0x3f, 0x48, 0x5d, 0x50, 0x35, 0x47, 0x4f, 0x5b, 0x46,
+    0x6e, 0x50, 0x6d, 0x44, 0x49, 0x6a, 0x53, 0x6b, 0x4b, 0x4b, 0x4f, 0x62,
+    0x45, 0x57, 0x48, 0x5b, 0x40, 0x4b, 0x4f, 0x63, 0x48, 0x3a, 0x4b, 0x42,
+    0x43, 0x53, 0x41, 0x5f, 0x54, 0x3e, 0x4d, 0x43, 0x3d, 0x4c, 0x46, 0x46,
+    0x49, 0x56, 0x4b, 0x45, 0x47, 0x45, 0x4e, 0x4f, 0x4c, 0x4d, 0x4f, 0x47,
+    0x49, 0x4b, 0x51, 0x33, 0x4b, 0x45, 0x4d, 0x41, 0x51, 0x4a, 0x43, 0x2a,
+    0x50, 0x4b, 0x4a, 0x4b, 0x4c, 0x52, 0x4c, 0x3b, 0x45, 0x4c, 0x51, 0x44,
+    0x4c, 0x48, 0x43, 0x35, 0x51, 0x50, 0x48, 0x49, 0x3f, 0x48, 0x3d, 0x3b,
+    0x52, 0x3f, 0x42, 0x4b, 0x49, 0x49, 0x47, 0x38, 0x4a, 0x4a, 0x41, 0x52,
+    0x41, 0x3e, 0x4b, 0x2f, 0x46, 0x4d, 0x49, 0x44, 0x46, 0x3b, 0x47, 0x36,
+    0x46, 0x3f, 0x49, 0x48, 0x47, 0x42, 0x42, 0x35, 0x44, 0x4b, 0x4d, 0x56,
+    0x50, 0x49, 0x43, 0x42, 0x4b, 0x3e, 0x53, 0x44, 0x4a, 0x43, 0x47, 0x38,
+    0x4a, 0x45, 0x4d, 0x3f, 0x46, 0x4a, 0x47, 0x3a, 0x4c, 0x3e, 0x47, 0x45,
+    0x46, 0x4b, 0x45, 0x49, 0x4a, 0x4b, 0x54, 0x49, 0x4a, 0x53, 0x4a, 0x4c,
+    0x45, 0x48, 0x53, 0x42, 0x4b, 0x47, 0x4e, 0x50, 0x3d, 0x51, 0x60, 0x3e,
+    0x53, 0x5d, 0x51, 0x30, 0x45, 0x50, 0x59, 0x4e, 0x62, 0x52, 0x68, 0x51,
+    0x45, 0x6c, 0x4c, 0x64, 0x4d, 0x47, 0x55, 0x61, 0x44, 0x57, 0x44, 0x58,
+    0x44, 0x4a, 0x53, 0x58, 0x47, 0x31, 0x3f, 0x4c, 0x43, 0x45, 0x48, 0x5e,
+    0x41, 0x43, 0x3f, 0x43, 0x51, 0x46, 0x48, 0x4b, 0x4d, 0x5b, 0x45, 0x4b,
+    0x48, 0x46, 0x3f, 0x45, 0x47, 0x45, 0x40, 0x4a, 0x51, 0x51, 0x3d, 0x3f,
+    0x43, 0x45, 0x4d, 0x4a, 0x47, 0x50, 0x49, 0x32, 0x4c, 0x5a, 0x55, 0x4f,
+    0x4c, 0x51, 0x43, 0x37, 0x40, 0x59, 0x49, 0x49, 0x4e, 0x4f, 0x47, 0x34,
+    0x40, 0x4c, 0x4a, 0x41, 0x4a, 0x47, 0x4a, 0x42, 0x4e, 0x4a, 0x48, 0x4e,
+    0x4e, 0x4e, 0x45, 0x39, 0x4e, 0x45, 0x45, 0x4e, 0x4c, 0x48, 0x4a, 0x35,
+    0x45, 0x4c, 0x49, 0x4f, 0x51, 0x43, 0x3c, 0x3a, 0x4a, 0x4a, 0x46, 0x48,
+    0x49, 0x42, 0x4e, 0x2f, 0x42, 0x4e, 0x45, 0x50, 0x51, 0x40, 0x45, 0x32,
+    0x4a, 0x4d, 0x44, 0x4e, 0x48, 0x48, 0x47, 0x2f, 0x48, 0x4b, 0x49, 0x44,
+    0x48, 0x4d, 0x46, 0x3b, 0x46, 0x4a, 0x41, 0x4e, 0x4e, 0x47, 0x54, 0x4b,
+    0x45, 0x49, 0x45, 0x44, 0x45, 0x48, 0x4a, 0x46, 0x55, 0x49, 0x47, 0x49,
+    0x4b, 0x42, 0x48, 0x4f, 0x3f, 0x52, 0x60, 0x39, 0x4b, 0x5e, 0x55, 0x2e,
+    0x48, 0x50, 0x59, 0x4f, 0x68, 0x5f, 0x64, 0x4f, 0x3b, 0x71, 0x50, 0x63,
+    0x4f, 0x50, 0x50, 0x6c, 0x4b, 0x55, 0x47, 0x5b, 0x4c, 0x40, 0x48, 0x59,
+    0x4f, 0x2e, 0x4b, 0x4c, 0x4e, 0x4e, 0x46, 0x61, 0x50, 0x41, 0x4c, 0x4a,
+    0x44, 0x3e, 0x3f, 0x47, 0x4b, 0x4f, 0x47, 0x4b, 0x47, 0x3d, 0x41, 0x49,
+    0x49, 0x3f, 0x4d, 0x44, 0x4a, 0x4d, 0x45, 0x41, 0x4d, 0x43, 0x49, 0x3c,
+    0x49, 0x57, 0x49, 0x3b, 0x49, 0x59, 0x3f, 0x4f, 0x4e, 0x49, 0x4e, 0x46,
+    0x52, 0x4e, 0x4c, 0x54, 0x4a, 0x48, 0x48, 0x3a, 0x44, 0x4a, 0x4f, 0x4a,
+    0x44, 0x4b, 0x43, 0x4d, 0x51, 0x42, 0x53, 0x4d, 0x52, 0x41, 0x4d, 0x43,
+    0x4e, 0x54, 0x4b, 0x42, 0x4b, 0x3f, 0x53, 0x45, 0x3f, 0x4a, 0x45, 0x50,
+    0x3f, 0x4c, 0x4f, 0x43, 0x46, 0x42, 0x4b, 0x4d, 0x4c, 0x3b, 0x48, 0x40,
+    0x4e, 0x4e, 0x49, 0x46, 0x4d, 0x4d, 0x52, 0x40, 0x4e, 0x4f, 0x46, 0x4a,
+    0x40, 0x4b, 0x4c, 0x40, 0x4f, 0x4a, 0x44, 0x41, 0x46, 0x3c, 0x40, 0x3d,
+    0x44, 0x48, 0x4a, 0x50, 0x46, 0x53, 0x46, 0x40, 0x44, 0x3e, 0x47, 0x43,
+    0x48, 0x3d, 0x4e, 0x3e, 0x48, 0x49, 0x4b, 0x49, 0x4c, 0x3e, 0x4c, 0x4a,
+    0x46, 0x4e, 0x62, 0x3c, 0x59, 0x60, 0x51, 0x29, 0x47, 0x52, 0x59, 0x4c,
+    0x67, 0x68, 0x68, 0x4e, 0x3b, 0x72, 0x4d, 0x68, 0x44, 0x4f, 0x53, 0x63,
+    0x47, 0x5a, 0x45, 0x4f, 0x4b, 0x37, 0x43, 0x5b, 0x4b, 0x3d, 0x44, 0x41,
+    0x4a, 0x4b, 0x3c, 0x64, 0x48, 0x38, 0x42, 0x3f, 0x48, 0x46, 0x4b, 0x46,
+    0x46, 0x4f, 0x46, 0x46, 0x44, 0x3c, 0x4b, 0x4f, 0x4d, 0x4a, 0x4b, 0x46,
+    0x4d, 0x4f, 0x4f, 0x3f, 0x3a, 0x4b, 0x55, 0x3c, 0x51, 0x56, 0x4d, 0x42,
+    0x52, 0x5a, 0x3e, 0x4b, 0x54, 0x57, 0x4e, 0x4d, 0x4e, 0x5b, 0x4e, 0x49,
+    0x4e, 0x3c, 0x40, 0x41, 0x40, 0x4d, 0x48, 0x42, 0x49, 0x4e, 0x4f, 0x47,
+    0x47, 0x48, 0x50, 0x49, 0x51, 0x46, 0x44, 0x45, 0x49, 0x46, 0x43, 0x48,
+    0x48, 0x49, 0x4d, 0x4c, 0x45, 0x4f, 0x4c, 0x45, 0x44, 0x40, 0x49, 0x45,
+    0x49, 0x51, 0x4b, 0x4b, 0x50, 0x4b, 0x48, 0x3d, 0x4e, 0x52, 0x4a, 0x47,
+    0x49, 0x41, 0x55, 0x3d, 0x48, 0x4d, 0x49, 0x48, 0x4e, 0x4c, 0x48, 0x3d,
+    0x3f, 0x4c, 0x4e, 0x53, 0x3e, 0x48, 0x4a, 0x3f, 0x54, 0x4d, 0x54, 0x4b,
+    0x47, 0x4e, 0x44, 0x48, 0x49, 0x4b, 0x4c, 0x49, 0x4d, 0x42, 0x52, 0x4b,
+    0x40, 0x3e, 0x54, 0x49, 0x55, 0x45, 0x47, 0x4d, 0x45, 0x5c, 0x60, 0x40,
+    0x57, 0x60, 0x5b, 0x27, 0x4a, 0x5a, 0x64, 0x53, 0x6a, 0x5a, 0x5f, 0x52,
+    0x3a, 0x72, 0x4b, 0x5f, 0x45, 0x56, 0x5f, 0x5f, 0x54, 0x5f, 0x39, 0x52,
+    0x51, 0x3e, 0x3b, 0x5a, 0x44, 0x32, 0x46, 0x50, 0x3a, 0x4f, 0x44, 0x5d,
+    0x4c, 0x41, 0x39, 0x3f, 0x45, 0x46, 0x3b, 0x43, 0x46, 0x51, 0x3c, 0x4c,
+    0x4b, 0x43, 0x4b, 0x51, 0x43, 0x48, 0x4d, 0x43, 0x38, 0x46, 0x46, 0x43,
+    0x44, 0x4a, 0x46, 0x49, 0x48, 0x50, 0x4e, 0x4a, 0x4e, 0x58, 0x4a, 0x49,
+    0x48, 0x4f, 0x4a, 0x49, 0x41, 0x57, 0x51, 0x50, 0x4b, 0x48, 0x47, 0x4b,
+    0x53, 0x3d, 0x4b, 0x4c, 0x4b, 0x4b, 0x55, 0x56, 0x45, 0x49, 0x46, 0x4c,
+    0x45, 0x51, 0x47, 0x50, 0x40, 0x4b, 0x4f, 0x4b, 0x4d, 0x4a, 0x4f, 0x50,
+    0x49, 0x53, 0x50, 0x46, 0x40, 0x48, 0x4a, 0x4a, 0x49, 0x4a, 0x42, 0x45,
+    0x4b, 0x45, 0x42, 0x45, 0x4e, 0x4e, 0x44, 0x41, 0x4b, 0x4a, 0x49, 0x3f,
+    0x41, 0x51, 0x48, 0x4c, 0x40, 0x41, 0x51, 0x42, 0x49, 0x49, 0x48, 0x42,
+    0x48, 0x4c, 0x4b, 0x3c, 0x49, 0x45, 0x42, 0x49, 0x4c, 0x46, 0x45, 0x43,
+    0x43, 0x48, 0x48, 0x41, 0x43, 0x42, 0x4c, 0x4b, 0x40, 0x45, 0x44, 0x46,
+    0x4c, 0x4b, 0x4e, 0x4d, 0x3f, 0x59, 0x55, 0x41, 0x56, 0x5a, 0x51, 0x30,
+    0x49, 0x5a, 0x63, 0x4d, 0x61, 0x5b, 0x64, 0x55, 0x34, 0x7a, 0x4c, 0x62,
+    0x3e, 0x5d, 0x56, 0x60, 0x48, 0x61, 0x3f, 0x54, 0x46, 0x40, 0x42, 0x56,
+    0x52, 0x35, 0x4c, 0x59, 0x45, 0x4c, 0x42, 0x60, 0x49, 0x3f, 0x4c, 0x3c,
+    0x52, 0x36, 0x46, 0x3d, 0x58, 0x4b, 0x41, 0x48, 0x3e, 0x45, 0x4e, 0x54,
+    0x4c, 0x56, 0x47, 0x44, 0x39, 0x4a, 0x4a, 0x4a, 0x46, 0x48, 0x4a, 0x48,
+    0x51, 0x4f, 0x4b, 0x49, 0x45, 0x4b, 0x44, 0x4c, 0x3e, 0x4c, 0x42, 0x59,
+    0x47, 0x55, 0x47, 0x47, 0x41, 0x44, 0x44, 0x4a, 0x44, 0x4b, 0x44, 0x46,
+    0x49, 0x5a, 0x48, 0x5d, 0x4f, 0x4a, 0x47, 0x50, 0x48, 0x4e, 0x44, 0x57,
+    0x49, 0x46, 0x42, 0x4d, 0x3d, 0x4a, 0x4a, 0x58, 0x41, 0x4d, 0x3c, 0x47,
+    0x42, 0x4e, 0x4d, 0x49, 0x44, 0x4b, 0x4c, 0x4b, 0x53, 0x42, 0x4a, 0x46,
+    0x4e, 0x56, 0x4b, 0x47, 0x50, 0x43, 0x4f, 0x48, 0x49, 0x50, 0x48, 0x50,
+    0x42, 0x4c, 0x4e, 0x3c, 0x41, 0x4f, 0x4a, 0x41, 0x44, 0x47, 0x4c, 0x42,
+    0x51, 0x4f, 0x53, 0x46, 0x4c, 0x4b, 0x48, 0x51, 0x47, 0x4b, 0x4c, 0x4d,
+    0x4d, 0x49, 0x3d, 0x44, 0x4b, 0x42, 0x43, 0x49, 0x51, 0x47, 0x4c, 0x4b,
+    0x4a, 0x50, 0x5b, 0x43, 0x5b, 0x68, 0x54, 0x31, 0x4c, 0x5d, 0x5c, 0x54,
+    0x63, 0x5a, 0x61, 0x54, 0x3d, 0x7a, 0x51, 0x5b, 0x40, 0x59, 0x5a, 0x62,
+    0x4c, 0x5e, 0x42, 0x58, 0x49, 0x3c, 0x38, 0x50, 0x54, 0x37, 0x42, 0x51,
+    0x4d, 0x4f, 0x42, 0x68, 0x4a, 0x40, 0x4e, 0x40, 0x3f, 0x3e, 0x3f, 0x40,
+    0x54, 0x52, 0x3e, 0x43, 0x46, 0x4a, 0x48, 0x51, 0x4e, 0x4d, 0x42, 0x47,
+    0x3f, 0x51, 0x47, 0x44, 0x3f, 0x4c, 0x46, 0x47, 0x4f, 0x55, 0x4b, 0x4e,
+    0x4c, 0x51, 0x40, 0x51, 0x47, 0x4a, 0x44, 0x5c, 0x48, 0x54, 0x4b, 0x46,
+    0x49, 0x4b, 0x53, 0x59, 0x43, 0x3e, 0x45, 0x4e, 0x4f, 0x58, 0x4b, 0x64,
+    0x41, 0x4b, 0x45, 0x4a, 0x4c, 0x51, 0x47, 0x57, 0x45, 0x46, 0x43, 0x4f,
+    0x4d, 0x4d, 0x49, 0x58, 0x4b, 0x52, 0x43, 0x4b, 0x45, 0x4c, 0x50, 0x4c,
+    0x4e, 0x4b, 0x40, 0x4c, 0x44, 0x4e, 0x4c, 0x47, 0x41, 0x55, 0x45, 0x4a,
+    0x4c, 0x48, 0x46, 0x41, 0x47, 0x52, 0x44, 0x4f, 0x48, 0x49, 0x4b, 0x47,
+    0x50, 0x4f, 0x42, 0x4a, 0x44, 0x4b, 0x52, 0x43, 0x45, 0x4e, 0x46, 0x49,
+    0x45, 0x52, 0x51, 0x45, 0x44, 0x41, 0x4c, 0x46, 0x4c, 0x4b, 0x44, 0x4d,
+    0x4f, 0x48, 0x44, 0x4d, 0x56, 0x48, 0x50, 0x4f, 0x3b, 0x4e, 0x55, 0x43,
+    0x52, 0x62, 0x57, 0x2c, 0x4d, 0x5e, 0x5e, 0x50, 0x64, 0x5b, 0x6a, 0x55,
+    0x39, 0x7d, 0x4b, 0x5e, 0x43, 0x54, 0x5d, 0x5c, 0x4d, 0x5c, 0x42, 0x51,
+    0x4c, 0x3d, 0x46, 0x51, 0x4c, 0x2a, 0x3e, 0x54, 0x47, 0x48, 0x46, 0x64,
+    0x42, 0x3d, 0x47, 0x3f, 0x42, 0x45, 0x49, 0x3b, 0x59, 0x50, 0x4c, 0x46,
+    0x4d, 0x44, 0x47, 0x4d, 0x4a, 0x50, 0x41, 0x48, 0x43, 0x50, 0x3e, 0x44,
+    0x4b, 0x53, 0x48, 0x49, 0x51, 0x51, 0x4d, 0x57, 0x49, 0x4f, 0x53, 0x50,
+    0x46, 0x4f, 0x41, 0x5d, 0x47, 0x46, 0x49, 0x51, 0x45, 0x41, 0x4a, 0x56,
+    0x4f, 0x4e, 0x4d, 0x4a, 0x3e, 0x55, 0x47, 0x65, 0x48, 0x51, 0x4d, 0x4e,
+    0x46, 0x43, 0x48, 0x5b, 0x48, 0x4f, 0x4f, 0x48, 0x4b, 0x4d, 0x4e, 0x5c,
+    0x4f, 0x4c, 0x54, 0x48, 0x4a, 0x4d, 0x4e, 0x4e, 0x44, 0x48, 0x43, 0x52,
+    0x41, 0x52, 0x48, 0x4f, 0x46, 0x4f, 0x51, 0x41, 0x44, 0x45, 0x41, 0x4b,
+    0x43, 0x4e, 0x4e, 0x42, 0x48, 0x41, 0x45, 0x43, 0x44, 0x43, 0x4c, 0x4c,
+    0x51, 0x54, 0x4c, 0x32, 0x46, 0x52, 0x4e, 0x49, 0x40, 0x4d, 0x43, 0x4f,
+    0x4a, 0x4d, 0x4d, 0x49, 0x46, 0x4c, 0x41, 0x4d, 0x41, 0x3a, 0x50, 0x4c,
+    0x5a, 0x4e, 0x49, 0x53, 0x4d, 0x53, 0x53, 0x3d, 0x52, 0x64, 0x55, 0x2a,
+    0x47, 0x5d, 0x61, 0x51, 0x5b, 0x5d, 0x66, 0x52, 0x3f, 0xfd, 0x55, 0x5a,
+    0x4b, 0x54, 0x5b, 0x60, 0x49, 0x5d, 0x43, 0x57, 0x47, 0x41, 0x45, 0x5e,
+    0x4c, 0x28, 0x3e, 0x40, 0x49, 0x4e, 0x40, 0x69, 0x4a, 0x44, 0x45, 0x43,
+    0x45, 0x3d, 0x39, 0x40, 0x4c, 0x53, 0x4b, 0x3d, 0x4e, 0x43, 0x48, 0x55,
+    0x4d, 0x50, 0x4d, 0x49, 0x4f, 0x48, 0x3e, 0x46, 0x47, 0x56, 0x40, 0x48,
+    0x46, 0x53, 0x50, 0x5d, 0x43, 0x54, 0x49, 0x47, 0x49, 0x4c, 0x48, 0x5d,
+    0x49, 0x51, 0x50, 0x3d, 0x41, 0x47, 0x48, 0x64, 0x4b, 0x44, 0x49, 0x41,
+    0x54, 0x48, 0x3d, 0x6b, 0x4c, 0x5a, 0x48, 0x4e, 0x40, 0x4c, 0x52, 0x5f,
+    0x54, 0x4a, 0x3f, 0x48, 0x43, 0x43, 0x44, 0x66, 0x49, 0x47, 0x43, 0x46,
+    0x47, 0x54, 0x42, 0x54, 0x4b, 0x4e, 0x49, 0x49, 0x49, 0x4b, 0x52, 0x4f,
+    0x43, 0x46, 0x4b, 0x49, 0x54, 0x4b, 0x40, 0x48, 0x47, 0x4a, 0x46, 0x47,
+    0x44, 0x47, 0x4c, 0x37, 0x3f, 0x49, 0x45, 0x44, 0x50, 0x49, 0x44, 0x36,
+    0x4d, 0x40, 0x45, 0x49, 0x53, 0x55, 0x44, 0x42, 0x47, 0x48, 0x46, 0x40,
+    0x4f, 0x4c, 0x41, 0x42, 0x52, 0x3a, 0x43, 0x46, 0x55, 0x51, 0x4e, 0x4f,
+    0x48, 0x51, 0x55, 0x48, 0x52, 0x66, 0x4e, 0x33, 0x49, 0x5b, 0x5f, 0x4b,
+    0x5f, 0x5b, 0x66, 0x52, 0x41, 0x7c, 0x4a, 0x59, 0x47, 0x59, 0x58, 0x67,
+    0x49, 0x5e, 0x44, 0x57, 0x49, 0x4c, 0x43, 0x56, 0x41, 0x27, 0x4c, 0x44,
+    0x51, 0x44, 0x42, 0x65, 0x49, 0x44, 0x40, 0x3d, 0x4d, 0x3e, 0x4c, 0x3c,
+    0x4f, 0x4b, 0x45, 0x44, 0x4d, 0x48, 0x47, 0x54, 0x4d, 0x4e, 0x44, 0x42,
+    0x47, 0x44, 0x3d, 0x49, 0x4e, 0x50, 0x49, 0x45, 0x58, 0x4a, 0x54, 0x5c,
+    0x41, 0x49, 0x4f, 0x42, 0x44, 0x4f, 0x4a, 0x62, 0x48, 0x50, 0x48, 0x43,
+    0x51, 0x53, 0x47, 0x6c, 0x40, 0x46, 0x3d, 0x46, 0x4a, 0x50, 0x43, 0x69,
+    0x49, 0x4f, 0x4a, 0x4c, 0x49, 0x46, 0x43, 0x6a, 0x48, 0x50, 0x49, 0x48,
+    0x48, 0x51, 0x4b, 0x65, 0x42, 0x4b, 0x4d, 0x48, 0x44, 0x4e, 0x49, 0x60,
+    0x44, 0x52, 0x42, 0x42, 0x47, 0x48, 0x4b, 0x51, 0x50, 0x4b, 0x3c, 0x4d,
+    0x4c, 0x44, 0x48, 0x55, 0x51, 0x4c, 0x55, 0x4e, 0x52, 0x4c, 0x4b, 0x39,
+    0x48, 0x42, 0x49, 0x49, 0x49, 0x50, 0x49, 0x32, 0x4e, 0x4b, 0x45, 0x4f,
+    0x42, 0x4b, 0x47, 0x50, 0x48, 0x45, 0x54, 0x49, 0x4c, 0x46, 0x40, 0x46,
+    0x43, 0x3d, 0x51, 0x44, 0x53, 0x4f, 0x54, 0x55, 0x43, 0x4f, 0x5b, 0x47,
+    0x53, 0x6c, 0x57, 0x2e, 0x50, 0x55, 0x5a, 0x4d, 0x57, 0x5d, 0x70, 0x50,
+    0x3f, 0x79, 0x4a, 0x5a, 0x4c, 0x58, 0x59, 0x63, 0x45, 0x69, 0x48, 0x58,
+    0x42, 0x4b, 0x43, 0x5c, 0x46, 0x28, 0x48, 0x49, 0x4c, 0x3f, 0x45, 0x58,
+    0x45, 0x44, 0x47, 0x40, 0x4c, 0x42, 0x3e, 0x37, 0x45, 0x54, 0x48, 0x3b,
+    0x4e, 0x48, 0x43, 0x4a, 0x50, 0x4a, 0x49, 0x46, 0x4c, 0x54, 0x3f, 0x4b,
+    0x4e, 0x56, 0x48, 0x49, 0x49, 0x4c, 0x51, 0x5f, 0x4d, 0x4b, 0x43, 0x4d,
+    0x47, 0x51, 0x43, 0x59, 0x45, 0x4e, 0x4f, 0x45, 0x44, 0x54, 0x44, 0x6d,
+    0x47, 0x51, 0x43, 0x4e, 0x4c, 0x4f, 0x43, 0x6d, 0x48, 0x53, 0x4b, 0x47,
+    0x49, 0x48, 0x46, 0x6a, 0x51, 0x4c, 0x4d, 0x45, 0x4e, 0x47, 0x46, 0x62,
+    0x4a, 0x54, 0x51, 0x4c, 0x47, 0x4d, 0x4a, 0x61, 0x3d, 0x50, 0x4c, 0x4c,
+    0x45, 0x3f, 0x3e, 0x54, 0x3d, 0x53, 0x48, 0x47, 0x52, 0x4b, 0x47, 0x51,
+    0x4f, 0x45, 0x4b, 0x4a, 0x4c, 0x46, 0x44, 0x37, 0x42, 0x50, 0x49, 0x4f,
+    0x51, 0x41, 0x44, 0x38, 0x54, 0x40, 0x51, 0x52, 0x3e, 0x43, 0x44, 0x47,
+    0x49, 0x4b, 0x4b, 0x46, 0x53, 0x54, 0x55, 0x4b, 0x4a, 0x37, 0x43, 0x4a,
+    0x51, 0x47, 0x51, 0x54, 0x43, 0x46, 0x56, 0x3d, 0x54, 0x66, 0x4f, 0x30,
+    0x45, 0x52, 0x5a, 0x43, 0x5c, 0x65, 0x5d, 0x52, 0x32, 0x77, 0x53, 0x5f,
+    0x4a, 0x5a, 0x4f, 0x5e, 0x4e, 0x61, 0x4b, 0x5b, 0x4a, 0x53, 0x3e, 0x61,
+    0x47, 0x24, 0x3e, 0x48, 0x4d, 0x43, 0x40, 0x53, 0x4e, 0x41, 0x43, 0x3d,
+    0x50, 0x49, 0x41, 0x3a, 0x4e, 0x4b, 0x48, 0x49, 0x48, 0x49, 0x46, 0x50,
+    0x4f, 0x4b, 0x47, 0x4b, 0x48, 0x52, 0x3e, 0x4d, 0x4d, 0x59, 0x4c, 0x3e,
+    0x52, 0x49, 0x4f, 0x5e, 0x54, 0x59, 0x47, 0x4d, 0x40, 0x4c, 0x4b, 0x64,
+    0x42, 0x4c, 0x53, 0x46, 0x4e, 0x50, 0x46, 0x6a, 0x41, 0x59, 0x44, 0x4b,
+    0x4f, 0x44, 0x52, 0x6c, 0x54, 0x4e, 0x46, 0x48, 0x42, 0x3d, 0x44, 0x67,
+    0x44, 0x4f, 0x47, 0x54, 0x4c, 0x4f, 0x43, 0x61, 0x4c, 0x54, 0x4f, 0x43,
+    0x49, 0x40, 0x4a, 0x5f, 0x4a, 0x52, 0x47, 0x43, 0x4c, 0x43, 0x49, 0x53,
+    0x4c, 0x4b, 0x43, 0x3d, 0x4e, 0x45, 0x49, 0x50, 0x44, 0x53, 0x4f, 0x48,
+    0x4b, 0x46, 0x44, 0x3c, 0x50, 0x42, 0x43, 0x40, 0x47, 0x43, 0x42, 0x34,
+    0x47, 0x42, 0x3f, 0x4a, 0x48, 0x42, 0x48, 0x4c, 0x42, 0x4c, 0x4e, 0x47,
+    0x48, 0x47, 0x51, 0x51, 0x4d, 0x3d, 0x3e, 0x4b, 0x54, 0x4c, 0x4c, 0x59,
+    0x4f, 0x50, 0x57, 0x3c, 0x54, 0x62, 0x54, 0x35, 0x3d, 0x5a, 0x5b, 0x47,
+    0x59, 0x63, 0x66, 0x4d, 0x3c, 0x79, 0x50, 0x5f, 0x45, 0x58, 0x4e, 0x5d,
+    0x48, 0x61, 0x43, 0x54, 0x47, 0x54, 0x4d, 0x54, 0x4b, 0x25, 0x41, 0x44,
+    0x4c, 0x4a, 0x3b, 0x52, 0x47, 0x3c, 0x45, 0x3c, 0x53, 0x44, 0x44, 0x40,
+    0x50, 0x4c, 0x45, 0x3a, 0x4c, 0x51, 0x44, 0x49, 0x4d, 0x52, 0x4d, 0x4b,
+    0x45, 0x52, 0x3d, 0x50, 0x4a, 0x58, 0x4a, 0x47, 0x4d, 0x47, 0x4e, 0x52,
+    0x4f, 0x4d, 0x4f, 0x49, 0x52, 0x52, 0x4c, 0x5e, 0x47, 0x4d, 0x46, 0x4d,
+    0x4c, 0x48, 0x50, 0x70, 0x41, 0x4a, 0x48, 0x3d, 0x45, 0x48, 0x45, 0x74,
+    0x47, 0x4c, 0x43, 0x4f, 0x4a, 0x4a, 0x40, 0x68, 0x52, 0x49, 0x3e, 0x3e,
+    0x4e, 0x4b, 0x4b, 0x69, 0x42, 0x4f, 0x45, 0x47, 0x3f, 0x45, 0x46, 0x56,
+    0x45, 0x4a, 0x47, 0x44, 0x52, 0x4b, 0x53, 0x4e, 0x4e, 0x46, 0x45, 0x40,
+    0x47, 0x4b, 0x53, 0x52, 0x53, 0x51, 0x4f, 0x46, 0x42, 0x43, 0x50, 0x3e,
+    0x48, 0x4e, 0x41, 0x53, 0x4d, 0x48, 0x48, 0x33, 0x40, 0x43, 0x4b, 0x42,
+    0x52, 0x4c, 0x42, 0x4e, 0x41, 0x4e, 0x4f, 0x50, 0x43, 0x49, 0x4d, 0x47,
+    0x4a, 0x3a, 0x3f, 0x51, 0x51, 0x44, 0x4e, 0x54, 0x40, 0x55, 0x59, 0x3c,
+    0x57, 0x67, 0x4e, 0x2e, 0x4c, 0x5b, 0x5b, 0x51, 0x58, 0x63, 0x62, 0x52,
+    0x3c, 0x72, 0x51, 0x5a, 0x4e, 0x53, 0x4a, 0x5c, 0x51, 0x69, 0x42, 0x51,
+    0x48, 0x54, 0x48, 0x57, 0x3e, 0x37, 0x3f, 0x4d, 0x4d, 0x4a, 0x35, 0x57,
+    0x4e, 0x40, 0x45, 0x4a, 0x45, 0x4e, 0x49, 0x40, 0x49, 0x53, 0x51, 0x44,
+    0x4a, 0x50, 0x4b, 0x4b, 0x50, 0x4f, 0x3e, 0x44, 0x45, 0x44, 0x4c, 0x51,
+    0x47, 0x51, 0x46, 0x42, 0x48, 0x50, 0x49, 0x4d, 0x43, 0x54, 0x52, 0x4d,
+    0x4e, 0x4f, 0x3f, 0x63, 0x54, 0x57, 0x41, 0x44, 0x4e, 0x50, 0x4e, 0x66,
+    0x41, 0x53, 0x4b, 0x4d, 0x4e, 0x4f, 0x43, 0x6d, 0x4e, 0x51, 0x49, 0x4f,
+    0x49, 0x4a, 0x4a, 0x6c, 0x4b, 0x4f, 0x3d, 0x47, 0x4d, 0x51, 0x3c, 0x66,
+    0x4b, 0x56, 0x3e, 0x4c, 0x41, 0x46, 0x45, 0x68, 0x47, 0x4b, 0x4a, 0x54,
+    0x53, 0x48, 0x51, 0x59, 0x45, 0x43, 0x50, 0x45, 0x4f, 0x45, 0x42, 0x55,
+    0x48, 0x52, 0x4c, 0x46, 0x52, 0x49, 0x47, 0x3d, 0x55, 0x48, 0x52, 0x52,
+    0x40, 0x4e, 0x47, 0x31, 0x45, 0x4f, 0x42, 0x4a, 0x4e, 0x50, 0x42, 0x4a,
+    0x49, 0x57, 0x46, 0x4b, 0x45, 0x4e, 0x4d, 0x46, 0x47, 0x43, 0x50, 0x4e,
+    0x4f, 0x4c, 0x53, 0x55, 0x45, 0x51, 0x5b, 0x3a, 0x52, 0x64, 0x54, 0x2d,
+    0x42, 0x59, 0x59, 0x45, 0x59, 0x67, 0x69, 0x53, 0x3f, 0x78, 0x50, 0x60,
+    0x4c, 0x4c, 0x5b, 0x53, 0x45, 0x63, 0x49, 0x63, 0x51, 0x4c, 0x41, 0x4e,
+    0x4b, 0x37, 0x45, 0x4e, 0x48, 0x4c, 0x39, 0x55, 0x44, 0x37, 0x3c, 0x49,
+    0x44, 0x56, 0x3e, 0x40, 0x4d, 0x45, 0x4c, 0x43, 0x42, 0x41, 0x40, 0x42,
+    0x57, 0x4f, 0x43, 0x3f, 0x52, 0x53, 0x51, 0x4b, 0x4b, 0x55, 0x46, 0x40,
+    0x49, 0x45, 0x40, 0x4f, 0x47, 0x58, 0x4b, 0x53, 0x4e, 0x52, 0x54, 0x5e,
+    0x4b, 0x51, 0x50, 0x44, 0x50, 0x4b, 0x4f, 0x70, 0x49, 0x4f, 0x4c, 0x50,
+    0x45, 0x56, 0x4b, 0x6b, 0x49, 0x52, 0x4a, 0x3f, 0x44, 0x4b, 0x48, 0x72,
+    0x4c, 0x47, 0x4e, 0x43, 0x46, 0x4c, 0x4f, 0x61, 0x4a, 0x52, 0x52, 0x46,
+    0x4a, 0x4d, 0x46, 0x65, 0x48, 0x4e, 0x4d, 0x4e, 0x46, 0x4e, 0x53, 0x59,
+    0x43, 0x49, 0x43, 0x47, 0x45, 0x47, 0x53, 0x50, 0x3e, 0x4d, 0x41, 0x46,
+    0x4c, 0x4a, 0x4c, 0x35, 0x3f, 0x4f, 0x50, 0x48, 0x47, 0x4d, 0x4c, 0x32,
+    0x45, 0x53, 0x43, 0x4d, 0x4e, 0x4a, 0x3e, 0x4b, 0x55, 0x4f, 0x53, 0x4c,
+    0x4a, 0x4d, 0x48, 0x53, 0x4f, 0x3a, 0x47, 0x4b, 0x4e, 0x4e, 0x51, 0x59,
+    0x41, 0x50, 0x57, 0x38, 0x5d, 0x63, 0x59, 0x2b, 0x45, 0x53, 0x5a, 0x4e,
+    0x5c, 0x60, 0x5e, 0x4c, 0x41, 0x6f, 0x53, 0x5c, 0x48, 0x53, 0x56, 0x54,
+    0x4b, 0x62, 0x46, 0x63, 0x47, 0x4e, 0x40, 0x51, 0x43, 0x36, 0x44, 0x42,
+    0x46, 0x51, 0x41, 0x54, 0x4e, 0x36, 0x40, 0x4b, 0x55, 0x49, 0x40, 0x3f,
+    0x4b, 0x42, 0x4a, 0x4a, 0x48, 0x47, 0x40, 0x43, 0x4d, 0x4f, 0x55, 0x3f,
+    0x53, 0x42, 0x4d, 0x56, 0x49, 0x51, 0x4f, 0x41, 0x3b, 0x48, 0x43, 0x4e,
+    0x4b, 0x5c, 0x4f, 0x45, 0x4a, 0x4c, 0x46, 0x66, 0x43, 0x45, 0x46, 0x48,
+    0x4f, 0x4e, 0x40, 0x71, 0x4b, 0x4e, 0x3e, 0x42, 0x4d, 0x52, 0x42, 0x71,
+    0x4c, 0x54, 0x4f, 0x3f, 0x4c, 0x43, 0x4a, 0x73, 0x48, 0x48, 0x4c, 0x4b,
+    0x4c, 0x4d, 0x40, 0x72, 0x3e, 0x51, 0x49, 0x48, 0x52, 0x53, 0x45, 0x65,
+    0x52, 0x4e, 0x4f, 0x44, 0x4c, 0x43, 0x4a, 0x5e, 0x3e, 0x56, 0x46, 0x55,
+    0x55, 0x43, 0x49, 0x51, 0x4f, 0x52, 0x49, 0x4d, 0x46, 0x47, 0x49, 0x3e,
+    0x51, 0x49, 0x41, 0x53, 0x42, 0x47, 0x46, 0x3b, 0x4d, 0x4e, 0x48, 0x44,
+    0x42, 0x48, 0x4c, 0x47, 0x42, 0x4e, 0x4a, 0x3e, 0x44, 0x54, 0x4a, 0x4d,
+    0x49, 0x41, 0x41, 0x53, 0x52, 0x4c, 0x4c, 0x56, 0x49, 0x4a, 0x5a, 0x3f,
+    0x5b, 0x5c, 0x59, 0x2f, 0x49, 0x52, 0x5a, 0x4e, 0x5a, 0x61, 0x67, 0x4c,
+    0x41, 0x6f, 0x5a, 0x5a, 0x40, 0x5a, 0x54, 0x4e, 0x49, 0x66, 0x45, 0x5a,
+    0x4a, 0x45, 0x44, 0x4b, 0x44, 0x36, 0x41, 0x4c, 0x45, 0x44, 0x3d, 0x51,
+    0x3f, 0x35, 0x3c, 0x46, 0x53, 0x5c, 0x3f, 0x3e, 0x50, 0x43, 0x46, 0x4b,
+    0x40, 0x54, 0x41, 0x47, 0x4b, 0x51, 0x41, 0x46, 0x4a, 0x4d, 0x51, 0x52,
+    0x43, 0x58, 0x45, 0x46, 0x4e, 0x46, 0x4a, 0x4b, 0x44, 0x54, 0x4c, 0x4c,
+    0x43, 0x59, 0x48, 0x61, 0x4e, 0x4f, 0x4d, 0x4d, 0x4a, 0x52, 0x4c, 0x6e,
+    0x49, 0x57, 0x48, 0x4d, 0x46, 0x46, 0x4d, 0x72, 0x4a, 0x4e, 0x47, 0x44,
+    0x49, 0x4f, 0x48, 0x73, 0x42, 0x40, 0x4d, 0x44, 0x4d, 0x57, 0x3e, 0x69,
+    0x50, 0x52, 0x4c, 0x55, 0x46, 0x4c, 0x44, 0x5f, 0x4b, 0x4d, 0x55, 0x4c,
+    0x48, 0x49, 0x4a, 0x5e, 0x47, 0x4b, 0x45, 0x53, 0x55, 0x53, 0x4d, 0x53,
+    0x47, 0x5c, 0x45, 0x4e, 0x4e, 0x52, 0x4c, 0x39, 0x4b, 0x4c, 0x49, 0x46,
+    0x4a, 0x4e, 0x4b, 0x33, 0x46, 0x47, 0x52, 0x41, 0x49, 0x4b, 0x4c, 0x48,
+    0x51, 0x53, 0x44, 0x4c, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x4b, 0x50, 0x47,
+    0x4d, 0x4b, 0x4c, 0x4f, 0x44, 0x45, 0x58, 0x3c, 0x56, 0x5a, 0x56, 0x23,
+    0x4f, 0x4d, 0x5c, 0x4e, 0x59, 0x5a, 0x65, 0x43, 0x45, 0x66, 0x54, 0x5f,
+    0x45, 0x5e, 0x54, 0x4f, 0x48, 0x5f, 0x44, 0x59, 0x48, 0x46, 0x47, 0x49,
+    0x4d, 0x3c, 0x49, 0x54, 0x3e, 0x48, 0x43, 0x5b, 0x4a, 0x35, 0x41, 0x43,
+    0x4b, 0x55, 0x43, 0x38, 0x46, 0x42, 0x4a, 0x4e, 0x54, 0x4b, 0x4d, 0x46,
+    0x43, 0x4e, 0x44, 0x47, 0x56, 0x4c, 0x51, 0x57, 0x41, 0x4d, 0x43, 0x41,
+    0x51, 0x47, 0x41, 0x51, 0x51, 0x4f, 0x46, 0x50, 0x52, 0x4e, 0x4d, 0x60,
+    0x41, 0x49, 0x46, 0x50, 0x48, 0x56, 0x42, 0x6d, 0x40, 0x45, 0x44, 0x55,
+    0x40, 0x4e, 0x40, 0x7c, 0x47, 0x5a, 0x44, 0x44, 0x45, 0x56, 0x55, 0x71,
+    0x47, 0x4b, 0x4b, 0x45, 0x4f, 0x54, 0x4c, 0x73, 0x48, 0x55, 0x44, 0x4d,
+    0x4a, 0x47, 0x49, 0x5e, 0x4d, 0x52, 0x4e, 0x4c, 0x48, 0x52, 0x48, 0x58,
+    0x4c, 0x5a, 0x49, 0x4b, 0x53, 0x46, 0x4d, 0x4b, 0x48, 0x53, 0x41, 0x49,
+    0x4a, 0x56, 0x51, 0x3a, 0x4c, 0x4e, 0x4f, 0x51, 0x4c, 0x59, 0x47, 0x45,
+    0x4f, 0x50, 0x4a, 0x4f, 0x4d, 0x3f, 0x44, 0x4e, 0x42, 0x4a, 0x4a, 0x43,
+    0x46, 0x4e, 0x4c, 0x4f, 0x47, 0x47, 0x4c, 0x4b, 0x52, 0x50, 0x50, 0x4b,
+    0x42, 0x45, 0x54, 0x44, 0x54, 0x59, 0x4c, 0x2b, 0x4d, 0x4c, 0x55, 0x4e,
+    0x5c, 0x5b, 0x5a, 0x42, 0x47, 0x5e, 0x56, 0x59, 0x47, 0x65, 0x55, 0x4c,
+    0x4c, 0x59, 0x42, 0x5a, 0x4e, 0x46, 0x4e, 0x4b, 0x53, 0x46, 0x49, 0x56,
+    0x48, 0x58, 0x4b, 0x4f, 0x45, 0x38, 0x40, 0x44, 0x49, 0x51, 0x4a, 0x3b,
+    0x53, 0x40, 0x40, 0x48, 0x51, 0x49, 0x44, 0x46, 0x52, 0x4b, 0x4e, 0x45,
+    0x48, 0x5a, 0x4e, 0x57, 0x44, 0x53, 0x49, 0x40, 0x4c, 0x47, 0x41, 0x4f,
+    0x49, 0x55, 0x46, 0x50, 0x57, 0x5b, 0x48, 0x66, 0x50, 0x49, 0x51, 0x55,
+    0x55, 0x4f, 0x47, 0x72, 0x49, 0x4f, 0x41, 0x4c, 0x49, 0x42, 0x48, 0x75,
+    0x4a, 0x55, 0x45, 0x4a, 0x41, 0x51, 0x41, 0x70, 0x47, 0x49, 0x42, 0x52,
+    0x4f, 0x47, 0x46, 0x63, 0x4f, 0x53, 0x46, 0x4f, 0x49, 0x53, 0x52, 0x63,
+    0x4c, 0x59, 0x46, 0x41, 0x49, 0x51, 0x3e, 0x53, 0x45, 0x52, 0x51, 0x40,
+    0x4f, 0x4c, 0x41, 0x4c, 0x47, 0x4a, 0x46, 0x47, 0x53, 0x47, 0x48, 0x39,
+    0x53, 0x4b, 0x46, 0x4b, 0x50, 0x4c, 0x41, 0x40, 0x48, 0x4e, 0x49, 0x4e,
+    0x44, 0x53, 0x44, 0x4e, 0x53, 0x49, 0x49, 0x4e, 0x46, 0x3f, 0x45, 0x42,
+    0x4c, 0x47, 0x42, 0x4e, 0x49, 0x4a, 0x49, 0x44, 0x51, 0x48, 0x57, 0x4c,
+    0x4d, 0x60, 0x4e, 0x2d, 0x46, 0x4d, 0x58, 0x53, 0x5c, 0x56, 0x5e, 0x41,
+    0x3e, 0x66, 0x53, 0x5b, 0x49, 0x59, 0x5a, 0x55, 0x4e, 0x59, 0x46, 0x4a,
+    0x44, 0x42, 0x45, 0x3d, 0x4d, 0x45, 0x44, 0x4f, 0x4d, 0x53, 0x42, 0x5a,
+    0x43, 0x3c, 0x48, 0x4f, 0x44, 0x59, 0x3f, 0x33, 0x45, 0x48, 0x43, 0x45,
+    0x4d, 0x56, 0x48, 0x44, 0x3e, 0x48, 0x46, 0x4d, 0x44, 0x53, 0x46, 0x4e,
+    0x45, 0x52, 0x40, 0x46, 0x4c, 0x50, 0x4e, 0x4b, 0x4d, 0x46, 0x48, 0x46,
+    0x50, 0x52, 0x4e, 0x57, 0x3f, 0x4a, 0x49, 0x50, 0x53, 0x4e, 0x41, 0x66,
+    0x49, 0x4f, 0x40, 0x4b, 0x50, 0x4c, 0x4a, 0x70, 0x42, 0x51, 0x41, 0x4c,
+    0x50, 0x4f, 0x46, 0x60, 0x45, 0x47, 0x54, 0x4c, 0x49, 0x59, 0x52, 0x61,
+    0x4a, 0x53, 0x52, 0x4f, 0x4b, 0x4c, 0x46, 0x56, 0x4b, 0x54, 0x4f, 0x47,
+    0x53, 0x49, 0x4f, 0x50, 0x4a, 0x54, 0x45, 0x4e, 0x47, 0x48, 0x47, 0x42,
+    0x49, 0x44, 0x46, 0x46, 0x55, 0x4c, 0x4f, 0x36, 0x4c, 0x49, 0x3f, 0x4e,
+    0x45, 0x4b, 0x4b, 0x36, 0x48, 0x4f, 0x4b, 0x50, 0x45, 0x47, 0x49, 0x3f,
+    0x50, 0x4b, 0x52, 0x48, 0x4c, 0x41, 0x49, 0x43, 0x4e, 0x3c, 0x43, 0x45,
+    0x3e, 0x45, 0x48, 0x44, 0x4d, 0x48, 0x56, 0x47, 0x4b, 0x54, 0x52, 0x2b,
+    0x4d, 0x4e, 0x57, 0x4f, 0x57, 0x4f, 0x56, 0x43, 0x48, 0x5f, 0x4c, 0x51,
+    0x4d, 0x58, 0x4f, 0x4e, 0x50, 0x50, 0x48, 0x4a, 0x4d, 0x3f, 0x47, 0x40,
+    0x4b, 0x4a, 0x4e, 0x4b, 0x4a, 0x58, 0x42, 0x49, 0x3f, 0x42, 0x3d, 0x4d,
+    0x46, 0x53, 0x45, 0x3e, 0x4e, 0x49, 0x4f, 0x4a, 0x47, 0x46, 0x40, 0x3e,
+    0x4c, 0x4d, 0x4d, 0x45, 0x4a, 0x56, 0x40, 0x4a, 0x47, 0x57, 0x4f, 0x48,
+    0x4f, 0x48, 0x47, 0x49, 0x4e, 0x52, 0x50, 0x48, 0x42, 0x52, 0x43, 0x5a,
+    0x49, 0x42, 0x4f, 0x4f, 0x51, 0x51, 0x50, 0x5c, 0x4b, 0x43, 0x4b, 0x48,
+    0x50, 0x51, 0x4b, 0x6d, 0x53, 0x4e, 0x44, 0x4c, 0x4c, 0x51, 0x46, 0x5b,
+    0x44, 0x48, 0x4d, 0x4c, 0x46, 0x4f, 0x54, 0x54, 0x4e, 0x54, 0x42, 0x4e,
+    0x4c, 0x49, 0x49, 0x58, 0x49, 0x53, 0x53, 0x4a, 0x4e, 0x4b, 0x47, 0x53,
+    0x43, 0x55, 0x46, 0x51, 0x3d, 0x3d, 0x4c, 0x47, 0x4e, 0x51, 0x47, 0x48,
+    0x4b, 0x4c, 0x42, 0x3b, 0x43, 0x4f, 0x44, 0x4d, 0x54, 0x4b, 0x4a, 0x47,
+    0x4c, 0x42, 0x4b, 0x43, 0x41, 0x4e, 0x4d, 0x50, 0x45, 0x46, 0x41, 0x4a,
+    0x49, 0x49, 0x54, 0x47, 0x4c, 0x4b, 0x50, 0x4e, 0x3f, 0x43, 0x40, 0x41,
+    0x44, 0x54, 0x51, 0x47, 0x4c, 0x4b, 0x4f, 0x34, 0x4d, 0x4c, 0x4f, 0x49,
+    0x56, 0x4e, 0x4b, 0x3e, 0x48, 0x53, 0x4e, 0x56, 0x49, 0x4e, 0x4c, 0x40,
+    0x55, 0x4a, 0x46, 0x4f, 0x48, 0x4a, 0x55, 0x41, 0x55, 0x3d, 0x47, 0x51,
+    0x50, 0x51, 0x45, 0x51, 0x4b, 0x4e, 0x4a, 0x4f, 0x4b, 0x45, 0x42, 0x3c,
+    0x4e, 0x46, 0x47, 0x49, 0x4a, 0x4c, 0x48, 0x41, 0x4f, 0x4a, 0x44, 0x45,
+    0x4e, 0x4e, 0x43, 0x41, 0x4c, 0x47, 0x48, 0x49, 0x4c, 0x48, 0x4f, 0x4a,
+    0x4f, 0x4a, 0x4b, 0x45, 0x42, 0x40, 0x52, 0x55, 0x4f, 0x49, 0x44, 0x54,
+    0x49, 0x48, 0x51, 0x4d, 0x44, 0x4a, 0x4d, 0x49, 0x4e, 0x4e, 0x51, 0x5d,
+    0x42, 0x4d, 0x49, 0x3f, 0x48, 0x58, 0x40, 0x5e, 0x48, 0x4f, 0x49, 0x53,
+    0x45, 0x47, 0x4f, 0x53, 0x4d, 0x4f, 0x4d, 0x4d, 0x46, 0x55, 0x43, 0x51,
+    0x4f, 0x51, 0x4a, 0x4e, 0x49, 0x42, 0x49, 0x50, 0x47, 0x4d, 0x42, 0x47,
+    0x46, 0x50, 0x55, 0x47, 0x4d, 0x47, 0x3e, 0x51, 0x4d, 0x43, 0x44, 0x39,
+    0x4e, 0x4b, 0x41, 0x48, 0x52, 0x53, 0x4d, 0x39, 0x4d, 0x51, 0x4c, 0x46,
+    0x4e, 0x47, 0x49, 0x41, 0x45, 0x4a, 0x4a, 0x45, 0x50, 0x4a, 0x40, 0x48,
+    0x43, 0x47, 0x44, 0x50, 0x4d, 0x47, 0x4a, 0x47, 0x45, 0x57, 0x41, 0x34,
+    0x51, 0x40, 0x45, 0x44, 0x3c, 0x47, 0x46, 0x47, 0x44, 0x48, 0x42, 0x40,
+    0x37, 0x53, 0x4a, 0x43, 0x49, 0x4b, 0x43, 0x44, 0x4f, 0x4f, 0x48, 0x48,
+    0x53, 0x49, 0x4b, 0x48, 0x4e, 0x4c, 0x42, 0x45, 0x4c, 0x4a, 0x4a, 0x46,
+    0x47, 0x57, 0x3e, 0x46, 0x46, 0x45, 0x4a, 0x43, 0x46, 0x49, 0x43, 0x52,
+    0x3e, 0x48, 0x4a, 0x4b, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4b, 0x4e, 0x44,
+    0x42, 0x44, 0x50, 0x41, 0x49, 0x49, 0x4d, 0x4b, 0x44, 0x46, 0x4a, 0x52,
+    0x4d, 0x47, 0x49, 0x4b, 0x4d, 0x49, 0x41, 0x48, 0x4b, 0x3f, 0x45, 0x4f,
+    0x51, 0x41, 0x55, 0x42, 0x49, 0x4b, 0x4b, 0x51, 0x4f, 0x4f, 0x42, 0x4e,
+    0x4e, 0x4a, 0x52, 0x41, 0x4f, 0x42, 0x48, 0x3d, 0x4a, 0x44, 0x50, 0x4b,
+    0x49, 0x45, 0x51, 0x46, 0x51, 0x44, 0x4d, 0x47, 0x4a, 0x4a, 0x4d, 0x49,
+    0x4d, 0x48, 0x4d, 0x4f, 0x4d, 0x44, 0x48, 0x4e, 0x4a, 0x4b, 0x40, 0x4f,
+    0x47, 0x3a, 0x41, 0x47, 0x4a, 0x4a, 0x4a, 0x48, 0x42, 0x41, 0x4d, 0x56,
+    0x3f, 0x52, 0x4d, 0x4c, 0x44, 0x48, 0x47, 0x4e, 0x51, 0x4c, 0x49, 0x47,
+    0x44, 0x4c, 0x4b, 0x47, 0x48, 0x46, 0x47, 0x4f, 0x43, 0x41, 0x3e, 0x47,
+    0x53, 0x4a, 0x46, 0x42, 0x46, 0x61, 0x43, 0x30, 0x4e, 0x52, 0x43, 0x45,
+    0x32, 0x4a, 0x45, 0x48, 0x51, 0x3e, 0x44, 0x3b, 0x3a, 0x63, 0x4c, 0x46,
+    0x4c, 0x49, 0x3d, 0x41, 0x52, 0x53, 0x43, 0x43, 0x45, 0x3d, 0x48, 0x40,
+    0x4b, 0x4a, 0x49, 0x48, 0x4d, 0x49, 0x4b, 0x4c, 0x3f, 0x4e, 0x4b, 0x47,
+    0x45, 0x4d, 0x3f, 0x4d, 0x43, 0x50, 0x48, 0x4b, 0x54, 0x3e, 0x44, 0x4e,
+    0x3e, 0x4c, 0x43, 0x4b, 0x4c, 0x4b, 0x3e, 0x49, 0x50, 0x52, 0x4a, 0x4a,
+    0x50, 0x50, 0x43, 0x4e, 0x49, 0x48, 0x51, 0x50, 0x47, 0x3d, 0x45, 0x4b,
+    0x47, 0x46, 0x4d, 0x4c, 0x45, 0x4d, 0x4a, 0x4d, 0x42, 0x4d, 0x47, 0x4f,
+    0x40, 0x43, 0x46, 0x51, 0x47, 0x4b, 0x43, 0x49, 0x49, 0x50, 0x4b, 0x4b,
+    0x46, 0x4a, 0x4c, 0x48, 0x49, 0x47, 0x4b, 0x56, 0x55, 0x4f, 0x49, 0x4f,
+    0x4f, 0x4e, 0x4b, 0x49, 0x4a, 0x4a, 0x49, 0x47, 0x44, 0x4b, 0x47, 0x50,
+    0x46, 0x4c, 0x46, 0x4c, 0x4b, 0x4e, 0x49, 0x57, 0x4d, 0x3e, 0x46, 0x47,
+    0x50, 0x45, 0x4f, 0x52, 0x3e, 0x4d, 0x49, 0x4a, 0x40, 0x49, 0x4f, 0x5c,
+    0x3e, 0x4a, 0x47, 0x45, 0x47, 0x41, 0x44, 0x3f, 0x4b, 0x4a, 0x52, 0x43,
+    0x41, 0x43, 0x43, 0x47, 0x55, 0x49, 0x42, 0x4c, 0x58, 0x4b, 0x42, 0x48,
+    0x4b, 0x5a, 0x36, 0x33, 0x53, 0x57, 0x4d, 0x4a, 0x37, 0x4c, 0x3e, 0x48,
+    0x43, 0x46, 0x39, 0x3c, 0x34, 0x65, 0x47, 0x3d, 0x47, 0x42, 0x3c, 0x3e,
+    0x45, 0x5b, 0x44, 0x3e, 0x45, 0x43, 0x46, 0x43, 0x59, 0x4e, 0x48, 0x46,
+    0x43, 0x3f, 0x46, 0x47, 0x4e, 0x53, 0x50, 0x4b, 0x4a, 0x3f, 0x4a, 0x54,
+    0x4c, 0x4a, 0x43, 0x50, 0x4c, 0x42, 0x4d, 0x55, 0x4d, 0x51, 0x51, 0x46,
+    0x49, 0x41, 0x50, 0x44, 0x4a, 0x4b, 0x4b, 0x43, 0x4b, 0x4e, 0x47, 0x4b,
+    0x3e, 0x4e, 0x44, 0x4d, 0x49, 0x41, 0x49, 0x44, 0x50, 0x4d, 0x45, 0x4e,
+    0x4b, 0x50, 0x45, 0x4c, 0x46, 0x4a, 0x46, 0x42, 0x50, 0x45, 0x48, 0x53,
+    0x4d, 0x44, 0x42, 0x50, 0x4c, 0x49, 0x45, 0x55, 0x4d, 0x42, 0x43, 0x41,
+    0x4c, 0x41, 0x4e, 0x4d, 0x42, 0x4e, 0x3f, 0x44, 0x4d, 0x4c, 0x4b, 0x4a,
+    0x47, 0x47, 0x4e, 0x54, 0x43, 0x40, 0x41, 0x55, 0x49, 0x49, 0x4e, 0x49,
+    0x52, 0x4e, 0x46, 0x58, 0x4b, 0x3d, 0x4a, 0x44, 0x4e, 0x47, 0x53, 0x58,
+    0x47, 0x42, 0x52, 0x46, 0x49, 0x4b, 0x47, 0x5a, 0x4c, 0x46, 0x46, 0x49,
+    0x4b, 0x4d, 0x3d, 0x48, 0x40, 0x54, 0x48, 0x4c, 0x4c, 0x44, 0x4c, 0x46,
+    0x47, 0x4b, 0x4d, 0x44, 0x5a, 0x4a, 0x3e, 0x46, 0x48, 0x53, 0x39, 0x30,
+    0x51, 0x60, 0x4d, 0x47, 0x35, 0x4f, 0x45, 0x45, 0x4a, 0x4b, 0x42, 0x3f,
+    0x38, 0x6c, 0x3d, 0x40, 0x44, 0x48, 0x3a, 0x3b, 0x46, 0x5e, 0x45, 0x3b,
+    0x47, 0x47, 0x45, 0x42, 0x53, 0x55, 0x44, 0x45, 0x46, 0x43, 0x48, 0x48,
+    0x52, 0x5d, 0x3e, 0x41, 0x53, 0x42, 0x48, 0x55, 0x49, 0x4d, 0x4a, 0x46,
+    0x52, 0x46, 0x51, 0x48, 0x44, 0x46, 0x48, 0x41, 0x49, 0x49, 0x49, 0x49,
+    0x41, 0x4d, 0x40, 0x4f, 0x45, 0x46, 0x45, 0x3f, 0x53, 0x40, 0x46, 0x43,
+    0x47, 0x4d, 0x50, 0x4c, 0x55, 0x48, 0x45, 0x47, 0x4f, 0x46, 0x42, 0x4d,
+    0x41, 0x48, 0x46, 0x4e, 0x42, 0x48, 0x48, 0x45, 0x41, 0x45, 0x48, 0x4a,
+    0x40, 0x49, 0x43, 0x4b, 0x48, 0x4a, 0x4c, 0x45, 0x4b, 0x48, 0x48, 0x4f,
+    0x40, 0x4b, 0x4a, 0x44, 0x50, 0x4a, 0x43, 0x50, 0x4c, 0x44, 0x46, 0x4c,
+    0x42, 0x44, 0x4e, 0x55, 0x47, 0x49, 0x48, 0x47, 0x52, 0x4e, 0x44, 0x59,
+    0x4e, 0x44, 0x4a, 0x48, 0x49, 0x4a, 0x42, 0x4e, 0x3e, 0x39, 0x51, 0x45,
+    0x4d, 0x49, 0x4f, 0x54, 0x51, 0x4b, 0x50, 0x44, 0x53, 0x4f, 0x4d, 0x48,
+    0x42, 0x45, 0x4e, 0x40, 0x4a, 0x48, 0x43, 0x48, 0x52, 0x54, 0x4d, 0x49,
+    0x5f, 0x53, 0x46, 0x4e, 0x3f, 0x5a, 0x36, 0x31, 0x52, 0x60, 0x4b, 0x4a,
+    0x32, 0x51, 0x40, 0x44, 0x46, 0x52, 0x44, 0x41, 0x3a, 0x6e, 0x41, 0x3e,
+    0x47, 0x3e, 0x3a, 0x2a, 0x44, 0x5a, 0x40, 0x3c, 0x4d, 0x48, 0x46, 0x3b,
+    0x5e, 0x58, 0x4d, 0x47, 0x51, 0x3a, 0x4b, 0x48, 0x5b, 0x5a, 0x54, 0x43,
+    0x50, 0x4c, 0x54, 0x54, 0x49, 0x47, 0x4f, 0x48, 0x50, 0x40, 0x4f, 0x4a,
+    0x42, 0x42, 0x3c, 0x41, 0x43, 0x4e, 0x53, 0x49, 0x4b, 0x4d, 0x49, 0x41,
+    0x4c, 0x3e, 0x40, 0x49, 0x40, 0x44, 0x49, 0x4f, 0x50, 0x4a, 0x42, 0x3a,
+    0x49, 0x4b, 0x47, 0x50, 0x49, 0x41, 0x52, 0x46, 0x3d, 0x44, 0x46, 0x43,
+    0x4b, 0x4b, 0x4d, 0x4b, 0x4e, 0x40, 0x45, 0x43, 0x48, 0x44, 0x55, 0x51,
+    0x4a, 0x46, 0x4e, 0x40, 0x53, 0x4a, 0x45, 0x41, 0x48, 0x48, 0x45, 0x4e,
+    0x4a, 0x48, 0x40, 0x4c, 0x54, 0x44, 0x42, 0x4d, 0x49, 0x43, 0x45, 0x4c,
+    0x43, 0x4f, 0x46, 0x3f, 0x46, 0x4f, 0x4b, 0x59, 0x46, 0x49, 0x54, 0x47,
+    0x49, 0x46, 0x45, 0x53, 0x4a, 0x49, 0x54, 0x45, 0x41, 0x45, 0x4c, 0x5e,
+    0x50, 0x3d, 0x4d, 0x49, 0x55, 0x4b, 0x49, 0x47, 0x4c, 0x4f, 0x43, 0x3d,
+    0x41, 0x4b, 0x43, 0x46, 0x4f, 0x4a, 0x4c, 0x54, 0x5e, 0x4e, 0x40, 0x4d,
+    0x3d, 0x59, 0x40, 0x28, 0x54, 0x5f, 0x4d, 0x4b, 0x36, 0x51, 0x3a, 0x47,
+    0x4a, 0x55, 0x42, 0x43, 0x3b, 0x72, 0x3b, 0x3d, 0x51, 0x42, 0x3f, 0x2d,
+    0x4b, 0x5a, 0x48, 0x44, 0x49, 0x49, 0x3d, 0x39, 0x56, 0x55, 0x46, 0x46,
+    0x4b, 0x43, 0x40, 0x4a, 0x52, 0x56, 0x4d, 0x45, 0x4b, 0x48, 0x40, 0x5a,
+    0x4e, 0x3a, 0x53, 0x48, 0x4c, 0x44, 0x49, 0x4e, 0x42, 0x47, 0x46, 0x40,
+    0x51, 0x42, 0x50, 0x4b, 0x43, 0x53, 0x44, 0x44, 0x46, 0x4c, 0x4c, 0x3c,
+    0x42, 0x45, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x3d, 0x47, 0x4b, 0x4c, 0x4e,
+    0x52, 0x4a, 0x4e, 0x41, 0x3f, 0x46, 0x43, 0x54, 0x44, 0x53, 0x4e, 0x48,
+    0x40, 0x41, 0x4f, 0x45, 0x43, 0x3c, 0x52, 0x49, 0x40, 0x44, 0x4a, 0x3f,
+    0x4d, 0x4c, 0x4f, 0x47, 0x44, 0x47, 0x55, 0x47, 0x50, 0x4d, 0x4a, 0x4c,
+    0x50, 0x48, 0x47, 0x55, 0x4b, 0x4a, 0x52, 0x49, 0x3d, 0x3f, 0x4f, 0x51,
+    0x48, 0x4e, 0x42, 0x4e, 0x42, 0x48, 0x4e, 0x49, 0x4a, 0x50, 0x45, 0x54,
+    0x41, 0x43, 0x45, 0x4d, 0x48, 0x48, 0x48, 0x51, 0x53, 0x3e, 0x55, 0x44,
+    0x52, 0x56, 0x44, 0x4d, 0x4e, 0x48, 0x4b, 0x43, 0x48, 0x53, 0x48, 0x44,
+    0x49, 0x45, 0x4e, 0x50, 0x5d, 0x4a, 0x45, 0x4c, 0x45, 0x55, 0x43, 0x2e,
+    0x59, 0x60, 0x4e, 0x4d, 0x32, 0x53, 0x3e, 0x3f, 0x40, 0x63, 0x41, 0x48,
+    0x38, 0x73, 0x38, 0x46, 0x50, 0x3e, 0x3c, 0x23, 0x48, 0x61, 0x45, 0x3c,
+    0x41, 0x41, 0x36, 0x3b, 0x58, 0x56, 0x4a, 0x40, 0x4f, 0x44, 0x45, 0x4c,
+    0x5a, 0x56, 0x47, 0x3f, 0x4d, 0x4b, 0x46, 0x5d, 0x52, 0x47, 0x45, 0x4c,
+    0x4a, 0x52, 0x4f, 0x4f, 0x4f, 0x43, 0x4f, 0x47, 0x43, 0x46, 0x3c, 0x4c,
+    0x46, 0x55, 0x40, 0x53, 0x43, 0x3e, 0x42, 0x35, 0x51, 0x41, 0x42, 0x3f,
+    0x45, 0x3d, 0x41, 0x31, 0x4e, 0x47, 0x48, 0x42, 0x41, 0x45, 0x43, 0x38,
+    0x42, 0x40, 0x4a, 0x47, 0x4e, 0x43, 0x40, 0x43, 0x48, 0x49, 0x45, 0x4f,
+    0x44, 0x42, 0x4d, 0x42, 0x42, 0x3f, 0x46, 0x52, 0x3c, 0x3c, 0x47, 0x43,
+    0x46, 0x47, 0x45, 0x40, 0x4c, 0x44, 0x43, 0x4a, 0x4b, 0x4d, 0x4e, 0x46,
+    0x51, 0x45, 0x47, 0x4b, 0x45, 0x50, 0x40, 0x42, 0x4c, 0x4c, 0x4c, 0x4f,
+    0x44, 0x3c, 0x49, 0x3c, 0x3f, 0x45, 0x3f, 0x5c, 0x42, 0x3e, 0x4b, 0x4e,
+    0x50, 0x45, 0x42, 0x5c, 0x4c, 0x48, 0x50, 0x52, 0x50, 0x47, 0x4b, 0x44,
+    0x3d, 0x50, 0x55, 0x4c, 0x48, 0x3f, 0x4b, 0x44, 0x4a, 0x51, 0x42, 0x4c,
+    0x60, 0x51, 0x41, 0x4b, 0x46, 0x5c, 0x42, 0x2c, 0x55, 0x61, 0x50, 0x52,
+    0x37, 0x5a, 0x3f, 0x43, 0x43, 0x58, 0x3a, 0x4d, 0x3e, 0x72, 0x35, 0x3f,
+    0x58, 0x41, 0x40, 0x1f, 0x55, 0x63, 0x3f, 0x49, 0x41, 0x3e, 0x35, 0x41,
+    0x65, 0x54, 0x42, 0x45, 0x45, 0x3c, 0x44, 0x45, 0x59, 0x5a, 0x4d, 0x41,
+    0x51, 0x46, 0x49, 0x59, 0x4c, 0x41, 0x42, 0x44, 0x4a, 0x45, 0x3f, 0x4a,
+    0x4a, 0x44, 0x48, 0x48, 0x52, 0x40, 0x4a, 0x4a, 0x4d, 0x54, 0x44, 0x48,
+    0x54, 0x46, 0x49, 0x3b, 0x42, 0x4a, 0x4e, 0x46, 0x4a, 0x45, 0x4f, 0x30,
+    0x46, 0x41, 0x47, 0x46, 0x4b, 0x47, 0x46, 0x38, 0x4c, 0x3a, 0x4b, 0x46,
+    0x52, 0x48, 0x4f, 0x3e, 0x48, 0x4a, 0x48, 0x4b, 0x44, 0x45, 0x4a, 0x46,
+    0x3f, 0x4f, 0x40, 0x44, 0x43, 0x43, 0x4b, 0x39, 0x46, 0x43, 0x49, 0x49,
+    0x49, 0x4a, 0x44, 0x48, 0x4c, 0x41, 0x4d, 0x52, 0x4c, 0x4a, 0x46, 0x3d,
+    0x41, 0x4b, 0x41, 0x48, 0x45, 0x3b, 0x51, 0x54, 0x4a, 0x39, 0x4d, 0x41,
+    0x54, 0x46, 0x4c, 0x53, 0x48, 0x3e, 0x4a, 0x3d, 0x41, 0x52, 0x54, 0x63,
+    0x44, 0x4d, 0x4a, 0x43, 0x52, 0x4b, 0x52, 0x52, 0x4e, 0x41, 0x48, 0x42,
+    0x48, 0x4d, 0x49, 0x45, 0x51, 0x48, 0x3e, 0x47, 0x5a, 0x52, 0x4a, 0x4e,
+    0x3e, 0x59, 0x3c, 0x2e, 0x5c, 0x5b, 0x4c, 0x56, 0x30, 0x59, 0x3a, 0x48,
+    0x3d, 0x5c, 0x44, 0x49, 0x40, 0x7c, 0x3a, 0x48, 0x54, 0x40, 0x41, 0x28,
+    0x4d, 0x64, 0x46, 0x47, 0x49, 0x40, 0x30, 0x3a, 0x5f, 0x5b, 0x42, 0x37,
+    0x49, 0x45, 0x40, 0x43, 0x5b, 0x54, 0x48, 0x4d, 0x4a, 0x47, 0x51, 0x58,
+    0x4b, 0x3c, 0x4d, 0x46, 0x4b, 0x52, 0x4c, 0x58, 0x53, 0x46, 0x42, 0x45,
+    0x4c, 0x4a, 0x4d, 0x4e, 0x52, 0x4d, 0x46, 0x44, 0x46, 0x3f, 0x46, 0x34,
+    0x4f, 0x42, 0x44, 0x46, 0x44, 0x50, 0x47, 0x30, 0x44, 0x3c, 0x42, 0x46,
+    0x4f, 0x4a, 0x52, 0x30, 0x55, 0x4f, 0x45, 0x4a, 0x48, 0x4c, 0x4e, 0x35,
+    0x4e, 0x3c, 0x45, 0x4a, 0x45, 0x4a, 0x44, 0x3c, 0x4e, 0x4a, 0x51, 0x44,
+    0x49, 0x40, 0x4a, 0x40, 0x41, 0x44, 0x4f, 0x4c, 0x43, 0x45, 0x4b, 0x43,
+    0x3e, 0x3e, 0x4c, 0x44, 0x48, 0x48, 0x42, 0x42, 0x4d, 0x43, 0x50, 0x4d,
+    0x49, 0x3c, 0x45, 0x4f, 0x4c, 0x46, 0x4b, 0x48, 0x4d, 0x4d, 0x49, 0x55,
+    0x49, 0x3b, 0x40, 0x44, 0x4a, 0x4b, 0x4e, 0x5e, 0x43, 0x47, 0x45, 0x43,
+    0x4d, 0x4d, 0x49, 0x46, 0x4a, 0x44, 0x4e, 0x3e, 0x52, 0x41, 0x47, 0x47,
+    0x4a, 0x50, 0x48, 0x43, 0x5d, 0x4f, 0x49, 0x48, 0x43, 0x4f, 0x45, 0x3e,
+    0x5a, 0x69, 0x4d, 0x5a, 0x3a, 0x5d, 0x3a, 0x48, 0x42, 0x55, 0x3e, 0x48,
+    0x48, 0x7b, 0x37, 0x40, 0x57, 0x45, 0x48, 0x24, 0x50, 0x61, 0x4c, 0x4a,
+    0x44, 0x41, 0x34, 0x38, 0x65, 0x5b, 0x4f, 0x3c, 0x4d, 0x3a, 0x4a, 0x4c,
+    0x66, 0x55, 0x50, 0x47, 0x4d, 0x46, 0x47, 0x58, 0x4c, 0x48, 0x48, 0x48,
+    0x4e, 0x59, 0x4f, 0x4b, 0x45, 0x45, 0x4b, 0x54, 0x46, 0x51, 0x4f, 0x44,
+    0x42, 0x55, 0x48, 0x44, 0x48, 0x41, 0x53, 0x2e, 0x4d, 0x45, 0x44, 0x54,
+    0x4a, 0x44, 0x53, 0x34, 0x4c, 0x46, 0x47, 0x3f, 0x4c, 0x4b, 0x47, 0x36,
+    0x47, 0x41, 0x43, 0x40, 0x51, 0x46, 0x45, 0x33, 0x46, 0x3e, 0x47, 0x50,
+    0x3f, 0x48, 0x48, 0x37, 0x41, 0x41, 0x42, 0x3e, 0x45, 0x3d, 0x49, 0x3e,
+    0x4f, 0x42, 0x49, 0x4a, 0x46, 0x46, 0x48, 0x44, 0x49, 0x45, 0x46, 0x4a,
+    0x4a, 0x47, 0x48, 0x43, 0x44, 0x45, 0x3f, 0x4c, 0x4c, 0x49, 0x4d, 0x51,
+    0x4a, 0x4a, 0x49, 0x4c, 0x42, 0x4d, 0x4b, 0x4b, 0x4a, 0x42, 0x47, 0x4d,
+    0x3e, 0x4b, 0x47, 0x5c, 0x49, 0x3d, 0x4e, 0x41, 0x44, 0x49, 0x3e, 0x3e,
+    0x4b, 0x47, 0x4e, 0x45, 0x44, 0x4a, 0x4d, 0x4a, 0x4f, 0x46, 0x45, 0x52,
+    0x60, 0x53, 0x49, 0x50, 0x3d, 0x4f, 0x43, 0x3d, 0x52, 0x64, 0x52, 0x58,
+    0x39, 0x5f, 0x36, 0x4c, 0x45, 0x57, 0x42, 0x4b, 0x3f, 0x80, 0x34, 0x47,
+    0x58, 0x41, 0x45, 0x1b, 0x4b, 0x5e, 0x4c, 0x40, 0x44, 0x42, 0x39, 0x3a,
+    0x5e, 0x5b, 0x4b, 0x3a, 0x4b, 0x3f, 0x45, 0x3e, 0x69, 0x57, 0x4b, 0x45,
+    0x4b, 0x3f, 0x45, 0x55, 0x49, 0x49, 0x48, 0x47, 0x41, 0x4f, 0x42, 0x53,
+    0x49, 0x40, 0x42, 0x3e, 0x49, 0x47, 0x53, 0x47, 0x45, 0x51, 0x4a, 0x44,
+    0x44, 0x45, 0x4e, 0x2a, 0x45, 0x42, 0x4a, 0x4b, 0x46, 0x4d, 0x41, 0x30,
+    0x3d, 0x43, 0x3f, 0x48, 0x49, 0x44, 0x4d, 0x2e, 0x48, 0x4a, 0x4c, 0x51,
+    0x50, 0x46, 0x3e, 0x2c, 0x4d, 0x3f, 0x47, 0x46, 0x3c, 0x40, 0x4c, 0x38,
+    0x4f, 0x46, 0x47, 0x53, 0x3b, 0x3c, 0x4e, 0x3e, 0x49, 0x40, 0x43, 0x4c,
+    0x4d, 0x48, 0x45, 0x3c, 0x4d, 0x4c, 0x4d, 0x45, 0x3f, 0x49, 0x4a, 0x43,
+    0x4d, 0x41, 0x4b, 0x50, 0x4e, 0x46, 0x50, 0x44, 0x49, 0x44, 0x4e, 0x42,
+    0x4a, 0x43, 0x4c, 0x4c, 0x49, 0x49, 0x44, 0x4e, 0x4b, 0x3f, 0x4b, 0x5d,
+    0x41, 0x49, 0x4b, 0x46, 0x4e, 0x48, 0x45, 0x51, 0x4d, 0x45, 0x46, 0x45,
+    0x4b, 0x4e, 0x3c, 0x4d, 0x3d, 0x41, 0x47, 0x47, 0x64, 0x54, 0x41, 0x55,
+    0x47, 0x56, 0x44, 0x3b, 0x53, 0x66, 0x4f, 0x5e, 0x40, 0x5d, 0x38, 0x4a,
+    0x41, 0x59, 0x42, 0x48, 0x47, 0xff, 0x36, 0x49, 0x59, 0x41, 0x43, 0x1d,
+    0x4d, 0x5e, 0x44, 0x44, 0x50, 0x3f, 0x39, 0x40, 0x68, 0x5e, 0x4a, 0x41,
+    0x52, 0x41, 0x43, 0x41, 0x68, 0x51, 0x45, 0x48, 0x4c, 0x46, 0x4a, 0x5e,
+    0x4e, 0x40, 0x4d, 0x41, 0x41, 0x5c, 0x3f, 0x4e, 0x4c, 0x37, 0x48, 0x40,
+    0x46, 0x47, 0x4f, 0x43, 0x53, 0x52, 0x3d, 0x44, 0x47, 0x44, 0x3d, 0x34,
+    0x44, 0x42, 0x4a, 0x43, 0x4d, 0x3f, 0x53, 0x2e, 0x42, 0x47, 0x43, 0x4d,
+    0x45, 0x45, 0x47, 0x31, 0x4d, 0x39, 0x41, 0x4a, 0x4a, 0x4d, 0x4b, 0x35,
+    0x47, 0x4e, 0x4c, 0x40, 0x4a, 0x44, 0x44, 0x36, 0x3e, 0x49, 0x3f, 0x45,
+    0x46, 0x43, 0x4e, 0x3c, 0x4d, 0x47, 0x4c, 0x48, 0x4a, 0x4b, 0x48, 0x39,
+    0x46, 0x50, 0x4a, 0x4f, 0x46, 0x41, 0x44, 0x4a, 0x41, 0x4f, 0x4c, 0x4e,
+    0x55, 0x46, 0x43, 0x46, 0x4a, 0x48, 0x4e, 0x46, 0x42, 0x40, 0x4f, 0x56,
+    0x4c, 0x45, 0x4b, 0x46, 0x4a, 0x47, 0x42, 0x5e, 0x49, 0x4e, 0x46, 0x43,
+    0x4e, 0x42, 0x45, 0x48, 0x47, 0x48, 0x4f, 0x45, 0x47, 0x51, 0x4b, 0x4c,
+    0x51, 0x39, 0x4d, 0x48, 0x60, 0x57, 0x49, 0x52, 0x3d, 0x57, 0x46, 0x3d,
+    0x53, 0x68, 0x4b, 0x60, 0x40, 0x5a, 0x41, 0x4b, 0x46, 0x56, 0x46, 0x4c,
+    0x49, 0x7e, 0x2f, 0x48, 0x51, 0x42, 0x40, 0x20, 0x4b, 0x62, 0x4d, 0x41,
+    0x4f, 0x43, 0x3d, 0x35, 0x63, 0x63, 0x46, 0x3e, 0x4e, 0x47, 0x40, 0x40,
+    0x60, 0x52, 0x4c, 0x46, 0x49, 0x48, 0x4f, 0x56, 0x51, 0x47, 0x52, 0x4e,
+    0x4b, 0x59, 0x55, 0x4f, 0x48, 0x3d, 0x48, 0x4a, 0x4d, 0x50, 0x47, 0x47,
+    0x51, 0x52, 0x4d, 0x51, 0x45, 0x45, 0x47, 0x2d, 0x4d, 0x41, 0x43, 0x49,
+    0x4d, 0x40, 0x4a, 0x2f, 0x4f, 0x43, 0x46, 0x4a, 0x3e, 0x4a, 0x4a, 0x2b,
+    0x49, 0x4c, 0x4c, 0x3e, 0x41, 0x4c, 0x4a, 0x2b, 0x40, 0x44, 0x46, 0x4a,
+    0x40, 0x44, 0x42, 0x38, 0x52, 0x42, 0x46, 0x51, 0x53, 0x4e, 0x45, 0x31,
+    0x45, 0x47, 0x4f, 0x46, 0x49, 0x43, 0x45, 0x3b, 0x4b, 0x4b, 0x4b, 0x4c,
+    0x43, 0x4a, 0x4c, 0x43, 0x4e, 0x40, 0x52, 0x44, 0x48, 0x49, 0x47, 0x4b,
+    0x4e, 0x3d, 0x4e, 0x44, 0x48, 0x4d, 0x4f, 0x4f, 0x50, 0x36, 0x47, 0x41,
+    0x4a, 0x44, 0x45, 0x56, 0x4f, 0x4c, 0x50, 0x4b, 0x45, 0x3e, 0x45, 0x4e,
+    0x45, 0x45, 0x43, 0x40, 0x47, 0x4e, 0x45, 0x3e, 0x4a, 0x3f, 0x49, 0x50,
+    0x62, 0x55, 0x48, 0x56, 0x3e, 0x57, 0x4f, 0x3b, 0x55, 0x6c, 0x50, 0x5c,
+    0x3d, 0x54, 0x3d, 0x46, 0x43, 0x59, 0x3e, 0x51, 0x4d, 0x7b, 0x33, 0x47,
+    0x52, 0x43, 0x3f, 0x25, 0x4a, 0x6f, 0x49, 0x3e, 0x50, 0x40, 0x41, 0x30,
+    0x5e, 0x5c, 0x4a, 0x43, 0x4d, 0x42, 0x46, 0x3b, 0x63, 0x53, 0x4f, 0x43,
+    0x58, 0x48, 0x4b, 0x59, 0x50, 0x4e, 0x4b, 0x51, 0x4a, 0x55, 0x44, 0x46,
+    0x4c, 0x3d, 0x4c, 0x52, 0x44, 0x52, 0x4c, 0x41, 0x4f, 0x44, 0x4a, 0x47,
+    0x4e, 0x48, 0x49, 0x2e, 0x3e, 0x45, 0x4c, 0x48, 0x41, 0x47, 0x4d, 0x2e,
+    0x40, 0x4b, 0x4c, 0x42, 0x4d, 0x40, 0x4e, 0x2e, 0x43, 0x45, 0x4b, 0x43,
+    0x3e, 0x49, 0x55, 0x35, 0x43, 0x42, 0x42, 0x40, 0x4e, 0x46, 0x44, 0x37,
+    0x49, 0x41, 0x3f, 0x52, 0x47, 0x4b, 0x43, 0x33, 0x4b, 0x47, 0x4b, 0x4c,
+    0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x40, 0x49, 0x41, 0x42, 0x49, 0x4b, 0x46,
+    0x4e, 0x4e, 0x47, 0x4e, 0x48, 0x48, 0x4b, 0x46, 0x51, 0x4b, 0x46, 0x4d,
+    0x47, 0x4f, 0x3e, 0x51, 0x46, 0x4e, 0x46, 0x4b, 0x47, 0x48, 0x4e, 0x55,
+    0x4c, 0x3d, 0x47, 0x51, 0x42, 0x45, 0x4f, 0x42, 0x52, 0x50, 0x44, 0x4c,
+    0x44, 0x44, 0x43, 0x4d, 0x40, 0x42, 0x4d, 0x4b, 0x5d, 0x4e, 0x47, 0x54,
+    0x47, 0x51, 0x43, 0x39, 0x58, 0x66, 0x4e, 0x5a, 0x41, 0x52, 0x36, 0x47,
+    0x45, 0x5f, 0x34, 0x50, 0x46, 0x79, 0x30, 0x48, 0x50, 0x45, 0x32, 0x22,
+    0x54, 0x64, 0x49, 0x46, 0x45, 0x3c, 0x42, 0x36, 0x65, 0x5c, 0x48, 0x3a,
+    0x4d, 0x4b, 0x47, 0x3e, 0x63, 0x56, 0x4a, 0x48, 0x51, 0x42, 0x4f, 0x5e,
+    0x4c, 0x44, 0x4b, 0x4c, 0x3d, 0x5a, 0x43, 0x4d, 0x42, 0x40, 0x4f, 0x4d,
+    0x3f, 0x3e, 0x46, 0x40, 0x49, 0x42, 0x49, 0x40, 0x49, 0x4c, 0x4a, 0x2e,
+    0x4b, 0x3f, 0x53, 0x4b, 0x48, 0x49, 0x3e, 0x34, 0x47, 0x4a, 0x4b, 0x46,
+    0x3b, 0x49, 0x46, 0x34, 0x4b, 0x48, 0x4c, 0x49, 0x49, 0x43, 0x4f, 0x2e,
+    0x44, 0x46, 0x48, 0x50, 0x46, 0x4e, 0x4a, 0x37, 0x4b, 0x4c, 0x4a, 0x50,
+    0x45, 0x4a, 0x48, 0x3b, 0x48, 0x44, 0x48, 0x4a, 0x41, 0x44, 0x52, 0x3f,
+    0x4c, 0x46, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x36, 0x53, 0x3e, 0x48, 0x47,
+    0x3f, 0x42, 0x41, 0x4c, 0x42, 0x4a, 0x52, 0x46, 0x49, 0x3f, 0x48, 0x5a,
+    0x43, 0x42, 0x3d, 0x43, 0x4f, 0x44, 0x43, 0x65, 0x41, 0x41, 0x44, 0x4b,
+    0x50, 0x44, 0x53, 0x49, 0x41, 0x45, 0x4a, 0x4d, 0x40, 0x45, 0x4a, 0x4e,
+    0x50, 0x40, 0x51, 0x40, 0x5e, 0x50, 0x43, 0x5c, 0x47, 0x5a, 0x44, 0x4c,
+    0x54, 0x64, 0x4f, 0x63, 0x39, 0x58, 0x3c, 0x4a, 0x42, 0x5e, 0x3c, 0x4a,
+    0x48, 0x7b, 0x34, 0x4c, 0x4f, 0x44, 0x30, 0x24, 0x50, 0x65, 0x47, 0x39,
+    0x46, 0x3e, 0x3f, 0x33, 0x65, 0x5a, 0x44, 0x38, 0x50, 0x47, 0x4b, 0x3e,
+    0x5b, 0x53, 0x4a, 0x4d, 0x51, 0x40, 0x47, 0x59, 0x51, 0x42, 0x4f, 0x50,
+    0x45, 0x57, 0x46, 0x50, 0x3f, 0x3c, 0x4c, 0x4f, 0x46, 0x41, 0x4a, 0x3e,
+    0x4d, 0x45, 0x51, 0x48, 0x4e, 0x44, 0x4e, 0x35, 0x44, 0x3f, 0x44, 0x48,
+    0x3c, 0x4c, 0x49, 0x2c, 0x4a, 0x46, 0x48, 0x44, 0x4b, 0x42, 0x4b, 0x2f,
+    0x4e, 0x50, 0x4c, 0x4d, 0x44, 0x46, 0x3f, 0x39, 0x4d, 0x47, 0x45, 0x41,
+    0x42, 0x47, 0x4a, 0x3a, 0x40, 0x3e, 0x4a, 0x51, 0x3f, 0x47, 0x44, 0x37,
+    0x47, 0x4e, 0x47, 0x52, 0x45, 0x42, 0x4a, 0x3d, 0x43, 0x4d, 0x4d, 0x47,
+    0x48, 0x43, 0x44, 0x44, 0x47, 0x4e, 0x52, 0x4b, 0x4e, 0x50, 0x42, 0x47,
+    0x4b, 0x4b, 0x4e, 0x4c, 0x4e, 0x47, 0x50, 0x56, 0x46, 0x47, 0x4d, 0x49,
+    0x4d, 0x46, 0x49, 0x5f, 0x49, 0x42, 0x4d, 0x44, 0x40, 0x4b, 0x52, 0x45,
+    0x46, 0x4a, 0x4b, 0x49, 0x47, 0x4b, 0x42, 0x45, 0x42, 0x44, 0x46, 0x4c,
+    0x62, 0x4a, 0x44, 0x53, 0x43, 0x5a, 0x48, 0x49, 0x59, 0x68, 0x46, 0x61,
+    0x40, 0x5a, 0x3a, 0x4d, 0x45, 0x5e, 0x33, 0x4f, 0x4e, 0x74, 0x3e, 0x3e,
+    0x5a, 0x4b, 0x34, 0x31, 0x52, 0x6c, 0x44, 0x39, 0x4c, 0x3b, 0x39, 0x3a,
+    0x63, 0x65, 0x4b, 0x40, 0x50, 0x4d, 0x53, 0x4a, 0x69, 0x56, 0x54, 0x45,
+    0x4c, 0x4c, 0x50, 0x5b, 0x4d, 0x4f, 0x3d, 0x4b, 0x44, 0x47, 0x43, 0x47,
+    0x49, 0x3c, 0x49, 0x41, 0x41, 0x3f, 0x47, 0x43, 0x48, 0x47, 0x4c, 0x43,
+    0x4a, 0x40, 0x4d, 0x32, 0x4b, 0x4d, 0x44, 0x48, 0x46, 0x44, 0x50, 0x2f,
+    0x4e, 0x49, 0x53, 0x4b, 0x52, 0x47, 0x4b, 0x2b, 0x48, 0x4b, 0x4a, 0x4c,
+    0x4d, 0x4c, 0x43, 0x37, 0x48, 0x3c, 0x4b, 0x42, 0x51, 0x3f, 0x45, 0x3c,
+    0x49, 0x40, 0x42, 0x43, 0x4d, 0x4c, 0x3f, 0x3f, 0x4d, 0x43, 0x45, 0x42,
+    0x48, 0x42, 0x48, 0x39, 0x51, 0x4e, 0x46, 0x4f, 0x3e, 0x4c, 0x45, 0x3e,
+    0x3f, 0x3f, 0x43, 0x41, 0x4b, 0x4b, 0x43, 0x4d, 0x44, 0x3b, 0x48, 0x45,
+    0x3c, 0x4a, 0x48, 0x5b, 0x3c, 0x4b, 0x4c, 0x44, 0x46, 0x3e, 0x45, 0x57,
+    0x43, 0x42, 0x51, 0x4a, 0x46, 0x47, 0x43, 0x49, 0x42, 0x43, 0x50, 0x4e,
+    0x4e, 0x44, 0x41, 0x4e, 0x4e, 0x41, 0x48, 0x47, 0x5c, 0x53, 0x44, 0x54,
+    0x44, 0x5b, 0x45, 0x46, 0x55, 0x67, 0x4d, 0x5d, 0x40, 0x5a, 0x43, 0x4b,
+    0x43, 0x60, 0x3c, 0x4b, 0x41, 0x79, 0x41, 0x41, 0x58, 0x48, 0x40, 0x3b,
+    0x4f, 0x6c, 0x46, 0x3f, 0x53, 0x3a, 0x3d, 0x36, 0x5a, 0x57, 0x44, 0x41,
+    0x4c, 0x47, 0x4e, 0x48, 0x62, 0x60, 0x4a, 0x46, 0x51, 0x3e, 0x52, 0x5f,
+    0x4b, 0x46, 0x48, 0x4c, 0x4c, 0x55, 0x43, 0x46, 0x49, 0x3e, 0x41, 0x40,
+    0x4d, 0x47, 0x46, 0x3b, 0x51, 0x3a, 0x4a, 0x45, 0x50, 0x47, 0x51, 0x38,
+    0x44, 0x41, 0x40, 0x4b, 0x4d, 0x44, 0x4d, 0x28, 0x47, 0x3e, 0x44, 0x40,
+    0x49, 0x49, 0x40, 0x3c, 0x44, 0x4c, 0x48, 0x51, 0x46, 0x3e, 0x47, 0x2a,
+    0x41, 0x44, 0x49, 0x4c, 0x4e, 0x4e, 0x42, 0x3c, 0x49, 0x42, 0x43, 0x45,
+    0x4e, 0x4d, 0x50, 0x39, 0x42, 0x43, 0x48, 0x41, 0x3f, 0x40, 0x4e, 0x3a,
+    0x44, 0x3d, 0x49, 0x4d, 0x47, 0x45, 0x4b, 0x42, 0x4c, 0x4d, 0x3f, 0x3f,
+    0x4e, 0x4d, 0x4d, 0x4d, 0x4d, 0x45, 0x47, 0x43, 0x4c, 0x46, 0x47, 0x57,
+    0x4b, 0x42, 0x4d, 0x46, 0x4b, 0x4b, 0x43, 0x58, 0x48, 0x49, 0x4d, 0x47,
+    0x43, 0x49, 0x4b, 0x48, 0x46, 0x4f, 0x4f, 0x42, 0x4a, 0x43, 0x49, 0x4e,
+    0x4a, 0x47, 0x4c, 0x48, 0x5a, 0x57, 0x4a, 0x58, 0x49, 0x4f, 0x45, 0x47,
+    0x63, 0x66, 0x4d, 0x5e, 0x4b, 0x51, 0x45, 0x4a, 0x43, 0x5d, 0x33, 0x4b,
+    0x4e, 0x70, 0x42, 0x39, 0x57, 0x4a, 0x40, 0x3a, 0x51, 0x68, 0x45, 0x45,
+    0x4c, 0x44, 0x3a, 0x3a, 0x4f, 0x62, 0x49, 0x45, 0x53, 0x4c, 0x4e, 0x41,
+    0x63, 0x5e, 0x44, 0x44, 0x47, 0x43, 0x47, 0x59, 0x4c, 0x4b, 0x4c, 0x49,
+    0x3e, 0x43, 0x4c, 0x46, 0x4c, 0x38, 0x47, 0x46, 0x46, 0x47, 0x40, 0x44,
+    0x51, 0x3e, 0x40, 0x47, 0x3f, 0x45, 0x48, 0x2a, 0x42, 0x3e, 0x43, 0x46,
+    0x50, 0x4c, 0x4a, 0x2c, 0x49, 0x4b, 0x48, 0x48, 0x40, 0x4a, 0x4a, 0x37,
+    0x4e, 0x42, 0x4f, 0x4c, 0x41, 0x43, 0x45, 0x38, 0x4e, 0x3d, 0x41, 0x47,
+    0x42, 0x42, 0x43, 0x3b, 0x4a, 0x40, 0x48, 0x4a, 0x53, 0x44, 0x4d, 0x35,
+    0x51, 0x3c, 0x4e, 0x4e, 0x3e, 0x3f, 0x4b, 0x3c, 0x3e, 0x47, 0x41, 0x48,
+    0x40, 0x46, 0x4e, 0x44, 0x49, 0x42, 0x49, 0x44, 0x4b, 0x46, 0x46, 0x43,
+    0x4c, 0x4b, 0x49, 0x4d, 0x3d, 0x47, 0x43, 0x5c, 0x4a, 0x42, 0x47, 0x4e,
+    0x47, 0x40, 0x4c, 0x55, 0x3f, 0x45, 0x46, 0x49, 0x46, 0x48, 0x49, 0x4d,
+    0x4c, 0x41, 0x49, 0x40, 0x4a, 0x44, 0x42, 0x49, 0x52, 0x41, 0x49, 0x4a,
+    0x5c, 0x53, 0x47, 0x58, 0x49, 0x55, 0x4a, 0x4a, 0x62, 0x61, 0x4b, 0x57,
+    0x3c, 0x50, 0x42, 0x4c, 0x49, 0x5f, 0x3f, 0x4a, 0x42, 0x70, 0x40, 0x40,
+    0x4f, 0x46, 0x43, 0x43, 0x4d, 0x6c, 0x41, 0x3e, 0x4e, 0x49, 0x43, 0x38,
+    0x50, 0x57, 0x43, 0x39, 0x4a, 0x4f, 0x51, 0x3e, 0x5c, 0x57, 0x46, 0x49,
+    0x41, 0x40, 0x42, 0x4f, 0x4c, 0x45, 0x46, 0x4a, 0x4c, 0x4b, 0x43, 0x42,
+    0x4c, 0x3c, 0x47, 0x47, 0x4f, 0x44, 0x45, 0x3a, 0x4d, 0x3d, 0x4d, 0x3f,
+    0x46, 0x4f, 0x41, 0x37, 0x46, 0x45, 0x54, 0x47, 0x4e, 0x46, 0x47, 0x23,
+    0x48, 0x4e, 0x4a, 0x47, 0x45, 0x45, 0x4e, 0x33, 0x49, 0x4a, 0x4d, 0x4e,
+    0x49, 0x46, 0x49, 0x36, 0x48, 0x44, 0x53, 0x44, 0x4a, 0x45, 0x4a, 0x37,
+    0x45, 0x36, 0x4b, 0x4e, 0x50, 0x3f, 0x49, 0x38, 0x40, 0x43, 0x46, 0x4c,
+    0x43, 0x46, 0x4a, 0x3f, 0x45, 0x3d, 0x44, 0x47, 0x44, 0x42, 0x4a, 0x45,
+    0x47, 0x43, 0x4d, 0x4d, 0x44, 0x44, 0x4f, 0x4a, 0x4a, 0x41, 0x50, 0x50,
+    0x4b, 0x44, 0x54, 0x5c, 0x4b, 0x3a, 0x46, 0x4a, 0x4a, 0x43, 0x48, 0x5c,
+    0x4b, 0x43, 0x47, 0x3d, 0x3e, 0x54, 0x42, 0x47, 0x42, 0x4f, 0x4b, 0x4b,
+    0x46, 0x46, 0x46, 0x42, 0x42, 0x4b, 0x48, 0x45, 0x51, 0x4e, 0x49, 0x4d,
+    0x43, 0x56, 0x45, 0x40, 0x5a, 0x58, 0x4c, 0x55, 0x40, 0x4b, 0x4c, 0x51,
+    0x42, 0x59, 0x43, 0x46, 0x46, 0x69, 0x43, 0x3c, 0x54, 0x47, 0x3d, 0x41,
+    0x52, 0x64, 0x44, 0x38, 0x4f, 0x49, 0x3a, 0x3a, 0x55, 0x54, 0x45, 0x3e,
+    0x49, 0x44, 0x4e, 0x3f, 0x57, 0x50, 0x47, 0x43, 0x45, 0x48, 0x53, 0x5b,
+    0x53, 0x4d, 0x48, 0x4e, 0x48, 0x3a, 0x3e, 0x46, 0x42, 0x36, 0x50, 0x4d,
+    0x49, 0x4b, 0x4b, 0x45, 0x4c, 0x44, 0x50, 0x47, 0x3e, 0x49, 0x50, 0x37,
+    0x4c, 0x4b, 0x4a, 0x54, 0x4e, 0x43, 0x40, 0x25, 0x46, 0x42, 0x52, 0x3d,
+    0x44, 0x45, 0x51, 0x2e, 0x4a, 0x3d, 0x46, 0x46, 0x4c, 0x42, 0x48, 0x34,
+    0x44, 0x44, 0x44, 0x4c, 0x4f, 0x4b, 0x42, 0x3d, 0x45, 0x40, 0x47, 0x49,
+    0x43, 0x41, 0x3e, 0x39, 0x47, 0x4b, 0x50, 0x4a, 0x46, 0x47, 0x4e, 0x3b,
+    0x4e, 0x3e, 0x49, 0x4a, 0x50, 0x40, 0x43, 0x49, 0x48, 0x3c, 0x4f, 0x45,
+    0x4a, 0x41, 0x42, 0x48, 0x4b, 0x46, 0x4a, 0x50, 0x40, 0x49, 0x44, 0x54,
+    0x45, 0x45, 0x4a, 0x4b, 0x51, 0x51, 0x48, 0x53, 0x50, 0x3f, 0x50, 0x46,
+    0x44, 0x45, 0x51, 0x43, 0x4f, 0x3e, 0x41, 0x41, 0x46, 0x45, 0x45, 0x4c,
+    0x54, 0x3c, 0x4a, 0x4c, 0x5a, 0x4f, 0x46, 0x4b, 0x47, 0x4a, 0x43, 0x4c,
+    0x56, 0x5a, 0x4a, 0x53, 0x4c, 0x49, 0x46, 0x4c, 0x45, 0x59, 0x40, 0x4b,
+    0x48, 0x60, 0x3d, 0x42, 0x52, 0x3f, 0x42, 0x3d, 0x52, 0x5f, 0x46, 0x42,
+    0x4b, 0x4e, 0x4a, 0x3d, 0x52, 0x55, 0x53, 0x37, 0x47, 0x3e, 0x4a, 0x42,
+    0x51, 0x54, 0x48, 0x48, 0x4b, 0x48, 0x3e, 0x52, 0x41, 0x4e, 0x4c, 0x4f,
+    0x43, 0x3b, 0x4b, 0x4b, 0x4c, 0x40, 0x48, 0x49, 0x4d, 0x3a, 0x45, 0x3c,
+    0x53, 0x44, 0x48, 0x4d, 0x4b, 0x49, 0x46, 0x3c, 0x4d, 0x40, 0x51, 0x3f,
+    0x4c, 0x45, 0x44, 0x2f, 0x49, 0x51, 0x3f, 0x4d, 0x3e, 0x4e, 0x3c, 0x30,
+    0x3d, 0x48, 0x4f, 0x3f, 0x45, 0x45, 0x46, 0x3b, 0x4c, 0x46, 0x4d, 0x50,
+    0x4c, 0x3d, 0x41, 0x37, 0x3e, 0x3e, 0x4f, 0x4b, 0x4d, 0x4f, 0x45, 0x45,
+    0x4a, 0x47, 0x4a, 0x44, 0x43, 0x46, 0x51, 0x41, 0x4e, 0x39, 0x44, 0x4a,
+    0x4e, 0x49, 0x4a, 0x42, 0x49, 0x4b, 0x4e, 0x48, 0x49, 0x4a, 0x45, 0x4a,
+    0x45, 0x41, 0x4a, 0x4b, 0x42, 0x41, 0x48, 0x4a, 0x44, 0x3a, 0x46, 0x49,
+    0x54, 0x45, 0x44, 0x60, 0x4a, 0x4e, 0x45, 0x4a, 0x4a, 0x45, 0x4b, 0x49,
+    0x42, 0x44, 0x46, 0x50, 0x4b, 0x4b, 0x4e, 0x45, 0x48, 0x3e, 0x55, 0x42,
+    0x51, 0x49, 0x49, 0x44, 0x4e, 0x54, 0x53, 0x49, 0x4c, 0x63, 0x48, 0x5a,
+    0x50, 0x4b, 0x45, 0x49, 0x43, 0x57, 0x4c, 0x3f, 0x4d, 0x67, 0x3f, 0x47,
+    0x53, 0x49, 0x43, 0x44, 0x49, 0x61, 0x50, 0x47, 0x49, 0x49, 0x4a, 0x42,
+    0x4a, 0x51, 0x46, 0x43, 0x3f, 0x34, 0x40, 0x3a, 0x45, 0x54, 0x4c, 0x55,
+    0x40, 0x3c, 0x4a, 0x4d, 0x3e, 0x4d, 0x48, 0x51, 0x4c, 0x3e, 0x4c, 0x4f,
+    0x50, 0x47, 0x4d, 0x49, 0x4d, 0x4e, 0x45, 0x43, 0x41, 0x41, 0x40, 0x47,
+    0x43, 0x4a, 0x4a, 0x3c, 0x4c, 0x3d, 0x4e, 0x43, 0x41, 0x42, 0x4a, 0x30,
+    0x45, 0x4c, 0x45, 0x55, 0x46, 0x39, 0x43, 0x39, 0x45, 0x47, 0x48, 0x53,
+    0x4a, 0x48, 0x43, 0x38, 0x4f, 0x51, 0x4d, 0x4c, 0x41, 0x46, 0x40, 0x3d,
+    0x43, 0x4b, 0x40, 0x46, 0x47, 0x50, 0x4a, 0x43, 0x50, 0x4e, 0x45, 0x4f,
+    0x4d, 0x44, 0x4d, 0x3f, 0x4e, 0x48, 0x4a, 0x49, 0x44, 0x3d, 0x4a, 0x44,
+    0x40, 0x45, 0x49, 0x40, 0x4a, 0x44, 0x4f, 0x4a, 0x43, 0x4a, 0x4e, 0x52,
+    0x4d, 0x50, 0x48, 0x4c, 0x43, 0x45, 0x4d, 0x54, 0x4a, 0x49, 0x4c, 0x58,
+    0x4c, 0x48, 0x4c, 0x44, 0x4b, 0x4e, 0x52, 0x44, 0x49, 0x44, 0x47, 0x4e,
+    0x4b, 0x45, 0x49, 0x3e, 0x4c, 0x3b, 0x53, 0x3f, 0x51, 0x41, 0x3f, 0x44,
+    0x43, 0x4a, 0x4b, 0x43, 0x53, 0x57, 0x50, 0x53, 0x4f, 0x4b, 0x48, 0x51,
+    0x47, 0x49, 0x46, 0x4d, 0x4d, 0x5e, 0x44, 0x46, 0x56, 0x3d, 0x3c, 0x3e,
+    0x47, 0x55, 0x54, 0x46, 0x42, 0x49, 0x4f, 0x43, 0x48, 0x54, 0x51, 0x40,
+    0x44, 0x44, 0x47, 0x45, 0x4b, 0x59, 0x4d, 0x47, 0x40, 0x39, 0x48, 0x54,
+    0x43, 0x45, 0x44, 0x42, 0x4c, 0x3c, 0x4d, 0x42, 0x4b, 0x45, 0x42, 0x48,
+    0x51, 0x44, 0x45, 0x3f, 0x3d, 0x49, 0x4b, 0x4a, 0x41, 0x43, 0x4f, 0x3f,
+    0x51, 0x4b, 0x44, 0x46, 0x46, 0x44, 0x53, 0x3d, 0x47, 0x47, 0x43, 0x4b,
+    0x41, 0x43, 0x3c, 0x3b, 0x49, 0x47, 0x47, 0x49, 0x4b, 0x3d, 0x43, 0x43,
+    0x4b, 0x47, 0x45, 0x4e, 0x42, 0x4a, 0x4c, 0x3e, 0x51, 0x3e, 0x46, 0x44,
+    0x46, 0x43, 0x42, 0x42, 0x47, 0x4d, 0x51, 0x4b, 0x49, 0x44, 0x4d, 0x40,
+    0x50, 0x43, 0x41, 0x4c, 0x42, 0x49, 0x49, 0x4c, 0x42, 0x50, 0x48, 0x3f,
+    0x46, 0x42, 0x48, 0x57, 0x49, 0x4d, 0x47, 0x4e, 0x48, 0x4b, 0x46, 0x50,
+    0x47, 0x45, 0x52, 0x45, 0x4b, 0x48, 0x40, 0x5b, 0x4e, 0x43, 0x51, 0x48,
+    0x48, 0x4a, 0x4a, 0x4a, 0x52, 0x51, 0x4c, 0x4b, 0x42, 0x55, 0x4d, 0x46,
+    0x50, 0x40, 0x4a, 0x50, 0x51, 0x3e, 0x42, 0x4c, 0x43, 0x46, 0x4d, 0x46,
+    0x46, 0x4d, 0x4d, 0x52, 0x4e, 0x44, 0x45, 0x47, 0x49, 0x4c, 0x41, 0x44,
+    0x4d, 0x54, 0x4c, 0x4a, 0x54, 0x3e, 0x44, 0x43, 0x53, 0x55, 0x4b, 0x4a,
+    0x47, 0x47, 0x4f, 0x46, 0x4f, 0x4b, 0x51, 0x3f, 0x41, 0x4c, 0x43, 0x46,
+    0x55, 0x51, 0x40, 0x4b, 0x4f, 0x40, 0x47, 0x50, 0x4e, 0x4a, 0x46, 0x4e,
+    0x42, 0x4d, 0x48, 0x49, 0x48, 0x4a, 0x4a, 0x43, 0x49, 0x48, 0x44, 0x3b,
+    0x51, 0x46, 0x3d, 0x43, 0x47, 0x4a, 0x4f, 0x42, 0x4a, 0x50, 0x4f, 0x41,
+    0x45, 0x45, 0x43, 0x3c, 0x4c, 0x4c, 0x46, 0x4b, 0x3e, 0x44, 0x4b, 0x3a,
+    0x45, 0x50, 0x42, 0x48, 0x46, 0x47, 0x44, 0x3a, 0x53, 0x46, 0x4e, 0x4f,
+    0x43, 0x40, 0x46, 0x48, 0x4e, 0x45, 0x3f, 0x47, 0x48, 0x3f, 0x44, 0x4f,
+    0x44, 0x47, 0x4e, 0x47, 0x47, 0x49, 0x42, 0x43, 0x3f, 0x49, 0x4a, 0x53,
+    0x53, 0x4a, 0x4e, 0x4a, 0x49, 0x4d, 0x49, 0x41, 0x48, 0x4d, 0x4d, 0x4e,
+    0x4b, 0x45, 0x4d, 0x4a, 0x46, 0x4a, 0x46, 0x51, 0x4b, 0x47, 0x49, 0x45,
+    0x49, 0x49, 0x4b, 0x5c, 0x48, 0x42, 0x51, 0x4c, 0x41, 0x3f, 0x4c, 0x42,
+    0x4f, 0x45, 0x4b, 0x4a, 0x52, 0x48, 0x53, 0x4f, 0x40, 0x47, 0x41, 0x47,
+    0x68, 0xfb, 0xff, 0xff, 0x4c, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
+    0x58, 0x01, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00,
+    0x38, 0x02, 0x00, 0x00, 0x9c, 0x02, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00,
+    0x14, 0x03, 0x00, 0x00, 0xfe, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x19, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 0x00, 0x00,
+    0xcc, 0xfc, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x17, 0xbf, 0xd2, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x58, 0xec, 0xd1, 0x43,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0xfd, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x43, 0x6f, 0x6e, 0x76,
+    0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x34, 0xff, 0xff, 0xff,
+    0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a, 0xc2, 0xfd, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68,
+    0x61, 0x70, 0x65, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x94, 0xfd, 0xff, 0xff,
+    0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0xfe, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d,
+    0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0xc5, 0x01, 0x2a, 0x3b, 0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x25, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f,
+    0x71, 0x75, 0x61, 0x6e, 0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75,
+    0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61,
+    0x78, 0x56, 0x61, 0x72, 0x73, 0x00, 0x00, 0x00, 0x84, 0xfe, 0xff, 0xff,
+    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xab, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a,
+    0x01, 0x00, 0x00, 0x00, 0x6e, 0x88, 0xae, 0x3d, 0x01, 0x00, 0x00, 0x00,
+    0xd4, 0x97, 0x30, 0xbe, 0x26, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x61, 0x64, 0x64, 0x5f,
+    0x31, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x2f, 0xad, 0x18, 0x40, 0x01, 0x00, 0x00, 0x00,
+    0x02, 0x38, 0xa2, 0x43, 0x01, 0x00, 0x00, 0x00, 0x02, 0xf1, 0x8d, 0xc3,
+    0x8e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73,
+    0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff,
+    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3b,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00,
+    0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,
+    0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e,
+    0x74, 0x5f, 0x31, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e,
+    0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56,
+    0x61, 0x72, 0x73, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73,
+    0x65, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00,
+    0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x31, 0x83, 0xce, 0x3a, 0x01, 0x00, 0x00, 0x00,
+    0x4d, 0x97, 0x92, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x84, 0x75, 0xec, 0xbd,
+    0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09,
+    0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x14, 0x00, 0x1c, 0x00,
+    0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x18, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
+    0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x18, 0x00,
+    0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+    0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+    0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0xfa, 0xff, 0xff, 0xff, 0x00, 0x19, 0x06, 0x00,
+    0x06, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x09, 0x06, 0x00,
+    0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04};
+const int g_tiny_conv_model_data_len = 19800;
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h
new file mode 100644
index 0000000000..2953cc852d
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h
@@ -0,0 +1,27 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is a standard TensorFlow Lite model file that has been converted into a
+// C data array, so it can be easily compiled into a binary for devices that
+// don't have a file system. It was created using the command:
+// xxd -i tiny_conv.tflite > tiny_conv_model_data.cc
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_
+
+extern const unsigned char g_tiny_conv_model_data[];
+extern const int g_tiny_conv_model_data_len;
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/BUILD b/tensorflow/contrib/lite/experimental/micro/kernels/BUILD
new file mode 100644
index 0000000000..a012f950e6
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/BUILD
@@ -0,0 +1,107 @@
+package(default_visibility = [
+    "//visibility:public",
+])
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts")
+load(
+    "//tensorflow/contrib/lite/experimental/micro/testing:micro_test.bzl",
+    "tflite_micro_cc_test",
+)
+
+cc_library(
+    name = "micro_ops",
+    srcs = [
+        "depthwise_conv.cc",
+        "fully_connected.cc",
+        "softmax.cc",
+    ],
+    hdrs = [
+    ],
+    copts = tflite_copts(),
+    deps = [
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/kernels:kernel_util",
+        "//tensorflow/contrib/lite/kernels:op_macros",
+        "//tensorflow/contrib/lite/kernels:padding",
+        "//tensorflow/contrib/lite/kernels/internal:quantization_util",
+        "//tensorflow/contrib/lite/kernels/internal:reference_base",
+        "//tensorflow/contrib/lite/kernels/internal:tensor",
+    ],
+)
+
+cc_library(
+    name = "all_ops_resolver",
+    srcs = [
+        "all_ops_resolver.cc",
+    ],
+    hdrs = [
+        "all_ops_resolver.h",
+    ],
+    copts = tflite_copts(),
+    deps = [
+        ":micro_ops",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+    ],
+)
+
+cc_library(
+    name = "test_utils",
+    srcs = [
+    ],
+    hdrs = [
+        "test_utils.h",
+    ],
+    copts = tflite_copts(),
+    deps = [
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/core/api",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "depthwise_conv_test",
+    srcs = [
+        "depthwise_conv_test.cc",
+    ],
+    deps = [
+        ":all_ops_resolver",
+        ":test_utils",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "fully_connected_test",
+    srcs = [
+        "fully_connected_test.cc",
+    ],
+    deps = [
+        ":all_ops_resolver",
+        ":test_utils",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "softmax_test",
+    srcs = [
+        "softmax_test.cc",
+    ],
+    deps = [
+        ":all_ops_resolver",
+        ":test_utils",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc
new file mode 100644
index 0000000000..bd0a37badb
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+
+TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
+TfLiteRegistration* Micro_Register_DEPTHWISE_CONV_2D() {
+  return Register_DEPTHWISE_CONV_2D();
+}
+
+TfLiteRegistration* Register_FULLY_CONNECTED();
+TfLiteRegistration* Micro_Register_FULLY_CONNECTED() {
+  return Register_FULLY_CONNECTED();
+}
+
+TfLiteRegistration* Register_SOFTMAX();
+TfLiteRegistration* Micro_Register_SOFTMAX() { return Register_SOFTMAX(); }
+
+AllOpsResolver::AllOpsResolver() {
+  AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
+             Micro_Register_DEPTHWISE_CONV_2D());
+  AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Micro_Register_FULLY_CONNECTED(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_SOFTMAX, Micro_Register_SOFTMAX());
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h
new file mode 100644
index 0000000000..f836064a3f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
+
+#include "tensorflow/contrib/lite/experimental/micro/compatibility.h"
+#include "tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+
+class AllOpsResolver : public MicroMutableOpResolver {
+ public:
+  AllOpsResolver();
+
+ private:
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc
new file mode 100644
index 0000000000..4f17263181
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc
@@ -0,0 +1,208 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/padding.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace depthwise_conv {
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kFilterTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
+
+struct OpData {
+  TfLitePaddingValues padding;
+  // The scaling factor from input to output (aka the 'real multiplier') can
+  // be represented as a fixed point multiplier plus a left shift.
+  int32_t output_multiplier;
+  int output_shift;
+  // The range of the fused activation layer. For example for kNone and
+  // uint8_t these would be 0 and 255.
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+};
+
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
+                             TfLiteDepthwiseConvParams* params, int width,
+                             int height, int filter_width, int filter_height,
+                             int out_width, int out_height,
+                             const TfLiteType data_type, OpData* data) {
+  data->padding.height = ComputePadding(params->stride_height, 1, height,
+                                        filter_height, out_height);
+  data->padding.width =
+      ComputePadding(params->stride_width, 1, width, filter_width, out_width);
+
+  // Note that quantized inference requires that all tensors have their
+  // parameters set. This is usually done during quantized training.
+  if (data_type != kTfLiteFloat32) {
+    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+    const TfLiteTensor* bias =
+        GetOptionalInputTensor(context, node, kBiasTensor);
+    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+    double real_multiplier = 0.0;
+    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
+        context, input, filter, bias, output, &real_multiplier));
+    int exponent;
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
+    data->output_shift = -exponent;
+    CalculateActivationRangeUint8(params->activation, output,
+                                  &data->output_activation_min,
+                                  &data->output_activation_max);
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+void Free(TfLiteContext* context, void* buffer) {}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+void EvalFloat(TfLiteContext* context, TfLiteNode* node,
+               TfLiteDepthwiseConvParams* params, OpData* data,
+               const TfLiteTensor* input, const TfLiteTensor* filter,
+               const TfLiteTensor* bias, TfLiteTensor* output) {
+  float output_activation_min, output_activation_max;
+  CalculateActivationRange(params->activation, &output_activation_min,
+                           &output_activation_max);
+
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = 1;
+  op_params.dilation_height_factor = 1;
+  op_params.depth_multiplier = params->depth_multiplier;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  tflite::reference_ops::DepthwiseConv(
+      op_params, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(filter), GetTensorData<float>(filter),
+      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
+      GetTensorData<float>(output));
+}
+
+void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                   TfLiteDepthwiseConvParams* params, OpData* data,
+                   const TfLiteTensor* input, const TfLiteTensor* filter,
+                   const TfLiteTensor* bias, TfLiteTensor* output) {
+  const int32_t input_offset = -input->params.zero_point;
+  const int32_t filter_offset = -filter->params.zero_point;
+  const int32_t output_offset = output->params.zero_point;
+
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = 1;
+  op_params.dilation_height_factor = 1;
+  op_params.depth_multiplier = params->depth_multiplier;
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = data->output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = -data->output_shift;
+
+  tflite::reference_ops::DepthwiseConv(
+      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+      GetTensorShape(filter), GetTensorData<uint8_t>(filter),
+      GetTensorShape(bias), GetTensorData<int32_t>(bias),
+      GetTensorShape(output), GetTensorData<uint8_t>(output));
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
+
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+  const TfLiteTensor* bias =
+      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
+
+  const TfLiteType data_type = input->type;
+  int width = SizeOfDimension(input, 2);
+  int height = SizeOfDimension(input, 1);
+  int filter_width = SizeOfDimension(filter, 2);
+  int filter_height = SizeOfDimension(filter, 1);
+  int out_width = ComputeOutSize(params->padding, width, filter_width,
+                                 params->stride_width);
+  int out_height = ComputeOutSize(params->padding, height, filter_height,
+                                  params->stride_height);
+  OpData local_data_object;
+  OpData* data = &local_data_object;
+  TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
+                                        filter_width, filter_height, out_width,
+                                        out_height, data_type, data));
+
+  // TODO(aselle): Consider whether float conv and quantized conv should be
+  // separate ops to avoid dispatch overhead here.
+  switch (input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      EvalFloat(context, node, params, data, input, filter, bias, output);
+      break;
+    case kTfLiteUInt8:
+      EvalQuantized(context, node, params, data, input, filter, bias, output);
+      break;
+    default:
+      context->ReportError(context, "Type %d not currently supported.",
+                           input->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace depthwise_conv
+
+TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
+  static TfLiteRegistration r = {depthwise_conv::Init, depthwise_conv::Free,
+                                 depthwise_conv::Prepare, depthwise_conv::Eval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc
new file mode 100644
index 0000000000..169899c471
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc
@@ -0,0 +1,406 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h"
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+namespace {
+
+void TestDepthwiseConvFloat(std::initializer_list<int> input_dims_data,
+                            std::initializer_list<float> input_data,
+                            std::initializer_list<int> filter_dims_data,
+                            std::initializer_list<float> filter_data,
+                            std::initializer_list<int> bias_dims_data,
+                            std::initializer_list<float> bias_data,
+                            std::initializer_list<float> expected_output_data,
+                            std::initializer_list<int> output_dims_data,
+                            TfLiteFusedActivation activation,
+                            float* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* filter_dims = IntArrayFromInitializer(filter_dims_data);
+  TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 3;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(filter_data, filter_dims, "filter_tensor"),
+      CreateFloatTensor(bias_data, bias_dims, "bias_tensor"),
+      CreateFloatTensor(output_data, output_dims, "output_tensor"),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  int input_depth = input_dims->data[3];
+  int output_depth = filter_dims->data[3];
+  int depth_mul = output_depth / input_depth;
+  TfLiteDepthwiseConvParams builtin_data = {
+      kTfLitePaddingValid, 1, 1, depth_mul, activation,
+  };
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+  int inputs_array_data[] = {3, 0, 1, 2};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 3};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1e-5f);
+  }
+}
+
+void TestDepthwiseConvQuantized(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<uint8_t> input_data, float input_min, float input_max,
+    std::initializer_list<int> filter_dims_data,
+    std::initializer_list<uint8_t> filter_data, float filter_min,
+    float filter_max, std::initializer_list<int> bias_dims_data,
+    std::initializer_list<int32_t> bias_data, float bias_min, float bias_max,
+    std::initializer_list<uint8_t> expected_output_data,
+    std::initializer_list<int> output_dims_data, float output_min,
+    float output_max, TfLiteFusedActivation activation, uint8_t* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* filter_dims = IntArrayFromInitializer(filter_dims_data);
+  TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 3;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min,
+                            input_max),
+      CreateQuantizedTensor(filter_data, filter_dims, "filter_tensor",
+                            filter_min, filter_max),
+      CreateQuantized32Tensor(bias_data, bias_dims, "bias_tensor", bias_min,
+                              bias_max),
+      CreateQuantizedTensor(output_data, output_dims, "output_tensor",
+                            output_min, output_max),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  int input_depth = input_dims->data[3];
+  int output_depth = filter_dims->data[3];
+  int depth_mul = output_depth / input_depth;
+  TfLiteDepthwiseConvParams builtin_data = {
+      kTfLitePaddingValid, 1, 1, depth_mul, activation,
+  };
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+
+  int inputs_array_data[] = {3, 0, 1, 2};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 3};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]);
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(SimpleTest) {
+  const int output_dims_count = 8;
+  float output_data[output_dims_count];
+  tflite::testing::TestDepthwiseConvFloat(  //
+      {4, 1, 3, 2, 2},                      // Input shape.
+      {
+          1, 2, 7, 8,    // Input values.
+          3, 4, 9, 10,   //
+          5, 6, 11, 12,  //
+      },
+      {4, 1, 2, 2, 4},  // Filters shape.
+      {
+          1, 2, 3, 4,        // Filters values.
+          -9, 10, -11, 12,   //
+          5, 6, 7, 8,        //
+          13, -14, 15, -16,  //
+      },
+      {1, 4},  // Bias shape.
+      {
+          1, 2, 3, 4,  // Bias values.
+      },
+      {
+          71, -34, 99, -20,  // Expected results.
+          91, -26, 127, -4,  //
+      },
+      {4, 1, 2, 1, 4},  // Output shape.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantized) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float filter_min = -63.5f;
+  const float filter_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 8;
+  uint8_t output_data[output_dims_count];
+
+  tflite::testing::TestDepthwiseConvQuantized(  //
+      {4, 1, 3, 2, 2},                          // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),
+          F2Q(2, input_min, input_max),
+          F2Q(7, input_min, input_max),
+          F2Q(8, input_min, input_max),
+          F2Q(3, input_min, input_max),
+          F2Q(4, input_min, input_max),
+          F2Q(9, input_min, input_max),
+          F2Q(10, input_min, input_max),
+          F2Q(5, input_min, input_max),
+          F2Q(6, input_min, input_max),
+          F2Q(11, input_min, input_max),
+          F2Q(12, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {4, 1, 2, 2, 4},       // Filter shape.
+      {
+          // Filter values.
+          F2Q(1, filter_min, filter_max),
+          F2Q(2, filter_min, filter_max),
+          F2Q(3, filter_min, filter_max),
+          F2Q(4, filter_min, filter_max),
+          F2Q(-9, filter_min, filter_max),
+          F2Q(10, filter_min, filter_max),
+          F2Q(-11, filter_min, filter_max),
+          F2Q(12, filter_min, filter_max),
+          F2Q(5, filter_min, filter_max),
+          F2Q(6, filter_min, filter_max),
+          F2Q(7, filter_min, filter_max),
+          F2Q(8, filter_min, filter_max),
+          F2Q(13, filter_min, filter_max),
+          F2Q(-14, filter_min, filter_max),
+          F2Q(15, filter_min, filter_max),
+          F2Q(-16, filter_min, filter_max),
+      },
+      filter_min, filter_max,  // Filter quantization range.
+      {1, 4},                  // Bias shape.
+      {
+          // Bias values.
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+          F2Q32(4, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(71, output_min, output_max),
+          F2Q(-34, output_min, output_max),
+          F2Q(99, output_min, output_max),
+          F2Q(-20, output_min, output_max),
+          F2Q(91, output_min, output_max),
+          F2Q(-26, output_min, output_max),
+          F2Q(127, output_min, output_max),
+          F2Q(-4, output_min, output_max),
+      },
+      {4, 1, 2, 1, 4},         // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestRelu) {
+  const int output_dims_count = 8;
+  float output_data[output_dims_count];
+  tflite::testing::TestDepthwiseConvFloat(  //
+      {4, 1, 3, 2, 2},                      // Input shape.
+      {
+          1, 2, 7, 8,    // Input values.
+          3, 4, 9, 10,   //
+          5, 6, 11, 12,  //
+      },
+      {4, 1, 2, 2, 4},  // Filters shape.
+      {
+          1, 2, 3, 4,        // Filters values.
+          -9, 10, -11, 12,   //
+          5, 6, 7, 8,        //
+          13, -14, 15, -16,  //
+      },
+      {1, 4},  // Bias shape.
+      {
+          1, 2, 3, 4,  // Bias values.
+      },
+      {
+          71, 0, 99, 0,   // Expected results.
+          91, 0, 127, 0,  //
+      },
+      {4, 1, 2, 1, 4},  // Output shape.
+      kTfLiteActRelu, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestReluQuantized) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float filter_min = -63.5f;
+  const float filter_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 8;
+  uint8_t output_data[output_dims_count];
+
+  tflite::testing::TestDepthwiseConvQuantized(  //
+      {4, 1, 3, 2, 2},                          // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),
+          F2Q(2, input_min, input_max),
+          F2Q(7, input_min, input_max),
+          F2Q(8, input_min, input_max),
+          F2Q(3, input_min, input_max),
+          F2Q(4, input_min, input_max),
+          F2Q(9, input_min, input_max),
+          F2Q(10, input_min, input_max),
+          F2Q(5, input_min, input_max),
+          F2Q(6, input_min, input_max),
+          F2Q(11, input_min, input_max),
+          F2Q(12, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {4, 1, 2, 2, 4},       // Filter shape.
+      {
+          // Filter values.
+          F2Q(1, filter_min, filter_max),
+          F2Q(2, filter_min, filter_max),
+          F2Q(3, filter_min, filter_max),
+          F2Q(4, filter_min, filter_max),
+          F2Q(-9, filter_min, filter_max),
+          F2Q(10, filter_min, filter_max),
+          F2Q(-11, filter_min, filter_max),
+          F2Q(12, filter_min, filter_max),
+          F2Q(5, filter_min, filter_max),
+          F2Q(6, filter_min, filter_max),
+          F2Q(7, filter_min, filter_max),
+          F2Q(8, filter_min, filter_max),
+          F2Q(13, filter_min, filter_max),
+          F2Q(-14, filter_min, filter_max),
+          F2Q(15, filter_min, filter_max),
+          F2Q(-16, filter_min, filter_max),
+      },
+      filter_min, filter_max,  // Filter quantization range.
+      {1, 4},                  // Bias shape.
+      {
+          // Bias values.
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+          F2Q32(4, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(71, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(99, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(91, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(127, output_min, output_max),
+          F2Q(0, output_min, output_max),
+      },
+      {4, 1, 2, 1, 4},         // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActRelu, output_data);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc
new file mode 100644
index 0000000000..1e9e54cafb
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc
@@ -0,0 +1,184 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace fully_connected {
+namespace {
+
+struct OpData {
+  // The scaling factor from input to output (aka the 'real multiplier') can
+  // be represented as a fixed point multiplier plus a left shift.
+  int32_t output_multiplier;
+  int output_shift;
+  // The range of the fused activation layer. For example for kNone and
+  // uint8_t these would be 0 and 255.
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  // The index of the temporary tensor where the quantized inputs are cached.
+  int input_quantized_index;
+};
+
+constexpr int kInputTensor = 0;
+constexpr int kWeightsTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus CalculateOpData(TfLiteContext* context,
+                             TfLiteFullyConnectedParams* params,
+                             TfLiteType data_type, const TfLiteTensor* input,
+                             const TfLiteTensor* filter,
+                             const TfLiteTensor* bias, TfLiteTensor* output,
+                             OpData* data) {
+  TfLiteStatus status = kTfLiteOk;
+  if (data_type != kTfLiteFloat32) {
+    double real_multiplier = 0.0;
+    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
+        context, input, filter, bias, output, &real_multiplier));
+    int exponent;
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
+    data->output_shift = -exponent;
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
+  }
+  return status;
+}
+
+}  // namespace
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+void Free(TfLiteContext* context, void* buffer) {}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                           TfLiteFullyConnectedParams* params, OpData* data,
+                           const TfLiteTensor* input,
+                           const TfLiteTensor* filter, const TfLiteTensor* bias,
+                           TfLiteTensor* output) {
+  const int32_t input_offset = -input->params.zero_point;
+  const int32_t filter_offset = -filter->params.zero_point;
+  const int32_t output_offset = output->params.zero_point;
+
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = data->output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = -data->output_shift;
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+
+#define TF_LITE_FULLY_CONNECTED(output_data_type)                      \
+  reference_ops::FullyConnected(                                       \
+      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
+      GetTensorShape(filter), GetTensorData<uint8_t>(filter),          \
+      GetTensorShape(bias), GetTensorData<int32_t>(bias),              \
+      GetTensorShape(output), GetTensorData<output_data_type>(output), \
+      nullptr)
+  switch (output->type) {
+    case kTfLiteUInt8:
+      TF_LITE_FULLY_CONNECTED(uint8_t);
+      break;
+    case kTfLiteInt16:
+      TF_LITE_FULLY_CONNECTED(int16_t);
+      break;
+    default:
+      context->ReportError(
+          context,
+          "Quantized FullyConnected expects output data type uint8 or int16");
+      return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
+                       TfLiteFullyConnectedParams* params, OpData* data,
+                       const TfLiteTensor* input, const TfLiteTensor* filter,
+                       const TfLiteTensor* bias, TfLiteTensor* output) {
+  float output_activation_min, output_activation_max;
+  CalculateActivationRange(params->activation, &output_activation_min,
+                           &output_activation_max);
+  tflite::FullyConnectedParams op_params;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+  tflite::reference_ops::FullyConnected(
+      op_params, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(filter), GetTensorData<float>(filter),
+      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
+      GetTensorData<float>(output));
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
+
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
+  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  TfLiteType data_type = input->type;
+  OpData local_data_object;
+  OpData* data = &local_data_object;
+  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, data_type, input,
+                                        filter, bias, output, data));
+
+  switch (filter->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      return EvalFloat(context, node, params, data, input, filter, bias,
+                       output);
+    case kTfLiteUInt8:
+      return EvalQuantized(context, node, params, data, input, filter, bias,
+                           output);
+
+    default:
+      context->ReportError(context, "Type %d not currently supported.",
+                           filter->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace fully_connected
+
+TfLiteRegistration* Register_FULLY_CONNECTED() {
+  static TfLiteRegistration r = {fully_connected::Init, fully_connected::Free,
+                                 fully_connected::Prepare,
+                                 fully_connected::Eval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc
new file mode 100644
index 0000000000..b42bf4c3bc
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc
@@ -0,0 +1,643 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h"
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+namespace {
+
+void TestFullyConnectedFloat(std::initializer_list<int> input_dims_data,
+                             std::initializer_list<float> input_data,
+                             std::initializer_list<int> weights_dims_data,
+                             std::initializer_list<float> weights_data,
+                             std::initializer_list<int> bias_dims_data,
+                             std::initializer_list<float> bias_data,
+                             std::initializer_list<float> expected_output_data,
+                             std::initializer_list<int> output_dims_data,
+                             TfLiteFusedActivation activation,
+                             float* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* weights_dims = IntArrayFromInitializer(weights_dims_data);
+  TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 3;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(weights_data, weights_dims, "weights_tensor"),
+      CreateFloatTensor(bias_data, bias_dims, "bias_tensor"),
+      CreateFloatTensor(output_data, output_dims, "output_tensor"),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteFullyConnectedParams builtin_data = {
+      activation,
+      kTfLiteFullyConnectedWeightsFormatDefault,
+  };
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+  int inputs_array_data[] = {3, 0, 1, 2};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 3};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1e-5f);
+  }
+}
+
+void TestFullyConnectedQuantized(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<uint8_t> input_data, float input_min, float input_max,
+    std::initializer_list<int> weights_dims_data,
+    std::initializer_list<uint8_t> weights_data, float weights_min,
+    float weights_max, std::initializer_list<int> bias_dims_data,
+    std::initializer_list<int32_t> bias_data, float bias_min, float bias_max,
+    std::initializer_list<uint8_t> expected_output_data,
+    std::initializer_list<int> output_dims_data, float output_min,
+    float output_max, TfLiteFusedActivation activation, uint8_t* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* weights_dims = IntArrayFromInitializer(weights_dims_data);
+  TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 3;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min,
+                            input_max),
+      CreateQuantizedTensor(weights_data, weights_dims, "weights_tensor",
+                            weights_min, weights_max),
+      CreateQuantized32Tensor(bias_data, bias_dims, "bias_tensor", bias_min,
+                              bias_max),
+      CreateQuantizedTensor(output_data, output_dims, "output_tensor",
+                            output_min, output_max),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteFullyConnectedParams builtin_data = {
+      activation,
+      kTfLiteFullyConnectedWeightsFormatDefault,
+  };
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+
+  int inputs_array_data[] = {3, 0, 1, 2};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 3};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]);
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(SimpleTest) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedFloat(  //
+      {2, 2, 10},                            // Input shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+          1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+      },
+      {2, 3, 10},  // Weights shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+      },
+      {1, 3},  // Bias shape.
+      {
+          1, 2, 3,  // Bias values.
+      },
+      {
+          24, 25, 26, 58, 59, 60,  // Expected results.
+      },
+      {2, 2, 3},  // Output shape.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTest2) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedFloat(  //
+      {2, 2, 2},                             // Input shape.
+      {
+          1, 2,  // b = 0
+          2, 1,  // b = 1
+      },
+      {2, 1, 2},  // Weights shape.
+      {
+          2, 4,  // u = 0
+      },
+      {1, 1},  // Bias shape.
+      {
+          1,  // Bias values.
+      },
+      {
+          11, 9,  // Expected results.
+      },
+      {2, 2, 1},  // Output shape.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestRelu) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedFloat(  //
+      {2, 2, 10},                            // Input shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+          1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+      },
+      {2, 3, 10},  // Weights shape.
+      {
+          1,  2,  3,  4,  5,  6,  7,  8,  9,  10,   // u = 0
+          -1, -2, -3, -4, -5, -6, -7, -8, -9, -10,  // u = 1
+          1,  2,  3,  4,  5,  6,  7,  8,  9,  10,   // u = 2
+      },
+      {1, 3},  // Bias shape.
+      {
+          1, -2, 3,  // Bias values.
+      },
+      {
+          24, 0, 26, 58, 0, 60,  // Expected results.
+      },
+      {2, 2, 3},  // Output shape.
+      kTfLiteActRelu, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantized) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float weights_min = -63.5f;
+  const float weights_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {2, 2, 10},                                // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(25, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(59, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantizedRelu) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float weights_min = -63.5f;
+  const float weights_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {2, 2, 10},                                // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max),  F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max),  F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max),  F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max),  F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max),  F2Q(10, weights_min, weights_max),
+          F2Q(-1, weights_min, weights_max), F2Q(-2, weights_min, weights_max),
+          F2Q(-3, weights_min, weights_max), F2Q(-4, weights_min, weights_max),
+          F2Q(-5, weights_min, weights_max), F2Q(-6, weights_min, weights_max),
+          F2Q(-7, weights_min, weights_max), F2Q(-8, weights_min, weights_max),
+          F2Q(-9, weights_min, weights_max), F2Q(-10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max),  F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max),  F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max),  F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max),  F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max),  F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(0, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActRelu, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantizedOutputMultiplierGreaterThan1) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -127.0f;
+  const float input_max = 128.0f;
+  const float weights_min = -127.0f;
+  const float weights_max = 128.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 256.0f * (1 << 24);
+  const float output_min = -63.5f;
+  const float output_max = 64.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {2, 2, 10},                                // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(25, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(59, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTest4DInput) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedFloat(  //
+      {4, 1, 1, 5, 1},                       // Input shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+          1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+      },
+      {2, 3, 10},  // Weights shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+      },
+      {1, 3},  // Bias shape.
+      {
+          1, 2, 3,  // Bias values.
+      },
+      {
+          24, 25, 26, 58, 59, 60,  // Expected results.
+      },
+      {2, 2, 3},  // Output shape.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTest4DInputQuantized) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float weights_min = -63.5f;
+  const float weights_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {4, 1, 1, 5, 1},                           // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(25, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(59, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedOutputMultiplierGreaterThan1) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -127.0f;
+  const float input_max = 128.0f;
+  const float weights_min = -127.0f;
+  const float weights_max = 128.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 256.0f * (1 << 24);
+  const float output_min = -63.5f;
+  const float output_max = 64.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {4, 1, 1, 5, 1},                           // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(25, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(59, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc b/tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc
new file mode 100644
index 0000000000..a4019a067c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc
@@ -0,0 +1,213 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace activations {
+namespace {
+
+struct OpData {
+  int32_t input_multiplier = 0;
+  int input_left_shift = 0;
+  int32_t input_range_radius = 0;
+  int diff_min = 0;
+};
+
+TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context,
+                                    const TfLiteTensor* input,
+                                    TfLiteTensor* output,
+                                    const TfLiteSoftmaxParams* params,
+                                    OpData* data) {
+  if (input->type == kTfLiteUInt8) {
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+    TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
+
+    static const int kScaledDiffIntegerBits = 5;
+
+    tflite::PreprocessSoftmaxScaling(
+        params->beta, input->params.scale, kScaledDiffIntegerBits,
+        &data->input_multiplier, &data->input_left_shift);
+    data->diff_min = -1.0 * tflite::CalculateInputRadius(
+                                kScaledDiffIntegerBits, data->input_left_shift);
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+void Free(TfLiteContext* context, void* buffer) {}
+
+TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+// Takes a 1D tensor and performs softmax along it.
+void Softmax1DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  const int input_size = input->dims->data[0];
+  tflite::reference_ops::Softmax(input->data.f, input_size, 1, params->beta,
+                                 output->data.f);
+}
+
+// Takes a 2D tensor and perform softmax along the last dimension.
+void Softmax2DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  const int batch_size = input->dims->data[0];
+  const int input_size = input->dims->data[1];
+  tflite::reference_ops::Softmax(input->data.f, input_size, batch_size,
+                                 params->beta, output->data.f);
+}
+
+void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  // TODO(ahentz): this is arguably a dirty trick. Since the implementation
+  // always traverses the last dimension of a 4D tensor, we will pretend our 1D
+  // tensor is 4D in a special way. We will convert a (Y) shape into a (1,
+  // 1, 1, Y) shape.
+  const int input_size = input->dims->data[0];
+  const int32_t shape_data[4] = {1, 1, 1, input_size};
+  RuntimeShape shape(4, shape_data);
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  tflite::reference_ops::Softmax(op_params, shape,
+                                 GetTensorData<uint8_t>(input), shape,
+                                 GetTensorData<uint8_t>(output));
+}
+
+void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  // TODO(ahentz): this is arguably a dirty trick. Since the implementation
+  // always traverses the last dimension of a 4D tensor, we will pretend our 2D
+  // tensor is 4D in a special way. We will convert a (X, Y) shape into a (X,
+  // 1, 1, Y) shape.
+  const int batch_size = input->dims->data[0];
+  const int input_size = input->dims->data[1];
+  const int32_t shape_data[4] = {batch_size, 1, 1, input_size};
+  RuntimeShape shape(4, shape_data);
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  tflite::reference_ops::Softmax(op_params, shape,
+                                 GetTensorData<uint8_t>(input), shape,
+                                 GetTensorData<uint8_t>(output));
+}
+
+// Takes a 4D tensor and perform softmax along the forth dimension.
+void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  SoftmaxParams op_params;
+  op_params.beta = params->beta;
+  tflite::reference_ops::Softmax(
+      op_params, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(output), GetTensorData<float>(output));
+}
+
+void Softmax4DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  tflite::reference_ops::Softmax(
+      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+      GetTensorShape(output), GetTensorData<uint8_t>(output));
+}
+
+TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+
+  const TfLiteTensor* input = GetInput(context, node, 0);
+  TfLiteTensor* output = GetOutput(context, node, 0);
+
+  OpData local_data_object;
+  OpData* data = &local_data_object;
+  TF_LITE_ENSURE_STATUS(
+      CalculateSoftmaxOpData(context, input, output, params, data));
+
+  // TODO(ahentz): consider an implementation that works for many (all?)
+  // dimensions.
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      if (NumDimensions(input) == 1) {
+        Softmax1DFloat(input, output, params);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 2) {
+        Softmax2DFloat(input, output, params);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 4) {
+        Softmax4DFloat(input, output, params);
+        return kTfLiteOk;
+      }
+      context->ReportError(
+          context, "Only 1D, 2D and 4D tensors supported currently, got %dD.",
+          NumDimensions(input));
+      return kTfLiteError;
+    }
+    case kTfLiteUInt8: {
+      if (NumDimensions(input) == 1) {
+        Softmax1DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 2) {
+        Softmax2DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 4) {
+        Softmax4DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
+      context->ReportError(
+          context, "Only 2D and 4D tensors supported currently, got %dD.",
+          NumDimensions(input));
+      return kTfLiteError;
+    }
+    default:
+      context->ReportError(
+          context, "Only float32 and uint8_t supported currently, got %d.",
+          input->type);
+      return kTfLiteError;
+  }
+}
+}  // namespace activations
+
+TfLiteRegistration* Register_SOFTMAX() {
+  static TfLiteRegistration r = {activations::Init, activations::Free,
+                                 activations::SoftmaxPrepare,
+                                 activations::SoftmaxEval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
new file mode 100644
index 0000000000..df7d87d623
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
@@ -0,0 +1,220 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h"
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+namespace {
+
+void TestSoftmaxFloat(std::initializer_list<int> input_dims_data,
+                      std::initializer_list<float> input_data,
+                      std::initializer_list<float> expected_output_data,
+                      std::initializer_list<int> output_dims_data,
+                      float* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 2;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(output_data, output_dims, "output_tensor"),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_SOFTMAX, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteSoftmaxParams builtin_data = {1.0f};
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+  int inputs_array_data[] = {1, 0};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 1};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1e-5f);
+  }
+}
+
+void TestSoftmaxQuantized(std::initializer_list<int> input_dims_data,
+                          std::initializer_list<uint8_t> input_data,
+                          float input_min, float input_max,
+                          std::initializer_list<uint8_t> expected_output_data,
+                          std::initializer_list<int> output_dims_data,
+                          float output_min, float output_max,
+                          uint8_t* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 1;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min,
+                            input_max),
+      CreateQuantizedTensor(output_data, output_dims, "output_tensor",
+                            output_min, output_max),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_SOFTMAX, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteSoftmaxParams builtin_data = {1.0f};
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+
+  int inputs_array_data[] = {1, 0};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 1};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]);
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(SimpleTest) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestSoftmaxFloat(  //
+      {2, 2, 5},                      // Input shape.
+      {
+          1.0, 2.0, 3.0, 4.0, 5.0,       // b = 0
+          -1.0, -2.0, -3.0, -4.0, -5.0,  // b = 0
+      },
+      {
+          // Expected results.
+          0.011656231,
+          0.031684921,
+          0.086128544,
+          0.234121657,
+          0.636408647,
+          0.636408647,
+          0.234121657,
+          0.086128544,
+          0.031684921,
+          0.011656231,
+      },
+      {2, 2, 3},  // Output shape.
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantized) {
+  using tflite::testing::F2Q;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float output_min = 0.0f;
+  const float output_max = (255.0f / 256.0f);
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestSoftmaxQuantized(  //
+      {2, 1, 5},                          // Input shape.
+      {
+          F2Q(1.0, input_min, input_max),
+          F2Q(2.0, input_min, input_max),
+          F2Q(3.0, input_min, input_max),
+          F2Q(4.0, input_min, input_max),
+          F2Q(5.0, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantized range.
+      {
+          // Expected results.
+          F2Q(0.011656231, output_min, output_max),
+          F2Q(0.031684921, output_min, output_max),
+          F2Q(0.086128544, output_min, output_max),
+          F2Q(0.234121657, output_min, output_max),
+          F2Q(0.636408647, output_min, output_max),
+      },
+      {2, 1, 3},               // Output shape.
+      output_min, output_max,  // Output quantized range.
+      output_data);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h b/tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h
new file mode 100644
index 0000000000..789a48ece8
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h
@@ -0,0 +1,170 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_TEST_UTILS_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_TEST_UTILS_H_
+
+#include <cstdarg>
+#include <initializer_list>
+#include <limits>
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+
+// How many elements are in the array with this shape.
+inline int ElementCount(const TfLiteIntArray& dims) {
+  int result = 1;
+  for (int i = 0; i < dims.size; ++i) {
+    result *= dims.data[i];
+  }
+  return result;
+}
+
+// Wrapper to forward kernel errors to the interpreter's error reporter.
+inline void ReportOpError(struct TfLiteContext* context, const char* format,
+                          ...) {
+  ErrorReporter* error_reporter = static_cast<ErrorReporter*>(context->impl_);
+  va_list args;
+  va_start(args, format);
+  error_reporter->Report(format, args);
+  va_end(args);
+}
+
+// Derives the quantization scaling factor from a min and max range.
+template <typename T>
+inline float ScaleFromMinMax(const float min, const float max) {
+  return (max - min) / ((std::numeric_limits<T>::max() * 1.0) -
+                        std::numeric_limits<T>::min());
+}
+
+// Derives the quantization zero point from a min and max range.
+template <typename T>
+inline int ZeroPointFromMinMax(const float min, const float max) {
+  return static_cast<int>((-min / ScaleFromMinMax<T>(min, max)) + 0.5f);
+}
+
+// Converts a float value into an unsigned eight-bit quantized value.
+inline uint8_t F2Q(const float value, const float min, const float max) {
+  int32_t result = ZeroPointFromMinMax<uint8_t>(min, max) +
+                   (value / ScaleFromMinMax<uint8_t>(min, max)) + 0.5f;
+  if (result < 0) {
+    result = 0;
+  }
+  if (result > 256) {
+    result = 256;
+  }
+  return result;
+}
+
+// Converts a float value into a signed thirty-two-bit quantized value.
+inline uint8_t F2Q32(const float value, const float min, const float max) {
+  return static_cast<int32_t>((value - ZeroPointFromMinMax<int32_t>(min, max)) /
+                              ScaleFromMinMax<int32_t>(min, max));
+}
+
+inline void PopulateContext(TfLiteTensor* tensors, int tensors_size,
+                            TfLiteContext* context) {
+  context->tensors_size = tensors_size;
+  context->tensors = tensors;
+  context->impl_ = static_cast<void*>(micro_test::reporter);
+  context->GetExecutionPlan = nullptr;
+  context->ResizeTensor = nullptr;
+  context->ReportError = ReportOpError;
+  context->AddTensors = nullptr;
+  context->GetNodeAndRegistration = nullptr;
+  context->ReplaceSubgraphsWithDelegateKernels = nullptr;
+  context->recommended_num_threads = 1;
+  context->GetExternalContext = nullptr;
+  context->SetExternalContext = nullptr;
+}
+
+inline TfLiteIntArray* IntArrayFromInts(const int* int_array) {
+  return const_cast<TfLiteIntArray*>(
+      reinterpret_cast<const TfLiteIntArray*>(int_array));
+}
+
+inline TfLiteIntArray* IntArrayFromInitializer(
+    std::initializer_list<int> int_initializer) {
+  return IntArrayFromInts(int_initializer.begin());
+}
+
+inline TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
+                                      const char* name) {
+  const size_t bytes = ElementCount(*dims) * sizeof(float);
+  return {
+      kTfLiteFloat32, {const_cast<int*>(reinterpret_cast<const int*>(data))},
+      dims,           {},
+      kTfLiteMemNone, bytes,
+      nullptr,        name};
+}
+
+inline TfLiteTensor CreateFloatTensor(std::initializer_list<float> data,
+                                      TfLiteIntArray* dims, const char* name) {
+  return CreateFloatTensor(data.begin(), dims, name);
+}
+
+inline TfLiteTensor CreateQuantizedTensor(const uint8_t* data,
+                                          TfLiteIntArray* dims,
+                                          const char* name, float min,
+                                          float max) {
+  const size_t bytes = ElementCount(*dims) * sizeof(uint8_t);
+  const TfLiteQuantizationParams q_params = {
+      ScaleFromMinMax<uint8_t>(min, max),
+      ZeroPointFromMinMax<uint8_t>(min, max)};
+  return {
+      kTfLiteUInt8,   {const_cast<int*>(reinterpret_cast<const int*>(data))},
+      dims,           q_params,
+      kTfLiteMemNone, bytes,
+      nullptr,        name};
+}
+
+inline TfLiteTensor CreateQuantizedTensor(std::initializer_list<uint8_t> data,
+                                          TfLiteIntArray* dims,
+                                          const char* name, float min,
+                                          float max) {
+  return CreateQuantizedTensor(data.begin(), dims, name, min, max);
+}
+
+inline TfLiteTensor CreateQuantized32Tensor(const int32_t* data,
+                                            TfLiteIntArray* dims,
+                                            const char* name, float min,
+                                            float max) {
+  const size_t bytes = ElementCount(*dims) * sizeof(int32_t);
+  const TfLiteQuantizationParams q_params = {
+      ScaleFromMinMax<int32_t>(min, max),
+      ZeroPointFromMinMax<int32_t>(min, max)};
+  return {
+      kTfLiteUInt8,   {const_cast<int*>(reinterpret_cast<const int*>(data))},
+      dims,           q_params,
+      kTfLiteMemNone, bytes,
+      nullptr,        name};
+}
+
+inline TfLiteTensor CreateQuantized32Tensor(std::initializer_list<int32_t> data,
+                                            TfLiteIntArray* dims,
+                                            const char* name, float min,
+                                            float max) {
+  return CreateQuantized32Tensor(data.begin(), dims, name, min, max);
+}
+
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_TEST_UTILS_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc
new file mode 100644
index 0000000000..99dd883661
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc
@@ -0,0 +1,78 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h"
+
+#ifdef TF_LITE_MCU_DEBUG_LOG
+#include <debug_log.h>
+#else  // TF_LITE_MCU_DEBUG_LOG
+#include <cstdint>
+#include <cstdio>
+void DebugLog(const char* s) { fprintf(stderr, "%s", s); }
+void DebugLogInt32(int32_t i) { fprintf(stderr, "%d", i); }
+void DebugLogUInt32(uint32_t i) { fprintf(stderr, "%d", i); }
+void DebugLogHex(uint32_t i) { fprintf(stderr, "0x%8x", i); }
+void DebugLogFloat(float i) { fprintf(stderr, "%f", i); }
+#endif  // TF_LITE_MCU_DEBUG_LOG
+
+namespace tflite {
+namespace {
+void DebugLogPrintf(const char* format, va_list args) {
+  const int output_cache_size = 64;
+  char output_cache[output_cache_size + 1];
+  int output_cache_index = 0;
+  const char* current = format;
+  while (*current != 0) {
+    if (*current == '%') {
+      const char next = *(current + 1);
+      if ((next == 'd') || (next == 's')) {
+        current += 1;
+        if (output_cache_index > 0) {
+          output_cache[output_cache_index] = 0;
+          DebugLog(output_cache);
+          output_cache_index = 0;
+        }
+        if (next == 'd') {
+          DebugLogInt32(va_arg(args, int));
+        } else if (next == 's') {
+          DebugLog(va_arg(args, char*));
+        }
+      }
+    } else {
+      output_cache[output_cache_index] = *current;
+      output_cache_index += 1;
+    }
+    if (output_cache_index >= output_cache_size) {
+      output_cache[output_cache_index] = 0;
+      DebugLog(output_cache);
+      output_cache_index = 0;
+    }
+    current += 1;
+  }
+  if (output_cache_index > 0) {
+    output_cache[output_cache_index] = 0;
+    DebugLog(output_cache);
+    output_cache_index = 0;
+  }
+  DebugLog("\n");
+}
+}  // namespace
+
+int MicroErrorReporter::Report(const char* format, va_list args) {
+  DebugLogPrintf(format, args);
+  return 0;
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h
new file mode 100644
index 0000000000..33e54f7990
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_ERROR_REPORTER_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_ERROR_REPORTER_H_
+
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/experimental/micro/compatibility.h"
+
+namespace tflite {
+
+class MicroErrorReporter : public ErrorReporter {
+ public:
+  ~MicroErrorReporter() {}
+  int Report(const char* format, va_list args) override;
+
+ private:
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_ERROR_REPORTER_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc
new file mode 100644
index 0000000000..ef3c32050c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc
@@ -0,0 +1,25 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h"
+
+int main(int argc, char** argv) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+  error_reporter->Report("Number: %d", 42);
+  error_reporter->Report("Badly-formed format string %");
+  error_reporter->Report("Another % badly-formed %% format string");
+  error_reporter->Report("~~~%s~~~", "ALL TESTS PASSED");
+}
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc
new file mode 100644
index 0000000000..0f38991bb0
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc
@@ -0,0 +1,310 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h"
+
+#include "tensorflow/contrib/lite/core/api/flatbuffer_conversions.h"
+#include "tensorflow/contrib/lite/experimental/micro/compatibility.h"
+
+namespace tflite {
+namespace {
+const int kStackDataAllocatorSize = 128;
+class StackDataAllocator : public BuiltinDataAllocator {
+ public:
+  void* Allocate(size_t size) override {
+    if (size > kStackDataAllocatorSize) {
+      return nullptr;
+    } else {
+      return data_;
+    }
+  }
+  void Deallocate(void* data) override {
+    // Do nothing.
+  }
+
+ private:
+  uint8_t data_[kStackDataAllocatorSize];
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
+  if (registration->builtin_code == BuiltinOperator_CUSTOM) {
+    return registration->custom_name;
+  } else {
+    return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
+  }
+}
+
+void ReportOpError(struct TfLiteContext* context, const char* format, ...) {
+  MicroInterpreter* interpreter =
+      static_cast<MicroInterpreter*>(context->impl_);
+  va_list args;
+  va_start(args, format);
+  interpreter->error_reporter()->Report(format, args);
+  va_end(args);
+}
+
+}  // namespace
+
+MicroInterpreter::MicroInterpreter(const Model* model,
+                                   const OpResolver& op_resolver,
+                                   SimpleTensorAllocator* tensor_allocator,
+                                   ErrorReporter* error_reporter)
+    : model_(model),
+      op_resolver_(op_resolver),
+      tensor_allocator_(tensor_allocator),
+      error_reporter_(error_reporter),
+      initialization_status_(kTfLiteOk) {
+  const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
+      model->buffers();
+  auto* subgraphs = model->subgraphs();
+  if (subgraphs->size() != 1) {
+    error_reporter->Report("Only 1 subgraph is currently supported.\n");
+    initialization_status_ = kTfLiteError;
+    return;
+  }
+  subgraph_ = (*subgraphs)[0];
+  tensors_ = subgraph_->tensors();
+  operators_ = subgraph_->operators();
+
+  context_.tensors_size = tensors_->Length();
+  context_.tensors =
+      reinterpret_cast<TfLiteTensor*>(tensor_allocator_->AllocateMemory(
+          sizeof(TfLiteTensor) * context_.tensors_size));
+  for (int i = 0; i < subgraph_->inputs()->Length(); ++i) {
+    const int tensor_index = subgraph_->inputs()->Get(i);
+    const auto* tensor = tensors_->Get(tensor_index);
+    initialization_status_ = tensor_allocator_->AllocateTensor(
+        *tensor, 0, operators_->Length(), buffers, error_reporter,
+        &context_.tensors[tensor_index]);
+    if (initialization_status_ != kTfLiteOk) {
+      return;
+    }
+  }
+
+  int* first_created = reinterpret_cast<int*>(
+      tensor_allocator_->AllocateMemory(sizeof(int) * tensors_->Length()));
+  int* last_used = reinterpret_cast<int*>(
+      tensor_allocator_->AllocateMemory(sizeof(int) * tensors_->Length()));
+  for (int i = 0; i < tensors_->Length(); ++i) {
+    first_created[i] = -1;
+    last_used[i] = -1;
+  }
+
+  for (int i = (operators_->Length() - 1); i >= 0; --i) {
+    const auto* op = operators_->Get(i);
+    for (int n = 0; n < op->inputs()->Length(); ++n) {
+      const int tensor_index = op->inputs()->Get(n);
+      if ((last_used[tensor_index] == -1) || (last_used[tensor_index] < i)) {
+        last_used[tensor_index] = i;
+      }
+    }
+    for (int n = 0; n < op->outputs()->Length(); ++n) {
+      const int tensor_index = op->outputs()->Get(n);
+      const int create_before = i;
+      int destroy_after = last_used[tensor_index];
+      if (destroy_after == -1) {
+        destroy_after = operators_->Length();
+      }
+      const auto* tensor = tensors_->Get(tensor_index);
+      if (!tensor->is_variable()) {
+        initialization_status_ = tensor_allocator_->AllocateTensor(
+            *tensor, create_before, destroy_after, buffers, error_reporter,
+            &context_.tensors[tensor_index]);
+        if (initialization_status_ != kTfLiteOk) {
+          return;
+        }
+        first_created[tensor_index] = i;
+      }
+    }
+  }
+
+  for (int i = 0; i < tensors_->Length(); ++i) {
+    const auto* tensor = tensors_->Get(i);
+    const bool is_read_only = (first_created[i] == -1) && (last_used[i] != -1);
+    if (tensor->is_variable() || is_read_only) {
+      initialization_status_ = tensor_allocator_->AllocateTensor(
+          *tensor, 0, operators_->Length(), buffers, error_reporter,
+          &context_.tensors[i]);
+      if (initialization_status_ != kTfLiteOk) {
+        return;
+      }
+    }
+  }
+  context_.impl_ = static_cast<void*>(this);
+  context_.GetExecutionPlan = nullptr;
+  context_.ResizeTensor = nullptr;
+  context_.ReportError = ReportOpError;
+  context_.AddTensors = nullptr;
+  context_.GetNodeAndRegistration = nullptr;
+  context_.ReplaceSubgraphsWithDelegateKernels = nullptr;
+  context_.recommended_num_threads = 1;
+  context_.GetExternalContext = nullptr;
+  context_.SetExternalContext = nullptr;
+}
+
+TfLiteStatus MicroInterpreter::Invoke() {
+  if (initialization_status_ != kTfLiteOk) {
+    error_reporter_->Report("Invoke() called after initialization failed\n");
+    return kTfLiteError;
+  }
+  TfLiteStatus status = kTfLiteOk;
+  auto opcodes = model_->operator_codes();
+  for (int i = 0; i < operators_->Length(); ++i) {
+    const auto* op = operators_->Get(i);
+    int index = op->opcode_index();
+    if (index < 0 || index >= opcodes->size()) {
+      error_reporter_->Report("Missing registration for opcode_index %d\n",
+                              index);
+      return kTfLiteError;
+    }
+    auto opcode = (*opcodes)[index];
+    const TfLiteRegistration* registration = nullptr;
+    status = GetRegistrationFromOpCode(opcode, op_resolver_, error_reporter_,
+                                       &registration);
+    if (status != kTfLiteOk) {
+      return status;
+    }
+    if (registration == nullptr) {
+      error_reporter_->Report("Skipping op for opcode_index %d\n", index);
+      return kTfLiteError;
+    }
+    BuiltinOperator op_type =
+        static_cast<BuiltinOperator>(registration->builtin_code);
+
+    if (op_type != BuiltinOperator_CUSTOM && op->custom_options()) {
+      error_reporter_->Report(
+          "Found builtin operator %s with custom options.\n",
+          EnumNameBuiltinOperator(op_type));
+    }
+    StackDataAllocator stack_data_allocator;
+    const char* custom_data = nullptr;
+    size_t custom_data_size = 0;
+    unsigned char* builtin_data = nullptr;
+    if (op->custom_options()) {
+      custom_data = reinterpret_cast<const char*>(op->custom_options()->data());
+      custom_data_size = op->custom_options()->size();
+    } else {
+      TF_LITE_ENSURE_STATUS(ParseOpData(op, op_type, error_reporter_,
+                                        &stack_data_allocator,
+                                        (void**)(&builtin_data)));
+    }
+
+    const char* init_data;
+    size_t init_data_size;
+    if (registration->builtin_code == BuiltinOperator_CUSTOM) {
+      init_data = custom_data;
+      init_data_size = custom_data_size;
+    } else {
+      init_data = reinterpret_cast<const char*>(builtin_data);
+      init_data_size = 0;
+    }
+    void* user_data = nullptr;
+    if (registration->init) {
+      user_data = registration->init(&context_, init_data, init_data_size);
+    }
+
+    const int kMaxInputs = 16;
+    int inputs_data[kMaxInputs + 1];
+    TfLiteIntArray* inputs_array =
+        reinterpret_cast<TfLiteIntArray*>(inputs_data);
+    if (op->inputs()->Length() >= kMaxInputs) {
+      error_reporter_->Report("Too many inputs (%d)\n", op->inputs()->Length());
+      return kTfLiteError;
+    }
+    inputs_array->size = op->inputs()->Length();
+    for (int n = 0; n < op->inputs()->Length(); ++n) {
+      inputs_array->data[n] = op->inputs()->Get(n);
+    }
+
+    const int kMaxOutputs = 16;
+    int outputs_data[kMaxOutputs + 1];
+    TfLiteIntArray* outputs_array =
+        reinterpret_cast<TfLiteIntArray*>(outputs_data);
+    if (op->outputs()->Length() >= kMaxOutputs) {
+      error_reporter_->Report("Too many outputs (%d)\n",
+                              op->outputs()->Length());
+      return kTfLiteError;
+    }
+    outputs_array->size = op->outputs()->Length();
+    for (int n = 0; n < op->outputs()->Length(); ++n) {
+      outputs_array->data[n] = op->outputs()->Get(n);
+    }
+
+    const int kMaxTemporaries = 16;
+    int temporaries_data[kMaxTemporaries + 1];
+    TfLiteIntArray* temporaries_array =
+        reinterpret_cast<TfLiteIntArray*>(temporaries_data);
+    temporaries_array->size = 0;
+
+    TfLiteNode node;
+    node.inputs = inputs_array;
+    node.outputs = outputs_array;
+    node.temporaries = temporaries_array;
+    node.user_data = user_data;
+    node.builtin_data = reinterpret_cast<void*>(builtin_data);
+    node.custom_initial_data = custom_data;
+    node.custom_initial_data_size = custom_data_size;
+    node.delegate = nullptr;
+    if (registration->prepare) {
+      TfLiteStatus prepare_status = registration->prepare(&context_, &node);
+      if (prepare_status != kTfLiteOk) {
+        error_reporter_->Report(
+            "Node %s (number %d) failed to prepare with status %d",
+            OpNameFromRegistration(registration), i, prepare_status);
+        return kTfLiteError;
+      }
+    }
+
+    if (registration->invoke) {
+      TfLiteStatus invoke_status = registration->invoke(&context_, &node);
+      if (invoke_status != kTfLiteOk) {
+        error_reporter_->Report(
+            "Node %s (number %d) failed to invoke with status %d",
+            OpNameFromRegistration(registration), i, invoke_status);
+        return kTfLiteError;
+      }
+    }
+
+    if (registration->free) {
+      registration->free(&context_, user_data);
+    }
+  }
+  return status;
+}
+
+TfLiteTensor* MicroInterpreter::input(int index) {
+  const flatbuffers::Vector<int32_t>* inputs = subgraph_->inputs();
+  const size_t length = inputs->Length();
+  if ((index < 0) || (index >= length)) {
+    error_reporter_->Report("Input index %d out of range (length is %d)", index,
+                            length);
+    return nullptr;
+  }
+  return &(context_.tensors[inputs->Get(index)]);
+}
+
+TfLiteTensor* MicroInterpreter::output(int index) {
+  const flatbuffers::Vector<int32_t>* outputs = subgraph_->outputs();
+  const size_t length = outputs->Length();
+  if ((index < 0) || (index >= outputs->Length())) {
+    error_reporter_->Report("Output index %d out of range (length is %d)",
+                            index, length);
+    return nullptr;
+  }
+  return &(context_.tensors[outputs->Get(index)]);
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_interpreter.h b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.h
new file mode 100644
index 0000000000..a88514cde8
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.h
@@ -0,0 +1,71 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_INTERPRETER_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_INTERPRETER_H_
+
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/core/api/op_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+class MicroInterpreter {
+ public:
+  // The lifetime of the model, op resolver, allocator, and error reporter must
+  // be at least as long as that of the interpreter object, since the
+  // interpreter may need to access them at any time. This means that you should
+  // usually create them with the same scope as each other, for example having
+  // them all allocated on the stack as local variables through a top-level
+  // function.
+  // The interpreter doesn't do any deallocation of any of the pointed-to
+  // objects, ownership remains with the caller.
+  MicroInterpreter(const Model* model, const OpResolver& op_resolver,
+                   SimpleTensorAllocator* tensor_allocator,
+                   ErrorReporter* error_reporter);
+
+  TfLiteStatus Invoke();
+
+  size_t tensors_size() const { return context_.tensors_size; }
+  TfLiteTensor* tensor(int tensor_index);
+
+  TfLiteTensor* input(int index);
+  size_t inputs_size() const { return subgraph_->inputs()->Length(); }
+
+  TfLiteTensor* output(int index);
+  size_t outputs_size() const { return subgraph_->outputs()->Length(); }
+
+  TfLiteStatus initialization_status() const { return initialization_status_; }
+
+  ErrorReporter* error_reporter() { return error_reporter_; }
+
+ private:
+  const Model* model_;
+  const OpResolver& op_resolver_;
+  SimpleTensorAllocator* tensor_allocator_;
+  ErrorReporter* error_reporter_;
+
+  TfLiteStatus initialization_status_;
+  const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
+  const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;
+  TfLiteContext context_;
+
+  const SubGraph* subgraph_;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_INTERPRETER_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc b/tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc
new file mode 100644
index 0000000000..251e5f7203
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc
@@ -0,0 +1,197 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h"
+
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace {
+void* MockInit(TfLiteContext* context, const char* buffer, size_t length) {
+  // Do nothing.
+  return nullptr;
+}
+
+void MockFree(TfLiteContext* context, void* buffer) {
+  // Do nothing.
+}
+
+TfLiteStatus MockPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus MockInvoke(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* input = &context->tensors[node->inputs->data[0]];
+  const int32_t* input_data = input->data.i32;
+  const TfLiteTensor* weight = &context->tensors[node->inputs->data[1]];
+  const uint8_t* weight_data = weight->data.uint8;
+  TfLiteTensor* output = &context->tensors[node->outputs->data[0]];
+  int32_t* output_data = output->data.i32;
+  output_data[0] = input_data[0] + weight_data[0];
+  return kTfLiteOk;
+}
+
+class MockOpResolver : public OpResolver {
+ public:
+  const TfLiteRegistration* FindOp(BuiltinOperator op,
+                                   int version) const override {
+    return nullptr;
+  }
+  const TfLiteRegistration* FindOp(const char* op, int version) const override {
+    if (strcmp(op, "mock_custom") == 0) {
+      static TfLiteRegistration r = {MockInit, MockFree, MockPrepare,
+                                     MockInvoke};
+      return &r;
+    } else {
+      return nullptr;
+    }
+  }
+};
+
+class StackAllocator : public flatbuffers::Allocator {
+ public:
+  StackAllocator() : data_(data_backing_), data_size_(0) {}
+
+  uint8_t* allocate(size_t size) override {
+    if ((data_size_ + size) > kStackAllocatorSize) {
+      // TODO(petewarden): Add error reporting beyond returning null!
+      return nullptr;
+    }
+    uint8_t* result = data_;
+    data_ += size;
+    data_size_ += size;
+    return result;
+  }
+
+  void deallocate(uint8_t* p, size_t) override {}
+
+  static StackAllocator& instance() {
+    // Avoid using true dynamic memory allocation to be portable to bare metal.
+    static char inst_memory[sizeof(StackAllocator)];
+    static StackAllocator* inst = new (inst_memory) StackAllocator;
+    return *inst;
+  }
+
+  static constexpr int kStackAllocatorSize = 4096;
+
+ private:
+  uint8_t data_backing_[kStackAllocatorSize];
+  uint8_t* data_;
+  int data_size_;
+};
+
+const Model* BuildMockModel() {
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder builder(StackAllocator::kStackAllocatorSize,
+                                         &StackAllocator::instance());
+  constexpr size_t buffer_data_size = 1;
+  const uint8_t buffer_data[buffer_data_size] = {21};
+  constexpr size_t buffers_size = 2;
+  const Offset<Buffer> buffers[buffers_size] = {
+      CreateBuffer(builder),
+      CreateBuffer(builder,
+                   builder.CreateVector(buffer_data, buffer_data_size))};
+  constexpr size_t tensor_shape_size = 1;
+  const int32_t tensor_shape[tensor_shape_size] = {1};
+  constexpr size_t tensors_size = 3;
+  const Offset<Tensor> tensors[tensors_size] = {
+      CreateTensor(builder,
+                   builder.CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT32, 0,
+                   builder.CreateString("test_input_tensor"), 0, false),
+      CreateTensor(builder,
+                   builder.CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_UINT8, 1,
+                   builder.CreateString("test_weight_tensor"), 0, false),
+      CreateTensor(builder,
+                   builder.CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT32, 0,
+                   builder.CreateString("test_output_tensor"), 0, false),
+  };
+  constexpr size_t inputs_size = 1;
+  const int32_t inputs[inputs_size] = {0};
+  constexpr size_t outputs_size = 1;
+  const int32_t outputs[outputs_size] = {2};
+  constexpr size_t operator_inputs_size = 2;
+  const int32_t operator_inputs[operator_inputs_size] = {0, 1};
+  constexpr size_t operator_outputs_size = 1;
+  const int32_t operator_outputs[operator_outputs_size] = {2};
+  constexpr size_t operators_size = 1;
+  const Offset<Operator> operators[operators_size] = {CreateOperator(
+      builder, 0, builder.CreateVector(operator_inputs, operator_inputs_size),
+      builder.CreateVector(operator_outputs, operator_outputs_size),
+      BuiltinOptions_NONE)};
+  constexpr size_t subgraphs_size = 1;
+  const Offset<SubGraph> subgraphs[subgraphs_size] = {
+      CreateSubGraph(builder, builder.CreateVector(tensors, tensors_size),
+                     builder.CreateVector(inputs, inputs_size),
+                     builder.CreateVector(outputs, outputs_size),
+                     builder.CreateVector(operators, operators_size),
+                     builder.CreateString("test_subgraph"))};
+  constexpr size_t operator_codes_size = 1;
+  const Offset<OperatorCode> operator_codes[operator_codes_size] = {
+      CreateOperatorCodeDirect(builder, BuiltinOperator_CUSTOM, "mock_custom",
+                               0)};
+  const Offset<Model> model_offset = CreateModel(
+      builder, 0, builder.CreateVector(operator_codes, operator_codes_size),
+      builder.CreateVector(subgraphs, subgraphs_size),
+      builder.CreateString("test_model"),
+      builder.CreateVector(buffers, buffers_size));
+  FinishModelBuffer(builder, model_offset);
+  void* model_pointer = builder.GetBufferPointer();
+  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
+  return model;
+}
+
+}  // namespace
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestInterpreter) {
+  const tflite::Model* model = tflite::BuildMockModel();
+  TF_LITE_MICRO_EXPECT_NE(nullptr, model);
+  tflite::MockOpResolver mock_resolver;
+  constexpr size_t allocator_buffer_size = 1024;
+  uint8_t allocator_buffer[allocator_buffer_size];
+  tflite::SimpleTensorAllocator simple_tensor_allocator(allocator_buffer,
+                                                        allocator_buffer_size);
+  tflite::MicroInterpreter interpreter(
+      model, mock_resolver, &simple_tensor_allocator, micro_test::reporter);
+  TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size());
+  TF_LITE_MICRO_EXPECT_EQ(1, interpreter.outputs_size());
+
+  TfLiteTensor* input = interpreter.input(0);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, input);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, input->type);
+  TF_LITE_MICRO_EXPECT_EQ(1, input->dims->size);
+  TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]);
+  TF_LITE_MICRO_EXPECT_EQ(4, input->bytes);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, input->data.i32);
+  input->data.i32[0] = 21;
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter.Invoke());
+
+  TfLiteTensor* output = interpreter.output(0);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, output);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, output->type);
+  TF_LITE_MICRO_EXPECT_EQ(1, output->dims->size);
+  TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]);
+  TF_LITE_MICRO_EXPECT_EQ(4, output->bytes);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, output->data.i32);
+  TF_LITE_MICRO_EXPECT_EQ(42, output->data.i32[0]);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc
new file mode 100644
index 0000000000..40c21c6448
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc
@@ -0,0 +1,80 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h"
+
+namespace tflite {
+
+const TfLiteRegistration* MicroMutableOpResolver::FindOp(
+    tflite::BuiltinOperator op, int version) const {
+  for (int i = 0; i < registrations_len_; ++i) {
+    const TfLiteRegistration& registration = registrations_[i];
+    if ((registration.builtin_code == op) &&
+        (registration.version == version)) {
+      return &registration;
+    }
+  }
+  return nullptr;
+}
+
+const TfLiteRegistration* MicroMutableOpResolver::FindOp(const char* op,
+                                                         int version) const {
+  for (int i = 0; i < registrations_len_; ++i) {
+    const TfLiteRegistration& registration = registrations_[i];
+    if ((registration.builtin_code == -1) &&
+        (strcmp(registration.custom_name, op) == 0) &&
+        (registration.version == version)) {
+      return &registration;
+    }
+  }
+  return nullptr;
+}
+
+void MicroMutableOpResolver::AddBuiltin(tflite::BuiltinOperator op,
+                                        TfLiteRegistration* registration,
+                                        int min_version, int max_version) {
+  for (int version = min_version; version <= max_version; ++version) {
+    if (registrations_len_ >= TFLITE_REGISTRATIONS_MAX) {
+      // TODO(petewarden) - Add error reporting hooks so we can report this!
+      return;
+    }
+    TfLiteRegistration* new_registration = &registrations_[registrations_len_];
+    registrations_len_ += 1;
+
+    *new_registration = *registration;
+    new_registration->builtin_code = op;
+    new_registration->version = version;
+  }
+}
+
+void MicroMutableOpResolver::AddCustom(const char* name,
+                                       TfLiteRegistration* registration,
+                                       int min_version, int max_version) {
+  for (int version = min_version; version <= max_version; ++version) {
+    if (registrations_len_ >= TFLITE_REGISTRATIONS_MAX) {
+      // TODO(petewarden) - Add error reporting hooks so we can report this!
+      return;
+    }
+    TfLiteRegistration* new_registration = &registrations_[registrations_len_];
+    registrations_len_ += 1;
+
+    *new_registration = *registration;
+    new_registration->builtin_code = -1;
+    new_registration->custom_name = name;
+    new_registration->version = version;
+  }
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h
new file mode 100644
index 0000000000..f3750a2484
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
+
+#include "tensorflow/contrib/lite/core/api/op_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/compatibility.h"
+
+#ifndef TFLITE_REGISTRATIONS_MAX
+#define TFLITE_REGISTRATIONS_MAX (128)
+#endif
+
+namespace tflite {
+
+class MicroMutableOpResolver : public OpResolver {
+ public:
+  const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
+                                   int version) const override;
+  const TfLiteRegistration* FindOp(const char* op, int version) const override;
+  void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
+                  int min_version = 1, int max_version = 1);
+  void AddCustom(const char* name, TfLiteRegistration* registration,
+                 int min_version = 1, int max_version = 1);
+
+ private:
+  TfLiteRegistration registrations_[TFLITE_REGISTRATIONS_MAX];
+  int registrations_len_ = 0;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc
new file mode 100644
index 0000000000..5420a33e87
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc
@@ -0,0 +1,83 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h"
+
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace {
+void* MockInit(TfLiteContext* context, const char* buffer, size_t length) {
+  // Do nothing.
+  return nullptr;
+}
+
+void MockFree(TfLiteContext* context, void* buffer) {
+  // Do nothing.
+}
+
+TfLiteStatus MockPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus MockInvoke(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+}  // namespace
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestOperations) {
+  using tflite::BuiltinOperator_CONV_2D;
+  using tflite::BuiltinOperator_RELU;
+  using tflite::MicroMutableOpResolver;
+  using tflite::OpResolver;
+
+  static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree,
+                                 tflite::MockPrepare, tflite::MockInvoke};
+
+  MicroMutableOpResolver micro_mutable_op_resolver;
+  micro_mutable_op_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &r, 0, 2);
+  micro_mutable_op_resolver.AddCustom("mock_custom", &r, 0, 3);
+  OpResolver* resolver = &micro_mutable_op_resolver;
+
+  const TfLiteRegistration* registration =
+      resolver->FindOp(BuiltinOperator_CONV_2D, 0);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration->init(nullptr, nullptr, 0));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(nullptr, nullptr));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr));
+
+  registration = resolver->FindOp(BuiltinOperator_CONV_2D, 10);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration);
+
+  registration = resolver->FindOp(BuiltinOperator_RELU, 0);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration);
+
+  registration = resolver->FindOp("mock_custom", 0);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration->init(nullptr, nullptr, 0));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(nullptr, nullptr));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr));
+
+  registration = resolver->FindOp("mock_custom", 10);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration);
+
+  registration = resolver->FindOp("nonexistent_custom", 0);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc
new file mode 100644
index 0000000000..8c090a20a5
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc
@@ -0,0 +1,149 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+
+#include "tensorflow/contrib/lite/core/api/flatbuffer_conversions.h"
+
+namespace tflite {
+namespace {
+
+TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
+                              ErrorReporter* reporter) {
+  switch (type) {
+    case kTfLiteFloat32:
+      *size = sizeof(float);
+      break;
+    case kTfLiteInt16:
+      *size = sizeof(int16_t);
+      break;
+    case kTfLiteInt32:
+      *size = sizeof(int32_t);
+      break;
+    case kTfLiteUInt8:
+      *size = sizeof(uint8_t);
+      break;
+    case kTfLiteInt64:
+      *size = sizeof(int64_t);
+      break;
+    case kTfLiteBool:
+      *size = sizeof(bool);
+      break;
+    case kTfLiteComplex64:
+      *size = sizeof(float) * 2;
+      break;
+    default:
+      reporter->Report(
+          "Only float32, int16, int32, int64, uint8, bool, complex64 "
+          "supported currently.");
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus BytesRequired(const tflite::Tensor& flatbuffer_tensor,
+                           size_t dims_size, size_t* bytes,
+                           ErrorReporter* error_reporter) {
+  TfLiteType tf_lite_type;
+  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
+                                          &tf_lite_type, error_reporter));
+  size_t type_size;
+  TF_LITE_ENSURE_STATUS(
+      TfLiteTypeSizeOf(tf_lite_type, &type_size, error_reporter));
+  *bytes = dims_size * type_size;
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteStatus SimpleTensorAllocator::AllocateTensor(
+    const tflite::Tensor& flatbuffer_tensor, int create_before,
+    int destroy_after,
+    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
+    ErrorReporter* error_reporter, TfLiteTensor* result) {
+  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
+                                          &result->type, error_reporter));
+  result->is_variable = flatbuffer_tensor.is_variable();
+
+  result->data.raw = nullptr;
+  result->bytes = 0;
+  if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
+    if (auto* array = buffer->data()) {
+      if (size_t array_size = array->size()) {
+        result->data.raw =
+            const_cast<char*>(reinterpret_cast<const char*>(array->data()));
+        TF_LITE_ENSURE_STATUS(BytesRequired(flatbuffer_tensor, array_size,
+                                            &result->bytes, error_reporter));
+      }
+    }
+  }
+  if (result->data.raw) {
+    result->allocation_type = kTfLiteMmapRo;
+  } else {
+    int data_size = 1;
+    for (int n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
+      data_size *= flatbuffer_tensor.shape()->Get(n);
+    }
+    TF_LITE_ENSURE_STATUS(BytesRequired(flatbuffer_tensor, data_size,
+                                        &result->bytes, error_reporter));
+    result->data.raw = reinterpret_cast<char*>(AllocateMemory(result->bytes));
+    if (result->data.raw == nullptr) {
+      const char* tensor_name = flatbuffer_tensor.name()->c_str();
+      if (tensor_name == nullptr) {
+        tensor_name = "<None>";
+      }
+      error_reporter->Report(
+          "Couldn't allocate memory for tensor '%s', wanted %d bytes but only "
+          "%d were available",
+          tensor_name, result->bytes, (data_size_max_ - data_size_));
+      return kTfLiteError;
+    }
+    result->allocation_type = kTfLiteArenaRw;
+  }
+  result->dims = reinterpret_cast<TfLiteIntArray*>(
+      AllocateMemory(sizeof(int) * (flatbuffer_tensor.shape()->Length() + 1)));
+  result->dims->size = flatbuffer_tensor.shape()->Length();
+  for (int n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
+    result->dims->data[n] = flatbuffer_tensor.shape()->Get(n);
+  }
+  if (flatbuffer_tensor.quantization()) {
+    result->params.scale = flatbuffer_tensor.quantization()->scale()->Get(0);
+    result->params.zero_point =
+        flatbuffer_tensor.quantization()->zero_point()->Get(0);
+  }
+  result->allocation = nullptr;
+  if (flatbuffer_tensor.name()) {
+    result->name = flatbuffer_tensor.name()->c_str();
+  } else {
+    result->name = "<No name>";
+  }
+  result->delegate = nullptr;
+  result->buffer_handle = 0;
+  result->data_is_stale = false;
+  return kTfLiteOk;
+}
+
+uint8_t* SimpleTensorAllocator::AllocateMemory(size_t size) {
+  if ((data_size_ + size) > data_size_max_) {
+    // TODO(petewarden): Add error reporting beyond returning null!
+    return nullptr;
+  }
+  uint8_t* result = data_;
+  data_ += size;
+  data_size_ += size;
+  return result;
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h
new file mode 100644
index 0000000000..4f16a9d0e5
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h
@@ -0,0 +1,51 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_SIMPLE_TENSOR_ALLOCATOR_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_SIMPLE_TENSOR_ALLOCATOR_H_
+
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+// TODO(petewarden): This allocator never frees up or reuses  any memory, even
+// though we have enough information about lifetimes of the tensors to do so.
+// This makes it pretty wasteful, so we should use a more intelligent method.
+class SimpleTensorAllocator {
+ public:
+  SimpleTensorAllocator(uint8_t* buffer, int buffer_size)
+      : data_size_(0), data_size_max_(buffer_size), data_(buffer) {}
+
+  TfLiteStatus AllocateTensor(
+      const tflite::Tensor& flatbuffer_tensor, int create_before,
+      int destroy_after,
+      const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
+      ErrorReporter* error_reporter, TfLiteTensor* result);
+
+  uint8_t* AllocateMemory(size_t size);
+
+  int GetDataSize() const { return data_size_; }
+
+ private:
+  int data_size_;
+  int data_size_max_;
+  uint8_t* data_;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_SIMPLE_TENSOR_ALLOCATOR_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc
new file mode 100644
index 0000000000..c835427243
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc
@@ -0,0 +1,144 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h"
+
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace {
+class StackAllocator : public flatbuffers::Allocator {
+ public:
+  StackAllocator() : data_(data_backing_), data_size_(0) {}
+
+  uint8_t* allocate(size_t size) override {
+    if ((data_size_ + size) > kStackAllocatorSize) {
+      // TODO(petewarden): Add error reporting beyond returning null!
+      return nullptr;
+    }
+    uint8_t* result = data_;
+    data_ += size;
+    data_size_ += size;
+    return result;
+  }
+
+  void deallocate(uint8_t* p, size_t) override {}
+
+  static StackAllocator& instance() {
+    // Avoid using true dynamic memory allocation to be portable to bare metal.
+    static char inst_memory[sizeof(StackAllocator)];
+    static StackAllocator* inst = new (inst_memory) StackAllocator;
+    return *inst;
+  }
+
+  static constexpr int kStackAllocatorSize = 4096;
+
+ private:
+  uint8_t data_backing_[kStackAllocatorSize];
+  uint8_t* data_;
+  int data_size_;
+};
+
+flatbuffers::FlatBufferBuilder* BuilderInstance() {
+  static char inst_memory[sizeof(flatbuffers::FlatBufferBuilder)];
+  static flatbuffers::FlatBufferBuilder* inst =
+      new (inst_memory) flatbuffers::FlatBufferBuilder(
+          StackAllocator::kStackAllocatorSize, &StackAllocator::instance());
+  return inst;
+}
+
+const Tensor* Create1dTensor(int size) {
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+  constexpr size_t tensor_shape_size = 1;
+  const int32_t tensor_shape[tensor_shape_size] = {size};
+  const Offset<Tensor> tensor_offset = CreateTensor(
+      *builder, builder->CreateVector(tensor_shape, tensor_shape_size),
+      TensorType_INT32, 0, builder->CreateString("test_tensor"), 0, false);
+  builder->Finish(tensor_offset);
+  void* tensor_pointer = builder->GetBufferPointer();
+  const Tensor* tensor = flatbuffers::GetRoot<Tensor>(tensor_pointer);
+  return tensor;
+}
+
+const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* CreateBuffers() {
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+  constexpr size_t buffers_size = 1;
+  const Offset<Buffer> buffers[buffers_size] = {
+      CreateBuffer(*builder),
+  };
+  const flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>
+      buffers_offset = builder->CreateVector(buffers, buffers_size);
+  builder->Finish(buffers_offset);
+  void* buffers_pointer = builder->GetBufferPointer();
+  const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* result =
+      flatbuffers::GetRoot<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>(
+          buffers_pointer);
+  return result;
+}
+
+}  // namespace
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestAllocateTensor) {
+  constexpr size_t arena_size = 1024;
+  uint8_t arena[arena_size];
+  tflite::SimpleTensorAllocator allocator(arena, arena_size);
+
+  const tflite::Tensor* tensor = tflite::Create1dTensor(100);
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>* buffers =
+      tflite::CreateBuffers();
+
+  TfLiteTensor allocated_tensor;
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk,
+      allocator.AllocateTensor(*tensor, 0, 1, buffers, micro_test::reporter,
+                               &allocated_tensor));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_tensor.type);
+  TF_LITE_MICRO_EXPECT_EQ(1, allocated_tensor.dims->size);
+  TF_LITE_MICRO_EXPECT_EQ(100, allocated_tensor.dims->data[0]);
+  TF_LITE_MICRO_EXPECT_EQ(400, allocated_tensor.bytes);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, allocated_tensor.data.i32);
+}
+
+TF_LITE_MICRO_TEST(TestTooLarge) {
+  constexpr size_t arena_size = 1024;
+  uint8_t arena[arena_size];
+  tflite::SimpleTensorAllocator allocator(arena, arena_size);
+
+  const tflite::Tensor* tensor = tflite::Create1dTensor(10000);
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>* buffers =
+      tflite::CreateBuffers();
+
+  TfLiteTensor allocated_tensor;
+  TF_LITE_MICRO_EXPECT_NE(
+      kTfLiteOk,
+      allocator.AllocateTensor(*tensor, 0, 1, buffers, micro_test::reporter,
+                               &allocated_tensor));
+}
+
+TF_LITE_MICRO_TEST(TestJustFits) {
+  constexpr size_t arena_size = 1024;
+  uint8_t arena[arena_size];
+  tflite::SimpleTensorAllocator allocator(arena, arena_size);
+
+  uint8_t* result = allocator.AllocateMemory(arena_size);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, result);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/BUILD b/tensorflow/contrib/lite/experimental/micro/testing/BUILD
new file mode 100644
index 0000000000..0d23be5712
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/BUILD
@@ -0,0 +1,17 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["test_linux_binary.sh"])
+
+cc_library(
+    name = "micro_test",
+    hdrs = [
+        "micro_test.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill b/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill
new file mode 100644
index 0000000000..7d6d81af0f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill
@@ -0,0 +1,21 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# This docker configuration file lets you emulate a Blue Pill board
+# on an x86 desktop or laptop, which can be useful for debugging and
+# automated testing.
+FROM antmicro/renode:latest
+
+LABEL maintainer="Pete Warden <petewarden@google.com>"
\ No newline at end of file
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
new file mode 100644
index 0000000000..9333dc42bf
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
@@ -0,0 +1,36 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+using sysbus
+
+mach create
+machine LoadPlatformDescription @platforms/cpus/stm32f103.repl
+
+# These lines are needed to show the results of DebugLog calls in the output.
+machine LoadPlatformDescriptionFromString "uartSemihosting: UART.SemihostingUart @ cpu"
+showAnalyzer cpu.uartSemihosting Antmicro.Renode.Analyzers.LoggingUartAnalyzer
+
+logFile @/tmp/renode_bluepill_log.txt
+
+macro reset
+"""
+    sysbus LoadELF $bin
+"""
+
+runMacro $reset
+
+emulation RunFor @1
+
+quit
\ No newline at end of file
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
new file mode 100644
index 0000000000..91e349cb24
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
@@ -0,0 +1,64 @@
+"""Rules for simple testing without dependencies by parsing output logs."""
+
+def tflite_micro_cc_test(
+        name,
+        expected_in_logs = "~~~ALL TESTS PASSED~~~",
+        srcs = [],
+        includes = [],
+        defines = [],
+        copts = [],
+        nocopts = "",
+        linkopts = [],
+        deps = [],
+        visibility = None):
+    """Tests a C/C++ binary without testing framework  dependencies`.
+
+    Runs a C++ binary, and tests that the output logs contain the
+    expected value. This is a deliberately spartan way of testing, to match
+    what's available when testing microcontroller binaries.
+
+    Args:
+      name: a unique name for this rule.
+      expected_in_logs: A regular expression that is required to be
+                        present in the binary's logs for the test to pass.
+      srcs: sources to compile (C, C++, ld scripts).
+      includes: include paths to add to this rule and its dependents.
+      defines: list of `VAR` or `VAR=VAL` to pass to CPP for this rule and
+               its dependents.
+      copts: gcc compilation flags for this rule only.
+      nocopts: list of gcc compilation flags to remove for this rule
+               only. No regexp like for `cc_library`.
+      linkopts: `gcc` flags to add to the linking phase. For "pure" ld flags,
+                prefix them with the `-Wl,` prefix here.
+      deps: dependencies. only `tflite_bare_metal_cc_library()` dependencies
+            allowed.
+      visibility: visibility.
+    """
+    native.cc_binary(
+        name = name + "_binary",
+        srcs = srcs,
+        includes = includes,
+        defines = defines,
+        copts = copts,
+        nocopts = nocopts,
+        linkopts = linkopts,
+        deps = deps,
+        visibility = visibility,
+    )
+    native.sh_test(
+        name = name,
+        size = "medium",
+        srcs = [
+            "//tensorflow/contrib/lite/experimental/micro/testing:test_linux_binary.sh",
+        ],
+        args = [
+            native.package_name() + "/" + name + "_binary",
+            "'" + expected_in_logs + "'",
+        ],
+        data = [
+            name + "_binary",
+            # Internal test dependency placeholder
+        ],
+        deps = [
+        ],
+    )
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h
new file mode 100644
index 0000000000..104509c9dc
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h
@@ -0,0 +1,138 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// An ultra-lightweight testing framework designed for use with microcontroller
+// applications. Its only dependency is on TensorFlow Lite's ErrorReporter
+// interface, where log messages are output. This is designed to be usable even
+// when no standard C or C++ libraries are available, and without any dynamic
+// memory allocation or reliance on global constructors.
+//
+// To build a test, you use syntax similar to gunit, but with some extra
+// decoration to create a hidden 'main' function containing each of the tests to
+// be run. Your code should look something like:
+// ----------------------------------------------------------------------------
+// #include "path/to/this/header"
+//
+// TF_LITE_MICRO_TESTS_BEGIN
+//
+// TF_LITE_MICRO_TEST(SomeTest) {
+//   TF_LITE_LOG_EXPECT_EQ(true, true);
+// }
+//
+// TF_LITE_MICRO_TESTS_END
+// ----------------------------------------------------------------------------
+// If you compile this for your platform, you'll get a normal binary that you
+// should be able to run. Executing it will output logging information like this
+// to stderr (or whatever equivalent is available and written to by
+// ErrorReporter):
+// ----------------------------------------------------------------------------
+// Testing SomeTest
+// 1/1 tests passed
+// ~~~ALL TESTS PASSED~~~
+// ----------------------------------------------------------------------------
+// This is designed to be human-readable, so you can just run tests manually,
+// but the string "~~~ALL TESTS PASSED~~~" should only appear if all of the
+// tests do pass. This makes it possible to integrate with automated test
+// systems by scanning the output logs and looking for that magic value.
+//
+// This framework is intended to be a rudimentary alternative to no testing at
+// all on systems that struggle to run more conventional approaches, so use with
+// caution!
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h"
+
+namespace micro_test {
+extern int tests_passed;
+extern int tests_failed;
+extern bool is_test_complete;
+extern bool did_test_fail;
+extern tflite::ErrorReporter* reporter;
+}  // namespace micro_test
+
+#define TF_LITE_MICRO_TESTS_BEGIN              \
+  namespace micro_test {                       \
+  int tests_passed;                            \
+  int tests_failed;                            \
+  bool is_test_complete;                       \
+  bool did_test_fail;                          \
+  tflite::ErrorReporter* reporter;             \
+  }                                            \
+                                               \
+  int main(int argc, char** argv) {            \
+    micro_test::tests_passed = 0;              \
+    micro_test::tests_failed = 0;              \
+    tflite::MicroErrorReporter error_reporter; \
+    micro_test::reporter = &error_reporter;
+
+#define TF_LITE_MICRO_TESTS_END                                \
+  micro_test::reporter->Report(                                \
+      "%d/%d tests passed", micro_test::tests_passed,          \
+      (micro_test::tests_failed + micro_test::tests_passed));  \
+  if (micro_test::tests_failed == 0) {                         \
+    micro_test::reporter->Report("~~~ALL TESTS PASSED~~~\n");  \
+  } else {                                                     \
+    micro_test::reporter->Report("~~~SOME TESTS FAILED~~~\n"); \
+  }                                                            \
+  }
+
+// TODO(petewarden): I'm going to hell for what I'm doing to this poor for loop.
+#define TF_LITE_MICRO_TEST(name)                                           \
+  micro_test::reporter->Report("Testing %s", #name);                       \
+  for (micro_test::is_test_complete = false,                               \
+      micro_test::did_test_fail = false;                                   \
+       !micro_test::is_test_complete; micro_test::is_test_complete = true, \
+      micro_test::tests_passed += (micro_test::did_test_fail) ? 0 : 1,     \
+      micro_test::tests_failed += (micro_test::did_test_fail) ? 1 : 0)
+
+#define TF_LITE_MICRO_EXPECT(x)                                                \
+  do {                                                                         \
+    if (!(x)) {                                                                \
+      micro_test::reporter->Report(#x " failed at %s:%d", __FILE__, __LINE__); \
+      micro_test::did_test_fail = true;                                        \
+    }                                                                          \
+  } while (false)
+
+#define TF_LITE_MICRO_EXPECT_EQ(x, y)                                         \
+  do {                                                                        \
+    if ((x) != (y)) {                                                         \
+      micro_test::reporter->Report(#x " == " #y " failed at %s:%d", __FILE__, \
+                                   __LINE__);                                 \
+      micro_test::did_test_fail = true;                                       \
+    }                                                                         \
+  } while (false)
+
+#define TF_LITE_MICRO_EXPECT_NE(x, y)                                         \
+  do {                                                                        \
+    if ((x) == (y)) {                                                         \
+      micro_test::reporter->Report(#x " != " #y " failed at %s:%d", __FILE__, \
+                                   __LINE__);                                 \
+      micro_test::did_test_fail = true;                                       \
+    }                                                                         \
+  } while (false)
+
+#define TF_LITE_MICRO_EXPECT_NEAR(x, y, epsilon)                      \
+  do {                                                                \
+    auto delta = ((x) > (y)) ? ((x) - (y)) : ((y) - (x));             \
+    if (delta > epsilon) {                                            \
+      micro_test::reporter->Report(#x " near " #y " failed at %s:%d", \
+                                   __FILE__, __LINE__);               \
+      micro_test::did_test_fail = true;                               \
+    }                                                                 \
+  } while (false)
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh b/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
new file mode 100755
index 0000000000..07742a8262
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
@@ -0,0 +1,54 @@
+#!/bin/bash -e
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Tests a 'bluepill' STM32F103 ELF by parsing the log output of Renode emulation.
+#
+# First argument is the ELF location.
+# Second argument is a regular expression that's required to be in the output logs
+# for the test to pass.
+
+declare -r ROOT_DIR=`pwd`
+declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/
+declare -r MICRO_LOG_PATH=${TEST_TMPDIR}
+declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt
+mkdir -p ${MICRO_LOG_PATH}
+
+docker build -t renode_bluepill \
+  -f ${ROOT_DIR}/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill \
+  ${ROOT_DIR}/tensorflow/contrib/lite/experimental/micro/testing/
+
+docker run \
+  --log-driver=none -a stdout -a stderr \
+  -v ${ROOT_DIR}:/workspace \
+  -v /tmp:/tmp \
+  -it renode_bluepill \
+  /bin/bash -c "renode -P 5000 --disable-xwt -e '
+\$bin?=@/workspace/$1
+s @/workspace/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
+' 2>&1 >${MICRO_LOG_FILENAME}"
+
+echo "LOGS:"
+cat ${MICRO_LOG_FILENAME}
+
+if grep -q "$2" ${MICRO_LOG_FILENAME}
+then
+  echo "$1: PASS"
+  exit 0
+else
+  echo "$1: FAIL - '$2' not found in logs."
+  exit 1
+fi
+
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh b/tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh
new file mode 100755
index 0000000000..24131a6d2d
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh
@@ -0,0 +1,39 @@
+#!/bin/bash -e
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Tests a Linux binary by parsing the log output.
+#
+# First argument is the binary location.
+# Second argument is a regular expression that's required to be in the output logs
+# for the test to pass.
+
+declare -r ROOT_DIR=`pwd`
+declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/
+declare -r MICRO_LOG_PATH=${TEST_TMPDIR}/$1
+declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt
+mkdir -p ${MICRO_LOG_PATH}
+
+$1 2>&1 | tee ${MICRO_LOG_FILENAME}
+
+if grep -q "$2" ${MICRO_LOG_FILENAME}
+then
+  echo "$1: PASS"
+  exit 0
+else
+  echo "$1: FAIL - '$2' not found in logs."
+  exit 1
+fi
+
diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile
new file mode 100644
index 0000000000..880bb4763c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile
@@ -0,0 +1,166 @@
+MAKEFILE_DIR := tensorflow/contrib/lite/experimental/micro/tools/make
+
+# Try to figure out the host system
+HOST_OS :=
+ifeq ($(OS),Windows_NT)
+	HOST_OS = windows
+else
+	UNAME_S := $(shell uname -s)
+	ifeq ($(UNAME_S),Linux)
+		HOST_OS := linux
+	endif
+	ifeq ($(UNAME_S),Darwin)
+		HOST_OS := osx
+	endif
+endif
+
+HOST_ARCH := $(shell if [[ $(shell uname -m) =~ i[345678]86 ]]; then echo x86_32; else echo $(shell uname -m); fi)
+
+# Override these on the make command line to target a specific architecture. For example:
+# make -f tensorflow/contrib/lite/Makefile TARGET=rpi TARGET_ARCH=armv7l
+TARGET := $(HOST_OS)
+TARGET_ARCH := $(HOST_ARCH)
+
+INCLUDES := \
+-I. \
+-I$(MAKEFILE_DIR)/../../../../../ \
+-I$(MAKEFILE_DIR)/../../../../../../ \
+-I$(MAKEFILE_DIR)/downloads/ \
+-I$(MAKEFILE_DIR)/downloads/gemmlowp \
+-I$(MAKEFILE_DIR)/downloads/flatbuffers/include \
+-I$(OBJDIR)
+# This is at the end so any globally-installed frameworks like protobuf don't
+# override local versions in the source tree.
+INCLUDES += -I/usr/local/include
+
+TEST_SCRIPT := tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh
+
+MICROLITE_LIBS := -lm
+
+# There are no rules for compiling objects for the host system (since we don't
+# generate things like the protobuf compiler that require that), so all of
+# these settings are for the target compiler.
+CXXFLAGS := -O3 -DNDEBUG
+CXXFLAGS += --std=c++11 -g -DTF_LITE_STATIC_MEMORY
+CCFLAGS := -DNDEBUG -g -DTF_LITE_STATIC_MEMORY
+LDOPTS := -L/usr/local/lib
+ARFLAGS := -r
+TARGET_TOOLCHAIN_PREFIX :=
+CC_PREFIX :=
+
+# This library is the main target for this makefile. It will contain a minimal
+# runtime that can be linked in to other programs.
+MICROLITE_LIB_NAME := libtensorflow-microlite.a
+
+# Test binary for the microcontroller speech model.
+MICRO_SPEECH_TEST_SRCS := \
+tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc \
+tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
+
+MICROLITE_TEST_SRCS := \
+$(wildcard tensorflow/contrib/lite/experimental/micro/*test.cc) \
+$(wildcard tensorflow/contrib/lite/experimental/micro/kernels/*test.cc)
+
+MICROLITE_CC_BASE_SRCS := \
+$(wildcard tensorflow/contrib/lite/experimental/micro/*.cc) \
+$(wildcard tensorflow/contrib/lite/experimental/micro/kernels/*.cc) \
+tensorflow/contrib/lite/c/c_api_internal.c \
+tensorflow/contrib/lite/core/api/error_reporter.cc \
+tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc \
+tensorflow/contrib/lite/core/api/op_resolver.cc \
+tensorflow/contrib/lite/kernels/kernel_util.cc \
+tensorflow/contrib/lite/kernels/internal/quantization_util.cc
+MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_TEST_SRCS), $(MICROLITE_CC_BASE_SRCS))
+
+# These target-specific makefiles should modify or replace options like
+# CXXFLAGS or LIBS to work for a specific targetted architecture. All logic
+# based on platforms or architectures should happen within these files, to
+# keep this main makefile focused on the sources and dependencies.
+include $(wildcard $(MAKEFILE_DIR)/targets/*_makefile.inc)
+
+ALL_SRCS := \
+	$(MICRO_SPEECH_TEST_SRCS) \
+	$(MICROLITE_CC_SRCS) \
+	$(MICROLITE_TEST_SRCS)
+
+# Where compiled objects are stored.
+GENDIR := $(MAKEFILE_DIR)/gen/$(TARGET)_$(TARGET_ARCH)/
+OBJDIR := $(GENDIR)obj/
+BINDIR := $(GENDIR)bin/
+LIBDIR := $(GENDIR)lib/
+
+MICROLITE_LIB_PATH := $(LIBDIR)$(MICROLITE_LIB_NAME)
+
+MICRO_SPEECH_TEST_BINARY := $(BINDIR)micro_speech_test
+
+CXX := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}g++
+CC := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}gcc
+AR := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}ar
+
+MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICRO_SPEECH_TEST_SRCS))))
+
+MICROLITE_LIB_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICROLITE_CC_SRCS))))
+
+MICROLITE_TEST_TARGETS := $(addprefix $(BINDIR), \
+$(patsubst %_test.cc,%.test_target,$(MICROLITE_TEST_SRCS)))
+
+# For normal manually-created TensorFlow C++ source files.
+$(OBJDIR)%.o: %.cc
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
+
+# For normal manually-created TensorFlow C source files.
+$(OBJDIR)%.o: %.c
+	@mkdir -p $(dir $@)
+	$(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
+
+# The target that's compiled if there's no command-line arguments.
+all: $(MICROLITE_LIB_PATH) $(MICRO_SPEECH_TEST_BINARY)
+
+microlite: $(MICROLITE_LIB_PATH)
+
+# Hack for generating schema file bypassing flatbuffer parsing
+tensorflow/contrib/lite/schema/schema_generated.h:
+	@cp -u tensorflow/contrib/lite/schema/schema_generated.h.OPENSOURCE tensorflow/contrib/lite/schema/schema_generated.h
+
+# Gathers together all the objects we've compiled into a single '.a' archive.
+$(MICROLITE_LIB_PATH): tensorflow/contrib/lite/schema/schema_generated.h $(MICROLITE_LIB_OBJS)
+	@mkdir -p $(dir $@)
+	$(AR) $(ARFLAGS) $(MICROLITE_LIB_PATH) $(MICROLITE_LIB_OBJS)
+
+$(MICRO_SPEECH_TEST_BINARY): $(MICRO_SPEECH_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(MICRO_SPEECH_TEST_BINARY) $(MICRO_SPEECH_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+
+micro_speech_test: $(MICRO_SPEECH_TEST_BINARY)
+micro_speech_test_bin: $(MICRO_SPEECH_TEST_BINARY).bin
+
+test_micro_speech: $(MICRO_SPEECH_TEST_BINARY)
+	$(TEST_SCRIPT) $(MICRO_SPEECH_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+
+$(BINDIR)%_test : $(OBJDIR)%_test.o $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $@ $< \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+
+$(BINDIR)%.test_target: $(BINDIR)%_test
+	$(TEST_SCRIPT) $< '~~~ALL TESTS PASSED~~~'
+
+$(info $(MICROLITE_TEST_TARGETS))
+
+test: test_micro_speech $(MICROLITE_TEST_TARGETS)
+
+# Gets rid of all generated files.
+clean:
+	rm -rf $(MAKEFILE_DIR)/gen
+
+$(DEPDIR)/%.d: ;
+.PRECIOUS: $(DEPDIR)/%.d
+.PRECIOUS: $(BINDIR)%_test
+
+-include $(patsubst %,$(DEPDIR)/%.d,$(basename $(ALL_SRCS)))
diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh b/tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh
new file mode 100755
index 0000000000..4c2ff8545d
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR/../../../../../../.."
+
+DOWNLOADS_DIR=tensorflow/contrib/lite/experimental/micro/tools/make/downloads
+BZL_FILE_PATH=tensorflow/workspace.bzl
+
+# Ensure it is being run from repo root
+if [ ! -f $BZL_FILE_PATH ]; then
+  echo "Could not find ${BZL_FILE_PATH}":
+  echo "Likely you are not running this from the root directory of the repository.";
+  exit 1;
+fi
+
+GEMMLOWP_URL="https://github.com/google/gemmlowp/archive/719139ce755a0f31cbf1c37f7f98adcc7fc9f425.zip"
+FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz"
+CMSIS_URL="https://github.com/ARM-software/CMSIS_5/archive/5.4.0.zip"
+STM32_BARE_LIB_URL="https://github.com/google/stm32_bare_lib/archive/50e0da307a2821bb54af1f57b969e6b76cb89d32.zip"
+
+download_and_extract() {
+  local usage="Usage: download_and_extract URL DIR"
+  local url="${1:?${usage}}"
+  local dir="${2:?${usage}}"
+  echo "downloading ${url}" >&2
+  mkdir -p "${dir}"
+  if [[ "${url}" == *gz ]]; then
+    curl -Ls "${url}" | tar -C "${dir}" --strip-components=1 -xz
+  elif [[ "${url}" == *zip ]]; then
+    tempdir=$(mktemp -d)
+    tempdir2=$(mktemp -d)
+
+    curl -L ${url} > ${tempdir}/zipped.zip
+    unzip ${tempdir}/zipped.zip -d ${tempdir2}
+
+    # If the zip file contains nested directories, extract the files from the
+    # inner directory.
+    if ls ${tempdir2}/*/* 1> /dev/null 2>&1; then
+      # unzip has no strip components, so unzip to a temp dir, and move the
+      # files we want from the tempdir to destination.
+      cp -R ${tempdir2}/*/* ${dir}/
+    else
+      cp -R ${tempdir2}/* ${dir}/
+    fi
+    rm -rf ${tempdir2} ${tempdir}
+  fi
+
+  # Delete any potential BUILD files, which would interfere with Bazel builds.
+  find "${dir}" -type f -name '*BUILD' -delete
+}
+
+download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp"
+download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers"
+download_and_extract "${CMSIS_URL}" "${DOWNLOADS_DIR}/cmsis"
+download_and_extract "${STM32_BARE_LIB_URL}" "${DOWNLOADS_DIR}/stm32_bare_lib"
+
+echo "download_dependencies.sh completed successfully." >&2
diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc b/tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc
new file mode 100644
index 0000000000..022a8422dc
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc
@@ -0,0 +1,65 @@
+# Settings for Blue Pill platforms.
+ifeq ($(TARGET), bluepill)
+  TARGET_ARCH := cortex-m3
+  TARGET_TOOLCHAIN_PREFIX := arm-none-eabi-
+
+  PLATFORM_FLAGS = \
+    -DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \
+    -DTF_LITE_STATIC_MEMORY \
+    -DTF_LITE_MCU_DEBUG_LOG \
+    -fno-rtti \
+    -fmessage-length=0 \
+    -fno-exceptions \
+    -fno-unwind-tables \
+    -fno-builtin \
+    -ffunction-sections \
+    -fdata-sections \
+    -funsigned-char \
+    -MMD \
+    -mcpu=cortex-m3 \
+    -mthumb \
+    -std=gnu++11 \
+    -Wvla \
+    -Wall \
+    -Wextra \
+    -Wno-unused-parameter \
+    -Wno-missing-field-initializers \
+    -Wno-write-strings \
+    -Wno-sign-compare \
+    -fno-delete-null-pointer-checks \
+    -fomit-frame-pointer \
+    -fpermissive \
+    -nostdlib \
+    -g \
+    -Os
+  CXXFLAGS += $(PLATFORM_FLAGS)
+  CCFLAGS += $(PLATFORM_FLAGS)
+  LDFLAGS += \
+    -T $(MAKEFILE_DIR)/downloads/stm32_bare_lib/stm32_linker_layout.lds \
+    -Wl,-Map=$(MAKEFILE_DIR)/gen/$(TARGET).map,--cref \
+    -Wl,--gc-sections
+	BUILD_TYPE := micro
+  MICROLITE_LIBS := \
+    -lm
+  INCLUDES += \
+    -isystem$(MAKEFILE_DIR)/downloads/cmsis/CMSIS/Core/Include/ \
+    -I$(MAKEFILE_DIR)/downloads/stm32_bare_lib/include
+  MICROLITE_CC_SRCS += \
+    $(wildcard $(MAKEFILE_DIR)/downloads/stm32_bare_lib/source/*.c) \
+    $(wildcard $(MAKEFILE_DIR)/downloads/stm32_bare_lib/source/*.cc)
+    TEST_SCRIPT := tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
+  # These are tests that don't currently work on the blue pill.
+  EXCLUDED_TESTS := \
+    tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc \
+    tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc
+  MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS))
+
+# These are microcontroller-specific rules for converting the ELF output
+# of the linker into a binary image that can be loaded directly.
+OBJCOPY := $(TARGET_TOOLCHAIN_PREFIX)objcopy
+
+$(BINDIR)/%.bin: $(BINDIR)/%
+	@mkdir -p $(dir $@)
+	$(OBJCOPY) $< $@ -O binary
+
+endif
\ No newline at end of file
diff --git a/tensorflow/contrib/lite/kernels/internal/compatibility.h b/tensorflow/contrib/lite/kernels/internal/compatibility.h
index b87cf2b60d..7c176e0fa1 100644
--- a/tensorflow/contrib/lite/kernels/internal/compatibility.h
+++ b/tensorflow/contrib/lite/kernels/internal/compatibility.h
@@ -84,4 +84,27 @@ using uint16 = std::uint16_t;
 using int32 = std::int32_t;
 using uint32 = std::uint32_t;
 
+// TFLITE_DEPRECATED()
+//
+// Duplicated from absl/base/macros.h to avoid pulling in that library.
+// Marks a deprecated class, struct, enum, function, method and variable
+// declarations. The macro argument is used as a custom diagnostic message (e.g.
+// suggestion of a better alternative).
+//
+// Example:
+//
+//   class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
+//   TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
+//
+// Every usage of a deprecated entity will trigger a warning when compiled with
+// clang's `-Wdeprecated-declarations` option. This option is turned off by
+// default, but the warnings will be reported by clang-tidy.
+#if defined(__clang__) && __cplusplus >= 201103L
+#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
+#endif
+
+#ifndef TFLITE_DEPRECATED
+#define TFLITE_DEPRECATED(message)
+#endif
+
 #endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index b39347758a..64a39dd2a2 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -18,7 +18,6 @@ limitations under the License.
 #include <algorithm>
 #include <cstring>
 
-#include "absl/base/macros.h"
 #include "tensorflow/contrib/lite/kernels/internal/compatibility.h"
 
 namespace tflite {
@@ -441,7 +440,7 @@ inline int FlatSize(const Dims<N>& dims) {
   return flat_size;
 }
 
-ABSL_DEPRECATED("Prefer FlatSize.")
+TFLITE_DEPRECATED("Prefer FlatSize.")
 inline int RequiredBufferSizeForDims(const Dims<4>& dims) {
   return FlatSize(dims);
 }
-- 
GitLab


From ac22e1583aed390d78d2e87a4bf8a6ec39400ec4 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Thu, 4 Oct 2018 09:21:05 -0700
Subject: [PATCH 1126/1357] Gracefully disallow updating resource variables
 with invalid shapes.

During graph construction, the shape function for AssignAddVariableOp etc.
would raise an error when the value being "assign add"ed to the variable
has an incompatible shape.

With eager execution, no such validation was being made which triggerred
an assertion failure in eigen:
https://github.com/eigenteam/eigen-git-mirror/blob/7d97e1cbbe4424fda39e31c88def7c0863897640/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h#L479

This change prevents that assertion failure.

PiperOrigin-RevId: 215749071
---
 tensorflow/core/kernels/resource_variable_ops.cc         | 6 ++++++
 .../python/kernel_tests/resource_variable_ops_test.py    | 9 ++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index 23d76986bf..678d675c4a 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -426,6 +426,12 @@ class AssignUpdateVariableOp : public OpKernel {
     // ADD if value's refcount was 1.
     mutex_lock ml(*variable->mu());
     Tensor* var_tensor = variable->tensor();
+    OP_REQUIRES(context, var_tensor->shape().IsSameSize(value.shape()),
+                errors::InvalidArgument("Cannot update variable with shape ",
+                                        var_tensor->shape().DebugString(),
+                                        " using a Tensor with shape ",
+                                        value.shape().DebugString(),
+                                        ", shapes must be equal."));
     OP_REQUIRES_OK(context,
                    PrepareToUpdateVariable<Device, T>(context, var_tensor));
     functor::DenseUpdate<Device, T, Op> update_functor;
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index 1365d4b240..a9fd93e9f8 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -142,7 +142,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       v = resource_variable_ops.ResourceVariable(1.0)
     ops.reset_default_graph()
     v.assign(2.0)  # Note: this fails if we run convert_to_tensor on not the
-                   # variable graph.
+    # variable graph.
 
   def testFetchHandle(self):
     with self.cached_session():
@@ -908,6 +908,13 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(Exception, r"shape.*2.*3"):
       state_ops.scatter_update(v, [0, 1], [0, 1, 2])
 
+  @test_util.run_in_graph_and_eager_modes
+  def testAssignIncompatibleShape(self):
+    v = resource_variable_ops.ResourceVariable([0, 1, 2, 3])
+    self.evaluate(v.initializer)
+    with self.assertRaisesRegexp(Exception, r"hapes must be equal"):
+      self.assertAllEqual(self.evaluate(v.assign_add(1)), [1, 2, 3, 4])
+
 
 class _MixedPrecisionVariableTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 1fb84c2e41c454939a02a69093cb214673eab343 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 4 Oct 2018 09:26:19 -0700
Subject: [PATCH 1127/1357] Add ability to vectorize nodes that do not derive
 from function arguments. (This indirectly handles "Const" outputs
 automagically, since they are always unstacked.)

PiperOrigin-RevId: 215749824
---
 .../core/grappler/optimizers/data/BUILD       |   1 +
 .../optimizers/data/map_vectorization.cc      |   2 +-
 .../optimizers/data/vectorization_utils.cc    | 247 ++++++++++++++++-
 .../data/vectorization_utils_test.cc          | 251 ++++++++++++++++++
 .../optimization/map_vectorization_test.py    |   4 +
 5 files changed, 492 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 755af3361e..ee7c14e3ab 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -524,6 +524,7 @@ cc_library(
     deps = [
         ":function_utils",
         ":graph_utils",
+        "//tensorflow/cc:ops",
         "@com_google_absl//absl/strings",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index 9328a7ca99..ba521e79bc 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -44,7 +44,7 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
   // Function inputs and outputs are the same as original, just
   // with different shapes.
   *vectorized_func->mutable_signature() = orig_func.signature();
-  graph_utils::SetUniqueGraphFunctionName("vectorized_function", library,
+  graph_utils::SetUniqueGraphFunctionName("naively_vectorized_fn", library,
                                           vectorized_func);
 
   // Add MapDefun node
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index 2d6cf562b1..344c420902 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -14,10 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
-#include <memory>
 #include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
 
 #include "absl/strings/str_join.h"
+#include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/device_base.h"
@@ -28,13 +28,13 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/optimizers/data/function_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/functions.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
-#include "tensorflow/core/lib/strings/scanner.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -45,6 +45,22 @@ namespace {
 // Describes a tensor with its operation Node and output position
 typedef std::pair<Node*, int> TensorDesc;
 
+// Equivalent to python Pfor's WrappedTensor struct
+struct WrappedTensor {
+  TensorDesc tensor;
+
+  // Whether the tensor is stacked, i.e. represents the results of applying
+  // the operation on all slices of the input, where each row i of the
+  // tensor corresponds to the op's output on slice i of the input. False
+  // if the tensor is not stacked, i.e. represents the result of the op on
+  // a single slice of the input, where the result does not vary between
+  // slices.
+  bool stacked;
+
+  WrappedTensor(TensorDesc&& tensor, bool stacked)
+      : tensor(std::move(tensor)), stacked(stacked) {}
+};
+
 const char* const kRetValOp = "_Retval";
 
 void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src,
@@ -132,7 +148,8 @@ class Vectorization {
                    const NodeDef& map_defun_node, FunctionDef** result);
 
  private:
-  // Converts FunctionDefs to Graphs.
+  // Converts FunctionDefs to Graphs and adds mappings from
+  // arg nodes and unstacked nodes to the corresponding nodes in outer_scope_.
   Status Initialize(const FunctionDef& outer_scope,
                     const NodeDef& map_defun_node);
 
@@ -162,9 +179,30 @@ class Vectorization {
   //    the conversion map.
   Status AddConversionMapping(Node* op_node);
 
-  // Maps a tensor to the corresponding vectorized tensor. For example,
-  // {"Cast" Node*, 0} -> {"Vectorize/Cast" Node*, 0}
-  std::map<TensorDesc, TensorDesc> conversion_map_;
+  // Given a tensor t in `unstacked`, stacks it by doing the equivalent of
+  // tf.tile(tf.expand_dims(t, 0), [n, 1, 1, ...]) where n is dimension 0 of
+  // inputs to `map_defun_node_`. This stacked tensor will be compatible with
+  // the expected output shape of `map_defun_node_`.
+  // This is equivalent to the _stack function in python Pfor.
+  Status StackTensor(WrappedTensor* unstacked, TensorDesc* result);
+
+  // Recursively looks for unstacked nodes in the `map_defun_fn_` graph by
+  // doing a depth-first search from the ret nodes. Lifts nodes that are
+  // unstacked (i.e. don't derive from arg nodes) into `outer_scope_` directly
+  // and add mappings to `conversion_map_`.
+  Status AddUnstackedNodeMappings();
+
+  // Recursive helper for `AddUnstackedNodeMappings`, returns true if tensor
+  // is unstacked.
+  bool AddUnstackedNodeMappingsHelper(TensorDesc&& tensor, Status* status);
+
+  // Add mappings from `map_defun_fn_` arg nodes to `map_defun_node_` input
+  // nodes to `conversion_map_`.
+  Status AddArgNodeMappings();
+
+  // Maps a tensor to the corresponding WrappedTensor. For example,
+  // {"Cast" Node*, 0} -> WrappedTensor({"Vectorize/Cast" Node*, 0}, true)
+  std::map<TensorDesc, WrappedTensor> conversion_map_;
 
   // Unconvertible ret nodes
   std::set<Node*> unconvertible_;
@@ -180,6 +218,10 @@ class Vectorization {
   std::unique_ptr<Graph> outer_scope_;
   std::unique_ptr<FunctionBody> map_defun_fn_;
   Node* map_defun_node_ = nullptr;  // Owned by `outer_scope`
+
+  // Caches the loop_len_node_ needed for tiling unstacked output. This
+  // corresponds to a vector with one element.
+  Node* loop_len_node_ = nullptr;  // Owned by `outer_scope`
   Status status_;
 };
 
@@ -224,7 +266,7 @@ Status Vectorization::AddConversionMapping(Node* op_node) {
 
   // Add output mappings.
   for (size_t i = 0; i < op_node->num_outputs(); ++i) {
-    conversion_map_.insert({{op_node, i}, std::move(output_ports[i])});
+    conversion_map_.insert({{op_node, i}, {std::move(output_ports[i]), true}});
   }
 
   return Status::OK();
@@ -242,10 +284,22 @@ Status Vectorization::ConvertOutput(int output_position) {
   if (auto found = gtl::FindOrNull(conversion_map_, output)) {
     // It's possible the output already has a mapping, if it comes from a node
     // that has already been converted.
-    converted_output = *found;
+    if (found->stacked) {
+      converted_output = found->tensor;
+    } else {
+      // Some outputs may be unstacked if they don't derive from arg nodes
+      // (for example, if a function returns a constant). For these, we
+      // have to add extra nodes to tile it in the 0th dimension.
+      TF_RETURN_IF_ERROR(StackTensor(found, &converted_output));
+    }
   } else {
+    // Note: All unstacked nodes are converted ahead of time in `Initialize`,
+    // and here we assume that all op vectorizers create only stacked outputs.
+    // This may not hold in the future, as more vectorizers are added that
+    // may actually create unstacked outputs. For example, see the `Shape`
+    // converter in third_party/tensorflow/python/ops/parallel_for/pfor.py
     TF_RETURN_IF_ERROR(AddConversionMapping(output.first));
-    converted_output = conversion_map_.at(output);
+    converted_output = conversion_map_.at(output).tensor;
   }
 
   ReplaceEdgeSources({map_defun_node_, output_position}, converted_output,
@@ -297,6 +351,7 @@ void Vectorization::VectorizeHelper() {
     map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types);
   }
 }
+
 Status Vectorization::Initialize(const FunctionDef& outer_scope,
                                  const NodeDef& map_defun_node) {
   // Convert outer_scope and map_defun_fn to FunctionBodys so we can
@@ -337,16 +392,184 @@ Status Vectorization::Initialize(const FunctionDef& outer_scope,
   }
   map_defun_node_ = outer_scope_->FindNodeId(node_id);
 
-  // Add mappings from map_defun_fn_ arg nodes to map_defun_node_ input nodes to
-  // the conversion map
+  TF_RETURN_IF_ERROR(AddArgNodeMappings());
+
+  TF_RETURN_IF_ERROR(AddUnstackedNodeMappings());
+  loop_len_node_ = nullptr;
+
+  return Status::OK();
+}
+
+// TODO(rachelim): It might be profitable to use the C++ API for this instead of
+// NodeBuilder
+Status Vectorization::StackTensor(WrappedTensor* unstacked,
+                                  TensorDesc* result) {
+  // Note that all these nodes are necessary as the size of the batch may not be
+  // constant.
+  if (unstacked->stacked) {
+    return errors::Internal("Can only stack unstacked tensor.");
+  }
+
+  Graph* g = outer_scope_.get();
+  auto node_builder = [](StringPiece op) {
+    return NodeBuilder(strings::StrCat("vectorized/stack/", op), op);
+  };
+
+  auto make_const = [&node_builder](const Input::Initializer& val, Graph* graph,
+                                    Node** result) {
+    TF_RETURN_IF_ERROR(val.status);
+    return node_builder("Const")
+        .Attr("value", val.tensor)
+        .Attr("dtype", val.tensor.dtype())
+        .Finalize(graph, result);
+  };
+
+  // If loop_len_node_ hasn't been created yet, add the node and cache it.
+  if (loop_len_node_ == nullptr) {
+    Node* input_node;
+    TF_RETURN_IF_ERROR(map_defun_node_->input_node(0, &input_node));
+
+    Node* shape_node;
+    TF_RETURN_IF_ERROR(
+        node_builder("Shape").Input(input_node).Finalize(g, &shape_node));
+
+    Node* const_vec_0;
+    TF_RETURN_IF_ERROR(make_const({0}, g, &const_vec_0));
+    Node* const_vec_1;
+    TF_RETURN_IF_ERROR(make_const({1}, g, &const_vec_1));
+
+    Node* strided_slice_node;
+    TF_RETURN_IF_ERROR(node_builder("StridedSlice")
+                           .Input(shape_node)   // input
+                           .Input(const_vec_0)  // begin
+                           .Input(const_vec_1)  // end
+                           .Input(const_vec_1)  // strides
+                           .Finalize(g, &strided_slice_node));
+
+    // Produces a vector of length 1
+    TF_RETURN_IF_ERROR(node_builder("Reshape")
+                           .Input(strided_slice_node)  // tensor
+                           .Input(const_vec_1)         // shape
+                           .Finalize(g, &loop_len_node_));
+  }
+
+  Node* ones_shape;
+  TF_RETURN_IF_ERROR(node_builder("Shape")
+                         .Input(unstacked->tensor.first)  // input
+                         .Finalize(g, &ones_shape));
+
+  Node* ones;
+  TF_RETURN_IF_ERROR(
+      node_builder("OnesLike").Input(ones_shape).Finalize(g, &ones));
+
+  Node* const_0;
+  TF_RETURN_IF_ERROR(make_const(0, g, &const_0));
+
+  Node* multiples;
+  TF_RETURN_IF_ERROR(node_builder("Concat")
+                         .Input(const_0)                           // concat_dim
+                         .Input({{loop_len_node_, 0}, {ones, 0}})  // values
+                         .Finalize(g, &multiples));
+
+  Node* expand_dims;
+  TF_RETURN_IF_ERROR(node_builder("ExpandDims")
+                         .Input(unstacked->tensor.first)  // input
+                         .Input(const_0)                  // dim
+                         .Finalize(g, &expand_dims));
+
+  TF_RETURN_IF_ERROR(node_builder("Tile")
+                         .Input(expand_dims)  // input
+                         .Input(multiples)    // multiples
+                         .Finalize(g, &result->first));
+  result->second = 0;
+  return Status::OK();
+}
+
+Status Vectorization::AddArgNodeMappings() {
   for (auto arg_node : map_defun_fn_->arg_nodes) {
     Node* input_node;
     TF_RETURN_IF_ERROR(map_defun_node_->input_node(
         arg_node->attrs().Find("index")->i(), &input_node));
 
-    conversion_map_.insert({{arg_node, 0}, {input_node, 0}});
+    conversion_map_.insert({{arg_node, 0}, {{input_node, 0}, true}});
+
+    // Control inputs
+    conversion_map_.insert({{arg_node, Graph::kControlSlot},
+                            {{input_node, Graph::kControlSlot}, true}});
   }
+  return Status::OK();
+}
 
+bool Vectorization::AddUnstackedNodeMappingsHelper(TensorDesc&& tensor,
+                                                   Status* status) {
+  if (auto found = gtl::FindOrNull(conversion_map_, tensor)) {
+    return !found->stacked;
+  }
+
+  if (tensor.first->op_def().is_stateful()) {
+    // We don't lift stateful nodes directly out of the MapDefun, since they may
+    // have to be executed N times.
+    return false;
+  }
+
+  bool is_unstacked = true;
+  for (auto edge : tensor.first->in_edges()) {
+    // Ignore Source nodes. Note that these are also ignored in the
+    // GraphToFunctionDef conversion.
+    if (edge->src()->IsSource()) continue;
+
+    // A node is unstacked if all of its inputs are unstacked
+    is_unstacked &= AddUnstackedNodeMappingsHelper(
+        {edge->src(), edge->src_output()}, status);
+  }
+
+  if (!is_unstacked) {
+    return false;
+  }
+
+  // If the node is unstacked, we copy it into outer_scope_ and
+  // add it to the map. Note that we don't clean up the nodes that are copied
+  // in map_defun_fn_, and rely on them being pruned out later.
+  Node* node = outer_scope_->AddNode(tensor.first->def(), status);
+  if (!status->ok()) return true;
+
+  // Add input edges to nodes that should already have been lifted.
+  for (auto edge : tensor.first->in_edges()) {
+    // Ignore Source nodes. Note that these are also ignored in the
+    // GraphToFunctionDef conversion.
+    if (edge->src()->IsSource()) continue;
+
+    if (auto found = gtl::FindOrNull(conversion_map_,
+                                     {edge->src(), edge->src_output()})) {
+      outer_scope_->AddEdge(found->tensor.first, found->tensor.second, node,
+                            edge->dst_input());
+    } else {
+      status->Update(errors::Internal(
+          "Could not find input conversion even though we did depth first "
+          "conversion."));
+    }
+  }
+
+  // Add output mappings
+  for (int i = 0; i < tensor.first->num_outputs(); ++i) {
+    conversion_map_.insert(
+        {{tensor.first, i}, WrappedTensor({node, i}, false)});
+  }
+  conversion_map_.insert({{tensor.first, Graph::kControlSlot},
+                          WrappedTensor({node, Graph::kControlSlot}, false)});
+
+  return true;
+}
+
+Status Vectorization::AddUnstackedNodeMappings() {
+  SetVector<Node*> unstacked_nodes;
+  Status s;
+  for (const auto& ret_node : map_defun_fn_->ret_nodes) {
+    const Edge* in_edge = nullptr;
+    TF_RETURN_IF_ERROR(ret_node->input_edge(0, &in_edge));
+    AddUnstackedNodeMappingsHelper({in_edge->src(), in_edge->src_output()}, &s);
+    TF_RETURN_IF_ERROR(s);
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index 1ff62217dd..a958d706c1 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -670,6 +670,257 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
               cast_node.input(1) == control_input);
 }
 
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +------+     |   |
+// |   |                        |   |
+// |   |                        |   |
+// |   |           +------+     |   |
+// |   |           |Const |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   | MapDefun  +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+//  After:
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +------+         |
+// |                                |
+// |               +------+         |
+// |               |Const |         |
+// |               +---+--+         |
+// |                   |            |
+// |                   |            |
+// |                   |            |
+// |               +---v--+         |
+// |               |Stack*|         |
+// |               +---+--+         |
+// |                   |            |
+// |                   |            |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+// *Not actually a Stack node, but does the equivalent.
+//
+TEST(VectorizeMapDefunTest, VectorizeConst) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const", 2)},
+      {{"ret0", "Const:output:0"}});
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}},
+                      inner.signature().name(), &outer);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized));
+}
+
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +------+     |   |
+// |   |                        |   |
+// |   |                        |   |
+// |   |           +------+     |   |
+// |   |           |Const |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   |           +---v--+     |   |
+// |   |           | Cast |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   | MapDefun  +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+//  After:
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +------+         |
+// |                                |
+// |               +------+         |
+// |               |Const |         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |               | Cast |         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |               |Stack*|         |
+// |               +---+--+         |
+// |                   |            |
+// |                   |            |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+// *Not actually a Stack node, but does the equivalent.
+//
+TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int64"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const", 2)},
+      {{"ret0", "Cast:y:0"}});
+  AddCastNode("Cast", {"Const:output:0"}, DT_INT32, DT_INT64, false, &inner);
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int64"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT64}, {{}},
+                      inner.signature().name(), &outer);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  auto const_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Const", *vectorized));
+  auto cast_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Cast", *vectorized));
+  EXPECT_EQ(cast_node.input(0).substr(0, cast_node.input(0).find(':')),
+            const_node.name());
+}
+
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +------+     |   |
+// |   |                        |   |
+// |   | +------+  +------+     |   |
+// |   | |Const |  |Const |     |   |
+// |   | +---+--+  +---+--+     |   |
+// |   |     :     +---v--+     |   |
+// |   |     ::::::> Cast |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   | MapDefun  +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+//  After:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +------+         |
+// |                                |
+// |                                |
+// |               +------+         |
+// |     +------+  |Const |         |
+// |     |Const |  +---+--+         |
+// |     +---+--+      |            |
+// |         :     +---v--+         |
+// |         ::::::> Cast |         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |               +Stack*+         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+// *Not actually a Stack node, but does the equivalent.
+//
+TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int64"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const", 2),
+       FunctionDefHelper::Const("ConstDep", 3)},
+      {{"ret0", "Cast:y:0"}});
+  AddCastNode("Cast", {"Const:output:0", "^ConstDep"}, DT_INT32, DT_INT64,
+              false, &inner);
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int64"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT64}, {{}},
+                      inner.signature().name(), &outer);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+
+  auto find_const = [vectorized](int val) -> const NodeDef* {
+    for (const auto& n : vectorized->node_def()) {
+      if (n.attr().at("value").tensor().int_val(0) == val) {
+        return &n;
+      }
+    }
+    return nullptr;
+  };
+
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  auto const_node = find_const(2);
+  auto const_dep_node = find_const(3);
+  auto cast_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Cast", *vectorized));
+  EXPECT_EQ(cast_node.input(0).substr(0, cast_node.input(0).find(':')),
+            const_node->name());
+  EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name()));
+}
+
 // TODO(rachelim): More test cases when we get around to implementing them:
 // [] A badly defined converter, e.g. doesn't produce nodes that have the
 //    same number of outputs/inputs as the nodes to be converted
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index 32ebc49c40..971a2d94b9 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -78,6 +78,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("Basic", lambda x: (x, x + 1), None),
+      ("Const", lambda x: 2, 12),
       ("Parallel", lambda x: (x, x + 1), 12),
       ("Gather", lambda x: array_ops.gather(x, 0), 12),
   )
@@ -207,6 +208,9 @@ class MapVectorizationBenchmark(test.Benchmark):
   def benchmarkAddConst(self):
     self._benchmark_helper(lambda *args: [x + 1 for x in args], "add_const")
 
+  def benchmarkReturnConst(self):
+    self._benchmark_helper(lambda *args: [constant_op.constant(2)], "ret_const")
+
   def benchmarkSelect(self):
     self._benchmark_helper(lambda *args: args[0], "select")
 
-- 
GitLab


From c2552cd33c05fa84f280e766e33ba01308ffbcb2 Mon Sep 17 00:00:00 2001
From: Youlong Cheng <ylc@google.com>
Date: Thu, 4 Oct 2018 09:42:13 -0700
Subject: [PATCH 1128/1357]  Skip numeric checking in BROADCAST mode.

PiperOrigin-RevId: 215752559
---
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 37 +++++++++++++------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 3aa5b6efa1..8d15c857f8 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -177,14 +177,29 @@ def _create_or_get_iterations_per_loop():
           use_resource=True)
 
 
-def _sync_variables_ops():
-  # Gets the variables back from TPU nodes. This means the variables updated
-  # by TPU will now be *synced* to host memory.
-  return [
-      array_ops.check_numerics(v.read_value(),
-                               'Gradient for %s is NaN' % v.name).op
-      for v in variables.trainable_variables()
-  ]
+def _sync_variables_ops(ctx):
+  """Create varriables synchronization ops.
+
+  Gets the variables back from TPU nodes. This means the variables updated
+  by TPU will now be *synced* to host memory.
+  In BROADCAST mode, we skip this sync since the variables are ususally too
+  big to transmit via RPC.
+
+  Args:
+    ctx: A `_InternalTPUContext` instance with mode.
+
+  Returns:
+    A list of sync ops.
+  """
+
+  if not ctx.is_input_broadcast_with_iterators():
+    return [
+        array_ops.check_numerics(v.read_value(),
+                                 'Gradient for %s is NaN' % v.name).op
+        for v in variables.trainable_variables()
+    ]
+  else:
+    return [control_flow_ops.no_op()]
 
 
 def _increase_eval_step_op(iterations_per_loop):
@@ -2567,7 +2582,7 @@ class TPUEstimator(estimator_lib.Estimator):
 
           summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss)
           with ops.control_dependencies([loss]):
-            update_ops = _sync_variables_ops()
+            update_ops = _sync_variables_ops(ctx)
 
           # Validate the TPU training graph to catch basic errors
           _validate_tpu_training_graph()
@@ -2600,7 +2615,7 @@ class TPUEstimator(estimator_lib.Estimator):
             # After TPU evaluation computation is done (the mean_loss tensor),
             # reads all variables back from TPU and updates the eval step
             # counter properly
-            internal_ops_to_run = _sync_variables_ops()
+            internal_ops_to_run = _sync_variables_ops(ctx)
             internal_ops_to_run.append(
                 _increase_eval_step_op(iterations_per_loop_var))
             with ops.control_dependencies(internal_ops_to_run):
@@ -2645,7 +2660,7 @@ class TPUEstimator(estimator_lib.Estimator):
          scaffold, prediction_hooks) = _predict_on_tpu_system(
              ctx, model_fn_wrapper, dequeue_fn)
         with ops.control_dependencies([dummy_predict_op]):
-          internal_ops_to_run = _sync_variables_ops()
+          internal_ops_to_run = _sync_variables_ops(ctx)
           with ops.control_dependencies(internal_ops_to_run):
             dummy_predict_op = control_flow_ops.no_op()
 
-- 
GitLab


From 5e1b45d0a8aa3f268745cdc683c26d9ebdd1ea8b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 10:10:58 -0700
Subject: [PATCH 1129/1357] Automated rollback of commit
 f22037abf5a6f4581f5fb6013f72f91747f22965

PiperOrigin-RevId: 215757701
---
 tensorflow/compiler/jit/xla_device_context.cc    | 15 ++++-----------
 tensorflow/compiler/jit/xla_device_context.h     |  3 +--
 .../xla/service/generic_transfer_manager.cc      |  2 +-
 .../xla/service/generic_transfer_manager.h       |  7 +++----
 .../compiler/xla/service/transfer_manager.h      | 16 +---------------
 5 files changed, 10 insertions(+), 33 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index e083652978..af83c792e5 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -75,9 +75,8 @@ XlaTransferManager::XlaTransferManager(
   }
 }
 
-Status XlaTransferManager::TransferLiteralToDevice(const Tensor& host_tensor,
-                                                   Tensor* device_tensor,
-                                                   bool buffer_is_fresh) const {
+Status XlaTransferManager::TransferLiteralToDevice(
+    const Tensor& host_tensor, Tensor* device_tensor) const {
   xla::Shape xla_shape;
   TF_RETURN_IF_ERROR(TensorShapeToXLAShape(host_tensor.dtype(),
                                            host_tensor.shape(), &xla_shape));
@@ -98,11 +97,8 @@ Status XlaTransferManager::TransferLiteralToDevice(const Tensor& host_tensor,
     // synchronized.
     host_to_device_stream_->ThenWaitFor(stream_.get());
   }
-  xla::TransferManager::TransferToDeviceHint hint =
-      buffer_is_fresh ? xla::TransferManager::kBufferUndefined
-                      : xla::TransferManager::kNoHint;
   TF_RETURN_IF_ERROR(transfer_manager_->TransferLiteralToDeviceAsync(
-      host_to_device_stream_.get(), *literal, shaped_buffer, hint));
+      host_to_device_stream_.get(), *literal, shaped_buffer));
   if (UseMultipleStreams()) {
     auto event = std::make_shared<se::Event>(stream_->parent());
     TF_RET_CHECK(event->Init()) << "Event failed to initialize!";
@@ -169,7 +165,6 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
     return;
   }
   TensorShape shape = shape_or_status.ValueOrDie();
-  bool buffer_is_fresh = false;
   if (!xla_tensor->has_shaped_buffer()) {
     Status s =
         xla_tensor->AllocateShapedBuffer(device_tensor->dtype(), shape, client_,
@@ -178,7 +173,6 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
       done(s);
       return;
     }
-    buffer_is_fresh = true;
   }
 
   Status status;
@@ -189,8 +183,7 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
           "Tensor::CopyFrom failed when copying from CPU to XLA device"));
       return;
     }
-    status = TransferLiteralToDevice(reshaped_cpu_tensor, device_tensor,
-                                     buffer_is_fresh);
+    status = TransferLiteralToDevice(reshaped_cpu_tensor, device_tensor);
   } else {
     se::DeviceMemoryBase dev_dst_ptr =
         XlaTensor::DeviceMemoryFromTensor(*device_tensor);
diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h
index a4c0c296fc..df82421294 100644
--- a/tensorflow/compiler/jit/xla_device_context.h
+++ b/tensorflow/compiler/jit/xla_device_context.h
@@ -67,8 +67,7 @@ class XlaTransferManager {
 
  private:
   Status TransferLiteralToDevice(const Tensor& host_tensor,
-                                 Tensor* device_tensor,
-                                 bool buffer_is_fresh) const;
+                                 Tensor* device_tensor) const;
   void TransferLiteralFromDevice(Tensor* host_tensor,
                                  const Tensor& device_tensor,
                                  const StatusCallback& done) const;
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
index f92fde7f46..bec02e14f9 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
@@ -98,7 +98,7 @@ Status GenericTransferManager::TransferLiteralFromDeviceInternal(
 
 Status GenericTransferManager::TransferLiteralToDeviceAsync(
     se::Stream* stream, const LiteralSlice& literal,
-    const ShapedBuffer& device_buffer, TransferToDeviceHint /*hint*/) {
+    const ShapedBuffer& device_buffer) {
   const Shape& shape = literal.shape();
   VLOG(2) << "transferring literal shape to device: "
           << ShapeUtil::HumanString(shape)
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h
index b1cba82b9f..86c8b1c145 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.h
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h
@@ -45,10 +45,9 @@ class GenericTransferManager : public TransferManager {
                                  MutableBorrowingLiteral literal,
                                  std::function<void(Status)> done) override;
 
-  Status TransferLiteralToDeviceAsync(se::Stream* stream,
-                                      const LiteralSlice& literal,
-                                      const ShapedBuffer& device_buffer,
-                                      TransferToDeviceHint hint) override;
+  Status TransferLiteralToDeviceAsync(
+      se::Stream* stream, const LiteralSlice& literal,
+      const ShapedBuffer& device_buffer) override;
 
   Status TransferLiteralToInfeed(se::StreamExecutor* executor,
                                  const LiteralSlice& literal) override;
diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h
index 9199e32d0f..f952e64af2 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.h
+++ b/tensorflow/compiler/xla/service/transfer_manager.h
@@ -89,16 +89,6 @@ class TransferManager {
                                          const LiteralSlice& literal,
                                          const ShapedBuffer& device_buffer);
 
-  // Hint type given to TransferLiteralToDeviceAsync.
-  enum TransferToDeviceHint {
-    // No hint available.
-    kNoHint,
-
-    // The destination buffer is undefined on the device, meaning it can be
-    // transferred to eagerly rather than waiting for Stream ordering.
-    kBufferUndefined,
-  };
-
   // Transfers the given literal into the previously allocated device memory
   // represented by the given ShapedBuffer using the given executor. The shape
   // of the ShapedBuffer and DeviceShape(literal.shape()) must be compatible,
@@ -106,13 +96,9 @@ class TransferManager {
   //
   // This operation is performed asynchronously on the given stream. It returns
   // once the transfer is enqueued.
-  //
-  // The optional hint can allow implementations to optimize transfers. It is
-  // not mandatory for an implementation to obey the hint.
   virtual Status TransferLiteralToDeviceAsync(
       se::Stream* stream, const LiteralSlice& literal,
-      const ShapedBuffer& device_buffer,
-      TransferToDeviceHint hint = kNoHint) = 0;
+      const ShapedBuffer& device_buffer) = 0;
 
   // Convenience methods for transferring an array to or from the device at a
   // known address. This avoids having to construct a ShapedBuffer just to
-- 
GitLab


From 100714d9e5eb723525eb54142769f9bd8eec5edd Mon Sep 17 00:00:00 2001
From: Alan Chiao <alanchiao@google.com>
Date: Thu, 4 Oct 2018 10:11:56 -0700
Subject: [PATCH 1130/1357] Fix quantization util test to pass with defined
 behavior on 32-bit architectures.

PiperOrigin-RevId: 215757844
---
 .../contrib/lite/kernels/internal/quantization_util_test.cc     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc
index 14281f25c6..25ea72b886 100644
--- a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc
@@ -259,7 +259,7 @@ TEST(QuantizationUtilTest, IntegerFrExpVersusDouble) {
   EXPECT_EQ(double_shift, 1);
 
   result = IntegerFrExp(123.45, &shift);
-  EXPECT_NEAR(result, (0.964453 * (1L << 31)), 1000);
+  EXPECT_NEAR(result, (0.964453 * (1LL << 31)), 1000);
   EXPECT_EQ(shift, 7);
   double_result = std::frexp(123.45, &double_shift);
   EXPECT_NEAR(double_result, 0.964453, 1e-5);
-- 
GitLab


From 8622f05a62948d8966be8962a6a33e0a8b5a116d Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 4 Oct 2018 10:17:02 -0700
Subject: [PATCH 1131/1357] Don't CHECK-fail on malformed graphs in deadness
 analysis

Instead return a friendlier failed Status from the following two methods which
used to CHECK-fail before:  GetIncomingPreds, FindUniqueBackedge.

While at it, also rename GetIncomingPreds to GetInputPreds to be consistent with
the variable names.

PiperOrigin-RevId: 215758757
---
 tensorflow/compiler/jit/deadness_analysis.cc | 77 ++++++++++++++------
 1 file changed, 55 insertions(+), 22 deletions(-)

diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index e0b9932d80..b7ae7fbeb3 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/jit/deadness_analysis_internal.h"
+#include "tensorflow/compiler/jit/xla_cluster_util.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/lib/hash/hash.h"
@@ -579,7 +580,8 @@ class DeadnessAnalysisImpl : public DeadnessAnalysis {
  private:
   enum class EdgeKind { kDataAndControl, kDataOnly, kControlOnly };
 
-  std::vector<Predicate*> GetIncomingPreds(Node* n, EdgeKind edge_kind);
+  Status GetInputPreds(Node* n, EdgeKind edge_kind,
+                       std::vector<Predicate*>* result);
 
   // Sets the predicate for output `output_idx` of `n` to `pred`.  Sets the i'th
   // bit of `should_revisit` if `pred` is different from the current predicate
@@ -625,9 +627,10 @@ TensorId InputEdgeToTensorId(const Edge* e) {
   return TensorId(e->src()->name(), e->src_output());
 }
 
-std::vector<Predicate*> DeadnessAnalysisImpl::GetIncomingPreds(
-    Node* n, DeadnessAnalysisImpl::EdgeKind edge_kind) {
-  std::vector<Predicate*> incoming_preds;
+Status DeadnessAnalysisImpl::GetInputPreds(
+    Node* n, DeadnessAnalysisImpl::EdgeKind edge_kind,
+    std::vector<Predicate*>* result) {
+  result->clear();
   for (const Edge* in_edge : n->in_edges()) {
     bool should_process =
         edge_kind == EdgeKind::kDataAndControl ||
@@ -636,17 +639,27 @@ std::vector<Predicate*> DeadnessAnalysisImpl::GetIncomingPreds(
 
     if (should_process) {
       auto it = predicate_map_.find(InputEdgeToTensorId(in_edge));
-      CHECK(it != predicate_map_.end()) << n->name();
-      incoming_preds.push_back(it->second);
+      if (it == predicate_map_.end()) {
+        GraphCycles graph_cycles;
+        TF_RETURN_IF_ERROR(CreateCycleDetectionGraph(&graph_, &graph_cycles));
+
+        // If we didn't return with an error above then the graph is probably
+        // fine and we have a bug in deadness analysis.
+        return errors::Internal("Could not find input ", in_edge->DebugString(),
+                                " to ", n->name(),
+                                " when visiting the graph in post-order.  Most "
+                                "likely indicates a bug in deadness analysis.");
+      }
+      result->push_back(it->second);
     }
   }
-  return incoming_preds;
+  return Status::OK();
 }
 
 Status DeadnessAnalysisImpl::HandleSwitch(Node* n,
                                           std::vector<bool>* should_revisit) {
-  std::vector<Predicate*> input_preds =
-      GetIncomingPreds(n, EdgeKind::kDataAndControl);
+  std::vector<Predicate*> input_preds;
+  TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataAndControl, &input_preds));
   const Edge* pred_edge;
   TF_RETURN_IF_ERROR(n->input_edge(1, &pred_edge));
   Predicate* true_switch = predicate_factory_.MakeSymbolPredicate(
@@ -675,17 +688,31 @@ Status DeadnessAnalysisImpl::HandleSwitch(Node* n,
 }
 
 namespace {
-const Edge* FindUniqueBackedge(Node* merge) {
+Status CreateMultipleNextIterationInputsError(Node* merge) {
+  std::vector<string> backedges;
+  for (const Edge* backedge : merge->in_edges()) {
+    if (backedge->src()->IsNextIteration()) {
+      backedges.push_back(absl::StrCat("  ", SummarizeNode(*backedge->src())));
+    }
+  }
+  return errors::InvalidArgument(
+      "Multiple NextIteration inputs to merge node ", SummarizeNode(*merge),
+      ": \n", absl::StrJoin(backedges, "\n"),
+      "\nMerge nodes can have at most one incoming NextIteration edge.");
+}
+
+Status FindUniqueBackedge(Node* merge, const Edge** result) {
+  *result = nullptr;
   CHECK(merge->IsMerge());
-  const Edge* result = nullptr;
   for (const Edge* e : merge->in_edges()) {
     if (e->src()->IsNextIteration()) {
-      CHECK_EQ(result, nullptr)
-          << "Multiple backedges to " << merge->DebugString();
-      result = e;
+      if (*result != nullptr) {
+        return CreateMultipleNextIterationInputsError(merge);
+      }
+      *result = e;
     }
   }
-  return result;
+  return Status::OK();
 }
 
 // If `backedge_predicate` is equal to `symbolic_predicate` & Step where Step
@@ -764,9 +791,12 @@ Status DeadnessAnalysisImpl::HandleMerge(Node* n,
       return Status::OK();
     }
 
+    std::vector<Predicate*> input_preds;
+    TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataOnly, &input_preds));
+
     // We're visiting this merge for the first time and it is a acyclic merge.
-    Predicate* input_data_pred = predicate_factory_.MakeOrPredicate(
-        GetIncomingPreds(n, EdgeKind::kDataOnly));
+    Predicate* input_data_pred =
+        predicate_factory_.MakeOrPredicate(input_preds);
     SetPredicate(n, {0, 1, Graph::kControlSlot}, input_data_pred,
                  should_revisit);
     return Status::OK();
@@ -777,7 +807,9 @@ Status DeadnessAnalysisImpl::HandleMerge(Node* n,
     // of an unvisited backedge.  Try to pattern match the predicate expression
     // for that backedge (which should be visited now) into an and recurrence
     // for the merge node.
-    if (const Edge* unique_backedge = FindUniqueBackedge(n)) {
+    const Edge* unique_backedge;
+    TF_RETURN_IF_ERROR(FindUniqueBackedge(n, &unique_backedge));
+    if (unique_backedge) {
       if (Predicate* step = DeduceStepPredicate(
               &predicate_factory_, it->second,
               predicate_map_[InputEdgeToTensorId(unique_backedge)])) {
@@ -808,8 +840,8 @@ Status DeadnessAnalysisImpl::HandleRecv(Node* n,
                                         std::vector<bool>* should_revisit) {
   // In addition to being alive or dead based on the inputs, a _Recv can also
   // acquire a dead signal from a _Send.
-  std::vector<Predicate*> input_preds =
-      GetIncomingPreds(n, EdgeKind::kDataAndControl);
+  std::vector<Predicate*> input_preds;
+  TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataAndControl, &input_preds));
   input_preds.push_back(predicate_factory_.MakeSymbolPredicate(
       TensorId(n->name(), 0), /*must_be_true=*/false));
   SetPredicate(n, {0, Graph::kControlSlot},
@@ -821,8 +853,9 @@ Status DeadnessAnalysisImpl::HandleRecv(Node* n,
 Status DeadnessAnalysisImpl::HandleGeneric(Node* n,
                                            std::vector<bool>* should_revisit) {
   // Generally nodes are alive iff all their inputs are alive.
-  Predicate* pred = predicate_factory_.MakeAndPredicate(
-      GetIncomingPreds(n, EdgeKind::kDataAndControl));
+  std::vector<Predicate*> input_preds;
+  TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataAndControl, &input_preds));
+  Predicate* pred = predicate_factory_.MakeAndPredicate(input_preds);
   for (int output_idx = 0; output_idx < n->num_outputs(); output_idx++) {
     SetPredicate(n, output_idx, pred, should_revisit);
   }
-- 
GitLab


From 8ac087482f7224273fb6697a66191b2661e86477 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Thu, 4 Oct 2018 10:27:57 -0700
Subject: [PATCH 1132/1357] Add tensorflow_estimator pip package to
 install_pip_packages.sh

We will need this for remote-build presubmits to pass.

PiperOrigin-RevId: 215760872
---
 tensorflow/tools/ci_build/install/install_pip_packages.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 7f293e8604..329d05342a 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -124,6 +124,10 @@ pip3 install keras_preprocessing==1.0.5 --no-deps
 pip2 install --upgrade h5py==2.8.0
 pip3 install --upgrade h5py==2.8.0
 
+# Estimator
+pip2 install tensorflow_estimator --no-deps
+pip3 install tensorflow_estimator --no-deps
+
 # Install last working version of setuptools.
 pip2 install --upgrade setuptools==39.1.0
 pip3 install --upgrade setuptools==39.1.0
-- 
GitLab


From 419fff9de94ea9573f2e368fd6a68fdf54c59bab Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 10:44:58 -0700
Subject: [PATCH 1133/1357] Implement LiteralBase::Slice for all primitive type

PiperOrigin-RevId: 215764305
---
 tensorflow/compiler/xla/literal.cc | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index deeb140b8f..177f39cc74 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -727,16 +727,34 @@ Literal LiteralBase::Slice(absl::Span<const int64> start_indices,
       ShapeUtil::MakeShapeWithLayout(shape().element_type(), result_dimensions,
                                      LayoutUtil::MinorToMajor(shape()));
   switch (result_shape.element_type()) {
-    case F32:
-      return SliceInternal<float>(result_shape, start_indices);
+    case PRED:
+      return SliceInternal<bool>(result_shape, start_indices);
+    case U8:
+      return SliceInternal<uint8>(result_shape, start_indices);
+    case U16:
+      return SliceInternal<uint16>(result_shape, start_indices);
+    case U32:
+      return SliceInternal<uint32>(result_shape, start_indices);
+    case U64:
+      return SliceInternal<uint64>(result_shape, start_indices);
+    case S8:
+      return SliceInternal<int8>(result_shape, start_indices);
+    case S16:
+      return SliceInternal<int16>(result_shape, start_indices);
+    case S32:
+      return SliceInternal<int32>(result_shape, start_indices);
+    case S64:
+      return SliceInternal<int64>(result_shape, start_indices);
+    case F16:
+      return SliceInternal<half>(result_shape, start_indices);
     case BF16:
       return SliceInternal<bfloat16>(result_shape, start_indices);
+    case F32:
+      return SliceInternal<float>(result_shape, start_indices);
+    case F64:
+      return SliceInternal<double>(result_shape, start_indices);
     case C64:
       return SliceInternal<complex64>(result_shape, start_indices);
-    case S32:
-      return SliceInternal<int32>(result_shape, start_indices);
-    case U32:
-      return SliceInternal<uint32>(result_shape, start_indices);
     default:
       LOG(FATAL) << "not yet implemented: "
                  << PrimitiveType_Name(result_shape.element_type());
-- 
GitLab


From 5e9bd578802fcfff5de9729332eea4ae85c05c9e Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 4 Oct 2018 10:46:16 -0700
Subject: [PATCH 1134/1357] [tf.data] Fix C++ shape inference for
 `Dataset.concatenate()`.

Previously, we were returning an unknown shape in
`Dataset::output_shapes()` for the "most specific compatible shape"
between the two inputs. While this does not cause correctness problems
(since the unknown shape *is* compatible), we gain the ability to
raise errors earlier when more shape information is available.

PiperOrigin-RevId: 215764530
---
 tensorflow/core/kernels/data/concatenate_dataset_op.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/data/concatenate_dataset_op.cc b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
index a04f150e71..9607e9444c 100644
--- a/tensorflow/core/kernels/data/concatenate_dataset_op.cc
+++ b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
@@ -171,16 +171,16 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel {
 
     static PartialTensorShape MostSpecificCompatibleShape(
         const PartialTensorShape& ts1, const PartialTensorShape& ts2) {
-      PartialTensorShape output_tensorshape;
       if (ts1.dims() != ts2.dims() || ts1.unknown_rank() || ts2.unknown_rank())
-        return output_tensorshape;
+        return PartialTensorShape();
+      PartialTensorShape output_tensorshape({});
       auto dims1 = ts1.dim_sizes();
       auto dims2 = ts2.dim_sizes();
       for (int d = 0; d < ts1.dims(); d++) {
         if (dims1[d] == dims2[d])
-          output_tensorshape.Concatenate(dims1[d]);
+          output_tensorshape.AddDim(dims1[d]);
         else
-          output_tensorshape.Concatenate(-1);
+          output_tensorshape.AddDim(-1);
       }
       return output_tensorshape;
     }
-- 
GitLab


From e1a8f4b03df2ef84538c01788b6043eb723cd046 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:04:41 -0700
Subject: [PATCH 1135/1357] Automated rollback of commit
 8dc7bc7764150253c03a666eee84fc48f867d6a2

PiperOrigin-RevId: 215768310
---
 .../toco/graph_transformations/resolve_constant_binary.cc | 8 --------
 .../resolve_constant_concatenation.cc                     | 7 -------
 .../graph_transformations/resolve_constant_fake_quant.cc  | 7 -------
 .../toco/graph_transformations/resolve_constant_fill.cc   | 7 -------
 .../toco/graph_transformations/resolve_constant_gather.cc | 8 --------
 .../toco/graph_transformations/resolve_constant_pack.cc   | 8 --------
 .../resolve_constant_random_uniform.cc                    | 7 -------
 .../toco/graph_transformations/resolve_constant_range.cc  | 8 --------
 .../graph_transformations/resolve_constant_reshape.cc     | 7 -------
 .../toco/graph_transformations/resolve_constant_select.cc | 8 --------
 .../resolve_constant_shape_or_rank.cc                     | 8 --------
 .../toco/graph_transformations/resolve_constant_slice.cc  | 8 --------
 .../resolve_constant_strided_slice.cc                     | 8 --------
 .../toco/graph_transformations/resolve_constant_tile.cc   | 7 -------
 .../graph_transformations/resolve_constant_transpose.cc   | 8 --------
 .../toco/graph_transformations/resolve_constant_unary.cc  | 8 --------
 16 files changed, 122 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
index 3e57d3f467..f7e5aa6609 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
@@ -191,14 +191,6 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model,
 bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
   const auto binary_it = model->operators.begin() + op_index;
   const auto* binary_op = binary_it->get();
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, binary_op->outputs[0])) {
-    return false;
-  }
-
   // Test for binary ops of types that we know how to resolve
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
index c6c5035a51..d916ae0ddf 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
@@ -144,13 +144,6 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
   const auto* concat_op =
       static_cast<const ConcatenationOperator*>(concat_base_op);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, concat_op->outputs[0])) {
-    return false;
-  }
-
   for (const string& input_name : concat_op->inputs) {
     // We only expect constant unquantized arrays as input, otherwise we return.
     // We  also make sure the shapes of the input arrays are known and they are
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
index 3d797533c9..f5f2f77460 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
@@ -69,13 +69,6 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
   const auto* fakequant_op =
       static_cast<const FakeQuantOperator*>(fakequant_base_op);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, fakequant_op->outputs[0])) {
-    return false;
-  }
-
   // Yield until the fakequant MinMax has been resolved.
   if (!fakequant_op->minmax) {
     return false;
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
index 2cb1e64f3a..f6f95481b5 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
@@ -52,13 +52,6 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
index 4dfe203a25..36d7dad0ce 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
@@ -71,14 +71,6 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
index 6f44025dd4..e86616574d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
@@ -59,14 +59,6 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
index c9f2b95d09..88d06d7dc7 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
@@ -70,13 +70,6 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
index e347286dd4..1a0ba9e2bc 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
@@ -28,14 +28,6 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   auto* op = static_cast<RangeOperator*>(base_op);
 
   CHECK_EQ(op->inputs.size(), 3);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   const auto& start_array = model->GetArray(op->inputs[0]);
   if (!start_array.has_shape()) {
     // Yield until all input dims have been resolved.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
index bfdaa8aafd..a6f665b5f0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
@@ -33,13 +33,6 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
index 3a95d39cd4..e880a3f44d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
@@ -37,14 +37,6 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 3);
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
index 452bef1f16..8a0e3e8995 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
@@ -27,14 +27,6 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
   }
 
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been resolved
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
index 58d6797e1c..b35c3e19c4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
@@ -96,14 +96,6 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
   const SliceOperator* op = static_cast<const SliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
index e275447a0c..8853ed87e6 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
@@ -114,14 +114,6 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
       static_cast<const StridedSliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
index 378a38f14b..5cfa1a5582 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
@@ -105,13 +105,6 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
   }
   const auto* op = static_cast<const TensorFlowTileOperator*>(base_op);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
   auto& output_array = model->GetArray(op->outputs[0]);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
index 5d3f4a6240..fe15dfa06f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
@@ -111,14 +111,6 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
 
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index e35ed0898b..c698a9567a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -48,14 +48,6 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) {
 bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   const auto unary_it = model->operators.begin() + op_index;
   const auto* unary_op = unary_it->get();
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, unary_op->outputs[0])) {
-    return false;
-  }
-
   // Test for unary ops of types that we know how to resolve.
   switch (unary_op->type) {
     case OperatorType::kCast:
-- 
GitLab


From 6850dafeeaaa48efa748134688844bd079ef3949 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:09:52 -0700
Subject: [PATCH 1136/1357] collective_param_resolver_local.cc: delete
 DCHECK(!ir->out_mu.try_lock()); in a lambda

UNLOCK_FUNCTION(ir->out_mu) annotates that the lock is held on entry.
try_lock() should not be called.

PiperOrigin-RevId: 215769341
---
 .../core/common_runtime/collective_param_resolver_local.cc       | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.cc b/tensorflow/core/common_runtime/collective_param_resolver_local.cc
index 3b2dc6a050..7cb90de3c7 100644
--- a/tensorflow/core/common_runtime/collective_param_resolver_local.cc
+++ b/tensorflow/core/common_runtime/collective_param_resolver_local.cc
@@ -522,7 +522,6 @@ void CollectiveParamResolverLocal::CallInitInstanceSharedParams(
   InitInstanceSharedParams(
       gr, cp, ir,
       [this, ir, done](const Status& s) UNLOCK_FUNCTION(ir->out_mu) {
-        DCHECK(!ir->out_mu.try_lock());
         DCHECK(ir->out_mu_available);
         ir->status.Update(s);
         ir->out_mu.unlock();
-- 
GitLab


From c8d5054e8c12800f0c3db0e51f3d5902e04eaa37 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Thu, 4 Oct 2018 11:24:41 -0700
Subject: [PATCH 1137/1357] Roll forward change "Skip control flow
 functionalization if there is no Switch or Merge node.".

PiperOrigin-RevId: 215772272
---
 .../tf2xla/functionalize_control_flow.cc      | 129 ++++++++++++------
 .../core/common_runtime/constant_folding.cc   |  37 ++---
 .../core/common_runtime/constant_folding.h    |   4 +
 .../core/common_runtime/graph_optimizer.cc    |   5 +-
 .../core/common_runtime/graph_optimizer.h     |   5 +-
 5 files changed, 122 insertions(+), 58 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 36c6f5d316..28e09d7b79 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -79,7 +79,10 @@ Status FunctionalizeControlFlowForFunction(
     const string& func_name, const string& new_func_name,
     const protobuf::Map<string, tensorflow::AttrValue>& attrs,
     FunctionLibraryDefinition* fld, FunctionLibraryRuntime* flr,
-    std::map<string, string>* canonicalized_name_to_new_name) {
+    std::map<string, absl::optional<string>>* canonicalized_name_to_new_name,
+    bool* modified) {
+  *modified = false;
+
   // Convert the function to Graph.
   FunctionLibraryRuntime::Handle handle;
   TF_RETURN_IF_ERROR(flr->Instantiate(func_name, AttrSlice(&attrs), &handle));
@@ -92,6 +95,19 @@ Status FunctionalizeControlFlowForFunction(
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
 
+  // Check if the graph has Switch or Merge node before optimizing the graph.
+  bool has_switch_or_merge = false;
+  for (Node* n : body->graph->nodes()) {
+    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
+      has_switch_or_merge = true;
+      break;
+    }
+  }
+  // We cannot return here directly if the graph has no Switch/Merge.
+  // It might contain function call nodes, or If/While nodes with Switch/Merge
+  // in function body. We still need to rewrite those functions and modify
+  // corresponding nodes.
+
   // Call graph optimizer. The most important optimization we need is constant
   // folding, which will replace ops like Shape/BroadcastGradientArgs with
   // constant shape input. Without this optimization, those ops might become
@@ -129,6 +145,13 @@ Status FunctionalizeControlFlowForFunction(
         absl::StrCat("functionalize_control_flow_after_opt_", func_name),
         *optimized_graph, fld);
   }
+  // Some inlined functions might have Switch/Merge nodes.
+  for (Node* n : optimized_graph->nodes()) {
+    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
+      has_switch_or_merge = true;
+      break;
+    }
+  }
 
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
@@ -151,10 +174,15 @@ Status FunctionalizeControlFlowForFunction(
           Canonicalize(name, AttrSlice(&associated_function.attrs()));
       auto iter = canonicalized_name_to_new_name->find(canonicalized_name);
       string new_name;
+      bool function_modified;
       if (iter != canonicalized_name_to_new_name->end()) {
-        // If we already functionalized this function, skip functionalization
-        // but still rewrite the node.
-        new_name = iter->second;
+        // If we already processed this function, check if it was rewritten. If
+        // the function was rewritten, the entry will be non-empty. Otherwise
+        // the entry will be empty.
+        function_modified = iter->second.has_value();
+        if (function_modified) {
+          new_name = iter->second.value();
+        }
       } else {
         if (associated_function.type() ==
             AssociatedFunctionInfo::AssociatedFunctionType::kSymbolicGradient) {
@@ -166,42 +194,62 @@ Status FunctionalizeControlFlowForFunction(
         }
         TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
             name, new_name, associated_function.attrs(), fld, flr,
-            canonicalized_name_to_new_name));
-        (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+            canonicalized_name_to_new_name, &function_modified));
+        if (function_modified) {
+          // If the function was rewritten, add an non-empty entry. So later we
+          // know we have processed this function, and it was rewritten into
+          // another function.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+        } else {
+          // If the function was not rewritten, add an empty entry. So later
+          // we know we have processed this function, and it does not need to be
+          // rewritten.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = absl::nullopt;
+        }
+      }
+      if (function_modified) {
+        *modified = true;
+
+        // Notice that if "n" is a function call, RewriteAssociatedFunction()
+        // will delete it and create a new node instead, making "n" an invalid
+        // pointer. That's fine because in that case, associated_functions will
+        // only have one member and the loop will only run once.
+        TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
+            optimized_graph.get(), n, fld, associated_function, new_name));
       }
-      // Notice that if "n" is a function call, RewriteAssociatedFunction() will
-      // delete it and create a new node instead, making "n" an invalid pointer.
-      // That's fine because in that case, associated_functions will only have
-      // one member and the loop will only run once.
-      TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-          optimized_graph.get(), n, fld, associated_function, new_name));
     }
   }
 
-  // Functionalize the function body.
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-        *optimized_graph, fld);
-  }
-  TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-        *optimized_graph, fld);
+  if (has_switch_or_merge) {
+    *modified = true;
+
+    // Functionalize the function body.
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
+          *optimized_graph, fld);
+    }
+    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
+          *optimized_graph, fld);
+    }
   }
-  FunctionDef functionalized_fdef;
-  TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
-                                        &functionalized_fdef));
 
-  // Add rewritten FunctionDef into library.
-  if (func_name == new_func_name) {
-    VLOG(2) << "Replacing function " << func_name;
-    TF_RETURN_IF_ERROR(
-        fld->ReplaceFunction(new_func_name, functionalized_fdef));
-  } else {
-    VLOG(2) << "Adding function " << new_func_name;
-    TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+  if (*modified) {
+    // Add rewritten FunctionDef into library.
+    FunctionDef functionalized_fdef;
+    TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
+                                          &functionalized_fdef));
+    if (func_name == new_func_name) {
+      VLOG(2) << "Replacing function " << func_name;
+      TF_RETURN_IF_ERROR(
+          fld->ReplaceFunction(new_func_name, functionalized_fdef));
+    } else {
+      VLOG(2) << "Adding function " << new_func_name;
+      TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+    }
   }
 
   return ret_status;
@@ -227,7 +275,7 @@ Status FunctionalizeControlFlowPass::Run(
           {"TPUCompile", "function"},
           {"XlaLaunch", "function"},
       };
-  std::map<string, string> canonicalized_name_to_new_name;
+  std::map<string, absl::optional<string>> canonicalized_name_to_new_name;
   for (Node* n : graph->nodes()) {
     auto it = kNodeTypeToFunctionAttrMapping->find(n->type_string());
     if (it == kNodeTypeToFunctionAttrMapping->end()) {
@@ -242,12 +290,15 @@ Status FunctionalizeControlFlowPass::Run(
               << ". Corresponding function: " << func.name();
       string new_func_name = options.flib_def->UniqueFunctionName(
           absl::StrCat(func.name(), "_f15n_"));
+      bool modified;
       TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
           func.name(), new_func_name, func.attr(), options.flib_def, flr,
-          &canonicalized_name_to_new_name));
-      n->ClearAttr(func_attr);
-      func.set_name(new_func_name);
-      n->AddAttr(func_attr, func);
+          &canonicalized_name_to_new_name, &modified));
+      if (modified) {
+        n->ClearAttr(func_attr);
+        func.set_name(new_func_name);
+        n->AddAttr(func_attr, func);
+      }
     }
   }
 
diff --git a/tensorflow/core/common_runtime/constant_folding.cc b/tensorflow/core/common_runtime/constant_folding.cc
index 419867ff58..db137f1a19 100644
--- a/tensorflow/core/common_runtime/constant_folding.cc
+++ b/tensorflow/core/common_runtime/constant_folding.cc
@@ -466,7 +466,7 @@ Graph* GetConstantGraph(
 bool ReplaceTensorWithConstant(
     Graph* graph, Device* partition_device, NodeAndOutput tensor,
     const Tensor& constant, const gtl::FlatSet<Node*>& control_deps,
-    int64 max_constant_size_in_bytes,
+    int64 max_constant_size_in_bytes, bool disable_memory_output_type_check,
     const ConstantFoldNameGenerator& generate_new_name) {
   // Be conservative when replacing a tensor with a constant, when not
   // running on CPU.
@@ -535,21 +535,23 @@ bool ReplaceTensorWithConstant(
   if (!NodeBuilder(builder).Finalize(graph, &constant_node).ok()) {
     return false;
   }
-  if (partition_device && device_type != DEVICE_CPU) {
-    MemoryType original_output_memory_type;
-    if (!MemoryTypeForOutput(device_type, graph, tensor.first, tensor.second,
-                             &original_output_memory_type)
-             .ok()) {
-      return false;
-    }
-    MemoryType const_output_memory_type;
-    if (!MemoryTypeForOutput(device_type, graph, constant_node, 0,
-                             &const_output_memory_type)
-             .ok()) {
-      return false;
-    }
-    if (original_output_memory_type != const_output_memory_type) {
-      return false;
+  if (!disable_memory_output_type_check) {
+    if (partition_device && device_type != DEVICE_CPU) {
+      MemoryType original_output_memory_type;
+      if (!MemoryTypeForOutput(device_type, graph, tensor.first, tensor.second,
+                               &original_output_memory_type)
+               .ok()) {
+        return false;
+      }
+      MemoryType const_output_memory_type;
+      if (!MemoryTypeForOutput(device_type, graph, constant_node, 0,
+                               &const_output_memory_type)
+               .ok()) {
+        return false;
+      }
+      if (original_output_memory_type != const_output_memory_type) {
+        return false;
+      }
     }
   }
   for (auto edge : edges_to_remove) {
@@ -658,7 +660,8 @@ Status ConstantFold(const ConstantFoldingOptions& opts,
         constant_control_deps[tensors_to_replace[c].first];
     if (ReplaceTensorWithConstant(
             graph, partition_device, tensors_to_replace[c], outputs[c],
-            control_deps, opts.max_constant_size_in_bytes, generate_new_name)) {
+            control_deps, opts.max_constant_size_in_bytes,
+            opts.disable_memory_output_type_check, generate_new_name)) {
       ++num_nodes_replaced;
     }
   }
diff --git a/tensorflow/core/common_runtime/constant_folding.h b/tensorflow/core/common_runtime/constant_folding.h
index a9a84f761b..4c71b7bd27 100644
--- a/tensorflow/core/common_runtime/constant_folding.h
+++ b/tensorflow/core/common_runtime/constant_folding.h
@@ -45,6 +45,10 @@ struct ConstantFoldingOptions {
   // optimization.
   int64 max_constant_size_in_bytes = 10 * 1024 * 1024;
 
+  // If disable_memory_output_type_check is true, we will disable output memory
+  // type check for constant node replacement.
+  bool disable_memory_output_type_check = false;
+
   // A generator for the name suffix of constant folded nodes. A
   // default id generator that monotonically increases is used if nullptr is
   // passed.
diff --git a/tensorflow/core/common_runtime/graph_optimizer.cc b/tensorflow/core/common_runtime/graph_optimizer.cc
index 37a979a8f1..91194bc86f 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.cc
+++ b/tensorflow/core/common_runtime/graph_optimizer.cc
@@ -39,7 +39,8 @@ void GraphOptimizer::Optimize(
     const std::unordered_map<string, std::vector<PartialTensorShape>>*
         shape_map,
     const std::function<bool(const Node*)>& cse_consider_fn,
-    const std::function<bool(const Node*)>& cf_consider_fn) {
+    const std::function<bool(const Node*)>& cf_consider_fn,
+    bool cf_disable_memory_output_type_check) {
   Graph* g = graph->get();
   DumpGraph("Initial", g);
 
@@ -64,6 +65,8 @@ void GraphOptimizer::Optimize(
       ConstantFoldingOptions cf_opts;
       cf_opts.shape_map = shape_map;
       cf_opts.consider = cf_consider_fn;
+      cf_opts.disable_memory_output_type_check =
+          cf_disable_memory_output_type_check;
       if (opts_.max_folded_constant_in_bytes() > 0) {
         cf_opts.max_constant_size_in_bytes =
             opts_.max_folded_constant_in_bytes();
diff --git a/tensorflow/core/common_runtime/graph_optimizer.h b/tensorflow/core/common_runtime/graph_optimizer.h
index 789cc56942..8954e9612d 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.h
+++ b/tensorflow/core/common_runtime/graph_optimizer.h
@@ -47,13 +47,16 @@ class GraphOptimizer {
   // returns true will be considered for CSE.
   // If cf_consider_fn is not null then only nodes for which cf_consider_fn
   // returns true will be considered for CF.
+  // If cf_disable_memory_output_type_check is true, CF will discard output
+  // memory type check for constant node replacement.
   void Optimize(
       FunctionLibraryRuntime* runtime, Env* env, Device* device,
       std::unique_ptr<Graph>* graph,
       const std::unordered_map<string, std::vector<PartialTensorShape>>*
           shape_map,
       const std::function<bool(const Node*)>& cse_consider_fn = nullptr,
-      const std::function<bool(const Node*)>& cf_consider_fn = nullptr);
+      const std::function<bool(const Node*)>& cf_consider_fn = nullptr,
+      bool cf_disable_memory_output_type_check = false);
 
   const OptimizerOptions& options() { return opts_; }
 
-- 
GitLab


From 700c3325311e16be9bb4856cbf944d1871ff35c1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:30:52 -0700
Subject: [PATCH 1138/1357] Add "encoding" attribute to string substr op, which
 controls how each "character" is treated:   * BYTE: Position & length refer
 to bytes in the string.  (Default)   * UTF8: The string is interpreted as
 UTF-8 encoded Unicode code points, and position & length are treated relative
 to them.

RELNOTES: Add option to get substring using Unicode characters
PiperOrigin-RevId: 215773373
---
 .../api_def/base_api/api_def_Substr.pbtxt     |  10 +
 .../api_def/python_api/api_def_Substr.pbtxt   |   8 +-
 tensorflow/core/kernels/BUILD                 |   7 +-
 tensorflow/core/kernels/string_util.cc        |   4 -
 tensorflow/core/kernels/string_util.h         |  44 ++
 tensorflow/core/kernels/substr_op.cc          | 162 +++++-
 tensorflow/core/kernels/substr_op_test.cc     | 100 +++-
 tensorflow/core/ops/string_ops.cc             |   1 +
 .../python/kernel_tests/substr_op_test.py     | 503 ++++++++++++------
 tensorflow/python/ops/string_ops.py           |  16 +
 .../tools/api/golden/v1/tensorflow.pbtxt      |   2 +-
 .../api/golden/v1/tensorflow.strings.pbtxt    |   2 +-
 .../tools/api/golden/v2/tensorflow.pbtxt      |   2 +-
 .../api/golden/v2/tensorflow.strings.pbtxt    |   2 +-
 14 files changed, 655 insertions(+), 208 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_Substr.pbtxt b/tensorflow/core/api_def/base_api/api_def_Substr.pbtxt
index 5246090ab3..fe0fcc9508 100644
--- a/tensorflow/core/api_def/base_api/api_def_Substr.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Substr.pbtxt
@@ -16,6 +16,16 @@ END
     name: "len"
     description: <<END
 Scalar defining the number of characters to include in each substring
+END
+  }
+  attr {
+    name: "unit"
+    description: <<END
+The unit that is used to create the substring.  One of: `"BYTE"` (for
+defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8
+encoded Unicode code points).  The default is `"BYTE"`. Results are undefined if
+`unit=UTF8_CHAR` and the `input` strings do not contain structurally valid
+UTF-8.
 END
   }
   out_arg {
diff --git a/tensorflow/core/api_def/python_api/api_def_Substr.pbtxt b/tensorflow/core/api_def/python_api/api_def_Substr.pbtxt
index 4778d7927c..4fb9ee56e9 100644
--- a/tensorflow/core/api_def/python_api/api_def_Substr.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Substr.pbtxt
@@ -1,10 +1,4 @@
 op {
   graph_op_name: "Substr"
-  endpoint {
-    name: "strings.substr"
-  }
-  endpoint {
-    name: "substr"
-    deprecated: true
-  }
+  visibility: HIDDEN
 }
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 9439ab332c..3a920f26f3 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4458,7 +4458,12 @@ cc_library(
     name = "string_util",
     srcs = ["string_util.cc"],
     hdrs = ["string_util.h"],
-    deps = ["//tensorflow/core:lib"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "@icu//:common",
+    ],
 )
 
 STRING_DEPS = [
diff --git a/tensorflow/core/kernels/string_util.cc b/tensorflow/core/kernels/string_util.cc
index 3a9803a052..92c73220d8 100644
--- a/tensorflow/core/kernels/string_util.cc
+++ b/tensorflow/core/kernels/string_util.cc
@@ -16,10 +16,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/errors.h"
 
-namespace {
-inline bool IsTrailByte(char x) { return static_cast<signed char>(x) < -0x40; }
-}  // namespace
-
 namespace tensorflow {
 
 // Sets unit value based on str.
diff --git a/tensorflow/core/kernels/string_util.h b/tensorflow/core/kernels/string_util.h
index 390cf57702..d40e93ea33 100644
--- a/tensorflow/core/kernels/string_util.h
+++ b/tensorflow/core/kernels/string_util.h
@@ -30,6 +30,9 @@ enum class UnicodeEncoding { UTF8 };
 // TODO(edloper): Add support for: UTF32_CHAR, etc.
 enum class CharUnit { BYTE, UTF8_CHAR };
 
+// Whether or not the given byte is the trailing byte of a UTF-8/16/32 char.
+inline bool IsTrailByte(char x) { return static_cast<signed char>(x) < -0x40; }
+
 // Sets `encoding` based on `str`.
 Status ParseUnicodeEncoding(const string& str, UnicodeEncoding* encoding);
 
@@ -40,6 +43,47 @@ Status ParseCharUnit(const string& str, CharUnit* unit);
 // Result may be incorrect if the input string is not valid UTF-8.
 int32 UTF8StrLen(const string& string);
 
+// Get the next UTF8 character position starting at the given position and
+// skipping the given number of characters. Position is a byte offset, and
+// should never be `null`. The function return true if successful. However, if
+// the end of the string is reached before the requested characters, then the
+// position will point to the end of string and this function will return false.
+template <typename T>
+bool ForwardNUTF8CharPositions(const StringPiece in,
+                               const T num_utf8_chars_to_shift, T* pos) {
+  const size_t size = in.size();
+  T utf8_chars_counted = 0;
+  while (utf8_chars_counted < num_utf8_chars_to_shift && *pos < size) {
+    // move forward one utf-8 character
+    do {
+      ++*pos;
+    } while (IsTrailByte(in[*pos]) && *pos < size);
+    ++utf8_chars_counted;
+  }
+  return utf8_chars_counted == num_utf8_chars_to_shift;
+}
+
+// Get the previous UTF8 character position starting at the given position and
+// skipping the given number of characters. Position is a byte offset with a
+// positive value, relative to the beginning of the string, and should never be
+// `null`. The function return true if successful. However, if the beginning of
+// the string is reached before the requested character, then the position will
+// point to the beginning of the string and this function will return false.
+template <typename T>
+bool BackNUTF8CharPositions(const StringPiece in,
+                            const T num_utf8_chars_to_shift, T* pos) {
+  const size_t start = 0;
+  T utf8_chars_counted = 0;
+  while (utf8_chars_counted < num_utf8_chars_to_shift && (*pos > start)) {
+    // move back one utf-8 character
+    do {
+      --*pos;
+    } while (IsTrailByte(in[*pos]) && *pos > start);
+    ++utf8_chars_counted;
+  }
+  return utf8_chars_counted == num_utf8_chars_to_shift;
+}
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_STRING_UTIL_H_
diff --git a/tensorflow/core/kernels/substr_op.cc b/tensorflow/core/kernels/substr_op.cc
index 07f1d6e767..93c427039d 100644
--- a/tensorflow/core/kernels/substr_op.cc
+++ b/tensorflow/core/kernels/substr_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/string_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
@@ -37,7 +38,11 @@ namespace tensorflow {
 template <typename T>
 class SubstrOp : public OpKernel {
  public:
-  using OpKernel::OpKernel;
+  explicit SubstrOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    string unit;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("unit", &unit));
+    OP_REQUIRES_OK(ctx, ParseCharUnit(unit, &unit_));
+  }
 
   void Compute(OpKernelContext* context) override {
     // Get inputs
@@ -69,11 +74,23 @@ class SubstrOp : public OpKernel {
             tensorflow::internal::SubtleMustCopy(len_tensor.scalar<T>()());
         for (size_t i = 0; i < input_tensor.NumElements(); ++i) {
           StringPiece in(input(i));
-          OP_REQUIRES(
-              context, FastBoundsCheck(std::abs(pos), in.size() + 1),
-              errors::InvalidArgument("pos ", pos, " out of range for string",
-                                      "b'", in, "' at index ", i));
-          StringPiece sub_in = in.substr(AdjustedPosIndex(pos, in), len);
+          T byte_pos = pos;
+          T byte_len = len;
+          switch (unit_) {
+            case CharUnit::UTF8_CHAR:
+              OP_REQUIRES(
+                  context, UpdatePosAndLenForUtf8(in, &byte_pos, &byte_len),
+                  errors::InvalidArgument("pos ", pos, " out of range for ",
+                                          "string at index ", i));
+              break;
+            case CharUnit::BYTE:
+              byte_pos = AdjustedPosIndex(byte_pos, in);
+              OP_REQUIRES(
+                  context, FastBoundsCheck(byte_pos, in.size() + 1),
+                  errors::InvalidArgument("pos ", pos, " out of range for ",
+                                          "string b'", in, "' at index ", i));
+          }
+          StringPiece sub_in = in.substr(byte_pos, byte_len);
           output(i).assign(sub_in.data(), sub_in.size());
         }
       } else {
@@ -84,11 +101,23 @@ class SubstrOp : public OpKernel {
           StringPiece in(input(i));
           const T pos = tensorflow::internal::SubtleMustCopy(pos_flat(i));
           const T len = tensorflow::internal::SubtleMustCopy(len_flat(i));
-          OP_REQUIRES(
-              context, FastBoundsCheck(std::abs(pos), in.size() + 1),
-              errors::InvalidArgument("pos ", pos, " out of range for string",
-                                      "b'", in, "' at index ", i));
-          StringPiece sub_in = in.substr(AdjustedPosIndex(pos, in), len);
+          T byte_pos = pos;
+          T byte_len = len;
+          switch (unit_) {
+            case CharUnit::UTF8_CHAR:
+              OP_REQUIRES(
+                  context, UpdatePosAndLenForUtf8(in, &byte_pos, &byte_len),
+                  errors::InvalidArgument("pos ", pos, " out of range for ",
+                                          "string at index ", i));
+              break;
+            case CharUnit::BYTE:
+              byte_pos = AdjustedPosIndex(byte_pos, in);
+              OP_REQUIRES(
+                  context, FastBoundsCheck(byte_pos, in.size() + 1),
+                  errors::InvalidArgument("pos ", pos, " out of range for ",
+                                          "string b'", in, "' at index ", i));
+          }
+          StringPiece sub_in = in.substr(byte_pos, byte_len);
           output(i).assign(sub_in.data(), sub_in.size());
         }
       }
@@ -151,12 +180,24 @@ class SubstrOp : public OpKernel {
             StringPiece in(input_bcast(i));
             const T pos = tensorflow::internal::SubtleMustCopy(pos_bcast(i));
             const T len = tensorflow::internal::SubtleMustCopy(len_bcast(i));
-            OP_REQUIRES(
-                context,
-                FastBoundsCheck(std::abs(pos), input_bcast(i).size() + 1),
-                errors::InvalidArgument("pos ", pos, " out of range for string",
-                                        "b'", in, "' at index ", i));
-            StringPiece sub_in = in.substr(AdjustedPosIndex(pos, in), len);
+            T byte_pos = pos;
+            T byte_len = len;
+            switch (unit_) {
+              case CharUnit::UTF8_CHAR:
+                OP_REQUIRES(
+                    context, UpdatePosAndLenForUtf8(in, &byte_pos, &byte_len),
+                    errors::InvalidArgument("pos ", pos, " out of range for ",
+                                            "string at index ", i));
+                break;
+              case CharUnit::BYTE:
+                byte_pos = AdjustedPosIndex(byte_pos, in);
+                OP_REQUIRES(
+                    context,
+                    FastBoundsCheck(byte_pos, input_bcast(i).size() + 1),
+                    errors::InvalidArgument("pos ", pos, " out of range for ",
+                                            "string b'", in, "' at index ", i));
+            }
+            StringPiece sub_in = in.substr(byte_pos, byte_len);
             output(i).assign(sub_in.data(), sub_in.size());
           }
           break;
@@ -205,12 +246,24 @@ class SubstrOp : public OpKernel {
                   tensorflow::internal::SubtleMustCopy(pos_bcast(i, j));
               const T len =
                   tensorflow::internal::SubtleMustCopy(len_bcast(i, j));
-              OP_REQUIRES(
-                  context, FastBoundsCheck(std::abs(pos), in.size() + 1),
-                  errors::InvalidArgument("pos ", pos, " out of range for ",
-                                          "string b'", in, "' at index (", i,
-                                          ", ", j, ")"));
-              StringPiece sub_in = in.substr(AdjustedPosIndex(pos, in), len);
+              T byte_pos = pos;
+              T byte_len = len;
+              switch (unit_) {
+                case CharUnit::UTF8_CHAR:
+                  OP_REQUIRES(
+                      context, UpdatePosAndLenForUtf8(in, &byte_pos, &byte_len),
+                      errors::InvalidArgument("pos ", pos, " out of range for ",
+                                              "string at index ", i));
+                  break;
+                case CharUnit::BYTE:
+                  byte_pos = AdjustedPosIndex(byte_pos, in);
+                  OP_REQUIRES(
+                      context, FastBoundsCheck(byte_pos, in.size() + 1),
+                      errors::InvalidArgument("pos ", pos, " out of range for ",
+                                              "string b'", in, "' at index (",
+                                              i, ", ", j, ")"));
+              }
+              StringPiece sub_in = in.substr(byte_pos, byte_len);
               output(i, j).assign(sub_in.data(), sub_in.size());
             }
           }
@@ -227,12 +280,73 @@ class SubstrOp : public OpKernel {
  private:
   // This adjusts the requested position. Note it does not perform any bound
   // checks.
-  T AdjustedPosIndex(const T pos_requested, const StringPiece s) {
+  static inline T AdjustedPosIndex(const T pos_requested, const StringPiece s) {
     if (pos_requested < 0) {
       return s.size() + pos_requested;
     }
     return pos_requested;
   }
+
+  // Return true if successful; otherwise, return false if the `pos` argument
+  // is out of range in the string.
+  static inline bool UpdatePosAndLenForUtf8(const StringPiece in, T* pos,
+                                            T* len) {
+    if (*pos >= 0) {
+      return UpdatePositivePosAndLenForUtf8(in, *pos, *len, pos, len);
+    } else {
+      return UpdateNegativePosAndLenForUtf8(in, *pos, *len, pos, len);
+    }
+  }
+
+  static bool UpdatePositivePosAndLenForUtf8(const StringPiece in, const T pos,
+                                             const T len, T* char_pos,
+                                             T* char_len) {
+    *char_pos = 0;
+    // Determine byte position of the substring start.
+    if (!ForwardNUTF8CharPositions(in, pos, char_pos)) {
+      return false;
+    }
+    // Determine position of the end of the substring.
+    // The length will be capped at the end of the string, and we ignore whether
+    // the string had enough characters to handle it or not.
+    *char_len = *char_pos;
+    ForwardNUTF8CharPositions(in, len, char_len);
+    // The length in bytes is the position end of the substring less the start.
+    *char_len = *char_len - *char_pos;
+    return true;
+  }
+
+  // This function expects a negative position relative to the end of the
+  // string, but will update the character position to a positive number
+  // relative to the beginning of the string.
+  static bool UpdateNegativePosAndLenForUtf8(const StringPiece in, const T pos,
+                                             const T len, T* char_pos,
+                                             T* char_len) {
+    // Initially treat the length as position of the end of the substring.
+    *char_len = in.size();
+    // This is the number of character to skip from the end of the string to
+    // arrive at the position where the substring should end.
+    T utf8_chars_to_skip = -pos - len;
+    if (utf8_chars_to_skip < 0) {
+      utf8_chars_to_skip = 0;
+    }
+    // Find the byte position where the substring should end using the computed
+    // number of characters to skip.
+    if (!BackNUTF8CharPositions(in, utf8_chars_to_skip, char_len)) {
+      return false;
+    }
+    // Next, determine where the substring should begin. The number of chars to
+    // skip is the requested position minus the chars we've previously skipped.
+    *char_pos = *char_len;
+    if (!BackNUTF8CharPositions(in, -pos - utf8_chars_to_skip, char_pos)) {
+      return false;
+    }
+    // The length in bytes is the position end of the substring less the start.
+    *char_len = *char_len - *char_pos;
+    return true;
+  }
+
+  CharUnit unit_ = CharUnit::BYTE;
 };
 
 #define REGISTER_SUBSTR(type)                                      \
diff --git a/tensorflow/core/kernels/substr_op_test.cc b/tensorflow/core/kernels/substr_op_test.cc
index 2e07050260..ea6b1ed500 100644
--- a/tensorflow/core/kernels/substr_op_test.cc
+++ b/tensorflow/core/kernels/substr_op_test.cc
@@ -42,7 +42,7 @@ limitations under the License.
 namespace tensorflow {
 
 // Test data from the TensorFlow README.md.
-const char* lines[] = {
+const char* ascii_lines[] = {
     "**TensorFlow** is an open source software library for numerical "
     "computation using data flow graphs.",
     "The graph nodes represent mathematical operations, while the graph edges "
@@ -64,17 +64,76 @@ const char* lines[] = {
     "backwards compatibility guarantee like C++, Go, Java, JavaScript and "
     "Swift."};
 
+const char* unicode_lines[] = {
+    "TensorFlow\xe6\x98\xaf\xe4\xb8\x80\xe4\xb8\xaa\xe4\xbd\xbf\xe7\x94\xa8\xe6"
+    "\x95\xb0\xe6\x8d\xae\xe6\xb5\x81\xe5\x9b\xbe\xe8\xbf\x9b\xe8\xa1\x8c\xe6"
+    "\x95\xb0\xe5\x80\xbc\xe8\xae\xa1\xe7\xae\x97\xe7\x9a\x84\xe5\xbc\x80\xe6"
+    "\xba\x90\xe8\xbd\xaf\xe4\xbb\xb6\xe5\xba\x93\xe3\x80\x82",
+    "\xe5\x9b\xbe\xe5\xbd\xa2\xe8\x8a\x82\xe7\x82\xb9\xe8\xa1\xa8\xe7\xa4\xba"
+    "\xe6\x95\xb0\xe5\xad\xa6\xe8\xbf\x90\xe7\xae\x97\xef\xbc\x8c\xe8\x80\x8c"
+    "\xe5\x9b\xbe\xe5\xbd\xa2\xe8\xbe\xb9\xe7\xbc\x98\xe8\xa1\xa8\xe7\xa4\xba"
+    "\xe5\x9c\xa8\xe5\xae\x83\xe4\xbb\xac\xe4\xb9\x8b\xe9\x97\xb4\xe6\xb5\x81"
+    "\xe5\x8a\xa8\xe7\x9a\x84\xe5\xa4\x9a\xe7\xbb\xb4\xe6\x95\xb0\xe6\x8d\xae"
+    "\xe9\x98\xb5\xe5\x88\x97\xef\xbc\x88\xe5\xbc\xa0\xe9\x87\x8f\xef\xbc\x89"
+    "\xe3\x80\x82",
+    "\xe8\xbf\x99\xe7\xa7\x8d\xe7\x81\xb5\xe6\xb4\xbb\xe7\x9a\x84\xe4\xbd\x93"
+    "\xe7\xb3\xbb\xe7\xbb\x93\xe6\x9e\x84\xe4\xbd\xbf\xe6\x82\xa8\xe5\x8f\xaf"
+    "\xe4\xbb\xa5\xe5\xb0\x86\xe8\xae\xa1\xe7\xae\x97\xe9\x83\xa8\xe7\xbd\xb2"
+    "\xe5\x88\xb0\xe6\xa1\x8c\xe9\x9d\xa2\xef\xbc\x8c\xe6\x9c\x8d\xe5\x8a\xa1"
+    "\xe5\x99\xa8\xe6\x88\x96\xe7\xa7\xbb\xe5\x8a\xa8\xe8\xae\xbe\xe5\xa4\x87"
+    "\xe4\xb8\xad\xe7\x9a\x84\xe4\xb8\x80\xe4\xb8\xaa\xe6\x88\x96\xe5\xa4\x9a"
+    "\xe4\xb8\xaa CPU\xe6\x88\x96GPU\xef\xbc\x8c\xe8\x80\x8c\xe6\x97\xa0\xe9"
+    "\x9c\x80\xe9\x87\x8d\xe5\x86\x99\xe4\xbb\xa3\xe7\xa0\x81\xe3\x80\x82",
+    "TensorFlow\xe8\xbf\x98\xe5\x8c\x85\xe6\x8b\xac[TensorBoard]\xef\xbc\x88"
+    "https://www.tensorflow.org/guide/summaries_and_tensorboard\xef\xbc\x89\xef"
+    "\xbc\x8c\xe8\xbf\x99\xe6\x98\xaf\xe4\xb8\x80\xe4\xb8\xaa\xe6\x95\xb0\xe6"
+    "\x8d\xae\xe5\x8f\xaf\xe8\xa7\x86\xe5\x8c\x96\xe5\xb7\xa5\xe5\x85\xb7\xe5"
+    "\x8c\x85\xe3\x80\x82",
+    "TensorFlow\xe6\x9c\x80\xe5\x88\x9d\xe6\x98\xaf\xe7\x94\xb1\xe7\xa0\x94\xe7"
+    "\xa9\xb6\xe4\xba\xba\xe5\x91\x98\xe5\x92\x8c\xe5\xb7\xa5\xe7\xa8\x8b\xe5"
+    "\xb8\x88\xe5\x9c\xa8Google\xe6\x9c\xba\xe5\x99\xa8\xe6\x99\xba\xe8\x83\xbd"
+    "\xe7\xa0\x94\xe7\xa9\xb6\xe7\xbb\x84\xe7\xbb\x87\xe7\x9a\x84Google Brain"
+    "\xe5\x9b\xa2\xe9\x98\x9f\xe5\xbc\x80\xe5\x8f\x91\xe7\x9a\x84\xef\xbc\x8c"
+    "\xe7\x9b\xae\xe7\x9a\x84\xe6\x98\xaf\xe8\xbf\x9b\xe8\xa1\x8c\xe6\x9c\xba"
+    "\xe5\x99\xa8\xe5\xad\xa6\xe4\xb9\xa0\xe5\x92\x8c\xe6\xb7\xb1\xe5\xba\xa6"
+    "\xe7\xa5\x9e\xe7\xbb\x8f\xe7\xbd\x91\xe7\xbb\x9c\xe7\xa0\x94\xe7\xa9\xb6"
+    "\xe3\x80\x82",
+    "\xe8\xaf\xa5\xe7\xb3\xbb\xe7\xbb\x9f\xe8\xb6\xb3\xe4\xbb\xa5\xe9\x80\x82"
+    "\xe7\x94\xa8\xe4\xba\x8e\xe5\x90\x84\xe7\xa7\x8d\xe5\x85\xb6\xe4\xbb\x96"
+    "\xe9\xa2\x86\xe5\x9f\x9f\xe4\xb9\x9f\xe6\x98\xaf\xe5\xa6\x82\xe6\xad\xa4"
+    "\xe3\x80\x82",
+    "TensorFlow\xe6\x8f\x90\xe4\xbe\x9b\xe7\xa8\xb3\xe5\xae\x9a\xe7\x9a\x84"
+    "Python API\xe5\x92\x8c C API\xef\xbc\x8c\xe4\xbb\xa5\xe5\x8f\x8a\xe6\xb2"
+    "\xa1\xe6\x9c\x89 API\xe5\x90\x91\xe5\x90\x8e\xe5\x85\xbc\xe5\xae\xb9\xe6"
+    "\x80\xa7\xe4\xbf\x9d\xe8\xaf\x81\xef\xbc\x8c\xe5\xa6\x82 C ++\xef\xbc\x8c"
+    "Go\xef\xbc\x8cJava\xef\xbc\x8cJavaScript\xe5\x92\x8cSwift\xe3\x80\x82",
+};
+
+const char* const kByteUnit = "BYTE";
+const char* const kUTF8Unit = "UTF8_CHAR";
+
 Tensor GetTestTensor(int batch) {
-  const int sz = TF_ARRAYSIZE(lines);
+  const int sz = TF_ARRAYSIZE(ascii_lines);
+  Tensor t(DT_STRING, {batch});
+  auto s = t.flat<string>();
+  for (int i = 0; i < batch; ++i) {
+    s(i) = ascii_lines[i % sz];
+  }
+  return t;
+}
+
+Tensor GetTestUTF8Tensor(int batch) {
+  const int sz = TF_ARRAYSIZE(unicode_lines);
   Tensor t(DT_STRING, {batch});
   auto s = t.flat<string>();
   for (int i = 0; i < batch; ++i) {
-    s(i) = lines[i % sz];
+    s(i) = unicode_lines[i % sz];
   }
   return t;
 }
 
-Graph* SetupSubstrGraph(const Tensor& input, const int32 pos, const int32 len) {
+Graph* SetupSubstrGraph(const Tensor& input, const int32 pos, const int32 len,
+                        const char* const unit) {
   Graph* g = new Graph(OpRegistry::Global());
   Tensor position(DT_INT32, TensorShape({}));
   position.flat<int32>().setConstant(pos);
@@ -85,21 +144,46 @@ Graph* SetupSubstrGraph(const Tensor& input, const int32 pos, const int32 len) {
                   .Input(test::graph::Constant(g, input))
                   .Input(test::graph::Constant(g, position))
                   .Input(test::graph::Constant(g, length))
+                  .Attr("unit", unit)
                   .Finalize(g, nullptr /* node */));
   return g;
 }
 
-void BM_Substr(int iters, int batch_size) {
+void BM_SubstrByte(int iters, int batch_size) {
   testing::StopTiming();
   testing::ItemsProcessed(static_cast<int64>(iters));
   testing::UseRealTime();
   Tensor input = GetTestTensor(batch_size);
-  Graph* g = SetupSubstrGraph(input, 3, 30);
+  Graph* g = SetupSubstrGraph(input, 3, 30, kByteUnit);
+  testing::StartTiming();
+  test::Benchmark("cpu", g).Run(iters);
+}
+
+void BM_SubstrUTF8(int iters, int batch_size) {
+  testing::StopTiming();
+  testing::ItemsProcessed(static_cast<int64>(iters));
+  testing::UseRealTime();
+  Tensor input = GetTestUTF8Tensor(batch_size);
+  Graph* g = SetupSubstrGraph(input, 3, 30, kUTF8Unit);
   testing::StartTiming();
   test::Benchmark("cpu", g).Run(iters);
 }
 
-BENCHMARK(BM_Substr)->Arg(1)->Arg(8)->Arg(16)->Arg(32)->Arg(64)->Arg(128)->Arg(
-    256);
+BENCHMARK(BM_SubstrByte)
+    ->Arg(1)
+    ->Arg(8)
+    ->Arg(16)
+    ->Arg(32)
+    ->Arg(64)
+    ->Arg(128)
+    ->Arg(256);
+BENCHMARK(BM_SubstrUTF8)
+    ->Arg(1)
+    ->Arg(8)
+    ->Arg(16)
+    ->Arg(32)
+    ->Arg(64)
+    ->Arg(128)
+    ->Arg(256);
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index b4fbde54d9..94d71a4113 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -223,6 +223,7 @@ REGISTER_OP("Substr")
     .Input("len: T")
     .Output("output: string")
     .Attr("T: {int32, int64}")
+    .Attr("unit: {'BYTE', 'UTF8_CHAR'} = 'BYTE'")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle pos_shape = c->input(1);
       ShapeHandle len_shape = c->input(2);
diff --git a/tensorflow/python/kernel_tests/substr_op_test.py b/tensorflow/python/kernel_tests/substr_op_test.py
index cd3fe14883..37aa624b07 100644
--- a/tensorflow/python/kernel_tests/substr_op_test.py
+++ b/tensorflow/python/kernel_tests/substr_op_test.py
@@ -28,270 +28,448 @@ from tensorflow.python.platform import test
 
 class SubstrOpTest(test.TestCase, parameterized.TestCase):
 
-  def _testScalarString(self, dtype):
-    test_string = b"Hello"
-    position = np.array(1, dtype)
+  @parameterized.parameters(
+      (np.int32, 1, "BYTE"),
+      (np.int64, 1, "BYTE"),
+      (np.int32, -4, "BYTE"),
+      (np.int64, -4, "BYTE"),
+      (np.int32, 1, "UTF8_CHAR"),
+      (np.int64, 1, "UTF8_CHAR"),
+      (np.int32, -4, "UTF8_CHAR"),
+      (np.int64, -4, "UTF8_CHAR"),
+  )
+  def testScalarString(self, dtype, pos, unit):
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"He\xc3\xc3\U0001f604".encode("utf-8"),
+    }[unit]
+    expected_value = {
+        "BYTE": b"ell",
+        "UTF8_CHAR": u"e\xc3\xc3".encode("utf-8"),
+    }[unit]
+    position = np.array(pos, dtype)
     length = np.array(3, dtype)
-    expected_value = b"ell"
-
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-    # Negative position.
-    test_string = b"Hello"
-    position = np.array(-4, dtype)
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testScalarString_EdgeCases(self, dtype, unit):
+    # Empty string
+    test_string = {
+        "BYTE": b"",
+        "UTF8_CHAR": u"".encode("utf-8"),
+    }[unit]
+    expected_value = b""
+    position = np.array(0, dtype)
     length = np.array(3, dtype)
-    expected_value = b"ell"
-
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-    # Position is equal to the length of string.
-    test_string = b""
+    # Full string
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"H\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
     position = np.array(0, dtype)
-    length = np.array(2, dtype)
-    expected_value = b""
-
-    substr_op = string_ops.substr(test_string, position, length)
+    length = np.array(5, dtype)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
-      self.assertAllEqual(substr, expected_value)
-
-    # Negative position magnitude is equal to the length of string.
-    test_string = b"yo"
-    position = np.array(-2, dtype)
-    length = np.array(1, dtype)
-    expected_value = b"y"
-
-    substr_op = string_ops.substr(test_string, position, length)
+      self.assertAllEqual(substr, test_string)
+
+    # Full string (Negative)
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"H\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
+    position = np.array(-5, dtype)
+    length = np.array(5, dtype)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
-      self.assertAllEqual(substr, expected_value)
-
-  def _testVectorStrings(self, dtype):
-    test_string = [b"Hello", b"World"]
-    position = np.array(1, dtype)
-    length = np.array(3, dtype)
-    expected_value = [b"ell", b"orl"]
-
-    substr_op = string_ops.substr(test_string, position, length)
+      self.assertAllEqual(substr, test_string)
+
+    # Length is larger in magnitude than a negative position
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"H\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
+    expected_string = {
+        "BYTE": b"ello",
+        "UTF8_CHAR": u"\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
+    position = np.array(-4, dtype)
+    length = np.array(5, dtype)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
-      self.assertAllEqual(substr, expected_value)
-
-    # Negative position.
-    test_string = [b"Hello", b"World"]
-    position = np.array(-4, dtype)
+      self.assertAllEqual(substr, expected_string)
+
+  @parameterized.parameters(
+      (np.int32, 1, "BYTE"),
+      (np.int64, 1, "BYTE"),
+      (np.int32, -4, "BYTE"),
+      (np.int64, -4, "BYTE"),
+      (np.int32, 1, "UTF8_CHAR"),
+      (np.int64, 1, "UTF8_CHAR"),
+      (np.int32, -4, "UTF8_CHAR"),
+      (np.int64, -4, "UTF8_CHAR"),
+  )
+  def testVectorStrings(self, dtype, pos, unit):
+    test_string = {
+        "BYTE": [b"Hello", b"World"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"H\xc3llo",
+                                                  u"W\U0001f604rld"]],
+    }[unit]
+    expected_value = {
+        "BYTE": [b"ell", b"orl"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"\xc3ll", u"\U0001f604rl"]],
+    }[unit]
+    position = np.array(pos, dtype)
     length = np.array(3, dtype)
-    expected_value = [b"ell", b"orl"]
-
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-  def _testMatrixStrings(self, dtype):
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testMatrixStrings(self, dtype, unit):
+    test_string = {
+        "BYTE": [[b"ten", b"eleven", b"twelve"],
+                 [b"thirteen", b"fourteen", b"fifteen"],
+                 [b"sixteen", b"seventeen", b"eighteen"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227n",
+                                                   u"\xc6\u053c\u025bv\u025bn",
+                                                   u"tw\u0c1dlv\u025b"]],
+                      [x.encode("utf-8") for x in [u"He\xc3\xc3o",
+                                                   u"W\U0001f604rld",
+                                                   u"d\xfcd\xea"]]],
+    }[unit]
     position = np.array(1, dtype)
     length = np.array(4, dtype)
-    expected_value = [[b"en", b"leve", b"welv"], [b"hirt", b"ourt", b"ifte"],
-                      [b"ixte", b"even", b"ight"]]
-
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"en", b"leve", b"welv"], [b"hirt", b"ourt", b"ifte"],
+                 [b"ixte", b"even", b"ight"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d227n",
+                                                   u"\u053c\u025bv\u025b",
+                                                   u"w\u0c1dlv"]],
+                      [x.encode("utf-8") for x in [u"e\xc3\xc3o",
+                                                   u"\U0001f604rld",
+                                                   u"\xfcd\xea"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-    # Negative position
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
-    position = np.array(-2, dtype)
+    position = np.array(-3, dtype)
     length = np.array(2, dtype)
-    expected_value = [[b"en", b"en", b"ve"], [b"en", b"en", b"en"],
-                      [b"en", b"en", b"en"]]
-
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"te", b"ve", b"lv"], [b"ee", b"ee", b"ee"],
+                 [b"ee", b"ee", b"ee"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227",
+                                                   u"v\u025b", u"lv"]],
+                      [x.encode("utf-8") for x in [u"\xc3\xc3", u"rl",
+                                                   u"\xfcd"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-  def _testElementWisePosLen(self, dtype):
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testElementWisePosLen(self, dtype, unit):
+    test_string = {
+        "BYTE": [[b"ten", b"eleven", b"twelve"],
+                 [b"thirteen", b"fourteen", b"fifteen"],
+                 [b"sixteen", b"seventeen", b"eighteen"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227n",
+                                                   u"\xc6\u053c\u025bv\u025bn",
+                                                   u"tw\u0c1dlv\u025b"]],
+                      [x.encode("utf-8") for x in [u"He\xc3\xc3o",
+                                                   u"W\U0001f604rld",
+                                                   u"d\xfcd\xea"]],
+                      [x.encode("utf-8") for x in [u"sixt\xea\xean",
+                                                   u"se\U00010299enteen",
+                                                   u"ei\U0001e920h\x86een"]]],
+    }[unit]
     position = np.array([[1, -4, 3], [1, 2, -4], [-5, 2, 3]], dtype)
     length = np.array([[2, 2, 4], [4, 3, 2], [5, 5, 5]], dtype)
-    expected_value = [[b"en", b"ev", b"lve"], [b"hirt", b"urt", b"te"],
-                      [b"xteen", b"vente", b"hteen"]]
-
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"en", b"ev", b"lve"], [b"hirt", b"urt", b"te"],
+                 [b"xteen", b"vente", b"hteen"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d227n",
+                                                   u"\u025bv",
+                                                   u"lv\u025b"]],
+                      [x.encode("utf-8") for x in [u"e\xc3\xc3o",
+                                                   u"rld",
+                                                   u"d\xfc"]],
+                      [x.encode("utf-8") for x in [u"xt\xea\xean",
+                                                   u"\U00010299ente",
+                                                   u"h\x86een"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-  def _testBroadcast(self, dtype):
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testBroadcast(self, dtype, unit):
     # Broadcast pos/len onto input string
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"],
-                   [b"nineteen", b"twenty", b"twentyone"]]
+    test_string = {
+        "BYTE": [[b"ten", b"eleven", b"twelve"],
+                 [b"thirteen", b"fourteen", b"fifteen"],
+                 [b"sixteen", b"seventeen", b"eighteen"],
+                 [b"nineteen", b"twenty", b"twentyone"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227n",
+                                                   u"\xc6\u053c\u025bv\u025bn",
+                                                   u"tw\u0c1dlv\u025b"]],
+                      [x.encode("utf-8") for x in [u"th\xcdrt\xea\xean",
+                                                   u"f\U0001f604urt\xea\xean",
+                                                   u"f\xcd\ua09ctee\ua0e4"]],
+                      [x.encode("utf-8") for x in [u"s\xcdxt\xea\xean",
+                                                   u"se\U00010299enteen",
+                                                   u"ei\U0001e920h\x86een"]],
+                      [x.encode("utf-8") for x in [u"nineteen",
+                                                   u"twenty",
+                                                   u"twentyone"]]],
+    }[unit]
     position = np.array([1, -4, 3], dtype)
     length = np.array([1, 2, 3], dtype)
-    expected_value = [[b"e", b"ev", b"lve"], [b"h", b"te", b"tee"],
-                      [b"i", b"te", b"hte"], [b"i", b"en", b"nty"]]
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"e", b"ev", b"lve"], [b"h", b"te", b"tee"],
+                 [b"i", b"te", b"hte"], [b"i", b"en", b"nty"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d227",
+                                                   u"\u025bv", u"lv\u025b"]],
+                      [x.encode("utf-8") for x in [u"h", u"t\xea", u"tee"]],
+                      [x.encode("utf-8") for x in [u"\xcd", u"te", u"h\x86e"]],
+                      [x.encode("utf-8") for x in [u"i", u"en", u"nty"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
     # Broadcast input string onto pos/len
-    test_string = [b"thirteen", b"fourteen", b"fifteen"]
+    test_string = {
+        "BYTE": [b"thirteen", b"fourteen", b"fifteen"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"th\xcdrt\xea\xean",
+                                                  u"f\U0001f604urt\xea\xean",
+                                                  u"f\xcd\ua09ctee\ua0e4"]],
+    }[unit]
     position = np.array([[1, -2, 3], [-3, 2, 1], [5, 5, -5]], dtype)
     length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype)
-    expected_value = [[b"hir", b"en", b"t"], [b"e", b"ur", b"ift"],
-                      [b"ee", b"ee", b"ft"]]
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"hir", b"en", b"t"], [b"e", b"ur", b"ift"],
+                 [b"ee", b"ee", b"ft"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"h\xcdr", u"\xean", u"t"]],
+                      [x.encode("utf-8") for x in [u"\xea", u"ur",
+                                                   u"\xcd\ua09ct"]],
+                      [x.encode("utf-8") for x in [u"\xea\xea", u"\xea\xea",
+                                                   u"\ua09ct"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
     # Test 1D broadcast
-    test_string = b"thirteen"
-    position = np.array([1, -5, 7], dtype)
+    test_string = {
+        "BYTE": b"thirteen",
+        "UTF8_CHAR": u"th\xcdrt\xea\xean".encode("utf-8"),
+    }[unit]
+    position = np.array([1, -4, 7], dtype)
     length = np.array([3, 2, 1], dtype)
-    expected_value = [b"hir", b"rt", b"n"]
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [b"hir", b"te", b"n"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"h\xcdr", u"t\xea", u"n"]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-  def _testBadBroadcast(self, dtype):
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testBadBroadcast(self, dtype, unit):
     test_string = [[b"ten", b"eleven", b"twelve"],
                    [b"thirteen", b"fourteen", b"fifteen"],
                    [b"sixteen", b"seventeen", b"eighteen"]]
     position = np.array([1, 2, -3, 4], dtype)
     length = np.array([1, 2, 3, 4], dtype)
     with self.assertRaises(ValueError):
-      substr_op = string_ops.substr(test_string, position, length)
-
-  def _testOutOfRangeError(self, dtype):
+      string_ops.substr(test_string, position, length, unit=unit)
+
+  @parameterized.parameters(
+      (np.int32, 6, "BYTE"),
+      (np.int64, 6, "BYTE"),
+      (np.int32, -6, "BYTE"),
+      (np.int64, -6, "BYTE"),
+      (np.int32, 6, "UTF8_CHAR"),
+      (np.int64, 6, "UTF8_CHAR"),
+      (np.int32, -6, "UTF8_CHAR"),
+      (np.int64, -6, "UTF8_CHAR"),
+  )
+  def testOutOfRangeError_Scalar(self, dtype, pos, unit):
     # Scalar/Scalar
-    test_string = b"Hello"
-    position = np.array(7, dtype)
-    length = np.array(3, dtype)
-    substr_op = string_ops.substr(test_string, position, length)
-    with self.cached_session():
-      with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
-    # Scalar/Scalar (with negative)
-    test_string = b"Hello"
-    position = np.array(-7, dtype)
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"H\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
+    position = np.array(pos, dtype)
     length = np.array(3, dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
+        substr_op.eval()
+
+  @parameterized.parameters(
+      (np.int32, 4, "BYTE"),
+      (np.int64, 4, "BYTE"),
+      (np.int32, -4, "BYTE"),
+      (np.int64, -4, "BYTE"),
+      (np.int32, 4, "UTF8_CHAR"),
+      (np.int64, 4, "UTF8_CHAR"),
+      (np.int32, -4, "UTF8_CHAR"),
+      (np.int64, -4, "UTF8_CHAR"),
+  )
+  def testOutOfRangeError_VectorScalar(self, dtype, pos, unit):
     # Vector/Scalar
-    test_string = [b"good", b"good", b"bad", b"good"]
-    position = np.array(4, dtype)
-    length = np.array(1, dtype)
-    substr_op = string_ops.substr(test_string, position, length)
-    with self.cached_session():
-      with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
-    # Vector/Scalar (with negative)
-    test_string = [b"good", b"good", b"bad", b"good"]
-    position = np.array(-4, dtype)
+    test_string = {
+        "BYTE": [b"good", b"good", b"bad", b"good"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"g\xc3\xc3d", u"b\xc3d",
+                                                  u"g\xc3\xc3d"]],
+    }[unit]
+    position = np.array(pos, dtype)
     length = np.array(1, dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
+        substr_op.eval()
+
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testOutOfRangeError_MatrixMatrix(self, dtype, unit):
     # Matrix/Matrix
-    test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"],
-                   [b"good", b"good", b"good"]]
+    test_string = {
+        "BYTE": [[b"good", b"good", b"good"], [b"good", b"good", b"bad"],
+                 [b"good", b"good", b"good"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"g\xc3\xc3d"]],
+                      [x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"b\xc3d"]],
+                      [x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"g\xc3\xc3d"]]],
+    }[unit]
     position = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 3]], dtype)
     length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
+        substr_op.eval()
 
     # Matrix/Matrix (with negative)
-    test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"],
-                   [b"good", b"good", b"good"]]
     position = np.array([[1, 2, -3], [1, 2, -4], [1, 2, -3]], dtype)
     length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
+        substr_op.eval()
+
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testOutOfRangeError_Broadcast(self, dtype, unit):
     # Broadcast
-    test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"]]
+    test_string = {
+        "BYTE": [[b"good", b"good", b"good"], [b"good", b"good", b"bad"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"g\xc3\xc3d"]],
+                      [x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"b\xc3d"]]],
+    }[unit]
     position = np.array([1, 2, 4], dtype)
     length = np.array([1, 2, 3], dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
+        substr_op.eval()
 
     # Broadcast (with negative)
-    test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"]]
     position = np.array([-1, -2, -4], dtype)
     length = np.array([1, 2, 3], dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
-  def _testMismatchPosLenShapes(self, dtype):
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
+        substr_op.eval()
+
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testMismatchPosLenShapes(self, dtype, unit):
+    test_string = {
+        "BYTE": [[b"ten", b"eleven", b"twelve"],
+                 [b"thirteen", b"fourteen", b"fifteen"],
+                 [b"sixteen", b"seventeen", b"eighteen"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227n",
+                                                   u"\xc6\u053c\u025bv\u025bn",
+                                                   u"tw\u0c1dlv\u025b"]],
+                      [x.encode("utf-8") for x in [u"th\xcdrt\xea\xean",
+                                                   u"f\U0001f604urt\xea\xean",
+                                                   u"f\xcd\ua09ctee\ua0e4"]],
+                      [x.encode("utf-8") for x in [u"s\xcdxt\xea\xean",
+                                                   u"se\U00010299enteen",
+                                                   u"ei\U0001e920h\x86een"]]],
+    }[unit]
     position = np.array([[1, 2, 3]], dtype)
     length = np.array([2, 3, 4], dtype)
     # Should fail: position/length have different rank
     with self.assertRaises(ValueError):
-      substr_op = string_ops.substr(test_string, position, length)
+      string_ops.substr(test_string, position, length)
 
     position = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]], dtype)
     length = np.array([[2, 3, 4]], dtype)
     # Should fail: position/length have different dimensionality
     with self.assertRaises(ValueError):
-      substr_op = string_ops.substr(test_string, position, length)
-
-    # Negative position.
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
-    position = np.array([[-1, -2, -3]], dtype)
-    length = np.array([1, 2, 3], dtype)
-    # Should fail: position/length have different rank
-    with self.assertRaises(ValueError):
-      substr_op = string_ops.substr(test_string, position, length)
-
-  @parameterized.parameters(np.int32, np.int64)
-  def testAll(self, dtype):
-    self._testScalarString(dtype)
-    self._testVectorStrings(dtype)
-    self._testMatrixStrings(dtype)
-    self._testElementWisePosLen(dtype)
-    self._testBroadcast(dtype)
-    self._testBadBroadcast(dtype)
-    self._testOutOfRangeError(dtype)
-    self._testMismatchPosLenShapes(dtype)
+      string_ops.substr(test_string, position, length)
 
   def testWrongDtype(self):
     with self.cached_session():
@@ -300,6 +478,11 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
       with self.assertRaises(TypeError):
         string_ops.substr(b"test", 3, 1.0)
 
+  def testInvalidUnit(self):
+    with self.cached_session():
+      with self.assertRaises(ValueError):
+        string_ops.substr(b"test", 3, 1, unit="UTF8")
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index 0812f901a2..f26388efea 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -347,6 +347,22 @@ def string_length(input, name=None, unit="BYTE"):
 string_length.__doc__ = gen_string_ops.string_length.__doc__
 
 
+@tf_export("substr")
+@deprecation.deprecated(None, "Use `tf.strings.substr` instead of `tf.substr`.")
+def substr_deprecated(input, pos, len, name=None, unit="BYTE"):
+  return substr(input, pos, len, name=name, unit=unit)
+
+substr_deprecated.__doc__ = gen_string_ops.substr.__doc__
+
+
+@tf_export("strings.substr")
+def substr(input, pos, len, name=None, unit="BYTE"):
+  return gen_string_ops.substr(input, pos, len, unit=unit, name=name)
+
+
+substr.__doc__ = gen_string_ops.substr.__doc__
+
+
 ops.NotDifferentiable("RegexReplace")
 ops.NotDifferentiable("StringToHashBucket")
 ops.NotDifferentiable("StringToHashBucketFast")
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index c1cc7322f0..247dfcc1ca 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -2094,7 +2094,7 @@ tf_module {
   }
   member_method {
     name: "substr"
-    argspec: "args=[\'input\', \'pos\', \'len\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'pos\', \'len\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "subtract"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
index ebdaf57231..5ba48e7f57 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
@@ -34,7 +34,7 @@ tf_module {
   }
   member_method {
     name: "substr"
-    argspec: "args=[\'input\', \'pos\', \'len\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'pos\', \'len\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "to_hash_bucket"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 571abc3b19..978afcf985 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -1934,7 +1934,7 @@ tf_module {
   }
   member_method {
     name: "substr"
-    argspec: "args=[\'input\', \'pos\', \'len\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'pos\', \'len\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "subtract"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
index ebdaf57231..5ba48e7f57 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
@@ -34,7 +34,7 @@ tf_module {
   }
   member_method {
     name: "substr"
-    argspec: "args=[\'input\', \'pos\', \'len\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'pos\', \'len\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "to_hash_bucket"
-- 
GitLab


From 31619b408551907030dc25d8270f8997a0d9e6aa Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Thu, 4 Oct 2018 11:34:55 -0700
Subject: [PATCH 1139/1357] Add xla library into contrib_py

PiperOrigin-RevId: 215774158
---
 tensorflow/contrib/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index fbe0573d5d..fa06d351d4 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -29,6 +29,7 @@ py_library(
         "//tensorflow/contrib/cluster_resolver:cluster_resolver_py",
         "//tensorflow/contrib/coder:coder_py",
         "//tensorflow/contrib/compiler:compiler_py",
+        "//tensorflow/contrib/compiler:xla",
         "//tensorflow/contrib/autograph",
         "//tensorflow/contrib/constrained_optimization",
         "//tensorflow/contrib/copy_graph:copy_graph_py",
-- 
GitLab


From 2390b48b11efda60a0f68a683c94af9612a5306f Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 4 Oct 2018 11:54:24 -0700
Subject: [PATCH 1140/1357] Add a separator between shape and dtype in cache
 key encoding.

It was possible that we could mix shapes and types (T111 could mean a tensor of dtype 1 and shape (1, 1) or a tensor of dtype 11 and shape (1)).

PiperOrigin-RevId: 215777629
---
 tensorflow/python/eager/function_test.py  | 44 +++++++++++++++++++++--
 tensorflow/python/eager/pywrap_tfe_src.cc | 34 +++++++++---------
 2 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 9ce367a837..a2cfb4b476 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1255,6 +1255,44 @@ class FunctionTest(test.TestCase):
     defined(Foo())
     self.assertEqual(len(defined._function_cache), 2)
 
+  def testCacheTensorShapeDtypeCollision(self):
+
+    def func(t):
+      return t + t
+
+    defined = function.defun(func)
+    t = constant_op.constant([[1.0]], dtype=dtypes.complex64)
+    defined(t)
+    self.assertEqual(len(defined._function_cache), 1)
+
+    t = constant_op.constant([1.0], dtype=dtypes.complex128)
+    defined(t)
+    self.assertEqual(len(defined._function_cache), 2)
+
+  def testCacheTensorUnknownShapesCollision(self):
+
+    def func(t):
+      return t + t
+
+    with context.graph_mode(), self.cached_session():
+      defined = function.defun(func)
+
+      p = array_ops.placeholder(dtype=dtypes.float32, shape=None)
+      defined(p)
+      self.assertEqual(len(defined._function_cache), 1)
+
+      p = array_ops.placeholder(dtype=dtypes.float32, shape=[None])
+      defined(p)
+      self.assertEqual(len(defined._function_cache), 2)
+
+      p = array_ops.placeholder(dtype=dtypes.float32, shape=[None, None])
+      defined(p)
+      self.assertEqual(len(defined._function_cache), 3)
+
+      t = constant_op.constant(1.0, dtype=dtypes.float32)
+      defined(t)
+      self.assertEqual(len(defined._function_cache), 4)
+
   def testPythonFunctionWithDefaultArgs(self):
 
     def func(foo, bar=1, baz=2):
@@ -1271,17 +1309,17 @@ class FunctionTest(test.TestCase):
       return tuple(key[0] for key in defined._function_cache)
 
     # `True` corresponds to the fact that we're executing eagerly
-    self.assertIn(('tRRR', (0, 1, 20)), cache_keys())
+    self.assertIn(('URRR', (0, 1, 20)), cache_keys())
 
     defined(1)  # bar=1, baz=2
-    self.assertIn(('tRRR', (1, 1, 2)), cache_keys())
+    self.assertIn(('URRR', (1, 1, 2)), cache_keys())
 
     # This matches the previous call.
     defined(foo=1)
     self.assertEqual(len(defined._function_cache), 2)
 
     defined(1, 2, 3)
-    self.assertIn(('tRRR', (1, 2, 3)), cache_keys())
+    self.assertIn(('URRR', (1, 2, 3)), cache_keys())
 
     # This matches the previous call.
     defined(1, bar=2, baz=3)
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index ae1e12f9c3..6193f40ce8 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -2747,11 +2747,15 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs,
 }
 
 namespace {
-
-tensorflow::int64 GetPyNoneHash() {
-  tensorflow::int64 py_none_hash = PyObject_Hash(Py_None);
-  return py_none_hash;
-}
+const char kTensor[] = "T";
+const char kIndexedSlices[] = "I";
+const char kList[] = "L";
+const char kTuple[] = "U";
+const char kDict[] = "D";
+const char kRaw[] = "R";
+const char kShape[] = "s";
+const char kDType[] = "d";
+const char kNone[] = "n";
 
 struct EncodeResult {
   string str;
@@ -2784,8 +2788,10 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
     TFE_TensorHandle* t = EagerTensor_Handle(arg);
     tensorflow::TensorShape tensor_shape;
     TF_RETURN_IF_ERROR(t->handle->Shape(&tensor_shape));
-    absl::StrAppend(&result->str, t->handle->dtype);
 
+    absl::StrAppend(&result->str, kDType, t->handle->dtype);
+
+    absl::StrAppend(&result->str, kShape);
     for (tensorflow::int64 dim_size : tensor_shape.dim_sizes()) {
       absl::StrAppend(&result->str, dim_size);
     }
@@ -2812,7 +2818,7 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
   tensorflow::DataType dtype =
       static_cast<tensorflow::DataType>(MakeInt(dtype_enum.get()));
 
-  absl::StrAppend(&result->str, dtype);
+  absl::StrAppend(&result->str, kDType, dtype);
   static char _shape_tuple[] = "_shape_tuple";
   tensorflow::Safe_PyObjectPtr shape_tuple(
       PyObject_CallMethod(arg, _shape_tuple, nullptr));
@@ -2824,10 +2830,11 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
 
   if (shape_tuple.get() == Py_None) {
     // Unknown shape, encode that directly.
-    absl::StrAppend(&result->str, GetPyNoneHash());
+    absl::StrAppend(&result->str, kNone);
     return tensorflow::Status::OK();
   }
 
+  absl::StrAppend(&result->str, kShape);
   tensorflow::Safe_PyObjectPtr shape_seq(PySequence_Fast(
       shape_tuple.get(), "shape_tuple didn't return a sequence"));
 
@@ -2835,7 +2842,7 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
   for (int i = 0; i < len; ++i) {
     PyObject* item = PySequence_Fast_GET_ITEM(shape_seq.get(), i);
     if (item == Py_None) {
-      absl::StrAppend(&result->str, GetPyNoneHash());
+      absl::StrAppend(&result->str, kNone);
     } else {
       absl::StrAppend(&result->str, MakeInt(item));
     }
@@ -2844,13 +2851,6 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
   return tensorflow::Status::OK();
 }
 
-const char kTensor[] = "T";
-const char kIndexedSlices[] = "I";
-const char kList[] = "L";
-const char kTuple[] = "t";
-const char kDict[] = "D";
-const char kRaw[] = "R";
-
 tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, EncodeResult* result);
 
 // This function doesn't set the type of sequence before
@@ -2864,7 +2864,7 @@ tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type,
   for (int i = 0; i < len; ++i) {
     PyObject* item = PySequence_Fast_GET_ITEM(arg_seq.get(), i);
     if (item == Py_None) {
-      absl::StrAppend(&result->str, GetPyNoneHash());
+      absl::StrAppend(&result->str, kNone);
     } else {
       TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(item, result));
     }
-- 
GitLab


From b82c4dad705bffac6d14a189605c9ece89f8c17b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:55:48 -0700
Subject: [PATCH 1141/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 215777837

---
 tensorflow/go/op/wrappers.go | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index b4d4db3e4d..a7bbb80c82 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -29094,6 +29094,17 @@ func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source
 	return op.Output(0)
 }
 
+// SubstrAttr is an optional argument to Substr.
+type SubstrAttr func(optionalAttr)
+
+// SubstrUnit sets the optional unit attribute to value.
+// If not specified, defaults to "BYTE"
+func SubstrUnit(value string) SubstrAttr {
+	return func(m optionalAttr) {
+		m["unit"] = value
+	}
+}
+
 // Return substrings from `Tensor` of strings.
 //
 // For each string in the input `Tensor`, creates a substring starting at index
@@ -29178,15 +29189,20 @@ func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source
 //	len: Scalar defining the number of characters to include in each substring
 //
 // Returns Tensor of substrings
-func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output) (output tf.Output) {
+func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optional ...SubstrAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
 		Type: "Substr",
 		Input: []tf.Input{
 			input, pos, len,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
-- 
GitLab


From 2667ed3bf01e7153f466b27c450fc2b662c00bdd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:59:37 -0700
Subject: [PATCH 1142/1357] Makes sure Keras Layer's `__call__` is always used
 in Eager.

Currently if a Layer is invoked with the Functional API in Eager, `__call__` is only used
during setup, and thereafter `call` is used internally. This limits the ability
to add pre/post processing steps to `call` in Eager in the future.
Additionally, the Subclassed Model API already always uses `__call__` in Eager.

PiperOrigin-RevId: 215778408
---
 tensorflow/python/keras/engine/network.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 8d34006967..918488bd7a 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -1028,7 +1028,10 @@ class Network(base_layer.Layer):
                 output_tensors, output_masks = layer._call_and_compute_mask(
                     computed_tensor, **kwargs)
               else:
-                output_tensors = layer.call(computed_tensor, **kwargs)
+                if context.executing_eagerly():
+                  output_tensors = layer(computed_tensor, **kwargs)
+                else:
+                  output_tensors = layer.call(computed_tensor, **kwargs)
                 if hasattr(layer, 'compute_mask'):
                   output_masks = layer.compute_mask(computed_tensor,
                                                     computed_mask)
@@ -1049,7 +1052,10 @@ class Network(base_layer.Layer):
                 output_tensors, output_masks = layer._call_and_compute_mask(
                     computed_tensors, **kwargs)
               else:
-                output_tensors = layer.call(computed_tensors, **kwargs)
+                if context.executing_eagerly():
+                  output_tensors = layer(computed_tensors, **kwargs)
+                else:
+                  output_tensors = layer.call(computed_tensors, **kwargs)
                 if hasattr(layer, 'compute_mask'):
                   output_masks = layer.compute_mask(computed_tensors,
                                                     computed_masks)
-- 
GitLab


From 5bdd0f7c2807ed413cfc60319f1e75b1e6a4a5b5 Mon Sep 17 00:00:00 2001
From: Paul Donnelly <pauldonnelly@google.com>
Date: Thu, 4 Oct 2018 12:12:39 -0700
Subject: [PATCH 1143/1357] Remove obsolete TODO.

PiperOrigin-RevId: 215780734
---
 tensorflow/core/kernels/dequantize_op.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/core/kernels/dequantize_op.cc
index 42fbf95cd3..28940e0849 100644
--- a/tensorflow/core/kernels/dequantize_op.cc
+++ b/tensorflow/core/kernels/dequantize_op.cc
@@ -96,8 +96,6 @@ class DequantizeOp : public OpKernel {
             output);
       }
     } else if (mode_ == QUANTIZE_MODE_SCALED) {
-      // TODO(pauldonnelly): Update QuantizeAndDequantizeV2 and
-      // QuantizeAndDequantizeV3 to match this SCALED mode again.
       const float scale_factor =
           std::numeric_limits<T>::min() == 0
               ? (max_range / std::numeric_limits<T>::max())
-- 
GitLab


From 900d115135656229e3667025f925eb92687dce18 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 12:29:50 -0700
Subject: [PATCH 1144/1357] [XLA] Move FusionQueue class declaration into
 separate header

PiperOrigin-RevId: 215783391
---
 tensorflow/compiler/xla/service/BUILD         |  9 ++++
 .../compiler/xla/service/fusion_queue.h       | 53 +++++++++++++++++++
 .../xla/service/instruction_fusion.cc         |  1 +
 .../compiler/xla/service/instruction_fusion.h | 28 +---------
 4 files changed, 64 insertions(+), 27 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/fusion_queue.h

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index f329a27e14..2f8bab0614 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1323,11 +1323,20 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "fusion_queue",
+    hdrs = ["fusion_queue.h"],
+    deps = [
+        ":hlo",
+    ],
+)
+
 cc_library(
     name = "instruction_fusion",
     srcs = ["instruction_fusion.cc"],
     hdrs = ["instruction_fusion.h"],
     deps = [
+        ":fusion_queue",
         ":hlo",
         ":hlo_pass",
         "//tensorflow/compiler/xla:util",
diff --git a/tensorflow/compiler/xla/service/fusion_queue.h b/tensorflow/compiler/xla/service/fusion_queue.h
new file mode 100644
index 0000000000..1208a7dda8
--- /dev/null
+++ b/tensorflow/compiler/xla/service/fusion_queue.h
@@ -0,0 +1,53 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_FUSION_QUEUE_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_FUSION_QUEUE_H_
+
+#include <utility>
+
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+
+namespace xla {
+
+// A queue interface that allows implementations to choose fusion candidates in
+// custom order.
+class FusionQueue {
+ public:
+  FusionQueue() = default;
+  virtual ~FusionQueue() = default;
+
+  // Dequeues the next fusion candidates: a consumer and the list of producers
+  // as operand indices.
+  virtual std::pair<HloInstruction*, std::vector<int64>>
+  DequeueNextInstructionAndOperandsToFuseInOrder() = 0;
+
+  // A callback passed to the queue implementation right before the producer is
+  // fused into the consumer.
+  virtual void PreFusion(HloInstruction* producer, HloInstruction* consumer) {}
+
+  // A callback passed to the queue implementation right after the fusion is
+  // created. Note that original_producer could have been destroyed.
+  virtual void OnFusingInstruction(HloInstruction* fusion,
+                                   HloInstruction* original_producer,
+                                   HloInstruction* original_consumer) {}
+
+  // A callback passed to the queue implementation to notify the removal of an
+  // instruction.
+  virtual void RemoveInstruction(HloInstruction* instruction) = 0;
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_FUSION_QUEUE_H_
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index 5a99c40df4..69a4c160ee 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
+#include "tensorflow/compiler/xla/service/fusion_queue.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h
index da2032f6c7..f14c667520 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/instruction_fusion.h
@@ -17,6 +17,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INSTRUCTION_FUSION_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_INSTRUCTION_FUSION_H_
 
+#include "tensorflow/compiler/xla/service/fusion_queue.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
@@ -25,33 +26,6 @@ limitations under the License.
 
 namespace xla {
 
-// A queue interface that allows implementations to choose fusion candidates in
-// custom order.
-class FusionQueue {
- public:
-  FusionQueue() = default;
-  virtual ~FusionQueue() = default;
-
-  // Dequeues the next fusion candidates: a consumer and the list of producers
-  // as operand indices.
-  virtual std::pair<HloInstruction*, std::vector<int64>>
-  DequeueNextInstructionAndOperandsToFuseInOrder() = 0;
-
-  // A callback passed to the queue implementation right before the producer is
-  // fused into the consumer.
-  virtual void PreFusion(HloInstruction* producer, HloInstruction* consumer) {}
-
-  // A callback passed to the queue implementation right after the fusion is
-  // created. Note that original_producer could have been destroyed.
-  virtual void OnFusingInstruction(HloInstruction* fusion,
-                                   HloInstruction* original_producer,
-                                   HloInstruction* original_consumer) {}
-
-  // A callback passed to the queue implementation to notify the removal of an
-  // instruction.
-  virtual void RemoveInstruction(HloInstruction* instruction) = 0;
-};
-
 // HLO pass which performs instruction fusion. Instructions are fused
 // "vertically", meaning producing instructions are fused into their consumers
 // with the intent that the loops which compute their values will be fused in
-- 
GitLab


From 2c75da86ffdb9d04b2b94ce89891f17a8656da22 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 4 Oct 2018 12:41:23 -0700
Subject: [PATCH 1145/1357] [tf.data] Clean up tests for
 `tf.data.experimental`.

This change splits up large test files into smaller ones, and re-enables tests that were disabled for obsolete reasons.

PiperOrigin-RevId: 215785396
---
 .../python/data/experimental/benchmarks/BUILD |  25 +
 .../map_benchmark.py}                         | 114 ---
 .../data/experimental/kernel_tests/BUILD      | 545 ++++++------
 .../kernel_tests/batch_dataset_op_test.py     | 686 ---------------
 .../bucket_by_sequence_length_test.py         | 322 +++++++
 .../kernel_tests/bucketing_test.py            | 824 ------------------
 ...ing_ops_test.py => copy_to_device_test.py} | 417 +--------
 .../experimental/kernel_tests/counter_test.py |  51 ++
 ...dataset_op_test.py => csv_dataset_test.py} |   4 +-
 .../dataset_serialization_test_base.py        | 692 ---------------
 .../dense_to_sparse_batch_test.py             | 124 +++
 ...t_op_test.py => enumerate_dataset_test.py} |  26 +-
 .../function_buffering_resource_test.py       | 247 ++++++
 .../kernel_tests/group_by_reducer_test.py     | 199 +++++
 .../kernel_tests/group_by_window_test.py      | 367 ++++++++
 .../kernel_tests/ignore_errors_test.py        | 115 +++
 .../make_batched_features_dataset_test.py     | 239 +++++
 ...t_ops_test.py => make_csv_dataset_test.py} | 425 +--------
 .../make_tf_record_dataset_test.py            | 243 ++++++
 .../kernel_tests/map_and_batch_test.py        | 337 +++++++
 ...ps_test.py => override_threadpool_test.py} |   6 +-
 ...op_test.py => parallel_interleave_test.py} |   4 +-
 ..._test.py => parse_example_dataset_test.py} |   4 +-
 .../kernel_tests/prefetch_to_device_test.py   | 234 +++++
 .../reader_dataset_ops_test_base.py           |   4 +-
 ...ple_test.py => rejection_resample_test.py} |   4 +-
 ...p_test.py => restructured_dataset_test.py} |   4 +-
 .../{scan_dataset_op_test.py => scan_test.py} |   4 +-
 .../kernel_tests/serialization/BUILD          |  22 +-
 .../checkpoint_input_pipeline_hook_test.py}   |   0
 ...arse_example_dataset_serialization_test.py |   2 +-
 .../sql_dataset_serialization_test.py         |   4 +-
 .../serialization_integration_test.py         |  85 --
 ..._op_test.py => shuffle_and_repeat_test.py} |   2 +-
 ...dataset_op_test.py => sql_dataset_test.py} |   6 +-
 ..._test_base.py => sql_dataset_test_base.py} |   3 +-
 .../kernel_tests/stats_dataset_ops_test.py    |   2 +-
 ...r_ops_test.py => tf_record_writer_test.py} |   2 +-
 .../experimental/kernel_tests/unbatch_test.py | 300 +++++++
 ...ique_dataset_op_test.py => unique_test.py} |   4 +-
 .../data/kernel_tests/map_dataset_op_test.py  |  31 +-
 41 files changed, 3172 insertions(+), 3557 deletions(-)
 create mode 100644 tensorflow/python/data/experimental/benchmarks/BUILD
 rename tensorflow/python/data/experimental/{kernel_tests/map_dataset_op_test.py => benchmarks/map_benchmark.py} (71%)
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{prefetching_ops_test.py => copy_to_device_test.py} (56%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/counter_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{csv_dataset_op_test.py => csv_dataset_test.py} (99%)
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{range_dataset_op_test.py => enumerate_dataset_test.py} (68%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{reader_dataset_ops_test.py => make_csv_dataset_test.py} (57%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{threadpool_dataset_ops_test.py => override_threadpool_test.py} (94%)
 rename tensorflow/python/data/experimental/kernel_tests/{interleave_dataset_op_test.py => parallel_interleave_test.py} (99%)
 rename tensorflow/python/data/experimental/kernel_tests/{parsing_ops_test.py => parse_example_dataset_test.py} (99%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{resample_test.py => rejection_resample_test.py} (97%)
 rename tensorflow/python/data/experimental/kernel_tests/{dataset_constructor_op_test.py => restructured_dataset_test.py} (95%)
 rename tensorflow/python/data/experimental/kernel_tests/{scan_dataset_op_test.py => scan_test.py} (98%)
 rename tensorflow/python/data/experimental/kernel_tests/{iterator_ops_test.py => serialization/checkpoint_input_pipeline_hook_test.py} (100%)
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{shuffle_dataset_op_test.py => shuffle_and_repeat_test.py} (98%)
 rename tensorflow/python/data/experimental/kernel_tests/{sql_dataset_op_test.py => sql_dataset_test.py} (99%)
 rename tensorflow/python/data/experimental/kernel_tests/{sql_dataset_op_test_base.py => sql_dataset_test_base.py} (98%)
 rename tensorflow/python/data/experimental/kernel_tests/{writer_ops_test.py => tf_record_writer_test.py} (98%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{unique_dataset_op_test.py => unique_test.py} (96%)

diff --git a/tensorflow/python/data/experimental/benchmarks/BUILD b/tensorflow/python/data/experimental/benchmarks/BUILD
new file mode 100644
index 0000000000..b9398aebe7
--- /dev/null
+++ b/tensorflow/python/data/experimental/benchmarks/BUILD
@@ -0,0 +1,25 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+py_test(
+    name = "map_benchmark",
+    size = "medium",
+    srcs = ["map_benchmark.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/experimental/benchmarks/map_benchmark.py
similarity index 71%
rename from tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
rename to tensorflow/python/data/experimental/benchmarks/map_benchmark.py
index 2f0bd1456b..ad253cffa5 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/benchmarks/map_benchmark.py
@@ -19,7 +19,6 @@ from __future__ import print_function
 
 import hashlib
 import itertools
-import os
 import time
 
 import numpy as np
@@ -27,128 +26,15 @@ import numpy as np
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.data.experimental.ops import batching
-from tensorflow.python.data.experimental.ops import error_ops
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import io_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
-from tensorflow.python.util import compat
 
 _NUMPY_RANDOM_SEED = 42
 
 
-class MapDatasetTest(test_base.DatasetTestBase):
-
-  def testMapIgnoreError(self):
-    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
-
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.check_numerics(x, "message")).apply(
-            error_ops.ignore_errors()))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for x in [1., 2., 3., 5.]:
-        self.assertEqual(x, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testParallelMapIgnoreError(self):
-    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
-
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(components).map(
-            lambda x: array_ops.check_numerics(x, "message"),
-            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for x in [1., 2., 3., 5.]:
-        self.assertEqual(x, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testReadFileIgnoreError(self):
-
-    def write_string_to_file(value, filename):
-      with open(filename, "w") as f:
-        f.write(value)
-
-    filenames = [
-        os.path.join(self.get_temp_dir(), "file_%d.txt" % i) for i in range(5)
-    ]
-    for filename in filenames:
-      write_string_to_file(filename, filename)
-
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(filenames).map(
-            io_ops.read_file,
-            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      # All of the files are present.
-      sess.run(init_op)
-      for filename in filenames:
-        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-      # Delete one of the files.
-      os.remove(filenames[0])
-
-      # Attempting to read filenames[0] will fail, but ignore_errors()
-      # will catch the error.
-      sess.run(init_op)
-      for filename in filenames[1:]:
-        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testCaptureResourceInMapFn(self):
-
-    def _build_ds(iterator):
-
-      def _map_fn(x):
-        get_next = iterator.get_next()
-        return x * get_next
-
-      return dataset_ops.Dataset.range(10).map(_map_fn)
-
-    def _build_graph():
-      captured_iterator = dataset_ops.Dataset.range(
-          10).make_initializable_iterator()
-      ds = _build_ds(captured_iterator)
-      iterator = ds.make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      return captured_iterator.initializer, init_op, get_next
-
-    with ops.Graph().as_default() as g:
-      captured_init_op, init_op, get_next = _build_graph()
-      with self.session(graph=g) as sess:
-        sess.run(captured_init_op)
-        sess.run(init_op)
-        for i in range(10):
-          self.assertEquals(i * i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-
 class MapDatasetBenchmark(test.Benchmark):
 
   # The purpose of this benchmark is to compare the performance of chaining vs
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index f56127f3ef..4eef9580ad 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -8,75 +8,62 @@ load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 load("//tensorflow:tensorflow.bzl", "py_test")
 
 py_test(
-    name = "batch_dataset_op_test",
+    name = "bucket_by_sequence_length_test",
     size = "medium",
-    srcs = ["batch_dataset_op_test.py"],
+    srcs = ["bucket_by_sequence_length_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",  # (b/79552534)
-        "no_pip",
-        "no_windows",
-    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:session",
         "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
         "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
     ],
 )
 
+cuda_py_test(
+    name = "copy_to_device_test",
+    size = "small",
+    srcs = ["copy_to_device_test.py"],
+    additional_deps = [
+        "//tensorflow/python/data/experimental/ops:prefetching_ops",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python/compat:compat",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+    tags = ["no_windows_gpu"],
+)
+
 py_test(
-    name = "bucketing_test",
-    size = "medium",
-    srcs = ["bucketing_test.py"],
+    name = "counter_test",
+    size = "small",
+    srcs = ["counter_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
     deps = [
-        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/experimental/ops:grouping",
+        "//tensorflow/python/data/experimental/ops:counter",
         "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
     ],
 )
 
 py_test(
-    name = "csv_dataset_op_test",
+    name = "csv_dataset_test",
     size = "medium",
-    srcs = ["csv_dataset_op_test.py"],
+    srcs = ["csv_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    tags = ["no_pip"],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -97,25 +84,18 @@ py_test(
 )
 
 py_test(
-    name = "dataset_constructor_op_test",
-    size = "medium",
-    srcs = ["dataset_constructor_op_test.py"],
+    name = "dense_to_sparse_batch_test",
+    srcs = ["dense_to_sparse_batch_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "manual",
-        "no_oss",
-        "no_pip",
-        "no_windows",
-        "nomac",  # b/62040583
-    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
         "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
     ],
 )
 
@@ -124,11 +104,6 @@ py_test(
     size = "medium",
     srcs = ["directed_interleave_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -140,15 +115,68 @@ py_test(
     ],
 )
 
+py_test(
+    name = "enumerate_dataset_test",
+    size = "small",
+    srcs = ["enumerate_dataset_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:enumerate_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_test(
+    name = "filter_dataset_op_test",
+    size = "medium",
+    srcs = ["filter_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "function_buffering_resource_test",
+    size = "small",
+    srcs = ["function_buffering_resource_test.py"],
+    additional_deps = [
+        "//tensorflow/python/data/experimental/ops:prefetching_ops",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+    tags = ["no_windows_gpu"],
+)
+
 py_test(
     name = "get_single_element_test",
     size = "small",
     srcs = ["get_single_element_test.py"],
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -165,19 +193,20 @@ py_test(
 )
 
 py_test(
-    name = "indexed_dataset_ops_test",
-    srcs = ["indexed_dataset_ops_test.py"],
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    name = "group_by_reducer_test",
+    size = "medium",
+    srcs = ["group_by_reducer_test.py"],
+    srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python/data/experimental/ops:indexed_dataset_ops",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
@@ -185,107 +214,134 @@ py_test(
 )
 
 py_test(
-    name = "interleave_dataset_op_test",
+    name = "group_by_window_test",
     size = "medium",
-    srcs = ["interleave_dataset_op_test.py"],
+    srcs = ["group_by_window_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-        "notap",
-    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "@six_archive//:six",
+        "//third_party/py/numpy",
     ],
 )
 
 py_test(
-    name = "iterator_ops_test",
-    size = "small",
-    srcs = ["iterator_ops_test.py"],
+    name = "ignore_errors_test",
+    srcs = ["ignore_errors_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:error_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
     ],
+)
+
+py_test(
+    name = "indexed_dataset_ops_test",
+    srcs = ["indexed_dataset_ops_test.py"],
     deps = [
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/experimental/ops:iterator_ops",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/experimental/ops:indexed_dataset_ops",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/estimator:estimator_py",
+        "//third_party/py/numpy",
     ],
 )
 
 py_test(
-    name = "map_dataset_op_test",
+    name = "make_batched_features_dataset_test",
     size = "medium",
-    srcs = ["map_dataset_op_test.py"],
+    srcs = ["make_batched_features_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-        "noasan",  # times out
-        "optonly",
+    tags = ["no_pip"],
+    deps = [
+        ":reader_dataset_ops_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
     ],
+)
+
+py_test(
+    name = "make_csv_dataset_test",
+    size = "medium",
+    srcs = ["make_csv_dataset_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
-        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:batching",
-        "//tensorflow/python/data/experimental/ops:error_ops",
-        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/experimental/ops:readers",
         "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
         "//third_party/py/numpy",
     ],
 )
 
 py_test(
-    name = "filter_dataset_op_test",
+    name = "make_tf_record_dataset_test",
     size = "medium",
-    srcs = ["filter_dataset_op_test.py"],
+    srcs = ["make_tf_record_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
+    tags = ["no_pip"],
+    deps = [
+        ":reader_dataset_ops_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/util:nest",
     ],
+)
+
+py_test(
+    name = "map_and_batch_test",
+    size = "medium",
+    srcs = ["map_and_batch_test.py"],
+    srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
         "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
@@ -294,11 +350,7 @@ py_test(
     size = "small",
     srcs = ["map_defun_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    tags = ["no_pip"],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:check_ops",
@@ -317,15 +369,56 @@ py_test(
 )
 
 py_test(
-    name = "parsing_ops_test",
+    name = "override_threadpool_test",
     size = "small",
-    srcs = ["parsing_ops_test.py"],
+    srcs = ["override_threadpool_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python/data/experimental/ops:threadpool",
+        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "parallel_interleave_test",
+    size = "medium",
+    srcs = ["parallel_interleave_test.py"],
     srcs_version = "PY2AND3",
     tags = [
         "no_oss",
         "no_pip",
-        "no_windows",
+        "notap",
     ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "parse_example_dataset_test",
+    size = "small",
+    srcs = ["parse_example_dataset_test.py"],
+    srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client_testlib",
@@ -344,53 +437,20 @@ py_test(
 )
 
 cuda_py_test(
-    name = "prefetching_ops_test",
+    name = "prefetch_to_device_test",
     size = "small",
-    srcs = ["prefetching_ops_test.py"],
+    srcs = ["prefetch_to_device_test.py"],
     additional_deps = [
         "//tensorflow/python/data/experimental/ops:prefetching_ops",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python/compat:compat",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-        "no_windows_gpu",
-    ],
-)
-
-py_test(
-    name = "range_dataset_op_test",
-    size = "small",
-    srcs = ["range_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
-    deps = [
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/experimental/ops:counter",
-        "//tensorflow/python/data/experimental/ops:enumerate_ops",
-        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
+    tags = ["no_windows_gpu"],
 )
 
 py_library(
@@ -421,41 +481,12 @@ py_library(
 )
 
 py_test(
-    name = "reader_dataset_ops_test",
-    size = "medium",
-    srcs = ["reader_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
-    deps = [
-        ":reader_dataset_ops_test_base",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python/data/experimental/ops:readers",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/python/data/util:nest",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "resample_test",
+    name = "rejection_resample_test",
     size = "medium",
-    srcs = ["resample_test.py"],
+    srcs = ["rejection_resample_test.py"],
     shard_count = 2,
     srcs_version = "PY2AND3",
     tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
         "noasan",
         "optonly",
     ],
@@ -477,15 +508,27 @@ py_test(
 )
 
 py_test(
-    name = "scan_dataset_op_test",
-    size = "small",
-    srcs = ["scan_dataset_op_test.py"],
+    name = "restructured_dataset_test",
+    size = "medium",
+    srcs = ["restructured_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
     ],
+)
+
+py_test(
+    name = "scan_test",
+    size = "small",
+    srcs = ["scan_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -503,14 +546,12 @@ py_test(
 )
 
 py_test(
-    name = "shuffle_dataset_op_test",
+    name = "shuffle_and_repeat_test",
     size = "medium",
-    srcs = ["shuffle_dataset_op_test.py"],
+    srcs = ["shuffle_and_repeat_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-        "no_oss",
         "no_pip",
-        "no_windows",
         "optonly",
     ],
     deps = [
@@ -525,8 +566,8 @@ py_test(
 )
 
 py_library(
-    name = "sql_dataset_op_test_base",
-    srcs = ["sql_dataset_op_test_base.py"],
+    name = "sql_dataset_test_base",
+    srcs = ["sql_dataset_test_base.py"],
     srcs_version = "PY2AND3",
     visibility = [
         "//tensorflow/python/data/experimental/kernel_tests:__pkg__",
@@ -543,17 +584,13 @@ py_library(
 )
 
 py_test(
-    name = "sql_dataset_op_test",
+    name = "sql_dataset_test",
     size = "small",
-    srcs = ["sql_dataset_op_test.py"],
+    srcs = ["sql_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    tags = ["no_pip"],
     deps = [
-        ":sql_dataset_op_test_base",
+        ":sql_dataset_test_base",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
@@ -565,11 +602,7 @@ py_test(
     size = "medium",
     srcs = ["stats_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    tags = ["no_pip"],
     deps = [
         ":reader_dataset_ops_test_base",
         ":stats_dataset_test_base",
@@ -595,68 +628,60 @@ py_library(
 )
 
 py_test(
-    name = "threadpool_dataset_ops_test",
+    name = "tf_record_writer_test",
     size = "small",
-    srcs = ["threadpool_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    srcs = ["tf_record_writer_test.py"],
     deps = [
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python/data/experimental/ops:threadpool",
-        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:writers",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python/data/ops:readers",
     ],
 )
 
 py_test(
-    name = "unique_dataset_op_test",
-    size = "small",
-    srcs = ["unique_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    name = "unbatch_test",
+    size = "medium",
+    srcs = ["unbatch_test.py"],
     deps = [
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
         "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
 py_test(
-    name = "writer_ops_test",
+    name = "unique_test",
     size = "small",
-    srcs = ["writer_ops_test.py"],
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    srcs = ["unique_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
-        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:lib",
+        "//tensorflow/python:errors",
         "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:writers",
+        "//tensorflow/python/data/experimental/ops:unique",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:readers",
     ],
 )
diff --git a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
deleted file mode 100644
index 956b4518f6..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
+++ /dev/null
@@ -1,686 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for the experimental input pipeline ops."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import time
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python.client import session
-from tensorflow.python.data.experimental.ops import batching
-from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import script_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.platform import test
-from tensorflow.python.util import compat
-
-
-class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
-
-  def testDenseToSparseBatchDataset(self):
-    components = np.random.randint(12, size=(100,)).astype(np.int32)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.fill([x], x)).apply(
-            batching.dense_to_sparse_batch(4, [12]))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      for start in range(0, len(components), 4):
-        results = sess.run(get_next)
-        self.assertAllEqual([[i, j]
-                             for i, c in enumerate(components[start:start + 4])
-                             for j in range(c)], results.indices)
-        self.assertAllEqual(
-            [c for c in components[start:start + 4] for _ in range(c)],
-            results.values)
-        self.assertAllEqual([min(4,
-                                 len(components) - start), 12],
-                            results.dense_shape)
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testDenseToSparseBatchDatasetWithUnknownShape(self):
-    components = np.random.randint(5, size=(40,)).astype(np.int32)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.fill([x, x], x)).apply(
-            batching.dense_to_sparse_batch(
-                4, [5, None])).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      for start in range(0, len(components), 4):
-        results = sess.run(get_next)
-        self.assertAllEqual([[i, j, z]
-                             for i, c in enumerate(components[start:start + 4])
-                             for j in range(c)
-                             for z in range(c)], results.indices)
-        self.assertAllEqual([
-            c
-            for c in components[start:start + 4] for _ in range(c)
-            for _ in range(c)
-        ], results.values)
-        self.assertAllEqual([
-            min(4,
-                len(components) - start), 5,
-            np.max(components[start:start + 4])
-        ], results.dense_shape)
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testDenseToSparseBatchDatasetWithInvalidShape(self):
-    input_tensor = array_ops.constant([[1]])
-    with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"):
-      dataset_ops.Dataset.from_tensors(input_tensor).apply(
-          batching.dense_to_sparse_batch(4, [-2])).make_initializable_iterator()
-
-  def testDenseToSparseBatchDatasetShapeErrors(self):
-    input_tensor = array_ops.placeholder(dtypes.int32)
-    iterator = (
-        dataset_ops.Dataset.from_tensors(input_tensor).apply(
-            batching.dense_to_sparse_batch(4, [12]))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      # Initialize with an input tensor of incompatible rank.
-      sess.run(init_op, feed_dict={input_tensor: [[1]]})
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "incompatible with the row shape"):
-        sess.run(get_next)
-
-      # Initialize with an input tensor that is larger than `row_shape`.
-      sess.run(init_op, feed_dict={input_tensor: range(13)})
-      with self.assertRaisesRegexp(errors.DataLossError,
-                                   "larger than the row shape"):
-        sess.run(get_next)
-
-  def testUnbatchWithUnknownRankInput(self):
-    placeholder = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensors(placeholder).apply(
-        batching.unbatch())
-    iterator = dataset.make_initializable_iterator()
-    next_elem = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer, feed_dict={placeholder: [0, 1, 2, 3]})
-      for i in range(4):
-        self.assertEqual(i, sess.run(next_elem))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_elem)
-
-  def testUnbatchScalarDataset(self):
-    data = tuple([math_ops.range(10) for _ in range(3)])
-    data = dataset_ops.Dataset.from_tensor_slices(data)
-    expected_types = (dtypes.int32,) * 3
-    data = data.batch(2)
-    self.assertEqual(expected_types, data.output_types)
-    data = data.apply(batching.unbatch())
-    self.assertEqual(expected_types, data.output_types)
-
-    iterator = data.make_one_shot_iterator()
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual((i,) * 3, sess.run(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
-
-  def testUnbatchDatasetWithStrings(self):
-    data = tuple([math_ops.range(10) for _ in range(3)])
-    data = dataset_ops.Dataset.from_tensor_slices(data)
-    data = data.map(lambda x, y, z: (x, string_ops.as_string(y), z))
-    expected_types = (dtypes.int32, dtypes.string, dtypes.int32)
-    data = data.batch(2)
-    self.assertEqual(expected_types, data.output_types)
-    data = data.apply(batching.unbatch())
-    self.assertEqual(expected_types, data.output_types)
-
-    iterator = data.make_one_shot_iterator()
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual((i, compat.as_bytes(str(i)), i), sess.run(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
-
-  def testUnbatchDatasetWithSparseTensor(self):
-    st = sparse_tensor.SparseTensorValue(
-        indices=[[i, i] for i in range(10)],
-        values=list(range(10)),
-        dense_shape=[10, 10])
-    data = dataset_ops.Dataset.from_tensors(st)
-    data = data.apply(batching.unbatch())
-    data = data.batch(5)
-    data = data.apply(batching.unbatch())
-    iterator = data.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        st_row = sess.run(next_element)
-        self.assertEqual([i], st_row.indices)
-        self.assertEqual([i], st_row.values)
-        self.assertEqual([10], st_row.dense_shape)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testUnbatchDatasetWithDenseAndSparseTensor(self):
-    st = sparse_tensor.SparseTensorValue(
-        indices=[[i, i] for i in range(10)],
-        values=list(range(10)),
-        dense_shape=[10, 10])
-    data = dataset_ops.Dataset.from_tensors((list(range(10)), st))
-    data = data.apply(batching.unbatch())
-    data = data.batch(5)
-    data = data.apply(batching.unbatch())
-    iterator = data.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        dense_elem, st_row = sess.run(next_element)
-        self.assertEqual(i, dense_elem)
-        self.assertEqual([i], st_row.indices)
-        self.assertEqual([i], st_row.values)
-        self.assertEqual([10], st_row.dense_shape)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testUnbatchSingleElementTupleDataset(self):
-    data = tuple([(math_ops.range(10),) for _ in range(3)])
-    data = dataset_ops.Dataset.from_tensor_slices(data)
-    expected_types = ((dtypes.int32,),) * 3
-    data = data.batch(2)
-    self.assertEqual(expected_types, data.output_types)
-    data = data.apply(batching.unbatch())
-    self.assertEqual(expected_types, data.output_types)
-
-    iterator = data.make_one_shot_iterator()
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(((i,),) * 3, sess.run(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
-
-  def testUnbatchMultiElementTupleDataset(self):
-    data = tuple([(math_ops.range(10 * i, 10 * i + 10),
-                   array_ops.fill([10], "hi")) for i in range(3)])
-    data = dataset_ops.Dataset.from_tensor_slices(data)
-    expected_types = ((dtypes.int32, dtypes.string),) * 3
-    data = data.batch(2)
-    self.assertAllEqual(expected_types, data.output_types)
-    data = data.apply(batching.unbatch())
-    self.assertAllEqual(expected_types, data.output_types)
-
-    iterator = data.make_one_shot_iterator()
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")),
-                         sess.run(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
-
-  def testUnbatchEmpty(self):
-    data = dataset_ops.Dataset.from_tensors(
-        (constant_op.constant([]), constant_op.constant([], shape=[0, 4]),
-         constant_op.constant([], shape=[0, 4, 0])))
-    data = data.apply(batching.unbatch())
-    iterator = data.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testUnbatchStaticShapeMismatch(self):
-    data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8),
-                                             np.arange(9)))
-    with self.assertRaises(ValueError):
-      data.apply(batching.unbatch())
-
-  def testUnbatchDynamicShapeMismatch(self):
-    ph1 = array_ops.placeholder(dtypes.int32, shape=[None])
-    ph2 = array_ops.placeholder(dtypes.int32, shape=None)
-    data = dataset_ops.Dataset.from_tensors((ph1, ph2))
-    data = data.apply(batching.unbatch())
-    iterator = data.make_initializable_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      # Mismatch in the 0th dimension.
-      sess.run(
-          iterator.initializer,
-          feed_dict={
-              ph1: np.arange(7).astype(np.int32),
-              ph2: np.arange(8).astype(np.int32)
-          })
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(next_element)
-
-      # No 0th dimension (i.e. scalar value) for one component.
-      sess.run(
-          iterator.initializer,
-          feed_dict={
-              ph1: np.arange(7).astype(np.int32),
-              ph2: 7
-          })
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(next_element)
-
-  @parameterized.named_parameters(
-      ("Default", None, None),
-      ("SequentialCalls", 1, None),
-      ("ParallelCalls", 2, None),
-      ("ParallelBatches", None, 10),
-  )
-  def testMapAndBatch(self, num_parallel_calls, num_parallel_batches):
-    """Test a dataset that maps a TF function across its input elements."""
-    # The pipeline is TensorSliceDataset ->
-    # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size).
-    components = (np.arange(7),
-                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
-                  np.array(37.0) * np.arange(7))
-
-    count = array_ops.placeholder(dtypes.int64, shape=[])
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-
-    def _map_fn(x, y, z):
-      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
-            batching.map_and_batch(
-                map_func=_map_fn,
-                batch_size=batch_size,
-                num_parallel_calls=num_parallel_calls,
-                num_parallel_batches=num_parallel_batches))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    self.assertEqual([[None] + list(c.shape[1:]) for c in components],
-                     [t.shape.as_list() for t in get_next])
-
-    with self.cached_session() as sess:
-      # Batch of a finite input, where the batch_size divides the
-      # total number of elements.
-      sess.run(init_op, feed_dict={count: 28, batch_size: 14})
-      num_batches = (28 * 7) // 14
-      for i in range(num_batches):
-        result = sess.run(get_next)
-        for component, result_component in zip(components, result):
-          for j in range(14):
-            self.assertAllEqual(component[(i * 14 + j) % 7]**2,
-                                result_component[j])
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-      # Batch of a finite input, where the batch_size does not
-      # divide the total number of elements.
-      sess.run(init_op, feed_dict={count: 14, batch_size: 8})
-
-      # We expect (num_batches - 1) full-sized batches.
-      num_batches = int(math.ceil((14 * 7) / 8))
-      for i in range(num_batches - 1):
-        result = sess.run(get_next)
-        for component, result_component in zip(components, result):
-          for j in range(8):
-            self.assertAllEqual(component[(i * 8 + j) % 7]**2,
-                                result_component[j])
-      result = sess.run(get_next)
-      for component, result_component in zip(components, result):
-        for j in range((14 * 7) % 8):
-          self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
-                              result_component[j])
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-      # Batch of an empty input should fail straight away.
-      sess.run(init_op, feed_dict={count: 0, batch_size: 8})
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-      # Empty batch should be an initialization time error.
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(init_op, feed_dict={count: 14, batch_size: 0})
-
-  @parameterized.named_parameters(
-      ("Even", False),
-      ("Uneven", True),
-  )
-  def testMapAndBatchPartialBatch(self, drop_remainder):
-    iterator = (
-        dataset_ops.Dataset.range(10).apply(
-            batching.map_and_batch(
-                lambda x: array_ops.reshape(x * x, [1]),
-                batch_size=4,
-                drop_remainder=drop_remainder)).make_one_shot_iterator())
-    if drop_remainder:
-      self.assertEqual([4, 1], iterator.output_shapes.as_list())
-    else:
-      self.assertEqual([None, 1], iterator.output_shapes.as_list())
-    next_element = iterator.get_next()
-    with self.cached_session() as sess:
-      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
-      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
-      if not drop_remainder:
-        self.assertAllEqual([[64], [81]], sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testMapAndBatchYieldsPartialBatch(self):
-    iterator = (dataset_ops.Dataset.range(10)
-                .apply(batching.map_and_batch(
-                    lambda x: array_ops.reshape(x * x, [1]), 4))
-                .make_one_shot_iterator())
-    self.assertEqual([None, 1], iterator.output_shapes.as_list())
-    next_element = iterator.get_next()
-    with self.cached_session() as sess:
-      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
-      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
-      self.assertAllEqual([[64], [81]], sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testMapAndBatchParallelGetNext(self):
-    iterator = (dataset_ops.Dataset.range(50000)
-                .apply(batching.map_and_batch(lambda x: x, batch_size=100))
-                .make_one_shot_iterator())
-    elements = []
-    for _ in range(100):
-      elements.append(iterator.get_next())
-    with self.cached_session() as sess:
-      for i in range(5):
-        got = sess.run(elements)
-        got.sort(key=lambda x: x[0])
-        expected = []
-        for j in range(100):
-          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
-        self.assertAllEqual(got, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elements)
-
-  def testMapAndBatchParallelGetNextDropRemainder(self):
-    iterator = (
-        dataset_ops.Dataset.range(49999).apply(
-            batching.map_and_batch(
-                lambda x: x, batch_size=100, drop_remainder=True))
-        .make_one_shot_iterator())
-    elements = []
-    for _ in range(100):
-      elements.append(iterator.get_next())
-    with self.cached_session() as sess:
-      for i in range(4):
-        got = sess.run(elements)
-        got.sort(key=lambda x: x[0])
-        expected = []
-        for j in range(100):
-          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
-        self.assertAllEqual(got, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elements)
-
-  def testMapAndBatchSparse(self):
-
-    def _sparse(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=[[0]], values=(i * [1]), dense_shape=[1])
-
-    iterator = dataset_ops.Dataset.range(10).apply(
-        batching.map_and_batch(_sparse, 5)).make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(2):
-        actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensorValue(
-            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
-            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
-            dense_shape=[5, 1])
-        self.assertTrue(sparse_tensor.is_sparse(actual))
-        self.assertSparseValuesEqual(actual, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testMapAndBatchFails(self):
-    """Test a dataset that maps a TF function across its input elements."""
-    dataset = dataset_ops.Dataset.from_tensors(
-        array_ops.check_numerics(
-            constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-    iterator = (
-        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    with self.cached_session() as sess:
-      with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
-        sess.run(init_op, feed_dict={batch_size: 14})
-
-  def testMapAndBatchShapeMismatch(self):
-    """Test a dataset that maps a TF function across its input elements."""
-
-    def generator():
-      yield [1]
-      yield [2]
-      yield [3]
-      yield [[4, 5, 6]]
-
-    dataset = dataset_ops.Dataset.from_generator(
-        generator, output_types=dtypes.int32)
-    batch_size = 4
-    iterator = (
-        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "number of elements does not match"):
-        sess.run(get_next)
-
-  def testMapAndBatchImplicitDispose(self):
-    # Tests whether a map and batch dataset will be cleaned up correctly when
-    # the pipeline does not run it until exhaustion.
-    # The pipeline is TensorSliceDataset -> RepeatDataset(1000) ->
-    # MapAndBatchDataset(f=square_3, batch_size=100).
-    components = (np.arange(1000),
-                  np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis],
-                  np.array(37.0) * np.arange(1000))
-
-    def _map_fn(x, y, z):
-      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-
-    dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
-        1000).apply(batching.map_and_batch(_map_fn, batch_size=100))
-    dataset = dataset.prefetch(5)
-    iterator = dataset.make_one_shot_iterator()
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(3):
-        sess.run(get_next)
-
-  @parameterized.named_parameters(
-      ("1", 0),
-      ("2", 5),
-      ("3", 10),
-      ("4", 90),
-      ("5", 95),
-      ("6", 99),
-  )
-  def testMapAndBatchOutOfRangeError(self, threshold):
-
-    def raising_py_fn(i):
-      if i >= threshold:
-        raise StopIteration()
-      else:
-        return i
-
-    iterator = (
-        dataset_ops.Dataset.range(100).apply(
-            batching.map_and_batch(
-                lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
-                batch_size=10)).make_one_shot_iterator())
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(threshold // 10):
-        self.assertAllEqual([i * 10 + j for j in range(10)], sess.run(get_next))
-      if threshold % 10 != 0:
-        self.assertAllEqual(
-            [threshold // 10 * 10 + j for j in range(threshold % 10)],
-            sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  @parameterized.named_parameters(
-      ("1", False, dtypes.bool),
-      ("2", -42, dtypes.int8),
-      ("3", -42, dtypes.int16),
-      ("4", -42, dtypes.int32),
-      ("5", -42, dtypes.int64),
-      ("6", 42, dtypes.uint8),
-      ("7", 42, dtypes.uint16),
-      ("8", 42.0, dtypes.float16),
-      ("9", 42.0, dtypes.float32),
-      ("10", 42.0, dtypes.float64),
-      ("11", b"hello", dtypes.string),
-  )
-  def testMapAndBatchTypes(self, element, dtype):
-    def gen():
-      yield element
-
-    dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply(
-        batching.map_and_batch(lambda x: x, batch_size=10))
-
-    get_next = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(10):
-        self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
-
-
-class UnbatchDatasetBenchmark(test.Benchmark):
-
-  def benchmarkNativeUnbatch(self):
-    batch_sizes = [1, 2, 5, 10, 20, 50]
-    elems_per_trial = 10000
-    with ops.Graph().as_default():
-      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
-      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
-      dataset = dataset.batch(batch_size_placeholder)
-      dataset = dataset.apply(batching.unbatch())
-      dataset = dataset.skip(elems_per_trial)
-      iterator = dataset.make_initializable_iterator()
-      next_element = iterator.get_next()
-
-      with session.Session() as sess:
-        for batch_size in batch_sizes:
-          deltas = []
-          for _ in range(5):
-            sess.run(
-                iterator.initializer,
-                feed_dict={batch_size_placeholder: batch_size})
-            start = time.time()
-            sess.run(next_element.op)
-            end = time.time()
-            deltas.append((end - start) / elems_per_trial)
-
-          median_wall_time = np.median(deltas)
-          print("Unbatch (native) batch size: %d Median wall time per element:"
-                " %f microseconds" % (batch_size, median_wall_time * 1e6))
-          self.report_benchmark(
-              iters=10000,
-              wall_time=median_wall_time,
-              name="benchmark_unbatch_dataset_native_batch_size_%d" %
-              batch_size)
-
-  # Include a benchmark of the previous `unbatch()` implementation that uses
-  # a composition of more primitive ops. Eventually we'd hope to generate code
-  # that is as good in both cases.
-  def benchmarkOldUnbatchImplementation(self):
-    batch_sizes = [1, 2, 5, 10, 20, 50]
-    elems_per_trial = 10000
-    with ops.Graph().as_default():
-      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
-      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
-      dataset = dataset.batch(batch_size_placeholder)
-      dataset = dataset.flat_map(dataset_ops.Dataset.from_tensor_slices)
-      dataset = dataset.skip(elems_per_trial)
-      iterator = dataset.make_initializable_iterator()
-      next_element = iterator.get_next()
-
-      with session.Session() as sess:
-        for batch_size in batch_sizes:
-          deltas = []
-          for _ in range(5):
-            sess.run(
-                iterator.initializer,
-                feed_dict={batch_size_placeholder: batch_size})
-            start = time.time()
-            sess.run(next_element.op)
-            end = time.time()
-            deltas.append((end - start) / elems_per_trial)
-
-          median_wall_time = np.median(deltas)
-          print("Unbatch (unfused) batch size: %d Median wall time per element:"
-                " %f microseconds" % (batch_size, median_wall_time * 1e6))
-          self.report_benchmark(
-              iters=10000,
-              wall_time=median_wall_time,
-              name="benchmark_unbatch_dataset_unfused_batch_size_%d" %
-              batch_size)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
new file mode 100644
index 0000000000..3903ec49b9
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
@@ -0,0 +1,322 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.bucket_by_sequence_length()."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import random
+
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+def _element_length_fn(x, y=None):
+  del y
+  return array_ops.shape(x)[0]
+
+
+def _to_sparse_tensor(record):
+  return sparse_tensor.SparseTensor(**record)
+
+
+def _format_record(array, sparse):
+  if sparse:
+    return {
+        "values": array,
+        "indices": [[i] for i in range(len(array))],
+        "dense_shape": (len(array),)
+    }
+  return array
+
+
+def _get_record_type(sparse):
+  if sparse:
+    return {
+        "values": dtypes.int64,
+        "indices": dtypes.int64,
+        "dense_shape": dtypes.int64
+    }
+  return dtypes.int32
+
+
+def _get_record_shape(sparse):
+  if sparse:
+    return {
+        "values": tensor_shape.TensorShape([None,]),
+        "indices": tensor_shape.TensorShape([None, 1]),
+        "dense_shape": tensor_shape.TensorShape([1,])
+    }
+  return tensor_shape.TensorShape([None])
+
+
+class BucketBySequenceLengthTest(test_base.DatasetTestBase):
+
+  def testBucket(self):
+
+    boundaries = [10, 20, 30]
+    batch_sizes = [10, 8, 4, 2]
+    lengths = [8, 13, 25, 35]
+
+    def build_dataset(sparse):
+      def _generator():
+        # Produce 1 batch for each bucket
+        elements = []
+        for batch_size, length in zip(batch_sizes, lengths):
+          record_len = length - 1
+          for _ in range(batch_size):
+            elements.append([1] * record_len)
+            record_len = length
+        random.shuffle(elements)
+        for el in elements:
+          yield (_format_record(el, sparse),)
+      dataset = dataset_ops.Dataset.from_generator(
+          _generator,
+          (_get_record_type(sparse),),
+          (_get_record_shape(sparse),))
+      if sparse:
+        dataset = dataset.map(lambda x: (_to_sparse_tensor(x),))
+      return dataset
+
+    def _test_bucket_by_padding(no_padding):
+      dataset = build_dataset(sparse=no_padding)
+      dataset = dataset.apply(
+          grouping.bucket_by_sequence_length(
+              _element_length_fn,
+              boundaries,
+              batch_sizes,
+              no_padding=no_padding))
+      batch, = dataset.make_one_shot_iterator().get_next()
+
+      with self.cached_session() as sess:
+        batches = []
+        for _ in range(4):
+          batches.append(sess.run(batch))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(batch)
+      batch_sizes_val = []
+      lengths_val = []
+      for batch in batches:
+        shape = batch.dense_shape if no_padding else batch.shape
+        batch_size = shape[0]
+        length = shape[1]
+        batch_sizes_val.append(batch_size)
+        lengths_val.append(length)
+        sum_check = batch.values.sum() if no_padding else batch.sum()
+        self.assertEqual(sum_check, batch_size * length - 1)
+      self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
+      self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
+      self.assertEqual(sorted(lengths), sorted(lengths_val))
+
+    for no_padding in (True, False):
+      _test_bucket_by_padding(no_padding)
+
+  def testPadToBoundary(self):
+
+    boundaries = [10, 20, 30]
+    batch_sizes = [10, 8, 4, 2]
+    lengths = [8, 13, 25]
+
+    def element_gen():
+      # Produce 1 batch for each bucket
+      elements = []
+      for batch_size, length in zip(batch_sizes[:-1], lengths):
+        for _ in range(batch_size):
+          elements.append([1] * length)
+      random.shuffle(elements)
+      for el in elements:
+        yield (el,)
+      for _ in range(batch_sizes[-1]):
+        el = [1] * (boundaries[-1] + 5)
+        yield (el,)
+
+    element_len = lambda el: array_ops.shape(el)[0]
+    dataset = dataset_ops.Dataset.from_generator(
+        element_gen, (dtypes.int64,), ([None],)).apply(
+            grouping.bucket_by_sequence_length(
+                element_len, boundaries, batch_sizes,
+                pad_to_bucket_boundary=True))
+    batch, = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      batches = []
+      for _ in range(3):
+        batches.append(sess.run(batch))
+      with self.assertRaisesOpError("bucket_boundaries"):
+        sess.run(batch)
+    batch_sizes_val = []
+    lengths_val = []
+    for batch in batches:
+      batch_size = batch.shape[0]
+      length = batch.shape[1]
+      batch_sizes_val.append(batch_size)
+      lengths_val.append(length)
+    batch_sizes = batch_sizes[:-1]
+    self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
+    self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
+    self.assertEqual([boundary - 1 for boundary in sorted(boundaries)],
+                     sorted(lengths_val))
+
+  def testPadToBoundaryNoExtraneousPadding(self):
+
+    boundaries = [3, 7, 11]
+    batch_sizes = [2, 2, 2, 2]
+    lengths = range(1, 11)
+
+    def element_gen():
+      for length in lengths:
+        yield ([1] * length,)
+
+    element_len = lambda element: array_ops.shape(element)[0]
+    dataset = dataset_ops.Dataset.from_generator(
+        element_gen, (dtypes.int64,), ([None],)).apply(
+            grouping.bucket_by_sequence_length(
+                element_len, boundaries, batch_sizes,
+                pad_to_bucket_boundary=True))
+    batch, = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      batches = []
+      for _ in range(5):
+        batches.append(sess.run(batch))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(batch)
+
+    self.assertAllEqual(batches[0], [[1, 0],
+                                     [1, 1]])
+    self.assertAllEqual(batches[1], [[1, 1, 1, 0, 0, 0],
+                                     [1, 1, 1, 1, 0, 0]])
+    self.assertAllEqual(batches[2], [[1, 1, 1, 1, 1, 0],
+                                     [1, 1, 1, 1, 1, 1]])
+    self.assertAllEqual(batches[3], [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
+                                     [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])
+    self.assertAllEqual(batches[4], [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
+                                     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+
+  def testTupleElements(self):
+
+    def build_dataset(sparse):
+      def _generator():
+        text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]]
+        label = [1, 2, 1, 2]
+        for x, y in zip(text, label):
+          yield (_format_record(x, sparse), y)
+      dataset = dataset_ops.Dataset.from_generator(
+          generator=_generator,
+          output_types=(_get_record_type(sparse), dtypes.int32),
+          output_shapes=(_get_record_shape(sparse),
+                         tensor_shape.TensorShape([])))
+      if sparse:
+        dataset = dataset.map(lambda x, y: (_to_sparse_tensor(x), y))
+      return dataset
+
+    def _test_tuple_elements_by_padding(no_padding):
+      dataset = build_dataset(sparse=no_padding)
+      dataset = dataset.apply(grouping.bucket_by_sequence_length(
+          element_length_func=_element_length_fn,
+          bucket_batch_sizes=[2, 2, 2],
+          bucket_boundaries=[0, 8],
+          no_padding=no_padding))
+      shapes = dataset.output_shapes
+      self.assertEqual([None, None], shapes[0].as_list())
+      self.assertEqual([None], shapes[1].as_list())
+
+    for no_padding in (True, False):
+      _test_tuple_elements_by_padding(no_padding)
+
+  def testBucketSparse(self):
+    """Tests bucketing of sparse tensors (case where `no_padding` == True).
+
+    Test runs on following dataset:
+      [
+        [0],
+        [0, 1],
+        [0, 1, 2]
+        ...
+        [0, ..., max_len - 1]
+      ]
+    Sequences are bucketed by length and batched with
+      `batch_size` < `bucket_size`.
+    """
+
+    min_len = 0
+    max_len = 100
+    batch_size = 7
+    bucket_size = 10
+
+    def _build_dataset():
+      input_data = [range(i+1) for i in range(min_len, max_len)]
+      def generator_fn():
+        for record in input_data:
+          yield _format_record(record, sparse=True)
+      dataset = dataset_ops.Dataset.from_generator(
+          generator=generator_fn,
+          output_types=_get_record_type(sparse=True))
+      dataset = dataset.map(_to_sparse_tensor)
+      return dataset
+
+    def _compute_expected_batches():
+      """Computes expected batch outputs and stores in a set."""
+      all_expected_sparse_tensors = set()
+      for bucket_start_len in range(min_len, max_len, bucket_size):
+        for batch_offset in range(0, bucket_size, batch_size):
+          batch_start_len = bucket_start_len + batch_offset
+          batch_end_len = min(batch_start_len + batch_size,
+                              bucket_start_len + bucket_size)
+          expected_indices = []
+          expected_values = []
+          for length in range(batch_start_len, batch_end_len):
+            for val in range(length + 1):
+              expected_indices.append((length - batch_start_len, val))
+              expected_values.append(val)
+          expected_sprs_tensor = (tuple(expected_indices),
+                                  tuple(expected_values))
+          all_expected_sparse_tensors.add(expected_sprs_tensor)
+      return all_expected_sparse_tensors
+
+    def _compute_batches(dataset):
+      """Computes actual batch outputs of dataset and stores in a set."""
+      batch = dataset.make_one_shot_iterator().get_next()
+      all_sparse_tensors = set()
+      with self.cached_session() as sess:
+        with self.assertRaises(errors.OutOfRangeError):
+          while True:
+            output = sess.run(batch)
+            sprs_tensor = (tuple([tuple(idx) for idx in output.indices]),
+                           tuple(output.values))
+            all_sparse_tensors.add(sprs_tensor)
+      return all_sparse_tensors
+
+    dataset = _build_dataset()
+    boundaries = range(min_len + bucket_size + 1, max_len, bucket_size)
+    dataset = dataset.apply(grouping.bucket_by_sequence_length(
+        _element_length_fn,
+        boundaries,
+        [batch_size] * (len(boundaries) + 1),
+        no_padding=True))
+    batches = _compute_batches(dataset)
+    expected_batches = _compute_expected_batches()
+    self.assertEqual(batches, expected_batches)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py b/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
deleted file mode 100644
index 153a03989b..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
+++ /dev/null
@@ -1,824 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for the experimental input pipeline ops."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import random
-
-import numpy as np
-
-from tensorflow.python.data.experimental.ops import grouping
-from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.platform import test
-
-
-class GroupByReducerTest(test_base.DatasetTestBase):
-
-  def checkResults(self, dataset, shapes, values):
-    self.assertEqual(shapes, dataset.output_shapes)
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      for expected in values:
-        got = sess.run(get_next)
-        self.assertEqual(got, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testSum(self):
-    reducer = grouping.Reducer(
-        init_func=lambda _: np.int64(0),
-        reduce_func=lambda x, y: x + y,
-        finalize_func=lambda x: x)
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.range(2 * i).apply(
-          grouping.group_by_reducer(lambda x: x % 2, reducer))
-      self.checkResults(
-          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
-
-  def testAverage(self):
-
-    def reduce_fn(x, y):
-      return (x[0] * x[1] + math_ops.cast(y, dtypes.float32)) / (
-          x[1] + 1), x[1] + 1
-
-    reducer = grouping.Reducer(
-        init_func=lambda _: (0.0, 0.0),
-        reduce_func=reduce_fn,
-        finalize_func=lambda x, _: x)
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.range(2 * i).apply(
-          grouping.group_by_reducer(
-              lambda x: math_ops.cast(x, dtypes.int64) % 2, reducer))
-      self.checkResults(
-          dataset, shapes=tensor_shape.scalar(), values=[i - 1, i])
-
-  def testConcat(self):
-    components = np.array(list("abcdefghijklmnopqrst")).view(np.chararray)
-    reducer = grouping.Reducer(
-        init_func=lambda x: "",
-        reduce_func=lambda x, y: x + y[0],
-        finalize_func=lambda x: x)
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.zip(
-          (dataset_ops.Dataset.from_tensor_slices(components),
-           dataset_ops.Dataset.range(2 * i))).apply(
-               grouping.group_by_reducer(lambda x, y: y % 2, reducer))
-      self.checkResults(
-          dataset,
-          shapes=tensor_shape.scalar(),
-          values=[b"acegikmoqs" [:i], b"bdfhjlnprt" [:i]])
-
-  def testSparseSum(self):
-    def _sparse(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=np.array([[0, 0]]),
-          values=(i * np.array([1], dtype=np.int64)),
-          dense_shape=np.array([1, 1]))
-
-    reducer = grouping.Reducer(
-        init_func=lambda _: _sparse(np.int64(0)),
-        reduce_func=lambda x, y: _sparse(x.values[0] + y.values[0]),
-        finalize_func=lambda x: x.values[0])
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.range(2 * i).map(_sparse).apply(
-          grouping.group_by_reducer(lambda x: x.values[0] % 2, reducer))
-      self.checkResults(
-          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
-
-  def testChangingStateShape(self):
-
-    def reduce_fn(x, _):
-      # Statically known rank, but dynamic length.
-      larger_dim = array_ops.concat([x[0], x[0]], 0)
-      # Statically unknown rank.
-      larger_rank = array_ops.expand_dims(x[1], 0)
-      return larger_dim, larger_rank
-
-    reducer = grouping.Reducer(
-        init_func=lambda x: ([0], 1),
-        reduce_func=reduce_fn,
-        finalize_func=lambda x, y: (x, y))
-
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.from_tensors(np.int64(0)).repeat(i).apply(
-          grouping.group_by_reducer(lambda x: x, reducer))
-      self.assertEqual([None], dataset.output_shapes[0].as_list())
-      self.assertIs(None, dataset.output_shapes[1].ndims)
-      iterator = dataset.make_one_shot_iterator()
-      get_next = iterator.get_next()
-      with self.cached_session() as sess:
-        x, y = sess.run(get_next)
-        self.assertAllEqual([0] * (2**i), x)
-        self.assertAllEqual(np.array(1, ndmin=i), y)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testTypeMismatch(self):
-    reducer = grouping.Reducer(
-        init_func=lambda x: constant_op.constant(1, dtype=dtypes.int32),
-        reduce_func=lambda x, y: constant_op.constant(1, dtype=dtypes.int64),
-        finalize_func=lambda x: x)
-
-    dataset = dataset_ops.Dataset.range(10)
-    with self.assertRaisesRegexp(
-        TypeError,
-        "The element types for the new state must match the initial state."):
-      dataset.apply(
-          grouping.group_by_reducer(lambda _: np.int64(0), reducer))
-
-  # TODO(b/78665031): Remove once non-scalar keys are supported.
-  def testInvalidKeyShape(self):
-    reducer = grouping.Reducer(
-        init_func=lambda x: np.int64(0),
-        reduce_func=lambda x, y: x + y,
-        finalize_func=lambda x: x)
-
-    dataset = dataset_ops.Dataset.range(10)
-    with self.assertRaisesRegexp(
-        ValueError, "`key_func` must return a single tf.int64 tensor."):
-      dataset.apply(
-          grouping.group_by_reducer(lambda _: np.int64((0, 0)), reducer))
-
-  # TODO(b/78665031): Remove once non-int64 keys are supported.
-  def testInvalidKeyType(self):
-    reducer = grouping.Reducer(
-        init_func=lambda x: np.int64(0),
-        reduce_func=lambda x, y: x + y,
-        finalize_func=lambda x: x)
-
-    dataset = dataset_ops.Dataset.range(10)
-    with self.assertRaisesRegexp(
-        ValueError, "`key_func` must return a single tf.int64 tensor."):
-      dataset.apply(
-          grouping.group_by_reducer(lambda _: "wrong", reducer))
-
-  def testTuple(self):
-    def init_fn(_):
-      return np.array([], dtype=np.int64), np.int64(0)
-
-    def reduce_fn(state, value):
-      s1, s2 = state
-      v1, v2 = value
-      return array_ops.concat([s1, [v1]], 0), s2 + v2
-
-    def finalize_fn(s1, s2):
-      return s1, s2
-
-    reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
-    dataset = dataset_ops.Dataset.zip(
-        (dataset_ops.Dataset.range(10), dataset_ops.Dataset.range(10))).apply(
-            grouping.group_by_reducer(lambda x, y: np.int64(0), reducer))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      x, y = sess.run(get_next)
-      self.assertAllEqual(x, np.asarray([x for x in range(10)]))
-      self.assertEqual(y, 45)
-
-
-class GroupByWindowTest(test_base.DatasetTestBase):
-
-  def testSimple(self):
-    components = np.random.randint(100, size=(200,)).astype(np.int64)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).map(lambda x: x * x)
-        .apply(
-            grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
-                                     4)).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      counts = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          result = sess.run(get_next)
-          self.assertTrue(
-              all(x % 2 == 0
-                  for x in result) or all(x % 2 == 1)
-              for x in result)
-          counts.append(result.shape[0])
-
-      self.assertEqual(len(components), sum(counts))
-      num_full_batches = len([c for c in counts if c == 4])
-      self.assertGreaterEqual(num_full_batches, 24)
-      self.assertTrue(all(c == 4 for c in counts[:num_full_batches]))
-
-  def testImmediateOutput(self):
-    components = np.array(
-        [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0], dtype=np.int64)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).repeat(-1).apply(
-            grouping.group_by_window(lambda x: x % 3, lambda _, xs: xs.batch(4),
-                                     4)).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      # The input is infinite, so this test demonstrates that:
-      # 1. We produce output without having to consume the entire input,
-      # 2. Different buckets can produce output at different rates, and
-      # 3. For deterministic input, the output is deterministic.
-      for _ in range(3):
-        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
-        self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
-        self.assertAllEqual([2, 2, 2, 2], sess.run(get_next))
-        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
-
-  def testSmallGroups(self):
-    components = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
-                                     4)).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
-      self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
-      # The small outputs at the end are deterministically produced in key
-      # order.
-      self.assertAllEqual([0, 0, 0], sess.run(get_next))
-      self.assertAllEqual([1], sess.run(get_next))
-
-  def testEmpty(self):
-    iterator = (
-        dataset_ops.Dataset.range(4).apply(
-            grouping.group_by_window(lambda _: 0, lambda _, xs: xs, 0))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          "Window size must be greater than zero, but got 0."):
-        print(sess.run(get_next))
-
-  def testReduceFuncError(self):
-    components = np.random.randint(100, size=(200,)).astype(np.int64)
-
-    def reduce_func(_, xs):
-      # Introduce an incorrect padded shape that cannot (currently) be
-      # detected at graph construction time.
-      return xs.padded_batch(
-          4,
-          padded_shapes=(tensor_shape.TensorShape([]),
-                         constant_op.constant([5], dtype=dtypes.int64) * -1))
-
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: (x, ops.convert_to_tensor([x * x]))).apply(
-            grouping.group_by_window(lambda x, _: x % 2, reduce_func,
-                                     32)).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-  def testConsumeWindowDatasetMoreThanOnce(self):
-    components = np.random.randint(50, size=(200,)).astype(np.int64)
-
-    def reduce_func(key, window):
-      # Apply two different kinds of padding to the input: tight
-      # padding, and quantized (to a multiple of 10) padding.
-      return dataset_ops.Dataset.zip((
-          window.padded_batch(
-              4, padded_shapes=tensor_shape.TensorShape([None])),
-          window.padded_batch(
-              4, padded_shapes=ops.convert_to_tensor([(key + 1) * 10])),
-      ))
-
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.fill([math_ops.cast(x, dtypes.int32)], x))
-        .apply(grouping.group_by_window(
-            lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64),
-            reduce_func, 4))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      counts = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          tight_result, multiple_of_10_result = sess.run(get_next)
-          self.assertEqual(0, multiple_of_10_result.shape[1] % 10)
-          self.assertAllEqual(tight_result,
-                              multiple_of_10_result[:, :tight_result.shape[1]])
-          counts.append(tight_result.shape[0])
-      self.assertEqual(len(components), sum(counts))
-
-
-# NOTE(mrry): These tests are based on the tests in bucket_ops_test.py.
-# Currently, they use a constant batch size, though should be made to use a
-# different batch size per key.
-class BucketTest(test_base.DatasetTestBase):
-
-  def _dynamicPad(self, bucket, window, window_size):
-    # TODO(mrry): To match `tf.contrib.training.bucket()`, implement a
-    # generic form of padded_batch that pads every component
-    # dynamically and does not rely on static shape information about
-    # the arguments.
-    return dataset_ops.Dataset.zip(
-        (dataset_ops.Dataset.from_tensors(bucket),
-         window.padded_batch(
-             32, (tensor_shape.TensorShape([]), tensor_shape.TensorShape(
-                 [None]), tensor_shape.TensorShape([3])))))
-
-  def testSingleBucket(self):
-
-    def _map_fn(v):
-      return (v, array_ops.fill([v], v),
-              array_ops.fill([3], string_ops.as_string(v)))
-
-    input_dataset = (
-        dataset_ops.Dataset.from_tensor_slices(math_ops.range(32)).map(_map_fn))
-
-    bucketed_dataset = input_dataset.apply(
-        grouping.group_by_window(
-            lambda x, y, z: 0,
-            lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
-
-    iterator = bucketed_dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      which_bucket, bucketed_values = sess.run(get_next)
-
-      self.assertEqual(0, which_bucket)
-
-      expected_scalar_int = np.arange(32, dtype=np.int64)
-      expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
-      for i in range(32):
-        expected_unk_int64[i, :i] = i
-      expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T
-
-      self.assertAllEqual(expected_scalar_int, bucketed_values[0])
-      self.assertAllEqual(expected_unk_int64, bucketed_values[1])
-      self.assertAllEqual(expected_vec3_str, bucketed_values[2])
-
-  def testEvenOddBuckets(self):
-
-    def _map_fn(v):
-      return (v, array_ops.fill([v], v),
-              array_ops.fill([3], string_ops.as_string(v)))
-
-    input_dataset = (
-        dataset_ops.Dataset.from_tensor_slices(math_ops.range(64)).map(_map_fn))
-
-    bucketed_dataset = input_dataset.apply(
-        grouping.group_by_window(
-            lambda x, y, z: math_ops.cast(x % 2, dtypes.int64),
-            lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
-
-    iterator = bucketed_dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      # Get two minibatches (one containing even values, one containing odds)
-      which_bucket_even, bucketed_values_even = sess.run(get_next)
-      which_bucket_odd, bucketed_values_odd = sess.run(get_next)
-
-      # Count number of bucket_tensors.
-      self.assertEqual(3, len(bucketed_values_even))
-      self.assertEqual(3, len(bucketed_values_odd))
-
-      # Ensure bucket 0 was used for all minibatch entries.
-      self.assertAllEqual(0, which_bucket_even)
-      self.assertAllEqual(1, which_bucket_odd)
-
-      # Test the first bucket outputted, the events starting at 0
-      expected_scalar_int = np.arange(0, 32 * 2, 2, dtype=np.int64)
-      expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
-      for i in range(0, 32):
-        expected_unk_int64[i, :2 * i] = 2 * i
-        expected_vec3_str = np.vstack(
-            3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T
-
-      self.assertAllEqual(expected_scalar_int, bucketed_values_even[0])
-      self.assertAllEqual(expected_unk_int64, bucketed_values_even[1])
-      self.assertAllEqual(expected_vec3_str, bucketed_values_even[2])
-
-      # Test the second bucket outputted, the odds starting at 1
-      expected_scalar_int = np.arange(1, 32 * 2 + 1, 2, dtype=np.int64)
-      expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
-      for i in range(0, 32):
-        expected_unk_int64[i, :2 * i + 1] = 2 * i + 1
-        expected_vec3_str = np.vstack(
-            3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T
-
-      self.assertAllEqual(expected_scalar_int, bucketed_values_odd[0])
-      self.assertAllEqual(expected_unk_int64, bucketed_values_odd[1])
-      self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2])
-
-  def testEvenOddBucketsFilterOutAllOdd(self):
-
-    def _map_fn(v):
-      return {
-          "x": v,
-          "y": array_ops.fill([v], v),
-          "z": array_ops.fill([3], string_ops.as_string(v))
-      }
-
-    def _dynamic_pad_fn(bucket, window, _):
-      return dataset_ops.Dataset.zip(
-          (dataset_ops.Dataset.from_tensors(bucket),
-           window.padded_batch(
-               32, {
-                   "x": tensor_shape.TensorShape([]),
-                   "y": tensor_shape.TensorShape([None]),
-                   "z": tensor_shape.TensorShape([3])
-               })))
-
-    input_dataset = (
-        dataset_ops.Dataset.from_tensor_slices(math_ops.range(128)).map(_map_fn)
-        .filter(lambda d: math_ops.equal(d["x"] % 2, 0)))
-
-    bucketed_dataset = input_dataset.apply(
-        grouping.group_by_window(
-            lambda d: math_ops.cast(d["x"] % 2, dtypes.int64),
-            lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32))
-
-    iterator = bucketed_dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      # Get two minibatches ([0, 2, ...] and [64, 66, ...])
-      which_bucket0, bucketed_values_even0 = sess.run(get_next)
-      which_bucket1, bucketed_values_even1 = sess.run(get_next)
-
-      # Ensure that bucket 1 was completely filtered out
-      self.assertAllEqual(0, which_bucket0)
-      self.assertAllEqual(0, which_bucket1)
-      self.assertAllEqual(
-          np.arange(0, 64, 2, dtype=np.int64), bucketed_values_even0["x"])
-      self.assertAllEqual(
-          np.arange(64, 128, 2, dtype=np.int64), bucketed_values_even1["x"])
-
-  def testDynamicWindowSize(self):
-    components = np.arange(100).astype(np.int64)
-
-    # Key fn: even/odd
-    # Reduce fn: batches of 5
-    # Window size fn: even=5, odd=10
-
-    def window_size_func(key):
-      window_sizes = constant_op.constant([5, 10], dtype=dtypes.int64)
-      return window_sizes[key]
-
-    dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
-        grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(20),
-                                 None, window_size_func))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaises(errors.OutOfRangeError):
-        batches = 0
-        while True:
-          result = sess.run(get_next)
-          is_even = all(x % 2 == 0 for x in result)
-          is_odd = all(x % 2 == 1 for x in result)
-          self.assertTrue(is_even or is_odd)
-          expected_batch_size = 5 if is_even else 10
-          self.assertEqual(expected_batch_size, result.shape[0])
-          batches += 1
-
-      self.assertEqual(batches, 15)
-
-
-def _element_length_fn(x, y=None):
-  del y
-  return array_ops.shape(x)[0]
-
-
-def _to_sparse_tensor(record):
-  return sparse_tensor.SparseTensor(**record)
-
-
-def _format_record(array, sparse):
-  if sparse:
-    return {
-        "values": array,
-        "indices": [[i] for i in range(len(array))],
-        "dense_shape": (len(array),)
-    }
-  return array
-
-
-def _get_record_type(sparse):
-  if sparse:
-    return {
-        "values": dtypes.int64,
-        "indices": dtypes.int64,
-        "dense_shape": dtypes.int64
-    }
-  return dtypes.int32
-
-
-def _get_record_shape(sparse):
-  if sparse:
-    return {
-        "values": tensor_shape.TensorShape([None,]),
-        "indices": tensor_shape.TensorShape([None, 1]),
-        "dense_shape": tensor_shape.TensorShape([1,])
-    }
-  return tensor_shape.TensorShape([None])
-
-
-class BucketBySequenceLength(test_base.DatasetTestBase):
-
-  def testBucket(self):
-
-    boundaries = [10, 20, 30]
-    batch_sizes = [10, 8, 4, 2]
-    lengths = [8, 13, 25, 35]
-
-    def build_dataset(sparse):
-      def _generator():
-        # Produce 1 batch for each bucket
-        elements = []
-        for batch_size, length in zip(batch_sizes, lengths):
-          record_len = length - 1
-          for _ in range(batch_size):
-            elements.append([1] * record_len)
-            record_len = length
-        random.shuffle(elements)
-        for el in elements:
-          yield (_format_record(el, sparse),)
-      dataset = dataset_ops.Dataset.from_generator(
-          _generator,
-          (_get_record_type(sparse),),
-          (_get_record_shape(sparse),))
-      if sparse:
-        dataset = dataset.map(lambda x: (_to_sparse_tensor(x),))
-      return dataset
-
-    def _test_bucket_by_padding(no_padding):
-      dataset = build_dataset(sparse=no_padding)
-      dataset = dataset.apply(
-          grouping.bucket_by_sequence_length(
-              _element_length_fn,
-              boundaries,
-              batch_sizes,
-              no_padding=no_padding))
-      batch, = dataset.make_one_shot_iterator().get_next()
-
-      with self.cached_session() as sess:
-        batches = []
-        for _ in range(4):
-          batches.append(sess.run(batch))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(batch)
-      batch_sizes_val = []
-      lengths_val = []
-      for batch in batches:
-        shape = batch.dense_shape if no_padding else batch.shape
-        batch_size = shape[0]
-        length = shape[1]
-        batch_sizes_val.append(batch_size)
-        lengths_val.append(length)
-        sum_check = batch.values.sum() if no_padding else batch.sum()
-        self.assertEqual(sum_check, batch_size * length - 1)
-      self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
-      self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
-      self.assertEqual(sorted(lengths), sorted(lengths_val))
-
-    for no_padding in (True, False):
-      _test_bucket_by_padding(no_padding)
-
-  def testPadToBoundary(self):
-
-    boundaries = [10, 20, 30]
-    batch_sizes = [10, 8, 4, 2]
-    lengths = [8, 13, 25]
-
-    def element_gen():
-      # Produce 1 batch for each bucket
-      elements = []
-      for batch_size, length in zip(batch_sizes[:-1], lengths):
-        for _ in range(batch_size):
-          elements.append([1] * length)
-      random.shuffle(elements)
-      for el in elements:
-        yield (el,)
-      for _ in range(batch_sizes[-1]):
-        el = [1] * (boundaries[-1] + 5)
-        yield (el,)
-
-    element_len = lambda el: array_ops.shape(el)[0]
-    dataset = dataset_ops.Dataset.from_generator(
-        element_gen, (dtypes.int64,), ([None],)).apply(
-            grouping.bucket_by_sequence_length(
-                element_len, boundaries, batch_sizes,
-                pad_to_bucket_boundary=True))
-    batch, = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      batches = []
-      for _ in range(3):
-        batches.append(sess.run(batch))
-      with self.assertRaisesOpError("bucket_boundaries"):
-        sess.run(batch)
-    batch_sizes_val = []
-    lengths_val = []
-    for batch in batches:
-      batch_size = batch.shape[0]
-      length = batch.shape[1]
-      batch_sizes_val.append(batch_size)
-      lengths_val.append(length)
-    batch_sizes = batch_sizes[:-1]
-    self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
-    self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
-    self.assertEqual([boundary - 1 for boundary in sorted(boundaries)],
-                     sorted(lengths_val))
-
-  def testPadToBoundaryNoExtraneousPadding(self):
-
-    boundaries = [3, 7, 11]
-    batch_sizes = [2, 2, 2, 2]
-    lengths = range(1, 11)
-
-    def element_gen():
-      for length in lengths:
-        yield ([1] * length,)
-
-    element_len = lambda element: array_ops.shape(element)[0]
-    dataset = dataset_ops.Dataset.from_generator(
-        element_gen, (dtypes.int64,), ([None],)).apply(
-            grouping.bucket_by_sequence_length(
-                element_len, boundaries, batch_sizes,
-                pad_to_bucket_boundary=True))
-    batch, = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      batches = []
-      for _ in range(5):
-        batches.append(sess.run(batch))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(batch)
-
-    self.assertAllEqual(batches[0], [[1, 0],
-                                     [1, 1]])
-    self.assertAllEqual(batches[1], [[1, 1, 1, 0, 0, 0],
-                                     [1, 1, 1, 1, 0, 0]])
-    self.assertAllEqual(batches[2], [[1, 1, 1, 1, 1, 0],
-                                     [1, 1, 1, 1, 1, 1]])
-    self.assertAllEqual(batches[3], [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
-                                     [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])
-    self.assertAllEqual(batches[4], [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
-                                     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
-
-  def testTupleElements(self):
-
-    def build_dataset(sparse):
-      def _generator():
-        text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]]
-        label = [1, 2, 1, 2]
-        for x, y in zip(text, label):
-          yield (_format_record(x, sparse), y)
-      dataset = dataset_ops.Dataset.from_generator(
-          generator=_generator,
-          output_types=(_get_record_type(sparse), dtypes.int32),
-          output_shapes=(_get_record_shape(sparse),
-                         tensor_shape.TensorShape([])))
-      if sparse:
-        dataset = dataset.map(lambda x, y: (_to_sparse_tensor(x), y))
-      return dataset
-
-    def _test_tuple_elements_by_padding(no_padding):
-      dataset = build_dataset(sparse=no_padding)
-      dataset = dataset.apply(grouping.bucket_by_sequence_length(
-          element_length_func=_element_length_fn,
-          bucket_batch_sizes=[2, 2, 2],
-          bucket_boundaries=[0, 8],
-          no_padding=no_padding))
-      shapes = dataset.output_shapes
-      self.assertEqual([None, None], shapes[0].as_list())
-      self.assertEqual([None], shapes[1].as_list())
-
-    for no_padding in (True, False):
-      _test_tuple_elements_by_padding(no_padding)
-
-  def testBucketSparse(self):
-    """Tests bucketing of sparse tensors (case where `no_padding` == True).
-
-    Test runs on following dataset:
-      [
-        [0],
-        [0, 1],
-        [0, 1, 2]
-        ...
-        [0, ..., max_len - 1]
-      ]
-    Sequences are bucketed by length and batched with
-      `batch_size` < `bucket_size`.
-    """
-
-    min_len = 0
-    max_len = 100
-    batch_size = 7
-    bucket_size = 10
-
-    def _build_dataset():
-      input_data = [range(i+1) for i in range(min_len, max_len)]
-      def generator_fn():
-        for record in input_data:
-          yield _format_record(record, sparse=True)
-      dataset = dataset_ops.Dataset.from_generator(
-          generator=generator_fn,
-          output_types=_get_record_type(sparse=True))
-      dataset = dataset.map(_to_sparse_tensor)
-      return dataset
-
-    def _compute_expected_batches():
-      """Computes expected batch outputs and stores in a set."""
-      all_expected_sparse_tensors = set()
-      for bucket_start_len in range(min_len, max_len, bucket_size):
-        for batch_offset in range(0, bucket_size, batch_size):
-          batch_start_len = bucket_start_len + batch_offset
-          batch_end_len = min(batch_start_len + batch_size,
-                              bucket_start_len + bucket_size)
-          expected_indices = []
-          expected_values = []
-          for length in range(batch_start_len, batch_end_len):
-            for val in range(length + 1):
-              expected_indices.append((length - batch_start_len, val))
-              expected_values.append(val)
-          expected_sprs_tensor = (tuple(expected_indices),
-                                  tuple(expected_values))
-          all_expected_sparse_tensors.add(expected_sprs_tensor)
-      return all_expected_sparse_tensors
-
-    def _compute_batches(dataset):
-      """Computes actual batch outputs of dataset and stores in a set."""
-      batch = dataset.make_one_shot_iterator().get_next()
-      all_sparse_tensors = set()
-      with self.cached_session() as sess:
-        with self.assertRaises(errors.OutOfRangeError):
-          while True:
-            output = sess.run(batch)
-            sprs_tensor = (tuple([tuple(idx) for idx in output.indices]),
-                           tuple(output.values))
-            all_sparse_tensors.add(sprs_tensor)
-      return all_sparse_tensors
-
-    dataset = _build_dataset()
-    boundaries = range(min_len + bucket_size + 1, max_len, bucket_size)
-    dataset = dataset.apply(grouping.bucket_by_sequence_length(
-        _element_length_fn,
-        boundaries,
-        [batch_size] * (len(boundaries) + 1),
-        no_padding=True))
-    batches = _compute_batches(dataset)
-    expected_batches = _compute_expected_batches()
-    self.assertEqual(batches, expected_batches)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
similarity index 56%
rename from tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
index 7d7b842c17..adfacf1c9f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
@@ -12,440 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for prefetching_ops."""
+"""Tests for `tf.data.experimental.copy_to_device()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import threading
-
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.compat import compat
 from tensorflow.python.data.experimental.ops import prefetching_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
-from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import test
 
 
-class PrefetchingKernelsOpsTest(test_base.DatasetTestBase):
-
-  def setUp(self):
-    self._event = threading.Event()
-
-  def _create_ds_and_iterator(self, device0, initializable=False):
-
-    def gen():
-      for i in range(1, 10):
-        yield [float(i)]
-        if i == 6:
-          self._event.set()
-
-    with ops.device(device0):
-      ds = dataset_ops.Dataset.from_generator(gen, (dtypes.float32))
-      if initializable:
-        ds_iterator = ds.make_initializable_iterator()
-      else:
-        ds_iterator = ds.make_one_shot_iterator()
-      return (ds, ds_iterator)
-
-  def _create_ops(self, ds, ds_iterator, buffer_name, device0, device1):
-    ds_iterator_handle = ds_iterator.string_handle()
-
-    @function.Defun(dtypes.string)
-    def _remote_fn(h):
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          h, ds.output_types, ds.output_shapes)
-      return remote_iterator.get_next()
-
-    target = constant_op.constant(device0)
-    with ops.device(device1):
-      buffer_resource_handle = prefetching_ops.function_buffering_resource(
-          f=_remote_fn,
-          output_types=[dtypes.float32],
-          target_device=target,
-          string_arg=ds_iterator_handle,
-          buffer_size=3,
-          shared_name=buffer_name)
-
-    with ops.device(device1):
-      prefetch_op = prefetching_ops.function_buffering_resource_get_next(
-          function_buffer_resource=buffer_resource_handle,
-          output_types=[dtypes.float32])
-      reset_op = prefetching_ops.function_buffering_resource_reset(
-          function_buffer_resource=buffer_resource_handle)
-      destroy_op = resource_variable_ops.destroy_resource_op(
-          buffer_resource_handle, ignore_lookup_error=True)
-
-    return (prefetch_op, reset_op, destroy_op)
-
-  def _prefetch_fn_helper_one_shot(self, buffer_name, device0, device1):
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-
-    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=False)
-    prefetch_op, _, destroy_op = self._create_ops(ds, ds_iterator, buffer_name,
-                                                  device0, device1)
-
-    with self.test_session(config=worker_config) as sess:
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [1.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [2.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [3.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [4.0])
-      self._event.wait()
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [5.0])
-      sess.run(destroy_op)
-
-  def testSameDeviceCPU(self):
-    self._prefetch_fn_helper_one_shot("same_device_cpu",
-                                      "/job:localhost/replica:0/task:0/cpu:0",
-                                      "/job:localhost/replica:0/task:0/cpu:0")
-
-  def testDifferentDeviceCPU(self):
-    self._prefetch_fn_helper_one_shot("diff_device_cpu",
-                                      "/job:localhost/replica:0/task:0/cpu:0",
-                                      "/job:localhost/replica:0/task:0/cpu:1")
-
-  def testDifferentDeviceCPUGPU(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    self._prefetch_fn_helper_one_shot("cpu_gpu",
-                                      "/job:localhost/replica:0/task:0/cpu:0",
-                                      "/job:localhost/replica:0/task:0/gpu:0")
-
-  def testReinitialization(self):
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-
-    device0 = "/job:localhost/replica:0/task:0/cpu:0"
-    device1 = "/job:localhost/replica:0/task:0/cpu:1"
-    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True)
-    prefetch_op, reset_op, destroy_op = self._create_ops(
-        ds, ds_iterator, "reinit", device0, device1)
-
-    with self.test_session(config=worker_config) as sess:
-      sess.run(ds_iterator.initializer)
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [1.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [2.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [3.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [4.0])
-      self._event.wait()
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [5.0])
-      # Lets reset the function buffering resource and reinitialize the
-      # iterator. Should be able to go through this again.
-      self._event.clear()
-      sess.run(reset_op)
-      sess.run(ds_iterator.initializer)
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [1.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [2.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [3.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [4.0])
-      self._event.wait()
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [5.0])
-      sess.run(destroy_op)
-
-  def testReinitializationOutOfRange(self):
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-
-    device0 = "/job:localhost/replica:0/task:0/cpu:0"
-    device1 = "/job:localhost/replica:0/task:0/cpu:1"
-    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True)
-    prefetch_op, reset_op, destroy_op = self._create_ops(
-        ds, ds_iterator, "reinit", device0, device1)
-
-    with self.test_session(config=worker_config) as sess:
-      sess.run(ds_iterator.initializer)
-      for i in range(1, 10):
-        elem = sess.run(prefetch_op)
-        self.assertEqual(elem, [float(i)])
-      # Try fetching after its over twice to test out end of sequence.
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-
-      # Now reset everything and try it out again.
-      self._event.clear()
-      sess.run(reset_op)
-      sess.run(ds_iterator.initializer)
-      for i in range(1, 10):
-        elem = sess.run(prefetch_op)
-        self.assertEqual(elem, [float(i)])
-      # Try fetching after its over twice to test out end of sequence.
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-
-      sess.run(destroy_op)
-
-  def testStringsGPU(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    device0 = "/job:localhost/replica:0/task:0/cpu:0"
-    device1 = "/job:localhost/replica:0/task:0/gpu:0"
-
-    ds = dataset_ops.Dataset.from_tensor_slices(["a", "b", "c"])
-    ds_iterator = ds.make_one_shot_iterator()
-    ds_iterator_handle = ds_iterator.string_handle()
-
-    @function.Defun(dtypes.string)
-    def _remote_fn(h):
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          h, ds.output_types, ds.output_shapes)
-      return remote_iterator.get_next()
-
-    target = constant_op.constant(device0)
-    with ops.device(device1):
-      buffer_resource_handle = prefetching_ops.function_buffering_resource(
-          f=_remote_fn,
-          output_types=[dtypes.string],
-          target_device=target,
-          string_arg=ds_iterator_handle,
-          buffer_size=3,
-          shared_name="strings")
-
-    with ops.device(device1):
-      prefetch_op = prefetching_ops.function_buffering_resource_get_next(
-          function_buffer_resource=buffer_resource_handle,
-          output_types=[dtypes.string])
-      destroy_op = resource_variable_ops.destroy_resource_op(
-          buffer_resource_handle, ignore_lookup_error=True)
-
-    with self.cached_session() as sess:
-      self.assertEqual([b"a"], sess.run(prefetch_op))
-      self.assertEqual([b"b"], sess.run(prefetch_op))
-      self.assertEqual([b"c"], sess.run(prefetch_op))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-
-      sess.run(destroy_op)
-
-
-class PrefetchToDeviceTest(test_base.DatasetTestBase):
-
-  def testPrefetchToDevice(self):
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/cpu:1"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_one_shot_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element.dtype)
-    self.assertEqual([], next_element.shape)
-
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=worker_config) as sess:
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToSameDevice(self):
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device(
-            "/job:localhost/replica:0/task:0/device:CPU:0"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_one_shot_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element.dtype)
-    self.assertEqual([], next_element.shape)
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchDictToDevice(self):
-    host_dataset = dataset_ops.Dataset.range(10).map(lambda x: {"a": x})
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/cpu:1"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_one_shot_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element["a"].dtype)
-    self.assertEqual([], next_element["a"].shape)
-
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=worker_config) as sess:
-      for i in range(10):
-        self.assertEqual({"a": i}, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchSparseTensorsToDevice(self):
-    def make_tensor(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=[[0, 0]], values=(i*[1]), dense_shape=[2, 2])
-    host_dataset = dataset_ops.Dataset.range(10).map(make_tensor)
-
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/cpu:1"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_one_shot_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element.dtype)
-
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=worker_config) as sess:
-      for i in range(10):
-        actual = sess.run(next_element)
-        self.assertAllEqual([i], actual.values)
-        self.assertAllEqual([[0, 0]], actual.indices)
-        self.assertAllEqual([2, 2], actual.dense_shape)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToDeviceGpu(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/gpu:0"))
-
-    iterator = device_dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToDeviceWithReInit(self):
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/cpu:1"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_initializable_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element.dtype)
-    self.assertEqual([], next_element.shape)
-
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=worker_config) as sess:
-      sess.run(iterator.initializer)
-      for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToDeviceGpuWithReInit(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/gpu:0"))
-
-    iterator = device_dataset.make_initializable_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer)
-      for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-
 class CopyToDeviceTest(test_base.DatasetTestBase):
 
   def testCopyToDevice(self):
diff --git a/tensorflow/python/data/experimental/kernel_tests/counter_test.py b/tensorflow/python/data/experimental/kernel_tests/counter_test.py
new file mode 100644
index 0000000000..4e114ac479
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/counter_test.py
@@ -0,0 +1,51 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.Counter`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.ops import counter
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.framework import dtypes
+from tensorflow.python.platform import test
+
+
+class CounterTest(test_base.DatasetTestBase):
+
+  def testCounter(self):
+    """Test dataset construction using `count`."""
+    iterator = (counter.Counter(start=3, step=4)
+                .make_one_shot_iterator())
+    get_next = iterator.get_next()
+    self.assertEqual([], get_next.shape.as_list())
+    self.assertEqual(dtypes.int64, get_next.dtype)
+
+    negative_iterator = (counter.Counter(start=0, step=-1)
+                         .make_one_shot_iterator())
+    negative_get_next = negative_iterator.get_next()
+
+    with self.cached_session() as sess:
+      self.assertEqual(3, sess.run(get_next))
+      self.assertEqual(3 + 4, sess.run(get_next))
+      self.assertEqual(3 + 2 * 4, sess.run(get_next))
+
+      self.assertEqual(0, sess.run(negative_get_next))
+      self.assertEqual(-1, sess.run(negative_get_next))
+      self.assertEqual(-2, sess.run(negative_get_next))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py
similarity index 99%
rename from tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py
index 4ee1779710..fb75be1fbc 100644
--- a/tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for CsvDatasetOp."""
+"""Tests for `tf.data.experimental.CsvDataset`."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -44,7 +44,7 @@ from tensorflow.python.platform import test
 
 
 @test_util.run_all_in_graph_and_eager_modes
-class CsvDatasetOpTest(test_base.DatasetTestBase):
+class CsvDatasetTest(test_base.DatasetTestBase):
 
   def _setup_files(self, inputs, linebreak='\n', compression_type=None):
     filenames = []
diff --git a/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py b/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
deleted file mode 100644
index 7f435b8239..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
+++ /dev/null
@@ -1,692 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Base class for testing serializable datasets."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-import numpy as np
-
-from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
-from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.util import nest
-
-
-def remove_variants(get_next_op):
-  # TODO(b/72408568): Remove this once session.run can get
-  # variant tensors.
-  """Remove variants from a nest structure, so sess.run will execute."""
-
-  def _remove_variant(x):
-    if isinstance(x, ops.Tensor) and x.dtype == dtypes.variant:
-      return ()
-    else:
-      return x
-
-  return nest.map_structure(_remove_variant, get_next_op)
-
-
-class DatasetSerializationTestBase(test.TestCase):
-  """Base class for testing serializable datasets."""
-
-  def tearDown(self):
-    self._delete_ckpt()
-
-  # TODO(b/72657739): Remove sparse_tensor argument, which is to test the
-  # (deprecated) saveable `SparseTensorSliceDataset`, once the API
-  # `from_sparse_tensor_slices()`and related tests are deleted.
-  def run_core_tests(self, ds_fn1, ds_fn2, num_outputs, sparse_tensors=False):
-    """Runs the core tests.
-
-    Args:
-      ds_fn1: 0-argument function that returns a Dataset.
-      ds_fn2: 0-argument function that returns a Dataset different from
-        ds_fn1. If None, verify_restore_in_modified_graph test is not run.
-      num_outputs: Total number of outputs expected from this Dataset.
-      sparse_tensors: Whether dataset is built from SparseTensor(s).
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.verify_unused_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_fully_used_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_exhausted_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_init_before_restore(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_multiple_breaks(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_reset_restored_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_restore_in_empty_graph(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    if ds_fn2:
-      self.verify_restore_in_modified_graph(
-          ds_fn1, ds_fn2, num_outputs, sparse_tensors=sparse_tensors)
-
-  def verify_unused_iterator(self,
-                             ds_fn,
-                             num_outputs,
-                             sparse_tensors=False,
-                             verify_exhausted=True):
-    """Verifies that saving and restoring an unused iterator works.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.verify_run_with_breaks(
-        ds_fn, [0],
-        num_outputs,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-  def verify_fully_used_iterator(self, ds_fn, num_outputs,
-                                 sparse_tensors=False):
-    """Verifies that saving and restoring a fully used iterator works.
-
-    Note that this only checks saving and restoring an iterator from which
-    `num_outputs` items have been produced but does not check for an
-    exhausted iterator, i.e., one from which an OutOfRange error has been
-    returned.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      sparse_tensors: See `run_core_tests`.
-
-    Raises:
-      AssertionError if test fails.
-    """
-    self.verify_run_with_breaks(
-        ds_fn, [num_outputs], num_outputs, sparse_tensors=sparse_tensors)
-
-  def verify_exhausted_iterator(self, ds_fn, num_outputs, sparse_tensors=False):
-    """Verifies that saving and restoring an exhausted iterator works.
-
-    An exhausted iterator is one which has returned an OutOfRange error.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      sparse_tensors: See `run_core_tests`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.gen_outputs(
-        ds_fn, [],
-        num_outputs,
-        verify_exhausted=True,
-        sparse_tensors=sparse_tensors)
-    actual = self.gen_outputs(
-        ds_fn, [],
-        0,
-        ckpt_saved=True,
-        verify_exhausted=True,
-        sparse_tensors=sparse_tensors)
-    self.assertEqual(len(actual), 0)
-
-  def verify_init_before_restore(self,
-                                 ds_fn,
-                                 num_outputs,
-                                 sparse_tensors=False,
-                                 verify_exhausted=True):
-    """Verifies that restoring into an already initialized iterator works.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.verify_run_with_breaks(
-        ds_fn,
-        self.gen_break_points(num_outputs),
-        num_outputs,
-        init_before_restore=True,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-  def verify_multiple_breaks(self,
-                             ds_fn,
-                             num_outputs,
-                             num_breaks=10,
-                             sparse_tensors=False,
-                             verify_exhausted=True):
-    """Attempts to save/restore at multiple break points.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      num_breaks: The number of break points. These are uniformly spread in
-        [0, num_outputs] both inclusive.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.verify_run_with_breaks(
-        ds_fn,
-        self.gen_break_points(num_outputs, num_breaks),
-        num_outputs,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-  def verify_reset_restored_iterator(self,
-                                     ds_fn,
-                                     num_outputs,
-                                     break_point=None,
-                                     sparse_tensors=False,
-                                     verify_exhausted=True):
-    """Attempts to re-initialize a restored iterator.
-
-    This is useful when restoring a training checkpoint during validation.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      break_point: Break point. Optional. Defaults to num_outputs/2.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    break_point = num_outputs // 2 if not break_point else break_point
-
-    # Collect ground truth containing all outputs.
-    expected = self.gen_outputs(
-        ds_fn, [],
-        num_outputs,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    # Skip some items and save checkpoint.
-    self.gen_outputs(
-        ds_fn, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-
-    actual = []
-    # Restore from checkpoint and then run init_op.
-    with ops.Graph().as_default() as g:
-      saver = self._import_meta_graph()
-      init_op, get_next_op = self._get_iterator_ops_from_collection(
-          ds_fn, sparse_tensors=sparse_tensors)
-      get_next_op = remove_variants(get_next_op)
-      with self.session(graph=g) as sess:
-        self._restore(saver, sess)
-        self._initialize(init_op, sess)
-        for _ in range(num_outputs):
-          actual.append(sess.run(get_next_op))
-        if verify_exhausted:
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-    self.match(expected, actual)
-
-  def verify_restore_in_modified_graph(self,
-                                       ds_fn1,
-                                       ds_fn2,
-                                       num_outputs,
-                                       break_point=None,
-                                       sparse_tensors=False,
-                                       verify_exhausted=True):
-    """Attempts to restore an iterator in a modified graph.
-
-    Builds an input pipeline using ds_fn1, runs it for `break_point` steps
-    and saves a checkpoint. Then builds a new graph using ds_fn2, restores
-    the checkpoint from ds_fn1 and verifies that the restore is successful.
-
-    Args:
-      ds_fn1: See `run_core_tests`.
-      ds_fn2: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      break_point: Break point. Optional. Defaults to num_outputs/2.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    break_point = num_outputs // 2 if not break_point else break_point
-
-    # Skip `break_point` items and store the remaining produced from ds_fn1
-    # in `expected`.
-    self.gen_outputs(
-        ds_fn1, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-    expected = self.gen_outputs(
-        ds_fn1, [],
-        num_outputs - break_point,
-        ckpt_saved=True,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    # Generate `break_point` items from ds_fn1 and save checkpoint.
-    self.gen_outputs(
-        ds_fn1, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-
-    actual = []
-    # Build graph for ds_fn2 but load checkpoint for ds_fn1.
-    with ops.Graph().as_default() as g:
-      _, get_next_op, saver = self._build_graph(
-          ds_fn2, sparse_tensors=sparse_tensors)
-      get_next_op = remove_variants(get_next_op)
-      with self.session(graph=g) as sess:
-        self._restore(saver, sess)
-        for _ in range(num_outputs - break_point):
-          actual.append(sess.run(get_next_op))
-        if verify_exhausted:
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-
-    self.match(expected, actual)
-
-  def verify_restore_in_empty_graph(self,
-                                    ds_fn,
-                                    num_outputs,
-                                    break_point=None,
-                                    sparse_tensors=False,
-                                    verify_exhausted=True):
-    """Attempts to restore an iterator in an empty graph.
-
-    Builds an input pipeline using ds_fn, runs it for `break_point` steps
-    and saves a checkpoint. Then builds a new empty graph, restores
-    the checkpoint from ds_fn and verifies that the restore is successful.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      break_point: Break point. Optional. Defaults to num_outputs/2.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    break_point = num_outputs // 2 if not break_point else break_point
-
-    # Skip `break_point` items and store the remaining produced from ds_fn
-    # in `expected`.
-    self.gen_outputs(
-        ds_fn, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-    expected = self.gen_outputs(
-        ds_fn, [],
-        num_outputs - break_point,
-        ckpt_saved=True,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    # Generate `break_point` items from ds_fn and save checkpoint.
-    self.gen_outputs(
-        ds_fn, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-
-    actual = []
-    # Build an empty graph but load checkpoint for ds_fn.
-    with ops.Graph().as_default() as g:
-      get_next_op, saver = self._build_empty_graph(
-          ds_fn, sparse_tensors=sparse_tensors)
-      get_next_op = remove_variants(get_next_op)
-      with self.session(graph=g) as sess:
-        self._restore(saver, sess)
-        for _ in range(num_outputs - break_point):
-          actual.append(sess.run(get_next_op))
-        if verify_exhausted:
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-
-    self.match(expected, actual)
-
-  def verify_error_on_save(self,
-                           ds_fn,
-                           num_outputs,
-                           error,
-                           break_point=None,
-                           sparse_tensors=False):
-    """Attempts to save a non-saveable iterator.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      error: Declared error when trying to save iterator.
-      break_point: Break point. Optional. Defaults to num_outputs/2.
-      sparse_tensors: See `run_core_tests`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-
-    break_point = num_outputs // 2 if not break_point else break_point
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph(
-          ds_fn, sparse_tensors=sparse_tensors)
-      get_next_op = remove_variants(get_next_op)
-      with self.session(graph=g) as sess:
-        self._initialize(init_op, sess)
-        for _ in range(break_point):
-          sess.run(get_next_op)
-        with self.assertRaises(error):
-          self._save(sess, saver)
-
-  def verify_run_with_breaks(self,
-                             ds_fn,
-                             break_points,
-                             num_outputs,
-                             init_before_restore=False,
-                             sparse_tensors=False,
-                             verify_exhausted=True):
-    """Verifies that ds_fn() produces the same outputs with and without breaks.
-
-    1. Builds a Dataset using `ds_fn` and produces `num_outputs` items from it
-       *without* stopping at break points.
-    2. Builds a Dataset using `ds_fn` and produces `num_outputs` items from it
-       with stopping at break points.
-
-    Deep matches outputs from 1 and 2.
-
-    Args:
-      ds_fn: See `gen_outputs`.
-      break_points: See `gen_outputs`.
-      num_outputs: See `gen_outputs`.
-      init_before_restore: See `gen_outputs`.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    expected = self.gen_outputs(
-        ds_fn, [],
-        num_outputs,
-        init_before_restore=init_before_restore,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    actual = self.gen_outputs(
-        ds_fn,
-        break_points,
-        num_outputs,
-        init_before_restore=init_before_restore,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    self.match(expected, actual)
-
-  def gen_outputs(self,
-                  ds_fn,
-                  break_points,
-                  num_outputs,
-                  ckpt_saved=False,
-                  init_before_restore=False,
-                  sparse_tensors=False,
-                  verify_exhausted=True,
-                  save_checkpoint_at_end=True):
-    """Generates elements from input dataset while stopping at break points.
-
-    Produces `num_outputs` outputs and saves the state of the iterator in the
-    Saver checkpoint.
-
-    Args:
-      ds_fn: 0-argument function that returns the dataset.
-      break_points: A list of integers. For each `break_point` in
-        `break_points`, we produce outputs till `break_point` number of items
-        have been produced and then checkpoint the state. The current graph
-        and session are destroyed and a new graph and session are used to
-        produce outputs till next checkpoint or till `num_outputs` elements
-        have been produced. `break_point` must be <= `num_outputs`.
-      num_outputs: The total number of outputs to produce from the iterator.
-      ckpt_saved: Whether a checkpoint already exists. If False, we build the
-        graph from ds_fn.
-      init_before_restore: Whether init should be called before saver.restore.
-        This is just so that we can verify that restoring an already initialized
-        iterator works.
-      sparse_tensors:  Whether dataset is built from SparseTensor(s).
-      verify_exhausted: Whether to verify that the iterator has been exhausted
-        after producing `num_outputs` elements.
-      save_checkpoint_at_end: Whether to save a checkpoint after producing all
-        outputs. If False, checkpoints are saved each break point but not at the
-        end. Note that checkpoints overwrite each other so there is always only
-        a single checkpoint available. Defaults to True.
-
-    Returns:
-      A list of `num_outputs` items.
-    """
-    outputs = []
-
-    def get_ops():
-      if ckpt_saved:
-        saver = self._import_meta_graph()
-        init_op, get_next_op = self._get_iterator_ops_from_collection(
-            ds_fn, sparse_tensors=sparse_tensors)
-      else:
-        init_op, get_next_op, saver = self._build_graph(
-            ds_fn, sparse_tensors=sparse_tensors)
-      return init_op, get_next_op, saver
-
-    for i in range(len(break_points) + 1):
-      with ops.Graph().as_default() as g:
-        init_op, get_next_op, saver = get_ops()
-        get_next_op = remove_variants(get_next_op)
-        with self.session(graph=g) as sess:
-          if ckpt_saved:
-            if init_before_restore:
-              self._initialize(init_op, sess)
-            self._restore(saver, sess)
-          else:
-            self._initialize(init_op, sess)
-          start = break_points[i - 1] if i > 0 else 0
-          end = break_points[i] if i < len(break_points) else num_outputs
-          num_iters = end - start
-          for _ in range(num_iters):
-            outputs.append(sess.run(get_next_op))
-          if i == len(break_points) and verify_exhausted:
-            with self.assertRaises(errors.OutOfRangeError):
-              sess.run(get_next_op)
-          if save_checkpoint_at_end or i < len(break_points):
-            self._save(sess, saver)
-            ckpt_saved = True
-
-    return outputs
-
-  def match(self, expected, actual):
-    """Matches nested structures.
-
-    Recursively matches shape and values of `expected` and `actual`.
-    Handles scalars, numpy arrays and other python sequence containers
-    e.g. list, dict.
-
-    Args:
-      expected: Nested structure 1.
-      actual: Nested structure 2.
-
-    Raises:
-      AssertionError if matching fails.
-    """
-    if isinstance(expected, np.ndarray):
-      expected = expected.tolist()
-    if isinstance(actual, np.ndarray):
-      actual = actual.tolist()
-    self.assertEqual(type(expected), type(actual))
-
-    if nest.is_sequence(expected):
-      self.assertEqual(len(expected), len(actual))
-      if isinstance(expected, dict):
-        for key1, key2 in zip(sorted(expected), sorted(actual)):
-          self.assertEqual(key1, key2)
-          self.match(expected[key1], actual[key2])
-      else:
-        for item1, item2 in zip(expected, actual):
-          self.match(item1, item2)
-    else:
-      self.assertEqual(expected, actual)
-
-  def does_not_match(self, expected, actual):
-    with self.assertRaises(AssertionError):
-      self.match(expected, actual)
-
-  def gen_break_points(self, num_outputs, num_samples=10):
-    """Generates `num_samples` breaks points in [0, num_outputs]."""
-    return np.linspace(0, num_outputs, num_samples, dtype=int)
-
-  def _build_graph(self, ds_fn, sparse_tensors=False):
-    iterator = ds_fn().make_initializable_iterator()
-
-    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-    init_op = iterator.initializer
-    if sparse_tensors:
-      get_next = sparse_tensor.SparseTensor(*iterator.get_next())
-    else:
-      get_next = iterator.get_next()
-    self._add_iterator_ops_to_collection(init_op, get_next, ds_fn,
-                                         sparse_tensors)
-    saver = saver_lib.Saver(allow_empty=True)
-    return init_op, get_next, saver
-
-  def _build_empty_graph(self, ds_fn, sparse_tensors=False):
-    iterator = iterator_ops.Iterator.from_structure(
-        self._get_output_types(ds_fn),
-        output_shapes=self._get_output_shapes(ds_fn),
-        output_classes=self._get_output_classes(ds_fn))
-    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-    if sparse_tensors:
-      get_next = sparse_tensor.SparseTensor(*iterator.get_next())
-    else:
-      get_next = iterator.get_next()
-    saver = saver_lib.Saver(allow_empty=True)
-    return get_next, saver
-
-  def _add_iterator_ops_to_collection(self,
-                                      init_op,
-                                      get_next,
-                                      ds_fn,
-                                      sparse_tensors=False):
-    ops.add_to_collection("iterator_ops", init_op)
-    # `get_next` may be a tuple e.g. in TensorSliceDataset. Since Collections
-    # do not support tuples we flatten the tensors and restore the shape in
-    # `_get_iterator_ops_from_collection`.
-    if sparse_tensors:  # specific for deprecated `from_sparse_tensor_slices`.
-      ops.add_to_collection("iterator_ops", get_next.indices)
-      ops.add_to_collection("iterator_ops", get_next.values)
-      ops.add_to_collection("iterator_ops", get_next.dense_shape)
-      return
-
-    get_next_list = nest.flatten(get_next)
-    for i, output_class in enumerate(
-        nest.flatten(self._get_output_classes(ds_fn))):
-      if output_class is sparse_tensor.SparseTensor:
-        ops.add_to_collection("iterator_ops", get_next_list[i].indices)
-        ops.add_to_collection("iterator_ops", get_next_list[i].values)
-        ops.add_to_collection("iterator_ops", get_next_list[i].dense_shape)
-      else:
-        ops.add_to_collection("iterator_ops", get_next_list[i])
-
-  def _get_iterator_ops_from_collection(self, ds_fn, sparse_tensors=False):
-    all_ops = ops.get_collection("iterator_ops")
-    if sparse_tensors:  # specific for deprecated `from_sparse_tensor_slices`.
-      init_op, indices, values, dense_shape = all_ops
-      return init_op, sparse_tensor.SparseTensor(indices, values, dense_shape)
-    get_next_list = []
-    i = 1
-    for output_class in nest.flatten(self._get_output_classes(ds_fn)):
-      if output_class is sparse_tensor.SparseTensor:
-        indices, values, dense_shape = all_ops[i:i + 3]
-        i += 3
-        get_next_list.append(
-            sparse_tensor.SparseTensor(indices, values, dense_shape))
-      else:
-        get_next_list.append(all_ops[i])
-        i += 1
-    return all_ops[0], nest.pack_sequence_as(
-        self._get_output_types(ds_fn), get_next_list)
-
-  def _get_output_types(self, ds_fn):
-    with ops.Graph().as_default():
-      return ds_fn().output_types
-
-  def _get_output_shapes(self, ds_fn):
-    with ops.Graph().as_default():
-      return ds_fn().output_shapes
-
-  def _get_output_classes(self, ds_fn):
-    with ops.Graph().as_default():
-      return ds_fn().output_classes
-
-  def _ckpt_path(self):
-    return os.path.join(self.get_temp_dir(), "iterator")
-
-  def _latest_ckpt(self):
-    return checkpoint_management.latest_checkpoint(self.get_temp_dir())
-
-  def _save(self, sess, saver):
-    saver.save(sess, self._ckpt_path())
-
-  def _restore(self, saver, sess):
-    sess.run(lookup_ops.tables_initializer())
-    saver.restore(sess, self._latest_ckpt())
-
-  def _initialize(self, init_op, sess):
-    sess.run(variables.global_variables_initializer())
-    sess.run(lookup_ops.tables_initializer())
-    sess.run(init_op)
-
-  def _import_meta_graph(self):
-    meta_file_path = self._ckpt_path() + ".meta"
-    return saver_lib.import_meta_graph(meta_file_path)
-
-  def _delete_ckpt(self):
-    # Remove all checkpoint files.
-    prefix = self._ckpt_path()
-    pattern = prefix + "*"
-    files = gfile.Glob(pattern)
-    map(gfile.Remove, files)
diff --git a/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
new file mode 100644
index 0000000000..73be6cbcca
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
@@ -0,0 +1,124 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.dense_to_sparse_batch()."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class DenseToSparseBatchTest(test_base.DatasetTestBase):
+
+  def testDenseToSparseBatchDataset(self):
+    components = np.random.randint(12, size=(100,)).astype(np.int32)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.fill([x], x)).apply(
+            batching.dense_to_sparse_batch(4, [12]))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      for start in range(0, len(components), 4):
+        results = sess.run(get_next)
+        self.assertAllEqual([[i, j]
+                             for i, c in enumerate(components[start:start + 4])
+                             for j in range(c)], results.indices)
+        self.assertAllEqual(
+            [c for c in components[start:start + 4] for _ in range(c)],
+            results.values)
+        self.assertAllEqual([min(4,
+                                 len(components) - start), 12],
+                            results.dense_shape)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testDenseToSparseBatchDatasetWithUnknownShape(self):
+    components = np.random.randint(5, size=(40,)).astype(np.int32)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.fill([x, x], x)).apply(
+            batching.dense_to_sparse_batch(
+                4, [5, None])).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      for start in range(0, len(components), 4):
+        results = sess.run(get_next)
+        self.assertAllEqual([[i, j, z]
+                             for i, c in enumerate(components[start:start + 4])
+                             for j in range(c)
+                             for z in range(c)], results.indices)
+        self.assertAllEqual([
+            c
+            for c in components[start:start + 4] for _ in range(c)
+            for _ in range(c)
+        ], results.values)
+        self.assertAllEqual([
+            min(4,
+                len(components) - start), 5,
+            np.max(components[start:start + 4])
+        ], results.dense_shape)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testDenseToSparseBatchDatasetWithInvalidShape(self):
+    input_tensor = array_ops.constant([[1]])
+    with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"):
+      dataset_ops.Dataset.from_tensors(input_tensor).apply(
+          batching.dense_to_sparse_batch(4, [-2])).make_initializable_iterator()
+
+  def testDenseToSparseBatchDatasetShapeErrors(self):
+    input_tensor = array_ops.placeholder(dtypes.int32)
+    iterator = (
+        dataset_ops.Dataset.from_tensors(input_tensor).apply(
+            batching.dense_to_sparse_batch(4, [12]))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      # Initialize with an input tensor of incompatible rank.
+      sess.run(init_op, feed_dict={input_tensor: [[1]]})
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   "incompatible with the row shape"):
+        sess.run(get_next)
+
+      # Initialize with an input tensor that is larger than `row_shape`.
+      sess.run(init_op, feed_dict={input_tensor: range(13)})
+      with self.assertRaisesRegexp(errors.DataLossError,
+                                   "larger than the row shape"):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
similarity index 68%
rename from tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
index 22412c3965..e54235d9f8 100644
--- a/tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
@@ -12,12 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Test RangeDataset."""
+"""Tests for `tf.data.experimental.enumerate_dataset()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.experimental.ops import counter
 from tensorflow.python.data.experimental.ops import enumerate_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
@@ -28,7 +27,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.platform import test
 
 
-class RangeDatasetTest(test_base.DatasetTestBase):
+class EnumerateDatasetTest(test_base.DatasetTestBase):
 
   def testEnumerateDataset(self):
     components = (["a", "b"], [1, 2], [37.0, 38])
@@ -52,27 +51,6 @@ class RangeDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testCounter(self):
-    """Test dataset construction using `count`."""
-    iterator = (counter.Counter(start=3, step=4)
-                .make_one_shot_iterator())
-    get_next = iterator.get_next()
-    self.assertEqual([], get_next.shape.as_list())
-    self.assertEqual(dtypes.int64, get_next.dtype)
-
-    negative_iterator = (counter.Counter(start=0, step=-1)
-                         .make_one_shot_iterator())
-    negative_get_next = negative_iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.assertEqual(3, sess.run(get_next))
-      self.assertEqual(3 + 4, sess.run(get_next))
-      self.assertEqual(3 + 2 * 4, sess.run(get_next))
-
-      self.assertEqual(0, sess.run(negative_get_next))
-      self.assertEqual(-1, sess.run(negative_get_next))
-      self.assertEqual(-2, sess.run(negative_get_next))
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py b/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py
new file mode 100644
index 0000000000..399fd284f4
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py
@@ -0,0 +1,247 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the private `FunctionBufferingResource` used in prefetching."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import threading
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.data.experimental.ops import prefetching_ops
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.platform import test
+
+
+class FunctionBufferingResourceTest(test_base.DatasetTestBase):
+
+  def setUp(self):
+    self._event = threading.Event()
+
+  def _create_ds_and_iterator(self, device0, initializable=False):
+
+    def gen():
+      for i in range(1, 10):
+        yield [float(i)]
+        if i == 6:
+          self._event.set()
+
+    with ops.device(device0):
+      ds = dataset_ops.Dataset.from_generator(gen, (dtypes.float32))
+      if initializable:
+        ds_iterator = ds.make_initializable_iterator()
+      else:
+        ds_iterator = ds.make_one_shot_iterator()
+      return (ds, ds_iterator)
+
+  def _create_ops(self, ds, ds_iterator, buffer_name, device0, device1):
+    ds_iterator_handle = ds_iterator.string_handle()
+
+    @function.Defun(dtypes.string)
+    def _remote_fn(h):
+      remote_iterator = iterator_ops.Iterator.from_string_handle(
+          h, ds.output_types, ds.output_shapes)
+      return remote_iterator.get_next()
+
+    target = constant_op.constant(device0)
+    with ops.device(device1):
+      buffer_resource_handle = prefetching_ops.function_buffering_resource(
+          f=_remote_fn,
+          output_types=[dtypes.float32],
+          target_device=target,
+          string_arg=ds_iterator_handle,
+          buffer_size=3,
+          shared_name=buffer_name)
+
+    with ops.device(device1):
+      prefetch_op = prefetching_ops.function_buffering_resource_get_next(
+          function_buffer_resource=buffer_resource_handle,
+          output_types=[dtypes.float32])
+      reset_op = prefetching_ops.function_buffering_resource_reset(
+          function_buffer_resource=buffer_resource_handle)
+      destroy_op = resource_variable_ops.destroy_resource_op(
+          buffer_resource_handle, ignore_lookup_error=True)
+
+    return (prefetch_op, reset_op, destroy_op)
+
+  def _prefetch_fn_helper_one_shot(self, buffer_name, device0, device1):
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+
+    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=False)
+    prefetch_op, _, destroy_op = self._create_ops(ds, ds_iterator, buffer_name,
+                                                  device0, device1)
+
+    with self.test_session(config=worker_config) as sess:
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [1.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [2.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [3.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [4.0])
+      self._event.wait()
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [5.0])
+      sess.run(destroy_op)
+
+  def testSameDeviceCPU(self):
+    self._prefetch_fn_helper_one_shot("same_device_cpu",
+                                      "/job:localhost/replica:0/task:0/cpu:0",
+                                      "/job:localhost/replica:0/task:0/cpu:0")
+
+  def testDifferentDeviceCPU(self):
+    self._prefetch_fn_helper_one_shot("diff_device_cpu",
+                                      "/job:localhost/replica:0/task:0/cpu:0",
+                                      "/job:localhost/replica:0/task:0/cpu:1")
+
+  def testDifferentDeviceCPUGPU(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    self._prefetch_fn_helper_one_shot("cpu_gpu",
+                                      "/job:localhost/replica:0/task:0/cpu:0",
+                                      "/job:localhost/replica:0/task:0/gpu:0")
+
+  def testReinitialization(self):
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+
+    device0 = "/job:localhost/replica:0/task:0/cpu:0"
+    device1 = "/job:localhost/replica:0/task:0/cpu:1"
+    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True)
+    prefetch_op, reset_op, destroy_op = self._create_ops(
+        ds, ds_iterator, "reinit", device0, device1)
+
+    with self.test_session(config=worker_config) as sess:
+      sess.run(ds_iterator.initializer)
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [1.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [2.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [3.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [4.0])
+      self._event.wait()
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [5.0])
+      # Lets reset the function buffering resource and reinitialize the
+      # iterator. Should be able to go through this again.
+      self._event.clear()
+      sess.run(reset_op)
+      sess.run(ds_iterator.initializer)
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [1.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [2.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [3.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [4.0])
+      self._event.wait()
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [5.0])
+      sess.run(destroy_op)
+
+  def testReinitializationOutOfRange(self):
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+
+    device0 = "/job:localhost/replica:0/task:0/cpu:0"
+    device1 = "/job:localhost/replica:0/task:0/cpu:1"
+    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True)
+    prefetch_op, reset_op, destroy_op = self._create_ops(
+        ds, ds_iterator, "reinit", device0, device1)
+
+    with self.test_session(config=worker_config) as sess:
+      sess.run(ds_iterator.initializer)
+      for i in range(1, 10):
+        elem = sess.run(prefetch_op)
+        self.assertEqual(elem, [float(i)])
+      # Try fetching after its over twice to test out end of sequence.
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+
+      # Now reset everything and try it out again.
+      self._event.clear()
+      sess.run(reset_op)
+      sess.run(ds_iterator.initializer)
+      for i in range(1, 10):
+        elem = sess.run(prefetch_op)
+        self.assertEqual(elem, [float(i)])
+      # Try fetching after its over twice to test out end of sequence.
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+
+      sess.run(destroy_op)
+
+  def testStringsGPU(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    device0 = "/job:localhost/replica:0/task:0/cpu:0"
+    device1 = "/job:localhost/replica:0/task:0/gpu:0"
+
+    ds = dataset_ops.Dataset.from_tensor_slices(["a", "b", "c"])
+    ds_iterator = ds.make_one_shot_iterator()
+    ds_iterator_handle = ds_iterator.string_handle()
+
+    @function.Defun(dtypes.string)
+    def _remote_fn(h):
+      remote_iterator = iterator_ops.Iterator.from_string_handle(
+          h, ds.output_types, ds.output_shapes)
+      return remote_iterator.get_next()
+
+    target = constant_op.constant(device0)
+    with ops.device(device1):
+      buffer_resource_handle = prefetching_ops.function_buffering_resource(
+          f=_remote_fn,
+          output_types=[dtypes.string],
+          target_device=target,
+          string_arg=ds_iterator_handle,
+          buffer_size=3,
+          shared_name="strings")
+
+    with ops.device(device1):
+      prefetch_op = prefetching_ops.function_buffering_resource_get_next(
+          function_buffer_resource=buffer_resource_handle,
+          output_types=[dtypes.string])
+      destroy_op = resource_variable_ops.destroy_resource_op(
+          buffer_resource_handle, ignore_lookup_error=True)
+
+    with self.cached_session() as sess:
+      self.assertEqual([b"a"], sess.run(prefetch_op))
+      self.assertEqual([b"b"], sess.run(prefetch_op))
+      self.assertEqual([b"c"], sess.run(prefetch_op))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+
+      sess.run(destroy_op)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py b/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
new file mode 100644
index 0000000000..9030328593
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
@@ -0,0 +1,199 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.group_by_reducer()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class GroupByReducerTest(test_base.DatasetTestBase):
+
+  def checkResults(self, dataset, shapes, values):
+    self.assertEqual(shapes, dataset.output_shapes)
+    get_next = dataset.make_one_shot_iterator().get_next()
+    with self.cached_session() as sess:
+      for expected in values:
+        got = sess.run(get_next)
+        self.assertEqual(got, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testSum(self):
+    reducer = grouping.Reducer(
+        init_func=lambda _: np.int64(0),
+        reduce_func=lambda x, y: x + y,
+        finalize_func=lambda x: x)
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.range(2 * i).apply(
+          grouping.group_by_reducer(lambda x: x % 2, reducer))
+      self.checkResults(
+          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
+
+  def testAverage(self):
+
+    def reduce_fn(x, y):
+      return (x[0] * x[1] + math_ops.cast(y, dtypes.float32)) / (
+          x[1] + 1), x[1] + 1
+
+    reducer = grouping.Reducer(
+        init_func=lambda _: (0.0, 0.0),
+        reduce_func=reduce_fn,
+        finalize_func=lambda x, _: x)
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.range(2 * i).apply(
+          grouping.group_by_reducer(
+              lambda x: math_ops.cast(x, dtypes.int64) % 2, reducer))
+      self.checkResults(
+          dataset, shapes=tensor_shape.scalar(), values=[i - 1, i])
+
+  def testConcat(self):
+    components = np.array(list("abcdefghijklmnopqrst")).view(np.chararray)
+    reducer = grouping.Reducer(
+        init_func=lambda x: "",
+        reduce_func=lambda x, y: x + y[0],
+        finalize_func=lambda x: x)
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.zip(
+          (dataset_ops.Dataset.from_tensor_slices(components),
+           dataset_ops.Dataset.range(2 * i))).apply(
+               grouping.group_by_reducer(lambda x, y: y % 2, reducer))
+      self.checkResults(
+          dataset,
+          shapes=tensor_shape.scalar(),
+          values=[b"acegikmoqs" [:i], b"bdfhjlnprt" [:i]])
+
+  def testSparseSum(self):
+    def _sparse(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1], dtype=np.int64)),
+          dense_shape=np.array([1, 1]))
+
+    reducer = grouping.Reducer(
+        init_func=lambda _: _sparse(np.int64(0)),
+        reduce_func=lambda x, y: _sparse(x.values[0] + y.values[0]),
+        finalize_func=lambda x: x.values[0])
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.range(2 * i).map(_sparse).apply(
+          grouping.group_by_reducer(lambda x: x.values[0] % 2, reducer))
+      self.checkResults(
+          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
+
+  def testChangingStateShape(self):
+
+    def reduce_fn(x, _):
+      # Statically known rank, but dynamic length.
+      larger_dim = array_ops.concat([x[0], x[0]], 0)
+      # Statically unknown rank.
+      larger_rank = array_ops.expand_dims(x[1], 0)
+      return larger_dim, larger_rank
+
+    reducer = grouping.Reducer(
+        init_func=lambda x: ([0], 1),
+        reduce_func=reduce_fn,
+        finalize_func=lambda x, y: (x, y))
+
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.from_tensors(np.int64(0)).repeat(i).apply(
+          grouping.group_by_reducer(lambda x: x, reducer))
+      self.assertEqual([None], dataset.output_shapes[0].as_list())
+      self.assertIs(None, dataset.output_shapes[1].ndims)
+      iterator = dataset.make_one_shot_iterator()
+      get_next = iterator.get_next()
+      with self.cached_session() as sess:
+        x, y = sess.run(get_next)
+        self.assertAllEqual([0] * (2**i), x)
+        self.assertAllEqual(np.array(1, ndmin=i), y)
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+
+  def testTypeMismatch(self):
+    reducer = grouping.Reducer(
+        init_func=lambda x: constant_op.constant(1, dtype=dtypes.int32),
+        reduce_func=lambda x, y: constant_op.constant(1, dtype=dtypes.int64),
+        finalize_func=lambda x: x)
+
+    dataset = dataset_ops.Dataset.range(10)
+    with self.assertRaisesRegexp(
+        TypeError,
+        "The element types for the new state must match the initial state."):
+      dataset.apply(
+          grouping.group_by_reducer(lambda _: np.int64(0), reducer))
+
+  # TODO(b/78665031): Remove once non-scalar keys are supported.
+  def testInvalidKeyShape(self):
+    reducer = grouping.Reducer(
+        init_func=lambda x: np.int64(0),
+        reduce_func=lambda x, y: x + y,
+        finalize_func=lambda x: x)
+
+    dataset = dataset_ops.Dataset.range(10)
+    with self.assertRaisesRegexp(
+        ValueError, "`key_func` must return a single tf.int64 tensor."):
+      dataset.apply(
+          grouping.group_by_reducer(lambda _: np.int64((0, 0)), reducer))
+
+  # TODO(b/78665031): Remove once non-int64 keys are supported.
+  def testInvalidKeyType(self):
+    reducer = grouping.Reducer(
+        init_func=lambda x: np.int64(0),
+        reduce_func=lambda x, y: x + y,
+        finalize_func=lambda x: x)
+
+    dataset = dataset_ops.Dataset.range(10)
+    with self.assertRaisesRegexp(
+        ValueError, "`key_func` must return a single tf.int64 tensor."):
+      dataset.apply(
+          grouping.group_by_reducer(lambda _: "wrong", reducer))
+
+  def testTuple(self):
+    def init_fn(_):
+      return np.array([], dtype=np.int64), np.int64(0)
+
+    def reduce_fn(state, value):
+      s1, s2 = state
+      v1, v2 = value
+      return array_ops.concat([s1, [v1]], 0), s2 + v2
+
+    def finalize_fn(s1, s2):
+      return s1, s2
+
+    reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
+    dataset = dataset_ops.Dataset.zip(
+        (dataset_ops.Dataset.range(10), dataset_ops.Dataset.range(10))).apply(
+            grouping.group_by_reducer(lambda x, y: np.int64(0), reducer))
+    get_next = dataset.make_one_shot_iterator().get_next()
+    with self.cached_session() as sess:
+      x, y = sess.run(get_next)
+      self.assertAllEqual(x, np.asarray([x for x in range(10)]))
+      self.assertEqual(y, 45)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py b/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
new file mode 100644
index 0000000000..557d56e8b9
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
@@ -0,0 +1,367 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.group_by_window()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import test
+
+
+# NOTE(mrry): These tests are based on the tests in bucket_ops_test.py.
+# Currently, they use a constant batch size, though should be made to use a
+# different batch size per key.
+class GroupByWindowTest(test_base.DatasetTestBase):
+
+  def _dynamicPad(self, bucket, window, window_size):
+    # TODO(mrry): To match `tf.contrib.training.bucket()`, implement a
+    # generic form of padded_batch that pads every component
+    # dynamically and does not rely on static shape information about
+    # the arguments.
+    return dataset_ops.Dataset.zip(
+        (dataset_ops.Dataset.from_tensors(bucket),
+         window.padded_batch(
+             32, (tensor_shape.TensorShape([]), tensor_shape.TensorShape(
+                 [None]), tensor_shape.TensorShape([3])))))
+
+  def testSingleBucket(self):
+
+    def _map_fn(v):
+      return (v, array_ops.fill([v], v),
+              array_ops.fill([3], string_ops.as_string(v)))
+
+    input_dataset = (
+        dataset_ops.Dataset.from_tensor_slices(math_ops.range(32)).map(_map_fn))
+
+    bucketed_dataset = input_dataset.apply(
+        grouping.group_by_window(
+            lambda x, y, z: 0,
+            lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
+
+    iterator = bucketed_dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      which_bucket, bucketed_values = sess.run(get_next)
+
+      self.assertEqual(0, which_bucket)
+
+      expected_scalar_int = np.arange(32, dtype=np.int64)
+      expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
+      for i in range(32):
+        expected_unk_int64[i, :i] = i
+      expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T
+
+      self.assertAllEqual(expected_scalar_int, bucketed_values[0])
+      self.assertAllEqual(expected_unk_int64, bucketed_values[1])
+      self.assertAllEqual(expected_vec3_str, bucketed_values[2])
+
+  def testEvenOddBuckets(self):
+
+    def _map_fn(v):
+      return (v, array_ops.fill([v], v),
+              array_ops.fill([3], string_ops.as_string(v)))
+
+    input_dataset = (
+        dataset_ops.Dataset.from_tensor_slices(math_ops.range(64)).map(_map_fn))
+
+    bucketed_dataset = input_dataset.apply(
+        grouping.group_by_window(
+            lambda x, y, z: math_ops.cast(x % 2, dtypes.int64),
+            lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
+
+    iterator = bucketed_dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      # Get two minibatches (one containing even values, one containing odds)
+      which_bucket_even, bucketed_values_even = sess.run(get_next)
+      which_bucket_odd, bucketed_values_odd = sess.run(get_next)
+
+      # Count number of bucket_tensors.
+      self.assertEqual(3, len(bucketed_values_even))
+      self.assertEqual(3, len(bucketed_values_odd))
+
+      # Ensure bucket 0 was used for all minibatch entries.
+      self.assertAllEqual(0, which_bucket_even)
+      self.assertAllEqual(1, which_bucket_odd)
+
+      # Test the first bucket outputted, the events starting at 0
+      expected_scalar_int = np.arange(0, 32 * 2, 2, dtype=np.int64)
+      expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
+      for i in range(0, 32):
+        expected_unk_int64[i, :2 * i] = 2 * i
+        expected_vec3_str = np.vstack(
+            3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T
+
+      self.assertAllEqual(expected_scalar_int, bucketed_values_even[0])
+      self.assertAllEqual(expected_unk_int64, bucketed_values_even[1])
+      self.assertAllEqual(expected_vec3_str, bucketed_values_even[2])
+
+      # Test the second bucket outputted, the odds starting at 1
+      expected_scalar_int = np.arange(1, 32 * 2 + 1, 2, dtype=np.int64)
+      expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
+      for i in range(0, 32):
+        expected_unk_int64[i, :2 * i + 1] = 2 * i + 1
+        expected_vec3_str = np.vstack(
+            3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T
+
+      self.assertAllEqual(expected_scalar_int, bucketed_values_odd[0])
+      self.assertAllEqual(expected_unk_int64, bucketed_values_odd[1])
+      self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2])
+
+  def testEvenOddBucketsFilterOutAllOdd(self):
+
+    def _map_fn(v):
+      return {
+          "x": v,
+          "y": array_ops.fill([v], v),
+          "z": array_ops.fill([3], string_ops.as_string(v))
+      }
+
+    def _dynamic_pad_fn(bucket, window, _):
+      return dataset_ops.Dataset.zip(
+          (dataset_ops.Dataset.from_tensors(bucket),
+           window.padded_batch(
+               32, {
+                   "x": tensor_shape.TensorShape([]),
+                   "y": tensor_shape.TensorShape([None]),
+                   "z": tensor_shape.TensorShape([3])
+               })))
+
+    input_dataset = (
+        dataset_ops.Dataset.from_tensor_slices(math_ops.range(128)).map(_map_fn)
+        .filter(lambda d: math_ops.equal(d["x"] % 2, 0)))
+
+    bucketed_dataset = input_dataset.apply(
+        grouping.group_by_window(
+            lambda d: math_ops.cast(d["x"] % 2, dtypes.int64),
+            lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32))
+
+    iterator = bucketed_dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      # Get two minibatches ([0, 2, ...] and [64, 66, ...])
+      which_bucket0, bucketed_values_even0 = sess.run(get_next)
+      which_bucket1, bucketed_values_even1 = sess.run(get_next)
+
+      # Ensure that bucket 1 was completely filtered out
+      self.assertAllEqual(0, which_bucket0)
+      self.assertAllEqual(0, which_bucket1)
+      self.assertAllEqual(
+          np.arange(0, 64, 2, dtype=np.int64), bucketed_values_even0["x"])
+      self.assertAllEqual(
+          np.arange(64, 128, 2, dtype=np.int64), bucketed_values_even1["x"])
+
+  def testDynamicWindowSize(self):
+    components = np.arange(100).astype(np.int64)
+
+    # Key fn: even/odd
+    # Reduce fn: batches of 5
+    # Window size fn: even=5, odd=10
+
+    def window_size_func(key):
+      window_sizes = constant_op.constant([5, 10], dtype=dtypes.int64)
+      return window_sizes[key]
+
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
+        grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(20),
+                                 None, window_size_func))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(errors.OutOfRangeError):
+        batches = 0
+        while True:
+          result = sess.run(get_next)
+          is_even = all(x % 2 == 0 for x in result)
+          is_odd = all(x % 2 == 1 for x in result)
+          self.assertTrue(is_even or is_odd)
+          expected_batch_size = 5 if is_even else 10
+          self.assertEqual(expected_batch_size, result.shape[0])
+          batches += 1
+
+      self.assertEqual(batches, 15)
+
+  def testSimple(self):
+    components = np.random.randint(100, size=(200,)).astype(np.int64)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components).map(lambda x: x * x)
+        .apply(
+            grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
+                                     4)).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      counts = []
+      with self.assertRaises(errors.OutOfRangeError):
+        while True:
+          result = sess.run(get_next)
+          self.assertTrue(
+              all(x % 2 == 0
+                  for x in result) or all(x % 2 == 1)
+              for x in result)
+          counts.append(result.shape[0])
+
+      self.assertEqual(len(components), sum(counts))
+      num_full_batches = len([c for c in counts if c == 4])
+      self.assertGreaterEqual(num_full_batches, 24)
+      self.assertTrue(all(c == 4 for c in counts[:num_full_batches]))
+
+  def testImmediateOutput(self):
+    components = np.array(
+        [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0], dtype=np.int64)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components).repeat(-1).apply(
+            grouping.group_by_window(lambda x: x % 3, lambda _, xs: xs.batch(4),
+                                     4)).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      # The input is infinite, so this test demonstrates that:
+      # 1. We produce output without having to consume the entire input,
+      # 2. Different buckets can produce output at different rates, and
+      # 3. For deterministic input, the output is deterministic.
+      for _ in range(3):
+        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
+        self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
+        self.assertAllEqual([2, 2, 2, 2], sess.run(get_next))
+        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
+
+  def testSmallGroups(self):
+    components = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components).apply(
+            grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
+                                     4)).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
+      self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
+      # The small outputs at the end are deterministically produced in key
+      # order.
+      self.assertAllEqual([0, 0, 0], sess.run(get_next))
+      self.assertAllEqual([1], sess.run(get_next))
+
+  def testEmpty(self):
+    iterator = (
+        dataset_ops.Dataset.range(4).apply(
+            grouping.group_by_window(lambda _: 0, lambda _, xs: xs, 0))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          "Window size must be greater than zero, but got 0."):
+        print(sess.run(get_next))
+
+  def testReduceFuncError(self):
+    components = np.random.randint(100, size=(200,)).astype(np.int64)
+
+    def reduce_func(_, xs):
+      # Introduce an incorrect padded shape that cannot (currently) be
+      # detected at graph construction time.
+      return xs.padded_batch(
+          4,
+          padded_shapes=(tensor_shape.TensorShape([]),
+                         constant_op.constant([5], dtype=dtypes.int64) * -1))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: (x, ops.convert_to_tensor([x * x]))).apply(
+            grouping.group_by_window(lambda x, _: x % 2, reduce_func,
+                                     32)).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(get_next)
+
+  def testConsumeWindowDatasetMoreThanOnce(self):
+    components = np.random.randint(50, size=(200,)).astype(np.int64)
+
+    def reduce_func(key, window):
+      # Apply two different kinds of padding to the input: tight
+      # padding, and quantized (to a multiple of 10) padding.
+      return dataset_ops.Dataset.zip((
+          window.padded_batch(
+              4, padded_shapes=tensor_shape.TensorShape([None])),
+          window.padded_batch(
+              4, padded_shapes=ops.convert_to_tensor([(key + 1) * 10])),
+      ))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.fill([math_ops.cast(x, dtypes.int32)], x))
+        .apply(grouping.group_by_window(
+            lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64),
+            reduce_func, 4))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      counts = []
+      with self.assertRaises(errors.OutOfRangeError):
+        while True:
+          tight_result, multiple_of_10_result = sess.run(get_next)
+          self.assertEqual(0, multiple_of_10_result.shape[1] % 10)
+          self.assertAllEqual(tight_result,
+                              multiple_of_10_result[:, :tight_result.shape[1]])
+          counts.append(tight_result.shape[0])
+      self.assertEqual(len(components), sum(counts))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py b/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
new file mode 100644
index 0000000000..c0ec1486ab
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
@@ -0,0 +1,115 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.ignore_errors()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import error_ops
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import io_ops
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+_NUMPY_RANDOM_SEED = 42
+
+
+class IgnoreErrorsTest(test_base.DatasetTestBase):
+
+  def testMapIgnoreError(self):
+    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.check_numerics(x, "message")).apply(
+            error_ops.ignore_errors()))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for x in [1., 2., 3., 5.]:
+        self.assertEqual(x, sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testParallelMapIgnoreError(self):
+    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(components).map(
+            lambda x: array_ops.check_numerics(x, "message"),
+            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for x in [1., 2., 3., 5.]:
+        self.assertEqual(x, sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testReadFileIgnoreError(self):
+
+    def write_string_to_file(value, filename):
+      with open(filename, "w") as f:
+        f.write(value)
+
+    filenames = [
+        os.path.join(self.get_temp_dir(), "file_%d.txt" % i) for i in range(5)
+    ]
+    for filename in filenames:
+      write_string_to_file(filename, filename)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(filenames).map(
+            io_ops.read_file,
+            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      # All of the files are present.
+      sess.run(init_op)
+      for filename in filenames:
+        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+      # Delete one of the files.
+      os.remove(filenames[0])
+
+      # Attempting to read filenames[0] will fail, but ignore_errors()
+      # will catch the error.
+      sess.run(init_op)
+      for filename in filenames[1:]:
+        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
new file mode 100644
index 0000000000..5ee94e14dc
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
@@ -0,0 +1,239 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.make_batched_features_dataset()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.ops import readers as core_readers
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import test
+
+
+class MakeBatchedFeaturesDatasetTest(
+    reader_dataset_ops_test_base.MakeBatchedFeaturesDatasetTestBase):
+
+  def testRead(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 10]:
+        with ops.Graph().as_default() as g:
+          with self.session(graph=g) as sess:
+            # Basic test: read from file 0.
+            self.outputs = self.make_batch_feature(
+                filenames=self.test_filenames[0],
+                label_key="label",
+                num_epochs=num_epochs,
+                batch_size=batch_size).make_one_shot_iterator().get_next()
+            self.verify_records(
+                sess,
+                batch_size,
+                0,
+                num_epochs=num_epochs,
+                label_key_provided=True)
+            with self.assertRaises(errors.OutOfRangeError):
+              self._next_actual_batch(sess, label_key_provided=True)
+
+        with ops.Graph().as_default() as g:
+          with self.session(graph=g) as sess:
+            # Basic test: read from file 1.
+            self.outputs = self.make_batch_feature(
+                filenames=self.test_filenames[1],
+                label_key="label",
+                num_epochs=num_epochs,
+                batch_size=batch_size).make_one_shot_iterator().get_next()
+            self.verify_records(
+                sess,
+                batch_size,
+                1,
+                num_epochs=num_epochs,
+                label_key_provided=True)
+            with self.assertRaises(errors.OutOfRangeError):
+              self._next_actual_batch(sess, label_key_provided=True)
+
+        with ops.Graph().as_default() as g:
+          with self.session(graph=g) as sess:
+            # Basic test: read from both files.
+            self.outputs = self.make_batch_feature(
+                filenames=self.test_filenames,
+                label_key="label",
+                num_epochs=num_epochs,
+                batch_size=batch_size).make_one_shot_iterator().get_next()
+            self.verify_records(
+                sess,
+                batch_size,
+                num_epochs=num_epochs,
+                label_key_provided=True)
+            with self.assertRaises(errors.OutOfRangeError):
+              self._next_actual_batch(sess, label_key_provided=True)
+
+        with ops.Graph().as_default() as g:
+          with self.session(graph=g) as sess:
+            # Basic test: read from both files.
+            self.outputs = self.make_batch_feature(
+                filenames=self.test_filenames,
+                num_epochs=num_epochs,
+                batch_size=batch_size).make_one_shot_iterator().get_next()
+            self.verify_records(sess, batch_size, num_epochs=num_epochs)
+            with self.assertRaises(errors.OutOfRangeError):
+              self._next_actual_batch(sess)
+
+  def testReadWithEquivalentDataset(self):
+    features = {
+        "file": parsing_ops.FixedLenFeature([], dtypes.int64),
+        "record": parsing_ops.FixedLenFeature([], dtypes.int64),
+    }
+    dataset = (
+        core_readers.TFRecordDataset(self.test_filenames)
+        .map(lambda x: parsing_ops.parse_single_example(x, features))
+        .repeat(10).batch(2))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for file_batch, _, _, _, record_batch, _ in self._next_expected_batch(
+          range(self._num_files), 2, 10):
+        actual_batch = sess.run(next_element)
+        self.assertAllEqual(file_batch, actual_batch["file"])
+        self.assertAllEqual(record_batch, actual_batch["record"])
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testReadWithFusedShuffleRepeatDataset(self):
+    num_epochs = 5
+    total_records = num_epochs * self._num_records
+    for batch_size in [1, 2]:
+      # Test that shuffling with same seed produces the same result.
+      with ops.Graph().as_default() as g:
+        with self.session(graph=g) as sess:
+          outputs1 = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              shuffle=True,
+              shuffle_seed=5).make_one_shot_iterator().get_next()
+          outputs2 = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              shuffle=True,
+              shuffle_seed=5).make_one_shot_iterator().get_next()
+          for _ in range(total_records // batch_size):
+            batch1 = self._run_actual_batch(outputs1, sess)
+            batch2 = self._run_actual_batch(outputs2, sess)
+            for i in range(len(batch1)):
+              self.assertAllEqual(batch1[i], batch2[i])
+
+      # Test that shuffling with different seeds produces a different order.
+      with ops.Graph().as_default() as g:
+        with self.session(graph=g) as sess:
+          outputs1 = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              shuffle=True,
+              shuffle_seed=5).make_one_shot_iterator().get_next()
+          outputs2 = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              shuffle=True,
+              shuffle_seed=15).make_one_shot_iterator().get_next()
+          all_equal = True
+          for _ in range(total_records // batch_size):
+            batch1 = self._run_actual_batch(outputs1, sess)
+            batch2 = self._run_actual_batch(outputs2, sess)
+            for i in range(len(batch1)):
+              all_equal = all_equal and np.array_equal(batch1[i], batch2[i])
+          self.assertFalse(all_equal)
+
+  def testParallelReadersAndParsers(self):
+    num_epochs = 5
+    for batch_size in [1, 2]:
+      for reader_num_threads in [2, 4]:
+        for parser_num_threads in [2, 4]:
+          with ops.Graph().as_default() as g:
+            with self.session(graph=g) as sess:
+              self.outputs = self.make_batch_feature(
+                  filenames=self.test_filenames,
+                  label_key="label",
+                  num_epochs=num_epochs,
+                  batch_size=batch_size,
+                  reader_num_threads=reader_num_threads,
+                  parser_num_threads=parser_num_threads).make_one_shot_iterator(
+                  ).get_next()
+              self.verify_records(
+                  sess,
+                  batch_size,
+                  num_epochs=num_epochs,
+                  label_key_provided=True,
+                  interleave_cycle_length=reader_num_threads)
+              with self.assertRaises(errors.OutOfRangeError):
+                self._next_actual_batch(sess, label_key_provided=True)
+
+          with ops.Graph().as_default() as g:
+            with self.session(graph=g) as sess:
+              self.outputs = self.make_batch_feature(
+                  filenames=self.test_filenames,
+                  num_epochs=num_epochs,
+                  batch_size=batch_size,
+                  reader_num_threads=reader_num_threads,
+                  parser_num_threads=parser_num_threads).make_one_shot_iterator(
+                  ).get_next()
+              self.verify_records(
+                  sess,
+                  batch_size,
+                  num_epochs=num_epochs,
+                  interleave_cycle_length=reader_num_threads)
+              with self.assertRaises(errors.OutOfRangeError):
+                self._next_actual_batch(sess)
+
+  def testDropFinalBatch(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 10]:
+        with ops.Graph().as_default():
+          # Basic test: read from file 0.
+          outputs = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              label_key="label",
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              drop_final_batch=True).make_one_shot_iterator().get_next()
+          for tensor in nest.flatten(outputs):
+            if isinstance(tensor, ops.Tensor):  # Guard against SparseTensor.
+              self.assertEqual(tensor.shape[0], batch_size)
+
+  def testIndefiniteRepeatShapeInference(self):
+    dataset = self.make_batch_feature(
+        filenames=self.test_filenames[0],
+        label_key="label",
+        num_epochs=None,
+        batch_size=32)
+    for shape, clazz in zip(nest.flatten(dataset.output_shapes),
+                            nest.flatten(dataset.output_classes)):
+      if issubclass(clazz, ops.Tensor):
+        self.assertEqual(32, shape[0])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py
similarity index 57%
rename from tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py
index a02f4bd14f..e4bf089184 100644
--- a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.make_csv_dataset()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -23,226 +23,16 @@ import zlib
 
 import numpy as np
 
-from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
 from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import string_ops
 from tensorflow.python.platform import test
 
 
-class ReadBatchFeaturesTest(
-    reader_dataset_ops_test_base.ReadBatchFeaturesTestBase):
-
-  def testRead(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 10]:
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from file 0.
-            self.outputs = self.make_batch_feature(
-                filenames=self.test_filenames[0],
-                label_key="label",
-                num_epochs=num_epochs,
-                batch_size=batch_size).make_one_shot_iterator().get_next()
-            self.verify_records(
-                sess,
-                batch_size,
-                0,
-                num_epochs=num_epochs,
-                label_key_provided=True)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess, label_key_provided=True)
-
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from file 1.
-            self.outputs = self.make_batch_feature(
-                filenames=self.test_filenames[1],
-                label_key="label",
-                num_epochs=num_epochs,
-                batch_size=batch_size).make_one_shot_iterator().get_next()
-            self.verify_records(
-                sess,
-                batch_size,
-                1,
-                num_epochs=num_epochs,
-                label_key_provided=True)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess, label_key_provided=True)
-
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from both files.
-            self.outputs = self.make_batch_feature(
-                filenames=self.test_filenames,
-                label_key="label",
-                num_epochs=num_epochs,
-                batch_size=batch_size).make_one_shot_iterator().get_next()
-            self.verify_records(
-                sess,
-                batch_size,
-                num_epochs=num_epochs,
-                label_key_provided=True)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess, label_key_provided=True)
-
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from both files.
-            self.outputs = self.make_batch_feature(
-                filenames=self.test_filenames,
-                num_epochs=num_epochs,
-                batch_size=batch_size).make_one_shot_iterator().get_next()
-            self.verify_records(sess, batch_size, num_epochs=num_epochs)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess)
-
-  def testReadWithEquivalentDataset(self):
-    features = {
-        "file": parsing_ops.FixedLenFeature([], dtypes.int64),
-        "record": parsing_ops.FixedLenFeature([], dtypes.int64),
-    }
-    dataset = (
-        core_readers.TFRecordDataset(self.test_filenames)
-        .map(lambda x: parsing_ops.parse_single_example(x, features))
-        .repeat(10).batch(2))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for file_batch, _, _, _, record_batch, _ in self._next_expected_batch(
-          range(self._num_files), 2, 10):
-        actual_batch = sess.run(next_element)
-        self.assertAllEqual(file_batch, actual_batch["file"])
-        self.assertAllEqual(record_batch, actual_batch["record"])
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testReadWithFusedShuffleRepeatDataset(self):
-    num_epochs = 5
-    total_records = num_epochs * self._num_records
-    for batch_size in [1, 2]:
-      # Test that shuffling with same seed produces the same result.
-      with ops.Graph().as_default() as g:
-        with self.session(graph=g) as sess:
-          outputs1 = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              shuffle=True,
-              shuffle_seed=5).make_one_shot_iterator().get_next()
-          outputs2 = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              shuffle=True,
-              shuffle_seed=5).make_one_shot_iterator().get_next()
-          for _ in range(total_records // batch_size):
-            batch1 = self._run_actual_batch(outputs1, sess)
-            batch2 = self._run_actual_batch(outputs2, sess)
-            for i in range(len(batch1)):
-              self.assertAllEqual(batch1[i], batch2[i])
-
-      # Test that shuffling with different seeds produces a different order.
-      with ops.Graph().as_default() as g:
-        with self.session(graph=g) as sess:
-          outputs1 = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              shuffle=True,
-              shuffle_seed=5).make_one_shot_iterator().get_next()
-          outputs2 = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              shuffle=True,
-              shuffle_seed=15).make_one_shot_iterator().get_next()
-          all_equal = True
-          for _ in range(total_records // batch_size):
-            batch1 = self._run_actual_batch(outputs1, sess)
-            batch2 = self._run_actual_batch(outputs2, sess)
-            for i in range(len(batch1)):
-              all_equal = all_equal and np.array_equal(batch1[i], batch2[i])
-          self.assertFalse(all_equal)
-
-  def testParallelReadersAndParsers(self):
-    num_epochs = 5
-    for batch_size in [1, 2]:
-      for reader_num_threads in [2, 4]:
-        for parser_num_threads in [2, 4]:
-          with ops.Graph().as_default() as g:
-            with self.session(graph=g) as sess:
-              self.outputs = self.make_batch_feature(
-                  filenames=self.test_filenames,
-                  label_key="label",
-                  num_epochs=num_epochs,
-                  batch_size=batch_size,
-                  reader_num_threads=reader_num_threads,
-                  parser_num_threads=parser_num_threads).make_one_shot_iterator(
-                  ).get_next()
-              self.verify_records(
-                  sess,
-                  batch_size,
-                  num_epochs=num_epochs,
-                  label_key_provided=True,
-                  interleave_cycle_length=reader_num_threads)
-              with self.assertRaises(errors.OutOfRangeError):
-                self._next_actual_batch(sess, label_key_provided=True)
-
-          with ops.Graph().as_default() as g:
-            with self.session(graph=g) as sess:
-              self.outputs = self.make_batch_feature(
-                  filenames=self.test_filenames,
-                  num_epochs=num_epochs,
-                  batch_size=batch_size,
-                  reader_num_threads=reader_num_threads,
-                  parser_num_threads=parser_num_threads).make_one_shot_iterator(
-                  ).get_next()
-              self.verify_records(
-                  sess,
-                  batch_size,
-                  num_epochs=num_epochs,
-                  interleave_cycle_length=reader_num_threads)
-              with self.assertRaises(errors.OutOfRangeError):
-                self._next_actual_batch(sess)
-
-  def testDropFinalBatch(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 10]:
-        with ops.Graph().as_default():
-          # Basic test: read from file 0.
-          outputs = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              label_key="label",
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              drop_final_batch=True).make_one_shot_iterator().get_next()
-          for tensor in nest.flatten(outputs):
-            if isinstance(tensor, ops.Tensor):  # Guard against SparseTensor.
-              self.assertEqual(tensor.shape[0], batch_size)
-
-  def testIndefiniteRepeatShapeInference(self):
-    dataset = self.make_batch_feature(
-        filenames=self.test_filenames[0],
-        label_key="label",
-        num_epochs=None,
-        batch_size=32)
-    for shape, clazz in zip(nest.flatten(dataset.output_shapes),
-                            nest.flatten(dataset.output_classes)):
-      if issubclass(clazz, ops.Tensor):
-        self.assertEqual(32, shape[0])
-
-
 class MakeCsvDatasetTest(test_base.DatasetTestBase):
 
   def _make_csv_dataset(self, filenames, batch_size, num_epochs=1, **kwargs):
@@ -866,218 +656,5 @@ class MakeCsvDatasetTest(test_base.DatasetTestBase):
       self.assertEqual(32, shape[0])
 
 
-class MakeTFRecordDatasetTest(
-    reader_dataset_ops_test_base.TFRecordDatasetTestBase):
-
-  def _interleave(self, iterators, cycle_length):
-    pending_iterators = iterators
-    open_iterators = []
-    num_open = 0
-    for i in range(cycle_length):
-      if pending_iterators:
-        open_iterators.append(pending_iterators.pop(0))
-        num_open += 1
-
-    while num_open:
-      for i in range(min(cycle_length, len(open_iterators))):
-        if open_iterators[i] is None:
-          continue
-        try:
-          yield next(open_iterators[i])
-        except StopIteration:
-          if pending_iterators:
-            open_iterators[i] = pending_iterators.pop(0)
-          else:
-            open_iterators[i] = None
-            num_open -= 1
-
-  def _next_expected_batch(self,
-                           file_indices,
-                           batch_size,
-                           num_epochs,
-                           cycle_length,
-                           drop_final_batch,
-                           use_parser_fn):
-
-    def _next_record(file_indices):
-      for j in file_indices:
-        for i in range(self._num_records):
-          yield j, i
-
-    def _next_record_interleaved(file_indices, cycle_length):
-      return self._interleave([_next_record([i]) for i in file_indices],
-                              cycle_length)
-
-    record_batch = []
-    batch_index = 0
-    for _ in range(num_epochs):
-      if cycle_length == 1:
-        next_records = _next_record(file_indices)
-      else:
-        next_records = _next_record_interleaved(file_indices, cycle_length)
-      for f, r in next_records:
-        record = self._record(f, r)
-        if use_parser_fn:
-          record = record[1:]
-        record_batch.append(record)
-        batch_index += 1
-        if len(record_batch) == batch_size:
-          yield record_batch
-          record_batch = []
-          batch_index = 0
-    if record_batch and not drop_final_batch:
-      yield record_batch
-
-  def _verify_records(self,
-                      sess,
-                      outputs,
-                      batch_size,
-                      file_index,
-                      num_epochs,
-                      interleave_cycle_length,
-                      drop_final_batch,
-                      use_parser_fn):
-    if file_index is not None:
-      file_indices = [file_index]
-    else:
-      file_indices = range(self._num_files)
-
-    for expected_batch in self._next_expected_batch(
-        file_indices, batch_size, num_epochs, interleave_cycle_length,
-        drop_final_batch, use_parser_fn):
-      actual_batch = sess.run(outputs)
-      self.assertAllEqual(expected_batch, actual_batch)
-
-  def _read_test(self, batch_size, num_epochs, file_index=None,
-                 num_parallel_reads=1, drop_final_batch=False, parser_fn=False):
-    if file_index is None:
-      file_pattern = self.test_filenames
-    else:
-      file_pattern = self.test_filenames[file_index]
-
-    if parser_fn:
-      fn = lambda x: string_ops.substr(x, 1, 999)
-    else:
-      fn = None
-
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        outputs = readers.make_tf_record_dataset(
-            file_pattern=file_pattern,
-            num_epochs=num_epochs,
-            batch_size=batch_size,
-            parser_fn=fn,
-            num_parallel_reads=num_parallel_reads,
-            drop_final_batch=drop_final_batch,
-            shuffle=False).make_one_shot_iterator().get_next()
-        self._verify_records(
-            sess, outputs, batch_size, file_index, num_epochs=num_epochs,
-            interleave_cycle_length=num_parallel_reads,
-            drop_final_batch=drop_final_batch, use_parser_fn=parser_fn)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(outputs)
-
-  def testRead(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 3]:
-        # Basic test: read from file 0.
-        self._read_test(batch_size, num_epochs, 0)
-
-        # Basic test: read from file 1.
-        self._read_test(batch_size, num_epochs, 1)
-
-        # Basic test: read from both files.
-        self._read_test(batch_size, num_epochs)
-
-        # Basic test: read from both files, with parallel reads.
-        self._read_test(batch_size, num_epochs, num_parallel_reads=8)
-
-  def testDropFinalBatch(self):
-    for batch_size in [1, 2, 10]:
-      for num_epochs in [1, 3]:
-        # Read from file 0.
-        self._read_test(batch_size, num_epochs, 0, drop_final_batch=True)
-
-        # Read from both files.
-        self._read_test(batch_size, num_epochs, drop_final_batch=True)
-
-        # Read from both files, with parallel reads.
-        self._read_test(batch_size, num_epochs, num_parallel_reads=8,
-                        drop_final_batch=True)
-
-  def testParserFn(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 3]:
-        for drop_final_batch in [False, True]:
-          self._read_test(batch_size, num_epochs, parser_fn=True,
-                          drop_final_batch=drop_final_batch)
-          self._read_test(batch_size, num_epochs, num_parallel_reads=8,
-                          parser_fn=True, drop_final_batch=drop_final_batch)
-
-  def _shuffle_test(self, batch_size, num_epochs, num_parallel_reads=1,
-                    seed=None):
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        dataset = readers.make_tf_record_dataset(
-            file_pattern=self.test_filenames,
-            num_epochs=num_epochs,
-            batch_size=batch_size,
-            num_parallel_reads=num_parallel_reads,
-            shuffle=True,
-            shuffle_seed=seed)
-        iterator = dataset.make_initializable_iterator()
-        next_element = iterator.get_next()
-
-        sess.run(iterator.initializer)
-        first_batches = []
-        try:
-          while True:
-            first_batches.append(sess.run(next_element))
-        except errors.OutOfRangeError:
-          pass
-
-        sess.run(iterator.initializer)
-        second_batches = []
-        try:
-          while True:
-            second_batches.append(sess.run(next_element))
-        except errors.OutOfRangeError:
-          pass
-
-        self.assertEqual(len(first_batches), len(second_batches))
-        if seed is not None:
-          # if you set a seed, should get the same results
-          for i in range(len(first_batches)):
-            self.assertAllEqual(first_batches[i], second_batches[i])
-
-        expected = []
-        for f in range(self._num_files):
-          for r in range(self._num_records):
-            expected.extend([self._record(f, r)] * num_epochs)
-
-        for batches in (first_batches, second_batches):
-          actual = []
-          for b in batches:
-            actual.extend(b)
-          self.assertAllEqual(sorted(expected), sorted(actual))
-
-  def testShuffle(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 3]:
-        for num_parallel_reads in [1, 2]:
-          # Test that all expected elements are produced
-          self._shuffle_test(batch_size, num_epochs, num_parallel_reads)
-          # Test that elements are produced in a consistent order if
-          # you specify a seed.
-          self._shuffle_test(batch_size, num_epochs, num_parallel_reads,
-                             seed=21345)
-
-  def testIndefiniteRepeatShapeInference(self):
-    dataset = readers.make_tf_record_dataset(
-        file_pattern=self.test_filenames, num_epochs=None, batch_size=32)
-    for shape in nest.flatten(dataset.output_shapes):
-      self.assertEqual(32, shape[0])
-
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
new file mode 100644
index 0000000000..657cf3c00e
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
@@ -0,0 +1,243 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.make_tf_record_dataset()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.ops import readers
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import test
+
+
+class MakeTFRecordDatasetTest(
+    reader_dataset_ops_test_base.TFRecordDatasetTestBase):
+
+  def _interleave(self, iterators, cycle_length):
+    pending_iterators = iterators
+    open_iterators = []
+    num_open = 0
+    for i in range(cycle_length):
+      if pending_iterators:
+        open_iterators.append(pending_iterators.pop(0))
+        num_open += 1
+
+    while num_open:
+      for i in range(min(cycle_length, len(open_iterators))):
+        if open_iterators[i] is None:
+          continue
+        try:
+          yield next(open_iterators[i])
+        except StopIteration:
+          if pending_iterators:
+            open_iterators[i] = pending_iterators.pop(0)
+          else:
+            open_iterators[i] = None
+            num_open -= 1
+
+  def _next_expected_batch(self,
+                           file_indices,
+                           batch_size,
+                           num_epochs,
+                           cycle_length,
+                           drop_final_batch,
+                           use_parser_fn):
+
+    def _next_record(file_indices):
+      for j in file_indices:
+        for i in range(self._num_records):
+          yield j, i
+
+    def _next_record_interleaved(file_indices, cycle_length):
+      return self._interleave([_next_record([i]) for i in file_indices],
+                              cycle_length)
+
+    record_batch = []
+    batch_index = 0
+    for _ in range(num_epochs):
+      if cycle_length == 1:
+        next_records = _next_record(file_indices)
+      else:
+        next_records = _next_record_interleaved(file_indices, cycle_length)
+      for f, r in next_records:
+        record = self._record(f, r)
+        if use_parser_fn:
+          record = record[1:]
+        record_batch.append(record)
+        batch_index += 1
+        if len(record_batch) == batch_size:
+          yield record_batch
+          record_batch = []
+          batch_index = 0
+    if record_batch and not drop_final_batch:
+      yield record_batch
+
+  def _verify_records(self,
+                      sess,
+                      outputs,
+                      batch_size,
+                      file_index,
+                      num_epochs,
+                      interleave_cycle_length,
+                      drop_final_batch,
+                      use_parser_fn):
+    if file_index is not None:
+      file_indices = [file_index]
+    else:
+      file_indices = range(self._num_files)
+
+    for expected_batch in self._next_expected_batch(
+        file_indices, batch_size, num_epochs, interleave_cycle_length,
+        drop_final_batch, use_parser_fn):
+      actual_batch = sess.run(outputs)
+      self.assertAllEqual(expected_batch, actual_batch)
+
+  def _read_test(self, batch_size, num_epochs, file_index=None,
+                 num_parallel_reads=1, drop_final_batch=False, parser_fn=False):
+    if file_index is None:
+      file_pattern = self.test_filenames
+    else:
+      file_pattern = self.test_filenames[file_index]
+
+    if parser_fn:
+      fn = lambda x: string_ops.substr(x, 1, 999)
+    else:
+      fn = None
+
+    with ops.Graph().as_default() as g:
+      with self.session(graph=g) as sess:
+        outputs = readers.make_tf_record_dataset(
+            file_pattern=file_pattern,
+            num_epochs=num_epochs,
+            batch_size=batch_size,
+            parser_fn=fn,
+            num_parallel_reads=num_parallel_reads,
+            drop_final_batch=drop_final_batch,
+            shuffle=False).make_one_shot_iterator().get_next()
+        self._verify_records(
+            sess, outputs, batch_size, file_index, num_epochs=num_epochs,
+            interleave_cycle_length=num_parallel_reads,
+            drop_final_batch=drop_final_batch, use_parser_fn=parser_fn)
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(outputs)
+
+  def testRead(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 3]:
+        # Basic test: read from file 0.
+        self._read_test(batch_size, num_epochs, 0)
+
+        # Basic test: read from file 1.
+        self._read_test(batch_size, num_epochs, 1)
+
+        # Basic test: read from both files.
+        self._read_test(batch_size, num_epochs)
+
+        # Basic test: read from both files, with parallel reads.
+        self._read_test(batch_size, num_epochs, num_parallel_reads=8)
+
+  def testDropFinalBatch(self):
+    for batch_size in [1, 2, 10]:
+      for num_epochs in [1, 3]:
+        # Read from file 0.
+        self._read_test(batch_size, num_epochs, 0, drop_final_batch=True)
+
+        # Read from both files.
+        self._read_test(batch_size, num_epochs, drop_final_batch=True)
+
+        # Read from both files, with parallel reads.
+        self._read_test(batch_size, num_epochs, num_parallel_reads=8,
+                        drop_final_batch=True)
+
+  def testParserFn(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 3]:
+        for drop_final_batch in [False, True]:
+          self._read_test(batch_size, num_epochs, parser_fn=True,
+                          drop_final_batch=drop_final_batch)
+          self._read_test(batch_size, num_epochs, num_parallel_reads=8,
+                          parser_fn=True, drop_final_batch=drop_final_batch)
+
+  def _shuffle_test(self, batch_size, num_epochs, num_parallel_reads=1,
+                    seed=None):
+    with ops.Graph().as_default() as g:
+      with self.session(graph=g) as sess:
+        dataset = readers.make_tf_record_dataset(
+            file_pattern=self.test_filenames,
+            num_epochs=num_epochs,
+            batch_size=batch_size,
+            num_parallel_reads=num_parallel_reads,
+            shuffle=True,
+            shuffle_seed=seed)
+        iterator = dataset.make_initializable_iterator()
+        next_element = iterator.get_next()
+
+        sess.run(iterator.initializer)
+        first_batches = []
+        try:
+          while True:
+            first_batches.append(sess.run(next_element))
+        except errors.OutOfRangeError:
+          pass
+
+        sess.run(iterator.initializer)
+        second_batches = []
+        try:
+          while True:
+            second_batches.append(sess.run(next_element))
+        except errors.OutOfRangeError:
+          pass
+
+        self.assertEqual(len(first_batches), len(second_batches))
+        if seed is not None:
+          # if you set a seed, should get the same results
+          for i in range(len(first_batches)):
+            self.assertAllEqual(first_batches[i], second_batches[i])
+
+        expected = []
+        for f in range(self._num_files):
+          for r in range(self._num_records):
+            expected.extend([self._record(f, r)] * num_epochs)
+
+        for batches in (first_batches, second_batches):
+          actual = []
+          for b in batches:
+            actual.extend(b)
+          self.assertAllEqual(sorted(expected), sorted(actual))
+
+  def testShuffle(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 3]:
+        for num_parallel_reads in [1, 2]:
+          # Test that all expected elements are produced
+          self._shuffle_test(batch_size, num_epochs, num_parallel_reads)
+          # Test that elements are produced in a consistent order if
+          # you specify a seed.
+          self._shuffle_test(batch_size, num_epochs, num_parallel_reads,
+                             seed=21345)
+
+  def testIndefiniteRepeatShapeInference(self):
+    dataset = readers.make_tf_record_dataset(
+        file_pattern=self.test_filenames, num_epochs=None, batch_size=32)
+    for shape in nest.flatten(dataset.output_shapes):
+      self.assertEqual(32, shape[0])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
new file mode 100644
index 0000000000..afd0fc3abf
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -0,0 +1,337 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.map_and_batch()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import script_ops
+from tensorflow.python.platform import test
+
+
+class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ("Default", None, None),
+      ("SequentialCalls", 1, None),
+      ("ParallelCalls", 2, None),
+      ("ParallelBatches", None, 10),
+  )
+  def testMapAndBatch(self, num_parallel_calls, num_parallel_batches):
+    """Test a dataset that maps a TF function across its input elements."""
+    # The pipeline is TensorSliceDataset ->
+    # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size).
+    components = (np.arange(7),
+                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
+                  np.array(37.0) * np.arange(7))
+
+    count = array_ops.placeholder(dtypes.int64, shape=[])
+    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
+
+    def _map_fn(x, y, z):
+      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
+            batching.map_and_batch(
+                map_func=_map_fn,
+                batch_size=batch_size,
+                num_parallel_calls=num_parallel_calls,
+                num_parallel_batches=num_parallel_batches))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual([[None] + list(c.shape[1:]) for c in components],
+                     [t.shape.as_list() for t in get_next])
+
+    with self.cached_session() as sess:
+      # Batch of a finite input, where the batch_size divides the
+      # total number of elements.
+      sess.run(init_op, feed_dict={count: 28, batch_size: 14})
+      num_batches = (28 * 7) // 14
+      for i in range(num_batches):
+        result = sess.run(get_next)
+        for component, result_component in zip(components, result):
+          for j in range(14):
+            self.assertAllEqual(component[(i * 14 + j) % 7]**2,
+                                result_component[j])
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+      # Batch of a finite input, where the batch_size does not
+      # divide the total number of elements.
+      sess.run(init_op, feed_dict={count: 14, batch_size: 8})
+
+      # We expect (num_batches - 1) full-sized batches.
+      num_batches = int(math.ceil((14 * 7) / 8))
+      for i in range(num_batches - 1):
+        result = sess.run(get_next)
+        for component, result_component in zip(components, result):
+          for j in range(8):
+            self.assertAllEqual(component[(i * 8 + j) % 7]**2,
+                                result_component[j])
+      result = sess.run(get_next)
+      for component, result_component in zip(components, result):
+        for j in range((14 * 7) % 8):
+          self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
+                              result_component[j])
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+      # Batch of an empty input should fail straight away.
+      sess.run(init_op, feed_dict={count: 0, batch_size: 8})
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+      # Empty batch should be an initialization time error.
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(init_op, feed_dict={count: 14, batch_size: 0})
+
+  @parameterized.named_parameters(
+      ("Even", False),
+      ("Uneven", True),
+  )
+  def testMapAndBatchPartialBatch(self, drop_remainder):
+    iterator = (
+        dataset_ops.Dataset.range(10).apply(
+            batching.map_and_batch(
+                lambda x: array_ops.reshape(x * x, [1]),
+                batch_size=4,
+                drop_remainder=drop_remainder)).make_one_shot_iterator())
+    if drop_remainder:
+      self.assertEqual([4, 1], iterator.output_shapes.as_list())
+    else:
+      self.assertEqual([None, 1], iterator.output_shapes.as_list())
+    next_element = iterator.get_next()
+    with self.cached_session() as sess:
+      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
+      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
+      if not drop_remainder:
+        self.assertAllEqual([[64], [81]], sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testMapAndBatchYieldsPartialBatch(self):
+    iterator = (dataset_ops.Dataset.range(10)
+                .apply(batching.map_and_batch(
+                    lambda x: array_ops.reshape(x * x, [1]), 4))
+                .make_one_shot_iterator())
+    self.assertEqual([None, 1], iterator.output_shapes.as_list())
+    next_element = iterator.get_next()
+    with self.cached_session() as sess:
+      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
+      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
+      self.assertAllEqual([[64], [81]], sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testMapAndBatchParallelGetNext(self):
+    iterator = (dataset_ops.Dataset.range(50000)
+                .apply(batching.map_and_batch(lambda x: x, batch_size=100))
+                .make_one_shot_iterator())
+    elements = []
+    for _ in range(100):
+      elements.append(iterator.get_next())
+    with self.cached_session() as sess:
+      for i in range(5):
+        got = sess.run(elements)
+        got.sort(key=lambda x: x[0])
+        expected = []
+        for j in range(100):
+          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
+        self.assertAllEqual(got, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elements)
+
+  def testMapAndBatchParallelGetNextDropRemainder(self):
+    iterator = (
+        dataset_ops.Dataset.range(49999).apply(
+            batching.map_and_batch(
+                lambda x: x, batch_size=100, drop_remainder=True))
+        .make_one_shot_iterator())
+    elements = []
+    for _ in range(100):
+      elements.append(iterator.get_next())
+    with self.cached_session() as sess:
+      for i in range(4):
+        got = sess.run(elements)
+        got.sort(key=lambda x: x[0])
+        expected = []
+        for j in range(100):
+          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
+        self.assertAllEqual(got, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elements)
+
+  def testMapAndBatchSparse(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
+
+    iterator = dataset_ops.Dataset.range(10).apply(
+        batching.map_and_batch(_sparse, 5)).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for i in range(2):
+        actual = sess.run(get_next)
+        expected = sparse_tensor.SparseTensorValue(
+            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
+            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
+            dense_shape=[5, 1])
+        self.assertTrue(sparse_tensor.is_sparse(actual))
+        self.assertSparseValuesEqual(actual, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testMapAndBatchFails(self):
+    """Test a dataset that maps a TF function across its input elements."""
+    dataset = dataset_ops.Dataset.from_tensors(
+        array_ops.check_numerics(
+            constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
+    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
+    iterator = (
+        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    with self.cached_session() as sess:
+      with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
+        sess.run(init_op, feed_dict={batch_size: 14})
+
+  def testMapAndBatchShapeMismatch(self):
+    """Test a dataset that maps a TF function across its input elements."""
+
+    def generator():
+      yield [1]
+      yield [2]
+      yield [3]
+      yield [[4, 5, 6]]
+
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=dtypes.int32)
+    batch_size = 4
+    iterator = (
+        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   "number of elements does not match"):
+        sess.run(get_next)
+
+  def testMapAndBatchImplicitDispose(self):
+    # Tests whether a map and batch dataset will be cleaned up correctly when
+    # the pipeline does not run it until exhaustion.
+    # The pipeline is TensorSliceDataset -> RepeatDataset(1000) ->
+    # MapAndBatchDataset(f=square_3, batch_size=100).
+    components = (np.arange(1000),
+                  np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis],
+                  np.array(37.0) * np.arange(1000))
+
+    def _map_fn(x, y, z):
+      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
+
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
+        1000).apply(batching.map_and_batch(_map_fn, batch_size=100))
+    dataset = dataset.prefetch(5)
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(3):
+        sess.run(get_next)
+
+  @parameterized.named_parameters(
+      ("1", 0),
+      ("2", 5),
+      ("3", 10),
+      ("4", 90),
+      ("5", 95),
+      ("6", 99),
+  )
+  def testMapAndBatchOutOfRangeError(self, threshold):
+
+    def raising_py_fn(i):
+      if i >= threshold:
+        raise StopIteration()
+      else:
+        return i
+
+    iterator = (
+        dataset_ops.Dataset.range(100).apply(
+            batching.map_and_batch(
+                lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
+                batch_size=10)).make_one_shot_iterator())
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(threshold // 10):
+        self.assertAllEqual([i * 10 + j for j in range(10)], sess.run(get_next))
+      if threshold % 10 != 0:
+        self.assertAllEqual(
+            [threshold // 10 * 10 + j for j in range(threshold % 10)],
+            sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  @parameterized.named_parameters(
+      ("1", False, dtypes.bool),
+      ("2", -42, dtypes.int8),
+      ("3", -42, dtypes.int16),
+      ("4", -42, dtypes.int32),
+      ("5", -42, dtypes.int64),
+      ("6", 42, dtypes.uint8),
+      ("7", 42, dtypes.uint16),
+      ("8", 42.0, dtypes.float16),
+      ("9", 42.0, dtypes.float32),
+      ("10", 42.0, dtypes.float64),
+      ("11", b"hello", dtypes.string),
+  )
+  def testMapAndBatchTypes(self, element, dtype):
+    def gen():
+      yield element
+
+    dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply(
+        batching.map_and_batch(lambda x: x, batch_size=10))
+
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(10):
+        self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
similarity index 94%
rename from tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
index 4432dcb05a..5e419a9b2f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline statistics gathering ops."""
+"""Tests for the private `override_threadpool()` transformation."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -32,8 +32,8 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
 
 
-class OverrideThreadpoolDatasetTest(test_base.DatasetTestBase,
-                                    parameterized.TestCase):
+class OverrideThreadpoolTest(test_base.DatasetTestBase,
+                             parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("1", 1, None),
diff --git a/tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
similarity index 99%
rename from tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
index 560902caad..90ac250df7 100644
--- a/tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.parallel_interleave()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -37,7 +37,7 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
 
-class ParallelInterleaveDatasetTest(test_base.DatasetTestBase):
+class ParallelInterleaveTest(test_base.DatasetTestBase):
 
   def setUp(self):
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/parse_example_dataset_test.py
similarity index 99%
rename from tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/parse_example_dataset_test.py
index 13f924b656..723e709ae8 100644
--- a/tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/parse_example_dataset_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for tensorflow.ops.parsing_ops."""
+"""Tests for `tf.data.experimental.parse_example_dataset()."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -73,7 +73,7 @@ def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
     i += 1
 
 
-class ParseExampleTest(test_base.DatasetTestBase):
+class ParseExampleDatasetTest(test_base.DatasetTestBase):
 
   def _test(self,
             input_tensor,
diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
new file mode 100644
index 0000000000..f73725366c
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
@@ -0,0 +1,234 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.prefetch_to_device()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.data.experimental.ops import prefetching_ops
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import test
+
+
+class PrefetchToDeviceTest(test_base.DatasetTestBase):
+
+  def testPrefetchToDevice(self):
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/cpu:1"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_one_shot_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element.dtype)
+    self.assertEqual([], next_element.shape)
+
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+    with self.test_session(config=worker_config) as sess:
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchToSameDevice(self):
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device(
+            "/job:localhost/replica:0/task:0/device:CPU:0"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_one_shot_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element.dtype)
+    self.assertEqual([], next_element.shape)
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchDictToDevice(self):
+    host_dataset = dataset_ops.Dataset.range(10).map(lambda x: {"a": x})
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/cpu:1"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_one_shot_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element["a"].dtype)
+    self.assertEqual([], next_element["a"].shape)
+
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+    with self.test_session(config=worker_config) as sess:
+      for i in range(10):
+        self.assertEqual({"a": i}, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchSparseTensorsToDevice(self):
+    def make_tensor(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=[[0, 0]], values=(i*[1]), dense_shape=[2, 2])
+    host_dataset = dataset_ops.Dataset.range(10).map(make_tensor)
+
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/cpu:1"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_one_shot_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element.dtype)
+
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+    with self.test_session(config=worker_config) as sess:
+      for i in range(10):
+        actual = sess.run(next_element)
+        self.assertAllEqual([i], actual.values)
+        self.assertAllEqual([[0, 0]], actual.indices)
+        self.assertAllEqual([2, 2], actual.dense_shape)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchToDeviceGpu(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/gpu:0"))
+
+    iterator = device_dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchToDeviceWithReInit(self):
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/cpu:1"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_initializable_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element.dtype)
+    self.assertEqual([], next_element.shape)
+
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+    with self.test_session(config=worker_config) as sess:
+      sess.run(iterator.initializer)
+      for i in range(5):
+        self.assertEqual(i, sess.run(next_element))
+      sess.run(iterator.initializer)
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchToDeviceGpuWithReInit(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/gpu:0"))
+
+    iterator = device_dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer)
+      for i in range(5):
+        self.assertEqual(i, sess.run(next_element))
+      sess.run(iterator.initializer)
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
index b6ab80d132..fe0b3b5f3b 100644
--- a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
@@ -63,11 +63,11 @@ class FixedLengthRecordDatasetTestBase(test_base.DatasetTestBase):
     return filenames
 
 
-class ReadBatchFeaturesTestBase(test_base.DatasetTestBase):
+class MakeBatchedFeaturesDatasetTestBase(test_base.DatasetTestBase):
   """Base class for setting up and testing `make_batched_feature_dataset`."""
 
   def setUp(self):
-    super(ReadBatchFeaturesTestBase, self).setUp()
+    super(MakeBatchedFeaturesDatasetTestBase, self).setUp()
     self._num_files = 2
     self._num_records = 7
     self.test_filenames = self._createFiles()
diff --git a/tensorflow/python/data/experimental/kernel_tests/resample_test.py b/tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py
similarity index 97%
rename from tensorflow/python/data/experimental/kernel_tests/resample_test.py
rename to tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py
index 775648c943..4c879dbae6 100644
--- a/tensorflow/python/data/experimental/kernel_tests/resample_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.rejection_resample()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -58,7 +58,7 @@ def _time_resampling(
   return end_time - start_time
 
 
-class ResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
+class RejectionResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("InitialDistributionKnown", True),
diff --git a/tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
similarity index 95%
rename from tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
index 3fc7157bc5..516e489d04 100644
--- a/tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for the private `_RestructuredDataset` transformation."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -26,7 +26,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class DatasetConstructorTest(test_base.DatasetTestBase):
+class RestructuredDatasetTest(test_base.DatasetTestBase):
 
   def testRestructureDataset(self):
     components = (array_ops.placeholder(dtypes.int32),
diff --git a/tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
similarity index 98%
rename from tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/scan_test.py
index 78ec80de23..0730455431 100644
--- a/tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.scan()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -34,7 +34,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ScanDatasetTest(test_base.DatasetTestBase):
+class ScanTest(test_base.DatasetTestBase):
 
   def _counting_dataset(self, start, scan_fn):
     return dataset_ops.Dataset.from_tensors(0).repeat().apply(
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index 58a335ae4f..e556b65b7c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -69,6 +69,26 @@ py_test(
     ],
 )
 
+py_test(
+    name = "checkpoint_input_pipeline_hook_test",
+    size = "small",
+    srcs = ["checkpoint_input_pipeline_hook_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/estimator:estimator_py",
+    ],
+)
+
 py_test(
     name = "concatenate_dataset_serialization_test",
     size = "small",
@@ -580,7 +600,7 @@ py_test(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/experimental/kernel_tests:sql_dataset_op_test_base",
+        "//tensorflow/python/data/experimental/kernel_tests:sql_dataset_test_base",
         "//tensorflow/python/data/experimental/ops:readers",
     ],
 )
diff --git a/tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/checkpoint_input_pipeline_hook_test.py
similarity index 100%
rename from tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/checkpoint_input_pipeline_hook_test.py
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
index a0dd6960b0..b3dfe21486 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
@@ -23,7 +23,7 @@ from tensorflow.python.platform import test
 
 
 class ParseExampleDatasetSerializationTest(
-    reader_dataset_ops_test_base.ReadBatchFeaturesTestBase,
+    reader_dataset_ops_test_base.MakeBatchedFeaturesDatasetTestBase,
     dataset_serialization_test_base.DatasetSerializationTestBase):
 
   def ParseExampleDataset(self, num_repeat, batch_size):
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
index b179770ce3..006279bbe1 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.python.data.experimental.kernel_tests import sql_dataset_op_test_base
+from tensorflow.python.data.experimental.kernel_tests import sql_dataset_test_base
 from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.framework import dtypes
@@ -28,7 +28,7 @@ from tensorflow.python.platform import test
 
 
 class SqlDatasetSerializationTest(
-    sql_dataset_op_test_base.SqlDatasetTestBase,
+    sql_dataset_test_base.SqlDatasetTestBase,
     dataset_serialization_test_base.DatasetSerializationTestBase):
 
   def _build_dataset(self, num_repeats):
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
deleted file mode 100644
index 88d5c896c9..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Integration test for dataset serialization."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import test
-from tensorflow.python.training import saver as saver_lib
-
-
-class SerializationIntegrationTest(test.TestCase):
-
-  def _build_input_pipeline(self, name, num_outputs):
-    with ops.name_scope(name):
-      ds = dataset_ops.Dataset.range(num_outputs).shuffle(
-          10, reshuffle_each_iteration=False).prefetch(10)
-      iterator = ds.make_initializable_iterator()
-      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-      return iterator.initializer, iterator.get_next()
-
-  def _build_graph(self, num_pipelines, num_outputs):
-    init_ops = []
-    get_next_ops = []
-    for i in range(num_pipelines):
-      name = "input_pipeline_%d" % i
-      init_op, get_next_op = self._build_input_pipeline(name, num_outputs)
-      init_ops.append(init_op)
-      get_next_ops.append(get_next_op)
-    saver = saver_lib.Saver()
-    return init_ops, get_next_ops, saver
-
-  def _ckpt_path(self):
-    return os.path.join(self.get_temp_dir(), "iterator")
-
-  def testConcurrentSaves(self):
-    num_pipelines = 100
-    num_outputs = 100
-    break_point = 10
-    all_outputs = [[] for _ in range(num_pipelines)]
-    with ops.Graph().as_default() as g:
-      init_ops, get_next_ops, saver = self._build_graph(num_pipelines,
-                                                        num_outputs)
-      with self.session(graph=g) as sess:
-        sess.run(init_ops)
-        for _ in range(break_point):
-          output = sess.run(get_next_ops)
-          for i in range(num_pipelines):
-            all_outputs[i].append(output[i])
-        saver.save(sess, self._ckpt_path())
-
-    with ops.Graph().as_default() as g:
-      init_ops, get_next_ops, saver = self._build_graph(num_pipelines,
-                                                        num_outputs)
-      with self.session(graph=g) as sess:
-        saver.restore(sess, self._ckpt_path())
-        for _ in range(num_outputs - break_point):
-          output = sess.run(get_next_ops)
-          for i in range(num_pipelines):
-            all_outputs[i].append(output[i])
-
-    for output in all_outputs:
-      self.assertSequenceEqual(sorted(output), range(num_outputs))
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
similarity index 98%
rename from tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
index 50895b5945..c208963a86 100644
--- a/tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.shuffle_and_repeat()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
diff --git a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
similarity index 99%
rename from tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
index 301f75488a..a2c1169638 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
@@ -12,19 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for experimental sql input op."""
+"""Tests for `tf.data.experimental.SqlDataset`."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.experimental.kernel_tests import sql_dataset_op_test_base
+from tensorflow.python.data.experimental.kernel_tests import sql_dataset_test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
 
 
-class SqlDatasetTest(sql_dataset_op_test_base.SqlDatasetTestBase):
+class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
 
   # Test that SqlDataset can read from a database table.
   def testReadResultSet(self):
diff --git a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py
similarity index 98%
rename from tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py
index a135c357f0..6aaaa90c65 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Base class for testing SqlDataset."""
-
+"""Base class for testing `tf.data.experimental.SqlDataset`."""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index 19f5a62d45..427654cd76 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -280,7 +280,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
 
 class FeatureStatsDatasetTest(
     stats_dataset_test_base.StatsDatasetTestBase,
-    reader_dataset_ops_test_base.ReadBatchFeaturesTestBase):
+    reader_dataset_ops_test_base.MakeBatchedFeaturesDatasetTestBase):
 
   def testFeaturesStats(self):
     num_epochs = 5
diff --git a/tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
similarity index 98%
rename from tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
index 25a2e63ba1..8fd0ad50c4 100644
--- a/tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.TFRecordWriter`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
diff --git a/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
new file mode 100644
index 0000000000..0278a208cb
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
@@ -0,0 +1,300 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.unbatch()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+
+class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  def testUnbatchWithUnknownRankInput(self):
+    placeholder = array_ops.placeholder(dtypes.int32)
+    dataset = dataset_ops.Dataset.from_tensors(placeholder).apply(
+        batching.unbatch())
+    iterator = dataset.make_initializable_iterator()
+    next_elem = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer, feed_dict={placeholder: [0, 1, 2, 3]})
+      for i in range(4):
+        self.assertEqual(i, sess.run(next_elem))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_elem)
+
+  def testUnbatchScalarDataset(self):
+    data = tuple([math_ops.range(10) for _ in range(3)])
+    data = dataset_ops.Dataset.from_tensor_slices(data)
+    expected_types = (dtypes.int32,) * 3
+    data = data.batch(2)
+    self.assertEqual(expected_types, data.output_types)
+    data = data.apply(batching.unbatch())
+    self.assertEqual(expected_types, data.output_types)
+
+    iterator = data.make_one_shot_iterator()
+    op = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual((i,) * 3, sess.run(op))
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(op)
+
+  def testUnbatchDatasetWithStrings(self):
+    data = tuple([math_ops.range(10) for _ in range(3)])
+    data = dataset_ops.Dataset.from_tensor_slices(data)
+    data = data.map(lambda x, y, z: (x, string_ops.as_string(y), z))
+    expected_types = (dtypes.int32, dtypes.string, dtypes.int32)
+    data = data.batch(2)
+    self.assertEqual(expected_types, data.output_types)
+    data = data.apply(batching.unbatch())
+    self.assertEqual(expected_types, data.output_types)
+
+    iterator = data.make_one_shot_iterator()
+    op = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual((i, compat.as_bytes(str(i)), i), sess.run(op))
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(op)
+
+  def testUnbatchDatasetWithSparseTensor(self):
+    st = sparse_tensor.SparseTensorValue(
+        indices=[[i, i] for i in range(10)],
+        values=list(range(10)),
+        dense_shape=[10, 10])
+    data = dataset_ops.Dataset.from_tensors(st)
+    data = data.apply(batching.unbatch())
+    data = data.batch(5)
+    data = data.apply(batching.unbatch())
+    iterator = data.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        st_row = sess.run(next_element)
+        self.assertEqual([i], st_row.indices)
+        self.assertEqual([i], st_row.values)
+        self.assertEqual([10], st_row.dense_shape)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testUnbatchDatasetWithDenseAndSparseTensor(self):
+    st = sparse_tensor.SparseTensorValue(
+        indices=[[i, i] for i in range(10)],
+        values=list(range(10)),
+        dense_shape=[10, 10])
+    data = dataset_ops.Dataset.from_tensors((list(range(10)), st))
+    data = data.apply(batching.unbatch())
+    data = data.batch(5)
+    data = data.apply(batching.unbatch())
+    iterator = data.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        dense_elem, st_row = sess.run(next_element)
+        self.assertEqual(i, dense_elem)
+        self.assertEqual([i], st_row.indices)
+        self.assertEqual([i], st_row.values)
+        self.assertEqual([10], st_row.dense_shape)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testUnbatchSingleElementTupleDataset(self):
+    data = tuple([(math_ops.range(10),) for _ in range(3)])
+    data = dataset_ops.Dataset.from_tensor_slices(data)
+    expected_types = ((dtypes.int32,),) * 3
+    data = data.batch(2)
+    self.assertEqual(expected_types, data.output_types)
+    data = data.apply(batching.unbatch())
+    self.assertEqual(expected_types, data.output_types)
+
+    iterator = data.make_one_shot_iterator()
+    op = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual(((i,),) * 3, sess.run(op))
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(op)
+
+  def testUnbatchMultiElementTupleDataset(self):
+    data = tuple([(math_ops.range(10 * i, 10 * i + 10),
+                   array_ops.fill([10], "hi")) for i in range(3)])
+    data = dataset_ops.Dataset.from_tensor_slices(data)
+    expected_types = ((dtypes.int32, dtypes.string),) * 3
+    data = data.batch(2)
+    self.assertAllEqual(expected_types, data.output_types)
+    data = data.apply(batching.unbatch())
+    self.assertAllEqual(expected_types, data.output_types)
+
+    iterator = data.make_one_shot_iterator()
+    op = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")),
+                         sess.run(op))
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(op)
+
+  def testUnbatchEmpty(self):
+    data = dataset_ops.Dataset.from_tensors(
+        (constant_op.constant([]), constant_op.constant([], shape=[0, 4]),
+         constant_op.constant([], shape=[0, 4, 0])))
+    data = data.apply(batching.unbatch())
+    iterator = data.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testUnbatchStaticShapeMismatch(self):
+    data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8),
+                                             np.arange(9)))
+    with self.assertRaises(ValueError):
+      data.apply(batching.unbatch())
+
+  def testUnbatchDynamicShapeMismatch(self):
+    ph1 = array_ops.placeholder(dtypes.int32, shape=[None])
+    ph2 = array_ops.placeholder(dtypes.int32, shape=None)
+    data = dataset_ops.Dataset.from_tensors((ph1, ph2))
+    data = data.apply(batching.unbatch())
+    iterator = data.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      # Mismatch in the 0th dimension.
+      sess.run(
+          iterator.initializer,
+          feed_dict={
+              ph1: np.arange(7).astype(np.int32),
+              ph2: np.arange(8).astype(np.int32)
+          })
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(next_element)
+
+      # No 0th dimension (i.e. scalar value) for one component.
+      sess.run(
+          iterator.initializer,
+          feed_dict={
+              ph1: np.arange(7).astype(np.int32),
+              ph2: 7
+          })
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(next_element)
+
+
+class UnbatchBenchmark(test.Benchmark):
+
+  def benchmarkNativeUnbatch(self):
+    batch_sizes = [1, 2, 5, 10, 20, 50]
+    elems_per_trial = 10000
+    with ops.Graph().as_default():
+      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
+      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
+      dataset = dataset.batch(batch_size_placeholder)
+      dataset = dataset.apply(batching.unbatch())
+      dataset = dataset.skip(elems_per_trial)
+      iterator = dataset.make_initializable_iterator()
+      next_element = iterator.get_next()
+
+      with session.Session() as sess:
+        for batch_size in batch_sizes:
+          deltas = []
+          for _ in range(5):
+            sess.run(
+                iterator.initializer,
+                feed_dict={batch_size_placeholder: batch_size})
+            start = time.time()
+            sess.run(next_element.op)
+            end = time.time()
+            deltas.append((end - start) / elems_per_trial)
+
+          median_wall_time = np.median(deltas)
+          print("Unbatch (native) batch size: %d Median wall time per element:"
+                " %f microseconds" % (batch_size, median_wall_time * 1e6))
+          self.report_benchmark(
+              iters=10000,
+              wall_time=median_wall_time,
+              name="benchmark_unbatch_dataset_native_batch_size_%d" %
+              batch_size)
+
+  # Include a benchmark of the previous `unbatch()` implementation that uses
+  # a composition of more primitive ops. Eventually we'd hope to generate code
+  # that is as good in both cases.
+  def benchmarkOldUnbatchImplementation(self):
+    batch_sizes = [1, 2, 5, 10, 20, 50]
+    elems_per_trial = 10000
+    with ops.Graph().as_default():
+      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
+      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
+      dataset = dataset.batch(batch_size_placeholder)
+      dataset = dataset.flat_map(dataset_ops.Dataset.from_tensor_slices)
+      dataset = dataset.skip(elems_per_trial)
+      iterator = dataset.make_initializable_iterator()
+      next_element = iterator.get_next()
+
+      with session.Session() as sess:
+        for batch_size in batch_sizes:
+          deltas = []
+          for _ in range(5):
+            sess.run(
+                iterator.initializer,
+                feed_dict={batch_size_placeholder: batch_size})
+            start = time.time()
+            sess.run(next_element.op)
+            end = time.time()
+            deltas.append((end - start) / elems_per_trial)
+
+          median_wall_time = np.median(deltas)
+          print("Unbatch (unfused) batch size: %d Median wall time per element:"
+                " %f microseconds" % (batch_size, median_wall_time * 1e6))
+          self.report_benchmark(
+              iters=10000,
+              wall_time=median_wall_time,
+              name="benchmark_unbatch_dataset_unfused_batch_size_%d" %
+              batch_size)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py
similarity index 96%
rename from tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/unique_test.py
index b5a0b20f3f..847cff26b0 100644
--- a/tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.unique()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -26,7 +26,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class UniqueDatasetTest(test_base.DatasetTestBase):
+class UniqueTest(test_base.DatasetTestBase):
 
   def _testSimpleHelper(self, dtype, test_cases):
     """Test the `unique()` transformation on a list of test cases.
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 230ae3f3fd..0c372ebb10 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.Dataset.map()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -267,6 +267,35 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testCaptureIterator(self):
+
+    def _build_ds(iterator):
+
+      def _map_fn(x):
+        get_next = iterator.get_next()
+        return x * get_next
+
+      return dataset_ops.Dataset.range(10).map(_map_fn)
+
+    def _build_graph():
+      captured_iterator = dataset_ops.Dataset.range(
+          10).make_initializable_iterator()
+      ds = _build_ds(captured_iterator)
+      iterator = ds.make_initializable_iterator()
+      init_op = iterator.initializer
+      get_next = iterator.get_next()
+      return captured_iterator.initializer, init_op, get_next
+
+    with ops.Graph().as_default() as g:
+      captured_init_op, init_op, get_next = _build_graph()
+      with self.session(graph=g) as sess:
+        sess.run(captured_init_op)
+        sess.run(init_op)
+        for i in range(10):
+          self.assertEqual(i * i, sess.run(get_next))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+
   def testCaptureHashTable(self):
     # NOTE(mrry): We must use the V2 variants of `HashTable`
     # etc. because these produce a `tf.resource`-typed output that is
-- 
GitLab


From 158b6b8becb6afd08f9d6c87f0c7f144ba5f0584 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Thu, 4 Oct 2018 12:59:38 -0700
Subject: [PATCH 1146/1357] Use weak symbols to inject flex delegates

PiperOrigin-RevId: 215788183
---
 tensorflow/contrib/lite/BUILD                 |  26 ++++++++--
 tensorflow/contrib/lite/delegates/flex/BUILD  |   4 +-
 .../contrib/lite/delegates/flex/delegate.cc   |   9 ++++
 tensorflow/contrib/lite/interpreter.h         |  15 +++---
 tensorflow/contrib/lite/interpreter_test.cc   |   6 ++-
 tensorflow/contrib/lite/model.cc              |  35 ++++++++++----
 tensorflow/contrib/lite/model_flex_test.cc    |  45 ++++++++++++++++++
 tensorflow/contrib/lite/model_test.cc         |  22 +++++++++
 .../contrib/lite/testdata/multi_add_flex.bin  | Bin 0 -> 1052 bytes
 tensorflow/contrib/lite/tools/benchmark/BUILD |  24 ++--------
 .../tools/benchmark/benchmark_tflite_model.cc |  12 -----
 .../tools/benchmark/benchmark_tflite_model.h  |   6 ---
 12 files changed, 141 insertions(+), 63 deletions(-)
 create mode 100644 tensorflow/contrib/lite/model_flex_test.cc
 create mode 100644 tensorflow/contrib/lite/testdata/multi_add_flex.bin

diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index f3ebe3b245..787a85644c 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -4,6 +4,7 @@ package(default_visibility = [
 
 licenses(["notice"])  # Apache 2.0
 
+load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops")
 
 exports_files(glob([
@@ -165,10 +166,6 @@ cc_library(
         "stderr_reporter.h",
     ],
     copts = tflite_copts(),
-    defines = select({
-        ":with_tflite_flex": ["TFLITE_FLEX"],
-        "//conditions:default": [],
-    }),
     linkopts = [
     ] + select({
         "//tensorflow:android": [
@@ -276,6 +273,7 @@ cc_test(
         "testdata/0_subgraphs.bin",
         "testdata/2_subgraphs.bin",
         "testdata/empty_model.bin",
+        "testdata/multi_add_flex.bin",
         "testdata/test_model.bin",
         "testdata/test_model_broken.bin",
     ],
@@ -283,6 +281,26 @@ cc_test(
         ":framework",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "//tensorflow/contrib/lite/core/api",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/testing:util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+# Test model framework with the flex library linked into the target.
+tf_cc_test(
+    name = "model_flex_test",
+    size = "small",
+    srcs = ["model_flex_test.cc"],
+    data = [
+        "testdata/multi_add_flex.bin",
+    ],
+    tags = ["no_windows"],  # TODO(b/116667551): No weak symbols with MSVC.
+    deps = [
+        ":framework",
+        "//tensorflow/contrib/lite/core/api",
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
         "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD
index 9dd38958e5..9b89ed4f84 100644
--- a/tensorflow/contrib/lite/delegates/flex/BUILD
+++ b/tensorflow/contrib/lite/delegates/flex/BUILD
@@ -2,7 +2,7 @@
 # This is a TF Lite delegate that is powered by TensorFlow's Eager.
 #
 package(default_visibility = [
-    "//visibility:public",
+    "//visibility:private",
 ])
 
 licenses(["notice"])  # Apache 2.0
@@ -50,6 +50,7 @@ cc_library(
     hdrs = [
         "delegate.h",
     ],
+    visibility = ["//visibility:public"],
     deps = [
         ":buffer_map",
         ":delegate_data",
@@ -66,6 +67,7 @@ cc_library(
             "//tensorflow/core:lib",
         ],
     }),
+    alwayslink = 1,
 )
 
 tf_cc_test(
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate.cc b/tensorflow/contrib/lite/delegates/flex/delegate.cc
index ba065a8ff5..c72b0cf513 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate.cc
@@ -83,6 +83,15 @@ TfLiteStatus CopyFromBufferHandle(TfLiteContext* context,
 }  // namespace delegate
 }  // namespace flex
 
+// Corresponding weak declaration found in lite/model.cc.
+std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>
+AcquireFlexDelegate() {
+  return std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
+      tflite::FlexDelegate::Create().release(), [](TfLiteDelegate* delegate) {
+        delete reinterpret_cast<tflite::FlexDelegate*>(delegate);
+      });
+}
+
 std::unique_ptr<FlexDelegate> FlexDelegate::Create() {
   std::unique_ptr<flex::DelegateData> delegate_data;
   if (!flex::DelegateData::Create(&delegate_data).ok()) {
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index 7ef736d01b..651a97e9dc 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -349,6 +349,10 @@ class Interpreter {
     return context_.allow_fp32_relax_to_fp16;
   }
 
+  // Owning handle to a TfLiteDelegate instance.
+  using TfLiteDelegatePtr =
+      std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;
+
   // Allow a delegate to look at the graph and modify the graph to handle
   // parts of the graph themselves. After this is called, the graph may
   // contain new nodes that replace 1 more nodes.
@@ -574,19 +578,11 @@ class Interpreter {
                                  TfLiteExternalContextType type,
                                  TfLiteExternalContext* ctx);
 
-  using TfLiteDelegatePtr =
-      std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;
-
   // Variant of the public ModifyGraphWithDelegate method that additionally
   // Assumes ownership of the provided delegate.
   // WARNING: This is an experimental API and subject to change.
-  template <typename Delegate>
-  TfLiteStatus ModifyGraphWithDelegate(std::unique_ptr<Delegate> typed_delegate,
+  TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegatePtr delegate,
                                        bool allow_dynamic_tensors = false) {
-    TfLiteDelegatePtr delegate(typed_delegate.release(),
-                               [](TfLiteDelegate* delegate) {
-                                 delete static_cast<Delegate*>(delegate);
-                               });
     // Note that we retain ownership of the delegate even if graph modification
     // fails, as delegate use will be in an indeterminate state at that point.
     owned_delegates_.push_back(std::move(delegate));
@@ -676,6 +672,7 @@ class Interpreter {
   // List of delegates that have been installed and are owned by this
   // interpreter instance. Useful if client delegate ownership is burdensome.
   // WARNING: This is an experimental API and subject to change.
+  // TODO(b/116667551): Use TfLiteExternalContext for storing state.
   std::vector<TfLiteDelegatePtr> owned_delegates_;
 
   std::unique_ptr<MemoryPlanner> memory_planner_;
diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc
index cdede430e2..6c71d5a8d7 100644
--- a/tensorflow/contrib/lite/interpreter_test.cc
+++ b/tensorflow/contrib/lite/interpreter_test.cc
@@ -30,7 +30,11 @@ class InterpreterTest : public ::testing::Test {
   template <typename Delegate>
   static TfLiteStatus ModifyGraphWithDelegate(
       Interpreter* interpreter, std::unique_ptr<Delegate> delegate) {
-    return interpreter->ModifyGraphWithDelegate(std::move(delegate));
+    Interpreter::TfLiteDelegatePtr tflite_delegate(
+        delegate.release(), [](TfLiteDelegate* delegate) {
+          delete reinterpret_cast<Delegate*>(delegate);
+        });
+    return interpreter->ModifyGraphWithDelegate(std::move(tflite_delegate));
   }
 
  protected:
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index d50c345194..d7b109ac1a 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -27,9 +27,6 @@ limitations under the License.
 #ifndef TFLITE_MCU
 #include "tensorflow/contrib/lite/nnapi_delegate.h"
 #endif
-#if defined(TFLITE_FLEX)
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
-#endif
 #include "tensorflow/contrib/lite/version.h"
 
 namespace tflite {
@@ -43,6 +40,25 @@ ErrorReporter* ValidateErrorReporter(ErrorReporter* e) {
 
 const char* kEmptyTensorName = "";
 
+// Normally we'd use ABSL_HAVE_ATTRIBUTE_WEAK and ABSL_ATTRIBUTE_WEAK, but
+// we avoid the absl dependency for binary size reasons.
+#ifdef __has_attribute
+#define TFLITE_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define TFLITE_HAS_ATTRIBUTE(x) 0
+#endif
+
+#if TFLITE_HAS_ATTRIBUTE(weak) || (defined(__GNUC__) && !defined(__clang__))
+// Using weak symbols for the flex delegate allows automatic injection of the
+// delegate simply by adding it as a dependency. See also the strong override in
+// lite/delegates/flex/delegate.cc.
+__attribute__((weak)) Interpreter::TfLiteDelegatePtr AcquireFlexDelegate() {
+  return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
+}
+#else
+Interpreter::TfLiteDelegatePtr (*AcquireFlexDelegate)() = nullptr;
+#endif
+
 #ifndef TFLITE_MCU
 // Loads a model from `filename`. If `mmap_file` is true then use mmap,
 // otherwise make a copy of the model in a buffer.
@@ -450,13 +466,14 @@ TfLiteStatus InterpreterBuilder::operator()(
   }
   (**interpreter).SetVariables(std::move(variables));
 
-#if defined(TFLITE_FLEX)
-  if (auto delegate = FlexDelegate::Create()) {
-    (**interpreter)
-        .ModifyGraphWithDelegate(std::move(delegate),
-                                 /*allow_dynamic_tensors=*/true);
+  // TODO(b/116667551): Only create the flex delegate if the model has flex ops.
+  if (AcquireFlexDelegate != nullptr) {
+    if (auto flex_delegate = AcquireFlexDelegate()) {
+      (**interpreter)
+          .ModifyGraphWithDelegate(std::move(flex_delegate),
+                                   /*allow_dynamic_tensors=*/true);
+    }
   }
-#endif
 
   return kTfLiteOk;
 }
diff --git a/tensorflow/contrib/lite/model_flex_test.cc b/tensorflow/contrib/lite/model_flex_test.cc
new file mode 100644
index 0000000000..52e76bee49
--- /dev/null
+++ b/tensorflow/contrib/lite/model_flex_test.cc
@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/model.h"
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/testing/util.h"
+
+namespace tflite {
+
+// Ensures that a model with TensorFlow ops can be imported as long as the
+// appropriate delegate is linked into the client.
+TEST(FlexModel, WithFlexDelegate) {
+  auto model = FlatBufferModel::BuildFromFile(
+      "tensorflow/contrib/lite/testdata/multi_add_flex.bin");
+  ASSERT_TRUE(model);
+
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(InterpreterBuilder(*model,
+                               ops::builtin::BuiltinOpResolver{})(&interpreter),
+            kTfLiteOk);
+  ASSERT_TRUE(interpreter);
+
+  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk);
+}
+
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/model_test.cc b/tensorflow/contrib/lite/model_test.cc
index ec7d46af7c..b969bea5dc 100644
--- a/tensorflow/contrib/lite/model_test.cc
+++ b/tensorflow/contrib/lite/model_test.cc
@@ -24,6 +24,7 @@ limitations under the License.
 
 #include <gtest/gtest.h>
 #include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/testing/util.h"
 
 // Comparison for TfLiteRegistration. Since TfLiteRegistration is a C object,
@@ -193,6 +194,27 @@ TEST(BasicFlatBufferModel, TestModelInInterpreter) {
   }
 }
 
+// Test that loading a model with TensorFlow ops fails when the flex delegate is
+// not linked into the target.
+TEST(FlexModel, FailureWithoutFlexDelegate) {
+  auto model = FlatBufferModel::BuildFromFile(
+      "tensorflow/contrib/lite/testdata/multi_add_flex.bin");
+  ASSERT_TRUE(model);
+
+  // Note that creation will succeed when using the BuiltinOpResolver, but
+  // unless the appropriate delegate is linked into the target or the client
+  // explicitly installs the delegate, execution will fail.
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(InterpreterBuilder(*model,
+                               ops::builtin::BuiltinOpResolver{})(&interpreter),
+            kTfLiteOk);
+  ASSERT_TRUE(interpreter);
+
+  // As the flex ops weren't resolved implicitly by the flex delegate, runtime
+  // allocation and execution will fail.
+  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteError);
+}
+
 // This tests on a flatbuffer that defines a shape of 2 to be a memory mapped
 // buffer. But the buffer is provided to be only 1 element.
 TEST(BasicFlatBufferModel, TestBrokenMmap) {
diff --git a/tensorflow/contrib/lite/testdata/multi_add_flex.bin b/tensorflow/contrib/lite/testdata/multi_add_flex.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9aac2155fedd11b81ed32e587655dfe53e5749a9
GIT binary patch
literal 1052
zcmb1OU|<Mw^D$;%;A4<rU}4~3;9(G85MkhBU|?WoU|?9n%)r3Iz`!8Dz`&ruz`(%B
zz`&5fz`(!{(&z8&ui%`YSC(2-lA5B&z`?-4V8g(`V8Xz_pu@nxpu)hwAj81GAi@AK
zg9T*HzyJS5{{8<i@$dhCnScNPEByQaU*+Hb{~G`P|JV8V|343d2;3DQdkq*E7(i|W
znZ1XRfq{vEfq|8Qfq{*IfdOP51Jq3*agcl2p>~EaFfjCh>}6zNh+$-4*u%iUaEF0`
zVIRnD1_lO@I*^$le}Vjz2r>v{=KufyL1G}YLE;Py_0ABQf#DZaFUW3Wy-85L4tVsM
z{r~?T<PVVBk@Y4+_156gTLje$auc%N3aH+c|Ns9_gQ@|A0kWD(sG5%d|NrlTssV)+
zNDat6Ait$R)qvt2lmb9<xWWLGPCzll08J^NbOJL6qz@F=ATu+eX0kAVAS5k)U|?V<
z0EICF1B1c;|NlX017r^<U4i@t(gM;83SW>LL4E;=n}X7!V@e8x8W&@w5VK=SiWFmt
z6l10muYO8uS!Qyom2*I-m4OyJ7h{MJlK~?Gldh(S2#W^DtQmOh0mTK#9+180_EbRa
zNhH!95pXI8g&xQ>P|5}6B1rnj=0}iyAiF_s1kuQT%!JyVB*mCaj2}T}fXut_|Nnnb
eHU+Uk>Dw(QwE`4uFK~#1;+vg;1DtQM=>q_$K#X(%

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD
index 502e181139..71bf61657e 100644
--- a/tensorflow/contrib/lite/tools/benchmark/BUILD
+++ b/tensorflow/contrib/lite/tools/benchmark/BUILD
@@ -40,7 +40,7 @@ cc_binary(
     srcs = [
         "benchmark_main.cc",
     ],
-    copts = common_copts + ["-DTFLITE_FLEX"],
+    copts = common_copts,
     linkopts = tflite_linkopts() + select({
         "//tensorflow:android": [
             "-pie",  # Android 5.0 and later supports only PIE
@@ -49,8 +49,9 @@ cc_binary(
         "//conditions:default": [],
     }),
     deps = [
-        ":benchmark_tflite_model_plus_flex_lib",
+        ":benchmark_tflite_model_lib",
         ":logging",
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
     ],
 )
 
@@ -110,25 +111,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "benchmark_tflite_model_plus_flex_lib",
-    srcs = [
-        "benchmark_tflite_model.cc",
-        "logging.h",
-    ],
-    hdrs = ["benchmark_tflite_model.h"],
-    copts = common_copts + ["-DTFLITE_FLEX"],
-    deps = [
-        ":benchmark_model_lib",
-        ":logging",
-        "//tensorflow/contrib/lite:framework",
-        "//tensorflow/contrib/lite:string_util",
-        "//tensorflow/contrib/lite/delegates/flex:delegate",
-        "//tensorflow/contrib/lite/kernels:builtin_ops",
-        "//tensorflow/contrib/lite/profiling:profile_summarizer",
-    ],
-)
-
 cc_library(
     name = "benchmark_params",
     srcs = [
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
index 463d5993f4..2a3df7f289 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -23,9 +23,6 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#ifdef TFLITE_FLEX
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
-#endif  // TFLITE_FLEX
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/op_resolver.h"
@@ -305,15 +302,6 @@ void BenchmarkTfLiteModel::Init() {
 
   interpreter->UseNNAPI(use_nnapi);
 
-#ifdef TFLITE_FLEX
-  TFLITE_LOG(INFO) << "Instantiating Flex Delegate";
-  delegate_ = FlexDelegate::Create();
-  if (delegate_) {
-    interpreter->ModifyGraphWithDelegate(delegate_.get(),
-                                         /*allow_dynamic_tensors=*/true);
-  }
-#endif  // TFLITE_FLEX
-
   auto interpreter_inputs = interpreter->inputs();
 
   if (!inputs.empty()) {
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
index b091e18a29..25a302b2aa 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
@@ -20,9 +20,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#ifdef TFLITE_FLEX
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
-#endif  // TFLITE_FLEX
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
 #include "tensorflow/contrib/lite/tools/benchmark/benchmark_model.h"
@@ -73,9 +70,6 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
   void PrepareInputsAndOutputs() override;
 
  private:
-#ifdef TFLITE_FLEX
-  std::unique_ptr<FlexDelegate> delegate_;
-#endif  // TFLITE_FLEX
   std::unique_ptr<tflite::FlatBufferModel> model;
   std::unique_ptr<tflite::Interpreter> interpreter;
   std::vector<InputLayerInfo> inputs;
-- 
GitLab


From 074ff471fefbcf3bfd49914ad80bd9f9751df363 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Thu, 4 Oct 2018 13:00:49 -0700
Subject: [PATCH 1147/1357] Temporarily disable testCondInDefun test in
 control_flow_ops_py_test

PiperOrigin-RevId: 215788359
---
 tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index a1be77601c..c7e89dd5f9 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -3422,7 +3422,8 @@ class EagerTest(test.TestCase):
       self.assertAllEqual(r.numpy(), 10)
       self.assertFalse(isinstance(r, list))
 
-  def testCondInDefun(self):
+  # TODO(b/117279927): Re-enable once msan failure is fixed.
+  def DISABLED_testCondInDefun(self):
     if "GPU" in [d.device_type for d in device_lib.list_local_devices()]:
       return unittest.skip("b/113346829 (gpu failure)")
 
-- 
GitLab


From 7fcb05ff475a0c6c1076eacf9d11e17323d98bc2 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 4 Oct 2018 13:01:33 -0700
Subject: [PATCH 1148/1357] [tf.data] Add a notion of `captured args` to
 MapDefun

PiperOrigin-RevId: 215788485
---
 .../api_def/base_api/api_def_MapDefun.pbtxt   | 23 +++++--
 .../optimizers/data/map_vectorization.cc      |  1 +
 .../data/vectorization_utils_test.cc          |  3 +
 tensorflow/core/kernels/data/map_defun_op.cc  | 68 +++++++++----------
 tensorflow/core/ops/dataset_ops.cc            | 11 ++-
 .../kernel_tests/map_defun_op_test.py         | 12 ++++
 .../python/data/experimental/ops/map_defun.py |  8 ++-
 7 files changed, 77 insertions(+), 49 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_MapDefun.pbtxt b/tensorflow/core/api_def/base_api/api_def_MapDefun.pbtxt
index 4433693759..d158f4b502 100644
--- a/tensorflow/core/api_def/base_api/api_def_MapDefun.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_MapDefun.pbtxt
@@ -4,22 +4,33 @@ op {
   in_arg {
     name: "arguments"
     description: <<END
-    A list of tensors whose types are Targuments, corresponding to the inputs the
-    function should be mapped over.
+    A list of tensors whose types are `Targuments`, corresponding to the inputs
+    the function should be mapped over.
+END
+  }
+  in_arg {
+    name: "captured_inputs"
+    description: <<END
+    A list of tensors whose types are `Tcaptured`, corresponding to the captured
+    inputs of the defun.
 END
   }
   out_arg {
     name: "output"
     description: <<END
-    A list of output tensors whose types are output_types and whose dimensions 0
-    are the same as the dimensions 0 of the tensors in arguments, and whose
-    remaining dimensions correspond to those in output_shapes.
+    A list of output tensors whose types are `output_types` and whose dimensions
+    0 are the same as the dimensions 0 of the tensors in `arguments`, and whose
+    remaining dimensions correspond to those in `output_shapes`.
 END
   }
   attr {
     name: "Targuments"
     description: "A list of types."
   }
+  attr {
+    name: "Tcaptured"
+    description: "A list of types."
+  }
   attr {
     name: "output_types"
     description: "A list of types."
@@ -29,6 +40,6 @@ END
     description: "A list of shapes."
   }
   summary: <<END
-  Maps a function on the list of tensors unpacked from inputs on dimension 0.
+  Maps a function on the list of tensors unpacked from arguments on dimension 0.
 END
 }
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index ba521e79bc..a9254ed58b 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -67,6 +67,7 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
     map_defun_node->add_input(input.name());
   }
   (*map_defun_node->mutable_attr())["Targuments"] = t_args;
+  AddNodeAttr("Tcaptured", DataTypeVector(), map_defun_node);
 
   // Set return values to match output names
   string output_prefix = strings::StrCat(map_defun_node->name(), ":output:");
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index a958d706c1..a6020e36bb 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -55,6 +55,7 @@ NodeDef* AddMapDefunNode(const string& name, const std::vector<string>& inputs,
   func.set_name(function_name);
   NodeDef* node = function_utils::AddNode(name, "MapDefun", inputs, {}, fn);
   graph_transforms::SetNodeAttr("Targuments", t_arguments, node);
+  graph_transforms::SetNodeAttr("Tcaptured", DataTypeVector(), node);
   graph_transforms::SetNodeAttr("output_types", output_types, node);
   graph_transforms::SetNodeAttr("output_shapes", output_shapes, node);
   graph_transforms::SetNodeAttr("f", func, node);
@@ -142,6 +143,8 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
+  Status s = VectorizeMapDefun(outer, *map_defun, &lib, &vectorized);
+  LOG(ERROR) << s;
   EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
diff --git a/tensorflow/core/kernels/data/map_defun_op.cc b/tensorflow/core/kernels/data/map_defun_op.cc
index 6657f2b2b3..705b0393de 100644
--- a/tensorflow/core/kernels/data/map_defun_op.cc
+++ b/tensorflow/core/kernels/data/map_defun_op.cc
@@ -62,24 +62,6 @@ class MapDefunOp : public AsyncOpKernel {
 
   ~MapDefunOp() override {}
 
-  Status GetInputBatchSize(OpKernelContext* ctx, int64* batch_size) {
-    // Validates inputs and gets the size of their leading dimension.
-    *batch_size = ctx->input(0).dims() > 0 ? ctx->input(0).dim_size(0) : -1;
-    for (size_t i = 0; i < ctx->num_inputs(); ++i) {
-      if (ctx->input(i).dims() == 0) {
-        return errors::InvalidArgument(
-            "All inputs must have rank at least 1. Input ", i,
-            " has a rank of 0.");
-      } else if (ctx->input(i).dim_size(0) != *batch_size) {
-        return errors::InvalidArgument(
-            "All inputs must have the same dimension 0. Input ", i,
-            " has leading dimension ", ctx->input(i).dim_size(0),
-            ", while all previous inputs have leading dimension ", batch_size);
-      }
-    }
-    return Status::OK();
-  }
-
   void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
     ComputeOptions* compute_opts = nullptr;
 
@@ -150,8 +132,9 @@ class MapDefunOp : public AsyncOpKernel {
     // all calls to the function are complete. This struct also encapsulates
     // all the components that need to be passed to each MapFunctionCallFrame.
 
-    const std::vector<Tensor> args;
+    OpInputList args;
     const std::vector<TensorShape> arg_shapes;
+    OpInputList captured_inputs;
     const int64 batch_size;
 
     // Output of a compute call
@@ -161,26 +144,31 @@ class MapDefunOp : public AsyncOpKernel {
 
     // Create a copy of output_shapes because every `Compute` may expect a
     // different output shape.
-    ComputeOptions(std::vector<Tensor> args,
+    ComputeOptions(OpInputList args, OpInputList captured_inputs,
                    std::vector<TensorShape> arg_shapes, int64 batch_size,
                    const std::vector<PartialTensorShape>& output_shapes_attr)
-        : args(std::move(args)),
+        : args(args),
           arg_shapes(std::move(arg_shapes)),
+          captured_inputs(captured_inputs),
           batch_size(batch_size),
           output_shapes(output_shapes_attr) {}
   };
 
   // Get inputs to Compute and check that they are valid.
   Status SetupArgs(OpKernelContext* ctx, ComputeOptions** compute_opts) {
-    int64 batch_size =
-        ctx->input(0).dims() > 0 ? ctx->input(0).dim_size(0) : -1;
+    OpInputList arguments;
+    TF_RETURN_IF_ERROR(ctx->input_list("arguments", &arguments));
+    OpInputList captured_inputs;
+    TF_RETURN_IF_ERROR(ctx->input_list("captured_inputs", &captured_inputs));
+
+    int64 batch_size = arguments[0].dims() > 0 ? arguments[0].dim_size(0) : -1;
 
-    for (size_t i = 0; i < ctx->num_inputs(); ++i) {
-      if (ctx->input(i).dims() == 0) {
+    for (size_t i = 0; i < arguments.size(); ++i) {
+      if (arguments[i].dims() == 0) {
         return errors::InvalidArgument(
             "All inputs must have rank at least 1. Input ", i,
             " has a rank of 0.");
-      } else if (ctx->input(i).dim_size(0) != batch_size) {
+      } else if (arguments[i].dim_size(0) != batch_size) {
         return errors::InvalidArgument(
             "All inputs must have the same dimension 0. Input ", i,
             " has leading dimension ", ctx->input(i).dim_size(0),
@@ -188,19 +176,17 @@ class MapDefunOp : public AsyncOpKernel {
       }
     }
 
-    std::vector<Tensor> args;
     std::vector<TensorShape> arg_shapes;
-    args.reserve(ctx->num_inputs());
-    arg_shapes.reserve(ctx->num_inputs());
+    arg_shapes.reserve(arguments.size());
 
-    for (size_t i = 0; i < ctx->num_inputs(); ++i) {
-      args.push_back(ctx->input(i));
-      arg_shapes.push_back(ctx->input(i).shape());
+    for (size_t i = 0; i < arguments.size(); ++i) {
+      arg_shapes.push_back(arguments[i].shape());
       arg_shapes.at(i).RemoveDim(0);
     }
 
-    *compute_opts = new ComputeOptions(std::move(args), std::move(arg_shapes),
-                                       batch_size, output_shapes_);
+    *compute_opts =
+        new ComputeOptions(arguments, captured_inputs, std::move(arg_shapes),
+                           batch_size, output_shapes_);
     return Status::OK();
   }
 
@@ -235,12 +221,21 @@ class MapDefunOp : public AsyncOpKernel {
     }
 
     Status GetArg(int index, Tensor* val) const override {
-      if (index < 0 || index >= compute_opts_->args.size()) {
+      if (index < 0 || index >= compute_opts_->args.size() +
+                                    compute_opts_->captured_inputs.size()) {
         return errors::InvalidArgument(
             "Mismatch in number of function inputs.");
       }
+
+      if (index >= compute_opts_->args.size()) {
+        // The function is calling for a captured input
+        *val =
+            compute_opts_->captured_inputs[index - compute_opts_->args.size()];
+        return Status::OK();
+      }
+
       bool result =
-          val->CopyFrom(compute_opts_->args.at(index).Slice(iter_, iter_ + 1),
+          val->CopyFrom(compute_opts_->args[index].Slice(iter_, iter_ + 1),
                         compute_opts_->arg_shapes.at(index));
       if (!result) {
         return errors::Internal("GetArg failed.");
@@ -248,7 +243,6 @@ class MapDefunOp : public AsyncOpKernel {
         // Ensure alignment
         *val = tensor::DeepCopy(*val);
       }
-
       return Status::OK();
     }
 
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 889a6a4640..ec22eee874 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -903,14 +903,18 @@ REGISTER_OP("ModelDataset")
 
 REGISTER_OP("MapDefun")
     .Input("arguments: Targuments")
+    .Input("captured_inputs: Tcaptured")
     .Output("output: output_types")
     .Attr("Targuments: list(type) >= 1")
+    .Attr("Tcaptured: list(type) >= 0 = []")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
     .Attr("f: func")
     .SetShapeFn([](shape_inference::InferenceContext* c) {
       std::vector<PartialTensorShape> output_shapes;
       TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
+      DataTypeVector t_args;
+      TF_RETURN_IF_ERROR(c->GetAttr("Targuments", &t_args));
       if (output_shapes.size() != c->num_outputs()) {
         return errors::InvalidArgument(
             "`output_shapes` must be the same length as `output_types` (",
@@ -918,10 +922,11 @@ REGISTER_OP("MapDefun")
       }
 
       int64 dim_zero = -1;
-      for (size_t i = 0; i < static_cast<size_t>(c->num_inputs()); ++i) {
+      for (size_t i = 0; i < t_args.size(); ++i) {
         if (c->Rank(c->input(i)) == 0) {
           return errors::InvalidArgument(
-              "Inputs must have rank at least 1. Input ", i, " has rank of 0");
+              "Arguments must have rank at least 1. Input ", i,
+              " has rank of 0.");
         }
         auto dim_handle = c->Dim(c->input(i), 0);
         if (c->ValueKnown(dim_handle)) {
@@ -929,7 +934,7 @@ REGISTER_OP("MapDefun")
             dim_zero = c->Value(dim_handle);
           } else if (c->Value(dim_handle) != dim_zero) {
             return errors::InvalidArgument(
-                "Inputs must have the same dimension 0.");
+                "Arguments must have the same dimension 0.");
           }
         }
       }
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
index 612ee332c4..ae9dedb0ab 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
@@ -235,6 +235,18 @@ class MapDefunTest(test_base.DatasetTestBase):
       sess.close()
       thread.join()
 
+  def testMapDefunWithCapturedInputs(self):
+    c = constant_op.constant(2)
+
+    @function.Defun(dtypes.int32)
+    def fn(x):
+      return x + c
+
+    x = constant_op.constant([1, 2, 3, 4])
+    map_defun_op = map_defun.map_defun(fn, [x], [dtypes.int32], [()])[0]
+    expected = x + c
+    self.assertAllEqual(self.evaluate(expected), self.evaluate(map_defun_op))
+
 
 class MapDefunBenchmark(test.Benchmark):
 
diff --git a/tensorflow/python/data/experimental/ops/map_defun.py b/tensorflow/python/data/experimental/ops/map_defun.py
index 3d0d0993c9..3ac1158d8b 100644
--- a/tensorflow/python/data/experimental/ops/map_defun.py
+++ b/tensorflow/python/data/experimental/ops/map_defun.py
@@ -47,10 +47,12 @@ def map_defun(fn, elems, output_dtypes, output_shapes):
   if not isinstance(elems, list):
     raise ValueError("`elems` must be a list of tensors.")
   if not isinstance(output_dtypes, list):
-    raise ValueError("`output_dtypes` must be a list of tensors.")
+    raise ValueError("`output_dtypes` must be a list of `tf.DType` objects.")
   if not isinstance(output_shapes, list):
-    raise ValueError("`output_shapes` must be a list of tensors.")
+    raise ValueError("`output_shapes` must be a list of `tf.TensorShape` "
+                     "objects.")
 
   elems = [ops.convert_to_tensor(e) for e in elems]
   output_shapes = [tensor_shape.TensorShape(s) for s in output_shapes]
-  return gen_dataset_ops.map_defun(elems, output_dtypes, output_shapes, fn)
+  return gen_dataset_ops.map_defun(elems, fn.captured_inputs, output_dtypes,
+                                   output_shapes, fn)
-- 
GitLab


From b949f9ee60522ca43f7f8a89b15ea6eeed2ac570 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Thu, 4 Oct 2018 13:14:07 -0700
Subject: [PATCH 1149/1357] Enable masking through a Sequential model.

PiperOrigin-RevId: 215790636
---
 tensorflow/python/keras/engine/input_layer.py |  1 +
 .../python/keras/engine/topology_test.py      | 31 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/tensorflow/python/keras/engine/input_layer.py b/tensorflow/python/keras/engine/input_layer.py
index 8a4018a0df..6a69d0ed90 100644
--- a/tensorflow/python/keras/engine/input_layer.py
+++ b/tensorflow/python/keras/engine/input_layer.py
@@ -82,6 +82,7 @@ class InputLayer(base_layer.Layer):
     self.built = True
     self.sparse = sparse
     self.batch_size = batch_size
+    self.supports_masking = True
 
     if isinstance(input_shape, tensor_shape.TensorShape):
       input_shape = tuple(input_shape.as_list())
diff --git a/tensorflow/python/keras/engine/topology_test.py b/tensorflow/python/keras/engine/topology_test.py
index a0da96334b..b4488033cd 100644
--- a/tensorflow/python/keras/engine/topology_test.py
+++ b/tensorflow/python/keras/engine/topology_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import test
+from tensorflow.python.training import rmsprop
 
 try:
   import yaml  # pylint:disable=g-import-not-at-top
@@ -1182,6 +1183,36 @@ class DefaultShapeInferenceBehaviorTest(test.TestCase):
     output = model(sample_input)
     self.assertEqual(output.shape, (1, 3))
 
+  @test_util.run_in_graph_and_eager_modes()
+  def test_sequential_as_downstream_of_masking_layer(self):
+    inputs = keras.layers.Input(shape=(3, 4))
+    x = keras.layers.Masking(mask_value=0., input_shape=(3, 4))(inputs)
+
+    s = keras.Sequential()
+    s.add(keras.layers.Dense(5, input_shape=(4,)))
+
+    x = keras.layers.wrappers.TimeDistributed(s)(x)
+    model = keras.Model(inputs=inputs, outputs=x)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(1e-3), loss='mse')
+
+    model_input = np.random.randint(
+        low=1, high=5, size=(10, 3, 4)).astype('float32')
+    for i in range(4):
+      model_input[i, i:, :] = 0.
+    model.fit(model_input,
+              np.random.random((10, 3, 5)), epochs=1, batch_size=6)
+
+    if not context.executing_eagerly():
+      # Note: this doesn't work in eager due to DeferredTensor/ops compatibility
+      # issue.
+      mask_outputs = [model.layers[1].compute_mask(model.layers[1].input)]
+      mask_outputs += [model.layers[2].compute_mask(
+          model.layers[2].input, mask_outputs[-1])]
+      func = keras.backend.function([model.input], mask_outputs)
+      mask_outputs_val = func([model_input])
+      self.assertAllClose(mask_outputs_val[0], np.any(model_input, axis=-1))
+      self.assertAllClose(mask_outputs_val[1], np.any(model_input, axis=-1))
+
 
 class GraphUtilsTest(test.TestCase):
 
-- 
GitLab


From 23a698e670a10eff362c575eb1297c2b4f0bbe11 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 13:18:18 -0700
Subject: [PATCH 1150/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 215791283
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 88 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 26 ++++++
 2 files changed, 114 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 33f18ae13f..780c6f6448 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -30566,6 +30566,52 @@ op {
     type: "func"
   }
 }
+op {
+  name: "MapDefun"
+  input_arg {
+    name: "arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "captured_inputs"
+    type_list_attr: "Tcaptured"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "Tcaptured"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+}
 op {
   name: "MapIncompleteSize"
   output_arg {
@@ -71843,6 +71889,48 @@ op {
     }
   }
 }
+op {
+  name: "Substr"
+  input_arg {
+    name: "input"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "pos"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "len"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type: DT_STRING
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "unit"
+    type: "string"
+    default_value {
+      s: "BYTE"
+    }
+    allowed_values {
+      list {
+        s: "BYTE"
+        s: "UTF8_CHAR"
+      }
+    }
+  }
+}
 op {
   name: "Sum"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 0e58a9475d..0d8997c1bd 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -15262,6 +15262,10 @@ op {
     name: "arguments"
     type_list_attr: "Targuments"
   }
+  input_arg {
+    name: "captured_inputs"
+    type_list_attr: "Tcaptured"
+  }
   output_arg {
     name: "output"
     type_list_attr: "output_types"
@@ -15272,6 +15276,15 @@ op {
     has_minimum: true
     minimum: 1
   }
+  attr {
+    name: "Tcaptured"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
   attr {
     name: "output_types"
     type: "list(type)"
@@ -33748,6 +33761,19 @@ op {
       }
     }
   }
+  attr {
+    name: "unit"
+    type: "string"
+    default_value {
+      s: "BYTE"
+    }
+    allowed_values {
+      list {
+        s: "BYTE"
+        s: "UTF8_CHAR"
+      }
+    }
+  }
 }
 op {
   name: "Sum"
-- 
GitLab


From 589e876139f4c7fbdf96edaa16fdcfe12c7a4b03 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 4 Oct 2018 13:20:58 -0700
Subject: [PATCH 1151/1357] Error out when PartitionedCall is created with the
 wrong number of arguments.

(used to be a segfault)

PiperOrigin-RevId: 215791737
---
 tensorflow/core/kernels/partitioned_function_ops.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index fdb4c84c46..3979e4b53a 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -97,6 +97,13 @@ class PartitionedCallOp : public AsyncOpKernel {
         OP_REQUIRES_ASYNC(ctx, fbody != nullptr,
                           errors::Internal("Could not find handle ", handle),
                           done);
+        OP_REQUIRES_ASYNC(
+            ctx, args.size() == fbody->arg_nodes.size(),
+            errors::InvalidArgument(
+                "Wrong number of arguments to the op; function expects ",
+                fbody->arg_nodes.size(), " but PartitionedCall received ",
+                args.size()),
+            done);
         // We need to pass global op_registry as default_registry when creating
         // graph. So that graph optimization passes can lookup all possible ops
         // by name.
-- 
GitLab


From 9e8c7afa5867bd19b6684458566b064148b2665b Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Thu, 4 Oct 2018 13:34:31 -0700
Subject: [PATCH 1152/1357] Add TF_BUILD_TEST_TIMEOUT to
 ci_parameterized_build.sh

PiperOrigin-RevId: 215793932
---
 .../tools/ci_build/ci_parameterized_build.sh  | 27 ++++++++++++-------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 99bdedf7b4..fdff867ff0 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -83,6 +83,9 @@
 #                     Use the specified configurations when building.
 #                     When set, overrides TF_BUILD_IS_OPT and TF_BUILD_MAVX
 #                     options, as this will replace the two.
+#   TF_BUILD_TEST_TIMEOUT:
+#                     Sets the value of bazel --test_timeout, defaults to -1
+#                     which uses the bazel defaults.
 #   TF_SKIP_CONTRIB_TESTS:
 #                     If set to any non-empty or non-0 value, will skip running
 #                     contrib tests.
@@ -125,6 +128,8 @@ NO_DOCKER_OPT_FLAG="--genrule_strategy=standalone"
 
 DO_DOCKER=1
 
+# Bazel uses defaults for all test sizes when given `-1`.
+TF_BUILD_TEST_TIMEOUT=${TF_BUILD_TEST_TIMEOUT:--1}
 
 # Helpful flags:
 # --test_summary=detailed: Tell us more about which targets are being built
@@ -132,7 +137,16 @@ DO_DOCKER=1
 # --build_tests_only: Don't build targets depended on by tests if the test is
 #                     disabled. Also saves some compilation time. Otherwise,
 #                     tries to build everything.
-BAZEL_TEST_FLAGS="--test_summary=detailed --build_tests_only --keep_going"
+# --test_timeout: Test timeouts in the order short,moderate,long,eternal.
+# --test_env: Environment variables to set when running bazel tests. These are
+#             especially important when using --run_under with
+#             parallel_gpu_execute.
+BAZEL_TEST_FLAGS=""\
+"--test_summary=detailed --build_tests_only --keep_going "\
+"--test_timeout=${TF_BUILD_TEST_TIMEOUT} "\
+"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
+"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
+"--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB}"
 BAZEL_BUILD_FLAGS="--keep_going"
 
 BAZEL_CMD="bazel test ${BAZEL_TEST_FLAGS}"
@@ -148,13 +162,6 @@ ANDROID_FULL_CMD="${CI_BUILD_DIR}/builds/android_full.sh"
 TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 PARALLEL_GPU_TEST_CMD='//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute'
 
-# Environment variables to set when running bazel tests.  These are especially
-# important when using --run_under with parallel_gpu_execute.
-BAZEL_TEST_ENV=""\
-"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
-"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
-"--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB} "
-
 BENCHMARK_CMD="${CI_BUILD_DIR}/builds/benchmark.sh"
 
 EXTRA_PARAMS=""
@@ -415,11 +422,11 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
   if [[ ${CTYPE} == cpu* ]] || \
      [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
     # CPU only command, fully parallel.
-    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
+    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} "\
       "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == gpu* ]]; then
     # GPU only command, run as many jobs as the GPU count only.
-    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
+    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
 "--local_test_jobs=${TF_GPU_COUNT} "\
 "--run_under=${PARALLEL_GPU_TEST_CMD} "\
 "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
-- 
GitLab


From 9f2d1e2cf6be4a17b6318b429447a71d9d48af32 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 13:35:31 -0700
Subject: [PATCH 1153/1357] Few more fixes for issued in parsing invalid HLO
 module proto.

PiperOrigin-RevId: 215794086
---
 tensorflow/compiler/xla/literal.cc                |  8 ++++----
 .../compiler/xla/service/hlo_instruction.cc       |  4 ++--
 .../compiler/xla/service/hlo_parser_test.cc       |  2 +-
 tensorflow/compiler/xla/service/hlo_sharding.cc   | 15 +++++++++++++++
 tensorflow/compiler/xla/shape_util.cc             |  7 ++-----
 5 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index 177f39cc74..656ce720a1 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -1945,11 +1945,11 @@ Status LiteralBase::Piece::CopyFromProto(const LiteralProto& proto) {
       }
     } break;
     case TUPLE:
-      LOG(FATAL) << "Should not be called on tuple shapes: "
-                 << ShapeUtil::HumanString(subshape());
-      break;
+      return InvalidArgument("Should not be called on tuple shapes: %s",
+                             ShapeUtil::HumanString(subshape()));
     default:
-      LOG(FATAL) << "Unhandled primitive type " << subshape().element_type();
+      return InvalidArgument("Is called on unsupported shape: %s",
+                             ShapeUtil::HumanString(subshape()));
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index fb91adc302..2f6db7cd7c 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -465,8 +465,8 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       break;
     }
     case HloOpcode::kIota:
-      TF_RET_CHECK(proto.dimensions_size() <= 1)
-          << "Iota instruction should have at most 1 dimension but sees "
+      TF_RET_CHECK(proto.dimensions_size() == 1)
+          << "Iota instruction should have 1 dimension but sees "
           << proto.dimensions_size();
       instruction = CreateIota(proto.shape(), proto.dimensions(0));
       break;
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index b618510640..255123d331 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1304,7 +1304,7 @@ TEST_F(HloParserTest, MoreConstants) {
 
 ENTRY %SelectScalarS32True.v4 () -> s32[] {
   %constant.2 = pred[] constant(true)
-  %constant.1 = s32[] constant(-42), sharding={s32[5,6] devices=[2,3]1,2,3,4}
+  %constant.1 = s32[] constant(-42), sharding={s32[5,6] devices=[2,2]1,2,3,4}
   %constant = s32[] constant(42)
   %select = s32[] select(pred[] %constant.2, s32[] %constant.1, s32[] %constant)
 }
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index 94c7bafd3b..188f4acc79 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
+#include "tensorflow/compiler/xla/overflow_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace xla {
@@ -377,6 +378,20 @@ Status HloSharding::ValidateNonTuple(const Shape& shape,
       << "Maximal sharding is expected to have single device assignment, but "
       << proto.tile_assignment_devices().size() << " has provided.";
 
+  TF_RET_CHECK(proto.tile_assignment_devices().size() > 1);
+  TF_RET_CHECK(!proto.tile_assignment_dimensions().empty());
+
+  // RE: the product of tile assignment tensor dimensions must be
+  // equal to tile_assignment_devices.size().
+  int64 product_of_dimensions = 1;
+  for (auto dimension : proto.tile_assignment_dimensions()) {
+    TF_RET_CHECK(dimension > 0);
+    product_of_dimensions =
+        MultiplyWithoutOverflow(product_of_dimensions, dimension);
+    TF_RET_CHECK(product_of_dimensions > 0);
+  }
+  TF_RET_CHECK(product_of_dimensions == proto.tile_assignment_devices().size());
+
   // Some versions of gcc cannot infer the TileAssignment constructor from a
   // braced initializer-list, so create one manually.
   std::vector<int64> devices(proto.tile_assignment_devices().begin(),
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 476a9fe868..d244923532 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -869,11 +869,8 @@ StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
     return Status::OK();
   }
 
-  if (Rank(shape) != shape.dimensions_size()) {
-    return InvalidArgument(
-        "shape's rank is mismatched with dimension count; rank=%d "
-        "dimensions_size=%d",
-        Rank(shape), shape.dimensions_size());
+  if (LayoutUtil::IsSparseArray(shape) && Rank(shape) == 0) {
+    return InvalidArgument("sparse arrays must have rank > 0");
   }
   for (int64 i = 0; i < Rank(shape); ++i) {
     int64 dimension = shape.dimensions(i);
-- 
GitLab


From d96e073e77929006c519cd3082461d9757865dd7 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 4 Oct 2018 13:42:48 -0700
Subject: [PATCH 1154/1357] [TF:XLA] Fix inverted condition in randomized test.

PiperOrigin-RevId: 215795518
---
 tensorflow/compiler/tests/randomized_tests.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc
index 7a96f4c25c..dc119fb0f8 100644
--- a/tensorflow/compiler/tests/randomized_tests.cc
+++ b/tensorflow/compiler/tests/randomized_tests.cc
@@ -1820,7 +1820,7 @@ TEST_F(OpTest, Diag) {
     do {
       dims = RandomDims(1);
       size = TensorShape(dims).num_elements();
-    } while (size * size < tf_xla_max_tensor_size);
+    } while (size * size > tf_xla_max_tensor_size);
     return ExpectTfAndXlaOutputsAreClose(
         OpTestBuilder("Diag").RandomInput(type, dims).Attr("T", type));
   });
-- 
GitLab


From 08ecc62a38dc58e85cb46ad281486d1c75b1db9b Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dimvar@google.com>
Date: Thu, 4 Oct 2018 13:43:31 -0700
Subject: [PATCH 1155/1357] [TF:XLA] Improve the accounting for subcomputations
 in the List scheduler to avoid double-counting.

PiperOrigin-RevId: 215795640
---
 .../xla/service/hlo_memory_scheduler.cc       | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index bf30764488..5cee865b7a 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -195,13 +195,15 @@ class ListScheduler {
     return entry;
   }
 
-  // Returns the number of bytes freed if the HLO instruction is scheduled.
-  // If the instruction calls subcomputations, we count the memory used by the
-  // subcomputations as memory "defined" by the instruction. This is not
-  // entirely accurate, because subcomputation memory will be freed after the
-  // instruction finishes. But it is more accurate than not taking
-  // subcomputations into account at all. In the future, we may improve
-  // accounting for subcomputation memory (b/65409243).
+  // Returns the number of bytes freed *after* the HLO instruction finishes.
+  // The current List algorithm only considers two states for an instruction:
+  // right before it runs, and after it finishes. We don't represent memory
+  // usage during the execution of an instruction. But if the instruction calls
+  // subcomputations, they are only live during the instruction's execution.
+  // We end up counting the memory used by subcomputations as memory "defined"
+  // by the instruction. This is not entirely accurate, but it is more accurate
+  // than not taking subcomputations into account at all. In the future, we may
+  // improve accounting for subcomputation memory (b/65409243).
   int64 BytesFreedIfScheduled(const ReadyListEntry& entry) {
     int64 freed_bytes = 0;
     for (const auto& kv : entry.used_buffer_unscheduled_use_counts) {
@@ -223,7 +225,18 @@ class ListScheduler {
         }
       }
     }
-    return freed_bytes - entry.bytes_defined - max_subcomputation_bytes;
+    int64 bytes_defined;
+    if (max_subcomputation_bytes > 0 &&
+        (entry.instruction->opcode() == HloOpcode::kWhile ||
+         entry.instruction->opcode() == HloOpcode::kCall ||
+         entry.instruction->opcode() == HloOpcode::kConditional)) {
+      // The output buffer of while/call/conditional is always aliased with the
+      // output buffer of the root instruction in the body. Don't double count.
+      bytes_defined = max_subcomputation_bytes;
+    } else {
+      bytes_defined = entry.bytes_defined + max_subcomputation_bytes;
+    }
+    return freed_bytes - bytes_defined;
   }
 
   // Constructs the scheduling priority of the given instruction.
-- 
GitLab


From 4c1da53840fed235409cb2c571ea081e28388f75 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Thu, 4 Oct 2018 13:53:19 -0700
Subject: [PATCH 1156/1357] Internal change.

PiperOrigin-RevId: 215797256
---
 tensorflow/python/kernel_tests/depthwise_conv_op_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 6aee2eb0a3..737a73f97a 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -131,7 +131,7 @@ class DepthwiseConv2DTest(test.TestCase):
     with self.session(graph=graph, use_gpu=use_gpu) as sess:
       tolerance = {
           dtypes.float16: 4e-2,
-          dtypes.float32: 1e-6,
+          dtypes.float32: 1e-5,
           dtypes.float64: 1e-12,
       }[data_type]
 
-- 
GitLab


From a2e48d849f5c7a97b788ba8d2499e95aaef95945 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 14:18:22 -0700
Subject: [PATCH 1157/1357] Fix problem in quantized version of Comparison op
 handler

PiperOrigin-RevId: 215801773
---
 tensorflow/contrib/lite/kernels/comparisons.cc   | 16 +++++-----------
 .../contrib/lite/kernels/comparisons_test.cc     | 11 +++++++++++
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/comparisons.cc b/tensorflow/contrib/lite/kernels/comparisons.cc
index f765235e04..3926af5b97 100644
--- a/tensorflow/contrib/lite/kernels/comparisons.cc
+++ b/tensorflow/contrib/lite/kernels/comparisons.cc
@@ -66,31 +66,25 @@ TfLiteStatus ComparisonPrepare(TfLiteContext* context, TfLiteNode* node) {
     if (input1->type == kTfLiteUInt8) {                                        \
       auto input1_offset = -input1->params.zero_point;                         \
       auto input2_offset = -input2->params.zero_point;                         \
-      const int left_shift = 20;                                               \
-      const double twice_max_input_scale =                                     \
-          2 * std::max(input1->params.scale, input2->params.scale);            \
-      const double real_input1_multiplier =                                    \
-          input1->params.scale / twice_max_input_scale;                        \
-      const double real_input2_multiplier =                                    \
-          input2->params.scale / twice_max_input_scale;                        \
+      const int left_shift = 8;                                                \
                                                                                \
       int32 input1_multiplier;                                                 \
       int input1_shift;                                                        \
-      QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier,              \
+      QuantizeMultiplierSmallerThanOneExp(input1->params.scale,                \
                                           &input1_multiplier, &input1_shift);  \
       int32 input2_multiplier;                                                 \
       int input2_shift;                                                        \
-      QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier,              \
+      QuantizeMultiplierSmallerThanOneExp(input2->params.scale,                \
                                           &input2_multiplier, &input2_shift);  \
                                                                                \
       ComparisonParams op_params;                                              \
       op_params.left_shift = left_shift;                                       \
       op_params.input1_offset = input1_offset;                                 \
       op_params.input1_multiplier = input1_multiplier;                         \
-      op_params.input1_shift = -input1_shift;                                  \
+      op_params.input1_shift = input1_shift;                                   \
       op_params.input2_offset = input2_offset;                                 \
       op_params.input2_multiplier = input2_multiplier;                         \
-      op_params.input2_shift = -input2_shift;                                  \
+      op_params.input2_shift = input2_shift;                                   \
       if (requires_broadcast) {                                                \
         reference_ops::Broadcast4DSlow##opname##WithScaling(                   \
             op_params, GetTensorShape(input1), GetTensorData<uint8_t>(input1), \
diff --git a/tensorflow/contrib/lite/kernels/comparisons_test.cc b/tensorflow/contrib/lite/kernels/comparisons_test.cc
index 67a91c17fd..04c8bf2e30 100644
--- a/tensorflow/contrib/lite/kernels/comparisons_test.cc
+++ b/tensorflow/contrib/lite/kernels/comparisons_test.cc
@@ -402,6 +402,17 @@ TEST(ComparisonsTest, GreaterQuantized) {
   EXPECT_THAT(model.GetOutput(), ElementsAre(false, true, true, false));
 }
 
+TEST(ComparisonsTest, GreaterQuantizedSmallRange) {
+  ComparisonOpModel model({TensorType_UINT8, {1, 2, 2, 1}, 0.0, 1.0},
+                          {TensorType_UINT8, {1, 2, 2, 1}, 0.0, 2.0},
+                          TensorType_UINT8, BuiltinOperator_GREATER);
+  model.QuantizeAndPopulate<uint8_t>(model.input1(), {1.0, 0.5, 0.35, 0.1});
+  model.QuantizeAndPopulate<uint8_t>(model.input2(), {1.01, 0.25, 0.3, 0.4});
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAre(false, true, true, false));
+}
+
 TEST(ComparisonsTest, GreaterEqualQuantized) {
   const float kMin = -1.f;
   const float kMax = 128.f;
-- 
GitLab


From b01ea7a51c07f6d2988d7f2aa117374591d1e25a Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 4 Oct 2018 14:18:58 -0700
Subject: [PATCH 1158/1357] Rename "Inliner" to "MapInliner".

PiperOrigin-RevId: 215801897
---
 tensorflow/compiler/xla/service/BUILD         | 69 +++++++++----------
 tensorflow/compiler/xla/service/cpu/BUILD     |  2 +-
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  4 +-
 .../compiler/xla/service/interpreter/BUILD    |  2 +-
 .../xla/service/interpreter/compiler.cc       |  2 +-
 .../service/{inliner.cc => map_inliner.cc}    | 19 +++--
 .../xla/service/{inliner.h => map_inliner.h}  | 22 +++---
 .../{inliner_test.cc => map_inliner_test.cc}  | 20 +++---
 8 files changed, 68 insertions(+), 72 deletions(-)
 rename tensorflow/compiler/xla/service/{inliner.cc => map_inliner.cc} (87%)
 rename tensorflow/compiler/xla/service/{inliner.h => map_inliner.h} (59%)
 rename tensorflow/compiler/xla/service/{inliner_test.cc => map_inliner_test.cc} (95%)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 2f8bab0614..4797cf3330 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1841,42 +1841,6 @@ tf_cc_test(
     ],
 )
 
-cc_library(
-    name = "inliner",
-    srcs = ["inliner.cc"],
-    hdrs = ["inliner.h"],
-    deps = [
-        ":hlo",
-        ":hlo_pass",
-        ":hlo_query",
-        "//tensorflow/compiler/xla:status_macros",
-        "//tensorflow/compiler/xla:types",
-        "//tensorflow/core:lib",
-        "@com_google_absl//absl/types:span",
-    ],
-)
-
-tf_cc_test(
-    name = "inliner_test",
-    srcs = ["inliner_test.cc"],
-    deps = [
-        ":cpu_plugin",
-        ":hlo",
-        ":hlo_matchers",
-        ":inliner",
-        "//tensorflow/compiler/xla:literal",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:test",
-        "//tensorflow/compiler/xla:util",
-        "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/compiler/xla/tests:hlo_verified_test_base",
-        "//tensorflow/compiler/xla/tests:literal_test_util",
-        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
-        "@com_google_absl//absl/memory",
-    ],
-)
-
 cc_library(
     name = "computation_placer",
     srcs = ["computation_placer.cc"],
@@ -3492,6 +3456,39 @@ cc_library(
     deps = ["//tensorflow/core:lib"],
 )
 
+cc_library(
+    name = "map_inliner",
+    srcs = ["map_inliner.cc"],
+    hdrs = ["map_inliner.h"],
+    deps = [
+        ":hlo",
+        ":hlo_pass",
+        ":hlo_query",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/core:lib",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
+tf_cc_test(
+    name = "map_inliner_test",
+    srcs = ["map_inliner_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_matchers",
+        ":map_inliner",
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/tests:hlo_verified_test_base",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
+        "@com_google_absl//absl/memory",
+    ],
+)
+
 tf_cc_test(
     name = "hlo_casting_utils_test",
     srcs = ["hlo_casting_utils_test.cc"],
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index ae4c6e962d..58abb330a6 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -94,6 +94,7 @@ cc_library(
         ":target_machine_features",
         "@com_google_absl//absl/types:span",
         "//tensorflow/compiler/tf2xla:cpu_function_runtime",
+        "//tensorflow/compiler/xla/service:map_inliner",
         "//tensorflow/compiler/xla/service:scatter_expander",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:protobuf_util",
@@ -127,7 +128,6 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_subcomputation_unification",
         "//tensorflow/compiler/xla/service:hlo_verifier",
         "//tensorflow/compiler/xla/service:indexed_array_analysis",
-        "//tensorflow/compiler/xla/service:inliner",
         "//tensorflow/compiler/xla/service:llvm_compiler",
         "//tensorflow/compiler/xla/service:reduce_precision_insertion",
         "//tensorflow/compiler/xla/service:reshape_mover",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index afc94f2185..5834f67285 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -86,8 +86,8 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_subcomputation_unification.h"
 #include "tensorflow/compiler/xla/service/hlo_verifier.h"
 #include "tensorflow/compiler/xla/service/indexed_array_analysis.h"
-#include "tensorflow/compiler/xla/service/inliner.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
+#include "tensorflow/compiler/xla/service/map_inliner.h"
 #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
 #include "tensorflow/compiler/xla/service/scatter_expander.h"
@@ -249,7 +249,7 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
       &pipeline, module->config().debug_options(),
       ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
 
-  pipeline.AddPass<Inliner>();
+  pipeline.AddPass<MapInliner>();
 
   // TODO(b/65775800): Fix wrong output bug in Call and remove the CallInliner
   // pass.
diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD
index 146c9052f1..1484e14df1 100644
--- a/tensorflow/compiler/xla/service/interpreter/BUILD
+++ b/tensorflow/compiler/xla/service/interpreter/BUILD
@@ -45,8 +45,8 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
         "//tensorflow/compiler/xla/service:hlo_subcomputation_unification",
-        "//tensorflow/compiler/xla/service:inliner",
         "//tensorflow/compiler/xla/service:layout_assignment",
+        "//tensorflow/compiler/xla/service:map_inliner",
         "//tensorflow/compiler/xla/service:reshape_mover",
         "//tensorflow/compiler/xla/service:while_loop_simplifier",
         "//tensorflow/core:lib",
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 27fe89375d..7c79eb7d79 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -28,9 +28,9 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_pass_fix.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"
 #include "tensorflow/compiler/xla/service/hlo_subcomputation_unification.h"
-#include "tensorflow/compiler/xla/service/inliner.h"
 #include "tensorflow/compiler/xla/service/interpreter/executable.h"
 #include "tensorflow/compiler/xla/service/layout_assignment.h"
+#include "tensorflow/compiler/xla/service/map_inliner.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
 #include "tensorflow/compiler/xla/status_macros.h"
diff --git a/tensorflow/compiler/xla/service/inliner.cc b/tensorflow/compiler/xla/service/map_inliner.cc
similarity index 87%
rename from tensorflow/compiler/xla/service/inliner.cc
rename to tensorflow/compiler/xla/service/map_inliner.cc
index 50c408f5bb..2200ef054a 100644
--- a/tensorflow/compiler/xla/service/inliner.cc
+++ b/tensorflow/compiler/xla/service/map_inliner.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/inliner.h"
+#include "tensorflow/compiler/xla/service/map_inliner.h"
 
 #include <memory>
 #include <string>
@@ -32,10 +32,10 @@ limitations under the License.
 
 namespace xla {
 
-// InlinerVisitor traverses the HLO computation and inlines maps.
-class InlinerVisitor : public DfsHloVisitorWithDefault {
+// MapInlinerVisitor traverses the HLO computation and inlines maps.
+class MapInlinerVisitor : public DfsHloVisitorWithDefault {
  public:
-  explicit InlinerVisitor(HloComputation* computation)
+  explicit MapInlinerVisitor(HloComputation* computation)
       : computation_(computation) {}
 
   // Default visitor action is to do nothing and return OK.
@@ -49,24 +49,23 @@ class InlinerVisitor : public DfsHloVisitorWithDefault {
   StatusOr<bool> Run(HloComputation* computation);
 
  private:
-  // Current HloComputation instance the InlinerVisitor is traversing.
+  // Current HloComputation instance the MapInlinerVisitor is traversing.
   HloComputation* computation_;
 
   // Whether algebraic simplification has occurred.
   bool changed_ = false;
 };
 
-StatusOr<bool> InlinerVisitor::Run(HloComputation* computation) {
+StatusOr<bool> MapInlinerVisitor::Run(HloComputation* computation) {
   changed_ = false;
   computation_ = computation;
   TF_RETURN_IF_ERROR(computation->root_instruction()->Accept(this));
   return changed_;
 }
 
-Status InlinerVisitor::HandleMap(HloInstruction* map) {
+Status MapInlinerVisitor::HandleMap(HloInstruction* map) {
   HloComputation* function = map->to_apply();
   HloInstruction& root = *function->root_instruction();
-  // TODO(b/29249531): Add DCE pass to remove unused HloComputations.
   // Only inlining functions that are simply a single operation until a better
   // profitability model for inlining is defined.
   if (hlo_query::AllOperandsAreParameters(root)) {
@@ -112,8 +111,8 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) {
   return Status::OK();
 }
 
-StatusOr<bool> Inliner::Run(HloModule* module) {
-  InlinerVisitor visitor(/*computation=*/nullptr);
+StatusOr<bool> MapInliner::Run(HloModule* module) {
+  MapInlinerVisitor visitor(/*computation=*/nullptr);
   bool changed = false;
   for (HloComputation* computation : module->computations()) {
     TF_ASSIGN_OR_RETURN(bool computation_changed, visitor.Run(computation));
diff --git a/tensorflow/compiler/xla/service/inliner.h b/tensorflow/compiler/xla/service/map_inliner.h
similarity index 59%
rename from tensorflow/compiler/xla/service/inliner.h
rename to tensorflow/compiler/xla/service/map_inliner.h
index e20af08fb7..b679118118 100644
--- a/tensorflow/compiler/xla/service/inliner.h
+++ b/tensorflow/compiler/xla/service/map_inliner.h
@@ -13,27 +13,27 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INLINER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_INLINER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MAP_INLINER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_MAP_INLINER_H_
 
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 
 namespace xla {
 
-// A pass which performs inlining. Which can result, for example, in functions
-// that were previously being mapped by Map instead directly applied to the
-// forwarded operands (i.e., map({X, Y}, max) -> max(X, Y)).
-class Inliner : public HloModulePass {
+// A pass which performs map inlining. This replaces kMap instructions with
+// their equivalent sequence of array operations. For example:
+//   map({X, Y}, add) -> add(X, Y)).
+class MapInliner : public HloModulePass {
  public:
-  ~Inliner() override = default;
-  absl::string_view name() const override { return "inline"; }
+  ~MapInliner() override = default;
+  absl::string_view name() const override { return "map-inline"; }
 
-  // Run inlining on the given computation. Returns whether the computation was
-  // changed.
+  // Run map inlining on the given computation. Returns whether the computation
+  // was changed.
   StatusOr<bool> Run(HloModule* module) override;
 };
 
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_INLINER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MAP_INLINER_H_
diff --git a/tensorflow/compiler/xla/service/inliner_test.cc b/tensorflow/compiler/xla/service/map_inliner_test.cc
similarity index 95%
rename from tensorflow/compiler/xla/service/inliner_test.cc
rename to tensorflow/compiler/xla/service/map_inliner_test.cc
index 98e0f2cfd7..84059dd0f7 100644
--- a/tensorflow/compiler/xla/service/inliner_test.cc
+++ b/tensorflow/compiler/xla/service/map_inliner_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/inliner.h"
+#include "tensorflow/compiler/xla/service/map_inliner.h"
 
 #include <memory>
 #include <utility>
@@ -35,10 +35,10 @@ namespace op = xla::testing::opcode_matchers;
 namespace xla {
 namespace {
 
-using InlinerTest = HloVerifiedTestBase;
+using MapInlinerTest = HloVerifiedTestBase;
 
 // Test that `map` with `max` is transformed to `max`
-TEST_F(InlinerTest, MapMax) {
+TEST_F(MapInlinerTest, MapMax) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
 
   auto max_builder = HloComputation::Builder(TestName());
@@ -63,7 +63,7 @@ TEST_F(InlinerTest, MapMax) {
   hlo_module->AddEmbeddedComputation(std::move(max_f32));
   hlo_module->AddEntryComputation(std::move(computation));
 
-  Inliner inliner;
+  MapInliner inliner;
   EXPECT_TRUE(inliner.Run(hlo_module).ValueOrDie());
   EXPECT_THAT(hlo_module->entry_computation()->root_instruction(),
               op::Maximum(lhs, rhs));
@@ -75,7 +75,7 @@ TEST_F(InlinerTest, MapMax) {
 }
 
 // Test that `constant` function is changed to `broadcast`.
-TEST_F(InlinerTest, MapConstant) {
+TEST_F(MapInlinerTest, MapConstant) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
 
   auto const2_builder = HloComputation::Builder(TestName());
@@ -97,7 +97,7 @@ TEST_F(InlinerTest, MapConstant) {
   hlo_module->AddEmbeddedComputation(std::move(const2_f32));
   hlo_module->AddEntryComputation(std::move(computation));
   HloInstruction* root = hlo_module->entry_computation()->root_instruction();
-  Inliner inliner;
+  MapInliner inliner;
   EXPECT_TRUE(inliner.Run(hlo_module).ValueOrDie());
   root = hlo_module->entry_computation()->root_instruction();
   EXPECT_THAT(root, op::Broadcast(op::Constant()));
@@ -108,7 +108,7 @@ TEST_F(InlinerTest, MapConstant) {
   EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
 }
 
-TEST_F(InlinerTest, MapSubtractOppositeOrder) {
+TEST_F(MapInlinerTest, MapSubtractOppositeOrder) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
 
   // Note that the parameter ordinals are in the opposite order to their
@@ -135,7 +135,7 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) {
   hlo_module->AddEmbeddedComputation(std::move(max_f32));
   hlo_module->AddEntryComputation(std::move(computation));
 
-  Inliner inliner;
+  MapInliner inliner;
   EXPECT_TRUE(inliner.Run(hlo_module).ValueOrDie());
   EXPECT_THAT(hlo_module->entry_computation()->root_instruction(),
           op::Subtract(rhs, lhs));
@@ -146,7 +146,7 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) {
   EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
 }
 
-TEST_F(InlinerTest, MapParameter) {
+TEST_F(MapInlinerTest, MapParameter) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
 
   auto param_builder = HloComputation::Builder(TestName());
@@ -167,7 +167,7 @@ TEST_F(InlinerTest, MapParameter) {
   hlo_module->AddEmbeddedComputation(std::move(param_f32));
   hlo_module->AddEntryComputation(std::move(computation));
 
-  Inliner inliner;
+  MapInliner inliner;
   EXPECT_TRUE(inliner.Run(hlo_module.get()).ValueOrDie());
   EXPECT_THAT(hlo_module->entry_computation()->root_instruction(), rhs);
 
-- 
GitLab


From b74c9aa65fcbe615495a972a5021e983707d02f6 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 4 Oct 2018 14:24:25 -0700
Subject: [PATCH 1159/1357] Add apidefs for the list ops.

PiperOrigin-RevId: 215802845
---
 .../api_def/python_api/api_defTensorListPushBackBatch.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_EmptyTensorList.pbtxt     | 4 ++++
 .../api_def/python_api/api_def_TensorListConcatLists.pbtxt    | 4 ++++
 .../api_def/python_api/api_def_TensorListElementShape.pbtxt   | 4 ++++
 .../api_def/python_api/api_def_TensorListFromTensor.pbtxt     | 4 ++++
 .../core/api_def/python_api/api_def_TensorListGather.pbtxt    | 4 ++++
 .../core/api_def/python_api/api_def_TensorListGetItem.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListLength.pbtxt    | 4 ++++
 .../core/api_def/python_api/api_def_TensorListPopBack.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListPushBack.pbtxt  | 4 ++++
 .../core/api_def/python_api/api_def_TensorListReserve.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListScatter.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListSetItem.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListStack.pbtxt     | 4 ++++
 14 files changed, 56 insertions(+)
 create mode 100644 tensorflow/core/api_def/python_api/api_defTensorListPushBackBatch.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_EmptyTensorList.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListConcatLists.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListElementShape.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListFromTensor.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListGather.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListGetItem.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListLength.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListPopBack.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListPushBack.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListReserve.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListScatter.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListSetItem.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListStack.pbtxt

diff --git a/tensorflow/core/api_def/python_api/api_defTensorListPushBackBatch.pbtxt b/tensorflow/core/api_def/python_api/api_defTensorListPushBackBatch.pbtxt
new file mode 100644
index 0000000000..3d937c745c
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_defTensorListPushBackBatch.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListPushBackBatch"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_EmptyTensorList.pbtxt b/tensorflow/core/api_def/python_api/api_def_EmptyTensorList.pbtxt
new file mode 100644
index 0000000000..44f25b5d93
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_EmptyTensorList.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "EmptyTensorList"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListConcatLists.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListConcatLists.pbtxt
new file mode 100644
index 0000000000..45fc55e71e
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListConcatLists.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListConcatLists"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListElementShape.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListElementShape.pbtxt
new file mode 100644
index 0000000000..e1ad713e7f
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListElementShape.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListElementShape"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListFromTensor.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListFromTensor.pbtxt
new file mode 100644
index 0000000000..4aaefba3c5
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListFromTensor.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListFromTensor"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListGather.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListGather.pbtxt
new file mode 100644
index 0000000000..aaf607d70e
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListGather.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListGather"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListGetItem.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListGetItem.pbtxt
new file mode 100644
index 0000000000..3bb5f39cbc
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListGetItem.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListGetItem"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListLength.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListLength.pbtxt
new file mode 100644
index 0000000000..a04c20bb8a
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListLength.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListLength"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListPopBack.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListPopBack.pbtxt
new file mode 100644
index 0000000000..9287162f22
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListPopBack.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListPopBack"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListPushBack.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListPushBack.pbtxt
new file mode 100644
index 0000000000..da2bc11721
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListPushBack.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListPushBack"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListReserve.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListReserve.pbtxt
new file mode 100644
index 0000000000..77e63747d5
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListReserve.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListReserve"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListScatter.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListScatter.pbtxt
new file mode 100644
index 0000000000..0015189d7f
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListScatter.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListScatter"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListSetItem.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListSetItem.pbtxt
new file mode 100644
index 0000000000..4999ee7ad9
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListSetItem.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListSetItem"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListStack.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListStack.pbtxt
new file mode 100644
index 0000000000..2dc7b2784b
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListStack.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListStack"
+  visibility: HIDDEN
+}
-- 
GitLab


From ac7b84de8803edbb2d4da573b3f8704e9fad8fa8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 14:45:32 -0700
Subject: [PATCH 1160/1357] Internal change.

PiperOrigin-RevId: 215806953
---
 tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
index 9f62ac3f2c..c22a457a71 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
@@ -113,6 +113,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // input configuration.
   TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
 
+  TF_LITE_ENSURE_EQ(context, input->dims->size, 3);
   const int batch_size = input->dims->data[0];
   const int max_time = input->dims->data[1];
   const int fw_num_units = fw_input_weights->dims->data[0];
-- 
GitLab


From a742575879db1df48daf929b8d29e43a1d168dd7 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Thu, 4 Oct 2018 14:55:14 -0700
Subject: [PATCH 1161/1357] Automated rollback of commit
 6b538d9ce54e878576131cde0c76e43a893180c2

PiperOrigin-RevId: 215808649
---
 tensorflow/python/data/kernel_tests/BUILD     |  1 -
 tensorflow/tensorflow.bzl                     | 39 ++++++++-----------
 .../tools/pip_package/pip_smoke_test.py       |  2 +-
 3 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 10ec0dbe1c..c7295d6e69 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -306,7 +306,6 @@ cuda_py_test(
         "//tensorflow/python:framework_test_lib",
     ],
     tags = [
-        "no_oss",  # TODO(b/116813115): Investigate timeout and re-enable.
         "no_windows_gpu",
     ],
 )
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index dead44c57e..cad5de1b0c 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1798,29 +1798,22 @@ def cuda_py_test(
         flaky = 0,
         xla_enabled = False,
         grpc_enabled = False):
-    if main == None:
-        main = name + ".py"
-    for config in ["cpu", "gpu"]:
-        test_name = name
-        test_tags = tags
-        if config == "gpu":
-            test_name += "_gpu"
-            test_tags = test_tags + tf_cuda_tests_tags()
-        tf_py_test(
-            name = test_name,
-            size = size,
-            srcs = srcs,
-            data = data,
-            main = main,
-            args = args,
-            tags = test_tags,
-            shard_count = shard_count,
-            additional_deps = additional_deps,
-            kernels = kernels,
-            flaky = flaky,
-            xla_enabled = xla_enabled,
-            grpc_enabled = grpc_enabled,
-        )
+    test_tags = tags + tf_cuda_tests_tags()
+    tf_py_test(
+        name = name,
+        size = size,
+        srcs = srcs,
+        data = data,
+        main = main,
+        args = args,
+        tags = test_tags,
+        shard_count = shard_count,
+        additional_deps = additional_deps,
+        kernels = kernels,
+        flaky = flaky,
+        xla_enabled = xla_enabled,
+        grpc_enabled = grpc_enabled,
+    )
 
 register_extension_info(
     extension_name = "cuda_py_test",
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index e7f9628fa6..c6ef82ccdc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -142,7 +142,7 @@ def main():
 
   missing_dependencies = []
   # File extensions and endings to ignore
-  ignore_extensions = ["_test", "_test.py", "_test_gpu", "_test_gpu.py"]
+  ignore_extensions = ["_test", "_test.py"]
 
   ignored_files = 0
   blacklisted_files = len(BLACKLIST)
-- 
GitLab


From 2e2e89699c1186eef157911b57e4b062de376ce9 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Thu, 4 Oct 2018 14:59:43 -0700
Subject: [PATCH 1162/1357] Add basic TensorList op support in bridge.

* Add kernels for TensorListReserve. EmptyTensorList, TensorListElementShape, TensorListPushBack, TensorlistPopBack;
* Treat list type pretty much identical to Stack in the bridge for now;
* Support variant output by treating variant like a uint8 and leaving the interpretation up to the XlaExpression (variant type does not support tensor_data());

PiperOrigin-RevId: 215809335
---
 tensorflow/compiler/tests/BUILD               |  16 ++
 .../compiler/tests/tensor_list_ops_test.py    | 105 ++++++++
 tensorflow/compiler/tf2xla/kernels/BUILD      |   2 +
 .../tf2xla/kernels/tensor_list_ops.cc         | 226 ++++++++++++++++++
 tensorflow/compiler/tf2xla/xla_op_kernel.cc   |  40 +++-
 tensorflow/compiler/tf2xla/xla_op_kernel.h    |   5 +
 6 files changed, 384 insertions(+), 10 deletions(-)
 create mode 100644 tensorflow/compiler/tests/tensor_list_ops_test.py
 create mode 100644 tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index ee36729fd1..ba2401ed26 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -894,6 +894,22 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "tensor_list_ops_test",
+    size = "small",
+    srcs = ["tensor_list_ops_test.py"],
+    # TensorList ops are not implemented in the on-demand compilation model yet.
+    disabled_backends = "cpu_ondemand",
+    deps = [
+        ":xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:list_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:function",
+    ],
+)
+
 tf_xla_py_test(
     name = "ternary_ops_test",
     size = "small",
diff --git a/tensorflow/compiler/tests/tensor_list_ops_test.py b/tensorflow/compiler/tests/tensor_list_ops_test.py
new file mode 100644
index 0000000000..b556723eec
--- /dev/null
+++ b/tensorflow/compiler/tests/tensor_list_ops_test.py
@@ -0,0 +1,105 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ops which manipulate lists of tensors via bridge."""
+
+# pylint: disable=g-bad-name
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+from tensorflow.compiler.tests import xla_test
+from tensorflow.python.client import session
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import list_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.platform import test
+from tensorflow.python.training import server_lib
+
+
+def scalar_shape():
+  return ops.convert_to_tensor([], dtype=dtypes.int32)
+
+
+class ListOpsTest(xla_test.XLATestCase):
+
+  def testElementShape(self):
+    with self.cached_session() as sess, self.test_scope():
+      dim = array_ops.placeholder(dtypes.int32)
+      l = list_ops.tensor_list_reserve(
+          element_shape=(dim, 15), num_elements=20,
+          element_dtype=dtypes.float32)
+      e32 = list_ops.tensor_list_element_shape(l, shape_type=dtypes.int32)
+      e64 = list_ops.tensor_list_element_shape(l, shape_type=dtypes.int64)
+      self.assertAllEqual(sess.run(e32, {dim: 10}), (10, 15))
+      self.assertAllEqual(sess.run(e64, {dim: 7}), (7, 15))
+
+  def testPushPop(self):
+    with self.cached_session() as sess, self.test_scope():
+      num = array_ops.placeholder(dtypes.int32)
+      l = list_ops.tensor_list_reserve(
+          element_shape=(7, 15), num_elements=num, element_dtype=dtypes.float32)
+      l = list_ops.tensor_list_push_back(
+          l, constant_op.constant(1.0, shape=(7, 15)))
+      l = list_ops.tensor_list_push_back(
+          l, constant_op.constant(2.0, shape=(7, 15)))
+      l, e2 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      _, e1 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      self.assertAllEqual(sess.run(e2, {num: 10}), 2.0 * np.ones((7, 15)))
+      self.assertAllEqual(sess.run(e1, {num: 10}), 1.0 * np.ones((7, 15)))
+
+  def testPushPopSeparateLists(self):
+    with self.cached_session() as sess, self.test_scope():
+      num = array_ops.placeholder(dtypes.int32)
+      l = list_ops.tensor_list_reserve(
+          element_shape=scalar_shape(),
+          num_elements=num,
+          element_dtype=dtypes.float32)
+      l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0))
+      l2 = list_ops.tensor_list_push_back(l, constant_op.constant(2.0))
+      l3 = list_ops.tensor_list_push_back(l, constant_op.constant(3.0))
+      _, e11 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      l2, e21 = list_ops.tensor_list_pop_back(l2, element_dtype=dtypes.float32)
+      l2, e22 = list_ops.tensor_list_pop_back(l2, element_dtype=dtypes.float32)
+      l3, e31 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32)
+      l3, e32 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32)
+      result = sess.run([e11, [e21, e22], [e31, e32]], {num: 20})
+      self.assertEqual(result, [1.0, [2.0, 1.0], [3.0, 1.0]])
+
+  def testEmptyTensorList(self):
+    dim = 7
+    with self.cached_session() as sess, self.test_scope():
+      p = array_ops.placeholder(dtypes.int32)
+      l = list_ops.empty_tensor_list(
+          element_shape=(p, 15), element_dtype=dtypes.float32)
+      l = list_ops.tensor_list_push_back(
+          l, constant_op.constant(1.0, shape=(dim, 15)))
+      _, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   "Use TensorListReserve instead"):
+        self.assertEqual(sess.run(e, {p: dim}), 1.0 * np.ones((dim, 15)))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 9a7130f253..95a010a119 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -95,6 +95,7 @@ tf_kernel_library(
         "stateless_random_ops.cc",
         "strided_slice_op.cc",
         "tensor_array_ops.cc",
+        "tensor_list_ops.cc",
         "tile_ops.cc",
         "topk_op.cc",
         "training_ops.cc",
@@ -158,6 +159,7 @@ tf_kernel_library(
         "//tensorflow/core/kernels:control_flow_ops",
         "//tensorflow/core/kernels:conv_ops",
         "//tensorflow/core/kernels:cwise_op",
+        "//tensorflow/core/kernels:list_kernels",
         "//tensorflow/core/kernels:no_op",
         "//tensorflow/core/kernels:ops_util",
         "//tensorflow/core/kernels:pooling_ops",
diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc
new file mode 100644
index 0000000000..74d4fcc425
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc
@@ -0,0 +1,226 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// XLA TensorList operators.
+
+#include <limits>
+#include <vector>
+
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace {
+
+Status GetTensorListShape(xla::XlaBuilder* builder, xla::XlaOp op,
+                          TensorShape* tensor_list_shape) {
+  auto shape_or_status = builder->GetShape(op);
+  if (!shape_or_status.ok()) {
+    return shape_or_status.status();
+  }
+  xla::Shape shape = shape_or_status.ValueOrDie();
+  TF_RET_CHECK(xla::ShapeUtil::IsTuple(shape));
+  return XLAShapeToTensorShape(xla::ShapeUtil::GetTupleElementShape(shape, 0),
+                               tensor_list_shape);
+}
+
+class TensorListReserveOp : public XlaOpKernel {
+ public:
+  explicit TensorListReserveOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &dtype_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    TensorShape element_shape;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &element_shape));
+    int64 num_elements;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &num_elements));
+
+    TensorShape tensor_shape;
+    tensor_shape.AddDim(num_elements);
+    tensor_shape.AppendShape(element_shape);
+
+    xla::XlaBuilder* b = ctx->builder();
+    ctx->SetOutput(0, xla::Tuple(b, {xla::Broadcast(XlaHelpers::Zero(b, dtype_),
+                                                    tensor_shape.dim_sizes()),
+                                     xla::ConstantR0<int32>(b, 0)}));
+  }
+
+ private:
+  DataType dtype_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(TensorListReserveOp);
+};
+
+REGISTER_XLA_OP(Name("TensorListReserve")
+                    .CompileTimeConstInput("element_shape")
+                    .CompileTimeConstInput("num_elements"),
+                TensorListReserveOp);
+
+class EmptyTensorListOp : public XlaOpKernel {
+ public:
+  explicit EmptyTensorListOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    ctx->CtxFailure(
+        errors::InvalidArgument("XLA compilation requires a fixed tensor list "
+                                "size. Use TensorListReserve instead."));
+  }
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(EmptyTensorListOp);
+};
+
+REGISTER_XLA_OP(Name("EmptyTensorList"), EmptyTensorListOp);
+
+class TensorListElementShapeOp : public XlaOpKernel {
+ public:
+  explicit TensorListElementShapeOp(OpKernelConstruction* ctx)
+      : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("shape_type", &shape_type_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* b = ctx->builder();
+    TensorShape shape;
+    OP_REQUIRES_OK(ctx, GetTensorListShape(b, ctx->Input(0), &shape));
+    shape.RemoveDim(0);
+
+    switch (shape_type_) {
+      case DT_INT64:
+        ctx->SetOutput(0, xla::ConstantR1<int64>(b, shape.dim_sizes()));
+        break;
+      case DT_INT32: {
+        std::vector<int32> size;
+        for (int64 s : shape.dim_sizes()) {
+          size.push_back(s);
+        }
+        ctx->SetOutput(0, xla::ConstantR1<int32>(b, size));
+        break;
+      }
+      default:
+        ctx->CtxFailure(
+            errors::InvalidArgument("Unsupported shape type requested"));
+        return;
+    }
+  }
+
+ private:
+  DataType shape_type_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(TensorListElementShapeOp);
+};
+
+REGISTER_XLA_OP(Name("TensorListElementShape"), TensorListElementShapeOp);
+
+class TensorListPushBackOp : public XlaOpKernel {
+ public:
+  explicit TensorListPushBackOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &dtype_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* b = ctx->builder();
+    xla::XlaOp list = ctx->Input(0);
+    TensorShape elem_shape = ctx->InputShape(1);
+
+    xla::XlaOp ta = xla::GetTupleElement(list, 0);
+    xla::XlaOp index = xla::GetTupleElement(list, 1);
+    xla::XlaOp value = ctx->Input(1);
+
+    // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
+    auto start_indices =
+        xla::Pad(xla::Reshape(index, {1}), xla::ConstantR0<int32>(b, 0),
+                 xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
+
+    TensorShape slice_shape = elem_shape;
+    slice_shape.InsertDim(0, 1LL);
+    auto update = xla::Reshape(value, slice_shape.dim_sizes());
+
+    // TODO(phawkins): We don't check the index is in bounds --- there is no
+    // error mechanism in XLA.
+    ctx->SetOutput(
+        0, xla::Tuple(b, {xla::DynamicUpdateSlice(ta, update, start_indices),
+                          index + xla::ConstantR0<int32>(b, 1)}));
+  }
+
+ private:
+  DataType dtype_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(TensorListPushBackOp);
+};
+
+REGISTER_XLA_OP(Name("TensorListPushBack"), TensorListPushBackOp);
+
+class TensorListPopBackOp : public XlaOpKernel {
+ public:
+  explicit TensorListPopBackOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &dtype_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* b = ctx->builder();
+    xla::XlaOp state = ctx->Input(0);
+
+    TensorShape shape;
+    OP_REQUIRES_OK(ctx, GetTensorListShape(b, state, &shape));
+
+    xla::XlaOp ta = xla::GetTupleElement(state, 0);
+    xla::XlaOp index = xla::GetTupleElement(state, 1);
+
+    index = index - xla::ConstantR0<int32>(b, 1);
+
+    // start_indices of the DynamicSlice are [index, 0, 0, ..., 0].
+    auto start_indices =
+        xla::Pad(xla::Reshape(index, {1}), xla::ConstantR0<int32>(b, 0),
+                 xla::MakeEdgePaddingConfig({{0, shape.dims() - 1}}));
+
+    auto slice_shape = shape.dim_sizes();
+    slice_shape[0] = 1LL;
+
+    // TODO(phawkins): We don't check the index is in bounds --- there is no
+    // error mechanism in XLA.
+    xla::XlaOp read = xla::DynamicSlice(ta, start_indices, slice_shape);
+    // Remove the leading '1' dimension.
+    std::vector<int64> value_shape(slice_shape.begin() + 1, slice_shape.end());
+
+    ctx->SetOutput(0, xla::Tuple(b, {ta, index}));
+    ctx->SetOutput(1, xla::Reshape(read, value_shape));
+  }
+
+ private:
+  DataType dtype_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(TensorListPopBackOp);
+};
+
+REGISTER_XLA_OP(Name("TensorListPopBack"), TensorListPopBackOp);
+
+}  // anonymous namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
index 2a9eaeee14..dd3498ef7a 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
@@ -455,23 +455,43 @@ Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type,
   return Status::OK();
 }
 
+Status XlaOpKernelContext::allocate_output(int index, const xla::Shape& shape,
+                                           Tensor** output) {
+  // The step's default allocator is the dummy XlaCompilationAllocator which
+  // simply allocates a metadata buffer to hold the expression to which it
+  // corresponds.
+  if (expected_output_dtype(index) == DT_VARIANT) {
+    // tensor_data() is not supported for variant Tensor (i.e.,
+    // DataTypeCanUseMemcpy is false for DT_VARIANT), and so storing the
+    // XlaExpression inside the Tensor's tensor_data() does not work for
+    // variant. Instead construct a uint8 tensor and store the expression in its
+    // value.
+    // TODO(jpienaar): This should be refactored to stop masquerading
+    // XlaExpressions as Tensors.
+    *output = new Tensor();
+    TensorShape tensor_shape;
+    TF_RETURN_IF_ERROR(
+        context_->allocate_temp(DT_UINT8, tensor_shape, *output));
+    context_->set_output(index, **output);
+  } else {
+    TensorShape tensor_shape;
+    TF_RETURN_IF_ERROR(XLAShapeToTensorShape(shape, &tensor_shape));
+    TF_RETURN_IF_ERROR(context_->allocate_output(index, tensor_shape, output));
+  }
+  return Status::OK();
+}
+
 void XlaOpKernelContext::SetOutput(int index, const xla::XlaOp& handle) {
   // Makes the host Tensor that will refer to the expression.
   Tensor* output = nullptr;
-  auto shape = builder()->GetShape(handle);
-  if (!shape.ok()) {
-    SetStatus(shape.status());
+  auto shape_or = builder()->GetShape(handle);
+  if (!shape_or.ok()) {
+    SetStatus(shape_or.status());
     return;
   }
 
-  // The step's default allocator is the dummy XlaCompilationAllocator which
-  // simply allocates a metadata buffer to hold the expression to which it
-  // corresponds.
-  TensorShape tensor_shape;
-  OP_REQUIRES_OK(context_,
-                 XLAShapeToTensorShape(shape.ValueOrDie(), &tensor_shape));
   OP_REQUIRES_OK(context_,
-                 context_->allocate_output(index, tensor_shape, &output));
+                 allocate_output(index, shape_or.ValueOrDie(), &output));
 
   // The expression is stored in the tensor's data buffer. Fill in the
   // fields now.
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h
index a3a0d10cc0..aa00a45496 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.h
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h
@@ -255,6 +255,11 @@ class XlaOpKernelContext {
   // Returns the tensor of input `name`.
   const Tensor& GetInputTensorByName(absl::string_view name);
 
+  // Wraps OpKernelContext's allocate_output method while providing special
+  // behavior for DT_VARIANT: a variant is treated as DT_UINT8 scalar as the
+  // type to allow mapping for variant to more generic types.
+  Status allocate_output(int index, const xla::Shape& shape, Tensor** output);
+
   OpKernelContext* const context_;
 };
 
-- 
GitLab


From 26d3617d2ab5f4874b73059be524e94b9535465b Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 4 Oct 2018 15:11:26 -0700
Subject: [PATCH 1163/1357] Avoid creating control edges on not-this-graph.

PiperOrigin-RevId: 215811680
---
 tensorflow/python/eager/function.py       | 17 +++++++----------
 tensorflow/python/ops/control_flow_ops.py |  3 +++
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index dd9f5e233c..2750461fb2 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -269,15 +269,6 @@ class FuncGraph(ops.Graph):
   def variables(self, var_list):
     self._weak_variables = [weakref.ref(v) for v in var_list]
 
-  def control_dependencies(self, control_inputs):
-    # Drop control dependencies to outside of the graph. TODO(b/117109273)
-    # unclear how to capture an op, not a tensor.
-    if not control_inputs:
-      return super(FuncGraph, self).control_dependencies(control_inputs)
-    return super(FuncGraph, self).control_dependencies(
-        [c for c in control_inputs
-         if getattr(c, "graph", None) is self])
-
   def create_op(
       self,
       op_type,
@@ -503,6 +494,9 @@ class _EagerDefinedFunction(object):
 
     Returns:
       The outputs of the function call.
+
+    Raises:
+      ValueError: if the number of arguments is incorrect.
     """
 
     executing_eagerly = ctx.executing_eagerly()
@@ -536,6 +530,10 @@ class _EagerDefinedFunction(object):
       # TODO(akshayka): Either remove this if the FunctionLibraryRuntime
       # creates `PartitionedCallOp` kernels by default, or remove the previous
       # branch if a TPU kernel is registered for `PartitionedCall`.
+      if len(args) != len(self.signature.input_arg):
+        raise ValueError(
+            "Arguments and signature arguments do not match: %s %s " %
+            (len(args), len(list(self.signature.input_arg))))
       outputs = functional_ops.partitioned_call(
           args=args,
           f=self,
@@ -756,7 +754,6 @@ class Function(object):
         BACKWARD_FUNCTION_ATTRIBUTE_NAME:
             self._backward_graph_function._inference_function.name})  # pylint: disable=protected-access
     forward_function_attr.update(self._attrs)
-
     self._forward_function = _EagerDefinedFunction(
         forward_function_name, self._func_graph, self._func_graph.inputs,
         self._func_graph.outputs + backwards_graph_captures,
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index f779c3d273..5bc217d355 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -1333,6 +1333,9 @@ class ControlFlowState(object):
     """
     if util.IsLoopSwitch(op):
       return None
+    if op.graph._building_function:  # pylint: disable=protected-access
+      # The optimization here is tricky to apply to functions
+      return array_ops.zeros_like(op.outputs[index])
     dead_branch = util.IsSwitch(op)
     forward_ctxt = _GetWhileContext(op)
     grad_state = self._map.get(forward_ctxt)
-- 
GitLab


From bd99ed794264668ce77ed7527bc41df7aba3927b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 15:17:46 -0700
Subject: [PATCH 1164/1357] Fix bug in Grappler constant folding: The logic
 detecting full reductions was flawed. Added better test coverage.

Also added a extra test for a related symbolic shape inference operation that I first suspected to be broken.

PiperOrigin-RevId: 215812753
---
 .../grappler/costs/graph_properties_test.cc   |   6 +
 .../grappler/optimizers/constant_folding.cc   |  47 ++++---
 .../optimizers/constant_folding_test.cc       | 130 ++++++++++++------
 3 files changed, 118 insertions(+), 65 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index 362092a6cf..db10f586bc 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -1340,6 +1340,8 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   Output zero = ops::Const(s.WithOpName("zero"), 0.0f, {});
   Output g = ops::Shape(s.WithOpName("g"), c);
   Output h = ops::Fill(s.WithOpName("h"), g, zero);
+  Output zero_idx = ops::Const(s.WithOpName("zero_idx"), {0}, {1});
+  Output j = ops::Sum(s.WithOpName("j"), a, zero_idx);
 
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -1382,6 +1384,10 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   ASSERT_EQ(2, shape_f.dim_size());
   EXPECT_EQ(shape_h.dim(0).size(), shape_c.dim(0).size());
   EXPECT_EQ(shape_h.dim(1).size(), shape_c.dim(1).size());
+
+  const auto shape_j = properties.GetOutputProperties("j").at(0).shape();
+  ASSERT_EQ(1, shape_j.dim_size());
+  EXPECT_EQ(shape_j.dim(0).size(), shape_a.dim(1).size());
 }
 
 TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) {
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index ca5d3a6dfd..3d0d95bba7 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -616,28 +616,37 @@ Status ConstantFolding::MaterializeReductionIndices(
     // We can't do anything if we don't know the rank of the input.
     return Status::OK();
   }
-  const int rank = input_prop.shape().dim_size();
-  if (rank == 0) {
+  const int input_rank = input_prop.shape().dim_size();
+  if (input_rank < 1) {
     // Unexpected graph, don't try to change it.
     return Status::OK();
   }
+  const OpInfo::TensorProperties& reduction_indices_prop = input_props[1];
+  DataType dtype = reduction_indices_prop.dtype();
+  if (dtype != DT_INT32 && dtype != DT_INT64) {
+    return Status::OK();
+  }
+  PartialTensorShape reduction_indices_shape(reduction_indices_prop.shape());
+  const int num_reduction_indices = reduction_indices_shape.num_elements();
+
   const std::vector<OpInfo::TensorProperties>& output_props =
       properties.GetOutputProperties(node->name());
   if (output_props.size() != 1) {
     return Status::OK();
   }
-  const bool keep_dims =
-      node->attr().count("keep_dims") && node->attr().at("keep_dims").b();
   const OpInfo::TensorProperties& output_prop = output_props[0];
-  PartialTensorShape output_shape(output_prop.shape());
-  if (output_shape.num_elements() != 1) {
-    bool full_reduction = false;
+  const int output_rank =
+      output_prop.shape().unknown_rank() ? -1 : output_prop.shape().dim_size();
+
+  bool full_reduction = output_rank == 0 || num_reduction_indices == input_rank;
+  if (!full_reduction) {
+    // A full reduction will generate a tensor of one of the shapes
+    // [], [1], [1, 1], [1, 1, ...]. Even if we do not know the number of
+    // elements in the output of the reduction, we may deduce it from reshape
+    // nodes following it.
     for (const NodeDef* fanout : node_map_->GetOutputs(node->name())) {
-      if (!IsReshape(*fanout) && !keep_dims) {
-        // Depending on how it's setup, a full reduction will generate a tensor
-        // of shape [], [1], [1, 1], [1, 1, ...]. If keep_dims isn't true, we
-        // rely on the existence of a reshape node following the reduction to
-        // ensure that the fanout is fed a scalar of the right shape.
+      full_reduction = false;
+      if (!IsReshape(*fanout)) {
         return Status::OK();
       }
       const std::vector<OpInfo::TensorProperties>& reshape_props =
@@ -658,20 +667,15 @@ Status ConstantFolding::MaterializeReductionIndices(
     }
   }
 
-  const OpInfo::TensorProperties& reduction_prop = input_props[1];
-  DataType dtype = reduction_prop.dtype();
-  if (dtype != DT_INT32 && dtype != DT_INT64) {
-    return Status::OK();
-  }
-  // We know it's a full reduction. We can generate the set of indices to
-  // reduce.
+  // We know it's a full reduction. We can generate the full set of indices to
+  // reduce as a constant node.
   string const_name = OptimizedNodeName(*node, "-reduction_indices");
   if (node_map_->GetNode(const_name)) {
     return Status::OK();
   }
   NodeDef* reduction_indices = graph_->add_node();
-  Tensor value(dtype, TensorShape({rank}));
-  for (int i = 0; i < rank; ++i) {
+  Tensor value(dtype, TensorShape({input_rank}));
+  for (int i = 0; i < input_rank; ++i) {
     if (dtype == DT_INT32) {
       value.vec<int32>()(i) = i;
     } else {
@@ -680,6 +684,7 @@ Status ConstantFolding::MaterializeReductionIndices(
   }
   TF_RETURN_IF_ERROR(
       CreateNodeDef(const_name, TensorValue(&value), reduction_indices));
+
   reduction_indices->set_device(node->device());
   string ctrl_dep =
       AddControlDependency(node->input(1), graph_, node_map_.get());
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index b09360a2c2..fab01edfed 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -2591,58 +2591,100 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs_InfiniteLoop) {
 }
 
 TEST_F(ConstantFoldingTest, MaterializeReductionIndices) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output input =
-      ops::Placeholder(s.WithOpName("input"), DT_FLOAT,
-                       ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
-  Output indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32);
-  Output sum = ops::Sum(s.WithOpName("sum"), input, indices);
-  Output size = ops::Const(s.WithOpName("size"), 1, {1});
-  Output reshape = ops::Reshape(s.WithOpName("reshape"), sum, size);
+  for (bool use_reshape : {true, false}) {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output input =
+        ops::Placeholder(s.WithOpName("input"), DT_FLOAT,
+                         ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
+    // If use_reshape is false, we need to now the number of indices to apply
+    // the rewrite.
+    Output indices = ops::Placeholder(
+        s.WithOpName("indices"), DT_INT32,
+        ops::Placeholder::Shape(PartialTensorShape({use_reshape ? -1 : 2})));
+    Output sum = ops::Sum(s.WithOpName("sum"), input, indices);
+    if (use_reshape) {
+      Output size = ops::Const(s.WithOpName("size"), 1, {1});
+      Output reshape = ops::Reshape(s.WithOpName("reshape"), sum, size);
+    }
 
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-  item.fetch.push_back("reshape");
+    GrapplerItem item;
+    TF_CHECK_OK(s.ToGraphDef(&item.graph));
+    item.fetch.push_back(use_reshape ? "reshape" : "sum");
 
-  auto input_t = GenerateRandomTensor<DT_FLOAT>(TensorShape({3, 4}));
-  Tensor indices_t(DT_INT32, TensorShape({2}));
-  indices_t.flat<int>()(0) = 0;
-  indices_t.flat<int>()(1) = 1;
-  auto tensors_expected = EvaluateNodes(
-      item.graph, item.fetch, {{"input", input_t}, {"indices", indices_t}});
-  EXPECT_EQ(1, tensors_expected.size());
+    auto input_t = GenerateRandomTensor<DT_FLOAT>(TensorShape({3, 4}));
+    Tensor indices_t(DT_INT32, TensorShape({2}));
+    indices_t.flat<int>()(0) = 0;
+    indices_t.flat<int>()(1) = 1;
+    auto tensors_expected = EvaluateNodes(
+        item.graph, item.fetch, {{"input", input_t}, {"indices", indices_t}});
+    EXPECT_EQ(1, tensors_expected.size());
 
-  ConstantFolding optimizer(nullptr /* cpu_device */);
-  GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
+    // Use aggressive mode to force the shape inference to propagate placeholder
+    // shapes.
+    ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                              nullptr /* cpu_device */);
+    GraphDef output;
+    Status status = optimizer.Optimize(nullptr, item, &output);
+    TF_EXPECT_OK(status);
 
-  // Run a second time to make sure the optimization is idempotent.
-  item.graph.Swap(&output);
-  status = optimizer.Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
+    // Run a second time to make sure the optimization is idempotent.
+    item.graph.Swap(&output);
+    status = optimizer.Optimize(nullptr, item, &output);
+    TF_EXPECT_OK(status);
 
-  int found = 0;
-  for (const auto& node : output.node()) {
-    if (node.name() == "ConstantFolding/sum-reduction_indices") {
-      ++found;
-      EXPECT_EQ("Const", node.op());
-      EXPECT_EQ("^indices", node.input(0));
-      EXPECT_EQ(2, TensorShape(node.attr().at("value").tensor().tensor_shape())
-                       .num_elements());
-    } else if (node.name() == "sum") {
-      ++found;
-      EXPECT_EQ("ConstantFolding/sum-reduction_indices", node.input(1));
-    } else if (node.name() == "indices") {
-      ++found;
+    int found = 0;
+    for (const auto& node : output.node()) {
+      if (node.name() == "ConstantFolding/sum-reduction_indices") {
+        ++found;
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^indices", node.input(0));
+        EXPECT_EQ(2,
+                  TensorShape(node.attr().at("value").tensor().tensor_shape())
+                      .num_elements());
+      } else if (node.name() == "sum") {
+        ++found;
+        EXPECT_EQ("ConstantFolding/sum-reduction_indices", node.input(1));
+      } else if (node.name() == "indices") {
+        ++found;
+      }
     }
+    EXPECT_EQ(3, found);
+
+    auto tensors = EvaluateNodes(output, item.fetch,
+                                 {{"input", input_t}, {"indices", indices_t}});
+    EXPECT_EQ(1, tensors.size());
+    test::ExpectTensorNear<float>(tensors_expected[0], tensors[0], 1e-5);
   }
-  EXPECT_EQ(3, found);
+}
 
-  auto tensors = EvaluateNodes(output, item.fetch,
-                               {{"input", input_t}, {"indices", indices_t}});
-  EXPECT_EQ(1, tensors.size());
-  test::ExpectTensorNear<float>(tensors_expected[0], tensors[0], 1e-5);
+TEST_F(ConstantFoldingTest, MaterializeReductionIndices_NotFullReduction) {
+  for (bool input_rank_known : {true, false}) {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output input =
+        (input_rank_known ? ops::Placeholder(s.WithOpName("input"), DT_FLOAT,
+                                             ops::Placeholder::Shape(
+                                                 PartialTensorShape({-1, -1})))
+                          : ops::Placeholder(s.WithOpName("input"), DT_FLOAT));
+    Output indices =
+        ops::Placeholder(s.WithOpName("indices"), DT_INT32,
+                         ops::Placeholder::Shape(
+                             PartialTensorShape({input_rank_known ? 1 : 2})));
+    Output sum = ops::Sum(s.WithOpName("sum"), input, indices);
+
+    GrapplerItem item;
+    TF_CHECK_OK(s.ToGraphDef(&item.graph));
+    item.fetch.push_back("sum");
+
+    // Use aggressive mode to force the shape inference to propagate placeholder
+    // shapes.
+    ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                              nullptr /* cpu_device */);
+    GraphDef output;
+    Status status = optimizer.Optimize(nullptr, item, &output);
+    TF_EXPECT_OK(status);
+
+    CompareGraphs(item.graph, output);
+  }
 }
 
 TEST_F(ConstantFoldingTest, LargeConstant) {
-- 
GitLab


From feda8c786948b1c7cc6bd9fe447781ceaff6b3d3 Mon Sep 17 00:00:00 2001
From: Pete Warden <petewarden@google.com>
Date: Thu, 4 Oct 2018 15:20:56 -0700
Subject: [PATCH 1165/1357] Fix for memory issue in micro test code, spotted by
 asan checks

PiperOrigin-RevId: 215813259
---
 .../lite/experimental/micro/kernels/softmax_test.cc       | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
index df7d87d623..694456d8ac 100644
--- a/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
@@ -160,7 +160,7 @@ void TestSoftmaxQuantized(std::initializer_list<int> input_dims_data,
 TF_LITE_MICRO_TESTS_BEGIN
 
 TF_LITE_MICRO_TEST(SimpleTest) {
-  const int output_dims_count = 6;
+  const int output_dims_count = 10;
   float output_data[output_dims_count];
   tflite::testing::TestSoftmaxFloat(  //
       {2, 2, 5},                      // Input shape.
@@ -181,7 +181,7 @@ TF_LITE_MICRO_TEST(SimpleTest) {
           0.031684921,
           0.011656231,
       },
-      {2, 2, 3},  // Output shape.
+      {2, 2, 5},  // Output shape.
       output_data);
 }
 
@@ -192,7 +192,7 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized) {
   const float input_max = 64.0f;
   const float output_min = 0.0f;
   const float output_max = (255.0f / 256.0f);
-  const int output_dims_count = 6;
+  const int output_dims_count = 5;
   uint8_t output_data[output_dims_count];
   tflite::testing::TestSoftmaxQuantized(  //
       {2, 1, 5},                          // Input shape.
@@ -212,7 +212,7 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized) {
           F2Q(0.234121657, output_min, output_max),
           F2Q(0.636408647, output_min, output_max),
       },
-      {2, 1, 3},               // Output shape.
+      {2, 1, 5},               // Output shape.
       output_min, output_max,  // Output quantized range.
       output_data);
 }
-- 
GitLab


From 3a457c7252f09afd03483092ce9dcc7aa292b8c6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 15:27:44 -0700
Subject: [PATCH 1166/1357] This CL fixes a bug in the eager benchmarks test
 that caused the defun tests to execute a different-sized matrix multiply than
 the eager tests.

PiperOrigin-RevId: 215814346
---
 tensorflow/python/eager/benchmarks_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py
index 3fe79ef244..2b0118c07f 100644
--- a/tensorflow/python/eager/benchmarks_test.py
+++ b/tensorflow/python/eager/benchmarks_test.py
@@ -353,7 +353,7 @@ class MicroBenchmarks(test.Benchmark):
                               num_iters,
                               execution_mode=None):
     f = function.defun(math_ops.matmul)
-    func = lambda: f(m, m, transpose_b)
+    func = lambda: f(m, m, transpose_b=transpose_b)
     self._run(func, num_iters, execution_mode=execution_mode)
 
   def _benchmark_defun_matmul_forward_backward(self,
@@ -366,7 +366,7 @@ class MicroBenchmarks(test.Benchmark):
     def func():
       with backprop.GradientTape() as gt:
         gt.watch(m)
-        y = f(m, m, transpose_b)
+        y = f(m, m, transpose_b=transpose_b)
       _ = gt.gradient(y, m)
 
     self._run(func, num_iters, execution_mode=execution_mode)
-- 
GitLab


From a08ca5bb74fcd828c19060216923ad0f378bb518 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Thu, 4 Oct 2018 15:29:58 -0700
Subject: [PATCH 1167/1357] Disable tensorrt:unary_test in OSS since it crashes
 with SEGV.

PiperOrigin-RevId: 215814732
---
 tensorflow/contrib/tensorrt/BUILD | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 9e8979bce4..5c16fcb760 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -455,7 +455,6 @@ cuda_py_tests(
         "test/multi_connection_neighbor_engine_test.py",
         "test/neighboring_engine_test.py",
         "test/rank_two_test.py",
-        "test/unary_test.py",
         "test/vgg_block_nchw_test.py",
         "test/vgg_block_test.py",
     ],
@@ -471,6 +470,25 @@ cuda_py_tests(
     ],
 )
 
+cuda_py_tests(
+    name = "tf_trt_integration_test_no_oss",
+    srcs = [
+        "test/unary_test.py",
+    ],
+    additional_deps = [
+        ":tf_trt_integration_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_test_lib",
+    ],
+    tags = [
+        "no_cuda_on_cpu_tap",
+        "no_oss",  # TODO(b/117274186): re-enable in OSS after crash fixed
+        "no_pip",  # TODO(b/117274186): re-enable in OSS after crash fixed
+        "no_windows",
+        "nomac",
+    ],
+)
+
 cc_library(
     name = "utils",
     srcs = ["convert/utils.cc"],
-- 
GitLab


From d6a2e7bcca5683c377b592f177bcac9aeb1c550f Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Thu, 4 Oct 2018 15:54:20 -0700
Subject: [PATCH 1168/1357] Fix unused imports.

PiperOrigin-RevId: 215819072
---
 tensorflow/compiler/tests/tensor_list_ops_test.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/tensorflow/compiler/tests/tensor_list_ops_test.py b/tensorflow/compiler/tests/tensor_list_ops_test.py
index b556723eec..5c079d595c 100644
--- a/tensorflow/compiler/tests/tensor_list_ops_test.py
+++ b/tensorflow/compiler/tests/tensor_list_ops_test.py
@@ -20,22 +20,13 @@ from __future__ import division
 from __future__ import print_function
 import numpy as np
 from tensorflow.compiler.tests import xla_test
-from tensorflow.python.client import session
-from tensorflow.python.eager import backprop
-from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import list_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.platform import test
-from tensorflow.python.training import server_lib
 
 
 def scalar_shape():
-- 
GitLab


From cf8e7cf89abb4a7783b9a99f17574ea128fa767a Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 4 Oct 2018 16:10:21 -0700
Subject: [PATCH 1169/1357] Pin ops with small integer inputs (already on the
 cpu) to the cpu in eager.

An environment variable (TF_EAGER_ENABLE_SMALL_TENSOR_CPU_PINNING) is provided to turn this off if necessary (its on by default).

PiperOrigin-RevId: 215821915
---
 .../core/common_runtime/eager/context.cc      |  4 +-
 .../core/common_runtime/eager/context.h       |  2 +
 .../core/common_runtime/eager/execute.cc      | 67 ++++++++++++++++---
 tensorflow/python/eager/core_test.py          | 28 ++++++++
 4 files changed, 91 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc
index 18420b60fd..f23cefb33d 100644
--- a/tensorflow/core/common_runtime/eager/context.cc
+++ b/tensorflow/core/common_runtime/eager/context.cc
@@ -70,7 +70,9 @@ EagerContext::EagerContext(const SessionOptions& opts,
       async_default_(async),
       log_memory_(LogMemory::IsEnabled()),
       env_(opts.env),
-      use_send_tensor_rpc_(false) {
+      use_send_tensor_rpc_(false),
+      pin_small_ops_to_cpu_(ReadBoolFromEnvVar(
+          "TF_EAGER_ENABLE_SMALL_TENSOR_CPU_PINNING", true)) {
   if (device_mgr_owned) {
     local_device_manager_.reset(device_mgr);
     local_unowned_device_manager_ = nullptr;
diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h
index 5ed6057ec6..15eeaa8066 100644
--- a/tensorflow/core/common_runtime/eager/context.h
+++ b/tensorflow/core/common_runtime/eager/context.h
@@ -202,6 +202,7 @@ class EagerContext {
   // EagerService.SendTensor RPC. If false, _Send/_Recv ops should be used
   // instead (which in-turn use WorkerService.RecvTensor RPCs).
   bool UseSendTensorRPC() { return use_send_tensor_rpc_; }
+  bool PinSmallOpsToCPU() { return pin_small_ops_to_cpu_; }
 
  private:
   void InitDeviceMapAndAsync();
@@ -293,6 +294,7 @@ class EagerContext {
 #endif
 
   bool use_send_tensor_rpc_;
+  const bool pin_small_ops_to_cpu_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index 1bc63616d0..a52f933d75 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -579,19 +579,23 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals,
   return Status::OK();
 #endif
 }
-}  // namespace
 
-Status EagerExecute(EagerOperation* op,
-                    gtl::InlinedVector<TensorHandle*, 2>* retvals,
-                    int* num_retvals) {
-  // Ensure all resource-touching ops run in the device the resource is,
-  // regardless of anything else that has been specified. This is identical to
-  // the graph mode behavior.
+// The Op device may be updated if:
+// - A resource touching input is specified: all resource-touching ops run in
+// the device the resource is, regardless of anything else that has been
+// specified. This is identical to the graph mode behavior.
+//
+// - All op inputs are on the CPU, small (<64 elements) and integers
+// (int32/int64). This can be disabled by setting the environment variable
+// "TF_EAGER_ENABLE_SMALL_TENSOR_CPU_PINNING" to "0" or "false".
+Status MaybeUpdateOpDevice(EagerOperation* op) {
   EagerContext* ctx = op->EagerContext();
+  bool device_set_for_resource_variable = false;
+  bool all_inputs_eligible_for_cpu_pinning = ctx->PinSmallOpsToCPU();
+
   for (int i = 0; i < op->Inputs().size(); ++i) {
     Device* input_op_device = nullptr;
-    auto status = op->Inputs()[i]->OpDevice(&input_op_device);
-    if (!status.ok()) return status;
+    TF_RETURN_IF_ERROR(op->Inputs()[i]->OpDevice(&input_op_device));
     VLOG(2) << "for op " << op->Name() << " input " << i << " "
             << DataTypeString(op->Inputs()[i]->dtype) << " "
             << (input_op_device == nullptr ? "cpu" : input_op_device->name())
@@ -603,8 +607,53 @@ Status EagerExecute(EagerOperation* op,
               << d->name() << " because input #" << i
               << " is a resource in this device.";
       op->SetDevice(d);
+
+      device_set_for_resource_variable = true;
+      all_inputs_eligible_for_cpu_pinning = false;
+    } else if (all_inputs_eligible_for_cpu_pinning) {
+      TensorHandle* handle = op->Inputs()[i];
+
+      // Input is on CPU.
+      if (input_op_device != nullptr && input_op_device != ctx->HostCPU()) {
+        all_inputs_eligible_for_cpu_pinning = false;
+        continue;
+      }
+
+      if (handle->dtype != DataType::DT_INT32 &&
+          handle->dtype != DataType::DT_INT64) {
+        all_inputs_eligible_for_cpu_pinning = false;
+        continue;
+      }
+
+      int64 num_elements;
+      TF_RETURN_IF_ERROR(handle->NumElements(&num_elements));
+      if (num_elements > 64) {
+        all_inputs_eligible_for_cpu_pinning = false;
+      }
     }
   }
+
+  // Ops without inputs are usually ops that generate a tensor in some way and
+  // usually require being present on whatever device they are scheduled on
+  // - for e.g. VarHandleOp or _Recv).
+  // TODO(nareshmodi): Is it possible there is no int32/int64 CPU kernel for
+  // an op, but there is a GPU kernel?
+  if (!op->Inputs().empty() && all_inputs_eligible_for_cpu_pinning) {
+    VLOG(1) << "Forcing op " << op->Name()
+            << " to be on the CPU since all input tensors have an "
+               "int32/int64 dtype, and are small (less than 64 elements).";
+    op->SetDevice(ctx->HostCPU());
+  }
+
+  return Status::OK();
+}
+}  // namespace
+
+Status EagerExecute(EagerOperation* op,
+                    gtl::InlinedVector<TensorHandle*, 2>* retvals,
+                    int* num_retvals) {
+  TF_RETURN_IF_ERROR(MaybeUpdateOpDevice(op));
+
   bool op_is_local = IsLocal(op->EagerContext(), op->Device());
 
   if (op_is_local) {
diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py
index fb5442b646..e601aa376f 100644
--- a/tensorflow/python/eager/core_test.py
+++ b/tensorflow/python/eager/core_test.py
@@ -631,6 +631,34 @@ class TFETest(test_util.TensorFlowTestCase):
     for t in tensors:
       self.assertIsInstance(t, ops.EagerTensor)
 
+  def testSmallIntegerOpsForcedToCPU(self):
+    if not context.context().num_gpus():
+      self.skipTest('No GPUs found')
+
+    a = constant_op.constant((1, 2, 3, 4, 5), dtype=dtypes.int64)
+    b = constant_op.constant((2, 3, 4, 5, 6), dtype=dtypes.int64)
+    with context.device('gpu:0'):
+      c = a + b
+
+    # Op forced to CPU since all constants are integers and small.
+    self.assertEqual(c.device, '/job:localhost/replica:0/task:0/device:CPU:0')
+
+    a = array_ops.zeros((8, 10), dtype=dtypes.int64)
+    b = array_ops.ones((8, 10), dtype=dtypes.int64)
+
+    with context.device('gpu:0'):
+      c = a + b
+
+    # Op not forced to CPU since the tensors are larger than 64 elements.
+    self.assertEqual(c.device, '/job:localhost/replica:0/task:0/device:GPU:0')
+
+    a = constant_op.constant((1, 2, 3, 4, 5), dtype=dtypes.float32)
+    b = constant_op.constant((2, 3, 4, 5, 6), dtype=dtypes.float32)
+    with context.device('gpu:0'):
+      c = a + b
+
+    # Op not forced to CPU since the constants are not integers.
+    self.assertEqual(c.device, '/job:localhost/replica:0/task:0/device:GPU:0')
 
 class SendRecvTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 4a00f2fc6514ad5ee60ab0a9645863fdf263499f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 16:29:47 -0700
Subject: [PATCH 1170/1357] Add Chaos Free Network (CFN) cell.

The implementation is based on: https://openreview.net/pdf?id=S1dIzvclg.

PiperOrigin-RevId: 215824867
---
 .../rnn/python/kernel_tests/rnn_cell_test.py  |  65 +++++++++
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 129 ++++++++++++++++++
 2 files changed, 194 insertions(+)

diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index 0a27200015..aa1d7d2b01 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -1120,6 +1120,71 @@ class RNNCellTest(test.TestCase):
             r"input size \(3\) must be divisible by number_of_groups \(2\)"):
           gcell(glstm_input, gcell_zero_state)
 
+  def testCFNCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope("root"):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        cell = contrib_rnn_cell.CFNCell(
+            units=2,
+            kernel_initializer=initializers.Constant(0.5))
+        g, _ = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.17188203, 0.17188203]])
+      with variable_scope.variable_scope("other"):
+        # Test CFN with input_size != num_units.
+        x = array_ops.zeros([1, 3])
+        m = array_ops.zeros([1, 2])
+        cell = contrib_rnn_cell.CFNCell(
+            units=2,
+            kernel_initializer=initializers.Constant(0.5))
+        g, _ = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.15535763, 0.15535763]])
+
+  def testCFNCellEndToEnd(self):
+    with self.cached_session() as sess:
+      input_shape = 10
+      output_shape = 5
+      timestep = 4
+      batch = 100
+      (x_train, y_train), _ = testing_utils.get_test_data(
+          train_samples=batch,
+          test_samples=0,
+          input_shape=(timestep, input_shape),
+          num_classes=output_shape)
+      y_train = utils.to_categorical(y_train)
+      cell = contrib_rnn_cell.CFNCell(output_shape)
+
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape))
+      predict = array_ops.placeholder(
+          dtypes.float32, shape=(None, output_shape))
+
+      outputs, state = rnn.dynamic_rnn(
+          cell, inputs, dtype=dtypes.float32)
+      self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape])
+      self.assertEqual(state.shape.as_list(), [None, output_shape])
+      loss = losses.softmax_cross_entropy(predict, state)
+      train_op = training.GradientDescentOptimizer(0.001).minimize(loss)
+
+      sess.run([variables.global_variables_initializer()])
+      _, outputs, state = sess.run(
+          [train_op, outputs, state], {inputs: x_train, predict: y_train})
+
+      self.assertEqual(len(outputs), batch)
+      self.assertEqual(len(state), batch)
+
   def testMinimalRNNCell(self):
     with self.cached_session() as sess:
       with variable_scope.variable_scope(
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 59a61af7b3..78cea8feb4 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -3510,3 +3510,132 @@ class MinimalRNNCell(rnn_cell_impl.LayerRNNCell):
 
     new_h = u * state + (1 - u) * feedforward
     return new_h, new_h
+
+
+class CFNCell(rnn_cell_impl.LayerRNNCell):
+  """Chaos Free Network cell.
+
+  The implementation is based on:
+
+    https://openreview.net/pdf?id=S1dIzvclg
+
+  Thomas Laurent, James von Brecht.
+  "A recurrent neural network without chaos." ICLR, 2017.
+
+  A CFN cell first projects the input to the hidden space. The hidden state
+  goes through a contractive mapping. The new hidden state is then calcuated
+  as a linear combination of the projected input and the contracted previous
+  hidden state, using decoupled input and forget gates.
+  """
+
+  def __init__(self,
+               units,
+               activation="tanh",
+               kernel_initializer="glorot_uniform",
+               bias_initializer="ones",
+               name=None,
+               dtype=None,
+               **kwargs):
+    """Initialize the parameters for a CFN cell.
+
+    Args:
+      units: int, The number of units in the CFN cell.
+      activation: Nonlinearity to use. Default: `tanh`.
+      kernel_initializer: Initializer for the `kernel` weights
+        matrix. Default: `glorot_uniform`.
+      bias_initializer: The initializer to use for the bias in the
+        gates. Default: `ones`.
+      name: String, the name of the cell.
+      dtype: Default dtype of the cell.
+      **kwargs: Dict, keyword named properties for common cell attributes.
+    """
+    super(CFNCell, self).__init__(name=name, dtype=dtype, **kwargs)
+
+    # Inputs must be 2-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+    self.units = units
+    self.activation = activations.get(activation)
+    self.kernel_initializer = initializers.get(kernel_initializer)
+    self.bias_initializer = initializers.get(bias_initializer)
+
+  @property
+  def state_size(self):
+    return self.units
+
+  @property
+  def output_size(self):
+    return self.units
+
+  def build(self, inputs_shape):
+    if inputs_shape[-1] is None:
+      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
+                       % str(inputs_shape))
+
+    input_size = inputs_shape[-1]
+    # pylint: disable=protected-access
+    # `self.kernel` contains V_{\theta}, V_{\eta}, W.
+    # `self.recurrent_kernel` contains U_{\theta}, U_{\eta}.
+    # `self.bias` contains b_{\theta}, b_{\eta}.
+    self.kernel = self.add_weight(
+        shape=[input_size, 3 * self.units],
+        name=rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+        initializer=self.kernel_initializer)
+    self.recurrent_kernel = self.add_weight(
+        shape=[self.units, 2 * self.units],
+        name="recurrent_%s" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+        initializer=self.kernel_initializer)
+    self.bias = self.add_weight(
+        shape=[2 * self.units],
+        name=rnn_cell_impl._BIAS_VARIABLE_NAME,
+        initializer=self.bias_initializer)
+    # pylint: enable=protected-access
+
+    self.built = True
+
+  def call(self, inputs, state):
+    """Run one step of CFN.
+
+    Args:
+      inputs: input Tensor, must be 2-D, `[batch, input_size]`.
+      state: state Tensor, must be 2-D, `[batch, state_size]`.
+
+    Returns:
+      A tuple containing:
+
+      - Output: A `2-D` tensor with shape `[batch_size, state_size]`.
+      - New state: A `2-D` tensor with shape `[batch_size, state_size]`.
+
+    Raises:
+      ValueError: If input size cannot be inferred from inputs via
+        static shape inference.
+    """
+    input_size = inputs.get_shape()[-1]
+    if input_size.value is None:
+      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
+
+    # The variable names u, v, w, b are consistent with the notations in the
+    # original paper.
+    v, w = array_ops.split(
+        value=self.kernel,
+        num_or_size_splits=[2 * self.units, self.units],
+        axis=1)
+    u = self.recurrent_kernel
+    b = self.bias
+
+    gates = math_ops.matmul(state, u) + math_ops.matmul(inputs, v)
+    gates = nn_ops.bias_add(gates, b)
+    gates = math_ops.sigmoid(gates)
+    theta, eta = array_ops.split(value=gates,
+                                 num_or_size_splits=2,
+                                 axis=1)
+
+    proj_input = math_ops.matmul(inputs, w)
+
+    # The input gate is (1 - eta), which is different from the original paper.
+    # This is for the propose of initialization. With the default
+    # bias_initializer `ones`, the input gate is initialized to a small number.
+    new_h = theta * self.activation(state) + (1 - eta) * self.activation(
+        proj_input)
+
+    return new_h, new_h
-- 
GitLab


From 83ff640fa5026b8bd3cb9c2ceff9e99e8e03823a Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Thu, 4 Oct 2018 18:46:53 -0700
Subject: [PATCH 1171/1357] [XLA:GPU] Fix old-ptxas-version detection logic.

This was completely broken for CUDA versions > 9 and resulted in spurious warnings.

Reported in #22706#issuecomment-426861394 -- thank you!

PiperOrigin-RevId: 215841354
---
 tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index b4ae2e42c7..50e47542c4 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -401,7 +401,7 @@ void WarnIfBadPtxasVersion(const string& ptxas_path) {
            "prefers >= 9.2.88).  Compilation of XLA kernels below will likely "
            "fail.\n\nYou do not need to update CUDA; cherry-picking the ptxas "
            "binary is sufficient.";
-  } else if ((vmaj < 9 || vmin < 2 || vdot < 88)) {
+  } else if (std::make_tuple(vmaj, vmin, vdot) < std::make_tuple(9, 2, 88)) {
     LOG(WARNING)
         << "*** WARNING *** You are using ptxas " << vmaj << "." << vmin << "."
         << vdot
-- 
GitLab


From 5608454c31bb298096bb6aa463b33baa2fa68f08 Mon Sep 17 00:00:00 2001
From: Ruoxin Sang <rxsang@google.com>
Date: Thu, 4 Oct 2018 19:07:44 -0700
Subject: [PATCH 1172/1357] Add 'device' property to TPUMirroredVariable, so
 tf.train.init_from_checkpoint can be supported.

PiperOrigin-RevId: 215843249
---
 tensorflow/contrib/distribute/python/values.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index 18ceba42c2..0dd78ba185 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -571,6 +571,10 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
           ValueError("Device %s not found in %s (current device %s)" %
                      (device, self._index.keys(), device_util.current())), e)
 
+  @property
+  def device(self):
+    return self._get().device
+
   # The arguments to update() are automatically unwrapped so the update()
   # function would normally see regular variables, not MirroredVariables.
   # However, the update function can still operate on wrapped MirroredVariables
-- 
GitLab


From f4cef34fad7b00a3b1f288ff5c95001c5b83c1f8 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 4 Oct 2018 19:26:26 -0700
Subject: [PATCH 1173/1357] Fix regression that caused xrange to be ignored.

PiperOrigin-RevId: 215844450
---
 tensorflow/python/autograph/operators/py_builtins.py          | 1 +
 tensorflow/python/autograph/pyct/inspect_utils.py             | 4 ++++
 .../python/autograph/pyct/static_analysis/live_values.py      | 4 ++++
 3 files changed, 9 insertions(+)

diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py
index 91a2a22cc2..70e59272a9 100644
--- a/tensorflow/python/autograph/operators/py_builtins.py
+++ b/tensorflow/python/autograph/operators/py_builtins.py
@@ -228,5 +228,6 @@ BUILTIN_FUINCTIONS_MAP = {
     'len': len_,
     'print': print_,
     'range': range_,
+    # TODO(mdan): This might make more sense as tf.data.range.
     'xrange': range_,
 }
diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index eef74599a7..1416988ea3 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -30,10 +30,14 @@ from tensorflow.python.util import tf_inspect
 
 
 def isbuiltin(f):
+  """Returns True if the argument is a built-in function."""
   # Note these return false for isinstance(f, types.BuiltinFunctionType) so we
   # need to specifically check for them.
   if f in (range, int, float):
     return True
+  if six.PY2:
+    if f in (xrange,):
+      return True
   if isinstance(f, types.BuiltinFunctionType):
     return True
   if tf_inspect.isbuiltin(f):
diff --git a/tensorflow/python/autograph/pyct/static_analysis/live_values.py b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
index 36b9e7074d..4ceddce53b 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/live_values.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
@@ -24,6 +24,7 @@ from __future__ import division
 from __future__ import print_function
 
 import gast
+import six
 
 from tensorflow.python.autograph.pyct import anno
 from tensorflow.python.autograph.pyct import transformer
@@ -35,6 +36,9 @@ from tensorflow.python.autograph.pyct.static_analysis.annos import NodeAnno
 # These symbols are legal in Python, but don't appear in the namespace.
 _SPECIAL_SYMBOLS = {'range': range, 'print': print}
 
+if six.PY2:
+  _SPECIAL_SYMBOLS['xrange'] = xrange
+
 
 class LiveValueResolver(transformer.Base):
   """Annotates nodes with live values."""
-- 
GitLab


From 176e6993c5e11631389e05f82b3d71a3a367e392 Mon Sep 17 00:00:00 2001
From: Billy Lamberta <blamb@google.com>
Date: Thu, 4 Oct 2018 21:25:33 -0700
Subject: [PATCH 1174/1357] Fix link in eager notebook stub.

PiperOrigin-RevId: 215853105
---
 .../notebooks/automatic_differentiation.ipynb |    2 +-
 .../performance/xla/operation_semantics.md    | 2426 -----------------
 2 files changed, 1 insertion(+), 2427 deletions(-)
 delete mode 100644 tensorflow/docs_src/performance/xla/operation_semantics.md

diff --git a/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb
index 8fae622e12..446e340118 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb
@@ -65,7 +65,7 @@
         "\u003ca target=\"_blank\"  href=\"https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb\"\u003e\n",
         "    \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
         "\u003c/td\u003e\u003ctd\u003e\n",
-        "\u003ca target=\"_blank\"  href=\"https://github.com/tensorflow/tensorflow/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e"
+        "\u003ca target=\"_blank\"  href=\"https://github.com/tensorflow/docs/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e"
       ]
     }
   ],
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
deleted file mode 100644
index 96d269bec4..0000000000
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ /dev/null
@@ -1,2426 +0,0 @@
-# Operation Semantics
-
-The following describes the semantics of operations defined in the
-[`XlaBuilder`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-interface. Typically, these operations map one-to-one to operations defined in
-the RPC interface in
-[`xla_data.proto`](https://www.tensorflow.org/code/tensorflow/compiler/xla/xla_data.proto).
-
-A note on nomenclature: the generalized data type XLA deals with is an
-N-dimensional array holding elements of some uniform type (such as 32-bit
-float). Throughout the documentation, *array* is used to denote an
-arbitrary-dimensional array. For convenience, special cases have more specific
-and familiar names; for example a *vector* is a 1-dimensional array and a
-*matrix* is a 2-dimensional array.
-
-## AllToAll
-
-See also
-[`XlaBuilder::AllToAll`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Alltoall is a collective operation that sends data from all cores to all cores.
-It has two phases:
-
-1.  the scatter phase. On each core, the operand is split into `split_count`
-    number of blocks along the `split_dimensions`, and the blocks are scattered
-    to all cores, e.g., the ith block is send to the ith core.
-2.  the gather phase. Each core concatenates the received blocks along the
-    `concat_dimension`.
-
-The participating cores can be configured by:
-
--   `replica_groups`: each ReplicaGroup contains a list of replica id. If empty,
-    all replicas belong to one group in the order of 0 - (n-1). Alltoall will be
-    applied within subgroups in the specified order. For example, replica
-    groups = {{1,2,3},{4,5,0}} means, an Alltoall will be applied within replica
-    1, 2, 3, and in the gather phase, the received blocks will be concatenated
-    in the order of 1, 2, 3; another Alltoall will be applied within replica 4,
-    5, 0, and the concatenation order is 4, 5, 0.
-
-Prerequisites:
-
--   The dimension size of the operand on the split_dimension is divisible by
-    split_count.
--   The operand's shape is not tuple.
-
-<b> `AllToAll(operand, split_dimension, concat_dimension, split_count,
-replica_groups)` </b>
-
-
-| Arguments          | Type                  | Semantics                       |
-| ------------------ | --------------------- | ------------------------------- |
-| `operand`          | `XlaOp`               | n dimensional input array       |
-| `split_dimension`  | `int64`               | A value in the interval `[0,    |
-:                    :                       : n)` that names the dimension    :
-:                    :                       : along which the operand is      :
-:                    :                       : split                           :
-| `concat_dimension` | `int64`               | a value in the interval `[0,    |
-:                    :                       : n)` that names the dimension    :
-:                    :                       : along which the split blocks    :
-:                    :                       : are concatenated                :
-| `split_count`      | `int64`               | the number of cores that        |
-:                    :                       : participate this operation. If  :
-:                    :                       : `replica_groups` is empty, this :
-:                    :                       : should be the number of         :
-:                    :                       : replicas; otherwise, this       :
-:                    :                       : should be equal to the number   :
-:                    :                       : of replicas in each group.      :
-| `replica_groups`   | `ReplicaGroup` vector | each group contains a list of   |
-:                    :                       : replica id.                     :
-
-Below shows an example of Alltoall.
-
-```
-XlaBuilder b("alltoall");
-auto x = Parameter(&b, 0, ShapeUtil::MakeShape(F32, {4, 16}), "x");
-AllToAll(x, /*split_dimension=*/1, /*concat_dimension=*/0, /*split_count=*/4);
-```
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/xla/ops_alltoall.png">
-</div>
-
-In this example, there are 4 cores participating the Alltoall. On each core, the
-operand is split into 4 parts along dimension 0, so each part has shape
-f32[4,4]. The 4 parts are scattered to all cores. Then each core concatenates
-the received parts along dimension 1, in the order or core 0-4. So the output on
-each core has shape f32[16,4].
-
-## BatchNormGrad
-
-See also
-[`XlaBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
-for a detailed description of the algorithm.
-
-Calculates gradients of batch norm.
-
-<b> `BatchNormGrad(operand, scale, mean, variance, grad_output, epsilon, feature_index)` </b>
-
-| Arguments       | Type                    | Semantics                        |
-| --------------- | ----------------------- | -------------------------------- |
-| `operand`       | `XlaOp`                 | n dimensional array to be        |
-:                 :                         : normalized (x)                   :
-| `scale`         | `XlaOp`                 | 1 dimensional array              |
-:                 :                         : (\\(\gamma\\))                   :
-| `mean`          | `XlaOp`                 | 1 dimensional array (\\(\mu\\))  |
-| `variance`      | `XlaOp`                 | 1 dimensional array              |
-:                 :                         : (\\(\sigma^2\\))                 :
-| `grad_output`   | `XlaOp`                 | Gradients passed to              |
-:                 :                         : `BatchNormTraining`              :
-:                 :                         : (\\( \nabla y\\))                :
-| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
-| `feature_index` | `int64`                 | Index to feature dimension in    |
-:                 :                         : `operand`                        :
-
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the gradients with
-respect to `operand`, `offset` and `scale` across all the other dimensions. The
-`feature_index` must be a valid index for the feature dimension in `operand`.
-
-The three gradients are defined by the following formulas (assuming a
-4-dimensional tensor as `operand` and with feature dimension index \\(l\\),
-batch size `m` and spatial sizes `w` and `h`):
-
-\\[ \begin{split} c_l&=
-\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h
-\left( \nabla y_{ijkl} \frac{x_{ijkl} - \mu_l}{\sigma^2_l+\epsilon} \right)
-\\\\
-\nabla x_{ijkl} &= \frac{\gamma_{l}}{\sqrt{\sigma^2_{l}+\epsilon}}
-\left( \nabla y_{ijkl} - \mathrm{mean}(\nabla y) - c_l (x_{ijkl} - \mu_{l})
-\right)
-\\\\
-\nabla \gamma_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \left( \nabla y_{ijkl}
-\frac{x_{ijkl} - \mu_l}{\sqrt{\sigma^2_{l}+\epsilon}} \right)
-\\\\\
-\nabla \beta_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl}
-\end{split} \\]
-
-The inputs `mean` and `variance` represent moments value
-across batch and spatial dimensions.
-
-The output type is a tuple of three handles:
-
-| Outputs        | Type                    | Semantics                         |
-| -------------  | ----------------------- | --------------------------------- |
-| `grad_operand` | `XlaOp`                 | gradient with respect to input    |
-:                :                         : `operand` (\\( \nabla x\\))       :
-| `grad_scale`   | `XlaOp`                 | gradient with respect to input    |
-:                :                         : `scale` (\\( \nabla \gamma\\))    :
-| `grad_offset`  | `XlaOp`                 | gradient with respect to input    |
-:                :                         : `offset`(\\( \nabla \beta\\))     :
-
-## BatchNormInference
-
-See also
-[`XlaBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
-for a detailed description of the algorithm.
-
-Normalizes an array across batch and spatial dimensions.
-
-<b> `BatchNormInference(operand, scale, offset, mean, variance, epsilon, feature_index)` </b>
-
-Arguments       | Type    | Semantics
---------------- | ------- | ---------------------------------------
-`operand`       | `XlaOp` | n dimensional array to be normalized
-`scale`         | `XlaOp` | 1 dimensional array
-`offset`        | `XlaOp` | 1 dimensional array
-`mean`          | `XlaOp` | 1 dimensional array
-`variance`      | `XlaOp` | 1 dimensional array
-`epsilon`       | `float` | Epsilon value
-`feature_index` | `int64` | Index to feature dimension in `operand`
-
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and uses the mean and variance to normalize each
-element in `operand`. The `feature_index` must be a valid index for the feature
-dimension in `operand`.
-
-`BatchNormInference`  is equivalent to calling `BatchNormTraining` without
-computing `mean` and `variance` for each batch. It uses the input `mean` and
-`variance` instead as estimated values. The purpose of this op is to reduce
-latency in inference, hence the name `BatchNormInference`.
-
-The output is an n-dimensional, normalized array with the same shape as input
-`operand`.
-
-## BatchNormTraining
-
-See also
-[`XlaBuilder::BatchNormTraining`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and [`the original batch normalization paper`](https://arxiv.org/abs/1502.03167)
-for a detailed description of the algorithm.
-
-Normalizes an array across batch and spatial dimensions.
-
-<b> `BatchNormTraining(operand, scale, offset, epsilon, feature_index)` </b>
-
-Arguments       | Type    | Semantics
---------------- | ------- | ----------------------------------------
-`operand`       | `XlaOp` | n dimensional array to be normalized (x)
-`scale`         | `XlaOp` | 1 dimensional array (\\(\gamma\\))
-`offset`        | `XlaOp` | 1 dimensional array (\\(\beta\\))
-`epsilon`       | `float` | Epsilon value (\\(\epsilon\\))
-`feature_index` | `int64` | Index to feature dimension in `operand`
-
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and uses the mean and variance to normalize each
-element in `operand`. The `feature_index` must be a valid index for the feature
-dimension in `operand`.
-
-The algorithm goes as follows for each batch in `operand` \\(x\\) that
-contains `m` elements with `w` and `h` as the size of spatial dimensions
-(assuming `operand` is an 4 dimensional array):
-
-- Calculates batch mean \\(\mu_l\\) for each feature `l` in feature dimension:
-\\(\mu_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h x_{ijkl}\\)
-
-- Calculates batch variance \\(\sigma^2_l\\):
-\\(\sigma^2_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (x_{ijkl} - \mu_l)^2\\)
-
-- Normalizes, scales and shifts:
-\\(y_{ijkl}=\frac{\gamma_l(x_{ijkl}-\mu_l)}{\sqrt[2]{\sigma^2_l+\epsilon}}+\beta_l\\)
-
-The epsilon value, usually a small number, is added to avoid divide-by-zero errors.
-
-The output type is a tuple of three `XlaOp`s:
-
-| Outputs      | Type                    | Semantics                            |
-| ------------ | ----------------------- | -------------------------------------|
-| `output`     | `XlaOp`                 | n dimensional array with the same    |
-:              :                         : shape as input `operand` (y)         :
-| `batch_mean` | `XlaOp`                 | 1 dimensional array (\\(\mu\\))      |
-| `batch_var`  | `XlaOp`                 | 1 dimensional array (\\(\sigma^2\\)) |
-
-The `batch_mean` and `batch_var` are moments calculated across the batch and
-spatial dimensions using the formulas above.
-
-## BitcastConvertType
-
-See also
-[`XlaBuilder::BitcastConvertType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Similar to a `tf.bitcast` in TensorFlow, performs an element-wise bitcast
-operation from a data shape to a target shape. The dimensions must match, and
-the conversion is an element-wise one; e.g. `s32` elements become `f32` elements
-via bitcast routine. Bitcast is implemented as a low-level cast, so machines
-with different floating-point representations will give different results.
-
-<b> `BitcastConvertType(operand, new_element_type)` </b>
-
-Arguments          | Type            | Semantics
------------------- | --------------- | ---------------------------
-`operand`          | `XlaOp`         | array of type T with dims D
-`new_element_type` | `PrimitiveType` | type U
-
-The dimensions of the operand and the target shape must match. The bit-width of
-the source and destination element types must be equal. The source
-and destination element types must not be tuples.
-
-## Broadcast
-
-See also
-[`XlaBuilder::Broadcast`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Adds dimensions to an array by duplicating the data in the array.
-
-<b> `Broadcast(operand, broadcast_sizes)` </b>
-
-Arguments         | Type                | Semantics
------------------ | ------------------- | -------------------------------
-`operand`         | `XlaOp`             | The array to duplicate
-`broadcast_sizes` | `ArraySlice<int64>` | The sizes of the new dimensions
-
-The new dimensions are inserted on the left, i.e. if `broadcast_sizes` has
-values `{a0, ..., aN}` and the operand shape has dimensions `{b0, ..., bM}` then
-the shape of the output has dimensions `{a0, ..., aN, b0, ..., bM}`.
-
-The new dimensions index into copies of the operand, i.e.
-
-```
-output[i0, ..., iN, j0, ..., jM] = operand[j0, ..., jM]
-```
-
-For example, if `operand` is a scalar `f32` with value `2.0f`, and
-`broadcast_sizes` is `{2, 3}`, then the result will be an array with shape
-`f32[2, 3]` and all the values in the result will be `2.0f`.
-
-## Call
-
-See also
-[`XlaBuilder::Call`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Invokes a computation with the given arguments.
-
-<b> `Call(computation, args...)` </b>
-
-| Arguments     | Type                   | Semantics                           |
-| ------------- | ---------------------- | ----------------------------------- |
-| `computation` | `XlaComputation`       | computation of type `T_0, T_1, ..., |
-:               :                        : T_N -> S` with N parameters of      :
-:               :                        : arbitrary type                      :
-| `args`        | sequence of N `XlaOp`s | N arguments of arbitrary type       |
-
-The arity and types of the `args` must match the parameters of the
-`computation`. It is allowed to have no `args`.
-
-## Clamp
-
-See also
-[`XlaBuilder::Clamp`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Clamps an operand to within the range between a minimum and maximum value.
-
-<b> `Clamp(min, operand, max)` </b>
-
-Arguments | Type    | Semantics
---------- | ------- | ---------------
-`min`     | `XlaOp` | array of type T
-`operand` | `XlaOp` | array of type T
-`max`     | `XlaOp` | array of type T
-
-Given an operand and minimum and maximum values, returns the operand if it is in
-the range between the minimum and maximum, else returns the minimum value if the
-operand is below this range or the maximum value if the operand is above this
-range.  That is, `clamp(a, x, b) =  min(max(a, x), b)`.
-
-All three arrays must be the same shape. Alternatively, as a restricted form of
-[broadcasting](broadcasting.md), `min` and/or `max` can be a scalar of type `T`.
-
-Example with scalar `min` and `max`:
-
-```
-let operand: s32[3] = {-1, 5, 9};
-let min: s32 = 0;
-let max: s32 = 6;
-==>
-Clamp(min, operand, max) = s32[3]{0, 5, 6};
-```
-
-## Collapse
-
-See also
-[`XlaBuilder::Collapse`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and the `tf.reshape` operation.
-
-Collapses dimensions of an array into one dimension.
-
-<b> `Collapse(operand, dimensions)` </b>
-
-Arguments    | Type           | Semantics
------------- | -------------- | -----------------------------------------------
-`operand`    | `XlaOp`        | array of type T
-`dimensions` | `int64` vector | in-order, consecutive subset of T's dimensions.
-
-Collapse replaces the given subset of the operand's dimensions by a single
-dimension. The input arguments are an arbitrary array of type T and a
-compile-time-constant vector of dimension indices. The dimension indices must be
-an in-order (low to high dimension numbers), consecutive subset of T's
-dimensions. Thus, {0, 1, 2}, {0, 1}, or {1, 2} are all valid dimension sets, but
-{1, 0} or {0, 2} are not. They are replaced by a single new dimension, in the
-same position in the dimension sequence as those they replace, with the new
-dimension size equal to the product of original dimension sizes. The lowest
-dimension number in `dimensions` is the slowest varying dimension (most major)
-in the loop nest which collapses these dimension, and the highest dimension
-number is fastest varying (most minor). See the `tf.reshape` operator
-if more general collapse ordering is needed.
-
-For example, let v be an array of 24 elements:
-
-```
-let v = f32[4x2x3] {{{10, 11, 12},  {15, 16, 17}},
-                    {{20, 21, 22},  {25, 26, 27}},
-                    {{30, 31, 32},  {35, 36, 37}},
-                    {{40, 41, 42},  {45, 46, 47}}};
-
-// Collapse to a single dimension, leaving one dimension.
-let v012 = Collapse(v, {0,1,2});
-then v012 == f32[24] {10, 11, 12, 15, 16, 17,
-                      20, 21, 22, 25, 26, 27,
-                      30, 31, 32, 35, 36, 37,
-                      40, 41, 42, 45, 46, 47};
-
-// Collapse the two lower dimensions, leaving two dimensions.
-let v01 = Collapse(v, {0,1});
-then v01 == f32[4x6] {{10, 11, 12, 15, 16, 17},
-                      {20, 21, 22, 25, 26, 27},
-                      {30, 31, 32, 35, 36, 37},
-                      {40, 41, 42, 45, 46, 47}};
-
-// Collapse the two higher dimensions, leaving two dimensions.
-let v12 = Collapse(v, {1,2});
-then v12 == f32[8x3] {{10, 11, 12},
-                      {15, 16, 17},
-                      {20, 21, 22},
-                      {25, 26, 27},
-                      {30, 31, 32},
-                      {35, 36, 37},
-                      {40, 41, 42},
-                      {45, 46, 47}};
-
-```
-
-## Concatenate
-
-See also
-[`XlaBuilder::ConcatInDim`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Concatenate composes an array from multiple array operands. The array is of the
-same rank as each of the input array operands (which must be of the same rank as
-each other) and contains the arguments in the order that they were specified.
-
-<b> `Concatenate(operands..., dimension)` </b>
-
-| Arguments   | Type                  | Semantics                              |
-| ----------- | --------------------- | -------------------------------------- |
-| `operands`  | sequence of N `XlaOp` | N arrays of type T with dimensions     |
-:             :                       : [L0, L1, ...]. Requires N >= 1.        :
-| `dimension` | `int64`               | A value in the interval `[0, N)` that  |
-:             :                       : names the dimension to be concatenated :
-:             :                       : between the `operands`.                :
-
-With the exception of `dimension` all dimensions must be the same. This is
-because XLA does not support "ragged" arrays. Also note that rank-0 values
-cannot be concatenated (as it's impossible to name the dimension along which the
-concatenation occurs).
-
-1-dimensional example:
-
-```
-Concat({{2, 3}, {4, 5}, {6, 7}}, 0)
->>> {2, 3, 4, 5, 6, 7}
-```
-
-2-dimensional example:
-
-```
-let a = {
-  {1, 2},
-  {3, 4},
-  {5, 6},
-};
-let b = {
-  {7, 8},
-};
-Concat({a, b}, 0)
->>> {
-  {1, 2},
-  {3, 4},
-  {5, 6},
-  {7, 8},
-}
-```
-
-Diagram:
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="https://www.tensorflow.org/images/ops_concatenate.png">
-</div>
-
-## Conditional
-
-See also
-[`XlaBuilder::Conditional`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Conditional(pred, true_operand, true_computation, false_operand,
-false_computation)` </b>
-
-Arguments           | Type             | Semantics
-------------------- | ---------------- | ---------------------------------
-`pred`              | `XlaOp`          | Scalar of type `PRED`
-`true_operand`      | `XlaOp`          | Argument of type `T_0`
-`true_computation`  | `XlaComputation` | XlaComputation of type `T_0 -> S`
-`false_operand`     | `XlaOp`          | Argument of type `T_1`
-`false_computation` | `XlaComputation` | XlaComputation of type `T_1 -> S`
-
-Executes `true_computation` if `pred` is `true`, `false_computation` if `pred`
-is `false`, and returns the result.
-
-The `true_computation` must take in a single argument of type `T_0` and will be
-invoked with `true_operand` which must be of the same type. The
-`false_computation` must take in a single argument of type `T_1` and will be
-invoked with `false_operand` which must be of the same type. The type of the
-returned value of `true_computation` and `false_computation` must be the same.
-
-Note that only one of `true_computation` and `false_computation` will be
-executed depending on the value of `pred`.
-
-## Conv (convolution)
-
-See also
-[`XlaBuilder::Conv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-As ConvWithGeneralPadding, but the padding is specified in a short-hand way as
-either SAME or VALID. SAME padding pads the input (`lhs`) with zeroes so that
-the output has the same shape as the input when not taking striding into
-account. VALID padding simply means no padding.
-
-## ConvWithGeneralPadding (convolution)
-
-See also
-[`XlaBuilder::ConvWithGeneralPadding`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Computes a convolution of the kind used in neural networks. Here, a convolution
-can be thought of as a n-dimensional window moving across a n-dimensional base
-area and a computation is performed for each possible position of the window.
-
-| Arguments             | Type                 | Semantics                     |
-| --------------------- | -------------------- | ----------------------------- |
-| `lhs`                 | `XlaOp`              | rank n+2 array of inputs      |
-| `rhs`                 | `XlaOp`              | rank n+2 array of kernel      |
-:                       :                      : weights                       :
-| `window_strides`      | `ArraySlice<int64>`  | n-d array of kernel strides   |
-| `padding`             | `ArraySlice<         | n-d array of (low, high)      |
-:                       : pair<int64, int64>>` : padding                       :
-| `lhs_dilation`        | `ArraySlice<int64>`  | n-d lhs dilation factor array |
-| `rhs_dilation`        | `ArraySlice<int64>`  | n-d rhs dilation factor array |
-| `feature_group_count` | int64                | the number of feature groups  |
-
-Let n be the number of spatial dimensions. The `lhs` argument is a rank n+2
-array describing the base area. This is called the input, even though of course
-the rhs is also an input. In a neural network, these are the input activations.
-The n+2 dimensions are, in this order:
-
-*   `batch`: Each coordinate in this dimension represents an independent input
-    for which convolution is carried out.
-*   `z/depth/features`: Each (y,x) position in the base area has a vector
-    associated to it, which goes into this dimension.
-*   `spatial_dims`: Describes the `n` spatial dimensions that define the base
-    area that the window moves across.
-
-The `rhs` argument is a rank n+2 array describing the convolutional
-filter/kernel/window. The dimensions are, in this order:
-
-*   `output-z`: The `z` dimension of the output.
-*   `input-z`: The size of this dimension times `feature_group_count` should
-    equal the size of the `z` dimension in lhs.
-*   `spatial_dims`: Describes the `n` spatial dimensions that define the n-d
-    window that moves across the base area.
-
-The `window_strides` argument specifies the stride of the convolutional window
-in the spatial dimensions. For example, if the stride in the first spatial
-dimension is 3, then the window can only be placed at coordinates where the
-first spatial index is divisible by 3.
-
-The `padding` argument specifies the amount of zero padding to be applied to the
-base area. The amount of padding can be negative -- the absolute value of
-negative padding indicates the number of elements to remove from the specified
-dimension before doing the convolution. `padding[0]` specifies the padding for
-dimension `y` and `padding[1]` specifies the padding for dimension `x`. Each
-pair has the low padding as the first element and the high padding as the second
-element. The low padding is applied in the direction of lower indices while the
-high padding is applied in the direction of higher indices. For example, if
-`padding[1]` is `(2,3)` then there will be a padding by 2 zeroes on the left and
-by 3 zeroes on the right in the second spatial dimension. Using padding is
-equivalent to inserting those same zero values into the input (`lhs`) before
-doing the convolution.
-
-The `lhs_dilation` and `rhs_dilation` arguments specify the dilation factor to
-be applied to the lhs and rhs, respectively, in each spatial dimension. If the
-dilation factor in a spatial dimension is d, then d-1 holes are implicitly
-placed between each of the entries in that dimension, increasing the size of the
-array. The holes are filled with a no-op value, which for convolution means
-zeroes.
-
-Dilation of the rhs is also called atrous convolution. For more details, see
-`tf.nn.atrous_conv2d`. Dilation of the lhs is also called transposed
-convolution. For more details, see `tf.nn.conv2d_transpose`.
-
-The `feature_group_count` argument (default value 1) can be used for grouped
-convolutions. `feature_group_count` needs to be a divisor of both the input and
-the output feature dimension. If `feature_group_count` is greater than 1, it
-means that conceptually the input and output feature dimension and the `rhs`
-output feature dimension are split evenly into `feature_group_count` many
-groups, each group consisting of a consecutive subsequence of features. The
-input feature dimension of `rhs` needs to be equal to the `lhs` input feature
-dimension divided by `feature_group_count` (so it already has the size of a
-group of input features). The i-th groups are used together to compute
-`feature_group_count` many separate convolutions. The results of these
-convolutions are concatenated together in the output feature dimension.
-
-For depthwise convolution the `feature_group_count` argument would be set to the
-input feature dimension, and the filter would be reshaped from
-`[filter_height, filter_width, in_channels, channel_multiplier]` to
-`[filter_height, filter_width, 1, in_channels * channel_multiplier]`. For more
-details, see `tf.nn.depthwise_conv2d`.
-
-The output shape has these dimensions, in this order:
-
-*   `batch`: Same size as `batch` on the input (`lhs`).
-*   `z`: Same size as `output-z` on the kernel (`rhs`).
-*   `spatial_dims`: One value for each valid placement of the convolutional
-    window.
-
-The valid placements of the convolutional window are determined by the strides
-and the size of the base area after padding.
-
-To describe what a convolution does, consider a 2d convolution, and pick some
-fixed `batch`, `z`, `y`, `x` coordinates in the output. Then `(y,x)` is a
-position of a corner of the window within the base area (e.g. the upper left
-corner, depending on how you interpret the spatial dimensions). We now have a 2d
-window, taken from the base area, where each 2d point is associated to a 1d
-vector, so we get a 3d box. From the convolutional kernel, since we fixed the
-output coordinate `z`, we also have a 3d box. The two boxes have the same
-dimensions, so we can take the sum of the element-wise products between the two
-boxes (similar to a dot product). That is the output value.
-
-Note that if `output-z` is e.g., 5, then each position of the window produces 5
-values in the output into the `z` dimension of the output. These values differ
-in what part of the convolutional kernel is used - there is a separate 3d box of
-values used for each `output-z` coordinate. So you could think of it as 5
-separate convolutions with a different filter for each of them.
-
-Here is pseudo-code for a 2d convolution with padding and striding:
-
-```
-for (b, oz, oy, ox) {  // output coordinates
-  value = 0;
-  for (iz, ky, kx) {  // kernel coordinates and input z
-    iy = oy*stride_y + ky - pad_low_y;
-    ix = ox*stride_x + kx - pad_low_x;
-    if ((iy, ix) inside the base area considered without padding) {
-      value += input(b, iz, iy, ix) * kernel(oz, iz, ky, kx);
-    }
-  }
-  output(b, oz, oy, ox) = value;
-}
-```
-
-## ConvertElementType
-
-See also
-[`XlaBuilder::ConvertElementType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Similar to an element-wise `static_cast` in C++, performs an element-wise
-conversion operation from a data shape to a target shape. The dimensions must
-match, and the conversion is an element-wise one; e.g. `s32` elements become
-`f32` elements via an `s32`-to-`f32` conversion routine.
-
-<b> `ConvertElementType(operand, new_element_type)` </b>
-
-Arguments          | Type            | Semantics
------------------- | --------------- | ---------------------------
-`operand`          | `XlaOp`         | array of type T with dims D
-`new_element_type` | `PrimitiveType` | type U
-
-The dimensions of the operand and the target shape must match. The source and
-destination element types must not be tuples.
-
-A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float
-conversion routine such as round-to-nearest-even.
-
-> Note: The precise float-to-int and visa-versa conversions are currently
-> unspecified, but may become additional arguments to the convert operation in
-> the future.  Not all possible conversions have been implemented for all
->targets.
-
-```
-let a: s32[3] = {0, 1, 2};
-let b: f32[3] = convert(a, f32);
-then b == f32[3]{0.0, 1.0, 2.0}
-```
-
-## CrossReplicaSum
-
-See also
-[`XlaBuilder::CrossReplicaSum`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Computes a sum across replicas.
-
-<b> `CrossReplicaSum(operand)` </b>
-
-Arguments | Type    | Semantics
---------- | ------- | -----------------------------
-`operand` | `XlaOp` | Array to sum across replicas.
-| `replica_group_ids`    | `int64` vector | Group ID for each replica.      |
-
-The output shape is the same as the input shape. For example, if there are two
-replicas and the operand has the value `(1.0, 2.5)` and `(3.0, 5.25)`
-respectively on the two replicas, then the output value from this op will be
-`(4.0, 7.75)` on both replicas.
-
-`replica_group_ids` identifies the group ID of each replica. The group ID must
-either be empty (all replicas belong to a single group), or contain the same
-number of elements as the number of replicas. For example, if
-`replica_group_ids` = {0, 1, 2, 3, 0, 1, 2, 3} has eight replicas, there are
-four subgroups of replica IDs: {0, 4}, {1, 5}, {2, 6}, and {3, 7}. The size of
-each subgroup *must* be identical, so, for example, using:
-`replica_group_ids` = {0, 1, 2, 0} for four replicas is invalid.
-
-Computing the result of CrossReplicaSum requires having one input from each
-replica, so if one replica executes a CrossReplicaSum node more times than
-another, then the former replica will wait forever. Since the replicas are all
-running the same program, there are not a lot of ways for that to happen, but it
-is possible when a while loop's condition depends on data from infeed and the
-data that is infed causes the while loop to iterate more times on one replica
-than another.
-
-## CustomCall
-
-See also
-[`XlaBuilder::CustomCall`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Call a user-provided function within a computation.
-
-<b> `CustomCall(target_name, args..., shape)` </b>
-
-| Arguments     | Type                   | Semantics                         |
-| ------------- | ---------------------- | --------------------------------- |
-| `target_name` | `string`               | Name of the function. A call      |
-:               :                        : instruction will be emitted which :
-:               :                        : targets this symbol name.         :
-| `args`        | sequence of N `XlaOp`s | N arguments of arbitrary type,    |
-:               :                        : which will be passed to the       :
-:               :                        : function.                         :
-| `shape`       | `Shape`                | Output shape of the function      |
-
-The function signature is the same, regardless of the arity or type of args:
-
-```
-extern "C" void target_name(void* out, void** in);
-```
-
-For example, if CustomCall is used as follows:
-
-```
-let x = f32[2] {1,2};
-let y = f32[2x3] {{10, 20, 30}, {40, 50, 60}};
-
-CustomCall("myfunc", {x, y}, f32[3x3])
-```
-
-Here is an example of an implementation of `myfunc`:
-
-```
-extern "C" void myfunc(void* out, void** in) {
-  float (&x)[2] = *static_cast<float(*)[2]>(in[0]);
-  float (&y)[2][3] = *static_cast<float(*)[2][3]>(in[1]);
-  EXPECT_EQ(1, x[0]);
-  EXPECT_EQ(2, x[1]);
-  EXPECT_EQ(10, y[0][0]);
-  EXPECT_EQ(20, y[0][1]);
-  EXPECT_EQ(30, y[0][2]);
-  EXPECT_EQ(40, y[1][0]);
-  EXPECT_EQ(50, y[1][1]);
-  EXPECT_EQ(60, y[1][2]);
-  float (&z)[3][3] = *static_cast<float(*)[3][3]>(out);
-  z[0][0] = x[1] + y[1][0];
-  // ...
-}
-```
-
-The user-provided function must not have side-effects and its execution must be
-idempotent.
-
-> Note: The opaque nature of the user-provided function restricts optimization
-> opportunities for the compiler. Try to express your computation in terms of
-> native XLA ops whenever possible; only use CustomCall as a last resort.
-
-## Dot
-
-See also
-[`XlaBuilder::Dot`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Dot(lhs, rhs)` </b>
-
-Arguments | Type    | Semantics
---------- | ------- | ---------------
-`lhs`     | `XlaOp` | array of type T
-`rhs`     | `XlaOp` | array of type T
-
-The exact semantics of this operation depend on the ranks of the operands:
-
-| Input                   | Output                | Semantics               |
-| ----------------------- | --------------------- | ----------------------- |
-| vector [n] `dot` vector | scalar                | vector dot product      |
-: [n]                     :                       :                         :
-| matrix [m x k] `dot`    | vector [m]            | matrix-vector           |
-: vector [k]              :                       : multiplication          :
-| matrix [m x k] `dot`    | matrix [m x n]        | matrix-matrix           |
-: matrix [k x n]          :                       : multiplication          :
-
-The operation performs sum of products over the last dimension of `lhs` and the
-one-before-last dimension of `rhs`. These are the "contracted" dimensions. The
-contracted dimensions of `lhs` and `rhs` must be of the same size. In practice,
-it can be used to perform dot products between vectors, vector/matrix
-multiplications or matrix/matrix multiplications.
-
-## DotGeneral
-
-See also
-[`XlaBuilder::DotGeneral`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `DotGeneral(lhs, rhs, dimension_numbers)` </b>
-
-Arguments           | Type                  | Semantics
-------------------- | --------------------- | ---------------
-`lhs`               | `XlaOp`               | array of type T
-`rhs`               | `XlaOp`               | array of type T
-`dimension_numbers` | `DotDimensionNumbers` | array of type T
-
-As Dot, but allows contracting and batch dimension numbers to be specified for
-both the 'lhs' and 'rhs'.
-
-| DotDimensionNumbers Fields | Type                    | Semantics
-| --------- | ----------------------- | ---------------
-| 'lhs_contracting_dimensions' | repeated int64 | 'lhs' contracting dimension numbers |
-| 'rhs_contracting_dimensions' | repeated int64 | 'rhs' contracting dimension numbers |
-| 'lhs_batch_dimensions' | repeated int64 | 'lhs' batch dimension numbers |
-| 'rhs_batch_dimensions' | repeated int64 | 'rhs' batch dimension numbers |
-
-DotGeneral performs the sum of products over contracting dimensions specified
-in 'dimension_numbers'.
-
-Associated contracting dimension numbers from the 'lhs' and 'rhs' do not need
-to be the same, but must be listed in the same order in both
-'lhs/rhs_contracting_dimensions' arrays and have the same dimension sizes.
-There must be exactly one contracting dimension on both 'lhs' and 'rhs'.
-
-Example with contracting dimension numbers:
-
-```
-lhs = { {1.0, 2.0, 3.0},
-        {4.0, 5.0, 6.0} }
-
-rhs = { {1.0, 1.0, 1.0},
-        {2.0, 2.0, 2.0} }
-
-DotDimensionNumbers dnums;
-dnums.add_lhs_contracting_dimensions(1);
-dnums.add_rhs_contracting_dimensions(1);
-
-DotGeneral(lhs, rhs, dnums) -> { {6.0, 12.0},
-                                 {15.0, 30.0} }
-```
-
-Associated batch dimension numbers from the 'lhs' and 'rhs' must have the same
-dimension number, must be listed in the same order in both arrays, must
-have the same dimension sizes, and must be ordered before contracting and
-non-contracting/non-batch dimension numbers.
-
-Example with batch dimension numbers (batch size 2, 2x2 matrices):
-
-```
-lhs = { { {1.0, 2.0},
-          {3.0, 4.0} },
-        { {5.0, 6.0},
-          {7.0, 8.0} } }
-
-rhs = { { {1.0, 0.0},
-          {0.0, 1.0} },
-        { {1.0, 0.0},
-          {0.0, 1.0} } }
-
-DotDimensionNumbers dnums;
-dnums.add_lhs_contracting_dimensions(2);
-dnums.add_rhs_contracting_dimensions(1);
-dnums.add_lhs_batch_dimensions(0);
-dnums.add_rhs_batch_dimensions(0);
-
-DotGeneral(lhs, rhs, dnums) -> { { {1.0, 2.0},
-                                   {3.0, 4.0} },
-                                 { {5.0, 6.0},
-                                   {7.0, 8.0} } }
-```
-
-| Input                               | Output            | Semantics        |
-| ----------------------------------- | ----------------- | ---------------- |
-| [b0, m, k] `dot` [b0, k, n]         | [b0, m, n]        |  batch matmul    |
-| [b0, b1, m, k] `dot` [b0, b1, k, n] | [b0, b1, m, n]    |  batch matmul    |
-
-It follows that the resulting dimension number starts with the batch dimension,
-then the 'lhs' non-contracting/non-batch dimension, and finally the 'rhs'
-non-contracting/non-batch dimension.
-
-## DynamicSlice
-
-See also
-[`XlaBuilder::DynamicSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-DynamicSlice extracts a sub-array from the input array at dynamic
-`start_indices`. The size of the slice in each dimension is passed in
-`size_indices`, which specify the end point of exclusive slice intervals in each
-dimension: [start, start + size). The shape of `start_indices` must be rank ==
-1, with dimension size equal to the rank of `operand`.
-
-<b> `DynamicSlice(operand, start_indices, size_indices)` </b>
-
-| Arguments       | Type                | Semantics                           |
-| --------------- | ------------------- | ----------------------------------- |
-| `operand`       | `XlaOp`             | N dimensional array of type T       |
-| `start_indices` | `XlaOp`             | Rank 1 array of N integers          |
-:                 :                     : containing the starting indices of  :
-:                 :                     : the slice for each dimension. Value :
-:                 :                     : must be greater than or equal to    :
-:                 :                     : zero.                               :
-| `size_indices`  | `ArraySlice<int64>` | List of N integers containing the   |
-:                 :                     : slice size for each dimension. Each :
-:                 :                     : value must be strictly greater than :
-:                 :                     : zero, and start + size must be less :
-:                 :                     : than or equal to the size of the    :
-:                 :                     : dimension to avoid wrapping modulo  :
-:                 :                     : dimension size.                     :
-
-The effective slice indices are computed by applying the following
-transformation for each index `i` in `[1, N)` before performing the slice:
-
-```
-start_indices[i] = clamp(start_indices[i], 0, operand.dimension_size[i] - size_indices[i])
-```
-
-This ensures that the extracted slice is always in-bounds with respect to the
-operand array. If the slice is in-bounds before the transformation is applied,
-the transformation has no effect.
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-let s = {2}
-
-DynamicSlice(a, s, {2}) produces:
-  {2.0, 3.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-let s = {2, 1}
-
-DynamicSlice(b, s, {2, 2}) produces:
-  { { 7.0,  8.0},
-    {10.0, 11.0} }
-```
-## DynamicUpdateSlice
-
-See also
-[`XlaBuilder::DynamicUpdateSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-DynamicUpdateSlice generates a result which is the value of the input array
-`operand`, with a slice `update` overwritten at `start_indices`.
-The shape of `update` determines the shape of the sub-array of the result which
-is updated.
-The shape of `start_indices` must be rank == 1, with dimension size equal to
-the rank of `operand`.
-
-<b> `DynamicUpdateSlice(operand, update, start_indices)` </b>
-
-| Arguments       | Type    | Semantics                                        |
-| --------------- | ------- | ------------------------------------------------ |
-| `operand`       | `XlaOp` | N dimensional array of type T                    |
-| `update`        | `XlaOp` | N dimensional array of type T containing the     |
-:                 :         : slice update. Each dimension of update shape     :
-:                 :         : must be strictly greater than zero, and start +  :
-:                 :         : update must be less than or equal to the operand :
-:                 :         : size for each dimension to avoid generating      :
-:                 :         : out-of-bounds update indices.                    :
-| `start_indices` | `XlaOp` | Rank 1 array of N integers containing the        |
-:                 :         : starting indices of the slice for each           :
-:                 :         : dimension. Value must be greater than or equal   :
-:                 :         : to zero.                                         :
-
-The effective slice indices are computed by applying the following
-transformation for each index `i` in `[1, N)` before performing the slice:
-
-```
-start_indices[i] = clamp(start_indices[i], 0, operand.dimension_size[i] - update.dimension_size[i])
-```
-
-This ensures that the updated slice is always in-bounds with respect to the
-operand array. If the slice is in-bounds before the transformation is applied,
-the transformation has no effect.
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-let u = {5.0, 6.0}
-let s = {2}
-
-DynamicUpdateSlice(a, u, s) produces:
-  {0.0, 1.0, 5.0, 6.0, 4.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-let u =
- { {12.0,  13.0},
-   {14.0,  15.0},
-   {16.0,  17.0} }
-
-let s = {1, 1}
-
-DynamicUpdateSlice(b, u, s) produces:
- { {0.0,  1.0,  2.0},
-   {3.0, 12.0, 13.0},
-   {6.0, 14.0, 15.0},
-   {9.0, 16.0, 17.0} }
-```
-
-## Element-wise binary arithmetic operations
-
-See also
-[`XlaBuilder::Add`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-A set of element-wise binary arithmetic operations is supported.
-
-<b> `Op(lhs, rhs)` </b>
-
-Where `Op` is one of `Add` (addition), `Sub` (subtraction), `Mul`
-(multiplication), `Div` (division), `Rem` (remainder), `Max` (maximum), `Min`
-(minimum), `LogicalAnd` (logical AND), or `LogicalOr` (logical OR).
-
-Arguments | Type    | Semantics
---------- | ------- | ----------------------------------------
-`lhs`     | `XlaOp` | left-hand-side operand: array of type T
-`rhs`     | `XlaOp` | right-hand-side operand: array of type T
-
-The arguments' shapes have to be either similar or compatible. See the
-[broadcasting](../../performance/xla/broadcasting.md) documentation about what it means for shapes to
-be compatible. The result of an operation has a shape which is the result of
-broadcasting the two input arrays. In this variant, operations between arrays of
-different ranks are *not* supported, unless one of the operands is a scalar.
-
-When `Op` is `Rem`, the sign of the result is taken from the dividend, and the
-absolute value of the result is always less than the divisor's absolute value.
-
-Integer division overflow (signed/unsigned division/remainder by zero or signed
-divison/remainder of `INT_SMIN` with `-1`) produces an implementation defined
-value.
-
-An alternative variant with different-rank broadcasting support exists for these
-operations:
-
-<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
-
-Where `Op` is the same as above. This variant of the operation should be used
-for arithmetic operations between arrays of different ranks (such as adding a
-matrix to a vector).
-
-The additional `broadcast_dimensions` operand is a slice of integers used to
-expand the rank of the lower-rank operand up to the rank of the higher-rank
-operand. `broadcast_dimensions` maps the dimensions of the lower-rank shape to
-the dimensions of the higher-rank shape. The unmapped dimensions of the expanded
-shape are filled with dimensions of size one. Degenerate-dimension broadcasting
-then broadcasts the shapes along these degenerate dimensions to equalize the
-shapes of both operands. The semantics are described in detail on the
-[broadcasting page](../../performance/xla/broadcasting.md).
-
-## Element-wise comparison operations
-
-See also
-[`XlaBuilder::Eq`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-A set of standard element-wise binary comparison operations is supported. Note
-that standard IEEE 754 floating-point comparison semantics apply when comparing
-floating-point types.
-
-<b> `Op(lhs, rhs)` </b>
-
-Where `Op` is one of `Eq` (equal-to), `Ne` (not equal-to), `Ge`
-(greater-or-equal-than), `Gt` (greater-than), `Le` (less-or-equal-than), `Lt`
-(less-than).
-
-Arguments | Type    | Semantics
---------- | ------- | ----------------------------------------
-`lhs`     | `XlaOp` | left-hand-side operand: array of type T
-`rhs`     | `XlaOp` | right-hand-side operand: array of type T
-
-The arguments' shapes have to be either similar or compatible. See the
-[broadcasting](../../performance/xla/broadcasting.md) documentation about what it means for shapes to
-be compatible. The result of an operation has a shape which is the result of
-broadcasting the two input arrays with the element type `PRED`. In this variant,
-operations between arrays of different ranks are *not* supported, unless one of
-the operands is a scalar.
-
-An alternative variant with different-rank broadcasting support exists for these
-operations:
-
-<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
-
-Where `Op` is the same as above. This variant of the operation should be used
-for comparison operations between arrays of different ranks (such as adding a
-matrix to a vector).
-
-The additional `broadcast_dimensions` operand is a slice of integers specifying
-the dimensions to use for broadcasting the operands. The semantics are described
-in detail on the [broadcasting page](../../performance/xla/broadcasting.md).
-
-## Element-wise unary functions
-
-XlaBuilder supports these element-wise unary functions:
-
-<b>`Abs(operand)`</b> Element-wise abs `x -> |x|`.
-
-<b>`Ceil(operand)`</b> Element-wise ceil `x -> ⌈x⌉`.
-
-<b>`Cos(operand)`</b> Element-wise cosine `x -> cos(x)`.
-
-<b>`Exp(operand)`</b> Element-wise natural exponential `x -> e^x`.
-
-<b>`Floor(operand)`</b> Element-wise floor `x -> ⌊x⌋`.
-
-<b>`IsFinite(operand)`</b> Tests whether each element of `operand` is finite,
-i.e., is not positive or negative infinity, and is not `NaN`. Returns an array
-of `PRED` values with the same shape as the input, where each element is `true`
-if and only if the corresponding input element is finite.
-
-<b>`Log(operand)`</b> Element-wise natural logarithm `x -> ln(x)`.
-
-<b>`LogicalNot(operand)`</b> Element-wise logical not `x -> !(x)`.
-
-<b>`Neg(operand)`</b> Element-wise negation `x -> -x`.
-
-<b>`Sign(operand)`</b> Element-wise sign operation `x -> sgn(x)` where
-
-$$\text{sgn}(x) = \begin{cases} -1 & x < 0\\ 0 & x = 0\\ 1 & x > 0 \end{cases}$$
-
-using the comparison operator of the element type of `operand`.
-
-<b>`Tanh(operand)`</b> Element-wise hyperbolic tangent `x -> tanh(x)`.
-
-
-Arguments | Type    | Semantics
---------- | ------- | ---------------------------
-`operand` | `XlaOp` | The operand to the function
-
-The function is applied to each element in the `operand` array, resulting in an
-array with the same shape. It is allowed for `operand` to be a scalar (rank 0).
-
-## Gather
-
-The XLA gather operation stitches together several slices (each slice at a
-potentially different runtime offset) of an input array.
-
-### General Semantics
-
-See also
-[`XlaBuilder::Gather`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-For a more intuitive description, see the "Informal Description" section below.
-
-<b> `gather(operand, start_indices, offset_dims, collapsed_slice_dims, slice_sizes, start_index_map)` </b>
-
-|Arguments         | Type                    | Semantics                       |
-|----------------- | ----------------------- | --------------------------------|
-|`operand`         | `XlaOp`                 | The array we’re gathering       |
-:                  :                         : from.                           :
-|`start_indices`   | `XlaOp`                 | Array containing the starting  |
-:                  :                         : indices of the slices we gather.:
-|`index_vector_dim` | `int64`                | The dimension in                |
-:                  :                         : `start_indices` that "contains" :
-:                  :                         : the starting indices.  See      :
-:                  :                         : below for a detailed            :
-:                  :                         : description.                    :
-|`offset_dims`     | `ArraySlice<int64>`     | The set of dimensions in  the   :
-:                  :                         : output shape that offset into a :
-:                  :                         : array sliced from operand.     :
-|`slice_sizes`     | `ArraySlice<int64>`      | `slice_sizes[i]` is the bounds |
-:                  :                          : for the slice on dimension `i`.:
-|`collapsed_slice_dims` | `ArraySlice<int64>` | The set of dimensions in each  :
-|                  :                          | slice that are collapsed away. :
-|                  :                          | These dimensions must have size:
-|                  :                          | 1.                             |
-|`start_index_map` | `ArraySlice<int64>`      | A map that describes how to map|
-:                  :                          : indices in `start_indices` to  :
-:                  :                          : to legal indices into operand. :
-
-For convenience, we label dimensions in the output array not in `offset_dims`
-as `batch_dims`.
-
-The output is an array of rank `batch_dims.size` + `operand.rank` -
-`collapsed_slice_dims`.size.
-
-If `index_vector_dim` is equal to `start_indices.rank` we implicitly consider
-`start_indices` to have a trailing `1` dimension (i.e. if `start_indices` was of
-shape `[6,7]` and `index_vector_dim` is `2` then we implicitly consider the
-shape of `start_indices` to be `[6,7,1]`).
-
-The bounds for the output array along dimension `i` is computed as follows:
-
-  1. If `i` is present in `batch_dims` (i.e. is equal to `batch_dims[k]` for
-     some `k`) then we pick the corresponding dimension bounds out of
-     `start_indices.shape`, skipping `index_vector_dim` (i.e. pick
-     `start_indices.shape.dims`[`k`] if `k` < `index_vector_dim` and
-     `start_indices.shape.dims`[`k`+`1`] otherwise).
-
-  2. If `i` is present in `offset_dims` (i.e. equal to `offset_dims`[`k`] for
-     some `k`) then we pick the corresponding bound out of `slice_sizes` after
-     accounting for `collapsed_slice_dims` (i.e. we pick
-     `adjusted_slice_sizes`[`k`] where `adjusted_slice_sizes` is `slice_sizes`
-     with the bounds at indices `collapsed_slice_dims` removed).
-
-Formally, the operand index `In` corresponding to an output index `Out` is
-computed as follows:
-
-  1. Let `G` = { `Out`[`k`] for `k` in `batch_dims` }.  Use `G` to slice out
-     vector `S` such that `S`[`i`] = `start_indices`[Combine(`G`, `i`)] where
-     Combine(A, b) inserts b at position `index_vector_dim` into A.  Note that
-     this is well defined even if `G` is empty -- if `G` is empty then `S` =
-     `start_indices`.
-
-  2. Create a starting index, `S`<sub>`in`</sub>, into `operand` using `S` by
-     scattering `S` using `start_index_map`.  More precisely:
-       1. `S`<sub>`in`</sub>[`start_index_map`[`k`]] = `S`[`k`] if `k` <
-          `start_index_map.size`.
-       2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
-
-  3. Create an index `O`<sub>`in`</sub> into `operand` by scattering the indices
-     at the offset dimensions in `Out` according to the `collapsed_slice_dims`
-     set.  More precisely:
-       1. `O`<sub>`in`</sub>[`expand_offset_dims`(`k`)] =
-          `Out`[`offset_dims`[`k`]] if `k` < `offset_dims.size`
-          (`expand_offset_dims` is defined below).
-       2. `O`<sub>`in`</sub>[`_`] = `0` otherwise.
-  4. `In` is `O`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
-     addition.
-
-`expand_offset_dims` is the monotonic function with domain [`0`, `offset.size`)
-and range [`0`, `operand.rank`) \ `collapsed_slice_dims`.  So if, e.g.,
-`offset.size` is `4`, `operand.rank` is `6` and `collapsed_slice_dims` is {`0`,
-`2`} then `expand_offset_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}.
-
-### Informal Description and Examples
-
-Informally, every index `Out` in the output array corresponds to an element `E`
-in the operand array, computed as follows:
-
-  - We use the batch dimensions in `Out` to look up a starting index from
-    `start_indices`.
-
-  - We use `start_index_map` to map the starting index (which may have size less
-    than operand.rank) to a "full" starting index into operand.
-
-  - We dynamic-slice out a slice with size `slice_sizes` using the full starting
-    index.
-
-  - We reshape the slice by collapsing the `collapsed_slice_dims` dimensions.
-    Since all collapsed slice dimensions have to have bound 1 this reshape is
-    always legal.
-
-  - We use the offset dimensions in `Out` to index into this slice to get the
-    input element, `E`, corresponding to output index `Out`.
-
-`index_vector_dim` is set to `start_indices.rank` - `1` in all of the
-examples that follow.  More interesting values for `index_vector_dim` does not
-change the operation fundamentally, but makes the visual representation more
-cumbersome.
-
-To get an intuition on how all of the above fits together, let's look at an
-example that gathers 5 slices of shape `[8,6]` from a `[16,11]` array.  The
-position of a slice into the `[16,11]` array can be represented as an index
-vector of shape `S64[2]`, so the set of 5 positions can be represented as a
-`S64[5,2]` array.
-
-The behavior of the gather operation can then be depicted as an index
-transformation that takes [`G`,`O`<sub>`0`</sub>,`O`<sub>`1`</sub>], an index in
-the output shape, and maps it to an element in the input array in the following
-way:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/ops_xla_gather_0.svg">
-</div>
-
-We first select an (`X`,`Y`) vector from the gather indices array using `G`.
-The element in the output array at index
-[`G`,`O`<sub>`0`</sub>,`O`<sub>`1`</sub>] is then the element in the input
-array at index [`X`+`O`<sub>`0`</sub>,`Y`+`O`<sub>`1`</sub>].
-
-`slice_sizes` is `[8,6]`, which decides the range of W<sub>`0`</sub> and
-W<sub>`1`</sub>, and this in turn decides the bounds of the slice.
-
-This gather operation acts as a batch dynamic slice with `G` as the batch
-dimension.
-
-The gather indices may be multidimensional.  For instance, a more general
-version of the example above using a "gather indices" array of shape `[4,5,2]`
-would translate indices like this:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/ops_xla_gather_1.svg">
-</div>
-
-Again, this acts as a batch dynamic slice `G`<sub>`0`</sub> and
-`G`<sub>`1`</sub> as the batch dimensions.  The slice size is still `[8,6]`.
-
-The gather operation in XLA generalizes the informal semantics outlined above in
-the following ways:
-
- 1. We can configure which dimensions in the output shape are the offset
-    dimensions (dimensions containing `O`<sub>`0`</sub>, `O`<sub>`1`</sub> in
-    the last example).  The output batch dimensions (dimensions containing
-    `G`<sub>`0`</sub>, `G`<sub>`1`</sub> in the last example) are defined to be
-    the output dimensions that are not offset dimensions.
-
- 2. The number of output offset dimensions explicitly present in the output
-    shape may be smaller than the input rank.  These "missing" dimensions, which
-    are listed explicitly as `collapsed_slice_dims`, must have a slice size of
-    `1`.  Since they have a slice size of `1` the only valid index for them is
-    `0` and eliding them does not introduce ambiguity.
-
- 3. The slice extracted from the "Gather Indices" array ((`X`, `Y`) in the last
-    example) may have fewer elements than the input array rank, and an explicit
-    mapping dictates how the index should be expanded to have the same rank as
-    the input.
-
-As a final example, we use (2) and (3) to implement `tf.gather_nd`:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/ops_xla_gather_2.svg">
-</div>
-
-`G`<sub>`0`</sub> and `G`<sub>`1`</sub> are used to slice out a starting index
-from the gather indices array as usual, except the starting index has only one
-element, `X`.  Similarly, there is only one output offset index with the value
-`O`<sub>`0`</sub>.  However, before being used as indices into the input array,
-these are expanded in accordance to "Gather Index Mapping" (`start_index_map` in
-the formal description) and "Offset Mapping" (`expand_offset_dims` in the formal
-description) into [`0`,`O`<sub>`0`</sub>] and [`X`,`0`] respectively, adding up
-to [`X`,`O`<sub>`0`</sub>].  In other words, the output index
-[`G`<sub>`0`</sub>,`G`<sub>`1`</sub>,`O`<sub>`0`</sub>] maps to the input index
-[`GatherIndices`[`G`<sub>`0`</sub>,`G`<sub>`1`</sub>,`0`],`X`] which gives us
-the semantics for `tf.gather_nd`.
-
-`slice_sizes` for this case is `[1,11]`.  Intuitively this means that every
-index `X` in the gather indices array picks an entire row and the result is the
-concatenation of all these rows.
-
-## GetTupleElement
-
-See also
-[`XlaBuilder::GetTupleElement`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Indexes into a tuple with a compile-time-constant value.
-
-The value must be a compile-time-constant so that shape inference can determine
-the type of the resulting value.
-
-This is analogous to `std::get<int N>(t)` in C++. Conceptually:
-
-```
-let v: f32[10] = f32[10]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-let s: s32 = 5;
-let t: (f32[10], s32) = tuple(v, s);
-let element_1: s32 = gettupleelement(t, 1);  // Inferred shape matches s32.
-```
-
-See also `tf.tuple`.
-
-## Infeed
-
-See also
-[`XlaBuilder::Infeed`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Infeed(shape)` </b>
-
-| Argument | Type    | Semantics                                             |
-| -------- | ------- | ----------------------------------------------------- |
-| `shape`  | `Shape` | Shape of the data read from the Infeed interface. The |
-:          :         : layout field of the shape must be set to match the    :
-:          :         : layout of the data sent to the device; otherwise its  :
-:          :         : behavior is undefined.                                :
-
-Reads a single data item from the implicit Infeed streaming interface of the
-device, interpreting the data as the given shape and its layout, and returns a
-`XlaOp` of the data. Multiple Infeed operations are allowed in a
-computation, but there must be a total order among the Infeed operations. For
-example, two Infeeds in the code below have a total order since there is a
-dependency between the while loops.
-
-```
-result1 = while (condition, init = init_value) {
-  Infeed(shape)
-}
-
-result2 = while (condition, init = result1) {
-  Infeed(shape)
-}
-```
-
-Nested tuple shapes are not supported. For an empty tuple shape, the Infeed
-operation is effectively a no-op and proceeds without reading any data from the
-Infeed of the device.
-
-> Note: We plan to allow multiple Infeed operations without a total order, in
-> which case the compiler will provide information about how the Infeed
-> operations are serialized in the compiled program.
-
-## Iota
-
-<b> `Iota()` </b>
-
-Builds a constant literal on device rather than a potentially large host
-transfer.  Creates a rank 1 tensor of values starting at zero and incrementing
-by one.
-
-Arguments          | Type            | Semantics
------------------- | --------------- | ---------------------------
-`type`             | `PrimitiveType` | type U
-`size`             | `int64`         | The number of elements in the tensor.
-
-## Map
-
-See also
-[`XlaBuilder::Map`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Map(operands..., computation)` </b>
-
-| Arguments         | Type                   | Semantics                      |
-| ----------------- | ---------------------- | ------------------------------ |
-| `operands`        | sequence of N `XlaOp`s | N arrays of types T_0..T_{N-1} |
-| `computation`     | `XlaComputation`       | computation of type `T_0, T_1, |
-:                   :                        : ..., T_{N + M -1} -> S` with N :
-:                   :                        : parameters of type T and M of  :
-:                   :                        : arbitrary type                 :
-| `dimensions`      | `int64` array          | array of map dimensions        |
-
-Applies a scalar function over the given `operands` arrays, producing an array
-of the same dimensions where each element is the result of the mapped function
-applied to the corresponding elements in the input arrays.
-
-The mapped function is an arbitrary computation with the restriction that it has
-N inputs of scalar type `T` and a single output with type `S`. The output has
-the same dimensions as the operands except that the element type T is replaced
-with S.
-
-For example: `Map(op1, op2, op3, computation, par1)` maps `elem_out <-
-computation(elem1, elem2, elem3, par1)` at each (multi-dimensional) index in the
-input arrays to produce the output array.
-
-## Pad
-
-See also
-[`XlaBuilder::Pad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Pad(operand, padding_value, padding_config)` </b>
-
-| Arguments        | Type            | Semantics                               |
-| ---------------- | --------------- | --------------------------------------- |
-| `operand`        | `XlaOp`         | array of type `T`                       |
-| `padding_value`  | `XlaOp`         | scalar of type `T` to fill in the added |
-:                  :                 : padding                                 :
-| `padding_config` | `PaddingConfig` | padding amount on both edges (low,      |
-:                  :                 : high) and between the elements of each  :
-:                  :                 : dimension                               :
-
-Expands the given `operand` array by padding around the array as well as between
-the elements of the array with the given `padding_value`. `padding_config`
-specifies the amount of edge padding and the interior padding for each
-dimension.
-
-`PaddingConfig` is a repeated field of `PaddingConfigDimension`, which contains
-three fields for each dimension: `edge_padding_low`, `edge_padding_high`, and
-`interior_padding`. `edge_padding_low` and `edge_padding_high` specify the
-amount of padding added at the low-end (next to index 0) and the high-end (next
-to the highest index) of each dimension respectively. The amount of edge padding
-can be negative -- the absolute value of negative padding indicates the number
-of elements to remove from the specified dimension. `interior_padding` specifies
-the amount of padding added between any two elements in each dimension. Interior
-padding occurs logically before edge padding, so in the case of negative edge
-padding elements are removed from the interior-padded operand. This operation is
-a no-op if the edge padding pairs are all (0, 0) and the interior padding values
-are all 0. The figure below shows examples of different `edge_padding` and
-`interior_padding` values for a two-dimensional array.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="https://www.tensorflow.org/images/ops_pad.png">
-</div>
-
-## Recv
-
-See also
-[`XlaBuilder::Recv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Recv(shape, channel_handle)` </b>
-
-| Arguments        | Type            | Semantics                            |
-| ---------------- | --------------- | ------------------------------------ |
-| `shape`          | `Shape`         | shape of the data to receive         |
-| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair |
-
-Receives data of the given shape from a `Send` instruction in another
-computation that shares the same channel handle. Returns a
-XlaOp for the received data.
-
-The client API of `Recv` operation represents synchronous communication.
-However, the instruction is internally decomposed into 2 HLO instructions
-(`Recv` and `RecvDone`) to enable asynchronous data transfers. See also
-[`HloInstruction::CreateRecv` and `HloInstruction::CreateRecvDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
-
-<b>`Recv(const Shape& shape, int64 channel_id)`</b>
-
-Allocates resources required to receive data from a `Send` instruction with the
-same channel_id. Returns a context for the allocated resources, which is used
-by a following `RecvDone` instruction to wait for the completion of the data
-transfer. The context is a tuple of {receive buffer (shape), request identifier
-(U32)} and it can only be used by a `RecvDone` instruction.
-
-<b> `RecvDone(HloInstruction context)` </b>
-
-Given a context created by a `Recv` instruction, waits for the data transfer to
-complete and returns the received data.
-
-## Reduce
-
-See also
-[`XlaBuilder::Reduce`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Applies a reduction function to one or more arrays in parallel.
-
-<b> `Reduce(operands..., init_values..., computation, dimensions)` </b>
-
-Arguments     | Type                  | Semantics
-------------- | --------------------- | ---------------------------------------
-`operands`    | Sequence of N `XlaOp` | N arrays of types `T_0, ..., T_N`.
-`init_values` | Sequence of N `XlaOp` | N scalars of types `T_0, ..., T_N`.
-`computation` | `XlaComputation`      | computation of type
-              :                       : `T_0, ..., T_N, T_0, ..., T_N -> Collate(T_0, ..., T_N)`
-`dimensions`  | `int64` array         | unordered array of dimensions to reduce
-
-Where:
-* N is required to be greater or equal to 1.
-* All input arrays must have the same dimensions.
-* If `N = 1`, `Collate(T)` is `T`.
-* If `N > 1`, `Collate(T_0, ..., T_N)` is a tuple of `N` elements of type `T`.
-
-The output of the op is `Collate(Q_0, ..., Q_N)` where `Q_i` is an array of type
-`T_i`, the dimensions of which are described below.
-
-This operation reduces one or more dimensions of each input array into scalars.
-The rank of each returned array is `rank(operand) - len(dimensions)`.
-`init_value` is the initial value used for every reduction and may be inserted
-anywhere during computation by the back-end. In most cases, `init_value` is an
-identity of the reduction function (for example, 0 for addition). The applied
-`computation` is always passed the `init_value` on the left-hand side.
-
-The evaluation order of the reduction function is arbitrary and may be
-non-deterministic. Therefore, the reduction function should not be overly
-sensitive to reassociation.
-
-Some reduction functions like addition are not strictly associative for floats.
-However, if the range of the data is limited, floating-point addition is close
-enough to being associative for most practical uses. It is possible to conceive
-of some completely non-associative reductions, however, and these will produce
-incorrect or unpredictable results in XLA reductions.
-
-As an example, when reducing across one dimension in a single 1D array with
-values [10, 11, 12, 13], with reduction function `f` (this is `computation`)
-then that could be computed as
-
-`f(10, f(11, f(12, f(init_value, 13)))`
-
-but there are also many other possibilities, e.g.
-
-`f(init_value, f(f(10, f(init_value, 11)), f(f(init_value, 12), f(init_value, 13))))`
-
-The following is a rough pseudo-code example of how reduction could be
-implemented, using summation as the reduction computation with an initial value
-of 0.
-
-```python
-result_shape <- remove all dims in dimensions from operand_shape
-
-# Iterate over all elements in result_shape. The number of r's here is equal
-# to the rank of the result
-for r0 in range(result_shape[0]), r1 in range(result_shape[1]), ...:
-  # Initialize this result element
-  result[r0, r1...] <- 0
-
-  # Iterate over all the reduction dimensions
-  for d0 in range(dimensions[0]), d1 in range(dimensions[1]), ...:
-    # Increment the result element with the value of the operand's element.
-    # The index of the operand's element is constructed from all ri's and di's
-    # in the right order (by construction ri's and di's together index over the
-    # whole operand shape).
-    result[r0, r1...] += operand[ri... di]
-```
-
-Here's an example of reducing a 2D array (matrix). The shape has rank 2,
-dimension 0 of size 2 and dimension 1 of size 3:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:35%" src="https://www.tensorflow.org/images/ops_2d_matrix.png">
-</div>
-
-Results of reducing dimensions 0 or 1 with an "add" function:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_from_2d_matrix.png">
-</div>
-
-Note that both reduction results are 1D arrays. The diagram shows one as column
-and another as row just for visual convenience.
-
-For a more complex example, here is a 3D array. Its rank is 3, dimension 0 of
-size 4, dimension 1 of size 2 and dimension 2 of size 3. For simplicity, the
-values 1 to 6 are replicated across dimension 0.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_from_3d_matrix.png">
-</div>
-
-Similarly to the 2D example, we can reduce just one dimension. If we reduce
-dimension 0, for example, we get a rank-2 array where all values across
-dimension 0 were folded into a scalar:
-
-```text
-|  4   8  12 |
-| 16  20  24 |
-```
-
-If we reduce dimension 2, we also get a rank-2 array where all values across
-dimension 2 were folded into a scalar:
-
-```text
-| 6  15 |
-| 6  15 |
-| 6  15 |
-| 6  15 |
-```
-
-Note that the relative order between the remaining dimensions in the input is
-preserved in the output, but some dimensions may get assigned new numbers (since
-the rank changes).
-
-We can also reduce multiple dimensions. Add-reducing dimensions 0 and 1 produces
-the 1D array `| 20 28 36 |`.
-
-Reducing the 3D array over all its dimensions produces the scalar `84`.
-
-When `N > 1`, reduce function application is slightly more complex, as it is
-applied simultaneously to all inputs. For example, consider the following
-reduction function, which can be used to compute the max and the argmax of a
-a 1-D tensor in parallel:
-
-```
-f: (Float, Int, Float, Int) -> Float, Int
-f(max, argmax, value, index):
-  if value >= argmax:
-    return (value, index)
-  else:
-    return (max, argmax)
-```
-
-For 1-D Input arrays `V = Float[N], K = Int[N]`, and init values
-`I_V = Float, I_K =  Int`, the result `f_(N-1)` of reducing across the only
-input dimension is equivalent to the following recursive application:
-```
-f_0 = f(I_V, I_K, V_0, K_0)
-f_1 = f(f_0.first, f_0.second, V_1, K_1)
-...
-f_(N-1) = f(f_(N-2).first, f_(N-2).second, V_(N-1), K_(N-1))
-```
-
-Applying this reduction to an array of values, and an array of sequential
-indices (i.e. iota), will co-iterate over the arrays, and return a tuple
-containing the maximal value and the matching index.
-
-## ReducePrecision
-
-See also
-[`XlaBuilder::ReducePrecision`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Models the effect of converting floating-point values to a lower-precision
-format (such as IEEE-FP16) and back to the original format.  The number of
-exponent and mantissa bits in the lower-precision format can be specified
-arbitrarily, although all bit sizes may not be supported on all hardware
-implementations.
-
-<b> `ReducePrecision(operand, mantissa_bits, exponent_bits)` </b>
-
-Arguments       | Type    | Semantics
---------------- | ------- | -------------------------------------------------
-`operand`       | `XlaOp` | array of floating-point type `T`.
-`exponent_bits` | `int32` | number of exponent bits in lower-precision format
-`mantissa_bits` | `int32` | number of mantissa bits in lower-precision format
-
-The result is an array of type `T`.  The input values are rounded to the nearest
-value representable with the given number of mantissa bits (using "ties to even"
-semantics), and any values that exceed the range specified by the number of
-exponent bits are clamped to positive or negative infinity.  `NaN` values are
-retained, although they may be converted to canonical `NaN` values.
-
-The lower-precision format must have at least one exponent bit (in order to
-distinguish a zero value from an infinity, since both have a zero mantissa), and
-must have a non-negative number of mantissa bits.  The number of exponent or
-mantissa bits may exceed the corresponding value for type `T`; the corresponding
-portion of the conversion is then simply a no-op.
-
-## ReduceWindow
-
-See also
-[`XlaBuilder::ReduceWindow`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Applies a reduction function to all elements in each window of the input
-multi-dimensional array, producing an output multi-dimensional array with the
-same number of elements as the number of valid positions of the window. A
-pooling layer can be expressed as a `ReduceWindow`. Similar to
-[`Reduce`](#reduce), the applied `computation` is always passed the `init_value`
-on the left-hand side.
-
-<b> `ReduceWindow(operand, init_value, computation, window_dimensions,
-window_strides, padding)` </b>
-
-| Arguments           | Type                | Semantics                        |
-| ------------------- | ------------------- | -------------------------------- |
-| `operand`           | `XlaOp`             | N dimensional array containing   |
-:                     :                     : elements of type T. This is the  :
-:                     :                     : base area on which the window is :
-:                     :                     : placed.                          :
-| `init_value`        | `XlaOp`             | Starting value for the           |
-:                     :                     : reduction. See [Reduce](#reduce) :
-:                     :                     : for details.                     :
-| `computation`       | `XlaComputation`    | Reduction function of type `T, T |
-:                     :                     : -> T`, to apply to all elements  :
-:                     :                     : in each window                   :
-| `window_dimensions` | `ArraySlice<int64>` | array of integers for window     |
-:                     :                     : dimension values                 :
-| `window_strides`    | `ArraySlice<int64>` | array of integers for window     |
-:                     :                     : stride values                    :
-| `padding`           | `Padding`           | padding type for window          |
-:                     :                     : (Padding\:\:kSame or             :
-:                     :                     : Padding\:\:kValid)               :
-
-Below code and figure shows an example of using `ReduceWindow`. Input is a
-matrix of size [4x6] and both window_dimensions and window_stride_dimensions are
-[2x3].
-
-```
-// Create a computation for the reduction (maximum).
-XlaComputation max;
-{
-  XlaBuilder builder(client_, "max");
-  auto y = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "y");
-  auto x = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "x");
-  builder.Max(y, x);
-  max = builder.Build().ConsumeValueOrDie();
-}
-
-// Create a ReduceWindow computation with the max reduction computation.
-XlaBuilder builder(client_, "reduce_window_2x3");
-auto shape = ShapeUtil::MakeShape(F32, {4, 6});
-auto input = builder.Parameter(0, shape, "input");
-builder.ReduceWindow(
-    input, *max,
-    /*init_val=*/builder.ConstantLiteral(LiteralUtil::MinValue(F32)),
-    /*window_dimensions=*/{2, 3},
-    /*window_stride_dimensions=*/{2, 3},
-    Padding::kValid);
-```
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_window.png">
-</div>
-
-Stride of 1 in a dimension specifies that the position of a window in the
-dimension is 1 element away from its adjacent window. In order to specify that
-no windows overlap with each other, window_stride_dimensions should be equal to
-window_dimensions. The figure below illustrates the use of two different stride
-values. Padding is applied to each dimension of the input and the calculations
-are the same as though the input came in with the dimensions it has after
-padding.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:75%" src="https://www.tensorflow.org/images/ops_reduce_window_stride.png">
-</div>
-
-The evaluation order of the reduction function is arbitrary and may be
-non-deterministic. Therefore, the reduction function should not be overly
-sensitive to reassociation. See the discussion about associativity in the
-context of [`Reduce`](#reduce) for more details.
-
-## Reshape
-
-See also
-[`XlaBuilder::Reshape`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and the [`Collapse`](#collapse) operation.
-
-Reshapes the dimensions of an array into a new configuration.
-
-<b> `Reshape(operand, new_sizes)` </b>
-<b> `Reshape(operand, dimensions, new_sizes)` </b>
-
-Arguments    | Type           | Semantics
------------- | -------------- | ---------------------------------------
-`operand`    | `XlaOp`        | array of type T
-`dimensions` | `int64` vector | order in which dimensions are collapsed
-`new_sizes`  | `int64` vector | vector of sizes of new dimensions
-
-Conceptually, reshape first flattens an array into a one-dimensional vector of
-data values, and then refines this vector into a new shape. The input arguments
-are an arbitrary array of type T, a compile-time-constant vector of dimension
-indices, and a compile-time-constant vector of dimension sizes for the result.
-The values in the `dimension` vector, if given, must be a permutation of all of
-T's dimensions; the default if not given is `{0, ..., rank - 1}`. The order of
-the dimensions in `dimensions` is from slowest-varying dimension (most major) to
-fastest-varying dimension (most minor) in the loop nest which collapses the
-input array into a single dimension. The `new_sizes` vector determines the size
-of the output array. The value at index 0 in `new_sizes` is the size of
-dimension 0, the value at index 1 is the size of dimension 1, and so on. The
-product of the `new_size` dimensions must equal the product of the operand's
-dimension sizes. When refining the collapsed array into the multidimensional
-array defined by `new_sizes`, the dimensions in `new_sizes` are ordered from
-slowest varying (most major) and to fastest varying (most minor).
-
-For example, let v be an array of 24 elements:
-
-```
-let v = f32[4x2x3] {{{10, 11, 12}, {15, 16, 17}},
-                    {{20, 21, 22}, {25, 26, 27}},
-                    {{30, 31, 32}, {35, 36, 37}},
-                    {{40, 41, 42}, {45, 46, 47}}};
-
-In-order collapse:
-let v012_24 = Reshape(v, {0,1,2}, {24});
-then v012_24 == f32[24] {10, 11, 12, 15, 16, 17, 20, 21, 22, 25, 26, 27,
-                         30, 31, 32, 35, 36, 37, 40, 41, 42, 45, 46, 47};
-
-let v012_83 = Reshape(v, {0,1,2}, {8,3});
-then v012_83 == f32[8x3] {{10, 11, 12}, {15, 16, 17},
-                          {20, 21, 22}, {25, 26, 27},
-                          {30, 31, 32}, {35, 36, 37},
-                          {40, 41, 42}, {45, 46, 47}};
-
-Out-of-order collapse:
-let v021_24 = Reshape(v, {1,2,0}, {24});
-then v012_24 == f32[24]  {10, 20, 30, 40, 11, 21, 31, 41, 12, 22, 32, 42,
-                          15, 25, 35, 45, 16, 26, 36, 46, 17, 27, 37, 47};
-
-let v021_83 = Reshape(v, {1,2,0}, {8,3});
-then v021_83 == f32[8x3] {{10, 20, 30}, {40, 11, 21},
-                          {31, 41, 12}, {22, 32, 42},
-                          {15, 25, 35}, {45, 16, 26},
-                          {36, 46, 17}, {27, 37, 47}};
-
-
-let v021_262 = Reshape(v, {1,2,0}, {2,6,2});
-then v021_262 == f32[2x6x2] {{{10, 20}, {30, 40},
-                              {11, 21}, {31, 41},
-                              {12, 22}, {32, 42}},
-                             {{15, 25}, {35, 45},
-                              {16, 26}, {36, 46},
-                              {17, 27}, {37, 47}}};
-```
-
-As a special case, reshape can transform a single-element array to a scalar and
-vice versa. For example,
-
-```
-Reshape(f32[1x1] {{5}}, {0,1}, {}) == 5;
-Reshape(5, {}, {1,1}) == f32[1x1] {{5}};
-```
-
-## Rev (reverse)
-
-See also
-[`XlaBuilder::Rev`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b>`Rev(operand, dimensions)`</b>
-
-Arguments    | Type                | Semantics
------------- | ------------------- | ---------------------
-`operand`    | `XlaOp`             | array of type T
-`dimensions` | `ArraySlice<int64>` | dimensions to reverse
-
-Reverses the order of elements in the `operand` array along the specified
-`dimensions`, generating an output array of the same shape. Each element of the
-operand array at a multidimensional index is stored into the output array at a
-transformed index. The multidimensional index is transformed by reversing the
-index in each dimension to be reversed (i.e., if a dimension of size N is one of
-the reversing dimensions, its index i is transformed into N - 1 - i).
-
-One use for the `Rev` operation is to reverse the convolution weight array along
-the two window dimensions during the gradient computation in neural networks.
-
-## RngNormal
-
-See also
-[`XlaBuilder::RngNormal`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Constructs an output of a given shape with random numbers generated following
-the $$N(\mu, \sigma)$$ normal distribution. The parameters $$\mu$$ and
-$$\sigma$$, and output shape have to have a floating point elemental type. The
-parameters furthermore have to be scalar valued.
-
-<b>`RngNormal(mu, sigma, shape)`</b>
-
-| Arguments | Type    | Semantics                                           |
-| --------- | ------- | --------------------------------------------------- |
-| `mu`      | `XlaOp` | Scalar of type T specifying mean of generated       |
-:           :         : numbers                                   :
-| `sigma`   | `XlaOp` | Scalar of type T specifying standard deviation of   |
-:           :         : generated numbers                                   :
-| `shape`   | `Shape` | Output shape of type T                              |
-
-## RngUniform
-
-See also
-[`XlaBuilder::RngUniform`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Constructs an output of a given shape with random numbers generated following
-the uniform distribution over the interval $$[a,b)$$. The parameters and output
-element type have to be a boolean type, an integral type or a floating point
-types, and the types have to be consistent. The CPU and GPU backends currently
-only support F64, F32, F16, BF16, S64, U64, S32 and U32. Furthermore, the
-parameters need to be scalar valued. If $$b <= a$$ the result is
-implementation-defined.
-
-<b>`RngUniform(a, b, shape)`</b>
-
-| Arguments | Type                    | Semantics                         |
-| --------- | ----------------------- | --------------------------------- |
-| `a`       | `XlaOp`                 | Scalar of type T specifying lower |
-:           :                         : limit of interval                 :
-| `b`       | `XlaOp`                 | Scalar of type T specifying upper |
-:           :                         : limit of interval                 :
-| `shape`   | `Shape`                 | Output shape of type T            |
-
-## Scatter
-
-The XLA scatter operation generates a result which is the value of the input
-tensor `operand`, with several slices (at indices specified by
-`scatter_indices`) updated with the values in `updates` using
-`update_computation`.
-
-See also
-[`XlaBuilder::Scatter`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `scatter(operand, scatter_indices, updates, update_computation, index_vector_dim, update_window_dims, inserted_window_dims, scatter_dims_to_operand_dims)` </b>
-
-|Arguments         | Type                   | Semantics                        |
-|------------------|------------------------|----------------------------------|
-|`operand`         | `XlaOp`                | Tensor to be scattered into.     |
-|`scatter_indices` | `XlaOp`                | Tensor containing the starting   |
-:                  :                        : indices of the slices that must  :
-:                  :                        : be scattered to.                 :
-|`updates`         | `XlaOp`                | Tensor containing the values that|
-:                  :                        : must be used for scattering.     :
-|`update_computation`| `XlaComputation`     | Computation to be used for       |
-:                  :                        : combining the existing values in :
-:                  :                        : the input tensor and the updates :
-:                  :                        : during scatter. This computation :
-:                  :                        : should be of type `T, T -> T`.   :
-|`index_vector_dim`| `int64`                | The dimension in                 |
-:                  :                        : `scatter_indices` that contains  :
-:                  :                        : the starting indices.            :
-|`update_window_dims`| `ArraySlice<int64>`  | The set of dimensions in         |
-:                  :                        : `updates` shape that are _window :
-:                  :                        : dimensions_.                     :
-|`inserted_window_dims`| `ArraySlice<int64>`| The set of _window dimensions_   |
-:                  :                        : that must be inserted into       :
-:                  :                        : `updates` shape.                 :
-|`scatter_dims_to_operand_dims`| `ArraySlice<int64>`  | A dimensions map from  |
-:                  :                        : the scatter indices to the       :
-:                  :                        : operand index space. This array  :
-:                  :                        : is interpreted as mapping `i` to :
-:                  :                        : `scatter_dims_to_operand_dims[i]`:
-:                  :                        : . It has to be one-to-one and    :
-:                  :                        : total.                           :
-
-If `index_vector_dim` is equal to `scatter_indices.rank` we implicitly consider
-`scatter_indices` to have a trailing `1` dimension.
-
-We define `update_scatter_dims` of type `ArraySlice<int64>` as the set of
-dimensions in `updates` shape that are not in `update_window_dims`, in ascending
-order.
-
-The arguments of scatter should follow these constraints:
-
-  - `updates` tensor must be of rank `update_window_dims.size +
-  scatter_indices.rank - 1`.
-
-  - Bounds of dimension `i` in `updates` must conform to the following:
-      - If `i` is present in `update_window_dims` (i.e. equal to
-        `update_window_dims`[`k`] for some `k`), then the bound of dimension
-        `i` in `updates` must not exceed the corresponding bound of `operand`
-        after accounting for the `inserted_window_dims` (i.e.
-        `adjusted_window_bounds`[`k`], where `adjusted_window_bounds` contains
-        the bounds of `operand` with the bounds at indices
-        `inserted_window_dims` removed).
-      - If `i` is present in `update_scatter_dims` (i.e. equal to
-        `update_scatter_dims`[`k`] for some `k`), then the bound of dimension
-        `i` in `updates` must be equal to the corresponding bound of
-        `scatter_indices`, skipping `index_vector_dim` (i.e.
-        `scatter_indices.shape.dims`[`k`], if `k` < `index_vector_dim` and
-        `scatter_indices.shape.dims`[`k+1`] otherwise).
-
-  - `update_window_dims` must be in ascending order, not have any repeating
-    dimension numbers, and be in the range `[0, updates.rank)`.
-
-  - `inserted_window_dims` must be in ascending order, not have any
-    repeating dimension numbers, and be in the range `[0, operand.rank)`.
-
-  - `scatter_dims_to_operand_dims.size` must be equal to
-    `scatter_indices`[`index_vector_dim`], and its values must be in the range
-    `[0, operand.rank)`.
-
-For a given index `U` in the `updates` tensor, the corresponding index `I` in
-the `operand` tensor into which this update has to be applied is computed as
-follows:
-
-  1. Let `G` = { `U`[`k`] for `k` in `update_scatter_dims` }. Use `G` to look up
-     an index vector `S` in the `scatter_indices` tensor such that `S`[`i`] =
-     `scatter_indices`[Combine(`G`, `i`)] where Combine(A, b) inserts b at
-     positions `index_vector_dim` into A.
-  2. Create an index `S`<sub>`in`</sub> into `operand` using `S` by scattering
-     `S` using the `scatter_dims_to_operand_dims` map. More formally:
-       1. `S`<sub>`in`</sub>[`scatter_dims_to_operand_dims`[`k`]] = `S`[`k`] if
-          `k` < `scatter_dims_to_operand_dims.size`.
-       2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
-  3. Create an index `W`<sub>`in`</sub> into `operand` by scattering the indices
-     at `update_window_dims` in `U` according to `inserted_window_dims`.
-     More formally:
-       1. `W`<sub>`in`</sub>[`window_dims_to_operand_dims`(`k`)] = `U`[`k`] if
-          `k` < `update_window_dims.size`, where `window_dims_to_operand_dims`
-          is the monotonic function with domain [`0`, `update_window_dims.size`)
-          and range [`0`, `operand.rank`) \\ `inserted_window_dims`. (For
-          example, if `update_window_dims.size` is `4`, `operand.rank` is `6`,
-          and `inserted_window_dims` is {`0`, `2`} then
-          `window_dims_to_operand_dims` is {`0`→`1`, `1`→`3`, `2`→`4`,
-          `3`→`5`}).
-       2. `W`<sub>`in`</sub>[`_`] = `0` otherwise.
-  4. `I` is `W`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
-     addition.
-
-In summary, the scatter operation can be defined as follows.
-
-   - Initialize `output` with `operand`, i.e. for all indices `O` in the
-     `operand` tensor:\
-       `output`[`O`] = `operand`[`O`]
-   - For every index `U` in the `updates` tensor and the corresponding index `O`
-     in the `operand` tensor:\
-       `output`[`O`] = `update_computation`(`output`[`O`], `updates`[`U`])
-
-The order in which updates are applied is non-deterministic. So, when multiple
-indices in `updates` refer to the same index in `operand`, the corresponding
-value in `output` will be non-deterministic.
-
-Note that the first parameter that is passed into the `update_computation` will
-always be the current value from the `output` tensor and the second parameter
-will always be the value from the `updates` tensor. This is important
-specifically for cases when the `update_computation` is _not commutative_.
-
-Informally, the scatter op can be viewed as an _inverse_ of the gather op, i.e.
-the scatter op updates the elements in the input that are extracted by the
-corresponding gather op.
-
-For a detailed informal description and examples, refer to the
-"Informal Description" section under `Gather`.
-
-## Select
-
-See also
-[`XlaBuilder::Select`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Constructs an output array from elements of two input arrays, based on the
-values of a predicate array.
-
-<b> `Select(pred, on_true, on_false)` </b>
-
-Arguments  | Type    | Semantics
----------- | ------- | ------------------
-`pred`     | `XlaOp` | array of type PRED
-`on_true`  | `XlaOp` | array of type T
-`on_false` | `XlaOp` | array of type T
-
-The arrays `on_true` and `on_false` must have the same shape. This is also the
-shape of the output array. The array `pred` must have the same dimensionality as
-`on_true` and `on_false`, with the `PRED` element type.
-
-For each element `P` of `pred`, the corresponding element of the output array is
-taken from `on_true` if the value of `P` is `true`, and from `on_false` if the
-value of `P` is `false`. As a restricted form of [broadcasting]
-(broadcasting.md), `pred` can be a scalar of type `PRED`. In this case, the
-output array is taken wholly from `on_true` if `pred` is `true`, and from
-`on_false` if `pred` is `false`.
-
-Example with non-scalar `pred`:
-
-```
-let pred: PRED[4] = {true, false, false, true};
-let v1: s32[4] = {1, 2, 3, 4};
-let v2: s32[4] = {100, 200, 300, 400};
-==>
-Select(pred, v1, v2) = s32[4]{1, 200, 300, 4};
-```
-
-Example with scalar `pred`:
-
-```
-let pred: PRED = true;
-let v1: s32[4] = {1, 2, 3, 4};
-let v2: s32[4] = {100, 200, 300, 400};
-==>
-Select(pred, v1, v2) = s32[4]{1, 2, 3, 4};
-```
-
-Selections between tuples are supported. Tuples are considered to be scalar
-types for this purpose. If `on_true` and `on_false` are tuples (which must have
-the same shape!) then `pred` has to be a scalar of type `PRED`.
-
-## SelectAndScatter
-
-See also
-[`XlaBuilder::SelectAndScatter`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-This operation can be considered as a composite operation that first computes
-`ReduceWindow` on the `operand` array to select an element from each window, and
-then scatters the `source` array to the indices of the selected elements to
-construct an output array with the same shape as the operand array. The binary
-`select` function is used to select an element from each window by applying it
-across each window, and it is called with the property that the first
-parameter's index vector is lexicographically less than the second parameter's
-index vector. The `select` function returns `true` if the first parameter is
-selected and returns `false` if the second parameter is selected, and the
-function must hold transitivity (i.e., if `select(a, b)` and `select(b, c)` are
-`true`, then `select(a, c)` is also `true`) so that the selected element does
-not depend on the order of the elements traversed for a given window.
-
-The function `scatter` is applied at each selected index in the output array. It
-takes two scalar parameters:
-
-1.  Current value at the selected index in the output array
-2.  The scatter value from `source` that applies to the selected index
-
-It combines the two parameters and returns a scalar value that's used to update
-the value at the selected index in the output array. Initially, all indices of
-the output array are set to `init_value`.
-
-The output array has the same shape as the `operand` array and the `source`
-array must have the same shape as the result of applying a `ReduceWindow`
-operation on the `operand` array. `SelectAndScatter` can be used to
-backpropagate the gradient values for a pooling layer in a neural network.
-
-<b>`SelectAndScatter(operand, select, window_dimensions, window_strides,
-padding, source, init_value, scatter)`</b>
-
-| Arguments           | Type                | Semantics                        |
-| ------------------- | ------------------- | -------------------------------- |
-| `operand`           | `XlaOp`             | array of type T over which the   |
-:                     :                     : windows slide                    :
-| `select`            | `XlaComputation`    | binary computation of type `T, T |
-:                     :                     : -> PRED`, to apply to all        :
-:                     :                     : elements in each window; returns :
-:                     :                     : `true` if the first parameter is :
-:                     :                     : selected and returns `false` if  :
-:                     :                     : the second parameter is selected :
-| `window_dimensions` | `ArraySlice<int64>` | array of integers for window     |
-:                     :                     : dimension values                 :
-| `window_strides`    | `ArraySlice<int64>` | array of integers for window     |
-:                     :                     : stride values                    :
-| `padding`           | `Padding`           | padding type for window          |
-:                     :                     : (Padding\:\:kSame or             :
-:                     :                     : Padding\:\:kValid)               :
-| `source`            | `XlaOp`             | array of type T with the values  |
-:                     :                     : to scatter                       :
-| `init_value`        | `XlaOp`             | scalar value of type T for the   |
-:                     :                     : initial value of the output      :
-:                     :                     : array                            :
-| `scatter`           | `XlaComputation`    | binary computation of type `T, T |
-:                     :                     : -> T`, to apply each scatter     :
-:                     :                     : source element with its          :
-:                     :                     : destination element              :
-
-The figure below shows examples of using `SelectAndScatter`, with the `select`
-function computing the maximal value among its parameters. Note that when the
-windows overlap, as in the figure (2) below, an index of the `operand` array may
-be selected multiple times by different windows. In the figure, the element of
-value 9 is selected by both of the top windows (blue and red) and the binary
-addition `scatter` function produces the output element of value 8 (2 + 6).
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%"
-    src="https://www.tensorflow.org/images/ops_scatter_to_selected_window_element.png">
-</div>
-
-The evaluation order of the `scatter` function is arbitrary and may be
-non-deterministic. Therefore, the `scatter` function should not be overly
-sensitive to reassociation. See the discussion about associativity in the
-context of [`Reduce`](#reduce) for more details.
-
-## Send
-
-See also
-[`XlaBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Send(operand, channel_handle)` </b>
-
-Arguments        | Type            | Semantics
----------------- | --------------- | -----------------------------------------
-`operand`        | `XlaOp`         | data to send (array of type T)
-`channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair
-
-Sends the given operand data to a `Recv` instruction in another computation
-that shares the same channel handle. Does not return any data.
-
-Similar to the `Recv` operation, the client API of `Send` operation represents
-synchronous communication, and is internally decomposed into 2 HLO instructions
-(`Send` and `SendDone`) to enable asynchronous data transfers. See also
-[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
-
-<b>`Send(HloInstruction operand, int64 channel_id)`</b>
-
-Initiates an asynchronous transfer of the operand to the resources allocated by
-the `Recv` instruction with the same channel id. Returns a context, which is
-used by a following `SendDone` instruction to wait for the completion of the
-data transfer. The context is a tuple of {operand (shape), request identifier
-(U32)} and it can only be used by a `SendDone` instruction.
-
-<b> `SendDone(HloInstruction context)` </b>
-
-Given a context created by a `Send` instruction, waits for the data transfer to
-complete.  The instruction does not return any data.
-
-<b> Scheduling of channel instructions </b>
-
-The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`,
-`Send`, `SendDone`) is as below.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:70%" src="../../images/send_recv_order.png">
-</div>
-
-* `Recv` happens before `Send`
-* `Send` happens before `RecvDone`
-* `Recv` happens before `RecvDone`
-* `Send` happens before `SendDone`
-
-When the backend compilers generate a linear schedule for each computation that
-communicates via channel instructions, there must not be cycles across the
-computations. For example, below schedules lead to deadlocks.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/send_recv_schedule.png">
-</div>
-
-## Slice
-
-See also
-[`XlaBuilder::Slice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Slicing extracts a sub-array from the input array. The sub-array is of the same
-rank as the input and contains the values inside a bounding box within the input
-array where the dimensions and indices of the bounding box are given as
-arguments to the slice operation.
-
-<b> `Slice(operand, start_indices, limit_indices)` </b>
-
-| Arguments       | Type                | Semantics                            |
-| --------------- | ------------------- | ------------------------------------ |
-| `operand`       | `XlaOp`             | N dimensional array of type T        |
-| `start_indices` | `ArraySlice<int64>` | List of N integers containing the    |
-:                 :                     : starting indices of the slice for    :
-:                 :                     : each dimension. Values must be       :
-:                 :                     : greater than or equal to zero.       :
-| `limit_indices` | `ArraySlice<int64>` | List of N integers containing the    |
-:                 :                     : ending indices (exclusive) for the   :
-:                 :                     : slice for each dimension. Each value :
-:                 :                     : must be greater than or equal to the :
-:                 :                     : respective `start_indices` value for :
-:                 :                     : the dimension and less than or equal :
-:                 :                     : to the size of the dimension.        :
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-Slice(a, {2}, {4}) produces:
-  {2.0, 3.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-
-Slice(b, {2, 1}, {4, 3}) produces:
-  { { 7.0,  8.0},
-    {10.0, 11.0} }
-```
-
-## Sort
-
-See also
-[`XlaBuilder::Sort`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-There are two versions of the Sort instruction: a single-operand and a
-two-operand version.
-
-<b>`Sort(operand)`</b>
-
-Arguments   | Type    | Semantics
------------ | ------- | --------------------
-`operand`   | `XlaOp` | The operand to sort.
-`dimension` | `int64` | The dimension along which to sort.
-
-Sorts the elements in the operand in ascending order along the provided
-dimension. For example, for a rank-2 (matrix) operand, a `dimension` value of 0
-will sort each column independently, and a `dimension` value of 1 will sort each
-row independently. If the operand's elements have floating point type, and the
-operand contains NaN elements, the order of elements in the output is
-implementation-defined.
-
-<b>`Sort(key, value)`</b>
-
-Sorts both the key and the value operands. The keys are sorted as in the
-single-operand version. The values are sorted according to the order of their
-corresponding keys. For example, if the inputs are `keys = [3, 1]` and
-`values = [42, 50]`, then the output of the sort is the tuple 
-`{[1, 3], [50, 42]}`.
-
-The sort is not guaranteed to be stable, that is, if the keys array contains
-duplicates, the order of their corresponding values may not be preserved.
-
-Arguments   | Type    | Semantics
------------ | ------- | -------------------
-`keys`      | `XlaOp` | The sort keys.
-`values`    | `XlaOp` | The values to sort.
-`dimension` | `int64` | The dimension along which to sort.
-
-The `keys` and `values` must have the same dimensions, but may have different
-element types.
-
-## Transpose
-
-See also the `tf.reshape` operation.
-
-<b>`Transpose(operand)`</b>
-
-Arguments     | Type                | Semantics
-------------- | ------------------- | ------------------------------
-`operand`     | `XlaOp`             | The operand to transpose.
-`permutation` | `ArraySlice<int64>` | How to permute the dimensions.
-
-
-Permutes the operand dimensions with the given permutation, so
-`∀ i . 0 ≤ i < rank ⇒ input_dimensions[permutation[i]] = output_dimensions[i]`.
-
-This is the same as Reshape(operand, permutation,
-                            Permute(permutation, operand.shape.dimensions)).
-
-## Tuple
-
-See also
-[`XlaBuilder::Tuple`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-A tuple containing a variable number of data handles, each of which has its own
-shape.
-
-This is analogous to `std::tuple` in C++. Conceptually:
-
-```
-let v: f32[10] = f32[10]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-let s: s32 = 5;
-let t: (f32[10], s32) = tuple(v, s);
-```
-
-Tuples can be deconstructed (accessed) via the [`GetTupleElement`]
-(#gettupleelement) operation.
-
-## While
-
-See also
-[`XlaBuilder::While`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `While(condition, body, init)` </b>
-
-| Arguments   | Type             | Semantics                                |
-| ----------- | ---------------- | ---------------------------------------- |
-| `condition` | `XlaComputation` | XlaComputation of type `T -> PRED` which |
-:             :                  : defines the termination condition of the :
-:             :                  : loop.                                    :
-| `body`      | `XlaComputation` | XlaComputation of type `T -> T` which    |
-:             :                  : defines the body of the loop.            :
-| `init`      | `T`              | Initial value for the parameter of       |
-:             :                  : `condition` and `body`.                  :
-
-Sequentially executes the `body` until the `condition` fails. This is similar to
-a typical while loop in many other languages except for the differences and
-restrictions listed below.
-
-*   A `While` node returns a value of type `T`, which is the result from the
-    last execution of the `body`.
-*   The shape of the type `T` is statically determined and must be the same
-    across all iterations.
-
-The T parameters of the computations are initialized with the `init` value in
-the first iteration and are automatically updated to the new result from `body`
-in each subsequent iteration.
-
-One main use case of the `While` node is to implement the repeated execution of
-training in neural networks. Simplified pseudocode is shown below with a graph
-that represents the computation. The code can be found in
-[`while_test.cc`](https://www.tensorflow.org/code/tensorflow/compiler/xla/tests/while_test.cc).
-The type `T` in this example is a `Tuple` consisting of an `int32` for the
-iteration count and a `vector[10]` for the accumulator. For 1000 iterations, the
-loop keeps adding a constant vector to the accumulator.
-
-```
-// Pseudocode for the computation.
-init = {0, zero_vector[10]} // Tuple of int32 and float[10].
-result = init;
-while (result(0) < 1000) {
-  iteration = result(0) + 1;
-  new_vector = result(1) + constant_vector[10];
-  result = {iteration, new_vector};
-}
-```
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="https://www.tensorflow.org/images/ops_while.png">
-</div>
-- 
GitLab


From 57d31aa599c83014397a22bbb8f1a27a33b0ade3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 22:30:20 -0700
Subject: [PATCH 1175/1357] Remove dependency on epsilon for diagonal shampoo.

PiperOrigin-RevId: 215857772
---
 .../contrib/opt/python/training/shampoo.py       | 16 +++++++++++-----
 .../contrib/opt/python/training/shampoo_test.py  |  8 ++++----
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/shampoo.py b/tensorflow/contrib/opt/python/training/shampoo.py
index f161521b97..e542f46892 100644
--- a/tensorflow/contrib/opt/python/training/shampoo.py
+++ b/tensorflow/contrib/opt/python/training/shampoo.py
@@ -108,7 +108,8 @@ class ShampooOptimizer(optimizer.Optimizer):
       precond_update_interval: We should update the preconditioners after
                                this many steps. Default = 1. Usually less than
                                svd_interval.
-      epsilon:  epsilon * I_n is added to each mat_gbar_j for stability
+      epsilon:  epsilon * I_n is added to each mat_gbar_j for stability for
+                non-diagonal version of shampoo.
       alpha:  total power of the preconditioners.
       use_iterative_root: should the optimizer use SVD (faster) or the
                           iterative root method (for TPU) for finding the
@@ -394,15 +395,20 @@ class ShampooOptimizer(optimizer.Optimizer):
           assert self._mat_gbar_decay == 1.0
           mat_g_updated = state_ops.scatter_add(mat_g, indices,
                                                 mat_gbar_weight_t * grad_outer)
-          mat_h = math_ops.pow(
-              array_ops.gather(mat_g_updated, indices) + self._epsilon,
-              neg_alpha)
+          mat_g_updated_slice = array_ops.gather(mat_g_updated, indices)
+          mat_h = array_ops.where(
+              math_ops.greater(mat_g_updated_slice, 0),
+              math_ops.pow(mat_g_updated_slice, neg_alpha),
+              array_ops.zeros_like(mat_g_updated_slice))
         else:
           mat_g_updated = self._weighted_average(mat_g,
                                                  self._mat_gbar_decay,
                                                  mat_gbar_decay_t,
                                                  mat_gbar_weight_t * grad_outer)
-          mat_h = math_ops.pow(mat_g_updated + self._epsilon, neg_alpha)
+          mat_h = array_ops.where(
+              math_ops.greater(mat_g_updated, 0),
+              math_ops.pow(mat_g_updated, neg_alpha),
+              array_ops.zeros_like(mat_g_updated))
 
         # Need to do the transpose to ensure that the tensor becomes
         # a d_{i+1} x ... x d_n x d_0 x ... d_i tensor as described above.
diff --git a/tensorflow/contrib/opt/python/training/shampoo_test.py b/tensorflow/contrib/opt/python/training/shampoo_test.py
index a2fd8fbd87..e88c8221a0 100644
--- a/tensorflow/contrib/opt/python/training/shampoo_test.py
+++ b/tensorflow/contrib/opt/python/training/shampoo_test.py
@@ -279,7 +279,7 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
       # Update rule is var = var - lr * gg^{-0.5} * grad
       # lr = 1
       mat_g = (grad_np * grad_np)
-      new_val_np = init_var_np - np.power(mat_g + RIDGE_EPSILON, -0.5) * grad_np
+      new_val_np = init_var_np - np.power(mat_g, -0.5) * grad_np
 
       self.assertAllCloseAccordingToType(
           new_val_np, new_val, atol=TOLERANCE, rtol=TOLERANCE)
@@ -288,7 +288,7 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
       new_val = sess.run(var)
 
       mat_g += (grad_np_2 * grad_np_2)
-      new_val_np -= np.power(mat_g + RIDGE_EPSILON, -0.5) * grad_np_2
+      new_val_np -= np.power(mat_g, -0.5) * grad_np_2
 
       self.assertAllCloseAccordingToType(
           new_val_np, new_val, atol=TOLERANCE, rtol=TOLERANCE)
@@ -339,7 +339,7 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
 
       mat_g1 = np.sum(
           grad_np * grad_np, axis=1, keepdims=True) / grad_np.shape[0]
-      mat_left = np.power(mat_g1 + RIDGE_EPSILON, -0.25)
+      mat_left = np.power(mat_g1, -0.25)
       mat_g2 = np.dot(grad_np.transpose(), grad_np) / grad_np.shape[1]
       mat_right = np_power(mat_g2 + RIDGE_EPSILON * np.eye(size[1]), -0.25)
       new_val_np = init_var_np - np.dot(grad_np * mat_left, mat_right)
@@ -353,7 +353,7 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
 
       mat_g1 += np.sum(
           grad_np_2 * grad_np_2, axis=1, keepdims=True) / grad_np_2.shape[0]
-      mat_left = np.power(mat_g1 + RIDGE_EPSILON, -0.25)
+      mat_left = np.power(mat_g1, -0.25)
       mat_g2 += np.dot(grad_np_2.transpose(), grad_np_2) / grad_np_2.shape[1]
       mat_right = np_power(mat_g2 + RIDGE_EPSILON * np.eye(size[1]), -0.25)
       new_val_np -= np.dot(grad_np_2 * mat_left, mat_right)
-- 
GitLab


From 3b94d75a9e10ef8ef33760d0ef6aad326e1353ba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 01:22:02 -0700
Subject: [PATCH 1176/1357] Merge the different LSTM EvalFloat/EvalHybrid calls
 into a single file.

PiperOrigin-RevId: 215870962
---
 tensorflow/contrib/lite/kernels/BUILD         |  13 +-
 .../kernels/bidirectional_sequence_lstm.cc    | 333 +------
 .../lite/kernels/internal/kernel_utils.cc     | 598 ------------
 .../lite/kernels/internal/kernel_utils.h      | 184 ----
 tensorflow/contrib/lite/kernels/lstm.cc       | 300 +-----
 tensorflow/contrib/lite/kernels/lstm_eval.cc  | 909 ++++++++++++++++++
 tensorflow/contrib/lite/kernels/lstm_eval.h   |  79 ++
 .../kernels/unidirectional_sequence_lstm.cc   | 310 +-----
 8 files changed, 1061 insertions(+), 1665 deletions(-)
 create mode 100644 tensorflow/contrib/lite/kernels/lstm_eval.cc
 create mode 100644 tensorflow/contrib/lite/kernels/lstm_eval.h

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index 95e387814d..68636fb070 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -234,11 +234,11 @@ cc_library(
         ":activation_functor",
         ":eigen_support",
         ":kernel_util",
+        ":lstm_eval",
         ":op_macros",
         ":padding",
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite:string_util",
-        "//tensorflow/contrib/lite:util",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "//tensorflow/contrib/lite/kernels:gemm_support",
         "//tensorflow/contrib/lite/kernels/internal:audio_utils",
@@ -254,6 +254,17 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "lstm_eval",
+    srcs = ["lstm_eval.cc"],
+    hdrs = ["lstm_eval.h"],
+    deps = [
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/kernels/internal:kernel_utils",
+        "//tensorflow/contrib/lite/kernels/internal:tensor_utils",
+    ],
+)
+
 cc_library(
     name = "builtin_ops",
     srcs = ["register.cc"],
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index 0532528f52..a326827b1e 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h"
 #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/lstm_eval.h"
 #include "tensorflow/contrib/lite/kernels/op_macros.h"
 
 namespace tflite {
@@ -694,330 +695,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-TfLiteStatus EvalFloat(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
-    const TfLiteTensor* aux_input_to_input_weights,
-    const TfLiteTensor* aux_input_to_forget_weights,
-    const TfLiteTensor* aux_input_to_cell_weights,
-    const TfLiteTensor* aux_input_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
-    TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
-    TfLiteTensor* cell_state, TfLiteTensor* output) {
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
-  const int n_input = input->dims->data[2];
-  const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0;
-
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existense of only one to the get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  // Index the scratch buffers pointers to the global scratch buffer.
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  const float* input_to_input_weights_ptr =
-      (use_cifg) ? nullptr : input_to_input_weights->data.f;
-  const float* recurrent_to_input_weights_ptr =
-      (use_cifg) ? nullptr : recurrent_to_input_weights->data.f;
-  const float* input_gate_bias_ptr =
-      (use_cifg) ? nullptr : input_gate_bias->data.f;
-  const float* cell_to_input_weights_ptr =
-      (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr;
-  const float* cell_to_forget_weights_ptr =
-      (use_peephole) ? cell_to_forget_weights->data.f : nullptr;
-  const float* cell_to_output_weights_ptr =
-      (use_peephole) ? cell_to_output_weights->data.f : nullptr;
-  const float* projection_weights_ptr =
-      (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  float* aux_input_ptr = nullptr;
-  float* aux_input_to_input_weights_ptr = nullptr;
-  float* aux_input_to_forget_weights_ptr = nullptr;
-  float* aux_input_to_cell_weights_ptr = nullptr;
-  float* aux_input_to_output_weights_ptr = nullptr;
-  if (aux_input_size > 0) {
-    aux_input_ptr = aux_input->data.f;
-    aux_input_to_input_weights_ptr = aux_input_to_input_weights->data.f;
-    aux_input_to_forget_weights_ptr = aux_input_to_forget_weights->data.f;
-    aux_input_to_cell_weights_ptr = aux_input_to_cell_weights->data.f;
-    aux_input_to_output_weights_ptr = aux_input_to_output_weights->data.f;
-  }
-
-  // Loop through the sequence.
-  const int input_step = n_batch * n_input;
-  const int output_step = n_batch * output->dims->data[2];
-  for (int t = 0; t < max_time; t++) {
-    // If this is the forward_sequence, step forward, otherwise step backwards.
-    const int t_rel = forward_sequence ? t : max_time - t - 1;
-    const float* input_ptr = input->data.f + t_rel * input_step;
-    float* output_ptr_time =
-        output->data.f + t_rel * output_step + output_offset;
-
-    kernel_utils::LstmStepWithAuxInput(
-        input_ptr, input_to_input_weights_ptr, input_to_forget_weights->data.f,
-        input_to_cell_weights->data.f, input_to_output_weights->data.f,
-        aux_input_ptr, aux_input_to_input_weights_ptr,
-        aux_input_to_forget_weights_ptr, aux_input_to_cell_weights_ptr,
-        aux_input_to_output_weights_ptr, recurrent_to_input_weights_ptr,
-        recurrent_to_forget_weights->data.f, recurrent_to_cell_weights->data.f,
-        recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
-        cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
-        input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
-        output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
-        params, n_batch, n_cell, n_input, aux_input_size, n_output,
-        activation_state->data.f, cell_state->data.f, input_gate_scratch,
-        forget_gate_scratch, cell_scratch, output_gate_scratch,
-        output_ptr_time);
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus EvalHybrid(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
-    const TfLiteTensor* aux_input_to_input_weights,
-    const TfLiteTensor* aux_input_to_forget_weights,
-    const TfLiteTensor* aux_input_to_cell_weights,
-    const TfLiteTensor* aux_input_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
-    TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
-    TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
-    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
-    TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
-    TfLiteTensor* output_state, TfLiteTensor* cell_state,
-    TfLiteTensor* output) {
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
-  const int n_input = input->dims->data[2];
-  const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0;
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  int8_t* input_to_input_weights_ptr = nullptr;
-  float input_to_input_weights_scale = 1.0f;
-  int8_t* recurrent_to_input_weights_ptr = nullptr;
-  float recurrent_to_input_weights_scale = 1.0f;
-  float* input_gate_bias_ptr = nullptr;
-  if (!use_cifg) {
-    input_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(input_to_input_weights->data.uint8);
-    recurrent_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(recurrent_to_input_weights->data.uint8);
-    input_gate_bias_ptr = input_gate_bias->data.f;
-    input_to_input_weights_scale = input_to_input_weights->params.scale;
-    recurrent_to_input_weights_scale = recurrent_to_input_weights->params.scale;
-  }
-
-  int8_t* cell_to_input_weights_ptr = nullptr;
-  int8_t* cell_to_forget_weights_ptr = nullptr;
-  int8_t* cell_to_output_weights_ptr = nullptr;
-  float cell_to_input_weights_scale = 1.0f;
-  float cell_to_forget_weights_scale = 1.0f;
-  float cell_to_output_weights_scale = 1.0f;
-  if (use_peephole) {
-    if (!use_cifg) {
-      cell_to_input_weights_ptr =
-          reinterpret_cast<int8_t*>(cell_to_input_weights->data.uint8);
-      cell_to_input_weights_scale = cell_to_input_weights->params.scale;
-    }
-    cell_to_forget_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_forget_weights->data.uint8);
-    cell_to_output_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_output_weights->data.uint8);
-    cell_to_forget_weights_scale = cell_to_forget_weights->params.scale;
-    cell_to_output_weights_scale = cell_to_output_weights->params.scale;
-  }
-
-  const int8_t* projection_weights_ptr =
-      (projection_weights == nullptr)
-          ? nullptr
-          : reinterpret_cast<int8_t*>(projection_weights->data.uint8);
-  const float projection_weights_scale =
-      (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const int8_t* input_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_forget_weights->data.uint8);
-  const float input_to_forget_weights_scale =
-      input_to_forget_weights->params.scale;
-  const int8_t* input_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_cell_weights->data.uint8);
-  const float input_to_cell_weights_scale = input_to_cell_weights->params.scale;
-  const int8_t* input_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_output_weights->data.uint8);
-  const float input_to_output_weights_scale =
-      input_to_output_weights->params.scale;
-  const int8_t* recurrent_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_forget_weights->data.uint8);
-  const float recurrent_to_forget_weights_scale =
-      recurrent_to_forget_weights->params.scale;
-  const int8_t* recurrent_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_cell_weights->data.uint8);
-  const float recurrent_to_cell_weights_scale =
-      recurrent_to_cell_weights->params.scale;
-  const int8_t* recurrent_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_output_weights->data.uint8);
-  const float recurrent_to_output_weights_scale =
-      recurrent_to_output_weights->params.scale;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* output_state_ptr = output_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-
-  // Temporary storage for quantized values and scaling factors.
-  int8_t* quantized_input_ptr =
-      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
-  int8_t* quantized_aux_input_ptr =
-      (aux_input_quantized == nullptr)
-          ? nullptr
-          : reinterpret_cast<int8_t*>(aux_input_quantized->data.uint8);
-  int8_t* quantized_output_state_ptr =
-      reinterpret_cast<int8_t*>(output_state_quantized->data.uint8);
-  int8_t* quantized_cell_state_ptr =
-      reinterpret_cast<int8_t*>(cell_state_quantized->data.uint8);
-  float* scaling_factors_ptr = scaling_factors->data.f;
-  float* prod_scaling_factors_ptr = prod_scaling_factors->data.f;
-  float* recovered_cell_weights_ptr = recovered_cell_weights->data.f;
-
-  // Auxiliary input and weights.
-  float* aux_input_ptr = nullptr;
-  int8_t* aux_input_to_input_weights_ptr = nullptr;
-  int8_t* aux_input_to_forget_weights_ptr = nullptr;
-  int8_t* aux_input_to_cell_weights_ptr = nullptr;
-  int8_t* aux_input_to_output_weights_ptr = nullptr;
-  float aux_input_to_input_weights_scale = 0.0f;
-  float aux_input_to_forget_weights_scale = 0.0f;
-  float aux_input_to_cell_weights_scale = 0.0f;
-  float aux_input_to_output_weights_scale = 0.0f;
-  if (aux_input_size > 0) {
-    aux_input_ptr = aux_input->data.f;
-    aux_input_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(aux_input_to_input_weights->data.uint8);
-    aux_input_to_forget_weights_ptr =
-        reinterpret_cast<int8_t*>(aux_input_to_forget_weights->data.uint8);
-    aux_input_to_cell_weights_ptr =
-        reinterpret_cast<int8_t*>(aux_input_to_cell_weights->data.uint8);
-    aux_input_to_output_weights_ptr =
-        reinterpret_cast<int8_t*>(aux_input_to_output_weights->data.uint8);
-    aux_input_to_input_weights_scale = aux_input_to_input_weights->params.scale;
-    aux_input_to_forget_weights_scale =
-        aux_input_to_forget_weights->params.scale;
-    aux_input_to_cell_weights_scale = aux_input_to_cell_weights->params.scale;
-    aux_input_to_output_weights_scale =
-        aux_input_to_output_weights->params.scale;
-  }
-
-  // Feed the sequence into the LSTM step-by-step.
-  const int input_step = n_batch * n_input;
-  const int output_step = n_batch * output->dims->data[2];
-  for (int t = 0; t < max_time; t++) {
-    // If this is the forward_sequence, step forward, otherwise step backwards.
-    const int t_rel = forward_sequence ? t : max_time - t - 1;
-    const float* input_ptr = input->data.f + t_rel * input_step;
-    float* output_ptr = output->data.f + t_rel * output_step + output_offset;
-
-    kernel_utils::LstmStepWithAuxInput(
-        input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
-        input_to_forget_weights_ptr, input_to_forget_weights_scale,
-        input_to_cell_weights_ptr, input_to_cell_weights_scale,
-        input_to_output_weights_ptr, input_to_output_weights_scale,
-        aux_input_ptr, aux_input_to_input_weights_ptr,
-        aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
-        aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
-        aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
-        aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
-        recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
-        recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
-        recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
-        recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
-        cell_to_input_weights_scale, cell_to_forget_weights_ptr,
-        cell_to_forget_weights_scale, cell_to_output_weights_ptr,
-        cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr,
-        cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
-        projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell,
-        n_input, aux_input_size, n_output, input_gate_scratch,
-        forget_gate_scratch, cell_scratch, output_gate_scratch,
-        scaling_factors_ptr, prod_scaling_factors_ptr,
-        recovered_cell_weights_ptr, quantized_input_ptr,
-        quantized_aux_input_ptr, quantized_output_state_ptr,
-        quantized_cell_state_ptr, output_state_ptr, cell_state_ptr, output_ptr);
-  }
-
-  return kTfLiteOk;
-}
-
 // The LSTM Op engine.
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
@@ -1157,7 +834,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (fw_input_to_output_weights->type) {
     case kTfLiteFloat32: {
-      TfLiteStatus fw_pass_status = EvalFloat(
+      TfLiteStatus fw_pass_status = lstm_eval::EvalFloat(
           input, fw_input_to_input_weights, fw_input_to_forget_weights,
           fw_input_to_cell_weights, fw_input_to_output_weights,
           fw_recurrent_to_input_weights, fw_recurrent_to_forget_weights,
@@ -1172,7 +849,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_activation_state, fw_cell_state, fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
 
-      TfLiteStatus bw_pass_status = EvalFloat(
+      TfLiteStatus bw_pass_status = lstm_eval::EvalFloat(
           input, bw_input_to_input_weights, bw_input_to_forget_weights,
           bw_input_to_cell_weights, bw_input_to_output_weights,
           bw_recurrent_to_input_weights, bw_recurrent_to_forget_weights,
@@ -1208,7 +885,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       TfLiteTensor* recovered_cell_weights =
           GetTemporary(context, node, kRecoveredCellWeights);
 
-      TfLiteStatus fw_pass_status = EvalHybrid(
+      TfLiteStatus fw_pass_status = lstm_eval::EvalHybrid(
           input, fw_input_to_input_weights, fw_input_to_forget_weights,
           fw_input_to_cell_weights, fw_input_to_output_weights,
           fw_recurrent_to_input_weights, fw_recurrent_to_forget_weights,
@@ -1226,7 +903,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
 
-      TfLiteStatus bw_pass_status = EvalHybrid(
+      TfLiteStatus bw_pass_status = lstm_eval::EvalHybrid(
           input, bw_input_to_input_weights, bw_input_to_forget_weights,
           bw_input_to_cell_weights, bw_input_to_output_weights,
           bw_recurrent_to_input_weights, bw_recurrent_to_forget_weights,
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
index 56e9367878..083e5839bd 100644
--- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
@@ -169,603 +169,5 @@ void RnnBatchStep(
                                         hidden_state_ptr_batch);
 }
 
-void LstmStep(
-    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
-    const float* input_to_forget_weights_ptr,
-    const float* input_to_cell_weights_ptr,
-    const float* input_to_output_weights_ptr,
-    const float* recurrent_to_input_weights_ptr,
-    const float* recurrent_to_forget_weights_ptr,
-    const float* recurrent_to_cell_weights_ptr,
-    const float* recurrent_to_output_weights_ptr,
-    const float* cell_to_input_weights_ptr,
-    const float* cell_to_forget_weights_ptr,
-    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
-    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
-    int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr,
-    float* cell_state_ptr, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* output_ptr_batch) {
-  LstmStepWithAuxInput(
-      input_ptr_batch, input_to_input_weights_ptr, input_to_forget_weights_ptr,
-      input_to_cell_weights_ptr, input_to_output_weights_ptr,
-      /*aux_input_ptr_batch=*/nullptr,
-      /*aux_input_to_input_weights_ptr=*/nullptr,
-      /*aux_input_to_forget_weights_ptr=*/nullptr,
-      /*aux_input_to_cell_weights_ptr=*/nullptr,
-      /*aux_input_to_output_weights_ptr=*/nullptr,
-      recurrent_to_input_weights_ptr, recurrent_to_forget_weights_ptr,
-      recurrent_to_cell_weights_ptr, recurrent_to_output_weights_ptr,
-      cell_to_input_weights_ptr, cell_to_forget_weights_ptr,
-      cell_to_output_weights_ptr, input_gate_bias_ptr, forget_gate_bias_ptr,
-      cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
-      projection_bias_ptr, params, n_batch, n_cell, n_input, /*n_aux_input=*/0,
-      n_output, output_state_ptr, cell_state_ptr, input_gate_scratch,
-      forget_gate_scratch, cell_scratch, output_gate_scratch, output_ptr_batch);
-}
-
-void LstmStepWithAuxInput(
-    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
-    const float* input_to_forget_weights_ptr,
-    const float* input_to_cell_weights_ptr,
-    const float* input_to_output_weights_ptr, const float* aux_input_ptr_batch,
-    const float* aux_input_to_input_weights_ptr,
-    const float* aux_input_to_forget_weights_ptr,
-    const float* aux_input_to_cell_weights_ptr,
-    const float* aux_input_to_output_weights_ptr,
-    const float* recurrent_to_input_weights_ptr,
-    const float* recurrent_to_forget_weights_ptr,
-    const float* recurrent_to_cell_weights_ptr,
-    const float* recurrent_to_output_weights_ptr,
-    const float* cell_to_input_weights_ptr,
-    const float* cell_to_forget_weights_ptr,
-    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
-    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
-    int n_batch, int n_cell, int n_input, int n_aux_input, int n_output,
-    float* output_state_ptr, float* cell_state_ptr, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* output_ptr_batch) {
-  // Since we have already checked that weights are all there or none, we can
-  // check the existense of only one to the get the condition.
-  const bool use_cifg = (input_to_input_weights_ptr == nullptr);
-  const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
-  // Initialize scratch buffers with bias.
-  if (!use_cifg) {
-    tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch,
-                                          input_gate_scratch);
-  }
-  tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
-                                        forget_gate_scratch);
-  tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
-                                        cell_scratch);
-  tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
-                                        output_gate_scratch);
-
-  // For each batch and cell: compute input_weight * input.
-  if (!use_cifg) {
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        input_to_input_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
-        input_gate_scratch, /*result_stride=*/1);
-  }
-
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      input_to_forget_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
-      forget_gate_scratch, /*result_stride=*/1);
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      input_to_cell_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
-      cell_scratch, /*result_stride=*/1);
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      input_to_output_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
-      output_gate_scratch, /*result_stride=*/1);
-
-  // If auxiliary input is available then compute aux_input_weight * aux_input
-  if (aux_input_ptr_batch != nullptr) {
-    if (!use_cifg) {
-      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-          aux_input_to_input_weights_ptr, n_cell, n_aux_input,
-          aux_input_ptr_batch, n_batch, input_gate_scratch,
-          /*result_stride=*/1);
-    }
-
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        aux_input_to_forget_weights_ptr, n_cell, n_aux_input,
-        aux_input_ptr_batch, n_batch, forget_gate_scratch, /*result_stride=*/1);
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        aux_input_to_cell_weights_ptr, n_cell, n_aux_input, aux_input_ptr_batch,
-        n_batch, cell_scratch, /*result_stride=*/1);
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        aux_input_to_output_weights_ptr, n_cell, n_aux_input,
-        aux_input_ptr_batch, n_batch, output_gate_scratch, /*result_stride=*/1);
-  }
-
-  // For each batch and cell: compute recurrent_weight * output_state.
-  if (!use_cifg) {
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        recurrent_to_input_weights_ptr, n_cell, n_output, output_state_ptr,
-        n_batch, input_gate_scratch, /*result_stride=*/1);
-  }
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      recurrent_to_forget_weights_ptr, n_cell, n_output, output_state_ptr,
-      n_batch, forget_gate_scratch,
-      /*result_stride=*/1);
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      recurrent_to_cell_weights_ptr, n_cell, n_output, output_state_ptr,
-      n_batch, cell_scratch, /*result_stride=*/1);
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      recurrent_to_output_weights_ptr, n_cell, n_output, output_state_ptr,
-      n_batch, output_gate_scratch,
-      /*result_stride=*/1);
-
-  // For each batch and cell: update input gate.
-  if (!use_cifg) {
-    if (use_peephole) {
-      tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-          cell_to_input_weights_ptr, n_cell, cell_state_ptr, n_batch,
-          input_gate_scratch);
-    }
-    tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
-                                       input_gate_scratch);
-  }
-
-  // For each batch and cell: update forget gate.
-  if (use_peephole) {
-    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-        cell_to_forget_weights_ptr, n_cell, cell_state_ptr, n_batch,
-        forget_gate_scratch);
-  }
-  tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
-                                     forget_gate_scratch);
-
-  // For each batch and cell: update the cell.
-  tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr,
-                                         n_batch * n_cell, cell_state_ptr);
-  tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
-                                        params->activation, cell_scratch);
-  if (use_cifg) {
-    tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell,
-                             forget_gate_scratch);
-    tensor_utils::VectorVectorCwiseProductAccumulate(
-        cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr);
-  } else {
-    tensor_utils::VectorVectorCwiseProductAccumulate(
-        cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr);
-  }
-  if (params->cell_clip > 0.0) {
-    tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell,
-                             params->cell_clip, cell_state_ptr);
-  }
-
-  // For each batch and cell: update the output gate.
-  if (use_peephole) {
-    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-        cell_to_output_weights_ptr, n_cell, cell_state_ptr, n_batch,
-        output_gate_scratch);
-  }
-  tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
-                                     output_gate_scratch);
-  tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
-                                        params->activation, cell_scratch);
-  tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
-                                         n_batch * n_cell, output_gate_scratch);
-
-  // For each batch: update the projection and output_state.
-  const bool use_projection_weight = (projection_weights_ptr != nullptr);
-  const bool use_projection_bias = (projection_bias_ptr != nullptr);
-  if (use_projection_weight) {
-    if (use_projection_bias) {
-      tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
-                                            n_batch, output_ptr_batch);
-    } else {
-      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
-    }
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch,
-        output_ptr_batch, /*result_stride=*/1);
-    if (params->proj_clip > 0.0) {
-      tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
-                               params->proj_clip, output_ptr_batch);
-    }
-  } else {
-    tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
-                             output_ptr_batch);
-  }
-  tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
-                           output_state_ptr);
-}
-
-void LstmStep(
-    const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
-    float input_to_input_weights_scale,
-    const int8_t* input_to_forget_weights_ptr,
-    float input_to_forget_weights_scale,
-    const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale,
-    const int8_t* input_to_output_weights_ptr,
-    float input_to_output_weights_scale,
-    const int8_t* recurrent_to_input_weights_ptr,
-    float recurrent_to_input_weights_scale,
-    const int8_t* recurrent_to_forget_weights_ptr,
-    float recurrent_to_forget_weights_scale,
-    const int8_t* recurrent_to_cell_weights_ptr,
-    float recurrent_to_cell_weights_scale,
-    const int8_t* recurrent_to_output_weights_ptr,
-    float recurrent_to_output_weights_scale,
-    const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale,
-    const int8_t* cell_to_forget_weights_ptr,
-    float cell_to_forget_weights_scale,
-    const int8_t* cell_to_output_weights_ptr,
-    float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
-    float projection_weights_scale, const float* projection_bias_ptr,
-    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-    int n_output, float* input_gate_scratch, float* forget_gate_scratch,
-    float* cell_scratch, float* output_gate_scratch, float* scaling_factors,
-    float* product_scaling_factors, float* recovered_cell_weights,
-    int8_t* quantized_input_ptr_batch, int8_t* quantized_output_state_ptr,
-    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
-    float* cell_state_ptr, float* output_ptr_batch) {
-  LstmStepWithAuxInput(
-      input_ptr_batch, input_to_input_weights_ptr, input_to_input_weights_scale,
-      input_to_forget_weights_ptr, input_to_forget_weights_scale,
-      input_to_cell_weights_ptr, input_to_cell_weights_scale,
-      input_to_output_weights_ptr, input_to_output_weights_scale,
-      /*aux_input_ptr_batch=*/nullptr,
-      /*aux_input_to_input_weights_ptr=*/nullptr,
-      /*aux_input_to_input_weights_scale=*/0.0f,
-      /*aux_input_to_forget_weights_ptr=*/nullptr,
-      /*aux_input_to_forget_weights_scale=*/0.0f,
-      /*aux_input_to_cell_weights_ptr=*/nullptr,
-      /*aux_input_to_cell_weights_scale=*/0.0f,
-      /*aux_input_to_output_weights_ptr=*/nullptr,
-      /*aux_input_to_output_weights_scale=*/0.0f,
-      recurrent_to_input_weights_ptr, recurrent_to_input_weights_scale,
-      recurrent_to_forget_weights_ptr, recurrent_to_forget_weights_scale,
-      recurrent_to_cell_weights_ptr, recurrent_to_cell_weights_scale,
-      recurrent_to_output_weights_ptr, recurrent_to_output_weights_scale,
-      cell_to_input_weights_ptr, cell_to_input_weights_scale,
-      cell_to_forget_weights_ptr, cell_to_forget_weights_scale,
-      cell_to_output_weights_ptr, cell_to_output_weights_scale,
-      input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr,
-      output_gate_bias_ptr, projection_weights_ptr, projection_weights_scale,
-      projection_bias_ptr, params, n_batch, n_cell, n_input,
-      /*n_aux_input=*/0, n_output, input_gate_scratch, forget_gate_scratch,
-      cell_scratch, output_gate_scratch, scaling_factors,
-      product_scaling_factors, recovered_cell_weights,
-      quantized_input_ptr_batch,
-      /*quantized_aux_input_ptr_batch=*/nullptr, quantized_output_state_ptr,
-      quantized_cell_state_ptr, output_state_ptr, cell_state_ptr,
-      output_ptr_batch);
-    }
-
-    void LstmStepWithAuxInput(
-        const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
-        float input_to_input_weights_scale,
-        const int8_t* input_to_forget_weights_ptr,
-        float input_to_forget_weights_scale,
-        const int8_t* input_to_cell_weights_ptr,
-        float input_to_cell_weights_scale,
-        const int8_t* input_to_output_weights_ptr,
-        float input_to_output_weights_scale, const float* aux_input_ptr_batch,
-        const int8_t* aux_input_to_input_weights_ptr,
-        float aux_input_to_input_weights_scale,
-        const int8_t* aux_input_to_forget_weights_ptr,
-        float aux_input_to_forget_weights_scale,
-        const int8_t* aux_input_to_cell_weights_ptr,
-        float aux_input_to_cell_weights_scale,
-        const int8_t* aux_input_to_output_weights_ptr,
-        float aux_input_to_output_weights_scale,
-        const int8_t* recurrent_to_input_weights_ptr,
-        float recurrent_to_input_weights_scale,
-        const int8_t* recurrent_to_forget_weights_ptr,
-        float recurrent_to_forget_weights_scale,
-        const int8_t* recurrent_to_cell_weights_ptr,
-        float recurrent_to_cell_weights_scale,
-        const int8_t* recurrent_to_output_weights_ptr,
-        float recurrent_to_output_weights_scale,
-        const int8_t* cell_to_input_weights_ptr,
-        float cell_to_input_weights_scale,
-        const int8_t* cell_to_forget_weights_ptr,
-        float cell_to_forget_weights_scale,
-        const int8_t* cell_to_output_weights_ptr,
-        float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
-        const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-        const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
-        float projection_weights_scale, const float* projection_bias_ptr,
-        const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-        int n_aux_input, int n_output, float* input_gate_scratch,
-        float* forget_gate_scratch, float* cell_scratch,
-        float* output_gate_scratch, float* scaling_factors,
-        float* product_scaling_factors, float* recovered_cell_weights,
-        int8_t* quantized_input_ptr_batch,
-        int8_t* quantized_aux_input_ptr_batch,
-        int8_t* quantized_output_state_ptr, int8_t* quantized_cell_state_ptr,
-        float* output_state_ptr, float* cell_state_ptr,
-        float* output_ptr_batch) {
-      // Since we have already checked that weights are all there or none, we
-      // can check the existense of only one to the get the condition.
-      const bool use_cifg = (input_to_input_weights_ptr == nullptr);
-      const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
-      // Initialize scratch buffers with bias.
-      if (!use_cifg) {
-        tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
-                                              n_batch, input_gate_scratch);
-      }
-      tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell,
-                                            n_batch, forget_gate_scratch);
-      tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
-                                            cell_scratch);
-      tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell,
-                                            n_batch, output_gate_scratch);
-
-      if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) {
-        // Save quantization and matmul computation for all zero input.
-        float unused_min, unused_max;
-        for (int b = 0; b < n_batch; ++b) {
-          const int offset = b * n_input;
-          tensor_utils::SymmetricQuantizeFloats(
-              input_ptr_batch + offset, n_input,
-              quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-              &scaling_factors[b]);
-        }
-        // For each batch and cell: compute input_weight * input.
-        if (!use_cifg) {
-          for (int b = 0; b < n_batch; ++b) {
-            product_scaling_factors[b] =
-                scaling_factors[b] * input_to_input_weights_scale;
-          }
-          tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-              input_to_input_weights_ptr, n_cell, n_input,
-              quantized_input_ptr_batch, product_scaling_factors, n_batch,
-              input_gate_scratch, /*result_stride=*/1);
-        }
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * input_to_forget_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            input_to_forget_weights_ptr, n_cell, n_input,
-            quantized_input_ptr_batch, product_scaling_factors, n_batch,
-            forget_gate_scratch,
-            /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * input_to_cell_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            input_to_cell_weights_ptr, n_cell, n_input,
-            quantized_input_ptr_batch, product_scaling_factors, n_batch,
-            cell_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * input_to_output_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            input_to_output_weights_ptr, n_cell, n_input,
-            quantized_input_ptr_batch, product_scaling_factors, n_batch,
-            output_gate_scratch,
-            /*result_stride=*/1);
-      }
-
-      if (aux_input_ptr_batch != nullptr &&
-          !tensor_utils::IsZeroVector(aux_input_ptr_batch, n_batch * n_input)) {
-        // Save quantization and matmul computation for all zero input.
-        float unused_min, unused_max;
-        for (int b = 0; b < n_batch; ++b) {
-          const int offset = b * n_input;
-          tensor_utils::SymmetricQuantizeFloats(
-              aux_input_ptr_batch + offset, n_input,
-              quantized_aux_input_ptr_batch + offset, &unused_min, &unused_max,
-              &scaling_factors[b]);
-        }
-        // For each batch and cell: compute input_weight * input.
-        if (!use_cifg) {
-          for (int b = 0; b < n_batch; ++b) {
-            product_scaling_factors[b] =
-                scaling_factors[b] * aux_input_to_input_weights_scale;
-          }
-          tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-              aux_input_to_input_weights_ptr, n_cell, n_input,
-              quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
-              input_gate_scratch, /*result_stride=*/1);
-        }
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * aux_input_to_forget_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            aux_input_to_forget_weights_ptr, n_cell, n_input,
-            quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
-            forget_gate_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * aux_input_to_cell_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            aux_input_to_cell_weights_ptr, n_cell, n_input,
-            quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
-            cell_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * aux_input_to_output_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            aux_input_to_output_weights_ptr, n_cell, n_input,
-            quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
-            output_gate_scratch, /*result_stride=*/1);
-      }
-
-      if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) {
-        // Save quantization and matmul computation for all zero input.
-        float unused_min, unused_max;
-        for (int b = 0; b < n_batch; ++b) {
-          const int offset = b * n_output;
-          tensor_utils::SymmetricQuantizeFloats(
-              output_state_ptr + offset, n_output,
-              quantized_output_state_ptr + offset, &unused_min, &unused_max,
-              &scaling_factors[b]);
-        }
-        // For each batch and cell: compute recurrent_weight * output_state.
-        if (!use_cifg) {
-          for (int b = 0; b < n_batch; ++b) {
-            product_scaling_factors[b] =
-                scaling_factors[b] * recurrent_to_input_weights_scale;
-          }
-          tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-              recurrent_to_input_weights_ptr, n_cell, n_output,
-              quantized_output_state_ptr, product_scaling_factors, n_batch,
-              input_gate_scratch, /*result_stride=*/1);
-        }
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * recurrent_to_forget_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            recurrent_to_forget_weights_ptr, n_cell, n_output,
-            quantized_output_state_ptr, product_scaling_factors, n_batch,
-            forget_gate_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * recurrent_to_cell_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            recurrent_to_cell_weights_ptr, n_cell, n_output,
-            quantized_output_state_ptr, product_scaling_factors, n_batch,
-            cell_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * recurrent_to_output_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            recurrent_to_output_weights_ptr, n_cell, n_output,
-            quantized_output_state_ptr, product_scaling_factors, n_batch,
-            output_gate_scratch, /*result_stride=*/1);
-      }
-
-      // Save quantization and matmul computation for all zero input.
-      bool is_cell_state_all_zeros =
-          tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell);
-
-      // For each batch and cell: update input gate.
-      if (!use_cifg) {
-        if (use_peephole && !is_cell_state_all_zeros) {
-          tensor_utils::VectorScalarMultiply(cell_to_input_weights_ptr, n_cell,
-                                             cell_to_input_weights_scale,
-                                             recovered_cell_weights);
-          tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-              recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
-              input_gate_scratch);
-        }
-        tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
-                                           input_gate_scratch);
-      }
-
-      // For each batch and cell: update forget gate.
-      if (use_peephole && !is_cell_state_all_zeros) {
-        tensor_utils::VectorScalarMultiply(cell_to_forget_weights_ptr, n_cell,
-                                           cell_to_forget_weights_scale,
-                                           recovered_cell_weights);
-        tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-            recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
-            forget_gate_scratch);
-      }
-      tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
-                                         forget_gate_scratch);
-
-      // For each batch and cell: update the cell.
-      tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch,
-                                             cell_state_ptr, n_batch * n_cell,
-                                             cell_state_ptr);
-      tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
-                                            params->activation, cell_scratch);
-      if (use_cifg) {
-        tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell,
-                                 forget_gate_scratch);
-        tensor_utils::VectorVectorCwiseProductAccumulate(
-            cell_scratch, forget_gate_scratch, n_batch * n_cell,
-            cell_state_ptr);
-      } else {
-        tensor_utils::VectorVectorCwiseProductAccumulate(
-            cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr);
-      }
-      if (params->cell_clip > 0.0) {
-        tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell,
-                                 params->cell_clip, cell_state_ptr);
-      }
-
-      is_cell_state_all_zeros =
-          tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell);
-      // For each batch and cell: update the output gate.
-      if (use_peephole && !is_cell_state_all_zeros) {
-        tensor_utils::VectorScalarMultiply(cell_to_output_weights_ptr, n_cell,
-                                           cell_to_output_weights_scale,
-                                           recovered_cell_weights);
-        tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-            recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
-            output_gate_scratch);
-      }
-      tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
-                                         output_gate_scratch);
-      tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
-                                            params->activation, cell_scratch);
-      tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
-                                             n_batch * n_cell,
-                                             output_gate_scratch);
-
-      // For each batch: update the projection and output_state.
-      const bool use_projection_weight = (projection_weights_ptr != nullptr);
-      const bool use_projection_bias = (projection_bias_ptr != nullptr);
-      if (use_projection_weight) {
-        if (use_projection_bias) {
-          tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
-                                                n_batch, output_ptr_batch);
-        } else {
-          tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
-        }
-        if (!tensor_utils::IsZeroVector(output_gate_scratch,
-                                        n_batch * n_cell)) {
-          // Save quantization and matmul computation for all zero input.
-          float unused_min, unused_max;
-          for (int b = 0; b < n_batch; ++b) {
-            const int offset = b * n_cell;
-            tensor_utils::SymmetricQuantizeFloats(
-                output_gate_scratch + offset, n_cell,
-                quantized_cell_state_ptr + offset, &unused_min, &unused_max,
-                &scaling_factors[b]);
-          }
-          for (int b = 0; b < n_batch; ++b) {
-            product_scaling_factors[b] =
-                scaling_factors[b] * projection_weights_scale;
-          }
-          tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-              projection_weights_ptr, n_output, n_cell,
-              quantized_cell_state_ptr, product_scaling_factors, n_batch,
-              output_ptr_batch,
-              /*result_stride=*/1);
-        }
-        if (params->proj_clip > 0.0) {
-          tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
-                                   params->proj_clip, output_ptr_batch);
-        }
-      } else {
-        tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
-                                 output_ptr_batch);
-      }
-      tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
-                               output_state_ptr);
-    }
-
 }  // namespace kernel_utils
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
index b5558cce55..74e0a4a53d 100644
--- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
+++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
@@ -76,190 +76,6 @@ void RnnBatchStep(
     int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors,
     float* hidden_state_ptr_batch, float* output_ptr_batch);
 
-// Performs an LSTM batch inference step for input specified by input_ptr_batch.
-// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and
-// biases (*_bias_ptr), and buffers (*_scratch), along with additional
-// parameters:
-//  - params: various LSTM params including activation, clipping, etc.,
-//  - n_batch: size of batch,
-//  - n_cell: number of cells (or units),
-//  - n_input: the input size,
-//  - n_output: the output size.
-//
-// The pointers to the cell and output state and the output are updated.
-//
-// The pointers with the suffix "_batch" point to data aligned in batch_major
-// order, and each step processes batch_size many inputs from input_ptr_batch,
-// and updates batch_size many cell and output states.
-void LstmStep(
-    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
-    const float* input_to_forget_weights_ptr,
-    const float* input_to_cell_weights_ptr,
-    const float* input_to_output_weights_ptr,
-    const float* recurrent_to_input_weights_ptr,
-    const float* recurrent_to_forget_weights_ptr,
-    const float* recurrent_to_cell_weights_ptr,
-    const float* recurrent_to_output_weights_ptr,
-    const float* cell_to_input_weights_ptr,
-    const float* cell_to_forget_weights_ptr,
-    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
-    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
-    int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr,
-    float* cell_state_ptr, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* output_ptr_batch);
-
-// Same as above but includes an auxiliary input with the corresponding weights.
-void LstmStepWithAuxInput(
-    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
-    const float* input_to_forget_weights_ptr,
-    const float* input_to_cell_weights_ptr,
-    const float* input_to_output_weights_ptr, const float* aux_input_ptr_batch,
-    const float* aux_input_to_input_weights_ptr,
-    const float* aux_input_to_forget_weights_ptr,
-    const float* aux_input_to_cell_weights_ptr,
-    const float* aux_input_to_output_weights_ptr,
-    const float* recurrent_to_input_weights_ptr,
-    const float* recurrent_to_forget_weights_ptr,
-    const float* recurrent_to_cell_weights_ptr,
-    const float* recurrent_to_output_weights_ptr,
-    const float* cell_to_input_weights_ptr,
-    const float* cell_to_forget_weights_ptr,
-    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
-    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
-    int n_batch, int n_cell, int n_input, int n_aux_input, int n_output,
-    float* output_state_ptr, float* cell_state_ptr, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* output_ptr_batch);
-
-// Same as above but with quantized weight matrices. In detail:
-// Input of size 'n_batch * n_input':
-//   input_ptr_batch
-//
-// LSTM weights:
-// Quantized input weights of size 'n_cell * n_input':
-//   input_to_input_weights            - optional (can be nullptr)
-//   input_to_forget_weights
-//   input_to_cell_weights
-//   input_to_input_weights
-// Quantized recurrent weights of size 'n_cell * n_output':
-//   recurrent_to_input_weights        - optional
-//   recurrent_to_forget_weights
-//   recurrent_to_cell_weights
-//   recurrent_to_input_weights
-// Quantized peephole weights of size 'n_cell', representing diagonal matrices.
-//   cell_to_input_weights             - optional
-//   cell_to_cell_weights              - optional
-//   cell_to_output_weights            - optional
-// Quantized projection weights of size 'n_output * n_cell'
-//   projection_weights_ptr            - optional
-// Weight scales (scalars) for each of the weights above.
-//   input_to_input_weights_scale      - optional
-//   input_to_forget_weights_scale
-//   input_to_cell_weights_scale
-//   input_to_output_weights_scale
-//   recurrent_to_input_weights_scale  - optional
-//   recurrent_to_forget_weights_scale
-//   recurrent_to_cell_weights_scale
-//   recurrent_to_output_weights_scale
-//   cell_to_input_weights_scale,
-//   cell_to_forget_weights_scale,
-//   cell_to_output_weights_scale,
-//   projection_weights_scale          - optional
-// Gate biases of size 'n_cell':
-//   input_gate_bias_ptr               - optional
-//   forget_gate_bias_ptr
-//   cell_gate_bias_ptr
-//   output_gate_bias_ptr
-//
-// Temporary pre-allocated storage for quantized values:
-//   quantized_input_ptr_batch (same size as input_ptr_batch)
-//   quantized_output_state_ptr (same size as output_state_ptr)
-//   quantized_cell_state_ptr (same size as cell_state_ptr)
-// Temporary pre-allocated storage for recovered values:
-//   recovered_cell_weights (same size as cell_to_*_weights)
-//
-// Outputs:
-//   output_state_ptr - size 'n_batch * n_output'
-//   cell_state_ptr   - size 'n_batch * n_cell'
-//   output_ptr_batch - size 'n_batch * n_output'
-void LstmStep(
-    const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
-    float input_to_input_weights_scale,
-    const int8_t* input_to_forget_weights_ptr,
-    float input_to_forget_weights_scale,
-    const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale,
-    const int8_t* input_to_output_weights_ptr,
-    float input_to_output_weights_scale,
-    const int8_t* recurrent_to_input_weights_ptr,
-    float recurrent_to_input_weights_scale,
-    const int8_t* recurrent_to_forget_weights_ptr,
-    float recurrent_to_forget_weights_scale,
-    const int8_t* recurrent_to_cell_weights_ptr,
-    float recurrent_to_cell_weights_scale,
-    const int8_t* recurrent_to_output_weights_ptr,
-    float recurrent_to_output_weights_scale,
-    const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale,
-    const int8_t* cell_to_forget_weights_ptr,
-    float cell_to_forget_weights_scale,
-    const int8_t* cell_to_output_weights_ptr,
-    float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
-    float projection_weights_scale, const float* projection_bias_ptr,
-    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-    int n_output, float* input_gate_scratch, float* forget_gate_scratch,
-    float* cell_scratch, float* output_gate_scratch, float* scaling_factors,
-    float* product_scaling_factors, float* recovered_cell_weights,
-    int8_t* quantized_input_ptr_batch, int8_t* quantized_output_state_ptr,
-    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
-    float* cell_state_ptr, float* output_ptr_batch);
-
-void LstmStepWithAuxInput(
-    const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
-    float input_to_input_weights_scale,
-    const int8_t* input_to_forget_weights_ptr,
-    float input_to_forget_weights_scale,
-    const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale,
-    const int8_t* input_to_output_weights_ptr,
-    float input_to_output_weights_scale, const float* aux_input_ptr_batch,
-    const int8_t* aux_input_to_input_weights_ptr,
-    float aux_input_to_input_weights_scale,
-    const int8_t* aux_input_to_forget_weights_ptr,
-    float aux_input_to_forget_weights_scale,
-    const int8_t* aux_input_to_cell_weights_ptr,
-    float aux_input_to_cell_weights_scale,
-    const int8_t* aux_input_to_output_weights_ptr,
-    float aux_input_to_output_weights_scale,
-    const int8_t* recurrent_to_input_weights_ptr,
-    float recurrent_to_input_weights_scale,
-    const int8_t* recurrent_to_forget_weights_ptr,
-    float recurrent_to_forget_weights_scale,
-    const int8_t* recurrent_to_cell_weights_ptr,
-    float recurrent_to_cell_weights_scale,
-    const int8_t* recurrent_to_output_weights_ptr,
-    float recurrent_to_output_weights_scale,
-    const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale,
-    const int8_t* cell_to_forget_weights_ptr,
-    float cell_to_forget_weights_scale,
-    const int8_t* cell_to_output_weights_ptr,
-    float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
-    float projection_weights_scale, const float* projection_bias_ptr,
-    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-    int n_aux_input, int n_output, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* scaling_factors, float* product_scaling_factors,
-    float* recovered_cell_weights, int8_t* quantized_input_ptr_batch,
-    int8_t* quantized_aux_input_ptr_batch, int8_t* quantized_output_state_ptr,
-    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
-    float* cell_state_ptr, float* output_ptr_batch);
-
 }  // namespace kernel_utils
 }  // namespace tflite
 #endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_KERNEL_UTILS_H_
diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc
index 5b996d00bc..16d67a1a93 100644
--- a/tensorflow/contrib/lite/kernels/lstm.cc
+++ b/tensorflow/contrib/lite/kernels/lstm.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/internal/tensor.h"
 #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/lstm_eval.h"
 #include "tensorflow/contrib/lite/kernels/op_macros.h"
 
 namespace tflite {
@@ -424,263 +425,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-// The LSTM Op engine.
-TfLiteStatus EvalFloat(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer,
-    TfLiteTensor* activation_state, TfLiteTensor* cell_state,
-    TfLiteTensor* output) {
-  const int n_batch = input->dims->data[0];
-  const int n_input = input->dims->data[1];
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  const float* input_to_input_weights_ptr =
-      (use_cifg) ? nullptr : input_to_input_weights->data.f;
-  const float* recurrent_to_input_weights_ptr =
-      (use_cifg) ? nullptr : recurrent_to_input_weights->data.f;
-  const float* input_gate_bias_ptr =
-      (use_cifg) ? nullptr : input_gate_bias->data.f;
-  const float* cell_to_input_weights_ptr =
-      (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr;
-  const float* cell_to_forget_weights_ptr =
-      (use_peephole) ? cell_to_forget_weights->data.f : nullptr;
-  const float* cell_to_output_weights_ptr =
-      (use_peephole) ? cell_to_output_weights->data.f : nullptr;
-  const float* projection_weights_ptr =
-      (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const float* input_ptr_batch = input->data.f;
-  const float* input_to_forget_weights_ptr = input_to_forget_weights->data.f;
-  const float* input_to_cell_weights_ptr = input_to_cell_weights->data.f;
-  const float* input_to_output_weights_ptr = input_to_output_weights->data.f;
-  const float* recurrent_to_forget_weights_ptr =
-      recurrent_to_forget_weights->data.f;
-  const float* recurrent_to_cell_weights_ptr =
-      recurrent_to_cell_weights->data.f;
-  const float* recurrent_to_output_weights_ptr =
-      recurrent_to_output_weights->data.f;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* activation_state_ptr = activation_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-  float* output_ptr_batch = output->data.f;
-
-  kernel_utils::LstmStep(
-      input_ptr_batch, input_to_input_weights_ptr, input_to_forget_weights_ptr,
-      input_to_cell_weights_ptr, input_to_output_weights_ptr,
-      recurrent_to_input_weights_ptr, recurrent_to_forget_weights_ptr,
-      recurrent_to_cell_weights_ptr, recurrent_to_output_weights_ptr,
-      cell_to_input_weights_ptr, cell_to_forget_weights_ptr,
-      cell_to_output_weights_ptr, input_gate_bias_ptr, forget_gate_bias_ptr,
-      cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
-      projection_bias_ptr, params, n_batch, n_cell, n_input, n_output,
-      activation_state_ptr, cell_state_ptr, input_gate_scratch,
-      forget_gate_scratch, cell_scratch, output_gate_scratch, output_ptr_batch);
-
-  return kTfLiteOk;
-}
-
-TfLiteStatus EvalHybrid(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer,
-    TfLiteTensor* scaling_factors, TfLiteTensor* prod_scaling_factors,
-    TfLiteTensor* recovered_cell_weights, TfLiteTensor* input_quantized,
-    TfLiteTensor* activation_state_quantized,
-    TfLiteTensor* cell_state_quantized, TfLiteTensor* activation_state,
-    TfLiteTensor* cell_state, TfLiteTensor* output) {
-  const int n_batch = input->dims->data[0];
-  const int n_input = input->dims->data[1];
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  int8_t* input_to_input_weights_ptr = nullptr;
-  float input_to_input_weights_scale = 1.0f;
-  int8_t* recurrent_to_input_weights_ptr = nullptr;
-  float recurrent_to_input_weights_scale = 1.0f;
-  float* input_gate_bias_ptr = nullptr;
-  if (!use_cifg) {
-    input_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(input_to_input_weights->data.uint8);
-    recurrent_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(recurrent_to_input_weights->data.uint8);
-    input_gate_bias_ptr = input_gate_bias->data.f;
-    input_to_input_weights_scale = input_to_input_weights->params.scale;
-    recurrent_to_input_weights_scale = recurrent_to_input_weights->params.scale;
-  }
-
-  int8_t* cell_to_input_weights_ptr = nullptr;
-  int8_t* cell_to_forget_weights_ptr = nullptr;
-  int8_t* cell_to_output_weights_ptr = nullptr;
-  float cell_to_input_weights_scale = 1.0f;
-  float cell_to_forget_weights_scale = 1.0f;
-  float cell_to_output_weights_scale = 1.0f;
-  if (use_peephole) {
-    if (!use_cifg) {
-      cell_to_input_weights_ptr =
-          reinterpret_cast<int8_t*>(cell_to_input_weights->data.uint8);
-      cell_to_input_weights_scale = cell_to_input_weights->params.scale;
-    }
-    cell_to_forget_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_forget_weights->data.uint8);
-    cell_to_output_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_output_weights->data.uint8);
-    cell_to_forget_weights_scale = cell_to_forget_weights->params.scale;
-    cell_to_output_weights_scale = cell_to_output_weights->params.scale;
-  }
-
-  const int8_t* projection_weights_ptr =
-      (projection_weights == nullptr)
-          ? nullptr
-          : reinterpret_cast<int8_t*>(projection_weights->data.uint8);
-  const float projection_weights_scale =
-      (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const float* input_ptr_batch = input->data.f;
-  const int8_t* input_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_forget_weights->data.uint8);
-  const float input_to_forget_weights_scale =
-      input_to_forget_weights->params.scale;
-  const int8_t* input_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_cell_weights->data.uint8);
-  const float input_to_cell_weights_scale = input_to_cell_weights->params.scale;
-  const int8_t* input_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_output_weights->data.uint8);
-  const float input_to_output_weights_scale =
-      input_to_output_weights->params.scale;
-  const int8_t* recurrent_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_forget_weights->data.uint8);
-  const float recurrent_to_forget_weights_scale =
-      recurrent_to_forget_weights->params.scale;
-  const int8_t* recurrent_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_cell_weights->data.uint8);
-  const float recurrent_to_cell_weights_scale =
-      recurrent_to_cell_weights->params.scale;
-  const int8_t* recurrent_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_output_weights->data.uint8);
-  const float recurrent_to_output_weights_scale =
-      recurrent_to_output_weights->params.scale;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* activation_state_ptr = activation_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-  float* output_ptr_batch = output->data.f;
-
-  // Temporary storage for quantized values and scaling factors.
-  int8_t* quantized_input_ptr =
-      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
-  int8_t* quantized_activation_state_ptr =
-      reinterpret_cast<int8_t*>(activation_state_quantized->data.uint8);
-  int8_t* quantized_cell_state_ptr =
-      reinterpret_cast<int8_t*>(cell_state_quantized->data.uint8);
-  float* scaling_factors_ptr = scaling_factors->data.f;
-  float* prod_scaling_factors_ptr = prod_scaling_factors->data.f;
-  float* recovered_cell_weights_ptr = recovered_cell_weights->data.f;
-
-  kernel_utils::LstmStep(
-      input_ptr_batch, input_to_input_weights_ptr, input_to_input_weights_scale,
-      input_to_forget_weights_ptr, input_to_forget_weights_scale,
-      input_to_cell_weights_ptr, input_to_cell_weights_scale,
-      input_to_output_weights_ptr, input_to_output_weights_scale,
-      recurrent_to_input_weights_ptr, recurrent_to_input_weights_scale,
-      recurrent_to_forget_weights_ptr, recurrent_to_forget_weights_scale,
-      recurrent_to_cell_weights_ptr, recurrent_to_cell_weights_scale,
-      recurrent_to_output_weights_ptr, recurrent_to_output_weights_scale,
-      cell_to_input_weights_ptr, cell_to_input_weights_scale,
-      cell_to_forget_weights_ptr, cell_to_forget_weights_scale,
-      cell_to_output_weights_ptr, cell_to_output_weights_scale,
-      input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr,
-      output_gate_bias_ptr, projection_weights_ptr, projection_weights_scale,
-      projection_bias_ptr, params, n_batch, n_cell, n_input, n_output,
-      input_gate_scratch, forget_gate_scratch, cell_scratch,
-      output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
-      recovered_cell_weights_ptr, quantized_input_ptr,
-      quantized_activation_state_ptr, quantized_cell_state_ptr,
-      activation_state_ptr, cell_state_ptr, output_ptr_batch);
-
-  return kTfLiteOk;
-}
-
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
   OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
@@ -738,15 +482,21 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // TODO(mirkov): add a check that weights are all uint8s or all floats.
   switch (input_to_output_weights->type) {
     case kTfLiteFloat32: {
-      return EvalFloat(input, input_to_input_weights, input_to_forget_weights,
-                       input_to_cell_weights, input_to_output_weights,
-                       recurrent_to_input_weights, recurrent_to_forget_weights,
-                       recurrent_to_cell_weights, recurrent_to_output_weights,
-                       cell_to_input_weights, cell_to_forget_weights,
-                       cell_to_output_weights, input_gate_bias,
-                       forget_gate_bias, cell_bias, output_gate_bias,
-                       projection_weights, projection_bias, params,
-                       scratch_buffer, activation_state, cell_state, output);
+      return lstm_eval::EvalFloat(
+          input, input_to_input_weights, input_to_forget_weights,
+          input_to_cell_weights, input_to_output_weights,
+          recurrent_to_input_weights, recurrent_to_forget_weights,
+          recurrent_to_cell_weights, recurrent_to_output_weights,
+          cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
+          /*aux_input=*/nullptr,
+          /*aux_input_to_input_weights=*/nullptr,
+          /*aux_input_to_forget_weights=*/nullptr,
+          /*aux_input_to_cell_weights=*/nullptr,
+          /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
+          forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
+          projection_bias, params, /*forward_sequence=*/true,
+          /*output_offset=*/0, scratch_buffer, activation_state, cell_state,
+          output);
     }
     case kTfLiteUInt8: {
       TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1);
@@ -759,17 +509,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           GetTemporary(context, node, /*index=*/5);
       TfLiteTensor* recovered_cell_weights =
           GetTemporary(context, node, /*index=*/6);
-      return EvalHybrid(
+      return lstm_eval::EvalHybrid(
           input, input_to_input_weights, input_to_forget_weights,
           input_to_cell_weights, input_to_output_weights,
           recurrent_to_input_weights, recurrent_to_forget_weights,
           recurrent_to_cell_weights, recurrent_to_output_weights,
           cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
-          input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias,
-          projection_weights, projection_bias, params, scratch_buffer,
-          scaling_factors, prod_scaling_factors, recovered_cell_weights,
-          input_quantized, activation_state_quantized, cell_state_quantized,
-          activation_state, cell_state, output);
+          /*aux_input=*/nullptr,
+          /*aux_input_to_input_weights=*/nullptr,
+          /*aux_input_to_forget_weights=*/nullptr,
+          /*aux_input_to_cell_weights=*/nullptr,
+          /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
+          forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
+          projection_bias, params, /*forward_sequence=*/true,
+          /*output_offset=*/0, scratch_buffer, scaling_factors,
+          prod_scaling_factors, recovered_cell_weights, input_quantized,
+          /*aux_input_quantized=*/nullptr, activation_state_quantized,
+          cell_state_quantized, activation_state, cell_state, output);
     }
     default:
       context->ReportError(context, "Type %d is not currently supported.",
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.cc b/tensorflow/contrib/lite/kernels/lstm_eval.cc
new file mode 100644
index 0000000000..c6c21eb085
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.cc
@@ -0,0 +1,909 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/kernels/lstm_eval.h"
+
+#include <stdint.h>
+
+#include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace lstm_eval {
+
+namespace {
+
+// Performs an LSTM batch inference step for input specified by input_ptr_batch.
+// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and
+// biases (*_bias_ptr), and buffers (*_scratch), along with additional
+// parameters:
+//  - params: various LSTM params including activation, clipping, etc.,
+//  - n_batch: size of batch,
+//  - n_cell: number of cells (or units),
+//  - n_input: the input size,
+//  - n_output: the output size.
+//
+// The pointers to the cell and output state and the output are updated.
+//
+// The pointers with the suffix "_batch" point to data aligned in batch_major
+// order, and each step processes batch_size many inputs from input_ptr_batch,
+// and updates batch_size many cell and output states.
+inline void LstmStepWithAuxInput(
+    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
+    const float* input_to_forget_weights_ptr,
+    const float* input_to_cell_weights_ptr,
+    const float* input_to_output_weights_ptr, const float* aux_input_ptr_batch,
+    const float* aux_input_to_input_weights_ptr,
+    const float* aux_input_to_forget_weights_ptr,
+    const float* aux_input_to_cell_weights_ptr,
+    const float* aux_input_to_output_weights_ptr,
+    const float* recurrent_to_input_weights_ptr,
+    const float* recurrent_to_forget_weights_ptr,
+    const float* recurrent_to_cell_weights_ptr,
+    const float* recurrent_to_output_weights_ptr,
+    const float* cell_to_input_weights_ptr,
+    const float* cell_to_forget_weights_ptr,
+    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
+    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
+    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
+    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
+    int n_batch, int n_cell, int n_input, int n_aux_input, int n_output,
+    float* output_state_ptr, float* cell_state_ptr, float* input_gate_scratch,
+    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
+    float* output_ptr_batch) {
+  // Since we have already checked that weights are all there or none, we can
+  // check the existense of only one to the get the condition.
+  const bool use_cifg = (input_to_input_weights_ptr == nullptr);
+  const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
+  // Initialize scratch buffers with bias.
+  if (!use_cifg) {
+    tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch,
+                                          input_gate_scratch);
+  }
+  tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
+                                        forget_gate_scratch);
+  tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
+                                        cell_scratch);
+  tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
+                                        output_gate_scratch);
+
+  // For each batch and cell: compute input_weight * input.
+  if (!use_cifg) {
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        input_to_input_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
+        input_gate_scratch, /*result_stride=*/1);
+  }
+
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      input_to_forget_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
+      forget_gate_scratch, /*result_stride=*/1);
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      input_to_cell_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
+      cell_scratch, /*result_stride=*/1);
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      input_to_output_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
+      output_gate_scratch, /*result_stride=*/1);
+
+  // If auxiliary input is available then compute aux_input_weight * aux_input
+  if (aux_input_ptr_batch != nullptr) {
+    if (!use_cifg) {
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          aux_input_to_input_weights_ptr, n_cell, n_aux_input,
+          aux_input_ptr_batch, n_batch, input_gate_scratch,
+          /*result_stride=*/1);
+    }
+
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_forget_weights_ptr, n_cell, n_aux_input,
+        aux_input_ptr_batch, n_batch, forget_gate_scratch, /*result_stride=*/1);
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_cell_weights_ptr, n_cell, n_aux_input, aux_input_ptr_batch,
+        n_batch, cell_scratch, /*result_stride=*/1);
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_output_weights_ptr, n_cell, n_aux_input,
+        aux_input_ptr_batch, n_batch, output_gate_scratch, /*result_stride=*/1);
+  }
+
+  // For each batch and cell: compute recurrent_weight * output_state.
+  if (!use_cifg) {
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        recurrent_to_input_weights_ptr, n_cell, n_output, output_state_ptr,
+        n_batch, input_gate_scratch, /*result_stride=*/1);
+  }
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      recurrent_to_forget_weights_ptr, n_cell, n_output, output_state_ptr,
+      n_batch, forget_gate_scratch,
+      /*result_stride=*/1);
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      recurrent_to_cell_weights_ptr, n_cell, n_output, output_state_ptr,
+      n_batch, cell_scratch, /*result_stride=*/1);
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      recurrent_to_output_weights_ptr, n_cell, n_output, output_state_ptr,
+      n_batch, output_gate_scratch,
+      /*result_stride=*/1);
+
+  // For each batch and cell: update input gate.
+  if (!use_cifg) {
+    if (use_peephole) {
+      tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+          cell_to_input_weights_ptr, n_cell, cell_state_ptr, n_batch,
+          input_gate_scratch);
+    }
+    tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
+                                       input_gate_scratch);
+  }
+
+  // For each batch and cell: update forget gate.
+  if (use_peephole) {
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+        cell_to_forget_weights_ptr, n_cell, cell_state_ptr, n_batch,
+        forget_gate_scratch);
+  }
+  tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
+                                     forget_gate_scratch);
+
+  // For each batch and cell: update the cell.
+  tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr,
+                                         n_batch * n_cell, cell_state_ptr);
+  tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
+                                        params->activation, cell_scratch);
+  if (use_cifg) {
+    tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell,
+                             forget_gate_scratch);
+    tensor_utils::VectorVectorCwiseProductAccumulate(
+        cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr);
+  } else {
+    tensor_utils::VectorVectorCwiseProductAccumulate(
+        cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr);
+  }
+  if (params->cell_clip > 0.0) {
+    tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell,
+                             params->cell_clip, cell_state_ptr);
+  }
+
+  // For each batch and cell: update the output gate.
+  if (use_peephole) {
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+        cell_to_output_weights_ptr, n_cell, cell_state_ptr, n_batch,
+        output_gate_scratch);
+  }
+  tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
+                                     output_gate_scratch);
+  tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
+                                        params->activation, cell_scratch);
+  tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
+                                         n_batch * n_cell, output_gate_scratch);
+
+  // For each batch: update the projection and output_state.
+  const bool use_projection_weight = (projection_weights_ptr != nullptr);
+  const bool use_projection_bias = (projection_bias_ptr != nullptr);
+  if (use_projection_weight) {
+    if (use_projection_bias) {
+      tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
+                                            n_batch, output_ptr_batch);
+    } else {
+      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch,
+        output_ptr_batch, /*result_stride=*/1);
+    if (params->proj_clip > 0.0) {
+      tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
+                               params->proj_clip, output_ptr_batch);
+    }
+  } else {
+    tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
+                             output_ptr_batch);
+  }
+  tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
+                           output_state_ptr);
+}
+
+// Same as above but with quantized weight matrices. In detail:
+// Input of size 'n_batch * n_input':
+//   input_ptr_batch
+//
+// LSTM weights:
+// Quantized input weights of size 'n_cell * n_input':
+//   input_to_input_weights            - optional (can be nullptr)
+//   input_to_forget_weights
+//   input_to_cell_weights
+//   input_to_input_weights
+// Quantized recurrent weights of size 'n_cell * n_output':
+//   recurrent_to_input_weights        - optional
+//   recurrent_to_forget_weights
+//   recurrent_to_cell_weights
+//   recurrent_to_input_weights
+// Quantized peephole weights of size 'n_cell', representing diagonal matrices.
+//   cell_to_input_weights             - optional
+//   cell_to_cell_weights              - optional
+//   cell_to_output_weights            - optional
+// Quantized projection weights of size 'n_output * n_cell'
+//   projection_weights_ptr            - optional
+// Weight scales (scalars) for each of the weights above.
+//   input_to_input_weights_scale      - optional
+//   input_to_forget_weights_scale
+//   input_to_cell_weights_scale
+//   input_to_output_weights_scale
+//   recurrent_to_input_weights_scale  - optional
+//   recurrent_to_forget_weights_scale
+//   recurrent_to_cell_weights_scale
+//   recurrent_to_output_weights_scale
+//   cell_to_input_weights_scale,
+//   cell_to_forget_weights_scale,
+//   cell_to_output_weights_scale,
+//   projection_weights_scale          - optional
+// Gate biases of size 'n_cell':
+//   input_gate_bias_ptr               - optional
+//   forget_gate_bias_ptr
+//   cell_gate_bias_ptr
+//   output_gate_bias_ptr
+//
+// Temporary pre-allocated storage for quantized values:
+//   quantized_input_ptr_batch (same size as input_ptr_batch)
+//   quantized_output_state_ptr (same size as output_state_ptr)
+//   quantized_cell_state_ptr (same size as cell_state_ptr)
+// Temporary pre-allocated storage for recovered values:
+//   recovered_cell_weights (same size as cell_to_*_weights)
+//
+// Outputs:
+//   output_state_ptr - size 'n_batch * n_output'
+//   cell_state_ptr   - size 'n_batch * n_cell'
+//   output_ptr_batch - size 'n_batch * n_output'
+inline void LstmStepWithAuxInput(
+    const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
+    float input_to_input_weights_scale,
+    const int8_t* input_to_forget_weights_ptr,
+    float input_to_forget_weights_scale,
+    const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale,
+    const int8_t* input_to_output_weights_ptr,
+    float input_to_output_weights_scale, const float* aux_input_ptr_batch,
+    const int8_t* aux_input_to_input_weights_ptr,
+    float aux_input_to_input_weights_scale,
+    const int8_t* aux_input_to_forget_weights_ptr,
+    float aux_input_to_forget_weights_scale,
+    const int8_t* aux_input_to_cell_weights_ptr,
+    float aux_input_to_cell_weights_scale,
+    const int8_t* aux_input_to_output_weights_ptr,
+    float aux_input_to_output_weights_scale,
+    const int8_t* recurrent_to_input_weights_ptr,
+    float recurrent_to_input_weights_scale,
+    const int8_t* recurrent_to_forget_weights_ptr,
+    float recurrent_to_forget_weights_scale,
+    const int8_t* recurrent_to_cell_weights_ptr,
+    float recurrent_to_cell_weights_scale,
+    const int8_t* recurrent_to_output_weights_ptr,
+    float recurrent_to_output_weights_scale,
+    const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale,
+    const int8_t* cell_to_forget_weights_ptr,
+    float cell_to_forget_weights_scale,
+    const int8_t* cell_to_output_weights_ptr,
+    float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
+    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
+    const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
+    float projection_weights_scale, const float* projection_bias_ptr,
+    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
+    int n_aux_input, int n_output, float* input_gate_scratch,
+    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
+    float* scaling_factors, float* product_scaling_factors,
+    float* recovered_cell_weights, int8_t* quantized_input_ptr_batch,
+    int8_t* quantized_aux_input_ptr_batch, int8_t* quantized_output_state_ptr,
+    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
+    float* cell_state_ptr, float* output_ptr_batch) {
+  // Since we have already checked that weights are all there or none, we
+  // can check the existense of only one to the get the condition.
+  const bool use_cifg = (input_to_input_weights_ptr == nullptr);
+  const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
+  // Initialize scratch buffers with bias.
+  if (!use_cifg) {
+    tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch,
+                                          input_gate_scratch);
+  }
+  tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
+                                        forget_gate_scratch);
+  tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
+                                        cell_scratch);
+  tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
+                                        output_gate_scratch);
+
+  if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) {
+    // Save quantization and matmul computation for all zero input.
+    float unused_min, unused_max;
+    for (int b = 0; b < n_batch; ++b) {
+      const int offset = b * n_input;
+      tensor_utils::SymmetricQuantizeFloats(
+          input_ptr_batch + offset, n_input, quantized_input_ptr_batch + offset,
+          &unused_min, &unused_max, &scaling_factors[b]);
+    }
+    // For each batch and cell: compute input_weight * input.
+    if (!use_cifg) {
+      for (int b = 0; b < n_batch; ++b) {
+        product_scaling_factors[b] =
+            scaling_factors[b] * input_to_input_weights_scale;
+      }
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          input_to_input_weights_ptr, n_cell, n_input,
+          quantized_input_ptr_batch, product_scaling_factors, n_batch,
+          input_gate_scratch, /*result_stride=*/1);
+    }
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * input_to_forget_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        input_to_forget_weights_ptr, n_cell, n_input, quantized_input_ptr_batch,
+        product_scaling_factors, n_batch, forget_gate_scratch,
+        /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * input_to_cell_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        input_to_cell_weights_ptr, n_cell, n_input, quantized_input_ptr_batch,
+        product_scaling_factors, n_batch, cell_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * input_to_output_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        input_to_output_weights_ptr, n_cell, n_input, quantized_input_ptr_batch,
+        product_scaling_factors, n_batch, output_gate_scratch,
+        /*result_stride=*/1);
+  }
+
+  if (aux_input_ptr_batch != nullptr &&
+      !tensor_utils::IsZeroVector(aux_input_ptr_batch, n_batch * n_input)) {
+    // Save quantization and matmul computation for all zero input.
+    float unused_min, unused_max;
+    for (int b = 0; b < n_batch; ++b) {
+      const int offset = b * n_input;
+      tensor_utils::SymmetricQuantizeFloats(
+          aux_input_ptr_batch + offset, n_input,
+          quantized_aux_input_ptr_batch + offset, &unused_min, &unused_max,
+          &scaling_factors[b]);
+    }
+    // For each batch and cell: compute input_weight * input.
+    if (!use_cifg) {
+      for (int b = 0; b < n_batch; ++b) {
+        product_scaling_factors[b] =
+            scaling_factors[b] * aux_input_to_input_weights_scale;
+      }
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          aux_input_to_input_weights_ptr, n_cell, n_input,
+          quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
+          input_gate_scratch, /*result_stride=*/1);
+    }
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * aux_input_to_forget_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_forget_weights_ptr, n_cell, n_input,
+        quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
+        forget_gate_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * aux_input_to_cell_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_cell_weights_ptr, n_cell, n_input,
+        quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
+        cell_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * aux_input_to_output_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_output_weights_ptr, n_cell, n_input,
+        quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
+        output_gate_scratch, /*result_stride=*/1);
+  }
+
+  if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) {
+    // Save quantization and matmul computation for all zero input.
+    float unused_min, unused_max;
+    for (int b = 0; b < n_batch; ++b) {
+      const int offset = b * n_output;
+      tensor_utils::SymmetricQuantizeFloats(output_state_ptr + offset, n_output,
+                                            quantized_output_state_ptr + offset,
+                                            &unused_min, &unused_max,
+                                            &scaling_factors[b]);
+    }
+    // For each batch and cell: compute recurrent_weight * output_state.
+    if (!use_cifg) {
+      for (int b = 0; b < n_batch; ++b) {
+        product_scaling_factors[b] =
+            scaling_factors[b] * recurrent_to_input_weights_scale;
+      }
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          recurrent_to_input_weights_ptr, n_cell, n_output,
+          quantized_output_state_ptr, product_scaling_factors, n_batch,
+          input_gate_scratch, /*result_stride=*/1);
+    }
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * recurrent_to_forget_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        recurrent_to_forget_weights_ptr, n_cell, n_output,
+        quantized_output_state_ptr, product_scaling_factors, n_batch,
+        forget_gate_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * recurrent_to_cell_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        recurrent_to_cell_weights_ptr, n_cell, n_output,
+        quantized_output_state_ptr, product_scaling_factors, n_batch,
+        cell_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * recurrent_to_output_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        recurrent_to_output_weights_ptr, n_cell, n_output,
+        quantized_output_state_ptr, product_scaling_factors, n_batch,
+        output_gate_scratch, /*result_stride=*/1);
+  }
+
+  // Save quantization and matmul computation for all zero input.
+  bool is_cell_state_all_zeros =
+      tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell);
+
+  // For each batch and cell: update input gate.
+  if (!use_cifg) {
+    if (use_peephole && !is_cell_state_all_zeros) {
+      tensor_utils::VectorScalarMultiply(cell_to_input_weights_ptr, n_cell,
+                                         cell_to_input_weights_scale,
+                                         recovered_cell_weights);
+      tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+          recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
+          input_gate_scratch);
+    }
+    tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
+                                       input_gate_scratch);
+  }
+
+  // For each batch and cell: update forget gate.
+  if (use_peephole && !is_cell_state_all_zeros) {
+    tensor_utils::VectorScalarMultiply(cell_to_forget_weights_ptr, n_cell,
+                                       cell_to_forget_weights_scale,
+                                       recovered_cell_weights);
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+        recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
+        forget_gate_scratch);
+  }
+  tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
+                                     forget_gate_scratch);
+
+  // For each batch and cell: update the cell.
+  tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr,
+                                         n_batch * n_cell, cell_state_ptr);
+  tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
+                                        params->activation, cell_scratch);
+  if (use_cifg) {
+    tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell,
+                             forget_gate_scratch);
+    tensor_utils::VectorVectorCwiseProductAccumulate(
+        cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr);
+  } else {
+    tensor_utils::VectorVectorCwiseProductAccumulate(
+        cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr);
+  }
+  if (params->cell_clip > 0.0) {
+    tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell,
+                             params->cell_clip, cell_state_ptr);
+  }
+
+  is_cell_state_all_zeros =
+      tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell);
+  // For each batch and cell: update the output gate.
+  if (use_peephole && !is_cell_state_all_zeros) {
+    tensor_utils::VectorScalarMultiply(cell_to_output_weights_ptr, n_cell,
+                                       cell_to_output_weights_scale,
+                                       recovered_cell_weights);
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+        recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
+        output_gate_scratch);
+  }
+  tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
+                                     output_gate_scratch);
+  tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
+                                        params->activation, cell_scratch);
+  tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
+                                         n_batch * n_cell, output_gate_scratch);
+
+  // For each batch: update the projection and output_state.
+  const bool use_projection_weight = (projection_weights_ptr != nullptr);
+  const bool use_projection_bias = (projection_bias_ptr != nullptr);
+  if (use_projection_weight) {
+    if (use_projection_bias) {
+      tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
+                                            n_batch, output_ptr_batch);
+    } else {
+      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+    }
+    if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
+      // Save quantization and matmul computation for all zero input.
+      float unused_min, unused_max;
+      for (int b = 0; b < n_batch; ++b) {
+        const int offset = b * n_cell;
+        tensor_utils::SymmetricQuantizeFloats(
+            output_gate_scratch + offset, n_cell,
+            quantized_cell_state_ptr + offset, &unused_min, &unused_max,
+            &scaling_factors[b]);
+      }
+      for (int b = 0; b < n_batch; ++b) {
+        product_scaling_factors[b] =
+            scaling_factors[b] * projection_weights_scale;
+      }
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          projection_weights_ptr, n_output, n_cell, quantized_cell_state_ptr,
+          product_scaling_factors, n_batch, output_ptr_batch,
+          /*result_stride=*/1);
+    }
+    if (params->proj_clip > 0.0) {
+      tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
+                               params->proj_clip, output_ptr_batch);
+    }
+  } else {
+    tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
+                             output_ptr_batch);
+  }
+  tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
+                           output_state_ptr);
+}
+}  // namespace
+
+TfLiteStatus EvalFloat(
+    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
+    const TfLiteTensor* input_to_forget_weights,
+    const TfLiteTensor* input_to_cell_weights,
+    const TfLiteTensor* input_to_output_weights,
+    const TfLiteTensor* recurrent_to_input_weights,
+    const TfLiteTensor* recurrent_to_forget_weights,
+    const TfLiteTensor* recurrent_to_cell_weights,
+    const TfLiteTensor* recurrent_to_output_weights,
+    const TfLiteTensor* cell_to_input_weights,
+    const TfLiteTensor* cell_to_forget_weights,
+    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* aux_input_to_input_weights,
+    const TfLiteTensor* aux_input_to_forget_weights,
+    const TfLiteTensor* aux_input_to_cell_weights,
+    const TfLiteTensor* aux_input_to_output_weights,
+    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
+    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
+    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
+    TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
+    TfLiteTensor* cell_state, TfLiteTensor* output) {
+  const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
+  const int n_batch = input->dims->data[input->dims->size - 2];
+  const int n_input = input->dims->data[input->dims->size - 1];
+  const int aux_input_size =
+      (aux_input) ? aux_input->dims->data[aux_input->dims->size - 1] : 0;
+
+  // n_cell and n_output will be the same size when there is no projection.
+  const int n_cell = input_to_output_weights->dims->data[0];
+  const int n_output = recurrent_to_output_weights->dims->data[1];
+
+  // Since we have already checked that weights are all there or none, we can
+  // check the existense of only one to the get the condition.
+  const bool use_cifg = (input_to_input_weights == nullptr);
+  const bool use_peephole = (cell_to_output_weights != nullptr);
+
+  // Index the scratch buffers pointers to the global scratch buffer.
+  float* input_gate_scratch = nullptr;
+  float* cell_scratch = nullptr;
+  float* forget_gate_scratch = nullptr;
+  float* output_gate_scratch = nullptr;
+  if (use_cifg) {
+    cell_scratch = scratch_buffer->data.f;
+    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
+    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
+  } else {
+    input_gate_scratch = scratch_buffer->data.f;
+    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
+    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
+    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
+  }
+
+  // Check optional tensors, the respective pointers can be null.
+  const float* input_to_input_weights_ptr =
+      (use_cifg) ? nullptr : input_to_input_weights->data.f;
+  const float* recurrent_to_input_weights_ptr =
+      (use_cifg) ? nullptr : recurrent_to_input_weights->data.f;
+  const float* input_gate_bias_ptr =
+      (use_cifg) ? nullptr : input_gate_bias->data.f;
+  const float* cell_to_input_weights_ptr =
+      (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr;
+  const float* cell_to_forget_weights_ptr =
+      (use_peephole) ? cell_to_forget_weights->data.f : nullptr;
+  const float* cell_to_output_weights_ptr =
+      (use_peephole) ? cell_to_output_weights->data.f : nullptr;
+  const float* projection_weights_ptr =
+      (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
+  const float* projection_bias_ptr =
+      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
+
+  float* aux_input_ptr = nullptr;
+  float* aux_input_to_input_weights_ptr = nullptr;
+  float* aux_input_to_forget_weights_ptr = nullptr;
+  float* aux_input_to_cell_weights_ptr = nullptr;
+  float* aux_input_to_output_weights_ptr = nullptr;
+  if (aux_input_size > 0) {
+    aux_input_ptr = aux_input->data.f;
+    aux_input_to_input_weights_ptr = aux_input_to_input_weights->data.f;
+    aux_input_to_forget_weights_ptr = aux_input_to_forget_weights->data.f;
+    aux_input_to_cell_weights_ptr = aux_input_to_cell_weights->data.f;
+    aux_input_to_output_weights_ptr = aux_input_to_output_weights->data.f;
+  }
+
+  // Loop through the sequence.
+  const int input_step = n_batch * n_input;
+  const int output_step = n_batch * output->dims->data[output->dims->size - 1];
+  for (int t = 0; t < max_time; t++) {
+    // If this is the forward_sequence, step forward, otherwise step backwards.
+    const int t_rel = forward_sequence ? t : max_time - t - 1;
+    const float* input_ptr = input->data.f + t_rel * input_step;
+    float* output_ptr_time =
+        output->data.f + t_rel * output_step + output_offset;
+
+    LstmStepWithAuxInput(
+        input_ptr, input_to_input_weights_ptr, input_to_forget_weights->data.f,
+        input_to_cell_weights->data.f, input_to_output_weights->data.f,
+        aux_input_ptr, aux_input_to_input_weights_ptr,
+        aux_input_to_forget_weights_ptr, aux_input_to_cell_weights_ptr,
+        aux_input_to_output_weights_ptr, recurrent_to_input_weights_ptr,
+        recurrent_to_forget_weights->data.f, recurrent_to_cell_weights->data.f,
+        recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
+        cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
+        input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
+        output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
+        params, n_batch, n_cell, n_input, aux_input_size, n_output,
+        activation_state->data.f, cell_state->data.f, input_gate_scratch,
+        forget_gate_scratch, cell_scratch, output_gate_scratch,
+        output_ptr_time);
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalHybrid(
+    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
+    const TfLiteTensor* input_to_forget_weights,
+    const TfLiteTensor* input_to_cell_weights,
+    const TfLiteTensor* input_to_output_weights,
+    const TfLiteTensor* recurrent_to_input_weights,
+    const TfLiteTensor* recurrent_to_forget_weights,
+    const TfLiteTensor* recurrent_to_cell_weights,
+    const TfLiteTensor* recurrent_to_output_weights,
+    const TfLiteTensor* cell_to_input_weights,
+    const TfLiteTensor* cell_to_forget_weights,
+    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* aux_input_to_input_weights,
+    const TfLiteTensor* aux_input_to_forget_weights,
+    const TfLiteTensor* aux_input_to_cell_weights,
+    const TfLiteTensor* aux_input_to_output_weights,
+    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
+    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
+    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
+    TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
+    TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
+    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
+    TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
+    TfLiteTensor* output_state, TfLiteTensor* cell_state,
+    TfLiteTensor* output) {
+  const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
+  const int n_batch = input->dims->data[input->dims->size - 2];
+  const int n_input = input->dims->data[input->dims->size - 1];
+  const int aux_input_size =
+      (aux_input) ? aux_input->dims->data[aux_input->dims->size - 1] : 0;
+  // n_cell and n_output will be the same size when there is no projection.
+  const int n_cell = input_to_output_weights->dims->data[0];
+  const int n_output = recurrent_to_output_weights->dims->data[1];
+
+  // Since we have already checked that weights are all there or none, we can
+  // check the existence of only one to get the condition.
+  const bool use_cifg = (input_to_input_weights == nullptr);
+  const bool use_peephole = (cell_to_output_weights != nullptr);
+
+  float* input_gate_scratch = nullptr;
+  float* cell_scratch = nullptr;
+  float* forget_gate_scratch = nullptr;
+  float* output_gate_scratch = nullptr;
+  if (use_cifg) {
+    cell_scratch = scratch_buffer->data.f;
+    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
+    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
+  } else {
+    input_gate_scratch = scratch_buffer->data.f;
+    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
+    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
+    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
+  }
+
+  // Check optional tensors, the respective pointers can be null.
+  int8_t* input_to_input_weights_ptr = nullptr;
+  float input_to_input_weights_scale = 1.0f;
+  int8_t* recurrent_to_input_weights_ptr = nullptr;
+  float recurrent_to_input_weights_scale = 1.0f;
+  float* input_gate_bias_ptr = nullptr;
+  if (!use_cifg) {
+    input_to_input_weights_ptr =
+        reinterpret_cast<int8_t*>(input_to_input_weights->data.uint8);
+    recurrent_to_input_weights_ptr =
+        reinterpret_cast<int8_t*>(recurrent_to_input_weights->data.uint8);
+    input_gate_bias_ptr = input_gate_bias->data.f;
+    input_to_input_weights_scale = input_to_input_weights->params.scale;
+    recurrent_to_input_weights_scale = recurrent_to_input_weights->params.scale;
+  }
+
+  int8_t* cell_to_input_weights_ptr = nullptr;
+  int8_t* cell_to_forget_weights_ptr = nullptr;
+  int8_t* cell_to_output_weights_ptr = nullptr;
+  float cell_to_input_weights_scale = 1.0f;
+  float cell_to_forget_weights_scale = 1.0f;
+  float cell_to_output_weights_scale = 1.0f;
+  if (use_peephole) {
+    if (!use_cifg) {
+      cell_to_input_weights_ptr =
+          reinterpret_cast<int8_t*>(cell_to_input_weights->data.uint8);
+      cell_to_input_weights_scale = cell_to_input_weights->params.scale;
+    }
+    cell_to_forget_weights_ptr =
+        reinterpret_cast<int8_t*>(cell_to_forget_weights->data.uint8);
+    cell_to_output_weights_ptr =
+        reinterpret_cast<int8_t*>(cell_to_output_weights->data.uint8);
+    cell_to_forget_weights_scale = cell_to_forget_weights->params.scale;
+    cell_to_output_weights_scale = cell_to_output_weights->params.scale;
+  }
+
+  const int8_t* projection_weights_ptr =
+      (projection_weights == nullptr)
+          ? nullptr
+          : reinterpret_cast<int8_t*>(projection_weights->data.uint8);
+  const float projection_weights_scale =
+      (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale;
+  const float* projection_bias_ptr =
+      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
+
+  // Required tensors, pointers are non-null.
+  const int8_t* input_to_forget_weights_ptr =
+      reinterpret_cast<int8_t*>(input_to_forget_weights->data.uint8);
+  const float input_to_forget_weights_scale =
+      input_to_forget_weights->params.scale;
+  const int8_t* input_to_cell_weights_ptr =
+      reinterpret_cast<int8_t*>(input_to_cell_weights->data.uint8);
+  const float input_to_cell_weights_scale = input_to_cell_weights->params.scale;
+  const int8_t* input_to_output_weights_ptr =
+      reinterpret_cast<int8_t*>(input_to_output_weights->data.uint8);
+  const float input_to_output_weights_scale =
+      input_to_output_weights->params.scale;
+  const int8_t* recurrent_to_forget_weights_ptr =
+      reinterpret_cast<int8_t*>(recurrent_to_forget_weights->data.uint8);
+  const float recurrent_to_forget_weights_scale =
+      recurrent_to_forget_weights->params.scale;
+  const int8_t* recurrent_to_cell_weights_ptr =
+      reinterpret_cast<int8_t*>(recurrent_to_cell_weights->data.uint8);
+  const float recurrent_to_cell_weights_scale =
+      recurrent_to_cell_weights->params.scale;
+  const int8_t* recurrent_to_output_weights_ptr =
+      reinterpret_cast<int8_t*>(recurrent_to_output_weights->data.uint8);
+  const float recurrent_to_output_weights_scale =
+      recurrent_to_output_weights->params.scale;
+  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
+  const float* cell_bias_ptr = cell_bias->data.f;
+  const float* output_gate_bias_ptr = output_gate_bias->data.f;
+
+  float* output_state_ptr = output_state->data.f;
+  float* cell_state_ptr = cell_state->data.f;
+
+  // Temporary storage for quantized values and scaling factors.
+  int8_t* quantized_input_ptr =
+      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
+  int8_t* quantized_aux_input_ptr =
+      (aux_input_quantized == nullptr)
+          ? nullptr
+          : reinterpret_cast<int8_t*>(aux_input_quantized->data.uint8);
+  int8_t* quantized_output_state_ptr =
+      reinterpret_cast<int8_t*>(output_state_quantized->data.uint8);
+  int8_t* quantized_cell_state_ptr =
+      reinterpret_cast<int8_t*>(cell_state_quantized->data.uint8);
+  float* scaling_factors_ptr = scaling_factors->data.f;
+  float* prod_scaling_factors_ptr = prod_scaling_factors->data.f;
+  float* recovered_cell_weights_ptr = recovered_cell_weights->data.f;
+
+  // Auxiliary input and weights.
+  float* aux_input_ptr = nullptr;
+  int8_t* aux_input_to_input_weights_ptr = nullptr;
+  int8_t* aux_input_to_forget_weights_ptr = nullptr;
+  int8_t* aux_input_to_cell_weights_ptr = nullptr;
+  int8_t* aux_input_to_output_weights_ptr = nullptr;
+  float aux_input_to_input_weights_scale = 0.0f;
+  float aux_input_to_forget_weights_scale = 0.0f;
+  float aux_input_to_cell_weights_scale = 0.0f;
+  float aux_input_to_output_weights_scale = 0.0f;
+  if (aux_input_size > 0) {
+    aux_input_ptr = aux_input->data.f;
+    aux_input_to_input_weights_ptr =
+        reinterpret_cast<int8_t*>(aux_input_to_input_weights->data.uint8);
+    aux_input_to_forget_weights_ptr =
+        reinterpret_cast<int8_t*>(aux_input_to_forget_weights->data.uint8);
+    aux_input_to_cell_weights_ptr =
+        reinterpret_cast<int8_t*>(aux_input_to_cell_weights->data.uint8);
+    aux_input_to_output_weights_ptr =
+        reinterpret_cast<int8_t*>(aux_input_to_output_weights->data.uint8);
+    aux_input_to_input_weights_scale = aux_input_to_input_weights->params.scale;
+    aux_input_to_forget_weights_scale =
+        aux_input_to_forget_weights->params.scale;
+    aux_input_to_cell_weights_scale = aux_input_to_cell_weights->params.scale;
+    aux_input_to_output_weights_scale =
+        aux_input_to_output_weights->params.scale;
+  }
+
+  // Feed the sequence into the LSTM step-by-step.
+  const int input_step = n_batch * n_input;
+  const int output_step = n_batch * output->dims->data[output->dims->size - 1];
+  for (int t = 0; t < max_time; t++) {
+    // If this is the forward_sequence, step forward, otherwise step backwards.
+    const int t_rel = forward_sequence ? t : max_time - t - 1;
+    const float* input_ptr = input->data.f + t_rel * input_step;
+    float* output_ptr = output->data.f + t_rel * output_step + output_offset;
+
+    LstmStepWithAuxInput(
+        input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
+        input_to_forget_weights_ptr, input_to_forget_weights_scale,
+        input_to_cell_weights_ptr, input_to_cell_weights_scale,
+        input_to_output_weights_ptr, input_to_output_weights_scale,
+        aux_input_ptr, aux_input_to_input_weights_ptr,
+        aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
+        aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
+        aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
+        aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
+        recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
+        recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
+        recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
+        recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
+        cell_to_input_weights_scale, cell_to_forget_weights_ptr,
+        cell_to_forget_weights_scale, cell_to_output_weights_ptr,
+        cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr,
+        cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
+        projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell,
+        n_input, aux_input_size, n_output, input_gate_scratch,
+        forget_gate_scratch, cell_scratch, output_gate_scratch,
+        scaling_factors_ptr, prod_scaling_factors_ptr,
+        recovered_cell_weights_ptr, quantized_input_ptr,
+        quantized_aux_input_ptr, quantized_output_state_ptr,
+        quantized_cell_state_ptr, output_state_ptr, cell_state_ptr, output_ptr);
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace lstm_eval
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.h b/tensorflow/contrib/lite/kernels/lstm_eval.h
new file mode 100644
index 0000000000..adf8cf0f64
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.h
@@ -0,0 +1,79 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_LSTM_EVAL_H_
+#define TENSORFLOW_CONTRIB_LITE_KERNELS_LSTM_EVAL_H_
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace lstm_eval {
+
+TfLiteStatus EvalFloat(
+    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
+    const TfLiteTensor* input_to_forget_weights,
+    const TfLiteTensor* input_to_cell_weights,
+    const TfLiteTensor* input_to_output_weights,
+    const TfLiteTensor* recurrent_to_input_weights,
+    const TfLiteTensor* recurrent_to_forget_weights,
+    const TfLiteTensor* recurrent_to_cell_weights,
+    const TfLiteTensor* recurrent_to_output_weights,
+    const TfLiteTensor* cell_to_input_weights,
+    const TfLiteTensor* cell_to_forget_weights,
+    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* aux_input_to_input_weights,
+    const TfLiteTensor* aux_input_to_forget_weights,
+    const TfLiteTensor* aux_input_to_cell_weights,
+    const TfLiteTensor* aux_input_to_output_weights,
+    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
+    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
+    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
+    TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
+    TfLiteTensor* cell_state, TfLiteTensor* output);
+
+TfLiteStatus EvalHybrid(
+    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
+    const TfLiteTensor* input_to_forget_weights,
+    const TfLiteTensor* input_to_cell_weights,
+    const TfLiteTensor* input_to_output_weights,
+    const TfLiteTensor* recurrent_to_input_weights,
+    const TfLiteTensor* recurrent_to_forget_weights,
+    const TfLiteTensor* recurrent_to_cell_weights,
+    const TfLiteTensor* recurrent_to_output_weights,
+    const TfLiteTensor* cell_to_input_weights,
+    const TfLiteTensor* cell_to_forget_weights,
+    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* aux_input_to_input_weights,
+    const TfLiteTensor* aux_input_to_forget_weights,
+    const TfLiteTensor* aux_input_to_cell_weights,
+    const TfLiteTensor* aux_input_to_output_weights,
+    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
+    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
+    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
+    TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
+    TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
+    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
+    TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
+    TfLiteTensor* output_state, TfLiteTensor* cell_state, TfLiteTensor* output);
+
+}  // namespace lstm_eval
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
+#endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_LSTM_EVAL_H_
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
index 63817bd886..ec9cf38b83 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h"
 #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/lstm_eval.h"
 #include "tensorflow/contrib/lite/kernels/op_macros.h"
 
 namespace tflite {
@@ -429,273 +430,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-// The LSTM Op engine.
-TfLiteStatus EvalFloat(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer,
-    TfLiteTensor* activation_state, TfLiteTensor* cell_state,
-    TfLiteTensor* output) {
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
-  const int n_input = input->dims->data[2];
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  const float* input_to_input_weights_ptr =
-      (use_cifg) ? nullptr : input_to_input_weights->data.f;
-  const float* recurrent_to_input_weights_ptr =
-      (use_cifg) ? nullptr : recurrent_to_input_weights->data.f;
-  const float* input_gate_bias_ptr =
-      (use_cifg) ? nullptr : input_gate_bias->data.f;
-  const float* cell_to_input_weights_ptr =
-      (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr;
-  const float* cell_to_forget_weights_ptr =
-      (use_peephole) ? cell_to_forget_weights->data.f : nullptr;
-  const float* cell_to_output_weights_ptr =
-      (use_peephole) ? cell_to_output_weights->data.f : nullptr;
-  const float* projection_weights_ptr =
-      (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const float* input_to_forget_weights_ptr = input_to_forget_weights->data.f;
-  const float* input_to_cell_weights_ptr = input_to_cell_weights->data.f;
-  const float* input_to_output_weights_ptr = input_to_output_weights->data.f;
-  const float* recurrent_to_forget_weights_ptr =
-      recurrent_to_forget_weights->data.f;
-  const float* recurrent_to_cell_weights_ptr =
-      recurrent_to_cell_weights->data.f;
-  const float* recurrent_to_output_weights_ptr =
-      recurrent_to_output_weights->data.f;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* activation_state_ptr = activation_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-
-  // Feed the sequence into the LSTM step-by-step.
-  for (int t = 0; t < max_time; t++) {
-    const float* input_ptr_batch = input->data.f + t * n_batch * n_input;
-    float* output_ptr_batch = output->data.f + t * n_batch * n_output;
-
-    kernel_utils::LstmStep(
-        input_ptr_batch, input_to_input_weights_ptr,
-        input_to_forget_weights_ptr, input_to_cell_weights_ptr,
-        input_to_output_weights_ptr, recurrent_to_input_weights_ptr,
-        recurrent_to_forget_weights_ptr, recurrent_to_cell_weights_ptr,
-        recurrent_to_output_weights_ptr, cell_to_input_weights_ptr,
-        cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
-        input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr,
-        output_gate_bias_ptr, projection_weights_ptr, projection_bias_ptr,
-        params, n_batch, n_cell, n_input, n_output, activation_state_ptr,
-        cell_state_ptr, input_gate_scratch, forget_gate_scratch, cell_scratch,
-        output_gate_scratch, output_ptr_batch);
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus EvalHybrid(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer,
-    TfLiteTensor* scaling_factors, TfLiteTensor* prod_scaling_factors,
-    TfLiteTensor* recovered_cell_weights, TfLiteTensor* input_quantized,
-    TfLiteTensor* activation_state_quantized,
-    TfLiteTensor* cell_state_quantized, TfLiteTensor* activation_state,
-    TfLiteTensor* cell_state, TfLiteTensor* output) {
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
-  const int n_input = input->dims->data[2];
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  int8_t* input_to_input_weights_ptr = nullptr;
-  float input_to_input_weights_scale = 1.0f;
-  int8_t* recurrent_to_input_weights_ptr = nullptr;
-  float recurrent_to_input_weights_scale = 1.0f;
-  float* input_gate_bias_ptr = nullptr;
-  if (!use_cifg) {
-    input_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(input_to_input_weights->data.uint8);
-    recurrent_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(recurrent_to_input_weights->data.uint8);
-    input_gate_bias_ptr = input_gate_bias->data.f;
-    input_to_input_weights_scale = input_to_input_weights->params.scale;
-    recurrent_to_input_weights_scale = recurrent_to_input_weights->params.scale;
-  }
-
-  int8_t* cell_to_input_weights_ptr = nullptr;
-  int8_t* cell_to_forget_weights_ptr = nullptr;
-  int8_t* cell_to_output_weights_ptr = nullptr;
-  float cell_to_input_weights_scale = 1.0f;
-  float cell_to_forget_weights_scale = 1.0f;
-  float cell_to_output_weights_scale = 1.0f;
-  if (use_peephole) {
-    if (!use_cifg) {
-      cell_to_input_weights_ptr =
-          reinterpret_cast<int8_t*>(cell_to_input_weights->data.uint8);
-      cell_to_input_weights_scale = cell_to_input_weights->params.scale;
-    }
-    cell_to_forget_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_forget_weights->data.uint8);
-    cell_to_output_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_output_weights->data.uint8);
-    cell_to_forget_weights_scale = cell_to_forget_weights->params.scale;
-    cell_to_output_weights_scale = cell_to_output_weights->params.scale;
-  }
-
-  const int8_t* projection_weights_ptr =
-      (projection_weights == nullptr)
-          ? nullptr
-          : reinterpret_cast<int8_t*>(projection_weights->data.uint8);
-  float projection_weights_scale =
-      (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const int8_t* input_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_forget_weights->data.uint8);
-  const float input_to_forget_weights_scale =
-      input_to_forget_weights->params.scale;
-  const int8_t* input_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_cell_weights->data.uint8);
-  const float input_to_cell_weights_scale = input_to_cell_weights->params.scale;
-  const int8_t* input_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_output_weights->data.uint8);
-  const float input_to_output_weights_scale =
-      input_to_output_weights->params.scale;
-  const int8_t* recurrent_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_forget_weights->data.uint8);
-  const float recurrent_to_forget_weights_scale =
-      recurrent_to_forget_weights->params.scale;
-  const int8_t* recurrent_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_cell_weights->data.uint8);
-  const float recurrent_to_cell_weights_scale =
-      recurrent_to_cell_weights->params.scale;
-  const int8_t* recurrent_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_output_weights->data.uint8);
-  const float recurrent_to_output_weights_scale =
-      recurrent_to_output_weights->params.scale;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* activation_state_ptr = activation_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-
-  // Temporary storage for quantized values and scaling factors.
-  int8_t* quantized_input_ptr =
-      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
-  int8_t* quantized_activation_state_ptr =
-      reinterpret_cast<int8_t*>(activation_state_quantized->data.uint8);
-  int8_t* quantized_cell_state_ptr =
-      reinterpret_cast<int8_t*>(cell_state_quantized->data.uint8);
-  float* scaling_factors_ptr = scaling_factors->data.f;
-  float* prod_scaling_factors_ptr = prod_scaling_factors->data.f;
-  float* recovered_cell_weights_ptr = recovered_cell_weights->data.f;
-
-  // Feed the sequence into the LSTM step-by-step.
-  for (int t = 0; t < max_time; t++) {
-    const float* input_ptr_batch = input->data.f + t * n_batch * n_input;
-    float* output_ptr_batch = output->data.f + t * n_batch * n_output;
-
-    kernel_utils::LstmStep(
-        input_ptr_batch, input_to_input_weights_ptr,
-        input_to_input_weights_scale, input_to_forget_weights_ptr,
-        input_to_forget_weights_scale, input_to_cell_weights_ptr,
-        input_to_cell_weights_scale, input_to_output_weights_ptr,
-        input_to_output_weights_scale, recurrent_to_input_weights_ptr,
-        recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
-        recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
-        recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
-        recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
-        cell_to_input_weights_scale, cell_to_forget_weights_ptr,
-        cell_to_forget_weights_scale, cell_to_output_weights_ptr,
-        cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr,
-        cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
-        projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell,
-        n_input, n_output, input_gate_scratch, forget_gate_scratch,
-        cell_scratch, output_gate_scratch, scaling_factors_ptr,
-        prod_scaling_factors_ptr, recovered_cell_weights_ptr,
-        quantized_input_ptr, quantized_activation_state_ptr,
-        quantized_cell_state_ptr, activation_state_ptr, cell_state_ptr,
-        output_ptr_batch);
-  }
-  return kTfLiteOk;
-}
-
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
@@ -750,15 +484,21 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (input_to_output_weights->type) {
     case kTfLiteFloat32: {
-      return EvalFloat(input, input_to_input_weights, input_to_forget_weights,
-                       input_to_cell_weights, input_to_output_weights,
-                       recurrent_to_input_weights, recurrent_to_forget_weights,
-                       recurrent_to_cell_weights, recurrent_to_output_weights,
-                       cell_to_input_weights, cell_to_forget_weights,
-                       cell_to_output_weights, input_gate_bias,
-                       forget_gate_bias, cell_bias, output_gate_bias,
-                       projection_weights, projection_bias, params,
-                       scratch_buffer, activation_state, cell_state, output);
+      return lstm_eval::EvalFloat(
+          input, input_to_input_weights, input_to_forget_weights,
+          input_to_cell_weights, input_to_output_weights,
+          recurrent_to_input_weights, recurrent_to_forget_weights,
+          recurrent_to_cell_weights, recurrent_to_output_weights,
+          cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
+          /*aux_input=*/nullptr,
+          /*aux_input_to_input_weights=*/nullptr,
+          /*aux_input_to_forget_weights=*/nullptr,
+          /*aux_input_to_cell_weights=*/nullptr,
+          /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
+          forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
+          projection_bias, params, /*forward_sequence=*/true,
+          /*output_offset=*/0, scratch_buffer, activation_state, cell_state,
+          output);
     }
     case kTfLiteUInt8: {
       TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1);
@@ -771,17 +511,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           GetTemporary(context, node, /*index=*/5);
       TfLiteTensor* recovered_cell_weights =
           GetTemporary(context, node, /*index=*/6);
-      return EvalHybrid(
+      return lstm_eval::EvalHybrid(
           input, input_to_input_weights, input_to_forget_weights,
           input_to_cell_weights, input_to_output_weights,
           recurrent_to_input_weights, recurrent_to_forget_weights,
           recurrent_to_cell_weights, recurrent_to_output_weights,
           cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
-          input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias,
-          projection_weights, projection_bias, params, scratch_buffer,
-          scaling_factors, prod_scaling_factors, recovered_cell_weights,
-          input_quantized, activation_state_quantized, cell_state_quantized,
-          activation_state, cell_state, output);
+          /*aux_input=*/nullptr,
+          /*aux_input_to_input_weights=*/nullptr,
+          /*aux_input_to_forget_weights=*/nullptr,
+          /*aux_input_to_cell_weights=*/nullptr,
+          /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
+          forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
+          projection_bias, params, /*forward_sequence=*/true,
+          /*output_offset=*/0, scratch_buffer, scaling_factors,
+          prod_scaling_factors, recovered_cell_weights, input_quantized,
+          /*aux_input_quantized=*/nullptr, activation_state_quantized,
+          cell_state_quantized, activation_state, cell_state, output);
     }
     default:
       context->ReportError(context, "Type %d is not currently supported.",
-- 
GitLab


From 123de2797a4348c963b597096762085bfa09eab1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 02:01:34 -0700
Subject: [PATCH 1177/1357] compat: Update forward compatibility horizon to
 2018-10-05

PiperOrigin-RevId: 215874612
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 76e08610ba..8f4e8e0b98 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 4)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 5)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 92c8a77ba480bf4aeddea412cc1d2988f6ad81cd Mon Sep 17 00:00:00 2001
From: HyoukJoong Lee <hyouklee@google.com>
Date: Fri, 5 Oct 2018 07:46:22 -0700
Subject: [PATCH 1178/1357] Use absl::Span for HloModuleGroupMetadata

PiperOrigin-RevId: 215905026
---
 .../compiler/xla/service/hlo_module_group_metadata.cc     | 2 +-
 .../compiler/xla/service/hlo_module_group_metadata.h      | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
index 83352ef91b..b4aac4c807 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
@@ -59,7 +59,7 @@ string HloModuleGroupMetadata::TrackedInstruction::ToString() const {
 }
 
 /* static */ StatusOr<std::unique_ptr<HloModuleGroupMetadata>>
-HloModuleGroupMetadata::Build(const std::vector<HloModule*>& modules) {
+HloModuleGroupMetadata::Build(absl::Span<HloModule* const> modules) {
   auto metadata = absl::make_unique<HloModuleGroupMetadata>(modules);
   TF_RETURN_IF_ERROR(metadata->Build());
   return std::move(metadata);
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
index 0311b73207..928df0f5a7 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
@@ -102,14 +102,14 @@ class HloModuleGroupMetadata {
     HloInstruction* recv_done = nullptr;
   };
 
-  explicit HloModuleGroupMetadata(const std::vector<HloModule*>& modules)
-      : modules_(modules) {}
+  explicit HloModuleGroupMetadata(absl::Span<HloModule* const> modules)
+      : modules_(modules.begin(), modules.end()) {}
 
   ~HloModuleGroupMetadata() = default;
 
   // Build and return the metadata for the given modules.
   static StatusOr<std::unique_ptr<HloModuleGroupMetadata>> Build(
-      const std::vector<HloModule*>& modules);
+      absl::Span<HloModule* const> modules);
 
   // Returns true if the instruction is one of the 4 channel instructions (Send,
   // Recv, SendDone, RecvDone).
@@ -274,7 +274,7 @@ class HloModuleGroupMetadata {
   int64 max_channel_id_ = -1;
 
   // The modules that this metadata was built from.
-  const std::vector<HloModule*>& modules_;
+  const std::vector<HloModule*> modules_;
 
   absl::flat_hash_map<HloModule*, std::unique_ptr<TuplePointsToAnalysis>>
       points_to_analyses_;
-- 
GitLab


From 388ed2929ea024adcfb76ea9ddd78a38a87470b7 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Fri, 5 Oct 2018 08:03:19 -0700
Subject: [PATCH 1179/1357] [TF:XLA] Move broadcasting code out of BroadcastTo
 op into a common helper library.

Change XlaBinaryOp::Broadcast to use the BroadcastTo lowering, since it produces fewer extraneous reshapes and transposes. Even if the reshapes and transposes would later optimize away, this yields more readable output and makes life easier for HLO rewrites that run early.

Change in preparation for removing reshapes from SoftmaxCrossEntropyWithLogits.

PiperOrigin-RevId: 215906847
---
 tensorflow/compiler/tf2xla/kernels/BUILD      |  1 +
 .../compiler/tf2xla/kernels/binary_ops.cc     | 10 +-
 .../tf2xla/kernels/broadcast_to_op.cc         | 63 +------------
 .../compiler/tf2xla/kernels/cwise_ops.cc      | 57 +++---------
 .../compiler/tf2xla/kernels/cwise_ops.h       |  3 +-
 tensorflow/compiler/tf2xla/lib/BUILD          | 16 ++++
 tensorflow/compiler/tf2xla/lib/broadcast.cc   | 93 +++++++++++++++++++
 tensorflow/compiler/tf2xla/lib/broadcast.h    | 32 +++++++
 8 files changed, 165 insertions(+), 110 deletions(-)
 create mode 100644 tensorflow/compiler/tf2xla/lib/broadcast.cc
 create mode 100644 tensorflow/compiler/tf2xla/lib/broadcast.h

diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 95a010a119..224e5ea123 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -121,6 +121,7 @@ tf_kernel_library(
         "//tensorflow/compiler/tf2xla:common",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/lib:batch_dot",
+        "//tensorflow/compiler/tf2xla/lib:broadcast",
         "//tensorflow/compiler/tf2xla/lib:cholesky",
         "//tensorflow/compiler/tf2xla/lib:qr",
         "//tensorflow/compiler/tf2xla/lib:random",
diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
index a988d3c33e..47e517a657 100644
--- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
@@ -64,7 +64,7 @@ XLA_MAKE_BINARY(Complex, xla::Complex(lhs, rhs, extend_dimensions));
 // }
 static xla::XlaOp DivNoNanImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                                xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   auto zero = XlaHelpers::Zero(b, dtype);
   auto y_equals_0 = xla::Eq(y, zero);
   auto zeros = xla::ZerosLike(x);
@@ -84,7 +84,7 @@ XLA_MAKE_BINARY(DivNoNan,
 // }
 static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                                xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   if (DataTypeIsUnsigned(dtype)) {
     return xla::Div(x, y);
   }
@@ -105,7 +105,7 @@ XLA_MAKE_BINARY(FloorDiv,
 
 static xla::XlaOp XlogyImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                             xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   auto zero = XlaHelpers::Zero(b, dtype);
   auto is_zero = xla::Eq(x, zero);
   return xla::Select(is_zero, zero, xla::Mul(x, xla::Log(y)));
@@ -114,7 +114,7 @@ XLA_MAKE_BINARY(Xlogy, XlogyImpl(b, input_type(0), lhs, rhs, broadcast_helper));
 
 static xla::XlaOp XdivyImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                             xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   auto zero = XlaHelpers::Zero(b, dtype);
   auto is_zero = xla::Eq(x, zero);
   return xla::Select(is_zero, zero, xla::Div(x, y));
@@ -126,7 +126,7 @@ XLA_MAKE_BINARY(Xdivy, XdivyImpl(b, input_type(0), lhs, rhs, broadcast_helper));
 // return (x < T(0)) == (y < T(0)) ? trunc_mod : std::fmod(trunc_mod + y, y);
 static xla::XlaOp FloorModImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                                xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   auto zero = XlaHelpers::Zero(b, dtype);
   auto same_sign = xla::Eq(xla::Lt(x, zero), xla::Lt(y, zero));
   auto trunc_mod = xla::Rem(x, y);
diff --git a/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc b/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc
index 696c1c39be..9bb11fb67e 100644
--- a/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc
@@ -13,16 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/algorithm/container.h"
-#include "tensorflow/compiler/tf2xla/shape_util.h"
-#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/lib/broadcast.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
-#include "tensorflow/compiler/xla/client/lib/constants.h"
-#include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/util/bcast.h"
 
 namespace tensorflow {
 namespace {
@@ -37,59 +32,9 @@ class BroadcastToOp : public XlaOpKernel {
     TensorShape output_shape;
     OP_REQUIRES_OK(context, context->ConstantInputAsShape(1, &output_shape));
 
-    OP_REQUIRES(context, input_shape.dims() <= output_shape.dims(),
-                errors::InvalidArgument(
-                    "Input rank (", input_shape.dims(),
-                    ") must be less than or equal to the output rank (",
-                    output_shape.dims(), ")"));
-
-    auto input_dims = input_shape.dim_sizes();
-    auto output_dims = output_shape.dim_sizes();
-
-    // Broadcasting is done right-to-left on right-aligned dimensions; reverse
-    // the two vectors so elements to be broadcast are aligned.
-    absl::c_reverse(input_dims);
-    absl::c_reverse(output_dims);
-
-    std::vector<int64> broadcast_dims;
-    std::vector<int64> broadcast_shape;
-    for (int i = 0; i < output_shape.dims(); ++i) {
-      if (i < input_shape.dims()) {
-        OP_REQUIRES(
-            context,
-            (output_dims[i] == 0 && input_dims[i] == 0) ||
-                (input_dims[i] != 0 && output_dims[i] % input_dims[i] == 0),
-            errors::InvalidArgument("invalid shape to broadcast from ",
-                                    input_shape.DebugString(), " to ",
-                                    output_shape.DebugString()));
-
-        broadcast_dims.push_back(broadcast_shape.size());
-        if (output_dims[i] == input_dims[i]) {
-          broadcast_shape.push_back(output_dims[i]);
-        } else if (output_dims[i] != input_dims[i]) {
-          // Add dimensions [I, O/I], which we will later flatten to just
-          // [O]. We must do this in two phases since XLA broadcasting does not
-          // support tiling.
-          broadcast_shape.push_back(input_dims[i]);
-          broadcast_shape.push_back(output_dims[i] / input_dims[i]);
-        }
-      } else {
-        broadcast_shape.push_back(output_dims[i]);
-      }
-    }
-    absl::c_reverse(broadcast_dims);
-    int broadcast_shape_size = broadcast_shape.size();
-    for (int64& broadcast_dim : broadcast_dims) {
-      broadcast_dim = broadcast_shape_size - broadcast_dim - 1;
-    }
-    absl::c_reverse(broadcast_shape);
-    xla::XlaOp output = xla::Reshape(
-        xla::BroadcastInDim(context->Input(0),
-                            xla::ShapeUtil::MakeShape(
-                                context->input_xla_type(0), broadcast_shape),
-                            broadcast_dims),
-        output_shape.dim_sizes());
-    context->SetOutput(0, output);
+    auto output = BroadcastTo(context->Input(0), output_shape.dim_sizes());
+    OP_REQUIRES_OK(context, output.status());
+    context->SetOutput(0, output.ValueOrDie());
   }
 };
 
diff --git a/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc b/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc
index ef1015552d..234f7b4a01 100644
--- a/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/tf2xla/kernels/cwise_ops.h"
 
+#include "tensorflow/compiler/tf2xla/lib/broadcast.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
@@ -39,7 +40,8 @@ void XlaBinaryOp::Compile(XlaOpKernelContext* ctx) {
   // compute valid broadcast shapes, but rely below on XLA to
   // automatically perform the broadcast assuming its valid shapes are
   // a superset of TensorFlow's valid shapes.
-  BCast bcast(BCast::FromShape(lhs_shape), BCast::FromShape(rhs_shape));
+  BCast bcast(BCast::FromShape(lhs_shape), BCast::FromShape(rhs_shape),
+              /*fewer_dims_optimization=*/false);
   if (!bcast.IsValid()) {
     ctx->SetStatus(errors::InvalidArgument("Incompatible shapes: ",
                                            lhs_shape.DebugString(), " vs. ",
@@ -86,51 +88,18 @@ void XlaBinaryOp::Compile(XlaOpKernelContext* ctx) {
 }
 
 /* static */ std::pair<xla::XlaOp, xla::XlaOp> XlaBinaryOp::Broadcast(
-    xla::XlaBuilder* builder, const xla::XlaOp& lhs, const xla::XlaOp& rhs,
-    const BCast& broadcast_helper) {
-  // Manually construct the broadcasting since MapN does not do
-  // automatic broadcasting. The bcast helper ensures that
-  // lhs.reshape(bcast.x_reshape()).broadcast(bcast.x_bcast()) and
-  // rhs.reshape(bcast.y_reshape()).broadcast(bcast.y_bcast()) have
-  // the same shape, so can be operated on by MapN.
-
-  // First reshape the inputs, which should be a metadata-only
-  // operation since we are flattening the dimensions in order.
-  auto lhs_shaped = xla::Reshape(lhs, broadcast_helper.x_reshape());
-  auto rhs_shaped = xla::Reshape(rhs, broadcast_helper.y_reshape());
-
-  // Next broadcast the necessary input dimensions. We rely on the
-  // XLA optimizer to be smart about the fact that we are asking
-  // it to broadcast size 1 on some of these dimensions, to avoid
-  // adding complexity to this code.
-  auto lhs_broadcast = xla::Broadcast(lhs_shaped, broadcast_helper.x_bcast());
-  int lhs_size = broadcast_helper.x_bcast().size();
-  auto rhs_broadcast = xla::Broadcast(rhs_shaped, broadcast_helper.y_bcast());
-  int rhs_size = broadcast_helper.y_bcast().size();
-
-  // Now reshape them to the correct output shape. After the
-  // broadcast each side is twice as wide as it should be, since the
-  // broadcast dimensions were prepended to the shape. Reshape
-  // flattening each original dimension with the prepended broadcast
-  // dimension. E.g. if we started out with lhs_shaped with shape
-  // [5,2,3] and x_bcast was [2,1,7] then lhs_broadcast would have
-  // shape [2,1,7,5,2,3] and we want to reshape it to [10,2,21].
-  std::vector<int64> lhs_reorder;
-  for (int i = 0; i < lhs_size; ++i) {
-    lhs_reorder.push_back(i);
-    lhs_reorder.push_back(i + lhs_size);
+    xla::XlaOp lhs, xla::XlaOp rhs, const BCast& broadcast_helper) {
+  auto lhs_output = BroadcastTo(lhs, broadcast_helper.output_shape());
+  if (!lhs_output.ok()) {
+    xla::XlaOp error = lhs.builder()->ReportError(lhs_output.status());
+    return {error, error};
   }
-  auto lhs_output =
-      xla::Reshape(lhs_broadcast, lhs_reorder, broadcast_helper.output_shape());
-  std::vector<int64> rhs_reorder;
-  for (int i = 0; i < rhs_size; ++i) {
-    rhs_reorder.push_back(i);
-    rhs_reorder.push_back(i + rhs_size);
+  auto rhs_output = BroadcastTo(rhs, broadcast_helper.output_shape());
+  if (!rhs_output.ok()) {
+    xla::XlaOp error = rhs.builder()->ReportError(rhs_output.status());
+    return {error, error};
   }
-  auto rhs_output =
-      xla::Reshape(rhs_broadcast, rhs_reorder, broadcast_helper.output_shape());
-
-  return {lhs_output, rhs_output};
+  return {lhs_output.ValueOrDie(), rhs_output.ValueOrDie()};
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/cwise_ops.h b/tensorflow/compiler/tf2xla/kernels/cwise_ops.h
index 6653944a91..516ead4bfe 100644
--- a/tensorflow/compiler/tf2xla/kernels/cwise_ops.h
+++ b/tensorflow/compiler/tf2xla/kernels/cwise_ops.h
@@ -67,8 +67,7 @@ class XlaBinaryOp : public XlaOpKernel {
   // 'broadcast_helper', yielding arguments 'lhs' and 'rhs' that have the same
   // shape.
   static std::pair<xla::XlaOp, xla::XlaOp> Broadcast(
-      xla::XlaBuilder* builder, const xla::XlaOp& lhs, const xla::XlaOp& rhs,
-      const BCast& broadcast_helper);
+      xla::XlaOp lhs, xla::XlaOp rhs, const BCast& broadcast_helper);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD
index 8597e7f139..1ce3930fd1 100644
--- a/tensorflow/compiler/tf2xla/lib/BUILD
+++ b/tensorflow/compiler/tf2xla/lib/BUILD
@@ -31,6 +31,22 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "broadcast",
+    srcs = ["broadcast.cc"],
+    hdrs = ["broadcast.h"],
+    deps = [
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
 cc_library(
     name = "cholesky",
     srcs = ["cholesky.cc"],
diff --git a/tensorflow/compiler/tf2xla/lib/broadcast.cc b/tensorflow/compiler/tf2xla/lib/broadcast.cc
new file mode 100644
index 0000000000..3e402ef855
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/lib/broadcast.cc
@@ -0,0 +1,93 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/lib/broadcast.h"
+
+#include <vector>
+
+#include "absl/algorithm/container.h"
+#include "absl/strings/str_join.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/util.h"
+
+namespace tensorflow {
+
+xla::StatusOr<xla::XlaOp> BroadcastTo(xla::XlaOp input,
+                                      absl::Span<int64 const> output_dims) {
+  xla::XlaBuilder* builder = input.builder();
+  TF_ASSIGN_OR_RETURN(xla::Shape input_shape, builder->GetShape(input));
+  absl::Span<int64 const> input_dims =
+      xla::AsInt64Slice(input_shape.dimensions());
+
+  if (input_dims == output_dims) {
+    return input;
+  }
+
+  if (input_dims.size() > output_dims.size()) {
+    return errors::InvalidArgument(
+        "Input shape (", xla::ShapeUtil::HumanString(input_shape),
+        ") must have rank less than or equal to the output shape [",
+        absl::StrJoin(output_dims, ","), "]");
+  }
+
+  std::vector<int64> broadcast_dims;
+  std::vector<int64> broadcast_shape;
+  auto input_it = input_dims.rbegin();
+  for (auto output_it = output_dims.rbegin(); output_it != output_dims.rend();
+       ++output_it) {
+    if (input_it != input_dims.rend()) {
+      if (!(*output_it == 0 && *input_it == 0) &&
+          !(*input_it != 0 && *output_it % *input_it == 0)) {
+        return errors::InvalidArgument("Invalid shape broadcast from ",
+                                       xla::ShapeUtil::HumanString(input_shape),
+                                       " to [", absl::StrJoin(output_dims, ","),
+                                       "]");
+      }
+
+      broadcast_dims.push_back(broadcast_shape.size());
+      if (*output_it == *input_it) {
+        broadcast_shape.push_back(*output_it);
+      } else if (*output_it != *input_it) {
+        // Add dimensions [I, O/I], which we will later flatten to just
+        // [O]. We must do this in two phases since XLA broadcasting does not
+        // support tiling.
+        broadcast_shape.push_back(*input_it);
+        broadcast_shape.push_back(*output_it / *input_it);
+      }
+      ++input_it;
+    } else {
+      broadcast_shape.push_back(*output_it);
+    }
+  }
+  TF_RET_CHECK(input_it == input_dims.rend());
+
+  absl::c_reverse(broadcast_dims);
+  int broadcast_shape_size = broadcast_shape.size();
+  for (int64& broadcast_dim : broadcast_dims) {
+    broadcast_dim = broadcast_shape_size - broadcast_dim - 1;
+  }
+  absl::c_reverse(broadcast_shape);
+  xla::XlaOp output = xla::BroadcastInDim(
+      input,
+      xla::ShapeUtil::MakeShape(input_shape.element_type(), broadcast_shape),
+      broadcast_dims);
+  if (broadcast_shape != output_dims) {
+    output = xla::Reshape(output, output_dims);
+  }
+  return output;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/lib/broadcast.h b/tensorflow/compiler/tf2xla/lib/broadcast.h
new file mode 100644
index 0000000000..591e696f06
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/lib/broadcast.h
@@ -0,0 +1,32 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_BROADCAST_H_
+#define TENSORFLOW_COMPILER_TF2XLA_LIB_BROADCAST_H_
+
+#include "absl/types/span.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/statusor.h"
+
+namespace tensorflow {
+
+// Broadcasts 'input' up to shape 'output_dims', using TensorFlow broadcasting
+// rules. Supports broadcasting a dimension of size x to size x*y, i.e., tiling.
+xla::StatusOr<xla::XlaOp> BroadcastTo(xla::XlaOp input,
+                                      absl::Span<int64 const> output_dims);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_TF2XLA_LIB_BROADCAST_H_
-- 
GitLab


From cea6b4959152981ab778001f30ff9ad87bb4fc9e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 08:34:28 -0700
Subject: [PATCH 1180/1357] Relax some unnecessary 4D array restrictions

PiperOrigin-RevId: 215910400
---
 tensorflow/contrib/lite/kernels/internal/types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index 64a39dd2a2..c6bc6074d4 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -268,8 +268,9 @@ class RuntimeShape {
   // This creates a shape padded to the desired size with the specified value.
   RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value)
       : size_(0) {
+    // If the following check fails, it is likely because a 4D-only kernel is
+    // being used with an array of larger dimension count.
     TFLITE_CHECK_GE(new_shape_size, shape.DimensionsCount());
-    TFLITE_CHECK_LE(new_shape_size, kMaxSmallSize);
     Resize(new_shape_size);
     const int size_increase = new_shape_size - shape.DimensionsCount();
     for (int i = 0; i < size_increase; ++i) {
-- 
GitLab


From 53faa313b7628cd8c9fbb836544cc6482cafb7a4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 08:46:54 -0700
Subject: [PATCH 1181/1357] Switch NCCL to build from open source (version
 2.3.5-5) by default.

Note to users manually patching ptxas from a later toolkit version:
Building NCCL requires the same version of ptxas and nvlink.

PiperOrigin-RevId: 215911973
---
 configure.py                        |   17 +-
 tensorflow/workspace.bzl            |   10 +-
 third_party/gpus/cuda_configure.bzl | 1979 ++++++++++++++-------------
 third_party/nccl/LICENSE            |  231 +---
 third_party/nccl/archive.BUILD      |  179 +++
 third_party/nccl/build_defs.bzl.tpl |  210 +++
 third_party/nccl/nccl_archive.BUILD |   68 -
 third_party/nccl/nccl_configure.bzl |  214 +--
 8 files changed, 1592 insertions(+), 1316 deletions(-)
 create mode 100644 third_party/nccl/archive.BUILD
 create mode 100644 third_party/nccl/build_defs.bzl.tpl
 delete mode 100644 third_party/nccl/nccl_archive.BUILD

diff --git a/configure.py b/configure.py
index a88fdb3555..65b4622995 100644
--- a/configure.py
+++ b/configure.py
@@ -35,7 +35,6 @@ except ImportError:
 
 _DEFAULT_CUDA_VERSION = '9.0'
 _DEFAULT_CUDNN_VERSION = '7'
-_DEFAULT_NCCL_VERSION = '2.2'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0'
 _DEFAULT_CUDA_PATH = '/usr/local/cuda'
 _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
@@ -1109,18 +1108,17 @@ def set_tf_nccl_install_path(environ_cp):
     raise ValueError('Currently NCCL is only supported on Linux platforms.')
 
   ask_nccl_version = (
-      'Please specify the NCCL version you want to use. If NCCL %s is not '
-      'installed, then you can use version 1.3 that can be fetched '
-      'automatically but it may have worse performance with multiple GPUs. '
-      '[Default is %s]: ') % (_DEFAULT_NCCL_VERSION, _DEFAULT_NCCL_VERSION)
+      'Please specify the locally installed NCCL version you want to use. '
+      '[Default is to use https://github.com/nvidia/nccl]: ')
 
   for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
     tf_nccl_version = get_from_env_or_user_or_default(
-        environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, _DEFAULT_NCCL_VERSION)
-    tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1)
+        environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, '')
+
+    if not tf_nccl_version:
+      break  # No need to get install path, building the open source code.
 
-    if tf_nccl_version == '1':
-      break  # No need to get install path, NCCL 1 is a GitHub repo.
+    tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1)
 
     # Look with ldconfig first if we can find the library in paths
     # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding
@@ -1232,7 +1230,6 @@ def set_tf_nccl_install_path(environ_cp):
   environ_cp['TF_NCCL_VERSION'] = tf_nccl_version
   write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version)
 
-
 def get_native_cuda_compute_capabilities(environ_cp):
   """Get native cuda compute capabilities.
 
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 72f3fd0cf8..8df41f96b8 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -585,12 +585,12 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "nccl_archive",
-        build_file = clean_dep("//third_party:nccl/nccl_archive.BUILD"),
-        sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176",
-        strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7",
+        build_file = clean_dep("//third_party:nccl/archive.BUILD"),
+        sha256 = "19132b5127fa8e02d95a09795866923f04064c8f1e0770b2b42ab551408882a4",
+        strip_prefix = "nccl-f93fe9bfd94884cec2ba711897222e0df5569a53",
         urls = [
-            "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz",
-            "https://github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz",
+            "https://mirror.bazel.build/github.com/nvidia/nccl/archive/f93fe9bfd94884cec2ba711897222e0df5569a53.tar.gz",
+            "https://github.com/nvidia/nccl/archive/f93fe9bfd94884cec2ba711897222e0df5569a53.tar.gz",
         ],
     )
 
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 69f4599c16..831a3067b2 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -126,118 +126,141 @@ load(
 )
 
 def _get_python_bin(repository_ctx):
-    """Gets the python bin path."""
-    python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
-    if python_bin != None:
-        return python_bin
-    python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python"
-    python_bin_path = repository_ctx.which(python_bin_name)
-    if python_bin_path != None:
-        return str(python_bin_path)
-    auto_configure_fail("Cannot find python in PATH, please make sure " +
-                        "python is installed and add its directory in PATH, or --define " +
-                        "%s='/something/else'.\nPATH=%s" % (
-                            _PYTHON_BIN_PATH,
-                            repository_ctx.os.environ.get("PATH", ""),
-                        ))
+  """Gets the python bin path."""
+  python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
+  if python_bin != None:
+    return python_bin
+  python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python"
+  python_bin_path = repository_ctx.which(python_bin_name)
+  if python_bin_path != None:
+    return str(python_bin_path)
+  auto_configure_fail(
+      "Cannot find python in PATH, please make sure " +
+      "python is installed and add its directory in PATH, or --define " +
+      "%s='/something/else'.\nPATH=%s" % (
+          _PYTHON_BIN_PATH,
+          repository_ctx.os.environ.get("PATH", ""),
+      ))
+
 
 def _get_nvcc_tmp_dir_for_windows(repository_ctx):
-    """Return the tmp directory for nvcc to generate intermediate source files."""
-    escaped_tmp_dir = escape_string(
-        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace("\\", "\\\\"),
-    )
-    return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir"
+  """Return the tmp directory for nvcc to generate intermediate source files."""
+  escaped_tmp_dir = escape_string(
+      get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
+          "\\", "\\\\"),)
+  return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir"
 
-def _get_msvc_compiler(repository_ctx):
-    vc_path = find_vc_path(repository_ctx)
-    return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/")
 
-def _get_win_cuda_defines(repository_ctx):
-    """Return CROSSTOOL defines for Windows"""
-
-    # If we are not on Windows, return empty vaules for Windows specific fields.
-    # This ensures the CROSSTOOL file parser is happy.
-    if not _is_windows(repository_ctx):
-        return {
-            "%{msvc_env_tmp}": "",
-            "%{msvc_env_path}": "",
-            "%{msvc_env_include}": "",
-            "%{msvc_env_lib}": "",
-            "%{msvc_cl_path}": "",
-            "%{msvc_ml_path}": "",
-            "%{msvc_link_path}": "",
-            "%{msvc_lib_path}": "",
-            "%{cxx_builtin_include_directory}": "",
-        }
-
-    vc_path = find_vc_path(repository_ctx)
-    if not vc_path:
-        auto_configure_fail("Visual C++ build tools not found on your machine." +
-                            "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using")
-        return {}
-
-    env = setup_vc_env_vars(repository_ctx, vc_path)
-    escaped_paths = escape_string(env["PATH"])
-    escaped_include_paths = escape_string(env["INCLUDE"])
-    escaped_lib_paths = escape_string(env["LIB"])
-    escaped_tmp_dir = escape_string(
-        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace("\\", "\\\\"),
-    )
+def _get_msvc_compiler(repository_ctx):
+  vc_path = find_vc_path(repository_ctx)
+  return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/")
 
-    msvc_cl_path = "windows/msvc_wrapper_for_nvcc.bat"
-    msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace("\\", "/")
-    msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace("\\", "/")
-    msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace("\\", "/")
 
-    # nvcc will generate some temporary source files under %{nvcc_tmp_dir}
-    # The generated files are guranteed to have unique name, so they can share the same tmp directory
-    escaped_cxx_include_directories = ["cxx_builtin_include_directory: \"%s\"" % _get_nvcc_tmp_dir_for_windows(repository_ctx)]
-    for path in escaped_include_paths.split(";"):
-        if path:
-            escaped_cxx_include_directories.append("cxx_builtin_include_directory: \"%s\"" % path)
+def _get_win_cuda_defines(repository_ctx):
+  """Return CROSSTOOL defines for Windows"""
 
+  # If we are not on Windows, return empty vaules for Windows specific fields.
+  # This ensures the CROSSTOOL file parser is happy.
+  if not _is_windows(repository_ctx):
     return {
-        "%{msvc_env_tmp}": escaped_tmp_dir,
-        "%{msvc_env_path}": escaped_paths,
-        "%{msvc_env_include}": escaped_include_paths,
-        "%{msvc_env_lib}": escaped_lib_paths,
-        "%{msvc_cl_path}": msvc_cl_path,
-        "%{msvc_ml_path}": msvc_ml_path,
-        "%{msvc_link_path}": msvc_link_path,
-        "%{msvc_lib_path}": msvc_lib_path,
-        "%{cxx_builtin_include_directory}": "\n".join(escaped_cxx_include_directories),
+        "%{msvc_env_tmp}": "",
+        "%{msvc_env_path}": "",
+        "%{msvc_env_include}": "",
+        "%{msvc_env_lib}": "",
+        "%{msvc_cl_path}": "",
+        "%{msvc_ml_path}": "",
+        "%{msvc_link_path}": "",
+        "%{msvc_lib_path}": "",
+        "%{cxx_builtin_include_directory}": "",
     }
 
+  vc_path = find_vc_path(repository_ctx)
+  if not vc_path:
+    auto_configure_fail(
+        "Visual C++ build tools not found on your machine." +
+        "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using"
+    )
+    return {}
+
+  env = setup_vc_env_vars(repository_ctx, vc_path)
+  escaped_paths = escape_string(env["PATH"])
+  escaped_include_paths = escape_string(env["INCLUDE"])
+  escaped_lib_paths = escape_string(env["LIB"])
+  escaped_tmp_dir = escape_string(
+      get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
+          "\\", "\\\\"),)
+
+  msvc_cl_path = "windows/msvc_wrapper_for_nvcc.bat"
+  msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace(
+      "\\", "/")
+  msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace(
+      "\\", "/")
+  msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace(
+      "\\", "/")
+
+  # nvcc will generate some temporary source files under %{nvcc_tmp_dir}
+  # The generated files are guranteed to have unique name, so they can share the same tmp directory
+  escaped_cxx_include_directories = [
+      "cxx_builtin_include_directory: \"%s\"" %
+      _get_nvcc_tmp_dir_for_windows(repository_ctx)
+  ]
+  for path in escaped_include_paths.split(";"):
+    if path:
+      escaped_cxx_include_directories.append(
+          "cxx_builtin_include_directory: \"%s\"" % path)
+
+  return {
+      "%{msvc_env_tmp}":
+          escaped_tmp_dir,
+      "%{msvc_env_path}":
+          escaped_paths,
+      "%{msvc_env_include}":
+          escaped_include_paths,
+      "%{msvc_env_lib}":
+          escaped_lib_paths,
+      "%{msvc_cl_path}":
+          msvc_cl_path,
+      "%{msvc_ml_path}":
+          msvc_ml_path,
+      "%{msvc_link_path}":
+          msvc_link_path,
+      "%{msvc_lib_path}":
+          msvc_lib_path,
+      "%{cxx_builtin_include_directory}":
+          "\n".join(escaped_cxx_include_directories),
+  }
+
 # TODO(dzc): Once these functions have been factored out of Bazel's
 # cc_configure.bzl, load them from @bazel_tools instead.
 # BEGIN cc_configure common functions.
 def find_cc(repository_ctx):
-    """Find the C++ compiler."""
-    if _is_windows(repository_ctx):
-        return _get_msvc_compiler(repository_ctx)
-
-    if _use_cuda_clang(repository_ctx):
-        target_cc_name = "clang"
-        cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
-        if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
-            return "extra_tools/bin/clang"
-    else:
-        target_cc_name = "gcc"
-        cc_path_envvar = _GCC_HOST_COMPILER_PATH
-    cc_name = target_cc_name
-
-    if cc_path_envvar in repository_ctx.os.environ:
-        cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
-        if cc_name_from_env:
-            cc_name = cc_name_from_env
-    if cc_name.startswith("/"):
-        # Absolute path, maybe we should make this supported by our which function.
-        return cc_name
-    cc = repository_ctx.which(cc_name)
-    if cc == None:
-        fail(("Cannot find {}, either correct your path or set the {}" +
-              " environment variable").format(target_cc_name, cc_path_envvar))
-    return cc
+  """Find the C++ compiler."""
+  if _is_windows(repository_ctx):
+    return _get_msvc_compiler(repository_ctx)
+
+  if _use_cuda_clang(repository_ctx):
+    target_cc_name = "clang"
+    cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
+    if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
+      return "extra_tools/bin/clang"
+  else:
+    target_cc_name = "gcc"
+    cc_path_envvar = _GCC_HOST_COMPILER_PATH
+  cc_name = target_cc_name
+
+  if cc_path_envvar in repository_ctx.os.environ:
+    cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+    if cc_name_from_env:
+      cc_name = cc_name_from_env
+  if cc_name.startswith("/"):
+    # Absolute path, maybe we should make this supported by our which function.
+    return cc_name
+  cc = repository_ctx.which(cc_name)
+  if cc == None:
+    fail(("Cannot find {}, either correct your path or set the {}" +
+          " environment variable").format(target_cc_name, cc_path_envvar))
+  return cc
+
 
 _INC_DIR_MARKER_BEGIN = "#include <...>"
 
@@ -246,80 +269,82 @@ _OSX_FRAMEWORK_SUFFIX = " (framework directory)"
 _OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
 
 def _cxx_inc_convert(path):
-    """Convert path returned by cc -E xc++ in a complete path."""
-    path = path.strip()
-    if path.endswith(_OSX_FRAMEWORK_SUFFIX):
-        path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
-    return path
+  """Convert path returned by cc -E xc++ in a complete path."""
+  path = path.strip()
+  if path.endswith(_OSX_FRAMEWORK_SUFFIX):
+    path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
+  return path
+
 
 def _normalize_include_path(repository_ctx, path):
-    """Normalizes include paths before writing them to the crosstool.
+  """Normalizes include paths before writing them to the crosstool.
 
     If path points inside the 'crosstool' folder of the repository, a relative
     path is returned.
     If path points outside the 'crosstool' folder, an absolute path is returned.
     """
-    path = str(repository_ctx.path(path))
-    crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
+  path = str(repository_ctx.path(path))
+  crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
+
+  if path.startswith(crosstool_folder):
+    # We drop the path to "$REPO/crosstool" and a trailing path separator.
+    return path[len(crosstool_folder) + 1:]
+  return path
 
-    if path.startswith(crosstool_folder):
-        # We drop the path to "$REPO/crosstool" and a trailing path separator.
-        return path[len(crosstool_folder) + 1:]
-    return path
 
 def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
-    """Compute the list of default C or C++ include directories."""
-    if lang_is_cpp:
-        lang = "c++"
-    else:
-        lang = "c"
-    result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"])
-    index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
-    if index1 == -1:
-        return []
-    index1 = result.stderr.find("\n", index1)
-    if index1 == -1:
-        return []
-    index2 = result.stderr.rfind("\n ")
-    if index2 == -1 or index2 < index1:
-        return []
-    index2 = result.stderr.find("\n", index2 + 1)
-    if index2 == -1:
-        inc_dirs = result.stderr[index1 + 1:]
-    else:
-        inc_dirs = result.stderr[index1 + 1:index2].strip()
+  """Compute the list of default C or C++ include directories."""
+  if lang_is_cpp:
+    lang = "c++"
+  else:
+    lang = "c"
+  result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"])
+  index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
+  if index1 == -1:
+    return []
+  index1 = result.stderr.find("\n", index1)
+  if index1 == -1:
+    return []
+  index2 = result.stderr.rfind("\n ")
+  if index2 == -1 or index2 < index1:
+    return []
+  index2 = result.stderr.find("\n", index2 + 1)
+  if index2 == -1:
+    inc_dirs = result.stderr[index1 + 1:]
+  else:
+    inc_dirs = result.stderr[index1 + 1:index2].strip()
+
+  return [
+      _normalize_include_path(repository_ctx, _cxx_inc_convert(p))
+      for p in inc_dirs.split("\n")
+  ]
 
-    return [
-        _normalize_include_path(repository_ctx, _cxx_inc_convert(p))
-        for p in inc_dirs.split("\n")
-    ]
 
 def get_cxx_inc_directories(repository_ctx, cc):
-    """Compute the list of default C and C++ include directories."""
-
-    # For some reason `clang -xc` sometimes returns include paths that are
-    # different from the ones from `clang -xc++`. (Symlink and a dir)
-    # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
-    includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
-    includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
-
-    includes_cpp_set = depset(includes_cpp)
-    return includes_cpp + [
-        inc
-        for inc in includes_c
-        if inc not in includes_cpp_set
-    ]
+  """Compute the list of default C and C++ include directories."""
+
+  # For some reason `clang -xc` sometimes returns include paths that are
+  # different from the ones from `clang -xc++`. (Symlink and a dir)
+  # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
+  includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+  includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+  includes_cpp_set = depset(includes_cpp)
+  return includes_cpp + [
+      inc for inc in includes_c if inc not in includes_cpp_set
+  ]
+
 
 def auto_configure_fail(msg):
-    """Output failure message when cuda configuration fails."""
-    red = "\033[0;31m"
-    no_color = "\033[0m"
-    fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
+  """Output failure message when cuda configuration fails."""
+  red = "\033[0;31m"
+  no_color = "\033[0m"
+  fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
 
 # END cc_configure common functions (see TODO above).
 
 def _host_compiler_includes(repository_ctx, cc):
-    """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
+  """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
 
     Args:
       repository_ctx: The repository context.
@@ -330,14 +355,15 @@ def _host_compiler_includes(repository_ctx, cc):
       host compiler include directories, which can be added to the CROSSTOOL
       file.
     """
-    inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
-    inc_entries = []
-    for inc_dir in inc_dirs:
-        inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
-    return "\n".join(inc_entries)
+  inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
+  inc_entries = []
+  for inc_dir in inc_dirs:
+    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+  return "\n".join(inc_entries)
+
 
 def _cuda_include_path(repository_ctx, cuda_config):
-    """Generates the cxx_builtin_include_directory entries for cuda inc dirs.
+  """Generates the cxx_builtin_include_directory entries for cuda inc dirs.
 
     Args:
       repository_ctx: The repository context.
@@ -348,39 +374,41 @@ def _cuda_include_path(repository_ctx, cuda_config):
       host compiler include directories, which can be added to the CROSSTOOL
       file.
     """
-    nvcc_path = repository_ctx.path("%s/bin/nvcc%s" %
-                                    (
-                                        cuda_config.cuda_toolkit_path,
-                                        ".exe" if cuda_config.cpu_value == "Windows" else "",
-                                    ))
-    result = repository_ctx.execute([
-        nvcc_path,
-        "-v",
-        "/dev/null",
-        "-o",
-        "/dev/null",
-    ])
-    target_dir = ""
-    for one_line in result.stderr.splitlines():
-        if one_line.startswith("#$ _TARGET_DIR_="):
-            target_dir = (cuda_config.cuda_toolkit_path + "/" +
-                          one_line.replace("#$ _TARGET_DIR_=", "") + "/include")
-    inc_entries = []
-    if target_dir != "":
-        inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % target_dir)
-    default_include = cuda_config.cuda_toolkit_path + "/include"
-    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" %
-                       default_include)
-    return "\n".join(inc_entries)
+  nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
+      cuda_config.cuda_toolkit_path,
+      ".exe" if cuda_config.cpu_value == "Windows" else "",
+  ))
+  result = repository_ctx.execute([
+      nvcc_path,
+      "-v",
+      "/dev/null",
+      "-o",
+      "/dev/null",
+  ])
+  target_dir = ""
+  for one_line in result.stderr.splitlines():
+    if one_line.startswith("#$ _TARGET_DIR_="):
+      target_dir = (
+          cuda_config.cuda_toolkit_path + "/" + one_line.replace(
+              "#$ _TARGET_DIR_=", "") + "/include")
+  inc_entries = []
+  if target_dir != "":
+    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % target_dir)
+  default_include = cuda_config.cuda_toolkit_path + "/include"
+  inc_entries.append(
+      "  cxx_builtin_include_directory: \"%s\"" % default_include)
+  return "\n".join(inc_entries)
+
 
 def _enable_cuda(repository_ctx):
-    if "TF_NEED_CUDA" in repository_ctx.os.environ:
-        enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
-        return enable_cuda == "1"
-    return False
+  if "TF_NEED_CUDA" in repository_ctx.os.environ:
+    enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
+    return enable_cuda == "1"
+  return False
+
 
-def _cuda_toolkit_path(repository_ctx):
-    """Finds the cuda toolkit directory.
+def cuda_toolkit_path(repository_ctx):
+  """Finds the cuda toolkit directory.
 
     Args:
       repository_ctx: The repository context.
@@ -388,27 +416,31 @@ def _cuda_toolkit_path(repository_ctx):
     Returns:
       A speculative real path of the cuda toolkit install directory.
     """
-    cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
-    if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
-        cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
-    if not repository_ctx.path(cuda_toolkit_path).exists:
-        auto_configure_fail("Cannot find cuda toolkit path.")
-    return str(repository_ctx.path(cuda_toolkit_path).realpath)
+  cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
+  if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
+    cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
+  if not repository_ctx.path(cuda_toolkit_path).exists:
+    auto_configure_fail("Cannot find cuda toolkit path.")
+  return str(repository_ctx.path(cuda_toolkit_path).realpath)
+
 
 def _cudnn_install_basedir(repository_ctx):
-    """Finds the cudnn install directory."""
-    cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
-    if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
-        cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
-    if not repository_ctx.path(cudnn_install_path).exists:
-        auto_configure_fail("Cannot find cudnn install path.")
-    return cudnn_install_path
+  """Finds the cudnn install directory."""
+  cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
+  if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
+    cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
+  if not repository_ctx.path(cudnn_install_path).exists:
+    auto_configure_fail("Cannot find cudnn install path.")
+  return cudnn_install_path
+
 
 def matches_version(environ_version, detected_version):
-    """Checks whether the user-specified version matches the detected version.
+  """Checks whether the user-specified version matches the detected version.
 
-    This function performs a weak matching so that if the user specifies only the
-    major or major and minor versions, the versions are still considered matching
+    This function performs a weak matching so that if the user specifies only
+    the
+    major or major and minor versions, the versions are still considered
+    matching
     if the version parts match. To illustrate:
 
         environ_version  detected_version  result
@@ -424,25 +456,25 @@ def matches_version(environ_version, detected_version):
         variables.
       detected_version: The version autodetected from the CUDA installation on
         the system.
-
     Returns: True if user-specified version matches detected version and False
       otherwise.
-    """
-    environ_version_parts = environ_version.split(".")
-    detected_version_parts = detected_version.split(".")
-    if len(detected_version_parts) < len(environ_version_parts):
-        return False
-    for i, part in enumerate(detected_version_parts):
-        if i >= len(environ_version_parts):
-            break
-        if part != environ_version_parts[i]:
-            return False
-    return True
+  """
+  environ_version_parts = environ_version.split(".")
+  detected_version_parts = detected_version.split(".")
+  if len(detected_version_parts) < len(environ_version_parts):
+    return False
+  for i, part in enumerate(detected_version_parts):
+    if i >= len(environ_version_parts):
+      break
+    if part != environ_version_parts[i]:
+      return False
+  return True
+
 
 _NVCC_VERSION_PREFIX = "Cuda compilation tools, release "
 
 def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value):
-    """Detects the version of CUDA installed on the system.
+  """Detects the version of CUDA installed on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -452,64 +484,61 @@ def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value):
       String containing the version of CUDA.
     """
 
-    # Run nvcc --version and find the line containing the CUDA version.
-    nvcc_path = repository_ctx.path("%s/bin/nvcc%s" %
-                                    (
-                                        cuda_toolkit_path,
-                                        ".exe" if cpu_value == "Windows" else "",
-                                    ))
-    if not nvcc_path.exists:
-        auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path))
-    result = repository_ctx.execute([str(nvcc_path), "--version"])
-    if result.stderr:
-        auto_configure_fail("Error running nvcc --version: %s" % result.stderr)
-    lines = result.stdout.splitlines()
-    version_line = lines[len(lines) - 1]
-    if version_line.find(_NVCC_VERSION_PREFIX) == -1:
-        auto_configure_fail(
-            "Could not parse CUDA version from nvcc --version. Got: %s" %
-            result.stdout,
-        )
-
-    # Parse the CUDA version from the line containing the CUDA version.
-    prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "")
-    parts = prefix_removed.split(",")
-    if len(parts) != 2 or len(parts[0]) < 2:
-        auto_configure_fail(
-            "Could not parse CUDA version from nvcc --version. Got: %s" %
-            result.stdout,
-        )
-    full_version = parts[1].strip()
-    if full_version.startswith("V"):
-        full_version = full_version[1:]
-
-    # Check whether TF_CUDA_VERSION was set by the user and fail if it does not
-    # match the detected version.
-    environ_version = ""
-    if _TF_CUDA_VERSION in repository_ctx.os.environ:
-        environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
-    if environ_version and not matches_version(environ_version, full_version):
-        auto_configure_fail(
-            ("CUDA version detected from nvcc (%s) does not match " +
-             "TF_CUDA_VERSION (%s)") % (full_version, environ_version),
-        )
-
-    # We only use the version consisting of the major and minor version numbers.
-    version_parts = full_version.split(".")
-    if len(version_parts) < 2:
-        auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.")
-    if cpu_value == "Windows":
-        version = "64_%s%s" % (version_parts[0], version_parts[1])
-    else:
-        version = "%s.%s" % (version_parts[0], version_parts[1])
-    return version
+  # Run nvcc --version and find the line containing the CUDA version.
+  nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
+      cuda_toolkit_path,
+      ".exe" if cpu_value == "Windows" else "",
+  ))
+  if not nvcc_path.exists:
+    auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path))
+  result = repository_ctx.execute([str(nvcc_path), "--version"])
+  if result.stderr:
+    auto_configure_fail("Error running nvcc --version: %s" % result.stderr)
+  lines = result.stdout.splitlines()
+  version_line = lines[len(lines) - 1]
+  if version_line.find(_NVCC_VERSION_PREFIX) == -1:
+    auto_configure_fail(
+        "Could not parse CUDA version from nvcc --version. Got: %s" %
+        result.stdout,)
+
+  # Parse the CUDA version from the line containing the CUDA version.
+  prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "")
+  parts = prefix_removed.split(",")
+  if len(parts) != 2 or len(parts[0]) < 2:
+    auto_configure_fail(
+        "Could not parse CUDA version from nvcc --version. Got: %s" %
+        result.stdout,)
+  full_version = parts[1].strip()
+  if full_version.startswith("V"):
+    full_version = full_version[1:]
+
+  # Check whether TF_CUDA_VERSION was set by the user and fail if it does not
+  # match the detected version.
+  environ_version = ""
+  if _TF_CUDA_VERSION in repository_ctx.os.environ:
+    environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
+  if environ_version and not matches_version(environ_version, full_version):
+    auto_configure_fail(
+        ("CUDA version detected from nvcc (%s) does not match " +
+         "TF_CUDA_VERSION (%s)") % (full_version, environ_version),)
+
+  # We only use the version consisting of the major and minor version numbers.
+  version_parts = full_version.split(".")
+  if len(version_parts) < 2:
+    auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.")
+  if cpu_value == "Windows":
+    version = "64_%s%s" % (version_parts[0], version_parts[1])
+  else:
+    version = "%s.%s" % (version_parts[0], version_parts[1])
+  return version
+
 
 _DEFINE_CUDNN_MAJOR = "#define CUDNN_MAJOR"
 _DEFINE_CUDNN_MINOR = "#define CUDNN_MINOR"
 _DEFINE_CUDNN_PATCHLEVEL = "#define CUDNN_PATCHLEVEL"
 
 def find_cuda_define(repository_ctx, header_dir, header_file, define):
-    """Returns the value of a #define in a header file.
+  """Returns the value of a #define in a header file.
 
     Greps through a header file and returns the value of the specified #define.
     If the #define is not found, then raise an error.
@@ -524,52 +553,52 @@ def find_cuda_define(repository_ctx, header_dir, header_file, define):
       The value of the #define found in the header.
     """
 
-    # Confirm location of the header and grep for the line defining the macro.
-    h_path = repository_ctx.path("%s/%s" % (header_dir, header_file))
-    if not h_path.exists:
-        auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path)))
-    result = repository_ctx.execute(
-        # Grep one more lines as some #defines are splitted into two lines.
-        ["grep", "--color=never", "-A1", "-E", define, str(h_path)],
-    )
-    if result.stderr:
-        auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr))
-
-    # Parse the version from the line defining the macro.
-    if result.stdout.find(define) == -1:
-        auto_configure_fail("Cannot find line containing '%s' in %s" %
-                            (define, h_path))
-
-    # Split results to lines
-    lines = result.stdout.split("\n")
-    num_lines = len(lines)
-    for l in range(num_lines):
-        line = lines[l]
-        if define in line:  # Find the line with define
-            version = line
-            if l != num_lines - 1 and line[-1] == "\\":  # Add next line, if multiline
-                version = version[:-1] + lines[l + 1]
-            break
-
-    # Remove any comments
-    version = version.split("//")[0]
-
-    # Remove define name
-    version = version.replace(define, "").strip()
-
-    # Remove the code after the version number.
-    version_end = version.find(" ")
-    if version_end != -1:
-        if version_end == 0:
-            auto_configure_fail(
-                "Cannot extract the version from line containing '%s' in %s" %
-                (define, str(h_path)),
-            )
-        version = version[:version_end].strip()
-    return version
+  # Confirm location of the header and grep for the line defining the macro.
+  h_path = repository_ctx.path("%s/%s" % (header_dir, header_file))
+  if not h_path.exists:
+    auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path)))
+  result = repository_ctx.execute(
+      # Grep one more lines as some #defines are splitted into two lines.
+      ["grep", "--color=never", "-A1", "-E", define,
+       str(h_path)],)
+  if result.stderr:
+    auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr))
+
+  # Parse the version from the line defining the macro.
+  if result.stdout.find(define) == -1:
+    auto_configure_fail(
+        "Cannot find line containing '%s' in %s" % (define, h_path))
+
+  # Split results to lines
+  lines = result.stdout.split("\n")
+  num_lines = len(lines)
+  for l in range(num_lines):
+    line = lines[l]
+    if define in line:  # Find the line with define
+      version = line
+      if l != num_lines - 1 and line[-1] == "\\":  # Add next line, if multiline
+        version = version[:-1] + lines[l + 1]
+      break
+
+  # Remove any comments
+  version = version.split("//")[0]
+
+  # Remove define name
+  version = version.replace(define, "").strip()
+
+  # Remove the code after the version number.
+  version_end = version.find(" ")
+  if version_end != -1:
+    if version_end == 0:
+      auto_configure_fail(
+          "Cannot extract the version from line containing '%s' in %s" %
+          (define, str(h_path)),)
+    version = version[:version_end].strip()
+  return version
+
 
 def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value):
-    """Detects the version of cuDNN installed on the system.
+  """Detects the version of cuDNN installed on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -579,68 +608,68 @@ def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value):
     Returns:
       A string containing the version of cuDNN.
     """
-    cudnn_header_dir = _find_cudnn_header_dir(
-        repository_ctx,
-        cudnn_install_basedir,
-    )
-    major_version = find_cuda_define(
-        repository_ctx,
-        cudnn_header_dir,
-        "cudnn.h",
-        _DEFINE_CUDNN_MAJOR,
-    )
-    minor_version = find_cuda_define(
-        repository_ctx,
-        cudnn_header_dir,
-        "cudnn.h",
-        _DEFINE_CUDNN_MINOR,
-    )
-    patch_version = find_cuda_define(
-        repository_ctx,
-        cudnn_header_dir,
-        "cudnn.h",
-        _DEFINE_CUDNN_PATCHLEVEL,
-    )
-    full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
-
-    # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not
-    # match the detected version.
-    environ_version = ""
-    if _TF_CUDNN_VERSION in repository_ctx.os.environ:
-        environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
-    if environ_version and not matches_version(environ_version, full_version):
-        cudnn_h_path = repository_ctx.path("%s/include/cudnn.h" %
-                                           cudnn_install_basedir)
-        auto_configure_fail(
-            ("cuDNN version detected from %s (%s) does not match " +
-             "TF_CUDNN_VERSION (%s)") %
-            (str(cudnn_h_path), full_version, environ_version),
-        )
-
-    # We only use the major version since we use the libcudnn libraries that are
-    # only versioned with the major version (e.g. libcudnn.so.5).
-    version = major_version
-    if cpu_value == "Windows":
-        version = "64_" + version
-    return version
-
-def _compute_capabilities(repository_ctx):
-    """Returns a list of strings representing cuda compute capabilities."""
-    if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
-        return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-    capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
-    capabilities = capabilities_str.split(",")
-    for capability in capabilities:
-        # Workaround for Skylark's lack of support for regex. This check should
-        # be equivalent to checking:
-        #     if re.match("[0-9]+.[0-9]+", capability) == None:
-        parts = capability.split(".")
-        if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
-            auto_configure_fail("Invalid compute capability: %s" % capability)
-    return capabilities
+  cudnn_header_dir = _find_cudnn_header_dir(
+      repository_ctx,
+      cudnn_install_basedir,
+  )
+  major_version = find_cuda_define(
+      repository_ctx,
+      cudnn_header_dir,
+      "cudnn.h",
+      _DEFINE_CUDNN_MAJOR,
+  )
+  minor_version = find_cuda_define(
+      repository_ctx,
+      cudnn_header_dir,
+      "cudnn.h",
+      _DEFINE_CUDNN_MINOR,
+  )
+  patch_version = find_cuda_define(
+      repository_ctx,
+      cudnn_header_dir,
+      "cudnn.h",
+      _DEFINE_CUDNN_PATCHLEVEL,
+  )
+  full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
+
+  # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not
+  # match the detected version.
+  environ_version = ""
+  if _TF_CUDNN_VERSION in repository_ctx.os.environ:
+    environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
+  if environ_version and not matches_version(environ_version, full_version):
+    cudnn_h_path = repository_ctx.path(
+        "%s/include/cudnn.h" % cudnn_install_basedir)
+    auto_configure_fail(("cuDNN version detected from %s (%s) does not match " +
+                         "TF_CUDNN_VERSION (%s)") %
+                        (str(cudnn_h_path), full_version, environ_version),)
+
+  # We only use the major version since we use the libcudnn libraries that are
+  # only versioned with the major version (e.g. libcudnn.so.5).
+  version = major_version
+  if cpu_value == "Windows":
+    version = "64_" + version
+  return version
+
+
+def compute_capabilities(repository_ctx):
+  """Returns a list of strings representing cuda compute capabilities."""
+  if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
+    return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+  capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
+  capabilities = capabilities_str.split(",")
+  for capability in capabilities:
+    # Workaround for Skylark's lack of support for regex. This check should
+    # be equivalent to checking:
+    #     if re.match("[0-9]+.[0-9]+", capability) == None:
+    parts = capability.split(".")
+    if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
+      auto_configure_fail("Invalid compute capability: %s" % capability)
+  return capabilities
+
 
 def get_cpu_value(repository_ctx):
-    """Returns the name of the host operating system.
+  """Returns the name of the host operating system.
 
     Args:
       repository_ctx: The repository context.
@@ -648,20 +677,22 @@ def get_cpu_value(repository_ctx):
     Returns:
       A string containing the name of the host operating system.
     """
-    os_name = repository_ctx.os.name.lower()
-    if os_name.startswith("mac os"):
-        return "Darwin"
-    if os_name.find("windows") != -1:
-        return "Windows"
-    result = repository_ctx.execute(["uname", "-s"])
-    return result.stdout.strip()
+  os_name = repository_ctx.os.name.lower()
+  if os_name.startswith("mac os"):
+    return "Darwin"
+  if os_name.find("windows") != -1:
+    return "Windows"
+  result = repository_ctx.execute(["uname", "-s"])
+  return result.stdout.strip()
+
 
 def _is_windows(repository_ctx):
-    """Returns true if the host operating system is windows."""
-    return get_cpu_value(repository_ctx) == "Windows"
+  """Returns true if the host operating system is windows."""
+  return get_cpu_value(repository_ctx) == "Windows"
+
 
 def _lib_name(lib, cpu_value, version = "", static = False):
-    """Constructs the platform-specific name of a library.
+  """Constructs the platform-specific name of a library.
 
     Args:
       lib: The name of the library, such as "cudart"
@@ -672,23 +703,24 @@ def _lib_name(lib, cpu_value, version = "", static = False):
     Returns:
       The platform-specific name of the library.
     """
-    if cpu_value in ("Linux", "FreeBSD"):
-        if static:
-            return "lib%s.a" % lib
-        else:
-            if version:
-                version = ".%s" % version
-            return "lib%s.so%s" % (lib, version)
-    elif cpu_value == "Windows":
-        return "%s.lib" % lib
-    elif cpu_value == "Darwin":
-        if static:
-            return "lib%s.a" % lib
-        elif version:
-            version = ".%s" % version
-        return "lib%s%s.dylib" % (lib, version)
+  if cpu_value in ("Linux", "FreeBSD"):
+    if static:
+      return "lib%s.a" % lib
     else:
-        auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+      if version:
+        version = ".%s" % version
+      return "lib%s.so%s" % (lib, version)
+  elif cpu_value == "Windows":
+    return "%s.lib" % lib
+  elif cpu_value == "Darwin":
+    if static:
+      return "lib%s.a" % lib
+    elif version:
+      version = ".%s" % version
+    return "lib%s%s.dylib" % (lib, version)
+  else:
+    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
 
 def _find_cuda_lib(
         lib,
@@ -697,7 +729,7 @@ def _find_cuda_lib(
         basedir,
         version = "",
         static = False):
-    """Finds the given CUDA or cuDNN library on the system.
+  """Finds the given CUDA or cuDNN library on the system.
 
     Args:
       lib: The name of the library, such as "cudart"
@@ -712,15 +744,16 @@ def _find_cuda_lib(
         file_name: The basename of the library found on the system.
         path: The full path to the library.
     """
-    file_name = _lib_name(lib, cpu_value, version, static)
-    for relative_path in CUDA_LIB_PATHS:
-        path = repository_ctx.path("%s/%s%s" % (basedir, relative_path, file_name))
-        if path.exists:
-            return struct(file_name = file_name, path = str(path.realpath))
-    auto_configure_fail("Cannot find cuda library %s" % file_name)
+  file_name = _lib_name(lib, cpu_value, version, static)
+  for relative_path in CUDA_LIB_PATHS:
+    path = repository_ctx.path("%s/%s%s" % (basedir, relative_path, file_name))
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+  auto_configure_fail("Cannot find cuda library %s" % file_name)
+
 
 def _find_cupti_header_dir(repository_ctx, cuda_config):
-    """Returns the path to the directory containing cupti.h
+  """Returns the path to the directory containing cupti.h
 
     On most systems, the cupti library is not installed in the same directory as
     the other CUDA libraries but rather in a special extras/CUPTI directory.
@@ -732,14 +765,17 @@ def _find_cupti_header_dir(repository_ctx, cuda_config):
     Returns:
       The path of the directory containing the cupti header.
     """
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for relative_path in CUPTI_HEADER_PATHS:
-        if repository_ctx.path("%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists:
-            return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-    auto_configure_fail("Cannot find cupti.h under %s" % ", ".join([cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS]))
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for relative_path in CUPTI_HEADER_PATHS:
+    if repository_ctx.path(
+        "%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists:
+      return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+  auto_configure_fail("Cannot find cupti.h under %s" % ", ".join(
+      [cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS]))
+
 
 def _find_cupti_lib(repository_ctx, cuda_config):
-    """Finds the cupti library on the system.
+  """Finds the cupti library on the system.
 
     On most systems, the cupti library is not installed in the same directory as
     the other CUDA libraries but rather in a special extras/CUPTI directory.
@@ -753,23 +789,23 @@ def _find_cupti_lib(repository_ctx, cuda_config):
         file_name: The basename of the library found on the system.
         path: The full path to the library.
     """
-    file_name = _lib_name(
-        "cupti",
-        cuda_config.cpu_value,
-        cuda_config.cuda_version,
-    )
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for relative_path in CUPTI_LIB_PATHS:
-        path = repository_ctx.path(
-            "%s/%s%s" % (cuda_toolkit_path, relative_path, file_name),
-        )
-        if path.exists:
-            return struct(file_name = file_name, path = str(path.realpath))
+  file_name = _lib_name(
+      "cupti",
+      cuda_config.cpu_value,
+      cuda_config.cuda_version,
+  )
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for relative_path in CUPTI_LIB_PATHS:
+    path = repository_ctx.path(
+        "%s/%s%s" % (cuda_toolkit_path, relative_path, file_name),)
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+
+  auto_configure_fail("Cannot find cupti library %s" % file_name)
 
-    auto_configure_fail("Cannot find cupti library %s" % file_name)
 
 def _find_libs(repository_ctx, cuda_config):
-    """Returns the CUDA and cuDNN libraries on the system.
+  """Returns the CUDA and cuDNN libraries on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -778,64 +814,75 @@ def _find_libs(repository_ctx, cuda_config):
     Returns:
       Map of library names to structs of filename and path.
     """
-    cpu_value = cuda_config.cpu_value
-    return {
-        "cuda": _find_cuda_lib("cuda", repository_ctx, cpu_value, cuda_config.cuda_toolkit_path),
-        "cudart": _find_cuda_lib(
-            "cudart",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cudart_static": _find_cuda_lib(
-            "cudart_static",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-            static = True,
-        ),
-        "cublas": _find_cuda_lib(
-            "cublas",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cusolver": _find_cuda_lib(
-            "cusolver",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "curand": _find_cuda_lib(
-            "curand",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cufft": _find_cuda_lib(
-            "cufft",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cudnn": _find_cuda_lib(
-            "cudnn",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cudnn_install_basedir,
-            cuda_config.cudnn_version,
-        ),
-        "cupti": _find_cupti_lib(repository_ctx, cuda_config),
-    }
+  cpu_value = cuda_config.cpu_value
+  return {
+      "cuda":
+          _find_cuda_lib("cuda", repository_ctx, cpu_value,
+                         cuda_config.cuda_toolkit_path),
+      "cudart":
+          _find_cuda_lib(
+              "cudart",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cudart_static":
+          _find_cuda_lib(
+              "cudart_static",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+              static=True,
+          ),
+      "cublas":
+          _find_cuda_lib(
+              "cublas",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cusolver":
+          _find_cuda_lib(
+              "cusolver",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "curand":
+          _find_cuda_lib(
+              "curand",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cufft":
+          _find_cuda_lib(
+              "cufft",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cudnn":
+          _find_cuda_lib(
+              "cudnn",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cudnn_install_basedir,
+              cuda_config.cudnn_version,
+          ),
+      "cupti":
+          _find_cupti_lib(repository_ctx, cuda_config),
+  }
+
 
 def _find_cuda_include_path(repository_ctx, cuda_config):
-    """Returns the path to the directory containing cuda.h
+  """Returns the path to the directory containing cuda.h
 
     Args:
       repository_ctx: The repository context.
@@ -844,14 +891,16 @@ def _find_cuda_include_path(repository_ctx, cuda_config):
     Returns:
       The path of the directory containing the CUDA headers.
     """
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for relative_path in CUDA_INCLUDE_PATHS:
-        if repository_ctx.path("%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists:
-            return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-    auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path)
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for relative_path in CUDA_INCLUDE_PATHS:
+    if repository_ctx.path(
+        "%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists:
+      return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+  auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path)
+
 
 def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
-    """Returns the path to the directory containing cudnn.h
+  """Returns the path to the directory containing cudnn.h
 
     Args:
       repository_ctx: The repository context.
@@ -861,15 +910,17 @@ def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
     Returns:
       The path of the directory containing the cudnn header.
     """
-    for relative_path in CUDA_INCLUDE_PATHS:
-        if repository_ctx.path("%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists:
-            return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1]
-    if repository_ctx.path("/usr/include/cudnn.h").exists:
-        return "/usr/include"
-    auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
+  for relative_path in CUDA_INCLUDE_PATHS:
+    if repository_ctx.path(
+        "%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists:
+      return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1]
+  if repository_ctx.path("/usr/include/cudnn.h").exists:
+    return "/usr/include"
+  auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
+
 
 def _find_nvvm_libdevice_dir(repository_ctx, cuda_config):
-    """Returns the path to the directory containing libdevice in bitcode format.
+  """Returns the path to the directory containing libdevice in bitcode format.
 
     Args:
       repository_ctx: The repository context.
@@ -878,19 +929,23 @@ def _find_nvvm_libdevice_dir(repository_ctx, cuda_config):
     Returns:
       The path of the directory containing the CUDA headers.
     """
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for libdevice_file in NVVM_LIBDEVICE_FILES:
-        for relative_path in NVVM_LIBDEVICE_PATHS:
-            if repository_ctx.path("%s/%s%s" % (cuda_toolkit_path, relative_path, libdevice_file)).exists:
-                return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-    auto_configure_fail("Cannot find libdevice*.bc files under %s" % cuda_toolkit_path)
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for libdevice_file in NVVM_LIBDEVICE_FILES:
+    for relative_path in NVVM_LIBDEVICE_PATHS:
+      if repository_ctx.path("%s/%s%s" % (cuda_toolkit_path, relative_path,
+                                          libdevice_file)).exists:
+        return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+  auto_configure_fail(
+      "Cannot find libdevice*.bc files under %s" % cuda_toolkit_path)
+
 
 def _cudart_static_linkopt(cpu_value):
-    """Returns additional platform-specific linkopts for cudart."""
-    return "" if cpu_value == "Darwin" else "\"-lrt\","
+  """Returns additional platform-specific linkopts for cudart."""
+  return "" if cpu_value == "Darwin" else "\"-lrt\","
+
 
 def _get_cuda_config(repository_ctx):
-    """Detects and returns information about the CUDA installation on the system.
+  """Detects and returns information about the CUDA installation on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -904,35 +959,39 @@ def _get_cuda_config(repository_ctx):
         compute_capabilities: A list of the system's CUDA compute capabilities.
         cpu_value: The name of the host operating system.
     """
-    cpu_value = get_cpu_value(repository_ctx)
-    cuda_toolkit_path = _cuda_toolkit_path(repository_ctx)
-    cuda_version = _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value)
-    cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
-    cudnn_version = _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value)
-    return struct(
-        cuda_toolkit_path = cuda_toolkit_path,
-        cudnn_install_basedir = cudnn_install_basedir,
-        cuda_version = cuda_version,
-        cudnn_version = cudnn_version,
-        compute_capabilities = _compute_capabilities(repository_ctx),
-        cpu_value = cpu_value,
-    )
+  cpu_value = get_cpu_value(repository_ctx)
+  toolkit_path = cuda_toolkit_path(repository_ctx)
+  cuda_version = _cuda_version(repository_ctx, toolkit_path, cpu_value)
+  cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
+  cudnn_version = _cudnn_version(repository_ctx, cudnn_install_basedir,
+                                 cpu_value)
+  return struct(
+      cuda_toolkit_path=toolkit_path,
+      cudnn_install_basedir=cudnn_install_basedir,
+      cuda_version=cuda_version,
+      cudnn_version=cudnn_version,
+      compute_capabilities=compute_capabilities(repository_ctx),
+      cpu_value=cpu_value,
+  )
+
 
 def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
-    if not out:
-        out = tpl.replace(":", "/")
-    repository_ctx.template(
-        out,
-        Label("//third_party/gpus/%s.tpl" % tpl),
-        substitutions,
-    )
+  if not out:
+    out = tpl.replace(":", "/")
+  repository_ctx.template(
+      out,
+      Label("//third_party/gpus/%s.tpl" % tpl),
+      substitutions,
+  )
+
 
 def _file(repository_ctx, label):
-    repository_ctx.template(
-        label.replace(":", "/"),
-        Label("//third_party/gpus/%s.tpl" % label),
-        {},
-    )
+  repository_ctx.template(
+      label.replace(":", "/"),
+      Label("//third_party/gpus/%s.tpl" % label),
+      {},
+  )
+
 
 _DUMMY_CROSSTOOL_BZL_FILE = """
 def error_gpu_disabled():
@@ -960,81 +1019,99 @@ error_gpu_disabled()
 """
 
 def _create_dummy_repository(repository_ctx):
-    cpu_value = get_cpu_value(repository_ctx)
+  cpu_value = get_cpu_value(repository_ctx)
+
+  # Set up BUILD file for cuda/.
+  _tpl(
+      repository_ctx,
+      "cuda:build_defs.bzl",
+      {
+          "%{cuda_is_configured}": "False",
+          "%{cuda_extra_copts}": "[]",
+      },
+  )
+  _tpl(
+      repository_ctx,
+      "cuda:BUILD",
+      {
+          "%{cuda_driver_lib}":
+              _lib_name("cuda", cpu_value),
+          "%{cudart_static_lib}":
+              _lib_name(
+                  "cudart_static",
+                  cpu_value,
+                  static=True,
+              ),
+          "%{cudart_static_linkopt}":
+              _cudart_static_linkopt(cpu_value),
+          "%{cudart_lib}":
+              _lib_name("cudart", cpu_value),
+          "%{cublas_lib}":
+              _lib_name("cublas", cpu_value),
+          "%{cusolver_lib}":
+              _lib_name("cusolver", cpu_value),
+          "%{cudnn_lib}":
+              _lib_name("cudnn", cpu_value),
+          "%{cufft_lib}":
+              _lib_name("cufft", cpu_value),
+          "%{curand_lib}":
+              _lib_name("curand", cpu_value),
+          "%{cupti_lib}":
+              _lib_name("cupti", cpu_value),
+          "%{cuda_include_genrules}":
+              "",
+          "%{cuda_headers}":
+              "",
+      },
+  )
 
-    # Set up BUILD file for cuda/.
-    _tpl(
-        repository_ctx,
-        "cuda:build_defs.bzl",
-        {
-            "%{cuda_is_configured}": "False",
-            "%{cuda_extra_copts}": "[]",
-        },
-    )
-    _tpl(
-        repository_ctx,
-        "cuda:BUILD",
-        {
-            "%{cuda_driver_lib}": _lib_name("cuda", cpu_value),
-            "%{cudart_static_lib}": _lib_name(
-                "cudart_static",
-                cpu_value,
-                static = True,
-            ),
-            "%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value),
-            "%{cudart_lib}": _lib_name("cudart", cpu_value),
-            "%{cublas_lib}": _lib_name("cublas", cpu_value),
-            "%{cusolver_lib}": _lib_name("cusolver", cpu_value),
-            "%{cudnn_lib}": _lib_name("cudnn", cpu_value),
-            "%{cufft_lib}": _lib_name("cufft", cpu_value),
-            "%{curand_lib}": _lib_name("curand", cpu_value),
-            "%{cupti_lib}": _lib_name("cupti", cpu_value),
-            "%{cuda_include_genrules}": "",
-            "%{cuda_headers}": "",
-        },
-    )
+  # Create dummy files for the CUDA toolkit since they are still required by
+  # tensorflow/core/platform/default/build_config:cuda.
+  repository_ctx.file("cuda/cuda/include/cuda.h", "")
+  repository_ctx.file("cuda/cuda/include/cublas.h", "")
+  repository_ctx.file("cuda/cuda/include/cudnn.h", "")
+  repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h", "")
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value))
+  repository_ctx.file(
+      "cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value))
+
+  # Set up cuda_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(
+      repository_ctx,
+      "cuda:cuda_config.h",
+      {
+          "%{cuda_version}":
+              _DEFAULT_CUDA_VERSION,
+          "%{cudnn_version}":
+              _DEFAULT_CUDNN_VERSION,
+          "%{cuda_compute_capabilities}":
+              ",".join([
+                  "CudaVersion(\"%s\")" % c
+                  for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+              ]),
+          "%{cuda_toolkit_path}":
+              _DEFAULT_CUDA_TOOLKIT_PATH,
+      },
+      "cuda/cuda/cuda_config.h",
+  )
 
-    # Create dummy files for the CUDA toolkit since they are still required by
-    # tensorflow/core/platform/default/build_config:cuda.
-    repository_ctx.file("cuda/cuda/include/cuda.h", "")
-    repository_ctx.file("cuda/cuda/include/cublas.h", "")
-    repository_ctx.file("cuda/cuda/include/cudnn.h", "")
-    repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h", "")
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value))
-
-    # Set up cuda_config.h, which is used by
-    # tensorflow/stream_executor/dso_loader.cc.
-    _tpl(
-        repository_ctx,
-        "cuda:cuda_config.h",
-        {
-            "%{cuda_version}": _DEFAULT_CUDA_VERSION,
-            "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
-            "%{cuda_compute_capabilities}": ",".join([
-                "CudaVersion(\"%s\")" % c
-                for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-            ]),
-            "%{cuda_toolkit_path}": _DEFAULT_CUDA_TOOLKIT_PATH,
-        },
-        "cuda/cuda/cuda_config.h",
-    )
+  # If cuda_configure is not configured to build with GPU support, and the user
+  # attempts to build with --config=cuda, add a dummy build rule to intercept
+  # this and fail with an actionable error message.
+  repository_ctx.file(
+      "crosstool/error_gpu_disabled.bzl",
+      _DUMMY_CROSSTOOL_BZL_FILE,
+  )
+  repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
 
-    # If cuda_configure is not configured to build with GPU support, and the user
-    # attempts to build with --config=cuda, add a dummy build rule to intercept
-    # this and fail with an actionable error message.
-    repository_ctx.file(
-        "crosstool/error_gpu_disabled.bzl",
-        _DUMMY_CROSSTOOL_BZL_FILE,
-    )
-    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
 
 def _execute(
         repository_ctx,
@@ -1042,35 +1119,35 @@ def _execute(
         error_msg = None,
         error_details = None,
         empty_stdout_fine = False):
-    """Executes an arbitrary shell command.
+  """Executes an arbitrary shell command.
 
     Args:
       repository_ctx: the repository_ctx object
       cmdline: list of strings, the command to execute
       error_msg: string, a summary of the error if the command fails
       error_details: string, details about the error or steps to fix it
-      empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
-        it's an error
-    Return:
-      the result of repository_ctx.execute(cmdline)
-    """
-    result = repository_ctx.execute(cmdline)
-    if result.stderr or not (empty_stdout_fine or result.stdout):
-        auto_configure_fail(
-            "\n".join([
-                error_msg.strip() if error_msg else "Repository command failed",
-                result.stderr.strip(),
-                error_details if error_details else "",
-            ]),
-        )
-    return result
+      empty_stdout_fine: bool, if True, an empty stdout result is fine,
+        otherwise it's an error
+    Return: the result of repository_ctx.execute(cmdline)
+  """
+  result = repository_ctx.execute(cmdline)
+  if result.stderr or not (empty_stdout_fine or result.stdout):
+    auto_configure_fail(
+        "\n".join([
+            error_msg.strip() if error_msg else "Repository command failed",
+            result.stderr.strip(),
+            error_details if error_details else "",
+        ]),)
+  return result
+
 
 def _norm_path(path):
-    """Returns a path with '/' and remove the trailing slash."""
-    path = path.replace("\\", "/")
-    if path[-1] == "/":
-        path = path[:-1]
-    return path
+  """Returns a path with '/' and remove the trailing slash."""
+  path = path.replace("\\", "/")
+  if path[-1] == "/":
+    path = path[:-1]
+  return path
+
 
 def symlink_genrule_for_dir(
         repository_ctx,
@@ -1079,167 +1156,174 @@ def symlink_genrule_for_dir(
         genrule_name,
         src_files = [],
         dest_files = []):
-    """Returns a genrule to symlink(or copy if on Windows) a set of files.
+  """Returns a genrule to symlink(or copy if on Windows) a set of files.
 
     If src_dir is passed, files will be read from the given directory; otherwise
     we assume files are in src_files and dest_files
     """
-    if src_dir != None:
-        src_dir = _norm_path(src_dir)
-        dest_dir = _norm_path(dest_dir)
-        files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
-
-        # Create a list with the src_dir stripped to use for outputs.
-        dest_files = files.replace(src_dir, "").splitlines()
-        src_files = files.splitlines()
-    command = []
-    if not _is_windows(repository_ctx):
-        # We clear folders that might have been generated previously to avoid
-        # undesired inclusions
-        command.append('if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi')
-        command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
-        command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
-        command.append('if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi')
-    outs = []
-    for i in range(len(dest_files)):
-        if dest_files[i] != "":
-            # If we have only one file to link we do not want to use the dest_dir, as
-            # $(@D) will include the full path to the file.
-            dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i]
-
-            # Copy the headers to create a sandboxable setup.
-            cmd = "cp -f"
-            command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
-            outs.append('        "' + dest_dir + dest_files[i] + '",')
-    genrule = _genrule(
-        src_dir,
-        genrule_name,
-        " && ".join(command),
-        "\n".join(outs),
-    )
-    return genrule
+  if src_dir != None:
+    src_dir = _norm_path(src_dir)
+    dest_dir = _norm_path(dest_dir)
+    files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
+
+    # Create a list with the src_dir stripped to use for outputs.
+    dest_files = files.replace(src_dir, "").splitlines()
+    src_files = files.splitlines()
+  command = []
+  if not _is_windows(repository_ctx):
+    # We clear folders that might have been generated previously to avoid
+    # undesired inclusions
+    command.append('if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi')
+    command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
+    command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
+    command.append('if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi')
+  outs = []
+  for i in range(len(dest_files)):
+    if dest_files[i] != "":
+      # If we have only one file to link we do not want to use the dest_dir, as
+      # $(@D) will include the full path to the file.
+      dest = "$(@D)/" + dest_dir + dest_files[i] if len(
+          dest_files) != 1 else "$(@D)/" + dest_files[i]
+
+      # Copy the headers to create a sandboxable setup.
+      cmd = "cp -f"
+      command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
+      outs.append('        "' + dest_dir + dest_files[i] + '",')
+  genrule = _genrule(
+      src_dir,
+      genrule_name,
+      " && ".join(command),
+      "\n".join(outs),
+  )
+  return genrule
+
 
 def _genrule(src_dir, genrule_name, command, outs):
-    """Returns a string with a genrule.
+  """Returns a string with a genrule.
 
     Genrule executes the given command and produces the given outputs.
     """
-    return (
-        "genrule(\n" +
-        '    name = "' +
-        genrule_name + '",\n' +
-        "    outs = [\n" +
-        outs +
-        "\n    ],\n" +
-        '    cmd = """\n' +
-        command +
-        '\n   """,\n' +
-        ")\n"
-    )
+  return (
+      "genrule(\n" + '    name = "' + genrule_name + '",\n' + "    outs = [\n" +
+      outs + "\n    ],\n" + '    cmd = """\n' + command + '\n   """,\n' + ")\n")
+
 
 def _read_dir(repository_ctx, src_dir):
-    """Returns a string with all files in a directory.
+  """Returns a string with all files in a directory.
 
     Finds all files inside a directory, traversing subfolders and following
     symlinks. The returned string contains the full path of all files
     separated by line breaks.
     """
-    if _is_windows(repository_ctx):
-        src_dir = src_dir.replace("/", "\\")
-        find_result = _execute(
-            repository_ctx,
-            ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
-            empty_stdout_fine = True,
-        )
+  if _is_windows(repository_ctx):
+    src_dir = src_dir.replace("/", "\\")
+    find_result = _execute(
+        repository_ctx,
+        ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
+        empty_stdout_fine=True,
+    )
+
+    # src_files will be used in genrule.outs where the paths must
+    # use forward slashes.
+    result = find_result.stdout.replace("\\", "/")
+  else:
+    find_result = _execute(
+        repository_ctx,
+        ["find", src_dir, "-follow", "-type", "f"],
+        empty_stdout_fine=True,
+    )
+    result = find_result.stdout
+  return result
 
-        # src_files will be used in genrule.outs where the paths must
-        # use forward slashes.
-        result = find_result.stdout.replace("\\", "/")
-    else:
-        find_result = _execute(
-            repository_ctx,
-            ["find", src_dir, "-follow", "-type", "f"],
-            empty_stdout_fine = True,
-        )
-        result = find_result.stdout
-    return result
 
 def _flag_enabled(repository_ctx, flag_name):
-    if flag_name in repository_ctx.os.environ:
-        value = repository_ctx.os.environ[flag_name].strip()
-        return value == "1"
-    return False
+  if flag_name in repository_ctx.os.environ:
+    value = repository_ctx.os.environ[flag_name].strip()
+    return value == "1"
+  return False
+
 
 def _use_cuda_clang(repository_ctx):
-    return _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
+  return _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
+
 
 def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
-    if _use_cuda_clang(repository_ctx):
-        capability_flags = ["--cuda-gpu-arch=sm_" +
-                            cap.replace(".", "") for cap in compute_capabilities]
-    else:
-        # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc
-        capability_flags = []
-    return str(capability_flags)
+  if _use_cuda_clang(repository_ctx):
+    capability_flags = [
+        "--cuda-gpu-arch=sm_" + cap.replace(".", "")
+        for cap in compute_capabilities
+    ]
+  else:
+    # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc
+    # TODO(csigg): Make this consistent with cuda clang and pass to crosstool.
+    capability_flags = []
+  return str(capability_flags)
+
 
 def _create_local_cuda_repository(repository_ctx):
-    """Creates the repository containing files set up to build with CUDA."""
-    cuda_config = _get_cuda_config(repository_ctx)
+  """Creates the repository containing files set up to build with CUDA."""
+  cuda_config = _get_cuda_config(repository_ctx)
 
-    cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config)
-    cudnn_header_dir = _find_cudnn_header_dir(
-        repository_ctx,
-        cuda_config.cudnn_install_basedir,
-    )
-    cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config)
-    nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config)
-
-    # Set up symbolic links for the cuda toolkit by creating genrules to do
-    # symlinking. We create one genrule for each directory we want to track under
-    # cuda_toolkit_path
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    genrules = [symlink_genrule_for_dir(
-        repository_ctx,
-        cuda_include_path,
-        "cuda/include",
-        "cuda-include",
-    )]
-    genrules.append(symlink_genrule_for_dir(
-        repository_ctx,
-        nvvm_libdevice_dir,
-        "cuda/nvvm/libdevice",
-        "cuda-nvvm",
-    ))
-    genrules.append(symlink_genrule_for_dir(
-        repository_ctx,
-        cupti_header_dir,
-        "cuda/extras/CUPTI/include",
-        "cuda-extras",
-    ))
-
-    cuda_libs = _find_libs(repository_ctx, cuda_config)
-    cuda_lib_src = []
-    cuda_lib_dest = []
-    for lib in cuda_libs.values():
-        cuda_lib_src.append(lib.path)
-        cuda_lib_dest.append("cuda/lib/" + lib.file_name)
-    genrules.append(symlink_genrule_for_dir(
-        repository_ctx,
-        None,
-        "",
-        "cuda-lib",
-        cuda_lib_src,
-        cuda_lib_dest,
-    ))
-
-    # Set up the symbolic links for cudnn if cndnn was not installed to
-    # CUDA_TOOLKIT_PATH.
-    included_files = _read_dir(repository_ctx, cuda_include_path).replace(
-        cuda_include_path,
-        "",
-    ).splitlines()
-    if "/cudnn.h" not in included_files:
-        genrules.append(symlink_genrule_for_dir(
+  cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config)
+  cudnn_header_dir = _find_cudnn_header_dir(
+      repository_ctx,
+      cuda_config.cudnn_install_basedir,
+  )
+  cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config)
+  nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config)
+
+  # Set up symbolic links for the cuda toolkit by creating genrules to do
+  # symlinking. We create one genrule for each directory we want to track under
+  # cuda_toolkit_path
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  genrules = [
+      symlink_genrule_for_dir(
+          repository_ctx,
+          cuda_include_path,
+          "cuda/include",
+          "cuda-include",
+      )
+  ]
+  genrules.append(
+      symlink_genrule_for_dir(
+          repository_ctx,
+          nvvm_libdevice_dir,
+          "cuda/nvvm/libdevice",
+          "cuda-nvvm",
+      ))
+  genrules.append(
+      symlink_genrule_for_dir(
+          repository_ctx,
+          cupti_header_dir,
+          "cuda/extras/CUPTI/include",
+          "cuda-extras",
+      ))
+
+  cuda_libs = _find_libs(repository_ctx, cuda_config)
+  cuda_lib_src = []
+  cuda_lib_dest = []
+  for lib in cuda_libs.values():
+    cuda_lib_src.append(lib.path)
+    cuda_lib_dest.append("cuda/lib/" + lib.file_name)
+  genrules.append(
+      symlink_genrule_for_dir(
+          repository_ctx,
+          None,
+          "",
+          "cuda-lib",
+          cuda_lib_src,
+          cuda_lib_dest,
+      ))
+
+  # Set up the symbolic links for cudnn if cndnn was not installed to
+  # CUDA_TOOLKIT_PATH.
+  included_files = _read_dir(repository_ctx, cuda_include_path).replace(
+      cuda_include_path,
+      "",
+  ).splitlines()
+  if "/cudnn.h" not in included_files:
+    genrules.append(
+        symlink_genrule_for_dir(
             repository_ctx,
             None,
             "cuda/include/",
@@ -1247,204 +1331,229 @@ def _create_local_cuda_repository(repository_ctx):
             [cudnn_header_dir + "/cudnn.h"],
             ["cudnn.h"],
         ))
-    else:
-        genrules.append(
-            "filegroup(\n" +
-            '    name = "cudnn-include",\n' +
-            "    srcs = [],\n" +
-            ")\n",
-        )
-
-    # Set up BUILD file for cuda/
-    _tpl(
-        repository_ctx,
-        "cuda:build_defs.bzl",
-        {
-            "%{cuda_is_configured}": "True",
-            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
-                repository_ctx,
-                cuda_config.compute_capabilities,
-            ),
-        },
-    )
-    _tpl(
-        repository_ctx,
-        "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD",
-        {
-            "%{cuda_driver_lib}": cuda_libs["cuda"].file_name,
-            "%{cudart_static_lib}": cuda_libs["cudart_static"].file_name,
-            "%{cudart_static_linkopt}": _cudart_static_linkopt(
-                cuda_config.cpu_value,
-            ),
-            "%{cudart_lib}": cuda_libs["cudart"].file_name,
-            "%{cublas_lib}": cuda_libs["cublas"].file_name,
-            "%{cusolver_lib}": cuda_libs["cusolver"].file_name,
-            "%{cudnn_lib}": cuda_libs["cudnn"].file_name,
-            "%{cufft_lib}": cuda_libs["cufft"].file_name,
-            "%{curand_lib}": cuda_libs["curand"].file_name,
-            "%{cupti_lib}": cuda_libs["cupti"].file_name,
-            "%{cuda_include_genrules}": "\n".join(genrules),
-            "%{cuda_headers}": ('":cuda-include",\n' +
-                                '        ":cudnn-include",'),
-        },
-        "cuda/BUILD",
-    )
-
-    is_cuda_clang = _use_cuda_clang(repository_ctx)
+  else:
+    genrules.append(
+        "filegroup(\n" + '    name = "cudnn-include",\n' + "    srcs = [],\n" +
+        ")\n",)
+
+  # Set up BUILD file for cuda/
+  _tpl(
+      repository_ctx,
+      "cuda:build_defs.bzl",
+      {
+          "%{cuda_is_configured}":
+              "True",
+          "%{cuda_extra_copts}":
+              _compute_cuda_extra_copts(
+                  repository_ctx,
+                  cuda_config.compute_capabilities,
+              ),
+      },
+  )
+  _tpl(
+      repository_ctx,
+      "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD",
+      {
+          "%{cuda_driver_lib}":
+              cuda_libs["cuda"].file_name,
+          "%{cudart_static_lib}":
+              cuda_libs["cudart_static"].file_name,
+          "%{cudart_static_linkopt}":
+              _cudart_static_linkopt(cuda_config.cpu_value,),
+          "%{cudart_lib}":
+              cuda_libs["cudart"].file_name,
+          "%{cublas_lib}":
+              cuda_libs["cublas"].file_name,
+          "%{cusolver_lib}":
+              cuda_libs["cusolver"].file_name,
+          "%{cudnn_lib}":
+              cuda_libs["cudnn"].file_name,
+          "%{cufft_lib}":
+              cuda_libs["cufft"].file_name,
+          "%{curand_lib}":
+              cuda_libs["curand"].file_name,
+          "%{cupti_lib}":
+              cuda_libs["cupti"].file_name,
+          "%{cuda_include_genrules}":
+              "\n".join(genrules),
+          "%{cuda_headers}": ('":cuda-include",\n' + '        ":cudnn-include",'
+                             ),
+      },
+      "cuda/BUILD",
+  )
 
-    should_download_clang = is_cuda_clang and _flag_enabled(
-        repository_ctx,
-        _TF_DOWNLOAD_CLANG,
-    )
-    if should_download_clang:
-        download_clang(repository_ctx, "crosstool/extra_tools")
-
-    # Set up crosstool/
-    cc = find_cc(repository_ctx)
-    cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
-
-    host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath)
-    cuda_defines = {}
-    # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see
-    # https://github.com/bazelbuild/bazel/issues/760).
-    # However, this stops our custom clang toolchain from picking the provided
-    # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded
-    # toolchain.
-    # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
-    #       flag from the CROSSTOOL completely (see
-    #       https://github.com/bazelbuild/bazel/issues/5634)
-    if should_download_clang:
-      cuda_defines["%{linker_bin_path_flag}"] = ""
-    else:
-      cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"'
+  is_cuda_clang = _use_cuda_clang(repository_ctx)
 
-    if is_cuda_clang:
-        cuda_defines["%{host_compiler_path}"] = str(cc)
-        cuda_defines["%{host_compiler_warnings}"] = """
+  should_download_clang = is_cuda_clang and _flag_enabled(
+      repository_ctx,
+      _TF_DOWNLOAD_CLANG,
+  )
+  if should_download_clang:
+    download_clang(repository_ctx, "crosstool/extra_tools")
+
+  # Set up crosstool/
+  cc = find_cc(repository_ctx)
+  cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
+
+  host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath)
+  cuda_defines = {}
+  # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see
+  # https://github.com/bazelbuild/bazel/issues/760).
+  # However, this stops our custom clang toolchain from picking the provided
+  # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded
+  # toolchain.
+  # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
+  #       flag from the CROSSTOOL completely (see
+  #       https://github.com/bazelbuild/bazel/issues/5634)
+  if should_download_clang:
+    cuda_defines["%{linker_bin_path_flag}"] = ""
+  else:
+    cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"'
+
+  if is_cuda_clang:
+    cuda_defines["%{host_compiler_path}"] = str(cc)
+    cuda_defines["%{host_compiler_warnings}"] = """
         # Some parts of the codebase set -Werror and hit this warning, so
         # switch it off for now.
         flag: "-Wno-invalid-partial-specialization"
     """
-        cuda_defines["%{host_compiler_includes}"] = host_compiler_includes
-        _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"})
-        repository_ctx.file("crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "")
-        repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
-        repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.bat", "")
-    else:
-        cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
-        cuda_defines["%{host_compiler_warnings}"] = ""
-
-        # nvcc has the system include paths built in and will automatically
-        # search them; we cannot work around that, so we add the relevant cuda
-        # system paths to the allowed compiler specific include paths.
-        cuda_defines["%{host_compiler_includes}"] = (
-            host_compiler_includes + "\n" +
-            _cuda_include_path(repository_ctx, cuda_config) +
-            "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
-            "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir)
-        nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" %
-                                            (
-                                                cuda_config.cuda_toolkit_path,
-                                                ".exe" if _is_windows(repository_ctx) else "",
-                                            )))
-        _tpl(
-            repository_ctx,
-            "crosstool:BUILD",
-            {
-                "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc",
-                "%{win_linker_files}": ":windows_msvc_wrapper_files",
-            },
-        )
-        wrapper_defines = {
-            "%{cpu_compiler}": str(cc),
-            "%{cuda_version}": cuda_config.cuda_version,
-            "%{nvcc_path}": nvcc_path,
-            "%{gcc_host_compiler_path}": str(cc),
-            "%{cuda_compute_capabilities}": ", ".join(
-                ["\"%s\"" % c for c in cuda_config.compute_capabilities],
-            ),
-            "%{nvcc_tmp_dir}": _get_nvcc_tmp_dir_for_windows(repository_ctx),
-        }
-        _tpl(
-            repository_ctx,
-            "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
-            wrapper_defines,
-        )
-        _tpl(
-            repository_ctx,
-            "crosstool:windows/msvc_wrapper_for_nvcc.py",
-            wrapper_defines,
-        )
-        _tpl(
-            repository_ctx,
-            "crosstool:windows/msvc_wrapper_for_nvcc.bat",
-            {
-                "%{python_binary}": _get_python_bin(repository_ctx),
-            },
-        )
-
+    cuda_defines["%{host_compiler_includes}"] = host_compiler_includes
+    _tpl(repository_ctx, "crosstool:BUILD", {
+        "%{linker_files}": ":empty",
+        "%{win_linker_files}": ":empty"
+    })
+    repository_ctx.file(
+        "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "")
+    repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
+    repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.bat", "")
+  else:
+    cuda_defines[
+        "%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
+    cuda_defines["%{host_compiler_warnings}"] = ""
+
+    # nvcc has the system include paths built in and will automatically
+    # search them; we cannot work around that, so we add the relevant cuda
+    # system paths to the allowed compiler specific include paths.
+    cuda_defines["%{host_compiler_includes}"] = (
+        host_compiler_includes + "\n" + _cuda_include_path(
+            repository_ctx, cuda_config) +
+        "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
+        "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir)
+    nvcc_path = str(
+        repository_ctx.path("%s/bin/nvcc%s" % (
+            cuda_config.cuda_toolkit_path,
+            ".exe" if _is_windows(repository_ctx) else "",
+        )))
     _tpl(
         repository_ctx,
-        "crosstool:CROSSTOOL",
-        cuda_defines + _get_win_cuda_defines(repository_ctx),
-        out = "crosstool/CROSSTOOL",
+        "crosstool:BUILD",
+        {
+            "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc",
+            "%{win_linker_files}": ":windows_msvc_wrapper_files",
+        },
     )
-
-    # Set up cuda_config.h, which is used by
-    # tensorflow/stream_executor/dso_loader.cc.
+    wrapper_defines = {
+        "%{cpu_compiler}":
+            str(cc),
+        "%{cuda_version}":
+            cuda_config.cuda_version,
+        "%{nvcc_path}":
+            nvcc_path,
+        "%{gcc_host_compiler_path}":
+            str(cc),
+        "%{cuda_compute_capabilities}":
+            ", ".join(
+                ["\"%s\"" % c for c in cuda_config.compute_capabilities],),
+        "%{nvcc_tmp_dir}":
+            _get_nvcc_tmp_dir_for_windows(repository_ctx),
+    }
     _tpl(
         repository_ctx,
-        "cuda:cuda_config.h",
-        {
-            "%{cuda_version}": cuda_config.cuda_version,
-            "%{cudnn_version}": cuda_config.cudnn_version,
-            "%{cuda_compute_capabilities}": ",".join(
-                [
-                    "CudaVersion(\"%s\")" % c
-                    for c in cuda_config.compute_capabilities
-                ],
-            ),
-            "%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path,
-        },
-        "cuda/cuda/cuda_config.h",
+        "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
+        wrapper_defines,
     )
-
-def _create_remote_cuda_repository(repository_ctx, remote_config_repo):
-    """Creates pointers to a remotely configured repo set up to build with CUDA."""
     _tpl(
         repository_ctx,
-        "cuda:build_defs.bzl",
-        {
-            "%{cuda_is_configured}": "True",
-            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
-                repository_ctx,
-                _compute_capabilities(repository_ctx),
-            ),
-        },
+        "crosstool:windows/msvc_wrapper_for_nvcc.py",
+        wrapper_defines,
     )
     _tpl(
         repository_ctx,
-        "cuda:remote.BUILD",
+        "crosstool:windows/msvc_wrapper_for_nvcc.bat",
         {
-            "%{remote_cuda_repo}": remote_config_repo,
+            "%{python_binary}": _get_python_bin(repository_ctx),
         },
-        "cuda/BUILD",
     )
-    _tpl(repository_ctx, "crosstool:remote.BUILD", {
-        "%{remote_cuda_repo}": remote_config_repo,
-    }, "crosstool/BUILD")
+
+  _tpl(
+      repository_ctx,
+      "crosstool:CROSSTOOL",
+      cuda_defines + _get_win_cuda_defines(repository_ctx),
+      out="crosstool/CROSSTOOL",
+  )
+
+  # Set up cuda_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(
+      repository_ctx,
+      "cuda:cuda_config.h",
+      {
+          "%{cuda_version}":
+              cuda_config.cuda_version,
+          "%{cudnn_version}":
+              cuda_config.cudnn_version,
+          "%{cuda_compute_capabilities}":
+              ",".join([
+                  "CudaVersion(\"%s\")" % c
+                  for c in cuda_config.compute_capabilities
+              ],),
+          "%{cuda_toolkit_path}":
+              cuda_config.cuda_toolkit_path,
+      },
+      "cuda/cuda/cuda_config.h",
+  )
+
+
+def _create_remote_cuda_repository(repository_ctx, remote_config_repo):
+  """Creates pointers to a remotely configured repo set up to build with CUDA."""
+  _tpl(
+      repository_ctx,
+      "cuda:build_defs.bzl",
+      {
+          "%{cuda_is_configured}":
+              "True",
+          "%{cuda_extra_copts}":
+              _compute_cuda_extra_copts(
+                  repository_ctx,
+                  compute_capabilities(repository_ctx),
+              ),
+      },
+  )
+  _tpl(
+      repository_ctx,
+      "cuda:remote.BUILD",
+      {
+          "%{remote_cuda_repo}": remote_config_repo,
+      },
+      "cuda/BUILD",
+  )
+  _tpl(repository_ctx, "crosstool:remote.BUILD", {
+      "%{remote_cuda_repo}": remote_config_repo,
+  }, "crosstool/BUILD")
+
 
 def _cuda_autoconf_impl(repository_ctx):
-    """Implementation of the cuda_autoconf repository rule."""
-    if not _enable_cuda(repository_ctx):
-        _create_dummy_repository(repository_ctx)
-    elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ:
-        _create_remote_cuda_repository(
-            repository_ctx,
-            repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO],
-        )
-    else:
-        _create_local_cuda_repository(repository_ctx)
+  """Implementation of the cuda_autoconf repository rule."""
+  if not _enable_cuda(repository_ctx):
+    _create_dummy_repository(repository_ctx)
+  elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ:
+    _create_remote_cuda_repository(
+        repository_ctx,
+        repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO],
+    )
+  else:
+    _create_local_cuda_repository(repository_ctx)
+
 
 cuda_configure = repository_rule(
     implementation = _cuda_autoconf_impl,
diff --git a/third_party/nccl/LICENSE b/third_party/nccl/LICENSE
index 146d9b765c..b958518186 100644
--- a/third_party/nccl/LICENSE
+++ b/third_party/nccl/LICENSE
@@ -1,203 +1,30 @@
-Copyright 2018 The TensorFlow Authors.  All rights reserved.
 
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright 2018, The TensorFlow Authors.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+ Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+  * Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National
+    Laboratory, the U.S. Department of Energy, nor the names of their
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ The U.S. Department of Energy funded the development of this software
+ under subcontract 7078610 with Lawrence Berkeley National Laboratory.
diff --git a/third_party/nccl/archive.BUILD b/third_party/nccl/archive.BUILD
new file mode 100644
index 0000000000..f57f04c75e
--- /dev/null
+++ b/third_party/nccl/archive.BUILD
@@ -0,0 +1,179 @@
+# NVIDIA NCCL 2
+# A package of optimized primitives for collective multi-GPU communication.
+
+licenses(["restricted"])
+
+exports_files(["LICENSE.txt"])
+
+load(
+    "@local_config_nccl//:build_defs.bzl",
+    "device_link",
+    "gen_nccl_h",
+    "nccl_library",
+    "rdc_copts",
+)
+load(
+    "@local_config_cuda//cuda:build_defs.bzl",
+    "cuda_default_copts",
+)
+
+# Generate the nccl.h header file.
+gen_nccl_h(
+    name = "nccl_h",
+    output = "src/nccl.h",
+    template = "src/nccl.h.in",
+)
+
+nccl_library(
+    name = "src_hdrs",
+    hdrs = [
+        "src/nccl.h",
+        # src/include/common_coll.h #includes "collectives/collectives.h".
+        # All other #includes of collectives.h are patched in process_srcs.
+        "src/collectives/collectives.h",
+    ],
+    strip_include_prefix = "src",
+)
+
+nccl_library(
+    name = "include_hdrs",
+    hdrs = glob(["src/include/*.h"]),
+    strip_include_prefix = "src/include",
+)
+
+filegroup(
+    name = "device_hdrs",
+    srcs = glob(["src/collectives/device/*.h"]),
+)
+
+filegroup(
+    name = "device_srcs",
+    srcs = [
+        "src/collectives/device/all_gather.cu",
+        "src/collectives/device/all_reduce.cu",
+        "src/collectives/device/broadcast.cu",
+        "src/collectives/device/reduce.cu",
+        "src/collectives/device/reduce_scatter.cu",
+    ],
+)
+
+nccl_library(
+    name = "sum",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=0"] + rdc_copts(),
+    prefix = "sum_",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "prod",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=1"] + rdc_copts(),
+    prefix = "_prod",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "min",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=2"] + rdc_copts(),
+    prefix = "min_",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "max",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=3"] + rdc_copts(),
+    prefix = "max_",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "functions",
+    srcs = [
+        ":device_hdrs",
+        "src/collectives/device/functions.cu",
+    ],
+    copts = rdc_copts(),
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+device_link(
+    name = "device_code",
+    srcs = [
+        ":functions",
+        ":max",
+        ":min",
+        ":prod",
+        ":sum",
+    ],
+)
+
+# Primary NCCL target.
+nccl_library(
+    name = "nccl",
+    srcs = glob(
+        include = ["src/**/*.cu"],
+        # Exclude device-library code.
+        exclude = ["src/collectives/device/**"],
+    ) + [
+        # Required for header inclusion checking (see
+        # http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs).
+        # Files in src/ which #include "nccl.h" load it from there rather than
+        # from the virtual includes directory.
+        "src/nccl.h",
+    ],
+    hdrs = ["src/nccl.h"],
+    include_prefix = "third_party/nccl",
+    strip_include_prefix = "src",
+    copts = cuda_default_copts(),
+    deps = [
+        ":device_code",
+        ":functions",
+        ":include_hdrs",
+        ":max",
+        ":min",
+        ":prod",
+        ":src_hdrs",
+        ":sum",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl
new file mode 100644
index 0000000000..ede1d3dad5
--- /dev/null
+++ b/third_party/nccl/build_defs.bzl.tpl
@@ -0,0 +1,210 @@
+"""Repository rule for NCCL."""
+
+load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts")
+
+def _gen_nccl_h_impl(ctx):
+    """Creates nccl.h from a template."""
+    ctx.actions.expand_template(
+        output = ctx.outputs.output,
+        template = ctx.file.template,
+        substitutions = {
+            "${nccl:Major}": "2",
+            "${nccl:Minor}": "3",
+            "${nccl:Patch}": "5",
+            "${nccl:Suffix}": "",
+            "${nccl:Version}": "2305",
+        },
+    )
+gen_nccl_h = rule(
+    implementation = _gen_nccl_h_impl,
+    attrs = {
+        "template": attr.label(allow_single_file = True),
+        "output": attr.output(),
+    },
+)
+"""Creates the NCCL header file."""
+
+
+def _process_srcs_impl(ctx):
+    """Appends .cc to .cu files, patches include directives."""
+    files = []
+    for src in ctx.files.srcs:
+        if not src.is_source:
+          # Process only once, specifically "src/nccl.h".
+          files.append(src)
+          continue
+        name = src.basename
+        if src.extension == "cu":
+            name = ctx.attr.prefix + name + ".cc"
+        file = ctx.actions.declare_file(name, sibling = src)
+        ctx.actions.expand_template(
+            output = file,
+            template = src,
+            substitutions = {
+                "\"collectives.h": "\"collectives/collectives.h",
+                "\"../collectives.h": "\"collectives/collectives.h",
+                "#if __CUDACC_VER_MAJOR__":
+                    "#if defined __CUDACC_VER_MAJOR__ && __CUDACC_VER_MAJOR__",
+                # Substitutions are applied in order.
+                "std::nullptr_t": "nullptr_t",
+                "nullptr_t": "std::nullptr_t",
+            },
+        )
+        files.append(file)
+    return [DefaultInfo(files = depset(files))]
+_process_srcs = rule(
+    implementation = _process_srcs_impl,
+    attrs = {
+        "srcs": attr.label_list(allow_files = True),
+        "prefix": attr.string(default = ""),
+    },
+)
+"""Processes the NCCL srcs so they can be compiled with bazel and clang."""
+
+
+def nccl_library(name, srcs=None, hdrs=None, prefix=None, **kwargs):
+    """Processes the srcs and hdrs and creates a cc_library."""
+
+    _process_srcs(
+        name = name + "_srcs",
+        srcs = srcs,
+        prefix = prefix,
+    )
+    _process_srcs(
+        name = name + "_hdrs",
+        srcs = hdrs,
+    )
+
+    native.cc_library(
+        name = name,
+        srcs = [name + "_srcs"] if srcs else [],
+        hdrs = [name + "_hdrs"] if hdrs else [],
+        **kwargs
+    )
+
+
+def rdc_copts():
+    """Returns copts for compiling relocatable device code."""
+
+    # The global functions can not have a lower register count than the
+    # device functions. This is enforced by setting a fixed register count.
+    # https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48
+    maxrregcount = "-maxrregcount=96"
+
+    return cuda_default_copts() + select({
+          "@local_config_cuda//cuda:using_nvcc": [
+              "-nvcc_options",
+              "relocatable-device-code=true",
+              "-nvcc_options",
+              "ptxas-options=" + maxrregcount,
+          ],
+          "@local_config_cuda//cuda:using_clang": [
+              "-fcuda-rdc",
+              "-Xcuda-ptxas",
+              maxrregcount,
+          ],
+          "//conditions:default": [],
+      }) + ["-fvisibility=hidden"]
+
+
+def _filter_impl(ctx):
+    suffix = ctx.attr.suffix
+    files = [src for src in ctx.files.srcs if src.path.endswith(suffix)]
+    return [DefaultInfo(files = depset(files))]
+_filter = rule(
+    implementation = _filter_impl,
+    attrs = {
+        "srcs": attr.label_list(allow_files = True),
+        "suffix": attr.string(),
+    },
+)
+"""Filters the srcs to the ones ending with suffix."""
+
+
+def _gen_link_src_impl(ctx):
+    ctx.actions.expand_template(
+        output = ctx.outputs.output,
+        template = ctx.file.template,
+        substitutions = {
+            "REGISTERLINKBINARYFILE": '"%s"' % ctx.file.register_hdr.short_path,
+            "FATBINFILE": '"%s"' % ctx.file.fatbin_hdr.short_path,
+        },
+    )
+_gen_link_src = rule(
+    implementation = _gen_link_src_impl,
+    attrs = {
+        "register_hdr": attr.label(allow_single_file = True),
+        "fatbin_hdr": attr.label(allow_single_file = True),
+        "template": attr.label(allow_single_file = True),
+        "output": attr.output(),
+    },
+)
+"""Patches the include directives for the link.stub file."""
+
+
+def device_link(name, srcs):
+    """Links seperately compiled relocatable device code into a cc_library."""
+
+    # From .a and .pic.a archives, just use the latter.
+    _filter(
+        name = name + "_pic_a",
+        srcs = srcs,
+        suffix = ".pic.a",
+    )
+
+    # Device-link to cubins for each architecture.
+    images = []
+    cubins = []
+    for arch in %{gpu_architectures}:
+        cubin = "%s_%s.cubin" % (name, arch)
+        register_hdr = "%s_%s.h" % (name, arch)
+        nvlink = "@local_config_nccl//:nvlink"
+        cmd = ("$(location %s) --cpu-arch=X86_64 " % nvlink +
+            "--arch=%s $(SRCS) " % arch +
+            "--register-link-binaries=$(location %s) " % register_hdr +
+            "--output-file=$(location %s)" % cubin)
+        native.genrule(
+            name = "%s_%s" % (name, arch),
+            outs = [register_hdr, cubin],
+            srcs = [name + "_pic_a"],
+            cmd = cmd,
+            tools = [nvlink],
+        )
+        images.append("--image=profile=%s,file=$(location %s)" % (arch, cubin))
+        cubins.append(cubin)
+
+    # Generate fatbin header from all cubins.
+    fatbin_hdr = name + ".fatbin.h"
+    fatbinary = "@local_config_nccl//:cuda/bin/fatbinary"
+    cmd = ("PATH=$$CUDA_TOOLKIT_PATH/bin:$$PATH " + # for bin2c
+          "$(location %s) -64 --cmdline=--compile-only --link " % fatbinary +
+          "--compress-all %s --create=%%{name}.fatbin " % " ".join(images) +
+          "--embedded-fatbin=$@")
+    native.genrule(
+        name = name + "_fatbin_h",
+        outs = [fatbin_hdr],
+        srcs = cubins,
+        cmd = cmd,
+        tools = [fatbinary],
+    )
+
+    # Generate the source file #including the headers generated above.
+    _gen_link_src(
+        name = name + "_cc",
+        # Include just the last one, they are equivalent.
+        register_hdr = register_hdr,
+        fatbin_hdr = fatbin_hdr,
+        template = "@local_config_nccl//:cuda/bin/crt/link.stub",
+        output = name + ".cc",
+    )
+
+    # Compile the source file into the cc_library.
+    native.cc_library(
+        name = name,
+        srcs = [name + "_cc"],
+        textual_hdrs = [register_hdr, fatbin_hdr],
+        deps = [
+            "@local_config_cuda//cuda:cuda_headers",
+            "@local_config_cuda//cuda:cudart_static",
+        ],
+    )
diff --git a/third_party/nccl/nccl_archive.BUILD b/third_party/nccl/nccl_archive.BUILD
deleted file mode 100644
index a05899e38d..0000000000
--- a/third_party/nccl/nccl_archive.BUILD
+++ /dev/null
@@ -1,68 +0,0 @@
-# NVIDIA nccl
-# A package of optimized primitives for collective multi-GPU communication.
-
-licenses(["notice"])  # BSD
-
-exports_files(["LICENSE.txt"])
-
-load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "if_cuda")
-
-SRCS = [
-    "src/all_gather.cu",
-    "src/all_reduce.cu",
-    "src/broadcast.cu",
-    "src/core.cu",
-    "src/libwrap.cu",
-    "src/reduce.cu",
-    "src/reduce_scatter.cu",
-]
-
-# Copy .cu to .cu.cc so they can be in srcs of cc_library.
-[
-    genrule(
-        name = "gen_" + src,
-        srcs = [src],
-        outs = [src + ".cc"],
-        cmd = "cp $(location " + src + ") $(location " + src + ".cc)",
-    )
-    for src in SRCS
-]
-
-SRCS_CU_CC = [src + ".cc" for src in SRCS]
-
-cc_library(
-    name = "nccl",
-    srcs = if_cuda(SRCS_CU_CC + glob(["src/*.h"])),
-    hdrs = if_cuda(["src/nccl.h"]),
-    copts = [
-        "-DCUDA_MAJOR=0",
-        "-DCUDA_MINOR=0",
-        "-DNCCL_MAJOR=0",
-        "-DNCCL_MINOR=0",
-        "-DNCCL_PATCH=0",
-        "-Iexternal/nccl_archive/src",
-        "-O3",
-    ] + cuda_default_copts(),
-    include_prefix = "third_party/nccl",
-    linkopts = select({
-        "@org_tensorflow//tensorflow:android": [
-            "-pie",
-        ],
-        "@org_tensorflow//tensorflow:darwin": [
-            "-Wl,-framework",
-            "-Wl,CoreFoundation",
-            "-Wl,-framework",
-            "-Wl,Security",
-        ],
-        "@org_tensorflow//tensorflow:ios": [],
-        "@org_tensorflow//tensorflow:windows": [
-            "-DEFAULTLIB:ws2_32.lib",
-        ],
-        "//conditions:default": [
-            "-lrt",
-        ],
-    }),
-    strip_include_prefix = "src",
-    visibility = ["//visibility:public"],
-    deps = ["@local_config_cuda//cuda:cuda_headers"],
-)
diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index d78fe8f3aa..7f00df0962 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -11,12 +11,16 @@
 load(
     "//third_party/gpus:cuda_configure.bzl",
     "auto_configure_fail",
+    "compute_capabilities",
+    "cuda_toolkit_path",
     "find_cuda_define",
     "matches_version",
 )
 
-_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
 _NCCL_HDR_PATH = "NCCL_HDR_PATH"
+_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
 _TF_NCCL_VERSION = "TF_NCCL_VERSION"
 _TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO"
 
@@ -37,6 +41,12 @@ cc_library(
 """
 
 _NCCL_ARCHIVE_BUILD_CONTENT = """
+exports_files([
+    "cuda/bin/crt/link.stub",
+    "cuda/bin/fatbinary",
+    "nvlink",
+])
+
 filegroup(
   name = "LICENSE",
   data = ["@nccl_archive//:LICENSE.txt"],
@@ -50,113 +60,125 @@ alias(
 )
 """
 
-# Local build results in dynamic link and the license should not be included.
-_NCCL_REMOTE_BUILD_TEMPLATE = Label("//third_party/nccl:remote.BUILD.tpl")
-_NCCL_LOCAL_BUILD_TEMPLATE = Label("//third_party/nccl:system.BUILD.tpl")
+def _label(file):
+    return Label("//third_party/nccl:{}".format(file))
 
 def _find_nccl_header(repository_ctx, nccl_install_path):
-  """Finds the NCCL header on the system.
-
-  Args:
-    repository_ctx: The repository context.
-    nccl_install_path: The NCCL library install directory.
+    """Finds the NCCL header on the system.
 
-  Returns:
-    The path to the NCCL header.
-  """
-  header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path)
-  if not header_path.exists:
-    auto_configure_fail("Cannot find %s" % str(header_path))
-  return header_path
+    Args:
+      repository_ctx: The repository context.
+      nccl_install_path: The NCCL library install directory.
 
+    Returns:
+      The path to the NCCL header.
+    """
+    header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path)
+    if not header_path.exists:
+        auto_configure_fail("Cannot find %s" % str(header_path))
+    return header_path
 
 def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version):
-  """Checks whether the header file matches the specified version of NCCL.
-
-  Args:
-    repository_ctx: The repository context.
-    nccl_install_path: The NCCL library install directory.
-    nccl_version: The expected NCCL version.
-
-  Returns:
-    A string containing the library version of NCCL.
-  """
-  header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
-  if not header_path.exists:
-    header_path = _find_nccl_header(repository_ctx, nccl_install_path)
-  header_dir = str(header_path.realpath.dirname)
-  major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
-                                   _DEFINE_NCCL_MAJOR)
-  minor_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
-                                   _DEFINE_NCCL_MINOR)
-  patch_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
-                                   _DEFINE_NCCL_PATCH)
-  header_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
-  if not matches_version(nccl_version, header_version):
-    auto_configure_fail(
-        ("NCCL library version detected from %s/nccl.h (%s) does not match " +
-         "TF_NCCL_VERSION (%s). To fix this rerun configure again.") %
-        (header_dir, header_version, nccl_version))
-
-
-def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version):
-  """Finds the given NCCL library on the system.
-
-  Args:
-    repository_ctx: The repository context.
-    nccl_install_path: The NCCL library installation directory.
-    nccl_version: The version of NCCL library files as returned
-      by _nccl_version.
-
-  Returns:
-    The path to the NCCL library.
-  """
-  lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path,
-                                                           nccl_version))
-  if not lib_path.exists:
-    auto_configure_fail("Cannot find NCCL library %s" % str(lib_path))
-  return lib_path
-
+    """Checks whether the header file matches the specified version of NCCL.
+
+    Args:
+      repository_ctx: The repository context.
+      nccl_install_path: The NCCL library install directory.
+      nccl_hdr_path: The NCCL header path.
+      nccl_version: The expected NCCL version.
+
+    Returns:
+      A string containing the library version of NCCL.
+    """
+    header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
+    if not header_path.exists:
+        header_path = _find_nccl_header(repository_ctx, nccl_install_path)
+    header_dir = str(header_path.realpath.dirname)
+    major_version = find_cuda_define(
+        repository_ctx,
+        header_dir,
+        "nccl.h",
+        _DEFINE_NCCL_MAJOR,
+    )
+    minor_version = find_cuda_define(
+        repository_ctx,
+        header_dir,
+        "nccl.h",
+        _DEFINE_NCCL_MINOR,
+    )
+    patch_version = find_cuda_define(
+        repository_ctx,
+        header_dir,
+        "nccl.h",
+        _DEFINE_NCCL_PATCH,
+    )
+    header_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
+    if not matches_version(nccl_version, header_version):
+        auto_configure_fail(
+            ("NCCL library version detected from %s/nccl.h (%s) does not match " +
+             "TF_NCCL_VERSION (%s). To fix this rerun configure again.") %
+            (header_dir, header_version, nccl_version),
+        )
 
 def _nccl_configure_impl(repository_ctx):
-  """Implementation of the nccl_configure repository rule."""
-  if _TF_NCCL_VERSION not in repository_ctx.os.environ:
-    # Add a dummy build file to make bazel query happy.
-    repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
-    return
-
-  if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ:
-    # Forward to the pre-configured remote repository.
-    repository_ctx.template("BUILD", _NCCL_REMOTE_BUILD_TEMPLATE, {
-        "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO],
-    })
-    return
-
-  nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip()
-  if matches_version("1", nccl_version):
-    # Alias to GitHub target from @nccl_archive.
-    if not matches_version(nccl_version, "1.3"):
-      auto_configure_fail(
-          "NCCL from GitHub must use version 1.3 (got %s)" % nccl_version)
-    repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
-  else:
-    # Create target for locally installed NCCL.
-    nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
-    nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
-    _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
-    repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, {
-        "%{version}": nccl_version,
-        "%{install_path}": nccl_install_path,
-        "%{hdr_path}": nccl_hdr_path,
-    })
-
+    """Implementation of the nccl_configure repository rule."""
+    if _TF_NCCL_VERSION not in repository_ctx.os.environ:
+        # Add a dummy build file to make bazel query happy.
+        repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
+        return
+
+    if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ:
+        # Forward to the pre-configured remote repository.
+        repository_ctx.template("BUILD", _label("remote.BUILD.tpl"), {
+            "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO],
+        })
+        return
+
+    nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip()
+    if nccl_version == "":
+        # Alias to open source build from @nccl_archive.
+        repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
+
+        # TODO(csigg): implement and reuse in cuda_configure.bzl.
+        gpu_architectures = [
+            "sm_" + capability.replace(".", "")
+            for capability in compute_capabilities(repository_ctx)
+        ]
+
+        # Round-about way to make the list unique.
+        gpu_architectures = dict(zip(gpu_architectures, gpu_architectures)).keys()
+        repository_ctx.template("build_defs.bzl", _label("build_defs.bzl.tpl"), {
+            "%{gpu_architectures}": str(gpu_architectures),
+        })
+
+        repository_ctx.symlink(cuda_toolkit_path(repository_ctx), "cuda")
+
+        # Temporary work-around for setups which symlink ptxas to a newer
+        # version. The versions of nvlink and ptxas need to agree, so we find
+        # nvlink next to the real location of ptxas. This is only temporary and
+        # will be removed again soon.
+        nvlink_dir = repository_ctx.path("cuda/bin/ptxas").realpath.dirname
+        repository_ctx.symlink(nvlink_dir.get_child("nvlink"), "nvlink")
+    else:
+        # Create target for locally installed NCCL.
+        nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
+        nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
+        _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
+        repository_ctx.template("BUILD", _label("system.BUILD.tpl"), {
+            "%{version}": nccl_version,
+            "%{install_path}": nccl_install_path,
+            "%{hdr_path}": nccl_hdr_path,
+        })
 
 nccl_configure = repository_rule(
-    implementation=_nccl_configure_impl,
-    environ=[
-        _NCCL_INSTALL_PATH,
+    implementation = _nccl_configure_impl,
+    environ = [
+        _CUDA_TOOLKIT_PATH,
         _NCCL_HDR_PATH,
+        _NCCL_INSTALL_PATH,
         _TF_NCCL_VERSION,
+        _TF_CUDA_COMPUTE_CAPABILITIES,
+        _TF_NCCL_CONFIG_REPO,
     ],
 )
 """Detects and configures the NCCL configuration.
-- 
GitLab


From d258207f1583df4faa452265b051879af6c15dac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 08:55:53 -0700
Subject: [PATCH 1182/1357] BEGIN_PUBLIC Automated rollback of PR #21945
 END_PUBLIC Automated rollback of commit
 863f61412fcc654840c6b67473b742ea4e5e964e. Revert #21945.

PiperOrigin-RevId: 215913175
---
 tensorflow/python/ops/array_ops.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index e3e4d5f910..4be9c532f4 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1407,13 +1407,8 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
         gen_array_ops.conjugate_transpose
         if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose)
     if perm is None:
-      a = ops.convert_to_tensor(a, name="a")
-      if not a.get_shape().ndims:
-        rank = gen_array_ops.rank(a)
-        perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
-      else:
-        rank = a.get_shape().ndims
-        perm = (rank - 1) - np.arange(rank)
+      rank = gen_array_ops.rank(a)
+      perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
       ret = transpose_fn(a, perm, name=name)
       # NOTE(mrry): Setting the shape explicitly because
       #   reverse is not handled by the shape function.
-- 
GitLab


From 5a43e01ef0f8cb86d836a4d1c08a246630e26f8c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 09:29:00 -0700
Subject: [PATCH 1183/1357] Update XlaSort to match the underlying HLO.

PiperOrigin-RevId: 215917470
---
 tensorflow/compiler/tests/sort_ops_test.py    | 18 ++++++++++++++-
 .../compiler/tf2xla/kernels/sort_ops.cc       | 17 +++++++++++++-
 tensorflow/compiler/tf2xla/ops/xla_ops.cc     | 23 ++++++++++++++++++-
 tensorflow/compiler/tf2xla/python/xla.py      | 12 ++++++----
 .../compiler/xla/service/hlo_verifier.cc      |  2 +-
 5 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/tests/sort_ops_test.py b/tensorflow/compiler/tests/sort_ops_test.py
index dbf4beb693..57f0ab7a9e 100644
--- a/tensorflow/compiler/tests/sort_ops_test.py
+++ b/tensorflow/compiler/tests/sort_ops_test.py
@@ -48,13 +48,29 @@ class XlaSortOpTest(xla_test.XLATestCase):
         self.assertAllClose(v, result, rtol=1e-3)
 
   def testSort(self):
-    supported_types = set([dtypes.bfloat16.as_numpy_dtype, np.float32])
+    supported_types = set(
+        [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
     for dtype in supported_types.intersection(self.numeric_types):
       x = np.arange(101, dtype=dtype)
       np.random.shuffle(x)
       self._assertOpOutputMatchesExpected(
           xla.sort, [x], expected=[np.arange(101, dtype=dtype)])
 
+  def testKeyValueSort(self):
+    supported_types = set(
+        [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
+    for key_type in supported_types.intersection(self.numeric_types):
+      for value_type in supported_types.intersection(self.numeric_types):
+        x = np.arange(101, dtype=key_type)
+        np.random.shuffle(x)
+        y = (-x).astype(value_type)
+        self._assertOpOutputMatchesExpected(
+            xla.key_value_sort, [x, y],
+            expected=[
+                np.arange(101, dtype=key_type),
+                -np.arange(101, dtype=value_type)
+            ])
+
   def testTopK(self):
     supported_types = set(
         [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
diff --git a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
index aaeeae01cc..45f03d8c21 100644
--- a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
@@ -25,11 +25,26 @@ class XlaSortOp : public XlaOpKernel {
   explicit XlaSortOp(OpKernelConstruction* context) : XlaOpKernel(context) {}
 
   void Compile(XlaOpKernelContext* context) override {
-    context->SetOutput(0, xla::Sort(context->Input(0)));
+    context->SetOutput(0, xla::Sort(context->Input("input")));
   }
 };
 
 REGISTER_XLA_OP(Name("XlaSort"), XlaSortOp);
 
+class XlaKeyValueSortOp : public XlaOpKernel {
+ public:
+  explicit XlaKeyValueSortOp(OpKernelConstruction* context)
+      : XlaOpKernel(context) {}
+
+  void Compile(XlaOpKernelContext* context) override {
+    xla::XlaOp result =
+        xla::Sort(context->Input("keys"), context->Input("values"));
+    context->SetOutput(0, xla::GetTupleElement(result, 0));
+    context->SetOutput(1, xla::GetTupleElement(result, 1));
+  }
+};
+
+REGISTER_XLA_OP(Name("XlaKeyValueSort"), XlaKeyValueSortOp);
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
index 733eeed3c6..557911553d 100644
--- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc
+++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
@@ -354,12 +354,33 @@ Wraps the XLA Sort operator, documented at
  https://www.tensorflow.org/performance/xla/operation_semantics#sort
 .
 
-Sorts a tensor. Currently only rank 1 sorts in ascending order are supported.
+Sorts a tensor. Currently only sorts in ascending order are supported.
 
 input: A `Tensor` of type T.
 output: A `Tensor` of type T.
 )doc");
 
+REGISTER_OP("XlaKeyValueSort")
+    .Input("keys: K")
+    .Input("values: V")
+    .Output("sorted_keys: K")
+    .Output("sorted_values: V")
+    .Attr("K: realnumbertype")
+    .Attr("V: type")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Wraps the XLA Sort operator, documented at
+ https://www.tensorflow.org/performance/xla/operation_semantics#sort
+.
+
+Sorts a tensor. Currently only sorts in ascending order are supported.
+
+keys: A `Tensor` of type K.
+values: A `Tensor` of type V.
+sorted_keys: A `Tensor` of type K.
+sorted_values: A `Tensor` of type V.
+)doc");
+
 // TODO(b/37549631) setting the While Op to always be stateful is too
 // conservative.
 REGISTER_OP("XlaWhile")
diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py
index 27dd18a9bb..bc7924c371 100644
--- a/tensorflow/compiler/tf2xla/python/xla.py
+++ b/tensorflow/compiler/tf2xla/python/xla.py
@@ -212,9 +212,9 @@ bitcast_convert_type = array_ops.bitcast
 
 def broadcast(x, dims, name=None):
   x = ops.convert_to_tensor(x)
-  shape = array_ops.concat(
-      [constant_op.constant(dims),
-       array_ops.shape(x)], axis=0)
+  shape = array_ops.concat([constant_op.constant(dims),
+                            array_ops.shape(x)],
+                           axis=0)
   return array_ops.broadcast_to(x, shape, name=name)
 
 
@@ -332,12 +332,13 @@ def reduce_window(operand,
     init: a scalar tensor representing the initial value for the reduction
     reducer: a reduction function that combines a pair of scalars.
     window_dimensions: shape of the window, as a list of integers
-    window_strides: inter-window strides, as a list of integers. Optional;
-      if omitted, defaults to strides of 1.
+    window_strides: inter-window strides, as a list of integers. Optional; if
+      omitted, defaults to strides of 1.
     padding: padding to apply to 'operand'. List of (low, high) pairs of
       integers that specify the padding to apply before and after each
       dimension. Optional; if omitted, defaults to no padding.
     name: the operator name, or None.
+
   Returns:
     A tensor that represents the output of the reduce_window operator.
   """
@@ -377,4 +378,5 @@ def slice(x, start_dims, limit_dims, strides):
 
 
 sort = gen_xla_ops.xla_sort
+key_value_sort = gen_xla_ops.xla_key_value_sort
 while_loop = gen_xla_ops.xla_while
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index b5498bb936..c22ee03388 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -548,6 +548,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) {
     case HloOpcode::kTupleSelect:
     case HloOpcode::kSend:
     case HloOpcode::kSendDone:
+    case HloOpcode::kSort:
     case HloOpcode::kTuple:
     case HloOpcode::kWhile:
       break;
@@ -1153,7 +1154,6 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
   TF_RETURN_IF_ERROR(VerifyHloStructure(module));
   TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module));
 
-
   for (auto* computation : module->computations()) {
     std::unique_ptr<ShapeVerifier> shape_verifier = shape_verifier_factory_();
     TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get()));
-- 
GitLab


From 8b7c789e7401fe56b4f648a04f675a3cb69119e5 Mon Sep 17 00:00:00 2001
From: Jing Li <jingli@google.com>
Date: Fri, 5 Oct 2018 09:54:40 -0700
Subject: [PATCH 1184/1357] - Don't set tpu optimizer parameter variable during
 weight initialization if the optimizer isn't set, e.g. loading weights and
 then predict. - Add load_weights for `KerasTpuModel`.

PiperOrigin-RevId: 215920993
---
 tensorflow/contrib/tpu/python/tpu/keras_support.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index a3a7fd8bb0..af183b3232 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -1998,6 +1998,9 @@ class KerasTPUModel(models.Model):
 
     logging.info('Setting weights on TPU model.')
     cloned_model.set_weights(weights)
+    if self._tpu_model.optimizer is None:
+      # tpu_model may not be compiled, e.g., loading weights and then predict.
+      return
     for k, v in six.iteritems(cpu_optimizer_config):
       opt_var = getattr(self._tpu_model.optimizer, k)
       if isinstance(opt_var, variables.Variable):
@@ -2052,6 +2055,10 @@ class KerasTPUModel(models.Model):
     self._cpu_model.set_weights(weights)
     self._tpu_weights_initialized = False
 
+  def load_weights(self, filepath, by_name=False):
+    self._cpu_model.load_weights(filepath, by_name)
+    self._tpu_weights_initialized = False
+
 
 # pylint: disable=bad-continuation
 def _validate_shapes(model):
-- 
GitLab


From d493a7f2fdbbc29a292741135f4c1598352e876b Mon Sep 17 00:00:00 2001
From: Mingsheng Hong <hongm@google.com>
Date: Fri, 5 Oct 2018 10:31:23 -0700
Subject: [PATCH 1185/1357] When running a native/builtin op via eager C API,
 automatically fill in default attr values that are not overridden e.g.
 transpose_a in the matmul op).

This is required for backward compatibility (a binary built via an older version
of TF should still run on a newer version of TF, where some ops may have added
attrs).

For non-eager graph building, the default attr values of graph ops are added by
tensorflow::AddDefaultsToNodeDef().

We ran into this issue when running the same S4TF test cases via eager APIs --
some tests failed due to "missing attrs", but are fixed by this patch.

PiperOrigin-RevId: 215927271
---
 tensorflow/c/eager/c_api_test_util.cc            |  2 --
 .../core/common_runtime/eager/attr_builder.cc    | 16 ++++++++++++++++
 .../core/common_runtime/eager/attr_builder.h     |  6 ++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc
index 5607c9dcb0..008f088c2d 100644
--- a/tensorflow/c/eager/c_api_test_util.cc
+++ b/tensorflow/c/eager/c_api_test_util.cc
@@ -99,8 +99,6 @@ TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) {
   TFE_OpAddInput(op, b, status);
   CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TF_DeleteStatus(status);
-  TFE_OpSetAttrBool(op, "transpose_a", 0);
-  TFE_OpSetAttrBool(op, "transpose_b", 0);
   TFE_OpSetAttrType(op, "T", TFE_TensorHandleDataType(a));
 
   return op;
diff --git a/tensorflow/core/common_runtime/eager/attr_builder.cc b/tensorflow/core/common_runtime/eager/attr_builder.cc
index cf1cd4134e..5c8369de87 100644
--- a/tensorflow/core/common_runtime/eager/attr_builder.cc
+++ b/tensorflow/core/common_runtime/eager/attr_builder.cc
@@ -136,6 +136,22 @@ void AttrBuilder::FillAttrValueMap(AttrValueMap* m,
       m->insert(*it);
     }
   }
+  // For any attr-value pairs that exist in the op def (from op registry) but
+  // not `m`, fill them into `m`, so that we can run a TFE_Op without having to
+  // specify all the default attr values (e.g. for matmul, the `transpose_a`
+  // attr defaults to false).
+  const OpDef* op_def = nullptr;
+  Status s = OpDefForOp(op_name_.c_str(), &op_def);
+  // This is expected, if this op is a custom function, and is therefore not
+  // present in the op registry.
+  if (!s.ok()) return;
+
+  DCHECK(op_def);
+  for (const auto& attr_def : op_def->attr()) {
+    if (attr_def.has_default_value() && !m->count(attr_def.name())) {
+      SetInAttrValueMap(m, attr_def.name(), attr_def.default_value());
+    }
+  }
 }
 
 const NodeDef& AttrBuilder::BuildNodeDef() {
diff --git a/tensorflow/core/common_runtime/eager/attr_builder.h b/tensorflow/core/common_runtime/eager/attr_builder.h
index cbe6a1cb50..c114ea4ba0 100644
--- a/tensorflow/core/common_runtime/eager/attr_builder.h
+++ b/tensorflow/core/common_runtime/eager/attr_builder.h
@@ -110,6 +110,12 @@ class AttrBuilder {
   using AttrVec = tensorflow::gtl::InlinedVector<std::pair<StringPiece, T>, 2>;
 
   void MayBeInitializeNodeDef();
+  // Fill `m` with the attr-value pairs set via AttrBuilder::Set() so far, as
+  // well as any default attr-value pairs from the associated op_def, if there
+  // is one.
+  //
+  // If `include_those_in_node_def` is true, also include any attr-value pairs
+  // from `node_def_`.
   void FillAttrValueMap(AttrValueMap* m, bool include_those_in_node_def) const;
 
   template <class T>
-- 
GitLab


From e2f80439c5bfee56581875219ea83cc5307854f5 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Fri, 5 Oct 2018 10:37:16 -0700
Subject: [PATCH 1186/1357] Refactoring TFLite export code. Unify OperatorCode
 generation logic.

PiperOrigin-RevId: 215928419
---
 tensorflow/contrib/lite/toco/tflite/export.cc | 176 ++++++++++--------
 tensorflow/contrib/lite/toco/tflite/export.h  |  19 +-
 .../contrib/lite/toco/tflite/export_test.cc   |  77 +++++---
 3 files changed, 163 insertions(+), 109 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index 45ca7f7f0c..f6f76e48a4 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -63,21 +63,21 @@ bool IsControlFlowOp(const string& tensorflow_op) {
   return false;
 }
 
-details::OperatorKey GetOperatorKey(
-    const ::toco::Operator& op,
-    const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_flex_ops) {
-  string custom_code;
-  if (op.type == OperatorType::kUnsupported) {
-    const TensorFlowUnsupportedOperator& unsupported_op =
-        static_cast<const TensorFlowUnsupportedOperator&>(op);
-    custom_code = unsupported_op.tensorflow_op;
-  }
-  int version = 1;
-  if (ops_by_type.count(op.type) != 0) {
-    version = ops_by_type.at(op.type)->GetVersion(op);
+// Map from operator name to TF Lite enum value, for all builtins.
+const std::map<string, BuiltinOperator>& GetBuiltinOpsMap() {
+  static std::map<string, BuiltinOperator>* builtin_ops = nullptr;
+  if (builtin_ops == nullptr) {
+    builtin_ops = new std::map<string, BuiltinOperator>();
+
+    for (int i = BuiltinOperator_MIN; i <= BuiltinOperator_MAX; ++i) {
+      BuiltinOperator op = static_cast<BuiltinOperator>(i);
+      string name = EnumNameBuiltinOperator(op);
+      if (op != BuiltinOperator_CUSTOM && !name.empty()) {
+        (*builtin_ops)[name] = op;
+      }
+    }
   }
-  return details::OperatorKey(op.type, custom_code, version, allow_flex_ops);
+  return *builtin_ops;
 }
 
 void WriteModelToString(const flatbuffers::FlatBufferBuilder& builder,
@@ -91,27 +91,59 @@ void WriteModelToString(const flatbuffers::FlatBufferBuilder& builder,
 
 namespace details {
 
-OperatorKey::OperatorKey(OperatorType type, const std::string& custom_code,
-                         int version, bool allow_flex_ops) {
-  this->type = type;
-  this->custom_code = custom_code;
-  this->version = version;
-
-  if (type == OperatorType::kUnsupported) {
-    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
-    // to populate a regular custom op. We need to find a way to fix this.
-    if (allow_flex_ops) {
-      // Memorize the original TensorFlow op name.
-      this->flex_tensorflow_op = custom_code;
-      // Prefix the custom code of the flex op.
-      this->custom_code = string(::tflite::kFlexCustomCodePrefix) + custom_code;
-      this->is_flex_op = true;
-
-      if (IsControlFlowOp(this->flex_tensorflow_op)) {
-        is_unsupported_flex_op = true;
+OperatorKey GetOperatorKey(
+    const ::toco::Operator& op,
+    const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
+    bool allow_flex_ops) {
+  string name = HelpfulOperatorTypeName(op);
+  const auto& builtin_ops = GetBuiltinOpsMap();
+
+  bool is_builtin = false;
+  OperatorKey key;
+  if (ops_by_type.count(op.type) != 0) {
+    key.version = ops_by_type.at(op.type)->GetVersion(op);
+    name = ops_by_type.at(op.type)->name();
+    is_builtin = (builtin_ops.count(name) > 0);
+  }
+
+  if (is_builtin) {
+    // For TFLite supported builtin ops, find out its BuiltinOperator enum used
+    // in FlatBuffer.
+    key.type = builtin_ops.at(name);
+  } else {
+    key.type = BuiltinOperator_CUSTOM;
+
+    key.is_custom_op = true;
+    if (op.type == OperatorType::kUnsupported) {
+      const TensorFlowUnsupportedOperator& unsupported_op =
+          static_cast<const TensorFlowUnsupportedOperator&>(op);
+      const auto tensorflow_op = unsupported_op.tensorflow_op;
+
+      // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
+      // to populate a regular custom op. We need to find a way to fix this.
+      if (allow_flex_ops) {
+        // Memorize the original TensorFlow op name.
+        key.flex_tensorflow_op = tensorflow_op;
+        // Prefix the custom code of the flex op.
+        key.custom_code =
+            string(::tflite::kFlexCustomCodePrefix) + tensorflow_op;
+        key.is_flex_op = true;
+
+        if (IsControlFlowOp(tensorflow_op)) {
+          key.is_unsupported_flex_op = true;
+        }
+      } else {
+        key.custom_code = tensorflow_op;
       }
+    } else {
+      // For Toco-supported/TFLite-unsupported ops, currently we produce a
+      // custom op. This gives developers a chance to implement custom ops.
+      // TODO(b/116800229): Also produce Toco-supported/TFLite-unsupported ops
+      // as Flex ops when Flex mode is enabled.
+      key.custom_code = name;
     }
   }
+  return key;
 }
 
 void LoadTensorsMap(const Model& model, TensorsMap* tensors_map) {
@@ -145,6 +177,7 @@ void LoadOperatorsMap(
     ++index;
   }
 }
+
 }  // namespace details
 
 Offset<Vector<Offset<Tensor>>> ExportTensors(
@@ -230,7 +263,7 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
     const Model& model,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
     const details::OperatorsMap& operators_map, FlatBufferBuilder* builder,
-    std::set<string>* unsupported_ops, const ExportParams& params) {
+    const ExportParams& params) {
   // Map from operator name to TF Lite enum value, for all builtins.
   std::map<string, BuiltinOperator> builtin_ops;
   for (int i = BuiltinOperator_MIN; i <= BuiltinOperator_MAX; ++i) {
@@ -247,37 +280,16 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
 
   for (const auto& op : model.operators) {
     const details::OperatorKey operator_key =
-        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops);
+        details::GetOperatorKey(*op, ops_by_type, params.allow_flex_ops);
     int op_index = operators_map.at(operator_key);
-    int op_version = operator_key.version;
 
-    string name = HelpfulOperatorTypeName(*op);
-    bool is_builtin = false;
-    if (ops_by_type.count(op->type) != 0) {
-      name = ops_by_type.at(op->type)->name();
-      is_builtin = (builtin_ops.count(name) > 0);
+    flatbuffers::Offset<flatbuffers::String> custom_code = 0;
+    if (!operator_key.custom_code.empty()) {
+      custom_code = builder->CreateString(operator_key.custom_code);
     }
 
-    if (is_builtin) {
-      ordered_opcodes[op_index] =
-          CreateOperatorCode(*builder, builtin_ops[name], 0, op_version);
-    } else {
-      // This could be a kUnsupported, in which case we should be
-      // able to retrieve the original Tensorflow name from the OperatorKey, or
-      // this could be a proper TOCO operator that is completely unknown to TF
-      // Lite.
-      if (!operator_key.custom_code.empty()) {
-        name = operator_key.custom_code;
-      }
-      // Either way, this is an operator that is not supported by TF Lite,
-      // so we output it as a custom op and add it to the error summary.
-      if (unsupported_ops) {
-        unsupported_ops->insert(name);
-      }
-      ordered_opcodes[op_index] =
-          CreateOperatorCode(*builder, BuiltinOperator_CUSTOM,
-                             builder->CreateString(name), op_version);
-    }
+    ordered_opcodes[op_index] = CreateOperatorCode(
+        *builder, operator_key.type, custom_code, operator_key.version);
   }
 
   std::vector<Offset<OperatorCode>> opcode_vector;
@@ -312,7 +324,7 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
     }
 
     int op_index = operators_map.at(
-        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops));
+        details::GetOperatorKey(*op, ops_by_type, params.allow_flex_ops));
 
     auto tflite_op_it = ops_by_type.find(op->type);
     BaseOperator* tflite_op = tflite_op_it == ops_by_type.end()
@@ -386,9 +398,8 @@ void Export(
   Array empty_array;
   buffers_to_write.push_back(&empty_array);
 
-  std::set<string> unsupported_ops;
-  auto op_codes = ExportOperatorCodes(model, ops_by_type, operators_map,
-                                      &builder, &unsupported_ops, params);
+  auto op_codes =
+      ExportOperatorCodes(model, ops_by_type, operators_map, &builder, params);
 
   for (const auto& op : model.operators) {
     if (op->type == OperatorType::kFakeQuant) {
@@ -398,7 +409,20 @@ void Export(
                       "for --std_values and --mean_values.";
     }
   }
-  if (!unsupported_ops.empty()) {
+
+  std::set<string> custom_ops;
+  std::set<string> unsupported_flex_ops;
+  for (const auto& it : operators_map) {
+    const details::OperatorKey& key = it.first;
+    if (key.is_custom_op) {
+      custom_ops.insert(key.custom_code);
+    }
+    if (key.is_unsupported_flex_op) {
+      unsupported_flex_ops.insert(key.flex_tensorflow_op);
+    }
+  }
+
+  if (!custom_ops.empty()) {
     if (!params.allow_custom_ops) {
       // Remove ExpandDims and ReorderAxes from unimplemented list unless they
       // compose the list. Both ops are removed during graph transformations.
@@ -406,14 +430,14 @@ void Export(
       // transformation is unable to run because the output shape is not
       // defined. This causes unnecessary confusion during model conversion
       // time.
-      std::set<string> unsupported_ops_final;
-      for (const auto& op_type : unsupported_ops) {
+      std::set<string> custom_ops_final;
+      for (const auto& op_type : custom_ops) {
         if (op_type != "ReorderAxes" && op_type != "ExpandDims") {
-          unsupported_ops_final.insert(op_type);
+          custom_ops_final.insert(op_type);
         }
       }
-      if (unsupported_ops_final.empty()) {
-        unsupported_ops_final = unsupported_ops;
+      if (custom_ops_final.empty()) {
+        custom_ops_final = custom_ops;
       }
 
       LOG(QFATAL)
@@ -423,13 +447,13 @@ void Export(
              "--allow_custom_ops, or by setting allow_custom_ops=True "
              "when calling tf.contrib.lite.TFLiteConverter(). Here is a list "
              "of operators for which  you will need custom implementations: "
-          << absl::StrJoin(unsupported_ops_final, ", ") << ".";
+          << absl::StrJoin(custom_ops_final, ", ") << ".";
     }
 
     std::set<string> unsupported_control_flow_ops;
     // Check if unsupported ops contains control flow ops. It's impossible
     // to implement these ops as custom ops at the moment.
-    for (const auto& op : unsupported_ops) {
+    for (const auto& op : custom_ops) {
       if (IsControlFlowOp(op)) {
         unsupported_control_flow_ops.insert(op);
       }
@@ -441,14 +465,6 @@ void Export(
     }
   }
 
-  std::set<string> unsupported_flex_ops;
-  for (const auto& it : operators_map) {
-    const details::OperatorKey& key = it.first;
-    if (key.is_unsupported_flex_op) {
-      unsupported_flex_ops.insert(key.custom_code);
-    }
-  }
-
   if (!unsupported_flex_ops.empty()) {
     LOG(QFATAL) << "Some of the operators in the model are not supported by "
                    "TensorFlow Flex runtime: "
diff --git a/tensorflow/contrib/lite/toco/tflite/export.h b/tensorflow/contrib/lite/toco/tflite/export.h
index 9efb282c6c..c627f48086 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.h
+++ b/tensorflow/contrib/lite/toco/tflite/export.h
@@ -81,16 +81,20 @@ using TensorsMap = std::unordered_map<string, int>;
 // Only when `type` is `kUnsupported`, `custom_code` is filled to
 // identify which operation is used.
 struct OperatorKey {
-  OperatorKey(OperatorType type, const std::string& custom_code, int version,
-              bool allow_flex_ops = false);
+  OperatorKey() {}
+  OperatorKey(::tflite::BuiltinOperator type, const std::string& custom_code,
+              int version)
+      : type(type), custom_code(custom_code), version(version) {}
 
   // Only `type`, `custom_code` and `version` is used to compute hash and
   // identity.
-  OperatorType type;
+  ::tflite::BuiltinOperator type = ::tflite::BuiltinOperator_CUSTOM;
   std::string custom_code;
-  int version;
+  int version = 1;
 
-  // THe fields below are not used to compute hash and identity.
+  // The fields below are not used to compute hash and identity.
+  // TODO(ycling): Consider to change these fields to accessor functions.
+  bool is_custom_op = false;
   bool is_flex_op = false;
   bool is_unsupported_flex_op = false;
   // The original TensorFlow op name for the flex op. Filled only when
@@ -124,6 +128,11 @@ struct OperatorKey {
   };
 };
 
+OperatorKey GetOperatorKey(
+    const ::toco::Operator& op,
+    const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
+    bool allow_flex_ops);
+
 // A maps from operator type to its final position in the TF Lite buffer.
 using OperatorsMap = std::unordered_map<OperatorKey, int, OperatorKey::Hash>;
 
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index a71a64d56f..d48ab78285 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -105,13 +105,15 @@ TEST_F(ExportTest, LoadOperatorsMap) {
 
   details::OperatorsMap operators;
   const auto ops_by_type = BuildOperatorByTypeMap();
-  // TODO(ycling): Add a test for allow_flex_ops.
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
-  EXPECT_EQ(0, operators[details::OperatorKey(OperatorType::kAdd, "", 1)]);
-  EXPECT_EQ(1, operators[details::OperatorKey(OperatorType::kConv, "", 1)]);
-  EXPECT_EQ(2, operators[details::OperatorKey(OperatorType::kSub, "", 1)]);
-  EXPECT_EQ(3, operators[details::OperatorKey(OperatorType::kUnsupported,
+  EXPECT_EQ(
+      0, operators[details::OperatorKey(::tflite::BuiltinOperator_ADD, "", 1)]);
+  EXPECT_EQ(1, operators[details::OperatorKey(::tflite::BuiltinOperator_CONV_2D,
+                                              "", 1)]);
+  EXPECT_EQ(2, operators[details::OperatorKey(::tflite::BuiltinOperator_CUSTOM,
                                               "MyCrazyOp", 1)]);
+  EXPECT_EQ(
+      3, operators[details::OperatorKey(::tflite::BuiltinOperator_SUB, "", 1)]);
 }
 
 TEST_F(ExportTest, Export) {
@@ -133,7 +135,7 @@ TEST_F(ExportTest, Export) {
   }
 
   EXPECT_THAT(names, ElementsAre("builtin:ADD", "builtin:CONV_2D",
-                                 "builtin:SUB", "custom:MyCrazyOp"));
+                                 "custom:MyCrazyOp", "builtin:SUB"));
 
   std::vector<uint32_t> indices;
   auto operators = (*model->subgraphs())[0]->operators();
@@ -142,7 +144,7 @@ TEST_F(ExportTest, Export) {
     indices.push_back(op->opcode_index());
   }
 
-  EXPECT_THAT(indices, ElementsAre(1, 0, 3, 2));
+  EXPECT_THAT(indices, ElementsAre(1, 0, 2, 3));
 }
 
 TEST_F(ExportTest, QuantizeWeights) {
@@ -257,7 +259,8 @@ TEST_F(VersionedOpExportTest, LoadOperatorsMapWithOpV1) {
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
 
   EXPECT_EQ(1, operators.size());
-  EXPECT_EQ(0, operators.at(details::OperatorKey(OperatorType::kConv, "", 1)));
+  EXPECT_EQ(0, operators.at(details::OperatorKey(
+                   ::tflite::BuiltinOperator_CONV_2D, "", 1)));
 }
 
 TEST_F(VersionedOpExportTest, LoadOperatorsMapWithOpV2) {
@@ -268,7 +271,8 @@ TEST_F(VersionedOpExportTest, LoadOperatorsMapWithOpV2) {
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
 
   EXPECT_EQ(1, operators.size());
-  EXPECT_EQ(0, operators.at(details::OperatorKey(OperatorType::kConv, "", 2)));
+  EXPECT_EQ(0, operators.at(details::OperatorKey(
+                   ::tflite::BuiltinOperator_CONV_2D, "", 2)));
 }
 
 TEST_F(VersionedOpExportTest, LoadOperatorsMapWithBothVersions) {
@@ -280,8 +284,10 @@ TEST_F(VersionedOpExportTest, LoadOperatorsMapWithBothVersions) {
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
 
   EXPECT_EQ(2, operators.size());
-  EXPECT_EQ(0, operators.at(details::OperatorKey(OperatorType::kConv, "", 1)));
-  EXPECT_EQ(1, operators.at(details::OperatorKey(OperatorType::kConv, "", 2)));
+  EXPECT_EQ(0, operators.at(details::OperatorKey(
+                   ::tflite::BuiltinOperator_CONV_2D, "", 1)));
+  EXPECT_EQ(1, operators.at(details::OperatorKey(
+                   ::tflite::BuiltinOperator_CONV_2D, "", 2)));
 }
 
 TEST_F(VersionedOpExportTest, Export) {
@@ -314,38 +320,61 @@ TEST_F(VersionedOpExportTest, Export) {
 }
 
 TEST(OperatorKeyTest, TestBuiltinOp) {
-  details::OperatorKey key(OperatorType::kConv, "", 2);
-  EXPECT_EQ(key.type, OperatorType::kConv);
+  auto op = absl::make_unique<ConvOperator>();
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+  const auto key = details::GetOperatorKey(*op, ops_by_type, false);
+
+  EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CONV_2D);
   EXPECT_EQ(key.custom_code, "");
-  EXPECT_EQ(key.version, 2);
+  EXPECT_EQ(key.version, 1);
+}
+
+TEST(OperatorKeyTest, TestCustomOp) {
+  auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
+  op->tensorflow_op = "MyCrazyCustomOp";
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+  const auto key = details::GetOperatorKey(*op, ops_by_type, false);
+
+  EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+  EXPECT_EQ(key.custom_code, "MyCrazyCustomOp");
+  EXPECT_EQ(key.version, 1);
 }
 
 TEST(OperatorKeyTest, TestFlexOp) {
+  auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
+  op->tensorflow_op = "BatchMatMul";
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
   {
-    details::OperatorKey key(OperatorType::kUnsupported, "SomeUnsupportedOp", 1,
-                             false);
-    EXPECT_EQ(key.type, OperatorType::kUnsupported);
+    const auto key = details::GetOperatorKey(*op, ops_by_type, false);
     // It shouldn't be converted to Flex op if `allow_flex_op` is false.
-    EXPECT_EQ(key.custom_code, "SomeUnsupportedOp");
+    EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+    EXPECT_EQ(key.custom_code, "BatchMatMul");
     EXPECT_EQ(key.version, 1);
     EXPECT_FALSE(key.is_flex_op);
   }
 
   {
-    details::OperatorKey key(OperatorType::kUnsupported, "SomeUnsupportedOp", 1,
-                             true);
-    EXPECT_EQ(key.type, OperatorType::kUnsupported);
     // Verify that the custom op name is prefixed by "Flex" and `is_flex_op`
     // is true.
-    EXPECT_EQ(key.custom_code, "FlexSomeUnsupportedOp");
+    const auto key = details::GetOperatorKey(*op, ops_by_type, true);
+    EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+    EXPECT_EQ(key.custom_code, "FlexBatchMatMul");
     EXPECT_EQ(key.version, 1);
     EXPECT_TRUE(key.is_flex_op);
   }
 }
 
 TEST(OperatorKeyTest, TestFlexWithControlFlowOp) {
-  details::OperatorKey key(OperatorType::kUnsupported, "Merge", 1, true);
-  EXPECT_EQ(key.type, OperatorType::kUnsupported);
+  auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
+  op->tensorflow_op = "Merge";
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+  const auto key = details::GetOperatorKey(*op, ops_by_type, true);
+
+  EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
   EXPECT_EQ(key.custom_code, "FlexMerge");
   EXPECT_EQ(key.version, 1);
   EXPECT_TRUE(key.is_flex_op);
-- 
GitLab


From dd8afaad37fdb284dce3518a9be22aca1c25e475 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 10:48:51 -0700
Subject: [PATCH 1187/1357] Fix documentation.

PiperOrigin-RevId: 215930596
---
 tensorflow/python/framework/importer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index c6595918ae..c9ac27e788 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -370,7 +370,8 @@ def import_graph_def(graph_def,
 
   Returns:
     A list of `Operation` and/or `Tensor` objects from the imported graph,
-    corresponding to the names in `return_elements`.
+    corresponding to the names in `return_elements`,
+    and None if `returns_elements` is None.
 
   Raises:
     TypeError: If `graph_def` is not a `GraphDef` proto,
-- 
GitLab


From f410ffc1699e864e84857089183db0d952ada7fe Mon Sep 17 00:00:00 2001
From: Andreas Madsen <amwebdk@gmail.com>
Date: Thu, 26 Jul 2018 15:44:39 +0200
Subject: [PATCH 1188/1357] make sparsemax nan and infinity safe

logits that are -inf will be given 0 probability and logits that are
inf will result in a nan output. Likewise if all logits are -inf the
output will also be nan.

This is done by using where operators, mostly because 0 * inf = nan
and x/0 = sign(x) inf following the IEEE 754 standard. However these
results are not mathematically correct in the context of the sparsemax
algorithm.

Fixes: https://github.com/tensorflow/tensorflow/issues/15564
---
 .../kernel_tests/sparsemax_loss_test.py       | 64 +++++++++++++++++++
 .../python/kernel_tests/sparsemax_test.py     | 63 +++++++++++++++++-
 .../contrib/sparsemax/python/ops/sparsemax.py | 30 ++++++++-
 .../sparsemax/python/ops/sparsemax_loss.py    | 32 ++++++++--
 4 files changed, 178 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py
index 360e7dbe75..2db76a6d56 100644
--- a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py
+++ b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py
@@ -109,6 +109,66 @@ class SparsemaxLossTest(test.TestCase):
         np_loss, tf_loss_out, half_atol=1e-2, half_rtol=5e-3)
     self.assertShapeEqual(np_loss, tf_loss_op)
 
+  def _test_sparsemax_loss_of_nan(self, dtype, random, use_gpu):
+    """check sparsemax-loss transfers nan"""
+    q = np.asarray([
+        [0, 0, 1],
+        [0, 0, 1],
+        [0, 0, 1]
+    ])
+    z_nan = np.asarray([
+        [0, np.nan, 0],
+        [0, np.nan, np.nan],
+        [np.nan, np.nan, np.nan]
+    ]).astype(dtype)
+
+    _, tf_loss_nan = self._tf_sparsemax_loss(z_nan, q, dtype, use_gpu)
+    self.assertAllCloseAccordingToType(
+        [np.nan, np.nan, np.nan],
+        tf_loss_nan)
+
+  def _test_sparsemax_loss_of_inf(self, dtype, random, use_gpu):
+    """check sparsemax-loss is infinity safe"""
+    q = np.asarray([
+        [0, 0, 1],
+        [0, 0, 1],
+        [0, 0, 1],
+        [0, 0, 1]
+    ])
+    z_neg = np.asarray([
+        [0, -np.inf, 0],
+        [0, -np.inf, -np.inf],
+        [-np.inf, -np.inf, 0],
+        [-np.inf, -np.inf, -np.inf],
+    ]).astype(dtype)
+    z_pos = np.asarray([
+        [0, np.inf, 0],
+        [0, np.inf, np.inf],
+        [np.inf, np.inf, 0],
+        [np.inf, np.inf, np.inf]
+    ]).astype(dtype)
+    z_mix = np.asarray([
+        [0, np.inf, 0],
+        [0, np.inf, -np.inf],
+        [-np.inf, np.inf, 0],
+        [-np.inf, np.inf, -np.inf]
+    ]).astype(dtype)
+
+    _, tf_loss_neg = self._tf_sparsemax_loss(z_neg, q, dtype, use_gpu)
+    self.assertAllCloseAccordingToType(
+        [0.25, np.inf, 0, np.nan],
+        tf_loss_neg)
+
+    _, tf_loss_pos = self._tf_sparsemax_loss(z_pos, q, dtype, use_gpu)
+    self.assertAllCloseAccordingToType(
+        [np.nan, np.nan, np.nan, np.nan],
+        tf_loss_pos)
+
+    _, tf_loss_mix = self._tf_sparsemax_loss(z_mix, q, dtype, use_gpu)
+    self.assertAllCloseAccordingToType(
+        [np.nan, np.nan, np.nan, np.nan],
+        tf_loss_mix)
+
   def _test_constant_add(self, dtype, random, use_gpu):
     """check sparsemax-loss proposition 3"""
     z = random.uniform(low=-3, high=3, size=(test_obs, 10))
@@ -198,6 +258,10 @@ class SparsemaxLossTest(test.TestCase):
 
     self._test_sparsemax_loss_against_numpy(dtype, random, use_gpu=False)
 
+    self._test_sparsemax_loss_of_nan(dtype, random, use_gpu=False)
+
+    self._test_sparsemax_loss_of_inf(dtype, random, use_gpu=False)
+
     self._test_constant_add(dtype, random, use_gpu=False)
 
     self._test_sparsemax_loss_positive(dtype, random, use_gpu=False)
diff --git a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py
index 259e62bd86..38c6dd15db 100644
--- a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py
+++ b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py
@@ -87,6 +87,61 @@ class SparsemaxTest(test.TestCase):
         p_sparemax, tf_sparsemax_out, half_atol=5e-3)
     self.assertShapeEqual(p_sparemax, tf_sparsemax_op)
 
+  def _test_sparsemax_of_nan(self, dtype, random, use_gpu):
+    """check sparsemax transfers nan"""
+    z_nan = np.asarray([
+        [0, np.nan, 0],
+        [0, np.nan, np.nan],
+        [np.nan, np.nan, np.nan],
+    ]).astype(dtype)
+
+    _, tf_sparsemax_nan = self._tf_sparsemax(z_nan, dtype, use_gpu)
+    self.assertAllCloseAccordingToType([
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan]
+    ], tf_sparsemax_nan)
+
+  def _test_sparsemax_of_inf(self, dtype, random, use_gpu):
+    """check sparsemax is infinity safe"""
+    z_neg = np.asarray([
+        [0, -np.inf, 0],
+        [0, -np.inf, -np.inf],
+        [-np.inf, -np.inf, -np.inf],
+    ]).astype(dtype)
+    z_pos = np.asarray([
+        [0, np.inf, 0],
+        [0, np.inf, np.inf],
+        [np.inf, np.inf, np.inf]
+    ]).astype(dtype)
+    z_mix = np.asarray([
+        [0, np.inf, 0],
+        [0, np.inf, -np.inf],
+        [-np.inf, np.inf, -np.inf]
+    ]).astype(dtype)
+
+    _, tf_sparsemax_neg = self._tf_sparsemax(z_neg, dtype, use_gpu)
+    self.assertAllCloseAccordingToType([
+        [0.5, 0, 0.5],
+        [1, 0, 0],
+        [np.nan, np.nan, np.nan]
+    ], tf_sparsemax_neg)
+
+    _, tf_sparsemax_pos = self._tf_sparsemax(z_pos, dtype, use_gpu)
+    self.assertAllCloseAccordingToType([
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan]
+    ], tf_sparsemax_pos)
+
+    _, tf_sparsemax_mix = self._tf_sparsemax(z_mix, dtype, use_gpu)
+    self.assertAllCloseAccordingToType([
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan]
+    ], tf_sparsemax_mix)
+
+
   def _test_sparsemax_of_zero(self, dtype, random, use_gpu):
     """check sparsemax proposition 1, part 1"""
     z = np.zeros((1, 10))
@@ -97,7 +152,7 @@ class SparsemaxTest(test.TestCase):
     self.assertAllCloseAccordingToType(p_sparemax, tf_sparsemax_out)
     self.assertShapeEqual(p_sparemax, tf_sparsemax_op)
 
-  def _test_sparsemax_of_inf(self, dtype, random, use_gpu):
+  def _test_sparsemax_of_to_inf(self, dtype, random, use_gpu):
     """check sparsemax proposition 1, part 2"""
     z = random.uniform(low=-3, high=3, size=(test_obs, 10))
 
@@ -210,10 +265,14 @@ class SparsemaxTest(test.TestCase):
 
     self._test_sparsemax_against_numpy(dtype, random, use_gpu=False)
 
-    self._test_sparsemax_of_zero(dtype, random, use_gpu=False)
+    self._test_sparsemax_of_nan(dtype, random, use_gpu=False)
 
     self._test_sparsemax_of_inf(dtype, random, use_gpu=False)
 
+    self._test_sparsemax_of_zero(dtype, random, use_gpu=False)
+
+    self._test_sparsemax_of_to_inf(dtype, random, use_gpu=False)
+
     self._test_constant_add(dtype, random, use_gpu=False)
 
     self._test_permutation(dtype, random, use_gpu=False)
diff --git a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py
index e617af2ff1..f903b629c7 100644
--- a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py
+++ b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py
@@ -49,7 +49,14 @@ def sparsemax(logits, name=None):
     obs = array_ops.shape(logits)[0]
     dims = array_ops.shape(logits)[1]
 
-    z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis]
+    # In the paper, they call the logits z.
+    # The mean(logits) can be substracted from logits to make the algorithm
+    # more numerically stable. the instability in this algorithm comes mostly
+    # from the z_cumsum. Substacting the mean will cause z_cumsum to be close
+    # to zero. However, in practise the numerical instability issues are very
+    # minor and substacting the mean causes extra issues with inf and nan
+    # input.
+    z = logits
 
     # sort z
     z_sorted, _ = nn.top_k(z, k=dims)
@@ -64,10 +71,27 @@ def sparsemax(logits, name=None):
     k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)
 
     # calculate tau(z)
-    indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1)
+    # If there are inf values or all values are -inf, the k_z will be zero,
+    # this is mathematically invalid and will also cause the gather_nd to fail.
+    # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then
+    # fixed later (see p_safe) by returning p = nan. This results in the same
+    # behavior as softmax.
+    k_z_safe = math_ops.maximum(k_z, 1)
+    indices = array_ops.stack([math_ops.range(0, obs), k_z_safe - 1], axis=1)
     tau_sum = array_ops.gather_nd(z_cumsum, indices)
     tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)
 
     # calculate p
-    return math_ops.maximum(
+    p = math_ops.maximum(
         math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis])
+    # If k_z = 0 or if z = nan, then the input is invalid
+    p_safe = array_ops.where(
+        math_ops.logical_or(
+            math_ops.equal(k_z, 0),
+            math_ops.is_nan(z_cumsum[:, -1])
+        ),
+        array_ops.fill([obs, dims], math_ops.cast(float('nan'), logits.dtype)),
+        p
+    )
+
+    return p_safe
diff --git a/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py b/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py
index 582d1e6136..9095cfe267 100644
--- a/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py
+++ b/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py
@@ -47,14 +47,34 @@ def sparsemax_loss(logits, sparsemax, labels, name=None):
     sparsemax = ops.convert_to_tensor(sparsemax, name="sparsemax")
     labels = ops.convert_to_tensor(labels, name="labels")
 
-    shifted_logits = logits - \
-        math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis]
+    # In the paper, they call the logits z.
+    # A constant can be substracted from logits to make the algorithm
+    # more numerically stable in theory. However, there are really no major
+    # source numerical instability in this algorithm.
+    z = logits
 
     # sum over support
-    support = math_ops.cast(sparsemax > 0, sparsemax.dtype)
-    sum_s = support * sparsemax * (shifted_logits - 0.5 * sparsemax)
+    # Use a conditional where instead of a multiplication to support z = -inf.
+    # If z = -inf, and there is no support (sparsemax = 0), a multiplication
+    # would cause 0 * -inf = nan, which is not correct in this case.
+    sum_s = array_ops.where(
+        math_ops.logical_or(sparsemax > 0, math_ops.is_nan(sparsemax)),
+        sparsemax * (z - 0.5 * sparsemax),
+        array_ops.zeros_like(sparsemax)
+    )
 
     # - z_k + ||q||^2
-    q_part = labels * (0.5 * labels - shifted_logits)
+    q_part = labels * (0.5 * labels - z)
+    # Fix the case where labels = 0 and z = -inf, where q_part would
+    # otherwise be 0 * -inf = nan. But since the lables = 0, no cost for
+    # z = -inf should be consideredself.
+    # The code below also coveres the case where z = inf. Howeverm in this
+    # caose the sparsemax will be nan, which means the sum_s will also be nan,
+    # therefor this case doesn't need addtional special treatment.
+    q_part_safe = array_ops.where(
+        math_ops.logical_and(math_ops.equal(labels, 0), math_ops.is_inf(z)),
+        array_ops.zeros_like(z),
+        q_part
+    )
 
-    return math_ops.reduce_sum(sum_s + q_part, axis=1)
+    return math_ops.reduce_sum(sum_s + q_part_safe, axis=1)
-- 
GitLab


From b1325838aaf902e52fae4b085c6396848c445062 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Fri, 5 Oct 2018 11:13:53 -0700
Subject: [PATCH 1189/1357] Declare that stateless random ops are not
 differentiable in C++ code.

PiperOrigin-RevId: 215935319
---
 tensorflow/core/BUILD                        |  1 +
 tensorflow/core/ops/stateless_random_grad.cc | 23 ++++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 tensorflow/core/ops/stateless_random_grad.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 6a3ee3c1cb..900a0e11c4 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1242,6 +1242,7 @@ cc_library(
     srcs = [
         "ops/math_grad.cc",
         "ops/random_grad.cc",
+        "ops/stateless_random_grad.cc",
     ],
     linkstatic = 1,  # Needed since alwayslink is broken in bazel b/27630669
     visibility = ["//visibility:public"],
diff --git a/tensorflow/core/ops/stateless_random_grad.cc b/tensorflow/core/ops/stateless_random_grad.cc
new file mode 100644
index 0000000000..331e1d0152
--- /dev/null
+++ b/tensorflow/core/ops/stateless_random_grad.cc
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/function.h"
+
+namespace tensorflow {
+REGISTER_OP_NO_GRADIENT("StatelessRandomUniform");
+REGISTER_OP_NO_GRADIENT("StatelessRandomNormal");
+REGISTER_OP_NO_GRADIENT("StatelessTruncatedNormal");
+REGISTER_OP_NO_GRADIENT("StatelessMultinomial");
+}  // end namespace tensorflow
-- 
GitLab


From 1e446b37620dcdca73e855c83efcc0d14bb68a8c Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Fri, 5 Oct 2018 11:27:03 -0700
Subject: [PATCH 1190/1357] Make gradient tape stack thread local

PiperOrigin-RevId: 215937618
---
 tensorflow/python/eager/pywrap_tfe_src.cc | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 6193f40ce8..6d3ef9a37b 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1228,8 +1228,9 @@ static PyTypeObject TFE_Py_Tape_Type = {
 // GIL, which is always held when any TFE_Py_* methods are called. We should
 // revisit this if/when decide to not hold the GIL while manipulating the tape
 // stack.
-static tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*>* tape_set = nullptr;
 tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*>* GetTapeSet() {
+  thread_local tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*>* tape_set{
+      nullptr};
   if (tape_set == nullptr) {
     tape_set = new tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*>;
   }
@@ -1264,27 +1265,10 @@ class SafeTapeSet {
   tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*> tape_set_;
 };
 
-// xcode 7 doesn't define thread_local, so for compatibility we implement our
-// own. TODO(apassos) remove once we can deprecate xcode 7.
-#ifndef __APPLE__
 bool* ThreadTapeIsStopped() {
   thread_local bool thread_tape_is_stopped{false};
   return &thread_tape_is_stopped;
 }
-#else
-static std::unordered_map<std::thread::id, bool>* tape_is_stopped = nullptr;
-bool* ThreadTapeIsStopped() {
-  if (tape_is_stopped == nullptr) {
-    tape_is_stopped = new std::unordered_map<std::thread::id, bool>;
-  }
-  auto it = tape_is_stopped->find(std::this_thread::get_id());
-  if (it != tape_is_stopped->end()) {
-    return &(it->second);
-  }
-  return &(tape_is_stopped->emplace(std::this_thread::get_id(), false)
-               .first->second);
-}
-#endif
 
 void TFE_Py_TapeSetStopOnThread() { *ThreadTapeIsStopped() = true; }
 
-- 
GitLab


From 496bc1589831da2f00e6d49b12c68b97301730d4 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Fri, 5 Oct 2018 11:38:34 -0700
Subject: [PATCH 1191/1357] Disable
 micro/examples/micro_speech:micro_speech_test test under msan

PiperOrigin-RevId: 215939542
---
 .../lite/experimental/micro/examples/micro_speech/BUILD        | 3 +++
 .../contrib/lite/experimental/micro/testing/micro_test.bzl     | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
index 447c584387..dad58b6c1c 100644
--- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
@@ -17,6 +17,9 @@ tflite_micro_cc_test(
         "tiny_conv_model_data.cc",
         "tiny_conv_model_data.h",
     ],
+    tags = [
+        "nomsan",
+    ],
     deps = [
         "//tensorflow/contrib/lite:schema_fbs_version",
         "//tensorflow/contrib/lite/experimental/micro:micro_framework",
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
index 91e349cb24..916e3eeac3 100644
--- a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
+++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
@@ -10,6 +10,7 @@ def tflite_micro_cc_test(
         nocopts = "",
         linkopts = [],
         deps = [],
+        tags = [],
         visibility = None):
     """Tests a C/C++ binary without testing framework  dependencies`.
 
@@ -43,6 +44,7 @@ def tflite_micro_cc_test(
         nocopts = nocopts,
         linkopts = linkopts,
         deps = deps,
+        tags = tags,
         visibility = visibility,
     )
     native.sh_test(
@@ -61,4 +63,5 @@ def tflite_micro_cc_test(
         ],
         deps = [
         ],
+        tags = tags,
     )
-- 
GitLab


From 03b4161326897453fa6b2803b873954607f7623b Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Fri, 5 Oct 2018 11:49:19 -0700
Subject: [PATCH 1192/1357] [XLA] Extend the HLO verifier to check that
 non-layout-changing instructions preserve operand layouts.

Add an std::function member to the HloVerifier for a backend to specify the
function object used to determine whether an instruction can change layouts.
Use the function object to find out the non-layout-changing instructions and
check that such instructions should produce results with the same layouts as
its operands.

Add test cases.

PiperOrigin-RevId: 215941282
---
 tensorflow/compiler/xla/service/BUILD         |  1 +
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  9 ++-
 .../xla/service/gpu/nvptx_compiler.cc         | 21 ++++--
 .../compiler/xla/service/hlo_verifier.cc      | 34 +++++++++-
 .../compiler/xla/service/hlo_verifier.h       | 14 +++-
 .../compiler/xla/service/hlo_verifier_test.cc | 67 +++++++++++++++++++
 .../compiler/xla/tests/hlo_test_base.cc       | 14 ++--
 tensorflow/compiler/xla/tests/hlo_test_base.h |  8 ++-
 8 files changed, 149 insertions(+), 19 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 4797cf3330..2b292ed053 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -2450,6 +2450,7 @@ tf_cc_test(
         ":hlo",
         ":hlo_parser",
         ":hlo_verifier",
+        ":layout_assignment",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:types",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 5834f67285..68c715a086 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -327,8 +327,13 @@ Status CpuCompiler::RunHloPassesAfterLayoutAssn(
   {
     auto& pass = pipeline.AddPass<HloPassFix<HloPassPipeline>>(
         "simplification after layout assignement");
-    pass.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/true,
-                                          /*allow_mixed_precision=*/false);
+    // TODO(b/117156505): When the bug is fixed, the CPU backend should not
+    // produce layout changing elementwise operations. We will then pass
+    // LayoutAssignment::InstructionCanChangeLayout to the HLO verifier to
+    // enable stricter verification.
+    pass.AddInvariantChecker<HloVerifier>(
+        /*layout_sensitive=*/true,
+        /*allow_mixed_precision=*/false);
     pass.AddPass<HloPassFix<AlgebraicSimplifier>>(
         /*is_layout_sensitive=*/true,
         [](const Shape&, const Shape&) { return true; },
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 50e47542c4..ac6c2c5565 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -239,8 +239,10 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
 
   {
     HloPassPipeline pipeline("post-layout_assignment");
-    pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/true,
-                                              /*allow_mixed_precision=*/false);
+    pipeline.AddInvariantChecker<HloVerifier>(
+        /*layout_sensitive=*/true,
+        /*allow_mixed_precision=*/false,
+        LayoutAssignment::InstructionCanChangeLayout);
 
     // The LayoutAssignment pass may leave behind kCopy instructions which are
     // duplicate or NOPs, so remove them with algebraic simplification and CSE.
@@ -286,8 +288,10 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
 
   {
     HloPassFix<HloPassPipeline> fusion("fusion");
-    fusion.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/true,
-                                            /*allow_mixed_precision=*/false);
+    fusion.AddInvariantChecker<HloVerifier>(
+        /*layout_sensitive=*/true,
+        /*allow_mixed_precision=*/false,
+        LayoutAssignment::InstructionCanChangeLayout);
     fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/false);
     fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/true);
     fusion.AddPass<FusionMerger>();
@@ -299,7 +303,8 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
 
     HloPassPipeline reduce_pipeline("reduce-precision");
     reduce_pipeline.AddInvariantChecker<HloVerifier>(
-        /*is_layout_sensitive=*/true, /*allow_mixed_precision=*/false);
+        /*is_layout_sensitive=*/true, /*allow_mixed_precision=*/false,
+        LayoutAssignment::InstructionCanChangeLayout);
     ReducePrecisionInsertion::AddPasses(
         &reduce_pipeline, hlo_module->config().debug_options(),
         ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
@@ -325,8 +330,10 @@ Status PrepareHloModuleForIrEmitting(HloModule* hlo_module) {
   // (b/27180329). Therefore, in that case, we set the output to be a copy of
   // the parameter.
   HloPassPipeline pipeline("GPU-ir-emit-prepare");
-  pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/true,
-                                            /*allow_mixed_precision=*/false);
+  pipeline.AddInvariantChecker<HloVerifier>(
+      /*layout_sensitive=*/true,
+      /*allow_mixed_precision=*/false,
+      LayoutAssignment::InstructionCanChangeLayout);
 
   // Copy insertion should be performed immediately before IR emission to avoid
   // inserting unnecessary copies (later pass adds an instruction which
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index c22ee03388..fad3b14ec2 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -1042,7 +1042,10 @@ Status CheckElementwiseInstruction(HloInstruction* instruction) {
 // not check result shape as that is checked in the ShapeVerifier.
 class InstructionVerifier : public DfsHloVisitorWithDefault {
  public:
-  InstructionVerifier() {}
+  explicit InstructionVerifier(std::function<bool(const HloInstruction*)>
+                                   instruction_can_change_layout_func)
+      : instruction_can_change_layout_func_(
+            instruction_can_change_layout_func) {}
 
   Status DefaultAction(HloInstruction*) override { return Status::OK(); }
 
@@ -1143,8 +1146,34 @@ class InstructionVerifier : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  Status Postprocess(HloInstruction* instruction) override {
+    if (instruction_can_change_layout_func_ &&
+        LayoutUtil::IsDenseArray(instruction->shape()) &&
+        !instruction_can_change_layout_func_(instruction)) {
+      const Shape& result_shape = instruction->shape();
+      const Layout& result_layout = result_shape.layout();
+      for (HloInstruction* operand : instruction->operands()) {
+        const Shape& operand_shape = operand->shape();
+        if (LayoutUtil::IsDenseArray(operand_shape) &&
+            ShapeUtil::Rank(operand_shape) == ShapeUtil::Rank(result_shape)) {
+          const Layout& operand_layout = operand_shape.layout();
+          TF_RET_CHECK(LayoutUtil::Equal(result_layout, operand_layout))
+              << "Instruction shouldn't change layouts "
+              << instruction->ToString() << " From "
+              << ShapeUtil::HumanString(result_shape) << " To "
+              << ShapeUtil::HumanString(operand_shape);
+        }
+      }
+    }
+
+    return Status::OK();
+  }
+
  private:
   absl::flat_hash_map<string, const HloInstruction*> instructions_by_name_;
+  // Determines whether an instruction can change layouts.
+  std::function<bool(const HloInstruction*)>
+      instruction_can_change_layout_func_;
 };
 
 }  // namespace
@@ -1158,7 +1187,8 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     std::unique_ptr<ShapeVerifier> shape_verifier = shape_verifier_factory_();
     TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get()));
 
-    InstructionVerifier instruction_verifier;
+    InstructionVerifier instruction_verifier(
+        instruction_can_change_layout_func_);
     TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier));
   }
 
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index 6d16586c2c..cb49cb95ba 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -155,11 +155,17 @@ class HloVerifier : public HloModulePass {
  public:
   using ShapeVerifierFactory = std::function<std::unique_ptr<ShapeVerifier>()>;
 
-  explicit HloVerifier(bool layout_sensitive, bool allow_mixed_precision)
+  explicit HloVerifier(bool layout_sensitive, bool allow_mixed_precision,
+                       std::function<bool(const HloInstruction*)>
+                           instruction_can_change_layout_func = {})
       : shape_verifier_factory_([layout_sensitive, allow_mixed_precision] {
           return absl::make_unique<ShapeVerifier>(layout_sensitive,
                                                   allow_mixed_precision);
-        }) {}
+        }),
+        instruction_can_change_layout_func_(
+            std::move(instruction_can_change_layout_func)) {
+    CHECK(instruction_can_change_layout_func_ == nullptr || layout_sensitive);
+  }
 
   // Uses custom shape verification.
   explicit HloVerifier(ShapeVerifierFactory shape_verifier_factory)
@@ -177,6 +183,10 @@ class HloVerifier : public HloModulePass {
   // being a DfsHloVisitor, is stateful. We want a clean object
   // for each run of the verifier.
   ShapeVerifierFactory shape_verifier_factory_;
+
+  // Determines whether an instruction can change layouts.
+  std::function<bool(const HloInstruction*)>
+      instruction_can_change_layout_func_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc
index 8f0423bb1c..afe01e5487 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/service/layout_assignment.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
@@ -50,6 +51,14 @@ class HloVerifierTestAllowMixedPrecision : public HloTestBase {
                     /*allow_mixed_precision_in_hlo_verifier=*/true) {}
 };
 
+class HloVerifierTestLayoutSensitive : public HloTestBase {
+ public:
+  HloVerifierTestLayoutSensitive()
+      : HloTestBase(/*verifier_layout_sensitive=*/true,
+                    /*allow_mixed_precision_in_hlo_verifier=*/false,
+                    LayoutAssignment::InstructionCanChangeLayout) {}
+};
+
 TEST_F(HloVerifierTest, NullInstructionParent) {
   HloComputation::Builder builder(TestName());
   const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
@@ -358,5 +367,63 @@ TEST_F(HloVerifierTest, ConvNegativeBaseDilationNotAllowed) {
               HasSubstr("non-positive base area dilation factor"));
 }
 
+static const char* const kAddWithLayoutChangeHlo = R"(
+   HloModule AddWithLayoutChange
+    ENTRY AddWithLayoutChange {
+      par0 = f32[3,4]{1,0} parameter(0)
+      par1 = f32[3,4]{0,1} parameter(1)
+      ROOT add0 = f32[3,4]{1,0} add(par0,par1)
+    }
+  )";
+
+TEST_F(HloVerifierTest, AddWithLayoutChange) {
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(kAddWithLayoutChangeHlo));
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_TRUE(status.ok());
+}
+
+TEST_F(HloVerifierTestLayoutSensitive, AddWithLayoutChangeNotAllowed) {
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(kAddWithLayoutChangeHlo));
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("Instruction shouldn't change layouts"));
+}
+
+TEST_F(HloVerifierTestLayoutSensitive, SliceWithLayoutChangeNotAllowed) {
+  const char* const kSliceWithLayoutChangeHlo = R"(
+   HloModule SliceWithLayoutChange
+    ENTRY SliceWithLayoutChange {
+      par0 = f32[4,5]{0,1} parameter(0)
+      par1 = s32[2] parameter(1)
+      ROOT dslice0 = f32[3,4]{1,0} dynamic-slice(par0, par1),
+        dynamic_slice_sizes={3,4}
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseHloString(kSliceWithLayoutChangeHlo));
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("Instruction shouldn't change layouts"));
+}
+
+TEST_F(HloVerifierTestLayoutSensitive, ConcatWithLayoutChangeNotAllowed) {
+  const char* const kConcatWithLayoutChangeHlo = R"(
+   HloModule ConcatWithLayoutChange
+   ENTRY ConcatWithLayoutChange {
+      par0 = f32[3,5]{0,1} parameter(0)
+      par1 = f32[3,3]{1,0} parameter(1)
+      ROOT concat0 = f32[3,8]{1,0} concatenate(f32[3,5] par0, f32[3,3] par1),
+        dimensions={1}
+   }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseHloString(kConcatWithLayoutChangeHlo));
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("Instruction shouldn't change layouts"));
+}
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc
index bdd4fd7e3d..7ab2ecda58 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc
@@ -86,19 +86,25 @@ ProgramShape GetProgramShapeWithLayout(const HloModule& module) {
 }  // namespace
 
 HloTestBase::HloTestBase(bool verifier_layout_sensitive,
-                         bool allow_mixed_precision_in_hlo_verifier)
+                         bool allow_mixed_precision_in_hlo_verifier,
+                         std::function<bool(const HloInstruction*)>
+                             instruction_can_change_layout_func)
     : HloTestBase(GetTestPlatform(), GetReferencePlatform(),
                   verifier_layout_sensitive,
-                  allow_mixed_precision_in_hlo_verifier) {}
+                  allow_mixed_precision_in_hlo_verifier,
+                  instruction_can_change_layout_func) {}
 
 HloTestBase::HloTestBase(se::Platform* test_platform,
                          se::Platform* reference_platform,
                          bool verifier_layout_sensitive,
-                         bool allow_mixed_precision_in_hlo_verifier)
+                         bool allow_mixed_precision_in_hlo_verifier,
+                         std::function<bool(const HloInstruction*)>
+                             instruction_can_change_layout_func)
     : test_runner_(test_platform), reference_runner_(reference_platform) {
   hlo_verifier_ = absl::make_unique<HloVerifier>(
       /*layout_sensitive=*/verifier_layout_sensitive,
-      /*allow_mixed_precision=*/allow_mixed_precision_in_hlo_verifier);
+      /*allow_mixed_precision=*/allow_mixed_precision_in_hlo_verifier,
+      instruction_can_change_layout_func);
 }
 
 std::unique_ptr<HloModule> HloTestBase::CreateNewModule(const string& name) {
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h
index 0ae4bdc104..217428befa 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.h
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.h
@@ -88,14 +88,18 @@ class HloTestBase : public ::testing::Test {
   // interpreter is the only supported backend, it will be both the test backend
   // and the reference backend.
   HloTestBase(bool verifier_layout_sensitive = false,
-              bool allow_mixed_precision_in_hlo_verifier = true);
+              bool allow_mixed_precision_in_hlo_verifier = true,
+              std::function<bool(const HloInstruction*)>
+                  instruction_can_change_layout_func = {});
 
   // If your test doesn't use interpreter as the reference backend, you can use
   // this constructor. Note that your test target is responsible for linking in
   // both needed backends.
   HloTestBase(se::Platform* test_platform, se::Platform* reference_platform,
               bool verifier_layout_sensitive = false,
-              bool allow_mixed_precision_in_hlo_verifier = true);
+              bool allow_mixed_precision_in_hlo_verifier = true,
+              std::function<bool(const HloInstruction*)>
+                  instruction_can_change_layout_func = {});
 
   ~HloTestBase() override {}
 
-- 
GitLab


From 0541a277d5c74cf8e99c9f5a7a015926d1a05214 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Fri, 5 Oct 2018 12:09:01 -0700
Subject: [PATCH 1193/1357] Do 2 warmup runs in
 assert_no_new_pyobjects_executing_eagerly.

PiperOrigin-RevId: 215944829
---
 tensorflow/python/framework/test_util.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 4ec4b41b5e..95925bb471 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -506,9 +506,9 @@ def disable_control_flow_v2(unused_msg):
 def assert_no_new_pyobjects_executing_eagerly(f):
   """Decorator for asserting that no new Python objects persist after a test.
 
-  Runs the test multiple times executing eagerly, first as a warmup and then
-  several times to let objects accumulate. The warmup helps ignore caches which
-  do not grow as the test is run repeatedly.
+  Runs the test multiple times executing eagerly, first as a warmup and then to
+  let objects accumulate. The warmup helps ignore caches which do not grow as
+  the test is run repeatedly.
 
   Useful for checking that there are no missing Py_DECREFs in the C exercised by
   a bit of Python.
@@ -518,7 +518,14 @@ def assert_no_new_pyobjects_executing_eagerly(f):
     """Warms up, gets an object count, runs the test, checks for new objects."""
     with context.eager_mode():
       gc.disable()
-      f(self, **kwargs)
+      # Run the test 2 times as warmup, in an attempt to fill up caches, which
+      # should not grow as the test is run repeatedly below.
+      #
+      # TODO(b/117156879): Running warmup twice is black magic; we have seen
+      # tests that fail with 1 warmup run, and pass with 2, on various versions
+      # of python2.7.x.
+      for _ in range(2):
+        f(self, **kwargs)
       gc.collect()
       previous_count = len(gc.get_objects())
       if ops.has_default_graph():
-- 
GitLab


From d016650ca7636c96c6664bed2cf3a2fa8a3c674b Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Fri, 5 Oct 2018 12:17:31 -0700
Subject: [PATCH 1194/1357] Revert constant folding to previous state.

PiperOrigin-RevId: 215946205
---
 .../tf2xla/functionalize_control_flow.cc      | 64 +++----------------
 .../core/common_runtime/constant_folding.cc   | 35 +++-------
 .../core/common_runtime/constant_folding.h    |  4 --
 .../core/common_runtime/graph_optimizer.cc    |  5 +-
 .../core/common_runtime/graph_optimizer.h     |  5 +-
 5 files changed, 20 insertions(+), 93 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 28e09d7b79..0362682bd6 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -94,8 +94,9 @@ Status FunctionalizeControlFlowForFunction(
     }
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
+  Graph* g = body->graph;
 
-  // Check if the graph has Switch or Merge node before optimizing the graph.
+  // Check if the graph has Switch or Merge node.
   bool has_switch_or_merge = false;
   for (Node* n : body->graph->nodes()) {
     if (n->type_string() == "Switch" || n->type_string() == "Merge") {
@@ -108,58 +109,13 @@ Status FunctionalizeControlFlowForFunction(
   // in function body. We still need to rewrite those functions and modify
   // corresponding nodes.
 
-  // Call graph optimizer. The most important optimization we need is constant
-  // folding, which will replace ops like Shape/BroadcastGradientArgs with
-  // constant shape input. Without this optimization, those ops might become
-  // dynamic input for then/else body function and XLA will complain that input
-  // is not compile time constant. We enable function inlining as well, because
-  // otherwise we won't be able to infer shape for any node depending on
-  // function call nodes.
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_before_opt_", func_name),
-        *body->graph, fld);
-  }
-  // Optimizer accepts std::unique_ptr<Graph>* as input and might change
-  // underlying pointer, thus we create a new Graph and copy from body->graph.
-  std::unique_ptr<Graph> optimized_graph(new Graph(fld));
-  CopyGraph(*body->graph, optimized_graph.get());
-  OptimizerOptions opts;
-  opts.set_opt_level(OptimizerOptions::L0);
-  opts.set_do_function_inlining(true);
-  opts.set_do_constant_folding(true);
-  GraphOptimizer optimizer(opts);
-  auto cf_consider_fn = [](const Node* n) {
-    // Skip SymbolicGradient op when doing constant folding.
-    // Enabling SymbolicGradient op in constant folding requires
-    // flr->device() to be non-null, and here we have not constructed
-    // proper Device object yet (it will be constructed in XlaCompiler).
-    return n->type_string() != FunctionLibraryDefinition::kGradientOp;
-  };
-  optimizer.Optimize(flr, flr->env(),
-                     /*device=*/nullptr, &optimized_graph,
-                     /*shape_map=*/nullptr, /*cse_consider_fn=*/nullptr,
-                     cf_consider_fn);
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_after_opt_", func_name),
-        *optimized_graph, fld);
-  }
-  // Some inlined functions might have Switch/Merge nodes.
-  for (Node* n : optimized_graph->nodes()) {
-    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
-      has_switch_or_merge = true;
-      break;
-    }
-  }
-
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
   // might involve node deletion/addition. Avoid modifying nodes while iterating
   // it.
   std::vector<std::pair<Node*, std::vector<AssociatedFunctionInfo>>>
       nodes_to_associated_functions;
-  for (auto* n : optimized_graph->nodes()) {
+  for (auto* n : g->nodes()) {
     auto associated_functions = GetAssociatedFunctions(*n, flr);
     if (!associated_functions.empty()) {
       nodes_to_associated_functions.push_back({n, associated_functions});
@@ -215,7 +171,7 @@ Status FunctionalizeControlFlowForFunction(
         // pointer. That's fine because in that case, associated_functions will
         // only have one member and the loop will only run once.
         TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-            optimized_graph.get(), n, fld, associated_function, new_name));
+            g, n, fld, associated_function, new_name));
       }
     }
   }
@@ -227,21 +183,21 @@ Status FunctionalizeControlFlowForFunction(
     if (VLOG_IS_ON(4)) {
       dump_graph::DumpGraphToFile(
           absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-          *optimized_graph, fld);
+          *g, fld);
     }
-    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
+    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(g, fld));
     if (VLOG_IS_ON(4)) {
       dump_graph::DumpGraphToFile(
-          absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-          *optimized_graph, fld);
+          absl::StrCat("functionalize_control_flow_after_fdef_", func_name), *g,
+          fld);
     }
   }
 
   if (*modified) {
     // Add rewritten FunctionDef into library.
     FunctionDef functionalized_fdef;
-    TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
-                                          &functionalized_fdef));
+    TF_RETURN_IF_ERROR(
+        GraphToFunctionDef(*g, new_func_name, &functionalized_fdef));
     if (func_name == new_func_name) {
       VLOG(2) << "Replacing function " << func_name;
       TF_RETURN_IF_ERROR(
diff --git a/tensorflow/core/common_runtime/constant_folding.cc b/tensorflow/core/common_runtime/constant_folding.cc
index db137f1a19..e81e61b633 100644
--- a/tensorflow/core/common_runtime/constant_folding.cc
+++ b/tensorflow/core/common_runtime/constant_folding.cc
@@ -466,23 +466,23 @@ Graph* GetConstantGraph(
 bool ReplaceTensorWithConstant(
     Graph* graph, Device* partition_device, NodeAndOutput tensor,
     const Tensor& constant, const gtl::FlatSet<Node*>& control_deps,
-    int64 max_constant_size_in_bytes, bool disable_memory_output_type_check,
+    int64 max_constant_size_in_bytes,
     const ConstantFoldNameGenerator& generate_new_name) {
   // Be conservative when replacing a tensor with a constant, when not
   // running on CPU.
   // 1) Do not replace another constant.
   // 2) If the destination tensor is not an int32 tensor, and has HOST_MEMORY
   // constraint, do not replace it.
-  // 3) If the size of the constant in bytes is too large (>
+  // 3) If the destination tensor is an int32 tensor, and has DEVICE_MEMORY
+  // constraint, do not replace it.
+  // 4) If the size of the constant in bytes is too large (>
   // max_constant_in_bytes), do not replace it. This prevents the size of the
   // Graph from growing too large.
-  // 4) If the constant op created does not have a kernel implementation
+  // 5) If the constant op created does not have a kernel implementation
   // for the device, do not use it.
   // TODO(keveman): Consider adding a new constant op that has a kernel
   // implementation for all types, but with HostMemory constraint on it's
   // output.
-  // 5) If the constant op for the device has different output memory type
-  // from the original op output memory type, do not replace it.
   if (tensor.first->IsConstant()) {
     return false;
   }
@@ -497,7 +497,8 @@ bool ReplaceTensorWithConstant(
       return false;
     }
     bool is_int32 = tensor.first->output_type(tensor.second) == DT_INT32;
-    if (memory_type == HOST_MEMORY && !is_int32) {
+    if ((memory_type == HOST_MEMORY && !is_int32) ||
+        (memory_type == DEVICE_MEMORY && is_int32)) {
       return false;
     }
   }
@@ -535,25 +536,6 @@ bool ReplaceTensorWithConstant(
   if (!NodeBuilder(builder).Finalize(graph, &constant_node).ok()) {
     return false;
   }
-  if (!disable_memory_output_type_check) {
-    if (partition_device && device_type != DEVICE_CPU) {
-      MemoryType original_output_memory_type;
-      if (!MemoryTypeForOutput(device_type, graph, tensor.first, tensor.second,
-                               &original_output_memory_type)
-               .ok()) {
-        return false;
-      }
-      MemoryType const_output_memory_type;
-      if (!MemoryTypeForOutput(device_type, graph, constant_node, 0,
-                               &const_output_memory_type)
-               .ok()) {
-        return false;
-      }
-      if (original_output_memory_type != const_output_memory_type) {
-        return false;
-      }
-    }
-  }
   for (auto edge : edges_to_remove) {
     graph->AddEdge(constant_node, 0, edge->dst(), edge->dst_input());
     graph->RemoveEdge(edge);
@@ -660,8 +642,7 @@ Status ConstantFold(const ConstantFoldingOptions& opts,
         constant_control_deps[tensors_to_replace[c].first];
     if (ReplaceTensorWithConstant(
             graph, partition_device, tensors_to_replace[c], outputs[c],
-            control_deps, opts.max_constant_size_in_bytes,
-            opts.disable_memory_output_type_check, generate_new_name)) {
+            control_deps, opts.max_constant_size_in_bytes, generate_new_name)) {
       ++num_nodes_replaced;
     }
   }
diff --git a/tensorflow/core/common_runtime/constant_folding.h b/tensorflow/core/common_runtime/constant_folding.h
index 4c71b7bd27..a9a84f761b 100644
--- a/tensorflow/core/common_runtime/constant_folding.h
+++ b/tensorflow/core/common_runtime/constant_folding.h
@@ -45,10 +45,6 @@ struct ConstantFoldingOptions {
   // optimization.
   int64 max_constant_size_in_bytes = 10 * 1024 * 1024;
 
-  // If disable_memory_output_type_check is true, we will disable output memory
-  // type check for constant node replacement.
-  bool disable_memory_output_type_check = false;
-
   // A generator for the name suffix of constant folded nodes. A
   // default id generator that monotonically increases is used if nullptr is
   // passed.
diff --git a/tensorflow/core/common_runtime/graph_optimizer.cc b/tensorflow/core/common_runtime/graph_optimizer.cc
index 91194bc86f..37a979a8f1 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.cc
+++ b/tensorflow/core/common_runtime/graph_optimizer.cc
@@ -39,8 +39,7 @@ void GraphOptimizer::Optimize(
     const std::unordered_map<string, std::vector<PartialTensorShape>>*
         shape_map,
     const std::function<bool(const Node*)>& cse_consider_fn,
-    const std::function<bool(const Node*)>& cf_consider_fn,
-    bool cf_disable_memory_output_type_check) {
+    const std::function<bool(const Node*)>& cf_consider_fn) {
   Graph* g = graph->get();
   DumpGraph("Initial", g);
 
@@ -65,8 +64,6 @@ void GraphOptimizer::Optimize(
       ConstantFoldingOptions cf_opts;
       cf_opts.shape_map = shape_map;
       cf_opts.consider = cf_consider_fn;
-      cf_opts.disable_memory_output_type_check =
-          cf_disable_memory_output_type_check;
       if (opts_.max_folded_constant_in_bytes() > 0) {
         cf_opts.max_constant_size_in_bytes =
             opts_.max_folded_constant_in_bytes();
diff --git a/tensorflow/core/common_runtime/graph_optimizer.h b/tensorflow/core/common_runtime/graph_optimizer.h
index 8954e9612d..789cc56942 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.h
+++ b/tensorflow/core/common_runtime/graph_optimizer.h
@@ -47,16 +47,13 @@ class GraphOptimizer {
   // returns true will be considered for CSE.
   // If cf_consider_fn is not null then only nodes for which cf_consider_fn
   // returns true will be considered for CF.
-  // If cf_disable_memory_output_type_check is true, CF will discard output
-  // memory type check for constant node replacement.
   void Optimize(
       FunctionLibraryRuntime* runtime, Env* env, Device* device,
       std::unique_ptr<Graph>* graph,
       const std::unordered_map<string, std::vector<PartialTensorShape>>*
           shape_map,
       const std::function<bool(const Node*)>& cse_consider_fn = nullptr,
-      const std::function<bool(const Node*)>& cf_consider_fn = nullptr,
-      bool cf_disable_memory_output_type_check = false);
+      const std::function<bool(const Node*)>& cf_consider_fn = nullptr);
 
   const OptimizerOptions& options() { return opts_; }
 
-- 
GitLab


From 58845f229be9b5ba2e1e36150bff5ba7a85920d8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 12:25:22 -0700
Subject: [PATCH 1195/1357] Profiler collects the number of replicas and num
 cores per replica used in the model.

PiperOrigin-RevId: 215947354
---
 tensorflow/contrib/tpu/profiler/tf_op_stats.proto | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
index f88dc51636..1e66801efd 100644
--- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
+++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
@@ -168,6 +168,12 @@ message RunEnvironmentResult {
   optional HostIndependentJobInfoResult host_independent_job_info = 5;
   // Host-dependent job information.
   repeated HostDependentJobInfoResult host_dependent_job_info = 6;
+  // The number of replicas, corresponds to input parallelism.
+  // If there is no model parallelism, replica_count = tpu_core_count
+  optional int32 replica_count = 7;
+  // The number of cores used for a single replica, e.g. model parallelism.
+  // If there is no model parallelism, then num_cores_per_replica = 1
+  optional int32 num_cores_per_replica = 8;
 }
 
 // The types of host operations that are tracked.
-- 
GitLab


From 6919ab5787e6384d709adf051dc1ce99236b76bc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 12:33:55 -0700
Subject: [PATCH 1196/1357] Convert TensorFlow's jpeg dependency to new third
 party import method.

PiperOrigin-RevId: 215948571
---
 tensorflow/workspace.bzl                        | 14 ++------------
 third_party/jpeg/BUILD                          |  2 +-
 third_party/jpeg/{jpeg.BUILD => BUILD.bazel}    | 11 ++++++-----
 .../jpeg.BUILD => jpeg/BUILD.system}            |  0
 third_party/jpeg/jpeg_helpers.BUILD.bazel       |  1 +
 third_party/jpeg/workspace.bzl                  | 17 +++++++++++++++++
 6 files changed, 27 insertions(+), 18 deletions(-)
 rename third_party/jpeg/{jpeg.BUILD => BUILD.bazel} (99%)
 rename third_party/{systemlibs/jpeg.BUILD => jpeg/BUILD.system} (100%)
 create mode 100644 third_party/jpeg/jpeg_helpers.BUILD.bazel
 create mode 100644 third_party/jpeg/workspace.bzl

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 8df41f96b8..b9ced1bd6c 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -22,10 +22,12 @@ load(
 )
 load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
 load("//third_party/icu:workspace.bzl", icu = "repo")
+load("//third_party/jpeg:workspace.bzl", jpeg = "repo")
 
 def initialize_third_party():
     flatbuffers()
     icu()
+    jpeg()
 
 # Sanitize a dependency so that it works correctly from code that includes
 # TensorFlow as a submodule.
@@ -246,18 +248,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
     )
 
-    tf_http_archive(
-        name = "jpeg",
-        build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"),
-        sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b",
-        strip_prefix = "libjpeg-turbo-2.0.0",
-        system_build_file = clean_dep("//third_party/systemlibs:jpeg.BUILD"),
-        urls = [
-            "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
-            "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
-        ],
-    )
-
     tf_http_archive(
         name = "png_archive",
         build_file = clean_dep("//third_party:png.BUILD"),
diff --git a/third_party/jpeg/BUILD b/third_party/jpeg/BUILD
index 5b01f6e3e4..e3aec1fce9 100644
--- a/third_party/jpeg/BUILD
+++ b/third_party/jpeg/BUILD
@@ -1 +1 @@
-licenses(["notice"])
+# Needed to make this a package.
diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/BUILD.bazel
similarity index 99%
rename from third_party/jpeg/jpeg.BUILD
rename to third_party/jpeg/BUILD.bazel
index 1b9b9bf2f5..5243e995a3 100644
--- a/third_party/jpeg/jpeg.BUILD
+++ b/third_party/jpeg/BUILD.bazel
@@ -162,9 +162,9 @@ cc_library(
     hdrs = [
         "simd/powerpc/jccolext-altivec.c",
         "simd/powerpc/jcgryext-altivec.c",
+        "simd/powerpc/jcsample.h",
         "simd/powerpc/jdcolext-altivec.c",
         "simd/powerpc/jdmrgext-altivec.c",
-        "simd/powerpc/jcsample.h",
         "simd/powerpc/jsimd_altivec.h",
     ],
     copts = libjpegturbo_copts,
@@ -186,7 +186,6 @@ cc_library(
         "jsimd.h",
         "jsimddct.h",
         "simd/jsimd.h",
-        "simd/x86_64/jsimd.c",
         "simd/x86_64/jccolor-avx2.o",
         "simd/x86_64/jccolor-sse2.o",
         "simd/x86_64/jcgray-avx2.o",
@@ -213,6 +212,7 @@ cc_library(
         "simd/x86_64/jquantf-sse2.o",
         "simd/x86_64/jquanti-avx2.o",
         "simd/x86_64/jquanti-sse2.o",
+        "simd/x86_64/jsimd.c",
         "simd/x86_64/jsimdcpu.o",
     ],
     copts = libjpegturbo_copts,
@@ -322,9 +322,9 @@ cc_library(
         "jpeglib.h",
         "jsimd.h",
         "jsimddct.h",
-        "simd/jsimd.h",
         "simd/arm/jsimd.c",
         "simd/arm/jsimd_neon.S",
+        "simd/jsimd.h",
     ],
     copts = libjpegturbo_copts,
     nocopts = libjpegturbo_nocopts,
@@ -343,9 +343,9 @@ cc_library(
         "jpeglib.h",
         "jsimd.h",
         "jsimddct.h",
-        "simd/jsimd.h",
         "simd/arm64/jsimd.c",
         "simd/arm64/jsimd_neon.S",
+        "simd/jsimd.h",
     ],
     copts = libjpegturbo_copts,
     nocopts = libjpegturbo_nocopts,
@@ -366,7 +366,6 @@ cc_library(
         "jsimd.h",
         "jsimddct.h",
         "simd/jsimd.h",
-        "simd/x86_64/jsimd.c",
         "simd/x86_64/jccolor-avx2.obj",
         "simd/x86_64/jccolor-sse2.obj",
         "simd/x86_64/jcgray-avx2.obj",
@@ -393,6 +392,7 @@ cc_library(
         "simd/x86_64/jquantf-sse2.obj",
         "simd/x86_64/jquanti-avx2.obj",
         "simd/x86_64/jquanti-sse2.obj",
+        "simd/x86_64/jsimd.c",
         "simd/x86_64/jsimdcpu.obj",
     ],
     copts = libjpegturbo_copts,
@@ -603,6 +603,7 @@ JCONFIGINT_WIN_SUBSTITUTIONS = {
 }
 
 JCONFIGINT_NOWIN_SUBSTITUTIONS.update(JCONFIGINT_COMMON_SUBSTITUTIONS)
+
 JCONFIGINT_WIN_SUBSTITUTIONS.update(JCONFIGINT_COMMON_SUBSTITUTIONS)
 
 template_rule(
diff --git a/third_party/systemlibs/jpeg.BUILD b/third_party/jpeg/BUILD.system
similarity index 100%
rename from third_party/systemlibs/jpeg.BUILD
rename to third_party/jpeg/BUILD.system
diff --git a/third_party/jpeg/jpeg_helpers.BUILD.bazel b/third_party/jpeg/jpeg_helpers.BUILD.bazel
new file mode 100644
index 0000000000..5b01f6e3e4
--- /dev/null
+++ b/third_party/jpeg/jpeg_helpers.BUILD.bazel
@@ -0,0 +1 @@
+licenses(["notice"])
diff --git a/third_party/jpeg/workspace.bzl b/third_party/jpeg/workspace.bzl
new file mode 100644
index 0000000000..4b517240ec
--- /dev/null
+++ b/third_party/jpeg/workspace.bzl
@@ -0,0 +1,17 @@
+"""loads the jpeg library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "jpeg",
+        urls = [
+            "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
+            "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
+        ],
+        sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b",
+        strip_prefix = "libjpeg-turbo-2.0.0",
+        build_file = "//third_party/jpeg:BUILD.bazel",
+        # build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"),
+        system_build_file = "//third_party/jpeg:BUILD.system",
+    )
-- 
GitLab


From ef838969b95de39353a3ba495c335cbb14a0c9b5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 12:44:45 -0700
Subject: [PATCH 1197/1357] Brings V2 Optimizers into Keras w/ Keras signatures

PiperOrigin-RevId: 215950207
---
 .../contrib/distribute/python/combinations.py |   16 +-
 .../distribute/python/minimize_loss_test.py   |    5 -
 tensorflow/contrib/optimizer_v2/BUILD         |   11 +-
 tensorflow/contrib/optimizer_v2/adadelta.py   |   75 +-
 tensorflow/contrib/optimizer_v2/adagrad.py    |   79 +-
 .../contrib/optimizer_v2/adagrad_test.py      |    3 -
 tensorflow/contrib/optimizer_v2/adam.py       |  129 +-
 .../optimizer_v2/checkpointable_utils_test.py |   68 +-
 .../contrib/optimizer_v2/gradient_descent.py  |   40 +-
 tensorflow/contrib/optimizer_v2/momentum.py   |   69 +-
 .../contrib/optimizer_v2/optimizer_v2.py      | 1205 +--------------
 tensorflow/contrib/optimizer_v2/rmsprop.py    |  154 +-
 tensorflow/python/keras/BUILD                 |  155 ++
 .../python/keras/optimizer_v2/adadelta.py     |  116 ++
 .../keras/optimizer_v2/adadelta_test.py       |  166 ++
 .../python/keras/optimizer_v2/adagrad.py      |  119 ++
 .../python/keras/optimizer_v2/adagrad_test.py |  276 ++++
 tensorflow/python/keras/optimizer_v2/adam.py  |  203 +++
 .../python/keras/optimizer_v2/adam_test.py    |  333 ++++
 .../optimizer_v2/checkpointable_utils_test.py |  761 ++++++++++
 .../python/keras/optimizer_v2/optimizer_v2.py | 1349 +++++++++++++++++
 .../keras/optimizer_v2/optimizer_v2_test.py   |  277 ++++
 .../python/keras/optimizer_v2/rmsprop.py      |  239 +++
 .../python/keras/optimizer_v2/rmsprop_test.py |  444 ++++++
 tensorflow/python/keras/optimizer_v2/sgd.py   |  170 +++
 .../python/keras/optimizer_v2/sgd_test.py     |  759 ++++++++++
 26 files changed, 5487 insertions(+), 1734 deletions(-)
 create mode 100644 tensorflow/python/keras/optimizer_v2/adadelta.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adadelta_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adagrad.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adagrad_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adam.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adam_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/optimizer_v2.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/rmsprop.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/rmsprop_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/sgd.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/sgd_test.py

diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py
index cff4b0a463..63a163e76c 100644
--- a/tensorflow/contrib/distribute/python/combinations.py
+++ b/tensorflow/contrib/distribute/python/combinations.py
@@ -349,26 +349,26 @@ mirrored_strategy_with_two_gpus = NamedDistribution(
     required_gpus=2)
 
 
-adam_optimizer_v1_fn = NamedObject(
-    "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1))
 gradient_descent_optimizer_v1_fn = NamedObject(
     "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2))
 adagrad_optimizer_v1_fn = NamedObject(
     "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001))
+adam_optimizer_v1_fn = NamedObject("AdamV1",
+                                   lambda: adam.AdamOptimizer(0.001, epsilon=1))
 rmsprop_optimizer_v1_fn = NamedObject(
     "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001))
-optimizers_v1 = [adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn,
-                 adagrad_optimizer_v1_fn]
 
-adam_optimizer_v2_fn = NamedObject(
-    "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1))
+optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn]
+
 gradient_descent_optimizer_v2_fn = NamedObject(
     "GradientDescentV2",
     lambda: gradient_descent_v2.GradientDescentOptimizer(0.2))
 adagrad_optimizer_v2_fn = NamedObject(
     "AdagradV2", lambda: adagrad_v2.AdagradOptimizer(0.001))
-optimizers_v2 = [adam_optimizer_v2_fn, gradient_descent_optimizer_v2_fn,
-                 adagrad_optimizer_v2_fn]
+adam_optimizer_v2_fn = NamedObject(
+    "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1))
+
+optimizers_v2 = [gradient_descent_optimizer_v2_fn, adagrad_optimizer_v2_fn]
 
 graph_and_eager_modes = ["graph", "eager"]
 
diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py
index ba147e7824..60e134055f 100644
--- a/tensorflow/contrib/distribute/python/minimize_loss_test.py
+++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py
@@ -179,11 +179,6 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       def get_expected_variables(optimizer_fn, num_parameter_devices):
         variables_map = {
             "GradientDescent": ["dense/kernel", "dense/bias"],
-            "Adam": [
-                "dense/kernel", "dense/bias", "beta1_power", "beta2_power",
-                "dense/kernel/Adam", "dense/kernel/Adam_1", "dense/bias/Adam",
-                "dense/bias/Adam_1"
-            ],
             "Adagrad": [
                 "dense/kernel/Adagrad", "dense/kernel",
                 "dense/bias/Adagrad", "dense/bias"
diff --git a/tensorflow/contrib/optimizer_v2/BUILD b/tensorflow/contrib/optimizer_v2/BUILD
index 3ba3ee29ec..2cf445a85e 100644
--- a/tensorflow/contrib/optimizer_v2/BUILD
+++ b/tensorflow/contrib/optimizer_v2/BUILD
@@ -47,15 +47,8 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:distribute",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
+        "//tensorflow/python:util",
+        "//tensorflow/python/keras:optimizer_v2",
     ],
 )
 
diff --git a/tensorflow/contrib/optimizer_v2/adadelta.py b/tensorflow/contrib/optimizer_v2/adadelta.py
index b206f9f61b..9d73bddd1c 100644
--- a/tensorflow/contrib/optimizer_v2/adadelta.py
+++ b/tensorflow/contrib/optimizer_v2/adadelta.py
@@ -18,17 +18,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import adadelta
+from tensorflow.python.util import deprecation
 
 
-class AdadeltaOptimizer(optimizer_v2.OptimizerV2):
+class AdadeltaOptimizer(adadelta.Adadelta):
   """Optimizer that implements the Adadelta algorithm.
 
   See [M. D. Zeiler](http://arxiv.org/abs/1212.5701)
   ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-8,
                use_locking=False, name="Adadelta"):
     """Construct a new Adadelta optimizer.
@@ -48,66 +52,5 @@ class AdadeltaOptimizer(optimizer_v2.OptimizerV2):
       name: Optional name prefix for the operations created when applying
         gradients.  Defaults to "Adadelta".
     """
-    super(AdadeltaOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-    self._set_hyper("rho", rho)
-    self._set_hyper("epsilon", epsilon)
-
-  def _create_vars(self, var_list, state):
-    for v in var_list:
-      state.zeros_slot(v, "accum")
-      state.zeros_slot(v, "accum_update")
-
-  def _apply_dense(self, grad, var, state):
-    accum = state.get_slot(var, "accum")
-    accum_update = state.get_slot(var, "accum_update")
-    return training_ops.apply_adadelta(
-        var,
-        accum,
-        accum_update,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("rho", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking)
-
-  def _resource_apply_dense(self, grad, var, state):
-    accum = state.get_slot(var, "accum")
-    accum_update = state.get_slot(var, "accum_update")
-    return training_ops.resource_apply_adadelta(
-        var.handle,
-        accum.handle,
-        accum_update.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("rho", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking)
-
-  def _apply_sparse(self, grad, var, state):
-    accum = state.get_slot(var, "accum")
-    accum_update = state.get_slot(var, "accum_update")
-    return training_ops.sparse_apply_adadelta(
-        var,
-        accum,
-        accum_update,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("rho", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad.values,
-        grad.indices,
-        use_locking=self._use_locking)
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    accum = state.get_slot(var, "accum")
-    accum_update = state.get_slot(var, "accum_update")
-    return training_ops.resource_sparse_apply_adadelta(
-        var.handle,
-        accum.handle,
-        accum_update.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("rho", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad,
-        indices,
-        use_locking=self._use_locking)
+    super(AdadeltaOptimizer, self).__init__(
+        learning_rate=learning_rate, rho=rho, epsilon=epsilon, name=name)
diff --git a/tensorflow/contrib/optimizer_v2/adagrad.py b/tensorflow/contrib/optimizer_v2/adagrad.py
index dab1e02716..716361e29c 100644
--- a/tensorflow/contrib/optimizer_v2/adagrad.py
+++ b/tensorflow/contrib/optimizer_v2/adagrad.py
@@ -18,15 +18,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import adagrad
+from tensorflow.python.util import deprecation
 
 
-class AdagradOptimizer(optimizer_v2.OptimizerV2):
+class AdagradOptimizer(adagrad.Adagrad):
   """Optimizer that implements the Adagrad algorithm.
 
   See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
@@ -34,6 +30,10 @@ class AdagradOptimizer(optimizer_v2.OptimizerV2):
   [intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf).
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate, initial_accumulator_value=0.1,
                use_locking=False, name="Adagrad"):
     """Construct a new Adagrad optimizer.
@@ -54,64 +54,7 @@ class AdagradOptimizer(optimizer_v2.OptimizerV2):
     Raises:
       ValueError: If the `initial_accumulator_value` is invalid.
     """
-    if initial_accumulator_value <= 0.0:
-      raise ValueError("initial_accumulator_value must be positive: %s" %
-                       initial_accumulator_value)
-    super(AdagradOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-
-    self._initial_accumulator_value = initial_accumulator_value
-
-  def _create_vars(self, var_list, state):
-    for v in var_list:
-      dtype = v.dtype.base_dtype
-      if v.get_shape().is_fully_defined():
-        init = init_ops.constant_initializer(self._initial_accumulator_value,
-                                             dtype=dtype)
-      else:
-        def init(v=v, dtype=dtype):
-          # Use a Tensor instead of initializer if variable does not have
-          # static shape.
-          init_constant = gen_array_ops.fill(array_ops.shape(v),
-                                             self._initial_accumulator_value)
-          return math_ops.cast(init_constant, dtype)
-      state.create_slot_with_initializer(v, init, v.get_shape(), dtype,
-                                         "accumulator")
-
-  def _apply_dense(self, grad, var, state):
-    acc = state.get_slot(var, "accumulator")
-    return training_ops.apply_adagrad(
-        var,
-        acc,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking)
-
-  def _resource_apply_dense(self, grad, var, state):
-    acc = state.get_slot(var, "accumulator")
-    return training_ops.resource_apply_adagrad(
-        var.handle,
-        acc.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking)
-
-  def _apply_sparse(self, grad, var, state):
-    acc = state.get_slot(var, "accumulator")
-    return training_ops.sparse_apply_adagrad(
-        var,
-        acc,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad.values,
-        grad.indices,
-        use_locking=self._use_locking)
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    acc = state.get_slot(var, "accumulator")
-    return training_ops.resource_sparse_apply_adagrad(
-        var.handle,
-        acc.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        indices,
-        use_locking=self._use_locking)
+    super(AdagradOptimizer, self).__init__(
+        learning_rate=learning_rate,
+        initial_accumulator_value=initial_accumulator_value,
+        name=name)
diff --git a/tensorflow/contrib/optimizer_v2/adagrad_test.py b/tensorflow/contrib/optimizer_v2/adagrad_test.py
index debaaaeeba..320e41567f 100644
--- a/tensorflow/contrib/optimizer_v2/adagrad_test.py
+++ b/tensorflow/contrib/optimizer_v2/adagrad_test.py
@@ -68,9 +68,6 @@ class AdagradOptimizerTest(test.TestCase):
   def testBasicResource(self):
     self.doTestBasic(use_locking=False, use_resource=True)
 
-  def testBasicLocked(self):
-    self.doTestBasic(use_locking=True)
-
   def testMinimizeSparseResourceVariable(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
       with self.cached_session():
diff --git a/tensorflow/contrib/optimizer_v2/adam.py b/tensorflow/contrib/optimizer_v2/adam.py
index 04b1552b61..363e020757 100644
--- a/tensorflow/contrib/optimizer_v2/adam.py
+++ b/tensorflow/contrib/optimizer_v2/adam.py
@@ -18,22 +18,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.util import deprecation
 
 
-class AdamOptimizer(optimizer_v2.OptimizerV2):
+class AdamOptimizer(adam.Adam):
   """Optimizer that implements the Adam algorithm.
 
   See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
   ([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8,
                use_locking=False, name="Adam"):
     """Construct a new Adam optimizer.
@@ -87,111 +86,9 @@ class AdamOptimizer(optimizer_v2.OptimizerV2):
       name: Optional name for the operations created when applying gradients.
         Defaults to "Adam".
     """
-    super(AdamOptimizer, self).__init__(use_locking, name)
-
-    self._set_hyper("learning_rate", learning_rate)
-    self._set_hyper("beta1", beta1)
-    self._set_hyper("beta2", beta2)
-    self._set_hyper("epsilon", epsilon)
-
-  def _get_beta_accumulators(self, state=None):
-    if state is None:
-      state = self._get_per_graph_state()
-    return (state.get_non_slot("beta1_power"),
-            state.get_non_slot("beta2_power"))
-
-  def _create_vars(self, var_list, state):
-    # Non-slot variables end up on the same device(s).
-    state.create_non_slot(initial_value=lambda: state.get_hyper("beta1"),
-                          name="beta1_power")
-    state.create_non_slot(initial_value=lambda: state.get_hyper("beta2"),
-                          name="beta2_power")
-
-    # Create slots for the first and second moments.
-    for v in var_list:
-      state.zeros_slot(v, "m")
-      state.zeros_slot(v, "v")
-
-  def _apply_dense(self, grad, var, state):
-    m = state.get_slot(var, "m")
-    v = state.get_slot(var, "v")
-    beta1_power, beta2_power = self._get_beta_accumulators(state)
-    return training_ops.apply_adam(
-        var, m, v,
-        math_ops.cast(beta1_power, var.dtype.base_dtype),
-        math_ops.cast(beta2_power, var.dtype.base_dtype),
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("beta1", var.dtype.base_dtype),
-        state.get_hyper("beta2", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad, use_locking=self._use_locking).op
-
-  def _resource_apply_dense(self, grad, var, state):
-    m = state.get_slot(var, "m")
-    v = state.get_slot(var, "v")
-    beta1_power, beta2_power = self._get_beta_accumulators(state)
-    return training_ops.resource_apply_adam(
-        var.handle, m.handle, v.handle,
-        math_ops.cast(beta1_power, grad.dtype.base_dtype),
-        math_ops.cast(beta2_power, grad.dtype.base_dtype),
-        state.get_hyper("learning_rate", grad.dtype.base_dtype),
-        state.get_hyper("beta1", grad.dtype.base_dtype),
-        state.get_hyper("beta2", grad.dtype.base_dtype),
-        state.get_hyper("epsilon", grad.dtype.base_dtype),
-        grad, use_locking=self._use_locking)
-
-  def _apply_sparse_shared(self, grad, var, indices, scatter_add, state):
-    beta1_power, beta2_power = self._get_beta_accumulators(state)
-    beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
-    beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
-    lr_t = state.get_hyper("learning_rate", var.dtype.base_dtype)
-    beta1_t = state.get_hyper("beta1", var.dtype.base_dtype)
-    beta2_t = state.get_hyper("beta2", var.dtype.base_dtype)
-    epsilon_t = state.get_hyper("epsilon", var.dtype.base_dtype)
-    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
-    # m_t = beta1 * m + (1 - beta1) * g_t
-    m = state.get_slot(var, "m")
-    m_scaled_g_values = grad * (1 - beta1_t)
-    m_t = state_ops.assign(m, m * beta1_t,
-                           use_locking=self._use_locking)
-    with ops.control_dependencies([m_t]):
-      m_t = scatter_add(m, indices, m_scaled_g_values)
-    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
-    v = state.get_slot(var, "v")
-    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
-    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
-    with ops.control_dependencies([v_t]):
-      v_t = scatter_add(v, indices, v_scaled_g_values)
-    v_sqrt = math_ops.sqrt(v_t)
-    var_update = state_ops.assign_sub(var,
-                                      lr * m_t / (v_sqrt + epsilon_t),
-                                      use_locking=self._use_locking)
-    return control_flow_ops.group(*[var_update, m_t, v_t])
-
-  def _apply_sparse(self, grad, var, state):
-    return self._apply_sparse_shared(
-        grad.values, var, grad.indices,
-        lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
-            x, i, v, use_locking=self._use_locking),
-        state)
-
-  def _resource_scatter_add(self, x, i, v):
-    with ops.control_dependencies(
-        [resource_variable_ops.resource_scatter_add(
-            x.handle, i, v)]):
-      return x.value()
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    return self._apply_sparse_shared(
-        grad, var, indices, self._resource_scatter_add, state)
-
-  def _finish(self, state):
-    # Update the power accumulators.
-    beta1_power, beta2_power = self._get_beta_accumulators(state)
-    update_beta1 = beta1_power.assign(
-        beta1_power * state.get_hyper("beta1"),
-        use_locking=self._use_locking)
-    update_beta2 = beta2_power.assign(
-        beta2_power * state.get_hyper("beta2"),
-        use_locking=self._use_locking)
-    return control_flow_ops.group(update_beta1, update_beta2)
+    super(AdamOptimizer, self).__init__(
+        learning_rate=learning_rate,
+        beta_1=beta1,
+        beta_2=beta2,
+        epsilon=epsilon,
+        name=name)
diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
index e13b82d1d2..3c68ef995a 100644
--- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
+++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
@@ -130,8 +130,8 @@ class CheckpointingTests(test.TestCase):
         # non-Layer dependency of the model
         "model/_non_layer/a_variable",
         # The optimizer creates two non-slot variables
-        "optimizer/beta1_power",
-        "optimizer/beta2_power",
+        "optimizer/beta_1_power",
+        "optimizer/beta_2_power",
         # Slot variables
         "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
         "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
@@ -161,21 +161,20 @@ class CheckpointingTests(test.TestCase):
         "my_model/dense/kernel",
         named_variables["model/_named_dense/kernel" + suffix].full_name)
     self.assertEqual(
-        "beta1_power",
-        named_variables["optimizer/beta1_power" + suffix].full_name)
+        "beta_1_power",
+        named_variables["optimizer/beta_1_power" + suffix].full_name)
     self.assertEqual(
-        "beta2_power",
-        named_variables["optimizer/beta2_power" + suffix].full_name)
+        "beta_2_power",
+        named_variables["optimizer/beta_2_power" + suffix].full_name)
     # Spot check the generated protocol buffers.
     self.assertEqual("optimizer",
                      serialized_graph.nodes[0].children[1].local_name)
     optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
         1].node_id]
-    self.assertEqual("beta1_power",
-                     optimizer_node.children[0].local_name)
-    self.assertEqual("beta1_power",
-                     serialized_graph.nodes[optimizer_node.children[0].node_id]
-                     .attributes[0].full_name)
+    self.assertEqual("beta_1_power", optimizer_node.children[0].local_name)
+    self.assertEqual(
+        "beta_1_power", serialized_graph.nodes[
+            optimizer_node.children[0].node_id].attributes[0].full_name)
     self.assertEqual(
         "my_model/dense/kernel",
         serialized_graph.nodes[optimizer_node.slot_variables[0]
@@ -241,9 +240,10 @@ class CheckpointingTests(test.TestCase):
     on_create_model = MyModel()
     on_create_optimizer = adam.AdamOptimizer(
         0.001,
-        # Preserve beta1_power and beta2_power when appying gradients so we can
-        # test that they've been restored correctly.
-        beta1=1.0, beta2=1.0)
+        # Preserve beta_1_power and beta_2_power when appying gradients
+        # so we can test that they've been restored correctly.
+        beta1=1.0,
+        beta2=1.0)
     on_create_root = util.Checkpoint(
         optimizer=on_create_optimizer, model=on_create_model)
     # Deferred restoration
@@ -263,9 +263,9 @@ class CheckpointingTests(test.TestCase):
     dummy_var = resource_variable_ops.ResourceVariable([1.])
     on_create_optimizer.minimize(loss=dummy_var.read_value)
     status.assert_consumed()
-    beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators()
-    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power))
-    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power))
+    beta_1_power, beta_2_power = on_create_optimizer._get_beta_accumulators()
+    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta_1_power))
+    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta_2_power))
 
   # TODO(allenl): Debug garbage created by this test in python3.
   def testDeferredRestorationUsageEager(self):
@@ -477,7 +477,7 @@ class CheckpointingTests(test.TestCase):
     no_slot_status.run_restore_ops()
     self.assertEqual(12., self.evaluate(new_root.var))
     new_root.optimizer = adam.AdamOptimizer(0.1)
-    with self.assertRaisesRegexp(AssertionError, "beta1_power"):
+    with self.assertRaisesRegexp(AssertionError, "beta_1_power"):
       slot_status.assert_consumed()
     self.assertEqual(12., self.evaluate(new_root.var))
     if context.executing_eagerly():
@@ -556,8 +556,8 @@ class CheckpointingTests(test.TestCase):
         self.evaluate(first_variable.assign([1.]))
         self.evaluate(optimizer.get_slot(
             var=first_variable, name="m").assign([2.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.evaluate(beta1_power.assign(3.))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta_1_power.assign(3.))
 
       # Save and load in a second graph
       second_graph = ops.Graph()
@@ -571,29 +571,29 @@ class CheckpointingTests(test.TestCase):
         self.evaluate(second_variable.assign([4.]))
         self.evaluate(optimizer.get_slot(
             var=second_variable, name="m").assign([5.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.evaluate(beta1_power.assign(6.))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta_1_power.assign(6.))
         save_path = second_root_checkpointable.save(checkpoint_prefix)
         self.evaluate(second_variable.assign([7.]))
         self.evaluate(optimizer.get_slot(
             var=second_variable, name="m").assign([8.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(6., self.evaluate(beta1_power))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta_1_power))
         status = second_root_checkpointable.restore(save_path)
         status.assert_consumed().run_restore_ops()
         self.assertAllEqual([4.], self.evaluate(second_variable))
         self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
             var=second_variable, name="m")))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(6., self.evaluate(beta1_power))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta_1_power))
 
       # Check that the first graph is unmolested
       with first_graph.as_default(), first_session.as_default():
         self.assertAllEqual([1.], self.evaluate(first_variable))
         self.assertAllEqual([2.], self.evaluate(optimizer.get_slot(
             var=first_variable, name="m")))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(3., self.evaluate(beta1_power))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(3., self.evaluate(beta_1_power))
 
 
 class TemplateTests(test.TestCase):
@@ -659,8 +659,8 @@ class CheckpointCompatibilityTests(test.TestCase):
     self.evaluate(model._named_dense.bias.assign([1.]))
     self.evaluate(optimizer.get_slot(
         var=model._named_dense.bias, name="m").assign([2.]))
-    beta1_power, _ = optimizer._get_beta_accumulators()
-    self.evaluate(beta1_power.assign(3.))
+    beta_1_power, _ = optimizer._get_beta_accumulators()
+    self.evaluate(beta_1_power.assign(3.))
     return root_checkpointable
 
   def _set_sentinels(self, root_checkpointable):
@@ -669,8 +669,8 @@ class CheckpointCompatibilityTests(test.TestCase):
         root_checkpointable.optimizer.get_slot(
             var=root_checkpointable.model._named_dense.bias, name="m")
         .assign([102.]))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
-    self.evaluate(beta1_power.assign(103.))
+    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.evaluate(beta_1_power.assign(103.))
 
   def _check_sentinels(self, root_checkpointable):
     self.assertAllEqual(
@@ -678,8 +678,8 @@ class CheckpointCompatibilityTests(test.TestCase):
     self.assertAllEqual([2.], self.evaluate(
         root_checkpointable.optimizer.get_slot(
             var=root_checkpointable.model._named_dense.bias, name="m")))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
-    self.assertAllEqual(3., self.evaluate(beta1_power))
+    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.assertAllEqual(3., self.evaluate(beta_1_power))
 
   def _write_name_based_checkpoint(self):
     checkpoint_directory = self.get_temp_dir()
diff --git a/tensorflow/contrib/optimizer_v2/gradient_descent.py b/tensorflow/contrib/optimizer_v2/gradient_descent.py
index 945c8de559..8bdf408217 100644
--- a/tensorflow/contrib/optimizer_v2/gradient_descent.py
+++ b/tensorflow/contrib/optimizer_v2/gradient_descent.py
@@ -18,15 +18,17 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import sgd
+from tensorflow.python.util import deprecation
 
 
-class GradientDescentOptimizer(optimizer_v2.OptimizerV2):
+class GradientDescentOptimizer(sgd.SGD):
   """Optimizer that implements the gradient descent algorithm."""
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate, use_locking=False, name="GradientDescent"):
     """Construct a new gradient descent optimizer.
 
@@ -41,29 +43,5 @@ class GradientDescentOptimizer(optimizer_v2.OptimizerV2):
       name: Optional name prefix for the operations created when applying
         gradients. Defaults to "GradientDescent".
     """
-    super(GradientDescentOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-
-  def _apply_dense(self, grad, var, state):
-    return training_ops.apply_gradient_descent(
-        var,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking).op
-
-  def _resource_apply_dense(self, grad, handle, state):
-    lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
-    return training_ops.resource_apply_gradient_descent(
-        handle.handle, lr, grad, use_locking=self._use_locking)
-
-  def _resource_apply_sparse_duplicate_indices(
-      self, grad, handle, indices, state):
-    lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
-    return resource_variable_ops.resource_scatter_add(
-        handle.handle, indices, -grad * lr)
-
-  def _apply_sparse_duplicate_indices(self, grad, var, state):
-    delta = ops.IndexedSlices(
-        grad.values * state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad.indices, grad.dense_shape)
-    return var.scatter_sub(delta, use_locking=self._use_locking)
+    super(GradientDescentOptimizer, self).__init__(
+        learning_rate=learning_rate, name=name)
diff --git a/tensorflow/contrib/optimizer_v2/momentum.py b/tensorflow/contrib/optimizer_v2/momentum.py
index 0a5aadc2d1..0636f7e356 100644
--- a/tensorflow/contrib/optimizer_v2/momentum.py
+++ b/tensorflow/contrib/optimizer_v2/momentum.py
@@ -18,11 +18,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import sgd
+from tensorflow.python.util import deprecation
 
 
-class MomentumOptimizer(optimizer_v2.OptimizerV2):
+class MomentumOptimizer(sgd.SGD):
   """Optimizer that implements the Momentum algorithm.
 
   Computes (if `use_nesterov = False`):
@@ -39,6 +39,10 @@ class MomentumOptimizer(optimizer_v2.OptimizerV2):
   when that part of the variable was used in the forward pass.
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate, momentum,
                use_locking=False, name="Momentum", use_nesterov=False):
     """Construct a new Momentum optimizer.
@@ -68,57 +72,8 @@ class MomentumOptimizer(optimizer_v2.OptimizerV2):
     optimizer functions.
     @end_compatibility
     """
-    super(MomentumOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-    self._set_hyper("momentum", momentum)
-    self._use_nesterov = use_nesterov
-
-  def _create_vars(self, var_list, state):
-    for v in var_list:
-      state.zeros_slot(v, "momentum")
-
-  def _apply_dense(self, grad, var, state):
-    mom = state.get_slot(var, "momentum")
-    return training_ops.apply_momentum(
-        var,
-        mom,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        state.get_hyper("momentum", var.dtype.base_dtype),
-        use_locking=self._use_locking,
-        use_nesterov=self._use_nesterov).op
-
-  def _resource_apply_dense(self, grad, var, state):
-    mom = state.get_slot(var, "momentum")
-    return training_ops.resource_apply_momentum(
-        var.handle,
-        mom.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        state.get_hyper("momentum", var.dtype.base_dtype),
-        use_locking=self._use_locking,
-        use_nesterov=self._use_nesterov)
-
-  def _apply_sparse(self, grad, var, state):
-    mom = state.get_slot(var, "momentum")
-    return training_ops.sparse_apply_momentum(
-        var,
-        mom,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad.values,
-        grad.indices,
-        state.get_hyper("momentum", var.dtype.base_dtype),
-        use_locking=self._use_locking,
-        use_nesterov=self._use_nesterov).op
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    mom = state.get_slot(var, "momentum")
-    return training_ops.resource_sparse_apply_momentum(
-        var.handle,
-        mom.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        indices,
-        state.get_hyper("momentum", var.dtype.base_dtype),
-        use_locking=self._use_locking,
-        use_nesterov=self._use_nesterov)
+    super(MomentumOptimizer, self).__init__(
+        learning_rate=learning_rate,
+        momentum=momentum,
+        name=name,
+        nesterov=use_nesterov)
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
index 53e27c08c4..9c98dd93b4 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
@@ -20,462 +20,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import abc
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.util import deprecation
 
-from tensorflow.python.eager import backprop
-from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gradients
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.training import distribute as distribute_lib
-from tensorflow.python.training import distribution_strategy_context
-from tensorflow.python.training import optimizer as optimizer_v1
-from tensorflow.python.training import slot_creator
-from tensorflow.python.training.checkpointable import base as checkpointable
-from tensorflow.python.util import nest
 
-
-class _OptimizableVariable(object):
-  """Interface for abstracting over variables in the optimizers."""
-
-  @abc.abstractmethod
-  def target(self):
-    """Returns the optimization target for this variable."""
-    raise NotImplementedError("Calling an abstract method.")
-
-  @abc.abstractmethod
-  def update_op(self, optimizer, g, *args):
-    """Returns the update ops for updating the variable."""
-    raise NotImplementedError("Calling an abstract method.")
-
-
-class _RefVariableProcessor(_OptimizableVariable):
-  """Processor for Variable."""
-
-  def __init__(self, v):
-    self._v = v
-
-  def target(self):
-    return self._v._ref()  # pylint: disable=protected-access
-
-  def update_op(self, optimizer, g, *args):
-    if isinstance(g, ops.Tensor):
-      update_op = optimizer._apply_dense(g, self._v, *args)  # pylint: disable=protected-access
-      if self._v.constraint is not None:
-        with ops.control_dependencies([update_op]):
-          return self._v.assign(self._v.constraint(self._v))
-      else:
-        return update_op
-    else:
-      assert isinstance(g, ops.IndexedSlices), ("Gradient ", g, " is neither a "
-                                                "tensor nor IndexedSlices.")
-      if self._v.constraint is not None:
-        raise RuntimeError(
-            "Cannot use a constraint function on a sparse variable.")
-      # pylint: disable=protected-access
-      return optimizer._apply_sparse_duplicate_indices(g, self._v, *args)
-
-
-class _DenseReadResourceVariableProcessor(_OptimizableVariable):
-  """Processor for dense ResourceVariables."""
-
-  def __init__(self, v):
-    self._v = v
-
-  def target(self):
-    return self._v
-
-  def update_op(self, optimizer, g, *args):
-    # pylint: disable=protected-access
-    update_op = optimizer._resource_apply_dense(g, self._v.op.inputs[0], *args)
-    if self._v.constraint is not None:
-      with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v.constraint(self._v))
-    else:
-      return update_op
-
-
-class _DenseResourceVariableProcessor(_OptimizableVariable):
-  """Processor for dense ResourceVariables."""
-
-  def __init__(self, v):
-    self._v = v
-
-  def target(self):
-    return self._v
-
-  def update_op(self, optimizer, g, *args):
-    # pylint: disable=protected-access
-    if isinstance(g, ops.IndexedSlices):
-      if self._v.constraint is not None:
-        raise RuntimeError(
-            "Cannot use a constraint function on a sparse variable.")
-      return optimizer._resource_apply_sparse_duplicate_indices(
-          g.values, self._v, g.indices, *args)
-    update_op = optimizer._resource_apply_dense(g, self._v, *args)
-    if self._v.constraint is not None:
-      with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v.constraint(self._v))
-    else:
-      return update_op
-
-
-class _TensorProcessor(_OptimizableVariable):
-  """Processor for ordinary Tensors.
-
-  Even though a Tensor can't really be updated, sometimes it is useful to
-  compute the gradients with respect to a Tensor using the optimizer. Updating
-  the Tensor is, of course, unsupported.
-  """
-
-  def __init__(self, v):
-    self._v = v
-
-  def target(self):
-    return self._v
-
-  def update_op(self, optimizer, g, *args):
-    raise NotImplementedError("Trying to update a Tensor ", self._v)
-
-
-def _get_processor(v):
-  """The processor of v."""
-  if context.executing_eagerly():
-    if isinstance(v, ops.Tensor):
-      return _TensorProcessor(v)
-    else:
-      return _DenseResourceVariableProcessor(v)
-  if v.op.type == "VarHandleOp":
-    return _DenseResourceVariableProcessor(v)
-  if isinstance(v, variables.Variable):
-    return _RefVariableProcessor(v)
-  if isinstance(v, ops.Tensor):
-    return _TensorProcessor(v)
-  raise NotImplementedError("Trying to optimize unsupported type ", v)
-
-
-def _var_key_v2(var):
-  """Key for representing a primary variable, for looking up slots."""
-  # pylint: disable=protected-access
-  if hasattr(var, "_distributed_container"):
-    distributed_container = var._distributed_container()
-    assert distributed_container is not None
-    if context.executing_eagerly():
-      return distributed_container._unique_id
-    return distributed_container._shared_name
-  if context.executing_eagerly():
-    return var._unique_id
-  return var.op.name
-
-
-def _resolve(value, name):
-  if callable(value):
-    value = value()
-  return ops.convert_to_tensor(value, name=name)
-
-
-def _is_dynamic(value):
-  """Returns true if __init__ arg `value` should be re-evaluated each step."""
-  if callable(value): return True
-  # Don't need to do anything special in graph mode, since dynamic values
-  # will propagate correctly automatically.
-  # TODO(josh11b): Add per-device caching across steps using variables for
-  # truly static values once we add distributed support.
-  if context.executing_eagerly() and isinstance(
-      value, resource_variable_ops.ResourceVariable):
-    return True
-  return False
-
-
-class _OptimizerV2State(object):
-  """Holds per-graph and per-step optimizer state.
-
-  Use _init_with_static_hyper() to create the state for a graph, and then
-  _copy_with_dynamic_hyper() to convert that to state for a particular step.
-  The difference between the two is that the former only has hyper
-  parameter values that are static and the latter also has values that
-  can change every step (according to _is_dynamic()).
-  """
-
-  def __init__(self, op_name):
-    self._op_name = op_name
-
-  def _init_with_static_hyper(self, hyper):
-    """Initialize a fresh state object from hyper dict."""
-    # self._hyper contains a dict from name to a dict with the Tensor values.
-    # This dict starts with a single item with key "None" with the hyper
-    # parameter value converted to a Tensor. Other items have dtype keys
-    # with that Tensor cast to that dtype.
-    with ops.init_scope():
-      self._hyper = {name: {None: ops.convert_to_tensor(value, name=name)}
-                     for name, (dynamic, value) in sorted(hyper.items())
-                     if not dynamic}
-    self._slots = {}
-    self._non_slot_dict = {}
-    # Extra state to help Optimizers implement Checkpointable. Holds information
-    # about variables which will be restored as soon as they're created.
-    self._deferred_dependencies = {}  # Non-slot variables
-    self._deferred_slot_restorations = {}  # Slot variables
-
-  def _copy_with_dynamic_hyper(self, hyper, distribution, non_slot_devices):
-    """Create a new state object for a particular step."""
-    ret = _OptimizerV2State(self._op_name)
-    # pylint: disable=protected-access
-    ret._slots = self._slots
-    ret._non_slot_dict = self._non_slot_dict
-    ret._deferred_dependencies = self._deferred_dependencies
-    ret._deferred_slot_restorations = self._deferred_slot_restorations
-    ret._hyper = {name: {None: _resolve(value, name)}
-                  for name, (dynamic, value) in sorted(hyper.items())
-                  if dynamic}
-    ret._hyper.update(self._hyper)
-    ret._non_slot_devices = non_slot_devices
-    ret._distribution = distribution
-    return ret
-
-  def _variables(self):
-    """Returns a list of all variables held by self."""
-    optimizer_variables = list(self._non_slot_dict.values())
-    for variable_dict in self._slots.values():
-      for slot_for_variable in variable_dict.values():
-        optimizer_variables.append(slot_for_variable)
-    # Sort variables by name so that the return is deterministic.
-    return sorted(optimizer_variables, key=lambda v: v.name)
-
-  def _slot_dict(self, slot_name):
-    """Returns a dict for caching slots created under the given name.
-
-    Args:
-      slot_name: Name for the slot.
-
-    Returns:
-      A dict that maps primary `Variable` objects to the slot created
-      for that variable, under the given slot name.
-    """
-    named_slots = self._slots.get(slot_name, None)
-    if named_slots is None:
-      named_slots = {}
-      self._slots[slot_name] = named_slots
-    return named_slots
-
-  def create_slot(self, var, val, slot_name, optional_op_name=None):
-    """Find or create a slot for a variable.
-
-    Args:
-      var: A `Variable` object.
-      val: A `Tensor`.  The initial value of the slot.
-      slot_name: Name for the slot.
-      optional_op_name: Name to use when scoping the Variable that
-        needs to be created for the slot.
-
-    Returns:
-      A `Variable` object.
-    """
-    named_slots = self._slot_dict(slot_name)
-    var_key = _var_key_v2(var)
-    if var_key not in named_slots:
-      new_slot_variable = slot_creator.create_slot(
-          var, val, optional_op_name or self._op_name)
-      self._restore_slot_variable(
-          slot_name=slot_name, variable=var,
-          slot_variable=new_slot_variable)
-      named_slots[var_key] = new_slot_variable
-    return named_slots[var_key]
-
-  def create_slot_with_initializer(self, var, initializer, shape, dtype,
-                                   slot_name, optional_op_name=None):
-    """Find or create a slot for a variable, using an Initializer.
-
-    Args:
-      var: A `Variable` object.
-      initializer: An `Initializer`.  The initial value of the slot.
-      shape: Shape of the initial value of the slot.
-      dtype: Type of the value of the slot.
-      slot_name: Name for the slot.
-      optional_op_name: Name to use when scoping the Variable that
-        needs to be created for the slot.
-
-    Returns:
-      A `Variable` object.
-    """
-    named_slots = self._slot_dict(slot_name)
-    var_key = _var_key_v2(var)
-    if var_key not in named_slots:
-      new_slot_variable = slot_creator.create_slot_with_initializer(
-          var, initializer, shape, dtype, optional_op_name or self._op_name)
-      self._restore_slot_variable(
-          slot_name=slot_name, variable=var,
-          slot_variable=new_slot_variable)
-      named_slots[var_key] = new_slot_variable
-    return named_slots[var_key]
-
-  def zeros_slot(self, var, slot_name, optional_op_name=None):
-    """Find or create a slot initialized with 0.0.
-
-    Args:
-      var: A `Variable` object.
-      slot_name: Name for the slot.
-      optional_op_name: Name to use when scoping the Variable that
-        needs to be created for the slot.
-
-    Returns:
-      A `Variable` object.
-    """
-    named_slots = self._slot_dict(slot_name)
-    var_key = _var_key_v2(var)
-    if var_key not in named_slots:
-      new_slot_variable = slot_creator.create_zeros_slot(
-          var, optional_op_name or self._op_name)
-      self._restore_slot_variable(
-          slot_name=slot_name, variable=var,
-          slot_variable=new_slot_variable)
-      named_slots[var_key] = new_slot_variable
-    return named_slots[var_key]
-
-  def _create_or_restore_slot_variable(
-      self, slot_variable_position, slot_name, variable,
-      optional_op_name=None):
-    """Restore a slot variable's value, possibly creating it.
-
-    Called when a variable which has an associated slot variable is created or
-    restored. When executing eagerly, we create the slot variable with a
-    restoring initializer.
-
-    No new variables are created when graph building. Instead,
-    _restore_slot_variable catches these after normal creation and adds restore
-    ops to the graph. This method is nonetheless important when graph building
-    for the case when a slot variable has already been created but `variable`
-    has just been added to a dependency graph (causing us to realize that the
-    slot variable needs to be restored).
-
-    Args:
-      slot_variable_position: A `checkpointable._CheckpointPosition` object
-        indicating the slot variable `Checkpointable` object to be restored.
-      slot_name: The name of this `Optimizer`'s slot to restore into.
-      variable: The variable object this slot is being created for.
-      optional_op_name: Name to use when scoping the Variable that
-        needs to be created for the slot.
-    """
-    slot_variable = self.get_slot(var=variable, name=slot_name)
-    if (slot_variable is None and context.executing_eagerly() and
-        slot_variable_position.is_simple_variable()
-        # Defer slot variable creation if there is an active variable creator
-        # scope. Generally we'd like to eagerly create/restore slot variables
-        # when possible, but this may mean that scopes intended to catch
-        # `variable` also catch its eagerly created slot variable
-        # unintentionally (specifically make_template would add a dependency on
-        # a slot variable if not for this case). Deferring is mostly harmless
-        # (aside from double initialization), and makes variable creator scopes
-        # behave the same way they do when graph building.
-        and not ops.get_default_graph()._variable_creator_stack):  # pylint: disable=protected-access
-      initializer = checkpointable.CheckpointInitialValue(
-          checkpoint_position=slot_variable_position)
-      slot_variable = self.create_slot(
-          var=variable,
-          val=initializer,
-          slot_name=slot_name,
-          optional_op_name=optional_op_name)
-      # Optimizers do not have unconditional dependencies on their slot
-      # variables (nor do any other objects). They are only saved if the
-      # variables they were created for are also saved.
-    if slot_variable is not None:
-      # If we've either made this slot variable, or if we've pulled out an
-      # existing slot variable, we should restore it.
-      slot_variable_position.restore(slot_variable)
-    else:
-      # We didn't make the slot variable. Defer restoring until it gets created
-      # normally. We keep a list rather than the one with the highest restore
-      # UID in case slot variables have their own dependencies, in which case
-      # those could differ between restores.
-      variable_key = _var_key_v2(variable)
-      self._deferred_slot_restorations.setdefault(
-          slot_name, {}).setdefault(variable_key, []).append(
-              slot_variable_position)
-
-  def get_slot(self, var, name):
-    """Return a slot named `name` created for `var` by the Optimizer.
-
-    Some `Optimizer` subclasses use additional variables.  For example
-    `Momentum` and `Adagrad` use variables to accumulate updates.  This method
-    gives access to these `Variable` objects if for some reason you need them.
-
-    Use `get_slot_names()` to get the list of slot names created by the
-    `Optimizer`.
-
-    Args:
-      var: A variable passed to `minimize()` or `apply_gradients()`.
-      name: A string.
-
-    Returns:
-      The `Variable` for the slot if it was created, `None` otherwise.
-    """
-    named_slots = self._slots.get(name, None)
-    if not named_slots:
-      return None
-    return named_slots.get(_var_key_v2(var), None)
-
-  def get_slot_names(self):
-    """Return a list of the names of slots created by the `Optimizer`.
-
-    See `get_slot()`.
-
-    Returns:
-      A list of strings.
-    """
-    return sorted(self._slots.keys())
-
-  def create_non_slot(self, initial_value, name, colocate_with=None):
-    """Add an extra variable, not associated with a slot."""
-    v = self._non_slot_dict.get(name, None)
-    if v is None:
-      if colocate_with is None: colocate_with = self._non_slot_devices
-      with self._distribution.colocate_vars_with(colocate_with):
-        # TODO(josh11b): Use get_variable() except for the legacy Adam use case.
-        v = variable_scope.variable(initial_value, name=name, trainable=False)
-      self._non_slot_dict[name] = v
-      deferred_dependencies_list = self._deferred_dependencies.pop(name, ())
-      for checkpoint_position in sorted(
-          deferred_dependencies_list,
-          key=lambda restore: restore.checkpoint.restore_uid,
-          reverse=True):
-        checkpoint_position.restore(v)
-    return v
-
-  def _restore_slot_variable(self, slot_name, variable, slot_variable):
-    """Restore a newly created slot variable's value."""
-    variable_key = _var_key_v2(variable)
-    deferred_restorations = self._deferred_slot_restorations.get(
-        slot_name, {}).pop(variable_key, [])
-    # Iterate over restores, highest restore UID first to minimize the number
-    # of assignments.
-    deferred_restorations.sort(key=lambda position: position.restore_uid,
-                               reverse=True)
-    for checkpoint_position in deferred_restorations:
-      checkpoint_position.restore(slot_variable)
-
-  def get_non_slot(self, name):
-    """Returns the non-slot variable identified by `name`."""
-    return self._non_slot_dict.get(name, None)
-
-  def get_hyper(self, name, dtype=None):
-    """Returns the `name` hyper parameter, optionally cast to `dtype`."""
-    dtype_dict = self._hyper[name]
-    # Do we have the value cast to dtype already cached? This should always
-    # succeed when dtype is None.
-    if dtype in dtype_dict:
-      return dtype_dict[dtype]
-    # Not cached, cast to dtype and save the result in the cache.
-    result = math_ops.cast(dtype_dict[None], dtype)
-    dtype_dict[dtype] = result
-    return result
-
-
-class OptimizerV2(optimizer_v1.Optimizer):
+class OptimizerV2(optimizer_v2.OptimizerV2):
   """Updated base class for optimizers.
 
   This class defines the API to add Ops to train a model.  You never use this
@@ -586,6 +135,10 @@ class OptimizerV2(optimizer_v1.Optimizer):
   GATE_OP = 1
   GATE_GRAPH = 2
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, use_locking, name):
     """Create a new Optimizer.
 
@@ -606,746 +159,4 @@ class OptimizerV2(optimizer_v1.Optimizer):
       RuntimeError: If _create_slots has been overridden instead of
           _create_vars.
     """
-    # Note: We intentionally don't call parent __init__.
-
-    # Optimizer._create_slots was replaced by _create_vars in OptimizerV2.
-    if (self.__class__._create_slots.__code__ is not  # pylint: disable=protected-access
-        OptimizerV2._create_slots.__code__):
-      raise RuntimeError("Override _create_vars instead of _create_slots when "
-                         "descending from OptimizerV2 (class %s)" %
-                         self.__class__.__name__)
-    if not name:
-      raise ValueError("Must specify the optimizer name")
-
-    self._use_locking = use_locking
-    self._name = name
-    # Map from graph_key to state for that graph. We use the graph_key
-    # since it works in both eager and graph mode, and gives the outer
-    # graph inside functions.
-    tower_context = distribution_strategy_context.get_tower_context()
-    if tower_context is None:
-      # In a cross-tower context for a DistributionStrategy, which means
-      # only one Optimizer will be created, not one per tower.
-      self._per_graph_state = {}
-    else:
-      # We use get_tower_context().merge_call() to get a single dict
-      # shared across all model replicas when running with a
-      # DistributionStrategy.
-      self._per_graph_state = tower_context.merge_call(lambda _: {})
-
-    # Hyper parameters, and whether they should be re-evaluated every step.
-    self._hyper = {}
-
-  def _set_hyper(self, name, value):
-    self._hyper[name] = (_is_dynamic(value), value)
-
-  def minimize(self, loss, global_step=None, var_list=None,
-               gate_gradients=GATE_OP, aggregation_method=None,
-               colocate_gradients_with_ops=False, name=None,
-               grad_loss=None, stop_gradients=None,
-               scale_loss_by_num_towers=None):
-    """Add operations to minimize `loss` by updating `var_list`.
-
-    This method simply combines calls `compute_gradients()` and
-    `apply_gradients()`. If you want to process the gradient before applying
-    them call `compute_gradients()` and `apply_gradients()` explicitly instead
-    of using this function.
-
-    Args:
-      loss: A `Tensor` containing the value to minimize.
-      global_step: Optional `Variable` to increment by one after the
-        variables have been updated.
-      var_list: Optional list or tuple of `Variable` objects to update to
-        minimize `loss`.  Defaults to the list of variables collected in
-        the graph under the key `GraphKeys.TRAINABLE_VARIABLES`.
-      gate_gradients: How to gate the computation of gradients.  Can be
-        `GATE_NONE`, `GATE_OP`, or  `GATE_GRAPH`.
-      aggregation_method: Specifies the method used to combine gradient terms.
-        Valid values are defined in the class `AggregationMethod`.
-      colocate_gradients_with_ops: If True, try colocating gradients with
-        the corresponding op.
-      name: Optional name for the returned operation.
-      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
-      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
-        through.
-      scale_loss_by_num_towers: Optional boolean. If true, scale the loss
-        down by the number of towers. By default, auto-detects whether this
-        is needed.
-
-    Returns:
-      An Operation that updates the variables in `var_list`.  If `global_step`
-      was not `None`, that operation also increments `global_step`.
-
-    Raises:
-      ValueError: If some of the variables are not `Variable` objects.
-
-    @compatibility(eager)
-    When eager execution is enabled, `loss` should be a Python function that
-    takes elements of `var_list` as arguments and computes the value to be
-    minimized. If `var_list` is None, `loss` should take no arguments.
-    Minimization (and gradient computation) is done with respect to the
-    elements of `var_list` if not None, else with respect to any trainable
-    variables created during the execution of the `loss` function.
-    `gate_gradients`, `aggregation_method`, `colocate_gradients_with_ops` and
-    `grad_loss` are ignored when eager execution is enabled.
-    @end_compatibility
-    """
-    grads_and_vars = self.compute_gradients(
-        loss, var_list=var_list, gate_gradients=gate_gradients,
-        aggregation_method=aggregation_method,
-        colocate_gradients_with_ops=colocate_gradients_with_ops,
-        grad_loss=grad_loss, stop_gradients=stop_gradients,
-        scale_loss_by_num_towers=scale_loss_by_num_towers)
-
-    vars_with_grad = [v for g, v in grads_and_vars if g is not None]
-    if not vars_with_grad:
-      raise ValueError(
-          "No gradients provided for any variable, check your graph for ops"
-          " that do not support gradients, between variables %s and loss %s." %
-          ([str(v) for _, v in grads_and_vars], loss))
-
-    return self.apply_gradients(grads_and_vars, global_step=global_step,
-                                name=name)
-
-  def compute_gradients(self, loss, var_list=None,
-                        gate_gradients=GATE_OP,
-                        aggregation_method=None,
-                        colocate_gradients_with_ops=False,
-                        grad_loss=None, stop_gradients=None,
-                        scale_loss_by_num_towers=None):
-    """Compute gradients of `loss` for the variables in `var_list`.
-
-    This is the first part of `minimize()`.  It returns a list
-    of (gradient, variable) pairs where "gradient" is the gradient
-    for "variable".  Note that "gradient" can be a `Tensor`, an
-    `IndexedSlices`, or `None` if there is no gradient for the
-    given variable.
-
-    Args:
-      loss: A Tensor containing the value to minimize or a callable taking
-        no arguments which returns the value to minimize. When eager execution
-        is enabled it must be a callable.
-      var_list: Optional list or tuple of `tf.Variable` to update to minimize
-        `loss`.  Defaults to the list of variables collected in the graph
-        under the key `GraphKeys.TRAINABLE_VARIABLES`.
-      gate_gradients: How to gate the computation of gradients.  Can be
-        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
-      aggregation_method: Specifies the method used to combine gradient terms.
-        Valid values are defined in the class `AggregationMethod`.
-      colocate_gradients_with_ops: If True, try colocating gradients with
-        the corresponding op.
-      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
-      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
-        through.
-      scale_loss_by_num_towers: Optional boolean. If true, scale the loss
-        down by the number of towers. By default, auto-detects whether this
-        is needed.
-
-    Returns:
-      A list of (gradient, variable) pairs. Variable is always present, but
-      gradient can be `None`.
-
-    Raises:
-      TypeError: If `var_list` contains anything else than `Variable` objects.
-      ValueError: If some arguments are invalid.
-      RuntimeError: If called with eager execution enabled and `loss` is
-        not callable.
-
-    @compatibility(eager)
-    When eager execution is enabled, `gate_gradients`, `aggregation_method`,
-    and `colocate_gradients_with_ops` are ignored.
-    @end_compatibility
-    """
-    # TODO(josh11b): Test that we handle weight decay in a reasonable way.
-    if callable(loss):
-      with backprop.GradientTape() as tape:
-        if var_list is not None:
-          tape.watch(var_list)
-        loss_value = loss()
-
-        # Scale loss for number of towers (callable-loss case). In this case,
-        # we have to be careful to call distribute_lib.get_loss_reduction()
-        # *after* loss() is evaluated, so we know what loss reduction it uses.
-        if scale_loss_by_num_towers is None:
-          scale_loss_by_num_towers = (
-              distribute_lib.get_loss_reduction() ==
-              variable_scope.VariableAggregation.MEAN)
-        if scale_loss_by_num_towers:
-          num_towers = distribution_strategy_context.get_distribution_strategy(
-          ).num_towers
-          if num_towers > 1:
-            loss_value *= 1. / num_towers
-
-      if var_list is None:
-        var_list = tape.watched_variables()
-      grads = tape.gradient(loss_value, var_list, grad_loss)
-      return list(zip(grads, var_list))
-    if context.executing_eagerly():
-      raise RuntimeError(
-          "`loss` passed to Optimizer.compute_gradients should "
-          "be a function when eager execution is enabled.")
-
-    # Scale loss for number of towers (non-callable-loss case).
-    if scale_loss_by_num_towers is None:
-      scale_loss_by_num_towers = (
-          distribute_lib.get_loss_reduction() ==
-          variable_scope.VariableAggregation.MEAN)
-    if scale_loss_by_num_towers:
-      num_towers = distribution_strategy_context.get_distribution_strategy(
-      ).num_towers
-      if num_towers > 1:
-        loss *= 1. / num_towers
-
-    if gate_gradients not in [optimizer_v1.Optimizer.GATE_NONE,
-                              optimizer_v1.Optimizer.GATE_OP,
-                              optimizer_v1.Optimizer.GATE_GRAPH]:
-      raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
-                       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
-                       gate_gradients)
-    self._assert_valid_dtypes([loss])
-    if grad_loss is not None:
-      self._assert_valid_dtypes([grad_loss])
-    if var_list is None:
-      var_list = (
-          variables.trainable_variables() +
-          ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
-    else:
-      var_list = nest.flatten(var_list)
-    # pylint: disable=protected-access
-    var_list += ops.get_collection(ops.GraphKeys._STREAMING_MODEL_PORTS)
-    # pylint: enable=protected-access
-    processors = [_get_processor(v) for v in var_list]
-    if not var_list:
-      raise ValueError("No variables to optimize.")
-    var_refs = [p.target() for p in processors]
-    grads = gradients.gradients(
-        loss, var_refs, grad_ys=grad_loss,
-        gate_gradients=(gate_gradients == optimizer_v1.Optimizer.GATE_OP),
-        aggregation_method=aggregation_method,
-        colocate_gradients_with_ops=colocate_gradients_with_ops,
-        stop_gradients=stop_gradients)
-    if gate_gradients == optimizer_v1.Optimizer.GATE_GRAPH:
-      grads = control_flow_ops.tuple(grads)
-    grads_and_vars = list(zip(grads, var_list))
-    self._assert_valid_dtypes(
-        [v for g, v in grads_and_vars
-         if g is not None and v.dtype != dtypes.resource])
-    return grads_and_vars
-
-  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
-    """Apply gradients to variables.
-
-    This is the second part of `minimize()`. It returns an `Operation` that
-    applies gradients.
-
-    Args:
-      grads_and_vars: List of (gradient, variable) pairs as returned by
-        `compute_gradients()`.
-      global_step: Optional `Variable` to increment by one after the
-        variables have been updated.
-      name: Optional name for the returned operation.  Default to the
-        name passed to the `Optimizer` constructor.
-
-    Returns:
-      An `Operation` that applies the specified gradients. If `global_step`
-      was not None, that operation also increments `global_step`.
-
-    Raises:
-      TypeError: If `grads_and_vars` is malformed.
-      ValueError: If none of the variables have gradients.
-    """
-    # This is a default implementation of apply_gradients() that can be shared
-    # by most optimizers.  It relies on the subclass implementing the following
-    # methods: _create_vars(), _prepare(), _apply_dense(), and _apply_sparse().
-
-    # Filter out variables with gradients of `None`.
-    grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
-    if not grads_and_vars:
-      raise ValueError("No variables provided.")
-    filtered = tuple((g, v) for (g, v) in grads_and_vars if g is not None)
-    if not filtered:
-      raise ValueError("No gradients provided for any variable: %s." %
-                       ([str(v) for _, v in grads_and_vars],))
-    return distribution_strategy_context.get_tower_context().merge_call(
-        self._distributed_apply, filtered, global_step=global_step, name=name)
-
-  def _get_or_create_state(self, var_list=None):
-    """Either looks up or creates `_OptimizerV2State`.
-
-    If any variables are available, they should be passed via the `var_list`
-    argument, and these will be used to determine the graph to create/retrieve
-    state for. Otherwise the returned state is for the current default graph.
-
-    Args:
-      var_list: A list of variables to extract a graph from.
-
-    Returns:
-      An `_OptimizerV2State` object.
-    """
-    # Determine the graph_key from the current graph.
-    eager_execution = context.executing_eagerly()
-    if eager_execution or var_list is None:
-      graph = ops.get_default_graph()
-    else:
-      graph = ops._get_graph_from_inputs(var_list)  # pylint: disable=protected-access
-    assert graph is not None
-    graph_key = graph._graph_key  # pylint: disable=protected-access
-
-    # Get the per graph state by looking up the graph_key.
-    if graph_key in self._per_graph_state:
-      per_graph_state = self._per_graph_state[graph_key]
-    else:
-      per_graph_state = _OptimizerV2State(self._name)
-      per_graph_state._init_with_static_hyper(self._hyper)  # pylint: disable=protected-access
-      self._per_graph_state[graph_key] = per_graph_state
-    return per_graph_state
-
-  def _distributed_apply(self, distribution, grads_and_vars, global_step, name):
-    """`apply_gradients` for use with a `DistributionStrategy`."""
-    reduced_grads = distribution.batch_reduce(
-        variable_scope.VariableAggregation.SUM, grads_and_vars)
-    var_list = [v for _, v in grads_and_vars]
-    grads_and_vars = zip(reduced_grads, var_list)
-
-    unwrapped_var_list = [x for v in var_list for x in distribution.unwrap(v)]
-    eager_execution = context.executing_eagerly()
-    if eager_execution:
-      # Give a clear error in this case instead of "name not supported
-      # for Eager Tensors" when we compute non_slot_devices.
-      for v in unwrapped_var_list:
-        if isinstance(v, ops.Tensor):
-          raise NotImplementedError("Trying to update a Tensor ", v)
-
-    with ops.name_scope(name, self._name) as name:
-      per_graph_state = self._get_or_create_state(var_list=unwrapped_var_list)
-      # Include the current value of any dynamic hyper parameters in `state`.
-      non_slot_devices = distribution.non_slot_devices(var_list)
-      state = per_graph_state._copy_with_dynamic_hyper(  # pylint: disable=protected-access
-          self._hyper, distribution, non_slot_devices)
-
-    # Create any slot and non-slot variables we need in `state`.
-    with ops.init_scope():
-      self._create_vars(var_list, state)
-
-    with ops.name_scope(name):  # Re-enter name_scope created above
-      # Give the child class a chance to do something before we start
-      # applying gradients.
-      self._prepare(state)
-
-      def update(v, g):
-        """Update variable `v` using gradient `g`."""
-        assert v is not None
-
-        # Convert the grad to Tensor or IndexedSlices if necessary, and
-        # look up a processor for each variable's type.
-        try:
-          g = ops.convert_to_tensor_or_indexed_slices(g)
-        except TypeError:
-          raise TypeError(
-              "Gradient must be convertible to a Tensor"
-              " or IndexedSlices, or None: %s" % g)
-        if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
-          raise TypeError(
-              "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
-        processor = _get_processor(v)
-
-        # We colocate all ops created in _apply_dense or _apply_sparse
-        # on the same device as the variable.
-        # TODO(apassos): figure out how to get the variable name here.
-        scope_name = "" if eager_execution else v.op.name
-        # device_policy is set because non-mirrored tensors will be read in
-        # `update_op`.
-        # TODO(josh11b): Make different state objects for each device to
-        # avoid needing to set the device_policy.
-        with ops.name_scope("update_" + scope_name), \
-            context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
-          return processor.update_op(self, g, state)
-
-      # Use the processors to update the variables.
-      update_ops = []
-      for grad, var in grads_and_vars:
-        update_ops.extend(distribution.update(var, update, grad, grouped=False))
-
-      # Give the child class a chance to do something after applying
-      # gradients
-      def finish():
-        # TODO(josh11b): Make different state objects for each device to
-        # avoid needing to set the device_policy.
-        with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
-          return self._finish(state)
-
-      update_ops = control_flow_ops.group(update_ops)
-      with ops.control_dependencies([update_ops]):
-        finish_updates = distribution.update_non_slot(
-            non_slot_devices, finish, grouped=False)
-      # We said grouped=False, which means finish_updates is always a list.
-      # It will be [None] when finish() returns None.
-      if finish_updates == [None]:
-        finish_updates = [update_ops]
-
-      # Update `global_step` (if any).
-      if global_step is None:
-        apply_updates = distribution.group(finish_updates, name=name)
-      else:
-        with ops.control_dependencies(finish_updates):
-
-          def update_global_step(global_step, name):
-            return global_step.assign_add(1, read_value=False, name=name)
-
-          apply_updates = distribution.update(
-              global_step, update_global_step, name)
-
-      # Add the training op to the TRAIN_OP graph collection in graph mode.
-      if not eager_execution:
-        if isinstance(apply_updates, ops.Tensor):
-          apply_updates = apply_updates.op
-        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
-        if apply_updates not in train_op:
-          train_op.append(apply_updates)
-
-      return apply_updates
-
-  def get_slot(self, var, name):
-    """Return a slot named `name` created for `var` by the Optimizer.
-
-    Some `Optimizer` subclasses use additional variables.  For example
-    `Momentum` and `Adagrad` use variables to accumulate updates.  This method
-    gives access to these `Variable` objects if for some reason you need them.
-
-    Use `get_slot_names()` to get the list of slot names created by the
-    `Optimizer`.
-
-    Args:
-      var: A variable passed to `minimize()` or `apply_gradients()`.
-      name: A string.
-
-    Returns:
-      The `Variable` for the slot if it was created, `None` otherwise.
-    """
-    state = self._get_state_for_var(var)
-    return state.get_slot(var, name) if state is not None else None
-
-  def get_slot_names(self):
-    """Return a list of the names of slots created by the `Optimizer`.
-
-    See `get_slot()`.
-
-    Returns:
-      A list of strings.
-    """
-    state = self._get_per_graph_state()
-    return state.get_slot_names() if state is not None else []
-
-  def variables(self):
-    """A list of variables which encode the current state of `Optimizer`.
-
-    Includes slot variables and additional global variables created by the
-    optimizer in the current default graph.
-
-    Returns:
-      A list of variables.
-    """
-    state = self._get_per_graph_state()
-    return state._variables() if state is not None else []  # pylint: disable=protected-access
-
-  # --------------
-  # Methods to be implemented by subclasses if they want to use the
-  # inherited implementation of apply_gradients() or compute_gradients().
-  # --------------
-  def _create_vars(self, var_list, state):
-    """Create all slots needed by the variables and any non-slot variables.
-
-    Args:
-      var_list: A list of `Variable` objects.
-      state: An object with these methods:
-        `create_slot(var, val, slot_name, optional_op_name)`,
-        `create_slot_with_initializer(`
-            `var, initializer, shape, dtype, slot_name, optional_op_name)`,
-        `zeros_slot(var, slot_name, optional_op_name)`,
-        `create_non_slot_variable(initial_value, name, colocate_with)`,
-        `get_hyper(name)`
-    """
-    # No slots needed by default
-    pass
-
-  def _prepare(self, state):
-    """Code to execute before applying gradients.
-
-    Note that most uses of _prepare() in Optimizer have been subsumed
-    by explicit support for hyper parameters in OptimizerV2
-
-    Args:
-      state: An object with a `get_hyper(name)` method.
-
-    Returns:
-      Return value will be ignored.
-    """
-    pass
-
-  def _apply_dense(self, grad, var, state):
-    """Add ops to apply dense gradients to `var`.
-
-    Args:
-      grad: A `Tensor`.
-      var: A `Variable` object.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation`.
-    """
-    raise NotImplementedError()
-
-  def _resource_apply_dense(self, grad, handle, state):
-    """Add ops to apply dense gradients to the variable `handle`.
-
-    Args:
-      grad: a `Tensor` representing the gradient.
-      handle: a `Tensor` of dtype `resource` which points to the variable
-       to be updated.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation` which updates the value of the variable.
-    """
-    raise NotImplementedError()
-
-  def _resource_apply_sparse_duplicate_indices(
-      self, grad, handle, indices, state):
-    """Add ops to apply sparse gradients to `handle`, with repeated indices.
-
-    Optimizers which override this method must deal with repeated indices. See
-    the docstring of `_apply_sparse_duplicate_indices` for details. By default
-    the correct behavior, to sum non-unique indices and their associated
-    gradients, is enforced by first pre-processing `grad` and `indices` and
-    passing them on to `_resource_apply_sparse`. Optimizers which deal correctly
-    with duplicate indices may instead override this method to avoid the
-    overhead of summing.
-
-    Args:
-      grad: a `Tensor` representing the gradient for the affected indices.
-      handle: a `Tensor` of dtype `resource` which points to the variable
-       to be updated.
-      indices: a `Tensor` of integral type representing the indices for
-       which the gradient is nonzero. Indices may be repeated.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation` which updates the value of the variable.
-    """
-    # pylint: disable=protected-access
-    summed_grad, unique_indices = optimizer_v1._deduplicate_indexed_slices(
-        values=grad, indices=indices)
-    # pylint: enable=protected-access
-    return self._resource_apply_sparse(
-        summed_grad, handle, unique_indices, state)
-
-  def _resource_apply_sparse(self, grad, handle, indices, state):
-    """Add ops to apply sparse gradients to the variable `handle`.
-
-    Similar to `_apply_sparse`, the `indices` argument to this method has been
-    de-duplicated. Optimizers which deal correctly with non-unique indices may
-    instead override `_resource_apply_sparse_duplicate_indices` to avoid this
-    overhead.
-
-    Args:
-      grad: a `Tensor` representing the gradient for the affected indices.
-      handle: a `Tensor` of dtype `resource` which points to the variable
-       to be updated.
-      indices: a `Tensor` of integral type representing the indices for
-       which the gradient is nonzero. Indices are unique.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation` which updates the value of the variable.
-    """
-    raise NotImplementedError()
-
-  def _apply_sparse_duplicate_indices(self, grad, var, state):
-    """Add ops to apply sparse gradients to `var`, with repeated sparse indices.
-
-    Optimizers which override this method must deal with IndexedSlices objects
-    such as the following:
-
-      IndexedSlicesValue(values=[1, 1], indices=[0, 0], dense_shape=[1])
-
-    The correct interpretation is:
-
-      IndexedSlicesValue(values=[2], indices=[0], dense_shape=[1])
-
-    Many optimizers deal incorrectly with repeated indices when updating based
-    on sparse gradients (e.g. summing squares rather than squaring the sum, or
-    applying momentum terms multiple times). Adding first is always the correct
-    behavior, so this is enforced here by reconstructing the IndexedSlices to
-    have only unique indices, then calling _apply_sparse.
-
-    Optimizers which deal correctly with repeated indices may instead override
-    this method to avoid the overhead of summing indices.
-
-    Args:
-      grad: `IndexedSlices`.
-      var: A `Variable` object.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation`.
-    """
-    # pylint: disable=protected-access
-    summed_values, unique_indices = optimizer_v1._deduplicate_indexed_slices(
-        values=grad.values, indices=grad.indices)
-    # pylint: enable=protected-access
-    gradient_no_duplicate_indices = ops.IndexedSlices(
-        indices=unique_indices,
-        values=summed_values,
-        dense_shape=grad.dense_shape)
-    return self._apply_sparse(gradient_no_duplicate_indices, var, state)
-
-  def _apply_sparse(self, grad, var, state):
-    """Add ops to apply sparse gradients to `var`.
-
-    The IndexedSlices object passed to `grad` in this function is by default
-    pre-processed in `_apply_sparse_duplicate_indices` to remove duplicate
-    indices (see its docstring for details). Optimizers which can tolerate or
-    have correct special cases for duplicate sparse indices may override
-    `_apply_sparse_duplicate_indices` instead of this function, avoiding that
-    overhead.
-
-    Args:
-      grad: `IndexedSlices`, with no repeated indices.
-      var: A `Variable` object.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation`.
-    """
-    raise NotImplementedError()
-
-  def _finish(self, state):
-    """Do what is needed to finish the update.
-
-    This is called inside a scope colocated with any non-slot variables.
-
-    Args:
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      The operation to apply updates, or None if no updates.
-    """
-    return None
-
-  # --------------
-  # Utility methods for subclasses.
-  # --------------
-  def _get_per_graph_state(self):
-    # pylint: disable=protected-access
-    return self._per_graph_state.get(ops.get_default_graph()._graph_key, None)
-
-  def _get_state_for_var(self, var):
-    # pylint: disable=protected-access
-    return self._per_graph_state.get(var._graph_key, None)
-
-  # --------------
-  # Overridden methods from Checkpointable.
-  # --------------
-
-  def _track_checkpointable(self, *args, **kwargs):
-    """Optimizers may not track dependencies. Raises an error."""
-    raise NotImplementedError(
-        "Optimizers may not have dependencies. File a feature request if this "
-        "limitation bothers you.")
-
-  @property
-  def _checkpoint_dependencies(self):
-    """From Checkpointable. Gather graph-specific non-slot variables to save."""
-    current_graph_non_slot_variables = []
-    state = self._get_per_graph_state()
-    if state is not None:
-      for name, variable_object in sorted(
-          state._non_slot_dict.items(),  # pylint: disable=protected-access
-          # Avoid comparing variables
-          key=lambda item: item[0]):
-        current_graph_non_slot_variables.append(
-            checkpointable.CheckpointableReference(
-                name=name, ref=variable_object))
-    # Note: ignores super(); Optimizers may not have any dependencies outside of
-    # state objects.
-    return current_graph_non_slot_variables
-
-  def _lookup_dependency(self, name):
-    """From Checkpointable. Find a non-slot variable in the current graph."""
-    state = self._get_per_graph_state()
-    if state is None:
-      return None
-    else:
-      return state.get_non_slot(name)
-
-  @property
-  def _deferred_dependencies(self):
-    """Lets Checkpointable know where non-slot variables are created.
-
-    If necessary, creates a new state object for the current default graph.
-    Checkpointable will then add entries to that state's deferred dependency
-    dictionary. The state object will check that dictionary when creating
-    non-slot variables, restoring their value if an entry is found.
-
-    Returns:
-      A dictionary which holds deferred dependencies for the current default
-      graph.
-    """
-    state = self._get_or_create_state()
-    return state._deferred_dependencies  # pylint: disable=protected-access
-
-  def _create_or_restore_slot_variable(
-      self, slot_variable_position, slot_name, variable):
-    """Checkpointable: Restore a slot variable's value, possibly creating it.
-
-    Called when a variable which has an associated slot variable is created or
-    restored.
-
-    Args:
-      slot_variable_position: A `checkpointable._CheckpointPosition` object
-        indicating the slot variable `Checkpointable` object to be restored.
-      slot_name: The name of this `Optimizer`'s slot to restore into.
-      variable: The variable object this slot is being created for.
-    """
-    state = self._get_or_create_state(var_list=[variable])
-    state._create_or_restore_slot_variable(  # pylint: disable=protected-access
-        slot_variable_position=slot_variable_position,
-        slot_name=slot_name,
-        variable=variable,
-        optional_op_name=self._name)
-
-  # --------------
-  # Unsupported parent methods
-  # --------------
-  def _slot_dict(self, slot_name):
-    raise NotImplementedError(
-        "_slot_dict() method unsupported in OptimizerV2")
-
-  def _get_or_make_slot(self, var, val, slot_name, op_name):
-    raise NotImplementedError(
-        "_get_or_make_slot() method unsupported in OptimizerV2")
-
-  def _get_or_make_slot_with_initializer(self, var, initializer, shape, dtype,
-                                         slot_name, op_name):
-    raise NotImplementedError(
-        "_get_or_make_slot_with_initializer() method unsupported in "
-        "OptimizerV2")
-
-  def _create_non_slot_variable(self, initial_value, name, colocate_with):
-    raise NotImplementedError(
-        "_create_non_slot_variable() method unsupported in OptimizerV2")
-
-  def _get_non_slot_variable(self, name, graph=None):
-    raise NotImplementedError(
-        "_get_non_slot_variable() method unsupported in OptimizerV2")
-
-  def _non_slot_variables(self):
-    raise NotImplementedError(
-        "_non_slot_variables() method unsupported in OptimizerV2")
+    super(OptimizerV2, self).__init__(name)
diff --git a/tensorflow/contrib/optimizer_v2/rmsprop.py b/tensorflow/contrib/optimizer_v2/rmsprop.py
index 3de53405ec..090e257ddc 100644
--- a/tensorflow/contrib/optimizer_v2/rmsprop.py
+++ b/tensorflow/contrib/optimizer_v2/rmsprop.py
@@ -41,19 +41,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.ops import array_ops
+from tensorflow.python.keras.optimizer_v2 import rmsprop
+from tensorflow.python.util import deprecation
 
-from tensorflow.python.training import training_ops
 
-
-class RMSPropOptimizer(optimizer_v2.OptimizerV2):
+class RMSPropOptimizer(rmsprop.RMSProp):
   """Optimizer that implements the RMSProp algorithm.
 
   See the
   [paper](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf).
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self,
                learning_rate,
                decay=0.9,
@@ -96,138 +98,10 @@ class RMSPropOptimizer(optimizer_v2.OptimizerV2):
       name: Optional name prefix for the operations created when applying
         gradients. Defaults to "RMSProp".
     """
-    super(RMSPropOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-    self._set_hyper("decay", decay)
-    self._set_hyper("momentum", momentum)
-    self._set_hyper("epsilon", epsilon)
-
-    self._centered = centered
-
-  def _create_vars(self, var_list, state):
-    for v in var_list:
-      init_rms = state.get_hyper(
-          "epsilon", v.dtype.base_dtype) * array_ops.ones_like(v)
-      state.create_slot_with_initializer(v, init_rms, v.get_shape(),
-                                         v.dtype.base_dtype, "rms")
-      if self._centered:
-        state.zeros_slot(v, "mg")
-      state.zeros_slot(v, "momentum")
-
-  def _apply_dense(self, grad, var, state):
-    rms = state.get_slot(var, "rms")
-    mom = state.get_slot(var, "momentum")
-    if self._centered:
-      mg = state.get_slot(var, "mg")
-      return training_ops.apply_centered_rms_prop(
-          var,
-          mg,
-          rms,
-          mom,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          # epsilon is now the rms initial value and is not added to the
-          # denominator anymore, hence calling the kernel op with epsilon=0.
-          0,
-          grad,
-          use_locking=self._use_locking).op
-    else:
-      return training_ops.apply_rms_prop(
-          var,
-          rms,
-          mom,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          use_locking=self._use_locking).op
-
-  def _resource_apply_dense(self, grad, var, state):
-    rms = state.get_slot(var, "rms")
-    mom = state.get_slot(var, "momentum")
-    if self._centered:
-      mg = state.get_slot(var, "mg")
-      return training_ops.resource_apply_centered_rms_prop(
-          var.handle,
-          mg.handle,
-          rms.handle,
-          mom.handle,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          use_locking=self._use_locking)
-    else:
-      return training_ops.resource_apply_rms_prop(
-          var.handle,
-          rms.handle,
-          mom.handle,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          use_locking=self._use_locking)
-
-  def _apply_sparse(self, grad, var, state):
-    rms = state.get_slot(var, "rms")
-    mom = state.get_slot(var, "momentum")
-    if self._centered:
-      mg = state.get_slot(var, "mg")
-      return training_ops.sparse_apply_centered_rms_prop(
-          var,
-          mg,
-          rms,
-          mom,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad.values,
-          grad.indices,
-          use_locking=self._use_locking)
-    else:
-      return training_ops.sparse_apply_rms_prop(
-          var,
-          rms,
-          mom,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad.values,
-          grad.indices,
-          use_locking=self._use_locking)
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    rms = state.get_slot(var, "rms")
-    mom = state.get_slot(var, "momentum")
-    if self._centered:
-      mg = self.get_slot(var, "mg")
-      return training_ops.resource_sparse_apply_centered_rms_prop(
-          var.handle,
-          mg.handle,
-          rms.handle,
-          mom.handle,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          indices,
-          use_locking=self._use_locking)
-    else:
-      return training_ops.resource_sparse_apply_rms_prop(
-          var.handle,
-          rms.handle,
-          mom.handle,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          indices,
-          use_locking=self._use_locking)
+    super(RMSPropOptimizer, self).__init__(
+        learning_rate=learning_rate,
+        rho=decay,
+        momentum=momentum,
+        epsilon=epsilon,
+        centered=centered,
+        name=name)
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 4a72c4b3f3..c4d23f117f 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -62,6 +62,7 @@ py_library(
         ":backend",
         ":engine",
         ":layers",
+        ":optimizer_v2",
         "//tensorflow/python/saved_model",
         "//tensorflow/python:training",
     ],
@@ -189,6 +190,30 @@ py_library(
     ],
 )
 
+py_library(
+    name = "optimizer_v2",
+    srcs = [
+        "optimizer_v2/adadelta.py",
+        "optimizer_v2/adagrad.py",
+        "optimizer_v2/adam.py",
+        "optimizer_v2/optimizer_v2.py",
+        "optimizer_v2/rmsprop.py",
+        "optimizer_v2/sgd.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:distribute",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+    ],
+)
+
 py_test(
     name = "integration_test",
     size = "medium",
@@ -827,3 +852,133 @@ py_library(
         "//third_party/py/numpy",
     ],
 )
+
+cuda_py_test(
+    name = "adadelta_test",
+    size = "medium",
+    srcs = ["optimizer_v2/adadelta_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "adagrad_test",
+    size = "small",
+    srcs = ["optimizer_v2/adagrad_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "adam_test",
+    size = "small",
+    srcs = ["optimizer_v2/adam_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "checkpointable_utils_test",
+    srcs = ["optimizer_v2/checkpointable_utils_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "@six_archive//:six",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:layers",
+        "//tensorflow/python:layers_base",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python/keras",
+    ],
+    tags = ["notsan"],
+)
+
+cuda_py_test(
+    name = "sgd_test",
+    size = "medium",
+    srcs = ["optimizer_v2/sgd_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:resources",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
+    ],
+)
+
+cuda_py_test(
+    name = "optimizer_v2_test",
+    size = "medium",
+    srcs = ["optimizer_v2/optimizer_v2_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:clip_ops",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:variables",
+    ],
+)
+
+cuda_py_test(
+    name = "rmsprop_test",
+    size = "small",
+    srcs = ["optimizer_v2/rmsprop_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+    tags = ["optonly"],
+)
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py b/tensorflow/python/keras/optimizer_v2/adadelta.py
new file mode 100644
index 0000000000..d3b3c9c12e
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adadelta.py
@@ -0,0 +1,116 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adadelta for TensorFlow."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.training import training_ops
+
+
+class Adadelta(optimizer_v2.OptimizerV2):
+  """Adadelta optimizer.
+
+  It is recommended to leave the parameters of this optimizer at their default
+  values.
+
+  See [M. D. Zeiler](http://arxiv.org/abs/1212.5701)
+  ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
+
+  Some of the args below are hyperparameters, where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Arguments:
+      learning_rate: float hyperparameter >= 0. Learning rate. It is recommended
+        to leave it at the default value.
+      rho: float hyperparameter >= 0. The decay rate.
+      epsilon: float hyperparameter >= 0. Fuzz factor. A constant epsilon used
+        to better condition the grad update.
+      name: Optional name prefix for the operations created when applying
+        gradients.  Defaults to 'Adadelta'.
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               rho=0.95,
+               epsilon=1e-8,
+               name="Adadelta"):
+    super(Adadelta, self).__init__(name)
+    self._set_hyper("learning_rate", learning_rate)
+    self._set_hyper("rho", rho)
+    self._set_hyper("epsilon", epsilon)
+
+  def _create_vars(self, var_list, state):
+    for v in var_list:
+      state.zeros_slot(v, "accum")
+      state.zeros_slot(v, "accum_update")
+
+  def _apply_dense(self, grad, var, state):
+    accum = state.get_slot(var, "accum")
+    accum_update = state.get_slot(var, "accum_update")
+    return training_ops.apply_adadelta(
+        var,
+        accum,
+        accum_update,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("rho", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _resource_apply_dense(self, grad, var, state):
+    accum = state.get_slot(var, "accum")
+    accum_update = state.get_slot(var, "accum_update")
+    return training_ops.resource_apply_adadelta(
+        var.handle,
+        accum.handle,
+        accum_update.handle,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("rho", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var, state):
+    accum = state.get_slot(var, "accum")
+    accum_update = state.get_slot(var, "accum_update")
+    return training_ops.sparse_apply_adadelta(
+        var,
+        accum,
+        accum_update,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("rho", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad.values,
+        grad.indices,
+        use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    accum = state.get_slot(var, "accum")
+    accum_update = state.get_slot(var, "accum_update")
+    return training_ops.resource_sparse_apply_adadelta(
+        var.handle,
+        accum.handle,
+        accum_update.handle,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("rho", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad,
+        indices,
+        use_locking=self._use_locking)
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta_test.py b/tensorflow/python/keras/optimizer_v2/adadelta_test.py
new file mode 100644
index 0000000000..6e48f92e4f
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adadelta_test.py
@@ -0,0 +1,166 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Adadelta Optimizer."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.keras.optimizer_v2 import adadelta
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class AdadeltaOptimizerTest(test.TestCase):
+
+  def doTestBasic(self, use_resource=False):
+    num_updates = 4  # number of ADADELTA steps to perform
+    for dtype in [dtypes.half, dtypes.float32]:
+      for grad in [0.2, 0.1, 0.01]:
+        for lr in [1.0, 0.5, 0.1]:
+          with self.cached_session():
+            var0_init = [1.0, 2.0]
+            var1_init = [3.0, 4.0]
+            if use_resource:
+              var0 = resource_variable_ops.ResourceVariable(
+                  var0_init, dtype=dtype)
+              var1 = resource_variable_ops.ResourceVariable(
+                  var1_init, dtype=dtype)
+            else:
+              var0 = variables.Variable(var0_init, dtype=dtype)
+              var1 = variables.Variable(var1_init, dtype=dtype)
+
+            grads = constant_op.constant([grad, grad], dtype=dtype)
+
+            accum = 0.0
+            accum_update = 0.0
+
+            # ADADELTA gradient optimizer
+            rho = 0.95
+            epsilon = 1e-8
+            adadelta_opt = adadelta.Adadelta(lr, rho, epsilon)
+            adadelta_update = adadelta_opt.apply_gradients(
+                zip([grads, grads], [var0, var1]))
+
+            opt_vars = adadelta_opt.variables()
+            self.assertStartsWith(opt_vars[0].name, var0._shared_name)
+            self.assertStartsWith(opt_vars[1].name, var0._shared_name)
+            self.assertStartsWith(opt_vars[2].name, var1._shared_name)
+            self.assertStartsWith(opt_vars[3].name, var1._shared_name)
+            self.assertEqual(4, len(opt_vars))
+
+            variables.global_variables_initializer().run()
+
+            # Assign slots
+            slot = [None] * 2
+            slot_update = [None] * 2
+            self.assertEqual(["accum", "accum_update"],
+                             adadelta_opt.get_slot_names())
+            slot[0] = adadelta_opt.get_slot(var0, "accum")
+            self.assertEquals(slot[0].get_shape(), var0.get_shape())
+            self.assertFalse(slot[0] in variables.trainable_variables())
+
+            slot_update[0] = adadelta_opt.get_slot(var0, "accum_update")
+            self.assertEquals(slot_update[0].get_shape(), var0.get_shape())
+            self.assertFalse(slot_update[0] in variables.trainable_variables())
+
+            slot[1] = adadelta_opt.get_slot(var1, "accum")
+            self.assertEquals(slot[1].get_shape(), var1.get_shape())
+            self.assertFalse(slot[1] in variables.trainable_variables())
+
+            slot_update[1] = adadelta_opt.get_slot(var1, "accum_update")
+            self.assertEquals(slot_update[1].get_shape(), var1.get_shape())
+            self.assertFalse(slot_update[1] in variables.trainable_variables())
+
+            # Fetch params to validate initial values
+            self.assertAllClose(var0_init, var0.eval())
+            self.assertAllClose(var1_init, var1.eval())
+
+            update = [None] * num_updates
+            tot_update = 0
+            for step in range(num_updates):
+              # Run adadelta update for comparison
+              adadelta_update.run()
+
+              # Perform initial update without previous accum values
+              accum = accum * rho + (grad**2) * (1 - rho)
+              update[step] = (np.sqrt(accum_update + epsilon) *
+                              (1. / np.sqrt(accum + epsilon)) * grad)
+              accum_update = (accum_update * rho + (update[step]**2) *
+                              (1.0 - rho))
+              tot_update += update[step] * lr
+
+              # Check that the accumulators have been updated
+              for slot_idx in range(2):
+                self.assertAllCloseAccordingToType(
+                    np.array([accum, accum], dtype=dtype.as_numpy_dtype()),
+                    slot[slot_idx].eval(),
+                    rtol=1e-5)
+
+                self.assertAllCloseAccordingToType(
+                    np.array(
+                        [accum_update, accum_update],
+                        dtype=dtype.as_numpy_dtype()),
+                    slot_update[slot_idx].eval(),
+                    rtol=1e-5)
+
+              # Check that the parameters have been updated
+              self.assertAllCloseAccordingToType(
+                  np.array(
+                      [var0_init[0] - tot_update, var0_init[1] - tot_update],
+                      dtype=dtype.as_numpy_dtype()),
+                  var0.eval(),
+                  rtol=1e-5)
+
+              self.assertAllCloseAccordingToType(
+                  np.array(
+                      [var1_init[0] - tot_update, var1_init[1] - tot_update],
+                      dtype=dtype.as_numpy_dtype()),
+                  var1.eval(),
+                  rtol=1e-5)
+
+  def testBasic(self):
+    self.doTestBasic(use_resource=False)
+
+  def testResourceBasic(self):
+    self.doTestBasic(use_resource=True)
+
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = adadelta.Adadelta(1.0, 1.0, 1.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[-111, -138]], var0.eval())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad.py b/tensorflow/python/keras/optimizer_v2/adagrad.py
new file mode 100644
index 0000000000..2d8cec2300
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adagrad.py
@@ -0,0 +1,119 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adagrad optimizer for TensorFlow."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.training import training_ops
+
+
+class Adagrad(optimizer_v2.OptimizerV2):
+  """Adagrad optimizer.
+
+  It is recommended to leave the parameters of this optimizer at their default
+  values.
+
+  See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
+  or this
+  [intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf).
+
+  The learning_rate arg below is a hyperparameter, where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Arguments:
+      learning_rate: float hyperparameter >= 0. Learning rate.
+      initial_accumulator_value: A floating point value. Starting value for the
+        accumulators, must be positive.
+      name: Optional name prefix for the operations created when applying
+        gradients.  Defaults to 'Adagrad'.
+
+  Raises:
+    ValueError: If the `initial_accumulator_value` is invalid.
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               initial_accumulator_value=0.1,
+               name="Adagrad"):
+    if initial_accumulator_value <= 0.0:
+      raise ValueError("initial_accumulator_value must be positive: %s" %
+                       initial_accumulator_value)
+    super(Adagrad, self).__init__(name)
+    self._set_hyper("learning_rate", learning_rate)
+
+    self._initial_accumulator_value = initial_accumulator_value
+
+  def _create_vars(self, var_list, state):
+    for v in var_list:
+      dtype = v.dtype.base_dtype
+      if v.get_shape().is_fully_defined():
+        init = init_ops.constant_initializer(self._initial_accumulator_value,
+                                             dtype=dtype)
+      else:
+        def init(v=v, dtype=dtype):
+          # Use a Tensor instead of initializer if variable does not have
+          # static shape.
+          init_constant = gen_array_ops.fill(array_ops.shape(v),
+                                             self._initial_accumulator_value)
+          return math_ops.cast(init_constant, dtype)
+      state.create_slot_with_initializer(v, init, v.get_shape(), dtype,
+                                         "accumulator")
+
+  def _apply_dense(self, grad, var, state):
+    acc = state.get_slot(var, "accumulator")
+    return training_ops.apply_adagrad(
+        var,
+        acc,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _resource_apply_dense(self, grad, var, state):
+    acc = state.get_slot(var, "accumulator")
+    return training_ops.resource_apply_adagrad(
+        var.handle,
+        acc.handle,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var, state):
+    acc = state.get_slot(var, "accumulator")
+    return training_ops.sparse_apply_adagrad(
+        var,
+        acc,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        grad.values,
+        grad.indices,
+        use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    acc = state.get_slot(var, "accumulator")
+    return training_ops.resource_sparse_apply_adagrad(
+        var.handle,
+        acc.handle,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        grad,
+        indices,
+        use_locking=self._use_locking)
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad_test.py b/tensorflow/python/keras/optimizer_v2/adagrad_test.py
new file mode 100644
index 0000000000..fc4ef5c399
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adagrad_test.py
@@ -0,0 +1,276 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for aggregate operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.keras.optimizer_v2 import adagrad
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class AdagradOptimizerTest(test.TestCase):
+
+  def doTestBasic(self, use_resource=False):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+          var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+        else:
+          var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+          var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        ada_opt = adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
+        ada_update = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Run 3 steps of adagrad
+        for _ in range(3):
+          ada_update.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([2.715679168701172, 3.715679168701172]), var1.eval())
+
+  def testBasic(self):
+    self.doTestBasic()
+
+  def testBasicResource(self):
+    self.doTestBasic(use_resource=True)
+
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable(
+            [[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = adagrad.Adagrad(1.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType(
+            [[1.0, 2.0], [3.0, 4.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[0, 1], [3, 4]], var0.eval(), atol=0.01)
+
+  def testTensorLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        ada_opt = adagrad.Adagrad(
+            constant_op.constant(3.0), initial_accumulator_value=0.1)
+        ada_update = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Run 3 steps of adagrad
+        for _ in range(3):
+          ada_update.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([2.715679168701172, 3.715679168701172]), var1.eval())
+
+  def testSparseBasic(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
+        var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(
+                [0.1], shape=[1, 1], dtype=dtype),
+            constant_op.constant([0]),
+            constant_op.constant([2, 1]))
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(
+                [0.01], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([2, 1]))
+        ada_opt = adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
+        ada_update = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([[1.0], [2.0]], var0.eval())
+        self.assertAllClose([[3.0], [4.0]], var1.eval())
+        # Run 3 step of sgd
+        for _ in range(3):
+          ada_update.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            np.array([[-1.6026098728179932], [2.0]]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([[3.0], [3.715679168701172]]), var1.eval())
+
+  def testSparseRepeatedIndices(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        repeated_index_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        aggregated_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        grad_repeated_index = ops.IndexedSlices(
+            constant_op.constant(
+                [0.1, 0.1], shape=[2, 1], dtype=dtype),
+            constant_op.constant([1, 1]),
+            constant_op.constant([2, 1]))
+        grad_aggregated = ops.IndexedSlices(
+            constant_op.constant(
+                [0.2], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([2, 1]))
+        repeated_update = adagrad.Adagrad(3.0).apply_gradients(
+            [(grad_repeated_index, repeated_index_update_var)])
+        aggregated_update = adagrad.Adagrad(3.0).apply_gradients(
+            [(grad_aggregated, aggregated_update_var)])
+        variables.global_variables_initializer().run()
+        self.assertAllClose(aggregated_update_var.eval(),
+                            repeated_index_update_var.eval())
+        for _ in range(3):
+          repeated_update.run()
+          aggregated_update.run()
+          self.assertAllClose(aggregated_update_var.eval(),
+                              repeated_index_update_var.eval())
+
+  def testSparseRepeatedIndicesResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var_repeated = resource_variable_ops.ResourceVariable(
+            [1.0, 2.0], dtype=dtype)
+        loss_repeated = math_ops.reduce_sum(
+            embedding_ops.embedding_lookup(var_repeated, [0, 0]))
+        var_aggregated = resource_variable_ops.ResourceVariable(
+            [1.0, 2.0], dtype=dtype)
+        loss_aggregated = 2 * math_ops.reduce_sum(
+            embedding_ops.embedding_lookup(var_aggregated, [0]))
+        update_op_repeated = adagrad.Adagrad(2.0).minimize(loss_repeated)
+        update_op_aggregated = adagrad.Adagrad(2.0).minimize(loss_aggregated)
+        variables.global_variables_initializer().run()
+        self.assertAllCloseAccordingToType(
+            var_repeated.eval(), var_aggregated.eval())
+        for _ in range(3):
+          update_op_repeated.run()
+          update_op_aggregated.run()
+          self.assertAllCloseAccordingToType(
+              var_repeated.eval(), var_aggregated.eval())
+
+  def testSparseStability(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        shape = [1, 6]
+        var0 = variables.Variable(
+            [[
+                0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
+                -0.0105945
+            ]],
+            dtype=dtype)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(
+                [[
+                    -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05,
+                    -8.4877e-05, -9.48906e-05
+                ]],
+                shape=shape,
+                dtype=dtype),
+            constant_op.constant([0]),
+            constant_op.constant(shape))
+        ada_opt = adagrad.Adagrad(1.0, initial_accumulator_value=0.1)
+        ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
+        self.assertEqual(["accumulator"], ada_opt.get_slot_names())
+        slot0 = ada_opt.get_slot(var0, "accumulator")
+        init = variables.global_variables_initializer()
+        for _ in range(100):
+          init.run()
+          ada_update.run()
+          self.assertAllCloseAccordingToType(
+              np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), slot0.eval())
+          self.assertAllCloseAccordingToType(
+              np.array([[
+                  0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573,
+                  -0.01029443
+              ]]), var0.eval())
+
+  def testSharing(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        ada_opt = adagrad.Adagrad(3.0)
+        # Apply the optimizer twice.  Both applications will use
+        # the same accums.
+        ada_update1 = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        ada_update2 = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        self.assertEqual(["accumulator"], ada_opt.get_slot_names())
+        slot0 = ada_opt.get_slot(var0, "accumulator")
+        self.assertEquals(slot0.get_shape(), var0.get_shape())
+        slot1 = ada_opt.get_slot(var1, "accumulator")
+        self.assertEquals(slot1.get_shape(), var1.get_shape())
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values.
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Mix the first and the second adagrad for 3 steps.
+        ada_update1.run()
+        ada_update2.run()
+        ada_update1.run()
+        # Validate updated params (the same as with only 1 Adagrad).
+        self.assertAllCloseAccordingToType(
+            np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([2.715679168701172, 3.715679168701172]), var1.eval())
+
+  def testDynamicShapeVariable_Ok(self):
+    with self.cached_session():
+      v = variable_scope.get_variable("v", initializer=constant_op.constant(1.),
+                                      validate_shape=False)
+      self.assertFalse(v.shape.is_fully_defined())
+      # Creating optimizer should cause no exception.
+      adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py
new file mode 100644
index 0000000000..8367228d7a
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adam.py
@@ -0,0 +1,203 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adam optimizer for TensorFlow."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.training import training_ops
+
+
+class Adam(optimizer_v2.OptimizerV2):
+  r"""Adam Optimizer.
+
+  Default parameters follow those provided in the original paper.
+
+  See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
+  ([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
+
+  Some of the args below are hyperparameters where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Initialization:
+
+  $$m_0 := 0 \text{(Initialize initial 1st moment vector)}$$
+  $$v_0 := 0 \text{(Initialize initial 2nd moment vector)}$$
+  $$t := 0 \text{(Initialize timestep)}$$
+  The update rule for `variable` with gradient `g` uses an optimization
+  described at the end of section2 of the paper:
+
+  $$t := t + 1$$
+  $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
+
+  $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
+  $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
+  $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
+
+  The default value of 1e-8 for epsilon might not be a good default in
+  general. For example, when training an Inception network on ImageNet a
+  current good choice is 1.0 or 0.1. Note that since AdamOptimizer uses the
+  formulation just before Section 2.1 of the Kingma and Ba paper rather than
+  the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon
+  hat" in the paper.
+
+  The sparse implementation of this algorithm (used when the gradient is an
+  IndexedSlices object, typically because of `tf.gather` or an embedding
+  lookup in the forward pass) does apply momentum to variable slices even if
+  they were not used in the forward pass (meaning they have a gradient equal
+  to zero). Momentum decay (beta1) is also applied to the entire momentum
+  accumulator. This means that the sparse behavior is equivalent to the dense
+  behavior (in contrast to some momentum implementations which ignore momentum
+  unless a variable slice was actually used).
+
+  Arguments:
+      learning_rate: float hyperparameter >= 0. Learning rate.
+      beta_1: float hyperparameter, 0 < beta_1 < 1. Generally close to 1. The
+        exponential decay rate for the 1st moment estimates.
+      beta_2: float hyperparameter, 0 < beta_2 < 1. Generally close to 1. The
+        exponential decay rate for the 2nd moment estimates.
+      epsilon: float hyperparameter >= 0. Fuzz factor. This epsilon is "epsilon
+        hat" in the Kingma and Ba paper (in the formula just before Section
+        2.1), not the epsilon in Algorithm 1 of the paper.
+      name: Optional name for the operations created when applying gradients.
+        Defaults to "Adam".
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               beta_1=0.9,
+               beta_2=0.999,
+               epsilon=1e-8,
+               name="Adam"):
+    super(Adam, self).__init__(name)
+
+    self._set_hyper("learning_rate", learning_rate)
+    self._set_hyper("beta_1", beta_1)
+    self._set_hyper("beta_2", beta_2)
+    self._set_hyper("epsilon", epsilon)
+
+  def _get_beta_accumulators(self, state=None):
+    if state is None:
+      state = self._get_per_graph_state()
+    return (state.get_non_slot("beta_1_power"),
+            state.get_non_slot("beta_2_power"))
+
+  def _create_vars(self, var_list, state):
+    # Non-slot variables end up on the same device(s).
+    state.create_non_slot(
+        initial_value=lambda: state.get_hyper("beta_1"), name="beta_1_power")
+    state.create_non_slot(
+        initial_value=lambda: state.get_hyper("beta_2"), name="beta_2_power")
+
+    # Create slots for the first and second moments.
+    for v in var_list:
+      state.zeros_slot(v, "m")
+      state.zeros_slot(v, "v")
+
+  def _apply_dense(self, grad, var, state):
+    m = state.get_slot(var, "m")
+    v = state.get_slot(var, "v")
+    beta_1_power, beta_2_power = self._get_beta_accumulators(state)
+    return training_ops.apply_adam(
+        var,
+        m,
+        v,
+        math_ops.cast(beta_1_power, var.dtype.base_dtype),
+        math_ops.cast(beta_2_power, var.dtype.base_dtype),
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("beta_1", var.dtype.base_dtype),
+        state.get_hyper("beta_2", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking).op
+
+  def _resource_apply_dense(self, grad, var, state):
+    m = state.get_slot(var, "m")
+    v = state.get_slot(var, "v")
+    beta_1_power, beta_2_power = self._get_beta_accumulators(state)
+    return training_ops.resource_apply_adam(
+        var.handle,
+        m.handle,
+        v.handle,
+        math_ops.cast(beta_1_power, grad.dtype.base_dtype),
+        math_ops.cast(beta_2_power, grad.dtype.base_dtype),
+        state.get_hyper("learning_rate", grad.dtype.base_dtype),
+        state.get_hyper("beta_1", grad.dtype.base_dtype),
+        state.get_hyper("beta_2", grad.dtype.base_dtype),
+        state.get_hyper("epsilon", grad.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _apply_sparse_shared(self, grad, var, indices, scatter_add, state):
+    beta_1_power, beta_2_power = self._get_beta_accumulators(state)
+    beta_1_power = math_ops.cast(beta_1_power, var.dtype.base_dtype)
+    beta_2_power = math_ops.cast(beta_2_power, var.dtype.base_dtype)
+    lr_t = state.get_hyper("learning_rate", var.dtype.base_dtype)
+    beta_1_t = state.get_hyper("beta_1", var.dtype.base_dtype)
+    beta_2_t = state.get_hyper("beta_2", var.dtype.base_dtype)
+    epsilon_t = state.get_hyper("epsilon", var.dtype.base_dtype)
+    lr = (lr_t * math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power))
+    # m_t = beta_1 * m + (1 - beta_1) * g_t
+    m = state.get_slot(var, "m")
+    m_scaled_g_values = grad * (1 - beta_1_t)
+    m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
+    with ops.control_dependencies([m_t]):
+      m_t = scatter_add(m, indices, m_scaled_g_values)
+    # v_t = beta_2 * v + (1 - beta_2) * (g_t * g_t)
+    v = state.get_slot(var, "v")
+    v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
+    v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
+    with ops.control_dependencies([v_t]):
+      v_t = scatter_add(v, indices, v_scaled_g_values)
+    v_sqrt = math_ops.sqrt(v_t)
+    var_update = state_ops.assign_sub(var,
+                                      lr * m_t / (v_sqrt + epsilon_t),
+                                      use_locking=self._use_locking)
+    return control_flow_ops.group(*[var_update, m_t, v_t])
+
+  def _apply_sparse(self, grad, var, state):
+    return self._apply_sparse_shared(
+        grad.values, var, grad.indices,
+        lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
+            x, i, v, use_locking=self._use_locking),
+        state)
+
+  def _resource_scatter_add(self, x, i, v):
+    with ops.control_dependencies(
+        [resource_variable_ops.resource_scatter_add(
+            x.handle, i, v)]):
+      return x.value()
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    return self._apply_sparse_shared(
+        grad, var, indices, self._resource_scatter_add, state)
+
+  def _finish(self, state):
+    # Update the power accumulators.
+    beta_1_power, beta_2_power = self._get_beta_accumulators(state)
+    update_beta_1 = beta_1_power.assign(
+        beta_1_power * state.get_hyper("beta_1"), use_locking=self._use_locking)
+    update_beta_2 = beta_2_power.assign(
+        beta_2_power * state.get_hyper("beta_2"), use_locking=self._use_locking)
+    return control_flow_ops.group(update_beta_1, update_beta_2)
diff --git a/tensorflow/python/keras/optimizer_v2/adam_test.py b/tensorflow/python/keras/optimizer_v2/adam_test.py
new file mode 100644
index 0000000000..77796317a1
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adam_test.py
@@ -0,0 +1,333 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Adam optimizer."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+def adam_update_numpy(param,
+                      g_t,
+                      t,
+                      m,
+                      v,
+                      alpha=0.001,
+                      beta1=0.9,
+                      beta2=0.999,
+                      epsilon=1e-8):
+  alpha_t = alpha * np.sqrt(1 - beta2**t) / (1 - beta1**t)
+
+  m_t = beta1 * m + (1 - beta1) * g_t
+  v_t = beta2 * v + (1 - beta2) * g_t * g_t
+
+  param_t = param - alpha_t * m_t / (np.sqrt(v_t) + epsilon)
+  return param_t, m_t, v_t
+
+
+class AdamOptimizerTest(test.TestCase):
+
+  def doTestSparse(self, use_resource=False):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable(var0_np)
+          var1 = resource_variable_ops.ResourceVariable(var1_np)
+        else:
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+        grads0_np_indices = np.array([0, 1], dtype=np.int32)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(grads0_np),
+            constant_op.constant(grads0_np_indices), constant_op.constant([2]))
+        grads1_np_indices = np.array([0, 1], dtype=np.int32)
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(grads1_np),
+            constant_op.constant(grads1_np_indices), constant_op.constant([2]))
+        opt = adam.Adam()
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
+          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
+          update.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testSparse(self):
+    self.doTestSparse(use_resource=False)
+
+  def testResourceSparse(self):
+    self.doTestSparse(use_resource=True)
+
+  def testSparseDevicePlacement(self):
+    for index_dtype in [dtypes.int32, dtypes.int64]:
+      with self.test_session(force_gpu=test.is_gpu_available()):
+        # If a GPU is available, tests that all optimizer ops can be placed on
+        # it (i.e. they have GPU kernels).
+        var = variables.Variable([[1.0], [2.0]])
+        indices = constant_op.constant([0, 1], dtype=index_dtype)
+        gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices))
+        optimizer = adam.Adam(3.0)
+        minimize_op = optimizer.minimize(gathered_sum)
+        variables.global_variables_initializer().run()
+        minimize_op.run()
+
+  def testSparseRepeatedIndices(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        repeated_index_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        aggregated_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        grad_repeated_index = ops.IndexedSlices(
+            constant_op.constant(
+                [0.1, 0.1], shape=[2, 1], dtype=dtype),
+            constant_op.constant([1, 1]),
+            constant_op.constant([2, 1]))
+        grad_aggregated = ops.IndexedSlices(
+            constant_op.constant(
+                [0.2], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([2, 1]))
+        repeated_update = adam.Adam().apply_gradients(
+            [(grad_repeated_index, repeated_index_update_var)])
+        aggregated_update = adam.Adam().apply_gradients(
+            [(grad_aggregated, aggregated_update_var)])
+        variables.global_variables_initializer().run()
+        self.assertAllClose(aggregated_update_var.eval(),
+                            repeated_index_update_var.eval())
+        for _ in range(3):
+          repeated_update.run()
+          aggregated_update.run()
+          self.assertAllClose(aggregated_update_var.eval(),
+                              repeated_index_update_var.eval())
+
+  def doTestBasic(self, use_resource=False):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      with self.session(graph=ops.Graph()):
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable(
+              var0_np, name="var0_%d" % i)
+          var1 = resource_variable_ops.ResourceVariable(
+              var1_np, name="var1_%d" % i)
+        else:
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+
+        opt = adam.Adam()
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        opt_variables = opt.variables()
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+        self.assertTrue(beta1_power is not None)
+        self.assertTrue(beta2_power is not None)
+        self.assertIn(beta1_power, opt_variables)
+        self.assertIn(beta2_power, opt_variables)
+
+        with ops.Graph().as_default():
+          # Shouldn't return non-slot variables from other graphs.
+          self.assertEqual(0, len(opt.variables()))
+
+        if not context.executing_eagerly():
+          self.evaluate(variables.global_variables_initializer())
+          # Fetch params to validate initial values
+          self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+          self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          if not context.executing_eagerly():
+            self.evaluate(update)
+          elif t > 1:
+            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+
+          self.assertAllCloseAccordingToType(0.9**(t + 1),
+                                             self.evaluate(beta1_power))
+          self.assertAllCloseAccordingToType(0.999**(t + 1),
+                                             self.evaluate(beta2_power))
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+          if use_resource:
+            self.assertEqual("var0_%d/Adam:0" % (i,),
+                             opt.get_slot(var=var0, name="m").name)
+
+  def testBasic(self):
+    with self.cached_session():
+      self.doTestBasic(use_resource=False)
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testResourceBasic(self):
+    self.doTestBasic(use_resource=True)
+
+  def testTensorLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+        opt = adam.Adam(constant_op.constant(0.001))
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
+          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
+          update.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testSharing(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+        opt = adam.Adam()
+        update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        # Run 3 steps of intertwined Adam1 and Adam2.
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
+          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
+          if t % 2 == 0:
+            update1.run()
+          else:
+            update2.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testTwoSessions(self):
+    optimizer = adam.Adam()
+    g = ops.Graph()
+    with g.as_default():
+      with session.Session():
+        var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+        grads0 = constant_op.constant(np.array([0.1, 0.1]))
+        optimizer.apply_gradients([(grads0, var0)])
+
+    gg = ops.Graph()
+    with gg.as_default():
+      with session.Session():
+        var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+        grads0 = constant_op.constant(np.array([0.1, 0.1]))
+
+        # If the optimizer saves any state not keyed by graph the following line
+        # fails.
+        optimizer.apply_gradients([(grads0, var0)])
+
+  def testSlotsUniqueEager(self):
+    with context.eager_mode():
+      v1 = resource_variable_ops.ResourceVariable(1.)
+      v2 = resource_variable_ops.ResourceVariable(1.)
+      opt = adam.Adam(1.)
+      opt.minimize(lambda: v1 + v2)
+      # There should be two non-slot variables, and two unique slot variables
+      # for v1 and v2 respectively.
+      self.assertEqual(6, len(set(opt.variables())))
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py b/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py
new file mode 100644
index 0000000000..338c04148b
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py
@@ -0,0 +1,761 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# TODO(josh11b): Forked from contrib/eager/python to test OptimizerV2 the same way
+# OptimizerV1 is tested. This file should be removed once the fork is resolved.
+
+import functools
+import os
+
+import six
+
+from tensorflow.python.client import session as session_lib
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.layers import core
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import template
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as core_saver
+from tensorflow.python.training import training_util
+from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.checkpointable import util
+
+
+class NonLayerCheckpointable(tracking.Checkpointable):
+
+  def __init__(self):
+    super(NonLayerCheckpointable, self).__init__()
+    self.a_variable = util.add_variable(
+        self, name="a_variable", shape=[])
+
+
+# pylint: disable=not-callable
+class MyModel(training.Model):
+  """A concrete Model for testing."""
+
+  def __init__(self):
+    super(MyModel, self).__init__()
+    self._named_dense = core.Dense(1, use_bias=True)
+    self._second = core.Dense(1, use_bias=False)
+    # We can still track Checkpointables which aren't Layers.
+    self._non_layer = NonLayerCheckpointable()
+
+  def call(self, values):
+    ret = self._second(self._named_dense(values))
+    return ret
+
+
+class _MirroringSaveable(
+    core_saver.BaseSaverBuilder.ResourceVariableSaveable):
+
+  def __init__(self, primary_variable, mirrored_variable, name):
+    self._primary_variable = primary_variable
+    self._mirrored_variable = mirrored_variable
+    super(_MirroringSaveable, self).__init__(
+        self._primary_variable, "", name)
+
+  def restore(self, restored_tensors, restored_shapes):
+    """Restore the same value into both variables."""
+    tensor, = restored_tensors
+    return control_flow_ops.group(
+        self._primary_variable.assign(tensor),
+        self._mirrored_variable.assign(tensor))
+
+
+class CheckpointingTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNamingWithOptimizer(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    # A nuisance Model using the same optimizer. Its slot variables should not
+    # go in the checkpoint, since it is never depended on.
+    other_model = MyModel()
+    optimizer = adam.Adam(0.001)
+    optimizer_step = training_util.get_or_create_global_step()
+    root_checkpointable = util.Checkpoint(
+        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
+    if context.executing_eagerly():
+      optimizer.minimize(
+          lambda: model(input_value),
+          global_step=optimizer_step)
+      optimizer.minimize(
+          lambda: other_model(input_value),
+          global_step=optimizer_step)
+    else:
+      train_op = optimizer.minimize(
+          model(input_value), global_step=optimizer_step)
+      optimizer.minimize(
+          other_model(input_value),
+          global_step=optimizer_step)
+      self.evaluate(util.gather_initializers(
+          root_checkpointable))
+      self.evaluate(train_op)
+    named_variables, serialized_graph, _ = (
+        util._serialize_object_graph(
+            root_checkpointable, saveables_cache=None))
+    expected_checkpoint_names = (
+        # Created in the root node, so no prefix.
+        "optimizer_step",
+        "model/_second/kernel",
+        "model/_named_dense/kernel",
+        "model/_named_dense/bias",
+        # non-Layer dependency of the model
+        "model/_non_layer/a_variable",
+        # The optimizer creates two non-slot variables
+        "optimizer/beta_1_power",
+        "optimizer/beta_2_power",
+        # Slot variables
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
+    )
+    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
+    expected_checkpoint_names = [
+        name + suffix for name in expected_checkpoint_names]
+    # The Dense layers also save get_config() JSON
+    expected_checkpoint_names.extend(
+        ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
+         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"])
+    named_variables = {v.name: v for v in named_variables}
+    six.assertCountEqual(self, expected_checkpoint_names,
+                         named_variables.keys())
+    # Check that we've mapped to the right variable objects (not exhaustive)
+    self.assertEqual(
+        "global_step",
+        named_variables["optimizer_step" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense_1/kernel",
+        named_variables["model/_second/kernel" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel",
+        named_variables["model/_named_dense/kernel" + suffix].full_name)
+    self.assertEqual(
+        "beta_1_power",
+        named_variables["optimizer/beta_1_power" + suffix].full_name)
+    self.assertEqual(
+        "beta_2_power",
+        named_variables["optimizer/beta_2_power" + suffix].full_name)
+    # Spot check the generated protocol buffers.
+    self.assertEqual("optimizer",
+                     serialized_graph.nodes[0].children[1].local_name)
+    optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
+        1].node_id]
+    self.assertEqual("beta_1_power", optimizer_node.children[0].local_name)
+    self.assertEqual(
+        "beta_1_power", serialized_graph.nodes[
+            optimizer_node.children[0].node_id].attributes[0].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel",
+        serialized_graph.nodes[optimizer_node.slot_variables[0]
+                               .original_variable_node_id]
+        .attributes[0].full_name)
+    # We strip off the :0 suffix, as variable.name-based saving does.
+    self.assertEqual(
+        "my_model/dense/kernel/Adam",
+        serialized_graph.nodes[optimizer_node.slot_variables[0]
+                               .slot_variable_node_id]
+        .attributes[0].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel/Adam:0",
+        optimizer.get_slot(
+            var=model._named_dense.kernel,
+            name="m").name)
+    self.assertEqual(
+        "model/_named_dense/kernel" + suffix,
+        serialized_graph.nodes[
+            optimizer_node.slot_variables[0]
+            .original_variable_node_id].attributes[0].checkpoint_key)
+    self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
+    self.assertEqual(
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
+        serialized_graph.nodes[
+            optimizer_node.slot_variables[0]
+            .slot_variable_node_id].attributes[0].checkpoint_key)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testSaveRestore(self):
+    model = MyModel()
+    optimizer = adam.Adam(0.001)
+    root_checkpointable = util.Checkpoint(
+        optimizer=optimizer, model=model)
+    input_value = constant_op.constant([[3.]])
+    if context.executing_eagerly():
+      optimizer.minimize(
+          lambda: model(input_value))
+    else:
+      train_op = optimizer.minimize(model(input_value))
+      # TODO(allenl): Make initialization more pleasant when graph building.
+      root_checkpointable.save_counter  # pylint: disable=pointless-statement
+      self.evaluate(util.gather_initializers(
+          root_checkpointable))
+      self.evaluate(train_op)
+    prefix = os.path.join(self.get_temp_dir(), "ckpt")
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
+    m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
+    self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
+    save_path = root_checkpointable.save(file_prefix=prefix)
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
+    self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3))
+    optimizer_variables = self.evaluate(optimizer.variables())
+    self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
+    # Immediate restoration
+    status = root_checkpointable.restore(save_path=save_path).assert_consumed()
+    status.run_restore_ops()
+    self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1]))
+    self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter))
+    self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
+    if not context.executing_eagerly():
+      return  # Restore-on-create is only supported when executing eagerly
+    on_create_model = MyModel()
+    on_create_optimizer = adam.Adam(
+        0.001,
+        # Preserve beta_1_power and beta_2_power when appying gradients
+        # so we can test that they've been restored correctly.
+        beta_1=1.0,
+        beta_2=1.0)
+    on_create_root = util.Checkpoint(
+        optimizer=on_create_optimizer, model=on_create_model)
+    # Deferred restoration
+    status = on_create_root.restore(save_path=save_path)
+    on_create_model(constant_op.constant([[3.]]))  # create variables
+    self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
+    self.assertAllEqual([42.],
+                        self.evaluate(
+                            on_create_model._named_dense.variables[1]))
+    on_create_m_bias_slot = on_create_optimizer.get_slot(
+        on_create_model._named_dense.variables[1], "m")
+    # Optimizer slot variables are created when the original variable is
+    # restored.
+    self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
+    self.assertAllEqual(optimizer_variables[2:],
+                        self.evaluate(on_create_optimizer.variables()))
+    dummy_var = resource_variable_ops.ResourceVariable([1.])
+    on_create_optimizer.minimize(loss=dummy_var.read_value)
+    status.assert_consumed()
+    beta_1_power, beta_2_power = on_create_optimizer._get_beta_accumulators()
+    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta_1_power))
+    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta_2_power))
+
+  # TODO(allenl): Debug garbage created by this test in python3.
+  def testDeferredRestorationUsageEager(self):
+    """An idiomatic eager execution example."""
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      model = MyModel()
+      optimizer = adam.Adam(0.001)
+      root = util.Checkpoint(
+          optimizer=optimizer, model=model,
+          optimizer_step=training_util.get_or_create_global_step())
+      root.restore(checkpoint_management.latest_checkpoint(
+          checkpoint_directory))
+      for _ in range(num_training_steps):
+        # TODO(allenl): Use a Dataset and serialize/checkpoint it.
+        input_value = constant_op.constant([[3.]])
+        optimizer.minimize(
+            lambda: model(input_value),  # pylint: disable=cell-var-from-loop
+            global_step=root.optimizer_step)
+      root.save(file_prefix=checkpoint_prefix)
+      self.assertEqual((training_continuation + 1) * num_training_steps,
+                       root.optimizer_step.numpy())
+
+  def testUsageGraph(self):
+    """Expected usage when graph building."""
+    with context.graph_mode():
+      num_training_steps = 10
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      for training_continuation in range(3):
+        with ops.Graph().as_default():
+          model = MyModel()
+          optimizer = adam.Adam(0.001)
+          root = util.Checkpoint(
+              optimizer=optimizer, model=model,
+              global_step=training_util.get_or_create_global_step())
+          input_value = constant_op.constant([[3.]])
+          train_op = optimizer.minimize(
+              model(input_value),
+              global_step=root.global_step)
+          checkpoint_path = checkpoint_management.latest_checkpoint(
+              checkpoint_directory)
+          with self.session(graph=ops.get_default_graph()) as session:
+            status = root.restore(save_path=checkpoint_path)
+            status.initialize_or_restore(session=session)
+            if checkpoint_path is None:
+              self.assertEqual(0, training_continuation)
+              with self.assertRaises(AssertionError):
+                status.assert_consumed()
+            else:
+              status.assert_consumed()
+            for _ in range(num_training_steps):
+              session.run(train_op)
+            root.save(file_prefix=checkpoint_prefix, session=session)
+            self.assertEqual((training_continuation + 1) * num_training_steps,
+                             session.run(root.global_step))
+            self.assertEqual(training_continuation + 1,
+                             session.run(root.save_counter))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testAgnosticUsage(self):
+    """Graph/eager agnostic usage."""
+    # Does create garbage when executing eagerly due to ops.Graph() creation.
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      with ops.Graph().as_default(), self.test_session(
+          graph=ops.get_default_graph()), test_util.device(use_gpu=True):
+        model = MyModel()
+        optimizer = adam.Adam(0.001)
+        root = util.Checkpoint(
+            optimizer=optimizer, model=model,
+            global_step=training_util.get_or_create_global_step())
+        checkpoint_path = checkpoint_management.latest_checkpoint(
+            checkpoint_directory)
+        status = root.restore(save_path=checkpoint_path)
+        input_value = constant_op.constant([[3.]])
+        train_fn = functools.partial(
+            optimizer.minimize,
+            functools.partial(model, input_value),
+            global_step=root.global_step)
+        if not context.executing_eagerly():
+          train_fn = functools.partial(self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        root.save(file_prefix=checkpoint_prefix)
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(root.global_step))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+
+  # pylint: disable=cell-var-from-loop
+  @test_util.run_in_graph_and_eager_modes
+  def testWithDefun(self):
+    num_training_steps = 2
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      with ops.Graph().as_default(), self.test_session(
+          graph=ops.get_default_graph()), test_util.device(use_gpu=True):
+        model = MyModel()
+        # Don't actually train so we can test variable values
+        optimizer = adam.Adam(0.)
+        root = util.Checkpoint(
+            optimizer=optimizer, model=model,
+            global_step=training_util.get_or_create_global_step())
+        checkpoint_path = checkpoint_management.latest_checkpoint(
+            checkpoint_directory)
+        status = root.restore(save_path=checkpoint_path)
+        def train_fn():
+          @function.defun
+          def _call_model(x):
+            return model(x)
+          with backprop.GradientTape() as tape:
+            loss = _call_model(constant_op.constant([[3.]]))
+          gradients = tape.gradient(loss, model.variables)
+          return optimizer.apply_gradients(zip(gradients, model.variables),
+                                           global_step=root.global_step)
+        if not context.executing_eagerly():
+          train_fn = functools.partial(
+              self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        if training_continuation > 0:
+          status.assert_consumed()
+          self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
+        else:
+          self.evaluate(model.variables[0].assign([[42.]]))
+        root.save(file_prefix=checkpoint_prefix)
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(root.global_step))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+  # pylint: enable=cell-var-from-loop
+
+  def testAnonymousVarsInInit(self):
+
+    class Model(training.Model):
+
+      def __init__(self):
+        super(Model, self).__init__()
+        self.w = resource_variable_ops.ResourceVariable(0.0)
+        self.b = resource_variable_ops.ResourceVariable(0.0)
+        self.vars = [self.w, self.b]
+
+      def call(self, x):
+        return x * self.w + self.b
+
+    with context.eager_mode():
+      model = Model()
+      optimizer = adam.Adam(learning_rate=0.05)
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      checkpoint = util.Checkpoint(
+          model=model, optimizer=optimizer)
+      for _ in range(2):
+        checkpoint.save(checkpoint_prefix)
+        with backprop.GradientTape() as tape:
+          loss = (constant_op.constant(1.)
+                  - model(constant_op.constant(1.))) ** 2
+        grad = tape.gradient(loss, model.vars)
+        optimizer.apply_gradients(
+            [(g, v) for g, v in zip(grad, model.vars)])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeferredSlotRestoration(self):
+    checkpoint_directory = self.get_temp_dir()
+
+    root = tracking.Checkpointable()
+    root.var = util.add_variable(
+        root, name="var", initializer=0.)
+    optimizer = adam.Adam(0.1)
+    if context.executing_eagerly():
+      optimizer.minimize(root.var.read_value)
+    else:
+      train_op = optimizer.minimize(root.var)
+      # Note that `optimizer` has not been added as a dependency of
+      # `root`. Create a one-off grouping so that slot variables for `root.var`
+      # get initialized too.
+      self.evaluate(util.gather_initializers(
+          util.Checkpoint(root=root, optimizer=optimizer)))
+      self.evaluate(train_op)
+    self.evaluate(state_ops.assign(root.var, 12.))
+    no_slots_path = util.CheckpointableSaver(root).save(
+        os.path.join(checkpoint_directory, "no_slots"))
+    root.optimizer = optimizer
+    self.evaluate(state_ops.assign(root.var, 13.))
+    self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var),
+                                   14.))
+    slots_path = util.CheckpointableSaver(root).save(
+        os.path.join(checkpoint_directory, "with_slots"))
+    new_root = tracking.Checkpointable()
+    # Load the slot-containing checkpoint (deferred), then immediately overwrite
+    # the non-slot variable (also deferred).
+    slot_status = util.CheckpointableSaver(
+        new_root).restore(slots_path)
+    no_slot_status = util.CheckpointableSaver(
+        new_root).restore(no_slots_path)
+    with self.assertRaises(AssertionError):
+      no_slot_status.assert_consumed()
+    new_root.var = util.add_variable(
+        new_root, name="var", shape=[])
+    no_slot_status.assert_consumed()
+    no_slot_status.run_restore_ops()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    new_root.optimizer = adam.Adam(0.1)
+    with self.assertRaisesRegexp(AssertionError, "beta_1_power"):
+      slot_status.assert_consumed()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    if context.executing_eagerly():
+      # Slot variables are only created with restoring initializers when
+      # executing eagerly.
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(name="m", var=new_root.var)))
+    else:
+      self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var),
+                    None)
+    if context.executing_eagerly():
+      new_root.optimizer.minimize(new_root.var.read_value)
+    else:
+      train_op = new_root.optimizer.minimize(new_root.var)
+      # The slot variable now exists; restore() didn't create it, but we should
+      # now have a restore op for it.
+      slot_status.run_restore_ops()
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(name="m", var=new_root.var)))
+      self.evaluate(train_op)
+    slot_status.assert_consumed()
+
+  def testManySavesGraph(self):
+    """Saves after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = tracking.Checkpointable()
+        obj.var = variable_scope.get_variable(name="v", initializer=0.)
+        obj.opt = adam.Adam(0.1)
+        obj.opt.minimize(obj.var.read_value())
+        self.evaluate(util.gather_initializers(obj))
+        saver = util.CheckpointableSaver(obj)
+        saver.save(checkpoint_prefix)
+        before_ops = graph.get_operations()
+        saver.save(checkpoint_prefix)
+        self.assertEqual(before_ops, graph.get_operations())
+
+  def testManyRestoresGraph(self):
+    """Restores after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = tracking.Checkpointable()
+        obj.var = variable_scope.get_variable(name="v", initializer=0.)
+        obj.opt = adam.Adam(0.1)
+        obj.opt.minimize(obj.var.read_value())
+        self.evaluate(util.gather_initializers(obj))
+        saver = util.CheckpointableSaver(obj)
+        save_path = saver.save(checkpoint_prefix)
+        saver.restore(save_path)
+        before_ops = graph.get_operations()
+        saver.restore(save_path)
+        self.assertEqual(before_ops, graph.get_operations())
+
+  def testMultipleGraphsNonSlotVariables(self):
+    with context.graph_mode():
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      optimizer = adam.Adam(0.001)
+      # Construct a model in one graph
+      first_graph = ops.Graph()
+      first_session = session_lib.Session(graph=first_graph)
+      with first_graph.as_default(), first_session.as_default():
+        first_variable = resource_variable_ops.ResourceVariable([1.])
+        first_root_checkpointable = util.Checkpoint(
+            optimizer=optimizer, variable=first_variable)
+        train_op = optimizer.minimize(first_variable.read_value)
+        self.evaluate(util.gather_initializers(
+            first_root_checkpointable))
+        self.evaluate(train_op)
+        self.evaluate(first_variable.assign([1.]))
+        self.evaluate(optimizer.get_slot(
+            var=first_variable, name="m").assign([2.]))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta_1_power.assign(3.))
+
+      # Save and load in a second graph
+      second_graph = ops.Graph()
+      with second_graph.as_default(), session_lib.Session(graph=second_graph):
+        second_variable = resource_variable_ops.ResourceVariable([1.])
+        second_root_checkpointable = util.Checkpoint(
+            optimizer=optimizer, variable=second_variable)
+        train_op = optimizer.minimize(second_variable.read_value)
+        second_root_checkpointable.restore(None).initialize_or_restore()
+        self.evaluate(train_op)
+        self.evaluate(second_variable.assign([4.]))
+        self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m").assign([5.]))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta_1_power.assign(6.))
+        save_path = second_root_checkpointable.save(checkpoint_prefix)
+        self.evaluate(second_variable.assign([7.]))
+        self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m").assign([8.]))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta_1_power))
+        status = second_root_checkpointable.restore(save_path)
+        status.assert_consumed().run_restore_ops()
+        self.assertAllEqual([4.], self.evaluate(second_variable))
+        self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m")))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta_1_power))
+
+      # Check that the first graph is unmolested
+      with first_graph.as_default(), first_session.as_default():
+        self.assertAllEqual([1.], self.evaluate(first_variable))
+        self.assertAllEqual([2.], self.evaluate(optimizer.get_slot(
+            var=first_variable, name="m")))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(3., self.evaluate(beta_1_power))
+
+
+class TemplateTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_checkpointable_save_restore(self):
+
+    def _templated():
+      v = variable_scope.get_variable(
+          "v", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      v2 = variable_scope.get_variable(
+          "v2", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      return v, v + 1., v2
+
+    save_template = template.make_template("s1", _templated)
+    v1_save, _, v2_save = save_template()
+    optimizer = adam.Adam(0.0)
+    save_root = util.Checkpoint(
+        my_template=save_template, optimizer=optimizer)
+    optimizer.minimize(v1_save.read_value)
+    self.evaluate([v.initializer for v in optimizer.variables()])
+    self.evaluate(v1_save.assign([12.]))
+    self.evaluate(v2_save.assign([14.]))
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = save_root.save(checkpoint_prefix)
+
+    load_template = template.make_template("s2", _templated)
+    load_optimizer = adam.Adam(0.0)
+    load_root = util.Checkpoint(
+        my_template=load_template, optimizer=load_optimizer)
+    status = load_root.restore(save_path)
+    var, var_plus_one, var2 = load_template()
+    load_optimizer.minimize(var.read_value)
+    self.assertEqual(2, len(load_template._checkpoint_dependencies))
+    self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
+    self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
+    status.assert_consumed().run_restore_ops()
+    self.assertAllEqual([12.], self.evaluate(var))
+    self.assertAllEqual([13.], self.evaluate(var_plus_one))
+    self.assertAllEqual([14.], self.evaluate(var2))
+
+
+class CheckpointCompatibilityTests(test.TestCase):
+
+  def _initialized_model(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    optimizer = adam.Adam(0.001)
+    optimizer_step = training_util.get_or_create_global_step()
+    root_checkpointable = util.Checkpoint(
+        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
+    train_op = optimizer.minimize(
+        functools.partial(model, input_value),
+        global_step=optimizer_step)
+    self.evaluate(util.gather_initializers(
+        root_checkpointable))
+    self.evaluate(train_op)
+    # A regular variable, a slot variable, and a non-slot Optimizer variable
+    # with known values to check when loading.
+    self.evaluate(model._named_dense.bias.assign([1.]))
+    self.evaluate(optimizer.get_slot(
+        var=model._named_dense.bias, name="m").assign([2.]))
+    beta_1_power, _ = optimizer._get_beta_accumulators()
+    self.evaluate(beta_1_power.assign(3.))
+    return root_checkpointable
+
+  def _set_sentinels(self, root_checkpointable):
+    self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.]))
+    self.evaluate(
+        root_checkpointable.optimizer.get_slot(
+            var=root_checkpointable.model._named_dense.bias, name="m")
+        .assign([102.]))
+    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.evaluate(beta_1_power.assign(103.))
+
+  def _check_sentinels(self, root_checkpointable):
+    self.assertAllEqual(
+        [1.], self.evaluate(root_checkpointable.model._named_dense.bias))
+    self.assertAllEqual([2.], self.evaluate(
+        root_checkpointable.optimizer.get_slot(
+            var=root_checkpointable.model._named_dense.bias, name="m")))
+    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.assertAllEqual(3., self.evaluate(beta_1_power))
+
+  def _write_name_based_checkpoint(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.test_session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        name_saver = core_saver.Saver()
+        return name_saver.save(
+            sess=session, save_path=checkpoint_prefix,
+            global_step=root.optimizer_step)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testLoadFromNameBasedSaver(self):
+    """Save a name-based checkpoint, load it using the object-based API."""
+    with test_util.device(use_gpu=True):
+      save_path = self._write_name_based_checkpoint()
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      with self.assertRaises(AssertionError):
+        self._check_sentinels(root)
+      object_saver = util.CheckpointableSaver(root)
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
+      if context.executing_eagerly():
+        self._check_sentinels(root)
+      if context.executing_eagerly():
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_consumed()
+      else:
+        # When graph building, we haven't read any keys, so we don't know
+        # whether the restore will be complete.
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_consumed()
+      status.run_restore_ops()
+      self._check_sentinels(root)
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
+      status.initialize_or_restore()
+      self._check_sentinels(root)
+
+  # TODO(allenl): Test for the core name-based saver loading object-based
+  # checkpoints once object-based checkpointing is in core.
+
+  def testSaveGraphLoadEager(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.test_session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        save_path = root.save(
+            session=session, file_prefix=checkpoint_prefix)
+    with context.eager_mode():
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      root.restore(save_path).assert_consumed()
+      self._check_sentinels(root)
+
+  def testSaveEagerLoadGraph(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.eager_mode():
+      root = self._initialized_model()
+      save_path = root.save(file_prefix=checkpoint_prefix)
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.test_session(
+          graph=save_graph):
+        root = self._initialized_model()
+        self._set_sentinels(root)
+        root.restore(save_path).assert_consumed().run_restore_ops()
+        self._check_sentinels(root)
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
new file mode 100644
index 0000000000..bd5557f4fd
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -0,0 +1,1349 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Version 2 of class Optimizer."""
+# pylint: disable=g-bad-name
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gradients
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import distribute as distribute_lib
+from tensorflow.python.training import distribution_strategy_context
+from tensorflow.python.training import optimizer as optimizer_v1
+from tensorflow.python.training import slot_creator
+from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.util import nest
+
+
+class _OptimizableVariable(object):
+  """Interface for abstracting over variables in the optimizers."""
+
+  @abc.abstractmethod
+  def target(self):
+    """Returns the optimization target for this variable."""
+    raise NotImplementedError("Calling an abstract method.")
+
+  @abc.abstractmethod
+  def update_op(self, optimizer, g, *args):
+    """Returns the update ops for updating the variable."""
+    raise NotImplementedError("Calling an abstract method.")
+
+
+class _RefVariableProcessor(_OptimizableVariable):
+  """Processor for Variable."""
+
+  def __init__(self, v):
+    self._v = v
+
+  def target(self):
+    return self._v._ref()  # pylint: disable=protected-access
+
+  def update_op(self, optimizer, g, *args):
+    if isinstance(g, ops.Tensor):
+      update_op = optimizer._apply_dense(g, self._v, *args)  # pylint: disable=protected-access
+      if self._v.constraint is not None:
+        with ops.control_dependencies([update_op]):
+          return self._v.assign(self._v.constraint(self._v))
+      else:
+        return update_op
+    else:
+      assert isinstance(g, ops.IndexedSlices), ("Gradient ", g, " is neither a "
+                                                "tensor nor IndexedSlices.")
+      if self._v.constraint is not None:
+        raise RuntimeError(
+            "Cannot use a constraint function on a sparse variable.")
+      # pylint: disable=protected-access
+      return optimizer._apply_sparse_duplicate_indices(g, self._v, *args)
+
+
+class _DenseReadResourceVariableProcessor(_OptimizableVariable):
+  """Processor for dense ResourceVariables."""
+
+  def __init__(self, v):
+    self._v = v
+
+  def target(self):
+    return self._v
+
+  def update_op(self, optimizer, g, *args):
+    # pylint: disable=protected-access
+    update_op = optimizer._resource_apply_dense(g, self._v.op.inputs[0], *args)
+    if self._v.constraint is not None:
+      with ops.control_dependencies([update_op]):
+        return self._v.assign(self._v.constraint(self._v))
+    else:
+      return update_op
+
+
+class _DenseResourceVariableProcessor(_OptimizableVariable):
+  """Processor for dense ResourceVariables."""
+
+  def __init__(self, v):
+    self._v = v
+
+  def target(self):
+    return self._v
+
+  def update_op(self, optimizer, g, *args):
+    # pylint: disable=protected-access
+    if isinstance(g, ops.IndexedSlices):
+      if self._v.constraint is not None:
+        raise RuntimeError(
+            "Cannot use a constraint function on a sparse variable.")
+      return optimizer._resource_apply_sparse_duplicate_indices(
+          g.values, self._v, g.indices, *args)
+    update_op = optimizer._resource_apply_dense(g, self._v, *args)
+    if self._v.constraint is not None:
+      with ops.control_dependencies([update_op]):
+        return self._v.assign(self._v.constraint(self._v))
+    else:
+      return update_op
+
+
+class _TensorProcessor(_OptimizableVariable):
+  """Processor for ordinary Tensors.
+
+  Even though a Tensor can't really be updated, sometimes it is useful to
+  compute the gradients with respect to a Tensor using the optimizer. Updating
+  the Tensor is, of course, unsupported.
+  """
+
+  def __init__(self, v):
+    self._v = v
+
+  def target(self):
+    return self._v
+
+  def update_op(self, optimizer, g, *args):
+    raise NotImplementedError("Trying to update a Tensor ", self._v)
+
+
+def _get_processor(v):
+  """The processor of v."""
+  if context.executing_eagerly():
+    if isinstance(v, ops.Tensor):
+      return _TensorProcessor(v)
+    else:
+      return _DenseResourceVariableProcessor(v)
+  if v.op.type == "VarHandleOp":
+    return _DenseResourceVariableProcessor(v)
+  if isinstance(v, variables.Variable):
+    return _RefVariableProcessor(v)
+  if isinstance(v, ops.Tensor):
+    return _TensorProcessor(v)
+  raise NotImplementedError("Trying to optimize unsupported type ", v)
+
+
+def _var_key_v2(var):
+  """Key for representing a primary variable, for looking up slots."""
+  # pylint: disable=protected-access
+  if hasattr(var, "_distributed_container"):
+    distributed_container = var._distributed_container()
+    assert distributed_container is not None
+    if context.executing_eagerly():
+      return distributed_container._unique_id
+    return distributed_container._shared_name
+  if context.executing_eagerly():
+    return var._unique_id
+  return var.op.name
+
+
+def _resolve(value, name):
+  if callable(value):
+    value = value()
+  return ops.convert_to_tensor(value, name=name)
+
+
+def _is_dynamic(value):
+  """Returns true if __init__ arg `value` should be re-evaluated each step."""
+  if callable(value): return True
+  # Don't need to do anything special in graph mode, since dynamic values
+  # will propagate correctly automatically.
+  # TODO(josh11b): Add per-device caching across steps using variables for
+  # truly static values once we add distributed support.
+  if context.executing_eagerly() and isinstance(
+      value, resource_variable_ops.ResourceVariable):
+    return True
+  return False
+
+
+class _OptimizerV2State(object):
+  """Holds per-graph and per-step optimizer state.
+
+  Use _init_with_static_hyper() to create the state for a graph, and then
+  _copy_with_dynamic_hyper() to convert that to state for a particular step.
+  The difference between the two is that the former only has hyper
+  parameter values that are static and the latter also has values that
+  can change every step (according to _is_dynamic()).
+  """
+
+  def __init__(self, op_name):
+    self._op_name = op_name
+
+  def _init_with_static_hyper(self, hyper):
+    """Initialize a fresh state object from hyper dict."""
+    # self._hyper contains a dict from name to a dict with the Tensor values.
+    # This dict starts with a single item with key "None" with the hyper
+    # parameter value converted to a Tensor. Other items have dtype keys
+    # with that Tensor cast to that dtype.
+    with ops.init_scope():
+      self._hyper = {name: {None: ops.convert_to_tensor(value, name=name)}
+                     for name, (dynamic, value) in sorted(hyper.items())
+                     if not dynamic}
+    self._slots = {}
+    self._non_slot_dict = {}
+    # Extra state to help Optimizers implement Checkpointable. Holds information
+    # about variables which will be restored as soon as they're created.
+    self._deferred_dependencies = {}  # Non-slot variables
+    self._deferred_slot_restorations = {}  # Slot variables
+
+  def _copy_with_dynamic_hyper(self, hyper, distribution, non_slot_devices):
+    """Create a new state object for a particular step."""
+    ret = _OptimizerV2State(self._op_name)
+    # pylint: disable=protected-access
+    ret._slots = self._slots
+    ret._non_slot_dict = self._non_slot_dict
+    ret._deferred_dependencies = self._deferred_dependencies
+    ret._deferred_slot_restorations = self._deferred_slot_restorations
+    ret._hyper = {name: {None: _resolve(value, name)}
+                  for name, (dynamic, value) in sorted(hyper.items())
+                  if dynamic}
+    ret._hyper.update(self._hyper)
+    ret._non_slot_devices = non_slot_devices
+    ret._distribution = distribution
+    return ret
+
+  def _variables(self):
+    """Returns a list of all variables held by self."""
+    optimizer_variables = list(self._non_slot_dict.values())
+    for variable_dict in self._slots.values():
+      for slot_for_variable in variable_dict.values():
+        optimizer_variables.append(slot_for_variable)
+    # Sort variables by name so that the return is deterministic.
+    return sorted(optimizer_variables, key=lambda v: v.name)
+
+  def _slot_dict(self, slot_name):
+    """Returns a dict for caching slots created under the given name.
+
+    Args:
+      slot_name: Name for the slot.
+
+    Returns:
+      A dict that maps primary `Variable` objects to the slot created
+      for that variable, under the given slot name.
+    """
+    named_slots = self._slots.get(slot_name, None)
+    if named_slots is None:
+      named_slots = {}
+      self._slots[slot_name] = named_slots
+    return named_slots
+
+  def create_slot(self, var, val, slot_name, optional_op_name=None):
+    """Find or create a slot for a variable.
+
+    Args:
+      var: A `Variable` object.
+      val: A `Tensor`.  The initial value of the slot.
+      slot_name: Name for the slot.
+      optional_op_name: Name to use when scoping the Variable that
+        needs to be created for the slot.
+
+    Returns:
+      A `Variable` object.
+    """
+    named_slots = self._slot_dict(slot_name)
+    var_key = _var_key_v2(var)
+    if var_key not in named_slots:
+      new_slot_variable = slot_creator.create_slot(
+          var, val, optional_op_name or self._op_name)
+      self._restore_slot_variable(
+          slot_name=slot_name, variable=var,
+          slot_variable=new_slot_variable)
+      named_slots[var_key] = new_slot_variable
+    return named_slots[var_key]
+
+  def create_slot_with_initializer(self, var, initializer, shape, dtype,
+                                   slot_name, optional_op_name=None):
+    """Find or create a slot for a variable, using an Initializer.
+
+    Args:
+      var: A `Variable` object.
+      initializer: An `Initializer`.  The initial value of the slot.
+      shape: Shape of the initial value of the slot.
+      dtype: Type of the value of the slot.
+      slot_name: Name for the slot.
+      optional_op_name: Name to use when scoping the Variable that
+        needs to be created for the slot.
+
+    Returns:
+      A `Variable` object.
+    """
+    named_slots = self._slot_dict(slot_name)
+    var_key = _var_key_v2(var)
+    if var_key not in named_slots:
+      new_slot_variable = slot_creator.create_slot_with_initializer(
+          var, initializer, shape, dtype, optional_op_name or self._op_name)
+      self._restore_slot_variable(
+          slot_name=slot_name, variable=var,
+          slot_variable=new_slot_variable)
+      named_slots[var_key] = new_slot_variable
+    return named_slots[var_key]
+
+  def zeros_slot(self, var, slot_name, optional_op_name=None):
+    """Find or create a slot initialized with 0.0.
+
+    Args:
+      var: A `Variable` object.
+      slot_name: Name for the slot.
+      optional_op_name: Name to use when scoping the Variable that
+        needs to be created for the slot.
+
+    Returns:
+      A `Variable` object.
+    """
+    named_slots = self._slot_dict(slot_name)
+    var_key = _var_key_v2(var)
+    if var_key not in named_slots:
+      new_slot_variable = slot_creator.create_zeros_slot(
+          var, optional_op_name or self._op_name)
+      self._restore_slot_variable(
+          slot_name=slot_name, variable=var,
+          slot_variable=new_slot_variable)
+      named_slots[var_key] = new_slot_variable
+    return named_slots[var_key]
+
+  def _create_or_restore_slot_variable(
+      self, slot_variable_position, slot_name, variable,
+      optional_op_name=None):
+    """Restore a slot variable's value, possibly creating it.
+
+    Called when a variable which has an associated slot variable is created or
+    restored. When executing eagerly, we create the slot variable with a
+    restoring initializer.
+
+    No new variables are created when graph building. Instead,
+    _restore_slot_variable catches these after normal creation and adds restore
+    ops to the graph. This method is nonetheless important when graph building
+    for the case when a slot variable has already been created but `variable`
+    has just been added to a dependency graph (causing us to realize that the
+    slot variable needs to be restored).
+
+    Args:
+      slot_variable_position: A `checkpointable._CheckpointPosition` object
+        indicating the slot variable `Checkpointable` object to be restored.
+      slot_name: The name of this `Optimizer`'s slot to restore into.
+      variable: The variable object this slot is being created for.
+      optional_op_name: Name to use when scoping the Variable that
+        needs to be created for the slot.
+    """
+    slot_variable = self.get_slot(var=variable, name=slot_name)
+    if (slot_variable is None and context.executing_eagerly() and
+        slot_variable_position.is_simple_variable()
+        # Defer slot variable creation if there is an active variable creator
+        # scope. Generally we'd like to eagerly create/restore slot variables
+        # when possible, but this may mean that scopes intended to catch
+        # `variable` also catch its eagerly created slot variable
+        # unintentionally (specifically make_template would add a dependency on
+        # a slot variable if not for this case). Deferring is mostly harmless
+        # (aside from double initialization), and makes variable creator scopes
+        # behave the same way they do when graph building.
+        and not ops.get_default_graph()._variable_creator_stack):  # pylint: disable=protected-access
+      initializer = checkpointable.CheckpointInitialValue(
+          checkpoint_position=slot_variable_position)
+      slot_variable = self.create_slot(
+          var=variable,
+          val=initializer,
+          slot_name=slot_name,
+          optional_op_name=optional_op_name)
+      # Optimizers do not have unconditional dependencies on their slot
+      # variables (nor do any other objects). They are only saved if the
+      # variables they were created for are also saved.
+    if slot_variable is not None:
+      # If we've either made this slot variable, or if we've pulled out an
+      # existing slot variable, we should restore it.
+      slot_variable_position.restore(slot_variable)
+    else:
+      # We didn't make the slot variable. Defer restoring until it gets created
+      # normally. We keep a list rather than the one with the highest restore
+      # UID in case slot variables have their own dependencies, in which case
+      # those could differ between restores.
+      variable_key = _var_key_v2(variable)
+      self._deferred_slot_restorations.setdefault(
+          slot_name, {}).setdefault(variable_key, []).append(
+              slot_variable_position)
+
+  def get_slot(self, var, name):
+    """Return a slot named `name` created for `var` by the Optimizer.
+
+    Some `Optimizer` subclasses use additional variables.  For example
+    `Momentum` and `Adagrad` use variables to accumulate updates.  This method
+    gives access to these `Variable` objects if for some reason you need them.
+
+    Use `get_slot_names()` to get the list of slot names created by the
+    `Optimizer`.
+
+    Args:
+      var: A variable passed to `minimize()` or `apply_gradients()`.
+      name: A string.
+
+    Returns:
+      The `Variable` for the slot if it was created, `None` otherwise.
+    """
+    named_slots = self._slots.get(name, None)
+    if not named_slots:
+      return None
+    return named_slots.get(_var_key_v2(var), None)
+
+  def get_slot_names(self):
+    """Return a list of the names of slots created by the `Optimizer`.
+
+    See `get_slot()`.
+
+    Returns:
+      A list of strings.
+    """
+    return sorted(self._slots.keys())
+
+  def create_non_slot(self, initial_value, name, colocate_with=None):
+    """Add an extra variable, not associated with a slot."""
+    v = self._non_slot_dict.get(name, None)
+    if v is None:
+      if colocate_with is None: colocate_with = self._non_slot_devices
+      with self._distribution.colocate_vars_with(colocate_with):
+        # TODO(josh11b): Use get_variable() except for the legacy Adam use case.
+        v = variable_scope.variable(initial_value, name=name, trainable=False)
+      self._non_slot_dict[name] = v
+      deferred_dependencies_list = self._deferred_dependencies.pop(name, ())
+      for checkpoint_position in sorted(
+          deferred_dependencies_list,
+          key=lambda restore: restore.checkpoint.restore_uid,
+          reverse=True):
+        checkpoint_position.restore(v)
+    return v
+
+  def _restore_slot_variable(self, slot_name, variable, slot_variable):
+    """Restore a newly created slot variable's value."""
+    variable_key = _var_key_v2(variable)
+    deferred_restorations = self._deferred_slot_restorations.get(
+        slot_name, {}).pop(variable_key, [])
+    # Iterate over restores, highest restore UID first to minimize the number
+    # of assignments.
+    deferred_restorations.sort(key=lambda position: position.restore_uid,
+                               reverse=True)
+    for checkpoint_position in deferred_restorations:
+      checkpoint_position.restore(slot_variable)
+
+  def get_non_slot(self, name):
+    """Returns the non-slot variable identified by `name`."""
+    return self._non_slot_dict.get(name, None)
+
+  def get_hyper(self, name, dtype=None):
+    """Returns the `name` hyper parameter, optionally cast to `dtype`."""
+    dtype_dict = self._hyper[name]
+    # Do we have the value cast to dtype already cached? This should always
+    # succeed when dtype is None.
+    if dtype in dtype_dict:
+      return dtype_dict[dtype]
+    # Not cached, cast to dtype and save the result in the cache.
+    result = math_ops.cast(dtype_dict[None], dtype)
+    dtype_dict[dtype] = result
+    return result
+
+
+class OptimizerV2(optimizer_v1.Optimizer):
+  """Updated base class for optimizers.
+
+  This class defines the API to add Ops to train a model.  You never use this
+  class directly, but instead instantiate one of its subclasses such as
+  `GradientDescentOptimizer`, `AdagradOptimizer`, or `MomentumOptimizer`.
+
+  ### Usage
+
+  ```python
+  # Create an optimizer with the desired parameters.
+  opt = GradientDescentOptimizer(learning_rate=0.1)
+  # Add Ops to the graph to minimize a cost by updating a list of variables.
+  # "cost" is a Tensor, and the list of variables contains tf.Variable
+  # objects.
+  opt_op = opt.minimize(cost, var_list=<list of variables>)
+  ```
+
+  In the training program you will just have to run the returned Op.
+
+  ```python
+  # Execute opt_op to do one step of training:
+  opt_op.run()
+  ```
+
+  ### Processing gradients before applying them.
+
+  Calling `minimize()` takes care of both computing the gradients and
+  applying them to the variables.  If you want to process the gradients
+  before applying them you can instead use the optimizer in three steps:
+
+  1.  Compute the gradients with `compute_gradients()`.
+  2.  Process the gradients as you wish.
+  3.  Apply the processed gradients with `apply_gradients()`.
+
+  Example:
+
+  ```python
+  # Create an optimizer.
+  opt = GradientDescentOptimizer(learning_rate=0.1)
+
+  # Compute the gradients for a list of variables.
+  grads_and_vars = opt.compute_gradients(loss, <list of variables>)
+
+  # grads_and_vars is a list of tuples (gradient, variable).  Do whatever you
+  # need to the 'gradient' part, for example cap them, etc.
+  capped_grads_and_vars = [(MyCapper(gv[0]), gv[1]) for gv in grads_and_vars]
+
+  # Ask the optimizer to apply the capped gradients.
+  opt.apply_gradients(capped_grads_and_vars)
+  ```
+
+  ### Gating Gradients
+
+  Both `minimize()` and `compute_gradients()` accept a `gate_gradients`
+  argument that controls the degree of parallelism during the application of
+  the gradients.
+
+  The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`.
+
+  <b>`GATE_NONE`</b>: Compute and apply gradients in parallel.  This provides
+  the maximum parallelism in execution, at the cost of some non-reproducibility
+  in the results.  For example the two gradients of `matmul` depend on the input
+  values: With `GATE_NONE` one of the gradients could be applied to one of the
+  inputs _before_ the other gradient is computed resulting in non-reproducible
+  results.
+
+  <b>`GATE_OP`</b>: For each Op, make sure all gradients are computed before
+  they are used.  This prevents race conditions for Ops that generate gradients
+  for multiple inputs where the gradients depend on the inputs.
+
+  <b>`GATE_GRAPH`</b>: Make sure all gradients for all variables are computed
+  before any one of them is used.  This provides the least parallelism but can
+  be useful if you want to process all gradients before applying any of them.
+
+  ### Slots
+
+  Some optimizer subclasses, such as `MomentumOptimizer` and `AdagradOptimizer`
+  allocate and manage additional variables associated with the variables to
+  train.  These are called <i>Slots</i>.  Slots have names and you can ask the
+  optimizer for the names of the slots that it uses.  Once you have a slot name
+  you can ask the optimizer for the variable it created to hold the slot value.
+
+  This can be useful if you want to log debug a training algorithm, report stats
+  about the slots, etc.
+
+  ### Non-slot variables
+
+  Some optimizer subclasses, such as `AdamOptimizer` have variables that
+  are not associated with the variables to train, just the step itself.
+
+  ### Hyper parameters
+
+  These are arguments passed to the optimizer subclass constructor
+  (the `__init__` method), and then passed to `self._set_hyper()`.
+  They can be either regular Python values (like 1.0), tensors, or
+  callables. If they are callable, the callable will be called during
+  `apply_gradients()` to get the value for the hyper parameter.
+
+  ### State
+
+  Internal methods are passed a `state` argument with the correct
+  values to use for the slot and non-slot variables, and the hyper
+  parameters.
+  """
+
+  # Values for gate_gradients.
+  GATE_NONE = 0
+  GATE_OP = 1
+  GATE_GRAPH = 2
+
+  def __init__(self, name):
+    """Create a new Optimizer.
+
+    This must be called by the constructors of subclasses.
+    Note that Optimizer instances should not bind to a single graph,
+    and so shouldn't keep Tensors as member variables. Generally
+    you should be able to use the _set_hyper()/state.get_hyper()
+    facility instead.
+
+    Args:
+      name: A non-empty string.  The name to use for accumulators created
+        for the optimizer.
+
+    Raises:
+      ValueError: If name is malformed.
+      RuntimeError: If _create_slots has been overridden instead of
+          _create_vars.
+    """
+    # Note: We intentionally don't call parent __init__.
+
+    # Optimizer._create_slots was replaced by _create_vars in OptimizerV2.
+    if (self.__class__._create_slots.__code__ is not  # pylint: disable=protected-access
+        OptimizerV2._create_slots.__code__):
+      raise RuntimeError("Override _create_vars instead of _create_slots when "
+                         "descending from OptimizerV2 (class %s)" %
+                         self.__class__.__name__)
+    if not name:
+      raise ValueError("Must specify the optimizer name")
+
+    self._use_locking = False
+    self._name = name
+    # Map from graph_key to state for that graph. We use the graph_key
+    # since it works in both eager and graph mode, and gives the outer
+    # graph inside functions.
+    tower_context = distribution_strategy_context.get_tower_context()
+    if tower_context is None:
+      # In a cross-tower context for a DistributionStrategy, which means
+      # only one Optimizer will be created, not one per tower.
+      self._per_graph_state = {}
+    else:
+      # We use get_tower_context().merge_call() to get a single dict
+      # shared across all model replicas when running with a
+      # DistributionStrategy.
+      self._per_graph_state = tower_context.merge_call(lambda _: {})
+
+    # Hyper parameters, and whether they should be re-evaluated every step.
+    self._hyper = {}
+
+  def _set_hyper(self, name, value):
+    self._hyper[name] = (_is_dynamic(value), value)
+
+  def minimize(self, loss, global_step=None, var_list=None,
+               gate_gradients=GATE_OP, aggregation_method=None,
+               colocate_gradients_with_ops=False, name=None,
+               grad_loss=None, stop_gradients=None,
+               scale_loss_by_num_towers=None):
+    """Add operations to minimize `loss` by updating `var_list`.
+
+    This method simply combines calls `compute_gradients()` and
+    `apply_gradients()`. If you want to process the gradient before applying
+    them call `compute_gradients()` and `apply_gradients()` explicitly instead
+    of using this function.
+
+    Args:
+      loss: A `Tensor` containing the value to minimize.
+      global_step: Optional `Variable` to increment by one after the
+        variables have been updated.
+      var_list: Optional list or tuple of `Variable` objects to update to
+        minimize `loss`.  Defaults to the list of variables collected in
+        the graph under the key `GraphKeys.TRAINABLE_VARIABLES`.
+      gate_gradients: How to gate the computation of gradients.  Can be
+        `GATE_NONE`, `GATE_OP`, or  `GATE_GRAPH`.
+      aggregation_method: Specifies the method used to combine gradient terms.
+        Valid values are defined in the class `AggregationMethod`.
+      colocate_gradients_with_ops: If True, try colocating gradients with
+        the corresponding op.
+      name: Optional name for the returned operation.
+      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
+      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
+        through.
+      scale_loss_by_num_towers: Optional boolean. If true, scale the loss
+        down by the number of towers. By default, auto-detects whether this
+        is needed.
+
+    Returns:
+      An Operation that updates the variables in `var_list`.  If `global_step`
+      was not `None`, that operation also increments `global_step`.
+
+    Raises:
+      ValueError: If some of the variables are not `Variable` objects.
+
+    @compatibility(eager)
+    When eager execution is enabled, `loss` should be a Python function that
+    takes elements of `var_list` as arguments and computes the value to be
+    minimized. If `var_list` is None, `loss` should take no arguments.
+    Minimization (and gradient computation) is done with respect to the
+    elements of `var_list` if not None, else with respect to any trainable
+    variables created during the execution of the `loss` function.
+    `gate_gradients`, `aggregation_method`, `colocate_gradients_with_ops` and
+    `grad_loss` are ignored when eager execution is enabled.
+    @end_compatibility
+    """
+    grads_and_vars = self.compute_gradients(
+        loss, var_list=var_list, gate_gradients=gate_gradients,
+        aggregation_method=aggregation_method,
+        colocate_gradients_with_ops=colocate_gradients_with_ops,
+        grad_loss=grad_loss, stop_gradients=stop_gradients,
+        scale_loss_by_num_towers=scale_loss_by_num_towers)
+
+    vars_with_grad = [v for g, v in grads_and_vars if g is not None]
+    if not vars_with_grad:
+      raise ValueError(
+          "No gradients provided for any variable, check your graph for ops"
+          " that do not support gradients, between variables %s and loss %s." %
+          ([str(v) for _, v in grads_and_vars], loss))
+
+    return self.apply_gradients(grads_and_vars, global_step=global_step,
+                                name=name)
+
+  def compute_gradients(self, loss, var_list=None,
+                        gate_gradients=GATE_OP,
+                        aggregation_method=None,
+                        colocate_gradients_with_ops=False,
+                        grad_loss=None, stop_gradients=None,
+                        scale_loss_by_num_towers=None):
+    """Compute gradients of `loss` for the variables in `var_list`.
+
+    This is the first part of `minimize()`.  It returns a list
+    of (gradient, variable) pairs where "gradient" is the gradient
+    for "variable".  Note that "gradient" can be a `Tensor`, an
+    `IndexedSlices`, or `None` if there is no gradient for the
+    given variable.
+
+    Args:
+      loss: A Tensor containing the value to minimize or a callable taking
+        no arguments which returns the value to minimize. When eager execution
+        is enabled it must be a callable.
+      var_list: Optional list or tuple of `tf.Variable` to update to minimize
+        `loss`.  Defaults to the list of variables collected in the graph
+        under the key `GraphKeys.TRAINABLE_VARIABLES`.
+      gate_gradients: How to gate the computation of gradients.  Can be
+        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+      aggregation_method: Specifies the method used to combine gradient terms.
+        Valid values are defined in the class `AggregationMethod`.
+      colocate_gradients_with_ops: If True, try colocating gradients with
+        the corresponding op.
+      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
+      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
+        through.
+      scale_loss_by_num_towers: Optional boolean. If true, scale the loss
+        down by the number of towers. By default, auto-detects whether this
+        is needed.
+
+    Returns:
+      A list of (gradient, variable) pairs. Variable is always present, but
+      gradient can be `None`.
+
+    Raises:
+      TypeError: If `var_list` contains anything else than `Variable` objects.
+      ValueError: If some arguments are invalid.
+      RuntimeError: If called with eager execution enabled and `loss` is
+        not callable.
+
+    @compatibility(eager)
+    When eager execution is enabled, `gate_gradients`, `aggregation_method`,
+    and `colocate_gradients_with_ops` are ignored.
+    @end_compatibility
+    """
+    # TODO(josh11b): Test that we handle weight decay in a reasonable way.
+    if callable(loss):
+      with backprop.GradientTape() as tape:
+        if var_list is not None:
+          tape.watch(var_list)
+        loss_value = loss()
+
+        # Scale loss for number of towers (callable-loss case). In this case,
+        # we have to be careful to call distribute_lib.get_loss_reduction()
+        # *after* loss() is evaluated, so we know what loss reduction it uses.
+        if scale_loss_by_num_towers is None:
+          scale_loss_by_num_towers = (
+              distribute_lib.get_loss_reduction() ==
+              variable_scope.VariableAggregation.MEAN)
+        if scale_loss_by_num_towers:
+          num_towers = distribution_strategy_context.get_distribution_strategy(
+          ).num_towers
+          if num_towers > 1:
+            loss_value *= 1. / num_towers
+
+      if var_list is None:
+        var_list = tape.watched_variables()
+      grads = tape.gradient(loss_value, var_list, grad_loss)
+      return list(zip(grads, var_list))
+    if context.executing_eagerly():
+      raise RuntimeError(
+          "`loss` passed to Optimizer.compute_gradients should "
+          "be a function when eager execution is enabled.")
+
+    # Scale loss for number of towers (non-callable-loss case).
+    if scale_loss_by_num_towers is None:
+      scale_loss_by_num_towers = (
+          distribute_lib.get_loss_reduction() ==
+          variable_scope.VariableAggregation.MEAN)
+    if scale_loss_by_num_towers:
+      num_towers = distribution_strategy_context.get_distribution_strategy(
+      ).num_towers
+      if num_towers > 1:
+        loss *= 1. / num_towers
+
+    if gate_gradients not in [optimizer_v1.Optimizer.GATE_NONE,
+                              optimizer_v1.Optimizer.GATE_OP,
+                              optimizer_v1.Optimizer.GATE_GRAPH]:
+      raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
+                       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
+                       gate_gradients)
+    self._assert_valid_dtypes([loss])
+    if grad_loss is not None:
+      self._assert_valid_dtypes([grad_loss])
+    if var_list is None:
+      var_list = (
+          variables.trainable_variables() +
+          ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
+    else:
+      var_list = nest.flatten(var_list)
+    # pylint: disable=protected-access
+    var_list += ops.get_collection(ops.GraphKeys._STREAMING_MODEL_PORTS)
+    # pylint: enable=protected-access
+    processors = [_get_processor(v) for v in var_list]
+    if not var_list:
+      raise ValueError("No variables to optimize.")
+    var_refs = [p.target() for p in processors]
+    grads = gradients.gradients(
+        loss, var_refs, grad_ys=grad_loss,
+        gate_gradients=(gate_gradients == optimizer_v1.Optimizer.GATE_OP),
+        aggregation_method=aggregation_method,
+        colocate_gradients_with_ops=colocate_gradients_with_ops,
+        stop_gradients=stop_gradients)
+    if gate_gradients == optimizer_v1.Optimizer.GATE_GRAPH:
+      grads = control_flow_ops.tuple(grads)
+    grads_and_vars = list(zip(grads, var_list))
+    self._assert_valid_dtypes(
+        [v for g, v in grads_and_vars
+         if g is not None and v.dtype != dtypes.resource])
+    return grads_and_vars
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    """Apply gradients to variables.
+
+    This is the second part of `minimize()`. It returns an `Operation` that
+    applies gradients.
+
+    Args:
+      grads_and_vars: List of (gradient, variable) pairs as returned by
+        `compute_gradients()`.
+      global_step: Optional `Variable` to increment by one after the
+        variables have been updated.
+      name: Optional name for the returned operation.  Default to the
+        name passed to the `Optimizer` constructor.
+
+    Returns:
+      An `Operation` that applies the specified gradients. If `global_step`
+      was not None, that operation also increments `global_step`.
+
+    Raises:
+      TypeError: If `grads_and_vars` is malformed.
+      ValueError: If none of the variables have gradients.
+    """
+    # This is a default implementation of apply_gradients() that can be shared
+    # by most optimizers.  It relies on the subclass implementing the following
+    # methods: _create_vars(), _prepare(), _apply_dense(), and _apply_sparse().
+
+    # Filter out variables with gradients of `None`.
+    grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
+    if not grads_and_vars:
+      raise ValueError("No variables provided.")
+    filtered = tuple((g, v) for (g, v) in grads_and_vars if g is not None)
+    if not filtered:
+      raise ValueError("No gradients provided for any variable: %s." %
+                       ([str(v) for _, v in grads_and_vars],))
+    return distribution_strategy_context.get_tower_context().merge_call(
+        self._distributed_apply, filtered, global_step=global_step, name=name)
+
+  def _get_or_create_state(self, var_list=None):
+    """Either looks up or creates `_OptimizerV2State`.
+
+    If any variables are available, they should be passed via the `var_list`
+    argument, and these will be used to determine the graph to create/retrieve
+    state for. Otherwise the returned state is for the current default graph.
+
+    Args:
+      var_list: A list of variables to extract a graph from.
+
+    Returns:
+      An `_OptimizerV2State` object.
+    """
+    # Determine the graph_key from the current graph.
+    eager_execution = context.executing_eagerly()
+    if eager_execution or var_list is None:
+      graph = ops.get_default_graph()
+    else:
+      graph = ops._get_graph_from_inputs(var_list)  # pylint: disable=protected-access
+    assert graph is not None
+    graph_key = graph._graph_key  # pylint: disable=protected-access
+
+    # Get the per graph state by looking up the graph_key.
+    if graph_key in self._per_graph_state:
+      per_graph_state = self._per_graph_state[graph_key]
+    else:
+      per_graph_state = _OptimizerV2State(self._name)
+      per_graph_state._init_with_static_hyper(self._hyper)  # pylint: disable=protected-access
+      self._per_graph_state[graph_key] = per_graph_state
+    return per_graph_state
+
+  def _distributed_apply(self, distribution, grads_and_vars, global_step, name):
+    """`apply_gradients` for use with a `DistributionStrategy`."""
+    reduced_grads = distribution.batch_reduce(
+        variable_scope.VariableAggregation.SUM, grads_and_vars)
+    var_list = [v for _, v in grads_and_vars]
+    grads_and_vars = zip(reduced_grads, var_list)
+
+    unwrapped_var_list = [x for v in var_list for x in distribution.unwrap(v)]
+    eager_execution = context.executing_eagerly()
+    if eager_execution:
+      # Give a clear error in this case instead of "name not supported
+      # for Eager Tensors" when we compute non_slot_devices.
+      for v in unwrapped_var_list:
+        if isinstance(v, ops.Tensor):
+          raise NotImplementedError("Trying to update a Tensor ", v)
+
+    with ops.name_scope(name, self._name) as name:
+      per_graph_state = self._get_or_create_state(var_list=unwrapped_var_list)
+      # Include the current value of any dynamic hyper parameters in `state`.
+      non_slot_devices = distribution.non_slot_devices(var_list)
+      state = per_graph_state._copy_with_dynamic_hyper(  # pylint: disable=protected-access
+          self._hyper, distribution, non_slot_devices)
+
+    # Create any slot and non-slot variables we need in `state`.
+    with ops.init_scope():
+      self._create_vars(var_list, state)
+
+    with ops.name_scope(name):  # Re-enter name_scope created above
+      # Give the child class a chance to do something before we start
+      # applying gradients.
+      self._prepare(state)
+
+      def update(v, g):
+        """Update variable `v` using gradient `g`."""
+        assert v is not None
+
+        # Convert the grad to Tensor or IndexedSlices if necessary, and
+        # look up a processor for each variable's type.
+        try:
+          g = ops.convert_to_tensor_or_indexed_slices(g)
+        except TypeError:
+          raise TypeError(
+              "Gradient must be convertible to a Tensor"
+              " or IndexedSlices, or None: %s" % g)
+        if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
+          raise TypeError(
+              "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
+        processor = _get_processor(v)
+
+        # We colocate all ops created in _apply_dense or _apply_sparse
+        # on the same device as the variable.
+        # TODO(apassos): figure out how to get the variable name here.
+        scope_name = "" if eager_execution else v.op.name
+        # device_policy is set because non-mirrored tensors will be read in
+        # `update_op`.
+        # TODO(josh11b): Make different state objects for each device to
+        # avoid needing to set the device_policy.
+        with ops.name_scope("update_" + scope_name), \
+            context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
+          return processor.update_op(self, g, state)
+
+      # Use the processors to update the variables.
+      update_ops = []
+      for grad, var in grads_and_vars:
+        update_ops.extend(distribution.update(var, update, grad, grouped=False))
+
+      # Give the child class a chance to do something after applying
+      # gradients
+      def finish():
+        # TODO(josh11b): Make different state objects for each device to
+        # avoid needing to set the device_policy.
+        with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
+          return self._finish(state)
+
+      update_ops = control_flow_ops.group(update_ops)
+      with ops.control_dependencies([update_ops]):
+        finish_updates = distribution.update_non_slot(
+            non_slot_devices, finish, grouped=False)
+      # We said grouped=False, which means finish_updates is always a list.
+      # It will be [None] when finish() returns None.
+      if finish_updates == [None]:
+        finish_updates = [update_ops]
+
+      # Update `global_step` (if any).
+      if global_step is None:
+        apply_updates = distribution.group(finish_updates, name=name)
+      else:
+        with ops.control_dependencies(finish_updates):
+
+          def update_global_step(global_step, name):
+            return global_step.assign_add(1, read_value=False, name=name)
+
+          apply_updates = distribution.update(global_step, update_global_step,
+                                              name)
+
+      # Add the training op to the TRAIN_OP graph collection in graph mode.
+      if not eager_execution:
+        if isinstance(apply_updates, ops.Tensor):
+          apply_updates = apply_updates.op
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        if apply_updates not in train_op:
+          train_op.append(apply_updates)
+
+      return apply_updates
+
+  def get_slot(self, var, name):
+    """Return a slot named `name` created for `var` by the Optimizer.
+
+    Some `Optimizer` subclasses use additional variables.  For example
+    `Momentum` and `Adagrad` use variables to accumulate updates.  This method
+    gives access to these `Variable` objects if for some reason you need them.
+
+    Use `get_slot_names()` to get the list of slot names created by the
+    `Optimizer`.
+
+    Args:
+      var: A variable passed to `minimize()` or `apply_gradients()`.
+      name: A string.
+
+    Returns:
+      The `Variable` for the slot if it was created, `None` otherwise.
+    """
+    state = self._get_state_for_var(var)
+    return state.get_slot(var, name) if state is not None else None
+
+  def get_slot_names(self):
+    """Return a list of the names of slots created by the `Optimizer`.
+
+    See `get_slot()`.
+
+    Returns:
+      A list of strings.
+    """
+    state = self._get_per_graph_state()
+    return state.get_slot_names() if state is not None else []
+
+  def variables(self):
+    """A list of variables which encode the current state of `Optimizer`.
+
+    Includes slot variables and additional global variables created by the
+    optimizer in the current default graph.
+
+    Returns:
+      A list of variables.
+    """
+    state = self._get_per_graph_state()
+    return state._variables() if state is not None else []  # pylint: disable=protected-access
+
+  # --------------
+  # Methods to be implemented by subclasses if they want to use the
+  # inherited implementation of apply_gradients() or compute_gradients().
+  # --------------
+  def _create_vars(self, var_list, state):
+    """Create all slots needed by the variables and any non-slot variables.
+
+    Args:
+      var_list: A list of `Variable` objects.
+      state: An object with these methods:
+        `create_slot(var, val, slot_name, optional_op_name)`,
+        `create_slot_with_initializer(`
+            `var, initializer, shape, dtype, slot_name, optional_op_name)`,
+        `zeros_slot(var, slot_name, optional_op_name)`,
+        `create_non_slot_variable(initial_value, name, colocate_with)`,
+        `get_hyper(name)`
+    """
+    # No slots needed by default
+    pass
+
+  def _prepare(self, state):
+    """Code to execute before applying gradients.
+
+    Note that most uses of _prepare() in Optimizer have been subsumed
+    by explicit support for hyper parameters in OptimizerV2
+
+    Args:
+      state: An object with a `get_hyper(name)` method.
+
+    Returns:
+      Return value will be ignored.
+    """
+    pass
+
+  def _apply_dense(self, grad, var, state):
+    """Add ops to apply dense gradients to `var`.
+
+    Args:
+      grad: A `Tensor`.
+      var: A `Variable` object.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation`.
+    """
+    raise NotImplementedError()
+
+  def _resource_apply_dense(self, grad, handle, state):
+    """Add ops to apply dense gradients to the variable `handle`.
+
+    Args:
+      grad: a `Tensor` representing the gradient.
+      handle: a `Tensor` of dtype `resource` which points to the variable
+       to be updated.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    raise NotImplementedError()
+
+  def _resource_apply_sparse_duplicate_indices(
+      self, grad, handle, indices, state):
+    """Add ops to apply sparse gradients to `handle`, with repeated indices.
+
+    Optimizers which override this method must deal with repeated indices. See
+    the docstring of `_apply_sparse_duplicate_indices` for details. By default
+    the correct behavior, to sum non-unique indices and their associated
+    gradients, is enforced by first pre-processing `grad` and `indices` and
+    passing them on to `_resource_apply_sparse`. Optimizers which deal correctly
+    with duplicate indices may instead override this method to avoid the
+    overhead of summing.
+
+    Args:
+      grad: a `Tensor` representing the gradient for the affected indices.
+      handle: a `Tensor` of dtype `resource` which points to the variable
+       to be updated.
+      indices: a `Tensor` of integral type representing the indices for
+       which the gradient is nonzero. Indices may be repeated.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    # pylint: disable=protected-access
+    summed_grad, unique_indices = optimizer_v1._deduplicate_indexed_slices(
+        values=grad, indices=indices)
+    # pylint: enable=protected-access
+    return self._resource_apply_sparse(
+        summed_grad, handle, unique_indices, state)
+
+  def _resource_apply_sparse(self, grad, handle, indices, state):
+    """Add ops to apply sparse gradients to the variable `handle`.
+
+    Similar to `_apply_sparse`, the `indices` argument to this method has been
+    de-duplicated. Optimizers which deal correctly with non-unique indices may
+    instead override `_resource_apply_sparse_duplicate_indices` to avoid this
+    overhead.
+
+    Args:
+      grad: a `Tensor` representing the gradient for the affected indices.
+      handle: a `Tensor` of dtype `resource` which points to the variable
+       to be updated.
+      indices: a `Tensor` of integral type representing the indices for
+       which the gradient is nonzero. Indices are unique.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    raise NotImplementedError()
+
+  def _apply_sparse_duplicate_indices(self, grad, var, state):
+    """Add ops to apply sparse gradients to `var`, with repeated sparse indices.
+
+    Optimizers which override this method must deal with IndexedSlices objects
+    such as the following:
+
+      IndexedSlicesValue(values=[1, 1], indices=[0, 0], dense_shape=[1])
+
+    The correct interpretation is:
+
+      IndexedSlicesValue(values=[2], indices=[0], dense_shape=[1])
+
+    Many optimizers deal incorrectly with repeated indices when updating based
+    on sparse gradients (e.g. summing squares rather than squaring the sum, or
+    applying momentum terms multiple times). Adding first is always the correct
+    behavior, so this is enforced here by reconstructing the IndexedSlices to
+    have only unique indices, then calling _apply_sparse.
+
+    Optimizers which deal correctly with repeated indices may instead override
+    this method to avoid the overhead of summing indices.
+
+    Args:
+      grad: `IndexedSlices`.
+      var: A `Variable` object.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation`.
+    """
+    # pylint: disable=protected-access
+    summed_values, unique_indices = optimizer_v1._deduplicate_indexed_slices(
+        values=grad.values, indices=grad.indices)
+    # pylint: enable=protected-access
+    gradient_no_duplicate_indices = ops.IndexedSlices(
+        indices=unique_indices,
+        values=summed_values,
+        dense_shape=grad.dense_shape)
+    return self._apply_sparse(gradient_no_duplicate_indices, var, state)
+
+  def _apply_sparse(self, grad, var, state):
+    """Add ops to apply sparse gradients to `var`.
+
+    The IndexedSlices object passed to `grad` in this function is by default
+    pre-processed in `_apply_sparse_duplicate_indices` to remove duplicate
+    indices (see its docstring for details). Optimizers which can tolerate or
+    have correct special cases for duplicate sparse indices may override
+    `_apply_sparse_duplicate_indices` instead of this function, avoiding that
+    overhead.
+
+    Args:
+      grad: `IndexedSlices`, with no repeated indices.
+      var: A `Variable` object.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation`.
+    """
+    raise NotImplementedError()
+
+  def _finish(self, state):
+    """Do what is needed to finish the update.
+
+    This is called inside a scope colocated with any non-slot variables.
+
+    Args:
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      The operation to apply updates, or None if no updates.
+    """
+    return None
+
+  # --------------
+  # Utility methods for subclasses.
+  # --------------
+  def _get_per_graph_state(self):
+    # pylint: disable=protected-access
+    return self._per_graph_state.get(ops.get_default_graph()._graph_key, None)
+
+  def _get_state_for_var(self, var):
+    # pylint: disable=protected-access
+    return self._per_graph_state.get(var._graph_key, None)
+
+  # --------------
+  # Overridden methods from Checkpointable.
+  # --------------
+
+  def _track_checkpointable(self, *args, **kwargs):
+    """Optimizers may not track dependencies. Raises an error."""
+    raise NotImplementedError(
+        "Optimizers may not have dependencies. File a feature request if this "
+        "limitation bothers you.")
+
+  @property
+  def _checkpoint_dependencies(self):
+    """From Checkpointable. Gather graph-specific non-slot variables to save."""
+    current_graph_non_slot_variables = []
+    state = self._get_per_graph_state()
+    if state is not None:
+      for name, variable_object in sorted(
+          state._non_slot_dict.items(),  # pylint: disable=protected-access
+          # Avoid comparing variables
+          key=lambda item: item[0]):
+        current_graph_non_slot_variables.append(
+            checkpointable.CheckpointableReference(
+                name=name, ref=variable_object))
+    # Note: ignores super(); Optimizers may not have any dependencies outside of
+    # state objects.
+    return current_graph_non_slot_variables
+
+  def _lookup_dependency(self, name):
+    """From Checkpointable. Find a non-slot variable in the current graph."""
+    state = self._get_per_graph_state()
+    if state is None:
+      return None
+    else:
+      return state.get_non_slot(name)
+
+  @property
+  def _deferred_dependencies(self):
+    """Lets Checkpointable know where non-slot variables are created.
+
+    If necessary, creates a new state object for the current default graph.
+    Checkpointable will then add entries to that state's deferred dependency
+    dictionary. The state object will check that dictionary when creating
+    non-slot variables, restoring their value if an entry is found.
+
+    Returns:
+      A dictionary which holds deferred dependencies for the current default
+      graph.
+    """
+    state = self._get_or_create_state()
+    return state._deferred_dependencies  # pylint: disable=protected-access
+
+  def _create_or_restore_slot_variable(
+      self, slot_variable_position, slot_name, variable):
+    """Checkpointable: Restore a slot variable's value, possibly creating it.
+
+    Called when a variable which has an associated slot variable is created or
+    restored.
+
+    Args:
+      slot_variable_position: A `checkpointable._CheckpointPosition` object
+        indicating the slot variable `Checkpointable` object to be restored.
+      slot_name: The name of this `Optimizer`'s slot to restore into.
+      variable: The variable object this slot is being created for.
+    """
+    state = self._get_or_create_state(var_list=[variable])
+    state._create_or_restore_slot_variable(  # pylint: disable=protected-access
+        slot_variable_position=slot_variable_position,
+        slot_name=slot_name,
+        variable=variable,
+        optional_op_name=self._name)
+
+  # --------------
+  # Unsupported parent methods
+  # --------------
+  def _slot_dict(self, slot_name):
+    raise NotImplementedError(
+        "_slot_dict() method unsupported in OptimizerV2")
+
+  def _get_or_make_slot(self, var, val, slot_name, op_name):
+    raise NotImplementedError(
+        "_get_or_make_slot() method unsupported in OptimizerV2")
+
+  def _get_or_make_slot_with_initializer(self, var, initializer, shape, dtype,
+                                         slot_name, op_name):
+    raise NotImplementedError(
+        "_get_or_make_slot_with_initializer() method unsupported in "
+        "OptimizerV2")
+
+  def _create_non_slot_variable(self, initial_value, name, colocate_with):
+    raise NotImplementedError(
+        "_create_non_slot_variable() method unsupported in OptimizerV2")
+
+  def _get_non_slot_variable(self, name, graph=None):
+    raise NotImplementedError(
+        "_get_non_slot_variable() method unsupported in OptimizerV2")
+
+  def _non_slot_variables(self):
+    raise NotImplementedError(
+        "_non_slot_variables() method unsupported in OptimizerV2")
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
new file mode 100644
index 0000000000..a6c939393e
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
@@ -0,0 +1,277 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional test for OptimizerV2."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.keras.optimizer_v2 import sgd
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class OptimizerTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def testBasic(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      def loss():
+        return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
+      # Note that for eager execution, minimize expects a function instead of a
+      # Tensor.
+      global_step = resource_variable_ops.ResourceVariable(
+          array_ops.zeros([], dtypes.int64), name='global_step_%d' % i)
+      sgd_op = sgd.SGD(3.0)
+
+      self.evaluate(variables.global_variables_initializer())
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+      # Run 1 step of sgd through optimizer
+      opt_op = sgd_op.minimize(loss, global_step, [var0, var1])
+      self.evaluate(opt_op)
+      # Validate updated params
+      self.assertAllClose([-14., -13.], self.evaluate(var0))
+      self.assertAllClose([-6., -5.], self.evaluate(var1))
+
+  def testAggregationMethod(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        cost = 5 * var0 + 3 * var1
+        global_step = variables.Variable(
+            array_ops.zeros([], dtypes.int64), name='global_step')
+        sgd_op = sgd.SGD(3.0)
+        opt_op = sgd_op.minimize(
+            cost,
+            global_step, [var0, var1],
+            aggregation_method=gradients_impl.AggregationMethod.
+            EXPERIMENTAL_ACCUMULATE_N)
+
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd through optimizer
+        opt_op.run()
+        # Validate updated params
+        self.assertAllClose([-14., -13.], var0.eval())
+        self.assertAllClose([-6., -5.], var1.eval())
+
+  def testPrecomputedGradient(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        cost = 5 * var0 + 3 * var1
+        grad_loss = constant_op.constant([42, -42], dtype=dtype)
+        global_step = variables.Variable(
+            array_ops.zeros([], dtypes.int64), name='global_step')
+        sgd_op = sgd.SGD(3.0)
+        opt_op = sgd_op.minimize(
+            cost, global_step, [var0, var1], grad_loss=grad_loss)
+
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd through optimizer
+        opt_op.run()
+        # Validate updated params
+        self.assertAllClose([1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)],
+                            var0.eval())
+        self.assertAllClose([3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)],
+                            var1.eval())
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNoVariables(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      # pylint: disable=cell-var-from-loop
+      def loss():
+        var0 = resource_variable_ops.ResourceVariable(
+            [1.0, 2.0], dtype=dtype, trainable=False, name='a')
+        var1 = resource_variable_ops.ResourceVariable(
+            [3.0, 4.0], dtype=dtype, trainable=False, name='b')
+        return 5 * var0 + var1
+      # pylint: enable=cell-var-from-loop
+      sgd_op = sgd.SGD(3.0)
+      with self.assertRaisesRegexp(ValueError, 'No.*variables'):
+        sgd_op.minimize(loss)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNoGradients(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      # pylint: disable=cell-var-from-loop
+      def loss():
+        return 5 * var0
+      # pylint: enable=cell-var-from-loop
+      sgd_op = sgd.SGD(3.0)
+      with self.assertRaisesRegexp(ValueError, 'No gradients'):
+        # var1 has no gradient
+        sgd_op.minimize(loss, var_list=[var1])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNoGradientsForAnyVariables_Minimize(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      def loss():
+        return constant_op.constant(5.0)
+
+      sgd_op = sgd.SGD(3.0)
+      with self.assertRaisesRegexp(ValueError,
+                                   'No gradients provided for any variable'):
+        sgd_op.minimize(loss, var_list=[var0, var1])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNoGradientsForAnyVariables_ApplyGradients(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      sgd_op = sgd.SGD(3.0)
+      with self.assertRaisesRegexp(ValueError,
+                                   'No gradients provided for any variable'):
+        sgd_op.apply_gradients([(None, var0), (None, var1)])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testGradientsAsVariables(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      def loss():
+        return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
+
+      sgd_op = sgd.SGD(3.0)
+      grads_and_vars = sgd_op.compute_gradients(loss, [var0, var1])
+      # Convert gradients to tf.Variables
+      converted_grads = [
+          resource_variable_ops.ResourceVariable(array_ops.zeros([2], dtype),
+                                                 name='c_%d_%d' % (i, j))
+          for j, gv in enumerate(grads_and_vars)
+      ]
+      convert_ops = [
+          state_ops.assign(converted_grads[j], gv[0])
+          for j, gv in enumerate(grads_and_vars)
+      ]
+
+      self.evaluate(variables.global_variables_initializer())
+      # Run convert_ops to achieve the gradietns converting
+      self.evaluate(convert_ops)
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+      # Run 1 step of sgd through optimizer
+      converted_grads_and_vars = list(zip(converted_grads, [var0, var1]))
+      opt_op = sgd_op.apply_gradients(converted_grads_and_vars)
+      self.evaluate(opt_op)
+
+      # Validate updated params
+      self.assertAllClose([-14., -13.], self.evaluate(var0))
+      self.assertAllClose([-6., -5.], self.evaluate(var1))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testComputeGradientsWithTensors(self):
+    x = ops.convert_to_tensor(1.0)
+    def f():
+      return x * x
+
+    sgd_op = sgd.SGD(3.0)
+    grads_and_vars = sgd_op.compute_gradients(f, [x])
+    self.assertEqual(1, len(grads_and_vars))
+    grad, x_as_var = grads_and_vars[0]
+    self.assertIs(x, x_as_var)
+    self.assertEqual(2.0, self.evaluate(grad))
+
+    with self.assertRaises(NotImplementedError):
+      sgd_op.apply_gradients(grads_and_vars)
+
+  def testTrainOp(self):
+    with self.cached_session():
+      var0 = variables.Variable([1.0, 2.0])
+      var1 = variables.Variable([3.0, 4.0])
+      cost = 5 * var0 + 3 * var1
+      global_step = variables.Variable(
+          array_ops.zeros([], dtypes.int64), name='global_step')
+      sgd_op = sgd.SGD(3.0)
+      opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
+      self.assertTrue(opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
+
+  def testConstraint(self):
+    constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
+    constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
+    with self.cached_session():
+      var0 = variables.Variable([1.0, 2.0],
+                                constraint=constraint_01)
+      var1 = variables.Variable([3.0, 4.0],
+                                constraint=constraint_0)
+      cost = 5 * var0 + 3 * var1
+      global_step = variables.Variable(
+          array_ops.zeros([], dtypes.int64), name='global_step')
+      sgd_op = sgd.SGD(3.0)
+      opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
+
+      variables.global_variables_initializer().run()
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+      # Run 1 step of sgd through optimizer
+      opt_op.run()
+      # Validate updated params
+      self.assertAllClose([-0.1, -0.1], var0.eval())
+      self.assertAllClose([0., 0.], var1.eval())
+
+  def testStopGradients(self):
+    with self.cached_session():
+      var0 = variables.Variable([1.0, 2.0], name='var0')
+      var1 = variables.Variable([3.0, 4.0], name='var1')
+      var0_id = array_ops.identity(var0)
+      cost = 5 * var0_id + 3 * var1
+      sgd_op = sgd.SGD(3.0)
+      grads_and_vars = sgd_op.compute_gradients(cost, [var0, var1],
+                                                stop_gradients=[var0_id])
+      grad_dict = {var.op.name: grad for grad, var in grads_and_vars}
+      self.assertIsNone(grad_dict['var0'])
+      self.assertIsNotNone(grad_dict['var1'])
+
+  def testDoNotOverrideCreateSlots(self):
+    class ShouldNotOverrideCreateSlots(optimizer_v2.OptimizerV2):
+
+      def _create_slots(self, var_list):
+        """In OptimizerV2 _create_slots was renamed _create_vars."""
+        return var_list
+
+    with self.assertRaises(RuntimeError):
+      ShouldNotOverrideCreateSlots('name')
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py
new file mode 100644
index 0000000000..2748d8eff7
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py
@@ -0,0 +1,239 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""RMSprop optimizer for Tensorflow.
+
+rmsprop algorithm [tieleman2012rmsprop]
+
+A detailed description of rmsprop.
+
+- maintain a moving (discounted) average of the square of gradients
+- divide gradient by the root of this average
+
+mean_square = rho * mean_square{t-1} + (1-rho) * gradient ** 2
+mom = momentum * mom{t-1} + learning_rate * g_t / sqrt(mean_square)
+delta = - mom
+
+This implementation of RMSProp uses plain momentum, not Nesterov momentum.
+
+The centered version additionally maintains a moving (discounted) average of the
+gradients, and uses that average to estimate the variance:
+
+mean_grad = rho * mean_square{t-1} + (1-rho) * gradient
+mean_square = rho * mean_square{t-1} + (1-rho) * gradient ** 2
+mom = momentum * mom{t-1} + learning_rate * g_t /
+    sqrt(mean_square - mean_grad**2)
+delta = - mom
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.ops import array_ops
+
+from tensorflow.python.training import training_ops
+
+
+class RMSProp(optimizer_v2.OptimizerV2):
+  """RMSProp optimizer.
+
+  It is recommended to leave the parameters of this optimizer at their default
+  values (except the learning rate, which can be freely tuned).
+
+  This optimizer is usually a good choice for recurrent neural networks.
+
+  Some of the args below are hyperparameters, where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Note that in the dense implementation of this algorithm, variables and their
+  corresponding accumulators (momentum, gradient moving average, square
+  gradient moving average) will be updated even if the gradient is zero
+  (i.e. accumulators will decay, momentum will be applied). The sparse
+  implementation (used when the gradient is an `IndexedSlices` object,
+  typically because of `tf.gather` or an embedding lookup in the forward pass)
+  will not update variable slices or their accumulators unless those slices
+  were used in the forward pass (nor is there an "eventual" correction to
+  account for these omitted updates). This leads to more efficient updates for
+  large embedding lookup tables (where most of the slices are not accessed in
+  a particular graph execution), but differs from the published algorithm.
+
+  Arguments:
+      learning_rate: A float hyperparameter >= 0. The learning rate.
+      rho: A float hyperparameter >= 0. Discounting factor for the
+        history/coming gradient.
+      momentum: A float hyperparameter >= 0.
+      epsilon: A float hyperparameter >= 0 . Small value to initialize the
+        average square gradient variable and avoid zero denominator.
+      centered: If True, gradients are normalized by the estimated variance of
+        the gradient; if False, by the uncentered second moment. Setting this to
+        True may help with training, but is slightly more expensive in terms of
+        computation and memory. Defaults to False.
+      name: Optional name prefix for the operations created when applying
+        gradients. Defaults to "RMSProp".
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               rho=0.9,
+               momentum=None,
+               epsilon=1e-10,
+               centered=False,
+               name="RMSProp"):
+    super(RMSProp, self).__init__(name)
+    # Momentum default is `None` for consistency with SGD
+    # but underlying implementation uses `momentum` hyperparameter here
+    # regardless unlike SGD. Since extneral Keras RMSProp does not have
+    # a `momentum` weight, for compatibility with external Keras h5 files,
+    # when  `momentum` was set as `None` we should ignore the `momentum`
+    # variable in `get_weights` and not require it in `set_weights`.
+    if momentum is None:
+      momentum = 0.0
+    self._set_hyper("learning_rate", learning_rate)
+    self._set_hyper("rho", rho)
+    self._set_hyper("momentum", momentum)
+    self._set_hyper("epsilon", epsilon)
+
+    self._centered = centered
+
+  def _create_vars(self, var_list, state):
+    for v in var_list:
+      init_rms = state.get_hyper(
+          "epsilon", v.dtype.base_dtype) * array_ops.ones_like(v)
+      state.create_slot_with_initializer(v, init_rms, v.get_shape(),
+                                         v.dtype.base_dtype, "rms")
+      if self._centered:
+        state.zeros_slot(v, "mg")
+      state.zeros_slot(v, "momentum")
+
+  def _apply_dense(self, grad, var, state):
+    rms = state.get_slot(var, "rms")
+    mom = state.get_slot(var, "momentum")
+    if self._centered:
+      mg = state.get_slot(var, "mg")
+      return training_ops.apply_centered_rms_prop(
+          var,
+          mg,
+          rms,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          # epsilon is now the rms initial value and is not added to the
+          # denominator anymore, hence calling the kernel op with epsilon=0.
+          0,
+          grad,
+          use_locking=self._use_locking).op
+    else:
+      return training_ops.apply_rms_prop(
+          var,
+          rms,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          use_locking=self._use_locking).op
+
+  def _resource_apply_dense(self, grad, var, state):
+    rms = state.get_slot(var, "rms")
+    mom = state.get_slot(var, "momentum")
+    if self._centered:
+      mg = state.get_slot(var, "mg")
+      return training_ops.resource_apply_centered_rms_prop(
+          var.handle,
+          mg.handle,
+          rms.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          use_locking=self._use_locking)
+    else:
+      return training_ops.resource_apply_rms_prop(
+          var.handle,
+          rms.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var, state):
+    rms = state.get_slot(var, "rms")
+    mom = state.get_slot(var, "momentum")
+    if self._centered:
+      mg = state.get_slot(var, "mg")
+      return training_ops.sparse_apply_centered_rms_prop(
+          var,
+          mg,
+          rms,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad.values,
+          grad.indices,
+          use_locking=self._use_locking)
+    else:
+      return training_ops.sparse_apply_rms_prop(
+          var,
+          rms,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad.values,
+          grad.indices,
+          use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    rms = state.get_slot(var, "rms")
+    mom = state.get_slot(var, "momentum")
+    if self._centered:
+      mg = self.get_slot(var, "mg")
+      return training_ops.resource_sparse_apply_centered_rms_prop(
+          var.handle,
+          mg.handle,
+          rms.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          indices,
+          use_locking=self._use_locking)
+    else:
+      return training_ops.resource_sparse_apply_rms_prop(
+          var.handle,
+          rms.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          indices,
+          use_locking=self._use_locking)
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
new file mode 100644
index 0000000000..2c5eccdc5b
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
@@ -0,0 +1,444 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for rmsprop optimizer."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import math
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.optimizer_v2 import rmsprop
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+_DATA_TYPES = [dtypes.half, dtypes.float32]
+
+_TEST_PARAM_VALUES = [
+    # learning_rate, rho, momentum, epsilon, centered, use_resource
+    [0.5, 0.9, 0.0, 1.0, True, False],
+    [0.5, 0.9, 0.0, 1.0, False, False],
+    [0.5, 0.9, 0.0, 1.0, True, True],
+    [0.5, 0.9, 0.0, 1.0, False, True],
+    [0.1, 0.9, 0.0, 1.0, True, False],
+    [0.5, 0.95, 0.0, 1.0, False, False],
+    [0.5, 0.8, 0.0, 1e-3, True, False],
+    [0.5, 0.8, 0.9, 1e-3, True, False],
+]
+
+
+class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
+
+  def _rmsprop_update_numpy(self, var, g, mg, rms, mom, lr, rho, momentum,
+                            centered):
+    rms_t = rms * rho + (1 - rho) * g * g
+    if centered:
+      mg_t = mg * rho + (1 - rho) * g
+      denom_t = rms_t - mg_t * mg_t
+    else:
+      mg_t = mg
+      denom_t = rms_t
+    mom_t = momentum * mom + lr * g / np.sqrt(denom_t, dtype=denom_t.dtype)
+    var_t = var - mom_t
+    return var_t, mg_t, rms_t, mom_t
+
+  def _sparse_rmsprop_update_numpy(self, var, gindexs, gvalues, mg, rms, mom,
+                                   lr, rho, momentum, centered):
+    mg_t = copy.deepcopy(mg)
+    rms_t = copy.deepcopy(rms)
+    mom_t = copy.deepcopy(mom)
+    var_t = copy.deepcopy(var)
+    for i in range(len(gindexs)):
+      gindex = gindexs[i]
+      gvalue = gvalues[i]
+      rms_t[gindex] = rms[gindex] * rho + (1 - rho) * gvalue * gvalue
+      denom_t = rms_t[gindex]
+      if centered:
+        mg_t[gindex] = mg_t[gindex] * rho + (1 - rho) * gvalue
+        denom_t -= mg_t[gindex] * mg_t[gindex]
+      mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(denom_t)
+      var_t[gindex] = var[gindex] - mom_t[gindex]
+    return var_t, mg_t, rms_t, mom_t
+
+  @parameterized.named_parameters(
+      *test_util.generate_combinations_with_testcase_name(
+          dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
+  def testDense(self, dtype, param_value):
+    (learning_rate, rho, momentum, epsilon, centered,
+     use_resource) = tuple(param_value)
+    with self.test_session(use_gpu=True):
+      # Initialize variables for numpy implementation.
+      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+      grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
+      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+      grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)
+
+      if use_resource:
+        var0 = resource_variable_ops.ResourceVariable(var0_np)
+        var1 = resource_variable_ops.ResourceVariable(var1_np)
+      else:
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+      grads0 = constant_op.constant(grads0_np)
+      grads1 = constant_op.constant(grads1_np)
+      opt = rmsprop.RMSProp(
+          learning_rate=learning_rate,
+          rho=rho,
+          momentum=momentum,
+          epsilon=epsilon,
+          centered=centered)
+
+      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      variables.global_variables_initializer().run()
+
+      mg0 = opt.get_slot(var0, "mg")
+      self.assertEqual(mg0 is not None, centered)
+      mg1 = opt.get_slot(var1, "mg")
+      self.assertEqual(mg1 is not None, centered)
+      rms0 = opt.get_slot(var0, "rms")
+      self.assertIsNotNone(rms0)
+      rms1 = opt.get_slot(var1, "rms")
+      self.assertIsNotNone(rms1)
+      mom0 = opt.get_slot(var0, "momentum")
+      self.assertIsNotNone(mom0)
+      mom1 = opt.get_slot(var1, "momentum")
+      self.assertIsNotNone(mom1)
+
+      mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      rms0_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
+      rms1_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
+      mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+
+      # Run 4 steps of RMSProp
+      for _ in range(4):
+        update.run()
+
+        var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
+            var0_np, grads0_np, mg0_np, rms0_np, mom0_np, learning_rate, rho,
+            momentum, centered)
+        var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
+            var1_np, grads1_np, mg1_np, rms1_np, mom1_np, learning_rate, rho,
+            momentum, centered)
+
+        # Validate updated params
+        if centered:
+          self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
+          self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
+        self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
+        self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
+        self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
+        self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
+        self.assertAllCloseAccordingToType(
+            var0_np, var0.eval(), half_rtol=0.01, half_atol=0.01)
+        self.assertAllCloseAccordingToType(
+            var1_np, var1.eval(), half_rtol=0.01, half_atol=0.01)
+
+  @parameterized.parameters([dtypes.float32, dtypes.float64])
+  def testMinimizeSparseResourceVariable(self, dtype):
+    with self.cached_session():
+      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+      loss = pred * pred
+      sgd_op = rmsprop.RMSProp(
+          learning_rate=1.0, rho=0.0, momentum=0.0, epsilon=0.0,
+          centered=False).minimize(loss)
+      variables.global_variables_initializer().run()
+      # Fetch params to validate initial values
+      self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+      # Run 1 step of sgd
+      sgd_op.run()
+      # Validate updated params
+      self.assertAllCloseAccordingToType(
+          [[0., 1.]], var0.eval(), atol=0.01)
+
+  @parameterized.parameters([dtypes.float32, dtypes.float64])
+  def testMinimizeSparseResourceVariableCentered(self, dtype):
+    with self.cached_session():
+      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+      loss = pred * pred
+      sgd_op = rmsprop.RMSProp(
+          learning_rate=1.0, rho=0.1, momentum=0.0, epsilon=1.0,
+          centered=True).minimize(loss)
+      variables.global_variables_initializer().run()
+      # Fetch params to validate initial values
+      self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+      # Run 1 step of sgd
+      sgd_op.run()
+      # Validate updated params
+      self.assertAllCloseAccordingToType(
+          [[-7/3.0, -4/3.0]], var0.eval(), atol=0.01)
+
+  @parameterized.named_parameters(
+      *test_util.generate_combinations_with_testcase_name(
+          dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
+  def testSparse(self, dtype, param_value):
+    (learning_rate, rho, momentum, epsilon, centered, _) = tuple(param_value)
+    with self.test_session(use_gpu=True):
+      # Initialize variables for numpy implementation.
+      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+      grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
+      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+      grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)
+
+      var0 = variables.Variable(var0_np)
+      var1 = variables.Variable(var1_np)
+      grads0_np_indices = np.array([0], dtype=np.int32)
+      grads0 = ops.IndexedSlices(
+          constant_op.constant(grads0_np),
+          constant_op.constant(grads0_np_indices), constant_op.constant([1]))
+      grads1_np_indices = np.array([1], dtype=np.int32)
+      grads1 = ops.IndexedSlices(
+          constant_op.constant(grads1_np),
+          constant_op.constant(grads1_np_indices), constant_op.constant([1]))
+      opt = rmsprop.RMSProp(
+          learning_rate=learning_rate,
+          rho=rho,
+          momentum=momentum,
+          epsilon=epsilon,
+          centered=centered)
+      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      variables.global_variables_initializer().run()
+
+      mg0 = opt.get_slot(var0, "mg")
+      self.assertEqual(mg0 is not None, centered)
+      mg1 = opt.get_slot(var1, "mg")
+      self.assertEqual(mg1 is not None, centered)
+      rms0 = opt.get_slot(var0, "rms")
+      self.assertIsNotNone(rms0)
+      rms1 = opt.get_slot(var1, "rms")
+      self.assertIsNotNone(rms1)
+      mom0 = opt.get_slot(var0, "momentum")
+      self.assertIsNotNone(mom0)
+      mom1 = opt.get_slot(var1, "momentum")
+      self.assertIsNotNone(mom1)
+
+      mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      rms0_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
+      rms1_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
+      mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+
+      # Run 4 steps of RMSProp
+      for _ in range(4):
+        update.run()
+
+        var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy(
+            var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np, mom0_np,
+            learning_rate, rho, momentum, centered)
+        var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy(
+            var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np, mom1_np,
+            learning_rate, rho, momentum, centered)
+
+        # Validate updated params
+        if centered:
+          self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
+          self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
+        self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
+        self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
+        self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
+        self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
+        self.assertAllCloseAccordingToType(var0_np, var0.eval())
+        self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  @parameterized.parameters(_DATA_TYPES)
+  def testWithoutMomentum(self, dtype):
+    with self.test_session(use_gpu=True):
+      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+      opt = rmsprop.RMSProp(
+          learning_rate=2.0, rho=0.9, momentum=0.0, epsilon=1.0)
+      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      variables.global_variables_initializer().run()
+
+      rms0 = opt.get_slot(var0, "rms")
+      self.assertIsNotNone(rms0)
+      rms1 = opt.get_slot(var1, "rms")
+      self.assertIsNotNone(rms1)
+      mom0 = opt.get_slot(var0, "momentum")
+      self.assertIsNotNone(mom0)
+      mom1 = opt.get_slot(var1, "momentum")
+      self.assertIsNotNone(mom1)
+
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+      # Step 1: the rms accumulators where 1. So we should see a normal
+      # update: v -= grad * learning_rate
+      update.run()
+      # Check the root mean square accumulators.
+      self.assertAllCloseAccordingToType(
+          np.array([0.901, 0.901]), rms0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([0.90001, 0.90001]), rms1.eval())
+      # Check the parameters.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0 / math.sqrt(0.901)),
+              2.0 - (0.1 * 2.0 / math.sqrt(0.901))
+          ]), var0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)),
+              4.0 - (0.01 * 2.0 / math.sqrt(0.90001))
+          ]), var1.eval())
+      # Step 2: the root mean square accumulators contain the previous update.
+      update.run()
+      # Check the rms accumulators.
+      self.assertAllCloseAccordingToType(
+          np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
+      # Check the parameters.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
+              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)),
+              2.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
+              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))
+          ]), var0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
+              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)),
+              4.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
+              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))
+          ]), var1.eval())
+
+  @parameterized.parameters(_DATA_TYPES)
+  def testWithMomentum(self, dtype):
+    with self.test_session(use_gpu=True):
+      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+
+      opt = rmsprop.RMSProp(
+          learning_rate=2.0, rho=0.9, momentum=0.5, epsilon=1.0)
+      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      variables.global_variables_initializer().run()
+
+      rms0 = opt.get_slot(var0, "rms")
+      self.assertIsNotNone(rms0)
+      rms1 = opt.get_slot(var1, "rms")
+      self.assertIsNotNone(rms1)
+      mom0 = opt.get_slot(var0, "momentum")
+      self.assertIsNotNone(mom0)
+      mom1 = opt.get_slot(var1, "momentum")
+      self.assertIsNotNone(mom1)
+
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+      # Step 1: rms = 1, mom = 0. So we should see a normal
+      # update: v -= grad * learning_rate
+      update.run()
+      # Check the root mean square accumulators.
+      self.assertAllCloseAccordingToType(
+          np.array([0.901, 0.901]), rms0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([0.90001, 0.90001]), rms1.eval())
+      # Check the momentum accumulators
+      self.assertAllCloseAccordingToType(
+          np.array([(0.1 * 2.0 / math.sqrt(0.901)),
+                    (0.1 * 2.0 / math.sqrt(0.901))]), mom0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([(0.01 * 2.0 / math.sqrt(0.90001)),
+                    (0.01 * 2.0 / math.sqrt(0.90001))]), mom1.eval())
+
+      # Check that the parameters.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0 / math.sqrt(0.901)),
+              2.0 - (0.1 * 2.0 / math.sqrt(0.901))
+          ]), var0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)),
+              4.0 - (0.01 * 2.0 / math.sqrt(0.90001))
+          ]), var1.eval())
+
+      # Step 2: the root mean square accumulators contain the previous update.
+      update.run()
+      # Check the rms accumulators.
+      self.assertAllCloseAccordingToType(
+          np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
+              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)),
+              0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
+              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))
+          ]), mom0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
+              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)),
+              0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
+              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))
+          ]), mom1.eval())
+
+      # Check the parameters.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
+              (0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
+               (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))),
+              2.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
+              (0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
+               (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)))
+          ]), var0.eval())
+
+      self.assertAllCloseAccordingToType(
+          np.array([
+              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
+              (0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
+               (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))),
+              4.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
+              (0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
+               (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)))
+          ]), var1.eval())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/sgd.py b/tensorflow/python/keras/optimizer_v2/sgd.py
new file mode 100644
index 0000000000..f5583691f7
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/sgd.py
@@ -0,0 +1,170 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Momentum for TensorFlow."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.training import training_ops
+
+
+class SGD(optimizer_v2.OptimizerV2):
+  """Stochastic gradient descent optimizer.
+
+  Includes support for momentum and Nesterov momentum.
+
+  Computes (if `nesterov = False`):
+
+  ```
+  accumulation = momentum * accumulation + gradient
+  variable -= learning_rate * accumulation
+  ```
+
+  Some of the args below are hyperparameters, where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Note that in the dense version of this algorithm, `accumulation` is updated
+  and applied regardless of a gradient's value, whereas the sparse version (when
+  the gradient is an `IndexedSlices`, typically because of `tf.gather` or an
+  embedding) only updates variable slices and corresponding `accumulation` terms
+  when that part of the variable was used in the forward pass.
+
+  @compatibility(eager)
+  When eager execution is enabled, learning_rate and momentum can each be a
+  callable that takes no arguments and returns the actual value to use. This
+  can be useful for changing these values across different invocations of
+  optimizer functions.
+  @end_compatibility
+
+  Arguments:
+      learning_rate: float hyperparameter >= 0. Learning rate.
+      momentum: float hyperparameter >= 0 or None. Parameter that accelerates
+        SGD in the relevant direction and dampens oscillations.
+      nesterov: boolean. Whether to apply Nesterov momentum. See [Sutskever et
+        al., 2013](http://jmlr.org/proceedings/papers/v28/sutskever13.pdf). This
+          implementation always computes gradients at the value of the
+          variable(s) passed to the optimizer. Using Nesterov Momentum makes the
+          variable(s) track the values called `theta_t + mu*v_t` in the paper.
+      name: Optional name prefix for the operations created when applying
+        gradients.  Defaults to 'SGD'.
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               momentum=None,
+               nesterov=False,
+               name="SGD"):
+    super(SGD, self).__init__(name)
+    self._set_hyper("learning_rate", learning_rate)
+    # Only create momentum variables and use momentum ops if needed.
+    if momentum is not None:
+      self._set_hyper("momentum", momentum)
+      self._use_nesterov = nesterov
+      self._use_momentum = True
+    else:
+      self._use_momentum = False
+
+  def _create_vars(self, var_list, state):
+    if self._use_momentum:
+      for v in var_list:
+        state.zeros_slot(v, "momentum")
+
+  def _apply_dense(self, grad, var, state):
+    if self._use_momentum:
+      mom = state.get_slot(var, "momentum")
+      return training_ops.apply_momentum(
+          var,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad,
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          use_locking=self._use_locking,
+          use_nesterov=self._use_nesterov).op
+    else:
+      return training_ops.apply_gradient_descent(
+          var,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad,
+          use_locking=self._use_locking).op
+
+  def _resource_apply_dense(self, grad, var, state):
+    if self._use_momentum:
+      mom = state.get_slot(var, "momentum")
+      return training_ops.resource_apply_momentum(
+          var.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad,
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          use_locking=self._use_locking,
+          use_nesterov=self._use_nesterov)
+    else:
+      lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
+      return training_ops.resource_apply_gradient_descent(
+          var.handle, lr, grad, use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var, state):
+    if self._use_momentum:
+      mom = state.get_slot(var, "momentum")
+      return training_ops.sparse_apply_momentum(
+          var,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad.values,
+          grad.indices,
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          use_locking=self._use_locking,
+          use_nesterov=self._use_nesterov).op
+    else:
+      return super(SGD, self)._apply_sparse(grad, var, state)
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    if self._use_momentum:
+      mom = state.get_slot(var, "momentum")
+      return training_ops.resource_sparse_apply_momentum(
+          var.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad,
+          indices,
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          use_locking=self._use_locking,
+          use_nesterov=self._use_nesterov)
+    else:
+      return super(SGD, self)._resource_apply_sparse(grad, var, indices, state)
+
+  def _resource_apply_sparse_duplicate_indices(self, grad, var, indices, state):
+    if self._use_momentum:
+      return super(SGD, self)._resource_apply_sparse_duplicate_indices(
+          grad, var, indices, state)
+    else:
+      lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
+      return resource_variable_ops.resource_scatter_add(var.handle, indices,
+                                                        -grad * lr)
+
+  def _apply_sparse_duplicate_indices(self, grad, var, state):
+    if self._use_momentum:
+      return super(SGD, self)._apply_sparse_duplicate_indices(grad, var, state)
+    else:
+      delta = ops.IndexedSlices(
+          grad.values * state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad.indices, grad.dense_shape)
+      return var.scatter_sub(delta, use_locking=self._use_locking)
diff --git a/tensorflow/python/keras/optimizer_v2/sgd_test.py b/tensorflow/python/keras/optimizer_v2/sgd_test.py
new file mode 100644
index 0000000000..eb39aac283
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/sgd_test.py
@@ -0,0 +1,759 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Momentum."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.optimizer_v2 import sgd
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import resources
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class GradientDescentOptimizerTest(test.TestCase):
+
+  def testBasic(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        optimizer = sgd.SGD(3.0)
+        sgd_op = optimizer.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+        self.assertEqual(0, len(optimizer.variables()))
+
+  def testBasicResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        sgd_op = sgd.SGD(3.0).apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        # TODO(apassos) calling initialize_resources on all resources here
+        # doesn't work because the sessions and graph are reused across unit
+        # tests and this would mean trying to reinitialize variables. Figure out
+        # a long-term solution for this.
+        resources.initialize_resources([var0, var1]).run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+
+  def testMinimizeResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(var0, x) + var1
+        loss = pred * pred
+        sgd_op = sgd.SGD(1.0).minimize(loss)
+        # TODO(apassos) calling initialize_resources on all resources here
+        # doesn't work because the sessions and graph are reused across unit
+        # tests and this would mean trying to reinitialize variables. Figure out
+        # a long-term solution for this.
+        resources.initialize_resources([var0, var1]).run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        self.assertAllCloseAccordingToType([3.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
+        np_grad = 2 * np_pred
+        self.assertAllCloseAccordingToType(
+            [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval())
+
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        pred += var1
+        loss = pred * pred
+        sgd_op = sgd.SGD(1.0).minimize(loss)
+        # TODO(apassos) calling initialize_resources on all resources here
+        # doesn't work because the sessions and graph are reused across unit
+        # tests and this would mean trying to reinitialize variables. Figure out
+        # a long-term solution for this.
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        self.assertAllCloseAccordingToType([3.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
+        np_grad = 2 * np_pred
+        self.assertAllCloseAccordingToType(
+            [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval())
+
+  def testTensorLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        lrate = constant_op.constant(3.0)
+        sgd_op = sgd.SGD(lrate).apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+
+  def testGradWrtRef(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        opt = sgd.SGD(3.0)
+        values = [1.0, 3.0]
+        vars_ = [variables.Variable([v], dtype=dtype) for v in values]
+        grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_)
+        variables.global_variables_initializer().run()
+        for grad, _ in grads_and_vars:
+          self.assertAllCloseAccordingToType([1.0], grad.eval())
+
+  def testWithGlobalStep(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        global_step = variables.Variable(0, trainable=False)
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        sgd_op = sgd.SGD(3.0).apply_gradients(
+            zip([grads0, grads1], [var0, var1]), global_step=global_step)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params and global_step
+        self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+        self.assertAllCloseAccordingToType(1, global_step.eval())
+
+  def testSparseBasic(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
+        var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
+            constant_op.constant([0]), constant_op.constant([2, 1]))
+        grads1 = ops.IndexedSlices(
+            constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]), constant_op.constant([2, 1]))
+        sgd_op = sgd.SGD(3.0).apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0], [2.0]], var0.eval())
+        self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
+                                           var1.eval())
+
+
+if __name__ == "__main__":
+  test.main()
+
+
+class MomentumOptimizerTest(test.TestCase):
+
+  def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum):
+    var = var + accum * lr * momentum
+    accum = accum * momentum + g
+    var = var - lr * accum
+    var = var - accum * lr * momentum
+    return var, accum
+
+  def doTestBasic(self, use_resource=False, use_callable_params=False):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      if use_resource:
+        var0 = resource_variable_ops.ResourceVariable(
+            [1.0, 2.0], dtype=dtype, name="var0_%d" % i)
+        var1 = resource_variable_ops.ResourceVariable(
+            [3.0, 4.0], dtype=dtype, name="var1_%d" % i)
+      else:
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+      learning_rate = lambda: 2.0
+      momentum = lambda: 0.9
+      if not use_callable_params:
+        learning_rate = learning_rate()
+        momentum = momentum()
+      mom_opt = sgd.SGD(learning_rate=learning_rate, momentum=momentum)
+      mom_update = mom_opt.apply_gradients(
+          zip([grads0, grads1], [var0, var1]))
+
+      if not context.executing_eagerly():
+        self.evaluate(variables.global_variables_initializer())
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+      # Check we have slots
+      self.assertEqual(["momentum"], mom_opt.get_slot_names())
+      slot0 = mom_opt.get_slot(var0, "momentum")
+      self.assertEquals(slot0.get_shape(), var0.get_shape())
+      slot1 = mom_opt.get_slot(var1, "momentum")
+      self.assertEquals(slot1.get_shape(), var1.get_shape())
+      if not context.executing_eagerly():
+        self.assertFalse(slot0 in variables.trainable_variables())
+        self.assertFalse(slot1 in variables.trainable_variables())
+
+      # Step 1: the momentum accumulators where 0. So we should see a normal
+      # update: v -= grad * learning_rate
+      if not context.executing_eagerly():
+        self.evaluate(mom_update)
+      # Check that the momentum accumulators have been updated.
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
+                                         self.evaluate(slot1))
+      # Check that the parameters have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
+          self.evaluate(var0))
+      self.assertAllCloseAccordingToType(
+          np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
+          self.evaluate(var1))
+      # Step 2: the momentum accumulators contain the previous update.
+      if context.executing_eagerly():
+        mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      else:
+        self.evaluate(mom_update)
+      # Check that the momentum accumulators have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
+          self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
+          self.evaluate(slot1))
+      # Check that the parameters have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+              2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
+          ]), self.evaluate(var0))
+      self.assertAllCloseAccordingToType(
+          np.array([
+              2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - (
+                  (0.9 * 0.01 + 0.01) * 2.0)
+          ]), self.evaluate(var1))
+
+  def testBasic(self):
+    with self.cached_session():
+      self.doTestBasic(use_resource=False)
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testResourceBasic(self):
+    self.doTestBasic(use_resource=True)
+
+  def testBasicCallableParams(self):
+    with context.eager_mode():
+      self.doTestBasic(use_resource=True, use_callable_params=True)
+
+  def testVariablesAcrossGraphs(self):
+    optimizer = sgd.SGD(0.01, 0.5)
+    with ops.Graph().as_default():
+      var0 = resource_variable_ops.ResourceVariable(
+          [1.0, 2.0], dtype=dtypes.float32, name="var0")
+      var1 = resource_variable_ops.ResourceVariable(
+          [3.0, 4.0], dtype=dtypes.float32, name="var1")
+      loss = math_ops.reduce_sum(var0 + var1)
+      optimizer.minimize(loss)
+      optimizer_variables = optimizer.variables()
+      self.assertStartsWith(optimizer_variables[0].name, "var0")
+      self.assertStartsWith(optimizer_variables[1].name, "var1")
+      self.assertEquals(2, len(optimizer_variables))
+
+    with ops.Graph().as_default():
+      var2 = resource_variable_ops.ResourceVariable(
+          [1.0, 2.0], dtype=dtypes.float32, name="var2")
+      var3 = resource_variable_ops.ResourceVariable(
+          [3.0, 4.0], dtype=dtypes.float32, name="var3")
+      loss = math_ops.reduce_sum(var2 + var3)
+      optimizer.minimize(loss)
+      optimizer_variables = optimizer.variables()
+      self.assertStartsWith(optimizer_variables[0].name, "var2")
+      self.assertStartsWith(optimizer_variables[1].name, "var3")
+      self.assertEquals(2, len(optimizer_variables))
+
+  def testNesterovMomentum(self):
+    for dtype in [dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        cost = 5 * var0 * var0 + 3 * var1
+        global_step = variables.Variable(
+            array_ops.zeros([], dtypes.int64), name="global_step")
+        mom_op = sgd.SGD(learning_rate=2.0, momentum=0.9, nesterov=True)
+        opt_op = mom_op.minimize(cost, global_step, [var0, var1])
+        variables.global_variables_initializer().run()
+        for t in range(1, 5):
+          opt_op.run()
+          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
+              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
+                                                                    accum1_np,
+                                                                    3, 2.0, 0.9)
+          self.assertAllClose(var0_np, var0.eval())
+          self.assertAllClose(var1_np, var1.eval())
+
+  def testSparseNesterovMomentum(self):
+    for dtype in [dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        grads = []
+        for t in range(1, 5):
+          grads.append(var0_np * 10)
+          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
+              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
+                                                                    accum1_np,
+                                                                    3, 2.0, 0.9)
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        loss = 5 * var0 * var0 + 3 * var1
+        mom_op = sgd.SGD(learning_rate=2.0, momentum=0.9, nesterov=True)
+        x_feed = array_ops.placeholder(dtype)
+        y_feed = ops.IndexedSlices(
+            x_feed, constant_op.constant([0, 1]), constant_op.constant([2]))
+        grads_and_vars = [(y_feed, var0), (constant_op.constant(
+            [3.0, 3.0], dtype=dtype), var1)]
+        opt_update = mom_op.apply_gradients(grads_and_vars)
+        variables.global_variables_initializer().run()
+        for t in range(1, 5):
+          opt_update.run(feed_dict={x_feed: grads[t - 1]})
+          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
+              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
+                                                                    accum1_np,
+                                                                    3, 2.0, 0.9)
+          self.assertAllClose(var0_np, var0.eval())
+          self.assertAllClose(var1_np, var1.eval())
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      # This test invokes the ResourceSparseApplyMomentum operation, which
+      # did not have a registered GPU kernel as of April 2018. With graph
+      # execution, the placement algorithm notices this and automatically
+      # places the variable in CPU (host) memory. With eager execution,
+      # the variable would be placed in GPU memory if available, which
+      # would then conflict with the future invocation of the
+      # ResourceSparseApplyMomentum operation.
+      # To work around this discrepancy, for now we force the variable
+      # to be placed on CPU.
+      with ops.device("/cpu:0"):
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+
+      # pylint: disable=cell-var-from-loop
+      def loss():
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        return pred * pred
+      # pylint: enable=cell-var-from-loop
+
+      opt = sgd.SGD(learning_rate=1.0, momentum=0.0)
+      sgd_op = opt.minimize(loss)
+      self.evaluate(variables.global_variables_initializer())
+      # Run 1 step of sgd
+      self.evaluate(sgd_op)
+      # Validate updated params
+      self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
+    # This test invokes the ResourceSparseApplyMomentum operation, which
+    # did not have a registered GPU kernel as of April 2018. With graph
+    # execution, the placement algorithm notices this and automatically
+    # places the variable in CPU (host) memory. With eager execution,
+    # the variable would be placed in GPU memory if available, which
+    # would then conflict with the future invocation of the
+    # ResourceSparseApplyMomentum operation.
+    # To work around this discrepancy, for now we force the variable
+    # to be placed on CPU.
+    with ops.device("/cpu:0"):
+      var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2]))
+
+    def loss():
+      return math_ops.reduce_sum(embedding_ops.embedding_lookup(var0, [[1]]))
+
+    opt = sgd.SGD(learning_rate=1.0, momentum=0.0)
+    sgd_op = opt.minimize(loss)
+    self.evaluate(variables.global_variables_initializer())
+    self.evaluate(sgd_op)
+    self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0))
+
+  def testTensorLearningRateAndMomentum(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        mom_opt = sgd.SGD(
+            learning_rate=constant_op.constant(2.0),
+            momentum=constant_op.constant(0.9))
+        mom_update = mom_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Check we have slots
+        self.assertEqual(["momentum"], mom_opt.get_slot_names())
+        slot0 = mom_opt.get_slot(var0, "momentum")
+        self.assertEquals(slot0.get_shape(), var0.get_shape())
+        self.assertFalse(slot0 in variables.trainable_variables())
+        slot1 = mom_opt.get_slot(var1, "momentum")
+        self.assertEquals(slot1.get_shape(), var1.get_shape())
+        self.assertFalse(slot1 in variables.trainable_variables())
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Step 1: the momentum accumulators where 0. So we should see a normal
+        # update: v -= grad * learning_rate
+        mom_update.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), slot0.eval())
+        self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), slot1.eval())
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), var1.eval())
+        # Step 2: the momentum accumulators contain the previous update.
+        mom_update.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval())
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([
+                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
+            ]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([
+                2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - (
+                    (0.9 * 0.01 + 0.01) * 2.0)
+            ]), var1.eval())
+
+  def _dbParamsMom01(self):
+    """Return dist-belief momentum values.
+
+    Return values been generated from the dist-belief momentum unittest,
+    running with a learning rate of 0.1 and a momentum of 0.1.
+
+    These values record how a parameter vector of size 10, initialized with 0.0,
+    gets updated with 10 consecutive momentum steps.  It uses random gradients.
+
+    Returns:
+      db_grad: The gradients to apply
+      db_out: The parameters after the momentum update.
+    """
+    db_grad = [[]] * 10
+    db_out = [[]] * 10
+    # pylint: disable=line-too-long
+    db_grad[0] = [
+        0.00096264342, 0.17914793, 0.93945462, 0.41396621, 0.53037018,
+        0.93197989, 0.78648776, 0.50036013, 0.55345792, 0.96722615
+    ]
+    db_out[0] = [
+        -9.6264346e-05, -0.017914793, -0.093945466, -0.041396622, -0.053037018,
+        -0.093197994, -0.078648776, -0.050036013, -0.055345792, -0.096722618
+    ]
+    db_grad[1] = [
+        0.17075552, 0.88821375, 0.20873757, 0.25236958, 0.57578111, 0.15312378,
+        0.5513742, 0.94687688, 0.16012503, 0.22159521
+    ]
+    db_out[1] = [
+        -0.017181443, -0.10852765, -0.12421377, -0.070773244, -0.11591884,
+        -0.11783017, -0.14165108, -0.14972731, -0.076892875, -0.1285544
+    ]
+    db_grad[2] = [
+        0.35077485, 0.47304362, 0.44412705, 0.44368884, 0.078527533, 0.81223965,
+        0.31168157, 0.43203235, 0.16792089, 0.24644311
+    ]
+    db_out[2] = [
+        -0.053967446, -0.1648933, -0.1716533, -0.1180798, -0.13005978,
+        -0.20151734, -0.17911947, -0.20289968, -0.095839672, -0.15638189
+    ]
+    db_grad[3] = [
+        0.9694621, 0.75035888, 0.28171822, 0.83813518, 0.53807181, 0.3728098,
+        0.81454384, 0.03848977, 0.89759839, 0.93665648
+    ]
+    db_out[3] = [
+        -0.15459226, -0.24556576, -0.20456907, -0.20662397, -0.18528105,
+        -0.24716705, -0.2643207, -0.21206589, -0.18749419, -0.2528303
+    ]
+    db_grad[4] = [
+        0.38578293, 0.8536852, 0.88722926, 0.66276771, 0.13678469, 0.94036359,
+        0.69107032, 0.81897682, 0.5433259, 0.67860287
+    ]
+    db_out[4] = [
+        -0.20323303, -0.33900154, -0.29658359, -0.28175515, -0.20448165,
+        -0.34576839, -0.34194785, -0.29488021, -0.25099224, -0.33033544
+    ]
+    db_grad[5] = [
+        0.27885768, 0.76100707, 0.24625534, 0.81354135, 0.18959245, 0.48038563,
+        0.84163809, 0.41172323, 0.83259648, 0.44941229
+    ]
+    db_out[5] = [
+        -0.23598288, -0.42444581, -0.33041057, -0.3706224, -0.22536094,
+        -0.40366709, -0.43387437, -0.34433398, -0.34060168, -0.38302717
+    ]
+    db_grad[6] = [
+        0.27233034, 0.056316052, 0.5039115, 0.24105175, 0.35697976, 0.75913221,
+        0.73577434, 0.16014607, 0.57500273, 0.071136251
+    ]
+    db_out[6] = [
+        -0.26649091, -0.43862185, -0.38418442, -0.40361428, -0.26314685,
+        -0.48537019, -0.51664448, -0.36529395, -0.40706289, -0.39540997
+    ]
+    db_grad[7] = [
+        0.58697265, 0.2494842, 0.08106143, 0.39954534, 0.15892942, 0.12683646,
+        0.74053431, 0.16033, 0.66625422, 0.73515922
+    ]
+    db_out[7] = [
+        -0.32823896, -0.46498787, -0.39766794, -0.446868, -0.28281838,
+        -0.50622416, -0.59897494, -0.38342294, -0.48033443, -0.47016418
+    ]
+    db_grad[8] = [
+        0.8215279, 0.41994119, 0.95172721, 0.68000203, 0.79439718, 0.43384039,
+        0.55561525, 0.22567581, 0.93331909, 0.29438227
+    ]
+    db_out[8] = [
+        -0.41656655, -0.50961858, -0.49418902, -0.51919359, -0.36422527,
+        -0.55169362, -0.6627695, -0.40780342, -0.58099347, -0.50707781
+    ]
+    db_grad[9] = [
+        0.68297005, 0.67758518, 0.1748755, 0.13266537, 0.70697063, 0.055731893,
+        0.68593478, 0.50580865, 0.12602448, 0.093537711
+    ]
+    db_out[9] = [
+        -0.49369633, -0.58184016, -0.52132869, -0.5396927, -0.44306302,
+        -0.56181377, -0.73774242, -0.46082234, -0.60366184, -0.52012295
+    ]
+    # pylint: enable=line-too-long
+    return db_grad, db_out
+
+  def testLikeDistBeliefMom01(self):
+    with self.cached_session():
+      db_grad, db_out = self._dbParamsMom01()
+      num_samples = len(db_grad)
+      var0 = variables.Variable([0.0] * num_samples)
+      grads0 = constant_op.constant([0.0] * num_samples)
+      mom_opt = sgd.SGD(learning_rate=0.1, momentum=0.1)
+      mom_update = mom_opt.apply_gradients(zip([grads0], [var0]))
+      variables.global_variables_initializer().run()
+      for i in xrange(num_samples):
+        mom_update.run(feed_dict={grads0: db_grad[i]})
+        self.assertAllClose(np.array(db_out[i]), var0.eval())
+
+  def testSparse(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype))
+        var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2]))
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(
+                [[.1, .1]], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([4, 2]))
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(
+                [[.01, .01], [.01, .01]], dtype=dtype),
+            constant_op.constant([2, 3]),
+            constant_op.constant([4, 2]))
+        mom_opt = sgd.SGD(learning_rate=2.0, momentum=0.9)
+        mom_update = mom_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        # Check we have slots
+        self.assertEqual(["momentum"], mom_opt.get_slot_names())
+        slot0 = mom_opt.get_slot(var0, "momentum")
+        self.assertEquals(slot0.get_shape(), var0.get_shape())
+        slot1 = mom_opt.get_slot(var1, "momentum")
+        self.assertEquals(slot1.get_shape(), var1.get_shape())
+
+        # Fetch params to validate initial values
+        self.assertAllClose([0, 0], var0.eval()[0])
+        self.assertAllClose([0, 0], var0.eval()[1])
+        self.assertAllClose([1, 1], var1.eval()[2])
+
+        # Step 1: the momentum accumulators are 0. So we should see a normal
+        # update: v -= grad * learning_rate
+        mom_update.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(np.array([0, 0]), slot0.eval()[0])
+        self.assertAllCloseAccordingToType(np.array([.1, .1]), slot0.eval()[1])
+        self.assertAllCloseAccordingToType(
+            np.array([.01, .01]), slot1.eval()[2])
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(np.array([0, 0]), var0.eval()[0])
+        self.assertAllCloseAccordingToType(
+            np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), var0.eval()[1])
+        self.assertAllCloseAccordingToType(
+            np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), var1.eval()[2])
+        # Step 2: the momentum accumulators contain the previous update.
+        mom_update.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllClose(np.array([0, 0]), slot0.eval()[0])
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval()[1])
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
+            slot1.eval()[2])
+        # Check that the parameters have been updated.
+        self.assertAllClose(np.array([0, 0]), var0.eval()[0])
+        self.assertAllCloseAccordingToType(
+            np.array([
+                -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), -(0.1 * 2.0) - (
+                    (0.9 * 0.1 + 0.1) * 2.0)
+            ]), var0.eval()[1])
+        self.assertAllCloseAccordingToType(
+            np.array([
+                0.98 - ((0.9 * 0.01 + 0.01) * 2.0), 0.98 - (
+                    (0.9 * 0.01 + 0.01) * 2.0)
+            ]), var1.eval()[2])
+
+  def testSharing(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        mom_opt = sgd.SGD(learning_rate=2.0, momentum=0.9)
+        mom_update1 = mom_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        mom_update2 = mom_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        self.assertEqual(["momentum"], mom_opt.get_slot_names())
+        slot0 = mom_opt.get_slot(var0, "momentum")
+        self.assertEquals(slot0.get_shape(), var0.get_shape())
+        slot1 = mom_opt.get_slot(var1, "momentum")
+        self.assertEquals(slot1.get_shape(), var1.get_shape())
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Step 1: the momentum accumulators where 0. So we should see a normal
+        # update: v -= grad * learning_rate
+        mom_update1.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), slot0.eval())
+        self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), slot1.eval())
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), var1.eval())
+        # Step 2: the second momentum accumulators contain the previous update.
+        mom_update2.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval())
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([
+                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
+            ]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([
+                2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - (
+                    (0.9 * 0.01 + 0.01) * 2.0)
+            ]), var1.eval())
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 3f54f1f60413cbd3e9a5a4126f8ae04bc4e06abc Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Fri, 5 Oct 2018 12:45:56 -0700
Subject: [PATCH 1198/1357] Workaround build errors in Android NDK r14b.

PiperOrigin-RevId: 215950376
---
 tensorflow/tools/ci_build/Dockerfile.android | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/tools/ci_build/Dockerfile.android b/tensorflow/tools/ci_build/Dockerfile.android
index dcf077791a..7e72eb0cbf 100644
--- a/tensorflow/tools/ci_build/Dockerfile.android
+++ b/tensorflow/tools/ci_build/Dockerfile.android
@@ -45,9 +45,14 @@ ENV ANDROID_NDK_FILENAME android-ndk-r14b-linux-x86_64.zip
 ENV ANDROID_NDK_URL https://dl.google.com/android/repository/${ANDROID_NDK_FILENAME}
 ENV ANDROID_NDK_HOME ${ANDROID_DEV_HOME}/ndk
 ENV PATH ${PATH}:${ANDROID_NDK_HOME}
+# Workaround for b/117156972: inject missing #include into NDK versions of
+# futex.h.
 RUN cd ${ANDROID_DEV_HOME} && \
     wget -q ${ANDROID_NDK_URL} && \
     unzip ${ANDROID_NDK_FILENAME} -d ${ANDROID_DEV_HOME} && \
+    sed -i 15i"#include <linux/compiler.h>" ${ANDROID_DEV_HOME}/android-ndk-r14b/platforms/android-14/arch-arm/usr/include/linux/futex.h && \
+    sed -i 15i"#include <linux/compiler.h>" ${ANDROID_DEV_HOME}/android-ndk-r14b/platforms/android-14/arch-mips/usr/include/linux/futex.h && \
+    sed -i 15i"#include <linux/compiler.h>" ${ANDROID_DEV_HOME}/android-ndk-r14b/platforms/android-14/arch-x86/usr/include/linux/futex.h && \
     rm ${ANDROID_NDK_FILENAME} && \
     bash -c "ln -s ${ANDROID_DEV_HOME}/android-ndk-* ${ANDROID_NDK_HOME}"
 
-- 
GitLab


From 3427a3c638fb92a172d390266ed62403f9140f7d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 12:52:22 -0700
Subject: [PATCH 1199/1357] Internal change.

PiperOrigin-RevId: 215951354
---
 tensorflow/contrib/lite/kernels/BUILD        | 1 +
 tensorflow/contrib/lite/kernels/lstm_eval.cc | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index 68636fb070..d2d8073abd 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -259,6 +259,7 @@ cc_library(
     srcs = ["lstm_eval.cc"],
     hdrs = ["lstm_eval.h"],
     deps = [
+        ":op_macros",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "//tensorflow/contrib/lite/kernels/internal:kernel_utils",
         "//tensorflow/contrib/lite/kernels/internal:tensor_utils",
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.cc b/tensorflow/contrib/lite/kernels/lstm_eval.cc
index c6c21eb085..20a4e30009 100644
--- a/tensorflow/contrib/lite/kernels/lstm_eval.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.cc
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h"
 #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
 
 namespace tflite {
 namespace ops {
@@ -599,6 +600,7 @@ TfLiteStatus EvalFloat(
     const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
     TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
     TfLiteTensor* cell_state, TfLiteTensor* output) {
+  TF_LITE_ASSERT(input->dims->size >= 2 && input->dims->size <= 3);
   const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
   const int n_batch = input->dims->data[input->dims->size - 2];
   const int n_input = input->dims->data[input->dims->size - 1];
@@ -716,6 +718,7 @@ TfLiteStatus EvalHybrid(
     TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
     TfLiteTensor* output_state, TfLiteTensor* cell_state,
     TfLiteTensor* output) {
+  TF_LITE_ASSERT(input->dims->size >= 2 && input->dims->size <= 3);
   const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
   const int n_batch = input->dims->data[input->dims->size - 2];
   const int n_input = input->dims->data[input->dims->size - 1];
-- 
GitLab


From ec451f5ab43467d7cb4ae7736f2de16331441e0b Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Fri, 5 Oct 2018 12:53:50 -0700
Subject: [PATCH 1200/1357] Break up build --define <option_name>=true into two
 steps: 1) define bazel config    build:<bazel_config_name> --define
 <option_name>s=true 2) set the config    build --config=<bazel_config_name>

PiperOrigin-RevId: 215951614
---
 configure.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 65b4622995..89dc79b6b6 100644
--- a/configure.py
+++ b/configure.py
@@ -383,7 +383,9 @@ def set_build_var(environ_cp,
   var = str(int(get_var(environ_cp, var_name, query_item, enabled_by_default)))
   environ_cp[var_name] = var
   if var == '1':
-    write_to_bazelrc('build --define %s=true' % option_name)
+    write_to_bazelrc(
+        'build:%s --define %s=true' % (bazel_config_name, option_name))
+    write_to_bazelrc('build --config=%s' % bazel_config_name)
   elif bazel_config_name is not None:
     # TODO(mikecase): Migrate all users of configure.py to use --config Bazel
     # options and not to set build configs through environment variables.
-- 
GitLab


From f14287eabf69c57a2d2e044c311f2db1413cb6a5 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Fri, 5 Oct 2018 13:24:34 -0700
Subject: [PATCH 1201/1357] Copy device from If op to the lowered ops. Enable
 GPU tests for cond_v2.

PiperOrigin-RevId: 215956220
---
 tensorflow/core/common_runtime/lower_if_op.cc |  9 +++-
 tensorflow/python/kernel_tests/BUILD          |  3 +-
 .../python/kernel_tests/cond_v2_test.py       | 49 +++++++++----------
 .../kernel_tests/control_flow_ops_py_test.py  |  5 --
 4 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/tensorflow/core/common_runtime/lower_if_op.cc b/tensorflow/core/common_runtime/lower_if_op.cc
index a02084f223..9306386117 100644
--- a/tensorflow/core/common_runtime/lower_if_op.cc
+++ b/tensorflow/core/common_runtime/lower_if_op.cc
@@ -107,6 +107,8 @@ CondBuilder::CondBuilder(Node* if_op, const string& then_fn_name,
       then_call_builder_(NewName("then"), then_fn_name, graph->op_registry()),
       else_call_builder_(NewName("else"), else_fn_name, graph->op_registry()) {
   TF_CHECK_OK(if_op_->input_node(0, &pred_));
+  then_call_builder_.Device(if_op_->requested_device());
+  else_call_builder_.Device(if_op_->requested_device());
 }
 
 Status CondBuilder::CreatePivotNodes() {
@@ -117,15 +119,18 @@ Status CondBuilder::CreatePivotNodes() {
       NodeBuilder(NewName("switch_pred"), "Switch", graph_->op_registry())
           .Input(NodeOut(pred_, 0))
           .Input(NodeOut(pred_, 0))
+          .Device(if_op_->requested_device())
           .Finalize(graph_, &switch_pred));
   control_predecessor_ = switch_pred;
   TF_RETURN_IF_ERROR(
       NodeBuilder(NewName("pivot_f"), "Identity", graph_->op_registry())
           .Input(switch_pred, kElseBranch)
+          .Device(if_op_->requested_device())
           .Finalize(graph_, &pivot_f_));
   TF_RETURN_IF_ERROR(
       NodeBuilder(NewName("pivot_t"), "Identity", graph_->op_registry())
           .Input(switch_pred, kThenBranch)
+          .Device(if_op_->requested_device())
           .Finalize(graph_, &pivot_t_));
   return Status::OK();
 }
@@ -140,6 +145,7 @@ Status CondBuilder::AddInput(Node* src, int src_output) {
       NodeBuilder(NewName(src->name()), "Switch", graph_->op_registry())
           .Input(src, src_output)
           .Input(pred_, 0)
+          .Device(if_op_->requested_device())
           .Finalize(graph_, &input));
   then_call_builder_.Input(input, kThenBranch);
   else_call_builder_.Input(input, kElseBranch);
@@ -178,6 +184,7 @@ Status CondBuilder::AddOutputs() {
     TF_RETURN_IF_ERROR(
         NodeBuilder(graph_->NewName("merge"), "Merge", graph_->op_registry())
             .Input({NodeOut(then_call_node_, i), NodeOut(else_call_node_, i)})
+            .Device(if_op_->requested_device())
             .Finalize(graph_, &merges[i]));
     outputs_[i] = NodeOut(merges[i], 0);
   }
@@ -218,7 +225,7 @@ Status InlineCallInGraph(Node* n, const FunctionLibraryDefinition& flib,
 Status CondBuilder::BuildLoweredIfOutput() {
   // Build the identity node output.
   NodeBuilder ib(name_, "IdentityN");
-  ib.Input(outputs_);
+  ib.Input(outputs_).Device(if_op_->requested_device());
   return ib.Finalize(graph_, &lowered_if_output_);
 }
 
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index e055ef1c1b..4e8639dfc8 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -3255,7 +3255,7 @@ tf_py_test(
     tags = ["no_pip"],
 )
 
-tf_py_test(
+cuda_py_test(
     name = "cond_v2_test",
     size = "medium",
     srcs = ["cond_v2_test.py"],
@@ -3272,7 +3272,6 @@ tf_py_test(
         "//tensorflow/python:training",
     ],
     grpc_enabled = True,
-    tags = ["no_gpu"],  # TODO(b/111656070)
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 377c041675..ec875aae59 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -172,7 +172,7 @@ class CondV2Test(test.TestCase):
     self._testCond(true_fn, false_fn, [y])
 
   def testNestedDefunInCond(self):
-    self.skipTest("b/110550782")
+    self.skipTest("b/117284369")
 
     x = constant_op.constant(1.0, name="x")
     y = constant_op.constant(2.0, name="y")
@@ -198,7 +198,7 @@ class CondV2Test(test.TestCase):
     self._testCond(true_fn, false_fn, [y])
 
   def testDoubleNestedDefunInCond(self):
-    self.skipTest("b/110550782")
+    self.skipTest("b/117284369")
 
     x = constant_op.constant(1.0, name="x")
     y = constant_op.constant(2.0, name="y")
@@ -468,7 +468,6 @@ class CondV2Test(test.TestCase):
             }), [5., 0.])
 
   def testBuildCondAndGradientInsideDefun(self):
-    self.skipTest("b/110550782")
 
     def build_graph():
       pred_outer = array_ops.placeholder(dtypes.bool, name="pred_outer")
@@ -502,29 +501,29 @@ class CondV2Test(test.TestCase):
 
       return grads, pred_outer, pred_inner
 
-    with ops.Graph().as_default():
+    with ops.Graph().as_default(), self.session(
+        graph=ops.get_default_graph()) as sess:
       grads, pred_outer, pred_inner = build_graph()
-      with self.session(graph=ops.get_default_graph()) as sess:
-        self.assertSequenceEqual(
-            sess.run(grads, {
-                pred_outer: True,
-                pred_inner: True
-            }), [0., 0.])
-        self.assertSequenceEqual(
-            sess.run(grads, {
-                pred_outer: True,
-                pred_inner: False
-            }), [0., 0.])
-        self.assertSequenceEqual(
-            sess.run(grads, {
-                pred_outer: False,
-                pred_inner: True
-            }), [4., 2.])
-        self.assertSequenceEqual(
-            sess.run(grads, {
-                pred_outer: False,
-                pred_inner: False
-            }), [5., 0.])
+      self.assertSequenceEqual(
+          sess.run(grads, {
+              pred_outer: True,
+              pred_inner: True
+          }), [0., 0.])
+      self.assertSequenceEqual(
+          sess.run(grads, {
+              pred_outer: True,
+              pred_inner: False
+          }), [0., 0.])
+      self.assertSequenceEqual(
+          sess.run(grads, {
+              pred_outer: False,
+              pred_inner: True
+          }), [4., 2.])
+      self.assertSequenceEqual(
+          sess.run(grads, {
+              pred_outer: False,
+              pred_inner: False
+          }), [5., 0.])
 
   def testSecondDerivative(self):
     with self.cached_session() as sess:
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index c7e89dd5f9..7fae5249aa 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -23,7 +23,6 @@ from __future__ import print_function
 import collections
 import math
 import time
-import unittest
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
@@ -661,7 +660,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
       sess.run(r)
 
-  @test_util.disable_control_flow_v2("b/113346829 (gpu failure)")
   def testCondGrad_1(self):
     graph = ops.Graph()
     with graph.as_default():
@@ -3424,9 +3422,6 @@ class EagerTest(test.TestCase):
 
   # TODO(b/117279927): Re-enable once msan failure is fixed.
   def DISABLED_testCondInDefun(self):
-    if "GPU" in [d.device_type for d in device_lib.list_local_devices()]:
-      return unittest.skip("b/113346829 (gpu failure)")
-
     with context.eager_mode():
 
       @eager_function.defun
-- 
GitLab


From 0c37dcc02f54395d2bde3cc5850574c8f98f1b46 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Fri, 5 Oct 2018 13:32:24 -0700
Subject: [PATCH 1202/1357] [XLA] Use the highest possible precision for large
 Iota inputs.

PiperOrigin-RevId: 215957327
---
 tensorflow/compiler/xla/tests/convolution_test.cc | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc
index 070b092d18..b851db14ec 100644
--- a/tensorflow/compiler/xla/tests/convolution_test.cc
+++ b/tensorflow/compiler/xla/tests/convolution_test.cc
@@ -91,7 +91,14 @@ class ForwardPassConvolution_3x3x256_256_OutputZ_Iota : public ConvolutionTest {
     XlaBuilder builder(TestName());
     auto lhs = ConstantR4FromArray4D<T>(&builder, *alhs);
     auto rhs = ConstantR4FromArray4D<T>(&builder, *arhs);
-    Conv(lhs, rhs, {1, 1}, Padding::kValid);
+    PrecisionConfig precision;
+    // The left hand side of the convolution is numbers between 0 and 2304 which
+    // requires at least 11 mantissa bits and the DEFAULT precision config is
+    // allowed to round to bfloat16 which only has 7 mantissa bits.
+    precision.add_operand_precision(PrecisionConfig::HIGHEST);
+    precision.add_operand_precision(PrecisionConfig::DEFAULT);
+    Conv(lhs, rhs, {1, 1}, Padding::kValid, /*feature_group_count=*/1,
+         &precision);
 
     ComputeAndCompare(&builder, {}, error_spec_);
   }
-- 
GitLab


From 4d69a79b1ebd0c2180959c1047fbc9db106701e1 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Fri, 5 Oct 2018 13:33:38 -0700
Subject: [PATCH 1203/1357] Handle Range & BatchMatMul in partial Flex mode

PiperOrigin-RevId: 215957535
---
 .../contrib/lite/toco/import_tensorflow.cc    | 37 ++++++++-
 tensorflow/contrib/lite/toco/model.h          |  9 +-
 tensorflow/contrib/lite/toco/tflite/export.cc | 83 +++++++++++--------
 .../contrib/lite/toco/tflite/export_test.cc   | 34 ++++++++
 .../contrib/lite/toco/tflite/operator.cc      | 32 ++++---
 .../contrib/lite/toco/tflite/operator.h       |  6 ++
 6 files changed, 155 insertions(+), 46 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 5eaf6e27fc..133ef79a34 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -477,6 +477,30 @@ string CreateConstArray(Model* model, string const& name,
   return array_name;
 }
 
+// Retain TensorFlow NodeDef in Toco Operator.
+//
+// If an op is supported by Toco but not supported by TFLite, TFLite exporter
+// will use the retained NodeDef to populate a Flex op when Flex mode is
+// enabled.
+//
+// This can't be easily applied to all operations, because a TensorFlow node
+// may become multiple Toco operators. Thus we need to call this function in
+// operator conversion functions one by one whenever feasible.
+//
+// This may cause problems if a graph transformation rule changes parameters
+// of the node. When calling this function, please check if any existing
+// graph transformation rule will change an existing operator with the same
+// type.
+//
+// This provides a route to handle Toco-supported & TFLite-unsupported ops
+// in Flex mode. However it's not a solid solution. Eventually we should
+// get rid of this.
+// TODO(b/117327937): Implement all Toco-supported ops in TFLite, and remove
+// this function.
+void RetainTensorFlowNodeDef(const NodeDef& node, Operator* op) {
+  node.SerializeToString(&op->tensorflow_node_def);
+}
+
 tensorflow::Status ConvertConstOperator(
     const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
     Model* model) {
@@ -990,6 +1014,10 @@ tensorflow::Status ConvertBatchMatMulOperator(
   auto* batch_matmul = new BatchMatMulOperator;
   batch_matmul->inputs = {node.input(0), node.input(1)};
   batch_matmul->outputs = {node.name()};
+
+  // For Flex mode. Please read the comments of the function.
+  RetainTensorFlowNodeDef(node, batch_matmul);
+
   model->operators.emplace_back(batch_matmul);
   return tensorflow::Status::OK();
 }
@@ -1081,7 +1109,10 @@ tensorflow::Status ConvertUnsupportedOperator(
 
   auto* op = new TensorFlowUnsupportedOperator;
   op->tensorflow_op = node.op();
-  node.SerializeToString(&op->tensorflow_node_def);
+
+  // For Flex mode. Please read the comments of the function.
+  RetainTensorFlowNodeDef(node, op);
+
   model->operators.emplace_back(op);
 
   // Parse inputs.
@@ -1605,6 +1636,10 @@ tensorflow::Status ConvertRangeOperator(
   op->inputs.push_back(node.input(1));
   op->inputs.push_back(node.input(2));
   op->outputs.push_back(node.name());
+
+  // For Flex mode. Please read the comments of the function.
+  RetainTensorFlowNodeDef(node, op);
+
   model->operators.emplace_back(op);
   return tensorflow::Status::OK();
 }
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 6e207fdf54..61f1f095e9 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -376,6 +376,13 @@ struct Operator {
   // looks unused.
   bool unresolved_outputs = false;
 
+  // A serialized tensorflow::NodeDef string.
+  // The field is filled only when importing from TensorFlow.
+  // It's guaranteed to be filled for `TensorFlowUnsupportedOperator`.
+  // It's not guaranteed to be filled for other ops. Ops created by graph
+  // transformations won't have TensorFlow NodeDef.
+  string tensorflow_node_def;
+
  protected:
   // Constructor used by subclasses for specific OperatorType's.
   explicit Operator(OperatorType t)
@@ -1535,8 +1542,6 @@ struct TensorFlowUnsupportedOperator : Operator {
 
   // The original TF operation type. Used for diagnostic purposes.
   string tensorflow_op;
-  // A serialized tensorflow::NodeDef string.
-  string tensorflow_node_def;
   // A boolean indicating if the unsupported op should be treated as quantized.
   bool quantized = false;
   // A boolean indicating if the unsupported op output should allow float values
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index f6f76e48a4..3b34cd6285 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -95,11 +95,13 @@ OperatorKey GetOperatorKey(
     const ::toco::Operator& op,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
     bool allow_flex_ops) {
+  // Get the op name (by Toco definition).
   string name = HelpfulOperatorTypeName(op);
-  const auto& builtin_ops = GetBuiltinOpsMap();
 
   bool is_builtin = false;
   OperatorKey key;
+
+  const auto& builtin_ops = GetBuiltinOpsMap();
   if (ops_by_type.count(op.type) != 0) {
     key.version = ops_by_type.at(op.type)->GetVersion(op);
     name = ops_by_type.at(op.type)->name();
@@ -110,37 +112,46 @@ OperatorKey GetOperatorKey(
     // For TFLite supported builtin ops, find out its BuiltinOperator enum used
     // in FlatBuffer.
     key.type = builtin_ops.at(name);
-  } else {
-    key.type = BuiltinOperator_CUSTOM;
-
-    key.is_custom_op = true;
-    if (op.type == OperatorType::kUnsupported) {
-      const TensorFlowUnsupportedOperator& unsupported_op =
-          static_cast<const TensorFlowUnsupportedOperator&>(op);
-      const auto tensorflow_op = unsupported_op.tensorflow_op;
-
-      // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
-      // to populate a regular custom op. We need to find a way to fix this.
-      if (allow_flex_ops) {
-        // Memorize the original TensorFlow op name.
-        key.flex_tensorflow_op = tensorflow_op;
-        // Prefix the custom code of the flex op.
-        key.custom_code =
-            string(::tflite::kFlexCustomCodePrefix) + tensorflow_op;
-        key.is_flex_op = true;
-
-        if (IsControlFlowOp(tensorflow_op)) {
-          key.is_unsupported_flex_op = true;
-        }
-      } else {
-        key.custom_code = tensorflow_op;
-      }
+    return key;
+  }
+
+  // The logic below is all for custom ops.
+  key.is_custom_op = true;
+  key.type = BuiltinOperator_CUSTOM;
+
+  if (op.type == OperatorType::kUnsupported) {
+    const TensorFlowUnsupportedOperator& unsupported_op =
+        static_cast<const TensorFlowUnsupportedOperator&>(op);
+    const auto tensorflow_op = unsupported_op.tensorflow_op;
+
+    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
+    // to populate a regular custom op. We need to find a way to fix this.
+    if (allow_flex_ops) {
+      key.is_flex_op = true;
+      key.flex_tensorflow_op = tensorflow_op;
+      key.custom_code =
+          string(::tflite::kFlexCustomCodePrefix) + key.flex_tensorflow_op;
     } else {
-      // For Toco-supported/TFLite-unsupported ops, currently we produce a
-      // custom op. This gives developers a chance to implement custom ops.
-      // TODO(b/116800229): Also produce Toco-supported/TFLite-unsupported ops
-      // as Flex ops when Flex mode is enabled.
-      key.custom_code = name;
+      key.custom_code = tensorflow_op;
+    }
+  } else if (allow_flex_ops && !op.tensorflow_node_def.empty()) {
+    // For Toco-supported/TFLite-unsupported ops, if the TensorFlow NodeDef
+    // is retained in the Toco Operator, we produce a Flex op if Flex mode
+    // is enabled.
+    key.is_flex_op = true;
+    key.flex_tensorflow_op = name;
+    key.custom_code =
+        string(::tflite::kFlexCustomCodePrefix) + key.flex_tensorflow_op;
+  } else {
+    // If Flex is disabled or the original TensorFlow NodeDef isn't available,
+    // we produce a custom op. This gives developers a chance to implemenr
+    // custom ops.
+    key.custom_code = name;
+  }
+
+  if (key.is_flex_op) {
+    if (IsControlFlowOp(key.flex_tensorflow_op)) {
+      key.is_unsupported_flex_op = true;
     }
   }
   return key;
@@ -323,8 +334,9 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
       outputs.push_back(tensors_map.at(output));
     }
 
-    int op_index = operators_map.at(
-        details::GetOperatorKey(*op, ops_by_type, params.allow_flex_ops));
+    const auto key =
+        details::GetOperatorKey(*op, ops_by_type, params.allow_flex_ops);
+    int op_index = operators_map.at(key);
 
     auto tflite_op_it = ops_by_type.find(op->type);
     BaseOperator* tflite_op = tflite_op_it == ops_by_type.end()
@@ -349,6 +361,11 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
           variable_tensor_indices->insert(variable_tensor_index);
         }
       }
+    } else if (key.is_flex_op && !op->tensorflow_node_def.empty()) {
+      auto fbb = WriteFlexOpOptions(op->tensorflow_node_def);
+      if (fbb) {
+        options = Options::Custom(builder->CreateVector(fbb->GetBuffer()));
+      }
     }
     // The only supported CustomOptionFormat is FLEXBUFFERS now.
     op_vector.push_back(CreateOperator(
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index d48ab78285..eda1aa78a3 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/toco/tflite/builtin_operator.h"
 #include "tensorflow/contrib/lite/toco/tflite/operator.h"
 #include "tensorflow/contrib/lite/toco/tflite/types.h"
+#include "tensorflow/core/framework/node_def.pb.h"
 
 namespace toco {
 namespace tflite {
@@ -382,6 +383,39 @@ TEST(OperatorKeyTest, TestFlexWithControlFlowOp) {
   EXPECT_TRUE(key.is_unsupported_flex_op);
 }
 
+TEST(OperatorKeyTest, TestFlexWithPartiallySupportedOps) {
+  // Test Toco-supported/TFLite-unsupported operators.
+  // TODO(ycling): The test will be broken if Range is implemented in TFLite.
+  // Find a more robust way to test the fallback logic.
+  auto op = absl::make_unique<RangeOperator>();
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+
+  {
+    // If NodeDef isn't retained in the Toco op, a regular custom op
+    // will be exported.
+    const auto key = details::GetOperatorKey(*op, ops_by_type, true);
+    EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+    EXPECT_EQ(key.custom_code, "Range");
+    EXPECT_EQ(key.version, 1);
+    EXPECT_FALSE(key.is_flex_op);
+  }
+
+  ::tensorflow::NodeDef node_def;
+  node_def.set_name("Range");
+  node_def.set_op("Range");
+  node_def.SerializeToString(&op->tensorflow_node_def);
+
+  {
+    // If NodeDef is retained in the Toco op, a Flex op will be exported.
+    const auto key = details::GetOperatorKey(*op, ops_by_type, true);
+    EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+    EXPECT_EQ(key.custom_code, "FlexRange");
+    EXPECT_EQ(key.version, 1);
+    EXPECT_TRUE(key.is_flex_op);
+  }
+}
+
 // TODO(ahentz): tests for tensors, inputs, outputs, opcodes and operators.
 
 }  // namespace
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 9addbb81e7..ed37535fe0 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -1157,6 +1157,25 @@ class Unpack : public BuiltinOperator<UnpackOperator, ::tflite::UnpackOptions,
   int GetVersion(const Operator& op) const override { return 1; }
 };
 
+std::unique_ptr<flexbuffers::Builder> WriteFlexOpOptions(
+    const string& tensorflow_node_def) {
+  auto fbb = absl::make_unique<flexbuffers::Builder>();
+
+  ::tensorflow::NodeDef node_def;
+  if (!node_def.ParseFromString(tensorflow_node_def)) {
+    LOG(ERROR) << "Failed to parse TensorFlow NodeDef";
+    return {};
+  }
+
+  fbb->Vector([&]() {
+    fbb->String(node_def.op());
+    fbb->String(tensorflow_node_def);
+  });
+  fbb->Finish();
+  LOG(INFO) << "Writing flex op: " << node_def.op();
+  return std::unique_ptr<flexbuffers::Builder>(fbb.release());
+}
+
 class TensorFlowUnsupported : public BaseOperator {
  public:
   TensorFlowUnsupported(const string& name, OperatorType type,
@@ -1192,6 +1211,9 @@ class TensorFlowUnsupported : public BaseOperator {
 
   std::unique_ptr<flexbuffers::Builder> WriteOptions(
       const TensorFlowUnsupportedOperator& op) const {
+    if (allow_flex_ops_) {
+      return WriteFlexOpOptions(op.tensorflow_node_def);
+    }
     auto fbb = absl::make_unique<flexbuffers::Builder>();
 
     ::tensorflow::NodeDef node_def;
@@ -1200,16 +1222,6 @@ class TensorFlowUnsupported : public BaseOperator {
       return std::unique_ptr<flexbuffers::Builder>();
     }
 
-    if (allow_flex_ops_) {
-      fbb->Vector([&]() {
-        fbb->String(node_def.op());
-        fbb->String(op.tensorflow_node_def);
-      });
-      fbb->Finish();
-      LOG(INFO) << "Writing flex op: " << node_def.op();
-      return std::unique_ptr<flexbuffers::Builder>(fbb.release());
-    }
-
     bool has_valid_attr = false;
     size_t map_start = fbb->StartMap();
     for (const auto& pair : node_def.attr()) {
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h
index 13d9f6c49a..6e4e0a16d1 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.h
+++ b/tensorflow/contrib/lite/toco/tflite/operator.h
@@ -16,6 +16,7 @@ limitations under the License.
 #define TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_OPERATOR_H_
 
 #include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/flexbuffers.h"
 #include "tensorflow/contrib/lite/schema/schema_generated.h"
 #include "tensorflow/contrib/lite/toco/model.h"
 
@@ -36,6 +37,11 @@ std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
 std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
     bool allow_flex_ops = false);
 
+// Write the custom option FlexBuffer with a serialized TensorFlow NodeDef
+// for a Flex op.
+std::unique_ptr<flexbuffers::Builder> WriteFlexOpOptions(
+    const string& tensorflow_node_def);
+
 // These are the flatbuffer types for custom and builtin options.
 using CustomOptions = flatbuffers::Vector<uint8_t>;
 using BuiltinOptions = void;
-- 
GitLab


From efcf11cd44dfe8ddc441aa58f1b21ff7c8444568 Mon Sep 17 00:00:00 2001
From: shengfuintel <sheng.fu@intel.com>
Date: Fri, 5 Oct 2018 13:47:52 -0700
Subject: [PATCH 1204/1357] Clean up the code under INTEL_MKL_ML_ONLY

---
 tensorflow/core/graph/mkl_layout_pass.cc      | 2177 +----------------
 tensorflow/core/graph/mkl_layout_pass_test.cc | 1865 --------------
 2 files changed, 1 insertion(+), 4041 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 7394b1cddf..42a35727db 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -45,2181 +45,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-#ifdef INTEL_MKL_ML_ONLY
-
-// This pass implements rewriting of graph to support following scenarios:
-// (A) Merging nodes in the graph
-// (B) Rewriting a node in the graph to a new node
-//     Rewrite happens under following 2 scenarios:
-//     1) Propagating Mkl layout as an additional output tensor
-//        (we will loosely call a tensor that carries Mkl layout as Mkl tensor
-//         henceforth.) from every Mkl supported NN layer.
-//     2) Context-based rewrite: This is needed in order to optimize
-//        gradient ops of Conv2D+AddBias. Gradient op of both the Conv2D and
-//        MatMul is BiasAddGrad, and we need to rewrite BiasAddGrad into
-//        Conv2D-specific BiasAddGrad, and MatMul-specific BiasAddGrad.
-//        This is context-specific optimization, where the context is the
-//        forward operator that the BiasAddGrad corresponds to.
-//
-// Example of A : Merging nodes in the graph
-// -----------------------------------------
-// Currently, we merge Conv2D+AddBias together. Consider Conv2D and BiasAdd as:
-//
-//           O = Conv2D(A, B)
-//           P = BiasAdd(O, C)
-//
-// We merge them into Conv2DWithBias as:
-//           P = _MklConv2DWithBias(A, A_m, B, B_m, C, C_m)
-//
-// The meaning of A_m, B_m and C_m is explained in B.1.
-//
-// Merge rules:
-//  - The merge for Conv2D and BiasAdd happens when the output of Conv2D _only_
-//    goes to BiasAdd.
-//  - Also, the intersection of attributes of both the nodes must have same
-//    values.
-//  - Both the nodes must have been assigned to same device (if any).
-//
-// Example of B.1 : Rewriting nodes to Mkl nodes
-// ---------------------------------------------
-// Consider a Relu node. Current definition of Relu node looks like:
-//
-//           O = Relu(A)
-//
-// Relu has 1 input (A), and 1 output (O).
-//
-// This rewrite pass will generate a new graph node for Relu (new node is
-// called MklRelu) as:
-//
-//          O, O_m = MklRelu(A, A_m)
-//
-// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m). Here input A is
-// same as input A of Relu; output O is same as output O of Relu. O_m is the
-// additional output tensor that will be set by MklRelu, and it represents
-// Mkl tensor corresponding to O -- in other words, O_m is some kind of
-// metadata for O. A_m is additional input of Relu, and it represents metadata
-// for A - as O_m is metadata for O, A_m is metadata for A. MklRelu receives
-// this metadata from previous node in the graph.
-//
-// When a previous node in the graph is an Mkl node, A_m will represent a valid
-// Mkl tensor. But when a previous node is not an Mkl node, A_m will represent
-// a dummy Mkl tensor.
-//
-// Rewriting rules:
-//  - Selection of a node for rewriting happens by registering the op type of
-//    the node with the rewriting pass. If the op type is not registered, then
-//    all nodes of this op type will not be rewritten.
-//  - Number of inputs after rewriting:
-//      Since for every input Tensorflow tensor, the rewritten node gets Mkl
-//      tensor(s), rewritten node gets 2*N inputs, where N is the number of
-//      inputs for the original node.
-//  - Number of outputs after rewriting:
-//      Since for every output Tensorflow tensor, the rewritten node generates
-//      Mkl tensor(s), the rewritten node generates 2*N outputs, where N is the
-//      number of outputs of the original node.
-//  - Ordering of Tensorflow tensors and Mkl tensors:
-//      Since every rewritten node generates twice the number of inputs and
-//      outputs, one could imagine various orderings among Tensorflow tensors
-//      and Mkl tensors. E.g., assume an op 'Conv2D' that takes (A, B) as
-//      inputs, then the new op '_MklConv2D' can take inputs A, B, A_m and B_m
-//      in A, A_m, B, B_m order or it can also take them in A, B, A_m, B_m
-//      order. Among N inputs one can get N! permutations.
-//
-//      So the question is: which order do we follow? We support 2 types of
-//      orderings: (1) interleaved, and (2) contiguous. Interleaved ordering
-//      follows an intuitive order where an Mkl tensor follows the
-//      corresponding Tensorflow tensor immediately. In the context of the
-//      above example, it will be: A, A_m, B, B_m. Note that the ordering rule
-//      applies to both the inputs and outputs. Contiguous ordering means
-//      all the Tensorflow tensors are contiguous followed by all the Mkl
-//      tensors. We use contiguous ordering as default.
-//
-// Graph rewrite algorithm:
-//      Algorithm: Graph Rewrite
-//      Input: Graph G, Names of the nodes to rewrite and their new names
-//      Output: Modified Graph G' if the nodes are modified, G otherwise.
-//      Start:
-//        N = Topological_Sort(G) // N is a set of nodes in toposort order.
-//        foreach node n in N
-//        do
-//          if (Is_MKL_Op(n))  // Can this node accept an Mkl layout as input.
-//          then
-//            E = set of <incoming edge and its src_output slot> of n
-//            E' = {}   // a new set of edges for rewritten node
-//            foreach <e,s> in E
-//            do
-//              E' U {<e,s>}  // First copy edge which generates Tensorflow
-//                            // tensor as it is
-//              m = Source node of edge e
-//              if Is_Rewritten(m)  // Did we rewrite this node in this pass?
-//              then
-//                E' U {<m,s+1>}    // If yes, then m will generate an Mkl
-//                                  // tensor as an additional output.
-//              else
-//                d = Generate_Dummy_Mkl_Tensor()  // If not, generate a dummy
-//                                                 // Mkl tensor.
-//                E' U {<d,0>}  // The dummy Mkl tensor has only 1 output slot.
-//              fi
-//            done
-//            n' = Build_New_Node(G,new_name,E')
-//            Mark_Rewritten(n')  // Mark the new node as being rewritten.
-//          fi
-//        done
-//
-//      Explanation:
-//        For graph rewrite, we visit nodes of the input graph in the
-//        topological sort order. With this ordering, we visit nodes in the
-//        top-to-bottom fashion. We need this order because while visiting a
-//        node we want that all of its input nodes are visited and rewritten if
-//        applicable. This is because if we need to rewrite a given node
-//        then all of its input nodes need to be fixed (in other words they
-//        cannot be deleted later.)
-//
-//        While visiting a node, we first check if the op type of the node is
-//        an Mkl op. If it is, then we rewrite that node after constructing
-//        new inputs to the node. If the op type of the node is not Mkl op,
-//        then we do not rewrite that node.
-//
-// Handling workspace propagation for certain ops:
-//
-//        Certain backward ops in MKL (MaxPool, LRN and BatchNorm) require
-//        passing of a workspace from their respective forward ops. Workspace
-//        tensors provide memory for storing results of intermediate operations
-//        which are helpful in backward propagation. TensorFlow does not have
-//        a notion of a workspace and as a result does not allow producing
-//        additional outputs from these forward ops. For these ops, we need
-//        to add 2 extra edges between forward ops and their corresponding
-//        backward ops - the first extra edge carries a workspace tensor and
-//        the second one carries an Mkl tensor for the workspace tensor.
-//
-//        Example:
-//
-//        Typical graph for MaxPool and its gradient looks like:
-//
-//        A = MaxPool(T)
-//        B = MaxPoolGrad(X, A, Y)
-//
-//        We will transform this graph to propagate the workspace as:
-//        (with the contiguous ordering)
-//
-//        A, W, A_m, W_m = MklMaxPool(T, T_m)
-//        B, B_m = MklMaxPoolGrad(X, A, Y, W, X_m, A_m, Y_m, W_m)
-//
-//        Here W is the workspace tensor. Transformed tensor names with the
-//        suffix _m are Mkl tensors, and this transformation has been done
-//        using the algorithm discussed earlier. The transformation for
-//        workspace propagation only adds extra outputs (W, W_m) for a forward
-//        op and connects them to the corresponding backward ops.
-//
-//        Terms:
-//
-//        Forward op name = name of the op in the forward pass
-//          where a workspace tensor originates (MaxPool in this example)
-//        Backward op name = name of the op in the backward pass that receives
-//          a workspace tensor from the forward op (MaxPoolGrad in the example)
-//        Slot = Position of the output or input slot that will be
-//               used by the workspace tensor (1 for MklMaxPool as W is the 2nd
-//               output of MaxPool (0 is 1st); 3 for MklMaxPoolGrad)
-//
-//        Question:
-//
-//        How do we associate a backward op to a forward op? There can be more
-//        than one op with the exact same name.
-//
-//        In this example, we associate MaxPoolGrad with MaxPool. But there
-//        could be more than one MaxPool ops. To solve this problem, we look
-//        for _direct_ edge between a forward op and a backward op (tensor A is
-//        flowing along this edge in the example).
-//
-//        How do we transform forward and backward ops when there is no direct
-//        edge between them? In such a case, we generate dummy tensors for
-//        workspace tensors. For the example, transformation of MaxPool will
-//        be exactly same as it would be when there is a direct edge between
-//        the forward and the backward op --- it is just that MaxPool won't
-//        generate any workspace tensor. For MaxPoolGrad, the transformation
-//        will also be same, but instead of connecting W and W_m with the
-//        outputs of MaxPool, we will produce dummy tensors for them, and we
-//        will set workspace_enabled attribute to false.
-//
-// Example of B.2 : Context-based node rewrite
-// -------------------------------------------
-// Consider BiasAddGrad op as:
-//
-//           O = _MklConv2D(A, B, C, A_m, B_m, C_m)
-//           P = BiasAddGrad(O)
-//
-// Then we rewrite it as:
-//
-//           P = Conv2DWithBiasBackpropBias(O, O_m)
-//
-// Rewrite of BiasAddGrad into Conv2DWithBiasBackpropBias takes place depending
-// on the matching 'context'. The term context is loosely related to which
-// forward op is _associated_ to BiasAddGrad. If it is _MklConv2DWithBias then
-// we consider it Conv2D context; if it is MatMul, then it is MatMul context.
-
-class MklLayoutRewritePass : public GraphOptimizationPass {
- public:
-  MklLayoutRewritePass() {
-    // NOTE: names are alphabetically sorted.
-    csinfo_.addn = "AddN";
-    csinfo_.avg_pool = "AvgPool";
-    csinfo_.avg_pool_grad = "AvgPoolGrad";
-    csinfo_.bias_add = "BiasAdd";
-    csinfo_.bias_add_grad = "BiasAddGrad";
-    csinfo_.concat = "Concat";
-    csinfo_.concatv2 = "ConcatV2";
-    csinfo_.conv2d = "Conv2D";
-    csinfo_.conv2d_grad_input = "Conv2DBackpropInput";
-    csinfo_.conv2d_grad_filter = "Conv2DBackpropFilter";
-    csinfo_.fused_batch_norm = "FusedBatchNorm";
-    csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad";
-    csinfo_.identity = "Identity";
-    csinfo_.lrn = "LRN";
-    csinfo_.lrn_grad = "LRNGrad";
-    csinfo_.matmul = "MatMul";
-    csinfo_.max_pool = "MaxPool";
-    csinfo_.max_pool_grad = "MaxPoolGrad";
-    csinfo_.mkl_conv2d = "_MklConv2D";
-    csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput";
-    csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter";
-    csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
-    csinfo_.mkl_conv2d_with_bias_backprop_bias =
-        "_MklConv2DWithBiasBackpropBias";
-    csinfo_.relu = "Relu";
-    csinfo_.relu_grad = "ReluGrad";
-    csinfo_.reshape = "Reshape";
-    csinfo_.split = "Split";
-    // Element-wise ops. Ensure you also add any new ops to IsOpElementWise
-    // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the
-    // MklInputConversion op is added before it.
-    csinfo_.add = "Add";
-    csinfo_.maximum = "Maximum";
-    csinfo_.mul = "Mul";
-    csinfo_.squared_difference = "SquaredDifference";
-    csinfo_.sub = "Sub";
-    // End - element-wise ops. See note above.
-
-    // NOTE: names are alphabetically sorted.
-    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
-                      CopyAttrsAddN, AddNRewrite, nullptr});
-    rinfo_.push_back({csinfo_.add, mkl_op_registry::GetMklOpName(csinfo_.add),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.avg_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
-                      CopyAttrsPooling, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.avg_pool_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
-                      CopyAttrsPooling, AlwaysRewrite, nullptr});
-    // BiasAddGrad gets written into Conv2DWithBiasBackpropBias depending
-    // on if context contains Conv2D.
-    rinfo_.push_back({csinfo_.bias_add_grad,
-                      csinfo_.mkl_conv2d_with_bias_backprop_bias,
-                      CopyAttrsBiasAddGrad, ContextMatchRewrite,
-                      &biasaddgrad_conv2dwithbias_context_});
-    // BiasAddGrad gets written into BiasAddGrad depending on if context
-    // contains MatMul.
-    rinfo_.push_back({csinfo_.bias_add_grad, csinfo_.matmul,
-                      CopyAttrsBiasAddGrad, ContextMatchRewrite,
-                      &biasaddgrad_matmul_context_});
-    rinfo_.push_back({csinfo_.concat,
-                      mkl_op_registry::GetMklOpName(csinfo_.concat),
-                      CopyAttrsConcat, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.concatv2,
-                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
-                      CopyAttrsConcatV2, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.conv2d,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
-                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.conv2d_grad_filter,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
-                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.conv2d_grad_input,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
-                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
-
-    rinfo_.push_back({csinfo_.fused_batch_norm,
-                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
-                      CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
-    rinfo_.push_back(
-        {csinfo_.fused_batch_norm_grad,
-         mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
-         CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.identity,
-                      mkl_op_registry::GetMklOpName(csinfo_.identity),
-                      CopyAttrsIdentity, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn),
-                      CopyAttrsLRN, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.lrn_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
-                      CopyAttrsLRN, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.max_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
-                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite, nullptr});
-    rinfo_.push_back({csinfo_.max_pool_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
-                      CopyAttrsPooling, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.maximum,
-                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.mul, mkl_op_registry::GetMklOpName(csinfo_.mul),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.relu_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.reshape,
-                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
-                      CopyAttrsReshape, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.squared_difference,
-                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.sub, mkl_op_registry::GetMklOpName(csinfo_.sub),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-
-    // Add info about which ops to add workspace edge to and the slots.
-    wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
-    wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
-
-    // Add a rule for merging nodes
-    minfo_.push_back({csinfo_.mkl_conv2d, csinfo_.bias_add, 0,
-                      csinfo_.mkl_conv2d_with_bias});
-
-    biasaddgrad_matmul_context_ = {csinfo_.bias_add_grad, csinfo_.matmul,
-                                   IsBiasAddGradInMatMulContext};
-
-    biasaddgrad_conv2dwithbias_context_ = {
-        csinfo_.bias_add_grad, csinfo_.mkl_conv2d_with_bias,
-        IsBiasAddGradInConv2DWithBiasContext};
-
-    cinfo_.push_back(&biasaddgrad_matmul_context_);
-    cinfo_.push_back(&biasaddgrad_conv2dwithbias_context_);
-  }
-
-  // Standard interface to run pass
-  Status Run(const GraphOptimizationPassOptions& options);
-
-  // Helper function which does most of heavy lifting for rewriting
-  // Mkl nodes to propagate Mkl tensor as additional output
-  //
-  // Extracts common functionality between Run public interface and
-  // test interface.
-  //
-  // @return true, if and only if graph is mutated; false otherwise.
-  bool RunPass(std::unique_ptr<Graph>* g);
-
-  /// Structure to specify the context information used in a node rewrite rule
-  typedef struct {
-    string node;  // Name of the node to be rewritten
-    string fwd;   // Name of the node in the forward pass that this node
-                  // corresponds to
-    std::function<bool(const Node*, const Node**, void* c)> context_match_fn;
-  } ContextInfo;
-
-  /// Structure to specify the name of an original node, its new name after
-  /// rewrite, the number of inputs to the original node, the function to
-  /// be used to copy attributes for the op, and the rule (if any) which
-  /// must hold for rewriting the node
-  typedef struct {
-    string name;      // Original name of op of the node in the graph
-    string new_name;  // New name of the op of the node in the graph
-    // A function handler to copy attributes from an old node to a new node.
-    std::function<void(const Node*, NodeBuilder*)> copy_attrs;
-    // A rule under which to rewrite this node
-    std::function<bool(const Node*, const ContextInfo* c)> rewrite_rule;
-    // ContextInfo, if any, to be used for rewrite
-    ContextInfo* context;
-  } RewriteInfo;
-
-  /// Structure to specify a forward op, a backward op, and the slot numbers
-  /// in the forward and backward ops where we will add a workspace edge.
-  typedef struct {
-    string fwd_op;    // Name of a forward op in the graph
-    string bwd_op;    // Name of a backward op in the graph
-    int fwd_slot;     // Output slot in the forward op node where actual
-                      // output tensor resides
-    int bwd_slot;     // Input slot in the backward op node where actual
-                      // input tensor resides
-    int ws_fwd_slot;  // Output slot in the forward op node where workspace
-                      // edge is added
-    int ws_bwd_slot;  // Input slot in the backward op node where workspace
-                      // edge is added
-  } WorkSpaceInfo;
-
-  /// Structure to specify information used in node merge
-  typedef struct {
-    string pred;      // Predecessor node string
-    string succ;      // Successor node string
-    int op;           // The operand no the predecessor node corresponds
-                      // to the successor node
-    string new_node;  // Name of the node after merge
-  } MergeInfo;
-
-  /// Structure to store all constant strings
-  /// NOTE: names are alphabetically sorted.
-  typedef struct {
-    string addn;
-    string add;
-    string avg_pool;
-    string avg_pool_grad;
-    string bias_add;
-    string bias_add_grad;
-    string concat;
-    string concatv2;
-    string conv2d;
-    string conv2d_grad_input;
-    string conv2d_grad_filter;
-    string fused_batch_norm;
-    string fused_batch_norm_grad;
-    string identity;
-    string lrn;
-    string lrn_grad;
-    string matmul;
-    string max_pool;
-    string max_pool_grad;
-    string maximum;
-    string mkl_conv2d;
-    string mkl_conv2d_grad_input;
-    string mkl_conv2d_grad_filter;
-    string mkl_conv2d_with_bias;
-    string mkl_conv2d_with_bias_backprop_bias;
-    string mul;
-    string relu;
-    string relu_grad;
-    string reshape;
-    string split;
-    string squared_difference;
-    string sub;
-  } ConstStringsInfo;
-
- private:
-  /// Maintain info about nodes to rewrite
-  std::vector<RewriteInfo> rinfo_;
-
-  /// Maintain info about nodes to add workspace edge
-  std::vector<WorkSpaceInfo> wsinfo_;
-
-  /// Maintain info about nodes to be merged
-  std::vector<MergeInfo> minfo_;
-
-  /// Maintain info about nodes to rewrite
-  static std::vector<ContextInfo*> cinfo_;
-
-  /// Maintain structure of constant strings
-  static ConstStringsInfo csinfo_;
-
-  /// Context variables used in referencing rules
-  static ContextInfo biasaddgrad_matmul_context_;
-  static ContextInfo biasaddgrad_conv2dwithbias_context_;
-
- private:
-  // Is OpDef::ArgDef a list type? It could be N * T or list(type).
-  // Refer to opdef.proto for details of list type.
-  inline bool ArgIsList(const OpDef::ArgDef& arg) const {
-    return !arg.type_list_attr().empty() || !arg.number_attr().empty();
-  }
-
-  // Get length of a list in 'n' if 'arg' is of list type. Refer to
-  // description of ArgIsList for definition of list type.
-  inline int GetTensorListLength(const OpDef::ArgDef& arg, Node* n) {
-    CHECK_EQ(ArgIsList(arg), true);
-    int N = 0;
-    const string attr_name = !arg.type_list_attr().empty()
-                                 ? arg.type_list_attr()
-                                 : arg.number_attr();
-    if (!arg.type_list_attr().empty()) {
-      std::vector<DataType> value;
-      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &value));
-      N = value.size();
-    } else {
-      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &N));
-    }
-    return N;
-  }
-
-  // Can op represented by node 'n' run on DEVICE_CPU?
-  // Op can run on CPU with MKL if the runtime assigned device or the
-  // user requested device contains device CPU, or both are empty.
-  bool CanOpRunOnCPUDevice(const Node* n) {
-    bool result = true;
-    string reason;
-
-    // Substring that should be checked for in device name for CPU device.
-    const char* const kCPUDeviceSubStr = "CPU";
-
-    // If Op has been specifically assigned to a non-CPU device, then No.
-    if (!n->assigned_device_name().empty() &&
-       !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
-      result = false;
-      reason = "Op has been assigned a runtime device that is not CPU.";
-    }
-
-    // If user has specifically assigned this op to a non-CPU device, then No.
-    if (!n->def().device().empty() &&
-       !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
-      result = false;
-      reason = "User has assigned a device that is not CPU.";
-    }
-
-    if (result == false) {
-      VLOG(1) << "MklLayoutRewritePass: Skipping rewriting of the node "
-              << n->type_string() << ", reason: " << reason;
-    }
-
-    // Otherwise Yes.
-    return result;
-  }
-
-  // Return a node that can be merged with input node 'n'
-  //
-  // @return pointer to the node if we can find such a
-  // node. Otherwise, it returns nullptr.
-  Node* CheckForNodeMerge(const Node* n) const;
-
-  // Merge predecessor node with its successor.
-  // Currently, we merge Conv2D with BiasAdd only.
-  //
-  // Input nodes succ and pred may be deleted if the call to
-  // this function is successful. Attempt to use the pointers
-  // after the call to function may result in undefined behaviors.
-  //
-  // @input g - input graph, succ - successor node, pred - predecessor node
-  // @return Status::OK(), if merging is successful and supported.
-  //         Returns appropriate Status error code otherwise.
-  //         Graph is updated in case nodes are merged. Otherwise, it is
-  //         not updated.
-  Status MergeNode(std::unique_ptr<Graph>* g, Node* succ, Node* pred);
-
-  // Check if the node 'n' has any applicable rewrite rule
-  // We check for 2 scenarios for rewrite.
-  //
-  // @return RewriteInfo* for the applicable rewrite rule
-  const RewriteInfo* CheckForNodeRewrite(const Node* n) const;
-
-  // Default rewrite rule to be used in scenario 1 for rewrite.
-  // @return - true (since we want to always rewrite)
-  static bool AlwaysRewrite(const Node* n, const ContextInfo* c = nullptr) {
-    return true;
-  }
-
-  // Check if we are performing pooling on depth or batch. If it is, then we
-  // do not rewrite MaxPool node to Mkl version.
-  // @return - true (if it is not a depth/batch wise pooling case);
-  //           false otherwise.
-  static bool NonDepthBatchWisePoolRewrite(const Node* n,
-                                           const ContextInfo* c) {
-    CHECK_NOTNULL(n);
-
-    string data_format_str;
-    TensorFormat data_format;
-    std::vector<int32> ksize, strides;
-    CHECK_EQ(GetNodeAttr(n->def(), "ksize", &ksize).ok(), true);
-    CHECK_EQ(GetNodeAttr(n->def(), "strides", &strides).ok(), true);
-    CHECK_EQ(GetNodeAttr(n->def(), "data_format", &data_format_str).ok(), true);
-    CHECK_EQ(FormatFromString(data_format_str, &data_format), true);
-
-    // Condition that specifies non-batch-wise and non-depth-wise pooling.
-    if (GetTensorDim(ksize, data_format, 'N') == 1 &&
-        GetTensorDim(strides, data_format, 'N') == 1 &&
-        GetTensorDim(ksize, data_format, 'C') == 1 &&
-        GetTensorDim(strides, data_format, 'C') == 1) {
-      return true;
-    }
-
-    return false;
-  }
-
-  static bool AddNRewrite(const Node* n, const ContextInfo* c) {
-    CHECK_NOTNULL(n);
-
-    int num;
-    CHECK_EQ(GetNodeAttr(n->def(), "N", &num).ok(), true);
-
-    // Condition that specifies non-batch-wise and non-depth-wise pooling.
-    if (num == 2) {
-      return true;
-    }
-
-    return false;
-  }
-  // Is BiasAddGrad node in 'n' is associated with Conv2DWithBias node
-  // specified in contextinfo 'ci'. Function updates fwd_node to point
-  // to Conv2DWithBias node if 'n' is associated with Conv2DWithBias.
-  //
-  // Association checks for one of the following graphs:
-  //
-  // Graph A:
-  //
-  // _ = Conv2DWithBias(F, I, _)
-  // ..
-  // _ = Conv2DBackpropFilter(F, _, G)
-  // _ = Conv2DBackpropInput(_, I, G)
-  // _ = BiasAddGrad(G)
-  //
-  // OR
-  //
-  // Graph B:
-  //
-  // _ = Conv2DWithBias(F, _, _)
-  // ..
-  // _ = Conv2DBackpropFilter(F, _, G)
-  // _ = BiasAddGrad(G)
-  //
-  // Here F, G, and I are graph nodes; _ represents graph nodes that we
-  // don't care here.
-  //
-  // @return - true (if BiasAddGrad is associated with Conv2DWithBias);
-  //           false otherwise.
-  static bool IsBiasAddGradInConv2DWithBiasContext(const Node* n,
-                                                   const Node** fwd_node,
-                                                   void* ci) {
-    CHECK_NOTNULL(n);
-    CHECK_NOTNULL(fwd_node);
-    CHECK_NOTNULL(ci);
-    *fwd_node = nullptr;
-
-    CHECK_EQ(n->type_string(), csinfo_.bias_add_grad);
-
-    // Get the only 1 input of BiasAddGrad.
-    CHECK_EQ(n->num_inputs(), 1);
-    const Node* bias_add_grad_inp = nullptr;
-    TF_CHECK_OK(n->input_node(0, &bias_add_grad_inp));
-    CHECK_NOTNULL(bias_add_grad_inp);
-
-    // Check if this input also goes to BackpropFilter and BackpropInput
-    // as 3rd input.
-    bool found_backprop_input = false;
-    bool found_backprop_filter = false;
-    Node* backprop_filter_node = nullptr;
-    Node* backprop_input_node = nullptr;
-
-    for (const Edge* e : bias_add_grad_inp->out_edges()) {
-      Node* third_input = nullptr;
-      if (e->dst()->type_string() == csinfo_.conv2d_grad_input ||
-          e->dst()->type_string() == csinfo_.mkl_conv2d_grad_input) {
-        // Third input (index 2) of BackpropInput
-        TF_CHECK_OK(e->dst()->input_node(2, &third_input));
-        // Third input (index 2) of BackpropInput must be same as the input
-        // of BiasAddGrad.
-        if (third_input == bias_add_grad_inp) {
-          found_backprop_input = true;
-          backprop_input_node = e->dst();
-        }
-      }
-
-      if (e->dst()->type_string() == csinfo_.conv2d_grad_filter ||
-          e->dst()->type_string() == csinfo_.mkl_conv2d_grad_filter) {
-        // Third input (index 2) of BackpropFilter
-        TF_CHECK_OK(e->dst()->input_node(2, &third_input));
-        // Third input (index 2) of BackpropFilter must be same as the input
-        // of BiasAddGrad.
-        if (third_input == bias_add_grad_inp) {
-          found_backprop_filter = true;
-          backprop_filter_node = e->dst();
-        }
-      }
-
-      // If we found both the nodes, then we can stop the search.
-      if (found_backprop_input && found_backprop_filter) {
-        break;
-      }
-    }
-
-    // If BackpropFilter node is not found, then this is not
-    // Conv2DWithBias context. For 2nd graph in the example above, only
-    // BackpropFilter would be present.
-    if (!found_backprop_filter) {
-      return false;
-    }
-
-    // Otherwise, we found the nodes.
-    CHECK_NOTNULL(backprop_filter_node);
-    if (found_backprop_input) {
-      CHECK_NOTNULL(backprop_input_node);
-    }
-
-    // Now that we confirmed that this is Conv2DWithBias context, we need to
-    // get access to the forward node (Conv2DWithBias). 2nd input of
-    // Conv2DWithBias is same as the 2nd input of Conv2DBackpropInput; 1st
-    // input of Conv2DWithBias is same as the 1st input of Conv2DBackpropFilter
-    // (This comes from definition of gradient computation for Conv2D).
-    if (found_backprop_input) {
-      // Graph A in the example.
-      Node* second_inp_of_input = nullptr;
-      Node* first_inp_of_filter = nullptr;
-      TF_CHECK_OK(backprop_input_node->input_node(1, &second_inp_of_input));
-      TF_CHECK_OK(backprop_filter_node->input_node(0, &first_inp_of_filter));
-      CHECK_NOTNULL(second_inp_of_input);
-      CHECK_NOTNULL(first_inp_of_filter);
-
-      // Now we need to find out Conv2DWithBias node from these input nodes.
-      // Conv2DWithBias node is the node that accepts both the nodes
-      // second_inp_of_input and first_inp_of_filter in 2nd and 1st input slots.
-      for (const Edge* fe : first_inp_of_filter->out_edges()) {
-        if (fe->dst()->type_string() == csinfo_.mkl_conv2d_with_bias &&
-            fe->dst_input() == 0) {
-          for (const Edge* ie : second_inp_of_input->out_edges()) {
-            if (ie->dst()->type_string() == csinfo_.mkl_conv2d_with_bias &&
-                ie->dst_input() == 1 && fe->dst() == ie->dst()) {
-              VLOG(1) << "MklLayoutRewritePass: found "
-                      << fe->dst()->DebugString()
-                      << " as the forward node for matching context, backward"
-                      << " node is: " << n->DebugString();
-              *fwd_node = fe->dst();
-              return true;
-            }
-          }
-        }
-      }
-    } else {
-      // We did not find BackpropInput, so we work with BackpropFilter only.
-      // Graph B in the example.
-      Node* first_inp_of_filter = nullptr;
-      TF_CHECK_OK(backprop_filter_node->input_node(0, &first_inp_of_filter));
-      CHECK_NOTNULL(first_inp_of_filter);
-
-      // Now we need to find out Conv2DWithBias node from first input of
-      // BackpropFIlter. Conv2DWithBias node is the node that accepts
-      // first_inp_of_filter in 1st input slot.
-      for (const Edge* fe : first_inp_of_filter->out_edges()) {
-        if (fe->dst()->type_string() == csinfo_.mkl_conv2d_with_bias &&
-            fe->dst_input() == 0) {
-          VLOG(1) << "MklLayoutRewritePass: found " << fe->dst()->DebugString()
-                  << " as the forward node for matching context, backward"
-                  << " node is: " << n->DebugString();
-          *fwd_node = fe->dst();
-          return true;
-        }
-      }
-    }
-
-    return false;
-  }
-
-  // Is BiasAddGrad node in 'n' is associated with MatMul node
-  // specified in contextinfo 'ci'. Function does not update fwd_node.
-  //
-  // @return - true (if BiasAddGrad is associated with MatMul);
-  //           false otherwise.
-  static bool IsBiasAddGradInMatMulContext(const Node* n, const Node** fwd_node,
-                                           void* ci) {
-    return (!IsBiasAddGradInConv2DWithBiasContext(n, fwd_node, ci));
-  }
-
-  // Rewrite rule that uses context-information for matching,
-  // used in scenario 2.
-  //
-  // @input - Node 'n' for which to search for matching context
-  // @input - The context 'c' under which to rewrite
-  // @return - true if we can rewrite node under context 'c';
-  //           false otherwise.
-  static bool ContextMatchRewrite(const Node* n, const ContextInfo* c);
-
-  // Helper function that searches the matching contextinfo for the node.
-  //
-  // @input n - Node (gradient op) whose contextinfo is to be searched,
-  //        fwd_node - pointer to node from the forward pass that this node
-  //        belongs to. fwd_node cannot be NULL.
-  // @return Matching contextinfo in case a match is found; null otherwise.
-  //         Also updates *fwd_node with pointer to forward node that this
-  //         context matches.
-  static const ContextInfo* SearchMatchingContext(const Node* n,
-                                                  const Node** fwd_node);
-
-  // Rewrites input node to a new node specified by its matching rewrite info.
-  //
-  // Method first searches matching rewrite info for input node and then
-  // uses that info to rewrite.
-  //
-  // Input node may be deleted in case of rewrite. Attempt to use the node
-  // after the call can result in undefined behaviors.
-  //
-  // @input  g - input graph, n - Node to be rewritten,
-  //         ri - matching rewriteinfo
-  // @return Status::OK(), if the input node is rewritten;
-  //         Returns appropriate Status error code otherwise.
-  //         Graph is updated in case the input node is rewritten.
-  //         Otherwise, it is not updated.
-  Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const RewriteInfo* ri);
-
-  // Get nodes that will feed a list of TF tensors to the new
-  // node that we are constructing.
-  //
-  // @input g - input graph,
-  // @input inputs - inputs to old node that we are using for constructing
-  //                 new inputs,
-  // @input input_idx - the index in the 'inputs' vector pointing to the
-  //                    current input that we have processed so far
-  // @output input_idx - index will be incremented by the number of nodes
-  //                     from 'inputs' that are processed
-  // @input list_length - The expected length of list of TF tensors
-  // @output output_nodes - the list of new nodes creating TF tensors
-  //
-  // @return None
-  void GetNodesProducingTFTensorList(
-      const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-      int* input_idx, int list_length,
-      std::vector<NodeBuilder::NodeOut>* output_nodes);
-
-  // Get nodes that will feed a list of Mkl tensors to the new
-  // node that we are constructing.
-  //
-  // @input g - input graph,
-  // @input orig_node - Original node that we are rewriting
-  // @input inputs - inputs to old node that we are using for constructing
-  //                 new inputs,
-  // @input input_idx - the index in the 'inputs' vector pointing to the
-  //                    current input that we have processed so far
-  // @output input_idx - index will be incremented by the number of nodes
-  //                     from 'inputs' that are processed
-  // @input list_length - The expected length of list of Mkl tensors
-  // @output output_nodes - the list of new nodes creating Mkl tensors
-  //
-  // @return None
-  void GetNodesProducingMklTensorList(
-      std::unique_ptr<Graph>* g, Node* orig_node,
-      const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-      int* input_idx, int list_length,
-      std::vector<NodeBuilder::NodeOut>* output_nodes);
-
-  // Get a node that will feed an Mkl tensor to the new
-  // node that we are constructing. The output node could be (1) 'n'
-  // if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
-  // if 'n' is not an Mkl layer.
-  //
-  // @input g - input graph,
-  // @input orig_node - Original node that we are rewriting,
-  // @input n - Node based on which we are creating Mkl node,
-  // @input n_output_slot - the output slot of node 'n'
-  //            which is feeding to the node that we are constructing
-  // @output mkl_node - the new node that will feed Mkl tensor
-  // @output mkl_node_output_slot - the slot number of mkl_node that
-  //                                will feed the tensor
-  // @return None
-  void GetNodeProducingMklTensor(std::unique_ptr<Graph>* g, Node* orig_node,
-                                 Node* n, int n_output_slot, Node** mkl_node,
-                                 int* mkl_node_output_slot);
-
-  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
-  // in graph 'g'. Original node is input in 'old_node'. Inputs to 'nb' are
-  // set up in contiguous fashion. 'workspace_tensors' carry graph nodes
-  // producing workspace edges if 'are_workspace_tensors_available' is true.
-  // Otherwise, 'workspace_tensors' is empty vector.
-  //
-  // For details, refer to 'Ordering of inputs after rewriting' section in the
-  // documentation above.
-  //
-  // Returns Status::OK() if setting up inputs is successful, otherwise
-  // returns appropriate status code.
-  int SetUpContiguousInputs(
-      std::unique_ptr<Graph>* g,
-      const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
-      NodeBuilder* nb, Node* old_node,
-      std::vector<NodeBuilder::NodeOut>* workspace_tensors,
-      bool are_workspace_tensors_available);
-
-  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
-  // in graph 'g'. Original node is input in 'orig_node'.
-  //
-  // For details, refer to 'Ordering of Tensorflow tensors and Mkl tensors'
-  // section in the documentation above.
-  //
-  // Returns Status::OK() if setting up inputs is successful, otherwise
-  // returns appropriate status code.
-  Status SetUpInputs(std::unique_ptr<Graph>* g,
-                     const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-                     NodeBuilder* nb, Node* orig_node);
-
-  // Add workspace edge on the input or output side of Node 'orig_node' by using
-  // NodeBuilder 'nb' for the new node provided. If 'orig_node' does not dictate
-  // adding workspace edge then do not add it. Workspace Tensorflow and Mkl
-  // tensors, if they need to be added, will be set into these tensors.
-  // If we set workspace tensors, then are_ws_tensors_added should be true.
-  void AddWorkSpaceEdgeIfNeeded(std::unique_ptr<Graph>* g, Node* orig_node,
-                                NodeBuilder* nb,
-                                std::vector<NodeBuilder::NodeOut>* ws_tensors,
-                                bool* are_ws_tensors_added);
-
-  // Functions specific to operators to copy attributes
-  // We need operator-specific function to copy attributes because the framework
-  // does not provide any generic function for it.
-  // NOTE: names are alphabetically sorted.
-  static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsIdentity(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
-
-  // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
-  // using node for original node 'orig_node' and return it in '*out'.
-  // TODO(nhasabni) We should move this to mkl_util.h
-  void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
-                             Node* orig_node);
-  void GetDummyWorkspaceTensorNode(std::unique_ptr<Graph>* g, Node** out,
-                                   Node* orig_node);
-};
-
-MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_;
-MklLayoutRewritePass::ContextInfo
-    MklLayoutRewritePass::biasaddgrad_conv2dwithbias_context_;
-MklLayoutRewritePass::ContextInfo
-    MklLayoutRewritePass::biasaddgrad_matmul_context_;
-std::vector<MklLayoutRewritePass::ContextInfo*> MklLayoutRewritePass::cinfo_;
-
-// We register Mkl rewrite pass for phase 1 in post partitioning group.
-// We register it here so that we get a complete picture of all users of Mkl
-// nodes. Do not change the ordering of the Mkl passes.
-const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup =
-    OptimizationPassRegistry::POST_PARTITIONING;
-#ifdef ENABLE_MKL
-REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass);
-#endif  // ENABLE_MKL
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions for creating new node
-//////////////////////////////////////////////////////////////////////////
-
-static void FillInputs(const Node* n,
-                       gtl::InlinedVector<Node*, 4>* control_edges,
-                       gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
-  control_edges->clear();
-  for (const Edge* e : n->in_edges()) {
-    if (e->IsControlEdge()) {
-      control_edges->push_back(e->src());
-    } else {
-      (*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output());
-    }
-  }
-  std::sort(control_edges->begin(), control_edges->end());
-  if (n->op_def().is_commutative()) {
-    // For commutative inputs, we sort the input by the input Node*
-    // to get a canonical ordering (so that add(a,b) and add(b, a) will
-    // hash to the same value if is_commutative is true for 'add').
-    std::sort(in->begin(), in->end());
-  }
-}
-
-void MklLayoutRewritePass::GetNodesProducingTFTensorList(
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, int* input_idx,
-    int list_length, std::vector<NodeBuilder::NodeOut>* output_nodes) {
-  CHECK_LT(*input_idx, inputs.size());
-  CHECK_GT(list_length, 0);
-  CHECK_NOTNULL(output_nodes);
-  output_nodes->reserve(list_length);
-
-  while (list_length != 0) {
-    CHECK_GT(list_length, 0);
-    CHECK_LT(*input_idx, inputs.size());
-    Node* n = inputs[*input_idx].first;
-    int slot = inputs[*input_idx].second;
-    // If input node 'n' is just producing a single tensor at
-    // output slot 'slot' then we just add that single node.
-    output_nodes->push_back(NodeBuilder::NodeOut(n, slot));
-    (*input_idx)++;
-    list_length--;
-  }
-}
-
-// TODO(nhasabni) We should move this to mkl_util.h.
-void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
-                                                 Node** out, Node* orig_node) {
-  // We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent
-  // dummy Mkl tensor. 8 = 2*size_t.
-  const DataType dt = DataTypeToEnum<uint8>::v();
-  TensorProto proto;
-  proto.set_dtype(dt);
-  uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
-  proto.set_tensor_content(string(reinterpret_cast<const char*>(zero), 8));
-  TensorShape dummy_shape({8});
-  dummy_shape.AsProto(proto.mutable_tensor_shape());
-  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
-                  .Attr("value", proto)
-                  .Attr("dtype", dt)
-                  .Device(orig_node->def().device())  // We place this node on
-                                                      // the same device as the
-                                                      // device of the original
-                                                      // node.
-                  .Finalize(&**g, out));
-  CHECK_NOTNULL(*out); // Make sure we got a valid object before using it
-
-  // If number of inputs to the original node is > 0, then we add
-  // control dependency between 1st input (index 0) of the original node and
-  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
-  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
-  // rewritten node. Adding control edge between 1st input of the original node
-  // and the dummy Mkl node ensures that the dummy node is in the same frame
-  // as the original node. Choosing 1st input is not necessary - any input of
-  // the original node is fine because all the inputs of a node are always in
-  // the same frame.
-  if (orig_node->num_inputs() > 0) {
-    Node* orig_input0 = nullptr;
-    TF_CHECK_OK(
-        orig_node->input_node(0, const_cast<const Node**>(&orig_input0)));
-    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
-  }
-
-  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
-}
-
-void MklLayoutRewritePass::GetNodesProducingMklTensorList(
-    std::unique_ptr<Graph>* g, Node* orig_node,
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, int* input_idx,
-    int list_length, std::vector<NodeBuilder::NodeOut>* output_nodes) {
-  CHECK_LT(*input_idx, inputs.size());
-  CHECK_GT(list_length, 0);
-  CHECK_NOTNULL(output_nodes);
-  output_nodes->reserve(list_length);
-
-  while (list_length != 0) {
-    CHECK_GT(list_length, 0);
-    CHECK_LT(*input_idx, inputs.size());
-    Node* n = inputs[*input_idx].first;
-    int slot = inputs[*input_idx].second;
-    // If 'n' is producing a single tensor, then create a single Mkl tensor
-    // node.
-    Node* mkl_node = nullptr;
-    int mkl_node_output_slot = 0;
-    GetNodeProducingMklTensor(g, orig_node, n, slot, &mkl_node,
-                              &mkl_node_output_slot);
-    output_nodes->push_back(
-        NodeBuilder::NodeOut(mkl_node, mkl_node_output_slot));
-    (*input_idx)++;
-    list_length--;
-  }
-}
-
-// Get an input node that will feed Mkl tensor to the new
-// node that we are constructing. An input node could be (1) 'n'
-// if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
-// if 'n' is not an Mkl layer.
-void MklLayoutRewritePass::GetNodeProducingMklTensor(
-    std::unique_ptr<Graph>* g, Node* orig_node, Node* n, int n_output_slot,
-    Node** mkl_node, int* mkl_node_output_slot) {
-  CHECK_NOTNULL(n);
-  CHECK_NOTNULL(mkl_node);
-  CHECK_NOTNULL(mkl_node_output_slot);
-
-  // If this is an MKL op, then it will create extra output for MKL layout.
-  DataType T;
-  if (GetNodeAttr(n->def(), "T", &T).ok() &&
-      mkl_op_registry::IsMklOp(n->type_string(), T)) {
-    // If this is an MKL op, then it will generate an edge that will receive
-    // Mkl tensor from a node.
-    // output slot number for Mkl tensor would be N+slot number of TensorFlow
-    // tensor, where N is total number of TensorFlow tensors.
-    *mkl_node = n;
-    *mkl_node_output_slot =
-        GetTensorMetaDataIndex(n_output_slot, n->num_outputs());
-  } else {
-    // If we have not visited the node and rewritten it, then we need
-    // to create a dummy node that will feed a dummy Mkl tensor to this node.
-    // DummyMklTensor node has no input and generates only 1 output
-    // (dummy Mkl tensor) as output slot number 0.
-    GetDummyMklTensorNode(g, mkl_node, orig_node);
-    CHECK_NOTNULL(*mkl_node);
-    *mkl_node_output_slot = 0;
-  }
-}
-
-int MklLayoutRewritePass::SetUpContiguousInputs(
-    std::unique_ptr<Graph>* g,
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
-    NodeBuilder* nb, Node* old_node,
-    std::vector<NodeBuilder::NodeOut>* workspace_tensors,
-    bool are_workspace_tensors_available) {
-  CHECK_NOTNULL(workspace_tensors);
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-
-  // TODO(nhasabni): Temporary solution to connect filter input of
-  // BackpropInput with the converted filter from Conv2D.
-  bool do_connect_conv2d_backprop_input_filter = false;
-  Node* conv2d_node = nullptr;
-  // Filter node is 2nd input (slot index 1) of Conv2D.
-  int kConv2DFilterInputSlotIdx = 1;
-  int kConv2DBackpropInputFilterInputSlotIdx = 1;
-  int kConv2DFilterOutputSlotIdx = 1;
-  if (old_node->type_string() == csinfo_.conv2d_grad_input) {
-    // We need to find Conv2D node from Conv2DBackpropInput.
-    // For that let's first find filter node that is 2nd input (slot 1)
-    // of BackpropInput.
-    Node* filter_node = nullptr;
-    TF_CHECK_OK(old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx,
-                                     &filter_node));
-    CHECK_NOTNULL(filter_node);
-
-    // Now check which nodes receive from filter_node. Filter feeds as
-    // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias.
-    for (const Edge* e : filter_node->out_edges()) {
-      if (e->dst()->type_string() == csinfo_.mkl_conv2d &&
-          e->dst_input() == kConv2DFilterInputSlotIdx
-          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
-        if (conv2d_node != nullptr) {
-          VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
-                  << " feeding multiple Conv2D nodes: "
-                  << filter_node->DebugString();
-          // We will not connect filter input of Conv2DBackpropInput
-          // to be safe here.
-          do_connect_conv2d_backprop_input_filter = false;
-          break;
-        } else {
-          conv2d_node = e->dst();
-          do_connect_conv2d_backprop_input_filter = true;
-        }
-      }
-    }
-  }
-
-  // Number of input slots to original op
-  // Input slots are represented by .Input() calls in REGISTER_OP.
-  int old_node_input_slots = old_node->op_def().input_arg_size();
-  // Actual number of inputs can be greater than or equal to number
-  // of Input slots because inputs of type list could be unfolded.
-  CHECK_GE(old_node_inputs.size(), old_node_input_slots);
-  int nn_slot_idx = 0;  // slot index for inputs of new node
-
-  // Let's copy all inputs (TF tensors) of original node to new node.
-  int iidx = 0;
-  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
-    // An input slot could be a single tensor or a list. We need
-    // to handle this case accordingly.
-    CHECK_LT(iidx, old_node_inputs.size());
-    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
-    if (ArgIsList(arg)) {
-      std::vector<NodeBuilder::NodeOut> new_node_inputs;
-      int N = GetTensorListLength(arg, old_node);
-      GetNodesProducingTFTensorList(old_node_inputs, &iidx, N,
-                                    &new_node_inputs);
-      nb->Input(new_node_inputs);
-      nn_slot_idx++;
-    } else {
-      // Special case for connecting filter input of Conv2DBackpropInput
-      if (do_connect_conv2d_backprop_input_filter &&
-          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
-        nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx);
-      } else {
-        nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second);
-      }
-      iidx++;
-      nn_slot_idx++;
-    }
-  }
-
-  // If workspace tensors are available for this op and we are using
-  // contiguous ordering then we need to add Tensorflow tensor for
-  // workspace here because Tensorflow tensor for workspace is the
-  // last tensor in the list of Tensorflow tensors.
-  if (are_workspace_tensors_available) {
-    CHECK_EQ(workspace_tensors->size(), 2);
-    // Tensorflow tensor
-    nb->Input((*workspace_tensors)[0].node, (*workspace_tensors)[0].index);
-    nn_slot_idx++;
-  }
-
-  // Let's now setup all Mkl inputs to new node.
-  // Number of Mkl inputs must be same as number of TF inputs.
-  iidx = 0;
-  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
-    // An input slot could be a single tensor or a list. We need
-    // to handle this case accordingly.
-    CHECK_LT(iidx, old_node_inputs.size());
-    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
-    if (ArgIsList(arg)) {
-      std::vector<NodeBuilder::NodeOut> new_node_inputs;
-      int N = GetTensorListLength(arg, old_node);
-      GetNodesProducingMklTensorList(g, old_node, old_node_inputs, &iidx, N,
-                                     &new_node_inputs);
-      nb->Input(new_node_inputs);
-      nn_slot_idx++;
-    } else {
-      Node* mkl_node = nullptr;
-      int mkl_node_output_slot = 0;
-      // Special case for connecting filter input of Conv2DBackpropInput
-      if (do_connect_conv2d_backprop_input_filter &&
-          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
-        GetNodeProducingMklTensor(g, old_node, conv2d_node,
-                                  kConv2DFilterOutputSlotIdx, &mkl_node,
-                                  &mkl_node_output_slot);
-      } else {
-        GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first,
-                                  old_node_inputs[iidx].second, &mkl_node,
-                                  &mkl_node_output_slot);
-      }
-      nb->Input(mkl_node, mkl_node_output_slot);
-      iidx++;
-      nn_slot_idx++;
-    }
-  }
-
-  // If workspace tensors are available for this op and we are using
-  // contiguous ordering then we need to add Mkl tensor for
-  // workspace here because Mkl tensor for workspace is the
-  // last tensor in the list of Mkl tensors.
-  if (are_workspace_tensors_available) {
-    CHECK_EQ(workspace_tensors->size(), 2);
-    // Mkl tensor
-    nb->Input((*workspace_tensors)[1].node, (*workspace_tensors)[1].index);
-    nn_slot_idx++;
-  }
-
-  return nn_slot_idx;
-}
-
-Status MklLayoutRewritePass::SetUpInputs(
-    std::unique_ptr<Graph>* g,
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
-    NodeBuilder* nb, Node* old_node) {
-  // Let's check if we need to add workspace tensors for this node.
-  // We add workspace edge only for MaxPool, LRN and BatchNorm.
-  std::vector<NodeBuilder::NodeOut> workspace_tensors;
-  bool are_workspace_tensors_available = false;
-  AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors,
-                           &are_workspace_tensors_available);
-
-  int new_node_input_slots = 0;
-  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-    // TODO(nhasabni): implement this function just for same of completion.
-    // We do not use interleaved ordering right now.
-    return Status(
-        error::Code::UNIMPLEMENTED,
-        "Interleaved ordering of tensors is currently not supported.");
-  } else {
-    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-    new_node_input_slots = SetUpContiguousInputs(
-        g, old_node_inputs, nb, old_node, &workspace_tensors,
-        are_workspace_tensors_available);
-  }
-
-  // Sanity check
-  int old_node_input_slots = old_node->op_def().input_arg_size();
-  if (!are_workspace_tensors_available) {
-    // If we are not adding workspace tensors for this op, then the total
-    // number of input slots to the new node _must_ be 2 times the number
-    // of input slots to the original node: N original Tensorflow tensors and
-    // N for Mkl tensors corresponding to each Tensorflow tensors.
-    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2);
-  } else {
-    // If we are adding workspace tensors for this op, then the total
-    // The total number of input slots to new node _must_ be 2 times the number
-    // of input slots to the original node: N original Tensorflow tensors and
-    // N for Mkl tensors corresponding to each Tensorflow tensors plus 2
-    // (for workspace Tensorflow tensor and workspace Mkl tensor).
-    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2 + 2);
-  }
-
-  return Status::OK();
-}
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions related to workspace pass
-//////////////////////////////////////////////////////////////////////////
-
-// TODO(nhasabni) We should move this to mkl_util.h.
-void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
-    std::unique_ptr<Graph>* g, Node** out, Node* orig_node) {
-  // We use a tensor of shape {1} and value 0 to represent
-  // dummy float tensor. We need this as a dummy workspace tensor.
-  // Workspace tensor has type float.
-  const DataType dt = DataTypeToEnum<float>::v();
-  TensorProto proto;
-  proto.set_dtype(dt);
-  float zero[1] = {0};
-  proto.set_tensor_content(string(reinterpret_cast<char*>(&zero), 4));
-  TensorShape dummy_shape({1});
-  dummy_shape.AsProto(proto.mutable_tensor_shape());
-  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
-                  .Attr("value", proto)
-                  .Attr("dtype", dt)
-                  .Device(orig_node->def().device())  // We place this node on
-                                                      // same the device as the
-                                                      // device of the original
-                                                      // node.
-                  .Finalize(&**g, out));
-  CHECK_NOTNULL(*out); // Make sure we got a valid object before using it
-
-  // If number of inputs to the original node is > 0, then we add
-  // control dependency between 1st input (index 0) of the original node and
-  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
-  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
-  // rewritten node. Adding control edge between 1st input of the original node
-  // and the dummy Mkl node ensures that the dummy node is in the same frame
-  // as the original node. Choosing 1st input is not necessary - any input of
-  // the original node is fine because all the inputs of a node are always in
-  // the same frame.
-  if (orig_node->num_inputs() > 0) {
-    Node* orig_input0 = nullptr;
-    TF_CHECK_OK(
-        orig_node->input_node(0, const_cast<const Node**>(&orig_input0)));
-    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
-  }
-
-  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
-}
-
-void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
-    std::unique_ptr<Graph>* g, Node* orig_node, NodeBuilder* nb,
-    std::vector<NodeBuilder::NodeOut>* ws_tensors, bool* are_ws_tensors_added) {
-  bool workspace_edge_added = false;  // Default initializer
-  CHECK_NOTNULL(are_ws_tensors_added);
-  *are_ws_tensors_added = false;  // Default initializer
-
-  DataType T;
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  for (auto ws : wsinfo_) {
-    if (orig_node->type_string() == ws.fwd_op &&
-        mkl_op_registry::IsMklOp(
-            mkl_op_registry::GetMklOpName(orig_node->type_string()), T)) {
-      // If this op is a fwd op, then we need to check if there is an
-      // edge from this node's fwd_slot to bwdop's bwd_slot. If there is
-      // an edge, then we just add an attribute on this node for setting
-      // workspace_passed to true. We don't add actual workspace edge
-      // in this node. Actual workspace edge gets added in the backward
-      // op for this node.
-      for (const Edge* e : orig_node->out_edges()) {
-        if (e->src_output() == ws.fwd_slot &&
-            e->dst()->type_string() == ws.bwd_op &&
-            e->dst_input() == ws.bwd_slot) {
-          nb->Attr("workspace_enabled", true);
-          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
-                  << orig_node->type_string();
-          workspace_edge_added = true;
-          // We found the edge that we were looking for, so break.
-          break;
-        }
-      }
-
-      if (!workspace_edge_added) {
-        // If we are here, then we did not find backward operator for this
-        // node.
-        nb->Attr("workspace_enabled", false);
-      }
-    } else if (orig_node->type_string() == ws.bwd_op &&
-               mkl_op_registry::IsMklOp(
-                   mkl_op_registry::GetMklOpName(orig_node->type_string()),
-                   T)) {
-      // If this op is a bwd op, then we need to add workspace edge and
-      // it's Mkl tensor edge between its corresponding fwd op and this
-      // op. Corresponding fwd op is specified in 'fwd_op' field of
-      // workspace info. fwd_slot and bwd_slot in workspace info specify
-      // an edge between which slots connect forward and backward op.
-      // Once all these criteria match, we add a workspace edge between
-      // ws_fwd_slot and ws_bwd_slot. Its corresponding Mkl tensor is
-      // determined by interleaved/contiguous ordering. Function
-      // DataIndexToMetaDataIndex tells us the location of Mkl tensor
-      // from the location of the Tensorflow tensor.
-      for (const Edge* e : orig_node->in_edges()) {
-        if (e->src_output() == ws.fwd_slot &&
-            // We would have rewritten the forward op, so we need to use
-            // GetMklOpName call to get its Mkl name.
-            e->src()->type_string() ==
-                mkl_op_registry::GetMklOpName(ws.fwd_op) &&
-            e->dst_input() == ws.bwd_slot) {
-          nb->Attr("workspace_enabled", true);
-          CHECK_NOTNULL(ws_tensors);
-          // Add workspace edge between fwd op and bwd op.
-          ws_tensors->push_back(NodeBuilder::NodeOut(e->src(), ws.ws_fwd_slot));
-          // Add Mkl tensor edge for workspace edge between fwd op and bwd op.
-          ws_tensors->push_back(NodeBuilder::NodeOut(
-              e->src(), DataIndexToMetaDataIndex(ws.ws_fwd_slot,
-                                                 e->src()->num_outputs())));
-          *are_ws_tensors_added = true;
-          // In terms of input ordering, we add these calls to add Input
-          // here because workspace edge (and its Mkl tensor) is the last
-          // edge in the fwdop and bwdop. So all inputs before workspace
-          // tensor have been added by SetUpInputs function.
-          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
-                  << orig_node->type_string();
-          workspace_edge_added = true;
-          // We found the edge that we were looking for, so break.
-          break;
-        }
-      }
-
-      // If we are here means we did not find fwd op that feeds to this
-      // bwd op. So in this case, we need to generate dummy tensors for
-      // workspace input and Mkl tensor for workspace, and set
-      // workspace_enabled to false.
-      if (!workspace_edge_added) {
-        nb->Attr("workspace_enabled", false);
-        Node* dmt_ws = nullptr;      // Dummy tensor for workspace
-        Node* dmt_mkl_ws = nullptr;  // Dummy Mkl tensor for workspace
-        GetDummyWorkspaceTensorNode(g, &dmt_ws, orig_node);
-        GetDummyMklTensorNode(g, &dmt_mkl_ws, orig_node);
-        CHECK_NOTNULL(dmt_ws);
-        CHECK_NOTNULL(dmt_mkl_ws);
-        CHECK_NOTNULL(ws_tensors);
-        // We add dummy tensor as workspace tensor.
-        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_ws, 0));
-        // We add dummy tensor as Mkl tensor for workspace tensor.
-        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_mkl_ws, 0));
-        *are_ws_tensors_added = true;
-        VLOG(1) << "MklLayoutRewritePass: dummy workspace_enabled for "
-                << orig_node->type_string();
-      }
-    } else {
-      // If this node does not match any workspace info, then we do not
-      // do anything special for workspace propagation for it.
-    }
-  }
-}
-
-//////////////////////////////////////////////////////////////////////////
-// Op-specific functions to copy attributes from old node to new node
-//////////////////////////////////////////////////////////////////////////
-
-void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node,
-                                           NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  string padding;
-  std::vector<int32> strides;
-  bool use_cudnn_on_gpu;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-  TF_CHECK_OK(
-      GetNodeAttr(orig_node->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("strides", strides);
-  nb->Attr("padding", padding);
-  nb->Attr("data_format", data_format);
-  nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu);
-}
-
-void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node,
-                                         NodeBuilder* nb) {
-  DataType T;
-  int N;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("N", N);
-}
-
-void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node,
-                                                NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  std::vector<int32> strides;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("strides", strides);
-  nb->Attr("data_format", data_format);
-}
-
-void MklLayoutRewritePass::CopyAttrsIdentity(const Node* orig_node,
-                                             NodeBuilder* nb) {
-  DataType T;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  // Add attributes to new node.
-  nb->Attr("T", T);
-}
-
-void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node,
-                                        NodeBuilder* nb) {
-  DataType T;
-  int depth_radius;
-  float bias;
-  float alpha;
-  float beta;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "depth_radius", &depth_radius));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "bias", &bias));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "beta", &beta));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("depth_radius", depth_radius);
-  nb->Attr("bias", bias);
-  nb->Attr("alpha", alpha);
-  nb->Attr("beta", beta);
-}
-
-void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node,
-                                            NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  string padding;
-  std::vector<int32> ksize, strides;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "ksize", &ksize));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("ksize", ksize);
-  nb->Attr("strides", strides);
-  nb->Attr("padding", padding);
-  nb->Attr("data_format", data_format);
-}
-
-void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node,
-                                             NodeBuilder* nb) {
-  DataType T;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-}
-
-void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node,
-                                            NodeBuilder* nb) {
-  DataType T;
-  DataType Tshape;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tshape", &Tshape));
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("Tshape", Tshape);
-}
-
-void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node,
-                                          NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  int num_split;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "num_split", &num_split));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("num_split", num_split);
-  nb->Attr("data_format", data_format);
-}
-
-void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node,
-                                           NodeBuilder* nb) {
-  DataType T;
-  int N;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("N", N);
-}
-
-void MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node,
-                                             NodeBuilder* nb) {
-  DataType T;
-  int N;
-  DataType tidx;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tidx", &tidx));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("N", N);
-  nb->Attr("Tidx", tidx);
-}
-
-void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node,
-                                                   NodeBuilder* nb) {
-  DataType T;
-  float epsilon;
-  string data_format;
-  bool is_training;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "epsilon", &epsilon));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "is_training", &is_training));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("epsilon", epsilon);
-  nb->Attr("data_format", data_format);
-  nb->Attr("is_training", is_training);
-}
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions related to node merge pass
-//////////////////////////////////////////////////////////////////////////
-
-Node* MklLayoutRewritePass::CheckForNodeMerge(const Node* a) const {
-  // TODO(nhasabni) Add check for type of node similar to CheckForNodeRewrite
-  // once we support BiasAddGrad as Mkl layer.
-
-  // Search for all matching mergeinfo.
-  // We allow more than one match for extensibility.
-  std::vector<const MergeInfo*> matching_mi;
-  for (auto mi = minfo_.cbegin(); mi != minfo_.cend(); ++mi) {
-    if (a->type_string() == mi->succ) {
-      matching_mi.push_back(&*mi);
-    }
-  }
-
-  for (const MergeInfo* mi : matching_mi) {
-    const int N_in = a->num_inputs();
-    if (mi->op >= N_in) {
-      continue;
-    }
-
-    // Get the control edges and input of node
-    gtl::InlinedVector<Node*, 4> a_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> a_in(N_in);
-    FillInputs(a, &a_control_edges, &a_in);
-
-    // Get operand op of the operator
-    Node* b = nullptr;
-    b = a_in[mi->op].first;
-    if (b == nullptr || (b->type_string() != mi->pred)) {
-      // NOTE: Should the first check be assert?
-      continue;
-    }
-
-    const int B_in = b->num_inputs();
-    gtl::InlinedVector<Node*, 4> b_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> b_in(B_in);
-    FillInputs(b, &b_control_edges, &b_in);
-
-    // Shouldn't merge if a and b have different control edges.
-    if (a_control_edges != b_control_edges) {
-      continue;
-    } else {
-      // We found a match.
-      return b;
-    }
-  }
-
-  return nullptr;
-}
-
-Status MklLayoutRewritePass::MergeNode(std::unique_ptr<Graph>* g, Node* succ,
-                                       Node* pred) {
-  CHECK_NOTNULL(succ);
-  CHECK_NOTNULL(pred);
-
-  if (succ->type_string() == csinfo_.bias_add &&
-      pred->type_string() == csinfo_.mkl_conv2d) {
-    // 1. Get all attributes from input nodes.
-    DataType T_pred, T_succ;
-    string padding;
-    std::vector<int32> strides;
-    string data_format_pred, data_format_succ;
-    bool use_cudnn_on_gnu;
-    TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred));
-    TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ));
-    TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding));
-    TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides));
-    TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred));
-    TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ));
-    TF_CHECK_OK(
-        GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu));
-    // We check to ensure that data formats of both succ and pred are same.
-    // We expect them to be same, so we can enforce this as assert.
-    // But assert can be too strict, so we enforce this as a check.
-    // If the check fails, then we do not merge two nodes.
-    // We also do same check for devices.
-    if (data_format_pred != data_format_succ || T_pred != T_succ ||
-        pred->assigned_device_name() != succ->assigned_device_name() ||
-        pred->def().device() != succ->def().device()) {
-      return Status(error::Code::INVALID_ARGUMENT,
-                    "data_format or T attribute or devices of Conv2D and "
-                    "BiasAdd do not match. Will skip node merge optimization");
-    }
-
-    const int succ_num = succ->num_inputs();
-    gtl::InlinedVector<Node*, 4> succ_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> succ_in(succ_num);
-    FillInputs(succ, &succ_control_edges, &succ_in);
-
-    const int pred_num = pred->num_inputs();
-    gtl::InlinedVector<Node*, 4> pred_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> pred_in(pred_num);
-    FillInputs(pred, &pred_control_edges, &pred_in);
-
-    // We need to ensure that there is only 1 edge between Conv2D and AddBias.
-    // Otherwise, merging is semantically incorrect.
-    if (pred->out_edges().size() != 1) {
-      return Status(error::Code::INVALID_ARGUMENT,
-                    "Conv2D has multiple outputs."
-                    "Will skip node merge optimization");
-    }
-
-    for (const Edge* e : pred->out_edges()) {
-      if (e->dst() != succ) {
-        return Status(error::Code::INVALID_ARGUMENT,
-                      "Conv2D does not feed to BiasAdd."
-                      "Will skip node merge optimization");
-      }
-    }
-
-    // 2. Get inputs from both the nodes.
-    // Find the 2 inputs from the conv and the bias from the add Bias.
-    // Get operand 0, 1 of conv2D and their Mkl tensors.
-    CHECK_EQ(pred->in_edges().size(), 4);  // _MklConv2D must have 4 inputs.
-    // Get operand 1 of add_bias
-    // BiasAdd must have 2 inputs: Conv, bias
-    CHECK_EQ(succ->in_edges().size(), 2);
-    Node* oper3_mkl = nullptr;  // Mkl tensor corresponding to oper3
-    int oper3_mkl_slot = 0;     // For dummy MKL tensor node, output slot is 0.
-    GetDummyMklTensorNode(g, &oper3_mkl, pred);  // Get dummy Mkl tensor node
-    // as BiasAdd does not have Mkl tensor as input.
-    CHECK_NOTNULL(oper3_mkl);
-
-    // We will use the node name of BiasAdd as the name of new node
-    // Build new node. We use same name as original node, but change the op
-    // name.
-    NodeBuilder nb(succ->name(), csinfo_.mkl_conv2d_with_bias);
-    if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-      nb.Input(pred_in[0].first, pred_in[0].second);  // In1 of Conv2D
-      // pred_in[1] will be Mkl tensor for In1 if we follow interleaved
-      // ordering, and it will be 2nd Tensorflow tensor for Conv2D if
-      // we follow contiguous ordering.
-      nb.Input(pred_in[1].first, pred_in[1].second);  // Mkl for In1
-      nb.Input(pred_in[2].first, pred_in[2].second);  // In2 of Conv2D
-      nb.Input(pred_in[3].first, pred_in[3].second);  // Mkl for In2
-      nb.Input(succ_in[1].first, succ_in[1].second);  // In2 of BiasAdd
-      nb.Input(oper3_mkl, oper3_mkl_slot);            // Mkl for In2 of BiasAdd
-    } else {
-      CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-      nb.Input(pred_in[0].first, pred_in[0].second);  // In1 of Conv2D
-      // pred_in[1] will be Mkl tensor for In1 if we follow interleaved
-      // ordering, and it will be 2nd Tensorflow tensor for Conv2D if
-      // we follow contiguous ordering.
-      nb.Input(pred_in[1].first, pred_in[1].second);  // In2 of Conv2D
-      nb.Input(succ_in[1].first, succ_in[1].second);  // In2 of BiasAdd
-      nb.Input(pred_in[2].first, pred_in[2].second);  // Mkl for In1 of Conv2D
-      nb.Input(pred_in[3].first, pred_in[3].second);  // Mkl for In2 of Conv2D
-      nb.Input(oper3_mkl, oper3_mkl_slot);            // Mkl for In2 of BiasAdd
-    }
-
-    // Copy attributes from Conv2D to Conv2DWithBias.
-    CopyAttrsConv2D(const_cast<const Node*>(pred), &nb);
-
-    // Copy the device assigned to old node to new node.
-    nb.Device(succ->def().device());
-
-    // Create node.
-    Node* new_node;
-    TF_CHECK_OK(nb.Finalize(&**g, &new_node));
-    CHECK_NOTNULL(new_node);
-
-    // Set the Mkl layer label for this op.
-    new_node->AddAttr("_kernel", mkl_op_registry::kMklOpLabel);
-
-    // Incoming data edges from 'pred' node and 'succ' node to new 'new_node'
-    // node are already copied in BuildNode. We handle control edges now.
-    for (const Edge* e : pred->in_edges()) {
-      if (e->IsControlEdge()) {
-        CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-      }
-    }
-    for (const Edge* e : succ->in_edges()) {
-      if (e->IsControlEdge()) {
-        CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-      }
-    }
-
-    // Incoming edges are fixed, we will fix the outgoing edges now.
-    // First, we will fix outgoing control edges from 'pred' node.
-    // We don't need to handle outgoing data edges from 'pred' node
-    // because pred has only 1 output going to succ node (we enforced
-    // this check for merge already).
-    for (const Edge* e : pred->out_edges()) {
-      if (e->IsControlEdge()) {
-        CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-      }
-    }
-
-    // Second, we will fix outgoing control and data edges from 'succ' node.
-    for (const Edge* e : succ->out_edges()) {
-      if (e->IsControlEdge()) {
-        CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-      } else {
-        CHECK_NOTNULL(
-            (*g)->AddEdge(new_node, e->src_output(), e->dst(), e->dst_input()));
-      }
-    }
-
-    // Copy device assigned to old node to new node.
-    // It's ok to use pred or succ as we have enforced a check that
-    // both have same device assigned.
-    new_node->set_assigned_device_name(pred->assigned_device_name());
-
-    VLOG(1) << "MklLayoutRewritePass: Merged old node:" << pred->DebugString()
-            << ", and node: " << succ->DebugString()
-            << ", into node:" << new_node->DebugString();
-
-    (*g)->RemoveNode(succ);
-    (*g)->RemoveNode(pred);
-
-    return Status::OK();
-  }
-
-  return Status(error::Code::UNIMPLEMENTED,
-                "Unimplemented case for node merge optimization.");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions for node rewrite
-//////////////////////////////////////////////////////////////////////////
-
-Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
-                                         Node* orig_node,
-                                         const RewriteInfo* ri) {
-  CHECK_NOTNULL(ri);
-  CHECK_NOTNULL(orig_node);
-
-  VLOG(1) << "MklLayoutRewritePass: Original node:" << orig_node->DebugString();
-
-  // Check if this is scenario 2 (context-based rewrite).
-  // Get the matching ContextInfo if it is.
-  const Node* fwd_node = nullptr;
-  const ContextInfo* ci = nullptr;
-  bool is_context_based_rewrite = false;
-  if ((ci = SearchMatchingContext(orig_node, &fwd_node)) != nullptr) {
-    is_context_based_rewrite = true;
-
-    // Sanity checks for context-based rewrite (if any)
-    if (orig_node->type_string() == csinfo_.bias_add_grad &&
-        ri->new_name == csinfo_.mkl_conv2d_with_bias_backprop_bias) {
-      CHECK_NOTNULL(fwd_node);
-      DataType orig_T, ctx_T;
-      string orig_data_format, ctx_data_format;
-      TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &orig_T));
-      TF_CHECK_OK(
-          GetNodeAttr(orig_node->def(), "data_format", &orig_data_format));
-      TF_CHECK_OK(GetNodeAttr(fwd_node->def(), "T", &ctx_T));
-      TF_CHECK_OK(
-          GetNodeAttr(fwd_node->def(), "data_format", &ctx_data_format));
-
-      if (orig_data_format != ctx_data_format || orig_T != ctx_T ||
-          orig_node->assigned_device_name() !=
-              fwd_node->assigned_device_name() ||
-          orig_node->def().device() != fwd_node->def().device()) {
-        return Status(
-            error::Code::INVALID_ARGUMENT,
-            "data_format or T attribute or devices of BiasAddGrad and "
-            "Conv2D do not match. Will skip node rewrite optimization");
-      }
-    } else if (orig_node->type_string() == csinfo_.bias_add_grad &&
-               ri->new_name == csinfo_.matmul) {
-      // When BiasAddGrad has MatMul in context, we do not do any rewrite
-      // and leave BiasAddGrad as it is. But we check for this condition
-      // when we check for node rewrite rule. So we should not even come
-      // here for MatMul. So we will fail now.
-      return Status(
-          error::Code::INVALID_ARGUMENT,
-          "No rewrite is required for BiasAddGrad for MatMul context.");
-    }
-  }
-
-  // Get all inputs.
-  int num_inputs = orig_node->in_edges().size();
-
-  // Drop count for control edges from inputs
-  for (const Edge* e : orig_node->in_edges()) {
-    if (e->IsControlEdge()) {
-      num_inputs--;
-    }
-  }
-
-  gtl::InlinedVector<Node*, 4> control_edges;
-  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num_inputs);
-  FillInputs(orig_node, &control_edges, &inputs);
-
-  // Build new node. We use same name as original node, but change the op name.
-  NodeBuilder nb(orig_node->name().c_str(), ri->new_name.c_str());
-  // Copy user-specified device assigned to original node to new node.
-  nb.Device(orig_node->def().device());
-  // Set up new inputs to the rewritten node.
-  Status s = SetUpInputs(g, inputs, &nb, orig_node);
-  if (s != Status::OK()) {
-    return s;
-  }
-
-  // Copy attributes from original node to new node (for scenario 1).
-  // For context-based rewrite, we use context to copy the attributes.
-  if (is_context_based_rewrite) {
-    if (orig_node->type_string() == csinfo_.bias_add_grad &&
-        ri->new_name == csinfo_.mkl_conv2d_with_bias_backprop_bias) {
-      CHECK_NOTNULL(fwd_node);
-      ri->copy_attrs(fwd_node, &nb);
-    } else {
-      return Status(error::Code::UNIMPLEMENTED,
-                    "Unimplemented case for node rewrite optimization.");
-    }
-  } else {
-    ri->copy_attrs(const_cast<const Node*>(orig_node), &nb);
-  }
-  // Set the Mkl layer label for this op.
-  nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
-
-  // Finalize graph and get new node.
-  Node* new_node = nullptr;
-  TF_CHECK_OK(nb.Finalize(&**g, &new_node));
-  CHECK_NOTNULL(new_node);
-
-  // Incoming data edges from 'orig_node' node to new 'new_node' node are
-  // already copied in BuildNode. We need to handle control edges now.
-  for (const Edge* e : orig_node->in_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-    }
-  }
-
-  // Copy outgoing edges from 'orig_node' node to new
-  // 'new_node' node, since the output also follows same ordering among
-  // Tensorflow tensors and Mkl tensors. We need to connect Tensorflow
-  // tensors appropriately. Specifically, nth output of the original node
-  // will become 2*nth output of the Mkl node for the interleaved ordering
-  // of the tensors. For the contiguous ordering of the tensors, it will be n.
-  // GetTensorDataIndex provides this mapping function.
-  for (const Edge* e : orig_node->out_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-    } else {
-      CHECK_NOTNULL((*g)->AddEdge(
-          new_node,
-          GetTensorDataIndex(e->src_output(), e->src()->num_outputs()),
-          e->dst(), e->dst_input()));
-    }
-  }
-
-  // Copy the runtime device assigned from original code to new node.
-  new_node->set_assigned_device_name(orig_node->assigned_device_name());
-
-  // Delete original node and mark new node as rewritten.
-  (*g)->RemoveNode(orig_node);
-
-  VLOG(1) << "MklLayoutRewritePass: New node:" << new_node->DebugString();
-  return Status::OK();
-}
-
-const MklLayoutRewritePass::ContextInfo*
-MklLayoutRewritePass::SearchMatchingContext(const Node* n,
-                                            const Node** fwd_node) {
-  CHECK_NOTNULL(n);
-  CHECK_NOTNULL(fwd_node);
-  *fwd_node = nullptr;
-
-  // Search for matching contextinfo based on node name and call
-  // callback function using matching contextinfo.
-  // There could be more than one matching contextinfos but whichever
-  // matches first is returned.
-  for (auto ci = cinfo_.cbegin(); ci != cinfo_.cend(); ++ci) {
-    if (n->type_string() == (*ci)->node &&
-        (*ci)->context_match_fn(n, fwd_node, *ci)) {
-      VLOG(1) << "Found context as matching: " << (*ci)->fwd;
-      return *ci;
-    }
-  }
-  return nullptr;
-}
-
-bool MklLayoutRewritePass::ContextMatchRewrite(const Node* n,
-                                               const ContextInfo* c) {
-  const Node* fwd_node = nullptr;
-  return SearchMatchingContext(n, &fwd_node) == c;
-}
-
-const MklLayoutRewritePass::RewriteInfo*
-MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
-  CHECK_NOTNULL(n);
-
-  // First check if node along with its type is supported by MKL layer.
-  // We do not want to rewrite an op into Mkl op if types are not supported.
-  // E.g., MklRelu does not support INT32. So we cannot rewrite Relu to
-  // MklRelu if type is INT32.
-  DataType T;
-  if (!GetNodeAttr(n->def(), "T", &T).ok()) {
-    return nullptr;
-  }
-
-  // BiasAddGrad is not an Mkl layer, so we make an exception for it.
-  if (n->type_string() != csinfo_.bias_add_grad) {
-    if (!mkl_op_registry::IsMklOp(
-            mkl_op_registry::GetMklOpName(n->type_string()), T)) {
-      return nullptr;
-    }
-  }
-
-  // For elementwise node, we reuse the Eigen implementation and pass the MKL
-  // metadata tensor through so we can avoid conversions. However, if all
-  // incoming edges are in TF format, we don't need all this overhead, so
-  // replace the elementwise node only if at least one of its parents is a MKL
-  // node.
-  //
-  // TODO(vrane): Add implementation for element-wise ops that doesn't reuse
-  // eigen code to reduce cross-library dependency.
-  if (mkl_op_registry::IsMklElementWiseOp(
-          mkl_op_registry::GetMklOpName(n->type_string()), T)) {
-    bool incoming_mkl_edge = false;
-    for (auto parent : n->in_edges()) {
-      if (mkl_op_registry::IsMklOp(
-              mkl_op_registry::GetMklOpName(parent->src()->type_string()), T)) {
-        incoming_mkl_edge = true;
-        break;
-      } else {
-        VLOG(1) << "Non-MKL parent is: " << parent->src()->type_string();
-      }
-    }
-    if (incoming_mkl_edge == false) {
-      VLOG(1) << "Skipping replacement of elementwise node which has no MKL "
-                 "parents.";
-      return nullptr;
-    }
-  }
-
-  // We support 2 types of node rewrites:
-  // 1. Rewriting BiasAddGrad depending on its MklConv2DWithBias context.
-  // 2. Rewriting an op to Mkl op always
-  // We return true if any of these 2 conditions is met.
-
-  // Find matching RewriteInfo and then check that rewrite rule applies.
-  for (auto ri = rinfo_.cbegin(); ri != rinfo_.cend(); ++ri) {
-    if (n->type_string().compare(ri->name) == 0 &&
-        ri->rewrite_rule(n, ri->context)) {
-      // If we are rewriting BiasAddGrad into BiasAddGrad for MatMul context,
-      // then we just return directly.
-      if (n->type_string() == csinfo_.bias_add_grad &&
-          ri->context->fwd == csinfo_.matmul &&
-          ri->new_name == csinfo_.bias_add_grad) {
-        return nullptr;
-      }
-      return &*ri;
-    }
-  }
-
-  // Else return not found.
-  return nullptr;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//              Run function for the pass
-///////////////////////////////////////////////////////////////////////////////
-
-bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) {
-  bool result = false;
-  CHECK_NOTNULL(g);
-
-  DumpGraph("Before running MklLayoutRewritePass", &**g);
-
-  std::vector<Node*> order;
-  GetReversePostOrder(**g, &order);  // This will give us topological sort.
-
-  for (Node* n : order) {
-    // If node is not an op or it cannot run on CPU device, then skip.
-    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
-      continue;
-    }
-
-    const RewriteInfo* ri = nullptr;
-    Node* predn = nullptr;
-    // We will first search if node is to be rewritten
-    if ((ri = CheckForNodeRewrite(n)) != nullptr) {
-      string node_name = n->name();
-      string op_name = n->type_string();
-
-      VLOG(1) << "MklLayoutRewritePass: Scheduled node " << node_name
-              << " with op " << op_name << " for rewrite using"
-              << " layout optimization.";
-
-      if (RewriteNode(g, n, ri) == Status::OK()) {
-        VLOG(1) << "MklLayoutRewritePass: rewrote node " << node_name
-                << " with op " << op_name << " for Mkl layout optimization.";
-        result = true;
-      }
-    } else if ((predn = CheckForNodeMerge(n)) != nullptr) {
-      // Otherwise, we will check if the node is to be merged.
-      string n1_name = n->name();
-      string n2_name = predn->name();
-
-      VLOG(1) << "MklLayoutRewritePass: Scheduled nodes " << n1_name << " and "
-              << n2_name << " for merging";
-
-      if (MergeNode(g, n, predn) == Status::OK()) {
-        VLOG(1) << "MklLayoutRewritePass: Merged nodes " << n1_name << " and "
-                << n2_name;
-        result = true;
-      }
-    }
-  }
-
-  DumpGraph("After running MklLayoutRewritePass", &**g);
-
-  return result;
-}
-
-bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
-  return MklLayoutRewritePass().RunPass(g);
-}
-
-Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
-  if (options.graph == nullptr && options.partition_graphs == nullptr) {
-    return Status::OK();
-  }
-
-  auto process_graph = [&](std::unique_ptr<Graph>* g) {
-    // Get the ownership of a graph
-    std::unique_ptr<Graph>* ng = std::move(g);
-    RunPass(ng);
-    // Return the ownership of a graph back
-    g->reset(ng->release());
-  };
-
-  if (kMklLayoutRewritePassGroup !=
-      OptimizationPassRegistry::POST_PARTITIONING) {
-    // For any pre-partitioning phase, a graph is stored in options.graph.
-    process_graph(options.graph);
-  } else {
-    // For post partitioning phase, graphs are stored in
-    // options.partition_graphs.
-    for (auto& pg : *options.partition_graphs) {
-      process_graph(&pg.second);
-    }
-  }
-
-  return Status::OK();
-}
-
-#else   // INTEL_MKL_ML_ONLY
-
 // This pass implements rewriting of graph to support following scenarios:
 // (A) Merging nodes in the graph
 // (B) Rewriting a node in the graph to a new node
@@ -4539,7 +2364,7 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
 
   return Status::OK();
 }
-#endif  // INTEL_MKL_ML_ONLY
+
 }  // namespace tensorflow
 
 #endif
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 77640e287c..0eda8170f8 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -37,1869 +37,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-#ifdef INTEL_MKL_ML_ONLY
-
-namespace {
-
-const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
-const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
-
-static void InitGraph(const string& s, Graph* graph,
-                      const string& device = kCPUDevice) {
-  GraphDef graph_def;
-
-  auto parser = protobuf::TextFormat::Parser();
-  //  parser.AllowRelaxedWhitespace(true);
-  CHECK(parser.MergeFromString(s, &graph_def)) << s;
-  GraphConstructorOptions opts;
-  TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
-
-  for (Node* node : graph->nodes()) {
-    node->set_assigned_device_name(device);
-  }
-}
-
-class MklLayoutPassTest : public ::testing::Test {
- public:
-  MklLayoutPassTest() : graph_(OpRegistry::Global()) {}
-
-  void InitGraph(const string& s, const string& device = kCPUDevice) {
-    ::tensorflow::InitGraph(s, &graph_, device);
-    original_ = CanonicalGraphString(&graph_);
-  }
-
-  static bool IncludeNode(const Node* n) { return n->IsOp(); }
-
-  static string EdgeId(const Node* n, int index) {
-    if (index == 0) {
-      return n->name();
-    } else if (index == Graph::kControlSlot) {
-      return strings::StrCat(n->name(), ":control");
-    } else {
-      return strings::StrCat(n->name(), ":", index);
-    }
-  }
-
-  string CanonicalGraphString(Graph* g) {
-    std::vector<string> nodes;
-    std::vector<string> edges;
-    for (const Node* n : g->nodes()) {
-      if (IncludeNode(n)) {
-        nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
-      }
-    }
-    for (const Edge* e : g->edges()) {
-      if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
-        edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
-                                        EdgeId(e->dst(), e->dst_input())));
-      }
-    }
-    // Canonicalize
-    std::sort(nodes.begin(), nodes.end());
-    std::sort(edges.begin(), edges.end());
-    return strings::StrCat(str_util::Join(nodes, ";"), "|",
-                           str_util::Join(edges, ";"));
-  }
-
-  string DoMklLayoutOptimizationPass() {
-    string before = CanonicalGraphString(&graph_);
-    LOG(ERROR) << "Before MKL layout rewrite pass: " << before;
-
-    std::unique_ptr<Graph>* ug = new std::unique_ptr<Graph>(&graph_);
-    RunMklLayoutRewritePass(ug);
-
-    string result = CanonicalGraphString(&graph_);
-    LOG(ERROR) << "After MKL layout rewrite pass:  " << result;
-    return result;
-  }
-
-  const string& OriginalGraph() const { return original_; }
-
-  Graph graph_;
-  string original_;
-};
-
-REGISTER_OP("Input").Output("o: float").SetIsStateful();
-REGISTER_OP("InputList").Output("o: N * float").Attr("N: int").SetIsStateful();
-REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
-REGISTER_OP("Int32Input").Output("o: int32").SetIsStateful();
-REGISTER_OP("_MklInput").Output("o: uint8").SetIsStateful();
-REGISTER_OP("_MklInput2")
-    .Output("o: uint8")
-    .Output("o1: uint8")
-    .SetIsStateful();
-
-/////////////////////////////////////////////////////////////////////
-//  Unit tests related to node merge optiimization
-/////////////////////////////////////////////////////////////////////
-
-TEST_F(MklLayoutPassTest, Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Zeta);D(Zeta)|"
-            "A->C;A->D;B->C:1;B->D:1");
-}
-
-// Test set 1: Conv2D + AddBias
-
-// C=_MklConv2D(A,M,B,N); E=BiasAdd(C,D); Z=Zeta(E,Y) (for interleaved ordering)
-// C=_MklConv2D(A,B,M,N); E=BiasAdd(C,D); Z=Zeta(E,Y) (for contiguous ordering)
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);D(Input);DMT/_0(Const);E(_MklConv2DWithBias);"
-            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->E;"
-            "A:control->DMT/_0:control;B->E:1;D->E:2;DMT/_0->E:5;E->Z;M->E:3;"
-            "N->E:4;Y->Z:1");
-}
-
-// C=_MklConv2D(A,M:1,B,N:1); E=BiasAdd(C,D); Z=Zeta(E,Y) (for interleaved)
-// C=_MklConv2D(A,B,M:1,N:1); E=BiasAdd(C,D); Z=Zeta(E,Y) (for contiguous)
-// Test for correct output slots selected
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive1) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput2'}"
-      "node { name: 'N' op: '_MklInput2'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M:1', 'N:1']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);D(Input);DMT/_0(Const);E(_MklConv2DWithBias);"
-            "M(_MklInput2);N(_MklInput2);Y(Input);Z(Zeta)|A->E;"
-            "A:control->DMT/_0:control;B->E:1;D->E:2;DMT/_0->E:5;E->Z;"
-            "M:1->E:3;N:1->E:4;Y->Z:1");
-}
-
-// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y);
-// This is a case of node rewrite followed by node merge.
-// We will first rewrite Conv2D to _MklConv2D, and then merge _MklConv2D
-// with BiasAdd to produce _MklConv2DWithBias.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive2) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Zeta)|"
-            "A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;DMT/_1->E:4;"
-            "DMT/_2->E:5;E->Z;Y->Z:1");
-}
-
-// Graph contains only _MklConv2D, no AddBias.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_NoAddBias) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);M(_MklInput);N(_MklInput)|"
-            "A->C;B->C:1;M->C:2;N->C:3");
-}
-
-// _MklConv2D output does not go to BiasAdd.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D', 'E'] }");  // Output of _MklConv2D does not go to BiasAdd.
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);E(Input);F(BiasAdd);"
-            "M(_MklInput);N(_MklInput)|A->C;B->C:1;D->F;E->F:1;M->C:2;N->C:3");
-}
-
-// _MklConv2D has two outgoing edges: BiasAdd and some other dummy node (Zeta).
-// Merge should not be done in such case.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D', 'E'] }"  // Conv2D has two outputs.
-                              // No merge should happen.
-      "node { name: 'G' op: 'Zeta'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['C', 'E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);E(Input);F(BiasAdd);"
-            "G(Zeta);M(_MklInput);N(_MklInput)|A->C;B->C:1;C->G;D->F;"
-            "E->F:1;E->G:1;M->C:2;N->C:3");
-}
-
-// data_format attribute value mismatch. Merge should not be done
-// in such case.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_AttrMismatch) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NHCW' } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);M(_MklInput);"
-            "N(_MklInput)|A->C;B->C:1;C->E;D->E:1;M->C:2;N->C:3");
-}
-
-// Test set 2: _MklConv2D..BiasAddGrad -> _MklConv2DWithBiasBackpropBias
-// rewrite tests
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter
-// and BackpropInput
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'F', 'E', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'Int32Input'}"
-      "node { name: 'I' op: '_MklConv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['H', 'B', 'E', 'M', 'N', 'O']}"
-      "node { name: 'J' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);DMT/_0(Const);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
-            "I(_MklConv2DBackpropInput);J(_MklConv2DWithBiasBackpropBias);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G;B->D:1;"
-            "B->I:1;C->D:2;D->E;DMT/_0->J:1;E->G:2;E->I:2;E->J;"
-            "E:control->DMT/_0:control;F->G:1;H->I;M->D:3;M->G:3;M->I:3;"
-            "N->D:4;N->G:4;N->I:4;O->D:5;O->G:5;O->I:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter
-// and BackpropInput. But nodes do not match criteria for rewrite. So
-// rewrite should not happen.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['E', 'F', 'A', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'Int32Input'}"
-      "node { name: 'I' op: '_MklConv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['H', 'B', 'E', 'M', 'N', 'O']}"
-      "node { name: 'J' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
-            "I(_MklConv2DBackpropInput);J(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
-            "B->I:1;C->D:2;D->E;E->G;E->I:2;E->J;F->G:1;H->I;M->D:3;M->G:3;"
-            "M->I:3;N->D:4;N->G:4;N->I:4;O->D:5;O->G:5;O->I:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter
-// and BackpropInput. But nodes do not match criteria for rewrite. So
-// rewrite should not happen.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['B', 'A', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'F', 'E', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'Int32Input'}"
-      "node { name: 'I' op: '_MklConv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['H', 'B', 'E', 'M', 'N', 'O']}"
-      "node { name: 'J' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
-            "I(_MklConv2DBackpropInput);J(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D:1;A->E:1;A->G;B->D;"
-            "B->I:1;C->D:2;D->E;E->G:2;E->I:2;E->J;F->G:1;H->I;M->D:3;M->G:3;"
-            "M->I:3;N->D:4;N->G:4;N->I:4;O->D:5;O->G:5;O->I:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter only
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'F', 'E', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);DMT/_0(Const);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);"
-            "H(_MklConv2DWithBiasBackpropBias);M(_MklInput);N(_MklInput);"
-            "O(_MklInput)|A->D;A->E:1;A->G;B->D:1;C->D:2;D->E;DMT/_0->H:1;"
-            "E->G:2;E->H;E:control->DMT/_0:control;F->G:1;M->D:3;M->G:3;"
-            "N->D:4;N->G:4;O->D:5;O->G:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter only
-// But BackpropFilter node inputs do not satisfy criteria for rewrite.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['E', 'F', 'A', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
-            "C->D:2;D->E;E->G;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
-            "O->G:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter only
-// But BackpropFilter node inputs do not satisfy criteria for rewrite.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['B', 'A', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'F', 'E', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D:1;A->E:1;A->G;B->D;"
-            "C->D:2;D->E;E->G:2;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
-            "O->G:5");
-}
-
-// No _MklConv2DWithBias in context, but _MklConv2D in context.
-// No rewrite for BiasAddGrad should happen.
-// C=_MklConv2D(A,M,B,N); D=Zeta(C,A); E=BiasAddGrad(D) (for interleaved)
-// C=_MklConv2D(A,B,M,N); D=Zeta(C,A); E=BiasAddGrad(D) (for contiguous)
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Neg_NoMklConv2DWithBias) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;"
-            "M->C:2;N->C:3");
-}
-
-// No Conv2D in the context for BiasAddGrad. No rewrite should happen.
-// C=Polygamma(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Polygamma'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-// No Conv2D in the context for BiasAddGrad, but MatMul in context.
-// Rewrite should happen, but name of BiasAddGrad does not change.
-// C=MatMul(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D_MatMul) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'MatMul'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'transpose_a'      value { b: false } }"
-      " attr { key: 'transpose_b'      value { b: false } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-// Test set 3: MatMul..BiasAddGrad -> BiasAddGrad rewrite tests
-// C=MatMul(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
-TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'MatMul'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'transpose_a'      value { b: false } }"
-      " attr { key: 'transpose_b'      value { b: false } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-// No MatMul in the context for BiasAddGrad. No rewrite should happen.
-// C=Polygamma(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
-TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Negative_NoMatMul) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Polygamma'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-/////////////////////////////////////////////////////////////////////
-//  Unit tests related to rewriting node to Mkl node
-/////////////////////////////////////////////////////////////////////
-
-// Single Conv2D Op; No Mkl layer on the input and on the output.
-// We will generate dummy Mkl tensor as 2nd input of Conv2D.
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['B', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
-            "DMT/_1->C:3");
-}
-
-// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will
-// have 2 outputs, both of which will be inputs to next Conv2D.
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(_MklConv2D);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->C;A->D;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;"
-            "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2");
-}
-
-// Conv2D with INT32 which is not supported by Mkl
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
-  InitGraph(
-      "node { name: 'A' op: 'HalfInput'}"
-      "node { name: 'B' op: 'HalfInput'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_HALF } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }"
-      " input: ['B', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(HalfInput);B(HalfInput);C(Conv2D);D(Zeta)|"
-            "A->C;B->C:1;B->D;C->D:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);"
-            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
-            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:4;DMT/_2->D:5");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['B', 'A', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);"
-            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
-            "A->D:1;A->E;B->D;B:control->DMT/_0:control;"
-            "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;"
-            "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// Concat Op test: Concat with no Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['A', 'B:0', 'B:1']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(
-      DoMklLayoutOptimizationPass(),
-      "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);"
-      "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;"
-      "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;"
-      "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// Concat with 2 Mkl layers feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['G', 'E', 'F']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
-            "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;"
-            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
-            "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;"
-            "G:control->DMT/_4:control;H->I:1");
-}
-
-// Concat with 1 Mkl and 1 non-Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['G', 'E', 'F']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
-            "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
-            "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;"
-            "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1");
-}
-
-// ConcatV2 Op test: ConcatV2 with no Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['B:0', 'B:1', 'A']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;"
-            "B:control->DMT/_0:control;B:control->DMT/_1:control;"
-            "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// ConcatV2 with 2 Mkl layers feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['E', 'F', 'G']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
-            "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;"
-            "C:control->DMT/_0:control;C:control->DMT/_1:control;"
-            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
-            "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;"
-            "F:2->H:4;G->H:2;H->I:1");
-}
-
-// ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['E', 'F', 'G']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
-            "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
-            "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;"
-            "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;"
-            "G->H:2;H->I:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Relu'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;"
-            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'ReluGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Relu'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'ReluGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;"
-            "DMT/_1->C:2");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'AvgPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklAvgPool);C(Zeta);DMT/_0(Const)|A->B;A->C;"
-            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Int32Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'AvgPoolGrad' "
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['B', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
-            "DMT/_1->C:3");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolAvgPoolGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'I' op: 'Int32Input'}"
-      "node { name: 'B' op: 'AvgPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'AvgPoolGrad' "
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['I', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const);I(Int32Input)|A->B;A->D;A:control->DMT/_0:control;"
-            "B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;DMT/_1->C:2;I->C;"
-            "I:control->DMT/_1:control");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNormGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'FusedBatchNormGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'epsilon'      value { f: 0.0001 } }"
-      " attr { key: 'is_training'  value { b: true } }"
-      " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'F'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
-            "F(_MklFusedBatchNormGrad);G(Zeta)|A->F;A->G;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
-            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
-            "E->F:4;F->G:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'FusedBatchNorm'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'epsilon'      value { f: 0.0001 } }"
-      " attr { key: 'is_training'  value { b: true } }"
-      " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'F'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
-            "F(_MklFusedBatchNorm);G(Zeta)|A->F;A->G;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
-            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
-            "E->F:4;F->G:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-//  Unit tests related to rewriting node for workspace edges
-/////////////////////////////////////////////////////////////////////
-
-/* Test LRN->MaxPool->MaxPoolGrad->LRNGrad replacement by workspace nodes. */
-TEST_F(MklLayoutPassTest, MaxPoolLRN_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['B'] }"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'MaxPoolGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['B', 'C', 'D'] }"
-      "node { name: 'F' op: 'Input'}"
-      "node { name: 'G' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['E', 'F', 'B'] }"
-      "node { name: 'H' op: 'Input'}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['H', 'G'] }");
-  EXPECT_EQ(
-      DoMklLayoutOptimizationPass(),
-      "A(Input);B(_MklLRN);C(_MklMaxPool);D(Input);DMT/_0(Const);DMT/_1(Const);"
-      "DMT/_2(Const);E(_MklMaxPoolGrad);F(Input);G(_MklLRNGrad);H(Input);"
-      "I(Zeta)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;"
-      "B:2->C:1;B:2->E:4;B:2->G:6;B:3->G:7;B:control->DMT/_1:control;C->E:1;"
-      "C:1->E:3;C:2->E:5;C:3->E:7;D->E:2;DMT/_0->B:1;DMT/_1->E:6;DMT/_2->G:5;"
-      "E->G;E:1->G:4;E:control->DMT/_2:control;F->G:1;G->I:1;H->I");
-}
-
-/* Test LRN->LRNGrad replacement by workspace nodes. */
-TEST_F(MklLayoutPassTest, LRN_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['C', 'D', 'B'] }"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(_MklLRNGrad);F(Zeta)|"
-            "A->B;A:control->DMT/_0:control;B->E:2;B:1->E:3;B:2->E:6;B:3->E:7;"
-            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
-            "D->E:1;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:5;E->F:1");
-}
-
-/* Test LRN->LRNGrad replacement when only one of them is present. */
-TEST_F(MklLayoutPassTest, LRN_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Zeta);DMT/_0(Const)|"
-            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-/* Test LRN->LRNGrad replacement when only one of them is present. */
-TEST_F(MklLayoutPassTest, LRN_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklLRNGrad);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
-            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
-}
-
-/* Test LRN->LRNGrad negative case, where single LRN feeds
-   2 LRNGrad nodes at different slots. */
-TEST_F(MklLayoutPassTest, LRN_Negative3) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['C', 'D', 'B'] }"
-      "node { name: 'F' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['C', 'B', 'D'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['E', 'F'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);DMT/_5(Const);"
-            "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Zeta)|A->B;"
-            "A:control->DMT/_0:control;B->E:2;"
-            "B->F:1;B:1->E:3;B:2->E:6;B:2->F:5;B:3->E:7;C->E;C->F;"
-            "C:control->DMT/_1:control;C:control->DMT/_2:control;"
-            "C:control->DMT/_3:control;C:control->DMT/_4:control;"
-            "C:control->DMT/_5:control;C:control->DMT/_6:control;"
-            "D->E:1;D->F:2;DMT/_0->B:1;DMT/_1->F:3;DMT/_2->F:7;DMT/_3->F:4;"
-            "DMT/_4->F:6;DMT/_5->E:4;DMT/_6->E:5;E->G;F->G:1");
-}
-
-/* Test MaxPool->MaxPoolGrad replacement by workspace+rewrite nodes. */
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'MaxPoolGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['C', 'B', 'D'] }"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklMaxPool);C(Input);D(Input);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Zeta)|"
-            "A->B;A:control->DMT/_0:control;B->E:1;B:1->E:3;B:2->E:5;B:3->E:7;"
-            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
-            "D->E:2;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:6;E->F:1");
-}
-
-// Test MaxPool>MaxPoolGrad replacement when only one of them is present.
-// In this case, we will rewrite MaxPool node but workspace edges will not
-// be present.
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklMaxPool);C(Zeta);DMT/_0(Const)|"
-            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-// Test MaxPoolGrad replacement when only one of them is present.
-// In this case, we will rewrite MaxPoolGrad and for workspace tensor and
-// its Mkl part, we will generate dummy tensor.
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'MaxPoolGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklMaxPoolGrad);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
-            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
-}
-
-// Test MaxPool handling for batch-wise pooling (NCHW)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative3) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for batch-wise pooling (NCHW)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative4) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative5) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:2, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NCHW)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative6) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:2, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for batch-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative7) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for batch-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative8) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative9) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:2} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative10) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-
-// Single Conv2D Op on GPU device
-// No rewrite should happen
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['B', 'C'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Conv2D);D(Zeta)|A->C;B->C:1;B->D;C->D:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(BiasAddGrad);M(_MklInput);N(_MklInput);"
-            "O(_MklInput)|A->D;A->E:1;B->D:1;C->D:2;D->E;E->F;"
-            "M->D:3;N->D:4;O->D:5");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Zeta)|"
-            "A->D;A->E;B->D:1;C->D:2;D->E:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Relu_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Relu'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Relu);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'ReluGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(ReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'AvgPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(AvgPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Concat Op test: Concat with no Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['A', 'B:0', 'B:1']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(Concat);E(Zeta)|A->D;"
-            "B->D:1;B:1->D:2;C->E;D->E:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['B:0', 'B:1', 'A']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(ConcatV2);E(Zeta)|"
-            "A->D:2;B->D;B:1->D:1;C->E;D->E:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'FusedBatchNorm'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'epsilon'      value { f: 0.0001 } }"
-      " attr { key: 'is_training'  value { b: true } }"
-      " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'F'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);E(Input);"
-            "F(FusedBatchNorm);G(Zeta)|A->F;A->G;B->F:1;C->F:2;D->F:3;"
-            "E->F:4;F->G:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);"
-            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->C;"
-            "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-
-static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
-  testing::StopTiming();
-  string s;
-  for (int in = 0; in < 10; in++) {
-    s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
-  }
-  random::PhiloxRandom philox(301, 17);
-  random::SimplePhilox rnd(&philox);
-  for (int op = 0; op < op_nodes; op++) {
-    s += strings::Printf(
-        "node { name: 'op%04d' op: 'Zeta' attr { key: 'T' value { "
-        "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
-        op, rnd.Uniform(10), rnd.Uniform(10));
-  }
-
-  bool first = true;
-  while (iters > 0) {
-    Graph* graph = new Graph(OpRegistry::Global());
-    InitGraph(s, graph);
-    int N = graph->num_node_ids();
-    if (first) {
-      testing::SetLabel(strings::StrCat("Per graph node.  Nodes: ", N));
-      first = false;
-    }
-    {
-      testing::StartTiming();
-      std::unique_ptr<Graph> ug(graph);
-      RunMklLayoutRewritePass(&ug);
-      testing::StopTiming();
-    }
-    iters -= N;  // Our benchmark units are individual graph nodes,
-                 // not whole graphs
-    // delete graph;
-  }
-}
-BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
-
-}  // namespace
-
-#else  // INTEL_MKL_ML_ONLY
-
 // NOTE: Unit tests in this file rely on a topological sorted graph for
 // printing. But since sibling nodes of a node in the topologically sorted graph
 // can be printed in different orders, tests may fail if the order in which
@@ -3602,8 +1739,6 @@ BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
 
 }  // namespace
 
-#endif  // INTEL_MKL_ML_ONLY
-
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL && ENABLE_MKL
-- 
GitLab


From 470101040d2174ddcb41990e5e16ed6dfa6f6436 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 13:33:43 -0700
Subject: [PATCH 1205/1357] Remove commented out code errantly checked in.

PiperOrigin-RevId: 215957544
---
 third_party/jpeg/workspace.bzl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/third_party/jpeg/workspace.bzl b/third_party/jpeg/workspace.bzl
index 4b517240ec..2bb7dacd32 100644
--- a/third_party/jpeg/workspace.bzl
+++ b/third_party/jpeg/workspace.bzl
@@ -12,6 +12,5 @@ def repo():
         sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b",
         strip_prefix = "libjpeg-turbo-2.0.0",
         build_file = "//third_party/jpeg:BUILD.bazel",
-        # build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"),
         system_build_file = "//third_party/jpeg:BUILD.system",
     )
-- 
GitLab


From ae0bc6f006497cc04a2ee75166d4ec71c7154fd8 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Fri, 5 Oct 2018 13:34:01 -0700
Subject: [PATCH 1206/1357] [tf.data] Adding specialization for `MapDataset`,
 `ParallelMapDataset`, and `MapAndBatchDataset` whose user-provided functions
 have the property that each output argument take its value directly from an
 input argument (e.g. `lambda x, y: y, x`). This specialization can produce
 the result without having to schedule the function using the executor.

PiperOrigin-RevId: 215957592
---
 tensorflow/core/kernels/data/BUILD            |  14 ++
 tensorflow/core/kernels/data/dataset_utils.cc |  47 +++++
 tensorflow/core/kernels/data/dataset_utils.h  |  20 ++
 .../core/kernels/data/dataset_utils_test.cc   |  46 +++++
 .../core/kernels/data/filter_dataset_op.cc    | 162 +++++++---------
 .../kernels/data/map_and_batch_dataset_op.cc  | 180 +++++++++++-------
 .../core/kernels/data/map_dataset_op.cc       |  56 ++++--
 .../kernels/data/parallel_map_dataset_op.cc   |  73 ++++---
 .../kernels/data/parallel_map_iterator.cc     |  17 +-
 .../core/kernels/data/parallel_map_iterator.h |   2 +-
 .../kernels/data/parse_example_dataset_op.cc  |   2 +-
 .../kernel_tests/map_and_batch_test.py        |  20 ++
 .../kernel_tests/filter_dataset_op_test.py    |   2 +-
 .../data/kernel_tests/map_dataset_op_test.py  |  80 ++++++--
 .../python/data/kernel_tests/test_base.py     |  29 +++
 15 files changed, 520 insertions(+), 230 deletions(-)
 create mode 100644 tensorflow/core/kernels/data/dataset_utils_test.cc

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 451f8c1a6c..37c1c54786 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -45,6 +45,16 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "dataset_utils_test",
+    srcs = ["dataset_utils_test.cc"],
+    deps = [
+        ":dataset_utils",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 cc_library(
     name = "captured_function",
     srcs = ["captured_function.cc"],
@@ -205,6 +215,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -232,6 +243,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -245,6 +257,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -285,6 +298,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         ":parallel_map_iterator",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index e10833f525..a40f7f2146 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -15,10 +15,57 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
 
 namespace tensorflow {
 namespace data {
 
+Status ComputeShortCircuitIndices(OpKernelContext* ctx,
+                                  const NameAttrList& func,
+                                  std::vector<int>* indices) {
+  FunctionLibraryRuntime::Handle fn_handle;
+  TF_RETURN_IF_ERROR(ctx->function_library()->Instantiate(
+      func.name(), AttrSlice(&func.attr()), &fn_handle));
+  auto cleanup = gtl::MakeCleanup([ctx, fn_handle]() {
+    Status s = ctx->function_library()->ReleaseHandle(fn_handle);
+    if (!s.ok()) {
+      LOG(WARNING) << "Failed to release handle: " << s.error_message();
+    }
+  });
+
+  const FunctionBody* fn_body =
+      ctx->function_library()->GetFunctionBody(fn_handle);
+  indices->resize(fn_body->ret_nodes.size());
+  for (size_t i = 0; i < fn_body->ret_nodes.size(); ++i) {
+    Node* ret_node = fn_body->ret_nodes[i];
+    Node* ret_input_node;
+    TF_RETURN_IF_ERROR(ret_node->input_node(0, &ret_input_node));
+    if (ret_input_node->def().op() == FunctionLibraryDefinition::kArgOp) {
+      TF_RETURN_IF_ERROR(
+          GetNodeAttr(ret_input_node->def(), "index", &((*indices)[i])));
+    } else {
+      indices->clear();
+      break;
+    }
+  }
+  return Status::OK();
+}
+
+std::vector<bool> ComputeMoveVector(const std::vector<int>& indices) {
+  std::map<int, int> last_use;
+  for (size_t i = 0; i < indices.size(); ++i) {
+    last_use[indices[i]] = i;
+  }
+  std::vector<bool> can_move;
+  can_move.resize(indices.size());
+  for (size_t i = 0; i < indices.size(); ++i) {
+    can_move[i] = last_use[indices[i]] == i;
+  }
+  return can_move;
+}
+
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index 6ec1350cd4..d777062293 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -22,6 +22,26 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
+// This method is used to determine whether we can short-circuit the evaluation
+// of the user-defined function `func`. Short-circuting is possible if every
+// function output corresponds to one of its inputs (e.g. `f(x) = x`, `f(x,y) =
+// (y,x)`, or `f(x) = (x,x)`).
+//
+// If short-circuiting is possible, the method stores the mapping from output
+// indices to input indices in `indices`. Otherwise, `indices` will be empty.
+//
+// Returns non-ok status if analysis of the function fails.
+//
+// TODO(jsimsa): Extend this to support constants as well.
+Status ComputeShortCircuitIndices(OpKernelContext* ctx,
+                                  const NameAttrList& func,
+                                  std::vector<int>* indices);
+
+// Given a vector that maps output indices to input indices, return a vector
+// that identifies for which output indices can we move the input (assuming
+// output indices are processed left to right).
+std::vector<bool> ComputeMoveVector(const std::vector<int>& indices);
+
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc
new file mode 100644
index 0000000000..43295b8ebb
--- /dev/null
+++ b/tensorflow/core/kernels/data/dataset_utils_test.cc
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/data/dataset_utils.h"
+
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace data {
+namespace {
+
+TEST(DatasetUtils, ComputeMoveVector) {
+  struct TestCase {
+    std::vector<int> indices;
+    std::vector<bool> expected;
+  };
+
+  TestCase test_cases[] = {
+      TestCase{{}, {}},
+      TestCase{{1}, {true}},
+      TestCase{{1, 1}, {false, true}},
+      TestCase{{1, 2}, {true, true}},
+      TestCase{{1, 1, 2}, {false, true, true}},
+      TestCase{{1, 2, 2}, {true, false, true}},
+  };
+
+  for (auto& test_case : test_cases) {
+    EXPECT_EQ(test_case.expected, ComputeMoveVector(test_case.indices));
+  }
+}
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index 00884314a9..be7d182a1f 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -18,9 +18,11 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -31,67 +33,84 @@ namespace {
 
 class FilterDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using FilterIteratorPredicate =
+      std::function<Status(IteratorContext*, std::vector<Tensor>, bool*)>;
+
   explicit FilterDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
+      : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_));
   }
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    FunctionLibraryRuntime::Handle pred_handle;
-    OP_REQUIRES_OK(ctx,
-                   ctx->function_library()->Instantiate(
-                       func_.name(), AttrSlice(&func_.attr()), &pred_handle));
-    auto cleanup = gtl::MakeCleanup([ctx, pred_handle]() {
-      OP_REQUIRES_OK(ctx, ctx->function_library()->ReleaseHandle(pred_handle));
-    });
-
-    const FunctionBody* pred_body =
-        ctx->function_library()->GetFunctionBody(pred_handle);
-    OP_REQUIRES(ctx, pred_body->ret_nodes.size() == 1,
-                errors::InvalidArgument(
-                    "predicate function must have a single return value."));
-    Node* ret_node = pred_body->ret_nodes[0];
-    Node* ret_input_node;
-    OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node));
-
     std::unique_ptr<CapturedFunction> captured_func;
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    if (ret_input_node->def().op() == "_Arg") {
-      int32 index = -1;
-      OP_REQUIRES_OK(ctx, GetNodeAttr(ret_input_node->def(), "index", &index));
-      *output = new FilterTensorDataset(ctx, input, func_,
-                                        std::move(captured_func), index);
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+    OP_REQUIRES(ctx, indices.size() <= 1,
+                errors::InvalidArgument(
+                    "predicate function has more than one return value."));
+
+    FilterIteratorPredicate filter_pred;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      filter_pred = [raw_captured_func](IteratorContext* ctx,
+                                        const std::vector<Tensor>& args,
+                                        bool* out_matched) {
+        std::vector<Tensor> result;
+        TF_RETURN_IF_ERROR(
+            raw_captured_func->RunWithBorrowedArgs(ctx, args, &result));
+
+        if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
+            result[0].NumElements() != 1) {
+          return errors::InvalidArgument(
+              "Filter predicate `f` must return a scalar bool.");
+        }
+        *out_matched = result[0].scalar<bool>()();
+        return Status::OK();
+      };
     } else {
-      *output = new FilterFunctionDataset(ctx, input, func_,
-                                          std::move(captured_func));
+      filter_pred = [indices](IteratorContext* ctx,
+                              const std::vector<Tensor>& args,
+                              bool* out_matched) {
+        const Tensor& predicate = args[indices[0]];
+        if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
+          return errors::InvalidArgument(
+              "Filter predicate `f` must return a scalar bool.");
+        }
+        *out_matched = predicate.scalar<bool>()();
+        return Status::OK();
+      };
     }
+
+    *output = new Dataset(ctx, input, func_, std::move(captured_func),
+                          std::move(filter_pred));
   }
 
  private:
-  const int graph_def_version_;
-
-  class FilterDatasetBase : public DatasetBase {
+  class Dataset : public DatasetBase {
    public:
-    FilterDatasetBase(OpKernelContext* ctx, const DatasetBase* input,
-                      const NameAttrList& func,
-                      std::unique_ptr<CapturedFunction> captured_func)
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func,
+            std::unique_ptr<CapturedFunction> captured_func,
+            FilterIteratorPredicate filter_pred)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
-          captured_func_(std::move(captured_func)) {
+          captured_func_(std::move(captured_func)),
+          filter_pred_(std::move(filter_pred)) {
       input_->Ref();
     }
 
-    ~FilterDatasetBase() override { input_->Unref(); }
+    ~Dataset() override { input_->Unref(); }
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::Filter")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::Filter")},
+          filter_pred_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -133,17 +152,15 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       return Status::OK();
     }
 
-    virtual Status EvaluatePredicate(IteratorContext* ctx,
-                                     const std::vector<Tensor>& element,
-                                     bool* out_matched) const = 0;
-
    private:
-    class Iterator : public DatasetIterator<FilterDatasetBase> {
+    class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<FilterDatasetBase>(params),
+      explicit Iterator(const Params& params,
+                        FilterIteratorPredicate filter_pred)
+          : DatasetIterator<Dataset>(params),
             filtered_elements_(0),
-            dropped_elements_(0) {
+            dropped_elements_(0),
+            filter_pred_(std::move(filter_pred)) {
         std::vector<string> components =
             str_util::Split(params.prefix, "::", str_util::SkipEmpty());
         prefix_end_ = components.back();
@@ -180,8 +197,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          TF_RETURN_IF_ERROR(
-              dataset()->EvaluatePredicate(ctx, *out_tensors, &matched));
+          TF_RETURN_IF_ERROR(filter_pred_(ctx, *out_tensors, &matched));
           if (!matched) {
             // Clear the output tensor list since it didn't match.
             out_tensors->clear();
@@ -251,64 +267,14 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       int64 filtered_elements_ GUARDED_BY(mu_);
       int64 dropped_elements_ GUARDED_BY(mu_);
+      const FilterIteratorPredicate filter_pred_;
       string prefix_end_;
     };
 
     const DatasetBase* const input_;
     const NameAttrList func_;
-
-   protected:
     const std::unique_ptr<CapturedFunction> captured_func_;
-  };
-
-  class FilterFunctionDataset : public FilterDatasetBase {
-   public:
-    using FilterDatasetBase::FilterDatasetBase;
-
-   protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
-      // TODO(mrry): Avoid blocking a threadpool thread. We will need to
-      // stack-rip the iterators and use async kernels.
-      std::vector<Tensor> result;
-      TF_RETURN_IF_ERROR(
-          captured_func_->RunWithBorrowedArgs(ctx, element, &result));
-
-      if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
-          result[0].NumElements() != 1) {
-        return errors::InvalidArgument(
-            "Filter predicate `f` must return a scalar bool.");
-      }
-      *out_matched = result[0].scalar<bool>()();
-      return Status::OK();
-    }
-  };
-
-  class FilterTensorDataset : public FilterDatasetBase {
-   public:
-    FilterTensorDataset(OpKernelContext* ctx, const DatasetBase* input,
-                        const NameAttrList& func,
-                        std::unique_ptr<CapturedFunction> captured_func,
-                        int32 index)
-        : FilterDatasetBase(ctx, input, func, std::move(captured_func)),
-          index_(index) {}
-
-   protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
-      const Tensor& predicate = element[index_];
-      if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
-        return errors::InvalidArgument(
-            "Filter predicate `f` must return a scalar bool.");
-      }
-      *out_matched = predicate.scalar<bool>()();
-      return Status::OK();
-    }
-
-   private:
-    const int32 index_;
+    const FilterIteratorPredicate filter_pred_;
   };
 
  private:
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index bf08970560..f9aaa3080e 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/inplace_ops_functor.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
@@ -29,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -41,6 +43,10 @@ namespace {
 // transformation more robust.
 class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using MapAndBatchIteratorFunction =
+      std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
+                         std::shared_ptr<std::vector<Tensor>>, StatusCallback)>;
+
   explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx)
       : UnaryDatasetOpKernel(ctx),
         op_version_(ctx->def().op() == "MapAndBatchDataset" ? 1 : 2) {
@@ -91,31 +97,66 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    *output = new Dataset(ctx, input, batch_size, num_parallel_calls,
-                          drop_remainder, output_types_, output_shapes_, func_,
-                          std::move(captured_func), &ctx->eigen_cpu_device());
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    MapAndBatchIteratorFunction map_func;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      map_func = [raw_captured_func](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args,
+                     std::shared_ptr<std::vector<Tensor>> out_tensors,
+                     StatusCallback done) {
+        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors.get(),
+                                    std::move(done), prefix);
+      };
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [indices, can_move](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args,
+                     std::shared_ptr<std::vector<Tensor>> out_tensors,
+                     StatusCallback done) {
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (can_move[i]) {
+            out_tensors->push_back(std::move(args[indices[i]]));
+          } else {
+            out_tensors->push_back(args[indices[i]]);
+          }
+        }
+        done(Status::OK());
+      };
+    }
+
+    *output = new Dataset(ctx, input, func_, batch_size, num_parallel_calls,
+                          drop_remainder, output_types_, output_shapes_,
+                          std::move(captured_func), &ctx->eigen_cpu_device(),
+                          std::move(map_func));
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
-    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func, int64 batch_size,
             int64 num_parallel_calls, bool drop_remainder,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
-            const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
-            const Eigen::ThreadPoolDevice* device)
+            const Eigen::ThreadPoolDevice* device,
+            MapAndBatchIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
+          func_(func),
           batch_size_(batch_size),
           num_parallel_calls_(num_parallel_calls),
           drop_remainder_(drop_remainder),
           output_types_(output_types),
           output_shapes_(output_shapes),
-          map_fn_(func),
           captured_func_(std::move(captured_func)),
-          device_(device) {
+          device_(device),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -123,8 +164,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::MapAndBatch")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::MapAndBatch")},
+          map_func_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -143,7 +185,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      TF_RETURN_IF_ERROR(b->AddFunction(ctx, map_fn_.name()));
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
       Node* input_graph_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
       Node* batch_size_node;
@@ -165,7 +207,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         other_arguments_types.emplace_back(t.dtype());
       }
       AttrValue f;
-      b->BuildAttrValue(map_fn_, &f);
+      b->BuildAttrValue(func_, &f);
       AttrValue other_arguments_types_attr;
       b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
 
@@ -185,12 +227,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
+      explicit Iterator(const Params& params,
+                        MapAndBatchIteratorFunction map_func)
           : DatasetIterator<Dataset>(params),
             mu_(std::make_shared<mutex>()),
             cond_var_(std::make_shared<condition_variable>()),
             num_parallel_calls_(std::make_shared<model::SharedState>(
-                params.dataset->num_parallel_calls_, mu_, cond_var_)) {}
+                params.dataset->num_parallel_calls_, mu_, cond_var_)),
+            map_func_(std::move(map_func)) {}
 
       ~Iterator() override {
         mutex_lock l(*mu_);
@@ -297,44 +341,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         int64 num_calls;  // access guarded by owner's mutex
       };
 
-      void Callback(const std::shared_ptr<IteratorContext>& ctx,
-                    const std::shared_ptr<BatchResult>& result,
-                    const std::shared_ptr<std::vector<Tensor>>& return_values,
-                    int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) {
-        result->UpdateStatus(status);
-        if (status.ok()) {
-          EnsureOutputAllocated(ctx, result, return_values);
-          for (size_t i = 0; i < return_values->size(); ++i) {
-            const Tensor& tensor = return_values->at(i);
-            Tensor* batch = &(result->output)[i];
-            if (tensor.NumElements() !=
-                (batch->NumElements() / batch->dim_size(0))) {
-              TensorShape batch_shape = batch->shape();
-              batch_shape.RemoveDim(0);
-              result->UpdateStatus(errors::InvalidArgument(
-                  "Cannot add tensor to the batch: number of elements does not "
-                  "match. Shapes are: [tensor]: ",
-                  tensor.shape().DebugString(),
-                  ", [batch]: ", batch_shape.DebugString()));
-              break;
-            }
-            // TODO(mrry): Add a version of DoParallelConcat that allows us to
-            // move `tensor` where possible, to speed up string tensor batching.
-            Status copy_status = ::tensorflow::functor::DoParallelConcat(
-                *dataset()->device_, tensor, offset, batch);
-            if (!copy_status.ok()) {
-              result->UpdateStatus(copy_status);
-              break;
-            }
-          }
-          {
-            mutex_lock l(result->mu);
-            result->num_elements++;
-          }
-        }
-        CallCompleted(result);
-      }
-
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
           LOCKS_EXCLUDED(*mu_) {
         mutex_lock l(*mu_);
@@ -363,21 +369,48 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           return;
         }
 
-        // Call `captured_func_(input_element)`, using `Callback` to store the
-        // result in `result`.
-        (*ctx->runner())(std::bind(
-            [this, result, offset](std::shared_ptr<IteratorContext> ctx,
-                                   std::vector<Tensor> input_element) {
-              std::shared_ptr<std::vector<Tensor>> return_values(
-                  new std::vector<Tensor>());
-              dataset()->captured_func_->RunAsync(
-                  ctx.get(), std::move(input_element), return_values.get(),
-                  [this, ctx, result, return_values, offset](Status status) {
-                    Callback(ctx, result, return_values, offset, status);
-                  },
-                  prefix());
-            },
-            ctx, std::move(input_element)));
+        std::shared_ptr<std::vector<Tensor>> return_values =
+            std::make_shared<std::vector<Tensor>>();
+        auto done = [this, ctx, result, return_values, offset](Status status) {
+          result->UpdateStatus(status);
+          if (status.ok()) {
+            EnsureOutputAllocated(ctx, result, return_values);
+            for (size_t i = 0; i < return_values->size(); ++i) {
+              const Tensor& tensor = return_values->at(i);
+              Tensor* batch = &(result->output)[i];
+              if (tensor.NumElements() !=
+                  (batch->NumElements() / batch->dim_size(0))) {
+                TensorShape batch_shape = batch->shape();
+                batch_shape.RemoveDim(0);
+                result->UpdateStatus(errors::InvalidArgument(
+                    "Cannot add tensor to the batch: number of elements does "
+                    "not match. Shapes are: [tensor]: ",
+                    tensor.shape().DebugString(),
+                    ", [batch]: ", batch_shape.DebugString()));
+                break;
+              }
+              // TODO(mrry): Add a version of DoParallelConcat that allows us to
+              // move `tensor` where possible, to speed up string tensor
+              // batching.
+              Status copy_status = ::tensorflow::functor::DoParallelConcat(
+                  *dataset()->device_, tensor, offset, batch);
+              if (!copy_status.ok()) {
+                result->UpdateStatus(copy_status);
+                break;
+              }
+            }
+            {
+              mutex_lock l(result->mu);
+              result->num_elements++;
+            }
+          }
+          CallCompleted(result);
+        };
+
+        // Apply the map function on `input_element`, storing the result in
+        // `return_values`, and invoking `done` when finished.
+        map_func_(ctx.get(), prefix(), std::move(input_element),
+                  std::move(return_values), std::move(done));
       }
 
       Status CopyPartialBatch(Tensor* output, const Tensor& value,
@@ -404,7 +437,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
-          std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
+          auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
           runner_thread_.reset(ctx->env()->StartThread(
               {}, "runner_thread",
               std::bind(&Iterator::RunnerThread, this, ctx_copy)));
@@ -509,8 +542,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
             while (!busy()) {
               if (call_counter_ % dataset()->batch_size_ == 0) {
-                batch_results_.emplace_back(
-                    new BatchResult(dataset()->batch_size_));
+                batch_results_.push_back(
+                    std::make_shared<BatchResult>(dataset()->batch_size_));
               }
               int64 offset = call_counter_++ % dataset()->batch_size_;
               new_calls.emplace_back(batch_results_.back(), offset);
@@ -527,7 +560,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader,
                              size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
-        batch_results_.emplace_back(new BatchResult(dataset()->batch_size_));
+        batch_results_.push_back(
+            std::make_shared<BatchResult>(dataset()->batch_size_));
         std::shared_ptr<BatchResult> result = batch_results_.back();
         string prefix = strings::StrCat("batch_results_", index);
         mutex_lock l(result->mu);
@@ -653,6 +687,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       const std::shared_ptr<condition_variable> cond_var_;
       // Identifies the maximum number of parallel calls.
       const std::shared_ptr<model::SharedState> num_parallel_calls_;
+      const MapAndBatchIteratorFunction map_func_;
+
       // Counts the number of outstanding calls for this batch.
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
       // Counts the total number of calls.
@@ -671,9 +707,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     const bool drop_remainder_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
-    const NameAttrList map_fn_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const Eigen::ThreadPoolDevice* device_;  // not owned
+    const MapAndBatchIteratorFunction map_func_;
   };
 
   const int op_version_;
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index f112e1dc43..0abb2eb4f3 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -17,7 +17,9 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -28,6 +30,9 @@ namespace {
 
 class MapDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using MapIteratorFunction = std::function<Status(
+      IteratorContext*, std::vector<Tensor>, std::vector<Tensor>*)>;
+
   explicit MapDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
@@ -43,8 +48,36 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    MapIteratorFunction map_func;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      map_func = [raw_captured_func](IteratorContext* ctx,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors) {
+        return raw_captured_func->Run(ctx, std::move(args), out_tensors);
+      };
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [indices, can_move](IteratorContext* ctx,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors) {
+        std::map<int, int> counts;
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (can_move[i]) {
+            out_tensors->push_back(std::move(args[indices[i]]));
+          } else {
+            out_tensors->push_back(args[indices[i]]);
+          }
+        }
+        return Status::OK();
+      };
+    }
+
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
-                          output_types_, output_shapes_);
+                          output_types_, output_shapes_, std::move(map_func));
   }
 
  private:
@@ -54,13 +87,15 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
             const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
             const DataTypeVector& output_types,
-            const std::vector<PartialTensorShape>& output_shapes)
+            const std::vector<PartialTensorShape>& output_shapes,
+            MapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
           captured_func_(std::move(captured_func)),
           output_types_(output_types),
-          output_shapes_(output_shapes) {
+          output_shapes_(output_shapes),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -68,8 +103,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::Map")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::Map")}, map_func_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -116,8 +151,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<Dataset>(params) {}
+      explicit Iterator(const Params& params, MapIteratorFunction map_func)
+          : DatasetIterator<Dataset>(params), map_func_(std::move(map_func)) {}
 
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
@@ -139,10 +174,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
           return Status::OK();
         }
 
-        // TODO(mrry): Avoid blocking a threadpool thread. We will need to
-        // stack-rip the iterators and use async kernels.
-        Status s =
-            dataset()->captured_func_->Run(ctx, std::move(args), out_tensors);
+        Status s = map_func_(ctx, args, out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
@@ -167,6 +199,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
      private:
       std::unique_ptr<IteratorBase> input_impl_;
+      const MapIteratorFunction map_func_;
     };
 
     const DatasetBase* const input_;
@@ -174,6 +207,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
     const std::unique_ptr<CapturedFunction> captured_func_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
+    const MapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 6abe6c8338..a34bb172d4 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/parallel_map_iterator.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -56,9 +57,49 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    ParallelMapIteratorFunction map_func;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      map_func = [raw_captured_func](IteratorContext* ctx, const string& prefix,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors,
+                                     StatusCallback done) {
+        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors,
+                                    std::move(done), prefix);
+      };
+      if (!use_inter_op_parallelism_) {
+        map_func = [map_func](IteratorContext* ctx, const string& prefix,
+                              std::vector<Tensor> args,
+                              std::vector<Tensor>* out_tensors,
+                              StatusCallback done) {
+          (*ctx->runner())(std::bind(map_func, ctx, prefix, std::move(args),
+                                     out_tensors, std::move(done)));
+        };
+      }
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [indices, can_move](IteratorContext* ctx, const string& prefix,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors,
+                                     StatusCallback done) {
+        std::map<int, int> counts;
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (can_move[i]) {
+            out_tensors->push_back(std::move(args[indices[i]]));
+          } else {
+            out_tensors->push_back(args[indices[i]]);
+          }
+        }
+        done(Status::OK());
+      };
+    }
+
     *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_,
                           output_shapes_, use_inter_op_parallelism_,
-                          std::move(captured_func));
+                          std::move(captured_func), std::move(map_func));
   }
 
  private:
@@ -69,7 +110,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
             bool use_inter_op_parallelism,
-            std::unique_ptr<CapturedFunction> captured_func)
+            std::unique_ptr<CapturedFunction> captured_func,
+            ParallelMapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
@@ -77,7 +119,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
           output_types_(output_types),
           output_shapes_(output_shapes),
           use_inter_op_parallelism_(use_inter_op_parallelism),
-          captured_func_(std::move(captured_func)) {
+          captured_func_(std::move(captured_func)),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -89,26 +132,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
         return captured_func_->Instantiate(ctx);
       };
 
-      const string& new_prefix = strings::StrCat(prefix, "::ParallelMap");
-      ParallelMapIteratorFunction map_func =
-          [this, new_prefix](IteratorContext* ctx,
-                             std::vector<Tensor> input_element,
-                             std::vector<Tensor>* result, StatusCallback done) {
-            captured_func_->RunAsync(ctx, std::move(input_element), result,
-                                     std::move(done), new_prefix);
-          };
-      if (!use_inter_op_parallelism_) {
-        map_func = [map_func](
-                       IteratorContext* ctx, std::vector<Tensor> input_element,
-                       std::vector<Tensor>* result, StatusCallback done) {
-          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
-                                     result, std::move(done)));
-        };
-      }
-
-      return NewParallelMapIterator({this, new_prefix}, input_,
-                                    std::move(init_func), std::move(map_func),
-                                    num_parallel_calls_);
+      return NewParallelMapIterator(
+          {this, strings::StrCat(prefix, "::ParallelMap")}, input_,
+          std::move(init_func), map_func_, num_parallel_calls_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -176,6 +202,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> output_shapes_;
     const bool use_inter_op_parallelism_;
     const std::unique_ptr<CapturedFunction> captured_func_;
+    const ParallelMapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 13bd4b6036..ebf41925c9 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,6 +22,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -179,7 +180,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   void EnsureRunnerThreadStarted(IteratorContext* ctx)
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
-      std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
+      auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
       runner_thread_.reset(ctx->env()->StartThread(
           {}, "runner_thread",
           std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
@@ -208,15 +209,15 @@ class ParallelMapIterator : public DatasetBaseIterator {
       return;
     }
 
-    // Call `func_(input_element)`, store the result in `result->return_values`,
-    // and notify `result->notification` to unblock a consumer.
     auto done = [this, result](Status status) {
       result->status.Update(status);
       CallCompleted(result);
     };
 
-    map_func_(ctx.get(), std::move(input_element), &result->return_values,
-              std::move(done));
+    // Apply the map function on `input_element`, storing the result in
+    // `result->return_values`, and invoking `done` when finished.
+    map_func_(ctx.get(), prefix(), std::move(input_element),
+              &result->return_values, std::move(done));
   }
 
   Status ProcessResult(const std::shared_ptr<InvocationResult>& result,
@@ -349,9 +350,9 @@ std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBase* input_dataset,
     std::function<Status(IteratorContext*)> init_func,
     ParallelMapIteratorFunction map_func, int32 num_parallel_calls) {
-  return std::unique_ptr<IteratorBase>(
-      new ParallelMapIterator(params, input_dataset, std::move(init_func),
-                              std::move(map_func), num_parallel_calls));
+  return MakeUnique<ParallelMapIterator>(
+      params, input_dataset, std::move(init_func), std::move(map_func),
+      num_parallel_calls);
 }
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h
index dc26c5cf25..813f13c9e4 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.h
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h
@@ -30,7 +30,7 @@ namespace data {
 // 3. A `std::vector<Tensor>*` to which the function will write the result.
 // 4. A `StatusCallback` that should be invoked when the function is complete.
 using ParallelMapIteratorFunction =
-    std::function<void(IteratorContext*, std::vector<Tensor>,
+    std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
                        std::vector<Tensor>*, StatusCallback)>;
 
 // Returns a new iterator that applies `map_func` to the elements of
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index 1d1a717062..7de5ea8860 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -182,7 +182,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      auto map_fn = [this](IteratorContext* ctx,
+      auto map_fn = [this](IteratorContext* ctx, const string& prefix,
                            std::vector<Tensor> input_element,
                            std::vector<Tensor>* result, StatusCallback done) {
         (*ctx->runner())([this, ctx, input_element, result, done]() {
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index afd0fc3abf..0703955fd4 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -332,6 +332,26 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       for _ in range(10):
         self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
 
+  @parameterized.named_parameters(
+      ("Identity", None, lambda x: x, None),
+      ("Replicate", None, lambda x: (x, x), None),
+      ("Swap", (None, None), lambda x, y: (y, x), None),
+      ("Project", (None, None), lambda x, y: x, None),
+  )
+  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
+    dataset = self.structuredDataset(structure).repeat().apply(
+        batching.map_and_batch(map_fn, batch_size=10))
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      if isinstance(structure, tuple):
+        expected = map_fn(
+            *sess.run(self.structuredElement(structure, shape=[10])))
+      else:
+        expected = map_fn(
+            sess.run(self.structuredElement(structure, shape=[10])))
+      self.assertAllEqual(expected, sess.run(get_next))
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
index 6b7afafa5d..a0c6b37a6d 100644
--- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
@@ -156,7 +156,7 @@ class FilterDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testReturnComponent(self):
+  def testShortCircuit(self):
     iterator = (
         dataset_ops.Dataset.zip(
             (dataset_ops.Dataset.range(10),
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 0c372ebb10..6efbe31ca1 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -622,7 +622,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
         self.assertSparseValuesEqual(actual, _sparse(i))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -649,7 +649,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
         self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval())
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -783,19 +783,57 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.assertTrue(all(tids[0] == tid for tid in tids))
 # pylint: enable=g-long-lambda
 
+  @parameterized.named_parameters(
+      ("SequentialIdentity", None, lambda x: x, None),
+      ("SequentialReplicate", None, lambda x: (x, x), None),
+      ("SequentialSwap", (None, None), lambda x, y: (y, x), None),
+      ("SequentialProject", (None, None), lambda x, y: x, None),
+      ("ParallelIdentity", None, lambda x: x, 10),
+      ("ParallelReplicate", None, lambda x: (x, x), 10),
+      ("ParallelSwap", (None, None), lambda x, y: (y, x), 10),
+      ("ParallelProject", (None, None), lambda x, y: x, 10),
+  )
+  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
+    dataset = self.structuredDataset(structure).repeat().map(
+        map_fn, num_parallel_calls=num_parallel_calls)
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      if isinstance(structure, tuple):
+        expected = map_fn(*sess.run(self.structuredElement(structure)))
+      else:
+        expected = map_fn(sess.run(self.structuredElement(structure)))
+      self.assertEqual(expected, sess.run(get_next))
+
 
 class MapDatasetBenchmark(test.Benchmark):
 
   def benchmarkChainOfMaps(self):
     chain_lengths = [0, 1, 2, 5, 10, 20, 50]
     for chain_length in chain_lengths:
-      for use_inter_op_parallelism in [False, True]:
+      for mode in ["general", "single-threaded", "short-circuit"]:
+        if mode == "general":
+          map_fn = lambda x: x + 1
+          use_inter_op_parallelism = True
+          print_label = ""
+          benchmark_label = ""
+        if mode == "single-threaded":
+          map_fn = lambda x: x + 1
+          use_inter_op_parallelism = False
+          print_label = " (single threaded mode)"
+          benchmark_label = "_single_threaded"
+        if mode == "short-circuit":
+          map_fn = lambda x: x
+          use_inter_op_parallelism = True  # should not have any significance
+          print_label = " (short circuit mode)"
+          benchmark_label = "_short_circuit"
+
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
           for _ in range(chain_length):
             dataset = dataset_ops.MapDataset(
                 dataset,
-                lambda x: x,
+                map_fn,
                 use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -813,25 +851,39 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset chain length%s: %d Median wall time: %f" %
-                  (" (single threaded mode)" if not use_inter_op_parallelism
-                   else "", chain_length, median_wall_time))
+                  (print_label, chain_length, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
                 name="benchmark_map_dataset_chain_latency_%d%s" %
-                (chain_length, "_single_threaded"
-                 if not use_inter_op_parallelism else ""))
+                (chain_length, benchmark_label))
 
   def benchmarkMapFanOut(self):
     fan_outs = [1, 2, 5, 10, 20, 50, 100]
     for fan_out in fan_outs:
-      for use_inter_op_parallelism in [False, True]:
+      for mode in ["general", "single-threaded", "short-circuit"]:
+        if mode == "general":
+          map_fn = lambda *xs: [x + 1 for x in xs]
+          use_inter_op_parallelism = True
+          print_label = ""
+          benchmark_label = ""
+        if mode == "single-threaded":
+          map_fn = lambda *xs: [x + 1 for x in xs]
+          use_inter_op_parallelism = False
+          print_label = " (single threaded mode)"
+          benchmark_label = "_single_threaded"
+        if mode == "short-circuit":
+          map_fn = lambda *xs: xs
+          use_inter_op_parallelism = True  # should not have any significance
+          print_label = " (short circuit mode)"
+          benchmark_label = "_short_circuit"
+
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(
               tuple(0 for _ in range(fan_out))).repeat(None)
           dataset = dataset_ops.MapDataset(
               dataset,
-              lambda *xs: xs,
+              map_fn,
               use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -849,14 +901,12 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset fan out%s: %d Median wall time: %f" %
-                  (" (single threaded mode)" if not use_inter_op_parallelism
-                   else "", fan_out, median_wall_time))
+                  (print_label, fan_out, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
-                name="benchmark_map_dataset_fan_out_%d%s" %
-                (fan_out, "_single_threaded"
-                 if not use_inter_op_parallelism else ""))
+                name="benchmark_map_dataset_fan_out_%d%s" % (fan_out,
+                                                             benchmark_label))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
index b730e10949..b73a94e683 100644
--- a/tensorflow/python/data/kernel_tests/test_base.py
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -19,10 +19,13 @@ from __future__ import print_function
 
 import re
 
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
@@ -107,3 +110,29 @@ class DatasetTestBase(test.TestCase):
       with self.assertRaisesRegexp(exception_class,
                                    re.escape(expected_message)):
         self.evaluate(next2())
+
+  def structuredDataset(self, structure, shape=None, dtype=dtypes.int64):
+    """Returns a singleton dataset with the given structure."""
+    if shape is None:
+      shape = []
+    if structure is None:
+      return dataset_ops.Dataset.from_tensors(
+          array_ops.zeros(shape, dtype=dtype))
+    else:
+      return dataset_ops.Dataset.zip(
+          tuple([
+              self.structuredDataset(substructure, shape, dtype)
+              for substructure in structure
+          ]))
+
+  def structuredElement(self, structure, shape=None, dtype=dtypes.int64):
+    """Returns an element with the given structure."""
+    if shape is None:
+      shape = []
+    if structure is None:
+      return array_ops.zeros(shape, dtype=dtype)
+    else:
+      return tuple([
+          self.structuredElement(substructure, shape, dtype)
+          for substructure in structure
+      ])
-- 
GitLab


From 6123677f264c615042a816e713f7f1204685e544 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Fri, 5 Oct 2018 14:18:41 -0700
Subject: [PATCH 1207/1357] Fix bug in nonpip builds in
 ci_parameterized_build.sh

The extra spaces were confusing bash's string-line-continuation from
the backslash `\` on the previous line.

PiperOrigin-RevId: 215964853
---
 tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index fdff867ff0..489722c0e9 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -423,7 +423,7 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
      [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
     # CPU only command, fully parallel.
     NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} "\
-      "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
+"${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == gpu* ]]; then
     # GPU only command, run as many jobs as the GPU count only.
     NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
-- 
GitLab


From c221f04b7efff5929f3a6d090983b52f3aa16166 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Fri, 5 Oct 2018 14:44:47 -0700
Subject: [PATCH 1208/1357] Automated rollback of commit
 ae0bc6f006497cc04a2ee75166d4ec71c7154fd8

PiperOrigin-RevId: 215969360
---
 tensorflow/core/kernels/data/BUILD            |  14 --
 tensorflow/core/kernels/data/dataset_utils.cc |  47 -----
 tensorflow/core/kernels/data/dataset_utils.h  |  20 --
 .../core/kernels/data/dataset_utils_test.cc   |  46 -----
 .../core/kernels/data/filter_dataset_op.cc    | 162 +++++++++-------
 .../kernels/data/map_and_batch_dataset_op.cc  | 180 +++++++-----------
 .../core/kernels/data/map_dataset_op.cc       |  56 ++----
 .../kernels/data/parallel_map_dataset_op.cc   |  73 +++----
 .../kernels/data/parallel_map_iterator.cc     |  17 +-
 .../core/kernels/data/parallel_map_iterator.h |   2 +-
 .../kernels/data/parse_example_dataset_op.cc  |   2 +-
 .../kernel_tests/map_and_batch_test.py        |  20 --
 .../kernel_tests/filter_dataset_op_test.py    |   2 +-
 .../data/kernel_tests/map_dataset_op_test.py  |  80 ++------
 .../python/data/kernel_tests/test_base.py     |  29 ---
 15 files changed, 230 insertions(+), 520 deletions(-)
 delete mode 100644 tensorflow/core/kernels/data/dataset_utils_test.cc

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 37c1c54786..451f8c1a6c 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -45,16 +45,6 @@ cc_library(
     ],
 )
 
-tf_cc_test(
-    name = "dataset_utils_test",
-    srcs = ["dataset_utils_test.cc"],
-    deps = [
-        ":dataset_utils",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-    ],
-)
-
 cc_library(
     name = "captured_function",
     srcs = ["captured_function.cc"],
@@ -215,7 +205,6 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
-        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -243,7 +232,6 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
-        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -257,7 +245,6 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
-        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -298,7 +285,6 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
-        ":dataset_utils",
         ":parallel_map_iterator",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index a40f7f2146..e10833f525 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -15,57 +15,10 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/common_runtime/device.h"
-#include "tensorflow/core/common_runtime/function.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/lib/gtl/cleanup.h"
 
 namespace tensorflow {
 namespace data {
 
-Status ComputeShortCircuitIndices(OpKernelContext* ctx,
-                                  const NameAttrList& func,
-                                  std::vector<int>* indices) {
-  FunctionLibraryRuntime::Handle fn_handle;
-  TF_RETURN_IF_ERROR(ctx->function_library()->Instantiate(
-      func.name(), AttrSlice(&func.attr()), &fn_handle));
-  auto cleanup = gtl::MakeCleanup([ctx, fn_handle]() {
-    Status s = ctx->function_library()->ReleaseHandle(fn_handle);
-    if (!s.ok()) {
-      LOG(WARNING) << "Failed to release handle: " << s.error_message();
-    }
-  });
-
-  const FunctionBody* fn_body =
-      ctx->function_library()->GetFunctionBody(fn_handle);
-  indices->resize(fn_body->ret_nodes.size());
-  for (size_t i = 0; i < fn_body->ret_nodes.size(); ++i) {
-    Node* ret_node = fn_body->ret_nodes[i];
-    Node* ret_input_node;
-    TF_RETURN_IF_ERROR(ret_node->input_node(0, &ret_input_node));
-    if (ret_input_node->def().op() == FunctionLibraryDefinition::kArgOp) {
-      TF_RETURN_IF_ERROR(
-          GetNodeAttr(ret_input_node->def(), "index", &((*indices)[i])));
-    } else {
-      indices->clear();
-      break;
-    }
-  }
-  return Status::OK();
-}
-
-std::vector<bool> ComputeMoveVector(const std::vector<int>& indices) {
-  std::map<int, int> last_use;
-  for (size_t i = 0; i < indices.size(); ++i) {
-    last_use[indices[i]] = i;
-  }
-  std::vector<bool> can_move;
-  can_move.resize(indices.size());
-  for (size_t i = 0; i < indices.size(); ++i) {
-    can_move[i] = last_use[indices[i]] == i;
-  }
-  return can_move;
-}
-
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index d777062293..6ec1350cd4 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -22,26 +22,6 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
-// This method is used to determine whether we can short-circuit the evaluation
-// of the user-defined function `func`. Short-circuting is possible if every
-// function output corresponds to one of its inputs (e.g. `f(x) = x`, `f(x,y) =
-// (y,x)`, or `f(x) = (x,x)`).
-//
-// If short-circuiting is possible, the method stores the mapping from output
-// indices to input indices in `indices`. Otherwise, `indices` will be empty.
-//
-// Returns non-ok status if analysis of the function fails.
-//
-// TODO(jsimsa): Extend this to support constants as well.
-Status ComputeShortCircuitIndices(OpKernelContext* ctx,
-                                  const NameAttrList& func,
-                                  std::vector<int>* indices);
-
-// Given a vector that maps output indices to input indices, return a vector
-// that identifies for which output indices can we move the input (assuming
-// output indices are processed left to right).
-std::vector<bool> ComputeMoveVector(const std::vector<int>& indices);
-
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc
deleted file mode 100644
index 43295b8ebb..0000000000
--- a/tensorflow/core/kernels/data/dataset_utils_test.cc
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/kernels/data/dataset_utils.h"
-
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace data {
-namespace {
-
-TEST(DatasetUtils, ComputeMoveVector) {
-  struct TestCase {
-    std::vector<int> indices;
-    std::vector<bool> expected;
-  };
-
-  TestCase test_cases[] = {
-      TestCase{{}, {}},
-      TestCase{{1}, {true}},
-      TestCase{{1, 1}, {false, true}},
-      TestCase{{1, 2}, {true, true}},
-      TestCase{{1, 1, 2}, {false, true, true}},
-      TestCase{{1, 2, 2}, {true, false, true}},
-  };
-
-  for (auto& test_case : test_cases) {
-    EXPECT_EQ(test_case.expected, ComputeMoveVector(test_case.indices));
-  }
-}
-
-}  // namespace
-}  // namespace data
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index be7d182a1f..00884314a9 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -18,11 +18,9 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
-#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -33,84 +31,67 @@ namespace {
 
 class FilterDatasetOp : public UnaryDatasetOpKernel {
  public:
-  using FilterIteratorPredicate =
-      std::function<Status(IteratorContext*, std::vector<Tensor>, bool*)>;
-
   explicit FilterDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx) {
+      : UnaryDatasetOpKernel(ctx),
+        graph_def_version_(ctx->graph_def_version()) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_));
   }
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
+    FunctionLibraryRuntime::Handle pred_handle;
+    OP_REQUIRES_OK(ctx,
+                   ctx->function_library()->Instantiate(
+                       func_.name(), AttrSlice(&func_.attr()), &pred_handle));
+    auto cleanup = gtl::MakeCleanup([ctx, pred_handle]() {
+      OP_REQUIRES_OK(ctx, ctx->function_library()->ReleaseHandle(pred_handle));
+    });
+
+    const FunctionBody* pred_body =
+        ctx->function_library()->GetFunctionBody(pred_handle);
+    OP_REQUIRES(ctx, pred_body->ret_nodes.size() == 1,
+                errors::InvalidArgument(
+                    "predicate function must have a single return value."));
+    Node* ret_node = pred_body->ret_nodes[0];
+    Node* ret_input_node;
+    OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node));
+
     std::unique_ptr<CapturedFunction> captured_func;
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    std::vector<int> indices;
-    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
-    OP_REQUIRES(ctx, indices.size() <= 1,
-                errors::InvalidArgument(
-                    "predicate function has more than one return value."));
-
-    FilterIteratorPredicate filter_pred;
-    if (indices.empty()) {
-      CapturedFunction* raw_captured_func = captured_func.get();
-      filter_pred = [raw_captured_func](IteratorContext* ctx,
-                                        const std::vector<Tensor>& args,
-                                        bool* out_matched) {
-        std::vector<Tensor> result;
-        TF_RETURN_IF_ERROR(
-            raw_captured_func->RunWithBorrowedArgs(ctx, args, &result));
-
-        if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
-            result[0].NumElements() != 1) {
-          return errors::InvalidArgument(
-              "Filter predicate `f` must return a scalar bool.");
-        }
-        *out_matched = result[0].scalar<bool>()();
-        return Status::OK();
-      };
+    if (ret_input_node->def().op() == "_Arg") {
+      int32 index = -1;
+      OP_REQUIRES_OK(ctx, GetNodeAttr(ret_input_node->def(), "index", &index));
+      *output = new FilterTensorDataset(ctx, input, func_,
+                                        std::move(captured_func), index);
     } else {
-      filter_pred = [indices](IteratorContext* ctx,
-                              const std::vector<Tensor>& args,
-                              bool* out_matched) {
-        const Tensor& predicate = args[indices[0]];
-        if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
-          return errors::InvalidArgument(
-              "Filter predicate `f` must return a scalar bool.");
-        }
-        *out_matched = predicate.scalar<bool>()();
-        return Status::OK();
-      };
+      *output = new FilterFunctionDataset(ctx, input, func_,
+                                          std::move(captured_func));
     }
-
-    *output = new Dataset(ctx, input, func_, std::move(captured_func),
-                          std::move(filter_pred));
   }
 
  private:
-  class Dataset : public DatasetBase {
+  const int graph_def_version_;
+
+  class FilterDatasetBase : public DatasetBase {
    public:
-    Dataset(OpKernelContext* ctx, const DatasetBase* input,
-            const NameAttrList& func,
-            std::unique_ptr<CapturedFunction> captured_func,
-            FilterIteratorPredicate filter_pred)
+    FilterDatasetBase(OpKernelContext* ctx, const DatasetBase* input,
+                      const NameAttrList& func,
+                      std::unique_ptr<CapturedFunction> captured_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
-          captured_func_(std::move(captured_func)),
-          filter_pred_(std::move(filter_pred)) {
+          captured_func_(std::move(captured_func)) {
       input_->Ref();
     }
 
-    ~Dataset() override { input_->Unref(); }
+    ~FilterDatasetBase() override { input_->Unref(); }
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return MakeUnique<Iterator>(
-          Iterator::Params{this, strings::StrCat(prefix, "::Filter")},
-          filter_pred_);
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::Filter")}));
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -152,15 +133,17 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       return Status::OK();
     }
 
+    virtual Status EvaluatePredicate(IteratorContext* ctx,
+                                     const std::vector<Tensor>& element,
+                                     bool* out_matched) const = 0;
+
    private:
-    class Iterator : public DatasetIterator<Dataset> {
+    class Iterator : public DatasetIterator<FilterDatasetBase> {
      public:
-      explicit Iterator(const Params& params,
-                        FilterIteratorPredicate filter_pred)
-          : DatasetIterator<Dataset>(params),
+      explicit Iterator(const Params& params)
+          : DatasetIterator<FilterDatasetBase>(params),
             filtered_elements_(0),
-            dropped_elements_(0),
-            filter_pred_(std::move(filter_pred)) {
+            dropped_elements_(0) {
         std::vector<string> components =
             str_util::Split(params.prefix, "::", str_util::SkipEmpty());
         prefix_end_ = components.back();
@@ -197,7 +180,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          TF_RETURN_IF_ERROR(filter_pred_(ctx, *out_tensors, &matched));
+          TF_RETURN_IF_ERROR(
+              dataset()->EvaluatePredicate(ctx, *out_tensors, &matched));
           if (!matched) {
             // Clear the output tensor list since it didn't match.
             out_tensors->clear();
@@ -267,14 +251,64 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       int64 filtered_elements_ GUARDED_BY(mu_);
       int64 dropped_elements_ GUARDED_BY(mu_);
-      const FilterIteratorPredicate filter_pred_;
       string prefix_end_;
     };
 
     const DatasetBase* const input_;
     const NameAttrList func_;
+
+   protected:
     const std::unique_ptr<CapturedFunction> captured_func_;
-    const FilterIteratorPredicate filter_pred_;
+  };
+
+  class FilterFunctionDataset : public FilterDatasetBase {
+   public:
+    using FilterDatasetBase::FilterDatasetBase;
+
+   protected:
+    Status EvaluatePredicate(IteratorContext* ctx,
+                             const std::vector<Tensor>& element,
+                             bool* out_matched) const override {
+      // TODO(mrry): Avoid blocking a threadpool thread. We will need to
+      // stack-rip the iterators and use async kernels.
+      std::vector<Tensor> result;
+      TF_RETURN_IF_ERROR(
+          captured_func_->RunWithBorrowedArgs(ctx, element, &result));
+
+      if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
+          result[0].NumElements() != 1) {
+        return errors::InvalidArgument(
+            "Filter predicate `f` must return a scalar bool.");
+      }
+      *out_matched = result[0].scalar<bool>()();
+      return Status::OK();
+    }
+  };
+
+  class FilterTensorDataset : public FilterDatasetBase {
+   public:
+    FilterTensorDataset(OpKernelContext* ctx, const DatasetBase* input,
+                        const NameAttrList& func,
+                        std::unique_ptr<CapturedFunction> captured_func,
+                        int32 index)
+        : FilterDatasetBase(ctx, input, func, std::move(captured_func)),
+          index_(index) {}
+
+   protected:
+    Status EvaluatePredicate(IteratorContext* ctx,
+                             const std::vector<Tensor>& element,
+                             bool* out_matched) const override {
+      const Tensor& predicate = element[index_];
+      if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
+        return errors::InvalidArgument(
+            "Filter predicate `f` must return a scalar bool.");
+      }
+      *out_matched = predicate.scalar<bool>()();
+      return Status::OK();
+    }
+
+   private:
+    const int32 index_;
   };
 
  private:
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index f9aaa3080e..bf08970560 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
-#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/inplace_ops_functor.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
@@ -30,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -43,10 +41,6 @@ namespace {
 // transformation more robust.
 class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  public:
-  using MapAndBatchIteratorFunction =
-      std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
-                         std::shared_ptr<std::vector<Tensor>>, StatusCallback)>;
-
   explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx)
       : UnaryDatasetOpKernel(ctx),
         op_version_(ctx->def().op() == "MapAndBatchDataset" ? 1 : 2) {
@@ -97,66 +91,31 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    std::vector<int> indices;
-    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
-
-    MapAndBatchIteratorFunction map_func;
-    if (indices.empty()) {
-      CapturedFunction* raw_captured_func = captured_func.get();
-      map_func = [raw_captured_func](
-                     IteratorContext* ctx, const string& prefix,
-                     std::vector<Tensor> args,
-                     std::shared_ptr<std::vector<Tensor>> out_tensors,
-                     StatusCallback done) {
-        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors.get(),
-                                    std::move(done), prefix);
-      };
-    } else {
-      std::vector<bool> can_move = ComputeMoveVector(indices);
-      map_func = [indices, can_move](
-                     IteratorContext* ctx, const string& prefix,
-                     std::vector<Tensor> args,
-                     std::shared_ptr<std::vector<Tensor>> out_tensors,
-                     StatusCallback done) {
-        for (size_t i = 0; i < indices.size(); ++i) {
-          if (can_move[i]) {
-            out_tensors->push_back(std::move(args[indices[i]]));
-          } else {
-            out_tensors->push_back(args[indices[i]]);
-          }
-        }
-        done(Status::OK());
-      };
-    }
-
-    *output = new Dataset(ctx, input, func_, batch_size, num_parallel_calls,
-                          drop_remainder, output_types_, output_shapes_,
-                          std::move(captured_func), &ctx->eigen_cpu_device(),
-                          std::move(map_func));
+    *output = new Dataset(ctx, input, batch_size, num_parallel_calls,
+                          drop_remainder, output_types_, output_shapes_, func_,
+                          std::move(captured_func), &ctx->eigen_cpu_device());
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
-    Dataset(OpKernelContext* ctx, const DatasetBase* input,
-            const NameAttrList& func, int64 batch_size,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size,
             int64 num_parallel_calls, bool drop_remainder,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
+            const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
-            const Eigen::ThreadPoolDevice* device,
-            MapAndBatchIteratorFunction map_func)
+            const Eigen::ThreadPoolDevice* device)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
-          func_(func),
           batch_size_(batch_size),
           num_parallel_calls_(num_parallel_calls),
           drop_remainder_(drop_remainder),
           output_types_(output_types),
           output_shapes_(output_shapes),
+          map_fn_(func),
           captured_func_(std::move(captured_func)),
-          device_(device),
-          map_func_(std::move(map_func)) {
+          device_(device) {
       input_->Ref();
     }
 
@@ -164,9 +123,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return MakeUnique<Iterator>(
-          Iterator::Params{this, strings::StrCat(prefix, "::MapAndBatch")},
-          map_func_);
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::MapAndBatch")}));
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -185,7 +143,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, map_fn_.name()));
       Node* input_graph_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
       Node* batch_size_node;
@@ -207,7 +165,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         other_arguments_types.emplace_back(t.dtype());
       }
       AttrValue f;
-      b->BuildAttrValue(func_, &f);
+      b->BuildAttrValue(map_fn_, &f);
       AttrValue other_arguments_types_attr;
       b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
 
@@ -227,14 +185,12 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params,
-                        MapAndBatchIteratorFunction map_func)
+      explicit Iterator(const Params& params)
           : DatasetIterator<Dataset>(params),
             mu_(std::make_shared<mutex>()),
             cond_var_(std::make_shared<condition_variable>()),
             num_parallel_calls_(std::make_shared<model::SharedState>(
-                params.dataset->num_parallel_calls_, mu_, cond_var_)),
-            map_func_(std::move(map_func)) {}
+                params.dataset->num_parallel_calls_, mu_, cond_var_)) {}
 
       ~Iterator() override {
         mutex_lock l(*mu_);
@@ -341,6 +297,44 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         int64 num_calls;  // access guarded by owner's mutex
       };
 
+      void Callback(const std::shared_ptr<IteratorContext>& ctx,
+                    const std::shared_ptr<BatchResult>& result,
+                    const std::shared_ptr<std::vector<Tensor>>& return_values,
+                    int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) {
+        result->UpdateStatus(status);
+        if (status.ok()) {
+          EnsureOutputAllocated(ctx, result, return_values);
+          for (size_t i = 0; i < return_values->size(); ++i) {
+            const Tensor& tensor = return_values->at(i);
+            Tensor* batch = &(result->output)[i];
+            if (tensor.NumElements() !=
+                (batch->NumElements() / batch->dim_size(0))) {
+              TensorShape batch_shape = batch->shape();
+              batch_shape.RemoveDim(0);
+              result->UpdateStatus(errors::InvalidArgument(
+                  "Cannot add tensor to the batch: number of elements does not "
+                  "match. Shapes are: [tensor]: ",
+                  tensor.shape().DebugString(),
+                  ", [batch]: ", batch_shape.DebugString()));
+              break;
+            }
+            // TODO(mrry): Add a version of DoParallelConcat that allows us to
+            // move `tensor` where possible, to speed up string tensor batching.
+            Status copy_status = ::tensorflow::functor::DoParallelConcat(
+                *dataset()->device_, tensor, offset, batch);
+            if (!copy_status.ok()) {
+              result->UpdateStatus(copy_status);
+              break;
+            }
+          }
+          {
+            mutex_lock l(result->mu);
+            result->num_elements++;
+          }
+        }
+        CallCompleted(result);
+      }
+
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
           LOCKS_EXCLUDED(*mu_) {
         mutex_lock l(*mu_);
@@ -369,48 +363,21 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           return;
         }
 
-        std::shared_ptr<std::vector<Tensor>> return_values =
-            std::make_shared<std::vector<Tensor>>();
-        auto done = [this, ctx, result, return_values, offset](Status status) {
-          result->UpdateStatus(status);
-          if (status.ok()) {
-            EnsureOutputAllocated(ctx, result, return_values);
-            for (size_t i = 0; i < return_values->size(); ++i) {
-              const Tensor& tensor = return_values->at(i);
-              Tensor* batch = &(result->output)[i];
-              if (tensor.NumElements() !=
-                  (batch->NumElements() / batch->dim_size(0))) {
-                TensorShape batch_shape = batch->shape();
-                batch_shape.RemoveDim(0);
-                result->UpdateStatus(errors::InvalidArgument(
-                    "Cannot add tensor to the batch: number of elements does "
-                    "not match. Shapes are: [tensor]: ",
-                    tensor.shape().DebugString(),
-                    ", [batch]: ", batch_shape.DebugString()));
-                break;
-              }
-              // TODO(mrry): Add a version of DoParallelConcat that allows us to
-              // move `tensor` where possible, to speed up string tensor
-              // batching.
-              Status copy_status = ::tensorflow::functor::DoParallelConcat(
-                  *dataset()->device_, tensor, offset, batch);
-              if (!copy_status.ok()) {
-                result->UpdateStatus(copy_status);
-                break;
-              }
-            }
-            {
-              mutex_lock l(result->mu);
-              result->num_elements++;
-            }
-          }
-          CallCompleted(result);
-        };
-
-        // Apply the map function on `input_element`, storing the result in
-        // `return_values`, and invoking `done` when finished.
-        map_func_(ctx.get(), prefix(), std::move(input_element),
-                  std::move(return_values), std::move(done));
+        // Call `captured_func_(input_element)`, using `Callback` to store the
+        // result in `result`.
+        (*ctx->runner())(std::bind(
+            [this, result, offset](std::shared_ptr<IteratorContext> ctx,
+                                   std::vector<Tensor> input_element) {
+              std::shared_ptr<std::vector<Tensor>> return_values(
+                  new std::vector<Tensor>());
+              dataset()->captured_func_->RunAsync(
+                  ctx.get(), std::move(input_element), return_values.get(),
+                  [this, ctx, result, return_values, offset](Status status) {
+                    Callback(ctx, result, return_values, offset, status);
+                  },
+                  prefix());
+            },
+            ctx, std::move(input_element)));
       }
 
       Status CopyPartialBatch(Tensor* output, const Tensor& value,
@@ -437,7 +404,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
-          auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
+          std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
           runner_thread_.reset(ctx->env()->StartThread(
               {}, "runner_thread",
               std::bind(&Iterator::RunnerThread, this, ctx_copy)));
@@ -542,8 +509,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
             while (!busy()) {
               if (call_counter_ % dataset()->batch_size_ == 0) {
-                batch_results_.push_back(
-                    std::make_shared<BatchResult>(dataset()->batch_size_));
+                batch_results_.emplace_back(
+                    new BatchResult(dataset()->batch_size_));
               }
               int64 offset = call_counter_++ % dataset()->batch_size_;
               new_calls.emplace_back(batch_results_.back(), offset);
@@ -560,8 +527,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader,
                              size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
-        batch_results_.push_back(
-            std::make_shared<BatchResult>(dataset()->batch_size_));
+        batch_results_.emplace_back(new BatchResult(dataset()->batch_size_));
         std::shared_ptr<BatchResult> result = batch_results_.back();
         string prefix = strings::StrCat("batch_results_", index);
         mutex_lock l(result->mu);
@@ -687,8 +653,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       const std::shared_ptr<condition_variable> cond_var_;
       // Identifies the maximum number of parallel calls.
       const std::shared_ptr<model::SharedState> num_parallel_calls_;
-      const MapAndBatchIteratorFunction map_func_;
-
       // Counts the number of outstanding calls for this batch.
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
       // Counts the total number of calls.
@@ -707,9 +671,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     const bool drop_remainder_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
+    const NameAttrList map_fn_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const Eigen::ThreadPoolDevice* device_;  // not owned
-    const MapAndBatchIteratorFunction map_func_;
   };
 
   const int op_version_;
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index 0abb2eb4f3..f112e1dc43 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -17,9 +17,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
-#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -30,9 +28,6 @@ namespace {
 
 class MapDatasetOp : public UnaryDatasetOpKernel {
  public:
-  using MapIteratorFunction = std::function<Status(
-      IteratorContext*, std::vector<Tensor>, std::vector<Tensor>*)>;
-
   explicit MapDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
@@ -48,36 +43,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
-    std::vector<int> indices;
-    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
-
-    MapIteratorFunction map_func;
-    if (indices.empty()) {
-      CapturedFunction* raw_captured_func = captured_func.get();
-      map_func = [raw_captured_func](IteratorContext* ctx,
-                                     std::vector<Tensor> args,
-                                     std::vector<Tensor>* out_tensors) {
-        return raw_captured_func->Run(ctx, std::move(args), out_tensors);
-      };
-    } else {
-      std::vector<bool> can_move = ComputeMoveVector(indices);
-      map_func = [indices, can_move](IteratorContext* ctx,
-                                     std::vector<Tensor> args,
-                                     std::vector<Tensor>* out_tensors) {
-        std::map<int, int> counts;
-        for (size_t i = 0; i < indices.size(); ++i) {
-          if (can_move[i]) {
-            out_tensors->push_back(std::move(args[indices[i]]));
-          } else {
-            out_tensors->push_back(args[indices[i]]);
-          }
-        }
-        return Status::OK();
-      };
-    }
-
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
-                          output_types_, output_shapes_, std::move(map_func));
+                          output_types_, output_shapes_);
   }
 
  private:
@@ -87,15 +54,13 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
             const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
             const DataTypeVector& output_types,
-            const std::vector<PartialTensorShape>& output_shapes,
-            MapIteratorFunction map_func)
+            const std::vector<PartialTensorShape>& output_shapes)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
           captured_func_(std::move(captured_func)),
           output_types_(output_types),
-          output_shapes_(output_shapes),
-          map_func_(std::move(map_func)) {
+          output_shapes_(output_shapes) {
       input_->Ref();
     }
 
@@ -103,8 +68,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return MakeUnique<Iterator>(
-          Iterator::Params{this, strings::StrCat(prefix, "::Map")}, map_func_);
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::Map")}));
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -151,8 +116,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params, MapIteratorFunction map_func)
-          : DatasetIterator<Dataset>(params), map_func_(std::move(map_func)) {}
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params) {}
 
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
@@ -174,7 +139,10 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
           return Status::OK();
         }
 
-        Status s = map_func_(ctx, args, out_tensors);
+        // TODO(mrry): Avoid blocking a threadpool thread. We will need to
+        // stack-rip the iterators and use async kernels.
+        Status s =
+            dataset()->captured_func_->Run(ctx, std::move(args), out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
@@ -199,7 +167,6 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
      private:
       std::unique_ptr<IteratorBase> input_impl_;
-      const MapIteratorFunction map_func_;
     };
 
     const DatasetBase* const input_;
@@ -207,7 +174,6 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
     const std::unique_ptr<CapturedFunction> captured_func_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
-    const MapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index a34bb172d4..6abe6c8338 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
-#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/parallel_map_iterator.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -57,49 +56,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
-    std::vector<int> indices;
-    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
-
-    ParallelMapIteratorFunction map_func;
-    if (indices.empty()) {
-      CapturedFunction* raw_captured_func = captured_func.get();
-      map_func = [raw_captured_func](IteratorContext* ctx, const string& prefix,
-                                     std::vector<Tensor> args,
-                                     std::vector<Tensor>* out_tensors,
-                                     StatusCallback done) {
-        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors,
-                                    std::move(done), prefix);
-      };
-      if (!use_inter_op_parallelism_) {
-        map_func = [map_func](IteratorContext* ctx, const string& prefix,
-                              std::vector<Tensor> args,
-                              std::vector<Tensor>* out_tensors,
-                              StatusCallback done) {
-          (*ctx->runner())(std::bind(map_func, ctx, prefix, std::move(args),
-                                     out_tensors, std::move(done)));
-        };
-      }
-    } else {
-      std::vector<bool> can_move = ComputeMoveVector(indices);
-      map_func = [indices, can_move](IteratorContext* ctx, const string& prefix,
-                                     std::vector<Tensor> args,
-                                     std::vector<Tensor>* out_tensors,
-                                     StatusCallback done) {
-        std::map<int, int> counts;
-        for (size_t i = 0; i < indices.size(); ++i) {
-          if (can_move[i]) {
-            out_tensors->push_back(std::move(args[indices[i]]));
-          } else {
-            out_tensors->push_back(args[indices[i]]);
-          }
-        }
-        done(Status::OK());
-      };
-    }
-
     *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_,
                           output_shapes_, use_inter_op_parallelism_,
-                          std::move(captured_func), std::move(map_func));
+                          std::move(captured_func));
   }
 
  private:
@@ -110,8 +69,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
             bool use_inter_op_parallelism,
-            std::unique_ptr<CapturedFunction> captured_func,
-            ParallelMapIteratorFunction map_func)
+            std::unique_ptr<CapturedFunction> captured_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
@@ -119,8 +77,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
           output_types_(output_types),
           output_shapes_(output_shapes),
           use_inter_op_parallelism_(use_inter_op_parallelism),
-          captured_func_(std::move(captured_func)),
-          map_func_(std::move(map_func)) {
+          captured_func_(std::move(captured_func)) {
       input_->Ref();
     }
 
@@ -132,9 +89,26 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
         return captured_func_->Instantiate(ctx);
       };
 
-      return NewParallelMapIterator(
-          {this, strings::StrCat(prefix, "::ParallelMap")}, input_,
-          std::move(init_func), map_func_, num_parallel_calls_);
+      const string& new_prefix = strings::StrCat(prefix, "::ParallelMap");
+      ParallelMapIteratorFunction map_func =
+          [this, new_prefix](IteratorContext* ctx,
+                             std::vector<Tensor> input_element,
+                             std::vector<Tensor>* result, StatusCallback done) {
+            captured_func_->RunAsync(ctx, std::move(input_element), result,
+                                     std::move(done), new_prefix);
+          };
+      if (!use_inter_op_parallelism_) {
+        map_func = [map_func](
+                       IteratorContext* ctx, std::vector<Tensor> input_element,
+                       std::vector<Tensor>* result, StatusCallback done) {
+          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
+                                     result, std::move(done)));
+        };
+      }
+
+      return NewParallelMapIterator({this, new_prefix}, input_,
+                                    std::move(init_func), std::move(map_func),
+                                    num_parallel_calls_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -202,7 +176,6 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> output_shapes_;
     const bool use_inter_op_parallelism_;
     const std::unique_ptr<CapturedFunction> captured_func_;
-    const ParallelMapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index ebf41925c9..13bd4b6036 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,7 +22,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -180,7 +179,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   void EnsureRunnerThreadStarted(IteratorContext* ctx)
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
-      auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
+      std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
       runner_thread_.reset(ctx->env()->StartThread(
           {}, "runner_thread",
           std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
@@ -209,15 +208,15 @@ class ParallelMapIterator : public DatasetBaseIterator {
       return;
     }
 
+    // Call `func_(input_element)`, store the result in `result->return_values`,
+    // and notify `result->notification` to unblock a consumer.
     auto done = [this, result](Status status) {
       result->status.Update(status);
       CallCompleted(result);
     };
 
-    // Apply the map function on `input_element`, storing the result in
-    // `result->return_values`, and invoking `done` when finished.
-    map_func_(ctx.get(), prefix(), std::move(input_element),
-              &result->return_values, std::move(done));
+    map_func_(ctx.get(), std::move(input_element), &result->return_values,
+              std::move(done));
   }
 
   Status ProcessResult(const std::shared_ptr<InvocationResult>& result,
@@ -350,9 +349,9 @@ std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBase* input_dataset,
     std::function<Status(IteratorContext*)> init_func,
     ParallelMapIteratorFunction map_func, int32 num_parallel_calls) {
-  return MakeUnique<ParallelMapIterator>(
-      params, input_dataset, std::move(init_func), std::move(map_func),
-      num_parallel_calls);
+  return std::unique_ptr<IteratorBase>(
+      new ParallelMapIterator(params, input_dataset, std::move(init_func),
+                              std::move(map_func), num_parallel_calls));
 }
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h
index 813f13c9e4..dc26c5cf25 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.h
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h
@@ -30,7 +30,7 @@ namespace data {
 // 3. A `std::vector<Tensor>*` to which the function will write the result.
 // 4. A `StatusCallback` that should be invoked when the function is complete.
 using ParallelMapIteratorFunction =
-    std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
+    std::function<void(IteratorContext*, std::vector<Tensor>,
                        std::vector<Tensor>*, StatusCallback)>;
 
 // Returns a new iterator that applies `map_func` to the elements of
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index 7de5ea8860..1d1a717062 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -182,7 +182,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      auto map_fn = [this](IteratorContext* ctx, const string& prefix,
+      auto map_fn = [this](IteratorContext* ctx,
                            std::vector<Tensor> input_element,
                            std::vector<Tensor>* result, StatusCallback done) {
         (*ctx->runner())([this, ctx, input_element, result, done]() {
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index 0703955fd4..afd0fc3abf 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -332,26 +332,6 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       for _ in range(10):
         self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
 
-  @parameterized.named_parameters(
-      ("Identity", None, lambda x: x, None),
-      ("Replicate", None, lambda x: (x, x), None),
-      ("Swap", (None, None), lambda x, y: (y, x), None),
-      ("Project", (None, None), lambda x, y: x, None),
-  )
-  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
-    dataset = self.structuredDataset(structure).repeat().apply(
-        batching.map_and_batch(map_fn, batch_size=10))
-    get_next = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      if isinstance(structure, tuple):
-        expected = map_fn(
-            *sess.run(self.structuredElement(structure, shape=[10])))
-      else:
-        expected = map_fn(
-            sess.run(self.structuredElement(structure, shape=[10])))
-      self.assertAllEqual(expected, sess.run(get_next))
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
index a0c6b37a6d..6b7afafa5d 100644
--- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
@@ -156,7 +156,7 @@ class FilterDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testShortCircuit(self):
+  def testReturnComponent(self):
     iterator = (
         dataset_ops.Dataset.zip(
             (dataset_ops.Dataset.range(10),
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 6efbe31ca1..0c372ebb10 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -622,7 +622,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
+        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
         self.assertSparseValuesEqual(actual, _sparse(i))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -649,7 +649,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
+        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
         self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval())
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -783,57 +783,19 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.assertTrue(all(tids[0] == tid for tid in tids))
 # pylint: enable=g-long-lambda
 
-  @parameterized.named_parameters(
-      ("SequentialIdentity", None, lambda x: x, None),
-      ("SequentialReplicate", None, lambda x: (x, x), None),
-      ("SequentialSwap", (None, None), lambda x, y: (y, x), None),
-      ("SequentialProject", (None, None), lambda x, y: x, None),
-      ("ParallelIdentity", None, lambda x: x, 10),
-      ("ParallelReplicate", None, lambda x: (x, x), 10),
-      ("ParallelSwap", (None, None), lambda x, y: (y, x), 10),
-      ("ParallelProject", (None, None), lambda x, y: x, 10),
-  )
-  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
-    dataset = self.structuredDataset(structure).repeat().map(
-        map_fn, num_parallel_calls=num_parallel_calls)
-    get_next = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      if isinstance(structure, tuple):
-        expected = map_fn(*sess.run(self.structuredElement(structure)))
-      else:
-        expected = map_fn(sess.run(self.structuredElement(structure)))
-      self.assertEqual(expected, sess.run(get_next))
-
 
 class MapDatasetBenchmark(test.Benchmark):
 
   def benchmarkChainOfMaps(self):
     chain_lengths = [0, 1, 2, 5, 10, 20, 50]
     for chain_length in chain_lengths:
-      for mode in ["general", "single-threaded", "short-circuit"]:
-        if mode == "general":
-          map_fn = lambda x: x + 1
-          use_inter_op_parallelism = True
-          print_label = ""
-          benchmark_label = ""
-        if mode == "single-threaded":
-          map_fn = lambda x: x + 1
-          use_inter_op_parallelism = False
-          print_label = " (single threaded mode)"
-          benchmark_label = "_single_threaded"
-        if mode == "short-circuit":
-          map_fn = lambda x: x
-          use_inter_op_parallelism = True  # should not have any significance
-          print_label = " (short circuit mode)"
-          benchmark_label = "_short_circuit"
-
+      for use_inter_op_parallelism in [False, True]:
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
           for _ in range(chain_length):
             dataset = dataset_ops.MapDataset(
                 dataset,
-                map_fn,
+                lambda x: x,
                 use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -851,39 +813,25 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset chain length%s: %d Median wall time: %f" %
-                  (print_label, chain_length, median_wall_time))
+                  (" (single threaded mode)" if not use_inter_op_parallelism
+                   else "", chain_length, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
                 name="benchmark_map_dataset_chain_latency_%d%s" %
-                (chain_length, benchmark_label))
+                (chain_length, "_single_threaded"
+                 if not use_inter_op_parallelism else ""))
 
   def benchmarkMapFanOut(self):
     fan_outs = [1, 2, 5, 10, 20, 50, 100]
     for fan_out in fan_outs:
-      for mode in ["general", "single-threaded", "short-circuit"]:
-        if mode == "general":
-          map_fn = lambda *xs: [x + 1 for x in xs]
-          use_inter_op_parallelism = True
-          print_label = ""
-          benchmark_label = ""
-        if mode == "single-threaded":
-          map_fn = lambda *xs: [x + 1 for x in xs]
-          use_inter_op_parallelism = False
-          print_label = " (single threaded mode)"
-          benchmark_label = "_single_threaded"
-        if mode == "short-circuit":
-          map_fn = lambda *xs: xs
-          use_inter_op_parallelism = True  # should not have any significance
-          print_label = " (short circuit mode)"
-          benchmark_label = "_short_circuit"
-
+      for use_inter_op_parallelism in [False, True]:
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(
               tuple(0 for _ in range(fan_out))).repeat(None)
           dataset = dataset_ops.MapDataset(
               dataset,
-              map_fn,
+              lambda *xs: xs,
               use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -901,12 +849,14 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset fan out%s: %d Median wall time: %f" %
-                  (print_label, fan_out, median_wall_time))
+                  (" (single threaded mode)" if not use_inter_op_parallelism
+                   else "", fan_out, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
-                name="benchmark_map_dataset_fan_out_%d%s" % (fan_out,
-                                                             benchmark_label))
+                name="benchmark_map_dataset_fan_out_%d%s" %
+                (fan_out, "_single_threaded"
+                 if not use_inter_op_parallelism else ""))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
index b73a94e683..b730e10949 100644
--- a/tensorflow/python/data/kernel_tests/test_base.py
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -19,13 +19,10 @@ from __future__ import print_function
 
 import re
 
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
@@ -110,29 +107,3 @@ class DatasetTestBase(test.TestCase):
       with self.assertRaisesRegexp(exception_class,
                                    re.escape(expected_message)):
         self.evaluate(next2())
-
-  def structuredDataset(self, structure, shape=None, dtype=dtypes.int64):
-    """Returns a singleton dataset with the given structure."""
-    if shape is None:
-      shape = []
-    if structure is None:
-      return dataset_ops.Dataset.from_tensors(
-          array_ops.zeros(shape, dtype=dtype))
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self.structuredDataset(substructure, shape, dtype)
-              for substructure in structure
-          ]))
-
-  def structuredElement(self, structure, shape=None, dtype=dtypes.int64):
-    """Returns an element with the given structure."""
-    if shape is None:
-      shape = []
-    if structure is None:
-      return array_ops.zeros(shape, dtype=dtype)
-    else:
-      return tuple([
-          self.structuredElement(substructure, shape, dtype)
-          for substructure in structure
-      ])
-- 
GitLab


From 07921022ddc68aacbf210acc62545a90e3091fb1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 14:57:15 -0700
Subject: [PATCH 1209/1357] Add deprecation call-out for tf_mobile

PiperOrigin-RevId: 215971335
---
 .../lite/g3doc/tfmobile/android_build.md       | 18 +++++++++++++++++-
 .../contrib/lite/g3doc/tfmobile/index.md       | 18 +++++++++++++++++-
 .../contrib/lite/g3doc/tfmobile/ios_build.md   | 18 +++++++++++++++++-
 .../lite/g3doc/tfmobile/linking_libs.md        | 18 +++++++++++++++++-
 .../contrib/lite/g3doc/tfmobile/optimizing.md  | 18 +++++++++++++++++-
 .../lite/g3doc/tfmobile/prepare_models.md      | 18 +++++++++++++++++-
 6 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md
index b0f32a8d6c..2eb776d10c 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md
@@ -1,6 +1,22 @@
-
 # Building TensorFlow on Android
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 To get you started working with TensorFlow on Android, we'll walk through two
 ways to build our TensorFlow mobile demos and deploying them on an Android
 device. The first is Android Studio, which lets you build and deploy in an
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/index.md b/tensorflow/contrib/lite/g3doc/tfmobile/index.md
index 49ad35d4e6..15f0fd3961 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/index.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/index.md
@@ -1,6 +1,22 @@
-
 # Overview
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 TensorFlow was designed to be a good deep learning solution for mobile
 platforms. Currently we have two solutions for deploying machine learning
 applications on mobile and embedded devices: TensorFlow for Mobile and
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md
index be8b4100c8..d922907cdc 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md
@@ -1,6 +1,22 @@
-
 # Building TensorFlow on iOS
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 ## Using CocoaPods
 
 The simplest way to get started with TensorFlow on iOS is using the CocoaPods
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md
index 4d4bb3bc08..fd0e322c93 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md
@@ -1,6 +1,22 @@
-
 # Integrating TensorFlow libraries
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 Once you have made some progress on a model that addresses the problem you’re
 trying to solve, it’s important to test it out inside your application
 immediately. There are often unexpected differences between your training data
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md
index 7436594fd8..59ff8e774c 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md
@@ -1,6 +1,22 @@
-
 # Optimizing for mobile
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 There are some special issues that you have to deal with when you’re trying to
 ship on mobile or embedded devices, and you’ll need to think about these as
 you’re developing your model.
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md
index d1c67d4c61..1d373251dd 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md
@@ -1,6 +1,22 @@
-
 # Preparing models for mobile deployment
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 The requirements for storing model information during training are very
 different from when you want to release it as part of a mobile app. This section
 covers the tools involved in converting from a training model to something
-- 
GitLab


From 1e104d80826fed95f9fad6f07f68e35cae3527b2 Mon Sep 17 00:00:00 2001
From: Geoffrey Irving <irving@naml.us>
Date: Wed, 19 Sep 2018 09:33:19 -0700
Subject: [PATCH 1210/1357] Expand stateless random generators to match their
 stateful cousins

stateless_random_uniform now take minval+maxval and handles ints,
and stateless_normal/stateless_truncated_normal take mean+stddev.
Additionally, all of the stateless functions now have proper doc
strings.

This is step one of moving stateless random numbers out of contrib.
---
 tensorflow/contrib/stateless/BUILD            |   5 +-
 tensorflow/contrib/stateless/__init__.py      |   9 +-
 .../kernel_tests/stateless_random_ops_test.py | 156 ++++++-------
 .../contrib/stateless/python/stateless_ops.py | 214 ++++++++++++++++++
 .../api_def_StatelessRandomUniformInt.pbtxt   |  46 ++++
 tensorflow/core/kernels/random_op.cc          |  34 +--
 .../core/kernels/stateless_random_ops.cc      | 155 ++++++++-----
 tensorflow/core/ops/stateless_random_ops.cc   |  53 +++--
 8 files changed, 491 insertions(+), 181 deletions(-)
 create mode 100644 tensorflow/contrib/stateless/python/stateless_ops.py
 create mode 100644 tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt

diff --git a/tensorflow/contrib/stateless/BUILD b/tensorflow/contrib/stateless/BUILD
index a217397c1a..e9ddec8889 100644
--- a/tensorflow/contrib/stateless/BUILD
+++ b/tensorflow/contrib/stateless/BUILD
@@ -11,7 +11,10 @@ load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
 
 py_library(
     name = "stateless",
-    srcs = ["__init__.py"],
+    srcs = [
+        "__init__.py",
+        "python/stateless_ops.py",
+    ],
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:framework_ops",
diff --git a/tensorflow/contrib/stateless/__init__.py b/tensorflow/contrib/stateless/__init__.py
index fe23fe0dd8..30d0a7ab6a 100644
--- a/tensorflow/contrib/stateless/__init__.py
+++ b/tensorflow/contrib/stateless/__init__.py
@@ -32,16 +32,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import ops
-
 # pylint: disable=wildcard-import
-from tensorflow.python.ops.gen_stateless_random_ops import *
+from tensorflow.contrib.stateless.python.stateless_ops import *
 
 from tensorflow.python.util.all_util import remove_undocumented
 
-ops.NotDifferentiable("StatelessMultinomial")
-ops.NotDifferentiable("StatelessRandomNormal")
-ops.NotDifferentiable("StatelessRandomUniform")
-ops.NotDifferentiable("StatelessTruncatedNormal")
-
 remove_undocumented(__name__)
diff --git a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
index d724a5c014..c0c1430d84 100644
--- a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
+++ b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+
 import numpy as np
 from tensorflow.contrib import stateless
 from tensorflow.python.framework import constant_op
@@ -27,10 +29,6 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
 
-CASES = [(stateless.stateless_random_uniform, random_ops.random_uniform),
-         (stateless.stateless_random_normal, random_ops.random_normal),
-         (stateless.stateless_truncated_normal, random_ops.truncated_normal)]
-
 
 def invert_philox(key, value):
   """Invert the Philox bijection."""
@@ -51,96 +49,102 @@ def invert_philox(key, value):
 
 class StatelessOpsTest(test.TestCase):
 
-  def testMatchStateful(self):
+  def _test_match(self, cases):
     # Stateless ops should be the same as stateful ops on the first call
     # after seed scrambling.
+    cases = tuple(cases)
     key = 0x3ec8f720, 0x02461e29
     for seed in (7, 17), (11, 5), (2, 3):
       preseed = invert_philox(key, (seed[0], 0, seed[1], 0)).astype(np.uint64)
       preseed = preseed[::2] | preseed[1::2] << 32
       random_seed.set_random_seed(seed[0])
       with self.test_session(use_gpu=True):
-        for stateless_op, stateful_op in CASES:
-          for shape in (), (3,), (2, 5):
-            stateful = stateful_op(shape, seed=seed[1])
-            pure = stateless_op(shape, seed=preseed)
-            self.assertAllEqual(stateful.eval(), pure.eval())
+        for stateless_op, stateful_op in cases:
+          stateful = stateful_op(seed=seed[1])
+          pure = stateless_op(seed=preseed)
+          self.assertAllEqual(stateful.eval(), pure.eval())
 
-  def testDeterminism(self):
+  def _test_determinism(self, cases):
     # Stateless values should be equal iff the seeds are equal (roughly)
+    cases = tuple(cases)
     with self.test_session(use_gpu=True):
       for seed_type in [dtypes.int32, dtypes.int64]:
         seed_t = array_ops.placeholder(seed_type, shape=[2])
         seeds = [(x, y) for x in range(5) for y in range(5)] * 3
-        for stateless_op, _ in CASES:
-          for shape in (), (3,), (2, 5):
-            pure = stateless_op(shape, seed=seed_t)
-            values = [(seed, pure.eval(feed_dict={seed_t: seed}))
-                      for seed in seeds]
-            for s0, v0 in values:
-              for s1, v1 in values:
-                self.assertEqual(s0 == s1, np.all(v0 == v1))
-
-  def testShapeType(self):
-    with self.test_session(use_gpu=True):
-      for shape_dtype in [dtypes.int32, dtypes.int64]:
-        seed_t = array_ops.placeholder(dtypes.int64, shape=[2])
-        seeds = [(x, y) for x in range(5) for y in range(5)] * 3
-        for stateless_op, _ in CASES:
-          for shape in (), (3,), (2, 5):
-            pure = stateless_op(constant_op.constant(shape, dtype=shape_dtype),
-                                seed=seed_t)
-            values = [(seed, pure.eval(feed_dict={seed_t: seed}))
-                      for seed in seeds]
-            for s0, v0 in values:
-              for s1, v1 in values:
-                self.assertEqual(s0 == s1, np.all(v0 == v1))
-
-  def testMatchStatefulMultinomial(self):
-    # Stateless ops should be the same as stateful ops on the first call
-    # after seed scrambling.
-    key = 0x3ec8f720, 0x02461e29
-    num_samples = 4
-    for logits_dtype in np.float16, np.float32, np.float64:
-      for output_dtype in dtypes.int32, dtypes.int64:
-        for seed in (7, 17), (11, 5), (2, 3):
-          preseed = invert_philox(key,
-                                  (seed[0], 0, seed[1], 0)).astype(np.uint64)
-          preseed = preseed[::2] | preseed[1::2] << 32
-          random_seed.set_random_seed(seed[0])
-          with self.test_session(use_gpu=True):
-            for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2],
-                                                      [0.25, 0.75]]):
-              logits_t = constant_op.constant(logits, dtype=logits_dtype)
-              stateful = random_ops.multinomial(
-                  logits_t,
-                  num_samples,
-                  seed=seed[1],
-                  output_dtype=output_dtype)
-              pure = stateless.stateless_multinomial(
-                  logits_t,
-                  num_samples,
-                  seed=preseed,
-                  output_dtype=output_dtype)
-              self.assertAllEqual(stateful.eval(), pure.eval())
+        for stateless_op, _ in cases:
+          pure = stateless_op(seed=seed_t)
+          values = [(seed, pure.eval(feed_dict={seed_t: seed}))
+                    for seed in seeds]
+          for s0, v0 in values:
+            for s1, v1 in values:
+              self.assertEqual(s0 == s1, np.all(v0 == v1))
 
-  def testDeterminismMultinomial(self):
-    # Stateless values should be equal iff the seeds are equal (roughly)
+  def _float_cases(self, shape_dtypes=(None,)):
+    float_cases = (
+        # Uniform distribution, with and without range
+        (stateless.stateless_random_uniform, random_ops.random_uniform, {}),
+        (stateless.stateless_random_uniform, random_ops.random_uniform,
+         dict(minval=2.2, maxval=7.1)),
+        # Normal distribution, with and without mean+stddev
+        (stateless.stateless_random_normal, random_ops.random_normal, {}),
+        (stateless.stateless_random_normal, random_ops.random_normal,
+         dict(mean=2, stddev=3)),
+        # Truncated normal distribution, with and without mean+stddev
+        (stateless.stateless_truncated_normal, random_ops.truncated_normal, {}),
+        (stateless.stateless_truncated_normal, random_ops.truncated_normal,
+         dict(mean=3, stddev=4)),
+    )
+    for dtype in dtypes.float16, dtypes.float32, dtypes.float64:
+      for shape_dtype in shape_dtypes:
+        for shape in (), (3,), (2, 5):
+          if shape_dtype is not None:
+            shape = constant_op.constant(shape, dtype=shape_dtype)
+          for stateless_op, stateful_op, kwds in float_cases:
+            kwds = dict(shape=shape, dtype=dtype, **kwds)
+            yield (functools.partial(stateless_op, **kwds),
+                   functools.partial(stateful_op, **kwds))
+
+  def _int_cases(self, shape_dtypes=(None,)):
+    for shape_dtype in shape_dtypes:
+      for shape in (), (3,), (2, 5):
+        if shape_dtype is not None:
+          shape = constant_op.constant(shape, dtype=shape_dtype)
+        for dtype in dtypes.int32, dtypes.int64:
+          kwds = dict(minval=2, maxval=11111, dtype=dtype, shape=shape)
+          yield (functools.partial(stateless.stateless_random_uniform, **kwds),
+                 functools.partial(random_ops.random_uniform, **kwds))
+
+  def _multinomial_cases(self):
     num_samples = 10
-    with self.test_session(use_gpu=True):
-      for seed_type in [dtypes.int32, dtypes.int64]:
-        seed_t = array_ops.placeholder(seed_type, shape=[2])
-        seeds = [(x, y) for x in range(5) for y in range(5)] * 3
+    for logits_dtype in np.float16, np.float32, np.float64:
+      for output_dtype in dtypes.int32, dtypes.int64:
         for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2],
                                                   [0.25, 0.75]]):
-          pure = stateless.stateless_multinomial(
-              logits, num_samples, seed=seed_t)
-          values = [
-              (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds
-          ]
-          for s0, v0 in values:
-            for s1, v1 in values:
-              self.assertEqual(s0 == s1, np.all(v0 == v1))
+          kwds = dict(logits=constant_op.constant(logits, dtype=logits_dtype),
+                      num_samples=num_samples,
+                      output_dtype=output_dtype)
+          yield (functools.partial(stateless.stateless_multinomial, **kwds),
+                 functools.partial(random_ops.multinomial, **kwds))
+
+  def testMatchFloat(self):
+    self._test_match(self._float_cases())
+
+  def testMatchInt(self):
+    self._test_match(self._int_cases())
+
+  def testMatchMultinomial(self):
+    self._test_match(self._multinomial_cases())
+
+  def testDeterminismFloat(self):
+    self._test_determinism(self._float_cases(
+        shape_dtypes=(dtypes.int32, dtypes.int64)))
+
+  def testDeterminismInt(self):
+    self._test_determinism(self._int_cases(
+        shape_dtypes=(dtypes.int32, dtypes.int64)))
+
+  def testDeterminismMultinomial(self):
+    self._test_determinism(self._multinomial_cases())
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/contrib/stateless/python/stateless_ops.py b/tensorflow/contrib/stateless/python/stateless_ops.py
new file mode 100644
index 0000000000..db9b7a87f2
--- /dev/null
+++ b/tensorflow/contrib/stateless/python/stateless_ops.py
@@ -0,0 +1,214 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Stateless random ops which take seed as a tensor input."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.ops import gen_stateless_random_ops
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import math_ops
+
+ops.NotDifferentiable("StatelessMultinomial")
+ops.NotDifferentiable("StatelessRandomNormal")
+ops.NotDifferentiable("StatelessRandomUniform")
+ops.NotDifferentiable("StatelessRandomUniformInt")
+ops.NotDifferentiable("StatelessTruncatedNormal")
+
+
+def stateless_random_uniform(shape,
+                             seed,
+                             minval=0,
+                             maxval=None,
+                             dtype=dtypes.float32,
+                             name=None):
+  """Outputs deterministic pseudorandom values from a uniform distribution.
+
+  This is a stateless version of `tf.random_uniform`: if run twice with the
+  same seeds, it will produce the same pseudorandom numbers.  The output is
+  consistent across multiple runs on the same hardware (and between CPU
+  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
+  hardware.
+
+  The generated values follow a uniform distribution in the range
+  `[minval, maxval)`. The lower bound `minval` is included in the range, while
+  the upper bound `maxval` is excluded.
+
+  For floats, the default range is `[0, 1)`.  For ints, at least `maxval` must
+  be specified explicitly.
+
+  In the integer case, the random integers are slightly biased unless
+  `maxval - minval` is an exact power of two.  The bias is small for values of
+  `maxval - minval` significantly smaller than the range of the output (either
+  `2**32` or `2**64`).
+
+  Args:
+    shape: A 1-D integer Tensor or Python array. The shape of the output tensor.
+    seed: A shape [2] integer Tensor of seeds to the random number generator.
+    minval: A 0-D Tensor or Python value of type `dtype`. The lower bound on the
+      range of random values to generate.  Defaults to 0.
+    maxval: A 0-D Tensor or Python value of type `dtype`. The upper bound on
+      the range of random values to generate.  Defaults to 1 if `dtype` is
+      floating point.
+    dtype: The type of the output: `float16`, `float32`, `float64`, `int32`,
+      or `int64`.
+    name: A name for the operation (optional).
+
+  Returns:
+    A tensor of the specified shape filled with random uniform values.
+
+  Raises:
+    ValueError: If `dtype` is integral and `maxval` is not specified.
+  """
+  dtype = dtypes.as_dtype(dtype)
+  if dtype not in (dtypes.float16, dtypes.bfloat16, dtypes.float32,
+                   dtypes.float64, dtypes.int32, dtypes.int64):
+    raise ValueError("Invalid dtype %r" % dtype)
+  if maxval is None:
+    if dtype.is_integer:
+      raise ValueError("Must specify maxval for integer dtype %r" % dtype)
+    maxval = 1
+  with ops.name_scope(name, "stateless_random_uniform",
+                      [shape, seed, minval, maxval]) as name:
+    shape = random_ops._ShapeTensor(shape)  # pylint: disable=protected-access
+    minval = ops.convert_to_tensor(minval, dtype=dtype, name="min")
+    maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max")
+    if dtype.is_integer:
+      return gen_stateless_random_ops.stateless_random_uniform_int(
+          shape, seed=seed, minval=minval, maxval=maxval, name=name)
+    else:
+      rnd = gen_stateless_random_ops.stateless_random_uniform(
+          shape, seed=seed, dtype=dtype)
+      return math_ops.add(rnd * (maxval - minval), minval, name=name)
+
+
+def stateless_random_normal(shape,
+                            seed,
+                            mean=0.0,
+                            stddev=1.0,
+                            dtype=dtypes.float32,
+                            name=None):
+  """Outputs deterministic pseudorandom values from a normal distribution.
+
+  This is a stateless version of `tf.random_normal`: if run twice with the
+  same seeds, it will produce the same pseudorandom numbers.  The output is
+  consistent across multiple runs on the same hardware (and between CPU
+  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
+  hardware.
+
+  Args:
+    shape: A 1-D integer Tensor or Python array. The shape of the output tensor.
+    seed: A shape [2] integer Tensor of seeds to the random number generator.
+    mean: A 0-D Tensor or Python value of type `dtype`. The mean of the normal
+      distribution.
+    stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation
+      of the normal distribution.
+    dtype: The type of the output.
+    name: A name for the operation (optional).
+
+  Returns:
+    A tensor of the specified shape filled with random normal values.
+  """
+  with ops.name_scope(name, "stateless_random_normal",
+                      [shape, seed, mean, stddev]) as name:
+    shape = random_ops._ShapeTensor(shape)  # pylint: disable=protected-access
+    mean = ops.convert_to_tensor(mean, dtype=dtype, name="mean")
+    stddev = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev")
+    rnd = gen_stateless_random_ops.stateless_random_normal(shape, seed, dtype)
+    return math_ops.add(rnd * stddev, mean, name=name)
+
+
+def stateless_truncated_normal(shape,
+                               seed,
+                               mean=0.0,
+                               stddev=1.0,
+                               dtype=dtypes.float32,
+                               name=None):
+  """Outputs deterministic pseudorandom values, truncated normally distributed.
+
+  This is a stateless version of `tf.truncated_normal`: if run twice with the
+  same seeds, it will produce the same pseudorandom numbers.  The output is
+  consistent across multiple runs on the same hardware (and between CPU
+  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
+  hardware.
+
+  The generated values follow a normal distribution with specified mean and
+  standard deviation, except that values whose magnitude is more than 2 standard
+  deviations from the mean are dropped and re-picked.
+
+  Args:
+    shape: A 1-D integer Tensor or Python array. The shape of the output tensor.
+    seed: A shape [2] integer Tensor of seeds to the random number generator.
+    mean: A 0-D Tensor or Python value of type `dtype`. The mean of the
+      truncated normal distribution.
+    stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation
+      of the normal distribution, before truncation.
+    dtype: The type of the output.
+    name: A name for the operation (optional).
+
+  Returns:
+    A tensor of the specified shape filled with random truncated normal values.
+  """
+  with ops.name_scope(name, "stateless_truncated_normal",
+                      [shape, seed, mean, stddev]) as name:
+    shape = random_ops._ShapeTensor(shape)  # pylint: disable=protected-access
+    mean = ops.convert_to_tensor(mean, dtype=dtype, name="mean")
+    stddev = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev")
+    rnd = gen_stateless_random_ops.stateless_truncated_normal(
+        shape, seed, dtype)
+    return math_ops.add(rnd * stddev, mean, name=name)
+
+
+def stateless_multinomial(logits,
+                          num_samples,
+                          seed,
+                          output_dtype=dtypes.int64,
+                          name=None):
+  """Draws deterministic pseudorandom samples from a multinomial distribution.
+
+  This is a stateless version of `tf.multinomial`: if run twice with the
+  same seeds, it will produce the same pseudorandom numbers.  The output is
+  consistent across multiple runs on the same hardware (and between CPU
+  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
+  hardware.
+
+  Example:
+
+  ```python
+  # samples has shape [1, 5], where each value is either 0 or 1 with equal
+  # probability.
+  samples = tf.contrib.stateless.stateless_multinomial(
+      tf.log([[10., 10.]]), 5, seed=[7, 17])
+  ```
+
+  Args:
+    logits: 2-D Tensor with shape `[batch_size, num_classes]`.  Each slice
+      `[i, :]` represents the unnormalized log-probabilities for all classes.
+    num_samples: 0-D.  Number of independent samples to draw for each row slice.
+    seed: A shape [2] integer Tensor of seeds to the random number generator.
+    name: Optional name for the operation.
+    output_dtype: integer type to use for the output. Defaults to int64.
+
+  Returns:
+    The drawn samples of shape `[batch_size, num_samples]`.
+  """
+  with ops.name_scope(name, "stateless_multinomial", [logits, seed]):
+    logits = ops.convert_to_tensor(logits, name="logits")
+    return gen_stateless_random_ops.stateless_multinomial(
+        logits, num_samples, seed, output_dtype=output_dtype)
diff --git a/tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt b/tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt
new file mode 100644
index 0000000000..b6a6dbdf54
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt
@@ -0,0 +1,46 @@
+op {
+  graph_op_name: "StatelessRandomUniformInt"
+  visibility: HIDDEN
+  in_arg {
+    name: "shape"
+    description: <<END
+The shape of the output tensor.
+END
+  }
+  in_arg {
+    name: "seed"
+    description: <<END
+2 seeds (shape [2]).
+END
+  }
+  in_arg {
+    name: "minval"
+    description: <<END
+Minimum value (inclusive, scalar).
+END
+  }
+  in_arg {
+    name: "maxval"
+    description: <<END
+Maximum value (exclusive, scalar).
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Random values with specified shape.
+END
+  }
+  attr {
+    name: "dtype"
+    description: <<END
+The type of the output.
+END
+  }
+  summary: "Outputs deterministic pseudorandom random integers from a uniform distribution."
+  description: <<END
+The generated values follow a uniform distribution in the range `[minval, maxval)`.
+
+The outputs are a deterministic function of `shape`, `seed`, `minval`, and `maxval`.
+END
+}
diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc
index 04a53697c0..3810d817ca 100644
--- a/tensorflow/core/kernels/random_op.cc
+++ b/tensorflow/core/kernels/random_op.cc
@@ -489,13 +489,15 @@ class RandomGammaOp : public OpKernel {
       Name("RandomGamma").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"),        \
       RandomGammaOp<TYPE>)
 
-#define REGISTER_INT(IntType)                                   \
-  REGISTER_KERNEL_BUILDER(Name("RandomUniformInt")              \
-                              .Device(DEVICE_CPU)               \
-                              .HostMemory("shape")              \
-                              .HostMemory("minval")             \
-                              .HostMemory("maxval")             \
-                              .TypeConstraint<IntType>("Tout"), \
+#define REGISTER_INT(IntType)                                                 \
+  template struct functor::FillPhiloxRandom<                                  \
+      CPUDevice, random::UniformDistribution<random::PhiloxRandom, IntType>>; \
+  REGISTER_KERNEL_BUILDER(Name("RandomUniformInt")                            \
+                              .Device(DEVICE_CPU)                             \
+                              .HostMemory("shape")                            \
+                              .HostMemory("minval")                           \
+                              .HostMemory("maxval")                           \
+                              .TypeConstraint<IntType>("Tout"),               \
                           RandomUniformIntOp<CPUDevice, IntType>);
 
 TF_CALL_half(REGISTER);
@@ -538,14 +540,16 @@ TF_CALL_int64(REGISTER_INT);
           random::TruncatedNormalDistribution<                                 \
               random::SingleSampleAdapter<random::PhiloxRandom>, TYPE>>);
 
-#define REGISTER_INT(IntType)                                   \
-  REGISTER_KERNEL_BUILDER(Name("RandomUniformInt")              \
-                              .Device(DEVICE_GPU)               \
-                              .HostMemory("shape")              \
-                              .HostMemory("minval")             \
-                              .HostMemory("maxval")             \
-                              .TypeConstraint<int32>("T")       \
-                              .TypeConstraint<IntType>("Tout"), \
+#define REGISTER_INT(IntType)                                                 \
+  template struct functor::FillPhiloxRandom<                                  \
+      GPUDevice, random::UniformDistribution<random::PhiloxRandom, IntType>>; \
+  REGISTER_KERNEL_BUILDER(Name("RandomUniformInt")                            \
+                              .Device(DEVICE_GPU)                             \
+                              .HostMemory("shape")                            \
+                              .HostMemory("minval")                           \
+                              .HostMemory("maxval")                           \
+                              .TypeConstraint<int32>("T")                     \
+                              .TypeConstraint<IntType>("Tout"),               \
                           RandomUniformIntOp<GPUDevice, IntType>);
 
 TF_CALL_half(REGISTER);
diff --git a/tensorflow/core/kernels/stateless_random_ops.cc b/tensorflow/core/kernels/stateless_random_ops.cc
index eab176c7fb..925f5291a6 100644
--- a/tensorflow/core/kernels/stateless_random_ops.cc
+++ b/tensorflow/core/kernels/stateless_random_ops.cc
@@ -113,74 +113,109 @@ class StatelessRandomOp : public StatelessRandomOpBase {
   }
 };
 
-#define REGISTER(TYPE)                                                 \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessRandomUniform")                                   \
-          .Device(DEVICE_CPU)                                          \
-          .HostMemory("shape")                                         \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<CPUDevice, random::UniformDistribution<        \
-                                       random::PhiloxRandom, TYPE> >); \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessRandomNormal")                                    \
-          .Device(DEVICE_CPU)                                          \
-          .HostMemory("shape")                                         \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<CPUDevice, random::NormalDistribution<         \
-                                       random::PhiloxRandom, TYPE> >); \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessTruncatedNormal")                                 \
-          .Device(DEVICE_CPU)                                          \
-          .HostMemory("shape")                                         \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<                                               \
-          CPUDevice,                                                   \
-          random::TruncatedNormalDistribution<                         \
-              random::SingleSampleAdapter<random::PhiloxRandom>, TYPE> >);
+template <typename Device, typename IntType>
+class StatelessRandomUniformIntOp : public StatelessRandomOpBase {
+ public:
+  using StatelessRandomOpBase::StatelessRandomOpBase;
 
-TF_CALL_half(REGISTER);
-TF_CALL_float(REGISTER);
-TF_CALL_double(REGISTER);
+  void Fill(OpKernelContext* context, random::PhiloxRandom random,
+            Tensor* output) override {
+    const Tensor& minval = context->input(2);
+    const Tensor& maxval = context->input(3);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(minval.shape()),
+                errors::InvalidArgument("minval must be 0-D, got shape ",
+                                        minval.shape().DebugString()));
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(maxval.shape()),
+                errors::InvalidArgument("maxval must be 0-D, got shape ",
+                                        maxval.shape().DebugString()));
+
+    // Verify that minval < maxval.  Note that we'll never reach this point for
+    // empty output.  Zero impossible things are fine.
+    const auto lo = minval.scalar<IntType>()();
+    const auto hi = maxval.scalar<IntType>()();
+    OP_REQUIRES(
+        context, lo < hi,
+        errors::InvalidArgument("Need minval < maxval, got ", lo, " >= ", hi));
+
+    // Build distribution
+    typedef random::UniformDistribution<random::PhiloxRandom, IntType>
+        Distribution;
+    Distribution dist(lo, hi);
+
+    auto flat = output->flat<IntType>();
+    // Reuse the compute kernels from the stateful random ops
+    functor::FillPhiloxRandom<Device, Distribution>()(
+        context, context->eigen_device<Device>(), random, flat.data(),
+        flat.size(), dist);
+  }
+};
 
-#undef REGISTER
+#define REGISTER(DEVICE, TYPE)                                              \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("StatelessRandomUniform")                                        \
+          .Device(DEVICE_##DEVICE)                                          \
+          .HostMemory("shape")                                              \
+          .HostMemory("seed")                                               \
+          .TypeConstraint<TYPE>("dtype"),                                   \
+      StatelessRandomOp<DEVICE##Device, random::UniformDistribution<        \
+                                            random::PhiloxRandom, TYPE> >); \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("StatelessRandomNormal")                                         \
+          .Device(DEVICE_##DEVICE)                                          \
+          .HostMemory("shape")                                              \
+          .HostMemory("seed")                                               \
+          .TypeConstraint<TYPE>("dtype"),                                   \
+      StatelessRandomOp<DEVICE##Device, random::NormalDistribution<         \
+                                            random::PhiloxRandom, TYPE> >); \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("StatelessTruncatedNormal")                                      \
+          .Device(DEVICE_##DEVICE)                                          \
+          .HostMemory("shape")                                              \
+          .HostMemory("seed")                                               \
+          .TypeConstraint<TYPE>("dtype"),                                   \
+      StatelessRandomOp<                                                    \
+          DEVICE##Device,                                                   \
+          random::TruncatedNormalDistribution<                              \
+              random::SingleSampleAdapter<random::PhiloxRandom>, TYPE> >);
+
+#define REGISTER_INT(DEVICE, TYPE)                            \
+  REGISTER_KERNEL_BUILDER(Name("StatelessRandomUniformInt")   \
+                              .Device(DEVICE_##DEVICE)        \
+                              .HostMemory("shape")            \
+                              .HostMemory("seed")             \
+                              .HostMemory("minval")           \
+                              .HostMemory("maxval")           \
+                              .TypeConstraint<TYPE>("dtype"), \
+                          StatelessRandomUniformIntOp<DEVICE##Device, TYPE>);
+
+#define REGISTER_CPU(TYPE) REGISTER(CPU, TYPE)
+#define REGISTER_GPU(TYPE) REGISTER(GPU, TYPE)
+#define REGISTER_INT_CPU(TYPE) REGISTER_INT(CPU, TYPE)
+#define REGISTER_INT_GPU(TYPE) REGISTER_INT(GPU, TYPE)
+
+TF_CALL_half(REGISTER_CPU);
+TF_CALL_bfloat16(REGISTER_CPU);
+TF_CALL_float(REGISTER_CPU);
+TF_CALL_double(REGISTER_CPU);
+TF_CALL_int32(REGISTER_INT_CPU);
+TF_CALL_int64(REGISTER_INT_CPU);
 
 #if GOOGLE_CUDA
 
-#define REGISTER(TYPE)                                                 \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessRandomUniform")                                   \
-          .Device(DEVICE_GPU)                                          \
-          .HostMemory("shape")                                         \
-          .HostMemory("seed")                                          \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<GPUDevice, random::UniformDistribution<        \
-                                       random::PhiloxRandom, TYPE> >); \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessRandomNormal")                                    \
-          .Device(DEVICE_GPU)                                          \
-          .HostMemory("shape")                                         \
-          .HostMemory("seed")                                          \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<GPUDevice, random::NormalDistribution<         \
-                                       random::PhiloxRandom, TYPE> >); \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessTruncatedNormal")                                 \
-          .Device(DEVICE_GPU)                                          \
-          .HostMemory("shape")                                         \
-          .HostMemory("seed")                                          \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<                                               \
-          GPUDevice,                                                   \
-          random::TruncatedNormalDistribution<                         \
-              random::SingleSampleAdapter<random::PhiloxRandom>, TYPE> >);
+TF_CALL_half(REGISTER_GPU);
+TF_CALL_float(REGISTER_GPU);
+TF_CALL_double(REGISTER_GPU);
+TF_CALL_int32(REGISTER_INT_GPU);
+TF_CALL_int64(REGISTER_INT_GPU);
 
-TF_CALL_half(REGISTER);
-TF_CALL_float(REGISTER);
-TF_CALL_double(REGISTER);
+#endif  // GOOGLE_CUDA
 
 #undef REGISTER
-
-#endif  // GOOGLE_CUDA
+#undef REGISTER_INT
+#undef REGISTER_CPU
+#undef REGISTER_GPU
+#undef REGISTER_INT_CPU
+#undef REGISTER_INT_GPU
 
 }  // namespace
 
diff --git a/tensorflow/core/ops/stateless_random_ops.cc b/tensorflow/core/ops/stateless_random_ops.cc
index 742709fb18..f919a21d60 100644
--- a/tensorflow/core/ops/stateless_random_ops.cc
+++ b/tensorflow/core/ops/stateless_random_ops.cc
@@ -19,42 +19,55 @@ limitations under the License.
 namespace tensorflow {
 
 using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
 using shape_inference::ShapeHandle;
 
-static Status StatelessShape(shape_inference::InferenceContext* context) {
+static Status StatelessShape(InferenceContext* c) {
   // Check seed shape
   ShapeHandle seed;
-  TF_RETURN_IF_ERROR(context->WithRank(context->input(1), 1, &seed));
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &seed));
   DimensionHandle unused;
-  TF_RETURN_IF_ERROR(context->WithValue(context->Dim(seed, 0), 2, &unused));
+  TF_RETURN_IF_ERROR(c->WithValue(c->Dim(seed, 0), 2, &unused));
 
   // Set output shape
   ShapeHandle out;
-  TF_RETURN_IF_ERROR(context->MakeShapeFromShapeTensor(0, &out));
-  context->set_output(0, out);
+  TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &out));
+  c->set_output(0, out);
   return Status::OK();
 }
 
-#define REGISTER_STATELESS_OP(name)                  \
-  REGISTER_OP(name)                                  \
-      .Input("shape: T")                             \
-      .Input("seed: Tseed")                          \
-      .Output("output: dtype")                       \
-      .Attr("dtype: {half,float,double} = DT_FLOAT") \
-      .Attr("T: {int32, int64} = DT_INT32")          \
-      .Attr("Tseed: {int32, int64} = DT_INT64")      \
+#define REGISTER_STATELESS_OP(name)                           \
+  REGISTER_OP(name)                                           \
+      .Input("shape: T")                                      \
+      .Input("seed: Tseed")                                   \
+      .Output("output: dtype")                                \
+      .Attr("dtype: {half,bfloat16,float,double} = DT_FLOAT") \
+      .Attr("T: {int32, int64} = DT_INT32")                   \
+      .Attr("Tseed: {int32, int64} = DT_INT64")               \
       .SetShapeFn(StatelessShape)
 
-// This op is exposed through contrib/stateless only.  The interface may change.
 REGISTER_STATELESS_OP("StatelessRandomUniform");
-
-// This op is exposed through contrib/stateless only.  The interface may change.
 REGISTER_STATELESS_OP("StatelessRandomNormal");
-
-// This op is exposed through contrib/stateless only.  The interface may change.
 REGISTER_STATELESS_OP("StatelessTruncatedNormal");
 
-// This op is exposed through contrib/stateless only.  The interface may change.
+#undef REGISTER_STATELESS_OP
+
+REGISTER_OP("StatelessRandomUniformInt")
+    .Input("shape: T")
+    .Input("seed: Tseed")
+    .Input("minval: dtype")
+    .Input("maxval: dtype")
+    .Output("output: dtype")
+    .Attr("dtype: {int32, int64}")
+    .Attr("T: {int32, int64}")
+    .Attr("Tseed: {int32, int64} = DT_INT64")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      return StatelessShape(c);
+    });
+
 REGISTER_OP("StatelessMultinomial")
     .Input("logits: T")
     .Input("num_samples: int32")
@@ -80,6 +93,4 @@ REGISTER_OP("StatelessMultinomial")
       return Status::OK();
     });
 
-#undef REGISTER_STATELESS_OP
-
 }  // namespace tensorflow
-- 
GitLab


From c966b5eed60a570f2121cb84ddb4ece84c413719 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 15:08:18 -0700
Subject: [PATCH 1211/1357] Add DistributionStrategy support to moving average
 APIs.

Fixes #21405.

PiperOrigin-RevId: 215973401
---
 tensorflow/contrib/distribute/python/BUILD    |  18 +++
 .../distribute/python/moving_averages_test.py | 141 ++++++++++++++++++
 tensorflow/python/training/moving_averages.py |  49 +++---
 3 files changed, 189 insertions(+), 19 deletions(-)
 create mode 100644 tensorflow/contrib/distribute/python/moving_averages_test.py

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 8267612236..76d5b59ce1 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -411,6 +411,24 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "moving_averages_test",
+    srcs = ["moving_averages_test.py"],
+    additional_deps = [
+        ":combinations",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variables",
+    ],
+    tags = [
+        "no_pip",
+    ],
+)
+
 cuda_py_test(
     name = "optimizer_v2_test",
     srcs = ["optimizer_v2_test.py"],
diff --git a/tensorflow/contrib/distribute/python/moving_averages_test.py b/tensorflow/contrib/distribute/python/moving_averages_test.py
new file mode 100644
index 0000000000..119352ad91
--- /dev/null
+++ b/tensorflow/contrib/distribute/python/moving_averages_test.py
@@ -0,0 +1,141 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for training.moving_averages when using a DistributionStrategy."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+
+from tensorflow.contrib.distribute.python import combinations
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.training import moving_averages
+
+
+all_combinations = combinations.combine(
+    distribution=[combinations.default_strategy,
+                  combinations.one_device_strategy,
+                  combinations.mirrored_strategy_with_gpu_and_cpu],
+    mode=["graph"])
+
+
+class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase):
+
+  @combinations.generate(all_combinations)
+  def testTowerModeWithoutZeroDebias(self, distribution):
+    tower_id = [0]
+
+    def tower_fn():
+      var = variables.Variable([10.0, 11.0])
+      val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]])
+      tower_id[0] += 1
+      decay = 0.25
+      assign = moving_averages.assign_moving_average(
+          var, val, decay, zero_debias=False)
+      return var, assign
+
+    with distribution.scope(), self.cached_session() as sess:
+      var, assign = distribution.call_for_each_tower(tower_fn)
+      variables.global_variables_initializer().run()
+      self.assertAllClose([10.0, 11.0], var.eval())
+      sess.run(distribution.unwrap(assign))
+      # Mean of val across calls to tower_fn().
+      average_val = [1.0 + 0.5 * (tower_id[0] - 1),
+                     2.0 - 0.5 * (tower_id[0] - 1)]
+      val_weight = 1.0 - 0.25
+      self.assertAllClose(
+          [10.0 * 0.25 + average_val[0] * val_weight,
+           11.0 * 0.25 + average_val[1] * val_weight],
+          var.eval())
+
+  @combinations.generate(all_combinations)
+  def testTowerMode(self, distribution):
+    tower_id = [0]
+
+    def tower_fn():
+      var = variables.Variable([0.0, 0.0])
+      val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]])
+      tower_id[0] += 1
+      decay = 0.25
+      assign = moving_averages.assign_moving_average(var, val, decay)
+      return var, assign.op
+
+    with distribution.scope(), self.cached_session() as sess:
+      var, assign_op = distribution.call_for_each_tower(tower_fn)
+      variables.global_variables_initializer().run()
+      self.assertAllClose([0.0, 0.0], var.eval())
+      sess.run(distribution.unwrap(assign_op))
+      # Mean of val across calls to tower_fn().
+      average_val = [1.0 + 0.5 * (tower_id[0] - 1),
+                     2.0 - 0.5 * (tower_id[0] - 1)]
+      self.assertAllClose(average_val, var.eval())
+
+  @combinations.generate(all_combinations)
+  def testCrossTowerWithoutZeroDebias(self, distribution):
+    with distribution.scope(), self.cached_session() as sess:
+      var = variables.Variable([10.0, 11.0])
+      val = constant_op.constant([1.0, 2.0])
+      decay = 0.25
+      # NOTE(josh11b): We currently generate an error if val is a PerDevice value.
+      assign = moving_averages.assign_moving_average(
+          var, val, decay, zero_debias=False)
+
+      variables.global_variables_initializer().run()
+      self.assertAllClose([10.0, 11.0], var.eval())
+      sess.run(assign)
+      average_val = [1.0, 2.0]
+      val_weight = 1.0 - 0.25
+      self.assertAllClose(
+          [10.0 * 0.25 + average_val[0] * val_weight,
+           11.0 * 0.25 + average_val[1] * val_weight],
+          var.eval())
+      # Also try assign.op.
+      sess.run(assign.op)
+      orig_weight = 0.25 * 0.25
+      val_weight = 1.0 - orig_weight
+      self.assertAllClose(
+          [10.0 * orig_weight + average_val[0] * val_weight,
+           11.0 * orig_weight + average_val[1] * val_weight],
+          var.eval())
+
+  @combinations.generate(all_combinations)
+  def testCrossTower(self, distribution):
+    with distribution.scope(), self.cached_session() as sess:
+      var = variables.Variable([0.0, 0.0])
+      val = array_ops.placeholder(dtypes.float32)
+      decay = 0.25
+      # NOTE(josh11b): We currently generate an error if val is a PerDevice value.
+      assign = moving_averages.assign_moving_average(var, val, decay)
+
+      variables.global_variables_initializer().run()
+      self.assertAllClose([0.0, 0.0], var.eval())
+      sess.run(assign, feed_dict={val: [1.0, 2.0]})
+      self.assertAllClose([1.0, 2.0], var.eval())
+
+      # Also try assign.op.
+      sess.run(assign.op, feed_dict={val: [10.0, 0.0]})
+      self.assertAllClose(
+          [(1.0 * 0.25 + 10.0) / (1.0 * 0.25 + 1.0),
+           (2.0 * 0.25 + 0.0) / (1.0 * 0.25 + 1.0)],
+          var.eval())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/training/moving_averages.py b/tensorflow/python/training/moving_averages.py
index 041266da3e..89bfcaf4ad 100644
--- a/tensorflow/python/training/moving_averages.py
+++ b/tensorflow/python/training/moving_averages.py
@@ -25,6 +25,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
+from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import slot_creator
 from tensorflow.python.util.tf_export import tf_export
 
@@ -36,9 +37,8 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
   The moving average of 'variable' updated with 'value' is:
     variable * decay + value * (1 - decay)
 
-  The returned Operation sets 'variable' to the newly computed moving average.
-
-  The new value of 'variable' can be set with the 'AssignSub' op as:
+  The returned Operation sets 'variable' to the newly computed moving average,
+  by performing this subtraction:
      variable -= (1 - decay) * (variable - value)
 
   Since variables that are initialized to a `0` value will be `0` biased,
@@ -50,7 +50,7 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
 
   The names of the debias shadow variables, by default, include both the scope
   they were created in and the scope of the variables they debias. They are also
-  given a uniqifying-suffix.
+  given a uniquifying-suffix.
 
   E.g.:
 
@@ -58,8 +58,8 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
     with tf.variable_scope('scope1'):
       with tf.variable_scope('scope2'):
         var = tf.get_variable('foo')
-        tf.assign_moving_average(var, 0.0, 1.0)
-        tf.assign_moving_average(var, 0.0, 0.9)
+        update_1 = tf.assign_moving_average(var, 0.0, 1.0)
+        update_2 = tf.assign_moving_average(var, 0.0, 0.9)
 
     # var.name: 'scope1/scope2/foo'
     # shadow var names: 'scope1/scope2/scope1/scope2/foo/biased'
@@ -76,20 +76,33 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
     name: Optional name of the returned operation.
 
   Returns:
-    A reference to the input 'variable' tensor with the newly computed
-    moving average.
+    A tensor which if evaluated will compute and return the new moving average.
   """
+  def update_fn(v, value, decay=decay):
+    decay = ops.convert_to_tensor(1.0 - decay, name="decay")
+    if decay.dtype != v.dtype.base_dtype:
+      decay = math_ops.cast(decay, v.dtype.base_dtype)
+    if zero_debias:
+      update_delta = _zero_debias(v, value, decay)
+    else:
+      update_delta = (v - value) * decay
+    return state_ops.assign_sub(v, update_delta, name=scope)
+
   with ops.name_scope(name, "AssignMovingAvg",
                       [variable, value, decay]) as scope:
-    with ops.colocate_with(variable):
-      decay = ops.convert_to_tensor(1.0 - decay, name="decay")
-      if decay.dtype != variable.dtype.base_dtype:
-        decay = math_ops.cast(decay, variable.dtype.base_dtype)
-      if zero_debias:
-        update_delta = _zero_debias(variable, value, decay)
-      else:
-        update_delta = (variable - value) * decay
-      return state_ops.assign_sub(variable, update_delta, name=scope)
+    tower_context = distribution_strategy_context.get_tower_context()
+    if tower_context:
+      # In a tower context, we update variable using the mean of value across
+      # towers.
+      def merge_fn(strategy, v, value):
+        value = strategy.reduce(
+            variable_scope.VariableAggregation.MEAN, value, v)
+        return strategy.update(v, update_fn, value)
+
+      return tower_context.merge_call(merge_fn, variable, value)
+    else:
+      strategy = distribution_strategy_context.get_cross_tower_context()
+      return strategy.update(variable, update_fn, value)
 
 
 def weighted_moving_average(value,
@@ -379,8 +392,6 @@ class ExponentialMovingAverage(object):
 
     Raises:
       TypeError: If the arguments are not an allowed type.
-      ValueError: If the moving average of one of the variables is already
-        being computed.
     """
     # TODO(touts): op_scope
     if var_list is None:
-- 
GitLab


From 5ac6e1e4b8318bad2f2bc7e5a08a58a7ed31e4c6 Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <penporn@google.com>
Date: Fri, 5 Oct 2018 15:43:32 -0700
Subject: [PATCH 1212/1357] Removes the INTEL_MKL_ML_ONLY option from the
 CMakeLists build file since the main logic for INTEL_MKL_ML_ONLY is getting
 removed in PR#22783. #22783

PiperOrigin-RevId: 215978712
---
 tensorflow/contrib/cmake/CMakeLists.txt | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index f675c135f4..60f53b8b75 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -352,9 +352,7 @@ if (tensorflow_ENABLE_MKL_SUPPORT)
     list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES})
     list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn_copy_shared_to_destination)
     include_directories(${mkldnn_INCLUDE_DIRS})
-  else (tensorflow_ENABLE_MKLDNN_SUPPORT)
-    add_definitions(-DINTEL_MKL_ML_ONLY)
-  endif()
+  endif(tensorflow_ENABLE_MKLDNN_SUPPORT)
 endif (tensorflow_ENABLE_MKL_SUPPORT)
 
 if (tensorflow_ENABLE_GPU)
-- 
GitLab


From 4aad5382f0e7148d8489d24d8355b828b3f7811b Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Fri, 5 Oct 2018 15:43:58 -0700
Subject: [PATCH 1213/1357] Internal change

PiperOrigin-RevId: 215978771
---
 tensorflow/contrib/lite/java/BUILD            | 95 ++++++++++++++-----
 tensorflow/contrib/lite/java/aar_with_jni.bzl |  5 +-
 .../org/tensorflow/lite/TensorFlowLite.java   | 20 +++-
 .../tensorflow/lite/InterpreterFlexTest.java  | 46 +++++++++
 .../org/tensorflow/lite/InterpreterTest.java  | 14 +++
 5 files changed, 153 insertions(+), 27 deletions(-)
 create mode 100644 tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java

diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD
index 098ba7e773..e68cd26f81 100644
--- a/tensorflow/contrib/lite/java/BUILD
+++ b/tensorflow/contrib/lite/java/BUILD
@@ -11,6 +11,10 @@ load("//tensorflow/java:build_defs.bzl", "JAVACOPTS")
 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_jni_binary")
 load("//tensorflow/contrib/lite/java:aar_with_jni.bzl", "aar_with_jni")
 
+JAVA_SRCS = glob([
+    "src/main/java/org/tensorflow/lite/*.java",
+])
+
 # Building tensorflow-lite.aar including 4 variants of .so
 # To build an aar for release, run below command:
 # bazel build --cxxopt='--std=c++11' -c opt --fat_apk_cpu=x86,x86_64,arm64-v8a,armeabi-v7a \
@@ -20,28 +24,38 @@ aar_with_jni(
     android_library = ":tensorflowlite",
 )
 
+# EXPERIMENTAL: AAR target that supports TensorFlow op execution with TFLite.
+aar_with_jni(
+    name = "tensorflow-lite-flex",
+    android_library = ":tensorflowlite_flex",
+)
+
 android_library(
     name = "tensorflowlite",
-    srcs = glob(
-        [
-            "src/main/java/org/tensorflow/lite/*.java",
-        ],
-    ),
+    srcs = JAVA_SRCS,
+    manifest = "AndroidManifest.xml",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":tensorflowlite_native",
+        "@org_checkerframework_qual",
+    ],
+)
+
+# EXPERIMENTAL: Android target that supports TensorFlow op execution with TFLite.
+android_library(
+    name = "tensorflowlite_flex",
+    srcs = JAVA_SRCS,
     manifest = "AndroidManifest.xml",
     visibility = ["//visibility:public"],
     deps = [
-        ":tflite_runtime",
+        ":tensorflowlite_native_flex",
         "@org_checkerframework_qual",
     ],
 )
 
 android_library(
     name = "tensorflowlite_java",
-    srcs = glob(
-        [
-            "src/main/java/org/tensorflow/lite/*.java",
-        ],
-    ),
+    srcs = JAVA_SRCS,
     visibility = ["//visibility:public"],
     deps = [
         "@org_checkerframework_qual",
@@ -50,16 +64,23 @@ android_library(
 
 java_library(
     name = "tensorflowlitelib",
-    srcs = glob(
-        [
-            "src/main/java/org/tensorflow/lite/*.java",
-        ],
-    ),
+    srcs = JAVA_SRCS,
     javacopts = JAVACOPTS,
     visibility = ["//visibility:public"],
     deps = [
         ":libtensorflowlite_jni.so",
-        "//tensorflow/contrib/lite/java/src/main/native",
+        "@org_checkerframework_qual",
+    ],
+)
+
+# EXPERIMENTAL: Java target that supports TensorFlow op execution with TFLite.
+java_library(
+    name = "tensorflowlitelib_flex",
+    srcs = JAVA_SRCS,
+    javacopts = JAVACOPTS,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":libtensorflowlite_flex_jni.so",
         "@org_checkerframework_qual",
     ],
 )
@@ -72,7 +93,6 @@ java_test(
     tags = ["no_oss"],
     test_class = "org.tensorflow.lite.TensorFlowLiteTest",
     deps = [
-        ":libtensorflowlite_jni.so",
         ":tensorflowlitelib",
         "@com_google_truth",
         "@junit",
@@ -87,7 +107,6 @@ java_test(
     tags = ["no_oss"],
     test_class = "org.tensorflow.lite.DataTypeTest",
     deps = [
-        ":libtensorflowlite_jni.so",
         ":tensorflowlitelib",
         "@com_google_truth",
         "@junit",
@@ -110,7 +129,6 @@ java_test(
     tags = ["no_oss"],
     test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest",
     deps = [
-        ":libtensorflowlite_jni.so",
         ":tensorflowlitelib",
         "@com_google_truth",
         "@junit",
@@ -125,19 +143,37 @@ java_test(
     data = [
         "src/testdata/add.bin",
         "src/testdata/mobilenet.tflite.bin",
+        "//tensorflow/contrib/lite:testdata/multi_add_flex.bin",
     ],
     javacopts = JAVACOPTS,
     tags = ["no_oss"],
     test_class = "org.tensorflow.lite.InterpreterTest",
     visibility = ["//visibility:private"],
     deps = [
-        ":libtensorflowlite_jni.so",
         ":tensorflowlitelib",
         "@com_google_truth",
         "@junit",
     ],
 )
 
+java_test(
+    name = "InterpreterFlexTest",
+    size = "small",
+    srcs = ["src/test/java/org/tensorflow/lite/InterpreterFlexTest.java"],
+    data = [
+        "//tensorflow/contrib/lite:testdata/multi_add_flex.bin",
+    ],
+    javacopts = JAVACOPTS,
+    tags = ["no_oss"],
+    test_class = "org.tensorflow.lite.InterpreterFlexTest",
+    visibility = ["//visibility:private"],
+    deps = [
+        ":tensorflowlitelib_flex",
+        "@com_google_truth",
+        "@junit",
+    ],
+)
+
 java_test(
     name = "TensorTest",
     size = "small",
@@ -164,14 +200,29 @@ filegroup(
 )
 
 cc_library(
-    name = "tflite_runtime",
+    name = "tensorflowlite_native",
     srcs = ["libtensorflowlite_jni.so"],
     visibility = ["//visibility:public"],
 )
 
+cc_library(
+    name = "tensorflowlite_native_flex",
+    srcs = ["libtensorflowlite_flex_jni.so"],
+    visibility = ["//visibility:public"],
+)
+
 tflite_jni_binary(
     name = "libtensorflowlite_jni.so",
     deps = [
         "//tensorflow/contrib/lite/java/src/main/native",
     ],
 )
+
+# EXPERIMENTAL: Native target that supports TensorFlow op execution with TFLite.
+tflite_jni_binary(
+    name = "libtensorflowlite_flex_jni.so",
+    deps = [
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
+        "//tensorflow/contrib/lite/java/src/main/native",
+    ],
+)
diff --git a/tensorflow/contrib/lite/java/aar_with_jni.bzl b/tensorflow/contrib/lite/java/aar_with_jni.bzl
index 9d2aead266..360d622b1b 100644
--- a/tensorflow/contrib/lite/java/aar_with_jni.bzl
+++ b/tensorflow/contrib/lite/java/aar_with_jni.bzl
@@ -30,7 +30,10 @@ EOF
         # In some platforms we don't have an Android SDK/NDK and this target
         # can't be built. We need to prevent the build system from trying to
         # use the target in that case.
-        tags = ["manual"],
+        tags = [
+            "manual",
+            "no_cuda_on_cpu_tap",
+        ],
     )
 
     native.genrule(
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java
index 711638a9f9..d5447b3bf8 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java
@@ -18,7 +18,8 @@ package org.tensorflow.lite;
 /** Static utility methods loading the TensorFlowLite runtime. */
 public final class TensorFlowLite {
 
-  private static final String LIBNAME = "tensorflowlite_jni";
+  private static final String PRIMARY_LIBNAME = "tensorflowlite_jni";
+  private static final String FALLBACK_LIBNAME = "tensorflowlite_flex_jni";
 
   private TensorFlowLite() {}
 
@@ -29,13 +30,24 @@ public final class TensorFlowLite {
    * Load the TensorFlowLite runtime C library.
    */
   static boolean init() {
+    Throwable primaryLibException;
     try {
-      System.loadLibrary(LIBNAME);
+      System.loadLibrary(PRIMARY_LIBNAME);
       return true;
     } catch (UnsatisfiedLinkError e) {
-      System.err.println("TensorFlowLite: failed to load native library: " + e.getMessage());
-      return false;
+      primaryLibException = e;
     }
+
+    try {
+      System.loadLibrary(FALLBACK_LIBNAME);
+      return true;
+    } catch (UnsatisfiedLinkError e) {
+      // If the fallback fails, log the error for the primary load instead.
+      System.err.println(
+          "TensorFlowLite: failed to load native library: " + primaryLibException.getMessage());
+    }
+
+    return false;
   }
 
   static {
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java
new file mode 100644
index 0000000000..2791c3864b
--- /dev/null
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java
@@ -0,0 +1,46 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package org.tensorflow.lite;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.File;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Unit tests for {@link org.tensorflow.lite.Interpreter} that validate execution with models that
+ * have TensorFlow ops.
+ */
+@RunWith(JUnit4.class)
+public final class InterpreterFlexTest {
+
+  private static final File FLEX_MODEL_FILE =
+      new File("tensorflow/contrib/lite/testdata/multi_add_flex.bin");
+
+  /** Smoke test validating that flex model loading works when the flex delegate is linked. */
+  @Test
+  public void testFlexModel() throws Exception {
+    try (Interpreter interpreter = new Interpreter(FLEX_MODEL_FILE)) {
+      assertThat(interpreter.getInputTensorCount()).isEqualTo(4);
+      assertThat(interpreter.getInputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
+      assertThat(interpreter.getOutputTensorCount()).isEqualTo(4);
+      assertThat(interpreter.getOutputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
+      interpreter.run(new float[1], new float[1]);
+    }
+  }
+}
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
index a98fca0132..f8b73c7cf3 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
@@ -43,6 +43,9 @@ public final class InterpreterTest {
   private static final File MOBILENET_MODEL_FILE =
       new File("tensorflow/contrib/lite/java/src/testdata/mobilenet.tflite.bin");
 
+  private static final File FLEX_MODEL_FILE =
+      new File("tensorflow/contrib/lite/testdata/multi_add_flex.bin");
+
   @Test
   public void testInterpreter() throws Exception {
     Interpreter interpreter = new Interpreter(MODEL_FILE);
@@ -345,4 +348,15 @@ public final class InterpreterTest {
     interpreter.close();
     interpreter.close();
   }
+
+  /** Smoke test validating that flex model loading fails when the flex delegate is not linked. */
+  @Test
+  public void testFlexModel() throws Exception {
+    try {
+      new Interpreter(FLEX_MODEL_FILE);
+      fail();
+    } catch (IllegalStateException e) {
+      // Expected failure.
+    }
+  }
 }
-- 
GitLab


From 89c887558d8b0067213c39a79d5d048d3422b6dd Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Fri, 5 Oct 2018 16:02:49 -0700
Subject: [PATCH 1214/1357] [TF:XLA] Bump open source abseil revision to
 e821380d69a549dc64900693942789d21aa4df5e

PiperOrigin-RevId: 215981413
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b9ced1bd6c..6f5aa85b01 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -112,11 +112,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "507903ef9353cb25cccd0a6840048fdd348fd20e98314d694f04a990c0f277e3",
-        strip_prefix = "abseil-cpp-f21d187b80e3b7f08fb279775ea9c8b48c636030",
+        sha256 = "f186bf5d9fce3037c602a21f86facbdd317adecef36e1726ec7bc7b496943a82",
+        strip_prefix = "abseil-cpp-e821380d69a549dc64900693942789d21aa4df5e",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz",
         ],
     )
 
-- 
GitLab


From 1daaf0fabee1c59af00e14f358d08ac9f5390b9f Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 5 Oct 2018 16:32:30 -0700
Subject: [PATCH 1215/1357] Orders non-resource-affecting stateful ops in
 defuns.

PiperOrigin-RevId: 215985679
---
 tensorflow/python/eager/function.py                |  7 +++++++
 tensorflow/python/kernel_tests/logging_ops_test.py | 13 +++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 2750461fb2..f06148b5d2 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1906,8 +1906,10 @@ class AutomaticControlDependencies(object):
               last_op_using_resource_tensor[inp] = op
         ops_which_must_run = set([op])
         continue
+      found_resource = False
       for inp in op.inputs:
         if inp.dtype == dtypes_module.resource:
+          found_resource = True
           # Deal with switches, finally.
           if inp.op.type == "Switch":
             self._process_switch(inp.op, ops_which_must_run,
@@ -1922,6 +1924,11 @@ class AutomaticControlDependencies(object):
           if inp in merge_for_resource:
             merge_for_resource[inp]._add_control_input(op)  # pylint: disable=protected-access
           last_op_using_resource_tensor[inp] = op
+      if (op.op_def.is_stateful and not found_resource
+          and op._control_flow_context is None):  # pylint: disable=protected-access
+        if None in last_op_using_resource_tensor:
+          op._add_control_input(last_op_using_resource_tensor[None])  # pylint: disable=protected-access
+        last_op_using_resource_tensor[None] = op
       control_inputs = [c for c in control_inputs
                         if c._control_flow_context is op._control_flow_context]  # pylint: disable=protected-access
       op._add_control_inputs(control_inputs)  # pylint: disable=protected-access
diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py
index 4beddd00bb..2f19ecc0e6 100644
--- a/tensorflow/python/kernel_tests/logging_ops_test.py
+++ b/tensorflow/python/kernel_tests/logging_ops_test.py
@@ -306,6 +306,19 @@ class PrintV2Test(test.TestCase):
           logging_ops.print_v2(tensor)
         self.assertTrue((expected + "\n") in printed.contents())
 
+  def testPrintsOrderedInDefun(self):
+    with context.eager_mode():
+
+      @function.defun
+      def prints():
+        logging_ops.print_v2("A")
+        logging_ops.print_v2("B")
+        logging_ops.print_v2("C")
+
+      with self.captureWritesToStream(sys.stderr) as printed:
+        prints()
+      self.assertTrue(("A\nB\nC\n") in printed.contents())
+
   @test_util.run_in_graph_and_eager_modes()
   def testPrintInDefunWithoutExplicitEvalOfPrint(self):
     @function.defun
-- 
GitLab


From 29af23aeadd1d6fccbfa4223b58dad8f5b8df4f8 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 5 Oct 2018 16:47:07 -0700
Subject: [PATCH 1216/1357] Fix api_compatibility_test diff for large files.
 assertEqual might be applied instead of assertMultiLineEqual if input is too
 large (https://bugs.python.org/issue11763). This change is switching to use
 unified_diff in that case.

PiperOrigin-RevId: 215987656
---
 tensorflow/python/util/protobuf/compare.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/util/protobuf/compare.py b/tensorflow/python/util/protobuf/compare.py
index a0e6bf65cf..3a3af4bffa 100644
--- a/tensorflow/python/util/protobuf/compare.py
+++ b/tensorflow/python/util/protobuf/compare.py
@@ -63,6 +63,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import difflib
 
 import six
 
@@ -101,10 +102,19 @@ def assertProtoEqual(self, a, b, check_initialized=True,  # pylint: disable=inva
     if normalize_numbers:
       NormalizeNumberFields(pb)
 
-  self.assertMultiLineEqual(
-      text_format.MessageToString(a, descriptor_pool=pool),
-      text_format.MessageToString(b, descriptor_pool=pool),
-      msg=msg)
+  a_str = text_format.MessageToString(a, descriptor_pool=pool)
+  b_str = text_format.MessageToString(b, descriptor_pool=pool)
+
+  # Some Python versions would perform regular diff instead of multi-line
+  # diff if string is longer than 2**16. We substitute this behavior
+  # with a call to unified_diff instead to have easier-to-read diffs.
+  # For context, see: https://bugs.python.org/issue11763.
+  if len(a_str) < 2**16 and len(b_str) < 2**16:
+    self.assertMultiLineEqual(a_str, b_str, msg=msg)
+  else:
+    diff = '\n' + ''.join(difflib.unified_diff(a_str.splitlines(True),
+                                               b_str.splitlines(True)))
+    self.fail('%s : %s' % (msg, diff))
 
 
 def NormalizeNumberFields(pb):
-- 
GitLab


From 55081a9d21ab42834ac4fb70351e3d2ee13ef78b Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Fri, 5 Oct 2018 16:47:51 -0700
Subject: [PATCH 1217/1357] [XLA:GPU] Use a struct for the return value of
 CudnnConvolutionAlgorithmPicker::PickBestAlgorithm.

Using a struct lets us return additional data -- namely, the elapsed time to
run the best algo -- without adding a fourth entry to the tuple, which would be
confusing.

No functional change.

PiperOrigin-RevId: 215987795
---
 tensorflow/compiler/xla/service/gpu/BUILD     |  1 +
 .../gpu/cudnn_convolution_algorithm_picker.cc | 40 ++++++++-----------
 .../gpu/cudnn_convolution_algorithm_picker.h  | 11 ++++-
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 522e9f5948..7b84f691f6 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -404,6 +404,7 @@ cc_library(
         "//tensorflow/core:stream_executor_no_cuda",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
+        "@com_google_absl//absl/time",
         "@com_google_absl//absl/types:optional",
     ],
 )
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index 7125673887..590c0a7d54 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -145,7 +145,7 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) {
 // cache misses and doing extra work.  Overall, caching doesn't seem worth the
 // trouble, but we may want to revisit this if we ever find a model where
 // caching would speed up compilation a lot.
-StatusOr<std::tuple<int64, bool, int64>>
+StatusOr<CudnnConvolutionAlgorithmPicker::AutotuneResult>
 CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
     HloCustomCallInstruction* instr) {
   // TODO(timshen): for now only check fp16. It can be expanded to other types,
@@ -316,9 +316,10 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
             << AlgorithmToString(best_result.algorithm()) << ", takes "
             << best_result.elapsed_time_in_ms() << "ms, and uses "
             << best_result_bytes_used << "B of scratch memory.";
-    return std::make_tuple(best_result.algorithm().algo_id(),
-                           best_result.algorithm().tensor_ops_enabled(),
-                           best_result_bytes_used);
+    return AutotuneResult{best_result.algorithm().algo_id(),
+                          best_result.algorithm().tensor_ops_enabled(),
+                          best_result_bytes_used,
+                          absl::Milliseconds(best_result.elapsed_time_in_ms())};
   }
 
   return InternalError(
@@ -331,37 +332,30 @@ StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnInstruction(
     HloInstruction* instr) {
   CHECK(IsCustomCallToDnnConvolution(*instr));
 
-  StatusOr<std::tuple<int64, bool, int64>> alg_scratch_and_tc =
+  StatusOr<AutotuneResult> best_algo_or =
       PickBestAlgorithm(Cast<HloCustomCallInstruction>(instr));
-
-  if (!alg_scratch_and_tc.ok()) {
-    LOG(ERROR) << alg_scratch_and_tc.status();
+  if (!best_algo_or.ok()) {
+    LOG(ERROR) << best_algo_or.status();
     return false;
   }
 
-  int64 algorithm;
-  bool tensor_ops_enabled;
-  int64 scratch_bytes;
-
-  std::tie(algorithm, tensor_ops_enabled, scratch_bytes) =
-      alg_scratch_and_tc.ConsumeValueOrDie();
-
-  VLOG(1) << "Setting cudnn conv to use algorithm " << algorithm << " and "
-          << NumBytesToString(scratch_bytes)
+  auto best_algo = std::move(best_algo_or).ValueOrDie();
+  VLOG(1) << "Setting cudnn conv to use algorithm " << best_algo.algorithm
+          << " and " << NumBytesToString(best_algo.scratch_bytes)
           << " of scratch memory: " << instr->ToString()
-          << " tensor_ops_enabled: " << tensor_ops_enabled;
+          << " tensor_ops_enabled: " << best_algo.tensor_ops_enabled;
 
   // Replace instr with a new CustomCall which has the correct algorithm, and
   // whose output shape has the appropriate amount of scratch memory.
   HloComputation* computation = instr->parent();
-  Shape new_call_shape =
-      ShapeUtil::MakeTupleShape({instr->shape().tuple_shapes(0),
-                                 ShapeUtil::MakeShape(U8, {scratch_bytes})});
+  Shape new_call_shape = ShapeUtil::MakeTupleShape(
+      {instr->shape().tuple_shapes(0),
+       ShapeUtil::MakeShape(U8, {best_algo.scratch_bytes})});
 
   TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config,
                       instr->backend_config<CudnnConvBackendConfig>());
-  backend_config.set_algorithm(algorithm);
-  backend_config.set_tensor_ops_enabled(tensor_ops_enabled);
+  backend_config.set_algorithm(best_algo.algorithm);
+  backend_config.set_tensor_ops_enabled(best_algo.tensor_ops_enabled);
 
   HloInstruction* new_call = computation->AddInstruction(
       instr->CloneWithNewOperands(new_call_shape, instr->operands()));
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
index aeda2fc7f8..136c32210a 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_
 
+#include "absl/time/time.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/service/device_memory_allocator.h"
@@ -47,10 +48,16 @@ class CudnnConvolutionAlgorithmPicker : public HloModulePass {
   StatusOr<bool> Run(HloModule* module) override;
 
  private:
+  struct AutotuneResult {
+    int64 algorithm;
+    bool tensor_ops_enabled;
+    int64 scratch_bytes;
+    absl::Duration runtime;
+  };
+
   StatusOr<bool> RunOnComputation(HloComputation* computation);
   StatusOr<bool> RunOnInstruction(HloInstruction* instr);
-  StatusOr<std::tuple<int64, bool, int64>> PickBestAlgorithm(
-      HloCustomCallInstruction* instr);
+  StatusOr<AutotuneResult> PickBestAlgorithm(HloCustomCallInstruction* instr);
 
   se::StreamExecutor* stream_exec_;                   // never null
   DeviceMemoryAllocator* allocator_;                  // may be null
-- 
GitLab


From ab97f1323bd2a98d20ed82dc3ff8585481961f0d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 16:59:39 -0700
Subject: [PATCH 1218/1357] Automated rollback of commit
 d258207f1583df4faa452265b051879af6c15dac

PiperOrigin-RevId: 215989111
---
 tensorflow/python/ops/array_ops.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 4be9c532f4..e3e4d5f910 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1407,8 +1407,13 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
         gen_array_ops.conjugate_transpose
         if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose)
     if perm is None:
-      rank = gen_array_ops.rank(a)
-      perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
+      a = ops.convert_to_tensor(a, name="a")
+      if not a.get_shape().ndims:
+        rank = gen_array_ops.rank(a)
+        perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
+      else:
+        rank = a.get_shape().ndims
+        perm = (rank - 1) - np.arange(rank)
       ret = transpose_fn(a, perm, name=name)
       # NOTE(mrry): Setting the shape explicitly because
       #   reverse is not handled by the shape function.
-- 
GitLab


From 15d399cd8590c18dc643d979883fe4201c8ea631 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 5 Oct 2018 17:01:01 -0700
Subject: [PATCH 1219/1357] [tf.data vectorization] Feed inputs to vectorizers
 with notion of stackedness

PiperOrigin-RevId: 215989259
---
 .../optimizers/data/vectorization/BUILD       |  10 ++
 .../data/vectorization/cast_vectorizer.cc     |  16 +--
 .../data/vectorization/unpack_vectorizer.cc   |  16 +--
 .../data/vectorization/vectorizer.h           |  19 ++-
 .../data/vectorization/vectorizer_registry.cc |   2 -
 .../data/vectorization/vectorizer_registry.h  |  15 +--
 .../vectorization/vectorizer_registry_test.cc |  11 +-
 .../data/vectorization/wrapped_tensor.h       |  44 +++++++
 .../optimizers/data/vectorization_utils.cc    | 116 +++++++++---------
 9 files changed, 144 insertions(+), 105 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h

diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
index 37aa24b947..985d6c6c3a 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -12,10 +12,20 @@ VECTORIZER_DEPS = [
     "//tensorflow/core/grappler/optimizers/data:graph_utils",
 ] + tf_protos_all()
 
+cc_library(
+    name = "wrapped_tensor",
+    hdrs = ["wrapped_tensor.h"],
+    deps = [
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:lib",
+    ],
+)
+
 cc_library(
     name = "vectorizer",
     hdrs = ["vectorizer.h"],
     deps = [
+        ":wrapped_tensor",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:lib",
     ] + tf_protos_all(),
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
index 3af6bab409..f445157531 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
@@ -19,13 +19,13 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
+namespace {
 
 class CastVectorizer : public Vectorizer {
  public:
   Status Vectorize(const Node& node, Graph* outer_scope,
-                   std::vector<Port>* input_ports,
-                   std::vector<Port>* output_ports) override {
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
     Status s;
     if (node.num_inputs() != 1) {
       return errors::Internal("Cast op should only have one input.");
@@ -35,15 +35,17 @@ class CastVectorizer : public Vectorizer {
     auto new_cast_node = outer_scope->AddNode(node.def(), &s);
     TF_RETURN_IF_ERROR(s);
 
-    // Add input and output mappings
-    input_ports->push_back({new_cast_node, 0});
-    output_ports->push_back({new_cast_node, 0});
+    outer_scope->AddEdge(inputs[0].node, inputs[0].output_index, new_cast_node,
+                         0);
+
+    // Add output mappings
+    outputs->push_back({new_cast_node, 0, true});
     return Status::OK();
   }
 };
 
 REGISTER_VECTORIZER("Cast", CastVectorizer);
 
-}  // namespace vectorization_utils
+}  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
index 74ce520ce1..f1ba741821 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
@@ -19,15 +19,15 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
+namespace {
 
 class UnpackVectorizer : public Vectorizer {
  public:
   Status Vectorize(const Node& node, Graph* outer_scope,
-                   std::vector<Port>* input_ports,
-                   std::vector<Port>* output_ports) override {
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
     Status s;
-    if (node.num_inputs() != 1) {
+    if (node.num_inputs() != 1 || inputs.size() != 1) {
       return errors::Internal("Unpack op should only have one input.");
     }
 
@@ -39,13 +39,13 @@ class UnpackVectorizer : public Vectorizer {
     int new_axis = node.def().attr().at("axis").i() + 1;
     new_unpack_node->AddAttr("axis", new_axis);
 
-    // Add the input mappings
-    input_ports->push_back({new_unpack_node, 0});
+    outer_scope->AddEdge(inputs[0].node, inputs[0].output_index,
+                         new_unpack_node, 0);
 
     // Add the output mappings
     int num = node.def().attr().at("num").i();
     for (int i = 0; i < num; ++i) {
-      output_ports->push_back({new_unpack_node, i});
+      outputs->push_back({new_unpack_node, i, true});
     }
 
     return Status::OK();
@@ -54,6 +54,6 @@ class UnpackVectorizer : public Vectorizer {
 
 REGISTER_VECTORIZER("Unpack", UnpackVectorizer);
 
-}  // namespace vectorization_utils
+}  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
index 56eb88c95e..8d4676aae0 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
@@ -18,15 +18,12 @@ limitations under the License.
 
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
-
-// Describes a tensor with its operation Node and output position
-typedef std::pair<Node*, int> Port;
 
 // Interface for vectorization of TensorFlow operations. See `CastVectorizer`
 // for an example.
@@ -36,17 +33,17 @@ class Vectorizer {
 
   // Vectorizes an operation, `node`, by adding Node(s) to `outer_scope`
   // that produce the same vector output(s) as executing `node`'s op
-  // on elements of the vector inputs. The new Node(s) collectively have the
+  // on elements of `inputs`. The new Node(s) collectively have the
   // same number of input and output ports as the node being converted.
-  // Adds mappings for the new nodes' input and output ports to `inputs` and
-  // `outputs` respectively, where the i'th Port in inputs/outputs
-  // corresponds to the i'th input/output port of the node to be converted.
+  // Adds edges between the newly created nodes and nodes in `inputs`, and adds
+  // mappings to the new nodes' output ports to `outputs`, where the i'th
+  // value in `outputs` corresponds to the i'th output port of the node
+  // to be converted.
   virtual Status Vectorize(const Node& node, Graph* outer_scope,
-                           std::vector<Port>* input_ports,
-                           std::vector<Port>* output_ports) = 0;
+                           std::vector<WrappedTensor>&& inputs,
+                           std::vector<WrappedTensor>* outputs) = 0;
 };
 
-}  // namespace vectorization_utils
 }  // namespace grappler
 }  // namespace tensorflow
 #endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_H_
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc
index a6551e36ac..e1cf77a7d5 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc
@@ -19,7 +19,6 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
 
 VectorizerRegistry* VectorizerRegistry::Global() {
   static VectorizerRegistry* registry = new VectorizerRegistry;
@@ -42,6 +41,5 @@ void VectorizerRegistry::Register(const string& op_type,
   vectorizers_.insert(std::pair<const string&, std::unique_ptr<Vectorizer>>(
       op_type, std::move(vectorizer)));
 }
-}  // namespace vectorization_utils
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h
index 16159d47ca..ad54c74933 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h
@@ -23,7 +23,6 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
 
 // A global VectorizerRegistry is used to hold all the vectorizers.
 class VectorizerRegistry {
@@ -59,16 +58,12 @@ class VectorizerRegistration {
 #define REGISTER_VECTORIZER_UNIQ_HELPER(ctr, op_type, vectorizer) \
   REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer)
 
-#define REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer)                  \
-  static ::tensorflow::grappler::vectorization_utils::                      \
-      vectorizer_registration::VectorizerRegistration                       \
-          vectorizer_registration_##ctr(                                    \
-              op_type,                                                      \
-              ::std::unique_ptr<                                            \
-                  ::tensorflow::grappler::vectorization_utils::Vectorizer>( \
-                  new vectorizer()))
+#define REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer)                \
+  static ::tensorflow::grappler::vectorizer_registration::                \
+      VectorizerRegistration vectorizer_registration_##ctr(               \
+          op_type, ::std::unique_ptr<::tensorflow::grappler::Vectorizer>( \
+                       new vectorizer()))
 
-}  // namespace vectorization_utils
 }  // namespace grappler
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
index 663ceba027..054aeb9a8f 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
@@ -20,13 +20,12 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
 
 class TestVectorizer : public Vectorizer {
  public:
   Status Vectorize(const Node& node, Graph* outer_scope,
-                   std::vector<Port>* inputs,
-                   std::vector<Port>* outputs) override {
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
     return Status::OK();
   }
 };
@@ -43,10 +42,10 @@ TEST(TestVectorizer, TestTestVectorizer) {
   NodeDef node_def;
   Status s;
   Node* node = g.AddNode(node_def, &s);
-  std::vector<Port> inputs, outputs;
-  EXPECT_TRUE(vectorizer->Vectorize(*node, &g, &inputs, &outputs).ok());
+  std::vector<WrappedTensor> inputs, outputs;
+  EXPECT_TRUE(
+      vectorizer->Vectorize(*node, &g, std::move(inputs), &outputs).ok());
 }
 
-}  // namespace vectorization_utils
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h b/tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h
new file mode 100644
index 0000000000..4439b4ab4e
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h
@@ -0,0 +1,44 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_WRAPPED_TENSOR_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_WRAPPED_TENSOR_H_
+
+#include "tensorflow/core/graph/graph.h"
+
+namespace tensorflow {
+namespace grappler {
+
+// Represents a tensor that has been vectorized.
+struct WrappedTensor {
+  Node* const node;
+  const int output_index;
+
+  // Whether the tensor is stacked, i.e. represents the results of applying
+  // the operation on all slices of the input, where each row i of the
+  // tensor corresponds to the op's output on slice i of the input. False
+  // if the tensor is not stacked, i.e. represents the result of the op on
+  // a single slice of the input, where the result does not vary between
+  // slices.
+  bool stacked;
+
+  WrappedTensor(Node* node, int output_index, bool stacked)
+      : node(node), output_index(output_index), stacked(stacked) {}
+};
+
+}  // namespace grappler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_WRAPPED_TENSOR_H_
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index 344c420902..ba857ab5d9 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -45,22 +45,6 @@ namespace {
 // Describes a tensor with its operation Node and output position
 typedef std::pair<Node*, int> TensorDesc;
 
-// Equivalent to python Pfor's WrappedTensor struct
-struct WrappedTensor {
-  TensorDesc tensor;
-
-  // Whether the tensor is stacked, i.e. represents the results of applying
-  // the operation on all slices of the input, where each row i of the
-  // tensor corresponds to the op's output on slice i of the input. False
-  // if the tensor is not stacked, i.e. represents the result of the op on
-  // a single slice of the input, where the result does not vary between
-  // slices.
-  bool stacked;
-
-  WrappedTensor(TensorDesc&& tensor, bool stacked)
-      : tensor(std::move(tensor)), stacked(stacked) {}
-};
-
 const char* const kRetValOp = "_Retval";
 
 void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src,
@@ -239,34 +223,48 @@ Status Vectorization::AddConversionMapping(Node* op_node) {
     return errors::Unimplemented("No vectorizer registered for op: ",
                                  op_node->type_string());
   }
-  std::vector<Port> input_ports, output_ports;
-  input_ports.reserve(op_node->num_inputs());
-  output_ports.reserve(op_node->num_outputs());
-  TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(),
-                                           &input_ports, &output_ports));
+  std::vector<WrappedTensor> inputs, outputs;
+  inputs.reserve(op_node->num_inputs());
+  outputs.reserve(op_node->num_outputs());
 
   std::vector<const Edge*> input_edges;
   TF_RETURN_IF_ERROR(op_node->input_edges(&input_edges));
 
-  if (op_node->num_outputs() != output_ports.size() ||
-      op_node->num_inputs() != input_ports.size() ||
-      input_edges.size() != input_ports.size()) {
-    return errors::Internal("Vectorizer inputs/outputs don't match.");
-  }
-
-  // Promote the inputs of the op to MapDefun outputs and connect the edges
-  // accordingly.
+  // The inputs for the node to be converted may already have been converted
+  // themselves. For those that are not, we promote them to MapDefun outputs.
   for (size_t i = 0; i < op_node->num_inputs(); ++i) {
     auto edge = input_edges[i];
-    TF_RETURN_IF_ERROR(AddMapDefunOutput(map_defun_fn_.get(), map_defun_node_,
-                                         {edge->src(), edge->src_output()}));
-    outer_scope_->AddEdge(map_defun_node_, map_defun_fn_->ret_nodes.size() - 1,
-                          input_ports[i].first, input_ports[i].second);
+    if (auto found = gtl::FindOrNull(conversion_map_,
+                                     {edge->src(), edge->src_output()})) {
+      inputs.push_back(*found);
+    } else {
+      // TODO(rachelim): Handle the case where unconverted inputs are unstacked.
+      // We assume that all unconverted inputs will be stacked, since we
+      // converted all unstacked nodes in `Initialize`. However, it's actually
+      // possible that yet-unconverted nodes may produce unstacked outputs after
+      // they are vectorized. (For example, see the "Shape" converter in
+      // tensorflow/python/ops/parallel_for/pfor.py). If a vectorizer expects
+      // an unstacked input but receives a stacked one, vectorizer->Vectorize
+      // will return an error.
+      TF_RETURN_IF_ERROR(AddMapDefunOutput(map_defun_fn_.get(), map_defun_node_,
+                                           {edge->src(), edge->src_output()}));
+      int output_index = map_defun_fn_->ret_nodes.size() - 1;
+      inputs.push_back({map_defun_node_, output_index, true});
+    }
+  }
+
+  TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(),
+                                           std::move(inputs), &outputs));
+
+  if (op_node->num_outputs() != outputs.size()) {
+    return errors::Internal(
+        "Number of vectorizer outputs does not match. Expected: ",
+        op_node->num_outputs(), " Actual: ", outputs.size());
   }
 
   // Add output mappings.
   for (size_t i = 0; i < op_node->num_outputs(); ++i) {
-    conversion_map_.insert({{op_node, i}, {std::move(output_ports[i]), true}});
+    conversion_map_.insert({{op_node, i}, outputs[i]});
   }
 
   return Status::OK();
@@ -281,25 +279,22 @@ Status Vectorization::ConvertOutput(int output_position) {
 
   TensorDesc output({ret_edge->src(), ret_edge->src_output()});
   TensorDesc converted_output;
-  if (auto found = gtl::FindOrNull(conversion_map_, output)) {
-    // It's possible the output already has a mapping, if it comes from a node
-    // that has already been converted.
-    if (found->stacked) {
-      converted_output = found->tensor;
-    } else {
-      // Some outputs may be unstacked if they don't derive from arg nodes
-      // (for example, if a function returns a constant). For these, we
-      // have to add extra nodes to tile it in the 0th dimension.
-      TF_RETURN_IF_ERROR(StackTensor(found, &converted_output));
-    }
-  } else {
-    // Note: All unstacked nodes are converted ahead of time in `Initialize`,
-    // and here we assume that all op vectorizers create only stacked outputs.
-    // This may not hold in the future, as more vectorizers are added that
-    // may actually create unstacked outputs. For example, see the `Shape`
-    // converter in third_party/tensorflow/python/ops/parallel_for/pfor.py
+
+  // It's possible the output already has a mapping, if it comes from a node
+  // that has already been converted.
+  auto found = gtl::FindOrNull(conversion_map_, output);
+  if (!found) {
     TF_RETURN_IF_ERROR(AddConversionMapping(output.first));
-    converted_output = conversion_map_.at(output).tensor;
+    found = &conversion_map_.at(output);
+  }
+
+  if (found->stacked) {
+    converted_output = {found->node, found->output_index};
+  } else {
+    // Some outputs may be unstacked if they don't derive from arg nodes
+    // (for example, if a function returns a constant). For these, we
+    // have to add extra nodes to tile it in the 0th dimension.
+    TF_RETURN_IF_ERROR(StackTensor(found, &converted_output));
   }
 
   ReplaceEdgeSources({map_defun_node_, output_position}, converted_output,
@@ -455,7 +450,7 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked,
 
   Node* ones_shape;
   TF_RETURN_IF_ERROR(node_builder("Shape")
-                         .Input(unstacked->tensor.first)  // input
+                         .Input(unstacked->node)  // input
                          .Finalize(g, &ones_shape));
 
   Node* ones;
@@ -473,8 +468,8 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked,
 
   Node* expand_dims;
   TF_RETURN_IF_ERROR(node_builder("ExpandDims")
-                         .Input(unstacked->tensor.first)  // input
-                         .Input(const_0)                  // dim
+                         .Input(unstacked->node)  // input
+                         .Input(const_0)          // dim
                          .Finalize(g, &expand_dims));
 
   TF_RETURN_IF_ERROR(node_builder("Tile")
@@ -491,11 +486,11 @@ Status Vectorization::AddArgNodeMappings() {
     TF_RETURN_IF_ERROR(map_defun_node_->input_node(
         arg_node->attrs().Find("index")->i(), &input_node));
 
-    conversion_map_.insert({{arg_node, 0}, {{input_node, 0}, true}});
+    conversion_map_.insert({{arg_node, 0}, {input_node, 0, true}});
 
     // Control inputs
     conversion_map_.insert({{arg_node, Graph::kControlSlot},
-                            {{input_node, Graph::kControlSlot}, true}});
+                            {input_node, Graph::kControlSlot, true}});
   }
   return Status::OK();
 }
@@ -541,7 +536,7 @@ bool Vectorization::AddUnstackedNodeMappingsHelper(TensorDesc&& tensor,
 
     if (auto found = gtl::FindOrNull(conversion_map_,
                                      {edge->src(), edge->src_output()})) {
-      outer_scope_->AddEdge(found->tensor.first, found->tensor.second, node,
+      outer_scope_->AddEdge(found->node, found->output_index, node,
                             edge->dst_input());
     } else {
       status->Update(errors::Internal(
@@ -552,11 +547,10 @@ bool Vectorization::AddUnstackedNodeMappingsHelper(TensorDesc&& tensor,
 
   // Add output mappings
   for (int i = 0; i < tensor.first->num_outputs(); ++i) {
-    conversion_map_.insert(
-        {{tensor.first, i}, WrappedTensor({node, i}, false)});
+    conversion_map_.insert({{tensor.first, i}, WrappedTensor(node, i, false)});
   }
   conversion_map_.insert({{tensor.first, Graph::kControlSlot},
-                          WrappedTensor({node, Graph::kControlSlot}, false)});
+                          WrappedTensor(node, Graph::kControlSlot, false)});
 
   return true;
 }
-- 
GitLab


From 4831740f90eaf266a99d3ffa7d390d54325b689f Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Fri, 5 Oct 2018 17:05:17 -0700
Subject: [PATCH 1220/1357] [XLA:GPU] Remove hidden flag for disabling
 heuristic layout assignment.

Heuristic NCHW/NHWC layout assignment works great; we've never had to flip this
flag.  Might as well remove it and simplify things a bit.

PiperOrigin-RevId: 215989807
---
 tensorflow/compiler/xla/service/gpu/BUILD     | 11 -------
 .../xla/service/gpu/gpu_layout_assignment.cc  | 11 ++-----
 .../compiler/xla/service/gpu/gpu_options.cc   | 28 ----------------
 .../compiler/xla/service/gpu/gpu_options.h    | 33 -------------------
 4 files changed, 2 insertions(+), 81 deletions(-)
 delete mode 100644 tensorflow/compiler/xla/service/gpu/gpu_options.cc
 delete mode 100644 tensorflow/compiler/xla/service/gpu/gpu_options.h

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 7b84f691f6..350fd32537 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -781,7 +781,6 @@ cc_library(
     srcs = ["gpu_layout_assignment.cc"],
     hdrs = ["gpu_layout_assignment.h"],
     deps = [
-        ":gpu_options",
         ":ir_emission_utils",
         ":stream_executor_util",
         "//tensorflow/compiler/xla:shape_util",
@@ -882,16 +881,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "gpu_options",
-    srcs = ["gpu_options.cc"],
-    hdrs = ["gpu_options.h"],
-    deps = [
-        "//tensorflow/compiler/xla/service:hlo_module_config",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
 cc_library(
     name = "stream_executor_util",
     srcs = ["stream_executor_util.cc"],
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index 74352f26aa..1ffe855750 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <memory>
 
 #include "tensorflow/compiler/xla/layout_util.h"
-#include "tensorflow/compiler/xla/service/gpu/gpu_options.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
@@ -125,14 +124,8 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall(
     DataLayout input;
     FilterLayout filter;
     DataLayout output;
-    if (ConvUseLayoutHeuristic(instr->GetModule()->config())) {
-      std::tie(input, filter, output) =
-          HeuristicLayoutAssignment(instr, stream_executor_);
-    } else {
-      input = DataLayout::kBatchDepthYX;
-      filter = FilterLayout::kOutputInputYX;
-      output = DataLayout::kBatchDepthYX;
-    }
+    std::tie(input, filter, output) =
+        HeuristicLayoutAssignment(instr, stream_executor_);
 
     TF_ASSIGN_OR_RETURN(
         std::tie(*input_shape->mutable_layout(),
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.cc b/tensorflow/compiler/xla/service/gpu/gpu_options.cc
deleted file mode 100644
index 35b4b4e20b..0000000000
--- a/tensorflow/compiler/xla/service/gpu/gpu_options.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/gpu/gpu_options.h"
-#include "tensorflow/core/lib/gtl/map_util.h"
-
-namespace xla {
-namespace gpu {
-
-bool ConvUseLayoutHeuristic(const HloModuleConfig& config) {
-  return !config.debug_options().xla_backend_extra_options().count(
-      "xla_gpu_experimental_conv_disable_layout_heuristic");
-}
-
-}  // namespace gpu
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.h b/tensorflow/compiler/xla/service/gpu/gpu_options.h
deleted file mode 100644
index 498d4a9495..0000000000
--- a/tensorflow/compiler/xla/service/gpu/gpu_options.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_
-
-#include "tensorflow/compiler/xla/service/hlo_module_config.h"
-
-// Helper functions for querying options that are specific to the GPU backend.
-
-namespace xla {
-namespace gpu {
-
-// Returns true if we should use heuristics to assign convolution layouts, as
-// opposed to always assigning NCHW.
-bool ConvUseLayoutHeuristic(const HloModuleConfig& config);
-
-}  // namespace gpu
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_
-- 
GitLab


From 213d76a6ed77a696883502c53a3a4f81d2ee4042 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Fri, 5 Oct 2018 17:34:30 -0700
Subject: [PATCH 1221/1357] Simply the logic for bubbling captured tensors when
 building cond_v2 grad. The current logic tries to bubble the forward pass
 tensor to the outermost graph. That might not always be do-able e.g. when the
 cond is inside a while loop it will need to know accumulator logic for
 while_loop. So instead, the cond_grad now captures tensors from the forward
 If op's graph. When the grad If op is built these tensors will be
 appropriately captured by the surrounding FuncGraph.

PiperOrigin-RevId: 215993009
---
 .../kernel_tests/control_flow_ops_py_test.py  |  6 +--
 tensorflow/python/ops/cond_v2_impl.py         | 48 ++++++++-----------
 2 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 7fae5249aa..baea5c0f6d 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -661,8 +661,7 @@ class ControlFlowTest(test.TestCase):
       sess.run(r)
 
   def testCondGrad_1(self):
-    graph = ops.Graph()
-    with graph.as_default():
+    with self.cached_session():
       x = constant_op.constant(10.0, name="x")
       pred = math_ops.less(1, 2)
       fn1 = lambda: array_ops.identity(x)
@@ -670,8 +669,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
 
       grad = gradients_impl.gradients(r, [x])[0]
-      with self.cached_session():
-        self.assertAllEqual(1.0, grad.eval())
+      self.assertAllEqual(1.0, grad.eval())
 
   def testCondGrad_2(self):
     with self.cached_session():
diff --git a/tensorflow/python/ops/cond_v2_impl.py b/tensorflow/python/ops/cond_v2_impl.py
index 195ad11c71..c9aa4d4889 100644
--- a/tensorflow/python/ops/cond_v2_impl.py
+++ b/tensorflow/python/ops/cond_v2_impl.py
@@ -282,9 +282,10 @@ def _resolve_grad_inputs(cond_graph, grad_graph):
      as is.
   2. Tensors in the forward pass graph. These tensors may not be "live"
      when the gradient is being computed. We replace such references by their
-     corresponding tensor in the least common ancestor graph of `grad_graph` and
-     `cond_graph`. Since we export intermediate tensors for all branch
-     functions, this is always possible.
+     corresponding tensor in `cond_graph.outer_graph`. In the case of nested
+     control flow or functions, the gradient logic handling
+     `grad_graph.outer_graph` will make sure the tensor from
+     `cond_graph.outer_graph` is also correctly captured.
 
   Args:
     cond_graph: function.FuncGraph. The forward-pass function.
@@ -296,24 +297,23 @@ def _resolve_grad_inputs(cond_graph, grad_graph):
   new_inputs = []
 
   for t in grad_graph.external_captures:
+    # `t` must either be in `grad_graph.outer_graph` or in the forward
+    # `cond_graph`.
     if t.graph != grad_graph.outer_graph:
-      # `t` is a tensor in `cond_graph` or one of its ancestors. We bubble this
-      # tensor to the least common ancestor of the `cond_graph` and
-      # `grad_graph` so that it is "in-scope" for `grad_graph`.
-      # TODO(srbs): `_is_ancestor` calls may be expensive. Compute the least
-      # common ancestor once and re-use.
-      assert _is_ancestor(cond_graph, t.graph)
-      while not _is_ancestor(grad_graph, t.graph):
-        assert isinstance(t.graph, _function.FuncGraph)
-        if t in t.graph.internal_captures:
-          # TODO(srbs): Consider building a map of internal_captures ->
-          # external_captures instead of searching for `t` twice.
-          t = t.graph.external_captures[t.graph.internal_captures.index(t)]
-        else:
-          # Note: All intermediate tensors are output by the If op.
-          # TODO(srbs): .index() calls may be expensive. Optimize.
-          t = t.graph._if.outputs[t.graph.outputs.index(t)]
-      assert _is_ancestor(grad_graph, t.graph)
+      assert t.graph == cond_graph
+      # `internal_captures` are not treated as intermediates and hence not added
+      # to If op outputs. So we get the outer tensor corresponding to those
+      # from the list of `external_captures`.
+      try:
+        t = t.graph._if.outputs[t.graph.outputs.index(t)]
+      except ValueError:
+        index = t.graph.internal_captures.index(t)
+        t = t.graph.external_captures[index]
+
+      # Note: We rely on the capturing logic of the gradient If op graph to
+      # correctly capture the tensors in `cond_graph.outer_graph`. Both cond_v2
+      # and while_v2 handle this while building their gradient functions.
+      assert t.graph == cond_graph.outer_graph
     new_inputs.append(t)
 
   return new_inputs
@@ -492,11 +492,3 @@ def _get_output_shapes(true_graph_outputs, false_graph_outputs):
       for t_out, f_out in zip(true_graph_outputs, false_graph_outputs)
   ]
   return output_shapes
-
-
-def _is_ancestor(graph, maybe_ancestor):
-  if maybe_ancestor == graph:
-    return True
-  if isinstance(graph, _function.FuncGraph):
-    return _is_ancestor(graph.outer_graph, maybe_ancestor)
-  return False
-- 
GitLab


From 1484bad99cfd46cb63a839643cfce917b6f0cdd8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 19:18:32 -0700
Subject: [PATCH 1222/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 216000752
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 224 ++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  59 +++++
 2 files changed, 283 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 780c6f6448..0753316724 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -70896,6 +70896,62 @@ op {
     }
   }
 }
+op {
+  name: "StatelessRandomNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessRandomUniform"
   input_arg {
@@ -70993,6 +71049,118 @@ op {
     }
   }
 }
+op {
+  name: "StatelessRandomUniform"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "StatelessRandomUniformInt"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  input_arg {
+    name: "minval"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "maxval"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessTruncatedNormal"
   input_arg {
@@ -71090,6 +71258,62 @@ op {
     }
   }
 }
+op {
+  name: "StatelessTruncatedNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessWhile"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 0d8997c1bd..14cc9df9a2 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -32978,6 +32978,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -33033,6 +33034,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -33065,6 +33067,62 @@ op {
     }
   }
 }
+op {
+  name: "StatelessRandomUniformInt"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  input_arg {
+    name: "minval"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "maxval"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessTruncatedNormal"
   input_arg {
@@ -33088,6 +33146,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
-- 
GitLab


From 45f594a0bce42787356700c0e20f5fbc47193fa3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 19:45:59 -0700
Subject: [PATCH 1223/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216001984

---
 tensorflow/go/op/wrappers.go | 712 +++++++++++++++++------------------
 1 file changed, 356 insertions(+), 356 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index a7bbb80c82..5d17605e37 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -9640,36 +9640,6 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...
 	return op.Output(0)
 }
 
-// Returns the element-wise sum of a list of tensors.
-//
-// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
-// wait for all of its inputs to be ready before beginning to sum. This can
-// save memory if inputs are ready at different times, since minimum temporary
-// storage is proportional to the output size rather than the inputs size.
-//
-// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.
-//
-// Returns a `Tensor` of same shape and type as the elements of `inputs`.
-//
-// Arguments:
-//	inputs: A list of `Tensor` objects, each with same shape and type.
-//	shape: Shape of elements of `inputs`.
-func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"shape": shape}
-	opspec := tf.OpSpec{
-		Type: "AccumulateNV2",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // RandomShuffleAttr is an optional argument to RandomShuffle.
 type RandomShuffleAttr func(optionalAttr)
 
@@ -10383,206 +10353,65 @@ func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.
 	return scope.AddOperation(opspec)
 }
 
-// Encode audio data using the WAV file format.
-//
-// This operation will generate a string suitable to be saved out to create a .wav
-// audio file. It will be encoded in the 16-bit PCM format. It takes in float
-// values in the range -1.0f to 1.0f, and any outside that value will be clamped to
-// that range.
-//
-// `audio` is a 2-D float Tensor of shape `[length, channels]`.
-// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
-//
-// Arguments:
-//	audio: 2-D with shape `[length, channels]`.
-//	sample_rate: Scalar containing the sample frequency.
-//
-// Returns 0-D. WAV-encoded file contents.
-func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "EncodeWav",
-		Input: []tf.Input{
-			audio, sample_rate,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes atan of x element-wise.
-func Atan(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Atan",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax.
-type ResourceApplyAdaMaxAttr func(optionalAttr)
-
-// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var, m, and v tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the AdaMax algorithm.
+// Locks a mutex resource.  The output is the lock.  So long as the lock tensor
 //
-// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-// v_t <- max(beta2 * v_{t-1}, abs(g))
-// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)
+// is alive, any other request to use `MutexLock` with this mutex will wait.
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	v: Should be from a Variable().
-//	beta1_power: Must be a scalar.
-//	lr: Scaling factor. Must be a scalar.
-//	beta1: Momentum factor. Must be a scalar.
-//	beta2: Momentum factor. Must be a scalar.
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
+// This is particularly useful for creating a critical section when used in
+// conjunction with `MutexLockIdentity`:
 //
-// Returns the created operation.
-func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdaMax",
-		Input: []tf.Input{
-			var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// AssertAttr is an optional argument to Assert.
-type AssertAttr func(optionalAttr)
-
-// AssertSummarize sets the optional summarize attribute to value.
+// ```python
 //
-// value: Print this many entries of each tensor.
-// If not specified, defaults to 3
-func AssertSummarize(value int64) AssertAttr {
-	return func(m optionalAttr) {
-		m["summarize"] = value
-	}
-}
-
-// Asserts that the given condition is true.
+// mutex = mutex_v2(
+//   shared_name=handle_name, container=container, name=name)
 //
-// If `condition` evaluates to false, print the list of tensors in `data`.
-// `summarize` determines how many entries of the tensors to print.
+// def execute_in_critical_section(fn, *args, **kwargs):
+//   lock = gen_resource_variable_ops.mutex_lock(mutex)
 //
-// Arguments:
-//	condition: The condition to evaluate.
-//	data: The tensors to print out when condition is false.
+//   with ops.control_dependencies([lock]):
+//     r = fn(*args, **kwargs)
 //
-// Returns the created operation.
-func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Assert",
-		Input: []tf.Input{
-			condition, tf.OutputList(data),
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Split a `SparseTensor` into `num_split` tensors along one dimension.
+//   with ops.control_dependencies(nest.flatten(r)):
+//     with ops.colocate_with(mutex):
+//       ensure_lock_exists = mutex_lock_identity(lock)
 //
-// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
-// `[0 : shape[split_dim] % num_split]` gets one extra dimension.
-// For example, if `split_dim = 1` and `num_split = 2` and the input is
+//     # Make sure that if any element of r is accessed, all of
+//     # them are executed together.
+//     r = nest.map_structure(tf.identity, r)
 //
-//     input_tensor = shape = [2, 7]
-//     [    a   d e  ]
-//     [b c          ]
+//   with ops.control_dependencies([ensure_lock_exists]):
+//     return nest.map_structure(tf.identity, r)
+// ```
 //
-// Graphically the output tensors are:
+// While `fn` is running in the critical section, no other functions which wish to
+// use this critical section may run.
 //
-//     output_tensor[0] = shape = [2, 4]
-//     [    a  ]
-//     [b c    ]
+// Often the use case is that two executions of the same graph, in parallel,
+// wish to run `fn`; and we wish to ensure that only one of them executes
+// at a time.  This is especially important if `fn` modifies one or more
+// variables at a time.
 //
-//     output_tensor[1] = shape = [2, 3]
-//     [ d e  ]
-//     [      ]
+// It is also useful if two separate functions must share a resource, but we
+// wish to ensure the usage is exclusive.
 //
 // Arguments:
-//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
-// `[0, rank(shape))`.
-//	indices: 2-D tensor represents the indices of the sparse tensor.
-//	values: 1-D tensor represents the values of the sparse tensor.
-//	shape: 1-D. tensor represents the shape of the sparse tensor.
-// output indices: A list of 1-D tensors represents the indices of the output
-// sparse tensors.
-//	num_split: The number of ways to split.
+//	mutex: The mutex resource to lock.
 //
-// Returns A list of 1-D tensors represents the values of the output sparse
-// tensors.A list of 1-D tensors represents the shape of the output sparse
-// tensors.
-func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) {
+// Returns A tensor that keeps a shared pointer to a lock on the mutex;
+// when the Tensor is destroyed, the use count on the shared pointer is decreased
+// by 1.  When it reaches 0, the lock is released.
+func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_split": num_split}
 	opspec := tf.OpSpec{
-		Type: "SparseSplit",
+		Type: "MutexLock",
 		Input: []tf.Input{
-			split_dim, indices, values, shape,
+			mutex,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	return output_indices, output_values, output_shape
+	return op.Output(0)
 }
 
 // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
@@ -11611,89 +11440,321 @@ func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToN
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StringToNumber",
+		Type: "StringToNumber",
+		Input: []tf.Input{
+			string_tensor,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2.
+type ResourceApplyFtrlV2Attr func(optionalAttr)
+
+// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the Ftrl-proximal scheme.
+//
+// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
+// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
+// linear += grad_with_shrinkage +
+//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+// accum = accum_new
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	linear: Should be from a Variable().
+//	grad: The gradient.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regulariation. Must be a scalar.
+//	l2: L2 shrinkage regulariation. Must be a scalar.
+//
+//	lr_power: Scaling factor. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyFtrlV2",
+		Input: []tf.Input{
+			var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`.
+//
+// This Op does not require `a_indices` be sorted in standard lexicographic order.
+//
+// Arguments:
+//	a_indices: 2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`.
+//	a_values: 1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`.
+//	a_shape: 1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`.
+//	b: `ndims`-D Tensor.  With shape `a_shape`.
+func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseTensorDenseAdd",
+		Input: []tf.Input{
+			a_indices, a_values, a_shape, b,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Encode audio data using the WAV file format.
+//
+// This operation will generate a string suitable to be saved out to create a .wav
+// audio file. It will be encoded in the 16-bit PCM format. It takes in float
+// values in the range -1.0f to 1.0f, and any outside that value will be clamped to
+// that range.
+//
+// `audio` is a 2-D float Tensor of shape `[length, channels]`.
+// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
+//
+// Arguments:
+//	audio: 2-D with shape `[length, channels]`.
+//	sample_rate: Scalar containing the sample frequency.
+//
+// Returns 0-D. WAV-encoded file contents.
+func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "EncodeWav",
+		Input: []tf.Input{
+			audio, sample_rate,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes atan of x element-wise.
+func Atan(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Atan",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax.
+type ResourceApplyAdaMaxAttr func(optionalAttr)
+
+// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var, m, and v tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the AdaMax algorithm.
+//
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// v_t <- max(beta2 * v_{t-1}, abs(g))
+// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	v: Should be from a Variable().
+//	beta1_power: Must be a scalar.
+//	lr: Scaling factor. Must be a scalar.
+//	beta1: Momentum factor. Must be a scalar.
+//	beta2: Momentum factor. Must be a scalar.
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyAdaMax",
+		Input: []tf.Input{
+			var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// AssertAttr is an optional argument to Assert.
+type AssertAttr func(optionalAttr)
+
+// AssertSummarize sets the optional summarize attribute to value.
+//
+// value: Print this many entries of each tensor.
+// If not specified, defaults to 3
+func AssertSummarize(value int64) AssertAttr {
+	return func(m optionalAttr) {
+		m["summarize"] = value
+	}
+}
+
+// Asserts that the given condition is true.
+//
+// If `condition` evaluates to false, print the list of tensors in `data`.
+// `summarize` determines how many entries of the tensors to print.
+//
+// Arguments:
+//	condition: The condition to evaluate.
+//	data: The tensors to print out when condition is false.
+//
+// Returns the created operation.
+func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Assert",
 		Input: []tf.Input{
-			string_tensor,
+			condition, tf.OutputList(data),
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2.
-type ResourceApplyFtrlV2Attr func(optionalAttr)
-
-// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
+// Split a `SparseTensor` into `num_split` tensors along one dimension.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the Ftrl-proximal scheme.
+// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
+// `[0 : shape[split_dim] % num_split]` gets one extra dimension.
+// For example, if `split_dim = 1` and `num_split = 2` and the input is
 //
-// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
-// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
-// linear += grad_with_shrinkage +
-//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-// accum = accum_new
+//     input_tensor = shape = [2, 7]
+//     [    a   d e  ]
+//     [b c          ]
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	linear: Should be from a Variable().
-//	grad: The gradient.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regulariation. Must be a scalar.
-//	l2: L2 shrinkage regulariation. Must be a scalar.
+// Graphically the output tensors are:
 //
-//	lr_power: Scaling factor. Must be a scalar.
+//     output_tensor[0] = shape = [2, 4]
+//     [    a  ]
+//     [b c    ]
 //
-// Returns the created operation.
-func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) {
+//     output_tensor[1] = shape = [2, 3]
+//     [ d e  ]
+//     [      ]
+//
+// Arguments:
+//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
+// `[0, rank(shape))`.
+//	indices: 2-D tensor represents the indices of the sparse tensor.
+//	values: 1-D tensor represents the values of the sparse tensor.
+//	shape: 1-D. tensor represents the shape of the sparse tensor.
+// output indices: A list of 1-D tensors represents the indices of the output
+// sparse tensors.
+//	num_split: The number of ways to split.
+//
+// Returns A list of 1-D tensors represents the values of the output sparse
+// tensors.A list of 1-D tensors represents the shape of the output sparse
+// tensors.
+func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"num_split": num_split}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyFtrlV2",
+		Type: "SparseSplit",
 		Input: []tf.Input{
-			var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power,
+			split_dim, indices, values, shape,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	return output_indices, output_values, output_shape
 }
 
-// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`.
+// Returns the element-wise sum of a list of tensors.
 //
-// This Op does not require `a_indices` be sorted in standard lexicographic order.
+// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
+// wait for all of its inputs to be ready before beginning to sum. This can
+// save memory if inputs are ready at different times, since minimum temporary
+// storage is proportional to the output size rather than the inputs size.
+//
+// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.
+//
+// Returns a `Tensor` of same shape and type as the elements of `inputs`.
 //
 // Arguments:
-//	a_indices: 2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`.
-//	a_values: 1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`.
-//	a_shape: 1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`.
-//	b: `ndims`-D Tensor.  With shape `a_shape`.
-func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) {
+//	inputs: A list of `Tensor` objects, each with same shape and type.
+//	shape: Shape of elements of `inputs`.
+func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"shape": shape}
 	opspec := tf.OpSpec{
-		Type: "SparseTensorDenseAdd",
+		Type: "AccumulateNV2",
 		Input: []tf.Input{
-			a_indices, a_values, a_shape, b,
+			tf.OutputList(inputs),
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
@@ -13925,67 +13986,6 @@ func CudnnRNNBackpropV2(scope *Scope, input tf.Output, input_h tf.Output, input_
 	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
 
-// Locks a mutex resource.  The output is the lock.  So long as the lock tensor
-//
-// is alive, any other request to use `MutexLock` with this mutex will wait.
-//
-// This is particularly useful for creating a critical section when used in
-// conjunction with `MutexLockIdentity`:
-//
-// ```python
-//
-// mutex = mutex_v2(
-//   shared_name=handle_name, container=container, name=name)
-//
-// def execute_in_critical_section(fn, *args, **kwargs):
-//   lock = gen_resource_variable_ops.mutex_lock(mutex)
-//
-//   with ops.control_dependencies([lock]):
-//     r = fn(*args, **kwargs)
-//
-//   with ops.control_dependencies(nest.flatten(r)):
-//     with ops.colocate_with(mutex):
-//       ensure_lock_exists = mutex_lock_identity(lock)
-//
-//     # Make sure that if any element of r is accessed, all of
-//     # them are executed together.
-//     r = nest.map_structure(tf.identity, r)
-//
-//   with ops.control_dependencies([ensure_lock_exists]):
-//     return nest.map_structure(tf.identity, r)
-// ```
-//
-// While `fn` is running in the critical section, no other functions which wish to
-// use this critical section may run.
-//
-// Often the use case is that two executions of the same graph, in parallel,
-// wish to run `fn`; and we wish to ensure that only one of them executes
-// at a time.  This is especially important if `fn` modifies one or more
-// variables at a time.
-//
-// It is also useful if two separate functions must share a resource, but we
-// wish to ensure the usage is exclusive.
-//
-// Arguments:
-//	mutex: The mutex resource to lock.
-//
-// Returns A tensor that keeps a shared pointer to a lock on the mutex;
-// when the Tensor is destroyed, the use count on the shared pointer is decreased
-// by 1.  When it reaches 0, the lock is released.
-func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "MutexLock",
-		Input: []tf.Input{
-			mutex,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // StringFormatAttr is an optional argument to StringFormat.
 type StringFormatAttr func(optionalAttr)
 
@@ -16807,26 +16807,6 @@ func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values
 	return op.Output(0), op.Output(1)
 }
 
-// Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
-//
-// The Hurwitz zeta function is defined as:
-//
-//
-// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
-func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Zeta",
-		Input: []tf.Input{
-			x, q,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Returns a list of tensors with the same shapes and contents as the input
 //
 // tensors.
@@ -18873,6 +18853,26 @@ func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.D
 	return op.Output(0)
 }
 
+// Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
+//
+// The Hurwitz zeta function is defined as:
+//
+//
+// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
+func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Zeta",
+		Input: []tf.Input{
+			x, q,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Inverse fast Fourier transform.
 //
 // Computes the inverse 1-dimensional discrete Fourier transform over the
@@ -22757,6 +22757,21 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output
 	return op.Output(0)
 }
 
+// Computes hyperbolic tangent of `x` element-wise.
+func Tanh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Tanh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the maximum along segments of a tensor.
 //
 // Read
@@ -22794,21 +22809,6 @@ func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.
 	return op.Output(0)
 }
 
-// Computes hyperbolic tangent of `x` element-wise.
-func Tanh(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Tanh",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Creates a dataset that skips `count` elements from the `input_dataset`.
 //
 // Arguments:
-- 
GitLab


From 7d3bfc143a74d8e49f138841a07f7f4693b0a911 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Fri, 5 Oct 2018 20:07:12 -0700
Subject: [PATCH 1224/1357] Add the plumbing for an autograph flag to defun.
 Disabled and experimental for now.

PiperOrigin-RevId: 216003028
---
 tensorflow/python/eager/BUILD       |  1 +
 tensorflow/python/eager/function.py | 61 +++++++++++++++++++++++------
 2 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index d0c1a93118..cae809a7c3 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -251,6 +251,7 @@ py_library(
         "//tensorflow/python:gradients_impl",
         "//tensorflow/python:graph_to_function_def",
         "//tensorflow/python:util",
+        "//tensorflow/python/autograph",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:core",
         "//tensorflow/python/eager:execute",
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f06148b5d2..bafe07de2b 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -31,6 +31,7 @@ import six
 
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.core.framework import function_pb2
+from tensorflow.python import autograph
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.eager import execute
@@ -877,7 +878,8 @@ def func_graph_from_py_func(name,
                             args,
                             kwargs,
                             signature=None,
-                            func_graph=None):
+                            func_graph=None,
+                            experimental_autograph=False):
   """Returns a `FuncGraph` generated from `python_func`.
 
   Args:
@@ -894,6 +896,8 @@ def func_graph_from_py_func(name,
       inputs.
     func_graph: Optional. An instance of FuncGraph. If provided, we will use
       this graph else a new one is built and returned.
+    experimental_autograph: whether to use autograph to compile `python_func`.
+      See https://www.tensorflow.org/guide/autograph for more information.
 
   Returns:
     A FuncGraph.
@@ -939,7 +943,17 @@ def func_graph_from_py_func(name,
 
     this_tape = tape.push_new_tape()
     try:
-      func_outputs = python_func(*func_args, **func_kwargs)
+      if experimental_autograph:
+        func_outputs = autograph.converted_call(
+            python_func,
+            autograph.ConversionOptions(
+                verbose=True,
+                recursive=True,
+                force_conversion=False,
+                strip_decorators=(defun,),
+                arg_types={}), *func_args, **func_kwargs)
+      else:
+        func_outputs = python_func(*func_args, **func_kwargs)
       # invariant: `func_outputs` contains only Tensors and `None`s.
       func_outputs = nest.map_structure(convert, func_outputs)
 
@@ -1035,7 +1049,8 @@ class PolymorphicFunction(object):
                python_function,
                name,
                input_signature=None,
-               attributes=None):
+               attributes=None,
+               experimental_autograph=False):
     """Initializes a polymorphic function.
 
     Args:
@@ -1045,7 +1060,10 @@ class PolymorphicFunction(object):
         specifying the input signature of this function. If `None`, a separate
         function is instantiated for each inferred input signature.
       attributes: dict, extra keyword arguments that will be added as attribute
-         of the function.
+        of the function.
+      experimental_autograph: whether to use autograph to compile
+        `python_function`. See https://www.tensorflow.org/guide/autograph for
+        more information.
 
     Raises:
       ValueError: if `input_signature` is not None and the `python_function`'s
@@ -1061,6 +1079,7 @@ class PolymorphicFunction(object):
       self._args_to_prepend = tuple()
       self._kwargs_to_include = {}
     self._name = name
+    self._experimental_autograph = experimental_autograph
     self._function_cache = collections.OrderedDict()
     self._function_attributes = attributes or {}
 
@@ -1286,8 +1305,13 @@ class PolymorphicFunction(object):
 
       if graph_function is None:
         graph_function = Function(
-            func_graph_from_py_func(self._name, self._python_function, args,
-                                    kwargs, self._input_signature),
+            func_graph_from_py_func(
+                self._name,
+                self._python_function,
+                args,
+                kwargs,
+                self._input_signature,
+                experimental_autograph=self._experimental_autograph),
             self._function_attributes)
         self._function_cache[cache_key] = graph_function
       return graph_function, [
@@ -1348,7 +1372,7 @@ def _validate_signature(signature):
                     "a possibly nested sequence of TensorSpec objects.")
 
 
-def defun(func=None, input_signature=None):
+def defun(func=None, input_signature=None, experimental_autograph=False):
   """Compiles a Python function into a callable TensorFlow graph.
 
   `defun` (short for "define function") trace-compiles a Python function
@@ -1657,6 +1681,10 @@ def defun(func=None, input_signature=None):
       function is instantiated for each inferred input signature.  If a
       signature is specified, every input to `func` must be a `Tensor`, and
       `func` cannot accept `**kwargs`.
+    experimental_autograph: Whether `func` should be compiled before
+      constructing the graph. See https://www.tensorflow.org/guide/autograph
+      for more information.
+
 
   Returns:
      If `func` is not None, returns a callable that will execute the compiled
@@ -1668,10 +1696,16 @@ def defun(func=None, input_signature=None):
     TypeError: If `input_signature` is neither `None` nor a sequence of
       `tf.contrib.eager.TensorSpec` objects.
   """
-  return defun_with_attributes(func=func, input_signature=input_signature)
+  return defun_with_attributes(
+      func=func,
+      input_signature=input_signature,
+      experimental_autograph=experimental_autograph)
 
 
-def defun_with_attributes(func=None, input_signature=None, attributes=None):
+def defun_with_attributes(func=None,
+                          input_signature=None,
+                          attributes=None,
+                          experimental_autograph=False):
   """Compiles a Python function into a callable TensorFlow graph.
 
   This function supports adding extra function attributes. See detailed
@@ -1686,6 +1720,7 @@ def defun_with_attributes(func=None, input_signature=None, attributes=None):
       attributes. Currently only support primitive types as value, and only
       whitelisted attribute name is allowed. Unwhitelisted attribute name or
       unsupported value will result into ValueError.
+    experimental_autograph: same as defun()'s experimental_autograph.
 
   Returns:
     Same as the return value of defun, with attributes added to the function in
@@ -1702,8 +1737,12 @@ def defun_with_attributes(func=None, input_signature=None, attributes=None):
       name = "function"
     return tf_decorator.make_decorator(
         function,
-        PolymorphicFunction(function, name, input_signature=input_signature,
-                            attributes=attributes))
+        PolymorphicFunction(
+            function,
+            name,
+            input_signature=input_signature,
+            attributes=attributes,
+            experimental_autograph=experimental_autograph))
 
   # This code path is for the `foo = tfe.defun(foo, ...)` use case
   if func is not None:
-- 
GitLab


From fb92d456476c36210cea3b76393f584a306f092b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 6 Oct 2018 02:01:17 -0700
Subject: [PATCH 1225/1357] compat: Update forward compatibility horizon to
 2018-10-06

PiperOrigin-RevId: 216021117
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 8f4e8e0b98..d85fb00414 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 5)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 6)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 5c0a6bdfeb1848b0146a36706d921dde06ba160a Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Sat, 6 Oct 2018 10:04:16 -0700
Subject: [PATCH 1226/1357] [XLA] Add base and window dilation support to
 ReduceWindow

PiperOrigin-RevId: 216041507
---
 .../tf2xla/kernels/reduce_window_op.cc        | 21 +++++++-
 .../compiler/tf2xla/kernels/scan_ops.cc       |  3 +-
 tensorflow/compiler/tf2xla/ops/xla_ops.cc     |  2 +
 tensorflow/compiler/tf2xla/python/xla.py      |  6 +++
 tensorflow/compiler/xla/client/xla_builder.cc | 15 ++++--
 tensorflow/compiler/xla/client/xla_builder.h  |  6 +++
 .../xla/python/local_computation_builder.cc   |  5 +-
 .../xla/python/local_computation_builder.h    |  2 +
 tensorflow/compiler/xla/python/xla_client.py  | 25 ++++++++-
 .../xla/service/algebraic_simplifier.cc       |  6 +++
 .../compiler/xla/service/cpu/ir_emitter.cc    | 27 +++++++---
 .../xla/service/gpu/elemental_ir_emitter.cc   | 26 ++++++----
 .../xla/service/hlo_evaluator_test.cc         | 52 +++++++++++++++++++
 .../xla/service/hlo_evaluator_typed_visitor.h | 13 ++++-
 .../compiler/xla/tests/reduce_window_test.cc  | 12 ++++-
 15 files changed, 191 insertions(+), 30 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc
index 8102faad28..8eee5b1299 100644
--- a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc
@@ -40,10 +40,16 @@ class ReduceWindowOp : public XlaOpKernel {
 
     std::vector<int64> window_dimensions;
     std::vector<int64> window_strides;
+    std::vector<int64> base_dilations;
+    std::vector<int64> window_dilations;
     OP_REQUIRES_OK(context, context->ConstantInputAsIntVector(
                                 "window_dimensions", &window_dimensions));
     OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("window_strides",
                                                               &window_strides));
+    OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("base_dilations",
+                                                              &base_dilations));
+    OP_REQUIRES_OK(context, context->ConstantInputAsIntVector(
+                                "window_dilations", &window_dilations));
 
     const int rank = input_shape.dims();
     OP_REQUIRES(context, rank == window_dimensions.size(),
@@ -56,6 +62,16 @@ class ReduceWindowOp : public XlaOpKernel {
                     "The size of window_strides must be equal to the input "
                     "rank (",
                     window_strides.size(), " vs. ", rank, ")"));
+    OP_REQUIRES(context, rank == base_dilations.size(),
+                errors::InvalidArgument(
+                    "The size of base_dilations must be equal to the input "
+                    "rank (",
+                    base_dilations.size(), " vs. ", rank, ")"));
+    OP_REQUIRES(context, rank == window_dilations.size(),
+                errors::InvalidArgument(
+                    "The size of window_dilations must be equal to the input "
+                    "rank (",
+                    window_dilations.size(), " vs. ", rank, ")"));
 
     // Build the reducer function.
     XlaCompiler::Argument reducer_arg;
@@ -102,7 +118,8 @@ class ReduceWindowOp : public XlaOpKernel {
 
     xla::XlaOp output = xla::ReduceWindowWithGeneralPadding(
         context->Input(0), context->Input(1), *reducer.computation,
-        window_dimensions, window_strides, padding);
+        window_dimensions, window_strides, base_dilations, window_dilations,
+        padding);
     context->SetOutput(0, output);
   }
 
@@ -115,6 +132,8 @@ class ReduceWindowOp : public XlaOpKernel {
 REGISTER_XLA_OP(Name("XlaReduceWindow")
                     .CompileTimeConstInput("window_dimensions")
                     .CompileTimeConstInput("window_strides")
+                    .CompileTimeConstInput("base_dilations")
+                    .CompileTimeConstInput("window_dilations")
                     .CompileTimeConstInput("padding"),
                 ReduceWindowOp);
 
diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
index ab094d7dd1..57afd608de 100644
--- a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
@@ -104,7 +104,8 @@ class ScanOp : public XlaOpKernel {
     }
     auto output = xla::ReduceWindowWithGeneralPadding(
         XlaHelpers::ConvertElementType(builder, ctx->Input(0), dtype), init,
-        *reducer, window_dims, window_strides, padding);
+        *reducer, window_dims, window_strides,
+        /*base_dilations=*/{}, /*window_dilations=*/{}, padding);
     output =
         XlaHelpers::ConvertElementType(builder, output, ctx->input_type(0));
 
diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
index 557911553d..bd2c0a5ee8 100644
--- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc
+++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
@@ -283,6 +283,8 @@ REGISTER_OP("XlaReduceWindow")
     .Input("init_value: T")
     .Input("window_dimensions: Tindices")
     .Input("window_strides: Tindices")
+    .Input("base_dilations: Tindices")
+    .Input("window_dilations: Tindices")
     .Input("padding: Tindices")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py
index bc7924c371..5e86b5d8ec 100644
--- a/tensorflow/compiler/tf2xla/python/xla.py
+++ b/tensorflow/compiler/tf2xla/python/xla.py
@@ -320,6 +320,8 @@ def reduce_window(operand,
                   reducer,
                   window_dimensions,
                   window_strides=None,
+                  base_dilations=None,
+                  window_dilations=None,
                   padding=None,
                   name=None):
   """Wraps the XLA ReduceWindow operator.
@@ -343,12 +345,16 @@ def reduce_window(operand,
     A tensor that represents the output of the reduce_window operator.
   """
   window_strides = window_strides or [1] * len(window_dimensions)
+  base_dilations = base_dilations or [1] * len(window_dimensions)
+  window_dilations = window_dilations or [1] * len(window_dimensions)
   padding = padding or [(0, 0)] * len(window_dimensions)
   return gen_xla_ops.xla_reduce_window(
       input=operand,
       init_value=init,
       window_dimensions=window_dimensions,
       window_strides=window_strides,
+      base_dilations=base_dilations,
+      window_dilations=window_dilations,
       padding=padding,
       computation=reducer,
       name=name)
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index d196252db1..6b31831010 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -1789,9 +1789,9 @@ XlaOp XlaBuilder::ReduceWindow(const XlaOp& operand, const XlaOp& init_value,
     std::vector<std::pair<int64, int64>> padding_values =
         MakePadding(AsInt64Slice(operand_shape.dimensions()), window_dimensions,
                     window_strides, padding);
-    return ReduceWindowWithGeneralPadding(operand, init_value, computation,
-                                          window_dimensions, window_strides,
-                                          padding_values);
+    return ReduceWindowWithGeneralPadding(
+        operand, init_value, computation, window_dimensions, window_strides,
+        /*base_dilations=*/{}, /*window_dilations=*/{}, padding_values);
   });
 }
 
@@ -1800,6 +1800,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding(
     const XlaComputation& computation,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
+    absl::Span<const int64> base_dilations,
+    absl::Span<const int64> window_dilations,
     absl::Span<const std::pair<int64, int64>> padding) {
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     HloInstructionProto instr;
@@ -1810,7 +1812,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding(
                         computation.GetProgramShape());
     TF_ASSIGN_OR_RETURN(*instr.mutable_window(),
                         MakeWindow(window_dimensions, window_strides, padding,
-                                   /*lhs_dilation=*/{}, /*rhs_dilation=*/{}));
+                                   /*lhs_dilation=*/base_dilations,
+                                   /*rhs_dilation=*/window_dilations));
     TF_ASSIGN_OR_RETURN(
         *instr.mutable_shape(),
         ShapeInference::InferReduceWindowShape(operand_shape, init_shape,
@@ -2800,10 +2803,12 @@ XlaOp ReduceWindowWithGeneralPadding(
     const XlaComputation& computation,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
+    absl::Span<const int64> base_dilations,
+    absl::Span<const int64> window_dilations,
     absl::Span<const std::pair<int64, int64>> padding) {
   return operand.builder()->ReduceWindowWithGeneralPadding(
       operand, init_value, computation, window_dimensions, window_strides,
-      padding);
+      base_dilations, window_dilations, padding);
 }
 
 XlaOp CrossReplicaSum(const XlaOp& operand,
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index cd0d5ca5d3..2e14e47a35 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -671,6 +671,8 @@ class XlaBuilder {
       const XlaComputation& computation,
       absl::Span<const int64> window_dimensions,
       absl::Span<const int64> window_strides,
+      absl::Span<const int64> base_dilations,
+      absl::Span<const int64> window_dilations,
       absl::Span<const std::pair<int64, int64>> padding);
 
   // Returns the sum of the operand value within each subgroup of replicas. All
@@ -1245,6 +1247,8 @@ class XlaBuilder {
       const XlaComputation& computation,
       absl::Span<const int64> window_dimensions,
       absl::Span<const int64> window_strides,
+      absl::Span<const int64> base_dilations,
+      absl::Span<const int64> window_dilations,
       absl::Span<const std::pair<int64, int64>> padding);
   friend XlaOp CrossReplicaSum(const XlaOp& operand,
                                absl::Span<const ReplicaGroup> replica_groups);
@@ -1818,6 +1822,8 @@ XlaOp ReduceWindowWithGeneralPadding(
     const XlaComputation& computation,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
+    absl::Span<const int64> base_dilations,
+    absl::Span<const int64> window_dilations,
     absl::Span<const std::pair<int64, int64>> padding);
 
 // Returns the sum of the operand value within each subgroup of replicas. All
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index cd5fd33029..ffa336f304 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -532,10 +532,13 @@ LocalOp LocalComputationBuilder::ReduceWindowWithGeneralPadding(
     const LocalComputation& local_computation,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
+    absl::Span<const int64> base_dilations,
+    absl::Span<const int64> window_dilations,
     absl::Span<const std::pair<int64, int64>> padding) {
   return xla::ReduceWindowWithGeneralPadding(
       operand.op(), init_value.op(), local_computation.computation(),
-      window_dimensions, window_strides, padding);
+      window_dimensions, window_strides, base_dilations, window_dilations,
+      padding);
 }
 
 LocalOp LocalComputationBuilder::RngNormal(const LocalOp& mu,
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
index 2166bb6721..43332e0abd 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.h
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -278,6 +278,8 @@ class LocalComputationBuilder {
       const LocalComputation& local_computation,
       absl::Span<const int64> window_dimensions,
       absl::Span<const int64> window_strides,
+      absl::Span<const int64> base_dilations,
+      absl::Span<const int64> window_dilations,
       absl::Span<const std::pair<int64, int64> > padding);
 
   LocalOp RngNormal(const LocalOp& mu, const LocalOp& sigma,
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
index bb303c5678..f8197488fb 100644
--- a/tensorflow/compiler/xla/python/xla_client.py
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -995,7 +995,30 @@ class ComputationBuilder(object):
         window_strides)
     return self._client.ReduceWindowWithGeneralPadding(
         operand, init_value, computation_to_apply.c_local_computation,
-        window_dimensions, window_strides, pads)
+        window_dimensions, window_strides, (), (), pads)
+
+  def ReduceWindowWithGeneralPadding(
+      self, operand, init_value, computation_to_apply, window_dimensions,
+      window_strides, base_dilations, window_dilations, padding):
+    """Enqueues a windowed reduction operation onto the computation.
+
+    Args:
+      operand: reduction operand (LocalOp).
+      init_value: reduction initial value (LocalOp).
+      computation_to_apply: a binary reduction function (Computation).
+      window_dimensions: dimensions of window (sequence of integers).
+      window_strides: strides for window (sequence of integers).
+      base_dilations: dilations for the base (sequence of integers).
+      window_dilations: dilations for window (sequence of integers).
+      padding: length-N array-like of pairs of integers of (low, high) padding.
+
+    Returns:
+      A LocalOp representing the added ReduceWindow op.
+    """
+    return self._client.ReduceWindowWithGeneralPadding(
+        operand, init_value, computation_to_apply.c_local_computation,
+        window_dimensions, window_strides, base_dilations, window_dilations,
+        padding)
 
   def RngNormal(self, mu, sigma, dims):
     """Enqueues an RngNormal operation onto the computation.
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 75dae7a714..86d9dbea90 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -2057,6 +2057,12 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow(
     return Status::OK();
   }
 
+  // Bail on dilation.
+  if (window_util::HasDilation(window)) {
+    VLOG(10) << "Not folding pad into reduce-window as there is dilation.";
+    return Status::OK();
+  }
+
   VLOG(10) << "Considering folding Pad: " << pad->ToString()
            << "\ninto reduce-window: " << reduce_window->ToString()
            << (convert != nullptr
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index a70abb117a..b2abdb39a5 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -688,8 +688,25 @@ StatusOr<llvm::Value*> IrEmitter::EmitTargetElementLoopBodyForReduceWindow(
   for (size_t i = 0; i < index.size(); ++i) {
     llvm::Value* strided_index =
         NSWMul(index[i], b_.getInt64(window.dimensions(i).stride()));
-    input_index[i] = NSWSub(NSWAdd(strided_index, window_index[i]),
-                            b_.getInt64(window.dimensions(i).padding_low()));
+    input_index[i] = NSWSub(
+        NSWAdd(strided_index,
+               NSWMul(window_index[i],
+                      b_.getInt64(window.dimensions(i).window_dilation()))),
+        b_.getInt64(window.dimensions(i).padding_low()));
+
+    // We need to verify that we are not in the dilated base area.
+    llvm::Value* dilation_condition = ICmpEQ(
+        SRem(input_index[i], b_.getInt64(window.dimensions(i).base_dilation())),
+        b_.getInt64(0));
+    if (in_bounds_condition == nullptr) {
+      in_bounds_condition = dilation_condition;
+    } else {
+      in_bounds_condition = And(in_bounds_condition, dilation_condition);
+    }
+
+    // Apply base dilation to the index.
+    input_index[i] =
+        SDiv(input_index[i], b_.getInt64(window.dimensions(i).base_dilation()));
 
     // We need to check if 0 <= input_index[i] < bound, as otherwise we are in
     // the padding so that we can skip the computation. That is equivalent to
@@ -728,12 +745,6 @@ Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window) {
       /*operands=*/{reduce_window->operand(0)},
       /*supported_types=*/{F32, BF16, S32, F16}));
 
-  // TODO(b/31410564): Implement dilation for reduce-window.
-  if (window_util::HasDilation(reduce_window->window())) {
-    return Unimplemented(
-        "Dilation for ReduceWindow is not implemented on CPU.");
-  }
-
   // Pseudo code for reduce window:
   //
   //   for (coordinates O in the output)
diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
index c1aaa4bf04..6dcdaf1cfe 100644
--- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
@@ -358,13 +358,6 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator(
         const HloInstruction* operand = hlo->operand(0);
         const Window& window = hlo->window();
 
-        // TODO(b/31410564): Implement dilation for reduce-window.
-        if (window_util::HasDilation(window)) {
-          return Unimplemented(
-              "Dilation for reduce-window not implemented on GPU. "
-              "See b/31410564.");
-        }
-
         PrimitiveType operand_element_type = operand->shape().element_type();
         llvm::Value* accum_ptr = llvm_ir::EmitAllocaAtFunctionEntry(
             llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_),
@@ -397,9 +390,24 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator(
         for (size_t i = 0; i < index.size(); ++i) {
           llvm::Value* stridden_index = NSWMul(
               index[i], index_typed_const(window.dimensions(i).stride()));
+          input_index[i] = NSWSub(
+              NSWAdd(stridden_index,
+                     NSWMul(window_index[i],
+                            index_typed_const(
+                                window.dimensions(i).window_dilation()))),
+              index_typed_const(window.dimensions(i).padding_low()));
+
+          // We need to verify that we are not in the dilated base area.
+          llvm::Value* dilation_condition = ICmpEQ(
+              SRem(input_index[i],
+                   index_typed_const(window.dimensions(i).base_dilation())),
+              index_typed_const(0));
+          in_bounds = And(in_bounds, dilation_condition);
+
+          // Apply base dilation to the index.
           input_index[i] =
-              NSWSub(NSWAdd(stridden_index, window_index[i]),
-                     index_typed_const(window.dimensions(i).padding_low()));
+              SDiv(input_index[i],
+                   index_typed_const(window.dimensions(i).base_dilation()));
 
           // We must check whether 0 ≤ input_index[i] < bound, as otherwise
           // we are in the pad and so can skip the computation. This
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index cee11a8a21..608a42bb60 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -1463,6 +1463,58 @@ TEST_P(HloEvaluatorTest, ReduceWindowMax) {
   EXPECT_TRUE(LiteralTestUtil::Equal(expected, result));
 }
 
+TEST_P(HloEvaluatorTest, ReduceWindowMaxWindowDilation) {
+  HloComputation::Builder b(TestName());
+
+  // arg:
+  // f32[3,3] {
+  //  { 1, 2, 3 },
+  //  { 5, 6, 7 },
+  //  { 9, 10, 11 },
+  // }
+  auto arg_array = absl::make_unique<Array2D<float>>(3, 3);
+  arg_array->FillUnique(1.0f);
+  auto arg_literal = LiteralUtil::CreateR2FromArray2D<float>(*arg_array);
+
+  HloInstruction* arg_instruction =
+      b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal)));
+
+  auto init_value = b.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(0.f)));
+
+  HloComputation::Builder max_computation("max");
+  Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  auto param_lhs = max_computation.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "lhs"));
+  auto param_rhs = max_computation.AddInstruction(
+      HloInstruction::CreateParameter(1, scalar_shape, "rhs"));
+  max_computation.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape, HloOpcode::kMaximum, param_lhs, param_rhs));
+  auto max_func = module().AddEmbeddedComputation(max_computation.Build());
+
+  Window window;
+  WindowDimension dim;
+  dim.set_size(2);
+  dim.set_stride(1);
+  dim.set_padding_low(0);
+  dim.set_padding_high(0);
+  dim.set_window_dilation(2);
+  dim.set_base_dilation(1);
+  *window.add_dimensions() = dim;
+  *window.add_dimensions() = dim;
+
+  Shape shape = ShapeUtil::MakeShape(F32, {1, 1});
+  b.AddInstruction(HloInstruction::CreateReduceWindow(
+      shape, arg_instruction, init_value, window, max_func));
+
+  module().AddEntryComputation(b.Build());
+
+  Literal result = Evaluate();
+
+  auto expected = LiteralUtil::CreateR2<float>({{11}});
+  EXPECT_TRUE(LiteralTestUtil::Equal(expected, result));
+}
+
 TEST_P(HloEvaluatorTest, ReduceWindowAdd) {
   HloComputation::Builder b(TestName());
 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index b2d12c94b8..a450dc6ff5 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -2613,8 +2613,17 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       std::vector<int64> base_index(rank);
       bool out_of_bound = false;
       for (int64 i = 0; i < rank; ++i) {
-        base_index[i] = window_count_index[i] * window.dimensions(i).stride() +
-                        window_index[i] - window.dimensions(i).padding_low();
+        base_index[i] =
+            window_count_index[i] * window.dimensions(i).stride() +
+            window_index[i] * window.dimensions(i).window_dilation() -
+            window.dimensions(i).padding_low();
+        // We are not in the base area if the dilation placed us out of bounds.
+        if (base_index[i] % window.dimensions(i).base_dilation() != 0) {
+          out_of_bound = true;
+          break;
+        }
+        // Apply the dilation to the base area.
+        base_index[i] /= window.dimensions(i).base_dilation();
         if (base_index[i] < 0 || base_index[i] >= base_shape.dimensions(i)) {
           out_of_bound = true;
           break;
diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index c25ccafaf8..22fe4a2670 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -638,6 +638,8 @@ class R4ReduceWindowTest : public ReduceWindowTestBase,
         /*computation=*/computation,
         /*window_dimensions=*/param.window_bounds,
         /*window_strides=*/param.strides,
+        /*base_dilations=*/{},
+        /*window_dilations=*/{},
         /*padding=*/padding);
 
     CHECK(reducer == kAdd || reducer == kMax);
@@ -1158,7 +1160,10 @@ class R2ReduceWindowTest : public ReduceWindowTestBase,
         /*init_value=*/init_value,
         /*computation=*/computation,
         /*window_dimensions=*/param.window_bounds,
-        /*window_strides=*/param.strides, /*padding=*/padding);
+        /*window_strides=*/param.strides,
+        /*base_dilations=*/{},
+        /*window_dilations=*/{},
+        /*padding=*/padding);
 
     auto reduce_func = param.reducer == kAdd
                            ? +[](float a, float b) { return a + b; }
@@ -1369,7 +1374,10 @@ TEST_P(R1ReduceWindowTest, DoIt) {
       /*init_value=*/init_value,
       /*computation=*/computation,
       /*window_dimensions=*/param.window_bounds,
-      /*window_strides=*/param.strides, /*padding=*/padding);
+      /*window_strides=*/param.strides,
+      /*base_dilations=*/{},
+      /*window_dilations=*/{},
+      /*padding=*/padding);
 
   auto reduce_func = param.reducer == kAdd
                          ? +[](float a, float b) { return a + b; }
-- 
GitLab


From e93a18954689b6d522560f5273f6d3320d545b2e Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Sat, 6 Oct 2018 13:49:25 -0700
Subject: [PATCH 1227/1357] Mark tensorflow/contrib/tpu:datasets_test flaky

It fails 1/1000 runs in OSS builds.

PiperOrigin-RevId: 216050192
---
 tensorflow/contrib/tpu/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 10ed1c2891..8c36d5a297 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -302,6 +302,7 @@ tf_py_test(
         "//tensorflow/python:client_testlib",
         ":datasets",
     ],
+    flaky = 1,  # TODO(b/117363808): fails 1/1000 OSS runs
     grpc_enabled = True,
 )
 
-- 
GitLab


From 7fa6a6b42bc9d562e2b1cc765ca78d281b51f734 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 6 Oct 2018 21:00:57 -0700
Subject: [PATCH 1228/1357] Add SequenceLSTMOptions to schema to decouple the
 sequential Op from the LSTM.

PiperOrigin-RevId: 216066634
---
 tensorflow/contrib/lite/c/builtin_op_data.h   |   7 +
 .../lite/core/api/flatbuffer_conversions.cc   |  15 +-
 .../kernels/unidirectional_sequence_lstm.cc   |  14 +-
 .../unidirectional_sequence_lstm_test.cc      |  11 +-
 tensorflow/contrib/lite/schema/schema.fbs     |   8 +
 .../contrib/lite/schema/schema_generated.h    | 162 +++++++++++++++++-
 6 files changed, 205 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h
index 44daf7adaa..1e65c3cee2 100644
--- a/tensorflow/contrib/lite/c/builtin_op_data.h
+++ b/tensorflow/contrib/lite/c/builtin_op_data.h
@@ -186,6 +186,13 @@ typedef struct {
   TfLiteLSTMKernelType kernel_type;
 } TfLiteLSTMParams;
 
+typedef struct {
+  // Parameters for the LSTM kernel.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+} TfLiteUnidirectionalSequenceLSTMParams;
+
 typedef struct {
   // Parameters for the LSTM kernel.
   TfLiteFusedActivation activation;
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index eac7db9a88..b092e5ee54 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -371,7 +371,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
-    case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
     case BuiltinOperator_LSTM: {
       auto params = allocator->AllocatePOD<TfLiteLSTMParams>();
       if (auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
@@ -391,6 +390,20 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: {
+      auto* params =
+          allocator->AllocatePOD<TfLiteUnidirectionalSequenceLSTMParams>();
+      if (auto* seq_lstm_params =
+              op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) {
+        params->activation =
+            parse_activation(seq_lstm_params->fused_activation_function());
+        params->cell_clip = seq_lstm_params->cell_clip();
+        params->proj_clip = seq_lstm_params->proj_clip();
+      }
+      *builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
+
     case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: {
       auto params =
           allocator->AllocatePOD<TfLiteBidirectionalSequenceLSTMParams>();
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
index ec9cf38b83..89d57e4599 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
@@ -431,7 +431,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
+  const auto* params =
+      reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
+          node->builtin_data);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
 
   const TfLiteTensor* input_to_input_weights =
@@ -482,6 +484,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
+  // Copy out the LSTM specific params so they can be passed in the function.
+  TfLiteLSTMParams lstm_params;
+  lstm_params.activation = params->activation;
+  lstm_params.cell_clip = params->cell_clip;
+  lstm_params.proj_clip = params->proj_clip;
+
   switch (input_to_output_weights->type) {
     case kTfLiteFloat32: {
       return lstm_eval::EvalFloat(
@@ -496,7 +504,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           /*aux_input_to_cell_weights=*/nullptr,
           /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
           forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
-          projection_bias, params, /*forward_sequence=*/true,
+          projection_bias, &lstm_params, /*forward_sequence=*/true,
           /*output_offset=*/0, scratch_buffer, activation_state, cell_state,
           output);
     }
@@ -523,7 +531,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           /*aux_input_to_cell_weights=*/nullptr,
           /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
           forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
-          projection_bias, params, /*forward_sequence=*/true,
+          projection_bias, &lstm_params, /*forward_sequence=*/true,
           /*output_offset=*/0, scratch_buffer, scaling_factors,
           prod_scaling_factors, recovered_cell_weights, input_quantized,
           /*aux_input_quantized=*/nullptr, activation_state_quantized,
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
index cd3aac0532..c97b0fdd61 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
@@ -110,11 +110,12 @@ class UnidirectionalLSTMOpModel : public SingleOpModel {
 
     output_ = AddOutput(TensorType_FLOAT32);
 
-    SetBuiltinOp(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
-                 BuiltinOptions_LSTMOptions,
-                 CreateLSTMOptions(builder_, ActivationFunctionType_TANH,
-                                   cell_clip, proj_clip)
-                     .Union());
+    SetBuiltinOp(
+        BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+        BuiltinOptions_UnidirectionalSequenceLSTMOptions,
+        CreateUnidirectionalSequenceLSTMOptions(
+            builder_, ActivationFunctionType_TANH, cell_clip, proj_clip)
+            .Union());
     BuildInterpreter(input_shapes);
   }
 
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index ff8430827c..cb7a282743 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -250,6 +250,7 @@ union BuiltinOptions {
   FillOptions,
   BidirectionalSequenceLSTMOptions,
   BidirectionalSequenceRNNOptions,
+  UnidirectionalSequenceLSTMOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -394,6 +395,13 @@ table LSTMOptions {
   kernel_type: LSTMKernelType = FULL;
 }
 
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+}
+
 table BidirectionalSequenceLSTMOptions {
   fused_activation_function:ActivationFunctionType;
   cell_clip: float; // Optional, 0.0 means no clipping
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index f3cb113c9c..e7b7a59def 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -79,6 +79,9 @@ struct LocalResponseNormalizationOptionsT;
 struct LSTMOptions;
 struct LSTMOptionsT;
 
+struct UnidirectionalSequenceLSTMOptions;
+struct UnidirectionalSequenceLSTMOptionsT;
+
 struct BidirectionalSequenceLSTMOptions;
 struct BidirectionalSequenceLSTMOptionsT;
 
@@ -681,11 +684,12 @@ enum BuiltinOptions {
   BuiltinOptions_FillOptions = 68,
   BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
   BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
+  BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_BidirectionalSequenceRNNOptions
+  BuiltinOptions_MAX = BuiltinOptions_UnidirectionalSequenceLSTMOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[72] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -757,7 +761,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] {
     BuiltinOptions_ZerosLikeOptions,
     BuiltinOptions_FillOptions,
     BuiltinOptions_BidirectionalSequenceLSTMOptions,
-    BuiltinOptions_BidirectionalSequenceRNNOptions
+    BuiltinOptions_BidirectionalSequenceRNNOptions,
+    BuiltinOptions_UnidirectionalSequenceLSTMOptions
   };
   return values;
 }
@@ -835,6 +840,7 @@ inline const char * const *EnumNamesBuiltinOptions() {
     "FillOptions",
     "BidirectionalSequenceLSTMOptions",
     "BidirectionalSequenceRNNOptions",
+    "UnidirectionalSequenceLSTMOptions",
     nullptr
   };
   return names;
@@ -1129,6 +1135,10 @@ template<> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
 };
 
+template<> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -1720,6 +1730,14 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_BidirectionalSequenceRNNOptions ?
       reinterpret_cast<const BidirectionalSequenceRNNOptionsT *>(value) : nullptr;
   }
+  UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() {
+    return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ?
+      reinterpret_cast<UnidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
+  }
+  const UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() const {
+    return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ?
+      reinterpret_cast<const UnidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -3469,6 +3487,84 @@ inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
 
 flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct UnidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
+  typedef UnidirectionalSequenceLSTMOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  float cell_clip;
+  float proj_clip;
+  UnidirectionalSequenceLSTMOptionsT()
+      : fused_activation_function(ActivationFunctionType_NONE),
+        cell_clip(0.0f),
+        proj_clip(0.0f) {
+  }
+};
+
+struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef UnidirectionalSequenceLSTMOptionsT NativeTableType;
+  enum {
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8
+  };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const {
+    return GetField<float>(VT_CELL_CLIP, 0.0f);
+  }
+  float proj_clip() const {
+    return GetField<float>(VT_PROJ_CLIP, 0.0f);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<float>(verifier, VT_CELL_CLIP) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+           verifier.EndTable();
+  }
+  UnidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UnidirectionalSequenceLSTMOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip) {
+    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip) {
+    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  UnidirectionalSequenceLSTMOptionsBuilder &operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
+  flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    float cell_clip = 0.0f,
+    float proj_clip = 0.0f) {
+  UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
+  builder_.add_proj_clip(proj_clip);
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
   typedef BidirectionalSequenceLSTMOptions TableType;
   ActivationFunctionType fused_activation_function;
@@ -6488,6 +6584,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const {
     return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) : nullptr;
   }
+  const UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const {
+    return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -6799,6 +6898,10 @@ template<> inline const BidirectionalSequenceRNNOptions *Operator::builtin_optio
   return builtin_options_as_BidirectionalSequenceRNNOptions();
 }
 
+template<> inline const UnidirectionalSequenceLSTMOptions *Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const {
+  return builtin_options_as_UnidirectionalSequenceLSTMOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -7809,6 +7912,38 @@ inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBuffe
       _kernel_type);
 }
 
+inline UnidirectionalSequenceLSTMOptionsT *UnidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new UnidirectionalSequenceLSTMOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void UnidirectionalSequenceLSTMOptions::UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  { auto _e = cell_clip(); _o->cell_clip = _e; };
+  { auto _e = proj_clip(); _o->proj_clip = _e; };
+}
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> UnidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateUnidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _cell_clip = _o->cell_clip;
+  auto _proj_clip = _o->proj_clip;
+  return tflite::CreateUnidirectionalSequenceLSTMOptions(
+      _fbb,
+      _fused_activation_function,
+      _cell_clip,
+      _proj_clip);
+}
+
 inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new BidirectionalSequenceLSTMOptionsT();
   UnPackTo(_o, _resolver);
@@ -9620,6 +9755,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -9918,6 +10057,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -10204,6 +10347,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptionsT *>(value);
       return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptionsT *>(value);
+      return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -10490,6 +10637,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new BidirectionalSequenceRNNOptionsT(*reinterpret_cast<BidirectionalSequenceRNNOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      value = new UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast<UnidirectionalSequenceLSTMOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -10847,6 +10998,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<UnidirectionalSequenceLSTMOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
-- 
GitLab


From 367f7d651f19c5b111ea0292243eab81fb4058c7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 7 Oct 2018 02:01:04 -0700
Subject: [PATCH 1229/1357] compat: Update forward compatibility horizon to
 2018-10-07

PiperOrigin-RevId: 216079665
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index d85fb00414..ee56480b00 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 6)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 7)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 393a13c1b1a7d51b0871a6d4b3d3413d8e1765bf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 02:03:43 -0700
Subject: [PATCH 1230/1357] compat: Update forward compatibility horizon to
 2018-10-08

PiperOrigin-RevId: 216151605
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index ee56480b00..349c84e13c 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 7)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 8)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 3bdf3c592472c2b54c513417de8d9b538d3f6078 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 08:08:31 -0700
Subject: [PATCH 1231/1357] Make ExecutorState preserve the thread context.

PiperOrigin-RevId: 216187878
---
 tensorflow/core/common_runtime/executor.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 2c48084cab..40ec1502da 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -54,6 +54,7 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/context.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
@@ -1240,6 +1241,7 @@ class ExecutorState {
   StepStatsCollectorInterface* const stats_collector_;
   const tracing::TraceCollector* const trace_collector_;
   const tracing::EventCollector* const event_collector_;
+  Context context_;
 
   // QUESTION: Make it a checkpoint::TensorSliceReaderCacheWrapper
   // instead of a pointer?  (avoids having to delete).
@@ -1367,6 +1369,7 @@ ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl)
       trace_collector_(tracing::GetTraceCollector()),
       event_collector_(
           tracing::GetEventCollector(tracing::EventCategory::kCompute)),
+      context_(ContextKind::kThread),
       slice_reader_cache_(new checkpoint::TensorSliceReaderCacheWrapper),
       call_frame_(args.call_frame),
       impl_(impl),
@@ -1586,6 +1589,7 @@ bool MightTrace(const NodeItem& item,
 }
 
 void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
+  WithContext wc(context_);
   const GraphView& gview = impl_->gview_;
   TaggedNodeSeq ready;
   TaggedNodeReadyQueue inline_ready;
-- 
GitLab


From 53961cc2f16dea9d9b2286950c1e4d4c0a3743c5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 08:22:48 -0700
Subject: [PATCH 1232/1357] Improve const correctness of HloDomainMap

PiperOrigin-RevId: 216189458
---
 tensorflow/compiler/xla/service/hlo_domain_map.cc | 12 +++++++-----
 tensorflow/compiler/xla/service/hlo_domain_map.h  | 14 +++++++-------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc
index 6ca1255ede..c6d02f9f67 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc
@@ -42,18 +42,19 @@ namespace xla {
   return std::move(domain_map);
 }
 
-bool HloDomainMap::InSameDomain(HloInstruction* instruction1,
-                                HloInstruction* instruction2) const {
+bool HloDomainMap::InSameDomain(const HloInstruction* instruction1,
+                                const HloInstruction* instruction2) const {
   int64 domain_id1 = GetDomainId(instruction1);
   int64 domain_id2 = GetDomainId(instruction2);
   return domain_id1 >= 0 && domain_id1 == domain_id2;
 }
 
-int64 HloDomainMap::GetDomainId(HloInstruction* instruction) const {
+int64 HloDomainMap::GetDomainId(const HloInstruction* instruction) const {
   return FindOrDefault(instruction_to_domain_, instruction, -1);
 }
 
-int64 HloDomainMap::GetDomainMetadataId(HloInstruction* instruction) const {
+int64 HloDomainMap::GetDomainMetadataId(
+    const HloInstruction* instruction) const {
   return FindOrDie(domain_metadata_id_, instruction);
 }
 
@@ -200,7 +201,8 @@ StatusOr<std::unique_ptr<DomainMetadata::Domain>> HloDomainMap::CreateDomain(
   return std::move(domain);
 }
 
-bool HloDomainMap::IsDomainInstruction(HloInstruction* instruction) const {
+bool HloDomainMap::IsDomainInstruction(
+    const HloInstruction* instruction) const {
   if (instruction->opcode() != HloOpcode::kDomain) {
     return false;
   }
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h b/tensorflow/compiler/xla/service/hlo_domain_map.h
index c8d581b746..bce7d1aa7c 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.h
@@ -58,21 +58,21 @@ class HloDomainMap {
   }
 
   // Checks whether two instructions are within the same domain.
-  bool InSameDomain(HloInstruction* instruction1,
-                    HloInstruction* instruction2) const;
+  bool InSameDomain(const HloInstruction* instruction1,
+                    const HloInstruction* instruction2) const;
 
   // Checks whether instruction is a kDomain instruction of the kind we are
   // currently processing.
-  bool IsDomainInstruction(HloInstruction* instruction) const;
+  bool IsDomainInstruction(const HloInstruction* instruction) const;
 
   // Retrieves the domain identifier of the instruction, or -1 in case
   // instruction is not found within any domain.
-  int64 GetDomainId(HloInstruction* instruction) const;
+  int64 GetDomainId(const HloInstruction* instruction) const;
 
   // Returns the unique id of the domain metadata for the domain the given
   // instruction belongs to. The given instruction must not be a kDomain
   // instruction since each domain instruction is associated with 2 domains.
-  int64 GetDomainMetadataId(HloInstruction* instruction) const;
+  int64 GetDomainMetadataId(const HloInstruction* instruction) const;
 
  private:
   // Map used for representing instruction ordering, i.e.
@@ -119,8 +119,8 @@ class HloDomainMap {
 
   string domain_kind_;
   std::vector<std::unique_ptr<DomainMetadata::Domain>> instruction_domains_;
-  absl::flat_hash_map<HloInstruction*, int64> instruction_to_domain_;
-  absl::flat_hash_map<HloInstruction*, int64> domain_metadata_id_;
+  absl::flat_hash_map<const HloInstruction*, int64> instruction_to_domain_;
+  absl::flat_hash_map<const HloInstruction*, int64> domain_metadata_id_;
 };
 
 }  // namespace xla
-- 
GitLab


From 75f57a8b7836a1ed3cda8ba81c88f6caf15cf0c6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 08:35:14 -0700
Subject: [PATCH 1233/1357] Remove Dims from types.h, create build structure.

PiperOrigin-RevId: 216191084
---
 .../contrib/lite/kernels/internal/BUILD       | 16 ++++++++++++
 .../lite/kernels/internal/legacy_types.h      | 26 +++++++++++++++++++
 .../internal/reference/legacy_reference_ops.h |  7 ++++-
 .../internal/reference/reference_ops.h        |  5 ----
 4 files changed, 48 insertions(+), 6 deletions(-)
 create mode 100644 tensorflow/contrib/lite/kernels/internal/legacy_types.h

diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
index afb5ec05df..5c9ca6e910 100644
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -49,6 +49,20 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "legacy_types",
+    srcs = [],
+    hdrs = [
+        "compatibility.h",
+        "legacy_types.h",
+        "types.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite/kernels:op_macros",
+        "@com_google_absl//absl/base:core_headers",
+    ],
+)
+
 config_setting(
     name = "arm",
     values = {
@@ -198,6 +212,7 @@ cc_library(
         ":strided_slice_logic",
         ":tensor_utils",
         ":types",
+        ":legacy_types",
         ":legacy_reference_base",
         ":round",
         "//third_party/eigen3",
@@ -336,6 +351,7 @@ cc_library(
         ":quantization_util",
         ":round",
         ":strided_slice_logic",
+        ":legacy_types",
         ":types",
         "@gemmlowp",
         "//tensorflow/contrib/lite/c:c_api_internal",
diff --git a/tensorflow/contrib/lite/kernels/internal/legacy_types.h b/tensorflow/contrib/lite/kernels/internal/legacy_types.h
new file mode 100644
index 0000000000..2e4d3137f5
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/legacy_types.h
@@ -0,0 +1,26 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_
+#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_
+
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+// TODO(b/116772710): Insert legacy Dims<> code in here.
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h
index be99240b1f..c8b64cfd96 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h
@@ -19,10 +19,10 @@ limitations under the License.
 #include <sys/types.h>
 
 #include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/legacy_types.h"
 #include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h"
 #include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h"
 #include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
-#include "tensorflow/contrib/lite/kernels/internal/types.h"
 
 namespace tflite {
 
@@ -30,6 +30,11 @@ namespace reference_ops {
 
 static constexpr int kDepthwiseReverseShift = -1;
 
+inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) {
+  shape->BuildFrom(
+      {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
+}
+
 inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                           const float* filter_data, const Dims<4>& filter_dims,
                           const float* bias_data, const Dims<4>& bias_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 59f17ae854..19d23fa80b 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -100,11 +100,6 @@ gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
 
 namespace reference_ops {
 
-inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) {
-  shape->BuildFrom(
-      {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
-}
-
 template <typename T>
 int CountLeadingZeros(T integer_input) {
   static_assert(std::is_unsigned<T>::value,
-- 
GitLab


From 5f308cb408eb46ec9af0546be6b9ae1d5166b185 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 09:06:04 -0700
Subject: [PATCH 1234/1357] Optimize PinToHostOptimizer by adding cache, also
 add PinToHostOptimizer to benchmarks.

original runtime: 4.83492736816 secs
w/ cache runtime: 2.19033999443 secs

PiperOrigin-RevId: 216195286
---
 tensorflow/core/grappler/op_types.cc          |  22 ++-
 .../optimizers/pin_to_host_optimizer.cc       | 162 ++++++++++++------
 .../optimizers/pin_to_host_optimizer.h        |   4 +-
 .../optimizers/pin_to_host_optimizer_test.cc  |  76 +++++---
 4 files changed, 179 insertions(+), 85 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 1b5a215987..cbf5c8e038 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -102,15 +102,19 @@ bool IsConjugateTranspose(const NodeDef& node) {
 }
 
 bool IsControlFlow(const NodeDef& node) {
-  // clang-format off
-  return node.op() == "ControlTrigger" ||
-         node.op() == "Enter" ||
-         node.op() == "Exit" ||
-         node.op() == "LoopCond" ||
-         node.op() == "Merge" ||
-         node.op() == "NextIteration" ||
-         node.op() == "Switch";
-  // clang-format on
+  // TODO(williamchan): Add a microbenchmark to compare FlatSet vs. iterative
+  // string comparison.
+  static const gtl::FlatSet<string>* const kControFlowOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
+          "ControlTrigger",
+          "Enter",
+          "Exit",
+          "LoopCond",
+          "Merge",
+          "NextIteration",
+          "Switch",
+      }));
+  return kControFlowOps->count(node.op()) > 0;
 }
 
 bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 8ed4271fa4..29a3b2b74c 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -25,16 +25,29 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
 namespace grappler {
+
 namespace internal {
 
+namespace {
 // TODO(williamchan): Change this constant to be something smarter, maybe
 // dynamically determined.
 constexpr int64 kTensorMaxSize = 64;
 
+struct OpDevicePortHasher {
+  std::size_t operator()(const std::tuple<string, string, int>& x) const {
+    uint64 code = Hash64Combine(Hash64(std::get<0>(x)), Hash64(std::get<1>(x)));
+
+    return Hash64Combine(code, hash<int>()(std::get<2>(x)));
+  }
+};
+using OpDevicePortOnHostMap =
+    gtl::FlatMap<std::tuple<string, string, int>, bool, OpDevicePortHasher>;
+
 // All the nodes that should be blacklisted and not swapped.
 bool IsBlacklisted(const NodeDef& node) {
   return
@@ -82,10 +95,10 @@ Status TryFindKernelDef(const std::vector<DeviceType>& devices,
 
 // Checks if a node's output port is host friendly.
 // Roughly this means checking if the output port is on Host memory.
-Status IsNodeOutputPortHostFriendly(const GraphView& graph,
-                                    GraphProperties* properties,
-                                    const NodeDef& node, int port_id,
-                                    bool* is_candidate) {
+Status IsNodeOutputPortHostFriendly(
+    const GraphView& graph, GraphProperties* properties, const NodeDef& node,
+    int port_id, OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache,
+    bool* is_candidate) {
   *is_candidate = false;
 
   // Make sure we are not a blacklisted op.
@@ -117,7 +130,8 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
     for (const auto& fanin : graph.GetFanins(node, false)) {
       bool fanin_candidate = false;
       TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
-          graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
+          graph, properties, *fanin.node, fanin.port_id,
+          op_device_outport_pinned_to_host_cache, &fanin_candidate));
       if (!fanin_candidate) {
         return Status::OK();
       }
@@ -132,11 +146,22 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
     return Status::OK();
   }
 
+  // Check `op_device_outport_pinned_to_host_cache` for our
+  // {op, device, port_id} combo to see if the arg is pinned on Host.
+  const std::tuple<string, string, int> cache_key(node.op(), node.device(),
+                                                  port_id);
+  auto it = op_device_outport_pinned_to_host_cache->find(cache_key);
+  if (it != op_device_outport_pinned_to_host_cache->end()) {
+    *is_candidate = it->second;
+    return Status::OK();
+  }
+
   // Check if op's output port is pinned to HostMemory.
   const OpDef* op = nullptr;
   Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
   if (!s.ok()) {
     LOG(WARNING) << "Could not find OpDef for : " << node.op();
+    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -146,6 +171,7 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
     LOG(WARNING) << "Invalid port: " << port_id << "!\n"
                  << node.DebugString() << "\n"
                  << op->DebugString();
+    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -155,6 +181,7 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
                        &kernel);
   if (!s.ok()) {
     LOG(INFO) << "Could not find KernelDef for: " << node.op();
+    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -166,22 +193,35 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
     }
   }
 
+  op_device_outport_pinned_to_host_cache->emplace(cache_key, *is_candidate);
+
   return Status::OK();
 }
 
 // Checks if a node's input port is Host friendly.
 // Roughly this means checking if the input port is on Host memory.
-bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
+bool IsNodeInputPortHostFriendly(
+    const NodeDef& node, int port_id,
+    OpDevicePortOnHostMap* op_device_inport_pinned_to_host_cache) {
   // If node is on Host, assume its inputs are Host friendly.
   if (str_util::StrContains(node.device(), DEVICE_CPU)) {
     return true;
   }
 
+  // Check `op_device_inport_pinned_to_host_cache` for our
+  // {op, device, port_id} combo to see if the arg is pinned on Host.
+  std::tuple<string, string, int> cache_key(node.op(), node.device(), port_id);
+  auto it = op_device_inport_pinned_to_host_cache->find(cache_key);
+  if (it != op_device_inport_pinned_to_host_cache->end()) {
+    return it->second;
+  }
+
   // Check if op's input port is pinned to HostMemory.
   const OpDef* op = nullptr;
   Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
   if (!s.ok()) {
     LOG(WARNING) << "Could not find OpDef for : " << node.op();
+    op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
     return false;
   }
   const int input_arg_id = OpInputPortIdToArgId(node, *op, port_id);
@@ -192,16 +232,20 @@ bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
       {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node, &kernel);
   if (!s.ok()) {
     LOG(INFO) << "Could not find KernelDef for: " << node.op();
+    op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
     return false;
   }
 
   // Check if the input_arg is pinned to Host.
   for (const string& host_memory_arg : kernel->host_memory_arg()) {
     if (op->input_arg(input_arg_id).name() == host_memory_arg) {
+      op_device_inport_pinned_to_host_cache->emplace(cache_key, true);
       return true;
     }
   }
 
+  op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
+
   return false;
 }
 
@@ -211,18 +255,20 @@ bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
 // 2] Check if node can run on Host.
 // 3] Check all input/outputs are Host "friendly" (atm, friendly means small,
 //    ints, and pinned to Host).
-Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
-                           const NodeDef& node, bool* is_candidate) {
+Status IsNodeHostCandidate(
+    const GraphView& graph, GraphProperties* properties, const NodeDef& node,
+    OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache,
+    bool* is_candidate) {
   *is_candidate = false;
 
-  // Check if node already on CPU.
-  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
-    *is_candidate = true;
+  // Skip these node types.
+  if (IsBlacklisted(node)) {
     return Status::OK();
   }
 
-  // Skip these node types.
-  if (IsBlacklisted(node)) {
+  // Check if node already on CPU.
+  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
+    *is_candidate = true;
     return Status::OK();
   }
 
@@ -232,17 +278,6 @@ Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
     return Status::OK();
   }
 
-  // Check all inputs are Host friendly.
-  for (const GraphView::OutputPort& fanin :
-       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
-    bool fanin_candidate = false;
-    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
-        graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
-    if (!fanin_candidate) {
-      return Status::OK();
-    }
-  }
-
   // Check all outputs are Host friendly.
   if (!properties->has_properties()) {
     // This is an expensive call, call it lazily.
@@ -255,16 +290,42 @@ Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
     }
   }
 
+  // Check all inputs are Host friendly.
+  for (const GraphView::OutputPort& fanin :
+       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
+    bool fanin_candidate = false;
+    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
+        graph, properties, *fanin.node, fanin.port_id,
+        op_device_outport_pinned_to_host_cache, &fanin_candidate));
+    if (!fanin_candidate) {
+      return Status::OK();
+    }
+  }
+
   *is_candidate = true;
   return Status::OK();
 }
 
-string TryFindHostDevice(const gtl::FlatSet<string>& devices,
-                         bool has_device_cpu, const string& device) {
+bool IsTPUGraphDef(const GraphDef& def) {
+  for (const auto& node : def.node()) {
+    if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
+        node.op() == "TPUPartitionedCall") {
+      return true;
+    }
+  }
+  return false;
+}
+}  // end namespace
+
+// Tries to swap `device` to a Host device from `devices`. Returns true iff
+// there was a swap.
+bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, string* device) {
   // Force this node onto the CPU.
-  if (device.empty() && has_device_cpu) {
-    return "/device:CPU:0";
-  } else if (str_util::StrContains(device, DEVICE_GPU)) {
+  if (device->empty() && has_device_cpu) {
+    *device = "/device:CPU:0";
+    return true;
+  } else if (str_util::StrContains(*device, DEVICE_GPU)) {
     // Sometimes the cluster can have:
     //   devices = {"/device:CPU:0", "/device:XLA_GPU:0"}
     // and we need to handle them properly.
@@ -272,27 +333,19 @@ string TryFindHostDevice(const gtl::FlatSet<string>& devices,
          {std::pair<string, string>("GPU", "CPU:0"),
           std::pair<string, string>("/device", "/device:CPU:0")}) {
       const string device_host =
-          strings::StrCat(device.substr(0, device.rfind(device_match.first)),
+          strings::StrCat(device->substr(0, device->rfind(device_match.first)),
                           device_match.second);
       if (devices.find(device_host) != devices.end()) {
-        return device_host;
+        *device = device_host;
+        return true;
       }
     }
   }
 
-  // We couldn't find an appropriate Host device, return original device.
-  return device;
-}
-
-bool IsTPUGraphDef(const GraphDef& def) {
-  for (const auto& node : def.node()) {
-    if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
-        node.op() == "TPUPartitionedCall") {
-      return true;
-    }
-  }
+  // We couldn't find an appropriate Host device, return false.
   return false;
 }
+
 }  // end namespace internal
 
 Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
@@ -324,20 +377,26 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   // All the Const nodes, and their original devices in topological order.
   std::vector<std::pair<NodeDef*, string>> const_nodes;
 
+  // Cache to map {op, device, port} -> bool on whether it is pinned to host.
+  internal::OpDevicePortOnHostMap op_device_outport_pinned_to_host_cache;
+  internal::OpDevicePortOnHostMap op_device_inport_pinned_to_host_cache;
+
   for (auto& node : *optimized_graph->mutable_node()) {
     bool is_candidate = false;
-    TF_RETURN_IF_ERROR(
-        internal::IsNodeHostCandidate(graph, &properties, node, &is_candidate));
+    TF_RETURN_IF_ERROR(internal::IsNodeHostCandidate(
+        graph, &properties, node, &op_device_outport_pinned_to_host_cache,
+        &is_candidate));
     if (!is_candidate) {
       continue;
     }
 
-    if (IsConstant(node)) {
-      const_nodes.emplace_back(&node, node.device());
+    const string original_device = node.device();
+    const bool swapped = internal::TrySwapToHostDevice(devices, has_device_cpu,
+                                                       node.mutable_device());
+    // Keep track of all Const nodes that we swapped.
+    if (swapped && IsConstant(node)) {
+      const_nodes.emplace_back(&node, original_device);
     }
-    // Try and swap the device to Host.
-    node.set_device(
-        internal::TryFindHostDevice(devices, has_device_cpu, node.device()));
   }
 
   // Traverse all `const_nodes`, and map them back to GPU greedily.
@@ -349,8 +408,9 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     // this node back onto the original device.
     for (const GraphView::InputPort& fanout : graph.GetFanouts(*node, false)) {
       // The consumer is not Host friendly, swap it back to the original device.
-      if (!internal::IsNodeInputPortHostFriendly(*fanout.node,
-                                                 fanout.port_id)) {
+      if (!internal::IsNodeInputPortHostFriendly(
+              *fanout.node, fanout.port_id,
+              &op_device_inport_pinned_to_host_cache)) {
         node->set_device(device);
         break;
       }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
index d557a03463..bed4a9ef95 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
@@ -26,8 +26,8 @@ namespace tensorflow {
 namespace grappler {
 namespace internal {
 // Try and find an appropriate Host device in `devices` given `device`.
-string TryFindHostDevice(const gtl::FlatSet<string>& devices,
-                         bool has_device_cpu, const string& device);
+bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, string* device);
 }  // end namespace internal
 
 // Optimize TensorFlow ops that should be swapped into the CPU to avoid
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
index 7c64529441..9bb030b220 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
@@ -28,30 +28,60 @@ namespace {
 
 class PinToHostOptimizerTest : public GrapplerTest {};
 
-TEST_F(PinToHostOptimizerTest, TryFindHostDevice) {
+TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceNoDevices) {
   gtl::FlatSet<string> devices = {};
-  EXPECT_EQ("ABC", internal::TryFindHostDevice(devices, false, "ABC"));
-
-  devices = {"/device:CPU:0", "/device:XLA_GPU:0"};
-  EXPECT_EQ(internal::TryFindHostDevice(devices, true, ""), "/device:CPU:0");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:0"),
-            "/device:CPU:0");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:*"),
-            "/device:CPU:0");
-
-  devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"};
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
-            "/device:XLA_CPU:0");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
-            "/device:XLA_CPU:0");
-
-  devices = {"/device:XLA_GPU:0"};
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
-            "/device:XLA_GPU:0");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
-            "/device:XLA_GPU:*");
+
+  string device = "ABC";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "ABC");
+}
+
+TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceCpuXlaGpu) {
+  gtl::FlatSet<string> devices = {"/device:CPU:0", "/device:XLA_GPU:0"};
+
+  string device = "";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
+  EXPECT_EQ(device, "/device:CPU:0");
+
+  device = "/device:XLA_GPU:0";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
+  EXPECT_EQ(device, "/device:CPU:0");
+
+  device = "/device:XLA_GPU:*";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
+  EXPECT_EQ(device, "/device:CPU:0");
+}
+
+TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaCpuXlaGpu) {
+  gtl::FlatSet<string> devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"};
+
+  string device = "";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_TRUE(device.empty());
+
+  device = "/device:XLA_GPU:0";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "/device:XLA_CPU:0");
+
+  device = "/device:XLA_GPU:*";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "/device:XLA_CPU:0");
+}
+
+TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaGpu) {
+  gtl::FlatSet<string> devices = {"/device:XLA_GPU:0"};
+
+  string device = "";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_TRUE(device.empty());
+
+  device = "/device:XLA_GPU:0";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "/device:XLA_GPU:0");
+
+  device = "/device:XLA_GPU:*";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "/device:XLA_GPU:*");
 }
 
 TEST_F(PinToHostOptimizerTest, OptimizeSmallOpsToHost) {
-- 
GitLab


From 411b9baa39636030181fdff15d2e985824b03d61 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Mon, 8 Oct 2018 09:42:50 -0700
Subject: [PATCH 1235/1357] Reduce tolerances for rmsprop_test float16, to fix
 OSS builds.

PiperOrigin-RevId: 216200439
---
 tensorflow/contrib/optimizer_v2/rmsprop_test.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
index 44301ffe9e..83f5971039 100644
--- a/tensorflow/contrib/optimizer_v2/rmsprop_test.py
+++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
@@ -157,8 +157,11 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
         self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
         self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
         self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
-        self.assertAllCloseAccordingToType(var0_np, var0.eval())
-        self.assertAllCloseAccordingToType(var1_np, var1.eval())
+        # TODO(b/117393988): Reduce tolerances for float16.
+        self.assertAllCloseAccordingToType(
+            var0_np, var0.eval(), half_rtol=3e-3, half_atol=3e-3)
+        self.assertAllCloseAccordingToType(
+            var1_np, var1.eval(), half_rtol=3e-3, half_atol=3e-3)
 
   @parameterized.parameters([dtypes.float32, dtypes.float64])
   def testMinimizeSparseResourceVariable(self, dtype):
-- 
GitLab


From f435e776216c7a86f619a17064fd6e1deee638b3 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Mon, 8 Oct 2018 09:49:38 -0700
Subject: [PATCH 1236/1357] Avoid adding spurious ops when colocating with
 resource variables.

Prior to this change, tf.colocate_with(v) would insert spurious operations (a ReadVariableOp and an Identity) in the graph when v is a resource variable, and then
colocate the operations within the block with those newly added, otherwise disconnected, operations.

This commit avoids adding the unnecessary ReadVariableOp/Identity nodes and colocates
operations within the block with the VarHandleOp.

PiperOrigin-RevId: 216201638
---
 .../python/parameter_server_strategy_test.py  |  4 ++-
 tensorflow/python/framework/ops.py            | 28 ++++++++++++++++---
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
index 353d11a583..9c112e4f85 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
@@ -262,7 +262,9 @@ class ParameterServerStrategyTestBase(
           h = f + 1.0
         self.assertEqual(
             device_util.canonicalize(u.device), tower_variable_device)
-        self.assertEqual(device_util.canonicalize(x.device), h.device)
+        self.assertEqual(
+            device_util.canonicalize(x.device),
+            device_util.canonicalize(h.device))
         return y_add, z_add, f
 
       y, z, f = d.call_for_each_tower(model_fn)
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 8bb177939e..77c2bc930e 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -4140,10 +4140,7 @@ class Graph(object):
     if op is None and not ignore_existing:
       raise ValueError("Trying to reset colocation (op is None) but "
                        "ignore_existing is not True")
-
-    if op is not None and not isinstance(op, Operation):
-      # We always want to colocate with the reference op.
-      op = internal_convert_to_tensor_or_indexed_slices(op, as_ref=True).op
+    op = _op_to_colocate_with(op)
 
     # By default, colocate_with resets the device function stack,
     # since colocate_with is typically used in specific internal
@@ -6168,4 +6165,27 @@ def _operation_conversion_error(op, dtype=None, name=None, as_ref=False):
                                                                name, as_ref))
 
 
+def _op_to_colocate_with(v):
+  """Operation object corresponding to v to use for colocation constraints."""
+  if v is None:
+    return None
+  if isinstance(v, Operation):
+    return v
+  # We always want to colocate with the reference op.
+  # When 'v' is a ResourceVariable, the reference op is the handle creating op.
+  #
+  # What this should be is:
+  # if isinstance(v, ResourceVariable):
+  #   return v.handle.op
+  # However, that would require a circular import dependency.
+  # As of October 2018, there were attempts underway to remove
+  # colocation constraints altogether. Assuming that will
+  # happen soon, perhaps this hack to work around the circular
+  # import dependency is acceptable.
+  if hasattr(v, "handle") and hasattr(v.handle, "op") and isinstance(
+      v.handle.op, Operation):
+    return v.handle.op
+  return internal_convert_to_tensor_or_indexed_slices(v, as_ref=True).op
+
+
 register_tensor_conversion_function(Operation, _operation_conversion_error)
-- 
GitLab


From 87315f41ced19136819cef56ef37636c52c474de Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 09:49:59 -0700
Subject: [PATCH 1237/1357] Remove Raises documentation on imperative_grads for
 ValueErrror not raised.

PiperOrigin-RevId: 216201714
---
 tensorflow/python/eager/imperative_grad.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py
index 5f5af4ab6c..5c35860e9d 100644
--- a/tensorflow/python/eager/imperative_grad.py
+++ b/tensorflow/python/eager/imperative_grad.py
@@ -51,11 +51,6 @@ def imperative_grad(
 
   Raises:
     RuntimeError: if something goes wrong.
-    ValueError: if there is no sequence of differentiable operations connecting
-     a source and any target Tensor. This can happen either if the target is
-     not computed based on the source, if the tracing was set up incorrectly,
-     or if only non-differentiable functions of the source were used in the
-     computation of target.
   """
   return pywrap_tensorflow.TFE_Py_TapeGradient(
       tape._tape,  # pylint: disable=protected-access
-- 
GitLab


From 07df147ab20c4a5329148e5fb5f7f6b187cb73a4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 09:50:08 -0700
Subject: [PATCH 1238/1357] Enable PinToHostOptimizer.

PiperOrigin-RevId: 216201732
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index c3d70a1fdf..3f33b16ba8 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -107,7 +107,8 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
   MK_OPT("scoped_allocator",
          new ScopedAllocatorOptimizer(cfg_.scoped_allocator_optimization(),
                                       cfg_.scoped_allocator_opts()));
-  MK_OPT("small_op", new PinToHostOptimizer(cfg_.pin_to_host_optimization()));
+  MK_OPT("pin_to_host",
+         new PinToHostOptimizer(cfg_.pin_to_host_optimization()));
 
   return std::unique_ptr<GraphOptimizer>();
 }
@@ -139,7 +140,7 @@ Status MetaOptimizer::InitializeOptimizers(
   if (cfg_.remapping() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
   }
-  if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) {
+  if (cfg_.pin_to_host_optimization() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<PinToHostOptimizer>());
   }
   if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
@@ -527,7 +528,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
          cfg.scoped_allocator_optimization() == RewriterConfig::ON ||
-         cfg.pin_to_host_optimization() == RewriterConfig::ON ||
+         cfg.pin_to_host_optimization() != RewriterConfig::OFF ||
          !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }
 
-- 
GitLab


From da3abf6afeaf781b932bce9ccb6c17da911e49b6 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 8 Oct 2018 09:53:31 -0700
Subject: [PATCH 1239/1357] Benchmark for comparing original cond and cond_v2
 performance.

This benchmark creates many intermediates values, so we can make sure there's no performance overhead (it looks like there might be currently, or it might be from some other difference). It also runs in a defun and in legacy graph mode.

Results from my machine:

entry {
  name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v1_defun"
  iters: 500
  wall_time: 1.25822591782
}

entry {
  name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v2_defun"
  iters: 500
  wall_time: 5.99376106262
}

entry {
  name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v1_graph"
  iters: 500
  wall_time: 2.05277585983
}

entry {
  name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v2_graph"
  iters: 500
  wall_time: 2.84808516502
}

Clearly we have some work to do! I haven't looked into the time differences at all yet.

PiperOrigin-RevId: 216202325
---
 tensorflow/python/BUILD                       |  13 ++
 .../python/ops/control_flow_ops_benchmark.py  | 122 ++++++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 tensorflow/python/ops/control_flow_ops_benchmark.py

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index da3c56db92..822d596995 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -5196,6 +5196,19 @@ cuda_py_test(
     main = "ops/concat_benchmark.py",
 )
 
+cuda_py_test(
+    name = "control_flow_ops_benchmark",
+    srcs = ["ops/control_flow_ops_benchmark.py"],
+    additional_deps = [
+        ":client_testlib",
+        ":constant_op",
+        ":control_flow_ops",
+        ":framework_ops",
+        "//tensorflow/python/eager:function",
+    ],
+    main = "ops/control_flow_ops_benchmark.py",
+)
+
 cuda_py_test(
     name = "conv2d_benchmark",
     size = "large",
diff --git a/tensorflow/python/ops/control_flow_ops_benchmark.py b/tensorflow/python/ops/control_flow_ops_benchmark.py
new file mode 100644
index 0000000000..9ba5ff2c0f
--- /dev/null
+++ b/tensorflow/python/ops/control_flow_ops_benchmark.py
@@ -0,0 +1,122 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Benchmark for control flow ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+
+from tensorflow.python.client import session
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.platform import test
+
+
+class CondWithManyIntermediatesBenchmark(test.Benchmark):
+  """Checks the runtime performance of outputting all intermediates."""
+
+  NUM_INTERMEDIATES = 1000
+  NUM_ITERS = 500
+  NUM_WARM_UP_ITERS = 50
+
+  def _create_cond(self, x):
+
+    def branch_fn():
+      # Use a random value so the adds can't be constant folded.
+      return x + sum(random_ops.random_normal([])
+                     for _ in range(self.NUM_INTERMEDIATES))
+
+    # Use a dynamic predicate to make sure the cond isn't constant folded.
+    return control_flow_ops.cond(math_ops.not_equal(x, -1),
+                                 branch_fn, lambda: 0.0)
+
+  def _benchmark_defun(self):
+    """Benchmarks cond in a defun."""
+
+    @function.defun
+    def cond_fn(x):
+      return self._create_cond(x)
+
+    # Warm up
+    for _ in range(self.NUM_WARM_UP_ITERS):
+      cond_fn(0.0)
+
+    start_time = time.time()
+
+    for _ in range(self.NUM_ITERS):
+      cond_fn(0.0)
+
+    self.report_benchmark(
+        wall_time=time.time() - start_time,
+        iters=self.NUM_ITERS)
+
+  def _benchmark_graph(self):
+    """Benchmarks cond in legacy graph mode."""
+    with context.graph_mode():
+      with ops.Graph().as_default():
+        x = array_ops.placeholder(dtypes.float32)
+        cond_val = self._create_cond(x)
+
+        with session.Session() as sess:
+          cond_fn = sess.make_callable(cond_val, [x])
+
+          # Warm up
+          for _ in range(self.NUM_WARM_UP_ITERS):
+            cond_fn(0.0)
+
+          start_time = time.time()
+
+          for _ in range(self.NUM_ITERS):
+            cond_fn(0.0)
+
+          self.report_benchmark(
+              wall_time=time.time() - start_time,
+              iters=self.NUM_ITERS)
+
+  def benchmark_cond_v1_defun(self):
+    old_val = control_flow_ops.ENABLE_COND_V2
+    control_flow_ops.ENABLE_COND_V2 = False
+    self._benchmark_defun()
+    control_flow_ops.ENABLE_COND_V2 = old_val
+
+  def benchmark_cond_v2_defun(self):
+    old_val = control_flow_ops.ENABLE_COND_V2
+    control_flow_ops.ENABLE_COND_V2 = True
+    self._benchmark_defun()
+    control_flow_ops.ENABLE_COND_V2 = old_val
+
+  def benchmark_cond_v1_graph(self):
+    old_val = control_flow_ops.ENABLE_COND_V2
+    control_flow_ops.ENABLE_COND_V2 = False
+    self._benchmark_graph()
+    control_flow_ops.ENABLE_COND_V2 = old_val
+
+  def benchmark_cond_v2_graph(self):
+    old_val = control_flow_ops.ENABLE_COND_V2
+    control_flow_ops.ENABLE_COND_V2 = True
+    self._benchmark_graph()
+    control_flow_ops.ENABLE_COND_V2 = old_val
+
+if __name__ == "__main__":
+  ops.enable_eager_execution()
+  test.main()
-- 
GitLab


From 6dd826b856acf6b060379251bfd91a950ee2b0af Mon Sep 17 00:00:00 2001
From: Makoto Uchida <muchida@google.com>
Date: Mon, 8 Oct 2018 10:00:18 -0700
Subject: [PATCH 1240/1357] Fix typo

PiperOrigin-RevId: 216203408
---
 .../experimental/kernel_tests/reader_dataset_ops_test_base.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
index fe0b3b5f3b..77df8310d4 100644
--- a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
@@ -64,7 +64,7 @@ class FixedLengthRecordDatasetTestBase(test_base.DatasetTestBase):
 
 
 class MakeBatchedFeaturesDatasetTestBase(test_base.DatasetTestBase):
-  """Base class for setting up and testing `make_batched_feature_dataset`."""
+  """Base class for setting up and testing `make_batched_features_dataset`."""
 
   def setUp(self):
     super(MakeBatchedFeaturesDatasetTestBase, self).setUp()
-- 
GitLab


From 0e1ba8886b6a333b1ed8ed7548c55041c34e9623 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 10:09:50 -0700
Subject: [PATCH 1241/1357] Fix compilation in unique_op when Eigen::Index !=
 int64.

PiperOrigin-RevId: 216205396
---
 tensorflow/core/kernels/unique_op.cc | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index 3559baa18e..3bdcfc90b8 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -108,7 +108,7 @@ class UniqueOp : public OpKernel {
 
       std::unordered_map<T, TIndex> uniq;
       uniq.reserve(2 * N);
-      for (int64 i = 0, j = 0; i < N; ++i) {
+      for (Eigen::Index i = 0, j = 0; i < N; ++i) {
         auto it = uniq.insert(std::make_pair(Tin(i), j));
         idx_vec(i) = it.first->second;
         if (it.second) {
@@ -131,19 +131,20 @@ class UniqueOp : public OpKernel {
       // General implementation when unique is run over multiple elements.
       auto Tin = input.shaped<T, 3>(new_sizes);
 
-      auto hash_fn = [&Tin](const int64& key) {
+      auto hash_fn = [&Tin](const Eigen::Index& key) {
         size_t h = 0;
-        for (int64 i = 0; i < Tin.dimension(0); i++) {
-          for (int64 j = 0; j < Tin.dimension(2); j++) {
+        for (Eigen::Index i = 0; i < Tin.dimension(0); i++) {
+          for (Eigen::Index j = 0; j < Tin.dimension(2); j++) {
             h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
           }
         }
         return h;
       };
 
-      auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
-        for (int64 i = 0; i < Tin.dimension(0); i++) {
-          for (int64 j = 0; j < Tin.dimension(2); j++) {
+      auto equal_to_fn = [&Tin](const Eigen::Index& lhs,
+                                const Eigen::Index& rhs) {
+        for (Eigen::Index i = 0; i < Tin.dimension(0); i++) {
+          for (Eigen::Index j = 0; j < Tin.dimension(2); j++) {
             if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
               return false;
             }
-- 
GitLab


From 0e42fd6d0a88b30ab57959f38c79bea19d745ec3 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 8 Oct 2018 10:14:58 -0700
Subject: [PATCH 1242/1357] [tf.data] Adding specialization for `MapDataset`,
 `ParallelMapDataset`, and `MapAndBatchDataset` whose user-provided functions
 have the property that each output argument take its value directly from an
 input argument (e.g. `lambda x, y: y, x`). This specialization can produce
 the result without having to schedule the function using the executor.

PiperOrigin-RevId: 216206232
---
 tensorflow/core/kernels/data/BUILD            |  14 ++
 tensorflow/core/kernels/data/dataset_utils.cc |  47 +++++
 tensorflow/core/kernels/data/dataset_utils.h  |  20 ++
 .../core/kernels/data/dataset_utils_test.cc   |  46 +++++
 .../core/kernels/data/filter_dataset_op.cc    | 162 ++++++---------
 .../kernels/data/map_and_batch_dataset_op.cc  | 187 +++++++++++-------
 .../core/kernels/data/map_dataset_op.cc       |  62 ++++--
 .../kernels/data/parallel_map_dataset_op.cc   |  79 +++++---
 .../kernels/data/parallel_map_iterator.cc     |  17 +-
 .../core/kernels/data/parallel_map_iterator.h |   2 +-
 .../kernels/data/parse_example_dataset_op.cc  |   2 +-
 .../kernel_tests/map_and_batch_test.py        |  31 +++
 .../kernel_tests/filter_dataset_op_test.py    |   2 +-
 .../data/kernel_tests/map_dataset_op_test.py  |  95 +++++++--
 .../python/data/kernel_tests/test_base.py     |  29 +++
 15 files changed, 565 insertions(+), 230 deletions(-)
 create mode 100644 tensorflow/core/kernels/data/dataset_utils_test.cc

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 451f8c1a6c..37c1c54786 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -45,6 +45,16 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "dataset_utils_test",
+    srcs = ["dataset_utils_test.cc"],
+    deps = [
+        ":dataset_utils",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 cc_library(
     name = "captured_function",
     srcs = ["captured_function.cc"],
@@ -205,6 +215,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -232,6 +243,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -245,6 +257,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -285,6 +298,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         ":parallel_map_iterator",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index e10833f525..a40f7f2146 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -15,10 +15,57 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
 
 namespace tensorflow {
 namespace data {
 
+Status ComputeShortCircuitIndices(OpKernelContext* ctx,
+                                  const NameAttrList& func,
+                                  std::vector<int>* indices) {
+  FunctionLibraryRuntime::Handle fn_handle;
+  TF_RETURN_IF_ERROR(ctx->function_library()->Instantiate(
+      func.name(), AttrSlice(&func.attr()), &fn_handle));
+  auto cleanup = gtl::MakeCleanup([ctx, fn_handle]() {
+    Status s = ctx->function_library()->ReleaseHandle(fn_handle);
+    if (!s.ok()) {
+      LOG(WARNING) << "Failed to release handle: " << s.error_message();
+    }
+  });
+
+  const FunctionBody* fn_body =
+      ctx->function_library()->GetFunctionBody(fn_handle);
+  indices->resize(fn_body->ret_nodes.size());
+  for (size_t i = 0; i < fn_body->ret_nodes.size(); ++i) {
+    Node* ret_node = fn_body->ret_nodes[i];
+    Node* ret_input_node;
+    TF_RETURN_IF_ERROR(ret_node->input_node(0, &ret_input_node));
+    if (ret_input_node->def().op() == FunctionLibraryDefinition::kArgOp) {
+      TF_RETURN_IF_ERROR(
+          GetNodeAttr(ret_input_node->def(), "index", &((*indices)[i])));
+    } else {
+      indices->clear();
+      break;
+    }
+  }
+  return Status::OK();
+}
+
+std::vector<bool> ComputeMoveVector(const std::vector<int>& indices) {
+  std::map<int, int> last_use;
+  for (size_t i = 0; i < indices.size(); ++i) {
+    last_use[indices[i]] = i;
+  }
+  std::vector<bool> can_move;
+  can_move.resize(indices.size());
+  for (size_t i = 0; i < indices.size(); ++i) {
+    can_move[i] = last_use[indices[i]] == i;
+  }
+  return can_move;
+}
+
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index 6ec1350cd4..d777062293 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -22,6 +22,26 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
+// This method is used to determine whether we can short-circuit the evaluation
+// of the user-defined function `func`. Short-circuting is possible if every
+// function output corresponds to one of its inputs (e.g. `f(x) = x`, `f(x,y) =
+// (y,x)`, or `f(x) = (x,x)`).
+//
+// If short-circuiting is possible, the method stores the mapping from output
+// indices to input indices in `indices`. Otherwise, `indices` will be empty.
+//
+// Returns non-ok status if analysis of the function fails.
+//
+// TODO(jsimsa): Extend this to support constants as well.
+Status ComputeShortCircuitIndices(OpKernelContext* ctx,
+                                  const NameAttrList& func,
+                                  std::vector<int>* indices);
+
+// Given a vector that maps output indices to input indices, return a vector
+// that identifies for which output indices can we move the input (assuming
+// output indices are processed left to right).
+std::vector<bool> ComputeMoveVector(const std::vector<int>& indices);
+
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc
new file mode 100644
index 0000000000..43295b8ebb
--- /dev/null
+++ b/tensorflow/core/kernels/data/dataset_utils_test.cc
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/data/dataset_utils.h"
+
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace data {
+namespace {
+
+TEST(DatasetUtils, ComputeMoveVector) {
+  struct TestCase {
+    std::vector<int> indices;
+    std::vector<bool> expected;
+  };
+
+  TestCase test_cases[] = {
+      TestCase{{}, {}},
+      TestCase{{1}, {true}},
+      TestCase{{1, 1}, {false, true}},
+      TestCase{{1, 2}, {true, true}},
+      TestCase{{1, 1, 2}, {false, true, true}},
+      TestCase{{1, 2, 2}, {true, false, true}},
+  };
+
+  for (auto& test_case : test_cases) {
+    EXPECT_EQ(test_case.expected, ComputeMoveVector(test_case.indices));
+  }
+}
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index 00884314a9..be7d182a1f 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -18,9 +18,11 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -31,67 +33,84 @@ namespace {
 
 class FilterDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using FilterIteratorPredicate =
+      std::function<Status(IteratorContext*, std::vector<Tensor>, bool*)>;
+
   explicit FilterDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
+      : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_));
   }
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    FunctionLibraryRuntime::Handle pred_handle;
-    OP_REQUIRES_OK(ctx,
-                   ctx->function_library()->Instantiate(
-                       func_.name(), AttrSlice(&func_.attr()), &pred_handle));
-    auto cleanup = gtl::MakeCleanup([ctx, pred_handle]() {
-      OP_REQUIRES_OK(ctx, ctx->function_library()->ReleaseHandle(pred_handle));
-    });
-
-    const FunctionBody* pred_body =
-        ctx->function_library()->GetFunctionBody(pred_handle);
-    OP_REQUIRES(ctx, pred_body->ret_nodes.size() == 1,
-                errors::InvalidArgument(
-                    "predicate function must have a single return value."));
-    Node* ret_node = pred_body->ret_nodes[0];
-    Node* ret_input_node;
-    OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node));
-
     std::unique_ptr<CapturedFunction> captured_func;
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    if (ret_input_node->def().op() == "_Arg") {
-      int32 index = -1;
-      OP_REQUIRES_OK(ctx, GetNodeAttr(ret_input_node->def(), "index", &index));
-      *output = new FilterTensorDataset(ctx, input, func_,
-                                        std::move(captured_func), index);
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+    OP_REQUIRES(ctx, indices.size() <= 1,
+                errors::InvalidArgument(
+                    "predicate function has more than one return value."));
+
+    FilterIteratorPredicate filter_pred;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      filter_pred = [raw_captured_func](IteratorContext* ctx,
+                                        const std::vector<Tensor>& args,
+                                        bool* out_matched) {
+        std::vector<Tensor> result;
+        TF_RETURN_IF_ERROR(
+            raw_captured_func->RunWithBorrowedArgs(ctx, args, &result));
+
+        if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
+            result[0].NumElements() != 1) {
+          return errors::InvalidArgument(
+              "Filter predicate `f` must return a scalar bool.");
+        }
+        *out_matched = result[0].scalar<bool>()();
+        return Status::OK();
+      };
     } else {
-      *output = new FilterFunctionDataset(ctx, input, func_,
-                                          std::move(captured_func));
+      filter_pred = [indices](IteratorContext* ctx,
+                              const std::vector<Tensor>& args,
+                              bool* out_matched) {
+        const Tensor& predicate = args[indices[0]];
+        if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
+          return errors::InvalidArgument(
+              "Filter predicate `f` must return a scalar bool.");
+        }
+        *out_matched = predicate.scalar<bool>()();
+        return Status::OK();
+      };
     }
+
+    *output = new Dataset(ctx, input, func_, std::move(captured_func),
+                          std::move(filter_pred));
   }
 
  private:
-  const int graph_def_version_;
-
-  class FilterDatasetBase : public DatasetBase {
+  class Dataset : public DatasetBase {
    public:
-    FilterDatasetBase(OpKernelContext* ctx, const DatasetBase* input,
-                      const NameAttrList& func,
-                      std::unique_ptr<CapturedFunction> captured_func)
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func,
+            std::unique_ptr<CapturedFunction> captured_func,
+            FilterIteratorPredicate filter_pred)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
-          captured_func_(std::move(captured_func)) {
+          captured_func_(std::move(captured_func)),
+          filter_pred_(std::move(filter_pred)) {
       input_->Ref();
     }
 
-    ~FilterDatasetBase() override { input_->Unref(); }
+    ~Dataset() override { input_->Unref(); }
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::Filter")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::Filter")},
+          filter_pred_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -133,17 +152,15 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       return Status::OK();
     }
 
-    virtual Status EvaluatePredicate(IteratorContext* ctx,
-                                     const std::vector<Tensor>& element,
-                                     bool* out_matched) const = 0;
-
    private:
-    class Iterator : public DatasetIterator<FilterDatasetBase> {
+    class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<FilterDatasetBase>(params),
+      explicit Iterator(const Params& params,
+                        FilterIteratorPredicate filter_pred)
+          : DatasetIterator<Dataset>(params),
             filtered_elements_(0),
-            dropped_elements_(0) {
+            dropped_elements_(0),
+            filter_pred_(std::move(filter_pred)) {
         std::vector<string> components =
             str_util::Split(params.prefix, "::", str_util::SkipEmpty());
         prefix_end_ = components.back();
@@ -180,8 +197,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          TF_RETURN_IF_ERROR(
-              dataset()->EvaluatePredicate(ctx, *out_tensors, &matched));
+          TF_RETURN_IF_ERROR(filter_pred_(ctx, *out_tensors, &matched));
           if (!matched) {
             // Clear the output tensor list since it didn't match.
             out_tensors->clear();
@@ -251,64 +267,14 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       int64 filtered_elements_ GUARDED_BY(mu_);
       int64 dropped_elements_ GUARDED_BY(mu_);
+      const FilterIteratorPredicate filter_pred_;
       string prefix_end_;
     };
 
     const DatasetBase* const input_;
     const NameAttrList func_;
-
-   protected:
     const std::unique_ptr<CapturedFunction> captured_func_;
-  };
-
-  class FilterFunctionDataset : public FilterDatasetBase {
-   public:
-    using FilterDatasetBase::FilterDatasetBase;
-
-   protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
-      // TODO(mrry): Avoid blocking a threadpool thread. We will need to
-      // stack-rip the iterators and use async kernels.
-      std::vector<Tensor> result;
-      TF_RETURN_IF_ERROR(
-          captured_func_->RunWithBorrowedArgs(ctx, element, &result));
-
-      if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
-          result[0].NumElements() != 1) {
-        return errors::InvalidArgument(
-            "Filter predicate `f` must return a scalar bool.");
-      }
-      *out_matched = result[0].scalar<bool>()();
-      return Status::OK();
-    }
-  };
-
-  class FilterTensorDataset : public FilterDatasetBase {
-   public:
-    FilterTensorDataset(OpKernelContext* ctx, const DatasetBase* input,
-                        const NameAttrList& func,
-                        std::unique_ptr<CapturedFunction> captured_func,
-                        int32 index)
-        : FilterDatasetBase(ctx, input, func, std::move(captured_func)),
-          index_(index) {}
-
-   protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
-      const Tensor& predicate = element[index_];
-      if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
-        return errors::InvalidArgument(
-            "Filter predicate `f` must return a scalar bool.");
-      }
-      *out_matched = predicate.scalar<bool>()();
-      return Status::OK();
-    }
-
-   private:
-    const int32 index_;
+    const FilterIteratorPredicate filter_pred_;
   };
 
  private:
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index bf08970560..f45a239793 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/inplace_ops_functor.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
@@ -29,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -41,6 +43,10 @@ namespace {
 // transformation more robust.
 class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using MapAndBatchIteratorFunction =
+      std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
+                         std::shared_ptr<std::vector<Tensor>>, StatusCallback)>;
+
   explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx)
       : UnaryDatasetOpKernel(ctx),
         op_version_(ctx->def().op() == "MapAndBatchDataset" ? 1 : 2) {
@@ -91,31 +97,73 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    *output = new Dataset(ctx, input, batch_size, num_parallel_calls,
-                          drop_remainder, output_types_, output_shapes_, func_,
-                          std::move(captured_func), &ctx->eigen_cpu_device());
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    MapAndBatchIteratorFunction map_func;
+    CapturedFunction* raw_captured_func = captured_func.get();
+    if (indices.empty()) {
+      map_func = [raw_captured_func](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args,
+                     std::shared_ptr<std::vector<Tensor>> out_tensors,
+                     StatusCallback done) {
+        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors.get(),
+                                    std::move(done), prefix);
+      };
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [raw_captured_func, indices, can_move](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args,
+                     std::shared_ptr<std::vector<Tensor>> out_tensors,
+                     StatusCallback done) {
+        const std::vector<Tensor>& captured_inputs =
+            raw_captured_func->captured_inputs();
+        size_t num_args = args.size();
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (indices[i] < num_args) {
+            if (can_move[i]) {
+              out_tensors->push_back(std::move(args[indices[i]]));
+            } else {
+              out_tensors->push_back(args[indices[i]]);
+            }
+          } else {
+            out_tensors->push_back(captured_inputs[indices[i] - num_args]);
+          }
+        }
+        done(Status::OK());
+      };
+    }
+
+    *output = new Dataset(ctx, input, func_, batch_size, num_parallel_calls,
+                          drop_remainder, output_types_, output_shapes_,
+                          std::move(captured_func), &ctx->eigen_cpu_device(),
+                          std::move(map_func));
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
-    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func, int64 batch_size,
             int64 num_parallel_calls, bool drop_remainder,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
-            const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
-            const Eigen::ThreadPoolDevice* device)
+            const Eigen::ThreadPoolDevice* device,
+            MapAndBatchIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
+          func_(func),
           batch_size_(batch_size),
           num_parallel_calls_(num_parallel_calls),
           drop_remainder_(drop_remainder),
           output_types_(output_types),
           output_shapes_(output_shapes),
-          map_fn_(func),
           captured_func_(std::move(captured_func)),
-          device_(device) {
+          device_(device),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -123,8 +171,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::MapAndBatch")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::MapAndBatch")},
+          map_func_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -143,7 +192,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      TF_RETURN_IF_ERROR(b->AddFunction(ctx, map_fn_.name()));
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
       Node* input_graph_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
       Node* batch_size_node;
@@ -165,7 +214,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         other_arguments_types.emplace_back(t.dtype());
       }
       AttrValue f;
-      b->BuildAttrValue(map_fn_, &f);
+      b->BuildAttrValue(func_, &f);
       AttrValue other_arguments_types_attr;
       b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
 
@@ -185,12 +234,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
+      explicit Iterator(const Params& params,
+                        MapAndBatchIteratorFunction map_func)
           : DatasetIterator<Dataset>(params),
             mu_(std::make_shared<mutex>()),
             cond_var_(std::make_shared<condition_variable>()),
             num_parallel_calls_(std::make_shared<model::SharedState>(
-                params.dataset->num_parallel_calls_, mu_, cond_var_)) {}
+                params.dataset->num_parallel_calls_, mu_, cond_var_)),
+            map_func_(std::move(map_func)) {}
 
       ~Iterator() override {
         mutex_lock l(*mu_);
@@ -297,44 +348,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         int64 num_calls;  // access guarded by owner's mutex
       };
 
-      void Callback(const std::shared_ptr<IteratorContext>& ctx,
-                    const std::shared_ptr<BatchResult>& result,
-                    const std::shared_ptr<std::vector<Tensor>>& return_values,
-                    int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) {
-        result->UpdateStatus(status);
-        if (status.ok()) {
-          EnsureOutputAllocated(ctx, result, return_values);
-          for (size_t i = 0; i < return_values->size(); ++i) {
-            const Tensor& tensor = return_values->at(i);
-            Tensor* batch = &(result->output)[i];
-            if (tensor.NumElements() !=
-                (batch->NumElements() / batch->dim_size(0))) {
-              TensorShape batch_shape = batch->shape();
-              batch_shape.RemoveDim(0);
-              result->UpdateStatus(errors::InvalidArgument(
-                  "Cannot add tensor to the batch: number of elements does not "
-                  "match. Shapes are: [tensor]: ",
-                  tensor.shape().DebugString(),
-                  ", [batch]: ", batch_shape.DebugString()));
-              break;
-            }
-            // TODO(mrry): Add a version of DoParallelConcat that allows us to
-            // move `tensor` where possible, to speed up string tensor batching.
-            Status copy_status = ::tensorflow::functor::DoParallelConcat(
-                *dataset()->device_, tensor, offset, batch);
-            if (!copy_status.ok()) {
-              result->UpdateStatus(copy_status);
-              break;
-            }
-          }
-          {
-            mutex_lock l(result->mu);
-            result->num_elements++;
-          }
-        }
-        CallCompleted(result);
-      }
-
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
           LOCKS_EXCLUDED(*mu_) {
         mutex_lock l(*mu_);
@@ -363,21 +376,48 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           return;
         }
 
-        // Call `captured_func_(input_element)`, using `Callback` to store the
-        // result in `result`.
-        (*ctx->runner())(std::bind(
-            [this, result, offset](std::shared_ptr<IteratorContext> ctx,
-                                   std::vector<Tensor> input_element) {
-              std::shared_ptr<std::vector<Tensor>> return_values(
-                  new std::vector<Tensor>());
-              dataset()->captured_func_->RunAsync(
-                  ctx.get(), std::move(input_element), return_values.get(),
-                  [this, ctx, result, return_values, offset](Status status) {
-                    Callback(ctx, result, return_values, offset, status);
-                  },
-                  prefix());
-            },
-            ctx, std::move(input_element)));
+        std::shared_ptr<std::vector<Tensor>> return_values =
+            std::make_shared<std::vector<Tensor>>();
+        auto done = [this, ctx, result, return_values, offset](Status status) {
+          result->UpdateStatus(status);
+          if (status.ok()) {
+            EnsureOutputAllocated(ctx, result, return_values);
+            for (size_t i = 0; i < return_values->size(); ++i) {
+              const Tensor& tensor = return_values->at(i);
+              Tensor* batch = &(result->output)[i];
+              if (tensor.NumElements() !=
+                  (batch->NumElements() / batch->dim_size(0))) {
+                TensorShape batch_shape = batch->shape();
+                batch_shape.RemoveDim(0);
+                result->UpdateStatus(errors::InvalidArgument(
+                    "Cannot add tensor to the batch: number of elements does "
+                    "not match. Shapes are: [tensor]: ",
+                    tensor.shape().DebugString(),
+                    ", [batch]: ", batch_shape.DebugString()));
+                break;
+              }
+              // TODO(mrry): Add a version of DoParallelConcat that allows us to
+              // move `tensor` where possible, to speed up string tensor
+              // batching.
+              Status copy_status = ::tensorflow::functor::DoParallelConcat(
+                  *dataset()->device_, tensor, offset, batch);
+              if (!copy_status.ok()) {
+                result->UpdateStatus(copy_status);
+                break;
+              }
+            }
+            {
+              mutex_lock l(result->mu);
+              result->num_elements++;
+            }
+          }
+          CallCompleted(result);
+        };
+
+        // Apply the map function on `input_element`, storing the result in
+        // `return_values`, and invoking `done` when finished.
+        map_func_(ctx.get(), prefix(), std::move(input_element),
+                  std::move(return_values), std::move(done));
       }
 
       Status CopyPartialBatch(Tensor* output, const Tensor& value,
@@ -404,7 +444,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
-          std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
+          auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
           runner_thread_.reset(ctx->env()->StartThread(
               {}, "runner_thread",
               std::bind(&Iterator::RunnerThread, this, ctx_copy)));
@@ -509,8 +549,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
             while (!busy()) {
               if (call_counter_ % dataset()->batch_size_ == 0) {
-                batch_results_.emplace_back(
-                    new BatchResult(dataset()->batch_size_));
+                batch_results_.push_back(
+                    std::make_shared<BatchResult>(dataset()->batch_size_));
               }
               int64 offset = call_counter_++ % dataset()->batch_size_;
               new_calls.emplace_back(batch_results_.back(), offset);
@@ -527,7 +567,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader,
                              size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
-        batch_results_.emplace_back(new BatchResult(dataset()->batch_size_));
+        batch_results_.push_back(
+            std::make_shared<BatchResult>(dataset()->batch_size_));
         std::shared_ptr<BatchResult> result = batch_results_.back();
         string prefix = strings::StrCat("batch_results_", index);
         mutex_lock l(result->mu);
@@ -653,6 +694,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       const std::shared_ptr<condition_variable> cond_var_;
       // Identifies the maximum number of parallel calls.
       const std::shared_ptr<model::SharedState> num_parallel_calls_;
+      const MapAndBatchIteratorFunction map_func_;
+
       // Counts the number of outstanding calls for this batch.
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
       // Counts the total number of calls.
@@ -671,9 +714,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     const bool drop_remainder_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
-    const NameAttrList map_fn_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const Eigen::ThreadPoolDevice* device_;  // not owned
+    const MapAndBatchIteratorFunction map_func_;
   };
 
   const int op_version_;
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index f112e1dc43..6b6ffabf4f 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -17,7 +17,9 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -28,6 +30,9 @@ namespace {
 
 class MapDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using MapIteratorFunction = std::function<Status(
+      IteratorContext*, std::vector<Tensor>, std::vector<Tensor>*)>;
+
   explicit MapDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
@@ -43,8 +48,42 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    MapIteratorFunction map_func;
+    CapturedFunction* raw_captured_func = captured_func.get();
+    if (indices.empty()) {
+      map_func = [raw_captured_func](IteratorContext* ctx,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors) {
+        return raw_captured_func->Run(ctx, std::move(args), out_tensors);
+      };
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [raw_captured_func, indices, can_move](
+                     IteratorContext* ctx, std::vector<Tensor> args,
+                     std::vector<Tensor>* out_tensors) {
+        const std::vector<Tensor>& captured_inputs =
+            raw_captured_func->captured_inputs();
+        size_t num_args = args.size();
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (indices[i] < num_args) {
+            if (can_move[i]) {
+              out_tensors->push_back(std::move(args[indices[i]]));
+            } else {
+              out_tensors->push_back(args[indices[i]]);
+            }
+          } else {
+            out_tensors->push_back(captured_inputs[indices[i] - num_args]);
+          }
+        }
+        return Status::OK();
+      };
+    }
+
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
-                          output_types_, output_shapes_);
+                          output_types_, output_shapes_, std::move(map_func));
   }
 
  private:
@@ -54,13 +93,15 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
             const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
             const DataTypeVector& output_types,
-            const std::vector<PartialTensorShape>& output_shapes)
+            const std::vector<PartialTensorShape>& output_shapes,
+            MapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
           captured_func_(std::move(captured_func)),
           output_types_(output_types),
-          output_shapes_(output_shapes) {
+          output_shapes_(output_shapes),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -68,8 +109,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::Map")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::Map")}, map_func_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -116,8 +157,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<Dataset>(params) {}
+      explicit Iterator(const Params& params, MapIteratorFunction map_func)
+          : DatasetIterator<Dataset>(params), map_func_(std::move(map_func)) {}
 
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
@@ -139,10 +180,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
           return Status::OK();
         }
 
-        // TODO(mrry): Avoid blocking a threadpool thread. We will need to
-        // stack-rip the iterators and use async kernels.
-        Status s =
-            dataset()->captured_func_->Run(ctx, std::move(args), out_tensors);
+        Status s = map_func_(ctx, args, out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
@@ -167,6 +205,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
      private:
       std::unique_ptr<IteratorBase> input_impl_;
+      const MapIteratorFunction map_func_;
     };
 
     const DatasetBase* const input_;
@@ -174,6 +213,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
     const std::unique_ptr<CapturedFunction> captured_func_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
+    const MapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 6abe6c8338..3a14924fba 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/parallel_map_iterator.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -56,9 +57,55 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    ParallelMapIteratorFunction map_func;
+    CapturedFunction* raw_captured_func = captured_func.get();
+    if (indices.empty()) {
+      map_func = [raw_captured_func](IteratorContext* ctx, const string& prefix,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors,
+                                     StatusCallback done) {
+        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors,
+                                    std::move(done), prefix);
+      };
+      if (!use_inter_op_parallelism_) {
+        map_func = [map_func](IteratorContext* ctx, const string& prefix,
+                              std::vector<Tensor> args,
+                              std::vector<Tensor>* out_tensors,
+                              StatusCallback done) {
+          (*ctx->runner())(std::bind(map_func, ctx, prefix, std::move(args),
+                                     out_tensors, std::move(done)));
+        };
+      }
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [raw_captured_func, indices, can_move](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args, std::vector<Tensor>* out_tensors,
+                     StatusCallback done) {
+        const std::vector<Tensor>& captured_inputs =
+            raw_captured_func->captured_inputs();
+        size_t num_args = args.size();
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (indices[i] < num_args) {
+            if (can_move[i]) {
+              out_tensors->push_back(std::move(args[indices[i]]));
+            } else {
+              out_tensors->push_back(args[indices[i]]);
+            }
+          } else {
+            out_tensors->push_back(captured_inputs[indices[i] - num_args]);
+          }
+        }
+        done(Status::OK());
+      };
+    }
+
     *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_,
                           output_shapes_, use_inter_op_parallelism_,
-                          std::move(captured_func));
+                          std::move(captured_func), std::move(map_func));
   }
 
  private:
@@ -69,7 +116,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
             bool use_inter_op_parallelism,
-            std::unique_ptr<CapturedFunction> captured_func)
+            std::unique_ptr<CapturedFunction> captured_func,
+            ParallelMapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
@@ -77,7 +125,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
           output_types_(output_types),
           output_shapes_(output_shapes),
           use_inter_op_parallelism_(use_inter_op_parallelism),
-          captured_func_(std::move(captured_func)) {
+          captured_func_(std::move(captured_func)),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -89,26 +138,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
         return captured_func_->Instantiate(ctx);
       };
 
-      const string& new_prefix = strings::StrCat(prefix, "::ParallelMap");
-      ParallelMapIteratorFunction map_func =
-          [this, new_prefix](IteratorContext* ctx,
-                             std::vector<Tensor> input_element,
-                             std::vector<Tensor>* result, StatusCallback done) {
-            captured_func_->RunAsync(ctx, std::move(input_element), result,
-                                     std::move(done), new_prefix);
-          };
-      if (!use_inter_op_parallelism_) {
-        map_func = [map_func](
-                       IteratorContext* ctx, std::vector<Tensor> input_element,
-                       std::vector<Tensor>* result, StatusCallback done) {
-          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
-                                     result, std::move(done)));
-        };
-      }
-
-      return NewParallelMapIterator({this, new_prefix}, input_,
-                                    std::move(init_func), std::move(map_func),
-                                    num_parallel_calls_);
+      return NewParallelMapIterator(
+          {this, strings::StrCat(prefix, "::ParallelMap")}, input_,
+          std::move(init_func), map_func_, num_parallel_calls_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -176,6 +208,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> output_shapes_;
     const bool use_inter_op_parallelism_;
     const std::unique_ptr<CapturedFunction> captured_func_;
+    const ParallelMapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 13bd4b6036..ebf41925c9 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,6 +22,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -179,7 +180,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   void EnsureRunnerThreadStarted(IteratorContext* ctx)
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
-      std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
+      auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
       runner_thread_.reset(ctx->env()->StartThread(
           {}, "runner_thread",
           std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
@@ -208,15 +209,15 @@ class ParallelMapIterator : public DatasetBaseIterator {
       return;
     }
 
-    // Call `func_(input_element)`, store the result in `result->return_values`,
-    // and notify `result->notification` to unblock a consumer.
     auto done = [this, result](Status status) {
       result->status.Update(status);
       CallCompleted(result);
     };
 
-    map_func_(ctx.get(), std::move(input_element), &result->return_values,
-              std::move(done));
+    // Apply the map function on `input_element`, storing the result in
+    // `result->return_values`, and invoking `done` when finished.
+    map_func_(ctx.get(), prefix(), std::move(input_element),
+              &result->return_values, std::move(done));
   }
 
   Status ProcessResult(const std::shared_ptr<InvocationResult>& result,
@@ -349,9 +350,9 @@ std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBase* input_dataset,
     std::function<Status(IteratorContext*)> init_func,
     ParallelMapIteratorFunction map_func, int32 num_parallel_calls) {
-  return std::unique_ptr<IteratorBase>(
-      new ParallelMapIterator(params, input_dataset, std::move(init_func),
-                              std::move(map_func), num_parallel_calls));
+  return MakeUnique<ParallelMapIterator>(
+      params, input_dataset, std::move(init_func), std::move(map_func),
+      num_parallel_calls);
 }
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h
index dc26c5cf25..813f13c9e4 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.h
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h
@@ -30,7 +30,7 @@ namespace data {
 // 3. A `std::vector<Tensor>*` to which the function will write the result.
 // 4. A `StatusCallback` that should be invoked when the function is complete.
 using ParallelMapIteratorFunction =
-    std::function<void(IteratorContext*, std::vector<Tensor>,
+    std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
                        std::vector<Tensor>*, StatusCallback)>;
 
 // Returns a new iterator that applies `map_func` to the elements of
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index 1d1a717062..7de5ea8860 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -182,7 +182,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      auto map_fn = [this](IteratorContext* ctx,
+      auto map_fn = [this](IteratorContext* ctx, const string& prefix,
                            std::vector<Tensor> input_element,
                            std::vector<Tensor>* result, StatusCallback done) {
         (*ctx->runner())([this, ctx, input_element, result, done]() {
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index afd0fc3abf..d444c4082e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -332,6 +332,37 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       for _ in range(10):
         self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
 
+  @parameterized.named_parameters(
+      ("Identity", None, lambda x: x, None),
+      ("Replicate", None, lambda x: (x, x), None),
+      ("Swap", (None, None), lambda x, y: (y, x), None),
+      ("Project", (None, None), lambda x, y: x, None),
+  )
+  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
+    dataset = self.structuredDataset(structure).repeat().apply(
+        batching.map_and_batch(map_fn, batch_size=10))
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      if isinstance(structure, tuple):
+        expected = map_fn(
+            *sess.run(self.structuredElement(structure, shape=[10])))
+      else:
+        expected = map_fn(
+            sess.run(self.structuredElement(structure, shape=[10])))
+      self.assertAllEqual(expected, sess.run(get_next))
+
+  def testShortCircuitCapturedInput(self):
+    captured_t = array_ops.placeholder(dtypes.int64, shape=[])
+    dataset = self.structuredDataset(None).repeat().apply(
+        batching.map_and_batch(lambda x: captured_t, batch_size=10))
+    iterator = dataset.make_initializable_iterator()
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer, feed_dict={captured_t: 42})
+      self.assertAllEqual([42] * 10, sess.run(get_next))
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
index 6b7afafa5d..a0c6b37a6d 100644
--- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
@@ -156,7 +156,7 @@ class FilterDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testReturnComponent(self):
+  def testShortCircuit(self):
     iterator = (
         dataset_ops.Dataset.zip(
             (dataset_ops.Dataset.range(10),
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 0c372ebb10..4683b1db91 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -622,7 +622,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
         self.assertSparseValuesEqual(actual, _sparse(i))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -649,7 +649,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
         self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval())
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -783,19 +783,72 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.assertTrue(all(tids[0] == tid for tid in tids))
 # pylint: enable=g-long-lambda
 
+  @parameterized.named_parameters(
+      ("SequentialIdentity", None, lambda x: x, None),
+      ("SequentialReplicate", None, lambda x: (x, x), None),
+      ("SequentialSwap", (None, None), lambda x, y: (y, x), None),
+      ("SequentialProject", (None, None), lambda x, y: x, None),
+      ("ParallelIdentity", None, lambda x: x, 10),
+      ("ParallelReplicate", None, lambda x: (x, x), 10),
+      ("ParallelSwap", (None, None), lambda x, y: (y, x), 10),
+      ("ParallelProject", (None, None), lambda x, y: x, 10),
+  )
+  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
+    dataset = self.structuredDataset(structure).repeat().map(
+        map_fn, num_parallel_calls=num_parallel_calls)
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      if isinstance(structure, tuple):
+        expected = map_fn(*sess.run(self.structuredElement(structure)))
+      else:
+        expected = map_fn(sess.run(self.structuredElement(structure)))
+      self.assertEqual(expected, sess.run(get_next))
+
+  @parameterized.named_parameters(
+      ("Sequential", None),
+      ("Parallel", 10),
+  )
+  def testShortCircuitCapturedInput(self, num_parallel_calls):
+    captured_t = array_ops.placeholder(dtypes.int64, shape=[])
+    dataset = self.structuredDataset(None).repeat().map(
+        lambda x: captured_t, num_parallel_calls=num_parallel_calls)
+    iterator = dataset.make_initializable_iterator()
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer, feed_dict={captured_t: 42})
+      self.assertEqual(42, sess.run(get_next))
+
 
 class MapDatasetBenchmark(test.Benchmark):
 
   def benchmarkChainOfMaps(self):
     chain_lengths = [0, 1, 2, 5, 10, 20, 50]
     for chain_length in chain_lengths:
-      for use_inter_op_parallelism in [False, True]:
+      for mode in ["general", "single-threaded", "short-circuit"]:
+        if mode == "general":
+          map_fn = lambda x: x + 1
+          use_inter_op_parallelism = True
+          print_label = ""
+          benchmark_label = ""
+        if mode == "single-threaded":
+          map_fn = lambda x: x + 1
+          use_inter_op_parallelism = False
+          print_label = " (single threaded mode)"
+          benchmark_label = "_single_threaded"
+        if mode == "short-circuit":
+          map_fn = lambda x: x
+          use_inter_op_parallelism = True  # should not have any significance
+          print_label = " (short circuit mode)"
+          benchmark_label = "_short_circuit"
+
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
           for _ in range(chain_length):
             dataset = dataset_ops.MapDataset(
                 dataset,
-                lambda x: x,
+                map_fn,
                 use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -813,25 +866,39 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset chain length%s: %d Median wall time: %f" %
-                  (" (single threaded mode)" if not use_inter_op_parallelism
-                   else "", chain_length, median_wall_time))
+                  (print_label, chain_length, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
                 name="benchmark_map_dataset_chain_latency_%d%s" %
-                (chain_length, "_single_threaded"
-                 if not use_inter_op_parallelism else ""))
+                (chain_length, benchmark_label))
 
   def benchmarkMapFanOut(self):
     fan_outs = [1, 2, 5, 10, 20, 50, 100]
     for fan_out in fan_outs:
-      for use_inter_op_parallelism in [False, True]:
+      for mode in ["general", "single-threaded", "short-circuit"]:
+        if mode == "general":
+          map_fn = lambda *xs: [x + 1 for x in xs]
+          use_inter_op_parallelism = True
+          print_label = ""
+          benchmark_label = ""
+        if mode == "single-threaded":
+          map_fn = lambda *xs: [x + 1 for x in xs]
+          use_inter_op_parallelism = False
+          print_label = " (single threaded mode)"
+          benchmark_label = "_single_threaded"
+        if mode == "short-circuit":
+          map_fn = lambda *xs: xs
+          use_inter_op_parallelism = True  # should not have any significance
+          print_label = " (short circuit mode)"
+          benchmark_label = "_short_circuit"
+
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(
               tuple(0 for _ in range(fan_out))).repeat(None)
           dataset = dataset_ops.MapDataset(
               dataset,
-              lambda *xs: xs,
+              map_fn,
               use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -849,14 +916,12 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset fan out%s: %d Median wall time: %f" %
-                  (" (single threaded mode)" if not use_inter_op_parallelism
-                   else "", fan_out, median_wall_time))
+                  (print_label, fan_out, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
-                name="benchmark_map_dataset_fan_out_%d%s" %
-                (fan_out, "_single_threaded"
-                 if not use_inter_op_parallelism else ""))
+                name="benchmark_map_dataset_fan_out_%d%s" % (fan_out,
+                                                             benchmark_label))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
index b730e10949..b73a94e683 100644
--- a/tensorflow/python/data/kernel_tests/test_base.py
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -19,10 +19,13 @@ from __future__ import print_function
 
 import re
 
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
@@ -107,3 +110,29 @@ class DatasetTestBase(test.TestCase):
       with self.assertRaisesRegexp(exception_class,
                                    re.escape(expected_message)):
         self.evaluate(next2())
+
+  def structuredDataset(self, structure, shape=None, dtype=dtypes.int64):
+    """Returns a singleton dataset with the given structure."""
+    if shape is None:
+      shape = []
+    if structure is None:
+      return dataset_ops.Dataset.from_tensors(
+          array_ops.zeros(shape, dtype=dtype))
+    else:
+      return dataset_ops.Dataset.zip(
+          tuple([
+              self.structuredDataset(substructure, shape, dtype)
+              for substructure in structure
+          ]))
+
+  def structuredElement(self, structure, shape=None, dtype=dtypes.int64):
+    """Returns an element with the given structure."""
+    if shape is None:
+      shape = []
+    if structure is None:
+      return array_ops.zeros(shape, dtype=dtype)
+    else:
+      return tuple([
+          self.structuredElement(substructure, shape, dtype)
+          for substructure in structure
+      ])
-- 
GitLab


From a04cd08ee7a8c5245d76a59849e1f7e8ba8a3f52 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 8 Oct 2018 10:20:52 -0700
Subject: [PATCH 1243/1357] Allow TensorSpec objects as arguments to defun's
 get_concrete_function

Will be helpful for specifying serving signatures when exporting SavedModels

PiperOrigin-RevId: 216207284
---
 tensorflow/python/eager/function.py      | 24 +++++----------
 tensorflow/python/eager/function_test.py | 37 ++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index bafe07de2b..93168826b1 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -855,20 +855,12 @@ class Function(object):
     return ret
 
 
-def _get_defun_inputs_from_signature(signature):
-  """Maps a signature to graph-construction inputs."""
-  function_inputs = [
-      graph_placeholder(spec.dtype, spec.shape)
-      for spec in nest.flatten(signature)
-  ]
-  return nest.pack_sequence_as(signature, function_inputs)
-
-
 def _get_defun_inputs_from_args(args):
   """Maps python function args to graph-construction inputs."""
   function_inputs = [
       graph_placeholder(arg.dtype, arg.shape)
-      if isinstance(arg, ops.Tensor) else arg for arg in nest.flatten(args)
+      if isinstance(arg, (ops.Tensor, tensor_spec.TensorSpec))
+      else arg for arg in nest.flatten(args)
   ]
   return nest.pack_sequence_as(args, function_inputs)
 
@@ -912,12 +904,12 @@ def func_graph_from_py_func(name,
   with func_graph.as_default(), AutomaticControlDependencies() as a:
     variable_scope.get_variable_scope().set_use_resource(True)
 
-    if signature is None:
-      func_args = _get_defun_inputs_from_args(args)
-      func_kwargs = _get_defun_inputs_from_args(kwargs)
-    else:
-      func_args = _get_defun_inputs_from_signature(signature)
-      func_kwargs = {}
+    if signature is not None:
+      args = signature
+      kwargs = {}
+
+    func_args = _get_defun_inputs_from_args(args)
+    func_kwargs = _get_defun_inputs_from_args(kwargs)
 
     # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
     # Variables to help check whether mutation happens in calling the function
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index a2cfb4b476..57e545be69 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -172,6 +172,43 @@ class FunctionTest(test.TestCase):
     out = sq_op(t)
     self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
 
+  def testInputSpecGraphFunction(self):
+    matmul = function.defun(math_ops.matmul)
+
+    @function.defun
+    def sq(a):
+      return matmul(a, a)
+
+    sq_op = sq.get_concrete_function(
+        tensor_spec.TensorSpec((None, None), dtypes.float32))
+    self.assertEqual([None, None], sq_op.output_shapes.as_list())
+
+    t1 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+    out1 = sq_op(t1)
+    self.assertAllEqual(out1, math_ops.matmul(t1, t1).numpy())
+
+    t2 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+    out2 = sq_op(t2)
+    self.assertAllEqual(out2, math_ops.matmul(t2, t2).numpy())
+
+  def testNestedInputSpecGraphFunction(self):
+    matmul = function.defun(math_ops.matmul)
+
+    @function.defun
+    def sq(mats):
+      ((a, b),) = mats
+      return matmul(a, b)
+
+    sq_op = sq.get_concrete_function(
+        [(tensor_spec.TensorSpec((None, None), dtypes.float32),
+          tensor_spec.TensorSpec((None, None), dtypes.float32))])
+    self.assertEqual([None, None], sq_op.output_shapes.as_list())
+
+    t1 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+    t2 = constant_op.constant([[1.4, 2.4], [3.4, 4.4]])
+    out = sq_op(t1, t2)  # Flattened structure for inputs to the graph function
+    self.assertAllEqual(out, math_ops.matmul(t1, t2).numpy())
+
   def testExecutingStatelessDefunConcurrently(self):
 
     @function.defun
-- 
GitLab


From 049d98c84ca7474459175914ca49c1fa3c11581d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 10:28:59 -0700
Subject: [PATCH 1244/1357] Wait for shared resources to initialize before
 initializing local resources. shared resources are very similar to global
 variables functionally and they are initialized at the same time but since
 workers are only waiting for global variables being initialized, there is a
 race condition that sometimes the shared resource is not ready.

PiperOrigin-RevId: 216208679
---
 tensorflow/python/training/monitored_session.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index 82f0e3be52..a479f38165 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -195,8 +195,12 @@ class Scaffold(object):
           default_ready_op)
     if self._ready_for_local_init_op is None:
       def default_ready_for_local_init_op():
-        return variables.report_uninitialized_variables(
-            variables.global_variables())
+        return array_ops.concat([
+            variables.report_uninitialized_variables(
+                variables.global_variables()),
+            resources.report_uninitialized_resources(
+                resources.shared_resources())
+        ], 0)
       self._ready_for_local_init_op = Scaffold.get_or_default(
           'ready_for_local_init_op', ops.GraphKeys.READY_FOR_LOCAL_INIT_OP,
           default_ready_for_local_init_op)
-- 
GitLab


From 153decedefc8da1fbd0717f4223b4b053e7aa517 Mon Sep 17 00:00:00 2001
From: Karmel Allison <karmel@google.com>
Date: Mon, 8 Oct 2018 10:36:38 -0700
Subject: [PATCH 1245/1357] Add support for SequenceExamples to
 sequence_feature_columns

PiperOrigin-RevId: 216210141
---
 .../contrib/estimator/python/estimator/rnn.py |  54 +-
 tensorflow/contrib/feature_column/BUILD       |  21 +
 .../feature_column/sequence_feature_column.py |  72 +-
 ...equence_feature_column_integration_test.py | 280 ++++++
 .../sequence_feature_column_test.py           | 912 ++++++++++++------
 .../python/feature_column/feature_column.py   |  53 +-
 tensorflow/python/ops/parsing_ops.py          |  13 +-
 7 files changed, 1018 insertions(+), 387 deletions(-)
 create mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py

diff --git a/tensorflow/contrib/estimator/python/estimator/rnn.py b/tensorflow/contrib/estimator/python/estimator/rnn.py
index 98660bb731..c595f47395 100644
--- a/tensorflow/contrib/estimator/python/estimator/rnn.py
+++ b/tensorflow/contrib/estimator/python/estimator/rnn.py
@@ -30,7 +30,6 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.layers import core as core_layers
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import partitioned_variables
@@ -92,55 +91,6 @@ def _make_rnn_cell_fn(num_units, cell_type='basic_rnn'):
   return rnn_cell_fn
 
 
-def _concatenate_context_input(sequence_input, context_input):
-  """Replicates `context_input` across all timesteps of `sequence_input`.
-
-  Expands dimension 1 of `context_input` then tiles it `sequence_length` times.
-  This value is appended to `sequence_input` on dimension 2 and the result is
-  returned.
-
-  Args:
-    sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size,
-      padded_length, d0]`.
-    context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`.
-
-  Returns:
-    A `Tensor` of dtype `float32` and shape `[batch_size, padded_length,
-    d0 + d1]`.
-
-  Raises:
-    ValueError: If `sequence_input` does not have rank 3 or `context_input` does
-      not have rank 2.
-  """
-  seq_rank_check = check_ops.assert_rank(
-      sequence_input,
-      3,
-      message='sequence_input must have rank 3',
-      data=[array_ops.shape(sequence_input)])
-  seq_type_check = check_ops.assert_type(
-      sequence_input,
-      dtypes.float32,
-      message='sequence_input must have dtype float32; got {}.'.format(
-          sequence_input.dtype))
-  ctx_rank_check = check_ops.assert_rank(
-      context_input,
-      2,
-      message='context_input must have rank 2',
-      data=[array_ops.shape(context_input)])
-  ctx_type_check = check_ops.assert_type(
-      context_input,
-      dtypes.float32,
-      message='context_input must have dtype float32; got {}.'.format(
-          context_input.dtype))
-  with ops.control_dependencies(
-      [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]):
-    padded_length = array_ops.shape(sequence_input)[1]
-    tiled_context_input = array_ops.tile(
-        array_ops.expand_dims(context_input, 1),
-        array_ops.concat([[1], [padded_length], [1]], 0))
-  return array_ops.concat([sequence_input, tiled_context_input], 2)
-
-
 def _select_last_activations(activations, sequence_lengths):
   """Selects the nth set of activations for each n in `sequence_length`.
 
@@ -222,8 +172,8 @@ def _rnn_logit_fn_builder(output_units, rnn_cell_fn, sequence_feature_columns,
         context_input = feature_column_lib.input_layer(
             features=features,
             feature_columns=context_feature_columns)
-        sequence_input = _concatenate_context_input(sequence_input,
-                                                    context_input)
+        sequence_input = seq_fc.concatenate_context_input(
+            context_input, sequence_input)
 
     cell = rnn_cell_fn(mode)
     # Ignore output state.
diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD
index aab7d0c9e8..a926ffd598 100644
--- a/tensorflow/contrib/feature_column/BUILD
+++ b/tensorflow/contrib/feature_column/BUILD
@@ -27,6 +27,7 @@ py_library(
         "//tensorflow/python:check_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
         "//tensorflow/python:parsing_ops",
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:tensor_shape",
@@ -46,9 +47,29 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:parsing_ops",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:training",
         "//tensorflow/python/feature_column",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "sequence_feature_column_integration_test",
+    srcs = ["python/feature_column/sequence_feature_column_integration_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":sequence_feature_column",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:util",
+        "//tensorflow/python/feature_column",
+        "//tensorflow/python/keras:layers",
     ],
 )
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
index 05bcdac2ca..dd6da35ed0 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
@@ -33,7 +33,6 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import variable_scope
 
 # pylint: disable=protected-access
-# TODO(b/73827486): Support SequenceExample.
 
 
 def sequence_input_layer(
@@ -110,6 +109,7 @@ def sequence_input_layer(
     output_tensors = []
     sequence_lengths = []
     ordered_columns = []
+
     for column in sorted(feature_columns, key=lambda x: x.name):
       ordered_columns.append(column)
       with variable_scope.variable_scope(
@@ -121,17 +121,67 @@ def sequence_input_layer(
         # Flattens the final dimension to produce a 3D Tensor.
         num_elements = column._variable_shape.num_elements()
         shape = array_ops.shape(dense_tensor)
+        target_shape = [shape[0], shape[1], num_elements]
         output_tensors.append(
-            array_ops.reshape(
-                dense_tensor,
-                shape=array_ops.concat([shape[:2], [num_elements]], axis=0)))
+            array_ops.reshape(dense_tensor, shape=target_shape))
         sequence_lengths.append(sequence_length)
+
     fc._verify_static_batch_size_equality(output_tensors, ordered_columns)
     fc._verify_static_batch_size_equality(sequence_lengths, ordered_columns)
     sequence_length = _assert_all_equal_and_return(sequence_lengths)
+
     return array_ops.concat(output_tensors, -1), sequence_length
 
 
+def concatenate_context_input(context_input, sequence_input):
+  """Replicates `context_input` across all timesteps of `sequence_input`.
+
+  Expands dimension 1 of `context_input` then tiles it `sequence_length` times.
+  This value is appended to `sequence_input` on dimension 2 and the result is
+  returned.
+
+  Args:
+    context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`.
+    sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size,
+      padded_length, d0]`.
+
+  Returns:
+    A `Tensor` of dtype `float32` and shape `[batch_size, padded_length,
+    d0 + d1]`.
+
+  Raises:
+    ValueError: If `sequence_input` does not have rank 3 or `context_input` does
+      not have rank 2.
+  """
+  seq_rank_check = check_ops.assert_rank(
+      sequence_input,
+      3,
+      message='sequence_input must have rank 3',
+      data=[array_ops.shape(sequence_input)])
+  seq_type_check = check_ops.assert_type(
+      sequence_input,
+      dtypes.float32,
+      message='sequence_input must have dtype float32; got {}.'.format(
+          sequence_input.dtype))
+  ctx_rank_check = check_ops.assert_rank(
+      context_input,
+      2,
+      message='context_input must have rank 2',
+      data=[array_ops.shape(context_input)])
+  ctx_type_check = check_ops.assert_type(
+      context_input,
+      dtypes.float32,
+      message='context_input must have dtype float32; got {}.'.format(
+          context_input.dtype))
+  with ops.control_dependencies(
+      [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]):
+    padded_length = array_ops.shape(sequence_input)[1]
+    tiled_context_input = array_ops.tile(
+        array_ops.expand_dims(context_input, 1),
+        array_ops.concat([[1], [padded_length], [1]], 0))
+  return array_ops.concat([sequence_input, tiled_context_input], 2)
+
+
 def sequence_categorical_column_with_identity(
     key, num_buckets, default_value=None):
   """Returns a feature column that represents sequences of integers.
@@ -453,9 +503,17 @@ class _SequenceNumericColumn(
         [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape],
         axis=0)
     dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)
-    sequence_length = fc._sequence_length_from_sparse_tensor(
-        sp_tensor, num_elements=self._variable_shape.num_elements())
+
+    # Get the number of timesteps per example
+    # For the 2D case, the raw values are grouped according to num_elements;
+    # for the 3D case, the grouping happens in the third dimension, and
+    # sequence length is not affected.
+    num_elements = (self._variable_shape.num_elements()
+                    if sp_tensor.shape.ndims == 2 else 1)
+    seq_length = fc._sequence_length_from_sparse_tensor(
+        sp_tensor, num_elements=num_elements)
+
     return fc._SequenceDenseColumn.TensorSequenceLengthPair(
-        dense_tensor=dense_tensor, sequence_length=sequence_length)
+        dense_tensor=dense_tensor, sequence_length=seq_length)
 
 # pylint: enable=protected-access
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py
new file mode 100644
index 0000000000..d8ca363627
--- /dev/null
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py
@@ -0,0 +1,280 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Integration test for sequence feature columns with SequenceExamples."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import string
+import tempfile
+
+from google.protobuf import text_format
+
+from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.keras.layers import recurrent
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+
+class SequenceFeatureColumnIntegrationTest(test.TestCase):
+
+  def _make_sequence_example(self):
+    example = example_pb2.SequenceExample()
+    example.context.feature['int_ctx'].int64_list.value.extend([5])
+    example.context.feature['float_ctx'].float_list.value.extend([123.6])
+    for val in range(0, 10, 2):
+      feat = feature_pb2.Feature()
+      feat.int64_list.value.extend([val] * val)
+      example.feature_lists.feature_list['int_list'].feature.extend([feat])
+    for val in range(1, 11, 2):
+      feat = feature_pb2.Feature()
+      feat.bytes_list.value.extend([compat.as_bytes(str(val))] * val)
+      example.feature_lists.feature_list['str_list'].feature.extend([feat])
+
+    return example
+
+  def _build_feature_columns(self):
+    col = fc.categorical_column_with_identity(
+        'int_ctx', num_buckets=100)
+    ctx_cols = [
+        fc.embedding_column(col, dimension=10),
+        fc.numeric_column('float_ctx')]
+
+    identity_col = sfc.sequence_categorical_column_with_identity(
+        'int_list', num_buckets=10)
+    bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
+        'bytes_list', hash_bucket_size=100)
+    seq_cols = [
+        fc.embedding_column(identity_col, dimension=10),
+        fc.embedding_column(bucket_col, dimension=20)]
+
+    return ctx_cols, seq_cols
+
+  def test_sequence_example_into_input_layer(self):
+    examples = [_make_sequence_example().SerializeToString()] * 100
+    ctx_cols, seq_cols = self._build_feature_columns()
+
+    def _parse_example(example):
+      ctx, seq = parsing_ops.parse_single_sequence_example(
+          example,
+          context_features=fc.make_parse_example_spec(ctx_cols),
+          sequence_features=fc.make_parse_example_spec(seq_cols))
+      ctx.update(seq)
+      return ctx
+
+    ds = dataset_ops.Dataset.from_tensor_slices(examples)
+    ds = ds.map(_parse_example)
+    ds = ds.batch(20)
+
+    # Test on a single batch
+    features = ds.make_one_shot_iterator().get_next()
+
+    # Tile the context features across the sequence features
+    seq_layer, _ = sfc.sequence_input_layer(features, seq_cols)
+    ctx_layer = fc.input_layer(features, ctx_cols)
+    input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer)
+
+    rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
+    output = rnn_layer(input_layer)
+
+    with self.cached_session() as sess:
+      sess.run(variables.global_variables_initializer())
+      features_r = sess.run(features)
+      self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])
+
+      output_r = sess.run(output)
+      self.assertAllEqual(output_r.shape, [20, 10])
+
+
+class SequenceExampleParsingTest(test.TestCase):
+
+  def test_seq_ex_in_sequence_categorical_column_with_identity(self):
+    self._test_parsed_sequence_example(
+        'int_list', sfc.sequence_categorical_column_with_identity,
+        10, [3, 6], [2, 4, 6])
+
+  def test_seq_ex_in_sequence_categorical_column_with_hash_bucket(self):
+    self._test_parsed_sequence_example(
+        'bytes_list', sfc.sequence_categorical_column_with_hash_bucket,
+        10, [3, 4], [compat.as_bytes(x) for x in 'acg'])
+
+  def test_seq_ex_in_sequence_categorical_column_with_vocabulary_list(self):
+    self._test_parsed_sequence_example(
+        'bytes_list', sfc.sequence_categorical_column_with_vocabulary_list,
+        list(string.ascii_lowercase), [3, 4],
+        [compat.as_bytes(x) for x in 'acg'])
+
+  def test_seq_ex_in_sequence_categorical_column_with_vocabulary_file(self):
+    _, fname = tempfile.mkstemp()
+    with open(fname, 'w') as f:
+      f.write(string.ascii_lowercase)
+    self._test_parsed_sequence_example(
+        'bytes_list', sfc.sequence_categorical_column_with_vocabulary_file,
+        fname, [3, 4], [compat.as_bytes(x) for x in 'acg'])
+
+  def _test_parsed_sequence_example(
+      self, col_name, col_fn, col_arg, shape, values):
+    """Helper function to check that each FeatureColumn parses correctly.
+
+    Args:
+      col_name: string, name to give to the feature column. Should match
+        the name that the column will parse out of the features dict.
+      col_fn: function used to create the feature column. For example,
+        sequence_numeric_column.
+      col_arg: second arg that the target feature column is expecting.
+      shape: the expected dense_shape of the feature after parsing into
+        a SparseTensor.
+      values: the expected values at index [0, 2, 6] of the feature
+        after parsing into a SparseTensor.
+    """
+    example = _make_sequence_example()
+    columns = [
+        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
+        fc.numeric_column('float_ctx'),
+        col_fn(col_name, col_arg)
+    ]
+    context, seq_features = parsing_ops.parse_single_sequence_example(
+        example.SerializeToString(),
+        context_features=fc.make_parse_example_spec(columns[:2]),
+        sequence_features=fc.make_parse_example_spec(columns[2:]))
+
+    with self.cached_session() as sess:
+      ctx_result, seq_result = sess.run([context, seq_features])
+      self.assertEqual(list(seq_result[col_name].dense_shape), shape)
+      self.assertEqual(
+          list(seq_result[col_name].values[[0, 2, 6]]), values)
+      self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
+      self.assertEqual(ctx_result['int_ctx'].values[0], 5)
+      self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
+      self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
+
+
+_SEQ_EX_PROTO = """
+context {
+  feature {
+    key: "float_ctx"
+    value {
+      float_list {
+        value: 123.6
+      }
+    }
+  }
+  feature {
+    key: "int_ctx"
+    value {
+      int64_list {
+        value: 5
+      }
+    }
+  }
+}
+feature_lists {
+  feature_list {
+    key: "bytes_list"
+    value {
+      feature {
+        bytes_list {
+          value: "a"
+        }
+      }
+      feature {
+        bytes_list {
+          value: "b"
+          value: "c"
+        }
+      }
+      feature {
+        bytes_list {
+          value: "d"
+          value: "e"
+          value: "f"
+          value: "g"
+        }
+      }
+    }
+  }
+  feature_list {
+    key: "float_list"
+    value {
+      feature {
+        float_list {
+          value: 1.0
+        }
+      }
+      feature {
+        float_list {
+          value: 3.0
+          value: 3.0
+          value: 3.0
+        }
+      }
+      feature {
+        float_list {
+          value: 5.0
+          value: 5.0
+          value: 5.0
+          value: 5.0
+          value: 5.0
+        }
+      }
+    }
+  }
+  feature_list {
+    key: "int_list"
+    value {
+      feature {
+        int64_list {
+          value: 2
+          value: 2
+        }
+      }
+      feature {
+        int64_list {
+          value: 4
+          value: 4
+          value: 4
+          value: 4
+        }
+      }
+      feature {
+        int64_list {
+          value: 6
+          value: 6
+          value: 6
+          value: 6
+          value: 6
+          value: 6
+        }
+      }
+    }
+  }
+}
+"""
+
+
+def _make_sequence_example():
+  example = example_pb2.SequenceExample()
+  return text_format.Parse(_SEQ_EX_PROTO, example)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
index 45d7b74046..929e83523a 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import os
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc
@@ -28,28 +29,61 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training import monitored_session
 
 
-class SequenceInputLayerTest(test.TestCase):
+class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_a': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2, 0, 1),
+           dense_shape=(2, 2)),
+       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           # example 0, ids [1]
+           # example 1, ids [2, 0]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(1, 2, 0),
+           dense_shape=(2, 2)),
+       'expected_input_layer': [
+           # example 0, ids_a [2], ids_b [1]
+           [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
+           # example 1, ids_a [0, 1], ids_b [2, 0]
+           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],],
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'sparse_input_a': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[2], [0, 1]]
+           # feature 1, ids [[0, 0], [1]]
+           indices=(
+               (0, 0, 0), (0, 1, 0), (0, 1, 1),
+               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2, 0, 1, 0, 0, 1),
+           dense_shape=(2, 2, 2)),
+       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[1, 1], [1]]
+           # feature 1, ids [[2], [0]]
+           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           values=(1, 1, 1, 2, 0),
+           dense_shape=(2, 2, 2)),
+       'expected_input_layer': [
+           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
+           [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]],
+           # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -]
+           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]],
+       'expected_sequence_length': [2, 2]},
+      )
+  def test_embedding_column(
+      self, sparse_input_a, sparse_input_b, expected_input_layer,
+      expected_sequence_length):
 
-  def test_embedding_column(self):
     vocabulary_size = 3
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [1]
-        # example 1, ids [2, 0]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(1, 2, 0),
-        dense_shape=(2, 2))
-
     embedding_dimension_a = 2
     embedding_values_a = (
         (1., 2.),  # id 0
@@ -70,14 +104,6 @@ class SequenceInputLayerTest(test.TestCase):
         return embedding_values
       return _initializer
 
-    expected_input_layer = [
-        # example 0, ids_a [2], ids_b [1]
-        [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
-        # example 1, ids_a [0, 1], ids_b [2, 0]
-        [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],
-    ]
-    expected_sequence_length = [1, 2]
-
     categorical_column_a = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     embedding_column_a = fc.embedding_column(
@@ -233,29 +259,53 @@ class SequenceInputLayerTest(test.TestCase):
           },
           feature_columns=shared_embedding_columns)
 
-  def test_indicator_column(self):
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_a': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2, 0, 1),
+           dense_shape=(2, 2)),
+       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           # example 0, ids [1]
+           # example 1, ids [1, 0]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(1, 1, 0),
+           dense_shape=(2, 2)),
+       'expected_input_layer': [
+           # example 0, ids_a [2], ids_b [1]
+           [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
+           # example 1, ids_a [0, 1], ids_b [1, 0]
+           [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'sparse_input_a': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[2], [0, 1]]
+           # feature 1, ids [[0, 0], [1]]
+           indices=(
+               (0, 0, 0), (0, 1, 0), (0, 1, 1),
+               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2, 0, 1, 0, 0, 1),
+           dense_shape=(2, 2, 2)),
+       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[1, 1], [1]]
+           # feature 1, ids [[1], [0]]
+           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           values=(1, 1, 1, 1, 0),
+           dense_shape=(2, 2, 2)),
+       'expected_input_layer': [
+           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
+           [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]],
+           # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -]
+           [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
+       'expected_sequence_length': [2, 2]},
+      )
+  def test_indicator_column(
+      self, sparse_input_a, sparse_input_b, expected_input_layer,
+      expected_sequence_length):
     vocabulary_size_a = 3
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
     vocabulary_size_b = 2
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [1]
-        # example 1, ids [1, 0]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(1, 1, 0),
-        dense_shape=(2, 2))
-
-    expected_input_layer = [
-        # example 0, ids_a [2], ids_b [1]
-        [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
-        # example 1, ids_a [0, 1], ids_b [1, 0]
-        [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
-    ]
-    expected_sequence_length = [1, 2]
 
     categorical_column_a = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size_a)
@@ -298,18 +348,32 @@ class SequenceInputLayerTest(test.TestCase):
           features={'aaa': sparse_input},
           feature_columns=[indicator_column_a])
 
-  def test_numeric_column(self):
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0.], [1]]
-        # example 1, [[10.]]
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0., 1., 10.),
-        dense_shape=(2, 2))
-    expected_input_layer = [
-        [[0.], [1.]],
-        [[10.], [0.]],
-    ]
-    expected_sequence_length = [2, 1]
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [0., 1]
+           # example 1, [10.]
+           indices=((0, 0), (0, 1), (1, 0)),
+           values=(0., 1., 10.),
+           dense_shape=(2, 2)),
+       'expected_input_layer': [
+           [[0.], [1.]],
+           [[10.], [0.]]],
+       'expected_sequence_length': [2, 1]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[20, 3], [5]]
+           # feature 1, ids [[3], [8]]
+           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           values=(20, 3, 5., 3., 8.),
+           dense_shape=(2, 2, 2)),
+       'expected_input_layer': [
+           [[20.], [3.], [5.], [0.]],
+           [[3.], [0.], [8.], [0.]]],
+       'expected_sequence_length': [2, 2]},
+      )
+  def test_numeric_column(
+      self, sparse_input, expected_input_layer, expected_sequence_length):
     numeric_column = sfc.sequence_numeric_column('aaa')
 
     input_layer, sequence_length = sfc.sequence_input_layer(
@@ -321,21 +385,38 @@ class SequenceInputLayerTest(test.TestCase):
       self.assertAllEqual(
           expected_sequence_length, sequence_length.eval(session=sess))
 
-  def test_numeric_column_multi_dim(self):
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [0., 1.,  2., 3., 4., 5., 6., 7.]
+           # example 1, [10., 11., 12., 13.]
+           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 8)),
+       'expected_input_layer': [
+           # The output of numeric_column._get_dense_tensor should be flattened.
+           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
+           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
+       'expected_sequence_length': [2, 1]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
+           # example 1, [[10., 11., 12., 13.], []]
+           indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                    (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
+                    (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 2, 4)),
+       'expected_input_layer': [
+           # The output of numeric_column._get_dense_tensor should be flattened.
+           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
+           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
+       'expected_sequence_length': [2, 1]},
+      )
+  def test_numeric_column_multi_dim(
+      self, sparse_input, expected_input_layer, expected_sequence_length):
     """Tests sequence_input_layer for multi-dimensional numeric_column."""
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
-        # example 1, [[[10., 11.],  [12., 13.]]]
-        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
-                 (1, 0), (1, 1), (1, 2), (1, 3)),
-        values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-        dense_shape=(2, 8))
-    # The output of numeric_column._get_dense_tensor should be flattened.
-    expected_input_layer = [
-        [[0., 1., 2., 3.], [4., 5., 6., 7.]],
-        [[10., 11., 12., 13.], [0., 0., 0., 0.]],
-    ]
-    expected_sequence_length = [2, 1]
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     input_layer, sequence_length = sfc.sequence_input_layer(
@@ -377,6 +458,134 @@ class SequenceInputLayerTest(test.TestCase):
           r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]'):
         sess.run(sequence_length)
 
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
+           # example 1, [[[10., 11.],  [12., 13.]]]
+           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 8)),
+       'expected_shape': [2, 2, 4]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
+           # example 1, [[10., 11., 12., 13.], []]
+           indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                    (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2),
+                    (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 2, 4)),
+       'expected_shape': [2, 2, 4]},
+      )
+  def test_static_shape_from_tensors_numeric(
+      self, sparse_input, expected_shape):
+    """Tests that we return a known static shape when we have one."""
+    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
+
+    input_layer, _ = sfc.sequence_input_layer(
+        features={'aaa': sparse_input},
+        feature_columns=[numeric_column])
+    shape = input_layer.get_shape()
+    self.assertEqual(shape, expected_shape)
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           # example 2, ids []
+           # example 3, ids [1]
+           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
+           values=(2, 0, 1, 1),
+           dense_shape=(4, 2)),
+       'expected_shape': [4, 2, 3]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           # example 2, ids []
+           # example 3, ids [[1], [0, 2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           values=(2, 0, 1, 2, 1, 0, 2),
+           dense_shape=(4, 2, 2)),
+       'expected_shape': [4, 2, 3]}
+      )
+  def test_static_shape_from_tensors_indicator(
+      self, sparse_input, expected_shape):
+    """Tests that we return a known static shape when we have one."""
+    categorical_column = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    indicator_column = fc.indicator_column(categorical_column)
+
+    input_layer, _ = sfc.sequence_input_layer(
+        features={'aaa': sparse_input}, feature_columns=[indicator_column])
+    shape = input_layer.get_shape()
+    self.assertEqual(shape, expected_shape)
+
+
+class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
+  """Tests the utility fn concatenate_context_input."""
+
+  def test_concatenate_context_input(self):
+    seq_input = ops.convert_to_tensor(np.arange(12).reshape(2, 3, 2))
+    context_input = ops.convert_to_tensor(np.arange(10).reshape(2, 5))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    input_layer = sfc.concatenate_context_input(context_input, seq_input)
+
+    expected = np.array([
+        [[0, 1, 0, 1, 2, 3, 4], [2, 3, 0, 1, 2, 3, 4], [4, 5, 0, 1, 2, 3, 4]],
+        [[6, 7, 5, 6, 7, 8, 9], [8, 9, 5, 6, 7, 8, 9], [10, 11, 5, 6, 7, 8, 9]]
+    ], dtype=np.float32)
+    with monitored_session.MonitoredSession() as sess:
+      output = sess.run(input_layer)
+      self.assertAllEqual(expected, output)
+
+  @parameterized.named_parameters(
+      {'testcase_name': 'rank_lt_3',
+       'seq_input': ops.convert_to_tensor(np.arange(100).reshape(10, 10))},
+      {'testcase_name': 'rank_gt_3',
+       'seq_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 2, 2))}
+      )
+  def test_sequence_input_throws_error(self, seq_input):
+    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(ValueError, 'sequence_input must have rank 3'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+  @parameterized.named_parameters(
+      {'testcase_name': 'rank_lt_2',
+       'context_input': ops.convert_to_tensor(np.arange(100))},
+      {'testcase_name': 'rank_gt_2',
+       'context_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))}
+      )
+  def test_context_input_throws_error(self, context_input):
+    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(ValueError, 'context_input must have rank 2'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+  def test_integer_seq_input_throws_error(self):
+    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
+    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(
+        TypeError, 'sequence_input must have dtype float32'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+  def test_integer_context_input_throws_error(self):
+    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
+    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(
+        TypeError, 'context_input must have dtype float32'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
 
 class InputLayerTest(test.TestCase):
   """Tests input_layer with sequence feature columns."""
@@ -443,75 +652,79 @@ def _assert_sparse_tensor_indices_shape(test_case, expected, actual):
   test_case.assertAllEqual(expected.dense_shape, actual.dense_shape)
 
 
-class SequenceCategoricalColumnWithIdentityTest(test.TestCase):
-
-  def test_get_sparse_tensors(self):
-    column = sfc.sequence_categorical_column_with_identity(
-        'aaa', num_buckets=3)
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(1, 2, 0),
-        dense_shape=(2, 2))
-    expected_sparse_ids = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        values=np.array((1, 2, 0), dtype=np.int64),
-        dense_shape=(2, 2, 1))
+class SequenceCategoricalColumnWithIdentityTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(1, 2, 0),
+           dense_shape=(2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           values=np.array((1, 2, 0), dtype=np.int64),
+           dense_shape=(2, 2, 1))},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=(6, 7, 8),
+           dense_shape=(2, 2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=(6, 7, 8),
+           dense_shape=(2, 2, 2))}
+      )
+  def test_get_sparse_tensors(self, inputs, expected):
+    column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9)
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
       _assert_sparse_tensor_value(
-          self,
-          expected_sparse_ids,
-          id_weight_pair.id_tensor.eval(session=sess))
-
-  def test_get_sparse_tensors_inputs3d(self):
-    """Tests _get_sparse_tensors when the input is already 3D Tensor."""
-    column = sfc.sequence_categorical_column_with_identity(
-        'aaa', num_buckets=3)
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        values=(1, 2, 0),
-        dense_shape=(2, 2, 1))
-
-    with self.assertRaisesRegexp(
-        errors.InvalidArgumentError,
-        r'Column aaa expected ID tensor of rank 2\.\s*'
-        r'id_tensor shape:\s*\[2 2 1\]'):
-      id_weight_pair = column._get_sparse_tensors(
-          _LazyBuilder({'aaa': inputs}))
-      with monitored_session.MonitoredSession() as sess:
-        id_weight_pair.id_tensor.eval(session=sess)
-
-
-class SequenceCategoricalColumnWithHashBucketTest(test.TestCase):
-
-  def test_get_sparse_tensors(self):
+          self, expected, id_weight_pair.id_tensor.eval(session=sess))
+
+
+class SequenceCategoricalColumnWithHashBucketTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=('omar', 'stringer', 'marlo'),
+           dense_shape=(2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           # Ignored to avoid hash dependence in test.
+           values=np.array((0, 0, 0), dtype=np.int64),
+           dense_shape=(2, 2, 1))},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=('omar', 'stringer', 'marlo'),
+           dense_shape=(2, 2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           # Ignored to avoid hash dependence in test.
+           values=np.array((0, 0, 0), dtype=np.int64),
+           dense_shape=(2, 2, 2))}
+      )
+  def test_get_sparse_tensors(self, inputs, expected):
     column = sfc.sequence_categorical_column_with_hash_bucket(
         'aaa', hash_bucket_size=10)
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=('omar', 'stringer', 'marlo'),
-        dense_shape=(2, 2))
-
-    expected_sparse_ids = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        # Ignored to avoid hash dependence in test.
-        values=np.array((0, 0, 0), dtype=np.int64),
-        dense_shape=(2, 2, 1))
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
       _assert_sparse_tensor_indices_shape(
-          self,
-          expected_sparse_ids,
-          id_weight_pair.id_tensor.eval(session=sess))
+          self, expected, id_weight_pair.id_tensor.eval(session=sess))
 
 
-class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase):
+class SequenceCategoricalColumnWithVocabularyFileTest(
+    test.TestCase, parameterized.TestCase):
 
   def _write_vocab(self, vocab_strings, file_name):
     vocab_file = os.path.join(self.get_temp_dir(), file_name)
@@ -527,68 +740,120 @@ class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase):
                                                         'wire_vocabulary.txt')
     self._wire_vocabulary_size = 3
 
-  def test_get_sparse_tensors(self):
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=('marlo', 'skywalker', 'omar'),
+           dense_shape=(2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           values=np.array((2, -1, 0), dtype=np.int64),
+           dense_shape=(2, 2, 1))},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=('omar', 'skywalker', 'marlo'),
+           dense_shape=(2, 2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=np.array((0, -1, 2), dtype=np.int64),
+           dense_shape=(2, 2, 2))}
+      )
+  def test_get_sparse_tensors(self, inputs, expected):
     column = sfc.sequence_categorical_column_with_vocabulary_file(
         key='aaa',
         vocabulary_file=self._wire_vocabulary_file_name,
         vocabulary_size=self._wire_vocabulary_size)
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=('marlo', 'skywalker', 'omar'),
-        dense_shape=(2, 2))
-    expected_sparse_ids = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        values=np.array((2, -1, 0), dtype=np.int64),
-        dense_shape=(2, 2, 1))
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
       _assert_sparse_tensor_value(
-          self,
-          expected_sparse_ids,
-          id_weight_pair.id_tensor.eval(session=sess))
-
-
-class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase):
-
-  def test_get_sparse_tensors(self):
+          self, expected, id_weight_pair.id_tensor.eval(session=sess))
+
+
+class SequenceCategoricalColumnWithVocabularyListTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=('marlo', 'skywalker', 'omar'),
+           dense_shape=(2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           values=np.array((2, -1, 0), dtype=np.int64),
+           dense_shape=(2, 2, 1))},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=('omar', 'skywalker', 'marlo'),
+           dense_shape=(2, 2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=np.array((0, -1, 2), dtype=np.int64),
+           dense_shape=(2, 2, 2))}
+      )
+  def test_get_sparse_tensors(self, inputs, expected):
     column = sfc.sequence_categorical_column_with_vocabulary_list(
         key='aaa',
         vocabulary_list=('omar', 'stringer', 'marlo'))
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=('marlo', 'skywalker', 'omar'),
-        dense_shape=(2, 2))
-    expected_sparse_ids = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        values=np.array((2, -1, 0), dtype=np.int64),
-        dense_shape=(2, 2, 1))
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
       _assert_sparse_tensor_value(
-          self,
-          expected_sparse_ids,
-          id_weight_pair.id_tensor.eval(session=sess))
-
-
-class SequenceEmbeddingColumnTest(test.TestCase):
-
-  def test_get_sequence_dense_tensor(self):
+          self, expected, id_weight_pair.id_tensor.eval(session=sess))
+
+
+class SequenceEmbeddingColumnTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           # example 2, ids []
+           # example 3, ids [1]
+           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
+           values=(2, 0, 1, 1),
+           dense_shape=(4, 2)),
+       'expected': [
+           # example 0, ids [2]
+           [[7., 11.], [0., 0.]],
+           # example 1, ids [0, 1]
+           [[1., 2.], [3., 5.]],
+           # example 2, ids []
+           [[0., 0.], [0., 0.]],
+           # example 3, ids [1]
+           [[3., 5.], [0., 0.]]]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           # example 2, ids []
+           # example 3, ids [[1], [0, 2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           values=(2, 0, 1, 2, 1, 0, 2),
+           dense_shape=(4, 2, 2)),
+       'expected': [
+           # example 0, ids [[2]]
+           [[7., 11.], [0., 0.]],
+           # example 1, ids [[0, 1], [2]]
+           [[2, 3.5], [7., 11.]],
+           # example 2, ids []
+           [[0., 0.], [0., 0.]],
+           # example 3, ids [[1], [0, 2]]
+           [[3., 5.], [4., 6.5]]]}
+      )
+  def test_get_sequence_dense_tensor(self, inputs, expected):
     vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 2))
-
     embedding_dimension = 2
     embedding_values = (
         (1., 2.),  # id 0
@@ -601,17 +866,6 @@ class SequenceEmbeddingColumnTest(test.TestCase):
       self.assertIsNone(partition_info)
       return embedding_values
 
-    expected_lookups = [
-        # example 0, ids [2]
-        [[7., 11.], [0., 0.]],
-        # example 1, ids [0, 1]
-        [[1., 2.], [3., 5.]],
-        # example 2, ids []
-        [[0., 0.], [0., 0.]],
-        # example 3, ids [1]
-        [[3., 5.], [0., 0.]],
-    ]
-
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     embedding_column = fc.embedding_column(
@@ -619,24 +873,35 @@ class SequenceEmbeddingColumnTest(test.TestCase):
         initializer=_initializer)
 
     embedding_lookup, _ = embedding_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
     self.assertItemsEqual(
         ('embedding_weights:0',), tuple([v.name for v in global_vars]))
     with monitored_session.MonitoredSession() as sess:
       self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
-      self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess))
-
-  def test_sequence_length(self):
+      self.assertAllEqual(expected, embedding_lookup.eval(session=sess))
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2, 0, 1),
+           dense_shape=(2, 2)),
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2, 0, 1, 2),
+           dense_shape=(2, 2, 2)),
+       'expected_sequence_length': [1, 2]}
+      )
+  def test_sequence_length(self, inputs, expected_sequence_length):
     vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    expected_sequence_length = [1, 2]
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
@@ -644,7 +909,7 @@ class SequenceEmbeddingColumnTest(test.TestCase):
         categorical_column, dimension=2)
 
     _, sequence_length = embedding_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
       sequence_length = sess.run(sequence_length)
@@ -855,56 +1120,87 @@ class SequenceSharedEmbeddingColumnTest(test.TestCase):
           expected_sequence_length_b, sequence_length_b.eval(session=sess))
 
 
-class SequenceIndicatorColumnTest(test.TestCase):
-
-  def test_get_sequence_dense_tensor(self):
+class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           # example 2, ids []
+           # example 3, ids [1]
+           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
+           values=(2, 0, 1, 1),
+           dense_shape=(4, 2)),
+       'expected': [
+           # example 0, ids [2]
+           [[0., 0., 1.], [0., 0., 0.]],
+           # example 1, ids [0, 1]
+           [[1., 0., 0.], [0., 1., 0.]],
+           # example 2, ids []
+           [[0., 0., 0.], [0., 0., 0.]],
+           # example 3, ids [1]
+           [[0., 1., 0.], [0., 0., 0.]]]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           # example 2, ids []
+           # example 3, ids [[1], [2, 2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           values=(2, 0, 1, 2, 1, 2, 2),
+           dense_shape=(4, 2, 2)),
+       'expected': [
+           # example 0, ids [[2]]
+           [[0., 0., 1.], [0., 0., 0.]],
+           # example 1, ids [[0, 1], [2]]
+           [[1., 1., 0.], [0., 0., 1.]],
+           # example 2, ids []
+           [[0., 0., 0.], [0., 0., 0.]],
+           # example 3, ids [[1], [2, 2]]
+           [[0., 1., 0.], [0., 0., 2.]]]}
+      )
+  def test_get_sequence_dense_tensor(self, inputs, expected):
     vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 2))
-
-    expected_lookups = [
-        # example 0, ids [2]
-        [[0., 0., 1.], [0., 0., 0.]],
-        # example 1, ids [0, 1]
-        [[1., 0., 0.], [0., 1., 0.]],
-        # example 2, ids []
-        [[0., 0., 0.], [0., 0., 0.]],
-        # example 3, ids [1]
-        [[0., 1., 0.], [0., 0., 0.]],
-    ]
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     indicator_column = fc.indicator_column(categorical_column)
 
     indicator_tensor, _ = indicator_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess))
-
-  def test_sequence_length(self):
+      self.assertAllEqual(expected, indicator_tensor.eval(session=sess))
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2, 0, 1),
+           dense_shape=(2, 2)),
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2, 0, 1, 2),
+           dense_shape=(2, 2, 2)),
+       'expected_sequence_length': [1, 2]}
+      )
+  def test_sequence_length(self, inputs, expected_sequence_length):
     vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    expected_sequence_length = [1, 2]
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     indicator_column = fc.indicator_column(categorical_column)
 
     _, sequence_length = indicator_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
       sequence_length = sess.run(sequence_length)
@@ -938,7 +1234,7 @@ class SequenceIndicatorColumnTest(test.TestCase):
           expected_sequence_length, sequence_length.eval(session=sess))
 
 
-class SequenceNumericColumnTest(test.TestCase):
+class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
 
   def test_defaults(self):
     a = sfc.sequence_numeric_column('aaa')
@@ -971,25 +1267,36 @@ class SequenceNumericColumnTest(test.TestCase):
     with self.assertRaisesRegexp(TypeError, 'must be a callable'):
       sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')
 
-  def test_get_sequence_dense_tensor(self):
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0.], [1]]
-        # example 1, [[10.]]
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0., 1., 10.),
-        dense_shape=(2, 2))
-    expected_dense_tensor = [
-        [[0.], [1.]],
-        [[10.], [0.]],
-    ]
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, values [0., 1]
+           # example 1, [10.]
+           indices=((0, 0), (0, 1), (1, 0)),
+           values=(0., 1., 10.),
+           dense_shape=(2, 2)),
+       'expected': [
+           [[0.], [1.]],
+           [[10.], [0.]]]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[20, 3], [5]]
+           # feature 1, ids [[3], [8]]
+           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           values=(20, 3, 5., 3., 8.),
+           dense_shape=(2, 2, 2)),
+       'expected': [
+           [[20.], [3.], [5.], [0.]],
+           [[3.], [0.], [8.], [0.]]]},
+      )
+  def test_get_sequence_dense_tensor(self, inputs, expected):
     numeric_column = sfc.sequence_numeric_column('aaa')
 
     dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_dense_tensor, dense_tensor.eval(session=sess))
+      self.assertAllEqual(expected, dense_tensor.eval(session=sess))
 
   def test_get_sequence_dense_tensor_with_normalizer_fn(self):
 
@@ -1026,41 +1333,34 @@ class SequenceNumericColumnTest(test.TestCase):
       self.assertAllEqual(
           expected_dense_tensor, dense_tensor.eval(session=sess))
 
-  def test_get_sequence_dense_tensor_with_shape(self):
-    """Tests get_sequence_dense_tensor with shape !=(1,)."""
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0., 1., 2.], [3., 4., 5.]]
-        # example 1, [[10., 11., 12.]]
-        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
-                 (1, 0), (1, 1), (1, 2)),
-        values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
-        dense_shape=(2, 6))
-    expected_dense_tensor = [
-        [[0., 1., 2.], [3., 4., 5.]],
-        [[10., 11., 12.], [0., 0., 0.]],
-    ]
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,))
-
-    dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_dense_tensor, dense_tensor.eval(session=sess))
-
-  def test_get_dense_tensor_multi_dim(self):
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
+           # example 1, [[[10., 11.],  [12., 13.]]]
+           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 8)),
+       'expected_dense_tensor': [
+           [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
+           [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
+                    (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
+                    (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 2, 8)),
+       'expected_dense_tensor': [
+           [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]],
+            [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]],
+           [[[10., 0.], [11., 0.]], [[12., 0.], [13., 0.]],
+            [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]},
+      )
+  def test_get_dense_tensor_multi_dim(
+      self, sparse_input, expected_dense_tensor):
     """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
-        # example 1, [[[10., 11.],  [12., 13.]]]
-        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
-                 (1, 0), (1, 1), (1, 2), (1, 3)),
-        values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-        dense_shape=(2, 8))
-    expected_dense_tensor = [
-        [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
-        [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]],
-    ]
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
@@ -1070,43 +1370,55 @@ class SequenceNumericColumnTest(test.TestCase):
       self.assertAllEqual(
           expected_dense_tensor, dense_tensor.eval(session=sess))
 
-  def test_sequence_length(self):
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0., 1., 2.], [3., 4., 5.]]
-        # example 1, [[10., 11., 12.]]
-        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
-                 (1, 0), (1, 1), (1, 2)),
-        values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
-        dense_shape=(2, 6))
-    expected_sequence_length = [2, 1]
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,))
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2., 0., 1.),
+           dense_shape=(2, 2)),
+       'expected_sequence_length': [1, 2],
+       'shape': (1,)},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2., 0., 1., 2.),
+           dense_shape=(2, 2, 2)),
+       'expected_sequence_length': [1, 2],
+       'shape': (1,)},
+      {'testcase_name': '2D_with_shape',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2., 0., 1.),
+           dense_shape=(2, 2)),
+       'expected_sequence_length': [1, 1],
+       'shape': (2,)},
+      {'testcase_name': '3D_with_shape',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2., 0., 1., 2.),
+           dense_shape=(2, 2, 2)),
+       'expected_sequence_length': [1, 2],
+       'shape': (2,)},
+      )
+  def test_sequence_length(self, inputs, expected_sequence_length, shape):
+    numeric_column = sfc.sequence_numeric_column('aaa', shape=shape)
 
     _, sequence_length = numeric_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
       sequence_length = sess.run(sequence_length)
       self.assertAllEqual(expected_sequence_length, sequence_length)
       self.assertEqual(np.int64, sequence_length.dtype)
 
-  def test_sequence_length_with_shape(self):
-    """Tests _sequence_length with shape !=(1,)."""
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0.], [1]]
-        # example 1, [[10.]]
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0., 1., 10.),
-        dense_shape=(2, 2))
-    expected_sequence_length = [2, 1]
-    numeric_column = sfc.sequence_numeric_column('aaa')
-
-    _, sequence_length = numeric_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
   def test_sequence_length_with_empty_rows(self):
     """Tests _sequence_length when some examples do not have ids."""
     sparse_input = sparse_tensor.SparseTensorValue(
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 5352796174..28a8286544 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -2660,6 +2660,7 @@ class _EmbeddingColumn(
         inputs=inputs,
         weight_collections=weight_collections,
         trainable=trainable)
+
     sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
     sequence_length = _sequence_length_from_sparse_tensor(
         sparse_tensors.id_tensor)
@@ -3383,6 +3384,16 @@ class _IndicatorColumn(_DenseColumn, _SequenceDenseColumn,
 
 
 def _verify_static_batch_size_equality(tensors, columns):
+  """Validates that the first dim (batch size) of all tensors are equal or None.
+
+  Args:
+    tensors: list of tensors to check.
+    columns: list of feature columns matching tensors. Will be used for error
+      messaging.
+
+  Raises:
+    ValueError: if one of the tensors has a variant batch size
+  """
   # bath_size is a tf.Dimension object.
   expected_batch_size = None
   for i in range(0, len(tensors)):
@@ -3403,9 +3414,18 @@ def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1):
   with ops.name_scope(None, 'sequence_length') as name_scope:
     row_ids = sp_tensor.indices[:, 0]
     column_ids = sp_tensor.indices[:, 1]
+    # Add one to convert column indices to element length
     column_ids += array_ops.ones_like(column_ids)
-    seq_length = math_ops.to_int64(
-        math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements)
+    # Get the number of elements we will have per example/row
+    seq_length = math_ops.segment_max(column_ids, segment_ids=row_ids)
+
+    # The raw values are grouped according to num_elements;
+    # how many entities will we have after grouping?
+    # Example: orig tensor [[1, 2], [3]], col_ids = (0, 1, 1),
+    # row_ids = (0, 0, 1), seq_length = [2, 1]. If num_elements = 2,
+    # these will get grouped, and the final seq_length is [1, 1]
+    seq_length = math_ops.to_int64(math_ops.ceil(seq_length / num_elements))
+
     # If the last n rows do not have ids, seq_length will have shape
     # [batch_size - n]. Pad the remaining values with zeros.
     n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1]
@@ -3439,25 +3459,14 @@ class _SequenceCategoricalColumn(
     sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
     id_tensor = sparse_tensors.id_tensor
     weight_tensor = sparse_tensors.weight_tensor
-    # Expands final dimension, so that embeddings are not combined during
-    # embedding lookup.
-    check_id_rank = check_ops.assert_equal(
-        array_ops.rank(id_tensor), 2,
-        data=[
-            'Column {} expected ID tensor of rank 2. '.format(self.name),
-            'id_tensor shape: ', array_ops.shape(id_tensor)])
-    with ops.control_dependencies([check_id_rank]):
-      id_tensor = sparse_ops.sparse_reshape(
-          id_tensor,
-          shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0))
+
+    # Expands third dimension, if necessary so that embeddings are not
+    # combined during embedding lookup. If the tensor is already 3D, leave
+    # as-is.
+    shape = array_ops.shape(id_tensor)
+    target_shape = [shape[0], shape[1], -1]
+    id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape)
     if weight_tensor is not None:
-      check_weight_rank = check_ops.assert_equal(
-          array_ops.rank(weight_tensor), 2,
-          data=[
-              'Column {} expected weight tensor of rank 2.'.format(self.name),
-              'weight_tensor shape:', array_ops.shape(weight_tensor)])
-      with ops.control_dependencies([check_weight_rank]):
-        weight_tensor = sparse_ops.sparse_reshape(
-            weight_tensor,
-            shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0))
+      weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape)
+
     return _CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index ff50fe0d09..a2da6412ed 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -217,21 +217,21 @@ def _features_to_raw_params(features, types):
       feature = features[key]
       if isinstance(feature, VarLenFeature):
         if VarLenFeature not in types:
-          raise ValueError("Unsupported VarLenFeature %s." % feature)
+          raise ValueError("Unsupported VarLenFeature %s." % (feature,))
         if not feature.dtype:
           raise ValueError("Missing type for feature %s." % key)
         sparse_keys.append(key)
         sparse_types.append(feature.dtype)
       elif isinstance(feature, SparseFeature):
         if SparseFeature not in types:
-          raise ValueError("Unsupported SparseFeature %s." % feature)
+          raise ValueError("Unsupported SparseFeature %s." % (feature,))
 
         if not feature.index_key:
           raise ValueError(
-              "Missing index_key for SparseFeature %s." % feature)
+              "Missing index_key for SparseFeature %s." % (feature,))
         if not feature.value_key:
           raise ValueError(
-              "Missing value_key for SparseFeature %s." % feature)
+              "Missing value_key for SparseFeature %s." % (feature,))
         if not feature.dtype:
           raise ValueError("Missing type for feature %s." % key)
         index_keys = feature.index_key
@@ -260,7 +260,7 @@ def _features_to_raw_params(features, types):
           sparse_types.append(feature.dtype)
       elif isinstance(feature, FixedLenFeature):
         if FixedLenFeature not in types:
-          raise ValueError("Unsupported FixedLenFeature %s." % feature)
+          raise ValueError("Unsupported FixedLenFeature %s." % (feature,))
         if not feature.dtype:
           raise ValueError("Missing type for feature %s." % key)
         if feature.shape is None:
@@ -281,7 +281,8 @@ def _features_to_raw_params(features, types):
           dense_defaults[key] = feature.default_value
       elif isinstance(feature, FixedLenSequenceFeature):
         if FixedLenSequenceFeature not in types:
-          raise ValueError("Unsupported FixedLenSequenceFeature %s." % feature)
+          raise ValueError("Unsupported FixedLenSequenceFeature %s." % (
+              feature,))
         if not feature.dtype:
           raise ValueError("Missing type for feature %s." % key)
         if feature.shape is None:
-- 
GitLab


From 8ef3e7c8c053cb6dad530e13c478bbd406ea2c95 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Mon, 8 Oct 2018 10:43:01 -0700
Subject: [PATCH 1246/1357] Part 1/3 of the feature sync to the Keras 2.2.4
 API.

PiperOrigin-RevId: 216211279
---
 tensorflow/python/keras/activations.py        |   5 +
 tensorflow/python/keras/activations_test.py   |  10 +
 tensorflow/python/keras/backend.py            |  81 ++++++--
 tensorflow/python/keras/backend_test.py       |  44 ++++-
 tensorflow/python/keras/callbacks.py          |   4 +
 tensorflow/python/keras/engine/network.py     |   9 +-
 .../python/keras/layers/convolutional.py      | 177 ++++++++++++-----
 .../python/keras/layers/convolutional_test.py |  31 +++
 tensorflow/python/keras/layers/pooling.py     | 185 +++++++++++++-----
 .../python/keras/layers/pooling_test.py       |  30 +++
 tensorflow/python/keras/layers/wrappers.py    |   3 +
 tensorflow/python/keras/testing_utils.py      |   5 +-
 tensorflow/python/keras/utils/conv_utils.py   |  45 +++--
 .../python/keras/utils/multi_gpu_utils.py     |  17 +-
 .../keras/utils/multi_gpu_utils_test.py       |  26 +++
 tensorflow/python/keras/utils/np_utils.py     |   5 +-
 .../v1/tensorflow.keras.activations.pbtxt     |   4 +
 .../golden/v1/tensorflow.keras.backend.pbtxt  |   4 +-
 ...low.keras.layers.-average-pooling1-d.pbtxt |   2 +-
 ...tensorflow.keras.layers.-avg-pool1-d.pbtxt |   2 +-
 ...flow.keras.layers.-conv2-d-transpose.pbtxt |   2 +-
 ...flow.keras.layers.-conv3-d-transpose.pbtxt |   2 +-
 ...ras.layers.-convolution2-d-transpose.pbtxt |   2 +-
 ...ras.layers.-convolution3-d-transpose.pbtxt |   2 +-
 ...as.layers.-global-average-pooling1-d.pbtxt |   4 +-
 ...low.keras.layers.-global-avg-pool1-d.pbtxt |   4 +-
 ...low.keras.layers.-global-max-pool1-d.pbtxt |   2 +-
 ....keras.layers.-global-max-pooling1-d.pbtxt |   2 +-
 ...tensorflow.keras.layers.-max-pool1-d.pbtxt |   2 +-
 ...sorflow.keras.layers.-max-pooling1-d.pbtxt |   2 +-
 .../golden/v1/tensorflow.keras.utils.pbtxt    |   2 +-
 .../v2/tensorflow.keras.activations.pbtxt     |   4 +
 .../golden/v2/tensorflow.keras.backend.pbtxt  |   4 +-
 ...low.keras.layers.-average-pooling1-d.pbtxt |   2 +-
 ...tensorflow.keras.layers.-avg-pool1-d.pbtxt |   2 +-
 ...flow.keras.layers.-conv2-d-transpose.pbtxt |   2 +-
 ...flow.keras.layers.-conv3-d-transpose.pbtxt |   2 +-
 ...ras.layers.-convolution2-d-transpose.pbtxt |   2 +-
 ...ras.layers.-convolution3-d-transpose.pbtxt |   2 +-
 ...as.layers.-global-average-pooling1-d.pbtxt |   4 +-
 ...low.keras.layers.-global-avg-pool1-d.pbtxt |   4 +-
 ...low.keras.layers.-global-max-pool1-d.pbtxt |   2 +-
 ....keras.layers.-global-max-pooling1-d.pbtxt |   2 +-
 ...tensorflow.keras.layers.-max-pool1-d.pbtxt |   2 +-
 ...sorflow.keras.layers.-max-pooling1-d.pbtxt |   2 +-
 .../golden/v2/tensorflow.keras.utils.pbtxt    |   2 +-
 46 files changed, 581 insertions(+), 172 deletions(-)

diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py
index 99645de736..d69791ce8d 100644
--- a/tensorflow/python/keras/activations.py
+++ b/tensorflow/python/keras/activations.py
@@ -160,6 +160,11 @@ def sigmoid(x):
   return nn.sigmoid(x)
 
 
+@tf_export('keras.activations.exponential')
+def exponential(x):
+  return math_ops.exp(x)
+
+
 @tf_export('keras.activations.hard_sigmoid')
 def hard_sigmoid(x):
   """Hard sigmoid activation function.
diff --git a/tensorflow/python/keras/activations_test.py b/tensorflow/python/keras/activations_test.py
index dd0bbcff39..ad238cb0a9 100644
--- a/tensorflow/python/keras/activations_test.py
+++ b/tensorflow/python/keras/activations_test.py
@@ -169,6 +169,16 @@ class KerasActivationsTest(test.TestCase):
     expected = np.tanh(test_values)
     self.assertAllClose(result, expected, rtol=1e-05)
 
+  def test_exponential(self):
+    with self.cached_session():
+      test_values = np.random.random((2, 5))
+      x = keras.backend.placeholder(ndim=2)
+      exp = keras.activations.exponential(x)
+      f = keras.backend.function([x], [exp])
+      result = f([test_values])[0]
+    expected = np.exp(test_values)
+    self.assertAllClose(result, expected, rtol=1e-05)
+
   def test_linear(self):
     x = np.random.random((10, 5))
     self.assertAllClose(x, keras.activations.linear(x))
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 63e776a06b..13f52fbae7 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -2223,7 +2223,7 @@ def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3):
 
 
 @tf_export('keras.backend.batch_normalization')
-def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
+def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3):
   """Applies batch normalization on x given mean, var, beta and gamma.
 
   I.e. returns:
@@ -2235,11 +2235,49 @@ def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
       var: Variance of batch.
       beta: Tensor with which to center the input.
       gamma: Tensor by which to scale the input.
+      axis: Integer, the axis that should be normalized.
+          (typically the features axis).
       epsilon: Fuzz factor.
 
   Returns:
       A tensor.
   """
+  if ndim(x) == 4:
+    # The CPU implementation of `fused_batch_norm` only supports NHWC
+    if axis == 1 or axis == -3:
+      tf_data_format = 'NCHW'
+    elif axis == 3 or axis == -1:
+      tf_data_format = 'NHWC'
+    else:
+      tf_data_format = None
+
+    if (tf_data_format == 'NHWC' or
+        tf_data_format == 'NCHW' and _has_nchw_support()):
+      # The mean / var / beta / gamma tensors may be broadcasted
+      # so they may have extra axes of size 1, which should be squeezed.
+      if ndim(mean) > 1:
+        mean = array_ops.reshape(mean, [-1])
+      if ndim(var) > 1:
+        var = array_ops.reshape(var, [-1])
+      if beta is None:
+        beta = zeros_like(mean)
+      elif ndim(beta) > 1:
+        beta = array_ops.reshape(beta, [-1])
+      if gamma is None:
+        gamma = ones_like(mean)
+      elif ndim(gamma) > 1:
+        gamma = array_ops.reshape(gamma, [-1])
+    y, _, _ = nn.fused_batch_norm(
+        x,
+        gamma,
+        beta,
+        epsilon=epsilon,
+        mean=mean,
+        variance=var,
+        data_format=tf_data_format,
+        is_training=False
+    )
+    return y
   return nn.batch_normalization(x, mean, var, beta, gamma, epsilon)
 
 
@@ -2880,7 +2918,7 @@ class Function(object):
 
     if session_kwargs:
       raise ValueError('Some keys in session_kwargs are not supported at this '
-                       'time: %s', session_kwargs.keys())
+                       'time: %s', (session_kwargs.keys(),))
 
     self._callable_fn = None
     self._feed_arrays = None
@@ -3798,19 +3836,23 @@ def _preprocess_conv1d_input(x, data_format):
   return x, tf_data_format
 
 
-def _preprocess_conv2d_input(x, data_format):
+def _preprocess_conv2d_input(x, data_format, force_transpose=False):
   """Transpose and cast the input before the conv2d.
 
   Arguments:
       x: input tensor.
       data_format: string, `"channels_last"` or `"channels_first"`.
+      force_transpose: Boolean. If True, the input will always be transposed
+          from NCHW to NHWC if `data_format` is `"channels_first"`.
+          If False, the transposition only occurs on CPU (GPU ops are
+          assumed to support NCHW).
 
   Returns:
       A tensor.
   """
   tf_data_format = 'NHWC'
   if data_format == 'channels_first':
-    if not _has_nchw_support():
+    if not _has_nchw_support() or force_transpose:
       x = array_ops.transpose(x, (0, 2, 3, 1))  # NCHW -> NHWC
     else:
       tf_data_format = 'NCHW'
@@ -3958,7 +4000,8 @@ def conv2d_transpose(x,
                      output_shape,
                      strides=(1, 1),
                      padding='valid',
-                     data_format=None):
+                     data_format=None,
+                     dilation_rate=(1, 1)):
   """2D deconvolution (i.e.
 
   transposed convolution).
@@ -3972,6 +4015,7 @@ def conv2d_transpose(x,
       data_format: string, `"channels_last"` or `"channels_first"`.
           Whether to use Theano or TensorFlow/CNTK data format
           for inputs/kernels/outputs.
+      dilation_rate: Tuple of 2 integers.
 
   Returns:
       A tensor, result of transposed 2D convolution.
@@ -3987,7 +4031,13 @@ def conv2d_transpose(x,
   if isinstance(output_shape, (tuple, list)):
     output_shape = array_ops.stack(output_shape)
 
-  x, tf_data_format = _preprocess_conv2d_input(x, data_format)
+  # `atrous_conv2d_transpose` only supports NHWC format, even on GPU.
+  if data_format == 'channels_first' and dilation_rate != (1, 1):
+    force_transpose = True
+  else:
+    force_transpose = False
+
+  x, tf_data_format = _preprocess_conv2d_input(x, data_format, force_transpose)
 
   if data_format == 'channels_first' and tf_data_format == 'NHWC':
     output_shape = (output_shape[0], output_shape[2], output_shape[3],
@@ -4002,13 +4052,18 @@ def conv2d_transpose(x,
   else:
     strides = (1, 1) + strides
 
-  x = nn.conv2d_transpose(
-      x,
-      kernel,
-      output_shape,
-      strides,
-      padding=padding,
-      data_format=tf_data_format)
+  if dilation_rate == (1, 1):
+    x = nn.conv2d_transpose(x, kernel, output_shape, strides,
+                            padding=padding,
+                            data_format=tf_data_format)
+  else:
+    assert dilation_rate[0] == dilation_rate[1]
+    x = nn.atrous_conv2d_transpose(
+        x,
+        kernel,
+        output_shape,
+        rate=dilation_rate[0],
+        padding=padding)
   if data_format == 'channels_first' and tf_data_format == 'NHWC':
     x = array_ops.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
   return x
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index ab71589940..0834448699 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -26,6 +26,7 @@ from tensorflow.python import keras
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import nn
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.util import tf_inspect
@@ -1381,6 +1382,36 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(mean.get_shape().as_list(), [3,])
     self.assertEqual(var.get_shape().as_list(), [3,])
 
+  def test_batch_normalization(self):
+    g_val = np.random.random((3,))
+    b_val = np.random.random((3,))
+    gamma = keras.backend.variable(g_val)
+    beta = keras.backend.variable(b_val)
+
+    # 3D NHC case
+    val = np.random.random((10, 5, 3))
+    x = keras.backend.variable(val)
+    mean, var = nn.moments(x, (0, 1), None, None, False)
+    normed = keras.backend.batch_normalization(
+        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
+    self.assertEqual(normed.shape.as_list(), [10, 5, 3])
+
+    # 4D NHWC case
+    val = np.random.random((10, 5, 5, 3))
+    x = keras.backend.variable(val)
+    mean, var = nn.moments(x, (0, 1, 2), None, None, False)
+    normed = keras.backend.batch_normalization(
+        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
+    self.assertEqual(normed.shape.as_list(), [10, 5, 5, 3])
+
+    # 4D NCHW case
+    val = np.random.random((10, 3, 5, 5))
+    x = keras.backend.variable(val)
+    mean, var = nn.moments(x, (0, 2, 3), None, None, False)
+    normed = keras.backend.batch_normalization(
+        x, mean, var, beta, gamma, axis=1, epsilon=1e-3)
+    self.assertEqual(normed.shape.as_list(), [10, 3, 5, 5])
+
 
 class TestCTC(test.TestCase):
 
@@ -1506,12 +1537,13 @@ class TestRandomOps(test.TestCase):
       self.assertAllClose(np.min(y), -2., atol=0.1)
 
   def test_string_input(self):
-    seq = keras.Sequential([
-        keras.layers.InputLayer(input_shape=(1,), dtype=dtypes.string),
-        keras.layers.Lambda(lambda x: x[0])
-    ])
-    preds = seq.predict([['tensorflow eager']])
-    self.assertEqual(preds.shape, (1,))
+    with self.cached_session():
+      seq = keras.Sequential([
+          keras.layers.InputLayer(input_shape=(1,), dtype=dtypes.string),
+          keras.layers.Lambda(lambda x: x[0])
+      ])
+      preds = seq.predict([['tensorflow eager']])
+      self.assertEqual(preds.shape, (1,))
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index 6dfbbf3694..3d6000f223 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -781,6 +781,10 @@ class LearningRateScheduler(Callback):
       print('\nEpoch %05d: LearningRateScheduler reducing learning '
             'rate to %s.' % (epoch + 1, lr))
 
+  def on_epoch_end(self, epoch, logs=None):
+    logs = logs or {}
+    logs['lr'] = K.get_value(self.model.optimizer.lr)
+
 
 @tf_export('keras.callbacks.TensorBoard')
 class TensorBoard(Callback):
diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 918488bd7a..5969fea2b2 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -1641,10 +1641,11 @@ class Network(base_layer.Layer):
         ValueError: if `summary()` is called before the model is built.
     """
     if not self.built:
-      raise ValueError('This model has never been called, thus its weights '
-                       'have not yet been created, so no summary can be '
-                       'displayed. Build the model first '
-                       '(e.g. by calling it on some data).')
+      raise ValueError('This model has not yet been built. '
+                       'Build the model first by calling `build()` or calling '
+                       '`fit()` with some data, or specify '
+                       'an `input_shape` argument in the first layer(s) for '
+                       'automatic build.')
     layer_utils.print_summary(self,
                               line_length=line_length,
                               positions=positions,
diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py
index d00def07bb..8f5872385c 100644
--- a/tensorflow/python/keras/layers/convolutional.py
+++ b/tensorflow/python/keras/layers/convolutional.py
@@ -645,6 +645,14 @@ class Conv2DTranspose(Conv2D):
           Specifying any stride value != 1 is incompatible with specifying
           any `dilation_rate` value != 1.
       padding: one of `"valid"` or `"same"` (case-insensitive).
+      output_padding: An integer or tuple/list of 2 integers,
+          specifying the amount of padding along the height and width
+          of the output tensor.
+          Can be a single integer to specify the same value for all
+          spatial dimensions.
+          The amount of output padding along a given dimension must be
+          lower than the stride along that same dimension.
+          If set to `None` (default), the output shape is inferred.
       data_format: A string,
           one of `channels_last` (default) or `channels_first`.
           The ordering of the dimensions in the inputs.
@@ -700,7 +708,9 @@ class Conv2DTranspose(Conv2D):
                kernel_size,
                strides=(1, 1),
                padding='valid',
+               output_padding=None,
                data_format=None,
+               dilation_rate=(1, 1),
                activation=None,
                use_bias=True,
                kernel_initializer='glorot_uniform',
@@ -717,6 +727,7 @@ class Conv2DTranspose(Conv2D):
         strides=strides,
         padding=padding,
         data_format=data_format,
+        dilation_rate=dilation_rate,
         activation=activations.get(activation),
         use_bias=use_bias,
         kernel_initializer=initializers.get(kernel_initializer),
@@ -728,6 +739,16 @@ class Conv2DTranspose(Conv2D):
         bias_constraint=constraints.get(bias_constraint),
         **kwargs)
 
+    self.output_padding = output_padding
+    if self.output_padding is not None:
+      self.output_padding = conv_utils.normalize_tuple(
+          self.output_padding, 2, 'output_padding')
+      for stride, out_pad in zip(self.strides, self.output_padding):
+        if out_pad >= stride:
+          raise ValueError('Stride ' + str(self.strides) + ' must be '
+                           'greater than output padding ' +
+                           str(self.output_padding))
+
   def build(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape)
     if len(input_shape) != 4:
@@ -769,51 +790,50 @@ class Conv2DTranspose(Conv2D):
     inputs_shape = array_ops.shape(inputs)
     batch_size = inputs_shape[0]
     if self.data_format == 'channels_first':
-      c_axis, h_axis, w_axis = 1, 2, 3
+      h_axis, w_axis = 2, 3
     else:
-      c_axis, h_axis, w_axis = 3, 1, 2
+      h_axis, w_axis = 1, 2
 
     height, width = inputs_shape[h_axis], inputs_shape[w_axis]
     kernel_h, kernel_w = self.kernel_size
     stride_h, stride_w = self.strides
 
+    if self.output_padding is None:
+      out_pad_h = out_pad_w = None
+    else:
+      out_pad_h, out_pad_w = self.output_padding
+
     # Infer the dynamic output shape:
     out_height = conv_utils.deconv_output_length(height,
                                                  kernel_h,
-                                                 self.padding,
-                                                 stride_h)
+                                                 padding=self.padding,
+                                                 output_padding=out_pad_h,
+                                                 stride=stride_h,
+                                                 dilation=self.dilation_rate[0])
     out_width = conv_utils.deconv_output_length(width,
                                                 kernel_w,
-                                                self.padding,
-                                                stride_w)
+                                                padding=self.padding,
+                                                output_padding=out_pad_w,
+                                                stride=stride_w,
+                                                dilation=self.dilation_rate[1])
     if self.data_format == 'channels_first':
       output_shape = (batch_size, self.filters, out_height, out_width)
-      strides = (1, 1, stride_h, stride_w)
     else:
       output_shape = (batch_size, out_height, out_width, self.filters)
-      strides = (1, stride_h, stride_w, 1)
 
     output_shape_tensor = array_ops.stack(output_shape)
-    outputs = nn.conv2d_transpose(
+    outputs = backend.conv2d_transpose(
         inputs,
         self.kernel,
         output_shape_tensor,
-        strides,
-        padding=self.padding.upper(),
-        data_format=conv_utils.convert_data_format(self.data_format, ndim=4))
+        strides=self.strides,
+        padding=self.padding,
+        data_format=self.data_format,
+        dilation_rate=self.dilation_rate)
 
     if not context.executing_eagerly():
       # Infer the static output shape:
-      out_shape = inputs.get_shape().as_list()
-      out_shape[c_axis] = self.filters
-      out_shape[h_axis] = conv_utils.deconv_output_length(out_shape[h_axis],
-                                                          kernel_h,
-                                                          self.padding,
-                                                          stride_h)
-      out_shape[w_axis] = conv_utils.deconv_output_length(out_shape[w_axis],
-                                                          kernel_w,
-                                                          self.padding,
-                                                          stride_w)
+      out_shape = self.compute_output_shape(inputs.shape)
       outputs.set_shape(out_shape)
 
     if self.use_bias:
@@ -837,13 +857,33 @@ class Conv2DTranspose(Conv2D):
     kernel_h, kernel_w = self.kernel_size
     stride_h, stride_w = self.strides
 
+    if self.output_padding is None:
+      out_pad_h = out_pad_w = None
+    else:
+      out_pad_h, out_pad_w = self.output_padding
+
     output_shape[c_axis] = self.filters
     output_shape[h_axis] = conv_utils.deconv_output_length(
-        output_shape[h_axis], kernel_h, self.padding, stride_h)
+        output_shape[h_axis],
+        kernel_h,
+        padding=self.padding,
+        output_padding=out_pad_h,
+        stride=stride_h,
+        dilation=self.dilation_rate[0])
     output_shape[w_axis] = conv_utils.deconv_output_length(
-        output_shape[w_axis], kernel_w, self.padding, stride_w)
+        output_shape[w_axis],
+        kernel_w,
+        padding=self.padding,
+        output_padding=out_pad_w,
+        stride=stride_w,
+        dilation=self.dilation_rate[1])
     return tensor_shape.TensorShape(output_shape)
 
+  def get_config(self):
+    config = super(Conv2DTranspose, self).get_config()
+    config['output_padding'] = self.output_padding
+    return config
+
 
 @tf_export('keras.layers.Conv3DTranspose',
            'keras.layers.Convolution3DTranspose')
@@ -878,6 +918,14 @@ class Conv3DTranspose(Conv3D):
           Specifying any stride value != 1 is incompatible with specifying
           any `dilation_rate` value != 1.
       padding: one of `"valid"` or `"same"` (case-insensitive).
+      output_padding: An integer or tuple/list of 3 integers,
+          specifying the amount of padding along the depth, height, and
+          width.
+          Can be a single integer to specify the same value for all
+          spatial dimensions.
+          The amount of output padding along a given dimension must be
+          lower than the stride along that same dimension.
+          If set to `None` (default), the output shape is inferred.
       data_format: A string,
           one of `channels_last` (default) or `channels_first`.
           The ordering of the dimensions in the inputs.
@@ -943,6 +991,7 @@ class Conv3DTranspose(Conv3D):
                kernel_size,
                strides=(1, 1, 1),
                padding='valid',
+               output_padding=None,
                data_format=None,
                activation=None,
                use_bias=True,
@@ -971,6 +1020,16 @@ class Conv3DTranspose(Conv3D):
         bias_constraint=constraints.get(bias_constraint),
         **kwargs)
 
+    self.output_padding = output_padding
+    if self.output_padding is not None:
+      self.output_padding = conv_utils.normalize_tuple(
+          self.output_padding, 3, 'output_padding')
+      for stride, out_pad in zip(self.strides, self.output_padding):
+        if out_pad >= stride:
+          raise ValueError('Stride ' + str(self.strides) + ' must be '
+                           'greater than output padding ' +
+                           str(self.output_padding))
+
   def build(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape)
     if len(input_shape) != 5:
@@ -1012,11 +1071,9 @@ class Conv3DTranspose(Conv3D):
     inputs_shape = array_ops.shape(inputs)
     batch_size = inputs_shape[0]
     if self.data_format == 'channels_first':
-      c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4
+      d_axis, h_axis, w_axis = 2, 3, 4
     else:
-      c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3
-
-    self.input_spec = InputSpec(ndim=5, axes={c_axis: inputs_shape[c_axis]})
+      d_axis, h_axis, w_axis = 1, 2, 3
 
     depth = inputs_shape[d_axis]
     height = inputs_shape[h_axis]
@@ -1025,19 +1082,27 @@ class Conv3DTranspose(Conv3D):
     kernel_d, kernel_h, kernel_w = self.kernel_size
     stride_d, stride_h, stride_w = self.strides
 
+    if self.output_padding is None:
+      out_pad_d = out_pad_h = out_pad_w = None
+    else:
+      out_pad_d, out_pad_h, out_pad_w = self.output_padding
+
     # Infer the dynamic output shape:
     out_depth = conv_utils.deconv_output_length(depth,
                                                 kernel_d,
-                                                self.padding,
-                                                stride_d)
+                                                padding=self.padding,
+                                                output_padding=out_pad_d,
+                                                stride=stride_d)
     out_height = conv_utils.deconv_output_length(height,
                                                  kernel_h,
-                                                 self.padding,
-                                                 stride_h)
+                                                 padding=self.padding,
+                                                 output_padding=out_pad_h,
+                                                 stride=stride_h)
     out_width = conv_utils.deconv_output_length(width,
                                                 kernel_w,
-                                                self.padding,
-                                                stride_w)
+                                                padding=self.padding,
+                                                output_padding=out_pad_w,
+                                                stride=stride_w)
     if self.data_format == 'channels_first':
       output_shape = (batch_size, self.filters, out_depth, out_height,
                       out_width)
@@ -1058,20 +1123,7 @@ class Conv3DTranspose(Conv3D):
 
     if not context.executing_eagerly():
       # Infer the static output shape:
-      out_shape = inputs.get_shape().as_list()
-      out_shape[c_axis] = self.filters
-      out_shape[d_axis] = conv_utils.deconv_output_length(out_shape[d_axis],
-                                                          kernel_d,
-                                                          self.padding,
-                                                          stride_d)
-      out_shape[h_axis] = conv_utils.deconv_output_length(out_shape[h_axis],
-                                                          kernel_h,
-                                                          self.padding,
-                                                          stride_h)
-      out_shape[w_axis] = conv_utils.deconv_output_length(out_shape[w_axis],
-                                                          kernel_w,
-                                                          self.padding,
-                                                          stride_w)
+      out_shape = self.compute_output_shape(inputs.shape)
       outputs.set_shape(out_shape)
 
     if self.use_bias:
@@ -1109,15 +1161,38 @@ class Conv3DTranspose(Conv3D):
     kernel_d, kernel_h, kernel_w = self.kernel_size
     stride_d, stride_h, stride_w = self.strides
 
+    if self.output_padding is None:
+      out_pad_d = out_pad_h = out_pad_w = None
+    else:
+      out_pad_d, out_pad_h, out_pad_w = self.output_padding
+
     output_shape[c_axis] = self.filters
     output_shape[d_axis] = conv_utils.deconv_output_length(
-        output_shape[d_axis], kernel_d, self.padding, stride_d)
+        output_shape[d_axis],
+        kernel_d,
+        padding=self.padding,
+        output_padding=out_pad_d,
+        stride=stride_d)
     output_shape[h_axis] = conv_utils.deconv_output_length(
-        output_shape[h_axis], kernel_h, self.padding, stride_h)
+        output_shape[h_axis],
+        kernel_h,
+        padding=self.padding,
+        output_padding=out_pad_h,
+        stride=stride_h)
     output_shape[w_axis] = conv_utils.deconv_output_length(
-        output_shape[w_axis], kernel_w, self.padding, stride_w)
+        output_shape[w_axis],
+        kernel_w,
+        padding=self.padding,
+        output_padding=out_pad_w,
+        stride=stride_w)
     return tensor_shape.TensorShape(output_shape)
 
+  def get_config(self):
+    config = super(Conv3DTranspose, self).get_config()
+    config.pop('dilation_rate')
+    config['output_padding'] = self.output_padding
+    return config
+
 
 class SeparableConv(Conv):
   """Abstract base layer for separable nD convolution.
diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index cad5e4c8bd..f88d632ab5 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -204,6 +204,9 @@ class Conv2DTransposeTest(test.TestCase):
     if test.is_gpu_available(cuda_only=True):
       self._run_test(kwargs, 'data_format', ['channels_first'])
 
+    kwargs['strides'] = (2, 2)
+    self._run_test(kwargs, 'output_padding', [(1, 1)])
+
   def test_conv2dtranspose_regularizers(self):
     kwargs = {
         'filters': 3,
@@ -239,6 +242,31 @@ class Conv2DTransposeTest(test.TestCase):
       self.assertEqual(layer.kernel.constraint, k_constraint)
       self.assertEqual(layer.bias.constraint, b_constraint)
 
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_conv2d_transpose_dilation(self):
+    testing_utils.layer_test(keras.layers.Conv2DTranspose,
+                             kwargs={'filters': 2,
+                                     'kernel_size': 3,
+                                     'padding': 'same',
+                                     'data_format': 'channels_last',
+                                     'dilation_rate': (2, 2)},
+                             input_shape=(2, 5, 6, 3))
+
+    input_data = np.arange(48).reshape((1, 4, 4, 3)).astype(np.float32)
+    expected_output = np.float32([[192, 228, 192, 228],
+                                  [336, 372, 336, 372],
+                                  [192, 228, 192, 228],
+                                  [336, 372, 336, 372]]).reshape((1, 4, 4, 1))
+    testing_utils.layer_test(keras.layers.Conv2DTranspose,
+                             input_data=input_data,
+                             kwargs={'filters': 1,
+                                     'kernel_size': 3,
+                                     'padding': 'same',
+                                     'data_format': 'channels_last',
+                                     'dilation_rate': (2, 2),
+                                     'kernel_initializer': 'ones'},
+                             expected_output=expected_output)
+
 
 class Conv3DTransposeTest(test.TestCase):
 
@@ -270,6 +298,9 @@ class Conv3DTransposeTest(test.TestCase):
     if test.is_gpu_available(cuda_only=True):
       self._run_test(kwargs, 'data_format', ['channels_first'])
 
+    kwargs['strides'] = (2, 2, 2)
+    self._run_test(kwargs, 'output_padding', [(1, 1, 1)])
+
   def test_conv3dtranspose_regularizers(self):
     kwargs = {
         'filters': 3,
diff --git a/tensorflow/python/keras/layers/pooling.py b/tensorflow/python/keras/layers/pooling.py
index 912e8bd619..72a9c1d629 100644
--- a/tensorflow/python/keras/layers/pooling.py
+++ b/tensorflow/python/keras/layers/pooling.py
@@ -18,12 +18,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.keras import backend
 from tensorflow.python.keras.engine.base_layer import InputSpec
 from tensorflow.python.keras.engine.base_layer import Layer
 from tensorflow.python.keras.utils import conv_utils
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.util.tf_export import tf_export
 
@@ -41,16 +44,18 @@ class Pooling1D(Layer):
       strides of the pooling operation.
     padding: A string. The padding method, either 'valid' or 'same'.
       Case-insensitive.
-    data_format: A string, one of `channels_last` (default) or `channels_first`.
+    data_format: A string,
+      one of `channels_last` (default) or `channels_first`.
       The ordering of the dimensions in the inputs.
       `channels_last` corresponds to inputs with shape
-      `(batch, length, channels)` while `channels_first` corresponds to
-      inputs with shape `(batch, channels, length)`.
+      `(batch, steps, features)` while `channels_first`
+      corresponds to inputs with shape
+      `(batch, features, steps)`.
     name: A string, the name of the layer.
   """
 
   def __init__(self, pool_function, pool_size, strides,
-               padding='valid', data_format=None,
+               padding='valid', data_format='channels_last',
                name=None, **kwargs):
     super(Pooling1D, self).__init__(name=name, **kwargs)
     if data_format is None:
@@ -65,45 +70,39 @@ class Pooling1D(Layer):
     self.input_spec = InputSpec(ndim=3)
 
   def call(self, inputs):
-    # There is no TF op for 1D pooling, hence we make the inputs 4D.
-    if self.data_format == 'channels_last':
-      # input is NWC, make it NHWC
-      inputs = array_ops.expand_dims(inputs, 1)
-      # pool on the W dim
-      pool_shape = (1, 1) + self.pool_size + (1,)
-      strides = (1, 1) + self.strides + (1,)
-      data_format = 'NHWC'
-    else:
-      # input is NCW, make it NCHW
-      inputs = array_ops.expand_dims(inputs, 2)
-      # pool on the W dim
-      pool_shape = (1, 1, 1) + self.pool_size
-      strides = (1, 1, 1) + self.strides
-      data_format = 'NCHW'
-
+    pad_axis = 2 if self.data_format == 'channels_last' else 3
+    inputs = array_ops.expand_dims(inputs, pad_axis)
     outputs = self.pool_function(
         inputs,
-        ksize=pool_shape,
-        strides=strides,
-        padding=self.padding.upper(),
-        data_format=data_format)
-
-    if self.data_format == 'channels_last':
-      return array_ops.squeeze(outputs, 1)
-    else:
-      return array_ops.squeeze(outputs, 2)
+        self.pool_size + (1,),
+        strides=self.strides + (1,),
+        padding=self.padding,
+        data_format=self.data_format)
+    return array_ops.squeeze(outputs, pad_axis)
 
   def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    length = conv_utils.conv_output_length(input_shape[1], self.pool_size[0],
-                                           self.padding, self.strides[0])
-    return tensor_shape.TensorShape([input_shape[0], length, input_shape[2]])
+    if self.data_format == 'channels_first':
+      steps = input_shape[2]
+      features = input_shape[1]
+    else:
+      steps = input_shape[1]
+      features = input_shape[2]
+    length = conv_utils.conv_output_length(steps,
+                                           self.pool_size[0],
+                                           self.padding,
+                                           self.strides[0])
+    if self.data_format == 'channels_first':
+      return tensor_shape.TensorShape([input_shape[0], features, length])
+    else:
+      return tensor_shape.TensorShape([input_shape[0], length, features])
 
   def get_config(self):
     config = {
         'strides': self.strides,
         'pool_size': self.pool_size,
-        'padding': self.padding
+        'padding': self.padding,
+        'data_format': self.data_format,
     }
     base_config = super(Pooling1D, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
@@ -119,19 +118,36 @@ class MaxPooling1D(Pooling1D):
           E.g. 2 will halve the input.
           If None, it will default to `pool_size`.
       padding: One of `"valid"` or `"same"` (case-insensitive).
+      data_format: A string,
+          one of `channels_last` (default) or `channels_first`.
+          The ordering of the dimensions in the inputs.
+          `channels_last` corresponds to inputs with shape
+          `(batch, steps, features)` while `channels_first`
+          corresponds to inputs with shape
+          `(batch, features, steps)`.
 
   Input shape:
-      3D tensor with shape: `(batch_size, steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, steps)`
 
   Output shape:
-      3D tensor with shape: `(batch_size, downsampled_steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, downsampled_steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, downsampled_steps)`
   """
 
   def __init__(self, pool_size=2, strides=None,
-               padding='valid', data_format=None, **kwargs):
+               padding='valid', data_format='channels_last', **kwargs):
 
     super(MaxPooling1D, self).__init__(
-        nn.max_pool,
+        functools.partial(backend.pool2d, pool_mode='max'),
         pool_size=pool_size,
         strides=strides,
         padding=padding,
@@ -149,18 +165,35 @@ class AveragePooling1D(Pooling1D):
           E.g. 2 will halve the input.
           If None, it will default to `pool_size`.
       padding: One of `"valid"` or `"same"` (case-insensitive).
+      data_format: A string,
+          one of `channels_last` (default) or `channels_first`.
+          The ordering of the dimensions in the inputs.
+          `channels_last` corresponds to inputs with shape
+          `(batch, steps, features)` while `channels_first`
+          corresponds to inputs with shape
+          `(batch, features, steps)`.
 
   Input shape:
-      3D tensor with shape: `(batch_size, steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, steps)`
 
   Output shape:
-      3D tensor with shape: `(batch_size, downsampled_steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, downsampled_steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, downsampled_steps)`
   """
 
   def __init__(self, pool_size=2, strides=None,
-               padding='valid', data_format=None, **kwargs):
+               padding='valid', data_format='channels_last', **kwargs):
     super(AveragePooling1D, self).__init__(
-        nn.avg_pool,
+        functools.partial(backend.pool2d, pool_mode='avg'),
         pool_size=pool_size,
         strides=strides,
         padding=padding,
@@ -561,41 +594,96 @@ class GlobalPooling1D(Layer):
   """Abstract class for different global pooling 1D layers.
   """
 
-  def __init__(self, **kwargs):
+  def __init__(self, data_format='channels_last', **kwargs):
     super(GlobalPooling1D, self).__init__(**kwargs)
     self.input_spec = InputSpec(ndim=3)
+    self.data_format = conv_utils.normalize_data_format(data_format)
 
   def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    return tensor_shape.TensorShape([input_shape[0], input_shape[2]])
+    if self.data_format == 'channels_first':
+      return tensor_shape.TensorShape([input_shape[0], input_shape[1]])
+    else:
+      return tensor_shape.TensorShape([input_shape[0], input_shape[2]])
 
   def call(self, inputs):
     raise NotImplementedError
 
+  def get_config(self):
+    config = {'data_format': self.data_format}
+    base_config = super(GlobalPooling1D, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
+
 
 @tf_export('keras.layers.GlobalAveragePooling1D',
            'keras.layers.GlobalAvgPool1D')
 class GlobalAveragePooling1D(GlobalPooling1D):
   """Global average pooling operation for temporal data.
 
+  Arguments:
+    data_format: A string,
+        one of `channels_last` (default) or `channels_first`.
+        The ordering of the dimensions in the inputs.
+        `channels_last` corresponds to inputs with shape
+        `(batch, steps, features)` while `channels_first`
+        corresponds to inputs with shape
+        `(batch, features, steps)`.
+
   Input shape:
-      3D tensor with shape: `(batch_size, steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, steps)`
 
   Output shape:
       2D tensor with shape:
       `(batch_size, features)`
   """
 
-  def call(self, inputs):
-    return backend.mean(inputs, axis=1)
+  def __init__(self, data_format='channels_last', **kwargs):
+    super(GlobalAveragePooling1D, self).__init__(data_format=data_format,
+                                                 **kwargs)
+    self.supports_masking = True
+
+  def call(self, inputs, mask=None):
+    steps_axis = 1 if self.data_format == 'channels_last' else 2
+    if mask is not None:
+      mask = math_ops.cast(mask, backend.floatx())
+      input_shape = inputs.shape.as_list()
+      broadcast_shape = [-1, input_shape[steps_axis], 1]
+      mask = array_ops.reshape(mask, broadcast_shape)
+      inputs *= mask
+      return backend.sum(inputs, axis=steps_axis) / math_ops.reduce_sum(
+          mask, axis=steps_axis)
+    else:
+      return backend.mean(inputs, axis=steps_axis)
+
+  def compute_mask(self, inputs, mask=None):
+    return None
 
 
 @tf_export('keras.layers.GlobalMaxPool1D', 'keras.layers.GlobalMaxPooling1D')
 class GlobalMaxPooling1D(GlobalPooling1D):
   """Global max pooling operation for temporal data.
 
+  Arguments:
+    data_format: A string,
+        one of `channels_last` (default) or `channels_first`.
+        The ordering of the dimensions in the inputs.
+        `channels_last` corresponds to inputs with shape
+        `(batch, steps, features)` while `channels_first`
+        corresponds to inputs with shape
+        `(batch, features, steps)`.
+
   Input shape:
-      3D tensor with shape: `(batch_size, steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, steps)`
 
   Output shape:
       2D tensor with shape:
@@ -603,7 +691,8 @@ class GlobalMaxPooling1D(GlobalPooling1D):
   """
 
   def call(self, inputs):
-    return backend.max(inputs, axis=1)
+    steps_axis = 1 if self.data_format == 'channels_last' else 2
+    return backend.max(inputs, axis=steps_axis)
 
 
 class GlobalPooling2D(Layer):
diff --git a/tensorflow/python/keras/layers/pooling_test.py b/tensorflow/python/keras/layers/pooling_test.py
index 2cd9939e66..936e73ecf9 100644
--- a/tensorflow/python/keras/layers/pooling_test.py
+++ b/tensorflow/python/keras/layers/pooling_test.py
@@ -18,11 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
+from tensorflow.python.training import rmsprop
 
 
 class GlobalPoolingTest(test.TestCase):
@@ -31,8 +34,26 @@ class GlobalPoolingTest(test.TestCase):
   def test_globalpooling_1d(self):
     testing_utils.layer_test(keras.layers.pooling.GlobalMaxPooling1D,
                              input_shape=(3, 4, 5))
+    testing_utils.layer_test(keras.layers.pooling.GlobalMaxPooling1D,
+                             kwargs={'data_format': 'channels_first'},
+                             input_shape=(3, 4, 5))
     testing_utils.layer_test(
         keras.layers.pooling.GlobalAveragePooling1D, input_shape=(3, 4, 5))
+    testing_utils.layer_test(keras.layers.pooling.GlobalAveragePooling1D,
+                             kwargs={'data_format': 'channels_first'},
+                             input_shape=(3, 4, 5))
+
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_globalpooling_1d_masking_support(self):
+    model = keras.Sequential()
+    model.add(keras.layers.Masking(mask_value=0., input_shape=(3, 4)))
+    model.add(keras.layers.GlobalAveragePooling1D())
+    model.compile(loss='mae', optimizer=rmsprop.RMSPropOptimizer(0.001))
+
+    model_input = np.random.random((2, 3, 4))
+    model_input[0, 1:, :] = 0
+    output = model.predict(model_input)
+    self.assertAllClose(output[0], model_input[0, 0, :])
 
   @tf_test_util.run_in_graph_and_eager_modes
   def test_globalpooling_2d(self):
@@ -172,6 +193,10 @@ class Pooling1DTest(test.TestCase):
             kwargs={'strides': stride,
                     'padding': padding},
             input_shape=(3, 5, 4))
+    testing_utils.layer_test(
+        keras.layers.MaxPooling1D,
+        kwargs={'data_format': 'channels_first'},
+        input_shape=(3, 2, 6))
 
   @tf_test_util.run_in_graph_and_eager_modes
   def test_averagepooling_1d(self):
@@ -183,6 +208,11 @@ class Pooling1DTest(test.TestCase):
                     'padding': padding},
             input_shape=(3, 5, 4))
 
+    testing_utils.layer_test(
+        keras.layers.AveragePooling1D,
+        kwargs={'data_format': 'channels_first'},
+        input_shape=(3, 2, 6))
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py
index a1933c11b0..d19d0b5f8c 100644
--- a/tensorflow/python/keras/layers/wrappers.py
+++ b/tensorflow/python/keras/layers/wrappers.py
@@ -587,6 +587,9 @@ class Bidirectional(Wrapper):
       output = y * y_rev
     elif self.merge_mode is None:
       output = [y, y_rev]
+    else:
+      raise ValueError(
+          'Unrecognized value for `merge_mode`: %s' % (self.merge_mode))
 
     # Properly set learning phase
     if (getattr(y, '_uses_learning_phase', False) or
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index 501b50ba5f..2fae094a1e 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -166,8 +166,9 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
     if expected_dim is not None:
       if expected_dim != actual_dim:
         raise AssertionError(
-            'When testing layer %s, for input %s, found output_shape='
-            '%s but expected to find %s.\nFull kwargs: %s' %
+            'When testing layer %s **after deserialization**, '
+            'for input %s, found output_shape='
+            '%s but expected to find inferred shape %s.\nFull kwargs: %s' %
             (layer_cls.__name__,
              x,
              actual_output_shape,
diff --git a/tensorflow/python/keras/utils/conv_utils.py b/tensorflow/python/keras/utils/conv_utils.py
index 8ebca1418d..f486e631e5 100644
--- a/tensorflow/python/keras/utils/conv_utils.py
+++ b/tensorflow/python/keras/utils/conv_utils.py
@@ -137,26 +137,49 @@ def conv_input_length(output_length, filter_size, padding, stride):
   return (output_length - 1) * stride - 2 * pad + filter_size
 
 
-def deconv_output_length(input_length, filter_size, padding, stride):
+def deconv_output_length(input_length, filter_size, padding,
+                         output_padding=None, stride=0, dilation=1):
   """Determines output length of a transposed convolution given input length.
 
   Arguments:
-      input_length: integer.
-      filter_size: integer.
-      padding: one of "same", "valid", "full".
-      stride: integer.
+      input_length: Integer.
+      filter_size: Integer.
+      padding: one of `"same"`, `"valid"`, `"full"`.
+      output_padding: Integer, amount of padding along the output dimension.
+          Can be set to `None` in which case the output length is inferred.
+      stride: Integer.
+      dilation: Integer.
 
   Returns:
       The output length (integer).
   """
+  assert padding in {'same', 'valid', 'full'}
   if input_length is None:
     return None
-  input_length *= stride
-  if padding == 'valid':
-    input_length += max(filter_size - stride, 0)
-  elif padding == 'full':
-    input_length -= (stride + filter_size - 2)
-  return input_length
+
+  # Get the dilated kernel size
+  filter_size = filter_size + (filter_size - 1) * (dilation - 1)
+
+  # Infer length if output padding is None, else compute the exact length
+  if output_padding is None:
+    if padding == 'valid':
+      length = input_length * stride + max(filter_size - stride, 0)
+    elif padding == 'full':
+      length = input_length * stride - (stride + filter_size - 2)
+    elif padding == 'same':
+      length = input_length * stride
+
+  else:
+    if padding == 'same':
+      pad = filter_size // 2
+    elif padding == 'valid':
+      pad = 0
+    elif padding == 'full':
+      pad = filter_size - 1
+
+    length = ((input_length - 1) * stride + filter_size - 2 * pad +
+              output_padding)
+  return length
 
 
 def normalize_data_format(value):
diff --git a/tensorflow/python/keras/utils/multi_gpu_utils.py b/tensorflow/python/keras/utils/multi_gpu_utils.py
index e1c49bc852..04b2ea8fe3 100644
--- a/tensorflow/python/keras/utils/multi_gpu_utils.py
+++ b/tensorflow/python/keras/utils/multi_gpu_utils.py
@@ -244,9 +244,24 @@ def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
         for o in range(len(outputs)):
           all_outputs[o].append(outputs[o])
 
+  # Deduplicate output names to handle Siamese networks.
+  occurrences = {}
+  for n in model.output_names:
+    if n not in occurrences:
+      occurrences[n] = 1
+    else:
+      occurrences[n] += 1
+  conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
+  output_names = []
+  for n in model.output_names:
+    if n in conflict_counter:
+      conflict_counter[n] += 1
+      n += '_%d' % conflict_counter[n]
+    output_names.append(n)
+
   # Merge outputs under expected scope.
   with ops.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
     merged = []
-    for name, outputs in zip(model.output_names, all_outputs):
+    for name, outputs in zip(output_names, all_outputs):
       merged.append(concatenate(outputs, axis=0, name=name))
     return Model(model.inputs, merged)
diff --git a/tensorflow/python/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/utils/multi_gpu_utils_test.py
index 3d0351a11f..1780ab6587 100644
--- a/tensorflow/python/keras/utils/multi_gpu_utils_test.py
+++ b/tensorflow/python/keras/utils/multi_gpu_utils_test.py
@@ -198,5 +198,31 @@ class TestMultiGPUModel(test.TestCase):
       parallel_model.compile(loss='mean_squared_error', optimizer='adam')
       parallel_model.train_on_batch(x, y)
 
+  def test_multi_gpu_with_siamese_network(self):
+    gpus = 2
+
+    if not check_if_compatible_devices(gpus=gpus):
+      return
+
+    with self.cached_session():
+      input_shape = (3,)
+      nested_model = keras.models.Sequential([
+          keras.layers.Dense(32, input_shape=input_shape),
+          keras.layers.Dense(1)
+      ], name='nested')
+
+      input1 = keras.Input(input_shape)
+      input2 = keras.Input(input_shape)
+      score1 = nested_model(input1)
+      score2 = nested_model(input2)
+      score_sum = keras.layers.Add(name='add')([score1, score2])
+
+      siamese = keras.models.Model(inputs=[input1, input2],
+                                   outputs=[score_sum, score1, score2],
+                                   name='siamese')
+      parallel_siamese = keras.utils.multi_gpu_model(siamese, gpus)
+      self.assertEqual(parallel_siamese.output_names,
+                       ['add', 'nested_1', 'nested_2'])
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/utils/np_utils.py b/tensorflow/python/keras/utils/np_utils.py
index c24e87308b..3763999bff 100644
--- a/tensorflow/python/keras/utils/np_utils.py
+++ b/tensorflow/python/keras/utils/np_utils.py
@@ -22,7 +22,7 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 @tf_export('keras.utils.to_categorical')
-def to_categorical(y, num_classes=None):
+def to_categorical(y, num_classes=None, dtype='float32'):
   """Converts a class vector (integers) to binary class matrix.
 
   E.g. for use with categorical_crossentropy.
@@ -31,6 +31,7 @@ def to_categorical(y, num_classes=None):
       y: class vector to be converted into a matrix
           (integers from 0 to num_classes).
       num_classes: total number of classes.
+      dtype: The data type expected by the input. Default: `'float32'`.
 
   Returns:
       A binary matrix representation of the input. The classes axis is placed
@@ -44,7 +45,7 @@ def to_categorical(y, num_classes=None):
   if not num_classes:
     num_classes = np.max(y) + 1
   n = y.shape[0]
-  categorical = np.zeros((n, num_classes), dtype=np.float32)
+  categorical = np.zeros((n, num_classes), dtype=dtype)
   categorical[np.arange(n), y] = 1
   output_shape = input_shape + (num_classes,)
   categorical = np.reshape(categorical, output_shape)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt
index 2e9de9ebb2..eb315e356d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "elu"
     argspec: "args=[\'x\', \'alpha\'], varargs=None, keywords=None, defaults=[\'1.0\'], "
   }
+  member_method {
+    name: "exponential"
+    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get"
     argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
index a71a59e269..9feb7c09b8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
@@ -46,7 +46,7 @@ tf_module {
   }
   member_method {
     name: "batch_normalization"
-    argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'0.001\'], "
+    argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'axis\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'-1\', \'0.001\'], "
   }
   member_method {
     name: "batch_set_value"
@@ -98,7 +98,7 @@ tf_module {
   }
   member_method {
     name: "conv2d_transpose"
-    argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\'], "
+    argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\', \'(1, 1)\'], "
   }
   member_method {
     name: "conv3d"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
index c3dd2ad046..014f5828fa 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
index c440604aae..a6e4856de9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
index 065bb4d35b..381839d6de 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index c7ba6056f9..2933f9f4b3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
index 8f4f7918ab..9c9c7461c8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index 93c442bd55..44ca598724 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index 5ea61d118d..a8094c0bde 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
@@ -111,7 +111,7 @@ tf_class {
   }
   member_method {
     name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "compute_mask"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
index 11dca17c6d..3ebe162f57 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
@@ -111,7 +111,7 @@ tf_class {
   }
   member_method {
     name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "compute_mask"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
index 278429af6f..c0a53b847b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
index 935a69ab2f..ff6c6f3ec4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
index 238d96cca6..d26da270e7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
index 4a45bf7997..524c5fd69e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt
index 81b91d2780..138d97b11f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt
@@ -70,6 +70,6 @@ tf_module {
   }
   member_method {
     name: "to_categorical"
-    argspec: "args=[\'y\', \'num_classes\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], "
   }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt
index 2e9de9ebb2..eb315e356d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "elu"
     argspec: "args=[\'x\', \'alpha\'], varargs=None, keywords=None, defaults=[\'1.0\'], "
   }
+  member_method {
+    name: "exponential"
+    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get"
     argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
index a71a59e269..9feb7c09b8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
@@ -46,7 +46,7 @@ tf_module {
   }
   member_method {
     name: "batch_normalization"
-    argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'0.001\'], "
+    argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'axis\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'-1\', \'0.001\'], "
   }
   member_method {
     name: "batch_set_value"
@@ -98,7 +98,7 @@ tf_module {
   }
   member_method {
     name: "conv2d_transpose"
-    argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\'], "
+    argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\', \'(1, 1)\'], "
   }
   member_method {
     name: "conv3d"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
index c3dd2ad046..014f5828fa 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
index c440604aae..a6e4856de9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
index 065bb4d35b..381839d6de 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index c7ba6056f9..2933f9f4b3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
index 8f4f7918ab..9c9c7461c8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index 93c442bd55..44ca598724 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index 5ea61d118d..a8094c0bde 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
@@ -111,7 +111,7 @@ tf_class {
   }
   member_method {
     name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "compute_mask"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
index 11dca17c6d..3ebe162f57 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
@@ -111,7 +111,7 @@ tf_class {
   }
   member_method {
     name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "compute_mask"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
index 278429af6f..c0a53b847b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
index 935a69ab2f..ff6c6f3ec4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
index 238d96cca6..d26da270e7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
index 4a45bf7997..524c5fd69e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt
index 81b91d2780..138d97b11f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt
@@ -70,6 +70,6 @@ tf_module {
   }
   member_method {
     name: "to_categorical"
-    argspec: "args=[\'y\', \'num_classes\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], "
   }
 }
-- 
GitLab


From d1588d72a820423cab36977ca97221aba01be713 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 8 Oct 2018 10:43:03 -0700
Subject: [PATCH 1247/1357] Add a utility that allows finding a name for an
 entity, relative to an existing namespace.

PiperOrigin-RevId: 216211286
---
 .../python/autograph/pyct/inspect_utils.py    | 34 +++++++++++++++++++
 .../autograph/pyct/inspect_utils_test.py      | 19 +++++++++++
 2 files changed, 53 insertions(+)

diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index 1416988ea3..29c406c248 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -67,6 +67,40 @@ def getnamespace(f):
   return namespace
 
 
+def getqualifiedname(namespace, object_, max_depth=2):
+  """Returns the name by which a value can be referred to in a given namespace.
+
+  This function will recurse inside modules, but it will not search objects for
+  attributes. The recursion depth is controlled by max_depth.
+
+  Args:
+    namespace: Dict[str, Any], the namespace to search into.
+    object_: Any, the value to search.
+    max_depth: Optional[int], a limit to the recursion depth when searching
+        inside modules.
+  Returns: Union[str, None], the fully-qualified name that resolves to the value
+      o, or None if it couldn't be found.
+  """
+  for name, value in namespace.items():
+    # The value may be referenced by more than one symbol, case in which
+    # any symbol will be fine. If the program contains symbol aliases that
+    # change over time, this may capture a symbol that will later point to
+    # something else.
+    # TODO(mdan): Prefer the symbol that matches the value type name.
+    if object_ is value:
+      return name
+
+  # TODO(mdan): Use breadth-first search and avoid visiting modules twice.
+  if max_depth:
+    for name, value in namespace.items():
+      if tf_inspect.ismodule(value):
+        name_in_module = getqualifiedname(value.__dict__, object_,
+                                          max_depth - 1)
+        if name_in_module is not None:
+          return '{}.{}'.format(name, name_in_module)
+  return None
+
+
 def _get_unbound_function(m):
   # TODO(mdan): Figure out why six.get_unbound_function fails in some cases.
   # The failure case is for tf.keras.Model.
diff --git a/tensorflow/python/autograph/pyct/inspect_utils_test.py b/tensorflow/python/autograph/pyct/inspect_utils_test.py
index f3eb027822..11074debfc 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils_test.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from functools import wraps
+import imp
 
 import six
 
@@ -127,6 +128,24 @@ class InspectUtilsTest(test.TestCase):
     self.assertEqual(ns['closed_over_primitive'], closed_over_primitive)
     self.assertTrue('local_var' not in ns)
 
+  def test_getqualifiedname(self):
+    foo = object()
+    qux = imp.new_module('quxmodule')
+    bar = imp.new_module('barmodule')
+    baz = object()
+    bar.baz = baz
+
+    ns = {
+        'foo': foo,
+        'bar': bar,
+        'qux': qux,
+    }
+
+    self.assertIsNone(inspect_utils.getqualifiedname(ns, inspect_utils))
+    self.assertEqual(inspect_utils.getqualifiedname(ns, foo), 'foo')
+    self.assertEqual(inspect_utils.getqualifiedname(ns, bar), 'bar')
+    self.assertEqual(inspect_utils.getqualifiedname(ns, baz), 'bar.baz')
+
   def test_getmethodclass(self):
 
     self.assertEqual(
-- 
GitLab


From 0691d49fb6e15740b8ddf8019fea4edb91bca914 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 10:43:54 -0700
Subject: [PATCH 1248/1357] Convert TensorFlow's nasm dependency to new third
 party import method.

PiperOrigin-RevId: 216211467
---
 tensorflow/workspace.bzl                        | 15 ++-------------
 third_party/nasm/BUILD                          |  1 +
 third_party/{nasm.BUILD => nasm/BUILD.bazel}    | 12 ++++++------
 .../nasm.BUILD => nasm/BUILD.system}            |  0
 third_party/nasm/workspace.bzl                  | 17 +++++++++++++++++
 5 files changed, 26 insertions(+), 19 deletions(-)
 create mode 100644 third_party/nasm/BUILD
 rename third_party/{nasm.BUILD => nasm/BUILD.bazel} (100%)
 rename third_party/{systemlibs/nasm.BUILD => nasm/BUILD.system} (100%)
 create mode 100644 third_party/nasm/workspace.bzl

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 6f5aa85b01..adeac62e43 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -23,11 +23,13 @@ load(
 load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
 load("//third_party/icu:workspace.bzl", icu = "repo")
 load("//third_party/jpeg:workspace.bzl", jpeg = "repo")
+load("//third_party/nasm:workspace.bzl", nasm = "repo")
 
 def initialize_third_party():
     flatbuffers()
     icu()
     jpeg()
+    nasm()
 
 # Sanitize a dependency so that it works correctly from code that includes
 # TensorFlow as a submodule.
@@ -235,19 +237,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
     )
 
-    tf_http_archive(
-        name = "nasm",
-        build_file = clean_dep("//third_party:nasm.BUILD"),
-        sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011",
-        strip_prefix = "nasm-2.13.03",
-        system_build_file = clean_dep("//third_party/systemlibs:nasm.BUILD"),
-        urls = [
-            "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
-            "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.13.03.tar.bz2/sha512/d7a6b4cee8dfd603d8d4c976e5287b5cc542fa0b466ff989b743276a6e28114e64289bf02a7819eca63142a5278aa6eed57773007e5f589e15768e6456a8919d/nasm-2.13.03.tar.bz2",
-            "http://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
-        ],
-    )
-
     tf_http_archive(
         name = "png_archive",
         build_file = clean_dep("//third_party:png.BUILD"),
diff --git a/third_party/nasm/BUILD b/third_party/nasm/BUILD
new file mode 100644
index 0000000000..e3aec1fce9
--- /dev/null
+++ b/third_party/nasm/BUILD
@@ -0,0 +1 @@
+# Needed to make this a package.
diff --git a/third_party/nasm.BUILD b/third_party/nasm/BUILD.bazel
similarity index 100%
rename from third_party/nasm.BUILD
rename to third_party/nasm/BUILD.bazel
index d746a65e7e..c68d713946 100644
--- a/third_party/nasm.BUILD
+++ b/third_party/nasm/BUILD.bazel
@@ -137,12 +137,6 @@ cc_binary(
         ":windows": ["config/msvc.h"],
         "//conditions:default": [],
     }),
-    includes = [
-        "asm",
-        "include",
-        "output",
-        "x86",
-    ],
     copts = select({
         ":windows": [],
         "//conditions:default": [
@@ -157,6 +151,12 @@ cc_binary(
             "HAVE_SYS_TYPES_H",
         ],
     }),
+    includes = [
+        "asm",
+        "include",
+        "output",
+        "x86",
+    ],
     visibility = ["@jpeg//:__pkg__"],
 )
 
diff --git a/third_party/systemlibs/nasm.BUILD b/third_party/nasm/BUILD.system
similarity index 100%
rename from third_party/systemlibs/nasm.BUILD
rename to third_party/nasm/BUILD.system
diff --git a/third_party/nasm/workspace.bzl b/third_party/nasm/workspace.bzl
new file mode 100644
index 0000000000..6d50f6fcad
--- /dev/null
+++ b/third_party/nasm/workspace.bzl
@@ -0,0 +1,17 @@
+"""loads the nasm library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "nasm",
+        urls = [
+            "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
+            "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.13.03.tar.bz2/sha512/d7a6b4cee8dfd603d8d4c976e5287b5cc542fa0b466ff989b743276a6e28114e64289bf02a7819eca63142a5278aa6eed57773007e5f589e15768e6456a8919d/nasm-2.13.03.tar.bz2",
+            "http://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
+        ],
+        sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011",
+        strip_prefix = "nasm-2.13.03",
+        build_file = "//third_party/nasm:BUILD.bazel",
+        system_build_file = "//third_party/nasm:BUILD.system",
+    )
-- 
GitLab


From 3f0155133d668cf6cee1f1fb362d2a75c04836e3 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Mon, 8 Oct 2018 10:52:15 -0700
Subject: [PATCH 1249/1357] Fix support for a single tensor to be passed to
 target_tensors

PiperOrigin-RevId: 216212953
---
 tensorflow/python/keras/engine/training.py             | 6 ++++--
 tensorflow/python/keras/engine/training_distributed.py | 4 ----
 tensorflow/python/keras/engine/training_test.py        | 4 ++++
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 2ebb4cf99f..ff2ae54ad4 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -563,9 +563,11 @@ class Model(Network):
         for name in self.output_names:
           tmp_target_tensors.append(target_tensors.get(name, None))
         target_tensors = tmp_target_tensors
+      elif tensor_util.is_tensor(target_tensors):
+        target_tensors = [target_tensors]
       else:
-        raise TypeError('Expected `target_tensors` to be '
-                        'a list or dict, but got:', target_tensors)
+        raise TypeError('Expected `target_tensors` to be a list or tuple or '
+                        'dict or a single tensor, but got:', target_tensors)
 
     for i in range(len(self.outputs)):
       if i in skip_target_indices:
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 04e8d079c0..ac759ef3aa 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -820,10 +820,6 @@ def _clone_and_build_model(model, inputs=None, targets=None):
     optimizer_config = model.optimizer.get_config()
     optimizer = model.optimizer.__class__.from_config(optimizer_config)
 
-  # TODO(priyag): Is there a cleaner way to do this? The API doc suggests a
-  # single tensor should be OK but it throws an error in that case.
-  if targets is not None and not isinstance(targets, (list, dict, tuple)):
-    targets = [targets]
   if isinstance(targets, tuple):
     targets = nest.flatten(targets)
   cloned_model.compile(
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 54ad74c08b..868fd1dc69 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -1865,6 +1865,10 @@ class TestTrainingWithDataTensors(test.TestCase):
       model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target])
       model.train_on_batch(input_val, None)
 
+      # single-output, as single tensor
+      model.compile(optimizer='rmsprop', loss='mse', target_tensors=target)
+      model.train_on_batch(input_val, None)
+
       # single-output, as dict
       model.compile(optimizer='rmsprop', loss='mse',
                     target_tensors={'dense': target})
-- 
GitLab


From 7d92890cb215f2f563fac96f1e3bde712a8749f8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 11:18:12 -0700
Subject: [PATCH 1250/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 216217887
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 0753316724..9df0ece69b 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -28980,6 +28980,74 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "LeakyRelu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 0.2
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LeakyReluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 0.2
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
 op {
   name: "LearnedUnigramCandidateSampler"
   input_arg {
-- 
GitLab


From 1221a8e38a402513560ee71e6982b7cd8b6d901b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 11:54:12 -0700
Subject: [PATCH 1251/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216224026

---
 tensorflow/go/op/wrappers.go | 228 +++++++++++++++++------------------
 1 file changed, 114 insertions(+), 114 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 5d17605e37..fe99915a6c 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -7221,6 +7221,45 @@ func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf.
 	return components
 }
 
+// Deprecated. Use TensorArrayGradV3
+//
+// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3
+func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayWriteV2",
+		Input: []tf.Input{
+			handle, index, value, flow_in,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Writes the given dataset to the given file using the TFRecord format.
+//
+// Arguments:
+//	input_dataset: A variant tensor representing the dataset to write.
+//	filename: A scalar string tensor representing the filename to use.
+//	compression_type: A scalar string tensor containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+//
+// Returns the created operation.
+func DatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DatasetToTFRecord",
+		Input: []tf.Input{
+			input_dataset, filename, compression_type,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
 // Computes rectified linear 6: `min(max(features, 0), 6)`.
 func Relu6(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
@@ -8251,44 +8290,6 @@ func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAt
 	return op.Output(0)
 }
 
-// Bucketizes 'input' based on 'boundaries'.
-//
-// For example, if the inputs are
-//     boundaries = [0, 10, 100]
-//     input = [[-5, 10000]
-//              [150,   10]
-//              [5,    100]]
-//
-// then the output will be
-//     output = [[0, 3]
-//               [3, 2]
-//               [1, 3]]
-//
-// Arguments:
-//	input: Any shape of Tensor contains with int or float type.
-//	boundaries: A sorted list of floats gives the boundary of the buckets.
-//
-// Returns Same shape with 'input', each value of input replaced with bucket index.
-//
-// @compatibility(numpy)
-// Equivalent to np.digitize.
-// @end_compatibility
-func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"boundaries": boundaries}
-	opspec := tf.OpSpec{
-		Type: "Bucketize",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2.
 type FusedBatchNormV2Attr func(optionalAttr)
 
@@ -10980,6 +10981,44 @@ func Tan(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
+// Bucketizes 'input' based on 'boundaries'.
+//
+// For example, if the inputs are
+//     boundaries = [0, 10, 100]
+//     input = [[-5, 10000]
+//              [150,   10]
+//              [5,    100]]
+//
+// then the output will be
+//     output = [[0, 3]
+//               [3, 2]
+//               [1, 3]]
+//
+// Arguments:
+//	input: Any shape of Tensor contains with int or float type.
+//	boundaries: A sorted list of floats gives the boundary of the buckets.
+//
+// Returns Same shape with 'input', each value of input replaced with bucket index.
+//
+// @compatibility(numpy)
+// Equivalent to np.digitize.
+// @end_compatibility
+func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"boundaries": boundaries}
+	opspec := tf.OpSpec{
+		Type: "Bucketize",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // EncodeJpegAttr is an optional argument to EncodeJpeg.
 type EncodeJpegAttr func(optionalAttr)
 
@@ -21413,43 +21452,6 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes the minimum along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// Computes a tensor such that
-// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such
-// that `segment_ids[j] == i`.
-//
-// If the min is empty for a given segment ID `i`, `output[i] = 0`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMin.png" alt>
-// </div>
-//
-// Arguments:
-//
-//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SegmentMin",
-		Input: []tf.Input{
-			data, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // SdcaOptimizerAttr is an optional argument to SdcaOptimizer.
 type SdcaOptimizerAttr func(optionalAttr)
 
@@ -21924,6 +21926,43 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Computes the minimum along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// Computes a tensor such that
+// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such
+// that `segment_ids[j] == i`.
+//
+// If the min is empty for a given segment ID `i`, `output[i] = 0`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMin.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SegmentMin",
+		Input: []tf.Input{
+			data, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the sum along segments of a tensor.
 //
 // Read
@@ -29878,28 +29917,6 @@ func Cross(scope *Scope, a tf.Output, b tf.Output) (product tf.Output) {
 	return op.Output(0)
 }
 
-// Writes the given dataset to the given file using the TFRecord format.
-//
-// Arguments:
-//	input_dataset: A variant tensor representing the dataset to write.
-//	filename: A scalar string tensor representing the filename to use.
-//	compression_type: A scalar string tensor containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-//
-// Returns the created operation.
-func DatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "DatasetToTFRecord",
-		Input: []tf.Input{
-			input_dataset, filename, compression_type,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
 // AvgPool3DAttr is an optional argument to AvgPool3D.
 type AvgPool3DAttr func(optionalAttr)
 
@@ -31692,23 +31709,6 @@ func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size
 	return op.Output(0)
 }
 
-// Deprecated. Use TensorArrayGradV3
-//
-// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3
-func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayWriteV2",
-		Input: []tf.Input{
-			handle, index, value, flow_in,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // SparseReduceMaxAttr is an optional argument to SparseReduceMax.
 type SparseReduceMaxAttr func(optionalAttr)
 
-- 
GitLab


From 723fd1245ed650ad07e5049faec021f4f0f6d408 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Mon, 8 Oct 2018 12:03:09 -0700
Subject: [PATCH 1252/1357] Fix the steps_per_epoch when training on mnist

PiperOrigin-RevId: 216225505
---
 tensorflow/contrib/distribute/python/examples/keras_mnist.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distribute/python/examples/keras_mnist.py b/tensorflow/contrib/distribute/python/examples/keras_mnist.py
index a84ef04196..da7f8c548f 100644
--- a/tensorflow/contrib/distribute/python/examples/keras_mnist.py
+++ b/tensorflow/contrib/distribute/python/examples/keras_mnist.py
@@ -113,7 +113,7 @@ def main(_):
                 distribute=strategy)
 
   # Train the model with the train dataset.
-  model.fit(x=train_ds, epochs=20, steps_per_epoch=310)
+  model.fit(x=train_ds, epochs=20, steps_per_epoch=468)
 
   # Evaluate the model with the eval dataset.
   score = model.evaluate(eval_ds, steps=10, verbose=0)
-- 
GitLab


From dcd3b4307a3095e3f18aef53f5034787e3cc3af6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 12:09:54 -0700
Subject: [PATCH 1253/1357] Remove the restrictions that constant resolution of
 reduce_sum operators must be on axis 0, and can only be on 1 or 2-d inputs.

PiperOrigin-RevId: 216226776
---
 .../resolve_constant_unary.cc                 |  93 +++++++++---
 .../toco/graph_transformations/tests/BUILD    |  13 ++
 .../tests/resolve_constant_unary_test.cc      | 140 ++++++++++++++++++
 3 files changed, 229 insertions(+), 17 deletions(-)
 create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index c698a9567a..5364eebbc9 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -27,6 +27,73 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 
 namespace toco {
+namespace {
+
+// Using the function reducer, reduce input along all axes in axes.
+// Put the reduced data in output, which should aleady be appropriately sized.
+// check_output_shape is set to what this code computes the final shape
+// to be, so it can be cross checked with the shape computation logic.
+void ReduceGeneric(bool keep_dims, const std::vector<int>& axes,
+                   const Shape& input_shape, const std::vector<float>& input,
+                   Shape* check_output_shape, std::vector<float>* output,
+                   const std::function<float(float, float)>& reducer) {
+  if (!IsNonEmpty(input_shape)) {
+    // Zero-dimensions will break the NextIndices() logic, so just early out if
+    // we have an empty shape.
+    return;
+  }
+
+  // Set up output_shape to be the same length as input_shape, with
+  // appropriate dimensions squashed to 1.  If keep_dims is false, we'll strip
+  // out the one dimensions at the end, but it's convenient to leave them for
+  // now.  We recompute the shape because we need the output shape to have
+  // 1-dims in all the squashed dimensions; the shape from shape computation may
+  // remove those squashed dimensions, depending on the options used.
+  Shape output_shape = input_shape;
+
+  // Reduction mask will be elementwise multiplied against the input
+  // indices to figure out the output index for the element.
+  std::vector<int> reduction_mask(input_shape.dimensions_count(), 1);
+  for (int axis : axes) {
+    CHECK_GE(axis, 0);
+    CHECK_LT(axis, input_shape.dimensions_count());
+    reduction_mask[axis] = 0;
+    output_shape.mutable_dims()->at(axis) = 1;
+  }
+
+  std::vector<int> output_indices(input_shape.dimensions_count());
+  for (int input_offset = 0; input_offset < input.size(); ++input_offset) {
+    std::vector<int> input_indices = ReverseOffset(input_shape, input_offset);
+    // Calculate the output location by squashing input indices to 0
+    // in reduced axes.
+    for (int i = 0; i < input_shape.dimensions_count(); ++i) {
+      output_indices[i] = input_indices[i] * reduction_mask[i];
+    }
+    int output_offset = Offset(output_shape, output_indices);
+    if (input_indices == output_indices) {
+      // Base element for the reduced axes
+      output->at(output_offset) = input.at(input_offset);
+    } else {
+      // Reduce with existing element.
+      output->at(output_offset) =
+          reducer(output->at(output_offset), input.at(input_offset));
+    }
+  }
+
+  if (!keep_dims) {
+    // Strip out the dims from output_shape.
+    std::vector<int> new_dims;
+    for (int i = 0; i < output_shape.dimensions_count(); ++i) {
+      if (reduction_mask[i]) {
+        new_dims.push_back(output_shape.dims(i));
+      }
+    }
+    output_shape.mutable_dims()->swap(new_dims);
+  }
+  *check_output_shape = output_shape;
+}
+
+}  // namespace
 
 bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) {
   auto& output_array = model->GetArray(op.outputs[0]);
@@ -176,27 +243,19 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     }
     auto& axis_array = model->GetArray(unary_op->inputs[1]);
     CHECK(axis_array.data_type == ArrayDataType::kInt32);
-    int axis = axis_array.GetBuffer<ArrayDataType::kInt32>().data[0];
-    CHECK_LT(axis, input_shape.dimensions_count()) << "Axis out of bounds";
 
-    // We currently only handle reduction on axis 0.
-    CHECK_EQ(axis, 0) << "Only reduction along axis 0 is supported";
-    // We currently only handle 1-D and 2-D input tensors.
-    CHECK_LE(input_shape.dimensions_count(), 2) << "Rank >2 not yet supported";
     // We only support keep_dims=true; shape prop will need to change otherwise.
     auto sum_op = static_cast<const TensorFlowSumOperator*>(unary_op);
-    CHECK(sum_op->keep_dims) << "Only keep_dims=true is supported";
+    Shape check_output_shape;
 
-    std::vector<int> indices(input_shape.dimensions_count());
-    for (int i = 0; i < input_shape.dims(1); ++i) {
-      indices[1] = i;
-      float sum = 0.f;
-      for (int j = 0; j < input_shape.dims(0); ++j) {
-        indices[0] = j;
-        sum += (*input_float_data)[Offset(input_shape, indices)];
-      }
-      output_float_data[i] = sum;
-    }
+    ReduceGeneric(
+        sum_op->keep_dims, axis_array.GetBuffer<ArrayDataType::kInt32>().data,
+        input_shape, *input_float_data, &check_output_shape, &output_float_data,
+        [](float existing, float current) -> float {
+          return existing + current;
+        });
+    CHECK(check_output_shape == output_shape)
+        << "Shape propagation output shape doesn't match output shape from op";
   } else if (unary_op->type == OperatorType::kReduceMin) {
     // At the moment only full reduction across all dimensions is supported.
     // TODO(starka): Output should not be padded.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD
index acf1e3ede5..6f1be298ca 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD
@@ -30,3 +30,16 @@ tf_cc_test(
         "@com_google_googletest//:gtest_main",
     ],
 )
+
+tf_cc_test(
+    name = "resolve_constant_unary_test",
+    srcs = ["resolve_constant_unary_test.cc"],
+    tags = ["no_oss"],
+    deps = [
+        "//tensorflow/contrib/lite/toco:graph_transformations",
+        "//tensorflow/contrib/lite/toco:model",
+        "//tensorflow/contrib/lite/toco:tooling_util",
+        "@com_google_absl//absl/memory",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
new file mode 100644
index 0000000000..a53abc9941
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
@@ -0,0 +1,140 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <tuple>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "absl/memory/memory.h"
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+
+namespace toco {
+
+namespace {
+
+void RunResolveSum(const std::vector<float>& input,
+                   const std::vector<int>& input_shape,
+                   const std::vector<int>& axis,
+                   const std::vector<int>& output_shape,
+                   const std::vector<float>& expected_output) {
+  Model model;
+  Array& input0 = model.GetOrCreateArray("input0");
+  Array& input1 = model.GetOrCreateArray("input1");
+  Array& output = model.GetOrCreateArray("output");
+
+  *input0.mutable_shape()->mutable_dims() = input_shape;
+  input0.data_type = ArrayDataType::kFloat;
+  input0.GetMutableBuffer<ArrayDataType::kFloat>().data = input;
+
+  *input1.mutable_shape()->mutable_dims() = {static_cast<int>(axis.size())};
+  input1.GetMutableBuffer<ArrayDataType::kInt32>().data = axis;
+  input1.data_type = ArrayDataType::kInt32;
+
+  *output.mutable_shape()->mutable_dims() = output_shape;
+
+  auto sum_op = absl::make_unique<TensorFlowSumOperator>();
+  sum_op->keep_dims = true;
+  sum_op->inputs = {"input0", "input1"};
+  sum_op->outputs = {"output"};
+  model.operators.push_back(std::move(sum_op));
+  ResolveConstantUnaryOperator().Run(&model, 0);
+  EXPECT_EQ(model.GetArray("output").GetBuffer<ArrayDataType::kFloat>().data,
+            expected_output);
+  EXPECT_EQ(model.GetArray("output").shape().dims(), output_shape);
+}
+
+// Reduce a 2d array across axis 0
+TEST(ResolveConstantUnary, ResolveSumAxis0_2D) {
+  // clang-format off
+  RunResolveSum(
+      // Input data
+      {3, 1, 4, 1,
+       5, 9, 2, 6,
+       5, 3, 5, 8},
+
+      // Input shape
+      {3, 4},
+
+      // Axes
+      {0},
+
+      // Expected output shape,
+      {1, 4},
+
+      // Expected output
+      {13, 13, 11, 15});
+  // clang-format on
+}
+
+// Reduce a 2d array across axis 1
+TEST(ResolveConstantUnary, ResolveSumAxis1_2D) {
+  // clang-format off
+  RunResolveSum(
+      // Input data
+      {3, 1, 4, 1,
+       5, 9, 2, 6,
+       5, 3, 5, 8},
+
+      // Input shape
+      {3, 4},
+
+      // Axes
+      {1},
+
+      // Expected output shape,
+      {3, 1},
+
+      // Expected output
+      {9, 22, 21});
+  // clang-format on
+}
+
+// Reduce a 3d tensor across axes 0 and 2.
+TEST(ResolveConstantUnary, ResolveSumAxis0_2_3D) {
+  // clang-format off
+  RunResolveSum(
+      // Input data
+      {  0,   1,   2,
+         3,  10,  11,
+        12,  13,  20,
+        21,  22,  23,
+
+       100, 101, 102,
+       103, 110, 111,
+       112, 113, 120,
+       121, 122, 123,
+
+       200, 201, 202,
+       203, 210, 211,
+       212, 213, 220,
+       221, 222, 223 },
+
+      // Input shape
+      {3, 4, 3},
+
+      // Axes
+      {0, 2},
+
+      // Expected output shape,
+      {1, 4, 1},
+
+      // Expected output, generated using octave.
+      { 909, 972, 1035, 1098});
+  // clang-format on
+}
+
+}  // namespace
+}  // namespace toco
-- 
GitLab


From d3595b1534a855f3d0da35d3f1dd8b5d464b1b70 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 12:34:00 -0700
Subject: [PATCH 1254/1357] Fix a couple of reference leaks

PiperOrigin-RevId: 216230391
---
 tensorflow/python/pywrap_tfe.i | 1 +
 tensorflow/python/util/util.cc | 8 +++-----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index 61e0abbfcb..adbce95c6f 100755
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -209,6 +209,7 @@ limitations under the License.
     SWIG_fail;
   } else {
     int num_outputs = $1->size();
+    Py_CLEAR($result);
     $result = PyList_New(num_outputs);
     for (int i = 0; i < num_outputs; ++i) {
       PyObject *output;
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index 7b3e618e84..11eb9ce947 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -825,18 +825,16 @@ PyObject* IsNamedtuple(PyObject* o, bool strict) {
 }
 
 PyObject* SameNamedtuples(PyObject* o1, PyObject* o2) {
-  PyObject* f1 = PyObject_GetAttrString(o1, "_fields");
-  PyObject* f2 = PyObject_GetAttrString(o2, "_fields");
+  Safe_PyObjectPtr f1 = make_safe(PyObject_GetAttrString(o1, "_fields"));
+  Safe_PyObjectPtr f2 = make_safe(PyObject_GetAttrString(o2, "_fields"));
   if (f1 == nullptr || f2 == nullptr) {
-    Py_XDECREF(f1);
-    Py_XDECREF(f2);
     PyErr_SetString(
         PyExc_RuntimeError,
         "Expected namedtuple-like objects (that have _fields attr)");
     return nullptr;
   }
 
-  if (PyObject_RichCompareBool(f1, f2, Py_NE)) {
+  if (PyObject_RichCompareBool(f1.get(), f2.get(), Py_NE)) {
     Py_RETURN_FALSE;
   }
 
-- 
GitLab


From 9b558126e31d25ec4e82cb4f50033d6eca44349a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 12:58:29 -0700
Subject: [PATCH 1255/1357] Add timeout mechanism to Grappler meta optimizer.
 This is only a best-effort mechanism, since the meta optimizer only checks if
 it has been cancelled before running each sub-optimizer. We can add
 cancellation to each sub-optimizer if necessary.

PiperOrigin-RevId: 216234262
---
 .../grappler/optimizers/graph_optimizer.h     | 21 ++++++
 .../grappler/optimizers/meta_optimizer.cc     | 68 ++++++++++++++++++-
 .../core/grappler/optimizers/meta_optimizer.h | 15 +++-
 .../optimizers/meta_optimizer_test.cc         | 62 +++++++++++++++++
 .../core/protobuf/rewriter_config.proto       |  4 ++
 5 files changed, 165 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer.h b/tensorflow/core/grappler/optimizers/graph_optimizer.h
index 765dd13263..bd6bf9f860 100644
--- a/tensorflow/core/grappler/optimizers/graph_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/graph_optimizer.h
@@ -16,8 +16,11 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_
 
+#include <atomic>
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/thread_annotations.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -29,6 +32,7 @@ struct GrapplerItem;
 // optimization of a GrapplerItem for running on a cluster.
 class GraphOptimizer {
  public:
+  GraphOptimizer() : is_cancelled_(false) {}
   virtual ~GraphOptimizer() {}
 
   virtual string name() const = 0;
@@ -45,8 +49,25 @@ class GraphOptimizer {
   // call to Optimize) performed.  Lower "result" scores are better.
   virtual void Feedback(Cluster* cluster, const GrapplerItem& item,
                         const GraphDef& optimized_graph, double result) = 0;
+
+  // Best effort cancellation. Sets is_cancelled to true and requests that the
+  // optimizer returns as soon as possible from active calls to Optimize() or
+  // FeedBack().
+  void Cancel() { is_cancelled_ = true; }
+
+  bool is_cancelled() const { return is_cancelled_; }
+
+ private:
+  std::atomic<bool> is_cancelled_;
 };
 
+#define GRAPPLER_RETURN_IF_CANCELLED()                                  \
+  do {                                                                  \
+    if (is_cancelled()) {                                               \
+      return errors::DeadlineExceeded(this->name(), " was cancelled."); \
+    }                                                                   \
+  } while (0)
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 3f33b16ba8..7488cedec5 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -14,6 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/meta_optimizer.h"
+
+#include <memory>
+
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/versions.pb.h"
@@ -37,7 +40,11 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/functions.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/platform/notification.h"
+#include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
@@ -115,6 +122,21 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
 
 #undef MK_OPT
 
+MetaOptimizer::MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg)
+    : cpu_device_(cpu_device), cfg_(cfg) {
+  // TODO(rmlarsen): Increase kNumThreads to, say, port::NumSchedulableCPUs()
+  // if we want to the threadpool for parallelizing Grappler
+  const int kNumThreads = 1;
+  thread_pool_ = absl::make_unique<thread::ThreadPool>(
+      Env::Default(), "MetaOptimizerThreadPool", kNumThreads);
+}
+
+MetaOptimizer::~MetaOptimizer() {
+  // The ThreadPool destructor waits for threads to finish, so we don't
+  // pull the rug out from under them.
+  thread_pool_.reset();
+}
+
 Status MetaOptimizer::InitializeOptimizers(
     std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const {
   if (cfg_.disable_meta_optimizer()) {
@@ -310,6 +332,7 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
 
     VLOG(4) << "Starting optimization iteration " << iteration;
     for (const auto& optimizer : optimizers) {
+      GRAPPLER_RETURN_IF_CANCELLED();
       // Some optimizers can run only once.
       if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue;
       // Some must run only on the last iteration.
@@ -368,6 +391,7 @@ Status MetaOptimizer::RunOptimizer(
   // resets optimized_graph to an empty graph.
   optimized_graph->Swap(&optimized_item->graph);
   *optimized_graph = GraphDef();
+  // TODO(rmlarsen): Add timeout for individual optimizers.
   Status status =
       optimizer->Optimize(cluster, *optimized_item, optimized_graph);
   uint64 end_us = Env::Default()->NowMicros();
@@ -389,14 +413,15 @@ Status MetaOptimizer::RunOptimizer(
   return status;
 }
 
-Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
-                               GraphDef* optimized_graph) {
+Status MetaOptimizer::OptimizeMainGraphAndFunctionLibrary(
+    Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) {
   VLOG(1) << "Starting optimization for grappler item: " << item.id;
   optimization_results_.clear();
 
   // 1. Optimize main graph
   TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph));
   VLOG(1) << "Optimized main graph.";
+  GRAPPLER_RETURN_IF_CANCELLED();
 
   // Skip optimizing functions if this is a TPU graph. Currently, Grappler
   // passes do not handle TPU functions correctly in a variety of ways (Note
@@ -432,6 +457,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     optimize_function_library = false;
 
     for (const FunctionDef& func : optimized_graph->library().function()) {
+      GRAPPLER_RETURN_IF_CANCELLED();
+
       const string& func_name = func.signature().name();
 
       // Skip already optimized functions.
@@ -506,6 +533,43 @@ void MetaOptimizer::PrintResult() {
   }
 }
 
+Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
+                               GraphDef* optimized_graph) {
+  const int64 kFiveMinutesInUsec = 5 * 60 * 1000 * 1000;
+  const int64 timeout_usec = (cfg_.meta_optimizer_timeout_ms() == 0
+                                  ? kFiveMinutesInUsec
+                                  : cfg_.meta_optimizer_timeout_ms() * 1000);
+  if (timeout_usec < 0) {
+    return OptimizeMainGraphAndFunctionLibrary(cluster, item, optimized_graph);
+  }
+
+  GraphDef optimized_with_timeout;
+  Status status;
+  Notification done;
+  thread_pool_->Schedule(
+      [this, cluster, &done, &optimized_with_timeout, &item, &status]() {
+        status = this->OptimizeMainGraphAndFunctionLibrary(
+            cluster, item, &optimized_with_timeout);
+        done.Notify();
+      });
+
+  const bool notified = WaitForNotificationWithTimeout(&done, timeout_usec);
+  if (notified && status.ok()) {
+    optimized_graph->Swap(&optimized_with_timeout);
+  } else {
+    *optimized_graph = item.graph;
+    if (!notified) {
+      this->Cancel();
+      done.WaitForNotification();
+      status = errors::DeadlineExceeded(
+          "Grappler MetaOptimizer timed out after ",
+          static_cast<float>(timeout_usec) / (1000 * 1000), " seconds");
+      LOG(WARNING) << status.error_message();
+    }
+  }
+  return status;
+}
+
 void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item,
                              const GraphDef& pruned_graph, double result) {
   // Nothing to do for MetaOptimizer.
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h
index 99a0a33ffa..35d6a4559b 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
 
 namespace tensorflow {
@@ -28,9 +29,8 @@ namespace grappler {
 // Run the other grappler optimizers based on the specified rewriter config.
 class MetaOptimizer : public GraphOptimizer {
  public:
-  MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg)
-      : cpu_device_(cpu_device), cfg_(cfg) {}
-  ~MetaOptimizer() override = default;
+  MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg);
+  ~MetaOptimizer();
 
   string name() const override { return "meta_optimizer"; };
 
@@ -65,9 +65,18 @@ class MetaOptimizer : public GraphOptimizer {
   Status OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
                        GraphDef* optimized_graph);
 
+  // Run optimization passes over the main graph and for functions in the
+  // function library.
+  Status OptimizeMainGraphAndFunctionLibrary(Cluster* cluster,
+                                             const GrapplerItem& item,
+                                             GraphDef* optimized_graph);
+
   DeviceBase* const cpu_device_;  // may be NULL
   RewriterConfig cfg_;
 
+  // Thread pool used for launching optimizers asynchronously.
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
+
   struct OptimizerResult {
     string optimizer_name;
     string result;
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
index 3f3f43382f..7f1dd91f09 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
@@ -461,6 +461,68 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryWithRestrictions) {
   EXPECT_FALSE(allowed_optimizations_my_mul_2->non_differentiable_rewrites);
 }
 
+class SleepingOptimizer : public CustomGraphOptimizer {
+ public:
+  SleepingOptimizer() {}
+  string name() const override { return "test_optimizer"; }
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* optimized_graph) override {
+    *optimized_graph = item.graph;
+    optimized_graph->add_node();
+    sleep(1);
+    return Status::OK();
+  }
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimized_graph, double result) override {}
+};
+
+REGISTER_GRAPH_OPTIMIZER(SleepingOptimizer);
+
+TEST_F(MetaOptimizerTest, OptimizerTimesOut) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+
+  RewriterConfig rewriter_config;
+  rewriter_config.add_optimizers("SleepingOptimizer");
+  rewriter_config.set_min_graph_nodes(-1);
+  rewriter_config.set_meta_optimizer_timeout_ms(1500);
+  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO);
+
+  MetaOptimizer optimizer(nullptr, rewriter_config);
+  GraphDef output;
+  const Status status = optimizer.Optimize(nullptr, item, &output);
+  EXPECT_EQ(status.error_message(),
+            "Grappler MetaOptimizer timed out after 1.5 seconds");
+  // Make sure the graph was reverted to the original regardless of when the
+  // optimizer timed out.
+  CompareGraphs(item.graph, output);
+}
+
+TEST_F(MetaOptimizerTest, OptimizerDoesNotTimeOut) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+
+  RewriterConfig rewriter_config;
+  rewriter_config.add_optimizers("SleepingOptimizer");
+  rewriter_config.set_min_graph_nodes(-1);
+  rewriter_config.set_meta_optimizer_timeout_ms(1500);
+  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::ONE);
+  MetaOptimizer optimizer(nullptr, rewriter_config);
+  GraphDef output;
+  const Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  EXPECT_EQ(item.graph.node_size() + 1, output.node_size());
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 8c31468ff5..7ccd54b818 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -83,6 +83,10 @@ message RewriterConfig {
   // Controls how many times we run the optimizers in meta optimizer (default
   // is once).
   NumIterationsType meta_optimizer_iterations = 12;
+  // Maximum number of milliseconds to spend optimizing a single graph before
+  // timing out. If equal to 0 the system picks a default (currently 5 minutes).
+  // If less than 0 the optimizer will never time out.
+  int64 meta_optimizer_timeout_ms = 20;
 
   // The minimum number of nodes in a graph to optimizer. For smaller graphs,
   // optimization is skipped.
-- 
GitLab


From 76ab96c8a5b2d77dfc191c94ff54fd5e52c561f2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 13:31:52 -0700
Subject: [PATCH 1256/1357] Changed Adam algorithm variant formula from
 sqrt(max(v, epsilon**2)) to sqrt(v + epsilon**2) and changed flag name
 accordingly.

PiperOrigin-RevId: 216240045
---
 tensorflow/contrib/tpu/proto/optimization_parameters.proto | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index 8529b48c15..c2e3be03db 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -62,9 +62,9 @@ message FtrlParameters {
 // (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/AdamOptimizer). If
 // use_non_lazy_adam is enabled, use_gradient_accumulation is also required in
 // order to get correct results; a warning will be printed otherwise (which may
-// change to an error in the future). If use_max_with_epsilon is set, the Adam
+// change to an error in the future). If use_sum_inside_sqrt is set, the Adam
 // variable update formula will be changed from m / (sqrt(v) + epsilon) to
-// m / max(sqrt(v), abs(epsilon)); this option improves the performance of TPU
+// m / sqrt(v + epsilon**2); this option improves the performance of TPU
 // training and is not expected to harm model quality.
 message AdamParameters {
   float beta1 = 3;
@@ -73,7 +73,7 @@ message AdamParameters {
   float initial_m = 6;
   float initial_v = 7;
   bool use_non_lazy_adam = 8;
-  bool use_max_with_epsilon = 9;
+  bool use_sum_inside_sqrt = 10;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
-- 
GitLab


From b052c51374f558c25a29c70918d79205dfec808b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 13:46:12 -0700
Subject: [PATCH 1257/1357] Add tf.BenchmarkConfig that returns a session
 config appropriate for benchmarking. At the moment, it returns a default
 config with only Grappler dependency optimizer disabled. Many benchmarks wrap
 the subgraph they want to time in control_flow_ops.group() to avoid including
 the overhead of copying the output back to the Python client in the
 measurement. In the graph, this only adds a control dependency between the
 subgraph output and the fetch node, which in turn (often) causes the
 dependency optimizer to turn all nodes in the graph into no-ops.

PiperOrigin-RevId: 216242463
---
 .../python/kernel_tests/benchmark_test.py     |  2 +-
 .../python/kernel_tests/cholesky_op_test.py   |  7 ++-
 .../kernel_tests/determinant_op_test.py       |  9 +--
 .../kernel_tests/matrix_band_part_op_test.py  |  5 +-
 .../matrix_exponential_op_test.py             |  5 +-
 .../kernel_tests/matrix_inverse_op_test.py    |  5 +-
 .../kernel_tests/matrix_logarithm_op_test.py  |  3 +-
 .../kernel_tests/matrix_solve_ls_op_test.py   |  5 +-
 .../kernel_tests/matrix_solve_op_test.py      |  5 +-
 .../sparse_tensors_map_ops_test.py            |  3 +-
 .../python/kernel_tests/where_op_test.py      |  5 +-
 tensorflow/python/ops/image_ops_test.py       | 62 +++++++++----------
 tensorflow/python/platform/benchmark.py       | 14 +++++
 .../tools/api/golden/v1/tensorflow.test.pbtxt |  4 ++
 .../tools/api/golden/v2/tensorflow.test.pbtxt |  4 ++
 15 files changed, 84 insertions(+), 54 deletions(-)

diff --git a/tensorflow/python/kernel_tests/benchmark_test.py b/tensorflow/python/kernel_tests/benchmark_test.py
index 78b6e38d94..5777a5d097 100644
--- a/tensorflow/python/kernel_tests/benchmark_test.py
+++ b/tensorflow/python/kernel_tests/benchmark_test.py
@@ -64,7 +64,7 @@ class TestReportingBenchmark(test.Benchmark):
                 "other_key": "string"})
 
   def benchmark_times_an_op(self):
-    with session.Session() as sess:
+    with session.Session(config=benchmark.benchmark_config()) as sess:
       a = constant_op.constant(0.0)
       a_plus_a = a + a
       return self.run_op_benchmark(
diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py
index 782e6b5068..2ebf74a4d7 100644
--- a/tensorflow/python/kernel_tests/cholesky_op_test.py
+++ b/tensorflow/python/kernel_tests/cholesky_op_test.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.linalg import linalg
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 
@@ -327,7 +328,7 @@ class CholeskyBenchmark(test.Benchmark):
   def benchmarkCholeskyOp(self):
     for shape in self.shapes:
       with ops.Graph().as_default(), \
-          session.Session() as sess, \
+          session.Session(config=benchmark.benchmark_config()) as sess, \
           ops.device("/cpu:0"):
         matrix = variables.Variable(self._GenerateMatrix(shape))
         l = linalg_ops.cholesky(matrix)
@@ -341,7 +342,7 @@ class CholeskyBenchmark(test.Benchmark):
 
       if test.is_gpu_available(True):
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/device:GPU:0"):
           matrix = variables.Variable(self._GenerateMatrix(shape))
           l = linalg_ops.cholesky(matrix)
@@ -359,7 +360,7 @@ class CholeskyBenchmark(test.Benchmark):
       for shape in self.shapes:
         matrix = self._GenerateMatrix(shape)
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device(device):
           l = variables.Variable(np.linalg.cholesky(matrix))
           grad_matrix = variables.Variable(
diff --git a/tensorflow/python/kernel_tests/determinant_op_test.py b/tensorflow/python/kernel_tests/determinant_op_test.py
index a52b2c0dc3..fb114f9f24 100644
--- a/tensorflow/python/kernel_tests/determinant_op_test.py
+++ b/tensorflow/python/kernel_tests/determinant_op_test.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import gen_linalg_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -185,8 +186,8 @@ class MatrixDeterminantBenchmark(test.Benchmark):
 
   def benchmarkMatrixDeterminantOp(self):
     for shape in self.shapes:
-      with ops.Graph().as_default(), session.Session() as sess, ops.device(
-          "/cpu:0"):
+      with ops.Graph().as_default(), session.Session(
+          config=benchmark.benchmark_config()) as sess, ops.device("/cpu:0"):
         matrix = self._GenerateMatrix(shape)
         d = linalg_ops.matrix_determinant(matrix)
         variables.global_variables_initializer().run()
@@ -198,8 +199,8 @@ class MatrixDeterminantBenchmark(test.Benchmark):
             name="matrix_determinant_cpu_{shape}".format(shape=shape))
 
       if test.is_gpu_available(True):
-        with ops.Graph().as_default(), session.Session() as sess, ops.device(
-            "/gpu:0"):
+        with ops.Graph().as_default(), session.Session(
+            config=benchmark.benchmark_config()) as sess, ops.device("/gpu:0"):
           matrix = self._GenerateMatrix(shape)
           d = linalg_ops.matrix_determinant(matrix)
           variables.global_variables_initializer().run()
diff --git a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
index 68d626de2c..a0ef3a607e 100644
--- a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test as test_lib
 
 
@@ -109,7 +110,7 @@ class MatrixBandPartBenchmark(test_lib.Benchmark):
     for shape_ in self.shapes:
       for limits in (-1, -1), (-1, 0), (0, -1), (2, 2):
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/cpu:0"):
           matrix = variables.Variable(array_ops.ones(shape_))
           band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
@@ -123,7 +124,7 @@ class MatrixBandPartBenchmark(test_lib.Benchmark):
 
         if test_lib.is_gpu_available(True):
           with ops.Graph().as_default(), \
-              session.Session() as sess, \
+              session.Session(config=benchmark.benchmark_config()) as sess, \
               ops.device("/gpu:0"):
             matrix = variables.Variable(array_ops.ones(shape_))
             band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
index 0386e91276..9630c052b8 100644
--- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.linalg import linalg_impl
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -181,7 +182,7 @@ class MatrixExponentialBenchmark(test.Benchmark):
   def benchmarkMatrixExponentialOp(self):
     for shape in self.shapes:
       with ops.Graph().as_default(), \
-          session.Session() as sess, \
+          session.Session(config=benchmark.benchmark_config()) as sess, \
           ops.device("/cpu:0"):
         matrix = self._GenerateMatrix(shape)
         expm = linalg_impl.matrix_exponential(matrix)
@@ -195,7 +196,7 @@ class MatrixExponentialBenchmark(test.Benchmark):
 
       if test.is_gpu_available(True):
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/gpu:0"):
           matrix = self._GenerateMatrix(shape)
           expm = linalg_impl.matrix_exponential(matrix)
diff --git a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
index 720ba806e9..8bda04b53d 100644
--- a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -179,7 +180,7 @@ class MatrixInverseBenchmark(test.Benchmark):
     for adjoint in False, True:
       for shape in self.shapes:
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/cpu:0"):
           matrix = self._GenerateMatrix(shape)
           inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
@@ -193,7 +194,7 @@ class MatrixInverseBenchmark(test.Benchmark):
 
         if test.is_gpu_available(True):
           with ops.Graph().as_default(), \
-              session.Session() as sess, \
+              session.Session(config=benchmark.benchmark_config()) as sess, \
               ops.device("/gpu:0"):
             matrix = self._GenerateMatrix(shape)
             inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
index 723a15fbd1..3205e211d9 100644
--- a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
@@ -31,6 +31,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.linalg import linalg_impl
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -159,7 +160,7 @@ class MatrixLogarithmBenchmark(test.Benchmark):
   def benchmarkMatrixLogarithmOp(self):
     for shape in self.shapes:
       with ops.Graph().as_default(), \
-          session.Session() as sess, \
+          session.Session(config=benchmark.benchmark_config()) as sess, \
           ops.device("/cpu:0"):
         matrix = self._GenerateMatrix(shape)
         logm = gen_linalg_ops.matrix_logarithm(matrix)
diff --git a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
index de495968a7..225a10e117 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test as test_lib
 
 
@@ -313,7 +314,7 @@ class MatrixSolveLsBenchmark(test_lib.Benchmark):
       for num_rhs in 1, 2, matrix_shape[-1]:
 
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/cpu:0"):
           matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
           x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
@@ -328,7 +329,7 @@ class MatrixSolveLsBenchmark(test_lib.Benchmark):
 
         if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513):
           with ops.Graph().as_default(), \
-                session.Session() as sess, \
+                session.Session(config=benchmark.benchmark_config()) as sess, \
                 ops.device("/gpu:0"):
             matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
             x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
diff --git a/tensorflow/python/kernel_tests/matrix_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
index b8f2736b7b..264df2565c 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -167,7 +168,7 @@ class MatrixSolveBenchmark(test.Benchmark):
         for num_rhs in 1, 2, matrix_shape[-1]:
 
           with ops.Graph().as_default(), \
-              session.Session() as sess, \
+              session.Session(config=benchmark.benchmark_config()) as sess, \
               ops.device("/cpu:0"):
             matrix, rhs = self._GenerateTestData(matrix_shape, num_rhs)
             x = linalg_ops.matrix_solve(matrix, rhs, adjoint=adjoint)
@@ -185,7 +186,7 @@ class MatrixSolveBenchmark(test.Benchmark):
 
           if run_gpu_test:
             with ops.Graph().as_default(), \
-                session.Session() as sess, \
+                session.Session(config=benchmark.benchmark_config()) as sess, \
                 ops.device("/gpu:0"):
               matrix, rhs = self._GenerateTestData(matrix_shape, num_rhs)
               x = linalg_ops.matrix_solve(matrix, rhs, adjoint=adjoint)
diff --git a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
index 31e84341ae..fdfe1001b8 100644
--- a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 # pylint: disable=protected-access
@@ -192,7 +193,7 @@ class BenchmarkSparseTensorsMapVsSerialization(test.Benchmark):
         sorted(zip(indices_batch, indices_value)), dtype=np.int64)
     values = ["feature_value_for_embedding_lookup"] * num_elements
     shape = np.asarray([batch_size, num_elements], dtype=np.int64)
-    with session.Session() as sess:
+    with session.Session(config=benchmark.benchmark_config()) as sess:
       with ops.device("/cpu:0"):
         indices = variables.Variable(indices)
         values = variables.Variable(values)
diff --git a/tensorflow/python/kernel_tests/where_op_test.py b/tensorflow/python/kernel_tests/where_op_test.py
index 29fb002ef4..04ac589432 100644
--- a/tensorflow/python/kernel_tests/where_op_test.py
+++ b/tensorflow/python/kernel_tests/where_op_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -160,7 +161,7 @@ class WhereBenchmark(test.Benchmark):
           x = random_ops.random_uniform((m, n), dtype=dtypes.float32) <= p
           v = resource_variable_ops.ResourceVariable(x)
           op = array_ops.where(v)
-        with session.Session() as sess:
+        with session.Session(config=benchmark.benchmark_config()) as sess:
           v.initializer.run()
           r = self.run_op_benchmark(sess, op, min_iters=100, name=name)
           gb_processed_input = m * n / 1.0e9
@@ -186,7 +187,7 @@ class WhereBenchmark(test.Benchmark):
           y = resource_variable_ops.ResourceVariable(y_gen)
           c = resource_variable_ops.ResourceVariable(c_gen)
           op = array_ops.where(c, x, y)
-        with session.Session() as sess:
+        with session.Session(config=benchmark.benchmark_config()) as sess:
           x.initializer.run()
           y.initializer.run()
           c.initializer.run()
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 35fdee4fad..ff86df6346 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -602,20 +602,19 @@ class AdjustHueBenchmark(test.Benchmark):
     if cpu_count is not None:
       config.inter_op_parallelism_threads = 1
       config.intra_op_parallelism_threads = cpu_count
-    with session.Session("", graph=ops.Graph(), config=config) as sess:
-      with ops.device(device):
-        inputs = variables.Variable(
-            random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255,
-            trainable=False,
-            dtype=dtypes.float32)
-        delta = constant_op.constant(0.1, dtype=dtypes.float32)
-        outputs = image_ops.adjust_hue(inputs, delta)
-        run_op = control_flow_ops.group(outputs)
-        sess.run(variables.global_variables_initializer())
-        for i in xrange(warmup_rounds + benchmark_rounds):
-          if i == warmup_rounds:
-            start = time.time()
-          sess.run(run_op)
+    with self.benchmark_session(config=config, device=device) as sess:
+      inputs = variables.Variable(
+          random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255,
+          trainable=False,
+          dtype=dtypes.float32)
+      delta = constant_op.constant(0.1, dtype=dtypes.float32)
+      outputs = image_ops.adjust_hue(inputs, delta)
+      run_op = control_flow_ops.group(outputs)
+      sess.run(variables.global_variables_initializer())
+      for i in xrange(warmup_rounds + benchmark_rounds):
+        if i == warmup_rounds:
+          start = time.time()
+        sess.run(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
     tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
@@ -646,21 +645,20 @@ class AdjustSaturationBenchmark(test.Benchmark):
     if cpu_count is not None:
       config.inter_op_parallelism_threads = 1
       config.intra_op_parallelism_threads = cpu_count
-    with session.Session("", graph=ops.Graph(), config=config) as sess:
-      with ops.device(device):
-        inputs = variables.Variable(
-            random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255,
-            trainable=False,
-            dtype=dtypes.float32)
-        delta = constant_op.constant(0.1, dtype=dtypes.float32)
-        outputs = image_ops.adjust_saturation(inputs, delta)
-        run_op = control_flow_ops.group(outputs)
-        sess.run(variables.global_variables_initializer())
-        for _ in xrange(warmup_rounds):
-          sess.run(run_op)
-        start = time.time()
-        for _ in xrange(benchmark_rounds):
-          sess.run(run_op)
+    with self.benchmark_session(config=config, device=device) as sess:
+      inputs = variables.Variable(
+          random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255,
+          trainable=False,
+          dtype=dtypes.float32)
+      delta = constant_op.constant(0.1, dtype=dtypes.float32)
+      outputs = image_ops.adjust_saturation(inputs, delta)
+      run_op = control_flow_ops.group(outputs)
+      sess.run(variables.global_variables_initializer())
+      for _ in xrange(warmup_rounds):
+        sess.run(run_op)
+      start = time.time()
+      for _ in xrange(benchmark_rounds):
+        sess.run(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
     tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
@@ -699,7 +697,7 @@ class ResizeBilinearBenchmark(test.Benchmark):
         deps = [resize_op]
       benchmark_op = control_flow_ops.group(*deps)
 
-    with session.Session() as sess:
+    with self.benchmark_session() as sess:
       sess.run(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess,
@@ -747,7 +745,7 @@ class ResizeBicubicBenchmark(test.Benchmark):
         deps = [resize_op]
       benchmark_op = control_flow_ops.group(*deps)
 
-    with session.Session() as sess:
+    with self.benchmark_session() as sess:
       sess.run(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess,
@@ -804,7 +802,7 @@ class ResizeAreaBenchmark(test.Benchmark):
         deps = [resize_op]
       benchmark_op = control_flow_ops.group(*deps)
 
-    with session.Session() as sess:
+    with self.benchmark_session() as sess:
       sess.run(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess,
diff --git a/tensorflow/python/platform/benchmark.py b/tensorflow/python/platform/benchmark.py
index fa17b17d10..4f7abb311a 100644
--- a/tensorflow/python/platform/benchmark.py
+++ b/tensorflow/python/platform/benchmark.py
@@ -27,6 +27,7 @@ import time
 import six
 
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.core.util import test_log_pb2
 from tensorflow.python.client import timeline
 from tensorflow.python.platform import app
@@ -182,6 +183,19 @@ class Benchmark(six.with_metaclass(_BenchmarkRegistrar, object)):
         throughput=throughput, extras=extras)
 
 
+@tf_export("test.benchmark_config")
+def benchmark_config():
+  """Returns a tf.ConfigProto for disabling the dependency optimizer.
+
+    Returns:
+      A TensorFlow ConfigProto object.
+  """
+  config = config_pb2.ConfigProto()
+  config.graph_options.rewrite_options.dependency_optimization = (
+      rewriter_config_pb2.RewriterConfig.OFF)
+  return config
+
+
 @tf_export("test.Benchmark")
 class TensorFlowBenchmark(Benchmark):
   """Abstract class that provides helpers for TensorFlow benchmarks."""
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt
index abe9b068ae..984c584c9e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt
@@ -20,6 +20,10 @@ tf_module {
     name: "assert_equal_graph_def"
     argspec: "args=[\'actual\', \'expected\', \'checkpoint_v2\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
+  member_method {
+    name: "benchmark_config"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "compute_gradient"
     argspec: "args=[\'x\', \'x_shape\', \'y\', \'y_shape\', \'x_init_value\', \'delta\', \'init_targets\', \'extra_feed_dict\'], varargs=None, keywords=None, defaults=[\'None\', \'0.001\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt
index abe9b068ae..984c584c9e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt
@@ -20,6 +20,10 @@ tf_module {
     name: "assert_equal_graph_def"
     argspec: "args=[\'actual\', \'expected\', \'checkpoint_v2\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
+  member_method {
+    name: "benchmark_config"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "compute_gradient"
     argspec: "args=[\'x\', \'x_shape\', \'y\', \'y_shape\', \'x_init_value\', \'delta\', \'init_targets\', \'extra_feed_dict\'], varargs=None, keywords=None, defaults=[\'None\', \'0.001\', \'None\', \'None\'], "
-- 
GitLab


From 494bbdfced3fd8596721d12e73676c4967f452e4 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 8 Oct 2018 13:48:19 -0700
Subject: [PATCH 1258/1357] Allow using more than one converter in the testing
 harness.

PiperOrigin-RevId: 216242862
---
 tensorflow/python/autograph/core/converter_testing.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py
index dc2d419d34..fcdbd0a82c 100644
--- a/tensorflow/python/autograph/core/converter_testing.py
+++ b/tensorflow/python/autograph/core/converter_testing.py
@@ -128,7 +128,13 @@ class TestCase(test.TestCase):
   @contextlib.contextmanager
   def converted(self, entity, converter_module, namespace, *tf_symbols):
     node, ctx = self.prepare(entity, namespace)
-    node = converter_module.transform(node, ctx)
+
+    if not isinstance(converter_module, (list, tuple)):
+      converter_module = (converter_module,)
+    for m in converter_module:
+      node = m.transform(node, ctx)
+      node = converter.standard_analysis(node, ctx, is_initial=True)
+
     with self.compiled(node, namespace, *tf_symbols) as result:
       yield result
 
-- 
GitLab


From eec9ca8f0baccd249a49046fe31b460903e44850 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 8 Oct 2018 13:50:12 -0700
Subject: [PATCH 1259/1357] Partial support tfe.defun in tf.gradients.

Doesn't attempt to deal with cases where we might have already generated
the functiondef for the parent function as in that case we cannot easily
modify the forward pass.

PiperOrigin-RevId: 216243224
---
 .../core/common_runtime/shape_refiner.cc      |  5 ++
 tensorflow/core/framework/shape_inference.cc  |  9 ++
 tensorflow/core/framework/shape_inference.h   |  9 +-
 tensorflow/core/graph/graph.cc                | 13 +++
 tensorflow/core/graph/graph.h                 |  5 ++
 tensorflow/core/graph/node_builder.cc         |  8 +-
 tensorflow/core/ops/resource_variable_ops.cc  |  3 +-
 tensorflow/python/eager/function.py           | 87 ++++++++++---------
 tensorflow/python/eager/function_test.py      | 18 +++-
 tensorflow/python/framework/op_def_library.py |  3 +-
 .../python/kernel_tests/cond_v2_test.py       |  1 +
 tensorflow/python/ops/custom_gradient.py      | 44 ++++++++++
 tensorflow/python/ops/gradients_impl.py       | 30 +++----
 tensorflow/python/ops/while_v2.py             |  3 +-
 14 files changed, 169 insertions(+), 69 deletions(-)

diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index fa4d1eda62..9488a44778 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -288,6 +288,11 @@ Status ShapeRefiner::SetShape(const Node* node, int output_port,
         "output_port '", output_port, "' is out of range, ", "node '",
         node->name(), "' has ", node->num_outputs(), " outputs");
   }
+  // Note: it's possible, if the node's been updated, that the shape inference
+  // context doesn't have the right number of outputs.
+  if (node->num_outputs() > c->num_outputs()) {
+    TF_RETURN_IF_ERROR(c->ExpandOutputs(node->num_outputs()));
+  }
 
   // Check compatibility, and merge the shapes.
   ShapeHandle existing_shape = c->output(output_port);
diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index 3e77028a5f..4dcc80680f 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -239,6 +239,15 @@ void InferenceContext::PreInputInit(
   output_handle_shapes_and_types_.resize(num_outputs);
 }
 
+Status InferenceContext::ExpandOutputs(int new_output_size) {
+  if (new_output_size < outputs_.size()) {
+    return errors::InvalidArgument("Trying to reduce number of outputs of op.");
+  }
+  outputs_.resize(new_output_size, nullptr);
+  output_handle_shapes_and_types_.resize(new_output_size);
+  return Status::OK();
+}
+
 void InferenceContext::PostInputInit(
     std::vector<std::unique_ptr<std::vector<ShapeAndType>>> input_handle_data) {
   int num_inputs_from_node_def = 0;
diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h
index 81258b55b3..e3885b7d9e 100644
--- a/tensorflow/core/framework/shape_inference.h
+++ b/tensorflow/core/framework/shape_inference.h
@@ -323,13 +323,13 @@ class InferenceContext {
     return input_tensors_as_shapes_;
   }
 
-  ShapeHandle output(int64 idx) const { return outputs_[idx]; }
-  void set_output(int idx, ShapeHandle shape) { outputs_[idx] = shape; }
+  ShapeHandle output(int64 idx) const { return outputs_.at(idx); }
+  void set_output(int idx, ShapeHandle shape) { outputs_.at(idx) = shape; }
   Status set_output(StringPiece output_name,
                     const std::vector<ShapeHandle>& shapes);
 
   int num_outputs() const { return outputs_.size(); }
-  ShapeHandle output(int idx) const { return outputs_[idx]; }
+  ShapeHandle output(int idx) const { return outputs_.at(idx); }
   Status output(StringPiece output_name,
                 std::vector<ShapeHandle>* output) const;
 
@@ -645,6 +645,9 @@ class InferenceContext {
     return merged_dims_;
   }
 
+  // Adds new outputs; useful when mutating the graph.
+  Status ExpandOutputs(int new_output_size);
+
  private:
   // Creates and stores shapes for use in InferenceContext.
   class ShapeManager {
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 7a4a0096fa..6f068546d2 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -142,6 +142,19 @@ void Node::Clear() {
   assigned_device_name_index_ = 0;
 }
 
+void Node::UpdateProperties() {
+  DataTypeVector inputs;
+  DataTypeVector outputs;
+  Status status =
+      InOutTypesForNode(props_->node_def, *(props_->op_def), &inputs, &outputs);
+  if (!status.ok()) {
+    LOG(ERROR) << "Failed at updating node: " << status;
+    return;
+  }
+  props_ = std::make_shared<NodeProperties>(props_->op_def, props_->node_def,
+                                            inputs, outputs);
+}
+
 const string& Node::name() const { return props_->node_def.name(); }
 const string& Node::type_string() const { return props_->node_def.op(); }
 const NodeDef& Node::def() const { return props_->node_def; }
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 2944951f82..228b1331d9 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -171,6 +171,7 @@ class Node {
   template <typename T>
   void AddAttr(const string& name, const T& val) {
     SetAttrValue(val, AddAttrHelper(name));
+    UpdateProperties();
   }
 
   void ClearAttr(const string& name);
@@ -211,6 +212,10 @@ class Node {
   // e.g. in AddAttr.
   void MaybeCopyOnWrite();
 
+  // Called after an attr has changed. Decides whether we need to update some
+  // property of the node (stored in props_).
+  void UpdateProperties();
+
   AttrValue* AddAttrHelper(const string& name);
 
   // A set of mutually exclusive classes for different kinds of nodes,
diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc
index d92874909f..68a20fcc5f 100644
--- a/tensorflow/core/graph/node_builder.cc
+++ b/tensorflow/core/graph/node_builder.cc
@@ -140,10 +140,10 @@ void NodeBuilder::AddIndexError(const Node* node, int i) {
         strings::StrCat("Attempt to add nullptr Node to node with type ",
                         def_builder_.op_def().name()));
   } else {
-    errors_.emplace_back(
-        strings::StrCat("Attempt to add output ", i, " of ", node->name(),
-                        " not in range [0, ", node->num_outputs(),
-                        ") to node with type ", def_builder_.op_def().name()));
+    errors_.emplace_back(strings::StrCat(
+        "Attempt to add output ", i, " of ", node->name(), " not in range [0, ",
+        node->num_outputs(), ") to node with type ",
+        def_builder_.op_def().name(), ". Node: ", node->DebugString()));
   }
 }
 
diff --git a/tensorflow/core/ops/resource_variable_ops.cc b/tensorflow/core/ops/resource_variable_ops.cc
index adc9cd1486..65bdde375b 100644
--- a/tensorflow/core/ops/resource_variable_ops.cc
+++ b/tensorflow/core/ops/resource_variable_ops.cc
@@ -216,7 +216,8 @@ REGISTER_OP("VarIsInitializedOp")
 Status VariableShapeShapeFn(InferenceContext* c) {
   auto* handle_data = c->input_handle_shapes_and_types(0);
   if (handle_data == nullptr || handle_data->empty()) {
-    return errors::InvalidArgument("Handle doesn't have shape information.");
+    c->set_output(0, c->Vector(c->UnknownDim()));
+    return Status::OK();
   }
   ShapeHandle var_shape = (*handle_data)[0].shape;
   int64 rank = c->RankKnown(var_shape) ? c->Rank(var_shape)
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 93168826b1..99bf375ea7 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -46,6 +46,7 @@ from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import cond_v2_impl
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import custom_gradient
 from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import resource_variable_ops
@@ -81,49 +82,10 @@ def _create_substitute_placeholder(value, name=None, dtype=None):
   with ops.control_dependencies(None):
     placeholder = graph_placeholder(
         dtype=dtype or value.dtype, shape=value.shape, name=name)
-  _copy_handle_data(value, placeholder)
+  custom_gradient.copy_handle_data(value, placeholder)
   return placeholder
 
 
-def _copy_handle_data(source_t, target_t):
-  """Copies HandleData for variant and resource type tensors if available.
-
-  The CppShapeInferenceResult::HandleData proto contains information about the
-  shapes and types of the element tensors of resource/variant type tensors.
-  We need to copy this across function boundaries, i.e., when capturing a
-  placeholder or when returning a function tensor as output. If we don't do this
-  the element tensors will have unknown shapes, e.g., if a TensorList variant
-  tensor is captured as a placeholder, elements popped from that list would have
-  unknown shape.
-
-  Args:
-    source_t: The tensor to copy HandleData from.
-    target_t: The tensor to copy HandleData to.
-  """
-  if (target_t.dtype == dtypes_module.resource or
-      target_t.dtype == dtypes_module.variant):
-    if isinstance(source_t, ops.EagerTensor):
-      handle_data = source_t._handle_data  # pylint: disable=protected-access
-    else:
-      handle_data = resource_variable_ops.get_resource_handle_data(source_t)
-    if handle_data is not None and handle_data.is_set:
-      # pylint: disable=protected-access
-      pywrap_tensorflow.SetHandleShapeAndType(target_t.graph._c_graph,
-                                              target_t._as_tf_output(),
-                                              handle_data.SerializeToString())
-      # pylint: enable=protected-access
-      # Ensure that shapes and dtypes are propagated.
-      shapes, types = zip(*[(pair.shape, pair.dtype)
-                            for pair in handle_data.shape_and_type])
-      ranks = [len(s.dim) if not s.unknown_rank else -1 for s in shapes]
-      shapes = [[d.size for d in s.dim]
-                if not s.unknown_rank else None for s in shapes]
-      pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper(
-          target_t._op._graph._c_graph,  # pylint: disable=protected-access
-          target_t._as_tf_output(),  # pylint: disable=protected-access
-          shapes, ranks, types)
-
-
 def _get_device_functions(ctx, graph):
   """Returns a tuple of device functions representing the device stack."""
   if ctx.executing_eagerly():
@@ -547,7 +509,7 @@ class _EagerDefinedFunction(object):
       for i, shape in enumerate(self._output_shapes):
         outputs[i].set_shape(shape)
       for i, func_graph_output in enumerate(self._func_graph_outputs):
-        _copy_handle_data(func_graph_output, outputs[i])
+        custom_gradient.copy_handle_data(func_graph_output, outputs[i])
       return outputs
 
 
@@ -658,7 +620,48 @@ class Function(object):
     if tape.should_record(tensor_inputs) or tape.should_record(captures):
       return self._backprop_call(args)
 
-    outputs = self._inference_function.call(ctx, args)
+    # Only need to override the gradient in graph mode and when we have outputs.
+    if context.executing_eagerly() or not self.outputs:
+      outputs = self._inference_function.call(ctx, args)
+    else:
+      name = "PartitionedCall-%s" % ops.uid()
+
+      @ops.RegisterGradient(name)
+      def grad_fn(op, *doutputs):  # pylint: disable=unused-variable
+        """Gradients of this function."""
+        if op.graph is not ops.get_default_graph():
+          # TODO(apassos) this will still emit SymbolicGradient ops when
+          # nested defuns are being differentiated. We need to somehow figure
+          # out a way to update the FunctionDef corresponding to the calling
+          # function when mutating a call to the forward pass.
+          return gradients_impl._SymGrad(op, list(doutputs))  # pylint: disable=protected-access
+        if self._backward_graph_function is None:
+          self._construct_backprop_function()
+        self._forward_function.add_to_graph(op.graph)
+        func = attr_value_pb2.AttrValue(
+            func=attr_value_pb2.NameAttrList(
+                name=self._forward_function.name))
+        # pylint: disable=protected-access
+        op._set_attr("f", func)
+        types = attr_value_pb2.AttrValue.ListValue(
+            type=self._forward_function._output_types)
+        op._set_attr("Tout", attr_value_pb2.AttrValue(list=types))
+        for i in range(
+            len(outputs), len(self._forward_function._output_types)):
+          t = ops.Tensor(op, i, self._forward_function._output_types[i])
+          t.set_shape(self._forward_function._output_shapes[i])
+          func_graph_output = self._forward_function._func_graph_outputs[i]
+          custom_gradient.copy_handle_data(func_graph_output, t)
+          op._outputs.append(t)
+        # pylint: enable=protected-access
+        side_outputs = op.outputs[len(outputs):]
+        return self._backward_graph_function(
+            *(list(doutputs) + list(side_outputs)))
+
+      with ops.get_default_graph().gradient_override_map(
+          {"PartitionedCall": name}):
+        outputs = self._inference_function.call(ctx, args)
+
     return self._build_call_outputs(outputs)
 
   @property
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 57e545be69..e46bde098b 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -286,7 +286,23 @@ class FunctionTest(test.TestCase):
       c = constant_op.constant([[2.]])
       f_c = f(c)
       g, = gradients_impl.gradients(f_c, c)
-      self.assertAllEqual(sess.run(g), [[1.0]])
+      self.assertAllEqual(sess.run(g).values, [[1.0]])
+
+  def testNoSymGradNestedDefun(self):
+
+    @function.defun
+    def outer():
+
+      @function.defun
+      def f(x):
+        return array_ops.gather_nd(x, [[0]])
+
+      c = constant_op.constant([[2.]])
+      f_c = f(c)
+      g, = gradients_impl.gradients(f_c, c)
+      self.assertTrue(isinstance(g, ops.IndexedSlices))
+
+    outer()
 
   def testNestedInputsGraphFunction(self):
     matmul = function.defun(math_ops.matmul)
diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py
index e85bba11cd..9955a9a2cd 100644
--- a/tensorflow/python/framework/op_def_library.py
+++ b/tensorflow/python/framework/op_def_library.py
@@ -482,7 +482,8 @@ class OpDefLibrary(object):
               else:
                 raise TypeError("%s that don't all match." % prefix)
             else:
-              raise TypeError("%s that are invalid." % prefix)
+              raise TypeError(
+                  "%s that are invalid. Tensors: %s" % (prefix, values))
 
           types = [x.dtype for x in values]
           inputs.extend(values)
diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index ec875aae59..a424a0f219 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -153,6 +153,7 @@ class CondV2Test(test.TestCase):
         self.assertIn("foo_cond_1_false", ops.get_default_graph()._functions)
 
   def testDefunInCond(self):
+    self.skipTest("b/117293122")
     x = constant_op.constant(1.0, name="x")
     y = constant_op.constant(2.0, name="y")
 
diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py
index d7834ba350..bfe23834b7 100644
--- a/tensorflow/python/ops/custom_gradient.py
+++ b/tensorflow/python/ops/custom_gradient.py
@@ -18,9 +18,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import tape as tape_lib
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_array_ops
@@ -33,6 +35,45 @@ from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
 
+def copy_handle_data(source_t, target_t):
+  """Copies HandleData for variant and resource type tensors if available.
+
+  The CppShapeInferenceResult::HandleData proto contains information about the
+  shapes and types of the element tensors of resource/variant type tensors.
+  We need to copy this across function boundaries, i.e., when capturing a
+  placeholder or when returning a function tensor as output. If we don't do this
+  the element tensors will have unknown shapes, e.g., if a TensorList variant
+  tensor is captured as a placeholder, elements popped from that list would have
+  unknown shape.
+
+  Args:
+    source_t: The tensor to copy HandleData from.
+    target_t: The tensor to copy HandleData to.
+  """
+  if (target_t.dtype == dtypes.resource or
+      target_t.dtype == dtypes.variant):
+    if isinstance(source_t, ops.EagerTensor):
+      handle_data = source_t._handle_data  # pylint: disable=protected-access
+    else:
+      handle_data = resource_variable_ops.get_resource_handle_data(source_t)
+    if handle_data is not None and handle_data.is_set:
+      # pylint: disable=protected-access
+      pywrap_tensorflow.SetHandleShapeAndType(target_t.graph._c_graph,
+                                              target_t._as_tf_output(),
+                                              handle_data.SerializeToString())
+      # pylint: enable=protected-access
+      # Ensure that shapes and dtypes are propagated.
+      shapes, types = zip(*[(pair.shape, pair.dtype)
+                            for pair in handle_data.shape_and_type])
+      ranks = [len(s.dim) if not s.unknown_rank else -1 for s in shapes]
+      shapes = [[d.size for d in s.dim]
+                if not s.unknown_rank else None for s in shapes]
+      pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper(
+          target_t._op._graph._c_graph,  # pylint: disable=protected-access
+          target_t._as_tf_output(),  # pylint: disable=protected-access
+          shapes, ranks, types)
+
+
 @tf_export("custom_gradient")
 def custom_gradient(f):
   """Decorator to define a function with a custom gradient.
@@ -180,8 +221,11 @@ def _graph_mode_decorator(f, *args, **kwargs):
     input_grads = nest.flatten(input_grads)
     return ([None] * len(flat_result)) + input_grads + variable_grads
 
+  original_tensors = all_tensors
   with ops.get_default_graph().gradient_override_map({"IdentityN": name}):
     all_tensors = array_ops.identity_n(all_tensors)
+  for ot, t in zip(original_tensors, all_tensors):
+    copy_handle_data(ot, t)
   return nest.pack_sequence_as(
       structure=result, flat_sequence=all_tensors[:len(flat_result)])
 
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index aac95037dc..6909fcaed5 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -800,23 +800,21 @@ def _GradientsHelper(ys,
         # pylint: enable=protected-access
         has_out_grads = any(isinstance(g, ops.Tensor) or g for g in out_grads)
         if has_out_grads and (op not in stop_ops):
-          if is_func_call:
-            if is_partitioned_call:
-              func_call = src_graph._get_function(  # pylint: disable=protected-access
-                  compat.as_bytes(op.get_attr("f").name))
+          try:
+            grad_fn = ops.get_gradient_function(op)
+          except LookupError:
+            if is_func_call:
+              if is_partitioned_call:
+                func_call = src_graph._get_function(  # pylint: disable=protected-access
+                    compat.as_bytes(op.get_attr("f").name))
+              else:
+                func_call = src_graph._get_function(op.type)  # pylint: disable=protected-access
+              # Note that __defun is not set if the graph is
+              # imported. If it's set, we prefer to access the original
+              # defun.
+              func_call = getattr(op, "__defun", func_call)
+              grad_fn = func_call.python_grad_func
             else:
-              func_call = src_graph._get_function(op.type)  # pylint: disable=protected-access
-            # Note that __defun is not set if the graph is
-            # imported. If it's set, we prefer to access the original
-            # defun.
-            func_call = getattr(op, "__defun", func_call)
-            grad_fn = func_call.python_grad_func
-          else:
-            # A grad_fn must be defined, either as a function or as None
-            # for ops that do not have gradients.
-            try:
-              grad_fn = ops.get_gradient_function(op)
-            except LookupError:
               raise LookupError(
                   "No gradient defined for operation '%s' (op type: %s)" %
                   (op.name, op.type))
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index 8e88a84d60..0419656143 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -37,6 +37,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import cond_v2_impl as cond_v2
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import custom_gradient
 from tensorflow.python.ops import gen_functional_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import list_ops
@@ -580,7 +581,7 @@ def _check_shapes_compat(output_tensors, shape_invariants, input_tensors):
 
 def _copy_handle_data(src_tensors, tgt_tensors):
   for src_t, tgt_t in zip(src_tensors, tgt_tensors):
-    function._copy_handle_data(src_t, tgt_t)
+    custom_gradient.copy_handle_data(src_t, tgt_t)
 
 
 # TODO(srbs): Move to common utils for cond_v2 and while_v2.
-- 
GitLab


From 13b47e6c4f9d7b295948b1057139bf676e394b6f Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 14:16:55 -0700
Subject: [PATCH 1260/1357] Automated rollback of commit
 295b3c80555cc82d8d70faf96a47681e1d904b9c

PiperOrigin-RevId: 216247929
---
 tensorflow/core/kernels/data/iterator_ops.cc  |  4 ---
 .../kernels/data/map_and_batch_dataset_op.cc  |  9 ++++---
 .../core/kernels/data/model_dataset_op.cc     | 10 ++++---
 .../data/parallel_interleave_dataset_op.cc    | 27 +++++++++++--------
 .../kernels/data/parallel_map_iterator.cc     |  9 ++++---
 .../core/kernels/data/prefetch_dataset_op.cc  | 10 ++++---
 tensorflow/core/kernels/data/writer_ops.cc    | 12 ++++-----
 7 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 7a833668ac..8acd6cc724 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -16,10 +16,8 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
-#include "tensorflow/core/common_runtime/threadpool_device.h"
 #include "tensorflow/core/framework/iterator.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
-#include "tensorflow/core/framework/resource_op_kernel.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
@@ -27,13 +25,11 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/optional_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 namespace data {
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index f45a239793..0fb721cd7c 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -445,9 +445,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
-          runner_thread_.reset(ctx->env()->StartThread(
-              {}, "runner_thread",
-              std::bind(&Iterator::RunnerThread, this, ctx_copy)));
+          runner_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+          runner_thread_->Schedule(
+              std::bind(&Iterator::RunnerThread, this, ctx_copy));
         }
       }
 
@@ -703,7 +704,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
       bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index 9aa505f4f1..859df57962 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -126,9 +127,10 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         if (!optimize_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          optimize_thread_.reset(ctx->env()->StartThread(
-              {}, "optimize_thread",
-              [this, new_ctx]() { OptimizeThread(new_ctx); }));
+          optimize_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "optimize_thread");
+          optimize_thread_->Schedule(
+              [this, new_ctx]() { OptimizeThread(new_ctx); });
         }
         return Status::OK();
       }
@@ -167,7 +169,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       condition_variable cond_var_;
       std::shared_ptr<model::Model> model_;
-      std::unique_ptr<Thread> optimize_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<BackgroundWorker> optimize_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 6b6b3d6ab9..9c836b836e 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -481,9 +482,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           worker_threads_.reserve(dataset()->num_threads());
           for (size_t i = 0; i < dataset()->num_threads(); ++i) {
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(ctx->env()->StartThread(
-                {}, "worker_thread",
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
+            worker_threads_.emplace_back(
+                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
+            worker_threads_.back()->Schedule(
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
           }
         }
         return Status::OK();
@@ -580,9 +582,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             }
             workers_[i].SetInputs(s, std::move(args));
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(ctx->env()->StartThread(
-                {}, "worker_thread",
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
+            worker_threads_.emplace_back(
+                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
+            worker_threads_.back()->Schedule(
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
             if (i < dataset()->cycle_length_) {
               interleave_indices_.push_back(i);
             } else {
@@ -1047,7 +1050,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // The worker threads. This must be last to ensure the
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
-      std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
+      std::vector<std::unique_ptr<BackgroundWorker>> worker_threads_
+          GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
@@ -1389,9 +1393,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          runner_thread_.reset(ctx->env()->StartThread(
-              {}, "runner_thread",
-              [this, new_ctx]() { RunnerThread(new_ctx); }));
+          runner_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+          runner_thread_->Schedule(
+              [this, new_ctx]() { RunnerThread(new_ctx); });
         }
       }
 
@@ -1645,7 +1650,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(*mu_) = false;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index ebf41925c9..e69274e4f2 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -181,9 +181,10 @@ class ParallelMapIterator : public DatasetBaseIterator {
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
-      runner_thread_.reset(ctx->env()->StartThread(
-          {}, "runner_thread",
-          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
+      runner_thread_ =
+          MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+      runner_thread_->Schedule(
+          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy));
     }
   }
 
@@ -331,7 +332,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
       GUARDED_BY(*mu_);
-  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+  std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
   bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index 754ed772db..e9c38eb8a0 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -256,10 +257,11 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     Status EnsurePrefetchThreadStarted(IteratorContext* ctx)
         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
       if (!prefetch_thread_) {
+        prefetch_thread_ =
+            MakeUnique<BackgroundWorker>(ctx->env(), "prefetch_thread");
         std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-        prefetch_thread_.reset(ctx->env()->StartThread(
-            {}, "prefetch_thread",
-            [this, new_ctx]() { PrefetchThread(new_ctx); }));
+        prefetch_thread_->Schedule(
+            [this, new_ctx]() { PrefetchThread(new_ctx); });
       }
       return Status::OK();
     }
@@ -363,7 +365,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     string prefix_end_;
     PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_);
     std::deque<BufferElement> buffer_ GUARDED_BY(mu_);
-    std::unique_ptr<Thread> prefetch_thread_ GUARDED_BY(mu_);
+    std::unique_ptr<BackgroundWorker> prefetch_thread_ GUARDED_BY(mu_);
     bool cancelled_ GUARDED_BY(mu_) = false;
     bool prefetch_thread_finished_ GUARDED_BY(mu_) = false;
   };
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index 3f76695bb1..7bb2077b62 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel {
  public:
   explicit ToTFRecordOp(OpKernelConstruction* ctx)
       : AsyncOpKernel(ctx),
-        thread_pool_(new thread::ThreadPool(
-            ctx->env(), ThreadOptions(),
-            strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())),
-            1 /* num_threads */, false /* low_latency_hint */)) {}
+        background_worker_(
+            ctx->env(),
+            strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) {
+  }
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
@@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel {
     // The call to `iterator->GetNext()` may block and depend on an
     // inter-op thread pool thread, so we issue the call from the
     // owned thread pool.
-    thread_pool_->Schedule([this, ctx, done]() {
+    background_worker_.Schedule([this, ctx, done]() {
       string filename;
       OP_REQUIRES_OK_ASYNC(
           ctx, ParseScalarArgument<string>(ctx, "filename", &filename), done);
@@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel {
   }
 
  private:
-  std::unique_ptr<thread::ThreadPool> thread_pool_;
+  BackgroundWorker background_worker_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU),
-- 
GitLab


From 09b0fc199129e0f487a39741bdf674cf09035cbc Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 14:17:24 -0700
Subject: [PATCH 1261/1357] [tf.data] Choose non-deterministic seed once per
 Python-level `Dataset` object.

This changes the behavior of randomness-introducing datasets (`tf.data.Dataset.shuffle()`, `tf.data.experimental.shuffle_and_repeat()`, and `tf.data.experimental.RandomDataset`). Previously, when you used the same `tf.data.Dataset` object multiple times in a pipeline (e.g. by zipping two datasets derived from the same randomness-introducing dataset) *and* you did not specify an explicit `seed`, the implementation would choose different non-deterministic seeds for each use of the `Dataset` object.

With this change, the seed will be chosen once per `Dataset` (technically, once per `Dataset`-`Graph` combination, due to the vagaries of capturing state in `Dataset.make_one_shot_iterator()`), which means that all uses of the same dataset object will observe the same sequence of values.

This change also revealed a small bug in how `Dataset.shuffle(..., reshuffle_each_iteration=False)` is serialized when an explicit seed is specified. The op-level seed was dropped, which could lead to non-deterministic behavior. This change fixes that issue by forwarding the op-level seed to the appropriate place.

PiperOrigin-RevId: 216248013
---
 .../core/kernels/data/shuffle_dataset_op.cc   |  2 +-
 .../data/experimental/kernel_tests/BUILD      | 13 ++++++
 .../kernel_tests/random_dataset_test.py       | 45 +++++++++++++++++++
 .../kernel_tests/shuffle_and_repeat_test.py   | 21 ++++++++-
 .../data/experimental/ops/random_ops.py       | 21 +++++++--
 .../data/experimental/ops/shuffle_ops.py      | 21 +++++++--
 tensorflow/python/data/kernel_tests/BUILD     |  1 +
 .../kernel_tests/shuffle_dataset_op_test.py   | 25 ++++++++++-
 tensorflow/python/data/ops/dataset_ops.py     | 22 +++++++--
 tensorflow/python/data/util/BUILD             |  1 +
 tensorflow/python/data/util/random_seed.py    |  5 ++-
 .../python/data/util/random_seed_test.py      | 13 +++++-
 12 files changed, 174 insertions(+), 16 deletions(-)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py

diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 66466d6a36..9f54c381a9 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -485,7 +485,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
                      int64 buffer_size, int64 seed, int64 seed2, int64 count)
         : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
-          seed2_(seed) {}
+          seed2_(seed2) {}
 
     string DebugString() const override {
       return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_,
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index 4eef9580ad..a67f6ff031 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -453,6 +453,18 @@ cuda_py_test(
     tags = ["no_windows_gpu"],
 )
 
+py_test(
+    name = "random_dataset_test",
+    srcs = ["random_dataset_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python/data/experimental/ops:random_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
 py_library(
     name = "reader_dataset_ops_test_base",
     testonly = 1,
@@ -562,6 +574,7 @@ py_test(
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py
new file mode 100644
index 0000000000..d403a575ec
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py
@@ -0,0 +1,45 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.RandomDataset()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+
+from tensorflow.python.data.experimental.ops import random_ops
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+
+
+class RandomDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ("NoSeed", None),
+      ("WithSeed", 42),
+  )
+  def testZipRandomDataset(self, seed):
+    dataset = random_ops.RandomDataset(seed=seed).take(30)
+    dataset = dataset_ops.Dataset.zip((dataset, dataset))
+    iterator = dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(30):
+        x, y = sess.run(next_element)
+        self.assertEqual(x, y)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
index c208963a86..883169495f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.experimental.ops import shuffle_ops
@@ -27,7 +28,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 
 
-class ShuffleAndRepeatTest(test_base.DatasetTestBase):
+class ShuffleAndRepeatTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _build_ds(self, seed, count=5, num_elements=20):
     return dataset_ops.Dataset.range(num_elements).apply(
@@ -110,6 +111,24 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
       with self.session(graph=g) as sess:
         sess.run(get_next_op)
 
+  @parameterized.named_parameters(
+      ("NoSeed", None),
+      ("WithSeed", 42),
+  )
+  def testShuffleAndRepeatAndZipDataset(self, seed):
+    dataset = dataset_ops.Dataset.range(10).apply(
+        shuffle_ops.shuffle_and_repeat(10, count=3, seed=seed))
+    dataset = dataset_ops.Dataset.zip((dataset, dataset))
+    iterator = dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(30):
+        x, y = sess.run(next_element)
+        self.assertEqual(x, y)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/ops/random_ops.py b/tensorflow/python/data/experimental/ops/random_ops.py
index e3a2aeab31..25d7fbf691 100644
--- a/tensorflow/python/data/experimental/ops/random_ops.py
+++ b/tensorflow/python/data/experimental/ops/random_ops.py
@@ -33,13 +33,26 @@ class RandomDataset(dataset_ops.DatasetSource):
   def __init__(self, seed=None):
     """A `Dataset` of pseudorandom values."""
     super(RandomDataset, self).__init__()
-    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
+    # is iterated over, and cache it in `self._graph_seed_map`. This supports
+    # two features: iterating over the same `ShuffleDataset` twice in the same
+    # pipeline and observing the same order (by tying the seeds together with
+    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
+    # which requires the stateful RNG op to be created inside the same graph as
+    # the dataset.
+    self._original_seed = seed
+    self._graph_seed_map = {}
 
   def _as_variant_tensor(self):
+    try:
+      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
+    except KeyError:
+      seed, seed2 = random_seed.get_seed(self._original_seed)
+      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
+
     return gen_dataset_ops.random_dataset(
-        seed=self._seed,
-        seed2=self._seed2,
-        **dataset_ops.flat_structure(self))
+        seed=seed, seed2=seed2, **dataset_ops.flat_structure(self))
 
   @property
   def output_classes(self):
diff --git a/tensorflow/python/data/experimental/ops/shuffle_ops.py b/tensorflow/python/data/experimental/ops/shuffle_ops.py
index a4307212da..a82e4b7d09 100644
--- a/tensorflow/python/data/experimental/ops/shuffle_ops.py
+++ b/tensorflow/python/data/experimental/ops/shuffle_ops.py
@@ -39,17 +39,32 @@ class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset):
     else:
       self._count = ops.convert_to_tensor(
           count, dtype=dtypes.int64, name="count")
-    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
+    # is iterated over, and cache it in `self._graph_seed_map`. This supports
+    # two features: iterating over the same `ShuffleDataset` twice in the same
+    # pipeline and observing the same order (by tying the seeds together with
+    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
+    # which requires the stateful RNG op to be created inside the same graph as
+    # the dataset.
+    self._original_seed = seed
+    self._graph_seed_map = {}
 
   def _as_variant_tensor(self):
+    try:
+      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
+    except KeyError:
+      seed, seed2 = random_seed.get_seed(self._original_seed)
+      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
+
     # pylint: disable=protected-access
     input_resource = self._input_dataset._as_variant_tensor()
     return gen_dataset_ops.shuffle_and_repeat_dataset(
         input_resource,
         buffer_size=self._buffer_size,
         count=self._count,
-        seed=self._seed,
-        seed2=self._seed2,
+        seed=seed,
+        seed2=seed2,
         **dataset_ops.flat_structure(self))
     # pylint: enable=protected-access
 
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index c7295d6e69..ecb24103b3 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -443,6 +443,7 @@ tf_py_test(
     srcs = ["shuffle_dataset_op_test.py"],
     additional_deps = [
         ":test_base",
+        "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
index 347af18576..6001721726 100644
--- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import collections
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.kernel_tests import test_base
@@ -31,7 +32,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ShuffleDatasetTest(test_base.DatasetTestBase):
+class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testShuffleDataset(self):
     components = (
@@ -209,5 +210,27 @@ class ShuffleDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
+  @parameterized.named_parameters(
+      ("ReshuffleEachIterationNoSeed", None, True),
+      ("ReshuffleEachIterationWithSeed", 42, True),
+      ("NoReshuffleEachIterationNoSeed", None, False),
+      ("NoReshuffleEachIterationWithSeed", 42, False),
+  )
+  def testShuffleAndZipDataset(self, seed, reshuffle):
+    dataset = (dataset_ops.Dataset.range(10)
+               .shuffle(10, seed=seed, reshuffle_each_iteration=reshuffle)
+               .repeat(3))
+    dataset = dataset_ops.Dataset.zip((dataset, dataset))
+    iterator = dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(30):
+        x, y = sess.run(next_element)
+        self.assertEqual(x, y)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index b7e19055f2..2d036fd0d6 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -2254,18 +2254,34 @@ class ShuffleDataset(UnaryDataset):
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
-    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
+    # is iterated over, and cache it in `self._graph_seed_map`. This supports
+    # two features: iterating over the same `ShuffleDataset` twice in the same
+    # pipeline and observing the same order (by tying the seeds together with
+    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
+    # which requires the stateful RNG op to be created inside the same graph as
+    # the dataset.
+    self._original_seed = seed
+    self._graph_seed_map = {}
+
     if reshuffle_each_iteration is None:
       self._reshuffle_each_iteration = True
     else:
       self._reshuffle_each_iteration = reshuffle_each_iteration
 
   def _as_variant_tensor(self):
+    try:
+      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
+    except KeyError:
+      seed, seed2 = random_seed.get_seed(self._original_seed)
+      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
+
     return gen_dataset_ops.shuffle_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         buffer_size=self._buffer_size,
-        seed=self._seed,
-        seed2=self._seed2,
+        seed=seed,
+        seed2=seed2,
         reshuffle_each_iteration=self._reshuffle_each_iteration,
         **flat_structure(self))
 
diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD
index 39082ce370..95bf3209d7 100644
--- a/tensorflow/python/data/util/BUILD
+++ b/tensorflow/python/data/util/BUILD
@@ -142,6 +142,7 @@ py_test(
         ":random_seed",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:random_ops",
         "//tensorflow/python:util",
     ],
 )
diff --git a/tensorflow/python/data/util/random_seed.py b/tensorflow/python/data/util/random_seed.py
index d5169f7a53..d24df6d957 100644
--- a/tensorflow/python/data/util/random_seed.py
+++ b/tensorflow/python/data/util/random_seed.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
 
 
 def get_seed(seed):
@@ -37,7 +38,7 @@ def get_seed(seed):
 
   Returns:
     A tuple of two `tf.int64` scalar tensors that should be used for the local
-    seed of the calling dataset.
+    seeds of the calling dataset.
   """
   seed, seed2 = random_seed.get_seed(seed)
   if seed is None:
@@ -45,7 +46,7 @@ def get_seed(seed):
   else:
     seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed")
   if seed2 is None:
-    seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
+    seed2 = random_ops.random_uniform([], 1, 2**63 - 1, dtype=dtypes.int64)
   else:
     with ops.name_scope("seed2") as scope:
       seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64)
diff --git a/tensorflow/python/data/util/random_seed_test.py b/tensorflow/python/data/util/random_seed_test.py
index a809151e6e..5df2e38c62 100644
--- a/tensorflow/python/data/util/random_seed_test.py
+++ b/tensorflow/python/data/util/random_seed_test.py
@@ -41,7 +41,6 @@ class RandomSeedTest(test.TestCase):
         # (input_graph_seed, input_op_seed)
         # and output from get_seed:
         # (output_graph_seed, output_op_seed)
-        ((None, None), (0, 0)),
         ((None, 1), (random_seed.DEFAULT_GRAPH_SEED, 1)),
         ((1, 1), (1, 1)),
         ((0, 0), (0, 2**31 - 1)),  # Avoid nondeterministic (0, 0) output
@@ -78,6 +77,18 @@ class RandomSeedTest(test.TestCase):
       self.assertEqual((g_seed, op_seed), toutput, msg=msg)
       random_seed.set_random_seed(None)
 
+  @test_util.run_in_graph_and_eager_modes
+  def testNondeterministicRandomSeed(self):
+    random_seed.set_random_seed(None)
+    op_seeds = []
+    for _ in range(50):
+      g_seed, op_seed = data_random_seed.get_seed(None)
+      g_seed = self.evaluate(g_seed)
+      op_seed = self.evaluate(op_seed)
+      self.assertEqual(0, g_seed)
+      self.assertNotEqual(0, op_seed)
+      op_seeds.append(op_seed)
+    self.assertGreater(len(set(op_seeds)), 1)
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From bc5635dc3ac78007caee88fabd81d23ad945b637 Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Mon, 8 Oct 2018 14:19:49 -0700
Subject: [PATCH 1262/1357] Update performance documentation.

PiperOrigin-RevId: 216248418
---
 .../performance/model_size_vs_accuracy.png    | Bin 0 -> 18946 bytes
 .../performance/model_size_vs_latency.png     | Bin 0 -> 21380 bytes
 tensorflow/contrib/lite/g3doc/performance.md  |  21 ++++++++++++------
 3 files changed, 14 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png

diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png
new file mode 100644
index 0000000000000000000000000000000000000000..44d0ccd3128dea1c947e57ccbc4e18b2d34cef88
GIT binary patch
literal 18946
zcmeAS@N?(olHy`uVBq!ia0y~yU`l6TV0gyC#=yWZ;h>T$0|Ns~v6E*A2L}g74M$1`
z0|NtRfk$L90|U1Z2s2)~Tla^7fx)uGHKHUqKdq!Zu_%=xH?gE3C%+^oGfAN=wWv5V
zKTp9(&q&W$M<Ju6q`*pFAE7`mzbIW_S;U2vfkA=6)5S5QV$Pepl`)}LfBpD)J~4=e
zaj%7fmR}3AOg~45vBmV@S;BqhzQOy`?#_NT;n@tSn^P}cG7&XpJ+o)gGE3EE&txnf
zwlpz62=#CjNV&D&{>R2{3l6>mg7dz=KI;8FuKNAgIqk|e>$ZNkl5!0cX%TSZP;7BD
z(0a)LW*(Wp#6lP(WS|f<hf}ddz)3<t^Bh=;r$ZziBqPA)(q#;i;b2xeI#a-D{fZSD
zCQ`hTl9H@)uiwA-pKX>qZ}t3*$;XZI@9lYR70#jPrL1|bMT7O#m6gpiXG#_p7Vi9J
zo_&p{*UdR3B;+K^xBqM7_RebKl@8jN<hm+!^)c!EJqH)N_cKInOky=oKc{o?$2>I^
z6_#GN!|C&D!!9l4P&~x}GEw!_wYA<34h_3%|NW_yv#*P35=^x&c_DClxxc!CLW6t1
zT;a=0s%(5R5tkB0Z|y35y&`6((d+B$^}p@oHqO#)(WqTwA-rgcgjr6+I;Nz%yGnWF
z?QE_aa&mI=$lJ})3SIT!!Gi;TettfA>J*cLvGHWj$!dw&+1`7rzW#W*eEz37)>&6H
z-n@CUVf*&<V?B~7X=!2-*H;EF&$zis_3SKD<^@GhPxVL`Hg$<=GkrK;|8KISaT>#g
z&FTI+y1G~Om6etM{Qb+gqvog4T<h{>MNdyzSXyqZ{G7J;FDonS!)MQ?W#6Bq>iy#C
zYH^0$Wp5W1K0X$*CPJ{fx>`&>ZchH2lP6E++}zZ<e*eE&+1J+<{`&GV=B<05%)-#s
zVK;8v2v{AaYkF<U6cM{WADm@mWHxNuw#;|7*^(to1cZc~&dxF|j$Q0G*Xqsv{qhYr
zH>Y1-A8+5rC)+i3s_4Ie|K`|KPMW40os^v1Ja?|Fu$qs8uCA}`|3^o=v*JMU)5OZX
zqx!p^s8)!Aon4&myYKhwmwQdsTC#lk^WRmi0&R|6#)@8?{{H?#!ooXCUJA{eIg^Er
z&8XzXg_z@ezTc}>QBf&)e2kY**6PTwudgF_m+3OB4qKb_{M_7@wzi^QUoyAl+?@3K
zPW`{n^JmPMAs{H|n317T{r%n3-S78l8>|!4joOfYUM}tItfLnfyH|dC;<@kmyBiyu
zmn>14G-=Y0hwbt!;`Um}T9>I<TFyK-*V?$^L&CeeyWJBK6doV%SGTdTId59^?aj%l
z+TlCO-p<;Ue|ecNkDQG}L*Qb!k8ii%|McRbvR=#%hju>MPrd1{udR)Uin_GFzJ6EP
z+f%zrUtifa+dTi%uh;88->ZIK`2Sz+p+ko*yewhxnPt*xoPO@Z?fm^8kIUCTc>TJ2
zV&KENJ8bOipx~9Wt2r@2(b>SvEbZhZ)gwob7QVS*$R}fQ!2Es<^MS?g{V%Sp6u!Hw
zR9Q`}t)qj(s_c!&*H>3Re)@DO^g!N?4ULY?Y>8P}Q7bH31XMi)o%Vc^k&&5F`}>>b
z=Vxbwm-)?I_2%B*=$BVkPM$Px-Z}=al$0ftCQe*9W7aG!as9ZE^U)iVS}*OZt#%C*
zIePncbmYF8owv57pO<3|xODmQ)ZOLpwbIhkPCY)}fAyR1Y%|TcxVWxG8mDe;&7OK~
zZS-^L`|DzNPm!~&(ztf*T32gp>!<bgf4xthI1wP?8tB#|v9RXP$KyNu_7^?v3R>zV
z`t-xY!(EFsyb=-?Tw3NkJLY(gq_Nvf+v;ui{<ZN)E;=*Q*ga6hb!XMrRY{qdD^=9h
zwYwH+Y|XkF6t*T}p^A#illd3^uTMVSck1{1{p<I|MMp=g>gxIiinvB@NND`@Y<B)L
zOLb%8V9oe_dxXTqrY(Q_;9zst-o3T!YAx?uy9Q2)+?=MFdwW}LmXG%|oxtCBw`5LU
zbou2MIdKVz6<e~ddIgHOg516;boH`17KM*iUKewcV3V{IUL+!EmLoCSEcej;`}XIj
zq{YO@ynOl6!op(3Zy6bxj=nxN1vRy{sI6H~vrZR&e&&1W^5u=2Hy=KBj7?fv`d;n#
z*mrk#hp*TXSO2%P?9GjX^7Vfl3knSW{rmT`Z<nfy%7xwK`OnVJUvI})_4%2v-yDmD
z-*S_a4<GB5-dX*9-MwrcIh%;R|MM;{^WF3F+3b)N0gMgu^?xSD?k;=y;DN*MZ*MKj
z-psg{eP>_o>}Ga;qm&Z@3{~IX`DSIUvbtOS`&;g->+9z~-?L+fMNn|?*KMN5;`g&N
zgs+cV>fSFGvOZ2$TU&dX|NM1!KPJZWSbl74Zx@!7Jh`v-_l9lTjvYSycxRH3lZZa3
zTy>R^ky#PFT@O_3<lo=7^Hxr7uA!;v(`RRAzuFhQJufjopTA-5T-ojU_s?COwm58U
z)D-RTz8yO(KxzDNJHMxo&yz<-yU&ZvpEyxa*1Bwob30$mjsivh>aZ}eW_JE#w{Ooj
zOg{GH)&APw-?HxW%iF1FYIZ(7J^l6D<;Bm>y$VToaA>ev%K6~kJHLVggYWO|K0a%H
zKjlCJW7YbNiHF%XZQAtW=4SP)tHYPa?X6m}Y}u;d*;})&KKk|bb>+FuTemK)`T42p
zwvLWYk?oR+GiRRs`}=#b?Ba_VMY;Xb=6!9v(nW7?nF<LDM{Y=9>{_(K@Xn4x=lnlU
zPft%j(7;&p|L^yl8ygxE54XLv_2Wpa0M`+bd#g<M|M|o%CMLG?o3ymFal!!xyMI5D
z<!q~toSkj{^L+ik#rLM0B^+Sb_v@ARyE{7%H?#9cY{{5dwQlZQ+1Onr4-fO(s~8$i
zT(d@JUHtxY3!U4!deki}W*j)+AZJt2ut)<`=sLIaJ$(6c=JOl<_Wx!)d6Kg4&!^Kd
zzpKBzICyn+_{|N8%r9TQT(W$5@vAGEda=8fZ1)ci4*vE1{rM*+C;xohZ~x-XPGg45
z%*>oSJ0`ZYu=x1;Zrrr#)c3dzrLV6UW?$1ebm-8P^>YHZ|M*eS#v^&?-{0Ri_f#57
zN=hC%eq2~gtZm{%L1p*8C-?vVGdDCeOi4>CdUL~Yo=xSU?Dcz>_3c}_e7SM`zdy_G
zHM8IRJ~c8jGBGpr<kQpBi$#~%R((-0GZVA7w+AU-?A~ut_^9QUUCobzKY#z;+*i9>
zB;9F`6F4MS#_zA2w_LPXCw7+zH#hgItE=5-o8?N_R2V3`_pu~=d2zAi?X9WHd}lv;
z^=el7lia_*zAjn1^x}@fWUbIuF8k~LcJ%h@8qD*Ws<koes+Nqb?8Z%-l3rX`=rhkI
za<1N=)eQ{{ee(8xOTDIEQ8zQsyW?@ROZ4XMa{b1}#w#lVn_pdBZJc^)%5pKYW_JFx
z|Ns7ed4FGCSXj99e;cp#is0pZVPRowA~rVlN}E5k{Ql$P<2!pQC;$Jw|Nr8uudiN(
zuaDcCabW=?BO~LLb+NN!cb9!UDjpxOvnaL1N;ducJl2GdkB;uxu>+KHpPrgJ(<qfI
zGBPqEI{Ncj^ZOspS-(%n&+l(-b)C1|{D0oZM@MsRZRzY1)i$d7@*?K+gpf566Kj8e
z+n9WuZ&mpEzQv1`+4<!{Zbxs=>zz1JaNW9fJD06&6=?Ha{G_L!Td_r>)T-pgfd>bh
zE1%CT_p%aEbW!vyRBI7fdSzwsa!^{>o_{~=`cz>j3A>rsM4UQ;GLCczmb|*cX>V_@
zd8Vb}=!p|E!OcI$i4&0<bR3E;j1wREa45D29BS|Yndrok=s-lf@W_$}&R+A@uGKAl
zbw%@~0gqErk&&2~n5t~s!HIibs8^nx|HRGyWOv;p@xLnizg%5i1;xd`-+8`Y=xF3z
zt5S>FUnQRHi2_beP9Hvf(t2sY)Az*H{-pOm74u)7abG5bOt^Ud?Af=i=cZ@+f*PPA
zCoWyubmLdlh6KlTZ^PC`iF)rBR`=7nc73KixXs#Oq@(Sf7@c8aJ6-Nz?mxRKJ=;Uu
zE@W)l<n?ax_WyBn{(ifi?|Sd~%gf6vKRj@p_&rzx(&kMK37dB4#{2UIfA>GPl%MMm
z6D0L~vG9y-Uu}~2+b`bkaC+vcH`mw8A8zMQKQTd3!|Pk&a=*EYLRW{G<lYkD=jWHR
zt#V0AQ=6>rf9&pE*|lM(x8JW@9k)DocUj5%d$Q{4>c{$Iy~D!9f|vWLs;aWi^*cU6
z(Ye9i{jG>=;-@DkEv&5{KYHX85D+kLhLl;(4DDqy)Ai$z9X{-Qx38<~(9xr;4!swz
zU;nO}{^O^}OWSnsX*v&gzu)J)_~L^{j}G11n%&vkn|fh^<B{XXFYl==Hp{uuFmq;P
zU>hjNIBd7g?|tauBGZ4_sQ9hF=lLfewpx_U>0Y?~oZb1#g$ozP?5UWzI(+@7kH_VU
zzq|;vv9Xb`C{S>8blkCf_v9HfI(F`~Tphmt*sWW$f|vV!d~#AaYI~mVbiG&xi^@+b
z4<A0Xu&}tWHrjk!-rZA+T)QK-<wzzbCRYBsyQ@??F)?wDZS}MrJ1p+py_<P`UF^CU
zSyxs#zP`5Bvg(UQ?yW6>=W}mtP|VECba8VF+MXBN)YJsBoZ<8|-NoVS<8Iu%xp&K*
zeH}{Dmfd}Q&sMMB_vp*Z%P~6&8qb{ZVPa+m<+lI7uJ7;Y=vWZ6)N4)L-d!u4KneUv
z@ESXznad4MPn<OGWO=*t*KL&*6#<*me0zF&F1##padj<xdP?-on>PjJ<?IJeo#GM}
z7XJ3`?&)7&UtfG#vL)kU(~ln&>tc5=iv!g+64KI-A3r`kQQ3W-)cc9b?jJsWJb2^A
zjI67xF7B(X_M2-pHQ%S+JmCOC=@#AP7Tukl58LJI9_;;oPdnsXm-9rw?6j1W36mx{
zxw*0R%iAA2cdjqK?q_S=-(QRdNk=%g<=zgPq6o@+O7a^yHP2s-TNh*b_5HD9$5K*K
zo>cR?21e{E(M(KCY-w#3l$2cAHz(6%R?P8#wZ9`p)Rr|evu~>WoVG4*Z<pKRgBLGO
zyl^2vNl6J*WR<?U^6=y1<0csw9_)NvA=$OYV)ORx>6eyxURf6_ZEbD6X6@R{dwVR+
z%*<xYNIAdk+M3A8FJ8P@#N`Nbi+^Hz?mwru^-rHXammS9V>RjPi;K!eMn*4gZPh+K
zU0>bSR<>bv`1(n6=J2HWXS=z%O`1B@H6UPu6>nx%)+)LG;kmcBscL9+oH^t3?*9Js
zpiawcyG>13*2l{yCnrBUJA1ibX4(6Db9a}&7ZMe94G9qebuN^Y7G1cm!{u6b{$?{f
zKL;P5nvs!^w6rv+$~$lWKjy{f@O3dKPfgWk+)@0TPf=0PRpjMrRZUILqJ{0qMThp<
zO3AAVb6?b_Oj`P6@5lQdix;LX75Q44o|YygB;*tkA@T9!$Cj3s2~(!Hl$4lEn>Ovw
zx7+zOUoN_Xx^L?K^8(hzSnm7rh+E1eqoK8x)z8mQqpPX6m-q9tv&u?JP1ob9dFAEh
zlai7yyetV>71B9jg21lQ*KW;hyagpCDaU#wv+hsT3SAVm)XOB}0z<Fc;SNFN9W_6T
z*3DorRja*kY;JBIwkE>S!J*;aw>kT~Cb|9m{g_ApB)|Qi1Ly7kuelezt?qACpRDyU
zkjUNL<%Wia4{vNtJ~PjDwr%w{o3BxMd3l>S-u_qT{`XS7@}mEzN%kjuLB*_Bc3jcC
zYc@5yeN(>Jzqd6uH(j=Nv95NT+3^z^|5qLpKk4n^!O<<QpZ5IR-1DXT6ss@pT6$uc
zY4-JXXFonZUis&TVb`L92L~9X%=3D-Z#R$Lmcy8EcUS3`cXxNgvX+EoW`yAu%kFzc
z2an#9iuN{+d9yb1%QWxC)|nbsu4Zd*E^_7m_4Rf8>ebq6YHB9g*Ln<-k6qYXUCzcU
zb)-kqIOEzH$;EEHTB`&=qYQxz%F4WZOTSf3KcnFl7Z<m*$mCP1#--A8&(6#|Jjb%Q
z=<Vw8^>XU|^K?$-wSw|?T;wNCMr^|pWY=P-qZ{-!W`dITxpgr+oBWms?_aZK&4<sQ
zm34G@{Qdn)tYq`@^5)o7ZUTk-l)`D#rX9U@y@{2(D0h3-)m0w9rA@O$d}o`vuD*I{
zeZ2joNt3#^*ekkx)ZFyw%*m6T8#WjmI&{dQ_!*C=R>%VX`F5bheP)(v_l6Ay&(6&~
z?Ji#%5~9eV=(Tc<PHXPMi(g`@ShoNC{bJ`Aj@wR)Po4_$(pk1&JbnM)Z_yLKZBIEV
zbZ(yQ>7B*TSH$i%J3UQT`_jJ_fuqYKmYMR)SUCLs^|i2|z+v&l2{UGJI5;^mnPy$l
z=o4rWSn7hmUghTGJb3zacirD#7dIq2xA90eElQCue|1GOYHOD3<z>DfKYVy_aj|<!
zTAESir<6N)?_SK>TJ!tu_E)91wr0D3e|NXAs3_^pjg2X(sfMMmLV|*VF1#$^ku+lY
zQGCTOP(;mtUeD&u#(H{sEUc`e62X0^r|Um{_;6vKyQ}NNM~|2qrcRw&^>@k?5fiDi
zr+qdHEMl1{;;!D<*!bhe4~7TNpSxdQ7dz7=Qz$Yr()HhdZ$+1voBrnTxGO!Xm@{o=
zcD+}XTKoE=8z0B;>@EFPd5)Wx_vxdf-Jmw-wYAa34-c_MZO=P<ppluAi|f$cyK|2m
zaXEALET~^`?b@|DR;69){_`&ED$U+i@{;Mm$;s-R{QSoc9&F6JyK7_V>o76hs3V(F
zPjhf{pT7M5+S+JNZtll#Zf-tu^ytHP@BDU`z5Q~m<jae|TU#<0&%3=L(fQJ)OC3Eu
zOY-jSVqs-9EO_9s_3zv5_tUPgi#5r+Gedvhk4H<NgmVOb>e^%B?d_fX``cTcm>mKQ
zPft(ZoN`j=Ue#+|v;2E!Rs=4-vA@24-sdp1a#r@0wj!*Y-G6Du^c~fnJ;kTaEH`cb
z9q!CoS;5<I=8TV%lM@Fw_u-2dCw6pjC@Cqeh}x<Z7#IlZV1=v*XngYINlfLf9fiu~
zd3O%nzc1hL<Vni5oSRK6S8DqC`Q6!7+8w<;@8X(BV<u+ija#>d{ysHvv#f-K$L6%N
z7BxQ%Cd3x3joi$}&E0Kd`1ao3**!fyG3!s9J=@#LEpAltAt7vi+*$ScHH*F(CmrEv
z<C8rV9$$NOj%9Jh->=u>E%q<{X#3g71JtY(=~dD{SN8s1@9NdsS678Hg9^BspHHVV
ze0hI=e)+vh_xF{_$9fXa%rJEL#;>lgzkU(l*QpbXMU{#s?|Bg}_1P%%pWUNLmp<ig
z-)i#s3xhsGPjBzl>-+2f*L{6&**b0aZtH8;u2mMLWM`lL^78V_TU)c&{Z&*{ym8~k
zisScp6gGePQnF{yo}#Z`ukUMaZqB^1Vd1T9RbR7WVq!LI+H^{I^5VtG^CGgYu5$hV
z?=L9%iE4+XytuHiGH&zAm74qZ?W_5AGyUhYr5n5+S3gUg0BR*`O1ajZkA(E00~Wi1
z2HIE-=<om2w0LoHV7o$#hU=R(XX97=T|Z~eoQ~e!r&m@6GuTvp>FDX<0gZ8edV1Q^
z-=F=#tE;P3)YX}9UY<2^;>48HR6!AuC4SDcX3bi4{D0`}ZMmFWTuR2qlYf7I|M~U$
z{m*vwuTl8=`ug!3H)d3Rezr1rR@Wi{LBWOpYJY!wYm#$g!-eZ2P7>Rcbk9wiJo)DK
zeEHhn-$K8yjoRA9Z~te)GT+%owZBSAUj^AnM=CLF%e{T<&d%a)lWF_8xVe+BuZw;4
z?c3Yiky|n@a>rjkeY*Sda)0IvD}&YLY$`q+x~_B9ZR^VJohvuqGb;{GzLy!j{lax4
zV`F6vjgIg4s{5Du&c3v>I6Wvh7}QhBxU<7>-MV!a*4Cfj?S8-Na__r$?|fz$G`jUl
z1#Zua{q^M~b4vfW?5$B>8yjS5e|>RWef83o%wVgM7Y*yz>t9_RUcc<RO~C_)egFSe
zv++nUq==^|yY(D6dX!aMTs$c``R9|#{_|eWa^g6=H1OZQiV2KG#l^w(l~c<iBe`y$
zKepkQhss7y%^Q)kjaT``R++ACoH)@<)@j?8Ejrsud%HI4Hyk<Q^3`)o{r}(hL)J!3
zT^YRmndQ3B)nPgJ_sK>_-%gT@`u_d<^Di$i>(`!S(%07)(~s+!JzM(qwY87W&9w#%
z#OXi1wY2b*;`7Y2hp#{K@cjSzo^f$-@-3@do02k$dwRI)iau5r2@44wdUUk==FZ~h
zo7?`L(&&9ZbGqZFMf0Dy?iXJh{Y03Fr$a|D;D;*Hy12bwJByxf*t(Tf!OU#hG~MV&
zj~*TBku<&{;q2t};^N}=s(US*!dGVRd46tg_wL=+=gyr2jra9lJbk)bGkDp9hYt^C
zuity@;o){qZ|}#G{p}PrG&tt^xd#LY{QLLMXNCcz!u6)p&z?P#u`20!@+8HrPo}fI
zo&D@AQ)LSaiH0dtMA&$xTo$|azPP-cUsF>vt7dQ2S0x372buYkjn1F&INY`HY~+_|
z+<M=x-H4j?Fe(4+qU|wUu`@UoTNJgYEj4j@D3aF6kZt??Wwp%x!-p4#=nEEnc@Y>B
z8+-L_>gj2Qd3P+@_~p-CkFSqiQR6?uz;P~@lv$31b@{s`>F4J$q@<)2e0t(p_VyO2
z__(pZ-oE(xxwEE4(}JHcX4RFwxzV_Oz5d#$tx4C`M3%g`&}f={?ZbzM&NenSpb_2s
zb-#5*wL%_TTIxN=x?J!2>lGm@h5YB)DB9W0tN-&@zO_m=b>hT{ZTGCI_j;#^sOszM
zOR5SuiC9Wn8Y`^scX_6HrfA8M^iOvmdhE@Wlasr$CQ?{gS-HeYHZU+SW@izrwYBw~
zyLTJkT-2bT*YfpPR;}URyA6!YS1xYbFP@%#eVwYBTAN$1)XL+3LUVF-LRW<ZZr6|5
zvB0jvDCfq8<;UheesHjPOU6YeadGjwpHId0;`XfA_VkYW%{`UHdpEu=etXMQ&41pR
zXJ=;{nwqA*zqeN>Y756)zxL_V#aFIe2`b&=>;Hz<KJr_Ab<x$V;@IU`S63Z5aztd6
zzhaBPQvXKf^vK9aP<L_042d&m&lZ-IEz7#P>ZoA!OySuRf;!4B+x$K~ReSlryEkrF
zoVW6<U#h|7JzekWZkGf1m-EM`9Q}Cch)KZ%hhN{{``_PJdojbLa^4+NGqbb{3mkv^
z`t|AC?fmCi#^&bgii!&lRQUS%oH%tVXnSyQ@KyWc{qpBuUS7U3bhX&#^z&i2|4G$5
z{yZ~r%9JAwjLZ>Rvqa0v${fnp)&KeEK4;DxE}Q@BgMxz%&Gr8Xw>xntdQGo!KL;A&
zn_-v?>UM$p_zDUQ`TKsh`OUR@`10k+TU)b>|6UT-_nXF{@kfZUSIRV~pShr{OwGVR
z;6$8*NqciM^TUS^Z7M&twDZdsJvhMl;Jnh=YuBbNS)#Hn=VnmC9#QSECl?pHSKW5&
zlR0@if4}eTZMj!#V|SOOzPz;b)!v+i3l}nM$-ds_JKK!$rgGi$b8~~w2a2@rE`J}i
z+;1+^V?ig5K&z+GbI#5%Y+mTxUi9}@>9;pG4}ZU3e}0;7v_<(lncLg*)fE*Rd8N%n
z($_4V2Pp_ZjjW7}6;VGxrRDnl|D?pl#TPAJeDyIXVj37hen>mez-W?vjfX*6TH38&
zu6Ofh<JjG0q08&%gT{P6d?)~wspk3jQch3PT@$(4tybf~mzS5%%(0yO<x9zyC|%!K
zCW+6^%w*`1Ht$=qL`BLp>xlaN8mG_C&R)%)Ygx?Z@9*#9^Q0_5u|;EbiY6~}&<s_k
zy1!ooCf=FwM@vue!@u9}FK<rww<>)#<$2+|M{)lir&j*|`~BtB)#9?&Wi^`DCm+A_
zS$*e4e{ewu8jCu*ZHk+V3yX6*-_y0*?>)M)G5P%6g(v3g2!6;}=0E@3<Kz9_Rz1^C
z-Msm8sc4!Lhhm)X&vSE^EL#?|ucorJ`q`P8KYsmU0!`}e_15kXSNCQSiQAfW^~KfI
z)9-ByFTBD78qidgZ4z+m5SsS>MCAE-wvqem_U_8f%JK>c5i!rdcjQLI|Jp!s59ZO)
zC((2M{`q`fMNRF}+Gz8-zhA?}^yALVv#tK|zW)F0*O?aJF`SQ|o>;HV%*i>^!YM3a
zmLu``+1ccupPrtXZ$JOn*Vp3dHDJ?Hz4jfw*(t34<Im^whQ`LpzrMVjF?;sskH_Vc
z^YhQ2nQ6Rn<3`PM7T{jgsg|1V&E@xNr^oIt1Euua`TNgaT<p%t!}H|ALT4TsiwU;X
z-$c^qKn!-;cXVcFCud+_po+SBa$4H5clr1C*}lEKU4PmGP$y3D)U2BB&AzkEKEB<4
zKPfG3+Uu3z@j8ycl25wlWMpN3KI+z=SG8%8hW?j_H^62pZLgVr>BF~gX)iA=y|b%S
z`})5vTTDRZid&Dw!Cj@VyKEdlUhmkXa)09H`St&1nr2^n@c6NFN{Wh^nc0$M%O=g7
z={a35_Q!)}{ww?w#lVK@yYD+1`Sa6LN&7k*(9EB$?c5zZEY6%go65-xHce?te&M(9
zWy_Y`xOr2wR0pgnHE!Q`$jo=gBX!XHHmC=vxxpfOSBWOSoK3|u?)aTWtaJUA*F{D~
zK79UsdEDm2!)+#+ms<Mme!aN47;Lr1Qm#GE-_-yAE&uM_yB)iB85KS8*j4s6YQ@&G
zbLPz9;O1slFg6yhuCD&|<>ldn&Fq|fe9zw8+zgsVss8?M&cX6|u(betf2;;|b6Yf2
zxnwM7+t=G&Ul*HvWkukR-@hNfcyZ#(%ga0K{#Grtwk&wS0BU_5JlOd3^mOCWS0SgS
zYO_0BT@^atz6$Kwpk)F6*;yu1tHRg&eSLKmG=^}nnf=S_>;32ET0egMI{Lv+XkVw`
z>#I;cdAl>m<?DTZetH_Yr^4{vyLXl6B6k+4YKO05d2nv7bxTW&&EeBpdnZkrq#|Ag
zZb(1zFIqewH15(}H)*=kB9Q~<&-)jamWJ*sdATU(-kwNLZ|`6c*FYm<<Ed3&Ux}9N
z`t;<a@KgE!KiZ4Fzl(JZ6j|yyS?yE{r|^_DYjmC-=@j;wYgIaB&6+jK;}%zbe#ZJO
zI84T-V#5+=0}t>B^3Tb4UZ__>^D?N>!X;z*`*FYhja{YMe0+R6ze!6<ZY+L&j;VY9
z-QDGnU%m3$nst?7MdW6++TY(?7hinv@L}V`iGqcNg&`}0o?4#v1_#rn#Rsa-foAZu
z!q=_IQ&&?fdUS-7LCQ2MB!Qigk@3|OwF9&B_nkc0%-;A$^W2s#CONse^DPS?dRH8H
zlyhfCVMJ8as@v11P3w_1Ul$<m@8_3xe_!pFx3|OZnioCs$h)(nar0*5W;R~XnuFG=
zU%{XuwnaJ`1yR9)Q(}%zo;XoZQc|*Y*N)a!){h@Q?))}$=FE?Ozu&h{o&ias&W+0H
zUte8)yn6jUC0pCMU%r&=`}4{B)z#JEG8@5*Hl|pzilu9Zt$DEhe%;}F_vXzq%?5d+
zw38d;PL7X7q84i-w`PShF*A27i-E*CjMg7`q+@D2_3G;I;*XD9gMx#v{>{F&M)T4E
zNS0p2wA{3vSNho9-Q_nor}MwPz5V&Qxz-_TA~sqifme)Z6m#jEQ&LizVV*B1CME_N
zI-6hj%QN;lcqYMVPYkPA`m$xq7;fCSQSk5(tB{b;j`H_$6WP^3<_C(2R-C^%!!Wtx
z!vn_8&(ALpUmvHUs%n^jZ_lF+UvT)Gsmv;zHg#(2)TyGN!L-=jWp{QIF5dA-2oxq7
zljm8j%}hyYS-(EN@Td~l$Y-Jz=OdT<&5hVoVYog2{<D7je*ybJGKxn|hd;1QcXV`Q
zsQLT#`YXF$H|1bR+2*?7z#|<aqfHi%z`0I9WRXSm{<^*Q{vK}UuYY+E<PfJRrL1D<
z%l+mC{hv2$)~QBj_JY#VsgJ<ET*M&~F-JFcm&x^ShuirjO)>%|vZ{cr44fwVW6QJq
zkr5FmYBN9%7I0m1VB+R^w$+O+W;DEU%{+Ygu%tnP10pyg=J?Jsd3nCs*QcPMU_sj1
zS*}u3KyGr03{5Bu+g+CX=FJ<8_<c4Enwpv}uC9U0{pK#}gNBsmhk2X-{r#Ps^5uEr
zv17-+yuZKSqZtxTtGRT}85tQF7#jy~KYQkkhqt%(r3D}tDxO-!)V*0Nbk&C1-({uN
z78VlG(YKlOYo4AGEwP&WD>XH>b^iSMxBeX!k6&;xqocdK`rHL@-QhK#se7}icG!l5
zgG^c1*ZID^we{zl&F6o7yPeOyXT#*l!inI)>cVGdB(*|TILx=JW!O>s+pPNgyQO7s
zZ|VQzIt3aLZI`cGk*6QGXU5IV>Cdg)mS27vS^w_G$H$;1=3GDb#csV<ivRrmYiMNj
z=+4gKR~yfrJLlr+YHP}8Z*MPWU-!ppz3co569lThyjZyXZLj&g2M-UoGeks1?fbG1
z<Vxq@gu<}>b+w?W!HNothYufuhIm1P;WMoeMJHoJ{omK|KYsmM<y|r*6x3gHTKsb7
z7mmEo-!B%w6x`P8HhJpOCsylXb{6H_-DP?|@R;{>J<tp@sBh!v7xukh!mw$ocDRt3
z*s)e_@gm*Epv`H%c7MNIPVd^jqHFW=n_IKR&CJY>-dZl;#1S}&OXnP@k#V@4f8&-d
zM=o8O#Lh2gkbJBs{gsM=fq=Zcyh3%<m3#N*E%Tji7k>or@Y&?m4#tLtpiv5)$W1Qq
zw=yv?ftm)fyGj%-EF$jKzrSbu=FOXiH|&;D=6P$jeSLAUxm#Sn=;^7cHrH?OF4xzM
z-ge~b)v5pf{+>K#%8~H+T2~(*o_qK1Ra93$w_NTw*UD$M8Sh-b_Sv(g_y7NA9lN`1
zY2DvnYa%u>F}McKu&FdUckbMd9XkX>M3&5(KVyam%4pom>sQ04)cyPUe8=wHi?3#Z
zD(~Q8-4+3-JziWfUdi9z-PMWM;Ba?c$V#EzWp9)I{rTDYN<mk*_wDWNn+qN~$=TPP
zX=3HRvAaCK@%h@Aos*_&hcgvCKPT(t<ivIL{_01GURJ)oK?Q|D|E^7HoT%0&TP>H6
zo5$slmZlc9c3R+Kw}n@;cGms10(Bn!{`~#>^Uvq=7c)$BqPO)(nPxG3cs4sfXn$R8
ziIwcTckfDGU*k14Hr9*Z=aZA8V_W?#<<XH&pV?-;PoAXo$=jd1xj7xw#gEvMA^7as
zGZq$>5BI9y8zvp$&<bC7=FiX18@Fvc_xZ1fudk}9si?iZ{l0&{vOzNzmzH{adV427
zIWf^B=f(u*cD}+74;(?|wvdpJgn6EfnwlDDWyj;={g2<j_urazb%s$Y*XL(v7uWv&
zwq)6|qAxE3*Q{N8^2`|(6_u9Y<$i@PE-2R2)&`2S_S^rHC@%i2^sjI4tVxq5dEMPV
zce*Q5UA!nTp>W#FnI|W!`)h=+lbNvh+|9}A{#<ttzPr1-u(Wh()Yhyy_Vx2lobV{U
z^5)!J>m9px6@7Z*xhj19yw`gSlaCpsosnR;bLS3dTDD~V`lfl_{RMVb|Ch(bzBzmI
zW1O@?^_A7t-{18}8n@Y2f4i`wFnQ9XNgqCa0!`0dSryvd$jm-z=FF4R<Lf%ZRxd4j
zda9zb(lGfL&$oAXy%Q1?K<P<QQPIWK71Z?HQ~5dL(h^QSKE8;EhzBoUo^0h7@967G
zyR{|Lp{(%krd01ed-i||zO~WYul;uL`1$E6XjJUmw{LfL6f%R>;Z!{4Wl-~*)8V&V
z*}`JR$H&JnZ%lS)`1kw%|D>#}F2Ci6FJGQ~u$i6Npzcq_xw+QUm-{CtC-2#_=f=)r
zb#c9z2l4-ZiBI@@SkcaI-<7@YacvVO2%Pk`Lz;!wlwuW2cW&cZn0&l%&);vi8P>$^
zp7!u?yZEl=h6aXGuggb|wpM?Cw=x$r=l130<<_cSo40iOdQ?99#&7qd;aIQq>hJFy
z%j)#w_pQ<6nlod@iGKTkCth4!-1w%N?Rc;B@fQ~tuMAqs_3`7!S3A|z)M|b_Y_Iut
zGyRpFYanP|pOKOA{GVAxToua7%3t4G-j<Y<^tb)~{=2;0+d3E5z!_$_Qt$WuUboBt
z-*dGVfkj(1KFr&!8NAFO`52F0?5-nAy{FHxELIC%=A(J#=w$d{T<t>#Z)xk-%l!|(
z%swy48@;#6bi&`r<+IK6pZ)py*~QgWP*l{lpkPB(#NRKM{WZM4ZF#vh`?`Ud*}3B4
zzxMxs`hR<W|NP(I-x)uA`SRq&#l<czE(|yCA6^~4o+)5slB=9;)t5W#cjn&S#=^=9
zDl`B7{{Hjp_4tn;J}^9Za&oc@Y|>C5_tut^->pF1*|W1uRa8|O1!QH<Hna23v(^JG
zNthrKF^93_^|juk-Qt@oJ|^kt==|9K|Mz~U*gJ;~fo5vgtX=zco4c#))RTe64S~6@
zlJc_FY(IPF%8c?`*RI|Mt<m|)_VaK%e{z2Q`g_T27uLmEA3A&(G--agjn~4`a^dBd
zprTXSJnzU2i}LwtX=y%lECOXOt_fPo_3z)mA3uI9xSExDbCc?H{dl%b%V+H_eZA~^
zW5XfP@Kn^=Y0G?PUs~wQUQ=6p^<_?ZdAXX;42Q*Ty(^-(uiN#e%Pw|TN#chG2e;(h
z6cQH~|Mm5C_#6L;T#79MuC9xp@T|3Q&B)MTWMs^_w`b<A($|-^=f~fxe!uro2mgtE
z&)<A`dHLwI_`(yFQJ+6-wFtUoeD&|MWyiLZZb?0Q;hBWw)48`M|EvWquJDZegwoB`
zkJ|pE`uxqStHU`tI205V7^0)2+4y8S7HP=XRD9TJEnIJ8bZJ9UMseAk?uWY%PduWw
zBq2TboOSN&OTp33`H7%LTE`{*`;XQn9&X!`dwbek>+)6I=bmjpH?6|X%}uK6^_!P#
zH|xI)05v5N6+Z2Kp7Z+J+8Z}-K79N3?2C(wL3M2F3`S9K(??(7{-ZZ%&F`xi7)$`o
z!MuIjJAc0X_jh-b@9ZdSjdBSCH?wSZGtIxYWy6Mq4-XE$xv{a?y8PXRHIbWRUMGQ@
zxE<4%2Y6>bSrM{QDEU~=!vl@XuA=?m2GY!LKG1%fMigX0hZDg?%%DylS!-U1nJx6`
zdUYs#T}<MG1C6h~^-7!1urAlTc=6(!8ylI8jg4PjUw{7A)@)F3e3bzx_!Q$bUCoM~
zJ$d315g`FuG+|lX#wDtCVY$D&Rr$L!t=!_EA)57jze#b6>*+w|O%>xTj~cD5d~ksA
z)%ErL&CSfQyGjloQK>#~?(*fy*5&U++&PZAExu?_@*<$rT3NYSD|FR`#qRyf=b2Z3
z%K<GYPOtaz;raXPE3<-%N=xSDWqa>5FhZJEF)tp=+t>Mge|I<Y#s)>u2+gZ2EA?&F
z6k7xqt<jqHNC(_hnQ!y^`kKhYuh;L7lTqBW0mn3Ak!*Tm;=xvKaSu;V#)SL(YH!@W
zZC}3MH0w&j>1n$9BGr!bwpuXHFv%2}t{<;vVlrj1d;g~D@9&hl{5TX_G@`rS%-bvj
zTH$topY8cCPnXZH>+0;}bZ+ChSbgu*RBccPm7)CI9YqzDB`ch3SIz#|?^!j;ar!B)
z5Vi`RFH7xj6g<}9`@dp=-1M0<JtwRAe)#%zY3=WCI+2@N#B`$y-rccua&q$U@i}t+
z`t%z&B2G=!KHe^0=kfQ~*UqjkB{j9St=ZQnO_*?Cfn)QFi;LOI-``8UwIvf&{BPJ`
zU}R*(!O5wps@l4K-><F}D>RIZj3f+`SaNS|dH8&O{j=@&>y%ARr_PumacfKF;Ts#1
z87)d*iTwNbZ^`oI!HeB^m-);*v@UjcM_=Eu!-tu(v$Ln^L@wGlZL+%mrtIr_Y3Jwh
zIz&WBI669lrs{g#7T5W&3R?^6e;AmX>o0rn7U${dsrpaO&}Gu}Kc0Ee1ylZ?f?TF3
zZOgrFwzT;9xt-tE#_kUL_~lE;q{)-HOM9Q4oh|N~(faNF{r4KZY45^hmc2Ri`D($j
zywvCyvR@;(xw)Udxw-jNbpGC@OO`Cr@SSZI$|q;DqU8O(*gz52nFfhXM%CYPTmwa(
z9%y9ty1dL+6uSC0YHe8MpC22G{{O38ns#=US4qhhAxX)VCYhI90!3UcD?TiE^z!9O
zjjlyLv(0>OZb)oiq|x>1!9nJu7cT~SdU*w1TNk_f%G&7g${!yVPMS1n(Uo<v(K}0D
zPXim6dwZK#P|&1B8eN+*E-Iy-n`7CvNTaK{xw+{7zrQx?t!-^%SB9>hR(0;i#^kGZ
z#Wocm99BkeUsv?^*VpNj-#jn*@Zg}(+xr1n%1Du)Cn6mj9M)-`onQY?GCn?@Pu9w%
zl}mJ9(XxjTwJMX2XU?p;9dya~=eq)xy`}Yju~}QA7A;!D!NKvM*Zkgz(mxZG-G6*M
zF3<Sl!b0YcA3v(7sWE|8{hm2{*3jJi`0d-XC#(B|mLh-KUH(39TmJoZ_m)qaHS5&c
z=<N(IZfsP(cI_GiXo42B!u-QS=R=1NXWrdqI>C0)%}uGNm+#9H78c%B`ubR(to4iQ
z>*ekLei7ccZ(mlmn;V;*ot@7di-}cVU)g?*0+q5Fa}ZN)_v?PI-Iaf+g%dQWd~U8a
z^G$x%H4z&ZX&kOd1T8YUb8mIHzMAhW7Y`4Pj~_p_zH(oD(WdnGdJ)%$$K~sfoII&{
z{hzS9U%<K;%hKC7Zp`TE;Sq6NSp57P3k!>g)-)N=P{z$osUhoPB+bpuwZhh{*w!m)
z%;xUyZujFsbNa4jpmO<B_!54Wh=>RQdHMByj~HBBTnc`F%PpPB>#To<;mw-)b^25H
zeVeH4Zj^mZCnhGQ=xzS(ZMxFtc`Tde@7uC<>(NuExEdZjNC*lJ?(FYZS5aXpFaKWi
z`^~Mb+TPRkTmv`BdvNgb)@Jq0n>SC!wrWa82ZzG$f_+tAwHRRI<>#~S@3RH9HbJ`$
zE-Y|-^-t0`ZNh{J4fE&U5B?3Rn~rRr=+B|ig4Sp_yLo@w*;zhwEDX>8ySX{N_}LlB
zEm69`%l%UC?kdfi9k%-D?c37+{{Cs_=6Lo>nSScce0FB$lI6=6U(L$Aw1m?*{anqf
z>=jsB5AFwcmA-D<zP)_j=bql)mlqea?=F8IHl2BA@pHcUb-y$pK76=i*RDmEU;cQv
z`~AFI`#}N8<-Yg{&t3@wb8~aQ`F62Mf74Snd;b38X<vVI;-r7i_okyX3G7*a8nw2x
zJb3yvRHt^Ymx}%`tm&SaacSekiEX0Jvo}AE;khr{r(&kaYMottFEe`jyjxO7!$8w%
z0*k&(ezaynPmjr$Yd4=)X@Li>9;HsMJ_oM1aP1I6sa&VLdgOyz5v}N6`7Hh0o13rF
z!~TUrYoSfo%QRnDoj-j3eE;!&dGTLAj`z!7-dkNRWtQVHS<QEbRcY3~<9WBY_3kWw
zzOToCTUAG=Cw6z)%G<ZM=civ><f>m2tFNzL`gexkT&s@{4mOML1Es-4jkuGKaI6U}
zTE9FSRyUpe_vUo%?{A6u|NnR=f>v<oMsM2_@P@VW+9RHXOG`Xs_SMY1a3P>i&i2&1
zySu;2J%9cB^pB5^<K1Soe)|4sP5Amae_grx$2TM%{_$*fe%ve5RspAuNncKhrdzK5
z>i_?J<vDkCf$39@{d~c)EZ6q@jrZrDXkNHi@tC*n*GqK<CMKp2A3p}({`ccC|Gk>e
zzFVSn=gpfp$GY4v*Sh+f4ydPo@#4hL)nSYRqN1&vHy6*FF=_H-Ny{P?IXOAd0;>lP
z5*Ysc{rmFvcKy4%N<j-S`t5#s*rva~w-?mx3=a<nO^XEu3B}ib6*V+81oiQMYAgi}
ztN;7^d-ZKg<#JK2kbr(uW8=eDuTHftfA^&G<EyKyr)Y=sMMp<7C@Cw0n!kE_dS_-B
zGJAWUj=6r;F!2xzXg?FEZS(HkI}2-T<FqppUyE(`?%4zC$jZsd?fG^qTgI+t#+EH6
zc0Zp8H#Id~cv-^2%KGsA`|~ZF!VDK)mb{w2bEjoHpX@2~eoakH9x0O!w_d4>%l+lI
z-e`N%^gh!5Oe}|Di$Tizn8Iy;v8}!FS@*Y@jaSLQV8WFvA?M~=yLXG}UfCA8ISsUs
z<lJ2A<4vsG5_UB^Zk4^eV+opH0IjOKv$MFf^JMMa{lC6k_J4U{A+x&wyeAJ1Hv7!8
znfc^NibC}l4GoQs&(F^bi-;U~dwY9lcX#r?KR@5x-#`C;{eM|=bMt?n=l?e-eB_dK
zWyQgR&Fq#H9}?EtS(Uyz(kZNd<&cVohQ@XFiOe=NKPJdpmtEMHeB8!R&bCS<JUqPg
z_uudL)9>skoMT@vXKlT^CjNH3?+k;+;N^ZR7f-Y<e;0Dw$jAt^xVEmY?#xVMc4K2>
z(26b4LLzSN<Oc^DcdncI`QZ1@&(0n`d$u=hZPe4*$Df{_9=y<r71Y_AV_9rb{H&*H
zuGds8&~jZNAtA1>QlRYW{QZ<@I?4?5iJ0G8qI3fT13`lt$E5S0WLXzI;W*sJ%e=t3
zo$tu;<C8%Py58U03#!YmT@%aC&-a^WGc(_(`un@oi;G-elvwTAv&X~N_vy8@(F`ST
zZU|;)XP=s`@BjE%@6O84YRCKK)opC%Tw5Ex`0~q$?Rm21`S)sGy*4v6To}DQPsX~e
zr>BPplm)83zgxR4JtgJA<Hv`ObPBHwUmrK`b8>R>%AloPyK29@2rLybRA*<ktoWdy
z8@1)Y(W9-PCA8h*`m1tle}7|LQ2F_pi>s^Xt18fB$)_hLbMEbt3=Y0r^ZCc?_4`+S
z=gZm}wIwQd-D}YLiXT6IIDORwZFJlf{_gSYB;lZWJNJgKj{|MznlNF4(|@7s87BUB
z|7p7hf@=1{qM}n<{~zfTo}wM@ch}6!Oifvt8Qd-ZZ0vuWSIVU0=H~Rv%Y3Ep+_|$N
zV&kG)W@%?6KvP~1n)$!X%g)Q2mtNv|KikOAP*Gdk`>vm_Z|Hv+`#PKD{_~fGt&KV}
z&o(-j`O5W~pQX4b!Ka+3S`|%?m*bu^Yt}67^G+O!M^?X?Ds8!WvswK!>^0{5t(yqd
zn4#|va-r9kzvpd5>aH*OzUSN9+r?jBg@&z-nwsrZe5^-OMMdSo^XJp^=YyuLe=}#?
z-DTR?*vOE@#~>pk!@<x0{N!YHP%>v`=VPcB%iOkYn~JKc=&OSr%lA&3sO%1!cfY(X
zH@Y;wt}^HPI@#G~xl9iZwQ}!##~UI7o}k`a^%Xoptr*9;KBmxYm2d1B-`=m=X3m!S
z?00)t<+tl=A}4nWt3SJGU;QoT+#Jj1u+^!ruB==Wz1=VOa@N+0>gvZYU!FWO(|Bdz
zVz+&UXVIsY{cCwcSA`tBe_vi+UVclIu5R?UC!e04cKF5-u|4nZCmR8Vn~Q^Vqqn7e
ze|Ptl@9*+=cOG8$w^y~XkqHkEFR_xnU;90FUh(a%+3w(}aL_`rn!jJKgN8n3W!K7u
zuMX2?<B@3CyxEwUjb}mR<}}cJhe6esj5%}XzPz>db#V7pnS1x{wY0ad=4W}bqw=#_
z^tPPDv$ITJIWsQzpMTE0|M9V2Wlhaaw_d4_C)MX)*i%{jH7fnwoP(Dy3nwT4eGlsV
zJ_=nNSX37+4Qc|E#;%^T+>}4QTUqt*`Rem$e0wurXPoX@c-GM9R6W<HP;nulrs>nK
z7tj6u4>38u{OqMmla?%5BHpv^$KL<{et&s&l{-24@$+-unctM&Pru}c7(EpK`!8y{
zO@DaalQSo<FSYTiP5icKng9HMXR=pFTBWC_@AY2}8qiq3D*Rm|!wJ0$;K70Ff2^se
z7lBsEuMXGWo4fwe3dsX6E-v=AVo_`nc(k<YsT-sb;bjcoyTPHzb3*&0Ljz(r$sy1-
zE?qV4P2kNS$dirYfA!+`{c+mPy;tb_oTEpNPA%bJd+a)&L31sZ)=pkto_@(bpKOo|
zk;er-zPY(MeR_$&qnm$9TWdb5gZ8PQ^rIHt?|$-hYK4LW2e{`u0kQ`Lya}3dckOSp
zd-v{vn(k`8vpzhX9?x`B{zhJY{`$I%AM0Xwds$q16u#U~HYO$pRLcb$*8lm)KG(YZ
z*u{$z&&{>|{NP}-P~WDC*queLwf-+ktRQQDwI{!yK2g~{XlqvJoVjza*8bdUFXEc`
z=tw7M75Un*)9?5Hj|)l^x2&z4X;W#$D{bbo`s$_S{_=8mH5?8hAtKM7J$v;#?ahsi
zs-k5RFHfJo{O|6_9R-TKQYHsZp6s-({x-$s#8IdCI5s|+70;IV&9!<_VznS=Z~Uj<
zcmD*{AJaILaP-I#jr4PK0!3T{g@lDg-QC?mYg$jOiQGKpy#4<@EsM1y`S|!wfnw(E
zY@5nWn+hH}xdw`)K0MTV3bdwJXrW|FPY(}hgxM=3BxGjIei^G056j|bJ(rev3YW&$
zz3&Ij5x%^(cJ`#HQ<utEm-&4B@IhnZjPB-rd*0mK?C$CBA1>k=X!q-dvZ?;Wi4%AB
zX`lV_RCRy7IcQhwmZ-IpX3bg!+P}9leEqyh6DJ1Fc_=1bTlsS}X!+c!IhMsDuA0&B
zr~j!l-+yMlp;1xV0nj#&(o)t1Nk_X(vaWQzc=1B0?^FRRJ3DBdC<AD@!M5DnUKWcc
z6~hNpoITrvlt8Oro}QXo{Py(wzk)I{XFfbUd@;kMqN?iChlkEd$;sWe&mJw%*N2X~
zob>kh_kVY1XRy%1sT#GFC;bZw44T<^7sc!>y0SKUIvcN4!KWuDmHIZM*y!qaE!t4|
zIjyFqrpt2Gr0MtHKb!v3*sW`GxJyWgh)3$>vkVNJ?x1r47A{-}nhtA7Jw0vV)htQN
zqLi7l_c33{Fe!O?srA{}*_(@=dNKU_zW;w>dOEwq*H>46-v9qEe3Ig{6$cI+h_N?0
z{p&aDr}}yqclY4cVY**;%recMcJn{W+Q`jqpv7-@`+9noL~c&ol7D~R;dcJ%2`Q@1
z_xDRz?oY9<`C)K%Rp{Z17X$Bxi{00+|9kh3QT;KG!f#w2US5yZZohZv{CR$DZS7;d
z(#OBPzRt<befZqDK7RW@8<sfhl)3lGJp6vYKK=W<yP&f2&#%|(ciu9U;+5)M*0M-D
z5;C4M_ut>&%qM=k_HX+0@5Fh;$PVKsclNxyyFkOD|9_sZ2W=GW?CeyPN&Psn-rQeY
zKw!da`-aqye?NV{f42VXBKzbe7RCiHU%uS)^;$HKq*2Sh+TTn8YoknidV39xjFP^*
zxVW^;<8S7hw{IC&L~Yfo{q^PHlarG}R)_h@YW4N@F3q~SN>wtc^XA@a^L6Xig{%(a
zo$%ee@6SE?|LgZwb9b)Fe$4PsJot3hnIN7mliQY@>xoJe%v`DXNK;#h^`tYq=ZXby
zmMQqQ8F6wcy;9Or^}ej5s_h%7qx|7OMwvtK=1m8-_~;s!UaMBjn|1y0_ivT6zWm=^
zbnRC5&ofD9@10*`Tb=Li?Y*b|f84Eihuirt{h6h=E_QcY_4jv*s;aJ53nU~Z6Aw1A
zGW>dfJtaB0dDg61CwYpUc)#y_Si@LVT|IH;%uZSBGLO{Dx%+s^bL3Xn@=P|CRckr0
zE_Qc9dV0HaJD=y%{qtAvD}LU0ZK}!TgWuoZzj*PY;@`Q`r?c;j={s=10W{*yFlo}H
z4V9mto#ZLL^yjzq+miYE%x$yf8yg!v{QTMkn9k3$ZSL&sEXhi;s`&9iQAk*L;@r8s
zpcdMn&(F`lc>S80;mY;vi??kPyLRo`WYK3W((j6eB_ukoTnQ;DFJHWHA>-DqTMJ4{
z=dLkOcde_l3tJPxsI9HNGRNxd^oM<ClXvw?PWP6~@Uf};6Tu+B)#_wnWyR(0?mlty
zWMwC3=hhfKajsS;78aI{etCPpvK7Ls)~)l?nCi7KM5}f0-n{`UL!3AiqoSf(mM>o(
zv9D(5hfkk`)~;PUS?1-D*Kgj4e13jjJU2Ht=w`_r{euS&COtgVnsls3^3bJAK{5Mk
zDsTK;crnAJq-4v6ZQI0(cE(JZHqEc-%j5JFg&Q|+6buUsyJV-PrRBw;=;G$qwqg5r
z@xH#kLpN_mrlhC)L)1yWEM6S6vSsVmtsAy(75)DH{`?~+j$aRWtkfcqn3(wJ(b4Xx
z+kVS`8r|7gsq@{i#)&~tP;esO!%SZ#B_$=<Lr;1-IyyYs4J?(Fl$0(h`kd?N=;$cn
zm~0G}J~nsWJUJ(4=cU*8?%iu(Zr;9s|9|<fn@e5>ZP~J=r)oCi@qT&ty1IYzM-8Uz
zE`P6NXD7!XATG{sYiqlw^0V72`|@`(U%q?^2n$m?x*|a1!ILK}txk<=*61+&^)D|d
zFL%$+f4?GGY0-Z1$;L`j<(t#byOoxj?%K7BfuUc{_EP8WRmYDXPrkg&ckw%QLqoxf
z7cXAQZgpzBawX(tX;qb#jEu~dTOwj&PHV%Im6e&<`D7RvB&4Jcoj%Q-nVGq`ue0M{
zoX@$A45uHzekrM|vmbb{T%zXZCss#C#{jLV5xdKDSFKvbz_51h+7I8pwKX?0gVyKY
z&p%!9_gCqTUAqpQJPBHTcIocP_Dk2Vw}1FhaN=o^h6vXj+iEce0Xez8o*telQ>Pw0
zdD1iLq*O&qU~H^xO-;>)tgBi(cJ6%m_4W1UU+Y$`1g-u#bLPyFO$!z%WSXv7w~kLu
zO)cu{{_Onx_LVDFhTWbrWy(bM$sNl~)Y4r{6q}lxC(fGHHQ&B|(W+Hk+1J)ge3g`(
z{P4}2o>i-~=GasUS(m?KVVGl4$n^U9`s3Z=`WsSDi|wiY&d2cK)m7~!OP4NOvV<jT
ztJJSwzm(NXvbIY3`ueU|x9-^I=jR0l1s8_CfAolHYZUL+D9~oIBS(%fFnoJ=xBK(+
z^NaWFkx4%{=VFwsbU;uLXn{f2)RQSjpcS=OuU_@=^}Slf_K<;}pZ~<uqMREW7`3K)
zX-z-<R6|$yZ~S3Tm*5*cC;1vWma#7l>fD}x|IxFvvp;<OdUaLxrzf0w_xJHKeE9Ok
zB|BUD*6rJYF)=-7&iF7i%%4Ah%b}2ffQC<>iq^#Km14MX^{VNk=67q?t__Tj@BjPz
z`{DEF`S0Dk=bSfv+O)Q=F0Lm}pDx_IS=fKR-O+DvZ!i7z?%g}bT)Wy|EXBp29iK9v
z%*e~r<Kp51tu0&o>(C*m88c@tT(^!dE-voIjzZ;UXJ#_j*49c$NiABun7R1*IoWTv
zf)}qGebUpR=97@1kd%}Z5E9aI?b@~fyX6&G4jnpF@aoFS|6BI1T&bz0tvzw}>~8Pr
zdJnIyjowl8wCkGL)vH%Ky1ShV3k|<~{krgChC%*4n-y1I3I3jEnk}}k_P5!JJNNI)
zyZ6a7c6W297~S0PHa90HAu+LWx_-PD|Alw&-W@wL)A;}8YUM>M+n1YmW*l;Hb5k-i
z6XR-iT7Bp2SzjyN$dC{gV`JkJDMmYX?|yu&SDK-LonP+3>+9<o81(h^Q&LhE?A$5o
z@IU{Wl(cmF-~GC}x)(A`Zfwhy4$%_T($d=U>h`T$0zyJfPoAV?iu$NcPQ1A()x+PP
z{la{Ku&^+`SGUid^Lu^j4CAq5$1G}pne@q8n?0IgVr~v9KE0>uFkbj?ZEb99EFmG$
zbEB_gncliNf)`shZrU_yozI`xm>3x)W#xq-T8TM1Jxi9TF#O%`%FW5y*xbx~ZEbY>
z(Qa{OhBt5DE(}`v;O$%9UN>fDX6Be31&&??|JJTt$tfr(cp<}t!64(PuC_LJZ*T97
z&FTFi%fePqoHVJa_V+i(r9lgqFK0h^@L)kn$(5|v@80ztZs&LAP>l63P_VG5xUuoW
zhYt<=_SsccRZW;Mfq|Qwo8j+r>%V{gwB@?H1ka!M<L@VRC8b}<?EG>r4h{!q+?kgA
z`r29#508emYu`$^xVh9T&%36gB(-$0d;g>3{ql?7J-@y_o`Z`kDap;`Y~fQ#30VN8
i4RDu~$OY+t1_pnZqV0LozZe)87(8A5T-G@yGywoWIUg1P

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png
new file mode 100644
index 0000000000000000000000000000000000000000..94a6310612828db2370d19a094795341478e90f8
GIT binary patch
literal 21380
zcmeAS@N?(olHy`uVBq!ia0y~yV9H})V0gyC#=yXkbA_{?fq{Xg*vT`5gM)*kh9jke
zfq{Xuz$3Dlfq`2Hgc&d0t^32kz+hS88c`CQpH@<ySd_|;n^;nilV6gPnWRvbT2!2w
zpQm7?XQXGWqmWTjQedU8k5HhOUzDz|EaJk-z@Wh3>EaktG3U+Q%9zlrzkYl?|8PND
zXP2eo1Yr)12Z3y+>L&z@56dJKED8wBPn#AvD|1%bjkU9v#r1Al5_B-T?&v8--xFKr
z9OYzx)1)~;v#aS;#kKeR_3yM9o75g$eEzrUW7pnma`X0vKf4@Ny?$x1mu8EA6NloF
z+Vg96F@snw0;0;Dv%##+2}gW66k7xixp+KM11U>XQYjR6;!tcc5S+AwQ?W(BX$FVq
zofd(qN~WfwbN$>C6BBp-){EZOlXrL5-K(X?`eaY9iQN3|RfJ-TfhkBw#>GXfrrFn?
z9BSo0ddj-wMT6gRWmD6wRZPGCPt%L-nmk!}+O%mVnU`Aj)&8C|cP{UNNvhs6%yOf)
z9t)qf+t*E4NNB^>t!rC4S_Jr+Ko&D>$+<acs&;tR?RI|oXScRyUzNDXYybC)aQ@z}
zV%p*BRIIIMCm-(<l#qzHo5-o?vK8bb#seQ79{%}i_4+MuSFF%*>yZe|nSXk^zH!nK
z4k^<tm(S16&a|yIb6YIfuxOD|T>altDYG1p)nRLYJZzWulCds&;<0A!+Kn4GK770V
z{<0M-R@~X~?afW+>3Xp*E-Ykb;FU5_P*Gv=_V#9ot9ZzIukLp)!@SyWk*}_-T>N*&
z%$XZ=ZW=N4$=mm>U$4(AWpd!+#fhP-!=_g4oiRhg#>S>=YVDto$AA3(&Ag%RZ`G}B
zx!p^bs+yUZ)%<wa{`B7A_}ZUOr(0NCCue4Qf}Hf_vOnXFs;^r9_J2)YzI=JhcFFSP
z$tF^r9UTiwUthENb|aaEg{9!*Bi9G@&X)oM18x5Ocx;k$W5V}))&9G~0s{q$pPgA)
z`ubW#RMew)@6OFMPOo^mbovaF%%F`s{`~#>^T*@<_dn`d1VTMH73HeV&NdI9ZJr;N
zduvOer>Ezt4I2V(-ng;i%9Rk-6;q~&ES(Z`xSc=z-@kvaPEFO;QdM=ew6Iul-u{1#
z=F%&Vj&@I7ym;}izu)hNrll?W^zB<%gO-LyfS2aiJH_W$ZrETjb@JrJ=jK{3x3RUo
zI!V=gN{|=Gv#YYNuUmR-TkdVMuP2oISG>HueD&Mg+gIPYb0_JmqM~Bft1By2`=a;P
z?cKC(TiE{p|8}1`c`|U@wr#Jv_4fs^va)9V|M&OR#^Z9q?)`GQg?Fy54iBAcReI&*
zWc95^{QUe|i=KKdof70b+bs0<w%lMZ&C-vLT({=m-xu_F-(B0<|9`)~y0S9(REEi_
zmKK)O#KeVj)$Q!!Zr->da=ZTCp2}6h%l%%RnQ5$LZXVv++PZSV0)?ehf;2TXxdIlu
z^#)yE7aQE!*|{q1>?|!!O;63GSJ>ri7R1EFT>1F;_|%ITA)TF_uMV|xXI)<xd+OG$
zC{a;S&81VU>i_N8v|&R){hyESD-F37-|>U;&Xyy&*VaU4eR^^->u?+ItCh>=1+lWS
zMm-9Ph**(zWksOQ&B#qDov&_gULNG7DK9U-wc=ya+Purle4`$HJ1$?pVwPz(C@eF-
zYKN@}xOeZK*UH=KmR@%~4*mN2dikrXtC!2wehK{gYW4b6hYmR{4SCslyHi*_WU*WC
zmGk!h*Gvi0va*UQeSK~14ZFm|#H?dIl1t<AUtCz2_3X?{FU_TX^K4dvlK8r~y{o1K
zojP~UPd9RtOYFXyop0Eq8mo7O{{Q#)>YmEaQ-YSN=lS{hWu2a;d#kLfYL}6@c{tb~
zWzWt?uDy5pa<I3_?Jb#$udR!XepNbm&YX~#m^ndSD_Ob4RxF=ix6A1xm*T2^o~c}l
zCWqVk(~owE>O^l_bHw0fE4R3YhQ@-*&(ESFKR-L`>Fv#|U~4ODZM{3G)O@a8?X0FI
zrZsEVu8iMr_x1Jl^fNOI|NMMDU$pD>%jNT*?S8-S@uQ>NM~)ralylR_?*E_9LErb*
z{51Oe`+NGgH#ct`XI$<-KkfRu*jrm)UtfRz)6>&GANSjP*&d&u=)C3o->@|i2aC_!
zy1%`(mGQ^7+xe0E>uk@?Hea58e%_y7uh)z2YLCvnv7zzYT<gmV9GPETUw{7B*Vo=P
z5=E;oFZVZ2IKZG=mY$yerONjCxw)6WW!~Dd@>})WTU$?_JjwXr%uM4gd3SeR`P|mh
z^5LxceFhL=Q1j!1^*(V$m#dSG_;M`Fytl_vH+oyj-(O#OWGpsB#2#+rT^YETZMpya
zYp3e||1HnS$zj;AZQHXmGmVeF%G<Pg^Ww0zQ87CT8rQ7R*|cfXj-sbr?)`Fm7ymeO
z=FE$$tHu58eyX_l$!G@Vn@C9-rEu8U*|oH`itesEKhJh+c6w^+#L1HnKRVj&y%$uD
zJwHER-OzC2RPAu1loJAL&t1Me`NoZie}6vv&#QiC$;8C;?uFc^PoE@AG6dG||7Z35
z++1aC?e2ZGzZXqma#((Os>QK$=lXc1%|yEG&CSEBzim%WPd}c&|8H2<-3^I{J9>Mc
zJ~-GcV_nwsd|vgtzIU!1fnPxtk;vwfmqFX|?w(rQZx^*DX8m<x0f7ToRtBFuckbDj
zmzV#%j{l!@V?!bX3k%DKlj`#ezP*Xmi{0ho;?h#KX6shdz182-E-mql*;&N8_~MED
z{eO@7+y4#8v)Q<Dql>$H@yko9Q-a>y+dF%H-LKAbbFHV}D1UWjWz62HsXsqI7uSA&
zeZ9Q8|GX!!*Y97pU_nDu6VtahHxF-2KJK+Qtyjvl;@{8boc#RHA06%f@#`1Ug!%J9
ziDqZbPb1T;D+}`O?wT=k>fiZ+OGS3>+<CO_)9248PoG{KxY%vR?Ae<W53@ab^k_@+
zER#&9v^2Hp`tjFJf$GvLD}%Q#`T6JP=Of3CP5b?Z!L3*7=$6dO9)5mqnnxEpx1Tt3
zhULJuYhwQX{%!p7=N33NZ`iP5#Rbb&fvfI9=87(cYooX8eGGl-%Ppq!;G(<y($Ljm
zC9kjbDmu3bNJ()WcyV#@$IqV^*Z%%i^5(`ym;Srq@wKjXb#}S8w;eros_S4g`{aoe
z1M_Uc!oq64-AuQzun5?m7u&`w?N(7?;oQ!ryYc<0soFPp7ORJah209Dd3w73@k^H`
z?bu<lXz}9AYilHZXPZ4eYkprvUw{3X*L;#jExOU$KK%K7J~=OM-PHQOU$1Y;xY)F-
z^!28clS0$<<Kv!L*8O}cuA`&l^{3|3Np%($mW0&Qqg|rfTlB*i=ggVI!N<2ObamL3
z_3`t+d?|^md@B0q@86eqcbiX}Hm&5{otd$_%clN*!w^^f)|82f>BZgM=B2N%r9L?^
zF=l^V>}q~a&W9f!9yapPYU#YNa`xZPP8^DIR=2n1Cg0eQILEGb*OkO5#Up=RtM0T2
zJW>n|2srTP=VwXNtSKtF(00!w#gaES3}0Pe@9*ZuCamta=3POHfT)V|QcxR7K+OrH
z(}_dTr$r?XDk5YKYBg~PE5e#sE4CcbH8&U6*Vn(bJ>P$K+1rZebITbj9=3{S+}xzf
z*38&^&><;FsUW%SBd4%hLVkY#?Ag-4zrVkJPE=I1wYiy@kB`sA)ipHi1lWe}rk_8R
z@3h{Ubv5brwY67P2DdjgG3_dSeeANoJ?{bg|3CfN_~l}9{_{$kO_(#sr>MwiciG#d
zJ39*B+}hgREv|1={LJUuySv?f%MahZD_dV*zh=!EE$tGJqdR^FE&aOVWr@|RD=V27
ztmUoP%_*0hoGfXW#9}qK@7!GL$y2Agy1B8<Hp@M9{rYq^Ua5rC)YkRu^`*`8)~wmk
z3Tjfmi=O(cFFG(#kgd7#Sg-Ws*RQ>o`^{~T=62k@bLYobtJh~`vo2e@G%-DWxomcJ
zws%xi)Xb+~kMYm;lGb9q@cOHYn%bnPQ;#+<GFucq;dsCQzn$-FGgW*0`5QJEJUch{
z^p}^H|9{)6q^#W8)5D^mskt&@le*UQiy24G`0Oh-6ulI;%>CAxFCMdJ&U=-!`OFy~
zy_g*V3v0Da3wN)q`}^z1j~^QfAG?8CeGiUV7wGT%p(J1TLy^JE%xsQjv77a?H#axm
zxN##PIoa9C$!TLze*Sx*>m1zN$~rnc4E+53-`?Ggt~6Ka^7Qb~*y?-aujj4PAiLx;
zjV}k!&3*RpVc^4BZQZRl_5W-lBO@7N_SellJKOy6lP4~2ZfpgQwJ&YWzW(UZBc_J^
z`|aavzlwTkmc6^<xwq=;hHcx9oj>2dV~2%a?5-nAy{C6{cOO1~e*LCHVxaKZkslh>
zAMNJGrl_bWA}X3_BE`wUu^@hboz2fDljqpi%PkdQYi?Bap2l!tdw%@2_3{0C_u6jF
zzW(gA{(hDPj?HX7v(0$@{rwr1`Oo)jX5-b;&Vj^v*izNJyu3Uai-H9%Kd-Hg_P!gx
z{PM<(i;IF@J2^XZ^78Io{t?m>kg6;WLhdbeWR(`*yS~aG{oI_S+x|8mU8csAA8O(J
z{=`ld+=)2i{CIumZT<a!f?i&~v%6e>x?b!q_2;1KOtD4ad+Y4rA|5##33YY#Ha^*|
zAg%`6>Te&82>V~yS6jVj&z=+K&MjN9qT|Dd0vlV~!s6n`m;LRpp1Zu*oqt~CGf5d)
zSxM_M9k<03Pfk`Z{`;$xhmF~}osae3_1nv*DB0M^h>MHI#Kv}Zc50e0n+<A>PoF-0
z+uT{Rx>m2&ZV-FSE?-mNKQD0VnaALMPoN+>pUi_77Z;mkU+Y;HyL(gealSo!_Hgj<
zsHmy2`TP5?i`m)K+RExZU2kdJ-l{h@H@ENEW3#L5t=Ic|dl_%+tv2`c^fbx8H^;8_
z*M_vSQc6ln5jzSLJ3Bj7*X_=|Z6+ivY+3O^L0m89z~#%6<!q~_NCt1uySpWM?wmO%
z&YxFTRc)Q78!aR(?3|FGz`!l8cjW$k`Rwf1)t_pX90zyNzVqtq>*wU=8X6lPPB54-
zYu2ewVf78$wq0AswmN#d-m_=VjvPJu@oIQ{qJhMVD=UR(&YY>DuAcoo^Xe+ms=f0b
zJV@A<f4}bbYdOoJmIn_KzEs)z&N5k;etzDYo12%Pnek6cLBZkm(%mz6EM30*^ZfsR
z+Na0Y{hVhj59)+DaTMNP5xRQXx7~5;mv7jx;Y{t+Af1Q}4M&c+2nh>&dU{^0x>I5m
zyRr8FuG;m<Ts%A}S67A3v8fcA>(}1g%nS<JD=UL<+`fHzkt_GQ*xhY=_x_DN&IoGr
zwg^13d;a2uhnrj5HRfq^=lUilD%RE2>BQ}s(bUAm;5X0a<ki*Tjnd9bH8nLQdfk$e
zlC~^7D`lFM@%#R(XVtl`uC8<J>*wX(-X>a|zt(1L^me!P*NwBU>EzD%GBbVN!+!gJ
z3Z|x0uUrYqy1wr0%HZYyu5F%Beh1XoEsS3sxmnH7&~VS6Pp4z{*U55ob8pMJd1yu8
zVh>;6rOC(pG?r#uSs|ExeH|+();>NyUi|8cW=&0vgh2vBL2_=fxw*Mj*_#ueo}S*C
zUGo0k+&6D>Zf(!+_gntBncr?f=H+EF_H}y>6|%9h3CYR%d3l}MTg$Nh!pi-1QXd&6
zxISAwO=nq7)RMhE{aa?foM^T>j+2uU)C;Wn|Mz=&iBGAXQT)X{u5Di6g4L%5)FN&X
zc+~jl?c2WxzEybjqxa!Mq8?3~J-d7Qbn&;hw=a*~UAE`*IctU`OO{+%8La;1?ORSB
z9u<3g`G!4vY?zptH*Vj)Jb1a^QMK*W|Nnm9xPANaix($?dP%T~W~-O?ovf>?j$T_E
z4NAM6ot-&%c1)aWUH<9W?EFu!R<AF5bwzW{nl%x-N;Da~r|W@Q5W&m+K!rR=1=EBn
zQ(Q_)Ox*kBQXe1d{qtD<|AG}ORzQlgBhKeCLe|($IJTL4+hfCO<MVnq&fQ|pl<2)`
zFX+BhHP33UU-XWxkXBcTm2C9(ywnd54l=Z~v}Al<d*s+LAt52A0}l>1|NMAdKF~{0
zTG~4xKwxXu)kUeNr`@=Hdvo>occ-|bL4_5+;o?BUZ?kLne64S{oKrTF+w!N3-|jP0
zOxq=YpWT<`e^2YH?Dj>Alz7;fd!@~fUB5p4+S=&g{dKix&gR#g$pTkVQVf}ynKy3U
zWK77*^V?hX^~9MoI${5=CxcRv;=84#?3ZuWy<)olsk{GOtiHyh=TEq6$~E@RQrf*R
zXgTvm<QnZ=#<z#oqLPxHDJd!p>F4LEDk?6#v`)3_^vRPGpXWNk(}J<B`RzA{=3Dj@
z>%5-2#_8OA8waPbqN&HqR%^{Xz7|rAeR#co|D>r?T@w=(pFMjv$F^EbT3VVRCnv|E
z_!-Z(ZQDe|#EO1?N|mdAW4QLuvgrPup%D=hiHV5^dL=vLul$UD=WgV@BXi<6Bil=9
z;41S@&gSds`OlpE>Sq2nT@yTcFXO#X@f-0Ww*p@N{QUgnsZ&f3etmtNb8nAidU|?I
zO-;n!DpQ8Qz`!r>@5?jP*Vivvw1@#D2I^W^&E*4$EM9!_^l4>P)z&9Zo>)hH2?fQ-
zqD5<7NB&v+>r_~#ZJwm0WJ^m+KuAc-#S9e%g@(Gnzb<aejc#gcs`&kO`^yUpnU&r9
zSQ4I`n8+h<CsSQrEoGA7P*P&T@b=bLWd((XZMnCT^776ta_zqG`s-BDZJ;)YT$1wc
zttqEw9gfl0u#CwHbUwf0o$hMIRsUCRIdb;w*<NOLz9|~{S679ujBmC3bVB*hbNm0A
zUt@xo`=!3Rvhur)8@OVO{<%H>*ssHqf9BLIjQErm=<HwK-e&Xq+8U|Flea?hq)A$u
z)ij>s=ewO>*8D5xlAOLiIre;0;N9p?Yod+MoGG~~H6Pk8+-_ie`QrTq=X&Rp{fet+
zuL;!xr5#+OZ3J4Bpdl_|2JJw`xGYu8OFuW~;PK<_+j4JTS{G~m?EHNG13i+)8#Zn{
zm|&1#AQ7>@Zg1I{-|zQ>S`2-CeL1(bcxvC@QK;P4*LUOgZDB#dz(_@IP?+4gen$K4
zk!R0pzFc%Kd2?gpn>RUk?%w_Q^(*UxtKsoazg~~m-&P+KBy{oOMH$<wDPO*n%&U5(
z={L`Y^MJSh-XqV>&fd6n>(R@Xh3o6<Z*9wsj$8;acDj~s;pT1Ij@`N?#o+DjJ!kG*
zPj7GS;{P{pL^L%utq5H$rW?KO$h~{>?(QyUPRP&qUmdoVfrE=n$;>Pa(%!ZyS@lGA
z`;{vpd@>de%l+mufNI~oyu6mSwxXApR2eQ^y0l^Q=F5E>w{BI{)m?k02;`lPcHgC6
zcif#adGg@}j?FX7^W{LDhdVn8K~1H(e#f_DUY=o+DHLDx@#y`1ekmy`6DLk&m}6T#
zt)-=9f307jp{1qer#-$MTc*F9Xl4w`MaM&KtqBs|-qggz#w&H?!a`?Bivk4(-&rOX
zU%B_o`R=d#%dlbN#)oHS8b{r|xjB8~#*K^`oBxC*CMLGDwlY3=_^@&N_V2d)<pX!M
zw6Ij(Q*%KyE|ax#!!n(hdTF}#$#ibsY`lK&w^?EHCQe-V_h0@0@AW%&>|l8C{=I)m
ziAiN;<&q^!Cd`@BGkv=Fym|9f)YT_Xn&ebhXLs@9#UH<ahlkw&H7EtXul0Iecza_q
zdwl(0({=0CN${{SJUcV<@TsZVo<2S<%P(JC<jQUL_e(Itt*zPa4h{^UUgDiQb6mT{
z7G8hdDE<D(&!0a7Lqc3KGBn=a+PWIv19+;I=eBraGdusJdGq>OTUqt=^i1;Z?6@Ob
z{`wm4(vZ$i`)Yqrt2}dMW$;X^(yW!o@4T~sM2F+d&fB0~`kkG{H#enn^YQVSWL@dF
zb0@}co{i_*TU#ZK(|8mW6=%#m`G_0rh0uj(j=X)l{eIe=9fdW&UM_!8vMZ^y{@+h^
z?XWcuUa#M;rlPXsQnLtH?RL*QVe4WdpE>qQo2NZG(m7RV0;sTP(TLsoc+UTy=j%_L
zJI9uAZA~PPjKze%zrTl9C4luU+EFCDJ#uqeVrJ&aZ*OmdY8*++B9+I-dRKSd0OexE
zBf6)2@8tQN{CJq(UO-IjnDKcV=eoMOrR?CQrDB)1dEw@b8xzmWFg$YP2&i@b>C-0z
zsoqdQNCm#bb!X@8RiUdteEyt#d71B(_3`t&#q~d(um87MD+xRpFR)#soax@Jt=W$s
zKR!IuIDJdMkd#?Y#LGr-yS(GkDc?KGd}klqlzKYj?yjw7^Jn`lPkw!E?NqrrU?2XN
zUnIOebamLr+xhzgH>aIl)!_~*hn=P@yi<NBCMM>@sZ*=EA|PJLopSrJS?VbftCAND
z-QC<<v#wq`b$xAg_(~+ZoSd90KA$x&EG?bN`V`zaSryp2PiwA0qEqM*C9pYpi|^=d
zd3SfWaq20Ny1!q;zrDY|{_m9MXJ#7z`0?Yyw%q7jTeFY<`T3b~N5w~_ef#!VSXexG
z{rdD$@97KyUWSQ>Sl+ySTUcDIUDmr|g@%lbOhG|`f|k~+y!YT5<j|6-w;z{1IKaqI
z_U_Kgsi7%W0>#C}cE4UITa~@>C@V92_wL<~U%x;DeN|sy9er|ga%V?}f|63x&f@2r
z5)ZRYnmpOl&kr=%*CTD-7rVQx@Z%%bv~zP#-rHM!@ny-5-Mb&Zf8RfK>ec&uN<k@;
z<8n~mBQbgT`f&Aa=*5(>nc1|K7M2Iceiz@nckc{n6zN8UTaQGe-}1%9&(Ec#rcRtV
z(b36?Y2UtmCK(qP7~<pO&GPPe6cl`jJ*o-{ti)BPWVHT1o1LGxTkRz%`G0$RTf1Z;
zqA;^4e>Z2^1=HMHQ;MITW1KK;THD;YbIW?xuGP)V%uJmIE<<f{PQ58Sedf#(=XO4j
z``^9uTkO_*;_TU{S5^l9`EuDGG$+u}(NUy70j0Rp4O@Qs<(8`w|JI#2;i06g%xF>l
zZOygS>RR!BetkCi!b)#btCpMvw@?kInrRysRzIB@E@5A118R~T?~|3WD&gSf=AJWm
z?$2MZ*H`}ec>J_&rK2NbGaGNxiwg^RWGn<4)O=@6m^iU<>sC`qNlBBuI}!}i($eSV
zST2^W{`Mv^Cnsma#*Lca6wc4H?cTn<yfeGCwN+3|jIE))-F@Znj#CpSOi(y~yTzk=
zCa9CGoSRnVv-E7<@4ne_>yKZ*E*>9$-`Z|tP}f9d_eXEuoXK9lciGh6zkdshih?7)
zt7}pH|9??G54Cc0a&aYHUl)62bvS=^cJ`k?e=1(D-M(eL|KmrGD*pfd{@y|k9NEF)
z?~ZepXCLpAt@-=)`i@<@k_;pmW*DdQO;-0;Q&4DF>OK9^p335|wNb1N_xII;MuNY6
z`*vkr>}>u0e}o#?c%=+-Z<*NG+A_|ttKIc&VG}459yz?^lF;_NySo-n(c1d%xO}}%
z;IhjvBerA&uKeBJ)+Qt+RptL!*oouggO$O{`7A9hLyOj07C-xNEjmB*w!h6smi>P|
zc`uF4e{y2to4dQkmx|=x+LCy4Q|gtK!OJhLQ-A0G)bBuaNJxliAt-CBrJkE(d32k$
zaVGPRACLP%&G&Xb*+(Cb%cnm))H=1wwy<8yys)^SK*7dl&XOfdASJ?&+j;vk>?%Ga
z%$YMs!m4D28P}Hd^K$bl9&v(tHPfey-@JKKL{wBzPOfkHa`n8tJW!E)tFr3Li-X4J
zZJHM^R!%<NcQnJqsPa?Fy7>Kl2NM!rTv*5u^?rI#m#X))2ag^#tzNBdXlVHKSjpv&
zkB@)+^y$*J;NalG(o)t7X=wpog@1p2tv1tcHqd>_H&xE2V#13T8E(B&trs&?OiZRM
zSg=4SxEkcW?~|F?`3k<g2%Ke_?RI@#?9GkI>~V2%ENpB-!otnVm#c@bi#d4cP}5TH
z=|St`Y(bNEadC37yUUiAzrV)-8u4ASL}iwFzTbSi+KA0*yn=#)Ix#yY9654C=z8qt
zG~VTYb00l8*c`K?fYIOIzv|12z%-w`yGk$jl@=8#+1t;bGDRe6TaKrX4^LxbW7OYn
z{e35XeSMvIdz-F&?Uz7Rd&fe1t?kFpUb-~t;$rvDr}g(6l)sBfJ2$6u>sC`mMa4N*
zrK_}(K(*4l8PlhWZ`-zQ#>|<C1`-~=zE6*I3P(goXUp%ox;9$8xVZSyqenkptzK^+
z!Bg_;O6Qp~K9eR--uih<)>W>(asCAb1v@97&%eKKuKN5Mp}leblhu4*+}x~Q{q2pW
z@v{9~92^&_UjCc412oIFWcl*PZ{DnVt1PD{T6H}4?ygcE34?}CVRfU-ODg|<-~a#V
z&F1r$w&&kp)e*>{*s>#eYu43Eb=O~i6%iAgwsU<@7b~~egc&nDl9H4(H8rF9Z^yT*
z{68m<>Y9Y!-*?_vwKvW`AVA=L-S4-t#}$3TTkjQi_x7sV*vw&O=lgIgd;P^Fp2BSW
zaz69zYF8<Nn%j@&6_k{yC@VX^W;}iF+_DWD1nlkY#q{HR932@M)cxl@d3AO5f!>9e
zrc4p(aZ@%lG`w{IGzP!rZ~oQ2)#Z;KKi1UJdi3Y#=bYQy*50VjewulAmuc1Bd6Apb
zGWX_sY35Gx);zka^mRc|(XQX@iaudG-|vvMDp_&ne6Rh_CzGq?Kuv+b$gFE?K#{7y
z|Buo2_3_6K9ct2z-e!<>MI$IU_~ng_%3hjMrddaBZOvx<@a0R&pC2FRSe0_g%geLz
zNHE-q*m(Tx+1@@`>rFL3i{45xv+*QcT@|_||GwN@zvJxkH4c}T`C8Wfsc3!7!^5M}
z_h!lz*jfM^n>kaah&*uHBqk(uXsUL2fY-%+wbf$!aW(w&dkjRaLa(J{|Bv_Z;Q0Lf
z{PMcLzZhb6m2`SeR{MBVJU+y;q5SaS!_9lEzlW7e@bd;os=3@z02kV;{GFVbKx3Qt
z>;M0il~>PAi(0k$>^qfw&GHprE-ZA8y2J(=Y;R;{e`jX}F8K<>Px;>Iaa+92PvgMR
zZt?uzP0IzGI6f};@a@~a$M?(>wSsrP-{CvUq*E_;*M~o!&o7>m^!3$MpSf0B5e*gd
zLTSS!mUr*o9XWoS*<4L5=k~T-cuQ#Y1*Ok^etr>=k(tg*_W%E9-EaS|VksM<ofN$x
zfzdeq+!D{pYA?Q4O$mBYV#OzG6(R~Af&4DJIB?^;1S{|D%O@26w7T|HN8;vM)qihi
zC#I=ocXXW!es`Sn^Yio1k8}!yx*yK%e1$(gJmgZ%UAS<eN!}d^adGjrImdb=LBqQS
z5(W}HYvT9Et*%BH{m6VOZJu}J;zhyg>S`q=rFUQcwFo$MRG-QS>G_)`pa1uMBH!}Z
zXHI^0?U(h8Q_m(ItJ}=K_~W+CIh-H2Pno^r=FOXd>*MxX+kNEL-*eztueA4+{l8wV
zzBNh7eEJzLFE1r4t67WP`!^LncKi3Szdj%e9LP=D_iA!)N51x?M)soLR_rc5b0
zz7br`o!Zs>GRMBOG5e?M(NpWiMLrgPV%=vwCC+zZXS?H0=cQ|-wjR2Ddv^M~%4IjC
zve)nZb}P64^~J^RE4!ahQ!_K0wqSum*40(6r>E&|+`c{i`@6dgph37*q1p^hO-&uW
zy;J*@TLhd!W-q-R?4^=x4jR1pJU@8Glc=MPo$YQrZS(r=|5@zYx9`dG=ac8oUHkXW
z+E#AyN6((QWn`={vyX|2k}4}JyJfd++qNZ3mrjk;0~t2G=k)deVS5GY-FMpNz2Ebh
zkKy{d*u`aUZ`Hp}dVH)m`PP=q7bRB9d}o*WPe0ey)1zW!B-F4u{rsc}6AJv#X`h-t
zd9pIBy5Ic!^bNDh7=4Ys$9WGdYEx2I_rG=K$c$6-Hx~(SkFWnbb)vHSq{)*Hzu*7=
z+`(q{pI<Kf|NOrHf9-Mc>`O~Jzx{r3aq-LR>*ZmC`>U<1zv-BnnK7hJ`wt%sG2Nfi
zyJMAd_NSR*nfjNNbIa;aT1_}M#kAeq_nuDHi%y9rUUS!^WEu-6Yt^4SC95^pqEKnJ
zS?;5a$K{w8)c*dKb9>v{b$Zj_J+WKj%m4lR=i=@@dHQsBFE6f>Cr?&XRz7_B(zB?@
zNK;eu$@AyJva-GV_y3QL1&vVc5dU-AeDQ8)XPvAU-BY8lT)2F9q2l>nrmL5oIC6Nw
zog=&&-{wg+#I4s)KL4MSk8hdxbiFwig-p@W(bM!|Pf6$RX<WTpJ1;Nq$noQ!KOUE7
zzOXLVnu&=i<MuY)+FxH1FD`O@@wMvA*|U*53P2;P7S`6n0s;=Z%ie0$6kE;hOFZ0`
zdAqf>)g<eR$Lp8cr+Xxge|$L1?>%Mt^5w=A9}*t8rN!N=e0OY5<>wt`Z=+hj?=O2R
zb>8MPkDs64mh9_&Cnu{<-}(O5*6hmfcgx?q&Jdk6V@5>Isp+M!uic%j8M|li-kZD2
z^%?wazlLnwTGA@8Xw%G@FC&c3%<XVox8v58#ffug?YMPCIcb8nciYa>HhGJJrdo$&
zrKca)-~Xq{Z~5W{3mSfYe*XB)o0xZ<;p<`|7v(%TF){U>ed((!LDwDL-rm06Ece9R
zsxKGaEsLITK#T3Ov(2YlUWpPsJIgfr>#M8fwKcA1W}EjX9&WpP@plVohT6`~?#c7#
zmsf}Do8{a%aM|BJHfcY<-H!&4vriY~Ya1F)+*|!U>+SRR@9WQ3`?T}RpWFZcZ+@hG
zI7i^pGiOS^nBTZ?zu<fQa*jgtQ*R5SH>GfPi|H!q>helUOA84Lzkl_2+pA04^W*D&
zJY;|1W+W*iv*v2-o{EXj=T-B8#*NG0#pEPCY-Z=bwA5Q%Mn>kxo6YALJbZn1%@%&}
z4PPIpYGNXyudi=a^1>k}M~C6`G~MI}2O7V;yv)wd&VH}rG4I<MJ9b#?DtYO2yid0B
z-_PeKPo8`@+22ml)>c+tUcRQLrYko2@v&Z;|9?K;`gT@s|E;ar*}t!?iJaUgYhCp1
z%}pW0c{Y`kqPAvT+>q$Z5EK;j;?h#?b?esc`TOlQs8xBRGGEHR&gS$qUFHXO%kP7R
zZN$aJ4;?xb@amY#WMOr`Cuhy?A31%x+bs9ihtKEjC(oaM{>#hDot>Qr6AT1|g%58^
zJq_w8G?;O2KA3Re(4nUJb-z}gd0z4NS7}pIQ$bl-+JOefnEn6u>u>w>_pkTg`7DYp
z0*@R+@*b5vI>I?kKmOd#;^#j;pSS<{Y<9jy*_(*aBW3<9tgM2vvc7qFdhL9&i-MQ?
zMcuW2zsI?qPu8O3g+TGMGl@q!1f%XQbZ)<~t5kdczhBz#_x;v;`7+gi#gBW{?>|17
z?7yk{d)}kRk3n<y;8sQ4@)<KE_~h+;e0+G)&&_#wsFnN7Jlom2(c3oE|F5&LvH5X*
z|G%RxoWdE`)<}N*`0>c`<KnxI1^e3`T^GB1%jaW9jtGc|gyhU$vqop%?|0T!UtfhP
z9h0>#yRb3YJ@@z2@VLY~I|^m&Y9z9=Uvt@>{`mO#=JfM&YsEkRe!u_vD=u#Cn|rIx
zKR-L0{OrukD{CSrm*20Qe&B$EkgzZ(506gSy4jNr&d)OC=I7_1SMjLRXg+8e%gWf@
zX2<(vL0aD2-tKSRKXs~TWo2bbT3Xl(IZ&rEI4tkcw@;__!&khid;08|TUM5qcKA9S
z!za6Hf0woK%kv$W>~H6pmZlcIE{1VI{r`Uv5fK+QBs$mB)NJ8w^cEHucXx4NVQXg0
z&(AL^F8=xV`+de2S67RFeRcKmM0dG`xwp5?v8$am*SdVtlqoLP*Tou2@VL0T7XJG3
z^7g&<ojWaSe|>ql`~AMf8#YXkulv#X@9*#IudS`EEbQ#YiHBHn4?T)bO-Z@1B2YQE
z`gosg)ceP0(*x%H_)%e1`l@C1YVGLlc~4(lTpV@xKqE6_L3#Q54@XX)JbALEy?t}m
zRV~nJsAt7@w&l+Lwr<hv+0uD=c}LG(-ck7Yw%Rq&TF2%^XUgw@#vMSb3(n2AKfnLq
zSN*krPEFMo5uIvVfA;T*`}gNxkFTHG($d29{mhG!UAOb@dublse!s4J@#5r-pJI2H
ziALu-t-l@|5z!IE1?pqUJ>ur#N_u#x71WF8=jYea)!n*l!v+Hx8JQUtg-I)qM@2*w
z{QUH^=>5q%cVre{T#@_y*|V(we~#P#J9uwz^~TMclaKXCM#(O?`}}ORj=!HDA0s1U
z&9|HBQT_3e=QrBy+_^I%BI3?PL(r&`rq<@;y7u<-=gg7e;^yvNJ0oCw-d(SH$NzEM
zyu456mft&gb?Xl)mAkvk(|>+?dZ4#_x0G3q$K73}jnbzJo<4gvY358%Cnu(P^X8?_
zyMO+CKP$Ib#`_vsi-HH+K7!Uw$Xb^byt<<K;Fxuhy8k?u1D7ssIx|(X>f4)>|Nj2|
z`DXL^F5b!e{(ie1v#(}m<>zNpD}B?`)8AGx#O^LrwX%|OsAgk0JKJ1cQ?ql=9-B>@
zHvRZ`T)y$YY5}jbnTopla;N`%cizQ0IXhQ=JSzV3-rn6|Vb_Y^TwKhq9kzzy!HJ2=
zNAJ0YczrxBUw>hdYj@FOLC{cj>`dD{+v;yi($CM^^WhM;N&dY(TFJYf=j7(jG)Q#n
z{3L0dCLkr%HG8)7*6izQT3TMKi;Ig7A3xsCEv^UZanCZ%4tpl>05ocAQK*!cmnYHd
z7PT!W^3C7%`?IdB5Ij9iSJ}d1#-2Sk3~A@*`OY@W<>2OC>_6Y`*6zIRmtKGU^7?vz
z>gj2j8|T?pPy2qanqOUA{jKb_kB^TVXI)v5bo|?ki_M;s)fP^1T7KEE{$EWSkK~~{
zJBzz{ciy^rQ&2zvG+w3~y$v)1!vGq@ySUi>@uNphmzVkG&pvVQ$>Yb1d29V<8o6rQ
z*ZwkD9lrkAlarHgosyhi|F3f9`NBIp3iteY)NNsH{rJtBp5425m#wS){q5q;;`B3T
zPycm@;!tccsI)5GT)mj*`T6<JoB8b?ytugdXxp)Ed3U2;9=xl|&(EJbPyeI+`aPex
z&fERg0oAyjyR1rIO_)5nIQ;qAb2pd8L)OoD|5b3FaxNr|UHm_2G-b(IAOF&AH|IXl
zeOfoipyWlsoH=tmJUt)IEx)(WwOdT|Rj%Oo`P=VRz3x-{Zpx~sr#EQ}r(#P*#+2DR
zQq$6mik^66U0LCHexB{mN8S4GUf(kd+@|I`Yl<b~ySv}Eg>JvN*gd?Q!-+%DWvb@)
zqYI==vltR`bI<bI|8dxyc6LX_$0T_7K-;`<_s^q`+~sSJoYvoeW~z30-tVuAK*J|3
zD`cLk<(W^nUlqQ7-nXfXRr0RpUtZ=Lv%9R9k(n)|TmzJ8RwzB)_c&};wE7d#P<><L
z$>MPp3lDjRf(A(+9V`g%jJ7oH=<xb>D|@}5goMZQb8}Z)i-7t)Aq#hV-O1bj=dv$S
zN8t@*UDu-r4;t352Q6J-%2R0)R`WS<@1C5wxjAS+Q&t$hK<B91iQ0(VJ3A)k-ri<d
z{q4;wK37l!K6K5QBcNe`U8S$5{n)r=%bE*K_w@ICV3Ib^(+Lw36m(o4x7WkZZ&~>I
zI2RWeh7ItsNi`#*NrjJ(F`5hQooSrDDf_x!+53BE&&)L5n({tz%G|kUZ){8kwRTqq
zFZU~L2Q}h5w8GLJO`ALS>{RV=uRkg2>ERiIALVSTE-Y|tPD<UlabrtM3xj}=(4kvf
zv$uW{RCZf%`Q?i{JB=eFBU4gRHe_B_tEsKcys&^VEG+EDuV2$V*yPnf)5FJ)x8K`a
z9S#j)wsO;us_mdPyn6%gZ^;ZkH`kio0oo0ygLMP`8T@)WJw9S@l__XAq_kbBMW8-9
zv`^~HOylJ@z6tx=1g^X;ZCRwE?A~|e?AhLf&Fs@H5B`KUA+=4XMXRc*ZL0fQm0Nvx
zSLv-2*Y7`k`*yAF^)FX~{cW!(Cx<S)7541f%M#J5@O6uz%YgE9_RO0wq2T<v*t?%T
zf9B-oPChfkP{yic#jeAeit6gm8>{Z|TV?8B{;aSqGB7Z3>fOt0A~$!bW*0v@v+_zd
zzl_BMMrO8v<$khax=}~6*Y7<xLDAXk&G&D&^FQCKejj?X_{NSxWp%$f3(DT!Vz{z4
zdiuG!*8ktWPESqUn1A1{sHo_}hYz5!G*An(xnuRTlYcCOr)sFGx^B(B?sq%y^0Hnp
zfB$eV&8hR|t-G})bMcfQEgc=7wGkT^9jODY)=f-IjQSb>UAuet?%#WNwf;V|CG+x?
z;`6r4d)$&s+k;m9`0?WvpW_`yh!<`y?zdYtWmjRoZS}X1W;WiSzP`S!6B86~)xR!#
ze{b!sl9!9_+_?j4!S5=1+NGtV6S98)zg?d`d<a;#Ze7;NNvgLhca^_i_v_o+)gZ?l
z>ycdC)zx)ncK*JVXU?1n`Tgzf)d`BuU9q<_Ojccwua5<JV&{$>D_*>KQIx#v<&BNW
zr!HR(zU}Af8K|eHcdM+XW{;6nZ`PX|8?}^_79IIJ-=ff|vbs9^_W!@{>qDQPn>)3;
zyL*;ZY1YQ)pU)Vd&-lIf%O&qur}g)TcxjpzKl6F|@ZrK+TeFwD^~<e2dSi3C|JP&E
z`5ONI{!bq~Sg<Yk_Oef(K24cBcW%~=4GXjG@7vqe*B2IF_j760%caw&1bJ=EzaRIz
z^2tQ^5GSW5(Bg3YeLog``tl`&t=VyH?CvnlrB}9QUk^!0SWr|{6m;J9`<zdoJ_S{O
zdlLv=41P4J{{8Gpul^L5saJYW<(vBJ_wVfZHaAz-i~H;A*Zx_(exH-((Q|XHSB9^b
zyLRo`kDosU#l?>=^`5??<fTwyVd0OTKSNVbTzz+8p|eiprWV(3vAdV$ycAn@#LHL|
zG*o_mHgU?7BlZ7($Ctgi;aFE^*Vx#2X1@LWW_JEfsi(zsbaX%qD89bBntXnqEofoU
zdAr{|Q>KV8fO32jE4PQ2*Q4$C>yppTGIeorNk~j|OiEHZJzZbF?5=6{wFxt3te9(W
zV>73xr^hIH*VM_Ag@6A1d24HOQW6Ib&y`ot&&~CQ3_Mxr@BdTOS$%I~GP|sG*_Cae
z%HUY9^zny>+qG@npa1yy_~zzxe)m3^hrhnQ1}#B}icL&Z++F@Y?cScsGjl8_&oa&a
z^ltZiAqfeOq@<*c5f6_UWF;jjZB9FT=<wlY^?4Od-|toTtIw~Qba!{Tu)Mr~MTJH9
z`nc4OkB)){$CHou8G_axzP-JC?(tKny7ug`F)}vRF5!Q4E;Q<qn2e0ibiG)M!bdFI
zwr$gi-8JRpWcAIJpVMSyWNvIq<({nW&$b}{{yxyiXLWV8Tff{{P=Aa;$|U2!mdwi^
zzI-`y`SRqKmzPif@!+9jSVY8$6BCtZ8mIG}on^ZET|tY$qM(Y(%EavK?wK<s*_s=#
zTnX`;V*#3zm^W|Uf!;)0!egxlx3}l-e_Oge|Gr;AfkD;Yd1}71E^JQs|Muo)aGFoO
zsDOZjj}K3PQDODxv*w=u{>N|LjJ*9k3^eDFb;aX0DBuqsJjlQ!Z@0(kysn+yJiobC
z4`01Hb!~05W!0AzVde!792oB0z3bh(*94`h>lhOwBPb{+!Nay^@7|MV&p!Qrzdrr;
zw%l6*D`q3LeYwusDZcYty7%$6qNk^VPS^kWcw8veYHpuvx7gHO^%)r&3l}c@ZekV;
zYP;y@>PlJ`rIdoo=BdlV)<)fWU$yt&He+#(r4btv96K*gZ)s@}S}atbK4p=TAuAi(
zr$3+18y7rqD6x{|<KsK}>T8v)qoZTZhlA{(R?E>>IyyQ#YJL{&{5Wg&>|_(Et#9w#
ziFrH!|BvJLH+Glnmz9;BnPKR>dbjDnPxGGpT3T9yR&3u)pPy-4=cT#t@3&|fS=rOJ
z|H9yn;6;yCm8=K}xdWOH`~80Z`Skg<YR*fXI@<ddFIJY6l#JrPadzd3KY#v2?5Qwh
zWo2cUV^cY4b@+NF^ER*GpdbUOUeH|Y+GulztSc)PzO7&V?(S}3VPWUMK*8td=YvA8
zqOvm7;@Gvd(TlIYo@tsr?b_{SQyyJe8GP&g>uYNdpFFwp$i{Q$&fThmwRsoZn2-nB
zb>QWtq_5w<W{u9;&yUk4UE7f8TvSvPvA3#p=f|k1s7Z6@#%9H4WN3gEmfqc6e*F~4
zjT;ggw?$oQWM*%WzFfODVk1+7vU{J%?+ic9qw@8C9J|GIPxZRrk@dXu&e==z(4j*!
zj8eJQMsHsSPPnJ8u8EvHV}?Xr-Op6eM9lj2`a5^-?2$6<0<GxSnCu=C6C+`q)^ly~
zlb0_~{`mN~u&iuZ>gj1b5(W$n8#Wl+xpPOtsw87$Hb}tS-2B$|{PUCj?M_0+q~B|t
zJ8-~Z-_K{#OiWA-(*OVRadIk}n23ajhZ`9g&9JMj>U<2E;%Q`N-?Aj*-=Cj4QCm2|
z!>=b@j=KqO_cAgvmb|#Y*zoJ?YvZaf88I<2A3lG++%_fu`0?ZIOO~kcusLshc>D5l
z|IJljv-bUX#LdIT%+}nvxB9zL?ky88F0O*&;>EGM%Y0^=@ir7bJ~nak<mSzrjV~|v
zKYsUato8MMwZB1)m1obcnq?ksem}2Xs!-nG>v}Ky|9^@>wFeg$*DC8NT$KX*E2<}W
zR?n2n-2Cb9*Vm_Cg-tY@ZIIY>YfokI%2M<FTBfGLYCZwc+j3UEGkDik6RP#jtE$Q>
zc$rV)%S%f?)$2HMd}OG8y>>fjtgE!W(0}KS9T!%IuYbk&xKVB9!CDv1qo9>6GYpfx
zYL+Uu2rSCEckf<Bb#-{TL^){C`p56z)1$(#WpRJs4QcoC+j!o2r+mNex9;(N`Rj23
zqM(NAj;Shn-#bBzu@eumgsqQ@ePaw+fIa((?DpSBmUvE<Fv$p5s$ZuA>dIeMwaxq9
zcy_kAu)O^Fhlkt2t=5VK61`oe?3W{V*|zqazP@o?`oR-<x*zlJ^B5b?R1TgY=XqyU
zq@N#OVq&6B<R%tx@6)gN+&L8GB-|D!y)plN1F~L?VaJ5Y=ckyqN7h?3s~*0iS}CmN
z-sZLX=9bLMFYfKNuKo38<+}jTn!`=+5~?;XdAsa2tLvXRHI482PFBr2^u?n;+Vs}W
zVs+4ZAIqX8yPO4`I@r%$daJ2AY1`~|;vye^p4a?hej}tX#OaUirN?2upe-V%St9cF
ze+>I=zeSX`E3azsTb_JriRZ0v?ecXedL)gz-X<GJaBy*Hg>5T-?gttTIGAwY=FLd!
zw40k!XUv=_8m)fe=+V|ISFYSXwz2B#E3cVfj1?6d>;C?l8g+epe*C6Qn;2FEFF$9y
zf1AM4lRIku=3nJ5w4Zcj?W{fj|NY*Qb=50+Mug3etgEYz&N9v3QSi`d<=5kVvWr)&
z=$K#sFEZLhT0upn<-`dO(30JvCmq|=lp5Bq)qTJB`#ssCfsviJ->!+@KkwJq*WoWs
zTQ{sr|G52C>kfI(M`65uGM0xHxpx2fzW;x(pSu1rJDoFU&vJ5eKYrYAueY`C>8Yta
z@^*9X?X4D;mR>#e|Ig?1Bch|37hDHT9Ig$!zCPZ*PtF#!s@H3dzMfv6UhFQ=2-3Xz
zf0ac=ML(WQ_CImv%#q{A+2!Tu3mpgTqtFaqmS7<9;lqc7|Ns8}`1$kVuF~vjI+09w
zA|5KkS3q~`hSfejB{~mz&2*r3ekSK@&|U%P((1Y8_g0=r7Fl%2tt`9KmA7_fshH`P
zo>%uabtwxydcS(wk+rVfVmo&2%Br_K(94-oTU%RLP;lV-_37Tz^%%|nO`2hl$n@>o
zw;eloZmj?R?-j?-d74^UNmo_`o|$JW9UXo96`y;^#_4fYolj3sFaG)IsZgn*p&@7}
zPe$g<sj1pqKZVEFin_bIv++nASU$hbD=%+dSbAF8G3oq02j^H8cg=l`b5O40&Bo&$
z{r&28cKfbK$k+cV6cQGGd2g>ZsGGGqe0|#Ub91ATwx*qx0>x*=2ZiWuIf~%s5)YOJ
zR;pe1jvW@0CQk+p$#r&izPY`<Klk>w&_9V>fqehJoqh4{!Uh-QwScpfgde>NjqZ=G
zsj&g6zq?z!p`(MN>gy}lq9UWcRbQ2ql$aV2dod<XoY*66zHXP(K6Npjhy%*~HcGE|
z+Aeu@ZSCm}L1j=RcTIhNclY!*Ug=Li9`}RBr}<<oK$CHx#ROK>-`_1Qe}8WcsNW!d
z!nutn@%g#AOO`CjNVP71r=qFZxh{5hl7U1=SJ$I2FE5LBeSCJSbKSaiC9ke<dV70=
z_P(x-+Ir&DsifcE-m<W>FTVad^U@Md+v;yA=jYinR8&@8TotPQrONi;!Gk&X_w7CN
z`pD6vg5u)t?R>H?ZfsOO-Y>6iX}QzMkSkCSvh-0sX@d4Tv#%Ku%hlGIeeIBVq7~f8
zF}d`9O{kQ#w6~KJQ^Di8a{2lB&(6*D&dJeHcI$Ciei<~Vv1!w$6DLkwXzTOw^z!1`
zoPPe<_4xXuXMEgN%7+Q7`z=X6-Un(}MBO|!Rok-ekA-g37KS_?%evohw?{SI-&L9o
zYP!C-xH$953c;B(XWrWQ?AbG)Stgx3i=StudTG|w)^>lr6ME&W>h&{c&UESSPfJU4
zOH6#2+yD3Vh5v8kSOdHaQ%{K;>ya!zZpalFn3iVsOFHMt>%fl(R!n>2xcu_P-R1dr
z?%oA$F?n!jXK|F#HMHLDMo4crwDv;%<e;t%8w@~W@-i|qtE^=Ld!<Y=92^`#i=tT%
zWSDHqx~c^lU(U+XTDfxNkz>cQJg@WH|7i%~YIyP_<x5p<r}*FaiifPA#pgdiKW7Bh
zbk=2W9$Z-&%mCVrHd)=DDPVV5?xxL~-&_5|){tRDYxQh5I{*0j^W)<2HI9CMd<Bos
z{{oFNT`%NRJaTwJRMnkr`S<&JdwD@C<v@$CEG<P>Uw>Ir{^wF%O3IRHEc0xu!(Q**
z9iMY!L*wP;{*T|itGj>nTlv!`PizXlhe&pHb!~ZmWoL1^-One&LBYX4pG@}Ouw~1W
zCnqOI6{VU;dHVP~xt+g1_2MGe9$D*YQCqVXUVmNrxYzv0&*$?&k(rs9dCPKs-LFhB
zy_gf{=2~ao+M)@H^!WXCA|fIT0#Z_5*Vn~<{Q7li`uTaFEzYWTQ%;;cn|i!Y_R8w;
z^+$f@UyF>4OiWE}-MiP;YHr_z2?Blc_I^o8O1ZbTC|X#|`0}NMhmAQbEKEg3Ma263
z+2;Y9(|q3^W?|sw=Dt_;S~n+WN&1uZPtMN(FWg*qDb1?%bN$oRCwnA~R|GHTn_v4)
zl9Q7&B075V#EFgDx0|c`%}F>j!?2>dT3AMgr(yBp#oOW|ZES3|ls|vRUjOGKJ81sx
z=+Q(2iB;Bfs`V5V8T0eszv6QanXm=EHgvhw?Kgg(rfJD*?XCK{%FL(u>#NX5j~*2i
z7N&pND9XSfbP{wfz`^_X=dW0y!LVr2q8In~+ZQ}`eYt+$FRrB`*5&V(#O<w8`(AhZ
z$H&K#rdd~*XKn!vZ7q5g@-yJ&&f@2wT?q`Wt*tzgMlE)=zaG4NxpMFK%X=UzmF~<a
zto{4z>y6vDgLjwZ?)&rU^eVoe`H`E`c-#NSSAUlixmsyg@Mm{XV1S~bfR;j_a<6C^
ztIH9;3kuT&J~(xl7!(#cdf!SjdCMX>`-(&F_K4`2^F2#?e0|usO=ir`;O$yumLwfm
zxU=P!1d~=mvP<JJ*6FREe>clVhv_jomQA;rKl9JV=Wq5sPn`ee$C>JLzwMSTU9x=n
zY{&>)#7@Qeg|lrcjgB5YDk3ia`RDWbpKoQaXZ&!0nIE*V@@TiXWz`pr$H#h^Z_O{S
z{{43Q$`vaXY}s;U$NSl)J1Re`X@{*zxV|p7N6Pfoy5oT>Lv&(yUHNe<!QjEGSGSU@
z7xJxKxiaVGrlqGp-ntb9sv^JKTb7}4>C&ZNTdnu|Emzjk>RK7R{L$mbhhJS?&2Fm(
zT3vnX<UEn@7gmL??&#}NQ&CBAc-$jrJ8RP>qgk_V{eC5DS(M_yU$A@k?#XlK+Af%}
z&%2)6ykW@_6}^}p2Y!BjZdv+DWarME-!(uhD-KzCJ}m18O`=bqe*9RkG<)iv6DK_Q
z<ZL*$y#IN1V+8Z9-F`0a?u##GaPaY&Wm?6>#GH9^bMwb<-=5tqzt6g0)hf^ogx>WX
zrLV;}IXPKaSs%WCpP#dPOZE4>q@<*dj*bJjZ_j@6B;|4Xr%#_i`}=oo30<{z?b~f@
zFD-O#@96C{-S=+B$KsL_lYQ6k-J93Yz_4odYW81SLFwmkmq^}s&@l~$h7;f3-v0Q>
z6BCW(V@HopoH^68xY$@#RrSm))6*4ts;aE@_4Qs}UN<%*GM~5mz2;+#rzhv}KH10e
z|9`N9_V91pw(WQ}sC88N`B`8@#EFlOk4J9Jn!54k&al<0+S=WhE(Jk$B6n?Eduf_(
z^sX(TMdju1x2?VO=jUh3+Fx4=@AR4Z&%Jv6dhvq;jE4>#V*9hVR(|VN%{SKZhY#hl
zvaGW{bSRhgjdi?p^z2CS3EYY;5-ehuEfXy&|9-vB$;YRrrImHz*p&>EkkC-js>_s=
zB`0?_H8FjAcbEIXp32WFR<B;Xaid_v(Qfg_4<0l$H#09>xKKn)?AwlAruzE*e}8|M
zp5L9Fm-p<`)6)zst*sB=yx{?jE`SDlzJC2WW%~5ZNk_Sk^+-0ytp{xy-}_H?+qP{k
zE-nQR4lqvBi9B?$nf>OT%FQ45986HKvzs?nJKU)9)05plmhHQL>y}8}yt=+7{q>74
zZ)Ym_8*RJ)=Z-1<Uh6j{zvMA?*Ju$)Qdm+TosxcTj-svY+)I}(Im%aGT;!^vs#^Hy
z2<PN!?t5S6b1&Lk{r%Gk<^CBm_O-vZc)nJamX_vWYu>mv=y+Upt*E&8@#y@$M|TuH
z=HTH;d2?gqvEt6o&Wh^l&#UAAt~&p?^Z%{*nCze}r5Rb7PoErpEu*pW=uMxEoQf?H
z7CS?&I=)odo}Q-rxWE37^15~Fn(gYGoR~B<HCJ5Cn*3N%QSr^q&FmTVX=jceJzCLi
zrmo)p>{;5YSFb>WHpc1a-W2{hE?++<_x3i)+uC#O>-Sx+u)3vHx<%pjov_R|H#XXC
z3!P<MuIK9Ny65-1-D=xC>wn+Af9b*nhp4Ei_18she+N6JPG7XvIoMl!w_jLUwI7FK
zOX-9|xeZa#(I-!zW`1xMJl}je<>7&ZPhY;IJUulvwr}~eWkv7qSei)jt`1*smY2fw
zc6)@gvoj|jU)tGOraN};{%-MamT7iIW~Srfiw_<@K0M1byQ=s2)vHtQ?k@j)OgewT
z#f%xY)n?*)F$}lT&nYP>?b7jU_$Ij<wA^j>?Cd*TOLpy=_4W1j;J7$H2M30_y1F;F
zwyyTS-~D6#YF=MvEBzLMLz`yqeR_4T!4%!~US3`m&t|5dICaYCoQm%ZgG7&YxtdG1
zc0Y~Sv9{sTtTwsxt8M30y5>%(iIn`=8!3Ns=GTeS!tV=*uZuZ&_;B;n)6+MXy^Sid
zl3ly@?)_DpN;XMd{H0r6BE(l_c6VO!hRunUCzdGod2lEmNtkpfw_(-l)tNatOSW#E
z+SJ7K?EL)u&s&+NElD<gwEBOvSEeuX_5D|aqb?uFx6^z2{N?i{6`O^+m+cmQ^!C!y
zisB8M6_4C*ZJjkQe=4_P%Z8dwtG3L)bxY)%`1McAo4nSUnFN+U;h63({^*Ixw4a}!
zZ!UiB=hiQm`_AC^TK(06;=k|KZf7$zaq9`#nEaB5Z_1TTuTy^iU;KEkY=oRqXRB1X
zThEKi(2W949H-S=qwPz!yI;FLeczF?IqVNlX72guwdLx+!#Ad1?FiZt*)FIYqVw(Z
zgc?r8BL*rU#pc>i#kR{eOmdT6J#Xrys~w<D^V7-ccYY;Wyq{=#?`ERI^Hu>Tj>7km
zz7NY@Tu_v&d?L8aXXc@0zO&B=9WFrV+1zz#?bfYRL0ykKcVbSRK7I1Ui3NN1)SP%M
zac*<aCMnQT#|<@MfjdOS#Eeo;iKzL{>)E^4wy?19$&)8Ml`@Ab!&W7mK3cZ?$_m>#
zg_6nrw+_$r^<Q~&V@capy<MQ0sW#!&5px29gOe}yf;I_WUhdCdDbu}d_h-+Sp?-0D
z{DM!{&o7nMgIksp7P#XpXgC?Pi=IJTFGj(@;KRJw)vni_{%+r%nVEa|TpG)^WVfIv
z8SWSU-`VUPwIzSwPqy&z@ZxsGUml-QYgN*(xfi9?o|(CM)v8r?`>$(pne+4WuZ!84
zR1+H;+uPp$a!o)|l2T`9r?c#=gpBILU5%4#AW3abq<r?tnV~k1&5ED-=*8{vNKI9(
z+B@&e86Pp-sFZgbK*J4+ZBs(e?6`UB)~9RH`5!@@kXyIBoSc~K?ChTLd4p!Pk~|_^
zAF>I6cd;$md061?gy<{RToEo?qnfVe8YoiC4w_u!P~0~=)aEf{iQkd};lCf&*Z<|V
zx3_;>@8{>YWa-k4Sy#1|`OWQeTm0~(`urvP_T9U)u<QTz;B!`g9`e^aEDb9B`>WL0
zP1EYNE@;>;`PG$`KYsj}FlSEAGY{AHc}2^vW^wZI9Xo%%|G)tUE^h9#va4RCyuP;f
z$B!QmUcdIPsj)eG_H0O4m{ImM9Z(v%>~G&2x8A&bMy#gO9oFW93wG?>sTmp?dg|ff
z_NmjRiMa-fs9IV^diwd96_+fso?E<3qpRrWr&LuNo0yNEJ_UhB%8cFBcU`tjKi(%B
zeCW`jDbJpzxdw_neZ77^RLi%?^Z&Sjn#Cfnfm52<`L&9lpEJ!bIJ9EduBe$7g^Lzl
zehHGfaU<gC-|zRslaiCC_Vn<8%s46@A9AnqxomOUP2HJ=!sVcS@9pw+Guq{<KwF`6
za&KSX$->Sa92=Ya?1f9Y)pMQ)*Z2Q3oo$})wscZi+O$>0=eFhFKlkeDYDvo?6$XC0
z9}8wK7QXv(j`374*W_en23}s?9lN8;%jZ5kmSAw<%9WOb2@cCI8`k})=##ZRHc8c+
zq2k@n=Qp<J&!4&YjTe`BVYrd8ac6)3`=UR4zu$Y@sXouZ+2zshl5?QZB{@0J8s58S
z0z2=OoVzI=Uvu!}Wc8V**<!`d&pkae(>SfquVi`gIV(BUnFfhWo}QjLcXnJnbFq_8
zPEJlmRrTT1r$-^<m1)O%B=3~puRYeu;qULCm6dfyCNn%}M`zvT*I(b<-#`DxjflyQ
zUteE;{Mxl?>GLX&`AqDbS8{Hq-}2<cZM<t@cVD}c*co@{r?s&J&z&8G&7dii`~Uxa
z|MO+Jz3W6@Mx9f0EpP9uwFa#!IW<)~=kBhlsi&vSv~;@o@Y&hfPo6vph>Y~i&DGtW
zclXhSh0f3TR{MjJ>a^Ljy&W7F^78VICGReO|7?BzU+wB|Zze|Pi>)X=r=_K}V$GVA
zUteDO%(0mG>QxpK6BB6gC-<F!t9tprb2hfNot>SIOM?VOM2>U_DyIZ?w(c}em#_K2
zXlQ82aOBvrQ;%4<i@v-FRP&t$TJus?R(9mb5zxAfSFc|iO7J**e`~7SD3Z76zLAlU
zPu^rXtCEhhv&}cx|F4sgl|3sl^M&X0IV-Pbt&83578)uV8X8(rRTUHxa)iI`1GBNa
zrq%Ax<zlk3XFonZ-uUhIvZ}pu;^N}Pt?Ii9)6d<xd-vz<`~TPuY)n4>;oCQ}@Qg=%
zadB~=^(Ze&tVCQD?d|7hUS1|BCFNCCW_BU2LO*WLjWZL=RHZ+3)$J^OEe2Bj^=i0<
zwKcOr>M4=D`}@u^^V=|N(GR$>F}dCHu*lq!>6_1~`OP_Tb94GqjtvTTpZcu7K6%1~
zhOpH~@7<fH8@<gS|DKJpyOveS^vz98OrYUy9yaF1ZoQA@mfuS})+3qL7gFNk>8Yr#
z%^e*b{pauBn_IKRO-)UY96eh2_!#e>zkfab{mu0_Hi-m5&MJ%G+zH)Ssk)><7*uI&
z5Q0l-f;W9Du4xg09Muu##AUA7aul_*aH@%w`<LmtBb~zgiznaOw`cj`*X#G6ySUi>
zx2SXZx3{;`-^5IuHLJ^O>7&#7`%UKgdwV-C4J!QhCNjAvYpayKz5SIeQ%g%rhu>m~
z#m~+d=1FL^Nc;o2C1TFS#qPmjVQ#^}!VK<xG8;Q&&s<*@d-~7M&+Inmo}Hcj^QnG)
zX3g5L)hExKG1>R;#p3=CA3qxYJoEAK@rzkoQ_|Bn7d`c20PW)D=2q6$&OTtssko*Y
z<d<#{Ny(E3o7o%JSzo%pqp<nKiwqMf-h&4ZPMI==As{^5y`sXx&dyFJe&3xX)6=V}
ztQ;L3fBgLU@lm(_rCYadJ>D7}9SvGQ`0?vk*R^4rOJ9dEfCe2mZQg8I{Y|H?uI`xR
zhCZ`=QBhGB7Z-!%V?0hyPM~qq=^OX|c*O12CzHA9VPQ$hmO`<$ixx4Zr$3i7cgf4i
zdGhb~d-I=f=Gj(P{WoeAINSzttGKv(TAJF~vuCGFnPQNAP3Q5>DN{uL{Qaw`shN1V
zjo0S)o6QV=-j(lve9>M0>F4wI&rhn)e*$Wc|M@)szlyeYckpt*g;%pSZru2C>y3+9
zTd!QX5)c;VmYJ!kzvn~K$)5)fIBbd1jaoacql078q)7}OK0Yb8x8=^UF7MNg-ZtaR
zs#1&6S0apzjB3Z{%$WlkxJ*q=O-W1p^yRXD`Olb)42^yJ_JOu>mA<|v+5h|3ucBvX
zX1@Pr)hckfW$jL80jG|4875MzR;@a5;)H^c(WNIbj*g7g-`}w&+}l%m<?7XoH*TC*
zKELkNart^4hmepdg=t#$_VbIMp94*PGR&GatK!2!_Ub*~o}8Tg^VRD05gU_O0|NtF
z+S|*2?_0A*r>m>$#Oc$IZ|CoS`o8|Z^~Q}GkA2>rfB)U5nXALsKYI1**3%02g;hU3
zB!2q*x$^(t@4n~c?dw2?U1(`(ad2}#-gsONwDbYA#63U%eEq-A^2hpQZ$G}T(jp-T
zPKRq;yTw2g(w)NUD^{=G{kl-Z_2JIv^FT{QYpeeM{eIuSu+XrbUq0=4pX{Hf`t=X@
ze!pj2=^q&>xpL)7(1xEIH*Q?GcI{a0_q*+3tKXL9u(PvENlERJiQkaGxH@dDl7WGM
zy83cC`~M#f^FMy}%&oH0($Ue;p}Nr`>&gndzqw8vuhq7DwF-!7D=IclnIdu}YwO96
z6D9~eUVBbh-4C=$X!2y?;%8?bp0EFB%&>Fk&O3W5g+cRJn>KIWS@u@y!oQw7OTDL8
z?Roe0_4Ul`?8hsY&r7<yt5if(bmpA(>({H>+0Em(|8v2j`q`5wpt+PEFBbPVd`sH6
zW5<ruH|&31?Dq+qP*zY<0$RWNWb5^~*I%tc=1!eD)#l3uXNTX#wR(DbH*Vhi`pZ)B
z$Oi@EJSUFsr!QY-#yH-L%*xW@<mAjSk&=;>{rRAozu{Z@{YQ@<fBg9I;c@$ahH-Im
z3_i2XdP`ql6O@zli;k8qD=Pyns6F=Rxu2=2>7EaVxVLQG%2+aU<>`|rA6{DO{pU@(
zy|LWotgWD#|M)+T#MeY`@0&JF?9rn~pxt99)1RN2d02b>o~A__$6R@3A3b_h^+(Ui
z*?Hr}jSH79ZS|dP_VC#=wk`Ladb_$_eJpnBc<8#cB(6-gMc~oA>Yq=iNA4(ixFAPY
zNLcvrGT+%h9`e^)a7^S>)R}unOVp`D@!PA_>la;q$!60Rw_e@KDyqv&ai4q2G4P2a
xhrCYoKz683op1;$;_(pNkW0i<z8?9{*qp7ev-Q5CBm)BjgQu&X%Q~loCIDfAeCYrH

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/performance.md b/tensorflow/contrib/lite/g3doc/performance.md
index 6b7943caf8..ed11452716 100644
--- a/tensorflow/contrib/lite/g3doc/performance.md
+++ b/tensorflow/contrib/lite/g3doc/performance.md
@@ -3,8 +3,15 @@
 
 Mobile and embedded devices have limited computational resources and it is important to keep your application resource efficient. We have compiled a list of best practices and strategies you can use to optimize your model and application when using Tensorflow Lite.
 
-## Choose the most efficient model for the problem
-Some models may be too large to run on embedded devices. Instead of large models it is better to use a slightly less precise but smaller model for embedded devices. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices.
+## Choose the best model for the task
+Depending on the task you will need to make a tradeoff between model complexity and size. If your task requires high accuracy then you may need a large and complex model. Some tasks may work with a less precise model, for these tasks it is better to use a smaller but less precise model. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. For example, graphs below show accuracy and latency tradeoff for some common image classification models.
+
+![accuracy vs model size](images/performance/model_size_vs_accuracy.png "Accuracy vs Model size")
+
+
+![latency vs model size](images/performance/model_size_vs_latency.png "Latency vs Model size")
+
+One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices.
 
 You can retrain the listed models on your own dataset by using transfer learning. Check out our transfer learning tutorial for
 [image classification](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and
@@ -12,25 +19,25 @@ You can retrain the listed models on your own dataset by using transfer learning
 
 
 ## Profile your model
-Before starting any optimization, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time.
+Once you have selected a candidate model that is right for your task, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time.
 
 ## Profile and optimize operators in the graph
 If a particular operator appears frequently in the model and based on profiling you find the operator consuming the most amount of time, you can look into optimizing the operator.
  This scenario should be rare as Tensorflow Lite has optimized versions for most ops. However you may be able to write a faster version of a custom op, if you know the constraints in which the operator is executed. Check out our [custom operator documentation](custom_operators.md).
 
 ## Quantize your model
-If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. Fully quantized models can be remarkably power efficient as well.
+If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. 
 
 ## Tweak the number of threads
-Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](https://github.com/tensorflow/tensorflow/blob/1084594657a5d139102ac794f84d1427a710e39a/tensorflow/contrib/lite/interpreter.h#L337) threads.
+Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](https://github.com/tensorflow/tensorflow/blob/1084594657a5d139102ac794f84d1427a710e39a/tensorflow/contrib/lite/interpreter.h#L337) threads. Multi-threaded execution however comes at the cost of increased performance variability depending on what else is been executed concurrently. This is particularly the case for mobile apps. For example, isolated tests may show 2x speed up vs single-threaded but if another app is executing at the same time may result in worst performance than single-threaded.
 
 ## Eliminate redundant copies
-Tensorflow Lite is optimized to reduce redundant copies. The APIs allow user to [mmap a model file](https://github.com/tensorflow/tensorflow/blob/9982fd6c8831cbd2f58954f79ea71f26660393bc/tensorflow/contrib/lite/model.h#L152) and avoid copies. If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151).
+If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151).
 
 ## Profile your application with platform specific tools
 Platform specific tools like [Android profiler](https://developer.android.com/studio/profile/android-profiler) and [Instruments](https://help.apple.com/instruments/mac/current/) provide a wealth of profiling information that can be used to debug your app. Sometimes the performance bug may be not in the model but in parts of application code that interact with the model. Make sure to familiarize yourself with platform specific profiling tools and best practices for your platform.
 
-## Use hardware accelerators available on the device
+## Evaluate whether your model benefits from using hardware accelerators available on the device
 Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [Neural Networks API](https://developer.android.com/ndk/guides/neuralnetworks/) on Android.
 You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable Neural Networks API call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance.
 
-- 
GitLab


From 396a8a4105edd409d0821c4d5d0b920b315ffb72 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Mon, 8 Oct 2018 14:26:43 -0700
Subject: [PATCH 1263/1357] Add custom call with layout constraints. Add a
 variant of CustomCall which specifies arbitrary layout constraints on the
 operands and result. The existing non-layout-constrained CustomCall is
 changed to have no layout preference and can now be assigned arbitrary
 layouts by layout assignment.

PiperOrigin-RevId: 216249615
---
 .../compiler/tf2xla/kernels/index_ops_cpu.cc  |  22 +-
 tensorflow/compiler/xla/client/xla_builder.cc |  43 +++-
 tensorflow/compiler/xla/client/xla_builder.h  |  22 +-
 tensorflow/compiler/xla/layout_util.cc        |   6 +
 tensorflow/compiler/xla/layout_util.h         |   4 +
 .../xla/service/gpu/gpu_layout_assignment.cc  |  10 -
 .../xla/service/gpu/gpu_layout_assignment.h   |   2 -
 tensorflow/compiler/xla/service/hlo.proto     |   9 +-
 .../compiler/xla/service/hlo_instruction.cc   |  28 ++-
 .../compiler/xla/service/hlo_instruction.h    |  10 +
 .../compiler/xla/service/hlo_instructions.cc  |  33 ++-
 .../compiler/xla/service/hlo_instructions.h   |  32 ++-
 tensorflow/compiler/xla/service/hlo_parser.cc | 101 ++++++++--
 .../compiler/xla/service/hlo_parser_test.cc   |  67 ++++++
 .../compiler/xla/service/hlo_verifier.cc      |  22 +-
 .../compiler/xla/service/layout_assignment.cc | 108 +++++-----
 .../compiler/xla/service/layout_assignment.h  |  13 --
 .../xla/service/layout_assignment_test.cc     | 190 ++++++++++++++++++
 tensorflow/compiler/xla/shape_util.cc         |   2 +-
 .../compiler/xla/tests/custom_call_test.cc    |  50 ++++-
 20 files changed, 650 insertions(+), 124 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc
index 3d81ae9eb8..f210bfbd88 100644
--- a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc
+++ b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc
@@ -88,20 +88,30 @@ class ArgMaxCustomCallOp : public XlaOpKernel {
           xla::ConstantLiteral(&b, xla::LiteralUtil::CreateR0<int32>(dim)));
     }
 
-    xla::Shape xla_shape =
-        xla::ShapeUtil::MakeShape(xla::S64, output_shape.dim_sizes());
+    // The argmax function expects row-major layout.
+    xla::Shape xla_shape = xla::ShapeUtil::MakeShapeWithDescendingLayout(
+        xla::S64, output_shape.dim_sizes());
+    std::vector<xla::Shape> arg_shapes;
+    for (const xla::XlaOp& arg : args) {
+      auto shape_status = b.GetShape(arg);
+      OP_REQUIRES_OK(ctx, shape_status.status());
+      xla::Shape arg_shape = shape_status.ConsumeValueOrDie();
+      *arg_shape.mutable_layout() = xla::LayoutUtil::MakeDescendingLayout(
+          xla::ShapeUtil::Rank(arg_shape));
+      arg_shapes.push_back(std::move(arg_shape));
+    }
 
     // Tell XLA to call the custom code, defined in
     // index_ops_kernel_argmax_float_1d.cc.
     xla::XlaOp output;
     switch (input_shape.dims()) {
       case 1:
-        output =
-            xla::CustomCall(&b, "argmax_float_1d_xla_impl", args, xla_shape);
+        output = xla::CustomCallWithLayout(&b, "argmax_float_1d_xla_impl", args,
+                                           xla_shape, arg_shapes);
         break;
       case 2:
-        output =
-            xla::CustomCall(&b, "argmax_float_2d_xla_impl", args, xla_shape);
+        output = xla::CustomCallWithLayout(&b, "argmax_float_2d_xla_impl", args,
+                                           xla_shape, arg_shapes);
         break;
       default:
         OP_REQUIRES(ctx, false,
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 6b31831010..e7cf9ae363 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -1279,9 +1279,10 @@ XlaOp XlaBuilder::AfterAll(absl::Span<const XlaOp> tokens) {
   });
 }
 
-XlaOp XlaBuilder::CustomCall(const string& call_target_name,
-                             absl::Span<const XlaOp> operands,
-                             const Shape& shape, const string& opaque) {
+XlaOp XlaBuilder::CustomCall(
+    const string& call_target_name, absl::Span<const XlaOp> operands,
+    const Shape& shape, const string& opaque,
+    absl::optional<absl::Span<const Shape>> operand_shapes_with_layout) {
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     HloInstructionProto instr;
     if (absl::StartsWith(call_target_name, "$")) {
@@ -1293,6 +1294,31 @@ XlaOp XlaBuilder::CustomCall(const string& call_target_name,
     *instr.mutable_shape() = shape;
     instr.set_custom_call_target(call_target_name);
     instr.set_custom_call_opaque(opaque);
+    if (operand_shapes_with_layout.has_value()) {
+      if (!LayoutUtil::HasLayout(shape)) {
+        return InvalidArgument(
+            "Result shape must have layout for custom call with constrained "
+            "layout.");
+      }
+      if (operands.size() != operand_shapes_with_layout->size()) {
+        return InvalidArgument(
+            "Must specify a shape with layout for each operand for custom call "
+            "with constrained layout; given %d shapes, expected %d",
+            operand_shapes_with_layout->size(), operands.size());
+      }
+      instr.set_constrain_layout(true);
+      int64 operand_num = 0;
+      for (const Shape& operand_shape : *operand_shapes_with_layout) {
+        if (!LayoutUtil::HasLayout(operand_shape)) {
+          return InvalidArgument(
+              "No layout specified for operand %d for custom call with "
+              "constrained layout.",
+              operand_num);
+        }
+        *instr.add_operand_shapes_with_layout() = operand_shape;
+        ++operand_num;
+      }
+    }
     return AddInstruction(std::move(instr), HloOpcode::kCustomCall, operands);
   });
 }
@@ -2690,7 +2716,16 @@ XlaOp Call(XlaBuilder* builder, const XlaComputation& computation,
 XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name,
                  absl::Span<const XlaOp> operands, const Shape& shape,
                  const string& opaque) {
-  return builder->CustomCall(call_target_name, operands, shape, opaque);
+  return builder->CustomCall(call_target_name, operands, shape, opaque,
+                             /*operand_shapes_with_layout=*/absl::nullopt);
+}
+
+XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name,
+                           absl::Span<const XlaOp> operands, const Shape& shape,
+                           absl::Span<const Shape> operand_shapes_with_layout,
+                           const string& opaque) {
+  return builder->CustomCall(call_target_name, operands, shape, opaque,
+                             operand_shapes_with_layout);
 }
 
 XlaOp Complex(const XlaOp& real, const XlaOp& imag,
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 2e14e47a35..9ceede7a79 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -577,9 +577,10 @@ class XlaBuilder {
              absl::Span<const XlaOp> operands);
 
   // Enqueues a custom call instruction onto the computation.
-  XlaOp CustomCall(const string& call_target_name,
-                   absl::Span<const XlaOp> operands, const Shape& shape,
-                   const string& opaque);
+  XlaOp CustomCall(
+      const string& call_target_name, absl::Span<const XlaOp> operands,
+      const Shape& shape_with_layout, const string& opaque,
+      absl::optional<absl::Span<const Shape>> operand_shapes_with_layout);
 
   // The following methods enqueue element-wise binary arithmetic operations
   // onto the computation. The shapes of the operands have to match unless one
@@ -1197,6 +1198,10 @@ class XlaBuilder {
   friend XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name,
                           absl::Span<const XlaOp> operands, const Shape& shape,
                           const string& opaque);
+  friend XlaOp CustomCallWithLayout(
+      XlaBuilder* builder, const string& call_target_name,
+      absl::Span<const XlaOp> operands, const Shape& shape_with_layout,
+      absl::Span<const Shape> operand_shapes_with_layout, const string& opaque);
   friend XlaOp Complex(const XlaOp& real, const XlaOp& imag,
                        absl::Span<const int64> broadcast_dimensions);
   friend XlaOp Conj(const XlaOp& operand);
@@ -1732,6 +1737,17 @@ XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name,
                  absl::Span<const XlaOp> operands, const Shape& shape,
                  const string& opaque = "");
 
+// Overload which constructs a custom call with fixed layouts. The operands will
+// have the layouts specified by |operand_shapes_with_layout| when provided to
+// external code, and the external code is expected to produce a result with the
+// layout specified by |shape_with_layout|. All shapes in |shape_with_layout|
+// and |operand_shapes_with_layout| must have layouts.
+XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name,
+                           absl::Span<const XlaOp> operands,
+                           const Shape& shape_with_layout,
+                           absl::Span<const Shape> operand_shapes_with_layout,
+                           const string& opaque = "");
+
 // The following methods enqueue element-wise binary arithmetic operations
 // onto the computation. The shapes of the operands have to match unless one
 // of the operands is a scalar, or an explicit broadcast dimension is given
diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index d310335618..3c8db9aa45 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -65,6 +65,12 @@ void SetDefaultLayoutToContainer(
   return layout;
 }
 
+/* static */ Layout LayoutUtil::MakeDescendingLayout(int64 rank) {
+  std::vector<int64> layout(rank);
+  std::iota(layout.rbegin(), layout.rend(), static_cast<int64>(0));
+  return MakeLayout(layout);
+}
+
 /* static */ Layout LayoutUtil::MakeLayoutFromMajorToMinor(
     absl::Span<const int64> major_to_minor) {
   Layout layout;
diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h
index b78883c2d8..af032b1cae 100644
--- a/tensorflow/compiler/xla/layout_util.h
+++ b/tensorflow/compiler/xla/layout_util.h
@@ -40,6 +40,10 @@ class LayoutUtil {
   static Layout MakeLayoutFromMajorToMinor(
       absl::Span<const int64> major_to_minor);
 
+  // Returns a layout with descending ((i.e. {n, n-1, ..., 0}) minor-to-major
+  // dimensions.
+  static Layout MakeDescendingLayout(int64 rank);
+
   // Creates a sparse layout with the given maximum number of elements. (This is
   // a convenience function for protobuf construction.)
   static Layout MakeSparseLayout(int64 max_sparse_elements);
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index 1ffe855750..8c9a8adc61 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -213,16 +213,6 @@ Status GpuLayoutAssignment::AddBackendConstraints(
   return Status::OK();
 }
 
-bool GpuLayoutAssignment::CustomCallRequiresMajorFirstLayout(
-    const HloInstruction* instruction) {
-  // - Inputs to cudnn batchnorm custom calls don't need the major-first layout
-  //   (i.e. {n, n-1, ...0}) -- we can handle any layout.
-  // - Inputs to cudnn convolution require custom layouts handled in
-  //   AddBackendConstraints.
-  return !IsCustomCallToDnnBatchNorm(*instruction) &&
-         !IsCustomCallToDnnConvolution(*instruction);
-}
-
 Status GpuLayoutAssignment::PropagateOperandConstraint(
     const OperandLayoutConstraint& layout_constraint,
     LayoutConstraints* constraints) {
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
index 4ba7989e9c..6a48e55fd2 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
@@ -46,8 +46,6 @@ class GpuLayoutAssignment : public LayoutAssignment {
   Status PropagateBufferConstraint(
       const BufferLayoutConstraint& buffer_constraint,
       LayoutConstraints* constraints) override;
-  bool CustomCallRequiresMajorFirstLayout(
-      const HloInstruction* instruction) override;
 
  private:
   Status AddBackendConstraintsToDnnConvCustomCall(
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 1ea26ddd5b..a0eb9e6ddc 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto";
 option cc_enable_arenas = true;
 
 // Serialization of HloInstruction.
-// Next ID: 56
+// Next ID: 58
 message HloInstructionProto {
   reserved 10;
   reserved "parameter_name";
@@ -184,6 +184,13 @@ message HloInstructionProto {
   // Sharding for kDomain instructions.
   xla.OpSharding domain_entry_sharding = 54;
   xla.OpSharding domain_exit_sharding = 55;
+
+  // For custom call this indicates that the layouts are constrained. If
+  // constrain_layout is true then the 'shape' field must contain a layout, and
+  // 'operand_shapes_with_layout' must contain a shape with layout for each
+  // operand.
+  bool constrain_layout = 56;
+  repeated Shape operand_shapes_with_layout = 57;
 }
 
 // Serialization of HloComputation.
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 2f6db7cd7c..5c3908a9a4 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -396,9 +396,22 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
           operands(1), operands(2), computations(1));
       break;
     case HloOpcode::kCustomCall:
-      instruction = CreateCustomCall(proto.shape(), all_operands(),
-                                     proto.custom_call_target(),
-                                     proto.custom_call_opaque());
+      if (proto.constrain_layout()) {
+        // A proto RepeatedPtrField cannot be converted to a Span (it is a
+        // vector of pointers essentially) so create a vector of shapes to pass
+        // in.
+        std::vector<Shape> operand_shapes;
+        for (const Shape& shape : proto.operand_shapes_with_layout()) {
+          operand_shapes.push_back(shape);
+        }
+        instruction = CreateCustomCall(
+            proto.shape(), all_operands(), proto.custom_call_target(),
+            operand_shapes, proto.custom_call_opaque());
+      } else {
+        instruction = CreateCustomCall(proto.shape(), all_operands(),
+                                       proto.custom_call_target(),
+                                       proto.custom_call_opaque());
+      }
       if (proto.has_window()) {
         static_cast<HloCustomCallInstruction*>(instruction.get())
             ->set_window(proto.window());
@@ -1142,6 +1155,15 @@ bool HloInstruction::HasSideEffect() const {
       shape, operands, custom_call_target, opaque);
 }
 
+/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateCustomCall(
+    const Shape& shape, absl::Span<HloInstruction* const> operands,
+    absl::string_view custom_call_target,
+    absl::Span<const Shape> operand_shapes_with_layout,
+    absl::string_view opaque) {
+  return absl::make_unique<HloCustomCallInstruction>(
+      shape, operands, custom_call_target, opaque, operand_shapes_with_layout);
+}
+
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateTuple(
     absl::Span<HloInstruction* const> elements) {
   std::vector<Shape> element_shapes;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 374862c4b6..44f776ebac 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -734,6 +734,16 @@ class HloInstruction {
       const Shape& shape, absl::Span<HloInstruction* const> operands,
       absl::string_view custom_call_target, absl::string_view opaque = "");
 
+  // Overload which constrains the layouts of the operand and result. 'shape'
+  // and 'operand_shapes_with_layout' must have layouts.
+  // 'operand_shapes_with_layout' must have a compatible element for each
+  // operand.
+  static std::unique_ptr<HloInstruction> CreateCustomCall(
+      const Shape& shape, absl::Span<HloInstruction* const> operands,
+      absl::string_view custom_call_target,
+      absl::Span<const Shape> operand_shapes_with_layout,
+      absl::string_view opaque = "");
+
   // Creates a tuple instruction with the given elements. This is a convenience
   // wrapper around CreateVariadic.
   static std::unique_ptr<HloInstruction> CreateTuple(
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 152d8eacdb..2ec233eaec 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -1825,7 +1825,24 @@ HloCustomCallInstruction::HloCustomCallInstruction(
     : HloInstruction(HloOpcode::kCustomCall, shape),
       custom_call_target_(custom_call_target.begin(), custom_call_target.end()),
       opaque_(opaque.begin(), opaque.end()),
-      feature_group_count_(1) {
+      feature_group_count_(1),
+      layout_constrained_(false) {
+  for (auto operand : operands) {
+    AppendOperand(operand);
+  }
+}
+
+HloCustomCallInstruction::HloCustomCallInstruction(
+    const Shape& shape, absl::Span<HloInstruction* const> operands,
+    absl::string_view custom_call_target, absl::string_view opaque,
+    absl::Span<const Shape> operand_shapes_with_layout)
+    : HloInstruction(HloOpcode::kCustomCall, shape),
+      custom_call_target_(custom_call_target.begin(), custom_call_target.end()),
+      opaque_(opaque.begin(), opaque.end()),
+      feature_group_count_(1),
+      layout_constrained_(true),
+      operand_shapes_with_layout_(operand_shapes_with_layout.begin(),
+                                  operand_shapes_with_layout.end()) {
   for (auto operand : operands) {
     AppendOperand(operand);
   }
@@ -1843,6 +1860,12 @@ HloInstructionProto HloCustomCallInstruction::ToProto() const {
   proto.set_custom_call_target(custom_call_target_);
   proto.set_custom_call_opaque(opaque_);
   proto.set_feature_group_count(feature_group_count_);
+  if (layout_constrained()) {
+    proto.set_constrain_layout(true);
+    for (const Shape& shape : operand_shapes_with_layout_) {
+      *proto.add_operand_shapes_with_layout() = shape;
+    }
+  }
   return proto;
 }
 
@@ -1870,6 +1893,14 @@ std::vector<string> HloCustomCallInstruction::ExtraAttributesToStringImpl(
   if (!opaque_.empty()) {
     extra.push_back(StrCat("opaque=\"", CEscape(opaque_), "\""));
   }
+  if (layout_constrained()) {
+    std::vector<string> shape_strings;
+    for (const Shape& shape : operand_shapes_with_layout_) {
+      shape_strings.push_back(ShapeUtil::HumanStringWithLayout(shape));
+    }
+    extra.push_back(StrCat("operand_layout_constraints={",
+                           StrJoin(shape_strings, ", "), "}"));
+  }
   return extra;
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index e169604072..4c5fc759a3 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -1053,10 +1053,19 @@ class HloSelectAndScatterInstruction : public HloInstruction {
 
 class HloCustomCallInstruction : public HloInstruction {
  public:
-  explicit HloCustomCallInstruction(const Shape& shape,
-                                    absl::Span<HloInstruction* const> operands,
-                                    absl::string_view custom_call_target,
-                                    absl::string_view opaque);
+  HloCustomCallInstruction(const Shape& shape,
+                           absl::Span<HloInstruction* const> operands,
+                           absl::string_view custom_call_target,
+                           absl::string_view opaque);
+
+  // Constructor for a custom call with constrained layout. 'shape' and
+  // 'operands_with_layout' must all have layouts.
+  HloCustomCallInstruction(const Shape& shape,
+                           absl::Span<HloInstruction* const> operands,
+                           absl::string_view custom_call_target,
+                           absl::string_view opaque,
+                           absl::Span<const Shape> operand_shapes_with_layout);
+
   const Window& window() const override {
     CHECK(window_ != nullptr);
     return *window_;
@@ -1085,6 +1094,16 @@ class HloCustomCallInstruction : public HloInstruction {
   // Returns a serialized representation of this instruction.
   HloInstructionProto ToProto() const override;
 
+  // Returns whether the result and operand layouts are constrained.
+  bool layout_constrained() const { return layout_constrained_; }
+
+  // Returns the shapes (with layout) of the operands. CHECKs if this custom
+  // call does not have constrained layouts.
+  const std::vector<Shape>& operand_shapes_with_layout() const {
+    CHECK(layout_constrained());
+    return operand_shapes_with_layout_;
+  }
+
  private:
   std::vector<string> ExtraAttributesToStringImpl(
       const HloPrintOptions& options) const override;
@@ -1106,6 +1125,11 @@ class HloCustomCallInstruction : public HloInstruction {
   std::unique_ptr<ConvolutionDimensionNumbers> convolution_dimension_numbers_;
   // The number of feature groups. This is used for grouped convolutions.
   int64 feature_group_count_;
+  // Whether the result and operand layouts are constrained.
+  bool layout_constrained_;
+  // For layout-constrained custom calls, this vector holds the shape with
+  // layout for each operand.
+  std::vector<Shape> operand_shapes_with_layout_;
 };
 
 class HloPadInstruction : public HloInstruction {
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index dd62988bcc..96f9ff6654 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -174,6 +174,7 @@ class HloParser {
     kDistribution,
     kDomain,
     kPrecisionList,
+    kShapeList
   };
 
   struct AttrConfig {
@@ -240,6 +241,7 @@ class HloParser {
 
   bool ParseSliceRanges(SliceRanges* result);
   bool ParsePrecisionList(std::vector<PrecisionConfig::Precision>* result);
+  bool ParseShapeList(std::vector<Shape>* result);
   bool ParseInt64List(const TokKind start, const TokKind end,
                       const TokKind delim,
                       std::vector<tensorflow::int64>* result);
@@ -1341,6 +1343,7 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
       optional<Window> window;
       optional<ConvolutionDimensionNumbers> dnums;
       optional<int64> feature_group_count;
+      optional<std::vector<Shape>> operand_layout_constraints;
       attrs["custom_call_target"] = {/*required=*/true, AttrTy::kString,
                                      &custom_call_target};
       attrs["opaque"] = {/*required=*/false, AttrTy::kString, &opaque};
@@ -1349,12 +1352,52 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
                              AttrTy::kConvolutionDimensionNumbers, &dnums};
       attrs["feature_group_count"] = {/*required=*/false, AttrTy::kInt64,
                                       &feature_group_count};
+      attrs["operand_layout_constraints"] = {
+          /*required=*/false, AttrTy::kShapeList, &operand_layout_constraints};
       if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
         return false;
       }
-      instruction = builder->AddInstruction(
-          HloInstruction::CreateCustomCall(shape, operands, *custom_call_target,
-                                           opaque.has_value() ? *opaque : ""));
+      if (operand_layout_constraints.has_value()) {
+        if (!LayoutUtil::HasLayout(shape)) {
+          return Error(lexer_.GetLoc(),
+                       "Layout must be set on layout-constrained custom call");
+        }
+        if (operands.size() != operand_layout_constraints->size()) {
+          return Error(lexer_.GetLoc(),
+                       StrCat("Expected ", operands.size(),
+                              " operand layout constraints, ",
+                              operand_layout_constraints->size(), " given"));
+        }
+        for (int64 i = 0; i < operands.size(); ++i) {
+          const Shape& operand_shape_with_layout =
+              (*operand_layout_constraints)[i];
+          if (!LayoutUtil::HasLayout(operand_shape_with_layout)) {
+            return Error(lexer_.GetLoc(),
+                         StrCat("Operand layout constraint shape ",
+                                ShapeUtil::HumanStringWithLayout(
+                                    operand_shape_with_layout),
+                                " for operand ", i, " does not have a layout"));
+          }
+          if (!ShapeUtil::Compatible(operand_shape_with_layout,
+                                     operands[i]->shape())) {
+            return Error(
+                lexer_.GetLoc(),
+                StrCat(
+                    "Operand layout constraint shape ",
+                    ShapeUtil::HumanStringWithLayout(operand_shape_with_layout),
+                    " for operand ", i,
+                    " is not compatible with operand shape ",
+                    ShapeUtil::HumanStringWithLayout(operands[i]->shape())));
+          }
+        }
+        instruction = builder->AddInstruction(HloInstruction::CreateCustomCall(
+            shape, operands, *custom_call_target, *operand_layout_constraints,
+            opaque.has_value() ? *opaque : ""));
+      } else {
+        instruction = builder->AddInstruction(HloInstruction::CreateCustomCall(
+            shape, operands, *custom_call_target,
+            opaque.has_value() ? *opaque : ""));
+      }
       if (window.has_value()) {
         instruction->set_window(*window);
       }
@@ -2533,6 +2576,15 @@ bool HloParser::ParseAttributeHelper(
             ->emplace(result);
         return true;
       }
+      case AttrTy::kShapeList: {
+        std::vector<Shape> result;
+        if (!ParseShapeList(&result)) {
+          return false;
+        }
+        static_cast<optional<std::vector<Shape>>*>(attr_out_ptr)
+            ->emplace(result);
+        return true;
+      }
     }
   }();
   if (!success) {
@@ -2825,6 +2877,23 @@ bool HloParser::ParsePrecisionList(
                    parse_and_add_item);
 }
 
+// shapelist ::= '{' shapes '}'
+// precision_elements
+//   ::= /*empty*/
+//   ::= shape (',' shape)*
+bool HloParser::ParseShapeList(std::vector<Shape>* result) {
+  auto parse_and_add_item = [&]() {
+    Shape shape;
+    if (!ParseShape(&shape)) {
+      return false;
+    }
+    result->push_back(std::move(shape));
+    return true;
+  };
+  return ParseList(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma,
+                   parse_and_add_item);
+}
+
 // int64list ::= start int64_elements end
 // int64_elements
 //   ::= /*empty*/
@@ -2832,23 +2901,15 @@ bool HloParser::ParsePrecisionList(
 bool HloParser::ParseInt64List(const TokKind start, const TokKind end,
                                const TokKind delim,
                                std::vector<tensorflow::int64>* result) {
-  if (!ParseToken(start, StrCat("expects an int64 list starting with ",
-                                TokKindToString(start)))) {
-    return false;
-  }
-  if (lexer_.GetKind() == end) {
-    // empty
-  } else {
-    do {
-      tensorflow::int64 i;
-      if (!ParseInt64(&i)) {
-        return false;
-      }
-      result->push_back(i);
-    } while (EatIfPresent(delim));
-  }
-  return ParseToken(
-      end, StrCat("expects an int64 list to end with ", TokKindToString(end)));
+  auto parse_and_add_item = [&]() {
+    tensorflow::int64 i;
+    if (!ParseInt64(&i)) {
+      return false;
+    }
+    result->push_back(i);
+    return true;
+  };
+  return ParseList(start, end, delim, parse_and_add_item);
 }
 
 bool HloParser::ParseList(const TokKind start, const TokKind end,
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index 255123d331..17538c05bc 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -802,6 +802,43 @@ ENTRY %ConstantUnsignedNoOverflow () -> u64[] {
   ROOT %constant = u64[] constant(9223372036854775807)
 }
 
+)"
+},
+// CustomCallWithLayoutConstraints
+{
+"CustomCallWithLayoutConstraints",
+R"(HloModule CustomCallWithLayoutConstraints
+
+ENTRY %CustomCallWithLayoutConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] {
+  %p0 = f32[42,2,3]{0,1,2} parameter(0)
+  %p1 = f32[123,4]{0,1} parameter(1)
+  ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[123,4]{1,0}}
+}
+
+)"
+},
+// CustomCallWithLayoutConstraintsNoOperands
+{
+"CustomCallWithLayoutConstraintsNoOperands",
+R"(HloModule CustomCallWithLayoutConstraintsNoOperands
+
+ENTRY %CustomCallWithLayoutConstraints () -> f32[1,2,3] {
+  ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(), custom_call_target="baz", operand_layout_constraints={}
+}
+
+)"
+},
+// CustomCallWithLayoutConstraintsTupleShapes
+{
+"CustomCallWithLayoutConstraintsTupleShapes",
+R"(HloModule CustomCallWithLayoutConstraintsTupleShapes
+
+ENTRY %CustomCallWithLayoutConstraints (p0: (f32[2,2], f32[42,2,3]), p1: f32[123,4]) -> (f32[1,2,3], f32[1,2,3]) {
+  %p0 = (f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) parameter(0)
+  %p1 = f32[123,4]{0,1} parameter(1)
+  ROOT %custom-call = (f32[1,2,3]{0,2,1}, f32[1,2,3]{1,2,0}) custom-call((f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={(f32[2,2]{1,0}, f32[42,2,3]{2,0,1}), f32[123,4]{1,0}}
+}
+
 )"
 },
   });
@@ -2069,5 +2106,35 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] {
                              op::Broadcast(), op::Multiply(), op::Add()));
 }
 
+TEST_F(HloParserTest, CustomCallWrongNumberofOperandConstraints) {
+  const string original = R"(HloModule CustomCallWrongNumberofOperandConstraints
+
+ENTRY %CustomCallWrongNumberofOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] {
+  %p0 = f32[42,2,3]{0,1,2} parameter(0)
+  %p1 = f32[123,4]{0,1} parameter(1)
+  ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}}
+}
+
+)";
+  ExpectHasSubstr(ParseHloString(original).status().error_message(),
+                  "Expected 2 operand layout constraints, 1 given");
+}
+
+TEST_F(HloParserTest, CustomCallIncompatibleOperandConstraints) {
+  const string original = R"(HloModule CustomCallIncompatibleOperandConstraints
+
+ENTRY %CustomCallIncompatibleOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] {
+  %p0 = f32[42,2,3]{0,1,2} parameter(0)
+  %p1 = f32[123,4]{0,1} parameter(1)
+  ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[555,5]{1,0}}
+}
+
+)";
+  ExpectHasSubstr(ParseHloString(original).status().error_message(),
+                  "operand 1 is not compatible with operand shape");
+}
+
+// custom call incompatible shape.
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 496fe1795d..be3bee5975 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -360,7 +360,27 @@ Status ShapeVerifier::HandleCall(HloInstruction* call) {
   return CheckShape(call, call->to_apply()->root_instruction()->shape());
 }
 
-Status ShapeVerifier::HandleCustomCall(HloInstruction*) { return Status::OK(); }
+Status ShapeVerifier::HandleCustomCall(HloInstruction* instruction) {
+  const HloCustomCallInstruction* custom_call =
+      DynCast<const HloCustomCallInstruction>(instruction);
+  TF_RET_CHECK(custom_call != nullptr);
+  if (custom_call->layout_constrained()) {
+    // If the layout is constrained, verify all the respective shapes have
+    // layouts and that the constrained operand shapes match the shapes of the
+    // operands.
+    TF_RET_CHECK(LayoutUtil::HasLayout(custom_call->shape()));
+    TF_RET_CHECK(custom_call->operand_count() ==
+                 custom_call->operand_shapes_with_layout().size());
+    for (int64 i = 0; i < custom_call->operand_count(); ++i) {
+      const Shape& operand_shape_with_layout =
+          custom_call->operand_shapes_with_layout()[i];
+      TF_RET_CHECK(ShapeUtil::Compatible(custom_call->operand(i)->shape(),
+                                         operand_shape_with_layout));
+      TF_RET_CHECK(LayoutUtil::HasLayout(operand_shape_with_layout));
+    }
+  }
+  return Status::OK();
+}
 
 Status ShapeVerifier::HandleSlice(HloInstruction* slice) {
   return CheckShape(slice,
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index cc4a342e9d..ad65b147c1 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -419,6 +419,16 @@ Status LayoutAssignment::BuildHostChannelConstraints(
   return Status::OK();
 }
 
+namespace {
+
+bool IsLayoutConstrainedCustomCall(HloInstruction* instruction) {
+  const HloCustomCallInstruction* custom_call =
+      DynCast<HloCustomCallInstruction>(instruction);
+  return custom_call != nullptr && custom_call->layout_constrained();
+}
+
+}  // namespace
+
 Status LayoutAssignment::AddMandatoryConstraints(
     const ComputationLayout* computation_layout,
     ChannelLayoutConstraints* channel_constraints, HloComputation* computation,
@@ -434,7 +444,6 @@ Status LayoutAssignment::AddMandatoryConstraints(
   // Constrain layouts of instructions which define values with pre-existing
   // layouts.
   for (auto* instruction : computation->instructions()) {
-    Shape const* shape_with_layout = nullptr;
     if (instruction->opcode() == HloOpcode::kInfeed) {
       // Infeed layouts must match the layout of the original inserted
       // instruction.
@@ -456,17 +465,21 @@ Status LayoutAssignment::AddMandatoryConstraints(
         if (parameter_layout.LayoutIsSet()) {
           // Parameter layouts must match the respective layout in
           // ComputationLayout, if there is one.
-          shape_with_layout = &parameter_layout.shape();
+          TF_RETURN_IF_ERROR(constraints->SetInstructionLayout(
+              parameter_layout.shape(), instruction));
         }
       }
-    }
-    if (shape_with_layout != nullptr) {
+    } else if (IsLayoutConstrainedCustomCall(instruction)) {
+      const HloCustomCallInstruction* custom_call =
+          DynCast<HloCustomCallInstruction>(instruction);
       TF_RETURN_IF_ERROR(
-          constraints->SetInstructionLayout(*shape_with_layout, instruction));
-    }
-
-    if (instruction->opcode() == HloOpcode::kSend ||
-        instruction->opcode() == HloOpcode::kRecv) {
+          constraints->SetInstructionLayout(custom_call->shape(), custom_call));
+      for (int64 i = 0; i < custom_call->operand_count(); ++i) {
+        TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
+            custom_call->operand_shapes_with_layout()[i], custom_call, i));
+      }
+    } else if (instruction->opcode() == HloOpcode::kSend ||
+               instruction->opcode() == HloOpcode::kRecv) {
       CHECK(get_channel_constraints(instruction))
           << "Multi-module layout assignment requires ChannelLayoutConstraints";
       int64 channel_id = instruction->channel_id();
@@ -621,31 +634,6 @@ Status LayoutAssignment::AddMandatoryConstraints(
       TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
           false_computation_layout.parameter_shape(0), instruction, 2,
           /*mandatory=*/true));
-    } else if (instruction->opcode() == HloOpcode::kCustomCall) {
-      if (!CustomCallRequiresMajorFirstLayout(instruction)) {
-        continue;
-      }
-      // Add constraints for kCustomCall instruction operands and instructions.
-      // For now we only support major-first layouts for all inputs and outputs.
-      Shape result_shape = ShapeUtil::MakeShapeWithDescendingLayout(
-          instruction->shape().element_type(),
-          AsInt64Slice(instruction->shape().dimensions()));
-      TF_RETURN_IF_ERROR(
-          constraints->SetInstructionLayout(result_shape, instruction));
-      for (int64 i = 0; i < instruction->operand_count(); ++i) {
-        const Shape& operand_shape = instruction->operand(i)->shape();
-        // Opaque operands don't get a layout constraint.
-        if (ShapeUtil::IsOpaque(operand_shape)) {
-          continue;
-        }
-
-        Shape row_major_operand_shape =
-            ShapeUtil::MakeShapeWithDescendingLayout(
-                operand_shape.element_type(),
-                AsInt64Slice(operand_shape.dimensions()));
-        TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
-            row_major_operand_shape, instruction, i));
-      }
     }
   }
   // Finally set the result layout to match ComputationLayout, if there is one.
@@ -676,16 +664,18 @@ Status CheckCallLayout(HloInstruction* call,
   return Status::OK();
 }
 
-// Custom calls have fixed input and output layouts.
-Status CheckCustomCallLayout(HloInstruction* custom_call) {
-  for (const HloInstruction* operand : custom_call->operands()) {
-    TF_RET_CHECK(
-        ShapeUtil::IsOpaque(operand->shape()) ||
-        LayoutUtil::IsMonotonicWithDim0Major(operand->shape().layout()));
+// Operands of layout-constrained custom calls must match the expected
+// constrained layouts.
+Status CheckCustomCallLayout(HloInstruction* instruction) {
+  if (IsLayoutConstrainedCustomCall(instruction)) {
+    const HloCustomCallInstruction* custom_call =
+        DynCast<HloCustomCallInstruction>(instruction);
+    for (int64 i = 0; i < custom_call->operand_count(); ++i) {
+      TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(
+          custom_call->operand(i)->shape(),
+          custom_call->operand_shapes_with_layout()[i]));
+    }
   }
-  TF_RET_CHECK(
-      ShapeUtil::IsOpaque(custom_call->shape()) ||
-      LayoutUtil::IsMonotonicWithDim0Major(custom_call->shape().layout()));
   return Status::OK();
 }
 
@@ -932,9 +922,7 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) {
               FindOrDie(computation_layouts_, instruction->to_apply())));
           break;
         case HloOpcode::kCustomCall:
-          if (CustomCallRequiresMajorFirstLayout(instruction)) {
-            TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction));
-          }
+          TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction));
           break;
         case HloOpcode::kFusion:
           TF_RETURN_IF_ERROR(CheckFusionLayout(instruction));
@@ -1554,11 +1542,11 @@ Status LayoutAssignment::CalculateComputationLayout(
 
 Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) {
   // Clear existing layouts of the instructions.  All layouts must be assigned
-  // by the LayoutAssignment pass, except for those on infeeds, parameters,
-  // and the computation result. The latter two are specified in
-  // computation_layout, so we only need to keep the existing layouts for
-  // infeeds.  Clearing the layouts here avoids hiding potential bugs in the
-  // layout assignment pass that may accidentally use the existing layout.
+  // by the LayoutAssignment pass, except for those on parameters, the
+  // computation result, and a couple special cases. The former two are
+  // specified in computation_layout.  Clearing the layouts here avoids hiding
+  // potential bugs in the layout assignment pass that may accidentally use the
+  // existing layout.
   for (HloInstruction* instruction : computation->instructions()) {
     if (instruction->opcode() == HloOpcode::kBitcast) {
       // bitcasts are inherently layout sensitive and so a bitcast instruction
@@ -1567,7 +1555,9 @@ Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) {
           "Unexpected bitcast operation seen during layout assignment: %s.",
           instruction->ToString());
     }
-    if (instruction->opcode() != HloOpcode::kInfeed) {
+    // Some instructions carry mandatory layouts in their shape.
+    if (instruction->opcode() != HloOpcode::kInfeed &&
+        !IsLayoutConstrainedCustomCall(instruction)) {
       LayoutUtil::ClearLayout(instruction->mutable_shape());
     }
   }
@@ -1802,6 +1792,18 @@ StatusOr<bool> LayoutAssignment::Run(HloModule* module) {
   }
   TF_RETURN_IF_ERROR(Init());
 
+  // Verify computation layout is sane.
+  const HloComputation* entry = module->entry_computation();
+  TF_RET_CHECK(entry_computation_layout_->parameter_count() ==
+               entry->num_parameters());
+  for (int64 i = 0; i < entry->num_parameters(); ++i) {
+    TF_RET_CHECK(
+        ShapeUtil::Compatible(entry_computation_layout_->parameter_shape(i),
+                              entry->parameter_instruction(i)->shape()));
+  }
+  TF_RET_CHECK(ShapeUtil::Compatible(entry_computation_layout_->result_shape(),
+                                     entry->root_instruction()->shape()));
+
   // We do two passes. The first one we pass a nullptr ComputationLayout to
   // the RunOnComputation() calls (for non entry computations), and we register
   // the ComputationLayout which are naturally flowing in DFS fashion to the
@@ -1873,7 +1875,6 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kCrossReplicaSum:
     case HloOpcode::kAllToAll:
     case HloOpcode::kCollectivePermute:
-    case HloOpcode::kCustomCall:
     case HloOpcode::kDivide:
     case HloOpcode::kDynamicSlice:
     case HloOpcode::kDynamicUpdateSlice:
@@ -1930,6 +1931,7 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kConstant:
     case HloOpcode::kConvolution:
     case HloOpcode::kCopy:
+    case HloOpcode::kCustomCall:
     case HloOpcode::kDomain:
     case HloOpcode::kDot:
     case HloOpcode::kFusion:
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index 2d48e12263..cb56f4cd19 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -333,19 +333,6 @@ class LayoutAssignment : public HloModulePass {
       const ResultLayoutConstraint& layout_constraint,
       LayoutConstraints* constraints);
 
-  // By default LayoutAssignment ensures that inputs and outputs of CustomCalls
-  // have the "major-first" layout (i.e.  {n, n-1, ..., 0}).
-  //
-  // If this function returns true, LayoutAssignment does not set a layout for
-  // the given CustomCall.  It's up to the backend to set one in
-  // AddBackendConstraints, if necessary.
-  //
-  // Precondition: instruction->opcode() == HloOpcode::kCustomCall.
-  virtual bool CustomCallRequiresMajorFirstLayout(
-      const HloInstruction* /*instruction*/) {
-    return true;
-  }
-
   // Called after layouts of an instruction have been finalized to allow
   // subclasses to check for platform specific assumptions.
   virtual Status Verify(const HloInstruction* instruction) {
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 2c549cd872..ff6fdb5e4a 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -65,6 +65,27 @@ class LayoutAssignmentTest : public HloVerifiedTestBase {
         FindInstruction(module, name)->shape().layout().minor_to_major();
     return std::vector<int64>(minor_to_major.begin(), minor_to_major.end());
   }
+
+  void ExpectLayoutIs(const Shape& shape,
+                      absl::Span<const int64> minor_to_major) {
+    const Layout expected = LayoutUtil::MakeLayout(minor_to_major);
+    EXPECT_TRUE(LayoutUtil::Equal(shape.layout(), expected))
+        << "Expected layout " << expected << ", actual " << shape.layout();
+  }
+
+  void ExpectTupleLayoutIs(
+      const Shape& shape,
+      std::initializer_list<absl::Span<const int64>> minor_to_majors) {
+    int i = 0;
+    for (const absl::Span<const int64> minor_to_major : minor_to_majors) {
+      const Layout expected = LayoutUtil::MakeLayout(minor_to_major);
+      const Layout& actual = ShapeUtil::GetTupleElementShape(shape, i).layout();
+      EXPECT_TRUE(LayoutUtil::Equal(actual, expected))
+          << "Expected tuple element " << i << " layout " << expected
+          << ", actual " << actual;
+      ++i;
+    }
+  }
 };
 
 TEST_F(LayoutAssignmentTest, ComputationLayout) {
@@ -1102,5 +1123,174 @@ TEST_F(LayoutAssignmentTest, TupleCopyOnLayoutMismatch) {
   EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0));
 }
 
+TEST_F(LayoutAssignmentTest, CustomCallNotLayoutConstrained) {
+  const char* module_str = R"(
+HloModule CustomCallNotLayoutConstrained
+
+ENTRY %CustomCallWithNotLayoutConstrained (p: f32[42,2,3]) -> f32[1,2,3,4] {
+  %p = f32[42,2,3] parameter(0)
+  ROOT %custom-call = f32[1,2,3,4] custom-call(f32[42,2,3] %p), custom_call_target="baz"
+}
+)";
+  // Try with a couple different layouts. In each case the custom calls operand
+  // and result layout should match that of the computation.
+  {
+    TF_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<VerifiedHloModule> module,
+        ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+    ComputationLayout computation_layout = module->entry_computation_layout();
+    *computation_layout.mutable_parameter_layout(0) =
+        ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 2, 1}));
+    *computation_layout.mutable_result_layout() = ShapeLayout(
+        ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {3, 2, 0, 1}));
+    AssignLayouts(module.get(), &computation_layout);
+
+    HloInstruction* root = module->entry_computation()->root_instruction();
+    ASSERT_THAT(root, op::CustomCall(op::Parameter()));
+    ExpectLayoutIs(root->shape(), {3, 2, 0, 1});
+    ExpectLayoutIs(root->operand(0)->shape(), {0, 2, 1});
+  }
+  {
+    TF_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<VerifiedHloModule> module,
+        ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+    ComputationLayout computation_layout = module->entry_computation_layout();
+    *computation_layout.mutable_parameter_layout(0) =
+        ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 1, 2}));
+    *computation_layout.mutable_result_layout() = ShapeLayout(
+        ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {0, 2, 3, 1}));
+    AssignLayouts(module.get(), &computation_layout);
+
+    HloInstruction* root = module->entry_computation()->root_instruction();
+    ASSERT_THAT(root, op::CustomCall(op::Parameter()));
+    ExpectLayoutIs(root->shape(), {0, 2, 3, 1});
+    ExpectLayoutIs(root->operand(0)->shape(), {0, 1, 2});
+  }
+}
+
+TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrained) {
+  const char* module_str = R"(
+HloModule CustomCallLayoutConstrained
+
+ENTRY %CustomCallWithLayoutConstraints (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] {
+  %p0 = f32[4,4] parameter(0)
+  %p1 = f32[2,3] parameter(1)
+  ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(f32[4,4] %p0, f32[2,3] %p1), custom_call_target="baz", operand_layout_constraints={f32[4,4]{0,1}, f32[2,3]{1,0}}
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<VerifiedHloModule> module,
+      ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+  ComputationLayout computation_layout = module->entry_computation_layout();
+  *computation_layout.mutable_parameter_layout(0) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}));
+  *computation_layout.mutable_parameter_layout(1) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0}));
+  *computation_layout.mutable_result_layout() = ShapeLayout(
+      ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3}));
+  AssignLayouts(module.get(), &computation_layout);
+
+  // The custom call should be partially encapsulated in kCopy instructions
+  // because of the layout mismatches.
+  ASSERT_THAT(module->entry_computation()->root_instruction(),
+              op::Copy(op::CustomCall(op::Copy(), op::Parameter())));
+
+  const HloInstruction* custom_call =
+      module->entry_computation()->root_instruction()->operand(0);
+  ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1});
+  ExpectLayoutIs(custom_call->operand(0)->shape(), {0, 1});
+  ExpectLayoutIs(custom_call->operand(1)->shape(), {1, 0});
+}
+
+TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedZeroOperands) {
+  const char* module_str = R"(
+HloModule CustomCallLayoutConstrainedZeroOperands
+
+ENTRY %CustomCallLayoutConstrainedZeroOperands () -> f32[1,2,3,4] {
+  ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(), custom_call_target="baz", operand_layout_constraints={}
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<VerifiedHloModule> module,
+      ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+  ComputationLayout computation_layout = module->entry_computation_layout();
+  *computation_layout.mutable_result_layout() = ShapeLayout(
+      ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3}));
+  AssignLayouts(module.get(), &computation_layout);
+
+  ASSERT_THAT(module->entry_computation()->root_instruction(),
+              op::Copy(op::CustomCall()));
+
+  const HloInstruction* custom_call =
+      module->entry_computation()->root_instruction()->operand(0);
+  ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1});
+}
+
+TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleOperand) {
+  const char* module_str = R"(
+HloModule CustomCallLayoutConstrainedTupleOperand
+
+ENTRY %CustomCallLayoutConstrainedTupleOperand (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] {
+  %p0 = f32[4,4] parameter(0)
+  %p1 = f32[2,3] parameter(1)
+  %tuple = (f32[4,4], f32[2,3]) tuple(%p0, %p1)
+  ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(%tuple), custom_call_target="baz", operand_layout_constraints={(f32[4,4]{1,0}, f32[2,3]{0,1})}
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<VerifiedHloModule> module,
+      ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+  ComputationLayout computation_layout = module->entry_computation_layout();
+  *computation_layout.mutable_parameter_layout(0) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}));
+  *computation_layout.mutable_parameter_layout(1) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0}));
+  *computation_layout.mutable_result_layout() = ShapeLayout(
+      ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3}));
+  AssignLayouts(module.get(), &computation_layout);
+
+  HloInstruction* root = module->entry_computation()->root_instruction();
+  ExpectLayoutIs(root->shape(), {2, 1, 0, 3});
+
+  ASSERT_THAT(module->entry_computation()->root_instruction(),
+              op::Copy(op::CustomCall(op::Tuple())));
+
+  const HloInstruction* custom_call =
+      module->entry_computation()->root_instruction()->operand(0);
+  ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1});
+  ExpectTupleLayoutIs(custom_call->operand(0)->shape(), {{1, 0}, {0, 1}});
+}
+
+TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleResult) {
+  const char* module_str = R"(
+HloModule CustomCallLayoutConstrainedTupleResult
+
+ENTRY %CustomCallLayoutConstrainedTupleResult (p0: f32[4,4]) -> (f32[4,4]{1,0}, f32[2,3]{0,1}) {
+  %p0 = f32[4,4] parameter(0)
+  ROOT %custom-call = (f32[4,4]{1,0}, f32[2,3]{0,1}) custom-call(%p0), custom_call_target="baz", operand_layout_constraints={f32[4,4]{1,0}}
+}
+)";
+  // Try with a couple different layouts. In each case the custom calls operand
+  // and result layout should match that of the computation.
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<VerifiedHloModule> module,
+      ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+  ComputationLayout computation_layout = module->entry_computation_layout();
+  *computation_layout.mutable_parameter_layout(0) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}));
+  *computation_layout.mutable_result_layout() =
+      ShapeLayout(ShapeUtil::MakeTupleShape(
+          {ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}),
+           ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})}));
+  AssignLayouts(module.get(), &computation_layout);
+
+  ExpectTupleLayoutIs(module->entry_computation()->root_instruction()->shape(),
+                      {{1, 0}, {1, 0}});
+
+  const HloInstruction* custom_call =
+      FindInstruction(module.get(), "custom-call");
+  ExpectTupleLayoutIs(custom_call->shape(), {{1, 0}, {0, 1}});
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index d244923532..7f0201942b 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -1645,7 +1645,7 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape,
 }
 
 std::ostream& operator<<(std::ostream& out, const Shape& shape) {
-  out << ShapeUtil::HumanString(shape);
+  out << ShapeUtil::HumanStringWithLayout(shape);
   return out;
 }
 
diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc
index a693fa3595..001490c6a8 100644
--- a/tensorflow/compiler/xla/tests/custom_call_test.cc
+++ b/tensorflow/compiler/xla/tests/custom_call_test.cc
@@ -105,8 +105,7 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR2F32Reduce)) {
   LiteralTestUtil::ExpectR0Near<float>(10.0f, result, error_spec_);
 }
 
-XLA_TEST_F(CustomCallTest,
-           DISABLED_ON_GPU(CustomCall_UsedInOtherComputations)) {
+XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(UsedInOtherComputations)) {
   auto module = CreateNewModule();
   auto b = HloComputation::Builder(TestName());
 
@@ -130,6 +129,53 @@ XLA_TEST_F(CustomCallTest,
       Array3D<float>{{{2, 3}, {4, 5}}, {{3, 4}, {5, 6}}}, result);
 }
 
+XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(InputAndOutputLayoutDiffer)) {
+  auto module = CreateNewModule();
+  auto b = HloComputation::Builder(TestName());
+
+  auto input =
+      b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p"));
+  b.AddInstruction(
+      HloInstruction::CreateCustomCall(r2f32_, {input}, "Add1ToValues"));
+
+  module->AddEntryComputation(b.Build());
+  ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0}));
+  ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1}));
+
+  Literal argument = LiteralUtil::CreateR2<float>({{1.f, 2.f}, {3.f, 4.f}});
+
+  // Note, the expected result is transposed! This is because the input and
+  // output layouts of the custom call differ and the called function just
+  // blindly adds one to each element.
+  Literal result = ExecuteAndTransfer(std::move(module), {&argument});
+  LiteralTestUtil::ExpectR2Equal<float>({{2.f, 4.f}, {3.f, 5.f}}, result);
+}
+
+XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(LayoutConstrained)) {
+  // The argument and result of the computation are set to different layouts,
+  // but the custom call is layout constrained to a fixed operand and result
+  // layout, so the correct result should be produced.
+  auto module = CreateNewModule();
+  auto b = HloComputation::Builder(TestName());
+
+  auto input =
+      b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p"));
+
+  const Shape& r2f32_dim0_major =
+      ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0});
+  b.AddInstruction(HloInstruction::CreateCustomCall(
+      r2f32_dim0_major, {input}, "Add1ToValues", {r2f32_dim0_major}));
+
+  module->AddEntryComputation(b.Build());
+  ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0}));
+  ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1}));
+
+  Literal argument = LiteralUtil::CreateR2<float>({{1.f, 2.f}, {3.f, 4.f}});
+
+  Literal result = ExecuteAndTransfer(std::move(module), {&argument});
+  LiteralTestUtil::ExpectR2Equal<float>({{2.f, 3.f}, {4.f, 5.f}}, result);
+}
+
 class CustomCallClientAPITest : public ClientLibraryTestBase {};
 
 // When using the client API, CustomCall targets can't begin with '$' -- these
-- 
GitLab


From af5b714179ff5e279ba27c024f453e2d75636ac9 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 8 Oct 2018 14:43:55 -0700
Subject: [PATCH 1264/1357] Add more logging to the convolution
 transformations.

PiperOrigin-RevId: 216252980
---
 .../xla/service/gpu/cudnn_convolution_algorithm_picker.cc      | 3 +++
 .../compiler/xla/service/gpu/cudnn_convolution_rewriter.cc     | 3 +++
 .../xla/service/gpu/cudnn_fused_convolution_rewriter.cc        | 3 ++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index 590c0a7d54..6d4a72038f 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -360,6 +360,9 @@ StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnInstruction(
   HloInstruction* new_call = computation->AddInstruction(
       instr->CloneWithNewOperands(new_call_shape, instr->operands()));
 
+  VLOG(1) << "Replacing convolution " << instr->ToString() << " with "
+          << new_call->ToString();
+
   TF_RETURN_IF_ERROR(new_call->set_backend_config(backend_config));
 
   // Repackage new_call so it has the same shape as the original call, namely
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
index ef29237301..437d25727e 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
@@ -525,6 +525,9 @@ StatusOr<bool> RunOnInstruction(HloInstruction* conv) {
   TF_RETURN_IF_ERROR(
       custom_call->set_backend_config(GetDefaultBackendConfig()));
 
+  VLOG(1) << "Replacing convolution " << conv->ToString() << " with "
+          << custom_call->ToString();
+
   // The CustomCall returns a tuple (conv_result, scratch_memory).  Extract out
   // the conv result and replace `conv` with it.
   TF_RETURN_IF_ERROR(conv->parent()->ReplaceWithNewInstruction(
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
index 3761c19cfc..d508cbc2e1 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
@@ -234,7 +234,8 @@ StatusOr<std::unique_ptr<HloInstruction>> TryRewriteToCudnnForwardRelu(
   config.set_side_input_scale(alpha_side_input);
   TF_RETURN_IF_ERROR(new_conv->set_backend_config(config));
 
-  VLOG(1) << "Rewriting " << conv->name() << " to " << new_conv->name();
+  VLOG(1) << "Replacing convolution " << conv->ToString() << " with "
+          << new_conv->ToString();
   return HloInstruction::CreateGetTupleElement(conv->shape().tuple_shapes(0),
                                                new_conv, 0);
 }
-- 
GitLab


From b3bd7b378d00190fef831092836a5df62e39e7ed Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Mon, 8 Oct 2018 14:44:37 -0700
Subject: [PATCH 1265/1357] Ignore args and kwargs for defun's get_concrete_fn
 if `PolymorphicFunction` was created with an input_signature.

PiperOrigin-RevId: 216253122
---
 tensorflow/python/eager/function.py      | 14 ++++++++++++++
 tensorflow/python/eager/function_test.py |  9 ++++-----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 99bf375ea7..ff138cad1e 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -664,6 +664,11 @@ class Function(object):
 
     return self._build_call_outputs(outputs)
 
+  @property
+  def name(self):
+    """Function name."""
+    return self._inference_function.name
+
   @property
   def graph(self):
     """Returns the graph from which this function was constructed."""
@@ -721,6 +726,10 @@ class Function(object):
     return nest.map_structure(lambda x: x.dtype if x is not None else None,
                               self._func_graph.structured_outputs)
 
+  def add_to_graph(self, g):
+    """Adds this function into the graph g."""
+    return self._inference_function.add_to_graph(g)
+
   def _construct_backprop_function(self):
     """Constructs the backprop function object for this function."""
     backwards_graph = FuncGraph(_backward_name(self._func_graph.name))
@@ -1133,6 +1142,8 @@ class PolymorphicFunction(object):
       *args: inputs to specialize on.
       **kwargs: inputs to specialize on.
     """
+    if self._input_signature:
+      args, kwargs = None, None
     graph_function, _ = self._maybe_define_function(args, kwargs)
     return graph_function
 
@@ -1322,6 +1333,9 @@ def register(func, *args, **kwargs):
   function definition into graph. Register function with different input param
   will result into multiple version of functions registered in graph.
 
+  Also, `args` and `kwargs` are ignored if this `PolymorphicFunction` was
+  created with an `input_signature`.
+
   Args:
     func: the PolymorphicFunction instance that generated by a @defun
     *args: input arguments for the Python function.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index e46bde098b..953f4300cf 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1841,11 +1841,10 @@ class FunctionTest(test.TestCase):
         # pylint: disable=protected-access
         self.assertEqual(len(graph._functions), 3)
 
-        # Test input param shape mismatch
-        t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
-        with self.assertRaisesRegexp(
-            ValueError, 'Python inputs incompatible with input_signature'):
-          function.register(defun_matmul, t2, t2)
+        # Test register function with cache, note inputs are ignored.
+        function.register(defun_matmul)
+        graph = ops.get_default_graph()
+        self.assertEqual(len(graph._functions), 3)
 
   def testRegisterFunctionWithCache(self):
     def matmul(x, y):
-- 
GitLab


From 220c0f90af05ed1ca86831258888cc80757654fd Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Mon, 8 Oct 2018 15:00:36 -0700
Subject: [PATCH 1266/1357] [XLA] Simplify loop nesting in HandleConvolution

The calculation of a spatial coordinate in the kernel and activations is not
dependent on which part of the contracted dimension (input feature) we are in.

Rather than nesting the loops, the loops can be siblings:
- One loop over spatial dimensions
- One loop over the input feature group

This reduces the nesting depth which makes the code a little more readable and
might be slightly faster due work invariant in the spatial loop getting hoisted
out.

PiperOrigin-RevId: 216255839
---
 .../xla/service/hlo_evaluator_typed_visitor.h | 96 +++++++++----------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index a450dc6ff5..84fbbd3e0c 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -1072,66 +1072,66 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
 
       // Convolve input feature with kernel.
       do {
+        // Find corresponding spatial dimension index for input (lhs).
+        int64 lhs_linear_spatial_index = 0;
+        int64 rhs_linear_spatial_index = 0;
+        for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) {
+          // Spatial dimension number for input (lhs) and output.
+          const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki);
+          const int64 output_spatial_dim = dnums.output_spatial_dimensions(ki);
+
+          // Calculate lhs (input) index without taking base dilation into
+          // account.
+          const auto& window_dim = window.dimensions(ki);
+          const int64 undilated_index =
+              out_index[output_spatial_dim] * window_dim.stride() -
+              window_dim.padding_low() +
+              rhs_spatial_index[ki] * window_dim.window_dilation();
+          // Skip if the lhs (input) index is to be dilated.  As an
+          // optimization, skip this mod if there's no dilation.
+          if (window_dim.base_dilation() > 1 &&
+              undilated_index % window_dim.base_dilation() != 0) {
+            goto cnt;
+          }
+
+          // Calculate the actual lhs (input) index after dilation.  As an
+          // optimization, skip this integer divide if there's no dilation.
+          int64 lhs_spatial_index;
+          if (window_dim.base_dilation() > 1) {
+            lhs_spatial_index = undilated_index / window_dim.base_dilation();
+          } else {
+            lhs_spatial_index = undilated_index;
+          }
+
+          // Skip if input index is not in bounds.
+          if (!(lhs_spatial_index >= 0 &&
+                lhs_spatial_index < lhs_shape.dimensions(input_spatial_dim))) {
+            goto cnt;
+          }
+
+          lhs_linear_spatial_index +=
+              lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim];
+          rhs_linear_spatial_index +=
+              (window_dim.window_reversal()
+                   ? ((window_dim.size() - 1) - rhs_spatial_index[ki])
+                   : rhs_spatial_index[ki]) *
+              rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)];
+        }
+
         for (int64 rhs_iz = 0; rhs_iz < input_feature_group_size; ++rhs_iz) {
           const int64 iz =
               feature_group_index * input_feature_group_size + rhs_iz;
 
-          int64 lhs_linear_index = 0;
+          int64 lhs_linear_index = lhs_linear_spatial_index;
           lhs_linear_index += out_index[output_batch_dim] *
                               lhs_dim_multipliers[input_batch_dim];
           lhs_linear_index += iz * lhs_dim_multipliers[input_z_dim];
 
-          int64 rhs_linear_index = 0;
+          int64 rhs_linear_index = rhs_linear_spatial_index;
           rhs_linear_index += out_index[output_z_dim] *
                               rhs_dim_multipliers[kernel_output_z_dim];
           rhs_linear_index += rhs_iz * rhs_dim_multipliers[kernel_input_z_dim];
 
-          // Find corresponding spatial dimension index for input (lhs).
-          for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) {
-            // Spatial dimension number for input (lhs) and output.
-            const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki);
-            const int64 output_spatial_dim =
-                dnums.output_spatial_dimensions(ki);
-
-            // Calculate lhs (input) index without taking base dilation into
-            // account.
-            const auto& window_dim = window.dimensions(ki);
-            const int64 undilated_index =
-                out_index[output_spatial_dim] * window_dim.stride() -
-                window_dim.padding_low() +
-                rhs_spatial_index[ki] * window_dim.window_dilation();
-            // Skip if the lhs (input) index is to be dilated.  As an
-            // optimization, skip this mod if there's no dilation.
-            if (window_dim.base_dilation() > 1 &&
-                undilated_index % window_dim.base_dilation() != 0) {
-              goto cnt;
-            }
-
-            // Calculate the actual lhs (input) index after dilation.  As an
-            // optimization, skip this integer divide if there's no dilation.
-            int64 lhs_spatial_index;
-            if (window_dim.base_dilation() > 1) {
-              lhs_spatial_index = undilated_index / window_dim.base_dilation();
-            } else {
-              lhs_spatial_index = undilated_index;
-            }
-            lhs_linear_index +=
-                lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim];
-
-            // Skip if input index is not in bounds.
-            if (!(lhs_spatial_index >= 0 &&
-                  lhs_spatial_index <
-                      lhs_shape.dimensions(input_spatial_dim))) {
-              goto cnt;
-            }
-
-            rhs_linear_index +=
-                (window_dim.window_reversal()
-                     ? ((window_dim.size() - 1) - rhs_spatial_index[ki])
-                     : rhs_spatial_index[ki]) *
-                rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)];
-          }
-
           result_val +=
               static_cast<ElementwiseT>(lhs_literal_data[lhs_linear_index]) *
               static_cast<ElementwiseT>(rhs_literal_data[rhs_linear_index]);
-- 
GitLab


From 5da3cebe00111aa43e34b5a3fc12d1a97b838ba7 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 15:02:13 -0700
Subject: [PATCH 1267/1357] Automated rollback of commit
 09b0fc199129e0f487a39741bdf674cf09035cbc

PiperOrigin-RevId: 216256115
---
 .../core/kernels/data/shuffle_dataset_op.cc   |  2 +-
 .../data/experimental/kernel_tests/BUILD      | 13 ------
 .../kernel_tests/random_dataset_test.py       | 45 -------------------
 .../kernel_tests/shuffle_and_repeat_test.py   | 21 +--------
 .../data/experimental/ops/random_ops.py       | 21 ++-------
 .../data/experimental/ops/shuffle_ops.py      | 21 ++-------
 tensorflow/python/data/kernel_tests/BUILD     |  1 -
 .../kernel_tests/shuffle_dataset_op_test.py   | 25 +----------
 tensorflow/python/data/ops/dataset_ops.py     | 22 ++-------
 tensorflow/python/data/util/BUILD             |  1 -
 tensorflow/python/data/util/random_seed.py    |  5 +--
 .../python/data/util/random_seed_test.py      | 13 +-----
 12 files changed, 16 insertions(+), 174 deletions(-)
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py

diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 9f54c381a9..66466d6a36 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -485,7 +485,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
                      int64 buffer_size, int64 seed, int64 seed2, int64 count)
         : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
-          seed2_(seed2) {}
+          seed2_(seed) {}
 
     string DebugString() const override {
       return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_,
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index a67f6ff031..4eef9580ad 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -453,18 +453,6 @@ cuda_py_test(
     tags = ["no_windows_gpu"],
 )
 
-py_test(
-    name = "random_dataset_test",
-    srcs = ["random_dataset_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python/data/experimental/ops:random_ops",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
 py_library(
     name = "reader_dataset_ops_test_base",
     testonly = 1,
@@ -574,7 +562,6 @@ py_test(
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
     ],
 )
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py
deleted file mode 100644
index d403a575ec..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for `tf.data.experimental.RandomDataset()`."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from absl.testing import parameterized
-
-from tensorflow.python.data.experimental.ops import random_ops
-from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import errors
-
-
-class RandomDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      ("NoSeed", None),
-      ("WithSeed", 42),
-  )
-  def testZipRandomDataset(self, seed):
-    dataset = random_ops.RandomDataset(seed=seed).take(30)
-    dataset = dataset_ops.Dataset.zip((dataset, dataset))
-    iterator = dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(30):
-        x, y = sess.run(next_element)
-        self.assertEqual(x, y)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
index 883169495f..c208963a86 100644
--- a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
@@ -17,7 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.experimental.ops import shuffle_ops
@@ -28,7 +27,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 
 
-class ShuffleAndRepeatTest(test_base.DatasetTestBase, parameterized.TestCase):
+class ShuffleAndRepeatTest(test_base.DatasetTestBase):
 
   def _build_ds(self, seed, count=5, num_elements=20):
     return dataset_ops.Dataset.range(num_elements).apply(
@@ -111,24 +110,6 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.session(graph=g) as sess:
         sess.run(get_next_op)
 
-  @parameterized.named_parameters(
-      ("NoSeed", None),
-      ("WithSeed", 42),
-  )
-  def testShuffleAndRepeatAndZipDataset(self, seed):
-    dataset = dataset_ops.Dataset.range(10).apply(
-        shuffle_ops.shuffle_and_repeat(10, count=3, seed=seed))
-    dataset = dataset_ops.Dataset.zip((dataset, dataset))
-    iterator = dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(30):
-        x, y = sess.run(next_element)
-        self.assertEqual(x, y)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/ops/random_ops.py b/tensorflow/python/data/experimental/ops/random_ops.py
index 25d7fbf691..e3a2aeab31 100644
--- a/tensorflow/python/data/experimental/ops/random_ops.py
+++ b/tensorflow/python/data/experimental/ops/random_ops.py
@@ -33,26 +33,13 @@ class RandomDataset(dataset_ops.DatasetSource):
   def __init__(self, seed=None):
     """A `Dataset` of pseudorandom values."""
     super(RandomDataset, self).__init__()
-
-    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
-    # is iterated over, and cache it in `self._graph_seed_map`. This supports
-    # two features: iterating over the same `ShuffleDataset` twice in the same
-    # pipeline and observing the same order (by tying the seeds together with
-    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
-    # which requires the stateful RNG op to be created inside the same graph as
-    # the dataset.
-    self._original_seed = seed
-    self._graph_seed_map = {}
+    self._seed, self._seed2 = random_seed.get_seed(seed)
 
   def _as_variant_tensor(self):
-    try:
-      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
-    except KeyError:
-      seed, seed2 = random_seed.get_seed(self._original_seed)
-      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
-
     return gen_dataset_ops.random_dataset(
-        seed=seed, seed2=seed2, **dataset_ops.flat_structure(self))
+        seed=self._seed,
+        seed2=self._seed2,
+        **dataset_ops.flat_structure(self))
 
   @property
   def output_classes(self):
diff --git a/tensorflow/python/data/experimental/ops/shuffle_ops.py b/tensorflow/python/data/experimental/ops/shuffle_ops.py
index a82e4b7d09..a4307212da 100644
--- a/tensorflow/python/data/experimental/ops/shuffle_ops.py
+++ b/tensorflow/python/data/experimental/ops/shuffle_ops.py
@@ -39,32 +39,17 @@ class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset):
     else:
       self._count = ops.convert_to_tensor(
           count, dtype=dtypes.int64, name="count")
-
-    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
-    # is iterated over, and cache it in `self._graph_seed_map`. This supports
-    # two features: iterating over the same `ShuffleDataset` twice in the same
-    # pipeline and observing the same order (by tying the seeds together with
-    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
-    # which requires the stateful RNG op to be created inside the same graph as
-    # the dataset.
-    self._original_seed = seed
-    self._graph_seed_map = {}
+    self._seed, self._seed2 = random_seed.get_seed(seed)
 
   def _as_variant_tensor(self):
-    try:
-      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
-    except KeyError:
-      seed, seed2 = random_seed.get_seed(self._original_seed)
-      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
-
     # pylint: disable=protected-access
     input_resource = self._input_dataset._as_variant_tensor()
     return gen_dataset_ops.shuffle_and_repeat_dataset(
         input_resource,
         buffer_size=self._buffer_size,
         count=self._count,
-        seed=seed,
-        seed2=seed2,
+        seed=self._seed,
+        seed2=self._seed2,
         **dataset_ops.flat_structure(self))
     # pylint: enable=protected-access
 
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index ecb24103b3..c7295d6e69 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -443,7 +443,6 @@ tf_py_test(
     srcs = ["shuffle_dataset_op_test.py"],
     additional_deps = [
         ":test_base",
-        "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
index 6001721726..347af18576 100644
--- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
@@ -19,7 +19,6 @@ from __future__ import print_function
 
 import collections
 
-from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.kernel_tests import test_base
@@ -32,7 +31,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
+class ShuffleDatasetTest(test_base.DatasetTestBase):
 
   def testShuffleDataset(self):
     components = (
@@ -210,27 +209,5 @@ class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
-  @parameterized.named_parameters(
-      ("ReshuffleEachIterationNoSeed", None, True),
-      ("ReshuffleEachIterationWithSeed", 42, True),
-      ("NoReshuffleEachIterationNoSeed", None, False),
-      ("NoReshuffleEachIterationWithSeed", 42, False),
-  )
-  def testShuffleAndZipDataset(self, seed, reshuffle):
-    dataset = (dataset_ops.Dataset.range(10)
-               .shuffle(10, seed=seed, reshuffle_each_iteration=reshuffle)
-               .repeat(3))
-    dataset = dataset_ops.Dataset.zip((dataset, dataset))
-    iterator = dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(30):
-        x, y = sess.run(next_element)
-        self.assertEqual(x, y)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 2d036fd0d6..b7e19055f2 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -2254,34 +2254,18 @@ class ShuffleDataset(UnaryDataset):
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
-
-    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
-    # is iterated over, and cache it in `self._graph_seed_map`. This supports
-    # two features: iterating over the same `ShuffleDataset` twice in the same
-    # pipeline and observing the same order (by tying the seeds together with
-    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
-    # which requires the stateful RNG op to be created inside the same graph as
-    # the dataset.
-    self._original_seed = seed
-    self._graph_seed_map = {}
-
+    self._seed, self._seed2 = random_seed.get_seed(seed)
     if reshuffle_each_iteration is None:
       self._reshuffle_each_iteration = True
     else:
       self._reshuffle_each_iteration = reshuffle_each_iteration
 
   def _as_variant_tensor(self):
-    try:
-      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
-    except KeyError:
-      seed, seed2 = random_seed.get_seed(self._original_seed)
-      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
-
     return gen_dataset_ops.shuffle_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         buffer_size=self._buffer_size,
-        seed=seed,
-        seed2=seed2,
+        seed=self._seed,
+        seed2=self._seed2,
         reshuffle_each_iteration=self._reshuffle_each_iteration,
         **flat_structure(self))
 
diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD
index 95bf3209d7..39082ce370 100644
--- a/tensorflow/python/data/util/BUILD
+++ b/tensorflow/python/data/util/BUILD
@@ -142,7 +142,6 @@ py_test(
         ":random_seed",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:random_ops",
         "//tensorflow/python:util",
     ],
 )
diff --git a/tensorflow/python/data/util/random_seed.py b/tensorflow/python/data/util/random_seed.py
index d24df6d957..d5169f7a53 100644
--- a/tensorflow/python/data/util/random_seed.py
+++ b/tensorflow/python/data/util/random_seed.py
@@ -24,7 +24,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
 
 
 def get_seed(seed):
@@ -38,7 +37,7 @@ def get_seed(seed):
 
   Returns:
     A tuple of two `tf.int64` scalar tensors that should be used for the local
-    seeds of the calling dataset.
+    seed of the calling dataset.
   """
   seed, seed2 = random_seed.get_seed(seed)
   if seed is None:
@@ -46,7 +45,7 @@ def get_seed(seed):
   else:
     seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed")
   if seed2 is None:
-    seed2 = random_ops.random_uniform([], 1, 2**63 - 1, dtype=dtypes.int64)
+    seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
   else:
     with ops.name_scope("seed2") as scope:
       seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64)
diff --git a/tensorflow/python/data/util/random_seed_test.py b/tensorflow/python/data/util/random_seed_test.py
index 5df2e38c62..a809151e6e 100644
--- a/tensorflow/python/data/util/random_seed_test.py
+++ b/tensorflow/python/data/util/random_seed_test.py
@@ -41,6 +41,7 @@ class RandomSeedTest(test.TestCase):
         # (input_graph_seed, input_op_seed)
         # and output from get_seed:
         # (output_graph_seed, output_op_seed)
+        ((None, None), (0, 0)),
         ((None, 1), (random_seed.DEFAULT_GRAPH_SEED, 1)),
         ((1, 1), (1, 1)),
         ((0, 0), (0, 2**31 - 1)),  # Avoid nondeterministic (0, 0) output
@@ -77,18 +78,6 @@ class RandomSeedTest(test.TestCase):
       self.assertEqual((g_seed, op_seed), toutput, msg=msg)
       random_seed.set_random_seed(None)
 
-  @test_util.run_in_graph_and_eager_modes
-  def testNondeterministicRandomSeed(self):
-    random_seed.set_random_seed(None)
-    op_seeds = []
-    for _ in range(50):
-      g_seed, op_seed = data_random_seed.get_seed(None)
-      g_seed = self.evaluate(g_seed)
-      op_seed = self.evaluate(op_seed)
-      self.assertEqual(0, g_seed)
-      self.assertNotEqual(0, op_seed)
-      op_seeds.append(op_seed)
-    self.assertGreater(len(set(op_seeds)), 1)
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From b055d78b0edbf117ec5f7f2662d3bb2781ae02b3 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Mon, 8 Oct 2018 15:09:57 -0700
Subject: [PATCH 1268/1357] Fix issue with type inference for ops with fixed
 output types

Use the ArgDef::type field when available for propagating
the output types from a given unsupported operator.

PiperOrigin-RevId: 216257741
---
 tensorflow/contrib/lite/toco/import_tensorflow.cc |  7 +++++--
 .../contrib/lite/toco/import_tensorflow_test.cc   | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 133ef79a34..32f22e1ea0 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1151,11 +1151,14 @@ tensorflow::Status ConvertUnsupportedOperator(
     op->output_data_types.push_back(ConvertDataType(output_type));
   } else if (op_def != nullptr) {
     for (const auto& output_arg : op_def->output_arg()) {
-      if (HasAttr(node, output_arg.type_attr())) {
+      if (output_arg.type() != tensorflow::DT_INVALID) {
+        op->output_data_types.push_back(ConvertDataType(output_arg.type()));
+      } else if (HasAttr(node, output_arg.type_attr())) {
         op->output_data_types.push_back(
             ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr())));
       } else {
-        LOG(INFO) << "Op node missing output type attribute: " << node.name();
+        LOG(WARNING) << "Op node missing output type attribute: "
+                     << node.name();
         op->output_data_types.clear();
         break;
       }
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
index 8a236d4444..cd9a144b52 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
@@ -235,6 +235,21 @@ TEST_P(TypeImportTest, BasicTypeInference) {
 INSTANTIATE_TEST_CASE_P(BasicTypeInference, TypeImportTest,
                         ::testing::ValuesIn(UnaryTestTypes()));
 
+TEST(ImportTest, TypeInferenceWithFixedOutputType) {
+  // Create an op that has a fixed output type (bool).
+  Model model;
+  EXPECT_TRUE(ImportNode(BuildNode("IsFinite", {{1, 2}, {2, 3}}), &model).ok());
+  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
+  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
+  const TensorFlowUnsupportedOperator* op =
+      static_cast<const TensorFlowUnsupportedOperator*>(
+          model.operators[0].get());
+
+  // The static output type should be indicated in the imported op.
+  ASSERT_THAT(op->output_data_types,
+              ::testing::ElementsAre(ArrayDataType::kBool));
+}
+
 TEST(ImportTest, FailedTypeInference) {
   // Create a unary op with no Type ("T") annotation.
   NodeDef node;
-- 
GitLab


From 0b13d0806b061deaec0e96cfdca1ae4509174f89 Mon Sep 17 00:00:00 2001
From: Ruoxin Sang <rxsang@google.com>
Date: Mon, 8 Oct 2018 15:24:56 -0700
Subject: [PATCH 1269/1357] Simple comment fix in CheckpointInputPipelineHook.

PiperOrigin-RevId: 216260216
---
 tensorflow/python/data/experimental/ops/iterator_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/experimental/ops/iterator_ops.py b/tensorflow/python/data/experimental/ops/iterator_ops.py
index 72d7d58f06..5eb2563977 100644
--- a/tensorflow/python/data/experimental/ops/iterator_ops.py
+++ b/tensorflow/python/data/experimental/ops/iterator_ops.py
@@ -198,7 +198,7 @@ class CheckpointInputPipelineHook(session_run_hook.SessionRunHook):
     # is run *after* this hook. That is troublesome because
     # 1. If a checkpoint exists and this hook restores it, the initializer hook
     #    will override it.
-    # 2. If no checkpoint exists, this hook will try to save an initialized
+    # 2. If no checkpoint exists, this hook will try to save an uninitialized
     #    iterator which will result in an exception.
     #
     # As a temporary fix we enter the following implicit contract between this
-- 
GitLab


From a991acba07ce6c5903ee84e4a72d3d59e22b77fc Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Mon, 8 Oct 2018 15:26:34 -0700
Subject: [PATCH 1270/1357] Internal Change.

PiperOrigin-RevId: 216260437
---
 tensorflow/contrib/__init__.py | 8 --------
 tensorflow/python/__init__.py  | 7 -------
 2 files changed, 15 deletions(-)

diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py
index e71b0e0ae3..f52a1a7bab 100644
--- a/tensorflow/contrib/__init__.py
+++ b/tensorflow/contrib/__init__.py
@@ -21,14 +21,6 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.python.tools import component_api_helper
-component_api_helper.package_hook(
-    parent_package_str=(
-        "tensorflow.contrib"),
-    child_package_str=(
-        "tensorflow_estimator.contrib.estimator"))
-del component_api_helper
-
 # Add projects here, they will show up under tf.contrib.
 from tensorflow.contrib import autograph
 from tensorflow.contrib import batching
diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py
index 4921ecc43c..a2ab63bb48 100644
--- a/tensorflow/python/__init__.py
+++ b/tensorflow/python/__init__.py
@@ -48,13 +48,6 @@ import numpy as np
 
 from tensorflow.python import pywrap_tensorflow
 
-from tensorflow.python.tools import component_api_helper
-component_api_helper.package_hook(
-    parent_package_str='tensorflow.python',
-    child_package_str=(
-        'tensorflow_estimator.python.estimator'))
-del component_api_helper
-
 # Protocol buffers
 from tensorflow.core.framework.graph_pb2 import *
 from tensorflow.core.framework.node_def_pb2 import *
-- 
GitLab


From eb0f862ba60f41e8d0f06ceb6fc65f7f9905a25a Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 15:27:40 -0700
Subject: [PATCH 1271/1357] Automated rollback of commit
 13b47e6c4f9d7b295948b1057139bf676e394b6f

PiperOrigin-RevId: 216260575
---
 tensorflow/core/kernels/data/iterator_ops.cc  |  4 +++
 .../kernels/data/map_and_batch_dataset_op.cc  |  9 +++----
 .../core/kernels/data/model_dataset_op.cc     | 10 +++----
 .../data/parallel_interleave_dataset_op.cc    | 27 ++++++++-----------
 .../kernels/data/parallel_map_iterator.cc     |  9 +++----
 .../core/kernels/data/prefetch_dataset_op.cc  | 10 +++----
 tensorflow/core/kernels/data/writer_ops.cc    | 12 ++++-----
 7 files changed, 37 insertions(+), 44 deletions(-)

diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 8acd6cc724..7a833668ac 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -16,8 +16,10 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
+#include "tensorflow/core/common_runtime/threadpool_device.h"
 #include "tensorflow/core/framework/iterator.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/resource_op_kernel.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
@@ -25,11 +27,13 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/optional_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 namespace data {
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 0fb721cd7c..f45a239793 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -445,10 +445,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
-          runner_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-          runner_thread_->Schedule(
-              std::bind(&Iterator::RunnerThread, this, ctx_copy));
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "runner_thread",
+              std::bind(&Iterator::RunnerThread, this, ctx_copy)));
         }
       }
 
@@ -704,7 +703,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
-      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
       bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index 859df57962..9aa505f4f1 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -127,10 +126,9 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         if (!optimize_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          optimize_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "optimize_thread");
-          optimize_thread_->Schedule(
-              [this, new_ctx]() { OptimizeThread(new_ctx); });
+          optimize_thread_.reset(ctx->env()->StartThread(
+              {}, "optimize_thread",
+              [this, new_ctx]() { OptimizeThread(new_ctx); }));
         }
         return Status::OK();
       }
@@ -169,7 +167,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       condition_variable cond_var_;
       std::shared_ptr<model::Model> model_;
-      std::unique_ptr<BackgroundWorker> optimize_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<Thread> optimize_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 9c836b836e..6b6b3d6ab9 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -26,7 +26,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -482,10 +481,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           worker_threads_.reserve(dataset()->num_threads());
           for (size_t i = 0; i < dataset()->num_threads(); ++i) {
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(
-                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
-            worker_threads_.back()->Schedule(
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
+            worker_threads_.emplace_back(ctx->env()->StartThread(
+                {}, "worker_thread",
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
           }
         }
         return Status::OK();
@@ -582,10 +580,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             }
             workers_[i].SetInputs(s, std::move(args));
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(
-                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
-            worker_threads_.back()->Schedule(
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
+            worker_threads_.emplace_back(ctx->env()->StartThread(
+                {}, "worker_thread",
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
             if (i < dataset()->cycle_length_) {
               interleave_indices_.push_back(i);
             } else {
@@ -1050,8 +1047,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // The worker threads. This must be last to ensure the
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
-      std::vector<std::unique_ptr<BackgroundWorker>> worker_threads_
-          GUARDED_BY(mu_);
+      std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
@@ -1393,10 +1389,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          runner_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-          runner_thread_->Schedule(
-              [this, new_ctx]() { RunnerThread(new_ctx); });
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "runner_thread",
+              [this, new_ctx]() { RunnerThread(new_ctx); }));
         }
       }
 
@@ -1650,7 +1645,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(*mu_) = false;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index e69274e4f2..ebf41925c9 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -181,10 +181,9 @@ class ParallelMapIterator : public DatasetBaseIterator {
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
-      runner_thread_ =
-          MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-      runner_thread_->Schedule(
-          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy));
+      runner_thread_.reset(ctx->env()->StartThread(
+          {}, "runner_thread",
+          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
     }
   }
 
@@ -332,7 +331,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
       GUARDED_BY(*mu_);
-  std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
   bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index e9c38eb8a0..754ed772db 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -257,11 +256,10 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     Status EnsurePrefetchThreadStarted(IteratorContext* ctx)
         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
       if (!prefetch_thread_) {
-        prefetch_thread_ =
-            MakeUnique<BackgroundWorker>(ctx->env(), "prefetch_thread");
         std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-        prefetch_thread_->Schedule(
-            [this, new_ctx]() { PrefetchThread(new_ctx); });
+        prefetch_thread_.reset(ctx->env()->StartThread(
+            {}, "prefetch_thread",
+            [this, new_ctx]() { PrefetchThread(new_ctx); }));
       }
       return Status::OK();
     }
@@ -365,7 +363,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     string prefix_end_;
     PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_);
     std::deque<BufferElement> buffer_ GUARDED_BY(mu_);
-    std::unique_ptr<BackgroundWorker> prefetch_thread_ GUARDED_BY(mu_);
+    std::unique_ptr<Thread> prefetch_thread_ GUARDED_BY(mu_);
     bool cancelled_ GUARDED_BY(mu_) = false;
     bool prefetch_thread_finished_ GUARDED_BY(mu_) = false;
   };
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index 7bb2077b62..3f76695bb1 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel {
  public:
   explicit ToTFRecordOp(OpKernelConstruction* ctx)
       : AsyncOpKernel(ctx),
-        background_worker_(
-            ctx->env(),
-            strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) {
-  }
+        thread_pool_(new thread::ThreadPool(
+            ctx->env(), ThreadOptions(),
+            strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())),
+            1 /* num_threads */, false /* low_latency_hint */)) {}
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
@@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel {
     // The call to `iterator->GetNext()` may block and depend on an
     // inter-op thread pool thread, so we issue the call from the
     // owned thread pool.
-    background_worker_.Schedule([this, ctx, done]() {
+    thread_pool_->Schedule([this, ctx, done]() {
       string filename;
       OP_REQUIRES_OK_ASYNC(
           ctx, ParseScalarArgument<string>(ctx, "filename", &filename), done);
@@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel {
   }
 
  private:
-  BackgroundWorker background_worker_;
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU),
-- 
GitLab


From cb057ea64032e551027c8f9058a9d28a258c9d6b Mon Sep 17 00:00:00 2001
From: Chris Leary <leary@google.com>
Date: Mon, 8 Oct 2018 15:42:17 -0700
Subject: [PATCH 1272/1357] [XLA] Make overly-specific ShapeUtil predicate a
 little more general.

PiperOrigin-RevId: 216263039
---
 tensorflow/compiler/xla/service/hlo_instruction_test.cc | 3 ++-
 tensorflow/compiler/xla/service/hlo_query.cc            | 2 +-
 tensorflow/compiler/xla/shape_util.cc                   | 5 +++--
 tensorflow/compiler/xla/shape_util.h                    | 5 ++++-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index c1b7c3832b..d93351fe04 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -135,7 +135,8 @@ TEST_F(HloInstructionTest, BasicProperties) {
   auto parameter = HloInstruction::CreateParameter(1, r0f32_, "foo");
 
   EXPECT_EQ(HloOpcode::kParameter, parameter->opcode());
-  EXPECT_TRUE(ShapeUtil::IsScalarF32(parameter->shape()));
+  EXPECT_TRUE(ShapeUtil::IsScalarWithElementType(parameter->shape(), F32));
+  EXPECT_FALSE(ShapeUtil::IsScalarWithElementType(parameter->shape(), S32));
   EXPECT_EQ(0, parameter->operand_count());
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_query.cc b/tensorflow/compiler/xla/service/hlo_query.cc
index 2a07b6fcbc..2d5197be9e 100644
--- a/tensorflow/compiler/xla/service/hlo_query.cc
+++ b/tensorflow/compiler/xla/service/hlo_query.cc
@@ -24,7 +24,7 @@ namespace hlo_query {
 
 bool IsConstantR0F32(HloInstruction* instruction, float* out) {
   if (instruction->opcode() == HloOpcode::kConstant &&
-      ShapeUtil::IsScalarF32(instruction->shape())) {
+      ShapeUtil::IsScalarWithElementType(instruction->shape(), F32)) {
     *out = instruction->literal().Get<float>({});
     return true;
   }
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 7f0201942b..9267de3cfc 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -461,8 +461,9 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
   return ShapeUtil::IsArray(shape) && ElementsIn(shape) == 0;
 }
 
-/* static */ bool ShapeUtil::IsScalarF32(const Shape& shape) {
-  return shape.element_type() == F32 && Rank(shape) == 0;
+/* static */ bool ShapeUtil::IsScalarWithElementType(
+    const Shape& shape, PrimitiveType element_type) {
+  return IsScalar(shape) && shape.element_type() == element_type;
 }
 
 namespace {
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index d8bb27beae..73f541d505 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -312,7 +312,10 @@ class ShapeUtil {
   static bool IsEffectiveScalar(const Shape& shape) {
     return IsArray(shape) && TrueRank(shape) == 0;
   }
-  static bool IsScalarF32(const Shape& shape);
+
+  // Returns whether "shape" is a scalar (array) with the given element_type.
+  static bool IsScalarWithElementType(const Shape& shape,
+                                      PrimitiveType element_type);
 
   // Extracts the size of the shape's dimension at dimension number
   // GetDimensionNumber(dimension_number).
-- 
GitLab


From 783627bf63cdfa467e7811f2bf8330555d66f313 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 15:55:18 -0700
Subject: [PATCH 1273/1357] Convert TensorFlow's aws dependency to new third
 party import method.

PiperOrigin-RevId: 216265275
---
 tensorflow/workspace.bzl                   | 14 +++-----------
 third_party/aws/BUILD                      |  1 +
 third_party/{aws.BUILD => aws/BUILD.bazel} |  0
 third_party/aws/workspace.bzl              | 15 +++++++++++++++
 4 files changed, 19 insertions(+), 11 deletions(-)
 create mode 100644 third_party/aws/BUILD
 rename third_party/{aws.BUILD => aws/BUILD.bazel} (100%)
 create mode 100644 third_party/aws/workspace.bzl

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index adeac62e43..40c226a861 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -20,12 +20,15 @@ load(
     "//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl",
     "def_file_filter_configure",
 )
+load("//third_party/aws:workspace.bzl", aws = "repo")
 load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
 load("//third_party/icu:workspace.bzl", icu = "repo")
 load("//third_party/jpeg:workspace.bzl", jpeg = "repo")
 load("//third_party/nasm:workspace.bzl", nasm = "repo")
 
 def initialize_third_party():
+    """ Load third party repositories.  See above load() statements. """
+    aws()
     flatbuffers()
     icu()
     jpeg()
@@ -585,17 +588,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
     )
 
-    tf_http_archive(
-        name = "aws",
-        build_file = clean_dep("//third_party:aws.BUILD"),
-        sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c",
-        strip_prefix = "aws-sdk-cpp-1.3.15",
-        urls = [
-            "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
-            "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
-        ],
-    )
-
     java_import_external(
         name = "junit",
         jar_sha256 = "59721f0805e223d84b90677887d9ff567dc534d7c502ca903c0c2b17f05c116a",
diff --git a/third_party/aws/BUILD b/third_party/aws/BUILD
new file mode 100644
index 0000000000..2f5d02becb
--- /dev/null
+++ b/third_party/aws/BUILD
@@ -0,0 +1 @@
+# Dummy BUILD file to make this directory a package.
diff --git a/third_party/aws.BUILD b/third_party/aws/BUILD.bazel
similarity index 100%
rename from third_party/aws.BUILD
rename to third_party/aws/BUILD.bazel
diff --git a/third_party/aws/workspace.bzl b/third_party/aws/workspace.bzl
new file mode 100644
index 0000000000..c216638154
--- /dev/null
+++ b/third_party/aws/workspace.bzl
@@ -0,0 +1,15 @@
+"""loads the aws library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "aws",
+        urls = [
+            "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
+            "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
+        ],
+        sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c",
+        strip_prefix = "aws-sdk-cpp-1.3.15",
+        build_file = "//third_party/aws:BUILD.bazel",
+    )
-- 
GitLab


From 46d296b2d03ddbb6f0723d213fdfa9c5226e1e2a Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Mon, 8 Oct 2018 16:24:49 -0700
Subject: [PATCH 1274/1357] Internal change

PiperOrigin-RevId: 216270385
---
 tensorflow/contrib/lite/build_def.bzl | 40 +++++++++++++++++++++++----
 tensorflow/contrib/lite/testing/BUILD |  4 +--
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 7ef26de69f..b9e933a8b6 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -212,7 +212,8 @@ def json_to_tflite(name, src, out):
 
 # This is the master list of generated examples that will be made into tests. A
 # function called make_XXX_tests() must also appear in generate_examples.py.
-# Disable a test by commenting it out. If you do, add a link to a bug or issue.
+# Disable a test by adding it to the blacklists specified in
+# generated_test_models_failing().
 def generated_test_models():
     return [
         "add",
@@ -291,12 +292,38 @@ def generated_test_models():
         "tile",
         "topk",
         "transpose",
-        #"transpose_conv",   # disabled due to b/111213074
+        "transpose_conv",
         "unpack",
         "where",
         "zeros_like",
     ]
 
+# List of models that fail generated tests for the conversion mode.
+# If you have to disable a test, please add here with a link to the appropriate
+# bug or issue.
+def generated_test_models_failing(conversion_mode):
+    if not conversion_mode:
+        return [
+            "transpose_conv",  # disabled due to b/111213074
+        ]
+
+    if conversion_mode == "toco-flex":
+        # TODO(b/117328698): Fix and enable the known flex failures.
+        return [
+            "arg_min_max",
+            "div",
+            "floor_div",
+            "gather ",
+            "lstm ",
+            "resize_bilinear",
+            "space_to_batch_nd",
+            "split",
+            "transpose",
+            "unpack",
+        ]
+
+    return []
+
 def generated_test_conversion_modes():
     """Returns a list of conversion modes."""
 
@@ -313,10 +340,14 @@ def generated_test_models_all():
     tests = generated_test_models()
     options = []
     for conversion_mode in conversion_modes:
+        failing_tests = generated_test_models_failing(conversion_mode)
         for test in tests:
+            tags = []
+            if test in failing_tests:
+                tags.append("notap")
             if conversion_mode:
                 test += "_%s" % conversion_mode
-            options.append((conversion_mode, test))
+            options.append((conversion_mode, test, tags))
     return options
 
 def gen_zip_test(name, test_name, conversion_mode, **kwargs):
@@ -336,9 +367,6 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs):
         # if conversion_mode == "pb2lite":
         #     toco = "//tensorflow/contrib/lite/experimental/pb2lite:pb2lite"
         flags = "--ignore_toco_errors --run_with_flex"
-        kwargs["tags"].append("skip_already_failing")
-        kwargs["tags"].append("no_oss")
-        kwargs["tags"].append("notap")
 
     gen_zipped_test_file(
         name = "zip_%s" % test_name,
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index f0bfec2338..45baad782a 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -35,7 +35,7 @@ load(
         ":zip_%s" % test_name,
     ],
     shard_count = 20,
-    tags = [
+    tags = tags + [
         "gen_zip_test",
         "no_oss",
         "tflite_not_portable_intentional",
@@ -61,7 +61,7 @@ load(
             "//tensorflow/core:android_tensorflow_test_lib",
         ],
     }),
-) for conversion_mode, test_name in generated_test_models_all()]
+) for conversion_mode, test_name, tags in generated_test_models_all()]
 
 test_suite(
     name = "generated_zip_tests",
-- 
GitLab


From 8815f34385eb28f1cfcb53bebd526c11573f3027 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 8 Oct 2018 16:25:40 -0700
Subject: [PATCH 1275/1357] Avoid calling get_default_graph() during
 tf.enable_eager_execution()

PiperOrigin-RevId: 216270497
---
 tensorflow/python/framework/ops.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 77c2bc930e..140bd098a6 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -5457,8 +5457,7 @@ def enable_eager_execution_internal(config=None,
         "tf.contrib.eager.ASYNC")
   if context.default_execution_mode == context.GRAPH_MODE:
     graph_mode_has_been_used = (
-        _default_session_stack.stack
-        or len(get_default_graph().get_operations()) > 0)  # pylint: disable=g-explicit-length-test
+        _default_graph_stack._global_default_graph is not None) # pylint: disable=protected-access
     if graph_mode_has_been_used:
       raise ValueError(
           "tf.enable_eager_execution must be called at program startup.")
-- 
GitLab


From 49643265c3f1f279a93bd8bc3a126e11e979bc44 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 8 Oct 2018 17:14:47 -0700
Subject: [PATCH 1276/1357] Remove deprecations for some of the endpoints in
 ApiDef files. These changes are made according to
 https://github.com/tensorflow/community/pull/16.

I am keeping a few symbols deprecated not mentioned in the doc:
tf.diag - it seems best to keep it next to tf.linalg.diag, so that the two are easy to compare and decide which one to use. The plan is to rename tf.diag to tf.tensor_diag.
tf.is_nan - similar to tf.is_inf, tf.is_finite, tf.is_numeric_tensor which are all getting deprecated and replaced by symbols in tf.debugging.
tf.string_to_number - other string endpoints in root namespace are getting deprecated: for e.g. tf.substr, tf.string_join.
tf.dequantize - all quantization ops should be under tf.quantize. I probably missed this one.
tf.check_numerics - similar to other debugging ops that are getting moved to tf.debugging.
tf.squared_difference - moved to tf.math namespace and not as popular as some other math ops such as tf.add to justify keeping endpoint in root.
tf.decode_raw - similar to other ops such as tf.decode_csv that are getting moved to tf.io.decode_csv.

PiperOrigin-RevId: 216278010
---
 tensorflow/core/api_def/python_api/api_def_Acos.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Add.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_AsString.pbtxt     | 1 -
 tensorflow/core/api_def/python_api/api_def_Asin.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Atan.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Cos.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Equal.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Exp.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_Floor.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Greater.pbtxt      | 1 -
 tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt | 1 -
 tensorflow/core/api_def/python_api/api_def_Less.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt    | 1 -
 tensorflow/core/api_def/python_api/api_def_Log.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt   | 1 -
 tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt   | 1 -
 tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt    | 1 -
 tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt      | 1 -
 tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt      | 1 -
 tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt     | 1 -
 tensorflow/core/api_def/python_api/api_def_Sin.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Tan.pbtxt          | 1 -
 29 files changed, 29 deletions(-)

diff --git a/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt b/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt
index 1fd8baf05f..f4d7f498b2 100644
--- a/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "acos"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt
index f7946652ef..e921f26d1e 100644
--- a/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "acosh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Add.pbtxt b/tensorflow/core/api_def/python_api/api_def_Add.pbtxt
index fb505a91ac..4c6f387ebd 100644
--- a/tensorflow/core/api_def/python_api/api_def_Add.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Add.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "add"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt b/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt
index ea65543a76..d51defc376 100644
--- a/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "as_string"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt b/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt
index eedf4553c6..b13f5c398f 100644
--- a/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "asin"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt
index 10c2fb356e..89a3f9da44 100644
--- a/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "asinh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt b/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt
index 03dd5dc848..4403a2379c 100644
--- a/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "atan"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt b/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt
index 85b27bd881..56eed0f0fb 100644
--- a/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "atan2"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt
index ee7c0600d6..a8f5e792f0 100644
--- a/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "atanh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt
index 1af8c0c2c9..db52d25ff2 100644
--- a/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "cos"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt
index 2de87df40d..74bf573565 100644
--- a/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "cosh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt b/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt
index 78aa1b3bc5..34717e74bc 100644
--- a/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "equal"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt b/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt
index 70323fe5b4..38a9078d9f 100644
--- a/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "exp"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt b/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt
index 9b93caa0b1..14accd2b20 100644
--- a/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "floor"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt b/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt
index 7de60d44c4..7926deaa3b 100644
--- a/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "greater"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt
index 9c8975c2a9..21bbb1b094 100644
--- a/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "greater_equal"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Less.pbtxt b/tensorflow/core/api_def/python_api/api_def_Less.pbtxt
index 055df2922a..0b5f06e99f 100644
--- a/tensorflow/core/api_def/python_api/api_def_Less.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Less.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "less"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt
index d2803ddb69..afc4f2a8c9 100644
--- a/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "less_equal"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Log.pbtxt b/tensorflow/core/api_def/python_api/api_def_Log.pbtxt
index 26d2473b9c..ac4a4454c7 100644
--- a/tensorflow/core/api_def/python_api/api_def_Log.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Log.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "log"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt b/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt
index d85b6dccec..5a2d77a417 100644
--- a/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "log1p"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt b/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt
index 80bd98b740..d4e6a7a380 100644
--- a/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "logical_and"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt b/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt
index b2244c44b1..49068738a4 100644
--- a/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "logical_not"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt b/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt
index cf78b52e07..a5133962dc 100644
--- a/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "logical_or"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt b/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt
index bcff379b71..130729ece1 100644
--- a/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "maximum"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt b/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt
index 9aae74226a..8aded1f154 100644
--- a/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "minimum"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt
index f37317854f..07fe3b6af1 100644
--- a/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "not_equal"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt b/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt
index 9c19a1a177..a2b776ee0c 100644
--- a/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "sin"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt
index 155e58e6d5..38c7c729bf 100644
--- a/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "sinh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt b/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt
index ffa92f5580..20cfac05fd 100644
--- a/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "tan"
-    deprecated: true
   }
 }
-- 
GitLab


From 03d097bc96080981098ffdbaf1b3465e6e153a6a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 17:33:22 -0700
Subject: [PATCH 1277/1357] Consolidate device parameter arguments into a
 shared DeviceInfo struct

PiperOrigin-RevId: 216280197
---
 tensorflow/core/grappler/costs/cost_estimator.h           | 5 +++++
 tensorflow/core/grappler/costs/op_level_cost_estimator.cc | 2 +-
 tensorflow/core/grappler/costs/op_level_cost_estimator.h  | 6 ------
 tensorflow/python/grappler/cluster.i                      | 2 +-
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h
index e91f0cc9da..569d9da683 100644
--- a/tensorflow/core/grappler/costs/cost_estimator.h
+++ b/tensorflow/core/grappler/costs/cost_estimator.h
@@ -30,6 +30,11 @@ struct GrapplerItem;
 constexpr int64 kMemoryUnknown = -1ll;
 constexpr int64 kZeroMemory = 0ll;
 
+struct DeviceInfo {
+  double gigaops;     // Billions of operations executed per second.
+  double gb_per_sec;  // Bandwidth to main memory in GB per second.
+};
+
 // Holds the set of things we might want to estimate or measure in Grappler.
 // Always produce execution time. Other fields are optional depending on the
 // estimator being used.
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index 71f4d9fd05..f363f2915f 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -372,7 +372,7 @@ Costs OpLevelCostEstimator::PredictCosts(const OpContext& op_context) const {
   return costs;
 }
 
-OpLevelCostEstimator::DeviceInfo OpLevelCostEstimator::GetDeviceInfo(
+DeviceInfo OpLevelCostEstimator::GetDeviceInfo(
     const DeviceProperties& device) const {
   double gflops = -1;
   double gb_per_sec = -1;
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
index a277dfdf65..dd1ee39cb2 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
@@ -40,12 +40,6 @@ class OpLevelCostEstimator {
 
   virtual Costs PredictCosts(const OpContext& op_context) const;
 
-  // Basic device performance info, sufficient for roofline estimate.
-  struct DeviceInfo {
-    double gigaops;     // Billions of operations executed per second.
-    double gb_per_sec;  // Bandwidth to main memory in GB per second.
-  };
-
   // Returns basic device performance info.
   virtual DeviceInfo GetDeviceInfo(const DeviceProperties& device) const;
 
diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index 6816e20407..87795ffcfb 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -308,7 +308,7 @@ static PyObject* TF_GetSupportedDevices(GCluster cluster, GItem item) {
 
 static double TF_EstimatePerformance(const tensorflow::NamedDevice& device) {
   tensorflow::grappler::OpLevelCostEstimator estimator;
-  tensorflow::grappler::OpLevelCostEstimator::DeviceInfo info =
+  tensorflow::grappler::DeviceInfo info =
       estimator.GetDeviceInfo(device.properties());
   return info.gigaops;
 }
-- 
GitLab


From 4ff7b81514ea1b86295bc74b620e3c1d3e127e6f Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 17:37:44 -0700
Subject: [PATCH 1278/1357] Fix the seeding for `Dataset.shuffle(...,
 reshuffle_each_iteration=False)`.

Previously, we were passing the first (graph-level) seed for both the
graph-level and op-level seeds when creating a C++ dataset. This
change passes the op-level seed to the appropriate point, and adds a test
for the behavior with graph-but-not-op-level seeds.

PiperOrigin-RevId: 216280641
---
 .../core/kernels/data/shuffle_dataset_op.cc   |  2 +-
 tensorflow/python/data/kernel_tests/BUILD     |  3 ++
 .../kernel_tests/shuffle_dataset_op_test.py   | 35 ++++++++++++++++++-
 3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 66466d6a36..9f54c381a9 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -485,7 +485,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
                      int64 buffer_size, int64 seed, int64 seed2, int64 count)
         : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
-          seed2_(seed) {}
+          seed2_(seed2) {}
 
     string DebugString() const override {
       return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_,
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index c7295d6e69..671b7ca1bb 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -443,12 +443,15 @@ tf_py_test(
     srcs = ["shuffle_dataset_op_test.py"],
     additional_deps = [
         ":test_base",
+        "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:random_seed",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
     ],
diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
index 347af18576..8694f58a24 100644
--- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import collections
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.kernel_tests import test_base
@@ -27,11 +28,13 @@ from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ShuffleDatasetTest(test_base.DatasetTestBase):
+class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testShuffleDataset(self):
     components = (
@@ -209,5 +212,35 @@ class ShuffleDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
+  @parameterized.named_parameters(
+      ("ReshuffleGraphLevelSeed", True, 38, None),
+      ("ReshuffleOpLevelSeed", True, None, 42),
+      ("ReshuffleGraphAndOpLevelSeed", True, 38, 42),
+      ("NoReshuffleGraphLevelSeed", False, 38, None),
+      ("NoReshuffleOpLevelSeed", False, None, 42),
+      ("NoReshuffleGraphAndOpLevelSeed", False, 38, 42),
+  )
+  def testShuffleSeed(self, reshuffle, graph_level_seed, op_level_seed):
+    results = []
+    for _ in range(2):
+      with ops.Graph().as_default() as g:
+        random_seed.set_random_seed(graph_level_seed)
+        dataset = dataset_ops.Dataset.range(10).shuffle(
+            10, seed=op_level_seed, reshuffle_each_iteration=reshuffle).repeat(
+                3)
+        iterator = dataset.make_one_shot_iterator()
+        next_element = iterator.get_next()
+
+        run_results = []
+        with self.session(graph=g) as sess:
+          for _ in range(30):
+            run_results.append(sess.run(next_element))
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(next_element)
+        results.append(run_results)
+
+    self.assertAllEqual(results[0], results[1])
+
+
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 934fde5b8c60987db36438ab4f70f8a91bce306b Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Mon, 8 Oct 2018 17:40:07 -0700
Subject: [PATCH 1279/1357] Register int64 SUM GPU kernel.

PiperOrigin-RevId: 216280913
---
 tensorflow/core/kernels/reduction_ops_sum.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc
index 5318d8c133..cf0d0f5c71 100644
--- a/tensorflow/core/kernels/reduction_ops_sum.cc
+++ b/tensorflow/core/kernels/reduction_ops_sum.cc
@@ -51,6 +51,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
           .HostMemory("reduction_indices"),                                    \
       ReductionOp<GPUDevice, type, int64, Eigen::internal::SumReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+TF_CALL_int64(REGISTER_GPU_KERNELS);
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
-- 
GitLab


From d58712b7fc8de0e1f87fe2ea5221bc3c85230ed3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 18:12:42 -0700
Subject: [PATCH 1280/1357] Add a tracing::ScopedActivity event to track the
 duration of a Session::Run() call for better xprof tracing. Also annotate
 synchronous op execution with the session-run id (or step_id) as metadata
 leveraging the support introduced in cl/215985561. This should enable
 highlighting the duration of a Session::Run and all the ops that ran in it
 for visualizing latency regressions in the case of CPU inference.

PiperOrigin-RevId: 216284682
---
 tensorflow/core/common_runtime/direct_session.cc |  4 ++++
 tensorflow/core/common_runtime/executor.cc       | 12 ++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 458e133b68..52c1cd2691 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -64,6 +64,7 @@ limitations under the License.
 #include "tensorflow/core/platform/device_tracer.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/device_name_utils.h"
 #include "tensorflow/core/util/env_var.h"
@@ -453,6 +454,9 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options,
                                   CallFrameInterface* call_frame,
                                   ExecutorsAndKeys* executors_and_keys,
                                   RunMetadata* run_metadata) {
+  string session_id_meta = strings::StrCat("SessionRun #id=", step_id, "#");
+  tracing::ScopedActivity activity(session_id_meta);
+
   const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1);
 
   std::unique_ptr<DebuggerStateInterface> debugger_state;
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 40ec1502da..eb69d1991c 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -1771,14 +1771,18 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
             // The OpKernel may create child activities (such as GPU kernel
             // launches), so use a `ScopedAnnotation` to relate these activities
             // in the trace.
-            tracing::ScopedAnnotation activity(op_name,
-                                               op_kernel->type_string());
+            tracing::ScopedAnnotation activity(
+                op_name, strings::StrCat(op_kernel->type_string(),
+                                         "#id=", step_id_, "#"));
             device->Compute(op_kernel, &ctx);
           } else {
             // Use the cheaper `ScopedActivity` to trace just the OpKernel
             // execution.
-            tracing::ScopedActivity activity(op_name, op_kernel->type_string(),
-                                             item.kernel_is_expensive);
+            tracing::ScopedActivity activity(
+                op_name,
+                strings::StrCat(op_kernel->type_string(), "#id=", step_id_,
+                                "#"),
+                item.kernel_is_expensive);
             device->Compute(op_kernel, &ctx);
           }
         } else {
-- 
GitLab


From 375c109659d2d0e6265447dffdeb460693b3cccf Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Mon, 8 Oct 2018 21:18:36 -0700
Subject: [PATCH 1281/1357] [XLA] Introduce input/output alias config.

- This CL intruduces input/output alias config in HLO module that allows any HLO pass to configure it. Once the alias_config is set, each backend needs to follow the contract during execution time to make sure the input and output are indeed aliased.

- Copy insertion / buffer assignment and alias analysis has been updated to correctly honor the config and avoid any possible liveness interference.

PiperOrigin-RevId: 216299501
---
 tensorflow/compiler/xla/service/BUILD         |  21 ++
 .../compiler/xla/service/buffer_assignment.cc |  34 ++--
 .../compiler/xla/service/buffer_value.h       |   3 +
 .../compiler/xla/service/copy_insertion.cc    |  85 +++++++-
 .../xla/service/copy_insertion_test.cc        | 183 +++++++++++++++++
 tensorflow/compiler/xla/service/hlo.proto     |  29 +++
 .../xla/service/hlo_alias_analysis.cc         |  46 ++++-
 .../xla/service/hlo_alias_analysis_test.cc    | 175 +++++++++++++++++
 .../xla/service/hlo_dataflow_analysis.cc      |   2 +-
 .../service/hlo_input_output_alias_config.cc  | 172 ++++++++++++++++
 .../service/hlo_input_output_alias_config.h   | 101 ++++++++++
 .../hlo_input_output_alias_config_test.cc     | 184 ++++++++++++++++++
 tensorflow/compiler/xla/service/hlo_module.cc |   9 +
 tensorflow/compiler/xla/service/hlo_module.h  |  14 ++
 .../compiler/xla/service/hlo_verifier.cc      |   2 +
 tensorflow/compiler/xla/shape_util.h          |   2 +-
 16 files changed, 1037 insertions(+), 25 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
 create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
 create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 2b292ed053..26ebb88e96 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -294,6 +294,7 @@ cc_library(
     srcs = [
         "dfs_hlo_visitor.cc",
         "hlo_computation.cc",
+        "hlo_input_output_alias_config.cc",
         "hlo_instruction.cc",
         "hlo_instructions.cc",
         "hlo_module.cc",
@@ -308,6 +309,7 @@ cc_library(
         "hlo_clone_context.h",
         "hlo_computation.h",
         "hlo_domain_metadata.h",
+        "hlo_input_output_alias_config.h",
         "hlo_instruction.h",
         "hlo_instructions.h",
         "hlo_module.h",
@@ -1268,6 +1270,25 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "hlo_input_output_alias_config_test",
+    srcs = ["hlo_input_output_alias_config_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_dce",
+        ":hlo_memory_scheduler",
+        ":hlo_ordering",
+        ":hlo_parser",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+        "@com_google_absl//absl/algorithm:container",
+    ],
+)
+
 cc_library(
     name = "hlo_memory_scheduler",
     srcs = ["hlo_memory_scheduler.cc"],
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 2c2d1626c2..d5d6a044a8 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -239,7 +239,7 @@ BufferAllocation::Slice BufferAllocation::GetSlice(
 
 void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset,
                                      int64 size) {
-  VLOG(4) << "Trying to add " << buffer << " to " << this;
+  VLOG(4) << "Trying to add " << buffer << " to allocation #" << index();
   CHECK(assigned_buffers_.count(&buffer) == 0)
       << "LogicalBuffer " << buffer << " already assigned to allocation "
       << index_;
@@ -784,21 +784,6 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
     }
   }
 
-  if (allow_input_output_aliasing_ && allocation->maybe_live_out()) {
-    const HloComputation* entry_computation =
-        assignment->module_->entry_computation();
-    for (auto param : entry_computation->parameter_instructions()) {
-      for (auto& param_buffer :
-           assignment->points_to_analysis().GetBuffersDefinedByInstruction(
-               param)) {
-        if (assignment->liveness().MayInterfere(*param_buffer, buffer)) {
-          VLOG(4) << "Can't assign: Parameter interference with result";
-          return false;
-        }
-      }
-    }
-  }
-
   // If the buffer is live out of the computation then it should only be
   // assigned a buffer which exactly fits the result to avoid wasting memory
   // (result buffers can have arbitrary lifetimes).
@@ -1434,13 +1419,28 @@ BufferAssigner::MergeColocatedBufferSets(
 
 // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated
 // in the same allocation (currently just supports kWhile, kCall, and
-// kConditional).
+// kConditional and input output aliasing).
 void BufferAssigner::BuildColocatedBufferSets(
     const HloModule* module, const BufferLiveness& buffer_liveness,
     const LogicalBuffer::SizeFunction& buffer_size,
     std::vector<ColocatedBufferSet>* colocated_buffer_sets) {
   const TuplePointsToAnalysis& points_to_analysis =
       buffer_liveness.points_to_analysis();
+
+  // Set up colocated buffer set for input and output.
+  module->input_output_alias_config().ForEachAlias(
+      [&](const ShapeIndex& output_index, int64 param_number,
+          const ShapeIndex& param_index) {
+        std::vector<const LogicalBuffer*> colocated_set;
+        AddBufferToColocatedSet(module->entry_computation()->root_instruction(),
+                                output_index, points_to_analysis,
+                                &colocated_set);
+        AddBufferToColocatedSet(
+            module->entry_computation()->parameter_instruction(param_number),
+            param_index, points_to_analysis, &colocated_set);
+        AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets);
+      });
+
   for (const HloComputation* computation : module->MakeComputationPostOrder()) {
     if (computation->IsFusionComputation()) {
       continue;
diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h
index 69b3646356..11d8abc5ba 100644
--- a/tensorflow/compiler/xla/service/buffer_value.h
+++ b/tensorflow/compiler/xla/service/buffer_value.h
@@ -141,6 +141,9 @@ class BufferValue {
   // operator< is required for std::set.
   bool operator<(const BufferValue& other) const { return id_ < other.id_; }
 
+  bool operator==(const BufferValue& other) const { return id_ == other.id_; }
+  bool operator!=(const BufferValue& other) const { return id_ != other.id_; }
+
   virtual string ToString() const = 0;
 
   // TODO(lauj) rename LogicalBufferProto to BufferValueProto.
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index f35324aa35..cfe025fdd1 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -40,10 +40,12 @@ namespace {
 
 using absl::StrAppend;
 
-bool IsEntryParameterValue(const HloValue& value) {
+bool IsReadonlyEntryParameterValue(const HloValue& value) {
   const HloComputation* computation = value.defining_instruction()->parent();
   return value.defining_instruction()->opcode() == HloOpcode::kParameter &&
-         computation == computation->parent()->entry_computation();
+         computation == computation->parent()->entry_computation() &&
+         !computation->parent()->input_output_alias_config().ParameterHasAlias(
+             value.defining_instruction()->parameter_number());
 }
 
 bool IsConstantValue(const HloValue& value) {
@@ -51,7 +53,7 @@ bool IsConstantValue(const HloValue& value) {
 }
 
 bool ValueIsReadOnly(const HloValue& value) {
-  return IsConstantValue(value) || IsEntryParameterValue(value);
+  return IsConstantValue(value) || IsReadonlyEntryParameterValue(value);
 }
 
 // Data structure describing the action which should be taken on parts of a
@@ -332,6 +334,81 @@ Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis,
   return Status::OK();
 }
 
+// Conservatively adds copies before root instruction of entry computation and
+// each aliased parameter to resolve interference of aliased input and output
+// buffer. We later rely on the CopyRemover to drop the unnecessary ones.
+Status AddCopiesForAliasedInputOutputs(HloModule* module) {
+  HloComputation* entry = module->entry_computation();
+  HloInstruction* root = entry->root_instruction();
+
+  ShapeTree<bool> output_indices_to_copy(root->shape());
+  std::vector<ShapeTree<HloInstruction*>> copied_parameters;
+  bool has_alias = false;
+  for (auto* param : entry->parameter_instructions()) {
+    bool param_has_alias = false;
+    ShapeTree<bool> param_indices_to_copy(param->shape());
+
+    module->input_output_alias_config().ForEachAlias(
+        [&](const ShapeIndex& output_index, int64 param_number,
+            const ShapeIndex& param_index) {
+          if (param_number == param->parameter_number()) {
+            param_has_alias = true;
+            *(param_indices_to_copy.mutable_element(param_index)) = true;
+            *(output_indices_to_copy.mutable_element(output_index)) = true;
+          }
+        });
+
+    if (!param_has_alias) {
+      continue;
+    }
+
+    has_alias = true;
+    // Store a snapshot of users before DeepCopyInstruction, as
+    // DeepCopyInstruction introduces new users of the instruction.
+    std::vector<HloInstruction*> users = param->users();
+    ShapeTree<HloInstruction*> param_copy_tree(param->shape(),
+                                               /*init_value=*/nullptr);
+    TF_ASSIGN_OR_RETURN(HloInstruction * copied,
+                        entry->DeepCopyInstruction(
+                            param, &param_indices_to_copy, &param_copy_tree));
+    for (HloInstruction* user : users) {
+      TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, copied));
+    }
+
+    copied_parameters.push_back(param_copy_tree);
+  }
+
+  if (!has_alias) {
+    return Status::OK();
+  }
+
+  // Add copies before root instruction.
+  ShapeTree<HloInstruction*> output_copy_tree(root->shape(),
+                                              /*init_value=*/nullptr);
+
+  TF_ASSIGN_OR_RETURN(HloInstruction * root_copied,
+                      root->parent()->DeepCopyInstruction(
+                          root, &output_indices_to_copy, &output_copy_tree));
+
+  // Add control dependencies between the input/output copies.
+  TF_RETURN_IF_ERROR(module->input_output_alias_config().ForEachAliasWithStatus(
+      [&](const ShapeIndex& output_index, int64 param_number,
+          const ShapeIndex& input_index) -> Status {
+        HloInstruction* from =
+            copied_parameters[param_number].element(input_index);
+        HloInstruction* to = output_copy_tree.element(output_index);
+
+        TF_RET_CHECK(from != nullptr);
+        TF_RET_CHECK(to != nullptr);
+        TF_RETURN_IF_ERROR(from->AddControlDependencyTo(to));
+        return Status::OK();
+      }));
+
+  entry->set_root_instruction(root_copied);
+
+  return Status::OK();
+}
+
 // Removes any control dependencies to or from the given instruction.
 Status StripControlDependenciesFrom(HloInstruction* instruction) {
   while (!instruction->control_successors().empty()) {
@@ -953,6 +1030,8 @@ Status CopyInsertion::AddCopiesToResolveInterference(HloModule* module) {
       }
     }
   }
+
+  TF_RETURN_IF_ERROR(AddCopiesForAliasedInputOutputs(module));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 892d0d7b54..3096206c34 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -1351,6 +1351,189 @@ TEST_F(CopyInsertionTest, SwizzlingWhile) {
   EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy()));
 }
 
+TEST_F(CopyInsertionTest, CrossingParameters) {
+  // Test a case where two parameters' dataflow cross with each other while
+  // input and output are aliased with same index:
+  //
+  //  (p0 ,  p1)
+  //   | \   /|
+  //   |  \ / |
+  // alias X  alias
+  //   |  / \ |
+  //   | /   \|
+  //  (p1  ,  p0)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte1, gte0}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 4);
+}
+
+TEST_F(CopyInsertionTest, ParametersAliasing) {
+  // Test a case where two parameters' dataflow don't interfere with each other
+  // while aliased.
+  //
+  //  (p0 ,  p1)
+  //   |      |
+  //   |      |
+  // alias   alias
+  //   |      |
+  //   |      |
+  //  (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+  InsertCopies(module.get());
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::Copy(op::GetTupleElement(param, 0)),
+                        op::Copy(op::GetTupleElement(param, 1))));
+
+  EXPECT_EQ(CountCopies(*module), 2);
+}
+
+TEST_F(CopyInsertionTest, ParameterWithPartialAliasing) {
+  // Test a case where one parameter is aliased with result while another one
+  // isn't.
+  //
+  //  (p0 ,  p1)
+  //   |      |
+  //   |      |
+  // alias    |
+  //   |      |
+  //   |      |
+  //  (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  InsertCopies(module.get());
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::Copy(op::GetTupleElement(param, 0)),
+                        op::Copy(op::GetTupleElement(param, 1))));
+
+  EXPECT_EQ(CountCopies(*module), 2);
+}
+
+TEST_F(CopyInsertionTest, ParameterAndParallelOpsWithPartialAliasing) {
+  // Test a case where one parameter is aliased with result while another one
+  // isn't.
+  //
+  //   +-- (p0 ,  p1)
+  //   |    |      |
+  //   |    |      |
+  // alias Negate  Negate
+  //   |    |      |
+  //   |    |      |
+  //   +-- (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+
+  auto negate0 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
+
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
+  builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 0);
+}
+
+TEST_F(CopyInsertionTest, ParameterAndOpsWithPartialAliasing) {
+  // Test a case where one parameter is aliased with result while another one
+  // isn't.
+  //
+  //   +-- (p0 ,  p1)
+  //   |    |      |
+  //   |    |      |
+  // alias Negate  Negate
+  //   |    |      |
+  //   |    Add----+
+  //   |    |      |
+  //   +-- (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+
+  auto negate0 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
+
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
+
+  auto add = builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kAdd, negate0, negate1));
+  builder.AddInstruction(HloInstruction::CreateTuple({add, negate1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 0);
+}
+
 TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) {
   // Test a while instruction with a body which permutes its tuple parameter
   // elements and applies one operation to one of the elements. The addition of
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index a0eb9e6ddc..82c8fb1904 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -225,6 +225,32 @@ message HloScheduleProto {
   map<int64, InstructionSequence> sequences = 1;
 }
 
+message HloInputOutputAliasProto {
+  // The following proto describes a pair of aliased an input
+  // (described by parameter number and a ShapeIndex of the parameter)
+  // and an output (described by a ShapeIndex of the root
+  // instruction). For example:
+  //
+  // entry = {
+  //  output_shape_index={1},
+  //  parameter_number=0,
+  //  parameter_shape_index={1, 2},
+  // }
+  //
+  // This entry indicates that the first paremter's {1, 2} element is
+  // aliased with the {1} element of the root instruction.
+  message AliasEntryProto {
+    // ShapeIndex of the root hlo.
+    repeated int64 output_shape_index = 1;
+    // Number of the parameter in entry computation.
+    int64 parameter_number = 2;
+    // ShapeIndex of the parameter instruction.
+    repeated int64 parameter_shape_index = 3;
+  }
+
+  repeated AliasEntryProto entries = 1;
+}
+
 // Serialization of HloModule.
 message HloModuleProto {
   string name = 1;
@@ -243,6 +269,9 @@ message HloModuleProto {
 
   // The schedule for this module.
   HloScheduleProto schedule = 7;
+
+  // Describes alias information between inputs and outputs.
+  HloInputOutputAliasProto input_output_alias = 8;
 }
 
 // Serialization of LogicalBuffer.
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index c3da12e273..cf8e6594cb 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -59,8 +59,9 @@ class BufferValueMap {
   // construction process.
   using BufferNumber = int64;
 
-  explicit BufferValueMap(const HloDataflowAnalysis& dataflow)
-      : dataflow_(dataflow) {
+  explicit BufferValueMap(HloModule* module,
+                          const HloDataflowAnalysis& dataflow)
+      : module_(module), dataflow_(dataflow) {
     buffers_.reserve(dataflow_.values().size());
     value_to_buffer_number_.reserve(dataflow_.values().size());
     for (const HloValue* value : dataflow_.values()) {
@@ -171,6 +172,42 @@ class BufferValueMap {
     return value_to_buffer_number_.at(&value);
   }
 
+  void ComputeInputOutputAliasedBuffers(
+      const HloValue& value, std::vector<BufferNumber>* aliased_buffers) {
+    // Get parameter value from an aliased_input object.
+    const auto get_parameter_value =
+        [this](const std::pair<int64, ShapeIndex>& aliased_input)
+        -> const HloValue& {
+      int64 param_number = aliased_input.first;
+      const ShapeIndex& param_index = aliased_input.second;
+      return dataflow_.GetUniqueValueAt(
+          module_->entry_computation()->parameter_instruction(param_number),
+          param_index);
+    };
+
+    // If the value shows up in a root instruction, alias it with parameter
+    // intruction.
+    for (const HloPosition& pos : value.positions()) {
+      if (pos.instruction == module_->entry_computation()->root_instruction()) {
+        ShapeIndex output_index = pos.index;
+
+        auto aliased_input =
+            module_->input_output_alias_config().GetAliasedParameter(
+                output_index);
+        if (aliased_input) {
+          aliased_buffers->push_back(
+              GetBufferForValue(get_parameter_value(*aliased_input)));
+        }
+      }
+    }
+
+    // If the value is parameter instruction itself, alias it with itself.
+    if (value.instruction()->opcode() == HloOpcode::kParameter &&
+        value.instruction()->parent() == module_->entry_computation()) {
+      aliased_buffers->push_back(GetBufferForValue(value));
+    }
+  }
+
   void ComputeWhileAliasedBuffers(const HloValue& value,
                                   std::vector<BufferNumber>* aliased_buffers) {
     VLOG(3) << "Compute kWhile aliases";
@@ -278,6 +315,7 @@ class BufferValueMap {
       VLOG(2) << "Use of value " << value.ToShortString() << ": " << use;
     }
     std::vector<BufferNumber> aliased_buffers;
+    ComputeInputOutputAliasedBuffers(value, &aliased_buffers);
     ComputeWhileAliasedBuffers(value, &aliased_buffers);
     ComputeConditionalAliasedBuffers(value, &aliased_buffers);
     // Uniquify aliased buffers.
@@ -288,6 +326,8 @@ class BufferValueMap {
     return aliased_buffers;
   }
 
+  HloModule* module_;
+
   // Dataflow analysis used to construct the buffer map.
   const HloDataflowAnalysis& dataflow_;
 
@@ -461,7 +501,7 @@ StatusOr<std::unique_ptr<HloAliasAnalysis>> HloAliasAnalysis::Run(
                                                /*bitcast_defines_value=*/false,
                                                fusion_can_share_buffer));
 
-  BufferValueMap buffer_map(alias_analysis->dataflow_analysis());
+  BufferValueMap buffer_map(module, alias_analysis->dataflow_analysis());
   buffer_map.MergeAliasedBuffers();
 
   // Create a vector of HloBuffers, one for each set of values in the
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
index 0cd0ab36fc..5c8d97b2d1 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
@@ -217,6 +217,181 @@ TEST_F(HloAliasAnalysisTest, NondistinctTuple) {
   EXPECT_FALSE(AnyValuesInSameBufferInterfere());
 }
 
+TEST_F(HloAliasAnalysisTest, ParametersWithAliasing) {
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+
+  auto negate0 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
+
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
+  module_->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+
+  // Cannot alias an output twice.
+  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
+
+  const HloAliasAnalysis& analysis = RunAnalysis();
+
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
+
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
+}
+
+TEST_F(HloAliasAnalysisTest, ParametersWithCrossAliasing) {
+  // parameter 0 aliased with output 1 and parameter 1 aliased with output 0.
+  //
+  //  (p0 ,  p1)
+  //     \   /
+  //      \ /
+  // alias X
+  //      / \
+  //     /   \
+  //  (p0  ,  p1)
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module_->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{1}));
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
+
+  // Cannot alias an output twice.
+  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+
+  const HloAliasAnalysis& analysis = RunAnalysis();
+
+  // Every Ops in this graph are aliased with each other.
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
+
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
+}
+
+TEST_F(HloAliasAnalysisTest, InputOutputAliasingWithWhile) {
+  // Test a simple single while instruction can be aliased with input and output
+  // of the computation.
+  //
+  // body((F32[], F32[]) %tuple_param):
+  //   %add = Add(%tuple_param{0}, %tuple_param{1})
+  //   return Tuple(%tuple_param{0}, %add)
+  //
+  // condition((F32[], F32[]) %tuple_param):
+  //   return Constant(false)
+  //
+  // entry:
+  //   %param1 = param1
+  //   %while = While(%param1, body, condition)
+  //   %while_1 = GTE(%while, 0)
+  //   %while_2 = GTE(%while, 1)
+  //   %negate_1 = Negate(%while_1)
+  //   %negate_2 = Negate(%while_2)
+  //   return Tuple(negate_1, negate_2)
+  //
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  // Element 0 passes transparently through the body.
+  auto body_builder = HloComputation::Builder("body");
+  auto body_param = body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "param"));
+  auto body_element_0 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0));
+  auto body_element_1 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1));
+  auto add = body_builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1));
+  auto body_tuple = body_builder.AddInstruction(
+      HloInstruction::CreateTuple({body_element_0, add}));
+  HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build());
+
+  // Condition computation trivially returns a constant "false".
+  auto cond_builder = HloComputation::Builder("condition");
+  auto cond_param = cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "param"));
+  cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<bool>(false)));
+  HloComputation* condition =
+      module_->AddEmbeddedComputation(cond_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+
+  auto xla_while = builder.AddInstruction(
+      HloInstruction::CreateWhile(tuple_shape, condition, body, param));
+  auto while_element_1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 0));
+  auto while_element_2 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 1));
+  auto negate_1 = builder.AddInstruction(HloInstruction::CreateUnary(
+      scalar_shape_, HloOpcode::kNegate, while_element_1));
+  auto negate_2 = builder.AddInstruction(HloInstruction::CreateUnary(
+      scalar_shape_, HloOpcode::kNegate, while_element_2));
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({negate_1, negate_2}));
+  module_->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+
+  const HloAliasAnalysis& analysis = RunAnalysis();
+
+  EXPECT_THAT(
+      GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})),
+      UnorderedElementsAre(GetValueDefinedAt(param, {1}),
+                           GetValueDefinedAt(xla_while, /*index=*/{1}),
+                           GetValueDefinedAt(body_param, {1}),
+                           GetValueDefinedAt(cond_param, {1}),
+                           GetValueDefinedAt(add),
+                           GetValueDefinedAt(negate_2)));
+
+  EXPECT_THAT(
+      analysis.GetUniqueBufferAt(xla_while, /*index=*/{1}).ComputePositions(),
+      UnorderedElementsAre(
+          HloPosition{param, {1}}, HloPosition{xla_while, {1}},
+          HloPosition{while_element_2, {}}, HloPosition{body_param, {1}},
+          HloPosition{body_element_1, {}}, HloPosition{add, {}},
+          HloPosition{body_tuple, {1}}, HloPosition{tuple, {1}},
+          HloPosition{cond_param, {1}}, HloPosition{negate_2, {}}));
+
+  EXPECT_FALSE(AnyValuesInSameBufferInterfere());
+}
+
 TEST_F(HloAliasAnalysisTest, SingleCall) {
   // Test a single call of a subcomputation. The subcomputation adds its two
   // array-shaped parameters.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index c22adcdd8d..f401eac016 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -126,7 +126,7 @@ bool HloDataflowAnalysis::ValueIsDefinedAt(const HloInstruction* instruction,
 
 const HloValue& HloDataflowAnalysis::GetValueDefinedAt(
     const HloInstruction* instruction, const ShapeIndex& index) const {
-  CHECK(ValueIsDefinedAt(instruction, index));
+  CHECK(ValueIsDefinedAt(instruction, index)) << instruction->ToString();
   return GetUniqueValueAt(instruction, index);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
new file mode 100644
index 0000000000..9ad98e5038
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
@@ -0,0 +1,172 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+
+namespace xla {
+Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index,
+                                             int64 param_number,
+                                             const ShapeIndex& param_index) {
+  // Output can't be aliased with multiple parameters.
+  TF_RET_CHECK(!alias_.element(output_index));
+  (*alias_.mutable_element(output_index)) =
+      std::make_pair(param_number, param_index);
+  return Status::OK();
+}
+
+HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const {
+  HloInputOutputAliasProto result;
+  alias_.ForEachElement(
+      [&](const ShapeIndex& index,
+          const absl::optional<std::pair<int64, ShapeIndex>>& data) {
+        if (data) {
+          HloInputOutputAliasProto::AliasEntryProto entry;
+          for (int64 i : index) {
+            entry.add_output_shape_index(i);
+          }
+          entry.set_parameter_number(data->first);
+          for (int64 i : data->second) {
+            entry.add_parameter_shape_index(i);
+          }
+          result.add_entries()->Swap(&entry);
+        }
+      });
+  return result;
+}
+
+StatusOr<HloInputOutputAliasConfig> HloInputOutputAliasConfig::CreateFromProto(
+    const HloModule* module, const HloInputOutputAliasProto& proto) {
+  HloInputOutputAliasConfig result(
+      module->entry_computation()->root_instruction()->shape());
+  for (const HloInputOutputAliasProto::AliasEntryProto& entry :
+       proto.entries()) {
+    ShapeIndex output_index(entry.output_shape_index().begin(),
+                            entry.output_shape_index().end());
+
+    int64 param_number = entry.parameter_number();
+    ShapeIndex param_index(entry.parameter_shape_index().begin(),
+                           entry.parameter_shape_index().end());
+    TF_RETURN_IF_ERROR(
+        result.SetUpAlias(output_index, param_number, param_index));
+  }
+
+  return result;
+}
+
+string HloInputOutputAliasConfig::ToString() const {
+  std::vector<string> pieces;
+  pieces.push_back("HloInputOutputAliasConfig");
+
+  ForEachAlias([&](const ShapeIndex& output_index, int64 param_number,
+                   const ShapeIndex& param_index) {
+    pieces.push_back(absl::StrFormat(
+        "  OutputIndex %s is aliased with parameter %lld at %s:",
+        output_index.ToString(), param_number, param_index.ToString()));
+  });
+
+  return absl::StrJoin(pieces, "\n");
+}
+
+bool HloInputOutputAliasConfig::ParameterHasAlias(int64 param_number) const {
+  bool output = false;
+  alias_.ForEachElement(
+      [&](const xla::ShapeIndex&,
+          absl::optional<std::pair<int64, ShapeIndex>> alias) {
+        if (alias && alias->first == param_number) {
+          output = true;
+        }
+      });
+  return output;
+}
+
+absl::optional<ShapeIndex> HloInputOutputAliasConfig::GetAliasedOutput(
+    int64 param_number, const ShapeIndex& param_index) const {
+  absl::optional<ShapeIndex> output;
+  alias_.ForEachElement(
+      [&](const xla::ShapeIndex& output_index,
+          absl::optional<std::pair<int64, ShapeIndex>> alias) {
+        if (alias && alias->first == param_number &&
+            alias->second == param_index) {
+          output = output_index;
+        }
+      });
+  return output;
+}
+
+absl::optional<std::pair<int64, ShapeIndex>>
+HloInputOutputAliasConfig::GetAliasedParameter(
+    const ShapeIndex& output_index) const {
+  CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index));
+  return alias_.element(output_index);
+}
+
+void HloInputOutputAliasConfig::ForEachAlias(AliasFn fn) const {
+  alias_.ForEachElement(
+      [&](const ShapeIndex& output_index,
+          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
+        if (aliased) {
+          fn(output_index, aliased->first, aliased->second);
+        }
+      });
+}
+
+Status HloInputOutputAliasConfig::ForEachAliasWithStatus(
+    AliasFnWithStatus fn) const {
+  return alias_.ForEachElementWithStatus(
+      [&](const ShapeIndex& output_index,
+          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
+        if (aliased) {
+          TF_RETURN_IF_ERROR(fn(output_index, aliased->first, aliased->second));
+        }
+        return Status::OK();
+      });
+}
+
+Status HloInputOutputAliasConfig::Verify(const HloModule& module) const {
+  std::vector<ShapeTree<bool>> param_has_seen;
+  const HloComputation* entry = module.entry_computation();
+  for (int64 i = 0; i < entry->num_parameters(); ++i) {
+    HloInstruction* param = entry->parameter_instruction(i);
+    param_has_seen.emplace_back(param->shape());
+  }
+  return ForEachAliasWithStatus([&](const ShapeIndex& output_index,
+                                    int64 param_number,
+                                    const ShapeIndex& param_index) -> Status {
+    const HloInstruction* root = entry->root_instruction();
+
+    const Shape& param_shape =
+        entry->parameter_instruction(param_number)->shape();
+    const Shape& output_shape = root->shape();
+    TF_RET_CHECK(entry->num_parameters() > param_number);
+    TF_RET_CHECK(ShapeUtil::IndexIsValid(param_shape, param_index));
+    TF_RET_CHECK(ShapeUtil::IndexIsValid(output_shape, output_index));
+
+    // Check each param_number and param_index pair only show up once. No
+    // input can be aliased with output buffers.
+    TF_RET_CHECK(param_has_seen[param_number].element(param_index) == false);
+
+    *(param_has_seen[param_number].mutable_element(param_index)) = true;
+
+    return Status::OK();
+  });
+}
+
+std::ostream& operator<<(std::ostream& out,
+                         const HloInputOutputAliasConfig& config) {
+  out << config.ToString();
+  return out;
+}
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
new file mode 100644
index 0000000000..02c46f65c8
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
@@ -0,0 +1,101 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
+
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/shape_tree.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+
+namespace xla {
+
+class HloModule;
+
+// This class specifies the alias map from output index to parameter number and
+// parameter index in the entry computation.
+class HloInputOutputAliasConfig {
+ public:
+  HloInputOutputAliasConfig() = default;
+
+  explicit HloInputOutputAliasConfig(Shape shape) : alias_(shape) {}
+
+  virtual ~HloInputOutputAliasConfig() = default;
+
+  // Sets up alias config from `output_index` to `param_index` at
+  // `param_number`.
+  Status SetUpAlias(const ShapeIndex& output_index, int64 param_number,
+                    const ShapeIndex& param_index);
+
+  // Returns true if the given parameter is aliased with one of the output
+  // buffers.
+  bool ParameterHasAlias(int64 param_number) const;
+
+  // (De)Serializes an HloInputOutoutAliasConfig to/from an
+  // HloInputOutoutAliasProto.
+  HloInputOutputAliasProto ToProto() const;
+
+  static StatusOr<HloInputOutputAliasConfig> CreateFromProto(
+      const HloModule* module, const HloInputOutputAliasProto& proto);
+
+  // Returns the output index that the given parameter and parameter index is
+  // aliased with. A nullopt is returned if there is no output that is aliased
+  // with the parameter number and index.
+  absl::optional<ShapeIndex> GetAliasedOutput(
+      int64 param_number, const ShapeIndex& param_index) const;
+
+  // Returns the number of parameter and index of the parameter buffer that the
+  // given output buffer index is aliased with. A nullopt is returned if there
+  // is no parameter is aliased with the specific output.
+  absl::optional<std::pair<int64, ShapeIndex>> GetAliasedParameter(
+      const ShapeIndex& output_index) const;
+
+  using AliasFn =
+      std::function<void(const ShapeIndex& output_index, int64 param_number,
+                         const ShapeIndex& param_index)>;
+
+  // Iterates through each aliased output and input.
+  void ForEachAlias(AliasFn fn) const;
+
+  using AliasFnWithStatus =
+      std::function<Status(const ShapeIndex& output_index, int64 param_number,
+                           const ShapeIndex& param_index)>;
+
+  // Verifies that the given config is valid for the given module.
+  // Specifically, the config's input and output should be in-bound and size of
+  // the aliased buffers should match.
+  Status Verify(const HloModule& module) const;
+
+  Status ForEachAliasWithStatus(AliasFnWithStatus fn) const;
+
+  string ToString() const;
+
+ private:
+  // A ShapeTree which indicates the list of buffers that's expected to be
+  // aliased. The key on this shape tree represents the output index. The value
+  // is a pair of parameter number and index into the buffer. If the value is
+  // nullopt, it means there is no parameter aliasing for this output.
+  ShapeTree<absl::optional<std::pair<int64, ShapeIndex>>> alias_;
+};
+
+std::ostream& operator<<(std::ostream& out,
+                         const HloInputOutputAliasConfig& config);
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
new file mode 100644
index 0000000000..3b61ff04e6
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
@@ -0,0 +1,184 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
+
+#include <memory>
+#include <string>
+
+#include "absl/algorithm/container.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_dce.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_ordering.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+class HloInputOutputAliasConfigTest : public HloTestBase {
+ protected:
+  void expect_aliased(const ShapeIndex& output_index, int64 param_number,
+                      const ShapeIndex& param_index,
+                      const HloInputOutputAliasConfig& config) {
+    absl::optional<ShapeIndex> aliased_output =
+        config.GetAliasedOutput(param_number, param_index);
+
+    EXPECT_TRUE(aliased_output);
+    EXPECT_EQ(aliased_output.value(), output_index);
+
+    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
+        config.GetAliasedParameter(output_index);
+
+    EXPECT_TRUE(aliased_param);
+    EXPECT_EQ(aliased_param.value(), std::make_pair(param_number, param_index));
+  }
+
+  void expect_not_aliased(const ShapeIndex& output_index, int64 param_number,
+                          const ShapeIndex& param_index,
+                          const HloInputOutputAliasConfig& config) {
+    absl::optional<ShapeIndex> aliased_output =
+        config.GetAliasedOutput(param_number, param_index);
+
+    EXPECT_FALSE(aliased_output && aliased_output == output_index);
+
+    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
+        config.GetAliasedParameter(output_index);
+
+    EXPECT_FALSE(aliased_param && aliased_param->first == param_number &&
+                 aliased_param->second == param_index);
+  }
+};
+
+TEST_F(HloInputOutputAliasConfigTest, SimpleAliasing) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT root = (f32[], f32[]) tuple(%a, %b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
+                                 /*param_index=*/{}));
+
+  expect_aliased(/*output_index=*/{0}, /*param_number=*/1,
+                 /*param_index=*/{}, config);
+
+  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
+                     /*param_index=*/{}, config);
+
+  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
+                     /*param_index=*/{}, config);
+}
+
+TEST_F(HloInputOutputAliasConfigTest, SimpleAliasingWithTupleInput) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  param = (f32[], f32[]) parameter(0)
+  gte1 = f32[] get-tuple-element(%param), index=0
+  gte2 = f32[] get-tuple-element(%param), index=1
+  ROOT root = (f32[], f32[]) tuple(%gte1, %gte2)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
+                                 /*param_index=*/{0}));
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
+                                 /*param_index=*/{1}));
+
+  expect_aliased(/*output_index=*/{0}, /*param_number=*/0,
+                 /*param_index=*/{0}, config);
+
+  expect_aliased(/*output_index=*/{1}, /*param_number=*/0,
+                 /*param_index=*/{1}, config);
+
+  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
+                     /*param_index=*/{}, config);
+
+  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
+                     /*param_index=*/{}, config);
+}
+
+TEST_F(HloInputOutputAliasConfigTest, InputDoNotAliasTwice) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT root = (f32[], f32[]) tuple(%a, %b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
+                                 /*param_index=*/{}));
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
+                                 /*param_index=*/{}));
+
+  ASSERT_IS_NOT_OK(config.Verify(*module));
+}
+
+TEST_F(HloInputOutputAliasConfigTest, OutputDoNotAliasTwice) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT root = (f32[], f32[]) tuple(%a, %b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
+                                 /*param_index=*/{}));
+
+  ASSERT_IS_NOT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
+                                     /*param_index=*/{}));
+}
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 93e04eb3db..547f74a0ed 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -73,6 +73,8 @@ HloComputation* HloModule::AddComputationInternal(
       config_.SetDefaultComputationLayout(
           entry_computation_->ComputeProgramShape());
     }
+    input_output_alias_config_ = HloInputOutputAliasConfig(
+        entry_computation_->root_instruction()->shape());
   }
 
   if (uniquify_identifiers) {
@@ -252,6 +254,9 @@ HloModuleProto HloModule::ToProto() const {
   if (has_schedule()) {
     *proto.mutable_schedule() = schedule().ToProto().ValueOrDie();
   }
+
+  *proto.mutable_input_output_alias() = input_output_alias_config().ToProto();
+
   return proto;
 }
 
@@ -328,6 +333,10 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
   }
   TF_RET_CHECK(module->entry_computation_ != nullptr);
 
+  TF_ASSIGN_OR_RETURN(module->input_output_alias_config_,
+                      HloInputOutputAliasConfig::CreateFromProto(
+                          module.get(), proto.input_output_alias()));
+
   // Because we didn't uniquify the names or the ids, double-check that the
   // instruction and computation names and ids are unique from the proto.
   absl::flat_hash_set<string> computation_names;
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 735804e827..9b9dc3ba9f 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_clone_context.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
@@ -212,6 +213,15 @@ class HloModule {
     return result;
   }
 
+  // input_output_alias_config indicates the list of aliased buffers that are
+  // expected from the module.
+  HloInputOutputAliasConfig& input_output_alias_config() {
+    return input_output_alias_config_;
+  }
+  const HloInputOutputAliasConfig& input_output_alias_config() const {
+    return input_output_alias_config_;
+  }
+
   // Returns the number of unique intruction ids given out.  All ids up to
   // this point are guaranteed to be in the range [0..NumUniqueInstructionIds())
   int NumUniqueInstructionIds() const { return next_unique_id_; }
@@ -284,6 +294,10 @@ class HloModule {
   // sequential order of instructions for each non-fusion computation in the
   // module.
   absl::optional<HloSchedule> schedule_;
+
+  // alias_config indicates the alias information of input/output buffers that
+  // are expected from the module.
+  HloInputOutputAliasConfig input_output_alias_config_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index be3bee5975..2902a11a42 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -1220,6 +1220,8 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     TF_RETURN_IF_ERROR(module->schedule().Verify());
   }
 
+  TF_RETURN_IF_ERROR(module->input_output_alias_config().Verify(*module));
+
   return false;
 }
 
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 73f541d505..51cedce7f0 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -72,7 +72,7 @@ class ShapeIndex {
   void push_back(int64 value) { indices_.push_back(value); }
   void pop_back() { indices_.pop_back(); }
 
-  // push_front is O(n^2), but shapes don't usually have a ton of dimensions.
+  // push_front is O(n), but shapes don't usually have a ton of dimensions.
   void push_front(int64 value) { indices_.insert(indices_.begin(), value); }
 
   using container_type = absl::InlinedVector<int64, 2>;
-- 
GitLab


From a593c6885bec8c545665ec2f25d794777be55ba9 Mon Sep 17 00:00:00 2001
From: Reed Wanderman-Milne <reedwm@google.com>
Date: Mon, 8 Oct 2018 21:23:08 -0700
Subject: [PATCH 1282/1357] Automated rollback of commit
 07df147ab20c4a5329148e5fb5f7f6b187cb73a4

PiperOrigin-RevId: 216299809
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 7488cedec5..225c0a91e3 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -114,8 +114,7 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
   MK_OPT("scoped_allocator",
          new ScopedAllocatorOptimizer(cfg_.scoped_allocator_optimization(),
                                       cfg_.scoped_allocator_opts()));
-  MK_OPT("pin_to_host",
-         new PinToHostOptimizer(cfg_.pin_to_host_optimization()));
+  MK_OPT("small_op", new PinToHostOptimizer(cfg_.pin_to_host_optimization()));
 
   return std::unique_ptr<GraphOptimizer>();
 }
@@ -162,7 +161,7 @@ Status MetaOptimizer::InitializeOptimizers(
   if (cfg_.remapping() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
   }
-  if (cfg_.pin_to_host_optimization() != RewriterConfig::OFF) {
+  if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) {
     optimizers->push_back(MakeUnique<PinToHostOptimizer>());
   }
   if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
@@ -592,7 +591,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
          cfg.scoped_allocator_optimization() == RewriterConfig::ON ||
-         cfg.pin_to_host_optimization() != RewriterConfig::OFF ||
+         cfg.pin_to_host_optimization() == RewriterConfig::ON ||
          !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }
 
-- 
GitLab


From d1f0494b89a31298df7743018c0a3fa388ac16a2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 22:13:04 -0700
Subject: [PATCH 1283/1357] Add Floor_mod to schema.

PiperOrigin-RevId: 216303340
---
 tensorflow/contrib/lite/builtin_ops.h         |   1 +
 .../lite/core/api/flatbuffer_conversions.cc   |   1 +
 tensorflow/contrib/lite/nnapi_delegate.cc     |   1 +
 tensorflow/contrib/lite/schema/schema.fbs     |   5 +
 .../contrib/lite/schema/schema_generated.h    | 124 +++++++++++++++++-
 5 files changed, 126 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h
index 7809d114e2..6117cbf9f1 100644
--- a/tensorflow/contrib/lite/builtin_ops.h
+++ b/tensorflow/contrib/lite/builtin_ops.h
@@ -120,6 +120,7 @@ typedef enum {
   kTfLiteBuiltinSquare = 92,
   kTfLiteBuiltinZerosLike = 93,
   kTfLiteBuiltinFill = 94,
+  kTfLiteBuiltinFloorMod = 95,
 } TfLiteBuiltinOperator;
 
 #ifdef __cplusplus
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index b092e5ee54..890d9c04bb 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -651,6 +651,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_SQUARE:
     case BuiltinOperator_ZEROS_LIKE:
     case BuiltinOperator_FILL:
+    case BuiltinOperator_FLOOR_MOD:
       break;
   }
   return kTfLiteOk;
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index f23a0ccb80..c7005eb53e 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -679,6 +679,7 @@ TfLiteStatus AddOpsAndParams(
       case tflite::BuiltinOperator_SQUARE:
       case tflite::BuiltinOperator_ZEROS_LIKE:
       case tflite::BuiltinOperator_FILL:
+      case tflite::BuiltinOperator_FLOOR_MOD:
         logError("Op code %d is currently not delegated to NNAPI", builtin);
         return kTfLiteError;
         break;
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index cb7a282743..2b36209e5f 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -176,6 +176,7 @@ enum BuiltinOperator : byte {
   SQUARE = 92,
   ZEROS_LIKE = 93,
   FILL = 94,
+  FLOOR_MOD = 95,
 }
 
 // Options for the builtin operators.
@@ -251,6 +252,7 @@ union BuiltinOptions {
   BidirectionalSequenceLSTMOptions,
   BidirectionalSequenceRNNOptions,
   UnidirectionalSequenceLSTMOptions,
+  FloorModOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -618,6 +620,9 @@ table ZerosLikeOptions {
 table FillOptions {
 }
 
+table FloorModOptions {
+}
+
 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
 // builtin, or a string if the operator is custom.
 table OperatorCode {
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index e7b7a59def..3aaa99ec55 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -241,6 +241,9 @@ struct ZerosLikeOptionsT;
 struct FillOptions;
 struct FillOptionsT;
 
+struct FloorModOptions;
+struct FloorModOptionsT;
+
 struct OperatorCode;
 struct OperatorCodeT;
 
@@ -401,11 +404,12 @@ enum BuiltinOperator {
   BuiltinOperator_SQUARE = 92,
   BuiltinOperator_ZEROS_LIKE = 93,
   BuiltinOperator_FILL = 94,
+  BuiltinOperator_FLOOR_MOD = 95,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_FILL
+  BuiltinOperator_MAX = BuiltinOperator_FLOOR_MOD
 };
 
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[94] {
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[95] {
   static const BuiltinOperator values[] = {
     BuiltinOperator_ADD,
     BuiltinOperator_AVERAGE_POOL_2D,
@@ -500,7 +504,8 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[94] {
     BuiltinOperator_REDUCE_ANY,
     BuiltinOperator_SQUARE,
     BuiltinOperator_ZEROS_LIKE,
-    BuiltinOperator_FILL
+    BuiltinOperator_FILL,
+    BuiltinOperator_FLOOR_MOD
   };
   return values;
 }
@@ -602,6 +607,7 @@ inline const char * const *EnumNamesBuiltinOperator() {
     "SQUARE",
     "ZEROS_LIKE",
     "FILL",
+    "FLOOR_MOD",
     nullptr
   };
   return names;
@@ -685,11 +691,12 @@ enum BuiltinOptions {
   BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
   BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
   BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
+  BuiltinOptions_FloorModOptions = 72,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_UnidirectionalSequenceLSTMOptions
+  BuiltinOptions_MAX = BuiltinOptions_FloorModOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[72] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[73] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -762,7 +769,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[72] {
     BuiltinOptions_FillOptions,
     BuiltinOptions_BidirectionalSequenceLSTMOptions,
     BuiltinOptions_BidirectionalSequenceRNNOptions,
-    BuiltinOptions_UnidirectionalSequenceLSTMOptions
+    BuiltinOptions_UnidirectionalSequenceLSTMOptions,
+    BuiltinOptions_FloorModOptions
   };
   return values;
 }
@@ -841,6 +849,7 @@ inline const char * const *EnumNamesBuiltinOptions() {
     "BidirectionalSequenceLSTMOptions",
     "BidirectionalSequenceRNNOptions",
     "UnidirectionalSequenceLSTMOptions",
+    "FloorModOptions",
     nullptr
   };
   return names;
@@ -1139,6 +1148,10 @@ template<> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
 };
 
+template<> struct BuiltinOptionsTraits<FloorModOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -1738,6 +1751,14 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ?
       reinterpret_cast<const UnidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
   }
+  FloorModOptionsT *AsFloorModOptions() {
+    return type == BuiltinOptions_FloorModOptions ?
+      reinterpret_cast<FloorModOptionsT *>(value) : nullptr;
+  }
+  const FloorModOptionsT *AsFloorModOptions() const {
+    return type == BuiltinOptions_FloorModOptions ?
+      reinterpret_cast<const FloorModOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -6241,6 +6262,46 @@ inline flatbuffers::Offset<FillOptions> CreateFillOptions(
 
 flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct FloorModOptionsT : public flatbuffers::NativeTable {
+  typedef FloorModOptions TableType;
+  FloorModOptionsT() {
+  }
+};
+
+struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef FloorModOptionsT NativeTableType;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  FloorModOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<FloorModOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FloorModOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
+  flatbuffers::Offset<FloorModOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FloorModOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  FloorModOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct OperatorCodeT : public flatbuffers::NativeTable {
   typedef OperatorCode TableType;
   BuiltinOperator builtin_code;
@@ -6587,6 +6648,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const {
     return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
   }
+  const FloorModOptions *builtin_options_as_FloorModOptions() const {
+    return builtin_options_type() == BuiltinOptions_FloorModOptions ? static_cast<const FloorModOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -6902,6 +6966,10 @@ template<> inline const UnidirectionalSequenceLSTMOptions *Operator::builtin_opt
   return builtin_options_as_UnidirectionalSequenceLSTMOptions();
 }
 
+template<> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const {
+  return builtin_options_as_FloorModOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -9286,6 +9354,29 @@ inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBuffe
       _fbb);
 }
 
+inline FloorModOptionsT *FloorModOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new FloorModOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void FloorModOptions::UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<FloorModOptions> FloorModOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateFloorModOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FloorModOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateFloorModOptions(
+      _fbb);
+}
+
 inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorCodeT();
   UnPackTo(_o, _resolver);
@@ -9759,6 +9850,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -10061,6 +10156,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -10351,6 +10450,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptionsT *>(value);
       return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<const FloorModOptionsT *>(value);
+      return CreateFloorModOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -10641,6 +10744,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast<UnidirectionalSequenceLSTMOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_FloorModOptions: {
+      value = new FloorModOptionsT(*reinterpret_cast<FloorModOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -11003,6 +11110,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<FloorModOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
-- 
GitLab


From e27ee15fa45a5f4e43e10ed1fe0eb3a1feb4253a Mon Sep 17 00:00:00 2001
From: Peter Ma <pcma@google.com>
Date: Mon, 8 Oct 2018 23:12:08 -0700
Subject: [PATCH 1284/1357] Refactor CalculateOutputSize() from
 VirtualScheduler protected member function to utils; Refactor EstimateSize()
 from memory_optimizer.cc to utils; some small changes for readability
 improvement

PiperOrigin-RevId: 216307257
---
 tensorflow/core/grappler/costs/BUILD          |   1 +
 tensorflow/core/grappler/costs/utils.cc       |  40 ++++++-
 tensorflow/core/grappler/costs/utils.h        |  11 ++
 tensorflow/core/grappler/costs/utils_test.cc  | 112 +++++++++++++-----
 .../core/grappler/costs/virtual_scheduler.cc  |  48 ++------
 .../core/grappler/costs/virtual_scheduler.h   |  22 ++--
 .../grappler/costs/virtual_scheduler_test.cc  |  48 +-------
 tensorflow/core/grappler/optimizers/BUILD     |   1 +
 .../grappler/optimizers/memory_optimizer.cc   |  26 +---
 9 files changed, 161 insertions(+), 148 deletions(-)

diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD
index f3dc2c2091..46eacd3a06 100644
--- a/tensorflow/core/grappler/costs/BUILD
+++ b/tensorflow/core/grappler/costs/BUILD
@@ -236,6 +236,7 @@ tf_cc_test(
     name = "virtual_scheduler_test",
     srcs = ["virtual_scheduler_test.cc"],
     deps = [
+        ":utils",
         ":virtual_placer",
         ":virtual_scheduler",
         "//tensorflow/cc:cc_ops",
diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index 5415324b48..2fcadf1de3 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -74,7 +74,8 @@ static std::vector<TensorProto> ExtractTensors(const AttrValue& attr_value) {
       }
       break;
     }
-    default: {}
+    default: {
+    }
   }
   return tensors;
 }
@@ -201,6 +202,43 @@ std::vector<OpInfo::TensorProperties> FindInputFeatures(
   return inputs;
 }
 
+int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) {
+  int64 size = DataTypeSize(BaseType(prop.dtype()));
+  TensorShapeProto shape = prop.shape();
+
+  // Can't infer the size if the rank is unknown. It has to be at least a
+  // scalar though.
+  if (shape.unknown_rank()) {
+    LOG(WARNING) << "CalculateTensorSize() -- unknown rank";
+    return size;
+  }
+
+  // If one of the dimensions is unknown statically, assume it's at least one.
+  for (int i = 0; i < shape.dim_size(); ++i) {
+    if (shape.dim(i).size() < 0) {
+      shape.mutable_dim(i)->set_size(1);
+      LOG(WARNING) << "CalculateTensorSize() -- unknown dim: " << i;
+    }
+  }
+
+  int64 num_elems = TensorShape(shape).num_elements();
+  return num_elems * size;
+}
+
+int64 CalculateOutputSize(
+    const std::vector<OpInfo::TensorProperties>& output_properties,
+    const int port_num) {
+  if (port_num < 0) return 4;  // 4B for control dependency.
+
+  if (port_num >= output_properties.size()) {
+    LOG(ERROR) << "CalculateOutputSize() -- port_num: " << port_num
+               << " >= output_properties.size(): " << output_properties.size();
+    return 0;
+  }
+
+  return CalculateTensorSize(output_properties[port_num]);
+}
+
 DeviceProperties GetDeviceInfo(const string& device_str) {
   DeviceProperties unknown;
   unknown.set_type("UNKNOWN");
diff --git a/tensorflow/core/grappler/costs/utils.h b/tensorflow/core/grappler/costs/utils.h
index 5fd6717712..ea64e5a41d 100644
--- a/tensorflow/core/grappler/costs/utils.h
+++ b/tensorflow/core/grappler/costs/utils.h
@@ -43,6 +43,17 @@ std::vector<OpInfo::TensorProperties> FindInputFeatures(
     const std::unordered_map<string, const CostGraphDef::Node*>& name_to_cost,
     const std::unordered_map<string, const NodeDef*>& name_to_node);
 
+// Returns the size of tensor (unit: bytes). For tensor shape with unknown rank,
+// it assumes the tensor to be scalar. For any unknown dimension, it assumes
+// size one.
+int64 CalculateTensorSize(const OpInfo::TensorProperties& prop);
+
+// Returns the size of output at port_num (unit: bytes). A special case is
+// port_num -1, which is for control dependency and assumed to be 4 bytes.
+int64 CalculateOutputSize(
+    const std::vector<OpInfo::TensorProperties>& output_properties,
+    int port_num);
+
 // Returns the DeviceProperties of the device on which 'node' runs.
 DeviceProperties GetDeviceInfo(const CostGraphDef::Node& node);
 DeviceProperties GetDeviceInfo(const string& device_str);
diff --git a/tensorflow/core/grappler/costs/utils_test.cc b/tensorflow/core/grappler/costs/utils_test.cc
index baa654f475..db5c11f0fe 100644
--- a/tensorflow/core/grappler/costs/utils_test.cc
+++ b/tensorflow/core/grappler/costs/utils_test.cc
@@ -26,36 +26,42 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-class UtilsTest : public ::testing::Test {
- public:
-  void CreateConstOp(const string& name, std::initializer_list<int64> dims,
-                     NodeDef* node) {
-    Tensor tensor(DT_FLOAT, TensorShape(dims));
-    for (int64 i = 0; i < tensor.NumElements(); ++i) {
-      tensor.flat<float>()(i) = i / 10.0f;
-    }
-    TF_CHECK_OK(NodeDefBuilder(name, "Const")
-                    .Attr("dtype", DT_FLOAT)
-                    .Attr("value", tensor)
-                    .Finalize(node));
-  }
+namespace {
 
-  void CreateConstSizesOp(const string& name, const std::vector<int32>& sizes,
-                          NodeDef* node) {
-    TensorShape shape;
-    shape.AddDim(sizes.size());
-    Tensor tensor(DT_INT32, shape);
-    for (int64 i = 0; i < tensor.NumElements(); ++i) {
-      tensor.flat<int32>()(i) = sizes[i];
-    }
-    TF_CHECK_OK(NodeDefBuilder(name, "Const")
-                    .Attr("dtype", DT_INT32)
-                    .Attr("value", tensor)
-                    .Finalize(node));
-  }
-};
+void CreateConstOp(const string& name, std::initializer_list<int64> dims,
+                   NodeDef* node) {
+  Tensor tensor(DT_FLOAT, TensorShape(dims));
+  for (int64 i = 0; i < tensor.NumElements(); ++i)
+    tensor.flat<float>()(i) = i / 10.0f;
+  TF_CHECK_OK(NodeDefBuilder(name, "Const")
+                  .Attr("dtype", DT_FLOAT)
+                  .Attr("value", tensor)
+                  .Finalize(node));
+}
 
-TEST_F(UtilsTest, ConvOpInfo) {
+void CreateConstSizesOp(const string& name, const std::vector<int32>& sizes,
+                        NodeDef* node) {
+  TensorShape shape;
+  shape.AddDim(sizes.size());
+  Tensor tensor(DT_INT32, shape);
+  for (int64 i = 0; i < tensor.NumElements(); ++i)
+    tensor.flat<int32>()(i) = sizes[i];
+  TF_CHECK_OK(NodeDefBuilder(name, "Const")
+                  .Attr("dtype", DT_INT32)
+                  .Attr("value", tensor)
+                  .Finalize(node));
+}
+
+// Helper method for converting shapes vector to TensorProperty.
+OpInfo::TensorProperties ShapeToTensorProperty(const std::vector<int>& shapes,
+                                               const DataType& data_type) {
+  OpInfo::TensorProperties prop;
+  prop.set_dtype(data_type);
+  for (int shape : shapes) prop.mutable_shape()->add_dim()->set_size(shape);
+  return prop;
+}
+
+TEST(UtilsTest, ConvOpInfo) {
   int batch = 32;
   int rows = 7;
   int cols = 9;
@@ -146,7 +152,7 @@ TEST_F(UtilsTest, ConvOpInfo) {
   }
 }
 
-TEST_F(UtilsTest, TestSkipControlInput) {
+TEST(UtilsTest, TestSkipControlInput) {
   GraphDef graph;
   TF_CHECK_OK(NodeDefBuilder("constant", "Const")
                   .Attr("dtype", DT_INT32)
@@ -172,6 +178,52 @@ TEST_F(UtilsTest, TestSkipControlInput) {
   EXPECT_TRUE(node_found);
 }
 
+TEST(UtilsTest, CalculateTensorSize) {
+  // Test normal usage.
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 1,
+            CalculateTensorSize(ShapeToTensorProperty({1}, DT_FLOAT)));
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 4 * 4,
+            CalculateTensorSize(ShapeToTensorProperty({4, 4}, DT_FLOAT)));
+  EXPECT_EQ(DataTypeSize(DT_HALF) * 10 * 10 * 10,
+            CalculateTensorSize(ShapeToTensorProperty({10, 10, 10}, DT_HALF)));
+  EXPECT_EQ(
+      DataTypeSize(DT_FLOAT) * 100 * 7 * 8 * 99,
+      CalculateTensorSize(ShapeToTensorProperty({100, 7, 8, 99}, DT_FLOAT)));
+
+  // Test unknown rank: assumes the tensor to be a scalar.
+  OpInfo::TensorProperties t = ShapeToTensorProperty({100, 7, 8, 99}, DT_FLOAT);
+  t.mutable_shape()->set_unknown_rank(true);
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 1, CalculateTensorSize(t));
+
+  // Test unknown shape: assumes unknown shape (-1) to have size 1.
+  EXPECT_EQ(
+      DataTypeSize(DT_FLOAT) * 1 * 7 * 8 * 99,
+      CalculateTensorSize(ShapeToTensorProperty({-1, 7, 8, 99}, DT_FLOAT)));
+  EXPECT_EQ(
+      DataTypeSize(DT_FLOAT) * 1 * 7 * 1 * 99,
+      CalculateTensorSize(ShapeToTensorProperty({-1, 7, -1, 99}, DT_FLOAT)));
+}
+
+TEST(UtilsTest, CalculateOutputSize) {
+  // Create a set of tensor properties.
+  std::vector<OpInfo::TensorProperties> output = {
+      ShapeToTensorProperty({4, 4}, DT_FLOAT),          // 0
+      ShapeToTensorProperty({-1, 7, -1, 99}, DT_FLOAT)  // 1
+  };
+
+  // Test valid outputs.
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 4 * 4, CalculateOutputSize(output, 0));
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 1 * 7 * 1 * 99,
+            CalculateOutputSize(output, 1));
+
+  // port_num -1 is for control dependency: hard coded 4B.
+  EXPECT_EQ(4, CalculateOutputSize(output, -1));
+
+  // Invalid port_num (though it may be an error) shall yield zero
+  // output size.
+  EXPECT_EQ(0, CalculateOutputSize(output, 2));
+}
+
 // Class for testing TensorSizeHistogram.
 class TestTensorSizeHistogram : public TensorSizeHistogram {
  public:
@@ -285,5 +337,7 @@ TEST(DeviceClassTest, GetDeviceClassForNonChannelDevice) {
   EXPECT_EQ("//GPU", GetDeviceClassForNonChannelDevice("/device:GPU:7"));
 }
 
+}  // namespace
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 037a823096..5b93fb128f 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -473,6 +473,7 @@ Status VirtualScheduler::Init() {
     VLOG(1) << "Some feed nodes were not consumed by the fetch fanin: "
             << str_util::Join(feed_nodes, ",");
   }
+
   initialized_ = true;
   return Status::OK();
 }
@@ -695,38 +696,6 @@ NodeState& VirtualScheduler::GetNodeStateOrCreateIt(const NodeDef* node) {
   return it->second;
 }
 
-int64 VirtualScheduler::CalculateOutputSize(
-    const std::vector<OpInfo::TensorProperties>& output_properties,
-    const int port_num) const {
-  if (port_num < 0) {
-    return 4;  // 4B for control dependency.
-  }
-
-  if (port_num >= output_properties.size()) {
-    VLOG(3) << "VirtualScheduler::CalculateOutputSize() -- "
-            << "port_num: " << port_num
-            << " >= output_properties.size(): " << output_properties.size();
-    return 0;
-  }
-
-  const auto& output = output_properties[port_num];
-  int64 output_size = DataTypeSize(BaseType(output.dtype()));
-
-  for (const auto& dim : output.shape().dim()) {
-    auto dim_size = dim.size();
-    if (dim_size < 0) {
-      // Zero output size if there's any unknown dim.
-      output_size = 0;
-      VLOG(3) << "VirtualScheduler::CalculateOutputSize() -- "
-              << "unknown dim: " << output_size;
-      break;
-    }
-    output_size *= dim_size;
-  }
-
-  return output_size;
-}
-
 Costs& VirtualScheduler::FindOrCreateZero(const string& op_name,
                                           std::map<string, Costs>* op_cost) {
   auto it = op_cost->find(op_name);
@@ -744,7 +713,10 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
   const NodeDef* node = ready_nodes_->GetCurrNode();
   const string& op_name = node->op();
 
-  // Also keep track of op counts and times per op (with their shapes).
+  auto& op_cost = FindOrCreateZero(op_name, &op_to_cost_);
+  op_cost = CombineCosts(op_cost, node_costs);
+
+  // Also keep track of op counts and costs per op (with their shapes).
   OpContext op_context = GetCurrNode();
   string node_description = GetOpDescription(op_context.op_info);
   op_counts_[node_description] += 1;
@@ -752,9 +724,6 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
       std::make_pair(node_costs.execution_time.asMicroSeconds().count(),
                      !node_costs.inaccurate);
 
-  auto& op_cost = FindOrCreateZero(op_name, &op_to_cost_);
-  op_cost = CombineCosts(op_cost, node_costs);
-
   // Update node and device states.
   auto& node_state = node_map_[node];
   auto& device = device_[node_state.device_name];
@@ -795,7 +764,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
           << ", scheduled: " << node_state.time_scheduled.count()
           << ", finished: " << node_state.time_finished.count();
 
-  // Increment num_inputs_ready of the output nodes
+  // Increment num_inputs_ready of the output nodes and maybe add to ready nodes
   for (const auto& port_num_output_pair : node_state.outputs) {
     for (auto* output_node : port_num_output_pair.second) {
       auto& output_state = node_map_[output_node];
@@ -812,7 +781,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
     }
   }
 
-  // Increment num_outputs_executed of the input nodes.
+  // Increment num_outputs_executed of the input nodes and maybe update memory.
   for (const auto& input_port : node_state.inputs) {
     auto* input = input_port.first;
     auto port = input_port.second;
@@ -841,7 +810,6 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
     }
   }
 
-  // Remove the current node; assume FIFO.
   ready_nodes_->RemoveCurrNode();
 
   return !ready_nodes_->Empty();
@@ -1007,7 +975,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
     return Summary();
   }
 
-  // Fill RunMetadata.
+  // Fill RunMetadata's step_stats and partition_graphs fields.
   StepStats* stepstats = metadata->mutable_step_stats();
   for (const auto& device : device_) {
     GraphDef* device_partition_graph = metadata->add_partition_graphs();
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h
index 0e66e8a463..bead84af29 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.h
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.h
@@ -107,10 +107,10 @@ struct DeviceState {
       mem_usage_snapshot_at_peak;
 
   Costs device_costs;
-  std::map<string, Costs> op_to_cost;    // Per-op cost.
-  std::map<string, int64> op_to_memory;  // Per-op memory usage at peak usage.
-  int64 memory_usage;
-  int64 max_memory_usage;
+  std::map<string, Costs> op_to_cost;  // Per-op cost.
+
+  int64 memory_usage;      // Current temporary memory usage
+  int64 max_memory_usage;  // Max temporary memory usage
 
   DeviceState() {
     device_costs = Costs::ZeroCosts();
@@ -283,13 +283,6 @@ class VirtualScheduler {
     return &node_map_;
   }
 
- protected:
-  // Returns the size of output at port_num (unit: bytes). A special case is
-  // port_num -1, which is for control dependency and assumed to be 4 bytes.
-  int64 CalculateOutputSize(
-      const std::vector<OpInfo::TensorProperties>& output_properties,
-      const int port_num) const;
-
  private:
   // Constants.
   const string kAttrInputSrc = "input_source_";
@@ -321,8 +314,11 @@ class VirtualScheduler {
   std::vector<std::unique_ptr<NodeDef>> additional_nodes_;
 
   // Stats:
-  std::map<string, int> op_counts_;  // Op counts with key with input shape.
-  // Individual op costs (with input shapes).
+  // Op counts with key with input shape.
+  // Example key: "[Op=AssignSub, input_shapes=[[7,1,160,160][7,1,160,160]]"
+  std::map<string, int> op_counts_;
+  // Individual op costs with key with input shape.
+  // Integer field for execution time in micro seconds.
   // Boolean field for whether the cost is accurate.
   std::map<string, std::pair<int, bool>> op_costs_;
 
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
index 80889afc86..99272dd7e9 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
@@ -19,12 +19,14 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_description.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/grappler/clusters/virtual_cluster.h"
+#include "tensorflow/core/grappler/costs/utils.h"
 #include "tensorflow/core/grappler/costs/virtual_placer.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
 namespace grappler {
+
 // Class for testing virtual scheduler.
 class TestVirtualScheduler : public VirtualScheduler {
  public:
@@ -33,7 +35,6 @@ class TestVirtualScheduler : public VirtualScheduler {
       : VirtualScheduler(grappler_item, use_static_shapes, cluster,
                          &ready_node_manager_) {}
 
-  FRIEND_TEST(VirtualSchedulerTest, CalculateOutputSize);
   FRIEND_TEST(VirtualSchedulerTest, MemoryUsage);
   FRIEND_TEST(VirtualSchedulerTest, ControlDependency);
   FRIEND_TEST(VirtualSchedulerTest, ComplexDependency);
@@ -1034,17 +1035,6 @@ versions {
     }
   }
 
-  // Helper method for converting shape vector to TensorProperty.
-  OpInfo::TensorProperties ShapeToTensorProperty(
-      const std::vector<int> shape, const DataType& data_type) const {
-    OpInfo::TensorProperties tensor_property;
-    tensor_property.set_dtype(data_type);
-    for (const auto& x : shape) {
-      tensor_property.mutable_shape()->add_dim()->set_size(x);
-    }
-    return tensor_property;
-  }
-
   // SetUp() inits cluster_ and placer_.
   std::unique_ptr<VirtualCluster> cluster_;
   std::unique_ptr<VirtualPlacer> placer_;
@@ -1729,38 +1719,6 @@ TEST_F(VirtualSchedulerTest, InitAndBasicScheduling) {
   EXPECT_EQ(2, ops_executed["c1"].op_info.inputs_size());
 }
 
-TEST_F(VirtualSchedulerTest, CalculateOutputSize) {
-  // Init.
-  CreateGrapplerItemWithAddN();
-  InitScheduler();
-
-  // Create a set of tensor properties.
-  std::vector<OpInfo::TensorProperties> output;
-  output.push_back(ShapeToTensorProperty({4, 4}, DT_FLOAT));           // 0
-  output.push_back(ShapeToTensorProperty({1}, DT_FLOAT));              // 1
-  output.push_back(ShapeToTensorProperty({10, 10, 10}, DT_HALF));      // 2
-  output.push_back(ShapeToTensorProperty({100, 7, 8, 99}, DT_FLOAT));  // 3
-  output.push_back(ShapeToTensorProperty({-1, 7, 8, 99}, DT_FLOAT));   // 4
-  output.push_back(ShapeToTensorProperty({-1, 7, -1, 99}, DT_FLOAT));  // 4
-
-  // port_num -1 is for control dependency: hard coded 4B.
-  EXPECT_EQ(4, scheduler_->CalculateOutputSize(output, -1));
-
-  // Test valid outputs.
-  EXPECT_EQ(4 * 4 * 4, scheduler_->CalculateOutputSize(output, 0));
-  EXPECT_EQ(4 * 1, scheduler_->CalculateOutputSize(output, 1));
-  EXPECT_EQ(2 * 10 * 10 * 10, scheduler_->CalculateOutputSize(output, 2));
-  EXPECT_EQ(4 * 100 * 7 * 8 * 99, scheduler_->CalculateOutputSize(output, 3));
-
-  // Any unknown shape (-1) shall yield zero output size.
-  EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 4));
-  EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 5));
-
-  // Invalid port_num (though it may be an error) shall yield zero
-  // output size.
-  EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 6));
-}
-
 TEST_F(VirtualSchedulerTest, MemoryUsage) {
   // Init.
   CreateGrapplerItemWithAddN();
@@ -2041,7 +1999,7 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) {
     for (const auto& output_property : output_properties_) {
       output_properties.push_back(output_property);
     }
-    return scheduler_->CalculateOutputSize(output_properties, 0);
+    return CalculateOutputSize(output_properties, 0);
   };
 
   // Validate transfer size.
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index c708f84948..e898377ded 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -423,6 +423,7 @@ cc_library(
         "//tensorflow/core/grappler/clusters:virtual_cluster",
         "//tensorflow/core/grappler/costs:graph_memory",
         "//tensorflow/core/grappler/costs:graph_properties",
+        "//tensorflow/core/grappler/costs:utils",
         "//tensorflow/core/grappler/utils:topological_sort",
         "//tensorflow/core/grappler/utils:traversal",
     ],
diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
index c775a26914..73f0977242 100644
--- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/clusters/virtual_cluster.h"
 #include "tensorflow/core/grappler/costs/graph_memory.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
+#include "tensorflow/core/grappler/costs/utils.h"
 #include "tensorflow/core/grappler/graph_view.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
@@ -43,6 +44,8 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
+namespace {
+
 // Prefix added to nodes which are recomputed.
 const char* kRecomputedNodePrefix = "Recomputed";
 const char* kRecomputeTriggerNodePrefix = "RecomputeTrigger";
@@ -744,25 +747,6 @@ Status BuildSwapPair(NodeDef* node, int input_to_swap,
   return Status::OK();
 }
 
-static int64 EstimateSize(const OpInfo::TensorProperties& t) {
-  DataType dtype = t.dtype();
-  int64 size = DataTypeSize(dtype);
-  TensorShapeProto shape = t.shape();
-  if (shape.unknown_rank()) {
-    // Can't infer the size if the rank is unknown. It has to be at least a
-    // scalar though.
-    return size;
-  }
-  // If one of the dimensions is unknown statically, assume it's at least one.
-  for (int i = 0; i < shape.dim_size(); ++i) {
-    if (shape.dim(i).size() < 0) {
-      shape.mutable_dim(i)->set_size(1);
-    }
-  }
-  int64 num_elems = TensorShape(shape).num_elements();
-  return num_elems * size;
-}
-
 struct SwapInfo {
   std::vector<int> inputs_to_swap;
   Costs::NanoSeconds time_to_swap = 0;
@@ -1149,7 +1133,7 @@ bool SwappingPass(RewriterConfig::MemOptType optimization_level,
     int64 bytes_to_swap = 0;
     for (int64 input_id : swap_info.inputs_to_swap) {
       const OpInfo::TensorProperties& t = props[input_id];
-      bytes_to_swap += EstimateSize(t);
+      bytes_to_swap += CalculateTensorSize(t);
     }
     // Let's assume we're going to swap over PCIe running at 16 GBps.
     swap_info.time_to_swap = bytes_to_swap / 16;
@@ -1299,6 +1283,8 @@ Status RelaxAllocatorConstraints(GraphDef* optimized_graph) {
   return Status::OK();
 }
 
+}  // namespace
+
 Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                  GraphDef* optimized_graph) {
   *optimized_graph = item.graph;
-- 
GitLab


From 129bb5e845ccb2ab6339e85d39545800dac6ca33 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 23:42:02 -0700
Subject: [PATCH 1285/1357] Automated rollback of commit
 5f308cb408eb46ec9af0546be6b9ae1d5166b185

PiperOrigin-RevId: 216309111
---
 tensorflow/core/grappler/op_types.cc          |  22 +--
 .../optimizers/pin_to_host_optimizer.cc       | 162 ++++++------------
 .../optimizers/pin_to_host_optimizer.h        |   4 +-
 .../optimizers/pin_to_host_optimizer_test.cc  |  76 +++-----
 4 files changed, 85 insertions(+), 179 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index cbf5c8e038..1b5a215987 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -102,19 +102,15 @@ bool IsConjugateTranspose(const NodeDef& node) {
 }
 
 bool IsControlFlow(const NodeDef& node) {
-  // TODO(williamchan): Add a microbenchmark to compare FlatSet vs. iterative
-  // string comparison.
-  static const gtl::FlatSet<string>* const kControFlowOps =
-      CHECK_NOTNULL((new gtl::FlatSet<string>{
-          "ControlTrigger",
-          "Enter",
-          "Exit",
-          "LoopCond",
-          "Merge",
-          "NextIteration",
-          "Switch",
-      }));
-  return kControFlowOps->count(node.op()) > 0;
+  // clang-format off
+  return node.op() == "ControlTrigger" ||
+         node.op() == "Enter" ||
+         node.op() == "Exit" ||
+         node.op() == "LoopCond" ||
+         node.op() == "Merge" ||
+         node.op() == "NextIteration" ||
+         node.op() == "Switch";
+  // clang-format on
 }
 
 bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 29a3b2b74c..8ed4271fa4 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -25,29 +25,16 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
 namespace grappler {
-
 namespace internal {
 
-namespace {
 // TODO(williamchan): Change this constant to be something smarter, maybe
 // dynamically determined.
 constexpr int64 kTensorMaxSize = 64;
 
-struct OpDevicePortHasher {
-  std::size_t operator()(const std::tuple<string, string, int>& x) const {
-    uint64 code = Hash64Combine(Hash64(std::get<0>(x)), Hash64(std::get<1>(x)));
-
-    return Hash64Combine(code, hash<int>()(std::get<2>(x)));
-  }
-};
-using OpDevicePortOnHostMap =
-    gtl::FlatMap<std::tuple<string, string, int>, bool, OpDevicePortHasher>;
-
 // All the nodes that should be blacklisted and not swapped.
 bool IsBlacklisted(const NodeDef& node) {
   return
@@ -95,10 +82,10 @@ Status TryFindKernelDef(const std::vector<DeviceType>& devices,
 
 // Checks if a node's output port is host friendly.
 // Roughly this means checking if the output port is on Host memory.
-Status IsNodeOutputPortHostFriendly(
-    const GraphView& graph, GraphProperties* properties, const NodeDef& node,
-    int port_id, OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache,
-    bool* is_candidate) {
+Status IsNodeOutputPortHostFriendly(const GraphView& graph,
+                                    GraphProperties* properties,
+                                    const NodeDef& node, int port_id,
+                                    bool* is_candidate) {
   *is_candidate = false;
 
   // Make sure we are not a blacklisted op.
@@ -130,8 +117,7 @@ Status IsNodeOutputPortHostFriendly(
     for (const auto& fanin : graph.GetFanins(node, false)) {
       bool fanin_candidate = false;
       TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
-          graph, properties, *fanin.node, fanin.port_id,
-          op_device_outport_pinned_to_host_cache, &fanin_candidate));
+          graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
       if (!fanin_candidate) {
         return Status::OK();
       }
@@ -146,22 +132,11 @@ Status IsNodeOutputPortHostFriendly(
     return Status::OK();
   }
 
-  // Check `op_device_outport_pinned_to_host_cache` for our
-  // {op, device, port_id} combo to see if the arg is pinned on Host.
-  const std::tuple<string, string, int> cache_key(node.op(), node.device(),
-                                                  port_id);
-  auto it = op_device_outport_pinned_to_host_cache->find(cache_key);
-  if (it != op_device_outport_pinned_to_host_cache->end()) {
-    *is_candidate = it->second;
-    return Status::OK();
-  }
-
   // Check if op's output port is pinned to HostMemory.
   const OpDef* op = nullptr;
   Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
   if (!s.ok()) {
     LOG(WARNING) << "Could not find OpDef for : " << node.op();
-    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -171,7 +146,6 @@ Status IsNodeOutputPortHostFriendly(
     LOG(WARNING) << "Invalid port: " << port_id << "!\n"
                  << node.DebugString() << "\n"
                  << op->DebugString();
-    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -181,7 +155,6 @@ Status IsNodeOutputPortHostFriendly(
                        &kernel);
   if (!s.ok()) {
     LOG(INFO) << "Could not find KernelDef for: " << node.op();
-    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -193,35 +166,22 @@ Status IsNodeOutputPortHostFriendly(
     }
   }
 
-  op_device_outport_pinned_to_host_cache->emplace(cache_key, *is_candidate);
-
   return Status::OK();
 }
 
 // Checks if a node's input port is Host friendly.
 // Roughly this means checking if the input port is on Host memory.
-bool IsNodeInputPortHostFriendly(
-    const NodeDef& node, int port_id,
-    OpDevicePortOnHostMap* op_device_inport_pinned_to_host_cache) {
+bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
   // If node is on Host, assume its inputs are Host friendly.
   if (str_util::StrContains(node.device(), DEVICE_CPU)) {
     return true;
   }
 
-  // Check `op_device_inport_pinned_to_host_cache` for our
-  // {op, device, port_id} combo to see if the arg is pinned on Host.
-  std::tuple<string, string, int> cache_key(node.op(), node.device(), port_id);
-  auto it = op_device_inport_pinned_to_host_cache->find(cache_key);
-  if (it != op_device_inport_pinned_to_host_cache->end()) {
-    return it->second;
-  }
-
   // Check if op's input port is pinned to HostMemory.
   const OpDef* op = nullptr;
   Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
   if (!s.ok()) {
     LOG(WARNING) << "Could not find OpDef for : " << node.op();
-    op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
     return false;
   }
   const int input_arg_id = OpInputPortIdToArgId(node, *op, port_id);
@@ -232,20 +192,16 @@ bool IsNodeInputPortHostFriendly(
       {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node, &kernel);
   if (!s.ok()) {
     LOG(INFO) << "Could not find KernelDef for: " << node.op();
-    op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
     return false;
   }
 
   // Check if the input_arg is pinned to Host.
   for (const string& host_memory_arg : kernel->host_memory_arg()) {
     if (op->input_arg(input_arg_id).name() == host_memory_arg) {
-      op_device_inport_pinned_to_host_cache->emplace(cache_key, true);
       return true;
     }
   }
 
-  op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
-
   return false;
 }
 
@@ -255,29 +211,38 @@ bool IsNodeInputPortHostFriendly(
 // 2] Check if node can run on Host.
 // 3] Check all input/outputs are Host "friendly" (atm, friendly means small,
 //    ints, and pinned to Host).
-Status IsNodeHostCandidate(
-    const GraphView& graph, GraphProperties* properties, const NodeDef& node,
-    OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache,
-    bool* is_candidate) {
+Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
+                           const NodeDef& node, bool* is_candidate) {
   *is_candidate = false;
 
-  // Skip these node types.
-  if (IsBlacklisted(node)) {
-    return Status::OK();
-  }
-
   // Check if node already on CPU.
   if (str_util::StrContains(node.device(), DEVICE_CPU)) {
     *is_candidate = true;
     return Status::OK();
   }
 
+  // Skip these node types.
+  if (IsBlacklisted(node)) {
+    return Status::OK();
+  }
+
   // Check the node can be run on CPU.
   Status s = TryFindKernelDef({DEVICE_CPU}, node, nullptr);
   if (!s.ok()) {
     return Status::OK();
   }
 
+  // Check all inputs are Host friendly.
+  for (const GraphView::OutputPort& fanin :
+       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
+    bool fanin_candidate = false;
+    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
+        graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
+    if (!fanin_candidate) {
+      return Status::OK();
+    }
+  }
+
   // Check all outputs are Host friendly.
   if (!properties->has_properties()) {
     // This is an expensive call, call it lazily.
@@ -290,42 +255,16 @@ Status IsNodeHostCandidate(
     }
   }
 
-  // Check all inputs are Host friendly.
-  for (const GraphView::OutputPort& fanin :
-       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
-    bool fanin_candidate = false;
-    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
-        graph, properties, *fanin.node, fanin.port_id,
-        op_device_outport_pinned_to_host_cache, &fanin_candidate));
-    if (!fanin_candidate) {
-      return Status::OK();
-    }
-  }
-
   *is_candidate = true;
   return Status::OK();
 }
 
-bool IsTPUGraphDef(const GraphDef& def) {
-  for (const auto& node : def.node()) {
-    if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
-        node.op() == "TPUPartitionedCall") {
-      return true;
-    }
-  }
-  return false;
-}
-}  // end namespace
-
-// Tries to swap `device` to a Host device from `devices`. Returns true iff
-// there was a swap.
-bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
-                         bool has_device_cpu, string* device) {
+string TryFindHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, const string& device) {
   // Force this node onto the CPU.
-  if (device->empty() && has_device_cpu) {
-    *device = "/device:CPU:0";
-    return true;
-  } else if (str_util::StrContains(*device, DEVICE_GPU)) {
+  if (device.empty() && has_device_cpu) {
+    return "/device:CPU:0";
+  } else if (str_util::StrContains(device, DEVICE_GPU)) {
     // Sometimes the cluster can have:
     //   devices = {"/device:CPU:0", "/device:XLA_GPU:0"}
     // and we need to handle them properly.
@@ -333,19 +272,27 @@ bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
          {std::pair<string, string>("GPU", "CPU:0"),
           std::pair<string, string>("/device", "/device:CPU:0")}) {
       const string device_host =
-          strings::StrCat(device->substr(0, device->rfind(device_match.first)),
+          strings::StrCat(device.substr(0, device.rfind(device_match.first)),
                           device_match.second);
       if (devices.find(device_host) != devices.end()) {
-        *device = device_host;
-        return true;
+        return device_host;
       }
     }
   }
 
-  // We couldn't find an appropriate Host device, return false.
-  return false;
+  // We couldn't find an appropriate Host device, return original device.
+  return device;
 }
 
+bool IsTPUGraphDef(const GraphDef& def) {
+  for (const auto& node : def.node()) {
+    if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
+        node.op() == "TPUPartitionedCall") {
+      return true;
+    }
+  }
+  return false;
+}
 }  // end namespace internal
 
 Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
@@ -377,26 +324,20 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   // All the Const nodes, and their original devices in topological order.
   std::vector<std::pair<NodeDef*, string>> const_nodes;
 
-  // Cache to map {op, device, port} -> bool on whether it is pinned to host.
-  internal::OpDevicePortOnHostMap op_device_outport_pinned_to_host_cache;
-  internal::OpDevicePortOnHostMap op_device_inport_pinned_to_host_cache;
-
   for (auto& node : *optimized_graph->mutable_node()) {
     bool is_candidate = false;
-    TF_RETURN_IF_ERROR(internal::IsNodeHostCandidate(
-        graph, &properties, node, &op_device_outport_pinned_to_host_cache,
-        &is_candidate));
+    TF_RETURN_IF_ERROR(
+        internal::IsNodeHostCandidate(graph, &properties, node, &is_candidate));
     if (!is_candidate) {
       continue;
     }
 
-    const string original_device = node.device();
-    const bool swapped = internal::TrySwapToHostDevice(devices, has_device_cpu,
-                                                       node.mutable_device());
-    // Keep track of all Const nodes that we swapped.
-    if (swapped && IsConstant(node)) {
-      const_nodes.emplace_back(&node, original_device);
+    if (IsConstant(node)) {
+      const_nodes.emplace_back(&node, node.device());
     }
+    // Try and swap the device to Host.
+    node.set_device(
+        internal::TryFindHostDevice(devices, has_device_cpu, node.device()));
   }
 
   // Traverse all `const_nodes`, and map them back to GPU greedily.
@@ -408,9 +349,8 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     // this node back onto the original device.
     for (const GraphView::InputPort& fanout : graph.GetFanouts(*node, false)) {
       // The consumer is not Host friendly, swap it back to the original device.
-      if (!internal::IsNodeInputPortHostFriendly(
-              *fanout.node, fanout.port_id,
-              &op_device_inport_pinned_to_host_cache)) {
+      if (!internal::IsNodeInputPortHostFriendly(*fanout.node,
+                                                 fanout.port_id)) {
         node->set_device(device);
         break;
       }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
index bed4a9ef95..d557a03463 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
@@ -26,8 +26,8 @@ namespace tensorflow {
 namespace grappler {
 namespace internal {
 // Try and find an appropriate Host device in `devices` given `device`.
-bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
-                         bool has_device_cpu, string* device);
+string TryFindHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, const string& device);
 }  // end namespace internal
 
 // Optimize TensorFlow ops that should be swapped into the CPU to avoid
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
index 9bb030b220..7c64529441 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
@@ -28,60 +28,30 @@ namespace {
 
 class PinToHostOptimizerTest : public GrapplerTest {};
 
-TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceNoDevices) {
+TEST_F(PinToHostOptimizerTest, TryFindHostDevice) {
   gtl::FlatSet<string> devices = {};
-
-  string device = "ABC";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "ABC");
-}
-
-TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceCpuXlaGpu) {
-  gtl::FlatSet<string> devices = {"/device:CPU:0", "/device:XLA_GPU:0"};
-
-  string device = "";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
-  EXPECT_EQ(device, "/device:CPU:0");
-
-  device = "/device:XLA_GPU:0";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
-  EXPECT_EQ(device, "/device:CPU:0");
-
-  device = "/device:XLA_GPU:*";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
-  EXPECT_EQ(device, "/device:CPU:0");
-}
-
-TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaCpuXlaGpu) {
-  gtl::FlatSet<string> devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"};
-
-  string device = "";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_TRUE(device.empty());
-
-  device = "/device:XLA_GPU:0";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "/device:XLA_CPU:0");
-
-  device = "/device:XLA_GPU:*";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "/device:XLA_CPU:0");
-}
-
-TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaGpu) {
-  gtl::FlatSet<string> devices = {"/device:XLA_GPU:0"};
-
-  string device = "";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_TRUE(device.empty());
-
-  device = "/device:XLA_GPU:0";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "/device:XLA_GPU:0");
-
-  device = "/device:XLA_GPU:*";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "/device:XLA_GPU:*");
+  EXPECT_EQ("ABC", internal::TryFindHostDevice(devices, false, "ABC"));
+
+  devices = {"/device:CPU:0", "/device:XLA_GPU:0"};
+  EXPECT_EQ(internal::TryFindHostDevice(devices, true, ""), "/device:CPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:0"),
+            "/device:CPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:*"),
+            "/device:CPU:0");
+
+  devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"};
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
+            "/device:XLA_CPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
+            "/device:XLA_CPU:0");
+
+  devices = {"/device:XLA_GPU:0"};
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
+            "/device:XLA_GPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
+            "/device:XLA_GPU:*");
 }
 
 TEST_F(PinToHostOptimizerTest, OptimizeSmallOpsToHost) {
-- 
GitLab


From a198ca7d9bbc752a322c59b9a30519eab1b6730c Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Tue, 9 Oct 2018 00:56:23 -0700
Subject: [PATCH 1286/1357] Enable support for PRED values in KeyValueSort for
 the HloEvaluator.

PiperOrigin-RevId: 216315110
---
 tensorflow/compiler/xla/service/hlo_evaluator.cc | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index eec8d242fa..6cba46135c 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/index_util.h"
@@ -1279,7 +1280,9 @@ StatusOr<Literal> EvaluateSortInternal(HloInstruction* sort,
                     return SafeLess<KeyType>(a.first, b.first);
                   });
         std::vector<KeyType> result_keys;
-        std::vector<ValueType> result_values;
+        // We use a InlinedVector here because we need to convert it to an
+        // absl::Span later, and this would not work with std::vector<bool>.
+        absl::InlinedVector<ValueType, 10> result_values;
         for (const auto& key_value : key_value_vector) {
           result_keys.push_back(key_value.first);
           result_values.push_back(key_value.second);
@@ -1316,6 +1319,9 @@ StatusOr<Literal> EvaluateSortCurried(HloInstruction* sort,
                                       const Literal& keys_literal,
                                       const Literal& values_literal) {
   switch (sort->operand(1)->shape().element_type()) {
+    case PRED:
+      return EvaluateSortInternal<KeyType, bool>(sort, keys_literal,
+                                                 values_literal);
     case F32:
       return EvaluateSortInternal<KeyType, float>(sort, keys_literal,
                                                   values_literal);
-- 
GitLab


From 69f60d4c8cb5edb6fdc63b837b6db29562d28744 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 02:09:06 -0700
Subject: [PATCH 1287/1357] compat: Update forward compatibility horizon to
 2018-10-09

PiperOrigin-RevId: 216323343
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 349c84e13c..0e14c0e044 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 8)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 9)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From e730b261f9028b2f3430461b82c30c86b9ece22f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 06:58:06 -0700
Subject: [PATCH 1288/1357] Automated rollback of commit
 375c109659d2d0e6265447dffdeb460693b3cccf

PiperOrigin-RevId: 216350134
---
 tensorflow/compiler/xla/service/BUILD         |  21 --
 .../compiler/xla/service/buffer_assignment.cc |  34 ++--
 .../compiler/xla/service/buffer_value.h       |   3 -
 .../compiler/xla/service/copy_insertion.cc    |  85 +-------
 .../xla/service/copy_insertion_test.cc        | 183 -----------------
 tensorflow/compiler/xla/service/hlo.proto     |  29 ---
 .../xla/service/hlo_alias_analysis.cc         |  46 +----
 .../xla/service/hlo_alias_analysis_test.cc    | 175 -----------------
 .../xla/service/hlo_dataflow_analysis.cc      |   2 +-
 .../service/hlo_input_output_alias_config.cc  | 172 ----------------
 .../service/hlo_input_output_alias_config.h   | 101 ----------
 .../hlo_input_output_alias_config_test.cc     | 184 ------------------
 tensorflow/compiler/xla/service/hlo_module.cc |   9 -
 tensorflow/compiler/xla/service/hlo_module.h  |  14 --
 .../compiler/xla/service/hlo_verifier.cc      |   2 -
 tensorflow/compiler/xla/shape_util.h          |   2 +-
 16 files changed, 25 insertions(+), 1037 deletions(-)
 delete mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
 delete mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
 delete mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 26ebb88e96..2b292ed053 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -294,7 +294,6 @@ cc_library(
     srcs = [
         "dfs_hlo_visitor.cc",
         "hlo_computation.cc",
-        "hlo_input_output_alias_config.cc",
         "hlo_instruction.cc",
         "hlo_instructions.cc",
         "hlo_module.cc",
@@ -309,7 +308,6 @@ cc_library(
         "hlo_clone_context.h",
         "hlo_computation.h",
         "hlo_domain_metadata.h",
-        "hlo_input_output_alias_config.h",
         "hlo_instruction.h",
         "hlo_instructions.h",
         "hlo_module.h",
@@ -1270,25 +1268,6 @@ tf_cc_test(
     ],
 )
 
-tf_cc_test(
-    name = "hlo_input_output_alias_config_test",
-    srcs = ["hlo_input_output_alias_config_test.cc"],
-    deps = [
-        ":hlo",
-        ":hlo_dce",
-        ":hlo_memory_scheduler",
-        ":hlo_ordering",
-        ":hlo_parser",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:types",
-        "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
-        "//tensorflow/core:test",
-        "@com_google_absl//absl/algorithm:container",
-    ],
-)
-
 cc_library(
     name = "hlo_memory_scheduler",
     srcs = ["hlo_memory_scheduler.cc"],
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index d5d6a044a8..2c2d1626c2 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -239,7 +239,7 @@ BufferAllocation::Slice BufferAllocation::GetSlice(
 
 void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset,
                                      int64 size) {
-  VLOG(4) << "Trying to add " << buffer << " to allocation #" << index();
+  VLOG(4) << "Trying to add " << buffer << " to " << this;
   CHECK(assigned_buffers_.count(&buffer) == 0)
       << "LogicalBuffer " << buffer << " already assigned to allocation "
       << index_;
@@ -784,6 +784,21 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
     }
   }
 
+  if (allow_input_output_aliasing_ && allocation->maybe_live_out()) {
+    const HloComputation* entry_computation =
+        assignment->module_->entry_computation();
+    for (auto param : entry_computation->parameter_instructions()) {
+      for (auto& param_buffer :
+           assignment->points_to_analysis().GetBuffersDefinedByInstruction(
+               param)) {
+        if (assignment->liveness().MayInterfere(*param_buffer, buffer)) {
+          VLOG(4) << "Can't assign: Parameter interference with result";
+          return false;
+        }
+      }
+    }
+  }
+
   // If the buffer is live out of the computation then it should only be
   // assigned a buffer which exactly fits the result to avoid wasting memory
   // (result buffers can have arbitrary lifetimes).
@@ -1419,28 +1434,13 @@ BufferAssigner::MergeColocatedBufferSets(
 
 // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated
 // in the same allocation (currently just supports kWhile, kCall, and
-// kConditional and input output aliasing).
+// kConditional).
 void BufferAssigner::BuildColocatedBufferSets(
     const HloModule* module, const BufferLiveness& buffer_liveness,
     const LogicalBuffer::SizeFunction& buffer_size,
     std::vector<ColocatedBufferSet>* colocated_buffer_sets) {
   const TuplePointsToAnalysis& points_to_analysis =
       buffer_liveness.points_to_analysis();
-
-  // Set up colocated buffer set for input and output.
-  module->input_output_alias_config().ForEachAlias(
-      [&](const ShapeIndex& output_index, int64 param_number,
-          const ShapeIndex& param_index) {
-        std::vector<const LogicalBuffer*> colocated_set;
-        AddBufferToColocatedSet(module->entry_computation()->root_instruction(),
-                                output_index, points_to_analysis,
-                                &colocated_set);
-        AddBufferToColocatedSet(
-            module->entry_computation()->parameter_instruction(param_number),
-            param_index, points_to_analysis, &colocated_set);
-        AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets);
-      });
-
   for (const HloComputation* computation : module->MakeComputationPostOrder()) {
     if (computation->IsFusionComputation()) {
       continue;
diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h
index 11d8abc5ba..69b3646356 100644
--- a/tensorflow/compiler/xla/service/buffer_value.h
+++ b/tensorflow/compiler/xla/service/buffer_value.h
@@ -141,9 +141,6 @@ class BufferValue {
   // operator< is required for std::set.
   bool operator<(const BufferValue& other) const { return id_ < other.id_; }
 
-  bool operator==(const BufferValue& other) const { return id_ == other.id_; }
-  bool operator!=(const BufferValue& other) const { return id_ != other.id_; }
-
   virtual string ToString() const = 0;
 
   // TODO(lauj) rename LogicalBufferProto to BufferValueProto.
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index cfe025fdd1..f35324aa35 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -40,12 +40,10 @@ namespace {
 
 using absl::StrAppend;
 
-bool IsReadonlyEntryParameterValue(const HloValue& value) {
+bool IsEntryParameterValue(const HloValue& value) {
   const HloComputation* computation = value.defining_instruction()->parent();
   return value.defining_instruction()->opcode() == HloOpcode::kParameter &&
-         computation == computation->parent()->entry_computation() &&
-         !computation->parent()->input_output_alias_config().ParameterHasAlias(
-             value.defining_instruction()->parameter_number());
+         computation == computation->parent()->entry_computation();
 }
 
 bool IsConstantValue(const HloValue& value) {
@@ -53,7 +51,7 @@ bool IsConstantValue(const HloValue& value) {
 }
 
 bool ValueIsReadOnly(const HloValue& value) {
-  return IsConstantValue(value) || IsReadonlyEntryParameterValue(value);
+  return IsConstantValue(value) || IsEntryParameterValue(value);
 }
 
 // Data structure describing the action which should be taken on parts of a
@@ -334,81 +332,6 @@ Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis,
   return Status::OK();
 }
 
-// Conservatively adds copies before root instruction of entry computation and
-// each aliased parameter to resolve interference of aliased input and output
-// buffer. We later rely on the CopyRemover to drop the unnecessary ones.
-Status AddCopiesForAliasedInputOutputs(HloModule* module) {
-  HloComputation* entry = module->entry_computation();
-  HloInstruction* root = entry->root_instruction();
-
-  ShapeTree<bool> output_indices_to_copy(root->shape());
-  std::vector<ShapeTree<HloInstruction*>> copied_parameters;
-  bool has_alias = false;
-  for (auto* param : entry->parameter_instructions()) {
-    bool param_has_alias = false;
-    ShapeTree<bool> param_indices_to_copy(param->shape());
-
-    module->input_output_alias_config().ForEachAlias(
-        [&](const ShapeIndex& output_index, int64 param_number,
-            const ShapeIndex& param_index) {
-          if (param_number == param->parameter_number()) {
-            param_has_alias = true;
-            *(param_indices_to_copy.mutable_element(param_index)) = true;
-            *(output_indices_to_copy.mutable_element(output_index)) = true;
-          }
-        });
-
-    if (!param_has_alias) {
-      continue;
-    }
-
-    has_alias = true;
-    // Store a snapshot of users before DeepCopyInstruction, as
-    // DeepCopyInstruction introduces new users of the instruction.
-    std::vector<HloInstruction*> users = param->users();
-    ShapeTree<HloInstruction*> param_copy_tree(param->shape(),
-                                               /*init_value=*/nullptr);
-    TF_ASSIGN_OR_RETURN(HloInstruction * copied,
-                        entry->DeepCopyInstruction(
-                            param, &param_indices_to_copy, &param_copy_tree));
-    for (HloInstruction* user : users) {
-      TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, copied));
-    }
-
-    copied_parameters.push_back(param_copy_tree);
-  }
-
-  if (!has_alias) {
-    return Status::OK();
-  }
-
-  // Add copies before root instruction.
-  ShapeTree<HloInstruction*> output_copy_tree(root->shape(),
-                                              /*init_value=*/nullptr);
-
-  TF_ASSIGN_OR_RETURN(HloInstruction * root_copied,
-                      root->parent()->DeepCopyInstruction(
-                          root, &output_indices_to_copy, &output_copy_tree));
-
-  // Add control dependencies between the input/output copies.
-  TF_RETURN_IF_ERROR(module->input_output_alias_config().ForEachAliasWithStatus(
-      [&](const ShapeIndex& output_index, int64 param_number,
-          const ShapeIndex& input_index) -> Status {
-        HloInstruction* from =
-            copied_parameters[param_number].element(input_index);
-        HloInstruction* to = output_copy_tree.element(output_index);
-
-        TF_RET_CHECK(from != nullptr);
-        TF_RET_CHECK(to != nullptr);
-        TF_RETURN_IF_ERROR(from->AddControlDependencyTo(to));
-        return Status::OK();
-      }));
-
-  entry->set_root_instruction(root_copied);
-
-  return Status::OK();
-}
-
 // Removes any control dependencies to or from the given instruction.
 Status StripControlDependenciesFrom(HloInstruction* instruction) {
   while (!instruction->control_successors().empty()) {
@@ -1030,8 +953,6 @@ Status CopyInsertion::AddCopiesToResolveInterference(HloModule* module) {
       }
     }
   }
-
-  TF_RETURN_IF_ERROR(AddCopiesForAliasedInputOutputs(module));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 3096206c34..892d0d7b54 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -1351,189 +1351,6 @@ TEST_F(CopyInsertionTest, SwizzlingWhile) {
   EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy()));
 }
 
-TEST_F(CopyInsertionTest, CrossingParameters) {
-  // Test a case where two parameters' dataflow cross with each other while
-  // input and output are aliased with same index:
-  //
-  //  (p0 ,  p1)
-  //   | \   /|
-  //   |  \ / |
-  // alias X  alias
-  //   |  / \ |
-  //   | /   \|
-  //  (p1  ,  p0)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-  builder.AddInstruction(HloInstruction::CreateTuple({gte1, gte0}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-  InsertCopies(module.get());
-
-  EXPECT_EQ(CountCopies(*module), 4);
-}
-
-TEST_F(CopyInsertionTest, ParametersAliasing) {
-  // Test a case where two parameters' dataflow don't interfere with each other
-  // while aliased.
-  //
-  //  (p0 ,  p1)
-  //   |      |
-  //   |      |
-  // alias   alias
-  //   |      |
-  //   |      |
-  //  (p0 ,  p1)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-  InsertCopies(module.get());
-
-  EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::Copy(op::GetTupleElement(param, 0)),
-                        op::Copy(op::GetTupleElement(param, 1))));
-
-  EXPECT_EQ(CountCopies(*module), 2);
-}
-
-TEST_F(CopyInsertionTest, ParameterWithPartialAliasing) {
-  // Test a case where one parameter is aliased with result while another one
-  // isn't.
-  //
-  //  (p0 ,  p1)
-  //   |      |
-  //   |      |
-  // alias    |
-  //   |      |
-  //   |      |
-  //  (p0 ,  p1)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  InsertCopies(module.get());
-
-  EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::Copy(op::GetTupleElement(param, 0)),
-                        op::Copy(op::GetTupleElement(param, 1))));
-
-  EXPECT_EQ(CountCopies(*module), 2);
-}
-
-TEST_F(CopyInsertionTest, ParameterAndParallelOpsWithPartialAliasing) {
-  // Test a case where one parameter is aliased with result while another one
-  // isn't.
-  //
-  //   +-- (p0 ,  p1)
-  //   |    |      |
-  //   |    |      |
-  // alias Negate  Negate
-  //   |    |      |
-  //   |    |      |
-  //   +-- (p0 ,  p1)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-
-  auto negate0 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
-
-  auto negate1 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
-  builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  InsertCopies(module.get());
-
-  EXPECT_EQ(CountCopies(*module), 0);
-}
-
-TEST_F(CopyInsertionTest, ParameterAndOpsWithPartialAliasing) {
-  // Test a case where one parameter is aliased with result while another one
-  // isn't.
-  //
-  //   +-- (p0 ,  p1)
-  //   |    |      |
-  //   |    |      |
-  // alias Negate  Negate
-  //   |    |      |
-  //   |    Add----+
-  //   |    |      |
-  //   +-- (p0 ,  p1)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-
-  auto negate0 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
-
-  auto negate1 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
-
-  auto add = builder.AddInstruction(HloInstruction::CreateBinary(
-      scalar_shape_, HloOpcode::kAdd, negate0, negate1));
-  builder.AddInstruction(HloInstruction::CreateTuple({add, negate1}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  InsertCopies(module.get());
-
-  EXPECT_EQ(CountCopies(*module), 0);
-}
-
 TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) {
   // Test a while instruction with a body which permutes its tuple parameter
   // elements and applies one operation to one of the elements. The addition of
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 82c8fb1904..a0eb9e6ddc 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -225,32 +225,6 @@ message HloScheduleProto {
   map<int64, InstructionSequence> sequences = 1;
 }
 
-message HloInputOutputAliasProto {
-  // The following proto describes a pair of aliased an input
-  // (described by parameter number and a ShapeIndex of the parameter)
-  // and an output (described by a ShapeIndex of the root
-  // instruction). For example:
-  //
-  // entry = {
-  //  output_shape_index={1},
-  //  parameter_number=0,
-  //  parameter_shape_index={1, 2},
-  // }
-  //
-  // This entry indicates that the first paremter's {1, 2} element is
-  // aliased with the {1} element of the root instruction.
-  message AliasEntryProto {
-    // ShapeIndex of the root hlo.
-    repeated int64 output_shape_index = 1;
-    // Number of the parameter in entry computation.
-    int64 parameter_number = 2;
-    // ShapeIndex of the parameter instruction.
-    repeated int64 parameter_shape_index = 3;
-  }
-
-  repeated AliasEntryProto entries = 1;
-}
-
 // Serialization of HloModule.
 message HloModuleProto {
   string name = 1;
@@ -269,9 +243,6 @@ message HloModuleProto {
 
   // The schedule for this module.
   HloScheduleProto schedule = 7;
-
-  // Describes alias information between inputs and outputs.
-  HloInputOutputAliasProto input_output_alias = 8;
 }
 
 // Serialization of LogicalBuffer.
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index cf8e6594cb..c3da12e273 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -59,9 +59,8 @@ class BufferValueMap {
   // construction process.
   using BufferNumber = int64;
 
-  explicit BufferValueMap(HloModule* module,
-                          const HloDataflowAnalysis& dataflow)
-      : module_(module), dataflow_(dataflow) {
+  explicit BufferValueMap(const HloDataflowAnalysis& dataflow)
+      : dataflow_(dataflow) {
     buffers_.reserve(dataflow_.values().size());
     value_to_buffer_number_.reserve(dataflow_.values().size());
     for (const HloValue* value : dataflow_.values()) {
@@ -172,42 +171,6 @@ class BufferValueMap {
     return value_to_buffer_number_.at(&value);
   }
 
-  void ComputeInputOutputAliasedBuffers(
-      const HloValue& value, std::vector<BufferNumber>* aliased_buffers) {
-    // Get parameter value from an aliased_input object.
-    const auto get_parameter_value =
-        [this](const std::pair<int64, ShapeIndex>& aliased_input)
-        -> const HloValue& {
-      int64 param_number = aliased_input.first;
-      const ShapeIndex& param_index = aliased_input.second;
-      return dataflow_.GetUniqueValueAt(
-          module_->entry_computation()->parameter_instruction(param_number),
-          param_index);
-    };
-
-    // If the value shows up in a root instruction, alias it with parameter
-    // intruction.
-    for (const HloPosition& pos : value.positions()) {
-      if (pos.instruction == module_->entry_computation()->root_instruction()) {
-        ShapeIndex output_index = pos.index;
-
-        auto aliased_input =
-            module_->input_output_alias_config().GetAliasedParameter(
-                output_index);
-        if (aliased_input) {
-          aliased_buffers->push_back(
-              GetBufferForValue(get_parameter_value(*aliased_input)));
-        }
-      }
-    }
-
-    // If the value is parameter instruction itself, alias it with itself.
-    if (value.instruction()->opcode() == HloOpcode::kParameter &&
-        value.instruction()->parent() == module_->entry_computation()) {
-      aliased_buffers->push_back(GetBufferForValue(value));
-    }
-  }
-
   void ComputeWhileAliasedBuffers(const HloValue& value,
                                   std::vector<BufferNumber>* aliased_buffers) {
     VLOG(3) << "Compute kWhile aliases";
@@ -315,7 +278,6 @@ class BufferValueMap {
       VLOG(2) << "Use of value " << value.ToShortString() << ": " << use;
     }
     std::vector<BufferNumber> aliased_buffers;
-    ComputeInputOutputAliasedBuffers(value, &aliased_buffers);
     ComputeWhileAliasedBuffers(value, &aliased_buffers);
     ComputeConditionalAliasedBuffers(value, &aliased_buffers);
     // Uniquify aliased buffers.
@@ -326,8 +288,6 @@ class BufferValueMap {
     return aliased_buffers;
   }
 
-  HloModule* module_;
-
   // Dataflow analysis used to construct the buffer map.
   const HloDataflowAnalysis& dataflow_;
 
@@ -501,7 +461,7 @@ StatusOr<std::unique_ptr<HloAliasAnalysis>> HloAliasAnalysis::Run(
                                                /*bitcast_defines_value=*/false,
                                                fusion_can_share_buffer));
 
-  BufferValueMap buffer_map(module, alias_analysis->dataflow_analysis());
+  BufferValueMap buffer_map(alias_analysis->dataflow_analysis());
   buffer_map.MergeAliasedBuffers();
 
   // Create a vector of HloBuffers, one for each set of values in the
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
index 5c8d97b2d1..0cd0ab36fc 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
@@ -217,181 +217,6 @@ TEST_F(HloAliasAnalysisTest, NondistinctTuple) {
   EXPECT_FALSE(AnyValuesInSameBufferInterfere());
 }
 
-TEST_F(HloAliasAnalysisTest, ParametersWithAliasing) {
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-
-  auto negate0 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
-  auto negate1 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
-
-  auto tuple =
-      builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
-  module_->AddEntryComputation(builder.Build());
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-
-  // Cannot alias an output twice.
-  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
-
-  const HloAliasAnalysis& analysis = RunAnalysis();
-
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
-
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
-}
-
-TEST_F(HloAliasAnalysisTest, ParametersWithCrossAliasing) {
-  // parameter 0 aliased with output 1 and parameter 1 aliased with output 0.
-  //
-  //  (p0 ,  p1)
-  //     \   /
-  //      \ /
-  // alias X
-  //      / \
-  //     /   \
-  //  (p0  ,  p1)
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-  auto tuple =
-      builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
-  module_->AddEntryComputation(builder.Build());
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{1}));
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
-
-  // Cannot alias an output twice.
-  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-
-  const HloAliasAnalysis& analysis = RunAnalysis();
-
-  // Every Ops in this graph are aliased with each other.
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
-
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
-}
-
-TEST_F(HloAliasAnalysisTest, InputOutputAliasingWithWhile) {
-  // Test a simple single while instruction can be aliased with input and output
-  // of the computation.
-  //
-  // body((F32[], F32[]) %tuple_param):
-  //   %add = Add(%tuple_param{0}, %tuple_param{1})
-  //   return Tuple(%tuple_param{0}, %add)
-  //
-  // condition((F32[], F32[]) %tuple_param):
-  //   return Constant(false)
-  //
-  // entry:
-  //   %param1 = param1
-  //   %while = While(%param1, body, condition)
-  //   %while_1 = GTE(%while, 0)
-  //   %while_2 = GTE(%while, 1)
-  //   %negate_1 = Negate(%while_1)
-  //   %negate_2 = Negate(%while_2)
-  //   return Tuple(negate_1, negate_2)
-  //
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  // Element 0 passes transparently through the body.
-  auto body_builder = HloComputation::Builder("body");
-  auto body_param = body_builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "param"));
-  auto body_element_0 = body_builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0));
-  auto body_element_1 = body_builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1));
-  auto add = body_builder.AddInstruction(HloInstruction::CreateBinary(
-      scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1));
-  auto body_tuple = body_builder.AddInstruction(
-      HloInstruction::CreateTuple({body_element_0, add}));
-  HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build());
-
-  // Condition computation trivially returns a constant "false".
-  auto cond_builder = HloComputation::Builder("condition");
-  auto cond_param = cond_builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "param"));
-  cond_builder.AddInstruction(
-      HloInstruction::CreateConstant(LiteralUtil::CreateR0<bool>(false)));
-  HloComputation* condition =
-      module_->AddEmbeddedComputation(cond_builder.Build());
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-
-  auto xla_while = builder.AddInstruction(
-      HloInstruction::CreateWhile(tuple_shape, condition, body, param));
-  auto while_element_1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 0));
-  auto while_element_2 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 1));
-  auto negate_1 = builder.AddInstruction(HloInstruction::CreateUnary(
-      scalar_shape_, HloOpcode::kNegate, while_element_1));
-  auto negate_2 = builder.AddInstruction(HloInstruction::CreateUnary(
-      scalar_shape_, HloOpcode::kNegate, while_element_2));
-  auto tuple =
-      builder.AddInstruction(HloInstruction::CreateTuple({negate_1, negate_2}));
-  module_->AddEntryComputation(builder.Build());
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-
-  const HloAliasAnalysis& analysis = RunAnalysis();
-
-  EXPECT_THAT(
-      GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})),
-      UnorderedElementsAre(GetValueDefinedAt(param, {1}),
-                           GetValueDefinedAt(xla_while, /*index=*/{1}),
-                           GetValueDefinedAt(body_param, {1}),
-                           GetValueDefinedAt(cond_param, {1}),
-                           GetValueDefinedAt(add),
-                           GetValueDefinedAt(negate_2)));
-
-  EXPECT_THAT(
-      analysis.GetUniqueBufferAt(xla_while, /*index=*/{1}).ComputePositions(),
-      UnorderedElementsAre(
-          HloPosition{param, {1}}, HloPosition{xla_while, {1}},
-          HloPosition{while_element_2, {}}, HloPosition{body_param, {1}},
-          HloPosition{body_element_1, {}}, HloPosition{add, {}},
-          HloPosition{body_tuple, {1}}, HloPosition{tuple, {1}},
-          HloPosition{cond_param, {1}}, HloPosition{negate_2, {}}));
-
-  EXPECT_FALSE(AnyValuesInSameBufferInterfere());
-}
-
 TEST_F(HloAliasAnalysisTest, SingleCall) {
   // Test a single call of a subcomputation. The subcomputation adds its two
   // array-shaped parameters.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index f401eac016..c22adcdd8d 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -126,7 +126,7 @@ bool HloDataflowAnalysis::ValueIsDefinedAt(const HloInstruction* instruction,
 
 const HloValue& HloDataflowAnalysis::GetValueDefinedAt(
     const HloInstruction* instruction, const ShapeIndex& index) const {
-  CHECK(ValueIsDefinedAt(instruction, index)) << instruction->ToString();
+  CHECK(ValueIsDefinedAt(instruction, index));
   return GetUniqueValueAt(instruction, index);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
deleted file mode 100644
index 9ad98e5038..0000000000
--- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
-#include "tensorflow/compiler/xla/service/hlo_module.h"
-
-namespace xla {
-Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index,
-                                             int64 param_number,
-                                             const ShapeIndex& param_index) {
-  // Output can't be aliased with multiple parameters.
-  TF_RET_CHECK(!alias_.element(output_index));
-  (*alias_.mutable_element(output_index)) =
-      std::make_pair(param_number, param_index);
-  return Status::OK();
-}
-
-HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const {
-  HloInputOutputAliasProto result;
-  alias_.ForEachElement(
-      [&](const ShapeIndex& index,
-          const absl::optional<std::pair<int64, ShapeIndex>>& data) {
-        if (data) {
-          HloInputOutputAliasProto::AliasEntryProto entry;
-          for (int64 i : index) {
-            entry.add_output_shape_index(i);
-          }
-          entry.set_parameter_number(data->first);
-          for (int64 i : data->second) {
-            entry.add_parameter_shape_index(i);
-          }
-          result.add_entries()->Swap(&entry);
-        }
-      });
-  return result;
-}
-
-StatusOr<HloInputOutputAliasConfig> HloInputOutputAliasConfig::CreateFromProto(
-    const HloModule* module, const HloInputOutputAliasProto& proto) {
-  HloInputOutputAliasConfig result(
-      module->entry_computation()->root_instruction()->shape());
-  for (const HloInputOutputAliasProto::AliasEntryProto& entry :
-       proto.entries()) {
-    ShapeIndex output_index(entry.output_shape_index().begin(),
-                            entry.output_shape_index().end());
-
-    int64 param_number = entry.parameter_number();
-    ShapeIndex param_index(entry.parameter_shape_index().begin(),
-                           entry.parameter_shape_index().end());
-    TF_RETURN_IF_ERROR(
-        result.SetUpAlias(output_index, param_number, param_index));
-  }
-
-  return result;
-}
-
-string HloInputOutputAliasConfig::ToString() const {
-  std::vector<string> pieces;
-  pieces.push_back("HloInputOutputAliasConfig");
-
-  ForEachAlias([&](const ShapeIndex& output_index, int64 param_number,
-                   const ShapeIndex& param_index) {
-    pieces.push_back(absl::StrFormat(
-        "  OutputIndex %s is aliased with parameter %lld at %s:",
-        output_index.ToString(), param_number, param_index.ToString()));
-  });
-
-  return absl::StrJoin(pieces, "\n");
-}
-
-bool HloInputOutputAliasConfig::ParameterHasAlias(int64 param_number) const {
-  bool output = false;
-  alias_.ForEachElement(
-      [&](const xla::ShapeIndex&,
-          absl::optional<std::pair<int64, ShapeIndex>> alias) {
-        if (alias && alias->first == param_number) {
-          output = true;
-        }
-      });
-  return output;
-}
-
-absl::optional<ShapeIndex> HloInputOutputAliasConfig::GetAliasedOutput(
-    int64 param_number, const ShapeIndex& param_index) const {
-  absl::optional<ShapeIndex> output;
-  alias_.ForEachElement(
-      [&](const xla::ShapeIndex& output_index,
-          absl::optional<std::pair<int64, ShapeIndex>> alias) {
-        if (alias && alias->first == param_number &&
-            alias->second == param_index) {
-          output = output_index;
-        }
-      });
-  return output;
-}
-
-absl::optional<std::pair<int64, ShapeIndex>>
-HloInputOutputAliasConfig::GetAliasedParameter(
-    const ShapeIndex& output_index) const {
-  CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index));
-  return alias_.element(output_index);
-}
-
-void HloInputOutputAliasConfig::ForEachAlias(AliasFn fn) const {
-  alias_.ForEachElement(
-      [&](const ShapeIndex& output_index,
-          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
-        if (aliased) {
-          fn(output_index, aliased->first, aliased->second);
-        }
-      });
-}
-
-Status HloInputOutputAliasConfig::ForEachAliasWithStatus(
-    AliasFnWithStatus fn) const {
-  return alias_.ForEachElementWithStatus(
-      [&](const ShapeIndex& output_index,
-          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
-        if (aliased) {
-          TF_RETURN_IF_ERROR(fn(output_index, aliased->first, aliased->second));
-        }
-        return Status::OK();
-      });
-}
-
-Status HloInputOutputAliasConfig::Verify(const HloModule& module) const {
-  std::vector<ShapeTree<bool>> param_has_seen;
-  const HloComputation* entry = module.entry_computation();
-  for (int64 i = 0; i < entry->num_parameters(); ++i) {
-    HloInstruction* param = entry->parameter_instruction(i);
-    param_has_seen.emplace_back(param->shape());
-  }
-  return ForEachAliasWithStatus([&](const ShapeIndex& output_index,
-                                    int64 param_number,
-                                    const ShapeIndex& param_index) -> Status {
-    const HloInstruction* root = entry->root_instruction();
-
-    const Shape& param_shape =
-        entry->parameter_instruction(param_number)->shape();
-    const Shape& output_shape = root->shape();
-    TF_RET_CHECK(entry->num_parameters() > param_number);
-    TF_RET_CHECK(ShapeUtil::IndexIsValid(param_shape, param_index));
-    TF_RET_CHECK(ShapeUtil::IndexIsValid(output_shape, output_index));
-
-    // Check each param_number and param_index pair only show up once. No
-    // input can be aliased with output buffers.
-    TF_RET_CHECK(param_has_seen[param_number].element(param_index) == false);
-
-    *(param_has_seen[param_number].mutable_element(param_index)) = true;
-
-    return Status::OK();
-  });
-}
-
-std::ostream& operator<<(std::ostream& out,
-                         const HloInputOutputAliasConfig& config) {
-  out << config.ToString();
-  return out;
-}
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
deleted file mode 100644
index 02c46f65c8..0000000000
--- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
-
-#include <utility>
-
-#include "absl/types/optional.h"
-#include "tensorflow/compiler/xla/service/hlo.pb.h"
-#include "tensorflow/compiler/xla/shape_tree.h"
-#include "tensorflow/compiler/xla/shape_util.h"
-
-namespace xla {
-
-class HloModule;
-
-// This class specifies the alias map from output index to parameter number and
-// parameter index in the entry computation.
-class HloInputOutputAliasConfig {
- public:
-  HloInputOutputAliasConfig() = default;
-
-  explicit HloInputOutputAliasConfig(Shape shape) : alias_(shape) {}
-
-  virtual ~HloInputOutputAliasConfig() = default;
-
-  // Sets up alias config from `output_index` to `param_index` at
-  // `param_number`.
-  Status SetUpAlias(const ShapeIndex& output_index, int64 param_number,
-                    const ShapeIndex& param_index);
-
-  // Returns true if the given parameter is aliased with one of the output
-  // buffers.
-  bool ParameterHasAlias(int64 param_number) const;
-
-  // (De)Serializes an HloInputOutoutAliasConfig to/from an
-  // HloInputOutoutAliasProto.
-  HloInputOutputAliasProto ToProto() const;
-
-  static StatusOr<HloInputOutputAliasConfig> CreateFromProto(
-      const HloModule* module, const HloInputOutputAliasProto& proto);
-
-  // Returns the output index that the given parameter and parameter index is
-  // aliased with. A nullopt is returned if there is no output that is aliased
-  // with the parameter number and index.
-  absl::optional<ShapeIndex> GetAliasedOutput(
-      int64 param_number, const ShapeIndex& param_index) const;
-
-  // Returns the number of parameter and index of the parameter buffer that the
-  // given output buffer index is aliased with. A nullopt is returned if there
-  // is no parameter is aliased with the specific output.
-  absl::optional<std::pair<int64, ShapeIndex>> GetAliasedParameter(
-      const ShapeIndex& output_index) const;
-
-  using AliasFn =
-      std::function<void(const ShapeIndex& output_index, int64 param_number,
-                         const ShapeIndex& param_index)>;
-
-  // Iterates through each aliased output and input.
-  void ForEachAlias(AliasFn fn) const;
-
-  using AliasFnWithStatus =
-      std::function<Status(const ShapeIndex& output_index, int64 param_number,
-                           const ShapeIndex& param_index)>;
-
-  // Verifies that the given config is valid for the given module.
-  // Specifically, the config's input and output should be in-bound and size of
-  // the aliased buffers should match.
-  Status Verify(const HloModule& module) const;
-
-  Status ForEachAliasWithStatus(AliasFnWithStatus fn) const;
-
-  string ToString() const;
-
- private:
-  // A ShapeTree which indicates the list of buffers that's expected to be
-  // aliased. The key on this shape tree represents the output index. The value
-  // is a pair of parameter number and index into the buffer. If the value is
-  // nullopt, it means there is no parameter aliasing for this output.
-  ShapeTree<absl::optional<std::pair<int64, ShapeIndex>>> alias_;
-};
-
-std::ostream& operator<<(std::ostream& out,
-                         const HloInputOutputAliasConfig& config);
-
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
deleted file mode 100644
index 3b61ff04e6..0000000000
--- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
-
-#include <memory>
-#include <string>
-
-#include "absl/algorithm/container.h"
-#include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/service/hlo_dce.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h"
-#include "tensorflow/compiler/xla/service/hlo_opcode.h"
-#include "tensorflow/compiler/xla/service/hlo_ordering.h"
-#include "tensorflow/compiler/xla/service/hlo_parser.h"
-#include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
-#include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/core/status_test_util.h"
-
-namespace xla {
-namespace {
-class HloInputOutputAliasConfigTest : public HloTestBase {
- protected:
-  void expect_aliased(const ShapeIndex& output_index, int64 param_number,
-                      const ShapeIndex& param_index,
-                      const HloInputOutputAliasConfig& config) {
-    absl::optional<ShapeIndex> aliased_output =
-        config.GetAliasedOutput(param_number, param_index);
-
-    EXPECT_TRUE(aliased_output);
-    EXPECT_EQ(aliased_output.value(), output_index);
-
-    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
-        config.GetAliasedParameter(output_index);
-
-    EXPECT_TRUE(aliased_param);
-    EXPECT_EQ(aliased_param.value(), std::make_pair(param_number, param_index));
-  }
-
-  void expect_not_aliased(const ShapeIndex& output_index, int64 param_number,
-                          const ShapeIndex& param_index,
-                          const HloInputOutputAliasConfig& config) {
-    absl::optional<ShapeIndex> aliased_output =
-        config.GetAliasedOutput(param_number, param_index);
-
-    EXPECT_FALSE(aliased_output && aliased_output == output_index);
-
-    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
-        config.GetAliasedParameter(output_index);
-
-    EXPECT_FALSE(aliased_param && aliased_param->first == param_number &&
-                 aliased_param->second == param_index);
-  }
-};
-
-TEST_F(HloInputOutputAliasConfigTest, SimpleAliasing) {
-  const string module_str = R"(
-HloModule TEST
-
-ENTRY main {
-  a = f32[] parameter(0)
-  b = f32[] parameter(1)
-  ROOT root = (f32[], f32[]) tuple(%a, %b)
-}
-)";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
-
-  HloInputOutputAliasConfig config(
-      module->entry_computation()->root_instruction()->shape());
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
-                                 /*param_index=*/{}));
-
-  expect_aliased(/*output_index=*/{0}, /*param_number=*/1,
-                 /*param_index=*/{}, config);
-
-  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
-                     /*param_index=*/{}, config);
-
-  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
-                     /*param_index=*/{}, config);
-}
-
-TEST_F(HloInputOutputAliasConfigTest, SimpleAliasingWithTupleInput) {
-  const string module_str = R"(
-HloModule TEST
-
-ENTRY main {
-  param = (f32[], f32[]) parameter(0)
-  gte1 = f32[] get-tuple-element(%param), index=0
-  gte2 = f32[] get-tuple-element(%param), index=1
-  ROOT root = (f32[], f32[]) tuple(%gte1, %gte2)
-}
-)";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
-
-  HloInputOutputAliasConfig config(
-      module->entry_computation()->root_instruction()->shape());
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
-                                 /*param_index=*/{0}));
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
-                                 /*param_index=*/{1}));
-
-  expect_aliased(/*output_index=*/{0}, /*param_number=*/0,
-                 /*param_index=*/{0}, config);
-
-  expect_aliased(/*output_index=*/{1}, /*param_number=*/0,
-                 /*param_index=*/{1}, config);
-
-  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
-                     /*param_index=*/{}, config);
-
-  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
-                     /*param_index=*/{}, config);
-}
-
-TEST_F(HloInputOutputAliasConfigTest, InputDoNotAliasTwice) {
-  const string module_str = R"(
-HloModule TEST
-
-ENTRY main {
-  a = f32[] parameter(0)
-  b = f32[] parameter(1)
-  ROOT root = (f32[], f32[]) tuple(%a, %b)
-}
-)";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
-
-  HloInputOutputAliasConfig config(
-      module->entry_computation()->root_instruction()->shape());
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
-                                 /*param_index=*/{}));
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
-                                 /*param_index=*/{}));
-
-  ASSERT_IS_NOT_OK(config.Verify(*module));
-}
-
-TEST_F(HloInputOutputAliasConfigTest, OutputDoNotAliasTwice) {
-  const string module_str = R"(
-HloModule TEST
-
-ENTRY main {
-  a = f32[] parameter(0)
-  b = f32[] parameter(1)
-  ROOT root = (f32[], f32[]) tuple(%a, %b)
-}
-)";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
-
-  HloInputOutputAliasConfig config(
-      module->entry_computation()->root_instruction()->shape());
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
-                                 /*param_index=*/{}));
-
-  ASSERT_IS_NOT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
-                                     /*param_index=*/{}));
-}
-}  // namespace
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 547f74a0ed..93e04eb3db 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -73,8 +73,6 @@ HloComputation* HloModule::AddComputationInternal(
       config_.SetDefaultComputationLayout(
           entry_computation_->ComputeProgramShape());
     }
-    input_output_alias_config_ = HloInputOutputAliasConfig(
-        entry_computation_->root_instruction()->shape());
   }
 
   if (uniquify_identifiers) {
@@ -254,9 +252,6 @@ HloModuleProto HloModule::ToProto() const {
   if (has_schedule()) {
     *proto.mutable_schedule() = schedule().ToProto().ValueOrDie();
   }
-
-  *proto.mutable_input_output_alias() = input_output_alias_config().ToProto();
-
   return proto;
 }
 
@@ -333,10 +328,6 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
   }
   TF_RET_CHECK(module->entry_computation_ != nullptr);
 
-  TF_ASSIGN_OR_RETURN(module->input_output_alias_config_,
-                      HloInputOutputAliasConfig::CreateFromProto(
-                          module.get(), proto.input_output_alias()));
-
   // Because we didn't uniquify the names or the ids, double-check that the
   // instruction and computation names and ids are unique from the proto.
   absl::flat_hash_set<string> computation_names;
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 9b9dc3ba9f..735804e827 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -31,7 +31,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_clone_context.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
@@ -213,15 +212,6 @@ class HloModule {
     return result;
   }
 
-  // input_output_alias_config indicates the list of aliased buffers that are
-  // expected from the module.
-  HloInputOutputAliasConfig& input_output_alias_config() {
-    return input_output_alias_config_;
-  }
-  const HloInputOutputAliasConfig& input_output_alias_config() const {
-    return input_output_alias_config_;
-  }
-
   // Returns the number of unique intruction ids given out.  All ids up to
   // this point are guaranteed to be in the range [0..NumUniqueInstructionIds())
   int NumUniqueInstructionIds() const { return next_unique_id_; }
@@ -294,10 +284,6 @@ class HloModule {
   // sequential order of instructions for each non-fusion computation in the
   // module.
   absl::optional<HloSchedule> schedule_;
-
-  // alias_config indicates the alias information of input/output buffers that
-  // are expected from the module.
-  HloInputOutputAliasConfig input_output_alias_config_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 2902a11a42..be3bee5975 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -1220,8 +1220,6 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     TF_RETURN_IF_ERROR(module->schedule().Verify());
   }
 
-  TF_RETURN_IF_ERROR(module->input_output_alias_config().Verify(*module));
-
   return false;
 }
 
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 51cedce7f0..73f541d505 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -72,7 +72,7 @@ class ShapeIndex {
   void push_back(int64 value) { indices_.push_back(value); }
   void pop_back() { indices_.pop_back(); }
 
-  // push_front is O(n), but shapes don't usually have a ton of dimensions.
+  // push_front is O(n^2), but shapes don't usually have a ton of dimensions.
   void push_front(int64 value) { indices_.insert(indices_.begin(), value); }
 
   using container_type = absl::InlinedVector<int64, 2>;
-- 
GitLab


From a9a44b070bf639ee9bd60f0fd21157a297cd7f82 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 07:41:42 -0700
Subject: [PATCH 1289/1357] Removed unused load statements from the core BUILD.

PiperOrigin-RevId: 216354906
---
 tensorflow/core/BUILD | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 900a0e11c4..acea8e2217 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -73,12 +73,10 @@ licenses(["notice"])  # Apache 2.0
 load(
     "//tensorflow:tensorflow.bzl",
     "cc_header_only_library",
-    "full_path",
     "if_android",
     "if_ios",
     "if_linux_x86_64",
     "if_mobile",
-    "if_not_mobile",
     "if_not_windows",
     "if_windows",
     "tf_cc_test",
-- 
GitLab


From a0ed9452d5c7f897e26788d8dca5164cb6fba023 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 07:54:16 -0700
Subject: [PATCH 1290/1357] Fixing Toco for exporting graphs with strings

If the graph contains not constant array with strings it fails because the
array's size can't be estimated.

PiperOrigin-RevId: 216356162
---
 tensorflow/contrib/lite/toco/tooling_util.cc | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index e3f27e9e2a..083a96ad9d 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -1237,11 +1237,15 @@ void DedupeConstantArrays(Model* model, size_t min_size) {
         lhs_array.final_data_type != ArrayDataType::kNone
             ? lhs_array.final_data_type
             : lhs_array.data_type;
-    size_t array_byte_size =
-        lhs_array.buffer->Length() * ElementSize(final_data_type);
-    if (array_byte_size < min_size) {
-      // Too small; skip.
-      continue;
+    // Ignore small arrays, don't check string arrays because it is not possible
+    // to estimate its size.
+    if (final_data_type != ArrayDataType::kString) {
+      size_t array_byte_size =
+          lhs_array.buffer->Length() * ElementSize(final_data_type);
+      if (array_byte_size < min_size) {
+        // Too small; skip.
+        continue;
+      }
     }
 
     auto next_lhs_array_it = lhs_array_it;
-- 
GitLab


From cadcacc6224bcbb8a05bf3b70d625d9024a9c0f3 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 9 Oct 2018 08:16:49 -0700
Subject: [PATCH 1291/1357] Allowing for mixture of V1 and V2 feature columns
 usage in canned estimators. This is required for TF hub use cases where users
 might send in new feature columns to old model code. Implemented this support
 by making V2 feature columns support the V1 API. This is needed temporarily
 and would definitely be removed by TF 2.0, possibly earlier depending on what
 guarantees are provided by TF hub.

The only case we don't allow here is mixing in V2 shared embedding columns with V1 Feature columns. V2 Shared FC's depend on a SharedEmbeddingState manager that would have to be passed in to the various API's and there wasn't really a very clean way to make that work.

Mixing V2 feature columns with V1 shared embedding columns is fine though and along with all other combinations

PiperOrigin-RevId: 216359041
---
 .../canned/dnn_linear_combined_test.py        |  107 +-
 .../estimator/canned/dnn_testing_utils.py     |  109 +
 .../estimator/canned/linear_testing_utils.py  |   64 +
 tensorflow/python/feature_column/BUILD        |    1 +
 .../python/feature_column/feature_column.py   |    4 +
 .../feature_column/feature_column_v2.py       |  869 ++++-
 .../feature_column/feature_column_v2_test.py  | 3294 ++++++++++++++---
 7 files changed, 3772 insertions(+), 676 deletions(-)

diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
index ae968e717a..ab945d7b1a 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
@@ -317,16 +317,10 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
       writer_cache.FileWriterCache.clear()
       shutil.rmtree(self._model_dir)
 
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, batch_size,
-                          fc_impl):
-    linear_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    dnn_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    feature_columns = linear_feature_columns + dnn_feature_columns
+  def _test_complete_flow_helper(
+      self, linear_feature_columns, dnn_feature_columns, feature_spec,
+      train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
     est = dnn_linear_combined.DNNLinearCombinedRegressor(
         linear_feature_columns=linear_feature_columns,
         dnn_hidden_units=(2, 2),
@@ -351,14 +345,63 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, label_dimension), predictions.shape)
 
     # EXPORT
-    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self, fc_impl):
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, batch_size,
+                          fc_impl):
+    linear_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_complete_flow_mix1(self, train_input_fn, eval_input_fn,
+                               predict_input_fn, input_dimension,
+                               label_dimension, batch_size, fc_impl):
+    del fc_impl
+    linear_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        feature_column_v2.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn,
+                               predict_input_fn, input_dimension,
+                               label_dimension, batch_size, fc_impl):
+    del fc_impl
+    linear_feature_columns = [
+        feature_column_v2.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_numpy_input_fn_helper(self, fc_impl, fn_to_run):
     """Tests complete flow with numpy_input_fn."""
     label_dimension = 2
     batch_size = 10
@@ -381,7 +424,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         shuffle=False)
 
-    self._test_complete_flow(
+    fn_to_run(
         train_input_fn=train_input_fn,
         eval_input_fn=eval_input_fn,
         predict_input_fn=predict_input_fn,
@@ -390,7 +433,16 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         fc_impl=fc_impl)
 
-  def test_pandas_input_fn(self, fc_impl):
+  def test_numpy_input_fn_basic(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow)
+
+  def test_numpy_input_fn_mix1(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
+
+  def test_numpy_input_fn_mix2(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
+
+  def _test_pandas_input_fn_helper(self, fc_impl, fn_to_run):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -415,7 +467,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         shuffle=False)
 
-    self._test_complete_flow(
+    fn_to_run(
         train_input_fn=train_input_fn,
         eval_input_fn=eval_input_fn,
         predict_input_fn=predict_input_fn,
@@ -424,7 +476,16 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         fc_impl=fc_impl)
 
-  def test_input_fn_from_parse_example(self, fc_impl):
+  def test_pandas_input_fn_basic(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow)
+
+  def test_pandas_input_fn_mix1(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
+
+  def test_pandas_input_fn_mix2(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
+
+  def _test_input_fn_from_parse_example_helper(self, fc_impl, fn_to_run):
     """Tests complete flow with input_fn constructed from parse_example."""
     label_dimension = 2
     batch_size = 10
@@ -466,7 +527,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
       features.pop('y')
       return features, None
 
-    self._test_complete_flow(
+    fn_to_run(
         train_input_fn=_train_input_fn,
         eval_input_fn=_eval_input_fn,
         predict_input_fn=_predict_input_fn,
@@ -475,6 +536,18 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         fc_impl=fc_impl)
 
+  def test_input_fn_from_parse_example_basic(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow)
+
+  def test_input_fn_from_parse_example_mix1(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow_mix1)
+
+  def test_input_fn_from_parse_example_mix2(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow_mix2)
+
 
 # A function to mimic dnn-classifier init reuse same tests.
 def _dnn_classifier_fn(hidden_units,
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index cd66d0a3bd..71d7e54783 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -34,6 +34,7 @@ from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.canned import prediction_keys
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -479,6 +480,60 @@ class BaseDNNModelFnTest(object):
           else:
             self.fail('Invalid mode: {}'.format(mode))
 
+  def test_multi_feature_column_mix_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((
+        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
+        ([[1., .8], [-.8, -1.]], [.2, -.2]),
+        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
+    ), base_global_step, self._model_dir)
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        training_util.create_global_step()
+        head = mock_head(
+            self,
+            hidden_units=hidden_units,
+            logits_dimension=logits_dimension,
+            expected_logits=expected_logits)
+        estimator_spec = self._dnn_model_fn(
+            features={
+                'age': constant_op.constant(inputs[0]),
+                'height': constant_op.constant(inputs[1])
+            },
+            labels=constant_op.constant([[1]]),
+            mode=mode,
+            head=head,
+            hidden_units=hidden_units,
+            feature_columns=[
+                feature_column.numeric_column('age'),
+                feature_column_v2.numeric_column('height')
+            ],
+            optimizer=mock_optimizer(self, hidden_units))
+        with monitored_session.MonitoredTrainingSession(
+            checkpoint_dir=self._model_dir) as sess:
+          if mode == model_fn.ModeKeys.TRAIN:
+            sess.run(estimator_spec.train_op)
+          elif mode == model_fn.ModeKeys.EVAL:
+            sess.run(estimator_spec.loss)
+          elif mode == model_fn.ModeKeys.PREDICT:
+            sess.run(estimator_spec.predictions)
+          else:
+            self.fail('Invalid mode: {}'.format(mode))
+
   def test_features_tensor_raises_value_error(self):
     """Tests that passing a Tensor for features raises a ValueError."""
     hidden_units = (2, 2)
@@ -806,6 +861,60 @@ class BaseDNNLogitFnTest(object):
               checkpoint_dir=self._model_dir) as sess:
             self.assertAllClose(expected_logits, sess.run(logits))
 
+  def test_multi_feature_column_mix_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((
+        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
+        ([[1., .8], [-.8, -1.]], [.2, -.2]),
+        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
+    ), base_global_step, self._model_dir)
+
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        # Global step needed for MonitoredSession, which is in turn used to
+        # explicitly set variable weights through a checkpoint.
+        training_util.create_global_step()
+        # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
+        # the checkpoint naming is shared.
+        with variable_scope.variable_scope('dnn'):
+          input_layer_partitioner = (
+              partitioned_variables.min_max_variable_partitioner(
+                  max_partitions=0, min_slice_size=64 << 20))
+          logit_fn = self._dnn_logit_fn_builder(
+              units=logits_dimension,
+              hidden_units=hidden_units,
+              feature_columns=[
+                  feature_column.numeric_column('age'),
+                  feature_column_v2.numeric_column('height')
+              ],
+              activation_fn=nn.relu,
+              dropout=None,
+              input_layer_partitioner=input_layer_partitioner,
+              batch_norm=False)
+          logits = logit_fn(
+              features={
+                  'age': constant_op.constant(inputs[0]),
+                  'height': constant_op.constant(inputs[1])
+              },
+              mode=mode)
+          with monitored_session.MonitoredTrainingSession(
+              checkpoint_dir=self._model_dir) as sess:
+            self.assertAllClose(expected_logits, sess.run(logits))
+
 
 class BaseDNNWarmStartingTest(object):
 
diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py
index 827352a70b..2cfa2a8e15 100644
--- a/tensorflow/python/estimator/canned/linear_testing_utils.py
+++ b/tensorflow/python/estimator/canned/linear_testing_utils.py
@@ -400,6 +400,45 @@ class BaseLinearRegressorEvaluationTest(object):
     # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
     self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
 
+  def test_evaluation_for_multiple_feature_columns_mix(self):
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
+      variables_lib.Variable([5.0], name=BIAS_NAME)
+      variables_lib.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    batch_size = 2
+    feature_columns = [
+        feature_column.numeric_column('age'),
+        feature_column_v2.numeric_column('height')
+    ]
+
+    def _input_fn():
+      features_ds = dataset_ops.Dataset.from_tensor_slices({
+          'age': np.array([20, 40]),
+          'height': np.array([4, 8])
+      })
+      labels_ds = dataset_ops.Dataset.from_tensor_slices(
+          np.array([[213.], [421.]]))
+      return (dataset_ops.Dataset.zip((features_ds, labels_ds))
+              .batch(batch_size).repeat(None))
+
+    est = self._linear_regressor_fn(
+        feature_columns=feature_columns, model_dir=self._model_dir)
+
+    eval_metrics = est.evaluate(input_fn=_input_fn, steps=1)
+    self.assertItemsEqual(
+        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
+         metric_keys.MetricKeys.PREDICTION_MEAN,
+         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
+        eval_metrics.keys())
+
+    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
+    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
+    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
+
 
 class BaseLinearRegressorPredictTest(object):
 
@@ -497,6 +536,31 @@ class BaseLinearRegressorPredictTest(object):
     # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
     self.assertAllClose([[80.2]], predicted_scores)
 
+  def testTwoFeatureColumnsMix(self):
+    """Tests predict with two feature columns."""
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.]], name='linear/linear_model/x0/weights')
+      variables_lib.Variable([[20.]], name='linear/linear_model/x1/weights')
+      variables_lib.Variable([.2], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(feature_column.numeric_column('x0'),
+                         feature_column_v2.numeric_column('x1')),
+        model_dir=self._model_dir)
+
+    def _predict_input_fn():
+      return dataset_ops.Dataset.from_tensor_slices({
+          'x0': np.array([[2.]]),
+          'x1': np.array([[3.]])
+      }).batch(1)
+
+    predictions = linear_regressor.predict(input_fn=_predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
+    self.assertAllClose([[80.2]], predicted_scores)
+
   def testSparseCombiner(self):
     w_a = 2.0
     w_b = 3.0
diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index ac53a84eef..82acde584e 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -54,6 +54,7 @@ py_library(
     srcs = ["feature_column_v2.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":feature_column",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:check_ops",
         "//tensorflow/python:control_flow_ops",
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 28a8286544..8a11ca142c 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -121,6 +121,10 @@ Example of building model using FeatureColumns, this can be used in a
 
 NOTE: Functions prefixed with "_" indicate experimental or private parts of
 the API subject to change, and should not be relied upon!
+
+NOTE: The new feature columns are being developed in feature_column_v2.py and
+are a somewhat duplicate of the code here. Please make sure to update logic
+in both places.
 """
 
 from __future__ import absolute_import
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index b79373c475..6d089de991 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -136,6 +136,7 @@ import six
 
 
 from tensorflow.python.eager import context
+from tensorflow.python.feature_column import feature_column as fc_old
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
@@ -157,9 +158,16 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.util import deprecation
 from tensorflow.python.util import nest
 
 
+_FEATURE_COLUMN_DEPRECATION_DATE = '2018-11-30'
+_FEATURE_COLUMN_DEPRECATION = ('The old _FeatureColumn APIs are being '
+                               'deprecated. Please use the new FeatureColumn '
+                               'APIs instead.')
+
+
 class StateManager(object):
   """Manages the state associated with FeatureColumns.
 
@@ -440,10 +448,6 @@ class FeatureLayer(Layer):
     return (input_shape[0], total_elements)
 
 
-def _strip_leading_slashes(name):
-  return name.rsplit('/', 1)[-1]
-
-
 class LinearModel(Layer):
   """Produces a linear prediction `Tensor` based on given `feature_columns`.
 
@@ -775,12 +779,12 @@ def embedding_column(
     categorical_column, dimension, combiner='mean', initializer=None,
     ckpt_to_load_from=None, tensor_name_in_ckpt=None, max_norm=None,
     trainable=True):
-  """`_DenseColumn` that converts from sparse, categorical input.
+  """`DenseColumn` that converts from sparse, categorical input.
 
   Use this when your inputs are sparse, but you want to convert them to a dense
   representation (e.g., to feed to a DNN).
 
-  Inputs must be a `_CategoricalColumn` created by any of the
+  Inputs must be a `CategoricalColumn` created by any of the
   `categorical_column_*` function. Here is an example of using
   `embedding_column` with `DNNClassifier`:
 
@@ -814,12 +818,12 @@ def embedding_column(
   ```
 
   Args:
-    categorical_column: A `_CategoricalColumn` created by a
+    categorical_column: A `CategoricalColumn` created by a
       `categorical_column_with_*` function. This column produces the sparse IDs
       that are inputs to the embedding lookup.
     dimension: An integer specifying dimension of the embedding, must be > 0.
-    combiner: A string specifying how to reduce if there are multiple entries
-      in a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with
+    combiner: A string specifying how to reduce if there are multiple entries in
+      a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with
       'mean' the default. 'sqrtn' often achieves good accuracy, in particular
       with bag-of-words columns. Each of this can be thought as example level
       normalizations on the column. For more information, see
@@ -830,14 +834,14 @@ def embedding_column(
       `1/sqrt(dimension)`.
     ckpt_to_load_from: String representing checkpoint name/pattern from which to
       restore column weights. Required if `tensor_name_in_ckpt` is not `None`.
-    tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from
-      which to restore the column weights. Required if `ckpt_to_load_from` is
-      not `None`.
+    tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from which
+      to restore the column weights. Required if `ckpt_to_load_from` is not
+      `None`.
     max_norm: If not `None`, embedding values are l2-normalized to this value.
     trainable: Whether or not the embedding is trainable. Default is True.
 
   Returns:
-    `_DenseColumn` that converts from sparse input.
+    `DenseColumn` that converts from sparse input.
 
   Raises:
     ValueError: if `dimension` not > 0.
@@ -1181,7 +1185,7 @@ def bucketized_column(source_column, boundaries):
       one-dimensional.
     ValueError: If `boundaries` is not a sorted list or tuple.
   """
-  if not isinstance(source_column, NumericColumn):
+  if not isinstance(source_column, (NumericColumn, fc_old._NumericColumn)):  # pylint: disable=protected-access
     raise ValueError(
         'source_column must be a column generated with numeric_column(). '
         'Given: {}'.format(source_column))
@@ -1390,7 +1394,7 @@ def categorical_column_with_vocabulary_file(key,
 
 def categorical_column_with_vocabulary_list(
     key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0):
-  """A `_CategoricalColumn` with in-memory vocabulary.
+  """A `CategoricalColumn` with in-memory vocabulary.
 
   Use this when your inputs are in string or integer format, and you have an
   in-memory vocabulary mapping each value to an integer ID. By default,
@@ -1439,14 +1443,14 @@ def categorical_column_with_vocabulary_list(
   ```
 
   Args:
-    key: A unique string identifying the input feature. It is used as the
-      column name and the dictionary key for feature parsing configs, feature
-      `Tensor` objects, and feature columns.
+    key: A unique string identifying the input feature. It is used as the column
+      name and the dictionary key for feature parsing configs, feature `Tensor`
+      objects, and feature columns.
     vocabulary_list: An ordered iterable defining the vocabulary. Each feature
       is mapped to the index of its value (if present) in `vocabulary_list`.
       Must be castable to `dtype`.
-    dtype: The type of features. Only string and integer types are supported.
-      If `None`, it will be inferred from `vocabulary_list`.
+    dtype: The type of features. Only string and integer types are supported. If
+      `None`, it will be inferred from `vocabulary_list`.
     default_value: The integer ID value to return for out-of-vocabulary feature
       values, defaults to `-1`. This can not be specified with a positive
       `num_oov_buckets`.
@@ -1604,7 +1608,7 @@ def indicator_column(categorical_column):
 
 def weighted_categorical_column(
     categorical_column, weight_feature_key, dtype=dtypes.float32):
-  """Applies weight values to a `_CategoricalColumn`.
+  """Applies weight values to a `CategoricalColumn`.
 
   Use this when each of your sparse inputs has both an ID and a value. For
   example, if you're representing text documents as a collection of word
@@ -1655,7 +1659,7 @@ def weighted_categorical_column(
   the same indices and dense shape.
 
   Args:
-    categorical_column: A `_CategoricalColumn` created by
+    categorical_column: A `CategoricalColumn` created by
       `categorical_column_with_*` functions.
     weight_feature_key: String key for weight values.
     dtype: Type of weights, such as `tf.float32`. Only float and integer weights
@@ -1788,12 +1792,13 @@ def crossed_column(keys, hash_bucket_size, hash_key=None):
         'keys must be a list with length > 1. Given: {}'.format(keys))
   for key in keys:
     if (not isinstance(key, six.string_types) and
-        not isinstance(key, CategoricalColumn)):
+        not isinstance(key, (CategoricalColumn, fc_old._CategoricalColumn))):  # pylint: disable=protected-access
       raise ValueError(
           'Unsupported key type. All keys must be either string, or '
           'categorical column except HashedCategoricalColumn. '
           'Given: {}'.format(key))
-    if isinstance(key, HashedCategoricalColumn):
+    if isinstance(key,
+                  (HashedCategoricalColumn, fc_old._HashedCategoricalColumn)):  # pylint: disable=protected-access
       raise ValueError(
           'categorical_column_with_hash_bucket is not supported for crossing. '
           'Hashing before crossing will increase probability of collision. '
@@ -1882,6 +1887,16 @@ class FeatureColumn(object):
     """
     pass
 
+  @abc.abstractproperty
+  def _is_v2_column(self):
+    """Returns whether this FeatureColumn is fully conformant to the new API.
+
+    This is needed for composition type cases where an EmbeddingColumn etc.
+    might take in old categorical columns as input and then we want to use the
+    old API.
+    """
+    pass
+
 
 class DenseColumn(FeatureColumn):
   """Represents a column which can be represented as `Tensor`.
@@ -1927,6 +1942,8 @@ def is_feature_column_v2(feature_columns):
   for feature_column in feature_columns:
     if not isinstance(feature_column, FeatureColumn):
       return False
+    if not feature_column._is_v2_column:  # pylint: disable=protected-access
+      return False
   return True
 
 
@@ -2201,19 +2218,6 @@ class FeatureTransformationCache(object):
           lambda: feature_tensor)
 
 
-# TODO(ptucker): Move to third_party/tensorflow/python/ops/sparse_ops.py
-def _shape_offsets(shape):
-  """Returns moving offset for each dimension given shape."""
-  offsets = []
-  for dim in reversed(shape):
-    if offsets:
-      offsets.append(dim * offsets[-1])
-    else:
-      offsets.append(dim)
-  offsets.reverse()
-  return offsets
-
-
 # TODO(ptucker): Move to third_party/tensorflow/python/ops/sparse_ops.py
 def _to_sparse_input_and_drop_ignore_values(input_tensor, ignore_value=None):
   """Converts a `Tensor` to a `SparseTensor`, dropping ignore_value cells.
@@ -2306,11 +2310,16 @@ def _normalize_feature_columns(feature_columns):
 
 class NumericColumn(
     DenseColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'NumericColumn',
         ('key', 'shape', 'default_value', 'dtype', 'normalizer_fn'))):
   """see `numeric_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2325,6 +2334,27 @@ class NumericColumn(
                                         self.default_value)
     }
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
+
+  def _transform_input_tensor(self, input_tensor):
+    if isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
+      raise ValueError(
+          'The corresponding Tensor of numerical column must be a Tensor. '
+          'SparseTensor is not supported. key: {}'.format(self.key))
+    if self.normalizer_fn is not None:
+      input_tensor = self.normalizer_fn(input_tensor)
+    return math_ops.to_float(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = inputs.get(self.key)
+    return self._transform_input_tensor(input_tensor)
+
   def transform_feature(self, transformation_cache, state_manager):
     """See `FeatureColumn` base class.
 
@@ -2342,19 +2372,19 @@ class NumericColumn(
       ValueError: If a SparseTensor is passed in.
     """
     input_tensor = transformation_cache.get(self.key, state_manager)
-    if isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
-      raise ValueError(
-          'The corresponding Tensor of numerical column must be a Tensor. '
-          'SparseTensor is not supported. key: {}'.format(self.key))
-    if self.normalizer_fn is not None:
-      input_tensor = self.normalizer_fn(input_tensor)
-    return math_ops.to_float(input_tensor)
+    return self._transform_input_tensor(input_tensor)
 
   @property
   def variable_shape(self):
     """See `DenseColumn` base class."""
     return tensor_shape.TensorShape(self.shape)
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _variable_shape(self):
+    return self.variable_shape
+
   def get_dense_tensor(self, transformation_cache, state_manager):
     """Returns dense `Tensor` representing numeric feature.
 
@@ -2371,12 +2401,28 @@ class NumericColumn(
     # representation created by _transform_feature.
     return transformation_cache.get(self, state_manager)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    del weight_collections
+    del trainable
+    return inputs.get(self)
+
 
-class BucketizedColumn(DenseColumn, CategoricalColumn,
-                       collections.namedtuple('BucketizedColumn',
-                                              ('source_column', 'boundaries'))):
+class BucketizedColumn(
+    DenseColumn,
+    CategoricalColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
+    collections.namedtuple('BucketizedColumn',
+                           ('source_column', 'boundaries'))):
   """See `bucketized_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.source_column, FeatureColumn) and
+            self.source_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2387,6 +2433,21 @@ class BucketizedColumn(DenseColumn, CategoricalColumn,
     """See `FeatureColumn` base class."""
     return self.source_column.parse_example_spec
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.source_column._parse_example_spec  # pylint: disable=protected-access
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    """Returns bucketized categorical `source_column` tensor."""
+    source_tensor = inputs.get(self.source_column)
+    return math_ops._bucketize(  # pylint: disable=protected-access
+        source_tensor,
+        boundaries=self.boundaries)
+
   def transform_feature(self, transformation_cache, state_manager):
     """Returns bucketized categorical `source_column` tensor."""
     source_tensor = transformation_cache.get(self.source_column, state_manager)
@@ -2400,24 +2461,45 @@ class BucketizedColumn(DenseColumn, CategoricalColumn,
     return tensor_shape.TensorShape(
         tuple(self.source_column.shape) + (len(self.boundaries) + 1,))
 
-  def get_dense_tensor(self, transformation_cache, state_manager):
-    """Returns one hot encoded dense `Tensor`."""
-    input_tensor = transformation_cache.get(self, state_manager)
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _variable_shape(self):
+    return self.variable_shape
+
+  def _get_dense_tensor_for_input_tensor(self, input_tensor):
     return array_ops.one_hot(
         indices=math_ops.to_int64(input_tensor),
         depth=len(self.boundaries) + 1,
         on_value=1.,
         off_value=0.)
 
+  def get_dense_tensor(self, transformation_cache, state_manager):
+    """Returns one hot encoded dense `Tensor`."""
+    input_tensor = transformation_cache.get(self, state_manager)
+    return self._get_dense_tensor_for_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    del weight_collections
+    del trainable
+    input_tensor = inputs.get(self)
+    return self._get_dense_tensor_for_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """See `CategoricalColumn` base class."""
     # By construction, source_column is always one-dimensional.
     return (len(self.boundaries) + 1) * self.source_column.shape[0]
 
-  def get_sparse_tensors(self, transformation_cache, state_manager):
-    """Converts dense inputs to SparseTensor so downstream code can use it."""
-    input_tensor = transformation_cache.get(self, state_manager)
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
+  def _get_sparse_tensors_for_input_tensor(self, input_tensor):
     batch_size = array_ops.shape(input_tensor)[0]
     # By construction, source_column is always one-dimensional.
     source_dimension = self.source_column.shape[0]
@@ -2443,15 +2525,38 @@ class BucketizedColumn(DenseColumn, CategoricalColumn,
         dense_shape=dense_shape)
     return CategoricalColumn.IdWeightPair(sparse_tensor, None)
 
+  def get_sparse_tensors(self, transformation_cache, state_manager):
+    """Converts dense inputs to SparseTensor so downstream code can use it."""
+    input_tensor = transformation_cache.get(self, state_manager)
+    return self._get_sparse_tensors_for_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    """Converts dense inputs to SparseTensor so downstream code can use it."""
+    del weight_collections
+    del trainable
+    input_tensor = inputs.get(self)
+    return self._get_sparse_tensors_for_input_tensor(input_tensor)
+
 
 class EmbeddingColumn(
-    DenseColumn, SequenceDenseColumn,
+    DenseColumn,
+    SequenceDenseColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
+    fc_old._SequenceDenseColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'EmbeddingColumn',
         ('categorical_column', 'dimension', 'combiner', 'initializer',
          'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
   """See `embedding_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.categorical_column, FeatureColumn) and
+            self.categorical_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2462,18 +2567,35 @@ class EmbeddingColumn(
     """See `FeatureColumn` base class."""
     return self.categorical_column.parse_example_spec
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+
   def transform_feature(self, transformation_cache, state_manager):
     """Transforms underlying `categorical_column`."""
     return transformation_cache.get(self.categorical_column, state_manager)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    return inputs.get(self.categorical_column)
+
   @property
   def variable_shape(self):
     """See `DenseColumn` base class."""
     return tensor_shape.vector(self.dimension)
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _variable_shape(self):
+    return self.variable_shape
+
   def create_state(self, state_manager):
     """Creates the embedding lookup variable."""
-    embedding_shape = (self.categorical_column.num_buckets, self.dimension)
+    embedding_shape = (self.categorical_column._num_buckets, self.dimension)  # pylint: disable=protected-access
     state_manager.create_variable(
         self,
         name='embedding_weights',
@@ -2482,17 +2604,11 @@ class EmbeddingColumn(
         trainable=self.trainable,
         initializer=self.initializer)
 
-  def _get_dense_tensor_internal(self, transformation_cache, state_manager):
-    """Private method that follows the signature of _get_dense_tensor."""
-    # Get sparse IDs and weights.
-    sparse_tensors = self.categorical_column.get_sparse_tensors(
-        transformation_cache, state_manager)
+  def _get_dense_tensor_internal_helper(self, sparse_tensors,
+                                        embedding_weights):
     sparse_ids = sparse_tensors.id_tensor
     sparse_weights = sparse_tensors.weight_tensor
 
-    embedding_weights = state_manager.get_variable(
-        self, name='embedding_weights')
-
     if self.ckpt_to_load_from is not None:
       to_restore = embedding_weights
       if isinstance(to_restore, variables.PartitionedVariable):
@@ -2510,6 +2626,30 @@ class EmbeddingColumn(
         name='%s_weights' % self.name,
         max_norm=self.max_norm)
 
+  def _get_dense_tensor_internal(self, sparse_tensors, state_manager):
+    """Private method that follows the signature of get_dense_tensor."""
+    embedding_weights = state_manager.get_variable(
+        self, name='embedding_weights')
+    return self._get_dense_tensor_internal_helper(sparse_tensors,
+                                                  embedding_weights)
+
+  def _old_get_dense_tensor_internal(self, sparse_tensors, weight_collections,
+                                     trainable):
+    """Private method that follows the signature of _get_dense_tensor."""
+    embedding_shape = (self.categorical_column._num_buckets, self.dimension)  # pylint: disable=protected-access
+    if (weight_collections and
+        ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections):
+      weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
+    embedding_weights = variable_scope.get_variable(
+        name='embedding_weights',
+        shape=embedding_shape,
+        dtype=dtypes.float32,
+        initializer=self.initializer,
+        trainable=self.trainable and trainable,
+        collections=weight_collections)
+    return self._get_dense_tensor_internal_helper(sparse_tensors,
+                                                  embedding_weights)
+
   def get_dense_tensor(self, transformation_cache, state_manager):
     """Returns tensor after doing the embedding lookup.
 
@@ -2535,7 +2675,30 @@ class EmbeddingColumn(
           'sequence_input_layer instead of input_layer. '
           'Given (type {}): {}'.format(self.name, type(self.categorical_column),
                                        self.categorical_column))
-    return self._get_dense_tensor_internal(transformation_cache, state_manager)
+    # Get sparse IDs and weights.
+    sparse_tensors = self.categorical_column.get_sparse_tensors(
+        transformation_cache, state_manager)
+    return self._get_dense_tensor_internal(sparse_tensors, state_manager)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    if isinstance(
+        self.categorical_column,
+        (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
+      raise ValueError(
+          'In embedding_column: {}. '
+          'categorical_column must not be of type _SequenceCategoricalColumn. '
+          'Suggested fix A: If you wish to use input_layer, use a '
+          'non-sequence categorical_column_with_*. '
+          'Suggested fix B: If you wish to create sequence input, use '
+          'sequence_input_layer instead of input_layer. '
+          'Given (type {}): {}'.format(self.name, type(self.categorical_column),
+                                       self.categorical_column))
+    sparse_tensors = self.categorical_column._get_sparse_tensors(  # pylint: disable=protected-access
+        inputs, weight_collections, trainable)
+    return self._old_get_dense_tensor_internal(sparse_tensors,
+                                               weight_collections, trainable)
 
   def get_sequence_dense_tensor(self, transformation_cache, state_manager):
     """See `SequenceDenseColumn` base class."""
@@ -2547,21 +2710,40 @@ class EmbeddingColumn(
           'Suggested fix: Use one of sequence_categorical_column_with_*. '
           'Given (type {}): {}'.format(self.name, type(self.categorical_column),
                                        self.categorical_column))
-    dense_tensor = self._get_dense_tensor_internal(  # pylint: disable=protected-access
+    sparse_tensors = self.categorical_column.get_sequence_sparse_tensors(
         transformation_cache, state_manager)
-    sparse_tensors = self.categorical_column.get_sparse_tensors(
-        transformation_cache, state_manager)
-    sequence_length = _sequence_length_from_sparse_tensor(
+    dense_tensor = self._get_dense_tensor_internal(sparse_tensors,
+                                                   state_manager)
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
 
-
-def _get_graph_for_variable(var):
-  if isinstance(var, variables.PartitionedVariable):
-    return list(var)[0].graph
-  else:
-    return var.graph
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sequence_dense_tensor(self,
+                                 inputs,
+                                 weight_collections=None,
+                                 trainable=None):
+    if not isinstance(
+        self.categorical_column,
+        (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
+      raise ValueError(
+          'In embedding_column: {}. '
+          'categorical_column must be of type _SequenceCategoricalColumn '
+          'to use sequence_input_layer. '
+          'Suggested fix: Use one of sequence_categorical_column_with_*. '
+          'Given (type {}): {}'.format(self.name, type(self.categorical_column),
+                                       self.categorical_column))
+    sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+    dense_tensor = self._old_get_dense_tensor_internal(
+        sparse_tensors,
+        weight_collections=weight_collections,
+        trainable=trainable)
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
+        sparse_tensors.id_tensor)
+    return SequenceDenseColumn.TensorSequenceLengthPair(
+        dense_tensor=dense_tensor, sequence_length=sequence_length)
 
 
 class SharedEmbeddingStateManager(Layer):
@@ -2633,8 +2815,17 @@ def maybe_create_shared_state_manager(feature_columns):
   return None
 
 
+def _raise_shared_embedding_column_error():
+  raise ValueError('SharedEmbeddingColumns are not supported in '
+                   '`linear_model` or `input_layer`. Please use '
+                   '`FeatureLayer` or `LinearModel` instead.')
+
+
 class SharedEmbeddingColumn(
-    DenseColumn, SequenceDenseColumn,
+    DenseColumn,
+    SequenceDenseColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
+    fc_old._SequenceDenseColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'SharedEmbeddingColumn',
         ('categorical_column', 'dimension', 'combiner', 'initializer',
@@ -2642,6 +2833,10 @@ class SharedEmbeddingColumn(
          'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
   """See `embedding_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2662,15 +2857,26 @@ class SharedEmbeddingColumn(
     """See `FeatureColumn` base class."""
     return self.categorical_column.parse_example_spec
 
+  @property
+  def _parse_example_spec(self):
+    return _raise_shared_embedding_column_error()
+
   def transform_feature(self, transformation_cache, state_manager):
     """See `FeatureColumn` base class."""
     return transformation_cache.get(self.categorical_column, state_manager)
 
+  def _transform_feature(self, inputs):
+    return _raise_shared_embedding_column_error()
+
   @property
   def variable_shape(self):
     """See `DenseColumn` base class."""
     return tensor_shape.vector(self.dimension)
 
+  @property
+  def _variable_shape(self):
+    return _raise_shared_embedding_column_error()
+
   def create_state(self, state_manager):
     """Creates the shared embedding lookup variable."""
     if not isinstance(state_manager, SharedEmbeddingStateManager):
@@ -2731,6 +2937,9 @@ class SharedEmbeddingColumn(
                                        self.categorical_column))
     return self._get_dense_tensor_internal(transformation_cache, state_manager)
 
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    return _raise_shared_embedding_column_error()
+
   def get_sequence_dense_tensor(self, transformation_cache, state_manager):
     """See `SequenceDenseColumn` base class."""
     if not isinstance(self.categorical_column, SequenceCategoricalColumn):
@@ -2745,11 +2954,17 @@ class SharedEmbeddingColumn(
                                                   state_manager)
     sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
-    sequence_length = _sequence_length_from_sparse_tensor(
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
 
+  def _get_sequence_dense_tensor(self,
+                                 inputs,
+                                 weight_collections=None,
+                                 trainable=None):
+    return _raise_shared_embedding_column_error()
+
 
 def _create_tuple(shape, value):
   """Returns a tuple with given shape and filled with value."""
@@ -2858,10 +3073,15 @@ def _check_default_value(shape, default_value, dtype, key):
 
 class HashedCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple('HashedCategoricalColumn',
                            ('key', 'hash_bucket_size', 'dtype'))):
   """see `categorical_column_with_hash_bucket`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2872,10 +3092,14 @@ class HashedCategoricalColumn(
     """See `FeatureColumn` base class."""
     return {self.key: parsing_ops.VarLenFeature(self.dtype)}
 
-  def transform_feature(self, transformation_cache, state_manager):
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
+
+  def _transform_input_tensor(self, input_tensor):
     """Hashes the values in the feature_column."""
-    input_tensor = _to_sparse_input_and_drop_ignore_values(
-        transformation_cache.get(self.key, state_manager))
     if not isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
       raise ValueError('SparseColumn input must be a SparseTensor.')
 
@@ -2899,24 +3123,55 @@ class HashedCategoricalColumn(
     return sparse_tensor_lib.SparseTensor(
         input_tensor.indices, sparse_id_values, input_tensor.dense_shape)
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Hashes the values in the feature_column."""
+    input_tensor = _to_sparse_input_and_drop_ignore_values(
+        transformation_cache.get(self.key, state_manager))
+    return self._transform_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key))
+    return self._transform_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.hash_bucket_size
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 class VocabularyFileCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple('VocabularyFileCategoricalColumn',
                            ('key', 'vocabulary_file', 'vocabulary_size',
                             'num_oov_buckets', 'dtype', 'default_value'))):
   """See `categorical_column_with_vocabulary_file`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2927,11 +3182,14 @@ class VocabularyFileCategoricalColumn(
     """See `FeatureColumn` base class."""
     return {self.key: parsing_ops.VarLenFeature(self.dtype)}
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Creates a lookup table for the vocabulary."""
-    input_tensor = _to_sparse_input_and_drop_ignore_values(
-        transformation_cache.get(self.key, state_manager))
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
 
+  def _transform_input_tensor(self, input_tensor):
+    """Creates a lookup table for the vocabulary."""
     if self.dtype.is_integer != input_tensor.dtype.is_integer:
       raise ValueError(
           'Column dtype and SparseTensors dtype must be compatible. '
@@ -2957,25 +3215,56 @@ class VocabularyFileCategoricalColumn(
         key_dtype=key_dtype,
         name='{}_lookup'.format(self.key)).lookup(input_tensor)
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Creates a lookup table for the vocabulary."""
+    input_tensor = _to_sparse_input_and_drop_ignore_values(
+        transformation_cache.get(self.key, state_manager))
+    return self._transform_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key))
+    return self._transform_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.vocabulary_size + self.num_oov_buckets
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 class VocabularyListCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'VocabularyListCategoricalColumn',
         ('key', 'vocabulary_list', 'dtype', 'default_value', 'num_oov_buckets'))
 ):
   """See `categorical_column_with_vocabulary_list`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2986,11 +3275,14 @@ class VocabularyListCategoricalColumn(
     """See `FeatureColumn` base class."""
     return {self.key: parsing_ops.VarLenFeature(self.dtype)}
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Creates a lookup table for the vocabulary list."""
-    input_tensor = _to_sparse_input_and_drop_ignore_values(
-        transformation_cache.get(self.key, state_manager))
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
 
+  def _transform_input_tensor(self, input_tensor):
+    """Creates a lookup table for the vocabulary list."""
     if self.dtype.is_integer != input_tensor.dtype.is_integer:
       raise ValueError(
           'Column dtype and SparseTensors dtype must be compatible. '
@@ -3015,24 +3307,55 @@ class VocabularyListCategoricalColumn(
         dtype=key_dtype,
         name='{}_lookup'.format(self.key)).lookup(input_tensor)
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Creates a lookup table for the vocabulary list."""
+    input_tensor = _to_sparse_input_and_drop_ignore_values(
+        transformation_cache.get(self.key, state_manager))
+    return self._transform_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key))
+    return self._transform_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return len(self.vocabulary_list) + self.num_oov_buckets
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 class IdentityCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple('IdentityCategoricalColumn',
                            ('key', 'number_buckets', 'default_value'))):
 
   """See `categorical_column_with_identity`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -3043,11 +3366,14 @@ class IdentityCategoricalColumn(
     """See `FeatureColumn` base class."""
     return {self.key: parsing_ops.VarLenFeature(dtypes.int64)}
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Returns a SparseTensor with identity values."""
-    input_tensor = _to_sparse_input_and_drop_ignore_values(
-        transformation_cache.get(self.key, state_manager))
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
 
+  def _transform_input_tensor(self, input_tensor):
+    """Returns a SparseTensor with identity values."""
     if not input_tensor.dtype.is_integer:
       raise ValueError(
           'Invalid input, not integer. key: {} dtype: {}'.format(
@@ -3082,24 +3408,56 @@ class IdentityCategoricalColumn(
         values=values,
         dense_shape=input_tensor.dense_shape)
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Returns a SparseTensor with identity values."""
+    input_tensor = _to_sparse_input_and_drop_ignore_values(
+        transformation_cache.get(self.key, state_manager))
+    return self._transform_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key))
+    return self._transform_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.number_buckets
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 class WeightedCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'WeightedCategoricalColumn',
         ('categorical_column', 'weight_feature_key', 'dtype'))):
   """See `weighted_categorical_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.categorical_column, FeatureColumn) and
+            self.categorical_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -3116,15 +3474,29 @@ class WeightedCategoricalColumn(
     config[self.weight_feature_key] = parsing_ops.VarLenFeature(self.dtype)
     return config
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    config = self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+    if self.weight_feature_key in config:
+      raise ValueError('Parse config {} already exists for {}.'.format(
+          config[self.weight_feature_key], self.weight_feature_key))
+    config[self.weight_feature_key] = parsing_ops.VarLenFeature(self.dtype)
+    return config
+
   @property
   def num_buckets(self):
     """See `DenseColumn` base class."""
     return self.categorical_column.num_buckets
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Applies weights to tensor generated from `categorical_column`'."""
-    weight_tensor = transformation_cache.get(self.weight_feature_key,
-                                             state_manager)
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.categorical_column._num_buckets  # pylint: disable=protected-access
+
+  def _transform_weight_tensor(self, weight_tensor):
     if weight_tensor is None:
       raise ValueError('Missing weights {}.'.format(self.weight_feature_key))
     weight_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(
@@ -3138,27 +3510,63 @@ class WeightedCategoricalColumn(
           weight_tensor, ignore_value=0.0)
     if not weight_tensor.dtype.is_floating:
       weight_tensor = math_ops.to_float(weight_tensor)
+    return weight_tensor
+
+  def transform_feature(self, transformation_cache, state_manager):
+    """Applies weights to tensor generated from `categorical_column`'."""
+    weight_tensor = transformation_cache.get(self.weight_feature_key,
+                                             state_manager)
+    weight_tensor = self._transform_weight_tensor(weight_tensor)
     return (transformation_cache.get(self.categorical_column, state_manager),
             weight_tensor)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    """Applies weights to tensor generated from `categorical_column`'."""
+    weight_tensor = inputs.get(self.weight_feature_key)
+    weight_tensor = self._transform_weight_tensor(weight_tensor)
+    return (inputs.get(self.categorical_column), weight_tensor)
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     tensors = transformation_cache.get(self, state_manager)
     return CategoricalColumn.IdWeightPair(tensors[0], tensors[1])
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    tensors = inputs.get(self)
+    return CategoricalColumn.IdWeightPair(tensors[0], tensors[1])
+
 
 class CrossedColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple('CrossedColumn',
                            ('keys', 'hash_bucket_size', 'hash_key'))):
   """See `crossed_column`."""
 
+  @property
+  def _is_v2_column(self):
+    for key in _collect_leaf_level_keys(self):
+      if isinstance(key, six.string_types):
+        continue
+      if not isinstance(key, FeatureColumn):
+        return False
+      if not key._is_v2_column:  # pylint: disable=protected-access
+        return False
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
     feature_names = []
     for key in _collect_leaf_level_keys(self):
-      if isinstance(key, FeatureColumn):
+      if isinstance(key, (FeatureColumn, fc_old._FeatureColumn)):  # pylint: disable=protected-access
         feature_names.append(key.name)
       else:  # key must be a string
         feature_names.append(key)
@@ -3171,17 +3579,25 @@ class CrossedColumn(
     for key in self.keys:
       if isinstance(key, FeatureColumn):
         config.update(key.parse_example_spec)
+      elif isinstance(key, fc_old._FeatureColumn):  # pylint: disable=protected-access
+        config.update(key._parse_example_spec)  # pylint: disable=protected-access
       else:  # key must be a string
         config.update({key: parsing_ops.VarLenFeature(dtypes.string)})
     return config
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
+
   def transform_feature(self, transformation_cache, state_manager):
     """Generates a hashed sparse cross from the input tensors."""
     feature_tensors = []
     for key in _collect_leaf_level_keys(self):
       if isinstance(key, six.string_types):
         feature_tensors.append(transformation_cache.get(key, state_manager))
-      elif isinstance(key, CategoricalColumn):
+      elif isinstance(key, (fc_old._CategoricalColumn, CategoricalColumn)):  # pylint: disable=protected-access
         ids_and_weights = key.get_sparse_tensors(transformation_cache,
                                                  state_manager)
         if ids_and_weights.weight_tensor is not None:
@@ -3197,16 +3613,54 @@ class CrossedColumn(
         num_buckets=self.hash_bucket_size,
         hash_key=self.hash_key)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    """Generates a hashed sparse cross from the input tensors."""
+    feature_tensors = []
+    for key in _collect_leaf_level_keys(self):
+      if isinstance(key, six.string_types):
+        feature_tensors.append(inputs.get(key))
+      elif isinstance(key, (CategoricalColumn, fc_old._CategoricalColumn)):  # pylint: disable=protected-access
+        ids_and_weights = key._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+        if ids_and_weights.weight_tensor is not None:
+          raise ValueError(
+              'crossed_column does not support weight_tensor, but the given '
+              'column populates weight_tensor. '
+              'Given column: {}'.format(key.name))
+        feature_tensors.append(ids_and_weights.id_tensor)
+      else:
+        raise ValueError('Unsupported column type. Given: {}'.format(key))
+    return sparse_ops.sparse_cross_hashed(
+        inputs=feature_tensors,
+        num_buckets=self.hash_bucket_size,
+        hash_key=self.hash_key)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.hash_bucket_size
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    """See `CategoricalColumn` base class."""
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 def _collect_leaf_level_keys(cross):
   """Collects base keys by expanding all nested crosses.
@@ -3382,9 +3836,12 @@ def _prune_invalid_weights(sparse_ids, sparse_weights):
   return sparse_ids, sparse_weights
 
 
-class IndicatorColumn(DenseColumn, SequenceDenseColumn,
-                      collections.namedtuple('IndicatorColumn',
-                                             ('categorical_column'))):
+class IndicatorColumn(
+    DenseColumn,
+    SequenceDenseColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
+    fc_old._SequenceDenseColumn,  # pylint: disable=protected-access
+    collections.namedtuple('IndicatorColumn', ('categorical_column'))):
   """Represents a one-hot column for use in deep networks.
 
   Args:
@@ -3392,28 +3849,17 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
       `categorical_column_with_*` function.
   """
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.categorical_column, FeatureColumn) and
+            self.categorical_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
     return '{}_indicator'.format(self.categorical_column.name)
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Returns dense `Tensor` representing feature.
-
-    Args:
-      transformation_cache: A `FeatureTransformationCache` object to access
-        features.
-      state_manager: A `StateManager` to create / access resources such as
-        lookup tables.
-
-    Returns:
-      Transformed feature `Tensor`.
-
-    Raises:
-      ValueError: if input rank is not known at graph building time.
-    """
-    id_weight_pair = self.categorical_column.get_sparse_tensors(
-        transformation_cache, state_manager)
+  def _transform_id_weight_pair(self, id_weight_pair):
     id_tensor = id_weight_pair.id_tensor
     weight_tensor = id_weight_pair.weight_tensor
 
@@ -3422,7 +3868,7 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
       weighted_column = sparse_ops.sparse_merge(
           sp_ids=id_tensor,
           sp_values=weight_tensor,
-          vocab_size=int(self.variable_shape[-1]))
+          vocab_size=int(self._variable_shape[-1]))
       # Remove (?, -1) index
       weighted_column = sparse_ops.sparse_slice(weighted_column, [0, 0],
                                                 weighted_column.dense_shape)
@@ -3435,22 +3881,62 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
     # input_layer are float32.
     one_hot_id_tensor = array_ops.one_hot(
         dense_id_tensor,
-        depth=self.variable_shape[-1],
+        depth=self._variable_shape[-1],
         on_value=1.0,
         off_value=0.0)
 
     # Reduce to get a multi-hot per example.
     return math_ops.reduce_sum(one_hot_id_tensor, axis=[-2])
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Returns dense `Tensor` representing feature.
+
+    Args:
+      transformation_cache: A `FeatureTransformationCache` object to access
+        features.
+      state_manager: A `StateManager` to create / access resources such as
+        lookup tables.
+
+    Returns:
+      Transformed feature `Tensor`.
+
+    Raises:
+      ValueError: if input rank is not known at graph building time.
+    """
+    id_weight_pair = self.categorical_column.get_sparse_tensors(
+        transformation_cache, state_manager)
+    return self._transform_id_weight_pair(id_weight_pair)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    id_weight_pair = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+    return self._transform_id_weight_pair(id_weight_pair)
+
   @property
   def parse_example_spec(self):
     """See `FeatureColumn` base class."""
     return self.categorical_column.parse_example_spec
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+
   @property
   def variable_shape(self):
     """Returns a `TensorShape` representing the shape of the dense `Tensor`."""
-    return tensor_shape.TensorShape([1, self.categorical_column.num_buckets])
+    if isinstance(self.categorical_column, FeatureColumn):
+      return tensor_shape.TensorShape([1, self.categorical_column.num_buckets])
+    else:
+      return tensor_shape.TensorShape([1, self.categorical_column._num_buckets])  # pylint: disable=protected-access
+
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _variable_shape(self):
+    return tensor_shape.TensorShape([1, self.categorical_column._num_buckets])  # pylint: disable=protected-access
 
   def get_dense_tensor(self, transformation_cache, state_manager):
     """Returns dense `Tensor` representing feature.
@@ -3481,6 +3967,27 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
     # representation created by transform_feature.
     return transformation_cache.get(self, state_manager)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    del weight_collections
+    del trainable
+    if isinstance(
+        self.categorical_column,
+        (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
+      raise ValueError(
+          'In indicator_column: {}. '
+          'categorical_column must not be of type _SequenceCategoricalColumn. '
+          'Suggested fix A: If you wish to use input_layer, use a '
+          'non-sequence categorical_column_with_*. '
+          'Suggested fix B: If you wish to create sequence input, use '
+          'sequence_input_layer instead of input_layer. '
+          'Given (type {}): {}'.format(self.name, type(self.categorical_column),
+                                       self.categorical_column))
+    # Feature has been already transformed. Return the intermediate
+    # representation created by transform_feature.
+    return inputs.get(self)
+
   def get_sequence_dense_tensor(self, transformation_cache, state_manager):
     """See `SequenceDenseColumn` base class."""
     if not isinstance(self.categorical_column, SequenceCategoricalColumn):
@@ -3496,7 +4003,36 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
     dense_tensor = transformation_cache.get(self, state_manager)
     sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
-    sequence_length = _sequence_length_from_sparse_tensor(
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
+        sparse_tensors.id_tensor)
+    return SequenceDenseColumn.TensorSequenceLengthPair(
+        dense_tensor=dense_tensor, sequence_length=sequence_length)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sequence_dense_tensor(self,
+                                 inputs,
+                                 weight_collections=None,
+                                 trainable=None):
+    # Do nothing with weight_collections and trainable since no variables are
+    # created in this function.
+    del weight_collections
+    del trainable
+    if not isinstance(
+        self.categorical_column,
+        (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
+      raise ValueError(
+          'In indicator_column: {}. '
+          'categorical_column must be of type _SequenceCategoricalColumn '
+          'to use sequence_input_layer. '
+          'Suggested fix: Use one of sequence_categorical_column_with_*. '
+          'Given (type {}): {}'.format(self.name, type(self.categorical_column),
+                                       self.categorical_column))
+    # Feature has been already transformed. Return the intermediate
+    # representation created by _transform_feature.
+    dense_tensor = inputs.get(self)
+    sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
@@ -3518,27 +4054,18 @@ def _verify_static_batch_size_equality(tensors, columns):
                 expected_batch_size, tensors[i].shape[0]))
 
 
-def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1):
-  """Returns a [batch_size] Tensor with per-example sequence length."""
-  with ops.name_scope(None, 'sequence_length') as name_scope:
-    row_ids = sp_tensor.indices[:, 0]
-    column_ids = sp_tensor.indices[:, 1]
-    column_ids += array_ops.ones_like(column_ids)
-    seq_length = math_ops.to_int64(
-        math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements)
-    # If the last n rows do not have ids, seq_length will have shape
-    # [batch_size - n]. Pad the remaining values with zeros.
-    n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1]
-    padding = array_ops.zeros(n_pad, dtype=seq_length.dtype)
-    return array_ops.concat([seq_length, padding], axis=0, name=name_scope)
-
-
-class SequenceCategoricalColumn(FeatureColumn,
-                                collections.namedtuple(
-                                    'SequenceCategoricalColumn',
-                                    ('categorical_column'))):
+class SequenceCategoricalColumn(
+    FeatureColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
+    collections.namedtuple('SequenceCategoricalColumn',
+                           ('categorical_column'))):
   """Represents sequences of categorical data."""
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.categorical_column, FeatureColumn) and
+            self.categorical_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -3549,16 +4076,46 @@ class SequenceCategoricalColumn(FeatureColumn,
     """See `FeatureColumn` base class."""
     return self.categorical_column.parse_example_spec
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+
   def transform_feature(self, transformation_cache, state_manager):
     """See `FeatureColumn` base class."""
     return self.categorical_column.transform_feature(transformation_cache,
                                                      state_manager)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    return self.categorical_column._transform_feature(inputs)  # pylint: disable=protected-access
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.categorical_column.num_buckets
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.categorical_column._num_buckets  # pylint: disable=protected-access
+
+  def _get_sparse_tensors_helper(self, sparse_tensors):
+    id_tensor = sparse_tensors.id_tensor
+    weight_tensor = sparse_tensors.weight_tensor
+    # Expands third dimension, if necessary so that embeddings are not
+    # combined during embedding lookup. If the tensor is already 3D, leave
+    # as-is.
+    shape = array_ops.shape(id_tensor)
+    target_shape = [shape[0], shape[1], -1]
+    id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape)
+    if weight_tensor is not None:
+      weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape)
+    return CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
+
   def get_sequence_sparse_tensors(self, transformation_cache, state_manager):
     """Returns an IdWeightPair.
 
@@ -3580,27 +4137,11 @@ class SequenceCategoricalColumn(FeatureColumn,
     """
     sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
-    id_tensor = sparse_tensors.id_tensor
-    weight_tensor = sparse_tensors.weight_tensor
-    # Expands final dimension, so that embeddings are not combined during
-    # embedding lookup.
-    check_id_rank = check_ops.assert_equal(
-        array_ops.rank(id_tensor), 2,
-        data=[
-            'Column {} expected ID tensor of rank 2. '.format(self.name),
-            'id_tensor shape: ', array_ops.shape(id_tensor)])
-    with ops.control_dependencies([check_id_rank]):
-      id_tensor = sparse_ops.sparse_reshape(
-          id_tensor,
-          shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0))
-    if weight_tensor is not None:
-      check_weight_rank = check_ops.assert_equal(
-          array_ops.rank(weight_tensor), 2,
-          data=[
-              'Column {} expected weight tensor of rank 2.'.format(self.name),
-              'weight_tensor shape:', array_ops.shape(weight_tensor)])
-      with ops.control_dependencies([check_weight_rank]):
-        weight_tensor = sparse_ops.sparse_reshape(
-            weight_tensor,
-            shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0))
-    return CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
+    return self._get_sparse_tensors_helper(sparse_tensors)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+    return self._get_sparse_tensors_helper(sparse_tensors)
diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py
index d3787146ed..31bc0485ef 100644
--- a/tensorflow/python/feature_column/feature_column_v2_test.py
+++ b/tensorflow/python/feature_column/feature_column_v2_test.py
@@ -31,12 +31,8 @@ from tensorflow.python.client import session
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column as fc_old
 from tensorflow.python.feature_column import feature_column_v2 as fc
-from tensorflow.python.feature_column.feature_column_v2 import _transform_features
-from tensorflow.python.feature_column.feature_column_v2 import FeatureColumn
-from tensorflow.python.feature_column.feature_column_v2 import FeatureLayer
-from tensorflow.python.feature_column.feature_column_v2 import FeatureTransformationCache
-from tensorflow.python.feature_column.feature_column_v2 import StateManager
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -46,6 +42,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
@@ -60,15 +57,29 @@ def _initialized_session(config=None):
   return sess
 
 
+def get_linear_model_bias(name='linear_model'):
+  with variable_scope.variable_scope(name, reuse=True):
+    return variable_scope.get_variable('bias_weights')
+
+
+def get_linear_model_column_var(column, name='linear_model'):
+  return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                            name + '/' + column.name)[0]
+
+
 class LazyColumnTest(test.TestCase):
 
   def test_transformations_called_once(self):
 
-    class TransformCounter(FeatureColumn):
+    class TransformCounter(fc.FeatureColumn):
 
       def __init__(self):
         self.num_transform = 0
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return 'TransformCounter'
@@ -81,7 +92,7 @@ class LazyColumnTest(test.TestCase):
       def parse_example_spec(self):
         pass
 
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     column = TransformCounter()
     self.assertEqual(0, column.num_transform)
@@ -92,7 +103,11 @@ class LazyColumnTest(test.TestCase):
 
   def test_returns_transform_output(self):
 
-    class Transformer(FeatureColumn):
+    class Transformer(fc.FeatureColumn):
+
+      @property
+      def _is_v2_column(self):
+        return True
 
       @property
       def name(self):
@@ -105,7 +120,7 @@ class LazyColumnTest(test.TestCase):
       def parse_example_spec(self):
         pass
 
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     column = Transformer()
     self.assertEqual('Output', transformation_cache.get(column, None))
@@ -113,7 +128,11 @@ class LazyColumnTest(test.TestCase):
 
   def test_does_not_pollute_given_features_dict(self):
 
-    class Transformer(FeatureColumn):
+    class Transformer(fc.FeatureColumn):
+
+      @property
+      def _is_v2_column(self):
+        return True
 
       @property
       def name(self):
@@ -127,12 +146,12 @@ class LazyColumnTest(test.TestCase):
         pass
 
     features = {'a': [[2], [3.]]}
-    transformation_cache = FeatureTransformationCache(features=features)
+    transformation_cache = fc.FeatureTransformationCache(features=features)
     transformation_cache.get(Transformer(), None)
     self.assertEqual(['a'], list(features.keys()))
 
   def test_error_if_feature_is_not_found(self):
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     with self.assertRaisesRegexp(ValueError,
                                  'bbb is not in features dictionary'):
@@ -143,7 +162,11 @@ class LazyColumnTest(test.TestCase):
 
   def test_not_supported_feature_column(self):
 
-    class NotAProperColumn(FeatureColumn):
+    class NotAProperColumn(fc.FeatureColumn):
+
+      @property
+      def _is_v2_column(self):
+        return True
 
       @property
       def name(self):
@@ -157,7 +180,7 @@ class LazyColumnTest(test.TestCase):
       def parse_example_spec(self):
         pass
 
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     with self.assertRaisesRegexp(ValueError,
                                  'NotAProperColumn is not supported'):
@@ -168,7 +191,7 @@ class LazyColumnTest(test.TestCase):
     class NotAFeatureColumn(object):
       pass
 
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     with self.assertRaisesRegexp(
         TypeError, '"key" must be either a "str" or "FeatureColumn".'):
@@ -176,7 +199,7 @@ class LazyColumnTest(test.TestCase):
 
   def test_expand_dim_rank_1_sparse_tensor_empty_batch(self):
     # empty 1-D sparse tensor:
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={
             'a':
                 sparse_tensor.SparseTensor(
@@ -201,6 +224,7 @@ class NumericColumnTest(test.TestCase):
     self.assertIsNone(a.default_value)
     self.assertEqual(dtypes.float32, a.dtype)
     self.assertIsNone(a.normalizer_fn)
+    self.assertTrue(a._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -317,7 +341,9 @@ class NumericColumnTest(test.TestCase):
       return input_tensor + 2.
 
     price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two)
-    output = _transform_features({'price': [[1., 2.], [5., 6.]]}, [price], None)
+    output = fc._transform_features({
+        'price': [[1., 2.], [5., 6.]]
+    }, [price], None)
     with self.cached_session():
       self.assertAllEqual([[3., 4.], [7., 8.]], output[price].eval())
 
@@ -327,7 +353,7 @@ class NumericColumnTest(test.TestCase):
       return input_tensor + 2.
 
     price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two)
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'price': [[1., 2.], [5., 6.]]
     })
     self.assertEqual(
@@ -336,7 +362,7 @@ class NumericColumnTest(test.TestCase):
 
   def test_sparse_tensor_not_supported(self):
     price = fc.numeric_column('price')
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'price':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0]], values=[0.3], dense_shape=[1, 1])
@@ -370,6 +396,20 @@ class NumericColumnTest(test.TestCase):
         sess.run(price_var.assign([[10.]]))
         self.assertAllClose([[10.], [50.]], predictions.eval())
 
+  def test_old_linear_model(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      predictions = fc_old.linear_model(features, [price])
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        self.assertAllClose([[0.]], price_var.eval())
+        self.assertAllClose([[0.], [0.]], predictions.eval())
+        sess.run(price_var.assign([[10.]]))
+        self.assertAllClose([[10.], [50.]], predictions.eval())
+
 
 class BucketizedColumnTest(test.TestCase):
 
@@ -404,6 +444,13 @@ class BucketizedColumnTest(test.TestCase):
   def test_name(self):
     a = fc.numeric_column('aaa', dtype=dtypes.int32)
     b = fc.bucketized_column(a, boundaries=[0, 1])
+    self.assertTrue(b._is_v2_column)
+    self.assertEqual('aaa_bucketized', b.name)
+
+  def test_is_v2_column_old_numeric(self):
+    a = fc_old.numeric_column('aaa', dtype=dtypes.int32)
+    b = fc.bucketized_column(a, boundaries=[0, 1])
+    self.assertFalse(b._is_v2_column)
     self.assertEqual('aaa_bucketized', b.name)
 
   def test_parse_spec(self):
@@ -445,7 +492,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[2])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformed_tensor = _transform_features({
+      transformed_tensor = fc._transform_features({
           'price': [[-1., 1.], [5., 6.]]
       }, [bucketized_price], None)
       with _initialized_session():
@@ -457,7 +504,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[1])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'price': [[-1.], [1.], [5.], [6.]]
       })
       with _initialized_session():
@@ -476,7 +523,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[2])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'price': [[-1., 1.], [5., 6.]]
       })
       with _initialized_session():
@@ -493,7 +540,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[1])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'price': [[-1.], [1.], [5.], [6.]]
       })
       with _initialized_session() as sess:
@@ -511,7 +558,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[2])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'price': [[-1., 1.], [5., 6.]]
       })
       with _initialized_session() as sess:
@@ -529,7 +576,7 @@ class BucketizedColumnTest(test.TestCase):
   def test_sparse_tensor_input_not_supported(self):
     price = fc.numeric_column('price')
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 1])
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'price':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0]], values=[0.3], dense_shape=[1, 1])
@@ -599,6 +646,85 @@ class BucketizedColumnTest(test.TestCase):
         sess.run(bias.assign([1.]))
         self.assertAllClose([[81.], [141.]], predictions.eval())
 
+  def test_old_linear_model_one_input_value(self):
+    """Tests linear_model() for input with shape=[1]."""
+    price = fc.numeric_column('price', shape=[1])
+    bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
+    with ops.Graph().as_default():
+      features = {'price': [[-1.], [1.], [5.], [6.]]}
+      predictions = fc_old.linear_model(features, [bucketized_price])
+      bias = get_linear_model_bias()
+      bucketized_price_var = get_linear_model_column_var(bucketized_price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        # One weight variable per bucket, all initialized to zero.
+        self.assertAllClose([[0.], [0.], [0.], [0.], [0.]],
+                            bucketized_price_var.eval())
+        self.assertAllClose([[0.], [0.], [0.], [0.]], predictions.eval())
+        sess.run(
+            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]]))
+        # price -1. is in the 0th bucket, whose weight is 10.
+        # price 1. is in the 1st bucket, whose weight is 20.
+        # price 5. is in the 3rd bucket, whose weight is 40.
+        # price 6. is in the 4th bucket, whose weight is 50.
+        self.assertAllClose([[10.], [20.], [40.], [50.]], predictions.eval())
+        sess.run(bias.assign([1.]))
+        self.assertAllClose([[11.], [21.], [41.], [51.]], predictions.eval())
+
+  def test_old_linear_model_two_input_values(self):
+    """Tests linear_model() for input with shape=[2]."""
+    price = fc.numeric_column('price', shape=[2])
+    bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
+    with ops.Graph().as_default():
+      features = {'price': [[-1., 1.], [5., 6.]]}
+      predictions = fc_old.linear_model(features, [bucketized_price])
+      bias = get_linear_model_bias()
+      bucketized_price_var = get_linear_model_column_var(bucketized_price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        # One weight per bucket per input column, all initialized to zero.
+        self.assertAllClose(
+            [[0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]],
+            bucketized_price_var.eval())
+        self.assertAllClose([[0.], [0.]], predictions.eval())
+        sess.run(
+            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.],
+                                         [60.], [70.], [80.], [90.], [100.]]))
+        # 1st example:
+        #   price -1. is in the 0th bucket, whose weight is 10.
+        #   price 1. is in the 6th bucket, whose weight is 70.
+        # 2nd example:
+        #   price 5. is in the 3rd bucket, whose weight is 40.
+        #   price 6. is in the 9th bucket, whose weight is 100.
+        self.assertAllClose([[80.], [140.]], predictions.eval())
+        sess.run(bias.assign([1.]))
+        self.assertAllClose([[81.], [141.]], predictions.eval())
+
+  def test_old_linear_model_one_input_value_old_numeric(self):
+    """Tests linear_model() for input with shape=[1]."""
+    price = fc_old.numeric_column('price', shape=[1])
+    bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
+    with ops.Graph().as_default():
+      features = {'price': [[-1.], [1.], [5.], [6.]]}
+      predictions = fc_old.linear_model(features, [bucketized_price])
+      bias = get_linear_model_bias()
+      bucketized_price_var = get_linear_model_column_var(bucketized_price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        # One weight variable per bucket, all initialized to zero.
+        self.assertAllClose([[0.], [0.], [0.], [0.], [0.]],
+                            bucketized_price_var.eval())
+        self.assertAllClose([[0.], [0.], [0.], [0.]], predictions.eval())
+        sess.run(
+            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]]))
+        # price -1. is in the 0th bucket, whose weight is 10.
+        # price 1. is in the 1st bucket, whose weight is 20.
+        # price 5. is in the 3rd bucket, whose weight is 40.
+        # price 6. is in the 4th bucket, whose weight is 50.
+        self.assertAllClose([[10.], [20.], [40.], [50.]], predictions.eval())
+        sess.run(bias.assign([1.]))
+        self.assertAllClose([[11.], [21.], [41.], [51.]], predictions.eval())
+
 
 class HashedCategoricalColumnTest(test.TestCase):
 
@@ -608,6 +734,7 @@ class HashedCategoricalColumnTest(test.TestCase):
     self.assertEqual('aaa', a.key)
     self.assertEqual(10, a.hash_bucket_size)
     self.assertEqual(dtypes.string, a.dtype)
+    self.assertTrue(a._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -675,7 +802,9 @@ class HashedCategoricalColumnTest(test.TestCase):
         values=['omar', 'stringer', 'marlo'],
         indices=[[0, 0], [1, 0], [1, 1]],
         dense_shape=[2, 2])
-    outputs = _transform_features({'wire': wire_tensor}, [hashed_sparse], None)
+    outputs = fc._transform_features({
+        'wire': wire_tensor
+    }, [hashed_sparse], None)
     output = outputs[hashed_sparse]
     # Check exact hashed output. If hashing changes this test will break.
     expected_values = [6, 4, 1]
@@ -705,7 +834,7 @@ class HashedCategoricalColumnTest(test.TestCase):
         values=[101.],
         indices=[[0, 0]],
         dense_shape=[1, 1])
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'a_int': int_tensor,
         'a_string': string_tensor,
         'a_float': float_tensor
@@ -720,7 +849,7 @@ class HashedCategoricalColumnTest(test.TestCase):
         'wire', 10, dtype=dtypes.int64)
     wire_tensor = sparse_tensor.SparseTensor(
         values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
-    transformation_cache = FeatureTransformationCache({'wire': wire_tensor})
+    transformation_cache = fc.FeatureTransformationCache({'wire': wire_tensor})
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       transformation_cache.get(hashed_sparse, None)
 
@@ -731,7 +860,7 @@ class HashedCategoricalColumnTest(test.TestCase):
         values=[101, 201, 301],
         indices=[[0, 0], [1, 0], [1, 1]],
         dense_shape=[2, 2])
-    transformation_cache = FeatureTransformationCache({'wire': wire_tensor})
+    transformation_cache = fc.FeatureTransformationCache({'wire': wire_tensor})
     output = transformation_cache.get(hashed_sparse, None)
     # Check exact hashed output. If hashing changes this test will break.
     expected_values = [3, 7, 5]
@@ -745,7 +874,7 @@ class HashedCategoricalColumnTest(test.TestCase):
         values=constant_op.constant([101, 201, 301], dtype=dtypes.int32),
         indices=[[0, 0], [1, 0], [1, 1]],
         dense_shape=[2, 2])
-    transformation_cache = FeatureTransformationCache({'wire': wire_tensor})
+    transformation_cache = fc.FeatureTransformationCache({'wire': wire_tensor})
     output = transformation_cache.get(hashed_sparse, None)
     # Check exact hashed output. If hashing changes this test will break.
     expected_values = [3, 7, 5]
@@ -754,7 +883,7 @@ class HashedCategoricalColumnTest(test.TestCase):
 
   def test_get_sparse_tensors(self):
     hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10)
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'wire':
             sparse_tensor.SparseTensor(
                 values=['omar', 'stringer', 'marlo'],
@@ -769,7 +898,7 @@ class HashedCategoricalColumnTest(test.TestCase):
 
   def test_get_sparse_tensors_dense_input(self):
     hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10)
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'wire': (('omar', ''), ('stringer', 'marlo'))
     })
     id_weight_pair = hashed_sparse.get_sparse_tensors(transformation_cache,
@@ -800,6 +929,28 @@ class HashedCategoricalColumnTest(test.TestCase):
         # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6
         self.assertAllClose(((4.,), (6.,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    wire_column = fc.categorical_column_with_hash_bucket('wire', 4)
+    self.assertEqual(4, wire_column.num_buckets)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          wire_column.name:
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=('marlo', 'skywalker', 'omar'),
+                  dense_shape=(2, 2))
+      }, (wire_column,))
+      bias = get_linear_model_bias()
+      wire_var = get_linear_model_column_var(wire_column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval()
+        # 'marlo' -> 3: wire_var[3] = 4
+        # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6
+        self.assertAllClose(((4.,), (6.,)), predictions.eval())
+
 
 class CrossedColumnTest(test.TestCase):
 
@@ -841,8 +992,20 @@ class CrossedColumnTest(test.TestCase):
     a = fc.numeric_column('a', dtype=dtypes.int32)
     b = fc.bucketized_column(a, boundaries=[0, 1])
     crossed1 = fc.crossed_column(['d1', 'd2'], 10)
+    self.assertTrue(crossed1._is_v2_column)
+
+    crossed2 = fc.crossed_column([b, 'c', crossed1], 10)
+    self.assertTrue(crossed2._is_v2_column)
+    self.assertEqual('a_bucketized_X_c_X_d1_X_d2', crossed2.name)
+
+  def test_is_v2_column(self):
+    a = fc_old.numeric_column('a', dtype=dtypes.int32)
+    b = fc.bucketized_column(a, boundaries=[0, 1])
+    crossed1 = fc.crossed_column(['d1', 'd2'], 10)
+    self.assertTrue(crossed1._is_v2_column)
 
     crossed2 = fc.crossed_column([b, 'c', crossed1], 10)
+    self.assertFalse(crossed2._is_v2_column)
     self.assertEqual('a_bucketized_X_c_X_d1_X_d2', crossed2.name)
 
   def test_name_ordered_alphabetically(self):
@@ -927,7 +1090,7 @@ class CrossedColumnTest(test.TestCase):
             indices=[[0, 0], [1, 0], [1, 1]],
             dense_shape=[2, 2]),
     }
-    outputs = _transform_features(features, [price_cross_wire], None)
+    outputs = fc._transform_features(features, [price_cross_wire], None)
     output = outputs[price_cross_wire]
     with self.cached_session() as sess:
       output_val = sess.run(output)
@@ -943,7 +1106,7 @@ class CrossedColumnTest(test.TestCase):
     crossed1 = fc.crossed_column(['d1', 'd2'], 10)
     crossed2 = fc.crossed_column([b, 'c', crossed1], 15, hash_key=5)
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'a':
               constant_op.constant(((-1., .5), (.5, 1.))),
           'c':
@@ -983,7 +1146,7 @@ class CrossedColumnTest(test.TestCase):
     b = fc.bucketized_column(a, boundaries=(0, 1))
     crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5)
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'a':
               constant_op.constant(((-1., .5), (.5, 1.))),
           'c':
@@ -1040,6 +1203,10 @@ class CrossedColumnTest(test.TestCase):
     class _TestColumnWithWeights(fc.CategoricalColumn):
       """Produces sparse IDs and sparse weights."""
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return 'test_column'
@@ -1092,6 +1259,146 @@ class CrossedColumnTest(test.TestCase):
                     dense_shape=(2, 2)),
         })
 
+  def test_old_linear_model(self):
+    """Tests linear_model.
+
+    Uses data from test_get_sparse_tesnsors_simple.
+    """
+    a = fc.numeric_column('a', dtype=dtypes.int32, shape=(2,))
+    b = fc.bucketized_column(a, boundaries=(0, 1))
+    crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'a':
+              constant_op.constant(((-1., .5), (.5, 1.))),
+          'c':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=['cA', 'cB', 'cC'],
+                  dense_shape=(2, 2)),
+      }, (crossed,))
+      bias = get_linear_model_bias()
+      crossed_var = get_linear_model_column_var(crossed)
+      with _initialized_session() as sess:
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)),
+                            crossed_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,))))
+        # Expected ids after cross = (1, 0, 1, 3, 4, 2)
+        self.assertAllClose(((3.,), (14.,)), predictions.eval())
+        sess.run(bias.assign((.1,)))
+        self.assertAllClose(((3.1,), (14.1,)), predictions.eval())
+
+  def test_old_linear_model_with_weights(self):
+
+    class _TestColumnWithWeights(fc.CategoricalColumn,
+                                 fc_old._CategoricalColumn):
+      """Produces sparse IDs and sparse weights."""
+
+      @property
+      def _is_v2_column(self):
+        return True
+
+      @property
+      def name(self):
+        return 'test_column'
+
+      @property
+      def parse_example_spec(self):
+        return {
+            self.name:
+                parsing_ops.VarLenFeature(dtypes.int32),
+            '{}_weights'.format(self.name):
+                parsing_ops.VarLenFeature(dtypes.float32),
+        }
+
+      @property
+      def _parse_example_spec(self):
+        return self.parse_example_spec
+
+      @property
+      def num_buckets(self):
+        return 5
+
+      @property
+      def _num_buckets(self):
+        return self.num_buckets
+
+      def transform_feature(self, transformation_cache, state_manager):
+        raise ValueError('Should not be called.')
+
+      def _transform_feature(self, inputs):
+        return (inputs.get(self.name),
+                inputs.get('{}_weights'.format(self.name)))
+
+      def get_sparse_tensors(self, transformation_cache, state_manager):
+        raise ValueError('Should not be called.')
+
+      def _get_sparse_tensors(self,
+                              inputs,
+                              weight_collections=None,
+                              trainable=None):
+        """Populates both id_tensor and weight_tensor."""
+        ids_and_weights = inputs.get(self)
+        return fc.CategoricalColumn.IdWeightPair(
+            id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1])
+
+    t = _TestColumnWithWeights()
+    crossed = fc.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5)
+    with ops.Graph().as_default():
+      with self.assertRaisesRegexp(
+          ValueError,
+          'crossed_column does not support weight_tensor.*{}'.format(t.name)):
+        fc_old.linear_model({
+            t.name:
+                sparse_tensor.SparseTensor(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=[0, 1, 2],
+                    dense_shape=(2, 2)),
+            '{}_weights'.format(t.name):
+                sparse_tensor.SparseTensor(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=[1., 10., 2.],
+                    dense_shape=(2, 2)),
+            'c':
+                sparse_tensor.SparseTensor(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=['cA', 'cB', 'cC'],
+                    dense_shape=(2, 2)),
+        }, (crossed,))
+
+  def test_old_linear_model_old_numeric(self):
+    """Tests linear_model.
+
+    Uses data from test_get_sparse_tesnsors_simple.
+    """
+    a = fc_old.numeric_column('a', dtype=dtypes.int32, shape=(2,))
+    b = fc.bucketized_column(a, boundaries=(0, 1))
+    crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'a':
+              constant_op.constant(((-1., .5), (.5, 1.))),
+          'c':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=['cA', 'cB', 'cC'],
+                  dense_shape=(2, 2)),
+      }, (crossed,))
+      bias = get_linear_model_bias()
+      crossed_var = get_linear_model_column_var(crossed)
+      with _initialized_session() as sess:
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)),
+                            crossed_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,))))
+        # Expected ids after cross = (1, 0, 1, 3, 4, 2)
+        self.assertAllClose(((3.,), (14.,)), predictions.eval())
+        sess.run(bias.assign((.1,)))
+        self.assertAllClose(((3.1,), (14.1,)), predictions.eval())
+
 
 class LinearModelTest(test.TestCase):
 
@@ -1108,6 +1415,10 @@ class LinearModelTest(test.TestCase):
 
     class NotSupportedColumn(fc.FeatureColumn):
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return 'NotSupportedColumn'
@@ -1189,6 +1500,10 @@ class LinearModelTest(test.TestCase):
 
     class _DenseAndSparseColumn(fc.DenseColumn, fc.CategoricalColumn):
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return 'dense_and_sparse_column'
@@ -1735,60 +2050,1519 @@ class LinearModelTest(test.TestCase):
         self.assertAllClose([[25.], [105.]], predictions2.eval())
 
 
-class FeatureLayerTest(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes()
-  def test_retrieving_input(self):
-    features = {'a': [0.]}
-    feature_layer = FeatureLayer(fc.numeric_column('a'))
-    inputs = self.evaluate(feature_layer(features))
-    self.assertAllClose([[0.]], inputs)
+class OldLinearModelTest(test.TestCase):
 
-  def test_reuses_variables(self):
-    with context.eager_mode():
-      sparse_input = sparse_tensor.SparseTensor(
-          indices=((0, 0), (1, 0), (2, 0)),
-          values=(0, 1, 2),
-          dense_shape=(3, 3))
+  def test_raises_if_empty_feature_columns(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'feature_columns must not be empty'):
+      fc_old.linear_model(features={}, feature_columns=[])
 
-      # Create feature columns (categorical and embedding).
-      categorical_column = fc.categorical_column_with_identity(
-          key='a', num_buckets=3)
-      embedding_dimension = 2
-      def _embedding_column_initializer(shape, dtype, partition_info):
-        del shape  # unused
-        del dtype  # unused
-        del partition_info  # unused
-        embedding_values = (
-            (1, 0),  # id 0
-            (0, 1),  # id 1
-            (1, 1))  # id 2
-        return embedding_values
+  def test_should_be_feature_column(self):
+    with self.assertRaisesRegexp(ValueError, 'must be a _FeatureColumn'):
+      fc_old.linear_model(features={'a': [[0]]}, feature_columns='NotSupported')
 
-      embedding_column = fc.embedding_column(
-          categorical_column,
-          dimension=embedding_dimension,
-          initializer=_embedding_column_initializer)
+  def test_should_be_dense_or_categorical_column(self):
 
-      feature_layer = FeatureLayer([embedding_column])
-      features = {'a': sparse_input}
+    class NotSupportedColumn(fc.FeatureColumn, fc_old._FeatureColumn):
 
-      inputs = feature_layer(features)
-      variables = feature_layer.variables
+      @property
+      def _is_v2_column(self):
+        return True
 
-      # Sanity check: test that the inputs are correct.
-      self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)
+      @property
+      def name(self):
+        return 'NotSupportedColumn'
+
+      def transform_feature(self, transformation_cache, state_manager):
+        pass
+
+      def _transform_feature(self, inputs):
+        pass
+
+      @property
+      def parse_example_spec(self):
+        pass
+
+      @property
+      def _parse_example_spec(self):
+        pass
+
+    with self.assertRaisesRegexp(
+        ValueError, 'must be either a _DenseColumn or _CategoricalColumn'):
+      fc_old.linear_model(
+          features={'a': [[0]]}, feature_columns=[NotSupportedColumn()])
+
+  def test_does_not_support_dict_columns(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'Expected feature_columns to be iterable, found dict.'):
+      fc_old.linear_model(
+          features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')})
+
+  def test_raises_if_duplicate_name(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'Duplicate feature column name found for columns'):
+      fc_old.linear_model(
+          features={'a': [[0]]},
+          feature_columns=[fc.numeric_column('a'),
+                           fc.numeric_column('a')])
+
+  def test_dense_bias(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      predictions = fc_old.linear_model(features, [price])
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        sess.run(price_var.assign([[10.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[15.], [55.]], predictions.eval())
+
+  def test_sparse_bias(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {'wire_cast': wire_tensor}
+      predictions = fc_old.linear_model(features, [wire_cast])
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        self.assertAllClose([[0.], [0.], [0.], [0.]], wire_cast_var.eval())
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[1005.], [10015.]], predictions.eval())
+
+  def test_dense_and_sparse_bias(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]}
+      predictions = fc_old.linear_model(features, [wire_cast, price])
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        sess.run(bias.assign([5.]))
+        sess.run(price_var.assign([[10.]]))
+        self.assertAllClose([[1015.], [10065.]], predictions.eval())
+
+  def test_dense_and_sparse_column(self):
+    """When the column is both dense and sparse, uses sparse tensors."""
+
+    class _DenseAndSparseColumn(fc.DenseColumn, fc.CategoricalColumn,
+                                fc_old._DenseColumn, fc_old._CategoricalColumn):
+
+      @property
+      def _is_v2_column(self):
+        return True
+
+      @property
+      def name(self):
+        return 'dense_and_sparse_column'
+
+      @property
+      def parse_example_spec(self):
+        return {self.name: parsing_ops.VarLenFeature(self.dtype)}
+
+      @property
+      def _parse_example_spec(self):
+        return self.parse_example_spec
+
+      def transform_feature(self, transformation_cache, state_manager):
+        raise ValueError('Should not use this method.')
+
+      def _transform_feature(self, inputs):
+        return inputs.get(self.name)
+
+      @property
+      def variable_shape(self):
+        return self.variable_shape
+
+      @property
+      def _variable_shape(self):
+        return self.variable_shape
+
+      def get_dense_tensor(self, transformation_cache, state_manager):
+        raise ValueError('Should not use this method.')
+
+      def _get_dense_tensor(self, inputs):
+        raise ValueError('Should not use this method.')
+
+      @property
+      def num_buckets(self):
+        return 4
+
+      @property
+      def _num_buckets(self):
+        return self.num_buckets
+
+      def get_sparse_tensors(self, transformation_cache, state_manager):
+        raise ValueError('Should not use this method.')
+
+      def _get_sparse_tensors(self,
+                              inputs,
+                              weight_collections=None,
+                              trainable=None):
+        sp_tensor = sparse_tensor.SparseTensor(
+            indices=[[0, 0], [1, 0], [1, 1]],
+            values=[2, 0, 3],
+            dense_shape=[2, 2])
+        return fc.CategoricalColumn.IdWeightPair(sp_tensor, None)
+
+    dense_and_sparse_column = _DenseAndSparseColumn()
+    with ops.Graph().as_default():
+      sp_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {dense_and_sparse_column.name: sp_tensor}
+      predictions = fc_old.linear_model(features, [dense_and_sparse_column])
+      bias = get_linear_model_bias()
+      dense_and_sparse_column_var = get_linear_model_column_var(
+          dense_and_sparse_column)
+      with _initialized_session() as sess:
+        sess.run(
+            dense_and_sparse_column_var.assign([[10.], [100.], [1000.],
+                                                [10000.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[1005.], [10015.]], predictions.eval())
+
+  def test_dense_multi_output(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      predictions = fc_old.linear_model(features, [price], units=3)
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose(np.zeros((3,)), bias.eval())
+        self.assertAllClose(np.zeros((1, 3)), price_var.eval())
+        sess.run(price_var.assign([[10., 100., 1000.]]))
+        sess.run(bias.assign([5., 6., 7.]))
+        self.assertAllClose([[15., 106., 1007.], [55., 506., 5007.]],
+                            predictions.eval())
+
+  def test_sparse_multi_output(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {'wire_cast': wire_tensor}
+      predictions = fc_old.linear_model(features, [wire_cast], units=3)
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        self.assertAllClose(np.zeros((3,)), bias.eval())
+        self.assertAllClose(np.zeros((4, 3)), wire_cast_var.eval())
+        sess.run(
+            wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.],
+                                  [1000., 1100., 1200.],
+                                  [10000., 11000., 12000.]]))
+        sess.run(bias.assign([5., 6., 7.]))
+        self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]],
+                            predictions.eval())
+
+  def test_dense_multi_dimension(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1., 2.], [5., 6.]]}
+      predictions = fc_old.linear_model(features, [price])
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose([[0.], [0.]], price_var.eval())
+        sess.run(price_var.assign([[10.], [100.]]))
+        self.assertAllClose([[210.], [650.]], predictions.eval())
+
+  def test_sparse_multi_rank(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default():
+      wire_tensor = array_ops.sparse_placeholder(dtypes.string)
+      wire_value = sparse_tensor.SparseTensorValue(
+          values=['omar', 'stringer', 'marlo', 'omar'],  # hashed = [2, 0, 3, 2]
+          indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]],
+          dense_shape=[2, 2, 2])
+      features = {'wire_cast': wire_tensor}
+      predictions = fc_old.linear_model(features, [wire_cast])
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        self.assertAllClose(np.zeros((4, 1)), wire_cast_var.eval())
+        self.assertAllClose(
+            np.zeros((2, 1)),
+            predictions.eval(feed_dict={wire_tensor: wire_value}))
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        self.assertAllClose(
+            [[1010.], [11000.]],
+            predictions.eval(feed_dict={wire_tensor: wire_value}))
+
+  def test_sparse_combiner(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {'wire_cast': wire_tensor}
+      predictions = fc_old.linear_model(
+          features, [wire_cast], sparse_combiner='mean')
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[1005.], [5010.]], predictions.eval())
+
+  def test_sparse_combiner_with_negative_weights(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    wire_cast_weights = fc.weighted_categorical_column(wire_cast, 'weights')
+
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {
+          'wire_cast': wire_tensor,
+          'weights': constant_op.constant([[1., 1., -1.0]])
+      }
+      predictions = fc_old.linear_model(
+          features, [wire_cast_weights], sparse_combiner='sum')
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[1005.], [-9985.]], predictions.eval())
+
+  def test_dense_multi_dimension_multi_output(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1., 2.], [5., 6.]]}
+      predictions = fc_old.linear_model(features, [price], units=3)
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose(np.zeros((3,)), bias.eval())
+        self.assertAllClose(np.zeros((2, 3)), price_var.eval())
+        sess.run(price_var.assign([[1., 2., 3.], [10., 100., 1000.]]))
+        sess.run(bias.assign([2., 3., 4.]))
+        self.assertAllClose([[23., 205., 2007.], [67., 613., 6019.]],
+                            predictions.eval())
+
+  def test_raises_if_shape_mismatch(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      with self.assertRaisesRegexp(
+          Exception,
+          r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
+        fc_old.linear_model(features, [price])
+
+  def test_dense_reshaping(self):
+    price = fc.numeric_column('price', shape=[1, 2])
+    with ops.Graph().as_default():
+      features = {'price': [[[1., 2.]], [[5., 6.]]]}
+      predictions = fc_old.linear_model(features, [price])
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        self.assertAllClose([[0.], [0.]], price_var.eval())
+        self.assertAllClose([[0.], [0.]], predictions.eval())
+        sess.run(price_var.assign([[10.], [100.]]))
+        self.assertAllClose([[210.], [650.]], predictions.eval())
+
+  def test_dense_multi_column(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
+      predictions = fc_old.linear_model(features, [price1, price2])
+      bias = get_linear_model_bias()
+      price1_var = get_linear_model_column_var(price1)
+      price2_var = get_linear_model_column_var(price2)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        self.assertAllClose([[0.], [0.]], price1_var.eval())
+        self.assertAllClose([[0.]], price2_var.eval())
+        self.assertAllClose([[0.], [0.]], predictions.eval())
+        sess.run(price1_var.assign([[10.], [100.]]))
+        sess.run(price2_var.assign([[1000.]]))
+        sess.run(bias.assign([7.]))
+        self.assertAllClose([[3217.], [4657.]], predictions.eval())
+
+  def test_fills_cols_to_vars(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
+      cols_to_vars = {}
+      fc_old.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars)
+      bias = get_linear_model_bias()
+      price1_var = get_linear_model_column_var(price1)
+      price2_var = get_linear_model_column_var(price2)
+      self.assertAllEqual(cols_to_vars['bias'], [bias])
+      self.assertAllEqual(cols_to_vars[price1], [price1_var])
+      self.assertAllEqual(cols_to_vars[price2], [price2_var])
+
+  def test_fills_cols_to_vars_partitioned_variables(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2', shape=3)
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1., 2.], [6., 7.]],
+          'price2': [[3., 4., 5.], [8., 9., 10.]]
+      }
+      cols_to_vars = {}
+      with variable_scope.variable_scope(
+          'linear',
+          partitioner=partitioned_variables.fixed_size_partitioner(2, axis=0)):
+        fc_old.linear_model(
+            features, [price1, price2], cols_to_vars=cols_to_vars)
+      with _initialized_session():
+        self.assertEqual([0.], cols_to_vars['bias'][0].eval())
+        # Partitioning shards the [2, 1] price1 var into 2 [1, 1] Variables.
+        self.assertAllEqual([[0.]], cols_to_vars[price1][0].eval())
+        self.assertAllEqual([[0.]], cols_to_vars[price1][1].eval())
+        # Partitioning shards the [3, 1] price2 var into a [2, 1] Variable and
+        # a [1, 1] Variable.
+        self.assertAllEqual([[0.], [0.]], cols_to_vars[price2][0].eval())
+        self.assertAllEqual([[0.]], cols_to_vars[price2][1].eval())
+
+  def test_fills_cols_to_output_tensors(self):
+    # Provide three _DenseColumn's to input_layer: a _NumericColumn, a
+    # _BucketizedColumn, and an _EmbeddingColumn.  Only the _EmbeddingColumn
+    # creates a Variable.
+    apple_numeric_column = fc.numeric_column('apple_numeric_column')
+    banana_dense_feature = fc.numeric_column('banana_dense_feature')
+    banana_dense_feature_bucketized = fc.bucketized_column(
+        banana_dense_feature, boundaries=[0.])
+    cherry_sparse_column = fc.categorical_column_with_hash_bucket(
+        'cherry_sparse_feature', hash_bucket_size=5)
+    dragonfruit_embedding_column = fc.embedding_column(
+        cherry_sparse_column, dimension=10)
+    with ops.Graph().as_default():
+      features = {
+          'apple_numeric_column': [[3.], [4.]],
+          'banana_dense_feature': [[-1.], [4.]],
+          'cherry_sparse_feature': [['a'], ['x']],
+      }
+      cols_to_output_tensors = {}
+      all_cols = [
+          apple_numeric_column, banana_dense_feature_bucketized,
+          dragonfruit_embedding_column
+      ]
+      input_layer = fc_old.input_layer(
+          features, all_cols, cols_to_output_tensors=cols_to_output_tensors)
+
+      # We check the mapping by checking that we have the right keys,
+      # and that the values (output_tensors) were indeed the ones used to
+      # form the input layer.
+      self.assertItemsEqual(all_cols, cols_to_output_tensors.keys())
+      input_layer_inputs = [tensor for tensor in input_layer.op.inputs[:-1]]
+      output_tensors = [tensor for tensor in cols_to_output_tensors.values()]
+      self.assertItemsEqual(input_layer_inputs, output_tensors)
+
+  def test_dense_collection(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default() as g:
+      features = {'price': [[1.], [5.]]}
+      fc_old.linear_model(features, [price], weight_collections=['my-vars'])
+      my_vars = g.get_collection('my-vars')
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      self.assertIn(bias, my_vars)
+      self.assertIn(price_var, my_vars)
+
+  def test_sparse_collection(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default() as g:
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      features = {'wire_cast': wire_tensor}
+      fc_old.linear_model(features, [wire_cast], weight_collections=['my-vars'])
+      my_vars = g.get_collection('my-vars')
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      self.assertIn(bias, my_vars)
+      self.assertIn(wire_cast_var, my_vars)
+
+  def test_dense_trainable_default(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default() as g:
+      features = {'price': [[1.], [5.]]}
+      fc_old.linear_model(features, [price])
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertIn(bias, trainable_vars)
+      self.assertIn(price_var, trainable_vars)
+
+  def test_sparse_trainable_default(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default() as g:
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      features = {'wire_cast': wire_tensor}
+      fc_old.linear_model(features, [wire_cast])
+      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      self.assertIn(bias, trainable_vars)
+      self.assertIn(wire_cast_var, trainable_vars)
+
+  def test_dense_trainable_false(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default() as g:
+      features = {'price': [[1.], [5.]]}
+      fc_old.linear_model(features, [price], trainable=False)
+      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertEqual([], trainable_vars)
+
+  def test_sparse_trainable_false(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default() as g:
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      features = {'wire_cast': wire_tensor}
+      fc_old.linear_model(features, [wire_cast], trainable=False)
+      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertEqual([], trainable_vars)
+
+  def test_column_order(self):
+    price_a = fc.numeric_column('price_a')
+    price_b = fc.numeric_column('price_b')
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default() as g:
+      features = {
+          'price_a': [[1.]],
+          'price_b': [[3.]],
+          'wire_cast':
+              sparse_tensor.SparseTensor(
+                  values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      }
+      fc_old.linear_model(
+          features, [price_a, wire_cast, price_b],
+          weight_collections=['my-vars'])
+      my_vars = g.get_collection('my-vars')
+      self.assertIn('price_a', my_vars[0].name)
+      self.assertIn('price_b', my_vars[1].name)
+      self.assertIn('wire_cast', my_vars[2].name)
+
+    with ops.Graph().as_default() as g:
+      features = {
+          'price_a': [[1.]],
+          'price_b': [[3.]],
+          'wire_cast':
+              sparse_tensor.SparseTensor(
+                  values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      }
+      fc_old.linear_model(
+          features, [wire_cast, price_b, price_a],
+          weight_collections=['my-vars'])
+      my_vars = g.get_collection('my-vars')
+      self.assertIn('price_a', my_vars[0].name)
+      self.assertIn('price_b', my_vars[1].name)
+      self.assertIn('wire_cast', my_vars[2].name)
+
+  def test_static_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1.], [5.], [7.]],  # batchsize = 3
+          'price2': [[3.], [4.]]  # batchsize = 2
+      }
+    with self.assertRaisesRegexp(
+        ValueError,
+        'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
+      fc_old.linear_model(features, [price1, price2])
+
+  def test_subset_of_static_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    price3 = fc.numeric_column('price3')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
+          'price2': [[3.], [4.]],  # batchsize = 2
+          'price3': [[3.], [4.], [5.]]  # batchsize = 3
+      }
+      with self.assertRaisesRegexp(
+          ValueError,
+          'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
+        fc_old.linear_model(features, [price1, price2, price3])
+
+  def test_runtime_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
+          'price2': [[3.], [4.]]  # batchsize = 2
+      }
+      predictions = fc_old.linear_model(features, [price1, price2])
+      with _initialized_session() as sess:
+        with self.assertRaisesRegexp(errors.OpError,
+                                     'must have the same size and shape'):
+          sess.run(
+              predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]})
+
+  def test_runtime_batch_size_matches(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
+          'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
+      }
+      predictions = fc_old.linear_model(features, [price1, price2])
+      with _initialized_session() as sess:
+        sess.run(
+            predictions,
+            feed_dict={
+                features['price1']: [[1.], [5.]],
+                features['price2']: [[1.], [5.]],
+            })
+
+  def test_with_1d_sparse_tensor(self):
+    price = fc.numeric_column('price')
+    price_buckets = fc.bucketized_column(
+        price, boundaries=[
+            0.,
+            10.,
+            100.,
+        ])
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+
+    # Provides 1-dim tensor and dense tensor.
+    features = {
+        'price':
+            constant_op.constant([
+                -1.,
+                12.,
+            ]),
+        'body-style':
+            sparse_tensor.SparseTensor(
+                indices=((0,), (1,)),
+                values=('sedan', 'hardtop'),
+                dense_shape=(2,)),
+    }
+    self.assertEqual(1, features['price'].shape.ndims)
+    self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
+
+    net = fc_old.linear_model(features, [price_buckets, body_style])
+    with _initialized_session() as sess:
+      bias = get_linear_model_bias()
+      price_buckets_var = get_linear_model_column_var(price_buckets)
+      body_style_var = get_linear_model_column_var(body_style)
+
+      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
+      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
+      sess.run(bias.assign([5.]))
+
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net))
+
+  def test_with_1d_unknown_shape_sparse_tensor(self):
+    price = fc.numeric_column('price')
+    price_buckets = fc.bucketized_column(
+        price, boundaries=[
+            0.,
+            10.,
+            100.,
+        ])
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+    country = fc.categorical_column_with_vocabulary_list(
+        'country', vocabulary_list=['US', 'JP', 'CA'])
+
+    # Provides 1-dim tensor and dense tensor.
+    features = {
+        'price': array_ops.placeholder(dtypes.float32),
+        'body-style': array_ops.sparse_placeholder(dtypes.string),
+        'country': array_ops.placeholder(dtypes.string),
+    }
+    self.assertIsNone(features['price'].shape.ndims)
+    self.assertIsNone(features['body-style'].get_shape().ndims)
+
+    price_data = np.array([-1., 12.])
+    body_style_data = sparse_tensor.SparseTensorValue(
+        indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,))
+    country_data = np.array(['US', 'CA'])
+
+    net = fc_old.linear_model(features, [price_buckets, body_style, country])
+    bias = get_linear_model_bias()
+    price_buckets_var = get_linear_model_column_var(price_buckets)
+    body_style_var = get_linear_model_column_var(body_style)
+    with _initialized_session() as sess:
+      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
+      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
+      sess.run(bias.assign([5.]))
+
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]],
+                          sess.run(
+                              net,
+                              feed_dict={
+                                  features['price']: price_data,
+                                  features['body-style']: body_style_data,
+                                  features['country']: country_data
+                              }))
+
+  def test_with_rank_0_feature(self):
+    price = fc.numeric_column('price')
+    features = {
+        'price': constant_op.constant(0),
+    }
+    self.assertEqual(0, features['price'].shape.ndims)
+
+    # Static rank 0 should fail
+    with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
+      fc_old.linear_model(features, [price])
+
+    # Dynamic rank 0 should fail
+    features = {
+        'price': array_ops.placeholder(dtypes.float32),
+    }
+    net = fc_old.linear_model(features, [price])
+    self.assertEqual(1, net.shape[1])
+    with _initialized_session() as sess:
+      with self.assertRaisesOpError('Feature .* cannot have rank 0'):
+        sess.run(net, feed_dict={features['price']: np.array(1)})
+
+  def test_multiple_linear_models(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features1 = {'price': [[1.], [5.]]}
+      features2 = {'price': [[2.], [10.]]}
+      predictions1 = fc_old.linear_model(features1, [price])
+      predictions2 = fc_old.linear_model(features2, [price])
+      bias1 = get_linear_model_bias(name='linear_model')
+      bias2 = get_linear_model_bias(name='linear_model_1')
+      price_var1 = get_linear_model_column_var(price, name='linear_model')
+      price_var2 = get_linear_model_column_var(price, name='linear_model_1')
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias1.eval())
+        sess.run(price_var1.assign([[10.]]))
+        sess.run(bias1.assign([5.]))
+        self.assertAllClose([[15.], [55.]], predictions1.eval())
+        self.assertAllClose([0.], bias2.eval())
+        sess.run(price_var2.assign([[10.]]))
+        sess.run(bias2.assign([5.]))
+        self.assertAllClose([[25.], [105.]], predictions2.eval())
+
+  def test_linear_model_v1_shared_embedding_all_other_v2(self):
+    price = fc.numeric_column('price')  # v2
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)  # v2
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)  # v2
+    categorical_column_a = fc_old.categorical_column_with_identity(
+        key='aaa', num_buckets=3)  # v2
+    categorical_column_b = fc_old.categorical_column_with_identity(
+        key='bbb', num_buckets=3)  # v2
+    shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b], dimension=2)  # v1
+    all_cols = [
+        price, some_embedding_column, shared_embedding_a, shared_embedding_b
+    ]
+
+    with ops.Graph().as_default():
+      features = {
+          'price': [[3.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      fc_old.linear_model(features, all_cols)
+      bias = get_linear_model_bias()
+      with _initialized_session():
+        self.assertAllClose([0.], bias.eval())
+
+  def test_linear_model_v1_shared_embedding_with_v2_cat_all_other_v2(self):
+    price = fc.numeric_column('price')  # v2
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)  # v2
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)  # v2
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)  # v2
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)  # v2
+    shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b], dimension=2)  # v1
+    all_cols = [
+        price, some_embedding_column, shared_embedding_a, shared_embedding_b
+    ]
+
+    with ops.Graph().as_default():
+      features = {
+          'price': [[3.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      fc_old.linear_model(features, all_cols)
+      bias = get_linear_model_bias()
+      with _initialized_session():
+        self.assertAllClose([0.], bias.eval())
+
+  def test_linear_model_v1_v2_mix(self):
+    price = fc.numeric_column('price')  # v2
+    some_sparse_column = fc_old.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)  # v1
+    some_embedding_column = fc_old.embedding_column(
+        some_sparse_column, dimension=10)  # v1
+    categorical_column_a = fc_old.categorical_column_with_identity(
+        key='aaa', num_buckets=3)  # v2
+    categorical_column_b = fc_old.categorical_column_with_identity(
+        key='bbb', num_buckets=3)  # v2
+    shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b], dimension=2)  # v1
+    all_cols = [
+        price, some_embedding_column, shared_embedding_a, shared_embedding_b
+    ]
+
+    with ops.Graph().as_default():
+      features = {
+          'price': [[3.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      fc_old.linear_model(features, all_cols)
+      bias = get_linear_model_bias()
+      with _initialized_session():
+        self.assertAllClose([0.], bias.eval())
+
+  def test_linear_model_v2_shared_embedding_all_other_v1(self):
+    price = fc_old.numeric_column('price')  # v1
+    some_sparse_column = fc_old.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)  # v1
+    some_embedding_column = fc_old.embedding_column(
+        some_sparse_column, dimension=10)  # v1
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)  # v2
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)  # v2
+    shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns_v2(
+        [categorical_column_a, categorical_column_b], dimension=2)  # v2
+    all_cols = [
+        price, some_embedding_column, shared_embedding_a, shared_embedding_b
+    ]
+
+    with ops.Graph().as_default():
+      features = {
+          'price': [[3.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      with self.assertRaisesRegexp(ValueError,
+                                   'SharedEmbeddingColumns are not supported'):
+        fc_old.linear_model(features, all_cols)
+
+
+class FeatureLayerTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_retrieving_input(self):
+    features = {'a': [0.]}
+    feature_layer = fc.FeatureLayer(fc.numeric_column('a'))
+    inputs = self.evaluate(feature_layer(features))
+    self.assertAllClose([[0.]], inputs)
+
+  def test_reuses_variables(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(
+          key='a', num_buckets=3)
+      embedding_dimension = 2
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      feature_layer = fc.FeatureLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      inputs = feature_layer(features)
+      variables = feature_layer.variables
+
+      # Sanity check: test that the inputs are correct.
+      self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)
+
+      # Check that only one variable was created.
+      self.assertEqual(1, len(variables))
+
+      # Check that invoking feature_layer on the same features does not create
+      # additional variables
+      _ = feature_layer(features)
+      self.assertEqual(1, len(variables))
+      self.assertEqual(variables[0], feature_layer.variables[0])
+
+  def test_feature_column_feature_layer_gradient(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(
+          key='a', num_buckets=3)
+      embedding_dimension = 2
+
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      feature_layer = fc.FeatureLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      def scale_matrix():
+        matrix = feature_layer(features)
+        return 2 * matrix
+
+      # Sanity check: Verify that scale_matrix returns the correct output.
+      self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix())
+
+      # Check that the returned gradient is correct.
+      grad_function = backprop.implicit_grad(scale_matrix)
+      grads_and_vars = grad_function()
+      indexed_slice = grads_and_vars[0][0]
+      gradient = grads_and_vars[0][0].values
+
+      self.assertAllEqual([0, 1, 2], indexed_slice.indices)
+      self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)
+
+  def test_raises_if_empty_feature_columns(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'feature_columns must not be empty'):
+      fc.FeatureLayer(feature_columns=[])(features={})
+
+  def test_should_be_dense_column(self):
+    with self.assertRaisesRegexp(ValueError, 'must be a DenseColumn'):
+      fc.FeatureLayer(feature_columns=[
+          fc.categorical_column_with_hash_bucket('wire_cast', 4)
+      ])(
+          features={
+              'a': [[0]]
+          })
+
+  def test_does_not_support_dict_columns(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'Expected feature_columns to be iterable, found dict.'):
+      fc.FeatureLayer(feature_columns={'a': fc.numeric_column('a')})(
+          features={
+              'a': [[0]]
+          })
+
+  def test_bare_column(self):
+    with ops.Graph().as_default():
+      features = features = {'a': [0.]}
+      net = fc.FeatureLayer(fc.numeric_column('a'))(features)
+      with _initialized_session():
+        self.assertAllClose([[0.]], net.eval())
+
+  def test_column_generator(self):
+    with ops.Graph().as_default():
+      features = features = {'a': [0.], 'b': [1.]}
+      columns = (fc.numeric_column(key) for key in features)
+      net = fc.FeatureLayer(columns)(features)
+      with _initialized_session():
+        self.assertAllClose([[0., 1.]], net.eval())
+
+  def test_raises_if_duplicate_name(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'Duplicate feature column name found for columns'):
+      fc.FeatureLayer(
+          feature_columns=[fc.numeric_column('a'),
+                           fc.numeric_column('a')])(
+                               features={
+                                   'a': [[0]]
+                               })
+
+  def test_one_column(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      net = fc.FeatureLayer([price])(features)
+      with _initialized_session():
+        self.assertAllClose([[1.], [5.]], net.eval())
+
+  def test_multi_dimension(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1., 2.], [5., 6.]]}
+      net = fc.FeatureLayer([price])(features)
+      with _initialized_session():
+        self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
+
+  def test_compute_output_shape(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2', shape=4)
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1., 2.], [5., 6.]],
+          'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]]
+      }
+      feature_layer = fc.FeatureLayer([price1, price2])
+      self.assertEqual((None, 6), feature_layer.compute_output_shape((None,)))
+      net = feature_layer(features)
+      with _initialized_session():
+        self.assertAllClose(
+            [[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]], net.eval())
+
+  def test_raises_if_shape_mismatch(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      with self.assertRaisesRegexp(
+          Exception,
+          r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
+        fc.FeatureLayer([price])(features)
+
+  def test_reshaping(self):
+    price = fc.numeric_column('price', shape=[1, 2])
+    with ops.Graph().as_default():
+      features = {'price': [[[1., 2.]], [[5., 6.]]]}
+      net = fc.FeatureLayer([price])(features)
+      with _initialized_session():
+        self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
+
+  def test_multi_column(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1., 2.], [5., 6.]],
+          'price2': [[3.], [4.]]
+      }
+      net = fc.FeatureLayer([price1, price2])(features)
+      with _initialized_session():
+        self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
+
+  def test_cols_to_output_tensors(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      cols_dict = {}
+      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
+      feature_layer = fc.FeatureLayer([price1, price2])
+      net = feature_layer(features, cols_dict)
+      with _initialized_session():
+        self.assertAllClose([[1., 2.], [5., 6.]], cols_dict[price1].eval())
+        self.assertAllClose([[3.], [4.]], cols_dict[price2].eval())
+        self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
+
+  def test_column_order(self):
+    price_a = fc.numeric_column('price_a')
+    price_b = fc.numeric_column('price_b')
+    with ops.Graph().as_default():
+      features = {
+          'price_a': [[1.]],
+          'price_b': [[3.]],
+      }
+      net1 = fc.FeatureLayer([price_a, price_b])(features)
+      net2 = fc.FeatureLayer([price_b, price_a])(features)
+      with _initialized_session():
+        self.assertAllClose([[1., 3.]], net1.eval())
+        self.assertAllClose([[1., 3.]], net2.eval())
+
+  def test_fails_for_categorical_column(self):
+    animal = fc.categorical_column_with_identity('animal', num_buckets=4)
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+      with self.assertRaisesRegexp(Exception, 'must be a DenseColumn'):
+        fc.FeatureLayer([animal])(features)
+
+  def test_static_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1.], [5.], [7.]],  # batchsize = 3
+          'price2': [[3.], [4.]]  # batchsize = 2
+      }
+      with self.assertRaisesRegexp(
+          ValueError,
+          'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
+        fc.FeatureLayer([price1, price2])(features)
+
+  def test_subset_of_static_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    price3 = fc.numeric_column('price3')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
+          'price2': [[3.], [4.]],  # batchsize = 2
+          'price3': [[3.], [4.], [5.]]  # batchsize = 3
+      }
+      with self.assertRaisesRegexp(
+          ValueError,
+          'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
+        fc.FeatureLayer([price1, price2, price3])(features)
+
+  def test_runtime_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
+          'price2': [[3.], [4.]]  # batchsize = 2
+      }
+      net = fc.FeatureLayer([price1, price2])(features)
+      with _initialized_session() as sess:
+        with self.assertRaisesRegexp(errors.OpError,
+                                     'Dimensions of inputs should match'):
+          sess.run(net, feed_dict={features['price1']: [[1.], [5.], [7.]]})
+
+  def test_runtime_batch_size_matches(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
+          'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
+      }
+      net = fc.FeatureLayer([price1, price2])(features)
+      with _initialized_session() as sess:
+        sess.run(
+            net,
+            feed_dict={
+                features['price1']: [[1.], [5.]],
+                features['price2']: [[1.], [5.]],
+            })
+
+  def test_multiple_layers_with_same_embedding_column(self):
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)
+
+    with ops.Graph().as_default():
+      features = {
+          'sparse_feature': [['a'], ['x']],
+      }
+      all_cols = [some_embedding_column]
+      fc.FeatureLayer(all_cols)(features)
+      fc.FeatureLayer(all_cols)(features)
+      # Make sure that 2 variables get created in this case.
+      self.assertEqual(2, len(
+          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+      expected_var_names = [
+          'feature_layer/sparse_feature_embedding/embedding_weights:0',
+          'feature_layer_1/sparse_feature_embedding/embedding_weights:0'
+      ]
+      self.assertItemsEqual(
+          expected_var_names,
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+
+  def test_multiple_layers_with_same_shared_embedding_column(self):
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
+        [categorical_column_b, categorical_column_a],
+        dimension=embedding_dimension)
+    shared_state_manager = fc.SharedEmbeddingStateManager(
+        name='shared_feature_layer')
+
+    with ops.Graph().as_default():
+      features = {
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      all_cols = [embedding_column_a, embedding_column_b]
+      fc.FeatureLayer(
+          all_cols, shared_state_manager=shared_state_manager)(
+              features)
+      fc.FeatureLayer(
+          all_cols, shared_state_manager=shared_state_manager)(
+              features)
+      # Make sure that only 1 variable gets created in this case.
+      self.assertEqual(1, len(
+          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+      self.assertItemsEqual(
+          ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+
+  def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self):
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
+        [categorical_column_b, categorical_column_a],
+        dimension=embedding_dimension)
+    all_cols = [embedding_column_a, embedding_column_b]
+
+    with ops.Graph().as_default():
+      shared_state_manager1 = fc.SharedEmbeddingStateManager(
+          name='shared_feature_layer')
+      features = {
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      fc.FeatureLayer(
+          all_cols, shared_state_manager=shared_state_manager1)(
+              features)
+      # Make sure that only 1 variable gets created in this case.
+      self.assertEqual(1, len(
+          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+
+    with ops.Graph().as_default():
+      shared_state_manager2 = fc.SharedEmbeddingStateManager(
+          name='shared_feature_layer')
+      features1 = {
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+
+      fc.FeatureLayer(
+          all_cols, shared_state_manager=shared_state_manager2)(
+              features1)
+      # Make sure that only 1 variable gets created in this case.
+      self.assertEqual(1, len(
+          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+      self.assertItemsEqual(
+          ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+
+  def test_with_numpy_input_fn(self):
+    embedding_values = (
+        (1., 2., 3., 4., 5.),  # id 0
+        (6., 7., 8., 9., 10.),  # id 1
+        (11., 12., 13., 14., 15.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      del shape, dtype, partition_info
+      return embedding_values
+
+    # price has 1 dimension in feature_layer
+    price = fc.numeric_column('price')
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+    # one_hot_body_style has 3 dims in feature_layer.
+    one_hot_body_style = fc.indicator_column(body_style)
+    # embedded_body_style has 5 dims in feature_layer.
+    embedded_body_style = fc.embedding_column(
+        body_style, dimension=5, initializer=_initializer)
+
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'price': np.array([11., 12., 13., 14.]),
+            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
+        },
+        batch_size=2,
+        shuffle=False)
+    features = input_fn()
+    net = fc.FeatureLayer([price, one_hot_body_style, embedded_body_style])(
+        features)
+    self.assertEqual(1 + 3 + 5, net.shape[1])
+    with _initialized_session() as sess:
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
+
+      # Each row is formed by concatenating `embedded_body_style`,
+      # `one_hot_body_style`, and `price` in order.
+      self.assertAllEqual(
+          [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
+           [1., 2., 3., 4., 5., 1., 0., 0., 12]],
+          sess.run(net))
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def test_with_1d_sparse_tensor(self):
+    embedding_values = (
+        (1., 2., 3., 4., 5.),  # id 0
+        (6., 7., 8., 9., 10.),  # id 1
+        (11., 12., 13., 14., 15.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      del shape, dtype, partition_info
+      return embedding_values
+
+    # price has 1 dimension in feature_layer
+    price = fc.numeric_column('price')
+
+    # one_hot_body_style has 3 dims in feature_layer.
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+    one_hot_body_style = fc.indicator_column(body_style)
+
+    # embedded_body_style has 5 dims in feature_layer.
+    country = fc.categorical_column_with_vocabulary_list(
+        'country', vocabulary_list=['US', 'JP', 'CA'])
+    embedded_country = fc.embedding_column(
+        country, dimension=5, initializer=_initializer)
+
+    # Provides 1-dim tensor and dense tensor.
+    features = {
+        'price': constant_op.constant([11., 12.,]),
+        'body-style': sparse_tensor.SparseTensor(
+            indices=((0,), (1,)),
+            values=('sedan', 'hardtop'),
+            dense_shape=(2,)),
+        # This is dense tensor for the categorical_column.
+        'country': constant_op.constant(['CA', 'US']),
+    }
+    self.assertEqual(1, features['price'].shape.ndims)
+    self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
+    self.assertEqual(1, features['country'].shape.ndims)
+
+    net = fc.FeatureLayer([price, one_hot_body_style, embedded_country])(
+        features)
+    self.assertEqual(1 + 3 + 5, net.shape[1])
+    with _initialized_session() as sess:
+
+      # Each row is formed by concatenating `embedded_body_style`,
+      # `one_hot_body_style`, and `price` in order.
+      self.assertAllEqual(
+          [[0., 0., 1., 11., 12., 13., 14., 15., 11.],
+           [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
+          sess.run(net))
+
+  def test_with_1d_unknown_shape_sparse_tensor(self):
+    embedding_values = (
+        (1., 2.),  # id 0
+        (6., 7.),  # id 1
+        (11., 12.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      del shape, dtype, partition_info
+      return embedding_values
+
+    # price has 1 dimension in feature_layer
+    price = fc.numeric_column('price')
+
+    # one_hot_body_style has 3 dims in feature_layer.
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+    one_hot_body_style = fc.indicator_column(body_style)
+
+    # embedded_body_style has 5 dims in feature_layer.
+    country = fc.categorical_column_with_vocabulary_list(
+        'country', vocabulary_list=['US', 'JP', 'CA'])
+    embedded_country = fc.embedding_column(
+        country, dimension=2, initializer=_initializer)
+
+    # Provides 1-dim tensor and dense tensor.
+    features = {
+        'price': array_ops.placeholder(dtypes.float32),
+        'body-style': array_ops.sparse_placeholder(dtypes.string),
+        # This is dense tensor for the categorical_column.
+        'country': array_ops.placeholder(dtypes.string),
+    }
+    self.assertIsNone(features['price'].shape.ndims)
+    self.assertIsNone(features['body-style'].get_shape().ndims)
+    self.assertIsNone(features['country'].shape.ndims)
+
+    price_data = np.array([11., 12.])
+    body_style_data = sparse_tensor.SparseTensorValue(
+        indices=((0,), (1,)),
+        values=('sedan', 'hardtop'),
+        dense_shape=(2,))
+    country_data = np.array([['US'], ['CA']])
+
+    net = fc.FeatureLayer([price, one_hot_body_style, embedded_country])(
+        features)
+    self.assertEqual(1 + 3 + 2, net.shape[1])
+    with _initialized_session() as sess:
+
+      # Each row is formed by concatenating `embedded_body_style`,
+      # `one_hot_body_style`, and `price` in order.
+      self.assertAllEqual(
+          [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]],
+          sess.run(
+              net,
+              feed_dict={
+                  features['price']: price_data,
+                  features['body-style']: body_style_data,
+                  features['country']: country_data
+              }))
+
+  def test_with_rank_0_feature(self):
+    # price has 1 dimension in feature_layer
+    price = fc.numeric_column('price')
+    features = {
+        'price': constant_op.constant(0),
+    }
+    self.assertEqual(0, features['price'].shape.ndims)
+
+    # Static rank 0 should fail
+    with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
+      fc.FeatureLayer([price])(features)
+
+    # Dynamic rank 0 should fail
+    features = {
+        'price': array_ops.placeholder(dtypes.float32),
+    }
+    net = fc.FeatureLayer([price])(features)
+    self.assertEqual(1, net.shape[1])
+    with _initialized_session() as sess:
+      with self.assertRaisesOpError('Feature .* cannot have rank 0'):
+        sess.run(net, feed_dict={features['price']: np.array(1)})
+
+
+class InputLayerTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_retrieving_input(self):
+    features = {'a': [0.]}
+    input_layer = fc_old.InputLayer(fc.numeric_column('a'))
+    inputs = self.evaluate(input_layer(features))
+    self.assertAllClose([[0.]], inputs)
+
+  def test_reuses_variables(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(
+          key='a', num_buckets=3)
+      embedding_dimension = 2
+
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      input_layer = fc_old.InputLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      inputs = input_layer(features)
+      variables = input_layer.variables
+
+      # Sanity check: test that the inputs are correct.
+      self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)
 
       # Check that only one variable was created.
       self.assertEqual(1, len(variables))
 
-      # Check that invoking feature_layer on the same features does not create
+      # Check that invoking input_layer on the same features does not create
       # additional variables
-      _ = feature_layer(features)
+      _ = input_layer(features)
       self.assertEqual(1, len(variables))
-      self.assertEqual(variables[0], feature_layer.variables[0])
+      self.assertEqual(variables[0], input_layer.variables[0])
 
-  def test_feature_column_feature_layer_gradient(self):
+  def test_feature_column_input_layer_gradient(self):
     with context.eager_mode():
       sparse_input = sparse_tensor.SparseTensor(
           indices=((0, 0), (1, 0), (2, 0)),
@@ -1815,11 +3589,11 @@ class FeatureLayerTest(test.TestCase):
           dimension=embedding_dimension,
           initializer=_embedding_column_initializer)
 
-      feature_layer = FeatureLayer([embedding_column])
+      input_layer = fc_old.InputLayer([embedding_column])
       features = {'a': sparse_input}
 
       def scale_matrix():
-        matrix = feature_layer(features)
+        matrix = input_layer(features)
         return 2 * matrix
 
       # Sanity check: Verify that scale_matrix returns the correct output.
@@ -1834,32 +3608,32 @@ class FeatureLayerTest(test.TestCase):
       self.assertAllEqual([0, 1, 2], indexed_slice.indices)
       self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)
 
+
+class FunctionalInputLayerTest(test.TestCase):
+
   def test_raises_if_empty_feature_columns(self):
     with self.assertRaisesRegexp(ValueError,
                                  'feature_columns must not be empty'):
-      FeatureLayer(feature_columns=[])(features={})
+      fc_old.input_layer(features={}, feature_columns=[])
 
   def test_should_be_dense_column(self):
-    with self.assertRaisesRegexp(ValueError, 'must be a DenseColumn'):
-      FeatureLayer(feature_columns=[
-          fc.categorical_column_with_hash_bucket('wire_cast', 4)
-      ])(
-          features={
-              'a': [[0]]
-          })
+    with self.assertRaisesRegexp(ValueError, 'must be a _DenseColumn'):
+      fc_old.input_layer(
+          features={'a': [[0]]},
+          feature_columns=[
+              fc.categorical_column_with_hash_bucket('wire_cast', 4)
+          ])
 
   def test_does_not_support_dict_columns(self):
     with self.assertRaisesRegexp(
         ValueError, 'Expected feature_columns to be iterable, found dict.'):
-      FeatureLayer(feature_columns={'a': fc.numeric_column('a')})(
-          features={
-              'a': [[0]]
-          })
+      fc_old.input_layer(
+          features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')})
 
   def test_bare_column(self):
     with ops.Graph().as_default():
       features = features = {'a': [0.]}
-      net = FeatureLayer(fc.numeric_column('a'))(features)
+      net = fc_old.input_layer(features, fc.numeric_column('a'))
       with _initialized_session():
         self.assertAllClose([[0.]], net.eval())
 
@@ -1867,25 +3641,23 @@ class FeatureLayerTest(test.TestCase):
     with ops.Graph().as_default():
       features = features = {'a': [0.], 'b': [1.]}
       columns = (fc.numeric_column(key) for key in features)
-      net = FeatureLayer(columns)(features)
+      net = fc_old.input_layer(features, columns)
       with _initialized_session():
         self.assertAllClose([[0., 1.]], net.eval())
 
   def test_raises_if_duplicate_name(self):
     with self.assertRaisesRegexp(
         ValueError, 'Duplicate feature column name found for columns'):
-      FeatureLayer(
+      fc_old.input_layer(
+          features={'a': [[0]]},
           feature_columns=[fc.numeric_column('a'),
-                           fc.numeric_column('a')])(
-                               features={
-                                   'a': [[0]]
-                               })
+                           fc.numeric_column('a')])
 
   def test_one_column(self):
     price = fc.numeric_column('price')
     with ops.Graph().as_default():
       features = {'price': [[1.], [5.]]}
-      net = FeatureLayer([price])(features)
+      net = fc_old.input_layer(features, [price])
       with _initialized_session():
         self.assertAllClose([[1.], [5.]], net.eval())
 
@@ -1893,25 +3665,10 @@ class FeatureLayerTest(test.TestCase):
     price = fc.numeric_column('price', shape=2)
     with ops.Graph().as_default():
       features = {'price': [[1., 2.], [5., 6.]]}
-      net = FeatureLayer([price])(features)
+      net = fc_old.input_layer(features, [price])
       with _initialized_session():
         self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
 
-  def test_compute_output_shape(self):
-    price1 = fc.numeric_column('price1', shape=2)
-    price2 = fc.numeric_column('price2', shape=4)
-    with ops.Graph().as_default():
-      features = {
-          'price1': [[1., 2.], [5., 6.]],
-          'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]]
-      }
-      feature_layer = FeatureLayer([price1, price2])
-      self.assertEqual((None, 6), feature_layer.compute_output_shape((None,)))
-      net = feature_layer(features)
-      with _initialized_session():
-        self.assertAllClose(
-            [[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]], net.eval())
-
   def test_raises_if_shape_mismatch(self):
     price = fc.numeric_column('price', shape=2)
     with ops.Graph().as_default():
@@ -1919,13 +3676,13 @@ class FeatureLayerTest(test.TestCase):
       with self.assertRaisesRegexp(
           Exception,
           r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
-        FeatureLayer([price])(features)
+        fc_old.input_layer(features, [price])
 
   def test_reshaping(self):
     price = fc.numeric_column('price', shape=[1, 2])
     with ops.Graph().as_default():
       features = {'price': [[[1., 2.]], [[5., 6.]]]}
-      net = FeatureLayer([price])(features)
+      net = fc_old.input_layer(features, [price])
       with _initialized_session():
         self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
 
@@ -1933,26 +3690,128 @@ class FeatureLayerTest(test.TestCase):
     price1 = fc.numeric_column('price1', shape=2)
     price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
-      features = {
-          'price1': [[1., 2.], [5., 6.]],
-          'price2': [[3.], [4.]]
-      }
-      net = FeatureLayer([price1, price2])(features)
+      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
+      net = fc_old.input_layer(features, [price1, price2])
       with _initialized_session():
         self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
 
-  def test_cols_to_output_tensors(self):
-    price1 = fc.numeric_column('price1', shape=2)
-    price2 = fc.numeric_column('price2')
+  def test_fills_cols_to_vars(self):
+    # Provide three _DenseColumn's to input_layer: a _NumericColumn, a
+    # _BucketizedColumn, and an _EmbeddingColumn.  Only the _EmbeddingColumn
+    # creates a Variable.
+    price1 = fc.numeric_column('price1')
+    dense_feature = fc.numeric_column('dense_feature')
+    dense_feature_bucketized = fc.bucketized_column(
+        dense_feature, boundaries=[0.])
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)
     with ops.Graph().as_default():
-      cols_dict = {}
-      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
-      feature_layer = FeatureLayer([price1, price2])
-      net = feature_layer(features, cols_dict)
-      with _initialized_session():
-        self.assertAllClose([[1., 2.], [5., 6.]], cols_dict[price1].eval())
-        self.assertAllClose([[3.], [4.]], cols_dict[price2].eval())
-        self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
+      features = {
+          'price1': [[3.], [4.]],
+          'dense_feature': [[-1.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+      }
+      cols_to_vars = {}
+      all_cols = [price1, dense_feature_bucketized, some_embedding_column]
+      fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
+      self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
+      self.assertEqual(0, len(cols_to_vars[price1]))
+      self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
+      self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
+      self.assertIsInstance(cols_to_vars[some_embedding_column][0],
+                            variables_lib.Variable)
+      self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10])
+
+  def test_fills_cols_to_vars_shared_embedding(self):
+    # Provide 5 DenseColumn's to input_layer: a NumericColumn, a
+    # BucketizedColumn, an EmbeddingColumn, two SharedEmbeddingColumns. The
+    # EmbeddingColumn creates a Variable and the two SharedEmbeddingColumns
+    # shared one variable.
+    price1 = fc.numeric_column('price1')
+    dense_feature = fc.numeric_column('dense_feature')
+    dense_feature_bucketized = fc.bucketized_column(
+        dense_feature, boundaries=[0.])
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)
+    categorical_column_a = fc_old.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc_old.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b], dimension=2)
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[3.], [4.]],
+          'dense_feature': [[-1.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      cols_to_vars = {}
+      all_cols = [
+          price1, dense_feature_bucketized, some_embedding_column,
+          shared_embedding_a, shared_embedding_b
+      ]
+      fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
+      self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
+      self.assertEqual(0, len(cols_to_vars[price1]))
+      self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
+      self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
+      self.assertEqual(1, len(cols_to_vars[shared_embedding_a]))
+      # This is a bug in the current implementation and should be fixed in the
+      # new one.
+      self.assertEqual(0, len(cols_to_vars[shared_embedding_b]))
+      self.assertIsInstance(cols_to_vars[some_embedding_column][0],
+                            variables_lib.Variable)
+      self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10])
+      self.assertIsInstance(cols_to_vars[shared_embedding_a][0],
+                            variables_lib.Variable)
+      self.assertAllEqual(cols_to_vars[shared_embedding_a][0].shape, [3, 2])
+
+  def test_fills_cols_to_vars_partitioned_variables(self):
+    price1 = fc.numeric_column('price1')
+    dense_feature = fc.numeric_column('dense_feature')
+    dense_feature_bucketized = fc.bucketized_column(
+        dense_feature, boundaries=[0.])
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[3.], [4.]],
+          'dense_feature': [[-1.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+      }
+      cols_to_vars = {}
+      all_cols = [price1, dense_feature_bucketized, some_embedding_column]
+      with variable_scope.variable_scope(
+          'input_from_feature_columns',
+          partitioner=partitioned_variables.fixed_size_partitioner(3, axis=0)):
+        fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
+      self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
+      self.assertEqual(0, len(cols_to_vars[price1]))
+      self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
+      self.assertEqual(3, len(cols_to_vars[some_embedding_column]))
+      self.assertEqual(
+          'input_from_feature_columns/input_layer/sparse_feature_embedding/'
+          'embedding_weights/part_0:0',
+          cols_to_vars[some_embedding_column][0].name)
+      self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [2, 10])
+      self.assertAllEqual(cols_to_vars[some_embedding_column][1].shape, [2, 10])
+      self.assertAllEqual(cols_to_vars[some_embedding_column][2].shape, [1, 10])
 
   def test_column_order(self):
     price_a = fc.numeric_column('price_a')
@@ -1962,8 +3821,8 @@ class FeatureLayerTest(test.TestCase):
           'price_a': [[1.]],
           'price_b': [[3.]],
       }
-      net1 = FeatureLayer([price_a, price_b])(features)
-      net2 = FeatureLayer([price_b, price_a])(features)
+      net1 = fc_old.input_layer(features, [price_a, price_b])
+      net2 = fc_old.input_layer(features, [price_b, price_a])
       with _initialized_session():
         self.assertAllClose([[1., 3.]], net1.eval())
         self.assertAllClose([[1., 3.]], net2.eval())
@@ -1976,8 +3835,8 @@ class FeatureLayerTest(test.TestCase):
               sparse_tensor.SparseTensor(
                   indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
       }
-      with self.assertRaisesRegexp(Exception, 'must be a DenseColumn'):
-        FeatureLayer([animal])(features)
+      with self.assertRaisesRegexp(Exception, 'must be a _DenseColumn'):
+        fc_old.input_layer(features, [animal])
 
   def test_static_batch_size_mismatch(self):
     price1 = fc.numeric_column('price1')
@@ -1990,7 +3849,7 @@ class FeatureLayerTest(test.TestCase):
       with self.assertRaisesRegexp(
           ValueError,
           'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
-        FeatureLayer([price1, price2])(features)
+        fc_old.input_layer(features, [price1, price2])
 
   def test_subset_of_static_batch_size_mismatch(self):
     price1 = fc.numeric_column('price1')
@@ -2005,7 +3864,7 @@ class FeatureLayerTest(test.TestCase):
       with self.assertRaisesRegexp(
           ValueError,
           'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
-        FeatureLayer([price1, price2, price3])(features)
+        fc_old.input_layer(features, [price1, price2, price3])
 
   def test_runtime_batch_size_mismatch(self):
     price1 = fc.numeric_column('price1')
@@ -2015,7 +3874,7 @@ class FeatureLayerTest(test.TestCase):
           'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
           'price2': [[3.], [4.]]  # batchsize = 2
       }
-      net = FeatureLayer([price1, price2])(features)
+      net = fc_old.input_layer(features, [price1, price2])
       with _initialized_session() as sess:
         with self.assertRaisesRegexp(errors.OpError,
                                      'Dimensions of inputs should match'):
@@ -2029,7 +3888,7 @@ class FeatureLayerTest(test.TestCase):
           'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
           'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
       }
-      net = FeatureLayer([price1, price2])(features)
+      net = fc_old.input_layer(features, [price1, price2])
       with _initialized_session() as sess:
         sess.run(
             net,
@@ -2049,181 +3908,39 @@ class FeatureLayerTest(test.TestCase):
           'sparse_feature': [['a'], ['x']],
       }
       all_cols = [some_embedding_column]
-      FeatureLayer(all_cols)(features)
-      FeatureLayer(all_cols)(features)
-      # Make sure that 2 variables get created in this case.
-      self.assertEqual(2, len(
-          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
-      expected_var_names = [
-          'feature_layer/sparse_feature_embedding/embedding_weights:0',
-          'feature_layer_1/sparse_feature_embedding/embedding_weights:0'
-      ]
-      self.assertItemsEqual(
-          expected_var_names,
-          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
-
-  def test_multiple_layers_with_same_shared_embedding_column(self):
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    categorical_column_b = fc.categorical_column_with_identity(
-        key='bbb', num_buckets=3)
-    embedding_dimension = 2
-    embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
-        [categorical_column_b, categorical_column_a],
-        dimension=embedding_dimension)
-    shared_state_manager = fc.SharedEmbeddingStateManager(
-        name='shared_feature_layer')
-
-    with ops.Graph().as_default():
-      features = {
-          'aaa':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(0, 1, 0),
-                  dense_shape=(2, 2)),
-          'bbb':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(1, 2, 1),
-                  dense_shape=(2, 2)),
-      }
-      all_cols = [embedding_column_a, embedding_column_b]
-      FeatureLayer(
-          all_cols, shared_state_manager=shared_state_manager)(
-              features)
-      FeatureLayer(
-          all_cols, shared_state_manager=shared_state_manager)(
-              features)
-      # Make sure that only 1 variable gets created in this case.
-      self.assertEqual(1, len(
-          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
-      self.assertItemsEqual(
-          ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
-          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
-
-  def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self):
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    categorical_column_b = fc.categorical_column_with_identity(
-        key='bbb', num_buckets=3)
-    embedding_dimension = 2
-    embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
-        [categorical_column_b, categorical_column_a],
-        dimension=embedding_dimension)
-    all_cols = [embedding_column_a, embedding_column_b]
-
-    with ops.Graph().as_default():
-      shared_state_manager1 = fc.SharedEmbeddingStateManager(
-          name='shared_feature_layer')
-      features = {
-          'aaa':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(0, 1, 0),
-                  dense_shape=(2, 2)),
-          'bbb':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(1, 2, 1),
-                  dense_shape=(2, 2)),
-      }
-      FeatureLayer(
-          all_cols, shared_state_manager=shared_state_manager1)(
-              features)
-      # Make sure that only 1 variable gets created in this case.
-      self.assertEqual(1, len(
-          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
-
-    with ops.Graph().as_default():
-      shared_state_manager2 = fc.SharedEmbeddingStateManager(
-          name='shared_feature_layer')
-      features1 = {
-          'aaa':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(0, 1, 0),
-                  dense_shape=(2, 2)),
-          'bbb':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(1, 2, 1),
-                  dense_shape=(2, 2)),
-      }
-
-      FeatureLayer(
-          all_cols, shared_state_manager=shared_state_manager2)(
-              features1)
-      # Make sure that only 1 variable gets created in this case.
-      self.assertEqual(1, len(
+      fc_old.input_layer(features, all_cols)
+      fc_old.input_layer(features, all_cols)
+      # Make sure that 2 variables get created in this case.
+      self.assertEqual(2, len(
           ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+      expected_var_names = [
+          'input_layer/sparse_feature_embedding/embedding_weights:0',
+          'input_layer_1/sparse_feature_embedding/embedding_weights:0'
+      ]
       self.assertItemsEqual(
-          ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
+          expected_var_names,
           [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
 
-  def test_with_numpy_input_fn(self):
-    embedding_values = (
-        (1., 2., 3., 4., 5.),  # id 0
-        (6., 7., 8., 9., 10.),  # id 1
-        (11., 12., 13., 14., 15.)  # id 2
-    )
-    def _initializer(shape, dtype, partition_info):
-      del shape, dtype, partition_info
-      return embedding_values
-
-    # price has 1 dimension in feature_layer
-    price = fc.numeric_column('price')
-    body_style = fc.categorical_column_with_vocabulary_list(
-        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
-    # one_hot_body_style has 3 dims in feature_layer.
-    one_hot_body_style = fc.indicator_column(body_style)
-    # embedded_body_style has 5 dims in feature_layer.
-    embedded_body_style = fc.embedding_column(
-        body_style, dimension=5, initializer=_initializer)
-
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'price': np.array([11., 12., 13., 14.]),
-            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
-        },
-        batch_size=2,
-        shuffle=False)
-    features = input_fn()
-    net = FeatureLayer([price, one_hot_body_style, embedded_body_style])(
-        features)
-    self.assertEqual(1 + 3 + 5, net.shape[1])
-    with _initialized_session() as sess:
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-
-      # Each row is formed by concatenating `embedded_body_style`,
-      # `one_hot_body_style`, and `price` in order.
-      self.assertAllEqual(
-          [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
-           [1., 2., 3., 4., 5., 1., 0., 0., 12]],
-          sess.run(net))
-
-      coord.request_stop()
-      coord.join(threads)
-
   def test_with_1d_sparse_tensor(self):
     embedding_values = (
         (1., 2., 3., 4., 5.),  # id 0
         (6., 7., 8., 9., 10.),  # id 1
         (11., 12., 13., 14., 15.)  # id 2
     )
+
     def _initializer(shape, dtype, partition_info):
       del shape, dtype, partition_info
       return embedding_values
 
-    # price has 1 dimension in feature_layer
+    # price has 1 dimension in input_layer
     price = fc.numeric_column('price')
 
-    # one_hot_body_style has 3 dims in feature_layer.
+    # one_hot_body_style has 3 dims in input_layer.
     body_style = fc.categorical_column_with_vocabulary_list(
         'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
     one_hot_body_style = fc.indicator_column(body_style)
 
-    # embedded_body_style has 5 dims in feature_layer.
+    # embedded_body_style has 5 dims in input_layer.
     country = fc.categorical_column_with_vocabulary_list(
         'country', vocabulary_list=['US', 'JP', 'CA'])
     embedded_country = fc.embedding_column(
@@ -2231,28 +3948,34 @@ class FeatureLayerTest(test.TestCase):
 
     # Provides 1-dim tensor and dense tensor.
     features = {
-        'price': constant_op.constant([11., 12.,]),
-        'body-style': sparse_tensor.SparseTensor(
-            indices=((0,), (1,)),
-            values=('sedan', 'hardtop'),
-            dense_shape=(2,)),
+        'price':
+            constant_op.constant([
+                11.,
+                12.,
+            ]),
+        'body-style':
+            sparse_tensor.SparseTensor(
+                indices=((0,), (1,)),
+                values=('sedan', 'hardtop'),
+                dense_shape=(2,)),
         # This is dense tensor for the categorical_column.
-        'country': constant_op.constant(['CA', 'US']),
+        'country':
+            constant_op.constant(['CA', 'US']),
     }
     self.assertEqual(1, features['price'].shape.ndims)
     self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
     self.assertEqual(1, features['country'].shape.ndims)
 
-    net = FeatureLayer([price, one_hot_body_style, embedded_country])(features)
+    net = fc_old.input_layer(features,
+                             [price, one_hot_body_style, embedded_country])
     self.assertEqual(1 + 3 + 5, net.shape[1])
     with _initialized_session() as sess:
 
       # Each row is formed by concatenating `embedded_body_style`,
       # `one_hot_body_style`, and `price` in order.
-      self.assertAllEqual(
-          [[0., 0., 1., 11., 12., 13., 14., 15., 11.],
-           [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
-          sess.run(net))
+      self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.],
+                           [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
+                          sess.run(net))
 
   def test_with_1d_unknown_shape_sparse_tensor(self):
     embedding_values = (
@@ -2260,19 +3983,20 @@ class FeatureLayerTest(test.TestCase):
         (6., 7.),  # id 1
         (11., 12.)  # id 2
     )
+
     def _initializer(shape, dtype, partition_info):
       del shape, dtype, partition_info
       return embedding_values
 
-    # price has 1 dimension in feature_layer
+    # price has 1 dimension in input_layer
     price = fc.numeric_column('price')
 
-    # one_hot_body_style has 3 dims in feature_layer.
+    # one_hot_body_style has 3 dims in input_layer.
     body_style = fc.categorical_column_with_vocabulary_list(
         'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
     one_hot_body_style = fc.indicator_column(body_style)
 
-    # embedded_body_style has 5 dims in feature_layer.
+    # embedded_body_style has 5 dims in input_layer.
     country = fc.categorical_column_with_vocabulary_list(
         'country', vocabulary_list=['US', 'JP', 'CA'])
     embedded_country = fc.embedding_column(
@@ -2291,12 +4015,11 @@ class FeatureLayerTest(test.TestCase):
 
     price_data = np.array([11., 12.])
     body_style_data = sparse_tensor.SparseTensorValue(
-        indices=((0,), (1,)),
-        values=('sedan', 'hardtop'),
-        dense_shape=(2,))
+        indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,))
     country_data = np.array([['US'], ['CA']])
 
-    net = FeatureLayer([price, one_hot_body_style, embedded_country])(features)
+    net = fc_old.input_layer(features,
+                             [price, one_hot_body_style, embedded_country])
     self.assertEqual(1 + 3 + 2, net.shape[1])
     with _initialized_session() as sess:
 
@@ -2313,7 +4036,7 @@ class FeatureLayerTest(test.TestCase):
               }))
 
   def test_with_rank_0_feature(self):
-    # price has 1 dimension in feature_layer
+    # price has 1 dimension in input_layer
     price = fc.numeric_column('price')
     features = {
         'price': constant_op.constant(0),
@@ -2322,13 +4045,13 @@ class FeatureLayerTest(test.TestCase):
 
     # Static rank 0 should fail
     with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
-      FeatureLayer([price])(features)
+      fc_old.input_layer(features, [price])
 
     # Dynamic rank 0 should fail
     features = {
         'price': array_ops.placeholder(dtypes.float32),
     }
-    net = FeatureLayer([price])(features)
+    net = fc_old.input_layer(features, [price])
     self.assertEqual(1, net.shape[1])
     with _initialized_session() as sess:
       with self.assertRaisesOpError('Feature .* cannot have rank 0'):
@@ -2337,10 +4060,14 @@ class FeatureLayerTest(test.TestCase):
 
 class MakeParseExampleSpecTest(test.TestCase):
 
-  class _TestFeatureColumn(FeatureColumn,
+  class _TestFeatureColumn(fc.FeatureColumn,
                            collections.namedtuple('_TestFeatureColumn',
                                                   ('parse_spec'))):
 
+    @property
+    def _is_v2_column(self):
+      return True
+
     @property
     def name(self):
       return '_TestFeatureColumn'
@@ -2458,6 +4185,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
     self.assertEqual({
         'aaa': parsing_ops.VarLenFeature(dtypes.string)
     }, column.parse_example_spec)
+    self.assertTrue(column._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -2501,7 +4229,10 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
-    column.get_sparse_tensors(FeatureTransformationCache({'aaa': inputs}), None)
+    column.get_sparse_tensors(
+        fc.FeatureTransformationCache({
+            'aaa': inputs
+        }), None)
     with self.assertRaisesRegexp(errors.OpError, 'file_does_not_exist'):
       with self.cached_session():
         lookup_ops.tables_initializer().run()
@@ -2525,7 +4256,10 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
-    column.get_sparse_tensors(FeatureTransformationCache({'aaa': inputs}), None)
+    column.get_sparse_tensors(
+        fc.FeatureTransformationCache({
+            'aaa': inputs
+        }), None)
     with self.assertRaisesRegexp(errors.OpError, 'Invalid vocab_size'):
       with self.cached_session():
         lookup_ops.tables_initializer().run()
@@ -2564,7 +4298,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -2580,7 +4314,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -2616,7 +4350,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2637,7 +4371,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2659,7 +4393,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
-    id_tensor = _transform_features({'aaa': inputs}, [column], None)[column]
+    id_tensor = fc._transform_features({'aaa': inputs}, [column], None)[column]
     with _initialized_session():
       _assert_sparse_tensor_value(self,
                                   sparse_tensor.SparseTensorValue(
@@ -2675,7 +4409,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         vocabulary_file=self._wire_vocabulary_file_name,
         vocabulary_size=self._wire_vocabulary_size)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': (('marlo', ''), ('skywalker', 'omar'))
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2699,7 +4433,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2723,7 +4457,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar', 'heisenberg'),
         dense_shape=(2, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2749,7 +4483,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2773,7 +4507,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=(11, 100, 30, 22),
         dense_shape=(3, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2795,7 +4529,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         dtype=dtypes.int32,
         default_value=default_value)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': ((11, -1, -1), (100, 30, -1), (-1, -1, 22))
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2820,7 +4554,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=(11, 100, 30, 22),
         dense_shape=(3, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2859,6 +4593,32 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
         self.assertAllClose(((3.,), (5.,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    wire_column = fc.categorical_column_with_vocabulary_file(
+        key='wire',
+        vocabulary_file=self._wire_vocabulary_file_name,
+        vocabulary_size=self._wire_vocabulary_size,
+        num_oov_buckets=1)
+    self.assertEqual(4, wire_column.num_buckets)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          wire_column.name:
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=('marlo', 'skywalker', 'omar'),
+                  dense_shape=(2, 2))
+      }, (wire_column,))
+      bias = get_linear_model_bias()
+      wire_var = get_linear_model_column_var(wire_column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval()
+        # 'marlo' -> 2: wire_var[2] = 3
+        # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
+        self.assertAllClose(((3.,), (5.,)), predictions.eval())
+
 
 class VocabularyListCategoricalColumnTest(test.TestCase):
 
@@ -2871,6 +4631,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
     self.assertEqual({
         'aaa': parsing_ops.VarLenFeature(dtypes.string)
     }, column.parse_example_spec)
+    self.assertTrue(column._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -2973,7 +4734,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -2987,7 +4748,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -3044,7 +4805,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3065,7 +4826,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
-    id_tensor = _transform_features({'aaa': inputs}, [column], None)[column]
+    id_tensor = fc._transform_features({'aaa': inputs}, [column], None)[column]
     with _initialized_session():
       _assert_sparse_tensor_value(
           self,
@@ -3080,7 +4841,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         key='aaa',
         vocabulary_list=('omar', 'stringer', 'marlo'))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': (('marlo', ''), ('skywalker', 'omar'))
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3103,7 +4864,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3126,7 +4887,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar', 'heisenberg'),
         dense_shape=(2, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3149,7 +4910,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=np.array((11, 100, 30, 22), dtype=np.int32),
         dense_shape=(3, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3170,10 +4931,10 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         dtype=dtypes.int32,
         default_value=default_value)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa':
-                np.array(
-                    ((11, -1, -1), (100, 30, -1), (-1, -1, 22)), dtype=np.int32)
+                np.array(((11, -1, -1), (100, 30, -1), (-1, -1, 22)),
+                         dtype=np.int32)
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
     with _initialized_session():
@@ -3196,7 +4957,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=(11, 100, 30, 22),
         dense_shape=(3, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3234,6 +4995,31 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
         self.assertAllClose(((3.,), (5.,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    wire_column = fc.categorical_column_with_vocabulary_list(
+        key='aaa',
+        vocabulary_list=('omar', 'stringer', 'marlo'),
+        num_oov_buckets=1)
+    self.assertEqual(4, wire_column.num_buckets)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          wire_column.name:
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=('marlo', 'skywalker', 'omar'),
+                  dense_shape=(2, 2))
+      }, (wire_column,))
+      bias = get_linear_model_bias()
+      wire_var = get_linear_model_column_var(wire_column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval()
+        # 'marlo' -> 2: wire_var[2] = 3
+        # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
+        self.assertAllClose(((3.,), (5.,)), predictions.eval())
+
 
 class IdentityCategoricalColumnTest(test.TestCase):
 
@@ -3245,6 +5031,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
     self.assertEqual({
         'aaa': parsing_ops.VarLenFeature(dtypes.int64)
     }, column.parse_example_spec)
+    self.assertTrue(column._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -3285,7 +5072,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'Invalid input, not integer'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -3317,7 +5104,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=(0, 1, 0),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3336,7 +5123,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=(0, 1, 0),
         dense_shape=(2, 2))
-    id_tensor = _transform_features({'aaa': inputs}, [column], None)[column]
+    id_tensor = fc._transform_features({'aaa': inputs}, [column], None)[column]
     with _initialized_session():
       _assert_sparse_tensor_value(
           self,
@@ -3349,7 +5136,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
   def test_get_sparse_tensors_dense_input(self):
     column = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': ((0, -1), (1, 0))
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3369,7 +5156,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=(1, -1, 0),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3385,7 +5172,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=(1, 99, 0),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3402,7 +5189,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=(1, -1, 99),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3426,7 +5213,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=input_values,
         dense_shape=input_shape)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3465,6 +5252,28 @@ class IdentityCategoricalColumnTest(test.TestCase):
         # weight_var[2] + weight_var[1] = 3+2 = 5
         self.assertAllClose(((1.,), (5.,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    column = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
+    self.assertEqual(3, column.num_buckets)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          column.name:
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2))
+      }, (column,))
+      bias = get_linear_model_bias()
+      weight_var = get_linear_model_column_var(column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        weight_var.assign(((1.,), (2.,), (3.,))).eval()
+        # weight_var[0] = 1
+        # weight_var[2] + weight_var[1] = 3+2 = 5
+        self.assertAllClose(((1.,), (5.,)), predictions.eval())
+
 
 class TransformFeaturesTest(test.TestCase):
 
@@ -3483,8 +5292,8 @@ class TransformFeaturesTest(test.TestCase):
                   indices=[[0, 0], [1, 0], [1, 1]],
                   dense_shape=[2, 2])
       }
-      transformed = _transform_features(features,
-                                        [bucketized_price, hashed_sparse], None)
+      transformed = fc._transform_features(
+          features, [bucketized_price, hashed_sparse], None)
       with _initialized_session():
         self.assertIn(bucketized_price.name, transformed[bucketized_price].name)
         self.assertAllEqual([[0], [3]], transformed[bucketized_price].eval())
@@ -3494,11 +5303,15 @@ class TransformFeaturesTest(test.TestCase):
   def test_column_order(self):
     """When the column is both dense and sparse, uses sparse tensors."""
 
-    class _LoggerColumn(FeatureColumn):
+    class _LoggerColumn(fc.FeatureColumn):
 
       def __init__(self, name):
         self._name = name
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return self._name
@@ -3516,12 +5329,12 @@ class TransformFeaturesTest(test.TestCase):
       column1 = _LoggerColumn('1')
       column2 = _LoggerColumn('2')
       call_logger = {'count': 0}
-      _transform_features({}, [column1, column2], None)
+      fc._transform_features({}, [column1, column2], None)
       self.assertEqual(0, column1.call_order)
       self.assertEqual(1, column2.call_order)
 
       call_logger = {'count': 0}
-      _transform_features({}, [column2, column1], None)
+      fc._transform_features({}, [column2, column1], None)
       self.assertEqual(0, column1.call_order)
       self.assertEqual(1, column2.call_order)
 
@@ -3534,17 +5347,19 @@ class IndicatorColumnTest(test.TestCase):
     self.assertEqual(indicator_a.categorical_column.name, 'a')
     self.assertEqual(indicator_a.name, 'a_indicator')
     self.assertEqual(indicator_a.variable_shape, [1, 4])
+    self.assertTrue(indicator_a._is_v2_column)
 
-    b = fc.categorical_column_with_hash_bucket('b', hash_bucket_size=100)
+    b = fc_old.categorical_column_with_hash_bucket('b', hash_bucket_size=100)
     indicator_b = fc.indicator_column(b)
     self.assertEqual(indicator_b.categorical_column.name, 'b')
     self.assertEqual(indicator_b.name, 'b_indicator')
     self.assertEqual(indicator_b.variable_shape, [1, 100])
+    self.assertFalse(indicator_b._is_v2_column)
 
   def test_1D_shape_succeeds(self):
     animal = fc.indicator_column(
         fc.categorical_column_with_hash_bucket('animal', 4))
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'animal': ['fox', 'fox']
     })
     output = transformation_cache.get(animal, None)
@@ -3555,7 +5370,7 @@ class IndicatorColumnTest(test.TestCase):
     # TODO(ispir/cassandrax): Swith to categorical_column_with_keys when ready.
     animal = fc.indicator_column(
         fc.categorical_column_with_hash_bucket('animal', 4))
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'animal':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0], [1, 0]],
@@ -3570,7 +5385,7 @@ class IndicatorColumnTest(test.TestCase):
     animal = fc.indicator_column(
         fc.categorical_column_with_identity('animal', num_buckets=4))
 
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'animal':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0], [0, 1]], values=[1, 1], dense_shape=[1, 2])
@@ -3582,7 +5397,7 @@ class IndicatorColumnTest(test.TestCase):
   def test_multi_hot2(self):
     animal = fc.indicator_column(
         fc.categorical_column_with_identity('animal', num_buckets=4))
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'animal':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
@@ -3632,8 +5447,8 @@ class IndicatorColumnTest(test.TestCase):
             values=('marlo', 'skywalker', 'omar'),
             dense_shape=(2, 2))
     }
-    indicator_tensor = _transform_features(features, [a_indicator],
-                                           None)[a_indicator]
+    indicator_tensor = fc._transform_features(features, [a_indicator],
+                                              None)[a_indicator]
     with _initialized_session():
       self.assertAllEqual([[0, 0, 1], [1, 0, 0]], indicator_tensor.eval())
 
@@ -3647,8 +5462,8 @@ class IndicatorColumnTest(test.TestCase):
         'ids': constant_op.constant([['c', 'b', 'a']]),
         'weights': constant_op.constant([[2., 4., 6.]])
     }
-    indicator_tensor = _transform_features(features, [indicator],
-                                           None)[indicator]
+    indicator_tensor = fc._transform_features(features, [indicator],
+                                              None)[indicator]
     with _initialized_session():
       self.assertAllEqual([[6., 4., 2.]], indicator_tensor.eval())
 
@@ -3662,8 +5477,8 @@ class IndicatorColumnTest(test.TestCase):
         'ids': constant_op.constant([['c', 'b', 'unknown']]),
         'weights': constant_op.constant([[2., 4., 6.]])
     }
-    indicator_tensor = _transform_features(features, [indicator],
-                                           None)[indicator]
+    indicator_tensor = fc._transform_features(features, [indicator],
+                                              None)[indicator]
     with _initialized_session():
       self.assertAllEqual([[0., 4., 2.]], indicator_tensor.eval())
 
@@ -3675,8 +5490,8 @@ class IndicatorColumnTest(test.TestCase):
     features = {
         'ids': constant_op.constant([['c', 'b', 'unknown']]),
     }
-    indicator_tensor = _transform_features(features, [indicator],
-                                           None)[indicator]
+    indicator_tensor = fc._transform_features(features, [indicator],
+                                              None)[indicator]
     with _initialized_session():
       self.assertAllEqual([[0., 1., 1.]], indicator_tensor.eval())
 
@@ -3700,6 +5515,44 @@ class IndicatorColumnTest(test.TestCase):
         weight_var.assign([[1.], [2.], [3.], [4.]]).eval()
         self.assertAllClose([[2. + 3.]], predictions.eval())
 
+  def test_old_linear_model(self):
+    animal = fc.indicator_column(
+        fc.categorical_column_with_identity('animal', num_buckets=4))
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+
+      predictions = fc_old.linear_model(features, [animal])
+      weight_var = get_linear_model_column_var(animal)
+      with _initialized_session():
+        # All should be zero-initialized.
+        self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval())
+        self.assertAllClose([[0.]], predictions.eval())
+        weight_var.assign([[1.], [2.], [3.], [4.]]).eval()
+        self.assertAllClose([[2. + 3.]], predictions.eval())
+
+  def test_old_linear_model_old_categorical(self):
+    animal = fc.indicator_column(
+        fc_old.categorical_column_with_identity('animal', num_buckets=4))
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+
+      predictions = fc_old.linear_model(features, [animal])
+      weight_var = get_linear_model_column_var(animal)
+      with _initialized_session():
+        # All should be zero-initialized.
+        self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval())
+        self.assertAllClose([[0.]], predictions.eval())
+        weight_var.assign([[1.], [2.], [3.], [4.]]).eval()
+        self.assertAllClose([[2. + 3.]], predictions.eval())
+
   def test_feature_layer(self):
     animal = fc.indicator_column(
         fc.categorical_column_with_identity('animal', num_buckets=4))
@@ -3709,12 +5562,38 @@ class IndicatorColumnTest(test.TestCase):
               sparse_tensor.SparseTensor(
                   indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
       }
-      net = FeatureLayer([animal])(features)
+      net = fc.FeatureLayer([animal])(features)
+      with _initialized_session():
+        self.assertAllClose([[0., 1., 1., 0.]], net.eval())
+
+  def test_input_layer(self):
+    animal = fc.indicator_column(
+        fc.categorical_column_with_identity('animal', num_buckets=4))
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+      net = fc_old.input_layer(features, [animal])
+      with _initialized_session():
+        self.assertAllClose([[0., 1., 1., 0.]], net.eval())
+
+  def test_input_layer_old_categorical(self):
+    animal = fc.indicator_column(
+        fc_old.categorical_column_with_identity('animal', num_buckets=4))
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+      net = fc_old.input_layer(features, [animal])
       with _initialized_session():
         self.assertAllClose([[0., 1., 1., 0.]], net.eval())
 
 
-class _TestStateManager(StateManager):
+class _TestStateManager(fc.StateManager):
 
   def __init__(self, trainable=True):
     # Dict of feature_column to a dict of variables.
@@ -3771,6 +5650,15 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertEqual({
         'aaa': parsing_ops.VarLenFeature(dtypes.int64)
     }, embedding_column.parse_example_spec)
+    self.assertTrue(embedding_column._is_v2_column)
+
+  def test_is_v2_column(self):
+    categorical_column = fc_old.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=embedding_dimension)
+    self.assertFalse(embedding_column._is_v2_column)
 
   def test_all_constructor_args(self):
     categorical_column = fc.categorical_column_with_identity(
@@ -3860,7 +5748,7 @@ class EmbeddingColumnTest(test.TestCase):
             values=(0, 1, 0),
             dense_shape=(2, 2))
     }
-    outputs = _transform_features(features, [a, a_embedded], None)
+    outputs = fc._transform_features(features, [a, a_embedded], None)
     output_a = outputs[a]
     output_embedded = outputs[a_embedded]
     with _initialized_session():
@@ -3905,19 +5793,79 @@ class EmbeddingColumnTest(test.TestCase):
     )
 
     # Build columns.
-    categorical_column = fc.categorical_column_with_identity(
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=embedding_dimension,
+        initializer=_initializer)
+    state_manager = _TestStateManager()
+    embedding_column.create_state(state_manager)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup = embedding_column.get_dense_tensor(
+        fc.FeatureTransformationCache({
+            'aaa': sparse_input
+        }), state_manager)
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, global_vars[0].eval())
+      self.assertAllEqual(expected_lookups, embedding_lookup.eval())
+
+  def test_get_dense_tensor_old_categorical(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc_old.categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     embedding_column = fc.embedding_column(
-        categorical_column, dimension=embedding_dimension,
+        categorical_column,
+        dimension=embedding_dimension,
         initializer=_initializer)
-    state_manager = _TestStateManager()
-    embedding_column.create_state(state_manager)
 
     # Provide sparse input and get dense result.
-    embedding_lookup = embedding_column.get_dense_tensor(
-        FeatureTransformationCache({
+    embedding_lookup = embedding_column._get_dense_tensor(
+        fc_old._LazyBuilder({
             'aaa': sparse_input
-        }), state_manager)
+        }))
 
     # Assert expected embedding variable and lookups.
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
@@ -3977,7 +5925,7 @@ class EmbeddingColumnTest(test.TestCase):
 
     # Provide sparse input and get dense result.
     embedding_lookup = embedding_column.get_dense_tensor(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': sparse_input
         }), state_manager)
 
@@ -4040,7 +5988,7 @@ class EmbeddingColumnTest(test.TestCase):
     input_values = array_ops.placeholder(dtype=dtypes.int64)
     input_shape = array_ops.placeholder(dtype=dtypes.int64)
     embedding_lookup = embedding_column.get_dense_tensor(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa':
                 sparse_tensor.SparseTensorValue(
                     indices=input_indices,
@@ -4108,7 +6056,7 @@ class EmbeddingColumnTest(test.TestCase):
 
     # Provide sparse input and get dense result.
     embedding_lookup = embedding_column.get_dense_tensor(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': sparse_input
         }), state_manager)
 
@@ -4120,7 +6068,263 @@ class EmbeddingColumnTest(test.TestCase):
       self.assertAllEqual(embedding_values, global_vars[0].eval())
       self.assertAllEqual(expected_lookups, embedding_lookup.eval())
 
-  def test_linear_model(self):
+  def test_linear_model(self):
+    # Inputs.
+    batch_size = 4
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(batch_size, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_shape = (vocabulary_size, embedding_dimension)
+    zeros_embedding_values = np.zeros(embedding_shape)
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual(embedding_shape, shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return zeros_embedding_values
+
+    # Build columns.
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    with ops.Graph().as_default():
+      model = fc.LinearModel((embedding_column,))
+      predictions = model({categorical_column.name: sparse_input})
+      expected_var_names = (
+          'linear_model/bias_weights:0',
+          'linear_model/aaa_embedding/weights:0',
+          'linear_model/aaa_embedding/embedding_weights:0',
+      )
+      self.assertItemsEqual(
+          expected_var_names,
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+      trainable_vars = {
+          v.name: v for v in ops.get_collection(
+              ops.GraphKeys.TRAINABLE_VARIABLES)
+      }
+      self.assertItemsEqual(expected_var_names, trainable_vars.keys())
+      bias = trainable_vars['linear_model/bias_weights:0']
+      embedding_weights = trainable_vars[
+          'linear_model/aaa_embedding/embedding_weights:0']
+      linear_weights = trainable_vars[
+          'linear_model/aaa_embedding/weights:0']
+      with _initialized_session():
+        # Predictions with all zero weights.
+        self.assertAllClose(np.zeros((1,)), bias.eval())
+        self.assertAllClose(zeros_embedding_values, embedding_weights.eval())
+        self.assertAllClose(
+            np.zeros((embedding_dimension, 1)), linear_weights.eval())
+        self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval())
+
+        # Predictions with all non-zero weights.
+        embedding_weights.assign((
+            (1., 2.),  # id 0
+            (3., 5.),  # id 1
+            (7., 11.)  # id 2
+        )).eval()
+        linear_weights.assign(((4.,), (6.,))).eval()
+        # example 0, ids [2], embedding[0] = [7, 11]
+        # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5]
+        # example 2, ids [], embedding[2] = [0, 0]
+        # example 3, ids [1], embedding[3] = [3, 5]
+        # sum(embeddings * linear_weights)
+        # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42]
+        self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval())
+
+  def test_feature_layer(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    l = fc.FeatureLayer((embedding_column,))
+    feature_layer = l({'aaa': sparse_input})
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in trainable_vars]))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, trainable_vars[0].eval())
+      self.assertAllEqual(expected_lookups, feature_layer.eval())
+
+  def test_feature_layer_not_trainable(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer,
+        trainable=False)
+
+    # Provide sparse input and get dense result.
+    feature_layer = fc.FeatureLayer((embedding_column,))({'aaa': sparse_input})
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    self.assertItemsEqual(
+        [], ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, global_vars[0].eval())
+      self.assertAllEqual(expected_lookups, feature_layer.eval())
+
+  def test_input_layer(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    feature_layer = fc_old.input_layer({
+        'aaa': sparse_input
+    }, (embedding_column,))
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('input_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+    self.assertItemsEqual(('input_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in trainable_vars]))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, trainable_vars[0].eval())
+      self.assertAllEqual(expected_lookups, feature_layer.eval())
+
+  def test_old_linear_model(self):
     # Inputs.
     batch_size = 4
     vocabulary_size = 3
@@ -4137,6 +6341,7 @@ class EmbeddingColumnTest(test.TestCase):
     embedding_dimension = 2
     embedding_shape = (vocabulary_size, embedding_dimension)
     zeros_embedding_values = np.zeros(embedding_shape)
+
     def _initializer(shape, dtype, partition_info):
       self.assertAllEqual(embedding_shape, shape)
       self.assertEqual(dtypes.float32, dtype)
@@ -4152,8 +6357,9 @@ class EmbeddingColumnTest(test.TestCase):
         initializer=_initializer)
 
     with ops.Graph().as_default():
-      model = fc.LinearModel((embedding_column,))
-      predictions = model({categorical_column.name: sparse_input})
+      predictions = fc_old.linear_model({
+          categorical_column.name: sparse_input
+      }, (embedding_column,))
       expected_var_names = (
           'linear_model/bias_weights:0',
           'linear_model/aaa_embedding/weights:0',
@@ -4163,15 +6369,14 @@ class EmbeddingColumnTest(test.TestCase):
           expected_var_names,
           [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
       trainable_vars = {
-          v.name: v for v in ops.get_collection(
-              ops.GraphKeys.TRAINABLE_VARIABLES)
+          v.name: v
+          for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
       }
       self.assertItemsEqual(expected_var_names, trainable_vars.keys())
       bias = trainable_vars['linear_model/bias_weights:0']
       embedding_weights = trainable_vars[
           'linear_model/aaa_embedding/embedding_weights:0']
-      linear_weights = trainable_vars[
-          'linear_model/aaa_embedding/weights:0']
+      linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0']
       with _initialized_session():
         # Predictions with all zero weights.
         self.assertAllClose(np.zeros((1,)), bias.eval())
@@ -4195,8 +6400,9 @@ class EmbeddingColumnTest(test.TestCase):
         # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42]
         self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval())
 
-  def test_feature_layer(self):
+  def test_old_linear_model_old_categorical(self):
     # Inputs.
+    batch_size = 4
     vocabulary_size = 3
     sparse_input = sparse_tensor.SparseTensorValue(
         # example 0, ids [2]
@@ -4205,114 +6411,70 @@ class EmbeddingColumnTest(test.TestCase):
         # example 3, ids [1]
         indices=((0, 0), (1, 0), (1, 4), (3, 0)),
         values=(2, 0, 1, 1),
-        dense_shape=(4, 5))
+        dense_shape=(batch_size, 5))
 
     # Embedding variable.
     embedding_dimension = 2
-    embedding_values = (
-        (1., 2.),  # id 0
-        (3., 5.),  # id 1
-        (7., 11.)  # id 2
-    )
-    def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-      self.assertEqual(dtypes.float32, dtype)
-      self.assertIsNone(partition_info)
-      return embedding_values
-
-    # Expected lookup result, using combiner='mean'.
-    expected_lookups = (
-        # example 0, ids [2], embedding = [7, 11]
-        (7., 11.),
-        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
-        (2., 3.5),
-        # example 2, ids [], embedding = [0, 0]
-        (0., 0.),
-        # example 3, ids [1], embedding = [3, 5]
-        (3., 5.),
-    )
-
-    # Build columns.
-    categorical_column = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc.embedding_column(
-        categorical_column,
-        dimension=embedding_dimension,
-        initializer=_initializer)
-
-    # Provide sparse input and get dense result.
-    l = FeatureLayer((embedding_column,))
-    feature_layer = l({'aaa': sparse_input})
-
-    # Assert expected embedding variable and lookups.
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
-                          tuple([v.name for v in global_vars]))
-    trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
-                          tuple([v.name for v in trainable_vars]))
-    with _initialized_session():
-      self.assertAllEqual(embedding_values, trainable_vars[0].eval())
-      self.assertAllEqual(expected_lookups, feature_layer.eval())
-
-  def test_feature_layer_not_trainable(self):
-    # Inputs.
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 5))
+    embedding_shape = (vocabulary_size, embedding_dimension)
+    zeros_embedding_values = np.zeros(embedding_shape)
 
-    # Embedding variable.
-    embedding_dimension = 2
-    embedding_values = (
-        (1., 2.),  # id 0
-        (3., 5.),  # id 1
-        (7., 11.)  # id 2
-    )
     def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertAllEqual(embedding_shape, shape)
       self.assertEqual(dtypes.float32, dtype)
       self.assertIsNone(partition_info)
-      return embedding_values
-
-    # Expected lookup result, using combiner='mean'.
-    expected_lookups = (
-        # example 0, ids [2], embedding = [7, 11]
-        (7., 11.),
-        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
-        (2., 3.5),
-        # example 2, ids [], embedding = [0, 0]
-        (0., 0.),
-        # example 3, ids [1], embedding = [3, 5]
-        (3., 5.),
-    )
+      return zeros_embedding_values
 
     # Build columns.
-    categorical_column = fc.categorical_column_with_identity(
+    categorical_column = fc_old.categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     embedding_column = fc.embedding_column(
         categorical_column,
         dimension=embedding_dimension,
-        initializer=_initializer,
-        trainable=False)
+        initializer=_initializer)
 
-    # Provide sparse input and get dense result.
-    feature_layer = FeatureLayer((embedding_column,))({'aaa': sparse_input})
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          categorical_column.name: sparse_input
+      }, (embedding_column,))
+      expected_var_names = (
+          'linear_model/bias_weights:0',
+          'linear_model/aaa_embedding/weights:0',
+          'linear_model/aaa_embedding/embedding_weights:0',
+      )
+      self.assertItemsEqual(
+          expected_var_names,
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+      trainable_vars = {
+          v.name: v
+          for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      }
+      self.assertItemsEqual(expected_var_names, trainable_vars.keys())
+      bias = trainable_vars['linear_model/bias_weights:0']
+      embedding_weights = trainable_vars[
+          'linear_model/aaa_embedding/embedding_weights:0']
+      linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0']
+      with _initialized_session():
+        # Predictions with all zero weights.
+        self.assertAllClose(np.zeros((1,)), bias.eval())
+        self.assertAllClose(zeros_embedding_values, embedding_weights.eval())
+        self.assertAllClose(
+            np.zeros((embedding_dimension, 1)), linear_weights.eval())
+        self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval())
 
-    # Assert expected embedding variable and lookups.
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
-                          tuple([v.name for v in global_vars]))
-    self.assertItemsEqual(
-        [], ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
-    with _initialized_session():
-      self.assertAllEqual(embedding_values, global_vars[0].eval())
-      self.assertAllEqual(expected_lookups, feature_layer.eval())
+        # Predictions with all non-zero weights.
+        embedding_weights.assign((
+            (1., 2.),  # id 0
+            (3., 5.),  # id 1
+            (7., 11.)  # id 2
+        )).eval()
+        linear_weights.assign(((4.,), (6.,))).eval()
+        # example 0, ids [2], embedding[0] = [7, 11]
+        # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5]
+        # example 2, ids [], embedding[2] = [0, 0]
+        # example 3, ids [1], embedding[3] = [3, 5]
+        # sum(embeddings * linear_weights)
+        # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42]
+        self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval())
 
 
 class SharedEmbeddingColumnTest(test.TestCase):
@@ -4530,8 +6692,8 @@ class SharedEmbeddingColumnTest(test.TestCase):
             values=(1, 2, 1),
             dense_shape=(2, 2)),
     }
-    outputs = _transform_features(features, [a, a_embedded, b, b_embedded],
-                                  None)
+    outputs = fc._transform_features(features, [a, a_embedded, b, b_embedded],
+                                     None)
     output_a = outputs[a]
     output_a_embedded = outputs[a_embedded]
     output_b = outputs[b]
@@ -4599,9 +6761,9 @@ class SharedEmbeddingColumnTest(test.TestCase):
 
     # Provide sparse input and get dense result.
     embedding_lookup_a = embedding_column_a.get_dense_tensor(
-        FeatureTransformationCache(input_features), state_manager)
+        fc.FeatureTransformationCache(input_features), state_manager)
     embedding_lookup_b = embedding_column_b.get_dense_tensor(
-        FeatureTransformationCache(input_features), state_manager)
+        fc.FeatureTransformationCache(input_features), state_manager)
 
     # Assert expected embedding variable and lookups.
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
@@ -4665,9 +6827,9 @@ class SharedEmbeddingColumnTest(test.TestCase):
 
     # Provide sparse input and get dense result.
     embedding_lookup_a = embedding_column_a.get_dense_tensor(
-        FeatureTransformationCache(input_features), state_manager)
+        fc.FeatureTransformationCache(input_features), state_manager)
     embedding_lookup_b = embedding_column_b.get_dense_tensor(
-        FeatureTransformationCache(input_features), state_manager)
+        fc.FeatureTransformationCache(input_features), state_manager)
 
     with _initialized_session() as sess:
       sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict)
@@ -4852,7 +7014,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
     }
 
     # Provide sparse input and get dense result.
-    feature_layer = FeatureLayer(
+    feature_layer = fc.FeatureLayer(
         feature_columns=(embedding_column_b, embedding_column_a,
                          embedding_column_c, embedding_column_d),
         shared_state_manager=shared_state_manager)(
@@ -4946,6 +7108,14 @@ class WeightedCategoricalColumnTest(test.TestCase):
         'ids': parsing_ops.VarLenFeature(dtypes.int64),
         'values': parsing_ops.VarLenFeature(dtypes.float32)
     }, column.parse_example_spec)
+    self.assertTrue(column._is_v2_column)
+
+  def test_is_v2_column(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc_old.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    self.assertFalse(column._is_v2_column)
 
   def test_deep_copy(self):
     """Tests deepcopy of categorical_column_with_hash_bucket."""
@@ -4987,7 +7157,10 @@ class WeightedCategoricalColumnTest(test.TestCase):
         values=('omar', 'stringer', 'marlo'),
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'Bad dtype'):
-      _transform_features({'ids': strings, 'values': strings}, (column,), None)
+      fc._transform_features({
+          'ids': strings,
+          'values': strings
+      }, (column,), None)
 
   def test_column_name_collision(self):
     with self.assertRaisesRegexp(ValueError, r'Parse config.*already exists'):
@@ -5007,7 +7180,7 @@ class WeightedCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(
         ValueError, 'values is not in features dictionary'):
-      _transform_features({'ids': inputs}, (column,), None)
+      fc._transform_features({'ids': inputs}, (column,), None)
 
   def test_parse_example(self):
     a = fc.categorical_column_with_vocabulary_list(
@@ -5056,7 +7229,7 @@ class WeightedCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=(0.5, 1.0, 0.1),
         dense_shape=(2, 2))
-    id_tensor, weight_tensor = _transform_features({
+    id_tensor, weight_tensor = fc._transform_features({
         'ids': inputs,
         'values': weights,
     }, (column,), None)[column]
@@ -5085,7 +7258,7 @@ class WeightedCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=(0.5, 1.0, 0.1),
         dense_shape=(2, 2))
-    id_tensor, weight_tensor = _transform_features({
+    id_tensor, weight_tensor = fc._transform_features({
         'ids': ((0, -1), (1, 0)),
         'values': weights,
     }, (column,), None)[column]
@@ -5114,7 +7287,7 @@ class WeightedCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=(2, 1, 0),
         dense_shape=(2, 2))
-    id_tensor, weight_tensor = _transform_features({
+    id_tensor, weight_tensor = fc._transform_features({
         'ids': inputs,
         'values': ((.5, 0.), (1., .1)),
     }, (column,), None)[column]
@@ -5236,6 +7409,137 @@ class WeightedCategoricalColumnTest(test.TestCase):
         # = 3*1 + 2*.1 = 3+.2 = 3.2
         self.assertAllClose(((.5,), (3.2,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'ids':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2)),
+          'values':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(.5, 1., .1),
+                  dense_shape=(2, 2))
+      }, (column,))
+      bias = get_linear_model_bias()
+      weight_var = get_linear_model_column_var(column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        weight_var.assign(((1.,), (2.,), (3.,))).eval()
+        # weight_var[0] * weights[0, 0] = 1 * .5 = .5
+        # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1]
+        # = 3*1 + 2*.1 = 3+.2 = 3.2
+        self.assertAllClose(((.5,), (3.2,)), predictions.eval())
+
+  def test_old_linear_model_mismatched_shape(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      with self.assertRaisesRegexp(ValueError,
+                                   r'Dimensions.*are not compatible'):
+        fc_old.linear_model({
+            'ids':
+                sparse_tensor.SparseTensorValue(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=(0, 2, 1),
+                    dense_shape=(2, 2)),
+            'values':
+                sparse_tensor.SparseTensorValue(
+                    indices=((0, 0), (0, 1), (1, 0), (1, 1)),
+                    values=(.5, 11., 1., .1),
+                    dense_shape=(2, 2))
+        }, (column,))
+
+  def test_old_linear_model_mismatched_dense_values(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'ids':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2)),
+          'values': ((.5,), (1.,))
+      }, (column,),
+                                        sparse_combiner='mean')
+      # Disabling the constant folding optimizer here since it changes the
+      # error message differently on CPU and GPU.
+      config = config_pb2.ConfigProto()
+      config.graph_options.rewrite_options.constant_folding = (
+          rewriter_config_pb2.RewriterConfig.OFF)
+      with _initialized_session(config):
+        with self.assertRaisesRegexp(errors.OpError, 'Incompatible shapes'):
+          predictions.eval()
+
+  def test_old_linear_model_mismatched_dense_shape(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'ids':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2)),
+          'values': ((.5,), (1.,), (.1,))
+      }, (column,))
+      bias = get_linear_model_bias()
+      weight_var = get_linear_model_column_var(column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        weight_var.assign(((1.,), (2.,), (3.,))).eval()
+        # weight_var[0] * weights[0, 0] = 1 * .5 = .5
+        # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1]
+        # = 3*1 + 2*.1 = 3+.2 = 3.2
+        self.assertAllClose(((.5,), (3.2,)), predictions.eval())
+
+  def test_old_linear_model_old_categorical(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc_old.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'ids':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2)),
+          'values':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(.5, 1., .1),
+                  dense_shape=(2, 2))
+      }, (column,))
+      bias = get_linear_model_bias()
+      weight_var = get_linear_model_column_var(column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        weight_var.assign(((1.,), (2.,), (3.,))).eval()
+        # weight_var[0] * weights[0, 0] = 1 * .5 = .5
+        # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1]
+        # = 3*1 + 2*.1 = 3+.2 = 3.2
+        self.assertAllClose(((.5,), (3.2,)), predictions.eval())
+
   # TODO(ptucker): Add test with embedding of weighted categorical.
 
 if __name__ == '__main__':
-- 
GitLab


From 45fb1429f86b5ee6589fd50d8325843b49f78409 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 08:48:02 -0700
Subject: [PATCH 1292/1357] Avoid extra calls to set_random_seed, as it is
 already called in tensorflowtestcase.

PiperOrigin-RevId: 216363450
---
 .../python/kernel_tests/linalg/linear_operator_addition_test.py | 2 --
 .../kernel_tests/linalg/linear_operator_block_diag_test.py      | 2 --
 .../kernel_tests/linalg/linear_operator_composition_test.py     | 2 --
 .../python/kernel_tests/linalg/linear_operator_diag_test.py     | 2 --
 .../kernel_tests/linalg/linear_operator_full_matrix_test.py     | 2 --
 .../python/kernel_tests/linalg/linear_operator_identity_test.py | 2 --
 .../kernel_tests/linalg/linear_operator_kronecker_test.py       | 2 --
 .../kernel_tests/linalg/linear_operator_low_rank_update_test.py | 2 --
 .../linalg/linear_operator_lower_triangular_test.py             | 2 --
 .../python/kernel_tests/linalg/linear_operator_util_test.py     | 2 --
 .../python/kernel_tests/linalg/linear_operator_zeros_test.py    | 2 --
 11 files changed, 22 deletions(-)

diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py
index cf56168d63..628ed998c5 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py
@@ -19,14 +19,12 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_addition
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 add_operators = linear_operator_addition.add_operators
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py
index 3ede2aceaa..30951b1b0e 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_block_diag as block_diag
@@ -29,7 +28,6 @@ from tensorflow.python.ops.linalg import linear_operator_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py
index 99497914f2..02f56db596 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py
@@ -21,7 +21,6 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
@@ -29,7 +28,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py
index 52861ae84a..0758349531 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py
@@ -17,7 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
@@ -27,7 +26,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 
 
 class LinearOperatorDiagTest(
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py
index 8373b5263f..8c2d2cf077 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
@@ -28,7 +27,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 
 
 class SquareLinearOperatorFullMatrixTest(
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py
index 0c3c6b390f..465a8194dd 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import random_ops
@@ -29,7 +28,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(2016)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py
index 7e81c9c6c4..f039b60f64 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py
@@ -21,7 +21,6 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_kronecker as kronecker
@@ -30,7 +29,6 @@ from tensorflow.python.ops.linalg import linear_operator_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py
index 61268607a4..207e5edf81 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
@@ -28,7 +27,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py
index eb4bff915b..e3c8f5cb68 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py
@@ -17,14 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 
 
 class LinearOperatorLowerTriangularTest(
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py
index 86847d38c2..13218787e2 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py
@@ -21,14 +21,12 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.linalg import linear_operator_util
 from tensorflow.python.platform import test
 
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py
index f0556304ad..ad97d1a93e 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py
@@ -20,14 +20,12 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(2016)
 
 
-- 
GitLab


From 32b9901c0e20f82831a5cf0a42b016e7ff5197d0 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 9 Oct 2018 09:17:04 -0700
Subject: [PATCH 1293/1357] Internal change

PiperOrigin-RevId: 216367867
---
 tensorflow/contrib/lite/build_def.bzl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index b9e933a8b6..b3607a761c 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -313,8 +313,8 @@ def generated_test_models_failing(conversion_mode):
             "arg_min_max",
             "div",
             "floor_div",
-            "gather ",
-            "lstm ",
+            "gather",
+            "lstm",
             "resize_bilinear",
             "space_to_batch_nd",
             "split",
-- 
GitLab


From df11cce2e600581087f29ef0b85286f7e582572d Mon Sep 17 00:00:00 2001
From: Tamara Norman <tamaranorman@google.com>
Date: Tue, 9 Oct 2018 09:18:53 -0700
Subject: [PATCH 1294/1357] Throw error when evaluating have variable target in
 GradientTape.

PiperOrigin-RevId: 216368178
---
 tensorflow/python/eager/backprop.py      |  9 ++++++++-
 tensorflow/python/eager/backprop_test.py | 12 +++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index deac29111f..44ce69ee60 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -868,6 +868,7 @@ class GradientTape(object):
     Raises:
       RuntimeError: if called inside the context of the tape, or if called more
        than once on a non-persistent tape.
+      ValueError: if called on variable target.
     """
     if self._tape is None:
       raise RuntimeError("GradientTape.gradient can only be called once on "
@@ -887,6 +888,12 @@ class GradientTape(object):
                             "gradient in order to compute higher order "
                             "derrivatives.", 1)
 
+    flat_targets = nest.flatten(target)
+    for t in flat_targets:
+      if resource_variable_ops.is_resource_variable(t):
+        raise ValueError("GradientTape.gradient is not supported for variable "
+                         "targets.")
+
     flat_sources = nest.flatten(sources)
     flat_sources = [_handle_or_self(x) for x in flat_sources]
 
@@ -896,7 +903,7 @@ class GradientTape(object):
 
     flat_grad = imperative_grad.imperative_grad(
         self._tape,
-        nest.flatten(target),
+        flat_targets,
         flat_sources,
         output_gradients=output_gradients)
 
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 32731747b7..7e5c9f3cb6 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -548,6 +548,17 @@ class BackpropTest(test.TestCase):
     grad = g.gradient(y, [x])[0]
     self.assertEqual(self.evaluate(grad), 6.0)
 
+  @test_util.assert_no_new_tensors
+  @test_util.run_in_graph_and_eager_modes
+  def testGadientTapeCalledOnConstantTarget(self):
+    with backprop.GradientTape() as g:
+      x = variables.Variable([3.0])
+      y = variables.Variable([2.0])
+    with self.assertRaisesRegexp(
+        ValueError,
+        'GradientTape.gradient is not supported for variable targets.'):
+      g.gradient(x, y)
+
   @test_util.run_in_graph_and_eager_modes
   def testGradientTapeWithCond(self):
     x = constant_op.constant(3.0)
@@ -982,7 +993,6 @@ class BackpropTest(test.TestCase):
     self.assertIsNone(dy)
     self.assertEqual(self.evaluate(dz), 3.0)
 
-
   @test_util.run_in_graph_and_eager_modes
   def testDifferentiatingScalarCache(self):
     # In the following test, if x2 = x1 (i.e the objects are the exact same),
-- 
GitLab


From 92d533d19c44ab838a1f7954350fdafd62cfa889 Mon Sep 17 00:00:00 2001
From: Peter Ma <pcma@google.com>
Date: Tue, 9 Oct 2018 09:24:57 -0700
Subject: [PATCH 1295/1357] Change LOG(WARNING) to VLOG(1) in utils

PiperOrigin-RevId: 216369081
---
 tensorflow/core/grappler/costs/utils.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index 2fcadf1de3..87b74e2952 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -209,7 +209,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) {
   // Can't infer the size if the rank is unknown. It has to be at least a
   // scalar though.
   if (shape.unknown_rank()) {
-    LOG(WARNING) << "CalculateTensorSize() -- unknown rank";
+    VLOG(1) << "CalculateTensorSize() -- unknown rank";
     return size;
   }
 
@@ -217,7 +217,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) {
   for (int i = 0; i < shape.dim_size(); ++i) {
     if (shape.dim(i).size() < 0) {
       shape.mutable_dim(i)->set_size(1);
-      LOG(WARNING) << "CalculateTensorSize() -- unknown dim: " << i;
+      VLOG(1) << "CalculateTensorSize() -- unknown dim: " << i;
     }
   }
 
-- 
GitLab


From 87d8055c74a65ec9fb2a13f38e6e2c5d30b7e2e4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 09:30:32 -0700
Subject: [PATCH 1296/1357] Correctly pre-reserve visit state in
 HloInstruction::PostOrderDFS

Previously we pre-reserverd the visit state based on the number of
instructions but then started to index it with the instruction unique ID
what can be larger then the instruction count. This resulted in some
very expensive re-allocations what can be eliminated by reserving the
correctly sized buffer.

PiperOrigin-RevId: 216369849
---
 tensorflow/compiler/xla/service/hlo_instruction.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 5c3908a9a4..050d28b289 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2474,7 +2474,7 @@ template <typename Visitor>
 static Status PostOrderDFS(HloInstruction* root, Visitor* visitor,
                            const InternalCompareFunction* operand_order,
                            bool ignore_control_predecessors) {
-  visitor->ReserveVisitStates(root->GetModule()->instruction_count());
+  visitor->ReserveVisitStates(root->GetModule()->NumUniqueInstructionIds());
 
   // dfs_stack holds pairs of <HloInstruction*->unique_id(), HloInstruction*>.
   //
-- 
GitLab


From 3e1a0792fb593953860162d57320c8602fd199eb Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Tue, 9 Oct 2018 09:32:50 -0700
Subject: [PATCH 1297/1357] Create SDCAOptimizerV2 op to fix the "adaptative"
 typo.

PiperOrigin-RevId: 216370193
---
 .../linear_optimizer/python/ops/sdca_ops.py   |  57 ++++--
 .../base_api/api_def_SdcaOptimizerV2.pbtxt    | 171 ++++++++++++++++++
 tensorflow/core/kernels/sdca_ops.cc           |   8 +-
 tensorflow/core/ops/sdca_ops.cc               |  28 +++
 tensorflow/python/ops/sdca_ops.py             |   1 +
 5 files changed, 246 insertions(+), 19 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt

diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index b98adf862b..48ac429701 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -22,6 +22,7 @@ import collections
 from six.moves import range
 
 from tensorflow.contrib.linear_optimizer.python.ops.sharded_mutable_dense_hashtable import ShardedMutableDenseHashTable
+from tensorflow.python.compat import compat
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -485,24 +486,44 @@ class SdcaModel(object):
         sparse_weights.append(batch_gathered_weights)
 
       # pylint: disable=protected-access
-      esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
-          sparse_example_indices,
-          sparse_feature_indices,
-          sparse_features_values,
-          self._convert_n_to_tensor(self._examples['dense_features']),
-          internal_convert_to_tensor(self._examples['example_weights']),
-          internal_convert_to_tensor(self._examples['example_labels']),
-          sparse_indices,
-          sparse_weights,
-          self._convert_n_to_tensor(self._slots[
-              'unshrinked_dense_features_weights']),
-          example_state_data,
-          loss_type=self._options['loss_type'],
-          l1=self._options['symmetric_l1_regularization'],
-          l2=self._symmetric_l2_regularization(),
-          num_loss_partitions=self._num_loss_partitions(),
-          num_inner_iterations=1,
-          adaptative=self._adaptive())
+      if compat.forward_compatible(year=2018, month=10, day=30):
+        esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2(
+            sparse_example_indices,
+            sparse_feature_indices,
+            sparse_features_values,
+            self._convert_n_to_tensor(self._examples['dense_features']),
+            internal_convert_to_tensor(self._examples['example_weights']),
+            internal_convert_to_tensor(self._examples['example_labels']),
+            sparse_indices,
+            sparse_weights,
+            self._convert_n_to_tensor(self._slots[
+                'unshrinked_dense_features_weights']),
+            example_state_data,
+            loss_type=self._options['loss_type'],
+            l1=self._options['symmetric_l1_regularization'],
+            l2=self._symmetric_l2_regularization(),
+            num_loss_partitions=self._num_loss_partitions(),
+            num_inner_iterations=1,
+            adaptive=self._adaptive())
+      else:
+        esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
+            sparse_example_indices,
+            sparse_feature_indices,
+            sparse_features_values,
+            self._convert_n_to_tensor(self._examples['dense_features']),
+            internal_convert_to_tensor(self._examples['example_weights']),
+            internal_convert_to_tensor(self._examples['example_labels']),
+            sparse_indices,
+            sparse_weights,
+            self._convert_n_to_tensor(self._slots[
+                'unshrinked_dense_features_weights']),
+            example_state_data,
+            loss_type=self._options['loss_type'],
+            l1=self._options['symmetric_l1_regularization'],
+            l2=self._symmetric_l2_regularization(),
+            num_loss_partitions=self._num_loss_partitions(),
+            num_inner_iterations=1,
+            adaptative=self._adaptive())
       # pylint: enable=protected-access
 
       with ops.control_dependencies([esu]):
diff --git a/tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt
new file mode 100644
index 0000000000..c615dee8c7
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt
@@ -0,0 +1,171 @@
+op {
+  graph_op_name: "SdcaOptimizerV2"
+  visibility: HIDDEN
+  in_arg {
+    name: "sparse_example_indices"
+    description: <<END
+a list of vectors which contain example indices.
+END
+  }
+  in_arg {
+    name: "sparse_feature_indices"
+    description: <<END
+a list of vectors which contain feature indices.
+END
+  }
+  in_arg {
+    name: "sparse_feature_values"
+    description: <<END
+a list of vectors which contains feature value
+associated with each feature group.
+END
+  }
+  in_arg {
+    name: "dense_features"
+    description: <<END
+a list of matrices which contains the dense feature values.
+END
+  }
+  in_arg {
+    name: "example_weights"
+    description: <<END
+a vector which contains the weight associated with each
+example.
+END
+  }
+  in_arg {
+    name: "example_labels"
+    description: <<END
+a vector which contains the label/target associated with each
+example.
+END
+  }
+  in_arg {
+    name: "sparse_indices"
+    description: <<END
+a list of vectors where each value is the indices which has
+corresponding weights in sparse_weights. This field maybe omitted for the
+dense approach.
+END
+  }
+  in_arg {
+    name: "sparse_weights"
+    description: <<END
+a list of vectors where each value is the weight associated with
+a sparse feature group.
+END
+  }
+  in_arg {
+    name: "dense_weights"
+    description: <<END
+a list of vectors where the values are the weights associated
+with a dense feature group.
+END
+  }
+  in_arg {
+    name: "example_state_data"
+    description: <<END
+a list of vectors containing the example state data.
+END
+  }
+  out_arg {
+    name: "out_example_state_data"
+    description: <<END
+a list of vectors containing the updated example state
+data.
+END
+  }
+  out_arg {
+    name: "out_delta_sparse_weights"
+    description: <<END
+a list of vectors where each value is the delta
+weights associated with a sparse feature group.
+END
+  }
+  out_arg {
+    name: "out_delta_dense_weights"
+    description: <<END
+a list of vectors where the values are the delta
+weights associated with a dense feature group.
+END
+  }
+  attr {
+    name: "loss_type"
+    description: <<END
+Type of the primal loss. Currently SdcaSolver supports logistic,
+squared and hinge losses.
+END
+  }
+  attr {
+    name: "adaptive"
+    default_value {
+      b: True
+    }
+    description: <<END
+Whether to use Adaptive SDCA for the inner loop.
+END
+  }
+  attr {
+    name: "num_sparse_features"
+    description: <<END
+Number of sparse feature groups to train on.
+END
+  }
+  attr {
+    name: "num_sparse_features_with_values"
+    description: <<END
+Number of sparse feature groups with values
+associated with it, otherwise implicitly treats values as 1.0.
+END
+  }
+  attr {
+    name: "num_dense_features"
+    description: <<END
+Number of dense feature groups to train on.
+END
+  }
+  attr {
+    name: "l1"
+    description: <<END
+Symmetric l1 regularization strength.
+END
+  }
+  attr {
+    name: "l2"
+    description: <<END
+Symmetric l2 regularization strength.
+END
+  }
+  attr {
+    name: "num_loss_partitions"
+    description: <<END
+Number of partitions of the global loss function.
+END
+  }
+  attr {
+    name: "num_inner_iterations"
+    description: <<END
+Number of iterations per mini-batch.
+END
+  }
+  summary: "Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for"
+  description: <<END
+linear models with L1 + L2 regularization. As global optimization objective is
+strongly-convex, the optimizer optimizes the dual objective at each step. The
+optimizer applies each update one example at a time. Examples are sampled
+uniformly, and the optimizer is learning rate free and enjoys linear convergence
+rate.
+
+[Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
+Shai Shalev-Shwartz, Tong Zhang. 2012
+
+$$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$
+
+[Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
+Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
+Peter Richtarik, Martin Takac. 2015
+
+[Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
+Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
+END
+}
diff --git a/tensorflow/core/kernels/sdca_ops.cc b/tensorflow/core/kernels/sdca_ops.cc
index 3bd4168dc7..d0e0b15da7 100644
--- a/tensorflow/core/kernels/sdca_ops.cc
+++ b/tensorflow/core/kernels/sdca_ops.cc
@@ -83,7 +83,11 @@ struct ComputeOptions {
           context, false,
           errors::InvalidArgument("Unsupported loss type: ", loss_type));
     }
-    OP_REQUIRES_OK(context, context->GetAttr("adaptative", &adaptive));
+    auto s = context->GetAttr("adaptative", &adaptive);
+    if (!s.ok()) {
+      s = context->GetAttr("adaptive", &adaptive);
+    }
+    OP_REQUIRES_OK(context, s);
     OP_REQUIRES_OK(
         context, context->GetAttr("num_sparse_features", &num_sparse_features));
     OP_REQUIRES_OK(context, context->GetAttr("num_sparse_features_with_values",
@@ -245,6 +249,8 @@ class SdcaOptimizer : public OpKernel {
 };
 REGISTER_KERNEL_BUILDER(Name("SdcaOptimizer").Device(DEVICE_CPU),
                         SdcaOptimizer);
+REGISTER_KERNEL_BUILDER(Name("SdcaOptimizerV2").Device(DEVICE_CPU),
+                        SdcaOptimizer);
 
 class SdcaShrinkL1 : public OpKernel {
  public:
diff --git a/tensorflow/core/ops/sdca_ops.cc b/tensorflow/core/ops/sdca_ops.cc
index fdf53a55dd..51d248f2d6 100644
--- a/tensorflow/core/ops/sdca_ops.cc
+++ b/tensorflow/core/ops/sdca_ops.cc
@@ -65,6 +65,34 @@ REGISTER_OP("SdcaOptimizer")
     .Output("out_delta_dense_weights: num_dense_features * float")
     .SetShapeFn(ApplySdcaOptimizerShapeFn);
 
+// The SdcaOptimizerV2 op fixes the "adaptative" typo in v1.
+REGISTER_OP("SdcaOptimizerV2")
+    .Attr(
+        "loss_type: {'logistic_loss', 'squared_loss', 'hinge_loss',"
+        "'smooth_hinge_loss', 'poisson_loss'}")
+    .Attr("adaptive : bool=false")
+    .Attr("num_sparse_features: int >= 0")
+    .Attr("num_sparse_features_with_values: int >= 0")
+    .Attr("num_dense_features: int >= 0")
+    .Attr("l1: float")
+    .Attr("l2: float")
+    .Attr("num_loss_partitions: int >= 1")
+    .Attr("num_inner_iterations: int >= 1")
+    .Input("sparse_example_indices: num_sparse_features * int64")
+    .Input("sparse_feature_indices: num_sparse_features * int64")
+    .Input("sparse_feature_values: num_sparse_features_with_values * float")
+    .Input("dense_features: num_dense_features * float")
+    .Input("example_weights: float")
+    .Input("example_labels: float")
+    .Input("sparse_indices: num_sparse_features * int64")
+    .Input("sparse_weights: num_sparse_features * float")
+    .Input("dense_weights: num_dense_features * float")
+    .Input("example_state_data: float")
+    .Output("out_example_state_data: float")
+    .Output("out_delta_sparse_weights: num_sparse_features * float")
+    .Output("out_delta_dense_weights: num_dense_features * float")
+    .SetShapeFn(ApplySdcaOptimizerShapeFn);
+
 REGISTER_OP("SdcaShrinkL1")
     .Attr("num_features: int >= 0")
     .Attr("l1: float")
diff --git a/tensorflow/python/ops/sdca_ops.py b/tensorflow/python/ops/sdca_ops.py
index 4d5aeec591..a1c68343ed 100644
--- a/tensorflow/python/ops/sdca_ops.py
+++ b/tensorflow/python/ops/sdca_ops.py
@@ -29,4 +29,5 @@ from tensorflow.python.ops.gen_sdca_ops import *
 
 ops.NotDifferentiable("SdcaFprint")
 ops.NotDifferentiable("SdcaOptimizer")
+ops.NotDifferentiable("SdcaOptimizerV2")
 ops.NotDifferentiable("SdcaShrinkL1")
-- 
GitLab


From 5d6adc910b8323b73a61d3089f3a3028be411e90 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 09:33:59 -0700
Subject: [PATCH 1298/1357] Improve docstring for tf.data.Dataset.shuffle()

PiperOrigin-RevId: 216370329
---
 tensorflow/python/data/ops/dataset_ops.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index b7e19055f2..cf52f7529a 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -733,6 +733,11 @@ class Dataset(object):
   def shuffle(self, buffer_size, seed=None, reshuffle_each_iteration=None):
     """Randomly shuffles the elements of this dataset.
 
+    This dataset fills a buffer with `buffer_size` elements, then randomly
+    samples elements from this buffer, replacing the selected elements with new
+    elements. For perfect shuffling, a buffer size greater than or equal to the
+    full size of the dataset is required.
+
     Args:
       buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the
         number of elements from this dataset from which the new
-- 
GitLab


From 3ef35b81fd753401e3d69989b3bd1146749cc3b3 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 9 Oct 2018 09:34:47 -0700
Subject: [PATCH 1299/1357] Include live-in symbols in liveness analysis. These
 are required for control flow conversion.

PiperOrigin-RevId: 216370439
---
 tensorflow/python/autograph/pyct/anno.py      |  1 +
 tensorflow/python/autograph/pyct/cfg.py       | 10 ++-
 .../pyct/static_analysis/liveness.py          | 36 +++++---
 .../pyct/static_analysis/liveness_test.py     | 86 +++++++++++++++++--
 4 files changed, 112 insertions(+), 21 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py
index 1a52110ef3..5392e6ea03 100644
--- a/tensorflow/python/autograph/pyct/anno.py
+++ b/tensorflow/python/autograph/pyct/anno.py
@@ -91,6 +91,7 @@ class Static(NoValue):
   DEFINED_VARS_IN = (
       'Symbols defined when entering the node. See reaching_definitions.py.')
   LIVE_VARS_OUT = ('Symbols live when exiting the node. See liveness.py.')
+  LIVE_VARS_IN = ('Symbols live when entering the node. See liveness.py.')
 
 
 FAIL = object()
diff --git a/tensorflow/python/autograph/pyct/cfg.py b/tensorflow/python/autograph/pyct/cfg.py
index fca0eb62e4..ec733ea38f 100644
--- a/tensorflow/python/autograph/pyct/cfg.py
+++ b/tensorflow/python/autograph/pyct/cfg.py
@@ -22,6 +22,10 @@ Once built, the CFG itself is immutable, but the values it holds need not be;
 they are usually annotated with information extracted by walking the graph.
 """
 
+# TODO(mdan): The notion of 'statements' below is inaccurate.
+# They should rather be called 'block statements', because they include
+# statements that may have a body, e.g. if and while.
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -763,9 +767,9 @@ class AstToCfg(gast.NodeVisitor):
 
     self.builder.enter_section(node)
 
-    # TODO(mdan): Strictly speaking, this should be node.target + node.iter.
-    # A blind dataflow analysis would have to process both node.target and
-    # node.iter to properly process read and write access.
+    # Note: Strictly speaking, this should be node.target + node.iter.
+    # However, the activity analysis accounts for this inconsistency,
+    # so dataflow analysis produces the correct values.
     self.builder.enter_loop_section(node, node.iter)
     for stmt in node.body:
       self.visit(stmt)
diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness.py b/tensorflow/python/autograph/pyct/static_analysis/liveness.py
index 41c903beb9..36960d0103 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/liveness.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/liveness.py
@@ -14,8 +14,13 @@
 # ==============================================================================
 """Live variable analysis.
 
-This analysis attaches a set containing the live symbols that are live at the
-exit of control flow statements.
+See https://en.wikipedia.org/wiki/Live_variable_analysis for a definition of
+the following idioms: live variable, live in, live out, which are used
+throughout this file.
+
+This analysis attaches the following:
+ * symbols that are live at the exit of control flow statements
+ * symbols that are live at the entry of control flow statements
 
 Requires activity analysis.
 """
@@ -164,23 +169,34 @@ class Annotator(transformer.Base):
     self.current_analyzer = parent_analyzer
     return node
 
-  def _aggregate_successors_live_in(self, node):
+  def _block_statement_live_out(self, node):
     successors = self.current_analyzer.graph.stmt_next[node]
-    node_live_out = set()
+    stmt_live_out = set()
     for s in successors:
-      node_live_out.update(self.current_analyzer.in_[s])
-    anno.setanno(node, anno.Static.LIVE_VARS_OUT, frozenset(node_live_out))
-    node = self.generic_visit(node)
+      stmt_live_out.update(self.current_analyzer.in_[s])
+    anno.setanno(node, anno.Static.LIVE_VARS_OUT, frozenset(stmt_live_out))
+    return node
+
+  def _block_statement_live_in(self, node, entry_node):
+    cfg_node = self.current_analyzer.graph.index[entry_node]
+    stmt_live_in = frozenset(self.current_analyzer.in_[cfg_node])
+    anno.setanno(node, anno.Static.LIVE_VARS_IN, stmt_live_in)
     return node
 
   def visit_If(self, node):
-    return self._aggregate_successors_live_in(node)
+    node = self.generic_visit(node)
+    node = self._block_statement_live_out(node)
+    return self._block_statement_live_in(node, node.test)
 
   def visit_For(self, node):
-    return self._aggregate_successors_live_in(node)
+    node = self.generic_visit(node)
+    node = self._block_statement_live_out(node)
+    return self._block_statement_live_in(node, node.iter)
 
   def visit_While(self, node):
-    return self._aggregate_successors_live_in(node)
+    node = self.generic_visit(node)
+    node = self._block_statement_live_out(node)
+    return self._block_statement_live_in(node, node.test)
 
 
 def resolve(node, source_info, graphs):
diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py
index 0d5f369e92..7b67f8f608 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py
@@ -47,14 +47,23 @@ class LivenessTest(test.TestCase):
 
   def assertHasLiveOut(self, node, expected):
     live_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT)
-    live_out_str = set(str(v) for v in live_out)
+    live_out_strs = set(str(v) for v in live_out)
     if not expected:
       expected = ()
     if not isinstance(expected, tuple):
       expected = (expected,)
-    self.assertSetEqual(live_out_str, set(expected))
+    self.assertSetEqual(live_out_strs, set(expected))
 
-  def test_stacked_if(self):
+  def assertHasLiveIn(self, node, expected):
+    live_in = anno.getanno(node, anno.Static.LIVE_VARS_IN)
+    live_in_strs = set(str(v) for v in live_in)
+    if not expected:
+      expected = ()
+    if not isinstance(expected, tuple):
+      expected = (expected,)
+    self.assertSetEqual(live_in_strs, set(expected))
+
+  def test_live_out_stacked_if(self):
 
     def test_fn(x, a):
       if a > 0:
@@ -69,7 +78,7 @@ class LivenessTest(test.TestCase):
     self.assertHasLiveOut(fn_body[0], ('a', 'x'))
     self.assertHasLiveOut(fn_body[1], 'x')
 
-  def test_stacked_if_else(self):
+  def test_live_out_stacked_if_else(self):
 
     def test_fn(x, a):
       if a > 0:
@@ -86,7 +95,7 @@ class LivenessTest(test.TestCase):
     self.assertHasLiveOut(fn_body[0], 'a')
     self.assertHasLiveOut(fn_body[1], 'x')
 
-  def test_for_basic(self):
+  def test_live_out_for_basic(self):
 
     def test_fn(x, a):
       for i in range(a):
@@ -98,7 +107,7 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveOut(fn_body[0], 'x')
 
-  def test_attributes(self):
+  def test_live_out_attributes(self):
 
     def test_fn(x, a):
       if a > 0:
@@ -110,7 +119,7 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveOut(fn_body[0], ('x.y', 'x'))
 
-  def test_nested_functions(self):
+  def test_live_out_nested_functions(self):
 
     def test_fn(a, b):
       if b:
@@ -126,7 +135,7 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveOut(fn_body[0], 'a')
 
-  def test_nested_functions_isolation(self):
+  def test_live_out_nested_functions_isolation(self):
 
     def test_fn(b):
       if b:
@@ -144,6 +153,67 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveOut(fn_body[0], 'max')
 
+  def test_live_in_stacked_if(self):
+
+    def test_fn(x, a, b, c):
+      if a > 0:
+        x = b
+      if c > 1:
+        x = 0
+      return x
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveIn(fn_body[0], ('a', 'b', 'c', 'x'))
+    self.assertHasLiveIn(fn_body[1], ('c', 'x'))
+
+  def test_live_in_stacked_if_else(self):
+
+    def test_fn(x, a, b, c, d):
+      if a > 1:
+        x = b
+      else:
+        x = c
+      if d > 0:
+        x = 0
+      return x
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveIn(fn_body[0], ('a', 'b', 'c', 'd'))
+    self.assertHasLiveIn(fn_body[1], ('d', 'x'))
+
+  def test_live_in_for_basic(self):
+
+    def test_fn(x, y, a):
+      for i in a:
+        x = i
+        y += x
+        z = 0
+      return y, z
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveIn(fn_body[0], ('a', 'y', 'z'))
+
+  def test_live_in_for_nested(self):
+
+    def test_fn(x, y, a):
+      for i in a:
+        for j in i:
+          x = i
+          y += x
+          z = j
+      return y, z
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveIn(fn_body[0], ('a', 'y', 'z'))
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 37146b89788c2a0796ca6b863bde9c4c0dc4068e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 09:46:04 -0700
Subject: [PATCH 1300/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216372144

---
 tensorflow/go/op/wrappers.go | 222 +++++++++++++++++------------------
 1 file changed, 111 insertions(+), 111 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index fe99915a6c..eb6df2af46 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -10415,6 +10415,79 @@ func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
 	return op.Output(0)
 }
 
+// Transforms a serialized tensorflow.TensorProto proto into a Tensor.
+//
+// Arguments:
+//	serialized: A scalar string containing a serialized TensorProto proto.
+//	out_type: The type of the serialized tensor.  The provided type must match the
+// type of the serialized tensor and no implicit conversion will take place.
+//
+// Returns A Tensor of type `out_type`.
+func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	opspec := tf.OpSpec{
+		Type: "ParseTensor",
+		Input: []tf.Input{
+			serialized,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax.
+type MaxPoolWithArgmaxAttr func(optionalAttr)
+
+// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value.
+// If not specified, defaults to DT_INT64
+func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr {
+	return func(m optionalAttr) {
+		m["Targmax"] = value
+	}
+}
+
+// Performs max pooling on the input and outputs both max values and indices.
+//
+// The indices in `argmax` are flattened, so that a maximum value at position
+// `[b, y, x, c]` becomes flattened index
+// `((b * height + y) * width + x) * channels + c`.
+//
+// The indices returned are always in `[0, height) x [0, width)` before flattening,
+// even if padding is involved and the mathematically correct answer is outside
+// (either negative or too large).  This is a bug, but fixing it is difficult to do
+// in a safe backwards compatible way, especially due to flattening.
+//
+// Arguments:
+//	input: 4-D with shape `[batch, height, width, channels]`.  Input to pool over.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The max pooled output tensor.4-D.  The flattened indices of the max values chosen for each output.
+func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MaxPoolWithArgmax",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
 // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
 type ResourceSparseApplyFtrlV2Attr func(optionalAttr)
 
@@ -14202,44 +14275,6 @@ func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Outpu
 	return op.Output(0), op.Output(1)
 }
 
-// Computes the mean along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// Computes a tensor such that
-// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is
-// over `j` such that `segment_ids[j] == i` and `N` is the total number of
-// values summed.
-//
-// If the mean is empty for a given segment ID `i`, `output[i] = 0`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMean.png" alt>
-// </div>
-//
-// Arguments:
-//
-//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SegmentMean",
-		Input: []tf.Input{
-			data, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp.
 type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr)
 
@@ -15941,79 +15976,6 @@ func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, label
 	return op.Output(0), op.Output(1)
 }
 
-// Transforms a serialized tensorflow.TensorProto proto into a Tensor.
-//
-// Arguments:
-//	serialized: A scalar string containing a serialized TensorProto proto.
-//	out_type: The type of the serialized tensor.  The provided type must match the
-// type of the serialized tensor and no implicit conversion will take place.
-//
-// Returns A Tensor of type `out_type`.
-func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"out_type": out_type}
-	opspec := tf.OpSpec{
-		Type: "ParseTensor",
-		Input: []tf.Input{
-			serialized,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax.
-type MaxPoolWithArgmaxAttr func(optionalAttr)
-
-// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value.
-// If not specified, defaults to DT_INT64
-func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr {
-	return func(m optionalAttr) {
-		m["Targmax"] = value
-	}
-}
-
-// Performs max pooling on the input and outputs both max values and indices.
-//
-// The indices in `argmax` are flattened, so that a maximum value at position
-// `[b, y, x, c]` becomes flattened index
-// `((b * height + y) * width + x) * channels + c`.
-//
-// The indices returned are always in `[0, height) x [0, width)` before flattening,
-// even if padding is involved and the mathematically correct answer is outside
-// (either negative or too large).  This is a bug, but fixing it is difficult to do
-// in a safe backwards compatible way, especially due to flattening.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, height, width, channels]`.  Input to pool over.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
-//
-// Returns The max pooled output tensor.4-D.  The flattened indices of the max values chosen for each output.
-func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MaxPoolWithArgmax",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
 // Returns the truth value of NOT x element-wise.
 func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
@@ -21926,6 +21888,44 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Computes the mean along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// Computes a tensor such that
+// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is
+// over `j` such that `segment_ids[j] == i` and `N` is the total number of
+// values summed.
+//
+// If the mean is empty for a given segment ID `i`, `output[i] = 0`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMean.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SegmentMean",
+		Input: []tf.Input{
+			data, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the minimum along segments of a tensor.
 //
 // Read
-- 
GitLab


From 1b4402137a76c8085c160edfcc0c3be3cfa8fa3a Mon Sep 17 00:00:00 2001
From: Tayo Oguntebi <tayo@google.com>
Date: Tue, 9 Oct 2018 10:05:11 -0700
Subject: [PATCH 1301/1357]   Fixes typo in Sort description.

PiperOrigin-RevId: 216375421
---
 tensorflow/compiler/xla/client/xla_builder.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 9ceede7a79..933c0e7b44 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -2002,7 +2002,7 @@ XlaOp Rev(const XlaOp& operand, absl::Span<const int64> dimensions);
 // the last dimension is chosen by default.
 //
 // If both keys and values are provided:
-// * The keys and the values must tensors with the same dimensions. The
+// * The keys and the values must be tensors with the same dimensions. The
 // element types of the tensors may be different.
 // * The result is a tuple that consists of a sorted tensor of keys (along the
 // provided dimension, as above) as the first element, and a tensor with their
-- 
GitLab


From 11f32ebbdcd4eaf5e9e09fe27571e26ec0bd9dd8 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Tue, 9 Oct 2018 10:40:23 -0700
Subject: [PATCH 1302/1357] [tf.data vectorization] Handle captured inputs in
 MapVectorization optimization

PiperOrigin-RevId: 216381943
---
 .../optimizers/data/map_vectorization.cc      | 31 +++++++++-------
 .../optimizers/data/vectorization_utils.cc    | 35 ++++++++++++++++---
 .../optimization/map_vectorization_test.py    |  9 ++---
 3 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index a9254ed58b..0576d075c2 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -60,14 +60,24 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
     graph_utils::CopyAttribute(k, map_node, map_defun_node);
   }
 
+  // Note that the inputs to the function are either regular arguments (for
+  // which the function is mapped across their 0th dimension) or captured inputs
+  // (for which the function takes the argument wholesale). We can infer
+  // the split between these arguments from the `map_node`'s attrs.
+  // The Targuments attr on `map_node` corresponds to a list of types of
+  // MapDataset's captured inputs.
+  auto t_captured = map_node.attr().at("Targuments");
+
   // Get types of input arguments from original map function
-  AttrValue t_args;
+  DataTypeVector t_args;  // Regular arguments
   for (const auto& input : vectorized_func->signature().input_arg()) {
-    t_args.mutable_list()->add_type(input.type());
+    t_args.push_back(input.type());
     map_defun_node->add_input(input.name());
   }
-  (*map_defun_node->mutable_attr())["Targuments"] = t_args;
-  AddNodeAttr("Tcaptured", DataTypeVector(), map_defun_node);
+  // Erase the captured arguments from Targuments
+  t_args.erase(t_args.end() - t_captured.list().type_size(), t_args.end());
+  AddNodeAttr("Targuments", t_args, map_defun_node);
+  AddNodeAttr("Tcaptured", t_captured, map_defun_node);
 
   // Set return values to match output names
   string output_prefix = strings::StrCat(map_defun_node->name(), ":output:");
@@ -96,7 +106,9 @@ FunctionDef* AddVectorizedFunction(const NodeDef& map_node,
       *vectorized_func, map_defun_node, library, &result);
 
   if (!s.ok()) {
-    LOG(ERROR) << "VectorizeMapDefun failed: " << s;
+    LOG(WARNING) << "VectorizeMapDefun failed. The function will only be "
+                    "naively vectorized with MapDefun. Reason: "
+                 << s;
     return vectorized_func;
   }
   return result;
@@ -129,10 +141,6 @@ bool IsStatefulFn(const FunctionLibraryDefinition& library,
   return false;
 }
 
-bool HasCapturedInputs(const NodeDef& map_node) {
-  return map_node.attr().at("Targuments").list().type_size() > 0;
-}
-
 NodeDef MakeNewBatchNode(const NodeDef& old_batch_node,
                          const NodeDef& input_node,
                          const FunctionDef& vectorized_func,
@@ -239,15 +247,12 @@ Status MapVectorization::Optimize(Cluster* cluster, const GrapplerItem& item,
     // Check that this is a valid optimization.
     if (!IsOutputShapesFullyDefined(*input_node) ||
         !IsOutputShapesFullyDefined(*map_node) ||
-        IsStatefulFn(function_library, *orig_func) ||
-        HasCapturedInputs(*map_node)) {
+        IsStatefulFn(function_library, *orig_func)) {
       // 1. If any of the inputs have an unknown shape, don't optimize, since
       // inputs might not be batchable.
       // 2. If any of the map func outputs have an unknown shape, don't
       // optimize, so that batching errors surface as before.
       // 3. If the function is stateful, don't vectorize it.
-      // 4. TODO(rachelim): Make this work for MapDataset with captured inputs
-      // by tiling inputs or modifying the signature of MapDefun.
       continue;
     }
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index ba857ab5d9..d977ff3198 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -253,8 +253,13 @@ Status Vectorization::AddConversionMapping(Node* op_node) {
     }
   }
 
-  TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(),
-                                           std::move(inputs), &outputs));
+  Status s = vectorizer->Vectorize(*op_node, outer_scope_.get(),
+                                   std::move(inputs), &outputs);
+  if (!s.ok()) {
+    VLOG(2) << "Vectorizer for op \"" << op_node->type_string()
+            << "\" failed with error: " << s;
+    return s;
+  }
 
   if (op_node->num_outputs() != outputs.size()) {
     return errors::Internal(
@@ -481,17 +486,37 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked,
 }
 
 Status Vectorization::AddArgNodeMappings() {
-  for (auto arg_node : map_defun_fn_->arg_nodes) {
+  // Note that inputs to map_defun_fn_ are either regular arguments (for which
+  // the operations are mapped across their 0th dimension) or captured inputs
+  // (for which the operations apply to the argument wholesale).
+  int num_args =
+      map_defun_node_->attrs().Find("Targuments")->list().type_size();
+
+  auto add_conversion = [this](Node* arg_node, bool stacked) {
     Node* input_node;
     TF_RETURN_IF_ERROR(map_defun_node_->input_node(
         arg_node->attrs().Find("index")->i(), &input_node));
 
-    conversion_map_.insert({{arg_node, 0}, {input_node, 0, true}});
+    conversion_map_.insert({{arg_node, 0}, {input_node, 0, stacked}});
 
     // Control inputs
     conversion_map_.insert({{arg_node, Graph::kControlSlot},
-                            {input_node, Graph::kControlSlot, true}});
+                            {input_node, Graph::kControlSlot, stacked}});
+
+    return Status::OK();
+  };
+
+  // Regular arguments
+  for (int i = 0; i < num_args; ++i) {
+    TF_RETURN_IF_ERROR(add_conversion(map_defun_fn_->arg_nodes[i], true));
+  }
+
+  // Captured inputs. These are applied (without slicing) to every iteration of
+  // the map function, hence are mapped to unstacked nodes.
+  for (int i = num_args; i < map_defun_fn_->arg_nodes.size(); ++i) {
+    TF_RETURN_IF_ERROR(add_conversion(map_defun_fn_->arg_nodes[i], false));
   }
+
   return Status::OK();
 }
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index 971a2d94b9..803ff87924 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -105,15 +105,16 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testOptimizationWithCapturedInputs(self):
     # Tests that vectorization works with captured inputs
+    y = constant_op.constant(1, shape=(2,))
+    z = constant_op.constant(2, shape=(2,))
+
     def map_fn(x):
-      return x + y
+      return x, y, z
 
-    y = constant_op.constant(1, shape=(2,))
     base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2],
                                                            [3, 4]]).repeat(5)
-    # TODO(rachelim): when this optimization works, turn on expect_optimized
     unoptimized, optimized = self._get_test_datasets(
-        base_dataset, map_fn, expect_optimized=False)
+        base_dataset, map_fn, expect_optimized=True)
     self.assertDatasetsEqual(optimized, unoptimized)
 
   def testOptimizationIgnoreStateful(self):
-- 
GitLab


From aa8f428a9310b3fd8371bddf612e480b27618b2e Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 9 Oct 2018 10:47:19 -0700
Subject: [PATCH 1303/1357] Removing the _SHOULD_RECORD_SUMMARIES_NAME and
 _SUMMARY_WRITER_INIT_COLLECTION_NAME collections from the summaryV2
 implementation. Replacing them with global variables.

PiperOrigin-RevId: 216383152
---
 tensorflow/python/ops/summary_ops_v2.py | 56 +++++++++++++------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py
index a404507627..18cefb8e1c 100644
--- a/tensorflow/python/ops/summary_ops_v2.py
+++ b/tensorflow/python/ops/summary_ops_v2.py
@@ -43,11 +43,12 @@ from tensorflow.python.training import training_util
 from tensorflow.python.util import tf_contextlib
 
 
-# Name for a collection which is expected to have at most a single boolean
-# Tensor. If this tensor is True the summary ops will record summaries.
-_SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries"
+# A global dictionary mapping graph keys to boolean values indicating whether
+# we should record summaries for this particular graph or not.
+_SHOULD_RECORD_SUMMARIES = {}
 
-_SUMMARY_WRITER_INIT_COLLECTION_NAME = "_SUMMARY_WRITER_V2"
+# A global dictionary mapping graph keys to a list of summary writer init ops.
+_SUMMARY_WRITER_INIT_OP = {}
 
 _EXPERIMENT_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,256}$")
 _RUN_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,512}$")
@@ -56,14 +57,9 @@ _USER_NAME_PATTERNS = re.compile(r"^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$", re.I)
 
 def should_record_summaries():
   """Returns boolean Tensor which is true if summaries should be recorded."""
-  should_record_collection = ops.get_collection(_SHOULD_RECORD_SUMMARIES_NAME)
-  if not should_record_collection:
-    return False
-  if len(should_record_collection) != 1:
-    raise ValueError(
-        "More than one tensor specified for whether summaries "
-        "should be recorded: %s" % should_record_collection)
-  return should_record_collection[0]
+  global _SHOULD_RECORD_SUMMARIES
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  return _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
 
 
 # TODO(apassos) consider how to handle local step here.
@@ -72,38 +68,41 @@ def record_summaries_every_n_global_steps(n, global_step=None):
   """Sets the should_record_summaries Tensor to true if global_step % n == 0."""
   if global_step is None:
     global_step = training_util.get_or_create_global_step()
-  collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME)
-  old = collection_ref[:]
+  global _SHOULD_RECORD_SUMMARIES
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
   try:
     with ops.device("cpu:0"):
-      collection_ref[:] = [math_ops.equal(global_step % n, 0)]
+      _SHOULD_RECORD_SUMMARIES[key] = math_ops.equal(global_step % n, 0)
     yield
   finally:
-    collection_ref[:] = old
+    _SHOULD_RECORD_SUMMARIES[key] = old
 
 
 @tf_contextlib.contextmanager
 def always_record_summaries():
   """Sets the should_record_summaries Tensor to always true."""
-  collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME)
-  old = collection_ref[:]
+  global _SHOULD_RECORD_SUMMARIES
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
   try:
-    collection_ref[:] = [True]
+    _SHOULD_RECORD_SUMMARIES[key] = True
     yield
   finally:
-    collection_ref[:] = old
+    _SHOULD_RECORD_SUMMARIES[key] = old
 
 
 @tf_contextlib.contextmanager
 def never_record_summaries():
   """Sets the should_record_summaries Tensor to always false."""
-  collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME)
-  old = collection_ref[:]
+  global _SHOULD_RECORD_SUMMARIES
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
   try:
-    collection_ref[:] = [False]
+    _SHOULD_RECORD_SUMMARIES[key] = False
     yield
   finally:
-    collection_ref[:] = old
+    _SHOULD_RECORD_SUMMARIES[key] = old
 
 
 class SummaryWriter(object):
@@ -143,7 +142,6 @@ class SummaryWriter(object):
       finally:
         context.context().summary_writer_resource = old
 
-
   def init(self):
     """Operation to initialize the summary writer resource."""
     if self._resource is not None:
@@ -311,7 +309,9 @@ def _make_summary_writer(name, factory, **kwargs):
   if not context.executing_eagerly():
     # TODO(apassos): Consider doing this instead.
     #   ops.get_default_session().run(init_op)
-    ops.add_to_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME, init_op)
+    global _SUMMARY_WRITER_INIT_OP
+    key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+    _SUMMARY_WRITER_INIT_OP.setdefault(key, []).append(init_op)
   return SummaryWriter(resource, init_op_fn)
 
 
@@ -352,7 +352,9 @@ def summary_writer_initializer_op():
     raise RuntimeError(
         "tf.contrib.summary.summary_writer_initializer_op is only "
         "supported in graph mode.")
-  return ops.get_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME)
+  global _SUMMARY_WRITER_INIT_OP
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  return _SUMMARY_WRITER_INIT_OP.setdefault(key, [])
 
 
 def summary_writer_function(name, tensor, function, family=None):
-- 
GitLab


From 3e8af7ea6b70104b05be22797451d0218c9e5262 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Tue, 9 Oct 2018 10:58:03 -0700
Subject: [PATCH 1304/1357] Internal change.

PiperOrigin-RevId: 216385202
---
 .../lite/testing/model_coverage/model_coverage_lib.py  | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
index 72029ed03c..ab29f71138 100644
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
@@ -297,7 +297,7 @@ def test_saved_model(directory, tag_set=None, signature_key=None, **kwargs):
   compare_models_random_data(tflite_model, tf_eval_func)
 
 
-def test_keras_model(filename, **kwargs):
+def test_keras_model(filename, input_arrays=None, input_shapes=None, **kwargs):
   """Validates the tf.keras model converts to a TFLite model.
 
   Converts the tf.keras model to TFLite and checks the accuracy of the model on
@@ -305,9 +305,15 @@ def test_keras_model(filename, **kwargs):
 
   Args:
     filename: Full filepath of HDF5 file containing the tf.keras model.
+    input_arrays: List of input tensors to freeze graph with.
+    input_shapes: Dict of strings representing input tensor names to list of
+      integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}).
+      Automatically determined when input shapes is None (e.g., {"foo" : None}).
+        (default None)
     **kwargs: Additional arguments to be passed into the converter.
   """
-  converter = _lite.TFLiteConverter.from_keras_model_file(filename)
+  converter = _lite.TFLiteConverter.from_keras_model_file(
+      filename, input_arrays=input_arrays, input_shapes=input_shapes)
   tflite_model = _convert(converter, **kwargs)
 
   tf_eval_func = evaluate_keras_model(filename)
-- 
GitLab


From 1e4a3baad388b5d5250efdb19f91d5b670816fbe Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Tue, 9 Oct 2018 11:03:57 -0700
Subject: [PATCH 1305/1357] Update TFLite Converter documentation.

PiperOrigin-RevId: 216386450
---
 tensorflow/contrib/lite/toco/README.md        |  9 +-
 .../lite/toco/g3doc/cmdline_examples.md       | 66 ++++++-------
 .../lite/toco/g3doc/cmdline_reference.md      |  8 +-
 .../contrib/lite/toco/g3doc/python_api.md     | 95 ++++++++++---------
 4 files changed, 93 insertions(+), 85 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/README.md b/tensorflow/contrib/lite/toco/README.md
index 2db6a627ab..91f6f618a3 100644
--- a/tensorflow/contrib/lite/toco/README.md
+++ b/tensorflow/contrib/lite/toco/README.md
@@ -1,6 +1,6 @@
-# TOCO: TensorFlow Lite Optimizing Converter
+# TensorFlow Lite Converter
 
-The TensorFlow Lite Optimizing Converter converts TensorFlow graphs into
+The TensorFlow Lite Converter converts TensorFlow graphs into
 TensorFlow Lite graphs. There are additional usages that are also detailed in
 the usage documentation.
 
@@ -14,9 +14,10 @@ Usage information is given in these documents:
 
 ## Where the converter fits in the TensorFlow landscape
 
-Once an application developer has a trained TensorFlow model, TOCO will accept
+Once an application developer has a trained TensorFlow model, the TensorFlow
+Lite Converter will accept
 that model and generate a TensorFlow Lite
-[FlatBuffer](https://google.github.io/flatbuffers/) file. TOCO currently supports
+[FlatBuffer](https://google.github.io/flatbuffers/) file. The converter currently supports
 [SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators),
 frozen graphs (models generated via
 [freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)),
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
index aba7536cbd..e3c46eb377 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
+++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
@@ -1,7 +1,7 @@
-# TensorFlow Lite Optimizing Converter command-line examples
+# TensorFlow Lite Converter command-line examples
 
-This page provides examples on how to use TOCO via command line. It is
-complemented by the following documents:
+This page shows how to use the TensorFlow Lite Converter in the command line. It
+is complemented by the following documents:
 
 *   [README](../README.md)
 *   [Command-line glossary](cmdline_reference.md)
@@ -10,7 +10,7 @@ complemented by the following documents:
 Table of contents:
 
 *   [Command-line tools](#tools)
-    *   [Converting models prior to TensorFlow 1.9.](#pre-tensorflow-1.9)
+    *   [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9)
 *   [Basic examples](#basic)
     *   [Convert a TensorFlow GraphDef](#graphdef)
     *   [Convert a TensorFlow SavedModel](#savedmodel)
@@ -31,27 +31,28 @@ Table of contents:
 
 ## Command-line tools <a name="tools"></a>
 
-There are two approaches to running TOCO via command line.
+There are two approaches to running the converter in the command line.
 
 *   `tflite_convert`: Starting from TensorFlow 1.9, the command-line tool
-    `tflite_convert` will be installed as part of the Python package. All of the
+    `tflite_convert` is installed as part of the Python package. All of the
     examples below use `tflite_convert` for simplicity.
     *   Example: `tflite_convert --output_file=...`
-*   `bazel`: In order to run the latest version of TOCO, [clone the TensorFlow
-    repository](https://www.tensorflow.org/install/source)
-    and use `bazel`. This is the recommended approach for converting models that
-    utilize new features that were not supported by TOCO in TensorFlow 1.9.
+*   `bazel`: In order to run the latest version of the TensorFlow Lite Converter
+    either install the nightly build using
+    [pip](https://www.tensorflow.org/install/pip) or
+    [clone the TensorFlow repository](https://www.tensorflow.org/install/source)
+    and use `bazel`.
     *   Example: `bazel run
         //tensorflow/contrib/lite/python:tflite_convert --
         --output_file=...`
 
-### Converting models prior to TensorFlow 1.9. <a name="pre-tensorflow-1.9"></a>
+### Converting models prior to TensorFlow 1.9 <a name="pre-tensorflow-1.9"></a>
 
-The recommended approach for using TOCO prior to TensorFlow 1.9 is the [Python
-API](python_api.md#pre-tensorflow-1.9). If a command line tool is desired, the
-`toco` command line tool was available in TensorFlow 1.7. Enter `toco --help` in
-Terminal for additional details on the command-line flags available. There were
-no command line tools in TensorFlow 1.8.
+The recommended approach for using the converter prior to TensorFlow 1.9 is the
+[Python API](python_api.md#pre-tensorflow-1.9). If a command line tool is
+desired, the `toco` command line tool was available in TensorFlow 1.7. Enter
+`toco --help` in Terminal for additional details on the command-line flags
+available. There were no command line tools in TensorFlow 1.8.
 
 ## Basic examples <a name="basic"></a>
 
@@ -117,9 +118,9 @@ tflite_convert \
 
 ### Convert a TensorFlow GraphDef for quantized inference <a name="graphdef-quant"></a>
 
-TOCO is compatible with fixed point quantization models described
-[here](https://www.tensorflow.org/performance/quantization). These are float
-models with
+The TensorFlow Lite Converter is compatible with fixed point quantization models
+described [here](https://www.tensorflow.org/performance/quantization). These are
+float models with
 [`FakeQuant*`](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization)
 ops inserted at the boundaries of fused layers to record min-max range
 information. This generates a quantized inference workload that reproduces the
@@ -141,12 +142,12 @@ tflite_convert \
 
 ### Use \"dummy-quantization\" to try out quantized inference on a float graph <a name="dummy-quant"></a>
 
-In order to evaluate the possible benefit of generating a quantized graph, TOCO
-allows "dummy-quantization" on float graphs. The flags `--default_ranges_min`
-and `--default_ranges_max` accept plausible values for the min-max ranges of the
-values in all arrays that do not have min-max information. "Dummy-quantization"
-will produce lower accuracy but will emulate the performance of a correctly
-quantized model.
+In order to evaluate the possible benefit of generating a quantized graph, the
+converter allows "dummy-quantization" on float graphs. The flags
+`--default_ranges_min` and `--default_ranges_max` accept plausible values for
+the min-max ranges of the values in all arrays that do not have min-max
+information. "Dummy-quantization" will produce lower accuracy but will emulate
+the performance of a correctly quantized model.
 
 The example below contains a model using Relu6 activation functions. Therefore,
 a reasonable guess is that most activation ranges should be contained in [0, 6].
@@ -207,10 +208,10 @@ tflite_convert \
 ### Specifying subgraphs
 
 Any array in the input file can be specified as an input or output array in
-order to extract subgraphs out of an input graph file. TOCO discards the parts
-of the graph outside of the specific subgraph. Use [graph
-visualizations](#graph-visualizations) to identify the input and output arrays
-that make up the desired subgraph.
+order to extract subgraphs out of an input graph file. The TensorFlow Lite
+Converter discards the parts of the graph outside of the specific subgraph. Use
+[graph visualizations](#graph-visualizations) to identify the input and output
+arrays that make up the desired subgraph.
 
 The follow command shows how to extract a single fused layer out of a TensorFlow
 GraphDef.
@@ -247,9 +248,10 @@ function tends to get fused).
 
 ## Graph visualizations
 
-TOCO can export a graph to the Graphviz Dot format for easy visualization via
-either the `--output_format` flag or the `--dump_graphviz_dir` flag. The
-subsections below outline the use cases for each.
+The converter can export a graph to the Graphviz Dot format for easy
+visualization using either the `--output_format` flag or the
+`--dump_graphviz_dir` flag. The subsections below outline the use cases for
+each.
 
 ### Using `--output_format=GRAPHVIZ_DOT` <a name="using-output-format-graphviz-dot"></a>
 
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
index 00bc8d4ccb..31200fd657 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
+++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
@@ -1,8 +1,8 @@
-# TensorFlow Lite Optimizing Converter command-line glossary
+# TensorFlow Lite Converter command-line glossary
 
-This page is complete reference of command-line flags used by TOCO's command
-line starting from TensorFlow 1.9 up until the most recent build of TensorFlow.
-It is complemented by the following other documents:
+This page is complete reference of command-line flags used by the TensorFlow
+Lite Converter's command line starting from TensorFlow 1.9 up until the most
+recent build of TensorFlow. It is complemented by the following other documents:
 
 *   [README](../README.md)
 *   [Command-line examples](cmdline_examples.md)
diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md
index 8c31c3dca8..1f741360c6 100644
--- a/tensorflow/contrib/lite/toco/g3doc/python_api.md
+++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md
@@ -1,7 +1,8 @@
-# TensorFlow Lite Optimizing Converter & Interpreter Python API reference
+# TensorFlow Lite Converter & Interpreter Python API reference
 
-This page provides examples on how to use TOCO and the TensorFlow Lite
-interpreter via the Python API. It is complemented by the following documents:
+This page provides examples on how to use the TensorFlow Lite Converter and the
+TensorFlow Lite interpreter using the Python API. It is complemented by the
+following documents:
 
 *   [README](../README.md)
 *   [Command-line examples](cmdline_examples.md)
@@ -23,39 +24,35 @@ Table of contents:
     *   [Using the interpreter from model data](#interpreter-data)
 *   [Additional instructions](#additional-instructions)
     *   [Build from source code](#latest-package)
-    *   [Converting models prior to TensorFlow 1.9.](#pre-tensorflow-1.9)
+    *   [Converting models in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11)
+    *   [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9)
 
 ## High-level overview
 
-While the TensorFlow Lite Optimizing Converter can be used from the command
-line, it is often convenient to use it as part of a Python model build and
-training script. This is so that conversion can be part of your model
-development pipeline. This allows you to know early and often that you are
-designing a model that can be targeted to devices with mobile.
+While the TensorFlow Lite Converter can be used from the command line, it is
+often convenient to use in a Python script as part of the model development
+pipeline. This allows you to know early that you are designing a model that can
+be targeted to devices with mobile.
 
 ## API
 
 The API for converting TensorFlow models to TensorFlow Lite as of TensorFlow 1.9
-is `tf.contrib.lite.TocoConverter`. The API for calling the Python intepreter is
-`tf.contrib.lite.Interpreter`.
-
-**NOTE**: As of TensorFlow 1.12, the API for converting TensorFlow models to
-TFLite will be renamed to `TFLiteConverter`. `TFLiteConverter` is semantically
-identically to `TocoConverter`. The API is available at
-`tf.contrib.lite.TFLiteConverter` as of the Sept 26 `tf-nightly`.
-
-`TocoConverter` provides class methods based on the original format of the
-model. `TocoConverter.from_session()` is available for GraphDefs.
-`TocoConverter.from_saved_model()` is available for SavedModels.
-`TocoConverter.from_keras_model_file()` is available for `tf.Keras` files.
+is `tf.contrib.lite.TFLiteConverter`. The API for calling the Python intepreter
+is `tf.contrib.lite.Interpreter`.
+
+Note: Reference "Additional Instructions" sections for converting TensorFlow
+models to TensorFlow Lite
+[in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11) and
+[prior to TensorFlow 1.9](#pre-tensorflow-1.9)
+
+`TFLiteConverter` provides class methods based on the original format of the
+model. `TFLiteConverter.from_session()` is available for GraphDefs.
+`TFLiteConverter.from_saved_model()` is available for SavedModels.
+`TFLiteConverter.from_keras_model_file()` is available for `tf.Keras` files.
 Example usages for simple float-point models are shown in
 [Basic Examples](#basic). Examples usages for more complex models is shown in
 [Complex Examples](#complex).
 
-**NOTE**: Currently, `TocoConverter` will cause a fatal error to the Python
-interpreter when the conversion fails. This will be remedied as soon as
-possible.
-
 ## Basic examples <a name="basic"></a>
 
 The following section shows examples of how to convert a basic float-point model
@@ -76,7 +73,7 @@ out = tf.identity(val, name="out")
 
 with tf.Session() as sess:
   sess.run(tf.global_variables_initializer())
-  converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out])
+  converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out])
   tflite_model = converter.convert()
   open("converted_model.tflite", "wb").write(tflite_model)
 ```
@@ -89,7 +86,7 @@ TensorFlow Lite FlatBuffer when the GraphDef is stored in a file. Both `.pb` and
 
 The example uses
 [Mobilenet_1.0_224](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz).
-The function only supports GraphDefs frozen via
+The function only supports GraphDefs frozen using
 [freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py).
 
 ```python
@@ -99,7 +96,7 @@ graph_def_file = "/path/to/Downloads/mobilenet_v1_1.0_224/frozen_graph.pb"
 input_arrays = ["input"]
 output_arrays = ["MobilenetV1/Predictions/Softmax"]
 
-converter = tf.contrib.lite.TocoConverter.from_frozen_graph(
+converter = tf.contrib.lite.TFLiteConverter.from_frozen_graph(
   graph_def_file, input_arrays, output_arrays)
 tflite_model = converter.convert()
 open("converted_model.tflite", "wb").write(tflite_model)
@@ -113,25 +110,26 @@ FlatBuffer.
 ```python
 import tensorflow as tf
 
-converter = tf.contrib.lite.TocoConverter.from_saved_model(saved_model_dir)
+converter = tf.contrib.lite.TFLiteConverter.from_saved_model(saved_model_dir)
 tflite_model = converter.convert()
 open("converted_model.tflite", "wb").write(tflite_model)
 ```
 
 For more complex SavedModels, the optional parameters that can be passed into
-`TocoConverter.from_saved_model()` are `input_arrays`, `input_shapes`,
+`TFLiteConverter.from_saved_model()` are `input_arrays`, `input_shapes`,
 `output_arrays`, `tag_set` and `signature_key`. Details of each parameter are
-available by running `help(tf.contrib.lite.TocoConverter)`.
+available by running `help(tf.contrib.lite.TFLiteConverter)`.
 
 ### Exporting a tf.keras File <a name="basic-keras-file"></a>
 
 The following example shows how to convert a `tf.keras` model into a TensorFlow
-Lite FlatBuffer.
+Lite FlatBuffer. This example requires
+[`h5py`](http://docs.h5py.org/en/latest/build.html) to be installed.
 
 ```python
 import tensorflow as tf
 
-converter = tf.contrib.lite.TocoConverter.from_keras_model_file("keras_model.h5")
+converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file("keras_model.h5")
 tflite_model = converter.convert()
 open("converted_model.tflite", "wb").write(tflite_model)
 ```
@@ -163,7 +161,7 @@ keras_file = "keras_model.h5"
 tf.keras.models.save_model(model, keras_file)
 
 # Convert to TensorFlow Lite model.
-converter = tf.contrib.lite.TocoConverter.from_keras_model_file(keras_file)
+converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file(keras_file)
 tflite_model = converter.convert()
 open("converted_model.tflite", "wb").write(tflite_model)
 ```
@@ -173,7 +171,7 @@ open("converted_model.tflite", "wb").write(tflite_model)
 For models where the default value of the attributes is not sufficient, the
 attribute's values should be set before calling `convert()`. In order to call
 any constants use `tf.contrib.lite.constants.<CONSTANT_NAME>` as seen below with
-`QUANTIZED_UINT8`. Run `help(tf.contrib.lite.TocoConverter)` in the Python
+`QUANTIZED_UINT8`. Run `help(tf.contrib.lite.TFLiteConverter)` in the Python
 terminal for detailed documentation on the attributes.
 
 Although the examples are demonstrated on GraphDefs containing only constants.
@@ -193,7 +191,7 @@ val = img + const
 out = tf.fake_quant_with_min_max_args(val, min=0., max=1., name="output")
 
 with tf.Session() as sess:
-  converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out])
+  converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out])
   converter.inference_type = tf.contrib.lite.constants.QUANTIZED_UINT8
   input_arrays = converter.get_input_arrays()
   converter.quantized_input_stats = {input_arrays[0] : (0., 1.)}  # mean, std_dev
@@ -250,7 +248,7 @@ val = img + const
 out = tf.identity(val, name="out")
 
 with tf.Session() as sess:
-  converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out])
+  converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out])
   tflite_model = converter.convert()
 
 # Load TFLite model and allocate tensors.
@@ -262,13 +260,20 @@ interpreter.allocate_tensors()
 
 ### Build from source code <a name="latest-package"></a>
 
-In order to run the latest version of the TOCO Python API, clone the TensorFlow
-repository, configure the installation, and build and install the pip package.
-Detailed instructions are available
-[here](https://www.tensorflow.org/install/source).
+In order to run the latest version of the TensorFlow Lite Converter Python API,
+either install the nightly build with
+[pip](https://www.tensorflow.org/install/pip) (recommended) or
+[Docker](https://www.tensorflow.org/install/docker), or
+[build the pip package from source](https://www.tensorflow.org/install/source).
+
+### Converting models in TensorFlow 1.9 to TensorFlow 1.11 <a name="#pre-tensorflow-1.11"></a>
+
+To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.9 through
+TensorFlow 1.11, use `TocoConverter`. `TocoConverter` is semantically
+identically to `TFLiteConverter`.
 
-### Converting models prior to TensorFlow 1.9. <a name="pre-tensorflow-1.9"></a>
+### Converting models prior to TensorFlow 1.9 <a name="pre-tensorflow-1.9"></a>
 
-To use TOCO in TensorFlow 1.7 and TensorFlow 1.8, use the `toco_convert`
-function. Run `help(tf.contrib.lite.toco_convert)` to get details about accepted
-parameters.
+To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.7 and TensorFlow
+1.8, use the `toco_convert` function. Run `help(tf.contrib.lite.toco_convert)`
+to get details about accepted parameters.
-- 
GitLab


From 84ace0358526bb51c04a3bef4b3072b93b9d1bec Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Tue, 9 Oct 2018 11:16:32 -0700
Subject: [PATCH 1306/1357] Improves tf.function prototype.

Specifically:
 - renames from def_function
 - returns an object with well-defined methods
 - doesn't force-retrace twice
 - uses the python descriptor API ( https://docs.python.org/3/howto/descriptor.html )
   to remove the need for a tf.method
PiperOrigin-RevId: 216388957
---
 tensorflow/python/eager/def_function.py      | 188 +++++++++++++++----
 tensorflow/python/eager/def_function_test.py |  32 +++-
 2 files changed, 179 insertions(+), 41 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 8dcacd5c99..b23891d394 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -19,8 +19,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+import weakref
+
 from tensorflow.python.eager import context
-from tensorflow.python.eager import function
+from tensorflow.python.eager import function as function_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -165,71 +168,184 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
     self._cached_shape_as_list = None
 
 
-def _defun_with_scope(scope, fn):
+def _defun_with_scope(scope, fn, input_signature):
 
   def wrapped_fn(*args, **kwds):
     with variable_scope.variable_creator_scope(scope):
       return fn(*args, **kwds)
 
-  return function.defun(wrapped_fn)
+  return function_lib.defun(wrapped_fn, input_signature=input_signature)
 
 
-def def_function(fn):
-  """Defines a function as per the "functions, not sessions" document."""
+def _call_concrete(fn, args, unused_kwargs):
+  """Calls the given concrete function with only the tensor arguments."""
+
+  def inner():
+    # TODO(apassos) figure out what to do with kwargs and concrete functions.
+    return fn(*[x for x in args if isinstance(x, ops.Tensor)])
+
+  return inner
+
+
+class PolymorphicFunction(object):
+  """Wrapper class for the graph functions defined for a Python function.
+
+  See the documentation for `tf.function` for more information on the semantics
+  of defined functions.
 
-  # Wrapping the values in lists to bypass python's lack of way to mutate
-  # symbols from an outer scope.
-  first_call = [True]
-  function_to_call = []
+  PolymorphicFunction is thread-compatible.
+  """
+
+  def __init__(self,
+               python_function,
+               input_signature=None,):
+    """Initializes a polymorphic function.
+
+    Args:
+      python_function: the function to be wrapped.
+      input_signature: a possibly nested sequence of `TensorSpec` objects
+        specifying the input signature of this function. If `None`, a separate
+        function is instantiated for each inferred input signature.
+
+    Raises:
+      ValueError: if `input_signature` is not None and the `python_function`'s
+        argspec has keyword arguments.
+    """
+    self._python_function = python_function
+    self._input_signature = input_signature
+    self._created_variables = None
+    self._stateful_fn = None
+    self._descriptor_cache = weakref.WeakKeyDictionary()
 
-  # TODO(apassos) represent this as an object and not as a closure.
-  def decorated_fn(*args, **kwds):
-    """Graph function for fn."""
-    if not first_call[0]:
-      return function_to_call[0](*args, **kwds)
+  def _initialize(self, args, kwds):
+    """Initializes, on the first call."""
 
-    first_call[0] = False
-    created_variables = []
+    self._created_variables = []
 
-    def variable_creator_scope(unused_next_creator, **kwds):
+    def variable_capturing_scope(unused_next_creator, **kwds):
       """Creates UnliftedInitializerVariables and saves references to them."""
       v = UnliftedInitializerVariable(**kwds)
-      created_variables.append(v)
+      self._created_variables.append(v)
       return v
 
-    first_graph_function = _defun_with_scope(variable_creator_scope, fn)
+    self._stateful_fn = _defun_with_scope(
+        variable_capturing_scope, self._python_function, self._input_signature)
 
     # Force the definition of the function for these arguments
-    first_concrete = first_graph_function.get_concrete_function(*args, **kwds)
+    self._concrete_stateful_fn = self._stateful_fn.get_concrete_function(
+        *args, **kwds)
 
     def invalid_creator_scope(*unused_args, **unused_kwds):
       """Disables variable creation."""
       raise ValueError(
-          "def_function-decorated function tried to create "
-          "variables on second call.")
+          "tf.function-decorated function tried to create "
+          "variables on non-first call.")
 
-    second_graph_function = _defun_with_scope(invalid_creator_scope, fn)
+    self._stateless_fn = _defun_with_scope(
+        invalid_creator_scope, self._python_function, self._input_signature)
 
-    function_to_call.append(second_graph_function)
-    if not created_variables:
-      # Note: this retracing might be unnecessary, but running the function
-      # forever in the scope which disallows variable creation is safer than not
-      # doing so.
-      return second_graph_function(*args, **kwds)
+  def __call__(self, *args, **kwds):
+    """Calls the graph function."""
+    if self._created_variables:
+      # In this case we have created variables on the first call, so we run the
+      # defunned version which is guaranteed to never create variables.
+      return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
+    elif self._stateful_fn is not None:
+      # In this case we have not created variables on the first call. So we can
+      # run the first trace but we should fail if variables are created.
+      results = self._first_trace(*args, **kwds)
+      if self._created_variables:
+        raise ValueError("Creating variables on a non-first call to a function"
+                         " decorated with tf.function.")
+      return results
+
+    self._initialize(args, kwds)
+
+    if not self._created_variables:
+      # If we did not create any variables the trace we have is good enough.
+      return _call_concrete(self._concrete_stateful_fn, args, kwds)()
 
     def fn_with_cond(*inner_args, **inner_kwds):
       """Conditionally runs initialization if it's needed."""
       condition = True
-      for variable in created_variables:
+      for variable in self._created_variables:
         condition = condition and resource_variable_ops.var_is_initialized_op(
             variable.handle)
-      # We want to call second_graph_function if possible because it avoids
-      # recomputing potentially expensive initializers.
+      # We want to call stateless_fn if possible because it avoids recomputing
+      # potentially expensive initializers.
       return control_flow_ops.cond(
           condition,
-          lambda: second_graph_function(*inner_args, **inner_kwds),
-          lambda: first_concrete(*inner_args, **inner_kwds))
+          lambda: self._stateless_fn(*inner_args, **inner_kwds),
+          _call_concrete(self._concrete_stateful_fn, inner_args, inner_kwds))
+
+    return function_lib.defun(fn_with_cond)(*args, **kwds)
+
+  @property
+  def python_function(self):
+    """The python function wrapped in this tf.function."""
+    return self._python_function
+
+  def get_concrete_function(self, *args, **kwargs):
+    """Returns a `Function` object specialized to inputs and execution context.
+
+    `args` and `kwargs` are ignored if this `PolymorphicFunction` was created
+    with an `input_signature`.
+
+    Args:
+      *args: inputs to specialize on.
+      **kwargs: inputs to specialize on.
 
-    return function.defun(fn_with_cond)(*args, **kwds)
+    Raises:
+      ValueError: if this object has not yet been called on concrete values.
+    """
+    # TODO(apassos) figure out how to handle this case (what should we return
+    # here?)
+    if self._stateful_fn is None:
+      raise ValueError(
+          "Call this function with concrete values before asking for a"
+          " concrete function. Calling the function will ensure that, in"
+          " case this function creates variables, that those are properly"
+          " initialized.")
+    if self._created_variables:
+      # In this case we have created variables on the first call, so we run the
+      # defunned version which is guaranteed to never create variables.
+      return self._stateless_fn.get_concrete_function(*args, **kwargs)
+    elif self._stateful_fn is not None:
+      # In this case we have not created variables on the first call. So we can
+      # run the first trace but we should fail if variables are created.
+      concrete = self._first_trace.get_concrete_function(*args, **kwargs)
+      if self._created_variables:
+        raise ValueError("Creating variables on a non-first call to a function"
+                         " decorated with tf.function.")
+      return concrete
 
-  return decorated_fn
+  def __get__(self, instance, owner):
+    """Makes it possible to defun instance methods."""
+    del owner
+    # `instance` here is the instance that this `PolymorphicFunction` was
+    # accessed through; e.g., for
+    #
+    #   class Foo(object):
+    #
+    #     @function.defun
+    #     def bar(self):
+    #       ...
+    #
+    #   foo = Foo()
+    #   foo.bar()  # `foo.bar` is a `PolymorphicFunction` instance
+    #
+    # then `instance` will be `foo` (and `owner` will be `Foo`).  We create a
+    # new instance of PolymorphicFunction here to allow different instances each
+    # to create variables once, thereby allowing methods to be decorated with
+    # tf.function. Keeps a cache to avoid retracing the function every time the
+    # descriptor is accessed.
+    if instance not in self._descriptor_cache:
+      self._descriptor_cache[instance] = PolymorphicFunction(
+          functools.partial(self.python_function, instance),
+          self._input_signature)
+    return self._descriptor_cache[instance]
+
+
+def function(fn=None, input_signature=None):
+  """Defines a function as per the "functions, not sessions" document."""
+  return PolymorphicFunction(fn, input_signature)
diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py
index 804436c4bb..39bad726d0 100644
--- a/tensorflow/python/eager/def_function_test.py
+++ b/tensorflow/python/eager/def_function_test.py
@@ -29,7 +29,7 @@ class DefFunctionTest(test.TestCase):
 
   def testNoVariables(self):
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       return 2 * x
 
@@ -37,7 +37,7 @@ class DefFunctionTest(test.TestCase):
 
   def testFailIfVariablesAreCreatedMoreThanOnce(self):
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       return variables.Variable(1.0) + x
 
@@ -47,7 +47,7 @@ class DefFunctionTest(test.TestCase):
   def testFailIfVariablesAreCreatedMoreThanOnceNoWeakRef(self):
     state = []
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       state.append(variables.Variable(1.0))
       return state[-1] + x
@@ -59,7 +59,7 @@ class DefFunctionTest(test.TestCase):
 
     state = []
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       if not state:
         state.append(variables.Variable(2.0))
@@ -72,7 +72,7 @@ class DefFunctionTest(test.TestCase):
 
     state = []
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       if not state:
         state.append(variables.Variable(2.0 * x))
@@ -81,6 +81,28 @@ class DefFunctionTest(test.TestCase):
     self.assertAllEqual(fn(constant_op.constant(1.0)), 2.0)
     self.assertAllEqual(fn(constant_op.constant(3.0)), 6.0)
 
+  def testMethod(self):
+
+    class MyModel(object):
+
+      def __init__(self):
+        self.var = None
+
+      @def_function.function
+      def apply(self, x):
+        if self.var is None:
+          self.var = variables.Variable(2.0)
+        return self.var * x
+
+    m0 = MyModel()
+    self.assertAllEqual(m0.apply(3.0), 6.0)
+    # Calling twice to exercise that we do not recreate variables.
+    m0.var.assign(3.0)
+    self.assertAllEqual(m0.apply(3.0), 9.0)
+
+    m1 = MyModel()
+    self.assertAllEqual(m1.apply(3.0), 6.0)
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
-- 
GitLab


From 931353c5f79c2d419afb3a5ecac59184c5558351 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 11:37:29 -0700
Subject: [PATCH 1307/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 216392772
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 119 ++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 119 ++++++++++++++++++
 2 files changed, 238 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 9df0ece69b..dcea70dffb 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -58500,6 +58500,125 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "SdcaOptimizerV2"
+  input_arg {
+    name: "sparse_example_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_values"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features_with_values"
+  }
+  input_arg {
+    name: "dense_features"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_weights"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "example_labels"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_delta_sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  output_arg {
+    name: "out_delta_dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  attr {
+    name: "loss_type"
+    type: "string"
+    allowed_values {
+      list {
+        s: "logistic_loss"
+        s: "squared_loss"
+        s: "hinge_loss"
+        s: "smooth_hinge_loss"
+        s: "poisson_loss"
+      }
+    }
+  }
+  attr {
+    name: "adaptive"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "num_sparse_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_sparse_features_with_values"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_dense_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "l1"
+    type: "float"
+  }
+  attr {
+    name: "l2"
+    type: "float"
+  }
+  attr {
+    name: "num_loss_partitions"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_inner_iterations"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "SdcaShrinkL1"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 2048ad26ac..93a297458f 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -28140,6 +28140,125 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "SdcaOptimizerV2"
+  input_arg {
+    name: "sparse_example_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_values"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features_with_values"
+  }
+  input_arg {
+    name: "dense_features"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_weights"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "example_labels"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_delta_sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  output_arg {
+    name: "out_delta_dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  attr {
+    name: "loss_type"
+    type: "string"
+    allowed_values {
+      list {
+        s: "logistic_loss"
+        s: "squared_loss"
+        s: "hinge_loss"
+        s: "smooth_hinge_loss"
+        s: "poisson_loss"
+      }
+    }
+  }
+  attr {
+    name: "adaptive"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "num_sparse_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_sparse_features_with_values"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_dense_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "l1"
+    type: "float"
+  }
+  attr {
+    name: "l2"
+    type: "float"
+  }
+  attr {
+    name: "num_loss_partitions"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_inner_iterations"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "SdcaShrinkL1"
   input_arg {
-- 
GitLab


From 12e164d1e7c0b197f06d5d3c2ed26318b89b5e4c Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Tue, 9 Oct 2018 11:38:15 -0700
Subject: [PATCH 1308/1357] Return ::tensorflow::Status in Toco Graph
 Transformations. PiperOrigin-RevId: 216392908

---
 .../convert_expanddims_to_reshape.cc          | 16 +++++----
 .../convert_pure_conv_to_depthwise.cc         | 24 +++++++------
 .../convert_reorder_axes.cc                   | 15 +++++---
 .../convert_squeeze_to_reshape.cc             | 18 ++++++----
 .../convert_trivial_addn_to_add.cc            | 12 ++++---
 .../convert_trivial_pack_to_reshape.cc        | 16 +++++----
 .../convert_trivial_tile_to_concat.cc         | 16 +++++----
 .../convert_trivial_transpose_to_reshape.cc   | 16 +++++----
 .../create_im2col_arrays.cc                   | 12 ++++---
 .../toco/graph_transformations/dequantize.cc  | 14 +++++---
 .../graph_transformations/drop_fake_quant.cc  | 13 ++++---
 .../drop_im2col_arrays.cc                     | 11 +++---
 .../ensure_bias_vectors.cc                    |  9 +++--
 ...int8_weights_safe_for_fast_int8_kernels.cc | 14 ++++----
 .../fuse_activation_functions.cc              | 22 +++++++-----
 .../fuse_binary_into_following_affine.cc      | 32 +++++++++--------
 .../fuse_binary_into_preceding_affine.cc      | 36 ++++++++++---------
 .../fuse_broadcast_into_following_binary.cc   | 16 +++++----
 .../graph_transformations.cc                  |  2 +-
 .../graph_transformations.h                   | 29 +++++++++------
 .../graph_transformations/hardcode_min_max.cc |  7 ++--
 .../identify_dilated_conv.cc                  | 16 +++++----
 .../identify_l2_normalization.cc              | 22 +++++++-----
 .../graph_transformations/identify_l2_pool.cc | 15 ++++----
 .../graph_transformations/identify_lstm.cc    | 33 +++++++++--------
 .../identify_lstm_merge_inputs.cc             | 16 +++++----
 .../identify_lstm_split_inputs.cc             | 16 +++++----
 .../graph_transformations/identify_prelu.cc   | 19 +++++-----
 .../graph_transformations/identify_relu1.cc   | 17 +++++----
 .../make_initial_dequantize_operator.cc       |  8 +++--
 .../merge_reshape_into_preceding_transpose.cc | 26 +++++++-------
 .../move_binary_operator_before_reshape.cc    | 30 +++++++++-------
 ...gate_activation_function_into_constants.cc | 20 ++++++-----
 .../propagate_array_data_types.cc             | 18 ++++++----
 .../propagate_default_min_max.cc              |  8 +++--
 .../propagate_fake_quant_num_bits.cc          | 12 ++++---
 .../propagate_fixed_sizes.cc                  | 12 ++++---
 .../toco/graph_transformations/quantize.cc    | 13 ++++---
 ...minmax_and_narrow_range_from_fake_quant.cc | 12 ++++---
 .../remove_final_dequantize_op.cc             | 12 ++++---
 .../remove_tensorflow_assert.cc               | 10 ++++--
 .../remove_tensorflow_identity.cc             | 10 ++++--
 .../remove_trivial_binary.cc                  | 22 +++++++-----
 .../remove_trivial_concatenation.cc           | 12 ++++---
 .../remove_trivial_concatenation_input.cc     | 12 ++++---
 .../remove_trivial_fake_quant.cc              | 12 ++++---
 ...emove_trivial_quantized_activation_func.cc | 15 ++++----
 .../remove_trivial_quantized_min_max.cc       | 12 ++++---
 .../remove_trivial_reshape.cc                 | 12 ++++---
 .../remove_trivial_slice.cc                   | 11 +++---
 .../graph_transformations/remove_unused_op.cc | 15 ++++----
 .../reorder_elementwise_unary.cc              | 18 ++++++----
 .../reorder_reshape_transpose.cc              | 24 +++++++------
 .../resolve_batch_normalization.cc            | 12 ++++---
 .../resolve_batch_to_space_nd_attributes.cc   | 21 ++++++-----
 .../resolve_constant_binary.cc                | 16 +++++----
 .../resolve_constant_concatenation.cc         | 24 ++++++++-----
 .../resolve_constant_fake_quant.cc            | 16 +++++----
 .../resolve_constant_fill.cc                  | 26 ++++++++------
 .../resolve_constant_gather.cc                | 20 ++++++-----
 .../resolve_constant_pack.cc                  | 16 +++++----
 .../resolve_constant_random_uniform.cc        | 18 ++++++----
 .../resolve_constant_range.cc                 | 20 ++++++-----
 .../resolve_constant_reshape.cc               | 20 ++++++-----
 .../resolve_constant_select.cc                | 21 ++++++-----
 .../resolve_constant_shape_or_rank.cc         | 16 +++++----
 .../resolve_constant_slice.cc                 | 28 ++++++++-------
 .../resolve_constant_strided_slice.cc         | 20 ++++++-----
 .../resolve_constant_tile.cc                  | 16 +++++----
 .../resolve_constant_transpose.cc             | 18 ++++++----
 .../resolve_constant_unary.cc                 | 28 ++++++++-------
 .../resolve_fake_quant_args_from_vars.cc      | 14 +++++---
 .../resolve_gather_attributes.cc              | 20 +++++++----
 .../resolve_multiply_by_zero.cc               | 30 +++++++++-------
 .../resolve_pad_attributes.cc                 | 17 +++++----
 .../resolve_padv2_attributes.cc               | 17 +++++----
 .../resolve_reduce_attributes.cc              | 30 +++++++++++-----
 .../resolve_reorder_axes.cc                   | 13 ++++---
 .../resolve_reshape_attributes.cc             | 14 +++++---
 .../resolve_slice_attributes.cc               | 22 +++++++-----
 .../resolve_space_to_batch_nd_attributes.cc   | 21 ++++++-----
 .../resolve_squeeze_attributes.cc             | 12 ++++---
 .../resolve_strided_slice_attributes.cc       | 32 ++++++++++-------
 .../resolve_tensorflow_concat.cc              | 12 ++++---
 .../resolve_tensorflow_matmul.cc              | 12 ++++---
 .../resolve_tensorflow_merge.cc               | 12 ++++---
 .../resolve_tensorflow_switch.cc              | 12 ++++---
 .../resolve_transpose_attributes.cc           | 18 ++++++----
 .../shuffle_fc_weights.cc                     | 27 +++++++-------
 .../resolve_constant_concatenation_test.cc    | 15 ++++++--
 .../tests/resolve_constant_unary_test.cc      |  3 +-
 .../unfuse_activation_functions.cc            | 12 ++++---
 .../unpartition_embedding_lookup.cc           | 24 +++++++------
 .../unroll_batch_matmul.cc                    | 15 +++++---
 94 files changed, 1003 insertions(+), 617 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
index 310a88484c..8a945ac435 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
@@ -25,10 +25,13 @@ limitations under the License.
 
 namespace toco {
 
-bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertExpandDimsToReshape::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   auto expand_it = model->operators.begin() + op_index;
   if (expand_it->get()->type != OperatorType::kExpandDims) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   ExpandDimsOperator* expand_op =
       static_cast<ExpandDimsOperator*>(expand_it->get());
@@ -38,18 +41,18 @@ bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) {
   const auto& input_array = model->GetArray(expand_op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& axis_array = model->GetArray(expand_op->inputs[1]);
   if (!axis_array.has_shape()) {
     // Yield until input axis array shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(RequiredBufferSizeForShape(axis_array.shape()), 1);
   if (!axis_array.buffer) {
     // Yield until the input axis array is constant
-    return false;
+    return ::tensorflow::Status::OK();
   }
   int axis = axis_array.GetBuffer<ArrayDataType::kInt32>().data[0];
   std::vector<int> reshape_dims(input_array.shape().dims());
@@ -90,7 +93,8 @@ bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(expand_it->get(), expand_op);
   model->operators.erase(expand_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc
index e88839be5d..a151012891 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc
@@ -24,29 +24,32 @@ limitations under the License.
 
 namespace toco {
 
-bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertPureConvToDepthwise::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   auto conv_it = model->operators.begin() + op_index;
   if (conv_it->get()->type != OperatorType::kConv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* conv_op = static_cast<ConvOperator*>(conv_it->get());
   if (conv_op->stride_width != conv_op->stride_height) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if ((conv_op->dilation_width_factor != 1) ||
       (conv_op->dilation_height_factor != 1)) {
     // Depthwise conv does not support dilation
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& input_array = model->GetArray(conv_op->inputs[0]);
   if (!input_array.has_shape()) {
     // Shapes not propagated yet
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_array.shape().dims(3) != 1) {
     // Not a pure convolution: Conv does accumulation across the depth
     // dimension.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& weights_name = conv_op->inputs[1];
@@ -56,15 +59,15 @@ bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) {
         "Not changing %s to DepthwiseConv because the weights is consumed by "
         "another op.",
         LogName(*conv_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& weights_array = model->GetArray(weights_name);
   if (!weights_array.buffer) {
     // Yield until the weights are resolved as a constant array.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (weights_array.data_type != ArrayDataType::kFloat) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // At this point we know we have a pure conv. Rewrite it as DepthwiseConv.
   AddMessageF(
@@ -112,7 +115,8 @@ bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) {
   }
   *weights_array.mutable_shape()->mutable_dims() = {1, width, height, depth};
   weights_buffer.data = depthwise_conv_weights_data;
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc
index 0d274fc687..4a264e1cf1 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc
@@ -86,9 +86,12 @@ TransposeOperator* CreateTransposeFromReorderAxes(
 
 // Converts ReorderAxes into Transpose and Reshape which are compatible with the
 // TFLite interpreter.
-bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertReorderAxes::Run(Model* model, std::size_t op_index,
+                                             bool* modified) {
+  *modified = false;
   auto reorder_it = model->operators.begin() + op_index;
-  if (reorder_it->get()->type != OperatorType::kReorderAxes) return false;
+  if (reorder_it->get()->type != OperatorType::kReorderAxes)
+    return ::tensorflow::Status::OK();
 
   auto* reorder_op = static_cast<ReorderAxesOperator*>(reorder_it->get());
   CHECK_EQ(reorder_op->inputs.size(), 1);
@@ -113,8 +116,9 @@ bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) {
   // Yield if input array contains constants or if output array size has not
   // been adjusted to reflect the permutations in ReorderAxes. ReorderAxes will
   // be merged into a constant array when possible.
-  if (IsConstantParameterArray(*model, constant_input_array_name)) return false;
-  if (!output_array.has_shape()) return false;
+  if (IsConstantParameterArray(*model, constant_input_array_name))
+    return ::tensorflow::Status::OK();
+  if (!output_array.has_shape()) return ::tensorflow::Status::OK();
 
   const auto input_axes_order = reorder_op->input_axes_order;
   const auto output_axes_order = reorder_op->output_axes_order;
@@ -143,7 +147,8 @@ bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(reorder_it->get(), reorder_op);
   model->operators.erase(reorder_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc
index 81cedb5dad..a0bd1ed4a4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc
@@ -30,10 +30,13 @@ namespace toco {
 // means that the data layout will never change with this op, just the shape.
 // By converting these to reshapes once we have run shape propagation we allow
 // standard reshape optimization transforms to do their magic.
-bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertSqueezeToReshape::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto squeeze_it = model->operators.begin() + op_index;
   if (squeeze_it->get()->type != OperatorType::kSqueeze) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto squeeze_op = static_cast<SqueezeOperator*>(squeeze_it->get());
   CHECK_EQ(squeeze_op->inputs.size(), 1);
@@ -42,16 +45,16 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) {
   const auto& input_array = model->GetArray(squeeze_op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_array.shape().dimensions_count() == 0) {
     // Input array cannot be 0-D.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!model->HasArray(squeeze_op->outputs[0]) ||
       !model->GetArray(squeeze_op->outputs[0]).has_shape()) {
     // Yield until shape propagation has set the output shape for us.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We use the output shape that has been calculated by shape propagation.
@@ -59,7 +62,7 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) {
 
   // Empty shapes will not work as empty data arrays.
   if (output_shape.dimensions_count() == 0) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto* reshape_op = new TensorFlowReshapeOperator;
@@ -79,7 +82,8 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(squeeze_it->get(), squeeze_op);
   model->operators.erase(squeeze_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc
index dcaaddbf3b..d7cacf77f4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc
@@ -20,10 +20,13 @@ namespace toco {
 
 // This pass will convert an AddN operator with only 2 inputs into a regular Add
 // operator, to which more optimizations may apply.
-bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertTrivialAddNToAdd::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto addn_it = model->operators.begin() + op_index;
   if (addn_it->get()->type != OperatorType::kAddN) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   AddNOperator* addn_op = static_cast<AddNOperator*>(addn_it->get());
   CHECK_GE(addn_op->inputs.size(), 2);
@@ -31,7 +34,7 @@ bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) {
 
   // We only reduce AddN with N=2 to a regular Add.
   if (addn_op->inputs.size() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Copy inputs & outputs to regular Add.
@@ -45,7 +48,8 @@ bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) {
   addn_it = add_it + 1;
   CHECK_EQ(addn_it->get(), addn_op);
   model->operators.erase(addn_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc
index 75113a2a8c..78779243a9 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc
@@ -25,27 +25,30 @@ limitations under the License.
 
 namespace toco {
 
-bool ConvertTrivialPackToReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertTrivialPackToReshape::Run(Model* model,
+                                                      std::size_t op_index,
+                                                      bool* modified) {
+  *modified = false;
   auto pack_it = model->operators.begin() + op_index;
   if (pack_it->get()->type != OperatorType::kPack) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* pack_op = static_cast<PackOperator*>(pack_it->get());
   if (pack_op->inputs.size() > 1) {
     // Not trivial.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(pack_op->outputs.size(), 1);
 
   const auto& input_array = model->GetArray(pack_op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_array.shape().dimensions_count() == 0) {
     // Input array cannot be 0-D.
     // (Unsure if this is TF behavior, but was required to get a test to pass.)
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Converting trivial %s to a reshape", LogName(*pack_op));
@@ -75,7 +78,8 @@ bool ConvertTrivialPackToReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(pack_it->get(), pack_op);
   model->operators.erase(pack_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc
index b689be0792..b6d712ca44 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc
@@ -21,10 +21,13 @@ limitations under the License.
 
 namespace toco {
 
-bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertTrivialTileToConcat::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   auto tile_it = model->operators.begin() + op_index;
   if (tile_it->get()->type != OperatorType::kTile) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* tile_op = static_cast<TransposeOperator*>(tile_it->get());
 
@@ -34,13 +37,13 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) {
   if (!input_array.has_shape() || !multiples_array.has_shape() ||
       !output_array.has_shape()) {
     // Yield until PropagateFixedSizes has been run on this op.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Note: We can assume we have error checked inputs in PropagateFixedSizes.
 
   if (!multiples_array.buffer) {
     // Yield until the multiples is constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   std::vector<int32> const& multiples =
       multiples_array.GetBuffer<ArrayDataType::kInt32>().data;
@@ -59,7 +62,7 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) {
     // The tile is non-trivial. Good luck.
     AddMessageF("Tile %s is non-trivial (has more than one multiply dimension)",
                 LogName(*tile_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // The tile is like a concat.
@@ -88,7 +91,8 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(tile_it->get(), tile_op);
   model->operators.erase(tile_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc
index 5a36a90b38..e5a96d4335 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc
@@ -48,10 +48,13 @@ bool TransposeAffectsMemoryOrder(std::vector<int> perm,
 
 }  // namespace
 
-bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertTrivialTransposeToReshape::Run(Model* model,
+                                                           std::size_t op_index,
+                                                           bool* modified) {
+  *modified = false;
   auto transpose_it = model->operators.begin() + op_index;
   if (transpose_it->get()->type != OperatorType::kTranspose) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   TransposeOperator* transpose_op =
       static_cast<TransposeOperator*>(transpose_it->get());
@@ -60,14 +63,14 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) {
   const auto& output_array = model->GetArray(transpose_op->outputs[0]);
   if (!input_array.has_shape() || !output_array.has_shape()) {
     // Yield until PropagateFixedSizes has been run on this op.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Note: We can assume we have error checked inputs in PropagateFixedSizes.
 
   // Check that the permutation has propogated.
   std::vector<int> const& perm = transpose_op->perm;
   if (perm.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // This transpose is trivial if non-unitary dimensions remain in the same
@@ -76,7 +79,7 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) {
   std::vector<int> const& output_dims = output_array.shape().dims();
 
   if (TransposeAffectsMemoryOrder(perm, input_dims)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // This transpose is trivial. Replace it with a Reshape op.
@@ -109,7 +112,8 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(transpose_it->get(), transpose_op);
   model->operators.erase(transpose_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc
index 1e68cd678b..ebc0e9afca 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc
@@ -73,18 +73,22 @@ bool ProcessTransposeConvOperator(Model* model, TransposeConvOperator* op) {
   return true;
 }
 
-bool CreateIm2colArrays::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status CreateIm2colArrays::Run(Model* model, std::size_t op_index,
+                                             bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
 
   switch (op->type) {
     case OperatorType::kConv:
-      return ProcessConvOperator(model, static_cast<ConvOperator*>(op));
+      *modified = ProcessConvOperator(model, static_cast<ConvOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kTransposeConv:
-      return ProcessTransposeConvOperator(
+      *modified = ProcessTransposeConvOperator(
           model, static_cast<TransposeConvOperator*>(op));
+      return ::tensorflow::Status::OK();
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc
index 1688586733..2119174950 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc
@@ -186,24 +186,27 @@ bool DequantizeArray(const string& array_name,
 
 }  // namespace
 
-bool Dequantize::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status Dequantize::Run(Model* model, std::size_t op_index,
+                                     bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
   auto* op = op_it->get();
 
   if (op->type == OperatorType::kDequantize) {
     auto& input_array = model->GetArray(op->inputs[0]);
     if (input_array.data_type == ArrayDataType::kFloat) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (input_array.final_data_type != ArrayDataType::kFloat) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     input_array.data_type = ArrayDataType::kFloat;
     input_array.quantization_params = nullptr;
     auto& output_array = model->GetArray(op->outputs[0]);
     output_array.data_type = ArrayDataType::kFloat;
     output_array.quantization_params = nullptr;
-    return RemoveTrivialPassthroughOp(this, model, op_index);
+    *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+    return ::tensorflow::Status::OK();
   }
 
   std::vector<string> arrays;
@@ -220,7 +223,8 @@ bool Dequantize::Run(Model* model, std::size_t op_index) {
     }
   }
 
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc
index 95558ef5ec..1555cf60a1 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc
@@ -25,21 +25,23 @@ limitations under the License.
 
 namespace toco {
 
-bool DropFakeQuant::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status DropFakeQuant::Run(Model* model, std::size_t op_index,
+                                        bool* modified) {
+  *modified = false;
   const auto fakequant_it = model->operators.begin() + op_index;
   auto* fakequant_base_op = fakequant_it->get();
   if (fakequant_base_op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fakequant_op = static_cast<FakeQuantOperator*>(fakequant_base_op);
 
   if (!fakequant_op->minmax) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& output_array = model->GetArray(fakequant_op->outputs[0]);
   if (!output_array.minmax) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Drop min/max inputs
@@ -50,7 +52,8 @@ bool DropFakeQuant::Run(Model* model, std::size_t op_index) {
   }
   fakequant_op->inputs.resize(1);
 
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc
index f7fd878b7e..7d66ea5dd2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc
@@ -19,15 +19,17 @@ limitations under the License.
 
 namespace toco {
 
-bool DropIm2colArrays::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status DropIm2colArrays::Run(Model* model, std::size_t op_index,
+                                           bool* modified) {
+  *modified = false;
   auto conv_it = model->operators.begin() + op_index;
   if (conv_it->get()->type != OperatorType::kConv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* conv_op = static_cast<ConvOperator*>(conv_it->get());
   if (conv_op->outputs.size() < 2) {
     // Conv op does not have im2col.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Drop the im2col array.
@@ -36,7 +38,8 @@ bool DropIm2colArrays::Run(Model* model, std::size_t op_index) {
   conv_op->outputs.resize(1);
   AddMessageF("Dropped an im2col array for %s", LogName(*conv_op));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc
index e80ed036b3..72b1dda3be 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc
@@ -62,17 +62,20 @@ bool ProcessLinearOperator(Model* model, Operator* op) {
 }
 }  // namespace
 
-bool EnsureBiasVectors::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status EnsureBiasVectors::Run(Model* model, std::size_t op_index,
+                                            bool* modified) {
+  *modified = false;
   auto* op = model->operators[op_index].get();
   if (op->type == OperatorType::kConv ||
       op->type == OperatorType::kDepthwiseConv ||
       op->type == OperatorType::kFullyConnected) {
     if (ProcessLinearOperator(model, op)) {
       AddMessageF("Added bias vector to %s as %s", LogName(*op), op->inputs[2]);
-      return true;
+      *modified = true;
+      return ::tensorflow::Status::OK();
     }
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
index c13fc0de75..60dcd52684 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
@@ -108,8 +108,9 @@ namespace toco {
 // we can foresee these 'fast int8 kernels' to remain important to have into
 // the 2020s.
 //
-bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model,
-                                                   std::size_t op_index) {
+::tensorflow::Status EnsureUint8WeightsSafeForFastInt8Kernels::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   const auto& op = *model->operators[op_index];
   int weights_index = 0;
   switch (op.type) {
@@ -148,16 +149,16 @@ bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model,
       // That's why at the moment we only handle operators that use a GEMM
       // (Conv, fully-connected --- note that LSTM merely wraps a
       // fully-connected operator).
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   const string& name = op.inputs[weights_index];
   auto& array = model->GetArray(name);
   if (!array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (array.data_type != ArrayDataType::kUint8) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& buffer_data = array.GetMutableBuffer<ArrayDataType::kUint8>().data;
 
@@ -212,7 +213,8 @@ bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model,
     AddMessageF("Tweaked weights values for %s", LogName(op));
   }
 
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
index c5ce3fcd95..88511a7d3c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
@@ -25,27 +25,30 @@ limitations under the License.
 
 namespace toco {
 
-bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status FuseActivationFunctions::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto ac_it = model->operators.begin() + op_index;
   const auto* ac_op = ac_it->get();
 
   if (ac_op->type != OperatorType::kRelu6 &&
       ac_op->type != OperatorType::kRelu1 &&
       ac_op->type != OperatorType::kRelu) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Find the op producing the array passed to this activation function
   Operator* op = GetOpWithOutput(*model, ac_op->inputs[0]);
 
-  if (!op) return false;
+  if (!op) return ::tensorflow::Status::OK();
 
   if (CountTrueOutputs(*model, *op) > 1) {
     AddMessageF(
         "Not fusing activation function %s into %s because it has more than "
         "one  consumed output",
         LogName(*ac_op), LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(op->outputs[0], ac_op->inputs[0]);
@@ -57,7 +60,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
         "Not fusing activation function into %s because it is consumed by more "
         "than 1 other operator",
         LogName(*ac_op), LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!IsDiscardableArray(*model, op->outputs[0])) {
@@ -65,7 +68,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
         "Not fusing activation function %s into %s because output %s it is not "
         "discardable",
         LogName(*ac_op), LogName(*op), op->outputs[0]);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (op->fused_activation_function != FusedActivationFunctionType::kNone) {
@@ -73,7 +76,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
         "Not fusing activation function %s into %s because it already has a "
         "fused activation function",
         LogName(*ac_op), LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!OperatorSupportsFusedActivation(op->type)) {
@@ -81,7 +84,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
         "Not fusing activation function %s because the %s op doesn't support "
         "it",
         LogName(*ac_op), LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Fusing activation function %s into the preceding %s",
@@ -98,7 +101,8 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
   model->EraseArray(ac_op->inputs[0]);
   op->outputs[0] = ac_op->outputs[0];
   model->operators.erase(ac_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc
index dcbbead517..0de22b8ff4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc
@@ -150,14 +150,17 @@ void FuseMulOrDivParamsIntoFollowingAffine(Model* model, Operator* following_op,
 
 }  // namespace
 
-bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status FuseBinaryIntoFollowingAffine::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   auto* binary_op = binary_it->get();
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
       binary_op->type != OperatorType::kSub &&
       binary_op->type != OperatorType::kDiv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(binary_op->inputs.size(), 2);
@@ -175,12 +178,12 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
   };
   if (!is_input_constant[0] && !is_input_constant[1]) {
     // Neither input is constant, so nothing we can fuse into a constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (is_input_constant[0] && is_input_constant[1]) {
     // Both inputs are constants. That's a job for constants
     // propagation, not for us to handle here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int index_of_constant_input = is_input_constant[0] ? 0 : 1;
   const int index_of_variable_input = is_input_constant[0] ? 1 : 0;
@@ -192,7 +195,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
     if (index_of_constant_input != 1) {
       AddMessageF("Not fusing %s because the denominator is not constant",
                   LogName(*binary_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -204,7 +207,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
           "Not fusing %s into the following affine op, because we only know "
           "how to do so when the constant operand is a scalar",
           LogName(*binary_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -212,7 +215,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
       FusedActivationFunctionType::kNone) {
     AddMessageF("Not fusing %s because it has a fused activation function",
                 LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   Operator* following_op = GetOpWithInput(*model, binary_op->outputs[0]);
@@ -221,7 +224,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Not fusing %s because it is not consumed by exactly one other op",
         LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (following_op->type != OperatorType::kConv &&
@@ -231,14 +234,14 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the following %s is not of one of the supported "
         "types",
         LogName(*binary_op), LogName(*following_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (following_op->inputs.size() < 3) {
     AddMessageF(
         "Not fusing %s because the following %s does not have a bias vector",
         LogName(*following_op), LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& weights = model->GetArray(following_op->inputs[1]);
@@ -248,7 +251,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the following %s has non-constant weights or "
         "bias arrays",
         LogName(*binary_op), LogName(*following_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Try to fuse the binary params into the following op's params
@@ -260,7 +263,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
         AddMessageF(
             "Not fusing %s because the following %s does not use VALID padding",
             LogName(*binary_op), LogName(*following_op));
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
     if (following_op->type == OperatorType::kDepthwiseConv) {
@@ -269,7 +272,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
         AddMessageF(
             "Not fusing %s because the following %s does not use VALID padding",
             LogName(*binary_op), LogName(*following_op));
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
     FuseAddOrSubParamsIntoFollowingAffine(model, following_op, binary_op,
@@ -294,7 +297,8 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
     model->EraseArray(old_constant_param_name);
   }
   model->operators.erase(binary_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc
index b324631579..b8da756d85 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc
@@ -188,14 +188,17 @@ void FuseMulOrDivParamsIntoPrecedingAffine(Model* model, Operator* preceding_op,
 }
 }  // namespace
 
-bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status FuseBinaryIntoPrecedingAffine::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   const auto* binary_op = binary_it->get();
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
       binary_op->type != OperatorType::kSub &&
       binary_op->type != OperatorType::kDiv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(binary_op->inputs.size(), 2);
@@ -213,12 +216,12 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
   };
   if (!is_input_constant[0] && !is_input_constant[1]) {
     // Neither input is constant, so nothing we can fuse into a constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (is_input_constant[0] && is_input_constant[1]) {
     // Both inputs are constants. That's a job for constants
     // propagation, not for us to handle here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int index_of_constant_input = is_input_constant[0] ? 0 : 1;
   const int index_of_variable_input = is_input_constant[0] ? 1 : 0;
@@ -230,7 +233,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
     if (index_of_constant_input != 1) {
       AddMessageF("Not fusing %s because the denominator is not constant",
                   LogName(*binary_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -239,12 +242,12 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
   if (!preceding_op) {
     AddMessageF("Not fusing %s because it is not the output of another op",
                 LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   for (const string& output_array : model->flags.output_arrays()) {
     if (preceding_op->outputs[0] == output_array) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -255,7 +258,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the preceding %s is not of one of the supported "
         "types",
         LogName(*binary_op), LogName(*preceding_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (preceding_op->fused_activation_function !=
@@ -264,14 +267,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the preceding %s has a fused activation "
         "function",
         LogName(*binary_op), LogName(*preceding_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (preceding_op->inputs.size() < 3) {
     AddMessageF(
         "Not fusing %s because the preceding %s does not have a bias vector",
         LogName(*binary_op), LogName(*preceding_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& weights_name = preceding_op->inputs[1];
@@ -289,14 +292,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
           "Not fusing %s because the preceding %s has a non-constant bias "
           "array",
           LogName(*binary_op), LogName(*preceding_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (count_ops_consuming_bias > 1) {
       AddMessageF(
           "Not fusing %s because the bias of the preceding %s is consumed by "
           "another op",
           LogName(*binary_op), LogName(*preceding_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   } else {
     if (!weights.buffer || !bias.buffer) {
@@ -304,14 +307,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
           "Not fusing %s because the preceding %s has non-constant weights or "
           "bias arrays",
           LogName(*binary_op), LogName(*preceding_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (count_ops_consuming_weights > 1 || count_ops_consuming_bias > 1) {
       AddMessageF(
           "Not fusing %s because the weights or bias of the preceding %s is "
           "consumed by another op",
           LogName(*binary_op), LogName(*preceding_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -323,7 +326,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the output of the preceding %s is consumed by "
         "another op",
         LogName(*binary_op), LogName(*preceding_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Fusing %s into the preceding %s", LogName(*binary_op),
@@ -352,7 +355,8 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
     model->EraseArray(old_constant_param_name);
   }
   model->operators.erase(binary_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc
index 874d8def57..4848867b9a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc
@@ -51,19 +51,22 @@ bool IsBroadcastingOp(const Model& model, Operator* op) {
 // Finds an operation that looks like a broadcast (concat of the same sources
 // along the last dimension) and drops it by relying on the ability of certain
 // binary ops to perform an implicit broadcast.
-bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status FuseBroadcastIntoFollowingBinary::Run(Model* model,
+                                                           std::size_t op_index,
+                                                           bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   auto* binary_op = binary_it->get();
 
   // Test for binary ops of types that we know how to resolve
   if (binary_op->inputs.size() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
       binary_op->type != OperatorType::kSub &&
       binary_op->type != OperatorType::kDiv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // NOTE: either of these ops may be nullptr if the input array is constant.
@@ -78,14 +81,14 @@ bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) {
   if (!is_op_0_broadcast && !is_op_1_broadcast) {
     // Neither input is a broadcast-looking thing.
     AddMessageF("Neither input looks broadcasty");
-    return false;
+    return ::tensorflow::Status::OK();
   } else if (is_op_0_broadcast && is_op_1_broadcast) {
     AddMessageF(
         "Unable to fuse broadcast into %s as both inputs (%s, %s) are "
         "broadcasts",
         LogName(*binary_op), op[0] ? LogName(*op[0]) : "(?)",
         op[1] ? LogName(*op[1]) : "(?)");
-    return false;
+    return ::tensorflow::Status::OK();
   }
   int broadcast_index = is_op_0_broadcast ? 0 : 1;
 
@@ -96,7 +99,8 @@ bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) {
   binary_op->inputs[broadcast_index] = op[broadcast_index]->inputs[0];
 
   // We leave the broadcast op in; it'll get cleaned up if it's not used later.
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
index 6961e23690..8b0bc2d865 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
@@ -142,7 +142,7 @@ bool GraphTransformationsPass(int increment, Model* model,
     for (const auto& transformation : transformations) {
       CHECK(!changed_now);
       CHECK(transformation->Messages().empty());
-      changed_now = transformation->Run(model, op_index);
+      CHECK(transformation->Run(model, op_index, &changed_now).ok());
       const char* made_a_change_msg =
           changed_now ? "made a change" : "did NOT make a change";
       const int log_level =
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
index 4d213b3f9c..a89db320ea 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
@@ -27,7 +27,8 @@ namespace toco {
 
 class GraphTransformation {
  public:
-  virtual bool Run(Model* model, std::size_t op_index) = 0;
+  virtual ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                                   bool* modified) = 0;
   virtual const char* Name() const = 0;
   virtual ~GraphTransformation() {}
   // Returns the list of messages that this graph transformation
@@ -104,11 +105,12 @@ class GraphTransformationsSet {
 void RunGraphTransformations(Model* model, const string& message,
                              const GraphTransformationsSet& transformations);
 
-#define DECLARE_GRAPH_TRANSFORMATION(GTName)               \
-  class GTName : public GraphTransformation {              \
-   public:                                                 \
-    bool Run(Model* model, std::size_t op_index) override; \
-    const char* Name() const override { return #GTName; }  \
+#define DECLARE_GRAPH_TRANSFORMATION(GTName)                     \
+  class GTName : public GraphTransformation {                    \
+   public:                                                       \
+    ::tensorflow::Status Run(Model* model, std::size_t op_index, \
+                             bool* modified) override;           \
+    const char* Name() const override { return #GTName; }        \
   };
 
 // List of all graph transformations
@@ -200,7 +202,8 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveGatherAttributes)
 
 class PropagateDefaultMinMax : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override { return "PropagateDefaultMinMax"; }
 
   bool has_any_ranges_defined() const { return !type_ranges_.empty(); }
@@ -218,7 +221,8 @@ class PropagateDefaultMinMax : public GraphTransformation {
 
 class RemoveTrivialReshape : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override { return "RemoveTrivialReshape"; }
   bool treat_expand_dims_as_trivial() const {
     return treat_expand_dims_as_trivial_;
@@ -233,7 +237,8 @@ class RemoveTrivialReshape : public GraphTransformation {
 
 class ResolveConstantFakeQuant : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override { return "ResolveConstantFakeQuant"; }
 
   // True if the num_bits should adjust the final data type.
@@ -250,7 +255,8 @@ class ResolveConstantFakeQuant : public GraphTransformation {
 
 class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override {
     return "EnsureUint8WeightsSafeForFastInt8Kernels";
   }
@@ -267,7 +273,8 @@ class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation {
 
 class IdentifyDilatedConv : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override { return "IdentifyDilatedConv"; }
   bool identify_depthwise_conv() const { return identify_depthwise_conv_; }
   void set_identify_depthwise_conv(bool val) { identify_depthwise_conv_ = val; }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
index 3114fa93e8..72df53548b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
@@ -372,7 +372,9 @@ bool HardcodeMinMaxForLstmCell(Model* model, Operator* op) {
 }
 }  // namespace
 
-bool HardcodeMinMax::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status HardcodeMinMax::Run(Model* model, std::size_t op_index,
+                                         bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   bool changed = false;
@@ -467,7 +469,8 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) {
   if (changed) {
     AddMessageF("Hardcoded min-max through %s", LogName(*op));
   }
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc
index aac77eb39e..9e4a3005a1 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc
@@ -168,7 +168,10 @@ bool ResolveDilatedConv(Model* model, Operator* conv_base_op, Operator* stb_op,
   return true;
 }
 
-bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyDilatedConv::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* stb_op = it->get();
 
@@ -176,17 +179,17 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
   // ***************************************************************************
   // SpaceToBatch Op.
   if (stb_op->type != OperatorType::kSpaceToBatchND) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (stb_op->inputs.size() != 3) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(stb_op->outputs.size(), 1);
   // Extract the dilation factor from Input[1] of SpaceToBatch
   // TODO(mjmatthews): Support 2D dilation factors.
   const auto& block_shape_array = model->GetArray(stb_op->inputs[1]);
   if (!block_shape_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(block_shape_array.shape().dimensions_count(), 1);
   int dilation_factor =
@@ -195,7 +198,7 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
   // Expand Op
   auto* post_stb_op = GetOpWithInput(*model, stb_op->outputs[0]);
   if (!post_stb_op) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   bool has_expand_op = false;
   if (post_stb_op->type == OperatorType::kExpandDims) {
@@ -229,7 +232,8 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
     }
   }
 
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc
index b78efd7fc3..78f60f52fb 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc
@@ -39,7 +39,10 @@ std::vector<std::unique_ptr<Operator>>::iterator FindOperator(
 }
 }  // namespace
 
-bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyL2Normalization::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto div_it = model->operators.begin() + op_index;
   const auto* div_or_mul_op = div_it->get();
   OperatorType expected_op_type_producing_div_or_mul_input;
@@ -48,7 +51,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
   } else if (div_or_mul_op->type == OperatorType::kMul) {
     expected_op_type_producing_div_or_mul_input = OperatorType::kRsqrt;
   } else {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(div_or_mul_op->inputs.size(), 2);
   Operator* op_producing_div_or_mul_input[2] = {
@@ -58,14 +61,14 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
   if (!op_producing_div_or_mul_input[1] ||
       op_producing_div_or_mul_input[1]->type !=
           expected_op_type_producing_div_or_mul_input) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   Operator* sqrt_or_rsqrt_op = op_producing_div_or_mul_input[1];
   CHECK_EQ(sqrt_or_rsqrt_op->inputs.size(), 1);
   Operator* op_producing_sqrt_or_rsqrt_input =
       GetOpWithOutput(*model, sqrt_or_rsqrt_op->inputs[0]);
   if (!op_producing_sqrt_or_rsqrt_input) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // There may be an Add or a Maximum here, adding or clamping to a "small"
@@ -105,7 +108,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
           " because the operator producing the input to the square root, %s,"
           ", does not match the expected pattern",
           LogName(*op_producing_sqrt_or_rsqrt_input));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -116,7 +119,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Normalization subgraph: "
         "expected Sum op, got %s",
         LogName(*sum_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   Operator* square_op = GetOpWithOutput(*model, sum_op->inputs[0]);
@@ -125,7 +128,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Normalization subgraph: "
         "expected Square op, got %s",
         LogName(*square_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(square_op->inputs.size(), 1);
@@ -135,7 +138,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Normalization subgraph: %s does not "
         "take the same input as the Mul/Div node",
         LogName(*square_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Create and emplace the new L2Normalization
@@ -162,7 +165,8 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
   model->operators.erase(FindOperator(model, sqrt_or_rsqrt_op));
   model->EraseArray(div_or_mul_op->inputs[1]);
   model->operators.erase(FindOperator(model, div_or_mul_op));
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc
index 705e73779b..13664bb344 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc
@@ -38,11 +38,13 @@ std::vector<std::unique_ptr<Operator>>::iterator FindOperator(
 }
 }  // namespace
 
-bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyL2Pool::Run(Model* model, std::size_t op_index,
+                                         bool* modified) {
+  *modified = false;
   const auto sqrt_it = model->operators.begin() + op_index;
   const auto* sqrt_op = sqrt_it->get();
   if (sqrt_op->type != OperatorType::kSqrt) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(sqrt_op->inputs.size(), 1);
@@ -56,7 +58,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Giving up trying to identify L2Pool subgraph: "
         "expected AveragePool op, but Sqrt op has no preceding op");
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (prev_to_sqrt_op->type != OperatorType::kAveragePool) {
@@ -64,7 +66,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Pool subgraph: "
         "expected AveragePool op, got %s",
         LogName(*prev_to_sqrt_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   avpool_op = static_cast<const AveragePoolOperator*>(prev_to_sqrt_op);
@@ -77,7 +79,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Pool subgraph: "
         "expected Square op, got %s",
         LogName(*square_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Create and emplace L2Pool node.
@@ -107,7 +109,8 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
   model->operators.erase(FindOperator(model, avpool_op));
   model->operators.erase(FindOperator(model, sqrt_op));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc
index c0b014b45e..7fd8f906e2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc
@@ -132,7 +132,9 @@ bool MatchOperatorInputs(const Operator& op, const Model& model,
 
 }  // namespace
 
-bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyLstmCell::Run(Model* model, std::size_t op_index,
+                                           bool* modified) {
+  *modified = false;
   // This LSTM cell identification method is not invariant to commutation of
   // commutative operator inputs. For example, if input[0] and input[1] of the
   // final output multiplication were swapped, this method would not identify it
@@ -143,13 +145,13 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   auto op_it = model->operators.begin() + op_index;
   Operator* final_output_mul = op_it->get();
   if (final_output_mul->type != OperatorType::kMul) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   Operator *state_output_tanh, *fc_output_sig;
   if (!MatchOperatorInputs(*final_output_mul, *model, OperatorType::kTanh,
                            &state_output_tanh, OperatorType::kLogistic,
                            &fc_output_sig)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // State output TanH
@@ -158,7 +160,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   Operator* state_combine_add;
   if (!MatchOperatorInputs(*state_output_tanh, *model, OperatorType::kAdd,
                            &state_combine_add)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // State forget & remember addition
@@ -166,7 +168,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   if (!MatchOperatorInputs(*state_combine_add, *model, OperatorType::kMul,
                            &state_forget_mul, OperatorType::kMul,
                            &state_remember_mul)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const string prev_state = state_forget_mul->inputs[0];
 
@@ -175,7 +177,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   if (!MatchOperatorInputs(*state_forget_mul, *model, OperatorType::kNone,
                            nullptr, OperatorType::kLogistic,
                            &state_forget_sig)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // State remember gate
@@ -183,40 +185,40 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   if (!MatchOperatorInputs(*state_remember_mul, *model, OperatorType::kLogistic,
                            &state_remember_sig, OperatorType::kTanh,
                            &state_info_tanh)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // State remember "information" activation function
   Operator* fc_output_split;
   if (!MatchOperatorInputs(*state_info_tanh, *model, OperatorType::kSplit,
                            &fc_output_split)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // State remember gate activation function
   Operator* tmp;
   if (!MatchOperatorInputs(*state_remember_sig, *model, OperatorType::kSplit,
                            &tmp) ||
       (tmp != fc_output_split)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // State forget gate activation function
   if (!MatchOperatorInputs(*state_forget_sig, *model, OperatorType::kSplit,
                            &tmp) ||
       (tmp != fc_output_split)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Fully connected output activation function
   if (!MatchOperatorInputs(*fc_output_sig, *model, OperatorType::kSplit,
                            &tmp) ||
       (tmp != fc_output_split)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Fully connected output split
   Operator* fully_connected;
   if (!MatchOperatorInputs(*fc_output_split, *model, OperatorType::kNone,
                            nullptr, OperatorType::kFullyConnected,
                            &fully_connected)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Fully connected op
@@ -225,13 +227,13 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
                            OperatorType::kConcatenation, &concat_inputs,
                            OperatorType::kNone, nullptr, OperatorType::kNone,
                            nullptr)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (static_cast<FullyConnectedOperator*>(fully_connected)->weights_format !=
       FullyConnectedWeightsFormat::kDefault) {
     // Not yet implemented: experimental shuffled weights in fused LSTM cell.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Emplace a new LSTM cell operator
@@ -300,7 +302,8 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   model->operators.erase(FindOperator(model, *fully_connected));
   DeleteArrayIfUnused(concat_inputs->outputs[0], model);
   model->operators.erase(FindOperator(model, *concat_inputs));
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc
index 5b6a984ee1..6ccce923f3 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc
@@ -25,19 +25,22 @@ limitations under the License.
 
 namespace toco {
 
-bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status MergeLstmCellInputs::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   // Find lstm cell.
   auto op_it = model->operators.begin() + op_index;
   auto src_op = op_it->get();
   if (src_op->type != OperatorType::kLstmCell) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Already a compact LstmCell. Do not need to merge cell inputs.
   const auto* src_lstm_op = static_cast<LstmCellOperator*>(src_op);
   if (src_lstm_op->kernel_type != LstmCellOperator::KERNEL_FULL ||
       src_lstm_op->inputs.size() != kExtendedLstmInputCount) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Identify prev_activ_input, prev_state_input as required Op inputs,
@@ -45,12 +48,12 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) {
   string prev_activ_input;
   if (!GetMatchingRnnArray(model, src_op->outputs[kOutputTensor],
                            &prev_activ_input)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   string prev_state_input;
   if (!GetMatchingRnnArray(model, src_op->outputs[kCellStateTensor],
                            &prev_state_input)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get LstmCell's cell, input, output size.
@@ -184,7 +187,8 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) {
   DeleteArrayIfUnused(src_op->inputs[kOutputGateBiasTensor], model);
   model->operators.erase(FindOp(*model, src_op));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc
index 46d1fce50e..ad5120e2aa 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc
@@ -25,19 +25,22 @@ limitations under the License.
 
 namespace toco {
 
-bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status SplitLstmCellInputs::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   // Find lstm cell.
   auto op_it = model->operators.begin() + op_index;
   auto curr_op = op_it->get();
   if (curr_op->type != OperatorType::kLstmCell) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto* curr_lstm_op = static_cast<LstmCellOperator*>(curr_op);
   // Already an extended LstmCell. Do not need to split cell inputs.
   if (curr_lstm_op->kernel_type != LstmCellOperator::KERNEL_BASIC ||
       curr_lstm_op->inputs.size() != LstmCellOperator::NUM_INPUTS) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Make sure the WEIGHTS_INPUT and BIASES_INPUT are constant arrays,
@@ -46,13 +49,13 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) {
           *model, curr_op->inputs[LstmCellOperator::WEIGHTS_INPUT]) ||
       !IsConstantParameterArray(
           *model, curr_op->inputs[LstmCellOperator::BIASES_INPUT])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Make sure propagate_fixed_sizes has defined the size of the output.
   if (!model->GetArray(curr_op->outputs[LstmCellOperator::ACTIV_OUTPUT])
            .has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Emplace a new LstmCell operator with extended inputs (kernel/lstm.cc).
@@ -168,7 +171,8 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) {
   DeleteArrayIfUnused(curr_op->inputs[LstmCellOperator::BIASES_INPUT], model);
   model->operators.erase(FindOp(*model, curr_op));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc
index b90a156a0d..c11fee4dc9 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc
@@ -43,13 +43,15 @@ limitations under the License.
 
 namespace toco {
 
-bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyPRelu::Run(Model* model, std::size_t op_index,
+                                        bool* modified) {
+  *modified = false;
   const auto add_op_it = model->operators.begin() + op_index;
   const auto* add_op = add_op_it->get();
   if (add_op == nullptr || add_op->type != OperatorType::kAdd ||
       add_op->inputs.size() != 2 ||
       add_op->fused_activation_function != FusedActivationFunctionType::kNone) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto* relu_input_op = GetOpWithOutput(*model, add_op->inputs[0]);
@@ -57,7 +59,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
       relu_input_op->inputs.size() != 1 ||
       relu_input_op->fused_activation_function !=
           FusedActivationFunctionType::kNone) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // TODO(ycling): Both Add and Mul are commutative. Support the case where
@@ -66,7 +68,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
   if (mul_op == nullptr || mul_op->type != OperatorType::kMul ||
       mul_op->inputs.size() != 2 ||
       mul_op->fused_activation_function != FusedActivationFunctionType::kNone) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto neg_alpha_tensor_name = mul_op->inputs[0];
@@ -75,7 +77,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
 
   if (relu_neg_input_op == nullptr ||
       relu_neg_input_op->inputs.size() != 1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const Operator* final_input_op;
@@ -92,13 +94,13 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
         relu_neg_input_op->type != OperatorType::kRelu ||
         relu_neg_input_op->fused_activation_function !=
             FusedActivationFunctionType::kNone) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     final_input_op = neg_input_op;
   }
 
   if (relu_input_op->inputs[0] != final_input_op->inputs[0]) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto input_tensor_name = relu_input_op->inputs[0];
@@ -128,7 +130,8 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
   // intermediate tensors aren't used by other ops, those will be removed by
   // other graph transformation rules.
   model->operators.erase(FindOp(*model, add_op));
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc
index 94820a0166..51d0629362 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc
@@ -56,13 +56,15 @@ int GetSingleScalarInputIndexOfBinaryOp(Model* model, const Operator* op,
 }
 }  // namespace
 
-bool IdentifyRelu1::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyRelu1::Run(Model* model, std::size_t op_index,
+                                        bool* modified) {
+  *modified = false;
   // Follow sequences of min+max and max+min. First get the leading op.
   const auto op_it = model->operators.begin() + op_index;
   const auto* op_0 = op_it->get();
   if (op_0->type != OperatorType::kMinimum &&
       op_0->type != OperatorType::kMaximum) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get the paired op and ensure it's the counter to the first.
@@ -71,17 +73,17 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) {
       (op_1->type != OperatorType::kMinimum &&
        op_1->type != OperatorType::kMaximum) ||
       op_0->type == op_1->type) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto* min_op = op_0->type == OperatorType::kMinimum ? op_0 : op_1;
   const auto* max_op = op_0->type == OperatorType::kMaximum ? op_0 : op_1;
 
   if (min_op->inputs.size() != 2 || max_op->inputs.size() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (min_op->outputs.size() != 1 || max_op->outputs.size() != 1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get the original input to the min+max pair.
@@ -90,7 +92,7 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) {
   int max_scalar_input_index =
       GetSingleScalarInputIndexOfBinaryOp(model, max_op, -1.0f);
   if (min_scalar_input_index == -1 || max_scalar_input_index == -1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   int op_0_scalar_input_index =
       op_0 == min_op ? min_scalar_input_index : max_scalar_input_index;
@@ -111,7 +113,8 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) {
   model->operators.erase(FindOperator(model, op_0));
   model->operators.erase(FindOperator(model, op_1));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc
index f684de08ab..5bf17d5b4c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc
@@ -97,7 +97,10 @@ bool AddDequantizeOperatorToInput(const string& input_name, const Operator* op,
   return true;
 }
 
-bool MakeInitialDequantizeOperator::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status MakeInitialDequantizeOperator::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   // This is effectively a transformation applied to edges.  We iterate over the
   // specified node (op) and proceed for input edges.
   const auto it = model->operators.begin() + op_index;
@@ -114,7 +117,8 @@ bool MakeInitialDequantizeOperator::Run(Model* model, std::size_t op_index) {
       }
     }
   }
-  return change_made;
+  *modified = change_made;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc
index 95bc7f7d4b..06de9b1cd8 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc
@@ -102,18 +102,19 @@ std::vector<int32> ReshapeToTranspose(const Model& model,
 // to be merged if the reshape does not affect memory ordering and does not
 // affects the number of dimensions. This only occurs when only unary dimensions
 // are shifting position.
-bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
-                                             std::size_t op_index) {
+::tensorflow::Status MergeReshapeIntoPrecedingTranspose::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* reshape_op = ConvertOperator<TensorFlowReshapeOperator*>(
       it->get(), OperatorType::kReshape);
 
   if (reshape_op == nullptr) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const string intermediate_name = reshape_op->inputs[0];
@@ -121,13 +122,13 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
 
   // Guarantee the input is only consume by the reshape.
   if (CountOpsWithInput(*model, intermediate_name) != 1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check for the parent operator.
   const auto& transpose_it = FindOpWithOutput(*model, intermediate_name);
   if (transpose_it == model->operators.end()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Find the parent operator and guarantee it is a transpose.
@@ -135,16 +136,16 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
       transpose_it->get(), OperatorType::kTranspose);
 
   if (transpose_op == nullptr) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!ReshapeIsEquivalentToTranspose(*model, reshape_op,
                                       false /*allow_extra_unary_dimensions*/)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check that the intermediate is not an output array.
@@ -153,7 +154,7 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
         "Cannot fuse %s and %s as it would invalidate the transpose "
         "output array.",
         LogName(*transpose_op), LogName(*reshape_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Merging operations %s and %s", LogName(*transpose_op),
@@ -172,7 +173,7 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
 
   // Remove the reshape as passthrough operation.
   if (!RemoveTrivialPassthroughOp(this, model, op_index)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Update transpose_op's constant buffer to contain the new permutation.
@@ -184,7 +185,8 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
   // transpose_ops's shape will likely has changed.
   model->GetArray(transpose_op->outputs[0]).clear_shape();
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc
index 7f44c65285..f0d8d924ad 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc
@@ -54,7 +54,10 @@ bool IsTailOfShape(const Shape& tail, const Shape& shape) {
 //
 // Note we are testing for one particular case of a broader set of possible
 // binary-reshape op transformations. This transformation could be generalized.
-bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status MoveBinaryOperatorBeforeReshape::Run(Model* model,
+                                                          std::size_t op_index,
+                                                          bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   Operator* binary_op = binary_it->get();
   if (binary_op->type != OperatorType::kAdd &&
@@ -69,7 +72,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
       binary_op->type != OperatorType::kLessEqual &&
       binary_op->type != OperatorType::kGreater &&
       binary_op->type != OperatorType::kGreaterEqual) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // BINARY OP INPUT CHECKS
@@ -81,11 +84,11 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
   if (!input_is_const[0] && !input_is_const[1]) {
     // To limit our scope, we require one constant input. Though there's no
     // reason this transformation wouldn't work with all variable inputs.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_is_const[0] && input_is_const[1]) {
     // Both inputs are constants. Leave this for constants propagation.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int constant_input_idx = input_is_const[0] ? 0 : 1;
   const int variable_input_idx = input_is_const[0] ? 1 : 0;
@@ -98,13 +101,13 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Not moving %s because it's non-constant input shape is not resolved.",
         LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsTailOfShape(
           model->GetArray(binary_op->inputs[constant_input_idx]).shape(),
           model->GetArray(binary_op->inputs[variable_input_idx]).shape())) {
     // Constant array shape must be the latter part of the variable shape.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // RESHAPE OP CHECKS
@@ -113,13 +116,13 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
   if (reshape_it == model->operators.end()) {
     AddMessageF("Not moving %s because it's variable input is not connected.",
                 LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   Operator* reshape_op = reshape_it->get();
   if (reshape_op->type != OperatorType::kReshape) {
     AddMessageF("Not moving %s because the preceding %s is not a reshape op",
                 LogName(*binary_op), LogName(*reshape_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& reshape_input_array = model->GetArray(reshape_op->inputs[0]);
   if (!reshape_input_array.has_shape()) {
@@ -127,14 +130,14 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
         "Not moving %s because it's non-constant input shape is not resolved "
         "yet",
         LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsTailOfShape(
           model->GetArray(binary_op->inputs[constant_input_idx]).shape(),
           model->GetArray(reshape_op->outputs[0]).shape())) {
     // Constant array shape must be the latter part of the binary op output
     // shape.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // EXTRA CHECKS ON CONNECTING ARRAY
@@ -143,7 +146,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
       AddMessageF(
           "Not moving %s because the output of reshape op %s is an output op.",
           LogName(*binary_op), LogName(*reshape_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
   int count_ops_consuming_output =
@@ -154,7 +157,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
         "Not moving %s because the output of reshape op %s is consumed by "
         "another op",
         LogName(*binary_op), LogName(*reshape_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // SWAP ORDER OF BINARY AND RESHAPE OPS
@@ -172,7 +175,8 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
   // Clear binary output shape so it will be re-propagated
   model->GetArray(binary_op->outputs[0]).clear_shape();
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc
index cf17c49b10..9c1ed2b732 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc
@@ -26,20 +26,21 @@ limitations under the License.
 
 namespace toco {
 
-bool PropagateActivationFunctionIntoConstants::Run(Model* model,
-                                                   std::size_t op_index) {
+::tensorflow::Status PropagateActivationFunctionIntoConstants::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   const auto ac_it = model->operators.begin() + op_index;
   const auto* ac_op = ac_it->get();
   if (ac_op->type != OperatorType::kRelu6 &&
       ac_op->type != OperatorType::kRelu1 &&
       ac_op->type != OperatorType::kRelu) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Find the op producing the array passed to this activation function.
   auto* src_op = GetOpWithOutput(*model, ac_op->inputs[0]);
   if (!src_op) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Ensure the src_op is not used without the activation function applied.
@@ -57,7 +58,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model,
       src_op_input = src_op->inputs[0];
       break;
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
   CHECK_EQ(src_op->outputs[0], ac_op->inputs[0]);
 
@@ -69,7 +70,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model,
         "Not propagating activation function %s into %s:%s because it is not "
         "constant",
         LogName(*ac_op), LogName(*src_op), src_op_input);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get the array we'll be working with and ensure it's a compatible type.
@@ -79,7 +80,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model,
         "Not propagating activation function %s into %s:%s because it is "
         "non-float data",
         LogName(*ac_op), LogName(*src_op), src_op_input);
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& const_array_data =
       const_array.GetMutableBuffer<ArrayDataType::kFloat>().data;
@@ -108,14 +109,15 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model,
       }
       default:
         LOG(FATAL) << "Unsupported activation function " << LogName(*ac_op);
-        return false;
+        return ::tensorflow::Status::OK();
     }
     const_array_data[i] = new_value;
   }
 
   AddMessageF("Propagated activation function %s into %s:%s", LogName(*ac_op),
               LogName(*src_op), src_op_input);
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
index 323eefcd3a..40cd6dea82 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
@@ -32,7 +32,10 @@ void SetDataTypeForAllOutputs(Model* model, Operator* op,
 }
 }  // namespace
 
-bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status PropagateArrayDataTypes::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
 
@@ -40,7 +43,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
   for (const auto& input : op->inputs) {
     if (!model->IsOptionalArray(input) &&
         model->GetArray(input).data_type == ArrayDataType::kNone) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
   // Record data types of output before processing, so we can see at the
@@ -131,7 +134,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
       auto* rand_op = static_cast<RandomUniformOperator*>(op);
       // The output type of RandomUniform is specified with an attribute
       if (rand_op->dtype == ArrayDataType::kNone) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       CHECK_EQ(op->outputs.size(), 1);
       SetDataTypeForAllOutputs(model, op, rand_op->dtype);
@@ -153,7 +156,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
       // This can make unsupported_op->output_data_types have more elements than
       // op->outputs.
       if (unsupported_op->output_data_types.size() < op->outputs.size()) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       for (int i = 0; i < op->outputs.size(); ++i) {
         const string& output = op->outputs[i];
@@ -164,7 +167,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
     }
     case OperatorType::kExpandDims: {
       // Yield on ExpandDim until it is converted to Reshape
-      return false;
+      return ::tensorflow::Status::OK();
     }
     case OperatorType::kSelect: {
       // Select produces outputs with the same type as their 2nd input
@@ -248,10 +251,11 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
   // Return true if any output data type changed, false if none changed.
   for (const auto& output : op->outputs) {
     if (old_output_data_types[output] != model->GetArray(output).data_type) {
-      return true;
+      *modified = true;
+      return ::tensorflow::Status::OK();
     }
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc
index cd078ef189..3cf191436d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc
@@ -39,7 +39,10 @@ bool SupportsMinMax(const Array& array) {
 // When provided a set of min/max values for uint8 arrays this will rescale
 // the values for other data types as required and preserving the floating point
 // range within the new type.
-bool PropagateDefaultMinMax::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status PropagateDefaultMinMax::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* op = it->get();
 
@@ -61,7 +64,8 @@ bool PropagateDefaultMinMax::Run(Model* model, std::size_t op_index) {
     }
   }
 
-  return did_change;
+  *modified = did_change;
+  return ::tensorflow::Status::OK();
 }
 
 // Sets the min/max on the given array, adjusting the reference_minmax for the
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc
index 3ad6b0ec6f..d0113237ce 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc
@@ -277,11 +277,14 @@ bool RecursivelyForwardPropagateDataType(GraphTransformation* transformation,
 // nice logging and integration with the graphviz video dumping mode.
 // In general you should not copy this style of transformation and stick to
 // local-only changes as seen in the other transformations.
-bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status PropagateFakeQuantNumBits::Run(Model* model,
+                                                    std::size_t op_index,
+                                                    bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   if (op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fakequant_op = static_cast<FakeQuantOperator*>(op);
 
@@ -290,7 +293,7 @@ bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) {
                                            &quantized_data_type)) {
     AddMessageF("FakeQuant op %s num_bits=%d is out of range, ignoring",
                 LogName(*op), fakequant_op->num_bits);
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& final_minmax = *fakequant_op->minmax;
 
@@ -311,7 +314,8 @@ bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) {
   did_change |=
       RecursivelyForwardPropagateDataType(this, model, op, quantized_data_type);
 
-  return did_change;
+  *modified = did_change;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index d056a8add7..5496e2093e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -1622,7 +1622,10 @@ void ProcessUnpackOperator(Model* model, UnpackOperator* op) {
 
 }  // namespace
 
-bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status PropagateFixedSizes::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   std::unordered_map<string, std::vector<int>> old_output_dims;
@@ -1836,7 +1839,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
           static_cast<TensorFlowUnsupportedOperator*>(op);
       // Attribute can be not specified, ignore it.
       if (unsupported_op->output_shapes.size() < op->outputs.size()) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       for (int i = 0; i < op->outputs.size(); ++i) {
         const string& output = op->outputs[i];
@@ -1886,10 +1889,11 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
         (old_output_dims[output] != model->GetArray(output).shape().dims())) {
       AddMessageF("Set shape of %s to [%s]", output,
                   absl::StrJoin(model->GetArray(output).shape().dims(), ","));
-      return true;
+      *modified = true;
+      return ::tensorflow::Status::OK();
     }
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
index fb299c31b7..29ea17dc61 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
@@ -439,7 +439,9 @@ void FixMinMaxPostQuantization(GraphTransformation* transformation,
 
 }  // namespace
 
-bool Quantize::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status Quantize::Run(Model* model, std::size_t op_index,
+                                   bool* modified) {
+  *modified = false;
   // Our general "quantization" graph transformation consists in replacing
   //   QuantizedInputArrays[] ->
   //     DequantizeOperators[] ->
@@ -460,7 +462,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) {
   auto& op = *model->operators[op_index];
   if (op.type == OperatorType::kDequantize ||
       op.type == OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Our assumption here is that the input arrays are already quantized -
@@ -497,7 +499,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) {
       if (!array.minmax && !array.buffer) {
         LOG(ERROR) << "Can't quantize input array " << input
                    << " because it lacks min/max info";
-        return false;
+        return ::tensorflow::Status::OK();
       }
       const auto* other_op = GetOpWithOutput(*model, input);
       if (other_op && other_op->type != OperatorType::kDequantize) {
@@ -507,7 +509,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) {
             "which means that we should yield and let other ops "
             "get quantized first",
             LogName(op), input);
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
   }
@@ -672,7 +674,8 @@ bool Quantize::Run(Model* model, std::size_t op_index) {
     }
   }
 
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
index eaa9d3bcda..0c32218ff2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
@@ -51,18 +51,19 @@ bool ApplyAttrsToArray(GraphTransformation* transformation, Model* model,
 
 }  // end namespace
 
-bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model,
-                                                     std::size_t op_index) {
+::tensorflow::Status ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   const auto fakequant_it = model->operators.begin() + op_index;
   auto* fakequant_base_op = fakequant_it->get();
   if (fakequant_base_op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fq_op = static_cast<FakeQuantOperator*>(fakequant_base_op);
 
   if (!fq_op->minmax) {
     // Need to be resolved first by ResolveFakeQuantArgsFromVars.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // At this point, this FakeQuantOperator should have a MinMax
@@ -74,7 +75,8 @@ bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model,
   bool changed = false;
   changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->inputs[0]);
   changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->outputs[0]);
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc
index c3b2709a33..fe8023ab8f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc
@@ -25,11 +25,14 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveFinalDequantizeOp::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto dequantize_it = model->operators.begin() + op_index;
   const auto* dequantize_op = dequantize_it->get();
   if (dequantize_op->type != OperatorType::kDequantize) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& output = dequantize_op->outputs[0];
   // We can remove any dequantize op whose output is not consumed by
@@ -38,7 +41,7 @@ bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) {
   // in the middle of the graph might be designated as an output
   // array.
   if (CountOpsWithInput(*model, output)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // If one of the model's output arrays was actually the Dequantize op's
@@ -53,7 +56,8 @@ bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) {
   AddMessageF("Removed final %s", LogName(*dequantize_op));
   model->EraseArray(output);
   model->operators.erase(dequantize_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc
index 73ad326299..be8c0acc7b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc
@@ -23,11 +23,14 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveTensorFlowAssert::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTensorFlowAssert::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto assert_it = model->operators.begin() + op_index;
   const auto* assert_op = assert_it->get();
   if (assert_op->type != OperatorType::kAssert) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   bool changed = false;
@@ -54,7 +57,8 @@ bool RemoveTensorFlowAssert::Run(Model* model, std::size_t op_index) {
 
   // That's it. We can stop here, no need to duplicate the work that
   // RemoveUnusedOp will do removing this now-unused node.
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc
index 7ec7752f25..37fe5fa3d7 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc
@@ -25,14 +25,18 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveTensorFlowIdentity::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTensorFlowIdentity::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   const auto passthru_it = model->operators.begin() + op_index;
   const auto* passthru_op = passthru_it->get();
   if (passthru_op->type != OperatorType::kIdentity) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc
index 0dfdc40e4c..68c6fb65c5 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc
@@ -46,14 +46,17 @@ bool AreAllBufferElementsEqualTo(const std::vector<Scalar>& buffer_data,
 // For example, an Add operator is trivial if
 // one of its operands is constant 0, a Mul operator is trivial
 // if one of its operands is constant 1, etc.
-bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialBinaryOperator::Run(Model* model,
+                                                      std::size_t op_index,
+                                                      bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   auto* binary_op = binary_it->get();
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
       binary_op->type != OperatorType::kSub &&
       binary_op->type != OperatorType::kDiv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(binary_op->inputs.size(), 2);
@@ -66,12 +69,12 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
   };
   if (!is_input_constant[0] && !is_input_constant[1]) {
     // Neither input is constant, so nothing we can resolve here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (is_input_constant[0] && is_input_constant[1]) {
     // Both inputs are constants. That's a job for constants
     // propagation, not for us to handle here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int index_of_constant_input = is_input_constant[0] ? 0 : 1;
   const int index_of_variable_input = is_input_constant[0] ? 1 : 0;
@@ -84,7 +87,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
   const auto& input_array_1 = model->GetArray(binary_op->inputs[1]);
   if (!input_array_0.has_shape() || !input_array_1.has_shape()) {
     // Both input shapes must be known.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_array_0.shape().dimensions_count() ==
           input_array_1.shape().dimensions_count() &&
@@ -94,7 +97,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
         "(lhs %s, rhs %s)",
         LogName(*binary_op), ShapeToString(input_array_0.shape()),
         ShapeToString(input_array_1.shape()));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Now check if the constant operand makes this binary
@@ -103,7 +106,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
       model->GetArray(binary_op->inputs[index_of_constant_input]);
   // For now, we only handle floats here.
   if (constant_input_array.data_type != ArrayDataType::kFloat) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& constant_input_float_data =
       constant_input_array.GetBuffer<ArrayDataType::kFloat>().data;
@@ -121,12 +124,13 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
   }
 
   if (!is_trivial) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Now we know that this node is trivial, so we can remove it.
   AddMessageF("Removing trivial %s", LogName(*binary_op));
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc
index 3ceb93d8ee..faaa2a828e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc
@@ -25,16 +25,20 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveTrivialConcatenation::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialConcatenation::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   const auto concat_it = model->operators.begin() + op_index;
   auto* concat_op = concat_it->get();
   if (concat_op->type != OperatorType::kConcatenation) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (concat_op->inputs.size() != 1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
index 936854a04f..ccfc181fe0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
@@ -25,7 +25,10 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialConcatenationInput::Run(Model* model,
+                                                          std::size_t op_index,
+                                                          bool* modified) {
+  *modified = false;
   // TensorFlow allows Concatenation nodes to have 0-D inputs,
   // and they are then treated as empty i.e. omitted from concatenation,
   // in violation of the notion that 0-D is equivalent to 1x1x1x1.
@@ -36,7 +39,7 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
   const auto concat_it = model->operators.begin() + op_index;
   auto* concat_op = concat_it->get();
   if (concat_op->type != OperatorType::kConcatenation) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   std::vector<string> trivial_inputs;
   std::vector<string> nontrivial_inputs;
@@ -52,7 +55,7 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
   }
 
   if (trivial_inputs.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Drop trivial inputs.
@@ -63,7 +66,8 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
     }
   }
   concat_op->inputs = nontrivial_inputs;
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc
index 2c8d04440f..5448a816bc 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc
@@ -64,23 +64,27 @@ bool IsFakeQuantTrivial(GraphTransformation* transformation, const Model& model,
 }  // namespace
 
 // Removes FakeQuant ops that are trivial (have no effect, are redundant, etc).
-bool RemoveTrivialFakeQuant::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialFakeQuant::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
   auto* op = op_it->get();
   if (op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fakequant_op = static_cast<FakeQuantOperator*>(op);
 
   if (!IsFakeQuantTrivial(this, *model, *fakequant_op)) {
     AddMessageF("%s is not trivial", LogName(*fakequant_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Removing trivial %s", LogName(*fakequant_op));
 
   CHECK_EQ(fakequant_op->inputs.size(), 1);
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc
index 752560e075..4133815285 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc
@@ -94,12 +94,13 @@ bool IsTrivialFusedActivationFunc(
 // Attempts to remove both fused and unfused activation functions if the
 // quantization params indicate that the representable values fall inside the
 // activation range.
-bool RemoveTrivialQuantizedActivationFunc::Run(Model* model,
-                                               std::size_t op_index) {
+::tensorflow::Status RemoveTrivialQuantizedActivationFunc::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   if (op->inputs.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (IsTrivialUnfusedActivationFunc(this, *model, op->type, op->inputs[0])) {
@@ -107,7 +108,8 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model,
         "Removing trivial unfused activation function %s because the input "
         "minmax imply at least as tight a clamp anyway.",
         LogName(*op));
-    return RemoveTrivialPassthroughOp(this, model, op_index);
+    *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+    return ::tensorflow::Status::OK();
   }
   if (IsTrivialFusedActivationFunc(this, *model, op->fused_activation_function,
                                    op->outputs[0])) {
@@ -117,9 +119,10 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model,
         "because the output quantization parameters imply at least as tight "
         "a clamp anyway.",
         LogName(*op));
-    return true;
+    *modified = true;
+    return ::tensorflow::Status::OK();
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc
index 142c876b15..0f0ae4af69 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc
@@ -69,22 +69,26 @@ bool IsTrivialMinMax(GraphTransformation* transformation, const Model& model,
 
 // Attempts to remove min/max functions if the quantization params indicate that
 // the representable values fall inside the clip range.
-bool RemoveTrivialQuantizedMinMax::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialQuantizedMinMax::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   if ((op->type != OperatorType::kMinimum &&
        op->type != OperatorType::kMaximum) ||
       op->inputs.size() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (IsTrivialMinMax(this, *model, op->type, op->inputs[0], op->inputs[1])) {
     AddMessageF(
         "Removing trivial min/max %s because the quantization parameters imply "
         "at least as tight a clamp anyway.",
         LogName(*op));
-    return RemoveTrivialPassthroughOp(this, model, op_index);
+    *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+    return ::tensorflow::Status::OK();
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc
index 5295eeccec..1caf944879 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc
@@ -81,22 +81,26 @@ bool IsReshapeTrivial(const Model& model, const Operator& op,
 
 }  // namespace
 
-bool RemoveTrivialReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialReshape::Run(Model* model,
+                                               std::size_t op_index,
+                                               bool* modified) {
+  *modified = false;
   const auto reshape_it = model->operators.begin() + op_index;
   auto* reshape_op = reshape_it->get();
   if (reshape_op->type != OperatorType::kReshape) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!IsReshapeTrivial(*model, *reshape_op, this)) {
     AddMessageF("%s is not trivial", LogName(*reshape_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Removing trivial %s", LogName(*reshape_op));
 
   CHECK_EQ(reshape_op->inputs.size(), 2);
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc
index 0cbbcd7c81..dcb0148d58 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc
@@ -49,21 +49,24 @@ bool IsSliceTrivial(const Model& model, const Operator& op,
 
 }  // namespace
 
-bool RemoveTrivialSlice::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialSlice::Run(Model* model, std::size_t op_index,
+                                             bool* modified) {
+  *modified = false;
   const auto reshape_it = model->operators.begin() + op_index;
   auto* slice_op = reshape_it->get();
   if (slice_op->type != OperatorType::kSlice) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!IsSliceTrivial(*model, *slice_op, this)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Removing trivial %s", LogName(*slice_op));
 
   CHECK_EQ(slice_op->inputs.size(), 3);
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
index dde91234a8..3cd5d06bae 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
@@ -25,7 +25,9 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveUnusedOp::Run(Model* model, std::size_t op_index,
+                                         bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* op = it->get();
 
@@ -58,7 +60,7 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
     }
     for (const string& output_array : model->flags.output_arrays()) {
       if (output == output_array) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
     for (const auto& rnn_state : model->flags.rnn_states()) {
@@ -67,19 +69,19 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
         if (!IsDiscardableArray(*model, rnn_state.back_edge_source_array()) ||
             !IsDiscardableArray(*model, rnn_state.state_array()) ||
             CountOpsWithInput(*model, rnn_state.state_array())) {
-          return false;
+          return ::tensorflow::Status::OK();
         }
       }
     }
     if (CountOpsWithInput(*model, output)) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
   if (op->unresolved_outputs) {
     AddMessageF("Not discarding %s because it has unresolved outputs.",
                 LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Discarding %s because none of its outputs is used.",
@@ -105,7 +107,8 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
     }
   }
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc
index 550de83018..3c8d411089 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc
@@ -63,29 +63,32 @@ bool IsMoveOperator(OperatorType optype) {
 
 // Swap elementwise operators such that all value operators occur before all
 // element move operators, e.g. negation then transpose.
-bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ReorderElementwiseUnary::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto element_op_it = model->operators.begin() + op_index;
   std::unique_ptr<Operator>& element_op = *element_op_it;
   if (!IsElementwiseOperator(element_op->type)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const string intermediate_name = element_op->inputs[0];
   auto it = FindOpWithOutput(*model, intermediate_name);
   if (it == model->operators.end()) {
     AddMessageF("No preceding operator");
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   std::unique_ptr<Operator>& move_op = *it;
   if (!IsMoveOperator(move_op->type)) {
     AddMessageF("Preceding operator is not a move operator");
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (CountOpsWithInput(*model, intermediate_name) != 1) {
     AddMessageF("Input %s used elsewhere", intermediate_name);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check that the intermediate is discardable.
@@ -94,7 +97,7 @@ bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) {
         "Cannot swap elementwise as it would invalidate %s which is "
         "an output array.",
         intermediate_name);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // op->inputs may change so we need to keep a value by copy.
@@ -147,7 +150,8 @@ bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) {
   // Swap the order of the operators.
   element_op.swap(move_op);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc
index c907a597cb..a2c06e71e8 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc
@@ -101,37 +101,40 @@ std::vector<int> ComputeNewPerm(std::vector<int> input_dims,
 
 // Swaps reshape-transpose to transpose-reshape whenever possible. This is
 // possible when the reshape does not affect memory ordering.
-bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ReorderReshapeTranspose::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto transpose_it = model->operators.begin() + op_index;
 
   TransposeOperator* transpose_op = ConvertOperator<TransposeOperator*>(
       transpose_it->get(), OperatorType::kTranspose);
 
   if (transpose_op == nullptr) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) {
     // Wait for values to propagate.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Find the operator that produces the transpose op.
   auto reshape_it = FindOpWithOutput(*model, transpose_op->inputs[0]);
   if (reshape_it == model->operators.end()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   TensorFlowReshapeOperator* reshape_op =
       ConvertOperator<TensorFlowReshapeOperator*>(reshape_it->get(),
                                                   OperatorType::kReshape);
   if (reshape_op == nullptr) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Ignore if the reshape is uninitialized.
   if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Need to copy to keep static if permutated.
@@ -142,7 +145,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
   // Intermediate should not be consumed by any other operators.
   if (CountOpsWithInput(*model, intermediate_name) != 1) {
     AddMessageF("Input %s used elsewhere", intermediate_name);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check that the intermediate is not an output array.
@@ -151,7 +154,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
         "Cannot reorder reshape-transpose as it would invalidate %s which is "
         "an output array.",
         intermediate_name);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get the arrays.
@@ -173,7 +176,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
   // dimensions then it can be moved between the transpose.
   if (!ReshapeIsEquivalentToTranspose(*model, reshape_op,
                                       true /*allow_extra_unary_dims*/)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!IsDiscardableArray(*model, output_name)) {
@@ -242,7 +245,8 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
   // Swap the order of the operators.
   transpose_it->swap(*reshape_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc
index 8f2c1f8162..a79779f55d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc
@@ -25,10 +25,13 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveBatchNormalization::Run(Model* model,
+                                                    std::size_t op_index,
+                                                    bool* modified) {
+  *modified = false;
   auto bn_it = model->operators.begin() + op_index;
   if (bn_it->get()->type != OperatorType::kBatchNormalization) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* bn_op =
       static_cast<const BatchNormalizationOperator*>(bn_it->get());
@@ -53,7 +56,7 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) {
   // so we need to exit early if these buffers don't exist (i.e. if the params
   // haven't yet been resolved as constants).
   if (!mean_array.buffer || !multiplier_array.buffer || !offset_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Create the new Mul, Add operators
@@ -142,7 +145,8 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) {
   DCHECK_EQ(bn_it->get(), bn_op);
   model->operators.erase(bn_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
index b8b35161d7..d039d7d690 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
@@ -24,31 +24,35 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveBatchToSpaceNDAttributes::Run(Model* model,
+                                                          std::size_t op_index,
+                                                          bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
-  if (op_it->get()->type != OperatorType::kBatchToSpaceND) return false;
+  if (op_it->get()->type != OperatorType::kBatchToSpaceND)
+    return ::tensorflow::Status::OK();
 
   auto* op = static_cast<BatchToSpaceNDOperator*>(op_it->get());
 
   // The attributes are resolved only when the 3 attributes (block_shape,
   // before_crops, after_crops) are all constant.
   if (!op->block_shape.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(op->inputs.size(), 3);
   if (!IsConstantParameterArray(*model, op->inputs[1]) ||
       !IsConstantParameterArray(*model, op->inputs[2]))
-    return false;
+    return ::tensorflow::Status::OK();
 
   // Handle crops
   const auto& crops_array = model->GetArray(op->inputs[2]);
-  if (!crops_array.has_shape()) return false;
+  if (!crops_array.has_shape()) return ::tensorflow::Status::OK();
   const std::vector<int>& crops_dims = crops_array.shape().dims();
   if (crops_dims.size() != 2) {
     // Code only handles crops of 2 dimensions. Perhaps another transformation
     // will delete this op.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const std::vector<int>& crops_buffer =
       crops_array.GetBuffer<ArrayDataType::kInt32>().data;
@@ -59,7 +63,7 @@ bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) {
 
   // Handle block_shape
   const auto& block_shape_array = model->GetArray(op->inputs[1]);
-  if (!block_shape_array.has_shape()) return false;
+  if (!block_shape_array.has_shape()) return ::tensorflow::Status::OK();
   const std::vector<int>& block_shape_dims = block_shape_array.shape().dims();
   CHECK_EQ(block_shape_dims.size(), 1);
   const std::vector<int>& block_shape_buffer =
@@ -68,7 +72,8 @@ bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) {
     op->block_shape.push_back(block_shape_buffer[i]);
   }
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
index f7e5aa6609..586f546a30 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
@@ -188,7 +188,10 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model,
 }
 }  // namespace
 
-bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantBinaryOperator::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   const auto* binary_op = binary_it->get();
   // Test for binary ops of types that we know how to resolve
@@ -204,7 +207,7 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
       binary_op->type != OperatorType::kLessEqual &&
       binary_op->type != OperatorType::kGreater &&
       binary_op->type != OperatorType::kGreaterEqual) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(binary_op->inputs.size(), 2);
 
@@ -212,13 +215,13 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
   const auto& input1_array = model->GetArray(binary_op->inputs[1]);
   // Check if both inputs are constant parameters.
   if (!input0_array.buffer || !input1_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto& output_array = model->GetArray(binary_op->outputs[0]);
   // Yield until the output array dims have been resolved.
   if (!output_array.has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // At the moment we don't want to care about fused activation functions.
@@ -229,7 +232,7 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Not resolving constant %s because it has a fused activation function",
         LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check that input data types agree.
@@ -253,7 +256,8 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
   AddMessageF("Resolved constant %s to the equivalent constant array",
               LogName(*binary_op));
   model->operators.erase(binary_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
index d916ae0ddf..0c60fdfeb3 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
@@ -135,11 +135,14 @@ void SetMinMaxForConcatenedArray(GraphTransformation* transformation,
 }  // namespace
 
 // Resolves the concatenation operator if all its inputs are constant arrays.
-bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantConcatenation::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto concat_it = model->operators.begin() + op_index;
   const auto* concat_base_op = concat_it->get();
   if (concat_base_op->type != OperatorType::kConcatenation) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* concat_op =
       static_cast<const ConcatenationOperator*>(concat_base_op);
@@ -149,11 +152,15 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
     // We  also make sure the shapes of the input arrays are known and they are
     // all discardable.
     const Operator* input_op = GetOpWithOutput(*model, input_name);
-    if (input_op) return false;
-    if (!IsConstantParameterArray(*model, input_name)) return false;
-    if (!model->GetArray(input_name).has_shape()) return false;
-    if (model->GetArray(input_name).quantization_params) return false;
-    if (!IsDiscardableArray(*model, input_name)) return false;
+    if (input_op) return ::tensorflow::Status::OK();
+    if (!IsConstantParameterArray(*model, input_name))
+      return ::tensorflow::Status::OK();
+    if (!model->GetArray(input_name).has_shape())
+      return ::tensorflow::Status::OK();
+    if (model->GetArray(input_name).quantization_params)
+      return ::tensorflow::Status::OK();
+    if (!IsDiscardableArray(*model, input_name))
+      return ::tensorflow::Status::OK();
   }
 
   const int concatenation_axis = concat_op->axis;
@@ -205,7 +212,8 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
 
   // Remove concatenate operator.
   model->operators.erase(concat_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
index f5f2f77460..4f330fdd84 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
@@ -59,11 +59,14 @@ void GetBoundsForQuantizedDataType(ArrayDataType quantized_data_type,
   }
 }
 
-bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantFakeQuant::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   const auto fakequant_it = model->operators.begin() + op_index;
   const auto* fakequant_base_op = fakequant_it->get();
   if (fakequant_base_op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto* fakequant_op =
@@ -71,12 +74,12 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
 
   // Yield until the fakequant MinMax has been resolved.
   if (!fakequant_op->minmax) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // This transformation only applies when the input array is constant.
   if (!IsConstantParameterArray(*model, fakequant_op->inputs[0])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& input_array = model->GetArray(fakequant_op->inputs[0]);
@@ -87,7 +90,7 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
   if (!InferQuantizedDataTypeFromFakeQuant(*fakequant_op,
                                            &quantized_data_type)) {
     AddMessageF("Unsupported FakeQuant num_bits=%d", fakequant_op->num_bits);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Resolving constant %s", LogName(*fakequant_op));
@@ -136,7 +139,8 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
   }
   model->operators.erase(fakequant_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
index f6f95481b5..5400d395ff 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
@@ -41,11 +41,14 @@ bool ComputeFillArray(Model* model, FillOperator* op) {
   return true;
 }
 
-bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantFill::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   const auto fill_it = model->operators.begin() + op_index;
   auto* base_op = fill_it->get();
   if (base_op->type != OperatorType::kFill) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* op = static_cast<FillOperator*>(base_op);
 
@@ -55,44 +58,44 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& val_array = model->GetArray(op->inputs[1]);
   if (!val_array.has_shape()) {
     // Yield until the value shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsConstantParameterArray(*model, op->inputs[1])) {
     // Yield until the value is constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(RequiredBufferSizeForShape(val_array.shape()), 1);
 
   switch (output_array.data_type) {
     case ArrayDataType::kFloat:
       if (!ComputeFillArray<ArrayDataType::kFloat>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kUint8:
       if (!ComputeFillArray<ArrayDataType::kUint8>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kInt32:
       if (!ComputeFillArray<ArrayDataType::kInt32>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kInt64:
       if (!ComputeFillArray<ArrayDataType::kInt64>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     default:
@@ -114,7 +117,8 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
   // Erase the operator
   model->operators.erase(fill_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
index 36d7dad0ce..6e3a6a69c2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
@@ -61,11 +61,14 @@ inline void Gather(const Array& input_array, int input_rank,
 // Resolves a constant Gather operation.
 // This simply performs the gather and produces the output array with the
 // appropriate values.
-bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantGather::Run(Model* model,
+                                                std::size_t op_index,
+                                                bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kGather) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const GatherOperator*>(base_op);
 
@@ -74,28 +77,28 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!op->axis) {
     // Yield until axis has been set by ResolveGatherAttributes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (op->axis.value() != 0) {
     // Only handling axis=0 for now.
     AddMessageF("%s has axis %d; only axis=0 is supported", LogName(*op),
                 op->axis.value());
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& input_array = model->GetArray(op->inputs[0]);
   const Array& coords_array = model->GetArray(op->inputs[1]);
@@ -142,7 +145,8 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator.
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
index e86616574d..e257ec37e8 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
@@ -49,11 +49,14 @@ void Pack(Model* model, PackOperator const& op) {
 
 }  // namespace
 
-bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantPack::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kPack) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const PackOperator*>(base_op);
 
@@ -62,18 +65,18 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   for (const auto& input : op->inputs) {
     if (!IsConstantParameterArray(*model, input)) {
       // Yield if any input is mutable
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -111,7 +114,8 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
index 88d06d7dc7..db0fbba528 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
@@ -59,11 +59,14 @@ bool ComputeRandomUniformArray(Model* model, RandomUniformOperator* op) {
   return true;
 }
 
-bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantRandomUniform::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* base_op = it->get();
   if (base_op->type != OperatorType::kRandomUniform) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* op = static_cast<RandomUniformOperator*>(base_op);
 
@@ -73,12 +76,12 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if ((op->seed == 0) && (op->seed2 == 0)) {
@@ -86,13 +89,13 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
                  << "\" is truly random (using /dev/random system entropy). "
                     "Therefore, cannot resolve as constant. Set \"seed\" or "
                     "\"seed2\" attr non-zero to fix this";
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   switch (output_array.data_type) {
     case ArrayDataType::kFloat:
       if (!ComputeRandomUniformArray<ArrayDataType::kFloat>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     // For future support of double or half.
@@ -110,7 +113,8 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
   // Erase the operator
   model->operators.erase(it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
index 1a0ba9e2bc..069d4dafaa 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
@@ -19,11 +19,14 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantRange::Run(Model* model,
+                                               std::size_t op_index,
+                                               bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* base_op = it->get();
   if (base_op->type != OperatorType::kRange) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* op = static_cast<RangeOperator*>(base_op);
 
@@ -31,23 +34,23 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   const auto& start_array = model->GetArray(op->inputs[0]);
   if (!start_array.has_shape()) {
     // Yield until all input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& limit_array = model->GetArray(op->inputs[1]);
   if (!limit_array.has_shape()) {
     // Yield until all input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& delta_array = model->GetArray(op->inputs[2]);
   if (!delta_array.has_shape()) {
     // Yield until all input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   for (const auto& input : op->inputs) {
     if (!IsConstantParameterArray(*model, input)) {
       // yield if any input is mutable
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -55,7 +58,7 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(RequiredBufferSizeForShape(start_array.shape()), 1)
@@ -101,7 +104,8 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   // Delete the operator
   model->operators.erase(it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
index a6f665b5f0..fccecef600 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
@@ -22,11 +22,14 @@ limitations under the License.
 namespace toco {
 
 // Resolves a constant reshape operation by copying the buffer.
-bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantReshape::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kReshape) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const TensorFlowReshapeOperator*>(base_op);
 
@@ -36,17 +39,17 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const Array& input_array = model->GetArray(op->inputs[0]);
@@ -54,7 +57,7 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
     AddMessageF("Constant reshape is non-trivial (%s -> %s)",
                 ShapeToString(input_array.shape()),
                 ShapeToString(output_array.shape()));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK(!output_array.buffer);
@@ -95,7 +98,7 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
     default:
       LOG(FATAL) << "Unsupported data type: "
                  << ArrayDataTypeName(input_array.data_type);
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Resolving constant reshape of %s", LogName(*op));
@@ -112,7 +115,8 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator.
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
index e880a3f44d..ab1e0bd7a0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
@@ -27,11 +27,14 @@ namespace toco {
 // This implementation is looking strictly for all-or-nothing on the select
 // condition. It's possible to enhance this by looking per-element and possibly
 // producing a Mul op.
-bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantSelect::Run(Model* model,
+                                                std::size_t op_index,
+                                                bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kSelect) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const SelectOperator*>(base_op);
 
@@ -40,23 +43,23 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We require the cond input to be constant.
   if (!IsConstantParameterArray(*model, op->inputs[0])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& cond_array = model->GetArray(op->inputs[0]);
   CHECK(cond_array.data_type == ArrayDataType::kBool)
       << "Only bool conditions are supported";
   const auto& cond_data = cond_array.GetBuffer<ArrayDataType::kBool>().data;
   if (cond_data.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check if the condition is the same for all elements.
@@ -67,12 +70,14 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
           "Cannot resolve %s as constant; cond_array has differing "
           "per-element values",
           LogName(*op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
   // Pass-through the selected input.
-  return RemoveTrivialPassthroughOp(this, model, op_index, cond_value ? 1 : 2);
+  *modified =
+      RemoveTrivialPassthroughOp(this, model, op_index, cond_value ? 1 : 2);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
index 8a0e3e8995..a1756a8207 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
@@ -19,29 +19,32 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantShapeOrRank::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* op = it->get();
   if (!(op->type == OperatorType::kShape || op->type == OperatorType::kRank)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(op->outputs.size(), 1);
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been resolved
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& input_array = model->GetArray(op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until the input array's shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Compute the output
@@ -65,7 +68,8 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
   }
 
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
index b35c3e19c4..869dfae98e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
@@ -86,11 +86,14 @@ bool Slice(SliceOperator const& op, Array const& input_array,
 
 }  // namespace
 
-bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantSlice::Run(Model* model,
+                                               std::size_t op_index,
+                                               bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kSlice) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const SliceOperator* op = static_cast<const SliceOperator*>(base_op);
@@ -99,49 +102,49 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (op->begin.empty() || op->size.empty()) {
     // Attributes have not resolved yet.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& input_array = model->GetArray(op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until the value shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsConstantParameterArray(*model, op->inputs[0])) {
     // Yield until the value is constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK(!output_array.buffer);
   switch (output_array.data_type) {
     case ArrayDataType::kFloat:
       if (!Slice<ArrayDataType::kFloat>(*op, input_array, &output_array)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kUint8:
       if (!Slice<ArrayDataType::kUint8>(*op, input_array, &output_array)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kInt32:
       if (!Slice<ArrayDataType::kInt32>(*op, input_array, &output_array)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kInt64:
       if (!Slice<ArrayDataType::kInt64>(*op, input_array, &output_array)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     default:
@@ -159,7 +162,8 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
   // Erase the operator
   model->operators.erase(it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
index 8853ed87e6..99c5a64662 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
@@ -103,11 +103,14 @@ void StridedSlice(StridedSliceOperator const& op, Array const& input_array,
 
 }  // anonymous namespace
 
-bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantStridedSlice::Run(Model* model,
+                                                      std::size_t op_index,
+                                                      bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kStridedSlice) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const StridedSliceOperator* op =
@@ -117,28 +120,28 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (op->start_indices.empty() || op->stop_indices.empty() ||
       op->strides.empty()) {
     // Attributes have not resolved yet.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& input_array = model->GetArray(op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until the value shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsConstantParameterArray(*model, op->inputs[0])) {
     // Yield until the value is constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK(!output_array.buffer);
@@ -164,7 +167,8 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
 
   DeleteOpAndArraysIfUnused(model, it->get());
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
index 5cfa1a5582..c5e93c9bad 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
@@ -97,11 +97,14 @@ inline void Tile(const Array& input_array, const Array& multiples_array,
 }  // namespace
 
 // Resolves a constant Tile operation.
-bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantTile::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kTile) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const TensorFlowTileOperator*>(base_op);
 
@@ -110,17 +113,17 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& input_array = model->GetArray(op->inputs[0]);
   const Array& multiples_array = model->GetArray(op->inputs[1]);
@@ -159,7 +162,8 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator.
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
index fe15dfa06f..b759c4d6dd 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
@@ -101,11 +101,14 @@ void Transpose(Model* model, const Array& input_array,
 
 }  // namespace
 
-bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantTranspose::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kTranspose) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const TransposeOperator*>(base_op);
 
@@ -114,17 +117,17 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& input_array = model->GetArray(op->inputs[0]);
 
@@ -132,7 +135,7 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
 
   if (op->perm.empty()) {
     // Yield until perm has been populated by ResolveTransposeAttributes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We currently only support 1-4 dimensions.
@@ -174,7 +177,8 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator.
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index 5364eebbc9..3034c1b1eb 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -112,7 +112,10 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) {
   return true;
 }
 
-bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantUnaryOperator::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto unary_it = model->operators.begin() + op_index;
   const auto* unary_op = unary_it->get();
   // Test for unary ops of types that we know how to resolve.
@@ -133,28 +136,28 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     case OperatorType::kRelu:
       break;
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   // Check if the input is a constant parameter.
   if (!IsConstantParameterArray(*model, unary_op->inputs[0])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // if the unary op involves a tensor required by a rnn state, ignore it
   for (const auto& rnn_state : model->flags.rnn_states()) {
     if (unary_op->inputs[0] == rnn_state.back_edge_source_array()) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (unary_op->inputs[0] == rnn_state.state_array()) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
   auto& output_array = model->GetArray(unary_op->outputs[0]);
   if (!output_array.has_shape()) {
     // Yield until the output array dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // At the moment we don't want to care about fused activation functions.
@@ -166,7 +169,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
         "Not resolving constant %s "
         " because it has a fused activation function",
         LogName(*unary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // The min-max is only copied for ops that copy data without arithmetic.
@@ -187,7 +190,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
           "Not resolving constant %s because we currently only support casting "
           "to float",
           LogName(*unary_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (cast_op->src_data_type != input_array.buffer->type) {
       AddMessageF(
@@ -197,7 +200,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     }
   } else {
     if (input_array.buffer->type != ArrayDataType::kFloat) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     input_float_data = &(input_array.GetBuffer<ArrayDataType::kFloat>().data);
   }
@@ -239,7 +242,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     CHECK_EQ(unary_op->inputs.size(), 2) << "Sum needs 2 inputs";
     if (!IsConstantParameterArray(*model, unary_op->inputs[1])) {
       AddMessageF("Axis input is non-constant");
-      return false;
+      return ::tensorflow::Status::OK();
     }
     auto& axis_array = model->GetArray(unary_op->inputs[1]);
     CHECK(axis_array.data_type == ArrayDataType::kInt32);
@@ -336,7 +339,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
         default:
           LOG(FATAL) << "Unsupported activation function "
                      << LogName(*unary_op);
-          return false;
+          return ::tensorflow::Status::OK();
       }
       output_float_data[i] = new_value;
     }
@@ -351,7 +354,8 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   AddMessageF("Resolved constant %s to the equivalent constant array",
               LogName(*unary_op));
   model->operators.erase(unary_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc
index 0dda1fd0b3..eed971c1d5 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc
@@ -25,17 +25,20 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveFakeQuantArgsFromVars::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto fakequant_it = model->operators.begin() + op_index;
   auto* fakequant_base_op = fakequant_it->get();
   if (fakequant_base_op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fakequant_op = static_cast<FakeQuantOperator*>(fakequant_base_op);
 
   if (fakequant_op->minmax) {
     // Already resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(fakequant_op->inputs.size(), 3);
@@ -43,7 +46,7 @@ bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) {
   // resolved to constant arrays.
   for (int i = 1; i <= 2; i++) {
     if (!IsConstantParameterArray(*model, fakequant_op->inputs[i])) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -74,7 +77,8 @@ bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) {
     DeleteArrayIfUsedOnce(fakequant_op->inputs[i], model);
   }
   fakequant_op->inputs.resize(1);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc
index ce825c91af..69209b8dec 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc
@@ -24,20 +24,25 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveGatherAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveGatherAttributes::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto* gather_op = model->operators[op_index].get();
-  if (gather_op->type != OperatorType::kGather) return false;
+  if (gather_op->type != OperatorType::kGather)
+    return ::tensorflow::Status::OK();
   auto* op = static_cast<GatherOperator*>(gather_op);
 
   if (op->axis) {
     // Attributes already resolved
-    return false;
+    return ::tensorflow::Status::OK();
   }
-  if (op->inputs.size() != 3) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[2])) return false;
+  if (op->inputs.size() != 3) return ::tensorflow::Status::OK();
+  if (!IsConstantParameterArray(*model, op->inputs[2]))
+    return ::tensorflow::Status::OK();
 
   const auto& indices_array = model->GetArray(op->inputs[2]);
-  if (!indices_array.has_shape()) return false;
+  if (!indices_array.has_shape()) return ::tensorflow::Status::OK();
   const auto& axis_data = indices_array.GetBuffer<ArrayDataType::kInt32>().data;
   CHECK_EQ(axis_data.size(), 1)
       << "Multidimensional gather not supported on " << LogName(*op);
@@ -47,7 +52,8 @@ bool ResolveGatherAttributes::Run(Model* model, std::size_t op_index) {
   DeleteArrayIfUsedOnce(op->inputs[2], model);
   op->inputs.resize(2);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
index b2b2ea151b..ac94f45321 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
@@ -51,27 +51,30 @@ void FillArrayWithZeros(Array* array) {
 // Removes a multiplication by array of constant zeros by making the output
 // array an array of constant zeros and removing the input arrays if they are no
 // longer needed.
-bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveMultiplyByZero::Run(Model* model,
+                                                std::size_t op_index,
+                                                bool* modified) {
+  *modified = false;
   const auto mul_it = model->operators.begin() + op_index;
   auto* mul_op = mul_it->get();
   if (mul_op->type != OperatorType::kMul) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& output_array_name = mul_op->outputs[0];
   auto& output_array = model->GetArray(output_array_name);
 
   if (!IsDiscardableArray(*model, output_array_name)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Yield if the output shape is not known yet.
   if (!output_array.has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // This transformation only handles the case where one operand is all 0's and
@@ -83,12 +86,12 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
   };
   if (!is_input_constant[0] && !is_input_constant[1]) {
     // Neither input is constant, so nothing we can resolve here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (is_input_constant[0] && is_input_constant[1]) {
     // Both inputs are constants. That's a job for constants propagation, not
     // for us to handle here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int index_of_constant_input = is_input_constant[0] ? 0 : 1;
   const int index_of_variable_input = is_input_constant[0] ? 1 : 0;
@@ -105,7 +108,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
           constant_input_array.GetBuffer<ArrayDataType::kFloat>().data;
       if (!AreAllBufferElementsZero<DataType<ArrayDataType::kFloat>>(
               constant_input_data)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       FillArrayWithZeros<ArrayDataType::kFloat>(&output_array);
     } break;
@@ -114,7 +117,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
           constant_input_array.GetBuffer<ArrayDataType::kUint8>().data;
       if (!AreAllBufferElementsZero<DataType<ArrayDataType::kUint8>>(
               constant_input_data)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       FillArrayWithZeros<ArrayDataType::kUint8>(&output_array);
     } break;
@@ -123,7 +126,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
           constant_input_array.GetBuffer<ArrayDataType::kInt32>().data;
       if (!AreAllBufferElementsZero<DataType<ArrayDataType::kInt32>>(
               constant_input_data)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       FillArrayWithZeros<ArrayDataType::kInt32>(&output_array);
     } break;
@@ -132,14 +135,14 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
           constant_input_array.GetBuffer<ArrayDataType::kInt64>().data;
       if (!AreAllBufferElementsZero<DataType<ArrayDataType::kInt64>>(
               constant_input_data)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       FillArrayWithZeros<ArrayDataType::kInt64>(&output_array);
     } break;
     default:
       AddMessageF(
           "Cannot resolve multiply by 0 because of unsupported data type\n");
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   // Erase input arrays to the multiply if no longer used
@@ -149,7 +152,8 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
   // Erase the multiply operator.
   model->operators.erase(mul_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc
index 8a8e723cf7..adc87753bc 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc
@@ -24,19 +24,23 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolvePadAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolvePadAttributes::Run(Model* model,
+                                               std::size_t op_index,
+                                               bool* modified) {
+  *modified = false;
   const auto pad_it = model->operators.begin() + op_index;
   auto* pad_op = pad_it->get();
-  if (pad_op->type != OperatorType::kPad) return false;
+  if (pad_op->type != OperatorType::kPad) return ::tensorflow::Status::OK();
 
   auto* op = static_cast<PadOperator*>(pad_op);
-  if (!op->left_padding.empty()) return false;
+  if (!op->left_padding.empty()) return ::tensorflow::Status::OK();
 
   CHECK_EQ(op->inputs.size(), 2);
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
 
   const auto& array = model->GetArray(op->inputs[1]);
-  if (!array.has_shape()) return false;
+  if (!array.has_shape()) return ::tensorflow::Status::OK();
 
   const std::vector<int>& dims = array.shape().dims();
   CHECK_EQ(dims.size(), 2);
@@ -50,6 +54,7 @@ bool ResolvePadAttributes::Run(Model* model, std::size_t op_index) {
 
   // TODO(dkalenichenko): Delete the extra input?
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc
index ebb023e342..1f0f17a37a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc
@@ -24,19 +24,23 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolvePadV2Attributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolvePadV2Attributes::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto pad_it = model->operators.begin() + op_index;
   auto* pad_op = pad_it->get();
-  if (pad_op->type != OperatorType::kPadV2) return false;
+  if (pad_op->type != OperatorType::kPadV2) return ::tensorflow::Status::OK();
 
   auto* op = static_cast<PadV2Operator*>(pad_op);
-  if (!op->left_padding.empty()) return false;
+  if (!op->left_padding.empty()) return ::tensorflow::Status::OK();
 
   CHECK_EQ(op->inputs.size(), 3);
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
 
   const auto& array = model->GetArray(op->inputs[1]);
-  if (!array.has_shape()) return false;
+  if (!array.has_shape()) return ::tensorflow::Status::OK();
 
   const std::vector<int>& dims = array.shape().dims();
   CHECK_EQ(dims.size(), 2);
@@ -50,6 +54,7 @@ bool ResolvePadV2Attributes::Run(Model* model, std::size_t op_index) {
 
   // TODO(dkalenichenko): Delete the extra input?
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc
index 73198ac7c0..c3246ab90f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc
@@ -39,23 +39,37 @@ bool ResolveAttributes(Model* model, T* op) {
   return true;
 }
 
-bool ResolveReduceAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveReduceAttributes::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   Operator* op = model->operators[op_index].get();
   switch (op->type) {
     case OperatorType::kMean:
-      return ResolveAttributes(model, static_cast<MeanOperator*>(op));
+      *modified = ResolveAttributes(model, static_cast<MeanOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kSum:
-      return ResolveAttributes(model, static_cast<TensorFlowSumOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowSumOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kReduceProd:
-      return ResolveAttributes(model, static_cast<TensorFlowProdOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowProdOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kReduceMin:
-      return ResolveAttributes(model, static_cast<TensorFlowMinOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowMinOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kReduceMax:
-      return ResolveAttributes(model, static_cast<TensorFlowMaxOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowMaxOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kAny:
-      return ResolveAttributes(model, static_cast<TensorFlowMaxOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowMaxOperator*>(op));
+      return ::tensorflow::Status::OK();
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
index 8e150db6fa..ee5c4810e6 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
@@ -78,11 +78,13 @@ void ReorderAxes(AxesOrder input_axes_order, AxesOrder output_axes_order,
   }
 }
 
-bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveReorderAxes::Run(Model* model, std::size_t op_index,
+                                             bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   if (op->type != OperatorType::kReorderAxes) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* reorder_op = static_cast<ReorderAxesOperator*>(op);
 
@@ -93,11 +95,11 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
   auto& input_array = model->GetArray(input_array_name);
   auto& output_array = model->GetArray(output_array_name);
   if (!input_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Yield until output dims have been resolved.
   if (!output_array.has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Reorder the input array dims and buffer data
   if (input_array.buffer->type == ArrayDataType::kFloat) {
@@ -120,7 +122,8 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
   DeleteOpAndArraysIfUnused(model, op);
   RenameArray(model, output_array_name, input_array_name);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc
index b615c9a545..7b7a59264f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc
@@ -25,25 +25,29 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveReshapeAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveReshapeAttributes::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   const auto reshape_it = model->operators.begin() + op_index;
   auto* reshape_op = reshape_it->get();
   if (reshape_op->type != OperatorType::kReshape) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto* op = static_cast<TensorFlowReshapeOperator*>(reshape_op);
 
-  if (!op->shape.empty()) return false;
+  if (!op->shape.empty()) return ::tensorflow::Status::OK();
 
   if (IsConstantParameterArray(*model, reshape_op->inputs[1])) {
     const auto& constant_input_array = model->GetArray(reshape_op->inputs[1]);
     op->shape = constant_input_array.GetBuffer<ArrayDataType::kInt32>().data;
   }
 
-  if (op->shape.empty()) return false;
+  if (op->shape.empty()) return ::tensorflow::Status::OK();
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc
index e760d08e5a..5a838168de 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc
@@ -24,29 +24,35 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveSliceAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveSliceAttributes::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto slice_it = model->operators.begin() + op_index;
   auto* slice_op = slice_it->get();
-  if (slice_op->type != OperatorType::kSlice) return false;
+  if (slice_op->type != OperatorType::kSlice) return ::tensorflow::Status::OK();
 
   auto* op = static_cast<SliceOperator*>(slice_op);
-  if (!op->begin.empty()) return false;
+  if (!op->begin.empty()) return ::tensorflow::Status::OK();
 
   CHECK_EQ(op->inputs.size(), 3);
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[2])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
+  if (!IsConstantParameterArray(*model, op->inputs[2]))
+    return ::tensorflow::Status::OK();
 
   const auto& begin_array = model->GetArray(op->inputs[1]);
-  if (!begin_array.has_shape()) return false;
+  if (!begin_array.has_shape()) return ::tensorflow::Status::OK();
 
   const auto& size_array = model->GetArray(op->inputs[2]);
-  if (!size_array.has_shape()) return false;
+  if (!size_array.has_shape()) return ::tensorflow::Status::OK();
 
   op->begin = begin_array.GetBuffer<ArrayDataType::kInt32>().data;
   op->size = size_array.GetBuffer<ArrayDataType::kInt32>().data;
 
   // TODO(dkalenichenko): Delete the extra inputs?
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc
index fab50bec1f..3804145c4f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc
@@ -24,16 +24,20 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveSpaceToBatchNDAttributes::Run(Model* model,
+                                                          std::size_t op_index,
+                                                          bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
-  if (op_it->get()->type != OperatorType::kSpaceToBatchND) return false;
+  if (op_it->get()->type != OperatorType::kSpaceToBatchND)
+    return ::tensorflow::Status::OK();
 
   auto* op = static_cast<SpaceToBatchNDOperator*>(op_it->get());
 
   // The attributes are resolved only when the 3 attributes (block_shape,
   // before_paddings, after_paddings) are all constant.
   if (!op->block_shape.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const int block_shape_index = 1;
@@ -42,16 +46,16 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 3);
   if (!IsConstantParameterArray(*model, op->inputs[block_shape_index]) ||
       !IsConstantParameterArray(*model, op->inputs[paddings_index]))
-    return false;
+    return ::tensorflow::Status::OK();
 
   // Handle paddings.
   const auto& paddings_array = model->GetArray(op->inputs[paddings_index]);
-  if (!paddings_array.has_shape()) return false;
+  if (!paddings_array.has_shape()) return ::tensorflow::Status::OK();
   const std::vector<int>& paddings_dims = paddings_array.shape().dims();
   if (paddings_dims.size() != 2) {
     // Code only handles padding of 2 dimensions. Perhaps another transformation
     // will delete this op.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const std::vector<int>& paddings_buffer =
       paddings_array.GetBuffer<ArrayDataType::kInt32>().data;
@@ -63,7 +67,7 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) {
   // Handle block_shape.
   const auto& block_shape_array =
       model->GetArray(op->inputs[block_shape_index]);
-  if (!block_shape_array.has_shape()) return false;
+  if (!block_shape_array.has_shape()) return ::tensorflow::Status::OK();
   const std::vector<int>& block_shape_dims = block_shape_array.shape().dims();
   CHECK_EQ(block_shape_dims.size(), 1);
   const std::vector<int>& block_shape_buffer =
@@ -72,7 +76,8 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) {
     op->block_shape.push_back(block_shape_buffer[i]);
   }
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc
index e8bb85704e..c601b0774e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc
@@ -25,10 +25,13 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveSqueezeAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveSqueezeAttributes::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   auto* squeeze_op = model->operators[op_index].get();
   if (squeeze_op->type != OperatorType::kSqueeze) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   DCHECK_EQ(squeeze_op->inputs.size(), 1);
   DCHECK_EQ(squeeze_op->outputs.size(), 1);
@@ -42,10 +45,11 @@ bool ResolveSqueezeAttributes::Run(Model* model, std::size_t op_index) {
           "Reshape op",
           LogName(*squeeze_op));
 
-      return RemoveTrivialPassthroughOp(this, model, op_index);
+      *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+      return ::tensorflow::Status::OK();
     }
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
index 65132d7d1e..f54f5b42a1 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
@@ -37,40 +37,47 @@ int PadAttributeArray(Array* attribute_array, std::vector<int> pad_values,
   return mask;
 }
 
-bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveStridedSliceAttributes::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   const auto slice_it = model->operators.begin() + op_index;
   auto* slice_op = slice_it->get();
-  if (slice_op->type != OperatorType::kStridedSlice) return false;
+  if (slice_op->type != OperatorType::kStridedSlice)
+    return ::tensorflow::Status::OK();
 
   auto* op = static_cast<StridedSliceOperator*>(slice_op);
   if (!op->start_indices.empty()) {
     // We have already resolved these attributes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(op->inputs.size(), 4);
   const auto& input_array = model->GetArray(op->inputs[0]);
   if (!input_array.has_shape()) {
     // We require the dimensionality of the input to pad the indices
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto& start_array = model->GetArray(op->inputs[1]);
-  if (!start_array.has_shape()) return false;
+  if (!start_array.has_shape()) return ::tensorflow::Status::OK();
   if (toco::RequiredBufferSizeForShape(start_array.shape()) > 4) {
     // Only 1-4D arrays are supported for now.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto& stop_array = model->GetArray(op->inputs[2]);
-  if (!stop_array.has_shape()) return false;
+  if (!stop_array.has_shape()) return ::tensorflow::Status::OK();
 
   auto& stride_array = model->GetArray(op->inputs[3]);
-  if (!stride_array.has_shape()) return false;
+  if (!stride_array.has_shape()) return ::tensorflow::Status::OK();
 
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[2])) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[3])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
+  if (!IsConstantParameterArray(*model, op->inputs[2]))
+    return ::tensorflow::Status::OK();
+  if (!IsConstantParameterArray(*model, op->inputs[3]))
+    return ::tensorflow::Status::OK();
 
   int num_input_axes = input_array.shape().dimensions_count();
   int start_indices_size = start_array.shape().dims(0);
@@ -112,6 +119,7 @@ bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) {
   op->stop_indices = stop_array.GetBuffer<ArrayDataType::kInt32>().data;
   op->strides = stride_array.GetBuffer<ArrayDataType::kInt32>().data;
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
index fa5ee89933..4927ccd95d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
@@ -25,12 +25,15 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTensorFlowConcat::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto concat_it = model->operators.begin() + op_index;
   const auto* tf_concat_op = concat_it->get();
   if (tf_concat_op->type != OperatorType::kConcat &&
       tf_concat_op->type != OperatorType::kConcatV2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_GE(tf_concat_op->inputs.size(), 2);
@@ -54,7 +57,7 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
   if (!axis_array.buffer) {
     AddMessageF("Waiting for the axis of %s to be resolved to a constant",
                 LogName(*tf_concat_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK(axis_array.data_type == ArrayDataType::kInt32);
@@ -79,7 +82,8 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
   }
   // Remove the TensorFlowConcat op
   model->operators.erase(concat_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc
index 65346c4fe4..da039da546 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc
@@ -55,10 +55,13 @@ TransposeOperator* FindTransposeOpWithInput(const Model& model,
 
 }  // namespace
 
-bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTensorFlowMatMul::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto matmul_it = model->operators.begin() + op_index;
   if (matmul_it->get()->type != OperatorType::kMatMul) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* matmul_op =
       static_cast<const TensorFlowMatMulOperator*>(matmul_it->get());
@@ -73,7 +76,7 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) {
         "Not replacing %s by a FullyConnected operator, because it has "
         "the transpose_a attribute",
         LogName(*matmul_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Reorder the axes on the second input. TensorFlow uses row-major ordering
@@ -198,7 +201,8 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) {
 
   // erase the MatMul operator
   model->operators.erase(matmul_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc
index 4edffe3d48..9beea3e937 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc
@@ -24,11 +24,14 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTensorFlowMerge::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto merge_it = model->operators.begin() + op_index;
   const auto* merge_op = merge_it->get();
   if (merge_op->type != OperatorType::kMerge) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We need to yield until this Merge node has only 1 input, which will mean
@@ -37,7 +40,7 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) {
   // non-selected inputs, so that at some point there will be only 1 input left.
   if (merge_op->inputs.size() > 1) {
     AddMessageF("Waiting for %s to be resolved", LogName(*merge_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Now that the merge node has 1 input exactly, it is the same as an Identity
@@ -57,7 +60,8 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) {
   AddMessageF("Removing already-resolved %s", LogName(*merge_op));
   model->EraseArray(merge_op->outputs[0]);
   model->operators.erase(merge_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
index 8bef440afd..e215981b42 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
@@ -24,11 +24,14 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTensorFlowSwitch::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto switch_it = model->operators.begin() + op_index;
   const auto* switch_op = switch_it->get();
   if (switch_op->type != OperatorType::kSwitch) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(switch_op->inputs.size(), 2);
@@ -40,7 +43,7 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Waiting for the boolean predicate of %s to be resolved to a constant",
         LogName(*switch_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // The predicate should be boolean, and should consist of a single value.
@@ -119,7 +122,8 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) {
   // Remove the switch node itself.
   AddMessageF("Removing already-resolved %s", LogName(*switch_op));
   model->operators.erase(switch_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc
index a657ee00af..aa7945391c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc
@@ -24,19 +24,24 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveTransposeAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTransposeAttributes::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
-  if (op_it->get()->type != OperatorType::kTranspose) return false;
+  if (op_it->get()->type != OperatorType::kTranspose)
+    return ::tensorflow::Status::OK();
 
   auto* op = static_cast<TransposeOperator*>(op_it->get());
-  if (!op->perm.empty()) return false;
+  if (!op->perm.empty()) return ::tensorflow::Status::OK();
 
   CHECK_EQ(op->inputs.size(), 2);
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
 
   // Handling perm.
   const auto& perm_array = model->GetArray(op->inputs[1]);
-  if (!perm_array.has_shape()) return false;
+  if (!perm_array.has_shape()) return ::tensorflow::Status::OK();
 
   const std::vector<int>& perm_dims = perm_array.shape().dims();
   CHECK_EQ(perm_dims.size(), 1);
@@ -47,7 +52,8 @@ bool ResolveTransposeAttributes::Run(Model* model, std::size_t op_index) {
     op->perm.push_back(perm_buffer[i]);
   }
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc b/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc
index 22c258cec5..e9f24a29ab 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc
@@ -24,15 +24,17 @@ limitations under the License.
 
 namespace toco {
 
-bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ShuffleFCWeights::Run(Model* model, std::size_t op_index,
+                                           bool* modified) {
+  *modified = false;
   Operator* op = model->operators[op_index].get();
   if (op->type != OperatorType::kFullyConnected) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   FullyConnectedOperator* fc_op = static_cast<FullyConnectedOperator*>(op);
   // Exit if this FC op already has shuffled weights
   if (fc_op->weights_format != FullyConnectedWeightsFormat::kDefault) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& input_array = model->GetArray(fc_op->inputs[0]);
   const string& weights_name = fc_op->inputs[1];
@@ -46,11 +48,11 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
       output_array.data_type != ArrayDataType::kInt16 ||
       !input_array.quantization_params || !weights_array.quantization_params ||
       !output_array.quantization_params) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if the shapes aren't known
   if (!input_array.has_shape() || !weights_array.has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if, based on the known shapes, this FC op is not a GEMV.
   // The shuffling of FC weights is only useful to enable fast GEMV paths.
@@ -64,7 +66,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
           "the input shape is not 1D or 2D (possibly with additional inner "
           "dimensions of size 1)",
           LogName(*op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
   if (input_shape.dims(0) != 1 && input_shape.dims(0) != 4) {
@@ -73,7 +75,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
         "the input shape's leading dimension, i.e. the 'batch size', is not "
         "equal to 1 or 4",
         LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if the weights shape isn't an integral multiple of the shuffled
   // block shape, 4x16. We don't want to have to write code dealing with
@@ -88,7 +90,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
   // two.
   const Shape& weights_shape = weights_array.shape();
   if (weights_shape.dimensions_count() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int rows = weights_shape.dims(0);
   const int cols = weights_shape.dims(1);
@@ -97,11 +99,11 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
         "Not applying experimental shuffling to the weights of %s because its "
         "shape isn't a multiple of the shuffling block shape, 4x16",
         LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if the weights aren't already a constant array.
   if (!weights_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if the weights are used by more than one op.
   if (CountOpsWithInput(*model, weights_name) != 1) {
@@ -109,7 +111,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
         "Not applying experimental shuffling to the weights of %s because that "
         "array is consumed by other operators",
         LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Compute the shuffled weights
   auto& weights_data =
@@ -152,7 +154,8 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
   shuffled_input_workspace_array.GetOrCreateQuantizationParams() =
       input_array.GetQuantizationParams();
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
index 66cfed4ac2..e2a6f12481 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
@@ -166,7 +166,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis0) {
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
   EXPECT_THAT(model.GetArrayMap().size(), 5);
-  (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0);
+  bool modified;
+  ASSERT_TRUE((*graph_transformation_set.begin())
+                  ->Run(&model, /*op_index=*/0, &modified)
+                  .ok());
   EXPECT_THAT(model.GetArrayMap().size(), 1);
 
   auto& concatenated_array = (*model.GetArrayMap().begin()).second;
@@ -185,7 +188,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis1) {
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
   EXPECT_THAT(model.GetArrayMap().size(), 5);
-  (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0);
+  bool modified;
+  ASSERT_TRUE((*graph_transformation_set.begin())
+                  ->Run(&model, /*op_index=*/0, &modified)
+                  .ok());
   EXPECT_THAT(model.GetArrayMap().size(), 1);
 
   auto& concatenated_array = (*model.GetArrayMap().begin()).second;
@@ -204,7 +210,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis2) {
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
   EXPECT_THAT(model.GetArrayMap().size(), 5);
-  (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0);
+  bool modified;
+  ASSERT_TRUE((*graph_transformation_set.begin())
+                  ->Run(&model, /*op_index=*/0, &modified)
+                  .ok());
   EXPECT_THAT(model.GetArrayMap().size(), 1);
 
   auto& concatenated_array = (*model.GetArrayMap().begin()).second;
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
index a53abc9941..57d85a0435 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
@@ -50,7 +50,8 @@ void RunResolveSum(const std::vector<float>& input,
   sum_op->inputs = {"input0", "input1"};
   sum_op->outputs = {"output"};
   model.operators.push_back(std::move(sum_op));
-  ResolveConstantUnaryOperator().Run(&model, 0);
+  bool modified;
+  ASSERT_TRUE(ResolveConstantUnaryOperator().Run(&model, 0, &modified).ok());
   EXPECT_EQ(model.GetArray("output").GetBuffer<ArrayDataType::kFloat>().data,
             expected_output);
   EXPECT_EQ(model.GetArray("output").shape().dims(), output_shape);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc
index 69bad2fa89..4ada5c3fd0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc
@@ -25,13 +25,16 @@ limitations under the License.
 
 namespace toco {
 
-bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status UnfuseActivationFunctions::Run(Model* model,
+                                                    std::size_t op_index,
+                                                    bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* op = it->get();
 
   // If a conv operation has an im2col array, yield: it should be dropped first.
   if ((op->type == OperatorType::kConv) && (op->outputs.size() == 2)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   Operator* ac_op = nullptr;
@@ -46,7 +49,7 @@ bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) {
       ac_op = new Relu1Operator;
       break;
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   // At this point we know that the op has a fused activation function. At the
@@ -74,7 +77,8 @@ bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) {
 
   ac_op->inputs = {tmp_array_name};
   op->outputs = {tmp_array_name};
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc
index dd9e26e68b..e19527968d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc
@@ -22,7 +22,10 @@ limitations under the License.
 
 namespace toco {
 
-bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status UnpartitionEmbeddingLookup::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   // Collapses a partitioned tf.nn.embedding_lookup back into a single Gather.
   // https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup
   // This transform attempts to identify the len(params) > 1 case and collapse
@@ -47,7 +50,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
   // First look for the final DynamicStitch.
   auto op_it = model->operators.begin() + op_index;
   if (op_it->get()->type != OperatorType::kDynamicStitch) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* stitch_op = static_cast<DynamicStitchOperator*>(op_it->get());
 
@@ -72,7 +75,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
           "Skipping because indices input %s into "
           "%s is unexpected",
           LogName(*op), LogName(*stitch_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (!indices_partition_op) {
       indices_partition_op = static_cast<DynamicPartitionOperator*>(op);
@@ -83,7 +86,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
             "Skipping because indices input %s into "
             "%s is from a different source op than others",
             LogName(*op), LogName(*stitch_op));
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
   }
@@ -92,12 +95,12 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
   // The data for the indices must be a constant range of the array shape.
   if (!IsConstantParameterArray(*model, indices_partition_op->inputs[0])) {
     AddMessageF("Skipping because indices partition data is non-constant");
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& indices_data_array = model->GetArray(indices_partition_op->inputs[0]);
   if (indices_data_array.data_type == ArrayDataType::kNone) {
     // Yield until data types are propagated.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK(indices_data_array.data_type == ArrayDataType::kInt32)
       << "Indices partition inputs must be int32";
@@ -117,7 +120,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
           "Skipping because data input %s into %s "
           "is unexpected",
           LogName(*op), LogName(*stitch_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     gather_ops.push_back(static_cast<GatherOperator*>(op));
   }
@@ -132,7 +135,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
           "Skipping because data input %s into "
           "%s is unexpected",
           LogName(*op), LogName(*gather_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (!data_partition_op) {
       data_partition_op = static_cast<DynamicPartitionOperator*>(op);
@@ -143,7 +146,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
             "Skipping because data input %s into "
             "%s is from a different source op than others",
             LogName(*op), LogName(*gather_op));
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
   }
@@ -236,7 +239,8 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
   DeleteOpAndArraysIfUnused(model, indices_partition_op);
   DeleteOpAndArraysIfUnused(model, data_partition_op);
   DeleteOpAndArraysIfUnused(model, stitch_op);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc
index fedf4441e2..5ff39aa313 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc
@@ -36,10 +36,12 @@ namespace toco {
 //    slice_c = tf.matmul(slice_a, slice_b)
 //    result_slices[bat] = slice_c
 //  result = tf.stack(result_slices)
-bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status UnrollBatchMatMul::Run(Model* model, std::size_t op_index,
+                                            bool* modified) {
+  *modified = false;
   auto batch_op_it = model->operators.begin() + op_index;
   if (batch_op_it->get()->type != OperatorType::kBatchMatMul) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* batch_op =
       static_cast<const BatchMatMulOperator*>(batch_op_it->get());
@@ -47,7 +49,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) {
   // We must have the shape of at least one input to know our batch size.
   const auto& input_array_a = model->GetArray(batch_op->inputs[0]);
   const auto& input_array_b = model->GetArray(batch_op->inputs[1]);
-  if (!input_array_a.has_shape() || !input_array_b.has_shape()) return false;
+  if (!input_array_a.has_shape() || !input_array_b.has_shape())
+    return ::tensorflow::Status::OK();
 
   // We only support the rank 3 case. If you are batching on rank > 3 you'll
   // have to figure that out.
@@ -66,7 +69,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) {
     batch_op_it = matmul_op_it + 1;
     CHECK_EQ(batch_op_it->get(), batch_op);
     model->operators.erase(batch_op_it);
-    return true;
+    *modified = true;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(input_array_a.shape().dimensions_count(), 3)
       << "Input arrays must have rank 3";
@@ -167,7 +171,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) {
   CHECK(batch_op_it != model->operators.end());
   CHECK(batch_op_it->get() == batch_op);
   model->operators.erase(batch_op_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
-- 
GitLab


From 072fcb995a3fd658ee2461b59b159498c710513d Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Tue, 9 Oct 2018 11:54:20 -0700
Subject: [PATCH 1309/1357] [tf.data] NUMA-aware MapAndBatch dataset.

PiperOrigin-RevId: 216395709
---
 ...f_ExperimentalNumaMapAndBatchDataset.pbtxt |   58 +
 tensorflow/core/framework/model.h             |    2 +-
 .../core/grappler/optimizers/data/BUILD       |   35 +
 .../optimizers/data/graph_test_utils.cc       |   16 +
 .../optimizers/data/graph_test_utils.h        |    6 +
 .../map_and_batch_numa_aware_replacement.cc   |   62 +
 .../map_and_batch_numa_aware_replacement.h    |   48 +
 ...p_and_batch_numa_aware_replacement_test.cc |  112 ++
 .../core/kernels/data/experimental/BUILD      |   17 +
 .../numa_map_and_batch_dataset_op.cc          | 1135 +++++++++++++++++
 .../kernels/data/map_and_batch_dataset_op.cc  |   38 +-
 .../core/ops/experimental_dataset_ops.cc      |   26 +
 .../kernel_tests/map_and_batch_test.py        |  280 +++-
 .../kernel_tests/optimization/BUILD           |    2 +
 .../optimization/model_dataset_op_test.py     |   11 +-
 .../optimization/optimize_dataset_op_test.py  |   16 +
 .../kernel_tests/serialization/BUILD          |   15 +
 ...ap_and_batch_dataset_serialization_test.py |   95 ++
 tensorflow/python/data/experimental/ops/BUILD |    1 +
 tensorflow/python/data/ops/dataset_ops.py     |    7 +-
 .../golden/v1/tensorflow.data.-options.pbtxt  |    4 +
 .../golden/v2/tensorflow.data.-options.pbtxt  |    4 +
 22 files changed, 1909 insertions(+), 81 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt
 create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc
 create mode 100644 tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py

diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt
new file mode 100644
index 0000000000..243922d969
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt
@@ -0,0 +1,58 @@
+op {
+  graph_op_name: "ExperimentalNumaMapAndBatchDataset"
+  visibility: HIDDEN
+  in_arg {
+    name: "input_dataset"
+    description: <<END
+A variant tensor representing the input dataset.
+END
+  }
+  in_arg {
+    name: "other_arguments"
+    description: <<END
+A list of tensors, typically values that were captured when building a closure
+for `f`.
+END
+  }
+  in_arg {
+    name: "batch_size"
+    description: <<END
+A scalar representing the number of elements to accumulate in a
+batch. It determines the number of concurrent invocations of `f` that process
+elements from `input_dataset` in parallel.
+END
+  }
+  in_arg {
+    name: "num_parallel_calls"
+    description: <<END
+A scalar representing the maximum number of parallel invocations of the `map_fn`
+function. Applying the `map_fn` on consecutive input elements in parallel has
+the potential to improve input pipeline throughput.
+END
+  }
+  in_arg {
+    name: "drop_remainder"
+    description: <<END
+A scalar representing whether the last batch should be dropped in case its size
+is smaller than desired.
+END
+  }
+  attr {
+    name: "f"
+    description: <<END
+A function to apply to the outputs of `input_dataset`.
+END
+  }
+  summary: "Creates a dataset that fuses mapping with batching."
+  description: <<END
+Creates a dataset that applies `f` to the outputs of `input_dataset` and then
+batches `batch_size` of them.
+
+Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up
+to `batch_size * num_parallel_batches` copies of `f` in parallel.
+
+Unlike "MapAndBatchDatasetV2", this dataset uses a NUMA-aware thread scheduling
+policy. Because it uses the single-threaded executor, it only supports the
+function-based control flow ops.
+END
+}
diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h
index eae0fa70e8..9596252664 100644
--- a/tensorflow/core/framework/model.h
+++ b/tensorflow/core/framework/model.h
@@ -335,7 +335,7 @@ class Model {
       if (name_ == "Map") {
         return Type::MAP;
       }
-      if (name_ == "MapAndBatch") {
+      if (name_ == "MapAndBatch" || name_ == "NumaMapAndBatch") {
         return Type::MAP_AND_BATCH;
       }
       if (name_ == "PaddedBatch") {
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index ee7c14e3ab..1c553044a8 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -414,6 +414,40 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "map_and_batch_numa_aware_replacement",
+    srcs = ["map_and_batch_numa_aware_replacement.cc"],
+    hdrs = ["map_and_batch_numa_aware_replacement.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":graph_utils",
+        "//tensorflow/core/grappler:mutable_graph_view",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:op_types",
+        "//tensorflow/core/grappler:utils",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+    ] + tf_protos_all(),
+)
+
+tf_cc_test(
+    name = "map_and_batch_numa_aware_replacement_test",
+    srcs = ["map_and_batch_numa_aware_replacement_test.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":graph_test_utils",
+        ":graph_utils",
+        ":map_and_batch_numa_aware_replacement",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/grappler:grappler_item",
+    ],
+)
+
 cc_library(
     name = "noop_elimination",
     srcs = ["noop_elimination.cc"],
@@ -490,6 +524,7 @@ cc_library(
         ":hoist_random_uniform",
         ":latency_all_edges",
         ":map_and_batch_fusion",
+        ":map_and_batch_numa_aware_replacement",
         ":map_and_filter_fusion",
         ":map_fusion",
         ":map_parallelization",
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
index b2eec7220e..1f03c6515c 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -44,6 +45,21 @@ NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
        {"output_types", gtl::ArraySlice<TensorShape>{}}});
 }
 
+NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
+                            StringPiece batch_size_node_name,
+                            StringPiece num_parallel_calls_node_name,
+                            StringPiece drop_remainder_node_name,
+                            StringPiece function_name) {
+  return test::function::NDef(
+      name, "MapAndBatchDatasetV2",
+      {string(input_node_name), "", string(batch_size_node_name),
+       string(num_parallel_calls_node_name), string(drop_remainder_node_name)},
+      {{"predicate", FunctionDefHelper::FunctionRef(string(function_name))},
+       {"Targuments", {}},
+       {"output_shapes", gtl::ArraySlice<TensorShape>{}},
+       {"output_types", gtl::ArraySlice<TensorShape>{}}});
+}
+
 }  // end namespace graph_tests_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
index ca0fde997d..f7891d5e1f 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
@@ -29,6 +29,12 @@ NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
 NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
                        StringPiece function_name = "IsZero");
 
+NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
+                            StringPiece batch_size_node_name,
+                            StringPiece num_parallel_calls_node_name,
+                            StringPiece drop_remainder_node_name,
+                            StringPiece function_name = "XTimesTwo");
+
 }  // end namespace graph_tests_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc
new file mode 100644
index 0000000000..452089eb67
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc
@@ -0,0 +1,62 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h"
+
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+NodeDef MakeNumaAware(const NodeDef& node, MutableGraphView* graph) {
+  NodeDef numa_aware_node = node;
+  graph_utils::SetUniqueGraphNodeName("map_and_batch_numa_aware",
+                                      graph->GetGraph(), &numa_aware_node);
+  numa_aware_node.set_op("ExperimentalNumaMapAndBatchDataset");
+  return numa_aware_node;
+}
+
+}  // namespace
+
+Status MapAndBatchNumaAwareReplacement::Optimize(Cluster* cluster,
+                                                 const GrapplerItem& item,
+                                                 GraphDef* output) {
+  *output = item.graph;
+  MutableGraphView graph(output);
+  std::set<string> nodes_to_delete;
+
+  for (const NodeDef& node : item.graph.node()) {
+    if (node.op() != "MapAndBatchDatasetV2") continue;
+
+    auto* numa_node = graph.AddNode(MakeNumaAware(node, &graph));
+    graph.ReplaceInput(node, *numa_node);
+    nodes_to_delete.insert(node.name());
+  }
+  graph.DeleteNodes(nodes_to_delete);
+  return Status::OK();
+}
+
+REGISTER_GRAPH_OPTIMIZER_AS(MapAndBatchNumaAwareReplacement,
+                            "map_and_batch_numa_aware_replacement");
+
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h
new file mode 100644
index 0000000000..3b2acd288b
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h
@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_NUMA_AWARE_REPLACEMENT_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_NUMA_AWARE_REPLACEMENT_H_
+
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+
+namespace tensorflow {
+namespace grappler {
+
+class MapAndBatchNumaAwareReplacement : public CustomGraphOptimizer {
+ public:
+  MapAndBatchNumaAwareReplacement() = default;
+  ~MapAndBatchNumaAwareReplacement() override = default;
+
+  string name() const override {
+    return "map_and_batch_numa_aware_replacement";
+  }
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* output) override;
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimize_output, double result) override {}
+};
+
+}  // namespace grappler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_NUMA_AWARE_REPLACEMENT_H_
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc
new file mode 100644
index 0000000000..3c5c61d1c2
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc
@@ -0,0 +1,112 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h"
+
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+TEST(MapAndBatchNumaAwareReplacementTest, ReplaceSimple) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {
+          NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+          NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+          NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+          NDef("range", "RangeDataset", {"start", "stop", "step"}, {}),
+          NDef("batch_size", "Const", {}, {{"value", 3}, {"dtype", DT_INT32}}),
+          NDef("num_parallel_calls", "Const", {},
+               {{"value", 5}, {"dtype", DT_INT32}}),
+          NDef("drop_remainder", "Const", {},
+               {{"value", 0}, {"dtype", DT_BOOL}}),
+          graph_tests_utils::MakeMapAndBatchNode(
+              "map_and_batch", "range", "batch_size", "num_parallel_calls",
+              "drop_remainder"),
+      },
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  MapAndBatchNumaAwareReplacement optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map_and_batch", output));
+  EXPECT_FALSE(graph_utils::ContainsNodeWithOp("MapAndBatchDatasetV2", output));
+  EXPECT_TRUE(graph_utils::ContainsNodeWithOp(
+      "ExperimentalNumaMapAndBatchDataset", output));
+}
+
+TEST(MapAndBatchNumaAawareReplacementTest, ReplaceWithExtraChild) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {
+          NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+          NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+          NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+          NDef("range", "RangeDataset", {"start", "stop", "step"}, {}),
+          NDef("batch_size", "Const", {}, {{"value", 3}, {"dtype", DT_INT32}}),
+          NDef("num_parallel_calls", "Const", {},
+               {{"value", 5}, {"dtype", DT_INT32}}),
+          NDef("drop_remainder", "Const", {},
+               {{"value", 0}, {"dtype", DT_BOOL}}),
+          graph_tests_utils::MakeMapAndBatchNode(
+              "map_and_batch", "range", "batch_size", "num_parallel_calls",
+              "drop_remainder"),
+          NDef("cache", "CacheDataset", {"map_and_batch"}, {}),
+      },
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  MapAndBatchNumaAwareReplacement optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map_and_batch", output));
+  EXPECT_FALSE(graph_utils::ContainsNodeWithOp("MapAndBatchDatasetV2", output));
+  EXPECT_TRUE(graph_utils::ContainsNodeWithOp(
+      "ExperimentalNumaMapAndBatchDataset", output));
+  EXPECT_TRUE(graph_utils::ContainsNodeWithOp("CacheDataset", output));
+
+  int numa_map_and_batch_component_id = graph_utils::FindGraphNodeWithOp(
+      "ExperimentalNumaMapAndBatchDataset", output);
+  auto& numa_map_and_batch_component =
+      output.node(numa_map_and_batch_component_id);
+  EXPECT_EQ(numa_map_and_batch_component.input(0), "range");
+
+  int cache_id = graph_utils::FindGraphNodeWithOp("CacheDataset", output);
+  auto& cache_node = output.node(cache_id);
+  EXPECT_EQ(cache_node.input(0), numa_map_and_batch_component.name());
+}
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD
index 43406db3ed..4cf5643bc0 100644
--- a/tensorflow/core/kernels/data/experimental/BUILD
+++ b/tensorflow/core/kernels/data/experimental/BUILD
@@ -102,6 +102,22 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "numa_map_and_batch_dataset_op",
+    srcs = ["numa_map_and_batch_dataset_op.cc"],
+    deps = [
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:inplace_ops",
+        "//tensorflow/core/kernels/data:captured_function",
+        "//tensorflow/core/kernels/data:dataset",
+        "@com_google_absl//absl/memory",
+    ],
+)
+
 tf_kernel_library(
     name = "unique_dataset_op",
     srcs = ["unique_dataset_op.cc"],
@@ -132,6 +148,7 @@ tf_kernel_library(
         ":ignore_errors_dataset_op",
         ":indexed_dataset",
         ":lmdb_dataset_op",
+        ":numa_map_and_batch_dataset_op",
         ":prefetching_kernels",
         ":threadpool_dataset_op",
         ":unique_dataset_op",
diff --git a/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
new file mode 100644
index 0000000000..d83edb9667
--- /dev/null
+++ b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
@@ -0,0 +1,1135 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#define EIGEN_USE_THREADS
+
+#include <atomic>
+#include <utility>
+
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/inplace_ops_functor.h"
+#include "tensorflow/core/lib/core/blocking_counter.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/platform/numa.h"
+#include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+namespace data {
+namespace {
+
+// kWindowSize is the fixed constant controlling the number of batch outputs
+// each NumaWorkerBlock may be processing at a time. This is currently a
+// constant and not user configurable to enable future performance optimizations
+// in the implementation.
+const int64 kWindowSize = 10;
+
+// Define a helper for more consistent logging.
+#define WORKER_VLOG(verbose_level)                                           \
+  VLOG(verbose_level) << "WorkerThread (" << numa_node << ", " << thread_num \
+                      << "): "
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
+
+class NumaMapAndBatchDatasetOp : public UnaryDatasetOpKernel {
+ public:
+  explicit NumaMapAndBatchDatasetOp(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+  }
+
+ protected:
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    int64 batch_size;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "batch_size", &batch_size));
+    OP_REQUIRES(
+        ctx, batch_size > 0,
+        errors::InvalidArgument("batch_size must be greater than zero."));
+
+    int64 num_parallel_calls;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
+                                            &num_parallel_calls));
+    OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune,
+                errors::InvalidArgument(
+                    "num_parallel_calls must be greater than zero."));
+
+    bool drop_remainder;
+    OP_REQUIRES_OK(ctx,
+                   ParseScalarArgument(ctx, "drop_remainder", &drop_remainder));
+
+    std::unique_ptr<CapturedFunction> captured_func;
+    OP_REQUIRES_OK(
+        ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
+                                      /* use_inter_op_parallelism = */ false,
+                                      &captured_func));
+
+    *output = new Dataset(ctx, input, batch_size, num_parallel_calls,
+                          drop_remainder, output_types_, output_shapes_, func_,
+                          std::move(captured_func));
+  }
+
+ private:
+  class Dataset : public DatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size,
+            int64 num_parallel_calls, bool drop_remainder,
+            const DataTypeVector& output_types,
+            const std::vector<PartialTensorShape>& output_shapes,
+            const NameAttrList& func,
+            std::unique_ptr<CapturedFunction> captured_func)
+        : DatasetBase(DatasetContext(ctx)),
+          input_(input),
+          batch_size_(batch_size),
+          num_parallel_calls_(num_parallel_calls),
+          drop_remainder_(drop_remainder),
+          output_types_(output_types),
+          output_shapes_(output_shapes),
+          func_(func),
+          captured_func_(std::move(captured_func)) {
+      input_->Ref();
+    }
+
+    ~Dataset() override { input_->Unref(); }
+
+    std::unique_ptr<IteratorBase> MakeIteratorInternal(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::NumaMapAndBatch")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return output_types_;
+    }
+
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return output_shapes_;
+    }
+
+    string DebugString() const override {
+      return "NumaMapAndBatchDatasetOp::Dataset";
+    }
+
+   protected:
+    Status AsGraphDefInternal(SerializationContext* ctx,
+                              DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
+      Node* batch_size_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size_node));
+      Node* num_parallel_calls_node;
+      TF_RETURN_IF_ERROR(
+          b->AddScalar(num_parallel_calls_, &num_parallel_calls_node));
+      Node* drop_remainder_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(drop_remainder_, &drop_remainder_node));
+
+      DataTypeVector other_arguments_types;
+      other_arguments_types.reserve(captured_func_->captured_inputs().size());
+      std::vector<Node*> other_arguments;
+      other_arguments.reserve(captured_func_->captured_inputs().size());
+      for (const Tensor& t : captured_func_->captured_inputs()) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        other_arguments.emplace_back(node);
+        other_arguments_types.emplace_back(t.dtype());
+      }
+      AttrValue f;
+      b->BuildAttrValue(func_, &f);
+      AttrValue other_arguments_types_attr;
+      b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
+
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this,
+          {std::make_pair(0, input_graph_node),
+           std::make_pair(2, batch_size_node),
+           std::make_pair(3, num_parallel_calls_node),
+           std::make_pair(4, drop_remainder_node)},  // Single tensor inputs.
+          {std::make_pair(1, other_arguments)},      // Tensor list inputs.
+          {std::make_pair("f", f),
+           std::make_pair("Targuments", other_arguments_types_attr)},  // Attrs
+          output));
+      return Status::OK();
+    }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params),
+            mu_(std::make_shared<mutex>()),
+            autotune_cond_var_(std::make_shared<condition_variable>()),
+            num_parallel_calls_(std::make_shared<model::SharedState>(
+                params.dataset->num_parallel_calls_, mu_, autotune_cond_var_)) {
+      }
+
+      ~Iterator() override {
+        mutex_lock l(*mu_);
+        cancelled_ = true;
+        VLOG(3) << "NumaMapAndBatchIterator::~Iterator: cancelling operations.";
+        for (size_t i = 0; i < workers_.size(); ++i) {
+          workers_[i]->manager.Cancel();
+        }
+        VLOG(3) << "NumaMapAndBatchIterator::~Iterator: waiting for threads to "
+                   "shut down.";
+      }
+
+      Status Initialize(IteratorContext* ctx) override {
+        mutex_lock l(*mu_);
+        AddConstantParameter(ctx, "batch_size", dataset()->batch_size_);
+        if (num_parallel_calls_->value == kAutoTune) {
+          num_parallel_calls_->value = std::max(1, port::NUMANumNodes());
+          AddTunableParameter(ctx,
+                              /* name = */ "parallelism",
+                              /* state = */ num_parallel_calls_,
+                              /* min = */ num_parallel_calls_->value,
+                              /* max = */ port::NumSchedulableCPUs());
+        } else {
+          AddConstantParameter(ctx, "parallelism", num_parallel_calls_->value);
+        }
+        TF_RETURN_IF_ERROR(
+            dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
+        TF_RETURN_IF_ERROR(dataset()->captured_func_->Instantiate(ctx));
+        return Status::OK();
+      }
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        auto cleanup = gtl::MakeCleanup(
+            [] { VLOG(3) << "GetNextInternal call returning."; });
+        NumaWorkerBlock* worker = nullptr;
+        {
+          mutex_lock l(*mu_);
+          VLOG(3) << "GetNextInternal call; current block: " << cur_block_;
+          if (global_end_of_input_) {
+            *end_of_sequence = true;
+            return Status::OK();
+          }
+          TF_RETURN_IF_ERROR(EnsureBackgroundThreadsStarted(ctx));
+          worker = workers_[cur_block_].get();
+          cur_block_ = (cur_block_ + 1) % workers_.size();
+        }
+        TF_RETURN_IF_ERROR(worker->manager.GetBatch(
+            ctx, dataset()->drop_remainder_, &global_end_of_input_, out_tensors,
+            end_of_sequence));
+        return Status::OK();
+      }
+
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(*mu_);
+        for (size_t i = 0; i < workers_.size(); ++i) {
+          if (!workers_[i]->manager.Quiesce()) {
+            return errors::Cancelled(
+                "The iterator was deleted before it could reach a "
+                "checkpointable state.");
+          }
+        }
+
+        TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("num_workers"), workers_.size()));
+
+        for (size_t i = 0; i < workers_.size(); ++i) {
+          size_t index = (cur_block_ + i) % workers_.size();
+          TF_RETURN_IF_ERROR(workers_[index]->manager.Save(writer, this, i));
+        }
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(*mu_);
+        TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
+        int64 num_workers = -1;
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("num_workers"), &num_workers));
+        // Note: num_workers can be 0 if the iterator wasn't started when
+        // first checkpointed.
+        if (num_workers < 0) {
+          return errors::DataLoss(
+              "When restoring from checkpoint, we encountered a data "
+              "consistency error: num_workers has an invalid value: ",
+              num_workers);
+        }
+        if (port::NUMAEnabled()) {
+          int actual_numa_domains = port::NUMANumNodes();
+          if (actual_numa_domains != num_workers && num_workers > 0) {
+            LOG(WARNING) << "# NUMA domains mismatch when restoring from "
+                            "checkpoint: checkpoint has "
+                         << num_workers
+                         << " NUMA domains, while this host has: "
+                         << actual_numa_domains << " NUMA domains.";
+          }
+        }
+        if (num_workers > 1 && !port::NUMAEnabled()) {
+          LOG(WARNING) << "NUMA is not enabled for this process, but restoring "
+                          "a checkpoint that assumes "
+                       << num_workers << " NUMA domains.";
+        }
+        workers_.resize(num_workers);
+        for (size_t i = 0; i < num_workers; ++i) {
+          workers_[i] = MakeUnique<NumaWorkerBlock>(this);
+          TF_RETURN_IF_ERROR(
+              workers_[i]->manager.Restore(ctx, reader, this, i));
+        }
+        cur_block_ = 0;
+        return Status::OK();
+      }
+
+     private:
+      // NumaBlockManager manages all the state for a set of threads pinned to a
+      // single NUMA domain.
+      //
+      // The methods can be divided into 3 categories based on who should call
+      // them:
+      //
+      //  (1) RunnerThread: WaitForInputSpace, PushInputs, SetEndOfInput.
+      //  (2) WorkerThread: RetrieveInput, GetBatchTensors.
+      //      RecordBatchEntryComplete
+      //  (3) Client threads: GetBatch, Cancel, Save, Restore.
+      //
+      // Internally, we manage state in a circular buffer of size `kWindowSize`.
+      // There are 3 pointers into the circular buffer, and must maintain the
+      // following order: (1) next_input_batch_ (corresponding to the next input
+      // batch to be pulled from the input iterator), (2) next_input_
+      // (corresponding to the batch the WorkerThreads should pull from for
+      // their next inputs), and (3) next_output_ corresponding to the next
+      // value to be consumed by the output iterator.
+      //
+      // Methods return errors::Cancelled if the iteration is cancelled before
+      // completing.
+      //
+      // NumaBlockManager is thread safe.
+      class NumaBlockManager {
+       public:
+        explicit NumaBlockManager(Iterator* itr) : itr_(itr) {}
+
+        // WaitForInputSpace blocks until there is space in the circular buffer
+        // to begin processing a new batch of elements.
+        //
+        // Returns true when there is space, false if the Iterator is cancelled.
+        bool WaitForInputSpace(IteratorContext* ctx) {
+          mutex_lock l(mu_);
+
+          size_t next = (next_input_batch_ + 1) % kWindowSize;
+          DCHECK(next < kWindowSize) << next;
+
+          // Wait for space in the circular buffer.
+          while (!cancelled_ && batches_[next].state != BatchState::kEmpty) {
+            VLOG(3) << "Waiting for input space; next: " << next
+                    << ", next_output_: " << next_output_
+                    << ", next_input_batch_: " << next_input_batch_;
+            itr_->RecordStop(ctx);
+            runner_cond_var_.wait(l);
+            itr_->RecordStart(ctx);
+          }
+          if (cancelled_) {
+            VLOG(3) << "WaitForInputSpace cancelled.";
+            return false;
+          }
+
+          DCHECK(batches_[next].state == BatchState::kEmpty);
+
+          next_input_batch_ = next;
+          return true;
+        }
+
+        // PushInputs sets the inputs for the next batch as retrieved from the
+        // input iterator.
+        void PushInputs(const Status& status,
+                        std::vector<std::vector<Tensor>> inputs) {
+          mutex_lock l(mu_);
+
+          DCHECK(next_input_ < kWindowSize) << next_input_;
+          DCHECK(batches_[next_input_batch_].state == BatchState::kEmpty);
+          DCHECK(batches_[next_input_batch_].next_input_to_process == 0)
+              << batches_[next_input_batch_].next_input_to_process;
+          DCHECK(batches_[next_input_batch_].status.ok())
+              << batches_[next_input_batch_].status;
+
+          batches_[next_input_batch_].inputs.swap(inputs);
+          batches_[next_input_batch_].state = BatchState::kInputsFilled;
+          batches_[next_input_batch_].status.Update(status);
+          if (batches_[next_input_batch_].status.ok()) {
+            worker_cond_var_.notify_all();
+          } else {
+            client_cond_var_.notify_all();
+            batches_[next_input_batch_].error_index = 0;
+          }
+        }
+
+        // SetEndOfInput records the fact that we have reached the end of the
+        // input iterator, and that we should return end_of_sequence = true when
+        // we have exhaused all buffered batches.
+        void SetEndOfInput() {
+          mutex_lock l(mu_);
+          reached_eof_ = true;
+          worker_cond_var_.notify_all();
+          client_cond_var_.notify_all();
+        }
+
+        // RetrieveInput gets the next input tuple to be mapped by a worker
+        // thread.
+        //
+        // Returns true if an input was retrieved, false if the iterator has
+        // been cancelled.
+        bool RetrieveInput(IteratorContext* ctx, std::vector<Tensor>* input,
+                           uint64* index, size_t* sequence_number) {
+          mutex_lock l(mu_);
+
+          // Wait for inputs to be ready.
+          while (!cancelled_ &&
+                 batches_[next_input_].state != BatchState::kInputsFilled) {
+            itr_->RecordStop(ctx);
+            worker_cond_var_.wait(l);
+            itr_->RecordStart(ctx);
+          }
+
+          if (cancelled_) {
+            return false;
+          }
+
+          DCHECK(batches_[next_input_].next_input_to_process <
+                 batches_[next_input_].inputs.size())
+              << "next_input_: " << next_input_ << ", next_input_to_process: "
+              << batches_[next_input_].next_input_to_process
+              << ", inputs.size(): " << batches_[next_input_].inputs.size()
+              << ", state: " << static_cast<int32>(batches_[next_input_].state)
+              << ", this: " << this;
+          *index = batches_[next_input_].next_input_to_process;
+          *sequence_number = next_input_;
+          input->swap(batches_[next_input_]
+                          .inputs[batches_[next_input_].next_input_to_process]);
+          // Increment pointers.
+          batches_[next_input_].next_input_to_process++;
+
+          if (batches_[next_input_].next_input_to_process ==
+              batches_[next_input_].inputs.size()) {
+            batches_[next_input_].state = BatchState::kAllMapsStarted;
+            next_input_ = (next_input_ + 1) % kWindowSize;
+          }
+          return true;
+        }
+
+        // GetBatchTensors returns a pointer to the output batch tensors for the
+        // worker thread to copy into.
+        //
+        // allocate_output is a function taking a batch size, and a pointer to
+        // the output tuple of Tensors to allocate them. The allocate_output
+        // function is called at most once per output batch.
+        std::vector<Tensor>* GetBatchTensors(
+            size_t sequence_number,
+            std::function<void(size_t, std::vector<Tensor>*)> allocate_output) {
+          mutex_lock l(mu_);
+          DCHECK(sequence_number < kWindowSize) << sequence_number;
+          DCHECK(batches_[sequence_number].state == BatchState::kInputsFilled ||
+                 batches_[sequence_number].state == BatchState::kAllMapsStarted)
+              << sequence_number;
+
+          if (batches_[sequence_number].outputs.empty()) {
+            allocate_output(batches_[sequence_number].inputs.size(),
+                            &batches_[sequence_number].outputs);
+          }
+          return &batches_[sequence_number].outputs;
+        }
+
+        // RecordBatchEntryComplete records an element of the batch has finished
+        // copying into the output tensors.
+        void RecordBatchEntryComplete(size_t sequence_number, uint64 index,
+                                      Status s) {
+          mutex_lock l(mu_);
+          DCHECK(sequence_number < kWindowSize) << sequence_number;
+          DCHECK(batches_[sequence_number].state == BatchState::kInputsFilled ||
+                 batches_[sequence_number].state == BatchState::kAllMapsStarted)
+              << sequence_number;
+
+          batches_[sequence_number].num_outputs_complete++;
+          if (!s.ok() && batches_[sequence_number].error_index > index) {
+            batches_[sequence_number].status = s;
+            batches_[sequence_number].error_index = index;
+          }
+
+          if (batches_[sequence_number].num_outputs_complete ==
+              batches_[sequence_number].inputs.size()) {
+            DCHECK(batches_[sequence_number].state ==
+                   BatchState::kAllMapsStarted);
+            batches_[sequence_number].state = BatchState::kOutputsComplete;
+            batches_[sequence_number].inputs.clear();  // Eagerly save memory.
+            batches_[sequence_number].inputs.shrink_to_fit();
+            client_cond_var_.notify_all();
+          }
+        }
+
+        // GetBatch retrieves the next output batch tensors.
+        Status GetBatch(IteratorContext* ctx, bool drop_remainder,
+                        bool* global_eof, std::vector<Tensor>* out_tensor,
+                        bool* end_of_sequence) {
+          mutex_lock l(mu_);
+          // Wait until one of 3 conditions occurs:
+          //  (1) we're cancelled.
+          //  (2) the state becomes kOutputsComplete
+          //  (3) state is empty && reached_eof.
+          while (!cancelled_ &&
+                 batches_[next_output_].state != BatchState::kOutputsComplete &&
+                 !(reached_eof_ &&
+                   batches_[next_output_].state == BatchState::kEmpty)) {
+            VLOG(3) << "Waiting in GetBatch.";
+            itr_->RecordStop(ctx);
+            client_cond_var_.wait(l);
+            itr_->RecordStart(ctx);
+          }
+
+          if (cancelled_) {
+            return errors::Cancelled(
+                "Cancelled in NumaMapAndBatch::GetNext call.");
+          }
+
+          if (reached_eof_ &&
+              batches_[next_output_].state == BatchState::kEmpty) {
+            VLOG(4) << "GetBatch returning end of sequence.";
+            *end_of_sequence = true;
+            *global_eof = true;
+            return Status::OK();
+          }
+
+          VLOG(3) << "Returning output index: " << next_output_
+                  << ", this: " << this;
+
+          *end_of_sequence = false;
+          Status s = batches_[next_output_].status;
+          if (s.ok()) {
+            out_tensor->swap(batches_[next_output_].outputs);
+          }
+          // Handle early termination.
+          if (errors::IsOutOfRange(s)) {
+            *global_eof = true;
+            s = Status::OK();
+            if (drop_remainder || batches_[next_output_].error_index == 0) {
+              *end_of_sequence = true;
+            } else {
+              std::vector<Tensor> true_outputs;
+              for (size_t i = 0; i < batches_[next_output_].outputs.size();
+                   ++i) {
+                TensorShape component_shape(
+                    batches_[next_output_].outputs[i].shape());
+                component_shape.set_dim(0, batches_[next_output_].error_index);
+                AllocatorAttributes attr;
+                attr.set_gpu_compatible(true);
+                Tensor component(ctx->allocator(attr),
+                                 batches_[next_output_].outputs[i].dtype(),
+                                 component_shape);
+                TF_RETURN_IF_ERROR(CopyPartialBatch(
+                    &component, batches_[next_output_].outputs[i],
+                    batches_[next_output_].error_index));
+                true_outputs.emplace_back(std::move(component));
+              }
+              out_tensor->swap(true_outputs);
+            }
+          }
+
+          batches_[next_output_].Reset();
+          next_output_ = (next_output_ + 1) % kWindowSize;
+          runner_cond_var_.notify_all();
+
+          return s;
+        }
+
+        void Cancel() {
+          mutex_lock l(mu_);
+          VLOG(3) << "Cancelling NUMA block.";
+          cancelled_ = true;
+          runner_cond_var_.notify_all();
+          worker_cond_var_.notify_all();
+          client_cond_var_.notify_all();
+        }
+
+        // Waits until all the worker threads have completed their work and all
+        // internal state has reached a "safe-point" where we can safely
+        // checkpoint.
+        //
+        // Returns true if completed successfully, false if cancelled while
+        // waiting.
+        bool Quiesce() {
+          mutex_lock l(mu_);
+          VLOG(3) << "Waiting until the operations have quiesced.";
+          while (!cancelled_ && !AllMapOperationsFinished()) {
+            client_cond_var_.wait(l);
+          }
+          if (cancelled_) {
+            return false;
+          }
+          return true;
+        }
+
+        Status Save(IteratorStateWriter* writer, Iterator* itr, size_t index) {
+          mutex_lock l(mu_);
+          string prefix = itr->full_name(strings::StrCat("numa_block_", index));
+          if (reached_eof_) {
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                strings::StrCat(prefix, "_end_of_input"), ""));
+          }
+          for (size_t i = 0; i < kWindowSize; ++i) {
+            size_t index = (next_output_ + i) % kWindowSize;
+            if (batches_[index].state == BatchState::kEmpty) {
+              break;
+            }
+            string batch_prefix = strings::StrCat(prefix, "_batch_", i);
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                strings::StrCat(batch_prefix, "_code"),
+                static_cast<int64>(batches_[index].status.code())));
+            if (!batches_[index].status.ok()) {
+              TF_RETURN_IF_ERROR(
+                  writer->WriteScalar(strings::StrCat(batch_prefix, "_msg"),
+                                      batches_[index].status.error_message()));
+              TF_RETURN_IF_ERROR(writer->WriteScalar(
+                  strings::StrCat(batch_prefix, "_error_index"),
+                  batches_[index].error_index));
+            }
+
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                strings::StrCat(batch_prefix, "_output_size"),
+                batches_[index].outputs.size()));
+            for (size_t j = 0; j < batches_[index].outputs.size(); ++j) {
+              string tensor_prefix =
+                  strings::StrCat(batch_prefix, "_output_", j);
+              if (!batches_[index].status.ok()) {
+                DCHECK(batches_[index].error_index >= 0 &&
+                       batches_[index].error_index <
+                           itr_->dataset()->batch_size_);
+                // If the batch is not full, we only store the first
+                // `error_index` values. The rest of the batch tensor might not
+                // be initialized, and accessing that will raise msan errors.
+                TF_RETURN_IF_ERROR(writer->WriteTensor(
+                    tensor_prefix, batches_[index].outputs[j].Slice(
+                                       0, batches_[index].error_index)));
+              } else {
+                TF_RETURN_IF_ERROR(writer->WriteTensor(
+                    tensor_prefix, batches_[index].outputs[j]));
+              }
+            }
+          }
+          return Status::OK();
+        }
+
+        Status Restore(IteratorContext* ctx, IteratorStateReader* reader,
+                       Iterator* itr, size_t index) {
+          mutex_lock l(mu_);
+          if (reached_eof_) {
+            return errors::FailedPrecondition(
+                "Already reached the end of the sequence.");
+          }
+          string prefix = itr->full_name(strings::StrCat("numa_block_", index));
+          reached_eof_ =
+              reader->Contains(strings::StrCat(prefix, "_end_of_input"));
+          for (size_t i = 0; i < kWindowSize; ++i) {
+            string batch_prefix = strings::StrCat(prefix, "_batch_", i);
+            if (!reader->Contains(strings::StrCat(batch_prefix, "_code"))) {
+              break;
+            }
+            Batch batch;
+            batch.state = BatchState::kOutputsComplete;
+            int64 code_int;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                strings::StrCat(batch_prefix, "_code"), &code_int));
+            error::Code code = static_cast<error::Code>(code_int);
+            if (code != error::Code::OK) {
+              string error_message;
+              TF_RETURN_IF_ERROR(reader->ReadScalar(
+                  strings::StrCat(batch_prefix, "_msg"), &error_message));
+              batch.status = Status(code, error_message);
+              int64 error_index_int = -1;
+              TF_RETURN_IF_ERROR(reader->ReadScalar(
+                  strings::StrCat(batch_prefix, "_error_index"),
+                  &error_index_int));
+              if (error_index_int < 0 ||
+                  error_index_int > itr->dataset()->batch_size_) {
+                return errors::FailedPrecondition(
+                    "Error index out of bounds when restoring from checkpoint; "
+                    "error index: ",
+                    error_index_int);
+              }
+              batch.error_index = static_cast<size_t>(error_index_int);
+            }
+            int64 output_size = -1;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                strings::StrCat(batch_prefix, "_output_size"), &output_size));
+            batch.outputs.reserve(output_size);
+            for (size_t j = 0; j < output_size; ++j) {
+              string tensor_name = strings::StrCat(batch_prefix, "_output_", j);
+              Tensor t;
+              TF_RETURN_IF_ERROR(reader->ReadTensor(tensor_name, &t));
+              batch.outputs.emplace_back(std::move(t));
+            }
+            batches_[i] = std::move(batch);
+          }
+          return Status::OK();
+        }
+
+       private:
+        bool AllMapOperationsFinished() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          for (size_t i = 0; i < kWindowSize; ++i) {
+            if (batches_[i].state == BatchState::kInputsFilled ||
+                batches_[i].state == BatchState::kAllMapsStarted) {
+              return false;
+            }
+            if (batches_[i].state != BatchState::kOutputsComplete &&
+                !reached_eof_) {
+              return false;
+            }
+          }
+          return true;
+        }
+
+        // Batches begin in the `kEmpty` state. Once the RunnerThread has
+        // filled the `inputs` to a `Batch`, it transitions to the
+        // `kInputsFilled` state. At this point, the Worker threads run the map
+        // function and copy the outputs appropriately. Once all worker threads
+        // have started, it transitions to `kAllMapsStarted`. After the outputs
+        // are complete, the GetNext call can consume the outputs, and return
+        // the batch to the kEmpty state.
+        enum class BatchState {
+          kEmpty,
+          kInputsFilled,
+          kAllMapsStarted,
+          kOutputsComplete,
+        };
+
+        // Batch captures all the state of an output batch as it progresses
+        // through the machinery. Once the RunnerThread fills inputs, it
+        // transitions to `kInputsFilled`. At this point, the worker threads can
+        // work on it, incrementing outputs_complete for every element of the
+        // input set that is copied into the output Tensors. Once all the input
+        // tuples have been processed (i.e. num_outputs_complete ==
+        // inputs.size()), it transitions to the `kOutputsComplete` stage, where
+        // it is ready to be returned by a `GetBatch` call (called from
+        // `GetNextInternal`).
+        struct Batch {
+          BatchState state;
+          // Aggregates the Status of the input iterator's GetNext
+          // calls, in addition to the Status of the map function invocations.
+          //
+          // In the case where multiple non-OK statuses are encountered, we
+          // return the first one encountered.
+          Status status;
+          // In order to return the correct error status, we keep track of the
+          // error_index.
+          size_t error_index;
+          // The batch_size input tuples (or fewer in the case of the last
+          // batch).
+          // TODO(saeta): Avoid re-allocating vectors all the time!
+          std::vector<std::vector<Tensor>> inputs;
+          std::vector<Tensor> outputs;
+          size_t next_input_to_process;
+          size_t num_outputs_complete;
+
+          Batch() { Reset(); }
+
+          // Resets the Batch state (e.g. after consuming the outputs).
+          void Reset() {
+            state = BatchState::kEmpty;
+            status = Status::OK();
+            inputs.clear();
+            inputs.shrink_to_fit();
+            outputs.clear();
+            outputs.shrink_to_fit();
+            next_input_to_process = 0;
+            num_outputs_complete = 0;
+            error_index = -1;
+          }
+        };
+
+        Iterator* itr_;  // Not owned.
+        mutex mu_;
+        Batch batches_[kWindowSize] GUARDED_BY(mu_);
+        size_t next_input_batch_ GUARDED_BY(mu_) = -1;
+        size_t next_input_ GUARDED_BY(mu_) = 0;
+        size_t next_output_ GUARDED_BY(mu_) = 0;
+        bool cancelled_ GUARDED_BY(mu_) = false;
+        bool reached_eof_ GUARDED_BY(mu_) = false;
+
+        // The runner thread waits on this condition variable for space to be
+        // available. When the client thread takes a value out of the circular
+        // buffer, it notifies this condition variable that space is now
+        // available.
+        condition_variable runner_cond_var_ GUARDED_BY(mu_);
+        // The worker threads wait on this condition variable for available
+        // inputs. When the runner thread makes new inputs available, it
+        // notifies this condition variable.
+        condition_variable worker_cond_var_ GUARDED_BY(mu_);
+        // The client threads wait on this condition variable for avaiable
+        // batched outputs. When worker threads complete a batch, they notify
+        // this condition variable.
+        condition_variable client_cond_var_ GUARDED_BY(mu_);
+      };
+      // Mark NumaBlockManager as a friend of Iterator in order to call
+      // protected Iterator methods during checkpointing.
+      friend NumaBlockManager;
+
+      struct NumaWorkerBlock {
+        NumaBlockManager manager;
+        // TODO(saeta): Migrate to BackgroundWorker.
+        std::vector<std::unique_ptr<Thread>> threads;
+
+        explicit NumaWorkerBlock(Iterator* itr) : manager(itr) {}
+      };
+
+      static void CustomNumaWorkerBlockDeleter(NumaWorkerBlock* ptr) {
+        ptr->~NumaWorkerBlock();
+        port::NUMAFree(ptr, sizeof(NumaWorkerBlock));
+      }
+      static void DefaultNumaWorkerBlockDeleter(NumaWorkerBlock* ptr) {
+        delete ptr;
+      }
+
+      static Status CopyPartialBatch(Tensor* output, const Tensor& value,
+                                     int64 num_elements) {
+        switch (value.dtype()) {
+#define HANDLE_TYPE(type)                                         \
+  case DataTypeToEnum<type>::value: {                             \
+    auto output_t = output->flat_outer_dims<type>();              \
+    auto value_t = value.flat_outer_dims<type>();                 \
+    for (size_t i = 0; i < num_elements; i++) {                   \
+      output_t.template chip<0>(i) = value_t.template chip<0>(i); \
+    }                                                             \
+    return Status::OK();                                          \
+  }
+          TF_CALL_DATASET_TYPES(HANDLE_TYPE);
+#undef HANDLE_TYPE
+          default:
+            return errors::InvalidArgument("Unsupported data type: ",
+                                           DataTypeString(value.dtype()));
+        }
+        return Status::OK();
+      }
+
+      Status EnsureBackgroundThreadsStarted(IteratorContext* ctx)
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
+        if (curr_num_parallel_calls_ >= num_parallel_calls_->value) {
+          // All necessary threads have been started.
+          curr_num_parallel_calls_ = num_parallel_calls_->value;
+          return Status::OK();
+        }
+
+        VLOG(4) << "Starting workers";
+        bool numa_enabled = port::NUMAEnabled();
+
+        if (!numa_enabled) {
+          LOG(INFO) << "NUMA not enabled on this host.";
+        }
+
+        int num_numa_nodes = port::NUMANumNodes();
+        if (num_numa_nodes < 1) {
+          return errors::Internal("The number of NUMA nodes is invalid: ",
+                                  num_numa_nodes);
+        }
+
+        // Only resize when empty to support restoring from checkpoints.
+        if (workers_.empty()) {
+          VLOG(3) << "# NUMA Nodes: " << num_numa_nodes
+                  << ", # Parallel Calls: " << num_parallel_calls_->value;
+          workers_.resize(num_numa_nodes);
+        } else {
+          num_numa_nodes = workers_.size();
+        }
+
+        // Round up num_parallel_calls, with a minimum of 1.
+        const size_t num_threads_per_block =
+            std::max(1LL, (num_parallel_calls_->value + num_numa_nodes - 1) /
+                              num_numa_nodes);
+
+        VLOG(3) << "Starting " << num_threads_per_block * num_numa_nodes
+                << " worker threads, with " << num_threads_per_block
+                << " threads per block.";
+
+        // Only allocate new_ctx if required.
+        std::shared_ptr<IteratorContext> new_ctx;
+
+        for (int i = 0; i < num_numa_nodes; ++i) {
+          if (!workers_[i]) {
+            if (numa_enabled) {
+              // Allocate in appropriate NUMA domain.
+              // 4k page align.
+              void* ptr = port::NUMAMalloc(i, sizeof(NumaWorkerBlock), 0);
+              if (ptr != nullptr) {
+                NumaWorkerBlock* block = new (ptr) NumaWorkerBlock(this);
+                workers_[i] =
+                    std::unique_ptr<NumaWorkerBlock,
+                                    std::function<void(NumaWorkerBlock*)>>(
+                        block, CustomNumaWorkerBlockDeleter);
+              } else {
+                LOG(ERROR) << "Could not NUMA-allocate worker block: " << i;
+              }
+            }
+            // If the NUMA allocation fails, or NUMA is not enabled.
+            if (!workers_[i]) {
+              workers_[i] =
+                  std::unique_ptr<NumaWorkerBlock,
+                                  std::function<void(NumaWorkerBlock*)>>(
+                      new NumaWorkerBlock(this), DefaultNumaWorkerBlockDeleter);
+            }
+          }
+          // Be sure to start threads if num_parallel_calls_ has changed.
+          for (size_t j = workers_[i]->threads.size();
+               j < num_threads_per_block; ++j) {
+            VLOG(3) << "Starting worker " << i << ", " << j;
+            if (!new_ctx) {
+              new_ctx = std::make_shared<IteratorContext>(*ctx);
+            }
+            workers_[i]->threads.emplace_back(ctx->env()->StartThread(
+                {},
+                strings::StrCat("numa_map_and_batch_block_", i, "_thread_", j),
+                [this, new_ctx, i, j]() { WorkerThread(new_ctx, i, j); }));
+            VLOG(3) << "Worker " << i << ", " << j << " successfully started.";
+          }
+        }
+        if (!runner_thread_) {
+          if (!new_ctx) {
+            new_ctx = std::make_shared<IteratorContext>(*ctx);
+          }
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "numa_map_runner_thread",
+              [this, new_ctx] { RunnerThread(new_ctx); }));
+        }
+        VLOG(3) << "All workers & runner thread started.";
+        return Status::OK();
+      }
+
+      void AllocateOutput(IteratorContext* ctx, size_t batch_size,
+                          const std::vector<Tensor>& map_fn_outputs,
+                          std::vector<Tensor>* batch_outputs) {
+        DCHECK(dataset()->output_dtypes().size() ==
+               dataset()->output_shapes().size());
+        DCHECK(map_fn_outputs.size() == dataset()->output_dtypes().size());
+        for (size_t i = 0; i < dataset()->output_dtypes().size(); ++i) {
+          TensorShape component_shape({static_cast<uint32>(batch_size)});
+          component_shape.AppendShape(map_fn_outputs.at(i).shape());
+          AllocatorAttributes attr;
+          attr.set_gpu_compatible(true);
+          Tensor component(ctx->allocator(attr), map_fn_outputs.at(i).dtype(),
+                           component_shape);
+          batch_outputs->emplace_back(std::move(component));
+        }
+      }
+
+      void RunnerThread(std::shared_ptr<IteratorContext> ctx)
+          LOCKS_EXCLUDED(mu_) {
+        RecordStart(ctx.get());
+        auto cleanup = gtl::MakeCleanup([this, &ctx] {
+          // Set end of input on all the managers in order to clean up in an
+          // orderly fashion.
+          VLOG(3) << "Setting End of Input on workers_[*]->manager";
+          for (size_t i = 0; i < workers_.size(); ++i) {
+            workers_[i]->manager.SetEndOfInput();
+          }
+          RecordStop(ctx.get());
+        });
+
+        const size_t num_blocks = workers_.size();
+
+        while (true) {
+          for (size_t block = 0; block < num_blocks; ++block) {
+            VLOG(4) << "RunnerThread waiting for input space in block: "
+                    << block;
+            if (TF_PREDICT_FALSE(
+                    !workers_[block]->manager.WaitForInputSpace(ctx.get()))) {
+              VLOG(3) << "RunnerThread exiting due to cancellation.";
+              return;
+            }
+            VLOG(4) << "RunnerThread has space; pulling on upstream for block "
+                    << block;
+
+            Status s;
+            std::vector<std::vector<Tensor>> inputs;
+            bool end_of_sequence = false;
+            for (size_t i = 0; i < dataset()->batch_size_; ++i) {
+              std::vector<Tensor> tuple;
+              s.Update(
+                  input_impl_->GetNext(ctx.get(), &tuple, &end_of_sequence));
+              if (!s.ok()) {
+                break;
+              }
+              if (end_of_sequence) {
+                VLOG(4) << "Runner thread encountered end of sequence.";
+                if (dataset()->drop_remainder_) {
+                  return;
+                }
+                break;
+              }
+              inputs.push_back(std::move(tuple));
+            }
+
+            VLOG(4) << "Moving inputs to block " << block
+                    << ", which has size: " << inputs.size();
+            if (!s.ok() || !inputs.empty()) {
+              workers_[block]->manager.PushInputs(s, std::move(inputs));
+              VLOG(4) << "Inputs moved into block " << block;
+            }
+            if (end_of_sequence) {
+              return;
+            }
+          }
+        }
+      }
+
+      void WorkerThread(std::shared_ptr<IteratorContext> ctx,
+                        const int numa_node, const int thread_num) {
+        RecordStart(ctx.get());
+        WORKER_VLOG(3) << "started.";
+        auto stop_cleanup =
+            gtl::MakeCleanup([this, numa_node, thread_num, &ctx]() {
+              RecordStop(ctx.get());
+              WORKER_VLOG(3) << "exiting.";
+            });
+
+        NumaWorkerBlock* block = workers_[numa_node].get();
+        port::NUMASetThreadNodeAffinity(numa_node);
+        const int num_numa_nodes = port::NUMANumNodes();
+        const int minimum_num_parallel_calls = thread_num * num_numa_nodes;
+
+        while (true) {
+          // Put threads to sleep based on autotuner.
+          {
+            mutex_lock l(*mu_);
+            while (minimum_num_parallel_calls >= num_parallel_calls_->value &&
+                   !cancelled_) {
+              RecordStop(ctx.get());
+              autotune_cond_var_->wait(l);
+              RecordStart(ctx.get());
+            }
+            if (cancelled_) {
+              return;
+            }
+          }
+
+          std::vector<Tensor> input;
+          uint64 index = 0;
+          size_t sequence_number = 0;
+          WORKER_VLOG(4) << "retrieving input.";
+          {
+            tracing::ScopedActivity trace(
+                "NumaMapAndBatch::Iterator::Worker::RetrieveInput");
+            if (!block->manager.RetrieveInput(ctx.get(), &input, &index,
+                                              &sequence_number)) {
+              return;
+            }
+          }
+
+          WORKER_VLOG(4) << "retrieved input; index: " << index
+                         << ", sequence_number: " << sequence_number;
+
+          std::vector<Tensor> return_values;
+          Status s;
+          {
+            tracing::ScopedActivity trace(
+                "NumaMapAndBatch::Iterator::Worker::FunctionExecution");
+            s = dataset()->captured_func_->Run(ctx.get(), std::move(input),
+                                               &return_values);
+          }
+          WORKER_VLOG(4) << "ran function for index: " << index
+                         << ", sequence_number: " << sequence_number;
+
+          if (s.ok()) {
+            std::vector<Tensor>* output = block->manager.GetBatchTensors(
+                sequence_number,
+                [this, ctx, &return_values](size_t batch_size,
+                                            std::vector<Tensor>* output) {
+                  AllocateOutput(ctx.get(), batch_size, return_values, output);
+                });
+            WORKER_VLOG(4) << "copying tensors to batch output.";
+            {
+              tracing::ScopedActivity trace(
+                  "NumaMapAndBatch::Iterator::Worker::BatchCopy");
+              for (size_t i = 0; i < return_values.size() && s.ok(); ++i) {
+                Tensor& tensor = return_values.at(i);
+                Tensor* batch = &output->at(i);
+                if (tensor.NumElements() !=
+                    (batch->NumElements() / batch->dim_size(0))) {
+                  s.Update(errors::InvalidArgument(
+                      "Cannot add tensor to the batch: number of elements does "
+                      "not match. Shapes are: [tensor]: ",
+                      tensor.shape().DebugString(),
+                      ", [batch]: ", batch->shape().DebugString()));
+                  break;
+                }
+                s.Update(batch_util::CopyElementToSlice(std::move(tensor),
+                                                        batch, index));
+              }
+            }
+          }
+
+          block->manager.RecordBatchEntryComplete(sequence_number, index, s);
+          WORKER_VLOG(4) << "finished index: " << index
+                         << ", sequence_number: " << sequence_number;
+        }
+      }
+
+      // mu_ protects shared internal state and is used to coordinate between
+      // the auto-tuner, client threads, worker threads, and the runner thread.
+      const std::shared_ptr<mutex> mu_;
+      const std::shared_ptr<condition_variable> autotune_cond_var_;
+      // The maximum number of parallel calls (can be auto-tuned).
+      const std::shared_ptr<model::SharedState> num_parallel_calls_;
+
+      // Caches the last-seen value of num_parallel_calls_->value to
+      // short-circuit starting workers.
+      int64 curr_num_parallel_calls_ GUARDED_BY(*mu_) = 0;
+
+      std::unique_ptr<IteratorBase> input_impl_;
+      int64 cur_block_ GUARDED_BY(*mu_) = 0;
+      bool global_end_of_input_ GUARDED_BY(*mu_) = false;
+      bool cancelled_ GUARDED_BY(*mu_) = false;
+      std::vector<std::unique_ptr<NumaWorkerBlock,
+                                  std::function<void(NumaWorkerBlock*)>>>
+          workers_;  // Const after initialization.
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+    };
+
+    const DatasetBase* const input_;
+    const int64 batch_size_;
+    const int64 num_parallel_calls_;
+    const bool drop_remainder_;
+    const DataTypeVector output_types_;
+    const std::vector<PartialTensorShape> output_shapes_;
+    const NameAttrList func_;
+    const std::unique_ptr<CapturedFunction> captured_func_;
+  };
+
+  DataTypeVector output_types_;
+  std::vector<PartialTensorShape> output_shapes_;
+  NameAttrList func_;
+};
+
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalNumaMapAndBatchDataset").Device(DEVICE_CPU),
+    NumaMapAndBatchDatasetOp);
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index f45a239793..bae56828dc 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -324,6 +324,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
      private:
+      // BatchResult encapsulates the output batch, as well as anciliary
+      // metadata required to execute the fused map-and-batch operation.
       struct BatchResult {
         explicit BatchResult(int64 batch_size) {
           end_of_input = false;
@@ -331,11 +333,23 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           num_elements = 0;
           output_allocated = false;
           status = Status::OK();
+          status_offset = -1;
         }
 
-        void UpdateStatus(const Status& s) {
-          mutex_lock l(mu);
-          status.Update(s);
+        // UpdateStatus updates the batch's aggregate Status.
+        //
+        // In order to ensure that exactly the first non-OK status is returned
+        // (required to make the behavior is observably identical to a
+        // sequential execution of map followed by batch), we must also keep
+        // track of the offset into the batch that produced `s`.
+        void UpdateStatus(const Status& s, int64 offset) {
+          if (TF_PREDICT_FALSE(!s.ok())) {
+            mutex_lock l(mu);
+            if (status.ok() || offset < status_offset) {
+              status = s;
+              status_offset = offset;
+            }
+          }
         }
 
         mutex mu;
@@ -344,6 +358,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         std::vector<Tensor> output;
         bool output_allocated GUARDED_BY(mu);
         Status status GUARDED_BY(mu);
+        int64 status_offset GUARDED_BY(mu);
         // Counts the number of outstanding calls for this batch.
         int64 num_calls;  // access guarded by owner's mutex
       };
@@ -379,7 +394,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         std::shared_ptr<std::vector<Tensor>> return_values =
             std::make_shared<std::vector<Tensor>>();
         auto done = [this, ctx, result, return_values, offset](Status status) {
-          result->UpdateStatus(status);
+          result->UpdateStatus(status, offset);
           if (status.ok()) {
             EnsureOutputAllocated(ctx, result, return_values);
             for (size_t i = 0; i < return_values->size(); ++i) {
@@ -389,11 +404,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                   (batch->NumElements() / batch->dim_size(0))) {
                 TensorShape batch_shape = batch->shape();
                 batch_shape.RemoveDim(0);
-                result->UpdateStatus(errors::InvalidArgument(
-                    "Cannot add tensor to the batch: number of elements does "
-                    "not match. Shapes are: [tensor]: ",
-                    tensor.shape().DebugString(),
-                    ", [batch]: ", batch_shape.DebugString()));
+                result->UpdateStatus(
+                    errors::InvalidArgument(
+                        "Cannot add tensor to the batch: number of elements "
+                        "does "
+                        "not match. Shapes are: [tensor]: ",
+                        tensor.shape().DebugString(),
+                        ", [batch]: ", batch_shape.DebugString()),
+                    offset);
                 break;
               }
               // TODO(mrry): Add a version of DoParallelConcat that allows us to
@@ -402,7 +420,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
               Status copy_status = ::tensorflow::functor::DoParallelConcat(
                   *dataset()->device_, tensor, offset, batch);
               if (!copy_status.ok()) {
-                result->UpdateStatus(copy_status);
+                result->UpdateStatus(copy_status, offset);
                 break;
               }
             }
diff --git a/tensorflow/core/ops/experimental_dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc
index f6bd5dce26..bbbecc50f8 100644
--- a/tensorflow/core/ops/experimental_dataset_ops.cc
+++ b/tensorflow/core/ops/experimental_dataset_ops.cc
@@ -138,6 +138,32 @@ REGISTER_OP("ExperimentalAssertNextDataset")
       return shape_inference::ScalarShape(c);
     });
 
+REGISTER_OP("ExperimentalNumaMapAndBatchDataset")
+    .Input("input_dataset: variant")
+    .Input("other_arguments: Targuments")
+    .Input("batch_size: int64")
+    .Input("num_parallel_calls: int64")
+    .Input("drop_remainder: bool")
+    .Output("handle: variant")
+    .Attr("f: func")
+    .Attr("Targuments: list(type) >= 0")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      // Use index from the end to retrieve the Input shapes,
+      // so that to avoid guessing the length of "other_arguments".
+      // batch_size, num_parallel_batches, and drop_remainder are 0-D scalars.
+      shape_inference::ShapeHandle unused;
+      TF_RETURN_IF_ERROR(
+          c->WithRank(c->input(c->num_inputs() - 3), 0, &unused));
+      TF_RETURN_IF_ERROR(
+          c->WithRank(c->input(c->num_inputs() - 2), 0, &unused));
+      TF_RETURN_IF_ERROR(
+          c->WithRank(c->input(c->num_inputs() - 1), 0, &unused));
+
+      return shape_inference::ScalarShape(c);
+    });
+
 REGISTER_OP("ExperimentalLMDBDataset")
     .Input("filenames: string")
     .Output("handle: variant")
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index d444c4082e..5ead6d1c75 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
@@ -38,12 +39,17 @@ from tensorflow.python.platform import test
 class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
-      ("Default", None, None),
-      ("SequentialCalls", 1, None),
-      ("ParallelCalls", 2, None),
-      ("ParallelBatches", None, 10),
+      ("Default", None, None, False),
+      ("SequentialCalls", 1, None, False),
+      ("ParallelCalls", 2, None, False),
+      ("ParallelBatches", None, 10, False),
+      ("DefaultNUMA", None, None, True),
+      ("SequentialCallsNUMA", 1, None, True),
+      ("ParallelCallsNUMA", 2, None, True),
+      ("ParallelBatchesNUMA", None, 10, True),
   )
-  def testMapAndBatch(self, num_parallel_calls, num_parallel_batches):
+  def testMapAndBatch(self, num_parallel_calls, num_parallel_batches,
+                      numa_aware):
     """Test a dataset that maps a TF function across its input elements."""
     # The pipeline is TensorSliceDataset ->
     # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size).
@@ -57,14 +63,20 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     def _map_fn(x, y, z):
       return math_ops.square(x), math_ops.square(y), math_ops.square(z)
 
-    iterator = (
+    dataset = (
         dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
             batching.map_and_batch(
                 map_func=_map_fn,
                 batch_size=batch_size,
                 num_parallel_calls=num_parallel_calls,
-                num_parallel_batches=num_parallel_batches))
-        .make_initializable_iterator())
+                num_parallel_batches=num_parallel_batches)))
+
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+
+    iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -115,16 +127,25 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         sess.run(init_op, feed_dict={count: 14, batch_size: 0})
 
   @parameterized.named_parameters(
-      ("Even", False),
-      ("Uneven", True),
+      ("Even", False, False),
+      ("Uneven", True, False),
+      ("EvenNUMA", False, True),
+      ("UnevenNUMA", True, True),
   )
-  def testMapAndBatchPartialBatch(self, drop_remainder):
-    iterator = (
+  def testMapAndBatchPartialBatch(self, drop_remainder, numa_aware):
+    dataset = (
         dataset_ops.Dataset.range(10).apply(
             batching.map_and_batch(
                 lambda x: array_ops.reshape(x * x, [1]),
                 batch_size=4,
-                drop_remainder=drop_remainder)).make_one_shot_iterator())
+                drop_remainder=drop_remainder)))
+
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+
     if drop_remainder:
       self.assertEqual([4, 1], iterator.output_shapes.as_list())
     else:
@@ -138,11 +159,21 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
-  def testMapAndBatchYieldsPartialBatch(self):
-    iterator = (dataset_ops.Dataset.range(10)
-                .apply(batching.map_and_batch(
-                    lambda x: array_ops.reshape(x * x, [1]), 4))
-                .make_one_shot_iterator())
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchYieldsPartialBatch(self, numa_aware):
+    dataset = (
+        dataset_ops.Dataset.range(10).apply(
+            batching.map_and_batch(lambda x: array_ops.reshape(x * x, [1]), 4)))
+
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+
+    iterator = dataset.make_one_shot_iterator()
     self.assertEqual([None, 1], iterator.output_shapes.as_list())
     next_element = iterator.get_next()
     with self.cached_session() as sess:
@@ -152,10 +183,19 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
-  def testMapAndBatchParallelGetNext(self):
-    iterator = (dataset_ops.Dataset.range(50000)
-                .apply(batching.map_and_batch(lambda x: x, batch_size=100))
-                .make_one_shot_iterator())
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchParallelGetNext(self, numa_aware):
+    dataset = dataset_ops.Dataset.range(50000).apply(
+        batching.map_and_batch(lambda x: x, batch_size=100))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+
     elements = []
     for _ in range(100):
       elements.append(iterator.get_next())
@@ -165,17 +205,26 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         got.sort(key=lambda x: x[0])
         expected = []
         for j in range(100):
-          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
+          expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
         self.assertAllEqual(got, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(elements)
 
-  def testMapAndBatchParallelGetNextDropRemainder(self):
-    iterator = (
-        dataset_ops.Dataset.range(49999).apply(
-            batching.map_and_batch(
-                lambda x: x, batch_size=100, drop_remainder=True))
-        .make_one_shot_iterator())
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchParallelGetNextDropRemainder(self, numa_aware):
+    dataset = dataset_ops.Dataset.range(49999).apply(
+        batching.map_and_batch(
+            lambda x: x, batch_size=100, drop_remainder=True))
+
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+
     elements = []
     for _ in range(100):
       elements.append(iterator.get_next())
@@ -185,19 +234,29 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         got.sort(key=lambda x: x[0])
         expected = []
         for j in range(100):
-          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
+          expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
         self.assertAllEqual(got, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(elements)
 
-  def testMapAndBatchSparse(self):
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchSparse(self, numa_aware):
 
     def _sparse(i):
       return sparse_tensor.SparseTensorValue(
           indices=[[0]], values=(i * [1]), dense_shape=[1])
 
-    iterator = dataset_ops.Dataset.range(10).apply(
-        batching.map_and_batch(_sparse, 5)).make_initializable_iterator()
+    dataset = dataset_ops.Dataset.range(10).apply(
+        batching.map_and_batch(_sparse, 5))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_initializable_iterator()
+
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -214,21 +273,33 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testMapAndBatchFails(self):
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchFails(self, numa_aware):
     """Test a dataset that maps a TF function across its input elements."""
     dataset = dataset_ops.Dataset.from_tensors(
         array_ops.check_numerics(
             constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
     batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-    iterator = (
-        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
-        .make_initializable_iterator())
+    dataset = dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_initializable_iterator()
+
     init_op = iterator.initializer
     with self.cached_session() as sess:
       with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
         sess.run(init_op, feed_dict={batch_size: 14})
 
-  def testMapAndBatchShapeMismatch(self):
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchShapeMismatch(self, numa_aware):
     """Test a dataset that maps a TF function across its input elements."""
 
     def generator():
@@ -240,9 +311,13 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = dataset_ops.Dataset.from_generator(
         generator, output_types=dtypes.int32)
     batch_size = 4
-    iterator = (
-        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
-        .make_initializable_iterator())
+    dataset = dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_initializable_iterator()
+
     init_op = iterator.initializer
     get_next = iterator.get_next()
     with self.cached_session() as sess:
@@ -251,7 +326,11 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
                                    "number of elements does not match"):
         sess.run(get_next)
 
-  def testMapAndBatchImplicitDispose(self):
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchImplicitDispose(self, numa_aware):
     # Tests whether a map and batch dataset will be cleaned up correctly when
     # the pipeline does not run it until exhaustion.
     # The pipeline is TensorSliceDataset -> RepeatDataset(1000) ->
@@ -266,6 +345,10 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
         1000).apply(batching.map_and_batch(_map_fn, batch_size=100))
     dataset = dataset.prefetch(5)
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
@@ -274,26 +357,38 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         sess.run(get_next)
 
   @parameterized.named_parameters(
-      ("1", 0),
-      ("2", 5),
-      ("3", 10),
-      ("4", 90),
-      ("5", 95),
-      ("6", 99),
+      ("1", 0, False),
+      ("2", 5, False),
+      ("3", 10, False),
+      ("4", 90, False),
+      ("5", 95, False),
+      ("6", 99, False),
+      ("1NUMA", 0, True),
+      ("2NUMA", 5, True),
+      ("3NUMA", 10, True),
+      ("4NUMA", 90, True),
+      ("5NUMA", 95, True),
+      ("6NUMA", 99, True),
   )
-  def testMapAndBatchOutOfRangeError(self, threshold):
+  def testMapAndBatchOutOfRangeError(self, threshold, numa_aware):
 
     def raising_py_fn(i):
-      if i >= threshold:
+      if i == threshold:
         raise StopIteration()
+      elif i > threshold:
+        raise RuntimeError("Alternate error; you shouldn't see me! (i: %s)" % i)
       else:
         return i
 
-    iterator = (
-        dataset_ops.Dataset.range(100).apply(
-            batching.map_and_batch(
-                lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
-                batch_size=10)).make_one_shot_iterator())
+    dataset = dataset_ops.Dataset.range(100).apply(
+        batching.map_and_batch(
+            lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
+            batch_size=10))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
@@ -307,25 +402,42 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         sess.run(get_next)
 
   @parameterized.named_parameters(
-      ("1", False, dtypes.bool),
-      ("2", -42, dtypes.int8),
-      ("3", -42, dtypes.int16),
-      ("4", -42, dtypes.int32),
-      ("5", -42, dtypes.int64),
-      ("6", 42, dtypes.uint8),
-      ("7", 42, dtypes.uint16),
-      ("8", 42.0, dtypes.float16),
-      ("9", 42.0, dtypes.float32),
-      ("10", 42.0, dtypes.float64),
-      ("11", b"hello", dtypes.string),
+      ("1", False, dtypes.bool, False),
+      ("2", -42, dtypes.int8, False),
+      ("3", -42, dtypes.int16, False),
+      ("4", -42, dtypes.int32, False),
+      ("5", -42, dtypes.int64, False),
+      ("6", 42, dtypes.uint8, False),
+      ("7", 42, dtypes.uint16, False),
+      ("8", 42.0, dtypes.float16, False),
+      ("9", 42.0, dtypes.float32, False),
+      ("10", 42.0, dtypes.float64, False),
+      ("11", b"hello", dtypes.string, False),
+      ("1NUMA", False, dtypes.bool, True),
+      ("2NUMA", -42, dtypes.int8, True),
+      ("3NUMA", -42, dtypes.int16, True),
+      ("4NUMA", -42, dtypes.int32, True),
+      ("5NUMA", -42, dtypes.int64, True),
+      ("6NUMA", 42, dtypes.uint8, True),
+      ("7NUMA", 42, dtypes.uint16, True),
+      ("8NUMA", 42.0, dtypes.float16, True),
+      ("9NUMA", 42.0, dtypes.float32, True),
+      ("10NUMA", 42.0, dtypes.float64, True),
+      ("11NUMA", b"hello", dtypes.string, True),
   )
-  def testMapAndBatchTypes(self, element, dtype):
+  def testMapAndBatchTypes(self, element, dtype, numa_aware):
+
     def gen():
       yield element
 
     dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply(
         batching.map_and_batch(lambda x: x, batch_size=10))
 
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+
     get_next = dataset.make_one_shot_iterator().get_next()
 
     with self.cached_session() as sess:
@@ -363,6 +475,40 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(iterator.initializer, feed_dict={captured_t: 42})
       self.assertAllEqual([42] * 10, sess.run(get_next))
 
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchControlFlow(self, numa_aware):
+
+    def map_fn(x):
+      previous_cond_v2_value = control_flow_ops.ENABLE_COND_V2
+      control_flow_ops.ENABLE_COND_V2 = True
+      return_value = control_flow_ops.cond(x < 50, lambda: x + 1, lambda: x * x)
+      control_flow_ops.ENABLE_COND_V2 = previous_cond_v2_value
+      return return_value
+
+    dataset = dataset_ops.Dataset.range(100).apply(
+        batching.map_and_batch(map_fn, batch_size=10))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      for i in range(10):
+        print("Case %d" % i)
+        if i < 5:
+          self.assertAllEqual([i * 10 + j + 1 for j in range(10)],
+                              sess.run(get_next))
+        else:
+          self.assertAllEqual(
+              [((i * 10) + j) * ((i * 10) + j) for j in range(10)],
+              sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
index c92bb8b9bc..5a0a73fd83 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
@@ -161,6 +161,7 @@ py_test(
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
@@ -199,6 +200,7 @@ py_test(
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
index 82516356df..d38255a6ea 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import time
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.experimental.ops import batching
@@ -29,7 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class ModelDatasetTest(test_base.DatasetTestBase):
+class ModelDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testModelMap(self):
     k = 1024 * 1024
@@ -82,7 +83,11 @@ class ModelDatasetTest(test_base.DatasetTestBase):
           (np.median(deltas), np.mean(deltas), np.std(deltas), np.min(deltas),
            np.max(deltas)))
 
-  def testModelMapAndBatch(self):
+  @parameterized.named_parameters(
+      ("Default", False),
+      ("NUMA", True),
+  )
+  def testModelMapAndBatch(self, numa_aware):
     batch_size = 16
     k = 1024 * 1024
     dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k),
@@ -95,6 +100,8 @@ class ModelDatasetTest(test_base.DatasetTestBase):
             batch_size=batch_size))
     options = dataset_ops.Options()
     options.experimental_autotune = True
+    if numa_aware:
+      options.experimental_numa_aware = True
     iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
index 760cd8cc4e..2ef29796ab 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
@@ -59,6 +60,21 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testNumaAwareRewrite(self):
+    dataset = dataset_ops.Dataset.range(10).apply(
+        optimization.assert_next(["NumaMapAndBatch"])).apply(
+            batching.map_and_batch(lambda x: x * x, 10))
+    options = dataset_ops.Options()
+    options.experimental_numa_aware = True
+    dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      self.assertAllEqual([x * x for x in range(10)], sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
   def testOptimizationStatefulFunction(self):
     dataset = dataset_ops.Dataset.range(10).map(
         lambda _: random_ops.random_uniform([])).batch(10)
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index e556b65b7c..a97cff9fbb 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -306,6 +306,21 @@ py_test(
     ],
 )
 
+py_test(
+    name = "numa_map_and_batch_dataset_serialization_test",
+    size = "medium",
+    srcs = ["numa_map_and_batch_dataset_serialization_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":dataset_serialization_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 py_test(
     name = "map_dataset_serialization_test",
     size = "medium",
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py
new file mode 100644
index 0000000000..04aab329cd
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py
@@ -0,0 +1,95 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the MapAndBatchDataset serialization."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class MapAndBatchDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def testNumParallelBatches(self):
+    range_size = 11
+    num_repeats = 2
+    batch_size = 5
+    total_outputs = range_size * num_repeats
+    num_outputs_drop_remainder = total_outputs // batch_size
+    num_outputs_keep_remainder = int(math.ceil(total_outputs / batch_size))
+    num_parallel_batches = 2
+
+    def build_ds(range_start, drop_remainder=False):
+
+      def _map_fn(x):
+        return math_ops.square(x)
+
+      ds = dataset_ops.Dataset.range(
+          range_start, range_start + range_size).repeat(num_repeats).apply(
+              batching.map_and_batch(
+                  map_func=_map_fn,
+                  batch_size=batch_size,
+                  num_parallel_batches=num_parallel_batches,
+                  drop_remainder=drop_remainder))
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      return ds.with_options(options)
+
+    self.run_core_tests(lambda: build_ds(10), lambda: build_ds(15),
+                        num_outputs_keep_remainder)
+    self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True),
+                        num_outputs_drop_remainder)
+
+  def testNumParallelCalls(self):
+    range_size = 11
+    num_repeats = 2
+    batch_size = 5
+    total_outputs = range_size * num_repeats
+    num_outputs_drop_remainder = total_outputs // batch_size
+    num_outputs_keep_remainder = int(math.ceil(total_outputs / batch_size))
+    num_parallel_calls = 7
+
+    def build_ds(range_start, drop_remainder=False):
+
+      def _map_fn(x):
+        return math_ops.square(x)
+
+      ds = dataset_ops.Dataset.range(
+          range_start, range_start + range_size).repeat(num_repeats).apply(
+              batching.map_and_batch(
+                  map_func=_map_fn,
+                  batch_size=batch_size,
+                  num_parallel_calls=num_parallel_calls,
+                  drop_remainder=drop_remainder))
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      return ds.with_options(options)
+
+    self.run_core_tests(lambda: build_ds(10), lambda: build_ds(15),
+                        num_outputs_keep_remainder)
+    self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True),
+                        num_outputs_drop_remainder)
+
+
+if __name__ == "__main__":
+  test.main()
+
diff --git a/tensorflow/python/data/experimental/ops/BUILD b/tensorflow/python/data/experimental/ops/BUILD
index 915d399f1b..46a9552b61 100644
--- a/tensorflow/python/data/experimental/ops/BUILD
+++ b/tensorflow/python/data/experimental/ops/BUILD
@@ -122,6 +122,7 @@ py_library(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dataset_ops_gen",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:tensor_shape",
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index cf52f7529a..6195747671 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1410,6 +1410,8 @@ class Options(object):
        "Whether to eliminate no-op transformations."),
       ("experimental_shuffle_and_repeat_fusion", bool,
        "Whether to fuse shuffle and repeat transformations."),
+      ("experimental_numa_aware", bool,
+       "Whether to use NUMA-aware operations."),
   ]:
 
     def _make_getter(name):  # pylint: disable=no-self-argument
@@ -1458,6 +1460,9 @@ class Options(object):
     for exp_opt in experimental_optimizations:
       if getattr(self, "experimental_" + exp_opt):
         result.append(exp_opt)
+
+    if getattr(self, "experimental_numa_aware"):
+      result.append("map_and_batch_numa_aware_replacement")
     return result
 
   def merge(self, options):
@@ -1485,7 +1490,7 @@ class Options(object):
           "experimental_map_and_filter_fusion", "experimental_map_fusion",
           "experimental_map_parallelization", "experimental_map_vectorization",
           "experimental_noop_elimination",
-          "experimental_shuffle_and_repeat_fusion"
+          "experimental_shuffle_and_repeat_fusion", "experimental_numa_aware",
       ]:
         this = getattr(result, name)
         that = getattr(other, name)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
index d15dccc173..22256996d3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
@@ -42,6 +42,10 @@ tf_class {
     name: "experimental_noop_elimination"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "experimental_numa_aware"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "experimental_shuffle_and_repeat_fusion"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
index d15dccc173..22256996d3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
@@ -42,6 +42,10 @@ tf_class {
     name: "experimental_noop_elimination"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "experimental_numa_aware"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "experimental_shuffle_and_repeat_fusion"
     mtype: "<type \'property\'>"
-- 
GitLab


From 8c2a52b26f21167ed0fcec7859850e38d0c216f9 Mon Sep 17 00:00:00 2001
From: Pavel Sountsov <siege@google.com>
Date: Tue, 9 Oct 2018 11:56:25 -0700
Subject: [PATCH 1310/1357] Silence tf.distributions deprecation messages
 caused by internal global function calls.

E.g. register_kl calls would trigger such warnings. This spam was exacerbated
by the fact that it happens before logging is initialized, so it is dumped
prominently to STDERR. Worse yet it also happened no matter whether the user
imported any symbols from tf.distributions or not as the relevant code is
executed when you import TensorFlow.

PiperOrigin-RevId: 216396036
---
 tensorflow/contrib/distributions/__init__.py  | 128 +++++++++---------
 .../python/ops/distributions/distributions.py |  35 ++---
 2 files changed, 85 insertions(+), 78 deletions(-)

diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 5cec93c4df..343eae3440 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -18,69 +18,73 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member
+from tensorflow.python.util import deprecation
 
-from tensorflow.contrib.distributions.python.ops import bijectors
-from tensorflow.contrib.distributions.python.ops.autoregressive import *
-from tensorflow.contrib.distributions.python.ops.batch_reshape import *
-from tensorflow.contrib.distributions.python.ops.binomial import *
-from tensorflow.contrib.distributions.python.ops.cauchy import *
-from tensorflow.contrib.distributions.python.ops.chi2 import *
-from tensorflow.contrib.distributions.python.ops.conditional_distribution import *
-from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import *
-from tensorflow.contrib.distributions.python.ops.deterministic import *
-from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular
-from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular_inverse
-from tensorflow.contrib.distributions.python.ops.distribution_util import matrix_diag_transform
-from tensorflow.contrib.distributions.python.ops.distribution_util import reduce_weighted_logsumexp
-from tensorflow.contrib.distributions.python.ops.distribution_util import softplus_inverse
-from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag
-from tensorflow.contrib.distributions.python.ops.estimator import *
-from tensorflow.contrib.distributions.python.ops.geometric import *
-from tensorflow.contrib.distributions.python.ops.half_normal import *
-from tensorflow.contrib.distributions.python.ops.independent import *
-from tensorflow.contrib.distributions.python.ops.inverse_gamma import *
-from tensorflow.contrib.distributions.python.ops.kumaraswamy import *
-from tensorflow.contrib.distributions.python.ops.logistic import *
-from tensorflow.contrib.distributions.python.ops.mixture import *
-from tensorflow.contrib.distributions.python.ops.mixture_same_family import *
-from tensorflow.contrib.distributions.python.ops.moving_stats import *
-from tensorflow.contrib.distributions.python.ops.mvn_diag import *
-from tensorflow.contrib.distributions.python.ops.mvn_diag_plus_low_rank import *
-from tensorflow.contrib.distributions.python.ops.mvn_full_covariance import *
-from tensorflow.contrib.distributions.python.ops.mvn_tril import *
-from tensorflow.contrib.distributions.python.ops.negative_binomial import *
-from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors import *
-from tensorflow.contrib.distributions.python.ops.onehot_categorical import *
-from tensorflow.contrib.distributions.python.ops.poisson import *
-from tensorflow.contrib.distributions.python.ops.poisson_lognormal import *
-from tensorflow.contrib.distributions.python.ops.quantized_distribution import *
-from tensorflow.contrib.distributions.python.ops.relaxed_bernoulli import *
-from tensorflow.contrib.distributions.python.ops.relaxed_onehot_categorical import *
-from tensorflow.contrib.distributions.python.ops.sample_stats import *
-from tensorflow.contrib.distributions.python.ops.seed_stream import *
-from tensorflow.contrib.distributions.python.ops.sinh_arcsinh import *
-from tensorflow.contrib.distributions.python.ops.test_util import *
-from tensorflow.contrib.distributions.python.ops.vector_diffeomixture import *
-from tensorflow.contrib.distributions.python.ops.vector_exponential_diag import *
-from tensorflow.contrib.distributions.python.ops.vector_laplace_diag import *
-from tensorflow.contrib.distributions.python.ops.vector_sinh_arcsinh_diag import *
-from tensorflow.contrib.distributions.python.ops.wishart import *
-from tensorflow.python.ops.distributions.bernoulli import *
-from tensorflow.python.ops.distributions.beta import *
-from tensorflow.python.ops.distributions.categorical import *
-from tensorflow.python.ops.distributions.dirichlet import *
-from tensorflow.python.ops.distributions.dirichlet_multinomial import *
-from tensorflow.python.ops.distributions.distribution import *
-from tensorflow.python.ops.distributions.exponential import *
-from tensorflow.python.ops.distributions.gamma import *
-from tensorflow.python.ops.distributions.kullback_leibler import *
-from tensorflow.python.ops.distributions.laplace import *
-from tensorflow.python.ops.distributions.multinomial import *
-from tensorflow.python.ops.distributions.normal import *
-from tensorflow.python.ops.distributions.student_t import *
-from tensorflow.python.ops.distributions.transformed_distribution import *
-from tensorflow.python.ops.distributions.uniform import *
+
+# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member,g-import-not-at-top
+
+with deprecation.silence():
+  from tensorflow.contrib.distributions.python.ops import bijectors
+  from tensorflow.contrib.distributions.python.ops.autoregressive import *
+  from tensorflow.contrib.distributions.python.ops.batch_reshape import *
+  from tensorflow.contrib.distributions.python.ops.binomial import *
+  from tensorflow.contrib.distributions.python.ops.cauchy import *
+  from tensorflow.contrib.distributions.python.ops.chi2 import *
+  from tensorflow.contrib.distributions.python.ops.conditional_distribution import *
+  from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import *
+  from tensorflow.contrib.distributions.python.ops.deterministic import *
+  from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular
+  from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular_inverse
+  from tensorflow.contrib.distributions.python.ops.distribution_util import matrix_diag_transform
+  from tensorflow.contrib.distributions.python.ops.distribution_util import reduce_weighted_logsumexp
+  from tensorflow.contrib.distributions.python.ops.distribution_util import softplus_inverse
+  from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag
+  from tensorflow.contrib.distributions.python.ops.estimator import *
+  from tensorflow.contrib.distributions.python.ops.geometric import *
+  from tensorflow.contrib.distributions.python.ops.half_normal import *
+  from tensorflow.contrib.distributions.python.ops.independent import *
+  from tensorflow.contrib.distributions.python.ops.inverse_gamma import *
+  from tensorflow.contrib.distributions.python.ops.kumaraswamy import *
+  from tensorflow.contrib.distributions.python.ops.logistic import *
+  from tensorflow.contrib.distributions.python.ops.mixture import *
+  from tensorflow.contrib.distributions.python.ops.mixture_same_family import *
+  from tensorflow.contrib.distributions.python.ops.moving_stats import *
+  from tensorflow.contrib.distributions.python.ops.mvn_diag import *
+  from tensorflow.contrib.distributions.python.ops.mvn_diag_plus_low_rank import *
+  from tensorflow.contrib.distributions.python.ops.mvn_full_covariance import *
+  from tensorflow.contrib.distributions.python.ops.mvn_tril import *
+  from tensorflow.contrib.distributions.python.ops.negative_binomial import *
+  from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors import *
+  from tensorflow.contrib.distributions.python.ops.onehot_categorical import *
+  from tensorflow.contrib.distributions.python.ops.poisson import *
+  from tensorflow.contrib.distributions.python.ops.poisson_lognormal import *
+  from tensorflow.contrib.distributions.python.ops.quantized_distribution import *
+  from tensorflow.contrib.distributions.python.ops.relaxed_bernoulli import *
+  from tensorflow.contrib.distributions.python.ops.relaxed_onehot_categorical import *
+  from tensorflow.contrib.distributions.python.ops.sample_stats import *
+  from tensorflow.contrib.distributions.python.ops.seed_stream import *
+  from tensorflow.contrib.distributions.python.ops.sinh_arcsinh import *
+  from tensorflow.contrib.distributions.python.ops.test_util import *
+  from tensorflow.contrib.distributions.python.ops.vector_diffeomixture import *
+  from tensorflow.contrib.distributions.python.ops.vector_exponential_diag import *
+  from tensorflow.contrib.distributions.python.ops.vector_laplace_diag import *
+  from tensorflow.contrib.distributions.python.ops.vector_sinh_arcsinh_diag import *
+  from tensorflow.contrib.distributions.python.ops.wishart import *
+  from tensorflow.python.ops.distributions.bernoulli import *
+  from tensorflow.python.ops.distributions.beta import *
+  from tensorflow.python.ops.distributions.categorical import *
+  from tensorflow.python.ops.distributions.dirichlet import *
+  from tensorflow.python.ops.distributions.dirichlet_multinomial import *
+  from tensorflow.python.ops.distributions.distribution import *
+  from tensorflow.python.ops.distributions.exponential import *
+  from tensorflow.python.ops.distributions.gamma import *
+  from tensorflow.python.ops.distributions.kullback_leibler import *
+  from tensorflow.python.ops.distributions.laplace import *
+  from tensorflow.python.ops.distributions.multinomial import *
+  from tensorflow.python.ops.distributions.normal import *
+  from tensorflow.python.ops.distributions.student_t import *
+  from tensorflow.python.ops.distributions.transformed_distribution import *
+  from tensorflow.python.ops.distributions.uniform import *
 
 # pylint: enable=unused-import,wildcard-import,line-too-long,g-importing-member
 
diff --git a/tensorflow/python/ops/distributions/distributions.py b/tensorflow/python/ops/distributions/distributions.py
index 59ed455e43..b18caa5b2e 100644
--- a/tensorflow/python/ops/distributions/distributions.py
+++ b/tensorflow/python/ops/distributions/distributions.py
@@ -17,21 +17,24 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.util import deprecation
 
-# pylint: disable=wildcard-import,unused-import
-from tensorflow.python.ops.distributions.bernoulli import Bernoulli
-from tensorflow.python.ops.distributions.beta import Beta
-from tensorflow.python.ops.distributions.categorical import Categorical
-from tensorflow.python.ops.distributions.dirichlet import Dirichlet
-from tensorflow.python.ops.distributions.dirichlet_multinomial import DirichletMultinomial
-from tensorflow.python.ops.distributions.distribution import *
-from tensorflow.python.ops.distributions.exponential import Exponential
-from tensorflow.python.ops.distributions.gamma import Gamma
-from tensorflow.python.ops.distributions.kullback_leibler import *
-from tensorflow.python.ops.distributions.laplace import Laplace
-from tensorflow.python.ops.distributions.multinomial import Multinomial
-from tensorflow.python.ops.distributions.normal import Normal
-from tensorflow.python.ops.distributions.student_t import StudentT
-from tensorflow.python.ops.distributions.uniform import Uniform
-# pylint: enable=wildcard-import,unused-import
 
+# pylint: disable=wildcard-import,unused-import,g-import-not-at-top
+with deprecation.silence():
+  from tensorflow.python.ops.distributions.bernoulli import Bernoulli
+  from tensorflow.python.ops.distributions.beta import Beta
+  from tensorflow.python.ops.distributions.categorical import Categorical
+  from tensorflow.python.ops.distributions.dirichlet import Dirichlet
+  from tensorflow.python.ops.distributions.dirichlet_multinomial import DirichletMultinomial
+  from tensorflow.python.ops.distributions.distribution import *
+  from tensorflow.python.ops.distributions.exponential import Exponential
+  from tensorflow.python.ops.distributions.gamma import Gamma
+  from tensorflow.python.ops.distributions.kullback_leibler import *
+  from tensorflow.python.ops.distributions.laplace import Laplace
+  from tensorflow.python.ops.distributions.multinomial import Multinomial
+  from tensorflow.python.ops.distributions.normal import Normal
+  from tensorflow.python.ops.distributions.student_t import StudentT
+  from tensorflow.python.ops.distributions.uniform import Uniform
+# pylint: enable=wildcard-import,unused-import
+del deprecation
-- 
GitLab


From 0c6baae5af46bb22ea52db724e2194845d3bbf8c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 12:24:05 -0700
Subject: [PATCH 1311/1357] Add RaggedTensors to tf.core. Moving the
 RaggedGather op kernel.

PiperOrigin-RevId: 216400726
---
 tensorflow/core/BUILD                         |  15 +
 .../base_api/api_def_RaggedGather.pbtxt       |  81 +++++
 tensorflow/core/kernels/BUILD                 |  31 ++
 tensorflow/core/kernels/ragged_gather_op.cc   | 292 ++++++++++++++++++
 .../core/kernels/ragged_gather_op_test.cc     | 281 +++++++++++++++++
 tensorflow/core/ops/ragged_array_ops.cc       |  85 +++++
 6 files changed, 785 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt
 create mode 100644 tensorflow/core/kernels/ragged_gather_op.cc
 create mode 100644 tensorflow/core/kernels/ragged_gather_op_test.cc
 create mode 100644 tensorflow/core/ops/ragged_array_ops.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index acea8e2217..9e7806342a 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1154,6 +1154,19 @@ tf_gen_op_libs(
     ],
 )
 
+cc_library(
+    name = "ragged_ops",
+    deps = [
+        ":ragged_array_ops_op_lib",
+    ],
+)
+
+tf_gen_op_libs(
+    op_lib_names = [
+        "ragged_array_ops",
+    ],
+)
+
 cc_library(
     name = "ops",
     visibility = ["//visibility:public"],
@@ -1187,6 +1200,7 @@ cc_library(
         ":nn_ops_op_lib",
         ":no_op_op_lib",
         ":parsing_ops_op_lib",
+        ":ragged_ops",
         ":random_ops_op_lib",
         ":remote_fused_graph_ops_op_lib",
         ":resource_variable_ops_op_lib",
@@ -1340,6 +1354,7 @@ cc_library(
         "//tensorflow/core/kernels:parameterized_truncated_normal_op",
         "//tensorflow/core/kernels:parsing",
         "//tensorflow/core/kernels:partitioned_function_ops",
+        "//tensorflow/core/kernels:ragged_ops",
         "//tensorflow/core/kernels:random_ops",
         "//tensorflow/core/kernels:random_poisson_op",
         "//tensorflow/core/kernels:remote_fused_graph_ops",
diff --git a/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt b/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt
new file mode 100644
index 0000000000..240c987dda
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt
@@ -0,0 +1,81 @@
+op {
+  graph_op_name: "RaggedGather"
+  visibility: HIDDEN
+  in_arg {
+    name: "params_nested_splits"
+    description: <<END
+The `nested_row_splits` tensors that define the row-partitioning for the
+`params` RaggedTensor input.
+END
+  }
+  in_arg {
+    name: "params_dense_values"
+    description: <<END
+The `inner_values` for the `params` RaggedTensor. There was a terminology change
+at the python level from dense_values to inner_values, so dense_values is the
+deprecated name.
+END
+  }
+  in_arg {
+    name: "indices"
+    description: <<END
+Indices in the outermost dimension of `params` of the values that should be
+gathered.
+END
+  }
+  out_arg {
+    name: "output_nested_splits"
+    description: <<END
+The `nested_row_splits` tensors that define the row-partitioning for the
+returned RaggedTensor.
+END
+  }
+  out_arg {
+    name: "output_dense_values"
+    description: "The `inner_values` for the returned RaggedTensor."
+  }
+  attr {
+    name: "PARAMS_RAGGED_RANK"
+    description: <<END
+The ragged rank of the `params` RaggedTensor. `params_nested_splits` should
+contain this number of `row_splits` tensors. This value should equal
+`params.ragged_rank`.
+END
+  }
+  attr {
+    name: "OUTPUT_RAGGED_RANK"
+    description: <<END
+The ragged rank of the output RaggedTensor. `output_nested_splits` will contain
+this number of `row_splits` tensors. This value should equal
+`indices.shape.ndims + params.ragged_rank - 1`.
+END
+  }
+  summary: <<END
+Gather ragged slices from `params` axis `0` according to `indices`.
+END
+  description: <<END
+Outputs a `RaggedTensor` output composed from `output_dense_values` and
+`output_nested_splits`, such that:
+
+```python
+output.shape = indices.shape + params.shape[1:]
+output.ragged_rank = indices.shape.ndims + params.ragged_rank
+output[i...j, d0...dn] = params[indices[i...j], d0...dn]
+```
+
+where
+
+* `params =
+   ragged.from_nested_row_splits(params_dense_values, params_nested_splits)`
+   provides the values that should be gathered.
+* `indices` ia a dense tensor with dtype `int32` or `int64`, indicating which
+   values should be gathered.
+* `output =
+   ragged.from_nested_row_splits(output_dense_values, output_nested_splits)`
+   is the output tensor.
+
+(Note: This c++ op is used to implement the higher-level python
+`tf.ragged.gather` op, which also supports ragged indices.)
+
+END
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 3a920f26f3..1ca9c7b7f5 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -958,6 +958,37 @@ tf_kernel_library(
     ]) + ARRAY_DEPS,
 )
 
+cc_library(
+    name = "ragged_ops",
+    deps = [
+        ":ragged_gather_op",
+    ],
+)
+
+tf_kernel_library(
+    name = "ragged_gather_op",
+    srcs = ["ragged_gather_op.cc"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:ragged_array_ops_op_lib",
+    ],
+)
+
+tf_cc_test(
+    name = "ragged_gather_op_test",
+    size = "small",
+    srcs = ["ragged_gather_op_test.cc"],
+    deps = [
+        ":ragged_gather_op",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:ragged_array_ops_op_lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:ops_testutil",
+    ],
+)
+
 tf_kernel_library(
     name = "cudnn_rnn_kernels",
     srcs = ["cudnn_rnn_ops.cc"],
diff --git a/tensorflow/core/kernels/ragged_gather_op.cc b/tensorflow/core/kernels/ragged_gather_op.cc
new file mode 100644
index 0000000000..b2a342f637
--- /dev/null
+++ b/tensorflow/core/kernels/ragged_gather_op.cc
@@ -0,0 +1,292 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/util/util.h"
+
+namespace tensorflow {
+
+namespace {
+
+// For each slice in `(start, limit)` in `value_slices`, append
+// `params_dense_values_in[start:limit] to `values_out`.  `value_size` indicates
+// the number of scalars contained in each value params_dense_values_in[i].
+template <typename VALUE_TYPE>
+void WriteValueSlices(const Tensor& params_dense_values_in,
+                      const std::vector<std::pair<int64, int64>>& value_slices,
+                      int64 value_size, Tensor* values_out) {
+  const auto& params_dense_values =
+      params_dense_values_in.flat_outer_dims<VALUE_TYPE, 2>();
+  auto values = values_out->flat_outer_dims<VALUE_TYPE, 2>();
+  int out_pos = 0;
+  for (const auto& slice : value_slices) {
+    for (int i = slice.first; i < slice.second; ++i) {
+      for (int j = 0; j < value_size; ++j) {
+        values(out_pos, j) = params_dense_values(i, j);
+      }
+      ++out_pos;
+    }
+  }
+}
+
+}  // namespace
+
+template <typename INDEX_TYPE>
+class RaggedGatherOpBase : public OpKernel {
+ public:
+  using OpKernel::OpKernel;
+
+  void Compute(OpKernelContext* context) override {
+    // Get the input Tensors.
+    OpInputList params_nested_splits_in;
+    OP_REQUIRES_OK(context, context->input_list("params_nested_splits",
+                                                &params_nested_splits_in));
+    const Tensor& params_dense_values_in =
+        context->input(params_nested_splits_in.size());
+    const Tensor& indices_in =
+        context->input(params_nested_splits_in.size() + 1);
+
+    DCHECK_GT(params_nested_splits_in.size(), 0);  // Enforced by REGISTER_OP.
+    int64 num_params = params_nested_splits_in[0].dim_size(0) - 1;
+    OP_REQUIRES_OK(context, ValidateIndices(indices_in, num_params));
+
+    OP_REQUIRES(context, params_dense_values_in.dims() > 0,
+                errors::InvalidArgument("params.rank must be nonzero"));
+    int64 num_params_dense_values = params_dense_values_in.dim_size(0);
+
+    // Calculate the `splits`, and store the value slices that we need to
+    // copy in `value_slices`.
+    std::vector<std::pair<int64, int64>> value_slices;
+    int64 num_values = 0;
+    std::vector<std::vector<int64>> out_splits;
+    OP_REQUIRES_OK(context, MakeSplits(indices_in, params_nested_splits_in,
+                                       num_params_dense_values, &out_splits,
+                                       &value_slices, &num_values));
+
+    // Write the output tensors.
+    OP_REQUIRES_OK(context, WriteSplits(out_splits, context));
+    OP_REQUIRES_OK(context,
+                   WriteValues(params_dense_values_in, value_slices,
+                               out_splits.size(), num_values, context));
+  }
+
+ private:
+  // Check if any indices are out-of-bounds.
+  ::tensorflow::Status ValidateIndices(const Tensor& indices_in,
+                                       int64 num_params) {
+    const auto& indices = indices_in.flat<INDEX_TYPE>();
+    for (int64 i = 0; i < indices.size(); ++i) {
+      int64 index = indices(i);
+      if (index < 0 || index >= num_params) {
+        return errors::InvalidArgument(
+            "indices", SliceDebugString(indices_in.shape(), i), " = ", index,
+            " is not in [0, ", num_params, ")");
+      }
+    }
+    return ::tensorflow::Status::OK();
+  }
+
+  // Construct the `splits` output tensors, encoded using a nested vector.
+  // Also find the slices of values that need to be copied, and store them
+  // in `value_slices`.  The total number of values that will be copied (which
+  // we need for allocating the output values tensor) is stored in `num_values`.
+  ::tensorflow::Status MakeSplits(
+      const Tensor& indices_in, const OpInputList& params_nested_splits_in,
+      int64 num_params_dense_values,
+      std::vector<std::vector<int64>>* out_splits,
+      std::vector<std::pair<int64, int64>>* value_slices, int64* num_values) {
+    *num_values = 0;
+    value_slices->clear();
+
+    int num_splits = indices_in.dims() - 1 + params_nested_splits_in.size();
+    out_splits->assign(num_splits, {0});
+
+    // Get Eigen tensors.
+    const auto& indices = indices_in.flat<INDEX_TYPE>();
+    std::vector<TTypes<int64>::ConstFlat> params_nested_splits;
+    params_nested_splits.reserve(params_nested_splits_in.size());
+    for (const auto& splits_in : params_nested_splits_in) {
+      params_nested_splits.push_back(splits_in.flat<int64>());
+    }
+
+    TF_RETURN_IF_ERROR(
+        ValidateSplits(params_nested_splits, num_params_dense_values));
+
+    // Add `splits` that come from all but the last dimension of the dense
+    // Tensor `indices`.  In particular, for each dimension D, we add a
+    // splits tensor whose values are:
+    //   range(splits.shape[D]*splits.shape[D+1] + 1, step=splits.shape[D+1])
+    // E.g., if indices.shape=[5, 3] then we will add a splits tensor
+    // [0, 3, 6, 9, 12, 15], since the outermost dimension has 5 elements,
+    // each of which contains 3 values.
+    for (int dim = 0; dim < indices_in.dims() - 1; ++dim) {
+      int stride = indices_in.dim_size(dim + 1);
+      int index = stride;
+      for (int i = 0; i < indices_in.dim_size(dim); ++i) {
+        out_splits->at(dim).push_back(index);
+        index += stride;
+      }
+    }
+
+    // Add `splits` that come from `params_nested_splits`.  Starting with the
+    // outermost ragged dimension (i.e., the first `splits` tensor), we work
+    // our way in, finding the range of values that should be copied.  As we
+    // go, we update the output `splits` for each dimension with the appropriate
+    // values.  In particular, the *lengths* of the slices from `param_splits`
+    // should be copied to generate corresponding slice lengths in the output
+    // splits.  E.g., if we are copying a ragged row with length 4, then we
+    // should add a new split point to out_splits that is 4 greater than the
+    // previous split point in out_splits.
+    for (int i = 0; i < indices.size(); ++i) {
+      int start = indices(i);
+      int limit = indices(i) + 1;
+
+      // Copy splits.
+      for (int dim = 0; dim < params_nested_splits.size(); ++dim) {
+        const auto& splits = params_nested_splits[dim];
+        int out_dim = dim + indices_in.dims() - 1;
+        if (out_dim >= 0) {
+          int64 delta = out_splits->at(out_dim).back() - splits(start);
+          for (int j = start; j < limit; ++j) {
+            out_splits->at(out_dim).push_back(splits(j + 1) + delta);
+          }
+        }
+        start = splits(start);
+        limit = splits(limit);
+      }
+      if (limit != start) {
+        value_slices->emplace_back(start, limit);
+        *num_values += limit - start;
+      }
+    }
+    return ::tensorflow::Status::OK();
+  }
+
+  ::tensorflow::Status ValidateSplits(
+      const std::vector<TTypes<int64>::ConstFlat>& params_nested_splits,
+      int64 num_params_dense_values) {
+    // Validate
+    for (int dim = 0; dim < params_nested_splits.size(); ++dim) {
+      const auto& splits = params_nested_splits[dim];
+      int64 last_split = (dim == params_nested_splits.size() - 1)
+                             ? num_params_dense_values
+                             : params_nested_splits[dim + 1].size();
+      if (splits.size() == 0) {
+        return errors::InvalidArgument("Ragged splits may not be empty");
+      }
+      if (splits(0) < 0) {
+        return errors::InvalidArgument("Ragged splits must be non-negative");
+      }
+      if (splits(splits.size() - 1) > last_split) {
+        return errors::InvalidArgument(
+            "Ragged splits must not point past values");
+      }
+      for (int i = 1; i < splits.size(); ++i) {
+        if (splits(i - 1) > splits(i)) {
+          return errors::InvalidArgument("Ragged splits must be sorted");
+        }
+      }
+    }
+    return ::tensorflow::Status::OK();
+  }
+
+  ::tensorflow::Status WriteSplits(
+      const std::vector<std::vector<int64>>& out_splits,
+      OpKernelContext* context) {
+    OpOutputList splits_out;
+    TF_RETURN_IF_ERROR(
+        context->output_list("output_nested_splits", &splits_out));
+    for (int i = 0; i < out_splits.size(); ++i) {
+      Tensor* splits;
+      int64 num_splits = out_splits[i].size();
+      TF_RETURN_IF_ERROR(
+          splits_out.allocate(i, TensorShape({num_splits}), &splits));
+      auto splits_flat = splits->flat<int64>();
+      std::copy_n(out_splits[i].data(), out_splits[i].size(),
+                  splits_flat.data());
+    }
+    return ::tensorflow::Status::OK();
+  }
+
+  ::tensorflow::Status WriteValues(
+      const Tensor& params_dense_values_in,
+      const std::vector<std::pair<int64, int64>>& value_slices,
+      int values_index, int64 num_values, OpKernelContext* context) const {
+    Tensor* values_out = nullptr;
+    TensorShape values_shape = params_dense_values_in.shape();
+    values_shape.set_dim(0, num_values);
+    TF_RETURN_IF_ERROR(
+        context->allocate_output(values_index, values_shape, &values_out));
+    int64 value_size = params_dense_values_in.NumElements() /
+                       params_dense_values_in.dim_size(0);
+    CallWriteValueSlices(params_dense_values_in, value_slices, value_size,
+                         values_out);
+    return ::tensorflow::Status::OK();
+  }
+
+ protected:
+  // Call WriteValueSlices() using the appropriate VALUE_TYPE template
+  // parameter.  This pattern is used to reduce binary size.  In particular,
+  // this allows us to have two instantiations of this class (one for each
+  // index type), rather than 14 (one for each index type and value type),
+  // which cuts the binary size of this op from ~300k to <90k.
+  virtual void CallWriteValueSlices(
+      const Tensor& params_dense_values_in,
+      const std::vector<std::pair<int64, int64>>& value_slices,
+      int64 value_size, Tensor* values_out) const = 0;
+};
+
+template <typename INDEX_TYPE, typename VALUE_TYPE>
+class RaggedGatherOp : public RaggedGatherOpBase<INDEX_TYPE> {
+ public:
+  using RaggedGatherOpBase<INDEX_TYPE>::RaggedGatherOpBase;
+
+ private:
+  void CallWriteValueSlices(
+      const Tensor& params_dense_values_in,
+      const std::vector<std::pair<int64, int64>>& value_slices,
+      int64 value_size, Tensor* values_out) const override {
+    WriteValueSlices<VALUE_TYPE>(params_dense_values_in, value_slices,
+                                 value_size, values_out);
+  }
+};
+
+#define REGISTER_CPU_KERNEL_WITH_INDEX_TYPE(index_type, value_type)   \
+  REGISTER_KERNEL_BUILDER(Name("RaggedGather")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<index_type>("Tindices") \
+                              .TypeConstraint<value_type>("Tvalues"), \
+                          RaggedGatherOp<index_type, value_type>);
+#define REGISTER_CPU_KERNEL(value_type)                  \
+  REGISTER_CPU_KERNEL_WITH_INDEX_TYPE(int32, value_type) \
+  REGISTER_CPU_KERNEL_WITH_INDEX_TYPE(int64, value_type)
+TF_CALL_POD_TYPES(REGISTER_CPU_KERNEL);
+TF_CALL_string(REGISTER_CPU_KERNEL);
+TF_CALL_QUANTIZED_TYPES(REGISTER_CPU_KERNEL);
+TF_CALL_quint16(REGISTER_CPU_KERNEL);
+TF_CALL_qint16(REGISTER_CPU_KERNEL);
+TF_CALL_uint32(REGISTER_CPU_KERNEL);
+TF_CALL_uint64(REGISTER_CPU_KERNEL);
+#undef REGISTER_CPU_KERNEL
+#undef REGISTER_CPU_KERNEL_WITH_INDEX_TYPE
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/ragged_gather_op_test.cc b/tensorflow/core/kernels/ragged_gather_op_test.cc
new file mode 100644
index 0000000000..47be788151
--- /dev/null
+++ b/tensorflow/core/kernels/ragged_gather_op_test.cc
@@ -0,0 +1,281 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/shape_inference_testutil.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+class RaggedGatherOpTest : public ::tensorflow::OpsTestBase {
+ protected:
+  // Builds the tensorflow test graph for RaggedGather.
+  template <typename VALUE_TYPE, typename INDEX_TYPE>
+  void BuildRaggedGatherGraph(
+      const TensorShape& indices_shape, const std::vector<INDEX_TYPE>& indices,
+      const std::vector<std::vector<int64>>& params_nested_splits,
+      const TensorShape& params_dense_values_shape,
+      const gtl::ArraySlice<VALUE_TYPE> params_dense_values) {
+    const auto& value_dtype = DataTypeToEnum<VALUE_TYPE>::v();
+    const auto& index_dtype = DataTypeToEnum<INDEX_TYPE>::v();
+    int64 PARAMS_RAGGED_RANK = params_nested_splits.size();
+    int64 num_splits = PARAMS_RAGGED_RANK + indices_shape.dims() - 1;
+    TF_ASSERT_OK(
+        NodeDefBuilder("tested_op", "RaggedGather")
+            .Input(FakeInput(PARAMS_RAGGED_RANK))  // params_nested_splits
+            .Input(FakeInput(value_dtype))         // params_dense_values
+            .Input(FakeInput(index_dtype))         // indices
+            .Attr("PARAMS_RAGGED_RANK", PARAMS_RAGGED_RANK)
+            .Attr("OUTPUT_RAGGED_RANK", num_splits)
+            .Attr("Tvalues", value_dtype)
+            .Attr("Tindices", index_dtype)
+            .Finalize(node_def()));
+    TF_ASSERT_OK(InitOp());
+    for (const auto& splits : params_nested_splits) {
+      int64 splits_size = splits.size();
+      AddInputFromArray<int64>(TensorShape({splits_size}), splits);
+    }
+    AddInputFromArray<VALUE_TYPE>(params_dense_values_shape,
+                                  params_dense_values);
+    AddInputFromArray<INDEX_TYPE>(indices_shape, indices);
+  }
+};
+
+TEST_F(RaggedGatherOpTest, RaggedGather) {
+  // indices = [2, 1, 0, 3]
+  // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]]
+  // params.shape = [4, None]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({4}),                     // indices.shape
+      {2, 1, 0, 3},                         // indices
+      {{0, 3, 3, 7, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[.4, .5, .6, .7], [.1, .2, .3], [], [.8, .9]]
+  test::ExpectTensorEqual<int64>(*GetOutput(0),
+                                 test::AsTensor<int64>({0, 4, 4, 7, 9}));
+  test::ExpectTensorNear<float>(
+      *GetOutput(1),
+      test::AsTensor<float>({.4, .5, .6, .7, .1, .2, .3, .8, .9}), 0.1);
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_3DParams) {
+  // indices = [2, 1, 0, 2, 3]
+  // params = [[[]], [[.1, 2], [.3]], [], [[.4, .5], [.6, .7, .8]], [[.9]]]
+  // params.shape = [5, None, None]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({5}),                             // indices.shape
+      {2, 1, 0, 2, 3},                              // indices
+      {{0, 1, 3, 3, 5, 6}, {0, 0, 2, 3, 5, 8, 9}},  // params_nested_splits
+      TensorShape({9}),                             // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}          // params_dense_values
+  );
+
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[], [[.1, 2], [.3]], [[]], [], [[.4, .5], [.6, .7, .8]]]
+  test::ExpectTensorEqual<int64>(*GetOutput(0),
+                                 test::AsTensor<int64>({0, 0, 2, 3, 3, 5}));
+  test::ExpectTensorEqual<int64>(*GetOutput(1),
+                                 test::AsTensor<int64>({0, 2, 3, 3, 5, 8}));
+  test::ExpectTensorNear<float>(
+      *GetOutput(2), test::AsTensor<float>({.1, .2, .3, .4, .5, .6, .7, .8}),
+      0.1);
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_4DParams) {
+  // indices = [2, 1, 0, 2]
+  // params = [[[]], [[[1, 2], [3, 4], [5, 6]], [[7, 8]]], []]
+  // params.shape = [4, None, None, 2]
+  BuildRaggedGatherGraph<int32, int32>(
+      TensorShape({4}),              // indices.shape
+      {2, 1, 0, 2},                  // indices
+      {{0, 1, 3, 3}, {0, 0, 3, 4}},  // params_nested_splits
+      TensorShape({4, 2}),           // params_dense_values.shape
+      {1, 2, 3, 4, 5, 6, 7, 8}       // params_dense_values
+  );
+
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[],
+  //            [[[1, 2], [3, 4], [5, 6]], [[7, 8]]],
+  //            [[]],
+  //            []]
+  test::ExpectTensorEqual<int64>(*GetOutput(0),
+                                 test::AsTensor<int64>({0, 0, 2, 3, 3}));
+  test::ExpectTensorEqual<int64>(*GetOutput(1),
+                                 test::AsTensor<int64>({0, 3, 4, 4}));
+  test::ExpectTensorEqual<int32>(
+      *GetOutput(2),
+      test::AsTensor<int32>({1, 2, 3, 4, 5, 6, 7, 8}, TensorShape({4, 2})));
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_2DIndices) {
+  // indices = [[2, 1], [0, 3]]
+  // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2, 2}),                  // indices.shape
+      {2, 1, 0, 3},                         // indices
+      {{0, 3, 3, 7, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [ [ [.4, .5, .6, .7], [.1, .2, .3] ],
+  //             [ [],               [.8, .9]     ] ]
+  test::ExpectTensorEqual<int64>(*GetOutput(0),
+                                 test::AsTensor<int64>({0, 2, 4}));
+  test::ExpectTensorEqual<int64>(*GetOutput(1),
+                                 test::AsTensor<int64>({0, 4, 4, 7, 9}));
+  test::ExpectTensorNear<float>(
+      *GetOutput(2),
+      test::AsTensor<float>({.4, .5, .6, .7, .1, .2, .3, .8, .9}), 0.1);
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_ScalarIndices) {
+  // indices = 2
+  // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({}),                      // indices.shape
+      {2},                                  // indices
+      {{0, 3, 3, 7, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [.4, .5, .6, .7]
+  test::ExpectTensorNear<float>(*GetOutput(0),
+                                test::AsTensor<float>({.4, .5, .6, .7}), 0.1);
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_OutOfBounds) {
+  // indices = [2, 10]
+  // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2}),                     // indices.shape
+      {2, 10},                              // indices
+      {{0, 3, 3, 7, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  EXPECT_EQ("indices[1] = 10 is not in [0, 4)", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, InvalidSplitsNotSorted) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2}),                     // indices.shape
+      {0, 2},                               // indices
+      {{0, 3, 5, 2, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  EXPECT_EQ("Ragged splits must be sorted", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, InvalidSplitsNegative) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2}),                     // indices.shape
+      {0, 2},                               // indices
+      {{-1, 3, 2, 7, 9}},                   // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  EXPECT_EQ("Ragged splits must be non-negative",
+            RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, InvalidSplitsEmpty) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({0}),  // indices.shape
+      {},                // indices
+      {{}},              // params_nested_splits
+      TensorShape({0}),  // params_dense_values.shape
+      {}                 // params_dense_values
+  );
+  EXPECT_EQ("Ragged splits may not be empty", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, InvalidSplitsTooBig) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2}),                     // indices.shape
+      {0, 2},                               // indices
+      {{0, 20, 40, 80, 100}},               // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  EXPECT_EQ("Ragged splits must not point past values",
+            RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, BadValuesShape) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({0}),  // indices.shape
+      {},                // indices
+      {{0}},             // params_nested_splits
+      TensorShape({}),   // params_dense_values.shape
+      {.1}               // params_dense_values
+  );
+  EXPECT_EQ("params.rank must be nonzero", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, ShapeFn) {
+  // RaggedGather(param_splits+, param_values, indices) -> [splits+, values]
+  ShapeInferenceTestOp op("RaggedGather");
+
+  (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(1);
+  (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(1);
+  INFER_OK(op, "?;?;?", "[?];?");
+  INFER_OK(op, "[?];[?];[?]", "[?];[?]");
+  INFER_OK(op, "[?];[?,?,?];[?]", "[?];[?,d1_1,d1_2]");
+  INFER_OK(op, "[5];[10];[15]", "[?];[?]");
+  INFER_OK(op, "[5];[10,2];[15]", "[?];[?,d1_1]");
+  INFER_ERROR("Shape must be rank 1 but is rank 0", op, "[5];[];[]");
+  INFER_ERROR("Shape must be rank 1 but is rank 2", op, "[1,2];[];[5]");
+
+  (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(2);
+  (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(2);
+  INFER_OK(op, "?;?;?;?", "[?];[?];?");
+  INFER_OK(op, "[?];[?];[?];[?]", "[?];[?];[?]");
+  INFER_OK(op, "[?];[?];[?,?,?];[?]", "[?];[?];[?,d2_1,d2_2]");
+  INFER_OK(op, "[5];[10];[15];[20]", "[?];[?];[?]");
+
+  (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(1);
+  (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(2);
+  INFER_OK(op, "?;?;?", "[?];[?];?");
+  INFER_OK(op, "[?];[?];[?,?]", "[?];[?];[?]");
+  INFER_OK(op, "[?];[?,?,?];[?,?]", "[?];[?];[?,d1_1,d1_2]");
+  INFER_OK(op, "[15];[20];[5,10]", "[?];[?];[?]");
+  INFER_OK(op, "[15];[20,2];[5,10]", "[?];[?];[?,d1_1]");
+
+  (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(1);
+  (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(0);
+  INFER_OK(op, "[?];[?];[]", "[?]");
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/ragged_array_ops.cc b/tensorflow/core/ops/ragged_array_ops.cc
new file mode 100644
index 0000000000..4642579939
--- /dev/null
+++ b/tensorflow/core/ops/ragged_array_ops.cc
@@ -0,0 +1,85 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+Status RaggedGatherShapeFn(InferenceContext* c);
+
+//==============================================================================
+// Registered Ops
+//==============================================================================
+
+REGISTER_OP("RaggedGather")
+    .Input("params_nested_splits: PARAMS_RAGGED_RANK * int64")
+    .Input("params_dense_values: Tvalues")
+    .Input("indices: Tindices")
+    .Output("output_nested_splits: OUTPUT_RAGGED_RANK * int64")
+    .Output("output_dense_values: Tvalues")
+    .Attr("Tvalues: type")
+    .Attr("Tindices: {int32, int64}")
+    .Attr("PARAMS_RAGGED_RANK: int >= 1")
+    .Attr("OUTPUT_RAGGED_RANK: int >= 0")
+    .SetShapeFn(RaggedGatherShapeFn);
+
+//==============================================================================
+// Shape Functions
+//==============================================================================
+
+Status RaggedGatherShapeFn(InferenceContext* c) {
+  int num_splits;
+  int64 PARAMS_RAGGED_RANK;
+  TF_RETURN_IF_ERROR(
+      c->GetAttr<int64>("PARAMS_RAGGED_RANK", &PARAMS_RAGGED_RANK));
+  TF_RETURN_IF_ERROR(c->GetAttr<int>("OUTPUT_RAGGED_RANK", &num_splits));
+
+  // Check rank of `indices`.
+  ShapeHandle indices = c->input(PARAMS_RAGGED_RANK + 1);
+  TF_RETURN_IF_ERROR(
+      c->WithRank(indices, num_splits - PARAMS_RAGGED_RANK + 1, &indices));
+
+  // Check that all params_nested_splits have rank 1.
+  for (int64 i = 0; i < PARAMS_RAGGED_RANK; ++i) {
+    ShapeHandle splits = c->input(i);
+    TF_RETURN_IF_ERROR(c->WithRank(splits, 1, &splits));
+  }
+
+  // Check that `params_dense_values` has rank>=1.
+  ShapeHandle params_dense_values = c->input(PARAMS_RAGGED_RANK);
+  TF_RETURN_IF_ERROR(
+      c->WithRankAtLeast(params_dense_values, 1, &params_dense_values));
+
+  // Set the rank for the `splits` outputs.
+  for (int i = 0; i < num_splits; ++i) {
+    c->set_output(i, c->UnknownShapeOfRank(1));
+  }
+
+  // Calculate the `values` shape.
+  ShapeHandle value = c->UnknownShape();
+  ShapeHandle values = c->UnknownShape();
+  TF_RETURN_IF_ERROR(c->Subshape(params_dense_values, 1, &value));
+  TF_RETURN_IF_ERROR(c->Concatenate(c->UnknownShapeOfRank(1), value, &values));
+  c->set_output(num_splits, values);
+
+  return Status::OK();
+}
+
+}  // namespace tensorflow
-- 
GitLab


From 1e13c38980ec17d9f26c041f4b251ecb3a791a2c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 13:23:52 -0700
Subject: [PATCH 1312/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 216410913
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 98 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 98 +++++++++++++++++++
 2 files changed, 196 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index dcea70dffb..cfb1055d3c 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -21858,6 +21858,54 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "ExperimentalNumaMapAndBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "drop_remainder"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "ExperimentalThreadPoolDataset"
   input_arg {
@@ -43915,6 +43963,56 @@ op {
     }
   }
 }
+op {
+  name: "RaggedGather"
+  input_arg {
+    name: "params_nested_splits"
+    type: DT_INT64
+    number_attr: "PARAMS_RAGGED_RANK"
+  }
+  input_arg {
+    name: "params_dense_values"
+    type_attr: "Tvalues"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "output_nested_splits"
+    type: DT_INT64
+    number_attr: "OUTPUT_RAGGED_RANK"
+  }
+  output_arg {
+    name: "output_dense_values"
+    type_attr: "Tvalues"
+  }
+  attr {
+    name: "Tvalues"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "PARAMS_RAGGED_RANK"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "OUTPUT_RAGGED_RANK"
+    type: "int"
+    has_minimum: true
+  }
+}
 op {
   name: "RandomCrop"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 93a297458f..05b97bffad 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -10365,6 +10365,54 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "ExperimentalNumaMapAndBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "drop_remainder"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "ExperimentalThreadPoolDataset"
   input_arg {
@@ -22288,6 +22336,56 @@ op {
     }
   }
 }
+op {
+  name: "RaggedGather"
+  input_arg {
+    name: "params_nested_splits"
+    type: DT_INT64
+    number_attr: "PARAMS_RAGGED_RANK"
+  }
+  input_arg {
+    name: "params_dense_values"
+    type_attr: "Tvalues"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "output_nested_splits"
+    type: DT_INT64
+    number_attr: "OUTPUT_RAGGED_RANK"
+  }
+  output_arg {
+    name: "output_dense_values"
+    type_attr: "Tvalues"
+  }
+  attr {
+    name: "Tvalues"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "PARAMS_RAGGED_RANK"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "OUTPUT_RAGGED_RANK"
+    type: "int"
+    has_minimum: true
+  }
+}
 op {
   name: "RandomCrop"
   input_arg {
-- 
GitLab


From 9989788be25c846d087ac70b76cf78759a209a3e Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 13:31:58 -0700
Subject: [PATCH 1313/1357] Small cleanup in function_test.

PiperOrigin-RevId: 216412380
---
 tensorflow/python/framework/function_test.py | 27 ++++++--------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 87f567db0e..16d4903d79 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -1639,29 +1639,18 @@ class FunctionInlineControlTest(test.TestCase):
       self.assertEqual(MetadataHasCell(run_metadata), noinline)
 
 
-@function.Defun(*[dtypes.float32] * 3)
-def Linear(w, b, x):
-  return nn_ops.relu(math_ops.matmul(x, w) + b)
-
-
-@function.Defun(*[dtypes.float32] * 5)
-def Linear2(w1, b1, w2, b2, x):
-  return Linear(w2, b2, Linear(w1, b1, x))
-
-
-@function.Defun(*[dtypes.float32] * 3)
-def LinearWithCApi(w, b, x):
-  return nn_ops.relu(math_ops.matmul(x, w) + b)
-
+class ModuleFunctionTest(test.TestCase):
 
-@function.Defun(*[dtypes.float32] * 5)
-def Linear2WithCApi(w1, b1, w2, b2, x):
-  return LinearWithCApi(w2, b2, LinearWithCApi(w1, b1, x))
+  def testBasic(self):
 
+    @function.Defun(*[dtypes.float32] * 3)
+    def LinearWithCApi(w, b, x):
+      return nn_ops.relu(math_ops.matmul(x, w) + b)
 
-class ModuleFunctionTest(test.TestCase):
+    @function.Defun(*[dtypes.float32] * 5)
+    def Linear2WithCApi(w1, b1, w2, b2, x):
+      return LinearWithCApi(w2, b2, LinearWithCApi(w1, b1, x))
 
-  def testBasic(self):
     with ops.Graph().as_default():
       a, b, c, d, e = [
           constant_op.constant([[_]], dtype=dtypes.float32) for _ in range(5)
-- 
GitLab


From 5d9a7fdf4f02c2db487a03e7ad2d520f8847c4e3 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 9 Oct 2018 13:32:24 -0700
Subject: [PATCH 1314/1357] [XLA:GPU] Add an implementation of scatter for GPU

This simple has a kernel that runs on every element of the updates tensor,
figure out the right indices to perform the update, and applies it with an
atomic operation.

Currently we emit a CAS for plain (i.e. non-add) updates, which is inefficient.
Also TuplePointsToAnalysis doesn't know that it should alias the operand and
output buffers of a scatter, which would avoid a copy.

PiperOrigin-RevId: 216412467
---
 tensorflow/compiler/xla/service/gpu/BUILD     |   1 -
 .../xla/service/gpu/ir_emitter_unnested.cc    | 141 ++++++++++++++++++
 .../xla/service/gpu/ir_emitter_unnested.h     |   1 +
 .../xla/service/gpu/nvptx_compiler.cc         |   3 -
 .../compiler/xla/service/layout_assignment.cc |   2 +-
 5 files changed, 143 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 350fd32537..0144d59097 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -705,7 +705,6 @@ cc_library(
         "//tensorflow/compiler/xla/service:llvm_compiler",
         "//tensorflow/compiler/xla/service:reduce_precision_insertion",
         "//tensorflow/compiler/xla/service:reshape_mover",
-        "//tensorflow/compiler/xla/service:scatter_expander",
         "//tensorflow/compiler/xla/service:transpose_folding",
         "//tensorflow/compiler/xla/service:tuple_simplifier",
         "//tensorflow/compiler/xla/service:while_loop_constant_sinking",
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index c792dd2ddb..bef7a55301 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -1958,6 +1958,147 @@ Status IrEmitterUnnested::HandleRng(HloInstruction* rng) {
   return Status::OK();
 }
 
+Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
+  const HloInstruction* operand = scatter->operand(0);
+  const HloInstruction* scatter_indices = scatter->operand(1);
+  const HloInstruction* updates = scatter->operand(2);
+  const ScatterDimensionNumbers& dim_numbers =
+      scatter->scatter_dimension_numbers();
+  CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape()));
+
+  std::vector<std::unique_ptr<Thunk>> thunks;
+
+  // Copy the operand into the output if it's not the same buffer already.
+  auto operand_buffer = GetAllocationSlice(*operand);
+  auto destination_buffer = GetAllocationSlice(*scatter);
+  if (operand_buffer != destination_buffer) {
+    thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
+        /*source_address=*/operand_buffer,
+        /*destination_buffer=*/destination_buffer,
+        /*mem_size=*/ShapeUtil::ByteSizeOf(operand->shape()), scatter));
+  }
+
+  auto loop_body_emitter = [&](const IrArray::Index& index) -> Status {
+    std::vector<llvm::Value*> raw_window_multidim;
+    std::vector<llvm::Value*> input_scatter_multidim;
+    std::vector<int64> raw_window_bounds;
+
+    // Partition the index into window indices and scatter indices.
+    for (int64 i = 0, e = index.size(); i != e; ++i) {
+      // For window indices also remember the window size, this comes in handy
+      // later.
+      if (absl::c_binary_search(dim_numbers.update_window_dims(), i)) {
+        raw_window_multidim.push_back(index[i]);
+        raw_window_bounds.push_back(updates->shape().dimensions(i));
+      } else {
+        input_scatter_multidim.push_back(index[i]);
+      }
+    }
+    DCHECK_EQ(raw_window_multidim.size(),
+              dim_numbers.update_window_dims_size());
+
+    // Apply inserted_window_dims to the window dimensions.
+    int64 raw_window_multidim_idx = 0;
+    std::vector<llvm::Value*> input_window_multidim;
+    std::vector<int64> input_window_bounds;
+    for (int64 i = 0, e = ShapeUtil::Rank(operand->shape()); i != e; ++i) {
+      if (absl::c_binary_search(dim_numbers.inserted_window_dims(), i)) {
+        input_window_bounds.push_back(1);  // Trivial dimension.
+        input_window_multidim.push_back(index.GetConstantWithIndexType(0));
+      } else {
+        input_window_bounds.push_back(
+            raw_window_bounds[raw_window_multidim_idx]);
+        input_window_multidim.push_back(
+            raw_window_multidim[raw_window_multidim_idx]);
+        ++raw_window_multidim_idx;
+      }
+    }
+    DCHECK_EQ(input_window_multidim.size(), ShapeUtil::Rank(operand->shape()));
+
+    // Insert a 1 dimension at the end if index_vector_dim requests one.
+    Shape scatter_indices_shape = scatter_indices->shape();
+    if (dim_numbers.index_vector_dim() ==
+        ShapeUtil::Rank(scatter_indices_shape)) {
+      scatter_indices_shape.add_dimensions(1);
+      scatter_indices_shape.mutable_layout()->add_minor_to_major(
+          dim_numbers.index_vector_dim());
+    }
+    llvm_ir::IrArray scatter_indices_reshaped =
+        GetIrArray(*scatter_indices, *scatter)
+            .CastToShape(scatter_indices_shape, &b_);
+
+    // Now load the indices corresponding to the current window from
+    // scatter_indices.
+    llvm_ir::IrArray::Index raw_scatter_index_index(input_scatter_multidim,
+                                                    index.GetType());
+    raw_scatter_index_index.InsertAt(dim_numbers.index_vector_dim(), nullptr);
+    llvm::Value* is_in_bounds = b_.getTrue();
+    for (int64 i = 0, e = dim_numbers.scatter_dims_to_operand_dims_size();
+         i != e; ++i) {
+      // Our index is stored along index_vector_dim, insert that into the lookup
+      // index into scatter_indices.
+      raw_scatter_index_index[dim_numbers.index_vector_dim()] =
+          raw_scatter_index_index.GetConstantWithIndexType(i);
+
+      int64 operand_dim = dim_numbers.scatter_dims_to_operand_dims(i);
+      llvm::Value* loaded_scatter_index =
+          scatter_indices_reshaped.EmitReadArrayElement(raw_scatter_index_index,
+                                                        &b_, "scatter_index");
+      // And add the index to our window index. This yields the output index.
+      llvm::Value* dim_offset =
+          Add(input_window_multidim[operand_dim],
+              IntCast(loaded_scatter_index, index.GetType(),
+                      /*isSigned=*/true));
+      input_window_multidim[operand_dim] = dim_offset;
+
+      // Also do the bounds check now.
+      int64 max_index = operand->shape().dimensions(operand_dim) -
+                        input_window_bounds[operand_dim] + 1;
+      // is_in_bounds = dim_offset >= 0 && dim_offset < dim_size-window_size+1
+      //   --> dim_offset u< dim_size-window_size+1
+      is_in_bounds =
+          And(is_in_bounds,
+              ICmpULT(dim_offset, index.GetConstantWithIndexType(max_index)));
+    }
+
+    llvm_ir::LlvmIfData if_window_in_bounds_data = llvm_ir::EmitIfThenElse(
+        is_in_bounds, "scatter.in_bounds", &b_, /*emit_else=*/false);
+    llvm_ir::SetToFirstInsertPoint(if_window_in_bounds_data.true_block, &b_);
+    // All done, now just read from the calculated input from the window, and do
+    // an atomic store to the calculated location in the output.
+    llvm_ir::IrArray::Index input_window_index(input_window_multidim,
+                                               index.GetType());
+    llvm::Value* input_address =
+        GetIrArray(*updates, *scatter).EmitArrayElementAddress(index, &b_);
+    llvm::Value* output_address =
+        GetIrArray(*scatter, *scatter)
+            .EmitArrayElementAddress(input_window_index, &b_);
+    return EmitAtomicOperationForNestedComputation(
+        *scatter->to_apply(), output_address, input_address);
+  };
+
+  // Launch a kernel that reads every element in the updates tensor. We could
+  // also do one kernel per window instead if bounds checks turn out to be a
+  // bottleneck.
+  thunks.push_back(BuildKernelThunk(
+      scatter,
+      /*implements_whole_instruction=*/operand_buffer == destination_buffer));
+
+  LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
+      updates->shape(), ir_emitter_context_->device_description());
+  UpdateLaunchDimensions(launch_dimensions,
+                         static_cast<KernelThunk*>(thunks.back().get()),
+                         ir_emitter_context_->llvm_module());
+
+  thunk_sequence_->emplace_back(
+      absl::make_unique<SequentialThunk>(std::move(thunks), scatter));
+  return ParallelLoopEmitter(loop_body_emitter, updates->shape(),
+                             launch_dimensions, &b_)
+      .EmitLoop(IrName(scatter),
+                GetIndexTypeForKernel(scatter, launch_dimensions.launch_bound(),
+                                      &b_));
+}
+
 Status IrEmitterUnnested::HandleSelect(HloInstruction* select) {
   thunk_sequence_->push_back(
       BuildKernelThunk(select, /*implements_whole_instruction=*/true));
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index bd5db72051..2e36e7235b 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -76,6 +76,7 @@ class IrEmitterUnnested : public IrEmitter {
   Status HandleInfeed(HloInstruction* xla_infeed) override;
   Status HandleOutfeed(HloInstruction* outfeed) override;
   Status HandleRng(HloInstruction* random) override;
+  Status HandleScatter(HloInstruction* scatter) override;
   Status HandleSelect(HloInstruction* select) override;
   Status HandleSort(HloInstruction* sort) override;
   Status HandleTupleSelect(HloInstruction* tuple_select) override;
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index ac6c2c5565..5409f65589 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -75,7 +75,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
-#include "tensorflow/compiler/xla/service/scatter_expander.h"
 #include "tensorflow/compiler/xla/service/transpose_folding.h"
 #include "tensorflow/compiler/xla/service/tuple_simplifier.h"
 #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h"
@@ -176,8 +175,6 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
       // elimination has to come after that pass.
       pipeline.AddPass<ZeroSizedHloElimination>();
 
-      pipeline.AddPass<ScatterExpander>();
-
       pass.AddPass<AlgebraicSimplifier>(
           /*is_layout_sensitive=*/false,
           [](const Shape&, const Shape&) { return false; });
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index ad65b147c1..2cf5fc94ac 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -1908,6 +1908,7 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kRemainder:
     case HloOpcode::kReverse:
     case HloOpcode::kRoundNearestAfz:
+    case HloOpcode::kScatter:
     case HloOpcode::kSelect:
     case HloOpcode::kSelectAndScatter:
     case HloOpcode::kShiftLeft:
@@ -1946,7 +1947,6 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kReduce:
     case HloOpcode::kReshape:
     case HloOpcode::kRng:
-    case HloOpcode::kScatter:
     case HloOpcode::kSend:
     case HloOpcode::kSendDone:
     case HloOpcode::kAfterAll:
-- 
GitLab


From 7b2f26280df8dee266d66e01a7ffac7a7eb25247 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 13:51:27 -0700
Subject: [PATCH 1315/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216416117

---
 tensorflow/go/op/wrappers.go | 728 +++++++++++++++++------------------
 1 file changed, 364 insertions(+), 364 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index eb6df2af46..f35117084a 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -4396,6 +4396,172 @@ func Snapshot(scope *Scope, input tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
+// Forwards `data` to the output port determined by `pred`.
+//
+// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
+// the data goes to `output_false`.
+//
+// See also `RefSwitch` and `Merge`.
+//
+// Arguments:
+//	data: The tensor to be forwarded to the appropriate output.
+//	pred: A scalar that specifies which output port will receive data.
+//
+// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output.
+func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Switch",
+		Input: []tf.Input{
+			data, pred,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// AudioSpectrogramAttr is an optional argument to AudioSpectrogram.
+type AudioSpectrogramAttr func(optionalAttr)
+
+// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value.
+//
+// value: Whether to return the squared magnitude or just the
+// magnitude. Using squared magnitude can avoid extra calculations.
+// If not specified, defaults to false
+func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr {
+	return func(m optionalAttr) {
+		m["magnitude_squared"] = value
+	}
+}
+
+// Produces a visualization of audio data over time.
+//
+// Spectrograms are a standard way of representing audio information as a series of
+// slices of frequency information, one slice for each window of time. By joining
+// these together into a sequence, they form a distinctive fingerprint of the sound
+// over time.
+//
+// This op expects to receive audio data as an input, stored as floats in the range
+// -1 to 1, together with a window width in samples, and a stride specifying how
+// far to move the window between slices. From this it generates a three
+// dimensional output. The lowest dimension has an amplitude value for each
+// frequency during that time slice. The next dimension is time, with successive
+// frequency slices. The final dimension is for the channels in the input, so a
+// stereo audio input would have two here for example.
+//
+// This means the layout when converted and saved as an image is rotated 90 degrees
+// clockwise from a typical spectrogram. Time is descending down the Y axis, and
+// the frequency decreases from left to right.
+//
+// Each value in the result represents the square root of the sum of the real and
+// imaginary parts of an FFT on the current window of samples. In this way, the
+// lowest dimension represents the power of each frequency in the current window,
+// and adjacent windows are concatenated in the next dimension.
+//
+// To get a more intuitive and visual look at what this operation does, you can run
+// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
+// resulting spectrogram as a PNG image.
+//
+// Arguments:
+//	input: Float representation of audio data.
+//	window_size: How wide the input window is in samples. For the highest efficiency
+// this should be a power of two, but other values are accepted.
+//	stride: How widely apart the center of adjacent sample windows should be.
+//
+// Returns 3D representation of the audio frequencies as an image.
+func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"window_size": window_size, "stride": stride}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "AudioSpectrogram",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder.
+type CTCBeamSearchDecoderAttr func(optionalAttr)
+
+// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value.
+//
+// value: If true, merge repeated classes in output.
+// If not specified, defaults to true
+func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr {
+	return func(m optionalAttr) {
+		m["merge_repeated"] = value
+	}
+}
+
+// Performs beam search decoding on the logits given in input.
+//
+// A note about the attribute merge_repeated: For the beam search decoder,
+// this means that if consecutive entries in a beam are the same, only
+// the first of these is emitted.  That is, when the top path is "A B B B B",
+// "A B" is returned if merge_repeated = True but "A B B B B" is
+// returned if merge_repeated = False.
+//
+// Arguments:
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	sequence_length: A vector containing sequence lengths, size `(batch)`.
+//	beam_width: A scalar >= 0 (beam search beam width).
+//	top_paths: A scalar >= 0, <= beam_width (controls output size).
+//
+// Returns A list (length: top_paths) of indices matrices.  Matrix j,
+// size `(total_decoded_outputs[j] x 2)`, has indices of a
+// `SparseTensor<int64, 2>`.  The rows store: [batch, time].A list (length: top_paths) of values vectors.  Vector j,
+// size `(length total_decoded_outputs[j])`, has the values of a
+// `SparseTensor<int64, 2>`.  The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector.  Vector j,
+// size `(2)`, stores the shape of the decoded `SparseTensor[j]`.
+// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`.  The
+// sequence log-probabilities.
+func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CTCBeamSearchDecoder",
+		Input: []tf.Input{
+			inputs, sequence_length,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil {
+		scope.UpdateErr("CTCBeamSearchDecoder", err)
+		return
+	}
+	if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil {
+		scope.UpdateErr("CTCBeamSearchDecoder", err)
+		return
+	}
+	if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil {
+		scope.UpdateErr("CTCBeamSearchDecoder", err)
+		return
+	}
+	log_probability = op.Output(idx)
+	return decoded_indices, decoded_values, decoded_shape, log_probability
+}
+
 // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
 type ResourceStridedSliceAssignAttr func(optionalAttr)
 
@@ -5662,90 +5828,6 @@ func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_i
 	return op.Output(0)
 }
 
-// Computes natural logarithm of (1 + x) element-wise.
-//
-// I.e., \\(y = \log_e (1 + x)\\).
-func Log1p(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Log1p",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes rectified linear 6 gradients for a Relu6 operation.
-//
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding Relu6 operation.
-//	features: The features passed as input to the corresponding Relu6 operation, or
-// its output; using either one produces the same result.
-//
-// Returns The gradients:
-// `gradients * (features > 0) * (features < 6)`.
-func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Relu6Grad",
-		Input: []tf.Input{
-			gradients, features,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResizeBicubicAttr is an optional argument to ResizeBicubic.
-type ResizeBicubicAttr func(optionalAttr)
-
-// ResizeBicubicAlignCorners sets the optional align_corners attribute to value.
-//
-// value: If true, the centers of the 4 corner pixels of the input and output tensors are
-// aligned, preserving the values at the corner pixels. Defaults to false.
-// If not specified, defaults to false
-func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr {
-	return func(m optionalAttr) {
-		m["align_corners"] = value
-	}
-}
-
-// Resize `images` to `size` using bicubic interpolation.
-//
-// Input images can be of different types but output images are always float.
-//
-// Arguments:
-//	images: 4-D with shape `[batch, height, width, channels]`.
-//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
-//
-// Returns 4-D with shape
-// `[batch, new_height, new_width, channels]`.
-func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResizeBicubic",
-		Input: []tf.Input{
-			images, size,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Computes natural logarithm of x element-wise.
 //
 // I.e., \\(y = \log_e x\\).
@@ -5886,146 +5968,6 @@ func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
-// AudioSpectrogramAttr is an optional argument to AudioSpectrogram.
-type AudioSpectrogramAttr func(optionalAttr)
-
-// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value.
-//
-// value: Whether to return the squared magnitude or just the
-// magnitude. Using squared magnitude can avoid extra calculations.
-// If not specified, defaults to false
-func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr {
-	return func(m optionalAttr) {
-		m["magnitude_squared"] = value
-	}
-}
-
-// Produces a visualization of audio data over time.
-//
-// Spectrograms are a standard way of representing audio information as a series of
-// slices of frequency information, one slice for each window of time. By joining
-// these together into a sequence, they form a distinctive fingerprint of the sound
-// over time.
-//
-// This op expects to receive audio data as an input, stored as floats in the range
-// -1 to 1, together with a window width in samples, and a stride specifying how
-// far to move the window between slices. From this it generates a three
-// dimensional output. The lowest dimension has an amplitude value for each
-// frequency during that time slice. The next dimension is time, with successive
-// frequency slices. The final dimension is for the channels in the input, so a
-// stereo audio input would have two here for example.
-//
-// This means the layout when converted and saved as an image is rotated 90 degrees
-// clockwise from a typical spectrogram. Time is descending down the Y axis, and
-// the frequency decreases from left to right.
-//
-// Each value in the result represents the square root of the sum of the real and
-// imaginary parts of an FFT on the current window of samples. In this way, the
-// lowest dimension represents the power of each frequency in the current window,
-// and adjacent windows are concatenated in the next dimension.
-//
-// To get a more intuitive and visual look at what this operation does, you can run
-// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
-// resulting spectrogram as a PNG image.
-//
-// Arguments:
-//	input: Float representation of audio data.
-//	window_size: How wide the input window is in samples. For the highest efficiency
-// this should be a power of two, but other values are accepted.
-//	stride: How widely apart the center of adjacent sample windows should be.
-//
-// Returns 3D representation of the audio frequencies as an image.
-func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"window_size": window_size, "stride": stride}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "AudioSpectrogram",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder.
-type CTCBeamSearchDecoderAttr func(optionalAttr)
-
-// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value.
-//
-// value: If true, merge repeated classes in output.
-// If not specified, defaults to true
-func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr {
-	return func(m optionalAttr) {
-		m["merge_repeated"] = value
-	}
-}
-
-// Performs beam search decoding on the logits given in input.
-//
-// A note about the attribute merge_repeated: For the beam search decoder,
-// this means that if consecutive entries in a beam are the same, only
-// the first of these is emitted.  That is, when the top path is "A B B B B",
-// "A B" is returned if merge_repeated = True but "A B B B B" is
-// returned if merge_repeated = False.
-//
-// Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	sequence_length: A vector containing sequence lengths, size `(batch)`.
-//	beam_width: A scalar >= 0 (beam search beam width).
-//	top_paths: A scalar >= 0, <= beam_width (controls output size).
-//
-// Returns A list (length: top_paths) of indices matrices.  Matrix j,
-// size `(total_decoded_outputs[j] x 2)`, has indices of a
-// `SparseTensor<int64, 2>`.  The rows store: [batch, time].A list (length: top_paths) of values vectors.  Vector j,
-// size `(length total_decoded_outputs[j])`, has the values of a
-// `SparseTensor<int64, 2>`.  The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector.  Vector j,
-// size `(2)`, stores the shape of the decoded `SparseTensor[j]`.
-// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`.  The
-// sequence log-probabilities.
-func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CTCBeamSearchDecoder",
-		Input: []tf.Input{
-			inputs, sequence_length,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil {
-		scope.UpdateErr("CTCBeamSearchDecoder", err)
-		return
-	}
-	if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil {
-		scope.UpdateErr("CTCBeamSearchDecoder", err)
-		return
-	}
-	if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil {
-		scope.UpdateErr("CTCBeamSearchDecoder", err)
-		return
-	}
-	log_probability = op.Output(idx)
-	return decoded_indices, decoded_values, decoded_shape, log_probability
-}
-
 // MatrixInverseAttr is an optional argument to MatrixInverse.
 type MatrixInverseAttr func(optionalAttr)
 
@@ -9615,25 +9557,109 @@ func DecodeRawLittleEndian(value bool) DecodeRawAttr {
 
 // Reinterpret the bytes of a string as a vector of numbers.
 //
-// Arguments:
-//	bytes: All the elements must have the same length.
+// Arguments:
+//	bytes: All the elements must have the same length.
+//
+//
+// Returns A Tensor with one more dimension than the input `bytes`.  The
+// added dimension will have size equal to the length of the elements
+// of `bytes` divided by the number of bytes to represent `out_type`.
+func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DecodeRaw",
+		Input: []tf.Input{
+			bytes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes natural logarithm of (1 + x) element-wise.
+//
+// I.e., \\(y = \log_e (1 + x)\\).
+func Log1p(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Log1p",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes rectified linear 6 gradients for a Relu6 operation.
+//
+// Arguments:
+//	gradients: The backpropagated gradients to the corresponding Relu6 operation.
+//	features: The features passed as input to the corresponding Relu6 operation, or
+// its output; using either one produces the same result.
+//
+// Returns The gradients:
+// `gradients * (features > 0) * (features < 6)`.
+func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Relu6Grad",
+		Input: []tf.Input{
+			gradients, features,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResizeBicubicAttr is an optional argument to ResizeBicubic.
+type ResizeBicubicAttr func(optionalAttr)
+
+// ResizeBicubicAlignCorners sets the optional align_corners attribute to value.
+//
+// value: If true, the centers of the 4 corner pixels of the input and output tensors are
+// aligned, preserving the values at the corner pixels. Defaults to false.
+// If not specified, defaults to false
+func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr {
+	return func(m optionalAttr) {
+		m["align_corners"] = value
+	}
+}
+
+// Resize `images` to `size` using bicubic interpolation.
+//
+// Input images can be of different types but output images are always float.
 //
+// Arguments:
+//	images: 4-D with shape `[batch, height, width, channels]`.
+//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
 //
-// Returns A Tensor with one more dimension than the input `bytes`.  The
-// added dimension will have size equal to the length of the elements
-// of `bytes` divided by the number of bytes to represent `out_type`.
-func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) {
+// Returns 4-D with shape
+// `[batch, new_height, new_width, channels]`.
+func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeRaw",
+		Type: "ResizeBicubic",
 		Input: []tf.Input{
-			bytes,
+			images, size,
 		},
 		Attrs: attrs,
 	}
@@ -9641,6 +9667,52 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...
 	return op.Output(0)
 }
 
+// Greedily selects a subset of bounding boxes in descending order of score,
+//
+// pruning away boxes that have high intersection-over-union (IOU) overlap
+// with previously selected boxes.  Bounding boxes are supplied as
+// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+// diagonal pair of box corners and the coordinates can be provided as normalized
+// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+// is agnostic to where the origin is in the coordinate system.  Note that this
+// algorithm is invariant to orthogonal transformations and translations
+// of the coordinate system; thus translating or reflections of the coordinate
+// system result in the same boxes being selected by the algorithm.
+//
+// The output of this operation is a set of integers indexing into the input
+// collection of bounding boxes representing the selected boxes.  The bounding
+// box coordinates corresponding to the selected indices can then be obtained
+// using the `tf.gather operation`.  For example:
+//
+//   selected_indices = tf.image.non_max_suppression_v2(
+//       boxes, scores, max_output_size, iou_threshold)
+//   selected_boxes = tf.gather(boxes, selected_indices)
+//
+// Arguments:
+//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
+//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
+// score corresponding to each box (each row of boxes).
+//	max_output_size: A scalar integer tensor representing the maximum number of
+// boxes to be selected by non max suppression.
+//	iou_threshold: A 0-D float tensor representing the threshold for deciding whether
+// boxes overlap too much with respect to IOU.
+//
+// Returns A 1-D integer tensor of shape `[M]` representing the selected
+// indices from the boxes tensor, where `M <= max_output_size`.
+func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "NonMaxSuppressionV2",
+		Input: []tf.Input{
+			boxes, scores, max_output_size, iou_threshold,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // RandomShuffleAttr is an optional argument to RandomShuffle.
 type RandomShuffleAttr func(optionalAttr)
 
@@ -19332,65 +19404,6 @@ func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_
 	return op.Output(0)
 }
 
-// Computes the sum along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// Computes a tensor such that
-// \\(output_i = \sum_j data_j\\) where sum is over `j` such
-// that `segment_ids[j] == i`.
-//
-// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentSum.png" alt>
-// </div>
-//
-// Arguments:
-//
-//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SegmentSum",
-		Input: []tf.Input{
-			data, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that emits the lines of one or more text files.
-//
-// Arguments:
-//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
-// read.
-//	compression_type: A scalar containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-//	buffer_size: A scalar containing the number of bytes to buffer.
-func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TextLineDataset",
-		Input: []tf.Input{
-			filenames, compression_type, buffer_size,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Returns the set of files matching one or more glob patterns.
 //
 // Note that this routine only supports wildcard characters in the
@@ -21888,6 +21901,65 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Creates a dataset that emits the lines of one or more text files.
+//
+// Arguments:
+//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
+// read.
+//	compression_type: A scalar containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+//	buffer_size: A scalar containing the number of bytes to buffer.
+func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TextLineDataset",
+		Input: []tf.Input{
+			filenames, compression_type, buffer_size,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the sum along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// Computes a tensor such that
+// \\(output_i = \sum_j data_j\\) where sum is over `j` such
+// that `segment_ids[j] == i`.
+//
+// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentSum.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SegmentSum",
+		Input: []tf.Input{
+			data, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the mean along segments of a tensor.
 //
 // Read
@@ -27977,52 +28049,6 @@ func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr)
 	return op.Output(0)
 }
 
-// Greedily selects a subset of bounding boxes in descending order of score,
-//
-// pruning away boxes that have high intersection-over-union (IOU) overlap
-// with previously selected boxes.  Bounding boxes are supplied as
-// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
-// diagonal pair of box corners and the coordinates can be provided as normalized
-// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
-// is agnostic to where the origin is in the coordinate system.  Note that this
-// algorithm is invariant to orthogonal transformations and translations
-// of the coordinate system; thus translating or reflections of the coordinate
-// system result in the same boxes being selected by the algorithm.
-//
-// The output of this operation is a set of integers indexing into the input
-// collection of bounding boxes representing the selected boxes.  The bounding
-// box coordinates corresponding to the selected indices can then be obtained
-// using the `tf.gather operation`.  For example:
-//
-//   selected_indices = tf.image.non_max_suppression_v2(
-//       boxes, scores, max_output_size, iou_threshold)
-//   selected_boxes = tf.gather(boxes, selected_indices)
-//
-// Arguments:
-//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
-//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
-// score corresponding to each box (each row of boxes).
-//	max_output_size: A scalar integer tensor representing the maximum number of
-// boxes to be selected by non max suppression.
-//	iou_threshold: A 0-D float tensor representing the threshold for deciding whether
-// boxes overlap too much with respect to IOU.
-//
-// Returns A 1-D integer tensor of shape `[M]` representing the selected
-// indices from the boxes tensor, where `M <= max_output_size`.
-func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "NonMaxSuppressionV2",
-		Input: []tf.Input{
-			boxes, scores, max_output_size, iou_threshold,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Greedily selects a subset of bounding boxes in descending order of score,
 //
 // pruning away boxes that have high intersection-over-union (IOU) overlap
@@ -33131,29 +33157,3 @@ func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output,
 	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
-
-// Forwards `data` to the output port determined by `pred`.
-//
-// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
-// the data goes to `output_false`.
-//
-// See also `RefSwitch` and `Merge`.
-//
-// Arguments:
-//	data: The tensor to be forwarded to the appropriate output.
-//	pred: A scalar that specifies which output port will receive data.
-//
-// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output.
-func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Switch",
-		Input: []tf.Input{
-			data, pred,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-- 
GitLab


From 1f556d3a4172c30cf461e7e66334b70ffad2d559 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 14:03:23 -0700
Subject: [PATCH 1316/1357] Do not create a graph as a global variable in
 tests.

PiperOrigin-RevId: 216418324
---
 .../copy_graph/python/util/copy_test.py       | 31 ++++++++++++-------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/copy_graph/python/util/copy_test.py b/tensorflow/contrib/copy_graph/python/util/copy_test.py
index ba97c78456..4d8651a79f 100644
--- a/tensorflow/contrib/copy_graph/python/util/copy_test.py
+++ b/tensorflow/contrib/copy_graph/python/util/copy_test.py
@@ -26,15 +26,16 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
-graph1 = ops.Graph()
-graph2 = ops.Graph()
-
 
 class CopyVariablesTest(test.TestCase):
 
+  def setUp(self):
+    self.graph1 = ops.Graph()
+    self.graph2 = ops.Graph()
+
   def testVariableCopy(self):
 
-    with graph1.as_default():
+    with self.graph1.as_default():
       #Define a Variable in graph1
       some_var = variables.VariableV1(2)
       #Initialize session
@@ -43,13 +44,15 @@ class CopyVariablesTest(test.TestCase):
       variables.global_variables_initializer().run(session=sess1)
 
     #Make a copy of some_var in the defsult scope in graph2
-    copy1 = copy_elements.copy_variable_to_graph(some_var, graph2)
+    copy1 = copy_elements.copy_variable_to_graph(some_var, self.graph2)
 
     #Make another copy with different scope
-    copy2 = copy_elements.copy_variable_to_graph(some_var, graph2, "test_scope")
+    copy2 = copy_elements.copy_variable_to_graph(some_var,
+                                                 self.graph2,
+                                                 "test_scope")
 
     #Initialize both the copies
-    with graph2.as_default():
+    with self.graph2.as_default():
       #Initialize Session
       sess2 = session_lib.Session()
       #Initialize the Variables
@@ -67,9 +70,13 @@ class CopyVariablesTest(test.TestCase):
 
 class CopyOpsTest(test.TestCase):
 
+  def setUp(self):
+    self.graph1 = ops.Graph()
+    self.graph2 = ops.Graph()
+
   def testOpsCopy(self):
 
-    with graph1.as_default():
+    with self.graph1.as_default():
       #Initialize a basic expression y = ax + b
       x = array_ops.placeholder("float")
       a = variables.VariableV1(3.0)
@@ -82,21 +89,21 @@ class CopyOpsTest(test.TestCase):
       variables.global_variables_initializer().run(session=sess1)
 
     #First, initialize a as a Variable in graph2
-    a1 = copy_elements.copy_variable_to_graph(a, graph2)
+    a1 = copy_elements.copy_variable_to_graph(a, self.graph2)
 
     #Initialize a1 in graph2
-    with graph2.as_default():
+    with self.graph2.as_default():
       #Initialize session
       sess2 = session_lib.Session()
       #Initialize the Variable
       variables.global_variables_initializer().run(session=sess2)
 
     #Initialize a copy of y in graph2
-    y1 = copy_elements.copy_op_to_graph(y, graph2, [a1])
+    y1 = copy_elements.copy_op_to_graph(y, self.graph2, [a1])
 
     #Now that y has been copied, x must be copied too.
     #Get that instance
-    x1 = copy_elements.get_copied_op(x, graph2)
+    x1 = copy_elements.get_copied_op(x, self.graph2)
 
     #Compare values of y & y1 for a sample input
     #and check if they match
-- 
GitLab


From 5785c0202f4f84c464ef22d0ff180730813f59f3 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 9 Oct 2018 14:04:23 -0700
Subject: [PATCH 1317/1357] Improve the control flow conversion for loops by
 using dataflow analysis to construct the state. This is part of a larger
 refactoring which removes the reliance on the deprecated Scope.created field.

PiperOrigin-RevId: 216418556
---
 .../autograph/converters/control_flow.py      | 162 ++++++++++--------
 .../autograph/converters/control_flow_test.py |   4 +-
 .../python/autograph/pyct/qual_names.py       |   3 +
 3 files changed, 93 insertions(+), 76 deletions(-)

diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py
index 416a60d2ee..70879f6c97 100644
--- a/tensorflow/python/autograph/converters/control_flow.py
+++ b/tensorflow/python/autograph/converters/control_flow.py
@@ -90,23 +90,11 @@ class ControlFlowTransformer(converter.Base):
       return templates.replace(
           template, test=test, body_name=body_name, orelse_name=orelse_name)
 
-  def _fmt_symbol_list(self, symbol_set):
+  def _fmt_symbols(self, symbol_set):
     if not symbol_set:
       return 'no variables'
     return ', '.join(map(str, symbol_set))
 
-  def _validate_no_live_vars_created(self, node):
-    body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE)
-    live_vars_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT)
-    live_vars_created_in_body = live_vars_out & body_scope.created
-    if live_vars_created_in_body:
-      raise ValueError(
-          'The following variables are created inside the loop and used later:'
-          '\n%s\n'
-          'Variables must be declared outside loops because loops may not'
-          ' necessarily execute.' % self._fmt_symbol_list(
-              live_vars_created_in_body))
-
   def visit_If(self, node):
     node = self.generic_visit(node)
 
@@ -138,8 +126,8 @@ class ControlFlowTransformer(converter.Base):
           ' creates %s, while the false branch creates %s. Make sure all'
           ' these variables are initialized either in both'
           ' branches or before the if statement.' %
-          (self._fmt_symbol_list(created_in_body),
-           self._fmt_symbol_list(created_in_orelse)))
+          (self._fmt_symbols(created_in_body),
+           self._fmt_symbols(created_in_orelse)))
 
     # Alias the closure variables inside the conditional functions, to allow
     # the functions access to the respective variables.
@@ -206,51 +194,97 @@ class ControlFlowTransformer(converter.Base):
 
     return body_def + orelse_def + cond_expr
 
-  def visit_While(self, node):
-    self.generic_visit(node)
-
-    self._validate_no_live_vars_created(node)
-
+  def _get_loop_state(self, node):
     body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE)
-    body_closure = body_scope.modified - body_scope.created
-    all_referenced = body_scope.referenced
-
-    cond_scope = anno.getanno(node, annos.NodeAnno.COND_SCOPE)
-    cond_closure = set()
-    for s in cond_scope.used:
-      for root in s.support_set:
-        if root not in body_scope.created:
-          cond_closure.add(root)
-
-    state = list(body_closure)
-    if not state:
+    defined_in = anno.getanno(node, anno.Static.DEFINED_VARS_IN)
+    live_in = anno.getanno(node, anno.Static.LIVE_VARS_IN)
+    live_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT)
+    reserved_symbols = body_scope.referenced
+
+    # Note that it doesn't matter whether the variables are live after the loop.
+    # If the loop modifies them nonlocally (e.g. the result of an iteration
+    # depends on the previous iteration), then they need to be included in
+    # the loop state, regardless of whether they are later used or not.
+    loop_state = body_scope.modified & live_in
+
+    undefined_lives = loop_state - defined_in
+    # Only simple variables must be defined. The composite ones will be
+    # implicitly checked at runtime.
+    undefined_simple_lives = {v for v in undefined_lives if v.is_simple()}
+    if undefined_simple_lives:
+      raise NameError(
+          'cannot convert loop: it includes symbols that are undefined'
+          ' when entering the loop: {}'.format(
+              self._fmt_symbols(undefined_simple_lives)))
+
+    live_defs_in_loop = (body_scope.modified - live_in) & live_out
+    if live_defs_in_loop:
+      # TODO(mdan): Include reference to explanation why.
+      raise NotImplementedError(
+          'cannot convert loop: it includes symbols that are defined'
+          ' inside the loop, but used later: {}. To fix, initialize'
+          ' these symbols before the loop'.format(
+              self._fmt_symbols(live_defs_in_loop)))
+
+    if not loop_state:
       # TODO(mdan): Implement this properly.
-      # To complete this statement, we need to check whether any variable
-      # created inside the body scope is used before being modified outside the
-      # scope. This should be done during activity analysis, and in general
-      # should cover the case where variables may not be initialized.
-      raise ValueError('cannot convert while loop: no outputs')
+      # We need to check whether any variable created inside the body scope
+      # is used before being modified outside the scope. This should be done
+      # during activity analysis, and in general should cover the case where
+      # variables may not be initialized.
+      raise ValueError('cannot convert loop: no outputs')
+
+    return loop_state, reserved_symbols
 
+  def _state_constructs(self, loop_state, reserved_symbols):
+    loop_state = list(loop_state)
     state_ssf = [
-        self.ctx.namer.new_symbol(s.ssf(), all_referenced) for s in state
+        self.ctx.namer.new_symbol(s.ssf(), reserved_symbols) for s in loop_state
     ]
     ssf_map = {
         name: ssf
-        for name, ssf in zip(state, state_ssf)
+        for name, ssf in zip(loop_state, state_ssf)
         if str(name) != ssf
     }
 
-    if len(state) == 1:
-      state = state[0]
+    if len(loop_state) == 1:
+      loop_state = loop_state[0]
       state_ssf = state_ssf[0]
-      state_ast_tuple = state
+      state_ast_tuple = loop_state
     else:
-      state_ast_tuple = gast.Tuple([n.ast() for n in state], None)
+      state_ast_tuple = gast.Tuple([n.ast() for n in loop_state], None)
+
+    return loop_state, state_ssf, state_ast_tuple, ssf_map
+
+  def visit_While(self, node):
+    self.generic_visit(node)
 
+    loop_state, reserved_symbols = self._get_loop_state(node)
+
+    # Note: one might expect we can dispatch based on the loop condition.
+    # But because that is dependent on the state, it cannot be evaluated ahead
+    # of time - doing that would risk duplicating any effects the condition has.
+    # Furthermore, we cannot evaluate slices and attributes, because they might
+    # trigger __getitem__ or __getattribute__.
+    #
+    # A case where this fails includes ops with side effects on a stateful
+    # resource captured in an object:
+    #
+    #   while self.v.read() > 0:
+    #     self.v.assign(1)
+    #
+    # TODO(mdan): Handle the case above.
+    cond_scope = anno.getanno(node, annos.NodeAnno.COND_SCOPE)
+    cond_closure = set()
+    for s in cond_scope.used:
+      cond_closure.update(s.support_set)
+    cond_closure -= loop_state
+
+    loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs(
+        loop_state, reserved_symbols)
     node_body = ast_util.rename_symbols(node.body, ssf_map)
     test = ast_util.rename_symbols(node.test, ssf_map)
 
-    # TODO(b/113118541) investigate the need-for and correctness-of extra_deps
     template = """
       def test_name(state_ssf):
         return test
@@ -262,12 +296,12 @@ class ControlFlowTransformer(converter.Base):
     """
     node = templates.replace(
         template,
-        state=state,
+        state=loop_state,
         state_ssf=state_ssf,
         state_ast_tuple=state_ast_tuple,
-        test_name=self.ctx.namer.new_symbol('loop_test', body_scope.referenced),
+        test_name=self.ctx.namer.new_symbol('loop_test', reserved_symbols),
         test=test,
-        body_name=self.ctx.namer.new_symbol('loop_body', body_scope.referenced),
+        body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
         body=node_body,
         extra_deps=tuple(s.ast() for s in cond_closure),
     )
@@ -277,30 +311,9 @@ class ControlFlowTransformer(converter.Base):
   def visit_For(self, node):
     self.generic_visit(node)
 
-    self._validate_no_live_vars_created(node)
-
-    body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE)
-    body_closure = body_scope.modified - body_scope.created
-    all_referenced = body_scope.referenced
-
-    state = list(body_closure)
-
-    state_ssf = [
-        self.ctx.namer.new_symbol(s.ssf(), all_referenced) for s in state
-    ]
-    ssf_map = {
-        name: ssf
-        for name, ssf in zip(state, state_ssf)
-        if str(name) != ssf
-    }
-
-    if len(state) == 1:
-      state = state[0]
-      state_ssf = state_ssf[0]
-      state_ast_tuple = state
-    else:
-      state_ast_tuple = gast.Tuple([n.ast() for n in state], None)
-
+    loop_state, reserved_symbols = self._get_loop_state(node)
+    loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs(
+        loop_state, reserved_symbols)
     node_body = ast_util.rename_symbols(node.body, ssf_map)
     if anno.hasanno(node, 'extra_test'):
       extra_test = anno.getanno(node, 'extra_test')
@@ -321,14 +334,15 @@ class ControlFlowTransformer(converter.Base):
     """
     node = templates.replace(
         template,
-        state=state,
+        state=loop_state,
         state_ssf=state_ssf,
         state_ast_tuple=state_ast_tuple,
         iter_=node.iter,
         iterate=node.target,
-        extra_test_name=self.ctx.namer.new_symbol('extra_test', all_referenced),
+        extra_test_name=self.ctx.namer.new_symbol('extra_test',
+                                                  reserved_symbols),
         extra_test_expr=extra_test,
-        body_name=self.ctx.namer.new_symbol('loop_body', all_referenced),
+        body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
         body=node_body)
 
     return node
diff --git a/tensorflow/python/autograph/converters/control_flow_test.py b/tensorflow/python/autograph/converters/control_flow_test.py
index cfa0ea920c..03fdfc804e 100644
--- a/tensorflow/python/autograph/converters/control_flow_test.py
+++ b/tensorflow/python/autograph/converters/control_flow_test.py
@@ -83,7 +83,7 @@ class ControlFlowTest(converter_testing.TestCase):
       return s
 
     node, ctx = self.prepare(bad_while_loop, {})
-    with self.assertRaises(transformer.AutographParseError):
+    with self.assertRaises(NameError):
       control_flow.transform(node, ctx)
 
   def test_if_basic(self):
@@ -232,7 +232,7 @@ class ControlFlowTest(converter_testing.TestCase):
       return s
 
     node, ctx = self.prepare(bad_for_loop, {})
-    with self.assertRaises(transformer.AutographParseError):
+    with self.assertRaises(NameError):
       control_flow.transform(node, ctx)
 
   def test_for_tuple_unpacking(self):
diff --git a/tensorflow/python/autograph/pyct/qual_names.py b/tensorflow/python/autograph/pyct/qual_names.py
index 334cbd7d38..6ad6199acf 100644
--- a/tensorflow/python/autograph/pyct/qual_names.py
+++ b/tensorflow/python/autograph/pyct/qual_names.py
@@ -99,6 +99,9 @@ class QN(object):
   def is_symbol(self):
     return isinstance(self.qn[0], str)
 
+  def is_simple(self):
+    return len(self.qn) <= 1
+
   def is_composite(self):
     return len(self.qn) > 1
 
-- 
GitLab


From 5c6ea51834ee410586233d67d43bdb4f1729261f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 14:07:03 -0700
Subject: [PATCH 1318/1357] Internal Change

PiperOrigin-RevId: 216419037
---
 tensorflow/contrib/lite/build_def.bzl | 2 ++
 tensorflow/contrib/lite/testing/BUILD | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index b3607a761c..05efee18e7 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -345,6 +345,7 @@ def generated_test_models_all():
             tags = []
             if test in failing_tests:
                 tags.append("notap")
+                tags.append("manual")
             if conversion_mode:
                 test += "_%s" % conversion_mode
             options.append((conversion_mode, test, tags))
@@ -450,6 +451,7 @@ def gen_full_model_test(conversion_modes, models, data, test_suite_tag):
                 "no_oss",
                 "no_windows",
                 "notap",
+                "manual",
             ] + [test_suite_tag],
             deps = [
                 "//tensorflow/contrib/lite/testing:model_coverage_lib",
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 45baad782a..2edd420fea 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -380,6 +380,7 @@ py_test(
     srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "manual",
         "no_oss",
         "no_pip",
         "no_windows",
-- 
GitLab


From 4fa59ef694c19dc63d574b2d6a349cd753d9cdbd Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 9 Oct 2018 14:11:06 -0700
Subject: [PATCH 1319/1357] [tf.data] Lift parameterized test parameters into
 lambdas if they create TF ops.

The existing code triggers parts of the TensorFlow runtime that may not have been fully
initialized at the time the parameters are evaluated. Lifting into a lambda and invoking
the lambda inside the test method will achieve the proper order.

PiperOrigin-RevId: 216419757
---
 tensorflow/python/data/util/structure_test.py | 61 ++++++++++---------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/tensorflow/python/data/util/structure_test.py b/tensorflow/python/data/util/structure_test.py
index 2982763181..630a0c912b 100644
--- a/tensorflow/python/data/util/structure_test.py
+++ b/tensorflow/python/data/util/structure_test.py
@@ -34,52 +34,56 @@ from tensorflow.python.platform import test
 
 
 class StructureTest(test.TestCase, parameterized.TestCase):
-  # pylint disable=protected-access
 
+  # NOTE(mrry): The arguments must be lifted into lambdas because otherwise they
+  # will be executed before the (eager- or graph-mode) test environment has been
+  # set up.
+  # pylint: disable=g-long-lambda,protected-access
   @parameterized.parameters(
-      (constant_op.constant(37.0), structure.TensorStructure, [dtypes.float32],
-       [[]]), (sparse_tensor.SparseTensor(
-           indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
-               structure.SparseTensorStructure, [dtypes.variant], [[3]]),
-      ((constant_op.constant(37.0), constant_op.constant([1, 2, 3])),
-       structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]), ({
-           "a": constant_op.constant(37.0),
-           "b": constant_op.constant([1, 2, 3])
-       }, structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]),
-      ({
-          "a":
-              constant_op.constant(37.0),
+      (lambda: constant_op.constant(37.0), structure.TensorStructure,
+       [dtypes.float32], [[]]),
+      (lambda: sparse_tensor.SparseTensor(
+          indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
+       structure.SparseTensorStructure, [dtypes.variant], [[3]]),
+      (lambda: (constant_op.constant(37.0), constant_op.constant([1, 2, 3])),
+       structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]),
+      (lambda: {
+          "a": constant_op.constant(37.0),
+          "b": constant_op.constant([1, 2, 3])
+      }, structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]),
+      (lambda: {
+          "a": constant_op.constant(37.0),
           "b": (sparse_tensor.SparseTensor(
               indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
                 sparse_tensor.SparseTensor(
                     indices=[[3, 4]], values=[-1], dense_shape=[4, 5]))
       }, structure.NestedStructure,
        [dtypes.float32, dtypes.variant, dtypes.variant], [[], [3], [3]]))
-  def testFlatStructure(self, value, expected_structure, expected_types,
+  def testFlatStructure(self, value_fn, expected_structure, expected_types,
                         expected_shapes):
+    value = value_fn()
     s = structure.Structure.from_value(value)
     self.assertIsInstance(s, expected_structure)
     self.assertEqual(expected_types, s._flat_types)
     self.assertEqual(expected_shapes, s._flat_shapes)
 
   @parameterized.parameters(
-      (constant_op.constant(37.0), [
+      (lambda: constant_op.constant(37.0), lambda: [
           constant_op.constant(38.0),
           array_ops.placeholder(dtypes.float32),
           variables.Variable(100.0), 42.0,
           np.array(42.0, dtype=np.float32)
-      ], [constant_op.constant([1.0, 2.0]),
-          constant_op.constant(37)]),
-      (sparse_tensor.SparseTensor(
+      ], lambda: [constant_op.constant([1.0, 2.0]), constant_op.constant(37)]),
+      (lambda: sparse_tensor.SparseTensor(
           indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
-       [
+       lambda: [
            sparse_tensor.SparseTensor(
                indices=[[1, 1], [3, 4]], values=[10, -1], dense_shape=[4, 5]),
            sparse_tensor.SparseTensorValue(
                indices=[[1, 1], [3, 4]], values=[10, -1], dense_shape=[4, 5]),
            array_ops.sparse_placeholder(dtype=dtypes.int32),
            array_ops.sparse_placeholder(dtype=dtypes.int32, shape=[None, None])
-       ], [
+       ], lambda: [
            constant_op.constant(37, shape=[4, 5]),
            sparse_tensor.SparseTensor(
                indices=[[3, 4]], values=[-1], dense_shape=[5, 6]),
@@ -88,13 +92,13 @@ class StructureTest(test.TestCase, parameterized.TestCase):
            sparse_tensor.SparseTensor(
                indices=[[3, 4]], values=[-1.0], dense_shape=[4, 5])
        ]),
-      ({
+      (lambda: {
           "a": constant_op.constant(37.0),
           "b": constant_op.constant([1, 2, 3])
-      }, [{
+      }, lambda: [{
           "a": constant_op.constant(15.0),
           "b": constant_op.constant([4, 5, 6])
-      }], [{
+      }], lambda: [{
           "a": constant_op.constant(15.0),
           "b": constant_op.constant([4, 5, 6, 7])
       }, {
@@ -108,8 +112,11 @@ class StructureTest(test.TestCase, parameterized.TestCase):
                   indices=[[0], [1], [2]], values=[4, 5, 6], dense_shape=[3])
       }, (constant_op.constant(15.0), constant_op.constant([4, 5, 6]))]),
   )
-  def testIsCompatibleWithStructure(self, original_value, compatible_values,
-                                    incompatible_values):
+  def testIsCompatibleWithStructure(
+      self, original_value_fn, compatible_values_fn, incompatible_values_fn):
+    original_value = original_value_fn()
+    compatible_values = compatible_values_fn()
+    incompatible_values = incompatible_values_fn()
     s = structure.Structure.from_value(original_value)
     for compatible_value in compatible_values:
       self.assertTrue(
@@ -120,10 +127,6 @@ class StructureTest(test.TestCase, parameterized.TestCase):
           s.is_compatible_with(
               structure.Structure.from_value(incompatible_value)))
 
-  # NOTE(mrry): The arguments must be lifted into lambdas because otherwise they
-  # will be executed before the (eager- or graph-mode) test environment has been
-  # set up.
-  # pylint: disable=g-long-lambda
   @parameterized.parameters(
       (lambda: constant_op.constant(37.0),),
       (lambda: sparse_tensor.SparseTensor(
-- 
GitLab


From b145f46b735fe1e383be6629cafaa5269b07b7fb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 14:12:25 -0700
Subject: [PATCH 1320/1357] Add support for time-major input in the
 bidirectional RNN Op.

PiperOrigin-RevId: 216419983
---
 .../kernels/bidirectional_sequence_rnn.cc     | 251 ++++++++++++------
 .../bidirectional_sequence_rnn_test.cc        |  94 +++++--
 2 files changed, 247 insertions(+), 98 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
index c22a457a71..f544dd5ffa 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
@@ -114,8 +114,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
 
   TF_LITE_ENSURE_EQ(context, input->dims->size, 3);
-  const int batch_size = input->dims->data[0];
-  const int max_time = input->dims->data[1];
+  const bool time_major = params->time_major;
+  const int batch_size =
+      (time_major) ? input->dims->data[1] : input->dims->data[0];
+  const int max_time =
+      (time_major) ? input->dims->data[0] : input->dims->data[1];
   const int fw_num_units = fw_input_weights->dims->data[0];
   const int bw_num_units = bw_input_weights->dims->data[0];
   TF_LITE_ASSERT_EQ(input->dims->data[2], fw_input_weights->dims->data[1]);
@@ -237,8 +240,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // Resize outputs.
   TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor);
   TfLiteIntArray* fw_output_size_array = TfLiteIntArrayCreate(3);
-  fw_output_size_array->data[0] = batch_size;
-  fw_output_size_array->data[1] = max_time;
+  fw_output_size_array->data[0] = (time_major) ? max_time : batch_size;
+  fw_output_size_array->data[1] = (time_major) ? batch_size : max_time;
   fw_output_size_array->data[2] =
       params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
   TF_LITE_ENSURE_OK(
@@ -266,8 +269,11 @@ TfLiteStatus EvalFloat(
     const TfLiteBidirectionalSequenceRNNParams* params,
     TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output,
     TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) {
-  const int batch_size = input->dims->data[0];
-  const int max_time = input->dims->data[1];
+  const bool time_major = params->time_major;
+  const int batch_size =
+      (time_major) ? input->dims->data[1] : input->dims->data[0];
+  const int max_time =
+      (time_major) ? input->dims->data[0] : input->dims->data[1];
   const int input_size = input->dims->data[2];
   const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0;
 
@@ -292,48 +298,91 @@ TfLiteStatus EvalFloat(
       params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
   const int bw_output_step =
       params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
-  for (int b = 0; b < batch_size; b++) {
+  if (time_major) {
+    // TODO(mirkov): add merge_outputs support for time_major inputs.
+    TF_LITE_ASSERT_EQ(params->merge_outputs, false);
+
     // Forward cell.
-    float* fw_hidden_state_ptr_batch =
-        fw_hidden_state->data.f + b * fw_num_units;
-    float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time;
+    float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f;
     for (int s = 0; s < max_time; s++) {
       const float* input_ptr_batch =
-          input->data.f + b * input_size * max_time + s * input_size;
+          input->data.f + s * input_size * batch_size;
       const float* aux_input_ptr_batch =
           (aux_input != nullptr)
-              ? aux_input->data.f + b * input_size * max_time + s * input_size
+              ? aux_input->data.f + s * input_size * batch_size
               : nullptr;
-      float* output_ptr_batch = fw_output_offset + s * fw_output_step;
+      float* output_ptr_batch =
+          fw_output->data.f + s * fw_num_units * batch_size;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch,
           fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr,
-          input_size, aux_input_size, fw_num_units, /*batch_size=*/1,
+          input_size, aux_input_size, fw_num_units, batch_size,
           params->activation, fw_hidden_state_ptr_batch, output_ptr_batch);
     }
     // Backward cell.
-    float* bw_hidden_state_ptr_batch =
-        bw_hidden_state->data.f + b * bw_num_units;
-    float* bw_output_offset =
-        params->merge_outputs
-            ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units
-            : bw_output->data.f + b * bw_output_step * max_time;
+    float* bw_hidden_state_ptr_batch = bw_hidden_state->data.f;
     for (int s = max_time - 1; s >= 0; s--) {
       const float* input_ptr_batch =
-          input->data.f + b * input_size * max_time + s * input_size;
+          input->data.f + s * input_size * batch_size;
       const float* aux_input_ptr_batch =
           (aux_input != nullptr)
-              ? aux_input->data.f + b * input_size * max_time + s * input_size
+              ? aux_input->data.f + s * input_size * batch_size
               : nullptr;
-      float* output_ptr_batch = bw_output_offset + s * bw_output_step;
+      float* output_ptr_batch =
+          bw_output->data.f + s * bw_num_units * batch_size;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch,
           bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr,
-          input_size, aux_input_size, bw_num_units, /*batch_size=*/1,
+          input_size, aux_input_size, bw_num_units, batch_size,
           params->activation, bw_hidden_state_ptr_batch, output_ptr_batch);
     }
+  } else {
+    for (int b = 0; b < batch_size; b++) {
+      // Forward cell.
+      float* fw_hidden_state_ptr_batch =
+          fw_hidden_state->data.f + b * fw_num_units;
+      float* fw_output_offset =
+          fw_output->data.f + b * fw_output_step * max_time;
+      for (int s = 0; s < max_time; s++) {
+        const float* input_ptr_batch =
+            input->data.f + b * input_size * max_time + s * input_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + b * input_size * max_time + s * input_size
+                : nullptr;
+        float* output_ptr_batch = fw_output_offset + s * fw_output_step;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch,
+            fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr,
+            input_size, aux_input_size, fw_num_units, /*batch_size=*/1,
+            params->activation, fw_hidden_state_ptr_batch, output_ptr_batch);
+      }
+      // Backward cell.
+      float* bw_hidden_state_ptr_batch =
+          bw_hidden_state->data.f + b * bw_num_units;
+      float* bw_output_offset =
+          params->merge_outputs
+              ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units
+              : bw_output->data.f + b * bw_output_step * max_time;
+      for (int s = max_time - 1; s >= 0; s--) {
+        const float* input_ptr_batch =
+            input->data.f + b * input_size * max_time + s * input_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + b * input_size * max_time + s * input_size
+                : nullptr;
+        float* output_ptr_batch = bw_output_offset + s * bw_output_step;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch,
+            bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr,
+            input_size, aux_input_size, bw_num_units, /*batch_size=*/1,
+            params->activation, bw_hidden_state_ptr_batch, output_ptr_batch);
+      }
+    }
   }
   return kTfLiteOk;
 }
@@ -351,8 +400,11 @@ TfLiteStatus EvalHybrid(
     TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output,
     TfLiteTensor* bw_hidden_state_quantized, TfLiteTensor* bw_hidden_state,
     TfLiteTensor* bw_output) {
-  const int batch_size = input->dims->data[0];
-  const int max_time = input->dims->data[1];
+  const bool time_major = params->time_major;
+  const int batch_size =
+      (time_major) ? input->dims->data[1] : input->dims->data[0];
+  const int max_time =
+      (time_major) ? input->dims->data[0] : input->dims->data[1];
   const int input_size = input->dims->data[2];
   const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0;
 
@@ -403,55 +455,106 @@ TfLiteStatus EvalHybrid(
       params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
   const int bw_output_step =
       params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
-  for (int b = 0; b < batch_size; b++) {
-    // Forward cell.
-    float* fw_hidden_state_ptr_batch =
-        fw_hidden_state->data.f + b * fw_num_units;
-    float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time;
-    for (int s = 0; s < max_time; s++) {
-      const float* input_ptr_batch =
-          input->data.f + b * input_size * max_time + s * input_size;
-      const float* aux_input_ptr_batch =
-          (aux_input != nullptr)
-              ? aux_input->data.f + b * input_size * max_time + s * input_size
-              : nullptr;
-      float* output_ptr_batch = fw_output_offset + s * fw_output_step;
-
-      kernel_utils::RnnBatchStep(
-          input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale,
-          aux_input_ptr_batch, aux_fw_input_weights_ptr,
-          aux_fw_input_weights_scale, fw_recurrent_weights_ptr,
-          fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size,
-          fw_num_units, /*batch_size=*/1, params->activation,
-          quantized_input_ptr, aux_quantized_input_ptr,
-          fw_quantized_hidden_state_ptr, scaling_factors_ptr,
-          fw_hidden_state_ptr_batch, output_ptr_batch);
+  if (time_major) {
+    for (int t = 0; t < max_time; t++) {
+      // TODO(mirkov): add merge_outputs support for time_major inputs.
+      TF_LITE_ASSERT_EQ(params->merge_outputs, false);
+
+      // Forward cell.
+      float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f;
+      for (int s = 0; s < max_time; s++) {
+        const float* input_ptr_batch =
+            input->data.f + s * input_size * batch_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + s * input_size * batch_size
+                : nullptr;
+        float* output_ptr_batch =
+            fw_output->data.f + s * fw_num_units * batch_size;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale,
+            aux_input_ptr_batch, aux_fw_input_weights_ptr,
+            aux_fw_input_weights_scale, fw_recurrent_weights_ptr,
+            fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size,
+            fw_num_units, batch_size, params->activation, quantized_input_ptr,
+            aux_quantized_input_ptr, fw_quantized_hidden_state_ptr,
+            scaling_factors_ptr, fw_hidden_state_ptr_batch, output_ptr_batch);
+      }
+      // Backward cell.
+      float* bw_hidden_state_ptr_batch = bw_hidden_state->data.f;
+      for (int s = max_time - 1; s >= 0; s--) {
+        const float* input_ptr_batch =
+            input->data.f + s * input_size * batch_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + s * input_size * batch_size
+                : nullptr;
+        float* output_ptr_batch =
+            bw_output->data.f + s * bw_num_units * batch_size;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale,
+            aux_input_ptr_batch, aux_bw_input_weights_ptr,
+            aux_bw_input_weights_scale, bw_recurrent_weights_ptr,
+            bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size,
+            bw_num_units, batch_size, params->activation, quantized_input_ptr,
+            aux_quantized_input_ptr, bw_quantized_hidden_state_ptr,
+            scaling_factors_ptr, bw_hidden_state_ptr_batch, output_ptr_batch);
+      }
     }
-    // Backward cell.
-    float* bw_hidden_state_ptr_batch =
-        bw_hidden_state->data.f + b * bw_num_units;
-    float* bw_output_offset =
-        params->merge_outputs
-            ? fw_output->data.f + b * bw_output_step * max_time
-            : bw_output->data.f + b * bw_output_step * max_time;
-    for (int s = max_time - 1; s >= 0; s--) {
-      const float* input_ptr_batch =
-          input->data.f + b * input_size * max_time + s * input_size;
-      const float* aux_input_ptr_batch =
-          (aux_input != nullptr)
-              ? aux_input->data.f + b * input_size * max_time + s * input_size
-              : nullptr;
-      float* output_ptr_batch = bw_output_offset + s * bw_output_step;
-
-      kernel_utils::RnnBatchStep(
-          input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale,
-          aux_input_ptr_batch, aux_bw_input_weights_ptr,
-          aux_bw_input_weights_scale, bw_recurrent_weights_ptr,
-          bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size,
-          bw_num_units, /*batch_size=*/1, params->activation,
-          quantized_input_ptr, aux_quantized_input_ptr,
-          bw_quantized_hidden_state_ptr, scaling_factors_ptr,
-          bw_hidden_state_ptr_batch, output_ptr_batch);
+  } else {
+    for (int b = 0; b < batch_size; b++) {
+      // Forward cell.
+      float* fw_hidden_state_ptr_batch =
+          fw_hidden_state->data.f + b * fw_num_units;
+      float* fw_output_offset =
+          fw_output->data.f + b * fw_output_step * max_time;
+      for (int s = 0; s < max_time; s++) {
+        const float* input_ptr_batch =
+            input->data.f + b * input_size * max_time + s * input_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + b * input_size * max_time + s * input_size
+                : nullptr;
+        float* output_ptr_batch = fw_output_offset + s * fw_output_step;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale,
+            aux_input_ptr_batch, aux_fw_input_weights_ptr,
+            aux_fw_input_weights_scale, fw_recurrent_weights_ptr,
+            fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size,
+            fw_num_units, /*batch_size=*/1, params->activation,
+            quantized_input_ptr, aux_quantized_input_ptr,
+            fw_quantized_hidden_state_ptr, scaling_factors_ptr,
+            fw_hidden_state_ptr_batch, output_ptr_batch);
+      }
+      // Backward cell.
+      float* bw_hidden_state_ptr_batch =
+          bw_hidden_state->data.f + b * bw_num_units;
+      float* bw_output_offset =
+          params->merge_outputs
+              ? fw_output->data.f + b * bw_output_step * max_time
+              : bw_output->data.f + b * bw_output_step * max_time;
+      for (int s = max_time - 1; s >= 0; s--) {
+        const float* input_ptr_batch =
+            input->data.f + b * input_size * max_time + s * input_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + b * input_size * max_time + s * input_size
+                : nullptr;
+        float* output_ptr_batch = bw_output_offset + s * bw_output_step;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale,
+            aux_input_ptr_batch, aux_bw_input_weights_ptr,
+            aux_bw_input_weights_scale, bw_recurrent_weights_ptr,
+            bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size,
+            bw_num_units, /*batch_size=*/1, params->activation,
+            quantized_input_ptr, aux_quantized_input_ptr,
+            bw_quantized_hidden_state_ptr, scaling_factors_ptr,
+            bw_hidden_state_ptr_batch, output_ptr_batch);
+      }
     }
   }
   return kTfLiteOk;
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
index f555c472f5..6c179ca05d 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
@@ -654,7 +654,8 @@ const std::initializer_list<float> recurrent_weights = {
 class BidirectionalRNNOpModel : public SingleOpModel {
  public:
   BidirectionalRNNOpModel(int batches, int sequence_len, int fw_units,
-                          int bw_units, int input_size, bool merge_outputs)
+                          int bw_units, int input_size, bool time_major,
+                          bool merge_outputs)
       : batches_(batches),
         sequence_len_(sequence_len),
         fw_units_(fw_units),
@@ -679,25 +680,29 @@ class BidirectionalRNNOpModel : public SingleOpModel {
       bw_output_ = AddOutput(TensorType_FLOAT32);
     }
 
-    SetBuiltinOp(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
-                 BuiltinOptions_BidirectionalSequenceRNNOptions,
-                 CreateBidirectionalSequenceRNNOptions(
-                     builder_, /*time_major=*/false,
-                     ActivationFunctionType_RELU, merge_outputs)
-                     .Union());
+    SetBuiltinOp(
+        BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
+        BuiltinOptions_BidirectionalSequenceRNNOptions,
+        CreateBidirectionalSequenceRNNOptions(
+            builder_, time_major, ActivationFunctionType_RELU, merge_outputs)
+            .Union());
+    const auto input_shape =
+        (time_major) ? std::vector<int>({sequence_len_, batches_, input_size_})
+                     : std::vector<int>({batches_, sequence_len_, input_size_});
+
     BuildInterpreter({
-        {batches_, sequence_len_, input_size_},  // input
-        {fw_units_, input_size_},                // fw_weights
-        {fw_units_, fw_units_},                  // fw_recurrent_weights
-        {fw_units_},                             // fw_bias
-        {batches_, fw_units_},                   // fw_hidden_state
-        {bw_units_, input_size_},                // bw_weights
-        {bw_units_, bw_units_},                  // bw_recurrent_weights
-        {bw_units_},                             // bw_bias
-        {batches_, bw_units_},                   // bw_hidden_state
-        {batches_, sequence_len_, 0},            // aux_input
-        {fw_units_, 0},                          // aux_fw_weights
-        {bw_units_, 0},                          // aux_bw_weights
+        input_shape,                   // input
+        {fw_units_, input_size_},      // fw_weights
+        {fw_units_, fw_units_},        // fw_recurrent_weights
+        {fw_units_},                   // fw_bias
+        {batches_, fw_units_},         // fw_hidden_state
+        {bw_units_, input_size_},      // bw_weights
+        {bw_units_, bw_units_},        // bw_recurrent_weights
+        {bw_units_},                   // bw_bias
+        {batches_, bw_units_},         // bw_hidden_state
+        {batches_, sequence_len_, 0},  // aux_input
+        {fw_units_, 0},                // aux_fw_weights
+        {bw_units_, 0},                // aux_bw_weights
     });
   }
 
@@ -770,7 +775,8 @@ class BidirectionalRNNOpModel : public SingleOpModel {
 TEST(BidirectionalRNNOpTest, BlackBoxTest) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8, /*merge_outputs=*/false);
+                              /*input_size=*/8, /*time_major=*/false,
+                              /*merge_outputs=*/false);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -803,11 +809,49 @@ TEST(BidirectionalRNNOpTest, BlackBoxTest) {
   EXPECT_THAT(rnn.GetBwOutput(), ElementsAreArray(ArrayFloatNear(bw_expected)));
 }
 
-// Same as the previous test, yet with merged outputs.
+// Same as BlackBox test, but input is reshuffled to time_major format.
+TEST(BidirectionalRNNOpTest, BlackBoxTestTimeMajor) {
+  BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
+                              /*fw_units=*/16, /*bw_units=*/16,
+                              /*input_size=*/8, /*time_major=*/true,
+                              /*merge_outputs=*/false);
+  rnn.SetFwWeights(weights);
+  rnn.SetBwWeights(weights);
+  rnn.SetFwBias(biases);
+  rnn.SetBwBias(biases);
+  rnn.SetFwRecurrentWeights(recurrent_weights);
+  rnn.SetBwRecurrentWeights(recurrent_weights);
+
+  // const int input_sequence_size = rnn.input_size() * rnn.sequence_len();
+  // Insert the inputs in time_major format. The batch_major format is:
+  // [b0t0, b0t1, ..., b0t15, b1t0, b1t1, ..., b1t15]. This is reshuffled as:
+  // [b0t0, b1t0, b0t1, b1t1, ..., b0t15, b1t15].
+  for (int i = 0; i < rnn.sequence_len(); i++) {
+    float* batch_start = rnn_input + i * rnn.input_size();
+    float* batch_end = batch_start + rnn.input_size();
+    // The two batches are identical.
+    rnn.SetInput(2 * i * rnn.input_size(), batch_start, batch_end);
+    rnn.SetInput((2 * i + 1) * rnn.input_size(), batch_start, batch_end);
+  }
+
+  rnn.Invoke();
+
+  std::vector<float> fw_expected;
+  for (int i = 0; i < rnn.sequence_len(); i++) {
+    float* golden_fw_start = rnn_golden_fw_output + i * rnn.num_fw_units();
+    float* golden_fw_end = golden_fw_start + rnn.num_fw_units();
+    fw_expected.insert(fw_expected.end(), golden_fw_start, golden_fw_end);
+    fw_expected.insert(fw_expected.end(), golden_fw_start, golden_fw_end);
+  }
+  EXPECT_THAT(rnn.GetFwOutput(), ElementsAreArray(ArrayFloatNear(fw_expected)));
+}
+
+// Same as BlackBox test, yet with merged outputs.
 TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8, /*merge_outputs=*/true);
+                              /*input_size=*/8, /*time_major=*/false,
+                              /*merge_outputs=*/true);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -845,7 +889,8 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) {
 TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8, /*merge_outputs=*/false);
+                              /*input_size=*/8, /*time_major=*/false,
+                              /*merge_outputs=*/false);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -891,7 +936,8 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) {
 TEST(BidirectionalRNNOpTest, EndToEndTest) {
   BidirectionalRNNOpModel rnn(/*batches=*/1, /*sequence_len=*/4,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8, /*merge_outputs=*/false);
+                              /*input_size=*/8, /*time_major=*/false,
+                              /*merge_outputs=*/false);
   const int output_size = 4;
   float dnn_weights[] = {
       -0.5782342,  -0.052212059, 0.73036242,  -0.81216097, -0.80088139,
-- 
GitLab


From fa1542234857acf56af6e7f0dbe8d2084a18fa00 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 9 Oct 2018 14:19:07 -0700
Subject: [PATCH 1321/1357] [XLA:GPU] Pattern match atomic "apply" into an
 atomic store

Otherwise we'd emit a CAS loop.

PiperOrigin-RevId: 216421161
---
 .../compiler/xla/service/gpu/ir_emitter.cc    | 15 +++++
 .../compiler/xla/service/gpu/tests/BUILD      | 12 ++++
 .../xla/service/gpu/tests/gpu_atomic_test.cc  | 58 +++++++++++++++++++
 3 files changed, 85 insertions(+)
 create mode 100644 tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index b7c37bcf3c..47102347cb 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -179,6 +179,21 @@ bool IrEmitter::MaybeEmitDirectAtomicOperation(
   bool is_atomic_integral = element_type == S32 || element_type == U32 ||
                             element_type == S64 || element_type == U64;
   llvm::Value* source = Load(source_address, "source");
+
+  // kCopy of RHS -> atomic store.
+  if (root_opcode == HloOpcode::kCopy &&
+      (element_type == F32 || is_atomic_integral) &&
+      computation.root_instruction()->operand(0)->opcode() ==
+          HloOpcode::kParameter &&
+      computation.root_instruction()->operand(0)->parameter_number() == 1) {
+    llvm::StoreInst* store = Store(source, output_address);
+    store->setAtomic(llvm::AtomicOrdering::Unordered);
+    // Derive a minimum alignment from the type. The optimizer can increase it
+    // later.
+    store->setAlignment(ShapeUtil::ByteSizeOfPrimitiveType(element_type));
+    return true;
+  }
+
   if (root_opcode == HloOpcode::kAdd) {
     // NVPTX supports atomicAdd on F32 and integer types.
     if (element_type == F32) {
diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD
index a725533567..1f0436278c 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD
@@ -223,3 +223,15 @@ tf_cc_test(
         "@com_google_absl//absl/strings",
     ],
 )
+
+tf_cc_test(
+    name = "gpu_atomic_test",
+    srcs = ["gpu_atomic_test.cc"],
+    tags = tf_cuda_tests_tags(),
+    deps = [
+        ":gpu_codegen_test",
+        "//tensorflow/compiler/xla/tests:filecheck",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc
new file mode 100644
index 0000000000..6b18c4c637
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc
@@ -0,0 +1,58 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <memory>
+#include <utility>
+
+#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h"
+#include "tensorflow/compiler/xla/tests/filecheck.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace gpu {
+namespace {
+
+class GpuAtomicTest : public GpuCodegenTest {};
+
+TEST_F(GpuAtomicTest, TestStore) {
+  const char* hlo_string = R"(
+    HloModule TensorFlowScatterV1
+
+    update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+      lhs = s32[] parameter(0)
+      ROOT rhs = s32[] parameter(1)
+    }
+
+    ENTRY main {
+      operand = s32[3,3] parameter(0)
+      indices = s32[2] parameter(1)
+      updates = s32[2,3] parameter(2)
+      ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=update_s32,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+    }
+)";
+
+  CompileAndVerifyIr(hlo_string, R"(
+CHECK: store atomic{{.*}}unordered, align 4
+)");
+}
+
+}  // namespace
+}  // namespace gpu
+}  // namespace xla
-- 
GitLab


From 35caff957424a60bd7d7e4e92a1ec87f617781c6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 14:25:27 -0700
Subject: [PATCH 1322/1357] Export feature importance for oblivious tree nodes.

PiperOrigin-RevId: 216422334
---
 .../estimator_batch/custom_export_strategy.py         | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
index 48f12a64f9..a3df272e69 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
@@ -196,6 +196,10 @@ def convert_to_universal_format(dtec, sorted_feature_names,
           matching_id = categorical_test.value.add()
           matching_id.int64_value = split.feature_id
           node.custom_left_child_test.Pack(categorical_test)
+        elif (node_type == "oblivious_dense_float_binary_split" or
+              node_type == "oblivious_categorical_id_binary_split"):
+          raise ValueError("Universal tree format doesn't support oblivious "
+                           "trees")
         else:
           raise ValueError("Unexpected node type %s" % node_type)
         node.left_child_id.value = split.left_id
@@ -229,6 +233,13 @@ def _get_feature_importances(dtec, feature_names, num_dense_floats,
         split = tree_node.categorical_id_binary_split
         split_column = feature_names[split.feature_column + num_dense_floats +
                                      num_sparse_float]
+      elif node_type == "oblivious_dense_float_binary_split":
+        split = tree_node.oblivious_dense_float_binary_split
+        split_column = feature_names[split.feature_column]
+      elif node_type == "oblivious_categorical_id_binary_split":
+        split = tree_node.oblivious_categorical_id_binary_split
+        split_column = feature_names[split.feature_column + num_dense_floats +
+                                     num_sparse_float]
       elif node_type == "categorical_id_set_membership_binary_split":
         split = tree_node.categorical_id_set_membership_binary_split
         split_column = feature_names[split.feature_column + num_dense_floats +
-- 
GitLab


From 950cf87104bfee28e2165fe368f66337b8a1336d Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Tue, 9 Oct 2018 14:36:33 -0700
Subject: [PATCH 1323/1357] [tf.data vectorization] Add vectorizer for `Add` op

PiperOrigin-RevId: 216424512
---
 tensorflow/core/graph/graph.cc                |   2 +-
 .../optimizers/data/vectorization/BUILD       |  34 ++--
 .../data/vectorization/add_vectorizer.cc      | 150 ++++++++++++++++++
 .../optimizers/data/vectorization_utils.cc    |  21 +--
 .../data/vectorization_utils_test.cc          | 103 ++++++++++--
 .../optimization/map_vectorization_test.py    |   1 +
 6 files changed, 280 insertions(+), 31 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc

diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 6f068546d2..a17491d4f7 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -34,7 +34,7 @@ namespace tensorflow {
 
 const int Graph::kControlSlot = -1;
 
-class NodeProperties {
+struct NodeProperties {
  public:
   NodeProperties(const OpDef* op_def, const NodeDef& node_def,
                  const DataTypeSlice inputs, const DataTypeSlice outputs)
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
index 985d6c6c3a..09018d0124 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -9,7 +9,11 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all")
 
 VECTORIZER_DEPS = [
     ":vectorizer_registry",
+    "//tensorflow/cc:ops",
     "//tensorflow/core/grappler/optimizers/data:graph_utils",
+    "//tensorflow/core:core_cpu",
+    "//tensorflow/cc:scope_internal",
+    "//tensorflow/cc:cc_ops",
 ] + tf_protos_all()
 
 cc_library(
@@ -42,6 +46,24 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "vectorizer_registry_test",
+    srcs = ["vectorizer_registry_test.cc"],
+    deps = [
+        ":vectorizer_registry",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ] + tf_protos_all(),
+)
+
+cc_library(
+    name = "add_vectorizer",
+    srcs = ["add_vectorizer.cc"],
+    deps = VECTORIZER_DEPS,
+    alwayslink = 1,
+)
+
 cc_library(
     name = "cast_vectorizer",
     srcs = ["cast_vectorizer.cc"],
@@ -61,20 +83,10 @@ cc_library(
     hdrs = ["vectorizer_registry.h"],
     visibility = ["//visibility:public"],
     deps = [
+        ":add_vectorizer",
         ":cast_vectorizer",
         ":unpack_vectorizer",
         ":vectorizer",
         ":vectorizer_registry",
     ],
 )
-
-tf_cc_test(
-    name = "vectorizer_registry_test",
-    srcs = ["vectorizer_registry_test.cc"],
-    deps = [
-        ":vectorizer_registry",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ] + tf_protos_all(),
-)
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc
new file mode 100644
index 0000000000..d90a51b01a
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc
@@ -0,0 +1,150 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope_internal.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/math_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+
+namespace tensorflow {
+namespace grappler {
+
+namespace {
+
+const char* const kExpandDimsPrefix = "vectorized/expanddims/";
+
+// Reshapes stacked inputs for broadcast. Stacked inputs have an extra leading
+// dimension, which may cause automatic broadcasting rules to expand the
+// input dimensions wrongly when the unstacked shapes have different ranks.
+// To avoid that, we reshape stacked inputs to the maximum rank they need
+// to be broadcasted to.
+//
+// For example, suppose we have inputs A and B, where A is a stacked tensor with
+// shape [n, 5] (where n is the stack size) and B is an unstacked tensor with
+// shape [12, 7, 5]. If we added them directly, tensorflow broadcasting rules
+// would expand the dimensions of A to [1, n, 5], then (incorrectly) check that
+// the dimensions n and 7 are compatible, and if so, create an output of shape
+// [12, 7, 5]. However, correct addition of these inputs would create an output
+// with shape [n, 12, 7, 5]: we need to manually expand the dimensions of A
+// *after* the leading dimension, i.e. expand A to the shape [n, 1, 1, 5] before
+// broadcasting.
+Status ExpandDimsForBroadcast(std::vector<WrappedTensor>* inputs, Graph* g) {
+  Status status;
+  Scope parent = NewInternalScope(g, &status, nullptr);
+  Scope s = parent.NewSubScope(kExpandDimsPrefix);
+
+  // TODO(rachelim): We can potentially get rid of all these ops if shapes are
+  // known statically
+
+  Output const_0 = ops::Const(s, 0);
+  Output const_1 = ops::Const(s, 1);
+
+  std::vector<Output> ranks;
+  ranks.reserve(inputs->size());
+
+  // Get the stacked rank of each input
+  for (const auto& input : *inputs) {
+    Output rank = ops::Rank(s, Output(input.node, input.output_index));
+
+    if (!input.stacked) {
+      // If the input is unstacked, add 1
+      rank = ops::Add(s, rank, const_1);
+    }
+
+    ranks.push_back(rank);
+  }
+
+  // Pack the ranks into one tensor to get the max
+  Output packed_ranks = ops::Stack(s, ranks);
+
+  Output max_rank =
+      ops::Max(s, packed_ranks, const_0, ops::Max::Attrs().KeepDims(true));
+
+  std::vector<WrappedTensor> expanded_inputs;
+  expanded_inputs.reserve(inputs->size());
+
+  // For all inputs that are stacked, expand dimensions after dim 0.
+  for (size_t i = 0; i < inputs->size(); ++i) {
+    if (!inputs->at(i).stacked) {
+      expanded_inputs.push_back(inputs->at(i));
+      continue;
+    }
+
+    Output input(inputs->at(i).node, inputs->at(i).output_index);
+
+    // Number of dimensions to expand
+    Output rank_diff = ops::Sub(s, max_rank, ranks[i]);
+
+    // [1] * rank_diff
+    Output ones = ops::Tile(s, ops::Const(s, {1}), rank_diff);
+
+    Output const_vec_1 = ops::Const(s, {1});
+
+    Output shape = ops::Shape(s, input);
+
+    // shape[:1]
+    Output concat_pre =
+        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
+                          ops::StridedSlice::Attrs().BeginMask(1));
+
+    // shape[1:]
+    Output concat_post =
+        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
+                          ops::StridedSlice::Attrs().EndMask(1));
+
+    // tf.concat([shape[:1], ones, shape[1:]], 0)
+    Output new_shape = ops::Concat(s, {concat_pre, ones, concat_post}, const_0);
+
+    Output result = ops::Reshape(s, input, new_shape);
+
+    expanded_inputs.push_back({result.node(), 0, true});
+  }
+
+  inputs->swap(expanded_inputs);
+  return status;
+}
+
+class AddVectorizer : public Vectorizer {
+ public:
+  Status Vectorize(const Node& node, Graph* outer_scope,
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
+    if (node.num_inputs() != 2) {
+      return errors::Internal("Add op should only have two inputs.");
+    }
+
+    TF_RETURN_IF_ERROR(ExpandDimsForBroadcast(&inputs, outer_scope));
+
+    // Add new Add node with the same op and attrs as the original node
+    Node* new_add_node;
+    TF_RETURN_IF_ERROR(NodeBuilder("Add", "Add")
+                           .Input(inputs[0].node, inputs[0].output_index)
+                           .Input(inputs[1].node, inputs[1].output_index)
+                           .Finalize(outer_scope, &new_add_node));
+
+    // Add output mappings
+    outputs->push_back({new_add_node, 0, true});
+    return Status::OK();
+  }
+};
+
+REGISTER_VECTORIZER("Add", AddVectorizer);
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index d977ff3198..8b93b1f2b8 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -64,9 +64,18 @@ void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src,
   }
 }
 
+// Update node attrs to keep its properties consistent with the function
+void UpdateMapDefunAttrs(FunctionBody* map_defun_fn, Node* map_defun_node) {
+  map_defun_node->AddAttr("output_types", map_defun_fn->ret_types);
+
+  // TODO(rachelim): Propagate precise shapes if they're known, which may enable
+  // subsequent optimizations.
+  map_defun_node->AddAttr("output_shapes", std::vector<PartialTensorShape>(
+                                               map_defun_fn->ret_types.size()));
+}
+
 Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node,
                          const TensorDesc& output) {
-  // Note that we don't update MapDefun attrs as we go, only when we are done
   DataType type = output.first->output_type(output.second);
   int index = map_defun_fn->ret_nodes.size();
 
@@ -83,13 +92,13 @@ Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node,
   map_defun_fn->graph->AddEdge(output.first, output.second, ret_node, 0);
   map_defun_fn->ret_nodes.push_back(ret_node);
   map_defun_fn->ret_types.push_back(type);
+  UpdateMapDefunAttrs(map_defun_fn, map_defun_node);
 
   return s;
 }
 
 void RemoveMapDefunOutput(int output_position, Graph* outer_scope,
                           FunctionBody* map_defun_fn, Node* map_defun_node) {
-  // Note that we don't update MapDefun attrs as we go, only when we are done
   DCHECK_LT(output_position, map_defun_fn->ret_nodes.size())
       << "Trying to remove output that doesn't exist. Output number: "
       << output_position;
@@ -102,6 +111,7 @@ void RemoveMapDefunOutput(int output_position, Graph* outer_scope,
                                 output_position);
   map_defun_fn->ret_types.erase(map_defun_fn->ret_types.begin() +
                                 output_position);
+  UpdateMapDefunAttrs(map_defun_fn, map_defun_node);
 
   // Renumber the nodes and edges that come after
   for (int i = 0; i < num_later_outputs; ++i) {
@@ -342,13 +352,6 @@ void Vectorization::VectorizeHelper() {
   // need the MapDefun node and can delete it.
   if (map_defun_fn_->ret_nodes.empty()) {
     outer_scope_->RemoveNode(map_defun_node_);
-  } else {
-    // Update MapDefun node attrs accordingly
-    DCHECK_EQ(map_defun_fn_->ret_types.size(), map_defun_fn_->ret_nodes.size());
-    map_defun_node_->AddAttr(
-        "output_shapes",
-        std::vector<PartialTensorShape>(map_defun_fn_->ret_types.size()));
-    map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types);
   }
 }
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index a6020e36bb..be498d150b 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -145,7 +145,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
   FunctionDef* vectorized;
   Status s = VectorizeMapDefun(outer, *map_defun, &lib, &vectorized);
   LOG(ERROR) << s;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   EXPECT_EQ(GetRetval(*vectorized, 0), "ret0");
@@ -237,7 +237,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
 
   auto map_defun_node = vectorized->node_def(
       function_utils::FindFunctionNodeWithOp("MapDefun", *vectorized));
@@ -311,7 +311,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -389,7 +389,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -475,7 +475,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& unpack_node = vectorized->node_def(
@@ -574,7 +574,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -654,7 +654,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   // They should be unchanged
   // We check this somewhat manually as the names of nodes may have changed
   EXPECT_EQ(vectorized->node_def_size(), 1);
@@ -738,7 +738,7 @@ TEST(VectorizeMapDefunTest, VectorizeConst) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized));
@@ -817,7 +817,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   auto const_node = vectorized->node_def(
@@ -902,7 +902,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
   *lib.add_function() = inner;
 
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
 
   auto find_const = [vectorized](int val) -> const NodeDef* {
     for (const auto& n : vectorized->node_def()) {
@@ -924,6 +924,89 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
   EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name()));
 }
 
+// Before:
+//
+//                   +------+
+// +-----------------+ Arg0 +----------------------+
+// |                 +---+--+                      |
+// |                     |                         |
+// |                 +---v--+                      |
+// |   +-------------+ Arg0 +------------------+   |
+// |   |             +---+--+                  |   |
+// |   |                 |                     |   |
+// |   |                 |          +-----+    |   |
+// |   |                 |          |Const|    |   |
+// |   |                 |          +-+---+    |   |
+// |   |                 |            |        |   |
+// |   |                 |   +--------+        |   |
+// |   |                 |   |                 |   |
+// |   |               +-v---v-+               |   |
+// |   |               |  Add  |               |   |
+// |   |               +-+-----+               |   |
+// |   |                 |                     |   |
+// |   |                 |                     |   |
+// |   | MapDefun      +-v----+                |   |
+// |   +---------------| Ret  |----------------+   |
+// |                   +--v---+                    |
+// |                      |                        |
+// |                      |                        |
+// |                   +--v----                    |
+// +-------------------| Ret  |--------------------+
+//                     +------+
+//
+//
+//  After:
+//
+//              +------+
+// +------------+ Arg0 +----------------------+
+// |            +---+--+                      |
+// |                |                         |
+// |                |              +-----+    |
+// |                |              |Const|    |
+// |              +-v---------+    +--+--+    |
+// |              |ExpandDims*|       |       |
+// |              +-----+-----+       |       |
+// |                    |             |       |
+// |                    +-----+ +-----+       |
+// |                          | |             |
+// |                        +-v-v-+           |
+// |                        | Add |           |
+// |                        +--+--+           |
+// |                           |              |
+// |                       +---v--+           |
+// +-----------------------+ Ret  +-----------+
+//                         +------+
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunAdd) {
+  // Note that this checks that the "Add" vectorizer is successful, but does not
+  // check that the transformed function is correct (i.e. produces the same
+  // output as the unvectorized map defun). For the latter, the tests are in
+  // tensorflow/python/data/experimental/kernel_tests/optimization/
+  // map_vectorization_test.py
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const", 2),
+       {{"Add"}, "Add", {"arg0", "Const:output:0"}, {{"T", DT_INT32}}}},
+      {{"ret0", "Add:z:0"}});
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}},
+                      inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+}
+
 // TODO(rachelim): More test cases when we get around to implementing them:
 // [] A badly defined converter, e.g. doesn't produce nodes that have the
 //    same number of outputs/inputs as the nodes to be converted
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index 803ff87924..d1d6cf28ab 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -80,6 +80,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("Basic", lambda x: (x, x + 1), None),
       ("Const", lambda x: 2, 12),
       ("Parallel", lambda x: (x, x + 1), 12),
+      ("Broadcast", lambda x: x + np.random.rand(5, 4, 3, 2), None),
       ("Gather", lambda x: array_ops.gather(x, 0), 12),
   )
   def testOptimization(self, map_fn, num_parallel_calls):
-- 
GitLab


From a6fcb9d3d81e9207650eda1c899051ccbb97dec7 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 14:38:55 -0700
Subject: [PATCH 1324/1357] Avoid creating sparse tensor objects before library
 is initialized.

PiperOrigin-RevId: 216425002
---
 .../sequence_feature_column_test.py           | 482 +++++++++---------
 1 file changed, 255 insertions(+), 227 deletions(-)

diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
index 929e83523a..707f93b2da 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
@@ -39,18 +39,18 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input_a': sparse_tensor.SparseTensorValue(
+       'sparse_input_args_a': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2, 0, 1),
-           dense_shape=(2, 2)),
-       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
+       'sparse_input_args_b': {
            # example 0, ids [1]
            # example 1, ids [2, 0]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(1, 2, 0),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (1, 2, 0),
+           'dense_shape': (2, 2)},
        'expected_input_layer': [
            # example 0, ids_a [2], ids_b [1]
            [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
@@ -58,20 +58,20 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
            [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],],
        'expected_sequence_length': [1, 2]},
       {'testcase_name': '3D',
-       'sparse_input_a': sparse_tensor.SparseTensorValue(
+       'sparse_input_args_a': {
            # feature 0, ids [[2], [0, 1]]
            # feature 1, ids [[0, 0], [1]]
-           indices=(
+           'indices': (
                (0, 0, 0), (0, 1, 0), (0, 1, 1),
                (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2, 0, 1, 0, 0, 1),
-           dense_shape=(2, 2, 2)),
-       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           'values': (2, 0, 1, 0, 0, 1),
+           'dense_shape': (2, 2, 2)},
+       'sparse_input_args_b': {
            # feature 0, ids [[1, 1], [1]]
            # feature 1, ids [[2], [0]]
-           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           values=(1, 1, 1, 2, 0),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (1, 1, 1, 2, 0),
+           'dense_shape': (2, 2, 2)},
        'expected_input_layer': [
            # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
            [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]],
@@ -80,9 +80,11 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
        'expected_sequence_length': [2, 2]},
       )
   def test_embedding_column(
-      self, sparse_input_a, sparse_input_b, expected_input_layer,
+      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
       expected_sequence_length):
 
+    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
+    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
     vocabulary_size = 3
     embedding_dimension_a = 2
     embedding_values_a = (
@@ -261,18 +263,18 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input_a': sparse_tensor.SparseTensorValue(
+       'sparse_input_args_a': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2, 0, 1),
-           dense_shape=(2, 2)),
-       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
+       'sparse_input_args_b': {
            # example 0, ids [1]
            # example 1, ids [1, 0]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(1, 1, 0),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (1, 1, 0),
+           'dense_shape': (2, 2)},
        'expected_input_layer': [
            # example 0, ids_a [2], ids_b [1]
            [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
@@ -280,20 +282,20 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
            [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
        'expected_sequence_length': [1, 2]},
       {'testcase_name': '3D',
-       'sparse_input_a': sparse_tensor.SparseTensorValue(
+       'sparse_input_args_a': {
            # feature 0, ids [[2], [0, 1]]
            # feature 1, ids [[0, 0], [1]]
-           indices=(
+           'indices': (
                (0, 0, 0), (0, 1, 0), (0, 1, 1),
                (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2, 0, 1, 0, 0, 1),
-           dense_shape=(2, 2, 2)),
-       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           'values': (2, 0, 1, 0, 0, 1),
+           'dense_shape': (2, 2, 2)},
+       'sparse_input_args_b': {
            # feature 0, ids [[1, 1], [1]]
            # feature 1, ids [[1], [0]]
-           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           values=(1, 1, 1, 1, 0),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (1, 1, 1, 1, 0),
+           'dense_shape': (2, 2, 2)},
        'expected_input_layer': [
            # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
            [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]],
@@ -302,8 +304,11 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
        'expected_sequence_length': [2, 2]},
       )
   def test_indicator_column(
-      self, sparse_input_a, sparse_input_b, expected_input_layer,
+      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
       expected_sequence_length):
+    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
+    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
+
     vocabulary_size_a = 3
     vocabulary_size_b = 2
 
@@ -350,30 +355,32 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [0., 1]
            # example 1, [10.]
-           indices=((0, 0), (0, 1), (1, 0)),
-           values=(0., 1., 10.),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (0, 1), (1, 0)),
+           'values': (0., 1., 10.),
+           'dense_shape': (2, 2)},
        'expected_input_layer': [
            [[0.], [1.]],
            [[10.], [0.]]],
        'expected_sequence_length': [2, 1]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # feature 0, ids [[20, 3], [5]]
            # feature 1, ids [[3], [8]]
-           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           values=(20, 3, 5., 3., 8.),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (20, 3, 5., 3., 8.),
+           'dense_shape': (2, 2, 2)},
        'expected_input_layer': [
            [[20.], [3.], [5.], [0.]],
            [[3.], [0.], [8.], [0.]]],
        'expected_sequence_length': [2, 2]},
       )
   def test_numeric_column(
-      self, sparse_input, expected_input_layer, expected_sequence_length):
+      self, sparse_input_args, expected_input_layer, expected_sequence_length):
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
+
     numeric_column = sfc.sequence_numeric_column('aaa')
 
     input_layer, sequence_length = sfc.sequence_input_layer(
@@ -387,27 +394,27 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [0., 1.,  2., 3., 4., 5., 6., 7.]
            # example 1, [10., 11., 12., 13.]
-           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 8)),
+           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 8)},
        'expected_input_layer': [
            # The output of numeric_column._get_dense_tensor should be flattened.
            [[0., 1., 2., 3.], [4., 5., 6., 7.]],
            [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
        'expected_sequence_length': [2, 1]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
            # example 1, [[10., 11., 12., 13.], []]
-           indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
-                    (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
-                    (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 2, 4)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
+                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 2, 4)},
        'expected_input_layer': [
            # The output of numeric_column._get_dense_tensor should be flattened.
            [[0., 1., 2., 3.], [4., 5., 6., 7.]],
@@ -415,8 +422,10 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
        'expected_sequence_length': [2, 1]},
       )
   def test_numeric_column_multi_dim(
-      self, sparse_input, expected_input_layer, expected_sequence_length):
+      self, sparse_input_args, expected_input_layer, expected_sequence_length):
     """Tests sequence_input_layer for multi-dimensional numeric_column."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
+
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     input_layer, sequence_length = sfc.sequence_input_layer(
@@ -460,28 +469,29 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
            # example 1, [[[10., 11.],  [12., 13.]]]
-           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 8)),
+           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 8)},
        'expected_shape': [2, 2, 4]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
            # example 1, [[10., 11., 12., 13.], []]
-           indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
-                    (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2),
-                    (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 2, 4)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2),
+                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 2, 4)},
        'expected_shape': [2, 2, 4]},
       )
   def test_static_shape_from_tensors_numeric(
-      self, sparse_input, expected_shape):
+      self, sparse_input_args, expected_shape):
     """Tests that we return a known static shape when we have one."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     input_layer, _ = sfc.sequence_input_layer(
@@ -492,30 +502,31 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
-           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-           values=(2, 0, 1, 1),
-           dense_shape=(4, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
+           'values': (2, 0, 1, 1),
+           'dense_shape': (4, 2)},
        'expected_shape': [4, 2, 3]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
            # example 2, ids []
            # example 3, ids [[1], [0, 2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           values=(2, 0, 1, 2, 1, 0, 2),
-           dense_shape=(4, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           'values': (2, 0, 1, 2, 1, 0, 2),
+           'dense_shape': (4, 2, 2)},
        'expected_shape': [4, 2, 3]}
       )
   def test_static_shape_from_tensors_indicator(
-      self, sparse_input, expected_shape):
+      self, sparse_input_args, expected_shape):
     """Tests that we return a known static shape when we have one."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=3)
     indicator_column = fc.indicator_column(categorical_column)
@@ -546,11 +557,12 @@ class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': 'rank_lt_3',
-       'seq_input': ops.convert_to_tensor(np.arange(100).reshape(10, 10))},
+       'seq_input_arg': np.arange(100).reshape(10, 10)},
       {'testcase_name': 'rank_gt_3',
-       'seq_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 2, 2))}
+       'seq_input_arg': np.arange(100).reshape(5, 5, 2, 2)}
       )
-  def test_sequence_input_throws_error(self, seq_input):
+  def test_sequence_input_throws_error(self, seq_input_arg):
+    seq_input = ops.convert_to_tensor(seq_input_arg)
     context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
     seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
     context_input = math_ops.cast(context_input, dtype=dtypes.float32)
@@ -559,11 +571,12 @@ class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': 'rank_lt_2',
-       'context_input': ops.convert_to_tensor(np.arange(100))},
+       'context_input_arg': np.arange(100)},
       {'testcase_name': 'rank_gt_2',
-       'context_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))}
+       'context_input_arg': np.arange(100).reshape(5, 5, 4)}
       )
-  def test_context_input_throws_error(self, context_input):
+  def test_context_input_throws_error(self, context_input_arg):
+    context_input = ops.convert_to_tensor(context_input_arg)
     seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
     seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
     context_input = math_ops.cast(context_input, dtype=dtypes.float32)
@@ -657,25 +670,27 @@ class SequenceCategoricalColumnWithIdentityTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(1, 2, 0),
-           dense_shape=(2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           values=np.array((1, 2, 0), dtype=np.int64),
-           dense_shape=(2, 2, 1))},
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (1, 2, 0),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           'values': np.array((1, 2, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=(6, 7, 8),
-           dense_shape=(2, 2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=(6, 7, 8),
-           dense_shape=(2, 2, 2))}
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': (6, 7, 8),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': (6, 7, 8),
+           'dense_shape': (2, 2, 2)}}
       )
-  def test_get_sparse_tensors(self, inputs, expected):
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
     column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9)
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
@@ -691,27 +706,29 @@ class SequenceCategoricalColumnWithHashBucketTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=('omar', 'stringer', 'marlo'),
-           dense_shape=(2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': ('omar', 'stringer', 'marlo'),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
            # Ignored to avoid hash dependence in test.
-           values=np.array((0, 0, 0), dtype=np.int64),
-           dense_shape=(2, 2, 1))},
+           'values': np.array((0, 0, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=('omar', 'stringer', 'marlo'),
-           dense_shape=(2, 2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': ('omar', 'stringer', 'marlo'),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
            # Ignored to avoid hash dependence in test.
-           values=np.array((0, 0, 0), dtype=np.int64),
-           dense_shape=(2, 2, 2))}
+           'values': np.array((0, 0, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 2)}}
       )
-  def test_get_sparse_tensors(self, inputs, expected):
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
     column = sfc.sequence_categorical_column_with_hash_bucket(
         'aaa', hash_bucket_size=10)
 
@@ -742,25 +759,27 @@ class SequenceCategoricalColumnWithVocabularyFileTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=('marlo', 'skywalker', 'omar'),
-           dense_shape=(2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           values=np.array((2, -1, 0), dtype=np.int64),
-           dense_shape=(2, 2, 1))},
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': ('marlo', 'skywalker', 'omar'),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           'values': np.array((2, -1, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=('omar', 'skywalker', 'marlo'),
-           dense_shape=(2, 2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=np.array((0, -1, 2), dtype=np.int64),
-           dense_shape=(2, 2, 2))}
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': ('omar', 'skywalker', 'marlo'),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': np.array((0, -1, 2), dtype=np.int64),
+           'dense_shape': (2, 2, 2)}}
       )
-  def test_get_sparse_tensors(self, inputs, expected):
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
     column = sfc.sequence_categorical_column_with_vocabulary_file(
         key='aaa',
         vocabulary_file=self._wire_vocabulary_file_name,
@@ -779,25 +798,27 @@ class SequenceCategoricalColumnWithVocabularyListTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=('marlo', 'skywalker', 'omar'),
-           dense_shape=(2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           values=np.array((2, -1, 0), dtype=np.int64),
-           dense_shape=(2, 2, 1))},
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': ('marlo', 'skywalker', 'omar'),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           'values': np.array((2, -1, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=('omar', 'skywalker', 'marlo'),
-           dense_shape=(2, 2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=np.array((0, -1, 2), dtype=np.int64),
-           dense_shape=(2, 2, 2))}
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': ('omar', 'skywalker', 'marlo'),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': np.array((0, -1, 2), dtype=np.int64),
+           'dense_shape': (2, 2, 2)}}
       )
-  def test_get_sparse_tensors(self, inputs, expected):
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
     column = sfc.sequence_categorical_column_with_vocabulary_list(
         key='aaa',
         vocabulary_list=('omar', 'stringer', 'marlo'))
@@ -815,14 +836,14 @@ class SequenceEmbeddingColumnTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
-           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-           values=(2, 0, 1, 1),
-           dense_shape=(4, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
+           'values': (2, 0, 1, 1),
+           'dense_shape': (4, 2)},
        'expected': [
            # example 0, ids [2]
            [[7., 11.], [0., 0.]],
@@ -833,15 +854,15 @@ class SequenceEmbeddingColumnTest(
            # example 3, ids [1]
            [[3., 5.], [0., 0.]]]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
            # example 2, ids []
            # example 3, ids [[1], [0, 2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           values=(2, 0, 1, 2, 1, 0, 2),
-           dense_shape=(4, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           'values': (2, 0, 1, 2, 1, 0, 2),
+           'dense_shape': (4, 2, 2)},
        'expected': [
            # example 0, ids [[2]]
            [[7., 11.], [0., 0.]],
@@ -852,7 +873,8 @@ class SequenceEmbeddingColumnTest(
            # example 3, ids [[1], [0, 2]]
            [[3., 5.], [4., 6.5]]]}
       )
-  def test_get_sequence_dense_tensor(self, inputs, expected):
+  def test_get_sequence_dense_tensor(self, inputs_args, expected):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     vocabulary_size = 3
     embedding_dimension = 2
     embedding_values = (
@@ -884,23 +906,24 @@ class SequenceEmbeddingColumnTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2, 0, 1),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
        'expected_sequence_length': [1, 2]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2, 0, 1, 2),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2, 0, 1, 2),
+           'dense_shape': (2, 2, 2)},
        'expected_sequence_length': [1, 2]}
       )
-  def test_sequence_length(self, inputs, expected_sequence_length):
+  def test_sequence_length(self, inputs_args, expected_sequence_length):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     vocabulary_size = 3
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
@@ -1124,14 +1147,14 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
-           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-           values=(2, 0, 1, 1),
-           dense_shape=(4, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
+           'values': (2, 0, 1, 1),
+           'dense_shape': (4, 2)},
        'expected': [
            # example 0, ids [2]
            [[0., 0., 1.], [0., 0., 0.]],
@@ -1142,15 +1165,15 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
            # example 3, ids [1]
            [[0., 1., 0.], [0., 0., 0.]]]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
            # example 2, ids []
            # example 3, ids [[1], [2, 2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           values=(2, 0, 1, 2, 1, 2, 2),
-           dense_shape=(4, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           'values': (2, 0, 1, 2, 1, 2, 2),
+           'dense_shape': (4, 2, 2)},
        'expected': [
            # example 0, ids [[2]]
            [[0., 0., 1.], [0., 0., 0.]],
@@ -1161,7 +1184,8 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
            # example 3, ids [[1], [2, 2]]
            [[0., 1., 0.], [0., 0., 2.]]]}
       )
-  def test_get_sequence_dense_tensor(self, inputs, expected):
+  def test_get_sequence_dense_tensor(self, inputs_args, expected):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     vocabulary_size = 3
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
@@ -1176,23 +1200,24 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2, 0, 1),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
        'expected_sequence_length': [1, 2]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2, 0, 1, 2),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2, 0, 1, 2),
+           'dense_shape': (2, 2, 2)},
        'expected_sequence_length': [1, 2]}
       )
-  def test_sequence_length(self, inputs, expected_sequence_length):
+  def test_sequence_length(self, inputs_args, expected_sequence_length):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     vocabulary_size = 3
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
@@ -1269,27 +1294,28 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, values [0., 1]
            # example 1, [10.]
-           indices=((0, 0), (0, 1), (1, 0)),
-           values=(0., 1., 10.),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (0, 1), (1, 0)),
+           'values': (0., 1., 10.),
+           'dense_shape': (2, 2)},
        'expected': [
            [[0.], [1.]],
            [[10.], [0.]]]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # feature 0, ids [[20, 3], [5]]
            # feature 1, ids [[3], [8]]
-           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           values=(20, 3, 5., 3., 8.),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (20, 3, 5., 3., 8.),
+           'dense_shape': (2, 2, 2)},
        'expected': [
            [[20.], [3.], [5.], [0.]],
            [[3.], [0.], [8.], [0.]]]},
       )
-  def test_get_sequence_dense_tensor(self, inputs, expected):
+  def test_get_sequence_dense_tensor(self, inputs_args, expected):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     numeric_column = sfc.sequence_numeric_column('aaa')
 
     dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
@@ -1335,23 +1361,23 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
            # example 1, [[[10., 11.],  [12., 13.]]]
-           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 8)),
+           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 8)},
        'expected_dense_tensor': [
            [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
            [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
-                    (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
-                    (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 2, 8)),
+       'sparse_input_args': {
+           'indices': ((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
+                       (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
+                       (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 2, 8)},
        'expected_dense_tensor': [
            [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]],
             [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]],
@@ -1359,8 +1385,9 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
             [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]},
       )
   def test_get_dense_tensor_multi_dim(
-      self, sparse_input, expected_dense_tensor):
+      self, sparse_input_args, expected_dense_tensor):
     """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
@@ -1372,43 +1399,44 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2., 0., 1.),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2., 0., 1.),
+           'dense_shape': (2, 2)},
        'expected_sequence_length': [1, 2],
        'shape': (1,)},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2., 0., 1., 2.),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2., 0., 1., 2.),
+           'dense_shape': (2, 2, 2)},
        'expected_sequence_length': [1, 2],
        'shape': (1,)},
       {'testcase_name': '2D_with_shape',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2., 0., 1.),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2., 0., 1.),
+           'dense_shape': (2, 2)},
        'expected_sequence_length': [1, 1],
        'shape': (2,)},
       {'testcase_name': '3D_with_shape',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2., 0., 1., 2.),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2., 0., 1., 2.),
+           'dense_shape': (2, 2, 2)},
        'expected_sequence_length': [1, 2],
        'shape': (2,)},
       )
-  def test_sequence_length(self, inputs, expected_sequence_length, shape):
+  def test_sequence_length(self, inputs_args, expected_sequence_length, shape):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     numeric_column = sfc.sequence_numeric_column('aaa', shape=shape)
 
     _, sequence_length = numeric_column._get_sequence_dense_tensor(
-- 
GitLab


From c1093a3757224257fed0f7a1959d0fc99d5c757f Mon Sep 17 00:00:00 2001
From: Ruoxin Sang <rxsang@google.com>
Date: Tue, 9 Oct 2018 15:02:51 -0700
Subject: [PATCH 1325/1357] In TPUMirroredVariable, when setting
 _initializer_op and _initial_value attributes, set the attributes of all the
 contained variables. This fixes a bug that tf.train.init_from_checkpoint
 doesn't overwrite the initialization values correctly for
 TPUMirroredVariable.

PiperOrigin-RevId: 216429476
---
 tensorflow/contrib/distribute/python/values.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index 0dd78ba185..472cb4230c 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -475,6 +475,11 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
     self._aggregation = aggregation
     # Needed for GradientTape
     self._trainable = self._primary_var.trainable
+    # Typically like `DistributedVariable`, a `TPUMirroredVariable`'s
+    # initializer is composed of the initializers of the components variables.
+    # However, in some cases, such as when restoring from a checkpoint, we may
+    # set the _initializer_op property on the entire `TPUMirroredVariable`.
+    self._initializer_op = None
 
   def _get(self, device=None):
     """Returns the value for the current device or raises a ValueError."""
@@ -704,8 +709,12 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
 
   @property
   def initializer(self):
-    return control_flow_ops.group(
-        [v.initializer for v in nest.flatten(self._index)])
+    if self._initializer_op:
+      init_op = self._initializer_op
+    else:
+      init_op = control_flow_ops.group(
+          [v.initializer for v in self._index.values()])
+    return init_op
 
   @property
   def graph(self):
-- 
GitLab


From 5f69248a692f7b47ea11930621f4f19d0397fe8c Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Tue, 9 Oct 2018 15:07:47 -0700
Subject: [PATCH 1326/1357] Make defun work under distributed strategies.

The core of the change is have the gradient tape capture
distributed variables instead of plain ResourceVariables.
In other words, we move the distribution awareness from defun
down to tape and rely on distributed variable magic to provide us
with the right variable at runtime.

In tower context, we always watch the container (e.g. MirroredVariable).
In cross tower context, we always watch all the components.

PiperOrigin-RevId: 216430530
---
 .../distribute/python/mirrored_strategy.py    | 23 +++++---
 .../python/mirrored_strategy_multigpu_test.py | 58 +++++++++++++++++++
 tensorflow/python/eager/backprop_test.py      | 24 ++++++++
 tensorflow/python/eager/function.py           | 53 ++---------------
 tensorflow/python/eager/tape.py               | 31 +++++++++-
 5 files changed, 128 insertions(+), 61 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index a32424b316..0f82508428 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -293,7 +293,8 @@ def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs):
       collections.append(ops.GraphKeys.TRAINABLE_VARIABLES)
       l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES)
       for v in index.values():
-        l.remove(v)
+        if v in l:
+          l.remove(v)
     g.add_to_collections(collections, result)
   elif ops.GraphKeys.GLOBAL_STEP in collections:
     ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, result)
@@ -461,16 +462,20 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
             # name as the absolute name of the variable.
             kwargs["name"] = "%s/replica_%d/" % (var0name, i)
             # Initialize replicas with the same value:
-            if context.executing_eagerly():
-              kwargs["initial_value"] = array_ops.identity(
-                  index[devices[0]].value())
-            else:
-              def initial_value_fn(device=d):
+            def initial_value_fn(device=d):
+              if context.executing_eagerly():
+                init_value = index[devices[0]].value()
+                return array_ops.identity(init_value)
+              else:
                 with ops.device(device):
-                  return array_ops.identity(index[devices[0]].initial_value)
-              kwargs["initial_value"] = initial_value_fn
+                  init_value = index[devices[0]].initial_value
+                  return array_ops.identity(init_value)
+            kwargs["initial_value"] = initial_value_fn
           with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
-            v = next_creator(*args, **kwargs)
+            # Don't record operations (e.g. other variable reads) during
+            # variable creation.
+            with tape.stop_recording():
+              v = next_creator(*args, **kwargs)
           assert not isinstance(v, values.DistributedVariable)
           index[d] = v
       return index
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index eeac528329..ed36639ce8 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -20,6 +20,8 @@ from __future__ import print_function
 
 import sys
 
+import numpy as np
+
 from tensorflow.contrib.distribute.python import mirrored_strategy
 from tensorflow.contrib.distribute.python import multi_worker_test_base
 from tensorflow.contrib.distribute.python import strategy_test_lib
@@ -34,7 +36,10 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import training as keras_training
+from tensorflow.python.keras.layers import core as keras_core
 from tensorflow.python.layers import core
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import rnn
 from tensorflow.python.ops import rnn_cell_impl
@@ -43,6 +48,8 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.training import device_util
 from tensorflow.python.training import distribution_strategy_context
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import optimizer as optimizer_lib
 from tensorflow.python.training import server_lib
 
 
@@ -1245,6 +1252,22 @@ class MockModel(object):
     return x
 
 
+class MiniModel(keras_training.Model):
+  """Minimal model for mnist.
+
+  Useful for testing and debugging on slow TPU simulators.
+  """
+
+  def __init__(self):
+    super(MiniModel, self).__init__(name="")
+    self.fc = keras_core.Dense(1, name="fc", kernel_initializer="ones",
+                               bias_initializer="ones")
+
+  def call(self, inputs, training=True):
+    inputs = array_ops.ones([1, 10])
+    return self.fc(inputs)
+
+
 class MirroredStrategyDefunTest(test.TestCase):
 
   def _skip_eager_if_gpus_less_than(self, num_gpus):
@@ -1365,6 +1388,41 @@ class MirroredStrategyDefunTest(test.TestCase):
                                         "GPU:0": 3.0 * 1.25})
     self._call_and_check(fn1, [factors], expected_result, [fn1])
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testTrain(self):
+    self._skip_eager_if_gpus_less_than(1)
+
+    cpu_dev = device_util.canonicalize("CPU:0")
+    gpu_dev = device_util.canonicalize("GPU:0")
+    devices = [cpu_dev, gpu_dev]
+    dist = mirrored_strategy.MirroredStrategy(devices)
+
+    with dist.scope():
+      mock_model = MiniModel()
+      mock_model.call = function.defun(mock_model.call)
+
+      def loss_fn(ctx):
+        del ctx
+        return mock_model(array_ops.ones([1, 10]))
+
+      gradients_fn = backprop.implicit_grad(loss_fn)
+      gradients_fn = optimizer_lib.get_filtered_grad_fn(gradients_fn)
+      grads_and_vars = dist.call_for_each_tower(
+          gradients_fn, None, run_concurrently=False)
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.25)
+      update_ops = optimizer._distributed_apply(dist, grads_and_vars)  # pylint: disable=protected-access
+
+      if not context.executing_eagerly():
+        self.evaluate(variables.global_variables_initializer())
+        self.evaluate(update_ops)
+
+      updated_var_values = self.evaluate(mock_model.variables)
+      # All variables start at 1.0 and get two updates of 0.25.
+      self.assertAllEqual(0.5 * np.ones([10, 1]), updated_var_values[0])
+      self.assertAllEqual([0.5], updated_var_values[1])
+
+
 
 class MultiWorkerMirroredStrategyTest(
     multi_worker_test_base.MultiWorkerTestBase,
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 7e5c9f3cb6..b1b20fafd2 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -258,6 +258,30 @@ class BackpropTest(test.TestCase):
       loss += v * v
     self.assertAllEqual(t.gradient(loss, v), 2.0)
 
+  def testAutomaticWatchedVariables(self):
+    with backprop.GradientTape() as t:
+      self.assertEqual(0, len(t.watched_variables()))
+      v = resource_variable_ops.ResourceVariable(1.0)
+      loss = v * v
+      self.assertAllEqual([v], t.watched_variables())
+
+      t.reset()
+      self.assertEqual(0, len(t.watched_variables()))
+      loss += v * v
+      self.assertAllEqual([v], t.watched_variables())
+
+  def testExplicitWatchedVariables(self):
+    with backprop.GradientTape() as t:
+      self.assertEqual(0, len(t.watched_variables()))
+      v = resource_variable_ops.ResourceVariable(1.0)
+      t.watch(v)
+      self.assertAllEqual([v], t.watched_variables())
+
+      t.reset()
+      self.assertEqual(0, len(t.watched_variables()))
+      t.watch(v)
+      self.assertAllEqual([v], t.watched_variables())
+
   @test_util.assert_no_new_tensors
   def testGradientNone(self):
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index ff138cad1e..f1a63adce1 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -51,7 +51,6 @@ from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
@@ -202,6 +201,7 @@ class FuncGraph(ops.Graph):
     # from the default graph even in eager mode. Maybe it should be part of the
     # eager context?
     self._distribution_strategy_stack = graph._distribution_strategy_stack
+    self._variable_creator_stack = graph._variable_creator_stack
     # Inherit the graph key, since this is used for matching variables in
     # optimizers.
     self._graph_key = graph._graph_key
@@ -563,17 +563,6 @@ class Function(object):
         self._func_graph.inputs, self._func_graph.outputs, self._attrs)
     self._backward_graph_function = None
 
-    # Map holding distributed variables, keyed by resource handle tensors.
-    self._distributed_variables = {}
-    strategy = distribution_strategy_context.get_distribution_strategy()
-    for variable in self._func_graph.variables:
-      # If variable is not distributed, unwrap returns [variable].
-      component_variables = strategy.unwrap(variable)
-      # Only update the dictionary when the variable is actually distributed.
-      if (len(component_variables) > 1 or component_variables[0] != variable):
-        for component_variable in component_variables:
-          self._distributed_variables[component_variable.handle] = variable
-
   def __call__(self, *args):
     """Executes the wrapped function.
 
@@ -602,7 +591,6 @@ class Function(object):
       if v.trainable:
         tape.variable_accessed(v)
 
-    captures = self._resolve_captured_inputs()
     tensor_inputs = []
     for i, arg in enumerate(nest.flatten(args)):
       if isinstance(arg, resource_variable_ops.ResourceVariable):
@@ -615,9 +603,10 @@ class Function(object):
         raise ValueError("All inputs to `Function`s must be Tensors; "
                          "on invocation of %s, the %d-th input (%s) was not a "
                          "Tensor." % (self._func_graph.name, i, str(arg)))
-    args = tensor_inputs + captures
+    args = tensor_inputs + self._captured_inputs
 
-    if tape.should_record(tensor_inputs) or tape.should_record(captures):
+    if (tape.should_record(tensor_inputs) or
+        tape.should_record(self._captured_inputs)):
       return self._backprop_call(args)
 
     # Only need to override the gradient in graph mode and when we have outputs.
@@ -804,32 +793,6 @@ class Function(object):
                           args, backward_function)
     return self._build_call_outputs(real_outputs)
 
-  def _resolve_captured_inputs(self):
-    """Resolve captured distributed variables to their current values.
-
-    Some inputs can be distributed variables. Such variables yield a different
-    component (i.e. actual tf.Variable) variables depending on the context of
-    execution.
-
-    Returns:
-      a list of resolved captured input tensors.
-    """
-    if self._distributed_variables:
-      # Loop over each captured input and check if it corresponds to something
-      # distributed. If so, get its _distributed_container and fetch the
-      # component appropriate for the current execution context.
-      resolved_captured_inputs = self._captured_inputs[:]
-      for i, captured_input in enumerate(self._captured_inputs):
-        distributed_var = self._distributed_variables.get(captured_input, None)
-        if distributed_var is not None:
-          # distributed variables override __getattr__ and substitute the
-          # right component variable. In here, `distributed_var.handle`
-          # actually does the equivalent of
-          # distributed_var.get_current_component_var().handle.
-          resolved_captured_inputs[i] = distributed_var.handle
-      return resolved_captured_inputs
-    return self._captured_inputs
-
   def _build_call_outputs(self, result):
     """Maps the fdef output list to actual output structure.
 
@@ -1010,14 +973,6 @@ def func_graph_from_py_func(name,
         for x in _flatten(func_graph.structured_outputs)
         if x is not None)
 
-    # Some captured variables might be components of DistributedValues.
-    # Instead of storing non-distributed component variables, we
-    # store their distributed containers so we can retrieve the correct
-    # component variables at call-time.
-    strategy = distribution_strategy_context.get_distribution_strategy()
-    for i, variable in enumerate(variables):
-      # If variable is not distributed value_container returns itself.
-      variables[i] = strategy.value_container(variable)
     func_graph.variables = variables
 
   # Register any other functions defined in the graph.
diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py
index 399d90223c..ade945f874 100644
--- a/tensorflow/python/eager/tape.py
+++ b/tensorflow/python/eager/tape.py
@@ -21,6 +21,15 @@ from __future__ import print_function
 import contextlib
 
 from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.util.lazy_loader import LazyLoader
+
+# There is a circular dependency between this, ops.py, and
+# distribution_strategy_context.
+# TODO(b/117329403): Remove this circular dependency.
+distribution_strategy_context = LazyLoader(
+    "distribute_lib", globals(),
+    "tensorflow.python.training."
+    "distribution_strategy_context")
 
 
 class Tape(object):
@@ -52,12 +61,28 @@ def watch(tape, tensor):
 
 def watch_variable(tape, variable):
   """Marks this variable to be watched by the given tape."""
-  pywrap_tensorflow.TFE_Py_TapeWatchVariable(tape._tape, variable)  # pylint: disable=protected-access
+  strategy = distribution_strategy_context.get_distribution_strategy()
+  if distribution_strategy_context.get_tower_context():
+    variables = [strategy.value_container(variable)]
+  else:
+    variables = strategy.unwrap(variable)
+  for var in variables:
+    pywrap_tensorflow.TFE_Py_TapeWatchVariable(tape._tape, var)  # pylint: disable=protected-access
 
 
 def variable_accessed(variable):
-  """Notifies all tapes in the stack that a variable has been accessed."""
-  pywrap_tensorflow.TFE_Py_TapeVariableAccessed(variable)
+  """Notifies all tapes in the stack that a variable has been accessed.
+
+  Args:
+    variable: variable to be watched.
+  """
+  strategy = distribution_strategy_context.get_distribution_strategy()
+  if distribution_strategy_context.get_tower_context():
+    variables = [strategy.value_container(variable)]
+  else:
+    variables = strategy.unwrap(variable)
+  for var in variables:
+    pywrap_tensorflow.TFE_Py_TapeVariableAccessed(var)
 
 
 def pop_tape(tape):
-- 
GitLab


From 771955e2b8be98a0b38fada41bd67f663397c87d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 15:18:21 -0700
Subject: [PATCH 1327/1357] Raises an appropriate error if `add_weight` is
 called on a Keras network.

PiperOrigin-RevId: 216432358
---
 tensorflow/python/keras/engine/network.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 5969fea2b2..266c48d304 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -432,6 +432,27 @@ class Network(base_layer.Layer):
           'assign variables to attributes and they will show up in the weights '
           'and variables properties.')
 
+  def add_weight(self,
+                 name,
+                 shape,
+                 dtype=None,
+                 initializer=None,
+                 regularizer=None,
+                 trainable=None,
+                 constraint=None,
+                 partitioner=None,
+                 use_resource=None,
+                 synchronization=variables.VariableSynchronization.AUTO,
+                 aggregation=variables.VariableAggregation.NONE,
+                 **kwargs):
+    if self._is_graph_network:
+      raise NotImplementedError('`add_weight` is not supported on Networks.')
+    else:
+      raise NotImplementedError(
+          '`add_weight` is not supported on Networks. However, you may '
+          'assign variables to attributes and they will show up in the weights '
+          'and variables properties.')
+
   def add_loss(self, *args, **kwargs):
     if context.executing_eagerly():
       raise NotImplementedError('`add_loss` is not supported on Networks '
-- 
GitLab


From 69c4a426fc4a3afd83c8190467b07c17b8b2ed60 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 9 Oct 2018 15:47:56 -0700
Subject: [PATCH 1328/1357] [XLA] Allow scatter to share the operand buffer
 with the output

This avoids a copy.

PiperOrigin-RevId: 216437329
---
 .../xla/service/hlo_dataflow_analysis.cc      |  1 +
 .../xla/service/hlo_dataflow_analysis_test.cc | 38 +++++++++++++++++++
 .../xla/service/tuple_points_to_analysis.cc   |  1 +
 .../service/tuple_points_to_analysis_test.cc  | 38 +++++++++++++++++++
 4 files changed, 78 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index c22adcdd8d..71122e73b1 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -1048,6 +1048,7 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser(
   }
 
   if (user->opcode() == HloOpcode::kDynamicUpdateSlice ||
+      user->opcode() == HloOpcode::kScatter ||
       user->opcode() == HloOpcode::kWhile) {
     // We eliminated other users in BufferLiveness::live_range_strictly_before,
     // so here we just need to check that the use is at operand index 0.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
index 510d6360a1..d27786d160 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
@@ -2283,6 +2283,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) {
       dataflow_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {}));
 }
 
+TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) {
+  const char* hlo_text = R"(
+    HloModule TensorFlowScatterV1
+
+    update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+      lhs = s32[] parameter(0)
+      ROOT rhs = s32[] parameter(1)
+    }
+
+    ENTRY main {
+      operand = s32[3,3] parameter(0)
+      indices = s32[2] parameter(1)
+      updates = s32[2,3] parameter(2)
+      ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=update_s32,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text));
+  computation_ = module_->entry_computation();
+  RunAnalysis();
+
+  HloInstruction* operand_param = computation_->parameter_instruction(0);
+  HloInstruction* indices_param = computation_->parameter_instruction(1);
+  HloInstruction* updates_param = computation_->parameter_instruction(2);
+  HloInstruction* scatter = computation_->root_instruction();
+
+  EXPECT_TRUE(dataflow_analysis_->CanShareOperandBufferWithUser(
+      operand_param, {}, scatter, {}));
+  EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(
+      indices_param, {}, scatter, {}));
+  EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(
+      updates_param, {}, scatter, {}));
+}
+
 TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) {
   auto builder = HloComputation::Builder(TestName());
 
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
index 811ac55e2d..ef4e69180d 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
@@ -756,6 +756,7 @@ bool TuplePointsToAnalysis::CanShareOperandBufferWithUser(
     }
   }
   if (user->opcode() == HloOpcode::kDynamicUpdateSlice ||
+      user->opcode() == HloOpcode::kScatter ||
       user->opcode() == HloOpcode::kWhile) {
     // We eliminated other users in BufferLiveness::live_range_strictly_before,
     // so here we just need to check that the use is at operand index 0.
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
index e9a07b14ed..a571bd571b 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
@@ -1010,6 +1010,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) {
       points_to_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {}));
 }
 
+TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) {
+  const char* hlo_text = R"(
+    HloModule TensorFlowScatterV1
+
+    update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+      lhs = s32[] parameter(0)
+      ROOT rhs = s32[] parameter(1)
+    }
+
+    ENTRY main {
+      operand = s32[3,3] parameter(0)
+      indices = s32[2] parameter(1)
+      updates = s32[2,3] parameter(2)
+      ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=update_s32,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text));
+  computation_ = module_->entry_computation();
+  RunAnalysis();
+
+  HloInstruction* operand_param = computation_->parameter_instruction(0);
+  HloInstruction* indices_param = computation_->parameter_instruction(1);
+  HloInstruction* updates_param = computation_->parameter_instruction(2);
+  HloInstruction* scatter = computation_->root_instruction();
+
+  EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser(
+      operand_param, {}, scatter, {}));
+  EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(
+      indices_param, {}, scatter, {}));
+  EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(
+      updates_param, {}, scatter, {}));
+}
+
 TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) {
   auto builder = HloComputation::Builder(TestName());
 
-- 
GitLab


From c98ffffcb4e0cc668c0ff7b73d51677a7eb7dcf4 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Tue, 9 Oct 2018 16:19:46 -0700
Subject: [PATCH 1329/1357] Part 2/3 of the update of tf.keras to the Keras
 2.2.4 API.

PiperOrigin-RevId: 216442569
---
 tensorflow/python/keras/backend.py            |  64 ++++++----
 tensorflow/python/keras/callbacks.py          | 101 +++++++++++----
 tensorflow/python/keras/callbacks_test.py     | 118 ++++++++++++++++--
 .../python/keras/layers/convolutional.py      |  14 ++-
 .../python/keras/layers/convolutional_test.py |  36 ++++++
 tensorflow/python/kernel_tests/rnn_test.py    |   4 +-
 .../golden/v1/tensorflow.keras.backend.pbtxt  |   2 +-
 ...flow.keras.callbacks.-early-stopping.pbtxt |   6 +-
 ...orflow.keras.callbacks.-tensor-board.pbtxt |   2 +-
 ...sorflow.keras.layers.-up-sampling2-d.pbtxt |   2 +-
 .../golden/v2/tensorflow.keras.backend.pbtxt  |   2 +-
 ...flow.keras.callbacks.-early-stopping.pbtxt |   6 +-
 ...orflow.keras.callbacks.-tensor-board.pbtxt |   2 +-
 ...sorflow.keras.layers.-up-sampling2-d.pbtxt |   2 +-
 14 files changed, 296 insertions(+), 65 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 13f52fbae7..7509ef9c59 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -2338,7 +2338,8 @@ def permute_dimensions(x, pattern):
 
 
 @tf_export('keras.backend.resize_images')
-def resize_images(x, height_factor, width_factor, data_format):
+def resize_images(x, height_factor, width_factor, data_format,
+                  interpolation='nearest'):
   """Resizes the images contained in a 4D tensor.
 
   Arguments:
@@ -2346,40 +2347,55 @@ def resize_images(x, height_factor, width_factor, data_format):
       height_factor: Positive integer.
       width_factor: Positive integer.
       data_format: One of `"channels_first"`, `"channels_last"`.
+      interpolation: A string, one of `nearest` or `bilinear`.
 
   Returns:
       A tensor.
 
   Raises:
-      ValueError: if `data_format` is neither
-          `channels_last` or `channels_first`.
+      ValueError: in case of incorrect value for
+        `data_format` or `interpolation`.
   """
   if data_format == 'channels_first':
-    original_shape = int_shape(x)
-    new_shape = array_ops.shape(x)[2:]
-    new_shape *= constant_op.constant(
-        np.array([height_factor, width_factor]).astype('int32'))
+    rows, cols = 2, 3
+  elif data_format == 'channels_last':
+    rows, cols = 1, 2
+  else:
+    raise ValueError('Invalid `data_format` argument: %s' % (data_format,))
+
+  original_shape = int_shape(x)
+  new_shape = array_ops.shape(x)[rows:cols + 1]
+  new_shape *= constant_op.constant(
+      np.array([height_factor, width_factor], dtype='int32'))
+
+  if data_format == 'channels_first':
     x = permute_dimensions(x, [0, 2, 3, 1])
+  if interpolation == 'nearest':
     x = image_ops.resize_nearest_neighbor(x, new_shape)
+  elif interpolation == 'bilinear':
+    x = image_ops.resize_bilinear(x, new_shape)
+  else:
+    raise ValueError('interpolation should be one '
+                     'of "nearest" or "bilinear".')
+  if data_format == 'channels_first':
     x = permute_dimensions(x, [0, 3, 1, 2])
-    x.set_shape((None, None, original_shape[2] * height_factor
-                 if original_shape[2] is not None else None,
-                 original_shape[3] * width_factor
-                 if original_shape[3] is not None else None))
-    return x
-  elif data_format == 'channels_last':
-    original_shape = int_shape(x)
-    new_shape = array_ops.shape(x)[1:3]
-    new_shape *= constant_op.constant(
-        np.array([height_factor, width_factor]).astype('int32'))
-    x = image_ops.resize_nearest_neighbor(x, new_shape)
-    x.set_shape((None, original_shape[1] * height_factor
-                 if original_shape[1] is not None else None,
-                 original_shape[2] * width_factor
-                 if original_shape[2] is not None else None, None))
-    return x
+
+  if original_shape[rows] is None:
+    new_height = None
   else:
-    raise ValueError('Invalid data_format: ' + str(data_format))
+    new_height = original_shape[rows] * height_factor
+
+  if original_shape[cols] is None:
+    new_width = None
+  else:
+    new_width = original_shape[cols] * width_factor
+
+  if data_format == 'channels_first':
+    output_shape = (None, None, new_height, new_width)
+  else:
+    output_shape = (None, new_height, new_width, None)
+  x.set_shape(output_shape)
+  return x
 
 
 @tf_export('keras.backend.resize_volumes')
diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index 3d6000f223..4c12c83a4c 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -24,6 +24,7 @@ from collections import Iterable
 from collections import OrderedDict
 import copy
 import csv
+import io
 import json
 import math
 import os
@@ -606,24 +607,28 @@ class EarlyStopping(Callback):
   """Stop training when a monitored quantity has stopped improving.
 
   Arguments:
-      monitor: quantity to be monitored.
-      min_delta: minimum change in the monitored quantity
+      monitor: Quantity to be monitored.
+      min_delta: Minimum change in the monitored quantity
           to qualify as an improvement, i.e. an absolute
           change of less than min_delta, will count as no
           improvement.
-      patience: number of epochs with no improvement
+      patience: Number of epochs with no improvement
           after which training will be stopped.
       verbose: verbosity mode.
-      mode: one of {auto, min, max}. In `min` mode,
+      mode: One of `{"auto", "min", "max"}`. In `min` mode,
           training will stop when the quantity
           monitored has stopped decreasing; in `max`
           mode it will stop when the quantity
           monitored has stopped increasing; in `auto`
           mode, the direction is automatically inferred
           from the name of the monitored quantity.
-      baseline: baseline value for the monitored quantity.
+      baseline: Baseline value for the monitored quantity.
           Training will stop if the model doesn't show improvement over the
           baseline.
+      restore_best_weights: Whether to restore model weights from
+          the epoch with the best value of the monitored quantity.
+          If False, the model weights obtained at the last step of
+          training are used.
   """
 
   def __init__(self,
@@ -632,7 +637,8 @@ class EarlyStopping(Callback):
                patience=0,
                verbose=0,
                mode='auto',
-               baseline=None):
+               baseline=None,
+               restore_best_weights=False):
     super(EarlyStopping, self).__init__()
 
     self.monitor = monitor
@@ -642,6 +648,8 @@ class EarlyStopping(Callback):
     self.min_delta = abs(min_delta)
     self.wait = 0
     self.stopped_epoch = 0
+    self.restore_best_weights = restore_best_weights
+    self.best_weights = None
 
     if mode not in ['auto', 'min', 'max']:
       logging.warning('EarlyStopping mode %s is unknown, '
@@ -673,25 +681,37 @@ class EarlyStopping(Callback):
       self.best = np.Inf if self.monitor_op == np.less else -np.Inf
 
   def on_epoch_end(self, epoch, logs=None):
-    current = logs.get(self.monitor)
+    current = self.get_monitor_value(logs)
     if current is None:
-      logging.warning('Early stopping conditioned on metric `%s` '
-                      'which is not available. Available metrics are: %s',
-                      self.monitor, ','.join(list(logs.keys())))
       return
     if self.monitor_op(current - self.min_delta, self.best):
       self.best = current
       self.wait = 0
+      if self.restore_best_weights:
+        self.best_weights = self.model.get_weights()
     else:
       self.wait += 1
       if self.wait >= self.patience:
         self.stopped_epoch = epoch
         self.model.stop_training = True
+        if self.restore_best_weights:
+          if self.verbose > 0:
+            print('Restoring model weights from the end of the best epoch.')
+          self.model.set_weights(self.best_weights)
 
   def on_train_end(self, logs=None):
     if self.stopped_epoch > 0 and self.verbose > 0:
       print('Epoch %05d: early stopping' % (self.stopped_epoch + 1))
 
+  def get_monitor_value(self, logs):
+    logs = logs or {}
+    monitor_value = logs.get(self.monitor)
+    if monitor_value is None:
+      logging.warning('Early stopping conditioned on metric `%s` '
+                      'which is not available. Available metrics are: %s',
+                      self.monitor, ','.join(list(logs.keys())))
+    return monitor_value
+
 
 @tf_export('keras.callbacks.RemoteMonitor')
 class RemoteMonitor(Callback):
@@ -839,6 +859,12 @@ class TensorBoard(Callback):
           `embeddings_layer_names`. Numpy array (if the model has a single
           input) or list of Numpy arrays (if the model has multiple inputs).
           Learn [more about embeddings](https://www.tensorflow.org/programmers_guide/embedding)
+      update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`,
+          writes the losses and metrics to TensorBoard after each batch.
+          The same applies for `'epoch'`. If using an integer, let's say `1000`,
+          the callback will write the metrics and losses to TensorBoard every
+          1000 samples. Note that writing too frequently to TensorBoard
+          can slow down your training.
 
   Raises:
       ValueError: If histogram_freq is set and no validation data is provided.
@@ -862,7 +888,8 @@ class TensorBoard(Callback):
                embeddings_freq=0,
                embeddings_layer_names=None,
                embeddings_metadata=None,
-               embeddings_data=None):
+               embeddings_data=None,
+               update_freq='epoch'):
     super(TensorBoard, self).__init__()
     self.log_dir = log_dir
     self.histogram_freq = histogram_freq
@@ -882,6 +909,12 @@ class TensorBoard(Callback):
     self.embeddings_layer_names = embeddings_layer_names
     self.embeddings_metadata = embeddings_metadata
     self.embeddings_data = embeddings_data
+    if update_freq == 'batch':
+      self.update_freq = 1
+    else:
+      self.update_freq = update_freq
+    self._samples_seen = 0
+    self._samples_seen_at_last_write = 0
 
   def _init_writer(self):
     """Sets file writer."""
@@ -1045,13 +1078,17 @@ class TensorBoard(Callback):
       # use v2 summary ops
       with self.writer.as_default(), summary_ops_v2.always_record_summaries():
         for name, value in logs.items():
-          summary_ops_v2.scalar(name, value.item(), step=step)
+          if isinstance(value, np.ndarray):
+            value = value.item()
+          summary_ops_v2.scalar(name, value, step=step)
     else:
       # use FileWriter from v1 summary
       for name, value in logs.items():
+        if isinstance(value, np.ndarray):
+          value = value.item()
         summary = tf_summary.Summary()
         summary_value = summary.value.add()
-        summary_value.simple_value = value.item()
+        summary_value.simple_value = value
         summary_value.tag = name
         self.writer.add_summary(summary, step)
     self.writer.flush()
@@ -1076,10 +1113,14 @@ class TensorBoard(Callback):
     """Writes scalar summaries for metrics on every training batch."""
     # Don't output batch_size and batch number as Tensorboard summaries
     logs = logs or {}
-    batch_logs = {('batch_' + k): v
-                  for k, v in logs.items()
-                  if k not in ['batch', 'size', 'num_steps']}
-    self._write_custom_summaries(self._total_batches_seen, batch_logs)
+    self._samples_seen += logs.get('size', 1)
+    samples_seen_since = self._samples_seen - self._samples_seen_at_last_write
+    if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq:
+      batch_logs = {('batch_' + k): v
+                    for k, v in logs.items()
+                    if k not in ['batch', 'size', 'num_steps']}
+      self._write_custom_summaries(self._total_batches_seen, batch_logs)
+      self._samples_seen_at_last_write = self._samples_seen
     self._total_batches_seen += 1
 
   def on_epoch_begin(self, epoch, logs=None):
@@ -1103,7 +1144,11 @@ class TensorBoard(Callback):
     logs = {('epoch_' + k): v
             for k, v in logs.items()
             if k not in ['batch', 'size', 'num_steps']}
-    self._write_custom_summaries(epoch, logs)
+    if self.update_freq == 'epoch':
+      step = epoch
+    else:
+      step = self._samples_seen
+    self._write_custom_summaries(step, logs)
 
     # pop the histogram summary op after each epoch
     if self.histogram_freq:
@@ -1309,7 +1354,12 @@ class CSVLogger(Callback):
     self.writer = None
     self.keys = None
     self.append_header = True
-    self.file_flags = 'b' if six.PY2 and os.name == 'nt' else ''
+    if six.PY2:
+      self.file_flags = 'b'
+      self._open_args = {}
+    else:
+      self.file_flags = ''
+      self._open_args = {'newline': '\n'}
     super(CSVLogger, self).__init__()
 
   def on_train_begin(self, logs=None):
@@ -1317,9 +1367,12 @@ class CSVLogger(Callback):
       if os.path.exists(self.filename):
         with open(self.filename, 'r' + self.file_flags) as f:
           self.append_header = not bool(len(f.readline()))
-      self.csv_file = open(self.filename, 'a' + self.file_flags)
+      mode = 'a'
     else:
-      self.csv_file = open(self.filename, 'w' + self.file_flags)
+      mode = 'w'
+    self.csv_file = io.open(self.filename,
+                            mode + self.file_flags,
+                            **self._open_args)
 
   def on_epoch_end(self, epoch, logs=None):
     logs = logs or {}
@@ -1345,9 +1398,13 @@ class CSVLogger(Callback):
       class CustomDialect(csv.excel):
         delimiter = self.sep
 
+      fieldnames = ['epoch'] + self.keys
+      if six.PY2:
+        fieldnames = [unicode(x) for x in fieldnames]
+
       self.writer = csv.DictWriter(
           self.csv_file,
-          fieldnames=['epoch'] + self.keys,
+          fieldnames=fieldnames,
           dialect=CustomDialect)
       if self.append_header:
         self.writer.writeheader()
diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py
index 467bc4cdc4..bb85347033 100644
--- a/tensorflow/python/keras/callbacks_test.py
+++ b/tensorflow/python/keras/callbacks_test.py
@@ -313,6 +313,42 @@ class KerasCallbacksTest(test.TestCase):
       hist = model.fit(data, labels, callbacks=[stopper], verbose=0, epochs=20)
       assert len(hist.epoch) >= patience
 
+  def test_EarlyStopping_final_weights_when_restoring_model_weights(self):
+
+    class DummyModel(object):
+
+      def __init__(self):
+        self.stop_training = False
+        self.weights = -1
+
+      def get_weights(self):
+        return self.weights
+
+      def set_weights(self, weights):
+        self.weights = weights
+
+      def set_weight_to_epoch(self, epoch):
+        self.weights = epoch
+
+    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
+                                               patience=2,
+                                               restore_best_weights=True)
+    early_stop.model = DummyModel()
+    losses = [0.2, 0.15, 0.1, 0.11, 0.12]
+    # The best configuration is in the epoch 2 (loss = 0.1000).
+    epochs_trained = 0
+    early_stop.on_train_begin()
+    for epoch in range(len(losses)):
+      epochs_trained += 1
+      early_stop.model.set_weight_to_epoch(epoch=epoch)
+      early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]})
+      if early_stop.model.stop_training:
+        break
+    # The best configuration is in epoch 2 (loss = 0.1000),
+    # and while patience = 2, we're restoring the best weights,
+    # so we end up at the epoch with the best weights, i.e. epoch 2
+    self.assertEqual(early_stop.model.get_weights(), 2)
+
   def test_RemoteMonitor(self):
     if requests is None:
       return
@@ -534,11 +570,15 @@ class KerasCallbacksTest(test.TestCase):
           batch_size=BATCH_SIZE,
           validation_data=(x_test, y_test),
           callbacks=cbks,
-          epochs=1,
+          epochs=2,
           verbose=0)
 
       with open(filepath) as csvfile:
-        output = ' '.join(csvfile.readlines())
+        list_lines = csvfile.readlines()
+        for line in list_lines:
+          assert line.count(sep) == 4
+        assert len(list_lines) == 5
+        output = ' '.join(list_lines)
         assert len(re.findall('epoch', output)) == 1
 
       os.remove(filepath)
@@ -1115,11 +1155,11 @@ class KerasCallbacksTest(test.TestCase):
     temp_dir = self.get_temp_dir()
     self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
 
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir)
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch')
     tb_cbk.writer = FileWriterStub(temp_dir)
 
     for batch in range(5):
-      tb_cbk.on_batch_end(batch, {'acc': np.float32(batch)})
+      tb_cbk.on_batch_end(batch, {'acc': batch})
     self.assertEqual(tb_cbk.writer.batches_logged, [0, 1, 2, 3, 4])
     self.assertEqual(tb_cbk.writer.summary_values, [0., 1., 2., 3., 4.])
     self.assertEqual(tb_cbk.writer.summary_tags, ['batch_acc'] * 5)
@@ -1147,14 +1187,17 @@ class KerasCallbacksTest(test.TestCase):
     temp_dir = self.get_temp_dir()
     self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
 
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir)
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch')
     tb_cbk.writer = FileWriterStub(temp_dir)
 
-    tb_cbk.on_batch_end(0, {'acc': np.float32(5.0)})
-    tb_cbk.on_epoch_end(0, {'acc': np.float32(10.0)})
+    tb_cbk.on_batch_end(0, {'acc': 5.0})
     batch_step, batch_summary = tb_cbk.writer.batch_summary
     self.assertEqual(batch_step, 0)
     self.assertEqual(batch_summary.value[0].simple_value, 5.0)
+
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='epoch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+    tb_cbk.on_epoch_end(0, {'acc': 10.0})
     epoch_step, epoch_summary = tb_cbk.writer.epoch_summary
     self.assertEqual(epoch_step, 0)
     self.assertEqual(epoch_summary.value[0].simple_value, 10.0)
@@ -1192,6 +1235,66 @@ class KerasCallbacksTest(test.TestCase):
 
     self.assertTrue(os.path.exists(temp_dir))
 
+  def test_TensorBoard_update_freq(self):
+
+    class FileWriterStub(object):
+
+      def __init__(self, logdir, graph=None):
+        self.logdir = logdir
+        self.graph = graph
+        self.batch_summaries = []
+        self.epoch_summaries = []
+
+      def add_summary(self, summary, step):
+        if 'batch_' in summary.value[0].tag:
+          self.batch_summaries.append((step, summary))
+        elif 'epoch_' in summary.value[0].tag:
+          self.epoch_summaries.append((step, summary))
+
+      def flush(self):
+        pass
+
+      def close(self):
+        pass
+
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+
+    # Epoch mode
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='epoch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
+    self.assertEqual(tb_cbk.writer.batch_summaries, [])
+    tb_cbk.on_epoch_end(0, {'acc': 10.0, 'size': 1})
+    self.assertEqual(len(tb_cbk.writer.epoch_summaries), 1)
+
+    # Batch mode
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
+    self.assertFalse(tb_cbk.writer.epoch_summaries)
+
+    # Integer mode
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq=20)
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertFalse(tb_cbk.writer.batch_summaries)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
+    tb_cbk.on_batch_end(0, {'acc': 10.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
+    self.assertFalse(tb_cbk.writer.epoch_summaries)
+
   def test_RemoteMonitorWithJsonPayload(self):
     if requests is None:
       self.skipTest('`requests` required to run this test')
@@ -1226,6 +1329,7 @@ class KerasCallbacksTest(test.TestCase):
   def test_fit_generator_with_callback(self):
 
     class TestCallback(keras.callbacks.Callback):
+
       def set_model(self, model):
         # Check the model operations for the optimizer operations that
         # the _make_train_function adds under a named scope for the
diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py
index 8f5872385c..58024677ee 100644
--- a/tensorflow/python/keras/layers/convolutional.py
+++ b/tensorflow/python/keras/layers/convolutional.py
@@ -1951,6 +1951,7 @@ class UpSampling2D(Layer):
           It defaults to the `image_data_format` value found in your
           Keras config file at `~/.keras/keras.json`.
           If you never set it, then it will be "channels_last".
+      interpolation: A string, one of `nearest` or `bilinear`.
 
   Input shape:
       4D tensor with shape:
@@ -1967,10 +1968,18 @@ class UpSampling2D(Layer):
           `(batch, channels, upsampled_rows, upsampled_cols)`
   """
 
-  def __init__(self, size=(2, 2), data_format=None, **kwargs):
+  def __init__(self,
+               size=(2, 2),
+               data_format=None,
+               interpolation='nearest',
+               **kwargs):
     super(UpSampling2D, self).__init__(**kwargs)
     self.data_format = conv_utils.normalize_data_format(data_format)
     self.size = conv_utils.normalize_tuple(size, 2, 'size')
+    if interpolation not in {'nearest', 'bilinear'}:
+      raise ValueError('`interpolation` argument should be one of `"nearest"` '
+                       'or `"bilinear"`.')
+    self.interpolation = interpolation
     self.input_spec = InputSpec(ndim=4)
 
   def compute_output_shape(self, input_shape):
@@ -1992,7 +2001,8 @@ class UpSampling2D(Layer):
 
   def call(self, inputs):
     return backend.resize_images(
-        inputs, self.size[0], self.size[1], self.data_format)
+        inputs, self.size[0], self.size[1], self.data_format,
+        interpolation=self.interpolation)
 
   def get_config(self):
     config = {'size': self.size, 'data_format': self.data_format}
diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index f88d632ab5..bdc175b8b9 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -789,6 +789,42 @@ class UpSamplingTest(test.TestCase):
 
             np.testing.assert_allclose(np_output, expected_out)
 
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_upsampling_2d_bilinear(self):
+    num_samples = 2
+    stack_size = 2
+    input_num_row = 11
+    input_num_col = 12
+    for data_format in ['channels_first', 'channels_last']:
+      if data_format == 'channels_first':
+        inputs = np.random.rand(num_samples, stack_size, input_num_row,
+                                input_num_col)
+      else:
+        inputs = np.random.rand(num_samples, input_num_row, input_num_col,
+                                stack_size)
+
+      testing_utils.layer_test(keras.layers.UpSampling2D,
+                               kwargs={'size': (2, 2),
+                                       'data_format': data_format,
+                                       'interpolation': 'bilinear'},
+                               input_shape=inputs.shape)
+
+      if not context.executing_eagerly():
+        for length_row in [2]:
+          for length_col in [2, 3]:
+            layer = keras.layers.UpSampling2D(
+                size=(length_row, length_col),
+                data_format=data_format)
+            layer.build(inputs.shape)
+            outputs = layer(keras.backend.variable(inputs))
+            np_output = keras.backend.eval(outputs)
+            if data_format == 'channels_first':
+              self.assertEqual(np_output.shape[2], length_row * input_num_row)
+              self.assertEqual(np_output.shape[3], length_col * input_num_col)
+            else:
+              self.assertEqual(np_output.shape[1], length_row * input_num_row)
+              self.assertEqual(np_output.shape[2], length_col * input_num_col)
+
   @tf_test_util.run_in_graph_and_eager_modes
   def test_upsampling_3d(self):
     num_samples = 2
diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index 2f6963f6b8..907e1277a9 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -571,8 +571,8 @@ class RNNTest(test.TestCase):
       cell.set_weights(tf_weights)
       [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train})
 
-    self.assertAllClose(tf_out, k_out)
-    self.assertAllClose(tf_state, k_state)
+    self.assertAllClose(tf_out, k_out, atol=1e-5)
+    self.assertAllClose(tf_state, k_state, atol=1e-5)
 
   def testBasicLSTMCellInterchangeWithLSTMCell(self):
     with self.session(graph=ops_lib.Graph()) as sess:
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
index 9feb7c09b8..5f0dfd7ae7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
@@ -386,7 +386,7 @@ tf_module {
   }
   member_method {
     name: "resize_images"
-    argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'nearest\'], "
   }
   member_method {
     name: "resize_volumes"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
index f71292856c..ed0f37647f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
@@ -5,7 +5,11 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\'], "
+    argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\'], "
+  }
+  member_method {
+    name: "get_monitor_value"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "on_batch_begin"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
index e58ba18c1c..e9d53b7225 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -5,7 +5,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\', \'update_freq\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\', \'epoch\'], "
   }
   member_method {
     name: "on_batch_begin"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
index 40a56a0c94..b05e5ec84d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
@@ -82,7 +82,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'size\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\'], "
+    argspec: "args=[\'self\', \'size\', \'data_format\', \'interpolation\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\', \'nearest\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
index 9feb7c09b8..5f0dfd7ae7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
@@ -386,7 +386,7 @@ tf_module {
   }
   member_method {
     name: "resize_images"
-    argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'nearest\'], "
   }
   member_method {
     name: "resize_volumes"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
index f71292856c..ed0f37647f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
@@ -5,7 +5,11 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\'], "
+    argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\'], "
+  }
+  member_method {
+    name: "get_monitor_value"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "on_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
index e58ba18c1c..e9d53b7225 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -5,7 +5,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\', \'update_freq\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\', \'epoch\'], "
   }
   member_method {
     name: "on_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
index 40a56a0c94..b05e5ec84d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
@@ -82,7 +82,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'size\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\'], "
+    argspec: "args=[\'self\', \'size\', \'data_format\', \'interpolation\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\', \'nearest\'], "
   }
   member_method {
     name: "add_loss"
-- 
GitLab


From 86777950480e10bc43b36facc478e2d706f23852 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 9 Oct 2018 16:21:56 -0700
Subject: [PATCH 1330/1357] Internal change

PiperOrigin-RevId: 216442906
---
 tensorflow/contrib/lite/build_def.bzl | 18 +++++++++---------
 tensorflow/contrib/lite/testing/BUILD |  5 ++---
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 05efee18e7..f962a138f7 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -310,15 +310,8 @@ def generated_test_models_failing(conversion_mode):
     if conversion_mode == "toco-flex":
         # TODO(b/117328698): Fix and enable the known flex failures.
         return [
-            "arg_min_max",
-            "div",
-            "floor_div",
-            "gather",
             "lstm",
-            "resize_bilinear",
-            "space_to_batch_nd",
             "split",
-            "transpose",
             "unpack",
         ]
 
@@ -334,7 +327,8 @@ def generated_test_models_all():
     """Generates a list of all tests with the different converters.
 
     Returns:
-      List of tuples representing (conversion mode, name of test).
+      List of tuples representing:
+            (conversion mode, name of test, test tags, test args).
     """
     conversion_modes = generated_test_conversion_modes()
     tests = generated_test_models()
@@ -343,12 +337,18 @@ def generated_test_models_all():
         failing_tests = generated_test_models_failing(conversion_mode)
         for test in tests:
             tags = []
+            args = []
             if test in failing_tests:
                 tags.append("notap")
                 tags.append("manual")
             if conversion_mode:
                 test += "_%s" % conversion_mode
-            options.append((conversion_mode, test, tags))
+
+            # Flex conversion shouldn't suffer from the same conversion bugs
+            # listed for the default TFLite kernel backend.
+            if conversion_mode == "toco-flex":
+                args.append("--ignore_known_bugs=false")
+            options.append((conversion_mode, test, tags, args))
     return options
 
 def gen_zip_test(name, test_name, conversion_mode, **kwargs):
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 2edd420fea..3dc666f631 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -20,8 +20,7 @@ load(
     name = "zip_test_%s" % test_name,
     size = "large",
     srcs = ["generated_examples_zip_test.cc"],
-    args = [
-    ] + select({
+    args = args + select({
         "//tensorflow:android": [],
         "//conditions:default": [
             "--zip_file_path=$(location :zip_%s)" % test_name,
@@ -61,7 +60,7 @@ load(
             "//tensorflow/core:android_tensorflow_test_lib",
         ],
     }),
-) for conversion_mode, test_name, tags in generated_test_models_all()]
+) for conversion_mode, test_name, tags, args in generated_test_models_all()]
 
 test_suite(
     name = "generated_zip_tests",
-- 
GitLab


From 2f5ebc0ea5e6d500ea8cd925234c569d6b32fd4e Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 9 Oct 2018 16:22:22 -0700
Subject: [PATCH 1331/1357] [TF:XLA] Bump open source abseil revision to
 445998d7ac4e5d3c50411d377e3b50e960d2d6c2

PiperOrigin-RevId: 216442983
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 40c226a861..b03af53cff 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -117,11 +117,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "f186bf5d9fce3037c602a21f86facbdd317adecef36e1726ec7bc7b496943a82",
-        strip_prefix = "abseil-cpp-e821380d69a549dc64900693942789d21aa4df5e",
+        sha256 = "cd1650daecfdd5591502bb017c70777c959cf604a962352bd5312bef8d78a8c6",
+        strip_prefix = "abseil-cpp-445998d7ac4e5d3c50411d377e3b50e960d2d6c2",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz",
         ],
     )
 
-- 
GitLab


From 6c391166b8b6ba43d2b0151e6fb9cf14864131a2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 16:23:35 -0700
Subject: [PATCH 1332/1357] Add 'remove' operation to MutableHashTable and
 MutableDenseHashTable.

PiperOrigin-RevId: 216443201
---
 .../linear_optimizer/python/ops/sdca_ops.py   |   3 +-
 .../ops/sharded_mutable_dense_hashtable.py    |   2 +
 .../sharded_mutable_dense_hashtable_test.py   |   6 +
 tensorflow/contrib/lookup/lookup_ops.py       |  81 ++++-
 tensorflow/contrib/lookup/lookup_ops_test.py  | 336 +++++++++++++++---
 .../python/timeseries/math_utils.py           |  19 +-
 .../python/timeseries/math_utils_test.py      |   8 +-
 .../python/timeseries/state_management.py     |   1 +
 .../api_def_LookupTableRemoveV2.pbtxt         |  24 ++
 tensorflow/core/framework/lookup_interface.cc |   8 +
 tensorflow/core/framework/lookup_interface.h  |  17 +
 .../core/kernels/initializable_lookup_table.h |   6 +
 tensorflow/core/kernels/lookup_table_op.cc    | 184 +++++++++-
 .../core/ops/compat/ops_history.v1.pbtxt      |  20 ++
 tensorflow/core/ops/lookup_ops.cc             |  14 +
 15 files changed, 643 insertions(+), 86 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt

diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index 48ac429701..b5099a0bf6 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -152,7 +152,8 @@ class SdcaModel(object):
         default_value=[0.0, 0.0, 0.0, 0.0],
         # SdcaFprint never returns 0 or 1 for the low64 bits, so this a safe
         # empty_key (that will never collide with actual payloads).
-        empty_key=[0, 0])
+        empty_key=[0, 0],
+        deleted_key=[1, 1])
 
     summary.scalar('approximate_duality_gap', self.approximate_duality_gap())
     summary.scalar('examples_seen', self._hashtable.size())
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
index 5015fb0848..44a869f7c2 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
@@ -48,6 +48,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface):
                value_dtype,
                default_value,
                empty_key,
+               deleted_key,
                num_shards=1,
                checkpoint=True,
                name='ShardedMutableHashTable'):
@@ -62,6 +63,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface):
                 value_dtype=value_dtype,
                 default_value=default_value,
                 empty_key=empty_key,
+                deleted_key=deleted_key,
                 checkpoint=checkpoint,
                 name='%s-%d-of-%d' % (name, i + 1, num_shards)))
       self._table_shards = table_shards
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py
index 553b116a3b..2b56d0fa3a 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py
@@ -33,6 +33,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
       with self.cached_session():
         default_val = -1
         empty_key = 0
+        deleted_key = -1
         keys = constant_op.constant([11, 12, 13], dtypes.int64)
         values = constant_op.constant([0, 1, 2], dtypes.int64)
         table = ShardedMutableDenseHashTable(
@@ -40,6 +41,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
             dtypes.int64,
             default_val,
             empty_key,
+            deleted_key,
             num_shards=num_shards)
         self.assertAllEqual(0, table.size().eval())
 
@@ -56,6 +58,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
       with self.cached_session():
         default_val = [-0.1, 0.2]
         empty_key = [0, 1]
+        deleted_key = [1, 0]
         keys = constant_op.constant([[11, 12], [13, 14], [15, 16]],
                                     dtypes.int64)
         values = constant_op.constant([[0.5, 0.6], [1.5, 1.6], [2.5, 2.6]],
@@ -65,6 +68,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
             dtypes.float32,
             default_val,
             empty_key,
+            deleted_key,
             num_shards=num_shards)
         self.assertAllEqual(0, table.size().eval())
 
@@ -81,6 +85,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
   def testExportSharded(self):
     with self.cached_session():
       empty_key = -2
+      deleted_key = -3
       default_val = -1
       num_shards = 2
       keys = constant_op.constant([10, 11, 12], dtypes.int64)
@@ -90,6 +95,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
           dtypes.int64,
           default_val,
           empty_key,
+          deleted_key,
           num_shards=num_shards)
       self.assertAllEqual(0, table.size().eval())
 
diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py
index f83765a48d..5abef822e8 100644
--- a/tensorflow/contrib/lookup/lookup_ops.py
+++ b/tensorflow/contrib/lookup/lookup_ops.py
@@ -292,8 +292,8 @@ def index_to_string(tensor, mapping, default_value="UNK", name=None):
 class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase):
   """A generic mutable hash table implementation.
 
-  Data can be inserted by calling the insert method. It does not support
-  initialization via the init method.
+  Data can be inserted by calling the insert method and removed by calling the
+  remove method. It does not support initialization via the init method.
 
   Example usage:
 
@@ -391,6 +391,34 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase):
       with ops.colocate_with(self._table_ref):
         return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=name)
 
+  def remove(self, keys, name=None):
+    """Removes `keys` and its associated values from the table.
+
+    If a key is not present in the table, it is silently ignored.
+
+    Args:
+      keys: Keys to remove. Can be a tensor of any shape. Must match the table's
+        key type.
+      name: A name for the operation (optional).
+
+    Returns:
+      The created Operation.
+
+    Raises:
+      TypeError: when `keys` do not match the table data types.
+    """
+    if keys.dtype != self._key_dtype:
+      raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." %
+                      (self._key_dtype, keys.dtype))
+
+    with ops.name_scope(name, "%s_lookup_table_remove" % self._name,
+                        (self._table_ref, keys, self._default_value)) as name:
+      # pylint: disable=protected-access
+      op = gen_lookup_ops.lookup_table_remove_v2(
+          self._table_ref, keys, name=name)
+
+    return op
+
   def lookup(self, keys, name=None):
     """Looks up `keys` in a table, outputs the corresponding values.
 
@@ -487,11 +515,11 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase):
 class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
   """A generic mutable hash table implementation using tensors as backing store.
 
-  Data can be inserted by calling the insert method. It does not support
-  initialization via the init method.
+  Data can be inserted by calling the insert method and removed by calling the
+  remove method. It does not support initialization via the init method.
 
   It uses "open addressing" with quadratic reprobing to resolve collisions.
-  Compared to `MutableHashTable` the insert and lookup operations in a
+  Compared to `MutableHashTable` the insert, remove and lookup operations in a
   `MutableDenseHashTable` are typically faster, but memory usage can be higher.
   However, `MutableDenseHashTable` does not require additional memory for
   temporary tensors created during checkpointing and restore operations.
@@ -502,7 +530,9 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
   table = tf.contrib.lookup.MutableDenseHashTable(key_dtype=tf.int64,
                                                   value_dtype=tf.int64,
                                                   default_value=-1,
-                                                  empty_key=0)
+                                                  empty_key=0,
+                                                  deleted_key=-1)
+
   sess.run(table.insert(keys, values))
   out = table.lookup(query_keys)
   print(out.eval())
@@ -516,6 +546,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
                value_dtype,
                default_value,
                empty_key,
+               deleted_key,
                initial_num_buckets=None,
                shared_name=None,
                name="MutableDenseHashTable",
@@ -530,7 +561,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
       value_dtype: the type of the value tensors.
       default_value: The value to use if a key is missing in the table.
       empty_key: the key to use to represent empty buckets internally. Must not
-        be used in insert or lookup operations.
+        be used in insert, remove or lookup operations.
       initial_num_buckets: the initial number of buckets.
       shared_name: If non-empty, this table will be shared under
         the given name across multiple sessions.
@@ -538,9 +569,12 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
       checkpoint: if True, the contents of the table are saved to and restored
         from checkpoints. If `shared_name` is empty for a checkpointed table, it
         is shared using the table node name.
+      deleted_key: the key to use to represent deleted buckets internally. Must
+        not be used in insert, remove or lookup operations and be different from
+        the empty_key.
 
     Returns:
-      A `MutableHashTable` object.
+      A `MutableDenseHashTable` object.
 
     Raises:
       ValueError: If checkpoint is True and no name was specified.
@@ -555,6 +589,8 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
     use_node_name_sharing = checkpoint and shared_name is None
     empty_key = ops.convert_to_tensor(
         empty_key, dtype=key_dtype, name="empty_key")
+    deleted_key = ops.convert_to_tensor(
+        deleted_key, dtype=key_dtype, name="deleted_key")
     executing_eagerly = context.executing_eagerly()
     if executing_eagerly and shared_name is None:
       # TODO(allenl): This will leak memory due to kernel caching by the
@@ -564,6 +600,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
       shared_name = "table_%d" % (ops.uid(),)
     self._table_ref = gen_lookup_ops.mutable_dense_hash_table_v2(
         empty_key=empty_key,
+        deleted_key=deleted_key,
         shared_name=shared_name,
         use_node_name_sharing=use_node_name_sharing,
         value_dtype=value_dtype,
@@ -648,6 +685,34 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
             self._table_ref, keys, values, name=name)
       return op
 
+  def remove(self, keys, name=None):
+    """Removes `keys` and its associated values from the table.
+
+    If a key is not present in the table, it is silently ignored.
+
+    Args:
+      keys: Keys to remove. Can be a tensor of any shape. Must match the table's
+        key type.
+      name: A name for the operation (optional).
+
+    Returns:
+      The created Operation.
+
+    Raises:
+      TypeError: when `keys` do not match the table data types.
+    """
+    if keys.dtype != self._key_dtype:
+      raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." %
+                      (self._key_dtype, keys.dtype))
+
+    with ops.name_scope(name, "%s_lookup_table_remove" % self._name,
+                        (self._table_ref, keys, self._default_value)) as name:
+      # pylint: disable=protected-access
+      op = gen_lookup_ops.lookup_table_remove_v2(
+          self._table_ref, keys, name=name)
+
+    return op
+
   def export(self, name=None):
     """Returns tensors of all keys and values in the table.
 
diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py
index 9e9345e875..35b0d1bc44 100644
--- a/tensorflow/contrib/lookup/lookup_ops_test.py
+++ b/tensorflow/contrib/lookup/lookup_ops_test.py
@@ -303,13 +303,17 @@ class MutableHashTableOpTest(test.TestCase):
   def testMutableHashTable(self):
     with self.cached_session():
       default_val = -1
-      keys = constant_op.constant(["brain", "salad", "surgery"])
-      values = constant_op.constant([0, 1, 2], dtypes.int64)
+      keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"])
+      values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
       table = lookup.MutableHashTable(dtypes.string, dtypes.int64,
                                       default_val)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant(["tarkus", "tank"])
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
       input_string = constant_op.constant(["brain", "salad", "tank"])
@@ -472,13 +476,18 @@ class MutableHashTableOpTest(test.TestCase):
   def testMutableHashTableOfTensors(self):
     with self.cached_session():
       default_val = constant_op.constant([-1, -1], dtypes.int64)
-      keys = constant_op.constant(["brain", "salad", "surgery"])
-      values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64)
+      keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"])
+      values = constant_op.constant([[0, 1], [2, 3], [4, 5], [6, 7]],
+                                    dtypes.int64)
       table = lookup.MutableHashTable(dtypes.string, dtypes.int64,
                                       default_val)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant(["tarkus", "tank"])
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
       input_string = constant_op.constant(["brain", "salad", "tank"])
@@ -624,6 +633,26 @@ class MutableHashTableOpTest(test.TestCase):
       result = output.eval()
       self.assertAllEqual([0, 1, 3, -1], result)
 
+  def testMutableHashTableRemoveHighRank(self):
+    with self.test_session():
+      default_val = -1
+      keys = constant_op.constant([["brain", "salad"], ["surgery", "tank"]])
+      values = constant_op.constant([[0, 1], [2, 3]], dtypes.int64)
+      table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val)
+
+      table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant(["salad", "tarkus"])
+      table.remove(remove_string).run()
+      self.assertAllEqual(3, table.size().eval())
+
+      input_string = constant_op.constant(["brain", "salad", "tank", "tarkus"])
+      output = table.lookup(input_string)
+
+      result = output.eval()
+      self.assertAllEqual([0, -1, 3, -1], result)
+
   def testMutableHashTableOfTensorsFindHighRank(self):
     with self.cached_session():
       default_val = constant_op.constant([-1, -1, -1], dtypes.int64)
@@ -645,6 +674,30 @@ class MutableHashTableOpTest(test.TestCase):
       self.assertAllEqual(
           [[[0, 1, 2], [2, 3, 4]], [[-1, -1, -1], [-1, -1, -1]]], result)
 
+  def testMutableHashTableOfTensorsRemoveHighRank(self):
+    with self.test_session():
+      default_val = constant_op.constant([-1, -1, -1], dtypes.int64)
+      keys = constant_op.constant(["brain", "salad", "surgery"])
+      values = constant_op.constant([[0, 1, 2], [2, 3, 4], [4, 5, 6]],
+                                    dtypes.int64)
+      table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val)
+
+      table.insert(keys, values).run()
+      self.assertAllEqual(3, table.size().eval())
+
+      remove_string = constant_op.constant([["brain", "tank"]])
+      table.remove(remove_string).run()
+      self.assertAllEqual(2, table.size().eval())
+
+      input_string = constant_op.constant([["brain", "salad"],
+                                           ["surgery", "tank"]])
+      output = table.lookup(input_string)
+      self.assertAllEqual([2, 2, 3], output.get_shape())
+
+      result = output.eval()
+      self.assertAllEqual(
+          [[[-1, -1, -1], [2, 3, 4]], [[4, 5, 6], [-1, -1, -1]]], result)
+
   def testMultipleMutableHashTables(self):
     with self.cached_session() as sess:
       default_val = -1
@@ -792,13 +845,22 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
   def testBasic(self):
     with self.cached_session():
-      keys = constant_op.constant([11, 12, 13], dtypes.int64)
-      values = constant_op.constant([0, 1, 2], dtypes.int64)
+
+      keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+      values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.int64, default_value=-1, empty_key=0)
+          dtypes.int64,
+          dtypes.int64,
+          default_value=-1,
+          empty_key=0,
+          deleted_key=-1)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant([12, 15], dtypes.int64)
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
       input_string = constant_op.constant([11, 12, 15], dtypes.int64)
@@ -806,17 +868,26 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual([3], output.get_shape())
 
       result = output.eval()
-      self.assertAllEqual([0, 1, -1], result)
+      self.assertAllEqual([0, -1, -1], result)
 
   def testBasicBool(self):
     with self.cached_session():
-      keys = constant_op.constant([11, 12, 13], dtypes.int64)
-      values = constant_op.constant([True, True, True], dtypes.bool)
+
+      keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+      values = constant_op.constant([True, True, True, True], dtypes.bool)
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.bool, default_value=False, empty_key=0)
+          dtypes.int64,
+          dtypes.bool,
+          default_value=False,
+          empty_key=0,
+          deleted_key=-1)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant([11, 15], dtypes.int64)
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
       input_string = constant_op.constant([11, 12, 15], dtypes.int64)
@@ -824,14 +895,30 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual([3], output.get_shape())
 
       result = output.eval()
-      self.assertAllEqual([True, True, False], result)
+      self.assertAllEqual([False, True, False], result)
+
+  def testSameEmptyAndDeletedKey(self):
+    with self.cached_session():
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "deleted_key"):
+        table = lookup.MutableDenseHashTable(
+            dtypes.int64,
+            dtypes.int64,
+            default_value=-1,
+            empty_key=42,
+            deleted_key=42)
+        self.assertAllEqual(0, table.size().eval())
 
   def testLookupUnknownShape(self):
     with self.cached_session():
       keys = constant_op.constant([11, 12, 13], dtypes.int64)
       values = constant_op.constant([0, 1, 2], dtypes.int64)
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.int64, default_value=-1, empty_key=0)
+          dtypes.int64,
+          dtypes.int64,
+          default_value=-1,
+          empty_key=0,
+          deleted_key=-1)
 
       table.insert(keys, values).run()
       self.assertAllEqual(3, table.size().eval())
@@ -844,45 +931,60 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
   def testMapStringToFloat(self):
     with self.cached_session():
-      keys = constant_op.constant(["a", "b", "c"], dtypes.string)
-      values = constant_op.constant([0.0, 1.1, 2.2], dtypes.float32)
+
+      keys = constant_op.constant(["a", "b", "c", "d"], dtypes.string)
+      values = constant_op.constant([0.0, 1.1, 2.2, 3.3], dtypes.float32)
       default_value = constant_op.constant(-1.5, dtypes.float32)
       table = lookup.MutableDenseHashTable(
           dtypes.string,
           dtypes.float32,
           default_value=default_value,
-          empty_key="")
+          empty_key="",
+          deleted_key="$")
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant(["b", "e"])
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
-      input_string = constant_op.constant(["a", "b", "d"], dtypes.string)
+      input_string = constant_op.constant(["a", "b", "d", "e"], dtypes.string)
       output = table.lookup(input_string)
-      self.assertAllEqual([3], output.get_shape())
+      self.assertAllEqual([4], output.get_shape())
 
       result = output.eval()
-      self.assertAllClose([0, 1.1, -1.5], result)
+      self.assertAllClose([0, -1.5, 3.3, -1.5], result)
 
   def testMapInt64ToFloat(self):
     for float_dtype in [dtypes.float32, dtypes.float64]:
       with self.cached_session():
-        keys = constant_op.constant([11, 12, 13], dtypes.int64)
-        values = constant_op.constant([0.0, 1.1, 2.2], float_dtype)
+
+        keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+        values = constant_op.constant([0.0, 1.1, 2.2, 3.3], float_dtype)
         default_value = constant_op.constant(-1.5, float_dtype)
         table = lookup.MutableDenseHashTable(
-            dtypes.int64, float_dtype, default_value=default_value, empty_key=0)
+            dtypes.int64,
+            float_dtype,
+            default_value=default_value,
+            empty_key=0,
+            deleted_key=-1)
         self.assertAllEqual(0, table.size().eval())
 
         table.insert(keys, values).run()
+        self.assertAllEqual(4, table.size().eval())
+
+        remove_string = constant_op.constant([12, 15], dtypes.int64)
+        table.remove(remove_string).run()
         self.assertAllEqual(3, table.size().eval())
 
-        input_string = constant_op.constant([11, 12, 15], dtypes.int64)
+        input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64)
         output = table.lookup(input_string)
-        self.assertAllEqual([3], output.get_shape())
+        self.assertAllEqual([4], output.get_shape())
 
         result = output.eval()
-        self.assertAllClose([0, 1.1, -1.5], result)
+        self.assertAllClose([0, -1.5, 3.3, -1.5], result)
 
   def testVectorValues(self):
     with self.cached_session():
@@ -895,6 +997,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=default_value,
           empty_key=0,
+          deleted_key=-1,
           initial_num_buckets=4)
       self.assertAllEqual(0, table.size().eval())
 
@@ -908,26 +1011,35 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual(4, table.size().eval())
       self.assertAllEqual(8, len(table.export()[0].eval()))
 
-      input_string = constant_op.constant([11, 12, 15], dtypes.int64)
+      remove_string = constant_op.constant([12, 16], dtypes.int64)
+      table.remove(remove_string).run()
+      self.assertAllEqual(3, table.size().eval())
+      self.assertAllEqual(8, len(table.export()[0].eval()))
+
+      input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64)
       output = table.lookup(input_string)
-      self.assertAllEqual(
-          [3, 4], output.shape, msg="Saw shape: %s" % output.shape)
+      self.assertAllEqual([4, 4],
+                          output.shape,
+                          msg="Saw shape: %s" % output.shape)
 
       result = output.eval()
-      self.assertAllEqual([[0, 1, 2, 3], [3, 4, 5, 6], [-1, -2, -3, -4]],
-                          result)
+      self.assertAllEqual(
+          [[0, 1, 2, 3], [-1, -2, -3, -4], [2, 3, 4, 5], [-1, -2, -3, -4]],
+          result)
 
   def testVectorKeys(self):
     with self.cached_session():
       keys = constant_op.constant([[0, 1], [1, 2], [1, 3]], dtypes.int64)
       values = constant_op.constant([10, 11, 12], dtypes.int64)
       empty_key = constant_op.constant([0, 3], dtypes.int64)
+      deleted_key = constant_op.constant([-1, -1], dtypes.int64)
       default_value = constant_op.constant(-1, dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           initial_num_buckets=8)
       self.assertAllEqual(0, table.size().eval())
 
@@ -940,13 +1052,18 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual(4, table.size().eval())
       self.assertAllEqual(8, len(table.export()[0].eval()))
 
-      input_string = constant_op.constant([[0, 1], [1, 2], [0, 2]],
+      remove_string = constant_op.constant([[1, 2], [7, 8]], dtypes.int64)
+      table.remove(remove_string).run()
+      self.assertAllEqual(3, table.size().eval())
+      self.assertAllEqual(8, len(table.export()[0].eval()))
+
+      input_string = constant_op.constant([[0, 1], [1, 2], [1, 3], [0, 2]],
                                           dtypes.int64)
       output = table.lookup(input_string)
-      self.assertAllEqual([3], output.get_shape())
+      self.assertAllEqual([4], output.get_shape())
 
       result = output.eval()
-      self.assertAllEqual([10, 11, -1], result)
+      self.assertAllEqual([10, -1, 12, -1], result)
 
   def testResize(self):
     with self.cached_session():
@@ -957,6 +1074,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=-1,
           empty_key=0,
+          deleted_key=-1,
           initial_num_buckets=4)
       self.assertAllEqual(0, table.size().eval())
 
@@ -964,31 +1082,42 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual(3, table.size().eval())
       self.assertAllEqual(4, len(table.export()[0].eval()))
 
-      keys2 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64)
-      values2 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64)
+      keys2 = constant_op.constant([12, 99], dtypes.int64)
+      table.remove(keys2).run()
+      self.assertAllEqual(2, table.size().eval())
+      self.assertAllEqual(4, len(table.export()[0].eval()))
+
+      keys3 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64)
+      values3 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64)
 
-      table.insert(keys2, values2).run()
-      self.assertAllEqual(7, table.size().eval())
+      table.insert(keys3, values3).run()
+      self.assertAllEqual(6, table.size().eval())
       self.assertAllEqual(16, len(table.export()[0].eval()))
 
-      keys3 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18],
+      keys4 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18],
                                    dtypes.int64)
-      output = table.lookup(keys3)
-      self.assertAllEqual([-1, 0, 1, 3, 4, 5, 6, 7, -1], output.eval())
+      output = table.lookup(keys4)
+      self.assertAllEqual([-1, 0, -1, 3, 4, 5, 6, 7, -1], output.eval())
 
   def testExport(self):
     with self.cached_session():
-      keys = constant_op.constant([11, 12, 13], dtypes.int64)
-      values = constant_op.constant([1, 2, 3], dtypes.int64)
+
+      keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+      values = constant_op.constant([1, 2, 3, 4], dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=-1,
           empty_key=100,
+          deleted_key=200,
           initial_num_buckets=8)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      keys2 = constant_op.constant([12, 15], dtypes.int64)
+      table.remove(keys2).run()
       self.assertAllEqual(3, table.size().eval())
 
       exported_keys, exported_values = table.export()
@@ -1005,8 +1134,8 @@ class MutableDenseHashTableOpTest(test.TestCase):
       pairs = np.dstack((np_keys.flatten(), np_values.flatten()))[0]
       # sort by key
       pairs = pairs[pairs[:, 0].argsort()]
-      self.assertAllEqual([[11, 1], [12, 2], [13, 3], [100, 0], [100, 0],
-                           [100, 0], [100, 0], [100, 0]], pairs)
+      self.assertAllEqual([[11, 1], [13, 3], [14, 4], [100, 0], [100, 0],
+                           [100, 0], [100, 0], [200, 2]], pairs)
 
   def testSaveRestore(self):
     save_dir = os.path.join(self.get_temp_dir(), "save_restore")
@@ -1015,13 +1144,15 @@ class MutableDenseHashTableOpTest(test.TestCase):
     with self.session(graph=ops.Graph()) as sess:
       default_value = -1
       empty_key = 0
-      keys = constant_op.constant([11, 12, 13], dtypes.int64)
-      values = constant_op.constant([0, 1, 2], dtypes.int64)
+      deleted_key = -1
+      keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+      values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t1",
           checkpoint=True,
           initial_num_buckets=32)
@@ -1030,6 +1161,11 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
       self.assertAllEqual(0, table.size().eval())
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+      self.assertAllEqual(32, len(table.export()[0].eval()))
+
+      keys2 = constant_op.constant([12, 15], dtypes.int64)
+      table.remove(keys2).run()
       self.assertAllEqual(3, table.size().eval())
       self.assertAllEqual(32, len(table.export()[0].eval()))
 
@@ -1043,6 +1179,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t1",
           checkpoint=True,
           initial_num_buckets=64)
@@ -1062,7 +1199,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
       input_string = constant_op.constant([10, 11, 12, 13, 14], dtypes.int64)
       output = table.lookup(input_string)
-      self.assertAllEqual([-1, 0, 1, 2, -1], output.eval())
+      self.assertAllEqual([-1, 0, -1, 2, 3], output.eval())
 
   @test_util.run_in_graph_and_eager_modes
   def testObjectSaveRestore(self):
@@ -1071,6 +1208,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     default_value = -1
     empty_key = 0
+    deleted_key = -1
     keys = constant_op.constant([11, 12, 13], dtypes.int64)
     values = constant_op.constant([0, 1, 2], dtypes.int64)
     save_table = lookup.MutableDenseHashTable(
@@ -1078,6 +1216,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
         dtypes.int64,
         default_value=default_value,
         empty_key=empty_key,
+        deleted_key=deleted_key,
         name="t1",
         checkpoint=True,
         initial_num_buckets=32)
@@ -1097,6 +1236,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
         dtypes.int64,
         default_value=default_value,
         empty_key=empty_key,
+        deleted_key=deleted_key,
         name="t1",
         checkpoint=True,
         initial_num_buckets=64)
@@ -1124,14 +1264,18 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       empty_key = constant_op.constant([11, 13], dtypes.int64)
+      deleted_key = constant_op.constant([-2, -3], dtypes.int64)
       default_value = constant_op.constant([-1, -2], dtypes.int64)
-      keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64)
-      values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64)
+      keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]],
+                                  dtypes.int64)
+      values = constant_op.constant([[0, 1], [2, 3], [2, 4], [4, 5]],
+                                    dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t1",
           checkpoint=True,
           initial_num_buckets=32)
@@ -1140,6 +1284,11 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
       self.assertAllEqual(0, table.size().eval())
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+      self.assertAllEqual(32, len(table.export()[0].eval()))
+
+      keys2 = constant_op.constant([[12, 13], [16, 17]], dtypes.int64)
+      table.remove(keys2).run()
       self.assertAllEqual(3, table.size().eval())
       self.assertAllEqual(32, len(table.export()[0].eval()))
 
@@ -1149,12 +1298,14 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       empty_key = constant_op.constant([11, 13], dtypes.int64)
+      deleted_key = constant_op.constant([-2, -3], dtypes.int64)
       default_value = constant_op.constant([-1, -2], dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t1",
           checkpoint=True,
           initial_num_buckets=64)
@@ -1184,14 +1335,17 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       empty_key = constant_op.constant([11, 13], dtypes.int64)
+      deleted_key = constant_op.constant([-1, -1], dtypes.int64)
       default_value = constant_op.constant(-1, dtypes.int64)
-      keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64)
-      values = constant_op.constant([0, 1, 2], dtypes.int64)
+      keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]],
+                                  dtypes.int64)
+      values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t2",
           checkpoint=True,
           initial_num_buckets=32)
@@ -1200,6 +1354,11 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
       self.assertAllEqual(0, table.size().eval())
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+      self.assertAllEqual(32, len(table.export()[0].eval()))
+
+      keys2 = constant_op.constant([[12, 13], [15, 16]], dtypes.int64)
+      table.remove(keys2).run()
       self.assertAllEqual(3, table.size().eval())
       self.assertAllEqual(32, len(table.export()[0].eval()))
 
@@ -1209,12 +1368,14 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       empty_key = constant_op.constant([11, 13], dtypes.int64)
+      deleted_key = constant_op.constant([-1, -1], dtypes.int64)
       default_value = constant_op.constant(-1, dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t2",
           checkpoint=True,
           initial_num_buckets=64)
@@ -1235,7 +1396,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
       input_string = constant_op.constant(
           [[11, 12], [11, 14], [11, 15], [13, 14], [13, 15]], dtypes.int64)
       output = table.lookup(input_string)
-      self.assertAllEqual([0, 1, -1, 2, -1], output.eval())
+      self.assertAllEqual([0, 1, -1, 3, -1], output.eval())
 
   def testReprobe(self):
     with self.cached_session():
@@ -1248,6 +1409,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=-1,
           empty_key=0,
+          deleted_key=-1,
           initial_num_buckets=8)
       self.assertAllEqual(0, table.size().eval())
 
@@ -1267,7 +1429,11 @@ class MutableDenseHashTableOpTest(test.TestCase):
       keys = constant_op.constant([11, 0, 13], dtypes.int64)
       values = constant_op.constant([0, 1, 2], dtypes.int64)
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.int64, default_value=-1, empty_key=12)
+          dtypes.int64,
+          dtypes.int64,
+          default_value=-1,
+          empty_key=12,
+          deleted_key=-1)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
@@ -1283,19 +1449,35 @@ class MutableDenseHashTableOpTest(test.TestCase):
   def testErrors(self):
     with self.cached_session():
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.int64, default_value=-1, empty_key=0)
+          dtypes.int64,
+          dtypes.int64,
+          default_value=-1,
+          empty_key=0,
+          deleted_key=-1)
 
       # Inserting the empty key returns an error
-      keys = constant_op.constant([11, 0], dtypes.int64)
-      values = constant_op.constant([0, 1], dtypes.int64)
+      keys1 = constant_op.constant([11, 0], dtypes.int64)
+      values1 = constant_op.constant([0, 1], dtypes.int64)
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "empty_key"):
-        table.insert(keys, values).run()
+        table.insert(keys1, values1).run()
 
       # Looking up the empty key returns an error
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "empty_key"):
-        table.lookup(keys).eval()
+        table.lookup(keys1).eval()
+
+      # Inserting the deleted key returns an error
+      keys2 = constant_op.constant([11, -1], dtypes.int64)
+      values2 = constant_op.constant([0, 1], dtypes.int64)
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "deleted_key"):
+        table.insert(keys2, values2).run()
+
+      # Looking up the empty key returns an error
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "deleted_key"):
+        table.lookup(keys2).eval()
 
       # Arbitrary tensors of keys are not supported
       keys = constant_op.constant([[11, 0], [12, 1]], dtypes.int64)
@@ -1312,11 +1494,43 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=-1,
           empty_key=17,
+          deleted_key=-1,
           initial_num_buckets=12)
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "Number of buckets must be"):
         self.assertAllEqual(0, table2.size().eval())
 
+      with self.assertRaisesRegexp(
+          errors_impl.InvalidArgumentError,
+          "Empty and deleted keys must have same shape"):
+        table3 = lookup.MutableDenseHashTable(
+            dtypes.int64,
+            dtypes.int64,
+            default_value=-1,
+            empty_key=42,
+            deleted_key=[1, 2])
+        self.assertAllEqual(0, table3.size().eval())
+
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "Empty and deleted keys cannot be equal"):
+        table4 = lookup.MutableDenseHashTable(
+            dtypes.int64,
+            dtypes.int64,
+            default_value=-1,
+            empty_key=42,
+            deleted_key=42)
+        self.assertAllEqual(0, table4.size().eval())
+
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "Empty and deleted keys cannot be equal"):
+        table5 = lookup.MutableDenseHashTable(
+            dtypes.int64,
+            dtypes.int64,
+            default_value=-1,
+            empty_key=[1, 2, 3],
+            deleted_key=[1, 2, 3])
+        self.assertAllEqual(0, table5.size().eval())
+
 
 class IndexTableFromFile(test.TestCase):
 
@@ -2558,7 +2772,11 @@ class MutableDenseHashTableBenchmark(MutableHashTableBenchmark):
 
   def _create_table(self):
     return lookup.MutableDenseHashTable(
-        dtypes.int64, dtypes.float32, default_value=0.0, empty_key=-1)
+        dtypes.int64,
+        dtypes.float32,
+        default_value=0.0,
+        empty_key=-1,
+        deleted_key=-2)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py
index 03da2b82e5..9c585fe6a7 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py
@@ -543,20 +543,25 @@ class TupleOfTensorsLookup(lookup.LookupInterface):
   overhead.
   """
 
-  def __init__(
-      self, key_dtype, default_values, empty_key, name, checkpoint=True):
+  def __init__(self,
+               key_dtype,
+               default_values,
+               empty_key,
+               deleted_key,
+               name,
+               checkpoint=True):
     default_values_flat = nest.flatten(default_values)
-    self._hash_tables = nest.pack_sequence_as(
-        default_values,
-        [TensorValuedMutableDenseHashTable(
+    self._hash_tables = nest.pack_sequence_as(default_values, [
+        TensorValuedMutableDenseHashTable(
             key_dtype=key_dtype,
             value_dtype=default_value.dtype.base_dtype,
             default_value=default_value,
             empty_key=empty_key,
+            deleted_key=deleted_key,
             name=name + "_{}".format(table_number),
             checkpoint=checkpoint)
-         for table_number, default_value
-         in enumerate(default_values_flat)])
+        for table_number, default_value in enumerate(default_values_flat)
+    ])
     self._name = name
 
   def lookup(self, keys):
diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py
index c0de42b15b..91265b9b2e 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py
@@ -223,10 +223,12 @@ class TestLookupTable(test.TestCase):
     hash_table = math_utils.TupleOfTensorsLookup(
         key_dtype=dtypes.int64,
         default_values=[[
-            array_ops.ones([3, 2], dtype=dtypes.float32), array_ops.zeros(
-                [5], dtype=dtypes.float64)
-        ], array_ops.ones([7, 7], dtype=dtypes.int64)],
+            array_ops.ones([3, 2], dtype=dtypes.float32),
+            array_ops.zeros([5], dtype=dtypes.float64)
+        ],
+                        array_ops.ones([7, 7], dtype=dtypes.int64)],
         empty_key=-1,
+        deleted_key=-2,
         name="test_lookup")
     def stack_tensor(base_tensor):
       return array_ops.stack([base_tensor + 1, base_tensor + 2])
diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_management.py b/tensorflow/contrib/timeseries/python/timeseries/state_management.py
index 13eecd4d82..138406c616 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/state_management.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/state_management.py
@@ -149,6 +149,7 @@ class ChainingStateManager(_OverridableStateManager):
         key_dtype=dtypes.int64,
         default_values=self._start_state,
         empty_key=-1,
+        deleted_key=-2,
         name="cached_states",
         checkpoint=self._checkpoint_state)
 
diff --git a/tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt
new file mode 100644
index 0000000000..333fe6f4b2
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt
@@ -0,0 +1,24 @@
+op {
+  graph_op_name: "LookupTableRemoveV2"
+  visibility: HIDDEN
+  endpoint {
+    name: "LookupTableRemove"
+  }
+  in_arg {
+    name: "table_handle"
+    description: <<END
+Handle to the table.
+END
+  }
+  in_arg {
+    name: "keys"
+    description: <<END
+Any shape.  Keys of the elements to remove.
+END
+  }
+  summary: "Removes keys and its associated values from a table."
+  description: <<END
+The tensor `keys` must of the same type as the keys of the table. Keys not
+already in the table are silently ignored.
+END
+}
diff --git a/tensorflow/core/framework/lookup_interface.cc b/tensorflow/core/framework/lookup_interface.cc
index bf3204ea6e..117adbf65c 100644
--- a/tensorflow/core/framework/lookup_interface.cc
+++ b/tensorflow/core/framework/lookup_interface.cc
@@ -71,6 +71,14 @@ Status LookupInterface::CheckKeyAndValueTensorsForImport(const Tensor& keys,
   return CheckKeyAndValueTensorsHelper(keys, values);
 }
 
+Status LookupInterface::CheckKeyTensorForRemove(const Tensor& keys) {
+  if (keys.dtype() != key_dtype()) {
+    return errors::InvalidArgument("Key must be type ", key_dtype(),
+                                   " but got ", keys.dtype());
+  }
+  return CheckKeyShape(keys.shape());
+}
+
 Status LookupInterface::CheckFindArguments(const Tensor& key,
                                            const Tensor& default_value) {
   TF_RETURN_IF_ERROR(CheckKeyAndValueTypes(key, default_value));
diff --git a/tensorflow/core/framework/lookup_interface.h b/tensorflow/core/framework/lookup_interface.h
index 0622dd06cb..d33945fd1b 100644
--- a/tensorflow/core/framework/lookup_interface.h
+++ b/tensorflow/core/framework/lookup_interface.h
@@ -64,6 +64,17 @@ class LookupInterface : public ResourceBase {
   virtual Status Insert(OpKernelContext* ctx, const Tensor& keys,
                         const Tensor& values) = 0;
 
+  // Removes elements from the table.
+  // This method is only implemented in mutable tables that can be updated over
+  // the execution of the graph. It returns Status::NotImplemented for read-only
+  // tables that are initialized once before they can be looked up.
+
+  // Returns the following statuses:
+  // - OK: when the remove finishes successfully.
+  // - InvalidArgument: if any of the preconditions on the lookup key fails.
+  // - Unimplemented: if the table does not support removals.
+  virtual Status Remove(OpKernelContext* ctx, const Tensor& keys) = 0;
+
   // Returns the number of elements in the table.
   virtual size_t size() const = 0;
 
@@ -107,6 +118,12 @@ class LookupInterface : public ResourceBase {
   virtual Status CheckKeyAndValueTensorsForImport(const Tensor& keys,
                                                   const Tensor& values);
 
+  // Check format of the key tensor for the Remove function.
+  // Returns OK if all the following requirements are satisfied, otherwise it
+  // returns InvalidArgument:
+  // - DataType of the tensor keys equals to the table key_dtype
+  virtual Status CheckKeyTensorForRemove(const Tensor& keys);
+
   // Check the arguments of a find operation. Returns OK if all the following
   // requirements are satisfied, otherwise it returns InvalidArgument:
   // - DataType of the tensor keys equals to the table key_dtype
diff --git a/tensorflow/core/kernels/initializable_lookup_table.h b/tensorflow/core/kernels/initializable_lookup_table.h
index 424fe5df3c..a14d4967a5 100644
--- a/tensorflow/core/kernels/initializable_lookup_table.h
+++ b/tensorflow/core/kernels/initializable_lookup_table.h
@@ -51,6 +51,12 @@ class InitializableLookupTable : public LookupInterface {
         "Insert not supported by InitializableLookupTable implementations");
   }
 
+  // Returns errors::Unimplemented.
+  Status Remove(OpKernelContext* ctx, const Tensor& keys) final {
+    return errors::Unimplemented(
+        "Remove not supported by InitializableLookupTable implementations");
+  }
+
   Status ExportValues(OpKernelContext* context) override {
     return errors::Unimplemented(
         "ExportValues not supported by InitializableLookupTable "
diff --git a/tensorflow/core/kernels/lookup_table_op.cc b/tensorflow/core/kernels/lookup_table_op.cc
index a495758861..0bc1ea77d6 100644
--- a/tensorflow/core/kernels/lookup_table_op.cc
+++ b/tensorflow/core/kernels/lookup_table_op.cc
@@ -89,6 +89,16 @@ class MutableHashTableOfScalars final : public LookupInterface {
     return DoInsert(false, keys, values);
   }
 
+  Status Remove(OpKernelContext* ctx, const Tensor& keys) override {
+    const auto key_values = keys.flat<K>();
+
+    mutex_lock l(mu_);
+    for (int64 i = 0; i < key_values.size(); ++i) {
+      table_.erase(SubtleMustCopyIfIntegral(key_values(i)));
+    }
+    return Status::OK();
+  }
+
   Status ImportValues(OpKernelContext* ctx, const Tensor& keys,
                       const Tensor& values) override {
     return DoInsert(true, keys, values);
@@ -212,6 +222,16 @@ class MutableHashTableOfTensors final : public LookupInterface {
     return DoInsert(false, keys, values);
   }
 
+  Status Remove(OpKernelContext* ctx, const Tensor& keys) override {
+    const auto key_values = keys.flat<K>();
+
+    mutex_lock l(mu_);
+    for (int64 i = 0; i < key_values.size(); ++i) {
+      table_.erase(SubtleMustCopyIfIntegral(key_values(i)));
+    }
+    return Status::OK();
+  }
+
   Status ImportValues(OpKernelContext* ctx, const Tensor& keys,
                       const Tensor& values) override {
     return DoInsert(true, keys, values);
@@ -326,6 +346,29 @@ class MutableDenseHashTable final : public LookupInterface {
         empty_key_input->template shaped<K, 2>({1, key_shape_.num_elements()}),
         0);
 
+    const Tensor* deleted_key_input;
+    OP_REQUIRES_OK(ctx, ctx->input("deleted_key", &deleted_key_input));
+    OP_REQUIRES(ctx, key_shape_.IsSameSize(deleted_key_input->shape()),
+                errors::InvalidArgument(
+                    "Empty and deleted keys must have same shape, got shapes: ",
+                    key_shape_.DebugString(), " and ",
+                    deleted_key_input->shape().DebugString()));
+    deleted_key_ = PersistentTensor(*deleted_key_input);
+    deleted_key_hash_ = HashKey(deleted_key_input->template shaped<K, 2>(
+                                    {1, key_shape_.num_elements()}),
+                                0);
+
+    if (empty_key_hash_ == deleted_key_hash_) {
+      const int64 key_size = key_shape_.num_elements();
+      const auto empty_key_matrix =
+          empty_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+      const auto deleted_key_matrix =
+          deleted_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+      OP_REQUIRES(
+          ctx, !IsEqualKey(empty_key_matrix, 0, deleted_key_matrix, 0),
+          errors::InvalidArgument("Empty and deleted keys cannot be equal"));
+    }
+
     int64 initial_num_buckets;
     OP_REQUIRES_OK(ctx, GetNodeAttr(kernel->def(), "initial_num_buckets",
                                     &initial_num_buckets));
@@ -360,6 +403,8 @@ class MutableDenseHashTable final : public LookupInterface {
         value_buckets_.AccessTensor(ctx)->template matrix<V>();
     const auto empty_key_matrix =
         empty_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+    const auto deleted_key_matrix =
+        deleted_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
     const int64 bit_mask = num_buckets_ - 1;
     // TODO(andreasst): parallelize using work_sharder
     for (int64 i = 0; i < num_elements; ++i) {
@@ -369,6 +414,11 @@ class MutableDenseHashTable final : public LookupInterface {
         return errors::InvalidArgument(
             "Using the empty_key as a table key is not allowed");
       }
+      if (deleted_key_hash_ == key_hash &&
+          IsEqualKey(deleted_key_matrix, 0, key_matrix, i)) {
+        return errors::InvalidArgument(
+            "Using the deleted_key as a table key is not allowed");
+      }
       int64 bucket_index = key_hash & bit_mask;
       int64 num_probes = 0;
       while (true) {
@@ -425,23 +475,40 @@ class MutableDenseHashTable final : public LookupInterface {
     return DoInsert(ctx, key, value, false);
   }
 
+  Status Remove(OpKernelContext* ctx, const Tensor& key) override
+      LOCKS_EXCLUDED(mu_) {
+    if (key.NumElements() != key.dim_size(0) * key_shape_.num_elements()) {
+      TensorShape expected_shape({key.dim_size(0)});
+      expected_shape.AppendShape(key_shape_);
+      return errors::InvalidArgument("Expected key shape ",
+                                     expected_shape.DebugString(), " got ",
+                                     key.shape().DebugString());
+    }
+    mutex_lock l(mu_);
+    return DoRemove(ctx, key);
+  }
+
   Status ImportValues(OpKernelContext* ctx, const Tensor& keys,
                       const Tensor& values) override LOCKS_EXCLUDED(mu_) {
     mutex_lock l(mu_);
     num_buckets_ = keys.dim_size(0);
     key_buckets_ = PersistentTensor(keys);
     value_buckets_ = PersistentTensor(values);
-    // Count the number of keys that are not the empty_key. This requires
-    // iterating through the whole table but that is OK as we only execute it
-    // during checkpoint restore.
+    // Count the number of keys that are not the empty_key or deleted_key.
+    // This requires iterating through the whole table but that is OK as we
+    // only execute it during checkpoint restore.
     num_entries_ = 0;
     const auto empty_key_tensor =
         empty_key_.AccessTensor(ctx)->template shaped<K, 2>(
             {1, key_shape_.num_elements()});
+    const auto deleted_key_tensor =
+        deleted_key_.AccessTensor(ctx)->template shaped<K, 2>(
+            {1, key_shape_.num_elements()});
     const auto key_buckets_tensor =
         key_buckets_.AccessTensor(ctx)->template matrix<K>();
     for (int64 i = 0; i < num_buckets_; ++i) {
-      if (!IsEqualKey(key_buckets_tensor, i, empty_key_tensor, 0)) {
+      if (!IsEqualKey(key_buckets_tensor, i, empty_key_tensor, 0) &&
+          !IsEqualKey(key_buckets_tensor, i, deleted_key_tensor, 0)) {
         ++num_entries_;
       }
     }
@@ -498,7 +565,8 @@ class MutableDenseHashTable final : public LookupInterface {
 
  private:
   Status DoInsert(OpKernelContext* ctx, const Tensor& key, const Tensor& value,
-                  bool ignore_empty_key) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                  bool ignore_empty_and_deleted_key)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     const int64 num_elements = (key.dims() == 0) ? 1 : key.dim_size(0);
     const int64 value_size = value_shape_.num_elements();
     const int64 key_size = key_shape_.num_elements();
@@ -511,17 +579,27 @@ class MutableDenseHashTable final : public LookupInterface {
         value_buckets_.AccessTensor(ctx)->template matrix<V>();
     const auto empty_key_tensor =
         empty_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+    const auto deleted_key_tensor =
+        deleted_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
     const int64 bit_mask = num_buckets_ - 1;
     for (int64 i = 0; i < num_elements; ++i) {
       const uint64 key_hash = HashKey(key_matrix, i);
       if (empty_key_hash_ == key_hash &&
           IsEqualKey(empty_key_tensor, 0, key_matrix, i)) {
-        if (ignore_empty_key) {
+        if (ignore_empty_and_deleted_key) {
           continue;
         }
         return errors::InvalidArgument(
             "Using the empty_key as a table key is not allowed");
       }
+      if (deleted_key_hash_ == key_hash &&
+          IsEqualKey(deleted_key_tensor, 0, key_matrix, i)) {
+        if (ignore_empty_and_deleted_key) {
+          continue;
+        }
+        return errors::InvalidArgument(
+            "Using the deleted_key as a table key is not allowed");
+      }
       int64 bucket_index = key_hash & bit_mask;
       int64 num_probes = 0;
       while (true) {
@@ -532,7 +610,9 @@ class MutableDenseHashTable final : public LookupInterface {
           }
           break;
         }
-        if (IsEqualKey(key_buckets_matrix, bucket_index, empty_key_tensor, 0)) {
+        if (IsEqualKey(key_buckets_matrix, bucket_index, empty_key_tensor, 0) ||
+            IsEqualKey(key_buckets_matrix, bucket_index, deleted_key_tensor,
+                       0)) {
           ++num_entries_;
           for (int64 j = 0; j < key_size; ++j) {
             key_buckets_matrix(bucket_index, j) =
@@ -556,6 +636,59 @@ class MutableDenseHashTable final : public LookupInterface {
     return Status::OK();
   }
 
+  Status DoRemove(OpKernelContext* ctx, const Tensor& key)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    const int64 num_elements = key.dim_size(0);
+    const int64 key_size = key_shape_.num_elements();
+    const auto key_matrix = key.shaped<K, 2>({num_elements, key_size});
+
+    auto key_buckets_matrix =
+        key_buckets_.AccessTensor(ctx)->template matrix<K>();
+    const auto empty_key_tensor =
+        empty_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+    const auto deleted_key_tensor =
+        deleted_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+    const auto deleted_key_flat =
+        deleted_key_.AccessTensor(ctx)->template flat<K>();
+    const int64 bit_mask = num_buckets_ - 1;
+    for (int64 i = 0; i < num_elements; ++i) {
+      const uint64 key_hash = HashKey(key_matrix, i);
+      if (empty_key_hash_ == key_hash &&
+          IsEqualKey(empty_key_tensor, 0, key_matrix, i)) {
+        return errors::InvalidArgument(
+            "Using the empty_key as a table key is not allowed");
+      }
+      if (deleted_key_hash_ == key_hash &&
+          IsEqualKey(deleted_key_tensor, 0, key_matrix, i)) {
+        return errors::InvalidArgument(
+            "Using the deleted_key as a table key is not allowed");
+      }
+      int64 bucket_index = key_hash & bit_mask;
+      int64 num_probes = 0;
+      while (true) {
+        if (IsEqualKey(key_buckets_matrix, bucket_index, key_matrix, i)) {
+          --num_entries_;
+          for (int64 j = 0; j < key_size; ++j) {
+            key_buckets_matrix(bucket_index, j) =
+                SubtleMustCopyIfIntegral(deleted_key_flat(j));
+          }
+          break;
+        }
+        if (IsEqualKey(key_buckets_matrix, bucket_index, empty_key_tensor, 0)) {
+          break;
+        }
+        ++num_probes;
+        bucket_index =
+            (bucket_index + num_probes) & bit_mask;  // quadratic probing
+        if (num_probes >= num_buckets_) {
+          return errors::Internal(
+              "Internal error in MutableDenseHashTable remove");
+        }
+      }
+    }
+    return Status::OK();
+  }
+
   Status AllocateBuckets(OpKernelContext* ctx, int64 new_num_buckets)
       EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     if (new_num_buckets < 4 ||
@@ -639,7 +772,9 @@ class MutableDenseHashTable final : public LookupInterface {
   PersistentTensor value_buckets_ GUARDED_BY(mu_);
   PersistentTensor empty_key_;
   uint64 empty_key_hash_;
-};
+  PersistentTensor deleted_key_;
+  uint64 deleted_key_hash_;
+};  // namespace lookup
 
 }  // namespace lookup
 
@@ -717,6 +852,39 @@ REGISTER_KERNEL_BUILDER(Name("LookupTableInsert").Device(DEVICE_CPU),
 REGISTER_KERNEL_BUILDER(Name("LookupTableInsertV2").Device(DEVICE_CPU),
                         LookupTableInsertOp);
 
+// Table remove op.
+class LookupTableRemoveOp : public OpKernel {
+ public:
+  explicit LookupTableRemoveOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    lookup::LookupInterface* table;
+    OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table));
+    core::ScopedUnref unref_me(table);
+
+    DataType expected_input_0 =
+        (ctx->input_dtype(0) == DT_RESOURCE) ? DT_RESOURCE : DT_STRING_REF;
+    DataTypeVector expected_inputs = {expected_input_0, table->key_dtype()};
+    OP_REQUIRES_OK(ctx, ctx->MatchSignature(expected_inputs, {}));
+
+    const Tensor& key = ctx->input(1);
+    OP_REQUIRES_OK(ctx, table->CheckKeyTensorForRemove(key));
+
+    int64 memory_used_before = 0;
+    if (ctx->track_allocations()) {
+      memory_used_before = table->MemoryUsed();
+    }
+    OP_REQUIRES_OK(ctx, table->Remove(ctx, key));
+    if (ctx->track_allocations()) {
+      ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+                                               memory_used_before);
+    }
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("LookupTableRemoveV2").Device(DEVICE_CPU),
+                        LookupTableRemoveOp);
+
 // Op that returns the size of the given table.
 class LookupTableSizeOp : public OpKernel {
  public:
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index cfb1055d3c..415e15b720 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -30320,6 +30320,22 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "LookupTableRemoveV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  is_stateful: true
+}
 op {
   name: "LookupTableSize"
   input_arg {
@@ -36706,6 +36722,10 @@ op {
     name: "empty_key"
     type_attr: "key_dtype"
   }
+  input_arg {
+    name: "deleted_key"
+    type_attr: "key_dtype"
+  }
   output_arg {
     name: "table_handle"
     type: DT_RESOURCE
diff --git a/tensorflow/core/ops/lookup_ops.cc b/tensorflow/core/ops/lookup_ops.cc
index 72a77be70d..a0987cd982 100644
--- a/tensorflow/core/ops/lookup_ops.cc
+++ b/tensorflow/core/ops/lookup_ops.cc
@@ -214,6 +214,19 @@ REGISTER_OP("LookupTableInsertV2")
       return Status::OK();
     });
 
+REGISTER_OP("LookupTableRemoveV2")
+    .Input("table_handle: resource")
+    .Input("keys: Tin")
+    .Attr("Tin: type")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle handle;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle));
+      TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &handle));
+
+      // TODO(turboale): Validate keys shape.
+      return Status::OK();
+    });
+
 REGISTER_OP("LookupTableSize")
     .Input("table_handle: Ref(string)")
     .Output("size: int64")
@@ -407,6 +420,7 @@ REGISTER_OP("MutableDenseHashTable")
 
 REGISTER_OP("MutableDenseHashTableV2")
     .Input("empty_key: key_dtype")
+    .Input("deleted_key: key_dtype")
     .Output("table_handle: resource")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
-- 
GitLab


From d78c747e9177fc93d43a580acef2b62eb1420859 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Tue, 9 Oct 2018 16:39:33 -0700
Subject: [PATCH 1333/1357] Make lite_test.py run in open source.

PiperOrigin-RevId: 216445964
---
 tensorflow/contrib/lite/python/BUILD        |  2 --
 tensorflow/contrib/lite/python/lite_test.py | 14 +++++++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index 916788f215..be6c44d306 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -73,7 +73,6 @@ py_test(
     data = ["@tflite_mobilenet_ssd_quant_protobuf//:tflite_graph.pb"],
     srcs_version = "PY2AND3",
     tags = [
-        "no_oss",
         "no_windows",
     ],
     deps = [
@@ -172,7 +171,6 @@ py_test(
     srcs = ["convert_saved_model_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-        "no_oss",
         "no_windows",
     ],
     visibility = ["//visibility:public"],
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index d243a494f6..ef9bbded2a 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -591,11 +591,19 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
         'Unable to parse input file \'{}\'.'.format(graph_def_file),
         str(error.exception))
 
-  # TODO(nupurgarg): Test model loading in open source.
   def _initObjectDetectionArgs(self):
     # Initializes the arguments required for the object detection model.
-    self._graph_def_file = resource_loader.get_path_to_datafile(
-        'testdata/tflite_graph.pb')
+    # Looks for the model file which is saved in a different location interally
+    # and externally.
+    filename = resource_loader.get_path_to_datafile('testdata/tflite_graph.pb')
+    if not os.path.exists(filename):
+      filename = os.path.join(
+          resource_loader.get_root_dir_with_all_resources(),
+          '../tflite_mobilenet_ssd_quant_protobuf/tflite_graph.pb')
+      if not os.path.exists(filename):
+        raise IOError("File '{0}' does not exist.".format(filename))
+
+    self._graph_def_file = filename
     self._input_arrays = ['normalized_input_image_tensor']
     self._output_arrays = [
         'TFLite_Detection_PostProcess', 'TFLite_Detection_PostProcess:1',
-- 
GitLab


From ef9d2e7be9ae9fbcd4720d46e1f8a8cac902a1cd Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 9 Oct 2018 16:44:25 -0700
Subject: [PATCH 1334/1357] Remove the deprecated created and IS_LOCAL
 abstractions from activity analysis.

PiperOrigin-RevId: 216446750
---
 tensorflow/python/autograph/pyct/anno.py      |   2 -
 .../pyct/static_analysis/activity.py          |  82 ++----
 .../pyct/static_analysis/activity_test.py     | 268 +++++++-----------
 .../pyct/static_analysis/live_values.py       |   5 +-
 4 files changed, 121 insertions(+), 236 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py
index 5392e6ea03..e1f4af46cd 100644
--- a/tensorflow/python/autograph/pyct/anno.py
+++ b/tensorflow/python/autograph/pyct/anno.py
@@ -63,10 +63,8 @@ class Static(NoValue):
   The enum values are used strictly for documentation purposes.
   """
 
-  # Deprecated - use reaching definitions instead.
   # Symbols
   # These flags are boolean.
-  IS_LOCAL = 'Symbol is local to the function scope being analyzed.'
   IS_PARAM = 'Symbol is a parameter to the function being analyzed.'
 
   # Scopes
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py
index 086eda7574..cc159031ff 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py
@@ -44,7 +44,6 @@ class Scope(object):
 
   Attributes:
     modified: identifiers modified in this scope
-    created: identifiers created in this scope
     used: identifiers referenced in this scope
   """
 
@@ -54,7 +53,8 @@ class Scope(object):
     Args:
       parent: A Scope or None.
       isolated: Whether the scope is isolated, that is, whether variables
-          created in this scope should be visible to the parent scope.
+          modified in this scope should be considered modified in the parent
+          scope.
       add_unknown_symbols: Whether to handle attributed and subscripts
           without having first seen the base name.
           E.g., analyzing the statement 'x.y = z' without first having seen 'x'.
@@ -63,13 +63,11 @@ class Scope(object):
     self.parent = parent
     self.add_unknown_symbols = add_unknown_symbols
     self.modified = set()
-    # TODO(mdan): Completely remove this.
-    self.created = set()
     self.used = set()
     self.params = {}
     self.returned = set()
 
-  # TODO(mdan): Rename to `locals`
+  # TODO(mdan): Rename to `reserved`
   @property
   def referenced(self):
     if not self.isolated and self.parent is not None:
@@ -77,8 +75,7 @@ class Scope(object):
     return self.used
 
   def __repr__(self):
-    return 'Scope{r=%s, c=%s, w=%s}' % (tuple(self.used), tuple(self.created),
-                                        tuple(self.modified))
+    return 'Scope{r=%s, w=%s}' % (tuple(self.used), tuple(self.modified))
 
   def copy_from(self, other):
     """Recursively copies the contents of this scope from another scope."""
@@ -88,7 +85,6 @@ class Scope(object):
       self.parent.copy_from(other.parent)
     self.isolated = other.isolated
     self.modified = copy.copy(other.modified)
-    self.created = copy.copy(other.created)
     self.used = copy.copy(other.used)
     self.params = copy.copy(other.params)
     self.returned = copy.copy(other.returned)
@@ -109,56 +105,28 @@ class Scope(object):
     if other.parent is not None:
       self.parent.merge_from(other.parent)
     self.modified |= other.modified
-    self.created |= other.created
     self.used |= other.used
     self.params.update(other.params)
     self.returned |= other.returned
 
-  def has(self, name):
-    if name in self.modified:
-      return True
-    elif self.parent is not None:
-      return self.parent.has(name)
-    return False
-
   def mark_read(self, name):
     self.used.add(name)
-    if self.parent is not None and name not in self.created:
+    if self.parent is not None and name not in self.params:
       self.parent.mark_read(name)
 
+  def mark_modified(self, name):
+    """Marks the given symbol as modified in the current scope."""
+    self.modified.add(name)
+    if not self.isolated:
+      if self.parent is not None:
+        self.parent.mark_modified(name)
+
   def mark_param(self, name, owner):
     # Assumption: all AST nodes have the same life span. This lets us use
     # a weak reference to mark the connection between a symbol node and the
     # function node whose argument that symbol is.
     self.params[name] = weakref.ref(owner)
 
-  def mark_creation(self, name, writes_create_symbol=False):
-    """Mark a qualified name as created."""
-    if name.is_composite():
-      parent = name.parent
-      if not writes_create_symbol:
-        return
-      else:
-        if not self.has(parent):
-          if self.add_unknown_symbols:
-            self.mark_read(parent)
-          else:
-            raise ValueError('Unknown symbol "%s".' % parent)
-    self.created.add(name)
-
-  def mark_write(self, name):
-    """Marks the given symbol as modified in the current scope."""
-    self.modified.add(name)
-    if self.isolated:
-      self.mark_creation(name)
-    else:
-      if self.parent is None:
-        self.mark_creation(name)
-      else:
-        if not self.parent.has(name):
-          self.mark_creation(name)
-        self.parent.mark_write(name)
-
   def mark_returned(self, name):
     self.returned.add(name)
     if not self.isolated and self.parent is not None:
@@ -197,10 +165,7 @@ class ActivityAnalyzer(transformer.Base):
         return True
     return False
 
-  def _track_symbol(self,
-                    node,
-                    composite_writes_alter_parent=False,
-                    writes_create_symbol=False):
+  def _track_symbol(self, node, composite_writes_alter_parent=False):
     # A QN may be missing when we have an attribute (or subscript) on a function
     # call. Example: a().b
     if not anno.hasanno(node, anno.Basic.QN):
@@ -208,11 +173,9 @@ class ActivityAnalyzer(transformer.Base):
     qn = anno.getanno(node, anno.Basic.QN)
 
     if isinstance(node.ctx, gast.Store):
-      self.scope.mark_write(qn)
+      self.scope.mark_modified(qn)
       if qn.is_composite and composite_writes_alter_parent:
-        self.scope.mark_write(qn.parent)
-      if writes_create_symbol:
-        self.scope.mark_creation(qn, writes_create_symbol=True)
+        self.scope.mark_modified(qn.parent)
       if self._in_aug_assign:
         self.scope.mark_read(qn)
     elif isinstance(node.ctx, gast.Load):
@@ -220,13 +183,11 @@ class ActivityAnalyzer(transformer.Base):
     elif isinstance(node.ctx, gast.Param):
       # Param contexts appear in function defs, so they have the meaning of
       # defining a variable.
-      self.scope.mark_write(qn)
+      self.scope.mark_modified(qn)
       self.scope.mark_param(qn, self.enclosing_entities[-1])
     else:
       raise ValueError('Unknown context %s for node %s.' % (type(node.ctx), qn))
 
-    anno.setanno(node, NodeAnno.IS_LOCAL, self.scope.has(qn))
-
     if self._in_return_statement:
       self.scope.mark_returned(qn)
 
@@ -243,6 +204,12 @@ class ActivityAnalyzer(transformer.Base):
     self._exit_scope()
     return node
 
+  def visit_nonlocal(self, node):
+    raise NotImplementedError()
+
+  def visit_global(self, node):
+    raise NotImplementedError()
+
   def visit_Expr(self, node):
     return self._process_statement(node)
 
@@ -271,8 +238,7 @@ class ActivityAnalyzer(transformer.Base):
   def visit_Attribute(self, node):
     node = self.generic_visit(node)
     if self._in_constructor and self._node_sets_self_attribute(node):
-      self._track_symbol(
-          node, composite_writes_alter_parent=True, writes_create_symbol=True)
+      self._track_symbol(node, composite_writes_alter_parent=True)
     else:
       self._track_symbol(node)
     return node
@@ -336,7 +302,7 @@ class ActivityAnalyzer(transformer.Base):
     # of its name, along with the usage of any decorator accompany it.
     self._enter_scope(False)
     node.decorator_list = self.visit_block(node.decorator_list)
-    self.scope.mark_write(qual_names.QN(node.name))
+    self.scope.mark_modified(qual_names.QN(node.name))
     anno.setanno(node, anno.Static.SCOPE, self.scope)
     self._exit_scope()
 
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
index d4a6ce8ac3..9a4f1bf09b 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
@@ -32,62 +32,63 @@ from tensorflow.python.platform import test
 
 class ScopeTest(test.TestCase):
 
+  def assertMissing(self, qn, scope):
+    self.assertNotIn(qn, scope.used)
+    self.assertNotIn(qn, scope.modified)
+
+  def assertReadOnly(self, qn, scope):
+    self.assertIn(qn, scope.used)
+    self.assertNotIn(qn, scope.modified)
+
+  def assertWriteOnly(self, qn, scope):
+    self.assertNotIn(qn, scope.used)
+    self.assertIn(qn, scope.modified)
+
+  def assertReadWrite(self, qn, scope):
+    self.assertIn(qn, scope.used)
+    self.assertIn(qn, scope.modified)
+
   def test_basic(self):
     scope = activity.Scope(None)
-    self.assertFalse(scope.has(QN('foo')))
+    self.assertMissing(QN('foo'), scope)
 
     scope.mark_read(QN('foo'))
-    self.assertFalse(scope.has(QN('foo')))
-
-    scope.mark_write(QN('foo'))
-    self.assertTrue(scope.has(QN('foo')))
+    self.assertReadOnly(QN('foo'), scope)
 
-    scope.mark_read(QN('bar'))
-    self.assertFalse(scope.has(QN('bar')))
+    scope.mark_modified(QN('foo'))
+    self.assertReadWrite(QN('foo'), scope)
 
   def test_copy_from(self):
     scope = activity.Scope(None)
-    scope.mark_write(QN('foo'))
-
+    scope.mark_modified(QN('foo'))
     other = activity.Scope(None)
     other.copy_from(scope)
 
-    self.assertTrue(QN('foo') in other.modified)
+    self.assertWriteOnly(QN('foo'), other)
 
-    scope.mark_write(QN('bar'))
+    scope.mark_modified(QN('bar'))
     scope.copy_from(other)
 
-    self.assertFalse(QN('bar') in scope.modified)
+    self.assertMissing(QN('bar'), scope)
 
-    scope.mark_write(QN('bar'))
+    scope.mark_modified(QN('bar'))
     scope.merge_from(other)
 
-    self.assertTrue(QN('bar') in scope.modified)
-    self.assertFalse(QN('bar') in other.modified)
+    self.assertWriteOnly(QN('bar'), scope)
+    self.assertMissing(QN('bar'), other)
 
   def test_copy_of(self):
     scope = activity.Scope(None)
     scope.mark_read(QN('foo'))
+    other = activity.Scope.copy_of(scope)
 
-    self.assertTrue(QN('foo') in activity.Scope.copy_of(scope).used)
+    self.assertReadOnly(QN('foo'), other)
 
     child_scope = activity.Scope(scope)
     child_scope.mark_read(QN('bar'))
+    other = activity.Scope.copy_of(child_scope)
 
-    self.assertTrue(QN('bar') in activity.Scope.copy_of(child_scope).used)
-
-  def test_nesting(self):
-    scope = activity.Scope(None)
-    scope.mark_write(QN('foo'))
-    scope.mark_read(QN('bar'))
-
-    child = activity.Scope(scope)
-    self.assertTrue(child.has(QN('foo')))
-    self.assertTrue(scope.has(QN('foo')))
-
-    child.mark_write(QN('bar'))
-    self.assertTrue(child.has(QN('bar')))
-    self.assertFalse(scope.has(QN('bar')))
+    self.assertReadOnly(QN('bar'), other)
 
   def test_referenced(self):
     scope = activity.Scope(None)
@@ -123,25 +124,6 @@ class ActivityAnalyzerTest(test.TestCase):
     node = activity.resolve(node, entity_info)
     return node, entity_info
 
-  def test_local_markers(self):
-
-    def test_fn(a):  # pylint:disable=unused-argument
-      b = c  # pylint:disable=undefined-variable
-      while b > 0:
-        b -= 1
-      return b
-
-    node, _ = self._parse_and_analyze(test_fn)
-    self.assertFalse(
-        anno.getanno(node.body[0].body[0].value,
-                     NodeAnno.IS_LOCAL))  # c in b = c
-    self.assertTrue(
-        anno.getanno(node.body[0].body[1].test.left,
-                     NodeAnno.IS_LOCAL))  # b in b > 0
-    self.assertTrue(
-        anno.getanno(node.body[0].body[2].value,
-                     NodeAnno.IS_LOCAL))  # b in return b
-
   def assertSymbolSetsAre(self, expected, actual, name):
     expected = set(expected)
     actual = set(str(s) for s in actual)
@@ -153,12 +135,10 @@ class ActivityAnalyzerTest(test.TestCase):
         '  Extra:    %s\n' % (name.upper(), expected, actual,
                               expected - actual, actual - expected))
 
-  def assertScopeIsRmc(self, scope, used, modified, created):
+  def assertScopeIs(self, scope, used, modified):
     """Assert the scope contains specific used, modified & created variables."""
     self.assertSymbolSetsAre(used, scope.used, 'read')
     self.assertSymbolSetsAre(modified, scope.modified, 'modified')
-    # Created is deprecated, we're no longer verifying it.
-    # self.assertSymbolSetsAre(created, scope.created, 'created')
 
   def test_print_statement(self):
 
@@ -181,7 +161,7 @@ class ActivityAnalyzerTest(test.TestCase):
       print_args_scope = anno.getanno(print_node, NodeAnno.ARGS_SCOPE)
     # We basically need to detect which variables are captured by the call
     # arguments.
-    self.assertScopeIsRmc(print_args_scope, ('a', 'b'), (), ())
+    self.assertScopeIs(print_args_scope, ('a', 'b'), ())
 
   def test_call_args(self):
 
@@ -195,8 +175,8 @@ class ActivityAnalyzerTest(test.TestCase):
     call_node = node.body[0].body[2].value
     # We basically need to detect which variables are captured by the call
     # arguments.
-    self.assertScopeIsRmc(
-        anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'b'), (), ())
+    self.assertScopeIs(
+        anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'b'), ())
 
   def test_call_args_attributes(self):
 
@@ -210,12 +190,8 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     call_node = node.body[0].body[1].value
-    self.assertScopeIsRmc(
-        anno.getanno(call_node, NodeAnno.ARGS_SCOPE),
-        ('a', 'a.b', 'a.c'),
-        (),
-        (),
-    )
+    self.assertScopeIs(
+        anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'a.b', 'a.c'), ())
 
   def test_call_args_subscripts(self):
 
@@ -230,12 +206,9 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     call_node = node.body[0].body[2].value
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
         anno.getanno(call_node, NodeAnno.ARGS_SCOPE),
-        ('a', 'a[0]', 'a[b]', 'b'),
-        (),
-        (),
-    )
+        ('a', 'a[0]', 'a[b]', 'b'), ())
 
   def test_while(self):
 
@@ -248,14 +221,13 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     while_node = node.body[0].body[1]
-    self.assertScopeIsRmc(
-        anno.getanno(while_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'),
-        ('c',))
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
+        anno.getanno(while_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'))
+    self.assertScopeIs(
         anno.getanno(while_node, NodeAnno.BODY_SCOPE).parent, ('a', 'b', 'c'),
-        ('b', 'c'), ('a', 'b', 'c'))
-    self.assertScopeIsRmc(
-        anno.getanno(while_node, NodeAnno.COND_SCOPE), ('b',), (), ())
+        ('b', 'c'))
+    self.assertScopeIs(
+        anno.getanno(while_node, NodeAnno.COND_SCOPE), ('b',), ())
 
   def test_for(self):
 
@@ -268,11 +240,11 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     for_node = node.body[0].body[1]
-    self.assertScopeIsRmc(
-        anno.getanno(for_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'), ('c',))
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
+        anno.getanno(for_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'))
+    self.assertScopeIs(
         anno.getanno(for_node, NodeAnno.BODY_SCOPE).parent, ('a', 'b', 'c'),
-        ('b', 'c', '_'), ('a', 'b', 'c', '_'))
+        ('b', 'c', '_'))
 
   def test_if(self):
 
@@ -289,18 +261,16 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     if_node = node.body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('x', 'y', 'z'),
-        ('y', 'z'))
-    # TODO(mdan): Double check: is it ok to not mark a local symbol as not read?
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('x', 'z', 'u'),
-        ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u'))
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('x', 'y', 'z'))
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('x', 'y', 'z', 'u'),
+        ('x', 'y', 'z', 'u'))
+    self.assertScopeIs(
         anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('x', 'y'),
-        ('x', 'y', 'u'), ('y', 'u'))
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent, ('x', 'z', 'u'),
+        ('x', 'y', 'u'))
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent,
         ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u'))
 
   def test_if_attributes(self):
@@ -316,24 +286,14 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     if_node = node.body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE),
-        ('a', 'a.c'),
-        ('a.b', 'd'),
-        ('d',),
-    )
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE),
-        ('a', 'a.c'),
-        ('a.b', 'd'),
-        ('d',),
-    )
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent,
-        ('a', 'a.c', 'd'),
-        ('a.b', 'd'),
-        ('a', 'd'),
-    )
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('a', 'a.c'), ('a.b', 'd'))
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('a', 'a.c'),
+        ('a.b', 'd'))
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('a', 'a.c', 'd'),
+        ('a.b', 'd'))
 
   def test_if_subscripts(self):
 
@@ -348,25 +308,15 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     if_node = node.body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE),
-        ('a', 'b', 'c', 'a[c]'),
-        ('a[b]', 'd'),
-        ('d',),
-    )
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('a', 'b', 'c', 'a[c]'),
+        ('a[b]', 'd'))
     # TODO(mdan): Should subscript writes (a[0] = 1) be considered to read "a"?
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE),
-        ('a', 'e'),
-        ('a[0]', 'd'),
-        ('d',),
-    )
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('a', 'e'), ('a[0]', 'd'))
+    self.assertScopeIs(
         anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent,
-        ('a', 'b', 'c', 'd', 'e', 'a[c]'),
-        ('d', 'a[b]', 'a[0]'),
-        ('a', 'b', 'c', 'd', 'e'),
-    )
+        ('a', 'b', 'c', 'd', 'e', 'a[c]'), ('d', 'a[b]', 'a[0]'))
 
   def test_nested_if(self):
 
@@ -380,12 +330,10 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     inner_if_node = node.body[0].body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(inner_if_node, NodeAnno.BODY_SCOPE), ('b',), ('a',),
-        ('a',))
-    self.assertScopeIsRmc(
-        anno.getanno(inner_if_node, NodeAnno.ORELSE_SCOPE), ('b',), ('a',),
-        ('a',))
+    self.assertScopeIs(
+        anno.getanno(inner_if_node, NodeAnno.BODY_SCOPE), ('b',), ('a',))
+    self.assertScopeIs(
+        anno.getanno(inner_if_node, NodeAnno.ORELSE_SCOPE), ('b',), ('a',))
 
   def test_nested_function(self):
 
@@ -404,11 +352,8 @@ class ActivityAnalyzerTest(test.TestCase):
     node, _ = self._parse_and_analyze(test_fn)
     fn_def_node = node.body[0].body[0]
 
-    self.assertScopeIsRmc(
-        anno.getanno(fn_def_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('y',), (
-            'x',
-            'y',
-        ))
+    self.assertScopeIs(
+        anno.getanno(fn_def_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('y',))
 
   def test_constructor_attributes(self):
 
@@ -420,12 +365,9 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(TestClass)
     init_node = node.body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(init_node, NodeAnno.BODY_SCOPE),
-        ('self', 'a', 'self.b'),
-        ('self', 'self.b', 'self.b.c'),
-        ('self', 'a', 'self.b'),
-    )
+    self.assertScopeIs(
+        anno.getanno(init_node, NodeAnno.BODY_SCOPE), ('self', 'a', 'self.b'),
+        ('self', 'self.b', 'self.b.c'))
 
   def test_aug_assign_subscripts(self):
 
@@ -434,12 +376,8 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE),
-        ('a', 'a[0]'),
-        ('a[0]',),
-        ('a',),
-    )
+    self.assertScopeIs(
+        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('a', 'a[0]'), ('a[0]',))
 
   def test_return_vars_are_read(self):
 
@@ -448,16 +386,7 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE),
-        ('c',),
-        (),
-        (
-            'a',
-            'b',
-            'c',
-        ),
-    )
+    self.assertScopeIs(anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('c',), ())
 
   def test_aug_assign(self):
 
@@ -466,12 +395,8 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE),
-        ('a', 'b'),
-        ('a'),
-        ('a', 'b'),
-    )
+    self.assertScopeIs(
+        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('a', 'b'), ('a'))
 
   def test_aug_assign_rvalues(self):
 
@@ -485,23 +410,22 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE),
-        ('foo', 'x'),
-        (),
-        ('x',),
-    )
+    self.assertScopeIs(
+        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('foo', 'x'), ())
 
-  def test_params_created(self):
+  def test_params(self):
 
     def test_fn(a, b):  # pylint: disable=unused-argument
       return b
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('b',), (('')),
-        (('a', 'b')))
+    body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE)
+    self.assertScopeIs(body_scope, ('b',), ())
+    self.assertScopeIs(body_scope.parent, ('b',), ('a', 'b'))
+
+    args_scope = anno.getanno(fn_node.args, anno.Static.SCOPE)
+    self.assertSymbolSetsAre(('a', 'b'), args_scope.params.keys(), 'params')
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/pyct/static_analysis/live_values.py b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
index 4ceddce53b..dc363f9a47 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/live_values.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
@@ -28,7 +28,6 @@ import six
 
 from tensorflow.python.autograph.pyct import anno
 from tensorflow.python.autograph.pyct import transformer
-from tensorflow.python.autograph.pyct.static_analysis.annos import NodeAnno
 
 
 # TODO(aqj): Do we need this? Do other builtins fail in similar ways
@@ -133,11 +132,9 @@ class LiveValueResolver(transformer.Base):
         anno.setanno(node, 'fqn',
                      anno.getanno(node.value, 'type_fqn') + (node.attr,))
     elif isinstance(node.value, gast.Name):
-      stem_name = node.value
-      # All nonlocal symbols should be fully resolved.
-      assert anno.hasanno(stem_name, NodeAnno.IS_LOCAL), stem_name
       # TODO(mdan): Figure out what to do when calling attribute on local object
       # Maybe just leave as-is?
+      pass
     return node
 
 
-- 
GitLab


From c770568935b85d506dc1a1f671822a7e122b5056 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 16:48:34 -0700
Subject: [PATCH 1335/1357] Internal change.

PiperOrigin-RevId: 216447412
---
 .../contrib/lite/kernels/sparse_output_fully_connected.cc  | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
index 843ed0768c..226bba2d47 100644
--- a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
+++ b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
@@ -88,6 +88,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const bool is_hybrid_op =
       (weights->type == kTfLiteUInt8 && input->type == kTfLiteFloat32);
 
+  // Resize output.
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(1);
+  output_size_array->data[0] = 1;
+  TF_LITE_ENSURE_OK(context,
+                    context->ResizeTensor(context, output, output_size_array));
+
   if (is_hybrid_op) {
     TfLiteIntArrayFree(node->temporaries);
     node->temporaries = TfLiteIntArrayCreate(kNumTemporaryTensors);
-- 
GitLab


From d4526cf9d1d58cbe480e7d2b8199620e0e9f0572 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 16:52:15 -0700
Subject: [PATCH 1336/1357] [XLA] Added xla::CreateModuleFromProto(...)
 combining loading module from proto and verifying it with HloVerifier.

PiperOrigin-RevId: 216447947
---
 tensorflow/compiler/xla/layout_util.cc        |   2 +-
 tensorflow/compiler/xla/service/BUILD         |   1 +
 .../compiler/xla/service/hlo_instruction.cc   |  14 ++-
 .../compiler/xla/service/hlo_proto_util.cc    |  12 ++
 .../compiler/xla/service/hlo_proto_util.h     |   6 +
 .../compiler/xla/service/hlo_verifier.cc      | 104 +++++++++++++++++-
 6 files changed, 132 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index 3c8db9aa45..19667b7ed9 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -205,7 +205,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
     return Status::OK();
   }
 
-  if (layout.format() == INVALID_FORMAT) {
+  if (layout.format() == INVALID_FORMAT || !Format_IsValid(layout.format())) {
     return InvalidArgument(
         "Layout does not have a valid format: layout {%s}, shape {%s}",
         layout.ShortDebugString(), shape.ShortDebugString());
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 2b292ed053..f9f741aaee 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -3127,6 +3127,7 @@ cc_library(
         ":buffer_assignment",
         ":hlo",
         ":hlo_proto",
+        ":hlo_verifier",
         "//tensorflow/compiler/xla:status",
         "//tensorflow/compiler/xla:util",
     ],
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 050d28b289..09bcf8a9e7 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -305,6 +305,9 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
                                           proto.tuple_index());
       break;
     case HloOpcode::kReducePrecision:
+      TF_RET_CHECK(proto.operand_ids_size() == 1)
+          << "ReducePrecision instruction should have 1 operand but sees "
+          << proto.operand_ids_size();
       instruction =
           CreateReducePrecision(proto.shape(), operands(0),
                                 proto.exponent_bits(), proto.mantissa_bits());
@@ -312,12 +315,16 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     case HloOpcode::kInfeed: {
       const Shape& data_shape =
           ShapeUtil::GetTupleElementShape(proto.shape(), 0);
-      TF_RET_CHECK(proto.operand_ids_size() == 1);
+      TF_RET_CHECK(proto.operand_ids_size() == 1)
+          << "Infeed instruction should have 1 operand but sees "
+          << proto.operand_ids_size();
       instruction =
           CreateInfeed(data_shape, operands(0), proto.infeed_config());
     } break;
     case HloOpcode::kOutfeed:
-      TF_RET_CHECK(proto.operand_ids_size() == 2);
+      TF_RET_CHECK(proto.operand_ids_size() == 2)
+          << "Outfeed instruction should have 2 operands but sees "
+          << proto.operand_ids_size();
       TF_RETURN_IF_ERROR(
           ShapeUtil::ValidateShapeWithOptionalLayout(proto.outfeed_shape()));
       instruction = CreateOutfeed(proto.outfeed_shape(), operands(0),
@@ -349,6 +356,9 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       break;
     }
     case HloOpcode::kCollectivePermute: {
+      TF_RET_CHECK(proto.operand_ids_size() == 1)
+          << "CollectivePermute instruction should have 1 operand but sees "
+          << proto.operand_ids_size();
       std::vector<std::pair<int64, int64>> source_target_pairs(
           proto.source_target_pairs_size());
       for (int i = 0; i < source_target_pairs.size(); i++) {
diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc
index b9c0b0c4ee..026a0e8fba 100644
--- a/tensorflow/compiler/xla/service/hlo_proto_util.cc
+++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/hlo_proto_util.h"
+#include "tensorflow/compiler/xla/service/hlo_verifier.h"
 
 #include <string>
 
@@ -36,6 +37,17 @@ HloProto MakeHloProto(const HloModule& module) {
   return proto;
 }
 
+StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
+    const HloModuleProto& proto, const HloModuleConfig& module_config) {
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
+                      HloModule::CreateFromProto(proto, module_config));
+  TF_RETURN_IF_ERROR(
+      HloVerifier(/*layout_sensitive=*/true, /*allow_mixed_precision=*/false)
+          .Run(module.get())
+          .status());
+  return std::move(module);
+}
+
 StatusOr<std::vector<const Shape*>> EntryComputationParameterShapes(
     const HloProto& hlo_proto) {
   if (!hlo_proto.has_hlo_module()) {
diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.h b/tensorflow/compiler/xla/service/hlo_proto_util.h
index 3d9c375cd5..1db82dd6fc 100644
--- a/tensorflow/compiler/xla/service/hlo_proto_util.h
+++ b/tensorflow/compiler/xla/service/hlo_proto_util.h
@@ -35,6 +35,12 @@ HloProto MakeHloProto(const HloModule& module,
 // will not be included in the output.
 HloProto MakeHloProto(const HloModule& module);
 
+// Create an HLO state from serialized representation. In addition to
+// creating the proto with HloModule::CreateFromProto(...) it also
+// uses HloVerifier to ensure basic invariants are held.
+StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
+    const HloModuleProto& proto, const HloModuleConfig& module_config);
+
 // Returns the shapes of the parameters of the entry computation. Shape pointers
 // refer to shapes inside of the given HloProto.
 StatusOr<std::vector<const Shape*>> EntryComputationParameterShapes(
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index be3bee5975..620458855f 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -27,6 +27,15 @@ limitations under the License.
 
 namespace xla {
 
+static Status CheckOperandCount(const HloInstruction* hlo, int expected) {
+  if (hlo->operand_count() != expected) {
+    return InternalError("Expected %d operands for %s instruction: %s",
+                         expected, HloOpcodeString(hlo->opcode()),
+                         hlo->ToString());
+  }
+  return Status::OK();
+}
+
 Status ShapeVerifier::HandleElementwiseUnary(HloInstruction* hlo) {
   return CheckUnaryShape(hlo);
 }
@@ -58,12 +67,14 @@ Status ShapeVerifier::HandleConcatenate(HloInstruction* concatenate) {
 }
 
 Status ShapeVerifier::HandleConvert(HloInstruction* convert) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(convert, 1));
   return CheckShape(convert, ShapeInference::InferConvertShape(
                                  convert->operand(0)->shape(),
                                  convert->shape().element_type()));
 }
 
 Status ShapeVerifier::HandleBitcastConvert(HloInstruction* convert) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(convert, 1));
   return CheckShape(convert, ShapeInference::InferBitcastConvertShape(
                                  convert->operand(0)->shape(),
                                  convert->shape().element_type()));
@@ -74,6 +85,7 @@ Status ShapeVerifier::HandleCopy(HloInstruction* copy) {
 }
 
 Status ShapeVerifier::HandleDot(HloInstruction* dot) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(dot, 2));
   TF_ASSIGN_OR_RETURN(const Shape expected,
                       ShapeInference::InferDotOpShape(
                           dot->operand(0)->shape(), dot->operand(1)->shape(),
@@ -82,6 +94,7 @@ Status ShapeVerifier::HandleDot(HloInstruction* dot) {
 }
 
 Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(convolution, 2));
   TF_ASSIGN_OR_RETURN(
       const Shape expected,
       ShapeInference::InferConvolveShape(
@@ -92,6 +105,7 @@ Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) {
 }
 
 Status ShapeVerifier::HandleFft(HloInstruction* fft) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(fft, 1));
   TF_ASSIGN_OR_RETURN(
       const Shape expected,
       ShapeInference::InferFftShape(fft->operand(0)->shape(), fft->fft_type(),
@@ -118,11 +132,13 @@ Status ShapeVerifier::HandleAllToAll(HloInstruction* hlo) {
 }
 
 Status ShapeVerifier::HandleCollectivePermute(HloInstruction* hlo) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(hlo, 1));
   return CheckShape(hlo, ShapeInference::InferCollectivePermuteShape(
                              hlo->operand(0)->shape()));
 }
 
 Status ShapeVerifier::HandleReducePrecision(HloInstruction* reduce_precision) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(reduce_precision, 1));
   return CheckShape(reduce_precision, ShapeInference::InferReducePrecisionShape(
                                           reduce_precision->operand(0)->shape(),
                                           reduce_precision->exponent_bits(),
@@ -156,6 +172,7 @@ Status ShapeVerifier::CheckOperandAndParameter(
 }
 
 Status ShapeVerifier::HandleInfeed(HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 1));
   HloInfeedInstruction* infeed = Cast<HloInfeedInstruction>(instruction);
   TF_RETURN_IF_ERROR(CheckIsTokenOperand(instruction, 0));
 
@@ -166,6 +183,7 @@ Status ShapeVerifier::HandleInfeed(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleOutfeed(HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2));
   HloOutfeedInstruction* outfeed = Cast<HloOutfeedInstruction>(instruction);
   TF_RETURN_IF_ERROR(CheckIsTokenOperand(instruction, 1));
 
@@ -192,10 +210,7 @@ bool ShapeVerifier::HasCompatibleElementTypes(const Shape& shape_0,
 }
 
 Status ShapeVerifier::HandleRng(HloInstruction* instruction) {
-  if (instruction->operand_count() != 2) {
-    return InternalError("Expected two operands for Rng instruction: %s",
-                         instruction->ToString());
-  }
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2));
 
   const Shape& shape_0 = instruction->operand(0)->shape();
   const Shape& shape_1 = instruction->operand(1)->shape();
@@ -244,12 +259,17 @@ Status ShapeVerifier::HandleRng(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleReverse(HloInstruction* reverse) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(reverse, 1));
   return CheckShape(
       reverse, ShapeInference::InferReverseShape(reverse->operand(0)->shape(),
                                                  reverse->dimensions()));
 }
 
 Status ShapeVerifier::HandleSort(HloInstruction* sort) {
+  if (sort->operand_count() < 1 || sort->operand_count() > 2) {
+    return InternalError("Expected 1 or 2 operands for %s instruction: %s",
+                         HloOpcodeString(sort->opcode()), sort->ToString());
+  }
   if (sort->operand_count() == 2 &&
       !ShapeUtil::SameDimensions(sort->operand(0)->shape(),
                                  sort->operand(1)->shape())) {
@@ -263,10 +283,12 @@ Status ShapeVerifier::HandleSort(HloInstruction* sort) {
 }
 
 Status ShapeVerifier::HandleConstant(HloInstruction* constant) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(constant, 0));
   return CheckShape(constant, constant->literal().shape());
 }
 
 Status ShapeVerifier::HandleIota(HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 0));
   auto* iota = Cast<HloIotaInstruction>(instruction);
   const int64 rank = ShapeUtil::Rank(iota->shape());
   if (rank == 0) {
@@ -281,6 +303,7 @@ Status ShapeVerifier::HandleIota(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleGetTupleElement(HloInstruction* get_tuple_element) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(get_tuple_element, 1));
   return CheckShape(get_tuple_element,
                     ShapeInference::InferGetTupleElementShape(
                         get_tuple_element->operand(0)->shape(),
@@ -288,6 +311,12 @@ Status ShapeVerifier::HandleGetTupleElement(HloInstruction* get_tuple_element) {
 }
 
 Status ShapeVerifier::HandleReduce(HloInstruction* reduce) {
+  if (reduce->operand_count() % 2 != 0) {
+    return InternalError(
+        "Expected an even number of operands for %s instruction: %s",
+        HloOpcodeString(reduce->opcode()), reduce->ToString());
+  }
+
   std::vector<const Shape*> operand_shapes;
   for (const HloInstruction* operand : reduce->operands()) {
     operand_shapes.push_back(&operand->shape());
@@ -298,10 +327,12 @@ Status ShapeVerifier::HandleReduce(HloInstruction* reduce) {
 }
 
 Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(bitcast, 1));
   return Status::OK();
 }
 
 Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(broadcast, 1));
   // HLO broadcast has no exact analog at the proto level so there is no
   // ShapeInference method. Check the output shape explicitly.
   const Shape& operand_shape = broadcast->operand(0)->shape();
@@ -322,6 +353,7 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) {
 }
 
 Status ShapeVerifier::HandleReshape(HloInstruction* reshape) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(reshape, 1));
   // Check for mixed precision.
   TF_RETURN_IF_ERROR(CheckShape(reshape, reshape->shape()));
   TF_RET_CHECK(ShapeUtil::ElementsIn(reshape->shape()) ==
@@ -330,12 +362,14 @@ Status ShapeVerifier::HandleReshape(HloInstruction* reshape) {
 }
 
 Status ShapeVerifier::HandleTranspose(HloInstruction* transpose) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(transpose, 1));
   return CheckShape(
       transpose, ShapeInference::InferTransposeShape(
                      transpose->operand(0)->shape(), transpose->dimensions()));
 }
 
 Status ShapeVerifier::HandleParameter(HloInstruction* hlo) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(hlo, 0));
   return Status::OK();
 }
 
@@ -383,6 +417,7 @@ Status ShapeVerifier::HandleCustomCall(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleSlice(HloInstruction* slice) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(slice, 1));
   return CheckShape(slice,
                     ShapeInference::InferSliceShape(
                         slice->operand(0)->shape(), slice->slice_starts(),
@@ -390,6 +425,7 @@ Status ShapeVerifier::HandleSlice(HloInstruction* slice) {
 }
 
 Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_slice, 2));
   return CheckShape(dynamic_slice, ShapeInference::InferDynamicSliceShape(
                                        dynamic_slice->operand(0)->shape(),
                                        dynamic_slice->operand(1)->shape(),
@@ -398,6 +434,7 @@ Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) {
 
 Status ShapeVerifier::HandleDynamicUpdateSlice(
     HloInstruction* dynamic_update_slice) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_update_slice, 3));
   return CheckShape(dynamic_update_slice,
                     ShapeInference::InferDynamicUpdateSliceShape(
                         dynamic_update_slice->operand(0)->shape(),
@@ -427,6 +464,7 @@ Status ShapeVerifier::HandleMap(HloInstruction* map) {
 }
 
 Status ShapeVerifier::HandleReduceWindow(HloInstruction* reduce_window) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(reduce_window, 2));
   return CheckShape(
       reduce_window,
       ShapeInference::InferReduceWindowShape(
@@ -436,6 +474,7 @@ Status ShapeVerifier::HandleReduceWindow(HloInstruction* reduce_window) {
 }
 
 Status ShapeVerifier::HandleSelectAndScatter(HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 3));
   return CheckShape(
       instruction,
       ShapeInference::InferSelectAndScatterShape(
@@ -446,6 +485,7 @@ Status ShapeVerifier::HandleSelectAndScatter(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleWhile(HloInstruction* xla_while) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(xla_while, 1));
   TF_RETURN_IF_ERROR(
       CheckOperandAndParameter(xla_while, 0, xla_while->while_body(), 0));
   TF_RETURN_IF_ERROR(
@@ -465,6 +505,7 @@ Status ShapeVerifier::HandleWhile(HloInstruction* xla_while) {
 }
 
 Status ShapeVerifier::HandleConditional(HloInstruction* conditional) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(conditional, 3));
   TF_RETURN_IF_ERROR(CheckOperandAndParameter(
       conditional, 1, conditional->true_computation(), 0));
   TF_RETURN_IF_ERROR(CheckOperandAndParameter(
@@ -479,12 +520,14 @@ Status ShapeVerifier::HandleConditional(HloInstruction* conditional) {
 }
 
 Status ShapeVerifier::HandlePad(HloInstruction* pad) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(pad, 2));
   return CheckShape(pad, ShapeInference::InferPadShape(pad->operand(0)->shape(),
                                                        pad->operand(1)->shape(),
                                                        pad->padding_config()));
 }
 
 Status ShapeVerifier::HandleSend(HloInstruction* send) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(send, 2));
   return CheckShape(send,
                     ShapeUtil::MakeTupleShape({send->operand(0)->shape(),
                                                ShapeUtil::MakeShape(U32, {}),
@@ -492,10 +535,12 @@ Status ShapeVerifier::HandleSend(HloInstruction* send) {
 }
 
 Status ShapeVerifier::HandleSendDone(HloInstruction* send_done) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(send_done, 1));
   return CheckShape(send_done, ShapeUtil::MakeTokenShape());
 }
 
 Status ShapeVerifier::HandleRecv(HloInstruction* recv) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(recv, 1));
   return CheckShape(
       recv, ShapeUtil::MakeTupleShape(
                 {ShapeUtil::GetTupleElementShape(recv->shape(), 0),
@@ -503,6 +548,7 @@ Status ShapeVerifier::HandleRecv(HloInstruction* recv) {
 }
 
 Status ShapeVerifier::HandleRecvDone(HloInstruction* recv_done) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(recv_done, 1));
   return CheckShape(
       recv_done,
       ShapeUtil::MakeTupleShape(
@@ -512,6 +558,7 @@ Status ShapeVerifier::HandleRecvDone(HloInstruction* recv_done) {
 
 Status ShapeVerifier::HandleBatchNormTraining(
     HloInstruction* batch_norm_training) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_training, 3));
   return CheckShape(batch_norm_training,
                     ShapeInference::InferBatchNormTrainingShape(
                         batch_norm_training->operand(0)->shape(),
@@ -522,6 +569,7 @@ Status ShapeVerifier::HandleBatchNormTraining(
 
 Status ShapeVerifier::HandleBatchNormInference(
     HloInstruction* batch_norm_inference) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_inference, 5));
   return CheckShape(batch_norm_inference,
                     ShapeInference::InferBatchNormInferenceShape(
                         batch_norm_inference->operand(0)->shape(),
@@ -533,6 +581,7 @@ Status ShapeVerifier::HandleBatchNormInference(
 }
 
 Status ShapeVerifier::HandleBatchNormGrad(HloInstruction* batch_norm_grad) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_grad, 5));
   return CheckShape(batch_norm_grad, ShapeInference::InferBatchNormGradShape(
                                          batch_norm_grad->operand(0)->shape(),
                                          batch_norm_grad->operand(1)->shape(),
@@ -601,6 +650,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) {
 }  // namespace
 
 Status ShapeVerifier::HandleGather(HloInstruction* gather) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(gather, 2));
   return CheckShape(
       gather,
       ShapeInference::InferGatherShape(
@@ -609,6 +659,7 @@ Status ShapeVerifier::HandleGather(HloInstruction* gather) {
 }
 
 Status ShapeVerifier::HandleScatter(HloInstruction* scatter) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(scatter, 3));
   return CheckShape(
       scatter, ShapeInference::InferScatterShape(
                    scatter->operand(0)->shape(), scatter->operand(1)->shape(),
@@ -696,12 +747,14 @@ Status ShapeVerifier::CheckShape(const HloInstruction* instruction,
 }
 
 Status ShapeVerifier::CheckUnaryShape(const HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 1));
   return CheckShape(instruction,
                     ShapeInference::InferUnaryOpShape(instruction->opcode(),
                                                       instruction->operand(0)));
 }
 
 Status ShapeVerifier::CheckBinaryShape(const HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2));
   return CheckShape(
       instruction, ShapeInference::InferBinaryOpShape(instruction->opcode(),
                                                       instruction->operand(0),
@@ -709,6 +762,7 @@ Status ShapeVerifier::CheckBinaryShape(const HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::CheckTernaryShape(const HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 3));
   return CheckShape(instruction,
                     ShapeInference::InferTernaryOpShape(
                         instruction->opcode(), instruction->operand(0),
@@ -816,6 +870,47 @@ Status VerifyEntryAndExitShapes(const HloModule& module) {
   return Status::OK();
 }
 
+// Verifies that entry computation layout matches characteristics of
+// entry computation.
+Status CheckEntryComputationLayout(const HloModule& module) {
+  const HloComputation* computation = module.entry_computation();
+  const auto& layout = module.entry_computation_layout();
+
+  // TODO(117498192): Change into a call to Compatible(...).
+  if (!ShapeUtil::CompatibleIgnoringFpPrecision(
+          computation->root_instruction()->shape(),
+          layout.result_layout().shape())) {
+    return InternalError(
+        "Shape of the root instruction of entry computation (%s) should be "
+        "compatible to one specified in module's entry computation layout (%s)",
+        ShapeUtil::HumanString(computation->root_instruction()->shape()),
+        ShapeUtil::HumanString(layout.result_layout().shape()));
+  }
+
+  if (computation->num_parameters() != layout.parameter_count()) {
+    return InternalError(
+        "Number of parameters in entry computation layout (%d) must be same "
+        "as number of parameters of entry computation computation (%d)",
+        layout.parameter_count(), computation->num_parameters());
+  }
+
+  for (int i = 0; i < computation->num_parameters(); ++i) {
+    if (!ShapeUtil::Compatible(computation->parameter_instruction(i)->shape(),
+                               layout.parameter_shape(i))) {
+      return InternalError(
+          "Shape of the entry computation parameter %d is %s should be "
+          "compatible to the one specified in module's entry computation "
+          "layout %s",
+          i,
+          ShapeUtil::HumanString(
+              computation->parameter_instruction(i)->shape()),
+          ShapeUtil::HumanString(layout.parameter_shape(i)));
+    }
+  }
+
+  return Status::OK();
+}
+
 // Checks if the given two instructions share the same channel id.
 Status CheckSameChannel(const HloInstruction* instr1,
                         const HloInstruction* instr2) {
@@ -1213,6 +1308,7 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier));
   }
 
+  TF_RETURN_IF_ERROR(CheckEntryComputationLayout(*module));
   TF_RETURN_IF_ERROR(VerifyEntryAndExitShapes(*module));
 
   // If the module has a schedule, it must be valid.
-- 
GitLab


From 65b7d0b2f84c334327a295bf41bc06c7f6b8ffe5 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 9 Oct 2018 16:52:56 -0700
Subject: [PATCH 1337/1357] [XLA:GPU] Elide the SequentialThunk when emitting
 scatter with no copy

We have a 1-element thunk sequence if we're not copying. That's still two
thunks and hlo profiling gets confused if it sees two thunks for the same
instruction and one of them claims to be the whole instruction.

PiperOrigin-RevId: 216448063
---
 .../xla/service/gpu/ir_emitter_unnested.cc         | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index bef7a55301..09486d291a 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -2080,9 +2080,9 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
   // Launch a kernel that reads every element in the updates tensor. We could
   // also do one kernel per window instead if bounds checks turn out to be a
   // bottleneck.
-  thunks.push_back(BuildKernelThunk(
-      scatter,
-      /*implements_whole_instruction=*/operand_buffer == destination_buffer));
+  thunks.push_back(
+      BuildKernelThunk(scatter,
+                       /*implements_whole_instruction=*/thunks.empty()));
 
   LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
       updates->shape(), ir_emitter_context_->device_description());
@@ -2090,8 +2090,12 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
                          static_cast<KernelThunk*>(thunks.back().get()),
                          ir_emitter_context_->llvm_module());
 
-  thunk_sequence_->emplace_back(
-      absl::make_unique<SequentialThunk>(std::move(thunks), scatter));
+  if (thunks.size() == 1) {
+    thunk_sequence_->push_back(std::move(thunks[0]));
+  } else {
+    thunk_sequence_->emplace_back(
+        absl::make_unique<SequentialThunk>(std::move(thunks), scatter));
+  }
   return ParallelLoopEmitter(loop_body_emitter, updates->shape(),
                              launch_dimensions, &b_)
       .EmitLoop(IrName(scatter),
-- 
GitLab


From bb5fc614a4a358b350ef8dd19cb7010760fa9b29 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 16:53:29 -0700
Subject: [PATCH 1338/1357] [XLA] Cleanup: Make AllocationTracker::Resolve
 const.

So that when resolving some global data, we don't have to worry whether
"Resolve" is going to mutate the real data.

PiperOrigin-RevId: 216448145
---
 tensorflow/compiler/xla/service/allocation_tracker.cc | 6 +++---
 tensorflow/compiler/xla/service/allocation_tracker.h  | 8 ++++----
 tensorflow/compiler/xla/service/service.cc            | 4 ++--
 tensorflow/compiler/xla/service/service.h             | 4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc
index 1ed6142dce..ef5e211646 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.cc
+++ b/tensorflow/compiler/xla/service/allocation_tracker.cc
@@ -176,13 +176,13 @@ StatusOr<std::vector<GlobalDataHandle>> AllocationTracker::DeconstructTuple(
 }
 
 StatusOr<std::vector<const ShapedBuffer*>> AllocationTracker::Resolve(
-    const GlobalDataHandle& data) {
+    const GlobalDataHandle& data) const {
   tensorflow::mutex_lock lock(mutex_);
   return AllocationTracker::ResolveInternal(data);
 }
 
 StatusOr<const ShapedBuffer*> AllocationTracker::ResolveForReplica(
-    const GlobalDataHandle& data, int replica_id) {
+    const GlobalDataHandle& data, int replica_id) const {
   tensorflow::mutex_lock lock(mutex_);
   TF_ASSIGN_OR_RETURN(std::vector<const ShapedBuffer*> replicated_buffers,
                       ResolveInternal(data));
@@ -196,7 +196,7 @@ StatusOr<const ShapedBuffer*> AllocationTracker::ResolveForReplica(
 }
 
 StatusOr<std::vector<const ShapedBuffer*>> AllocationTracker::ResolveInternal(
-    const GlobalDataHandle& data) {
+    const GlobalDataHandle& data) const {
   VLOG(2) << "resolve:" << data.handle();
   auto it = handle_to_shaped_buffers_.find(data.handle());
   if (it == handle_to_shaped_buffers_.end()) {
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h
index 43feccee3c..98d1a302a9 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.h
+++ b/tensorflow/compiler/xla/service/allocation_tracker.h
@@ -65,13 +65,13 @@ class AllocationTracker {
   // replica, or provide an error status to say whether any of those buffers
   // were not found (or found, but found deallocated).
   StatusOr<std::vector<const ShapedBuffer*>> Resolve(
-      const GlobalDataHandle& data);
+      const GlobalDataHandle& data) const;
 
   // Resolves a handle from an XLA client and replica id to a shaped buffer, or
   // provide an error status to say whether it was not found (or found, but
   // found deallocated).
   StatusOr<const ShapedBuffer*> ResolveForReplica(const GlobalDataHandle& data,
-                                                  int replica_id);
+                                                  int replica_id) const;
 
  private:
   // Data structure encapsulating single memory allocation on the device.
@@ -87,7 +87,7 @@ class AllocationTracker {
   // Internal helper which resolves the given GlobalDataHandle to a
   // list of ScopedShapedBuffers.
   StatusOr<std::vector<const ShapedBuffer*>> ResolveInternal(
-      const GlobalDataHandle& data) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+      const GlobalDataHandle& data) const EXCLUSIVE_LOCKS_REQUIRED(mutex_);
 
   // Internal helper which registers a vector of shaped buffers, one per
   // replica.  ShapedBufferTy is either ScopedShapedBuffer or ShapedBuffer.  If
@@ -113,7 +113,7 @@ class AllocationTracker {
   // maintained per device ordinal.
   using AllocationMap = absl::flat_hash_map<const void*, Allocation>;
 
-  tensorflow::mutex mutex_;
+  mutable tensorflow::mutex mutex_;
 
   // Backend to use with this tracker. The backend supplies the memory allocator
   // to use when deallocating memory.
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index b27a92f2a0..084df17951 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -207,7 +207,7 @@ Status Service::ValidateResultShape(const Shape& client_shape,
 StatusOr<std::vector<std::vector<const ShapedBuffer*>>>
 Service::ResolveAndValidateArguments(
     absl::Span<const GlobalDataHandle* const> arguments,
-    absl::Span<se::StreamExecutor* const> stream_executors) {
+    absl::Span<se::StreamExecutor* const> stream_executors) const {
   CHECK_EQ(options_.number_of_replicas(), stream_executors.size());
   std::vector<std::vector<const ShapedBuffer*>> replicated_arguments;
   replicated_arguments.resize(options_.number_of_replicas());
@@ -590,7 +590,7 @@ StatusOr<std::vector<se::StreamExecutor*>> Service::GetExecutors(
 
 StatusOr<std::vector<std::vector<const ShapedBuffer*>>> Service::GetArguments(
     const ExecutionOptions& execution_options,
-    absl::Span<const GlobalDataHandle* const> arguments) {
+    absl::Span<const GlobalDataHandle* const> arguments) const {
   // Resolve the allocations for the arguments of the computation, and create
   // a vector of device memory offsets for the arguments from the allocations.
   // In the case of partitioned computations, assume all arguments go on the
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 1f62fad4c8..8cf1a7b9f0 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -191,7 +191,7 @@ class Service : public ServiceInterface {
   // Prepare the arguments for executing parallel.
   StatusOr<std::vector<std::vector<const ShapedBuffer*>>> GetArguments(
       const ExecutionOptions& execution_options,
-      absl::Span<const GlobalDataHandle* const> arguments);
+      absl::Span<const GlobalDataHandle* const> arguments) const;
 
  protected:
   friend class LocalExecutable;
@@ -208,7 +208,7 @@ class Service : public ServiceInterface {
   StatusOr<std::vector<std::vector<const ShapedBuffer*>>>
   ResolveAndValidateArguments(
       absl::Span<const GlobalDataHandle* const> arguments,
-      absl::Span<se::StreamExecutor* const> stream_executors);
+      absl::Span<se::StreamExecutor* const> stream_executors) const;
 
   // Create a Hlo module config for the given program shape and arguments.
   // execution_options is optional; if not given a default is used.
-- 
GitLab


From 9bd459e4ceba14f9bb1af98d52a109325de952e8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 17:14:39 -0700
Subject: [PATCH 1339/1357] Adds an Objective-C API to TensorFlow Lite
 experimental.

PiperOrigin-RevId: 216451263
---
 .../contrib/lite/experimental/objc/BUILD      |  94 ++++
 .../contrib/lite/experimental/objc/README.md  |  10 +
 .../Configs/TensorFlowLiteObjc.tulsigen       |  60 +++
 .../project.tulsiconf                         |  17 +
 .../experimental/objc/apis/TFLInterpreter.h   | 188 ++++++++
 .../objc/apis/TFLInterpreterOptions.h         |  37 ++
 .../objc/apis/TFLQuantizationParameters.h     |  36 ++
 .../lite/experimental/objc/apis/TFLTensor.h   |  77 +++
 .../experimental/objc/sources/TFLErrorUtil.h  |  51 ++
 .../experimental/objc/sources/TFLErrorUtil.m  |  45 ++
 .../objc/sources/TFLInterpreter.mm            | 440 ++++++++++++++++++
 .../objc/sources/TFLInterpreterOptions.m      |  30 ++
 .../objc/sources/TFLQuantizationParameters.m  |  23 +
 .../objc/sources/TFLTensor+Internal.h         |  42 ++
 .../experimental/objc/sources/TFLTensor.m     |  54 +++
 .../objc/tests/TFLInterpreterOptionsTests.m   |  49 ++
 .../objc/tests/TFLInterpreterTests.m          | 266 +++++++++++
 .../tools/pip_package/pip_smoke_test.py       |   1 +
 18 files changed, 1520 insertions(+)
 create mode 100644 tensorflow/contrib/lite/experimental/objc/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/objc/README.md
 create mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
 create mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
 create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m

diff --git a/tensorflow/contrib/lite/experimental/objc/BUILD b/tensorflow/contrib/lite/experimental/objc/BUILD
new file mode 100644
index 0000000000..236b96adb5
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/BUILD
@@ -0,0 +1,94 @@
+# TensorFlow Lite Objective-C API.
+
+package(default_visibility = ["//visibility:private"])
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tools/build_defs/apple:ios.bzl", "ios_unit_test")
+
+SOURCES = glob([
+    "sources/*.h",
+    "sources/*.m",
+    "sources/*.mm",
+])
+
+API_HEADERS = glob([
+    "apis/*.h",
+])
+
+MINIMUM_OS_VERSION = "8.0"
+
+# Compiler flags for building regular non-test libraries.
+RELEASE_COPTS = [
+    # Enables language-specific warnings for Objective-C, Objective-C++, C, and C++.
+    "-Wall",
+    # Warns if functions, variables, and types marked with the deprecated attribute are being used.
+    "-Wdeprecated-declarations",
+    # Warns for errors in documentation.
+    "-Wdocumentation",
+    # Turns all warnings into errors.
+    "-Werror",
+    # Enables extra warning flags that are not enabled by -Wall.
+    "-Wextra",
+    # Warns if a global function is defined without a previous prototype declaration.
+    "-Wmissing-prototypes",
+    # From -Wextra. Disables warning when signed value is converted to unsigned value during comparison.
+    "-Wno-sign-compare",
+    # From -Wextra. Disables warning for unused parameters, which are common in delegate methods and block callbacks.
+    "-Wno-unused-parameter",
+    # Warns if a global or local variable or type declaration shadows another variable, parameter, type, class member, or instance variable.
+    "-Wshadow",
+    # Warns if a function is declared or defined without specifying the argument types. For a block with no args, use (void) instead of ().
+    "-Wstrict-prototypes",
+    # Warns if an @selector() expression is encountered with a method name that hasn't been defined yet.
+    "-Wundeclared-selector",
+
+    # Turn off warnings for headers not part of TensorFlow Lite Objective-C API.
+    "--system-header-prefix=third_party/tensorflow/contrib/lite/experimental/c/",
+]
+
+# Compiler flags for building test libraries.
+TEST_COPTS = RELEASE_COPTS + [
+    # From -Wall. Disables warning when passing nil to a callee that requires a non-null argument.
+    "-Wno-nonnull",
+    # Disables warning when a global or local variable or type declaration shadows another.
+    "-Wno-shadow",
+]
+
+objc_library(
+    name = "TensorFlowLiteObjCLib",
+    srcs = SOURCES,
+    hdrs = API_HEADERS,
+    copts = RELEASE_COPTS,
+    deps = [
+        "//tensorflow/contrib/lite/experimental/c:c_api",
+    ],
+    alwayslink = 1,
+)
+
+ios_unit_test(
+    name = "TensorFlowLiteObjCTests",
+    size = "small",
+    minimum_os_version = MINIMUM_OS_VERSION,
+    deps = [":TensorFlowLiteObjCTestLib"],
+)
+
+objc_library(
+    name = "TensorFlowLiteObjCTestLib",
+    testonly = 1,
+    srcs = glob([
+        "tests/*.m",
+    ]),
+    hdrs = glob([
+        "apis/*.h",
+        "sources/*.h",
+        "tests/*.h",
+    ]),
+    copts = TEST_COPTS,
+    resources = [
+        "//tensorflow/contrib/lite:testdata/add.bin",
+    ],
+    deps = [
+        ":TensorFlowLiteObjCLib",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/objc/README.md b/tensorflow/contrib/lite/experimental/objc/README.md
new file mode 100644
index 0000000000..e8f150b1e8
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/README.md
@@ -0,0 +1,10 @@
+# TensorFlow Lite Objective-C API
+
+## TensorFlowLiteObjc Tulsi Project
+
+Open the `TensorFlowLiteObjc.tulsiproj` using the Tulsi application on Mac or by
+running the following command in Terminal from the root source directory:
+
+```shell
+generate_xcodeproj.sh --genconfig tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj:TensorFlowLiteObjC --outputfolder ~/path/to/xcodeproj
+```
diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
new file mode 100644
index 0000000000..babb5902d3
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
@@ -0,0 +1,60 @@
+{
+  "sourceFilters" : [
+    "third_party/tensorflow/contrib/lite",
+    "third_party/tensorflow/contrib/lite/experimental/c",
+    "third_party/tensorflow/contrib/lite/experimental/objc",
+    "third_party/tensorflow/contrib/lite/experimental/objc/apis",
+    "third_party/tensorflow/contrib/lite/experimental/objc/sources",
+    "third_party/tensorflow/contrib/lite/experimental/objc/tests",
+    "third_party/tensorflow/contrib/lite/kernels",
+    "third_party/tensorflow/contrib/lite/kernels/internal",
+    "third_party/tensorflow/contrib/lite/nnapi",
+    "third_party/tensorflow/contrib/lite/schema",
+  ],
+  "buildTargets" : [
+    "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCLib",
+    "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCTests",
+  ],
+  "projectName" : "TensorFlowLiteObjC",
+  "optionSet" : {
+    "LaunchActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildStartupOptionsRelease" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildOptionsRelease" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildOptionsDebug" : {
+      "p" : "$(inherited)"
+    },
+    "EnvironmentVariables" : {
+      "p" : "$(inherited)"
+    },
+    "BuildActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "CommandlineArguments" : {
+      "p" : "$(inherited)"
+    },
+    "TestActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildStartupOptionsDebug" : {
+      "p" : "$(inherited)"
+    },
+    "BuildActionPostActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "TestActionPostActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "LaunchActionPostActionScript" : {
+      "p" : "$(inherited)"
+    }
+  },
+  "additionalFilePaths" : [
+    "third_party/tensorflow/contrib/lite/experimental/objc/BUILD",
+  ]
+}
diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
new file mode 100644
index 0000000000..00299cd4cf
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
@@ -0,0 +1,17 @@
+{
+  "configDefaults" : {
+    "optionSet" : {
+      "BazelBuildOptionsDebug" : {
+        "p" : "--ios_minimum_os=8.0"
+      },
+      "BazelBuildOptionsRelease" : {
+        "p" : "--ios_minimum_os=8.0"
+      },
+    }
+  },
+  "projectName" : "TensorFlowLiteObjC",
+  "packages" : [
+    "third_party/tensorflow/contrib/lite/experimental/objc"
+  ],
+  "workspaceRoot" : "../../../../../../.."
+}
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
new file mode 100644
index 0000000000..c07ffc06ff
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
@@ -0,0 +1,188 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+@class TFLInterpreterOptions;
+@class TFLTensor;
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @enum TFLInterpreterErrorCode
+ * This enum specifies various error codes related to `TFLInterpreter`.
+ */
+typedef NS_ENUM(NSUInteger, TFLInterpreterErrorCode) {
+  /** Provided tensor index is invalid. */
+  TFLInterpreterErrorCodeInvalidTensorIndex,
+
+  /** Input data has invalid byte size. */
+  TFLInterpreterErrorCodeInvalidInputByteSize,
+
+  /** Provided shape is invalid. It must be a non-empty array of positive unsigned integers. */
+  TFLInterpreterErrorCodeInvalidShape,
+
+  /** Provided model cannot be loaded. */
+  TFLInterpreterErrorCodeFailedToLoadModel,
+
+  /** Failed to create `TFLInterpreter`. */
+  TFLInterpreterErrorCodeFailedToCreateInterpreter,
+
+  /** Failed to invoke `TFLInterpreter`. */
+  TFLInterpreterErrorCodeFailedToInvoke,
+
+  /** Failed to retrieve a tensor. */
+  TFLInterpreterErrorCodeFailedToGetTensor,
+
+  /** Failed to resize an input tensor. */
+  TFLInterpreterErrorCodeFailedToResizeInputTensor,
+
+  /** Failed to copy data into an input tensor. */
+  TFLInterpreterErrorCodeFailedToCopyDataToInputTensor,
+
+  /** Failed to get data from an output tensor. */
+  TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor,
+
+  /** Failed to allocate memory for tensors. */
+  TFLInterpreterErrorCodeFailedToAllocateTensors,
+
+  /** Operaton not allowed without allocating memory for tensors first. */
+  TFLInterpreterErrorCodeAllocateTensorsRequired,
+
+  /** Operaton not allowed without invoking the interpreter first. */
+  TFLInterpreterErrorCodeInvokeInterpreterRequired,
+};
+
+/**
+ * A TensorFlow Lite model interpreter.
+ */
+@interface TFLInterpreter : NSObject
+
+/** The total number of input tensors. 0 if the interpreter creation failed. */
+@property(nonatomic, readonly) NSUInteger inputTensorCount;
+
+/** The total number of output tensors. 0 if the interpreter creation failed. */
+@property(nonatomic, readonly) NSUInteger outputTensorCount;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+/**
+ * Initializes a new TensorFlow Lite interpreter instance with the given model file path and the
+ * default interpreter options.
+ *
+ * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
+ *
+ * @return A new instance of `TFLInterpreter` with the given model and the default interpreter
+ *     options.
+ */
+- (instancetype)initWithModelPath:(NSString *)modelPath;
+
+/**
+ * Initializes a new TensorFlow Lite interpreter instance with the given model file path and
+ * options.
+ *
+ * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
+ * @param options Options to use for configuring the TensorFlow Lite interpreter.
+ *
+ * @return A new instance of `TFLInterpreter` with the given model and options.
+ */
+- (instancetype)initWithModelPath:(NSString *)modelPath
+                          options:(TFLInterpreterOptions *)options NS_DESIGNATED_INITIALIZER;
+
+/**
+ * Invokes the interpreter to run inference.
+ *
+ * @param error An optional error parameter populated when there is an error in invoking the
+ *     interpreter.
+ *
+ * @return Whether the invocation is successful. Returns NO if an error occurred.
+ */
+- (BOOL)invokeWithError:(NSError **)error;
+
+/**
+ * Returns the input tensor at the given index.
+ *
+ * @param index The index of an input tensor.
+ * @param error An optional error parameter populated when there is an error in looking up the input
+ *     tensor.
+ *
+ * @return The input tensor at the given index. `nil` if there is an error.
+ */
+- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Returns the output tensor at the given index.
+ *
+ * @param index The index of an output tensor.
+ * @param error An optional error parameter populated when there is an error in looking up the
+ *     output tensor.
+ *
+ * @return The output tensor at the given index. `nil` if there is an error.
+ */
+- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Resizes the input tensor at the given index to the specified shape (an array of positive unsigned
+ * integers).
+ *
+ * @param index The index of an input tensor.
+ * @param shape Shape that the given input tensor should be resized to. It should be an array of
+ *     positive unsigned integer(s) containing the size of each dimension.
+ * @param error An optional error parameter populated when there is an error in resizing the input
+ *     tensor.
+ *
+ * @return Whether the input tensor was resized successfully. Returns NO if an error occurred.
+ */
+- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
+                         toShape:(NSArray<NSNumber *> *)shape
+                           error:(NSError **)error;
+
+/**
+ * Copies the given data into the input tensor at the given index. This is allowed only before the
+ * interpreter is invoked.
+ *
+ * @param data The data to set. The byte size of the data must match what's required by the given
+ *     input tensor.
+ * @param index The index of an input tensor.
+ * @param error An optional error parameter populated when there is an error in setting the data.
+ *
+ * @return Whether the data was set into the input tensor successfully. Returns NO if an error
+ *     occurred.
+ */
+- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Gets the data from the output tensor at the given index. The interpreter invocation has to
+ * complete before the data can be retrieved from an output tensor.
+ *
+ * @param index The index of an output tensor.
+ * @param error An optional error parameter populated when there is an error in getting the data.
+ *
+ * @return The data of the output tensor at the given index. `nil` if there is an error.
+ */
+- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Allocates memory for tensors.
+ *
+ * @param error An optional error parameter populated when there is an error in allocating memory.
+ *
+ * @return Whether memory allocation is successful. Returns NO if an error occurred.
+ */
+- (BOOL)allocateTensorsWithError:(NSError **)error;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
new file mode 100644
index 0000000000..6461fbf017
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
@@ -0,0 +1,37 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Custom configuration options for a TensorFlow Lite interpreter. */
+@interface TFLInterpreterOptions : NSObject
+
+/**
+ * Maximum number of threads that the interpreter should run on. Defaults to 0 (unspecified, letting
+ * TensorFlow Lite to optimize the threading decision).
+ */
+@property(nonatomic) NSUInteger numberOfThreads;
+
+/**
+ * Initializes a new instance of `TFLInterpreterOptions`.
+ *
+ * @return A new instance of `TFLInterpreterOptions`.
+ */
+- (instancetype)init NS_DESIGNATED_INITIALIZER;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
new file mode 100644
index 0000000000..3d5cf793c5
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
@@ -0,0 +1,36 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * Parameters for asymmetric quantization. Quantized values can be converted to float values using:
+ * `realValue = scale * (quantizedValue - zeroPoint)`.
+ */
+@interface TFLQuantizationParameters : NSObject
+
+/** Scale of asymmetric quantization. */
+@property(nonatomic, readonly) float scale;
+
+/** Zero point of asymmetric quantization. */
+@property(nonatomic, readonly) int32_t zeroPoint;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
new file mode 100644
index 0000000000..d08b8fc0e9
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
@@ -0,0 +1,77 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+@class TFLQuantizationParameters;
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @enum TFLTensorDataType
+ * This enum specifies supported TensorFlow Lite tensor data types.
+ */
+typedef NS_ENUM(NSUInteger, TFLTensorDataType) {
+  /** Tensor data type not available. This indicates an error with the model. */
+  TFLTensorDataTypeNoType,
+
+  /** 32-bit single precision floating point. */
+  TFLTensorDataTypeFloat32,
+
+  /** 32-bit signed integer. */
+  TFLTensorDataTypeInt32,
+
+  /** 8-bit unsigned integer. */
+  TFLTensorDataTypeUInt8,
+
+  /** 64-bit signed integer. */
+  TFLTensorDataTypeInt64,
+
+  /** Boolean. */
+  TFLTensorDataTypeBool,
+
+  /** 16-bit signed integer. */
+  TFLTensorDataTypeInt16,
+};
+
+/**
+ * An input or output tensor in a TensorFlow Lite model.
+ */
+@interface TFLTensor : NSObject
+
+/** Name of the tensor. */
+@property(nonatomic, readonly, copy) NSString *name;
+
+/** Data type of the tensor. */
+@property(nonatomic, readonly) TFLTensorDataType dataType;
+
+/**
+ * Shape of the tensor, an array of positive unsigned integer(s) containing the size of each
+ * dimension. For example: the shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is
+ * [2, 2, 3].
+ */
+@property(nonatomic, readonly, copy) NSArray<NSNumber *> *shape;
+
+/** Number of bytes for the tensor data. */
+@property(nonatomic, readonly) NSUInteger byteSize;
+
+/** Parameters for asymmetric quantization. `nil` if the tensor does not use quantization. */
+@property(nonatomic, readonly, nullable) TFLQuantizationParameters *quantizationParameters;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
new file mode 100644
index 0000000000..b6fd4763d6
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
@@ -0,0 +1,51 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Helper utility for error reporting. */
+@interface TFLErrorUtil : NSObject
+
+/**
+ * Creates and returns an interpreter error with the given error code and description.
+ *
+ * @param code Error code.
+ * @param description Error description.
+ *
+ * @return The created interpreter error with the given error code and description.
+ */
++ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                          description:(NSString *)description;
+
+/**
+ * Creates and saves an interpreter error with the given error code and description.
+ *
+ * @param code Error code.
+ * @param description Error description.
+ * @param error Pointer to where to save the created error. If `nil`, no error will be saved.
+ */
++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                         description:(NSString *)description
+                               error:(NSError **)error;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
new file mode 100644
index 0000000000..756d69481c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
@@ -0,0 +1,45 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "TFLErrorUtil.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Error domain of TensorFlow Lite interpreter related errors. */
+static NSString *const TFLInterpreterErrorDomain = @"org.tensorflow.lite.interpreter";
+
+@implementation TFLErrorUtil
+
+#pragma mark - Public
+
++ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                          description:(NSString *)description {
+  return [NSError errorWithDomain:TFLInterpreterErrorDomain
+                             code:code
+                         userInfo:@{NSLocalizedDescriptionKey : description}];
+}
+
++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                         description:(NSString *)description
+                               error:(NSError **)error {
+  if (error) {
+    *error = [NSError errorWithDomain:TFLInterpreterErrorDomain
+                                 code:code
+                             userInfo:@{NSLocalizedDescriptionKey : description}];
+  }
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
new file mode 100644
index 0000000000..0f940a5cf3
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
@@ -0,0 +1,440 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
+
+#import "TFLErrorUtil.h"
+#import "TFLTensor+Internal.h"
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
+
+#include "third_party/tensorflow/contrib/lite/experimental/c/c_api.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @enum TFLTensorType
+ * This enum specifies input or output tensor types.
+ */
+typedef NS_ENUM(NSUInteger, TFLTensorType) {
+  /** Input tensor type. */
+  TFLTensorTypeInput,
+
+  /** Output tensor type. */
+  TFLTensorTypeOutput,
+};
+
+// Names used for indicating input or output in error messages.
+static NSString *const kTFLInputDirection = @"input";
+static NSString *const kTFLOutputDirection = @"output";
+
+/**
+ * Error reporter for TFLInterpreter.
+ *
+ * @param user_data User data. Not used.
+ * @param format Error message which may contain argument formatting specifiers.
+ * @param args Values of the arguments in the error message.
+ */
+static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_list args) {
+  NSLog(@"%@", [[NSString alloc] initWithFormat:@(format) arguments:args]);
+}
+
+@interface TFLInterpreter ()
+
+/** TFL_Interpreter backed by C API. */
+@property(nonatomic, nullable) TFL_Interpreter *interpreter;
+
+/**
+ * An error in initializing the interpreter. If not `nil`, this error will be reported when the
+ * interpreter is used.
+ */
+@property(nonatomic, nullable) NSError *initializationError;
+
+@end
+
+@implementation TFLInterpreter
+
+#pragma mark - NSObject
+
+- (void)dealloc {
+  TFL_DeleteInterpreter(_interpreter);
+}
+
+#pragma mark - Public
+
+- (instancetype)initWithModelPath:(NSString *)modelPath {
+  return [self initWithModelPath:modelPath options:[[TFLInterpreterOptions alloc] init]];
+}
+
+- (instancetype)initWithModelPath:(NSString *)modelPath options:(TFLInterpreterOptions *)options {
+  self = [super init];
+
+  if (self != nil) {
+    const char *modelPathCString = modelPath.UTF8String;
+    NSString *pathErrorString =
+        [NSString stringWithFormat:@"Cannot load model from path (%@).", modelPath];
+    if (modelPathCString == nullptr) {
+      _initializationError =
+          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
+                                     description:pathErrorString];
+      return self;
+    }
+
+    TFL_Model *model = TFL_NewModelFromFile(modelPathCString);
+    if (model == nullptr) {
+      _initializationError =
+          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
+                                     description:pathErrorString];
+      return self;
+    }
+
+    TFL_InterpreterOptions *cOptions = TFL_NewInterpreterOptions();
+    if (cOptions == nullptr) {
+      _initializationError =
+          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
+                                     description:@"Failed to create the interpreter."];
+      TFL_DeleteModel(model);
+      return self;
+    }
+
+    if (options.numberOfThreads > 0) {
+      TFL_InterpreterOptionsSetNumThreads(cOptions, (int32_t)options.numberOfThreads);
+    }
+    TFL_InterpreterOptionsSetErrorReporter(cOptions, TFLInterpreterErrorReporter, nullptr);
+
+    _interpreter = TFL_NewInterpreter(model, cOptions);
+    if (_interpreter == nullptr) {
+      _initializationError =
+          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
+                                     description:@"Failed to create the interpreter."];
+    } else {
+      _inputTensorCount = (NSUInteger)TFL_InterpreterGetInputTensorCount(_interpreter);
+      _outputTensorCount = (NSUInteger)TFL_InterpreterGetOutputTensorCount(_interpreter);
+      if (_inputTensorCount <= 0 || _outputTensorCount <= 0) {
+        _initializationError =
+            [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
+                                       description:@"Failed to create the interpreter."];
+      }
+    }
+    TFL_DeleteInterpreterOptions(cOptions);
+    TFL_DeleteModel(model);
+  }
+
+  return self;
+}
+
+- (BOOL)invokeWithError:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return NO;
+  }
+
+  if (TFL_InterpreterInvoke(self.interpreter) != kTfLiteOk) {
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToInvoke
+                                   description:@"Failed to invoke the interpreter."
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return nil;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
+    return nil;
+  }
+
+  return [self tensorOfType:TFLTensorTypeInput atIndex:index error:error];
+}
+
+- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return nil;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) {
+    return nil;
+  }
+
+  return [self tensorOfType:TFLTensorTypeOutput atIndex:index error:error];
+}
+
+- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
+                         toShape:(NSArray<NSNumber *> *)shape
+                           error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return NO;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
+    return NO;
+  }
+
+  if (shape.count == 0) {
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
+                                   description:@"Invalid shape. Must not be empty."
+                                         error:error];
+    return NO;
+  }
+
+  int cDimensions[self.inputTensorCount];
+  for (int d = 0; d < shape.count; ++d) {
+    int dimension = shape[d].intValue;
+    if (dimension <= 0) {
+      NSString *errorDescription = @"Invalid shape. Dimensions must be positive integers.";
+      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
+                                     description:errorDescription
+                                           error:error];
+      return NO;
+    }
+    cDimensions[d] = dimension;
+  }
+
+  if (TFL_InterpreterResizeInputTensor(self.interpreter, (int32_t)index, cDimensions,
+                                       (int32_t)shape.count) != kTfLiteOk) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Failed to resize input tensor at index (%lu).", (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToResizeInputTensor
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return NO;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
+    return NO;
+  }
+
+  TFL_Tensor *tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index);
+  if (tensor == nullptr) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Failed to get input tensor at index (%lu).", (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor);
+  if (data.length != byteSize) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Input tensor at index (%lu) expects data size (%lu), but got (%lu).",
+                         (unsigned long)index, byteSize, (unsigned long)data.length];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidInputByteSize
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  if (TFL_TensorCopyFromBuffer(tensor, data.bytes, data.length) != kTfLiteOk) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to copy data into input tensor at index (%lu).",
+                                   (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return nil;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) {
+    return nil;
+  }
+
+  const TFL_Tensor *tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index);
+  if (tensor == nullptr) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Failed to get output tensor at index (%lu).", (unsigned long)index];
+    [TFLErrorUtil
+        saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor
+                         description:errorDescription
+                               error:error];
+    return nil;
+  }
+
+  void *bytes = TFL_TensorData(tensor);
+  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor);
+  if (bytes == nullptr || byteSize == 0) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Failed to get output tensor data at index (%lu).", (unsigned long)index];
+    [TFLErrorUtil
+        saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor
+                         description:errorDescription
+                               error:error];
+    return nil;
+  }
+
+  return [NSData dataWithBytes:bytes length:byteSize];
+}
+
+- (BOOL)allocateTensorsWithError:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return NO;
+  }
+
+  if (TFL_InterpreterAllocateTensors(self.interpreter) != kTfLiteOk) {
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToAllocateTensors
+                                   description:@"Failed to allocate memory for tensors."
+                                         error:error];
+    return NO;
+  }
+  return YES;
+}
+
+#pragma mark - Private
+
+- (nullable TFLTensor *)tensorOfType:(TFLTensorType)type
+                             atIndex:(NSUInteger)index
+                               error:(NSError **)error {
+  const TFL_Tensor *tensor = nullptr;
+  NSString *tensorType;
+  switch (type) {
+    case TFLTensorTypeInput:
+      tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index);
+      tensorType = kTFLInputDirection;
+      break;
+    case TFLTensorTypeOutput:
+      tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index);
+      tensorType = kTFLOutputDirection;
+      break;
+  }
+
+  if (tensor == nullptr) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to get %@ tensor at index (%lu).", tensorType,
+                                   (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
+                                   description:errorDescription
+                                         error:error];
+    return nil;
+  }
+
+  const char *cName = TFL_TensorName(tensor);
+  if (cName == nullptr) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to get name of %@ tensor at index (%lu).", tensorType,
+                                   (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
+                                   description:errorDescription
+                                         error:error];
+    return nil;
+  }
+  NSString *name = [NSString stringWithUTF8String:cName];
+
+  TFLTensorDataType dataType = [self tensorDataTypeFromCTensorType:TFL_TensorType(tensor)];
+
+  int32_t rank = TFL_TensorNumDims(tensor);
+  if (rank <= 0) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid rank (%d).", tensorType,
+                                   (unsigned long)index, rank];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
+                                   description:errorDescription
+                                         error:error];
+    return nil;
+  }
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:rank];
+  for (int32_t d = 0; d < rank; d++) {
+    int32_t dimension = TFL_TensorDim(tensor, d);
+    if (dimension <= 0) {
+      NSString *errorDescription =
+          [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid %d-th dimension (%d).",
+                                     tensorType, (unsigned long)index, d, dimension];
+      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
+                                     description:errorDescription
+                                           error:error];
+      return nil;
+    }
+    shape[d] = @((NSUInteger)dimension);
+  }
+
+  // TODO: Set quantization parameters when C API supports it.
+  return [[TFLTensor alloc] initWithName:name
+                                dataType:dataType
+                                   shape:shape
+                                byteSize:(NSUInteger)TFL_TensorByteSize(tensor)
+                  quantizationParameters:nil];
+}
+
+- (TFLTensorDataType)tensorDataTypeFromCTensorType:(TFL_Type)cTensorType {
+  switch (cTensorType) {
+    case kTfLiteFloat32:
+      return TFLTensorDataTypeFloat32;
+    case kTfLiteInt32:
+      return TFLTensorDataTypeInt32;
+    case kTfLiteUInt8:
+      return TFLTensorDataTypeUInt8;
+    case kTfLiteInt64:
+      return TFLTensorDataTypeInt64;
+    case kTfLiteBool:
+      return TFLTensorDataTypeBool;
+    case kTfLiteInt16:
+      return TFLTensorDataTypeInt16;
+    case kTfLiteNoType:
+    case kTfLiteString:
+    case kTfLiteComplex64:
+      // kTfLiteString and kTfLiteComplex64 are not supported in TensorFlow Lite Objc API.
+      return TFLTensorDataTypeNoType;
+  }
+}
+
+- (void)saveInitializationErrorToDestination:(NSError **)destination {
+  if (destination != NULL) {
+    *destination = self.initializationError;
+  }
+}
+
+- (BOOL)isValidTensorIndex:(NSUInteger)index
+                belowLimit:(NSUInteger)totalTensorCount
+                     error:(NSError **)error {
+  if (index >= totalTensorCount) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Invalid tensor index (%lu) exceeds max (%lu).",
+                                   (unsigned long)index, (unsigned long)(totalTensorCount - 1)];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensorIndex
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
new file mode 100644
index 0000000000..1776688288
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
@@ -0,0 +1,30 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@implementation TFLInterpreterOptions
+
+#pragma mark - Public
+
+- (instancetype)init {
+  self = [super init];
+  return self;
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
new file mode 100644
index 0000000000..190f0479ce
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
@@ -0,0 +1,23 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@implementation TFLQuantizationParameters
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
new file mode 100644
index 0000000000..f2f13e5e5f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
@@ -0,0 +1,42 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface TFLTensor (Internal)
+
+/**
+ * Initializes a `TFLTensor` with the given name, data type, shape, and quantization parameters.
+ *
+ * @param name Name of the tensor.
+ * @param dataType Data type of the tensor.
+ * @param shape Shape of the tensor.
+ * @param byteSize Size of the tensor data in number of bytes.
+ * @param quantizationParameters Quantization parameters of the tensor. `nil` if the tensor does not
+ *     use quantization.
+ *
+ * @return A new instance of `TFLTensor` with the given name, data type, shape, and quantization
+ *     parameters.
+ */
+- (instancetype)initWithName:(NSString *)name
+                    dataType:(TFLTensorDataType)dataType
+                       shape:(NSArray<NSNumber *> *)shape
+                    byteSize:(NSUInteger)byteSize
+      quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
new file mode 100644
index 0000000000..adb1c5ad2c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
@@ -0,0 +1,54 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
+
+#import "TFLTensor+Internal.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface TFLTensor ()
+
+// Redefines readonly properties.
+@property(nonatomic, copy) NSString *name;
+@property(nonatomic) TFLTensorDataType dataType;
+@property(nonatomic, copy) NSArray<NSNumber *> *shape;
+@property(nonatomic) NSUInteger byteSize;
+@property(nonatomic, nullable) TFLQuantizationParameters *quantizationParameters;
+
+@end
+
+@implementation TFLTensor
+
+#pragma mark - TFLTensor (Internal)
+
+- (instancetype)initWithName:(NSString *)name
+                    dataType:(TFLTensorDataType)dataType
+                       shape:(NSArray<NSNumber *> *)shape
+                    byteSize:(NSUInteger)byteSize
+      quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters {
+  self = [super init];
+  if (self != nil) {
+    _name = [name copy];
+    _dataType = dataType;
+    _shape = [shape copy];
+    _byteSize = byteSize;
+    _quantizationParameters = quantizationParameters;
+  }
+  return self;
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
new file mode 100644
index 0000000000..17c495fa18
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
@@ -0,0 +1,49 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+
+#import <XCTest/XCTest.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * Unit tests for TFLInterpreterOptions.
+ */
+@interface TFLInterpreterOptionsTests : XCTestCase
+@end
+
+@implementation TFLInterpreterOptionsTests
+
+#pragma mark - Tests
+
+- (void)testInit {
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  XCTAssertNotNil(options);
+  XCTAssertEqual(options.numberOfThreads, 0);
+}
+
+- (void)testSetNumberOfThread {
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  options.numberOfThreads = 2;
+  XCTAssertEqual(options.numberOfThreads, 2);
+  options.numberOfThreads = 0;
+  XCTAssertEqual(options.numberOfThreads, 0);
+  options.numberOfThreads = 3;
+  XCTAssertEqual(options.numberOfThreads, 3);
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m
new file mode 100644
index 0000000000..9e6319a732
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m
@@ -0,0 +1,266 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
+
+#import <XCTest/XCTest.h>
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Model resource name. */
+static NSString *const kAddModelResourceName = @"add";
+
+/** Model resource type. */
+static NSString *const kAddModelResourceType = @"bin";
+
+/** Rank of the input and output tensor in the Add model. */
+static const NSUInteger kAddModelTensorRank = 1U;
+
+/** Size of the first (and only) dimension of the input and output tensor in the Add model. */
+static const NSUInteger kAddModelTensorFirstDimensionSize = 2U;
+
+/** Invalid input tensor index. */
+static const NSUInteger kInvalidInputTensorIndex = 1U;
+
+/** Invalid output tensor index. */
+static const NSUInteger kInvalidOutputTensorIndex = 1U;
+
+/** Accurary used in comparing floating numbers. */
+static const float kTestAccuracy = 1E-5F;
+
+/**
+ * Unit tests for TFLInterpreter.
+ */
+@interface TFLInterpreterTests : XCTestCase
+
+/** Absolute path of the Add model resource. */
+@property(nonatomic, nullable) NSString *modelPath;
+
+/** Default interpreter using the Add model. */
+@property(nonatomic, nullable) TFLInterpreter *interpreter;
+
+@end
+
+@implementation TFLInterpreterTests
+
+#pragma mark - XCTestCase
+
+- (void)setUp {
+  [super setUp];
+
+  NSBundle *bundle = [NSBundle bundleForClass:[self class]];
+  self.modelPath = [bundle pathForResource:kAddModelResourceName ofType:kAddModelResourceType];
+  self.interpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath];
+  XCTAssertNotNil(self.interpreter);
+  XCTAssertTrue([self.interpreter allocateTensorsWithError:nil]);
+}
+
+- (void)tearDown {
+  self.modelPath = nil;
+  self.interpreter = nil;
+
+  [super tearDown];
+}
+
+#pragma mark - Tests
+
+- (void)testSuccessfulFullRun {
+  // Shape for both input and output tensor.
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
+
+  // Creates the interpreter options.
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  XCTAssertNotNil(options);
+  options.numberOfThreads = 2;
+
+  // Creates the interpreter.
+  TFLInterpreter *customInterpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath
+                                                                        options:options];
+  XCTAssertNotNil(customInterpreter);
+
+  // Allocates memory for tensors.
+  NSError *error;
+  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies input and output tensor counts.
+  XCTAssertEqual(customInterpreter.inputTensorCount, 1);
+  XCTAssertEqual(customInterpreter.outputTensorCount, 1);
+
+  // Resizes the intput tensor.
+  XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
+  XCTAssertNil(error);
+
+  // Re-allocates memory for tensors.
+  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies the input tensor.
+  TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(inputTensor);
+  XCTAssertNil(error);
+  XCTAssertTrue([inputTensor.name isEqualToString:@"input"]);
+  XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeFloat32);
+  XCTAssertTrue([shape isEqualToArray:inputTensor.shape]);
+  XCTAssertEqual(inputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize);
+
+  // Copies the input data.
+  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
+  float one = 1.f;
+  float three = 3.f;
+  [inputData appendBytes:&one length:sizeof(float)];
+  [inputData appendBytes:&three length:sizeof(float)];
+  XCTAssertTrue([customInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
+  XCTAssertNil(error);
+
+  // Invokes the interpreter.
+  XCTAssertTrue([customInterpreter invokeWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies the output tensor.
+  TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(outputTensor);
+  XCTAssertNil(error);
+  XCTAssertTrue([outputTensor.name isEqualToString:@"output"]);
+  XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeFloat32);
+  XCTAssertTrue([shape isEqualToArray:outputTensor.shape]);
+  XCTAssertEqual(outputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize);
+
+  // Tries to query an invalid output tensor index.
+  TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex
+                                                                    error:&error];
+  XCTAssertNil(invalidOutputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+
+  // Gets the output tensor data.
+  error = nil;
+  NSData *outputData = [customInterpreter dataFromOutputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(outputData);
+  XCTAssertNil(error);
+  float output[kAddModelTensorFirstDimensionSize];
+  [outputData getBytes:output length:(sizeof(float) * kAddModelTensorFirstDimensionSize)];
+  XCTAssertEqualWithAccuracy(output[0], 3.f, kTestAccuracy);
+  XCTAssertEqualWithAccuracy(output[1], 9.f, kTestAccuracy);
+}
+
+- (void)testInitWithModelPath_invalidPath {
+  // Shape for both input and output tensor.
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
+
+  // Creates the interpreter.
+  TFLInterpreter *brokenInterpreter = [[TFLInterpreter alloc] initWithModelPath:@"InvalidPath"];
+  XCTAssertNotNil(brokenInterpreter);
+  XCTAssertEqual(brokenInterpreter.inputTensorCount, 0);
+  XCTAssertEqual(brokenInterpreter.outputTensorCount, 0);
+
+  // Allocates memory for tensors.
+  NSError *error;
+  XCTAssertFalse([brokenInterpreter allocateTensorsWithError:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Resizes the intput tensor.
+  XCTAssertFalse([brokenInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Verifies the input tensor.
+  TFLTensor *inputTensor = [brokenInterpreter inputTensorAtIndex:0 error:&error];
+  XCTAssertNil(inputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Copies the input data.
+  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
+  float one = 1.f;
+  float three = 3.f;
+  [inputData appendBytes:&one length:sizeof(float)];
+  [inputData appendBytes:&three length:sizeof(float)];
+  XCTAssertFalse([brokenInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Invokes the interpreter.
+  XCTAssertFalse([brokenInterpreter invokeWithError:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Verifies the output tensor.
+  TFLTensor *outputTensor = [brokenInterpreter outputTensorAtIndex:0 error:&error];
+  XCTAssertNil(outputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Gets the output tensor data.
+  NSData *outputData = [brokenInterpreter dataFromOutputTensorAtIndex:0 error:&error];
+  XCTAssertNil(outputData);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+}
+
+- (void)testInvoke_beforeAllocation {
+  TFLInterpreter *interpreterWithoutAllocation =
+      [[TFLInterpreter alloc] initWithModelPath:self.modelPath];
+  XCTAssertNotNil(interpreterWithoutAllocation);
+
+  NSError *error;
+  XCTAssertFalse([interpreterWithoutAllocation invokeWithError:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToInvoke);
+}
+
+- (void)testInputTensorAtIndex_invalidIndex {
+  NSError *error;
+  TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:kInvalidInputTensorIndex
+                                                          error:&error];
+  XCTAssertNil(inputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+}
+
+- (void)testResizeInputTensorAtIndex_invalidIndex {
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
+  NSError *error;
+  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:kInvalidInputTensorIndex
+                                                    toShape:shape
+                                                      error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+}
+
+- (void)testResizeInputTensorAtIndex_emptyShape {
+  NSMutableArray *emptyShape = [NSMutableArray arrayWithCapacity:0];
+  NSError *error;
+  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:emptyShape error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
+}
+
+- (void)testResizeInputTensorAtIndex_zeroDimensionSize {
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:0];
+  NSError *error;
+  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
+}
+
+- (void)testCopyDataToInputTensorAtIndex_invalidInputDataByteSize {
+  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
+  float one = 1.f;
+  float three = 3.f;
+  [inputData appendBytes:&one length:sizeof(float)];
+  [inputData appendBytes:&three length:(sizeof(float) - 1)];
+  NSError *error;
+  XCTAssertFalse([self.interpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidInputByteSize);
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index c6ef82ccdc..31b68c8f00 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -105,6 +105,7 @@ BLACKLIST = [
     "//tensorflow/contrib/timeseries/python/timeseries:test_utils",
     "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils",  # pylint:disable=line-too-long
     "//tensorflow/contrib/image:sparse_image_warp_test_data",
+    "//tools/build_defs/apple:ios.bzl",
 ]
 
 
-- 
GitLab


From 5be479930d3dcfa3edb863703b1d73b89d45f03c Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Tue, 9 Oct 2018 17:19:24 -0700
Subject: [PATCH 1340/1357] [XLA:GPU] Use CudnnConvKind in more places.

No functional change.

PiperOrigin-RevId: 216451881
---
 tensorflow/compiler/xla/service/gpu/BUILD     |  1 +
 .../service/gpu/cudnn_convolution_runner.cc   | 99 ++++++++++---------
 .../xla/service/gpu/pad_for_tensor_cores.cc   | 84 +++++++++-------
 .../compiler/xla/service/gpu/pad_insertion.cc | 31 +++---
 4 files changed, 116 insertions(+), 99 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 0144d59097..62da43d68a 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -591,6 +591,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:window_util",
         "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:hlo_casting_utils",
         "//tensorflow/compiler/xla/service:hlo_creation_utils",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/compiler/xla/service:shape_inference",
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
index 89dd1bb272..a809c22b33 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
@@ -312,11 +312,12 @@ StatusOr<CudnnConvParams> GetCudnnConvParams(
 
   TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config,
                       conv->backend_config<CudnnConvBackendConfig>());
-  const auto& target = conv->custom_call_target();
+  TF_ASSIGN_OR_RETURN(CudnnConvKind kind, GetCudnnConvKind(conv));
   const auto& lhs_shape = conv->operand(0)->shape();
   const auto& rhs_shape = conv->operand(1)->shape();
   const auto& conv_result_shape = conv->shape().tuple_shapes(0);
 
+  params.kind = kind;
   params.window = &conv->window();
   params.dnums = &conv->convolution_dimension_numbers();
   params.feature_group_count = conv->feature_group_count();
@@ -324,55 +325,55 @@ StatusOr<CudnnConvParams> GetCudnnConvParams(
       backend_config.algorithm(), backend_config.tensor_ops_enabled()));
   params.conv_result_scale = backend_config.conv_result_scale();
 
-  if (target == kCudnnConvForwardCallTarget) {
-    params.kind = CudnnConvKind::kForward;
-    params.input_shape = &lhs_shape;
-    params.filter_shape = &rhs_shape;
-    params.output_shape = &conv_result_shape;
-    params.input_buf = operand_buffers[0];
-    params.filter_buf = operand_buffers[1];
-    params.output_buf = result_buffer;
-  } else if (target == kCudnnConvBackwardInputCallTarget) {
-    params.kind = CudnnConvKind::kBackwardInput;
-    params.input_shape = &conv_result_shape;
-    params.filter_shape = &rhs_shape;
-    params.output_shape = &lhs_shape;
-    params.input_buf = result_buffer;
-    params.filter_buf = operand_buffers[1];
-    params.output_buf = operand_buffers[0];
-  } else if (target == kCudnnConvBackwardFilterCallTarget) {
-    params.kind = CudnnConvKind::kBackwardFilter;
-    params.input_shape = &lhs_shape;
-    params.filter_shape = &conv_result_shape;
-    params.output_shape = &rhs_shape;
-    params.input_buf = operand_buffers[0];
-    params.filter_buf = result_buffer;
-    params.output_buf = operand_buffers[1];
-  } else if (target == kCudnnConvBiasActivationForwardCallTarget) {
-    params.kind = CudnnConvKind::kForwardActivation;
-    params.input_shape = &lhs_shape;
-    params.filter_shape = &rhs_shape;
-    params.output_shape = &conv_result_shape;
-    params.fusion.emplace();
-    auto& fusion = *params.fusion;
-    if (backend_config.activation_mode() <
-        static_cast<int64>(se::dnn::ActivationMode::kNumActivationModes)) {
-      fusion.mode = static_cast<se::dnn::ActivationMode>(
-          backend_config.activation_mode());
-    } else {
-      return InternalError("Bad activation mode: %s",
-                           backend_config.ShortDebugString());
-    }
-    fusion.side_input_scale = backend_config.side_input_scale();
-    params.input_buf = operand_buffers[0];
-    params.filter_buf = operand_buffers[1];
-    params.output_buf = result_buffer;
-    params.fusion->bias_buf = operand_buffers[2];
-    if (operand_buffers.size() >= 4) {
-      params.fusion->side_input_buf = operand_buffers[3];
+  switch (kind) {
+    case CudnnConvKind::kForward:
+      params.input_shape = &lhs_shape;
+      params.filter_shape = &rhs_shape;
+      params.output_shape = &conv_result_shape;
+      params.input_buf = operand_buffers[0];
+      params.filter_buf = operand_buffers[1];
+      params.output_buf = result_buffer;
+      break;
+    case CudnnConvKind::kBackwardInput:
+      params.input_shape = &conv_result_shape;
+      params.filter_shape = &rhs_shape;
+      params.output_shape = &lhs_shape;
+      params.input_buf = result_buffer;
+      params.filter_buf = operand_buffers[1];
+      params.output_buf = operand_buffers[0];
+      break;
+    case CudnnConvKind::kBackwardFilter:
+      params.input_shape = &lhs_shape;
+      params.filter_shape = &conv_result_shape;
+      params.output_shape = &rhs_shape;
+      params.input_buf = operand_buffers[0];
+      params.filter_buf = result_buffer;
+      params.output_buf = operand_buffers[1];
+      break;
+    case CudnnConvKind::kForwardActivation: {
+      params.kind = CudnnConvKind::kForwardActivation;
+      params.input_shape = &lhs_shape;
+      params.filter_shape = &rhs_shape;
+      params.output_shape = &conv_result_shape;
+      params.fusion.emplace();
+      auto& fusion = *params.fusion;
+      if (backend_config.activation_mode() <
+          static_cast<int64>(se::dnn::ActivationMode::kNumActivationModes)) {
+        fusion.mode = static_cast<se::dnn::ActivationMode>(
+            backend_config.activation_mode());
+      } else {
+        return InternalError("Bad activation mode: %s",
+                             backend_config.ShortDebugString());
+      }
+      fusion.side_input_scale = backend_config.side_input_scale();
+      params.input_buf = operand_buffers[0];
+      params.filter_buf = operand_buffers[1];
+      params.output_buf = result_buffer;
+      params.fusion->bias_buf = operand_buffers[2];
+      if (operand_buffers.size() >= 4) {
+        params.fusion->side_input_buf = operand_buffers[3];
+      }
     }
-  } else {
-    return InternalError("Unexpected custom call target: %s", target);
   }
   return params;
 }
diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
index e3869b5c36..8f1f5a7bf5 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
@@ -105,38 +105,45 @@ static HloInstruction* PadInstruction(HloInstruction* instr,
 
 // Pads the input/output feature dimensions of the given cudnn convolution
 // custom-call to be multiples of kDesiredNumFeaturesFactor.
-static StatusOr<bool> PadFeaturesDims(HloInstruction* conv) {
+static StatusOr<bool> PadFeaturesDims(HloCustomCallInstruction* conv) {
   CHECK_EQ(0, conv->shape().tuple_shapes(1).dimensions(0))
       << "conv must use 0 scratch bytes, i.e. this pass must be run "
          "before CudnnConvolutionAlgorithmPicker.";
 
-  const auto& target = conv->custom_call_target();
+  TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv));
   const auto& dnums = conv->convolution_dimension_numbers();
   auto* lhs = conv->mutable_operand(0);
   auto* rhs = conv->mutable_operand(1);
   const Shape& result_shape = conv->shape().tuple_shapes(0);
 
   Shape new_lhs_shape = [&] {
-    if (target == kCudnnConvForwardCallTarget ||
-        target == kCudnnConvBackwardFilterCallTarget) {
-      // LHS is "input".
-      return PadShape(lhs->shape(), {dnums.input_feature_dimension()});
+    switch (kind) {
+      case CudnnConvKind::kForward:
+      case CudnnConvKind::kBackwardFilter:
+        // LHS is "input".
+        return PadShape(lhs->shape(), {dnums.input_feature_dimension()});
+      case CudnnConvKind::kBackwardInput:
+        // LHS is "output".
+        return PadShape(lhs->shape(), {dnums.output_feature_dimension()});
+      case CudnnConvKind::kForwardActivation:
+        LOG(FATAL) << "Not yet implemented.";
     }
-    CHECK_EQ(target, kCudnnConvBackwardInputCallTarget);
-    // LHS is "output".
-    return PadShape(lhs->shape(), {dnums.output_feature_dimension()});
   }();
 
   Shape new_rhs_shape = [&] {
-    if (target == kCudnnConvForwardCallTarget ||
-        target == kCudnnConvBackwardInputCallTarget) {
-      // RHS is "filter".
-      return PadShape(rhs->shape(), {dnums.kernel_input_feature_dimension(),
-                                     dnums.kernel_output_feature_dimension()});
+    switch (kind) {
+      case CudnnConvKind::kForward:
+      case CudnnConvKind::kBackwardInput:
+        // RHS is "filter".
+        return PadShape(rhs->shape(),
+                        {dnums.kernel_input_feature_dimension(),
+                         dnums.kernel_output_feature_dimension()});
+      case CudnnConvKind::kBackwardFilter:
+        // RHS is "output".
+        return PadShape(rhs->shape(), {dnums.output_feature_dimension()});
+      case CudnnConvKind::kForwardActivation:
+        LOG(FATAL) << "Not yet implemented.";
     }
-    CHECK_EQ(target, kCudnnConvBackwardFilterCallTarget);
-    // RHS is "output".
-    return PadShape(rhs->shape(), {dnums.output_feature_dimension()});
   }();
 
   if (ShapeUtil::Equal(lhs->shape(), new_lhs_shape) &&
@@ -146,18 +153,21 @@ static StatusOr<bool> PadFeaturesDims(HloInstruction* conv) {
   }
 
   Shape new_result_shape = [&] {
-    if (target == kCudnnConvForwardCallTarget) {
-      // Result is "output".
-      return PadShape(result_shape, {dnums.output_feature_dimension()});
+    switch (kind) {
+      case CudnnConvKind::kForward:
+        // Result is "output".
+        return PadShape(result_shape, {dnums.output_feature_dimension()});
+      case CudnnConvKind::kBackwardInput:
+        // Result is "input".
+        return PadShape(result_shape, {dnums.input_feature_dimension()});
+      case CudnnConvKind::kBackwardFilter:
+        // Result is "filter".
+        return PadShape(result_shape,
+                        {dnums.kernel_input_feature_dimension(),
+                         dnums.kernel_output_feature_dimension()});
+      case CudnnConvKind::kForwardActivation:
+        LOG(FATAL) << "Not yet implemented.";
     }
-    if (target == kCudnnConvBackwardInputCallTarget) {
-      // Result is "input".
-      return PadShape(result_shape, {dnums.input_feature_dimension()});
-    }
-    CHECK_EQ(target, kCudnnConvBackwardFilterCallTarget);
-    // Result is "filter".
-    return PadShape(result_shape, {dnums.kernel_input_feature_dimension(),
-                                   dnums.kernel_output_feature_dimension()});
   }();
 
   // Check that padding wouldn't increase the total bytes read/written by this
@@ -223,16 +233,20 @@ static StatusOr<bool> PadFeaturesDims(HloInstruction* conv) {
   return true;
 }
 
-static std::vector<HloInstruction*> GetRelevantConvs(HloComputation* comp) {
-  std::vector<HloInstruction*> convs;
+static std::vector<HloCustomCallInstruction*> GetRelevantConvs(
+    HloComputation* comp) {
+  std::vector<HloCustomCallInstruction*> convs;
   for (HloInstruction* instr : comp->instructions()) {
-    if (IsCustomCallToDnnConvolution(*instr) &&
-        instr->operand(0)->shape().element_type() == F16 &&
+    if (!IsCustomCallToDnnConvolution(*instr)) {
+      continue;
+    }
+    auto* custom_call = Cast<HloCustomCallInstruction>(instr);
+    if (custom_call->operand(0)->shape().element_type() == F16 &&
         // TODO(timshen): Disable for fused conv for now. Implement it if it's
         // needed.
-        Cast<HloCustomCallInstruction>(instr)->custom_call_target() !=
+        custom_call->custom_call_target() !=
             kCudnnConvBiasActivationForwardCallTarget) {
-      convs.push_back(instr);
+      convs.push_back(custom_call);
     }
   }
   return convs;
@@ -241,7 +255,7 @@ static std::vector<HloInstruction*> GetRelevantConvs(HloComputation* comp) {
 StatusOr<bool> PadForTensorCores::Run(HloModule* module) {
   bool changed = false;
   for (HloComputation* comp : module->MakeNonfusionComputations()) {
-    for (HloInstruction* conv : GetRelevantConvs(comp)) {
+    for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) {
       TF_ASSIGN_OR_RETURN(bool result, PadFeaturesDims(conv));
       changed |= result;
     }
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
index b42a19e3a2..ae7abca7c6 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_creation_utils.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -378,25 +379,25 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution(
 
 StatusOr<bool> PadInsertion::RunOnComputation(HloComputation* computation) {
   bool changed = false;
-  std::vector<HloInstruction*> convs;
+  std::vector<HloCustomCallInstruction*> convs;
   for (auto* instr : computation->instructions()) {
     if (IsCustomCallToDnnConvolution(*instr)) {
-      convs.push_back(instr);
+      convs.push_back(Cast<HloCustomCallInstruction>(instr));
     }
   }
-  for (HloInstruction* instruction : convs) {
-    const auto& target = instruction->custom_call_target();
-    if (target == kCudnnConvForwardCallTarget ||
-        target == kCudnnConvBiasActivationForwardCallTarget) {
-      changed |= CanonicalizeForwardConvolution(instruction);
-    } else if (target == kCudnnConvBackwardFilterCallTarget) {
-      changed |= CanonicalizeBackwardFilterConvolution(instruction);
-    } else if (target == kCudnnConvBackwardInputCallTarget) {
-      changed |= CanonicalizeBackwardInputConvolution(instruction);
-    } else {
-      LOG(FATAL) << "Unknown custom call target for cudnn conv: "
-                 << instruction->ToString();
-    }
+  for (HloCustomCallInstruction* instruction : convs) {
+    TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(instruction));
+    changed |= [&] {
+      switch (kind) {
+        case CudnnConvKind::kForward:
+        case CudnnConvKind::kForwardActivation:
+          return CanonicalizeForwardConvolution(instruction);
+        case CudnnConvKind::kBackwardInput:
+          return CanonicalizeBackwardInputConvolution(instruction);
+        case CudnnConvKind::kBackwardFilter:
+          return CanonicalizeBackwardFilterConvolution(instruction);
+      }
+    }();
   }
   return changed;
 }
-- 
GitLab


From ee1cb110360b12d752c9cb4ebbb76d33930f67d7 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Tue, 9 Oct 2018 17:23:45 -0700
Subject: [PATCH 1341/1357] Move tflite_convert g3docs, so they will be pulled
 into the site.

PiperOrigin-RevId: 216452447
---
 tensorflow/contrib/lite/g3doc/_book.yaml      |  9 ++++
 .../tflite_convert}/cmdline_examples.md       | 54 ++++++++-----------
 .../tflite_convert}/cmdline_reference.md      | 17 ++----
 .../lite/g3doc/tflite_convert/index.md        | 22 ++++++++
 .../tflite_convert}/python_api.md             | 29 ++--------
 .../tflite_convert}/toco_landscape.svg        |  0
 tensorflow/contrib/lite/toco/g3doc/README.md  |  3 ++
 7 files changed, 63 insertions(+), 71 deletions(-)
 rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/cmdline_examples.md (90%)
 rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/cmdline_reference.md (93%)
 create mode 100644 tensorflow/contrib/lite/g3doc/tflite_convert/index.md
 rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/python_api.md (89%)
 rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/toco_landscape.svg (100%)
 create mode 100644 tensorflow/contrib/lite/toco/g3doc/README.md

diff --git a/tensorflow/contrib/lite/g3doc/_book.yaml b/tensorflow/contrib/lite/g3doc/_book.yaml
index de6914e536..f6ec387ad2 100644
--- a/tensorflow/contrib/lite/g3doc/_book.yaml
+++ b/tensorflow/contrib/lite/g3doc/_book.yaml
@@ -38,6 +38,15 @@ upper_tabs:
         path: /lite/ios
       - title: TensorFlow Lite for Raspberry Pi
         path: /lite/rpi
+      - heading: TFLite Converter
+      - title: Overview
+        path: /lite/tflite_convert/
+      - title: Python API
+        path: /lite/tflite_convert/python_api
+      - title: Command Line Examples
+        path: /lite/tflite_convert/cmdline_examples
+      - title: Command Line Reference
+        path: /lite/tflite_convert/cmdline_reference
 
       - title: TF Mobile
         style: accordion
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
similarity index 90%
rename from tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
rename to tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
index e3c46eb377..d88acfae80 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
@@ -1,33 +1,8 @@
 # TensorFlow Lite Converter command-line examples
 
-This page shows how to use the TensorFlow Lite Converter in the command line. It
-is complemented by the following documents:
-
-*   [README](../README.md)
-*   [Command-line glossary](cmdline_reference.md)
-*   [Python API examples](python_api.md)
-
-Table of contents:
-
-*   [Command-line tools](#tools)
-    *   [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9)
-*   [Basic examples](#basic)
-    *   [Convert a TensorFlow GraphDef](#graphdef)
-    *   [Convert a TensorFlow SavedModel](#savedmodel)
-    *   [Convert a tf.keras model](#keras)
-*   [Quantization](#quantization)
-    *   [Convert a TensorFlow GraphDef for quantized inference](#graphdef-quant)
-    *   [Use "dummy-quantization" to try out quantized inference on a float
-        graph](#dummy-quant)
-*   [Specifying input and output arrays](#specifying-input-and-output-arrays)
-    *   [Multiple input arrays](#multiple-input-arrays)
-    *   [Multiple output arrays](#multiple-output-arrays)
-    *   [Specifying subgraphs](#specifying-subgraphs)
-*   [Graph visualizations](#graph-visualizations)
-    *   [Using --output_format=GRAPHVIZ_DOT](#using-output-format-graphviz-dot)
-    *   [Using --dump_graphviz_dir](#using-dump-graphviz-dir)
-    *   [Graph "video" logging](#graph-video-logging)
-    *   [Legend for the graph visualizations](#graphviz-legend)
+This page shows how to use the TensorFlow Lite Converter in the command line.
+
+[TOC]
 
 ## Command-line tools <a name="tools"></a>
 
@@ -325,10 +300,23 @@ As before, these can be rendered to PDFs:
 dot -Tpdf -O /tmp/toco_*.dot
 ```
 
-Sample output files can be seen here:
-
-*   [toco_AT_IMPORT.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf)
-*   [toco_AFTER_TRANSFORMATIONS.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf).
+Sample output files can be seen here below. Note that it is the same
+`AveragePool` node in the top right of each image.
+
+<table><tr>
+  <td>
+    <a target="_blank" href="https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf">
+      <img src="https://www.tensorflow.org/images/tflite_convert/tflite_convert_before.png"/>
+    </a>
+  </td>
+  <td>
+    <a target="_blank" href="https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf">
+      <img src="https://www.tensorflow.org/images/tflite_convert/tflite_convert_after.png"/>
+    </a>
+  </td>
+</tr>
+<tr><td>before</td><td>after</td></tr>
+</table>
 
 ### Graph "video" logging
 
@@ -347,7 +335,7 @@ change was introduced in the graph.
     *   Some typically heavy operators (e.g. Conv) are rendered in a
         <span style="background-color:#c53929;color:white;border:1px;border-style:solid;border-color:black;padding:1px">darker
         red</span>.
-*   Arrays are octogons with the following colors:
+*   Arrays are octagons with the following colors:
     *   Constant arrays are
         <span style="background-color:#4285f4;color:white;border:1px;border-style:solid;border-color:black;padding:1px">blue</span>.
     *   Activation arrays are gray:
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
similarity index 93%
rename from tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
rename to tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
index 31200fd657..d65912fea6 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
@@ -2,18 +2,9 @@
 
 This page is complete reference of command-line flags used by the TensorFlow
 Lite Converter's command line starting from TensorFlow 1.9 up until the most
-recent build of TensorFlow. It is complemented by the following other documents:
+recent build of TensorFlow.
 
-*   [README](../README.md)
-*   [Command-line examples](cmdline_examples.md)
-*   [Python API examples](python_api.md)
-
-Table of contents:
-
-*   [High-level flags](#high-level-flags)
-*   [Model flags](#model-flags)
-*   [Transformation flags](#transformation-flags)
-*   [Logging flags](#logging-flags)
+[TOC]
 
 ## High-level flags
 
@@ -32,7 +23,7 @@ files. The flag `--output_file` is always required. Additionally, either
 *   `--output_format`. Type: string. Default: `TFLITE`. Specifies the format of
     the output file. Allowed values:
     *   `TFLITE`: TensorFlow Lite FlatBuffer format.
-    *   `GRAPHVIZ_DOT`: GraphViz `.dot` format containg a visualization of the
+    *   `GRAPHVIZ_DOT`: GraphViz `.dot` format containing a visualization of the
         graph after graph transformations.
         *   Note that passing `GRAPHVIZ_DOT` to `--output_format` leads to loss
             of TFLite specific transformations. Therefore, the resulting
@@ -68,7 +59,7 @@ based on index.
 *   `--input_shapes`. Type: colon-separated list of comma-separated lists of
     integers. Each comma-separated list of integers gives the shape of one of
     the input arrays specified in
-    [TensorFlow convention](https://www.tensorflow.org/versions/r1.2/programmers_guide/dims_types#shape).
+    [TensorFlow convention](https://www.tensorflow.org/guide/dims_types#shape).
     *   Example: `--input_shapes=1,60,80,3` for a typical vision model means a
         batch size of 1, an input image height of 60, an input image width of
         80, and an input image depth of 3 (representing RGB channels).
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/index.md b/tensorflow/contrib/lite/g3doc/tflite_convert/index.md
new file mode 100644
index 0000000000..12ba0225f6
--- /dev/null
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/index.md
@@ -0,0 +1,22 @@
+# TensorFlow Lite Converter
+
+The TensorFlow Lite Converter converts TensorFlow graphs into
+TensorFlow Lite graphs. There are additional usages that are also detailed in
+the usage documentation.
+
+
+## Where the converter fits in the TensorFlow landscape
+
+Once an application developer has a trained TensorFlow model, the TensorFlow
+Lite Converter will accept
+that model and generate a TensorFlow Lite
+[FlatBuffer](https://google.github.io/flatbuffers/) file. The converter currently supports
+[SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators),
+frozen graphs (models generated via
+[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)),
+and `tf.Keras` model files.  The TensorFlow Lite FlatBuffer file can be shipped
+to client devices, generally mobile devices, where the TensorFlow Lite
+interpreter handles them on-device.  This flow is represented in the diagram
+below.
+
+![drawing](toco_landscape.svg)
diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
similarity index 89%
rename from tensorflow/contrib/lite/toco/g3doc/python_api.md
rename to tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
index 1f741360c6..e1c0e0c240 100644
--- a/tensorflow/contrib/lite/toco/g3doc/python_api.md
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
@@ -1,31 +1,10 @@
 # TensorFlow Lite Converter & Interpreter Python API reference
 
 This page provides examples on how to use the TensorFlow Lite Converter and the
-TensorFlow Lite interpreter using the Python API. It is complemented by the
-following documents:
-
-*   [README](../README.md)
-*   [Command-line examples](cmdline_examples.md)
-*   [Command-line glossary](cmdline_reference.md)
-
-Table of contents:
-
-*   [High-level overview](#high-level-overview)
-*   [API](#api)
-*   [Basic examples](#basic)
-    *   [Exporting a GraphDef from tf.Session](#basic-graphdef-sess)
-    *   [Exporting a GraphDef from file](#basic-graphdef-file)
-    *   [Exporting a SavedModel](#basic-savedmodel)
-    *   [Exporting a tf.keras File](#basic-keras-file)
-*   [Complex examples](#complex)
-    *   [Exporting a quantized GraphDef](#complex-quant)
-*   [TensorFlow Lite Python interpreter](#interpreter)
-    *   [Using the interpreter from a model file](#interpreter-file)
-    *   [Using the interpreter from model data](#interpreter-data)
-*   [Additional instructions](#additional-instructions)
-    *   [Build from source code](#latest-package)
-    *   [Converting models in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11)
-    *   [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9)
+TensorFlow Lite interpreter using the Python API. 
+
+[TOC]
+
 
 ## High-level overview
 
diff --git a/tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg b/tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg
similarity index 100%
rename from tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg
rename to tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg
diff --git a/tensorflow/contrib/lite/toco/g3doc/README.md b/tensorflow/contrib/lite/toco/g3doc/README.md
new file mode 100644
index 0000000000..2153b6cc63
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/g3doc/README.md
@@ -0,0 +1,3 @@
+# TOCO
+
+These files have moved to [../../g3doc/tflite_convert](../../g3doc/tflite_convert)
-- 
GitLab


From eaebeb1d4d939fb9fd0b75e32a76151cb517bfb6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 17:24:11 -0700
Subject: [PATCH 1342/1357] Update ops-related pbtxt files.

PiperOrigin-RevId: 216452496
---
 tensorflow/core/ops/ops.pbtxt | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 05b97bffad..a8da95dea3 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -15116,6 +15116,22 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "LookupTableRemoveV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  is_stateful: true
+}
 op {
   name: "LookupTableSize"
   input_arg {
@@ -17701,6 +17717,10 @@ op {
     name: "empty_key"
     type_attr: "key_dtype"
   }
+  input_arg {
+    name: "deleted_key"
+    type_attr: "key_dtype"
+  }
   output_arg {
     name: "table_handle"
     type: DT_RESOURCE
-- 
GitLab


From f0784e69761ef5b78480e9e8b1fd1aa558186646 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 17:35:41 -0700
Subject: [PATCH 1343/1357] Add support for modeling fast memory close to the
 processor/gpu

PiperOrigin-RevId: 216453979
---
 .../core/grappler/costs/cost_estimator.h      | 38 +++++++++-
 .../grappler/costs/op_level_cost_estimator.cc | 76 +++++++++++++------
 .../grappler/costs/op_level_cost_estimator.h  |  3 +-
 .../core/grappler/costs/virtual_scheduler.cc  | 31 +++++---
 4 files changed, 112 insertions(+), 36 deletions(-)

diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h
index 569d9da683..811e923b87 100644
--- a/tensorflow/core/grappler/costs/cost_estimator.h
+++ b/tensorflow/core/grappler/costs/cost_estimator.h
@@ -31,8 +31,37 @@ constexpr int64 kMemoryUnknown = -1ll;
 constexpr int64 kZeroMemory = 0ll;
 
 struct DeviceInfo {
-  double gigaops;     // Billions of operations executed per second.
-  double gb_per_sec;  // Bandwidth to main memory in GB per second.
+  // Billions of operations executed per second.
+  double gigaops;
+
+  // Bandwidth to main memory in GB per second.
+  double gb_per_sec;
+
+  // Read bandwidth to intermediate memory in GB per second.
+  double intermediate_read_gb_per_sec;
+
+  // Read bandwidth to intermediate memory in GB per second.
+  double intermediate_write_gb_per_sec;
+
+  DeviceInfo()
+      : gigaops(INFINITY),
+        gb_per_sec(INFINITY),
+        intermediate_read_gb_per_sec(INFINITY),
+        intermediate_write_gb_per_sec(INFINITY) {}
+
+  DeviceInfo(const DeviceInfo& input)
+      : gigaops(input.gigaops),
+        gb_per_sec(input.gb_per_sec),
+        intermediate_read_gb_per_sec(input.intermediate_read_gb_per_sec),
+        intermediate_write_gb_per_sec(input.intermediate_write_gb_per_sec) {}
+
+  DeviceInfo(double gigaops, double gb_per_sec,
+             double intermediate_read_gb_per_sec = INFINITY,
+             double intermediate_write_gb_per_sec = INFINITY)
+      : gigaops(gigaops),
+        gb_per_sec(gb_per_sec),
+        intermediate_read_gb_per_sec(intermediate_read_gb_per_sec),
+        intermediate_write_gb_per_sec(intermediate_write_gb_per_sec) {}
 };
 
 // Holds the set of things we might want to estimate or measure in Grappler.
@@ -101,6 +130,9 @@ struct Costs {
   // Memory access cost of running the graph.
   Duration memory_time;
 
+  // Intermediate memory access cost of running the graph
+  Duration intermediate_memory_time;
+
   // This field can be a very pessimistic estimate of the main memory
   // requirements of a graph. For example, it might assume that all activations
   // are live for all of a graph's execution.
@@ -146,6 +178,7 @@ Costs::Costs() {
   execution_time = Duration::zero();
   compute_time = Duration::zero();
   memory_time = Duration::zero();
+  intermediate_memory_time = Duration::zero();
   max_memory = kMemoryUnknown;
   persistent_memory = kMemoryUnknown;
   temporary_memory = kMemoryUnknown;
@@ -158,6 +191,7 @@ Costs Costs::ZeroCosts() {
   costs.execution_time = Duration::zero();
   costs.compute_time = Duration::zero();
   costs.memory_time = Duration::zero();
+  costs.intermediate_memory_time = Duration::zero();
   costs.max_memory = kZeroMemory;
   costs.persistent_memory = kZeroMemory;
   costs.temporary_memory = kZeroMemory;
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index f363f2915f..76e5c989fc 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -420,7 +420,7 @@ DeviceInfo OpLevelCostEstimator::GetDeviceInfo(
   DCHECK_LT(0, gflops) << device.DebugString();
   DCHECK_LT(0, gb_per_sec) << device.DebugString();
 
-  return {gflops, gb_per_sec};
+  return DeviceInfo(gflops, gb_per_sec);
 }
 
 Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const {
@@ -478,8 +478,8 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
   bool unknown_shapes = false;
   const double input_size = CalculateInputSize(op_info, &unknown_shapes);
   const double output_size = CalculateOutputSize(op_info, &unknown_shapes);
-  const double total_io_bytes = input_size + output_size;
-  Costs costs = PredictOpCountBasedCost(operations, total_io_bytes, op_info);
+  Costs costs =
+      PredictOpCountBasedCost(operations, input_size, output_size, op_info);
   costs.inaccurate = unknown_shapes;
   costs.num_ops_with_unknown_shapes = unknown_shapes;
   costs.max_memory = output_size;
@@ -487,9 +487,13 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
 }
 
 Costs OpLevelCostEstimator::PredictOpCountBasedCost(
-    double operations, double total_io_bytes, const OpInfo& op_info) const {
+    double operations, double input_io_bytes, double output_io_bytes,
+    const OpInfo& op_info) const {
+  double total_io_bytes = input_io_bytes + output_io_bytes;
   const DeviceInfo device_info = GetDeviceInfo(op_info.device());
-  if (device_info.gigaops <= 0 || device_info.gb_per_sec <= 0) {
+  if (device_info.gigaops <= 0 || device_info.gb_per_sec <= 0 ||
+      device_info.intermediate_read_gb_per_sec <= 0 ||
+      device_info.intermediate_write_gb_per_sec <= 0) {
     VLOG(1) << "BAD DEVICE. Op:" << op_info.op()
             << " device type:" << op_info.device().type()
             << " device model:" << op_info.device().model();
@@ -504,9 +508,29 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
   VLOG(1) << "Op:" << op_info.op() << " Size (KB):" << (total_io_bytes) / 1e3
           << " Memory Time (ns):" << memory_cost.count();
 
+  // Check if bytes > 0.  If it's not and the bandwidth is set to infinity
+  // then the result would be undefined.
+  double intermediate_read_time =
+      (input_io_bytes > 0)
+          ? std::ceil(input_io_bytes / device_info.intermediate_read_gb_per_sec)
+          : 0;
+
+  double intermediate_write_time =
+      (output_io_bytes > 0)
+          ? std::ceil(output_io_bytes /
+                      device_info.intermediate_write_gb_per_sec)
+          : 0;
+
+  Costs::NanoSeconds intermediate_memory_cost(intermediate_read_time +
+                                              intermediate_write_time);
+  VLOG(1) << "Op:" << op_info.op() << " Size (KB):" << (total_io_bytes) / 1e3
+          << " Intermediate Memory Time (ns):"
+          << intermediate_memory_cost.count();
+
   Costs costs;
   costs.compute_time = compute_cost;
   costs.memory_time = memory_cost;
+  costs.intermediate_memory_time = intermediate_memory_cost;
   CombineCostsAndUpdateExecutionTime(&costs);
   return costs;
 }
@@ -1273,8 +1297,8 @@ Costs OpLevelCostEstimator::PredictGatherOrSlice(
         CalculateTensorElementCount(op_info.inputs(1), &unknown_shapes);
   }
 
-  const double total_io = input_size + output_size;
-  Costs costs = PredictOpCountBasedCost(op_count, total_io, op_info);
+  Costs costs =
+      PredictOpCountBasedCost(op_count, input_size, output_size, op_info);
   costs.inaccurate = unknown_shapes;
   costs.num_ops_with_unknown_shapes = unknown_shapes;
   costs.max_memory = output_size;
@@ -1291,12 +1315,15 @@ Costs OpLevelCostEstimator::PredictFusedOp(
   // operations here; so we simply add the compute times of each component
   // operation, then update the execution time.
   Costs fused_cost = PredictOpCountBasedCost(0, op_context.op_info);
+
   fused_cost.compute_time = 0;
   fused_cost.inaccurate = false;
   for (auto& fused_op : fused_op_contexts) {
     auto op_cost = PredictCosts(fused_op);
+
     fused_cost.compute_time += op_cost.compute_time;
     fused_cost.inaccurate |= op_cost.inaccurate;
+    fused_cost.intermediate_memory_time += op_cost.intermediate_memory_time;
   }
 
   CombineCostsAndUpdateExecutionTime(&fused_cost);
@@ -1415,8 +1442,8 @@ Costs OpLevelCostEstimator::PredictMaxPool(const OpContext& op_context) const {
   const double total_output_size =
       CalculateOutputSize(op_info, &found_unknown_shapes);
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size, op_info);
+  Costs costs = PredictOpCountBasedCost(ops, total_input_size,
+                                        total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1458,8 +1485,8 @@ Costs OpLevelCostEstimator::PredictMaxPoolGrad(
   const double total_output_size =
       CalculateTensorSize(op_info.inputs(0), &found_unknown_shapes);
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size, op_info);
+  Costs costs = PredictOpCountBasedCost(ops, total_input_size,
+                                        total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1491,8 +1518,8 @@ Costs OpLevelCostEstimator::PredictAvgPool(const OpContext& op_context) const {
   const double total_output_size =
       CalculateOutputSize(op_info, &found_unknown_shapes);
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size, op_info);
+  Costs costs = PredictOpCountBasedCost(ops, total_input_size,
+                                        total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1544,8 +1571,8 @@ Costs OpLevelCostEstimator::PredictAvgPoolGrad(
   const double total_output_size =
       CalculateOutputSize(op_info, &found_unknown_shapes);
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size, op_info);
+  Costs costs = PredictOpCountBasedCost(ops, total_input_size,
+                                        total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1590,9 +1617,9 @@ Costs OpLevelCostEstimator::PredictFusedBatchNorm(
     total_output_size = size_nhwc;
   }
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size + total_internal_read_size,
-      op_info);
+  Costs costs =
+      PredictOpCountBasedCost(ops, total_input_size + total_internal_read_size,
+                              total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1624,9 +1651,9 @@ Costs OpLevelCostEstimator::PredictFusedBatchNormGrad(
   double total_internal_read_size = size_nhwc;
   double total_output_size = size_nhwc * 1 + size_c * 2;
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size + total_internal_read_size,
-      op_info);
+  Costs costs =
+      PredictOpCountBasedCost(ops, total_input_size + total_internal_read_size,
+                              total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1637,9 +1664,12 @@ Costs OpLevelCostEstimator::PredictFusedBatchNormGrad(
 void OpLevelCostEstimator::CombineCostsAndUpdateExecutionTime(
     Costs* costs) const {
   if (compute_memory_overlap_) {
-    costs->execution_time = std::max(costs->compute_time, costs->memory_time);
+    costs->execution_time =
+        std::max(costs->intermediate_memory_time,
+                 std::max(costs->compute_time, costs->memory_time));
   } else {
-    costs->execution_time = costs->compute_time + costs->memory_time;
+    costs->execution_time = costs->compute_time + costs->memory_time +
+                            costs->intermediate_memory_time;
   }
 }
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
index dd1ee39cb2..84dd9213f7 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
@@ -54,7 +54,8 @@ class OpLevelCostEstimator {
   // Naive cost estimate based on the given operations count and the given total
   // io size in bytes. Sizes of op_info inputs and outputs are not taken into
   // consideration.
-  Costs PredictOpCountBasedCost(double operations, double total_io_bytes,
+  Costs PredictOpCountBasedCost(double operations, double input_io_bytes,
+                                double output_io_bytes,
                                 const OpInfo& op_info) const;
 
   // This family of routines counts the number of operations to perform the
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 5b93fb128f..5c5bdad1cb 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -47,6 +47,7 @@ Costs CombineCosts(const Costs& left, const Costs& right) {
   result.execution_time += right.execution_time;
   result.compute_time += right.compute_time;
   result.memory_time += right.memory_time;
+  result.intermediate_memory_time += right.intermediate_memory_time;
 
   result.num_ops_total += right.num_ops_total;
   if (right.inaccurate) result.inaccurate = true;
@@ -825,23 +826,29 @@ Costs VirtualScheduler::Summary() const {
   VLOG(1) << "Expected execution time: " << graph_costs_.execution_time.count();
   VLOG(1) << "Expected compute time: " << graph_costs_.compute_time.count();
   VLOG(1) << "Expected memory time: " << graph_costs_.memory_time.count();
+  VLOG(1) << "Expected intermediate memory time: "
+          << graph_costs_.intermediate_memory_time.count();
   VLOG(1) << "Expected max memory: " << graph_costs_.max_memory;
   VLOG(1) << "Expected max per-op buffers: " << graph_costs_.max_per_op_buffers;
   VLOG(1) << "Expected max per-op streaming buffers: "
           << graph_costs_.max_per_op_streaming;
 
-  VLOG(1) << "Per-op execution time / compute time / memory time:";
+  VLOG(1) << "Per-op execution time / compute time / memory time"
+          << " / intermediate memory time:";
   for (const auto& op_cost_pair : op_to_cost_) {
     const auto& op = op_cost_pair.first;
     const auto& cost = op_cost_pair.second.execution_time.count();
     const auto& compute_cost = op_cost_pair.second.compute_time.count();
     const auto& memory_cost = op_cost_pair.second.memory_time.count();
+    const auto& intermediate_memory_cost =
+        op_cost_pair.second.intermediate_memory_time.count();
     const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate;
     if (cost) {  // Skip printing out zero-cost ops.
       VLOG(1) << strings::Printf(
-          " + %30s : %c %10lld / %10lld / %10lld", op.c_str(),
+          " + %30s : %c %10lld / %10lld / %10lld / %10lld", op.c_str(),
           (is_op_cost_accurate ? ' ' : '~'), static_cast<int64>(cost),
-          static_cast<int64>(compute_cost), static_cast<int64>(memory_cost));
+          static_cast<int64>(compute_cost), static_cast<int64>(memory_cost),
+          static_cast<int64>(intermediate_memory_cost));
     }
   }
 
@@ -894,7 +901,8 @@ Costs VirtualScheduler::Summary() const {
             << " having unknown shapes";
 
     VLOG(1) << "Per-op execution time / compute time / memory time "
-               "(and memory usage at peak memory usage):";
+            << " / intermediate memory time"
+            << " (and memory usage at peak memory usage):";
 
     // Profile non-persistent op memory usage.
     for (const auto& node_port : state.mem_usage_snapshot_at_peak) {
@@ -910,6 +918,8 @@ Costs VirtualScheduler::Summary() const {
       const auto& cost = op_cost_pair.second.execution_time.count();
       const auto& compute_cost = op_cost_pair.second.compute_time.count();
       const auto& memory_cost = op_cost_pair.second.memory_time.count();
+      const auto& intermediate_memory_cost =
+          op_cost_pair.second.intermediate_memory_time.count();
       total_compute_time_ns += op_cost_pair.second.execution_time;
       const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate;
       if (!is_op_cost_accurate) {
@@ -927,12 +937,13 @@ Costs VirtualScheduler::Summary() const {
                                : 0.0;
       if (cost || mem_usage_percent > 1.0) {
         // Print out only non-zero cost ops or ops with > 1% memory usage.
-        VLOG(1) << strings::Printf(" + %30s : %c %10lld / %10lld / %10lld",
-                                   op.c_str(),
-                                   (is_op_cost_accurate ? ' ' : '~'),
-                                   static_cast<int64>(cost),
-                                   static_cast<int64>(compute_cost),
-                                   static_cast<int64>(memory_cost))
+        VLOG(1) << strings::Printf(
+                       " + %30s : %c %10lld / %10lld / %10lld / %10lld",
+                       op.c_str(), (is_op_cost_accurate ? ' ' : '~'),
+                       static_cast<int64>(cost),
+                       static_cast<int64>(compute_cost),
+                       static_cast<int64>(memory_cost),
+                       static_cast<int64>(intermediate_memory_cost))
                 << " (" << strings::HumanReadableNumBytes(op_mem_usage) << " ["
                 << mem_usage_percent << "%] "
                 << (persisent_ops.count(op) > 0 ? ": persistent op)" : ")");
-- 
GitLab


From 75ee5ee51314feef5654ef315960c26d27d657a5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 17:46:22 -0700
Subject: [PATCH 1344/1357] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216455250

---
 tensorflow/go/op/wrappers.go | 111 ++++++++++++++++++-----------------
 1 file changed, 56 insertions(+), 55 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index f35117084a..c6ecd75587 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -4562,6 +4562,59 @@ func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Out
 	return decoded_indices, decoded_values, decoded_shape, log_probability
 }
 
+// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder.
+type CTCGreedyDecoderAttr func(optionalAttr)
+
+// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value.
+//
+// value: If True, merge repeated classes in output.
+// If not specified, defaults to false
+func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr {
+	return func(m optionalAttr) {
+		m["merge_repeated"] = value
+	}
+}
+
+// Performs greedy decoding on the logits given in inputs.
+//
+// A note about the attribute merge_repeated: if enabled, when
+// consecutive logits' maximum indices are the same, only the first of
+// these is emitted.  Labeling the blank '*', the sequence "A B B * B B"
+// becomes "A B B" if merge_repeated = True and "A B B B B" if
+// merge_repeated = False.
+//
+// Regardless of the value of merge_repeated, if the maximum index of a given
+// time and batch corresponds to the blank, index `(num_classes - 1)`, no new
+// element is emitted.
+//
+// Arguments:
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	sequence_length: A vector containing sequence lengths, size `(batch_size)`.
+//
+// Returns Indices matrix, size `(total_decoded_outputs x 2)`,
+// of a `SparseTensor<int64, 2>`.  The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`,
+// of a `SparseTensor<int64, 2>`.  The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor.
+// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence
+// log-probabilities.
+func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CTCGreedyDecoder",
+		Input: []tf.Input{
+			inputs, sequence_length,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
 // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
 type ResourceStridedSliceAssignAttr func(optionalAttr)
 
@@ -18904,10 +18957,11 @@ func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2
 // Arguments:
 //	empty_key: The key used to represent empty key buckets internally. Must not
 // be used in insert or lookup operations.
+//
 //	value_dtype: Type of the table values.
 //
 // Returns Handle to a table.
-func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) {
+func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, deleted_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18918,7 +18972,7 @@ func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.D
 	opspec := tf.OpSpec{
 		Type: "MutableDenseHashTableV2",
 		Input: []tf.Input{
-			empty_key,
+			empty_key, deleted_key,
 		},
 		Attrs: attrs,
 	}
@@ -33104,56 +33158,3 @@ func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_va
 	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1)
 }
-
-// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder.
-type CTCGreedyDecoderAttr func(optionalAttr)
-
-// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value.
-//
-// value: If True, merge repeated classes in output.
-// If not specified, defaults to false
-func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr {
-	return func(m optionalAttr) {
-		m["merge_repeated"] = value
-	}
-}
-
-// Performs greedy decoding on the logits given in inputs.
-//
-// A note about the attribute merge_repeated: if enabled, when
-// consecutive logits' maximum indices are the same, only the first of
-// these is emitted.  Labeling the blank '*', the sequence "A B B * B B"
-// becomes "A B B" if merge_repeated = True and "A B B B B" if
-// merge_repeated = False.
-//
-// Regardless of the value of merge_repeated, if the maximum index of a given
-// time and batch corresponds to the blank, index `(num_classes - 1)`, no new
-// element is emitted.
-//
-// Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	sequence_length: A vector containing sequence lengths, size `(batch_size)`.
-//
-// Returns Indices matrix, size `(total_decoded_outputs x 2)`,
-// of a `SparseTensor<int64, 2>`.  The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`,
-// of a `SparseTensor<int64, 2>`.  The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor.
-// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence
-// log-probabilities.
-func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CTCGreedyDecoder",
-		Input: []tf.Input{
-			inputs, sequence_length,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
-}
-- 
GitLab


From a8cc3cbdeb1563c05d75043c9901135f8b9be65a Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Tue, 9 Oct 2018 17:50:47 -0700
Subject: [PATCH 1345/1357] Fix lite/kernels:add_test for Clang 8.0.0

PiperOrigin-RevId: 216455772
---
 tensorflow/contrib/lite/kernels/add_test.cc | 36 ++++++++++-----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/add_test.cc b/tensorflow/contrib/lite/kernels/add_test.cc
index 0b58443211..261dd36ef0 100644
--- a/tensorflow/contrib/lite/kernels/add_test.cc
+++ b/tensorflow/contrib/lite/kernels/add_test.cc
@@ -108,7 +108,7 @@ TEST(FloatAddOpModel, ActivationRELU_N1_TO_1) {
 }
 
 TEST(FloatAddOpModel, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -124,7 +124,7 @@ TEST(FloatAddOpModel, VariousInputShapes) {
 }
 
 TEST(FloatAddOpModel, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -161,7 +161,7 @@ TEST(IntegerAddOpModel, ActivationRELU_N1_TO_1) {
 }
 
 TEST(IntegerAddOpModel, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerAddOpModel m({TensorType_INT32, test_shapes[i]},
@@ -176,7 +176,7 @@ TEST(IntegerAddOpModel, VariousInputShapes) {
 }
 
 TEST(IntegerAddOpModel, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerAddOpModel m({TensorType_INT32, test_shapes[i]},
@@ -193,11 +193,11 @@ TEST(IntegerAddOpModel, WithBroadcast) {
 
 TEST(QuantizedAddOpModel, QuantizedTestsNoActivation) {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
-  std::vector<std::initializer_list<float>> inputs1 = {
+  std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::initializer_list<float>> inputs2 = {
+  std::vector<std::vector<float>> inputs2 = {
       {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::initializer_list<float>> results = {
+  std::vector<std::vector<float>> results = {
       {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
@@ -217,11 +217,11 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
   const float kMin = -1.f;
   const float kMax = 32767.f / 32768.f;
   float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
-  std::vector<std::initializer_list<float>> inputs1 = {
+  std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::initializer_list<float>> inputs2 = {
+  std::vector<std::vector<float>> inputs2 = {
       {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::initializer_list<float>> results = {
+  std::vector<std::vector<float>> results = {
       {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
@@ -240,12 +240,12 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
 
 TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
-  std::vector<std::initializer_list<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
-                                                       {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::initializer_list<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
-                                                       {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::initializer_list<float>> results = {{-0.2, 0.6, 1.0, -0.1},
-                                                       {-0.2, 0.6, -0.1, 0.8}};
+  std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
+                                             {-0.8, 0.2, 0.7, 0.3}};
+  std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
+                                             {0.6, 0.4, -0.8, 0.5}};
+  std::vector<std::vector<float>> results = {{-0.2, 0.6, 1.0, -0.1},
+                                             {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
                           {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
@@ -262,7 +262,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) {
 
 TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
@@ -281,7 +281,7 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
 
 TEST(QuantizedAddOpModel, QuantizedWithBroadcast) {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-- 
GitLab


From 2db20be49c660a0c475cb57fe0935791d66433ed Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 9 Oct 2018 17:59:06 -0700
Subject: [PATCH 1346/1357] Enable support for lambda functions in static
 analyses.

The CFG treats lambdas as ordinary expressions. The activity analysis ensures that variables masked by the lambda's arguments are not being tracked.

Note: lambdas do not allow direct modification (we exclude indirect mutation via function or methods).
PiperOrigin-RevId: 216456682
---
 tensorflow/python/autograph/pyct/cfg.py       |  4 --
 tensorflow/python/autograph/pyct/cfg_test.py  | 16 +++++++
 .../pyct/static_analysis/activity.py          | 44 ++++++++++++++++---
 .../pyct/static_analysis/activity_test.py     | 34 ++++++++++++++
 4 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/cfg.py b/tensorflow/python/autograph/pyct/cfg.py
index ec733ea38f..fdfcd4dcc1 100644
--- a/tensorflow/python/autograph/pyct/cfg.py
+++ b/tensorflow/python/autograph/pyct/cfg.py
@@ -679,10 +679,6 @@ class AstToCfg(gast.NodeVisitor):
     self.cfgs[node] = self.builder.build()
     self.builder = self.builder_stack.pop()
 
-  def visit_Lambda(self, node):
-    # TODO(mdan): Treat like FunctionDef? That would be a separate CFG.
-    raise NotImplementedError()
-
   def visit_Return(self, node):
     self._process_exit_statement(node, gast.FunctionDef)
 
diff --git a/tensorflow/python/autograph/pyct/cfg_test.py b/tensorflow/python/autograph/pyct/cfg_test.py
index bd82e70f7d..d5870124bc 100644
--- a/tensorflow/python/autograph/pyct/cfg_test.py
+++ b/tensorflow/python/autograph/pyct/cfg_test.py
@@ -964,6 +964,22 @@ class AstToCfgTest(test.TestCase):
         ),
     )
 
+  def test_lambda_basic(self):
+
+    def test_fn(a):
+      a = lambda b: a + b
+      return a
+
+    graph, = self._build_cfg(test_fn).values()
+
+    self.assertGraphMatches(
+        graph,
+        (
+            ('a', 'a = lambda b: a + b', 'return a'),
+            ('a = lambda b: a + b', 'return a', None),
+        ),
+    )
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py
index cc159031ff..0ce410d522 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py
@@ -146,8 +146,15 @@ class ActivityAnalyzer(transformer.Base):
   def __init__(self, context, parent_scope=None, add_unknown_symbols=False):
     super(ActivityAnalyzer, self).__init__(context)
     self.scope = Scope(parent_scope, None, add_unknown_symbols)
+
+    # Note: all these flags crucially rely on the respective nodes are
+    # leaves in the AST, that is, they cannot contain other statements.
     self._in_return_statement = False
     self._in_aug_assign = False
+    self._in_lambda = False
+    self._in_function_def_args = False
+
+    self._untracked_symbols = None
 
   @property
   def _in_constructor(self):
@@ -172,6 +179,13 @@ class ActivityAnalyzer(transformer.Base):
       return
     qn = anno.getanno(node, anno.Basic.QN)
 
+    # Ignore any untracked symbols.
+    if self._untracked_symbols:
+      if qn in self._untracked_symbols:
+        return
+      if qn.owner_set & set(self._untracked_symbols):
+        return
+
     if isinstance(node.ctx, gast.Store):
       self.scope.mark_modified(qn)
       if qn.is_composite and composite_writes_alter_parent:
@@ -181,12 +195,20 @@ class ActivityAnalyzer(transformer.Base):
     elif isinstance(node.ctx, gast.Load):
       self.scope.mark_read(qn)
     elif isinstance(node.ctx, gast.Param):
-      # Param contexts appear in function defs, so they have the meaning of
-      # defining a variable.
-      self.scope.mark_modified(qn)
-      self.scope.mark_param(qn, self.enclosing_entities[-1])
+      if self._in_function_def_args:
+        # In function defs have the meaning of defining a variable.
+        self.scope.mark_modified(qn)
+        self.scope.mark_param(qn, self.enclosing_entities[-1])
+      elif self._in_lambda:
+        assert isinstance(self._untracked_symbols, set)
+        self._untracked_symbols.add(qn)
+      else:
+        # TODO(mdan): Is this case even possible?
+        raise NotImplementedError(
+            'Param "{}" outside a function arguments or lambda.'.format(qn))
     else:
-      raise ValueError('Unknown context %s for node %s.' % (type(node.ctx), qn))
+      raise ValueError('Unknown context {} for node "{}".'.format(
+          type(node.ctx), qn))
 
     if self._in_return_statement:
       self.scope.mark_returned(qn)
@@ -294,6 +316,15 @@ class ActivityAnalyzer(transformer.Base):
       self.scope.merge_from(after_child)
     return parent
 
+  def visit_Lambda(self, node):
+    assert not self._in_lambda or self._in_function_def_args
+    self._in_lambda = True
+    self._untracked_symbols = set()
+    node = self.generic_visit(node)
+    self._untracked_symbols = None
+    self._in_lambda = False
+    return node
+
   def visit_arguments(self, node):
     return self._process_statement(node)
 
@@ -308,7 +339,10 @@ class ActivityAnalyzer(transformer.Base):
 
     # A separate Scope tracks the actual function definition.
     self._enter_scope(True)
+    assert not self._in_function_def_args
+    self._in_function_def_args = True
     node.args = self.visit(node.args)
+    self._in_function_def_args = False
 
     # Track the body separately. This is for compatibility reasons, it may not
     # be strictly needed.
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
index 9a4f1bf09b..678199970c 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
@@ -427,6 +427,40 @@ class ActivityAnalyzerTest(test.TestCase):
     args_scope = anno.getanno(fn_node.args, anno.Static.SCOPE)
     self.assertSymbolSetsAre(('a', 'b'), args_scope.params.keys(), 'params')
 
+  def test_lambda_captures_reads(self):
+
+    def test_fn(a, b):
+      return lambda: a + b
+
+    node, _ = self._parse_and_analyze(test_fn)
+    fn_node = node.body[0]
+    body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE)
+    self.assertScopeIs(body_scope, ('a', 'b'), ())
+    # Nothing local to the lambda is tracked.
+    self.assertSymbolSetsAre((), body_scope.params.keys(), 'params')
+
+  def test_lambda_params_are_isolated(self):
+
+    def test_fn(a, b):  # pylint: disable=unused-argument
+      return lambda a: a + b
+
+    node, _ = self._parse_and_analyze(test_fn)
+    fn_node = node.body[0]
+    body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE)
+    self.assertScopeIs(body_scope, ('b',), ())
+    self.assertSymbolSetsAre((), body_scope.params.keys(), 'params')
+
+  def test_lambda_complex(self):
+
+    def test_fn(a, b, c, d):  # pylint: disable=unused-argument
+      a = (lambda a, b, c: a + b + c)(d, 1, 2) + b
+
+    node, _ = self._parse_and_analyze(test_fn)
+    fn_node = node.body[0]
+    body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE)
+    self.assertScopeIs(body_scope, ('b', 'd'), ('a',))
+    self.assertSymbolSetsAre((), body_scope.params.keys(), 'params')
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 48b24214dd5da842bd00414b46f3e46319c777ee Mon Sep 17 00:00:00 2001
From: Pavithra Vijay <psv@google.com>
Date: Tue, 9 Oct 2018 18:47:55 -0700
Subject: [PATCH 1347/1357] Update model in keras dist strat learning phase
 test to return consistent values.

PiperOrigin-RevId: 216461637
---
 .../contrib/distribute/python/keras_test.py   | 34 +++++++++++--------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 3511b7761f..6553642ad3 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -592,33 +592,37 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
     # meaningful values. Currently we don't pass the learning phase if the
     # Lambda layer uses the learning phase.
     with self.cached_session():
-      x = keras.layers.Input(shape=(16,), name='input')
-      y = keras.layers.Dense(16)(x)
+      x = keras.layers.Input(shape=(1,), name='input')
+      y = keras.layers.Dense(1, kernel_initializer='ones')(x)
       z = keras.layers.Dropout(0.9999)(y)
       model = keras.Model(x, z)
+      initial_weights = model.get_weights()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.005)
       loss = 'mse'
       metrics = ['acc']
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
-                                                     '/device:CPU:0'])
+      strategy = mirrored_strategy.MirroredStrategy(
+          ['/device:GPU:0', '/device:GPU:1'])
 
       model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
 
-      inputs = np.random.rand(10, 16)
-      targets = np.ones((10, 16), dtype=np.float32)
+      inputs = np.ones((10, 1), dtype=np.float32)
+      targets = np.ones((10, 1), dtype=np.float32)
       dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
-      dataset = dataset.repeat(100)
-      dataset = dataset.batch(8)
-
-      hist = model.fit(dataset, epochs=5, steps_per_epoch=20, verbose=1)
-      self.assertEqual(hist.history['acc'][0], 1)
+      dataset = dataset.repeat().batch(8)
+      hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1)
+      self.assertAlmostEqual(hist.history['acc'][0], 0, 0)
 
+      model.set_weights(initial_weights)
       evaluate_output = model.evaluate(dataset, steps=20)
-      self.assertEqual(evaluate_output[1], 0)
-
-      predict_output = model.predict(dataset, steps=1)
-      self.assertNotEqual(np.mean(predict_output), 0)
+      self.assertAlmostEqual(evaluate_output[1], 1, 0)
+
+      inputs = np.ones((10, 1), dtype=np.float32)
+      predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
+      predict_dataset = predict_dataset.repeat().batch(5)
+      output = model.predict(predict_dataset, steps=10)
+      ref_output = np.ones((50, 1), dtype=np.float32)
+      self.assertArrayNear(output[0], ref_output, 1e-1)
 
 
 class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
-- 
GitLab


From 9369994b4b2c4fe822d67a9f65384532cc09c99d Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Tue, 9 Oct 2018 19:06:55 -0700
Subject: [PATCH 1348/1357] Automated rollback of commit
 d78c747e9177fc93d43a580acef2b62eb1420859

PiperOrigin-RevId: 216463443
---
 tensorflow/contrib/lite/python/BUILD        |  2 ++
 tensorflow/contrib/lite/python/lite_test.py | 14 +++-----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index be6c44d306..916788f215 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -73,6 +73,7 @@ py_test(
     data = ["@tflite_mobilenet_ssd_quant_protobuf//:tflite_graph.pb"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
         "no_windows",
     ],
     deps = [
@@ -171,6 +172,7 @@ py_test(
     srcs = ["convert_saved_model_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
         "no_windows",
     ],
     visibility = ["//visibility:public"],
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index ef9bbded2a..d243a494f6 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -591,19 +591,11 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
         'Unable to parse input file \'{}\'.'.format(graph_def_file),
         str(error.exception))
 
+  # TODO(nupurgarg): Test model loading in open source.
   def _initObjectDetectionArgs(self):
     # Initializes the arguments required for the object detection model.
-    # Looks for the model file which is saved in a different location interally
-    # and externally.
-    filename = resource_loader.get_path_to_datafile('testdata/tflite_graph.pb')
-    if not os.path.exists(filename):
-      filename = os.path.join(
-          resource_loader.get_root_dir_with_all_resources(),
-          '../tflite_mobilenet_ssd_quant_protobuf/tflite_graph.pb')
-      if not os.path.exists(filename):
-        raise IOError("File '{0}' does not exist.".format(filename))
-
-    self._graph_def_file = filename
+    self._graph_def_file = resource_loader.get_path_to_datafile(
+        'testdata/tflite_graph.pb')
     self._input_arrays = ['normalized_input_image_tensor']
     self._output_arrays = [
         'TFLite_Detection_PostProcess', 'TFLite_Detection_PostProcess:1',
-- 
GitLab


From 93eef55c4d04af24a6c8080f34629db179634f07 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 19:07:52 -0700
Subject: [PATCH 1349/1357] Automated rollback of commit
 9bd459e4ceba14f9bb1af98d52a109325de952e8

PiperOrigin-RevId: 216463491
---
 .../contrib/lite/experimental/objc/BUILD      |  94 ----
 .../contrib/lite/experimental/objc/README.md  |  10 -
 .../Configs/TensorFlowLiteObjc.tulsigen       |  60 ---
 .../project.tulsiconf                         |  17 -
 .../experimental/objc/apis/TFLInterpreter.h   | 188 --------
 .../objc/apis/TFLInterpreterOptions.h         |  37 --
 .../objc/apis/TFLQuantizationParameters.h     |  36 --
 .../lite/experimental/objc/apis/TFLTensor.h   |  77 ---
 .../experimental/objc/sources/TFLErrorUtil.h  |  51 --
 .../experimental/objc/sources/TFLErrorUtil.m  |  45 --
 .../objc/sources/TFLInterpreter.mm            | 440 ------------------
 .../objc/sources/TFLInterpreterOptions.m      |  30 --
 .../objc/sources/TFLQuantizationParameters.m  |  23 -
 .../objc/sources/TFLTensor+Internal.h         |  42 --
 .../experimental/objc/sources/TFLTensor.m     |  54 ---
 .../objc/tests/TFLInterpreterOptionsTests.m   |  49 --
 .../objc/tests/TFLInterpreterTests.m          | 266 -----------
 .../tools/pip_package/pip_smoke_test.py       |   1 -
 18 files changed, 1520 deletions(-)
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/BUILD
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/README.md
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m

diff --git a/tensorflow/contrib/lite/experimental/objc/BUILD b/tensorflow/contrib/lite/experimental/objc/BUILD
deleted file mode 100644
index 236b96adb5..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/BUILD
+++ /dev/null
@@ -1,94 +0,0 @@
-# TensorFlow Lite Objective-C API.
-
-package(default_visibility = ["//visibility:private"])
-
-licenses(["notice"])  # Apache 2.0
-
-load("//tools/build_defs/apple:ios.bzl", "ios_unit_test")
-
-SOURCES = glob([
-    "sources/*.h",
-    "sources/*.m",
-    "sources/*.mm",
-])
-
-API_HEADERS = glob([
-    "apis/*.h",
-])
-
-MINIMUM_OS_VERSION = "8.0"
-
-# Compiler flags for building regular non-test libraries.
-RELEASE_COPTS = [
-    # Enables language-specific warnings for Objective-C, Objective-C++, C, and C++.
-    "-Wall",
-    # Warns if functions, variables, and types marked with the deprecated attribute are being used.
-    "-Wdeprecated-declarations",
-    # Warns for errors in documentation.
-    "-Wdocumentation",
-    # Turns all warnings into errors.
-    "-Werror",
-    # Enables extra warning flags that are not enabled by -Wall.
-    "-Wextra",
-    # Warns if a global function is defined without a previous prototype declaration.
-    "-Wmissing-prototypes",
-    # From -Wextra. Disables warning when signed value is converted to unsigned value during comparison.
-    "-Wno-sign-compare",
-    # From -Wextra. Disables warning for unused parameters, which are common in delegate methods and block callbacks.
-    "-Wno-unused-parameter",
-    # Warns if a global or local variable or type declaration shadows another variable, parameter, type, class member, or instance variable.
-    "-Wshadow",
-    # Warns if a function is declared or defined without specifying the argument types. For a block with no args, use (void) instead of ().
-    "-Wstrict-prototypes",
-    # Warns if an @selector() expression is encountered with a method name that hasn't been defined yet.
-    "-Wundeclared-selector",
-
-    # Turn off warnings for headers not part of TensorFlow Lite Objective-C API.
-    "--system-header-prefix=third_party/tensorflow/contrib/lite/experimental/c/",
-]
-
-# Compiler flags for building test libraries.
-TEST_COPTS = RELEASE_COPTS + [
-    # From -Wall. Disables warning when passing nil to a callee that requires a non-null argument.
-    "-Wno-nonnull",
-    # Disables warning when a global or local variable or type declaration shadows another.
-    "-Wno-shadow",
-]
-
-objc_library(
-    name = "TensorFlowLiteObjCLib",
-    srcs = SOURCES,
-    hdrs = API_HEADERS,
-    copts = RELEASE_COPTS,
-    deps = [
-        "//tensorflow/contrib/lite/experimental/c:c_api",
-    ],
-    alwayslink = 1,
-)
-
-ios_unit_test(
-    name = "TensorFlowLiteObjCTests",
-    size = "small",
-    minimum_os_version = MINIMUM_OS_VERSION,
-    deps = [":TensorFlowLiteObjCTestLib"],
-)
-
-objc_library(
-    name = "TensorFlowLiteObjCTestLib",
-    testonly = 1,
-    srcs = glob([
-        "tests/*.m",
-    ]),
-    hdrs = glob([
-        "apis/*.h",
-        "sources/*.h",
-        "tests/*.h",
-    ]),
-    copts = TEST_COPTS,
-    resources = [
-        "//tensorflow/contrib/lite:testdata/add.bin",
-    ],
-    deps = [
-        ":TensorFlowLiteObjCLib",
-    ],
-)
diff --git a/tensorflow/contrib/lite/experimental/objc/README.md b/tensorflow/contrib/lite/experimental/objc/README.md
deleted file mode 100644
index e8f150b1e8..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# TensorFlow Lite Objective-C API
-
-## TensorFlowLiteObjc Tulsi Project
-
-Open the `TensorFlowLiteObjc.tulsiproj` using the Tulsi application on Mac or by
-running the following command in Terminal from the root source directory:
-
-```shell
-generate_xcodeproj.sh --genconfig tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj:TensorFlowLiteObjC --outputfolder ~/path/to/xcodeproj
-```
diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
deleted file mode 100644
index babb5902d3..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
+++ /dev/null
@@ -1,60 +0,0 @@
-{
-  "sourceFilters" : [
-    "third_party/tensorflow/contrib/lite",
-    "third_party/tensorflow/contrib/lite/experimental/c",
-    "third_party/tensorflow/contrib/lite/experimental/objc",
-    "third_party/tensorflow/contrib/lite/experimental/objc/apis",
-    "third_party/tensorflow/contrib/lite/experimental/objc/sources",
-    "third_party/tensorflow/contrib/lite/experimental/objc/tests",
-    "third_party/tensorflow/contrib/lite/kernels",
-    "third_party/tensorflow/contrib/lite/kernels/internal",
-    "third_party/tensorflow/contrib/lite/nnapi",
-    "third_party/tensorflow/contrib/lite/schema",
-  ],
-  "buildTargets" : [
-    "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCLib",
-    "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCTests",
-  ],
-  "projectName" : "TensorFlowLiteObjC",
-  "optionSet" : {
-    "LaunchActionPreActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "BazelBuildStartupOptionsRelease" : {
-      "p" : "$(inherited)"
-    },
-    "BazelBuildOptionsRelease" : {
-      "p" : "$(inherited)"
-    },
-    "BazelBuildOptionsDebug" : {
-      "p" : "$(inherited)"
-    },
-    "EnvironmentVariables" : {
-      "p" : "$(inherited)"
-    },
-    "BuildActionPreActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "CommandlineArguments" : {
-      "p" : "$(inherited)"
-    },
-    "TestActionPreActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "BazelBuildStartupOptionsDebug" : {
-      "p" : "$(inherited)"
-    },
-    "BuildActionPostActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "TestActionPostActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "LaunchActionPostActionScript" : {
-      "p" : "$(inherited)"
-    }
-  },
-  "additionalFilePaths" : [
-    "third_party/tensorflow/contrib/lite/experimental/objc/BUILD",
-  ]
-}
diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
deleted file mode 100644
index 00299cd4cf..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "configDefaults" : {
-    "optionSet" : {
-      "BazelBuildOptionsDebug" : {
-        "p" : "--ios_minimum_os=8.0"
-      },
-      "BazelBuildOptionsRelease" : {
-        "p" : "--ios_minimum_os=8.0"
-      },
-    }
-  },
-  "projectName" : "TensorFlowLiteObjC",
-  "packages" : [
-    "third_party/tensorflow/contrib/lite/experimental/objc"
-  ],
-  "workspaceRoot" : "../../../../../../.."
-}
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
deleted file mode 100644
index c07ffc06ff..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
+++ /dev/null
@@ -1,188 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-@class TFLInterpreterOptions;
-@class TFLTensor;
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * @enum TFLInterpreterErrorCode
- * This enum specifies various error codes related to `TFLInterpreter`.
- */
-typedef NS_ENUM(NSUInteger, TFLInterpreterErrorCode) {
-  /** Provided tensor index is invalid. */
-  TFLInterpreterErrorCodeInvalidTensorIndex,
-
-  /** Input data has invalid byte size. */
-  TFLInterpreterErrorCodeInvalidInputByteSize,
-
-  /** Provided shape is invalid. It must be a non-empty array of positive unsigned integers. */
-  TFLInterpreterErrorCodeInvalidShape,
-
-  /** Provided model cannot be loaded. */
-  TFLInterpreterErrorCodeFailedToLoadModel,
-
-  /** Failed to create `TFLInterpreter`. */
-  TFLInterpreterErrorCodeFailedToCreateInterpreter,
-
-  /** Failed to invoke `TFLInterpreter`. */
-  TFLInterpreterErrorCodeFailedToInvoke,
-
-  /** Failed to retrieve a tensor. */
-  TFLInterpreterErrorCodeFailedToGetTensor,
-
-  /** Failed to resize an input tensor. */
-  TFLInterpreterErrorCodeFailedToResizeInputTensor,
-
-  /** Failed to copy data into an input tensor. */
-  TFLInterpreterErrorCodeFailedToCopyDataToInputTensor,
-
-  /** Failed to get data from an output tensor. */
-  TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor,
-
-  /** Failed to allocate memory for tensors. */
-  TFLInterpreterErrorCodeFailedToAllocateTensors,
-
-  /** Operaton not allowed without allocating memory for tensors first. */
-  TFLInterpreterErrorCodeAllocateTensorsRequired,
-
-  /** Operaton not allowed without invoking the interpreter first. */
-  TFLInterpreterErrorCodeInvokeInterpreterRequired,
-};
-
-/**
- * A TensorFlow Lite model interpreter.
- */
-@interface TFLInterpreter : NSObject
-
-/** The total number of input tensors. 0 if the interpreter creation failed. */
-@property(nonatomic, readonly) NSUInteger inputTensorCount;
-
-/** The total number of output tensors. 0 if the interpreter creation failed. */
-@property(nonatomic, readonly) NSUInteger outputTensorCount;
-
-/** Unavailable. */
-- (instancetype)init NS_UNAVAILABLE;
-
-/**
- * Initializes a new TensorFlow Lite interpreter instance with the given model file path and the
- * default interpreter options.
- *
- * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
- *
- * @return A new instance of `TFLInterpreter` with the given model and the default interpreter
- *     options.
- */
-- (instancetype)initWithModelPath:(NSString *)modelPath;
-
-/**
- * Initializes a new TensorFlow Lite interpreter instance with the given model file path and
- * options.
- *
- * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
- * @param options Options to use for configuring the TensorFlow Lite interpreter.
- *
- * @return A new instance of `TFLInterpreter` with the given model and options.
- */
-- (instancetype)initWithModelPath:(NSString *)modelPath
-                          options:(TFLInterpreterOptions *)options NS_DESIGNATED_INITIALIZER;
-
-/**
- * Invokes the interpreter to run inference.
- *
- * @param error An optional error parameter populated when there is an error in invoking the
- *     interpreter.
- *
- * @return Whether the invocation is successful. Returns NO if an error occurred.
- */
-- (BOOL)invokeWithError:(NSError **)error;
-
-/**
- * Returns the input tensor at the given index.
- *
- * @param index The index of an input tensor.
- * @param error An optional error parameter populated when there is an error in looking up the input
- *     tensor.
- *
- * @return The input tensor at the given index. `nil` if there is an error.
- */
-- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
-
-/**
- * Returns the output tensor at the given index.
- *
- * @param index The index of an output tensor.
- * @param error An optional error parameter populated when there is an error in looking up the
- *     output tensor.
- *
- * @return The output tensor at the given index. `nil` if there is an error.
- */
-- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
-
-/**
- * Resizes the input tensor at the given index to the specified shape (an array of positive unsigned
- * integers).
- *
- * @param index The index of an input tensor.
- * @param shape Shape that the given input tensor should be resized to. It should be an array of
- *     positive unsigned integer(s) containing the size of each dimension.
- * @param error An optional error parameter populated when there is an error in resizing the input
- *     tensor.
- *
- * @return Whether the input tensor was resized successfully. Returns NO if an error occurred.
- */
-- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
-                         toShape:(NSArray<NSNumber *> *)shape
-                           error:(NSError **)error;
-
-/**
- * Copies the given data into the input tensor at the given index. This is allowed only before the
- * interpreter is invoked.
- *
- * @param data The data to set. The byte size of the data must match what's required by the given
- *     input tensor.
- * @param index The index of an input tensor.
- * @param error An optional error parameter populated when there is an error in setting the data.
- *
- * @return Whether the data was set into the input tensor successfully. Returns NO if an error
- *     occurred.
- */
-- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
-
-/**
- * Gets the data from the output tensor at the given index. The interpreter invocation has to
- * complete before the data can be retrieved from an output tensor.
- *
- * @param index The index of an output tensor.
- * @param error An optional error parameter populated when there is an error in getting the data.
- *
- * @return The data of the output tensor at the given index. `nil` if there is an error.
- */
-- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
-
-/**
- * Allocates memory for tensors.
- *
- * @param error An optional error parameter populated when there is an error in allocating memory.
- *
- * @return Whether memory allocation is successful. Returns NO if an error occurred.
- */
-- (BOOL)allocateTensorsWithError:(NSError **)error;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
deleted file mode 100644
index 6461fbf017..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-/** Custom configuration options for a TensorFlow Lite interpreter. */
-@interface TFLInterpreterOptions : NSObject
-
-/**
- * Maximum number of threads that the interpreter should run on. Defaults to 0 (unspecified, letting
- * TensorFlow Lite to optimize the threading decision).
- */
-@property(nonatomic) NSUInteger numberOfThreads;
-
-/**
- * Initializes a new instance of `TFLInterpreterOptions`.
- *
- * @return A new instance of `TFLInterpreterOptions`.
- */
-- (instancetype)init NS_DESIGNATED_INITIALIZER;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
deleted file mode 100644
index 3d5cf793c5..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * Parameters for asymmetric quantization. Quantized values can be converted to float values using:
- * `realValue = scale * (quantizedValue - zeroPoint)`.
- */
-@interface TFLQuantizationParameters : NSObject
-
-/** Scale of asymmetric quantization. */
-@property(nonatomic, readonly) float scale;
-
-/** Zero point of asymmetric quantization. */
-@property(nonatomic, readonly) int32_t zeroPoint;
-
-/** Unavailable. */
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
deleted file mode 100644
index d08b8fc0e9..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-@class TFLQuantizationParameters;
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * @enum TFLTensorDataType
- * This enum specifies supported TensorFlow Lite tensor data types.
- */
-typedef NS_ENUM(NSUInteger, TFLTensorDataType) {
-  /** Tensor data type not available. This indicates an error with the model. */
-  TFLTensorDataTypeNoType,
-
-  /** 32-bit single precision floating point. */
-  TFLTensorDataTypeFloat32,
-
-  /** 32-bit signed integer. */
-  TFLTensorDataTypeInt32,
-
-  /** 8-bit unsigned integer. */
-  TFLTensorDataTypeUInt8,
-
-  /** 64-bit signed integer. */
-  TFLTensorDataTypeInt64,
-
-  /** Boolean. */
-  TFLTensorDataTypeBool,
-
-  /** 16-bit signed integer. */
-  TFLTensorDataTypeInt16,
-};
-
-/**
- * An input or output tensor in a TensorFlow Lite model.
- */
-@interface TFLTensor : NSObject
-
-/** Name of the tensor. */
-@property(nonatomic, readonly, copy) NSString *name;
-
-/** Data type of the tensor. */
-@property(nonatomic, readonly) TFLTensorDataType dataType;
-
-/**
- * Shape of the tensor, an array of positive unsigned integer(s) containing the size of each
- * dimension. For example: the shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is
- * [2, 2, 3].
- */
-@property(nonatomic, readonly, copy) NSArray<NSNumber *> *shape;
-
-/** Number of bytes for the tensor data. */
-@property(nonatomic, readonly) NSUInteger byteSize;
-
-/** Parameters for asymmetric quantization. `nil` if the tensor does not use quantization. */
-@property(nonatomic, readonly, nullable) TFLQuantizationParameters *quantizationParameters;
-
-/** Unavailable. */
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
deleted file mode 100644
index b6fd4763d6..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-/** Helper utility for error reporting. */
-@interface TFLErrorUtil : NSObject
-
-/**
- * Creates and returns an interpreter error with the given error code and description.
- *
- * @param code Error code.
- * @param description Error description.
- *
- * @return The created interpreter error with the given error code and description.
- */
-+ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code
-                          description:(NSString *)description;
-
-/**
- * Creates and saves an interpreter error with the given error code and description.
- *
- * @param code Error code.
- * @param description Error description.
- * @param error Pointer to where to save the created error. If `nil`, no error will be saved.
- */
-+ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
-                         description:(NSString *)description
-                               error:(NSError **)error;
-
-/** Unavailable. */
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
deleted file mode 100644
index 756d69481c..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "TFLErrorUtil.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-/** Error domain of TensorFlow Lite interpreter related errors. */
-static NSString *const TFLInterpreterErrorDomain = @"org.tensorflow.lite.interpreter";
-
-@implementation TFLErrorUtil
-
-#pragma mark - Public
-
-+ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code
-                          description:(NSString *)description {
-  return [NSError errorWithDomain:TFLInterpreterErrorDomain
-                             code:code
-                         userInfo:@{NSLocalizedDescriptionKey : description}];
-}
-
-+ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
-                         description:(NSString *)description
-                               error:(NSError **)error {
-  if (error) {
-    *error = [NSError errorWithDomain:TFLInterpreterErrorDomain
-                                 code:code
-                             userInfo:@{NSLocalizedDescriptionKey : description}];
-  }
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
deleted file mode 100644
index 0f940a5cf3..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
+++ /dev/null
@@ -1,440 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
-
-#import "TFLErrorUtil.h"
-#import "TFLTensor+Internal.h"
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
-
-#include "third_party/tensorflow/contrib/lite/experimental/c/c_api.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * @enum TFLTensorType
- * This enum specifies input or output tensor types.
- */
-typedef NS_ENUM(NSUInteger, TFLTensorType) {
-  /** Input tensor type. */
-  TFLTensorTypeInput,
-
-  /** Output tensor type. */
-  TFLTensorTypeOutput,
-};
-
-// Names used for indicating input or output in error messages.
-static NSString *const kTFLInputDirection = @"input";
-static NSString *const kTFLOutputDirection = @"output";
-
-/**
- * Error reporter for TFLInterpreter.
- *
- * @param user_data User data. Not used.
- * @param format Error message which may contain argument formatting specifiers.
- * @param args Values of the arguments in the error message.
- */
-static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_list args) {
-  NSLog(@"%@", [[NSString alloc] initWithFormat:@(format) arguments:args]);
-}
-
-@interface TFLInterpreter ()
-
-/** TFL_Interpreter backed by C API. */
-@property(nonatomic, nullable) TFL_Interpreter *interpreter;
-
-/**
- * An error in initializing the interpreter. If not `nil`, this error will be reported when the
- * interpreter is used.
- */
-@property(nonatomic, nullable) NSError *initializationError;
-
-@end
-
-@implementation TFLInterpreter
-
-#pragma mark - NSObject
-
-- (void)dealloc {
-  TFL_DeleteInterpreter(_interpreter);
-}
-
-#pragma mark - Public
-
-- (instancetype)initWithModelPath:(NSString *)modelPath {
-  return [self initWithModelPath:modelPath options:[[TFLInterpreterOptions alloc] init]];
-}
-
-- (instancetype)initWithModelPath:(NSString *)modelPath options:(TFLInterpreterOptions *)options {
-  self = [super init];
-
-  if (self != nil) {
-    const char *modelPathCString = modelPath.UTF8String;
-    NSString *pathErrorString =
-        [NSString stringWithFormat:@"Cannot load model from path (%@).", modelPath];
-    if (modelPathCString == nullptr) {
-      _initializationError =
-          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
-                                     description:pathErrorString];
-      return self;
-    }
-
-    TFL_Model *model = TFL_NewModelFromFile(modelPathCString);
-    if (model == nullptr) {
-      _initializationError =
-          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
-                                     description:pathErrorString];
-      return self;
-    }
-
-    TFL_InterpreterOptions *cOptions = TFL_NewInterpreterOptions();
-    if (cOptions == nullptr) {
-      _initializationError =
-          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
-                                     description:@"Failed to create the interpreter."];
-      TFL_DeleteModel(model);
-      return self;
-    }
-
-    if (options.numberOfThreads > 0) {
-      TFL_InterpreterOptionsSetNumThreads(cOptions, (int32_t)options.numberOfThreads);
-    }
-    TFL_InterpreterOptionsSetErrorReporter(cOptions, TFLInterpreterErrorReporter, nullptr);
-
-    _interpreter = TFL_NewInterpreter(model, cOptions);
-    if (_interpreter == nullptr) {
-      _initializationError =
-          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
-                                     description:@"Failed to create the interpreter."];
-    } else {
-      _inputTensorCount = (NSUInteger)TFL_InterpreterGetInputTensorCount(_interpreter);
-      _outputTensorCount = (NSUInteger)TFL_InterpreterGetOutputTensorCount(_interpreter);
-      if (_inputTensorCount <= 0 || _outputTensorCount <= 0) {
-        _initializationError =
-            [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
-                                       description:@"Failed to create the interpreter."];
-      }
-    }
-    TFL_DeleteInterpreterOptions(cOptions);
-    TFL_DeleteModel(model);
-  }
-
-  return self;
-}
-
-- (BOOL)invokeWithError:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return NO;
-  }
-
-  if (TFL_InterpreterInvoke(self.interpreter) != kTfLiteOk) {
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToInvoke
-                                   description:@"Failed to invoke the interpreter."
-                                         error:error];
-    return NO;
-  }
-
-  return YES;
-}
-
-- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return nil;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
-    return nil;
-  }
-
-  return [self tensorOfType:TFLTensorTypeInput atIndex:index error:error];
-}
-
-- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return nil;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) {
-    return nil;
-  }
-
-  return [self tensorOfType:TFLTensorTypeOutput atIndex:index error:error];
-}
-
-- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
-                         toShape:(NSArray<NSNumber *> *)shape
-                           error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return NO;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
-    return NO;
-  }
-
-  if (shape.count == 0) {
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
-                                   description:@"Invalid shape. Must not be empty."
-                                         error:error];
-    return NO;
-  }
-
-  int cDimensions[self.inputTensorCount];
-  for (int d = 0; d < shape.count; ++d) {
-    int dimension = shape[d].intValue;
-    if (dimension <= 0) {
-      NSString *errorDescription = @"Invalid shape. Dimensions must be positive integers.";
-      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
-                                     description:errorDescription
-                                           error:error];
-      return NO;
-    }
-    cDimensions[d] = dimension;
-  }
-
-  if (TFL_InterpreterResizeInputTensor(self.interpreter, (int32_t)index, cDimensions,
-                                       (int32_t)shape.count) != kTfLiteOk) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Failed to resize input tensor at index (%lu).", (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToResizeInputTensor
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  return YES;
-}
-
-- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return NO;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
-    return NO;
-  }
-
-  TFL_Tensor *tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index);
-  if (tensor == nullptr) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Failed to get input tensor at index (%lu).", (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor);
-  if (data.length != byteSize) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Input tensor at index (%lu) expects data size (%lu), but got (%lu).",
-                         (unsigned long)index, byteSize, (unsigned long)data.length];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidInputByteSize
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  if (TFL_TensorCopyFromBuffer(tensor, data.bytes, data.length) != kTfLiteOk) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"Failed to copy data into input tensor at index (%lu).",
-                                   (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  return YES;
-}
-
-- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return nil;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) {
-    return nil;
-  }
-
-  const TFL_Tensor *tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index);
-  if (tensor == nullptr) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Failed to get output tensor at index (%lu).", (unsigned long)index];
-    [TFLErrorUtil
-        saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor
-                         description:errorDescription
-                               error:error];
-    return nil;
-  }
-
-  void *bytes = TFL_TensorData(tensor);
-  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor);
-  if (bytes == nullptr || byteSize == 0) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Failed to get output tensor data at index (%lu).", (unsigned long)index];
-    [TFLErrorUtil
-        saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor
-                         description:errorDescription
-                               error:error];
-    return nil;
-  }
-
-  return [NSData dataWithBytes:bytes length:byteSize];
-}
-
-- (BOOL)allocateTensorsWithError:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return NO;
-  }
-
-  if (TFL_InterpreterAllocateTensors(self.interpreter) != kTfLiteOk) {
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToAllocateTensors
-                                   description:@"Failed to allocate memory for tensors."
-                                         error:error];
-    return NO;
-  }
-  return YES;
-}
-
-#pragma mark - Private
-
-- (nullable TFLTensor *)tensorOfType:(TFLTensorType)type
-                             atIndex:(NSUInteger)index
-                               error:(NSError **)error {
-  const TFL_Tensor *tensor = nullptr;
-  NSString *tensorType;
-  switch (type) {
-    case TFLTensorTypeInput:
-      tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index);
-      tensorType = kTFLInputDirection;
-      break;
-    case TFLTensorTypeOutput:
-      tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index);
-      tensorType = kTFLOutputDirection;
-      break;
-  }
-
-  if (tensor == nullptr) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"Failed to get %@ tensor at index (%lu).", tensorType,
-                                   (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
-                                   description:errorDescription
-                                         error:error];
-    return nil;
-  }
-
-  const char *cName = TFL_TensorName(tensor);
-  if (cName == nullptr) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"Failed to get name of %@ tensor at index (%lu).", tensorType,
-                                   (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
-                                   description:errorDescription
-                                         error:error];
-    return nil;
-  }
-  NSString *name = [NSString stringWithUTF8String:cName];
-
-  TFLTensorDataType dataType = [self tensorDataTypeFromCTensorType:TFL_TensorType(tensor)];
-
-  int32_t rank = TFL_TensorNumDims(tensor);
-  if (rank <= 0) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid rank (%d).", tensorType,
-                                   (unsigned long)index, rank];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
-                                   description:errorDescription
-                                         error:error];
-    return nil;
-  }
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:rank];
-  for (int32_t d = 0; d < rank; d++) {
-    int32_t dimension = TFL_TensorDim(tensor, d);
-    if (dimension <= 0) {
-      NSString *errorDescription =
-          [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid %d-th dimension (%d).",
-                                     tensorType, (unsigned long)index, d, dimension];
-      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
-                                     description:errorDescription
-                                           error:error];
-      return nil;
-    }
-    shape[d] = @((NSUInteger)dimension);
-  }
-
-  // TODO: Set quantization parameters when C API supports it.
-  return [[TFLTensor alloc] initWithName:name
-                                dataType:dataType
-                                   shape:shape
-                                byteSize:(NSUInteger)TFL_TensorByteSize(tensor)
-                  quantizationParameters:nil];
-}
-
-- (TFLTensorDataType)tensorDataTypeFromCTensorType:(TFL_Type)cTensorType {
-  switch (cTensorType) {
-    case kTfLiteFloat32:
-      return TFLTensorDataTypeFloat32;
-    case kTfLiteInt32:
-      return TFLTensorDataTypeInt32;
-    case kTfLiteUInt8:
-      return TFLTensorDataTypeUInt8;
-    case kTfLiteInt64:
-      return TFLTensorDataTypeInt64;
-    case kTfLiteBool:
-      return TFLTensorDataTypeBool;
-    case kTfLiteInt16:
-      return TFLTensorDataTypeInt16;
-    case kTfLiteNoType:
-    case kTfLiteString:
-    case kTfLiteComplex64:
-      // kTfLiteString and kTfLiteComplex64 are not supported in TensorFlow Lite Objc API.
-      return TFLTensorDataTypeNoType;
-  }
-}
-
-- (void)saveInitializationErrorToDestination:(NSError **)destination {
-  if (destination != NULL) {
-    *destination = self.initializationError;
-  }
-}
-
-- (BOOL)isValidTensorIndex:(NSUInteger)index
-                belowLimit:(NSUInteger)totalTensorCount
-                     error:(NSError **)error {
-  if (index >= totalTensorCount) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"Invalid tensor index (%lu) exceeds max (%lu).",
-                                   (unsigned long)index, (unsigned long)(totalTensorCount - 1)];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensorIndex
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  return YES;
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
deleted file mode 100644
index 1776688288..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-@implementation TFLInterpreterOptions
-
-#pragma mark - Public
-
-- (instancetype)init {
-  self = [super init];
-  return self;
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
deleted file mode 100644
index 190f0479ce..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-@implementation TFLQuantizationParameters
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
deleted file mode 100644
index f2f13e5e5f..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-@interface TFLTensor (Internal)
-
-/**
- * Initializes a `TFLTensor` with the given name, data type, shape, and quantization parameters.
- *
- * @param name Name of the tensor.
- * @param dataType Data type of the tensor.
- * @param shape Shape of the tensor.
- * @param byteSize Size of the tensor data in number of bytes.
- * @param quantizationParameters Quantization parameters of the tensor. `nil` if the tensor does not
- *     use quantization.
- *
- * @return A new instance of `TFLTensor` with the given name, data type, shape, and quantization
- *     parameters.
- */
-- (instancetype)initWithName:(NSString *)name
-                    dataType:(TFLTensorDataType)dataType
-                       shape:(NSArray<NSNumber *> *)shape
-                    byteSize:(NSUInteger)byteSize
-      quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
deleted file mode 100644
index adb1c5ad2c..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
-
-#import "TFLTensor+Internal.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-@interface TFLTensor ()
-
-// Redefines readonly properties.
-@property(nonatomic, copy) NSString *name;
-@property(nonatomic) TFLTensorDataType dataType;
-@property(nonatomic, copy) NSArray<NSNumber *> *shape;
-@property(nonatomic) NSUInteger byteSize;
-@property(nonatomic, nullable) TFLQuantizationParameters *quantizationParameters;
-
-@end
-
-@implementation TFLTensor
-
-#pragma mark - TFLTensor (Internal)
-
-- (instancetype)initWithName:(NSString *)name
-                    dataType:(TFLTensorDataType)dataType
-                       shape:(NSArray<NSNumber *> *)shape
-                    byteSize:(NSUInteger)byteSize
-      quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters {
-  self = [super init];
-  if (self != nil) {
-    _name = [name copy];
-    _dataType = dataType;
-    _shape = [shape copy];
-    _byteSize = byteSize;
-    _quantizationParameters = quantizationParameters;
-  }
-  return self;
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
deleted file mode 100644
index 17c495fa18..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
-
-#import <XCTest/XCTest.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * Unit tests for TFLInterpreterOptions.
- */
-@interface TFLInterpreterOptionsTests : XCTestCase
-@end
-
-@implementation TFLInterpreterOptionsTests
-
-#pragma mark - Tests
-
-- (void)testInit {
-  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
-  XCTAssertNotNil(options);
-  XCTAssertEqual(options.numberOfThreads, 0);
-}
-
-- (void)testSetNumberOfThread {
-  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
-  options.numberOfThreads = 2;
-  XCTAssertEqual(options.numberOfThreads, 2);
-  options.numberOfThreads = 0;
-  XCTAssertEqual(options.numberOfThreads, 0);
-  options.numberOfThreads = 3;
-  XCTAssertEqual(options.numberOfThreads, 3);
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m
deleted file mode 100644
index 9e6319a732..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m
+++ /dev/null
@@ -1,266 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
-
-#import <XCTest/XCTest.h>
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-/** Model resource name. */
-static NSString *const kAddModelResourceName = @"add";
-
-/** Model resource type. */
-static NSString *const kAddModelResourceType = @"bin";
-
-/** Rank of the input and output tensor in the Add model. */
-static const NSUInteger kAddModelTensorRank = 1U;
-
-/** Size of the first (and only) dimension of the input and output tensor in the Add model. */
-static const NSUInteger kAddModelTensorFirstDimensionSize = 2U;
-
-/** Invalid input tensor index. */
-static const NSUInteger kInvalidInputTensorIndex = 1U;
-
-/** Invalid output tensor index. */
-static const NSUInteger kInvalidOutputTensorIndex = 1U;
-
-/** Accurary used in comparing floating numbers. */
-static const float kTestAccuracy = 1E-5F;
-
-/**
- * Unit tests for TFLInterpreter.
- */
-@interface TFLInterpreterTests : XCTestCase
-
-/** Absolute path of the Add model resource. */
-@property(nonatomic, nullable) NSString *modelPath;
-
-/** Default interpreter using the Add model. */
-@property(nonatomic, nullable) TFLInterpreter *interpreter;
-
-@end
-
-@implementation TFLInterpreterTests
-
-#pragma mark - XCTestCase
-
-- (void)setUp {
-  [super setUp];
-
-  NSBundle *bundle = [NSBundle bundleForClass:[self class]];
-  self.modelPath = [bundle pathForResource:kAddModelResourceName ofType:kAddModelResourceType];
-  self.interpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath];
-  XCTAssertNotNil(self.interpreter);
-  XCTAssertTrue([self.interpreter allocateTensorsWithError:nil]);
-}
-
-- (void)tearDown {
-  self.modelPath = nil;
-  self.interpreter = nil;
-
-  [super tearDown];
-}
-
-#pragma mark - Tests
-
-- (void)testSuccessfulFullRun {
-  // Shape for both input and output tensor.
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
-  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
-
-  // Creates the interpreter options.
-  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
-  XCTAssertNotNil(options);
-  options.numberOfThreads = 2;
-
-  // Creates the interpreter.
-  TFLInterpreter *customInterpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath
-                                                                        options:options];
-  XCTAssertNotNil(customInterpreter);
-
-  // Allocates memory for tensors.
-  NSError *error;
-  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
-  XCTAssertNil(error);
-
-  // Verifies input and output tensor counts.
-  XCTAssertEqual(customInterpreter.inputTensorCount, 1);
-  XCTAssertEqual(customInterpreter.outputTensorCount, 1);
-
-  // Resizes the intput tensor.
-  XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
-  XCTAssertNil(error);
-
-  // Re-allocates memory for tensors.
-  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
-  XCTAssertNil(error);
-
-  // Verifies the input tensor.
-  TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error];
-  XCTAssertNotNil(inputTensor);
-  XCTAssertNil(error);
-  XCTAssertTrue([inputTensor.name isEqualToString:@"input"]);
-  XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeFloat32);
-  XCTAssertTrue([shape isEqualToArray:inputTensor.shape]);
-  XCTAssertEqual(inputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize);
-
-  // Copies the input data.
-  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
-  float one = 1.f;
-  float three = 3.f;
-  [inputData appendBytes:&one length:sizeof(float)];
-  [inputData appendBytes:&three length:sizeof(float)];
-  XCTAssertTrue([customInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
-  XCTAssertNil(error);
-
-  // Invokes the interpreter.
-  XCTAssertTrue([customInterpreter invokeWithError:&error]);
-  XCTAssertNil(error);
-
-  // Verifies the output tensor.
-  TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error];
-  XCTAssertNotNil(outputTensor);
-  XCTAssertNil(error);
-  XCTAssertTrue([outputTensor.name isEqualToString:@"output"]);
-  XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeFloat32);
-  XCTAssertTrue([shape isEqualToArray:outputTensor.shape]);
-  XCTAssertEqual(outputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize);
-
-  // Tries to query an invalid output tensor index.
-  TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex
-                                                                    error:&error];
-  XCTAssertNil(invalidOutputTensor);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
-
-  // Gets the output tensor data.
-  error = nil;
-  NSData *outputData = [customInterpreter dataFromOutputTensorAtIndex:0 error:&error];
-  XCTAssertNotNil(outputData);
-  XCTAssertNil(error);
-  float output[kAddModelTensorFirstDimensionSize];
-  [outputData getBytes:output length:(sizeof(float) * kAddModelTensorFirstDimensionSize)];
-  XCTAssertEqualWithAccuracy(output[0], 3.f, kTestAccuracy);
-  XCTAssertEqualWithAccuracy(output[1], 9.f, kTestAccuracy);
-}
-
-- (void)testInitWithModelPath_invalidPath {
-  // Shape for both input and output tensor.
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
-  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
-
-  // Creates the interpreter.
-  TFLInterpreter *brokenInterpreter = [[TFLInterpreter alloc] initWithModelPath:@"InvalidPath"];
-  XCTAssertNotNil(brokenInterpreter);
-  XCTAssertEqual(brokenInterpreter.inputTensorCount, 0);
-  XCTAssertEqual(brokenInterpreter.outputTensorCount, 0);
-
-  // Allocates memory for tensors.
-  NSError *error;
-  XCTAssertFalse([brokenInterpreter allocateTensorsWithError:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Resizes the intput tensor.
-  XCTAssertFalse([brokenInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Verifies the input tensor.
-  TFLTensor *inputTensor = [brokenInterpreter inputTensorAtIndex:0 error:&error];
-  XCTAssertNil(inputTensor);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Copies the input data.
-  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
-  float one = 1.f;
-  float three = 3.f;
-  [inputData appendBytes:&one length:sizeof(float)];
-  [inputData appendBytes:&three length:sizeof(float)];
-  XCTAssertFalse([brokenInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Invokes the interpreter.
-  XCTAssertFalse([brokenInterpreter invokeWithError:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Verifies the output tensor.
-  TFLTensor *outputTensor = [brokenInterpreter outputTensorAtIndex:0 error:&error];
-  XCTAssertNil(outputTensor);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Gets the output tensor data.
-  NSData *outputData = [brokenInterpreter dataFromOutputTensorAtIndex:0 error:&error];
-  XCTAssertNil(outputData);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-}
-
-- (void)testInvoke_beforeAllocation {
-  TFLInterpreter *interpreterWithoutAllocation =
-      [[TFLInterpreter alloc] initWithModelPath:self.modelPath];
-  XCTAssertNotNil(interpreterWithoutAllocation);
-
-  NSError *error;
-  XCTAssertFalse([interpreterWithoutAllocation invokeWithError:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToInvoke);
-}
-
-- (void)testInputTensorAtIndex_invalidIndex {
-  NSError *error;
-  TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:kInvalidInputTensorIndex
-                                                          error:&error];
-  XCTAssertNil(inputTensor);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
-}
-
-- (void)testResizeInputTensorAtIndex_invalidIndex {
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
-  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
-  NSError *error;
-  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:kInvalidInputTensorIndex
-                                                    toShape:shape
-                                                      error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
-}
-
-- (void)testResizeInputTensorAtIndex_emptyShape {
-  NSMutableArray *emptyShape = [NSMutableArray arrayWithCapacity:0];
-  NSError *error;
-  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:emptyShape error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
-}
-
-- (void)testResizeInputTensorAtIndex_zeroDimensionSize {
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
-  shape[0] = [NSNumber numberWithUnsignedInteger:0];
-  NSError *error;
-  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
-}
-
-- (void)testCopyDataToInputTensorAtIndex_invalidInputDataByteSize {
-  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
-  float one = 1.f;
-  float three = 3.f;
-  [inputData appendBytes:&one length:sizeof(float)];
-  [inputData appendBytes:&three length:(sizeof(float) - 1)];
-  NSError *error;
-  XCTAssertFalse([self.interpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidInputByteSize);
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index 31b68c8f00..c6ef82ccdc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -105,7 +105,6 @@ BLACKLIST = [
     "//tensorflow/contrib/timeseries/python/timeseries:test_utils",
     "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils",  # pylint:disable=line-too-long
     "//tensorflow/contrib/image:sparse_image_warp_test_data",
-    "//tools/build_defs/apple:ios.bzl",
 ]
 
 
-- 
GitLab


From 58fcfc98cd59ae3952399fc55380b8733df08df9 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Tue, 9 Oct 2018 19:41:35 -0700
Subject: [PATCH 1350/1357] [XLA] Add documentation and HLO-level support for
 multi-value sort.

No support in any of the backends, and not yet exposed through XlaBuilder.

PiperOrigin-RevId: 216465753
---
 .../xla/service/algebraic_simplifier.cc       |  2 +-
 .../xla/service/algebraic_simplifier_test.cc  | 12 ++++++---
 .../service/bfloat16_normalization_test.cc    |  2 +-
 .../xla/service/hlo_dataflow_analysis_test.cc |  3 ++-
 .../compiler/xla/service/hlo_instruction.cc   | 17 ++++++------
 .../compiler/xla/service/hlo_instruction.h    |  4 +--
 .../compiler/xla/service/hlo_instructions.cc  |  9 +++----
 .../compiler/xla/service/hlo_instructions.h   |  2 +-
 tensorflow/compiler/xla/service/hlo_parser.cc | 20 +++-----------
 .../compiler/xla/service/hlo_parser_test.cc   | 15 +++++++++++
 .../compiler/xla/service/hlo_verifier.cc      | 22 +++++++++-------
 .../compiler/xla/service/shape_inference.cc   | 25 +++++++++++-------
 .../xla/service/shape_inference_test.cc       | 26 ++++++++++++++++++-
 .../service/tuple_points_to_analysis_test.cc  |  3 ++-
 tensorflow/compiler/xla/tests/test_utils.cc   |  6 +++--
 15 files changed, 104 insertions(+), 64 deletions(-)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 86d9dbea90..ca71f2cc12 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -2209,7 +2209,7 @@ Status AlgebraicSimplifierVisitor::HandleSort(HloInstruction* sort) {
     }
     // If it is key/value sort, the output of sort is a tuple.
     return ReplaceWithNewInstruction(
-        sort, HloInstruction::CreateTuple({operand, sort->mutable_operand(1)}));
+        sort, HloInstruction::CreateTuple(sort->operands()));
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 2047f894b4..42d1f337dc 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -2133,16 +2133,20 @@ TEST_F(AlgebraicSimplifierTest, ReplaceEffectiveScalarKeyValueSortWithTuple) {
   Shape values_shape = ShapeUtil::MakeShape(S32, {5, 0});
   auto keys = builder.AddInstruction(
       HloInstruction::CreateParameter(0, keys_shape, "keys"));
-  auto values = builder.AddInstruction(
-      HloInstruction::CreateParameter(1, values_shape, "values"));
+  auto values0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, values_shape, "values0"));
+  auto values1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, values_shape, "values1"));
   builder.AddInstruction(HloInstruction::CreateSort(
-      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values));
+      ShapeUtil::MakeTupleShape({keys_shape, values_shape, values_shape}), 0,
+      keys, {values0, values1}));
   auto module = CreateNewModule();
   HloComputation* computation = module->AddEntryComputation(builder.Build());
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
   ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
-  EXPECT_THAT(computation->root_instruction(), op::Tuple(keys, values));
+  EXPECT_THAT(computation->root_instruction(),
+              op::Tuple(keys, values0, values1));
 }
 
 // Used for TEST_Ps that test merging (or not) of a kPad instruction into a
diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
index cef0eba14e..2411fdcb20 100644
--- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
@@ -284,7 +284,7 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSort) {
       HloInstruction::CreateParameter(1, s32_shape, "value"));
 
   HloInstruction* sort = builder.AddInstruction(HloInstruction::CreateSort(
-      ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}), 0, key, value));
+      ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}), 0, key, {value}));
   HloInstruction* gte = builder.AddInstruction(
       HloInstruction::CreateGetTupleElement(bf16_shape, sort, 0));
 
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
index d27786d160..909853106d 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
@@ -2346,7 +2346,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) {
   auto values = builder.AddInstruction(
       HloInstruction::CreateParameter(1, values_shape, "values"));
   auto sort = builder.AddInstruction(HloInstruction::CreateSort(
-      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values));
+      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys,
+      {values}));
 
   BuildModuleAndRunAnalysis(builder.Build());
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 09bcf8a9e7..c317e9e3b4 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -195,17 +195,16 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       }
       break;
     case HloOpcode::kSort: {
-      TF_RET_CHECK(proto.operand_ids_size() == 1 ||
-                   proto.operand_ids_size() == 2)
-          << "Sort instruction should have 1 or 2 operands but has "
+      TF_RET_CHECK(proto.operand_ids_size() >= 1)
+          << "Sort instruction should have at least 1 operand but has "
           << proto.operand_ids_size();
       TF_RET_CHECK(proto.dimensions().size() == 1)
           << "Sort instruction should have 1 dimension";
-      HloInstruction* keys = operands(0);
-      HloInstruction* values =
-          proto.operand_ids_size() == 2 ? operands(1) : nullptr;
-      instruction =
-          CreateSort(proto.shape(), proto.dimensions(0), keys, values);
+      auto sort_operands = all_operands();
+      HloInstruction* keys = sort_operands[0];
+      instruction = CreateSort(
+          proto.shape(), proto.dimensions(0), keys,
+          absl::Span<HloInstruction* const>(sort_operands).subspan(1));
       break;
     }
     case HloOpcode::kTranspose:
@@ -1078,7 +1077,7 @@ HloInstruction::CreateBroadcastSequence(
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateSort(
     const Shape& shape, int64 dimension, HloInstruction* keys,
-    HloInstruction* values) {
+    absl::Span<HloInstruction* const> values) {
   return absl::make_unique<HloSortInstruction>(shape, dimension, keys, values);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 44f776ebac..93ff04b1e4 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -670,10 +670,10 @@ class HloInstruction {
       const Shape& shape, HloInstruction* operand,
       absl::Span<const int64> dimensions);
 
-  // Creates a sort op, with a keys operand, and an optional values operand.
+  // Creates a sort op, with a keys operand, and optional values operands.
   static std::unique_ptr<HloInstruction> CreateSort(
       const Shape& shape, int64 dimension, HloInstruction* keys,
-      HloInstruction* values = nullptr);
+      absl::Span<HloInstruction* const> values = {});
 
   // Creates a while instruction, given a condition computation, a body
   // computation, and the initial value for the input of the computations. For
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 2ec233eaec..179ace2cdb 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -600,11 +600,11 @@ std::unique_ptr<HloInstruction> HloReduceInstruction::CloneWithNewOperandsImpl(
 
 HloSortInstruction::HloSortInstruction(const Shape& shape, int64 dimension,
                                        HloInstruction* keys,
-                                       HloInstruction* values)
+                                       absl::Span<HloInstruction* const> values)
     : HloInstruction(HloOpcode::kSort, shape), dimensions_({dimension}) {
   AppendOperand(keys);
-  if (values) {
-    AppendOperand(values);
+  for (auto* value : values) {
+    AppendOperand(value);
   }
 }
 
@@ -633,9 +633,8 @@ std::unique_ptr<HloInstruction> HloSortInstruction::CloneWithNewOperandsImpl(
     const Shape& shape, absl::Span<HloInstruction* const> new_operands,
     HloCloneContext* context) const {
   HloInstruction* keys = new_operands[0];
-  HloInstruction* values = new_operands.size() == 2 ? new_operands[1] : nullptr;
   return absl::make_unique<HloSortInstruction>(shape, dimensions(0), keys,
-                                               values);
+                                               new_operands.subspan(1));
 }
 
 HloTransposeInstruction::HloTransposeInstruction(
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 4c5fc759a3..3a0b7490dc 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -418,7 +418,7 @@ class HloSortInstruction : public HloInstruction {
  public:
   explicit HloSortInstruction(const Shape& shape, int64 dimension,
                               HloInstruction* keys,
-                              HloInstruction* values = nullptr);
+                              absl::Span<HloInstruction* const> values = {});
   // Returns the dimension sizes or numbers associated with this instruction.
   const std::vector<int64>& dimensions() const override { return dimensions_; }
   int64 dimensions(int64 index) const override { return dimensions()[index]; }
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 96f9ff6654..128113f7a5 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -839,8 +839,6 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
       break;
     }
     case HloOpcode::kSort: {
-      auto loc = lexer_.GetLoc();
-
       optional<std::vector<tensorflow::int64>> dimensions;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &dimensions};
@@ -848,20 +846,10 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
           dimensions->size() != 1) {
         return false;
       }
-      switch (operands.size()) {
-        case 1:
-          instruction = builder->AddInstruction(HloInstruction::CreateSort(
-              shape, dimensions->at(0), /*keys=*/operands[0]));
-          break;
-        case 2:
-          instruction = builder->AddInstruction(HloInstruction::CreateSort(
-              shape, dimensions->at(0),
-              /*keys=*/operands[0], /*values=*/operands[1]));
-          break;
-        default:
-          return Error(loc, StrCat("expects either 1 or 2 operands, but has ",
-                                   operands.size(), " operands"));
-      }
+      instruction = builder->AddInstruction(HloInstruction::CreateSort(
+          shape, dimensions->at(0),
+          /*keys=*/operands[0],
+          /*values=*/absl::Span<HloInstruction* const>(operands).subspan(1)));
       break;
     }
     case HloOpcode::kTuple: {
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index 17538c05bc..ef2e74588c 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1003,6 +1003,21 @@ ENTRY Sort {
   ROOT sorted = (f32[1024,16]{0,1}, s32[1024,16]{0,1}) sort(keys, values), dimensions={0}
 }
 
+)"
+},
+// Sort (Key, Value, Value, Value)
+{
+"SortManyValues",
+R"(HloModule sort
+
+ENTRY Sort {
+  keys = f32[1024,16]{0,1} parameter(0)
+  values.0 = s32[1024,16]{0,1} parameter(1)
+  values.1 = u32[1024,16]{0,1} parameter(2)
+  values.2 = f32[1024,16]{0,1} parameter(3)
+  ROOT sorted = (f32[1024,16]{0,1}, s32[1024,16]{0,1}, u32[1024,16]{0,1}, f32[1024,16]{0,1}) sort(keys, values.0, values.1, values.2), dimensions={0}
+}
+
 )"
 },
 // Conditional
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 620458855f..a1f668921d 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -266,18 +266,20 @@ Status ShapeVerifier::HandleReverse(HloInstruction* reverse) {
 }
 
 Status ShapeVerifier::HandleSort(HloInstruction* sort) {
-  if (sort->operand_count() < 1 || sort->operand_count() > 2) {
-    return InternalError("Expected 1 or 2 operands for %s instruction: %s",
+  if (sort->operand_count() < 1) {
+    return InternalError("Expected at least 1 operand for %s instruction: %s",
                          HloOpcodeString(sort->opcode()), sort->ToString());
   }
-  if (sort->operand_count() == 2 &&
-      !ShapeUtil::SameDimensions(sort->operand(0)->shape(),
-                                 sort->operand(1)->shape())) {
-    return InternalError(
-        "Expected sort to have to have the same dimensions for the keys and "
-        "the values. Keys shape is: %s\n, Values shape is: %s",
-        StringifyShape(sort->operand(0)->shape()),
-        StringifyShape(sort->operand(1)->shape()));
+  for (int64 operand = 1; operand < sort->operand_count(); ++operand) {
+    if (!ShapeUtil::SameDimensions(sort->operand(0)->shape(),
+                                   sort->operand(operand)->shape())) {
+      return InternalError(
+          "Expected sort to have to have the same dimensions for the keys "
+          "and the values. Keys shape is: %s\n, Values shape (operand index "
+          "%lld) is: %s",
+          StringifyShape(sort->operand(0)->shape()), operand,
+          StringifyShape(sort->operand(operand)->shape()));
+    }
   }
   return CheckVariadicShape(sort);
 }
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index e379911462..aa49f98bcf 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -1029,17 +1029,22 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
     case HloOpcode::kSort: {
       if (operand_shapes.size() == 1) {
         return *operand_shapes[0];
-      } else if (operand_shapes.size() == 2) {
-        if (!ShapeUtil::SameDimensions(*operand_shapes[0],
-                                       *operand_shapes[1])) {
-          return InvalidArgument(
-              "Sort keys and values dimensions must match. "
-              "Keys shape is: %s\n, Values shape is: %s",
-              ShapeUtil::HumanString(*operand_shapes[0]),
-              ShapeUtil::HumanString(*operand_shapes[1]));
+      } else {
+        for (int64 operand = 1; operand < operand_shapes.size(); ++operand) {
+          if (!ShapeUtil::SameDimensions(*operand_shapes[0],
+                                         *operand_shapes[operand])) {
+            return InvalidArgument(
+                "Sort keys and values dimensions must match. "
+                "Keys shape is: %s\n, Values shape (operand index %lld) is: %s",
+                ShapeUtil::HumanString(*operand_shapes[0]), operand,
+                ShapeUtil::HumanString(*operand_shapes[operand]));
+          }
+        }
+        std::vector<Shape> operand_shape_values;
+        for (const Shape* operand_shape : operand_shapes) {
+          operand_shape_values.push_back(*operand_shape);
         }
-        return ShapeUtil::MakeTupleShape(
-            {*operand_shapes[0], *operand_shapes[1]});
+        return ShapeUtil::MakeTupleShape(operand_shape_values);
       }
       return InvalidArgument("Unexpected number of operands for sort");
     }
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index 864ed43118..7b65e8c1c9 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -1618,13 +1618,37 @@ TEST_F(ShapeInferenceTest, BadSort) {
   auto values = ShapeUtil::MakeShape(F32, {5});
   StatusOr<Shape> statusor =
       ShapeInference::InferVariadicOpShape(HloOpcode::kSort, {&keys, &values});
-  ASSERT_FALSE(statusor.ok());
+  EXPECT_FALSE(statusor.ok());
+  EXPECT_THAT(statusor.status().error_message(),
+              HasSubstr("dimensions must match"))
+      << statusor.status();
+}
 
+TEST_F(ShapeInferenceTest, BadSortValuesMismatch) {
+  auto keys = ShapeUtil::MakeShape(F32, {4});
+  auto values_good = ShapeUtil::MakeShape(F32, {4});
+  auto values_bad = ShapeUtil::MakeShape(F32, {5});
+  StatusOr<Shape> statusor = ShapeInference::InferVariadicOpShape(
+      HloOpcode::kSort, {&keys, &values_good, &values_bad});
+  EXPECT_FALSE(statusor.ok());
   EXPECT_THAT(statusor.status().error_message(),
               HasSubstr("dimensions must match"))
       << statusor.status();
 }
 
+TEST_F(ShapeInferenceTest, SortManyValues) {
+  auto keys = ShapeUtil::MakeShape(F32, {4});
+  auto values_s32 = ShapeUtil::MakeShape(S32, {4});
+  auto values_u32 = ShapeUtil::MakeShape(U32, {4});
+  StatusOr<Shape> statusor = ShapeInference::InferVariadicOpShape(
+      HloOpcode::kSort, {&keys, &values_s32, &values_u32});
+  EXPECT_IS_OK(statusor);
+  Shape inferred_shape = statusor.ValueOrDie();
+  EXPECT_TRUE(ShapeUtil::Compatible(
+      inferred_shape,
+      ShapeUtil::MakeTupleShape({keys, values_s32, values_u32})));
+}
+
 class ScatterGatherShapeInferenceTest : public ShapeInferenceTest {
  protected:
   const Shape s64_scalar_ = ShapeUtil::MakeShape(S64, {});
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
index a571bd571b..d9ebebf74e 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
@@ -1073,7 +1073,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) {
   auto values = builder.AddInstruction(
       HloInstruction::CreateParameter(1, values_shape, "values"));
   auto sort = builder.AddInstruction(HloInstruction::CreateSort(
-      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values));
+      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys,
+      {values}));
 
   BuildModuleAndRunAnalysis(builder.Build());
 
diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc
index 5155f0c652..2f18036ff4 100644
--- a/tensorflow/compiler/xla/tests/test_utils.cc
+++ b/tensorflow/compiler/xla/tests/test_utils.cc
@@ -272,9 +272,11 @@ std::vector<HloInstruction*> FindConstrainedUses(
         constrained_uses.insert(constrained_uses.end(), converted_uses.begin(),
                                 converted_uses.end());
       } else if (opcode == HloOpcode::kSort &&
-                 instruction->operand_count() == 2 && op_num == 0) {
+                 instruction->operand_count() >= 2 && op_num == 0) {
         // Operand 0 of sort is the array of keys used for key/value
-        // (two-operand) kSort instructions.
+        // (two-operand) kSort instructions. Since sort stability is not
+        // guaranteed, constrain keys of key-value sort not to have duplicates,
+        // since otherwise the value order may legitimately differ.
         constrained_uses.push_back(instruction);
       }
     }
-- 
GitLab


From 854ae599743a1e92a31ad49cfe42c6454cefd3b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 20:05:22 -0700
Subject: [PATCH 1351/1357] Use Ophints to support TfLite
 UnidirectionaSequenceLstm and add an e2e test.

Support peephole and num_proj as well.

PiperOrigin-RevId: 216467578
---
 .../lite/experimental/examples/lstm/BUILD     |  40 ++
 .../experimental/examples/lstm/tflite_lstm.py | 396 ++++++++++++++++++
 .../lstm/unidirectional_sequence_lstm_test.py | 226 ++++++++++
 .../propagate_array_data_types.cc             |   6 +
 .../propagate_fixed_sizes.cc                  |  47 +++
 .../contrib/lite/toco/import_tensorflow.cc    |  44 ++
 tensorflow/contrib/lite/toco/model.h          |   6 +
 .../contrib/lite/toco/tflite/operator.cc      |  39 ++
 tensorflow/contrib/lite/toco/tooling_util.cc  |   5 +-
 .../tools/pip_package/pip_smoke_test.py       |   4 +
 10 files changed, 811 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/contrib/lite/experimental/examples/lstm/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py
 create mode 100644 tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py

diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/BUILD b/tensorflow/contrib/lite/experimental/examples/lstm/BUILD
new file mode 100644
index 0000000000..2125f218ca
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/examples/lstm/BUILD
@@ -0,0 +1,40 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//tensorflow:internal"])
+
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+py_library(
+    name = "tflite_lstm",
+    srcs = ["tflite_lstm.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/lite/python:lite",
+        "//tensorflow/python:framework",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "unidirectional_sequence_lstm_test",
+    size = "large",
+    srcs = ["unidirectional_sequence_lstm_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+    ],
+    deps = [
+        ":tflite_lstm",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/lite/python:lite",
+        "//tensorflow/examples/tutorials/mnist:input_data",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform",
+        "//tensorflow/python/tools:optimize_for_inference",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py b/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py
new file mode 100644
index 0000000000..2357743266
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py
@@ -0,0 +1,396 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TfLite LSTMCell wrapper.
+
+TODO(renjieliu): Find a better home for this one.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import tensorflow as tf
+
+from tensorflow.contrib.lite.python import lite
+from tensorflow.python.keras import activations
+from tensorflow.python.keras import initializers
+from tensorflow.python.layers import base as base_layer
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import rnn_cell_impl
+from tensorflow.python.platform import tf_logging as logging
+
+
+class TFLiteLSTMCell(rnn_cell_impl.LayerRNNCell):
+  """Long short-term memory unit (LSTM) recurrent network cell.
+
+  This is used only for TfLite, it provides hints and it also makes the
+  variables in the desired for the tflite ops  (transposed and seaparated).
+
+  The default non-peephole implementation is based on:
+
+    https://pdfs.semanticscholar.org/1154/0131eae85b2e11d53df7f1360eeb6476e7f4.pdf
+
+  Felix Gers, Jurgen Schmidhuber, and Fred Cummins.
+  "Learning to forget: Continual prediction with LSTM." IET, 850-855, 1999.
+
+  The peephole implementation is based on:
+
+    https://research.google.com/pubs/archive/43905.pdf
+
+  Hasim Sak, Andrew Senior, and Francoise Beaufays.
+  "Long short-term memory recurrent neural network architectures for
+   large scale acoustic modeling." INTERSPEECH, 2014.
+
+  The class uses optional peep-hole connections, optional cell clipping, and
+  an optional projection layer.
+
+  Note that this cell is not optimized for performance. Please use
+  `tf.contrib.cudnn_rnn.CudnnLSTM` for better performance on GPU, or
+  `tf.contrib.rnn.LSTMBlockCell` and `tf.contrib.rnn.LSTMBlockFusedCell` for
+  better performance on CPU.
+  """
+
+  def __init__(self,
+               num_units,
+               use_peepholes=False,
+               cell_clip=None,
+               initializer=None,
+               num_proj=None,
+               proj_clip=None,
+               num_unit_shards=None,
+               num_proj_shards=None,
+               forget_bias=1.0,
+               state_is_tuple=True,
+               activation=None,
+               reuse=None,
+               name=None,
+               dtype=None):
+    """Initialize the parameters for an LSTM cell.
+
+    Args:
+      num_units: int, The number of units in the LSTM cell.
+      use_peepholes: bool, set True to enable diagonal/peephole connections.
+      cell_clip: (optional) A float value, if provided the cell state is clipped
+        by this value prior to the cell output activation.
+      initializer: (optional) The initializer to use for the weight and
+        projection matrices.
+      num_proj: (optional) int, The output dimensionality for the projection
+        matrices.  If None, no projection is performed.
+      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
+        provided, then the projected values are clipped elementwise to within
+        `[-proj_clip, proj_clip]`.
+      num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a
+        variable_scope partitioner instead.
+      num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a
+        variable_scope partitioner instead.
+      forget_bias: Biases of the forget gate are initialized by default to 1 in
+        order to reduce the scale of forgetting at the beginning of the
+        training. Must set it manually to `0.0` when restoring from CudnnLSTM
+        trained checkpoints.
+      state_is_tuple: If True, accepted and returned states are 2-tuples of the
+        `c_state` and `m_state`.  If False, they are concatenated along the
+        column axis.  This latter behavior will soon be deprecated.
+      activation: Activation function of the inner states.  Default: `tanh`.
+      reuse: (optional) Python boolean describing whether to reuse variables in
+        an existing scope.  If not `True`, and the existing scope already has
+        the given variables, an error is raised.
+      name: String, the name of the layer. Layers with the same name will share
+        weights, but to avoid mistakes we require reuse=True in such cases.
+      dtype: Default dtype of the layer (default of `None` means use the type of
+        the first input). Required when `build` is called before `call`.  When
+        restoring from CudnnLSTM-trained checkpoints, use
+        `CudnnCompatibleLSTMCell` instead.
+    """
+    super(TFLiteLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype)
+    # TODO(raziel): decide if we want to just support tuples (yes please!).
+    if not state_is_tuple:
+      logging.warn(
+          "%s: Using a concatenated state is slower and will soon be "
+          "deprecated.  Use state_is_tuple=True.", self)
+    if num_unit_shards is not None or num_proj_shards is not None:
+      logging.warn(
+          "%s: The num_unit_shards and proj_unit_shards parameters are "
+          "deprecated and will be removed in Jan 2017.  "
+          "Use a variable scope with a partitioner instead.", self)
+
+    # Inputs must be 2-dimensional.
+    # TODO(raziel): layers stuff -- chop if un-layerizing Op.
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+    self._tflite_wrapper = lite.OpHint("UnidirectionalSequenceLstm")
+
+    self._num_units = num_units
+    self._use_peepholes = use_peepholes
+    self._cell_clip = cell_clip
+    self._initializer = initializer
+    self._num_proj = num_proj
+    self._proj_clip = proj_clip
+    self._num_unit_shards = num_unit_shards
+    self._num_proj_shards = num_proj_shards
+    self._forget_bias = forget_bias
+    self._state_is_tuple = state_is_tuple
+    self._activation = activation or math_ops.tanh
+
+    self._output_size = num_proj if num_proj else num_units
+    self._state_size = (
+        tf.nn.rnn_cell.LSTMStateTuple(num_units, self._output_size)
+        if state_is_tuple else num_units + self._output_size)
+
+  @property
+  def state_size(self):
+    return self._state_size
+
+  @property
+  def output_size(self):
+    return self._output_size
+
+  def build(self, inputs_shape):
+    """Build TfLite LSTM cell graph.
+
+    Args:
+      inputs_shape: The inputs_shape must be known, and is [batch_size,
+        input_size] shape.
+
+    Raises:
+      ValueError: if the inputs_shape is invalid.
+    """
+    if len(inputs_shape) != 2 or inputs_shape[1].value is None:
+      raise ValueError("Invalid inputs_shape, saw shape: %s" % inputs_shape)
+
+    input_depth = inputs_shape[1].value
+    maybe_partitioner = (
+        partitioned_variables.fixed_size_partitioner(self._num_unit_shards)
+        if self._num_unit_shards is not None else None)
+    input_weight_shape = [self._num_units, input_depth]
+    cell_weight_shape = [self._num_units, self._output_size]
+    bias_shape = [self._num_units]
+
+    def add_variable_wrapped(name, shape, initializer, index, partitioner):
+      var = self.add_variable(
+          name, shape=shape, initializer=initializer, partitioner=partitioner)
+      return self._tflite_wrapper.add_input(
+          var, name="name", index_override=index)
+
+    weight_initializer = self._initializer
+    if self.dtype is None:
+      bias_initializer = init_ops.zeros_initializer
+    else:
+      bias_initializer = init_ops.zeros_initializer(dtype=self.dtype)
+
+    self.input_to_input_w = add_variable_wrapped(
+        "input_to_input_w", input_weight_shape, weight_initializer, 1,
+        maybe_partitioner)
+    self.input_to_forget_w = add_variable_wrapped(
+        "input_to_forget_w", input_weight_shape, weight_initializer, 2,
+        maybe_partitioner)
+    self.input_to_cell_w = add_variable_wrapped(
+        "input_to_cell_w", input_weight_shape, weight_initializer, 3,
+        maybe_partitioner)
+    self.input_to_output_w = add_variable_wrapped(
+        "input_to_output_w", input_weight_shape, weight_initializer, 4,
+        maybe_partitioner)
+    self.cell_to_input_w = add_variable_wrapped(
+        "cell_to_input_w", cell_weight_shape, weight_initializer, 5,
+        maybe_partitioner)
+    self.cell_to_forget_w = add_variable_wrapped(
+        "cell_to_forget_w", cell_weight_shape, weight_initializer, 6,
+        maybe_partitioner)
+    self.cell_to_cell_w = add_variable_wrapped(
+        "cell_to_cell_w", cell_weight_shape, weight_initializer, 7,
+        maybe_partitioner)
+    self.cell_to_output_w = add_variable_wrapped(
+        "cell_to_output_w", cell_weight_shape, weight_initializer, 8,
+        maybe_partitioner)
+
+    self.input_bias = add_variable_wrapped(
+        "input_bias", bias_shape, bias_initializer, 12, maybe_partitioner)
+    self.forget_bias = add_variable_wrapped(
+        "forget_bias", bias_shape, bias_initializer, 13, maybe_partitioner)
+    self.cell_bias = add_variable_wrapped(
+        "cell_bias", bias_shape, bias_initializer, 14, maybe_partitioner)
+    self.output_bias = add_variable_wrapped(
+        "output_bias", bias_shape, bias_initializer, 15, maybe_partitioner)
+
+    # index 9, 10, 11.
+    # f stands for forget, i stands for input and o stands for output.
+    if self._use_peepholes:
+      self._w_f_diag = add_variable_wrapped("w_f_diag", [self._num_units],
+                                            self._initializer, 9,
+                                            maybe_partitioner)
+      self._w_i_diag = add_variable_wrapped("w_i_diag", [self._num_units],
+                                            self._initializer, 10,
+                                            maybe_partitioner)
+      self._w_o_diag = add_variable_wrapped("w_o_diag", [self._num_units],
+                                            self._initializer, 11,
+                                            maybe_partitioner)
+
+    # index 16 for proj kernel.
+    if self._num_proj is not None:
+      maybe_proj_partitioner = (
+          partitioned_variables.fixed_size_partitioner(self._num_proj_shards)
+          if self._num_proj_shards is not None else None)
+      self._proj_kernel = add_variable_wrapped(
+          "projection/kernel", [self._num_proj, self._num_units],
+          self._initializer,
+          16,
+          partitioner=maybe_proj_partitioner)
+
+    self.built = True
+
+  def call(self, inputs, state):
+    """Run one step of LSTM.
+
+    Args:
+      inputs: input Tensor, 2D, `[batch, num_units]`.
+      state: if `state_is_tuple` is False, this must be a state Tensor, `2-D,
+        [batch, state_size]`.  If `state_is_tuple` is True, this must be a tuple
+        of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`.
+
+    Returns:
+      A tuple containing:
+
+      - A `2-D, [batch, output_dim]`, Tensor representing the output of the
+        LSTM after reading `inputs` when previous state was `state`.
+        Here output_dim is:
+           num_proj if num_proj was set,
+           num_units otherwise.
+      - Tensor(s) representing the new state of LSTM after reading `inputs` when
+        the previous state was `state`.  Same type and shape(s) as `state`.
+
+    Raises:
+      ValueError: If input size cannot be inferred from inputs via
+        static shape inference.
+    """
+    inputs = self._tflite_wrapper.add_input(
+        inputs, tag="input", name="input", aggregate="stack", index_override=0)
+
+    # Make sure inputs and bias_initializer has the same type.
+    assert inputs.dtype == self.input_to_input_w.dtype
+
+    num_proj = self._num_units if self._num_proj is None else self._num_proj
+    sigmoid = math_ops.sigmoid
+
+    if self._state_is_tuple:
+      (c_prev, m_prev) = state
+    else:
+      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
+      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])
+
+    # Note: For TfLite, cell_state is at index 19 while activation state at
+    # index 18.
+    c_prev = self._tflite_wrapper.add_input(
+        c_prev,
+        tag="c_prev",
+        name="c_prev",
+        aggregate="first",
+        index_override=19)
+    m_prev = self._tflite_wrapper.add_input(
+        m_prev,
+        tag="m_prev",
+        name="m_prev",
+        aggregate="first",
+        index_override=18)
+
+    input_size = inputs.get_shape().with_rank(2)[1]
+    if input_size.value is None:
+      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
+
+    inputs_and_m_prev = array_ops.concat([inputs, m_prev], axis=1)
+
+    # i stands for input gate.
+    # f stands for forget gate activation.
+    # o outputs.
+    # j output of LSTM unit.
+    # c is the final state.
+    # m is the output.
+    i = nn_ops.bias_add(
+        tf.matmul(
+            inputs_and_m_prev,
+            tf.concat([self.input_to_input_w, self.cell_to_input_w], axis=1),
+            transpose_b=True), self.input_bias)
+    f = nn_ops.bias_add(
+        tf.matmul(
+            inputs_and_m_prev,
+            tf.concat([self.input_to_forget_w, self.cell_to_forget_w], axis=1),
+            transpose_b=True), self.forget_bias)
+    o = nn_ops.bias_add(
+        tf.matmul(
+            inputs_and_m_prev,
+            tf.concat([self.input_to_output_w, self.cell_to_output_w], axis=1),
+            transpose_b=True), self.output_bias)
+    j = nn_ops.bias_add(
+        tf.matmul(
+            inputs_and_m_prev,
+            tf.concat([self.input_to_cell_w, self.cell_to_cell_w], axis=1),
+            transpose_b=True), self.cell_bias)
+
+    # Diagonal connections
+    if self._use_peepholes:
+      c = (
+          sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
+          sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
+    else:
+      c = (
+          sigmoid(f + self._forget_bias) * c_prev +
+          sigmoid(i) * self._activation(j))
+
+    if self._cell_clip is not None:
+      # pylint: disable=invalid-unary-operand-type
+      c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
+      # pylint: enable=invalid-unary-operand-type
+    if self._use_peepholes:
+      m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
+    else:
+      m = sigmoid(o) * self._activation(c)
+
+    if self._num_proj is not None:
+      transposed_proj_kernel = tf.transpose(self._proj_kernel)
+      m = math_ops.matmul(m, transposed_proj_kernel)
+
+      if self._proj_clip is not None:
+        # pylint: disable=invalid-unary-operand-type
+        m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
+        # pylint: enable=invalid-unary-operand-type
+
+    c = self._tflite_wrapper.add_output(
+        c, tag="c", name="c", aggregate="last", index_override=1)
+    m = self._tflite_wrapper.add_output(
+        m, tag="m", name="m", index_override=2, aggregate="stack")
+
+    new_state = (
+        tf.nn.rnn_cell.LSTMStateTuple(c, m)
+        if self._state_is_tuple else array_ops.concat([c, m], 1))
+    return m, new_state
+
+  def get_config(self):
+    config = {
+        "num_units": self._num_units,
+        "use_peepholes": self._use_peepholes,
+        "cell_clip": self._cell_clip,
+        "initializer": initializers.serialize(self._initializer),
+        "num_proj": self._num_proj,
+        "proj_clip": self._proj_clip,
+        "num_unit_shards": self._num_unit_shards,
+        "num_proj_shards": self._num_proj_shards,
+        "forget_bias": self._forget_bias,
+        "state_is_tuple": self._state_is_tuple,
+        "activation": activations.serialize(self._activation),
+        "reuse": self._reuse,
+    }
+    base_config = super(TFLiteLSTMCell, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py b/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
new file mode 100644
index 0000000000..2ca977518c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
@@ -0,0 +1,226 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import tempfile
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib.lite.experimental.examples.lstm.tflite_lstm import TFLiteLSTMCell
+from tensorflow.examples.tutorials.mnist import input_data
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import test
+from tensorflow.python.tools import optimize_for_inference_lib
+
+# Number of steps to train model.
+TRAIN_STEPS = 1
+
+CONFIG = tf.ConfigProto(device_count={"GPU": 0})
+
+
+class UnidirectionalSequenceLstmTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    tf.reset_default_graph()
+    # Import MNIST dataset
+    self.mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
+
+    # Define constants
+    # Unrolled through 28 time steps
+    self.time_steps = 28
+    # Rows of 28 pixels
+    self.n_input = 28
+    # Learning rate for Adam optimizer
+    self.learning_rate = 0.001
+    # MNIST is meant to be classified in 10 classes(0-9).
+    self.n_classes = 10
+    # Batch size
+    self.batch_size = 16
+    # Lstm Units.
+    self.num_units = 64
+
+  def buildLstmLayer(self):
+    return tf.nn.rnn_cell.MultiRNNCell([
+        TFLiteLSTMCell(
+            self.num_units, use_peepholes=True, forget_bias=0, name="rnn1"),
+        TFLiteLSTMCell(self.num_units, num_proj=64, forget_bias=0, name="rnn2"),
+        TFLiteLSTMCell(
+            self.num_units // 2,
+            use_peepholes=True,
+            num_proj=64,
+            forget_bias=0,
+            name="rnn3"),
+        TFLiteLSTMCell(self.num_units, forget_bias=0, name="rnn4")
+    ])
+
+  def buildModel(self, lstm_layer, is_dynamic_rnn, is_train):
+    # Weights and biases for output softmax layer.
+    out_weights = tf.Variable(
+        tf.random_normal([self.num_units, self.n_classes]))
+    out_bias = tf.Variable(tf.random_normal([self.n_classes]))
+
+    # input image placeholder
+    x = tf.placeholder(
+        "float", [None, self.time_steps, self.n_input], name="INPUT_IMAGE")
+
+    # For dynamic_rnn, train with dynamic_rnn and inference with static_rnn.
+    # x is shaped [batch_size,time_steps,num_inputs]
+    if is_dynamic_rnn:
+      if is_train:
+        lstm_input = x
+        outputs, _ = tf.nn.dynamic_rnn(lstm_layer, lstm_input, dtype="float32")
+        outputs = tf.unstack(outputs, axis=1)
+      else:
+        lstm_input = tf.unstack(x, self.time_steps, 1)
+        outputs, _ = tf.nn.static_rnn(lstm_layer, lstm_input, dtype="float32")
+    else:
+      lstm_input = tf.unstack(x, self.time_steps, 1)
+      outputs, _ = tf.nn.static_rnn(lstm_layer, lstm_input, dtype="float32")
+
+    # Compute logits by multiplying outputs[-1] of shape [batch_size,num_units]
+    # by the softmax layer's out_weight of shape [num_units,n_classes]
+    # plus out_bias
+    prediction = tf.matmul(outputs[-1], out_weights) + out_bias
+    output_class = tf.nn.softmax(prediction, name="OUTPUT_CLASS")
+
+    return x, prediction, output_class
+
+  def trainModel(self, x, prediction, output_class, sess):
+    # input label placeholder
+    y = tf.placeholder("float", [None, self.n_classes])
+    # Loss function
+    loss = tf.reduce_mean(
+        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
+    # Optimization
+    opt = tf.train.AdamOptimizer(
+        learning_rate=self.learning_rate).minimize(loss)
+
+    # Initialize variables
+    init = tf.global_variables_initializer()
+    sess.run(init)
+    for _ in range(TRAIN_STEPS):
+      batch_x, batch_y = self.mnist.train.next_batch(
+          batch_size=self.batch_size, shuffle=False)
+
+      batch_x = batch_x.reshape((self.batch_size, self.time_steps,
+                                 self.n_input))
+      sess.run(opt, feed_dict={x: batch_x, y: batch_y})
+
+  def saveAndRestoreModel(self, lstm_layer, sess, saver, is_dynamic_rnn):
+    model_dir = tempfile.mkdtemp()
+    saver.save(sess, model_dir)
+
+    # Reset the graph.
+    tf.reset_default_graph()
+    x, prediction, output_class = self.buildModel(
+        lstm_layer, is_dynamic_rnn, is_train=False)
+
+    new_sess = tf.Session(config=CONFIG)
+    saver = tf.train.Saver()
+    saver.restore(new_sess, model_dir)
+    return x, prediction, output_class, new_sess
+
+  def getInferenceResult(self, x, output_class, sess):
+    b1, _ = self.mnist.train.next_batch(batch_size=1)
+    sample_input = np.reshape(b1, (1, self.time_steps, self.n_input))
+
+    expected_output = sess.run(output_class, feed_dict={x: sample_input})
+    frozen_graph = tf.graph_util.convert_variables_to_constants(
+        sess, sess.graph_def, [output_class.op.name])
+    return sample_input, expected_output, frozen_graph
+
+  def tfliteInvoke(self, graph, test_inputs, outputs):
+    tf.reset_default_graph()
+    # Turn the input into placeholder of shape 1
+    tflite_input = tf.placeholder(
+        "float", [1, self.time_steps, self.n_input], name="INPUT_IMAGE_LITE")
+    tf.import_graph_def(graph, name="", input_map={"INPUT_IMAGE": tflite_input})
+    with tf.Session() as sess:
+      curr = sess.graph_def
+      curr = tf.contrib.lite.convert_op_hints_to_stubs(graph_def=curr)
+
+    curr = optimize_for_inference_lib.optimize_for_inference(
+        curr, ["INPUT_IMAGE_LITE"], ["OUTPUT_CLASS"],
+        [tf.float32.as_datatype_enum])
+
+    tflite = tf.contrib.lite.toco_convert(
+        curr, [tflite_input], [outputs], allow_custom_ops=False)
+    interpreter = tf.contrib.lite.Interpreter(model_content=tflite)
+
+    try:
+      interpreter.allocate_tensors()
+    except ValueError:
+      assert False
+
+    input_index = (interpreter.get_input_details()[0]["index"])
+    interpreter.set_tensor(input_index, test_inputs)
+    interpreter.invoke()
+    output_index = (interpreter.get_output_details()[0]["index"])
+    result = interpreter.get_tensor(output_index)
+    # Reset all variables so it will not pollute other inferences.
+    interpreter.reset_all_variables()
+    return result
+
+  def testStaticRnnMultiRnnCell(self):
+    sess = tf.Session(config=CONFIG)
+
+    x, prediction, output_class = self.buildModel(
+        self.buildLstmLayer(), is_dynamic_rnn=False, is_train=True)
+    self.trainModel(x, prediction, output_class, sess)
+
+    saver = tf.train.Saver()
+    x, prediction, output_class, new_sess = self.saveAndRestoreModel(
+        self.buildLstmLayer(), sess, saver, is_dynamic_rnn=False)
+
+    test_inputs, expected_output, frozen_graph = self.getInferenceResult(
+        x, output_class, new_sess)
+
+    result = self.tfliteInvoke(frozen_graph, test_inputs, output_class)
+    self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-3))
+
+  def testDynamicRnnMultiRnnCell(self):
+    sess = tf.Session(config=CONFIG)
+
+    x, prediction, output_class = self.buildModel(
+        self.buildLstmLayer(), is_dynamic_rnn=True, is_train=True)
+    self.trainModel(x, prediction, output_class, sess)
+
+    # Since we don't yet support OpHints for dynamic, we will load the model
+    # back in as a static model. This requires the variables to have the same
+    # names as if they were trained as a static. Thus, we get rid of while/rnn
+    # names.
+    variables_to_save = {}
+    for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
+      op_name = i.name
+      if op_name.startswith("while/rnn/"):
+        op_name = op_name.split("while/rnn/")[1]
+      if op_name.endswith(":0"):
+        op_name = op_name.split(":0")[0]
+      variables_to_save[op_name] = i
+    saver = tf.train.Saver(variables_to_save)
+
+    x, prediction, output_class, new_sess = self.saveAndRestoreModel(
+        self.buildLstmLayer(), sess, saver, is_dynamic_rnn=True)
+
+    test_inputs, expected_output, frozen_graph = self.getInferenceResult(
+        x, output_class, new_sess)
+
+    result = self.tfliteInvoke(frozen_graph, test_inputs, output_class)
+    self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-3))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
index 40cd6dea82..47faa20a29 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
@@ -239,6 +239,12 @@ void SetDataTypeForAllOutputs(Model* model, Operator* op,
       }
       break;
     }
+    case OperatorType::kUnidirectionalSequenceLstm: {
+      const ArrayDataType data_type = model->GetArray(op->inputs[0]).data_type;
+      if (data_type != ArrayDataType::kFloat) return ::tensorflow::Status::OK();
+      SetDataTypeForAllOutputs(model, op, data_type);
+      break;
+    }
     default: {
       // These operators produce outputs with the same type as their 1st input
       CHECK_GT(op->inputs.size(), 0);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 5496e2093e..e861df2b3d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -946,6 +946,49 @@ void ProcessLstmCellOperator(Model* model, LstmCellOperator* op) {
       .copy_shape(activ_temp_shape);
 }
 
+void ProcessUnidirectionalSequenceLstmOperator(
+    Model* model, UnidirectionalSequenceLstmOperator* op) {
+  auto& output_array = model->GetArray(op->outputs[0]);
+  if (output_array.has_shape()) {
+    // Shape already propagated
+    return;
+  }
+
+  if (output_array.data_type == ArrayDataType::kNone) {
+    // Yield until the output type has been set by PropagateArrayDataTypes
+    return;
+  }
+
+  // TODO(renjieliu): check the inputs, as well as all kinds of weights.
+  const auto& input_array = model->GetArray(op->inputs[0]);
+  // Yield until input dims have been resolved.
+  if (!input_array.has_shape()) {
+    return;
+  }
+  const auto& input_shape = input_array.shape();
+  const int batch_size = input_shape.dims(1);
+  const int timestamp = input_shape.dims(0);
+
+  const auto& recurrent_to_output_weights_array =
+      model->GetArray(op->inputs[8]);
+  // Yield until input dims have been resolved.
+  if (!recurrent_to_output_weights_array.has_shape()) {
+    return;
+  }
+
+  constexpr int kInputActivationStateTensor = 18;
+  constexpr int kInputCellStateTensor = 19;
+  // b(115961645): This is a hack to work around.
+  model->GetArray(op->inputs[kInputActivationStateTensor]).buffer.reset();
+  model->GetArray(op->inputs[kInputCellStateTensor]).buffer.reset();
+
+  const auto& output_weights_shape = recurrent_to_output_weights_array.shape();
+  const int output_size = output_weights_shape.dims(1);
+
+  Shape* output_shape = output_array.mutable_shape();
+  output_shape->ReplaceDims({timestamp, batch_size, output_size});
+}
+
 void ProcessSpaceToBatchNDOperator(Model* model, SpaceToBatchNDOperator* op) {
   const auto& input_array = model->GetArray(op->inputs[0]);
   // Yield until input dims have been resolved.
@@ -1800,6 +1843,10 @@ void ProcessUnpackOperator(Model* model, UnpackOperator* op) {
       ProcessResizeBilinearOperator(model,
                                     static_cast<ResizeBilinearOperator*>(op));
       break;
+    case OperatorType::kUnidirectionalSequenceLstm:
+      ProcessUnidirectionalSequenceLstmOperator(
+          model, static_cast<UnidirectionalSequenceLstmOperator*>(op));
+      break;
     case OperatorType::kLstmCell:
       ProcessLstmCellOperator(model, static_cast<LstmCellOperator*>(op));
       break;
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 32f22e1ea0..6b195cc992 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -43,6 +43,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/public/session_options.h"
@@ -2002,6 +2003,48 @@ tensorflow::Status ConvertCTCBeamSearchDecoderOperator(
   return tensorflow::Status::OK();
 }
 
+// This isn't a TensorFlow builtin op. Currently this node can only be generated
+// with TfLite OpHint API.
+tensorflow::Status ConvertUnidirectionalSequenceLstm(
+    const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
+    Model* model) {
+  DCHECK_EQ(node.op(), "UnidirectionalSequenceLstm");
+
+  auto* op = new UnidirectionalSequenceLstmOperator();
+  const auto& indices = GetListAttr(node, "_tflite_input_indices");
+  if (indices.i_size() != node.input().size()) {
+    return tensorflow::errors::InvalidArgument("Input size does not match.");
+  }
+
+  // The input size needs to be the same as the TfLite UniDirectionalSequence
+  // Lstm implementation.
+  const int kInputsSize = 20;
+
+  op->inputs.resize(kInputsSize);
+  std::vector<bool> done(kInputsSize);
+  int idx = 0;
+  for (const string& input : node.input()) {
+    int real_index = indices.i(idx);
+    op->inputs[real_index] = (input);
+    done[real_index] = true;
+    idx++;
+  }
+
+  for (int idx = 0; idx < done.size(); idx++) {
+    if (!done[idx]) {
+      string optional_name = node.name() + "_" + std::to_string(idx);
+      model->CreateOptionalArray(optional_name);
+      op->inputs[idx] = optional_name;
+    }
+  }
+
+  // There're three outputs, only the last one is required.
+  op->outputs.push_back(node.name() + ":2");
+  model->operators.emplace_back(op);
+
+  return tensorflow::Status::OK();
+}
+
 }  // namespace
 
 namespace internal {
@@ -2121,6 +2164,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() {
       {"Transpose", ConvertSimpleOperator<TransposeOperator, 2>},
       {"Unpack", ConvertUnpackOperator},
       {"ZerosLike", ConvertSimpleOperator<TensorFlowZerosLikeOperator, 1>},
+      {"UnidirectionalSequenceLstm", ConvertUnidirectionalSequenceLstm},
   });
 }
 
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 61f1f095e9..f3b84430db 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -58,6 +58,7 @@ enum class OperatorType : uint8 {
   kL2Normalization,
   kL2Pool,
   kLstmCell,
+  kUnidirectionalSequenceLstm,
   kLocalResponseNormalization,
   kLog,
   kLogistic,
@@ -635,6 +636,11 @@ struct LstmCellOperator : Operator {
   KernelType kernel_type;
 };
 
+struct UnidirectionalSequenceLstmOperator : Operator {
+  UnidirectionalSequenceLstmOperator()
+      : Operator(OperatorType::kUnidirectionalSequenceLstm) {}
+};
+
 // Element-wise multiplication operator.
 //
 // Inputs:
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index ed37535fe0..e08a61d357 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -741,6 +741,42 @@ class Lstm : public BuiltinOperator<LstmCellOperator, ::tflite::LSTMOptions,
   }
 };
 
+class UnidirectionalSequenceLstm
+    : public BuiltinOperator<
+          UnidirectionalSequenceLstmOperator,
+          ::tflite::UnidirectionalSequenceLSTMOptions,
+          ::tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    // Current toco converter only supports tanh, no clip.
+    return ::tflite::CreateUnidirectionalSequenceLSTMOptions(
+        *builder, /*fused_activation_function=*/
+        ::tflite::ActivationFunctionType_TANH,
+        /*cell_clip=*/0.0,
+        /*proj_clip=*/0.0);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    // Only support tanh activation, so check that tflite type is tanh.
+    DCHECK(options.fused_activation_function() ==
+           ::tflite::ActivationFunctionType_TANH);
+  }
+
+  int GetVersion(const Operator& op) const override { return 1; }
+
+  std::vector<bool> GetMutatingInputVariables(
+      const Operator& op) const override {
+    std::vector<bool> mutating_input_variables(op.inputs.size(), false);
+    mutating_input_variables[kInputActivationStateTensor] = true;
+    mutating_input_variables[kInputCellStateTensor] = true;
+    return mutating_input_variables;
+  }
+};
+
 class Mean : public BuiltinOperator<MeanOperator, ::tflite::ReducerOptions,
                                     ::tflite::BuiltinOptions_ReducerOptions> {
  public:
@@ -1435,6 +1471,9 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
                                       OperatorType::kFakeQuant));
   ops.push_back(
       MakeUnique<Pack>(::tflite::BuiltinOperator_PACK, OperatorType::kPack));
+  ops.emplace_back(MakeUnique<UnidirectionalSequenceLstm>(
+      ::tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+      OperatorType::kUnidirectionalSequenceLstm));
   ops.push_back(MakeUnique<OneHot>(::tflite::BuiltinOperator_ONE_HOT,
                                    OperatorType::kOneHot));
   ops.push_back(MakeUnique<Unpack>(::tflite::BuiltinOperator_UNPACK,
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 083a96ad9d..61aa311212 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -407,6 +407,7 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(CTCBeamSearchDecoder)
     HANDLE_OPERATORTYPENAME_CASE(Unpack)
     HANDLE_OPERATORTYPENAME_CASE(ZerosLike)
+    HANDLE_OPERATORTYPENAME_CASE(UnidirectionalSequenceLstm)
     default:
       LOG(FATAL) << "Unhandled op type";
 #undef HANDLE_OPERATORTYPENAME_CASE
@@ -898,12 +899,12 @@ void CheckNoMissingArray(const Model& model) {
 void FixNoMissingArray(Model* model) {
   for (const auto& op : model->operators) {
     for (const auto& input : op->inputs) {
-      if (!model->HasArray(input)) {
+      if (!model->HasArray(input) && !model->IsOptionalArray(input)) {
         model->GetOrCreateArray(input);
       }
     }
     for (const auto& output : op->outputs) {
-      if (!model->HasArray(output)) {
+      if (!model->HasArray(output) && !model->IsOptionalArray(output)) {
         model->GetOrCreateArray(output);
       }
     }
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index c6ef82ccdc..45106b35fc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -85,6 +85,10 @@ BLACKLIST = [
     # contrib
     "//tensorflow/contrib/session_bundle:session_bundle_half_plus_two",
     "//tensorflow/contrib/keras:testing_utils",
+    "//tensorflow/contrib/lite/experimental/examples/lstm:tflite_lstm",
+    "//tensorflow/contrib/lite/experimental/examples/lstm:tflite_lstm.py",
+    "//tensorflow/contrib/lite/experimental/examples/lstm:unidirectional_sequence_lstm_test",  # pylint:disable=line-too-long
+    "//tensorflow/contrib/lite/experimental/examples/lstm:unidirectional_sequence_lstm_test.py",  # pylint:disable=line-too-long
     "//tensorflow/contrib/lite/python:interpreter",
     "//tensorflow/contrib/lite/python:interpreter_test",
     "//tensorflow/contrib/lite/python:interpreter.py",
-- 
GitLab


From 5d670479c6ea20c510fa46ae1bb45123df75e067 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 20:51:50 -0700
Subject: [PATCH 1352/1357] Add a more verbose error message.

PiperOrigin-RevId: 216471178
---
 tensorflow/contrib/lite/kernels/embedding_lookup.cc    | 10 ++++++++--
 .../contrib/lite/kernels/embedding_lookup_sparse.cc    |  4 +++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup.cc b/tensorflow/contrib/lite/kernels/embedding_lookup.cc
index fe33f98eb0..1d0c71ad48 100644
--- a/tensorflow/contrib/lite/kernels/embedding_lookup.cc
+++ b/tensorflow/contrib/lite/kernels/embedding_lookup.cc
@@ -78,7 +78,10 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
   for (int i = 0; i < SizeOfDimension(lookup, 0); i++) {
     int idx = lookup->data.i32[i];
     if (idx >= row_size || idx < 0) {
-      context->ReportError(context, "Embedding Lookup: index out of bounds.");
+      context->ReportError(context,
+                           "Embedding Lookup: index out of bounds. "
+                           "Got %d, and bounds are [0, %d]",
+                           idx, row_size - 1);
       return kTfLiteError;
     } else {
       memcpy(output->data.raw + i * row_bytes,
@@ -104,7 +107,10 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
   for (int i = 0; i < SizeOfDimension(lookup, 0); i++) {
     int idx = lookup->data.i32[i];
     if (idx >= row_size || idx < 0) {
-      context->ReportError(context, "Embedding Lookup: index out of bounds.");
+      context->ReportError(context,
+                           "Embedding Lookup: index out of bounds. "
+                           "Got %d, and bounds are [0, %d]",
+                           idx, row_size - 1);
       return kTfLiteError;
     } else {
       // Dequantize embedding values.
diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc
index aa75b03990..0b076941ea 100644
--- a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc
+++ b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc
@@ -188,7 +188,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     int idx = ids->data.i32[i];
     if (idx >= num_rows || idx < 0) {
       context->ReportError(context,
-                           "Embedding Lookup Sparse: index out of bounds.");
+                           "Embedding Lookup Sparse: index out of bounds. "
+                           "Got %d, and bounds are [0, %d]",
+                           idx, num_rows - 1);
       return kTfLiteError;
     }
 
-- 
GitLab


From 91d625c6f0377bb629b2509bb4f5cb040d870244 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Tue, 9 Oct 2018 21:54:32 -0700
Subject: [PATCH 1353/1357] Fix lstm_test&layer_norm_lstm_test w/ Clang 8.0.0

PiperOrigin-RevId: 216475683
---
 .../lite/kernels/layer_norm_lstm_test.cc      | 116 +++++++++---------
 tensorflow/contrib/lite/kernels/lstm_test.cc  |  92 +++++++-------
 2 files changed, 102 insertions(+), 106 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
index 479f6a7d3c..1535f750f9 100644
--- a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
@@ -129,87 +129,85 @@ class LayerNormLSTMOpModel : public SingleOpModel {
     BuildInterpreter(input_shapes);
   }
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     PopulateTensor(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     PopulateTensor(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     PopulateTensor(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     PopulateTensor(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     PopulateTensor(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_output_weights_, f);
   }
 
-  void SetInputLayerNormWeights(std::initializer_list<float> f) {
+  void SetInputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(input_layer_norm_weights_, f);
   }
 
-  void SetForgetLayerNormWeights(std::initializer_list<float> f) {
+  void SetForgetLayerNormWeights(std::vector<float> f) {
     PopulateTensor(forget_layer_norm_weights_, f);
   }
 
-  void SetCellLayerNormWeights(std::initializer_list<float> f) {
+  void SetCellLayerNormWeights(std::vector<float> f) {
     PopulateTensor(cell_layer_norm_weights_, f);
   }
 
-  void SetOutputLayerNormWeights(std::initializer_list<float> f) {
+  void SetOutputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(output_layer_norm_weights_, f);
   }
 
-  void SetInputGateBias(std::initializer_list<float> f) {
+  void SetInputGateBias(std::vector<float> f) {
     PopulateTensor(input_gate_bias_, f);
   }
 
-  void SetForgetGateBias(std::initializer_list<float> f) {
+  void SetForgetGateBias(std::vector<float> f) {
     PopulateTensor(forget_gate_bias_, f);
   }
 
-  void SetCellBias(std::initializer_list<float> f) {
-    PopulateTensor(cell_bias_, f);
-  }
+  void SetCellBias(std::vector<float> f) { PopulateTensor(cell_bias_, f); }
 
-  void SetOutputGateBias(std::initializer_list<float> f) {
+  void SetOutputGateBias(std::vector<float> f) {
     PopulateTensor(output_gate_bias_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     PopulateTensor(projection_weights_, f);
   }
 
-  void SetProjectionBias(std::initializer_list<float> f) {
+  void SetProjectionBias(std::vector<float> f) {
     PopulateTensor(projection_bias_, f);
   }
 
@@ -278,67 +276,67 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel {
                              use_projection_bias, cell_clip, proj_clip,
                              input_shapes, TensorType_UINT8) {}
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_output_weights_, f);
   }
 
-  void SetInputLayerNormWeights(std::initializer_list<float> f) {
+  void SetInputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(input_layer_norm_weights_, f);
   }
 
-  void SetForgetLayerNormWeights(std::initializer_list<float> f) {
+  void SetForgetLayerNormWeights(std::vector<float> f) {
     PopulateTensor(forget_layer_norm_weights_, f);
   }
 
-  void SetCellLayerNormWeights(std::initializer_list<float> f) {
+  void SetCellLayerNormWeights(std::vector<float> f) {
     PopulateTensor(cell_layer_norm_weights_, f);
   }
 
-  void SetOutputLayerNormWeights(std::initializer_list<float> f) {
+  void SetOutputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(output_layer_norm_weights_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(projection_weights_, f);
   }
 };
@@ -346,26 +344,26 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel {
 class BaseLayerNormLstmTest : public ::testing::Test {
  protected:
   // Weights of the Layer Norm LSTM model. Some are optional.
-  std::initializer_list<float> input_to_input_weights_;
-  std::initializer_list<float> input_to_cell_weights_;
-  std::initializer_list<float> input_to_forget_weights_;
-  std::initializer_list<float> input_to_output_weights_;
-  std::initializer_list<float> input_gate_bias_;
-  std::initializer_list<float> cell_gate_bias_;
-  std::initializer_list<float> forget_gate_bias_;
-  std::initializer_list<float> output_gate_bias_;
-  std::initializer_list<float> recurrent_to_input_weights_;
-  std::initializer_list<float> recurrent_to_cell_weights_;
-  std::initializer_list<float> recurrent_to_forget_weights_;
-  std::initializer_list<float> recurrent_to_output_weights_;
-  std::initializer_list<float> cell_to_input_weights_;
-  std::initializer_list<float> cell_to_forget_weights_;
-  std::initializer_list<float> cell_to_output_weights_;
-  std::initializer_list<float> input_layer_norm_weights_;
-  std::initializer_list<float> forget_layer_norm_weights_;
-  std::initializer_list<float> cell_layer_norm_weights_;
-  std::initializer_list<float> output_layer_norm_weights_;
-  std::initializer_list<float> projection_weights_;
+  std::vector<float> input_to_input_weights_;
+  std::vector<float> input_to_cell_weights_;
+  std::vector<float> input_to_forget_weights_;
+  std::vector<float> input_to_output_weights_;
+  std::vector<float> input_gate_bias_;
+  std::vector<float> cell_gate_bias_;
+  std::vector<float> forget_gate_bias_;
+  std::vector<float> output_gate_bias_;
+  std::vector<float> recurrent_to_input_weights_;
+  std::vector<float> recurrent_to_cell_weights_;
+  std::vector<float> recurrent_to_forget_weights_;
+  std::vector<float> recurrent_to_output_weights_;
+  std::vector<float> cell_to_input_weights_;
+  std::vector<float> cell_to_forget_weights_;
+  std::vector<float> cell_to_output_weights_;
+  std::vector<float> input_layer_norm_weights_;
+  std::vector<float> forget_layer_norm_weights_;
+  std::vector<float> cell_layer_norm_weights_;
+  std::vector<float> output_layer_norm_weights_;
+  std::vector<float> projection_weights_;
 
   // Layer Norm LSTM input is stored as num_batch x num_inputs vector.
   std::vector<std::vector<float>> layer_norm_lstm_input_;
diff --git a/tensorflow/contrib/lite/kernels/lstm_test.cc b/tensorflow/contrib/lite/kernels/lstm_test.cc
index e7ddfceb45..f8947db724 100644
--- a/tensorflow/contrib/lite/kernels/lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_test.cc
@@ -116,71 +116,69 @@ class LSTMOpModel : public SingleOpModel {
     BuildInterpreter(input_shapes);
   }
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     PopulateTensor(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     PopulateTensor(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     PopulateTensor(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     PopulateTensor(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     PopulateTensor(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_output_weights_, f);
   }
 
-  void SetInputGateBias(std::initializer_list<float> f) {
+  void SetInputGateBias(std::vector<float> f) {
     PopulateTensor(input_gate_bias_, f);
   }
 
-  void SetForgetGateBias(std::initializer_list<float> f) {
+  void SetForgetGateBias(std::vector<float> f) {
     PopulateTensor(forget_gate_bias_, f);
   }
 
-  void SetCellBias(std::initializer_list<float> f) {
-    PopulateTensor(cell_bias_, f);
-  }
+  void SetCellBias(std::vector<float> f) { PopulateTensor(cell_bias_, f); }
 
-  void SetOutputGateBias(std::initializer_list<float> f) {
+  void SetOutputGateBias(std::vector<float> f) {
     PopulateTensor(output_gate_bias_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     PopulateTensor(projection_weights_, f);
   }
 
-  void SetProjectionBias(std::initializer_list<float> f) {
+  void SetProjectionBias(std::vector<float> f) {
     PopulateTensor(projection_bias_, f);
   }
 
@@ -243,51 +241,51 @@ class HybridLSTMOpModel : public LSTMOpModel {
                     use_projection_weights, use_projection_bias, cell_clip,
                     proj_clip, input_shapes, TensorType_UINT8) {}
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_output_weights_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(projection_weights_, f);
   }
 };
@@ -295,22 +293,22 @@ class HybridLSTMOpModel : public LSTMOpModel {
 class BaseLstmTest : public ::testing::Test {
  protected:
   // Weights of the LSTM model. Some are optional.
-  std::initializer_list<float> input_to_input_weights_;
-  std::initializer_list<float> input_to_cell_weights_;
-  std::initializer_list<float> input_to_forget_weights_;
-  std::initializer_list<float> input_to_output_weights_;
-  std::initializer_list<float> input_gate_bias_;
-  std::initializer_list<float> cell_gate_bias_;
-  std::initializer_list<float> forget_gate_bias_;
-  std::initializer_list<float> output_gate_bias_;
-  std::initializer_list<float> recurrent_to_input_weights_;
-  std::initializer_list<float> recurrent_to_cell_weights_;
-  std::initializer_list<float> recurrent_to_forget_weights_;
-  std::initializer_list<float> recurrent_to_output_weights_;
-  std::initializer_list<float> cell_to_input_weights_;
-  std::initializer_list<float> cell_to_forget_weights_;
-  std::initializer_list<float> cell_to_output_weights_;
-  std::initializer_list<float> projection_weights_;
+  std::vector<float> input_to_input_weights_;
+  std::vector<float> input_to_cell_weights_;
+  std::vector<float> input_to_forget_weights_;
+  std::vector<float> input_to_output_weights_;
+  std::vector<float> input_gate_bias_;
+  std::vector<float> cell_gate_bias_;
+  std::vector<float> forget_gate_bias_;
+  std::vector<float> output_gate_bias_;
+  std::vector<float> recurrent_to_input_weights_;
+  std::vector<float> recurrent_to_cell_weights_;
+  std::vector<float> recurrent_to_forget_weights_;
+  std::vector<float> recurrent_to_output_weights_;
+  std::vector<float> cell_to_input_weights_;
+  std::vector<float> cell_to_forget_weights_;
+  std::vector<float> cell_to_output_weights_;
+  std::vector<float> projection_weights_;
 
   // LSTM input is stored as num_batch x num_inputs vector.
   std::vector<std::vector<float>> lstm_input_;
-- 
GitLab


From dcf641daac0f2fee74eafbb0de1d32f6c8c4c6fd Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 22:57:45 -0700
Subject: [PATCH 1354/1357] Remove python shebang line from gen_git_source.

PiperOrigin-RevId: 216479972
---
 tensorflow/tensorflow.bzl              | 4 ++--
 tensorflow/tools/git/BUILD             | 6 ++++--
 tensorflow/tools/git/gen_git_source.py | 1 -
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index cad5de1b0c..df15914233 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1967,9 +1967,9 @@ def tf_version_info_genrule():
         ],
         outs = ["util/version_info.cc"],
         cmd =
-            "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\" --git_tag_override=$${GIT_TAG_OVERRIDE:-}",
+            "$(location //tensorflow/tools/git:gen_git_source) --generate $(SRCS) \"$@\" --git_tag_override=$${GIT_TAG_OVERRIDE:-}",
         local = 1,
-        tools = [clean_dep("//tensorflow/tools/git:gen_git_source.py")],
+        tools = [clean_dep("//tensorflow/tools/git:gen_git_source")],
     )
 
 def tf_py_build_info_genrule():
diff --git a/tensorflow/tools/git/BUILD b/tensorflow/tools/git/BUILD
index daa17fbd50..34a5167948 100644
--- a/tensorflow/tools/git/BUILD
+++ b/tensorflow/tools/git/BUILD
@@ -6,6 +6,8 @@ package(default_visibility = ["//tensorflow:internal"])
 
 licenses(["notice"])  # Apache 2.0
 
-exports_files(
-    ["gen_git_source.py"],
+py_binary(
+    name = "gen_git_source",
+    srcs = ["gen_git_source.py"],
+    srcs_version = "PY2AND3",
 )
diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py
index cc2288a7fa..8e7cd9b104 100755
--- a/tensorflow/tools/git/gen_git_source.py
+++ b/tensorflow/tools/git/gen_git_source.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
-- 
GitLab


From 5a2d98f7f7cf6f52eb0496bf27be07d9e1f29040 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 23:57:17 -0700
Subject: [PATCH 1355/1357] Run while loop test that was not being run before.

PiperOrigin-RevId: 216483744
---
 tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index baea5c0f6d..a5f85b97f7 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -1116,8 +1116,8 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(10.0, r.eval())
 
   def testWhile_Gpu_2(self):
-    self._testWhile_Gpu_1(use_gpu=False)
-    self._testWhile_Gpu_1(use_gpu=True)
+    self._testWhile_Gpu_2(use_gpu=False)
+    self._testWhile_Gpu_2(use_gpu=True)
 
   def testWhileShape(self):
     with self.cached_session():
-- 
GitLab


From 1409ea9dbd8275dcbd394451d2cb878e0e873d45 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 23:57:18 -0700
Subject: [PATCH 1356/1357] Delete dead code in batch_scatter_ops_test.

PiperOrigin-RevId: 216483746
---
 .../python/kernel_tests/batch_scatter_ops_test.py      | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tensorflow/python/kernel_tests/batch_scatter_ops_test.py b/tensorflow/python/kernel_tests/batch_scatter_ops_test.py
index 0d41a7e3b3..498e5f05a3 100644
--- a/tensorflow/python/kernel_tests/batch_scatter_ops_test.py
+++ b/tensorflow/python/kernel_tests/batch_scatter_ops_test.py
@@ -73,16 +73,6 @@ class ScatterTest(test.TestCase):
           tf_scatter(ref, indices, updates).eval()
           self.assertAllClose(ref.eval(), new)
 
-  def _VariableRankTests(self,
-                         tf_scatter):
-    vtypes = [np.float32, np.float64]
-    if tf_scatter != state_ops.scatter_div:
-      vtypes.append(np.int32)
-
-    for vtype in vtypes:
-      for itype in (np.int32, np.int64):
-        self._VariableRankTest(tf_scatter, vtype, itype)
-
   def testVariableRankUpdate(self):
     vtypes = [np.float32, np.float64]
     for vtype in vtypes:
-- 
GitLab


From 7575e0949703a4dd0ec19e51e568e9abba037728 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 02:01:57 -0700
Subject: [PATCH 1357/1357] compat: Update forward compatibility horizon to
 2018-10-10

PiperOrigin-RevId: 216495091
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 0e14c0e044..b7a1fce586 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 9)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 10)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab